{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 18423, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016284655783088385, "grad_norm": 26.8387451171875, "learning_rate": 2.7129679869777538e-08, "loss": 1.2654, "step": 1 }, { "epoch": 0.0003256931156617677, "grad_norm": 21.838634490966797, "learning_rate": 5.4259359739555076e-08, "loss": 1.1835, "step": 2 }, { "epoch": 0.0004885396734926516, "grad_norm": 28.06851577758789, "learning_rate": 8.13890396093326e-08, "loss": 1.2674, "step": 3 }, { "epoch": 0.0006513862313235354, "grad_norm": 26.327239990234375, "learning_rate": 1.0851871947911015e-07, "loss": 1.2807, "step": 4 }, { "epoch": 0.0008142327891544192, "grad_norm": 22.29421615600586, "learning_rate": 1.356483993488877e-07, "loss": 1.1924, "step": 5 }, { "epoch": 0.0009770793469853032, "grad_norm": 26.917884826660156, "learning_rate": 1.627780792186652e-07, "loss": 1.2284, "step": 6 }, { "epoch": 0.001139925904816187, "grad_norm": 25.6861629486084, "learning_rate": 1.8990775908844277e-07, "loss": 1.2602, "step": 7 }, { "epoch": 0.0013027724626470708, "grad_norm": 23.733572006225586, "learning_rate": 2.170374389582203e-07, "loss": 1.1575, "step": 8 }, { "epoch": 0.0014656190204779547, "grad_norm": 28.811185836791992, "learning_rate": 2.4416711882799786e-07, "loss": 1.1991, "step": 9 }, { "epoch": 0.0016284655783088385, "grad_norm": 26.767047882080078, "learning_rate": 2.712967986977754e-07, "loss": 1.2236, "step": 10 }, { "epoch": 0.0017913121361397223, "grad_norm": 32.37723159790039, "learning_rate": 2.984264785675529e-07, "loss": 1.2611, "step": 11 }, { "epoch": 0.0019541586939706064, "grad_norm": 30.011526107788086, "learning_rate": 3.255561584373304e-07, "loss": 1.2638, "step": 12 }, { "epoch": 0.00211700525180149, "grad_norm": 25.1384220123291, "learning_rate": 3.52685838307108e-07, "loss": 1.0642, "step": 13 }, { "epoch": 0.002279851809632374, "grad_norm": 25.534574508666992, "learning_rate": 3.7981551817688554e-07, "loss": 1.3376, "step": 14 }, { "epoch": 0.002442698367463258, "grad_norm": 21.540822982788086, "learning_rate": 4.0694519804666307e-07, "loss": 1.2711, "step": 15 }, { "epoch": 0.0026055449252941417, "grad_norm": 27.07478141784668, "learning_rate": 4.340748779164406e-07, "loss": 1.295, "step": 16 }, { "epoch": 0.0027683914831250255, "grad_norm": 27.001358032226562, "learning_rate": 4.6120455778621814e-07, "loss": 1.259, "step": 17 }, { "epoch": 0.0029312380409559093, "grad_norm": 23.242109298706055, "learning_rate": 4.883342376559957e-07, "loss": 1.2554, "step": 18 }, { "epoch": 0.003094084598786793, "grad_norm": 23.690879821777344, "learning_rate": 5.154639175257732e-07, "loss": 1.1523, "step": 19 }, { "epoch": 0.003256931156617677, "grad_norm": 15.80941390991211, "learning_rate": 5.425935973955508e-07, "loss": 1.1122, "step": 20 }, { "epoch": 0.003419777714448561, "grad_norm": 78.11163330078125, "learning_rate": 5.697232772653283e-07, "loss": 1.2424, "step": 21 }, { "epoch": 0.0035826242722794446, "grad_norm": 75.87837219238281, "learning_rate": 5.968529571351058e-07, "loss": 1.1936, "step": 22 }, { "epoch": 0.0037454708301103284, "grad_norm": 23.52526092529297, "learning_rate": 6.239826370048833e-07, "loss": 1.1788, "step": 23 }, { "epoch": 0.003908317387941213, "grad_norm": 21.37386131286621, "learning_rate": 6.511123168746608e-07, "loss": 1.2422, "step": 24 }, { "epoch": 0.004071163945772096, "grad_norm": 17.612640380859375, "learning_rate": 6.782419967444384e-07, "loss": 1.1332, "step": 25 }, { "epoch": 0.00423401050360298, "grad_norm": 32.772674560546875, "learning_rate": 7.05371676614216e-07, "loss": 1.1131, "step": 26 }, { "epoch": 0.004396857061433864, "grad_norm": 21.91839027404785, "learning_rate": 7.325013564839936e-07, "loss": 0.9842, "step": 27 }, { "epoch": 0.004559703619264748, "grad_norm": 20.819995880126953, "learning_rate": 7.596310363537711e-07, "loss": 1.0054, "step": 28 }, { "epoch": 0.004722550177095631, "grad_norm": 24.646459579467773, "learning_rate": 7.867607162235487e-07, "loss": 0.9904, "step": 29 }, { "epoch": 0.004885396734926516, "grad_norm": 20.579051971435547, "learning_rate": 8.138903960933261e-07, "loss": 0.9545, "step": 30 }, { "epoch": 0.005048243292757399, "grad_norm": 37.32696533203125, "learning_rate": 8.410200759631037e-07, "loss": 0.9289, "step": 31 }, { "epoch": 0.005211089850588283, "grad_norm": 75.29168701171875, "learning_rate": 8.681497558328812e-07, "loss": 1.0236, "step": 32 }, { "epoch": 0.005373936408419167, "grad_norm": 58.49039077758789, "learning_rate": 8.952794357026588e-07, "loss": 1.0229, "step": 33 }, { "epoch": 0.005536782966250051, "grad_norm": 52.851097106933594, "learning_rate": 9.224091155724363e-07, "loss": 0.9491, "step": 34 }, { "epoch": 0.005699629524080934, "grad_norm": 49.8441162109375, "learning_rate": 9.495387954422139e-07, "loss": 0.9729, "step": 35 }, { "epoch": 0.005862476081911819, "grad_norm": 37.720542907714844, "learning_rate": 9.766684753119915e-07, "loss": 0.9595, "step": 36 }, { "epoch": 0.006025322639742702, "grad_norm": 55.767295837402344, "learning_rate": 1.0037981551817688e-06, "loss": 0.9096, "step": 37 }, { "epoch": 0.006188169197573586, "grad_norm": 203.90345764160156, "learning_rate": 1.0309278350515464e-06, "loss": 0.8471, "step": 38 }, { "epoch": 0.0063510157554044705, "grad_norm": 229.84132385253906, "learning_rate": 1.058057514921324e-06, "loss": 0.8502, "step": 39 }, { "epoch": 0.006513862313235354, "grad_norm": 81.30517578125, "learning_rate": 1.0851871947911016e-06, "loss": 0.8437, "step": 40 }, { "epoch": 0.006676708871066238, "grad_norm": 12.894872665405273, "learning_rate": 1.112316874660879e-06, "loss": 0.8355, "step": 41 }, { "epoch": 0.006839555428897122, "grad_norm": 4.868096828460693, "learning_rate": 1.1394465545306566e-06, "loss": 0.8132, "step": 42 }, { "epoch": 0.007002401986728006, "grad_norm": 9.17147445678711, "learning_rate": 1.1665762344004341e-06, "loss": 0.9245, "step": 43 }, { "epoch": 0.007165248544558889, "grad_norm": 5.590785026550293, "learning_rate": 1.1937059142702115e-06, "loss": 0.8536, "step": 44 }, { "epoch": 0.0073280951023897735, "grad_norm": 6.955132484436035, "learning_rate": 1.220835594139989e-06, "loss": 0.8059, "step": 45 }, { "epoch": 0.007490941660220657, "grad_norm": 6.8412184715271, "learning_rate": 1.2479652740097667e-06, "loss": 0.8191, "step": 46 }, { "epoch": 0.007653788218051541, "grad_norm": 5.630576133728027, "learning_rate": 1.2750949538795443e-06, "loss": 0.8401, "step": 47 }, { "epoch": 0.007816634775882425, "grad_norm": 7.418978214263916, "learning_rate": 1.3022246337493217e-06, "loss": 0.8361, "step": 48 }, { "epoch": 0.007979481333713309, "grad_norm": 8.61477279663086, "learning_rate": 1.3293543136190992e-06, "loss": 0.8139, "step": 49 }, { "epoch": 0.008142327891544192, "grad_norm": 8.364998817443848, "learning_rate": 1.3564839934888768e-06, "loss": 0.8198, "step": 50 }, { "epoch": 0.008305174449375076, "grad_norm": 9.667012214660645, "learning_rate": 1.3836136733586544e-06, "loss": 0.8351, "step": 51 }, { "epoch": 0.00846802100720596, "grad_norm": 11.317605972290039, "learning_rate": 1.410743353228432e-06, "loss": 0.8471, "step": 52 }, { "epoch": 0.008630867565036844, "grad_norm": 4.969048023223877, "learning_rate": 1.4378730330982096e-06, "loss": 0.7496, "step": 53 }, { "epoch": 0.008793714122867727, "grad_norm": 6.531116485595703, "learning_rate": 1.4650027129679872e-06, "loss": 0.8079, "step": 54 }, { "epoch": 0.008956560680698611, "grad_norm": 4.324789047241211, "learning_rate": 1.4921323928377646e-06, "loss": 0.7494, "step": 55 }, { "epoch": 0.009119407238529496, "grad_norm": 4.646331310272217, "learning_rate": 1.5192620727075421e-06, "loss": 0.7757, "step": 56 }, { "epoch": 0.00928225379636038, "grad_norm": 4.177970886230469, "learning_rate": 1.5463917525773197e-06, "loss": 0.7128, "step": 57 }, { "epoch": 0.009445100354191263, "grad_norm": 3.3217785358428955, "learning_rate": 1.5735214324470973e-06, "loss": 0.7131, "step": 58 }, { "epoch": 0.009607946912022148, "grad_norm": 3.5142664909362793, "learning_rate": 1.6006511123168747e-06, "loss": 0.7396, "step": 59 }, { "epoch": 0.009770793469853031, "grad_norm": 3.225219249725342, "learning_rate": 1.6277807921866523e-06, "loss": 0.6539, "step": 60 }, { "epoch": 0.009933640027683915, "grad_norm": 3.2878341674804688, "learning_rate": 1.6549104720564299e-06, "loss": 0.7127, "step": 61 }, { "epoch": 0.010096486585514798, "grad_norm": 3.753382921218872, "learning_rate": 1.6820401519262075e-06, "loss": 0.7599, "step": 62 }, { "epoch": 0.010259333143345683, "grad_norm": 3.105297088623047, "learning_rate": 1.7091698317959848e-06, "loss": 0.7151, "step": 63 }, { "epoch": 0.010422179701176567, "grad_norm": 3.0562498569488525, "learning_rate": 1.7362995116657624e-06, "loss": 0.6799, "step": 64 }, { "epoch": 0.01058502625900745, "grad_norm": 2.584552049636841, "learning_rate": 1.76342919153554e-06, "loss": 0.7215, "step": 65 }, { "epoch": 0.010747872816838333, "grad_norm": 2.1621205806732178, "learning_rate": 1.7905588714053176e-06, "loss": 0.7004, "step": 66 }, { "epoch": 0.010910719374669219, "grad_norm": 1.9075329303741455, "learning_rate": 1.817688551275095e-06, "loss": 0.7059, "step": 67 }, { "epoch": 0.011073565932500102, "grad_norm": 2.255669116973877, "learning_rate": 1.8448182311448726e-06, "loss": 0.6234, "step": 68 }, { "epoch": 0.011236412490330985, "grad_norm": 1.903462290763855, "learning_rate": 1.8719479110146501e-06, "loss": 0.7271, "step": 69 }, { "epoch": 0.011399259048161869, "grad_norm": 3.764965057373047, "learning_rate": 1.8990775908844277e-06, "loss": 0.6417, "step": 70 }, { "epoch": 0.011562105605992754, "grad_norm": 2.8389527797698975, "learning_rate": 1.926207270754205e-06, "loss": 0.6698, "step": 71 }, { "epoch": 0.011724952163823637, "grad_norm": 2.988205909729004, "learning_rate": 1.953336950623983e-06, "loss": 0.7116, "step": 72 }, { "epoch": 0.01188779872165452, "grad_norm": 3.0430731773376465, "learning_rate": 1.9804666304937603e-06, "loss": 0.6637, "step": 73 }, { "epoch": 0.012050645279485404, "grad_norm": 2.8517634868621826, "learning_rate": 2.0075963103635377e-06, "loss": 0.6737, "step": 74 }, { "epoch": 0.01221349183731629, "grad_norm": 3.8812012672424316, "learning_rate": 2.0347259902333155e-06, "loss": 0.7341, "step": 75 }, { "epoch": 0.012376338395147173, "grad_norm": 3.224827289581299, "learning_rate": 2.061855670103093e-06, "loss": 0.6591, "step": 76 }, { "epoch": 0.012539184952978056, "grad_norm": 2.6492745876312256, "learning_rate": 2.08898534997287e-06, "loss": 0.6797, "step": 77 }, { "epoch": 0.012702031510808941, "grad_norm": 3.233771324157715, "learning_rate": 2.116115029842648e-06, "loss": 0.623, "step": 78 }, { "epoch": 0.012864878068639824, "grad_norm": 2.08512806892395, "learning_rate": 2.1432447097124254e-06, "loss": 0.6875, "step": 79 }, { "epoch": 0.013027724626470708, "grad_norm": 1.7173582315444946, "learning_rate": 2.170374389582203e-06, "loss": 0.6428, "step": 80 }, { "epoch": 0.013190571184301591, "grad_norm": 1.8224493265151978, "learning_rate": 2.1975040694519806e-06, "loss": 0.6418, "step": 81 }, { "epoch": 0.013353417742132476, "grad_norm": 1.9618295431137085, "learning_rate": 2.224633749321758e-06, "loss": 0.6728, "step": 82 }, { "epoch": 0.01351626429996336, "grad_norm": 1.6390341520309448, "learning_rate": 2.2517634291915357e-06, "loss": 0.5852, "step": 83 }, { "epoch": 0.013679110857794243, "grad_norm": 1.2489187717437744, "learning_rate": 2.278893109061313e-06, "loss": 0.6323, "step": 84 }, { "epoch": 0.013841957415625127, "grad_norm": 1.316605567932129, "learning_rate": 2.3060227889310905e-06, "loss": 0.6441, "step": 85 }, { "epoch": 0.014004803973456012, "grad_norm": 1.140224575996399, "learning_rate": 2.3331524688008683e-06, "loss": 0.5995, "step": 86 }, { "epoch": 0.014167650531286895, "grad_norm": 1.1001319885253906, "learning_rate": 2.3602821486706457e-06, "loss": 0.6009, "step": 87 }, { "epoch": 0.014330497089117778, "grad_norm": 1.81968355178833, "learning_rate": 2.387411828540423e-06, "loss": 0.5657, "step": 88 }, { "epoch": 0.014493343646948662, "grad_norm": 1.2266783714294434, "learning_rate": 2.414541508410201e-06, "loss": 0.6492, "step": 89 }, { "epoch": 0.014656190204779547, "grad_norm": 1.401109218597412, "learning_rate": 2.441671188279978e-06, "loss": 0.6703, "step": 90 }, { "epoch": 0.01481903676261043, "grad_norm": 1.6090548038482666, "learning_rate": 2.468800868149756e-06, "loss": 0.6725, "step": 91 }, { "epoch": 0.014981883320441314, "grad_norm": 1.4188933372497559, "learning_rate": 2.4959305480195334e-06, "loss": 0.5976, "step": 92 }, { "epoch": 0.015144729878272197, "grad_norm": 1.7752201557159424, "learning_rate": 2.5230602278893108e-06, "loss": 0.6315, "step": 93 }, { "epoch": 0.015307576436103082, "grad_norm": 1.4699472188949585, "learning_rate": 2.5501899077590886e-06, "loss": 0.6145, "step": 94 }, { "epoch": 0.015470422993933966, "grad_norm": 1.6508113145828247, "learning_rate": 2.577319587628866e-06, "loss": 0.5595, "step": 95 }, { "epoch": 0.01563326955176485, "grad_norm": 1.2853798866271973, "learning_rate": 2.6044492674986433e-06, "loss": 0.6333, "step": 96 }, { "epoch": 0.015796116109595734, "grad_norm": 1.360181212425232, "learning_rate": 2.631578947368421e-06, "loss": 0.6529, "step": 97 }, { "epoch": 0.015958962667426618, "grad_norm": 1.2485108375549316, "learning_rate": 2.6587086272381985e-06, "loss": 0.621, "step": 98 }, { "epoch": 0.0161218092252575, "grad_norm": 0.8449456095695496, "learning_rate": 2.6858383071079763e-06, "loss": 0.5591, "step": 99 }, { "epoch": 0.016284655783088384, "grad_norm": 0.8724983930587769, "learning_rate": 2.7129679869777537e-06, "loss": 0.5464, "step": 100 }, { "epoch": 0.016447502340919268, "grad_norm": 1.125311017036438, "learning_rate": 2.740097666847531e-06, "loss": 0.5961, "step": 101 }, { "epoch": 0.01661034889875015, "grad_norm": 1.0098700523376465, "learning_rate": 2.767227346717309e-06, "loss": 0.6113, "step": 102 }, { "epoch": 0.016773195456581038, "grad_norm": 1.1941372156143188, "learning_rate": 2.7943570265870866e-06, "loss": 0.6477, "step": 103 }, { "epoch": 0.01693604201441192, "grad_norm": 1.0765526294708252, "learning_rate": 2.821486706456864e-06, "loss": 0.5714, "step": 104 }, { "epoch": 0.017098888572242805, "grad_norm": 1.1545273065567017, "learning_rate": 2.848616386326642e-06, "loss": 0.5943, "step": 105 }, { "epoch": 0.017261735130073688, "grad_norm": 1.4611811637878418, "learning_rate": 2.875746066196419e-06, "loss": 0.6051, "step": 106 }, { "epoch": 0.01742458168790457, "grad_norm": 0.8389372229576111, "learning_rate": 2.9028757460661966e-06, "loss": 0.5312, "step": 107 }, { "epoch": 0.017587428245735455, "grad_norm": 0.9239583015441895, "learning_rate": 2.9300054259359744e-06, "loss": 0.6211, "step": 108 }, { "epoch": 0.01775027480356634, "grad_norm": 0.854373037815094, "learning_rate": 2.9571351058057517e-06, "loss": 0.5834, "step": 109 }, { "epoch": 0.017913121361397222, "grad_norm": 1.0828291177749634, "learning_rate": 2.984264785675529e-06, "loss": 0.6234, "step": 110 }, { "epoch": 0.01807596791922811, "grad_norm": 1.339524745941162, "learning_rate": 3.011394465545307e-06, "loss": 0.6125, "step": 111 }, { "epoch": 0.018238814477058992, "grad_norm": 0.8758399486541748, "learning_rate": 3.0385241454150843e-06, "loss": 0.5662, "step": 112 }, { "epoch": 0.018401661034889875, "grad_norm": 1.1950311660766602, "learning_rate": 3.065653825284862e-06, "loss": 0.5484, "step": 113 }, { "epoch": 0.01856450759272076, "grad_norm": 0.87191241979599, "learning_rate": 3.0927835051546395e-06, "loss": 0.5488, "step": 114 }, { "epoch": 0.018727354150551642, "grad_norm": 1.3220497369766235, "learning_rate": 3.119913185024417e-06, "loss": 0.5903, "step": 115 }, { "epoch": 0.018890200708382526, "grad_norm": 1.0710890293121338, "learning_rate": 3.1470428648941946e-06, "loss": 0.6286, "step": 116 }, { "epoch": 0.01905304726621341, "grad_norm": 0.9422585964202881, "learning_rate": 3.1741725447639716e-06, "loss": 0.5129, "step": 117 }, { "epoch": 0.019215893824044296, "grad_norm": 0.8847532868385315, "learning_rate": 3.2013022246337494e-06, "loss": 0.5601, "step": 118 }, { "epoch": 0.01937874038187518, "grad_norm": 0.8625254034996033, "learning_rate": 3.2284319045035268e-06, "loss": 0.5863, "step": 119 }, { "epoch": 0.019541586939706063, "grad_norm": 0.7909155488014221, "learning_rate": 3.2555615843733046e-06, "loss": 0.5952, "step": 120 }, { "epoch": 0.019704433497536946, "grad_norm": 0.8685516715049744, "learning_rate": 3.282691264243082e-06, "loss": 0.5794, "step": 121 }, { "epoch": 0.01986728005536783, "grad_norm": 0.8445191383361816, "learning_rate": 3.3098209441128597e-06, "loss": 0.6349, "step": 122 }, { "epoch": 0.020030126613198713, "grad_norm": 0.7952302694320679, "learning_rate": 3.3369506239826367e-06, "loss": 0.5904, "step": 123 }, { "epoch": 0.020192973171029596, "grad_norm": 1.2384543418884277, "learning_rate": 3.364080303852415e-06, "loss": 0.5579, "step": 124 }, { "epoch": 0.02035581972886048, "grad_norm": 0.8543194532394409, "learning_rate": 3.391209983722192e-06, "loss": 0.5577, "step": 125 }, { "epoch": 0.020518666286691366, "grad_norm": 0.8046339154243469, "learning_rate": 3.4183396635919697e-06, "loss": 0.5161, "step": 126 }, { "epoch": 0.02068151284452225, "grad_norm": 1.0303411483764648, "learning_rate": 3.445469343461747e-06, "loss": 0.5195, "step": 127 }, { "epoch": 0.020844359402353133, "grad_norm": 0.7929338216781616, "learning_rate": 3.472599023331525e-06, "loss": 0.5372, "step": 128 }, { "epoch": 0.021007205960184017, "grad_norm": 0.6742150187492371, "learning_rate": 3.4997287032013022e-06, "loss": 0.5551, "step": 129 }, { "epoch": 0.0211700525180149, "grad_norm": 1.1317448616027832, "learning_rate": 3.52685838307108e-06, "loss": 0.5066, "step": 130 }, { "epoch": 0.021332899075845783, "grad_norm": 1.0842419862747192, "learning_rate": 3.553988062940857e-06, "loss": 0.5871, "step": 131 }, { "epoch": 0.021495745633676667, "grad_norm": 0.7495983242988586, "learning_rate": 3.581117742810635e-06, "loss": 0.5131, "step": 132 }, { "epoch": 0.021658592191507554, "grad_norm": 0.8318480253219604, "learning_rate": 3.608247422680412e-06, "loss": 0.5325, "step": 133 }, { "epoch": 0.021821438749338437, "grad_norm": 0.8992881774902344, "learning_rate": 3.63537710255019e-06, "loss": 0.5777, "step": 134 }, { "epoch": 0.02198428530716932, "grad_norm": 0.9533233642578125, "learning_rate": 3.6625067824199673e-06, "loss": 0.5212, "step": 135 }, { "epoch": 0.022147131865000204, "grad_norm": 1.3078320026397705, "learning_rate": 3.689636462289745e-06, "loss": 0.5596, "step": 136 }, { "epoch": 0.022309978422831087, "grad_norm": 0.992885947227478, "learning_rate": 3.716766142159523e-06, "loss": 0.5234, "step": 137 }, { "epoch": 0.02247282498066197, "grad_norm": 0.948309600353241, "learning_rate": 3.7438958220293003e-06, "loss": 0.5408, "step": 138 }, { "epoch": 0.022635671538492854, "grad_norm": 0.8976426124572754, "learning_rate": 3.771025501899078e-06, "loss": 0.5231, "step": 139 }, { "epoch": 0.022798518096323737, "grad_norm": 0.7239863872528076, "learning_rate": 3.7981551817688555e-06, "loss": 0.5202, "step": 140 }, { "epoch": 0.022961364654154624, "grad_norm": 1.1441500186920166, "learning_rate": 3.825284861638633e-06, "loss": 0.5975, "step": 141 }, { "epoch": 0.023124211211985508, "grad_norm": 0.8521795868873596, "learning_rate": 3.85241454150841e-06, "loss": 0.5919, "step": 142 }, { "epoch": 0.02328705776981639, "grad_norm": 0.8247413039207458, "learning_rate": 3.8795442213781884e-06, "loss": 0.5764, "step": 143 }, { "epoch": 0.023449904327647274, "grad_norm": 0.9140257239341736, "learning_rate": 3.906673901247966e-06, "loss": 0.5664, "step": 144 }, { "epoch": 0.023612750885478158, "grad_norm": 0.6844326257705688, "learning_rate": 3.933803581117743e-06, "loss": 0.5604, "step": 145 }, { "epoch": 0.02377559744330904, "grad_norm": 0.9701638221740723, "learning_rate": 3.9609332609875206e-06, "loss": 0.5335, "step": 146 }, { "epoch": 0.023938444001139925, "grad_norm": 0.7020561099052429, "learning_rate": 3.988062940857299e-06, "loss": 0.4602, "step": 147 }, { "epoch": 0.024101290558970808, "grad_norm": 1.6978968381881714, "learning_rate": 4.015192620727075e-06, "loss": 0.5235, "step": 148 }, { "epoch": 0.024264137116801695, "grad_norm": 1.1541671752929688, "learning_rate": 4.0423223005968535e-06, "loss": 0.5245, "step": 149 }, { "epoch": 0.02442698367463258, "grad_norm": 0.8169616460800171, "learning_rate": 4.069451980466631e-06, "loss": 0.5322, "step": 150 }, { "epoch": 0.02458983023246346, "grad_norm": 0.9765011668205261, "learning_rate": 4.096581660336408e-06, "loss": 0.5721, "step": 151 }, { "epoch": 0.024752676790294345, "grad_norm": 0.7802464365959167, "learning_rate": 4.123711340206186e-06, "loss": 0.5013, "step": 152 }, { "epoch": 0.02491552334812523, "grad_norm": 0.8658196926116943, "learning_rate": 4.150841020075964e-06, "loss": 0.4855, "step": 153 }, { "epoch": 0.025078369905956112, "grad_norm": 0.725875198841095, "learning_rate": 4.17797069994574e-06, "loss": 0.5167, "step": 154 }, { "epoch": 0.025241216463786995, "grad_norm": 0.7796592116355896, "learning_rate": 4.205100379815519e-06, "loss": 0.5957, "step": 155 }, { "epoch": 0.025404063021617882, "grad_norm": 0.816615879535675, "learning_rate": 4.232230059685296e-06, "loss": 0.5186, "step": 156 }, { "epoch": 0.025566909579448766, "grad_norm": 0.7572610974311829, "learning_rate": 4.259359739555073e-06, "loss": 0.4684, "step": 157 }, { "epoch": 0.02572975613727965, "grad_norm": 0.8383011817932129, "learning_rate": 4.286489419424851e-06, "loss": 0.5084, "step": 158 }, { "epoch": 0.025892602695110532, "grad_norm": 0.7704216837882996, "learning_rate": 4.313619099294629e-06, "loss": 0.5, "step": 159 }, { "epoch": 0.026055449252941416, "grad_norm": 1.2968308925628662, "learning_rate": 4.340748779164406e-06, "loss": 0.5707, "step": 160 }, { "epoch": 0.0262182958107723, "grad_norm": 0.8176507353782654, "learning_rate": 4.367878459034184e-06, "loss": 0.5631, "step": 161 }, { "epoch": 0.026381142368603182, "grad_norm": 0.8241315484046936, "learning_rate": 4.395008138903961e-06, "loss": 0.474, "step": 162 }, { "epoch": 0.026543988926434066, "grad_norm": 0.6237398386001587, "learning_rate": 4.4221378187737385e-06, "loss": 0.4945, "step": 163 }, { "epoch": 0.026706835484264953, "grad_norm": 0.682911217212677, "learning_rate": 4.449267498643516e-06, "loss": 0.5064, "step": 164 }, { "epoch": 0.026869682042095836, "grad_norm": 0.7639620304107666, "learning_rate": 4.476397178513294e-06, "loss": 0.5241, "step": 165 }, { "epoch": 0.02703252859992672, "grad_norm": 0.5971587300300598, "learning_rate": 4.5035268583830715e-06, "loss": 0.5012, "step": 166 }, { "epoch": 0.027195375157757603, "grad_norm": 0.7062264084815979, "learning_rate": 4.530656538252849e-06, "loss": 0.5258, "step": 167 }, { "epoch": 0.027358221715588486, "grad_norm": 0.6684761643409729, "learning_rate": 4.557786218122626e-06, "loss": 0.5528, "step": 168 }, { "epoch": 0.02752106827341937, "grad_norm": 0.9504234194755554, "learning_rate": 4.5849158979924044e-06, "loss": 0.4767, "step": 169 }, { "epoch": 0.027683914831250253, "grad_norm": 0.6690292358398438, "learning_rate": 4.612045577862181e-06, "loss": 0.494, "step": 170 }, { "epoch": 0.02784676138908114, "grad_norm": 0.8545043468475342, "learning_rate": 4.639175257731959e-06, "loss": 0.5331, "step": 171 }, { "epoch": 0.028009607946912023, "grad_norm": 0.6568216681480408, "learning_rate": 4.6663049376017366e-06, "loss": 0.4984, "step": 172 }, { "epoch": 0.028172454504742907, "grad_norm": 0.5470753908157349, "learning_rate": 4.693434617471514e-06, "loss": 0.4531, "step": 173 }, { "epoch": 0.02833530106257379, "grad_norm": 0.7993559241294861, "learning_rate": 4.720564297341291e-06, "loss": 0.5109, "step": 174 }, { "epoch": 0.028498147620404674, "grad_norm": 1.0763046741485596, "learning_rate": 4.7476939772110695e-06, "loss": 0.5407, "step": 175 }, { "epoch": 0.028660994178235557, "grad_norm": 0.5807291865348816, "learning_rate": 4.774823657080846e-06, "loss": 0.5625, "step": 176 }, { "epoch": 0.02882384073606644, "grad_norm": 0.8935692310333252, "learning_rate": 4.801953336950624e-06, "loss": 0.5218, "step": 177 }, { "epoch": 0.028986687293897324, "grad_norm": 0.5807906985282898, "learning_rate": 4.829083016820402e-06, "loss": 0.5252, "step": 178 }, { "epoch": 0.02914953385172821, "grad_norm": 0.6294142603874207, "learning_rate": 4.856212696690179e-06, "loss": 0.4905, "step": 179 }, { "epoch": 0.029312380409559094, "grad_norm": 0.6021614670753479, "learning_rate": 4.883342376559956e-06, "loss": 0.521, "step": 180 }, { "epoch": 0.029475226967389977, "grad_norm": 1.025186538696289, "learning_rate": 4.910472056429735e-06, "loss": 0.4871, "step": 181 }, { "epoch": 0.02963807352522086, "grad_norm": 0.632241427898407, "learning_rate": 4.937601736299512e-06, "loss": 0.5114, "step": 182 }, { "epoch": 0.029800920083051744, "grad_norm": 0.7558436989784241, "learning_rate": 4.964731416169289e-06, "loss": 0.5061, "step": 183 }, { "epoch": 0.029963766640882628, "grad_norm": 0.6010175347328186, "learning_rate": 4.991861096039067e-06, "loss": 0.4502, "step": 184 }, { "epoch": 0.03012661319871351, "grad_norm": 0.8143193125724792, "learning_rate": 5.018990775908845e-06, "loss": 0.4967, "step": 185 }, { "epoch": 0.030289459756544394, "grad_norm": 1.3582186698913574, "learning_rate": 5.0461204557786215e-06, "loss": 0.4753, "step": 186 }, { "epoch": 0.03045230631437528, "grad_norm": 0.5626188516616821, "learning_rate": 5.0732501356484e-06, "loss": 0.432, "step": 187 }, { "epoch": 0.030615152872206165, "grad_norm": 0.8179471492767334, "learning_rate": 5.100379815518177e-06, "loss": 0.4887, "step": 188 }, { "epoch": 0.030777999430037048, "grad_norm": 0.7401441335678101, "learning_rate": 5.1275094953879545e-06, "loss": 0.4901, "step": 189 }, { "epoch": 0.03094084598786793, "grad_norm": 0.7661183476448059, "learning_rate": 5.154639175257732e-06, "loss": 0.5161, "step": 190 }, { "epoch": 0.031103692545698815, "grad_norm": 0.7576958537101746, "learning_rate": 5.18176885512751e-06, "loss": 0.5127, "step": 191 }, { "epoch": 0.0312665391035297, "grad_norm": 0.7997469902038574, "learning_rate": 5.208898534997287e-06, "loss": 0.4759, "step": 192 }, { "epoch": 0.03142938566136058, "grad_norm": 0.6806103587150574, "learning_rate": 5.236028214867065e-06, "loss": 0.5226, "step": 193 }, { "epoch": 0.03159223221919147, "grad_norm": 0.703517735004425, "learning_rate": 5.263157894736842e-06, "loss": 0.5131, "step": 194 }, { "epoch": 0.03175507877702235, "grad_norm": 0.7924356460571289, "learning_rate": 5.29028757460662e-06, "loss": 0.5264, "step": 195 }, { "epoch": 0.031917925334853235, "grad_norm": 0.5552158951759338, "learning_rate": 5.317417254476397e-06, "loss": 0.4649, "step": 196 }, { "epoch": 0.032080771892684115, "grad_norm": 0.821522057056427, "learning_rate": 5.344546934346175e-06, "loss": 0.5211, "step": 197 }, { "epoch": 0.032243618450515, "grad_norm": 1.0631200075149536, "learning_rate": 5.3716766142159526e-06, "loss": 0.5273, "step": 198 }, { "epoch": 0.03240646500834589, "grad_norm": 0.674878716468811, "learning_rate": 5.39880629408573e-06, "loss": 0.521, "step": 199 }, { "epoch": 0.03256931156617677, "grad_norm": 0.841880202293396, "learning_rate": 5.425935973955507e-06, "loss": 0.5668, "step": 200 }, { "epoch": 0.032732158124007656, "grad_norm": 0.9635846614837646, "learning_rate": 5.4530656538252855e-06, "loss": 0.5278, "step": 201 }, { "epoch": 0.032895004681838536, "grad_norm": 0.8944481611251831, "learning_rate": 5.480195333695062e-06, "loss": 0.4991, "step": 202 }, { "epoch": 0.03305785123966942, "grad_norm": 0.8170092701911926, "learning_rate": 5.50732501356484e-06, "loss": 0.4721, "step": 203 }, { "epoch": 0.0332206977975003, "grad_norm": 0.7621743679046631, "learning_rate": 5.534454693434618e-06, "loss": 0.5103, "step": 204 }, { "epoch": 0.03338354435533119, "grad_norm": 0.8802380561828613, "learning_rate": 5.561584373304395e-06, "loss": 0.4866, "step": 205 }, { "epoch": 0.033546390913162076, "grad_norm": 1.1269662380218506, "learning_rate": 5.588714053174173e-06, "loss": 0.5106, "step": 206 }, { "epoch": 0.033709237470992956, "grad_norm": 0.6734982132911682, "learning_rate": 5.615843733043951e-06, "loss": 0.4942, "step": 207 }, { "epoch": 0.03387208402882384, "grad_norm": 0.6484155058860779, "learning_rate": 5.642973412913728e-06, "loss": 0.4909, "step": 208 }, { "epoch": 0.03403493058665472, "grad_norm": 1.0875418186187744, "learning_rate": 5.670103092783505e-06, "loss": 0.5418, "step": 209 }, { "epoch": 0.03419777714448561, "grad_norm": 0.6102640628814697, "learning_rate": 5.697232772653284e-06, "loss": 0.5302, "step": 210 }, { "epoch": 0.03436062370231649, "grad_norm": 0.6222060918807983, "learning_rate": 5.72436245252306e-06, "loss": 0.496, "step": 211 }, { "epoch": 0.034523470260147376, "grad_norm": 0.7107225060462952, "learning_rate": 5.751492132392838e-06, "loss": 0.5232, "step": 212 }, { "epoch": 0.03468631681797826, "grad_norm": 0.8412028551101685, "learning_rate": 5.778621812262616e-06, "loss": 0.4981, "step": 213 }, { "epoch": 0.03484916337580914, "grad_norm": 0.7682175040245056, "learning_rate": 5.805751492132393e-06, "loss": 0.4942, "step": 214 }, { "epoch": 0.03501200993364003, "grad_norm": 0.7349792718887329, "learning_rate": 5.8328811720021705e-06, "loss": 0.5091, "step": 215 }, { "epoch": 0.03517485649147091, "grad_norm": 0.7865320444107056, "learning_rate": 5.860010851871949e-06, "loss": 0.4849, "step": 216 }, { "epoch": 0.0353377030493018, "grad_norm": 0.6979241371154785, "learning_rate": 5.887140531741725e-06, "loss": 0.5291, "step": 217 }, { "epoch": 0.03550054960713268, "grad_norm": 0.5121638178825378, "learning_rate": 5.9142702116115035e-06, "loss": 0.4869, "step": 218 }, { "epoch": 0.035663396164963564, "grad_norm": 0.5598961114883423, "learning_rate": 5.941399891481281e-06, "loss": 0.4941, "step": 219 }, { "epoch": 0.035826242722794444, "grad_norm": 0.6036998629570007, "learning_rate": 5.968529571351058e-06, "loss": 0.4897, "step": 220 }, { "epoch": 0.03598908928062533, "grad_norm": 0.6680541634559631, "learning_rate": 5.995659251220836e-06, "loss": 0.5036, "step": 221 }, { "epoch": 0.03615193583845622, "grad_norm": 0.5929523706436157, "learning_rate": 6.022788931090614e-06, "loss": 0.4827, "step": 222 }, { "epoch": 0.0363147823962871, "grad_norm": 0.6166229844093323, "learning_rate": 6.049918610960391e-06, "loss": 0.5019, "step": 223 }, { "epoch": 0.036477628954117984, "grad_norm": 0.5689573287963867, "learning_rate": 6.077048290830169e-06, "loss": 0.4833, "step": 224 }, { "epoch": 0.036640475511948864, "grad_norm": 0.5578492283821106, "learning_rate": 6.104177970699946e-06, "loss": 0.4749, "step": 225 }, { "epoch": 0.03680332206977975, "grad_norm": 0.6710678935050964, "learning_rate": 6.131307650569724e-06, "loss": 0.4465, "step": 226 }, { "epoch": 0.03696616862761063, "grad_norm": 0.6893644332885742, "learning_rate": 6.158437330439501e-06, "loss": 0.4764, "step": 227 }, { "epoch": 0.03712901518544152, "grad_norm": 0.624933123588562, "learning_rate": 6.185567010309279e-06, "loss": 0.4396, "step": 228 }, { "epoch": 0.037291861743272404, "grad_norm": 0.7651435136795044, "learning_rate": 6.212696690179056e-06, "loss": 0.4547, "step": 229 }, { "epoch": 0.037454708301103284, "grad_norm": 0.626541793346405, "learning_rate": 6.239826370048834e-06, "loss": 0.4969, "step": 230 }, { "epoch": 0.03761755485893417, "grad_norm": 0.5517471432685852, "learning_rate": 6.266956049918611e-06, "loss": 0.4938, "step": 231 }, { "epoch": 0.03778040141676505, "grad_norm": 0.7295474410057068, "learning_rate": 6.294085729788389e-06, "loss": 0.4514, "step": 232 }, { "epoch": 0.03794324797459594, "grad_norm": 0.6409031748771667, "learning_rate": 6.321215409658167e-06, "loss": 0.5115, "step": 233 }, { "epoch": 0.03810609453242682, "grad_norm": 0.68667072057724, "learning_rate": 6.348345089527943e-06, "loss": 0.448, "step": 234 }, { "epoch": 0.038268941090257705, "grad_norm": 0.6231024861335754, "learning_rate": 6.375474769397721e-06, "loss": 0.5105, "step": 235 }, { "epoch": 0.03843178764808859, "grad_norm": 0.8421512246131897, "learning_rate": 6.402604449267499e-06, "loss": 0.5023, "step": 236 }, { "epoch": 0.03859463420591947, "grad_norm": 0.9058128595352173, "learning_rate": 6.429734129137277e-06, "loss": 0.4734, "step": 237 }, { "epoch": 0.03875748076375036, "grad_norm": 0.7274054288864136, "learning_rate": 6.4568638090070535e-06, "loss": 0.4417, "step": 238 }, { "epoch": 0.03892032732158124, "grad_norm": 0.7996021509170532, "learning_rate": 6.483993488876832e-06, "loss": 0.4476, "step": 239 }, { "epoch": 0.039083173879412125, "grad_norm": 0.9105180501937866, "learning_rate": 6.511123168746609e-06, "loss": 0.4322, "step": 240 }, { "epoch": 0.039246020437243005, "grad_norm": 0.6499965786933899, "learning_rate": 6.538252848616387e-06, "loss": 0.4726, "step": 241 }, { "epoch": 0.03940886699507389, "grad_norm": 1.2197163105010986, "learning_rate": 6.565382528486164e-06, "loss": 0.4929, "step": 242 }, { "epoch": 0.03957171355290478, "grad_norm": 0.6353127360343933, "learning_rate": 6.592512208355941e-06, "loss": 0.4624, "step": 243 }, { "epoch": 0.03973456011073566, "grad_norm": 0.822608232498169, "learning_rate": 6.6196418882257195e-06, "loss": 0.4828, "step": 244 }, { "epoch": 0.039897406668566546, "grad_norm": 0.722618043422699, "learning_rate": 6.646771568095498e-06, "loss": 0.4804, "step": 245 }, { "epoch": 0.040060253226397426, "grad_norm": 0.619535505771637, "learning_rate": 6.673901247965273e-06, "loss": 0.5294, "step": 246 }, { "epoch": 0.04022309978422831, "grad_norm": 0.8423619270324707, "learning_rate": 6.701030927835052e-06, "loss": 0.4916, "step": 247 }, { "epoch": 0.04038594634205919, "grad_norm": 0.5185026526451111, "learning_rate": 6.72816060770483e-06, "loss": 0.4539, "step": 248 }, { "epoch": 0.04054879289989008, "grad_norm": 0.5416414141654968, "learning_rate": 6.755290287574607e-06, "loss": 0.4587, "step": 249 }, { "epoch": 0.04071163945772096, "grad_norm": 0.5707468390464783, "learning_rate": 6.782419967444384e-06, "loss": 0.4683, "step": 250 }, { "epoch": 0.040874486015551846, "grad_norm": 0.611980676651001, "learning_rate": 6.809549647314162e-06, "loss": 0.4372, "step": 251 }, { "epoch": 0.04103733257338273, "grad_norm": 0.5954374670982361, "learning_rate": 6.836679327183939e-06, "loss": 0.4176, "step": 252 }, { "epoch": 0.04120017913121361, "grad_norm": 0.7146753668785095, "learning_rate": 6.8638090070537176e-06, "loss": 0.43, "step": 253 }, { "epoch": 0.0413630256890445, "grad_norm": 0.6391221880912781, "learning_rate": 6.890938686923494e-06, "loss": 0.4435, "step": 254 }, { "epoch": 0.04152587224687538, "grad_norm": 0.5305324792861938, "learning_rate": 6.918068366793272e-06, "loss": 0.5221, "step": 255 }, { "epoch": 0.041688718804706267, "grad_norm": 0.7098801732063293, "learning_rate": 6.94519804666305e-06, "loss": 0.4258, "step": 256 }, { "epoch": 0.041851565362537146, "grad_norm": 0.6782824397087097, "learning_rate": 6.972327726532828e-06, "loss": 0.4726, "step": 257 }, { "epoch": 0.04201441192036803, "grad_norm": 0.6792131066322327, "learning_rate": 6.9994574064026044e-06, "loss": 0.4944, "step": 258 }, { "epoch": 0.04217725847819892, "grad_norm": 0.6406328678131104, "learning_rate": 7.026587086272382e-06, "loss": 0.4779, "step": 259 }, { "epoch": 0.0423401050360298, "grad_norm": 0.7254859805107117, "learning_rate": 7.05371676614216e-06, "loss": 0.4892, "step": 260 }, { "epoch": 0.04250295159386069, "grad_norm": 0.5897133946418762, "learning_rate": 7.080846446011937e-06, "loss": 0.4813, "step": 261 }, { "epoch": 0.04266579815169157, "grad_norm": 0.6248916983604431, "learning_rate": 7.107976125881714e-06, "loss": 0.4628, "step": 262 }, { "epoch": 0.042828644709522454, "grad_norm": 0.8271237015724182, "learning_rate": 7.135105805751492e-06, "loss": 0.5748, "step": 263 }, { "epoch": 0.042991491267353334, "grad_norm": 0.6151827573776245, "learning_rate": 7.16223548562127e-06, "loss": 0.5631, "step": 264 }, { "epoch": 0.04315433782518422, "grad_norm": 0.607612133026123, "learning_rate": 7.189365165491048e-06, "loss": 0.5049, "step": 265 }, { "epoch": 0.04331718438301511, "grad_norm": 0.5959752798080444, "learning_rate": 7.216494845360824e-06, "loss": 0.4781, "step": 266 }, { "epoch": 0.04348003094084599, "grad_norm": 0.6844730377197266, "learning_rate": 7.2436245252306025e-06, "loss": 0.4524, "step": 267 }, { "epoch": 0.043642877498676874, "grad_norm": 0.643876314163208, "learning_rate": 7.27075420510038e-06, "loss": 0.5296, "step": 268 }, { "epoch": 0.043805724056507754, "grad_norm": 1.3424979448318481, "learning_rate": 7.297883884970158e-06, "loss": 0.5058, "step": 269 }, { "epoch": 0.04396857061433864, "grad_norm": 0.8885830640792847, "learning_rate": 7.325013564839935e-06, "loss": 0.4679, "step": 270 }, { "epoch": 0.04413141717216952, "grad_norm": 0.5922513604164124, "learning_rate": 7.352143244709713e-06, "loss": 0.4251, "step": 271 }, { "epoch": 0.04429426373000041, "grad_norm": 0.907986044883728, "learning_rate": 7.37927292457949e-06, "loss": 0.4947, "step": 272 }, { "epoch": 0.04445711028783129, "grad_norm": 0.7185990810394287, "learning_rate": 7.4064026044492685e-06, "loss": 0.4907, "step": 273 }, { "epoch": 0.044619956845662175, "grad_norm": 0.6395279765129089, "learning_rate": 7.433532284319046e-06, "loss": 0.4527, "step": 274 }, { "epoch": 0.04478280340349306, "grad_norm": 0.621222734451294, "learning_rate": 7.460661964188822e-06, "loss": 0.4676, "step": 275 }, { "epoch": 0.04494564996132394, "grad_norm": 0.9395396709442139, "learning_rate": 7.487791644058601e-06, "loss": 0.4427, "step": 276 }, { "epoch": 0.04510849651915483, "grad_norm": 0.4971648156642914, "learning_rate": 7.514921323928378e-06, "loss": 0.4273, "step": 277 }, { "epoch": 0.04527134307698571, "grad_norm": 0.61372971534729, "learning_rate": 7.542051003798156e-06, "loss": 0.4479, "step": 278 }, { "epoch": 0.045434189634816595, "grad_norm": 1.1608730554580688, "learning_rate": 7.569180683667933e-06, "loss": 0.4877, "step": 279 }, { "epoch": 0.045597036192647475, "grad_norm": 0.510806143283844, "learning_rate": 7.596310363537711e-06, "loss": 0.4204, "step": 280 }, { "epoch": 0.04575988275047836, "grad_norm": 0.7103354334831238, "learning_rate": 7.623440043407488e-06, "loss": 0.4163, "step": 281 }, { "epoch": 0.04592272930830925, "grad_norm": 0.5354247689247131, "learning_rate": 7.650569723277266e-06, "loss": 0.4669, "step": 282 }, { "epoch": 0.04608557586614013, "grad_norm": 0.7159066200256348, "learning_rate": 7.677699403147042e-06, "loss": 0.4894, "step": 283 }, { "epoch": 0.046248422423971015, "grad_norm": 0.684296727180481, "learning_rate": 7.70482908301682e-06, "loss": 0.4127, "step": 284 }, { "epoch": 0.046411268981801895, "grad_norm": 0.5676387548446655, "learning_rate": 7.731958762886599e-06, "loss": 0.4639, "step": 285 }, { "epoch": 0.04657411553963278, "grad_norm": 0.6425021886825562, "learning_rate": 7.759088442756377e-06, "loss": 0.454, "step": 286 }, { "epoch": 0.04673696209746366, "grad_norm": 0.6307088732719421, "learning_rate": 7.786218122626153e-06, "loss": 0.4768, "step": 287 }, { "epoch": 0.04689980865529455, "grad_norm": 0.9940255284309387, "learning_rate": 7.813347802495932e-06, "loss": 0.4429, "step": 288 }, { "epoch": 0.047062655213125436, "grad_norm": 0.6097337007522583, "learning_rate": 7.840477482365708e-06, "loss": 0.4658, "step": 289 }, { "epoch": 0.047225501770956316, "grad_norm": 0.5663825869560242, "learning_rate": 7.867607162235486e-06, "loss": 0.4259, "step": 290 }, { "epoch": 0.0473883483287872, "grad_norm": 0.7673112750053406, "learning_rate": 7.894736842105263e-06, "loss": 0.4576, "step": 291 }, { "epoch": 0.04755119488661808, "grad_norm": 0.731289803981781, "learning_rate": 7.921866521975041e-06, "loss": 0.4549, "step": 292 }, { "epoch": 0.04771404144444897, "grad_norm": 0.7219842076301575, "learning_rate": 7.94899620184482e-06, "loss": 0.4533, "step": 293 }, { "epoch": 0.04787688800227985, "grad_norm": 0.6791386008262634, "learning_rate": 7.976125881714598e-06, "loss": 0.479, "step": 294 }, { "epoch": 0.048039734560110736, "grad_norm": 0.5842326283454895, "learning_rate": 8.003255561584372e-06, "loss": 0.4503, "step": 295 }, { "epoch": 0.048202581117941616, "grad_norm": 0.8386819958686829, "learning_rate": 8.03038524145415e-06, "loss": 0.4981, "step": 296 }, { "epoch": 0.0483654276757725, "grad_norm": 0.6501251459121704, "learning_rate": 8.057514921323929e-06, "loss": 0.5026, "step": 297 }, { "epoch": 0.04852827423360339, "grad_norm": 0.5321676135063171, "learning_rate": 8.084644601193707e-06, "loss": 0.4252, "step": 298 }, { "epoch": 0.04869112079143427, "grad_norm": 0.6365773677825928, "learning_rate": 8.111774281063484e-06, "loss": 0.5056, "step": 299 }, { "epoch": 0.04885396734926516, "grad_norm": 0.6534734964370728, "learning_rate": 8.138903960933262e-06, "loss": 0.4418, "step": 300 }, { "epoch": 0.049016813907096037, "grad_norm": 0.6935795545578003, "learning_rate": 8.166033640803038e-06, "loss": 0.5208, "step": 301 }, { "epoch": 0.04917966046492692, "grad_norm": 0.577378511428833, "learning_rate": 8.193163320672817e-06, "loss": 0.5027, "step": 302 }, { "epoch": 0.0493425070227578, "grad_norm": 0.7271364331245422, "learning_rate": 8.220293000542593e-06, "loss": 0.4656, "step": 303 }, { "epoch": 0.04950535358058869, "grad_norm": 0.5604428648948669, "learning_rate": 8.247422680412371e-06, "loss": 0.4217, "step": 304 }, { "epoch": 0.04966820013841958, "grad_norm": 0.6057191491127014, "learning_rate": 8.27455236028215e-06, "loss": 0.4959, "step": 305 }, { "epoch": 0.04983104669625046, "grad_norm": 0.5789348483085632, "learning_rate": 8.301682040151928e-06, "loss": 0.5, "step": 306 }, { "epoch": 0.049993893254081344, "grad_norm": 0.5821237564086914, "learning_rate": 8.328811720021704e-06, "loss": 0.4466, "step": 307 }, { "epoch": 0.050156739811912224, "grad_norm": 0.526866614818573, "learning_rate": 8.35594139989148e-06, "loss": 0.4773, "step": 308 }, { "epoch": 0.05031958636974311, "grad_norm": 0.6027454137802124, "learning_rate": 8.383071079761259e-06, "loss": 0.462, "step": 309 }, { "epoch": 0.05048243292757399, "grad_norm": 0.8254793286323547, "learning_rate": 8.410200759631037e-06, "loss": 0.4518, "step": 310 }, { "epoch": 0.05064527948540488, "grad_norm": 0.6471713781356812, "learning_rate": 8.437330439500814e-06, "loss": 0.4616, "step": 311 }, { "epoch": 0.050808126043235764, "grad_norm": 0.9380389451980591, "learning_rate": 8.464460119370592e-06, "loss": 0.4599, "step": 312 }, { "epoch": 0.050970972601066644, "grad_norm": 0.5876234769821167, "learning_rate": 8.49158979924037e-06, "loss": 0.5111, "step": 313 }, { "epoch": 0.05113381915889753, "grad_norm": 0.5192023515701294, "learning_rate": 8.518719479110147e-06, "loss": 0.4473, "step": 314 }, { "epoch": 0.05129666571672841, "grad_norm": 0.5165553092956543, "learning_rate": 8.545849158979923e-06, "loss": 0.4468, "step": 315 }, { "epoch": 0.0514595122745593, "grad_norm": 0.7417059540748596, "learning_rate": 8.572978838849702e-06, "loss": 0.4559, "step": 316 }, { "epoch": 0.05162235883239018, "grad_norm": 0.7032507658004761, "learning_rate": 8.60010851871948e-06, "loss": 0.4837, "step": 317 }, { "epoch": 0.051785205390221065, "grad_norm": 0.5971809029579163, "learning_rate": 8.627238198589258e-06, "loss": 0.4851, "step": 318 }, { "epoch": 0.05194805194805195, "grad_norm": 0.8667622208595276, "learning_rate": 8.654367878459035e-06, "loss": 0.5016, "step": 319 }, { "epoch": 0.05211089850588283, "grad_norm": 0.5765315294265747, "learning_rate": 8.681497558328813e-06, "loss": 0.3972, "step": 320 }, { "epoch": 0.05227374506371372, "grad_norm": 0.5937551259994507, "learning_rate": 8.70862723819859e-06, "loss": 0.4361, "step": 321 }, { "epoch": 0.0524365916215446, "grad_norm": 0.6407818794250488, "learning_rate": 8.735756918068367e-06, "loss": 0.435, "step": 322 }, { "epoch": 0.052599438179375485, "grad_norm": 0.6914768815040588, "learning_rate": 8.762886597938144e-06, "loss": 0.5435, "step": 323 }, { "epoch": 0.052762284737206365, "grad_norm": 0.7002124190330505, "learning_rate": 8.790016277807922e-06, "loss": 0.4966, "step": 324 }, { "epoch": 0.05292513129503725, "grad_norm": 0.5748800039291382, "learning_rate": 8.8171459576777e-06, "loss": 0.4373, "step": 325 }, { "epoch": 0.05308797785286813, "grad_norm": 0.5902935266494751, "learning_rate": 8.844275637547477e-06, "loss": 0.4458, "step": 326 }, { "epoch": 0.05325082441069902, "grad_norm": 0.6706932187080383, "learning_rate": 8.871405317417254e-06, "loss": 0.4839, "step": 327 }, { "epoch": 0.053413670968529905, "grad_norm": 0.6194947361946106, "learning_rate": 8.898534997287032e-06, "loss": 0.4312, "step": 328 }, { "epoch": 0.053576517526360785, "grad_norm": 0.9821650981903076, "learning_rate": 8.92566467715681e-06, "loss": 0.4642, "step": 329 }, { "epoch": 0.05373936408419167, "grad_norm": 0.6286726593971252, "learning_rate": 8.952794357026588e-06, "loss": 0.4455, "step": 330 }, { "epoch": 0.05390221064202255, "grad_norm": 1.1146910190582275, "learning_rate": 8.979924036896365e-06, "loss": 0.3993, "step": 331 }, { "epoch": 0.05406505719985344, "grad_norm": 0.7950713634490967, "learning_rate": 9.007053716766143e-06, "loss": 0.4433, "step": 332 }, { "epoch": 0.05422790375768432, "grad_norm": 0.7471950650215149, "learning_rate": 9.03418339663592e-06, "loss": 0.4351, "step": 333 }, { "epoch": 0.054390750315515206, "grad_norm": 0.8842043280601501, "learning_rate": 9.061313076505698e-06, "loss": 0.4371, "step": 334 }, { "epoch": 0.05455359687334609, "grad_norm": 0.737748384475708, "learning_rate": 9.088442756375474e-06, "loss": 0.4134, "step": 335 }, { "epoch": 0.05471644343117697, "grad_norm": 0.59358149766922, "learning_rate": 9.115572436245252e-06, "loss": 0.4223, "step": 336 }, { "epoch": 0.05487928998900786, "grad_norm": 0.9797989130020142, "learning_rate": 9.14270211611503e-06, "loss": 0.4231, "step": 337 }, { "epoch": 0.05504213654683874, "grad_norm": 0.5950849652290344, "learning_rate": 9.169831795984809e-06, "loss": 0.4397, "step": 338 }, { "epoch": 0.055204983104669626, "grad_norm": 0.6392645239830017, "learning_rate": 9.196961475854585e-06, "loss": 0.4428, "step": 339 }, { "epoch": 0.055367829662500506, "grad_norm": 1.287197232246399, "learning_rate": 9.224091155724362e-06, "loss": 0.4643, "step": 340 }, { "epoch": 0.05553067622033139, "grad_norm": 0.5639732480049133, "learning_rate": 9.25122083559414e-06, "loss": 0.442, "step": 341 }, { "epoch": 0.05569352277816228, "grad_norm": 0.7290991544723511, "learning_rate": 9.278350515463918e-06, "loss": 0.4633, "step": 342 }, { "epoch": 0.05585636933599316, "grad_norm": 0.5327490568161011, "learning_rate": 9.305480195333697e-06, "loss": 0.4498, "step": 343 }, { "epoch": 0.05601921589382405, "grad_norm": 1.1458865404129028, "learning_rate": 9.332609875203473e-06, "loss": 0.5056, "step": 344 }, { "epoch": 0.05618206245165493, "grad_norm": 0.9847500324249268, "learning_rate": 9.359739555073251e-06, "loss": 0.4599, "step": 345 }, { "epoch": 0.056344909009485813, "grad_norm": 0.5758962631225586, "learning_rate": 9.386869234943028e-06, "loss": 0.4042, "step": 346 }, { "epoch": 0.05650775556731669, "grad_norm": 0.8044791221618652, "learning_rate": 9.413998914812806e-06, "loss": 0.4649, "step": 347 }, { "epoch": 0.05667060212514758, "grad_norm": 0.5179723501205444, "learning_rate": 9.441128594682583e-06, "loss": 0.4301, "step": 348 }, { "epoch": 0.05683344868297846, "grad_norm": 0.7130536437034607, "learning_rate": 9.468258274552361e-06, "loss": 0.4493, "step": 349 }, { "epoch": 0.05699629524080935, "grad_norm": 1.1976193189620972, "learning_rate": 9.495387954422139e-06, "loss": 0.4601, "step": 350 }, { "epoch": 0.057159141798640234, "grad_norm": 0.7009406685829163, "learning_rate": 9.522517634291916e-06, "loss": 0.4422, "step": 351 }, { "epoch": 0.057321988356471114, "grad_norm": 0.8528466820716858, "learning_rate": 9.549647314161692e-06, "loss": 0.4344, "step": 352 }, { "epoch": 0.057484834914302, "grad_norm": 1.0654817819595337, "learning_rate": 9.57677699403147e-06, "loss": 0.4495, "step": 353 }, { "epoch": 0.05764768147213288, "grad_norm": 0.6958267688751221, "learning_rate": 9.603906673901249e-06, "loss": 0.4852, "step": 354 }, { "epoch": 0.05781052802996377, "grad_norm": 0.8503287434577942, "learning_rate": 9.631036353771027e-06, "loss": 0.4448, "step": 355 }, { "epoch": 0.05797337458779465, "grad_norm": 0.820824146270752, "learning_rate": 9.658166033640803e-06, "loss": 0.4693, "step": 356 }, { "epoch": 0.058136221145625534, "grad_norm": 0.8214882016181946, "learning_rate": 9.685295713510582e-06, "loss": 0.4439, "step": 357 }, { "epoch": 0.05829906770345642, "grad_norm": 0.5194812417030334, "learning_rate": 9.712425393380358e-06, "loss": 0.4407, "step": 358 }, { "epoch": 0.0584619142612873, "grad_norm": 0.812254786491394, "learning_rate": 9.739555073250136e-06, "loss": 0.4496, "step": 359 }, { "epoch": 0.05862476081911819, "grad_norm": 0.5245411992073059, "learning_rate": 9.766684753119913e-06, "loss": 0.4783, "step": 360 }, { "epoch": 0.05878760737694907, "grad_norm": 0.7796361446380615, "learning_rate": 9.793814432989691e-06, "loss": 0.438, "step": 361 }, { "epoch": 0.058950453934779955, "grad_norm": 0.6453011631965637, "learning_rate": 9.82094411285947e-06, "loss": 0.4425, "step": 362 }, { "epoch": 0.059113300492610835, "grad_norm": 0.6767668724060059, "learning_rate": 9.848073792729248e-06, "loss": 0.4752, "step": 363 }, { "epoch": 0.05927614705044172, "grad_norm": 0.6700632572174072, "learning_rate": 9.875203472599024e-06, "loss": 0.412, "step": 364 }, { "epoch": 0.05943899360827261, "grad_norm": 0.6000019311904907, "learning_rate": 9.9023331524688e-06, "loss": 0.4061, "step": 365 }, { "epoch": 0.05960184016610349, "grad_norm": 1.016812801361084, "learning_rate": 9.929462832338579e-06, "loss": 0.4472, "step": 366 }, { "epoch": 0.059764686723934375, "grad_norm": 0.7198256850242615, "learning_rate": 9.956592512208357e-06, "loss": 0.4755, "step": 367 }, { "epoch": 0.059927533281765255, "grad_norm": 0.5549175143241882, "learning_rate": 9.983722192078134e-06, "loss": 0.4414, "step": 368 }, { "epoch": 0.06009037983959614, "grad_norm": 1.6324551105499268, "learning_rate": 1.0010851871947912e-05, "loss": 0.4598, "step": 369 }, { "epoch": 0.06025322639742702, "grad_norm": 0.6527844667434692, "learning_rate": 1.003798155181769e-05, "loss": 0.4184, "step": 370 }, { "epoch": 0.06041607295525791, "grad_norm": 1.6401400566101074, "learning_rate": 1.0065111231687467e-05, "loss": 0.4446, "step": 371 }, { "epoch": 0.06057891951308879, "grad_norm": 1.3242506980895996, "learning_rate": 1.0092240911557243e-05, "loss": 0.422, "step": 372 }, { "epoch": 0.060741766070919676, "grad_norm": 0.6144616603851318, "learning_rate": 1.0119370591427021e-05, "loss": 0.4382, "step": 373 }, { "epoch": 0.06090461262875056, "grad_norm": 0.5817995071411133, "learning_rate": 1.01465002712968e-05, "loss": 0.4626, "step": 374 }, { "epoch": 0.06106745918658144, "grad_norm": 0.5641562342643738, "learning_rate": 1.0173629951166578e-05, "loss": 0.4772, "step": 375 }, { "epoch": 0.06123030574441233, "grad_norm": 0.5378983020782471, "learning_rate": 1.0200759631036354e-05, "loss": 0.4644, "step": 376 }, { "epoch": 0.06139315230224321, "grad_norm": 0.6041852831840515, "learning_rate": 1.022788931090613e-05, "loss": 0.4782, "step": 377 }, { "epoch": 0.061555998860074096, "grad_norm": 0.8906572461128235, "learning_rate": 1.0255018990775909e-05, "loss": 0.4523, "step": 378 }, { "epoch": 0.061718845417904976, "grad_norm": 0.6204880475997925, "learning_rate": 1.0282148670645687e-05, "loss": 0.4728, "step": 379 }, { "epoch": 0.06188169197573586, "grad_norm": 0.9811108708381653, "learning_rate": 1.0309278350515464e-05, "loss": 0.3949, "step": 380 }, { "epoch": 0.06204453853356675, "grad_norm": 0.6722950339317322, "learning_rate": 1.0336408030385242e-05, "loss": 0.4328, "step": 381 }, { "epoch": 0.06220738509139763, "grad_norm": 0.7572372555732727, "learning_rate": 1.036353771025502e-05, "loss": 0.4298, "step": 382 }, { "epoch": 0.062370231649228516, "grad_norm": 0.6283131837844849, "learning_rate": 1.0390667390124797e-05, "loss": 0.5081, "step": 383 }, { "epoch": 0.0625330782070594, "grad_norm": 0.6959724426269531, "learning_rate": 1.0417797069994573e-05, "loss": 0.4471, "step": 384 }, { "epoch": 0.06269592476489028, "grad_norm": 0.64736407995224, "learning_rate": 1.0444926749864351e-05, "loss": 0.4442, "step": 385 }, { "epoch": 0.06285877132272116, "grad_norm": 0.6154904365539551, "learning_rate": 1.047205642973413e-05, "loss": 0.4472, "step": 386 }, { "epoch": 0.06302161788055205, "grad_norm": 0.8106939792633057, "learning_rate": 1.0499186109603908e-05, "loss": 0.3882, "step": 387 }, { "epoch": 0.06318446443838294, "grad_norm": 0.792733371257782, "learning_rate": 1.0526315789473684e-05, "loss": 0.4305, "step": 388 }, { "epoch": 0.06334731099621382, "grad_norm": 0.5996662974357605, "learning_rate": 1.0553445469343463e-05, "loss": 0.4496, "step": 389 }, { "epoch": 0.0635101575540447, "grad_norm": 0.6358858942985535, "learning_rate": 1.058057514921324e-05, "loss": 0.4324, "step": 390 }, { "epoch": 0.06367300411187558, "grad_norm": 0.5721731185913086, "learning_rate": 1.0607704829083017e-05, "loss": 0.4218, "step": 391 }, { "epoch": 0.06383585066970647, "grad_norm": 0.6834592223167419, "learning_rate": 1.0634834508952794e-05, "loss": 0.4513, "step": 392 }, { "epoch": 0.06399869722753736, "grad_norm": 0.6970665454864502, "learning_rate": 1.0661964188822572e-05, "loss": 0.491, "step": 393 }, { "epoch": 0.06416154378536823, "grad_norm": 0.5554401874542236, "learning_rate": 1.068909386869235e-05, "loss": 0.4133, "step": 394 }, { "epoch": 0.06432439034319912, "grad_norm": 0.6865509152412415, "learning_rate": 1.0716223548562129e-05, "loss": 0.4442, "step": 395 }, { "epoch": 0.06448723690103, "grad_norm": 0.5768316984176636, "learning_rate": 1.0743353228431905e-05, "loss": 0.4341, "step": 396 }, { "epoch": 0.06465008345886089, "grad_norm": 0.5781079530715942, "learning_rate": 1.0770482908301682e-05, "loss": 0.4644, "step": 397 }, { "epoch": 0.06481293001669178, "grad_norm": 0.5931691527366638, "learning_rate": 1.079761258817146e-05, "loss": 0.4735, "step": 398 }, { "epoch": 0.06497577657452265, "grad_norm": 0.7162618637084961, "learning_rate": 1.0824742268041238e-05, "loss": 0.4139, "step": 399 }, { "epoch": 0.06513862313235354, "grad_norm": 0.598385214805603, "learning_rate": 1.0851871947911015e-05, "loss": 0.4481, "step": 400 }, { "epoch": 0.06530146969018442, "grad_norm": 0.5640032887458801, "learning_rate": 1.0879001627780793e-05, "loss": 0.4261, "step": 401 }, { "epoch": 0.06546431624801531, "grad_norm": 0.6532065272331238, "learning_rate": 1.0906131307650571e-05, "loss": 0.495, "step": 402 }, { "epoch": 0.0656271628058462, "grad_norm": 0.4986441135406494, "learning_rate": 1.0933260987520348e-05, "loss": 0.42, "step": 403 }, { "epoch": 0.06579000936367707, "grad_norm": 0.5485318899154663, "learning_rate": 1.0960390667390124e-05, "loss": 0.4333, "step": 404 }, { "epoch": 0.06595285592150796, "grad_norm": 0.5313650369644165, "learning_rate": 1.0987520347259902e-05, "loss": 0.4197, "step": 405 }, { "epoch": 0.06611570247933884, "grad_norm": 0.5656766295433044, "learning_rate": 1.101465002712968e-05, "loss": 0.448, "step": 406 }, { "epoch": 0.06627854903716973, "grad_norm": 0.5647386312484741, "learning_rate": 1.1041779706999459e-05, "loss": 0.4402, "step": 407 }, { "epoch": 0.0664413955950006, "grad_norm": 0.5141014456748962, "learning_rate": 1.1068909386869235e-05, "loss": 0.4115, "step": 408 }, { "epoch": 0.06660424215283149, "grad_norm": 0.610812246799469, "learning_rate": 1.1096039066739012e-05, "loss": 0.4742, "step": 409 }, { "epoch": 0.06676708871066238, "grad_norm": 0.6675206422805786, "learning_rate": 1.112316874660879e-05, "loss": 0.4522, "step": 410 }, { "epoch": 0.06692993526849327, "grad_norm": 1.7142959833145142, "learning_rate": 1.1150298426478568e-05, "loss": 0.4447, "step": 411 }, { "epoch": 0.06709278182632415, "grad_norm": 0.5319316983222961, "learning_rate": 1.1177428106348347e-05, "loss": 0.4244, "step": 412 }, { "epoch": 0.06725562838415503, "grad_norm": 0.46737125515937805, "learning_rate": 1.1204557786218123e-05, "loss": 0.4324, "step": 413 }, { "epoch": 0.06741847494198591, "grad_norm": 0.635084867477417, "learning_rate": 1.1231687466087901e-05, "loss": 0.4269, "step": 414 }, { "epoch": 0.0675813214998168, "grad_norm": 0.5984594821929932, "learning_rate": 1.1258817145957678e-05, "loss": 0.3946, "step": 415 }, { "epoch": 0.06774416805764769, "grad_norm": 0.6167760491371155, "learning_rate": 1.1285946825827456e-05, "loss": 0.3864, "step": 416 }, { "epoch": 0.06790701461547856, "grad_norm": 0.5718551278114319, "learning_rate": 1.1313076505697233e-05, "loss": 0.4687, "step": 417 }, { "epoch": 0.06806986117330945, "grad_norm": 0.8759437203407288, "learning_rate": 1.134020618556701e-05, "loss": 0.4045, "step": 418 }, { "epoch": 0.06823270773114033, "grad_norm": 0.5454111695289612, "learning_rate": 1.1367335865436789e-05, "loss": 0.4138, "step": 419 }, { "epoch": 0.06839555428897122, "grad_norm": 0.6960414052009583, "learning_rate": 1.1394465545306567e-05, "loss": 0.4334, "step": 420 }, { "epoch": 0.0685584008468021, "grad_norm": 0.9086368680000305, "learning_rate": 1.1421595225176344e-05, "loss": 0.3976, "step": 421 }, { "epoch": 0.06872124740463298, "grad_norm": 0.5824621915817261, "learning_rate": 1.144872490504612e-05, "loss": 0.4166, "step": 422 }, { "epoch": 0.06888409396246387, "grad_norm": 0.9047931432723999, "learning_rate": 1.1475854584915899e-05, "loss": 0.3818, "step": 423 }, { "epoch": 0.06904694052029475, "grad_norm": 0.5546779632568359, "learning_rate": 1.1502984264785677e-05, "loss": 0.4216, "step": 424 }, { "epoch": 0.06920978707812564, "grad_norm": 0.5461775660514832, "learning_rate": 1.1530113944655453e-05, "loss": 0.4503, "step": 425 }, { "epoch": 0.06937263363595653, "grad_norm": 0.6295607686042786, "learning_rate": 1.1557243624525231e-05, "loss": 0.4776, "step": 426 }, { "epoch": 0.0695354801937874, "grad_norm": 0.47105422616004944, "learning_rate": 1.158437330439501e-05, "loss": 0.431, "step": 427 }, { "epoch": 0.06969832675161829, "grad_norm": 0.531147837638855, "learning_rate": 1.1611502984264786e-05, "loss": 0.435, "step": 428 }, { "epoch": 0.06986117330944917, "grad_norm": 0.5928906202316284, "learning_rate": 1.1638632664134563e-05, "loss": 0.3904, "step": 429 }, { "epoch": 0.07002401986728006, "grad_norm": 0.5501102209091187, "learning_rate": 1.1665762344004341e-05, "loss": 0.4252, "step": 430 }, { "epoch": 0.07018686642511093, "grad_norm": 0.5222375392913818, "learning_rate": 1.169289202387412e-05, "loss": 0.4418, "step": 431 }, { "epoch": 0.07034971298294182, "grad_norm": 0.6432193517684937, "learning_rate": 1.1720021703743897e-05, "loss": 0.4351, "step": 432 }, { "epoch": 0.0705125595407727, "grad_norm": 0.5189712047576904, "learning_rate": 1.1747151383613674e-05, "loss": 0.4023, "step": 433 }, { "epoch": 0.0706754060986036, "grad_norm": 0.45558828115463257, "learning_rate": 1.177428106348345e-05, "loss": 0.4526, "step": 434 }, { "epoch": 0.07083825265643448, "grad_norm": 0.5408909320831299, "learning_rate": 1.1801410743353229e-05, "loss": 0.4087, "step": 435 }, { "epoch": 0.07100109921426535, "grad_norm": 0.5220383405685425, "learning_rate": 1.1828540423223007e-05, "loss": 0.3935, "step": 436 }, { "epoch": 0.07116394577209624, "grad_norm": 0.5210184454917908, "learning_rate": 1.1855670103092783e-05, "loss": 0.4466, "step": 437 }, { "epoch": 0.07132679232992713, "grad_norm": 0.6030948758125305, "learning_rate": 1.1882799782962562e-05, "loss": 0.4036, "step": 438 }, { "epoch": 0.07148963888775801, "grad_norm": 0.5239824652671814, "learning_rate": 1.190992946283234e-05, "loss": 0.4293, "step": 439 }, { "epoch": 0.07165248544558889, "grad_norm": 0.5320562124252319, "learning_rate": 1.1937059142702116e-05, "loss": 0.4624, "step": 440 }, { "epoch": 0.07181533200341977, "grad_norm": 0.6079339385032654, "learning_rate": 1.1964188822571893e-05, "loss": 0.4417, "step": 441 }, { "epoch": 0.07197817856125066, "grad_norm": 0.4246017634868622, "learning_rate": 1.1991318502441671e-05, "loss": 0.4072, "step": 442 }, { "epoch": 0.07214102511908155, "grad_norm": 0.5505573749542236, "learning_rate": 1.201844818231145e-05, "loss": 0.4305, "step": 443 }, { "epoch": 0.07230387167691243, "grad_norm": 0.6111746430397034, "learning_rate": 1.2045577862181228e-05, "loss": 0.4441, "step": 444 }, { "epoch": 0.07246671823474331, "grad_norm": 0.6193729639053345, "learning_rate": 1.2072707542051004e-05, "loss": 0.4966, "step": 445 }, { "epoch": 0.0726295647925742, "grad_norm": 0.5446475744247437, "learning_rate": 1.2099837221920782e-05, "loss": 0.4381, "step": 446 }, { "epoch": 0.07279241135040508, "grad_norm": 0.6640734672546387, "learning_rate": 1.2126966901790559e-05, "loss": 0.422, "step": 447 }, { "epoch": 0.07295525790823597, "grad_norm": 0.5329530835151672, "learning_rate": 1.2154096581660337e-05, "loss": 0.4056, "step": 448 }, { "epoch": 0.07311810446606685, "grad_norm": 0.6066680550575256, "learning_rate": 1.2181226261530114e-05, "loss": 0.4683, "step": 449 }, { "epoch": 0.07328095102389773, "grad_norm": 0.525145947933197, "learning_rate": 1.2208355941399892e-05, "loss": 0.4343, "step": 450 }, { "epoch": 0.07344379758172861, "grad_norm": 0.6634090542793274, "learning_rate": 1.223548562126967e-05, "loss": 0.3897, "step": 451 }, { "epoch": 0.0736066441395595, "grad_norm": 0.48484981060028076, "learning_rate": 1.2262615301139448e-05, "loss": 0.4217, "step": 452 }, { "epoch": 0.07376949069739039, "grad_norm": 0.6476560831069946, "learning_rate": 1.2289744981009225e-05, "loss": 0.5115, "step": 453 }, { "epoch": 0.07393233725522126, "grad_norm": 0.564330518245697, "learning_rate": 1.2316874660879001e-05, "loss": 0.441, "step": 454 }, { "epoch": 0.07409518381305215, "grad_norm": 0.6116547584533691, "learning_rate": 1.234400434074878e-05, "loss": 0.4829, "step": 455 }, { "epoch": 0.07425803037088304, "grad_norm": 0.6581254601478577, "learning_rate": 1.2371134020618558e-05, "loss": 0.4583, "step": 456 }, { "epoch": 0.07442087692871392, "grad_norm": 0.6788060665130615, "learning_rate": 1.2398263700488334e-05, "loss": 0.4055, "step": 457 }, { "epoch": 0.07458372348654481, "grad_norm": 0.4725711941719055, "learning_rate": 1.2425393380358113e-05, "loss": 0.3969, "step": 458 }, { "epoch": 0.07474657004437568, "grad_norm": 0.6787484288215637, "learning_rate": 1.245252306022789e-05, "loss": 0.4341, "step": 459 }, { "epoch": 0.07490941660220657, "grad_norm": 0.6104402542114258, "learning_rate": 1.2479652740097667e-05, "loss": 0.4841, "step": 460 }, { "epoch": 0.07507226316003746, "grad_norm": 0.5991618633270264, "learning_rate": 1.2506782419967444e-05, "loss": 0.4508, "step": 461 }, { "epoch": 0.07523510971786834, "grad_norm": 0.5182953476905823, "learning_rate": 1.2533912099837222e-05, "loss": 0.4719, "step": 462 }, { "epoch": 0.07539795627569922, "grad_norm": 0.5736607313156128, "learning_rate": 1.2561041779707e-05, "loss": 0.3852, "step": 463 }, { "epoch": 0.0755608028335301, "grad_norm": 0.665970504283905, "learning_rate": 1.2588171459576779e-05, "loss": 0.4153, "step": 464 }, { "epoch": 0.07572364939136099, "grad_norm": 1.389019250869751, "learning_rate": 1.2615301139446555e-05, "loss": 0.4514, "step": 465 }, { "epoch": 0.07588649594919188, "grad_norm": 0.47916463017463684, "learning_rate": 1.2642430819316333e-05, "loss": 0.4471, "step": 466 }, { "epoch": 0.07604934250702276, "grad_norm": 0.5653663277626038, "learning_rate": 1.2669560499186108e-05, "loss": 0.4012, "step": 467 }, { "epoch": 0.07621218906485364, "grad_norm": 0.6960974931716919, "learning_rate": 1.2696690179055886e-05, "loss": 0.428, "step": 468 }, { "epoch": 0.07637503562268452, "grad_norm": 0.5677073001861572, "learning_rate": 1.2723819858925665e-05, "loss": 0.3916, "step": 469 }, { "epoch": 0.07653788218051541, "grad_norm": 0.6383947730064392, "learning_rate": 1.2750949538795443e-05, "loss": 0.4115, "step": 470 }, { "epoch": 0.0767007287383463, "grad_norm": 0.5309544205665588, "learning_rate": 1.2778079218665221e-05, "loss": 0.4342, "step": 471 }, { "epoch": 0.07686357529617718, "grad_norm": 0.7472166419029236, "learning_rate": 1.2805208898534998e-05, "loss": 0.4501, "step": 472 }, { "epoch": 0.07702642185400806, "grad_norm": 0.6476346850395203, "learning_rate": 1.2832338578404776e-05, "loss": 0.3903, "step": 473 }, { "epoch": 0.07718926841183894, "grad_norm": 0.5812830328941345, "learning_rate": 1.2859468258274554e-05, "loss": 0.3961, "step": 474 }, { "epoch": 0.07735211496966983, "grad_norm": 0.8316624164581299, "learning_rate": 1.2886597938144329e-05, "loss": 0.4457, "step": 475 }, { "epoch": 0.07751496152750072, "grad_norm": 0.6649542450904846, "learning_rate": 1.2913727618014107e-05, "loss": 0.4198, "step": 476 }, { "epoch": 0.07767780808533159, "grad_norm": 0.5783931016921997, "learning_rate": 1.2940857297883885e-05, "loss": 0.3952, "step": 477 }, { "epoch": 0.07784065464316248, "grad_norm": 0.499863862991333, "learning_rate": 1.2967986977753664e-05, "loss": 0.4698, "step": 478 }, { "epoch": 0.07800350120099336, "grad_norm": 0.5399532914161682, "learning_rate": 1.299511665762344e-05, "loss": 0.4116, "step": 479 }, { "epoch": 0.07816634775882425, "grad_norm": 0.6640229821205139, "learning_rate": 1.3022246337493218e-05, "loss": 0.4178, "step": 480 }, { "epoch": 0.07832919431665514, "grad_norm": 0.5005292892456055, "learning_rate": 1.3049376017362996e-05, "loss": 0.4268, "step": 481 }, { "epoch": 0.07849204087448601, "grad_norm": 0.6086416244506836, "learning_rate": 1.3076505697232775e-05, "loss": 0.4455, "step": 482 }, { "epoch": 0.0786548874323169, "grad_norm": 0.70412677526474, "learning_rate": 1.3103635377102553e-05, "loss": 0.4854, "step": 483 }, { "epoch": 0.07881773399014778, "grad_norm": 0.5630834102630615, "learning_rate": 1.3130765056972328e-05, "loss": 0.4664, "step": 484 }, { "epoch": 0.07898058054797867, "grad_norm": 0.6632741093635559, "learning_rate": 1.3157894736842106e-05, "loss": 0.4387, "step": 485 }, { "epoch": 0.07914342710580956, "grad_norm": 0.6159310936927795, "learning_rate": 1.3185024416711883e-05, "loss": 0.4322, "step": 486 }, { "epoch": 0.07930627366364043, "grad_norm": 0.9145199656486511, "learning_rate": 1.321215409658166e-05, "loss": 0.403, "step": 487 }, { "epoch": 0.07946912022147132, "grad_norm": 0.5063183903694153, "learning_rate": 1.3239283776451439e-05, "loss": 0.4002, "step": 488 }, { "epoch": 0.0796319667793022, "grad_norm": 0.5251714587211609, "learning_rate": 1.3266413456321217e-05, "loss": 0.4037, "step": 489 }, { "epoch": 0.07979481333713309, "grad_norm": 0.7072340250015259, "learning_rate": 1.3293543136190995e-05, "loss": 0.4331, "step": 490 }, { "epoch": 0.07995765989496396, "grad_norm": 0.6571031808853149, "learning_rate": 1.3320672816060772e-05, "loss": 0.445, "step": 491 }, { "epoch": 0.08012050645279485, "grad_norm": 0.48848631978034973, "learning_rate": 1.3347802495930547e-05, "loss": 0.4523, "step": 492 }, { "epoch": 0.08028335301062574, "grad_norm": 0.4750298261642456, "learning_rate": 1.3374932175800325e-05, "loss": 0.399, "step": 493 }, { "epoch": 0.08044619956845663, "grad_norm": 0.6758391261100769, "learning_rate": 1.3402061855670103e-05, "loss": 0.4293, "step": 494 }, { "epoch": 0.08060904612628751, "grad_norm": 0.5078654289245605, "learning_rate": 1.3429191535539881e-05, "loss": 0.418, "step": 495 }, { "epoch": 0.08077189268411838, "grad_norm": 0.5405555367469788, "learning_rate": 1.345632121540966e-05, "loss": 0.4885, "step": 496 }, { "epoch": 0.08093473924194927, "grad_norm": 0.648765504360199, "learning_rate": 1.3483450895279436e-05, "loss": 0.4306, "step": 497 }, { "epoch": 0.08109758579978016, "grad_norm": 0.5382432341575623, "learning_rate": 1.3510580575149214e-05, "loss": 0.3958, "step": 498 }, { "epoch": 0.08126043235761105, "grad_norm": 0.5347074866294861, "learning_rate": 1.3537710255018993e-05, "loss": 0.4313, "step": 499 }, { "epoch": 0.08142327891544192, "grad_norm": 0.5596812963485718, "learning_rate": 1.3564839934888767e-05, "loss": 0.4335, "step": 500 }, { "epoch": 0.0815861254732728, "grad_norm": 0.5613042116165161, "learning_rate": 1.3591969614758546e-05, "loss": 0.4198, "step": 501 }, { "epoch": 0.08174897203110369, "grad_norm": 0.7056311368942261, "learning_rate": 1.3619099294628324e-05, "loss": 0.4117, "step": 502 }, { "epoch": 0.08191181858893458, "grad_norm": 0.5064714550971985, "learning_rate": 1.3646228974498102e-05, "loss": 0.4217, "step": 503 }, { "epoch": 0.08207466514676547, "grad_norm": 0.5183547139167786, "learning_rate": 1.3673358654367879e-05, "loss": 0.3772, "step": 504 }, { "epoch": 0.08223751170459634, "grad_norm": 0.6142357587814331, "learning_rate": 1.3700488334237657e-05, "loss": 0.486, "step": 505 }, { "epoch": 0.08240035826242723, "grad_norm": 0.5757888555526733, "learning_rate": 1.3727618014107435e-05, "loss": 0.4535, "step": 506 }, { "epoch": 0.08256320482025811, "grad_norm": 0.5046533346176147, "learning_rate": 1.3754747693977213e-05, "loss": 0.44, "step": 507 }, { "epoch": 0.082726051378089, "grad_norm": 0.5139784812927246, "learning_rate": 1.3781877373846988e-05, "loss": 0.4051, "step": 508 }, { "epoch": 0.08288889793591989, "grad_norm": 0.5004212856292725, "learning_rate": 1.3809007053716766e-05, "loss": 0.3982, "step": 509 }, { "epoch": 0.08305174449375076, "grad_norm": 0.5403602719306946, "learning_rate": 1.3836136733586545e-05, "loss": 0.4502, "step": 510 }, { "epoch": 0.08321459105158165, "grad_norm": 0.5330583453178406, "learning_rate": 1.3863266413456321e-05, "loss": 0.4618, "step": 511 }, { "epoch": 0.08337743760941253, "grad_norm": 0.6889708638191223, "learning_rate": 1.38903960933261e-05, "loss": 0.3953, "step": 512 }, { "epoch": 0.08354028416724342, "grad_norm": 0.6924014091491699, "learning_rate": 1.3917525773195878e-05, "loss": 0.4384, "step": 513 }, { "epoch": 0.08370313072507429, "grad_norm": 0.4904451072216034, "learning_rate": 1.3944655453065656e-05, "loss": 0.4561, "step": 514 }, { "epoch": 0.08386597728290518, "grad_norm": 0.573225200176239, "learning_rate": 1.3971785132935434e-05, "loss": 0.4731, "step": 515 }, { "epoch": 0.08402882384073607, "grad_norm": 0.797132134437561, "learning_rate": 1.3998914812805209e-05, "loss": 0.4567, "step": 516 }, { "epoch": 0.08419167039856695, "grad_norm": 0.6682335734367371, "learning_rate": 1.4026044492674985e-05, "loss": 0.438, "step": 517 }, { "epoch": 0.08435451695639784, "grad_norm": 0.9816511869430542, "learning_rate": 1.4053174172544764e-05, "loss": 0.3694, "step": 518 }, { "epoch": 0.08451736351422871, "grad_norm": 1.671593189239502, "learning_rate": 1.4080303852414542e-05, "loss": 0.4594, "step": 519 }, { "epoch": 0.0846802100720596, "grad_norm": 0.501702606678009, "learning_rate": 1.410743353228432e-05, "loss": 0.4419, "step": 520 }, { "epoch": 0.08484305662989049, "grad_norm": 0.5309339761734009, "learning_rate": 1.4134563212154098e-05, "loss": 0.413, "step": 521 }, { "epoch": 0.08500590318772137, "grad_norm": 1.4184552431106567, "learning_rate": 1.4161692892023875e-05, "loss": 0.4055, "step": 522 }, { "epoch": 0.08516874974555225, "grad_norm": 0.7835232615470886, "learning_rate": 1.4188822571893653e-05, "loss": 0.4692, "step": 523 }, { "epoch": 0.08533159630338313, "grad_norm": 0.7581824660301208, "learning_rate": 1.4215952251763428e-05, "loss": 0.4114, "step": 524 }, { "epoch": 0.08549444286121402, "grad_norm": 0.4808771014213562, "learning_rate": 1.4243081931633206e-05, "loss": 0.4054, "step": 525 }, { "epoch": 0.08565728941904491, "grad_norm": 0.5960814356803894, "learning_rate": 1.4270211611502984e-05, "loss": 0.3935, "step": 526 }, { "epoch": 0.0858201359768758, "grad_norm": 0.6144169569015503, "learning_rate": 1.4297341291372763e-05, "loss": 0.3805, "step": 527 }, { "epoch": 0.08598298253470667, "grad_norm": 0.5151111483573914, "learning_rate": 1.432447097124254e-05, "loss": 0.4624, "step": 528 }, { "epoch": 0.08614582909253755, "grad_norm": 0.5005784630775452, "learning_rate": 1.4351600651112317e-05, "loss": 0.481, "step": 529 }, { "epoch": 0.08630867565036844, "grad_norm": 0.6132022738456726, "learning_rate": 1.4378730330982096e-05, "loss": 0.4253, "step": 530 }, { "epoch": 0.08647152220819933, "grad_norm": 0.6939647793769836, "learning_rate": 1.4405860010851874e-05, "loss": 0.4454, "step": 531 }, { "epoch": 0.08663436876603021, "grad_norm": 0.6247993111610413, "learning_rate": 1.4432989690721649e-05, "loss": 0.4091, "step": 532 }, { "epoch": 0.08679721532386109, "grad_norm": 0.5988906025886536, "learning_rate": 1.4460119370591427e-05, "loss": 0.4688, "step": 533 }, { "epoch": 0.08696006188169197, "grad_norm": 0.5827085971832275, "learning_rate": 1.4487249050461205e-05, "loss": 0.4368, "step": 534 }, { "epoch": 0.08712290843952286, "grad_norm": 0.624792218208313, "learning_rate": 1.4514378730330983e-05, "loss": 0.4328, "step": 535 }, { "epoch": 0.08728575499735375, "grad_norm": 0.4961380064487457, "learning_rate": 1.454150841020076e-05, "loss": 0.3767, "step": 536 }, { "epoch": 0.08744860155518462, "grad_norm": 0.77992182970047, "learning_rate": 1.4568638090070538e-05, "loss": 0.3921, "step": 537 }, { "epoch": 0.08761144811301551, "grad_norm": 0.5040259957313538, "learning_rate": 1.4595767769940316e-05, "loss": 0.457, "step": 538 }, { "epoch": 0.0877742946708464, "grad_norm": 0.6467803716659546, "learning_rate": 1.4622897449810094e-05, "loss": 0.4347, "step": 539 }, { "epoch": 0.08793714122867728, "grad_norm": 0.6883578300476074, "learning_rate": 1.465002712967987e-05, "loss": 0.3867, "step": 540 }, { "epoch": 0.08809998778650817, "grad_norm": 0.7519223093986511, "learning_rate": 1.4677156809549647e-05, "loss": 0.4333, "step": 541 }, { "epoch": 0.08826283434433904, "grad_norm": 0.6373124122619629, "learning_rate": 1.4704286489419426e-05, "loss": 0.4162, "step": 542 }, { "epoch": 0.08842568090216993, "grad_norm": 0.7366703152656555, "learning_rate": 1.4731416169289202e-05, "loss": 0.4377, "step": 543 }, { "epoch": 0.08858852746000082, "grad_norm": 0.70503169298172, "learning_rate": 1.475854584915898e-05, "loss": 0.3897, "step": 544 }, { "epoch": 0.0887513740178317, "grad_norm": 0.6381354928016663, "learning_rate": 1.4785675529028759e-05, "loss": 0.3884, "step": 545 }, { "epoch": 0.08891422057566258, "grad_norm": 0.5391194820404053, "learning_rate": 1.4812805208898537e-05, "loss": 0.4228, "step": 546 }, { "epoch": 0.08907706713349346, "grad_norm": 0.6489073038101196, "learning_rate": 1.4839934888768313e-05, "loss": 0.4184, "step": 547 }, { "epoch": 0.08923991369132435, "grad_norm": 0.49954915046691895, "learning_rate": 1.4867064568638092e-05, "loss": 0.3958, "step": 548 }, { "epoch": 0.08940276024915524, "grad_norm": 0.6125675439834595, "learning_rate": 1.4894194248507866e-05, "loss": 0.392, "step": 549 }, { "epoch": 0.08956560680698612, "grad_norm": 0.6591945290565491, "learning_rate": 1.4921323928377645e-05, "loss": 0.4068, "step": 550 }, { "epoch": 0.089728453364817, "grad_norm": 0.6630287170410156, "learning_rate": 1.4948453608247423e-05, "loss": 0.4539, "step": 551 }, { "epoch": 0.08989129992264788, "grad_norm": 0.6244267821311951, "learning_rate": 1.4975583288117201e-05, "loss": 0.3969, "step": 552 }, { "epoch": 0.09005414648047877, "grad_norm": 0.47810468077659607, "learning_rate": 1.500271296798698e-05, "loss": 0.4262, "step": 553 }, { "epoch": 0.09021699303830966, "grad_norm": 0.6282757520675659, "learning_rate": 1.5029842647856756e-05, "loss": 0.4105, "step": 554 }, { "epoch": 0.09037983959614054, "grad_norm": 0.5363226532936096, "learning_rate": 1.5056972327726534e-05, "loss": 0.4026, "step": 555 }, { "epoch": 0.09054268615397142, "grad_norm": 0.6166423559188843, "learning_rate": 1.5084102007596312e-05, "loss": 0.3925, "step": 556 }, { "epoch": 0.0907055327118023, "grad_norm": 0.49082884192466736, "learning_rate": 1.5111231687466087e-05, "loss": 0.4092, "step": 557 }, { "epoch": 0.09086837926963319, "grad_norm": 0.5644713640213013, "learning_rate": 1.5138361367335865e-05, "loss": 0.4686, "step": 558 }, { "epoch": 0.09103122582746408, "grad_norm": 0.6756033301353455, "learning_rate": 1.5165491047205644e-05, "loss": 0.4184, "step": 559 }, { "epoch": 0.09119407238529495, "grad_norm": 0.5826607942581177, "learning_rate": 1.5192620727075422e-05, "loss": 0.5044, "step": 560 }, { "epoch": 0.09135691894312584, "grad_norm": 0.686532735824585, "learning_rate": 1.5219750406945198e-05, "loss": 0.3976, "step": 561 }, { "epoch": 0.09151976550095672, "grad_norm": 0.5890424251556396, "learning_rate": 1.5246880086814977e-05, "loss": 0.4337, "step": 562 }, { "epoch": 0.09168261205878761, "grad_norm": 1.3981904983520508, "learning_rate": 1.5274009766684755e-05, "loss": 0.3972, "step": 563 }, { "epoch": 0.0918454586166185, "grad_norm": 0.4592425525188446, "learning_rate": 1.530113944655453e-05, "loss": 0.3828, "step": 564 }, { "epoch": 0.09200830517444937, "grad_norm": 0.9553881883621216, "learning_rate": 1.5328269126424308e-05, "loss": 0.4094, "step": 565 }, { "epoch": 0.09217115173228026, "grad_norm": 0.48183146119117737, "learning_rate": 1.5355398806294084e-05, "loss": 0.4093, "step": 566 }, { "epoch": 0.09233399829011114, "grad_norm": 1.0759496688842773, "learning_rate": 1.5382528486163864e-05, "loss": 0.4212, "step": 567 }, { "epoch": 0.09249684484794203, "grad_norm": 0.6134999990463257, "learning_rate": 1.540965816603364e-05, "loss": 0.4103, "step": 568 }, { "epoch": 0.0926596914057729, "grad_norm": 0.6697738766670227, "learning_rate": 1.543678784590342e-05, "loss": 0.3938, "step": 569 }, { "epoch": 0.09282253796360379, "grad_norm": 1.2355830669403076, "learning_rate": 1.5463917525773197e-05, "loss": 0.4043, "step": 570 }, { "epoch": 0.09298538452143468, "grad_norm": 0.5468252897262573, "learning_rate": 1.5491047205642974e-05, "loss": 0.4254, "step": 571 }, { "epoch": 0.09314823107926556, "grad_norm": 0.8605688214302063, "learning_rate": 1.5518176885512754e-05, "loss": 0.4644, "step": 572 }, { "epoch": 0.09331107763709645, "grad_norm": 1.8923550844192505, "learning_rate": 1.5545306565382527e-05, "loss": 0.4422, "step": 573 }, { "epoch": 0.09347392419492732, "grad_norm": 0.5925383567810059, "learning_rate": 1.5572436245252307e-05, "loss": 0.4212, "step": 574 }, { "epoch": 0.09363677075275821, "grad_norm": 0.8313919901847839, "learning_rate": 1.5599565925122083e-05, "loss": 0.4197, "step": 575 }, { "epoch": 0.0937996173105891, "grad_norm": 0.5533359050750732, "learning_rate": 1.5626695604991863e-05, "loss": 0.3937, "step": 576 }, { "epoch": 0.09396246386841998, "grad_norm": 0.7532475590705872, "learning_rate": 1.565382528486164e-05, "loss": 0.4332, "step": 577 }, { "epoch": 0.09412531042625087, "grad_norm": 0.5968907475471497, "learning_rate": 1.5680954964731416e-05, "loss": 0.4442, "step": 578 }, { "epoch": 0.09428815698408174, "grad_norm": 1.6595044136047363, "learning_rate": 1.5708084644601196e-05, "loss": 0.3906, "step": 579 }, { "epoch": 0.09445100354191263, "grad_norm": 2.9169750213623047, "learning_rate": 1.5735214324470973e-05, "loss": 0.3849, "step": 580 }, { "epoch": 0.09461385009974352, "grad_norm": 0.6689615249633789, "learning_rate": 1.576234400434075e-05, "loss": 0.445, "step": 581 }, { "epoch": 0.0947766966575744, "grad_norm": 0.45344725251197815, "learning_rate": 1.5789473684210526e-05, "loss": 0.4002, "step": 582 }, { "epoch": 0.09493954321540528, "grad_norm": 0.5793074369430542, "learning_rate": 1.5816603364080306e-05, "loss": 0.3993, "step": 583 }, { "epoch": 0.09510238977323617, "grad_norm": 0.5652304887771606, "learning_rate": 1.5843733043950082e-05, "loss": 0.428, "step": 584 }, { "epoch": 0.09526523633106705, "grad_norm": 0.5178912878036499, "learning_rate": 1.587086272381986e-05, "loss": 0.445, "step": 585 }, { "epoch": 0.09542808288889794, "grad_norm": 0.5468807220458984, "learning_rate": 1.589799240368964e-05, "loss": 0.3934, "step": 586 }, { "epoch": 0.09559092944672883, "grad_norm": 0.5047475099563599, "learning_rate": 1.5925122083559415e-05, "loss": 0.3978, "step": 587 }, { "epoch": 0.0957537760045597, "grad_norm": 0.5965566039085388, "learning_rate": 1.5952251763429195e-05, "loss": 0.418, "step": 588 }, { "epoch": 0.09591662256239059, "grad_norm": 0.4604274332523346, "learning_rate": 1.5979381443298968e-05, "loss": 0.3987, "step": 589 }, { "epoch": 0.09607946912022147, "grad_norm": 0.45761269330978394, "learning_rate": 1.6006511123168745e-05, "loss": 0.4149, "step": 590 }, { "epoch": 0.09624231567805236, "grad_norm": 0.5480278134346008, "learning_rate": 1.6033640803038525e-05, "loss": 0.4116, "step": 591 }, { "epoch": 0.09640516223588323, "grad_norm": 0.5186944603919983, "learning_rate": 1.60607704829083e-05, "loss": 0.4394, "step": 592 }, { "epoch": 0.09656800879371412, "grad_norm": 0.45990443229675293, "learning_rate": 1.608790016277808e-05, "loss": 0.3664, "step": 593 }, { "epoch": 0.096730855351545, "grad_norm": 0.46997344493865967, "learning_rate": 1.6115029842647858e-05, "loss": 0.4111, "step": 594 }, { "epoch": 0.09689370190937589, "grad_norm": 0.5064854025840759, "learning_rate": 1.6142159522517634e-05, "loss": 0.4168, "step": 595 }, { "epoch": 0.09705654846720678, "grad_norm": 0.4551949203014374, "learning_rate": 1.6169289202387414e-05, "loss": 0.3997, "step": 596 }, { "epoch": 0.09721939502503765, "grad_norm": 0.5052622556686401, "learning_rate": 1.6196418882257187e-05, "loss": 0.4013, "step": 597 }, { "epoch": 0.09738224158286854, "grad_norm": 0.496770441532135, "learning_rate": 1.6223548562126967e-05, "loss": 0.4081, "step": 598 }, { "epoch": 0.09754508814069943, "grad_norm": 0.4850063621997833, "learning_rate": 1.6250678241996744e-05, "loss": 0.3955, "step": 599 }, { "epoch": 0.09770793469853031, "grad_norm": 0.5375972390174866, "learning_rate": 1.6277807921866524e-05, "loss": 0.4448, "step": 600 }, { "epoch": 0.0978707812563612, "grad_norm": 0.5261762738227844, "learning_rate": 1.63049376017363e-05, "loss": 0.3564, "step": 601 }, { "epoch": 0.09803362781419207, "grad_norm": 0.49853700399398804, "learning_rate": 1.6332067281606077e-05, "loss": 0.4041, "step": 602 }, { "epoch": 0.09819647437202296, "grad_norm": 0.4972701966762543, "learning_rate": 1.6359196961475857e-05, "loss": 0.4107, "step": 603 }, { "epoch": 0.09835932092985385, "grad_norm": 0.5467566251754761, "learning_rate": 1.6386326641345633e-05, "loss": 0.4608, "step": 604 }, { "epoch": 0.09852216748768473, "grad_norm": 0.5725856423377991, "learning_rate": 1.641345632121541e-05, "loss": 0.4361, "step": 605 }, { "epoch": 0.0986850140455156, "grad_norm": 0.46589720249176025, "learning_rate": 1.6440586001085186e-05, "loss": 0.399, "step": 606 }, { "epoch": 0.0988478606033465, "grad_norm": 0.5089349746704102, "learning_rate": 1.6467715680954966e-05, "loss": 0.4281, "step": 607 }, { "epoch": 0.09901070716117738, "grad_norm": 0.5101649165153503, "learning_rate": 1.6494845360824743e-05, "loss": 0.3964, "step": 608 }, { "epoch": 0.09917355371900827, "grad_norm": 0.4707399904727936, "learning_rate": 1.652197504069452e-05, "loss": 0.4117, "step": 609 }, { "epoch": 0.09933640027683915, "grad_norm": 0.4501561224460602, "learning_rate": 1.65491047205643e-05, "loss": 0.4325, "step": 610 }, { "epoch": 0.09949924683467003, "grad_norm": 0.47035422921180725, "learning_rate": 1.6576234400434076e-05, "loss": 0.3576, "step": 611 }, { "epoch": 0.09966209339250091, "grad_norm": 0.5738977193832397, "learning_rate": 1.6603364080303856e-05, "loss": 0.4071, "step": 612 }, { "epoch": 0.0998249399503318, "grad_norm": 0.49758657813072205, "learning_rate": 1.663049376017363e-05, "loss": 0.4293, "step": 613 }, { "epoch": 0.09998778650816269, "grad_norm": 0.4762756824493408, "learning_rate": 1.665762344004341e-05, "loss": 0.4169, "step": 614 }, { "epoch": 0.10015063306599356, "grad_norm": 0.5438569188117981, "learning_rate": 1.6684753119913185e-05, "loss": 0.4375, "step": 615 }, { "epoch": 0.10031347962382445, "grad_norm": 0.5021923780441284, "learning_rate": 1.671188279978296e-05, "loss": 0.4921, "step": 616 }, { "epoch": 0.10047632618165533, "grad_norm": 0.5281932950019836, "learning_rate": 1.673901247965274e-05, "loss": 0.4046, "step": 617 }, { "epoch": 0.10063917273948622, "grad_norm": 0.5052928924560547, "learning_rate": 1.6766142159522518e-05, "loss": 0.4167, "step": 618 }, { "epoch": 0.10080201929731711, "grad_norm": 0.5403948426246643, "learning_rate": 1.6793271839392298e-05, "loss": 0.4415, "step": 619 }, { "epoch": 0.10096486585514798, "grad_norm": 0.5140218734741211, "learning_rate": 1.6820401519262075e-05, "loss": 0.4301, "step": 620 }, { "epoch": 0.10112771241297887, "grad_norm": 0.6224655508995056, "learning_rate": 1.684753119913185e-05, "loss": 0.4632, "step": 621 }, { "epoch": 0.10129055897080975, "grad_norm": 0.5562294721603394, "learning_rate": 1.6874660879001628e-05, "loss": 0.4161, "step": 622 }, { "epoch": 0.10145340552864064, "grad_norm": 0.45920330286026, "learning_rate": 1.6901790558871404e-05, "loss": 0.3904, "step": 623 }, { "epoch": 0.10161625208647153, "grad_norm": 0.5304837226867676, "learning_rate": 1.6928920238741184e-05, "loss": 0.4262, "step": 624 }, { "epoch": 0.1017790986443024, "grad_norm": 0.6050227880477905, "learning_rate": 1.695604991861096e-05, "loss": 0.4463, "step": 625 }, { "epoch": 0.10194194520213329, "grad_norm": 0.5866563320159912, "learning_rate": 1.698317959848074e-05, "loss": 0.4177, "step": 626 }, { "epoch": 0.10210479175996418, "grad_norm": 0.5925689339637756, "learning_rate": 1.7010309278350517e-05, "loss": 0.4224, "step": 627 }, { "epoch": 0.10226763831779506, "grad_norm": 0.5189410448074341, "learning_rate": 1.7037438958220294e-05, "loss": 0.4158, "step": 628 }, { "epoch": 0.10243048487562594, "grad_norm": 0.5408751964569092, "learning_rate": 1.7064568638090073e-05, "loss": 0.435, "step": 629 }, { "epoch": 0.10259333143345682, "grad_norm": 0.5400001406669617, "learning_rate": 1.7091698317959847e-05, "loss": 0.4015, "step": 630 }, { "epoch": 0.10275617799128771, "grad_norm": 0.5285464525222778, "learning_rate": 1.7118827997829627e-05, "loss": 0.4217, "step": 631 }, { "epoch": 0.1029190245491186, "grad_norm": 0.6388393044471741, "learning_rate": 1.7145957677699403e-05, "loss": 0.4335, "step": 632 }, { "epoch": 0.10308187110694948, "grad_norm": 0.5922379493713379, "learning_rate": 1.7173087357569183e-05, "loss": 0.4122, "step": 633 }, { "epoch": 0.10324471766478036, "grad_norm": 0.5205716490745544, "learning_rate": 1.720021703743896e-05, "loss": 0.4191, "step": 634 }, { "epoch": 0.10340756422261124, "grad_norm": 0.5205332040786743, "learning_rate": 1.7227346717308736e-05, "loss": 0.3754, "step": 635 }, { "epoch": 0.10357041078044213, "grad_norm": 0.4945986270904541, "learning_rate": 1.7254476397178516e-05, "loss": 0.4095, "step": 636 }, { "epoch": 0.10373325733827302, "grad_norm": 0.5632529854774475, "learning_rate": 1.7281606077048292e-05, "loss": 0.4148, "step": 637 }, { "epoch": 0.1038961038961039, "grad_norm": 0.4688045382499695, "learning_rate": 1.730873575691807e-05, "loss": 0.4261, "step": 638 }, { "epoch": 0.10405895045393478, "grad_norm": 0.49371522665023804, "learning_rate": 1.7335865436787846e-05, "loss": 0.4207, "step": 639 }, { "epoch": 0.10422179701176566, "grad_norm": 0.4928632974624634, "learning_rate": 1.7362995116657625e-05, "loss": 0.4006, "step": 640 }, { "epoch": 0.10438464356959655, "grad_norm": 0.49046480655670166, "learning_rate": 1.7390124796527402e-05, "loss": 0.3786, "step": 641 }, { "epoch": 0.10454749012742744, "grad_norm": 0.553445041179657, "learning_rate": 1.741725447639718e-05, "loss": 0.4011, "step": 642 }, { "epoch": 0.10471033668525831, "grad_norm": 0.4880467355251312, "learning_rate": 1.744438415626696e-05, "loss": 0.4334, "step": 643 }, { "epoch": 0.1048731832430892, "grad_norm": 0.5895493626594543, "learning_rate": 1.7471513836136735e-05, "loss": 0.4494, "step": 644 }, { "epoch": 0.10503602980092008, "grad_norm": 0.6557528972625732, "learning_rate": 1.749864351600651e-05, "loss": 0.4004, "step": 645 }, { "epoch": 0.10519887635875097, "grad_norm": 0.54509037733078, "learning_rate": 1.7525773195876288e-05, "loss": 0.4189, "step": 646 }, { "epoch": 0.10536172291658186, "grad_norm": 0.54144686460495, "learning_rate": 1.7552902875746065e-05, "loss": 0.4267, "step": 647 }, { "epoch": 0.10552456947441273, "grad_norm": 0.5132995843887329, "learning_rate": 1.7580032555615844e-05, "loss": 0.4137, "step": 648 }, { "epoch": 0.10568741603224362, "grad_norm": 0.5781434178352356, "learning_rate": 1.760716223548562e-05, "loss": 0.4127, "step": 649 }, { "epoch": 0.1058502625900745, "grad_norm": 0.5972626209259033, "learning_rate": 1.76342919153554e-05, "loss": 0.4511, "step": 650 }, { "epoch": 0.10601310914790539, "grad_norm": 0.6146823167800903, "learning_rate": 1.7661421595225177e-05, "loss": 0.4508, "step": 651 }, { "epoch": 0.10617595570573626, "grad_norm": 0.46572232246398926, "learning_rate": 1.7688551275094954e-05, "loss": 0.4321, "step": 652 }, { "epoch": 0.10633880226356715, "grad_norm": 0.5965296030044556, "learning_rate": 1.7715680954964734e-05, "loss": 0.4187, "step": 653 }, { "epoch": 0.10650164882139804, "grad_norm": 0.56317538022995, "learning_rate": 1.7742810634834507e-05, "loss": 0.4498, "step": 654 }, { "epoch": 0.10666449537922892, "grad_norm": 0.4940341114997864, "learning_rate": 1.7769940314704287e-05, "loss": 0.4412, "step": 655 }, { "epoch": 0.10682734193705981, "grad_norm": 0.5229737758636475, "learning_rate": 1.7797069994574063e-05, "loss": 0.4183, "step": 656 }, { "epoch": 0.10699018849489068, "grad_norm": 0.5178452730178833, "learning_rate": 1.7824199674443843e-05, "loss": 0.414, "step": 657 }, { "epoch": 0.10715303505272157, "grad_norm": 0.458834707736969, "learning_rate": 1.785132935431362e-05, "loss": 0.4035, "step": 658 }, { "epoch": 0.10731588161055246, "grad_norm": 0.5564674139022827, "learning_rate": 1.7878459034183396e-05, "loss": 0.3864, "step": 659 }, { "epoch": 0.10747872816838334, "grad_norm": 0.5226605534553528, "learning_rate": 1.7905588714053176e-05, "loss": 0.405, "step": 660 }, { "epoch": 0.10764157472621423, "grad_norm": 0.5202674865722656, "learning_rate": 1.7932718393922953e-05, "loss": 0.394, "step": 661 }, { "epoch": 0.1078044212840451, "grad_norm": 0.4646974205970764, "learning_rate": 1.795984807379273e-05, "loss": 0.4159, "step": 662 }, { "epoch": 0.10796726784187599, "grad_norm": 0.5622280836105347, "learning_rate": 1.7986977753662506e-05, "loss": 0.3955, "step": 663 }, { "epoch": 0.10813011439970688, "grad_norm": 0.4912274479866028, "learning_rate": 1.8014107433532286e-05, "loss": 0.4334, "step": 664 }, { "epoch": 0.10829296095753776, "grad_norm": 0.4877324402332306, "learning_rate": 1.8041237113402062e-05, "loss": 0.3593, "step": 665 }, { "epoch": 0.10845580751536864, "grad_norm": 0.5493112206459045, "learning_rate": 1.806836679327184e-05, "loss": 0.4405, "step": 666 }, { "epoch": 0.10861865407319952, "grad_norm": 0.6282123327255249, "learning_rate": 1.809549647314162e-05, "loss": 0.4359, "step": 667 }, { "epoch": 0.10878150063103041, "grad_norm": 0.5185723304748535, "learning_rate": 1.8122626153011395e-05, "loss": 0.3906, "step": 668 }, { "epoch": 0.1089443471888613, "grad_norm": 0.46867355704307556, "learning_rate": 1.8149755832881175e-05, "loss": 0.3706, "step": 669 }, { "epoch": 0.10910719374669219, "grad_norm": 0.5659801959991455, "learning_rate": 1.817688551275095e-05, "loss": 0.3963, "step": 670 }, { "epoch": 0.10927004030452306, "grad_norm": 0.5704379081726074, "learning_rate": 1.820401519262073e-05, "loss": 0.4066, "step": 671 }, { "epoch": 0.10943288686235395, "grad_norm": 0.6523657441139221, "learning_rate": 1.8231144872490505e-05, "loss": 0.4577, "step": 672 }, { "epoch": 0.10959573342018483, "grad_norm": 0.5108274817466736, "learning_rate": 1.825827455236028e-05, "loss": 0.4315, "step": 673 }, { "epoch": 0.10975857997801572, "grad_norm": 0.5128151178359985, "learning_rate": 1.828540423223006e-05, "loss": 0.3692, "step": 674 }, { "epoch": 0.10992142653584659, "grad_norm": 0.4724285304546356, "learning_rate": 1.8312533912099838e-05, "loss": 0.4031, "step": 675 }, { "epoch": 0.11008427309367748, "grad_norm": 0.4960835874080658, "learning_rate": 1.8339663591969618e-05, "loss": 0.4333, "step": 676 }, { "epoch": 0.11024711965150837, "grad_norm": 0.5369781255722046, "learning_rate": 1.8366793271839394e-05, "loss": 0.4171, "step": 677 }, { "epoch": 0.11040996620933925, "grad_norm": 0.6403104662895203, "learning_rate": 1.839392295170917e-05, "loss": 0.4457, "step": 678 }, { "epoch": 0.11057281276717014, "grad_norm": 0.5129048228263855, "learning_rate": 1.8421052631578947e-05, "loss": 0.4232, "step": 679 }, { "epoch": 0.11073565932500101, "grad_norm": 0.576291024684906, "learning_rate": 1.8448182311448724e-05, "loss": 0.4095, "step": 680 }, { "epoch": 0.1108985058828319, "grad_norm": 0.5333300232887268, "learning_rate": 1.8475311991318504e-05, "loss": 0.3906, "step": 681 }, { "epoch": 0.11106135244066279, "grad_norm": 0.7888916730880737, "learning_rate": 1.850244167118828e-05, "loss": 0.5419, "step": 682 }, { "epoch": 0.11122419899849367, "grad_norm": 0.5895657539367676, "learning_rate": 1.852957135105806e-05, "loss": 0.4228, "step": 683 }, { "epoch": 0.11138704555632456, "grad_norm": 0.5287598371505737, "learning_rate": 1.8556701030927837e-05, "loss": 0.4083, "step": 684 }, { "epoch": 0.11154989211415543, "grad_norm": 0.6422684788703918, "learning_rate": 1.8583830710797613e-05, "loss": 0.4222, "step": 685 }, { "epoch": 0.11171273867198632, "grad_norm": 0.5987042188644409, "learning_rate": 1.8610960390667393e-05, "loss": 0.4058, "step": 686 }, { "epoch": 0.1118755852298172, "grad_norm": 0.5813549160957336, "learning_rate": 1.8638090070537166e-05, "loss": 0.4248, "step": 687 }, { "epoch": 0.1120384317876481, "grad_norm": 0.5419767498970032, "learning_rate": 1.8665219750406946e-05, "loss": 0.4016, "step": 688 }, { "epoch": 0.11220127834547897, "grad_norm": 0.5199453830718994, "learning_rate": 1.8692349430276723e-05, "loss": 0.431, "step": 689 }, { "epoch": 0.11236412490330985, "grad_norm": 0.5554742217063904, "learning_rate": 1.8719479110146503e-05, "loss": 0.4296, "step": 690 }, { "epoch": 0.11252697146114074, "grad_norm": 0.482744961977005, "learning_rate": 1.874660879001628e-05, "loss": 0.4461, "step": 691 }, { "epoch": 0.11268981801897163, "grad_norm": 0.5014987587928772, "learning_rate": 1.8773738469886056e-05, "loss": 0.4261, "step": 692 }, { "epoch": 0.11285266457680251, "grad_norm": 0.5512272119522095, "learning_rate": 1.8800868149755836e-05, "loss": 0.3691, "step": 693 }, { "epoch": 0.11301551113463339, "grad_norm": 0.5010823011398315, "learning_rate": 1.8827997829625612e-05, "loss": 0.4022, "step": 694 }, { "epoch": 0.11317835769246427, "grad_norm": 0.553906261920929, "learning_rate": 1.885512750949539e-05, "loss": 0.4577, "step": 695 }, { "epoch": 0.11334120425029516, "grad_norm": 0.5571849346160889, "learning_rate": 1.8882257189365165e-05, "loss": 0.4029, "step": 696 }, { "epoch": 0.11350405080812605, "grad_norm": 0.5961807370185852, "learning_rate": 1.8909386869234945e-05, "loss": 0.4167, "step": 697 }, { "epoch": 0.11366689736595692, "grad_norm": 0.6349856853485107, "learning_rate": 1.8936516549104722e-05, "loss": 0.4178, "step": 698 }, { "epoch": 0.11382974392378781, "grad_norm": 0.5012732744216919, "learning_rate": 1.8963646228974498e-05, "loss": 0.4023, "step": 699 }, { "epoch": 0.1139925904816187, "grad_norm": 0.6176965236663818, "learning_rate": 1.8990775908844278e-05, "loss": 0.4504, "step": 700 }, { "epoch": 0.11415543703944958, "grad_norm": 0.5240657329559326, "learning_rate": 1.9017905588714055e-05, "loss": 0.4528, "step": 701 }, { "epoch": 0.11431828359728047, "grad_norm": 0.5624739527702332, "learning_rate": 1.904503526858383e-05, "loss": 0.457, "step": 702 }, { "epoch": 0.11448113015511134, "grad_norm": 0.5082324147224426, "learning_rate": 1.9072164948453608e-05, "loss": 0.4135, "step": 703 }, { "epoch": 0.11464397671294223, "grad_norm": 0.5513611435890198, "learning_rate": 1.9099294628323384e-05, "loss": 0.4584, "step": 704 }, { "epoch": 0.11480682327077311, "grad_norm": 0.42507535219192505, "learning_rate": 1.9126424308193164e-05, "loss": 0.4153, "step": 705 }, { "epoch": 0.114969669828604, "grad_norm": 0.5486183762550354, "learning_rate": 1.915355398806294e-05, "loss": 0.4447, "step": 706 }, { "epoch": 0.11513251638643489, "grad_norm": 0.48238319158554077, "learning_rate": 1.918068366793272e-05, "loss": 0.3703, "step": 707 }, { "epoch": 0.11529536294426576, "grad_norm": 0.51045161485672, "learning_rate": 1.9207813347802497e-05, "loss": 0.4234, "step": 708 }, { "epoch": 0.11545820950209665, "grad_norm": 0.5094732642173767, "learning_rate": 1.9234943027672274e-05, "loss": 0.4661, "step": 709 }, { "epoch": 0.11562105605992753, "grad_norm": 0.5971421599388123, "learning_rate": 1.9262072707542054e-05, "loss": 0.4046, "step": 710 }, { "epoch": 0.11578390261775842, "grad_norm": 0.4575183689594269, "learning_rate": 1.9289202387411827e-05, "loss": 0.4226, "step": 711 }, { "epoch": 0.1159467491755893, "grad_norm": 0.5146524310112, "learning_rate": 1.9316332067281607e-05, "loss": 0.4087, "step": 712 }, { "epoch": 0.11610959573342018, "grad_norm": 0.490886926651001, "learning_rate": 1.9343461747151383e-05, "loss": 0.3758, "step": 713 }, { "epoch": 0.11627244229125107, "grad_norm": 0.5081219673156738, "learning_rate": 1.9370591427021163e-05, "loss": 0.4344, "step": 714 }, { "epoch": 0.11643528884908196, "grad_norm": 0.5101520419120789, "learning_rate": 1.939772110689094e-05, "loss": 0.4043, "step": 715 }, { "epoch": 0.11659813540691284, "grad_norm": 0.41452154517173767, "learning_rate": 1.9424850786760716e-05, "loss": 0.3979, "step": 716 }, { "epoch": 0.11676098196474372, "grad_norm": 0.4965195953845978, "learning_rate": 1.9451980466630496e-05, "loss": 0.4215, "step": 717 }, { "epoch": 0.1169238285225746, "grad_norm": 0.5246562957763672, "learning_rate": 1.9479110146500273e-05, "loss": 0.4504, "step": 718 }, { "epoch": 0.11708667508040549, "grad_norm": 0.502751350402832, "learning_rate": 1.950623982637005e-05, "loss": 0.3791, "step": 719 }, { "epoch": 0.11724952163823638, "grad_norm": 0.5301861763000488, "learning_rate": 1.9533369506239826e-05, "loss": 0.4231, "step": 720 }, { "epoch": 0.11741236819606725, "grad_norm": 0.5616775751113892, "learning_rate": 1.9560499186109606e-05, "loss": 0.4377, "step": 721 }, { "epoch": 0.11757521475389814, "grad_norm": 0.43559640645980835, "learning_rate": 1.9587628865979382e-05, "loss": 0.4002, "step": 722 }, { "epoch": 0.11773806131172902, "grad_norm": 0.42455437779426575, "learning_rate": 1.961475854584916e-05, "loss": 0.3714, "step": 723 }, { "epoch": 0.11790090786955991, "grad_norm": 0.5409876704216003, "learning_rate": 1.964188822571894e-05, "loss": 0.4327, "step": 724 }, { "epoch": 0.1180637544273908, "grad_norm": 0.46567249298095703, "learning_rate": 1.9669017905588715e-05, "loss": 0.404, "step": 725 }, { "epoch": 0.11822660098522167, "grad_norm": 0.5121949315071106, "learning_rate": 1.9696147585458495e-05, "loss": 0.4295, "step": 726 }, { "epoch": 0.11838944754305256, "grad_norm": 0.5278825163841248, "learning_rate": 1.9723277265328268e-05, "loss": 0.4295, "step": 727 }, { "epoch": 0.11855229410088344, "grad_norm": 0.6195667386054993, "learning_rate": 1.9750406945198048e-05, "loss": 0.4351, "step": 728 }, { "epoch": 0.11871514065871433, "grad_norm": 0.6645898222923279, "learning_rate": 1.9777536625067825e-05, "loss": 0.41, "step": 729 }, { "epoch": 0.11887798721654522, "grad_norm": 0.49465692043304443, "learning_rate": 1.98046663049376e-05, "loss": 0.3919, "step": 730 }, { "epoch": 0.11904083377437609, "grad_norm": 0.42688655853271484, "learning_rate": 1.983179598480738e-05, "loss": 0.4318, "step": 731 }, { "epoch": 0.11920368033220698, "grad_norm": 0.6533564329147339, "learning_rate": 1.9858925664677158e-05, "loss": 0.4887, "step": 732 }, { "epoch": 0.11936652689003786, "grad_norm": 0.5878970623016357, "learning_rate": 1.9886055344546938e-05, "loss": 0.4317, "step": 733 }, { "epoch": 0.11952937344786875, "grad_norm": 0.47443637251853943, "learning_rate": 1.9913185024416714e-05, "loss": 0.3827, "step": 734 }, { "epoch": 0.11969222000569962, "grad_norm": 0.4831632673740387, "learning_rate": 1.994031470428649e-05, "loss": 0.3849, "step": 735 }, { "epoch": 0.11985506656353051, "grad_norm": 0.533304750919342, "learning_rate": 1.9967444384156267e-05, "loss": 0.3977, "step": 736 }, { "epoch": 0.1200179131213614, "grad_norm": 0.5390227437019348, "learning_rate": 1.9994574064026044e-05, "loss": 0.431, "step": 737 }, { "epoch": 0.12018075967919228, "grad_norm": 0.5876805186271667, "learning_rate": 2.0021703743895824e-05, "loss": 0.461, "step": 738 }, { "epoch": 0.12034360623702317, "grad_norm": 0.5431119203567505, "learning_rate": 2.00488334237656e-05, "loss": 0.4436, "step": 739 }, { "epoch": 0.12050645279485404, "grad_norm": 0.4890023171901703, "learning_rate": 2.007596310363538e-05, "loss": 0.4143, "step": 740 }, { "epoch": 0.12066929935268493, "grad_norm": 0.49973824620246887, "learning_rate": 2.0103092783505157e-05, "loss": 0.3964, "step": 741 }, { "epoch": 0.12083214591051582, "grad_norm": 0.5055153965950012, "learning_rate": 2.0130222463374933e-05, "loss": 0.3945, "step": 742 }, { "epoch": 0.1209949924683467, "grad_norm": 0.6560674905776978, "learning_rate": 2.015735214324471e-05, "loss": 0.4314, "step": 743 }, { "epoch": 0.12115783902617758, "grad_norm": 0.4148188531398773, "learning_rate": 2.0184481823114486e-05, "loss": 0.3872, "step": 744 }, { "epoch": 0.12132068558400846, "grad_norm": 0.5383859872817993, "learning_rate": 2.0211611502984266e-05, "loss": 0.4009, "step": 745 }, { "epoch": 0.12148353214183935, "grad_norm": 0.7071426510810852, "learning_rate": 2.0238741182854043e-05, "loss": 0.4026, "step": 746 }, { "epoch": 0.12164637869967024, "grad_norm": 0.4705260097980499, "learning_rate": 2.0265870862723822e-05, "loss": 0.3854, "step": 747 }, { "epoch": 0.12180922525750112, "grad_norm": 0.5091960430145264, "learning_rate": 2.02930005425936e-05, "loss": 0.3848, "step": 748 }, { "epoch": 0.121972071815332, "grad_norm": 0.500548779964447, "learning_rate": 2.0320130222463376e-05, "loss": 0.379, "step": 749 }, { "epoch": 0.12213491837316288, "grad_norm": 0.4888949692249298, "learning_rate": 2.0347259902333155e-05, "loss": 0.4341, "step": 750 }, { "epoch": 0.12229776493099377, "grad_norm": 0.5191283822059631, "learning_rate": 2.0374389582202932e-05, "loss": 0.3764, "step": 751 }, { "epoch": 0.12246061148882466, "grad_norm": 0.5925695300102234, "learning_rate": 2.040151926207271e-05, "loss": 0.4185, "step": 752 }, { "epoch": 0.12262345804665555, "grad_norm": 0.4872934818267822, "learning_rate": 2.0428648941942485e-05, "loss": 0.3996, "step": 753 }, { "epoch": 0.12278630460448642, "grad_norm": 0.5119906067848206, "learning_rate": 2.045577862181226e-05, "loss": 0.3627, "step": 754 }, { "epoch": 0.1229491511623173, "grad_norm": 0.507542073726654, "learning_rate": 2.048290830168204e-05, "loss": 0.3783, "step": 755 }, { "epoch": 0.12311199772014819, "grad_norm": 0.4858055114746094, "learning_rate": 2.0510037981551818e-05, "loss": 0.3909, "step": 756 }, { "epoch": 0.12327484427797908, "grad_norm": 0.489907443523407, "learning_rate": 2.0537167661421598e-05, "loss": 0.4359, "step": 757 }, { "epoch": 0.12343769083580995, "grad_norm": 0.48289602994918823, "learning_rate": 2.0564297341291374e-05, "loss": 0.4095, "step": 758 }, { "epoch": 0.12360053739364084, "grad_norm": 0.422770619392395, "learning_rate": 2.059142702116115e-05, "loss": 0.3872, "step": 759 }, { "epoch": 0.12376338395147173, "grad_norm": 0.5696065425872803, "learning_rate": 2.0618556701030927e-05, "loss": 0.444, "step": 760 }, { "epoch": 0.12392623050930261, "grad_norm": 0.47441503405570984, "learning_rate": 2.0645686380900704e-05, "loss": 0.4011, "step": 761 }, { "epoch": 0.1240890770671335, "grad_norm": 0.5089815258979797, "learning_rate": 2.0672816060770484e-05, "loss": 0.4135, "step": 762 }, { "epoch": 0.12425192362496437, "grad_norm": 0.4950278103351593, "learning_rate": 2.069994574064026e-05, "loss": 0.3903, "step": 763 }, { "epoch": 0.12441477018279526, "grad_norm": 0.4437870979309082, "learning_rate": 2.072707542051004e-05, "loss": 0.4039, "step": 764 }, { "epoch": 0.12457761674062615, "grad_norm": 0.4788703918457031, "learning_rate": 2.0754205100379817e-05, "loss": 0.4371, "step": 765 }, { "epoch": 0.12474046329845703, "grad_norm": 0.585293710231781, "learning_rate": 2.0781334780249593e-05, "loss": 0.4529, "step": 766 }, { "epoch": 0.12490330985628792, "grad_norm": 0.48778074979782104, "learning_rate": 2.0808464460119373e-05, "loss": 0.4118, "step": 767 }, { "epoch": 0.1250661564141188, "grad_norm": 0.47367146611213684, "learning_rate": 2.0835594139989147e-05, "loss": 0.4352, "step": 768 }, { "epoch": 0.1252290029719497, "grad_norm": 0.45451295375823975, "learning_rate": 2.0862723819858926e-05, "loss": 0.3833, "step": 769 }, { "epoch": 0.12539184952978055, "grad_norm": 0.4798377454280853, "learning_rate": 2.0889853499728703e-05, "loss": 0.4657, "step": 770 }, { "epoch": 0.12555469608761144, "grad_norm": 0.5297368168830872, "learning_rate": 2.0916983179598483e-05, "loss": 0.4416, "step": 771 }, { "epoch": 0.12571754264544233, "grad_norm": 0.6550719738006592, "learning_rate": 2.094411285946826e-05, "loss": 0.4964, "step": 772 }, { "epoch": 0.1258803892032732, "grad_norm": 0.5049638152122498, "learning_rate": 2.0971242539338036e-05, "loss": 0.402, "step": 773 }, { "epoch": 0.1260432357611041, "grad_norm": 0.46479618549346924, "learning_rate": 2.0998372219207816e-05, "loss": 0.3985, "step": 774 }, { "epoch": 0.126206082318935, "grad_norm": 0.5122048854827881, "learning_rate": 2.1025501899077592e-05, "loss": 0.4153, "step": 775 }, { "epoch": 0.12636892887676587, "grad_norm": 0.5374476909637451, "learning_rate": 2.105263157894737e-05, "loss": 0.4887, "step": 776 }, { "epoch": 0.12653177543459676, "grad_norm": 0.4495176672935486, "learning_rate": 2.1079761258817145e-05, "loss": 0.3836, "step": 777 }, { "epoch": 0.12669462199242765, "grad_norm": 0.4991559386253357, "learning_rate": 2.1106890938686925e-05, "loss": 0.4287, "step": 778 }, { "epoch": 0.1268574685502585, "grad_norm": 0.4939398467540741, "learning_rate": 2.1134020618556702e-05, "loss": 0.4116, "step": 779 }, { "epoch": 0.1270203151080894, "grad_norm": 0.514137864112854, "learning_rate": 2.116115029842648e-05, "loss": 0.3861, "step": 780 }, { "epoch": 0.12718316166592028, "grad_norm": 0.5916429758071899, "learning_rate": 2.118827997829626e-05, "loss": 0.4473, "step": 781 }, { "epoch": 0.12734600822375117, "grad_norm": 0.48485124111175537, "learning_rate": 2.1215409658166035e-05, "loss": 0.4263, "step": 782 }, { "epoch": 0.12750885478158205, "grad_norm": 0.5048006176948547, "learning_rate": 2.1242539338035815e-05, "loss": 0.4021, "step": 783 }, { "epoch": 0.12767170133941294, "grad_norm": 0.5158031582832336, "learning_rate": 2.1269669017905588e-05, "loss": 0.4166, "step": 784 }, { "epoch": 0.12783454789724383, "grad_norm": 0.4991646111011505, "learning_rate": 2.1296798697775368e-05, "loss": 0.418, "step": 785 }, { "epoch": 0.12799739445507471, "grad_norm": 0.5878015160560608, "learning_rate": 2.1323928377645144e-05, "loss": 0.4279, "step": 786 }, { "epoch": 0.1281602410129056, "grad_norm": 0.5273152589797974, "learning_rate": 2.135105805751492e-05, "loss": 0.425, "step": 787 }, { "epoch": 0.12832308757073646, "grad_norm": 0.5091425776481628, "learning_rate": 2.13781877373847e-05, "loss": 0.3892, "step": 788 }, { "epoch": 0.12848593412856735, "grad_norm": 0.4749189615249634, "learning_rate": 2.1405317417254477e-05, "loss": 0.3954, "step": 789 }, { "epoch": 0.12864878068639823, "grad_norm": 0.5972817540168762, "learning_rate": 2.1432447097124257e-05, "loss": 0.4201, "step": 790 }, { "epoch": 0.12881162724422912, "grad_norm": 0.48030519485473633, "learning_rate": 2.1459576776994034e-05, "loss": 0.3478, "step": 791 }, { "epoch": 0.12897447380206, "grad_norm": 0.5422612428665161, "learning_rate": 2.148670645686381e-05, "loss": 0.4163, "step": 792 }, { "epoch": 0.1291373203598909, "grad_norm": 0.44878697395324707, "learning_rate": 2.1513836136733587e-05, "loss": 0.3915, "step": 793 }, { "epoch": 0.12930016691772178, "grad_norm": 0.4908897876739502, "learning_rate": 2.1540965816603363e-05, "loss": 0.4194, "step": 794 }, { "epoch": 0.12946301347555267, "grad_norm": 0.4139118194580078, "learning_rate": 2.1568095496473143e-05, "loss": 0.3763, "step": 795 }, { "epoch": 0.12962586003338356, "grad_norm": 0.5623334050178528, "learning_rate": 2.159522517634292e-05, "loss": 0.36, "step": 796 }, { "epoch": 0.12978870659121441, "grad_norm": 0.47083568572998047, "learning_rate": 2.16223548562127e-05, "loss": 0.3669, "step": 797 }, { "epoch": 0.1299515531490453, "grad_norm": 0.6227023601531982, "learning_rate": 2.1649484536082476e-05, "loss": 0.4518, "step": 798 }, { "epoch": 0.1301143997068762, "grad_norm": 0.4363906979560852, "learning_rate": 2.1676614215952253e-05, "loss": 0.3873, "step": 799 }, { "epoch": 0.13027724626470708, "grad_norm": 0.5405596494674683, "learning_rate": 2.170374389582203e-05, "loss": 0.4107, "step": 800 }, { "epoch": 0.13044009282253796, "grad_norm": 0.48094114661216736, "learning_rate": 2.1730873575691806e-05, "loss": 0.4557, "step": 801 }, { "epoch": 0.13060293938036885, "grad_norm": 0.4867396950721741, "learning_rate": 2.1758003255561586e-05, "loss": 0.41, "step": 802 }, { "epoch": 0.13076578593819974, "grad_norm": 0.5132375955581665, "learning_rate": 2.1785132935431362e-05, "loss": 0.3965, "step": 803 }, { "epoch": 0.13092863249603062, "grad_norm": 0.45234692096710205, "learning_rate": 2.1812262615301142e-05, "loss": 0.3732, "step": 804 }, { "epoch": 0.1310914790538615, "grad_norm": 0.5380355715751648, "learning_rate": 2.183939229517092e-05, "loss": 0.4216, "step": 805 }, { "epoch": 0.1312543256116924, "grad_norm": 0.6149552464485168, "learning_rate": 2.1866521975040695e-05, "loss": 0.3987, "step": 806 }, { "epoch": 0.13141717216952326, "grad_norm": 0.4973880350589752, "learning_rate": 2.1893651654910475e-05, "loss": 0.433, "step": 807 }, { "epoch": 0.13158001872735414, "grad_norm": 0.5098929405212402, "learning_rate": 2.1920781334780248e-05, "loss": 0.3994, "step": 808 }, { "epoch": 0.13174286528518503, "grad_norm": 0.5026618242263794, "learning_rate": 2.1947911014650028e-05, "loss": 0.4553, "step": 809 }, { "epoch": 0.13190571184301592, "grad_norm": 0.48551255464553833, "learning_rate": 2.1975040694519805e-05, "loss": 0.4165, "step": 810 }, { "epoch": 0.1320685584008468, "grad_norm": 0.518238365650177, "learning_rate": 2.200217037438958e-05, "loss": 0.4523, "step": 811 }, { "epoch": 0.1322314049586777, "grad_norm": 0.4875151515007019, "learning_rate": 2.202930005425936e-05, "loss": 0.4181, "step": 812 }, { "epoch": 0.13239425151650858, "grad_norm": 0.5263129472732544, "learning_rate": 2.2056429734129138e-05, "loss": 0.4055, "step": 813 }, { "epoch": 0.13255709807433946, "grad_norm": 0.6111821532249451, "learning_rate": 2.2083559413998918e-05, "loss": 0.4308, "step": 814 }, { "epoch": 0.13271994463217035, "grad_norm": 0.5017539262771606, "learning_rate": 2.2110689093868694e-05, "loss": 0.3758, "step": 815 }, { "epoch": 0.1328827911900012, "grad_norm": 0.5003057718276978, "learning_rate": 2.213781877373847e-05, "loss": 0.4107, "step": 816 }, { "epoch": 0.1330456377478321, "grad_norm": 0.6422538757324219, "learning_rate": 2.2164948453608247e-05, "loss": 0.4339, "step": 817 }, { "epoch": 0.13320848430566298, "grad_norm": 0.5134648084640503, "learning_rate": 2.2192078133478024e-05, "loss": 0.4354, "step": 818 }, { "epoch": 0.13337133086349387, "grad_norm": 0.6171595454216003, "learning_rate": 2.2219207813347804e-05, "loss": 0.4417, "step": 819 }, { "epoch": 0.13353417742132476, "grad_norm": 0.5240086317062378, "learning_rate": 2.224633749321758e-05, "loss": 0.403, "step": 820 }, { "epoch": 0.13369702397915564, "grad_norm": 0.4802520275115967, "learning_rate": 2.227346717308736e-05, "loss": 0.393, "step": 821 }, { "epoch": 0.13385987053698653, "grad_norm": 0.5437390208244324, "learning_rate": 2.2300596852957137e-05, "loss": 0.3846, "step": 822 }, { "epoch": 0.13402271709481742, "grad_norm": 0.5510417222976685, "learning_rate": 2.2327726532826913e-05, "loss": 0.438, "step": 823 }, { "epoch": 0.1341855636526483, "grad_norm": 0.5179148316383362, "learning_rate": 2.2354856212696693e-05, "loss": 0.3941, "step": 824 }, { "epoch": 0.13434841021047916, "grad_norm": 0.45509499311447144, "learning_rate": 2.2381985892566466e-05, "loss": 0.413, "step": 825 }, { "epoch": 0.13451125676831005, "grad_norm": 0.5557805895805359, "learning_rate": 2.2409115572436246e-05, "loss": 0.3974, "step": 826 }, { "epoch": 0.13467410332614094, "grad_norm": 0.6095238327980042, "learning_rate": 2.2436245252306023e-05, "loss": 0.3705, "step": 827 }, { "epoch": 0.13483694988397182, "grad_norm": 0.6165920495986938, "learning_rate": 2.2463374932175803e-05, "loss": 0.4152, "step": 828 }, { "epoch": 0.1349997964418027, "grad_norm": 0.5873479843139648, "learning_rate": 2.249050461204558e-05, "loss": 0.4266, "step": 829 }, { "epoch": 0.1351626429996336, "grad_norm": 0.5821573138237, "learning_rate": 2.2517634291915356e-05, "loss": 0.4244, "step": 830 }, { "epoch": 0.13532548955746448, "grad_norm": 0.571630597114563, "learning_rate": 2.2544763971785136e-05, "loss": 0.4322, "step": 831 }, { "epoch": 0.13548833611529537, "grad_norm": 0.5935735702514648, "learning_rate": 2.2571893651654912e-05, "loss": 0.5136, "step": 832 }, { "epoch": 0.13565118267312626, "grad_norm": 0.5170599222183228, "learning_rate": 2.259902333152469e-05, "loss": 0.3863, "step": 833 }, { "epoch": 0.13581402923095712, "grad_norm": 0.44840705394744873, "learning_rate": 2.2626153011394465e-05, "loss": 0.4295, "step": 834 }, { "epoch": 0.135976875788788, "grad_norm": 0.526780366897583, "learning_rate": 2.2653282691264245e-05, "loss": 0.4099, "step": 835 }, { "epoch": 0.1361397223466189, "grad_norm": 0.42487284541130066, "learning_rate": 2.268041237113402e-05, "loss": 0.3712, "step": 836 }, { "epoch": 0.13630256890444978, "grad_norm": 0.5552861094474792, "learning_rate": 2.2707542051003798e-05, "loss": 0.4325, "step": 837 }, { "epoch": 0.13646541546228066, "grad_norm": 0.4809282720088959, "learning_rate": 2.2734671730873578e-05, "loss": 0.4201, "step": 838 }, { "epoch": 0.13662826202011155, "grad_norm": 0.49733731150627136, "learning_rate": 2.2761801410743355e-05, "loss": 0.3876, "step": 839 }, { "epoch": 0.13679110857794244, "grad_norm": 0.44775861501693726, "learning_rate": 2.2788931090613134e-05, "loss": 0.4065, "step": 840 }, { "epoch": 0.13695395513577333, "grad_norm": 0.40382763743400574, "learning_rate": 2.2816060770482908e-05, "loss": 0.3725, "step": 841 }, { "epoch": 0.1371168016936042, "grad_norm": 0.5595361590385437, "learning_rate": 2.2843190450352688e-05, "loss": 0.4091, "step": 842 }, { "epoch": 0.1372796482514351, "grad_norm": 0.5283064842224121, "learning_rate": 2.2870320130222464e-05, "loss": 0.3675, "step": 843 }, { "epoch": 0.13744249480926596, "grad_norm": 0.4833933115005493, "learning_rate": 2.289744981009224e-05, "loss": 0.453, "step": 844 }, { "epoch": 0.13760534136709685, "grad_norm": 0.55884850025177, "learning_rate": 2.292457948996202e-05, "loss": 0.4241, "step": 845 }, { "epoch": 0.13776818792492773, "grad_norm": 0.5040020942687988, "learning_rate": 2.2951709169831797e-05, "loss": 0.3796, "step": 846 }, { "epoch": 0.13793103448275862, "grad_norm": 0.5821624994277954, "learning_rate": 2.2978838849701577e-05, "loss": 0.4472, "step": 847 }, { "epoch": 0.1380938810405895, "grad_norm": 0.4971519410610199, "learning_rate": 2.3005968529571353e-05, "loss": 0.4148, "step": 848 }, { "epoch": 0.1382567275984204, "grad_norm": 0.59944748878479, "learning_rate": 2.303309820944113e-05, "loss": 0.4432, "step": 849 }, { "epoch": 0.13841957415625128, "grad_norm": 0.618824303150177, "learning_rate": 2.3060227889310907e-05, "loss": 0.3924, "step": 850 }, { "epoch": 0.13858242071408217, "grad_norm": 0.5696449279785156, "learning_rate": 2.3087357569180683e-05, "loss": 0.4204, "step": 851 }, { "epoch": 0.13874526727191305, "grad_norm": 0.5213147401809692, "learning_rate": 2.3114487249050463e-05, "loss": 0.4038, "step": 852 }, { "epoch": 0.1389081138297439, "grad_norm": 0.6108518838882446, "learning_rate": 2.314161692892024e-05, "loss": 0.4654, "step": 853 }, { "epoch": 0.1390709603875748, "grad_norm": 0.4706670045852661, "learning_rate": 2.316874660879002e-05, "loss": 0.3598, "step": 854 }, { "epoch": 0.13923380694540569, "grad_norm": 0.5045498013496399, "learning_rate": 2.3195876288659796e-05, "loss": 0.3317, "step": 855 }, { "epoch": 0.13939665350323657, "grad_norm": 0.4619254469871521, "learning_rate": 2.3223005968529573e-05, "loss": 0.3841, "step": 856 }, { "epoch": 0.13955950006106746, "grad_norm": 0.4891134202480316, "learning_rate": 2.325013564839935e-05, "loss": 0.3365, "step": 857 }, { "epoch": 0.13972234661889835, "grad_norm": 0.5211732387542725, "learning_rate": 2.3277265328269126e-05, "loss": 0.3829, "step": 858 }, { "epoch": 0.13988519317672923, "grad_norm": 0.5139499306678772, "learning_rate": 2.3304395008138905e-05, "loss": 0.3706, "step": 859 }, { "epoch": 0.14004803973456012, "grad_norm": 0.6581329703330994, "learning_rate": 2.3331524688008682e-05, "loss": 0.4507, "step": 860 }, { "epoch": 0.140210886292391, "grad_norm": 0.6204432845115662, "learning_rate": 2.3358654367878462e-05, "loss": 0.3935, "step": 861 }, { "epoch": 0.14037373285022187, "grad_norm": 0.5221514105796814, "learning_rate": 2.338578404774824e-05, "loss": 0.4473, "step": 862 }, { "epoch": 0.14053657940805275, "grad_norm": 0.6498379111289978, "learning_rate": 2.3412913727618015e-05, "loss": 0.4573, "step": 863 }, { "epoch": 0.14069942596588364, "grad_norm": 0.46816325187683105, "learning_rate": 2.3440043407487795e-05, "loss": 0.3489, "step": 864 }, { "epoch": 0.14086227252371453, "grad_norm": 0.4817916750907898, "learning_rate": 2.3467173087357568e-05, "loss": 0.4045, "step": 865 }, { "epoch": 0.1410251190815454, "grad_norm": 0.4465988576412201, "learning_rate": 2.3494302767227348e-05, "loss": 0.4073, "step": 866 }, { "epoch": 0.1411879656393763, "grad_norm": 0.5207340121269226, "learning_rate": 2.3521432447097124e-05, "loss": 0.4294, "step": 867 }, { "epoch": 0.1413508121972072, "grad_norm": 0.5142188668251038, "learning_rate": 2.35485621269669e-05, "loss": 0.3991, "step": 868 }, { "epoch": 0.14151365875503807, "grad_norm": 0.5123836994171143, "learning_rate": 2.357569180683668e-05, "loss": 0.3877, "step": 869 }, { "epoch": 0.14167650531286896, "grad_norm": 0.6909604072570801, "learning_rate": 2.3602821486706457e-05, "loss": 0.3983, "step": 870 }, { "epoch": 0.14183935187069982, "grad_norm": 0.48747512698173523, "learning_rate": 2.3629951166576237e-05, "loss": 0.3507, "step": 871 }, { "epoch": 0.1420021984285307, "grad_norm": 0.4551921784877777, "learning_rate": 2.3657080846446014e-05, "loss": 0.359, "step": 872 }, { "epoch": 0.1421650449863616, "grad_norm": 0.5389395356178284, "learning_rate": 2.368421052631579e-05, "loss": 0.387, "step": 873 }, { "epoch": 0.14232789154419248, "grad_norm": 0.5087082386016846, "learning_rate": 2.3711340206185567e-05, "loss": 0.3884, "step": 874 }, { "epoch": 0.14249073810202337, "grad_norm": 0.6037630438804626, "learning_rate": 2.3738469886055343e-05, "loss": 0.4174, "step": 875 }, { "epoch": 0.14265358465985425, "grad_norm": 0.568187952041626, "learning_rate": 2.3765599565925123e-05, "loss": 0.4179, "step": 876 }, { "epoch": 0.14281643121768514, "grad_norm": 0.5106956958770752, "learning_rate": 2.37927292457949e-05, "loss": 0.3419, "step": 877 }, { "epoch": 0.14297927777551603, "grad_norm": 0.6453532576560974, "learning_rate": 2.381985892566468e-05, "loss": 0.4142, "step": 878 }, { "epoch": 0.14314212433334692, "grad_norm": 0.5917977094650269, "learning_rate": 2.3846988605534456e-05, "loss": 0.3863, "step": 879 }, { "epoch": 0.14330497089117777, "grad_norm": 0.4751652777194977, "learning_rate": 2.3874118285404233e-05, "loss": 0.3862, "step": 880 }, { "epoch": 0.14346781744900866, "grad_norm": 0.624803364276886, "learning_rate": 2.390124796527401e-05, "loss": 0.4192, "step": 881 }, { "epoch": 0.14363066400683955, "grad_norm": 0.47399747371673584, "learning_rate": 2.3928377645143786e-05, "loss": 0.4264, "step": 882 }, { "epoch": 0.14379351056467043, "grad_norm": 0.625428318977356, "learning_rate": 2.3955507325013566e-05, "loss": 0.4302, "step": 883 }, { "epoch": 0.14395635712250132, "grad_norm": 0.44710636138916016, "learning_rate": 2.3982637004883342e-05, "loss": 0.4088, "step": 884 }, { "epoch": 0.1441192036803322, "grad_norm": 0.4584357738494873, "learning_rate": 2.4009766684753122e-05, "loss": 0.3908, "step": 885 }, { "epoch": 0.1442820502381631, "grad_norm": 0.4849102199077606, "learning_rate": 2.40368963646229e-05, "loss": 0.3418, "step": 886 }, { "epoch": 0.14444489679599398, "grad_norm": 0.46740463376045227, "learning_rate": 2.4064026044492675e-05, "loss": 0.4298, "step": 887 }, { "epoch": 0.14460774335382487, "grad_norm": 0.42681965231895447, "learning_rate": 2.4091155724362455e-05, "loss": 0.3937, "step": 888 }, { "epoch": 0.14477058991165576, "grad_norm": 0.5437499284744263, "learning_rate": 2.4118285404232232e-05, "loss": 0.4072, "step": 889 }, { "epoch": 0.14493343646948662, "grad_norm": 0.5578685998916626, "learning_rate": 2.414541508410201e-05, "loss": 0.4453, "step": 890 }, { "epoch": 0.1450962830273175, "grad_norm": 0.46085256338119507, "learning_rate": 2.4172544763971785e-05, "loss": 0.4128, "step": 891 }, { "epoch": 0.1452591295851484, "grad_norm": 0.5439073443412781, "learning_rate": 2.4199674443841565e-05, "loss": 0.444, "step": 892 }, { "epoch": 0.14542197614297928, "grad_norm": 0.5320292115211487, "learning_rate": 2.422680412371134e-05, "loss": 0.3707, "step": 893 }, { "epoch": 0.14558482270081016, "grad_norm": 0.5580717325210571, "learning_rate": 2.4253933803581118e-05, "loss": 0.3855, "step": 894 }, { "epoch": 0.14574766925864105, "grad_norm": 0.48083192110061646, "learning_rate": 2.4281063483450898e-05, "loss": 0.378, "step": 895 }, { "epoch": 0.14591051581647194, "grad_norm": 0.583257794380188, "learning_rate": 2.4308193163320674e-05, "loss": 0.4015, "step": 896 }, { "epoch": 0.14607336237430282, "grad_norm": 0.5626338124275208, "learning_rate": 2.4335322843190454e-05, "loss": 0.4026, "step": 897 }, { "epoch": 0.1462362089321337, "grad_norm": 0.5456311702728271, "learning_rate": 2.4362452523060227e-05, "loss": 0.4006, "step": 898 }, { "epoch": 0.14639905548996457, "grad_norm": 0.5739469528198242, "learning_rate": 2.4389582202930007e-05, "loss": 0.4106, "step": 899 }, { "epoch": 0.14656190204779546, "grad_norm": 0.5268176794052124, "learning_rate": 2.4416711882799784e-05, "loss": 0.4124, "step": 900 }, { "epoch": 0.14672474860562634, "grad_norm": 0.5527244806289673, "learning_rate": 2.444384156266956e-05, "loss": 0.4216, "step": 901 }, { "epoch": 0.14688759516345723, "grad_norm": 0.4846963882446289, "learning_rate": 2.447097124253934e-05, "loss": 0.3966, "step": 902 }, { "epoch": 0.14705044172128812, "grad_norm": 0.48082438111305237, "learning_rate": 2.4498100922409117e-05, "loss": 0.3654, "step": 903 }, { "epoch": 0.147213288279119, "grad_norm": 0.4860510528087616, "learning_rate": 2.4525230602278897e-05, "loss": 0.3952, "step": 904 }, { "epoch": 0.1473761348369499, "grad_norm": 0.46206134557724, "learning_rate": 2.4552360282148673e-05, "loss": 0.4107, "step": 905 }, { "epoch": 0.14753898139478078, "grad_norm": 0.5668385624885559, "learning_rate": 2.457948996201845e-05, "loss": 0.4124, "step": 906 }, { "epoch": 0.14770182795261166, "grad_norm": 0.5278438329696655, "learning_rate": 2.4606619641888226e-05, "loss": 0.4169, "step": 907 }, { "epoch": 0.14786467451044252, "grad_norm": 0.4193305969238281, "learning_rate": 2.4633749321758003e-05, "loss": 0.4154, "step": 908 }, { "epoch": 0.1480275210682734, "grad_norm": 0.4208889603614807, "learning_rate": 2.4660879001627783e-05, "loss": 0.3834, "step": 909 }, { "epoch": 0.1481903676261043, "grad_norm": 0.605897843837738, "learning_rate": 2.468800868149756e-05, "loss": 0.424, "step": 910 }, { "epoch": 0.14835321418393518, "grad_norm": 0.48236408829689026, "learning_rate": 2.471513836136734e-05, "loss": 0.3859, "step": 911 }, { "epoch": 0.14851606074176607, "grad_norm": 0.5428618788719177, "learning_rate": 2.4742268041237116e-05, "loss": 0.484, "step": 912 }, { "epoch": 0.14867890729959696, "grad_norm": 0.4779956340789795, "learning_rate": 2.4769397721106892e-05, "loss": 0.3736, "step": 913 }, { "epoch": 0.14884175385742784, "grad_norm": 0.6743175983428955, "learning_rate": 2.479652740097667e-05, "loss": 0.4537, "step": 914 }, { "epoch": 0.14900460041525873, "grad_norm": 0.5466651320457458, "learning_rate": 2.4823657080846445e-05, "loss": 0.409, "step": 915 }, { "epoch": 0.14916744697308962, "grad_norm": 0.5712704658508301, "learning_rate": 2.4850786760716225e-05, "loss": 0.4596, "step": 916 }, { "epoch": 0.14933029353092048, "grad_norm": 0.5199049115180969, "learning_rate": 2.4877916440586002e-05, "loss": 0.4472, "step": 917 }, { "epoch": 0.14949314008875136, "grad_norm": 0.49373236298561096, "learning_rate": 2.490504612045578e-05, "loss": 0.3731, "step": 918 }, { "epoch": 0.14965598664658225, "grad_norm": 0.4951245188713074, "learning_rate": 2.4932175800325558e-05, "loss": 0.4176, "step": 919 }, { "epoch": 0.14981883320441314, "grad_norm": 0.5510042905807495, "learning_rate": 2.4959305480195335e-05, "loss": 0.3868, "step": 920 }, { "epoch": 0.14998167976224402, "grad_norm": 0.47438278794288635, "learning_rate": 2.4986435160065115e-05, "loss": 0.4136, "step": 921 }, { "epoch": 0.1501445263200749, "grad_norm": 0.5549811124801636, "learning_rate": 2.5013564839934888e-05, "loss": 0.3806, "step": 922 }, { "epoch": 0.1503073728779058, "grad_norm": 0.5433441400527954, "learning_rate": 2.504069451980467e-05, "loss": 0.4085, "step": 923 }, { "epoch": 0.15047021943573669, "grad_norm": 0.5307788848876953, "learning_rate": 2.5067824199674444e-05, "loss": 0.3843, "step": 924 }, { "epoch": 0.15063306599356757, "grad_norm": 0.5755129456520081, "learning_rate": 2.5094953879544224e-05, "loss": 0.4285, "step": 925 }, { "epoch": 0.15079591255139843, "grad_norm": 0.6613652110099792, "learning_rate": 2.5122083559414e-05, "loss": 0.4516, "step": 926 }, { "epoch": 0.15095875910922932, "grad_norm": 0.5315454006195068, "learning_rate": 2.5149213239283774e-05, "loss": 0.4084, "step": 927 }, { "epoch": 0.1511216056670602, "grad_norm": 0.5863503217697144, "learning_rate": 2.5176342919153557e-05, "loss": 0.4624, "step": 928 }, { "epoch": 0.1512844522248911, "grad_norm": 0.5159066319465637, "learning_rate": 2.520347259902333e-05, "loss": 0.4253, "step": 929 }, { "epoch": 0.15144729878272198, "grad_norm": 0.658316433429718, "learning_rate": 2.523060227889311e-05, "loss": 0.4632, "step": 930 }, { "epoch": 0.15161014534055287, "grad_norm": 0.5683454275131226, "learning_rate": 2.5257731958762887e-05, "loss": 0.4522, "step": 931 }, { "epoch": 0.15177299189838375, "grad_norm": 0.4829943776130676, "learning_rate": 2.5284861638632667e-05, "loss": 0.3488, "step": 932 }, { "epoch": 0.15193583845621464, "grad_norm": 0.5989509224891663, "learning_rate": 2.5311991318502443e-05, "loss": 0.4376, "step": 933 }, { "epoch": 0.15209868501404553, "grad_norm": 0.6205427646636963, "learning_rate": 2.5339120998372216e-05, "loss": 0.4203, "step": 934 }, { "epoch": 0.1522615315718764, "grad_norm": 0.5074525475502014, "learning_rate": 2.5366250678242e-05, "loss": 0.422, "step": 935 }, { "epoch": 0.15242437812970727, "grad_norm": 0.5407092571258545, "learning_rate": 2.5393380358111773e-05, "loss": 0.3828, "step": 936 }, { "epoch": 0.15258722468753816, "grad_norm": 0.4909968674182892, "learning_rate": 2.5420510037981553e-05, "loss": 0.4131, "step": 937 }, { "epoch": 0.15275007124536905, "grad_norm": 0.577716588973999, "learning_rate": 2.544763971785133e-05, "loss": 0.4031, "step": 938 }, { "epoch": 0.15291291780319993, "grad_norm": 0.5618370771408081, "learning_rate": 2.547476939772111e-05, "loss": 0.3881, "step": 939 }, { "epoch": 0.15307576436103082, "grad_norm": 0.4729074537754059, "learning_rate": 2.5501899077590886e-05, "loss": 0.4059, "step": 940 }, { "epoch": 0.1532386109188617, "grad_norm": 0.5707862377166748, "learning_rate": 2.5529028757460666e-05, "loss": 0.3689, "step": 941 }, { "epoch": 0.1534014574766926, "grad_norm": 0.5170270204544067, "learning_rate": 2.5556158437330442e-05, "loss": 0.3978, "step": 942 }, { "epoch": 0.15356430403452348, "grad_norm": 0.5642980933189392, "learning_rate": 2.5583288117200215e-05, "loss": 0.3761, "step": 943 }, { "epoch": 0.15372715059235437, "grad_norm": 0.6527085900306702, "learning_rate": 2.5610417797069995e-05, "loss": 0.4395, "step": 944 }, { "epoch": 0.15388999715018523, "grad_norm": 0.5223656892776489, "learning_rate": 2.563754747693977e-05, "loss": 0.3775, "step": 945 }, { "epoch": 0.1540528437080161, "grad_norm": 0.5794512629508972, "learning_rate": 2.566467715680955e-05, "loss": 0.4476, "step": 946 }, { "epoch": 0.154215690265847, "grad_norm": 0.5521391034126282, "learning_rate": 2.5691806836679328e-05, "loss": 0.4249, "step": 947 }, { "epoch": 0.1543785368236779, "grad_norm": 0.43524447083473206, "learning_rate": 2.5718936516549108e-05, "loss": 0.4053, "step": 948 }, { "epoch": 0.15454138338150877, "grad_norm": 0.5231226086616516, "learning_rate": 2.5746066196418885e-05, "loss": 0.4267, "step": 949 }, { "epoch": 0.15470422993933966, "grad_norm": 0.6121383905410767, "learning_rate": 2.5773195876288658e-05, "loss": 0.4644, "step": 950 }, { "epoch": 0.15486707649717055, "grad_norm": 0.4604598879814148, "learning_rate": 2.5800325556158438e-05, "loss": 0.3751, "step": 951 }, { "epoch": 0.15502992305500143, "grad_norm": 0.48898905515670776, "learning_rate": 2.5827455236028214e-05, "loss": 0.3894, "step": 952 }, { "epoch": 0.15519276961283232, "grad_norm": 0.5487722754478455, "learning_rate": 2.5854584915897994e-05, "loss": 0.4381, "step": 953 }, { "epoch": 0.15535561617066318, "grad_norm": 0.439723402261734, "learning_rate": 2.588171459576777e-05, "loss": 0.3639, "step": 954 }, { "epoch": 0.15551846272849407, "grad_norm": 0.515640914440155, "learning_rate": 2.590884427563755e-05, "loss": 0.4006, "step": 955 }, { "epoch": 0.15568130928632495, "grad_norm": 0.49082115292549133, "learning_rate": 2.5935973955507327e-05, "loss": 0.3949, "step": 956 }, { "epoch": 0.15584415584415584, "grad_norm": 0.6504629254341125, "learning_rate": 2.5963103635377107e-05, "loss": 0.441, "step": 957 }, { "epoch": 0.15600700240198673, "grad_norm": 0.43435654044151306, "learning_rate": 2.599023331524688e-05, "loss": 0.3648, "step": 958 }, { "epoch": 0.15616984895981761, "grad_norm": 0.4447210431098938, "learning_rate": 2.6017362995116657e-05, "loss": 0.3899, "step": 959 }, { "epoch": 0.1563326955176485, "grad_norm": 0.4987582564353943, "learning_rate": 2.6044492674986437e-05, "loss": 0.4018, "step": 960 }, { "epoch": 0.1564955420754794, "grad_norm": 0.4834066331386566, "learning_rate": 2.6071622354856213e-05, "loss": 0.3861, "step": 961 }, { "epoch": 0.15665838863331027, "grad_norm": 0.4816642105579376, "learning_rate": 2.6098752034725993e-05, "loss": 0.4193, "step": 962 }, { "epoch": 0.15682123519114113, "grad_norm": 0.4665887653827667, "learning_rate": 2.612588171459577e-05, "loss": 0.4138, "step": 963 }, { "epoch": 0.15698408174897202, "grad_norm": 0.47629380226135254, "learning_rate": 2.615301139446555e-05, "loss": 0.3581, "step": 964 }, { "epoch": 0.1571469283068029, "grad_norm": 0.5453920960426331, "learning_rate": 2.6180141074335323e-05, "loss": 0.3934, "step": 965 }, { "epoch": 0.1573097748646338, "grad_norm": 0.5047798752784729, "learning_rate": 2.6207270754205106e-05, "loss": 0.4062, "step": 966 }, { "epoch": 0.15747262142246468, "grad_norm": 0.48761141300201416, "learning_rate": 2.623440043407488e-05, "loss": 0.3845, "step": 967 }, { "epoch": 0.15763546798029557, "grad_norm": 0.4512278437614441, "learning_rate": 2.6261530113944656e-05, "loss": 0.4371, "step": 968 }, { "epoch": 0.15779831453812646, "grad_norm": 0.4423944354057312, "learning_rate": 2.6288659793814435e-05, "loss": 0.381, "step": 969 }, { "epoch": 0.15796116109595734, "grad_norm": 0.5630090832710266, "learning_rate": 2.6315789473684212e-05, "loss": 0.4428, "step": 970 }, { "epoch": 0.15812400765378823, "grad_norm": 0.5118087530136108, "learning_rate": 2.6342919153553992e-05, "loss": 0.3964, "step": 971 }, { "epoch": 0.15828685421161912, "grad_norm": 0.5868300199508667, "learning_rate": 2.6370048833423765e-05, "loss": 0.3772, "step": 972 }, { "epoch": 0.15844970076944997, "grad_norm": 0.44357597827911377, "learning_rate": 2.639717851329355e-05, "loss": 0.4303, "step": 973 }, { "epoch": 0.15861254732728086, "grad_norm": 0.542672336101532, "learning_rate": 2.642430819316332e-05, "loss": 0.3761, "step": 974 }, { "epoch": 0.15877539388511175, "grad_norm": 0.486563116312027, "learning_rate": 2.6451437873033098e-05, "loss": 0.3848, "step": 975 }, { "epoch": 0.15893824044294264, "grad_norm": 0.4913264513015747, "learning_rate": 2.6478567552902878e-05, "loss": 0.3781, "step": 976 }, { "epoch": 0.15910108700077352, "grad_norm": 0.48842787742614746, "learning_rate": 2.650569723277265e-05, "loss": 0.3966, "step": 977 }, { "epoch": 0.1592639335586044, "grad_norm": 0.5553522706031799, "learning_rate": 2.6532826912642434e-05, "loss": 0.4049, "step": 978 }, { "epoch": 0.1594267801164353, "grad_norm": 0.5880761742591858, "learning_rate": 2.6559956592512207e-05, "loss": 0.3714, "step": 979 }, { "epoch": 0.15958962667426618, "grad_norm": 0.5196591019630432, "learning_rate": 2.658708627238199e-05, "loss": 0.3732, "step": 980 }, { "epoch": 0.15975247323209707, "grad_norm": 0.5887094140052795, "learning_rate": 2.6614215952251764e-05, "loss": 0.378, "step": 981 }, { "epoch": 0.15991531978992793, "grad_norm": 0.466101735830307, "learning_rate": 2.6641345632121544e-05, "loss": 0.4022, "step": 982 }, { "epoch": 0.16007816634775882, "grad_norm": 0.4614103138446808, "learning_rate": 2.666847531199132e-05, "loss": 0.448, "step": 983 }, { "epoch": 0.1602410129055897, "grad_norm": 0.5357895493507385, "learning_rate": 2.6695604991861094e-05, "loss": 0.3887, "step": 984 }, { "epoch": 0.1604038594634206, "grad_norm": 0.4944898784160614, "learning_rate": 2.6722734671730877e-05, "loss": 0.3843, "step": 985 }, { "epoch": 0.16056670602125148, "grad_norm": 0.42239195108413696, "learning_rate": 2.674986435160065e-05, "loss": 0.3938, "step": 986 }, { "epoch": 0.16072955257908236, "grad_norm": 0.5088913440704346, "learning_rate": 2.677699403147043e-05, "loss": 0.4221, "step": 987 }, { "epoch": 0.16089239913691325, "grad_norm": 0.5222132205963135, "learning_rate": 2.6804123711340206e-05, "loss": 0.4124, "step": 988 }, { "epoch": 0.16105524569474414, "grad_norm": 0.46175336837768555, "learning_rate": 2.6831253391209986e-05, "loss": 0.3829, "step": 989 }, { "epoch": 0.16121809225257502, "grad_norm": 0.5839532017707825, "learning_rate": 2.6858383071079763e-05, "loss": 0.4047, "step": 990 }, { "epoch": 0.16138093881040588, "grad_norm": 0.5167249441146851, "learning_rate": 2.6885512750949536e-05, "loss": 0.3705, "step": 991 }, { "epoch": 0.16154378536823677, "grad_norm": 0.4548238515853882, "learning_rate": 2.691264243081932e-05, "loss": 0.3574, "step": 992 }, { "epoch": 0.16170663192606766, "grad_norm": 0.6036289930343628, "learning_rate": 2.6939772110689092e-05, "loss": 0.3813, "step": 993 }, { "epoch": 0.16186947848389854, "grad_norm": 0.7327390909194946, "learning_rate": 2.6966901790558872e-05, "loss": 0.4246, "step": 994 }, { "epoch": 0.16203232504172943, "grad_norm": 0.513565182685852, "learning_rate": 2.699403147042865e-05, "loss": 0.4433, "step": 995 }, { "epoch": 0.16219517159956032, "grad_norm": 0.6525708436965942, "learning_rate": 2.702116115029843e-05, "loss": 0.4258, "step": 996 }, { "epoch": 0.1623580181573912, "grad_norm": 0.6916959285736084, "learning_rate": 2.7048290830168205e-05, "loss": 0.3904, "step": 997 }, { "epoch": 0.1625208647152221, "grad_norm": 0.6490355134010315, "learning_rate": 2.7075420510037985e-05, "loss": 0.4017, "step": 998 }, { "epoch": 0.16268371127305298, "grad_norm": 0.5205593109130859, "learning_rate": 2.7102550189907762e-05, "loss": 0.3968, "step": 999 }, { "epoch": 0.16284655783088384, "grad_norm": 0.4976162910461426, "learning_rate": 2.7129679869777535e-05, "loss": 0.3678, "step": 1000 }, { "epoch": 0.16300940438871472, "grad_norm": 0.5416482090950012, "learning_rate": 2.7156809549647315e-05, "loss": 0.3894, "step": 1001 }, { "epoch": 0.1631722509465456, "grad_norm": 0.44836559891700745, "learning_rate": 2.718393922951709e-05, "loss": 0.3686, "step": 1002 }, { "epoch": 0.1633350975043765, "grad_norm": 0.4762788712978363, "learning_rate": 2.721106890938687e-05, "loss": 0.3994, "step": 1003 }, { "epoch": 0.16349794406220738, "grad_norm": 0.5168485641479492, "learning_rate": 2.7238198589256648e-05, "loss": 0.3979, "step": 1004 }, { "epoch": 0.16366079062003827, "grad_norm": 0.40508222579956055, "learning_rate": 2.7265328269126428e-05, "loss": 0.3592, "step": 1005 }, { "epoch": 0.16382363717786916, "grad_norm": 0.594632625579834, "learning_rate": 2.7292457948996204e-05, "loss": 0.3989, "step": 1006 }, { "epoch": 0.16398648373570004, "grad_norm": 0.5029085278511047, "learning_rate": 2.7319587628865977e-05, "loss": 0.3455, "step": 1007 }, { "epoch": 0.16414933029353093, "grad_norm": 0.47118332982063293, "learning_rate": 2.7346717308735757e-05, "loss": 0.4028, "step": 1008 }, { "epoch": 0.1643121768513618, "grad_norm": 0.5351996421813965, "learning_rate": 2.7373846988605534e-05, "loss": 0.337, "step": 1009 }, { "epoch": 0.16447502340919268, "grad_norm": 0.49083203077316284, "learning_rate": 2.7400976668475314e-05, "loss": 0.3763, "step": 1010 }, { "epoch": 0.16463786996702356, "grad_norm": 0.5017068386077881, "learning_rate": 2.742810634834509e-05, "loss": 0.4354, "step": 1011 }, { "epoch": 0.16480071652485445, "grad_norm": 0.5513691306114197, "learning_rate": 2.745523602821487e-05, "loss": 0.4351, "step": 1012 }, { "epoch": 0.16496356308268534, "grad_norm": 0.4958619177341461, "learning_rate": 2.7482365708084647e-05, "loss": 0.4063, "step": 1013 }, { "epoch": 0.16512640964051623, "grad_norm": 0.4794120192527771, "learning_rate": 2.7509495387954427e-05, "loss": 0.3993, "step": 1014 }, { "epoch": 0.1652892561983471, "grad_norm": 0.4703216850757599, "learning_rate": 2.75366250678242e-05, "loss": 0.4302, "step": 1015 }, { "epoch": 0.165452102756178, "grad_norm": 0.5476323366165161, "learning_rate": 2.7563754747693976e-05, "loss": 0.4451, "step": 1016 }, { "epoch": 0.16561494931400889, "grad_norm": 0.5241209864616394, "learning_rate": 2.7590884427563756e-05, "loss": 0.4068, "step": 1017 }, { "epoch": 0.16577779587183977, "grad_norm": 0.4971912205219269, "learning_rate": 2.7618014107433533e-05, "loss": 0.4468, "step": 1018 }, { "epoch": 0.16594064242967063, "grad_norm": 0.4582972228527069, "learning_rate": 2.7645143787303313e-05, "loss": 0.406, "step": 1019 }, { "epoch": 0.16610348898750152, "grad_norm": 0.5620726943016052, "learning_rate": 2.767227346717309e-05, "loss": 0.4121, "step": 1020 }, { "epoch": 0.1662663355453324, "grad_norm": 0.508629560470581, "learning_rate": 2.769940314704287e-05, "loss": 0.3831, "step": 1021 }, { "epoch": 0.1664291821031633, "grad_norm": 0.5254917740821838, "learning_rate": 2.7726532826912642e-05, "loss": 0.3956, "step": 1022 }, { "epoch": 0.16659202866099418, "grad_norm": 0.48867830634117126, "learning_rate": 2.775366250678242e-05, "loss": 0.3586, "step": 1023 }, { "epoch": 0.16675487521882507, "grad_norm": 0.47389906644821167, "learning_rate": 2.77807921866522e-05, "loss": 0.3683, "step": 1024 }, { "epoch": 0.16691772177665595, "grad_norm": 0.5498072504997253, "learning_rate": 2.7807921866521975e-05, "loss": 0.3859, "step": 1025 }, { "epoch": 0.16708056833448684, "grad_norm": 0.5507275462150574, "learning_rate": 2.7835051546391755e-05, "loss": 0.4028, "step": 1026 }, { "epoch": 0.16724341489231773, "grad_norm": 0.49492377042770386, "learning_rate": 2.7862181226261532e-05, "loss": 0.3453, "step": 1027 }, { "epoch": 0.16740626145014859, "grad_norm": 0.495253324508667, "learning_rate": 2.788931090613131e-05, "loss": 0.3897, "step": 1028 }, { "epoch": 0.16756910800797947, "grad_norm": 0.6100438833236694, "learning_rate": 2.7916440586001085e-05, "loss": 0.3895, "step": 1029 }, { "epoch": 0.16773195456581036, "grad_norm": 0.638953685760498, "learning_rate": 2.7943570265870868e-05, "loss": 0.4102, "step": 1030 }, { "epoch": 0.16789480112364125, "grad_norm": 0.5415827631950378, "learning_rate": 2.797069994574064e-05, "loss": 0.366, "step": 1031 }, { "epoch": 0.16805764768147213, "grad_norm": 0.6089842915534973, "learning_rate": 2.7997829625610418e-05, "loss": 0.3751, "step": 1032 }, { "epoch": 0.16822049423930302, "grad_norm": 0.6073843836784363, "learning_rate": 2.8024959305480198e-05, "loss": 0.4346, "step": 1033 }, { "epoch": 0.1683833407971339, "grad_norm": 0.5722084045410156, "learning_rate": 2.805208898534997e-05, "loss": 0.417, "step": 1034 }, { "epoch": 0.1685461873549648, "grad_norm": 0.6176499724388123, "learning_rate": 2.8079218665219754e-05, "loss": 0.3946, "step": 1035 }, { "epoch": 0.16870903391279568, "grad_norm": 0.561931848526001, "learning_rate": 2.8106348345089527e-05, "loss": 0.4463, "step": 1036 }, { "epoch": 0.16887188047062654, "grad_norm": 0.6057985424995422, "learning_rate": 2.813347802495931e-05, "loss": 0.4078, "step": 1037 }, { "epoch": 0.16903472702845743, "grad_norm": 0.569945752620697, "learning_rate": 2.8160607704829084e-05, "loss": 0.3772, "step": 1038 }, { "epoch": 0.1691975735862883, "grad_norm": 0.537619948387146, "learning_rate": 2.8187737384698864e-05, "loss": 0.3814, "step": 1039 }, { "epoch": 0.1693604201441192, "grad_norm": 0.5512102246284485, "learning_rate": 2.821486706456864e-05, "loss": 0.4184, "step": 1040 }, { "epoch": 0.1695232667019501, "grad_norm": 0.5289735198020935, "learning_rate": 2.8241996744438413e-05, "loss": 0.3838, "step": 1041 }, { "epoch": 0.16968611325978097, "grad_norm": 0.5113558173179626, "learning_rate": 2.8269126424308197e-05, "loss": 0.4247, "step": 1042 }, { "epoch": 0.16984895981761186, "grad_norm": 0.571722149848938, "learning_rate": 2.829625610417797e-05, "loss": 0.4183, "step": 1043 }, { "epoch": 0.17001180637544275, "grad_norm": 0.5545147061347961, "learning_rate": 2.832338578404775e-05, "loss": 0.4348, "step": 1044 }, { "epoch": 0.17017465293327363, "grad_norm": 0.41463974118232727, "learning_rate": 2.8350515463917526e-05, "loss": 0.392, "step": 1045 }, { "epoch": 0.1703374994911045, "grad_norm": 0.5651532411575317, "learning_rate": 2.8377645143787306e-05, "loss": 0.4431, "step": 1046 }, { "epoch": 0.17050034604893538, "grad_norm": 0.4280301332473755, "learning_rate": 2.8404774823657083e-05, "loss": 0.378, "step": 1047 }, { "epoch": 0.17066319260676627, "grad_norm": 0.579646110534668, "learning_rate": 2.8431904503526856e-05, "loss": 0.3814, "step": 1048 }, { "epoch": 0.17082603916459715, "grad_norm": 0.4541524648666382, "learning_rate": 2.845903418339664e-05, "loss": 0.3671, "step": 1049 }, { "epoch": 0.17098888572242804, "grad_norm": 0.42396125197410583, "learning_rate": 2.8486163863266412e-05, "loss": 0.3722, "step": 1050 }, { "epoch": 0.17115173228025893, "grad_norm": 0.4717560410499573, "learning_rate": 2.8513293543136192e-05, "loss": 0.3904, "step": 1051 }, { "epoch": 0.17131457883808981, "grad_norm": 0.5495410561561584, "learning_rate": 2.854042322300597e-05, "loss": 0.4021, "step": 1052 }, { "epoch": 0.1714774253959207, "grad_norm": 0.5691463947296143, "learning_rate": 2.856755290287575e-05, "loss": 0.4361, "step": 1053 }, { "epoch": 0.1716402719537516, "grad_norm": 0.49337875843048096, "learning_rate": 2.8594682582745525e-05, "loss": 0.3816, "step": 1054 }, { "epoch": 0.17180311851158245, "grad_norm": 0.5172205567359924, "learning_rate": 2.8621812262615305e-05, "loss": 0.3601, "step": 1055 }, { "epoch": 0.17196596506941333, "grad_norm": 0.49881479144096375, "learning_rate": 2.864894194248508e-05, "loss": 0.3536, "step": 1056 }, { "epoch": 0.17212881162724422, "grad_norm": 0.5423515439033508, "learning_rate": 2.8676071622354855e-05, "loss": 0.3819, "step": 1057 }, { "epoch": 0.1722916581850751, "grad_norm": 0.48172643780708313, "learning_rate": 2.8703201302224635e-05, "loss": 0.402, "step": 1058 }, { "epoch": 0.172454504742906, "grad_norm": 0.534349799156189, "learning_rate": 2.873033098209441e-05, "loss": 0.4187, "step": 1059 }, { "epoch": 0.17261735130073688, "grad_norm": 0.5733791589736938, "learning_rate": 2.875746066196419e-05, "loss": 0.3615, "step": 1060 }, { "epoch": 0.17278019785856777, "grad_norm": 0.5169264078140259, "learning_rate": 2.8784590341833968e-05, "loss": 0.3831, "step": 1061 }, { "epoch": 0.17294304441639866, "grad_norm": 0.4570654034614563, "learning_rate": 2.8811720021703747e-05, "loss": 0.3886, "step": 1062 }, { "epoch": 0.17310589097422954, "grad_norm": 0.48064562678337097, "learning_rate": 2.8838849701573524e-05, "loss": 0.3663, "step": 1063 }, { "epoch": 0.17326873753206043, "grad_norm": 0.5175647139549255, "learning_rate": 2.8865979381443297e-05, "loss": 0.3916, "step": 1064 }, { "epoch": 0.1734315840898913, "grad_norm": 0.45397621393203735, "learning_rate": 2.8893109061313077e-05, "loss": 0.3832, "step": 1065 }, { "epoch": 0.17359443064772218, "grad_norm": 0.4635765254497528, "learning_rate": 2.8920238741182854e-05, "loss": 0.3689, "step": 1066 }, { "epoch": 0.17375727720555306, "grad_norm": 0.4717180132865906, "learning_rate": 2.8947368421052634e-05, "loss": 0.3916, "step": 1067 }, { "epoch": 0.17392012376338395, "grad_norm": 0.5473349690437317, "learning_rate": 2.897449810092241e-05, "loss": 0.4649, "step": 1068 }, { "epoch": 0.17408297032121484, "grad_norm": 0.4668598175048828, "learning_rate": 2.900162778079219e-05, "loss": 0.384, "step": 1069 }, { "epoch": 0.17424581687904572, "grad_norm": 0.5472489595413208, "learning_rate": 2.9028757460661966e-05, "loss": 0.4233, "step": 1070 }, { "epoch": 0.1744086634368766, "grad_norm": 0.516355037689209, "learning_rate": 2.9055887140531746e-05, "loss": 0.4112, "step": 1071 }, { "epoch": 0.1745715099947075, "grad_norm": 0.4458382725715637, "learning_rate": 2.908301682040152e-05, "loss": 0.4329, "step": 1072 }, { "epoch": 0.17473435655253838, "grad_norm": 0.47433435916900635, "learning_rate": 2.9110146500271296e-05, "loss": 0.3703, "step": 1073 }, { "epoch": 0.17489720311036924, "grad_norm": 0.4663260281085968, "learning_rate": 2.9137276180141076e-05, "loss": 0.4092, "step": 1074 }, { "epoch": 0.17506004966820013, "grad_norm": 0.4412459135055542, "learning_rate": 2.9164405860010853e-05, "loss": 0.3878, "step": 1075 }, { "epoch": 0.17522289622603102, "grad_norm": 0.5966046452522278, "learning_rate": 2.9191535539880632e-05, "loss": 0.389, "step": 1076 }, { "epoch": 0.1753857427838619, "grad_norm": 0.497933566570282, "learning_rate": 2.921866521975041e-05, "loss": 0.4223, "step": 1077 }, { "epoch": 0.1755485893416928, "grad_norm": 0.5154885053634644, "learning_rate": 2.924579489962019e-05, "loss": 0.389, "step": 1078 }, { "epoch": 0.17571143589952368, "grad_norm": 0.5324742197990417, "learning_rate": 2.9272924579489962e-05, "loss": 0.3746, "step": 1079 }, { "epoch": 0.17587428245735456, "grad_norm": 0.48506951332092285, "learning_rate": 2.930005425935974e-05, "loss": 0.4163, "step": 1080 }, { "epoch": 0.17603712901518545, "grad_norm": 0.5684570670127869, "learning_rate": 2.932718393922952e-05, "loss": 0.4117, "step": 1081 }, { "epoch": 0.17619997557301634, "grad_norm": 0.523596465587616, "learning_rate": 2.9354313619099295e-05, "loss": 0.3929, "step": 1082 }, { "epoch": 0.1763628221308472, "grad_norm": 0.42334476113319397, "learning_rate": 2.9381443298969075e-05, "loss": 0.3864, "step": 1083 }, { "epoch": 0.17652566868867808, "grad_norm": 0.47832924127578735, "learning_rate": 2.940857297883885e-05, "loss": 0.3822, "step": 1084 }, { "epoch": 0.17668851524650897, "grad_norm": 0.46307751536369324, "learning_rate": 2.943570265870863e-05, "loss": 0.3752, "step": 1085 }, { "epoch": 0.17685136180433986, "grad_norm": 0.42833560705184937, "learning_rate": 2.9462832338578404e-05, "loss": 0.3474, "step": 1086 }, { "epoch": 0.17701420836217074, "grad_norm": 0.5923193693161011, "learning_rate": 2.9489962018448188e-05, "loss": 0.4125, "step": 1087 }, { "epoch": 0.17717705492000163, "grad_norm": 0.48006564378738403, "learning_rate": 2.951709169831796e-05, "loss": 0.3598, "step": 1088 }, { "epoch": 0.17733990147783252, "grad_norm": 0.47276872396469116, "learning_rate": 2.9544221378187737e-05, "loss": 0.3747, "step": 1089 }, { "epoch": 0.1775027480356634, "grad_norm": 0.4661802351474762, "learning_rate": 2.9571351058057517e-05, "loss": 0.3863, "step": 1090 }, { "epoch": 0.1776655945934943, "grad_norm": 0.5108867883682251, "learning_rate": 2.959848073792729e-05, "loss": 0.4304, "step": 1091 }, { "epoch": 0.17782844115132515, "grad_norm": 0.5271587371826172, "learning_rate": 2.9625610417797074e-05, "loss": 0.4152, "step": 1092 }, { "epoch": 0.17799128770915604, "grad_norm": 0.47835028171539307, "learning_rate": 2.9652740097666847e-05, "loss": 0.3853, "step": 1093 }, { "epoch": 0.17815413426698692, "grad_norm": 0.4816460609436035, "learning_rate": 2.9679869777536627e-05, "loss": 0.3765, "step": 1094 }, { "epoch": 0.1783169808248178, "grad_norm": 0.5607642531394958, "learning_rate": 2.9706999457406403e-05, "loss": 0.4137, "step": 1095 }, { "epoch": 0.1784798273826487, "grad_norm": 0.48502084612846375, "learning_rate": 2.9734129137276183e-05, "loss": 0.4349, "step": 1096 }, { "epoch": 0.17864267394047958, "grad_norm": 0.6038750410079956, "learning_rate": 2.976125881714596e-05, "loss": 0.4188, "step": 1097 }, { "epoch": 0.17880552049831047, "grad_norm": 0.5590898394584656, "learning_rate": 2.9788388497015733e-05, "loss": 0.4222, "step": 1098 }, { "epoch": 0.17896836705614136, "grad_norm": 0.4414880871772766, "learning_rate": 2.9815518176885516e-05, "loss": 0.3791, "step": 1099 }, { "epoch": 0.17913121361397225, "grad_norm": 0.6428449749946594, "learning_rate": 2.984264785675529e-05, "loss": 0.4025, "step": 1100 }, { "epoch": 0.17929406017180313, "grad_norm": 0.5867859721183777, "learning_rate": 2.986977753662507e-05, "loss": 0.4099, "step": 1101 }, { "epoch": 0.179456906729634, "grad_norm": 0.49880850315093994, "learning_rate": 2.9896907216494846e-05, "loss": 0.3903, "step": 1102 }, { "epoch": 0.17961975328746488, "grad_norm": 0.5921741724014282, "learning_rate": 2.9924036896364626e-05, "loss": 0.3863, "step": 1103 }, { "epoch": 0.17978259984529577, "grad_norm": 0.5231345295906067, "learning_rate": 2.9951166576234402e-05, "loss": 0.3858, "step": 1104 }, { "epoch": 0.17994544640312665, "grad_norm": 0.5101513266563416, "learning_rate": 2.9978296256104175e-05, "loss": 0.3909, "step": 1105 }, { "epoch": 0.18010829296095754, "grad_norm": 0.5930564999580383, "learning_rate": 3.000542593597396e-05, "loss": 0.4096, "step": 1106 }, { "epoch": 0.18027113951878843, "grad_norm": 0.5500158667564392, "learning_rate": 3.0032555615843732e-05, "loss": 0.3779, "step": 1107 }, { "epoch": 0.1804339860766193, "grad_norm": 0.49090367555618286, "learning_rate": 3.0059685295713512e-05, "loss": 0.3783, "step": 1108 }, { "epoch": 0.1805968326344502, "grad_norm": 0.589845597743988, "learning_rate": 3.008681497558329e-05, "loss": 0.4265, "step": 1109 }, { "epoch": 0.1807596791922811, "grad_norm": 0.5308411717414856, "learning_rate": 3.0113944655453068e-05, "loss": 0.4051, "step": 1110 }, { "epoch": 0.18092252575011195, "grad_norm": 0.4913882911205292, "learning_rate": 3.0141074335322845e-05, "loss": 0.4026, "step": 1111 }, { "epoch": 0.18108537230794283, "grad_norm": 0.4748975932598114, "learning_rate": 3.0168204015192625e-05, "loss": 0.4063, "step": 1112 }, { "epoch": 0.18124821886577372, "grad_norm": 0.43922874331474304, "learning_rate": 3.01953336950624e-05, "loss": 0.3469, "step": 1113 }, { "epoch": 0.1814110654236046, "grad_norm": 0.47683197259902954, "learning_rate": 3.0222463374932174e-05, "loss": 0.4355, "step": 1114 }, { "epoch": 0.1815739119814355, "grad_norm": 0.6017860174179077, "learning_rate": 3.0249593054801954e-05, "loss": 0.4139, "step": 1115 }, { "epoch": 0.18173675853926638, "grad_norm": 0.5166758298873901, "learning_rate": 3.027672273467173e-05, "loss": 0.4262, "step": 1116 }, { "epoch": 0.18189960509709727, "grad_norm": 0.46340256929397583, "learning_rate": 3.030385241454151e-05, "loss": 0.385, "step": 1117 }, { "epoch": 0.18206245165492815, "grad_norm": 0.5313060879707336, "learning_rate": 3.0330982094411287e-05, "loss": 0.3629, "step": 1118 }, { "epoch": 0.18222529821275904, "grad_norm": 0.49630486965179443, "learning_rate": 3.0358111774281067e-05, "loss": 0.4301, "step": 1119 }, { "epoch": 0.1823881447705899, "grad_norm": 0.42965880036354065, "learning_rate": 3.0385241454150844e-05, "loss": 0.4055, "step": 1120 }, { "epoch": 0.1825509913284208, "grad_norm": 0.47278445959091187, "learning_rate": 3.0412371134020617e-05, "loss": 0.3897, "step": 1121 }, { "epoch": 0.18271383788625167, "grad_norm": 0.5054792165756226, "learning_rate": 3.0439500813890397e-05, "loss": 0.4152, "step": 1122 }, { "epoch": 0.18287668444408256, "grad_norm": 0.4678436815738678, "learning_rate": 3.0466630493760173e-05, "loss": 0.4034, "step": 1123 }, { "epoch": 0.18303953100191345, "grad_norm": 0.4853518307209015, "learning_rate": 3.0493760173629953e-05, "loss": 0.334, "step": 1124 }, { "epoch": 0.18320237755974433, "grad_norm": 0.4811552166938782, "learning_rate": 3.052088985349973e-05, "loss": 0.426, "step": 1125 }, { "epoch": 0.18336522411757522, "grad_norm": 0.5441051125526428, "learning_rate": 3.054801953336951e-05, "loss": 0.3783, "step": 1126 }, { "epoch": 0.1835280706754061, "grad_norm": 0.4466227889060974, "learning_rate": 3.057514921323928e-05, "loss": 0.3702, "step": 1127 }, { "epoch": 0.183690917233237, "grad_norm": 0.5033822655677795, "learning_rate": 3.060227889310906e-05, "loss": 0.4199, "step": 1128 }, { "epoch": 0.18385376379106785, "grad_norm": 0.493899405002594, "learning_rate": 3.062940857297884e-05, "loss": 0.3762, "step": 1129 }, { "epoch": 0.18401661034889874, "grad_norm": 0.4792311489582062, "learning_rate": 3.0656538252848616e-05, "loss": 0.4021, "step": 1130 }, { "epoch": 0.18417945690672963, "grad_norm": 0.570108950138092, "learning_rate": 3.0683667932718396e-05, "loss": 0.3964, "step": 1131 }, { "epoch": 0.18434230346456051, "grad_norm": 0.5085268020629883, "learning_rate": 3.071079761258817e-05, "loss": 0.3944, "step": 1132 }, { "epoch": 0.1845051500223914, "grad_norm": 0.3960147500038147, "learning_rate": 3.073792729245795e-05, "loss": 0.3532, "step": 1133 }, { "epoch": 0.1846679965802223, "grad_norm": 0.445631742477417, "learning_rate": 3.076505697232773e-05, "loss": 0.4226, "step": 1134 }, { "epoch": 0.18483084313805317, "grad_norm": 0.5113312005996704, "learning_rate": 3.079218665219751e-05, "loss": 0.4369, "step": 1135 }, { "epoch": 0.18499368969588406, "grad_norm": 0.42141613364219666, "learning_rate": 3.081931633206728e-05, "loss": 0.396, "step": 1136 }, { "epoch": 0.18515653625371495, "grad_norm": 0.444905549287796, "learning_rate": 3.0846446011937055e-05, "loss": 0.3695, "step": 1137 }, { "epoch": 0.1853193828115458, "grad_norm": 0.5095675587654114, "learning_rate": 3.087357569180684e-05, "loss": 0.4515, "step": 1138 }, { "epoch": 0.1854822293693767, "grad_norm": 0.4066004157066345, "learning_rate": 3.0900705371676615e-05, "loss": 0.3674, "step": 1139 }, { "epoch": 0.18564507592720758, "grad_norm": 0.4187917113304138, "learning_rate": 3.0927835051546395e-05, "loss": 0.3803, "step": 1140 }, { "epoch": 0.18580792248503847, "grad_norm": 0.44390299916267395, "learning_rate": 3.095496473141617e-05, "loss": 0.3971, "step": 1141 }, { "epoch": 0.18597076904286935, "grad_norm": 0.5359463095664978, "learning_rate": 3.098209441128595e-05, "loss": 0.4019, "step": 1142 }, { "epoch": 0.18613361560070024, "grad_norm": 0.5399247407913208, "learning_rate": 3.100922409115573e-05, "loss": 0.4268, "step": 1143 }, { "epoch": 0.18629646215853113, "grad_norm": 0.4373055398464203, "learning_rate": 3.103635377102551e-05, "loss": 0.4039, "step": 1144 }, { "epoch": 0.18645930871636202, "grad_norm": 0.4485166072845459, "learning_rate": 3.106348345089528e-05, "loss": 0.3984, "step": 1145 }, { "epoch": 0.1866221552741929, "grad_norm": 0.4403257966041565, "learning_rate": 3.1090613130765054e-05, "loss": 0.3775, "step": 1146 }, { "epoch": 0.1867850018320238, "grad_norm": 0.4720664322376251, "learning_rate": 3.1117742810634834e-05, "loss": 0.4198, "step": 1147 }, { "epoch": 0.18694784838985465, "grad_norm": 0.5219618678092957, "learning_rate": 3.1144872490504614e-05, "loss": 0.4326, "step": 1148 }, { "epoch": 0.18711069494768554, "grad_norm": 0.48795321583747864, "learning_rate": 3.1172002170374394e-05, "loss": 0.4452, "step": 1149 }, { "epoch": 0.18727354150551642, "grad_norm": 0.4814828038215637, "learning_rate": 3.119913185024417e-05, "loss": 0.4117, "step": 1150 }, { "epoch": 0.1874363880633473, "grad_norm": 0.44087544083595276, "learning_rate": 3.1226261530113947e-05, "loss": 0.3759, "step": 1151 }, { "epoch": 0.1875992346211782, "grad_norm": 0.4157021939754486, "learning_rate": 3.1253391209983727e-05, "loss": 0.4066, "step": 1152 }, { "epoch": 0.18776208117900908, "grad_norm": 0.4233330190181732, "learning_rate": 3.12805208898535e-05, "loss": 0.4032, "step": 1153 }, { "epoch": 0.18792492773683997, "grad_norm": 0.43789753317832947, "learning_rate": 3.130765056972328e-05, "loss": 0.4098, "step": 1154 }, { "epoch": 0.18808777429467086, "grad_norm": 0.4077517092227936, "learning_rate": 3.133478024959305e-05, "loss": 0.3797, "step": 1155 }, { "epoch": 0.18825062085250174, "grad_norm": 0.45280617475509644, "learning_rate": 3.136190992946283e-05, "loss": 0.3488, "step": 1156 }, { "epoch": 0.1884134674103326, "grad_norm": 0.45268794894218445, "learning_rate": 3.138903960933261e-05, "loss": 0.417, "step": 1157 }, { "epoch": 0.1885763139681635, "grad_norm": 0.660794198513031, "learning_rate": 3.141616928920239e-05, "loss": 0.4418, "step": 1158 }, { "epoch": 0.18873916052599438, "grad_norm": 0.442953497171402, "learning_rate": 3.1443298969072166e-05, "loss": 0.3754, "step": 1159 }, { "epoch": 0.18890200708382526, "grad_norm": 0.46699124574661255, "learning_rate": 3.1470428648941946e-05, "loss": 0.3599, "step": 1160 }, { "epoch": 0.18906485364165615, "grad_norm": 0.5701581239700317, "learning_rate": 3.149755832881172e-05, "loss": 0.4161, "step": 1161 }, { "epoch": 0.18922770019948704, "grad_norm": 0.4471060335636139, "learning_rate": 3.15246880086815e-05, "loss": 0.4199, "step": 1162 }, { "epoch": 0.18939054675731792, "grad_norm": 0.47708413004875183, "learning_rate": 3.155181768855128e-05, "loss": 0.3927, "step": 1163 }, { "epoch": 0.1895533933151488, "grad_norm": 0.44285255670547485, "learning_rate": 3.157894736842105e-05, "loss": 0.4209, "step": 1164 }, { "epoch": 0.1897162398729797, "grad_norm": 0.498223215341568, "learning_rate": 3.160607704829083e-05, "loss": 0.4185, "step": 1165 }, { "epoch": 0.18987908643081056, "grad_norm": 0.5587790012359619, "learning_rate": 3.163320672816061e-05, "loss": 0.4285, "step": 1166 }, { "epoch": 0.19004193298864144, "grad_norm": 0.4201543927192688, "learning_rate": 3.166033640803039e-05, "loss": 0.4049, "step": 1167 }, { "epoch": 0.19020477954647233, "grad_norm": 0.5798609852790833, "learning_rate": 3.1687466087900165e-05, "loss": 0.4412, "step": 1168 }, { "epoch": 0.19036762610430322, "grad_norm": 0.48453646898269653, "learning_rate": 3.1714595767769944e-05, "loss": 0.3385, "step": 1169 }, { "epoch": 0.1905304726621341, "grad_norm": 0.44547420740127563, "learning_rate": 3.174172544763972e-05, "loss": 0.3641, "step": 1170 }, { "epoch": 0.190693319219965, "grad_norm": 0.4700927436351776, "learning_rate": 3.17688551275095e-05, "loss": 0.3923, "step": 1171 }, { "epoch": 0.19085616577779588, "grad_norm": 0.4979568421840668, "learning_rate": 3.179598480737928e-05, "loss": 0.3524, "step": 1172 }, { "epoch": 0.19101901233562676, "grad_norm": 0.6469025611877441, "learning_rate": 3.182311448724905e-05, "loss": 0.4933, "step": 1173 }, { "epoch": 0.19118185889345765, "grad_norm": 0.4693506956100464, "learning_rate": 3.185024416711883e-05, "loss": 0.3764, "step": 1174 }, { "epoch": 0.1913447054512885, "grad_norm": 0.40321245789527893, "learning_rate": 3.1877373846988604e-05, "loss": 0.3659, "step": 1175 }, { "epoch": 0.1915075520091194, "grad_norm": 0.8043332695960999, "learning_rate": 3.190450352685839e-05, "loss": 0.4338, "step": 1176 }, { "epoch": 0.19167039856695028, "grad_norm": 0.49981454014778137, "learning_rate": 3.1931633206728163e-05, "loss": 0.4011, "step": 1177 }, { "epoch": 0.19183324512478117, "grad_norm": 0.5098832845687866, "learning_rate": 3.1958762886597937e-05, "loss": 0.384, "step": 1178 }, { "epoch": 0.19199609168261206, "grad_norm": 0.5924244523048401, "learning_rate": 3.1985892566467717e-05, "loss": 0.4237, "step": 1179 }, { "epoch": 0.19215893824044294, "grad_norm": 0.4811334013938904, "learning_rate": 3.201302224633749e-05, "loss": 0.3648, "step": 1180 }, { "epoch": 0.19232178479827383, "grad_norm": 0.4579226076602936, "learning_rate": 3.2040151926207276e-05, "loss": 0.3165, "step": 1181 }, { "epoch": 0.19248463135610472, "grad_norm": 0.5203367471694946, "learning_rate": 3.206728160607705e-05, "loss": 0.4248, "step": 1182 }, { "epoch": 0.1926474779139356, "grad_norm": 0.577973484992981, "learning_rate": 3.209441128594683e-05, "loss": 0.3711, "step": 1183 }, { "epoch": 0.19281032447176646, "grad_norm": 0.5470460057258606, "learning_rate": 3.21215409658166e-05, "loss": 0.4124, "step": 1184 }, { "epoch": 0.19297317102959735, "grad_norm": 0.4704970419406891, "learning_rate": 3.214867064568638e-05, "loss": 0.411, "step": 1185 }, { "epoch": 0.19313601758742824, "grad_norm": 0.520366907119751, "learning_rate": 3.217580032555616e-05, "loss": 0.4264, "step": 1186 }, { "epoch": 0.19329886414525912, "grad_norm": 0.4634903371334076, "learning_rate": 3.2202930005425936e-05, "loss": 0.3381, "step": 1187 }, { "epoch": 0.19346171070309, "grad_norm": 0.492546945810318, "learning_rate": 3.2230059685295715e-05, "loss": 0.3978, "step": 1188 }, { "epoch": 0.1936245572609209, "grad_norm": 0.5972158312797546, "learning_rate": 3.225718936516549e-05, "loss": 0.4172, "step": 1189 }, { "epoch": 0.19378740381875179, "grad_norm": 0.5422229170799255, "learning_rate": 3.228431904503527e-05, "loss": 0.3831, "step": 1190 }, { "epoch": 0.19395025037658267, "grad_norm": 0.5170313119888306, "learning_rate": 3.231144872490505e-05, "loss": 0.3918, "step": 1191 }, { "epoch": 0.19411309693441356, "grad_norm": 0.660962700843811, "learning_rate": 3.233857840477483e-05, "loss": 0.3645, "step": 1192 }, { "epoch": 0.19427594349224445, "grad_norm": 0.524037778377533, "learning_rate": 3.23657080846446e-05, "loss": 0.3863, "step": 1193 }, { "epoch": 0.1944387900500753, "grad_norm": 1.4494457244873047, "learning_rate": 3.2392837764514375e-05, "loss": 0.4107, "step": 1194 }, { "epoch": 0.1946016366079062, "grad_norm": 0.5557844042778015, "learning_rate": 3.241996744438416e-05, "loss": 0.3632, "step": 1195 }, { "epoch": 0.19476448316573708, "grad_norm": 0.47165098786354065, "learning_rate": 3.2447097124253934e-05, "loss": 0.3627, "step": 1196 }, { "epoch": 0.19492732972356797, "grad_norm": 0.6416400074958801, "learning_rate": 3.2474226804123714e-05, "loss": 0.4205, "step": 1197 }, { "epoch": 0.19509017628139885, "grad_norm": 0.5246796607971191, "learning_rate": 3.250135648399349e-05, "loss": 0.3867, "step": 1198 }, { "epoch": 0.19525302283922974, "grad_norm": 0.4905373156070709, "learning_rate": 3.252848616386327e-05, "loss": 0.3714, "step": 1199 }, { "epoch": 0.19541586939706063, "grad_norm": 0.5135179162025452, "learning_rate": 3.255561584373305e-05, "loss": 0.3945, "step": 1200 }, { "epoch": 0.1955787159548915, "grad_norm": 0.46297961473464966, "learning_rate": 3.258274552360283e-05, "loss": 0.3852, "step": 1201 }, { "epoch": 0.1957415625127224, "grad_norm": 0.5009171366691589, "learning_rate": 3.26098752034726e-05, "loss": 0.3924, "step": 1202 }, { "epoch": 0.19590440907055326, "grad_norm": 0.4641645848751068, "learning_rate": 3.2637004883342374e-05, "loss": 0.3718, "step": 1203 }, { "epoch": 0.19606725562838415, "grad_norm": 0.531655490398407, "learning_rate": 3.2664134563212153e-05, "loss": 0.3894, "step": 1204 }, { "epoch": 0.19623010218621503, "grad_norm": 0.43300050497055054, "learning_rate": 3.269126424308193e-05, "loss": 0.4014, "step": 1205 }, { "epoch": 0.19639294874404592, "grad_norm": 0.5866546034812927, "learning_rate": 3.271839392295171e-05, "loss": 0.3814, "step": 1206 }, { "epoch": 0.1965557953018768, "grad_norm": 0.5101959705352783, "learning_rate": 3.2745523602821486e-05, "loss": 0.4034, "step": 1207 }, { "epoch": 0.1967186418597077, "grad_norm": 0.45896318554878235, "learning_rate": 3.2772653282691266e-05, "loss": 0.3923, "step": 1208 }, { "epoch": 0.19688148841753858, "grad_norm": 0.45873376727104187, "learning_rate": 3.2799782962561046e-05, "loss": 0.3441, "step": 1209 }, { "epoch": 0.19704433497536947, "grad_norm": 0.5172644257545471, "learning_rate": 3.282691264243082e-05, "loss": 0.351, "step": 1210 }, { "epoch": 0.19720718153320035, "grad_norm": 0.4911254942417145, "learning_rate": 3.28540423223006e-05, "loss": 0.3739, "step": 1211 }, { "epoch": 0.1973700280910312, "grad_norm": 0.4822833836078644, "learning_rate": 3.288117200217037e-05, "loss": 0.3634, "step": 1212 }, { "epoch": 0.1975328746488621, "grad_norm": 0.4672950208187103, "learning_rate": 3.290830168204015e-05, "loss": 0.3709, "step": 1213 }, { "epoch": 0.197695721206693, "grad_norm": 0.5317126512527466, "learning_rate": 3.293543136190993e-05, "loss": 0.3905, "step": 1214 }, { "epoch": 0.19785856776452387, "grad_norm": 0.519741415977478, "learning_rate": 3.296256104177971e-05, "loss": 0.4306, "step": 1215 }, { "epoch": 0.19802141432235476, "grad_norm": 0.46997928619384766, "learning_rate": 3.2989690721649485e-05, "loss": 0.373, "step": 1216 }, { "epoch": 0.19818426088018565, "grad_norm": 0.6276391744613647, "learning_rate": 3.3016820401519265e-05, "loss": 0.3943, "step": 1217 }, { "epoch": 0.19834710743801653, "grad_norm": 0.47429272532463074, "learning_rate": 3.304395008138904e-05, "loss": 0.4056, "step": 1218 }, { "epoch": 0.19850995399584742, "grad_norm": 0.5028574466705322, "learning_rate": 3.307107976125882e-05, "loss": 0.3718, "step": 1219 }, { "epoch": 0.1986728005536783, "grad_norm": 0.6017857193946838, "learning_rate": 3.30982094411286e-05, "loss": 0.4356, "step": 1220 }, { "epoch": 0.19883564711150917, "grad_norm": 0.5326900482177734, "learning_rate": 3.312533912099837e-05, "loss": 0.41, "step": 1221 }, { "epoch": 0.19899849366934005, "grad_norm": 0.4889702796936035, "learning_rate": 3.315246880086815e-05, "loss": 0.38, "step": 1222 }, { "epoch": 0.19916134022717094, "grad_norm": 0.5746612548828125, "learning_rate": 3.317959848073793e-05, "loss": 0.4049, "step": 1223 }, { "epoch": 0.19932418678500183, "grad_norm": 0.5071322917938232, "learning_rate": 3.320672816060771e-05, "loss": 0.4209, "step": 1224 }, { "epoch": 0.19948703334283271, "grad_norm": 0.6239801049232483, "learning_rate": 3.3233857840477484e-05, "loss": 0.4347, "step": 1225 }, { "epoch": 0.1996498799006636, "grad_norm": 0.5972309708595276, "learning_rate": 3.326098752034726e-05, "loss": 0.4156, "step": 1226 }, { "epoch": 0.1998127264584945, "grad_norm": 0.4395464360713959, "learning_rate": 3.328811720021704e-05, "loss": 0.3928, "step": 1227 }, { "epoch": 0.19997557301632538, "grad_norm": 0.6514634490013123, "learning_rate": 3.331524688008682e-05, "loss": 0.4357, "step": 1228 }, { "epoch": 0.20013841957415626, "grad_norm": 0.4485466480255127, "learning_rate": 3.33423765599566e-05, "loss": 0.3837, "step": 1229 }, { "epoch": 0.20030126613198712, "grad_norm": 0.5017017722129822, "learning_rate": 3.336950623982637e-05, "loss": 0.3457, "step": 1230 }, { "epoch": 0.200464112689818, "grad_norm": 0.49344611167907715, "learning_rate": 3.339663591969615e-05, "loss": 0.3717, "step": 1231 }, { "epoch": 0.2006269592476489, "grad_norm": 0.4369702637195587, "learning_rate": 3.342376559956592e-05, "loss": 0.406, "step": 1232 }, { "epoch": 0.20078980580547978, "grad_norm": 0.510250985622406, "learning_rate": 3.345089527943571e-05, "loss": 0.371, "step": 1233 }, { "epoch": 0.20095265236331067, "grad_norm": 0.5179007053375244, "learning_rate": 3.347802495930548e-05, "loss": 0.365, "step": 1234 }, { "epoch": 0.20111549892114156, "grad_norm": 0.5276218056678772, "learning_rate": 3.3505154639175256e-05, "loss": 0.4266, "step": 1235 }, { "epoch": 0.20127834547897244, "grad_norm": 0.47569283843040466, "learning_rate": 3.3532284319045036e-05, "loss": 0.3849, "step": 1236 }, { "epoch": 0.20144119203680333, "grad_norm": 0.5330779552459717, "learning_rate": 3.355941399891481e-05, "loss": 0.3732, "step": 1237 }, { "epoch": 0.20160403859463422, "grad_norm": 0.4485156238079071, "learning_rate": 3.3586543678784596e-05, "loss": 0.3731, "step": 1238 }, { "epoch": 0.2017668851524651, "grad_norm": 0.6546975374221802, "learning_rate": 3.361367335865437e-05, "loss": 0.4041, "step": 1239 }, { "epoch": 0.20192973171029596, "grad_norm": 0.4645155072212219, "learning_rate": 3.364080303852415e-05, "loss": 0.3602, "step": 1240 }, { "epoch": 0.20209257826812685, "grad_norm": 0.47976189851760864, "learning_rate": 3.366793271839392e-05, "loss": 0.3719, "step": 1241 }, { "epoch": 0.20225542482595774, "grad_norm": 0.4819985032081604, "learning_rate": 3.36950623982637e-05, "loss": 0.3883, "step": 1242 }, { "epoch": 0.20241827138378862, "grad_norm": 0.4257805049419403, "learning_rate": 3.372219207813348e-05, "loss": 0.3806, "step": 1243 }, { "epoch": 0.2025811179416195, "grad_norm": 0.42872709035873413, "learning_rate": 3.3749321758003255e-05, "loss": 0.3534, "step": 1244 }, { "epoch": 0.2027439644994504, "grad_norm": 0.4794819951057434, "learning_rate": 3.3776451437873035e-05, "loss": 0.3932, "step": 1245 }, { "epoch": 0.20290681105728128, "grad_norm": 0.42414960265159607, "learning_rate": 3.380358111774281e-05, "loss": 0.3971, "step": 1246 }, { "epoch": 0.20306965761511217, "grad_norm": 0.4674331545829773, "learning_rate": 3.383071079761259e-05, "loss": 0.4012, "step": 1247 }, { "epoch": 0.20323250417294306, "grad_norm": 0.5526852011680603, "learning_rate": 3.385784047748237e-05, "loss": 0.394, "step": 1248 }, { "epoch": 0.20339535073077392, "grad_norm": 0.5694528222084045, "learning_rate": 3.388497015735215e-05, "loss": 0.4243, "step": 1249 }, { "epoch": 0.2035581972886048, "grad_norm": 0.6182912588119507, "learning_rate": 3.391209983722192e-05, "loss": 0.4081, "step": 1250 }, { "epoch": 0.2037210438464357, "grad_norm": 0.5311327576637268, "learning_rate": 3.3939229517091694e-05, "loss": 0.3635, "step": 1251 }, { "epoch": 0.20388389040426658, "grad_norm": 0.556696355342865, "learning_rate": 3.396635919696148e-05, "loss": 0.4188, "step": 1252 }, { "epoch": 0.20404673696209746, "grad_norm": 0.5356526970863342, "learning_rate": 3.3993488876831254e-05, "loss": 0.3937, "step": 1253 }, { "epoch": 0.20420958351992835, "grad_norm": 0.4479004144668579, "learning_rate": 3.4020618556701034e-05, "loss": 0.3866, "step": 1254 }, { "epoch": 0.20437243007775924, "grad_norm": 0.5459151268005371, "learning_rate": 3.404774823657081e-05, "loss": 0.3623, "step": 1255 }, { "epoch": 0.20453527663559012, "grad_norm": 0.48993805050849915, "learning_rate": 3.407487791644059e-05, "loss": 0.3933, "step": 1256 }, { "epoch": 0.204698123193421, "grad_norm": 0.4638754725456238, "learning_rate": 3.410200759631037e-05, "loss": 0.3865, "step": 1257 }, { "epoch": 0.20486096975125187, "grad_norm": 0.452200710773468, "learning_rate": 3.412913727618015e-05, "loss": 0.3753, "step": 1258 }, { "epoch": 0.20502381630908276, "grad_norm": 0.5655470490455627, "learning_rate": 3.415626695604992e-05, "loss": 0.418, "step": 1259 }, { "epoch": 0.20518666286691364, "grad_norm": 0.5063064694404602, "learning_rate": 3.418339663591969e-05, "loss": 0.3757, "step": 1260 }, { "epoch": 0.20534950942474453, "grad_norm": 0.4350617825984955, "learning_rate": 3.421052631578947e-05, "loss": 0.3818, "step": 1261 }, { "epoch": 0.20551235598257542, "grad_norm": 0.5789582133293152, "learning_rate": 3.423765599565925e-05, "loss": 0.3835, "step": 1262 }, { "epoch": 0.2056752025404063, "grad_norm": 0.5390574932098389, "learning_rate": 3.426478567552903e-05, "loss": 0.4156, "step": 1263 }, { "epoch": 0.2058380490982372, "grad_norm": 0.46516335010528564, "learning_rate": 3.4291915355398806e-05, "loss": 0.3976, "step": 1264 }, { "epoch": 0.20600089565606808, "grad_norm": 0.5592113137245178, "learning_rate": 3.4319045035268586e-05, "loss": 0.4269, "step": 1265 }, { "epoch": 0.20616374221389897, "grad_norm": 0.5592746138572693, "learning_rate": 3.4346174715138366e-05, "loss": 0.3711, "step": 1266 }, { "epoch": 0.20632658877172982, "grad_norm": 0.4611956775188446, "learning_rate": 3.437330439500814e-05, "loss": 0.385, "step": 1267 }, { "epoch": 0.2064894353295607, "grad_norm": 0.5491933822631836, "learning_rate": 3.440043407487792e-05, "loss": 0.4043, "step": 1268 }, { "epoch": 0.2066522818873916, "grad_norm": 0.5460913181304932, "learning_rate": 3.442756375474769e-05, "loss": 0.3781, "step": 1269 }, { "epoch": 0.20681512844522248, "grad_norm": 0.46231207251548767, "learning_rate": 3.445469343461747e-05, "loss": 0.3487, "step": 1270 }, { "epoch": 0.20697797500305337, "grad_norm": 0.4139280319213867, "learning_rate": 3.448182311448725e-05, "loss": 0.3687, "step": 1271 }, { "epoch": 0.20714082156088426, "grad_norm": 0.4821222424507141, "learning_rate": 3.450895279435703e-05, "loss": 0.3907, "step": 1272 }, { "epoch": 0.20730366811871515, "grad_norm": 0.5772658586502075, "learning_rate": 3.4536082474226805e-05, "loss": 0.43, "step": 1273 }, { "epoch": 0.20746651467654603, "grad_norm": 0.42716139554977417, "learning_rate": 3.4563212154096585e-05, "loss": 0.3912, "step": 1274 }, { "epoch": 0.20762936123437692, "grad_norm": 0.5624809265136719, "learning_rate": 3.459034183396636e-05, "loss": 0.3833, "step": 1275 }, { "epoch": 0.2077922077922078, "grad_norm": 0.7215806841850281, "learning_rate": 3.461747151383614e-05, "loss": 0.3964, "step": 1276 }, { "epoch": 0.20795505435003867, "grad_norm": 0.5119558572769165, "learning_rate": 3.464460119370592e-05, "loss": 0.3802, "step": 1277 }, { "epoch": 0.20811790090786955, "grad_norm": 0.5599924921989441, "learning_rate": 3.467173087357569e-05, "loss": 0.3856, "step": 1278 }, { "epoch": 0.20828074746570044, "grad_norm": 0.5302122235298157, "learning_rate": 3.469886055344547e-05, "loss": 0.4413, "step": 1279 }, { "epoch": 0.20844359402353133, "grad_norm": 0.5208615064620972, "learning_rate": 3.472599023331525e-05, "loss": 0.3644, "step": 1280 }, { "epoch": 0.2086064405813622, "grad_norm": 0.5453923940658569, "learning_rate": 3.475311991318503e-05, "loss": 0.3882, "step": 1281 }, { "epoch": 0.2087692871391931, "grad_norm": 0.4828934371471405, "learning_rate": 3.4780249593054804e-05, "loss": 0.4355, "step": 1282 }, { "epoch": 0.208932133697024, "grad_norm": 0.5541012287139893, "learning_rate": 3.480737927292458e-05, "loss": 0.3843, "step": 1283 }, { "epoch": 0.20909498025485487, "grad_norm": 0.5421361923217773, "learning_rate": 3.483450895279436e-05, "loss": 0.4332, "step": 1284 }, { "epoch": 0.20925782681268576, "grad_norm": 0.5661693811416626, "learning_rate": 3.486163863266414e-05, "loss": 0.3825, "step": 1285 }, { "epoch": 0.20942067337051662, "grad_norm": 0.6099360585212708, "learning_rate": 3.488876831253392e-05, "loss": 0.4179, "step": 1286 }, { "epoch": 0.2095835199283475, "grad_norm": 0.47189855575561523, "learning_rate": 3.491589799240369e-05, "loss": 0.4131, "step": 1287 }, { "epoch": 0.2097463664861784, "grad_norm": 0.496852308511734, "learning_rate": 3.494302767227347e-05, "loss": 0.418, "step": 1288 }, { "epoch": 0.20990921304400928, "grad_norm": 0.5328952670097351, "learning_rate": 3.497015735214324e-05, "loss": 0.383, "step": 1289 }, { "epoch": 0.21007205960184017, "grad_norm": 0.5189229846000671, "learning_rate": 3.499728703201302e-05, "loss": 0.428, "step": 1290 }, { "epoch": 0.21023490615967105, "grad_norm": 0.4067670702934265, "learning_rate": 3.50244167118828e-05, "loss": 0.3868, "step": 1291 }, { "epoch": 0.21039775271750194, "grad_norm": 0.4750716984272003, "learning_rate": 3.5051546391752576e-05, "loss": 0.3692, "step": 1292 }, { "epoch": 0.21056059927533283, "grad_norm": 0.5291305780410767, "learning_rate": 3.5078676071622356e-05, "loss": 0.4251, "step": 1293 }, { "epoch": 0.21072344583316371, "grad_norm": 0.464614599943161, "learning_rate": 3.510580575149213e-05, "loss": 0.3595, "step": 1294 }, { "epoch": 0.21088629239099457, "grad_norm": 0.5130990743637085, "learning_rate": 3.5132935431361916e-05, "loss": 0.4092, "step": 1295 }, { "epoch": 0.21104913894882546, "grad_norm": 0.4485243558883667, "learning_rate": 3.516006511123169e-05, "loss": 0.389, "step": 1296 }, { "epoch": 0.21121198550665635, "grad_norm": 0.40059685707092285, "learning_rate": 3.518719479110147e-05, "loss": 0.3331, "step": 1297 }, { "epoch": 0.21137483206448723, "grad_norm": 0.6414148807525635, "learning_rate": 3.521432447097124e-05, "loss": 0.4449, "step": 1298 }, { "epoch": 0.21153767862231812, "grad_norm": 0.4991026520729065, "learning_rate": 3.524145415084102e-05, "loss": 0.3943, "step": 1299 }, { "epoch": 0.211700525180149, "grad_norm": 0.4687741696834564, "learning_rate": 3.52685838307108e-05, "loss": 0.3732, "step": 1300 }, { "epoch": 0.2118633717379799, "grad_norm": 0.5603764057159424, "learning_rate": 3.5295713510580575e-05, "loss": 0.4224, "step": 1301 }, { "epoch": 0.21202621829581078, "grad_norm": 1.1667531728744507, "learning_rate": 3.5322843190450355e-05, "loss": 0.4075, "step": 1302 }, { "epoch": 0.21218906485364167, "grad_norm": 0.4722895324230194, "learning_rate": 3.534997287032013e-05, "loss": 0.3527, "step": 1303 }, { "epoch": 0.21235191141147253, "grad_norm": 0.48638540506362915, "learning_rate": 3.537710255018991e-05, "loss": 0.3488, "step": 1304 }, { "epoch": 0.21251475796930341, "grad_norm": 0.3950049579143524, "learning_rate": 3.540423223005969e-05, "loss": 0.3534, "step": 1305 }, { "epoch": 0.2126776045271343, "grad_norm": 0.4700324237346649, "learning_rate": 3.543136190992947e-05, "loss": 0.3849, "step": 1306 }, { "epoch": 0.2128404510849652, "grad_norm": 0.4760834872722626, "learning_rate": 3.545849158979924e-05, "loss": 0.3471, "step": 1307 }, { "epoch": 0.21300329764279607, "grad_norm": 0.7568430304527283, "learning_rate": 3.5485621269669014e-05, "loss": 0.3378, "step": 1308 }, { "epoch": 0.21316614420062696, "grad_norm": 0.47854310274124146, "learning_rate": 3.55127509495388e-05, "loss": 0.3534, "step": 1309 }, { "epoch": 0.21332899075845785, "grad_norm": 0.5511554479598999, "learning_rate": 3.5539880629408574e-05, "loss": 0.3754, "step": 1310 }, { "epoch": 0.21349183731628874, "grad_norm": 0.5561842918395996, "learning_rate": 3.5567010309278354e-05, "loss": 0.4115, "step": 1311 }, { "epoch": 0.21365468387411962, "grad_norm": 0.5205609798431396, "learning_rate": 3.559413998914813e-05, "loss": 0.3791, "step": 1312 }, { "epoch": 0.21381753043195048, "grad_norm": 0.5468555688858032, "learning_rate": 3.562126966901791e-05, "loss": 0.4508, "step": 1313 }, { "epoch": 0.21398037698978137, "grad_norm": 0.5005224943161011, "learning_rate": 3.564839934888769e-05, "loss": 0.409, "step": 1314 }, { "epoch": 0.21414322354761225, "grad_norm": 0.43069860339164734, "learning_rate": 3.567552902875747e-05, "loss": 0.3769, "step": 1315 }, { "epoch": 0.21430607010544314, "grad_norm": 0.5353738069534302, "learning_rate": 3.570265870862724e-05, "loss": 0.3885, "step": 1316 }, { "epoch": 0.21446891666327403, "grad_norm": 0.4966299831867218, "learning_rate": 3.572978838849701e-05, "loss": 0.3874, "step": 1317 }, { "epoch": 0.21463176322110492, "grad_norm": 0.5024893879890442, "learning_rate": 3.575691806836679e-05, "loss": 0.4134, "step": 1318 }, { "epoch": 0.2147946097789358, "grad_norm": 0.59561687707901, "learning_rate": 3.578404774823657e-05, "loss": 0.4263, "step": 1319 }, { "epoch": 0.2149574563367667, "grad_norm": 0.5689294338226318, "learning_rate": 3.581117742810635e-05, "loss": 0.3996, "step": 1320 }, { "epoch": 0.21512030289459758, "grad_norm": 0.5030743479728699, "learning_rate": 3.5838307107976126e-05, "loss": 0.4025, "step": 1321 }, { "epoch": 0.21528314945242846, "grad_norm": 0.5585319399833679, "learning_rate": 3.5865436787845906e-05, "loss": 0.3849, "step": 1322 }, { "epoch": 0.21544599601025932, "grad_norm": 0.511440634727478, "learning_rate": 3.5892566467715686e-05, "loss": 0.4184, "step": 1323 }, { "epoch": 0.2156088425680902, "grad_norm": 0.49612319469451904, "learning_rate": 3.591969614758546e-05, "loss": 0.3649, "step": 1324 }, { "epoch": 0.2157716891259211, "grad_norm": 0.4517597258090973, "learning_rate": 3.594682582745524e-05, "loss": 0.4099, "step": 1325 }, { "epoch": 0.21593453568375198, "grad_norm": 0.4447905719280243, "learning_rate": 3.597395550732501e-05, "loss": 0.383, "step": 1326 }, { "epoch": 0.21609738224158287, "grad_norm": 0.49398523569107056, "learning_rate": 3.600108518719479e-05, "loss": 0.398, "step": 1327 }, { "epoch": 0.21626022879941376, "grad_norm": 0.5508198142051697, "learning_rate": 3.602821486706457e-05, "loss": 0.4215, "step": 1328 }, { "epoch": 0.21642307535724464, "grad_norm": 0.5067065954208374, "learning_rate": 3.605534454693435e-05, "loss": 0.4029, "step": 1329 }, { "epoch": 0.21658592191507553, "grad_norm": 0.4844646453857422, "learning_rate": 3.6082474226804125e-05, "loss": 0.3835, "step": 1330 }, { "epoch": 0.21674876847290642, "grad_norm": 0.41993799805641174, "learning_rate": 3.6109603906673905e-05, "loss": 0.3256, "step": 1331 }, { "epoch": 0.21691161503073728, "grad_norm": 0.523933470249176, "learning_rate": 3.613673358654368e-05, "loss": 0.3522, "step": 1332 }, { "epoch": 0.21707446158856816, "grad_norm": 0.47091519832611084, "learning_rate": 3.616386326641346e-05, "loss": 0.3913, "step": 1333 }, { "epoch": 0.21723730814639905, "grad_norm": 0.5253328084945679, "learning_rate": 3.619099294628324e-05, "loss": 0.4009, "step": 1334 }, { "epoch": 0.21740015470422994, "grad_norm": 0.5081021785736084, "learning_rate": 3.621812262615301e-05, "loss": 0.419, "step": 1335 }, { "epoch": 0.21756300126206082, "grad_norm": 0.4240596294403076, "learning_rate": 3.624525230602279e-05, "loss": 0.4145, "step": 1336 }, { "epoch": 0.2177258478198917, "grad_norm": 0.5087026953697205, "learning_rate": 3.627238198589257e-05, "loss": 0.3715, "step": 1337 }, { "epoch": 0.2178886943777226, "grad_norm": 0.5075206756591797, "learning_rate": 3.629951166576235e-05, "loss": 0.3571, "step": 1338 }, { "epoch": 0.21805154093555348, "grad_norm": 0.49113139510154724, "learning_rate": 3.6326641345632124e-05, "loss": 0.4084, "step": 1339 }, { "epoch": 0.21821438749338437, "grad_norm": 0.48882120847702026, "learning_rate": 3.63537710255019e-05, "loss": 0.3626, "step": 1340 }, { "epoch": 0.21837723405121523, "grad_norm": 0.5533801913261414, "learning_rate": 3.638090070537168e-05, "loss": 0.3689, "step": 1341 }, { "epoch": 0.21854008060904612, "grad_norm": 0.5219792723655701, "learning_rate": 3.640803038524146e-05, "loss": 0.3944, "step": 1342 }, { "epoch": 0.218702927166877, "grad_norm": 0.570345938205719, "learning_rate": 3.643516006511124e-05, "loss": 0.4068, "step": 1343 }, { "epoch": 0.2188657737247079, "grad_norm": 0.5027766823768616, "learning_rate": 3.646228974498101e-05, "loss": 0.4215, "step": 1344 }, { "epoch": 0.21902862028253878, "grad_norm": 0.5270296931266785, "learning_rate": 3.648941942485079e-05, "loss": 0.4025, "step": 1345 }, { "epoch": 0.21919146684036966, "grad_norm": 0.42289313673973083, "learning_rate": 3.651654910472056e-05, "loss": 0.362, "step": 1346 }, { "epoch": 0.21935431339820055, "grad_norm": 0.43637198209762573, "learning_rate": 3.654367878459034e-05, "loss": 0.4102, "step": 1347 }, { "epoch": 0.21951715995603144, "grad_norm": 0.44365912675857544, "learning_rate": 3.657080846446012e-05, "loss": 0.4022, "step": 1348 }, { "epoch": 0.21968000651386232, "grad_norm": 0.5808435678482056, "learning_rate": 3.6597938144329896e-05, "loss": 0.3718, "step": 1349 }, { "epoch": 0.21984285307169318, "grad_norm": 0.45519956946372986, "learning_rate": 3.6625067824199676e-05, "loss": 0.3709, "step": 1350 }, { "epoch": 0.22000569962952407, "grad_norm": 0.48780617117881775, "learning_rate": 3.665219750406945e-05, "loss": 0.4082, "step": 1351 }, { "epoch": 0.22016854618735496, "grad_norm": 0.46817082166671753, "learning_rate": 3.6679327183939236e-05, "loss": 0.4085, "step": 1352 }, { "epoch": 0.22033139274518584, "grad_norm": 0.4072086811065674, "learning_rate": 3.670645686380901e-05, "loss": 0.3409, "step": 1353 }, { "epoch": 0.22049423930301673, "grad_norm": 0.4354286789894104, "learning_rate": 3.673358654367879e-05, "loss": 0.3655, "step": 1354 }, { "epoch": 0.22065708586084762, "grad_norm": 0.3931443393230438, "learning_rate": 3.676071622354856e-05, "loss": 0.3506, "step": 1355 }, { "epoch": 0.2208199324186785, "grad_norm": 0.45952045917510986, "learning_rate": 3.678784590341834e-05, "loss": 0.4436, "step": 1356 }, { "epoch": 0.2209827789765094, "grad_norm": 0.5094476342201233, "learning_rate": 3.681497558328812e-05, "loss": 0.4387, "step": 1357 }, { "epoch": 0.22114562553434028, "grad_norm": 0.4491806924343109, "learning_rate": 3.6842105263157895e-05, "loss": 0.3666, "step": 1358 }, { "epoch": 0.22130847209217114, "grad_norm": 0.4908963739871979, "learning_rate": 3.6869234943027675e-05, "loss": 0.402, "step": 1359 }, { "epoch": 0.22147131865000202, "grad_norm": 0.5129070281982422, "learning_rate": 3.689636462289745e-05, "loss": 0.3934, "step": 1360 }, { "epoch": 0.2216341652078329, "grad_norm": 0.45673075318336487, "learning_rate": 3.692349430276723e-05, "loss": 0.369, "step": 1361 }, { "epoch": 0.2217970117656638, "grad_norm": 0.5521530508995056, "learning_rate": 3.695062398263701e-05, "loss": 0.4418, "step": 1362 }, { "epoch": 0.22195985832349469, "grad_norm": 0.4975653886795044, "learning_rate": 3.697775366250679e-05, "loss": 0.4027, "step": 1363 }, { "epoch": 0.22212270488132557, "grad_norm": 0.5604869723320007, "learning_rate": 3.700488334237656e-05, "loss": 0.4134, "step": 1364 }, { "epoch": 0.22228555143915646, "grad_norm": 0.4514196515083313, "learning_rate": 3.7032013022246334e-05, "loss": 0.3833, "step": 1365 }, { "epoch": 0.22244839799698735, "grad_norm": 0.43701431155204773, "learning_rate": 3.705914270211612e-05, "loss": 0.3812, "step": 1366 }, { "epoch": 0.22261124455481823, "grad_norm": 0.4788796603679657, "learning_rate": 3.7086272381985894e-05, "loss": 0.3896, "step": 1367 }, { "epoch": 0.22277409111264912, "grad_norm": 0.522684633731842, "learning_rate": 3.7113402061855674e-05, "loss": 0.4757, "step": 1368 }, { "epoch": 0.22293693767047998, "grad_norm": 0.5193307399749756, "learning_rate": 3.714053174172545e-05, "loss": 0.3936, "step": 1369 }, { "epoch": 0.22309978422831087, "grad_norm": 0.4267832934856415, "learning_rate": 3.716766142159523e-05, "loss": 0.3703, "step": 1370 }, { "epoch": 0.22326263078614175, "grad_norm": 0.5646998286247253, "learning_rate": 3.7194791101465007e-05, "loss": 0.4213, "step": 1371 }, { "epoch": 0.22342547734397264, "grad_norm": 0.5062430500984192, "learning_rate": 3.7221920781334786e-05, "loss": 0.4234, "step": 1372 }, { "epoch": 0.22358832390180353, "grad_norm": 0.458510160446167, "learning_rate": 3.724905046120456e-05, "loss": 0.4062, "step": 1373 }, { "epoch": 0.2237511704596344, "grad_norm": 0.496159166097641, "learning_rate": 3.727618014107433e-05, "loss": 0.4098, "step": 1374 }, { "epoch": 0.2239140170174653, "grad_norm": 0.5064298510551453, "learning_rate": 3.730330982094411e-05, "loss": 0.4041, "step": 1375 }, { "epoch": 0.2240768635752962, "grad_norm": 0.49588170647621155, "learning_rate": 3.733043950081389e-05, "loss": 0.3782, "step": 1376 }, { "epoch": 0.22423971013312707, "grad_norm": 0.5122025012969971, "learning_rate": 3.735756918068367e-05, "loss": 0.4477, "step": 1377 }, { "epoch": 0.22440255669095793, "grad_norm": 0.6076107621192932, "learning_rate": 3.7384698860553446e-05, "loss": 0.393, "step": 1378 }, { "epoch": 0.22456540324878882, "grad_norm": 0.5042504668235779, "learning_rate": 3.7411828540423226e-05, "loss": 0.3961, "step": 1379 }, { "epoch": 0.2247282498066197, "grad_norm": 0.5427629351615906, "learning_rate": 3.7438958220293005e-05, "loss": 0.3746, "step": 1380 }, { "epoch": 0.2248910963644506, "grad_norm": 0.47092577815055847, "learning_rate": 3.746608790016278e-05, "loss": 0.3773, "step": 1381 }, { "epoch": 0.22505394292228148, "grad_norm": 0.4696739912033081, "learning_rate": 3.749321758003256e-05, "loss": 0.425, "step": 1382 }, { "epoch": 0.22521678948011237, "grad_norm": 0.4873769283294678, "learning_rate": 3.752034725990233e-05, "loss": 0.4166, "step": 1383 }, { "epoch": 0.22537963603794325, "grad_norm": 0.4479173421859741, "learning_rate": 3.754747693977211e-05, "loss": 0.3839, "step": 1384 }, { "epoch": 0.22554248259577414, "grad_norm": 0.4312341511249542, "learning_rate": 3.757460661964189e-05, "loss": 0.3834, "step": 1385 }, { "epoch": 0.22570532915360503, "grad_norm": 0.478288471698761, "learning_rate": 3.760173629951167e-05, "loss": 0.3988, "step": 1386 }, { "epoch": 0.2258681757114359, "grad_norm": 0.47583454847335815, "learning_rate": 3.7628865979381445e-05, "loss": 0.3565, "step": 1387 }, { "epoch": 0.22603102226926677, "grad_norm": 0.39864346385002136, "learning_rate": 3.7655995659251224e-05, "loss": 0.3681, "step": 1388 }, { "epoch": 0.22619386882709766, "grad_norm": 0.4007217586040497, "learning_rate": 3.7683125339121e-05, "loss": 0.3392, "step": 1389 }, { "epoch": 0.22635671538492855, "grad_norm": 0.4951607584953308, "learning_rate": 3.771025501899078e-05, "loss": 0.4272, "step": 1390 }, { "epoch": 0.22651956194275943, "grad_norm": 0.43304041028022766, "learning_rate": 3.773738469886056e-05, "loss": 0.3545, "step": 1391 }, { "epoch": 0.22668240850059032, "grad_norm": 0.43963566422462463, "learning_rate": 3.776451437873033e-05, "loss": 0.3858, "step": 1392 }, { "epoch": 0.2268452550584212, "grad_norm": 0.454183429479599, "learning_rate": 3.779164405860011e-05, "loss": 0.3925, "step": 1393 }, { "epoch": 0.2270081016162521, "grad_norm": 0.43251708149909973, "learning_rate": 3.781877373846989e-05, "loss": 0.3933, "step": 1394 }, { "epoch": 0.22717094817408298, "grad_norm": 0.4517180323600769, "learning_rate": 3.784590341833967e-05, "loss": 0.3854, "step": 1395 }, { "epoch": 0.22733379473191384, "grad_norm": 0.48010364174842834, "learning_rate": 3.7873033098209443e-05, "loss": 0.3983, "step": 1396 }, { "epoch": 0.22749664128974473, "grad_norm": 0.46078693866729736, "learning_rate": 3.7900162778079217e-05, "loss": 0.3777, "step": 1397 }, { "epoch": 0.22765948784757561, "grad_norm": 0.5276618599891663, "learning_rate": 3.7927292457948997e-05, "loss": 0.3932, "step": 1398 }, { "epoch": 0.2278223344054065, "grad_norm": 0.5925973653793335, "learning_rate": 3.7954422137818776e-05, "loss": 0.4011, "step": 1399 }, { "epoch": 0.2279851809632374, "grad_norm": 0.4451169967651367, "learning_rate": 3.7981551817688556e-05, "loss": 0.3896, "step": 1400 }, { "epoch": 0.22814802752106828, "grad_norm": 0.4409002959728241, "learning_rate": 3.800868149755833e-05, "loss": 0.4054, "step": 1401 }, { "epoch": 0.22831087407889916, "grad_norm": 0.5257862210273743, "learning_rate": 3.803581117742811e-05, "loss": 0.4125, "step": 1402 }, { "epoch": 0.22847372063673005, "grad_norm": 0.5862941145896912, "learning_rate": 3.806294085729788e-05, "loss": 0.3923, "step": 1403 }, { "epoch": 0.22863656719456094, "grad_norm": 0.46168240904808044, "learning_rate": 3.809007053716766e-05, "loss": 0.4305, "step": 1404 }, { "epoch": 0.22879941375239182, "grad_norm": 0.6432879567146301, "learning_rate": 3.811720021703744e-05, "loss": 0.4216, "step": 1405 }, { "epoch": 0.22896226031022268, "grad_norm": 0.5440748333930969, "learning_rate": 3.8144329896907216e-05, "loss": 0.3963, "step": 1406 }, { "epoch": 0.22912510686805357, "grad_norm": 0.5058342218399048, "learning_rate": 3.8171459576776995e-05, "loss": 0.4219, "step": 1407 }, { "epoch": 0.22928795342588446, "grad_norm": 0.45631664991378784, "learning_rate": 3.819858925664677e-05, "loss": 0.3825, "step": 1408 }, { "epoch": 0.22945079998371534, "grad_norm": 0.4801349937915802, "learning_rate": 3.8225718936516555e-05, "loss": 0.3893, "step": 1409 }, { "epoch": 0.22961364654154623, "grad_norm": 0.4777928590774536, "learning_rate": 3.825284861638633e-05, "loss": 0.419, "step": 1410 }, { "epoch": 0.22977649309937712, "grad_norm": 0.5110885500907898, "learning_rate": 3.827997829625611e-05, "loss": 0.3609, "step": 1411 }, { "epoch": 0.229939339657208, "grad_norm": 0.4289621412754059, "learning_rate": 3.830710797612588e-05, "loss": 0.3755, "step": 1412 }, { "epoch": 0.2301021862150389, "grad_norm": 0.5332009792327881, "learning_rate": 3.833423765599566e-05, "loss": 0.3902, "step": 1413 }, { "epoch": 0.23026503277286978, "grad_norm": 0.4157821238040924, "learning_rate": 3.836136733586544e-05, "loss": 0.3461, "step": 1414 }, { "epoch": 0.23042787933070064, "grad_norm": 0.48931238055229187, "learning_rate": 3.8388497015735214e-05, "loss": 0.3516, "step": 1415 }, { "epoch": 0.23059072588853152, "grad_norm": 0.48254871368408203, "learning_rate": 3.8415626695604994e-05, "loss": 0.3831, "step": 1416 }, { "epoch": 0.2307535724463624, "grad_norm": 0.475296288728714, "learning_rate": 3.844275637547477e-05, "loss": 0.3796, "step": 1417 }, { "epoch": 0.2309164190041933, "grad_norm": 0.57911616563797, "learning_rate": 3.846988605534455e-05, "loss": 0.4292, "step": 1418 }, { "epoch": 0.23107926556202418, "grad_norm": 0.4155750572681427, "learning_rate": 3.849701573521433e-05, "loss": 0.3701, "step": 1419 }, { "epoch": 0.23124211211985507, "grad_norm": 0.6133988499641418, "learning_rate": 3.852414541508411e-05, "loss": 0.3997, "step": 1420 }, { "epoch": 0.23140495867768596, "grad_norm": 0.4456629753112793, "learning_rate": 3.855127509495388e-05, "loss": 0.3425, "step": 1421 }, { "epoch": 0.23156780523551684, "grad_norm": 0.5390467047691345, "learning_rate": 3.8578404774823654e-05, "loss": 0.3751, "step": 1422 }, { "epoch": 0.23173065179334773, "grad_norm": 0.46109068393707275, "learning_rate": 3.860553445469344e-05, "loss": 0.3551, "step": 1423 }, { "epoch": 0.2318934983511786, "grad_norm": 0.4797053635120392, "learning_rate": 3.863266413456321e-05, "loss": 0.3398, "step": 1424 }, { "epoch": 0.23205634490900948, "grad_norm": 0.45783987641334534, "learning_rate": 3.865979381443299e-05, "loss": 0.4078, "step": 1425 }, { "epoch": 0.23221919146684036, "grad_norm": 0.554729700088501, "learning_rate": 3.8686923494302766e-05, "loss": 0.3828, "step": 1426 }, { "epoch": 0.23238203802467125, "grad_norm": 0.5175896883010864, "learning_rate": 3.8714053174172546e-05, "loss": 0.3948, "step": 1427 }, { "epoch": 0.23254488458250214, "grad_norm": 0.4845796823501587, "learning_rate": 3.8741182854042326e-05, "loss": 0.4157, "step": 1428 }, { "epoch": 0.23270773114033302, "grad_norm": 0.4461766481399536, "learning_rate": 3.87683125339121e-05, "loss": 0.3925, "step": 1429 }, { "epoch": 0.2328705776981639, "grad_norm": 0.4927656054496765, "learning_rate": 3.879544221378188e-05, "loss": 0.3995, "step": 1430 }, { "epoch": 0.2330334242559948, "grad_norm": 0.39425423741340637, "learning_rate": 3.882257189365165e-05, "loss": 0.376, "step": 1431 }, { "epoch": 0.23319627081382568, "grad_norm": 0.47831106185913086, "learning_rate": 3.884970157352143e-05, "loss": 0.3676, "step": 1432 }, { "epoch": 0.23335911737165654, "grad_norm": 0.4138374626636505, "learning_rate": 3.887683125339121e-05, "loss": 0.3641, "step": 1433 }, { "epoch": 0.23352196392948743, "grad_norm": 0.40474963188171387, "learning_rate": 3.890396093326099e-05, "loss": 0.3264, "step": 1434 }, { "epoch": 0.23368481048731832, "grad_norm": 0.47307515144348145, "learning_rate": 3.8931090613130765e-05, "loss": 0.3686, "step": 1435 }, { "epoch": 0.2338476570451492, "grad_norm": 0.43733271956443787, "learning_rate": 3.8958220293000545e-05, "loss": 0.4096, "step": 1436 }, { "epoch": 0.2340105036029801, "grad_norm": 0.5141897201538086, "learning_rate": 3.8985349972870325e-05, "loss": 0.4202, "step": 1437 }, { "epoch": 0.23417335016081098, "grad_norm": 0.4445722699165344, "learning_rate": 3.90124796527401e-05, "loss": 0.3604, "step": 1438 }, { "epoch": 0.23433619671864186, "grad_norm": 0.43281567096710205, "learning_rate": 3.903960933260988e-05, "loss": 0.3906, "step": 1439 }, { "epoch": 0.23449904327647275, "grad_norm": 0.5179872512817383, "learning_rate": 3.906673901247965e-05, "loss": 0.413, "step": 1440 }, { "epoch": 0.23466188983430364, "grad_norm": 0.4548695385456085, "learning_rate": 3.909386869234943e-05, "loss": 0.3749, "step": 1441 }, { "epoch": 0.2348247363921345, "grad_norm": 0.42756175994873047, "learning_rate": 3.912099837221921e-05, "loss": 0.3954, "step": 1442 }, { "epoch": 0.23498758294996538, "grad_norm": 0.44595572352409363, "learning_rate": 3.914812805208899e-05, "loss": 0.377, "step": 1443 }, { "epoch": 0.23515042950779627, "grad_norm": 0.470281183719635, "learning_rate": 3.9175257731958764e-05, "loss": 0.4076, "step": 1444 }, { "epoch": 0.23531327606562716, "grad_norm": 0.3904097080230713, "learning_rate": 3.9202387411828544e-05, "loss": 0.3899, "step": 1445 }, { "epoch": 0.23547612262345805, "grad_norm": 0.4134519100189209, "learning_rate": 3.922951709169832e-05, "loss": 0.4161, "step": 1446 }, { "epoch": 0.23563896918128893, "grad_norm": 0.48975276947021484, "learning_rate": 3.92566467715681e-05, "loss": 0.3677, "step": 1447 }, { "epoch": 0.23580181573911982, "grad_norm": 0.4530782103538513, "learning_rate": 3.928377645143788e-05, "loss": 0.4084, "step": 1448 }, { "epoch": 0.2359646622969507, "grad_norm": 0.3890145719051361, "learning_rate": 3.931090613130765e-05, "loss": 0.3916, "step": 1449 }, { "epoch": 0.2361275088547816, "grad_norm": 0.5726819634437561, "learning_rate": 3.933803581117743e-05, "loss": 0.4313, "step": 1450 }, { "epoch": 0.23629035541261248, "grad_norm": 0.4177243113517761, "learning_rate": 3.93651654910472e-05, "loss": 0.3773, "step": 1451 }, { "epoch": 0.23645320197044334, "grad_norm": 0.49986764788627625, "learning_rate": 3.939229517091699e-05, "loss": 0.3703, "step": 1452 }, { "epoch": 0.23661604852827423, "grad_norm": 0.4718853533267975, "learning_rate": 3.941942485078676e-05, "loss": 0.369, "step": 1453 }, { "epoch": 0.2367788950861051, "grad_norm": 0.4346357583999634, "learning_rate": 3.9446554530656536e-05, "loss": 0.4158, "step": 1454 }, { "epoch": 0.236941741643936, "grad_norm": 0.5215493440628052, "learning_rate": 3.9473684210526316e-05, "loss": 0.3916, "step": 1455 }, { "epoch": 0.23710458820176689, "grad_norm": 0.47284582257270813, "learning_rate": 3.9500813890396096e-05, "loss": 0.3818, "step": 1456 }, { "epoch": 0.23726743475959777, "grad_norm": 0.4899005591869354, "learning_rate": 3.9527943570265876e-05, "loss": 0.4301, "step": 1457 }, { "epoch": 0.23743028131742866, "grad_norm": 0.48262324929237366, "learning_rate": 3.955507325013565e-05, "loss": 0.4003, "step": 1458 }, { "epoch": 0.23759312787525955, "grad_norm": 0.4606136679649353, "learning_rate": 3.958220293000543e-05, "loss": 0.4144, "step": 1459 }, { "epoch": 0.23775597443309043, "grad_norm": 0.4842362403869629, "learning_rate": 3.96093326098752e-05, "loss": 0.3442, "step": 1460 }, { "epoch": 0.2379188209909213, "grad_norm": 0.6288771033287048, "learning_rate": 3.963646228974498e-05, "loss": 0.4038, "step": 1461 }, { "epoch": 0.23808166754875218, "grad_norm": 0.4862722158432007, "learning_rate": 3.966359196961476e-05, "loss": 0.3861, "step": 1462 }, { "epoch": 0.23824451410658307, "grad_norm": 0.416065514087677, "learning_rate": 3.9690721649484535e-05, "loss": 0.3677, "step": 1463 }, { "epoch": 0.23840736066441395, "grad_norm": 0.4702129065990448, "learning_rate": 3.9717851329354315e-05, "loss": 0.3851, "step": 1464 }, { "epoch": 0.23857020722224484, "grad_norm": 0.4967953562736511, "learning_rate": 3.974498100922409e-05, "loss": 0.3829, "step": 1465 }, { "epoch": 0.23873305378007573, "grad_norm": 0.45691385865211487, "learning_rate": 3.9772110689093875e-05, "loss": 0.392, "step": 1466 }, { "epoch": 0.2388959003379066, "grad_norm": 0.41693976521492004, "learning_rate": 3.979924036896365e-05, "loss": 0.3317, "step": 1467 }, { "epoch": 0.2390587468957375, "grad_norm": 0.44892817735671997, "learning_rate": 3.982637004883343e-05, "loss": 0.353, "step": 1468 }, { "epoch": 0.2392215934535684, "grad_norm": 0.5127306580543518, "learning_rate": 3.98534997287032e-05, "loss": 0.3502, "step": 1469 }, { "epoch": 0.23938444001139925, "grad_norm": 0.49173593521118164, "learning_rate": 3.988062940857298e-05, "loss": 0.3887, "step": 1470 }, { "epoch": 0.23954728656923013, "grad_norm": 0.40639010071754456, "learning_rate": 3.990775908844276e-05, "loss": 0.3689, "step": 1471 }, { "epoch": 0.23971013312706102, "grad_norm": 0.4353596568107605, "learning_rate": 3.9934888768312534e-05, "loss": 0.3772, "step": 1472 }, { "epoch": 0.2398729796848919, "grad_norm": 0.45274606347084045, "learning_rate": 3.9962018448182314e-05, "loss": 0.3793, "step": 1473 }, { "epoch": 0.2400358262427228, "grad_norm": 0.48914241790771484, "learning_rate": 3.998914812805209e-05, "loss": 0.3374, "step": 1474 }, { "epoch": 0.24019867280055368, "grad_norm": 0.4706673324108124, "learning_rate": 4.001627780792187e-05, "loss": 0.3938, "step": 1475 }, { "epoch": 0.24036151935838457, "grad_norm": 0.4479151964187622, "learning_rate": 4.004340748779165e-05, "loss": 0.3964, "step": 1476 }, { "epoch": 0.24052436591621545, "grad_norm": 0.4749910533428192, "learning_rate": 4.007053716766143e-05, "loss": 0.3781, "step": 1477 }, { "epoch": 0.24068721247404634, "grad_norm": 0.44461214542388916, "learning_rate": 4.00976668475312e-05, "loss": 0.4207, "step": 1478 }, { "epoch": 0.2408500590318772, "grad_norm": 0.3971055746078491, "learning_rate": 4.012479652740097e-05, "loss": 0.3486, "step": 1479 }, { "epoch": 0.2410129055897081, "grad_norm": 0.455963671207428, "learning_rate": 4.015192620727076e-05, "loss": 0.3666, "step": 1480 }, { "epoch": 0.24117575214753897, "grad_norm": 0.5012855529785156, "learning_rate": 4.017905588714053e-05, "loss": 0.3811, "step": 1481 }, { "epoch": 0.24133859870536986, "grad_norm": 0.42261794209480286, "learning_rate": 4.020618556701031e-05, "loss": 0.4255, "step": 1482 }, { "epoch": 0.24150144526320075, "grad_norm": 0.4648958742618561, "learning_rate": 4.0233315246880086e-05, "loss": 0.4168, "step": 1483 }, { "epoch": 0.24166429182103163, "grad_norm": 0.4486434757709503, "learning_rate": 4.0260444926749866e-05, "loss": 0.4141, "step": 1484 }, { "epoch": 0.24182713837886252, "grad_norm": 0.45167699456214905, "learning_rate": 4.0287574606619646e-05, "loss": 0.446, "step": 1485 }, { "epoch": 0.2419899849366934, "grad_norm": 0.42774659395217896, "learning_rate": 4.031470428648942e-05, "loss": 0.456, "step": 1486 }, { "epoch": 0.2421528314945243, "grad_norm": 0.4026424288749695, "learning_rate": 4.03418339663592e-05, "loss": 0.3733, "step": 1487 }, { "epoch": 0.24231567805235515, "grad_norm": 0.4605429470539093, "learning_rate": 4.036896364622897e-05, "loss": 0.3942, "step": 1488 }, { "epoch": 0.24247852461018604, "grad_norm": 0.520420253276825, "learning_rate": 4.039609332609875e-05, "loss": 0.4405, "step": 1489 }, { "epoch": 0.24264137116801693, "grad_norm": 0.5544473528862, "learning_rate": 4.042322300596853e-05, "loss": 0.4341, "step": 1490 }, { "epoch": 0.24280421772584782, "grad_norm": 0.43170955777168274, "learning_rate": 4.045035268583831e-05, "loss": 0.3779, "step": 1491 }, { "epoch": 0.2429670642836787, "grad_norm": 0.46771013736724854, "learning_rate": 4.0477482365708085e-05, "loss": 0.3982, "step": 1492 }, { "epoch": 0.2431299108415096, "grad_norm": 0.5225098133087158, "learning_rate": 4.0504612045577865e-05, "loss": 0.4289, "step": 1493 }, { "epoch": 0.24329275739934048, "grad_norm": 0.4269978404045105, "learning_rate": 4.0531741725447645e-05, "loss": 0.3831, "step": 1494 }, { "epoch": 0.24345560395717136, "grad_norm": 0.4119569659233093, "learning_rate": 4.055887140531742e-05, "loss": 0.3726, "step": 1495 }, { "epoch": 0.24361845051500225, "grad_norm": 0.5088585019111633, "learning_rate": 4.05860010851872e-05, "loss": 0.4238, "step": 1496 }, { "epoch": 0.24378129707283314, "grad_norm": 0.39956575632095337, "learning_rate": 4.061313076505697e-05, "loss": 0.3719, "step": 1497 }, { "epoch": 0.243944143630664, "grad_norm": 0.4730818569660187, "learning_rate": 4.064026044492675e-05, "loss": 0.3694, "step": 1498 }, { "epoch": 0.24410699018849488, "grad_norm": 0.39523112773895264, "learning_rate": 4.066739012479653e-05, "loss": 0.383, "step": 1499 }, { "epoch": 0.24426983674632577, "grad_norm": 0.45621001720428467, "learning_rate": 4.069451980466631e-05, "loss": 0.3798, "step": 1500 }, { "epoch": 0.24443268330415666, "grad_norm": 0.4243468940258026, "learning_rate": 4.0721649484536084e-05, "loss": 0.4267, "step": 1501 }, { "epoch": 0.24459552986198754, "grad_norm": 0.3791022002696991, "learning_rate": 4.0748779164405864e-05, "loss": 0.3986, "step": 1502 }, { "epoch": 0.24475837641981843, "grad_norm": 0.4298471510410309, "learning_rate": 4.077590884427564e-05, "loss": 0.3851, "step": 1503 }, { "epoch": 0.24492122297764932, "grad_norm": 0.42848560214042664, "learning_rate": 4.080303852414542e-05, "loss": 0.3944, "step": 1504 }, { "epoch": 0.2450840695354802, "grad_norm": 0.5205062031745911, "learning_rate": 4.08301682040152e-05, "loss": 0.3915, "step": 1505 }, { "epoch": 0.2452469160933111, "grad_norm": 0.46349143981933594, "learning_rate": 4.085729788388497e-05, "loss": 0.3965, "step": 1506 }, { "epoch": 0.24540976265114195, "grad_norm": 0.49134185910224915, "learning_rate": 4.088442756375475e-05, "loss": 0.4071, "step": 1507 }, { "epoch": 0.24557260920897284, "grad_norm": 0.5615836381912231, "learning_rate": 4.091155724362452e-05, "loss": 0.4076, "step": 1508 }, { "epoch": 0.24573545576680372, "grad_norm": 0.45780253410339355, "learning_rate": 4.093868692349431e-05, "loss": 0.3982, "step": 1509 }, { "epoch": 0.2458983023246346, "grad_norm": 0.5046116709709167, "learning_rate": 4.096581660336408e-05, "loss": 0.3992, "step": 1510 }, { "epoch": 0.2460611488824655, "grad_norm": 0.5901451110839844, "learning_rate": 4.0992946283233856e-05, "loss": 0.3844, "step": 1511 }, { "epoch": 0.24622399544029638, "grad_norm": 0.5604932308197021, "learning_rate": 4.1020075963103636e-05, "loss": 0.3785, "step": 1512 }, { "epoch": 0.24638684199812727, "grad_norm": 0.581439197063446, "learning_rate": 4.1047205642973416e-05, "loss": 0.4229, "step": 1513 }, { "epoch": 0.24654968855595816, "grad_norm": 0.42706528306007385, "learning_rate": 4.1074335322843196e-05, "loss": 0.3735, "step": 1514 }, { "epoch": 0.24671253511378904, "grad_norm": 0.4850426912307739, "learning_rate": 4.110146500271297e-05, "loss": 0.375, "step": 1515 }, { "epoch": 0.2468753816716199, "grad_norm": 0.5428923964500427, "learning_rate": 4.112859468258275e-05, "loss": 0.4381, "step": 1516 }, { "epoch": 0.2470382282294508, "grad_norm": 0.43760061264038086, "learning_rate": 4.115572436245252e-05, "loss": 0.4164, "step": 1517 }, { "epoch": 0.24720107478728168, "grad_norm": 0.5254872441291809, "learning_rate": 4.11828540423223e-05, "loss": 0.4123, "step": 1518 }, { "epoch": 0.24736392134511256, "grad_norm": 0.4868192970752716, "learning_rate": 4.120998372219208e-05, "loss": 0.423, "step": 1519 }, { "epoch": 0.24752676790294345, "grad_norm": 0.4339751601219177, "learning_rate": 4.1237113402061855e-05, "loss": 0.4114, "step": 1520 }, { "epoch": 0.24768961446077434, "grad_norm": 0.5099534392356873, "learning_rate": 4.1264243081931635e-05, "loss": 0.3624, "step": 1521 }, { "epoch": 0.24785246101860522, "grad_norm": 0.5719138979911804, "learning_rate": 4.129137276180141e-05, "loss": 0.3981, "step": 1522 }, { "epoch": 0.2480153075764361, "grad_norm": 0.531113862991333, "learning_rate": 4.1318502441671195e-05, "loss": 0.3762, "step": 1523 }, { "epoch": 0.248178154134267, "grad_norm": 0.4038475453853607, "learning_rate": 4.134563212154097e-05, "loss": 0.3583, "step": 1524 }, { "epoch": 0.24834100069209786, "grad_norm": 0.5001990795135498, "learning_rate": 4.137276180141075e-05, "loss": 0.374, "step": 1525 }, { "epoch": 0.24850384724992874, "grad_norm": 0.5595395565032959, "learning_rate": 4.139989148128052e-05, "loss": 0.4168, "step": 1526 }, { "epoch": 0.24866669380775963, "grad_norm": 0.46058976650238037, "learning_rate": 4.14270211611503e-05, "loss": 0.3502, "step": 1527 }, { "epoch": 0.24882954036559052, "grad_norm": 0.891063928604126, "learning_rate": 4.145415084102008e-05, "loss": 0.4149, "step": 1528 }, { "epoch": 0.2489923869234214, "grad_norm": 0.5233928561210632, "learning_rate": 4.1481280520889854e-05, "loss": 0.4202, "step": 1529 }, { "epoch": 0.2491552334812523, "grad_norm": 0.5267454981803894, "learning_rate": 4.1508410200759634e-05, "loss": 0.3699, "step": 1530 }, { "epoch": 0.24931808003908318, "grad_norm": 0.4335690438747406, "learning_rate": 4.153553988062941e-05, "loss": 0.388, "step": 1531 }, { "epoch": 0.24948092659691407, "grad_norm": 0.4105895757675171, "learning_rate": 4.156266956049919e-05, "loss": 0.3507, "step": 1532 }, { "epoch": 0.24964377315474495, "grad_norm": 0.4485861659049988, "learning_rate": 4.158979924036897e-05, "loss": 0.355, "step": 1533 }, { "epoch": 0.24980661971257584, "grad_norm": 0.49610909819602966, "learning_rate": 4.161692892023875e-05, "loss": 0.3683, "step": 1534 }, { "epoch": 0.2499694662704067, "grad_norm": 0.5439954400062561, "learning_rate": 4.164405860010852e-05, "loss": 0.4132, "step": 1535 }, { "epoch": 0.2501323128282376, "grad_norm": 0.5285641551017761, "learning_rate": 4.167118827997829e-05, "loss": 0.3947, "step": 1536 }, { "epoch": 0.2502951593860685, "grad_norm": 0.5387890338897705, "learning_rate": 4.169831795984808e-05, "loss": 0.4204, "step": 1537 }, { "epoch": 0.2504580059438994, "grad_norm": 0.5102218389511108, "learning_rate": 4.172544763971785e-05, "loss": 0.4173, "step": 1538 }, { "epoch": 0.2506208525017302, "grad_norm": 0.5127475261688232, "learning_rate": 4.175257731958763e-05, "loss": 0.3885, "step": 1539 }, { "epoch": 0.2507836990595611, "grad_norm": 0.5970035195350647, "learning_rate": 4.1779706999457406e-05, "loss": 0.4386, "step": 1540 }, { "epoch": 0.250946545617392, "grad_norm": 0.5387725830078125, "learning_rate": 4.1806836679327186e-05, "loss": 0.4178, "step": 1541 }, { "epoch": 0.2511093921752229, "grad_norm": 0.5805031061172485, "learning_rate": 4.1833966359196966e-05, "loss": 0.4167, "step": 1542 }, { "epoch": 0.25127223873305377, "grad_norm": 0.49853023886680603, "learning_rate": 4.186109603906674e-05, "loss": 0.421, "step": 1543 }, { "epoch": 0.25143508529088465, "grad_norm": 0.6297460794448853, "learning_rate": 4.188822571893652e-05, "loss": 0.3627, "step": 1544 }, { "epoch": 0.25159793184871554, "grad_norm": 0.5320362448692322, "learning_rate": 4.191535539880629e-05, "loss": 0.4077, "step": 1545 }, { "epoch": 0.2517607784065464, "grad_norm": 0.6114585399627686, "learning_rate": 4.194248507867607e-05, "loss": 0.419, "step": 1546 }, { "epoch": 0.2519236249643773, "grad_norm": 0.630975604057312, "learning_rate": 4.196961475854585e-05, "loss": 0.4008, "step": 1547 }, { "epoch": 0.2520864715222082, "grad_norm": 0.4646144211292267, "learning_rate": 4.199674443841563e-05, "loss": 0.4209, "step": 1548 }, { "epoch": 0.2522493180800391, "grad_norm": 0.524515688419342, "learning_rate": 4.2023874118285405e-05, "loss": 0.3579, "step": 1549 }, { "epoch": 0.25241216463787, "grad_norm": 0.5801146030426025, "learning_rate": 4.2051003798155185e-05, "loss": 0.3758, "step": 1550 }, { "epoch": 0.25257501119570086, "grad_norm": 0.4425656497478485, "learning_rate": 4.2078133478024965e-05, "loss": 0.3662, "step": 1551 }, { "epoch": 0.25273785775353175, "grad_norm": 0.4453093111515045, "learning_rate": 4.210526315789474e-05, "loss": 0.3909, "step": 1552 }, { "epoch": 0.25290070431136263, "grad_norm": 0.5507324934005737, "learning_rate": 4.213239283776452e-05, "loss": 0.4382, "step": 1553 }, { "epoch": 0.2530635508691935, "grad_norm": 0.4274618327617645, "learning_rate": 4.215952251763429e-05, "loss": 0.3407, "step": 1554 }, { "epoch": 0.2532263974270244, "grad_norm": 0.5082084536552429, "learning_rate": 4.218665219750407e-05, "loss": 0.4105, "step": 1555 }, { "epoch": 0.2533892439848553, "grad_norm": 0.4634896218776703, "learning_rate": 4.221378187737385e-05, "loss": 0.3498, "step": 1556 }, { "epoch": 0.2535520905426862, "grad_norm": 0.5805667042732239, "learning_rate": 4.224091155724363e-05, "loss": 0.3761, "step": 1557 }, { "epoch": 0.253714937100517, "grad_norm": 0.607479453086853, "learning_rate": 4.2268041237113404e-05, "loss": 0.4512, "step": 1558 }, { "epoch": 0.2538777836583479, "grad_norm": 0.482871949672699, "learning_rate": 4.229517091698318e-05, "loss": 0.3589, "step": 1559 }, { "epoch": 0.2540406302161788, "grad_norm": 0.46907639503479004, "learning_rate": 4.232230059685296e-05, "loss": 0.3735, "step": 1560 }, { "epoch": 0.2542034767740097, "grad_norm": 0.4859382212162018, "learning_rate": 4.234943027672274e-05, "loss": 0.3366, "step": 1561 }, { "epoch": 0.25436632333184056, "grad_norm": 0.5949024558067322, "learning_rate": 4.237655995659252e-05, "loss": 0.4001, "step": 1562 }, { "epoch": 0.25452916988967145, "grad_norm": 0.5089821219444275, "learning_rate": 4.240368963646229e-05, "loss": 0.3766, "step": 1563 }, { "epoch": 0.25469201644750233, "grad_norm": 0.45543456077575684, "learning_rate": 4.243081931633207e-05, "loss": 0.3953, "step": 1564 }, { "epoch": 0.2548548630053332, "grad_norm": 0.6127356290817261, "learning_rate": 4.245794899620184e-05, "loss": 0.3862, "step": 1565 }, { "epoch": 0.2550177095631641, "grad_norm": 0.492351233959198, "learning_rate": 4.248507867607163e-05, "loss": 0.3965, "step": 1566 }, { "epoch": 0.255180556120995, "grad_norm": 0.5263499021530151, "learning_rate": 4.25122083559414e-05, "loss": 0.4073, "step": 1567 }, { "epoch": 0.2553434026788259, "grad_norm": 0.5590935349464417, "learning_rate": 4.2539338035811176e-05, "loss": 0.412, "step": 1568 }, { "epoch": 0.25550624923665677, "grad_norm": 0.5176772475242615, "learning_rate": 4.2566467715680956e-05, "loss": 0.3788, "step": 1569 }, { "epoch": 0.25566909579448766, "grad_norm": 0.5144397020339966, "learning_rate": 4.2593597395550736e-05, "loss": 0.4761, "step": 1570 }, { "epoch": 0.25583194235231854, "grad_norm": 0.5862990021705627, "learning_rate": 4.2620727075420516e-05, "loss": 0.3945, "step": 1571 }, { "epoch": 0.25599478891014943, "grad_norm": 0.5174688100814819, "learning_rate": 4.264785675529029e-05, "loss": 0.4019, "step": 1572 }, { "epoch": 0.2561576354679803, "grad_norm": 0.49961477518081665, "learning_rate": 4.267498643516007e-05, "loss": 0.4316, "step": 1573 }, { "epoch": 0.2563204820258112, "grad_norm": 0.5329394936561584, "learning_rate": 4.270211611502984e-05, "loss": 0.3875, "step": 1574 }, { "epoch": 0.2564833285836421, "grad_norm": 0.46020716428756714, "learning_rate": 4.272924579489962e-05, "loss": 0.3929, "step": 1575 }, { "epoch": 0.2566461751414729, "grad_norm": 0.3870908319950104, "learning_rate": 4.27563754747694e-05, "loss": 0.3801, "step": 1576 }, { "epoch": 0.2568090216993038, "grad_norm": 0.4179229438304901, "learning_rate": 4.2783505154639175e-05, "loss": 0.3445, "step": 1577 }, { "epoch": 0.2569718682571347, "grad_norm": 0.5455760955810547, "learning_rate": 4.2810634834508955e-05, "loss": 0.4261, "step": 1578 }, { "epoch": 0.2571347148149656, "grad_norm": 0.48829910159111023, "learning_rate": 4.283776451437873e-05, "loss": 0.3909, "step": 1579 }, { "epoch": 0.25729756137279647, "grad_norm": 0.5189917683601379, "learning_rate": 4.2864894194248514e-05, "loss": 0.3797, "step": 1580 }, { "epoch": 0.25746040793062736, "grad_norm": 0.5123583078384399, "learning_rate": 4.289202387411829e-05, "loss": 0.4371, "step": 1581 }, { "epoch": 0.25762325448845824, "grad_norm": 0.490360289812088, "learning_rate": 4.291915355398807e-05, "loss": 0.4021, "step": 1582 }, { "epoch": 0.25778610104628913, "grad_norm": 0.5002934336662292, "learning_rate": 4.294628323385784e-05, "loss": 0.389, "step": 1583 }, { "epoch": 0.25794894760412, "grad_norm": 0.48437556624412537, "learning_rate": 4.297341291372762e-05, "loss": 0.4489, "step": 1584 }, { "epoch": 0.2581117941619509, "grad_norm": 0.5588347315788269, "learning_rate": 4.30005425935974e-05, "loss": 0.4322, "step": 1585 }, { "epoch": 0.2582746407197818, "grad_norm": 0.4733457863330841, "learning_rate": 4.3027672273467174e-05, "loss": 0.386, "step": 1586 }, { "epoch": 0.2584374872776127, "grad_norm": 0.45215466618537903, "learning_rate": 4.3054801953336954e-05, "loss": 0.4376, "step": 1587 }, { "epoch": 0.25860033383544356, "grad_norm": 0.5019808411598206, "learning_rate": 4.308193163320673e-05, "loss": 0.4197, "step": 1588 }, { "epoch": 0.25876318039327445, "grad_norm": 0.33711352944374084, "learning_rate": 4.310906131307651e-05, "loss": 0.3534, "step": 1589 }, { "epoch": 0.25892602695110534, "grad_norm": 0.48949912190437317, "learning_rate": 4.3136190992946287e-05, "loss": 0.3615, "step": 1590 }, { "epoch": 0.2590888735089362, "grad_norm": 0.4758036732673645, "learning_rate": 4.3163320672816066e-05, "loss": 0.3991, "step": 1591 }, { "epoch": 0.2592517200667671, "grad_norm": 0.40285083651542664, "learning_rate": 4.319045035268584e-05, "loss": 0.3951, "step": 1592 }, { "epoch": 0.259414566624598, "grad_norm": 0.4593518078327179, "learning_rate": 4.321758003255561e-05, "loss": 0.4036, "step": 1593 }, { "epoch": 0.25957741318242883, "grad_norm": 0.41057875752449036, "learning_rate": 4.32447097124254e-05, "loss": 0.3367, "step": 1594 }, { "epoch": 0.2597402597402597, "grad_norm": 0.43845027685165405, "learning_rate": 4.327183939229517e-05, "loss": 0.3745, "step": 1595 }, { "epoch": 0.2599031062980906, "grad_norm": 0.4727671444416046, "learning_rate": 4.329896907216495e-05, "loss": 0.4105, "step": 1596 }, { "epoch": 0.2600659528559215, "grad_norm": 0.5697853565216064, "learning_rate": 4.3326098752034726e-05, "loss": 0.4077, "step": 1597 }, { "epoch": 0.2602287994137524, "grad_norm": 0.4669637084007263, "learning_rate": 4.3353228431904506e-05, "loss": 0.3525, "step": 1598 }, { "epoch": 0.26039164597158326, "grad_norm": 0.5378132462501526, "learning_rate": 4.3380358111774285e-05, "loss": 0.4062, "step": 1599 }, { "epoch": 0.26055449252941415, "grad_norm": 0.5440139770507812, "learning_rate": 4.340748779164406e-05, "loss": 0.383, "step": 1600 }, { "epoch": 0.26071733908724504, "grad_norm": 0.6262266635894775, "learning_rate": 4.343461747151384e-05, "loss": 0.4059, "step": 1601 }, { "epoch": 0.2608801856450759, "grad_norm": 0.5336539149284363, "learning_rate": 4.346174715138361e-05, "loss": 0.3894, "step": 1602 }, { "epoch": 0.2610430322029068, "grad_norm": 0.47455915808677673, "learning_rate": 4.348887683125339e-05, "loss": 0.4008, "step": 1603 }, { "epoch": 0.2612058787607377, "grad_norm": 0.5336470603942871, "learning_rate": 4.351600651112317e-05, "loss": 0.397, "step": 1604 }, { "epoch": 0.2613687253185686, "grad_norm": 0.49499136209487915, "learning_rate": 4.354313619099295e-05, "loss": 0.4205, "step": 1605 }, { "epoch": 0.26153157187639947, "grad_norm": 0.38649728894233704, "learning_rate": 4.3570265870862725e-05, "loss": 0.367, "step": 1606 }, { "epoch": 0.26169441843423036, "grad_norm": 0.42609772086143494, "learning_rate": 4.3597395550732504e-05, "loss": 0.3846, "step": 1607 }, { "epoch": 0.26185726499206124, "grad_norm": 0.5341659188270569, "learning_rate": 4.3624525230602284e-05, "loss": 0.3805, "step": 1608 }, { "epoch": 0.26202011154989213, "grad_norm": 0.3808041214942932, "learning_rate": 4.365165491047206e-05, "loss": 0.3702, "step": 1609 }, { "epoch": 0.262182958107723, "grad_norm": 0.424691379070282, "learning_rate": 4.367878459034184e-05, "loss": 0.4004, "step": 1610 }, { "epoch": 0.2623458046655539, "grad_norm": 0.46790915727615356, "learning_rate": 4.370591427021161e-05, "loss": 0.3643, "step": 1611 }, { "epoch": 0.2625086512233848, "grad_norm": 0.4698648750782013, "learning_rate": 4.373304395008139e-05, "loss": 0.4377, "step": 1612 }, { "epoch": 0.2626714977812156, "grad_norm": 0.4398749768733978, "learning_rate": 4.376017362995117e-05, "loss": 0.3888, "step": 1613 }, { "epoch": 0.2628343443390465, "grad_norm": 0.44916826486587524, "learning_rate": 4.378730330982095e-05, "loss": 0.3985, "step": 1614 }, { "epoch": 0.2629971908968774, "grad_norm": 0.38424649834632874, "learning_rate": 4.3814432989690723e-05, "loss": 0.3297, "step": 1615 }, { "epoch": 0.2631600374547083, "grad_norm": 0.4924272298812866, "learning_rate": 4.3841562669560497e-05, "loss": 0.4691, "step": 1616 }, { "epoch": 0.26332288401253917, "grad_norm": 0.43181318044662476, "learning_rate": 4.3868692349430277e-05, "loss": 0.3719, "step": 1617 }, { "epoch": 0.26348573057037006, "grad_norm": 0.4025484323501587, "learning_rate": 4.3895822029300056e-05, "loss": 0.3445, "step": 1618 }, { "epoch": 0.26364857712820094, "grad_norm": 0.47578921914100647, "learning_rate": 4.3922951709169836e-05, "loss": 0.402, "step": 1619 }, { "epoch": 0.26381142368603183, "grad_norm": 0.5110929012298584, "learning_rate": 4.395008138903961e-05, "loss": 0.4047, "step": 1620 }, { "epoch": 0.2639742702438627, "grad_norm": 0.468372642993927, "learning_rate": 4.397721106890939e-05, "loss": 0.4156, "step": 1621 }, { "epoch": 0.2641371168016936, "grad_norm": 0.4447181522846222, "learning_rate": 4.400434074877916e-05, "loss": 0.4182, "step": 1622 }, { "epoch": 0.2642999633595245, "grad_norm": 0.4158351719379425, "learning_rate": 4.403147042864895e-05, "loss": 0.3952, "step": 1623 }, { "epoch": 0.2644628099173554, "grad_norm": 0.5255342125892639, "learning_rate": 4.405860010851872e-05, "loss": 0.4301, "step": 1624 }, { "epoch": 0.26462565647518627, "grad_norm": 0.5527597665786743, "learning_rate": 4.4085729788388496e-05, "loss": 0.4266, "step": 1625 }, { "epoch": 0.26478850303301715, "grad_norm": 0.5129051804542542, "learning_rate": 4.4112859468258275e-05, "loss": 0.3804, "step": 1626 }, { "epoch": 0.26495134959084804, "grad_norm": 0.4657805860042572, "learning_rate": 4.4139989148128055e-05, "loss": 0.4153, "step": 1627 }, { "epoch": 0.2651141961486789, "grad_norm": 0.4835071861743927, "learning_rate": 4.4167118827997835e-05, "loss": 0.4063, "step": 1628 }, { "epoch": 0.2652770427065098, "grad_norm": 0.5393379330635071, "learning_rate": 4.419424850786761e-05, "loss": 0.4104, "step": 1629 }, { "epoch": 0.2654398892643407, "grad_norm": 0.43249252438545227, "learning_rate": 4.422137818773739e-05, "loss": 0.3616, "step": 1630 }, { "epoch": 0.26560273582217153, "grad_norm": 0.48457396030426025, "learning_rate": 4.424850786760716e-05, "loss": 0.3828, "step": 1631 }, { "epoch": 0.2657655823800024, "grad_norm": 0.5276483297348022, "learning_rate": 4.427563754747694e-05, "loss": 0.4334, "step": 1632 }, { "epoch": 0.2659284289378333, "grad_norm": 0.5225568413734436, "learning_rate": 4.430276722734672e-05, "loss": 0.369, "step": 1633 }, { "epoch": 0.2660912754956642, "grad_norm": 0.5582985877990723, "learning_rate": 4.4329896907216494e-05, "loss": 0.4342, "step": 1634 }, { "epoch": 0.2662541220534951, "grad_norm": 0.5143191814422607, "learning_rate": 4.4357026587086274e-05, "loss": 0.4036, "step": 1635 }, { "epoch": 0.26641696861132597, "grad_norm": 0.43640562891960144, "learning_rate": 4.438415626695605e-05, "loss": 0.3644, "step": 1636 }, { "epoch": 0.26657981516915685, "grad_norm": 0.3854784071445465, "learning_rate": 4.4411285946825834e-05, "loss": 0.3814, "step": 1637 }, { "epoch": 0.26674266172698774, "grad_norm": 0.4762828052043915, "learning_rate": 4.443841562669561e-05, "loss": 0.3804, "step": 1638 }, { "epoch": 0.2669055082848186, "grad_norm": 0.4670063257217407, "learning_rate": 4.446554530656539e-05, "loss": 0.3447, "step": 1639 }, { "epoch": 0.2670683548426495, "grad_norm": 0.4383365511894226, "learning_rate": 4.449267498643516e-05, "loss": 0.3992, "step": 1640 }, { "epoch": 0.2672312014004804, "grad_norm": 0.4708484411239624, "learning_rate": 4.451980466630494e-05, "loss": 0.4042, "step": 1641 }, { "epoch": 0.2673940479583113, "grad_norm": 0.4535488784313202, "learning_rate": 4.454693434617472e-05, "loss": 0.4401, "step": 1642 }, { "epoch": 0.2675568945161422, "grad_norm": 0.47024983167648315, "learning_rate": 4.457406402604449e-05, "loss": 0.3821, "step": 1643 }, { "epoch": 0.26771974107397306, "grad_norm": 0.4239389896392822, "learning_rate": 4.460119370591427e-05, "loss": 0.3905, "step": 1644 }, { "epoch": 0.26788258763180395, "grad_norm": 0.4928305447101593, "learning_rate": 4.4628323385784046e-05, "loss": 0.4124, "step": 1645 }, { "epoch": 0.26804543418963483, "grad_norm": 0.39210543036460876, "learning_rate": 4.4655453065653826e-05, "loss": 0.3634, "step": 1646 }, { "epoch": 0.2682082807474657, "grad_norm": 0.42122480273246765, "learning_rate": 4.4682582745523606e-05, "loss": 0.394, "step": 1647 }, { "epoch": 0.2683711273052966, "grad_norm": 0.5478378534317017, "learning_rate": 4.4709712425393386e-05, "loss": 0.3902, "step": 1648 }, { "epoch": 0.2685339738631275, "grad_norm": 0.48060551285743713, "learning_rate": 4.473684210526316e-05, "loss": 0.4502, "step": 1649 }, { "epoch": 0.2686968204209583, "grad_norm": 0.3626094162464142, "learning_rate": 4.476397178513293e-05, "loss": 0.3506, "step": 1650 }, { "epoch": 0.2688596669787892, "grad_norm": 0.43767398595809937, "learning_rate": 4.479110146500272e-05, "loss": 0.3868, "step": 1651 }, { "epoch": 0.2690225135366201, "grad_norm": 0.3884507715702057, "learning_rate": 4.481823114487249e-05, "loss": 0.3709, "step": 1652 }, { "epoch": 0.269185360094451, "grad_norm": 0.3771425187587738, "learning_rate": 4.484536082474227e-05, "loss": 0.3665, "step": 1653 }, { "epoch": 0.2693482066522819, "grad_norm": 0.4341685175895691, "learning_rate": 4.4872490504612045e-05, "loss": 0.3956, "step": 1654 }, { "epoch": 0.26951105321011276, "grad_norm": 0.3898180425167084, "learning_rate": 4.4899620184481825e-05, "loss": 0.3499, "step": 1655 }, { "epoch": 0.26967389976794365, "grad_norm": 0.4826945662498474, "learning_rate": 4.4926749864351605e-05, "loss": 0.411, "step": 1656 }, { "epoch": 0.26983674632577453, "grad_norm": 0.5016355514526367, "learning_rate": 4.495387954422138e-05, "loss": 0.4197, "step": 1657 }, { "epoch": 0.2699995928836054, "grad_norm": 0.4381600320339203, "learning_rate": 4.498100922409116e-05, "loss": 0.3773, "step": 1658 }, { "epoch": 0.2701624394414363, "grad_norm": 0.4382671117782593, "learning_rate": 4.500813890396093e-05, "loss": 0.3863, "step": 1659 }, { "epoch": 0.2703252859992672, "grad_norm": 0.5374458432197571, "learning_rate": 4.503526858383071e-05, "loss": 0.457, "step": 1660 }, { "epoch": 0.2704881325570981, "grad_norm": 0.4670999348163605, "learning_rate": 4.506239826370049e-05, "loss": 0.3792, "step": 1661 }, { "epoch": 0.27065097911492897, "grad_norm": 0.5349238514900208, "learning_rate": 4.508952794357027e-05, "loss": 0.4067, "step": 1662 }, { "epoch": 0.27081382567275986, "grad_norm": 0.4595048129558563, "learning_rate": 4.5116657623440044e-05, "loss": 0.3917, "step": 1663 }, { "epoch": 0.27097667223059074, "grad_norm": 0.4838627576828003, "learning_rate": 4.5143787303309824e-05, "loss": 0.3489, "step": 1664 }, { "epoch": 0.27113951878842163, "grad_norm": 0.4881138503551483, "learning_rate": 4.5170916983179604e-05, "loss": 0.4399, "step": 1665 }, { "epoch": 0.2713023653462525, "grad_norm": 0.49793127179145813, "learning_rate": 4.519804666304938e-05, "loss": 0.3884, "step": 1666 }, { "epoch": 0.2714652119040834, "grad_norm": 0.4389955401420593, "learning_rate": 4.522517634291916e-05, "loss": 0.3688, "step": 1667 }, { "epoch": 0.27162805846191423, "grad_norm": 0.5258277654647827, "learning_rate": 4.525230602278893e-05, "loss": 0.3858, "step": 1668 }, { "epoch": 0.2717909050197451, "grad_norm": 0.5325718522071838, "learning_rate": 4.527943570265871e-05, "loss": 0.3849, "step": 1669 }, { "epoch": 0.271953751577576, "grad_norm": 0.4017789363861084, "learning_rate": 4.530656538252849e-05, "loss": 0.3819, "step": 1670 }, { "epoch": 0.2721165981354069, "grad_norm": 0.5126985907554626, "learning_rate": 4.533369506239827e-05, "loss": 0.3822, "step": 1671 }, { "epoch": 0.2722794446932378, "grad_norm": 0.5189812183380127, "learning_rate": 4.536082474226804e-05, "loss": 0.4126, "step": 1672 }, { "epoch": 0.27244229125106867, "grad_norm": 0.49127256870269775, "learning_rate": 4.5387954422137816e-05, "loss": 0.3956, "step": 1673 }, { "epoch": 0.27260513780889956, "grad_norm": 0.4219798147678375, "learning_rate": 4.5415084102007596e-05, "loss": 0.3614, "step": 1674 }, { "epoch": 0.27276798436673044, "grad_norm": 0.3918357789516449, "learning_rate": 4.5442213781877376e-05, "loss": 0.4013, "step": 1675 }, { "epoch": 0.27293083092456133, "grad_norm": 0.4251794219017029, "learning_rate": 4.5469343461747156e-05, "loss": 0.3805, "step": 1676 }, { "epoch": 0.2730936774823922, "grad_norm": 0.4265967011451721, "learning_rate": 4.549647314161693e-05, "loss": 0.3798, "step": 1677 }, { "epoch": 0.2732565240402231, "grad_norm": 0.4179389178752899, "learning_rate": 4.552360282148671e-05, "loss": 0.3583, "step": 1678 }, { "epoch": 0.273419370598054, "grad_norm": 0.5130223631858826, "learning_rate": 4.555073250135648e-05, "loss": 0.4472, "step": 1679 }, { "epoch": 0.2735822171558849, "grad_norm": 0.4335542917251587, "learning_rate": 4.557786218122627e-05, "loss": 0.4245, "step": 1680 }, { "epoch": 0.27374506371371576, "grad_norm": 0.4708966612815857, "learning_rate": 4.560499186109604e-05, "loss": 0.4223, "step": 1681 }, { "epoch": 0.27390791027154665, "grad_norm": 0.4267095625400543, "learning_rate": 4.5632121540965815e-05, "loss": 0.4077, "step": 1682 }, { "epoch": 0.27407075682937754, "grad_norm": 0.4412172734737396, "learning_rate": 4.5659251220835595e-05, "loss": 0.3693, "step": 1683 }, { "epoch": 0.2742336033872084, "grad_norm": 0.41989627480506897, "learning_rate": 4.5686380900705375e-05, "loss": 0.3509, "step": 1684 }, { "epoch": 0.2743964499450393, "grad_norm": 0.44043657183647156, "learning_rate": 4.5713510580575155e-05, "loss": 0.4169, "step": 1685 }, { "epoch": 0.2745592965028702, "grad_norm": 0.4005829989910126, "learning_rate": 4.574064026044493e-05, "loss": 0.354, "step": 1686 }, { "epoch": 0.27472214306070103, "grad_norm": 0.46620282530784607, "learning_rate": 4.576776994031471e-05, "loss": 0.3952, "step": 1687 }, { "epoch": 0.2748849896185319, "grad_norm": 0.4252815842628479, "learning_rate": 4.579489962018448e-05, "loss": 0.3801, "step": 1688 }, { "epoch": 0.2750478361763628, "grad_norm": 0.3672003448009491, "learning_rate": 4.582202930005426e-05, "loss": 0.3992, "step": 1689 }, { "epoch": 0.2752106827341937, "grad_norm": 0.4880002737045288, "learning_rate": 4.584915897992404e-05, "loss": 0.403, "step": 1690 }, { "epoch": 0.2753735292920246, "grad_norm": 0.49483567476272583, "learning_rate": 4.5876288659793814e-05, "loss": 0.3815, "step": 1691 }, { "epoch": 0.27553637584985546, "grad_norm": 0.41830500960350037, "learning_rate": 4.5903418339663594e-05, "loss": 0.3548, "step": 1692 }, { "epoch": 0.27569922240768635, "grad_norm": 0.4708942472934723, "learning_rate": 4.593054801953337e-05, "loss": 0.3787, "step": 1693 }, { "epoch": 0.27586206896551724, "grad_norm": 0.4805351793766022, "learning_rate": 4.5957677699403154e-05, "loss": 0.3966, "step": 1694 }, { "epoch": 0.2760249155233481, "grad_norm": 0.43568092584609985, "learning_rate": 4.598480737927293e-05, "loss": 0.3883, "step": 1695 }, { "epoch": 0.276187762081179, "grad_norm": 0.46211740374565125, "learning_rate": 4.601193705914271e-05, "loss": 0.412, "step": 1696 }, { "epoch": 0.2763506086390099, "grad_norm": 0.42353948950767517, "learning_rate": 4.603906673901248e-05, "loss": 0.3904, "step": 1697 }, { "epoch": 0.2765134551968408, "grad_norm": 0.5907325148582458, "learning_rate": 4.606619641888226e-05, "loss": 0.4242, "step": 1698 }, { "epoch": 0.27667630175467167, "grad_norm": 0.4825391471385956, "learning_rate": 4.609332609875204e-05, "loss": 0.4152, "step": 1699 }, { "epoch": 0.27683914831250256, "grad_norm": 0.5140040516853333, "learning_rate": 4.612045577862181e-05, "loss": 0.4207, "step": 1700 }, { "epoch": 0.27700199487033345, "grad_norm": 0.49251264333724976, "learning_rate": 4.614758545849159e-05, "loss": 0.4114, "step": 1701 }, { "epoch": 0.27716484142816433, "grad_norm": 0.4182841181755066, "learning_rate": 4.6174715138361366e-05, "loss": 0.3324, "step": 1702 }, { "epoch": 0.2773276879859952, "grad_norm": 0.5095961093902588, "learning_rate": 4.6201844818231146e-05, "loss": 0.4054, "step": 1703 }, { "epoch": 0.2774905345438261, "grad_norm": 0.4388253688812256, "learning_rate": 4.6228974498100926e-05, "loss": 0.4005, "step": 1704 }, { "epoch": 0.27765338110165694, "grad_norm": 0.5039715766906738, "learning_rate": 4.62561041779707e-05, "loss": 0.3884, "step": 1705 }, { "epoch": 0.2778162276594878, "grad_norm": 0.45400765538215637, "learning_rate": 4.628323385784048e-05, "loss": 0.3746, "step": 1706 }, { "epoch": 0.2779790742173187, "grad_norm": 0.41164007782936096, "learning_rate": 4.631036353771025e-05, "loss": 0.4149, "step": 1707 }, { "epoch": 0.2781419207751496, "grad_norm": 0.364778071641922, "learning_rate": 4.633749321758004e-05, "loss": 0.357, "step": 1708 }, { "epoch": 0.2783047673329805, "grad_norm": 0.414793461561203, "learning_rate": 4.636462289744981e-05, "loss": 0.335, "step": 1709 }, { "epoch": 0.27846761389081137, "grad_norm": 0.47695615887641907, "learning_rate": 4.639175257731959e-05, "loss": 0.389, "step": 1710 }, { "epoch": 0.27863046044864226, "grad_norm": 0.4467203915119171, "learning_rate": 4.6418882257189365e-05, "loss": 0.3705, "step": 1711 }, { "epoch": 0.27879330700647315, "grad_norm": 0.47958293557167053, "learning_rate": 4.6446011937059145e-05, "loss": 0.4033, "step": 1712 }, { "epoch": 0.27895615356430403, "grad_norm": 0.4712936282157898, "learning_rate": 4.6473141616928925e-05, "loss": 0.4042, "step": 1713 }, { "epoch": 0.2791190001221349, "grad_norm": 0.5003166794776917, "learning_rate": 4.65002712967987e-05, "loss": 0.3787, "step": 1714 }, { "epoch": 0.2792818466799658, "grad_norm": 0.47270357608795166, "learning_rate": 4.652740097666848e-05, "loss": 0.3669, "step": 1715 }, { "epoch": 0.2794446932377967, "grad_norm": 0.4178507924079895, "learning_rate": 4.655453065653825e-05, "loss": 0.3605, "step": 1716 }, { "epoch": 0.2796075397956276, "grad_norm": 0.4766288995742798, "learning_rate": 4.658166033640803e-05, "loss": 0.379, "step": 1717 }, { "epoch": 0.27977038635345847, "grad_norm": 0.47376981377601624, "learning_rate": 4.660879001627781e-05, "loss": 0.395, "step": 1718 }, { "epoch": 0.27993323291128935, "grad_norm": 0.4641153812408447, "learning_rate": 4.663591969614759e-05, "loss": 0.3548, "step": 1719 }, { "epoch": 0.28009607946912024, "grad_norm": 0.47256046533584595, "learning_rate": 4.6663049376017364e-05, "loss": 0.3977, "step": 1720 }, { "epoch": 0.2802589260269511, "grad_norm": 0.42897936701774597, "learning_rate": 4.6690179055887144e-05, "loss": 0.3563, "step": 1721 }, { "epoch": 0.280421772584782, "grad_norm": 0.4516218900680542, "learning_rate": 4.6717308735756924e-05, "loss": 0.3947, "step": 1722 }, { "epoch": 0.28058461914261285, "grad_norm": 0.6535544991493225, "learning_rate": 4.67444384156267e-05, "loss": 0.4211, "step": 1723 }, { "epoch": 0.28074746570044373, "grad_norm": 0.4470267593860626, "learning_rate": 4.677156809549648e-05, "loss": 0.3823, "step": 1724 }, { "epoch": 0.2809103122582746, "grad_norm": 0.4864414632320404, "learning_rate": 4.679869777536625e-05, "loss": 0.4588, "step": 1725 }, { "epoch": 0.2810731588161055, "grad_norm": 0.42351940274238586, "learning_rate": 4.682582745523603e-05, "loss": 0.3945, "step": 1726 }, { "epoch": 0.2812360053739364, "grad_norm": 0.48578763008117676, "learning_rate": 4.685295713510581e-05, "loss": 0.3881, "step": 1727 }, { "epoch": 0.2813988519317673, "grad_norm": 0.4262954890727997, "learning_rate": 4.688008681497559e-05, "loss": 0.3574, "step": 1728 }, { "epoch": 0.28156169848959817, "grad_norm": 0.43538814783096313, "learning_rate": 4.690721649484536e-05, "loss": 0.3643, "step": 1729 }, { "epoch": 0.28172454504742905, "grad_norm": 0.5604320168495178, "learning_rate": 4.6934346174715136e-05, "loss": 0.421, "step": 1730 }, { "epoch": 0.28188739160525994, "grad_norm": 0.4902321994304657, "learning_rate": 4.6961475854584916e-05, "loss": 0.3728, "step": 1731 }, { "epoch": 0.2820502381630908, "grad_norm": 0.5249911546707153, "learning_rate": 4.6988605534454696e-05, "loss": 0.3829, "step": 1732 }, { "epoch": 0.2822130847209217, "grad_norm": 0.6229585409164429, "learning_rate": 4.7015735214324476e-05, "loss": 0.364, "step": 1733 }, { "epoch": 0.2823759312787526, "grad_norm": 0.5493625998497009, "learning_rate": 4.704286489419425e-05, "loss": 0.4336, "step": 1734 }, { "epoch": 0.2825387778365835, "grad_norm": 0.554928719997406, "learning_rate": 4.706999457406403e-05, "loss": 0.4722, "step": 1735 }, { "epoch": 0.2827016243944144, "grad_norm": 0.5930147767066956, "learning_rate": 4.70971242539338e-05, "loss": 0.4218, "step": 1736 }, { "epoch": 0.28286447095224526, "grad_norm": 0.5482152700424194, "learning_rate": 4.712425393380359e-05, "loss": 0.3862, "step": 1737 }, { "epoch": 0.28302731751007615, "grad_norm": 0.5088849067687988, "learning_rate": 4.715138361367336e-05, "loss": 0.355, "step": 1738 }, { "epoch": 0.28319016406790704, "grad_norm": 0.40790116786956787, "learning_rate": 4.7178513293543135e-05, "loss": 0.381, "step": 1739 }, { "epoch": 0.2833530106257379, "grad_norm": 0.474890798330307, "learning_rate": 4.7205642973412915e-05, "loss": 0.41, "step": 1740 }, { "epoch": 0.2835158571835688, "grad_norm": 0.44357502460479736, "learning_rate": 4.7232772653282695e-05, "loss": 0.4276, "step": 1741 }, { "epoch": 0.28367870374139964, "grad_norm": 0.44872748851776123, "learning_rate": 4.7259902333152475e-05, "loss": 0.4355, "step": 1742 }, { "epoch": 0.2838415502992305, "grad_norm": 0.46022579073905945, "learning_rate": 4.728703201302225e-05, "loss": 0.3807, "step": 1743 }, { "epoch": 0.2840043968570614, "grad_norm": 0.3686242401599884, "learning_rate": 4.731416169289203e-05, "loss": 0.3569, "step": 1744 }, { "epoch": 0.2841672434148923, "grad_norm": 0.42505568265914917, "learning_rate": 4.73412913727618e-05, "loss": 0.3857, "step": 1745 }, { "epoch": 0.2843300899727232, "grad_norm": 0.43508604168891907, "learning_rate": 4.736842105263158e-05, "loss": 0.3899, "step": 1746 }, { "epoch": 0.2844929365305541, "grad_norm": 0.3982173502445221, "learning_rate": 4.739555073250136e-05, "loss": 0.4141, "step": 1747 }, { "epoch": 0.28465578308838496, "grad_norm": 0.4182325303554535, "learning_rate": 4.7422680412371134e-05, "loss": 0.3868, "step": 1748 }, { "epoch": 0.28481862964621585, "grad_norm": 0.5239246487617493, "learning_rate": 4.7449810092240914e-05, "loss": 0.4706, "step": 1749 }, { "epoch": 0.28498147620404674, "grad_norm": 0.424461305141449, "learning_rate": 4.747693977211069e-05, "loss": 0.3624, "step": 1750 }, { "epoch": 0.2851443227618776, "grad_norm": 0.4663926661014557, "learning_rate": 4.7504069451980474e-05, "loss": 0.368, "step": 1751 }, { "epoch": 0.2853071693197085, "grad_norm": 0.5045701265335083, "learning_rate": 4.753119913185025e-05, "loss": 0.3746, "step": 1752 }, { "epoch": 0.2854700158775394, "grad_norm": 0.3928978741168976, "learning_rate": 4.755832881172003e-05, "loss": 0.4039, "step": 1753 }, { "epoch": 0.2856328624353703, "grad_norm": 0.4031592905521393, "learning_rate": 4.75854584915898e-05, "loss": 0.3912, "step": 1754 }, { "epoch": 0.28579570899320117, "grad_norm": 0.37560176849365234, "learning_rate": 4.761258817145958e-05, "loss": 0.3693, "step": 1755 }, { "epoch": 0.28595855555103206, "grad_norm": 0.5134719014167786, "learning_rate": 4.763971785132936e-05, "loss": 0.4103, "step": 1756 }, { "epoch": 0.28612140210886294, "grad_norm": 0.4017002284526825, "learning_rate": 4.766684753119913e-05, "loss": 0.3961, "step": 1757 }, { "epoch": 0.28628424866669383, "grad_norm": 0.518004834651947, "learning_rate": 4.769397721106891e-05, "loss": 0.4444, "step": 1758 }, { "epoch": 0.2864470952245247, "grad_norm": 0.41155868768692017, "learning_rate": 4.7721106890938686e-05, "loss": 0.3675, "step": 1759 }, { "epoch": 0.28660994178235555, "grad_norm": 0.4938783645629883, "learning_rate": 4.7748236570808466e-05, "loss": 0.4122, "step": 1760 }, { "epoch": 0.28677278834018644, "grad_norm": 0.37033894658088684, "learning_rate": 4.7775366250678246e-05, "loss": 0.3729, "step": 1761 }, { "epoch": 0.2869356348980173, "grad_norm": 0.513533353805542, "learning_rate": 4.780249593054802e-05, "loss": 0.3731, "step": 1762 }, { "epoch": 0.2870984814558482, "grad_norm": 0.5311691164970398, "learning_rate": 4.78296256104178e-05, "loss": 0.3916, "step": 1763 }, { "epoch": 0.2872613280136791, "grad_norm": 0.32101115584373474, "learning_rate": 4.785675529028757e-05, "loss": 0.3624, "step": 1764 }, { "epoch": 0.28742417457151, "grad_norm": 0.4881322681903839, "learning_rate": 4.788388497015736e-05, "loss": 0.3329, "step": 1765 }, { "epoch": 0.28758702112934087, "grad_norm": 0.5035054683685303, "learning_rate": 4.791101465002713e-05, "loss": 0.3685, "step": 1766 }, { "epoch": 0.28774986768717176, "grad_norm": 0.4310210645198822, "learning_rate": 4.793814432989691e-05, "loss": 0.3595, "step": 1767 }, { "epoch": 0.28791271424500264, "grad_norm": 0.45143163204193115, "learning_rate": 4.7965274009766685e-05, "loss": 0.3595, "step": 1768 }, { "epoch": 0.28807556080283353, "grad_norm": 0.6239156723022461, "learning_rate": 4.7992403689636465e-05, "loss": 0.4089, "step": 1769 }, { "epoch": 0.2882384073606644, "grad_norm": 0.4509783685207367, "learning_rate": 4.8019533369506245e-05, "loss": 0.3884, "step": 1770 }, { "epoch": 0.2884012539184953, "grad_norm": 0.4137568175792694, "learning_rate": 4.804666304937602e-05, "loss": 0.4019, "step": 1771 }, { "epoch": 0.2885641004763262, "grad_norm": 0.5734176635742188, "learning_rate": 4.80737927292458e-05, "loss": 0.4605, "step": 1772 }, { "epoch": 0.2887269470341571, "grad_norm": 0.5535383820533752, "learning_rate": 4.810092240911557e-05, "loss": 0.4106, "step": 1773 }, { "epoch": 0.28888979359198796, "grad_norm": 0.4672004282474518, "learning_rate": 4.812805208898535e-05, "loss": 0.4208, "step": 1774 }, { "epoch": 0.28905264014981885, "grad_norm": 0.5259729623794556, "learning_rate": 4.815518176885513e-05, "loss": 0.3978, "step": 1775 }, { "epoch": 0.28921548670764974, "grad_norm": 0.437869131565094, "learning_rate": 4.818231144872491e-05, "loss": 0.3889, "step": 1776 }, { "epoch": 0.2893783332654806, "grad_norm": 0.5291099548339844, "learning_rate": 4.8209441128594684e-05, "loss": 0.3721, "step": 1777 }, { "epoch": 0.2895411798233115, "grad_norm": 0.3971829116344452, "learning_rate": 4.8236570808464464e-05, "loss": 0.393, "step": 1778 }, { "epoch": 0.28970402638114234, "grad_norm": 0.4256095588207245, "learning_rate": 4.8263700488334244e-05, "loss": 0.3937, "step": 1779 }, { "epoch": 0.28986687293897323, "grad_norm": 0.42773380875587463, "learning_rate": 4.829083016820402e-05, "loss": 0.3771, "step": 1780 }, { "epoch": 0.2900297194968041, "grad_norm": 0.3959764838218689, "learning_rate": 4.83179598480738e-05, "loss": 0.3589, "step": 1781 }, { "epoch": 0.290192566054635, "grad_norm": 0.4625217914581299, "learning_rate": 4.834508952794357e-05, "loss": 0.4144, "step": 1782 }, { "epoch": 0.2903554126124659, "grad_norm": 0.3991395831108093, "learning_rate": 4.837221920781335e-05, "loss": 0.3875, "step": 1783 }, { "epoch": 0.2905182591702968, "grad_norm": 0.39539167284965515, "learning_rate": 4.839934888768313e-05, "loss": 0.3866, "step": 1784 }, { "epoch": 0.29068110572812766, "grad_norm": 0.421856164932251, "learning_rate": 4.842647856755291e-05, "loss": 0.3832, "step": 1785 }, { "epoch": 0.29084395228595855, "grad_norm": 0.48800837993621826, "learning_rate": 4.845360824742268e-05, "loss": 0.3699, "step": 1786 }, { "epoch": 0.29100679884378944, "grad_norm": 0.44993770122528076, "learning_rate": 4.8480737927292456e-05, "loss": 0.4401, "step": 1787 }, { "epoch": 0.2911696454016203, "grad_norm": 0.5069568157196045, "learning_rate": 4.8507867607162236e-05, "loss": 0.4218, "step": 1788 }, { "epoch": 0.2913324919594512, "grad_norm": 0.43044257164001465, "learning_rate": 4.8534997287032016e-05, "loss": 0.3636, "step": 1789 }, { "epoch": 0.2914953385172821, "grad_norm": 0.4453999400138855, "learning_rate": 4.8562126966901796e-05, "loss": 0.4006, "step": 1790 }, { "epoch": 0.291658185075113, "grad_norm": 0.47033676505088806, "learning_rate": 4.858925664677157e-05, "loss": 0.3973, "step": 1791 }, { "epoch": 0.2918210316329439, "grad_norm": 0.49737656116485596, "learning_rate": 4.861638632664135e-05, "loss": 0.41, "step": 1792 }, { "epoch": 0.29198387819077476, "grad_norm": 0.4044589102268219, "learning_rate": 4.864351600651112e-05, "loss": 0.3784, "step": 1793 }, { "epoch": 0.29214672474860565, "grad_norm": 0.5129124522209167, "learning_rate": 4.867064568638091e-05, "loss": 0.4047, "step": 1794 }, { "epoch": 0.29230957130643653, "grad_norm": 0.4321933090686798, "learning_rate": 4.869777536625068e-05, "loss": 0.4021, "step": 1795 }, { "epoch": 0.2924724178642674, "grad_norm": 0.47917285561561584, "learning_rate": 4.8724905046120455e-05, "loss": 0.4202, "step": 1796 }, { "epoch": 0.29263526442209825, "grad_norm": 0.4763645827770233, "learning_rate": 4.8752034725990235e-05, "loss": 0.4057, "step": 1797 }, { "epoch": 0.29279811097992914, "grad_norm": 0.43713700771331787, "learning_rate": 4.8779164405860015e-05, "loss": 0.3679, "step": 1798 }, { "epoch": 0.29296095753776, "grad_norm": 0.4641954302787781, "learning_rate": 4.8806294085729794e-05, "loss": 0.3644, "step": 1799 }, { "epoch": 0.2931238040955909, "grad_norm": 0.4616515040397644, "learning_rate": 4.883342376559957e-05, "loss": 0.4164, "step": 1800 }, { "epoch": 0.2932866506534218, "grad_norm": 0.38818079233169556, "learning_rate": 4.886055344546935e-05, "loss": 0.3619, "step": 1801 }, { "epoch": 0.2934494972112527, "grad_norm": 0.5460730791091919, "learning_rate": 4.888768312533912e-05, "loss": 0.4641, "step": 1802 }, { "epoch": 0.2936123437690836, "grad_norm": 0.4908372759819031, "learning_rate": 4.89148128052089e-05, "loss": 0.3912, "step": 1803 }, { "epoch": 0.29377519032691446, "grad_norm": 0.4668969213962555, "learning_rate": 4.894194248507868e-05, "loss": 0.3781, "step": 1804 }, { "epoch": 0.29393803688474535, "grad_norm": 0.48538532853126526, "learning_rate": 4.8969072164948454e-05, "loss": 0.3784, "step": 1805 }, { "epoch": 0.29410088344257623, "grad_norm": 0.46991315484046936, "learning_rate": 4.8996201844818234e-05, "loss": 0.3543, "step": 1806 }, { "epoch": 0.2942637300004071, "grad_norm": 0.44981738924980164, "learning_rate": 4.902333152468801e-05, "loss": 0.3873, "step": 1807 }, { "epoch": 0.294426576558238, "grad_norm": 0.5140652656555176, "learning_rate": 4.9050461204557793e-05, "loss": 0.3403, "step": 1808 }, { "epoch": 0.2945894231160689, "grad_norm": 0.4549691677093506, "learning_rate": 4.9077590884427567e-05, "loss": 0.3758, "step": 1809 }, { "epoch": 0.2947522696738998, "grad_norm": 0.5036628842353821, "learning_rate": 4.9104720564297346e-05, "loss": 0.3672, "step": 1810 }, { "epoch": 0.29491511623173067, "grad_norm": 0.4558015465736389, "learning_rate": 4.913185024416712e-05, "loss": 0.3815, "step": 1811 }, { "epoch": 0.29507796278956155, "grad_norm": 0.44079655408859253, "learning_rate": 4.91589799240369e-05, "loss": 0.3584, "step": 1812 }, { "epoch": 0.29524080934739244, "grad_norm": 0.44554761052131653, "learning_rate": 4.918610960390668e-05, "loss": 0.3905, "step": 1813 }, { "epoch": 0.29540365590522333, "grad_norm": 0.46409550309181213, "learning_rate": 4.921323928377645e-05, "loss": 0.4018, "step": 1814 }, { "epoch": 0.2955665024630542, "grad_norm": 0.393505334854126, "learning_rate": 4.924036896364623e-05, "loss": 0.3831, "step": 1815 }, { "epoch": 0.29572934902088505, "grad_norm": 0.3674624264240265, "learning_rate": 4.9267498643516006e-05, "loss": 0.3685, "step": 1816 }, { "epoch": 0.29589219557871593, "grad_norm": 0.39069920778274536, "learning_rate": 4.9294628323385786e-05, "loss": 0.3602, "step": 1817 }, { "epoch": 0.2960550421365468, "grad_norm": 0.4092268943786621, "learning_rate": 4.9321758003255565e-05, "loss": 0.3532, "step": 1818 }, { "epoch": 0.2962178886943777, "grad_norm": 0.40157848596572876, "learning_rate": 4.934888768312534e-05, "loss": 0.4399, "step": 1819 }, { "epoch": 0.2963807352522086, "grad_norm": 0.42340871691703796, "learning_rate": 4.937601736299512e-05, "loss": 0.4132, "step": 1820 }, { "epoch": 0.2965435818100395, "grad_norm": 0.47210943698883057, "learning_rate": 4.940314704286489e-05, "loss": 0.4203, "step": 1821 }, { "epoch": 0.29670642836787037, "grad_norm": 0.42663833498954773, "learning_rate": 4.943027672273468e-05, "loss": 0.4082, "step": 1822 }, { "epoch": 0.29686927492570125, "grad_norm": 0.41233840584754944, "learning_rate": 4.945740640260445e-05, "loss": 0.376, "step": 1823 }, { "epoch": 0.29703212148353214, "grad_norm": 0.4325723946094513, "learning_rate": 4.948453608247423e-05, "loss": 0.3659, "step": 1824 }, { "epoch": 0.29719496804136303, "grad_norm": 0.4947564899921417, "learning_rate": 4.9511665762344005e-05, "loss": 0.4363, "step": 1825 }, { "epoch": 0.2973578145991939, "grad_norm": 0.4026413857936859, "learning_rate": 4.9538795442213784e-05, "loss": 0.3657, "step": 1826 }, { "epoch": 0.2975206611570248, "grad_norm": 0.5247260332107544, "learning_rate": 4.9565925122083564e-05, "loss": 0.4217, "step": 1827 }, { "epoch": 0.2976835077148557, "grad_norm": 0.47907155752182007, "learning_rate": 4.959305480195334e-05, "loss": 0.4151, "step": 1828 }, { "epoch": 0.2978463542726866, "grad_norm": 0.4368479251861572, "learning_rate": 4.962018448182312e-05, "loss": 0.387, "step": 1829 }, { "epoch": 0.29800920083051746, "grad_norm": 0.426720529794693, "learning_rate": 4.964731416169289e-05, "loss": 0.3844, "step": 1830 }, { "epoch": 0.29817204738834835, "grad_norm": 0.4591591954231262, "learning_rate": 4.967444384156267e-05, "loss": 0.3729, "step": 1831 }, { "epoch": 0.29833489394617924, "grad_norm": 0.41933614015579224, "learning_rate": 4.970157352143245e-05, "loss": 0.3576, "step": 1832 }, { "epoch": 0.2984977405040101, "grad_norm": 0.4786593019962311, "learning_rate": 4.972870320130223e-05, "loss": 0.3558, "step": 1833 }, { "epoch": 0.29866058706184095, "grad_norm": 1.2780834436416626, "learning_rate": 4.9755832881172003e-05, "loss": 0.4459, "step": 1834 }, { "epoch": 0.29882343361967184, "grad_norm": 0.4350544810295105, "learning_rate": 4.978296256104178e-05, "loss": 0.3676, "step": 1835 }, { "epoch": 0.29898628017750273, "grad_norm": 0.4958527684211731, "learning_rate": 4.981009224091156e-05, "loss": 0.4024, "step": 1836 }, { "epoch": 0.2991491267353336, "grad_norm": 0.45605406165122986, "learning_rate": 4.9837221920781336e-05, "loss": 0.3644, "step": 1837 }, { "epoch": 0.2993119732931645, "grad_norm": 0.5797683000564575, "learning_rate": 4.9864351600651116e-05, "loss": 0.4219, "step": 1838 }, { "epoch": 0.2994748198509954, "grad_norm": 0.5543670654296875, "learning_rate": 4.989148128052089e-05, "loss": 0.4044, "step": 1839 }, { "epoch": 0.2996376664088263, "grad_norm": 0.3902692198753357, "learning_rate": 4.991861096039067e-05, "loss": 0.4012, "step": 1840 }, { "epoch": 0.29980051296665716, "grad_norm": 0.5592243075370789, "learning_rate": 4.994574064026045e-05, "loss": 0.3898, "step": 1841 }, { "epoch": 0.29996335952448805, "grad_norm": 0.5055451989173889, "learning_rate": 4.997287032013023e-05, "loss": 0.4056, "step": 1842 }, { "epoch": 0.30012620608231894, "grad_norm": 0.4121321439743042, "learning_rate": 5e-05, "loss": 0.3683, "step": 1843 }, { "epoch": 0.3002890526401498, "grad_norm": 0.49628302454948425, "learning_rate": 4.999999955121255e-05, "loss": 0.4324, "step": 1844 }, { "epoch": 0.3004518991979807, "grad_norm": 0.46689826250076294, "learning_rate": 4.999999820485022e-05, "loss": 0.3845, "step": 1845 }, { "epoch": 0.3006147457558116, "grad_norm": 0.4911034107208252, "learning_rate": 4.999999596091304e-05, "loss": 0.4373, "step": 1846 }, { "epoch": 0.3007775923136425, "grad_norm": 0.4395139515399933, "learning_rate": 4.999999281940111e-05, "loss": 0.3554, "step": 1847 }, { "epoch": 0.30094043887147337, "grad_norm": 0.45731988549232483, "learning_rate": 4.999998878031453e-05, "loss": 0.3741, "step": 1848 }, { "epoch": 0.30110328542930426, "grad_norm": 0.5391371250152588, "learning_rate": 4.999998384365346e-05, "loss": 0.4596, "step": 1849 }, { "epoch": 0.30126613198713514, "grad_norm": 0.41239264607429504, "learning_rate": 4.999997800941806e-05, "loss": 0.3615, "step": 1850 }, { "epoch": 0.30142897854496603, "grad_norm": 0.5178393721580505, "learning_rate": 4.999997127760855e-05, "loss": 0.4358, "step": 1851 }, { "epoch": 0.30159182510279686, "grad_norm": 0.4885878562927246, "learning_rate": 4.9999963648225175e-05, "loss": 0.4013, "step": 1852 }, { "epoch": 0.30175467166062775, "grad_norm": 0.4937381446361542, "learning_rate": 4.9999955121268195e-05, "loss": 0.3762, "step": 1853 }, { "epoch": 0.30191751821845864, "grad_norm": 0.4886046350002289, "learning_rate": 4.999994569673793e-05, "loss": 0.3642, "step": 1854 }, { "epoch": 0.3020803647762895, "grad_norm": 0.4245503842830658, "learning_rate": 4.999993537463471e-05, "loss": 0.3668, "step": 1855 }, { "epoch": 0.3022432113341204, "grad_norm": 0.4645192325115204, "learning_rate": 4.999992415495891e-05, "loss": 0.411, "step": 1856 }, { "epoch": 0.3024060578919513, "grad_norm": 0.5746769309043884, "learning_rate": 4.9999912037710925e-05, "loss": 0.393, "step": 1857 }, { "epoch": 0.3025689044497822, "grad_norm": 0.48152923583984375, "learning_rate": 4.999989902289121e-05, "loss": 0.3981, "step": 1858 }, { "epoch": 0.30273175100761307, "grad_norm": 0.4367913007736206, "learning_rate": 4.99998851105002e-05, "loss": 0.4292, "step": 1859 }, { "epoch": 0.30289459756544396, "grad_norm": 0.5228321552276611, "learning_rate": 4.999987030053842e-05, "loss": 0.4195, "step": 1860 }, { "epoch": 0.30305744412327484, "grad_norm": 0.5163108110427856, "learning_rate": 4.99998545930064e-05, "loss": 0.3869, "step": 1861 }, { "epoch": 0.30322029068110573, "grad_norm": 0.4471248984336853, "learning_rate": 4.9999837987904694e-05, "loss": 0.4026, "step": 1862 }, { "epoch": 0.3033831372389366, "grad_norm": 0.4600459337234497, "learning_rate": 4.9999820485233906e-05, "loss": 0.3922, "step": 1863 }, { "epoch": 0.3035459837967675, "grad_norm": 0.4757453203201294, "learning_rate": 4.9999802084994654e-05, "loss": 0.3971, "step": 1864 }, { "epoch": 0.3037088303545984, "grad_norm": 0.3649313151836395, "learning_rate": 4.999978278718761e-05, "loss": 0.3761, "step": 1865 }, { "epoch": 0.3038716769124293, "grad_norm": 0.4286883473396301, "learning_rate": 4.999976259181347e-05, "loss": 0.3985, "step": 1866 }, { "epoch": 0.30403452347026017, "grad_norm": 0.8065398335456848, "learning_rate": 4.999974149887294e-05, "loss": 0.3906, "step": 1867 }, { "epoch": 0.30419737002809105, "grad_norm": 0.41509971022605896, "learning_rate": 4.999971950836679e-05, "loss": 0.3993, "step": 1868 }, { "epoch": 0.30436021658592194, "grad_norm": 0.5640272498130798, "learning_rate": 4.999969662029582e-05, "loss": 0.4203, "step": 1869 }, { "epoch": 0.3045230631437528, "grad_norm": 0.9625499248504639, "learning_rate": 4.999967283466083e-05, "loss": 0.3885, "step": 1870 }, { "epoch": 0.30468590970158366, "grad_norm": 0.4035845100879669, "learning_rate": 4.9999648151462684e-05, "loss": 0.3469, "step": 1871 }, { "epoch": 0.30484875625941454, "grad_norm": 0.4721362888813019, "learning_rate": 4.9999622570702273e-05, "loss": 0.3858, "step": 1872 }, { "epoch": 0.30501160281724543, "grad_norm": 0.5886922478675842, "learning_rate": 4.999959609238051e-05, "loss": 0.3997, "step": 1873 }, { "epoch": 0.3051744493750763, "grad_norm": 0.7569371461868286, "learning_rate": 4.999956871649835e-05, "loss": 0.4146, "step": 1874 }, { "epoch": 0.3053372959329072, "grad_norm": 0.35800546407699585, "learning_rate": 4.999954044305676e-05, "loss": 0.371, "step": 1875 }, { "epoch": 0.3055001424907381, "grad_norm": 0.6515138149261475, "learning_rate": 4.9999511272056776e-05, "loss": 0.4291, "step": 1876 }, { "epoch": 0.305662989048569, "grad_norm": 0.5065401196479797, "learning_rate": 4.999948120349944e-05, "loss": 0.3709, "step": 1877 }, { "epoch": 0.30582583560639987, "grad_norm": 0.4293089509010315, "learning_rate": 4.9999450237385826e-05, "loss": 0.394, "step": 1878 }, { "epoch": 0.30598868216423075, "grad_norm": 0.5461025238037109, "learning_rate": 4.9999418373717045e-05, "loss": 0.4028, "step": 1879 }, { "epoch": 0.30615152872206164, "grad_norm": 0.5248792767524719, "learning_rate": 4.999938561249424e-05, "loss": 0.376, "step": 1880 }, { "epoch": 0.3063143752798925, "grad_norm": 0.3631563186645508, "learning_rate": 4.99993519537186e-05, "loss": 0.336, "step": 1881 }, { "epoch": 0.3064772218377234, "grad_norm": 0.3797168433666229, "learning_rate": 4.999931739739132e-05, "loss": 0.3788, "step": 1882 }, { "epoch": 0.3066400683955543, "grad_norm": 0.5389899611473083, "learning_rate": 4.9999281943513655e-05, "loss": 0.395, "step": 1883 }, { "epoch": 0.3068029149533852, "grad_norm": 0.4170883595943451, "learning_rate": 4.9999245592086856e-05, "loss": 0.3682, "step": 1884 }, { "epoch": 0.3069657615112161, "grad_norm": 0.4418940246105194, "learning_rate": 4.9999208343112246e-05, "loss": 0.3913, "step": 1885 }, { "epoch": 0.30712860806904696, "grad_norm": 0.36853376030921936, "learning_rate": 4.9999170196591155e-05, "loss": 0.3254, "step": 1886 }, { "epoch": 0.30729145462687785, "grad_norm": 0.4156794846057892, "learning_rate": 4.999913115252496e-05, "loss": 0.3924, "step": 1887 }, { "epoch": 0.30745430118470873, "grad_norm": 0.46038010716438293, "learning_rate": 4.999909121091505e-05, "loss": 0.4156, "step": 1888 }, { "epoch": 0.30761714774253957, "grad_norm": 0.5421790480613708, "learning_rate": 4.9999050371762866e-05, "loss": 0.4136, "step": 1889 }, { "epoch": 0.30777999430037045, "grad_norm": 0.4012993574142456, "learning_rate": 4.999900863506988e-05, "loss": 0.3454, "step": 1890 }, { "epoch": 0.30794284085820134, "grad_norm": 0.4491115212440491, "learning_rate": 4.999896600083758e-05, "loss": 0.3902, "step": 1891 }, { "epoch": 0.3081056874160322, "grad_norm": 0.5754231810569763, "learning_rate": 4.9998922469067514e-05, "loss": 0.4373, "step": 1892 }, { "epoch": 0.3082685339738631, "grad_norm": 0.4143843352794647, "learning_rate": 4.999887803976122e-05, "loss": 0.3772, "step": 1893 }, { "epoch": 0.308431380531694, "grad_norm": 0.4237886965274811, "learning_rate": 4.999883271292031e-05, "loss": 0.3331, "step": 1894 }, { "epoch": 0.3085942270895249, "grad_norm": 0.48162856698036194, "learning_rate": 4.99987864885464e-05, "loss": 0.3738, "step": 1895 }, { "epoch": 0.3087570736473558, "grad_norm": 0.4239383935928345, "learning_rate": 4.9998739366641165e-05, "loss": 0.3548, "step": 1896 }, { "epoch": 0.30891992020518666, "grad_norm": 0.39725416898727417, "learning_rate": 4.999869134720628e-05, "loss": 0.3873, "step": 1897 }, { "epoch": 0.30908276676301755, "grad_norm": 0.4261177182197571, "learning_rate": 4.999864243024349e-05, "loss": 0.351, "step": 1898 }, { "epoch": 0.30924561332084843, "grad_norm": 0.4738151729106903, "learning_rate": 4.999859261575453e-05, "loss": 0.3666, "step": 1899 }, { "epoch": 0.3094084598786793, "grad_norm": 0.41698741912841797, "learning_rate": 4.9998541903741205e-05, "loss": 0.3895, "step": 1900 }, { "epoch": 0.3095713064365102, "grad_norm": 0.48053058981895447, "learning_rate": 4.999849029420531e-05, "loss": 0.3606, "step": 1901 }, { "epoch": 0.3097341529943411, "grad_norm": 0.524414598941803, "learning_rate": 4.9998437787148734e-05, "loss": 0.3822, "step": 1902 }, { "epoch": 0.309896999552172, "grad_norm": 0.39323464035987854, "learning_rate": 4.9998384382573335e-05, "loss": 0.3856, "step": 1903 }, { "epoch": 0.31005984611000287, "grad_norm": 0.42522570490837097, "learning_rate": 4.9998330080481046e-05, "loss": 0.3634, "step": 1904 }, { "epoch": 0.31022269266783375, "grad_norm": 0.5094670653343201, "learning_rate": 4.9998274880873805e-05, "loss": 0.4087, "step": 1905 }, { "epoch": 0.31038553922566464, "grad_norm": 0.46980375051498413, "learning_rate": 4.9998218783753604e-05, "loss": 0.4077, "step": 1906 }, { "epoch": 0.31054838578349553, "grad_norm": 0.45186153054237366, "learning_rate": 4.9998161789122455e-05, "loss": 0.3834, "step": 1907 }, { "epoch": 0.31071123234132636, "grad_norm": 0.5202677249908447, "learning_rate": 4.99981038969824e-05, "loss": 0.3985, "step": 1908 }, { "epoch": 0.31087407889915725, "grad_norm": 0.5123717188835144, "learning_rate": 4.999804510733551e-05, "loss": 0.4066, "step": 1909 }, { "epoch": 0.31103692545698813, "grad_norm": 0.4768776297569275, "learning_rate": 4.9997985420183904e-05, "loss": 0.4086, "step": 1910 }, { "epoch": 0.311199772014819, "grad_norm": 0.48026999831199646, "learning_rate": 4.999792483552973e-05, "loss": 0.3788, "step": 1911 }, { "epoch": 0.3113626185726499, "grad_norm": 0.45890098810195923, "learning_rate": 4.999786335337516e-05, "loss": 0.4361, "step": 1912 }, { "epoch": 0.3115254651304808, "grad_norm": 0.3802589774131775, "learning_rate": 4.9997800973722396e-05, "loss": 0.4198, "step": 1913 }, { "epoch": 0.3116883116883117, "grad_norm": 0.5306681394577026, "learning_rate": 4.999773769657369e-05, "loss": 0.3913, "step": 1914 }, { "epoch": 0.31185115824614257, "grad_norm": 0.44030794501304626, "learning_rate": 4.99976735219313e-05, "loss": 0.3877, "step": 1915 }, { "epoch": 0.31201400480397345, "grad_norm": 0.4510795474052429, "learning_rate": 4.999760844979753e-05, "loss": 0.3598, "step": 1916 }, { "epoch": 0.31217685136180434, "grad_norm": 0.5157545208930969, "learning_rate": 4.999754248017473e-05, "loss": 0.4363, "step": 1917 }, { "epoch": 0.31233969791963523, "grad_norm": 0.3964964747428894, "learning_rate": 4.999747561306526e-05, "loss": 0.3863, "step": 1918 }, { "epoch": 0.3125025444774661, "grad_norm": 0.5337929129600525, "learning_rate": 4.9997407848471524e-05, "loss": 0.4163, "step": 1919 }, { "epoch": 0.312665391035297, "grad_norm": 0.38677194714546204, "learning_rate": 4.999733918639594e-05, "loss": 0.3774, "step": 1920 }, { "epoch": 0.3128282375931279, "grad_norm": 0.3985142409801483, "learning_rate": 4.999726962684099e-05, "loss": 0.3767, "step": 1921 }, { "epoch": 0.3129910841509588, "grad_norm": 0.4707549214363098, "learning_rate": 4.9997199169809174e-05, "loss": 0.3727, "step": 1922 }, { "epoch": 0.31315393070878966, "grad_norm": 0.41104477643966675, "learning_rate": 4.999712781530301e-05, "loss": 0.3976, "step": 1923 }, { "epoch": 0.31331677726662055, "grad_norm": 0.3584839403629303, "learning_rate": 4.9997055563325055e-05, "loss": 0.364, "step": 1924 }, { "epoch": 0.31347962382445144, "grad_norm": 0.4021787643432617, "learning_rate": 4.999698241387792e-05, "loss": 0.4298, "step": 1925 }, { "epoch": 0.31364247038228227, "grad_norm": 0.4372889995574951, "learning_rate": 4.999690836696423e-05, "loss": 0.3671, "step": 1926 }, { "epoch": 0.31380531694011315, "grad_norm": 0.35964301228523254, "learning_rate": 4.9996833422586634e-05, "loss": 0.3423, "step": 1927 }, { "epoch": 0.31396816349794404, "grad_norm": 0.4301716983318329, "learning_rate": 4.9996757580747816e-05, "loss": 0.3771, "step": 1928 }, { "epoch": 0.31413101005577493, "grad_norm": 0.4336283206939697, "learning_rate": 4.999668084145051e-05, "loss": 0.3916, "step": 1929 }, { "epoch": 0.3142938566136058, "grad_norm": 0.3806128203868866, "learning_rate": 4.999660320469748e-05, "loss": 0.3474, "step": 1930 }, { "epoch": 0.3144567031714367, "grad_norm": 0.41670528054237366, "learning_rate": 4.9996524670491495e-05, "loss": 0.3962, "step": 1931 }, { "epoch": 0.3146195497292676, "grad_norm": 0.4455869495868683, "learning_rate": 4.999644523883539e-05, "loss": 0.4212, "step": 1932 }, { "epoch": 0.3147823962870985, "grad_norm": 0.447350412607193, "learning_rate": 4.9996364909732e-05, "loss": 0.4033, "step": 1933 }, { "epoch": 0.31494524284492936, "grad_norm": 0.4013063609600067, "learning_rate": 4.999628368318422e-05, "loss": 0.3772, "step": 1934 }, { "epoch": 0.31510808940276025, "grad_norm": 0.3858206272125244, "learning_rate": 4.9996201559194974e-05, "loss": 0.3763, "step": 1935 }, { "epoch": 0.31527093596059114, "grad_norm": 0.4391472637653351, "learning_rate": 4.9996118537767195e-05, "loss": 0.3741, "step": 1936 }, { "epoch": 0.315433782518422, "grad_norm": 0.38890713453292847, "learning_rate": 4.999603461890387e-05, "loss": 0.3579, "step": 1937 }, { "epoch": 0.3155966290762529, "grad_norm": 0.3892892301082611, "learning_rate": 4.999594980260801e-05, "loss": 0.4145, "step": 1938 }, { "epoch": 0.3157594756340838, "grad_norm": 0.47846558690071106, "learning_rate": 4.999586408888267e-05, "loss": 0.4232, "step": 1939 }, { "epoch": 0.3159223221919147, "grad_norm": 0.3954562246799469, "learning_rate": 4.999577747773091e-05, "loss": 0.3459, "step": 1940 }, { "epoch": 0.31608516874974557, "grad_norm": 0.44611552357673645, "learning_rate": 4.999568996915586e-05, "loss": 0.4435, "step": 1941 }, { "epoch": 0.31624801530757646, "grad_norm": 0.4305401146411896, "learning_rate": 4.9995601563160645e-05, "loss": 0.3827, "step": 1942 }, { "epoch": 0.31641086186540734, "grad_norm": 0.46574655175209045, "learning_rate": 4.999551225974844e-05, "loss": 0.4162, "step": 1943 }, { "epoch": 0.31657370842323823, "grad_norm": 0.4451100826263428, "learning_rate": 4.999542205892246e-05, "loss": 0.347, "step": 1944 }, { "epoch": 0.31673655498106906, "grad_norm": 0.5067169070243835, "learning_rate": 4.9995330960685945e-05, "loss": 0.3927, "step": 1945 }, { "epoch": 0.31689940153889995, "grad_norm": 0.3931586444377899, "learning_rate": 4.9995238965042156e-05, "loss": 0.366, "step": 1946 }, { "epoch": 0.31706224809673084, "grad_norm": 0.44406524300575256, "learning_rate": 4.99951460719944e-05, "loss": 0.3978, "step": 1947 }, { "epoch": 0.3172250946545617, "grad_norm": 0.4269936680793762, "learning_rate": 4.9995052281546014e-05, "loss": 0.3912, "step": 1948 }, { "epoch": 0.3173879412123926, "grad_norm": 0.4230855107307434, "learning_rate": 4.999495759370036e-05, "loss": 0.3472, "step": 1949 }, { "epoch": 0.3175507877702235, "grad_norm": 0.4789096713066101, "learning_rate": 4.9994862008460843e-05, "loss": 0.4305, "step": 1950 }, { "epoch": 0.3177136343280544, "grad_norm": 0.4443901777267456, "learning_rate": 4.99947655258309e-05, "loss": 0.4026, "step": 1951 }, { "epoch": 0.31787648088588527, "grad_norm": 0.4781956672668457, "learning_rate": 4.999466814581398e-05, "loss": 0.4098, "step": 1952 }, { "epoch": 0.31803932744371616, "grad_norm": 0.40546634793281555, "learning_rate": 4.9994569868413596e-05, "loss": 0.4159, "step": 1953 }, { "epoch": 0.31820217400154704, "grad_norm": 0.35310637950897217, "learning_rate": 4.999447069363326e-05, "loss": 0.3379, "step": 1954 }, { "epoch": 0.31836502055937793, "grad_norm": 0.40478792786598206, "learning_rate": 4.9994370621476545e-05, "loss": 0.3861, "step": 1955 }, { "epoch": 0.3185278671172088, "grad_norm": 0.44414716958999634, "learning_rate": 4.9994269651947035e-05, "loss": 0.4087, "step": 1956 }, { "epoch": 0.3186907136750397, "grad_norm": 0.41882845759391785, "learning_rate": 4.999416778504836e-05, "loss": 0.3811, "step": 1957 }, { "epoch": 0.3188535602328706, "grad_norm": 0.38811194896698, "learning_rate": 4.999406502078418e-05, "loss": 0.3695, "step": 1958 }, { "epoch": 0.3190164067907015, "grad_norm": 0.38873228430747986, "learning_rate": 4.999396135915819e-05, "loss": 0.3766, "step": 1959 }, { "epoch": 0.31917925334853237, "grad_norm": 0.47855421900749207, "learning_rate": 4.999385680017409e-05, "loss": 0.4097, "step": 1960 }, { "epoch": 0.31934209990636325, "grad_norm": 0.3907240033149719, "learning_rate": 4.999375134383565e-05, "loss": 0.3486, "step": 1961 }, { "epoch": 0.31950494646419414, "grad_norm": 0.46873533725738525, "learning_rate": 4.999364499014666e-05, "loss": 0.3992, "step": 1962 }, { "epoch": 0.31966779302202497, "grad_norm": 0.3962244987487793, "learning_rate": 4.999353773911093e-05, "loss": 0.351, "step": 1963 }, { "epoch": 0.31983063957985586, "grad_norm": 0.46136006712913513, "learning_rate": 4.999342959073231e-05, "loss": 0.3902, "step": 1964 }, { "epoch": 0.31999348613768674, "grad_norm": 0.387317955493927, "learning_rate": 4.9993320545014686e-05, "loss": 0.365, "step": 1965 }, { "epoch": 0.32015633269551763, "grad_norm": 0.43367230892181396, "learning_rate": 4.999321060196197e-05, "loss": 0.4129, "step": 1966 }, { "epoch": 0.3203191792533485, "grad_norm": 0.3946807384490967, "learning_rate": 4.999309976157811e-05, "loss": 0.3901, "step": 1967 }, { "epoch": 0.3204820258111794, "grad_norm": 0.398958683013916, "learning_rate": 4.999298802386709e-05, "loss": 0.4131, "step": 1968 }, { "epoch": 0.3206448723690103, "grad_norm": 0.3469245135784149, "learning_rate": 4.9992875388832926e-05, "loss": 0.3361, "step": 1969 }, { "epoch": 0.3208077189268412, "grad_norm": 0.4140973687171936, "learning_rate": 4.999276185647965e-05, "loss": 0.3994, "step": 1970 }, { "epoch": 0.32097056548467207, "grad_norm": 0.4885014593601227, "learning_rate": 4.999264742681133e-05, "loss": 0.4043, "step": 1971 }, { "epoch": 0.32113341204250295, "grad_norm": 0.37557315826416016, "learning_rate": 4.999253209983211e-05, "loss": 0.3815, "step": 1972 }, { "epoch": 0.32129625860033384, "grad_norm": 0.447297602891922, "learning_rate": 4.9992415875546096e-05, "loss": 0.395, "step": 1973 }, { "epoch": 0.3214591051581647, "grad_norm": 0.3877882659435272, "learning_rate": 4.999229875395747e-05, "loss": 0.374, "step": 1974 }, { "epoch": 0.3216219517159956, "grad_norm": 0.4061221182346344, "learning_rate": 4.999218073507045e-05, "loss": 0.3538, "step": 1975 }, { "epoch": 0.3217847982738265, "grad_norm": 0.45624151825904846, "learning_rate": 4.999206181888925e-05, "loss": 0.3984, "step": 1976 }, { "epoch": 0.3219476448316574, "grad_norm": 0.38767486810684204, "learning_rate": 4.9991942005418165e-05, "loss": 0.3701, "step": 1977 }, { "epoch": 0.3221104913894883, "grad_norm": 0.45444682240486145, "learning_rate": 4.999182129466148e-05, "loss": 0.3735, "step": 1978 }, { "epoch": 0.32227333794731916, "grad_norm": 0.4448585510253906, "learning_rate": 4.999169968662353e-05, "loss": 0.3654, "step": 1979 }, { "epoch": 0.32243618450515005, "grad_norm": 0.38312727212905884, "learning_rate": 4.999157718130868e-05, "loss": 0.3725, "step": 1980 }, { "epoch": 0.3225990310629809, "grad_norm": 0.40579259395599365, "learning_rate": 4.999145377872134e-05, "loss": 0.3863, "step": 1981 }, { "epoch": 0.32276187762081177, "grad_norm": 0.4290969669818878, "learning_rate": 4.999132947886593e-05, "loss": 0.3607, "step": 1982 }, { "epoch": 0.32292472417864265, "grad_norm": 0.4783307909965515, "learning_rate": 4.999120428174692e-05, "loss": 0.396, "step": 1983 }, { "epoch": 0.32308757073647354, "grad_norm": 0.44998058676719666, "learning_rate": 4.9991078187368793e-05, "loss": 0.4021, "step": 1984 }, { "epoch": 0.3232504172943044, "grad_norm": 0.44964274764060974, "learning_rate": 4.999095119573609e-05, "loss": 0.3889, "step": 1985 }, { "epoch": 0.3234132638521353, "grad_norm": 0.49230119585990906, "learning_rate": 4.9990823306853364e-05, "loss": 0.423, "step": 1986 }, { "epoch": 0.3235761104099662, "grad_norm": 0.4178657829761505, "learning_rate": 4.99906945207252e-05, "loss": 0.3299, "step": 1987 }, { "epoch": 0.3237389569677971, "grad_norm": 0.4742202162742615, "learning_rate": 4.9990564837356236e-05, "loss": 0.3703, "step": 1988 }, { "epoch": 0.323901803525628, "grad_norm": 0.41599613428115845, "learning_rate": 4.999043425675112e-05, "loss": 0.3546, "step": 1989 }, { "epoch": 0.32406465008345886, "grad_norm": 0.36688482761383057, "learning_rate": 4.999030277891454e-05, "loss": 0.3644, "step": 1990 }, { "epoch": 0.32422749664128975, "grad_norm": 0.3833234906196594, "learning_rate": 4.9990170403851214e-05, "loss": 0.3951, "step": 1991 }, { "epoch": 0.32439034319912063, "grad_norm": 0.3920401930809021, "learning_rate": 4.99900371315659e-05, "loss": 0.3786, "step": 1992 }, { "epoch": 0.3245531897569515, "grad_norm": 0.4101138710975647, "learning_rate": 4.998990296206338e-05, "loss": 0.3854, "step": 1993 }, { "epoch": 0.3247160363147824, "grad_norm": 0.4439780116081238, "learning_rate": 4.998976789534847e-05, "loss": 0.4038, "step": 1994 }, { "epoch": 0.3248788828726133, "grad_norm": 0.40805020928382874, "learning_rate": 4.998963193142603e-05, "loss": 0.4052, "step": 1995 }, { "epoch": 0.3250417294304442, "grad_norm": 0.43000802397727966, "learning_rate": 4.998949507030093e-05, "loss": 0.4051, "step": 1996 }, { "epoch": 0.32520457598827507, "grad_norm": 0.4578833281993866, "learning_rate": 4.998935731197808e-05, "loss": 0.365, "step": 1997 }, { "epoch": 0.32536742254610596, "grad_norm": 0.34631478786468506, "learning_rate": 4.9989218656462435e-05, "loss": 0.3898, "step": 1998 }, { "epoch": 0.32553026910393684, "grad_norm": 0.3485005497932434, "learning_rate": 4.9989079103758976e-05, "loss": 0.3733, "step": 1999 }, { "epoch": 0.3256931156617677, "grad_norm": 0.4448842704296112, "learning_rate": 4.99889386538727e-05, "loss": 0.3717, "step": 2000 }, { "epoch": 0.32585596221959856, "grad_norm": 0.49329450726509094, "learning_rate": 4.998879730680866e-05, "loss": 0.4204, "step": 2001 }, { "epoch": 0.32601880877742945, "grad_norm": 0.3656468689441681, "learning_rate": 4.998865506257193e-05, "loss": 0.3544, "step": 2002 }, { "epoch": 0.32618165533526033, "grad_norm": 0.36369824409484863, "learning_rate": 4.9988511921167616e-05, "loss": 0.3767, "step": 2003 }, { "epoch": 0.3263445018930912, "grad_norm": 0.43777230381965637, "learning_rate": 4.998836788260085e-05, "loss": 0.3962, "step": 2004 }, { "epoch": 0.3265073484509221, "grad_norm": 0.3600282073020935, "learning_rate": 4.998822294687682e-05, "loss": 0.3959, "step": 2005 }, { "epoch": 0.326670195008753, "grad_norm": 0.41330718994140625, "learning_rate": 4.998807711400071e-05, "loss": 0.3896, "step": 2006 }, { "epoch": 0.3268330415665839, "grad_norm": 0.39146798849105835, "learning_rate": 4.9987930383977774e-05, "loss": 0.3588, "step": 2007 }, { "epoch": 0.32699588812441477, "grad_norm": 0.42383623123168945, "learning_rate": 4.998778275681326e-05, "loss": 0.428, "step": 2008 }, { "epoch": 0.32715873468224566, "grad_norm": 0.42482224106788635, "learning_rate": 4.998763423251249e-05, "loss": 0.3846, "step": 2009 }, { "epoch": 0.32732158124007654, "grad_norm": 0.3825829327106476, "learning_rate": 4.998748481108079e-05, "loss": 0.4002, "step": 2010 }, { "epoch": 0.32748442779790743, "grad_norm": 0.474324494600296, "learning_rate": 4.998733449252351e-05, "loss": 0.3724, "step": 2011 }, { "epoch": 0.3276472743557383, "grad_norm": 0.4783271849155426, "learning_rate": 4.998718327684606e-05, "loss": 0.4008, "step": 2012 }, { "epoch": 0.3278101209135692, "grad_norm": 0.3775007128715515, "learning_rate": 4.998703116405387e-05, "loss": 0.3748, "step": 2013 }, { "epoch": 0.3279729674714001, "grad_norm": 0.4184856414794922, "learning_rate": 4.9986878154152394e-05, "loss": 0.3918, "step": 2014 }, { "epoch": 0.328135814029231, "grad_norm": 0.4527062773704529, "learning_rate": 4.998672424714713e-05, "loss": 0.3842, "step": 2015 }, { "epoch": 0.32829866058706186, "grad_norm": 0.4891839325428009, "learning_rate": 4.998656944304361e-05, "loss": 0.4007, "step": 2016 }, { "epoch": 0.32846150714489275, "grad_norm": 0.4044787287712097, "learning_rate": 4.998641374184737e-05, "loss": 0.4064, "step": 2017 }, { "epoch": 0.3286243537027236, "grad_norm": 0.4751010835170746, "learning_rate": 4.998625714356403e-05, "loss": 0.4221, "step": 2018 }, { "epoch": 0.32878720026055447, "grad_norm": 0.4694335460662842, "learning_rate": 4.998609964819919e-05, "loss": 0.427, "step": 2019 }, { "epoch": 0.32895004681838536, "grad_norm": 0.40255796909332275, "learning_rate": 4.998594125575852e-05, "loss": 0.374, "step": 2020 }, { "epoch": 0.32911289337621624, "grad_norm": 0.4744682312011719, "learning_rate": 4.998578196624769e-05, "loss": 0.4287, "step": 2021 }, { "epoch": 0.32927573993404713, "grad_norm": 0.405286580324173, "learning_rate": 4.998562177967243e-05, "loss": 0.4163, "step": 2022 }, { "epoch": 0.329438586491878, "grad_norm": 0.4379030168056488, "learning_rate": 4.99854606960385e-05, "loss": 0.3471, "step": 2023 }, { "epoch": 0.3296014330497089, "grad_norm": 0.36888161301612854, "learning_rate": 4.998529871535166e-05, "loss": 0.3962, "step": 2024 }, { "epoch": 0.3297642796075398, "grad_norm": 0.38289666175842285, "learning_rate": 4.998513583761774e-05, "loss": 0.3819, "step": 2025 }, { "epoch": 0.3299271261653707, "grad_norm": 0.3616005778312683, "learning_rate": 4.9984972062842594e-05, "loss": 0.3764, "step": 2026 }, { "epoch": 0.33008997272320156, "grad_norm": 0.3772159814834595, "learning_rate": 4.9984807391032085e-05, "loss": 0.3769, "step": 2027 }, { "epoch": 0.33025281928103245, "grad_norm": 0.48927733302116394, "learning_rate": 4.9984641822192135e-05, "loss": 0.461, "step": 2028 }, { "epoch": 0.33041566583886334, "grad_norm": 0.35133200883865356, "learning_rate": 4.9984475356328696e-05, "loss": 0.3599, "step": 2029 }, { "epoch": 0.3305785123966942, "grad_norm": 0.4998719096183777, "learning_rate": 4.998430799344773e-05, "loss": 0.3963, "step": 2030 }, { "epoch": 0.3307413589545251, "grad_norm": 0.42197123169898987, "learning_rate": 4.998413973355525e-05, "loss": 0.3945, "step": 2031 }, { "epoch": 0.330904205512356, "grad_norm": 0.3725321888923645, "learning_rate": 4.99839705766573e-05, "loss": 0.3721, "step": 2032 }, { "epoch": 0.3310670520701869, "grad_norm": 0.47293907403945923, "learning_rate": 4.998380052275996e-05, "loss": 0.4242, "step": 2033 }, { "epoch": 0.33122989862801777, "grad_norm": 0.4646495282649994, "learning_rate": 4.998362957186932e-05, "loss": 0.3956, "step": 2034 }, { "epoch": 0.33139274518584866, "grad_norm": 0.45400699973106384, "learning_rate": 4.998345772399152e-05, "loss": 0.3709, "step": 2035 }, { "epoch": 0.33155559174367955, "grad_norm": 0.5162721276283264, "learning_rate": 4.998328497913275e-05, "loss": 0.4267, "step": 2036 }, { "epoch": 0.3317184383015104, "grad_norm": 0.4253499507904053, "learning_rate": 4.998311133729918e-05, "loss": 0.3847, "step": 2037 }, { "epoch": 0.33188128485934126, "grad_norm": 0.6872419714927673, "learning_rate": 4.9982936798497076e-05, "loss": 0.43, "step": 2038 }, { "epoch": 0.33204413141717215, "grad_norm": 0.42541801929473877, "learning_rate": 4.998276136273269e-05, "loss": 0.3868, "step": 2039 }, { "epoch": 0.33220697797500304, "grad_norm": 0.4885125458240509, "learning_rate": 4.998258503001231e-05, "loss": 0.3858, "step": 2040 }, { "epoch": 0.3323698245328339, "grad_norm": 0.43806421756744385, "learning_rate": 4.9982407800342286e-05, "loss": 0.3902, "step": 2041 }, { "epoch": 0.3325326710906648, "grad_norm": 0.8522404432296753, "learning_rate": 4.998222967372897e-05, "loss": 0.3874, "step": 2042 }, { "epoch": 0.3326955176484957, "grad_norm": 0.36749663949012756, "learning_rate": 4.998205065017876e-05, "loss": 0.3528, "step": 2043 }, { "epoch": 0.3328583642063266, "grad_norm": 0.3395629823207855, "learning_rate": 4.998187072969808e-05, "loss": 0.3492, "step": 2044 }, { "epoch": 0.33302121076415747, "grad_norm": 0.4336118698120117, "learning_rate": 4.99816899122934e-05, "loss": 0.4254, "step": 2045 }, { "epoch": 0.33318405732198836, "grad_norm": 0.3334692120552063, "learning_rate": 4.998150819797121e-05, "loss": 0.3541, "step": 2046 }, { "epoch": 0.33334690387981925, "grad_norm": 0.3295406103134155, "learning_rate": 4.998132558673801e-05, "loss": 0.3306, "step": 2047 }, { "epoch": 0.33350975043765013, "grad_norm": 0.42694276571273804, "learning_rate": 4.998114207860039e-05, "loss": 0.4012, "step": 2048 }, { "epoch": 0.333672596995481, "grad_norm": 0.3739783465862274, "learning_rate": 4.998095767356492e-05, "loss": 0.4025, "step": 2049 }, { "epoch": 0.3338354435533119, "grad_norm": 0.38268163800239563, "learning_rate": 4.9980772371638224e-05, "loss": 0.384, "step": 2050 }, { "epoch": 0.3339982901111428, "grad_norm": 0.526179850101471, "learning_rate": 4.998058617282696e-05, "loss": 0.4318, "step": 2051 }, { "epoch": 0.3341611366689737, "grad_norm": 0.4093352258205414, "learning_rate": 4.9980399077137804e-05, "loss": 0.4087, "step": 2052 }, { "epoch": 0.33432398322680457, "grad_norm": 0.3767792880535126, "learning_rate": 4.9980211084577476e-05, "loss": 0.3596, "step": 2053 }, { "epoch": 0.33448682978463545, "grad_norm": 0.5416701436042786, "learning_rate": 4.998002219515273e-05, "loss": 0.4182, "step": 2054 }, { "epoch": 0.3346496763424663, "grad_norm": 0.4793764650821686, "learning_rate": 4.997983240887034e-05, "loss": 0.4485, "step": 2055 }, { "epoch": 0.33481252290029717, "grad_norm": 0.46710532903671265, "learning_rate": 4.9979641725737135e-05, "loss": 0.4059, "step": 2056 }, { "epoch": 0.33497536945812806, "grad_norm": 0.39563119411468506, "learning_rate": 4.9979450145759945e-05, "loss": 0.3884, "step": 2057 }, { "epoch": 0.33513821601595895, "grad_norm": 0.5098652839660645, "learning_rate": 4.997925766894566e-05, "loss": 0.4373, "step": 2058 }, { "epoch": 0.33530106257378983, "grad_norm": 0.4758967161178589, "learning_rate": 4.997906429530118e-05, "loss": 0.3844, "step": 2059 }, { "epoch": 0.3354639091316207, "grad_norm": 0.5053810477256775, "learning_rate": 4.997887002483345e-05, "loss": 0.382, "step": 2060 }, { "epoch": 0.3356267556894516, "grad_norm": 0.4985145926475525, "learning_rate": 4.997867485754945e-05, "loss": 0.3956, "step": 2061 }, { "epoch": 0.3357896022472825, "grad_norm": 0.5014511942863464, "learning_rate": 4.9978478793456185e-05, "loss": 0.4256, "step": 2062 }, { "epoch": 0.3359524488051134, "grad_norm": 0.46065178513526917, "learning_rate": 4.997828183256069e-05, "loss": 0.3739, "step": 2063 }, { "epoch": 0.33611529536294427, "grad_norm": 0.39116591215133667, "learning_rate": 4.997808397487005e-05, "loss": 0.3579, "step": 2064 }, { "epoch": 0.33627814192077515, "grad_norm": 0.43590500950813293, "learning_rate": 4.997788522039135e-05, "loss": 0.3251, "step": 2065 }, { "epoch": 0.33644098847860604, "grad_norm": 0.5201831459999084, "learning_rate": 4.997768556913174e-05, "loss": 0.3996, "step": 2066 }, { "epoch": 0.3366038350364369, "grad_norm": 0.391645610332489, "learning_rate": 4.997748502109838e-05, "loss": 0.3445, "step": 2067 }, { "epoch": 0.3367666815942678, "grad_norm": 0.3749023377895355, "learning_rate": 4.997728357629847e-05, "loss": 0.3706, "step": 2068 }, { "epoch": 0.3369295281520987, "grad_norm": 0.42282524704933167, "learning_rate": 4.997708123473925e-05, "loss": 0.3947, "step": 2069 }, { "epoch": 0.3370923747099296, "grad_norm": 0.45475807785987854, "learning_rate": 4.9976877996427987e-05, "loss": 0.4019, "step": 2070 }, { "epoch": 0.3372552212677605, "grad_norm": 0.4161653518676758, "learning_rate": 4.997667386137196e-05, "loss": 0.4032, "step": 2071 }, { "epoch": 0.33741806782559136, "grad_norm": 0.46555209159851074, "learning_rate": 4.997646882957851e-05, "loss": 0.3577, "step": 2072 }, { "epoch": 0.33758091438342225, "grad_norm": 0.4146837592124939, "learning_rate": 4.9976262901055e-05, "loss": 0.4043, "step": 2073 }, { "epoch": 0.3377437609412531, "grad_norm": 0.37480098009109497, "learning_rate": 4.997605607580882e-05, "loss": 0.3848, "step": 2074 }, { "epoch": 0.33790660749908397, "grad_norm": 0.4584108293056488, "learning_rate": 4.99758483538474e-05, "loss": 0.4339, "step": 2075 }, { "epoch": 0.33806945405691485, "grad_norm": 0.4057260751724243, "learning_rate": 4.997563973517819e-05, "loss": 0.372, "step": 2076 }, { "epoch": 0.33823230061474574, "grad_norm": 0.3840216100215912, "learning_rate": 4.9975430219808693e-05, "loss": 0.4025, "step": 2077 }, { "epoch": 0.3383951471725766, "grad_norm": 0.3801320195198059, "learning_rate": 4.9975219807746415e-05, "loss": 0.3599, "step": 2078 }, { "epoch": 0.3385579937304075, "grad_norm": 0.46420666575431824, "learning_rate": 4.9975008498998924e-05, "loss": 0.4109, "step": 2079 }, { "epoch": 0.3387208402882384, "grad_norm": 0.424039751291275, "learning_rate": 4.99747962935738e-05, "loss": 0.3186, "step": 2080 }, { "epoch": 0.3388836868460693, "grad_norm": 0.46962660551071167, "learning_rate": 4.997458319147865e-05, "loss": 0.3871, "step": 2081 }, { "epoch": 0.3390465334039002, "grad_norm": 0.4286401867866516, "learning_rate": 4.9974369192721147e-05, "loss": 0.394, "step": 2082 }, { "epoch": 0.33920937996173106, "grad_norm": 0.39013904333114624, "learning_rate": 4.9974154297308965e-05, "loss": 0.3726, "step": 2083 }, { "epoch": 0.33937222651956195, "grad_norm": 0.42181921005249023, "learning_rate": 4.9973938505249824e-05, "loss": 0.3879, "step": 2084 }, { "epoch": 0.33953507307739283, "grad_norm": 0.38228505849838257, "learning_rate": 4.997372181655146e-05, "loss": 0.3648, "step": 2085 }, { "epoch": 0.3396979196352237, "grad_norm": 0.45028266310691833, "learning_rate": 4.997350423122166e-05, "loss": 0.4081, "step": 2086 }, { "epoch": 0.3398607661930546, "grad_norm": 0.4338091015815735, "learning_rate": 4.997328574926824e-05, "loss": 0.3899, "step": 2087 }, { "epoch": 0.3400236127508855, "grad_norm": 0.38011884689331055, "learning_rate": 4.9973066370699034e-05, "loss": 0.4105, "step": 2088 }, { "epoch": 0.3401864593087164, "grad_norm": 0.437603235244751, "learning_rate": 4.997284609552193e-05, "loss": 0.3688, "step": 2089 }, { "epoch": 0.34034930586654727, "grad_norm": 0.46192580461502075, "learning_rate": 4.997262492374483e-05, "loss": 0.3412, "step": 2090 }, { "epoch": 0.34051215242437816, "grad_norm": 0.4550076723098755, "learning_rate": 4.997240285537567e-05, "loss": 0.4569, "step": 2091 }, { "epoch": 0.340674998982209, "grad_norm": 0.40146222710609436, "learning_rate": 4.9972179890422425e-05, "loss": 0.4317, "step": 2092 }, { "epoch": 0.3408378455400399, "grad_norm": 0.4391135573387146, "learning_rate": 4.997195602889311e-05, "loss": 0.391, "step": 2093 }, { "epoch": 0.34100069209787076, "grad_norm": 0.4445488750934601, "learning_rate": 4.9971731270795755e-05, "loss": 0.4151, "step": 2094 }, { "epoch": 0.34116353865570165, "grad_norm": 0.4462047517299652, "learning_rate": 4.997150561613843e-05, "loss": 0.3652, "step": 2095 }, { "epoch": 0.34132638521353253, "grad_norm": 0.4092627763748169, "learning_rate": 4.997127906492923e-05, "loss": 0.3729, "step": 2096 }, { "epoch": 0.3414892317713634, "grad_norm": 0.3985903263092041, "learning_rate": 4.9971051617176306e-05, "loss": 0.3454, "step": 2097 }, { "epoch": 0.3416520783291943, "grad_norm": 0.4966728389263153, "learning_rate": 4.9970823272887814e-05, "loss": 0.4119, "step": 2098 }, { "epoch": 0.3418149248870252, "grad_norm": 0.45512592792510986, "learning_rate": 4.997059403207194e-05, "loss": 0.3256, "step": 2099 }, { "epoch": 0.3419777714448561, "grad_norm": 0.4301413595676422, "learning_rate": 4.997036389473694e-05, "loss": 0.3963, "step": 2100 }, { "epoch": 0.34214061800268697, "grad_norm": 0.43847987055778503, "learning_rate": 4.997013286089105e-05, "loss": 0.436, "step": 2101 }, { "epoch": 0.34230346456051786, "grad_norm": 0.5205087065696716, "learning_rate": 4.996990093054259e-05, "loss": 0.3905, "step": 2102 }, { "epoch": 0.34246631111834874, "grad_norm": 0.3459760844707489, "learning_rate": 4.996966810369987e-05, "loss": 0.3708, "step": 2103 }, { "epoch": 0.34262915767617963, "grad_norm": 0.3736145794391632, "learning_rate": 4.996943438037125e-05, "loss": 0.3722, "step": 2104 }, { "epoch": 0.3427920042340105, "grad_norm": 0.4012175500392914, "learning_rate": 4.9969199760565134e-05, "loss": 0.3633, "step": 2105 }, { "epoch": 0.3429548507918414, "grad_norm": 0.46045729517936707, "learning_rate": 4.996896424428993e-05, "loss": 0.3926, "step": 2106 }, { "epoch": 0.3431176973496723, "grad_norm": 0.35464543104171753, "learning_rate": 4.996872783155409e-05, "loss": 0.3415, "step": 2107 }, { "epoch": 0.3432805439075032, "grad_norm": 0.5184735059738159, "learning_rate": 4.9968490522366134e-05, "loss": 0.4461, "step": 2108 }, { "epoch": 0.34344339046533406, "grad_norm": 0.4004727303981781, "learning_rate": 4.9968252316734556e-05, "loss": 0.4084, "step": 2109 }, { "epoch": 0.3436062370231649, "grad_norm": 0.4020712673664093, "learning_rate": 4.9968013214667906e-05, "loss": 0.3723, "step": 2110 }, { "epoch": 0.3437690835809958, "grad_norm": 0.4670315980911255, "learning_rate": 4.9967773216174774e-05, "loss": 0.4168, "step": 2111 }, { "epoch": 0.34393193013882667, "grad_norm": 0.44190067052841187, "learning_rate": 4.996753232126378e-05, "loss": 0.4264, "step": 2112 }, { "epoch": 0.34409477669665756, "grad_norm": 0.412127822637558, "learning_rate": 4.996729052994358e-05, "loss": 0.3802, "step": 2113 }, { "epoch": 0.34425762325448844, "grad_norm": 0.48218488693237305, "learning_rate": 4.996704784222284e-05, "loss": 0.4438, "step": 2114 }, { "epoch": 0.34442046981231933, "grad_norm": 0.4646387994289398, "learning_rate": 4.9966804258110273e-05, "loss": 0.4213, "step": 2115 }, { "epoch": 0.3445833163701502, "grad_norm": 0.42086583375930786, "learning_rate": 4.996655977761463e-05, "loss": 0.3888, "step": 2116 }, { "epoch": 0.3447461629279811, "grad_norm": 0.4441312849521637, "learning_rate": 4.99663144007447e-05, "loss": 0.3683, "step": 2117 }, { "epoch": 0.344909009485812, "grad_norm": 0.3620243966579437, "learning_rate": 4.996606812750927e-05, "loss": 0.3866, "step": 2118 }, { "epoch": 0.3450718560436429, "grad_norm": 0.44868233799934387, "learning_rate": 4.996582095791721e-05, "loss": 0.4252, "step": 2119 }, { "epoch": 0.34523470260147376, "grad_norm": 0.4633602201938629, "learning_rate": 4.996557289197736e-05, "loss": 0.4061, "step": 2120 }, { "epoch": 0.34539754915930465, "grad_norm": 0.3699967563152313, "learning_rate": 4.996532392969866e-05, "loss": 0.3856, "step": 2121 }, { "epoch": 0.34556039571713554, "grad_norm": 0.34901508688926697, "learning_rate": 4.996507407109002e-05, "loss": 0.3327, "step": 2122 }, { "epoch": 0.3457232422749664, "grad_norm": 0.43473583459854126, "learning_rate": 4.996482331616043e-05, "loss": 0.408, "step": 2123 }, { "epoch": 0.3458860888327973, "grad_norm": 0.4134833812713623, "learning_rate": 4.9964571664918885e-05, "loss": 0.3873, "step": 2124 }, { "epoch": 0.3460489353906282, "grad_norm": 0.42830124497413635, "learning_rate": 4.9964319117374423e-05, "loss": 0.3684, "step": 2125 }, { "epoch": 0.3462117819484591, "grad_norm": 0.376161128282547, "learning_rate": 4.9964065673536106e-05, "loss": 0.3708, "step": 2126 }, { "epoch": 0.34637462850628997, "grad_norm": 0.37967541813850403, "learning_rate": 4.996381133341305e-05, "loss": 0.3688, "step": 2127 }, { "epoch": 0.34653747506412086, "grad_norm": 0.34348419308662415, "learning_rate": 4.996355609701435e-05, "loss": 0.3588, "step": 2128 }, { "epoch": 0.3467003216219517, "grad_norm": 0.339002788066864, "learning_rate": 4.9963299964349206e-05, "loss": 0.378, "step": 2129 }, { "epoch": 0.3468631681797826, "grad_norm": 0.432304710149765, "learning_rate": 4.996304293542681e-05, "loss": 0.3947, "step": 2130 }, { "epoch": 0.34702601473761346, "grad_norm": 0.3608505129814148, "learning_rate": 4.996278501025637e-05, "loss": 0.3783, "step": 2131 }, { "epoch": 0.34718886129544435, "grad_norm": 0.35463204979896545, "learning_rate": 4.996252618884716e-05, "loss": 0.3505, "step": 2132 }, { "epoch": 0.34735170785327524, "grad_norm": 0.446329265832901, "learning_rate": 4.9962266471208475e-05, "loss": 0.4068, "step": 2133 }, { "epoch": 0.3475145544111061, "grad_norm": 0.37989214062690735, "learning_rate": 4.9962005857349626e-05, "loss": 0.3589, "step": 2134 }, { "epoch": 0.347677400968937, "grad_norm": 0.3714139461517334, "learning_rate": 4.9961744347279985e-05, "loss": 0.38, "step": 2135 }, { "epoch": 0.3478402475267679, "grad_norm": 0.3931752145290375, "learning_rate": 4.996148194100893e-05, "loss": 0.3614, "step": 2136 }, { "epoch": 0.3480030940845988, "grad_norm": 0.415138840675354, "learning_rate": 4.996121863854589e-05, "loss": 0.3722, "step": 2137 }, { "epoch": 0.34816594064242967, "grad_norm": 0.3255549967288971, "learning_rate": 4.996095443990031e-05, "loss": 0.351, "step": 2138 }, { "epoch": 0.34832878720026056, "grad_norm": 0.3858198821544647, "learning_rate": 4.996068934508169e-05, "loss": 0.3706, "step": 2139 }, { "epoch": 0.34849163375809145, "grad_norm": 0.35322925448417664, "learning_rate": 4.996042335409953e-05, "loss": 0.3413, "step": 2140 }, { "epoch": 0.34865448031592233, "grad_norm": 0.4276290833950043, "learning_rate": 4.996015646696339e-05, "loss": 0.3891, "step": 2141 }, { "epoch": 0.3488173268737532, "grad_norm": 0.376669317483902, "learning_rate": 4.9959888683682857e-05, "loss": 0.3735, "step": 2142 }, { "epoch": 0.3489801734315841, "grad_norm": 0.3774469792842865, "learning_rate": 4.9959620004267525e-05, "loss": 0.3892, "step": 2143 }, { "epoch": 0.349143019989415, "grad_norm": 0.39294689893722534, "learning_rate": 4.995935042872707e-05, "loss": 0.3748, "step": 2144 }, { "epoch": 0.3493058665472459, "grad_norm": 0.42457038164138794, "learning_rate": 4.995907995707114e-05, "loss": 0.3771, "step": 2145 }, { "epoch": 0.34946871310507677, "grad_norm": 0.39257967472076416, "learning_rate": 4.995880858930947e-05, "loss": 0.4173, "step": 2146 }, { "epoch": 0.3496315596629076, "grad_norm": 0.54593825340271, "learning_rate": 4.995853632545179e-05, "loss": 0.4455, "step": 2147 }, { "epoch": 0.3497944062207385, "grad_norm": 0.3690642714500427, "learning_rate": 4.995826316550788e-05, "loss": 0.3504, "step": 2148 }, { "epoch": 0.34995725277856937, "grad_norm": 0.3748270273208618, "learning_rate": 4.995798910948754e-05, "loss": 0.3694, "step": 2149 }, { "epoch": 0.35012009933640026, "grad_norm": 0.3481821119785309, "learning_rate": 4.995771415740061e-05, "loss": 0.3762, "step": 2150 }, { "epoch": 0.35028294589423115, "grad_norm": 0.34976041316986084, "learning_rate": 4.995743830925698e-05, "loss": 0.3825, "step": 2151 }, { "epoch": 0.35044579245206203, "grad_norm": 0.4747844934463501, "learning_rate": 4.995716156506654e-05, "loss": 0.4243, "step": 2152 }, { "epoch": 0.3506086390098929, "grad_norm": 0.3723360598087311, "learning_rate": 4.9956883924839215e-05, "loss": 0.3934, "step": 2153 }, { "epoch": 0.3507714855677238, "grad_norm": 0.5668650269508362, "learning_rate": 4.995660538858499e-05, "loss": 0.4222, "step": 2154 }, { "epoch": 0.3509343321255547, "grad_norm": 0.4039671719074249, "learning_rate": 4.995632595631387e-05, "loss": 0.3987, "step": 2155 }, { "epoch": 0.3510971786833856, "grad_norm": 0.36633527278900146, "learning_rate": 4.9956045628035856e-05, "loss": 0.3791, "step": 2156 }, { "epoch": 0.35126002524121647, "grad_norm": 0.38272374868392944, "learning_rate": 4.9955764403761045e-05, "loss": 0.3788, "step": 2157 }, { "epoch": 0.35142287179904735, "grad_norm": 0.316112220287323, "learning_rate": 4.995548228349952e-05, "loss": 0.3514, "step": 2158 }, { "epoch": 0.35158571835687824, "grad_norm": 0.38623982667922974, "learning_rate": 4.9955199267261415e-05, "loss": 0.3874, "step": 2159 }, { "epoch": 0.3517485649147091, "grad_norm": 0.4492664337158203, "learning_rate": 4.9954915355056885e-05, "loss": 0.421, "step": 2160 }, { "epoch": 0.35191141147254, "grad_norm": 0.4379126727581024, "learning_rate": 4.995463054689613e-05, "loss": 0.3785, "step": 2161 }, { "epoch": 0.3520742580303709, "grad_norm": 0.38193631172180176, "learning_rate": 4.995434484278938e-05, "loss": 0.3889, "step": 2162 }, { "epoch": 0.3522371045882018, "grad_norm": 0.40527817606925964, "learning_rate": 4.995405824274687e-05, "loss": 0.3739, "step": 2163 }, { "epoch": 0.3523999511460327, "grad_norm": 0.3977220952510834, "learning_rate": 4.9953770746778914e-05, "loss": 0.4004, "step": 2164 }, { "epoch": 0.35256279770386356, "grad_norm": 0.4538644552230835, "learning_rate": 4.995348235489582e-05, "loss": 0.3873, "step": 2165 }, { "epoch": 0.3527256442616944, "grad_norm": 0.40847280621528625, "learning_rate": 4.995319306710795e-05, "loss": 0.3906, "step": 2166 }, { "epoch": 0.3528884908195253, "grad_norm": 0.3874794542789459, "learning_rate": 4.995290288342569e-05, "loss": 0.3878, "step": 2167 }, { "epoch": 0.35305133737735617, "grad_norm": 0.44386979937553406, "learning_rate": 4.995261180385945e-05, "loss": 0.404, "step": 2168 }, { "epoch": 0.35321418393518705, "grad_norm": 0.36885297298431396, "learning_rate": 4.995231982841969e-05, "loss": 0.366, "step": 2169 }, { "epoch": 0.35337703049301794, "grad_norm": 0.40673068165779114, "learning_rate": 4.995202695711688e-05, "loss": 0.3828, "step": 2170 }, { "epoch": 0.3535398770508488, "grad_norm": 0.40608054399490356, "learning_rate": 4.995173318996155e-05, "loss": 0.3959, "step": 2171 }, { "epoch": 0.3537027236086797, "grad_norm": 0.37374958395957947, "learning_rate": 4.995143852696424e-05, "loss": 0.3323, "step": 2172 }, { "epoch": 0.3538655701665106, "grad_norm": 0.4071354866027832, "learning_rate": 4.9951142968135536e-05, "loss": 0.4037, "step": 2173 }, { "epoch": 0.3540284167243415, "grad_norm": 0.42684364318847656, "learning_rate": 4.995084651348604e-05, "loss": 0.3983, "step": 2174 }, { "epoch": 0.3541912632821724, "grad_norm": 0.38050222396850586, "learning_rate": 4.99505491630264e-05, "loss": 0.3875, "step": 2175 }, { "epoch": 0.35435410984000326, "grad_norm": 0.4078112840652466, "learning_rate": 4.995025091676729e-05, "loss": 0.3953, "step": 2176 }, { "epoch": 0.35451695639783415, "grad_norm": 0.4302748739719391, "learning_rate": 4.994995177471942e-05, "loss": 0.3808, "step": 2177 }, { "epoch": 0.35467980295566504, "grad_norm": 0.37659764289855957, "learning_rate": 4.994965173689353e-05, "loss": 0.3592, "step": 2178 }, { "epoch": 0.3548426495134959, "grad_norm": 0.41658681631088257, "learning_rate": 4.994935080330039e-05, "loss": 0.3627, "step": 2179 }, { "epoch": 0.3550054960713268, "grad_norm": 0.37736308574676514, "learning_rate": 4.994904897395081e-05, "loss": 0.349, "step": 2180 }, { "epoch": 0.3551683426291577, "grad_norm": 0.3928869664669037, "learning_rate": 4.9948746248855614e-05, "loss": 0.3736, "step": 2181 }, { "epoch": 0.3553311891869886, "grad_norm": 0.3679029047489166, "learning_rate": 4.9948442628025687e-05, "loss": 0.3823, "step": 2182 }, { "epoch": 0.35549403574481947, "grad_norm": 0.4512062072753906, "learning_rate": 4.994813811147192e-05, "loss": 0.4268, "step": 2183 }, { "epoch": 0.3556568823026503, "grad_norm": 0.36844202876091003, "learning_rate": 4.9947832699205244e-05, "loss": 0.3897, "step": 2184 }, { "epoch": 0.3558197288604812, "grad_norm": 0.3968261182308197, "learning_rate": 4.994752639123663e-05, "loss": 0.3845, "step": 2185 }, { "epoch": 0.3559825754183121, "grad_norm": 0.35631492733955383, "learning_rate": 4.994721918757708e-05, "loss": 0.3311, "step": 2186 }, { "epoch": 0.35614542197614296, "grad_norm": 0.3328388035297394, "learning_rate": 4.9946911088237615e-05, "loss": 0.4236, "step": 2187 }, { "epoch": 0.35630826853397385, "grad_norm": 0.337625652551651, "learning_rate": 4.99466020932293e-05, "loss": 0.3902, "step": 2188 }, { "epoch": 0.35647111509180474, "grad_norm": 0.3538847267627716, "learning_rate": 4.9946292202563225e-05, "loss": 0.3699, "step": 2189 }, { "epoch": 0.3566339616496356, "grad_norm": 0.48189476132392883, "learning_rate": 4.994598141625052e-05, "loss": 0.3628, "step": 2190 }, { "epoch": 0.3567968082074665, "grad_norm": 0.3558766841888428, "learning_rate": 4.9945669734302336e-05, "loss": 0.398, "step": 2191 }, { "epoch": 0.3569596547652974, "grad_norm": 0.3956798315048218, "learning_rate": 4.9945357156729874e-05, "loss": 0.386, "step": 2192 }, { "epoch": 0.3571225013231283, "grad_norm": 0.4918650686740875, "learning_rate": 4.9945043683544355e-05, "loss": 0.4094, "step": 2193 }, { "epoch": 0.35728534788095917, "grad_norm": 0.33386892080307007, "learning_rate": 4.994472931475702e-05, "loss": 0.3464, "step": 2194 }, { "epoch": 0.35744819443879006, "grad_norm": 0.4901202321052551, "learning_rate": 4.9944414050379176e-05, "loss": 0.3975, "step": 2195 }, { "epoch": 0.35761104099662094, "grad_norm": 0.4349435567855835, "learning_rate": 4.994409789042213e-05, "loss": 0.4021, "step": 2196 }, { "epoch": 0.35777388755445183, "grad_norm": 0.4106043875217438, "learning_rate": 4.9943780834897235e-05, "loss": 0.3869, "step": 2197 }, { "epoch": 0.3579367341122827, "grad_norm": 0.36297690868377686, "learning_rate": 4.994346288381587e-05, "loss": 0.353, "step": 2198 }, { "epoch": 0.3580995806701136, "grad_norm": 0.3880611062049866, "learning_rate": 4.994314403718946e-05, "loss": 0.3874, "step": 2199 }, { "epoch": 0.3582624272279445, "grad_norm": 0.4265459477901459, "learning_rate": 4.994282429502945e-05, "loss": 0.3796, "step": 2200 }, { "epoch": 0.3584252737857754, "grad_norm": 0.4093751907348633, "learning_rate": 4.994250365734732e-05, "loss": 0.4336, "step": 2201 }, { "epoch": 0.35858812034360626, "grad_norm": 0.4478566646575928, "learning_rate": 4.994218212415457e-05, "loss": 0.3972, "step": 2202 }, { "epoch": 0.3587509669014371, "grad_norm": 0.45420119166374207, "learning_rate": 4.9941859695462756e-05, "loss": 0.4072, "step": 2203 }, { "epoch": 0.358913813459268, "grad_norm": 0.44971007108688354, "learning_rate": 4.994153637128345e-05, "loss": 0.3919, "step": 2204 }, { "epoch": 0.35907666001709887, "grad_norm": 0.4697023630142212, "learning_rate": 4.994121215162826e-05, "loss": 0.3896, "step": 2205 }, { "epoch": 0.35923950657492976, "grad_norm": 0.4448084235191345, "learning_rate": 4.9940887036508834e-05, "loss": 0.3983, "step": 2206 }, { "epoch": 0.35940235313276064, "grad_norm": 0.4529854357242584, "learning_rate": 4.994056102593683e-05, "loss": 0.4132, "step": 2207 }, { "epoch": 0.35956519969059153, "grad_norm": 0.41483160853385925, "learning_rate": 4.994023411992397e-05, "loss": 0.3772, "step": 2208 }, { "epoch": 0.3597280462484224, "grad_norm": 0.4955604076385498, "learning_rate": 4.993990631848198e-05, "loss": 0.3696, "step": 2209 }, { "epoch": 0.3598908928062533, "grad_norm": 0.46242326498031616, "learning_rate": 4.993957762162262e-05, "loss": 0.3539, "step": 2210 }, { "epoch": 0.3600537393640842, "grad_norm": 0.7175461053848267, "learning_rate": 4.993924802935772e-05, "loss": 0.4603, "step": 2211 }, { "epoch": 0.3602165859219151, "grad_norm": 0.46242567896842957, "learning_rate": 4.993891754169908e-05, "loss": 0.3344, "step": 2212 }, { "epoch": 0.36037943247974596, "grad_norm": 0.4832141697406769, "learning_rate": 4.9938586158658593e-05, "loss": 0.3998, "step": 2213 }, { "epoch": 0.36054227903757685, "grad_norm": 0.5337796211242676, "learning_rate": 4.9938253880248146e-05, "loss": 0.4255, "step": 2214 }, { "epoch": 0.36070512559540774, "grad_norm": 0.41763967275619507, "learning_rate": 4.993792070647966e-05, "loss": 0.3946, "step": 2215 }, { "epoch": 0.3608679721532386, "grad_norm": 0.42579391598701477, "learning_rate": 4.99375866373651e-05, "loss": 0.352, "step": 2216 }, { "epoch": 0.3610308187110695, "grad_norm": 0.43992528319358826, "learning_rate": 4.9937251672916475e-05, "loss": 0.4035, "step": 2217 }, { "epoch": 0.3611936652689004, "grad_norm": 0.46672430634498596, "learning_rate": 4.9936915813145796e-05, "loss": 0.3847, "step": 2218 }, { "epoch": 0.3613565118267313, "grad_norm": 0.47930967807769775, "learning_rate": 4.9936579058065126e-05, "loss": 0.4291, "step": 2219 }, { "epoch": 0.3615193583845622, "grad_norm": 0.40837588906288147, "learning_rate": 4.9936241407686556e-05, "loss": 0.4057, "step": 2220 }, { "epoch": 0.361682204942393, "grad_norm": 0.3604606091976166, "learning_rate": 4.993590286202221e-05, "loss": 0.3696, "step": 2221 }, { "epoch": 0.3618450515002239, "grad_norm": 0.4006814956665039, "learning_rate": 4.9935563421084244e-05, "loss": 0.3484, "step": 2222 }, { "epoch": 0.3620078980580548, "grad_norm": 0.42320871353149414, "learning_rate": 4.993522308488484e-05, "loss": 0.3997, "step": 2223 }, { "epoch": 0.36217074461588566, "grad_norm": 0.45390424132347107, "learning_rate": 4.993488185343622e-05, "loss": 0.4009, "step": 2224 }, { "epoch": 0.36233359117371655, "grad_norm": 0.41057971119880676, "learning_rate": 4.993453972675063e-05, "loss": 0.4008, "step": 2225 }, { "epoch": 0.36249643773154744, "grad_norm": 0.38136133551597595, "learning_rate": 4.9934196704840364e-05, "loss": 0.3751, "step": 2226 }, { "epoch": 0.3626592842893783, "grad_norm": 0.44969499111175537, "learning_rate": 4.9933852787717725e-05, "loss": 0.4101, "step": 2227 }, { "epoch": 0.3628221308472092, "grad_norm": 0.4812588691711426, "learning_rate": 4.9933507975395076e-05, "loss": 0.4218, "step": 2228 }, { "epoch": 0.3629849774050401, "grad_norm": 0.45897188782691956, "learning_rate": 4.993316226788478e-05, "loss": 0.3795, "step": 2229 }, { "epoch": 0.363147823962871, "grad_norm": 0.5051400661468506, "learning_rate": 4.9932815665199265e-05, "loss": 0.3939, "step": 2230 }, { "epoch": 0.3633106705207019, "grad_norm": 0.42064881324768066, "learning_rate": 4.993246816735096e-05, "loss": 0.403, "step": 2231 }, { "epoch": 0.36347351707853276, "grad_norm": 0.4536680281162262, "learning_rate": 4.993211977435235e-05, "loss": 0.4102, "step": 2232 }, { "epoch": 0.36363636363636365, "grad_norm": 0.4173213243484497, "learning_rate": 4.9931770486215946e-05, "loss": 0.3717, "step": 2233 }, { "epoch": 0.36379921019419453, "grad_norm": 0.40971994400024414, "learning_rate": 4.993142030295428e-05, "loss": 0.4449, "step": 2234 }, { "epoch": 0.3639620567520254, "grad_norm": 0.4842532277107239, "learning_rate": 4.9931069224579926e-05, "loss": 0.4067, "step": 2235 }, { "epoch": 0.3641249033098563, "grad_norm": 0.4983281195163727, "learning_rate": 4.993071725110549e-05, "loss": 0.4545, "step": 2236 }, { "epoch": 0.3642877498676872, "grad_norm": 0.3574967086315155, "learning_rate": 4.993036438254361e-05, "loss": 0.3793, "step": 2237 }, { "epoch": 0.3644505964255181, "grad_norm": 0.524706244468689, "learning_rate": 4.993001061890696e-05, "loss": 0.3851, "step": 2238 }, { "epoch": 0.3646134429833489, "grad_norm": 0.39731141924858093, "learning_rate": 4.992965596020824e-05, "loss": 0.3769, "step": 2239 }, { "epoch": 0.3647762895411798, "grad_norm": 0.45751646161079407, "learning_rate": 4.992930040646018e-05, "loss": 0.361, "step": 2240 }, { "epoch": 0.3649391360990107, "grad_norm": 0.3760072886943817, "learning_rate": 4.9928943957675535e-05, "loss": 0.3445, "step": 2241 }, { "epoch": 0.3651019826568416, "grad_norm": 0.3922520875930786, "learning_rate": 4.9928586613867115e-05, "loss": 0.4036, "step": 2242 }, { "epoch": 0.36526482921467246, "grad_norm": 0.4120993912220001, "learning_rate": 4.9928228375047755e-05, "loss": 0.3866, "step": 2243 }, { "epoch": 0.36542767577250335, "grad_norm": 0.39172282814979553, "learning_rate": 4.992786924123031e-05, "loss": 0.3877, "step": 2244 }, { "epoch": 0.36559052233033423, "grad_norm": 0.4963613748550415, "learning_rate": 4.992750921242766e-05, "loss": 0.4353, "step": 2245 }, { "epoch": 0.3657533688881651, "grad_norm": 0.45397165417671204, "learning_rate": 4.992714828865276e-05, "loss": 0.4045, "step": 2246 }, { "epoch": 0.365916215445996, "grad_norm": 0.4535813331604004, "learning_rate": 4.9926786469918544e-05, "loss": 0.4188, "step": 2247 }, { "epoch": 0.3660790620038269, "grad_norm": 0.41594403982162476, "learning_rate": 4.992642375623801e-05, "loss": 0.3659, "step": 2248 }, { "epoch": 0.3662419085616578, "grad_norm": 0.37076300382614136, "learning_rate": 4.992606014762419e-05, "loss": 0.3385, "step": 2249 }, { "epoch": 0.36640475511948867, "grad_norm": 0.4711206555366516, "learning_rate": 4.992569564409012e-05, "loss": 0.3813, "step": 2250 }, { "epoch": 0.36656760167731955, "grad_norm": 0.35906368494033813, "learning_rate": 4.99253302456489e-05, "loss": 0.3634, "step": 2251 }, { "epoch": 0.36673044823515044, "grad_norm": 0.517839789390564, "learning_rate": 4.9924963952313646e-05, "loss": 0.4085, "step": 2252 }, { "epoch": 0.36689329479298133, "grad_norm": 0.3466755747795105, "learning_rate": 4.992459676409751e-05, "loss": 0.3421, "step": 2253 }, { "epoch": 0.3670561413508122, "grad_norm": 0.4537397623062134, "learning_rate": 4.992422868101367e-05, "loss": 0.3668, "step": 2254 }, { "epoch": 0.3672189879086431, "grad_norm": 0.3967696726322174, "learning_rate": 4.992385970307536e-05, "loss": 0.3487, "step": 2255 }, { "epoch": 0.367381834466474, "grad_norm": 0.3878738284111023, "learning_rate": 4.99234898302958e-05, "loss": 0.3602, "step": 2256 }, { "epoch": 0.3675446810243049, "grad_norm": 0.41144394874572754, "learning_rate": 4.992311906268829e-05, "loss": 0.3634, "step": 2257 }, { "epoch": 0.3677075275821357, "grad_norm": 0.44289854168891907, "learning_rate": 4.992274740026612e-05, "loss": 0.3699, "step": 2258 }, { "epoch": 0.3678703741399666, "grad_norm": 0.3854486048221588, "learning_rate": 4.992237484304266e-05, "loss": 0.3773, "step": 2259 }, { "epoch": 0.3680332206977975, "grad_norm": 0.5372008681297302, "learning_rate": 4.992200139103127e-05, "loss": 0.4289, "step": 2260 }, { "epoch": 0.36819606725562837, "grad_norm": 0.34087619185447693, "learning_rate": 4.992162704424537e-05, "loss": 0.3386, "step": 2261 }, { "epoch": 0.36835891381345925, "grad_norm": 0.39439666271209717, "learning_rate": 4.9921251802698374e-05, "loss": 0.3781, "step": 2262 }, { "epoch": 0.36852176037129014, "grad_norm": 0.46089813113212585, "learning_rate": 4.992087566640379e-05, "loss": 0.3698, "step": 2263 }, { "epoch": 0.36868460692912103, "grad_norm": 0.3642447590827942, "learning_rate": 4.992049863537509e-05, "loss": 0.4169, "step": 2264 }, { "epoch": 0.3688474534869519, "grad_norm": 0.3604416847229004, "learning_rate": 4.992012070962584e-05, "loss": 0.3332, "step": 2265 }, { "epoch": 0.3690103000447828, "grad_norm": 0.4081519544124603, "learning_rate": 4.991974188916958e-05, "loss": 0.3983, "step": 2266 }, { "epoch": 0.3691731466026137, "grad_norm": 0.395015686750412, "learning_rate": 4.9919362174019935e-05, "loss": 0.3416, "step": 2267 }, { "epoch": 0.3693359931604446, "grad_norm": 0.4147191047668457, "learning_rate": 4.991898156419052e-05, "loss": 0.3801, "step": 2268 }, { "epoch": 0.36949883971827546, "grad_norm": 0.528299868106842, "learning_rate": 4.9918600059695006e-05, "loss": 0.4193, "step": 2269 }, { "epoch": 0.36966168627610635, "grad_norm": 0.5415095090866089, "learning_rate": 4.99182176605471e-05, "loss": 0.423, "step": 2270 }, { "epoch": 0.36982453283393724, "grad_norm": 0.40748000144958496, "learning_rate": 4.991783436676052e-05, "loss": 0.3565, "step": 2271 }, { "epoch": 0.3699873793917681, "grad_norm": 0.3970476984977722, "learning_rate": 4.9917450178349036e-05, "loss": 0.4027, "step": 2272 }, { "epoch": 0.370150225949599, "grad_norm": 0.40973320603370667, "learning_rate": 4.9917065095326424e-05, "loss": 0.4045, "step": 2273 }, { "epoch": 0.3703130725074299, "grad_norm": 0.4160992205142975, "learning_rate": 4.9916679117706536e-05, "loss": 0.4267, "step": 2274 }, { "epoch": 0.3704759190652608, "grad_norm": 0.405588299036026, "learning_rate": 4.9916292245503196e-05, "loss": 0.3996, "step": 2275 }, { "epoch": 0.3706387656230916, "grad_norm": 0.42068323493003845, "learning_rate": 4.991590447873032e-05, "loss": 0.3875, "step": 2276 }, { "epoch": 0.3708016121809225, "grad_norm": 0.4589841365814209, "learning_rate": 4.991551581740184e-05, "loss": 0.4587, "step": 2277 }, { "epoch": 0.3709644587387534, "grad_norm": 0.4504401981830597, "learning_rate": 4.991512626153167e-05, "loss": 0.3991, "step": 2278 }, { "epoch": 0.3711273052965843, "grad_norm": 0.3950580656528473, "learning_rate": 4.991473581113384e-05, "loss": 0.4097, "step": 2279 }, { "epoch": 0.37129015185441516, "grad_norm": 0.3899480998516083, "learning_rate": 4.9914344466222334e-05, "loss": 0.3665, "step": 2280 }, { "epoch": 0.37145299841224605, "grad_norm": 0.45389050245285034, "learning_rate": 4.9913952226811224e-05, "loss": 0.376, "step": 2281 }, { "epoch": 0.37161584497007694, "grad_norm": 0.4346579313278198, "learning_rate": 4.991355909291457e-05, "loss": 0.3558, "step": 2282 }, { "epoch": 0.3717786915279078, "grad_norm": 0.3745550811290741, "learning_rate": 4.991316506454652e-05, "loss": 0.3537, "step": 2283 }, { "epoch": 0.3719415380857387, "grad_norm": 0.3685104548931122, "learning_rate": 4.9912770141721194e-05, "loss": 0.3694, "step": 2284 }, { "epoch": 0.3721043846435696, "grad_norm": 0.4315987229347229, "learning_rate": 4.991237432445278e-05, "loss": 0.3813, "step": 2285 }, { "epoch": 0.3722672312014005, "grad_norm": 0.4272167682647705, "learning_rate": 4.991197761275549e-05, "loss": 0.4165, "step": 2286 }, { "epoch": 0.37243007775923137, "grad_norm": 0.3622126281261444, "learning_rate": 4.991158000664357e-05, "loss": 0.3515, "step": 2287 }, { "epoch": 0.37259292431706226, "grad_norm": 0.43331053853034973, "learning_rate": 4.9911181506131275e-05, "loss": 0.4282, "step": 2288 }, { "epoch": 0.37275577087489314, "grad_norm": 0.38315802812576294, "learning_rate": 4.9910782111232937e-05, "loss": 0.4103, "step": 2289 }, { "epoch": 0.37291861743272403, "grad_norm": 0.44078248739242554, "learning_rate": 4.991038182196289e-05, "loss": 0.3867, "step": 2290 }, { "epoch": 0.3730814639905549, "grad_norm": 0.4033052623271942, "learning_rate": 4.9909980638335495e-05, "loss": 0.3886, "step": 2291 }, { "epoch": 0.3732443105483858, "grad_norm": 0.43710383772850037, "learning_rate": 4.990957856036517e-05, "loss": 0.3884, "step": 2292 }, { "epoch": 0.3734071571062167, "grad_norm": 0.4534375071525574, "learning_rate": 4.9909175588066344e-05, "loss": 0.3879, "step": 2293 }, { "epoch": 0.3735700036640476, "grad_norm": 0.4133680462837219, "learning_rate": 4.990877172145348e-05, "loss": 0.4045, "step": 2294 }, { "epoch": 0.3737328502218784, "grad_norm": 0.3838525712490082, "learning_rate": 4.9908366960541087e-05, "loss": 0.3799, "step": 2295 }, { "epoch": 0.3738956967797093, "grad_norm": 0.5941034555435181, "learning_rate": 4.9907961305343686e-05, "loss": 0.4113, "step": 2296 }, { "epoch": 0.3740585433375402, "grad_norm": 0.4014985263347626, "learning_rate": 4.9907554755875855e-05, "loss": 0.4294, "step": 2297 }, { "epoch": 0.37422138989537107, "grad_norm": 0.45725682377815247, "learning_rate": 4.990714731215218e-05, "loss": 0.3657, "step": 2298 }, { "epoch": 0.37438423645320196, "grad_norm": 0.6091436147689819, "learning_rate": 4.990673897418728e-05, "loss": 0.3848, "step": 2299 }, { "epoch": 0.37454708301103284, "grad_norm": 0.42037850618362427, "learning_rate": 4.990632974199585e-05, "loss": 0.3886, "step": 2300 }, { "epoch": 0.37470992956886373, "grad_norm": 0.5255310535430908, "learning_rate": 4.990591961559255e-05, "loss": 0.4379, "step": 2301 }, { "epoch": 0.3748727761266946, "grad_norm": 0.4245888292789459, "learning_rate": 4.9905508594992114e-05, "loss": 0.405, "step": 2302 }, { "epoch": 0.3750356226845255, "grad_norm": 0.3845653235912323, "learning_rate": 4.990509668020931e-05, "loss": 0.3571, "step": 2303 }, { "epoch": 0.3751984692423564, "grad_norm": 0.48926565051078796, "learning_rate": 4.9904683871258906e-05, "loss": 0.3704, "step": 2304 }, { "epoch": 0.3753613158001873, "grad_norm": 0.43937960267066956, "learning_rate": 4.990427016815574e-05, "loss": 0.3598, "step": 2305 }, { "epoch": 0.37552416235801817, "grad_norm": 0.40409865975379944, "learning_rate": 4.990385557091466e-05, "loss": 0.3984, "step": 2306 }, { "epoch": 0.37568700891584905, "grad_norm": 0.47301772236824036, "learning_rate": 4.990344007955055e-05, "loss": 0.4158, "step": 2307 }, { "epoch": 0.37584985547367994, "grad_norm": 0.4257739186286926, "learning_rate": 4.9903023694078336e-05, "loss": 0.4179, "step": 2308 }, { "epoch": 0.3760127020315108, "grad_norm": 1.018164873123169, "learning_rate": 4.990260641451294e-05, "loss": 0.4093, "step": 2309 }, { "epoch": 0.3761755485893417, "grad_norm": 0.4268081486225128, "learning_rate": 4.990218824086938e-05, "loss": 0.3647, "step": 2310 }, { "epoch": 0.3763383951471726, "grad_norm": 0.43464723229408264, "learning_rate": 4.990176917316265e-05, "loss": 0.3463, "step": 2311 }, { "epoch": 0.3765012417050035, "grad_norm": 0.4567243456840515, "learning_rate": 4.99013492114078e-05, "loss": 0.3677, "step": 2312 }, { "epoch": 0.3766640882628343, "grad_norm": 0.3864005506038666, "learning_rate": 4.9900928355619903e-05, "loss": 0.3929, "step": 2313 }, { "epoch": 0.3768269348206652, "grad_norm": 0.3985978960990906, "learning_rate": 4.990050660581408e-05, "loss": 0.3634, "step": 2314 }, { "epoch": 0.3769897813784961, "grad_norm": 0.45568719506263733, "learning_rate": 4.990008396200546e-05, "loss": 0.4246, "step": 2315 }, { "epoch": 0.377152627936327, "grad_norm": 0.40435683727264404, "learning_rate": 4.989966042420923e-05, "loss": 0.4163, "step": 2316 }, { "epoch": 0.37731547449415787, "grad_norm": 0.44698354601860046, "learning_rate": 4.989923599244058e-05, "loss": 0.3508, "step": 2317 }, { "epoch": 0.37747832105198875, "grad_norm": 0.4710400402545929, "learning_rate": 4.9898810666714755e-05, "loss": 0.4094, "step": 2318 }, { "epoch": 0.37764116760981964, "grad_norm": 0.453056275844574, "learning_rate": 4.989838444704704e-05, "loss": 0.4077, "step": 2319 }, { "epoch": 0.3778040141676505, "grad_norm": 0.4928837716579437, "learning_rate": 4.989795733345272e-05, "loss": 0.3626, "step": 2320 }, { "epoch": 0.3779668607254814, "grad_norm": 0.5050565004348755, "learning_rate": 4.9897529325947135e-05, "loss": 0.4238, "step": 2321 }, { "epoch": 0.3781297072833123, "grad_norm": 0.44521957635879517, "learning_rate": 4.989710042454565e-05, "loss": 0.3785, "step": 2322 }, { "epoch": 0.3782925538411432, "grad_norm": 0.579787015914917, "learning_rate": 4.9896670629263674e-05, "loss": 0.3853, "step": 2323 }, { "epoch": 0.3784554003989741, "grad_norm": 0.48647966980934143, "learning_rate": 4.989623994011663e-05, "loss": 0.4558, "step": 2324 }, { "epoch": 0.37861824695680496, "grad_norm": 0.39802515506744385, "learning_rate": 4.989580835711997e-05, "loss": 0.3951, "step": 2325 }, { "epoch": 0.37878109351463585, "grad_norm": 0.5172072649002075, "learning_rate": 4.989537588028921e-05, "loss": 0.3817, "step": 2326 }, { "epoch": 0.37894394007246673, "grad_norm": 0.4668614864349365, "learning_rate": 4.989494250963987e-05, "loss": 0.4034, "step": 2327 }, { "epoch": 0.3791067866302976, "grad_norm": 0.39874759316444397, "learning_rate": 4.989450824518749e-05, "loss": 0.3658, "step": 2328 }, { "epoch": 0.3792696331881285, "grad_norm": 0.37500321865081787, "learning_rate": 4.989407308694769e-05, "loss": 0.3949, "step": 2329 }, { "epoch": 0.3794324797459594, "grad_norm": 0.42175155878067017, "learning_rate": 4.989363703493608e-05, "loss": 0.3783, "step": 2330 }, { "epoch": 0.3795953263037902, "grad_norm": 0.4220595955848694, "learning_rate": 4.9893200089168316e-05, "loss": 0.3796, "step": 2331 }, { "epoch": 0.3797581728616211, "grad_norm": 0.3608360290527344, "learning_rate": 4.989276224966009e-05, "loss": 0.3681, "step": 2332 }, { "epoch": 0.379921019419452, "grad_norm": 0.4421400725841522, "learning_rate": 4.989232351642712e-05, "loss": 0.3761, "step": 2333 }, { "epoch": 0.3800838659772829, "grad_norm": 0.40995219349861145, "learning_rate": 4.989188388948515e-05, "loss": 0.3687, "step": 2334 }, { "epoch": 0.3802467125351138, "grad_norm": 0.3721703886985779, "learning_rate": 4.9891443368849976e-05, "loss": 0.3742, "step": 2335 }, { "epoch": 0.38040955909294466, "grad_norm": 0.39160653948783875, "learning_rate": 4.98910019545374e-05, "loss": 0.3636, "step": 2336 }, { "epoch": 0.38057240565077555, "grad_norm": 0.44893941283226013, "learning_rate": 4.989055964656328e-05, "loss": 0.4091, "step": 2337 }, { "epoch": 0.38073525220860643, "grad_norm": 0.4548361301422119, "learning_rate": 4.989011644494351e-05, "loss": 0.4104, "step": 2338 }, { "epoch": 0.3808980987664373, "grad_norm": 0.3842722475528717, "learning_rate": 4.9889672349693976e-05, "loss": 0.3621, "step": 2339 }, { "epoch": 0.3810609453242682, "grad_norm": 0.3500516712665558, "learning_rate": 4.9889227360830625e-05, "loss": 0.3505, "step": 2340 }, { "epoch": 0.3812237918820991, "grad_norm": 0.4542026221752167, "learning_rate": 4.988878147836945e-05, "loss": 0.3875, "step": 2341 }, { "epoch": 0.38138663843993, "grad_norm": 0.5358873009681702, "learning_rate": 4.9888334702326456e-05, "loss": 0.4204, "step": 2342 }, { "epoch": 0.38154948499776087, "grad_norm": 0.43502113223075867, "learning_rate": 4.9887887032717676e-05, "loss": 0.3748, "step": 2343 }, { "epoch": 0.38171233155559176, "grad_norm": 0.38843536376953125, "learning_rate": 4.9887438469559176e-05, "loss": 0.3954, "step": 2344 }, { "epoch": 0.38187517811342264, "grad_norm": 0.5201306939125061, "learning_rate": 4.988698901286708e-05, "loss": 0.4122, "step": 2345 }, { "epoch": 0.38203802467125353, "grad_norm": 0.4692167341709137, "learning_rate": 4.988653866265751e-05, "loss": 0.4254, "step": 2346 }, { "epoch": 0.3822008712290844, "grad_norm": 0.5704302787780762, "learning_rate": 4.9886087418946644e-05, "loss": 0.4019, "step": 2347 }, { "epoch": 0.3823637177869153, "grad_norm": 0.5112109780311584, "learning_rate": 4.9885635281750675e-05, "loss": 0.3765, "step": 2348 }, { "epoch": 0.3825265643447462, "grad_norm": 0.42406290769577026, "learning_rate": 4.988518225108584e-05, "loss": 0.3698, "step": 2349 }, { "epoch": 0.382689410902577, "grad_norm": 0.3812192976474762, "learning_rate": 4.988472832696841e-05, "loss": 0.3384, "step": 2350 }, { "epoch": 0.3828522574604079, "grad_norm": 0.5296323895454407, "learning_rate": 4.9884273509414666e-05, "loss": 0.4205, "step": 2351 }, { "epoch": 0.3830151040182388, "grad_norm": 0.4679490625858307, "learning_rate": 4.9883817798440956e-05, "loss": 0.3731, "step": 2352 }, { "epoch": 0.3831779505760697, "grad_norm": 0.3970142900943756, "learning_rate": 4.9883361194063635e-05, "loss": 0.3686, "step": 2353 }, { "epoch": 0.38334079713390057, "grad_norm": 0.35603800415992737, "learning_rate": 4.988290369629909e-05, "loss": 0.3963, "step": 2354 }, { "epoch": 0.38350364369173146, "grad_norm": 0.42404118180274963, "learning_rate": 4.988244530516375e-05, "loss": 0.3852, "step": 2355 }, { "epoch": 0.38366649024956234, "grad_norm": 0.3970227837562561, "learning_rate": 4.988198602067408e-05, "loss": 0.3664, "step": 2356 }, { "epoch": 0.38382933680739323, "grad_norm": 0.40639182925224304, "learning_rate": 4.988152584284655e-05, "loss": 0.368, "step": 2357 }, { "epoch": 0.3839921833652241, "grad_norm": 0.38915616273880005, "learning_rate": 4.988106477169771e-05, "loss": 0.4072, "step": 2358 }, { "epoch": 0.384155029923055, "grad_norm": 0.403903990983963, "learning_rate": 4.988060280724409e-05, "loss": 0.401, "step": 2359 }, { "epoch": 0.3843178764808859, "grad_norm": 0.496229887008667, "learning_rate": 4.9880139949502294e-05, "loss": 0.4099, "step": 2360 }, { "epoch": 0.3844807230387168, "grad_norm": 0.45012781023979187, "learning_rate": 4.987967619848892e-05, "loss": 0.4625, "step": 2361 }, { "epoch": 0.38464356959654766, "grad_norm": 0.4159475266933441, "learning_rate": 4.987921155422064e-05, "loss": 0.4283, "step": 2362 }, { "epoch": 0.38480641615437855, "grad_norm": 0.4150163233280182, "learning_rate": 4.9878746016714115e-05, "loss": 0.3844, "step": 2363 }, { "epoch": 0.38496926271220944, "grad_norm": 0.44059836864471436, "learning_rate": 4.9878279585986076e-05, "loss": 0.3992, "step": 2364 }, { "epoch": 0.3851321092700403, "grad_norm": 0.40751969814300537, "learning_rate": 4.9877812262053256e-05, "loss": 0.3766, "step": 2365 }, { "epoch": 0.3852949558278712, "grad_norm": 0.4028109312057495, "learning_rate": 4.987734404493245e-05, "loss": 0.3852, "step": 2366 }, { "epoch": 0.3854578023857021, "grad_norm": 0.4652746617794037, "learning_rate": 4.987687493464045e-05, "loss": 0.3872, "step": 2367 }, { "epoch": 0.38562064894353293, "grad_norm": 0.4022543430328369, "learning_rate": 4.987640493119411e-05, "loss": 0.3696, "step": 2368 }, { "epoch": 0.3857834955013638, "grad_norm": 0.4159221649169922, "learning_rate": 4.98759340346103e-05, "loss": 0.4124, "step": 2369 }, { "epoch": 0.3859463420591947, "grad_norm": 0.378721684217453, "learning_rate": 4.987546224490593e-05, "loss": 0.3484, "step": 2370 }, { "epoch": 0.3861091886170256, "grad_norm": 0.38447606563568115, "learning_rate": 4.987498956209794e-05, "loss": 0.3598, "step": 2371 }, { "epoch": 0.3862720351748565, "grad_norm": 0.3568521738052368, "learning_rate": 4.9874515986203286e-05, "loss": 0.3672, "step": 2372 }, { "epoch": 0.38643488173268736, "grad_norm": 0.4041074514389038, "learning_rate": 4.987404151723899e-05, "loss": 0.4114, "step": 2373 }, { "epoch": 0.38659772829051825, "grad_norm": 0.40556713938713074, "learning_rate": 4.987356615522207e-05, "loss": 0.3461, "step": 2374 }, { "epoch": 0.38676057484834914, "grad_norm": 0.41597768664360046, "learning_rate": 4.987308990016961e-05, "loss": 0.3819, "step": 2375 }, { "epoch": 0.38692342140618, "grad_norm": 0.3498881757259369, "learning_rate": 4.98726127520987e-05, "loss": 0.3453, "step": 2376 }, { "epoch": 0.3870862679640109, "grad_norm": 0.3950353264808655, "learning_rate": 4.987213471102647e-05, "loss": 0.3874, "step": 2377 }, { "epoch": 0.3872491145218418, "grad_norm": 0.3957688510417938, "learning_rate": 4.987165577697009e-05, "loss": 0.3839, "step": 2378 }, { "epoch": 0.3874119610796727, "grad_norm": 0.34432855248451233, "learning_rate": 4.987117594994675e-05, "loss": 0.3708, "step": 2379 }, { "epoch": 0.38757480763750357, "grad_norm": 0.38075700402259827, "learning_rate": 4.987069522997368e-05, "loss": 0.3632, "step": 2380 }, { "epoch": 0.38773765419533446, "grad_norm": 0.42160868644714355, "learning_rate": 4.987021361706812e-05, "loss": 0.4237, "step": 2381 }, { "epoch": 0.38790050075316534, "grad_norm": 0.3753274381160736, "learning_rate": 4.986973111124739e-05, "loss": 0.3703, "step": 2382 }, { "epoch": 0.38806334731099623, "grad_norm": 0.3356972932815552, "learning_rate": 4.98692477125288e-05, "loss": 0.3741, "step": 2383 }, { "epoch": 0.3882261938688271, "grad_norm": 0.4024094343185425, "learning_rate": 4.986876342092971e-05, "loss": 0.3668, "step": 2384 }, { "epoch": 0.388389040426658, "grad_norm": 0.37613794207572937, "learning_rate": 4.98682782364675e-05, "loss": 0.3499, "step": 2385 }, { "epoch": 0.3885518869844889, "grad_norm": 0.41621634364128113, "learning_rate": 4.98677921591596e-05, "loss": 0.3681, "step": 2386 }, { "epoch": 0.3887147335423197, "grad_norm": 0.371761292219162, "learning_rate": 4.986730518902345e-05, "loss": 0.381, "step": 2387 }, { "epoch": 0.3888775801001506, "grad_norm": 0.45860302448272705, "learning_rate": 4.9866817326076536e-05, "loss": 0.4016, "step": 2388 }, { "epoch": 0.3890404266579815, "grad_norm": 0.4053901433944702, "learning_rate": 4.9866328570336385e-05, "loss": 0.4042, "step": 2389 }, { "epoch": 0.3892032732158124, "grad_norm": 0.40543225407600403, "learning_rate": 4.986583892182053e-05, "loss": 0.3947, "step": 2390 }, { "epoch": 0.38936611977364327, "grad_norm": 0.3884263038635254, "learning_rate": 4.986534838054656e-05, "loss": 0.3893, "step": 2391 }, { "epoch": 0.38952896633147416, "grad_norm": 0.40793344378471375, "learning_rate": 4.986485694653209e-05, "loss": 0.3694, "step": 2392 }, { "epoch": 0.38969181288930504, "grad_norm": 0.4403514564037323, "learning_rate": 4.9864364619794754e-05, "loss": 0.4075, "step": 2393 }, { "epoch": 0.38985465944713593, "grad_norm": 0.37417247891426086, "learning_rate": 4.986387140035223e-05, "loss": 0.3437, "step": 2394 }, { "epoch": 0.3900175060049668, "grad_norm": 0.405781626701355, "learning_rate": 4.986337728822223e-05, "loss": 0.3475, "step": 2395 }, { "epoch": 0.3901803525627977, "grad_norm": 0.36836546659469604, "learning_rate": 4.9862882283422493e-05, "loss": 0.385, "step": 2396 }, { "epoch": 0.3903431991206286, "grad_norm": 0.41831424832344055, "learning_rate": 4.98623863859708e-05, "loss": 0.3973, "step": 2397 }, { "epoch": 0.3905060456784595, "grad_norm": 0.3641310930252075, "learning_rate": 4.986188959588493e-05, "loss": 0.4167, "step": 2398 }, { "epoch": 0.39066889223629037, "grad_norm": 0.4577591121196747, "learning_rate": 4.986139191318274e-05, "loss": 0.4053, "step": 2399 }, { "epoch": 0.39083173879412125, "grad_norm": 0.406686395406723, "learning_rate": 4.9860893337882096e-05, "loss": 0.3603, "step": 2400 }, { "epoch": 0.39099458535195214, "grad_norm": 0.35574668645858765, "learning_rate": 4.98603938700009e-05, "loss": 0.3998, "step": 2401 }, { "epoch": 0.391157431909783, "grad_norm": 0.39000391960144043, "learning_rate": 4.985989350955708e-05, "loss": 0.4279, "step": 2402 }, { "epoch": 0.3913202784676139, "grad_norm": 0.3863001763820648, "learning_rate": 4.9859392256568594e-05, "loss": 0.4048, "step": 2403 }, { "epoch": 0.3914831250254448, "grad_norm": 0.35478100180625916, "learning_rate": 4.9858890111053443e-05, "loss": 0.3562, "step": 2404 }, { "epoch": 0.39164597158327563, "grad_norm": 0.38372671604156494, "learning_rate": 4.9858387073029664e-05, "loss": 0.4297, "step": 2405 }, { "epoch": 0.3918088181411065, "grad_norm": 0.4215872287750244, "learning_rate": 4.9857883142515315e-05, "loss": 0.3685, "step": 2406 }, { "epoch": 0.3919716646989374, "grad_norm": 0.402022123336792, "learning_rate": 4.985737831952848e-05, "loss": 0.4275, "step": 2407 }, { "epoch": 0.3921345112567683, "grad_norm": 0.3779134750366211, "learning_rate": 4.985687260408729e-05, "loss": 0.3999, "step": 2408 }, { "epoch": 0.3922973578145992, "grad_norm": 0.34867429733276367, "learning_rate": 4.98563659962099e-05, "loss": 0.3172, "step": 2409 }, { "epoch": 0.39246020437243007, "grad_norm": 0.5822467803955078, "learning_rate": 4.98558584959145e-05, "loss": 0.4139, "step": 2410 }, { "epoch": 0.39262305093026095, "grad_norm": 0.4226137697696686, "learning_rate": 4.985535010321931e-05, "loss": 0.3612, "step": 2411 }, { "epoch": 0.39278589748809184, "grad_norm": 0.3691907227039337, "learning_rate": 4.9854840818142576e-05, "loss": 0.392, "step": 2412 }, { "epoch": 0.3929487440459227, "grad_norm": 0.4032479226589203, "learning_rate": 4.98543306407026e-05, "loss": 0.3992, "step": 2413 }, { "epoch": 0.3931115906037536, "grad_norm": 0.3787313401699066, "learning_rate": 4.985381957091768e-05, "loss": 0.3923, "step": 2414 }, { "epoch": 0.3932744371615845, "grad_norm": 0.3833445608615875, "learning_rate": 4.9853307608806176e-05, "loss": 0.4024, "step": 2415 }, { "epoch": 0.3934372837194154, "grad_norm": 0.3765753209590912, "learning_rate": 4.985279475438647e-05, "loss": 0.3516, "step": 2416 }, { "epoch": 0.3936001302772463, "grad_norm": 0.42621204257011414, "learning_rate": 4.985228100767697e-05, "loss": 0.4125, "step": 2417 }, { "epoch": 0.39376297683507716, "grad_norm": 0.3749050498008728, "learning_rate": 4.985176636869612e-05, "loss": 0.3698, "step": 2418 }, { "epoch": 0.39392582339290805, "grad_norm": 0.3605683147907257, "learning_rate": 4.98512508374624e-05, "loss": 0.3629, "step": 2419 }, { "epoch": 0.39408866995073893, "grad_norm": 0.3585672080516815, "learning_rate": 4.985073441399432e-05, "loss": 0.3541, "step": 2420 }, { "epoch": 0.3942515165085698, "grad_norm": 0.5530930161476135, "learning_rate": 4.985021709831042e-05, "loss": 0.3622, "step": 2421 }, { "epoch": 0.3944143630664007, "grad_norm": 0.39331912994384766, "learning_rate": 4.984969889042928e-05, "loss": 0.3866, "step": 2422 }, { "epoch": 0.3945772096242316, "grad_norm": 0.4128577411174774, "learning_rate": 4.9849179790369485e-05, "loss": 0.428, "step": 2423 }, { "epoch": 0.3947400561820624, "grad_norm": 0.3104557693004608, "learning_rate": 4.984865979814969e-05, "loss": 0.3326, "step": 2424 }, { "epoch": 0.3949029027398933, "grad_norm": 0.3664581775665283, "learning_rate": 4.9848138913788554e-05, "loss": 0.3612, "step": 2425 }, { "epoch": 0.3950657492977242, "grad_norm": 0.3844570219516754, "learning_rate": 4.984761713730479e-05, "loss": 0.4025, "step": 2426 }, { "epoch": 0.3952285958555551, "grad_norm": 0.3480593264102936, "learning_rate": 4.984709446871713e-05, "loss": 0.383, "step": 2427 }, { "epoch": 0.395391442413386, "grad_norm": 0.39846381545066833, "learning_rate": 4.9846570908044333e-05, "loss": 0.436, "step": 2428 }, { "epoch": 0.39555428897121686, "grad_norm": 0.43143153190612793, "learning_rate": 4.984604645530519e-05, "loss": 0.4055, "step": 2429 }, { "epoch": 0.39571713552904775, "grad_norm": 0.39157047867774963, "learning_rate": 4.984552111051855e-05, "loss": 0.3859, "step": 2430 }, { "epoch": 0.39587998208687863, "grad_norm": 0.3653808832168579, "learning_rate": 4.984499487370325e-05, "loss": 0.3586, "step": 2431 }, { "epoch": 0.3960428286447095, "grad_norm": 0.4179985821247101, "learning_rate": 4.98444677448782e-05, "loss": 0.3851, "step": 2432 }, { "epoch": 0.3962056752025404, "grad_norm": 0.4166795611381531, "learning_rate": 4.984393972406233e-05, "loss": 0.4203, "step": 2433 }, { "epoch": 0.3963685217603713, "grad_norm": 0.3992055654525757, "learning_rate": 4.984341081127458e-05, "loss": 0.3369, "step": 2434 }, { "epoch": 0.3965313683182022, "grad_norm": 0.39179912209510803, "learning_rate": 4.9842881006533956e-05, "loss": 0.3921, "step": 2435 }, { "epoch": 0.39669421487603307, "grad_norm": 0.376461386680603, "learning_rate": 4.984235030985947e-05, "loss": 0.4126, "step": 2436 }, { "epoch": 0.39685706143386396, "grad_norm": 0.3627777099609375, "learning_rate": 4.984181872127018e-05, "loss": 0.385, "step": 2437 }, { "epoch": 0.39701990799169484, "grad_norm": 0.3821031153202057, "learning_rate": 4.984128624078517e-05, "loss": 0.398, "step": 2438 }, { "epoch": 0.39718275454952573, "grad_norm": 0.324520081281662, "learning_rate": 4.984075286842355e-05, "loss": 0.3496, "step": 2439 }, { "epoch": 0.3973456011073566, "grad_norm": 0.4098062217235565, "learning_rate": 4.9840218604204485e-05, "loss": 0.4249, "step": 2440 }, { "epoch": 0.3975084476651875, "grad_norm": 0.44659051299095154, "learning_rate": 4.9839683448147145e-05, "loss": 0.372, "step": 2441 }, { "epoch": 0.39767129422301833, "grad_norm": 0.3436124920845032, "learning_rate": 4.983914740027075e-05, "loss": 0.3507, "step": 2442 }, { "epoch": 0.3978341407808492, "grad_norm": 0.3932435214519501, "learning_rate": 4.983861046059454e-05, "loss": 0.401, "step": 2443 }, { "epoch": 0.3979969873386801, "grad_norm": 0.4145962595939636, "learning_rate": 4.98380726291378e-05, "loss": 0.3776, "step": 2444 }, { "epoch": 0.398159833896511, "grad_norm": 0.35684698820114136, "learning_rate": 4.983753390591984e-05, "loss": 0.3421, "step": 2445 }, { "epoch": 0.3983226804543419, "grad_norm": 0.35011017322540283, "learning_rate": 4.9836994290959986e-05, "loss": 0.369, "step": 2446 }, { "epoch": 0.39848552701217277, "grad_norm": 0.40860965847969055, "learning_rate": 4.983645378427763e-05, "loss": 0.3532, "step": 2447 }, { "epoch": 0.39864837357000366, "grad_norm": 0.3651144802570343, "learning_rate": 4.983591238589217e-05, "loss": 0.373, "step": 2448 }, { "epoch": 0.39881122012783454, "grad_norm": 0.3170398771762848, "learning_rate": 4.983537009582305e-05, "loss": 0.3394, "step": 2449 }, { "epoch": 0.39897406668566543, "grad_norm": 0.44794800877571106, "learning_rate": 4.983482691408973e-05, "loss": 0.3725, "step": 2450 }, { "epoch": 0.3991369132434963, "grad_norm": 0.4195692241191864, "learning_rate": 4.983428284071171e-05, "loss": 0.4247, "step": 2451 }, { "epoch": 0.3992997598013272, "grad_norm": 0.467215895652771, "learning_rate": 4.983373787570854e-05, "loss": 0.4267, "step": 2452 }, { "epoch": 0.3994626063591581, "grad_norm": 0.38549157977104187, "learning_rate": 4.9833192019099774e-05, "loss": 0.3809, "step": 2453 }, { "epoch": 0.399625452916989, "grad_norm": 0.3300473093986511, "learning_rate": 4.983264527090501e-05, "loss": 0.3159, "step": 2454 }, { "epoch": 0.39978829947481986, "grad_norm": 0.3967401087284088, "learning_rate": 4.983209763114388e-05, "loss": 0.4319, "step": 2455 }, { "epoch": 0.39995114603265075, "grad_norm": 0.38326510787010193, "learning_rate": 4.983154909983605e-05, "loss": 0.3781, "step": 2456 }, { "epoch": 0.40011399259048164, "grad_norm": 0.37438368797302246, "learning_rate": 4.983099967700121e-05, "loss": 0.3634, "step": 2457 }, { "epoch": 0.4002768391483125, "grad_norm": 0.3980781137943268, "learning_rate": 4.9830449362659085e-05, "loss": 0.3554, "step": 2458 }, { "epoch": 0.4004396857061434, "grad_norm": 0.39266830682754517, "learning_rate": 4.9829898156829434e-05, "loss": 0.3613, "step": 2459 }, { "epoch": 0.40060253226397424, "grad_norm": 0.36315929889678955, "learning_rate": 4.9829346059532044e-05, "loss": 0.3646, "step": 2460 }, { "epoch": 0.40076537882180513, "grad_norm": 0.4714886248111725, "learning_rate": 4.982879307078674e-05, "loss": 0.3964, "step": 2461 }, { "epoch": 0.400928225379636, "grad_norm": 0.6435990333557129, "learning_rate": 4.982823919061338e-05, "loss": 0.4709, "step": 2462 }, { "epoch": 0.4010910719374669, "grad_norm": 0.4058871269226074, "learning_rate": 4.982768441903184e-05, "loss": 0.3791, "step": 2463 }, { "epoch": 0.4012539184952978, "grad_norm": 0.4232746362686157, "learning_rate": 4.982712875606205e-05, "loss": 0.3513, "step": 2464 }, { "epoch": 0.4014167650531287, "grad_norm": 0.3547781705856323, "learning_rate": 4.982657220172395e-05, "loss": 0.3871, "step": 2465 }, { "epoch": 0.40157961161095956, "grad_norm": 0.35353589057922363, "learning_rate": 4.982601475603752e-05, "loss": 0.3843, "step": 2466 }, { "epoch": 0.40174245816879045, "grad_norm": 0.4800085425376892, "learning_rate": 4.9825456419022787e-05, "loss": 0.3878, "step": 2467 }, { "epoch": 0.40190530472662134, "grad_norm": 0.3723777234554291, "learning_rate": 4.9824897190699795e-05, "loss": 0.3656, "step": 2468 }, { "epoch": 0.4020681512844522, "grad_norm": 0.43674349784851074, "learning_rate": 4.98243370710886e-05, "loss": 0.4138, "step": 2469 }, { "epoch": 0.4022309978422831, "grad_norm": 0.38321369886398315, "learning_rate": 4.982377606020934e-05, "loss": 0.397, "step": 2470 }, { "epoch": 0.402393844400114, "grad_norm": 0.3640492260456085, "learning_rate": 4.9823214158082144e-05, "loss": 0.3471, "step": 2471 }, { "epoch": 0.4025566909579449, "grad_norm": 0.40306198596954346, "learning_rate": 4.982265136472719e-05, "loss": 0.4633, "step": 2472 }, { "epoch": 0.40271953751577577, "grad_norm": 0.3730575442314148, "learning_rate": 4.9822087680164675e-05, "loss": 0.3863, "step": 2473 }, { "epoch": 0.40288238407360666, "grad_norm": 0.34134384989738464, "learning_rate": 4.982152310441485e-05, "loss": 0.4139, "step": 2474 }, { "epoch": 0.40304523063143755, "grad_norm": 0.5436410903930664, "learning_rate": 4.9820957637497974e-05, "loss": 0.3855, "step": 2475 }, { "epoch": 0.40320807718926843, "grad_norm": 0.3858787715435028, "learning_rate": 4.982039127943435e-05, "loss": 0.3675, "step": 2476 }, { "epoch": 0.4033709237470993, "grad_norm": 0.42142000794410706, "learning_rate": 4.981982403024432e-05, "loss": 0.367, "step": 2477 }, { "epoch": 0.4035337703049302, "grad_norm": 0.4100750684738159, "learning_rate": 4.981925588994825e-05, "loss": 0.4024, "step": 2478 }, { "epoch": 0.40369661686276104, "grad_norm": 0.43181028962135315, "learning_rate": 4.981868685856652e-05, "loss": 0.361, "step": 2479 }, { "epoch": 0.4038594634205919, "grad_norm": 0.38489291071891785, "learning_rate": 4.981811693611958e-05, "loss": 0.3903, "step": 2480 }, { "epoch": 0.4040223099784228, "grad_norm": 0.4377588927745819, "learning_rate": 4.981754612262789e-05, "loss": 0.3929, "step": 2481 }, { "epoch": 0.4041851565362537, "grad_norm": 0.37393537163734436, "learning_rate": 4.9816974418111925e-05, "loss": 0.3652, "step": 2482 }, { "epoch": 0.4043480030940846, "grad_norm": 0.45890164375305176, "learning_rate": 4.981640182259224e-05, "loss": 0.373, "step": 2483 }, { "epoch": 0.40451084965191547, "grad_norm": 0.5033797025680542, "learning_rate": 4.981582833608937e-05, "loss": 0.3869, "step": 2484 }, { "epoch": 0.40467369620974636, "grad_norm": 0.39532527327537537, "learning_rate": 4.9815253958623916e-05, "loss": 0.3395, "step": 2485 }, { "epoch": 0.40483654276757725, "grad_norm": 0.5426596403121948, "learning_rate": 4.9814678690216495e-05, "loss": 0.3653, "step": 2486 }, { "epoch": 0.40499938932540813, "grad_norm": 0.43155843019485474, "learning_rate": 4.9814102530887766e-05, "loss": 0.4068, "step": 2487 }, { "epoch": 0.405162235883239, "grad_norm": 0.4831719696521759, "learning_rate": 4.9813525480658395e-05, "loss": 0.4487, "step": 2488 }, { "epoch": 0.4053250824410699, "grad_norm": 0.4815171957015991, "learning_rate": 4.981294753954913e-05, "loss": 0.3675, "step": 2489 }, { "epoch": 0.4054879289989008, "grad_norm": 0.35168230533599854, "learning_rate": 4.9812368707580706e-05, "loss": 0.3585, "step": 2490 }, { "epoch": 0.4056507755567317, "grad_norm": 0.5732851028442383, "learning_rate": 4.9811788984773905e-05, "loss": 0.4305, "step": 2491 }, { "epoch": 0.40581362211456257, "grad_norm": 0.4830509126186371, "learning_rate": 4.981120837114954e-05, "loss": 0.3887, "step": 2492 }, { "epoch": 0.40597646867239345, "grad_norm": 0.3786526918411255, "learning_rate": 4.981062686672846e-05, "loss": 0.3417, "step": 2493 }, { "epoch": 0.40613931523022434, "grad_norm": 0.4255758821964264, "learning_rate": 4.981004447153154e-05, "loss": 0.3531, "step": 2494 }, { "epoch": 0.4063021617880552, "grad_norm": 0.4540872275829315, "learning_rate": 4.9809461185579685e-05, "loss": 0.3414, "step": 2495 }, { "epoch": 0.4064650083458861, "grad_norm": 0.3750596046447754, "learning_rate": 4.980887700889385e-05, "loss": 0.3426, "step": 2496 }, { "epoch": 0.40662785490371695, "grad_norm": 0.511766791343689, "learning_rate": 4.9808291941495e-05, "loss": 0.4227, "step": 2497 }, { "epoch": 0.40679070146154783, "grad_norm": 0.3821066617965698, "learning_rate": 4.980770598340414e-05, "loss": 0.3665, "step": 2498 }, { "epoch": 0.4069535480193787, "grad_norm": 0.38111913204193115, "learning_rate": 4.98071191346423e-05, "loss": 0.3843, "step": 2499 }, { "epoch": 0.4071163945772096, "grad_norm": 0.4450906217098236, "learning_rate": 4.980653139523057e-05, "loss": 0.379, "step": 2500 }, { "epoch": 0.4072792411350405, "grad_norm": 0.3291199207305908, "learning_rate": 4.980594276519004e-05, "loss": 0.3706, "step": 2501 }, { "epoch": 0.4074420876928714, "grad_norm": 0.3793874680995941, "learning_rate": 4.980535324454185e-05, "loss": 0.3979, "step": 2502 }, { "epoch": 0.40760493425070227, "grad_norm": 0.3441563546657562, "learning_rate": 4.980476283330715e-05, "loss": 0.3629, "step": 2503 }, { "epoch": 0.40776778080853315, "grad_norm": 0.33264797925949097, "learning_rate": 4.980417153150715e-05, "loss": 0.3638, "step": 2504 }, { "epoch": 0.40793062736636404, "grad_norm": 0.4175379276275635, "learning_rate": 4.980357933916307e-05, "loss": 0.3768, "step": 2505 }, { "epoch": 0.4080934739241949, "grad_norm": 0.3289051353931427, "learning_rate": 4.980298625629619e-05, "loss": 0.3885, "step": 2506 }, { "epoch": 0.4082563204820258, "grad_norm": 0.3308337330818176, "learning_rate": 4.980239228292778e-05, "loss": 0.3724, "step": 2507 }, { "epoch": 0.4084191670398567, "grad_norm": 0.40009772777557373, "learning_rate": 4.9801797419079186e-05, "loss": 0.3496, "step": 2508 }, { "epoch": 0.4085820135976876, "grad_norm": 0.40873977541923523, "learning_rate": 4.9801201664771755e-05, "loss": 0.3955, "step": 2509 }, { "epoch": 0.4087448601555185, "grad_norm": 0.37950247526168823, "learning_rate": 4.980060502002688e-05, "loss": 0.4006, "step": 2510 }, { "epoch": 0.40890770671334936, "grad_norm": 0.40319693088531494, "learning_rate": 4.9800007484865976e-05, "loss": 0.4053, "step": 2511 }, { "epoch": 0.40907055327118025, "grad_norm": 0.4408920705318451, "learning_rate": 4.9799409059310495e-05, "loss": 0.3712, "step": 2512 }, { "epoch": 0.40923339982901114, "grad_norm": 0.37774527072906494, "learning_rate": 4.979880974338194e-05, "loss": 0.3798, "step": 2513 }, { "epoch": 0.409396246386842, "grad_norm": 0.39000147581100464, "learning_rate": 4.9798209537101815e-05, "loss": 0.3681, "step": 2514 }, { "epoch": 0.4095590929446729, "grad_norm": 0.43317919969558716, "learning_rate": 4.979760844049166e-05, "loss": 0.3718, "step": 2515 }, { "epoch": 0.40972193950250374, "grad_norm": 0.3793658912181854, "learning_rate": 4.979700645357307e-05, "loss": 0.3928, "step": 2516 }, { "epoch": 0.4098847860603346, "grad_norm": 0.4474175274372101, "learning_rate": 4.979640357636765e-05, "loss": 0.416, "step": 2517 }, { "epoch": 0.4100476326181655, "grad_norm": 0.4047335684299469, "learning_rate": 4.979579980889706e-05, "loss": 0.3538, "step": 2518 }, { "epoch": 0.4102104791759964, "grad_norm": 0.3204907774925232, "learning_rate": 4.9795195151182974e-05, "loss": 0.3596, "step": 2519 }, { "epoch": 0.4103733257338273, "grad_norm": 0.35195520520210266, "learning_rate": 4.9794589603247076e-05, "loss": 0.367, "step": 2520 }, { "epoch": 0.4105361722916582, "grad_norm": 0.34261849522590637, "learning_rate": 4.9793983165111134e-05, "loss": 0.394, "step": 2521 }, { "epoch": 0.41069901884948906, "grad_norm": 0.3817909359931946, "learning_rate": 4.979337583679691e-05, "loss": 0.3601, "step": 2522 }, { "epoch": 0.41086186540731995, "grad_norm": 0.37595000863075256, "learning_rate": 4.979276761832621e-05, "loss": 0.3852, "step": 2523 }, { "epoch": 0.41102471196515084, "grad_norm": 0.3616469204425812, "learning_rate": 4.9792158509720866e-05, "loss": 0.3855, "step": 2524 }, { "epoch": 0.4111875585229817, "grad_norm": 0.329602986574173, "learning_rate": 4.979154851100276e-05, "loss": 0.3315, "step": 2525 }, { "epoch": 0.4113504050808126, "grad_norm": 0.43072202801704407, "learning_rate": 4.9790937622193776e-05, "loss": 0.4153, "step": 2526 }, { "epoch": 0.4115132516386435, "grad_norm": 0.39781296253204346, "learning_rate": 4.979032584331587e-05, "loss": 0.4077, "step": 2527 }, { "epoch": 0.4116760981964744, "grad_norm": 0.39959368109703064, "learning_rate": 4.9789713174390976e-05, "loss": 0.3802, "step": 2528 }, { "epoch": 0.41183894475430527, "grad_norm": 0.40138354897499084, "learning_rate": 4.9789099615441113e-05, "loss": 0.3613, "step": 2529 }, { "epoch": 0.41200179131213616, "grad_norm": 0.3664921820163727, "learning_rate": 4.978848516648831e-05, "loss": 0.382, "step": 2530 }, { "epoch": 0.41216463786996704, "grad_norm": 0.4202539920806885, "learning_rate": 4.978786982755462e-05, "loss": 0.4109, "step": 2531 }, { "epoch": 0.41232748442779793, "grad_norm": 0.4169095456600189, "learning_rate": 4.978725359866213e-05, "loss": 0.3836, "step": 2532 }, { "epoch": 0.4124903309856288, "grad_norm": 0.38029739260673523, "learning_rate": 4.978663647983297e-05, "loss": 0.35, "step": 2533 }, { "epoch": 0.41265317754345965, "grad_norm": 0.44590091705322266, "learning_rate": 4.978601847108931e-05, "loss": 0.3707, "step": 2534 }, { "epoch": 0.41281602410129054, "grad_norm": 0.5273959636688232, "learning_rate": 4.9785399572453316e-05, "loss": 0.401, "step": 2535 }, { "epoch": 0.4129788706591214, "grad_norm": 0.33591318130493164, "learning_rate": 4.978477978394722e-05, "loss": 0.3669, "step": 2536 }, { "epoch": 0.4131417172169523, "grad_norm": 0.476229190826416, "learning_rate": 4.978415910559327e-05, "loss": 0.3997, "step": 2537 }, { "epoch": 0.4133045637747832, "grad_norm": 0.4173933267593384, "learning_rate": 4.978353753741376e-05, "loss": 0.3547, "step": 2538 }, { "epoch": 0.4134674103326141, "grad_norm": 0.34379857778549194, "learning_rate": 4.978291507943099e-05, "loss": 0.3607, "step": 2539 }, { "epoch": 0.41363025689044497, "grad_norm": 0.3814087510108948, "learning_rate": 4.978229173166733e-05, "loss": 0.3707, "step": 2540 }, { "epoch": 0.41379310344827586, "grad_norm": 0.35972684621810913, "learning_rate": 4.978166749414513e-05, "loss": 0.3618, "step": 2541 }, { "epoch": 0.41395595000610674, "grad_norm": 0.41255518794059753, "learning_rate": 4.978104236688683e-05, "loss": 0.3283, "step": 2542 }, { "epoch": 0.41411879656393763, "grad_norm": 0.3867642879486084, "learning_rate": 4.9780416349914866e-05, "loss": 0.3929, "step": 2543 }, { "epoch": 0.4142816431217685, "grad_norm": 0.38734114170074463, "learning_rate": 4.977978944325171e-05, "loss": 0.4215, "step": 2544 }, { "epoch": 0.4144444896795994, "grad_norm": 0.40252485871315, "learning_rate": 4.977916164691986e-05, "loss": 0.387, "step": 2545 }, { "epoch": 0.4146073362374303, "grad_norm": 0.9622208476066589, "learning_rate": 4.977853296094188e-05, "loss": 0.4149, "step": 2546 }, { "epoch": 0.4147701827952612, "grad_norm": 0.36776700615882874, "learning_rate": 4.977790338534032e-05, "loss": 0.3809, "step": 2547 }, { "epoch": 0.41493302935309206, "grad_norm": 0.3194258213043213, "learning_rate": 4.97772729201378e-05, "loss": 0.3322, "step": 2548 }, { "epoch": 0.41509587591092295, "grad_norm": 0.3791617155075073, "learning_rate": 4.977664156535694e-05, "loss": 0.3965, "step": 2549 }, { "epoch": 0.41525872246875384, "grad_norm": 0.341022253036499, "learning_rate": 4.977600932102041e-05, "loss": 0.3368, "step": 2550 }, { "epoch": 0.4154215690265847, "grad_norm": 0.3741472065448761, "learning_rate": 4.9775376187150926e-05, "loss": 0.3789, "step": 2551 }, { "epoch": 0.4155844155844156, "grad_norm": 0.370484858751297, "learning_rate": 4.9774742163771196e-05, "loss": 0.3836, "step": 2552 }, { "epoch": 0.41574726214224644, "grad_norm": 0.5424529314041138, "learning_rate": 4.9774107250904e-05, "loss": 0.4212, "step": 2553 }, { "epoch": 0.41591010870007733, "grad_norm": 0.43587514758110046, "learning_rate": 4.9773471448572136e-05, "loss": 0.3995, "step": 2554 }, { "epoch": 0.4160729552579082, "grad_norm": 0.41483810544013977, "learning_rate": 4.977283475679842e-05, "loss": 0.3807, "step": 2555 }, { "epoch": 0.4162358018157391, "grad_norm": 0.48580291867256165, "learning_rate": 4.977219717560571e-05, "loss": 0.4186, "step": 2556 }, { "epoch": 0.41639864837357, "grad_norm": 0.4093851149082184, "learning_rate": 4.9771558705016904e-05, "loss": 0.3891, "step": 2557 }, { "epoch": 0.4165614949314009, "grad_norm": 0.35274848341941833, "learning_rate": 4.9770919345054924e-05, "loss": 0.4072, "step": 2558 }, { "epoch": 0.41672434148923176, "grad_norm": 0.3399491310119629, "learning_rate": 4.9770279095742725e-05, "loss": 0.3601, "step": 2559 }, { "epoch": 0.41688718804706265, "grad_norm": 0.3479160964488983, "learning_rate": 4.9769637957103286e-05, "loss": 0.3327, "step": 2560 }, { "epoch": 0.41705003460489354, "grad_norm": 0.35566267371177673, "learning_rate": 4.9768995929159646e-05, "loss": 0.404, "step": 2561 }, { "epoch": 0.4172128811627244, "grad_norm": 0.37259000539779663, "learning_rate": 4.976835301193483e-05, "loss": 0.3835, "step": 2562 }, { "epoch": 0.4173757277205553, "grad_norm": 0.4267946779727936, "learning_rate": 4.976770920545194e-05, "loss": 0.3741, "step": 2563 }, { "epoch": 0.4175385742783862, "grad_norm": 0.3902011215686798, "learning_rate": 4.9767064509734075e-05, "loss": 0.3782, "step": 2564 }, { "epoch": 0.4177014208362171, "grad_norm": 0.38899755477905273, "learning_rate": 4.9766418924804396e-05, "loss": 0.3449, "step": 2565 }, { "epoch": 0.417864267394048, "grad_norm": 0.4205355644226074, "learning_rate": 4.976577245068608e-05, "loss": 0.3464, "step": 2566 }, { "epoch": 0.41802711395187886, "grad_norm": 0.385454922914505, "learning_rate": 4.976512508740232e-05, "loss": 0.4181, "step": 2567 }, { "epoch": 0.41818996050970975, "grad_norm": 0.46701934933662415, "learning_rate": 4.9764476834976384e-05, "loss": 0.397, "step": 2568 }, { "epoch": 0.41835280706754063, "grad_norm": 0.41167882084846497, "learning_rate": 4.976382769343153e-05, "loss": 0.388, "step": 2569 }, { "epoch": 0.4185156536253715, "grad_norm": 0.5124958157539368, "learning_rate": 4.976317766279106e-05, "loss": 0.4225, "step": 2570 }, { "epoch": 0.41867850018320235, "grad_norm": 0.4272834062576294, "learning_rate": 4.976252674307833e-05, "loss": 0.3534, "step": 2571 }, { "epoch": 0.41884134674103324, "grad_norm": 0.41625913977622986, "learning_rate": 4.976187493431669e-05, "loss": 0.3941, "step": 2572 }, { "epoch": 0.4190041932988641, "grad_norm": 0.38812440633773804, "learning_rate": 4.976122223652956e-05, "loss": 0.4183, "step": 2573 }, { "epoch": 0.419167039856695, "grad_norm": 0.33129552006721497, "learning_rate": 4.976056864974036e-05, "loss": 0.3483, "step": 2574 }, { "epoch": 0.4193298864145259, "grad_norm": 0.35698893666267395, "learning_rate": 4.975991417397256e-05, "loss": 0.3917, "step": 2575 }, { "epoch": 0.4194927329723568, "grad_norm": 0.4171442687511444, "learning_rate": 4.975925880924966e-05, "loss": 0.4016, "step": 2576 }, { "epoch": 0.4196555795301877, "grad_norm": 0.39062628149986267, "learning_rate": 4.975860255559519e-05, "loss": 0.3844, "step": 2577 }, { "epoch": 0.41981842608801856, "grad_norm": 0.34297746419906616, "learning_rate": 4.97579454130327e-05, "loss": 0.3586, "step": 2578 }, { "epoch": 0.41998127264584945, "grad_norm": 0.3398297429084778, "learning_rate": 4.9757287381585805e-05, "loss": 0.3442, "step": 2579 }, { "epoch": 0.42014411920368033, "grad_norm": 0.4235532879829407, "learning_rate": 4.975662846127812e-05, "loss": 0.3663, "step": 2580 }, { "epoch": 0.4203069657615112, "grad_norm": 0.3538091778755188, "learning_rate": 4.975596865213329e-05, "loss": 0.3783, "step": 2581 }, { "epoch": 0.4204698123193421, "grad_norm": 0.36713045835494995, "learning_rate": 4.975530795417502e-05, "loss": 0.3761, "step": 2582 }, { "epoch": 0.420632658877173, "grad_norm": 0.39643341302871704, "learning_rate": 4.975464636742702e-05, "loss": 0.3764, "step": 2583 }, { "epoch": 0.4207955054350039, "grad_norm": 0.3927460312843323, "learning_rate": 4.9753983891913054e-05, "loss": 0.3774, "step": 2584 }, { "epoch": 0.42095835199283477, "grad_norm": 0.40077081322669983, "learning_rate": 4.9753320527656896e-05, "loss": 0.3735, "step": 2585 }, { "epoch": 0.42112119855066565, "grad_norm": 0.3336019814014435, "learning_rate": 4.975265627468237e-05, "loss": 0.3424, "step": 2586 }, { "epoch": 0.42128404510849654, "grad_norm": 0.3931148648262024, "learning_rate": 4.9751991133013334e-05, "loss": 0.3735, "step": 2587 }, { "epoch": 0.42144689166632743, "grad_norm": 0.4234001040458679, "learning_rate": 4.975132510267365e-05, "loss": 0.3977, "step": 2588 }, { "epoch": 0.42160973822415826, "grad_norm": 0.3276737332344055, "learning_rate": 4.975065818368723e-05, "loss": 0.3322, "step": 2589 }, { "epoch": 0.42177258478198915, "grad_norm": 0.3842572867870331, "learning_rate": 4.974999037607804e-05, "loss": 0.3928, "step": 2590 }, { "epoch": 0.42193543133982003, "grad_norm": 0.38468292355537415, "learning_rate": 4.974932167987003e-05, "loss": 0.4263, "step": 2591 }, { "epoch": 0.4220982778976509, "grad_norm": 0.39593741297721863, "learning_rate": 4.974865209508722e-05, "loss": 0.3344, "step": 2592 }, { "epoch": 0.4222611244554818, "grad_norm": 0.45745590329170227, "learning_rate": 4.974798162175366e-05, "loss": 0.418, "step": 2593 }, { "epoch": 0.4224239710133127, "grad_norm": 0.46177393198013306, "learning_rate": 4.974731025989341e-05, "loss": 0.3962, "step": 2594 }, { "epoch": 0.4225868175711436, "grad_norm": 0.48700523376464844, "learning_rate": 4.974663800953058e-05, "loss": 0.3928, "step": 2595 }, { "epoch": 0.42274966412897447, "grad_norm": 0.3418383300304413, "learning_rate": 4.97459648706893e-05, "loss": 0.3589, "step": 2596 }, { "epoch": 0.42291251068680535, "grad_norm": 0.34674203395843506, "learning_rate": 4.9745290843393733e-05, "loss": 0.3423, "step": 2597 }, { "epoch": 0.42307535724463624, "grad_norm": 0.5913266539573669, "learning_rate": 4.974461592766809e-05, "loss": 0.405, "step": 2598 }, { "epoch": 0.42323820380246713, "grad_norm": 0.3948669731616974, "learning_rate": 4.9743940123536604e-05, "loss": 0.3578, "step": 2599 }, { "epoch": 0.423401050360298, "grad_norm": 0.41872021555900574, "learning_rate": 4.974326343102353e-05, "loss": 0.4254, "step": 2600 }, { "epoch": 0.4235638969181289, "grad_norm": 0.36012929677963257, "learning_rate": 4.9742585850153165e-05, "loss": 0.3517, "step": 2601 }, { "epoch": 0.4237267434759598, "grad_norm": 0.35148200392723083, "learning_rate": 4.9741907380949834e-05, "loss": 0.3686, "step": 2602 }, { "epoch": 0.4238895900337907, "grad_norm": 0.435979962348938, "learning_rate": 4.9741228023437904e-05, "loss": 0.3821, "step": 2603 }, { "epoch": 0.42405243659162156, "grad_norm": 0.40343669056892395, "learning_rate": 4.974054777764176e-05, "loss": 0.4081, "step": 2604 }, { "epoch": 0.42421528314945245, "grad_norm": 0.3209671974182129, "learning_rate": 4.973986664358583e-05, "loss": 0.3348, "step": 2605 }, { "epoch": 0.42437812970728334, "grad_norm": 0.41091519594192505, "learning_rate": 4.9739184621294556e-05, "loss": 0.3822, "step": 2606 }, { "epoch": 0.4245409762651142, "grad_norm": 0.3497019410133362, "learning_rate": 4.973850171079244e-05, "loss": 0.3761, "step": 2607 }, { "epoch": 0.42470382282294505, "grad_norm": 0.37443578243255615, "learning_rate": 4.973781791210399e-05, "loss": 0.3735, "step": 2608 }, { "epoch": 0.42486666938077594, "grad_norm": 0.3997231423854828, "learning_rate": 4.973713322525376e-05, "loss": 0.4064, "step": 2609 }, { "epoch": 0.42502951593860683, "grad_norm": 0.3712550103664398, "learning_rate": 4.9736447650266335e-05, "loss": 0.3161, "step": 2610 }, { "epoch": 0.4251923624964377, "grad_norm": 0.4328223466873169, "learning_rate": 4.973576118716633e-05, "loss": 0.4412, "step": 2611 }, { "epoch": 0.4253552090542686, "grad_norm": 0.34715238213539124, "learning_rate": 4.9735073835978385e-05, "loss": 0.3864, "step": 2612 }, { "epoch": 0.4255180556120995, "grad_norm": 0.3575264811515808, "learning_rate": 4.9734385596727185e-05, "loss": 0.3568, "step": 2613 }, { "epoch": 0.4256809021699304, "grad_norm": 0.39364296197891235, "learning_rate": 4.973369646943743e-05, "loss": 0.3607, "step": 2614 }, { "epoch": 0.42584374872776126, "grad_norm": 0.35956481099128723, "learning_rate": 4.973300645413387e-05, "loss": 0.3681, "step": 2615 }, { "epoch": 0.42600659528559215, "grad_norm": 0.4021948575973511, "learning_rate": 4.973231555084128e-05, "loss": 0.3595, "step": 2616 }, { "epoch": 0.42616944184342304, "grad_norm": 0.3722401559352875, "learning_rate": 4.973162375958446e-05, "loss": 0.3532, "step": 2617 }, { "epoch": 0.4263322884012539, "grad_norm": 0.36059674620628357, "learning_rate": 4.9730931080388245e-05, "loss": 0.38, "step": 2618 }, { "epoch": 0.4264951349590848, "grad_norm": 0.37987446784973145, "learning_rate": 4.973023751327751e-05, "loss": 0.3958, "step": 2619 }, { "epoch": 0.4266579815169157, "grad_norm": 0.3802259564399719, "learning_rate": 4.972954305827716e-05, "loss": 0.3777, "step": 2620 }, { "epoch": 0.4268208280747466, "grad_norm": 0.327216774225235, "learning_rate": 4.972884771541211e-05, "loss": 0.361, "step": 2621 }, { "epoch": 0.42698367463257747, "grad_norm": 0.36347904801368713, "learning_rate": 4.9728151484707355e-05, "loss": 0.3693, "step": 2622 }, { "epoch": 0.42714652119040836, "grad_norm": 0.34035390615463257, "learning_rate": 4.9727454366187856e-05, "loss": 0.3236, "step": 2623 }, { "epoch": 0.42730936774823924, "grad_norm": 0.4112633168697357, "learning_rate": 4.972675635987867e-05, "loss": 0.3868, "step": 2624 }, { "epoch": 0.42747221430607013, "grad_norm": 0.390279620885849, "learning_rate": 4.9726057465804845e-05, "loss": 0.3431, "step": 2625 }, { "epoch": 0.42763506086390096, "grad_norm": 0.3793753981590271, "learning_rate": 4.972535768399148e-05, "loss": 0.3835, "step": 2626 }, { "epoch": 0.42779790742173185, "grad_norm": 0.5287764072418213, "learning_rate": 4.972465701446369e-05, "loss": 0.4783, "step": 2627 }, { "epoch": 0.42796075397956274, "grad_norm": 0.37613049149513245, "learning_rate": 4.972395545724664e-05, "loss": 0.3447, "step": 2628 }, { "epoch": 0.4281236005373936, "grad_norm": 0.36870089173316956, "learning_rate": 4.972325301236551e-05, "loss": 0.3743, "step": 2629 }, { "epoch": 0.4282864470952245, "grad_norm": 0.3170594573020935, "learning_rate": 4.972254967984552e-05, "loss": 0.4186, "step": 2630 }, { "epoch": 0.4284492936530554, "grad_norm": 0.36955031752586365, "learning_rate": 4.972184545971194e-05, "loss": 0.3816, "step": 2631 }, { "epoch": 0.4286121402108863, "grad_norm": 0.33862391114234924, "learning_rate": 4.972114035199003e-05, "loss": 0.3687, "step": 2632 }, { "epoch": 0.42877498676871717, "grad_norm": 0.4140705168247223, "learning_rate": 4.9720434356705116e-05, "loss": 0.3851, "step": 2633 }, { "epoch": 0.42893783332654806, "grad_norm": 0.34367385506629944, "learning_rate": 4.9719727473882535e-05, "loss": 0.3386, "step": 2634 }, { "epoch": 0.42910067988437894, "grad_norm": 0.38658228516578674, "learning_rate": 4.971901970354769e-05, "loss": 0.3956, "step": 2635 }, { "epoch": 0.42926352644220983, "grad_norm": 0.3394659757614136, "learning_rate": 4.971831104572597e-05, "loss": 0.3289, "step": 2636 }, { "epoch": 0.4294263730000407, "grad_norm": 0.39534440636634827, "learning_rate": 4.971760150044282e-05, "loss": 0.4036, "step": 2637 }, { "epoch": 0.4295892195578716, "grad_norm": 0.40992656350135803, "learning_rate": 4.9716891067723734e-05, "loss": 0.3825, "step": 2638 }, { "epoch": 0.4297520661157025, "grad_norm": 0.39875590801239014, "learning_rate": 4.971617974759419e-05, "loss": 0.3573, "step": 2639 }, { "epoch": 0.4299149126735334, "grad_norm": 0.34045928716659546, "learning_rate": 4.971546754007976e-05, "loss": 0.375, "step": 2640 }, { "epoch": 0.43007775923136426, "grad_norm": 0.3479214906692505, "learning_rate": 4.971475444520598e-05, "loss": 0.3434, "step": 2641 }, { "epoch": 0.43024060578919515, "grad_norm": 0.4410504400730133, "learning_rate": 4.9714040462998477e-05, "loss": 0.3882, "step": 2642 }, { "epoch": 0.43040345234702604, "grad_norm": 0.40657418966293335, "learning_rate": 4.9713325593482876e-05, "loss": 0.4309, "step": 2643 }, { "epoch": 0.4305662989048569, "grad_norm": 0.3171824812889099, "learning_rate": 4.9712609836684834e-05, "loss": 0.4005, "step": 2644 }, { "epoch": 0.43072914546268776, "grad_norm": 0.38655343651771545, "learning_rate": 4.9711893192630066e-05, "loss": 0.3382, "step": 2645 }, { "epoch": 0.43089199202051864, "grad_norm": 0.3875287175178528, "learning_rate": 4.971117566134429e-05, "loss": 0.3447, "step": 2646 }, { "epoch": 0.43105483857834953, "grad_norm": 0.38535797595977783, "learning_rate": 4.971045724285327e-05, "loss": 0.3647, "step": 2647 }, { "epoch": 0.4312176851361804, "grad_norm": 0.35763221979141235, "learning_rate": 4.9709737937182803e-05, "loss": 0.3383, "step": 2648 }, { "epoch": 0.4313805316940113, "grad_norm": 0.4367870092391968, "learning_rate": 4.970901774435871e-05, "loss": 0.3839, "step": 2649 }, { "epoch": 0.4315433782518422, "grad_norm": 0.3413902223110199, "learning_rate": 4.970829666440685e-05, "loss": 0.3239, "step": 2650 }, { "epoch": 0.4317062248096731, "grad_norm": 0.3831839859485626, "learning_rate": 4.970757469735312e-05, "loss": 0.4045, "step": 2651 }, { "epoch": 0.43186907136750396, "grad_norm": 0.366083025932312, "learning_rate": 4.970685184322342e-05, "loss": 0.4199, "step": 2652 }, { "epoch": 0.43203191792533485, "grad_norm": 0.3782126307487488, "learning_rate": 4.9706128102043724e-05, "loss": 0.3607, "step": 2653 }, { "epoch": 0.43219476448316574, "grad_norm": 0.3546913266181946, "learning_rate": 4.9705403473840004e-05, "loss": 0.4261, "step": 2654 }, { "epoch": 0.4323576110409966, "grad_norm": 0.36219897866249084, "learning_rate": 4.970467795863828e-05, "loss": 0.3876, "step": 2655 }, { "epoch": 0.4325204575988275, "grad_norm": 0.40016117691993713, "learning_rate": 4.97039515564646e-05, "loss": 0.3961, "step": 2656 }, { "epoch": 0.4326833041566584, "grad_norm": 0.3378763496875763, "learning_rate": 4.9703224267345046e-05, "loss": 0.3452, "step": 2657 }, { "epoch": 0.4328461507144893, "grad_norm": 0.4613463282585144, "learning_rate": 4.970249609130572e-05, "loss": 0.4132, "step": 2658 }, { "epoch": 0.4330089972723202, "grad_norm": 0.3474118411540985, "learning_rate": 4.970176702837278e-05, "loss": 0.4005, "step": 2659 }, { "epoch": 0.43317184383015106, "grad_norm": 0.38908615708351135, "learning_rate": 4.97010370785724e-05, "loss": 0.3356, "step": 2660 }, { "epoch": 0.43333469038798195, "grad_norm": 0.3697229325771332, "learning_rate": 4.9700306241930776e-05, "loss": 0.3436, "step": 2661 }, { "epoch": 0.43349753694581283, "grad_norm": 0.47388747334480286, "learning_rate": 4.9699574518474156e-05, "loss": 0.3937, "step": 2662 }, { "epoch": 0.43366038350364366, "grad_norm": 0.3939633369445801, "learning_rate": 4.969884190822881e-05, "loss": 0.3681, "step": 2663 }, { "epoch": 0.43382323006147455, "grad_norm": 0.4137934148311615, "learning_rate": 4.969810841122103e-05, "loss": 0.4325, "step": 2664 }, { "epoch": 0.43398607661930544, "grad_norm": 0.4046456813812256, "learning_rate": 4.969737402747717e-05, "loss": 0.3913, "step": 2665 }, { "epoch": 0.4341489231771363, "grad_norm": 0.46052682399749756, "learning_rate": 4.969663875702359e-05, "loss": 0.3629, "step": 2666 }, { "epoch": 0.4343117697349672, "grad_norm": 0.39133894443511963, "learning_rate": 4.969590259988668e-05, "loss": 0.3844, "step": 2667 }, { "epoch": 0.4344746162927981, "grad_norm": 0.4177951216697693, "learning_rate": 4.969516555609288e-05, "loss": 0.4107, "step": 2668 }, { "epoch": 0.434637462850629, "grad_norm": 0.4101473093032837, "learning_rate": 4.969442762566865e-05, "loss": 0.4008, "step": 2669 }, { "epoch": 0.4348003094084599, "grad_norm": 0.40098604559898376, "learning_rate": 4.969368880864047e-05, "loss": 0.3583, "step": 2670 }, { "epoch": 0.43496315596629076, "grad_norm": 0.4301331639289856, "learning_rate": 4.9692949105034885e-05, "loss": 0.3535, "step": 2671 }, { "epoch": 0.43512600252412165, "grad_norm": 0.3476928770542145, "learning_rate": 4.9692208514878444e-05, "loss": 0.3474, "step": 2672 }, { "epoch": 0.43528884908195253, "grad_norm": 0.39648202061653137, "learning_rate": 4.969146703819774e-05, "loss": 0.3802, "step": 2673 }, { "epoch": 0.4354516956397834, "grad_norm": 0.36088934540748596, "learning_rate": 4.969072467501939e-05, "loss": 0.3884, "step": 2674 }, { "epoch": 0.4356145421976143, "grad_norm": 0.35474345088005066, "learning_rate": 4.968998142537005e-05, "loss": 0.333, "step": 2675 }, { "epoch": 0.4357773887554452, "grad_norm": 0.40405169129371643, "learning_rate": 4.96892372892764e-05, "loss": 0.3651, "step": 2676 }, { "epoch": 0.4359402353132761, "grad_norm": 0.3970520794391632, "learning_rate": 4.968849226676516e-05, "loss": 0.3651, "step": 2677 }, { "epoch": 0.43610308187110697, "grad_norm": 0.43869197368621826, "learning_rate": 4.9687746357863087e-05, "loss": 0.3718, "step": 2678 }, { "epoch": 0.43626592842893785, "grad_norm": 0.37669965624809265, "learning_rate": 4.968699956259695e-05, "loss": 0.3789, "step": 2679 }, { "epoch": 0.43642877498676874, "grad_norm": 0.4299353063106537, "learning_rate": 4.968625188099356e-05, "loss": 0.4094, "step": 2680 }, { "epoch": 0.43659162154459963, "grad_norm": 0.43174219131469727, "learning_rate": 4.968550331307976e-05, "loss": 0.4053, "step": 2681 }, { "epoch": 0.43675446810243046, "grad_norm": 1.1155766248703003, "learning_rate": 4.9684753858882444e-05, "loss": 0.5028, "step": 2682 }, { "epoch": 0.43691731466026135, "grad_norm": 0.38104838132858276, "learning_rate": 4.96840035184285e-05, "loss": 0.3809, "step": 2683 }, { "epoch": 0.43708016121809223, "grad_norm": 0.4075094163417816, "learning_rate": 4.9683252291744874e-05, "loss": 0.356, "step": 2684 }, { "epoch": 0.4372430077759231, "grad_norm": 0.3653469681739807, "learning_rate": 4.9682500178858546e-05, "loss": 0.3331, "step": 2685 }, { "epoch": 0.437405854333754, "grad_norm": 0.39974015951156616, "learning_rate": 4.96817471797965e-05, "loss": 0.3766, "step": 2686 }, { "epoch": 0.4375687008915849, "grad_norm": 0.504610002040863, "learning_rate": 4.968099329458579e-05, "loss": 0.3681, "step": 2687 }, { "epoch": 0.4377315474494158, "grad_norm": 0.38734644651412964, "learning_rate": 4.968023852325347e-05, "loss": 0.4237, "step": 2688 }, { "epoch": 0.43789439400724667, "grad_norm": 0.556093692779541, "learning_rate": 4.9679482865826645e-05, "loss": 0.4265, "step": 2689 }, { "epoch": 0.43805724056507755, "grad_norm": 0.4177858233451843, "learning_rate": 4.967872632233244e-05, "loss": 0.3659, "step": 2690 }, { "epoch": 0.43822008712290844, "grad_norm": 0.3423130214214325, "learning_rate": 4.967796889279802e-05, "loss": 0.3787, "step": 2691 }, { "epoch": 0.43838293368073933, "grad_norm": 0.52936190366745, "learning_rate": 4.967721057725059e-05, "loss": 0.449, "step": 2692 }, { "epoch": 0.4385457802385702, "grad_norm": 0.41375932097435, "learning_rate": 4.967645137571736e-05, "loss": 0.3983, "step": 2693 }, { "epoch": 0.4387086267964011, "grad_norm": 0.39833226799964905, "learning_rate": 4.967569128822559e-05, "loss": 0.3813, "step": 2694 }, { "epoch": 0.438871473354232, "grad_norm": 0.5055318474769592, "learning_rate": 4.967493031480257e-05, "loss": 0.4165, "step": 2695 }, { "epoch": 0.4390343199120629, "grad_norm": 0.42769744992256165, "learning_rate": 4.967416845547563e-05, "loss": 0.4044, "step": 2696 }, { "epoch": 0.43919716646989376, "grad_norm": 0.4962683320045471, "learning_rate": 4.967340571027212e-05, "loss": 0.3994, "step": 2697 }, { "epoch": 0.43936001302772465, "grad_norm": 0.4831601679325104, "learning_rate": 4.967264207921941e-05, "loss": 0.3639, "step": 2698 }, { "epoch": 0.43952285958555554, "grad_norm": 0.5335972905158997, "learning_rate": 4.9671877562344935e-05, "loss": 0.4037, "step": 2699 }, { "epoch": 0.43968570614338637, "grad_norm": 0.40126970410346985, "learning_rate": 4.967111215967613e-05, "loss": 0.3582, "step": 2700 }, { "epoch": 0.43984855270121725, "grad_norm": 0.4289371967315674, "learning_rate": 4.9670345871240486e-05, "loss": 0.3472, "step": 2701 }, { "epoch": 0.44001139925904814, "grad_norm": 0.37681829929351807, "learning_rate": 4.966957869706551e-05, "loss": 0.3808, "step": 2702 }, { "epoch": 0.44017424581687903, "grad_norm": 0.4201238751411438, "learning_rate": 4.9668810637178754e-05, "loss": 0.381, "step": 2703 }, { "epoch": 0.4403370923747099, "grad_norm": 0.44344377517700195, "learning_rate": 4.966804169160778e-05, "loss": 0.3891, "step": 2704 }, { "epoch": 0.4404999389325408, "grad_norm": 0.36607664823532104, "learning_rate": 4.9667271860380206e-05, "loss": 0.345, "step": 2705 }, { "epoch": 0.4406627854903717, "grad_norm": 0.3927536904811859, "learning_rate": 4.966650114352366e-05, "loss": 0.3946, "step": 2706 }, { "epoch": 0.4408256320482026, "grad_norm": 0.41121581196784973, "learning_rate": 4.966572954106582e-05, "loss": 0.4307, "step": 2707 }, { "epoch": 0.44098847860603346, "grad_norm": 0.30854737758636475, "learning_rate": 4.96649570530344e-05, "loss": 0.3575, "step": 2708 }, { "epoch": 0.44115132516386435, "grad_norm": 0.30624502897262573, "learning_rate": 4.966418367945711e-05, "loss": 0.3232, "step": 2709 }, { "epoch": 0.44131417172169524, "grad_norm": 0.43568989634513855, "learning_rate": 4.966340942036174e-05, "loss": 0.3787, "step": 2710 }, { "epoch": 0.4414770182795261, "grad_norm": 0.4132536053657532, "learning_rate": 4.966263427577608e-05, "loss": 0.378, "step": 2711 }, { "epoch": 0.441639864837357, "grad_norm": 0.4681408703327179, "learning_rate": 4.966185824572795e-05, "loss": 0.4461, "step": 2712 }, { "epoch": 0.4418027113951879, "grad_norm": 0.3494252860546112, "learning_rate": 4.966108133024522e-05, "loss": 0.392, "step": 2713 }, { "epoch": 0.4419655579530188, "grad_norm": 0.5403565168380737, "learning_rate": 4.966030352935579e-05, "loss": 0.4228, "step": 2714 }, { "epoch": 0.44212840451084967, "grad_norm": 0.3273389935493469, "learning_rate": 4.9659524843087576e-05, "loss": 0.4052, "step": 2715 }, { "epoch": 0.44229125106868056, "grad_norm": 0.3558856248855591, "learning_rate": 4.9658745271468546e-05, "loss": 0.3328, "step": 2716 }, { "epoch": 0.44245409762651144, "grad_norm": 0.3938702940940857, "learning_rate": 4.965796481452667e-05, "loss": 0.4216, "step": 2717 }, { "epoch": 0.4426169441843423, "grad_norm": 0.3691055476665497, "learning_rate": 4.965718347228998e-05, "loss": 0.3912, "step": 2718 }, { "epoch": 0.44277979074217316, "grad_norm": 0.3556017279624939, "learning_rate": 4.9656401244786535e-05, "loss": 0.3965, "step": 2719 }, { "epoch": 0.44294263730000405, "grad_norm": 0.3503643870353699, "learning_rate": 4.965561813204441e-05, "loss": 0.3602, "step": 2720 }, { "epoch": 0.44310548385783494, "grad_norm": 0.35323044657707214, "learning_rate": 4.965483413409172e-05, "loss": 0.3703, "step": 2721 }, { "epoch": 0.4432683304156658, "grad_norm": 0.32194191217422485, "learning_rate": 4.965404925095663e-05, "loss": 0.3843, "step": 2722 }, { "epoch": 0.4434311769734967, "grad_norm": 0.3689413070678711, "learning_rate": 4.965326348266729e-05, "loss": 0.3879, "step": 2723 }, { "epoch": 0.4435940235313276, "grad_norm": 0.4001958966255188, "learning_rate": 4.965247682925193e-05, "loss": 0.3799, "step": 2724 }, { "epoch": 0.4437568700891585, "grad_norm": 0.3582671880722046, "learning_rate": 4.96516892907388e-05, "loss": 0.3915, "step": 2725 }, { "epoch": 0.44391971664698937, "grad_norm": 0.3831034004688263, "learning_rate": 4.9650900867156156e-05, "loss": 0.4248, "step": 2726 }, { "epoch": 0.44408256320482026, "grad_norm": 0.4132821559906006, "learning_rate": 4.9650111558532324e-05, "loss": 0.4488, "step": 2727 }, { "epoch": 0.44424540976265114, "grad_norm": 0.3634258210659027, "learning_rate": 4.964932136489563e-05, "loss": 0.3509, "step": 2728 }, { "epoch": 0.44440825632048203, "grad_norm": 0.3728956878185272, "learning_rate": 4.964853028627445e-05, "loss": 0.3964, "step": 2729 }, { "epoch": 0.4445711028783129, "grad_norm": 0.39616137742996216, "learning_rate": 4.964773832269718e-05, "loss": 0.3481, "step": 2730 }, { "epoch": 0.4447339494361438, "grad_norm": 0.4481362998485565, "learning_rate": 4.9646945474192256e-05, "loss": 0.453, "step": 2731 }, { "epoch": 0.4448967959939747, "grad_norm": 0.37068167328834534, "learning_rate": 4.9646151740788146e-05, "loss": 0.366, "step": 2732 }, { "epoch": 0.4450596425518056, "grad_norm": 0.3972967267036438, "learning_rate": 4.9645357122513354e-05, "loss": 0.374, "step": 2733 }, { "epoch": 0.44522248910963647, "grad_norm": 0.38975876569747925, "learning_rate": 4.9644561619396396e-05, "loss": 0.331, "step": 2734 }, { "epoch": 0.44538533566746735, "grad_norm": 0.3556312024593353, "learning_rate": 4.964376523146584e-05, "loss": 0.3625, "step": 2735 }, { "epoch": 0.44554818222529824, "grad_norm": 0.40413233637809753, "learning_rate": 4.964296795875027e-05, "loss": 0.3759, "step": 2736 }, { "epoch": 0.44571102878312907, "grad_norm": 0.401001900434494, "learning_rate": 4.964216980127833e-05, "loss": 0.3873, "step": 2737 }, { "epoch": 0.44587387534095996, "grad_norm": 0.3794175386428833, "learning_rate": 4.964137075907866e-05, "loss": 0.3964, "step": 2738 }, { "epoch": 0.44603672189879084, "grad_norm": 0.39360758662223816, "learning_rate": 4.964057083217995e-05, "loss": 0.3963, "step": 2739 }, { "epoch": 0.44619956845662173, "grad_norm": 0.41915926337242126, "learning_rate": 4.9639770020610924e-05, "loss": 0.3882, "step": 2740 }, { "epoch": 0.4463624150144526, "grad_norm": 0.4744527041912079, "learning_rate": 4.963896832440033e-05, "loss": 0.4506, "step": 2741 }, { "epoch": 0.4465252615722835, "grad_norm": 0.395972341299057, "learning_rate": 4.963816574357695e-05, "loss": 0.3679, "step": 2742 }, { "epoch": 0.4466881081301144, "grad_norm": 0.3749639093875885, "learning_rate": 4.963736227816961e-05, "loss": 0.35, "step": 2743 }, { "epoch": 0.4468509546879453, "grad_norm": 0.5110949873924255, "learning_rate": 4.9636557928207144e-05, "loss": 0.397, "step": 2744 }, { "epoch": 0.44701380124577617, "grad_norm": 0.37556275725364685, "learning_rate": 4.963575269371844e-05, "loss": 0.3595, "step": 2745 }, { "epoch": 0.44717664780360705, "grad_norm": 0.3389323651790619, "learning_rate": 4.9634946574732394e-05, "loss": 0.3355, "step": 2746 }, { "epoch": 0.44733949436143794, "grad_norm": 0.41010740399360657, "learning_rate": 4.963413957127796e-05, "loss": 0.4028, "step": 2747 }, { "epoch": 0.4475023409192688, "grad_norm": 0.3679608106613159, "learning_rate": 4.963333168338412e-05, "loss": 0.3926, "step": 2748 }, { "epoch": 0.4476651874770997, "grad_norm": 0.4321809709072113, "learning_rate": 4.963252291107986e-05, "loss": 0.3471, "step": 2749 }, { "epoch": 0.4478280340349306, "grad_norm": 0.34754499793052673, "learning_rate": 4.9631713254394226e-05, "loss": 0.3586, "step": 2750 }, { "epoch": 0.4479908805927615, "grad_norm": 0.3869535028934479, "learning_rate": 4.9630902713356286e-05, "loss": 0.3904, "step": 2751 }, { "epoch": 0.4481537271505924, "grad_norm": 0.35349515080451965, "learning_rate": 4.963009128799515e-05, "loss": 0.3657, "step": 2752 }, { "epoch": 0.44831657370842326, "grad_norm": 0.3175506591796875, "learning_rate": 4.9629278978339936e-05, "loss": 0.3201, "step": 2753 }, { "epoch": 0.44847942026625415, "grad_norm": 0.3390922546386719, "learning_rate": 4.962846578441982e-05, "loss": 0.4084, "step": 2754 }, { "epoch": 0.448642266824085, "grad_norm": 0.40024223923683167, "learning_rate": 4.962765170626399e-05, "loss": 0.3631, "step": 2755 }, { "epoch": 0.44880511338191587, "grad_norm": 0.39360666275024414, "learning_rate": 4.962683674390168e-05, "loss": 0.4235, "step": 2756 }, { "epoch": 0.44896795993974675, "grad_norm": 0.3465847969055176, "learning_rate": 4.9626020897362144e-05, "loss": 0.3562, "step": 2757 }, { "epoch": 0.44913080649757764, "grad_norm": 0.34471169114112854, "learning_rate": 4.962520416667468e-05, "loss": 0.37, "step": 2758 }, { "epoch": 0.4492936530554085, "grad_norm": 0.3729623556137085, "learning_rate": 4.962438655186861e-05, "loss": 0.3779, "step": 2759 }, { "epoch": 0.4494564996132394, "grad_norm": 0.3587202727794647, "learning_rate": 4.962356805297328e-05, "loss": 0.3683, "step": 2760 }, { "epoch": 0.4496193461710703, "grad_norm": 0.44163739681243896, "learning_rate": 4.9622748670018084e-05, "loss": 0.3806, "step": 2761 }, { "epoch": 0.4497821927289012, "grad_norm": 0.33465564250946045, "learning_rate": 4.962192840303244e-05, "loss": 0.366, "step": 2762 }, { "epoch": 0.4499450392867321, "grad_norm": 0.3635154366493225, "learning_rate": 4.96211072520458e-05, "loss": 0.4237, "step": 2763 }, { "epoch": 0.45010788584456296, "grad_norm": 0.3554665744304657, "learning_rate": 4.9620285217087635e-05, "loss": 0.38, "step": 2764 }, { "epoch": 0.45027073240239385, "grad_norm": 0.3553295135498047, "learning_rate": 4.961946229818748e-05, "loss": 0.3449, "step": 2765 }, { "epoch": 0.45043357896022473, "grad_norm": 0.4313928782939911, "learning_rate": 4.9618638495374856e-05, "loss": 0.4288, "step": 2766 }, { "epoch": 0.4505964255180556, "grad_norm": 0.395663321018219, "learning_rate": 4.961781380867935e-05, "loss": 0.4042, "step": 2767 }, { "epoch": 0.4507592720758865, "grad_norm": 0.3862348198890686, "learning_rate": 4.961698823813057e-05, "loss": 0.3785, "step": 2768 }, { "epoch": 0.4509221186337174, "grad_norm": 0.40822505950927734, "learning_rate": 4.9616161783758164e-05, "loss": 0.3869, "step": 2769 }, { "epoch": 0.4510849651915483, "grad_norm": 0.35026511549949646, "learning_rate": 4.96153344455918e-05, "loss": 0.3619, "step": 2770 }, { "epoch": 0.45124781174937917, "grad_norm": 0.4417882561683655, "learning_rate": 4.961450622366117e-05, "loss": 0.3768, "step": 2771 }, { "epoch": 0.45141065830721006, "grad_norm": 0.418017715215683, "learning_rate": 4.9613677117996026e-05, "loss": 0.3586, "step": 2772 }, { "epoch": 0.45157350486504094, "grad_norm": 0.4110161364078522, "learning_rate": 4.961284712862613e-05, "loss": 0.3918, "step": 2773 }, { "epoch": 0.4517363514228718, "grad_norm": 0.3315100073814392, "learning_rate": 4.961201625558127e-05, "loss": 0.3581, "step": 2774 }, { "epoch": 0.45189919798070266, "grad_norm": 0.38787639141082764, "learning_rate": 4.96111844988913e-05, "loss": 0.3527, "step": 2775 }, { "epoch": 0.45206204453853355, "grad_norm": 0.3750769793987274, "learning_rate": 4.9610351858586065e-05, "loss": 0.3531, "step": 2776 }, { "epoch": 0.45222489109636443, "grad_norm": 0.4729388654232025, "learning_rate": 4.9609518334695456e-05, "loss": 0.4157, "step": 2777 }, { "epoch": 0.4523877376541953, "grad_norm": 0.3415033221244812, "learning_rate": 4.960868392724942e-05, "loss": 0.3612, "step": 2778 }, { "epoch": 0.4525505842120262, "grad_norm": 0.42463746666908264, "learning_rate": 4.960784863627789e-05, "loss": 0.383, "step": 2779 }, { "epoch": 0.4527134307698571, "grad_norm": 0.3889256417751312, "learning_rate": 4.9607012461810875e-05, "loss": 0.3716, "step": 2780 }, { "epoch": 0.452876277327688, "grad_norm": 0.41225722432136536, "learning_rate": 4.9606175403878385e-05, "loss": 0.3614, "step": 2781 }, { "epoch": 0.45303912388551887, "grad_norm": 0.37123146653175354, "learning_rate": 4.9605337462510474e-05, "loss": 0.3579, "step": 2782 }, { "epoch": 0.45320197044334976, "grad_norm": 0.49168631434440613, "learning_rate": 4.960449863773723e-05, "loss": 0.3857, "step": 2783 }, { "epoch": 0.45336481700118064, "grad_norm": 0.42886123061180115, "learning_rate": 4.960365892958877e-05, "loss": 0.3785, "step": 2784 }, { "epoch": 0.45352766355901153, "grad_norm": 0.3598293364048004, "learning_rate": 4.9602818338095235e-05, "loss": 0.3573, "step": 2785 }, { "epoch": 0.4536905101168424, "grad_norm": 0.40251103043556213, "learning_rate": 4.9601976863286816e-05, "loss": 0.369, "step": 2786 }, { "epoch": 0.4538533566746733, "grad_norm": 0.37327951192855835, "learning_rate": 4.9601134505193714e-05, "loss": 0.3686, "step": 2787 }, { "epoch": 0.4540162032325042, "grad_norm": 0.3585106432437897, "learning_rate": 4.960029126384618e-05, "loss": 0.3781, "step": 2788 }, { "epoch": 0.4541790497903351, "grad_norm": 0.33530256152153015, "learning_rate": 4.959944713927448e-05, "loss": 0.3187, "step": 2789 }, { "epoch": 0.45434189634816596, "grad_norm": 0.36259669065475464, "learning_rate": 4.959860213150893e-05, "loss": 0.3808, "step": 2790 }, { "epoch": 0.45450474290599685, "grad_norm": 0.43542060256004333, "learning_rate": 4.959775624057986e-05, "loss": 0.3908, "step": 2791 }, { "epoch": 0.4546675894638277, "grad_norm": 0.4383094608783722, "learning_rate": 4.959690946651765e-05, "loss": 0.3849, "step": 2792 }, { "epoch": 0.45483043602165857, "grad_norm": 0.3921186923980713, "learning_rate": 4.959606180935269e-05, "loss": 0.412, "step": 2793 }, { "epoch": 0.45499328257948946, "grad_norm": 0.4370906352996826, "learning_rate": 4.959521326911542e-05, "loss": 0.4156, "step": 2794 }, { "epoch": 0.45515612913732034, "grad_norm": 0.3663085401058197, "learning_rate": 4.9594363845836306e-05, "loss": 0.3947, "step": 2795 }, { "epoch": 0.45531897569515123, "grad_norm": 0.347575306892395, "learning_rate": 4.9593513539545846e-05, "loss": 0.3319, "step": 2796 }, { "epoch": 0.4554818222529821, "grad_norm": 0.6943673491477966, "learning_rate": 4.959266235027456e-05, "loss": 0.4829, "step": 2797 }, { "epoch": 0.455644668810813, "grad_norm": 0.38570675253868103, "learning_rate": 4.959181027805302e-05, "loss": 0.3889, "step": 2798 }, { "epoch": 0.4558075153686439, "grad_norm": 0.40221649408340454, "learning_rate": 4.9590957322911805e-05, "loss": 0.4118, "step": 2799 }, { "epoch": 0.4559703619264748, "grad_norm": 0.41890543699264526, "learning_rate": 4.959010348488155e-05, "loss": 0.4296, "step": 2800 }, { "epoch": 0.45613320848430566, "grad_norm": 0.3391473591327667, "learning_rate": 4.958924876399291e-05, "loss": 0.3773, "step": 2801 }, { "epoch": 0.45629605504213655, "grad_norm": 0.3614698052406311, "learning_rate": 4.958839316027656e-05, "loss": 0.3551, "step": 2802 }, { "epoch": 0.45645890159996744, "grad_norm": 0.39967605471611023, "learning_rate": 4.9587536673763235e-05, "loss": 0.3954, "step": 2803 }, { "epoch": 0.4566217481577983, "grad_norm": 0.39753296971321106, "learning_rate": 4.9586679304483665e-05, "loss": 0.3419, "step": 2804 }, { "epoch": 0.4567845947156292, "grad_norm": 0.436129629611969, "learning_rate": 4.9585821052468656e-05, "loss": 0.3817, "step": 2805 }, { "epoch": 0.4569474412734601, "grad_norm": 0.4805622100830078, "learning_rate": 4.9584961917749004e-05, "loss": 0.3683, "step": 2806 }, { "epoch": 0.457110287831291, "grad_norm": 0.32148995995521545, "learning_rate": 4.958410190035556e-05, "loss": 0.3397, "step": 2807 }, { "epoch": 0.45727313438912187, "grad_norm": 0.6041308045387268, "learning_rate": 4.958324100031921e-05, "loss": 0.3869, "step": 2808 }, { "epoch": 0.45743598094695276, "grad_norm": 0.530815839767456, "learning_rate": 4.958237921767085e-05, "loss": 0.422, "step": 2809 }, { "epoch": 0.45759882750478365, "grad_norm": 0.35433366894721985, "learning_rate": 4.958151655244142e-05, "loss": 0.3777, "step": 2810 }, { "epoch": 0.4577616740626145, "grad_norm": 0.3604532480239868, "learning_rate": 4.95806530046619e-05, "loss": 0.3643, "step": 2811 }, { "epoch": 0.45792452062044536, "grad_norm": 0.4213259816169739, "learning_rate": 4.9579788574363294e-05, "loss": 0.3813, "step": 2812 }, { "epoch": 0.45808736717827625, "grad_norm": 0.408260315656662, "learning_rate": 4.957892326157664e-05, "loss": 0.4246, "step": 2813 }, { "epoch": 0.45825021373610714, "grad_norm": 0.3488738536834717, "learning_rate": 4.9578057066332995e-05, "loss": 0.3881, "step": 2814 }, { "epoch": 0.458413060293938, "grad_norm": 0.39107316732406616, "learning_rate": 4.957718998866346e-05, "loss": 0.3847, "step": 2815 }, { "epoch": 0.4585759068517689, "grad_norm": 0.33039432764053345, "learning_rate": 4.9576322028599175e-05, "loss": 0.3613, "step": 2816 }, { "epoch": 0.4587387534095998, "grad_norm": 0.36130985617637634, "learning_rate": 4.9575453186171294e-05, "loss": 0.3598, "step": 2817 }, { "epoch": 0.4589015999674307, "grad_norm": 0.3606296181678772, "learning_rate": 4.957458346141102e-05, "loss": 0.3676, "step": 2818 }, { "epoch": 0.45906444652526157, "grad_norm": 0.36044564843177795, "learning_rate": 4.9573712854349566e-05, "loss": 0.3524, "step": 2819 }, { "epoch": 0.45922729308309246, "grad_norm": 0.31680935621261597, "learning_rate": 4.9572841365018196e-05, "loss": 0.3472, "step": 2820 }, { "epoch": 0.45939013964092335, "grad_norm": 0.37293580174446106, "learning_rate": 4.9571968993448194e-05, "loss": 0.4037, "step": 2821 }, { "epoch": 0.45955298619875423, "grad_norm": 0.3285520672798157, "learning_rate": 4.9571095739670894e-05, "loss": 0.3747, "step": 2822 }, { "epoch": 0.4597158327565851, "grad_norm": 0.3418324589729309, "learning_rate": 4.957022160371764e-05, "loss": 0.3902, "step": 2823 }, { "epoch": 0.459878679314416, "grad_norm": 0.35707032680511475, "learning_rate": 4.956934658561981e-05, "loss": 0.372, "step": 2824 }, { "epoch": 0.4600415258722469, "grad_norm": 0.33513399958610535, "learning_rate": 4.956847068540883e-05, "loss": 0.396, "step": 2825 }, { "epoch": 0.4602043724300778, "grad_norm": 0.3519292175769806, "learning_rate": 4.956759390311614e-05, "loss": 0.4083, "step": 2826 }, { "epoch": 0.46036721898790867, "grad_norm": 0.3632991909980774, "learning_rate": 4.9566716238773226e-05, "loss": 0.3936, "step": 2827 }, { "epoch": 0.46053006554573955, "grad_norm": 0.36139237880706787, "learning_rate": 4.956583769241159e-05, "loss": 0.3756, "step": 2828 }, { "epoch": 0.4606929121035704, "grad_norm": 0.3315095603466034, "learning_rate": 4.956495826406278e-05, "loss": 0.3641, "step": 2829 }, { "epoch": 0.46085575866140127, "grad_norm": 0.4258779287338257, "learning_rate": 4.956407795375837e-05, "loss": 0.4133, "step": 2830 }, { "epoch": 0.46101860521923216, "grad_norm": 0.34320780634880066, "learning_rate": 4.9563196761529975e-05, "loss": 0.3891, "step": 2831 }, { "epoch": 0.46118145177706305, "grad_norm": 0.35113024711608887, "learning_rate": 4.956231468740922e-05, "loss": 0.3741, "step": 2832 }, { "epoch": 0.46134429833489393, "grad_norm": 0.3472249209880829, "learning_rate": 4.9561431731427764e-05, "loss": 0.343, "step": 2833 }, { "epoch": 0.4615071448927248, "grad_norm": 0.3633914589881897, "learning_rate": 4.9560547893617336e-05, "loss": 0.3842, "step": 2834 }, { "epoch": 0.4616699914505557, "grad_norm": 0.3342939615249634, "learning_rate": 4.955966317400965e-05, "loss": 0.3521, "step": 2835 }, { "epoch": 0.4618328380083866, "grad_norm": 0.3974822759628296, "learning_rate": 4.955877757263647e-05, "loss": 0.4209, "step": 2836 }, { "epoch": 0.4619956845662175, "grad_norm": 0.36789682507514954, "learning_rate": 4.95578910895296e-05, "loss": 0.3253, "step": 2837 }, { "epoch": 0.46215853112404837, "grad_norm": 0.43826329708099365, "learning_rate": 4.955700372472085e-05, "loss": 0.3857, "step": 2838 }, { "epoch": 0.46232137768187925, "grad_norm": 0.37050822377204895, "learning_rate": 4.9556115478242106e-05, "loss": 0.3799, "step": 2839 }, { "epoch": 0.46248422423971014, "grad_norm": 0.34819409251213074, "learning_rate": 4.955522635012524e-05, "loss": 0.3458, "step": 2840 }, { "epoch": 0.462647070797541, "grad_norm": 0.42802688479423523, "learning_rate": 4.955433634040217e-05, "loss": 0.4229, "step": 2841 }, { "epoch": 0.4628099173553719, "grad_norm": 0.3219848871231079, "learning_rate": 4.955344544910486e-05, "loss": 0.3756, "step": 2842 }, { "epoch": 0.4629727639132028, "grad_norm": 0.415988951921463, "learning_rate": 4.95525536762653e-05, "loss": 0.3654, "step": 2843 }, { "epoch": 0.4631356104710337, "grad_norm": 0.3690062463283539, "learning_rate": 4.955166102191551e-05, "loss": 0.3526, "step": 2844 }, { "epoch": 0.4632984570288646, "grad_norm": 0.324305921792984, "learning_rate": 4.955076748608752e-05, "loss": 0.3758, "step": 2845 }, { "epoch": 0.46346130358669546, "grad_norm": 0.38706767559051514, "learning_rate": 4.954987306881342e-05, "loss": 0.3735, "step": 2846 }, { "epoch": 0.4636241501445263, "grad_norm": 0.4071418344974518, "learning_rate": 4.9548977770125324e-05, "loss": 0.3874, "step": 2847 }, { "epoch": 0.4637869967023572, "grad_norm": 0.33339056372642517, "learning_rate": 4.9548081590055384e-05, "loss": 0.3618, "step": 2848 }, { "epoch": 0.46394984326018807, "grad_norm": 0.34374889731407166, "learning_rate": 4.954718452863576e-05, "loss": 0.3413, "step": 2849 }, { "epoch": 0.46411268981801895, "grad_norm": 0.35441169142723083, "learning_rate": 4.954628658589866e-05, "loss": 0.342, "step": 2850 }, { "epoch": 0.46427553637584984, "grad_norm": 0.3973623514175415, "learning_rate": 4.954538776187634e-05, "loss": 0.3796, "step": 2851 }, { "epoch": 0.4644383829336807, "grad_norm": 0.36950016021728516, "learning_rate": 4.954448805660106e-05, "loss": 0.3822, "step": 2852 }, { "epoch": 0.4646012294915116, "grad_norm": 0.3959740698337555, "learning_rate": 4.954358747010511e-05, "loss": 0.3701, "step": 2853 }, { "epoch": 0.4647640760493425, "grad_norm": 0.3944811224937439, "learning_rate": 4.9542686002420845e-05, "loss": 0.3988, "step": 2854 }, { "epoch": 0.4649269226071734, "grad_norm": 0.3667997717857361, "learning_rate": 4.954178365358062e-05, "loss": 0.359, "step": 2855 }, { "epoch": 0.4650897691650043, "grad_norm": 0.448726087808609, "learning_rate": 4.954088042361683e-05, "loss": 0.4018, "step": 2856 }, { "epoch": 0.46525261572283516, "grad_norm": 0.38578200340270996, "learning_rate": 4.9539976312561904e-05, "loss": 0.387, "step": 2857 }, { "epoch": 0.46541546228066605, "grad_norm": 0.3784160017967224, "learning_rate": 4.953907132044831e-05, "loss": 0.3927, "step": 2858 }, { "epoch": 0.46557830883849693, "grad_norm": 0.3795545697212219, "learning_rate": 4.953816544730853e-05, "loss": 0.3715, "step": 2859 }, { "epoch": 0.4657411553963278, "grad_norm": 0.3678949177265167, "learning_rate": 4.9537258693175094e-05, "loss": 0.397, "step": 2860 }, { "epoch": 0.4659040019541587, "grad_norm": 0.35909926891326904, "learning_rate": 4.9536351058080556e-05, "loss": 0.3624, "step": 2861 }, { "epoch": 0.4660668485119896, "grad_norm": 0.33966195583343506, "learning_rate": 4.9535442542057496e-05, "loss": 0.3888, "step": 2862 }, { "epoch": 0.4662296950698205, "grad_norm": 0.38275280594825745, "learning_rate": 4.953453314513855e-05, "loss": 0.3526, "step": 2863 }, { "epoch": 0.46639254162765137, "grad_norm": 0.3559430241584778, "learning_rate": 4.953362286735635e-05, "loss": 0.3491, "step": 2864 }, { "epoch": 0.46655538818548226, "grad_norm": 0.35771265625953674, "learning_rate": 4.953271170874358e-05, "loss": 0.3458, "step": 2865 }, { "epoch": 0.4667182347433131, "grad_norm": 0.46412864327430725, "learning_rate": 4.9531799669332966e-05, "loss": 0.4875, "step": 2866 }, { "epoch": 0.466881081301144, "grad_norm": 0.4299660325050354, "learning_rate": 4.953088674915724e-05, "loss": 0.3855, "step": 2867 }, { "epoch": 0.46704392785897486, "grad_norm": 0.37462928891181946, "learning_rate": 4.952997294824919e-05, "loss": 0.3623, "step": 2868 }, { "epoch": 0.46720677441680575, "grad_norm": 0.5309403538703918, "learning_rate": 4.9529058266641606e-05, "loss": 0.3866, "step": 2869 }, { "epoch": 0.46736962097463663, "grad_norm": 0.4843451976776123, "learning_rate": 4.952814270436735e-05, "loss": 0.4058, "step": 2870 }, { "epoch": 0.4675324675324675, "grad_norm": 0.40149354934692383, "learning_rate": 4.952722626145928e-05, "loss": 0.3966, "step": 2871 }, { "epoch": 0.4676953140902984, "grad_norm": 0.37540683150291443, "learning_rate": 4.9526308937950295e-05, "loss": 0.3862, "step": 2872 }, { "epoch": 0.4678581606481293, "grad_norm": 0.44708532094955444, "learning_rate": 4.952539073387335e-05, "loss": 0.3932, "step": 2873 }, { "epoch": 0.4680210072059602, "grad_norm": 0.3786374628543854, "learning_rate": 4.9524471649261376e-05, "loss": 0.3311, "step": 2874 }, { "epoch": 0.46818385376379107, "grad_norm": 0.35333994030952454, "learning_rate": 4.952355168414741e-05, "loss": 0.3939, "step": 2875 }, { "epoch": 0.46834670032162196, "grad_norm": 0.37002232670783997, "learning_rate": 4.9522630838564455e-05, "loss": 0.3521, "step": 2876 }, { "epoch": 0.46850954687945284, "grad_norm": 0.4113650918006897, "learning_rate": 4.952170911254559e-05, "loss": 0.3717, "step": 2877 }, { "epoch": 0.46867239343728373, "grad_norm": 0.38558515906333923, "learning_rate": 4.9520786506123894e-05, "loss": 0.3816, "step": 2878 }, { "epoch": 0.4688352399951146, "grad_norm": 0.37715715169906616, "learning_rate": 4.951986301933249e-05, "loss": 0.3675, "step": 2879 }, { "epoch": 0.4689980865529455, "grad_norm": 0.4001818299293518, "learning_rate": 4.951893865220455e-05, "loss": 0.4309, "step": 2880 }, { "epoch": 0.4691609331107764, "grad_norm": 0.36463379859924316, "learning_rate": 4.9518013404773245e-05, "loss": 0.3486, "step": 2881 }, { "epoch": 0.4693237796686073, "grad_norm": 0.36060968041419983, "learning_rate": 4.951708727707181e-05, "loss": 0.3785, "step": 2882 }, { "epoch": 0.46948662622643816, "grad_norm": 0.3638606071472168, "learning_rate": 4.951616026913348e-05, "loss": 0.3559, "step": 2883 }, { "epoch": 0.469649472784269, "grad_norm": 0.4313969314098358, "learning_rate": 4.951523238099154e-05, "loss": 0.4281, "step": 2884 }, { "epoch": 0.4698123193420999, "grad_norm": 0.553321361541748, "learning_rate": 4.9514303612679317e-05, "loss": 0.4342, "step": 2885 }, { "epoch": 0.46997516589993077, "grad_norm": 0.615134060382843, "learning_rate": 4.951337396423014e-05, "loss": 0.3872, "step": 2886 }, { "epoch": 0.47013801245776166, "grad_norm": 0.3814436197280884, "learning_rate": 4.9512443435677395e-05, "loss": 0.3377, "step": 2887 }, { "epoch": 0.47030085901559254, "grad_norm": 0.3935967981815338, "learning_rate": 4.95115120270545e-05, "loss": 0.4011, "step": 2888 }, { "epoch": 0.47046370557342343, "grad_norm": 0.4798473119735718, "learning_rate": 4.951057973839487e-05, "loss": 0.3579, "step": 2889 }, { "epoch": 0.4706265521312543, "grad_norm": 0.38837382197380066, "learning_rate": 4.9509646569731996e-05, "loss": 0.3806, "step": 2890 }, { "epoch": 0.4707893986890852, "grad_norm": 0.4330155849456787, "learning_rate": 4.9508712521099374e-05, "loss": 0.3925, "step": 2891 }, { "epoch": 0.4709522452469161, "grad_norm": 0.4156915247440338, "learning_rate": 4.950777759253056e-05, "loss": 0.4439, "step": 2892 }, { "epoch": 0.471115091804747, "grad_norm": 0.6065873503684998, "learning_rate": 4.950684178405909e-05, "loss": 0.4163, "step": 2893 }, { "epoch": 0.47127793836257786, "grad_norm": 0.4189724922180176, "learning_rate": 4.9505905095718575e-05, "loss": 0.3946, "step": 2894 }, { "epoch": 0.47144078492040875, "grad_norm": 0.383040189743042, "learning_rate": 4.950496752754265e-05, "loss": 0.4052, "step": 2895 }, { "epoch": 0.47160363147823964, "grad_norm": 0.3810013234615326, "learning_rate": 4.950402907956496e-05, "loss": 0.3604, "step": 2896 }, { "epoch": 0.4717664780360705, "grad_norm": 0.39212536811828613, "learning_rate": 4.950308975181922e-05, "loss": 0.3721, "step": 2897 }, { "epoch": 0.4719293245939014, "grad_norm": 0.3956463932991028, "learning_rate": 4.950214954433914e-05, "loss": 0.3773, "step": 2898 }, { "epoch": 0.4720921711517323, "grad_norm": 0.42834270000457764, "learning_rate": 4.950120845715849e-05, "loss": 0.3734, "step": 2899 }, { "epoch": 0.4722550177095632, "grad_norm": 0.40253204107284546, "learning_rate": 4.950026649031104e-05, "loss": 0.4314, "step": 2900 }, { "epoch": 0.47241786426739407, "grad_norm": 0.37292060256004333, "learning_rate": 4.949932364383062e-05, "loss": 0.4008, "step": 2901 }, { "epoch": 0.47258071082522496, "grad_norm": 0.42606428265571594, "learning_rate": 4.949837991775108e-05, "loss": 0.3739, "step": 2902 }, { "epoch": 0.4727435573830558, "grad_norm": 0.4005255699157715, "learning_rate": 4.94974353121063e-05, "loss": 0.3983, "step": 2903 }, { "epoch": 0.4729064039408867, "grad_norm": 0.4341249465942383, "learning_rate": 4.9496489826930205e-05, "loss": 0.3871, "step": 2904 }, { "epoch": 0.47306925049871756, "grad_norm": 0.41940441727638245, "learning_rate": 4.949554346225672e-05, "loss": 0.4168, "step": 2905 }, { "epoch": 0.47323209705654845, "grad_norm": 0.3923583924770355, "learning_rate": 4.9494596218119835e-05, "loss": 0.3776, "step": 2906 }, { "epoch": 0.47339494361437934, "grad_norm": 0.42583543062210083, "learning_rate": 4.949364809455357e-05, "loss": 0.3659, "step": 2907 }, { "epoch": 0.4735577901722102, "grad_norm": 0.47542089223861694, "learning_rate": 4.949269909159194e-05, "loss": 0.385, "step": 2908 }, { "epoch": 0.4737206367300411, "grad_norm": 0.4196861982345581, "learning_rate": 4.949174920926903e-05, "loss": 0.372, "step": 2909 }, { "epoch": 0.473883483287872, "grad_norm": 0.3512827754020691, "learning_rate": 4.949079844761896e-05, "loss": 0.3682, "step": 2910 }, { "epoch": 0.4740463298457029, "grad_norm": 0.5331830978393555, "learning_rate": 4.948984680667584e-05, "loss": 0.4195, "step": 2911 }, { "epoch": 0.47420917640353377, "grad_norm": 0.5216928720474243, "learning_rate": 4.9488894286473844e-05, "loss": 0.4444, "step": 2912 }, { "epoch": 0.47437202296136466, "grad_norm": 0.31766965985298157, "learning_rate": 4.948794088704718e-05, "loss": 0.3304, "step": 2913 }, { "epoch": 0.47453486951919555, "grad_norm": 0.4552035927772522, "learning_rate": 4.948698660843006e-05, "loss": 0.3534, "step": 2914 }, { "epoch": 0.47469771607702643, "grad_norm": 0.3664852976799011, "learning_rate": 4.9486031450656756e-05, "loss": 0.3327, "step": 2915 }, { "epoch": 0.4748605626348573, "grad_norm": 0.48079583048820496, "learning_rate": 4.9485075413761564e-05, "loss": 0.4063, "step": 2916 }, { "epoch": 0.4750234091926882, "grad_norm": 0.44301000237464905, "learning_rate": 4.9484118497778805e-05, "loss": 0.3804, "step": 2917 }, { "epoch": 0.4751862557505191, "grad_norm": 0.38439279794692993, "learning_rate": 4.9483160702742834e-05, "loss": 0.3619, "step": 2918 }, { "epoch": 0.47534910230835, "grad_norm": 0.43863436579704285, "learning_rate": 4.9482202028688055e-05, "loss": 0.3972, "step": 2919 }, { "epoch": 0.47551194886618087, "grad_norm": 0.36370453238487244, "learning_rate": 4.9481242475648856e-05, "loss": 0.3687, "step": 2920 }, { "epoch": 0.4756747954240117, "grad_norm": 0.36940744519233704, "learning_rate": 4.948028204365971e-05, "loss": 0.3799, "step": 2921 }, { "epoch": 0.4758376419818426, "grad_norm": 0.3845975995063782, "learning_rate": 4.947932073275509e-05, "loss": 0.3775, "step": 2922 }, { "epoch": 0.47600048853967347, "grad_norm": 0.34298455715179443, "learning_rate": 4.9478358542969514e-05, "loss": 0.3654, "step": 2923 }, { "epoch": 0.47616333509750436, "grad_norm": 0.34972652792930603, "learning_rate": 4.947739547433754e-05, "loss": 0.3591, "step": 2924 }, { "epoch": 0.47632618165533525, "grad_norm": 0.38931551575660706, "learning_rate": 4.947643152689372e-05, "loss": 0.3677, "step": 2925 }, { "epoch": 0.47648902821316613, "grad_norm": 0.36398452520370483, "learning_rate": 4.947546670067267e-05, "loss": 0.3688, "step": 2926 }, { "epoch": 0.476651874770997, "grad_norm": 0.3273685574531555, "learning_rate": 4.9474500995709044e-05, "loss": 0.3553, "step": 2927 }, { "epoch": 0.4768147213288279, "grad_norm": 0.38607606291770935, "learning_rate": 4.94735344120375e-05, "loss": 0.3068, "step": 2928 }, { "epoch": 0.4769775678866588, "grad_norm": 0.49336183071136475, "learning_rate": 4.9472566949692756e-05, "loss": 0.4079, "step": 2929 }, { "epoch": 0.4771404144444897, "grad_norm": 0.3468334674835205, "learning_rate": 4.9471598608709525e-05, "loss": 0.3564, "step": 2930 }, { "epoch": 0.47730326100232057, "grad_norm": 0.5638741254806519, "learning_rate": 4.947062938912259e-05, "loss": 0.4181, "step": 2931 }, { "epoch": 0.47746610756015145, "grad_norm": 0.35242655873298645, "learning_rate": 4.946965929096674e-05, "loss": 0.3802, "step": 2932 }, { "epoch": 0.47762895411798234, "grad_norm": 0.35494324564933777, "learning_rate": 4.946868831427681e-05, "loss": 0.382, "step": 2933 }, { "epoch": 0.4777918006758132, "grad_norm": 0.34771963953971863, "learning_rate": 4.946771645908767e-05, "loss": 0.374, "step": 2934 }, { "epoch": 0.4779546472336441, "grad_norm": 0.38083600997924805, "learning_rate": 4.946674372543419e-05, "loss": 0.3903, "step": 2935 }, { "epoch": 0.478117493791475, "grad_norm": 0.33349570631980896, "learning_rate": 4.9465770113351305e-05, "loss": 0.3598, "step": 2936 }, { "epoch": 0.4782803403493059, "grad_norm": 0.33962979912757874, "learning_rate": 4.946479562287398e-05, "loss": 0.3326, "step": 2937 }, { "epoch": 0.4784431869071368, "grad_norm": 0.3940054178237915, "learning_rate": 4.946382025403718e-05, "loss": 0.3953, "step": 2938 }, { "epoch": 0.47860603346496766, "grad_norm": 0.3406343162059784, "learning_rate": 4.946284400687595e-05, "loss": 0.3855, "step": 2939 }, { "epoch": 0.4787688800227985, "grad_norm": 0.38594484329223633, "learning_rate": 4.9461866881425326e-05, "loss": 0.3894, "step": 2940 }, { "epoch": 0.4789317265806294, "grad_norm": 0.3552234470844269, "learning_rate": 4.946088887772039e-05, "loss": 0.379, "step": 2941 }, { "epoch": 0.47909457313846027, "grad_norm": 0.35464581847190857, "learning_rate": 4.9459909995796255e-05, "loss": 0.3295, "step": 2942 }, { "epoch": 0.47925741969629115, "grad_norm": 0.3487350344657898, "learning_rate": 4.9458930235688073e-05, "loss": 0.3683, "step": 2943 }, { "epoch": 0.47942026625412204, "grad_norm": 0.31223785877227783, "learning_rate": 4.9457949597431007e-05, "loss": 0.3159, "step": 2944 }, { "epoch": 0.4795831128119529, "grad_norm": 0.38947463035583496, "learning_rate": 4.945696808106027e-05, "loss": 0.3917, "step": 2945 }, { "epoch": 0.4797459593697838, "grad_norm": 0.33850210905075073, "learning_rate": 4.9455985686611115e-05, "loss": 0.3544, "step": 2946 }, { "epoch": 0.4799088059276147, "grad_norm": 0.38921356201171875, "learning_rate": 4.94550024141188e-05, "loss": 0.4093, "step": 2947 }, { "epoch": 0.4800716524854456, "grad_norm": 0.38198310136795044, "learning_rate": 4.9454018263618615e-05, "loss": 0.3711, "step": 2948 }, { "epoch": 0.4802344990432765, "grad_norm": 0.3863317370414734, "learning_rate": 4.945303323514592e-05, "loss": 0.3612, "step": 2949 }, { "epoch": 0.48039734560110736, "grad_norm": 0.33771687746047974, "learning_rate": 4.945204732873607e-05, "loss": 0.3859, "step": 2950 }, { "epoch": 0.48056019215893825, "grad_norm": 0.47323185205459595, "learning_rate": 4.945106054442445e-05, "loss": 0.3877, "step": 2951 }, { "epoch": 0.48072303871676914, "grad_norm": 0.40191739797592163, "learning_rate": 4.945007288224651e-05, "loss": 0.3612, "step": 2952 }, { "epoch": 0.4808858852746, "grad_norm": 0.42272695899009705, "learning_rate": 4.9449084342237696e-05, "loss": 0.3646, "step": 2953 }, { "epoch": 0.4810487318324309, "grad_norm": 0.4384748041629791, "learning_rate": 4.9448094924433505e-05, "loss": 0.3685, "step": 2954 }, { "epoch": 0.4812115783902618, "grad_norm": 0.35770487785339355, "learning_rate": 4.9447104628869455e-05, "loss": 0.3457, "step": 2955 }, { "epoch": 0.4813744249480927, "grad_norm": 0.356245756149292, "learning_rate": 4.94461134555811e-05, "loss": 0.3596, "step": 2956 }, { "epoch": 0.48153727150592357, "grad_norm": 0.403608113527298, "learning_rate": 4.944512140460403e-05, "loss": 0.3794, "step": 2957 }, { "epoch": 0.4817001180637544, "grad_norm": 0.37632089853286743, "learning_rate": 4.9444128475973874e-05, "loss": 0.3982, "step": 2958 }, { "epoch": 0.4818629646215853, "grad_norm": 0.3965683579444885, "learning_rate": 4.944313466972625e-05, "loss": 0.444, "step": 2959 }, { "epoch": 0.4820258111794162, "grad_norm": 0.37016236782073975, "learning_rate": 4.9442139985896876e-05, "loss": 0.3686, "step": 2960 }, { "epoch": 0.48218865773724706, "grad_norm": 0.41473186016082764, "learning_rate": 4.944114442452144e-05, "loss": 0.3774, "step": 2961 }, { "epoch": 0.48235150429507795, "grad_norm": 0.374460905790329, "learning_rate": 4.944014798563569e-05, "loss": 0.3651, "step": 2962 }, { "epoch": 0.48251435085290884, "grad_norm": 0.3397146165370941, "learning_rate": 4.9439150669275395e-05, "loss": 0.3578, "step": 2963 }, { "epoch": 0.4826771974107397, "grad_norm": 0.36932650208473206, "learning_rate": 4.943815247547638e-05, "loss": 0.3845, "step": 2964 }, { "epoch": 0.4828400439685706, "grad_norm": 0.43630510568618774, "learning_rate": 4.9437153404274467e-05, "loss": 0.3986, "step": 2965 }, { "epoch": 0.4830028905264015, "grad_norm": 0.34448501467704773, "learning_rate": 4.943615345570554e-05, "loss": 0.3667, "step": 2966 }, { "epoch": 0.4831657370842324, "grad_norm": 0.417606383562088, "learning_rate": 4.943515262980548e-05, "loss": 0.4067, "step": 2967 }, { "epoch": 0.48332858364206327, "grad_norm": 0.4238738417625427, "learning_rate": 4.9434150926610234e-05, "loss": 0.421, "step": 2968 }, { "epoch": 0.48349143019989416, "grad_norm": 0.36702245473861694, "learning_rate": 4.9433148346155766e-05, "loss": 0.4036, "step": 2969 }, { "epoch": 0.48365427675772504, "grad_norm": 0.3583706319332123, "learning_rate": 4.943214488847806e-05, "loss": 0.3779, "step": 2970 }, { "epoch": 0.48381712331555593, "grad_norm": 0.36328113079071045, "learning_rate": 4.943114055361316e-05, "loss": 0.3749, "step": 2971 }, { "epoch": 0.4839799698733868, "grad_norm": 0.40897125005722046, "learning_rate": 4.9430135341597114e-05, "loss": 0.4126, "step": 2972 }, { "epoch": 0.4841428164312177, "grad_norm": 0.3510180115699768, "learning_rate": 4.942912925246602e-05, "loss": 0.422, "step": 2973 }, { "epoch": 0.4843056629890486, "grad_norm": 0.43638065457344055, "learning_rate": 4.942812228625599e-05, "loss": 0.4056, "step": 2974 }, { "epoch": 0.4844685095468795, "grad_norm": 0.3534383177757263, "learning_rate": 4.942711444300318e-05, "loss": 0.3151, "step": 2975 }, { "epoch": 0.4846313561047103, "grad_norm": 0.3560332655906677, "learning_rate": 4.9426105722743776e-05, "loss": 0.3947, "step": 2976 }, { "epoch": 0.4847942026625412, "grad_norm": 0.4078187048435211, "learning_rate": 4.9425096125514e-05, "loss": 0.3948, "step": 2977 }, { "epoch": 0.4849570492203721, "grad_norm": 0.3220694959163666, "learning_rate": 4.942408565135008e-05, "loss": 0.3687, "step": 2978 }, { "epoch": 0.48511989577820297, "grad_norm": 0.40716448426246643, "learning_rate": 4.9423074300288325e-05, "loss": 0.3896, "step": 2979 }, { "epoch": 0.48528274233603386, "grad_norm": 0.39669719338417053, "learning_rate": 4.942206207236502e-05, "loss": 0.3585, "step": 2980 }, { "epoch": 0.48544558889386474, "grad_norm": 0.3754524290561676, "learning_rate": 4.9421048967616515e-05, "loss": 0.3894, "step": 2981 }, { "epoch": 0.48560843545169563, "grad_norm": 0.36976850032806396, "learning_rate": 4.942003498607919e-05, "loss": 0.3646, "step": 2982 }, { "epoch": 0.4857712820095265, "grad_norm": 0.35688042640686035, "learning_rate": 4.941902012778944e-05, "loss": 0.3527, "step": 2983 }, { "epoch": 0.4859341285673574, "grad_norm": 0.4024256467819214, "learning_rate": 4.941800439278371e-05, "loss": 0.4064, "step": 2984 }, { "epoch": 0.4860969751251883, "grad_norm": 0.2948455512523651, "learning_rate": 4.941698778109846e-05, "loss": 0.3636, "step": 2985 }, { "epoch": 0.4862598216830192, "grad_norm": 0.34977954626083374, "learning_rate": 4.9415970292770196e-05, "loss": 0.3732, "step": 2986 }, { "epoch": 0.48642266824085006, "grad_norm": 0.3586455285549164, "learning_rate": 4.941495192783545e-05, "loss": 0.3396, "step": 2987 }, { "epoch": 0.48658551479868095, "grad_norm": 0.36790603399276733, "learning_rate": 4.941393268633078e-05, "loss": 0.3722, "step": 2988 }, { "epoch": 0.48674836135651184, "grad_norm": 0.30753055214881897, "learning_rate": 4.941291256829277e-05, "loss": 0.3279, "step": 2989 }, { "epoch": 0.4869112079143427, "grad_norm": 0.38159623742103577, "learning_rate": 4.9411891573758064e-05, "loss": 0.4096, "step": 2990 }, { "epoch": 0.4870740544721736, "grad_norm": 0.37808558344841003, "learning_rate": 4.941086970276331e-05, "loss": 0.3852, "step": 2991 }, { "epoch": 0.4872369010300045, "grad_norm": 0.35667943954467773, "learning_rate": 4.940984695534519e-05, "loss": 0.3416, "step": 2992 }, { "epoch": 0.4873997475878354, "grad_norm": 0.3660946786403656, "learning_rate": 4.940882333154044e-05, "loss": 0.3751, "step": 2993 }, { "epoch": 0.4875625941456663, "grad_norm": 0.3317566514015198, "learning_rate": 4.94077988313858e-05, "loss": 0.3907, "step": 2994 }, { "epoch": 0.4877254407034971, "grad_norm": 0.36166056990623474, "learning_rate": 4.9406773454918054e-05, "loss": 0.3848, "step": 2995 }, { "epoch": 0.487888287261328, "grad_norm": 0.36606019735336304, "learning_rate": 4.9405747202174014e-05, "loss": 0.4011, "step": 2996 }, { "epoch": 0.4880511338191589, "grad_norm": 0.3377014994621277, "learning_rate": 4.9404720073190524e-05, "loss": 0.4183, "step": 2997 }, { "epoch": 0.48821398037698976, "grad_norm": 0.43745455145835876, "learning_rate": 4.940369206800447e-05, "loss": 0.3944, "step": 2998 }, { "epoch": 0.48837682693482065, "grad_norm": 0.35562658309936523, "learning_rate": 4.9402663186652756e-05, "loss": 0.3761, "step": 2999 }, { "epoch": 0.48853967349265154, "grad_norm": 0.37506306171417236, "learning_rate": 4.9401633429172324e-05, "loss": 0.3755, "step": 3000 }, { "epoch": 0.4887025200504824, "grad_norm": 0.4441492259502411, "learning_rate": 4.940060279560014e-05, "loss": 0.3984, "step": 3001 }, { "epoch": 0.4888653666083133, "grad_norm": 0.421421080827713, "learning_rate": 4.93995712859732e-05, "loss": 0.3476, "step": 3002 }, { "epoch": 0.4890282131661442, "grad_norm": 0.48758795857429504, "learning_rate": 4.939853890032856e-05, "loss": 0.3731, "step": 3003 }, { "epoch": 0.4891910597239751, "grad_norm": 0.35751497745513916, "learning_rate": 4.9397505638703266e-05, "loss": 0.3585, "step": 3004 }, { "epoch": 0.489353906281806, "grad_norm": 0.39283668994903564, "learning_rate": 4.939647150113443e-05, "loss": 0.3637, "step": 3005 }, { "epoch": 0.48951675283963686, "grad_norm": 0.47440141439437866, "learning_rate": 4.9395436487659165e-05, "loss": 0.4196, "step": 3006 }, { "epoch": 0.48967959939746775, "grad_norm": 0.3948454260826111, "learning_rate": 4.939440059831465e-05, "loss": 0.4005, "step": 3007 }, { "epoch": 0.48984244595529863, "grad_norm": 0.3543155789375305, "learning_rate": 4.939336383313805e-05, "loss": 0.3668, "step": 3008 }, { "epoch": 0.4900052925131295, "grad_norm": 0.46193718910217285, "learning_rate": 4.939232619216662e-05, "loss": 0.3314, "step": 3009 }, { "epoch": 0.4901681390709604, "grad_norm": 0.4164225459098816, "learning_rate": 4.939128767543759e-05, "loss": 0.4189, "step": 3010 }, { "epoch": 0.4903309856287913, "grad_norm": 0.3057769238948822, "learning_rate": 4.939024828298826e-05, "loss": 0.3413, "step": 3011 }, { "epoch": 0.4904938321866222, "grad_norm": 0.4223981201648712, "learning_rate": 4.9389208014855935e-05, "loss": 0.3882, "step": 3012 }, { "epoch": 0.490656678744453, "grad_norm": 0.3551565110683441, "learning_rate": 4.938816687107797e-05, "loss": 0.365, "step": 3013 }, { "epoch": 0.4908195253022839, "grad_norm": 0.4241223633289337, "learning_rate": 4.938712485169175e-05, "loss": 0.3469, "step": 3014 }, { "epoch": 0.4909823718601148, "grad_norm": 0.35579997301101685, "learning_rate": 4.9386081956734684e-05, "loss": 0.3377, "step": 3015 }, { "epoch": 0.4911452184179457, "grad_norm": 0.4033258259296417, "learning_rate": 4.93850381862442e-05, "loss": 0.4193, "step": 3016 }, { "epoch": 0.49130806497577656, "grad_norm": 0.44121792912483215, "learning_rate": 4.93839935402578e-05, "loss": 0.3653, "step": 3017 }, { "epoch": 0.49147091153360745, "grad_norm": 0.4379419982433319, "learning_rate": 4.938294801881297e-05, "loss": 0.3682, "step": 3018 }, { "epoch": 0.49163375809143833, "grad_norm": 0.3389052450656891, "learning_rate": 4.9381901621947255e-05, "loss": 0.3773, "step": 3019 }, { "epoch": 0.4917966046492692, "grad_norm": 0.4649069011211395, "learning_rate": 4.9380854349698224e-05, "loss": 0.4111, "step": 3020 }, { "epoch": 0.4919594512071001, "grad_norm": 0.44026002287864685, "learning_rate": 4.9379806202103465e-05, "loss": 0.363, "step": 3021 }, { "epoch": 0.492122297764931, "grad_norm": 0.38638147711753845, "learning_rate": 4.937875717920063e-05, "loss": 0.3917, "step": 3022 }, { "epoch": 0.4922851443227619, "grad_norm": 0.40857383608818054, "learning_rate": 4.9377707281027364e-05, "loss": 0.3305, "step": 3023 }, { "epoch": 0.49244799088059277, "grad_norm": 0.4658457636833191, "learning_rate": 4.9376656507621374e-05, "loss": 0.3905, "step": 3024 }, { "epoch": 0.49261083743842365, "grad_norm": 0.36272284388542175, "learning_rate": 4.937560485902038e-05, "loss": 0.3654, "step": 3025 }, { "epoch": 0.49277368399625454, "grad_norm": 0.38698673248291016, "learning_rate": 4.9374552335262134e-05, "loss": 0.3517, "step": 3026 }, { "epoch": 0.49293653055408543, "grad_norm": 0.3794856369495392, "learning_rate": 4.9373498936384446e-05, "loss": 0.3673, "step": 3027 }, { "epoch": 0.4930993771119163, "grad_norm": 0.4001924693584442, "learning_rate": 4.937244466242511e-05, "loss": 0.3995, "step": 3028 }, { "epoch": 0.4932622236697472, "grad_norm": 0.3418528735637665, "learning_rate": 4.937138951342198e-05, "loss": 0.3559, "step": 3029 }, { "epoch": 0.4934250702275781, "grad_norm": 0.33751022815704346, "learning_rate": 4.937033348941297e-05, "loss": 0.3629, "step": 3030 }, { "epoch": 0.493587916785409, "grad_norm": 0.4041525423526764, "learning_rate": 4.9369276590435954e-05, "loss": 0.3824, "step": 3031 }, { "epoch": 0.4937507633432398, "grad_norm": 0.374264657497406, "learning_rate": 4.936821881652891e-05, "loss": 0.395, "step": 3032 }, { "epoch": 0.4939136099010707, "grad_norm": 0.37838906049728394, "learning_rate": 4.9367160167729784e-05, "loss": 0.3637, "step": 3033 }, { "epoch": 0.4940764564589016, "grad_norm": 0.4027474522590637, "learning_rate": 4.936610064407662e-05, "loss": 0.377, "step": 3034 }, { "epoch": 0.49423930301673247, "grad_norm": 0.30591440200805664, "learning_rate": 4.936504024560743e-05, "loss": 0.3248, "step": 3035 }, { "epoch": 0.49440214957456335, "grad_norm": 0.3760291039943695, "learning_rate": 4.93639789723603e-05, "loss": 0.3498, "step": 3036 }, { "epoch": 0.49456499613239424, "grad_norm": 0.36015433073043823, "learning_rate": 4.9362916824373324e-05, "loss": 0.3528, "step": 3037 }, { "epoch": 0.49472784269022513, "grad_norm": 0.3994084894657135, "learning_rate": 4.9361853801684644e-05, "loss": 0.3928, "step": 3038 }, { "epoch": 0.494890689248056, "grad_norm": 0.3812892735004425, "learning_rate": 4.9360789904332415e-05, "loss": 0.3457, "step": 3039 }, { "epoch": 0.4950535358058869, "grad_norm": 0.3888704478740692, "learning_rate": 4.935972513235485e-05, "loss": 0.4165, "step": 3040 }, { "epoch": 0.4952163823637178, "grad_norm": 0.358725368976593, "learning_rate": 4.935865948579017e-05, "loss": 0.3808, "step": 3041 }, { "epoch": 0.4953792289215487, "grad_norm": 0.33564937114715576, "learning_rate": 4.9357592964676636e-05, "loss": 0.4041, "step": 3042 }, { "epoch": 0.49554207547937956, "grad_norm": 0.36632829904556274, "learning_rate": 4.9356525569052525e-05, "loss": 0.3721, "step": 3043 }, { "epoch": 0.49570492203721045, "grad_norm": 0.3406003713607788, "learning_rate": 4.935545729895618e-05, "loss": 0.4126, "step": 3044 }, { "epoch": 0.49586776859504134, "grad_norm": 0.39901208877563477, "learning_rate": 4.935438815442595e-05, "loss": 0.3812, "step": 3045 }, { "epoch": 0.4960306151528722, "grad_norm": 0.3197694718837738, "learning_rate": 4.9353318135500216e-05, "loss": 0.305, "step": 3046 }, { "epoch": 0.4961934617107031, "grad_norm": 0.3257903456687927, "learning_rate": 4.935224724221739e-05, "loss": 0.3827, "step": 3047 }, { "epoch": 0.496356308268534, "grad_norm": 0.38932570815086365, "learning_rate": 4.935117547461593e-05, "loss": 0.382, "step": 3048 }, { "epoch": 0.4965191548263649, "grad_norm": 0.34175318479537964, "learning_rate": 4.935010283273431e-05, "loss": 0.4132, "step": 3049 }, { "epoch": 0.4966820013841957, "grad_norm": 0.3789369761943817, "learning_rate": 4.9349029316611053e-05, "loss": 0.3731, "step": 3050 }, { "epoch": 0.4968448479420266, "grad_norm": 0.4309804141521454, "learning_rate": 4.934795492628469e-05, "loss": 0.3625, "step": 3051 }, { "epoch": 0.4970076944998575, "grad_norm": 0.42909422516822815, "learning_rate": 4.934687966179379e-05, "loss": 0.402, "step": 3052 }, { "epoch": 0.4971705410576884, "grad_norm": 0.3168916404247284, "learning_rate": 4.934580352317697e-05, "loss": 0.3484, "step": 3053 }, { "epoch": 0.49733338761551926, "grad_norm": 0.4253378212451935, "learning_rate": 4.9344726510472855e-05, "loss": 0.4025, "step": 3054 }, { "epoch": 0.49749623417335015, "grad_norm": 0.4019067883491516, "learning_rate": 4.934364862372012e-05, "loss": 0.3543, "step": 3055 }, { "epoch": 0.49765908073118104, "grad_norm": 0.38006266951560974, "learning_rate": 4.934256986295747e-05, "loss": 0.3462, "step": 3056 }, { "epoch": 0.4978219272890119, "grad_norm": 0.46254482865333557, "learning_rate": 4.934149022822363e-05, "loss": 0.4153, "step": 3057 }, { "epoch": 0.4979847738468428, "grad_norm": 0.3846089541912079, "learning_rate": 4.934040971955736e-05, "loss": 0.4048, "step": 3058 }, { "epoch": 0.4981476204046737, "grad_norm": 0.4388302266597748, "learning_rate": 4.9339328336997446e-05, "loss": 0.384, "step": 3059 }, { "epoch": 0.4983104669625046, "grad_norm": 0.3833116590976715, "learning_rate": 4.933824608058274e-05, "loss": 0.3994, "step": 3060 }, { "epoch": 0.49847331352033547, "grad_norm": 0.33080387115478516, "learning_rate": 4.933716295035207e-05, "loss": 0.3539, "step": 3061 }, { "epoch": 0.49863616007816636, "grad_norm": 0.32003337144851685, "learning_rate": 4.933607894634433e-05, "loss": 0.3763, "step": 3062 }, { "epoch": 0.49879900663599724, "grad_norm": 0.42696744203567505, "learning_rate": 4.933499406859845e-05, "loss": 0.3645, "step": 3063 }, { "epoch": 0.49896185319382813, "grad_norm": 0.40873682498931885, "learning_rate": 4.933390831715337e-05, "loss": 0.3527, "step": 3064 }, { "epoch": 0.499124699751659, "grad_norm": 0.4042898714542389, "learning_rate": 4.933282169204808e-05, "loss": 0.4178, "step": 3065 }, { "epoch": 0.4992875463094899, "grad_norm": 0.3312772810459137, "learning_rate": 4.9331734193321576e-05, "loss": 0.3916, "step": 3066 }, { "epoch": 0.4994503928673208, "grad_norm": 0.44795671105384827, "learning_rate": 4.933064582101293e-05, "loss": 0.3947, "step": 3067 }, { "epoch": 0.4996132394251517, "grad_norm": 0.36494573950767517, "learning_rate": 4.93295565751612e-05, "loss": 0.3552, "step": 3068 }, { "epoch": 0.4997760859829825, "grad_norm": 0.3673647940158844, "learning_rate": 4.9328466455805486e-05, "loss": 0.3823, "step": 3069 }, { "epoch": 0.4999389325408134, "grad_norm": 0.3317049741744995, "learning_rate": 4.932737546298494e-05, "loss": 0.3485, "step": 3070 }, { "epoch": 0.5001017790986443, "grad_norm": 0.3728806972503662, "learning_rate": 4.932628359673873e-05, "loss": 0.3867, "step": 3071 }, { "epoch": 0.5002646256564752, "grad_norm": 0.3762468695640564, "learning_rate": 4.932519085710605e-05, "loss": 0.3631, "step": 3072 }, { "epoch": 0.5004274722143061, "grad_norm": 0.4160269796848297, "learning_rate": 4.932409724412615e-05, "loss": 0.3766, "step": 3073 }, { "epoch": 0.500590318772137, "grad_norm": 0.3624891936779022, "learning_rate": 4.932300275783827e-05, "loss": 0.3768, "step": 3074 }, { "epoch": 0.5007531653299678, "grad_norm": 0.3669903874397278, "learning_rate": 4.932190739828172e-05, "loss": 0.3945, "step": 3075 }, { "epoch": 0.5009160118877988, "grad_norm": 0.32220521569252014, "learning_rate": 4.9320811165495825e-05, "loss": 0.3572, "step": 3076 }, { "epoch": 0.5010788584456296, "grad_norm": 0.37896957993507385, "learning_rate": 4.931971405951995e-05, "loss": 0.4188, "step": 3077 }, { "epoch": 0.5012417050034604, "grad_norm": 0.3583478629589081, "learning_rate": 4.9318616080393465e-05, "loss": 0.3561, "step": 3078 }, { "epoch": 0.5014045515612914, "grad_norm": 0.3453187644481659, "learning_rate": 4.931751722815581e-05, "loss": 0.3799, "step": 3079 }, { "epoch": 0.5015673981191222, "grad_norm": 0.3321470618247986, "learning_rate": 4.931641750284642e-05, "loss": 0.3719, "step": 3080 }, { "epoch": 0.5017302446769532, "grad_norm": 0.36725446581840515, "learning_rate": 4.93153169045048e-05, "loss": 0.4008, "step": 3081 }, { "epoch": 0.501893091234784, "grad_norm": 0.3832588195800781, "learning_rate": 4.9314215433170444e-05, "loss": 0.3992, "step": 3082 }, { "epoch": 0.5020559377926149, "grad_norm": 0.37611863017082214, "learning_rate": 4.931311308888291e-05, "loss": 0.4054, "step": 3083 }, { "epoch": 0.5022187843504458, "grad_norm": 0.3026279807090759, "learning_rate": 4.931200987168177e-05, "loss": 0.3714, "step": 3084 }, { "epoch": 0.5023816309082767, "grad_norm": 0.4449187219142914, "learning_rate": 4.9310905781606634e-05, "loss": 0.4205, "step": 3085 }, { "epoch": 0.5025444774661075, "grad_norm": 0.36707809567451477, "learning_rate": 4.930980081869715e-05, "loss": 0.3977, "step": 3086 }, { "epoch": 0.5027073240239385, "grad_norm": 0.3729420006275177, "learning_rate": 4.930869498299298e-05, "loss": 0.392, "step": 3087 }, { "epoch": 0.5028701705817693, "grad_norm": 0.559563934803009, "learning_rate": 4.930758827453382e-05, "loss": 0.3886, "step": 3088 }, { "epoch": 0.5030330171396002, "grad_norm": 0.3643656373023987, "learning_rate": 4.9306480693359424e-05, "loss": 0.3718, "step": 3089 }, { "epoch": 0.5031958636974311, "grad_norm": 0.3427000939846039, "learning_rate": 4.930537223950954e-05, "loss": 0.3586, "step": 3090 }, { "epoch": 0.503358710255262, "grad_norm": 0.2920103371143341, "learning_rate": 4.930426291302398e-05, "loss": 0.335, "step": 3091 }, { "epoch": 0.5035215568130929, "grad_norm": 0.2956799566745758, "learning_rate": 4.930315271394256e-05, "loss": 0.3395, "step": 3092 }, { "epoch": 0.5036844033709238, "grad_norm": 0.4073962867259979, "learning_rate": 4.930204164230514e-05, "loss": 0.3765, "step": 3093 }, { "epoch": 0.5038472499287546, "grad_norm": 0.4028083086013794, "learning_rate": 4.930092969815161e-05, "loss": 0.3676, "step": 3094 }, { "epoch": 0.5040100964865856, "grad_norm": 0.31399956345558167, "learning_rate": 4.929981688152191e-05, "loss": 0.3492, "step": 3095 }, { "epoch": 0.5041729430444164, "grad_norm": 0.405129998922348, "learning_rate": 4.929870319245597e-05, "loss": 0.4447, "step": 3096 }, { "epoch": 0.5043357896022472, "grad_norm": 0.3562757968902588, "learning_rate": 4.9297588630993787e-05, "loss": 0.4271, "step": 3097 }, { "epoch": 0.5044986361600782, "grad_norm": 0.3130459189414978, "learning_rate": 4.9296473197175376e-05, "loss": 0.3484, "step": 3098 }, { "epoch": 0.504661482717909, "grad_norm": 0.34732159972190857, "learning_rate": 4.9295356891040776e-05, "loss": 0.372, "step": 3099 }, { "epoch": 0.50482432927574, "grad_norm": 0.3927983343601227, "learning_rate": 4.929423971263009e-05, "loss": 0.409, "step": 3100 }, { "epoch": 0.5049871758335708, "grad_norm": 0.38489198684692383, "learning_rate": 4.9293121661983396e-05, "loss": 0.4286, "step": 3101 }, { "epoch": 0.5051500223914017, "grad_norm": 0.37603017687797546, "learning_rate": 4.929200273914085e-05, "loss": 0.3938, "step": 3102 }, { "epoch": 0.5053128689492326, "grad_norm": 0.32992950081825256, "learning_rate": 4.929088294414263e-05, "loss": 0.3551, "step": 3103 }, { "epoch": 0.5054757155070635, "grad_norm": 0.36826425790786743, "learning_rate": 4.9289762277028935e-05, "loss": 0.3501, "step": 3104 }, { "epoch": 0.5056385620648943, "grad_norm": 0.34262144565582275, "learning_rate": 4.9288640737839996e-05, "loss": 0.4143, "step": 3105 }, { "epoch": 0.5058014086227253, "grad_norm": 0.31711024045944214, "learning_rate": 4.928751832661609e-05, "loss": 0.3999, "step": 3106 }, { "epoch": 0.5059642551805561, "grad_norm": 0.3287356197834015, "learning_rate": 4.9286395043397505e-05, "loss": 0.3567, "step": 3107 }, { "epoch": 0.506127101738387, "grad_norm": 0.3846053183078766, "learning_rate": 4.928527088822457e-05, "loss": 0.3862, "step": 3108 }, { "epoch": 0.5062899482962179, "grad_norm": 0.36871016025543213, "learning_rate": 4.928414586113765e-05, "loss": 0.3776, "step": 3109 }, { "epoch": 0.5064527948540488, "grad_norm": 0.3769119381904602, "learning_rate": 4.928301996217714e-05, "loss": 0.4171, "step": 3110 }, { "epoch": 0.5066156414118796, "grad_norm": 0.3330841660499573, "learning_rate": 4.928189319138346e-05, "loss": 0.3481, "step": 3111 }, { "epoch": 0.5067784879697106, "grad_norm": 0.2905539274215698, "learning_rate": 4.928076554879707e-05, "loss": 0.3709, "step": 3112 }, { "epoch": 0.5069413345275414, "grad_norm": 0.3529425859451294, "learning_rate": 4.9279637034458435e-05, "loss": 0.3342, "step": 3113 }, { "epoch": 0.5071041810853724, "grad_norm": 0.3267395794391632, "learning_rate": 4.9278507648408104e-05, "loss": 0.3764, "step": 3114 }, { "epoch": 0.5072670276432032, "grad_norm": 0.34186649322509766, "learning_rate": 4.9277377390686595e-05, "loss": 0.3463, "step": 3115 }, { "epoch": 0.507429874201034, "grad_norm": 0.4136315882205963, "learning_rate": 4.92762462613345e-05, "loss": 0.4301, "step": 3116 }, { "epoch": 0.507592720758865, "grad_norm": 0.3741323947906494, "learning_rate": 4.927511426039244e-05, "loss": 0.3541, "step": 3117 }, { "epoch": 0.5077555673166958, "grad_norm": 0.3597993552684784, "learning_rate": 4.9273981387901046e-05, "loss": 0.4101, "step": 3118 }, { "epoch": 0.5079184138745267, "grad_norm": 0.36060404777526855, "learning_rate": 4.927284764390099e-05, "loss": 0.3176, "step": 3119 }, { "epoch": 0.5080812604323576, "grad_norm": 0.36868175864219666, "learning_rate": 4.927171302843298e-05, "loss": 0.345, "step": 3120 }, { "epoch": 0.5082441069901885, "grad_norm": 0.4902564287185669, "learning_rate": 4.9270577541537754e-05, "loss": 0.3917, "step": 3121 }, { "epoch": 0.5084069535480193, "grad_norm": 0.4223705530166626, "learning_rate": 4.926944118325607e-05, "loss": 0.3668, "step": 3122 }, { "epoch": 0.5085698001058503, "grad_norm": 0.39098259806632996, "learning_rate": 4.926830395362875e-05, "loss": 0.3863, "step": 3123 }, { "epoch": 0.5087326466636811, "grad_norm": 0.482052206993103, "learning_rate": 4.9267165852696595e-05, "loss": 0.3654, "step": 3124 }, { "epoch": 0.5088954932215121, "grad_norm": 0.444757878780365, "learning_rate": 4.926602688050048e-05, "loss": 0.3631, "step": 3125 }, { "epoch": 0.5090583397793429, "grad_norm": 0.47095930576324463, "learning_rate": 4.92648870370813e-05, "loss": 0.3588, "step": 3126 }, { "epoch": 0.5092211863371738, "grad_norm": 0.46841686964035034, "learning_rate": 4.926374632247998e-05, "loss": 0.3616, "step": 3127 }, { "epoch": 0.5093840328950047, "grad_norm": 0.4825991690158844, "learning_rate": 4.926260473673747e-05, "loss": 0.3822, "step": 3128 }, { "epoch": 0.5095468794528356, "grad_norm": 0.42560872435569763, "learning_rate": 4.926146227989475e-05, "loss": 0.3538, "step": 3129 }, { "epoch": 0.5097097260106664, "grad_norm": 0.361091285943985, "learning_rate": 4.926031895199285e-05, "loss": 0.368, "step": 3130 }, { "epoch": 0.5098725725684974, "grad_norm": 0.4612383544445038, "learning_rate": 4.925917475307281e-05, "loss": 0.3817, "step": 3131 }, { "epoch": 0.5100354191263282, "grad_norm": 0.5061969757080078, "learning_rate": 4.925802968317572e-05, "loss": 0.4138, "step": 3132 }, { "epoch": 0.510198265684159, "grad_norm": 0.3993317186832428, "learning_rate": 4.925688374234268e-05, "loss": 0.3618, "step": 3133 }, { "epoch": 0.51036111224199, "grad_norm": 0.3544561564922333, "learning_rate": 4.925573693061485e-05, "loss": 0.3656, "step": 3134 }, { "epoch": 0.5105239587998208, "grad_norm": 0.38060152530670166, "learning_rate": 4.9254589248033376e-05, "loss": 0.3286, "step": 3135 }, { "epoch": 0.5106868053576518, "grad_norm": 0.3993256390094757, "learning_rate": 4.925344069463948e-05, "loss": 0.3506, "step": 3136 }, { "epoch": 0.5108496519154826, "grad_norm": 0.44477757811546326, "learning_rate": 4.9252291270474416e-05, "loss": 0.3761, "step": 3137 }, { "epoch": 0.5110124984733135, "grad_norm": 0.3369402289390564, "learning_rate": 4.9251140975579416e-05, "loss": 0.373, "step": 3138 }, { "epoch": 0.5111753450311444, "grad_norm": 0.31486985087394714, "learning_rate": 4.92499898099958e-05, "loss": 0.3802, "step": 3139 }, { "epoch": 0.5113381915889753, "grad_norm": 0.3584831953048706, "learning_rate": 4.92488377737649e-05, "loss": 0.3923, "step": 3140 }, { "epoch": 0.5115010381468061, "grad_norm": 0.44633251428604126, "learning_rate": 4.924768486692807e-05, "loss": 0.3722, "step": 3141 }, { "epoch": 0.5116638847046371, "grad_norm": 0.34109610319137573, "learning_rate": 4.92465310895267e-05, "loss": 0.3521, "step": 3142 }, { "epoch": 0.5118267312624679, "grad_norm": 0.3252423107624054, "learning_rate": 4.924537644160223e-05, "loss": 0.3332, "step": 3143 }, { "epoch": 0.5119895778202989, "grad_norm": 0.3307587504386902, "learning_rate": 4.92442209231961e-05, "loss": 0.346, "step": 3144 }, { "epoch": 0.5121524243781297, "grad_norm": 0.44064322113990784, "learning_rate": 4.9243064534349804e-05, "loss": 0.3882, "step": 3145 }, { "epoch": 0.5123152709359606, "grad_norm": 0.3454102873802185, "learning_rate": 4.924190727510485e-05, "loss": 0.379, "step": 3146 }, { "epoch": 0.5124781174937915, "grad_norm": 0.3909529745578766, "learning_rate": 4.9240749145502796e-05, "loss": 0.3778, "step": 3147 }, { "epoch": 0.5126409640516224, "grad_norm": 0.3932008743286133, "learning_rate": 4.9239590145585226e-05, "loss": 0.4057, "step": 3148 }, { "epoch": 0.5128038106094532, "grad_norm": 0.3296937644481659, "learning_rate": 4.9238430275393737e-05, "loss": 0.3679, "step": 3149 }, { "epoch": 0.5129666571672842, "grad_norm": 0.36282041668891907, "learning_rate": 4.923726953496999e-05, "loss": 0.3764, "step": 3150 }, { "epoch": 0.513129503725115, "grad_norm": 0.37183764576911926, "learning_rate": 4.9236107924355644e-05, "loss": 0.3812, "step": 3151 }, { "epoch": 0.5132923502829458, "grad_norm": 0.354964941740036, "learning_rate": 4.923494544359241e-05, "loss": 0.3708, "step": 3152 }, { "epoch": 0.5134551968407768, "grad_norm": 0.3338194787502289, "learning_rate": 4.923378209272203e-05, "loss": 0.3661, "step": 3153 }, { "epoch": 0.5136180433986076, "grad_norm": 0.32598716020584106, "learning_rate": 4.923261787178626e-05, "loss": 0.406, "step": 3154 }, { "epoch": 0.5137808899564386, "grad_norm": 0.3553811311721802, "learning_rate": 4.923145278082691e-05, "loss": 0.3982, "step": 3155 }, { "epoch": 0.5139437365142694, "grad_norm": 0.30437371134757996, "learning_rate": 4.923028681988581e-05, "loss": 0.3961, "step": 3156 }, { "epoch": 0.5141065830721003, "grad_norm": 0.31667324900627136, "learning_rate": 4.92291199890048e-05, "loss": 0.3624, "step": 3157 }, { "epoch": 0.5142694296299312, "grad_norm": 0.3296147584915161, "learning_rate": 4.922795228822581e-05, "loss": 0.31, "step": 3158 }, { "epoch": 0.5144322761877621, "grad_norm": 0.3790818154811859, "learning_rate": 4.922678371759073e-05, "loss": 0.3862, "step": 3159 }, { "epoch": 0.5145951227455929, "grad_norm": 0.342041552066803, "learning_rate": 4.922561427714154e-05, "loss": 0.3715, "step": 3160 }, { "epoch": 0.5147579693034239, "grad_norm": 0.34700822830200195, "learning_rate": 4.922444396692021e-05, "loss": 0.3595, "step": 3161 }, { "epoch": 0.5149208158612547, "grad_norm": 0.36614760756492615, "learning_rate": 4.922327278696876e-05, "loss": 0.3704, "step": 3162 }, { "epoch": 0.5150836624190857, "grad_norm": 0.3813156485557556, "learning_rate": 4.922210073732925e-05, "loss": 0.3731, "step": 3163 }, { "epoch": 0.5152465089769165, "grad_norm": 0.3756825923919678, "learning_rate": 4.922092781804374e-05, "loss": 0.3782, "step": 3164 }, { "epoch": 0.5154093555347474, "grad_norm": 0.3473169803619385, "learning_rate": 4.9219754029154366e-05, "loss": 0.3562, "step": 3165 }, { "epoch": 0.5155722020925783, "grad_norm": 0.3640451431274414, "learning_rate": 4.921857937070326e-05, "loss": 0.3686, "step": 3166 }, { "epoch": 0.5157350486504092, "grad_norm": 0.30770811438560486, "learning_rate": 4.921740384273258e-05, "loss": 0.3342, "step": 3167 }, { "epoch": 0.51589789520824, "grad_norm": 0.331573486328125, "learning_rate": 4.9216227445284555e-05, "loss": 0.3357, "step": 3168 }, { "epoch": 0.516060741766071, "grad_norm": 0.3508707880973816, "learning_rate": 4.921505017840141e-05, "loss": 0.3603, "step": 3169 }, { "epoch": 0.5162235883239018, "grad_norm": 0.3526172339916229, "learning_rate": 4.921387204212541e-05, "loss": 0.4131, "step": 3170 }, { "epoch": 0.5163864348817326, "grad_norm": 0.3149321377277374, "learning_rate": 4.921269303649886e-05, "loss": 0.3155, "step": 3171 }, { "epoch": 0.5165492814395636, "grad_norm": 0.32850977778434753, "learning_rate": 4.9211513161564084e-05, "loss": 0.3624, "step": 3172 }, { "epoch": 0.5167121279973944, "grad_norm": 0.35946905612945557, "learning_rate": 4.921033241736345e-05, "loss": 0.419, "step": 3173 }, { "epoch": 0.5168749745552254, "grad_norm": 0.3606264591217041, "learning_rate": 4.920915080393933e-05, "loss": 0.3663, "step": 3174 }, { "epoch": 0.5170378211130562, "grad_norm": 0.3845195770263672, "learning_rate": 4.9207968321334186e-05, "loss": 0.4201, "step": 3175 }, { "epoch": 0.5172006676708871, "grad_norm": 0.3469277620315552, "learning_rate": 4.9206784969590437e-05, "loss": 0.349, "step": 3176 }, { "epoch": 0.517363514228718, "grad_norm": 0.36329877376556396, "learning_rate": 4.920560074875059e-05, "loss": 0.4, "step": 3177 }, { "epoch": 0.5175263607865489, "grad_norm": 0.3231412470340729, "learning_rate": 4.920441565885715e-05, "loss": 0.3449, "step": 3178 }, { "epoch": 0.5176892073443797, "grad_norm": 0.37454038858413696, "learning_rate": 4.920322969995267e-05, "loss": 0.3771, "step": 3179 }, { "epoch": 0.5178520539022107, "grad_norm": 0.313828706741333, "learning_rate": 4.920204287207972e-05, "loss": 0.3476, "step": 3180 }, { "epoch": 0.5180149004600415, "grad_norm": 0.3312549293041229, "learning_rate": 4.9200855175280934e-05, "loss": 0.3475, "step": 3181 }, { "epoch": 0.5181777470178724, "grad_norm": 0.3212340176105499, "learning_rate": 4.9199666609598936e-05, "loss": 0.3641, "step": 3182 }, { "epoch": 0.5183405935757033, "grad_norm": 0.36252009868621826, "learning_rate": 4.9198477175076395e-05, "loss": 0.3762, "step": 3183 }, { "epoch": 0.5185034401335342, "grad_norm": 0.33070626854896545, "learning_rate": 4.919728687175603e-05, "loss": 0.3144, "step": 3184 }, { "epoch": 0.518666286691365, "grad_norm": 0.33686041831970215, "learning_rate": 4.919609569968057e-05, "loss": 0.3552, "step": 3185 }, { "epoch": 0.518829133249196, "grad_norm": 0.46654942631721497, "learning_rate": 4.9194903658892774e-05, "loss": 0.409, "step": 3186 }, { "epoch": 0.5189919798070268, "grad_norm": 0.36060816049575806, "learning_rate": 4.919371074943546e-05, "loss": 0.4187, "step": 3187 }, { "epoch": 0.5191548263648577, "grad_norm": 0.3372405171394348, "learning_rate": 4.9192516971351435e-05, "loss": 0.3591, "step": 3188 }, { "epoch": 0.5193176729226886, "grad_norm": 0.3590679466724396, "learning_rate": 4.919132232468356e-05, "loss": 0.3842, "step": 3189 }, { "epoch": 0.5194805194805194, "grad_norm": 0.3403773009777069, "learning_rate": 4.919012680947475e-05, "loss": 0.3814, "step": 3190 }, { "epoch": 0.5196433660383504, "grad_norm": 0.3464028537273407, "learning_rate": 4.9188930425767895e-05, "loss": 0.3726, "step": 3191 }, { "epoch": 0.5198062125961812, "grad_norm": 0.41336312890052795, "learning_rate": 4.918773317360598e-05, "loss": 0.4013, "step": 3192 }, { "epoch": 0.5199690591540121, "grad_norm": 0.2955075800418854, "learning_rate": 4.918653505303197e-05, "loss": 0.359, "step": 3193 }, { "epoch": 0.520131905711843, "grad_norm": 0.38374266028404236, "learning_rate": 4.9185336064088895e-05, "loss": 0.4012, "step": 3194 }, { "epoch": 0.5202947522696739, "grad_norm": 0.3252803683280945, "learning_rate": 4.9184136206819785e-05, "loss": 0.355, "step": 3195 }, { "epoch": 0.5204575988275048, "grad_norm": 0.37180373072624207, "learning_rate": 4.918293548126773e-05, "loss": 0.4213, "step": 3196 }, { "epoch": 0.5206204453853357, "grad_norm": 0.33665210008621216, "learning_rate": 4.918173388747584e-05, "loss": 0.3673, "step": 3197 }, { "epoch": 0.5207832919431665, "grad_norm": 0.33575987815856934, "learning_rate": 4.918053142548724e-05, "loss": 0.3701, "step": 3198 }, { "epoch": 0.5209461385009975, "grad_norm": 0.410925954580307, "learning_rate": 4.917932809534513e-05, "loss": 0.3657, "step": 3199 }, { "epoch": 0.5211089850588283, "grad_norm": 0.3553693890571594, "learning_rate": 4.9178123897092695e-05, "loss": 0.3857, "step": 3200 }, { "epoch": 0.5212718316166592, "grad_norm": 0.4132240116596222, "learning_rate": 4.917691883077316e-05, "loss": 0.4052, "step": 3201 }, { "epoch": 0.5214346781744901, "grad_norm": 0.48171257972717285, "learning_rate": 4.917571289642982e-05, "loss": 0.3983, "step": 3202 }, { "epoch": 0.521597524732321, "grad_norm": 0.3741353750228882, "learning_rate": 4.917450609410595e-05, "loss": 0.3801, "step": 3203 }, { "epoch": 0.5217603712901518, "grad_norm": 0.3660421669483185, "learning_rate": 4.9173298423844885e-05, "loss": 0.3813, "step": 3204 }, { "epoch": 0.5219232178479828, "grad_norm": 0.5314256548881531, "learning_rate": 4.917208988568998e-05, "loss": 0.3733, "step": 3205 }, { "epoch": 0.5220860644058136, "grad_norm": 0.38828882575035095, "learning_rate": 4.9170880479684617e-05, "loss": 0.3686, "step": 3206 }, { "epoch": 0.5222489109636445, "grad_norm": 0.38178837299346924, "learning_rate": 4.9169670205872234e-05, "loss": 0.3579, "step": 3207 }, { "epoch": 0.5224117575214754, "grad_norm": 0.375981867313385, "learning_rate": 4.9168459064296275e-05, "loss": 0.3942, "step": 3208 }, { "epoch": 0.5225746040793062, "grad_norm": 0.3699842691421509, "learning_rate": 4.916724705500023e-05, "loss": 0.3367, "step": 3209 }, { "epoch": 0.5227374506371372, "grad_norm": 0.41127970814704895, "learning_rate": 4.91660341780276e-05, "loss": 0.3697, "step": 3210 }, { "epoch": 0.522900297194968, "grad_norm": 0.3405638635158539, "learning_rate": 4.916482043342194e-05, "loss": 0.3505, "step": 3211 }, { "epoch": 0.5230631437527989, "grad_norm": 0.455136775970459, "learning_rate": 4.916360582122683e-05, "loss": 0.4041, "step": 3212 }, { "epoch": 0.5232259903106298, "grad_norm": 0.4878956377506256, "learning_rate": 4.916239034148588e-05, "loss": 0.3963, "step": 3213 }, { "epoch": 0.5233888368684607, "grad_norm": 0.35300686955451965, "learning_rate": 4.916117399424272e-05, "loss": 0.3985, "step": 3214 }, { "epoch": 0.5235516834262915, "grad_norm": 0.3554278612136841, "learning_rate": 4.9159956779541026e-05, "loss": 0.3629, "step": 3215 }, { "epoch": 0.5237145299841225, "grad_norm": 0.4341237246990204, "learning_rate": 4.91587386974245e-05, "loss": 0.372, "step": 3216 }, { "epoch": 0.5238773765419533, "grad_norm": 0.37154701352119446, "learning_rate": 4.9157519747936865e-05, "loss": 0.395, "step": 3217 }, { "epoch": 0.5240402230997843, "grad_norm": 0.4451923966407776, "learning_rate": 4.91562999311219e-05, "loss": 0.3968, "step": 3218 }, { "epoch": 0.5242030696576151, "grad_norm": 0.41319558024406433, "learning_rate": 4.915507924702339e-05, "loss": 0.3856, "step": 3219 }, { "epoch": 0.524365916215446, "grad_norm": 0.35818272829055786, "learning_rate": 4.915385769568517e-05, "loss": 0.4042, "step": 3220 }, { "epoch": 0.5245287627732769, "grad_norm": 0.382959246635437, "learning_rate": 4.915263527715109e-05, "loss": 0.324, "step": 3221 }, { "epoch": 0.5246916093311078, "grad_norm": 0.3336436450481415, "learning_rate": 4.9151411991465036e-05, "loss": 0.3518, "step": 3222 }, { "epoch": 0.5248544558889386, "grad_norm": 0.38984084129333496, "learning_rate": 4.9150187838670937e-05, "loss": 0.3684, "step": 3223 }, { "epoch": 0.5250173024467696, "grad_norm": 0.40392282605171204, "learning_rate": 4.914896281881274e-05, "loss": 0.4288, "step": 3224 }, { "epoch": 0.5251801490046004, "grad_norm": 0.3345588147640228, "learning_rate": 4.9147736931934416e-05, "loss": 0.3811, "step": 3225 }, { "epoch": 0.5253429955624312, "grad_norm": 0.34810855984687805, "learning_rate": 4.914651017808e-05, "loss": 0.3415, "step": 3226 }, { "epoch": 0.5255058421202622, "grad_norm": 0.4348241984844208, "learning_rate": 4.914528255729351e-05, "loss": 0.3919, "step": 3227 }, { "epoch": 0.525668688678093, "grad_norm": 0.4602469801902771, "learning_rate": 4.9144054069619044e-05, "loss": 0.3401, "step": 3228 }, { "epoch": 0.525831535235924, "grad_norm": 0.314312219619751, "learning_rate": 4.91428247151007e-05, "loss": 0.381, "step": 3229 }, { "epoch": 0.5259943817937548, "grad_norm": 0.31706011295318604, "learning_rate": 4.9141594493782614e-05, "loss": 0.3595, "step": 3230 }, { "epoch": 0.5261572283515857, "grad_norm": 0.3662639260292053, "learning_rate": 4.914036340570895e-05, "loss": 0.3594, "step": 3231 }, { "epoch": 0.5263200749094166, "grad_norm": 0.4475546181201935, "learning_rate": 4.913913145092392e-05, "loss": 0.4096, "step": 3232 }, { "epoch": 0.5264829214672475, "grad_norm": 0.3733674883842468, "learning_rate": 4.9137898629471746e-05, "loss": 0.3869, "step": 3233 }, { "epoch": 0.5266457680250783, "grad_norm": 0.3804212510585785, "learning_rate": 4.9136664941396695e-05, "loss": 0.3686, "step": 3234 }, { "epoch": 0.5268086145829093, "grad_norm": 0.4827967882156372, "learning_rate": 4.913543038674305e-05, "loss": 0.3557, "step": 3235 }, { "epoch": 0.5269714611407401, "grad_norm": 0.4024789333343506, "learning_rate": 4.913419496555515e-05, "loss": 0.3645, "step": 3236 }, { "epoch": 0.5271343076985711, "grad_norm": 0.3848850429058075, "learning_rate": 4.913295867787734e-05, "loss": 0.3794, "step": 3237 }, { "epoch": 0.5272971542564019, "grad_norm": 0.5056990385055542, "learning_rate": 4.913172152375401e-05, "loss": 0.4235, "step": 3238 }, { "epoch": 0.5274600008142328, "grad_norm": 0.4731627404689789, "learning_rate": 4.9130483503229565e-05, "loss": 0.4224, "step": 3239 }, { "epoch": 0.5276228473720637, "grad_norm": 0.44184017181396484, "learning_rate": 4.912924461634848e-05, "loss": 0.3927, "step": 3240 }, { "epoch": 0.5277856939298946, "grad_norm": 0.4948413074016571, "learning_rate": 4.9128004863155215e-05, "loss": 0.3948, "step": 3241 }, { "epoch": 0.5279485404877254, "grad_norm": 0.3987753689289093, "learning_rate": 4.9126764243694286e-05, "loss": 0.346, "step": 3242 }, { "epoch": 0.5281113870455564, "grad_norm": 0.41937580704689026, "learning_rate": 4.9125522758010234e-05, "loss": 0.3827, "step": 3243 }, { "epoch": 0.5282742336033872, "grad_norm": 0.3571641147136688, "learning_rate": 4.9124280406147635e-05, "loss": 0.3776, "step": 3244 }, { "epoch": 0.528437080161218, "grad_norm": 0.3988814055919647, "learning_rate": 4.912303718815109e-05, "loss": 0.3598, "step": 3245 }, { "epoch": 0.528599926719049, "grad_norm": 0.46304383873939514, "learning_rate": 4.912179310406524e-05, "loss": 0.3593, "step": 3246 }, { "epoch": 0.5287627732768798, "grad_norm": 0.42286378145217896, "learning_rate": 4.912054815393474e-05, "loss": 0.3783, "step": 3247 }, { "epoch": 0.5289256198347108, "grad_norm": 0.4202365279197693, "learning_rate": 4.9119302337804296e-05, "loss": 0.393, "step": 3248 }, { "epoch": 0.5290884663925416, "grad_norm": 0.6178609728813171, "learning_rate": 4.9118055655718633e-05, "loss": 0.353, "step": 3249 }, { "epoch": 0.5292513129503725, "grad_norm": 0.37502557039260864, "learning_rate": 4.9116808107722525e-05, "loss": 0.328, "step": 3250 }, { "epoch": 0.5294141595082034, "grad_norm": 0.45688292384147644, "learning_rate": 4.9115559693860736e-05, "loss": 0.4597, "step": 3251 }, { "epoch": 0.5295770060660343, "grad_norm": 0.3524470031261444, "learning_rate": 4.91143104141781e-05, "loss": 0.3836, "step": 3252 }, { "epoch": 0.5297398526238651, "grad_norm": 0.40384456515312195, "learning_rate": 4.911306026871948e-05, "loss": 0.3872, "step": 3253 }, { "epoch": 0.5299026991816961, "grad_norm": 0.4031107425689697, "learning_rate": 4.9111809257529754e-05, "loss": 0.3918, "step": 3254 }, { "epoch": 0.5300655457395269, "grad_norm": 0.4007270932197571, "learning_rate": 4.911055738065383e-05, "loss": 0.3743, "step": 3255 }, { "epoch": 0.5302283922973579, "grad_norm": 0.3294467329978943, "learning_rate": 4.910930463813666e-05, "loss": 0.3301, "step": 3256 }, { "epoch": 0.5303912388551887, "grad_norm": 0.3969736695289612, "learning_rate": 4.9108051030023216e-05, "loss": 0.3715, "step": 3257 }, { "epoch": 0.5305540854130196, "grad_norm": 0.4449531137943268, "learning_rate": 4.910679655635851e-05, "loss": 0.3785, "step": 3258 }, { "epoch": 0.5307169319708505, "grad_norm": 0.3660672903060913, "learning_rate": 4.910554121718759e-05, "loss": 0.3765, "step": 3259 }, { "epoch": 0.5308797785286814, "grad_norm": 0.3329855799674988, "learning_rate": 4.910428501255551e-05, "loss": 0.3566, "step": 3260 }, { "epoch": 0.5310426250865122, "grad_norm": 0.41206613183021545, "learning_rate": 4.9103027942507385e-05, "loss": 0.3554, "step": 3261 }, { "epoch": 0.5312054716443431, "grad_norm": 0.3608652949333191, "learning_rate": 4.910177000708834e-05, "loss": 0.3391, "step": 3262 }, { "epoch": 0.531368318202174, "grad_norm": 0.37061595916748047, "learning_rate": 4.910051120634354e-05, "loss": 0.3699, "step": 3263 }, { "epoch": 0.5315311647600048, "grad_norm": 0.32536330819129944, "learning_rate": 4.909925154031818e-05, "loss": 0.3305, "step": 3264 }, { "epoch": 0.5316940113178358, "grad_norm": 0.35822829604148865, "learning_rate": 4.9097991009057484e-05, "loss": 0.3683, "step": 3265 }, { "epoch": 0.5318568578756666, "grad_norm": 0.42155125737190247, "learning_rate": 4.9096729612606726e-05, "loss": 0.3783, "step": 3266 }, { "epoch": 0.5320197044334976, "grad_norm": 0.3412022292613983, "learning_rate": 4.9095467351011166e-05, "loss": 0.3521, "step": 3267 }, { "epoch": 0.5321825509913284, "grad_norm": 0.355861097574234, "learning_rate": 4.9094204224316136e-05, "loss": 0.3662, "step": 3268 }, { "epoch": 0.5323453975491593, "grad_norm": 0.41256728768348694, "learning_rate": 4.909294023256699e-05, "loss": 0.4057, "step": 3269 }, { "epoch": 0.5325082441069902, "grad_norm": 0.35010528564453125, "learning_rate": 4.909167537580911e-05, "loss": 0.3934, "step": 3270 }, { "epoch": 0.5326710906648211, "grad_norm": 0.40157240629196167, "learning_rate": 4.909040965408789e-05, "loss": 0.3982, "step": 3271 }, { "epoch": 0.5328339372226519, "grad_norm": 0.37385293841362, "learning_rate": 4.9089143067448795e-05, "loss": 0.3337, "step": 3272 }, { "epoch": 0.5329967837804829, "grad_norm": 0.45463040471076965, "learning_rate": 4.9087875615937294e-05, "loss": 0.3804, "step": 3273 }, { "epoch": 0.5331596303383137, "grad_norm": 0.3358578383922577, "learning_rate": 4.9086607299598885e-05, "loss": 0.3738, "step": 3274 }, { "epoch": 0.5333224768961446, "grad_norm": 0.4344438910484314, "learning_rate": 4.9085338118479106e-05, "loss": 0.3883, "step": 3275 }, { "epoch": 0.5334853234539755, "grad_norm": 0.393250972032547, "learning_rate": 4.908406807262353e-05, "loss": 0.386, "step": 3276 }, { "epoch": 0.5336481700118064, "grad_norm": 0.4477848708629608, "learning_rate": 4.908279716207775e-05, "loss": 0.4144, "step": 3277 }, { "epoch": 0.5338110165696373, "grad_norm": 0.3257666826248169, "learning_rate": 4.9081525386887404e-05, "loss": 0.3428, "step": 3278 }, { "epoch": 0.5339738631274682, "grad_norm": 0.3514195382595062, "learning_rate": 4.9080252747098144e-05, "loss": 0.3474, "step": 3279 }, { "epoch": 0.534136709685299, "grad_norm": 0.38253849744796753, "learning_rate": 4.907897924275566e-05, "loss": 0.3667, "step": 3280 }, { "epoch": 0.5342995562431299, "grad_norm": 0.34821268916130066, "learning_rate": 4.907770487390568e-05, "loss": 0.4069, "step": 3281 }, { "epoch": 0.5344624028009608, "grad_norm": 0.32649266719818115, "learning_rate": 4.9076429640593965e-05, "loss": 0.3392, "step": 3282 }, { "epoch": 0.5346252493587916, "grad_norm": 0.539322555065155, "learning_rate": 4.907515354286628e-05, "loss": 0.3783, "step": 3283 }, { "epoch": 0.5347880959166226, "grad_norm": 0.37894126772880554, "learning_rate": 4.907387658076845e-05, "loss": 0.4104, "step": 3284 }, { "epoch": 0.5349509424744534, "grad_norm": 0.39046260714530945, "learning_rate": 4.9072598754346336e-05, "loss": 0.3946, "step": 3285 }, { "epoch": 0.5351137890322843, "grad_norm": 0.32019323110580444, "learning_rate": 4.90713200636458e-05, "loss": 0.3421, "step": 3286 }, { "epoch": 0.5352766355901152, "grad_norm": 0.3772989511489868, "learning_rate": 4.907004050871275e-05, "loss": 0.3308, "step": 3287 }, { "epoch": 0.5354394821479461, "grad_norm": 0.4589281976222992, "learning_rate": 4.9068760089593125e-05, "loss": 0.4298, "step": 3288 }, { "epoch": 0.535602328705777, "grad_norm": 0.38500285148620605, "learning_rate": 4.90674788063329e-05, "loss": 0.4028, "step": 3289 }, { "epoch": 0.5357651752636079, "grad_norm": 0.32849252223968506, "learning_rate": 4.906619665897809e-05, "loss": 0.3659, "step": 3290 }, { "epoch": 0.5359280218214387, "grad_norm": 0.36853063106536865, "learning_rate": 4.9064913647574714e-05, "loss": 0.3767, "step": 3291 }, { "epoch": 0.5360908683792697, "grad_norm": 0.34150180220603943, "learning_rate": 4.906362977216883e-05, "loss": 0.4153, "step": 3292 }, { "epoch": 0.5362537149371005, "grad_norm": 0.3567468225955963, "learning_rate": 4.9062345032806544e-05, "loss": 0.3805, "step": 3293 }, { "epoch": 0.5364165614949314, "grad_norm": 0.3730419874191284, "learning_rate": 4.906105942953398e-05, "loss": 0.401, "step": 3294 }, { "epoch": 0.5365794080527623, "grad_norm": 0.36812645196914673, "learning_rate": 4.905977296239729e-05, "loss": 0.3637, "step": 3295 }, { "epoch": 0.5367422546105932, "grad_norm": 0.3131570518016815, "learning_rate": 4.905848563144267e-05, "loss": 0.3487, "step": 3296 }, { "epoch": 0.536905101168424, "grad_norm": 0.39744681119918823, "learning_rate": 4.905719743671633e-05, "loss": 0.3682, "step": 3297 }, { "epoch": 0.537067947726255, "grad_norm": 0.4578061103820801, "learning_rate": 4.9055908378264525e-05, "loss": 0.4326, "step": 3298 }, { "epoch": 0.5372307942840858, "grad_norm": 0.46643775701522827, "learning_rate": 4.9054618456133536e-05, "loss": 0.409, "step": 3299 }, { "epoch": 0.5373936408419167, "grad_norm": 0.3906325399875641, "learning_rate": 4.905332767036968e-05, "loss": 0.4037, "step": 3300 }, { "epoch": 0.5375564873997476, "grad_norm": 0.3726259768009186, "learning_rate": 4.9052036021019294e-05, "loss": 0.3727, "step": 3301 }, { "epoch": 0.5377193339575784, "grad_norm": 0.3358158767223358, "learning_rate": 4.905074350812875e-05, "loss": 0.3796, "step": 3302 }, { "epoch": 0.5378821805154094, "grad_norm": 0.3940370976924896, "learning_rate": 4.904945013174446e-05, "loss": 0.4066, "step": 3303 }, { "epoch": 0.5380450270732402, "grad_norm": 0.37269827723503113, "learning_rate": 4.904815589191285e-05, "loss": 0.3461, "step": 3304 }, { "epoch": 0.5382078736310711, "grad_norm": 0.3651690185070038, "learning_rate": 4.904686078868039e-05, "loss": 0.3617, "step": 3305 }, { "epoch": 0.538370720188902, "grad_norm": 0.3531056046485901, "learning_rate": 4.904556482209359e-05, "loss": 0.3494, "step": 3306 }, { "epoch": 0.5385335667467329, "grad_norm": 0.3683009445667267, "learning_rate": 4.9044267992198975e-05, "loss": 0.3772, "step": 3307 }, { "epoch": 0.5386964133045637, "grad_norm": 0.35793253779411316, "learning_rate": 4.904297029904309e-05, "loss": 0.3468, "step": 3308 }, { "epoch": 0.5388592598623947, "grad_norm": 0.36393651366233826, "learning_rate": 4.904167174267255e-05, "loss": 0.334, "step": 3309 }, { "epoch": 0.5390221064202255, "grad_norm": 0.4324142038822174, "learning_rate": 4.9040372323133944e-05, "loss": 0.3981, "step": 3310 }, { "epoch": 0.5391849529780565, "grad_norm": 0.39017409086227417, "learning_rate": 4.903907204047396e-05, "loss": 0.4179, "step": 3311 }, { "epoch": 0.5393477995358873, "grad_norm": 0.30190861225128174, "learning_rate": 4.903777089473926e-05, "loss": 0.3057, "step": 3312 }, { "epoch": 0.5395106460937182, "grad_norm": 0.3737046420574188, "learning_rate": 4.9036468885976564e-05, "loss": 0.361, "step": 3313 }, { "epoch": 0.5396734926515491, "grad_norm": 0.3620101511478424, "learning_rate": 4.903516601423263e-05, "loss": 0.3713, "step": 3314 }, { "epoch": 0.53983633920938, "grad_norm": 0.3631826937198639, "learning_rate": 4.903386227955421e-05, "loss": 0.3448, "step": 3315 }, { "epoch": 0.5399991857672108, "grad_norm": 0.37216827273368835, "learning_rate": 4.903255768198814e-05, "loss": 0.3663, "step": 3316 }, { "epoch": 0.5401620323250417, "grad_norm": 0.38175079226493835, "learning_rate": 4.903125222158124e-05, "loss": 0.3696, "step": 3317 }, { "epoch": 0.5403248788828726, "grad_norm": 0.3521273136138916, "learning_rate": 4.902994589838038e-05, "loss": 0.3856, "step": 3318 }, { "epoch": 0.5404877254407034, "grad_norm": 0.3768962621688843, "learning_rate": 4.902863871243248e-05, "loss": 0.3966, "step": 3319 }, { "epoch": 0.5406505719985344, "grad_norm": 0.3581850528717041, "learning_rate": 4.902733066378445e-05, "loss": 0.4035, "step": 3320 }, { "epoch": 0.5408134185563652, "grad_norm": 0.33832916617393494, "learning_rate": 4.9026021752483266e-05, "loss": 0.3711, "step": 3321 }, { "epoch": 0.5409762651141962, "grad_norm": 0.383842796087265, "learning_rate": 4.902471197857591e-05, "loss": 0.3693, "step": 3322 }, { "epoch": 0.541139111672027, "grad_norm": 0.42055386304855347, "learning_rate": 4.902340134210942e-05, "loss": 0.4051, "step": 3323 }, { "epoch": 0.5413019582298579, "grad_norm": 0.3829021155834198, "learning_rate": 4.902208984313084e-05, "loss": 0.408, "step": 3324 }, { "epoch": 0.5414648047876888, "grad_norm": 0.3073815405368805, "learning_rate": 4.902077748168726e-05, "loss": 0.3605, "step": 3325 }, { "epoch": 0.5416276513455197, "grad_norm": 0.3989681303501129, "learning_rate": 4.901946425782581e-05, "loss": 0.3657, "step": 3326 }, { "epoch": 0.5417904979033505, "grad_norm": 0.39029332995414734, "learning_rate": 4.901815017159363e-05, "loss": 0.4146, "step": 3327 }, { "epoch": 0.5419533444611815, "grad_norm": 0.37345826625823975, "learning_rate": 4.9016835223037894e-05, "loss": 0.3526, "step": 3328 }, { "epoch": 0.5421161910190123, "grad_norm": 0.375272661447525, "learning_rate": 4.901551941220581e-05, "loss": 0.417, "step": 3329 }, { "epoch": 0.5422790375768433, "grad_norm": 0.43434232473373413, "learning_rate": 4.901420273914464e-05, "loss": 0.3783, "step": 3330 }, { "epoch": 0.5424418841346741, "grad_norm": 0.36437520384788513, "learning_rate": 4.901288520390164e-05, "loss": 0.3853, "step": 3331 }, { "epoch": 0.542604730692505, "grad_norm": 0.35181376338005066, "learning_rate": 4.901156680652411e-05, "loss": 0.3765, "step": 3332 }, { "epoch": 0.5427675772503359, "grad_norm": 0.39981546998023987, "learning_rate": 4.9010247547059396e-05, "loss": 0.3953, "step": 3333 }, { "epoch": 0.5429304238081668, "grad_norm": 0.37843412160873413, "learning_rate": 4.900892742555486e-05, "loss": 0.3853, "step": 3334 }, { "epoch": 0.5430932703659976, "grad_norm": 0.44047296047210693, "learning_rate": 4.9007606442057894e-05, "loss": 0.3986, "step": 3335 }, { "epoch": 0.5432561169238285, "grad_norm": 0.34684574604034424, "learning_rate": 4.9006284596615925e-05, "loss": 0.3609, "step": 3336 }, { "epoch": 0.5434189634816594, "grad_norm": 0.39604291319847107, "learning_rate": 4.9004961889276414e-05, "loss": 0.3781, "step": 3337 }, { "epoch": 0.5435818100394902, "grad_norm": 0.4150678217411041, "learning_rate": 4.9003638320086856e-05, "loss": 0.4067, "step": 3338 }, { "epoch": 0.5437446565973212, "grad_norm": 0.42866015434265137, "learning_rate": 4.9002313889094766e-05, "loss": 0.3902, "step": 3339 }, { "epoch": 0.543907503155152, "grad_norm": 0.3439212441444397, "learning_rate": 4.9000988596347686e-05, "loss": 0.3686, "step": 3340 }, { "epoch": 0.544070349712983, "grad_norm": 0.46927565336227417, "learning_rate": 4.899966244189321e-05, "loss": 0.4246, "step": 3341 }, { "epoch": 0.5442331962708138, "grad_norm": 0.37977731227874756, "learning_rate": 4.899833542577895e-05, "loss": 0.3759, "step": 3342 }, { "epoch": 0.5443960428286447, "grad_norm": 0.3657948672771454, "learning_rate": 4.8997007548052554e-05, "loss": 0.3904, "step": 3343 }, { "epoch": 0.5445588893864756, "grad_norm": 0.3453722596168518, "learning_rate": 4.8995678808761684e-05, "loss": 0.3924, "step": 3344 }, { "epoch": 0.5447217359443065, "grad_norm": 0.32078948616981506, "learning_rate": 4.899434920795405e-05, "loss": 0.3636, "step": 3345 }, { "epoch": 0.5448845825021373, "grad_norm": 0.3513758182525635, "learning_rate": 4.899301874567739e-05, "loss": 0.3943, "step": 3346 }, { "epoch": 0.5450474290599683, "grad_norm": 0.3205326795578003, "learning_rate": 4.899168742197947e-05, "loss": 0.3358, "step": 3347 }, { "epoch": 0.5452102756177991, "grad_norm": 0.31128600239753723, "learning_rate": 4.899035523690809e-05, "loss": 0.34, "step": 3348 }, { "epoch": 0.54537312217563, "grad_norm": 0.3563750684261322, "learning_rate": 4.8989022190511095e-05, "loss": 0.4033, "step": 3349 }, { "epoch": 0.5455359687334609, "grad_norm": 0.41313472390174866, "learning_rate": 4.8987688282836314e-05, "loss": 0.4015, "step": 3350 }, { "epoch": 0.5456988152912918, "grad_norm": 0.3196810483932495, "learning_rate": 4.898635351393166e-05, "loss": 0.3689, "step": 3351 }, { "epoch": 0.5458616618491227, "grad_norm": 0.32408997416496277, "learning_rate": 4.8985017883845046e-05, "loss": 0.3772, "step": 3352 }, { "epoch": 0.5460245084069536, "grad_norm": 0.40633469820022583, "learning_rate": 4.8983681392624435e-05, "loss": 0.3814, "step": 3353 }, { "epoch": 0.5461873549647844, "grad_norm": 0.3515988886356354, "learning_rate": 4.89823440403178e-05, "loss": 0.3751, "step": 3354 }, { "epoch": 0.5463502015226153, "grad_norm": 0.32603126764297485, "learning_rate": 4.8981005826973165e-05, "loss": 0.37, "step": 3355 }, { "epoch": 0.5465130480804462, "grad_norm": 0.38165587186813354, "learning_rate": 4.897966675263857e-05, "loss": 0.3786, "step": 3356 }, { "epoch": 0.546675894638277, "grad_norm": 0.32479605078697205, "learning_rate": 4.89783268173621e-05, "loss": 0.3458, "step": 3357 }, { "epoch": 0.546838741196108, "grad_norm": 0.3255041539669037, "learning_rate": 4.897698602119184e-05, "loss": 0.3791, "step": 3358 }, { "epoch": 0.5470015877539388, "grad_norm": 0.3331880271434784, "learning_rate": 4.897564436417596e-05, "loss": 0.3553, "step": 3359 }, { "epoch": 0.5471644343117698, "grad_norm": 0.32160893082618713, "learning_rate": 4.897430184636261e-05, "loss": 0.3625, "step": 3360 }, { "epoch": 0.5473272808696006, "grad_norm": 0.30154454708099365, "learning_rate": 4.897295846779999e-05, "loss": 0.3588, "step": 3361 }, { "epoch": 0.5474901274274315, "grad_norm": 0.45761409401893616, "learning_rate": 4.897161422853634e-05, "loss": 0.3709, "step": 3362 }, { "epoch": 0.5476529739852624, "grad_norm": 0.371318519115448, "learning_rate": 4.8970269128619916e-05, "loss": 0.356, "step": 3363 }, { "epoch": 0.5478158205430933, "grad_norm": 0.37100791931152344, "learning_rate": 4.896892316809902e-05, "loss": 0.4063, "step": 3364 }, { "epoch": 0.5479786671009241, "grad_norm": 0.32292652130126953, "learning_rate": 4.896757634702197e-05, "loss": 0.3492, "step": 3365 }, { "epoch": 0.5481415136587551, "grad_norm": 0.36340808868408203, "learning_rate": 4.896622866543711e-05, "loss": 0.3669, "step": 3366 }, { "epoch": 0.5483043602165859, "grad_norm": 0.41111958026885986, "learning_rate": 4.896488012339284e-05, "loss": 0.3387, "step": 3367 }, { "epoch": 0.5484672067744168, "grad_norm": 0.43949997425079346, "learning_rate": 4.896353072093758e-05, "loss": 0.3968, "step": 3368 }, { "epoch": 0.5486300533322477, "grad_norm": 0.3391619324684143, "learning_rate": 4.8962180458119764e-05, "loss": 0.3388, "step": 3369 }, { "epoch": 0.5487928998900786, "grad_norm": 0.3695572316646576, "learning_rate": 4.8960829334987876e-05, "loss": 0.371, "step": 3370 }, { "epoch": 0.5489557464479095, "grad_norm": 0.3682769238948822, "learning_rate": 4.8959477351590435e-05, "loss": 0.3641, "step": 3371 }, { "epoch": 0.5491185930057404, "grad_norm": 0.3882674276828766, "learning_rate": 4.895812450797596e-05, "loss": 0.3587, "step": 3372 }, { "epoch": 0.5492814395635712, "grad_norm": 0.35320937633514404, "learning_rate": 4.895677080419304e-05, "loss": 0.3879, "step": 3373 }, { "epoch": 0.5494442861214021, "grad_norm": 0.4363350570201874, "learning_rate": 4.895541624029028e-05, "loss": 0.4349, "step": 3374 }, { "epoch": 0.549607132679233, "grad_norm": 0.3804014325141907, "learning_rate": 4.89540608163163e-05, "loss": 0.4162, "step": 3375 }, { "epoch": 0.5497699792370638, "grad_norm": 0.2908776104450226, "learning_rate": 4.895270453231976e-05, "loss": 0.3654, "step": 3376 }, { "epoch": 0.5499328257948948, "grad_norm": 0.37012460827827454, "learning_rate": 4.895134738834937e-05, "loss": 0.4056, "step": 3377 }, { "epoch": 0.5500956723527256, "grad_norm": 0.4328266382217407, "learning_rate": 4.894998938445384e-05, "loss": 0.379, "step": 3378 }, { "epoch": 0.5502585189105565, "grad_norm": 0.33295348286628723, "learning_rate": 4.894863052068195e-05, "loss": 0.3454, "step": 3379 }, { "epoch": 0.5504213654683874, "grad_norm": 0.42219433188438416, "learning_rate": 4.8947270797082455e-05, "loss": 0.3999, "step": 3380 }, { "epoch": 0.5505842120262183, "grad_norm": 0.3615311086177826, "learning_rate": 4.8945910213704206e-05, "loss": 0.3933, "step": 3381 }, { "epoch": 0.5507470585840492, "grad_norm": 0.4026902914047241, "learning_rate": 4.8944548770596033e-05, "loss": 0.3611, "step": 3382 }, { "epoch": 0.5509099051418801, "grad_norm": 0.3482294976711273, "learning_rate": 4.8943186467806814e-05, "loss": 0.3524, "step": 3383 }, { "epoch": 0.5510727516997109, "grad_norm": 0.321359783411026, "learning_rate": 4.894182330538547e-05, "loss": 0.3842, "step": 3384 }, { "epoch": 0.5512355982575419, "grad_norm": 0.8370822072029114, "learning_rate": 4.8940459283380935e-05, "loss": 0.41, "step": 3385 }, { "epoch": 0.5513984448153727, "grad_norm": 0.3738079071044922, "learning_rate": 4.8939094401842185e-05, "loss": 0.3455, "step": 3386 }, { "epoch": 0.5515612913732036, "grad_norm": 0.4204179048538208, "learning_rate": 4.893772866081822e-05, "loss": 0.4035, "step": 3387 }, { "epoch": 0.5517241379310345, "grad_norm": 0.3663848638534546, "learning_rate": 4.8936362060358074e-05, "loss": 0.3769, "step": 3388 }, { "epoch": 0.5518869844888654, "grad_norm": 0.29807600378990173, "learning_rate": 4.893499460051083e-05, "loss": 0.3576, "step": 3389 }, { "epoch": 0.5520498310466962, "grad_norm": 0.3791220784187317, "learning_rate": 4.8933626281325554e-05, "loss": 0.3605, "step": 3390 }, { "epoch": 0.5522126776045271, "grad_norm": 0.35760992765426636, "learning_rate": 4.893225710285139e-05, "loss": 0.3812, "step": 3391 }, { "epoch": 0.552375524162358, "grad_norm": 0.3701789677143097, "learning_rate": 4.89308870651375e-05, "loss": 0.3805, "step": 3392 }, { "epoch": 0.5525383707201889, "grad_norm": 0.3889988958835602, "learning_rate": 4.892951616823306e-05, "loss": 0.4108, "step": 3393 }, { "epoch": 0.5527012172780198, "grad_norm": 0.32517173886299133, "learning_rate": 4.8928144412187297e-05, "loss": 0.3646, "step": 3394 }, { "epoch": 0.5528640638358506, "grad_norm": 0.35785409808158875, "learning_rate": 4.892677179704945e-05, "loss": 0.3759, "step": 3395 }, { "epoch": 0.5530269103936816, "grad_norm": 0.38565880060195923, "learning_rate": 4.892539832286882e-05, "loss": 0.3617, "step": 3396 }, { "epoch": 0.5531897569515124, "grad_norm": 0.45037853717803955, "learning_rate": 4.89240239896947e-05, "loss": 0.3934, "step": 3397 }, { "epoch": 0.5533526035093433, "grad_norm": 0.34457290172576904, "learning_rate": 4.892264879757645e-05, "loss": 0.3544, "step": 3398 }, { "epoch": 0.5535154500671742, "grad_norm": 0.3245963156223297, "learning_rate": 4.892127274656343e-05, "loss": 0.3391, "step": 3399 }, { "epoch": 0.5536782966250051, "grad_norm": 0.4953859746456146, "learning_rate": 4.8919895836705046e-05, "loss": 0.4284, "step": 3400 }, { "epoch": 0.553841143182836, "grad_norm": 0.3442746102809906, "learning_rate": 4.8918518068050734e-05, "loss": 0.3492, "step": 3401 }, { "epoch": 0.5540039897406669, "grad_norm": 0.4349163770675659, "learning_rate": 4.8917139440649964e-05, "loss": 0.3945, "step": 3402 }, { "epoch": 0.5541668362984977, "grad_norm": 0.44455191493034363, "learning_rate": 4.891575995455223e-05, "loss": 0.3487, "step": 3403 }, { "epoch": 0.5543296828563287, "grad_norm": 0.4095359742641449, "learning_rate": 4.891437960980706e-05, "loss": 0.3446, "step": 3404 }, { "epoch": 0.5544925294141595, "grad_norm": 0.3685012757778168, "learning_rate": 4.891299840646402e-05, "loss": 0.3573, "step": 3405 }, { "epoch": 0.5546553759719904, "grad_norm": 0.4356842637062073, "learning_rate": 4.891161634457268e-05, "loss": 0.4136, "step": 3406 }, { "epoch": 0.5548182225298213, "grad_norm": 0.5623223185539246, "learning_rate": 4.8910233424182684e-05, "loss": 0.4181, "step": 3407 }, { "epoch": 0.5549810690876522, "grad_norm": 0.4326215088367462, "learning_rate": 4.890884964534366e-05, "loss": 0.3766, "step": 3408 }, { "epoch": 0.555143915645483, "grad_norm": 0.382913202047348, "learning_rate": 4.89074650081053e-05, "loss": 0.3603, "step": 3409 }, { "epoch": 0.5553067622033139, "grad_norm": 0.40885889530181885, "learning_rate": 4.890607951251733e-05, "loss": 0.3241, "step": 3410 }, { "epoch": 0.5554696087611448, "grad_norm": 0.3836652338504791, "learning_rate": 4.890469315862947e-05, "loss": 0.3954, "step": 3411 }, { "epoch": 0.5556324553189756, "grad_norm": 0.41988685727119446, "learning_rate": 4.890330594649151e-05, "loss": 0.4322, "step": 3412 }, { "epoch": 0.5557953018768066, "grad_norm": 0.5116389393806458, "learning_rate": 4.890191787615325e-05, "loss": 0.3891, "step": 3413 }, { "epoch": 0.5559581484346374, "grad_norm": 0.3769863545894623, "learning_rate": 4.890052894766453e-05, "loss": 0.3323, "step": 3414 }, { "epoch": 0.5561209949924684, "grad_norm": 0.3859879970550537, "learning_rate": 4.88991391610752e-05, "loss": 0.3736, "step": 3415 }, { "epoch": 0.5562838415502992, "grad_norm": 0.35851114988327026, "learning_rate": 4.8897748516435184e-05, "loss": 0.3621, "step": 3416 }, { "epoch": 0.5564466881081301, "grad_norm": 0.36756256222724915, "learning_rate": 4.889635701379439e-05, "loss": 0.3771, "step": 3417 }, { "epoch": 0.556609534665961, "grad_norm": 0.4741816520690918, "learning_rate": 4.889496465320279e-05, "loss": 0.4312, "step": 3418 }, { "epoch": 0.5567723812237919, "grad_norm": 0.3195033073425293, "learning_rate": 4.889357143471036e-05, "loss": 0.3388, "step": 3419 }, { "epoch": 0.5569352277816227, "grad_norm": 0.3874966502189636, "learning_rate": 4.889217735836713e-05, "loss": 0.3887, "step": 3420 }, { "epoch": 0.5570980743394537, "grad_norm": 0.36670172214508057, "learning_rate": 4.889078242422315e-05, "loss": 0.3516, "step": 3421 }, { "epoch": 0.5572609208972845, "grad_norm": 0.37197986245155334, "learning_rate": 4.88893866323285e-05, "loss": 0.3648, "step": 3422 }, { "epoch": 0.5574237674551155, "grad_norm": 0.35315775871276855, "learning_rate": 4.8887989982733304e-05, "loss": 0.4125, "step": 3423 }, { "epoch": 0.5575866140129463, "grad_norm": 0.35049283504486084, "learning_rate": 4.888659247548769e-05, "loss": 0.3914, "step": 3424 }, { "epoch": 0.5577494605707772, "grad_norm": 0.33820122480392456, "learning_rate": 4.8885194110641843e-05, "loss": 0.3758, "step": 3425 }, { "epoch": 0.5579123071286081, "grad_norm": 0.3641842305660248, "learning_rate": 4.888379488824597e-05, "loss": 0.3702, "step": 3426 }, { "epoch": 0.558075153686439, "grad_norm": 0.335359662771225, "learning_rate": 4.8882394808350296e-05, "loss": 0.4051, "step": 3427 }, { "epoch": 0.5582380002442698, "grad_norm": 0.3448736369609833, "learning_rate": 4.88809938710051e-05, "loss": 0.3508, "step": 3428 }, { "epoch": 0.5584008468021007, "grad_norm": 0.3613097369670868, "learning_rate": 4.887959207626067e-05, "loss": 0.3511, "step": 3429 }, { "epoch": 0.5585636933599316, "grad_norm": 0.3681073784828186, "learning_rate": 4.887818942416734e-05, "loss": 0.3805, "step": 3430 }, { "epoch": 0.5587265399177624, "grad_norm": 0.30341407656669617, "learning_rate": 4.887678591477547e-05, "loss": 0.334, "step": 3431 }, { "epoch": 0.5588893864755934, "grad_norm": 0.362551748752594, "learning_rate": 4.887538154813545e-05, "loss": 0.3559, "step": 3432 }, { "epoch": 0.5590522330334242, "grad_norm": 0.34417587518692017, "learning_rate": 4.887397632429769e-05, "loss": 0.3554, "step": 3433 }, { "epoch": 0.5592150795912552, "grad_norm": 0.3210432529449463, "learning_rate": 4.887257024331267e-05, "loss": 0.3459, "step": 3434 }, { "epoch": 0.559377926149086, "grad_norm": 0.3313102424144745, "learning_rate": 4.8871163305230836e-05, "loss": 0.3518, "step": 3435 }, { "epoch": 0.5595407727069169, "grad_norm": 0.3703058063983917, "learning_rate": 4.886975551010273e-05, "loss": 0.3968, "step": 3436 }, { "epoch": 0.5597036192647478, "grad_norm": 0.3619089126586914, "learning_rate": 4.886834685797888e-05, "loss": 0.3989, "step": 3437 }, { "epoch": 0.5598664658225787, "grad_norm": 0.36589542031288147, "learning_rate": 4.886693734890987e-05, "loss": 0.3858, "step": 3438 }, { "epoch": 0.5600293123804095, "grad_norm": 0.39352160692214966, "learning_rate": 4.88655269829463e-05, "loss": 0.4151, "step": 3439 }, { "epoch": 0.5601921589382405, "grad_norm": 0.3650915026664734, "learning_rate": 4.8864115760138805e-05, "loss": 0.3979, "step": 3440 }, { "epoch": 0.5603550054960713, "grad_norm": 0.3308725357055664, "learning_rate": 4.886270368053807e-05, "loss": 0.3892, "step": 3441 }, { "epoch": 0.5605178520539023, "grad_norm": 0.35164061188697815, "learning_rate": 4.8861290744194764e-05, "loss": 0.3596, "step": 3442 }, { "epoch": 0.5606806986117331, "grad_norm": 0.41821566224098206, "learning_rate": 4.885987695115963e-05, "loss": 0.4213, "step": 3443 }, { "epoch": 0.560843545169564, "grad_norm": 0.3770613372325897, "learning_rate": 4.885846230148343e-05, "loss": 0.3876, "step": 3444 }, { "epoch": 0.5610063917273949, "grad_norm": 0.3805815577507019, "learning_rate": 4.8857046795216956e-05, "loss": 0.35, "step": 3445 }, { "epoch": 0.5611692382852257, "grad_norm": 0.3956581950187683, "learning_rate": 4.885563043241103e-05, "loss": 0.3911, "step": 3446 }, { "epoch": 0.5613320848430566, "grad_norm": 0.323844850063324, "learning_rate": 4.8854213213116484e-05, "loss": 0.374, "step": 3447 }, { "epoch": 0.5614949314008875, "grad_norm": 0.3875642418861389, "learning_rate": 4.8852795137384225e-05, "loss": 0.3961, "step": 3448 }, { "epoch": 0.5616577779587184, "grad_norm": 0.41669395565986633, "learning_rate": 4.8851376205265145e-05, "loss": 0.352, "step": 3449 }, { "epoch": 0.5618206245165492, "grad_norm": 0.38547101616859436, "learning_rate": 4.88499564168102e-05, "loss": 0.379, "step": 3450 }, { "epoch": 0.5619834710743802, "grad_norm": 0.3247505724430084, "learning_rate": 4.884853577207037e-05, "loss": 0.3505, "step": 3451 }, { "epoch": 0.562146317632211, "grad_norm": 0.4087514579296112, "learning_rate": 4.884711427109665e-05, "loss": 0.4048, "step": 3452 }, { "epoch": 0.562309164190042, "grad_norm": 0.37168875336647034, "learning_rate": 4.884569191394008e-05, "loss": 0.3795, "step": 3453 }, { "epoch": 0.5624720107478728, "grad_norm": 0.37022683024406433, "learning_rate": 4.884426870065173e-05, "loss": 0.3686, "step": 3454 }, { "epoch": 0.5626348573057037, "grad_norm": 0.4291357398033142, "learning_rate": 4.884284463128269e-05, "loss": 0.3753, "step": 3455 }, { "epoch": 0.5627977038635346, "grad_norm": 0.37785086035728455, "learning_rate": 4.88414197058841e-05, "loss": 0.3373, "step": 3456 }, { "epoch": 0.5629605504213655, "grad_norm": 0.3772519826889038, "learning_rate": 4.88399939245071e-05, "loss": 0.398, "step": 3457 }, { "epoch": 0.5631233969791963, "grad_norm": 0.3732435405254364, "learning_rate": 4.88385672872029e-05, "loss": 0.3559, "step": 3458 }, { "epoch": 0.5632862435370273, "grad_norm": 0.46143409609794617, "learning_rate": 4.883713979402271e-05, "loss": 0.3662, "step": 3459 }, { "epoch": 0.5634490900948581, "grad_norm": 0.3721323311328888, "learning_rate": 4.883571144501778e-05, "loss": 0.386, "step": 3460 }, { "epoch": 0.563611936652689, "grad_norm": 0.3801584541797638, "learning_rate": 4.8834282240239406e-05, "loss": 0.3902, "step": 3461 }, { "epoch": 0.5637747832105199, "grad_norm": 0.3544144034385681, "learning_rate": 4.883285217973888e-05, "loss": 0.3966, "step": 3462 }, { "epoch": 0.5639376297683508, "grad_norm": 0.3415985703468323, "learning_rate": 4.8831421263567555e-05, "loss": 0.3789, "step": 3463 }, { "epoch": 0.5641004763261817, "grad_norm": 0.37987446784973145, "learning_rate": 4.8829989491776814e-05, "loss": 0.3657, "step": 3464 }, { "epoch": 0.5642633228840125, "grad_norm": 0.33743995428085327, "learning_rate": 4.882855686441805e-05, "loss": 0.3421, "step": 3465 }, { "epoch": 0.5644261694418434, "grad_norm": 0.38526931405067444, "learning_rate": 4.88271233815427e-05, "loss": 0.4053, "step": 3466 }, { "epoch": 0.5645890159996743, "grad_norm": 0.3983170986175537, "learning_rate": 4.882568904320223e-05, "loss": 0.4399, "step": 3467 }, { "epoch": 0.5647518625575052, "grad_norm": 0.3352805972099304, "learning_rate": 4.882425384944815e-05, "loss": 0.3965, "step": 3468 }, { "epoch": 0.564914709115336, "grad_norm": 0.32144981622695923, "learning_rate": 4.8822817800331965e-05, "loss": 0.3678, "step": 3469 }, { "epoch": 0.565077555673167, "grad_norm": 0.3691091537475586, "learning_rate": 4.8821380895905256e-05, "loss": 0.3747, "step": 3470 }, { "epoch": 0.5652404022309978, "grad_norm": 0.4123254120349884, "learning_rate": 4.8819943136219596e-05, "loss": 0.4046, "step": 3471 }, { "epoch": 0.5654032487888287, "grad_norm": 0.3722352385520935, "learning_rate": 4.881850452132661e-05, "loss": 0.3888, "step": 3472 }, { "epoch": 0.5655660953466596, "grad_norm": 0.3257395327091217, "learning_rate": 4.881706505127795e-05, "loss": 0.3619, "step": 3473 }, { "epoch": 0.5657289419044905, "grad_norm": 0.3574829399585724, "learning_rate": 4.881562472612531e-05, "loss": 0.3842, "step": 3474 }, { "epoch": 0.5658917884623214, "grad_norm": 0.46485233306884766, "learning_rate": 4.8814183545920376e-05, "loss": 0.3954, "step": 3475 }, { "epoch": 0.5660546350201523, "grad_norm": 0.32907024025917053, "learning_rate": 4.8812741510714904e-05, "loss": 0.4112, "step": 3476 }, { "epoch": 0.5662174815779831, "grad_norm": 0.3555898666381836, "learning_rate": 4.881129862056067e-05, "loss": 0.4197, "step": 3477 }, { "epoch": 0.5663803281358141, "grad_norm": 0.40149301290512085, "learning_rate": 4.880985487550947e-05, "loss": 0.3673, "step": 3478 }, { "epoch": 0.5665431746936449, "grad_norm": 0.31030404567718506, "learning_rate": 4.880841027561315e-05, "loss": 0.3398, "step": 3479 }, { "epoch": 0.5667060212514758, "grad_norm": 0.3214099705219269, "learning_rate": 4.880696482092357e-05, "loss": 0.3481, "step": 3480 }, { "epoch": 0.5668688678093067, "grad_norm": 0.32011526823043823, "learning_rate": 4.8805518511492624e-05, "loss": 0.3924, "step": 3481 }, { "epoch": 0.5670317143671376, "grad_norm": 0.4072261452674866, "learning_rate": 4.880407134737224e-05, "loss": 0.4242, "step": 3482 }, { "epoch": 0.5671945609249684, "grad_norm": 0.37762901186943054, "learning_rate": 4.880262332861437e-05, "loss": 0.3823, "step": 3483 }, { "epoch": 0.5673574074827993, "grad_norm": 0.32805702090263367, "learning_rate": 4.880117445527101e-05, "loss": 0.3527, "step": 3484 }, { "epoch": 0.5675202540406302, "grad_norm": 0.41666266322135925, "learning_rate": 4.879972472739418e-05, "loss": 0.3677, "step": 3485 }, { "epoch": 0.567683100598461, "grad_norm": 0.39051520824432373, "learning_rate": 4.8798274145035926e-05, "loss": 0.3653, "step": 3486 }, { "epoch": 0.567845947156292, "grad_norm": 0.3112167716026306, "learning_rate": 4.879682270824833e-05, "loss": 0.3641, "step": 3487 }, { "epoch": 0.5680087937141228, "grad_norm": 0.36538824439048767, "learning_rate": 4.87953704170835e-05, "loss": 0.4156, "step": 3488 }, { "epoch": 0.5681716402719538, "grad_norm": 0.364045113325119, "learning_rate": 4.879391727159357e-05, "loss": 0.3985, "step": 3489 }, { "epoch": 0.5683344868297846, "grad_norm": 0.35036683082580566, "learning_rate": 4.879246327183074e-05, "loss": 0.3487, "step": 3490 }, { "epoch": 0.5684973333876155, "grad_norm": 0.33138373494148254, "learning_rate": 4.8791008417847177e-05, "loss": 0.385, "step": 3491 }, { "epoch": 0.5686601799454464, "grad_norm": 0.34400761127471924, "learning_rate": 4.878955270969514e-05, "loss": 0.3581, "step": 3492 }, { "epoch": 0.5688230265032773, "grad_norm": 0.41121527552604675, "learning_rate": 4.878809614742689e-05, "loss": 0.3668, "step": 3493 }, { "epoch": 0.5689858730611081, "grad_norm": 0.370195209980011, "learning_rate": 4.8786638731094705e-05, "loss": 0.4027, "step": 3494 }, { "epoch": 0.5691487196189391, "grad_norm": 0.364301472902298, "learning_rate": 4.878518046075093e-05, "loss": 0.3707, "step": 3495 }, { "epoch": 0.5693115661767699, "grad_norm": 0.4192847013473511, "learning_rate": 4.878372133644792e-05, "loss": 0.3635, "step": 3496 }, { "epoch": 0.5694744127346009, "grad_norm": 0.4338383674621582, "learning_rate": 4.8782261358238045e-05, "loss": 0.4169, "step": 3497 }, { "epoch": 0.5696372592924317, "grad_norm": 0.4188469350337982, "learning_rate": 4.878080052617374e-05, "loss": 0.3665, "step": 3498 }, { "epoch": 0.5698001058502626, "grad_norm": 0.3737475872039795, "learning_rate": 4.877933884030745e-05, "loss": 0.408, "step": 3499 }, { "epoch": 0.5699629524080935, "grad_norm": 0.47218191623687744, "learning_rate": 4.877787630069164e-05, "loss": 0.4032, "step": 3500 }, { "epoch": 0.5701257989659244, "grad_norm": 0.42542076110839844, "learning_rate": 4.877641290737884e-05, "loss": 0.3755, "step": 3501 }, { "epoch": 0.5702886455237552, "grad_norm": 0.368989497423172, "learning_rate": 4.8774948660421585e-05, "loss": 0.3832, "step": 3502 }, { "epoch": 0.5704514920815861, "grad_norm": 0.35575270652770996, "learning_rate": 4.877348355987243e-05, "loss": 0.3449, "step": 3503 }, { "epoch": 0.570614338639417, "grad_norm": 0.4041891098022461, "learning_rate": 4.877201760578399e-05, "loss": 0.3676, "step": 3504 }, { "epoch": 0.5707771851972478, "grad_norm": 0.4230341911315918, "learning_rate": 4.87705507982089e-05, "loss": 0.4077, "step": 3505 }, { "epoch": 0.5709400317550788, "grad_norm": 0.448265016078949, "learning_rate": 4.876908313719981e-05, "loss": 0.4267, "step": 3506 }, { "epoch": 0.5711028783129096, "grad_norm": 0.33054259419441223, "learning_rate": 4.876761462280943e-05, "loss": 0.3482, "step": 3507 }, { "epoch": 0.5712657248707406, "grad_norm": 0.4431569576263428, "learning_rate": 4.876614525509047e-05, "loss": 0.3549, "step": 3508 }, { "epoch": 0.5714285714285714, "grad_norm": 0.3790881037712097, "learning_rate": 4.8764675034095694e-05, "loss": 0.4041, "step": 3509 }, { "epoch": 0.5715914179864023, "grad_norm": 0.3642423748970032, "learning_rate": 4.876320395987788e-05, "loss": 0.3949, "step": 3510 }, { "epoch": 0.5717542645442332, "grad_norm": 0.3524002134799957, "learning_rate": 4.876173203248985e-05, "loss": 0.362, "step": 3511 }, { "epoch": 0.5719171111020641, "grad_norm": 0.5329718589782715, "learning_rate": 4.8760259251984444e-05, "loss": 0.4185, "step": 3512 }, { "epoch": 0.5720799576598949, "grad_norm": 0.43909141421318054, "learning_rate": 4.875878561841455e-05, "loss": 0.4278, "step": 3513 }, { "epoch": 0.5722428042177259, "grad_norm": 0.43752309679985046, "learning_rate": 4.875731113183306e-05, "loss": 0.3563, "step": 3514 }, { "epoch": 0.5724056507755567, "grad_norm": 0.3180556893348694, "learning_rate": 4.875583579229293e-05, "loss": 0.3927, "step": 3515 }, { "epoch": 0.5725684973333877, "grad_norm": 0.47066667675971985, "learning_rate": 4.875435959984711e-05, "loss": 0.3731, "step": 3516 }, { "epoch": 0.5727313438912185, "grad_norm": 0.49851667881011963, "learning_rate": 4.8752882554548625e-05, "loss": 0.3803, "step": 3517 }, { "epoch": 0.5728941904490494, "grad_norm": 0.3995470106601715, "learning_rate": 4.8751404656450486e-05, "loss": 0.3615, "step": 3518 }, { "epoch": 0.5730570370068803, "grad_norm": 0.36802488565444946, "learning_rate": 4.874992590560575e-05, "loss": 0.4232, "step": 3519 }, { "epoch": 0.5732198835647111, "grad_norm": 0.5008522272109985, "learning_rate": 4.874844630206753e-05, "loss": 0.3959, "step": 3520 }, { "epoch": 0.573382730122542, "grad_norm": 0.532459020614624, "learning_rate": 4.874696584588893e-05, "loss": 0.3742, "step": 3521 }, { "epoch": 0.5735455766803729, "grad_norm": 0.4026683270931244, "learning_rate": 4.87454845371231e-05, "loss": 0.3781, "step": 3522 }, { "epoch": 0.5737084232382038, "grad_norm": 0.43837690353393555, "learning_rate": 4.874400237582324e-05, "loss": 0.3734, "step": 3523 }, { "epoch": 0.5738712697960346, "grad_norm": 0.4582674205303192, "learning_rate": 4.874251936204256e-05, "loss": 0.375, "step": 3524 }, { "epoch": 0.5740341163538656, "grad_norm": 0.36944326758384705, "learning_rate": 4.874103549583429e-05, "loss": 0.3402, "step": 3525 }, { "epoch": 0.5741969629116964, "grad_norm": 0.44072437286376953, "learning_rate": 4.873955077725172e-05, "loss": 0.3799, "step": 3526 }, { "epoch": 0.5743598094695274, "grad_norm": 0.3366105556488037, "learning_rate": 4.8738065206348154e-05, "loss": 0.3501, "step": 3527 }, { "epoch": 0.5745226560273582, "grad_norm": 0.4295238256454468, "learning_rate": 4.8736578783176926e-05, "loss": 0.3842, "step": 3528 }, { "epoch": 0.5746855025851891, "grad_norm": 0.45686089992523193, "learning_rate": 4.8735091507791396e-05, "loss": 0.4206, "step": 3529 }, { "epoch": 0.57484834914302, "grad_norm": 0.369101345539093, "learning_rate": 4.8733603380244974e-05, "loss": 0.3709, "step": 3530 }, { "epoch": 0.5750111957008509, "grad_norm": 0.3970223069190979, "learning_rate": 4.873211440059109e-05, "loss": 0.4083, "step": 3531 }, { "epoch": 0.5751740422586817, "grad_norm": 0.37479016184806824, "learning_rate": 4.873062456888319e-05, "loss": 0.4077, "step": 3532 }, { "epoch": 0.5753368888165127, "grad_norm": 0.34688419103622437, "learning_rate": 4.872913388517476e-05, "loss": 0.3879, "step": 3533 }, { "epoch": 0.5754997353743435, "grad_norm": 0.3682580590248108, "learning_rate": 4.872764234951934e-05, "loss": 0.346, "step": 3534 }, { "epoch": 0.5756625819321745, "grad_norm": 0.3423125147819519, "learning_rate": 4.872614996197047e-05, "loss": 0.3435, "step": 3535 }, { "epoch": 0.5758254284900053, "grad_norm": 0.3854995369911194, "learning_rate": 4.872465672258172e-05, "loss": 0.3628, "step": 3536 }, { "epoch": 0.5759882750478362, "grad_norm": 0.33231228590011597, "learning_rate": 4.872316263140673e-05, "loss": 0.3686, "step": 3537 }, { "epoch": 0.5761511216056671, "grad_norm": 0.371643602848053, "learning_rate": 4.8721667688499104e-05, "loss": 0.3621, "step": 3538 }, { "epoch": 0.5763139681634979, "grad_norm": 0.3727481961250305, "learning_rate": 4.872017189391255e-05, "loss": 0.3517, "step": 3539 }, { "epoch": 0.5764768147213288, "grad_norm": 0.35906487703323364, "learning_rate": 4.8718675247700755e-05, "loss": 0.3647, "step": 3540 }, { "epoch": 0.5766396612791597, "grad_norm": 0.34331247210502625, "learning_rate": 4.8717177749917454e-05, "loss": 0.3876, "step": 3541 }, { "epoch": 0.5768025078369906, "grad_norm": 0.3306359648704529, "learning_rate": 4.871567940061642e-05, "loss": 0.3521, "step": 3542 }, { "epoch": 0.5769653543948214, "grad_norm": 0.3794357478618622, "learning_rate": 4.8714180199851434e-05, "loss": 0.3724, "step": 3543 }, { "epoch": 0.5771282009526524, "grad_norm": 0.3627888858318329, "learning_rate": 4.871268014767633e-05, "loss": 0.3582, "step": 3544 }, { "epoch": 0.5772910475104832, "grad_norm": 0.3337576389312744, "learning_rate": 4.8711179244144966e-05, "loss": 0.4312, "step": 3545 }, { "epoch": 0.5774538940683142, "grad_norm": 0.36235520243644714, "learning_rate": 4.8709677489311224e-05, "loss": 0.3665, "step": 3546 }, { "epoch": 0.577616740626145, "grad_norm": 0.4321887195110321, "learning_rate": 4.870817488322902e-05, "loss": 0.4, "step": 3547 }, { "epoch": 0.5777795871839759, "grad_norm": 0.3283737301826477, "learning_rate": 4.8706671425952316e-05, "loss": 0.376, "step": 3548 }, { "epoch": 0.5779424337418068, "grad_norm": 0.35751140117645264, "learning_rate": 4.870516711753508e-05, "loss": 0.3538, "step": 3549 }, { "epoch": 0.5781052802996377, "grad_norm": 0.44190800189971924, "learning_rate": 4.8703661958031315e-05, "loss": 0.3923, "step": 3550 }, { "epoch": 0.5782681268574685, "grad_norm": 0.39414218068122864, "learning_rate": 4.870215594749507e-05, "loss": 0.3936, "step": 3551 }, { "epoch": 0.5784309734152995, "grad_norm": 0.3145145773887634, "learning_rate": 4.870064908598042e-05, "loss": 0.3456, "step": 3552 }, { "epoch": 0.5785938199731303, "grad_norm": 0.3564443588256836, "learning_rate": 4.869914137354144e-05, "loss": 0.3265, "step": 3553 }, { "epoch": 0.5787566665309613, "grad_norm": 0.3370892107486725, "learning_rate": 4.869763281023229e-05, "loss": 0.3503, "step": 3554 }, { "epoch": 0.5789195130887921, "grad_norm": 0.3469233512878418, "learning_rate": 4.869612339610714e-05, "loss": 0.3374, "step": 3555 }, { "epoch": 0.579082359646623, "grad_norm": 0.3611031174659729, "learning_rate": 4.8694613131220144e-05, "loss": 0.3702, "step": 3556 }, { "epoch": 0.5792452062044539, "grad_norm": 0.34204787015914917, "learning_rate": 4.8693102015625544e-05, "loss": 0.3692, "step": 3557 }, { "epoch": 0.5794080527622847, "grad_norm": 0.36493197083473206, "learning_rate": 4.869159004937761e-05, "loss": 0.3794, "step": 3558 }, { "epoch": 0.5795708993201156, "grad_norm": 0.3145153224468231, "learning_rate": 4.86900772325306e-05, "loss": 0.3624, "step": 3559 }, { "epoch": 0.5797337458779465, "grad_norm": 0.3377738296985626, "learning_rate": 4.868856356513884e-05, "loss": 0.3697, "step": 3560 }, { "epoch": 0.5798965924357774, "grad_norm": 0.34872549772262573, "learning_rate": 4.8687049047256684e-05, "loss": 0.3643, "step": 3561 }, { "epoch": 0.5800594389936082, "grad_norm": 0.40491968393325806, "learning_rate": 4.86855336789385e-05, "loss": 0.375, "step": 3562 }, { "epoch": 0.5802222855514392, "grad_norm": 0.34692078828811646, "learning_rate": 4.868401746023868e-05, "loss": 0.374, "step": 3563 }, { "epoch": 0.58038513210927, "grad_norm": 0.34975665807724, "learning_rate": 4.868250039121168e-05, "loss": 0.3801, "step": 3564 }, { "epoch": 0.580547978667101, "grad_norm": 0.38896065950393677, "learning_rate": 4.868098247191196e-05, "loss": 0.3771, "step": 3565 }, { "epoch": 0.5807108252249318, "grad_norm": 0.3186640441417694, "learning_rate": 4.867946370239402e-05, "loss": 0.3553, "step": 3566 }, { "epoch": 0.5808736717827627, "grad_norm": 0.3469117283821106, "learning_rate": 4.867794408271239e-05, "loss": 0.3872, "step": 3567 }, { "epoch": 0.5810365183405936, "grad_norm": 0.4281579256057739, "learning_rate": 4.867642361292163e-05, "loss": 0.42, "step": 3568 }, { "epoch": 0.5811993648984245, "grad_norm": 0.3359798192977905, "learning_rate": 4.8674902293076316e-05, "loss": 0.3884, "step": 3569 }, { "epoch": 0.5813622114562553, "grad_norm": 0.37798118591308594, "learning_rate": 4.867338012323108e-05, "loss": 0.3826, "step": 3570 }, { "epoch": 0.5815250580140863, "grad_norm": 0.3750825524330139, "learning_rate": 4.867185710344058e-05, "loss": 0.371, "step": 3571 }, { "epoch": 0.5816879045719171, "grad_norm": 0.3162407875061035, "learning_rate": 4.867033323375948e-05, "loss": 0.4195, "step": 3572 }, { "epoch": 0.581850751129748, "grad_norm": 0.3424041271209717, "learning_rate": 4.86688085142425e-05, "loss": 0.3625, "step": 3573 }, { "epoch": 0.5820135976875789, "grad_norm": 0.3735741972923279, "learning_rate": 4.8667282944944384e-05, "loss": 0.3291, "step": 3574 }, { "epoch": 0.5821764442454097, "grad_norm": 0.3560391962528229, "learning_rate": 4.8665756525919894e-05, "loss": 0.4028, "step": 3575 }, { "epoch": 0.5823392908032407, "grad_norm": 0.4214010536670685, "learning_rate": 4.8664229257223835e-05, "loss": 0.4167, "step": 3576 }, { "epoch": 0.5825021373610715, "grad_norm": 0.38399335741996765, "learning_rate": 4.866270113891106e-05, "loss": 0.3636, "step": 3577 }, { "epoch": 0.5826649839189024, "grad_norm": 0.34593573212623596, "learning_rate": 4.8661172171036414e-05, "loss": 0.4039, "step": 3578 }, { "epoch": 0.5828278304767333, "grad_norm": 0.2893921136856079, "learning_rate": 4.865964235365479e-05, "loss": 0.3452, "step": 3579 }, { "epoch": 0.5829906770345642, "grad_norm": 0.3184647560119629, "learning_rate": 4.865811168682113e-05, "loss": 0.3497, "step": 3580 }, { "epoch": 0.583153523592395, "grad_norm": 0.38764870166778564, "learning_rate": 4.865658017059037e-05, "loss": 0.3787, "step": 3581 }, { "epoch": 0.583316370150226, "grad_norm": 0.308246910572052, "learning_rate": 4.865504780501751e-05, "loss": 0.3452, "step": 3582 }, { "epoch": 0.5834792167080568, "grad_norm": 0.2994306683540344, "learning_rate": 4.865351459015756e-05, "loss": 0.3364, "step": 3583 }, { "epoch": 0.5836420632658877, "grad_norm": 0.4070744216442108, "learning_rate": 4.8651980526065566e-05, "loss": 0.39, "step": 3584 }, { "epoch": 0.5838049098237186, "grad_norm": 0.34577783942222595, "learning_rate": 4.8650445612796605e-05, "loss": 0.4122, "step": 3585 }, { "epoch": 0.5839677563815495, "grad_norm": 0.31632596254348755, "learning_rate": 4.864890985040579e-05, "loss": 0.3602, "step": 3586 }, { "epoch": 0.5841306029393804, "grad_norm": 0.35672998428344727, "learning_rate": 4.864737323894827e-05, "loss": 0.3692, "step": 3587 }, { "epoch": 0.5842934494972113, "grad_norm": 0.44047632813453674, "learning_rate": 4.8645835778479186e-05, "loss": 0.4118, "step": 3588 }, { "epoch": 0.5844562960550421, "grad_norm": 0.29435521364212036, "learning_rate": 4.8644297469053754e-05, "loss": 0.3707, "step": 3589 }, { "epoch": 0.5846191426128731, "grad_norm": 0.3138776123523712, "learning_rate": 4.864275831072721e-05, "loss": 0.362, "step": 3590 }, { "epoch": 0.5847819891707039, "grad_norm": 0.3590720295906067, "learning_rate": 4.86412183035548e-05, "loss": 0.389, "step": 3591 }, { "epoch": 0.5849448357285348, "grad_norm": 0.37537625432014465, "learning_rate": 4.863967744759183e-05, "loss": 0.4016, "step": 3592 }, { "epoch": 0.5851076822863657, "grad_norm": 0.3532049357891083, "learning_rate": 4.863813574289361e-05, "loss": 0.3499, "step": 3593 }, { "epoch": 0.5852705288441965, "grad_norm": 0.3200233280658722, "learning_rate": 4.863659318951549e-05, "loss": 0.3738, "step": 3594 }, { "epoch": 0.5854333754020274, "grad_norm": 0.32086876034736633, "learning_rate": 4.863504978751286e-05, "loss": 0.3601, "step": 3595 }, { "epoch": 0.5855962219598583, "grad_norm": 0.3191973567008972, "learning_rate": 4.863350553694114e-05, "loss": 0.3445, "step": 3596 }, { "epoch": 0.5857590685176892, "grad_norm": 0.34320563077926636, "learning_rate": 4.863196043785575e-05, "loss": 0.3926, "step": 3597 }, { "epoch": 0.58592191507552, "grad_norm": 0.33211737871170044, "learning_rate": 4.8630414490312183e-05, "loss": 0.3608, "step": 3598 }, { "epoch": 0.586084761633351, "grad_norm": 0.3051816523075104, "learning_rate": 4.862886769436593e-05, "loss": 0.3533, "step": 3599 }, { "epoch": 0.5862476081911818, "grad_norm": 0.37319326400756836, "learning_rate": 4.8627320050072545e-05, "loss": 0.3752, "step": 3600 }, { "epoch": 0.5864104547490128, "grad_norm": 0.3951132297515869, "learning_rate": 4.8625771557487574e-05, "loss": 0.3689, "step": 3601 }, { "epoch": 0.5865733013068436, "grad_norm": 0.4587385058403015, "learning_rate": 4.862422221666662e-05, "loss": 0.3971, "step": 3602 }, { "epoch": 0.5867361478646745, "grad_norm": 127.00284576416016, "learning_rate": 4.8622672027665304e-05, "loss": 0.3674, "step": 3603 }, { "epoch": 0.5868989944225054, "grad_norm": 0.318462073802948, "learning_rate": 4.862112099053929e-05, "loss": 0.3973, "step": 3604 }, { "epoch": 0.5870618409803363, "grad_norm": 0.4290817081928253, "learning_rate": 4.861956910534427e-05, "loss": 0.3479, "step": 3605 }, { "epoch": 0.5872246875381671, "grad_norm": 0.5827561020851135, "learning_rate": 4.861801637213594e-05, "loss": 0.3762, "step": 3606 }, { "epoch": 0.5873875340959981, "grad_norm": 0.47234758734703064, "learning_rate": 4.8616462790970066e-05, "loss": 0.4307, "step": 3607 }, { "epoch": 0.5875503806538289, "grad_norm": 0.40453511476516724, "learning_rate": 4.8614908361902426e-05, "loss": 0.3991, "step": 3608 }, { "epoch": 0.5877132272116599, "grad_norm": 0.3293265700340271, "learning_rate": 4.8613353084988815e-05, "loss": 0.4378, "step": 3609 }, { "epoch": 0.5878760737694907, "grad_norm": 0.41992852091789246, "learning_rate": 4.861179696028509e-05, "loss": 0.3942, "step": 3610 }, { "epoch": 0.5880389203273216, "grad_norm": 0.37396931648254395, "learning_rate": 4.8610239987847104e-05, "loss": 0.3989, "step": 3611 }, { "epoch": 0.5882017668851525, "grad_norm": 0.34182360768318176, "learning_rate": 4.860868216773077e-05, "loss": 0.3432, "step": 3612 }, { "epoch": 0.5883646134429833, "grad_norm": 0.38504335284233093, "learning_rate": 4.8607123499992e-05, "loss": 0.3792, "step": 3613 }, { "epoch": 0.5885274600008142, "grad_norm": 0.2945307493209839, "learning_rate": 4.8605563984686785e-05, "loss": 0.3485, "step": 3614 }, { "epoch": 0.5886903065586451, "grad_norm": 0.374257355928421, "learning_rate": 4.8604003621871084e-05, "loss": 0.4075, "step": 3615 }, { "epoch": 0.588853153116476, "grad_norm": 0.3543165326118469, "learning_rate": 4.8602442411600945e-05, "loss": 0.3557, "step": 3616 }, { "epoch": 0.5890159996743068, "grad_norm": 0.3280712962150574, "learning_rate": 4.86008803539324e-05, "loss": 0.372, "step": 3617 }, { "epoch": 0.5891788462321378, "grad_norm": 0.3181438744068146, "learning_rate": 4.859931744892155e-05, "loss": 0.3701, "step": 3618 }, { "epoch": 0.5893416927899686, "grad_norm": 0.3754367530345917, "learning_rate": 4.859775369662449e-05, "loss": 0.3893, "step": 3619 }, { "epoch": 0.5895045393477996, "grad_norm": 0.34807848930358887, "learning_rate": 4.859618909709737e-05, "loss": 0.308, "step": 3620 }, { "epoch": 0.5896673859056304, "grad_norm": 0.32236814498901367, "learning_rate": 4.859462365039637e-05, "loss": 0.378, "step": 3621 }, { "epoch": 0.5898302324634613, "grad_norm": 0.37540215253829956, "learning_rate": 4.8593057356577686e-05, "loss": 0.4357, "step": 3622 }, { "epoch": 0.5899930790212922, "grad_norm": 0.39234238862991333, "learning_rate": 4.859149021569757e-05, "loss": 0.4163, "step": 3623 }, { "epoch": 0.5901559255791231, "grad_norm": 0.3400817811489105, "learning_rate": 4.8589922227812256e-05, "loss": 0.3849, "step": 3624 }, { "epoch": 0.5903187721369539, "grad_norm": 0.30193907022476196, "learning_rate": 4.858835339297806e-05, "loss": 0.3502, "step": 3625 }, { "epoch": 0.5904816186947849, "grad_norm": 0.37246111035346985, "learning_rate": 4.85867837112513e-05, "loss": 0.3749, "step": 3626 }, { "epoch": 0.5906444652526157, "grad_norm": 0.2943646013736725, "learning_rate": 4.858521318268835e-05, "loss": 0.3297, "step": 3627 }, { "epoch": 0.5908073118104467, "grad_norm": 0.35910192131996155, "learning_rate": 4.8583641807345583e-05, "loss": 0.3435, "step": 3628 }, { "epoch": 0.5909701583682775, "grad_norm": 0.32812777161598206, "learning_rate": 4.858206958527941e-05, "loss": 0.3739, "step": 3629 }, { "epoch": 0.5911330049261084, "grad_norm": 0.4261496961116791, "learning_rate": 4.858049651654628e-05, "loss": 0.4066, "step": 3630 }, { "epoch": 0.5912958514839393, "grad_norm": 0.43641793727874756, "learning_rate": 4.8578922601202685e-05, "loss": 0.3844, "step": 3631 }, { "epoch": 0.5914586980417701, "grad_norm": 0.4527589678764343, "learning_rate": 4.857734783930512e-05, "loss": 0.42, "step": 3632 }, { "epoch": 0.591621544599601, "grad_norm": 0.4008883833885193, "learning_rate": 4.857577223091013e-05, "loss": 0.3469, "step": 3633 }, { "epoch": 0.5917843911574319, "grad_norm": 0.5083178281784058, "learning_rate": 4.857419577607428e-05, "loss": 0.3991, "step": 3634 }, { "epoch": 0.5919472377152628, "grad_norm": 0.5147925019264221, "learning_rate": 4.8572618474854184e-05, "loss": 0.3605, "step": 3635 }, { "epoch": 0.5921100842730936, "grad_norm": 0.40114787220954895, "learning_rate": 4.857104032730644e-05, "loss": 0.3607, "step": 3636 }, { "epoch": 0.5922729308309246, "grad_norm": 0.39546963572502136, "learning_rate": 4.8569461333487744e-05, "loss": 0.3877, "step": 3637 }, { "epoch": 0.5924357773887554, "grad_norm": 0.3687094449996948, "learning_rate": 4.856788149345476e-05, "loss": 0.3652, "step": 3638 }, { "epoch": 0.5925986239465864, "grad_norm": 0.4945110082626343, "learning_rate": 4.856630080726422e-05, "loss": 0.365, "step": 3639 }, { "epoch": 0.5927614705044172, "grad_norm": 0.4717240035533905, "learning_rate": 4.856471927497287e-05, "loss": 0.3693, "step": 3640 }, { "epoch": 0.5929243170622481, "grad_norm": 0.33683326840400696, "learning_rate": 4.856313689663751e-05, "loss": 0.3357, "step": 3641 }, { "epoch": 0.593087163620079, "grad_norm": 0.34565746784210205, "learning_rate": 4.856155367231493e-05, "loss": 0.3658, "step": 3642 }, { "epoch": 0.5932500101779099, "grad_norm": 0.49964427947998047, "learning_rate": 4.855996960206198e-05, "loss": 0.37, "step": 3643 }, { "epoch": 0.5934128567357407, "grad_norm": 0.5134414434432983, "learning_rate": 4.855838468593553e-05, "loss": 0.3695, "step": 3644 }, { "epoch": 0.5935757032935717, "grad_norm": 0.33233731985092163, "learning_rate": 4.8556798923992494e-05, "loss": 0.3609, "step": 3645 }, { "epoch": 0.5937385498514025, "grad_norm": 0.4336993098258972, "learning_rate": 4.85552123162898e-05, "loss": 0.3777, "step": 3646 }, { "epoch": 0.5939013964092335, "grad_norm": 0.600525438785553, "learning_rate": 4.8553624862884404e-05, "loss": 0.3937, "step": 3647 }, { "epoch": 0.5940642429670643, "grad_norm": 0.31866493821144104, "learning_rate": 4.855203656383331e-05, "loss": 0.3433, "step": 3648 }, { "epoch": 0.5942270895248951, "grad_norm": 0.3356243371963501, "learning_rate": 4.855044741919354e-05, "loss": 0.3428, "step": 3649 }, { "epoch": 0.5943899360827261, "grad_norm": 0.3872354030609131, "learning_rate": 4.854885742902215e-05, "loss": 0.3701, "step": 3650 }, { "epoch": 0.5945527826405569, "grad_norm": 0.46629831194877625, "learning_rate": 4.854726659337622e-05, "loss": 0.4323, "step": 3651 }, { "epoch": 0.5947156291983878, "grad_norm": 0.334286630153656, "learning_rate": 4.8545674912312876e-05, "loss": 0.3708, "step": 3652 }, { "epoch": 0.5948784757562187, "grad_norm": 0.3560429811477661, "learning_rate": 4.8544082385889257e-05, "loss": 0.3599, "step": 3653 }, { "epoch": 0.5950413223140496, "grad_norm": 0.387719064950943, "learning_rate": 4.854248901416253e-05, "loss": 0.3647, "step": 3654 }, { "epoch": 0.5952041688718804, "grad_norm": 0.3507022261619568, "learning_rate": 4.854089479718992e-05, "loss": 0.385, "step": 3655 }, { "epoch": 0.5953670154297114, "grad_norm": 0.3843921720981598, "learning_rate": 4.853929973502866e-05, "loss": 0.3326, "step": 3656 }, { "epoch": 0.5955298619875422, "grad_norm": 0.3635764718055725, "learning_rate": 4.853770382773601e-05, "loss": 0.3674, "step": 3657 }, { "epoch": 0.5956927085453732, "grad_norm": 0.33824440836906433, "learning_rate": 4.8536107075369266e-05, "loss": 0.3723, "step": 3658 }, { "epoch": 0.595855555103204, "grad_norm": 0.3364553451538086, "learning_rate": 4.8534509477985766e-05, "loss": 0.3647, "step": 3659 }, { "epoch": 0.5960184016610349, "grad_norm": 0.34193670749664307, "learning_rate": 4.853291103564287e-05, "loss": 0.3762, "step": 3660 }, { "epoch": 0.5961812482188658, "grad_norm": 0.37472793459892273, "learning_rate": 4.853131174839796e-05, "loss": 0.3522, "step": 3661 }, { "epoch": 0.5963440947766967, "grad_norm": 0.31136322021484375, "learning_rate": 4.852971161630845e-05, "loss": 0.3084, "step": 3662 }, { "epoch": 0.5965069413345275, "grad_norm": 0.3438822329044342, "learning_rate": 4.85281106394318e-05, "loss": 0.4108, "step": 3663 }, { "epoch": 0.5966697878923585, "grad_norm": 0.4044322967529297, "learning_rate": 4.852650881782549e-05, "loss": 0.3931, "step": 3664 }, { "epoch": 0.5968326344501893, "grad_norm": 0.3409348130226135, "learning_rate": 4.8524906151547025e-05, "loss": 0.3804, "step": 3665 }, { "epoch": 0.5969954810080202, "grad_norm": 0.33107706904411316, "learning_rate": 4.852330264065394e-05, "loss": 0.3503, "step": 3666 }, { "epoch": 0.5971583275658511, "grad_norm": 0.37686997652053833, "learning_rate": 4.8521698285203814e-05, "loss": 0.37, "step": 3667 }, { "epoch": 0.5973211741236819, "grad_norm": 0.3482053577899933, "learning_rate": 4.852009308525425e-05, "loss": 0.3959, "step": 3668 }, { "epoch": 0.5974840206815129, "grad_norm": 0.329301655292511, "learning_rate": 4.851848704086287e-05, "loss": 0.3785, "step": 3669 }, { "epoch": 0.5976468672393437, "grad_norm": 0.36744195222854614, "learning_rate": 4.851688015208735e-05, "loss": 0.3821, "step": 3670 }, { "epoch": 0.5978097137971746, "grad_norm": 0.34855860471725464, "learning_rate": 4.851527241898537e-05, "loss": 0.3567, "step": 3671 }, { "epoch": 0.5979725603550055, "grad_norm": 0.3370457589626312, "learning_rate": 4.851366384161466e-05, "loss": 0.3553, "step": 3672 }, { "epoch": 0.5981354069128364, "grad_norm": 0.39884519577026367, "learning_rate": 4.851205442003297e-05, "loss": 0.3951, "step": 3673 }, { "epoch": 0.5982982534706672, "grad_norm": 0.4007999002933502, "learning_rate": 4.851044415429807e-05, "loss": 0.3532, "step": 3674 }, { "epoch": 0.5984611000284982, "grad_norm": 0.45822980999946594, "learning_rate": 4.85088330444678e-05, "loss": 0.4331, "step": 3675 }, { "epoch": 0.598623946586329, "grad_norm": 0.3119167387485504, "learning_rate": 4.8507221090599985e-05, "loss": 0.3314, "step": 3676 }, { "epoch": 0.59878679314416, "grad_norm": 0.3293502628803253, "learning_rate": 4.8505608292752503e-05, "loss": 0.3479, "step": 3677 }, { "epoch": 0.5989496397019908, "grad_norm": 0.44537463784217834, "learning_rate": 4.850399465098327e-05, "loss": 0.4055, "step": 3678 }, { "epoch": 0.5991124862598217, "grad_norm": 0.35352829098701477, "learning_rate": 4.8502380165350186e-05, "loss": 0.3523, "step": 3679 }, { "epoch": 0.5992753328176526, "grad_norm": 0.4570886492729187, "learning_rate": 4.850076483591125e-05, "loss": 0.3838, "step": 3680 }, { "epoch": 0.5994381793754835, "grad_norm": 0.41094252467155457, "learning_rate": 4.849914866272445e-05, "loss": 0.4063, "step": 3681 }, { "epoch": 0.5996010259333143, "grad_norm": 0.3421211540699005, "learning_rate": 4.84975316458478e-05, "loss": 0.3559, "step": 3682 }, { "epoch": 0.5997638724911453, "grad_norm": 0.40084266662597656, "learning_rate": 4.849591378533938e-05, "loss": 0.3776, "step": 3683 }, { "epoch": 0.5999267190489761, "grad_norm": 0.3462562561035156, "learning_rate": 4.849429508125723e-05, "loss": 0.3316, "step": 3684 }, { "epoch": 0.600089565606807, "grad_norm": 0.35898980498313904, "learning_rate": 4.849267553365952e-05, "loss": 0.3815, "step": 3685 }, { "epoch": 0.6002524121646379, "grad_norm": 0.4042990803718567, "learning_rate": 4.849105514260437e-05, "loss": 0.3745, "step": 3686 }, { "epoch": 0.6004152587224687, "grad_norm": 0.41989845037460327, "learning_rate": 4.848943390814995e-05, "loss": 0.3774, "step": 3687 }, { "epoch": 0.6005781052802996, "grad_norm": 0.3448423147201538, "learning_rate": 4.8487811830354486e-05, "loss": 0.3602, "step": 3688 }, { "epoch": 0.6007409518381305, "grad_norm": 0.31382226943969727, "learning_rate": 4.84861889092762e-05, "loss": 0.3742, "step": 3689 }, { "epoch": 0.6009037983959614, "grad_norm": 0.4225885570049286, "learning_rate": 4.8484565144973366e-05, "loss": 0.3589, "step": 3690 }, { "epoch": 0.6010666449537923, "grad_norm": 0.4055538773536682, "learning_rate": 4.8482940537504285e-05, "loss": 0.3899, "step": 3691 }, { "epoch": 0.6012294915116232, "grad_norm": 0.360266774892807, "learning_rate": 4.848131508692728e-05, "loss": 0.4154, "step": 3692 }, { "epoch": 0.601392338069454, "grad_norm": 0.381854385137558, "learning_rate": 4.847968879330071e-05, "loss": 0.3496, "step": 3693 }, { "epoch": 0.601555184627285, "grad_norm": 0.32753080129623413, "learning_rate": 4.847806165668296e-05, "loss": 0.3683, "step": 3694 }, { "epoch": 0.6017180311851158, "grad_norm": 0.3289436399936676, "learning_rate": 4.847643367713247e-05, "loss": 0.3688, "step": 3695 }, { "epoch": 0.6018808777429467, "grad_norm": 0.39188531041145325, "learning_rate": 4.847480485470766e-05, "loss": 0.3591, "step": 3696 }, { "epoch": 0.6020437243007776, "grad_norm": 0.3205183148384094, "learning_rate": 4.847317518946703e-05, "loss": 0.3284, "step": 3697 }, { "epoch": 0.6022065708586085, "grad_norm": 0.31324297189712524, "learning_rate": 4.847154468146908e-05, "loss": 0.387, "step": 3698 }, { "epoch": 0.6023694174164393, "grad_norm": 0.3497595191001892, "learning_rate": 4.846991333077235e-05, "loss": 0.3625, "step": 3699 }, { "epoch": 0.6025322639742703, "grad_norm": 0.35503676533699036, "learning_rate": 4.8468281137435425e-05, "loss": 0.3515, "step": 3700 }, { "epoch": 0.6026951105321011, "grad_norm": 0.40428030490875244, "learning_rate": 4.8466648101516884e-05, "loss": 0.443, "step": 3701 }, { "epoch": 0.6028579570899321, "grad_norm": 0.32346436381340027, "learning_rate": 4.8465014223075365e-05, "loss": 0.373, "step": 3702 }, { "epoch": 0.6030208036477629, "grad_norm": 0.4084634780883789, "learning_rate": 4.846337950216955e-05, "loss": 0.4021, "step": 3703 }, { "epoch": 0.6031836502055937, "grad_norm": 0.33658507466316223, "learning_rate": 4.8461743938858097e-05, "loss": 0.3648, "step": 3704 }, { "epoch": 0.6033464967634247, "grad_norm": 0.32434433698654175, "learning_rate": 4.846010753319975e-05, "loss": 0.3716, "step": 3705 }, { "epoch": 0.6035093433212555, "grad_norm": 0.350295752286911, "learning_rate": 4.845847028525325e-05, "loss": 0.3461, "step": 3706 }, { "epoch": 0.6036721898790864, "grad_norm": 0.3507319986820221, "learning_rate": 4.845683219507738e-05, "loss": 0.3994, "step": 3707 }, { "epoch": 0.6038350364369173, "grad_norm": 0.342661589384079, "learning_rate": 4.845519326273097e-05, "loss": 0.3765, "step": 3708 }, { "epoch": 0.6039978829947482, "grad_norm": 0.3366152346134186, "learning_rate": 4.845355348827284e-05, "loss": 0.3431, "step": 3709 }, { "epoch": 0.604160729552579, "grad_norm": 0.38912734389305115, "learning_rate": 4.845191287176186e-05, "loss": 0.3753, "step": 3710 }, { "epoch": 0.60432357611041, "grad_norm": 0.3327447175979614, "learning_rate": 4.845027141325696e-05, "loss": 0.3575, "step": 3711 }, { "epoch": 0.6044864226682408, "grad_norm": 0.4179299771785736, "learning_rate": 4.844862911281705e-05, "loss": 0.4469, "step": 3712 }, { "epoch": 0.6046492692260718, "grad_norm": 0.33423373103141785, "learning_rate": 4.84469859705011e-05, "loss": 0.3513, "step": 3713 }, { "epoch": 0.6048121157839026, "grad_norm": 0.3086690306663513, "learning_rate": 4.8445341986368106e-05, "loss": 0.3211, "step": 3714 }, { "epoch": 0.6049749623417335, "grad_norm": 0.30961140990257263, "learning_rate": 4.84436971604771e-05, "loss": 0.3328, "step": 3715 }, { "epoch": 0.6051378088995644, "grad_norm": 0.36640435457229614, "learning_rate": 4.844205149288711e-05, "loss": 0.3461, "step": 3716 }, { "epoch": 0.6053006554573953, "grad_norm": 0.37015604972839355, "learning_rate": 4.844040498365725e-05, "loss": 0.3936, "step": 3717 }, { "epoch": 0.6054635020152261, "grad_norm": 0.5434983372688293, "learning_rate": 4.843875763284662e-05, "loss": 0.3827, "step": 3718 }, { "epoch": 0.6056263485730571, "grad_norm": 0.3325209617614746, "learning_rate": 4.843710944051436e-05, "loss": 0.3531, "step": 3719 }, { "epoch": 0.6057891951308879, "grad_norm": 0.4333634376525879, "learning_rate": 4.843546040671966e-05, "loss": 0.381, "step": 3720 }, { "epoch": 0.6059520416887189, "grad_norm": 0.35849905014038086, "learning_rate": 4.843381053152171e-05, "loss": 0.4091, "step": 3721 }, { "epoch": 0.6061148882465497, "grad_norm": 0.3640923500061035, "learning_rate": 4.843215981497976e-05, "loss": 0.3768, "step": 3722 }, { "epoch": 0.6062777348043805, "grad_norm": 0.3016793131828308, "learning_rate": 4.843050825715306e-05, "loss": 0.3747, "step": 3723 }, { "epoch": 0.6064405813622115, "grad_norm": 0.34091752767562866, "learning_rate": 4.842885585810092e-05, "loss": 0.3571, "step": 3724 }, { "epoch": 0.6066034279200423, "grad_norm": 0.41215288639068604, "learning_rate": 4.842720261788265e-05, "loss": 0.4077, "step": 3725 }, { "epoch": 0.6067662744778732, "grad_norm": 0.3400474190711975, "learning_rate": 4.8425548536557625e-05, "loss": 0.3545, "step": 3726 }, { "epoch": 0.6069291210357041, "grad_norm": 0.3275359869003296, "learning_rate": 4.842389361418522e-05, "loss": 0.3528, "step": 3727 }, { "epoch": 0.607091967593535, "grad_norm": 0.3298853039741516, "learning_rate": 4.8422237850824856e-05, "loss": 0.3763, "step": 3728 }, { "epoch": 0.6072548141513658, "grad_norm": 0.39187437295913696, "learning_rate": 4.8420581246535976e-05, "loss": 0.4049, "step": 3729 }, { "epoch": 0.6074176607091968, "grad_norm": 0.3539641499519348, "learning_rate": 4.841892380137806e-05, "loss": 0.3709, "step": 3730 }, { "epoch": 0.6075805072670276, "grad_norm": 0.3771631121635437, "learning_rate": 4.8417265515410616e-05, "loss": 0.3678, "step": 3731 }, { "epoch": 0.6077433538248586, "grad_norm": 0.3685804605484009, "learning_rate": 4.841560638869318e-05, "loss": 0.3839, "step": 3732 }, { "epoch": 0.6079062003826894, "grad_norm": 0.3732113838195801, "learning_rate": 4.841394642128532e-05, "loss": 0.3748, "step": 3733 }, { "epoch": 0.6080690469405203, "grad_norm": 0.40523776412010193, "learning_rate": 4.841228561324663e-05, "loss": 0.3745, "step": 3734 }, { "epoch": 0.6082318934983512, "grad_norm": 0.3907490372657776, "learning_rate": 4.8410623964636746e-05, "loss": 0.3843, "step": 3735 }, { "epoch": 0.6083947400561821, "grad_norm": 0.4219784140586853, "learning_rate": 4.840896147551532e-05, "loss": 0.3944, "step": 3736 }, { "epoch": 0.6085575866140129, "grad_norm": 0.3140527009963989, "learning_rate": 4.840729814594204e-05, "loss": 0.3847, "step": 3737 }, { "epoch": 0.6087204331718439, "grad_norm": 0.3706476092338562, "learning_rate": 4.840563397597663e-05, "loss": 0.3956, "step": 3738 }, { "epoch": 0.6088832797296747, "grad_norm": 0.3710762560367584, "learning_rate": 4.8403968965678836e-05, "loss": 0.3528, "step": 3739 }, { "epoch": 0.6090461262875057, "grad_norm": 0.35822346806526184, "learning_rate": 4.840230311510843e-05, "loss": 0.347, "step": 3740 }, { "epoch": 0.6092089728453365, "grad_norm": 0.3637321889400482, "learning_rate": 4.840063642432523e-05, "loss": 0.3707, "step": 3741 }, { "epoch": 0.6093718194031673, "grad_norm": 0.36459973454475403, "learning_rate": 4.8398968893389076e-05, "loss": 0.39, "step": 3742 }, { "epoch": 0.6095346659609983, "grad_norm": 0.3554629981517792, "learning_rate": 4.839730052235982e-05, "loss": 0.3978, "step": 3743 }, { "epoch": 0.6096975125188291, "grad_norm": 0.34918317198753357, "learning_rate": 4.839563131129739e-05, "loss": 0.373, "step": 3744 }, { "epoch": 0.60986035907666, "grad_norm": 0.4267405569553375, "learning_rate": 4.839396126026169e-05, "loss": 0.399, "step": 3745 }, { "epoch": 0.6100232056344909, "grad_norm": 0.3155851364135742, "learning_rate": 4.83922903693127e-05, "loss": 0.3689, "step": 3746 }, { "epoch": 0.6101860521923218, "grad_norm": 0.445349782705307, "learning_rate": 4.8390618638510396e-05, "loss": 0.3564, "step": 3747 }, { "epoch": 0.6103488987501526, "grad_norm": 0.37579572200775146, "learning_rate": 4.838894606791481e-05, "loss": 0.3701, "step": 3748 }, { "epoch": 0.6105117453079836, "grad_norm": 0.35145285725593567, "learning_rate": 4.838727265758597e-05, "loss": 0.3774, "step": 3749 }, { "epoch": 0.6106745918658144, "grad_norm": 0.36075031757354736, "learning_rate": 4.838559840758398e-05, "loss": 0.3326, "step": 3750 }, { "epoch": 0.6108374384236454, "grad_norm": 0.4041690230369568, "learning_rate": 4.8383923317968944e-05, "loss": 0.3441, "step": 3751 }, { "epoch": 0.6110002849814762, "grad_norm": 0.37363773584365845, "learning_rate": 4.8382247388801e-05, "loss": 0.4035, "step": 3752 }, { "epoch": 0.6111631315393071, "grad_norm": 0.39723914861679077, "learning_rate": 4.838057062014032e-05, "loss": 0.413, "step": 3753 }, { "epoch": 0.611325978097138, "grad_norm": 0.32419857382774353, "learning_rate": 4.83788930120471e-05, "loss": 0.3546, "step": 3754 }, { "epoch": 0.6114888246549689, "grad_norm": 0.40028202533721924, "learning_rate": 4.837721456458158e-05, "loss": 0.4166, "step": 3755 }, { "epoch": 0.6116516712127997, "grad_norm": 0.343247652053833, "learning_rate": 4.837553527780402e-05, "loss": 0.3423, "step": 3756 }, { "epoch": 0.6118145177706307, "grad_norm": 0.318774551153183, "learning_rate": 4.83738551517747e-05, "loss": 0.3453, "step": 3757 }, { "epoch": 0.6119773643284615, "grad_norm": 0.34453505277633667, "learning_rate": 4.837217418655395e-05, "loss": 0.3599, "step": 3758 }, { "epoch": 0.6121402108862924, "grad_norm": 0.33336392045021057, "learning_rate": 4.837049238220213e-05, "loss": 0.3444, "step": 3759 }, { "epoch": 0.6123030574441233, "grad_norm": 0.3163773715496063, "learning_rate": 4.836880973877961e-05, "loss": 0.3393, "step": 3760 }, { "epoch": 0.6124659040019541, "grad_norm": 0.3218268156051636, "learning_rate": 4.836712625634681e-05, "loss": 0.3236, "step": 3761 }, { "epoch": 0.612628750559785, "grad_norm": 0.3368180990219116, "learning_rate": 4.8365441934964164e-05, "loss": 0.3731, "step": 3762 }, { "epoch": 0.6127915971176159, "grad_norm": 0.4013730585575104, "learning_rate": 4.8363756774692145e-05, "loss": 0.3821, "step": 3763 }, { "epoch": 0.6129544436754468, "grad_norm": 0.4532681405544281, "learning_rate": 4.8362070775591255e-05, "loss": 0.4124, "step": 3764 }, { "epoch": 0.6131172902332777, "grad_norm": 0.3132364749908447, "learning_rate": 4.836038393772204e-05, "loss": 0.3871, "step": 3765 }, { "epoch": 0.6132801367911086, "grad_norm": 0.38971540331840515, "learning_rate": 4.835869626114504e-05, "loss": 0.3765, "step": 3766 }, { "epoch": 0.6134429833489394, "grad_norm": 0.3394448459148407, "learning_rate": 4.8357007745920866e-05, "loss": 0.3503, "step": 3767 }, { "epoch": 0.6136058299067704, "grad_norm": 0.3451499938964844, "learning_rate": 4.8355318392110135e-05, "loss": 0.3445, "step": 3768 }, { "epoch": 0.6137686764646012, "grad_norm": 0.43234026432037354, "learning_rate": 4.835362819977351e-05, "loss": 0.4296, "step": 3769 }, { "epoch": 0.6139315230224321, "grad_norm": 0.36923474073410034, "learning_rate": 4.835193716897164e-05, "loss": 0.3762, "step": 3770 }, { "epoch": 0.614094369580263, "grad_norm": 0.366702675819397, "learning_rate": 4.835024529976528e-05, "loss": 0.3718, "step": 3771 }, { "epoch": 0.6142572161380939, "grad_norm": 0.3103436827659607, "learning_rate": 4.834855259221515e-05, "loss": 0.3605, "step": 3772 }, { "epoch": 0.6144200626959248, "grad_norm": 0.3546149432659149, "learning_rate": 4.834685904638202e-05, "loss": 0.3534, "step": 3773 }, { "epoch": 0.6145829092537557, "grad_norm": 0.3940007984638214, "learning_rate": 4.8345164662326714e-05, "loss": 0.3981, "step": 3774 }, { "epoch": 0.6147457558115865, "grad_norm": 0.4043726921081543, "learning_rate": 4.834346944011004e-05, "loss": 0.401, "step": 3775 }, { "epoch": 0.6149086023694175, "grad_norm": 0.3538324534893036, "learning_rate": 4.834177337979288e-05, "loss": 0.3761, "step": 3776 }, { "epoch": 0.6150714489272483, "grad_norm": 0.3463194966316223, "learning_rate": 4.834007648143613e-05, "loss": 0.3802, "step": 3777 }, { "epoch": 0.6152342954850791, "grad_norm": 0.4027412533760071, "learning_rate": 4.8338378745100695e-05, "loss": 0.3886, "step": 3778 }, { "epoch": 0.6153971420429101, "grad_norm": 0.45152878761291504, "learning_rate": 4.8336680170847534e-05, "loss": 0.3702, "step": 3779 }, { "epoch": 0.6155599886007409, "grad_norm": 0.40933671593666077, "learning_rate": 4.8334980758737646e-05, "loss": 0.4054, "step": 3780 }, { "epoch": 0.6157228351585718, "grad_norm": 0.29851675033569336, "learning_rate": 4.8333280508832036e-05, "loss": 0.3658, "step": 3781 }, { "epoch": 0.6158856817164027, "grad_norm": 0.3710566759109497, "learning_rate": 4.833157942119175e-05, "loss": 0.3675, "step": 3782 }, { "epoch": 0.6160485282742336, "grad_norm": 0.36776813864707947, "learning_rate": 4.832987749587785e-05, "loss": 0.3918, "step": 3783 }, { "epoch": 0.6162113748320645, "grad_norm": 0.3637010455131531, "learning_rate": 4.832817473295146e-05, "loss": 0.3352, "step": 3784 }, { "epoch": 0.6163742213898954, "grad_norm": 0.35087716579437256, "learning_rate": 4.8326471132473696e-05, "loss": 0.3895, "step": 3785 }, { "epoch": 0.6165370679477262, "grad_norm": 0.385743647813797, "learning_rate": 4.832476669450573e-05, "loss": 0.3533, "step": 3786 }, { "epoch": 0.6166999145055572, "grad_norm": 0.3464229702949524, "learning_rate": 4.832306141910876e-05, "loss": 0.3633, "step": 3787 }, { "epoch": 0.616862761063388, "grad_norm": 0.33906495571136475, "learning_rate": 4.8321355306344005e-05, "loss": 0.3607, "step": 3788 }, { "epoch": 0.6170256076212189, "grad_norm": 0.3321878910064697, "learning_rate": 4.8319648356272725e-05, "loss": 0.3751, "step": 3789 }, { "epoch": 0.6171884541790498, "grad_norm": 0.3765535056591034, "learning_rate": 4.83179405689562e-05, "loss": 0.4093, "step": 3790 }, { "epoch": 0.6173513007368807, "grad_norm": 0.4029456079006195, "learning_rate": 4.831623194445575e-05, "loss": 0.4023, "step": 3791 }, { "epoch": 0.6175141472947115, "grad_norm": 0.2895609438419342, "learning_rate": 4.831452248283271e-05, "loss": 0.3332, "step": 3792 }, { "epoch": 0.6176769938525425, "grad_norm": 0.35757678747177124, "learning_rate": 4.831281218414846e-05, "loss": 0.3616, "step": 3793 }, { "epoch": 0.6178398404103733, "grad_norm": 0.39688757061958313, "learning_rate": 4.8311101048464415e-05, "loss": 0.3769, "step": 3794 }, { "epoch": 0.6180026869682043, "grad_norm": 0.3590446412563324, "learning_rate": 4.830938907584199e-05, "loss": 0.3493, "step": 3795 }, { "epoch": 0.6181655335260351, "grad_norm": 0.292241632938385, "learning_rate": 4.830767626634266e-05, "loss": 0.3339, "step": 3796 }, { "epoch": 0.6183283800838659, "grad_norm": 0.33639848232269287, "learning_rate": 4.8305962620027925e-05, "loss": 0.3497, "step": 3797 }, { "epoch": 0.6184912266416969, "grad_norm": 0.37384575605392456, "learning_rate": 4.830424813695931e-05, "loss": 0.3909, "step": 3798 }, { "epoch": 0.6186540731995277, "grad_norm": 0.3199601173400879, "learning_rate": 4.8302532817198354e-05, "loss": 0.3574, "step": 3799 }, { "epoch": 0.6188169197573586, "grad_norm": 0.33252638578414917, "learning_rate": 4.830081666080666e-05, "loss": 0.3498, "step": 3800 }, { "epoch": 0.6189797663151895, "grad_norm": 0.43476030230522156, "learning_rate": 4.8299099667845835e-05, "loss": 0.4543, "step": 3801 }, { "epoch": 0.6191426128730204, "grad_norm": 0.34926968812942505, "learning_rate": 4.829738183837753e-05, "loss": 0.3999, "step": 3802 }, { "epoch": 0.6193054594308512, "grad_norm": 0.3261038661003113, "learning_rate": 4.829566317246342e-05, "loss": 0.3559, "step": 3803 }, { "epoch": 0.6194683059886822, "grad_norm": 0.36996257305145264, "learning_rate": 4.8293943670165196e-05, "loss": 0.368, "step": 3804 }, { "epoch": 0.619631152546513, "grad_norm": 0.3676151931285858, "learning_rate": 4.829222333154461e-05, "loss": 0.4228, "step": 3805 }, { "epoch": 0.619793999104344, "grad_norm": 0.3573971390724182, "learning_rate": 4.829050215666342e-05, "loss": 0.3323, "step": 3806 }, { "epoch": 0.6199568456621748, "grad_norm": 0.32971200346946716, "learning_rate": 4.8288780145583425e-05, "loss": 0.3239, "step": 3807 }, { "epoch": 0.6201196922200057, "grad_norm": 0.3667144179344177, "learning_rate": 4.828705729836644e-05, "loss": 0.3633, "step": 3808 }, { "epoch": 0.6202825387778366, "grad_norm": 0.35127270221710205, "learning_rate": 4.828533361507434e-05, "loss": 0.3802, "step": 3809 }, { "epoch": 0.6204453853356675, "grad_norm": 0.3273784816265106, "learning_rate": 4.8283609095768985e-05, "loss": 0.3763, "step": 3810 }, { "epoch": 0.6206082318934983, "grad_norm": 0.36520057916641235, "learning_rate": 4.828188374051231e-05, "loss": 0.372, "step": 3811 }, { "epoch": 0.6207710784513293, "grad_norm": 0.3327046036720276, "learning_rate": 4.828015754936625e-05, "loss": 0.3741, "step": 3812 }, { "epoch": 0.6209339250091601, "grad_norm": 0.3135378956794739, "learning_rate": 4.8278430522392795e-05, "loss": 0.3367, "step": 3813 }, { "epoch": 0.6210967715669911, "grad_norm": 0.3597366213798523, "learning_rate": 4.827670265965393e-05, "loss": 0.3535, "step": 3814 }, { "epoch": 0.6212596181248219, "grad_norm": 0.32876813411712646, "learning_rate": 4.827497396121171e-05, "loss": 0.3312, "step": 3815 }, { "epoch": 0.6214224646826527, "grad_norm": 0.39447081089019775, "learning_rate": 4.8273244427128186e-05, "loss": 0.4108, "step": 3816 }, { "epoch": 0.6215853112404837, "grad_norm": 0.33884215354919434, "learning_rate": 4.827151405746545e-05, "loss": 0.3869, "step": 3817 }, { "epoch": 0.6217481577983145, "grad_norm": 0.2916332185268402, "learning_rate": 4.826978285228565e-05, "loss": 0.3423, "step": 3818 }, { "epoch": 0.6219110043561454, "grad_norm": 0.37642133235931396, "learning_rate": 4.826805081165092e-05, "loss": 0.4302, "step": 3819 }, { "epoch": 0.6220738509139763, "grad_norm": 0.3702017366886139, "learning_rate": 4.826631793562345e-05, "loss": 0.3733, "step": 3820 }, { "epoch": 0.6222366974718072, "grad_norm": 0.2940879166126251, "learning_rate": 4.8264584224265466e-05, "loss": 0.3333, "step": 3821 }, { "epoch": 0.622399544029638, "grad_norm": 0.29194116592407227, "learning_rate": 4.826284967763921e-05, "loss": 0.3313, "step": 3822 }, { "epoch": 0.622562390587469, "grad_norm": 0.32419052720069885, "learning_rate": 4.826111429580694e-05, "loss": 0.3703, "step": 3823 }, { "epoch": 0.6227252371452998, "grad_norm": 0.38301417231559753, "learning_rate": 4.8259378078830985e-05, "loss": 0.3928, "step": 3824 }, { "epoch": 0.6228880837031308, "grad_norm": 0.30746063590049744, "learning_rate": 4.8257641026773664e-05, "loss": 0.3498, "step": 3825 }, { "epoch": 0.6230509302609616, "grad_norm": 0.33556458353996277, "learning_rate": 4.825590313969735e-05, "loss": 0.3863, "step": 3826 }, { "epoch": 0.6232137768187925, "grad_norm": 0.29514601826667786, "learning_rate": 4.8254164417664436e-05, "loss": 0.3637, "step": 3827 }, { "epoch": 0.6233766233766234, "grad_norm": 0.32703590393066406, "learning_rate": 4.825242486073735e-05, "loss": 0.3458, "step": 3828 }, { "epoch": 0.6235394699344543, "grad_norm": 0.3909912407398224, "learning_rate": 4.8250684468978546e-05, "loss": 0.4068, "step": 3829 }, { "epoch": 0.6237023164922851, "grad_norm": 0.3843972384929657, "learning_rate": 4.8248943242450505e-05, "loss": 0.4303, "step": 3830 }, { "epoch": 0.6238651630501161, "grad_norm": 0.284837543964386, "learning_rate": 4.824720118121575e-05, "loss": 0.3318, "step": 3831 }, { "epoch": 0.6240280096079469, "grad_norm": 0.3797878623008728, "learning_rate": 4.824545828533682e-05, "loss": 0.3927, "step": 3832 }, { "epoch": 0.6241908561657777, "grad_norm": 0.3045872747898102, "learning_rate": 4.824371455487629e-05, "loss": 0.361, "step": 3833 }, { "epoch": 0.6243537027236087, "grad_norm": 0.29744666814804077, "learning_rate": 4.8241969989896775e-05, "loss": 0.3435, "step": 3834 }, { "epoch": 0.6245165492814395, "grad_norm": 0.3166063129901886, "learning_rate": 4.824022459046089e-05, "loss": 0.3523, "step": 3835 }, { "epoch": 0.6246793958392705, "grad_norm": 0.3854144513607025, "learning_rate": 4.8238478356631326e-05, "loss": 0.3946, "step": 3836 }, { "epoch": 0.6248422423971013, "grad_norm": 0.32535281777381897, "learning_rate": 4.823673128847076e-05, "loss": 0.3549, "step": 3837 }, { "epoch": 0.6250050889549322, "grad_norm": 0.3992176353931427, "learning_rate": 4.8234983386041915e-05, "loss": 0.3939, "step": 3838 }, { "epoch": 0.6251679355127631, "grad_norm": 0.4053478240966797, "learning_rate": 4.823323464940756e-05, "loss": 0.383, "step": 3839 }, { "epoch": 0.625330782070594, "grad_norm": 0.27729085087776184, "learning_rate": 4.823148507863047e-05, "loss": 0.3357, "step": 3840 }, { "epoch": 0.6254936286284248, "grad_norm": 0.35839882493019104, "learning_rate": 4.8229734673773466e-05, "loss": 0.3981, "step": 3841 }, { "epoch": 0.6256564751862558, "grad_norm": 0.3806845247745514, "learning_rate": 4.822798343489938e-05, "loss": 0.3896, "step": 3842 }, { "epoch": 0.6258193217440866, "grad_norm": 0.3806055188179016, "learning_rate": 4.8226231362071105e-05, "loss": 0.3473, "step": 3843 }, { "epoch": 0.6259821683019176, "grad_norm": 0.43016162514686584, "learning_rate": 4.822447845535153e-05, "loss": 0.4227, "step": 3844 }, { "epoch": 0.6261450148597484, "grad_norm": 0.32890066504478455, "learning_rate": 4.8222724714803603e-05, "loss": 0.3973, "step": 3845 }, { "epoch": 0.6263078614175793, "grad_norm": 0.3965762257575989, "learning_rate": 4.8220970140490276e-05, "loss": 0.4164, "step": 3846 }, { "epoch": 0.6264707079754102, "grad_norm": 0.39428940415382385, "learning_rate": 4.821921473247455e-05, "loss": 0.4173, "step": 3847 }, { "epoch": 0.6266335545332411, "grad_norm": 0.3613874316215515, "learning_rate": 4.821745849081945e-05, "loss": 0.4195, "step": 3848 }, { "epoch": 0.6267964010910719, "grad_norm": 0.3633114695549011, "learning_rate": 4.821570141558803e-05, "loss": 0.3566, "step": 3849 }, { "epoch": 0.6269592476489029, "grad_norm": 0.4743572771549225, "learning_rate": 4.8213943506843374e-05, "loss": 0.4011, "step": 3850 }, { "epoch": 0.6271220942067337, "grad_norm": 0.31061553955078125, "learning_rate": 4.821218476464859e-05, "loss": 0.3895, "step": 3851 }, { "epoch": 0.6272849407645645, "grad_norm": 0.35438722372055054, "learning_rate": 4.821042518906683e-05, "loss": 0.3967, "step": 3852 }, { "epoch": 0.6274477873223955, "grad_norm": 0.34205392003059387, "learning_rate": 4.820866478016126e-05, "loss": 0.3661, "step": 3853 }, { "epoch": 0.6276106338802263, "grad_norm": 0.3120892643928528, "learning_rate": 4.820690353799509e-05, "loss": 0.3388, "step": 3854 }, { "epoch": 0.6277734804380573, "grad_norm": 0.35443729162216187, "learning_rate": 4.820514146263156e-05, "loss": 0.3485, "step": 3855 }, { "epoch": 0.6279363269958881, "grad_norm": 0.36348965764045715, "learning_rate": 4.8203378554133934e-05, "loss": 0.3635, "step": 3856 }, { "epoch": 0.628099173553719, "grad_norm": 0.3797495663166046, "learning_rate": 4.820161481256549e-05, "loss": 0.395, "step": 3857 }, { "epoch": 0.6282620201115499, "grad_norm": 0.34312376379966736, "learning_rate": 4.8199850237989555e-05, "loss": 0.3849, "step": 3858 }, { "epoch": 0.6284248666693808, "grad_norm": 0.2934499979019165, "learning_rate": 4.8198084830469494e-05, "loss": 0.354, "step": 3859 }, { "epoch": 0.6285877132272116, "grad_norm": 0.37859106063842773, "learning_rate": 4.8196318590068685e-05, "loss": 0.3662, "step": 3860 }, { "epoch": 0.6287505597850426, "grad_norm": 0.3812752068042755, "learning_rate": 4.819455151685054e-05, "loss": 0.3965, "step": 3861 }, { "epoch": 0.6289134063428734, "grad_norm": 0.34902653098106384, "learning_rate": 4.81927836108785e-05, "loss": 0.3662, "step": 3862 }, { "epoch": 0.6290762529007043, "grad_norm": 0.3882738947868347, "learning_rate": 4.819101487221605e-05, "loss": 0.3851, "step": 3863 }, { "epoch": 0.6292390994585352, "grad_norm": 0.33411478996276855, "learning_rate": 4.818924530092667e-05, "loss": 0.3521, "step": 3864 }, { "epoch": 0.6294019460163661, "grad_norm": 0.43336018919944763, "learning_rate": 4.8187474897073914e-05, "loss": 0.3807, "step": 3865 }, { "epoch": 0.629564792574197, "grad_norm": 0.37240034341812134, "learning_rate": 4.818570366072134e-05, "loss": 0.4086, "step": 3866 }, { "epoch": 0.6297276391320279, "grad_norm": 0.3398652672767639, "learning_rate": 4.818393159193253e-05, "loss": 0.3982, "step": 3867 }, { "epoch": 0.6298904856898587, "grad_norm": 0.35034775733947754, "learning_rate": 4.8182158690771116e-05, "loss": 0.3915, "step": 3868 }, { "epoch": 0.6300533322476897, "grad_norm": 0.3579016327857971, "learning_rate": 4.818038495730076e-05, "loss": 0.3802, "step": 3869 }, { "epoch": 0.6302161788055205, "grad_norm": 0.4189845323562622, "learning_rate": 4.817861039158513e-05, "loss": 0.3771, "step": 3870 }, { "epoch": 0.6303790253633513, "grad_norm": 0.3813154697418213, "learning_rate": 4.8176834993687936e-05, "loss": 0.3984, "step": 3871 }, { "epoch": 0.6305418719211823, "grad_norm": 0.35133299231529236, "learning_rate": 4.817505876367293e-05, "loss": 0.349, "step": 3872 }, { "epoch": 0.6307047184790131, "grad_norm": 0.3509170114994049, "learning_rate": 4.817328170160388e-05, "loss": 0.3909, "step": 3873 }, { "epoch": 0.630867565036844, "grad_norm": 0.3558672070503235, "learning_rate": 4.817150380754458e-05, "loss": 0.4036, "step": 3874 }, { "epoch": 0.6310304115946749, "grad_norm": 0.31488215923309326, "learning_rate": 4.816972508155888e-05, "loss": 0.3616, "step": 3875 }, { "epoch": 0.6311932581525058, "grad_norm": 0.36540472507476807, "learning_rate": 4.816794552371063e-05, "loss": 0.3673, "step": 3876 }, { "epoch": 0.6313561047103367, "grad_norm": 0.3355773687362671, "learning_rate": 4.8166165134063724e-05, "loss": 0.3664, "step": 3877 }, { "epoch": 0.6315189512681676, "grad_norm": 0.41253185272216797, "learning_rate": 4.8164383912682076e-05, "loss": 0.3785, "step": 3878 }, { "epoch": 0.6316817978259984, "grad_norm": 0.3047676980495453, "learning_rate": 4.816260185962965e-05, "loss": 0.3339, "step": 3879 }, { "epoch": 0.6318446443838294, "grad_norm": 0.40842586755752563, "learning_rate": 4.8160818974970414e-05, "loss": 0.386, "step": 3880 }, { "epoch": 0.6320074909416602, "grad_norm": 0.4220575988292694, "learning_rate": 4.815903525876839e-05, "loss": 0.3833, "step": 3881 }, { "epoch": 0.6321703374994911, "grad_norm": 0.34200388193130493, "learning_rate": 4.815725071108761e-05, "loss": 0.3293, "step": 3882 }, { "epoch": 0.632333184057322, "grad_norm": 0.3369062542915344, "learning_rate": 4.815546533199215e-05, "loss": 0.3945, "step": 3883 }, { "epoch": 0.6324960306151529, "grad_norm": 0.36580270528793335, "learning_rate": 4.8153679121546106e-05, "loss": 0.3665, "step": 3884 }, { "epoch": 0.6326588771729837, "grad_norm": 0.3849620819091797, "learning_rate": 4.815189207981362e-05, "loss": 0.4105, "step": 3885 }, { "epoch": 0.6328217237308147, "grad_norm": 0.3479671776294708, "learning_rate": 4.815010420685884e-05, "loss": 0.3325, "step": 3886 }, { "epoch": 0.6329845702886455, "grad_norm": 0.35427209734916687, "learning_rate": 4.814831550274595e-05, "loss": 0.3721, "step": 3887 }, { "epoch": 0.6331474168464765, "grad_norm": 0.3647332489490509, "learning_rate": 4.814652596753919e-05, "loss": 0.3582, "step": 3888 }, { "epoch": 0.6333102634043073, "grad_norm": 0.3454517126083374, "learning_rate": 4.814473560130279e-05, "loss": 0.372, "step": 3889 }, { "epoch": 0.6334731099621381, "grad_norm": 0.3337930738925934, "learning_rate": 4.8142944404101045e-05, "loss": 0.3772, "step": 3890 }, { "epoch": 0.6336359565199691, "grad_norm": 0.41681909561157227, "learning_rate": 4.814115237599826e-05, "loss": 0.3823, "step": 3891 }, { "epoch": 0.6337988030777999, "grad_norm": 0.35870054364204407, "learning_rate": 4.813935951705877e-05, "loss": 0.4012, "step": 3892 }, { "epoch": 0.6339616496356308, "grad_norm": 0.3930753767490387, "learning_rate": 4.813756582734694e-05, "loss": 0.3448, "step": 3893 }, { "epoch": 0.6341244961934617, "grad_norm": 0.4422408938407898, "learning_rate": 4.813577130692718e-05, "loss": 0.3645, "step": 3894 }, { "epoch": 0.6342873427512926, "grad_norm": 0.4693898856639862, "learning_rate": 4.8133975955863916e-05, "loss": 0.4363, "step": 3895 }, { "epoch": 0.6344501893091234, "grad_norm": 0.3603067100048065, "learning_rate": 4.81321797742216e-05, "loss": 0.372, "step": 3896 }, { "epoch": 0.6346130358669544, "grad_norm": 0.38419046998023987, "learning_rate": 4.813038276206472e-05, "loss": 0.3743, "step": 3897 }, { "epoch": 0.6347758824247852, "grad_norm": 0.3467629551887512, "learning_rate": 4.812858491945781e-05, "loss": 0.4003, "step": 3898 }, { "epoch": 0.6349387289826162, "grad_norm": 0.4281567633152008, "learning_rate": 4.8126786246465395e-05, "loss": 0.4163, "step": 3899 }, { "epoch": 0.635101575540447, "grad_norm": 0.40839725732803345, "learning_rate": 4.812498674315207e-05, "loss": 0.4041, "step": 3900 }, { "epoch": 0.6352644220982779, "grad_norm": 0.3539751470088959, "learning_rate": 4.812318640958243e-05, "loss": 0.3828, "step": 3901 }, { "epoch": 0.6354272686561088, "grad_norm": 0.317685067653656, "learning_rate": 4.812138524582113e-05, "loss": 0.3075, "step": 3902 }, { "epoch": 0.6355901152139397, "grad_norm": 0.4919357895851135, "learning_rate": 4.811958325193282e-05, "loss": 0.3743, "step": 3903 }, { "epoch": 0.6357529617717705, "grad_norm": 0.5760248303413391, "learning_rate": 4.81177804279822e-05, "loss": 0.4189, "step": 3904 }, { "epoch": 0.6359158083296015, "grad_norm": 0.3396369516849518, "learning_rate": 4.811597677403401e-05, "loss": 0.3827, "step": 3905 }, { "epoch": 0.6360786548874323, "grad_norm": 0.3858201503753662, "learning_rate": 4.8114172290152984e-05, "loss": 0.4069, "step": 3906 }, { "epoch": 0.6362415014452631, "grad_norm": 0.39777085185050964, "learning_rate": 4.8112366976403925e-05, "loss": 0.3825, "step": 3907 }, { "epoch": 0.6364043480030941, "grad_norm": 0.3311343193054199, "learning_rate": 4.811056083285165e-05, "loss": 0.3577, "step": 3908 }, { "epoch": 0.6365671945609249, "grad_norm": 0.31668177247047424, "learning_rate": 4.810875385956099e-05, "loss": 0.3221, "step": 3909 }, { "epoch": 0.6367300411187559, "grad_norm": 0.3271620273590088, "learning_rate": 4.810694605659685e-05, "loss": 0.3696, "step": 3910 }, { "epoch": 0.6368928876765867, "grad_norm": 0.32495325803756714, "learning_rate": 4.81051374240241e-05, "loss": 0.3575, "step": 3911 }, { "epoch": 0.6370557342344176, "grad_norm": 0.4624270498752594, "learning_rate": 4.8103327961907696e-05, "loss": 0.4142, "step": 3912 }, { "epoch": 0.6372185807922485, "grad_norm": 0.351484090089798, "learning_rate": 4.8101517670312594e-05, "loss": 0.4052, "step": 3913 }, { "epoch": 0.6373814273500794, "grad_norm": 0.34839290380477905, "learning_rate": 4.80997065493038e-05, "loss": 0.3753, "step": 3914 }, { "epoch": 0.6375442739079102, "grad_norm": 0.3109421133995056, "learning_rate": 4.809789459894633e-05, "loss": 0.3674, "step": 3915 }, { "epoch": 0.6377071204657412, "grad_norm": 0.35376232862472534, "learning_rate": 4.809608181930524e-05, "loss": 0.3893, "step": 3916 }, { "epoch": 0.637869967023572, "grad_norm": 0.30551788210868835, "learning_rate": 4.809426821044562e-05, "loss": 0.3655, "step": 3917 }, { "epoch": 0.638032813581403, "grad_norm": 0.3530201017856598, "learning_rate": 4.809245377243257e-05, "loss": 0.3477, "step": 3918 }, { "epoch": 0.6381956601392338, "grad_norm": 0.3032386898994446, "learning_rate": 4.809063850533125e-05, "loss": 0.3638, "step": 3919 }, { "epoch": 0.6383585066970647, "grad_norm": 0.3210413455963135, "learning_rate": 4.808882240920683e-05, "loss": 0.3735, "step": 3920 }, { "epoch": 0.6385213532548956, "grad_norm": 0.4232858419418335, "learning_rate": 4.8087005484124496e-05, "loss": 0.3956, "step": 3921 }, { "epoch": 0.6386841998127265, "grad_norm": 0.3171500563621521, "learning_rate": 4.8085187730149505e-05, "loss": 0.3577, "step": 3922 }, { "epoch": 0.6388470463705573, "grad_norm": 0.34611281752586365, "learning_rate": 4.80833691473471e-05, "loss": 0.4138, "step": 3923 }, { "epoch": 0.6390098929283883, "grad_norm": 0.3686622679233551, "learning_rate": 4.8081549735782594e-05, "loss": 0.3565, "step": 3924 }, { "epoch": 0.6391727394862191, "grad_norm": 0.4192856550216675, "learning_rate": 4.80797294955213e-05, "loss": 0.3807, "step": 3925 }, { "epoch": 0.6393355860440499, "grad_norm": 0.3515988886356354, "learning_rate": 4.807790842662856e-05, "loss": 0.3895, "step": 3926 }, { "epoch": 0.6394984326018809, "grad_norm": 0.3385526239871979, "learning_rate": 4.807608652916976e-05, "loss": 0.3965, "step": 3927 }, { "epoch": 0.6396612791597117, "grad_norm": 0.3994694650173187, "learning_rate": 4.807426380321033e-05, "loss": 0.3508, "step": 3928 }, { "epoch": 0.6398241257175427, "grad_norm": 0.3841162621974945, "learning_rate": 4.8072440248815684e-05, "loss": 0.3564, "step": 3929 }, { "epoch": 0.6399869722753735, "grad_norm": 0.3980065882205963, "learning_rate": 4.807061586605132e-05, "loss": 0.3964, "step": 3930 }, { "epoch": 0.6401498188332044, "grad_norm": 0.32413485646247864, "learning_rate": 4.806879065498271e-05, "loss": 0.3411, "step": 3931 }, { "epoch": 0.6403126653910353, "grad_norm": 0.4088596999645233, "learning_rate": 4.8066964615675405e-05, "loss": 0.4052, "step": 3932 }, { "epoch": 0.6404755119488662, "grad_norm": 0.3999430239200592, "learning_rate": 4.8065137748194966e-05, "loss": 0.354, "step": 3933 }, { "epoch": 0.640638358506697, "grad_norm": 0.40081071853637695, "learning_rate": 4.806331005260697e-05, "loss": 0.3513, "step": 3934 }, { "epoch": 0.640801205064528, "grad_norm": 0.37584081292152405, "learning_rate": 4.8061481528977044e-05, "loss": 0.3665, "step": 3935 }, { "epoch": 0.6409640516223588, "grad_norm": 0.3315086364746094, "learning_rate": 4.805965217737084e-05, "loss": 0.3543, "step": 3936 }, { "epoch": 0.6411268981801898, "grad_norm": 0.36898723244667053, "learning_rate": 4.805782199785403e-05, "loss": 0.388, "step": 3937 }, { "epoch": 0.6412897447380206, "grad_norm": 0.35820212960243225, "learning_rate": 4.805599099049233e-05, "loss": 0.3779, "step": 3938 }, { "epoch": 0.6414525912958515, "grad_norm": 0.3764117956161499, "learning_rate": 4.805415915535147e-05, "loss": 0.3686, "step": 3939 }, { "epoch": 0.6416154378536824, "grad_norm": 0.32163161039352417, "learning_rate": 4.8052326492497236e-05, "loss": 0.3463, "step": 3940 }, { "epoch": 0.6417782844115133, "grad_norm": 0.4027363657951355, "learning_rate": 4.805049300199541e-05, "loss": 0.3665, "step": 3941 }, { "epoch": 0.6419411309693441, "grad_norm": 0.3921201527118683, "learning_rate": 4.804865868391182e-05, "loss": 0.3961, "step": 3942 }, { "epoch": 0.6421039775271751, "grad_norm": 0.33232244849205017, "learning_rate": 4.804682353831233e-05, "loss": 0.372, "step": 3943 }, { "epoch": 0.6422668240850059, "grad_norm": 0.3451792895793915, "learning_rate": 4.8044987565262825e-05, "loss": 0.3685, "step": 3944 }, { "epoch": 0.6424296706428367, "grad_norm": 0.30081161856651306, "learning_rate": 4.8043150764829224e-05, "loss": 0.3457, "step": 3945 }, { "epoch": 0.6425925172006677, "grad_norm": 0.3501703143119812, "learning_rate": 4.804131313707747e-05, "loss": 0.4116, "step": 3946 }, { "epoch": 0.6427553637584985, "grad_norm": 0.3251813054084778, "learning_rate": 4.803947468207354e-05, "loss": 0.3782, "step": 3947 }, { "epoch": 0.6429182103163295, "grad_norm": 0.4688669741153717, "learning_rate": 4.803763539988344e-05, "loss": 0.4153, "step": 3948 }, { "epoch": 0.6430810568741603, "grad_norm": 0.3236391544342041, "learning_rate": 4.803579529057321e-05, "loss": 0.3538, "step": 3949 }, { "epoch": 0.6432439034319912, "grad_norm": 0.32377925515174866, "learning_rate": 4.803395435420891e-05, "loss": 0.3577, "step": 3950 }, { "epoch": 0.6434067499898221, "grad_norm": 0.32745227217674255, "learning_rate": 4.803211259085663e-05, "loss": 0.3711, "step": 3951 }, { "epoch": 0.643569596547653, "grad_norm": 0.36424025893211365, "learning_rate": 4.803027000058251e-05, "loss": 0.4027, "step": 3952 }, { "epoch": 0.6437324431054838, "grad_norm": 0.5087854266166687, "learning_rate": 4.802842658345269e-05, "loss": 0.4142, "step": 3953 }, { "epoch": 0.6438952896633148, "grad_norm": 0.32657840847969055, "learning_rate": 4.802658233953337e-05, "loss": 0.3398, "step": 3954 }, { "epoch": 0.6440581362211456, "grad_norm": 0.3750736117362976, "learning_rate": 4.8024737268890754e-05, "loss": 0.3649, "step": 3955 }, { "epoch": 0.6442209827789765, "grad_norm": 0.323543906211853, "learning_rate": 4.802289137159108e-05, "loss": 0.3777, "step": 3956 }, { "epoch": 0.6443838293368074, "grad_norm": 0.4618605673313141, "learning_rate": 4.802104464770063e-05, "loss": 0.3487, "step": 3957 }, { "epoch": 0.6445466758946383, "grad_norm": 0.39996662735939026, "learning_rate": 4.80191970972857e-05, "loss": 0.3484, "step": 3958 }, { "epoch": 0.6447095224524692, "grad_norm": 0.282396137714386, "learning_rate": 4.801734872041264e-05, "loss": 0.3446, "step": 3959 }, { "epoch": 0.6448723690103001, "grad_norm": 0.3800313472747803, "learning_rate": 4.8015499517147786e-05, "loss": 0.3553, "step": 3960 }, { "epoch": 0.6450352155681309, "grad_norm": 0.4448045492172241, "learning_rate": 4.801364948755754e-05, "loss": 0.3632, "step": 3961 }, { "epoch": 0.6451980621259618, "grad_norm": 0.28705450892448425, "learning_rate": 4.801179863170834e-05, "loss": 0.3781, "step": 3962 }, { "epoch": 0.6453609086837927, "grad_norm": 0.3621997833251953, "learning_rate": 4.800994694966662e-05, "loss": 0.3718, "step": 3963 }, { "epoch": 0.6455237552416235, "grad_norm": 0.30058184266090393, "learning_rate": 4.800809444149886e-05, "loss": 0.3596, "step": 3964 }, { "epoch": 0.6456866017994545, "grad_norm": 0.32409989833831787, "learning_rate": 4.800624110727158e-05, "loss": 0.3694, "step": 3965 }, { "epoch": 0.6458494483572853, "grad_norm": 0.3872927129268646, "learning_rate": 4.800438694705131e-05, "loss": 0.3907, "step": 3966 }, { "epoch": 0.6460122949151162, "grad_norm": 0.3547903299331665, "learning_rate": 4.8002531960904626e-05, "loss": 0.3425, "step": 3967 }, { "epoch": 0.6461751414729471, "grad_norm": 0.31716734170913696, "learning_rate": 4.800067614889813e-05, "loss": 0.3671, "step": 3968 }, { "epoch": 0.646337988030778, "grad_norm": 0.3570875823497772, "learning_rate": 4.7998819511098444e-05, "loss": 0.3797, "step": 3969 }, { "epoch": 0.6465008345886089, "grad_norm": 0.29803773760795593, "learning_rate": 4.799696204757224e-05, "loss": 0.3821, "step": 3970 }, { "epoch": 0.6466636811464398, "grad_norm": 0.3344045579433441, "learning_rate": 4.79951037583862e-05, "loss": 0.3745, "step": 3971 }, { "epoch": 0.6468265277042706, "grad_norm": 0.39944571256637573, "learning_rate": 4.799324464360703e-05, "loss": 0.4128, "step": 3972 }, { "epoch": 0.6469893742621016, "grad_norm": 0.2886591851711273, "learning_rate": 4.799138470330149e-05, "loss": 0.3169, "step": 3973 }, { "epoch": 0.6471522208199324, "grad_norm": 0.34423571825027466, "learning_rate": 4.798952393753635e-05, "loss": 0.399, "step": 3974 }, { "epoch": 0.6473150673777633, "grad_norm": 0.40710315108299255, "learning_rate": 4.7987662346378434e-05, "loss": 0.4275, "step": 3975 }, { "epoch": 0.6474779139355942, "grad_norm": 0.3363982141017914, "learning_rate": 4.7985799929894556e-05, "loss": 0.3591, "step": 3976 }, { "epoch": 0.6476407604934251, "grad_norm": 0.36368808150291443, "learning_rate": 4.79839366881516e-05, "loss": 0.3694, "step": 3977 }, { "epoch": 0.647803607051256, "grad_norm": 0.3765600025653839, "learning_rate": 4.7982072621216456e-05, "loss": 0.4044, "step": 3978 }, { "epoch": 0.6479664536090869, "grad_norm": 0.38475990295410156, "learning_rate": 4.7980207729156045e-05, "loss": 0.4105, "step": 3979 }, { "epoch": 0.6481293001669177, "grad_norm": 0.32025253772735596, "learning_rate": 4.797834201203733e-05, "loss": 0.3368, "step": 3980 }, { "epoch": 0.6482921467247486, "grad_norm": 0.32432299852371216, "learning_rate": 4.797647546992729e-05, "loss": 0.3602, "step": 3981 }, { "epoch": 0.6484549932825795, "grad_norm": 0.3646206259727478, "learning_rate": 4.7974608102892936e-05, "loss": 0.3534, "step": 3982 }, { "epoch": 0.6486178398404103, "grad_norm": 0.3526914417743683, "learning_rate": 4.797273991100133e-05, "loss": 0.3503, "step": 3983 }, { "epoch": 0.6487806863982413, "grad_norm": 0.3000272512435913, "learning_rate": 4.797087089431952e-05, "loss": 0.3595, "step": 3984 }, { "epoch": 0.6489435329560721, "grad_norm": 0.3388606905937195, "learning_rate": 4.796900105291463e-05, "loss": 0.3432, "step": 3985 }, { "epoch": 0.649106379513903, "grad_norm": 0.40035462379455566, "learning_rate": 4.796713038685378e-05, "loss": 0.3929, "step": 3986 }, { "epoch": 0.6492692260717339, "grad_norm": 0.31462520360946655, "learning_rate": 4.796525889620414e-05, "loss": 0.3779, "step": 3987 }, { "epoch": 0.6494320726295648, "grad_norm": 0.3165058195590973, "learning_rate": 4.79633865810329e-05, "loss": 0.3448, "step": 3988 }, { "epoch": 0.6495949191873956, "grad_norm": 0.34249448776245117, "learning_rate": 4.796151344140728e-05, "loss": 0.3835, "step": 3989 }, { "epoch": 0.6497577657452266, "grad_norm": 0.3961067795753479, "learning_rate": 4.795963947739453e-05, "loss": 0.4193, "step": 3990 }, { "epoch": 0.6499206123030574, "grad_norm": 0.3785518407821655, "learning_rate": 4.7957764689061945e-05, "loss": 0.3747, "step": 3991 }, { "epoch": 0.6500834588608884, "grad_norm": 0.3277905285358429, "learning_rate": 4.7955889076476825e-05, "loss": 0.4006, "step": 3992 }, { "epoch": 0.6502463054187192, "grad_norm": 0.296450138092041, "learning_rate": 4.79540126397065e-05, "loss": 0.3273, "step": 3993 }, { "epoch": 0.6504091519765501, "grad_norm": 0.31039750576019287, "learning_rate": 4.7952135378818356e-05, "loss": 0.3401, "step": 3994 }, { "epoch": 0.650571998534381, "grad_norm": 0.38633644580841064, "learning_rate": 4.795025729387977e-05, "loss": 0.3785, "step": 3995 }, { "epoch": 0.6507348450922119, "grad_norm": 0.3301464021205902, "learning_rate": 4.79483783849582e-05, "loss": 0.3803, "step": 3996 }, { "epoch": 0.6508976916500427, "grad_norm": 0.33524078130722046, "learning_rate": 4.7946498652121085e-05, "loss": 0.3473, "step": 3997 }, { "epoch": 0.6510605382078737, "grad_norm": 0.3776018023490906, "learning_rate": 4.7944618095435925e-05, "loss": 0.3412, "step": 3998 }, { "epoch": 0.6512233847657045, "grad_norm": 0.3709219992160797, "learning_rate": 4.7942736714970234e-05, "loss": 0.4016, "step": 3999 }, { "epoch": 0.6513862313235353, "grad_norm": 0.3474333882331848, "learning_rate": 4.794085451079155e-05, "loss": 0.3964, "step": 4000 }, { "epoch": 0.6515490778813663, "grad_norm": 0.3484940230846405, "learning_rate": 4.7938971482967457e-05, "loss": 0.3849, "step": 4001 }, { "epoch": 0.6517119244391971, "grad_norm": 0.35990798473358154, "learning_rate": 4.7937087631565556e-05, "loss": 0.3738, "step": 4002 }, { "epoch": 0.6518747709970281, "grad_norm": 0.37605857849121094, "learning_rate": 4.79352029566535e-05, "loss": 0.4313, "step": 4003 }, { "epoch": 0.6520376175548589, "grad_norm": 0.34093543887138367, "learning_rate": 4.793331745829893e-05, "loss": 0.3827, "step": 4004 }, { "epoch": 0.6522004641126898, "grad_norm": 0.30947452783584595, "learning_rate": 4.793143113656956e-05, "loss": 0.3488, "step": 4005 }, { "epoch": 0.6523633106705207, "grad_norm": 0.3360001742839813, "learning_rate": 4.792954399153311e-05, "loss": 0.3416, "step": 4006 }, { "epoch": 0.6525261572283516, "grad_norm": 0.2955411672592163, "learning_rate": 4.792765602325733e-05, "loss": 0.3331, "step": 4007 }, { "epoch": 0.6526890037861824, "grad_norm": 0.289423406124115, "learning_rate": 4.792576723181e-05, "loss": 0.3244, "step": 4008 }, { "epoch": 0.6528518503440134, "grad_norm": 0.34702688455581665, "learning_rate": 4.792387761725894e-05, "loss": 0.3817, "step": 4009 }, { "epoch": 0.6530146969018442, "grad_norm": 0.3026851713657379, "learning_rate": 4.7921987179672e-05, "loss": 0.3594, "step": 4010 }, { "epoch": 0.6531775434596752, "grad_norm": 0.39011523127555847, "learning_rate": 4.792009591911704e-05, "loss": 0.3652, "step": 4011 }, { "epoch": 0.653340390017506, "grad_norm": 0.3443703055381775, "learning_rate": 4.7918203835661956e-05, "loss": 0.3752, "step": 4012 }, { "epoch": 0.6535032365753369, "grad_norm": 0.3379608392715454, "learning_rate": 4.79163109293747e-05, "loss": 0.3961, "step": 4013 }, { "epoch": 0.6536660831331678, "grad_norm": 0.43831849098205566, "learning_rate": 4.791441720032323e-05, "loss": 0.4271, "step": 4014 }, { "epoch": 0.6538289296909987, "grad_norm": 0.3870636224746704, "learning_rate": 4.791252264857551e-05, "loss": 0.3339, "step": 4015 }, { "epoch": 0.6539917762488295, "grad_norm": 0.36397063732147217, "learning_rate": 4.791062727419959e-05, "loss": 0.4088, "step": 4016 }, { "epoch": 0.6541546228066605, "grad_norm": 0.35335487127304077, "learning_rate": 4.7908731077263515e-05, "loss": 0.4062, "step": 4017 }, { "epoch": 0.6543174693644913, "grad_norm": 0.3957574665546417, "learning_rate": 4.790683405783535e-05, "loss": 0.3638, "step": 4018 }, { "epoch": 0.6544803159223221, "grad_norm": 0.41770246624946594, "learning_rate": 4.790493621598321e-05, "loss": 0.4032, "step": 4019 }, { "epoch": 0.6546431624801531, "grad_norm": 0.36004993319511414, "learning_rate": 4.790303755177524e-05, "loss": 0.3604, "step": 4020 }, { "epoch": 0.6548060090379839, "grad_norm": 0.3146538734436035, "learning_rate": 4.7901138065279594e-05, "loss": 0.4001, "step": 4021 }, { "epoch": 0.6549688555958149, "grad_norm": 0.35895535349845886, "learning_rate": 4.789923775656449e-05, "loss": 0.385, "step": 4022 }, { "epoch": 0.6551317021536457, "grad_norm": 0.2935139238834381, "learning_rate": 4.7897336625698134e-05, "loss": 0.3264, "step": 4023 }, { "epoch": 0.6552945487114766, "grad_norm": 0.43361735343933105, "learning_rate": 4.78954346727488e-05, "loss": 0.3924, "step": 4024 }, { "epoch": 0.6554573952693075, "grad_norm": 0.34973663091659546, "learning_rate": 4.789353189778475e-05, "loss": 0.3544, "step": 4025 }, { "epoch": 0.6556202418271384, "grad_norm": 0.3083997368812561, "learning_rate": 4.7891628300874326e-05, "loss": 0.3546, "step": 4026 }, { "epoch": 0.6557830883849692, "grad_norm": 0.37951406836509705, "learning_rate": 4.788972388208586e-05, "loss": 0.3927, "step": 4027 }, { "epoch": 0.6559459349428002, "grad_norm": 0.4077611565589905, "learning_rate": 4.788781864148771e-05, "loss": 0.4268, "step": 4028 }, { "epoch": 0.656108781500631, "grad_norm": 0.3443884551525116, "learning_rate": 4.7885912579148316e-05, "loss": 0.3709, "step": 4029 }, { "epoch": 0.656271628058462, "grad_norm": 0.3512035012245178, "learning_rate": 4.788400569513609e-05, "loss": 0.3303, "step": 4030 }, { "epoch": 0.6564344746162928, "grad_norm": 0.4696807265281677, "learning_rate": 4.788209798951949e-05, "loss": 0.3464, "step": 4031 }, { "epoch": 0.6565973211741237, "grad_norm": 0.34500858187675476, "learning_rate": 4.788018946236703e-05, "loss": 0.3881, "step": 4032 }, { "epoch": 0.6567601677319546, "grad_norm": 0.4184112250804901, "learning_rate": 4.78782801137472e-05, "loss": 0.3689, "step": 4033 }, { "epoch": 0.6569230142897855, "grad_norm": 0.44619327783584595, "learning_rate": 4.787636994372858e-05, "loss": 0.3554, "step": 4034 }, { "epoch": 0.6570858608476163, "grad_norm": 0.39819562435150146, "learning_rate": 4.787445895237973e-05, "loss": 0.3802, "step": 4035 }, { "epoch": 0.6572487074054472, "grad_norm": 0.3404518663883209, "learning_rate": 4.787254713976928e-05, "loss": 0.3651, "step": 4036 }, { "epoch": 0.6574115539632781, "grad_norm": 0.35810616612434387, "learning_rate": 4.7870634505965856e-05, "loss": 0.339, "step": 4037 }, { "epoch": 0.6575744005211089, "grad_norm": 0.47557151317596436, "learning_rate": 4.786872105103813e-05, "loss": 0.4239, "step": 4038 }, { "epoch": 0.6577372470789399, "grad_norm": 0.30251458287239075, "learning_rate": 4.786680677505481e-05, "loss": 0.3801, "step": 4039 }, { "epoch": 0.6579000936367707, "grad_norm": 0.3436003625392914, "learning_rate": 4.78648916780846e-05, "loss": 0.353, "step": 4040 }, { "epoch": 0.6580629401946017, "grad_norm": 0.3234056234359741, "learning_rate": 4.7862975760196285e-05, "loss": 0.3525, "step": 4041 }, { "epoch": 0.6582257867524325, "grad_norm": 0.3763203024864197, "learning_rate": 4.786105902145864e-05, "loss": 0.3439, "step": 4042 }, { "epoch": 0.6583886333102634, "grad_norm": 0.360965758562088, "learning_rate": 4.785914146194048e-05, "loss": 0.3723, "step": 4043 }, { "epoch": 0.6585514798680943, "grad_norm": 0.3023998439311981, "learning_rate": 4.785722308171065e-05, "loss": 0.3432, "step": 4044 }, { "epoch": 0.6587143264259252, "grad_norm": 0.3672032356262207, "learning_rate": 4.785530388083803e-05, "loss": 0.3808, "step": 4045 }, { "epoch": 0.658877172983756, "grad_norm": 0.43340301513671875, "learning_rate": 4.785338385939153e-05, "loss": 0.3691, "step": 4046 }, { "epoch": 0.659040019541587, "grad_norm": 0.3211027979850769, "learning_rate": 4.7851463017440076e-05, "loss": 0.3274, "step": 4047 }, { "epoch": 0.6592028660994178, "grad_norm": 0.36864858865737915, "learning_rate": 4.784954135505264e-05, "loss": 0.3794, "step": 4048 }, { "epoch": 0.6593657126572487, "grad_norm": 0.4201609194278717, "learning_rate": 4.784761887229821e-05, "loss": 0.3736, "step": 4049 }, { "epoch": 0.6595285592150796, "grad_norm": 0.3491038978099823, "learning_rate": 4.7845695569245797e-05, "loss": 0.3473, "step": 4050 }, { "epoch": 0.6596914057729105, "grad_norm": 0.34914591908454895, "learning_rate": 4.784377144596448e-05, "loss": 0.3427, "step": 4051 }, { "epoch": 0.6598542523307414, "grad_norm": 0.4635235667228699, "learning_rate": 4.7841846502523315e-05, "loss": 0.3907, "step": 4052 }, { "epoch": 0.6600170988885723, "grad_norm": 0.4034869074821472, "learning_rate": 4.7839920738991425e-05, "loss": 0.408, "step": 4053 }, { "epoch": 0.6601799454464031, "grad_norm": 0.33853158354759216, "learning_rate": 4.783799415543796e-05, "loss": 0.3427, "step": 4054 }, { "epoch": 0.660342792004234, "grad_norm": 0.43372949957847595, "learning_rate": 4.7836066751932074e-05, "loss": 0.3767, "step": 4055 }, { "epoch": 0.6605056385620649, "grad_norm": 0.40087246894836426, "learning_rate": 4.7834138528542976e-05, "loss": 0.3398, "step": 4056 }, { "epoch": 0.6606684851198957, "grad_norm": 0.3508928716182709, "learning_rate": 4.783220948533989e-05, "loss": 0.3666, "step": 4057 }, { "epoch": 0.6608313316777267, "grad_norm": 0.33374646306037903, "learning_rate": 4.7830279622392076e-05, "loss": 0.387, "step": 4058 }, { "epoch": 0.6609941782355575, "grad_norm": 0.36169806122779846, "learning_rate": 4.782834893976883e-05, "loss": 0.3481, "step": 4059 }, { "epoch": 0.6611570247933884, "grad_norm": 0.38171321153640747, "learning_rate": 4.782641743753945e-05, "loss": 0.3887, "step": 4060 }, { "epoch": 0.6613198713512193, "grad_norm": 0.3847656548023224, "learning_rate": 4.78244851157733e-05, "loss": 0.3599, "step": 4061 }, { "epoch": 0.6614827179090502, "grad_norm": 0.3509505093097687, "learning_rate": 4.782255197453974e-05, "loss": 0.3877, "step": 4062 }, { "epoch": 0.661645564466881, "grad_norm": 0.3183690011501312, "learning_rate": 4.78206180139082e-05, "loss": 0.3446, "step": 4063 }, { "epoch": 0.661808411024712, "grad_norm": 0.3921719193458557, "learning_rate": 4.781868323394809e-05, "loss": 0.3484, "step": 4064 }, { "epoch": 0.6619712575825428, "grad_norm": 0.43100619316101074, "learning_rate": 4.78167476347289e-05, "loss": 0.3845, "step": 4065 }, { "epoch": 0.6621341041403738, "grad_norm": 0.35165122151374817, "learning_rate": 4.78148112163201e-05, "loss": 0.4132, "step": 4066 }, { "epoch": 0.6622969506982046, "grad_norm": 0.34388625621795654, "learning_rate": 4.7812873978791216e-05, "loss": 0.3679, "step": 4067 }, { "epoch": 0.6624597972560355, "grad_norm": 0.4074694514274597, "learning_rate": 4.781093592221182e-05, "loss": 0.3627, "step": 4068 }, { "epoch": 0.6626226438138664, "grad_norm": 0.3713773488998413, "learning_rate": 4.780899704665147e-05, "loss": 0.3552, "step": 4069 }, { "epoch": 0.6627854903716973, "grad_norm": 0.3163908123970032, "learning_rate": 4.780705735217979e-05, "loss": 0.3352, "step": 4070 }, { "epoch": 0.6629483369295281, "grad_norm": 0.47261640429496765, "learning_rate": 4.780511683886643e-05, "loss": 0.4699, "step": 4071 }, { "epoch": 0.6631111834873591, "grad_norm": 0.39091193675994873, "learning_rate": 4.780317550678104e-05, "loss": 0.3894, "step": 4072 }, { "epoch": 0.6632740300451899, "grad_norm": 0.4484170079231262, "learning_rate": 4.780123335599333e-05, "loss": 0.3715, "step": 4073 }, { "epoch": 0.6634368766030208, "grad_norm": 0.4258301556110382, "learning_rate": 4.7799290386573024e-05, "loss": 0.417, "step": 4074 }, { "epoch": 0.6635997231608517, "grad_norm": 0.3476913869380951, "learning_rate": 4.779734659858989e-05, "loss": 0.3692, "step": 4075 }, { "epoch": 0.6637625697186825, "grad_norm": 0.38752397894859314, "learning_rate": 4.779540199211371e-05, "loss": 0.3535, "step": 4076 }, { "epoch": 0.6639254162765135, "grad_norm": 0.387592077255249, "learning_rate": 4.77934565672143e-05, "loss": 0.3666, "step": 4077 }, { "epoch": 0.6640882628343443, "grad_norm": 0.37685608863830566, "learning_rate": 4.779151032396151e-05, "loss": 0.3876, "step": 4078 }, { "epoch": 0.6642511093921752, "grad_norm": 0.4124923348426819, "learning_rate": 4.778956326242522e-05, "loss": 0.3704, "step": 4079 }, { "epoch": 0.6644139559500061, "grad_norm": 0.3427654206752777, "learning_rate": 4.778761538267532e-05, "loss": 0.3876, "step": 4080 }, { "epoch": 0.664576802507837, "grad_norm": 0.40026888251304626, "learning_rate": 4.778566668478176e-05, "loss": 0.3573, "step": 4081 }, { "epoch": 0.6647396490656678, "grad_norm": 0.4474046230316162, "learning_rate": 4.7783717168814494e-05, "loss": 0.436, "step": 4082 }, { "epoch": 0.6649024956234988, "grad_norm": 0.3427039086818695, "learning_rate": 4.7781766834843524e-05, "loss": 0.3409, "step": 4083 }, { "epoch": 0.6650653421813296, "grad_norm": 0.3823118507862091, "learning_rate": 4.777981568293887e-05, "loss": 0.3564, "step": 4084 }, { "epoch": 0.6652281887391606, "grad_norm": 0.4359531104564667, "learning_rate": 4.777786371317058e-05, "loss": 0.3722, "step": 4085 }, { "epoch": 0.6653910352969914, "grad_norm": 0.3167751133441925, "learning_rate": 4.7775910925608736e-05, "loss": 0.3397, "step": 4086 }, { "epoch": 0.6655538818548223, "grad_norm": 0.3596748411655426, "learning_rate": 4.777395732032345e-05, "loss": 0.4022, "step": 4087 }, { "epoch": 0.6657167284126532, "grad_norm": 0.3573749363422394, "learning_rate": 4.777200289738486e-05, "loss": 0.3309, "step": 4088 }, { "epoch": 0.6658795749704841, "grad_norm": 0.37103235721588135, "learning_rate": 4.777004765686315e-05, "loss": 0.3534, "step": 4089 }, { "epoch": 0.6660424215283149, "grad_norm": 0.40459516644477844, "learning_rate": 4.7768091598828505e-05, "loss": 0.371, "step": 4090 }, { "epoch": 0.6662052680861458, "grad_norm": 0.3086009919643402, "learning_rate": 4.776613472335116e-05, "loss": 0.3609, "step": 4091 }, { "epoch": 0.6663681146439767, "grad_norm": 0.39290449023246765, "learning_rate": 4.776417703050136e-05, "loss": 0.3802, "step": 4092 }, { "epoch": 0.6665309612018075, "grad_norm": 0.32432273030281067, "learning_rate": 4.776221852034941e-05, "loss": 0.342, "step": 4093 }, { "epoch": 0.6666938077596385, "grad_norm": 0.3090788722038269, "learning_rate": 4.776025919296561e-05, "loss": 0.3672, "step": 4094 }, { "epoch": 0.6668566543174693, "grad_norm": 0.49510517716407776, "learning_rate": 4.775829904842032e-05, "loss": 0.4719, "step": 4095 }, { "epoch": 0.6670195008753003, "grad_norm": 0.3689325749874115, "learning_rate": 4.7756338086783905e-05, "loss": 0.3734, "step": 4096 }, { "epoch": 0.6671823474331311, "grad_norm": 0.3592819571495056, "learning_rate": 4.7754376308126773e-05, "loss": 0.429, "step": 4097 }, { "epoch": 0.667345193990962, "grad_norm": 0.29052790999412537, "learning_rate": 4.775241371251936e-05, "loss": 0.3359, "step": 4098 }, { "epoch": 0.6675080405487929, "grad_norm": 0.30970436334609985, "learning_rate": 4.775045030003212e-05, "loss": 0.366, "step": 4099 }, { "epoch": 0.6676708871066238, "grad_norm": 0.32604968547821045, "learning_rate": 4.7748486070735566e-05, "loss": 0.3878, "step": 4100 }, { "epoch": 0.6678337336644546, "grad_norm": 0.35829752683639526, "learning_rate": 4.77465210247002e-05, "loss": 0.3739, "step": 4101 }, { "epoch": 0.6679965802222856, "grad_norm": 0.32899120450019836, "learning_rate": 4.7744555161996576e-05, "loss": 0.3679, "step": 4102 }, { "epoch": 0.6681594267801164, "grad_norm": 0.33629292249679565, "learning_rate": 4.774258848269527e-05, "loss": 0.4285, "step": 4103 }, { "epoch": 0.6683222733379474, "grad_norm": 0.34298285841941833, "learning_rate": 4.774062098686691e-05, "loss": 0.3802, "step": 4104 }, { "epoch": 0.6684851198957782, "grad_norm": 0.38707712292671204, "learning_rate": 4.773865267458212e-05, "loss": 0.3324, "step": 4105 }, { "epoch": 0.6686479664536091, "grad_norm": 0.3186226785182953, "learning_rate": 4.773668354591157e-05, "loss": 0.3816, "step": 4106 }, { "epoch": 0.66881081301144, "grad_norm": 0.369473397731781, "learning_rate": 4.7734713600925964e-05, "loss": 0.3551, "step": 4107 }, { "epoch": 0.6689736595692709, "grad_norm": 0.38478338718414307, "learning_rate": 4.773274283969602e-05, "loss": 0.3659, "step": 4108 }, { "epoch": 0.6691365061271017, "grad_norm": 0.3682430684566498, "learning_rate": 4.77307712622925e-05, "loss": 0.4247, "step": 4109 }, { "epoch": 0.6692993526849326, "grad_norm": 0.3525402247905731, "learning_rate": 4.772879886878619e-05, "loss": 0.3698, "step": 4110 }, { "epoch": 0.6694621992427635, "grad_norm": 0.3600046932697296, "learning_rate": 4.7726825659247905e-05, "loss": 0.3616, "step": 4111 }, { "epoch": 0.6696250458005943, "grad_norm": 0.3737862706184387, "learning_rate": 4.7724851633748486e-05, "loss": 0.3555, "step": 4112 }, { "epoch": 0.6697878923584253, "grad_norm": 0.38425979018211365, "learning_rate": 4.7722876792358805e-05, "loss": 0.368, "step": 4113 }, { "epoch": 0.6699507389162561, "grad_norm": 0.3041740655899048, "learning_rate": 4.7720901135149774e-05, "loss": 0.3245, "step": 4114 }, { "epoch": 0.6701135854740871, "grad_norm": 0.36335813999176025, "learning_rate": 4.7718924662192313e-05, "loss": 0.4172, "step": 4115 }, { "epoch": 0.6702764320319179, "grad_norm": 0.3412034511566162, "learning_rate": 4.7716947373557385e-05, "loss": 0.3718, "step": 4116 }, { "epoch": 0.6704392785897488, "grad_norm": 0.31744006276130676, "learning_rate": 4.771496926931599e-05, "loss": 0.3355, "step": 4117 }, { "epoch": 0.6706021251475797, "grad_norm": 0.36656054854393005, "learning_rate": 4.7712990349539145e-05, "loss": 0.3651, "step": 4118 }, { "epoch": 0.6707649717054106, "grad_norm": 0.36391234397888184, "learning_rate": 4.77110106142979e-05, "loss": 0.3817, "step": 4119 }, { "epoch": 0.6709278182632414, "grad_norm": 0.3061615824699402, "learning_rate": 4.770903006366332e-05, "loss": 0.3451, "step": 4120 }, { "epoch": 0.6710906648210724, "grad_norm": 0.3228932321071625, "learning_rate": 4.770704869770653e-05, "loss": 0.3472, "step": 4121 }, { "epoch": 0.6712535113789032, "grad_norm": 0.34295743703842163, "learning_rate": 4.7705066516498653e-05, "loss": 0.3609, "step": 4122 }, { "epoch": 0.6714163579367342, "grad_norm": 0.32583844661712646, "learning_rate": 4.7703083520110865e-05, "loss": 0.3713, "step": 4123 }, { "epoch": 0.671579204494565, "grad_norm": 0.33767223358154297, "learning_rate": 4.770109970861436e-05, "loss": 0.4263, "step": 4124 }, { "epoch": 0.6717420510523959, "grad_norm": 0.3080520033836365, "learning_rate": 4.769911508208036e-05, "loss": 0.3516, "step": 4125 }, { "epoch": 0.6719048976102268, "grad_norm": 0.3900105357170105, "learning_rate": 4.769712964058012e-05, "loss": 0.4162, "step": 4126 }, { "epoch": 0.6720677441680577, "grad_norm": 0.3110819160938263, "learning_rate": 4.769514338418492e-05, "loss": 0.3495, "step": 4127 }, { "epoch": 0.6722305907258885, "grad_norm": 0.34000739455223083, "learning_rate": 4.769315631296608e-05, "loss": 0.3748, "step": 4128 }, { "epoch": 0.6723934372837194, "grad_norm": 0.35499078035354614, "learning_rate": 4.7691168426994934e-05, "loss": 0.3695, "step": 4129 }, { "epoch": 0.6725562838415503, "grad_norm": 0.39448297023773193, "learning_rate": 4.768917972634286e-05, "loss": 0.4107, "step": 4130 }, { "epoch": 0.6727191303993811, "grad_norm": 0.3530733287334442, "learning_rate": 4.768719021108125e-05, "loss": 0.4085, "step": 4131 }, { "epoch": 0.6728819769572121, "grad_norm": 0.3266637623310089, "learning_rate": 4.768519988128155e-05, "loss": 0.3919, "step": 4132 }, { "epoch": 0.6730448235150429, "grad_norm": 0.45313093066215515, "learning_rate": 4.7683208737015205e-05, "loss": 0.4056, "step": 4133 }, { "epoch": 0.6732076700728739, "grad_norm": 0.4053087830543518, "learning_rate": 4.76812167783537e-05, "loss": 0.4287, "step": 4134 }, { "epoch": 0.6733705166307047, "grad_norm": 0.3601927161216736, "learning_rate": 4.7679224005368564e-05, "loss": 0.3957, "step": 4135 }, { "epoch": 0.6735333631885356, "grad_norm": 0.3935633897781372, "learning_rate": 4.767723041813133e-05, "loss": 0.3629, "step": 4136 }, { "epoch": 0.6736962097463665, "grad_norm": 0.55213463306427, "learning_rate": 4.7675236016713585e-05, "loss": 0.3754, "step": 4137 }, { "epoch": 0.6738590563041974, "grad_norm": 0.3553777039051056, "learning_rate": 4.767324080118693e-05, "loss": 0.3664, "step": 4138 }, { "epoch": 0.6740219028620282, "grad_norm": 0.3209863305091858, "learning_rate": 4.7671244771623006e-05, "loss": 0.3526, "step": 4139 }, { "epoch": 0.6741847494198592, "grad_norm": 0.384823739528656, "learning_rate": 4.766924792809346e-05, "loss": 0.3429, "step": 4140 }, { "epoch": 0.67434759597769, "grad_norm": 0.4386827051639557, "learning_rate": 4.7667250270669995e-05, "loss": 0.4046, "step": 4141 }, { "epoch": 0.674510442535521, "grad_norm": 0.3354658782482147, "learning_rate": 4.766525179942434e-05, "loss": 0.3598, "step": 4142 }, { "epoch": 0.6746732890933518, "grad_norm": 0.4765503704547882, "learning_rate": 4.766325251442823e-05, "loss": 0.4018, "step": 4143 }, { "epoch": 0.6748361356511827, "grad_norm": 0.4116823375225067, "learning_rate": 4.7661252415753457e-05, "loss": 0.3953, "step": 4144 }, { "epoch": 0.6749989822090136, "grad_norm": 0.3670155107975006, "learning_rate": 4.765925150347182e-05, "loss": 0.3489, "step": 4145 }, { "epoch": 0.6751618287668445, "grad_norm": 0.3471640944480896, "learning_rate": 4.765724977765518e-05, "loss": 0.3787, "step": 4146 }, { "epoch": 0.6753246753246753, "grad_norm": 0.34696105122566223, "learning_rate": 4.7655247238375386e-05, "loss": 0.3454, "step": 4147 }, { "epoch": 0.6754875218825062, "grad_norm": 0.410884827375412, "learning_rate": 4.7653243885704335e-05, "loss": 0.4134, "step": 4148 }, { "epoch": 0.6756503684403371, "grad_norm": 0.32612621784210205, "learning_rate": 4.765123971971396e-05, "loss": 0.3461, "step": 4149 }, { "epoch": 0.6758132149981679, "grad_norm": 0.3216727077960968, "learning_rate": 4.7649234740476213e-05, "loss": 0.3628, "step": 4150 }, { "epoch": 0.6759760615559989, "grad_norm": 0.36476051807403564, "learning_rate": 4.7647228948063077e-05, "loss": 0.3394, "step": 4151 }, { "epoch": 0.6761389081138297, "grad_norm": 0.33893537521362305, "learning_rate": 4.764522234254657e-05, "loss": 0.3575, "step": 4152 }, { "epoch": 0.6763017546716606, "grad_norm": 0.3793931305408478, "learning_rate": 4.7643214923998735e-05, "loss": 0.3658, "step": 4153 }, { "epoch": 0.6764646012294915, "grad_norm": 0.37866172194480896, "learning_rate": 4.764120669249165e-05, "loss": 0.4062, "step": 4154 }, { "epoch": 0.6766274477873224, "grad_norm": 0.3778057098388672, "learning_rate": 4.76391976480974e-05, "loss": 0.3511, "step": 4155 }, { "epoch": 0.6767902943451533, "grad_norm": 0.36867040395736694, "learning_rate": 4.763718779088813e-05, "loss": 0.4185, "step": 4156 }, { "epoch": 0.6769531409029842, "grad_norm": 0.3605612516403198, "learning_rate": 4.7635177120935994e-05, "loss": 0.382, "step": 4157 }, { "epoch": 0.677115987460815, "grad_norm": 0.3313000798225403, "learning_rate": 4.763316563831319e-05, "loss": 0.3395, "step": 4158 }, { "epoch": 0.677278834018646, "grad_norm": 0.3768933415412903, "learning_rate": 4.763115334309192e-05, "loss": 0.3727, "step": 4159 }, { "epoch": 0.6774416805764768, "grad_norm": 0.3478267788887024, "learning_rate": 4.7629140235344436e-05, "loss": 0.357, "step": 4160 }, { "epoch": 0.6776045271343077, "grad_norm": 0.3612351715564728, "learning_rate": 4.7627126315143034e-05, "loss": 0.392, "step": 4161 }, { "epoch": 0.6777673736921386, "grad_norm": 0.3472605347633362, "learning_rate": 4.762511158255999e-05, "loss": 0.3917, "step": 4162 }, { "epoch": 0.6779302202499695, "grad_norm": 0.40050315856933594, "learning_rate": 4.762309603766766e-05, "loss": 0.4303, "step": 4163 }, { "epoch": 0.6780930668078003, "grad_norm": 0.42722538113594055, "learning_rate": 4.76210796805384e-05, "loss": 0.3795, "step": 4164 }, { "epoch": 0.6782559133656312, "grad_norm": 0.33917906880378723, "learning_rate": 4.761906251124461e-05, "loss": 0.3966, "step": 4165 }, { "epoch": 0.6784187599234621, "grad_norm": 0.3659929931163788, "learning_rate": 4.7617044529858704e-05, "loss": 0.3579, "step": 4166 }, { "epoch": 0.678581606481293, "grad_norm": 0.3191182017326355, "learning_rate": 4.7615025736453134e-05, "loss": 0.3672, "step": 4167 }, { "epoch": 0.6787444530391239, "grad_norm": 0.3802216053009033, "learning_rate": 4.761300613110039e-05, "loss": 0.4051, "step": 4168 }, { "epoch": 0.6789072995969547, "grad_norm": 0.30966678261756897, "learning_rate": 4.7610985713872965e-05, "loss": 0.3397, "step": 4169 }, { "epoch": 0.6790701461547857, "grad_norm": 0.3594992458820343, "learning_rate": 4.760896448484342e-05, "loss": 0.3613, "step": 4170 }, { "epoch": 0.6792329927126165, "grad_norm": 0.36340710520744324, "learning_rate": 4.7606942444084305e-05, "loss": 0.3744, "step": 4171 }, { "epoch": 0.6793958392704474, "grad_norm": 0.34489843249320984, "learning_rate": 4.760491959166823e-05, "loss": 0.3409, "step": 4172 }, { "epoch": 0.6795586858282783, "grad_norm": 0.401020884513855, "learning_rate": 4.760289592766781e-05, "loss": 0.3999, "step": 4173 }, { "epoch": 0.6797215323861092, "grad_norm": 0.32727113366127014, "learning_rate": 4.76008714521557e-05, "loss": 0.3154, "step": 4174 }, { "epoch": 0.67988437894394, "grad_norm": 0.36777934432029724, "learning_rate": 4.75988461652046e-05, "loss": 0.3655, "step": 4175 }, { "epoch": 0.680047225501771, "grad_norm": 0.3448021113872528, "learning_rate": 4.7596820066887216e-05, "loss": 0.3661, "step": 4176 }, { "epoch": 0.6802100720596018, "grad_norm": 0.3339032530784607, "learning_rate": 4.759479315727629e-05, "loss": 0.3616, "step": 4177 }, { "epoch": 0.6803729186174328, "grad_norm": 0.37943652272224426, "learning_rate": 4.759276543644459e-05, "loss": 0.4069, "step": 4178 }, { "epoch": 0.6805357651752636, "grad_norm": 0.37362635135650635, "learning_rate": 4.759073690446492e-05, "loss": 0.3814, "step": 4179 }, { "epoch": 0.6806986117330945, "grad_norm": 0.32657763361930847, "learning_rate": 4.758870756141012e-05, "loss": 0.3773, "step": 4180 }, { "epoch": 0.6808614582909254, "grad_norm": 0.29725193977355957, "learning_rate": 4.758667740735303e-05, "loss": 0.3539, "step": 4181 }, { "epoch": 0.6810243048487563, "grad_norm": 0.3523951768875122, "learning_rate": 4.758464644236655e-05, "loss": 0.4119, "step": 4182 }, { "epoch": 0.6811871514065871, "grad_norm": 0.36173537373542786, "learning_rate": 4.7582614666523605e-05, "loss": 0.3661, "step": 4183 }, { "epoch": 0.681349997964418, "grad_norm": 0.42122963070869446, "learning_rate": 4.758058207989713e-05, "loss": 0.4155, "step": 4184 }, { "epoch": 0.6815128445222489, "grad_norm": 0.3294627070426941, "learning_rate": 4.7578548682560104e-05, "loss": 0.3759, "step": 4185 }, { "epoch": 0.6816756910800797, "grad_norm": 0.3607524037361145, "learning_rate": 4.757651447458553e-05, "loss": 0.3701, "step": 4186 }, { "epoch": 0.6818385376379107, "grad_norm": 0.37171196937561035, "learning_rate": 4.7574479456046455e-05, "loss": 0.3727, "step": 4187 }, { "epoch": 0.6820013841957415, "grad_norm": 0.3550422489643097, "learning_rate": 4.7572443627015925e-05, "loss": 0.3647, "step": 4188 }, { "epoch": 0.6821642307535725, "grad_norm": 0.3839501142501831, "learning_rate": 4.757040698756705e-05, "loss": 0.4076, "step": 4189 }, { "epoch": 0.6823270773114033, "grad_norm": 0.3489411771297455, "learning_rate": 4.756836953777293e-05, "loss": 0.3953, "step": 4190 }, { "epoch": 0.6824899238692342, "grad_norm": 0.3273111879825592, "learning_rate": 4.7566331277706735e-05, "loss": 0.3367, "step": 4191 }, { "epoch": 0.6826527704270651, "grad_norm": 0.3613394498825073, "learning_rate": 4.756429220744163e-05, "loss": 0.416, "step": 4192 }, { "epoch": 0.682815616984896, "grad_norm": 0.3151252567768097, "learning_rate": 4.7562252327050824e-05, "loss": 0.3532, "step": 4193 }, { "epoch": 0.6829784635427268, "grad_norm": 0.31975045800209045, "learning_rate": 4.756021163660758e-05, "loss": 0.3671, "step": 4194 }, { "epoch": 0.6831413101005578, "grad_norm": 0.3509521782398224, "learning_rate": 4.755817013618513e-05, "loss": 0.4409, "step": 4195 }, { "epoch": 0.6833041566583886, "grad_norm": 0.3043898046016693, "learning_rate": 4.7556127825856796e-05, "loss": 0.3615, "step": 4196 }, { "epoch": 0.6834670032162196, "grad_norm": 0.3884095549583435, "learning_rate": 4.755408470569589e-05, "loss": 0.3822, "step": 4197 }, { "epoch": 0.6836298497740504, "grad_norm": 0.3666386604309082, "learning_rate": 4.755204077577576e-05, "loss": 0.3931, "step": 4198 }, { "epoch": 0.6837926963318813, "grad_norm": 0.31301990151405334, "learning_rate": 4.754999603616981e-05, "loss": 0.3287, "step": 4199 }, { "epoch": 0.6839555428897122, "grad_norm": 0.354404479265213, "learning_rate": 4.7547950486951445e-05, "loss": 0.3298, "step": 4200 }, { "epoch": 0.6841183894475431, "grad_norm": 0.3672575056552887, "learning_rate": 4.7545904128194085e-05, "loss": 0.3674, "step": 4201 }, { "epoch": 0.6842812360053739, "grad_norm": 0.3515990376472473, "learning_rate": 4.7543856959971235e-05, "loss": 0.3484, "step": 4202 }, { "epoch": 0.6844440825632048, "grad_norm": 0.4513574242591858, "learning_rate": 4.754180898235636e-05, "loss": 0.3973, "step": 4203 }, { "epoch": 0.6846069291210357, "grad_norm": 0.33636435866355896, "learning_rate": 4.753976019542302e-05, "loss": 0.3605, "step": 4204 }, { "epoch": 0.6847697756788665, "grad_norm": 0.30278444290161133, "learning_rate": 4.753771059924475e-05, "loss": 0.3362, "step": 4205 }, { "epoch": 0.6849326222366975, "grad_norm": 0.33808737993240356, "learning_rate": 4.753566019389515e-05, "loss": 0.3318, "step": 4206 }, { "epoch": 0.6850954687945283, "grad_norm": 0.3751372694969177, "learning_rate": 4.753360897944783e-05, "loss": 0.3224, "step": 4207 }, { "epoch": 0.6852583153523593, "grad_norm": 0.3311462104320526, "learning_rate": 4.753155695597643e-05, "loss": 0.3664, "step": 4208 }, { "epoch": 0.6854211619101901, "grad_norm": 0.3413108289241791, "learning_rate": 4.752950412355463e-05, "loss": 0.3715, "step": 4209 }, { "epoch": 0.685584008468021, "grad_norm": 0.3609400987625122, "learning_rate": 4.7527450482256133e-05, "loss": 0.3765, "step": 4210 }, { "epoch": 0.6857468550258519, "grad_norm": 0.32788169384002686, "learning_rate": 4.7525396032154665e-05, "loss": 0.3388, "step": 4211 }, { "epoch": 0.6859097015836828, "grad_norm": 0.381289541721344, "learning_rate": 4.7523340773324e-05, "loss": 0.3383, "step": 4212 }, { "epoch": 0.6860725481415136, "grad_norm": 0.414527028799057, "learning_rate": 4.752128470583792e-05, "loss": 0.4072, "step": 4213 }, { "epoch": 0.6862353946993446, "grad_norm": 0.41324907541275024, "learning_rate": 4.7519227829770244e-05, "loss": 0.3844, "step": 4214 }, { "epoch": 0.6863982412571754, "grad_norm": 0.3361555337905884, "learning_rate": 4.7517170145194814e-05, "loss": 0.3167, "step": 4215 }, { "epoch": 0.6865610878150064, "grad_norm": 0.3909572660923004, "learning_rate": 4.7515111652185505e-05, "loss": 0.3717, "step": 4216 }, { "epoch": 0.6867239343728372, "grad_norm": 0.39581039547920227, "learning_rate": 4.7513052350816245e-05, "loss": 0.4119, "step": 4217 }, { "epoch": 0.6868867809306681, "grad_norm": 0.40064769983291626, "learning_rate": 4.7510992241160944e-05, "loss": 0.3691, "step": 4218 }, { "epoch": 0.687049627488499, "grad_norm": 0.358188271522522, "learning_rate": 4.750893132329359e-05, "loss": 0.3287, "step": 4219 }, { "epoch": 0.6872124740463298, "grad_norm": 0.41802507638931274, "learning_rate": 4.750686959728815e-05, "loss": 0.3976, "step": 4220 }, { "epoch": 0.6873753206041607, "grad_norm": 0.3835899233818054, "learning_rate": 4.750480706321866e-05, "loss": 0.4246, "step": 4221 }, { "epoch": 0.6875381671619916, "grad_norm": 0.4097352921962738, "learning_rate": 4.750274372115918e-05, "loss": 0.3895, "step": 4222 }, { "epoch": 0.6877010137198225, "grad_norm": 0.3691653907299042, "learning_rate": 4.750067957118377e-05, "loss": 0.3549, "step": 4223 }, { "epoch": 0.6878638602776533, "grad_norm": 0.3498385548591614, "learning_rate": 4.7498614613366553e-05, "loss": 0.4025, "step": 4224 }, { "epoch": 0.6880267068354843, "grad_norm": 0.3341619074344635, "learning_rate": 4.749654884778167e-05, "loss": 0.363, "step": 4225 }, { "epoch": 0.6881895533933151, "grad_norm": 0.3848989009857178, "learning_rate": 4.7494482274503275e-05, "loss": 0.3893, "step": 4226 }, { "epoch": 0.688352399951146, "grad_norm": 0.49273180961608887, "learning_rate": 4.7492414893605574e-05, "loss": 0.4185, "step": 4227 }, { "epoch": 0.6885152465089769, "grad_norm": 0.32005488872528076, "learning_rate": 4.749034670516278e-05, "loss": 0.3173, "step": 4228 }, { "epoch": 0.6886780930668078, "grad_norm": 0.3246220648288727, "learning_rate": 4.7488277709249175e-05, "loss": 0.3321, "step": 4229 }, { "epoch": 0.6888409396246387, "grad_norm": 0.3768969178199768, "learning_rate": 4.7486207905939005e-05, "loss": 0.3754, "step": 4230 }, { "epoch": 0.6890037861824696, "grad_norm": 0.4248405694961548, "learning_rate": 4.7484137295306615e-05, "loss": 0.3912, "step": 4231 }, { "epoch": 0.6891666327403004, "grad_norm": 0.34930941462516785, "learning_rate": 4.748206587742632e-05, "loss": 0.3392, "step": 4232 }, { "epoch": 0.6893294792981314, "grad_norm": 0.3133397698402405, "learning_rate": 4.747999365237251e-05, "loss": 0.3838, "step": 4233 }, { "epoch": 0.6894923258559622, "grad_norm": 0.44505009055137634, "learning_rate": 4.747792062021958e-05, "loss": 0.3792, "step": 4234 }, { "epoch": 0.6896551724137931, "grad_norm": 0.39711716771125793, "learning_rate": 4.747584678104194e-05, "loss": 0.4296, "step": 4235 }, { "epoch": 0.689818018971624, "grad_norm": 0.3454335033893585, "learning_rate": 4.7473772134914076e-05, "loss": 0.3244, "step": 4236 }, { "epoch": 0.6899808655294549, "grad_norm": 0.29065877199172974, "learning_rate": 4.747169668191045e-05, "loss": 0.3489, "step": 4237 }, { "epoch": 0.6901437120872858, "grad_norm": 0.29914385080337524, "learning_rate": 4.7469620422105586e-05, "loss": 0.3527, "step": 4238 }, { "epoch": 0.6903065586451166, "grad_norm": 0.29973503947257996, "learning_rate": 4.7467543355574026e-05, "loss": 0.355, "step": 4239 }, { "epoch": 0.6904694052029475, "grad_norm": 0.34454622864723206, "learning_rate": 4.746546548239035e-05, "loss": 0.386, "step": 4240 }, { "epoch": 0.6906322517607784, "grad_norm": 0.3957078456878662, "learning_rate": 4.7463386802629155e-05, "loss": 0.3865, "step": 4241 }, { "epoch": 0.6907950983186093, "grad_norm": 0.3565004765987396, "learning_rate": 4.746130731636507e-05, "loss": 0.3897, "step": 4242 }, { "epoch": 0.6909579448764401, "grad_norm": 0.3127956986427307, "learning_rate": 4.745922702367276e-05, "loss": 0.35, "step": 4243 }, { "epoch": 0.6911207914342711, "grad_norm": 0.33396631479263306, "learning_rate": 4.7457145924626905e-05, "loss": 0.3365, "step": 4244 }, { "epoch": 0.6912836379921019, "grad_norm": 0.44893789291381836, "learning_rate": 4.745506401930223e-05, "loss": 0.3923, "step": 4245 }, { "epoch": 0.6914464845499328, "grad_norm": 0.38111284375190735, "learning_rate": 4.745298130777348e-05, "loss": 0.3836, "step": 4246 }, { "epoch": 0.6916093311077637, "grad_norm": 0.39026546478271484, "learning_rate": 4.745089779011543e-05, "loss": 0.3912, "step": 4247 }, { "epoch": 0.6917721776655946, "grad_norm": 0.3323372006416321, "learning_rate": 4.7448813466402884e-05, "loss": 0.3388, "step": 4248 }, { "epoch": 0.6919350242234255, "grad_norm": 0.32358312606811523, "learning_rate": 4.744672833671068e-05, "loss": 0.3368, "step": 4249 }, { "epoch": 0.6920978707812564, "grad_norm": 0.3251911997795105, "learning_rate": 4.744464240111367e-05, "loss": 0.3958, "step": 4250 }, { "epoch": 0.6922607173390872, "grad_norm": 0.3657413721084595, "learning_rate": 4.744255565968676e-05, "loss": 0.3714, "step": 4251 }, { "epoch": 0.6924235638969182, "grad_norm": 0.31780746579170227, "learning_rate": 4.7440468112504854e-05, "loss": 0.3447, "step": 4252 }, { "epoch": 0.692586410454749, "grad_norm": 0.4802896976470947, "learning_rate": 4.743837975964291e-05, "loss": 0.3867, "step": 4253 }, { "epoch": 0.6927492570125799, "grad_norm": 0.31170037388801575, "learning_rate": 4.7436290601175906e-05, "loss": 0.349, "step": 4254 }, { "epoch": 0.6929121035704108, "grad_norm": 0.32445043325424194, "learning_rate": 4.743420063717885e-05, "loss": 0.3929, "step": 4255 }, { "epoch": 0.6930749501282417, "grad_norm": 0.3986707925796509, "learning_rate": 4.743210986772678e-05, "loss": 0.4104, "step": 4256 }, { "epoch": 0.6932377966860725, "grad_norm": 0.4740089774131775, "learning_rate": 4.743001829289475e-05, "loss": 0.3941, "step": 4257 }, { "epoch": 0.6934006432439034, "grad_norm": 0.4467206299304962, "learning_rate": 4.742792591275787e-05, "loss": 0.4056, "step": 4258 }, { "epoch": 0.6935634898017343, "grad_norm": 0.49068689346313477, "learning_rate": 4.742583272739125e-05, "loss": 0.3974, "step": 4259 }, { "epoch": 0.6937263363595652, "grad_norm": 0.36129269003868103, "learning_rate": 4.7423738736870046e-05, "loss": 0.3775, "step": 4260 }, { "epoch": 0.6938891829173961, "grad_norm": 0.35598891973495483, "learning_rate": 4.742164394126943e-05, "loss": 0.3848, "step": 4261 }, { "epoch": 0.6940520294752269, "grad_norm": 0.30889424681663513, "learning_rate": 4.741954834066463e-05, "loss": 0.3771, "step": 4262 }, { "epoch": 0.6942148760330579, "grad_norm": 0.3905770182609558, "learning_rate": 4.741745193513087e-05, "loss": 0.3722, "step": 4263 }, { "epoch": 0.6943777225908887, "grad_norm": 0.3754138946533203, "learning_rate": 4.741535472474342e-05, "loss": 0.3585, "step": 4264 }, { "epoch": 0.6945405691487196, "grad_norm": 0.3779333531856537, "learning_rate": 4.741325670957758e-05, "loss": 0.3784, "step": 4265 }, { "epoch": 0.6947034157065505, "grad_norm": 0.3519122898578644, "learning_rate": 4.741115788970867e-05, "loss": 0.3949, "step": 4266 }, { "epoch": 0.6948662622643814, "grad_norm": 0.3835906982421875, "learning_rate": 4.7409058265212045e-05, "loss": 0.4171, "step": 4267 }, { "epoch": 0.6950291088222122, "grad_norm": 0.4706704914569855, "learning_rate": 4.740695783616309e-05, "loss": 0.414, "step": 4268 }, { "epoch": 0.6951919553800432, "grad_norm": 0.30763760209083557, "learning_rate": 4.7404856602637213e-05, "loss": 0.3895, "step": 4269 }, { "epoch": 0.695354801937874, "grad_norm": 0.35778239369392395, "learning_rate": 4.7402754564709866e-05, "loss": 0.3903, "step": 4270 }, { "epoch": 0.695517648495705, "grad_norm": 0.3312753140926361, "learning_rate": 4.74006517224565e-05, "loss": 0.3365, "step": 4271 }, { "epoch": 0.6956804950535358, "grad_norm": 0.34578630328178406, "learning_rate": 4.739854807595263e-05, "loss": 0.3616, "step": 4272 }, { "epoch": 0.6958433416113667, "grad_norm": 0.38312315940856934, "learning_rate": 4.7396443625273776e-05, "loss": 0.3755, "step": 4273 }, { "epoch": 0.6960061881691976, "grad_norm": 0.38368022441864014, "learning_rate": 4.739433837049549e-05, "loss": 0.3664, "step": 4274 }, { "epoch": 0.6961690347270285, "grad_norm": 0.4354775547981262, "learning_rate": 4.7392232311693363e-05, "loss": 0.3638, "step": 4275 }, { "epoch": 0.6963318812848593, "grad_norm": 0.3424474000930786, "learning_rate": 4.739012544894301e-05, "loss": 0.3848, "step": 4276 }, { "epoch": 0.6964947278426902, "grad_norm": 0.3299865126609802, "learning_rate": 4.7388017782320065e-05, "loss": 0.4054, "step": 4277 }, { "epoch": 0.6966575744005211, "grad_norm": 0.41233474016189575, "learning_rate": 4.738590931190021e-05, "loss": 0.4324, "step": 4278 }, { "epoch": 0.696820420958352, "grad_norm": 0.3825942873954773, "learning_rate": 4.738380003775914e-05, "loss": 0.364, "step": 4279 }, { "epoch": 0.6969832675161829, "grad_norm": 0.4237686097621918, "learning_rate": 4.7381689959972584e-05, "loss": 0.3679, "step": 4280 }, { "epoch": 0.6971461140740137, "grad_norm": 0.2915313243865967, "learning_rate": 4.73795790786163e-05, "loss": 0.3319, "step": 4281 }, { "epoch": 0.6973089606318447, "grad_norm": 0.569133460521698, "learning_rate": 4.7377467393766076e-05, "loss": 0.4281, "step": 4282 }, { "epoch": 0.6974718071896755, "grad_norm": 0.31867268681526184, "learning_rate": 4.7375354905497724e-05, "loss": 0.3571, "step": 4283 }, { "epoch": 0.6976346537475064, "grad_norm": 0.44802966713905334, "learning_rate": 4.7373241613887096e-05, "loss": 0.3876, "step": 4284 }, { "epoch": 0.6977975003053373, "grad_norm": 0.3689170479774475, "learning_rate": 4.737112751901007e-05, "loss": 0.3704, "step": 4285 }, { "epoch": 0.6979603468631682, "grad_norm": 0.3868010640144348, "learning_rate": 4.736901262094253e-05, "loss": 0.3731, "step": 4286 }, { "epoch": 0.698123193420999, "grad_norm": 0.3899184763431549, "learning_rate": 4.736689691976043e-05, "loss": 0.3336, "step": 4287 }, { "epoch": 0.69828603997883, "grad_norm": 0.3761274516582489, "learning_rate": 4.73647804155397e-05, "loss": 0.3443, "step": 4288 }, { "epoch": 0.6984488865366608, "grad_norm": 0.3505776524543762, "learning_rate": 4.7362663108356356e-05, "loss": 0.3708, "step": 4289 }, { "epoch": 0.6986117330944918, "grad_norm": 0.398902952671051, "learning_rate": 4.736054499828641e-05, "loss": 0.4145, "step": 4290 }, { "epoch": 0.6987745796523226, "grad_norm": 0.37764453887939453, "learning_rate": 4.73584260854059e-05, "loss": 0.3794, "step": 4291 }, { "epoch": 0.6989374262101535, "grad_norm": 0.3468831479549408, "learning_rate": 4.7356306369790904e-05, "loss": 0.3693, "step": 4292 }, { "epoch": 0.6991002727679844, "grad_norm": 0.3586876690387726, "learning_rate": 4.7354185851517526e-05, "loss": 0.3682, "step": 4293 }, { "epoch": 0.6992631193258152, "grad_norm": 0.4641515910625458, "learning_rate": 4.735206453066191e-05, "loss": 0.3951, "step": 4294 }, { "epoch": 0.6994259658836461, "grad_norm": 0.3688480854034424, "learning_rate": 4.7349942407300204e-05, "loss": 0.3883, "step": 4295 }, { "epoch": 0.699588812441477, "grad_norm": 0.3792573809623718, "learning_rate": 4.7347819481508604e-05, "loss": 0.3703, "step": 4296 }, { "epoch": 0.6997516589993079, "grad_norm": 0.3878152370452881, "learning_rate": 4.734569575336333e-05, "loss": 0.3553, "step": 4297 }, { "epoch": 0.6999145055571387, "grad_norm": 0.4139401912689209, "learning_rate": 4.734357122294062e-05, "loss": 0.3716, "step": 4298 }, { "epoch": 0.7000773521149697, "grad_norm": 0.4010353982448578, "learning_rate": 4.734144589031677e-05, "loss": 0.3714, "step": 4299 }, { "epoch": 0.7002401986728005, "grad_norm": 0.34044837951660156, "learning_rate": 4.733931975556808e-05, "loss": 0.3387, "step": 4300 }, { "epoch": 0.7004030452306315, "grad_norm": 0.40335553884506226, "learning_rate": 4.733719281877087e-05, "loss": 0.3713, "step": 4301 }, { "epoch": 0.7005658917884623, "grad_norm": 0.33824601769447327, "learning_rate": 4.7335065080001514e-05, "loss": 0.3719, "step": 4302 }, { "epoch": 0.7007287383462932, "grad_norm": 0.34871622920036316, "learning_rate": 4.7332936539336405e-05, "loss": 0.3436, "step": 4303 }, { "epoch": 0.7008915849041241, "grad_norm": 0.3898423910140991, "learning_rate": 4.7330807196851965e-05, "loss": 0.4071, "step": 4304 }, { "epoch": 0.701054431461955, "grad_norm": 0.3770400583744049, "learning_rate": 4.732867705262465e-05, "loss": 0.3569, "step": 4305 }, { "epoch": 0.7012172780197858, "grad_norm": 0.4388999044895172, "learning_rate": 4.7326546106730916e-05, "loss": 0.4001, "step": 4306 }, { "epoch": 0.7013801245776168, "grad_norm": 0.4018555283546448, "learning_rate": 4.7324414359247295e-05, "loss": 0.3638, "step": 4307 }, { "epoch": 0.7015429711354476, "grad_norm": 0.38769838213920593, "learning_rate": 4.7322281810250305e-05, "loss": 0.3526, "step": 4308 }, { "epoch": 0.7017058176932786, "grad_norm": 0.3590790927410126, "learning_rate": 4.7320148459816524e-05, "loss": 0.3914, "step": 4309 }, { "epoch": 0.7018686642511094, "grad_norm": 0.39645513892173767, "learning_rate": 4.731801430802254e-05, "loss": 0.3769, "step": 4310 }, { "epoch": 0.7020315108089403, "grad_norm": 0.31612586975097656, "learning_rate": 4.731587935494497e-05, "loss": 0.3637, "step": 4311 }, { "epoch": 0.7021943573667712, "grad_norm": 0.38848450779914856, "learning_rate": 4.731374360066048e-05, "loss": 0.3744, "step": 4312 }, { "epoch": 0.702357203924602, "grad_norm": 0.3840104341506958, "learning_rate": 4.731160704524573e-05, "loss": 0.4087, "step": 4313 }, { "epoch": 0.7025200504824329, "grad_norm": 0.3274107575416565, "learning_rate": 4.7309469688777445e-05, "loss": 0.3832, "step": 4314 }, { "epoch": 0.7026828970402638, "grad_norm": 0.34870249032974243, "learning_rate": 4.730733153133236e-05, "loss": 0.3455, "step": 4315 }, { "epoch": 0.7028457435980947, "grad_norm": 0.3505815267562866, "learning_rate": 4.7305192572987235e-05, "loss": 0.3323, "step": 4316 }, { "epoch": 0.7030085901559255, "grad_norm": 0.30879440903663635, "learning_rate": 4.7303052813818874e-05, "loss": 0.3297, "step": 4317 }, { "epoch": 0.7031714367137565, "grad_norm": 0.36883509159088135, "learning_rate": 4.730091225390409e-05, "loss": 0.3751, "step": 4318 }, { "epoch": 0.7033342832715873, "grad_norm": 0.3213629126548767, "learning_rate": 4.729877089331975e-05, "loss": 0.3252, "step": 4319 }, { "epoch": 0.7034971298294183, "grad_norm": 0.39585980772972107, "learning_rate": 4.729662873214271e-05, "loss": 0.3711, "step": 4320 }, { "epoch": 0.7036599763872491, "grad_norm": 0.35602790117263794, "learning_rate": 4.7294485770449905e-05, "loss": 0.3919, "step": 4321 }, { "epoch": 0.70382282294508, "grad_norm": 0.3508071005344391, "learning_rate": 4.7292342008318266e-05, "loss": 0.3947, "step": 4322 }, { "epoch": 0.7039856695029109, "grad_norm": 0.3370726406574249, "learning_rate": 4.7290197445824755e-05, "loss": 0.4066, "step": 4323 }, { "epoch": 0.7041485160607418, "grad_norm": 0.33400192856788635, "learning_rate": 4.728805208304638e-05, "loss": 0.3467, "step": 4324 }, { "epoch": 0.7043113626185726, "grad_norm": 0.4667479991912842, "learning_rate": 4.7285905920060145e-05, "loss": 0.3982, "step": 4325 }, { "epoch": 0.7044742091764036, "grad_norm": 0.3641546368598938, "learning_rate": 4.7283758956943124e-05, "loss": 0.3712, "step": 4326 }, { "epoch": 0.7046370557342344, "grad_norm": 0.30207088589668274, "learning_rate": 4.7281611193772395e-05, "loss": 0.3171, "step": 4327 }, { "epoch": 0.7047999022920654, "grad_norm": 0.2863956391811371, "learning_rate": 4.727946263062507e-05, "loss": 0.3635, "step": 4328 }, { "epoch": 0.7049627488498962, "grad_norm": 0.33378109335899353, "learning_rate": 4.727731326757828e-05, "loss": 0.3707, "step": 4329 }, { "epoch": 0.7051255954077271, "grad_norm": 0.3515019118785858, "learning_rate": 4.72751631047092e-05, "loss": 0.3704, "step": 4330 }, { "epoch": 0.705288441965558, "grad_norm": 0.3626934289932251, "learning_rate": 4.727301214209503e-05, "loss": 0.4361, "step": 4331 }, { "epoch": 0.7054512885233888, "grad_norm": 0.3259809613227844, "learning_rate": 4.727086037981299e-05, "loss": 0.405, "step": 4332 }, { "epoch": 0.7056141350812197, "grad_norm": 0.2781963050365448, "learning_rate": 4.726870781794033e-05, "loss": 0.3707, "step": 4333 }, { "epoch": 0.7057769816390506, "grad_norm": 0.2972210943698883, "learning_rate": 4.726655445655434e-05, "loss": 0.3849, "step": 4334 }, { "epoch": 0.7059398281968815, "grad_norm": 0.27329444885253906, "learning_rate": 4.7264400295732346e-05, "loss": 0.3353, "step": 4335 }, { "epoch": 0.7061026747547123, "grad_norm": 0.36850640177726746, "learning_rate": 4.726224533555167e-05, "loss": 0.3625, "step": 4336 }, { "epoch": 0.7062655213125433, "grad_norm": 0.39089369773864746, "learning_rate": 4.726008957608968e-05, "loss": 0.3673, "step": 4337 }, { "epoch": 0.7064283678703741, "grad_norm": 0.3107571601867676, "learning_rate": 4.72579330174238e-05, "loss": 0.3591, "step": 4338 }, { "epoch": 0.706591214428205, "grad_norm": 0.36370956897735596, "learning_rate": 4.725577565963142e-05, "loss": 0.3995, "step": 4339 }, { "epoch": 0.7067540609860359, "grad_norm": 0.3352470099925995, "learning_rate": 4.7253617502790014e-05, "loss": 0.3418, "step": 4340 }, { "epoch": 0.7069169075438668, "grad_norm": 0.35316506028175354, "learning_rate": 4.725145854697707e-05, "loss": 0.392, "step": 4341 }, { "epoch": 0.7070797541016977, "grad_norm": 0.3461746275424957, "learning_rate": 4.72492987922701e-05, "loss": 0.4064, "step": 4342 }, { "epoch": 0.7072426006595286, "grad_norm": 0.2980040907859802, "learning_rate": 4.724713823874664e-05, "loss": 0.3739, "step": 4343 }, { "epoch": 0.7074054472173594, "grad_norm": 0.33621877431869507, "learning_rate": 4.724497688648426e-05, "loss": 0.3267, "step": 4344 }, { "epoch": 0.7075682937751904, "grad_norm": 0.39297550916671753, "learning_rate": 4.724281473556057e-05, "loss": 0.3928, "step": 4345 }, { "epoch": 0.7077311403330212, "grad_norm": 0.3344658613204956, "learning_rate": 4.724065178605318e-05, "loss": 0.3725, "step": 4346 }, { "epoch": 0.7078939868908521, "grad_norm": 0.306293785572052, "learning_rate": 4.723848803803976e-05, "loss": 0.3898, "step": 4347 }, { "epoch": 0.708056833448683, "grad_norm": 0.3327005207538605, "learning_rate": 4.7236323491598e-05, "loss": 0.3917, "step": 4348 }, { "epoch": 0.7082196800065138, "grad_norm": 0.3533133864402771, "learning_rate": 4.7234158146805597e-05, "loss": 0.4328, "step": 4349 }, { "epoch": 0.7083825265643447, "grad_norm": 0.317940354347229, "learning_rate": 4.72319920037403e-05, "loss": 0.3526, "step": 4350 }, { "epoch": 0.7085453731221756, "grad_norm": 0.3255440592765808, "learning_rate": 4.722982506247988e-05, "loss": 0.3765, "step": 4351 }, { "epoch": 0.7087082196800065, "grad_norm": 0.3885067105293274, "learning_rate": 4.7227657323102145e-05, "loss": 0.4076, "step": 4352 }, { "epoch": 0.7088710662378374, "grad_norm": 0.3340207636356354, "learning_rate": 4.722548878568491e-05, "loss": 0.3901, "step": 4353 }, { "epoch": 0.7090339127956683, "grad_norm": 0.33053526282310486, "learning_rate": 4.7223319450306036e-05, "loss": 0.3723, "step": 4354 }, { "epoch": 0.7091967593534991, "grad_norm": 0.2895316183567047, "learning_rate": 4.722114931704342e-05, "loss": 0.3358, "step": 4355 }, { "epoch": 0.7093596059113301, "grad_norm": 0.3797907531261444, "learning_rate": 4.721897838597496e-05, "loss": 0.4036, "step": 4356 }, { "epoch": 0.7095224524691609, "grad_norm": 0.34775814414024353, "learning_rate": 4.721680665717861e-05, "loss": 0.3728, "step": 4357 }, { "epoch": 0.7096852990269918, "grad_norm": 0.3294849693775177, "learning_rate": 4.721463413073233e-05, "loss": 0.3844, "step": 4358 }, { "epoch": 0.7098481455848227, "grad_norm": 0.35642877221107483, "learning_rate": 4.721246080671414e-05, "loss": 0.3426, "step": 4359 }, { "epoch": 0.7100109921426536, "grad_norm": 0.3843303620815277, "learning_rate": 4.721028668520205e-05, "loss": 0.394, "step": 4360 }, { "epoch": 0.7101738387004844, "grad_norm": 0.38709741830825806, "learning_rate": 4.720811176627412e-05, "loss": 0.3987, "step": 4361 }, { "epoch": 0.7103366852583154, "grad_norm": 0.2743928134441376, "learning_rate": 4.7205936050008446e-05, "loss": 0.3381, "step": 4362 }, { "epoch": 0.7104995318161462, "grad_norm": 0.3378884196281433, "learning_rate": 4.720375953648314e-05, "loss": 0.3864, "step": 4363 }, { "epoch": 0.7106623783739772, "grad_norm": 0.3472919464111328, "learning_rate": 4.720158222577634e-05, "loss": 0.3898, "step": 4364 }, { "epoch": 0.710825224931808, "grad_norm": 0.344058096408844, "learning_rate": 4.7199404117966216e-05, "loss": 0.3734, "step": 4365 }, { "epoch": 0.7109880714896389, "grad_norm": 0.3679428994655609, "learning_rate": 4.719722521313098e-05, "loss": 0.4187, "step": 4366 }, { "epoch": 0.7111509180474698, "grad_norm": 0.3272559344768524, "learning_rate": 4.7195045511348845e-05, "loss": 0.373, "step": 4367 }, { "epoch": 0.7113137646053006, "grad_norm": 0.31043532490730286, "learning_rate": 4.7192865012698085e-05, "loss": 0.3258, "step": 4368 }, { "epoch": 0.7114766111631315, "grad_norm": 0.33212634921073914, "learning_rate": 4.719068371725698e-05, "loss": 0.3656, "step": 4369 }, { "epoch": 0.7116394577209624, "grad_norm": 0.3186299204826355, "learning_rate": 4.7188501625103844e-05, "loss": 0.3552, "step": 4370 }, { "epoch": 0.7118023042787933, "grad_norm": 0.2887749969959259, "learning_rate": 4.7186318736317015e-05, "loss": 0.3306, "step": 4371 }, { "epoch": 0.7119651508366241, "grad_norm": 0.35287460684776306, "learning_rate": 4.718413505097487e-05, "loss": 0.3768, "step": 4372 }, { "epoch": 0.7121279973944551, "grad_norm": 0.34511175751686096, "learning_rate": 4.718195056915582e-05, "loss": 0.374, "step": 4373 }, { "epoch": 0.7122908439522859, "grad_norm": 0.3091610372066498, "learning_rate": 4.717976529093828e-05, "loss": 0.3875, "step": 4374 }, { "epoch": 0.7124536905101169, "grad_norm": 0.3605922758579254, "learning_rate": 4.717757921640072e-05, "loss": 0.3605, "step": 4375 }, { "epoch": 0.7126165370679477, "grad_norm": 0.331881046295166, "learning_rate": 4.717539234562162e-05, "loss": 0.3571, "step": 4376 }, { "epoch": 0.7127793836257786, "grad_norm": 0.3447200655937195, "learning_rate": 4.7173204678679486e-05, "loss": 0.3619, "step": 4377 }, { "epoch": 0.7129422301836095, "grad_norm": 0.29166778922080994, "learning_rate": 4.717101621565288e-05, "loss": 0.3723, "step": 4378 }, { "epoch": 0.7131050767414404, "grad_norm": 0.3044801950454712, "learning_rate": 4.716882695662036e-05, "loss": 0.3512, "step": 4379 }, { "epoch": 0.7132679232992712, "grad_norm": 0.32020115852355957, "learning_rate": 4.716663690166054e-05, "loss": 0.3166, "step": 4380 }, { "epoch": 0.7134307698571022, "grad_norm": 0.358931303024292, "learning_rate": 4.716444605085204e-05, "loss": 0.4541, "step": 4381 }, { "epoch": 0.713593616414933, "grad_norm": 0.3323554992675781, "learning_rate": 4.716225440427352e-05, "loss": 0.383, "step": 4382 }, { "epoch": 0.713756462972764, "grad_norm": 0.31498080492019653, "learning_rate": 4.7160061962003666e-05, "loss": 0.3867, "step": 4383 }, { "epoch": 0.7139193095305948, "grad_norm": 0.4027664363384247, "learning_rate": 4.715786872412119e-05, "loss": 0.4327, "step": 4384 }, { "epoch": 0.7140821560884257, "grad_norm": 0.27210733294487, "learning_rate": 4.715567469070484e-05, "loss": 0.3657, "step": 4385 }, { "epoch": 0.7142450026462566, "grad_norm": 0.32168683409690857, "learning_rate": 4.7153479861833395e-05, "loss": 0.3731, "step": 4386 }, { "epoch": 0.7144078492040874, "grad_norm": 0.29346680641174316, "learning_rate": 4.715128423758565e-05, "loss": 0.3669, "step": 4387 }, { "epoch": 0.7145706957619183, "grad_norm": 0.3259907066822052, "learning_rate": 4.714908781804043e-05, "loss": 0.3389, "step": 4388 }, { "epoch": 0.7147335423197492, "grad_norm": 0.3091544806957245, "learning_rate": 4.71468906032766e-05, "loss": 0.3848, "step": 4389 }, { "epoch": 0.7148963888775801, "grad_norm": 0.32364898920059204, "learning_rate": 4.7144692593373035e-05, "loss": 0.404, "step": 4390 }, { "epoch": 0.715059235435411, "grad_norm": 0.33038443326950073, "learning_rate": 4.7142493788408664e-05, "loss": 0.3564, "step": 4391 }, { "epoch": 0.7152220819932419, "grad_norm": 0.30546027421951294, "learning_rate": 4.714029418846243e-05, "loss": 0.3904, "step": 4392 }, { "epoch": 0.7153849285510727, "grad_norm": 0.35362178087234497, "learning_rate": 4.713809379361329e-05, "loss": 0.3706, "step": 4393 }, { "epoch": 0.7155477751089037, "grad_norm": 0.32474541664123535, "learning_rate": 4.713589260394027e-05, "loss": 0.338, "step": 4394 }, { "epoch": 0.7157106216667345, "grad_norm": 0.31400445103645325, "learning_rate": 4.713369061952237e-05, "loss": 0.3515, "step": 4395 }, { "epoch": 0.7158734682245654, "grad_norm": 0.40592339634895325, "learning_rate": 4.713148784043867e-05, "loss": 0.3939, "step": 4396 }, { "epoch": 0.7160363147823963, "grad_norm": 0.33828240633010864, "learning_rate": 4.712928426676825e-05, "loss": 0.3684, "step": 4397 }, { "epoch": 0.7161991613402272, "grad_norm": 0.34326353669166565, "learning_rate": 4.7127079898590224e-05, "loss": 0.3621, "step": 4398 }, { "epoch": 0.716362007898058, "grad_norm": 0.3281448483467102, "learning_rate": 4.7124874735983725e-05, "loss": 0.373, "step": 4399 }, { "epoch": 0.716524854455889, "grad_norm": 0.34472543001174927, "learning_rate": 4.7122668779027946e-05, "loss": 0.3584, "step": 4400 }, { "epoch": 0.7166877010137198, "grad_norm": 0.3795437514781952, "learning_rate": 4.712046202780207e-05, "loss": 0.3785, "step": 4401 }, { "epoch": 0.7168505475715508, "grad_norm": 0.35010191798210144, "learning_rate": 4.711825448238534e-05, "loss": 0.4102, "step": 4402 }, { "epoch": 0.7170133941293816, "grad_norm": 0.3152003288269043, "learning_rate": 4.711604614285699e-05, "loss": 0.3717, "step": 4403 }, { "epoch": 0.7171762406872125, "grad_norm": 0.31069016456604004, "learning_rate": 4.711383700929634e-05, "loss": 0.351, "step": 4404 }, { "epoch": 0.7173390872450434, "grad_norm": 0.3762871026992798, "learning_rate": 4.711162708178268e-05, "loss": 0.4009, "step": 4405 }, { "epoch": 0.7175019338028742, "grad_norm": 0.36444392800331116, "learning_rate": 4.7109416360395365e-05, "loss": 0.386, "step": 4406 }, { "epoch": 0.7176647803607051, "grad_norm": 0.32484936714172363, "learning_rate": 4.710720484521375e-05, "loss": 0.4278, "step": 4407 }, { "epoch": 0.717827626918536, "grad_norm": 0.3530597984790802, "learning_rate": 4.7104992536317256e-05, "loss": 0.3728, "step": 4408 }, { "epoch": 0.7179904734763669, "grad_norm": 0.34918278455734253, "learning_rate": 4.710277943378529e-05, "loss": 0.345, "step": 4409 }, { "epoch": 0.7181533200341977, "grad_norm": 0.3321375250816345, "learning_rate": 4.710056553769733e-05, "loss": 0.3877, "step": 4410 }, { "epoch": 0.7183161665920287, "grad_norm": 0.32709309458732605, "learning_rate": 4.7098350848132856e-05, "loss": 0.3267, "step": 4411 }, { "epoch": 0.7184790131498595, "grad_norm": 0.3286609649658203, "learning_rate": 4.709613536517137e-05, "loss": 0.3439, "step": 4412 }, { "epoch": 0.7186418597076905, "grad_norm": 0.37311679124832153, "learning_rate": 4.709391908889243e-05, "loss": 0.371, "step": 4413 }, { "epoch": 0.7188047062655213, "grad_norm": 0.3629944324493408, "learning_rate": 4.7091702019375595e-05, "loss": 0.3556, "step": 4414 }, { "epoch": 0.7189675528233522, "grad_norm": 0.3217676281929016, "learning_rate": 4.7089484156700465e-05, "loss": 0.3669, "step": 4415 }, { "epoch": 0.7191303993811831, "grad_norm": 0.2932347059249878, "learning_rate": 4.708726550094667e-05, "loss": 0.3636, "step": 4416 }, { "epoch": 0.719293245939014, "grad_norm": 0.4269225001335144, "learning_rate": 4.708504605219388e-05, "loss": 0.377, "step": 4417 }, { "epoch": 0.7194560924968448, "grad_norm": 0.38854026794433594, "learning_rate": 4.708282581052176e-05, "loss": 0.3988, "step": 4418 }, { "epoch": 0.7196189390546758, "grad_norm": 0.355374813079834, "learning_rate": 4.7080604776010036e-05, "loss": 0.344, "step": 4419 }, { "epoch": 0.7197817856125066, "grad_norm": 0.3631688952445984, "learning_rate": 4.707838294873844e-05, "loss": 0.3836, "step": 4420 }, { "epoch": 0.7199446321703376, "grad_norm": 0.37567204236984253, "learning_rate": 4.707616032878675e-05, "loss": 0.3467, "step": 4421 }, { "epoch": 0.7201074787281684, "grad_norm": 0.40075966715812683, "learning_rate": 4.707393691623476e-05, "loss": 0.3823, "step": 4422 }, { "epoch": 0.7202703252859992, "grad_norm": 0.36708319187164307, "learning_rate": 4.707171271116231e-05, "loss": 0.3652, "step": 4423 }, { "epoch": 0.7204331718438302, "grad_norm": 0.37373772263526917, "learning_rate": 4.706948771364923e-05, "loss": 0.3707, "step": 4424 }, { "epoch": 0.720596018401661, "grad_norm": 0.3339042663574219, "learning_rate": 4.706726192377543e-05, "loss": 0.3374, "step": 4425 }, { "epoch": 0.7207588649594919, "grad_norm": 0.30668914318084717, "learning_rate": 4.706503534162081e-05, "loss": 0.3746, "step": 4426 }, { "epoch": 0.7209217115173228, "grad_norm": 0.31923699378967285, "learning_rate": 4.70628079672653e-05, "loss": 0.358, "step": 4427 }, { "epoch": 0.7210845580751537, "grad_norm": 0.37996983528137207, "learning_rate": 4.7060579800788896e-05, "loss": 0.3774, "step": 4428 }, { "epoch": 0.7212474046329845, "grad_norm": 0.3117096424102783, "learning_rate": 4.705835084227158e-05, "loss": 0.3736, "step": 4429 }, { "epoch": 0.7214102511908155, "grad_norm": 0.3336106836795807, "learning_rate": 4.705612109179338e-05, "loss": 0.3881, "step": 4430 }, { "epoch": 0.7215730977486463, "grad_norm": 0.3351365327835083, "learning_rate": 4.705389054943435e-05, "loss": 0.3702, "step": 4431 }, { "epoch": 0.7217359443064773, "grad_norm": 0.3316614031791687, "learning_rate": 4.705165921527457e-05, "loss": 0.3874, "step": 4432 }, { "epoch": 0.7218987908643081, "grad_norm": 0.2937780022621155, "learning_rate": 4.704942708939416e-05, "loss": 0.3642, "step": 4433 }, { "epoch": 0.722061637422139, "grad_norm": 0.3137097656726837, "learning_rate": 4.7047194171873254e-05, "loss": 0.369, "step": 4434 }, { "epoch": 0.7222244839799699, "grad_norm": 0.36867693066596985, "learning_rate": 4.704496046279203e-05, "loss": 0.4026, "step": 4435 }, { "epoch": 0.7223873305378008, "grad_norm": 0.3183695375919342, "learning_rate": 4.7042725962230666e-05, "loss": 0.3222, "step": 4436 }, { "epoch": 0.7225501770956316, "grad_norm": 0.3454432189464569, "learning_rate": 4.70404906702694e-05, "loss": 0.3887, "step": 4437 }, { "epoch": 0.7227130236534626, "grad_norm": 0.3068017363548279, "learning_rate": 4.703825458698849e-05, "loss": 0.3471, "step": 4438 }, { "epoch": 0.7228758702112934, "grad_norm": 0.38532277941703796, "learning_rate": 4.7036017712468206e-05, "loss": 0.3622, "step": 4439 }, { "epoch": 0.7230387167691243, "grad_norm": 0.32054826617240906, "learning_rate": 4.703378004678887e-05, "loss": 0.3395, "step": 4440 }, { "epoch": 0.7232015633269552, "grad_norm": 0.31320974230766296, "learning_rate": 4.70315415900308e-05, "loss": 0.3578, "step": 4441 }, { "epoch": 0.723364409884786, "grad_norm": 0.3866293728351593, "learning_rate": 4.702930234227439e-05, "loss": 0.4023, "step": 4442 }, { "epoch": 0.723527256442617, "grad_norm": 0.29291296005249023, "learning_rate": 4.702706230360003e-05, "loss": 0.3494, "step": 4443 }, { "epoch": 0.7236901030004478, "grad_norm": 0.3064672648906708, "learning_rate": 4.702482147408813e-05, "loss": 0.355, "step": 4444 }, { "epoch": 0.7238529495582787, "grad_norm": 0.34115907549858093, "learning_rate": 4.702257985381916e-05, "loss": 0.4047, "step": 4445 }, { "epoch": 0.7240157961161096, "grad_norm": 0.35890480875968933, "learning_rate": 4.7020337442873584e-05, "loss": 0.3412, "step": 4446 }, { "epoch": 0.7241786426739405, "grad_norm": 0.3127543032169342, "learning_rate": 4.7018094241331925e-05, "loss": 0.353, "step": 4447 }, { "epoch": 0.7243414892317713, "grad_norm": 0.3747575581073761, "learning_rate": 4.701585024927471e-05, "loss": 0.3504, "step": 4448 }, { "epoch": 0.7245043357896023, "grad_norm": 0.33891749382019043, "learning_rate": 4.7013605466782504e-05, "loss": 0.3533, "step": 4449 }, { "epoch": 0.7246671823474331, "grad_norm": 0.4050604999065399, "learning_rate": 4.7011359893935916e-05, "loss": 0.3859, "step": 4450 }, { "epoch": 0.724830028905264, "grad_norm": 0.34412744641304016, "learning_rate": 4.7009113530815564e-05, "loss": 0.3217, "step": 4451 }, { "epoch": 0.7249928754630949, "grad_norm": 0.3639427721500397, "learning_rate": 4.7006866377502086e-05, "loss": 0.3893, "step": 4452 }, { "epoch": 0.7251557220209258, "grad_norm": 0.3465985655784607, "learning_rate": 4.700461843407617e-05, "loss": 0.3301, "step": 4453 }, { "epoch": 0.7253185685787567, "grad_norm": 0.3855592608451843, "learning_rate": 4.700236970061853e-05, "loss": 0.3574, "step": 4454 }, { "epoch": 0.7254814151365876, "grad_norm": 0.35769331455230713, "learning_rate": 4.7000120177209896e-05, "loss": 0.3477, "step": 4455 }, { "epoch": 0.7256442616944184, "grad_norm": 0.33655649423599243, "learning_rate": 4.699786986393103e-05, "loss": 0.3772, "step": 4456 }, { "epoch": 0.7258071082522494, "grad_norm": 0.32463914155960083, "learning_rate": 4.699561876086274e-05, "loss": 0.3397, "step": 4457 }, { "epoch": 0.7259699548100802, "grad_norm": 0.3319620192050934, "learning_rate": 4.699336686808582e-05, "loss": 0.3666, "step": 4458 }, { "epoch": 0.7261328013679111, "grad_norm": 0.36219263076782227, "learning_rate": 4.6991114185681145e-05, "loss": 0.3727, "step": 4459 }, { "epoch": 0.726295647925742, "grad_norm": 0.331121027469635, "learning_rate": 4.698886071372958e-05, "loss": 0.3453, "step": 4460 }, { "epoch": 0.7264584944835728, "grad_norm": 0.3396735191345215, "learning_rate": 4.6986606452312034e-05, "loss": 0.3746, "step": 4461 }, { "epoch": 0.7266213410414037, "grad_norm": 0.3772244453430176, "learning_rate": 4.698435140150945e-05, "loss": 0.4376, "step": 4462 }, { "epoch": 0.7267841875992346, "grad_norm": 0.3179858326911926, "learning_rate": 4.698209556140278e-05, "loss": 0.3455, "step": 4463 }, { "epoch": 0.7269470341570655, "grad_norm": 0.3244320750236511, "learning_rate": 4.697983893207302e-05, "loss": 0.3744, "step": 4464 }, { "epoch": 0.7271098807148964, "grad_norm": 0.3258175849914551, "learning_rate": 4.697758151360118e-05, "loss": 0.3663, "step": 4465 }, { "epoch": 0.7272727272727273, "grad_norm": 0.3083261549472809, "learning_rate": 4.697532330606832e-05, "loss": 0.3688, "step": 4466 }, { "epoch": 0.7274355738305581, "grad_norm": 0.3836818039417267, "learning_rate": 4.697306430955552e-05, "loss": 0.4044, "step": 4467 }, { "epoch": 0.7275984203883891, "grad_norm": 0.29940512776374817, "learning_rate": 4.697080452414388e-05, "loss": 0.3446, "step": 4468 }, { "epoch": 0.7277612669462199, "grad_norm": 0.357376366853714, "learning_rate": 4.6968543949914525e-05, "loss": 0.3714, "step": 4469 }, { "epoch": 0.7279241135040508, "grad_norm": 0.2838585674762726, "learning_rate": 4.696628258694863e-05, "loss": 0.3886, "step": 4470 }, { "epoch": 0.7280869600618817, "grad_norm": 0.3861864507198334, "learning_rate": 4.696402043532737e-05, "loss": 0.4157, "step": 4471 }, { "epoch": 0.7282498066197126, "grad_norm": 0.3163653314113617, "learning_rate": 4.6961757495131975e-05, "loss": 0.3815, "step": 4472 }, { "epoch": 0.7284126531775434, "grad_norm": 0.3335365056991577, "learning_rate": 4.695949376644368e-05, "loss": 0.3498, "step": 4473 }, { "epoch": 0.7285754997353744, "grad_norm": 0.306384414434433, "learning_rate": 4.695722924934377e-05, "loss": 0.4061, "step": 4474 }, { "epoch": 0.7287383462932052, "grad_norm": 0.2802838981151581, "learning_rate": 4.6954963943913546e-05, "loss": 0.3125, "step": 4475 }, { "epoch": 0.7289011928510362, "grad_norm": 0.37576058506965637, "learning_rate": 4.695269785023434e-05, "loss": 0.365, "step": 4476 }, { "epoch": 0.729064039408867, "grad_norm": 0.32415544986724854, "learning_rate": 4.6950430968387506e-05, "loss": 0.3581, "step": 4477 }, { "epoch": 0.7292268859666978, "grad_norm": 0.3507150709629059, "learning_rate": 4.694816329845443e-05, "loss": 0.3952, "step": 4478 }, { "epoch": 0.7293897325245288, "grad_norm": 0.31464770436286926, "learning_rate": 4.6945894840516544e-05, "loss": 0.3905, "step": 4479 }, { "epoch": 0.7295525790823596, "grad_norm": 0.27943968772888184, "learning_rate": 4.694362559465527e-05, "loss": 0.3372, "step": 4480 }, { "epoch": 0.7297154256401905, "grad_norm": 0.43018144369125366, "learning_rate": 4.69413555609521e-05, "loss": 0.3571, "step": 4481 }, { "epoch": 0.7298782721980214, "grad_norm": 0.3679777681827545, "learning_rate": 4.6939084739488524e-05, "loss": 0.4359, "step": 4482 }, { "epoch": 0.7300411187558523, "grad_norm": 0.327786386013031, "learning_rate": 4.693681313034608e-05, "loss": 0.3581, "step": 4483 }, { "epoch": 0.7302039653136831, "grad_norm": 0.3330100178718567, "learning_rate": 4.693454073360632e-05, "loss": 0.3487, "step": 4484 }, { "epoch": 0.7303668118715141, "grad_norm": 0.38802480697631836, "learning_rate": 4.6932267549350815e-05, "loss": 0.3372, "step": 4485 }, { "epoch": 0.7305296584293449, "grad_norm": 0.36959367990493774, "learning_rate": 4.692999357766121e-05, "loss": 0.4366, "step": 4486 }, { "epoch": 0.7306925049871759, "grad_norm": 0.31500551104545593, "learning_rate": 4.692771881861913e-05, "loss": 0.3585, "step": 4487 }, { "epoch": 0.7308553515450067, "grad_norm": 0.36576682329177856, "learning_rate": 4.692544327230623e-05, "loss": 0.3999, "step": 4488 }, { "epoch": 0.7310181981028376, "grad_norm": 0.4639802575111389, "learning_rate": 4.692316693880425e-05, "loss": 0.3801, "step": 4489 }, { "epoch": 0.7311810446606685, "grad_norm": 0.359090119600296, "learning_rate": 4.692088981819488e-05, "loss": 0.3634, "step": 4490 }, { "epoch": 0.7313438912184994, "grad_norm": 0.35270217061042786, "learning_rate": 4.6918611910559895e-05, "loss": 0.343, "step": 4491 }, { "epoch": 0.7315067377763302, "grad_norm": 0.4560926854610443, "learning_rate": 4.691633321598107e-05, "loss": 0.3617, "step": 4492 }, { "epoch": 0.7316695843341612, "grad_norm": 0.34428805112838745, "learning_rate": 4.691405373454022e-05, "loss": 0.3475, "step": 4493 }, { "epoch": 0.731832430891992, "grad_norm": 0.35438141226768494, "learning_rate": 4.691177346631918e-05, "loss": 0.3845, "step": 4494 }, { "epoch": 0.731995277449823, "grad_norm": 0.45422419905662537, "learning_rate": 4.690949241139983e-05, "loss": 0.4177, "step": 4495 }, { "epoch": 0.7321581240076538, "grad_norm": 0.3192289471626282, "learning_rate": 4.690721056986406e-05, "loss": 0.3542, "step": 4496 }, { "epoch": 0.7323209705654846, "grad_norm": 0.355850487947464, "learning_rate": 4.690492794179379e-05, "loss": 0.377, "step": 4497 }, { "epoch": 0.7324838171233156, "grad_norm": 0.3334619402885437, "learning_rate": 4.690264452727098e-05, "loss": 0.3182, "step": 4498 }, { "epoch": 0.7326466636811464, "grad_norm": 0.32581862807273865, "learning_rate": 4.690036032637761e-05, "loss": 0.3281, "step": 4499 }, { "epoch": 0.7328095102389773, "grad_norm": 0.31100842356681824, "learning_rate": 4.689807533919569e-05, "loss": 0.3524, "step": 4500 }, { "epoch": 0.7329723567968082, "grad_norm": 0.38399603962898254, "learning_rate": 4.6895789565807244e-05, "loss": 0.3731, "step": 4501 }, { "epoch": 0.7331352033546391, "grad_norm": 0.29787224531173706, "learning_rate": 4.689350300629437e-05, "loss": 0.3403, "step": 4502 }, { "epoch": 0.7332980499124699, "grad_norm": 0.3406943082809448, "learning_rate": 4.6891215660739127e-05, "loss": 0.3776, "step": 4503 }, { "epoch": 0.7334608964703009, "grad_norm": 0.4037233889102936, "learning_rate": 4.688892752922367e-05, "loss": 0.4198, "step": 4504 }, { "epoch": 0.7336237430281317, "grad_norm": 0.3280681073665619, "learning_rate": 4.688663861183012e-05, "loss": 0.3498, "step": 4505 }, { "epoch": 0.7337865895859627, "grad_norm": 0.3963386118412018, "learning_rate": 4.688434890864068e-05, "loss": 0.3674, "step": 4506 }, { "epoch": 0.7339494361437935, "grad_norm": 0.3534907102584839, "learning_rate": 4.688205841973754e-05, "loss": 0.4047, "step": 4507 }, { "epoch": 0.7341122827016244, "grad_norm": 0.40179872512817383, "learning_rate": 4.687976714520295e-05, "loss": 0.4042, "step": 4508 }, { "epoch": 0.7342751292594553, "grad_norm": 0.3575461208820343, "learning_rate": 4.687747508511916e-05, "loss": 0.3658, "step": 4509 }, { "epoch": 0.7344379758172862, "grad_norm": 0.37815892696380615, "learning_rate": 4.687518223956847e-05, "loss": 0.3877, "step": 4510 }, { "epoch": 0.734600822375117, "grad_norm": 0.3348422348499298, "learning_rate": 4.687288860863319e-05, "loss": 0.3603, "step": 4511 }, { "epoch": 0.734763668932948, "grad_norm": 0.3472851812839508, "learning_rate": 4.687059419239569e-05, "loss": 0.4104, "step": 4512 }, { "epoch": 0.7349265154907788, "grad_norm": 0.31598666310310364, "learning_rate": 4.686829899093832e-05, "loss": 0.3464, "step": 4513 }, { "epoch": 0.7350893620486098, "grad_norm": 0.2817450761795044, "learning_rate": 4.686600300434351e-05, "loss": 0.3404, "step": 4514 }, { "epoch": 0.7352522086064406, "grad_norm": 0.348296195268631, "learning_rate": 4.6863706232693665e-05, "loss": 0.3925, "step": 4515 }, { "epoch": 0.7354150551642714, "grad_norm": 0.3798094391822815, "learning_rate": 4.686140867607127e-05, "loss": 0.4019, "step": 4516 }, { "epoch": 0.7355779017221024, "grad_norm": 0.592666745185852, "learning_rate": 4.68591103345588e-05, "loss": 0.4344, "step": 4517 }, { "epoch": 0.7357407482799332, "grad_norm": 0.35247623920440674, "learning_rate": 4.685681120823878e-05, "loss": 0.3287, "step": 4518 }, { "epoch": 0.7359035948377641, "grad_norm": 0.42859819531440735, "learning_rate": 4.6854511297193754e-05, "loss": 0.3698, "step": 4519 }, { "epoch": 0.736066441395595, "grad_norm": 0.36547887325286865, "learning_rate": 4.68522106015063e-05, "loss": 0.3626, "step": 4520 }, { "epoch": 0.7362292879534259, "grad_norm": 0.31357255578041077, "learning_rate": 4.6849909121259e-05, "loss": 0.3513, "step": 4521 }, { "epoch": 0.7363921345112567, "grad_norm": 0.34067869186401367, "learning_rate": 4.684760685653451e-05, "loss": 0.3693, "step": 4522 }, { "epoch": 0.7365549810690877, "grad_norm": 0.3065423369407654, "learning_rate": 4.684530380741547e-05, "loss": 0.3601, "step": 4523 }, { "epoch": 0.7367178276269185, "grad_norm": 0.3562273681163788, "learning_rate": 4.684299997398458e-05, "loss": 0.3566, "step": 4524 }, { "epoch": 0.7368806741847495, "grad_norm": 0.351799875497818, "learning_rate": 4.684069535632454e-05, "loss": 0.3609, "step": 4525 }, { "epoch": 0.7370435207425803, "grad_norm": 0.34275907278060913, "learning_rate": 4.68383899545181e-05, "loss": 0.3525, "step": 4526 }, { "epoch": 0.7372063673004112, "grad_norm": 0.33435532450675964, "learning_rate": 4.683608376864804e-05, "loss": 0.3736, "step": 4527 }, { "epoch": 0.7373692138582421, "grad_norm": 0.3313635289669037, "learning_rate": 4.6833776798797145e-05, "loss": 0.3197, "step": 4528 }, { "epoch": 0.737532060416073, "grad_norm": 0.31928375363349915, "learning_rate": 4.683146904504825e-05, "loss": 0.3662, "step": 4529 }, { "epoch": 0.7376949069739038, "grad_norm": 0.28836825489997864, "learning_rate": 4.6829160507484204e-05, "loss": 0.3688, "step": 4530 }, { "epoch": 0.7378577535317348, "grad_norm": 0.40336793661117554, "learning_rate": 4.6826851186187894e-05, "loss": 0.3713, "step": 4531 }, { "epoch": 0.7380206000895656, "grad_norm": 0.35409751534461975, "learning_rate": 4.6824541081242234e-05, "loss": 0.3644, "step": 4532 }, { "epoch": 0.7381834466473965, "grad_norm": 0.32414311170578003, "learning_rate": 4.682223019273015e-05, "loss": 0.3579, "step": 4533 }, { "epoch": 0.7383462932052274, "grad_norm": 0.3514683246612549, "learning_rate": 4.681991852073464e-05, "loss": 0.3512, "step": 4534 }, { "epoch": 0.7385091397630582, "grad_norm": 0.3551316261291504, "learning_rate": 4.681760606533867e-05, "loss": 0.3898, "step": 4535 }, { "epoch": 0.7386719863208892, "grad_norm": 0.30570274591445923, "learning_rate": 4.681529282662528e-05, "loss": 0.3281, "step": 4536 }, { "epoch": 0.73883483287872, "grad_norm": 0.3330424726009369, "learning_rate": 4.681297880467751e-05, "loss": 0.3776, "step": 4537 }, { "epoch": 0.7389976794365509, "grad_norm": 0.3555067479610443, "learning_rate": 4.6810663999578446e-05, "loss": 0.415, "step": 4538 }, { "epoch": 0.7391605259943818, "grad_norm": 0.35976824164390564, "learning_rate": 4.680834841141121e-05, "loss": 0.363, "step": 4539 }, { "epoch": 0.7393233725522127, "grad_norm": 0.334642231464386, "learning_rate": 4.680603204025892e-05, "loss": 0.3609, "step": 4540 }, { "epoch": 0.7394862191100435, "grad_norm": 0.30997970700263977, "learning_rate": 4.6803714886204744e-05, "loss": 0.3464, "step": 4541 }, { "epoch": 0.7396490656678745, "grad_norm": 0.33224427700042725, "learning_rate": 4.680139694933188e-05, "loss": 0.4106, "step": 4542 }, { "epoch": 0.7398119122257053, "grad_norm": 0.3574073612689972, "learning_rate": 4.6799078229723545e-05, "loss": 0.3566, "step": 4543 }, { "epoch": 0.7399747587835362, "grad_norm": 0.33354493975639343, "learning_rate": 4.679675872746299e-05, "loss": 0.3802, "step": 4544 }, { "epoch": 0.7401376053413671, "grad_norm": 0.32248836755752563, "learning_rate": 4.679443844263349e-05, "loss": 0.3668, "step": 4545 }, { "epoch": 0.740300451899198, "grad_norm": 0.3156881332397461, "learning_rate": 4.6792117375318356e-05, "loss": 0.3577, "step": 4546 }, { "epoch": 0.7404632984570289, "grad_norm": 0.30888086557388306, "learning_rate": 4.678979552560092e-05, "loss": 0.3402, "step": 4547 }, { "epoch": 0.7406261450148598, "grad_norm": 0.37317150831222534, "learning_rate": 4.6787472893564535e-05, "loss": 0.3718, "step": 4548 }, { "epoch": 0.7407889915726906, "grad_norm": 0.41430848836898804, "learning_rate": 4.67851494792926e-05, "loss": 0.4164, "step": 4549 }, { "epoch": 0.7409518381305216, "grad_norm": 0.32630598545074463, "learning_rate": 4.678282528286853e-05, "loss": 0.3978, "step": 4550 }, { "epoch": 0.7411146846883524, "grad_norm": 0.35570618510246277, "learning_rate": 4.6780500304375764e-05, "loss": 0.3882, "step": 4551 }, { "epoch": 0.7412775312461832, "grad_norm": 0.280703067779541, "learning_rate": 4.6778174543897786e-05, "loss": 0.3827, "step": 4552 }, { "epoch": 0.7414403778040142, "grad_norm": 0.34177297353744507, "learning_rate": 4.6775848001518085e-05, "loss": 0.392, "step": 4553 }, { "epoch": 0.741603224361845, "grad_norm": 0.3750324547290802, "learning_rate": 4.67735206773202e-05, "loss": 0.4179, "step": 4554 }, { "epoch": 0.741766070919676, "grad_norm": 0.37703999876976013, "learning_rate": 4.67711925713877e-05, "loss": 0.4, "step": 4555 }, { "epoch": 0.7419289174775068, "grad_norm": 0.320552259683609, "learning_rate": 4.6768863683804146e-05, "loss": 0.3163, "step": 4556 }, { "epoch": 0.7420917640353377, "grad_norm": 0.3389590084552765, "learning_rate": 4.6766534014653175e-05, "loss": 0.3866, "step": 4557 }, { "epoch": 0.7422546105931686, "grad_norm": 0.4870862662792206, "learning_rate": 4.6764203564018406e-05, "loss": 0.3603, "step": 4558 }, { "epoch": 0.7424174571509995, "grad_norm": 0.37614524364471436, "learning_rate": 4.676187233198353e-05, "loss": 0.3611, "step": 4559 }, { "epoch": 0.7425803037088303, "grad_norm": 0.38425540924072266, "learning_rate": 4.6759540318632234e-05, "loss": 0.3833, "step": 4560 }, { "epoch": 0.7427431502666613, "grad_norm": 0.3121049702167511, "learning_rate": 4.6757207524048245e-05, "loss": 0.3193, "step": 4561 }, { "epoch": 0.7429059968244921, "grad_norm": 0.5323511958122253, "learning_rate": 4.675487394831533e-05, "loss": 0.3437, "step": 4562 }, { "epoch": 0.743068843382323, "grad_norm": 0.3539394438266754, "learning_rate": 4.675253959151725e-05, "loss": 0.3893, "step": 4563 }, { "epoch": 0.7432316899401539, "grad_norm": 0.2969585955142975, "learning_rate": 4.6750204453737826e-05, "loss": 0.3863, "step": 4564 }, { "epoch": 0.7433945364979848, "grad_norm": 0.5521952509880066, "learning_rate": 4.67478685350609e-05, "loss": 0.3685, "step": 4565 }, { "epoch": 0.7435573830558156, "grad_norm": 0.44163379073143005, "learning_rate": 4.674553183557034e-05, "loss": 0.3448, "step": 4566 }, { "epoch": 0.7437202296136466, "grad_norm": 0.4008917212486267, "learning_rate": 4.674319435535003e-05, "loss": 0.3868, "step": 4567 }, { "epoch": 0.7438830761714774, "grad_norm": 0.364362508058548, "learning_rate": 4.67408560944839e-05, "loss": 0.36, "step": 4568 }, { "epoch": 0.7440459227293084, "grad_norm": 0.4480123817920685, "learning_rate": 4.6738517053055894e-05, "loss": 0.4047, "step": 4569 }, { "epoch": 0.7442087692871392, "grad_norm": 0.3697793185710907, "learning_rate": 4.6736177231150004e-05, "loss": 0.3678, "step": 4570 }, { "epoch": 0.74437161584497, "grad_norm": 0.32525959610939026, "learning_rate": 4.6733836628850225e-05, "loss": 0.3284, "step": 4571 }, { "epoch": 0.744534462402801, "grad_norm": 0.42170417308807373, "learning_rate": 4.6731495246240593e-05, "loss": 0.3844, "step": 4572 }, { "epoch": 0.7446973089606318, "grad_norm": 0.39232510328292847, "learning_rate": 4.672915308340518e-05, "loss": 0.3738, "step": 4573 }, { "epoch": 0.7448601555184627, "grad_norm": 0.3073717951774597, "learning_rate": 4.6726810140428056e-05, "loss": 0.3458, "step": 4574 }, { "epoch": 0.7450230020762936, "grad_norm": 0.3855130672454834, "learning_rate": 4.672446641739336e-05, "loss": 0.3645, "step": 4575 }, { "epoch": 0.7451858486341245, "grad_norm": 0.3938096761703491, "learning_rate": 4.672212191438523e-05, "loss": 0.3864, "step": 4576 }, { "epoch": 0.7453486951919553, "grad_norm": 0.32801997661590576, "learning_rate": 4.6719776631487843e-05, "loss": 0.3695, "step": 4577 }, { "epoch": 0.7455115417497863, "grad_norm": 0.39137476682662964, "learning_rate": 4.67174305687854e-05, "loss": 0.3736, "step": 4578 }, { "epoch": 0.7456743883076171, "grad_norm": 0.3372924029827118, "learning_rate": 4.671508372636213e-05, "loss": 0.3772, "step": 4579 }, { "epoch": 0.7458372348654481, "grad_norm": 0.34375813603401184, "learning_rate": 4.67127361043023e-05, "loss": 0.3943, "step": 4580 }, { "epoch": 0.7460000814232789, "grad_norm": 0.40681901574134827, "learning_rate": 4.671038770269018e-05, "loss": 0.3698, "step": 4581 }, { "epoch": 0.7461629279811098, "grad_norm": 0.3286571800708771, "learning_rate": 4.670803852161011e-05, "loss": 0.3572, "step": 4582 }, { "epoch": 0.7463257745389407, "grad_norm": 0.2959434688091278, "learning_rate": 4.670568856114641e-05, "loss": 0.3479, "step": 4583 }, { "epoch": 0.7464886210967716, "grad_norm": 0.33973297476768494, "learning_rate": 4.670333782138346e-05, "loss": 0.3395, "step": 4584 }, { "epoch": 0.7466514676546024, "grad_norm": 0.3102482259273529, "learning_rate": 4.670098630240566e-05, "loss": 0.3455, "step": 4585 }, { "epoch": 0.7468143142124334, "grad_norm": 0.3170558512210846, "learning_rate": 4.6698634004297425e-05, "loss": 0.3567, "step": 4586 }, { "epoch": 0.7469771607702642, "grad_norm": 0.40845346450805664, "learning_rate": 4.6696280927143224e-05, "loss": 0.4366, "step": 4587 }, { "epoch": 0.7471400073280952, "grad_norm": 0.26207804679870605, "learning_rate": 4.669392707102753e-05, "loss": 0.336, "step": 4588 }, { "epoch": 0.747302853885926, "grad_norm": 0.3869127631187439, "learning_rate": 4.6691572436034864e-05, "loss": 0.399, "step": 4589 }, { "epoch": 0.7474657004437568, "grad_norm": 0.4195515811443329, "learning_rate": 4.668921702224975e-05, "loss": 0.439, "step": 4590 }, { "epoch": 0.7476285470015878, "grad_norm": 0.29792848229408264, "learning_rate": 4.6686860829756774e-05, "loss": 0.4143, "step": 4591 }, { "epoch": 0.7477913935594186, "grad_norm": 0.28021931648254395, "learning_rate": 4.668450385864051e-05, "loss": 0.3549, "step": 4592 }, { "epoch": 0.7479542401172495, "grad_norm": 0.30771604180336, "learning_rate": 4.668214610898559e-05, "loss": 0.3346, "step": 4593 }, { "epoch": 0.7481170866750804, "grad_norm": 0.32782208919525146, "learning_rate": 4.6679787580876667e-05, "loss": 0.3936, "step": 4594 }, { "epoch": 0.7482799332329113, "grad_norm": 0.3256168067455292, "learning_rate": 4.6677428274398415e-05, "loss": 0.3705, "step": 4595 }, { "epoch": 0.7484427797907421, "grad_norm": 0.33917635679244995, "learning_rate": 4.667506818963554e-05, "loss": 0.3357, "step": 4596 }, { "epoch": 0.7486056263485731, "grad_norm": 0.4433431625366211, "learning_rate": 4.667270732667277e-05, "loss": 0.4264, "step": 4597 }, { "epoch": 0.7487684729064039, "grad_norm": 0.2967957854270935, "learning_rate": 4.667034568559489e-05, "loss": 0.3277, "step": 4598 }, { "epoch": 0.7489313194642349, "grad_norm": 0.453720360994339, "learning_rate": 4.6667983266486656e-05, "loss": 0.3928, "step": 4599 }, { "epoch": 0.7490941660220657, "grad_norm": 0.40130695700645447, "learning_rate": 4.666562006943291e-05, "loss": 0.3862, "step": 4600 }, { "epoch": 0.7492570125798966, "grad_norm": 0.38900095224380493, "learning_rate": 4.666325609451849e-05, "loss": 0.3982, "step": 4601 }, { "epoch": 0.7494198591377275, "grad_norm": 0.36120274662971497, "learning_rate": 4.666089134182827e-05, "loss": 0.3608, "step": 4602 }, { "epoch": 0.7495827056955584, "grad_norm": 0.3449665904045105, "learning_rate": 4.6658525811447164e-05, "loss": 0.3542, "step": 4603 }, { "epoch": 0.7497455522533892, "grad_norm": 0.29548409581184387, "learning_rate": 4.6656159503460084e-05, "loss": 0.3267, "step": 4604 }, { "epoch": 0.7499083988112202, "grad_norm": 0.41843315958976746, "learning_rate": 4.665379241795199e-05, "loss": 0.3782, "step": 4605 }, { "epoch": 0.750071245369051, "grad_norm": 0.4233231544494629, "learning_rate": 4.6651424555007874e-05, "loss": 0.4116, "step": 4606 }, { "epoch": 0.7502340919268818, "grad_norm": 0.3443589508533478, "learning_rate": 4.664905591471275e-05, "loss": 0.3564, "step": 4607 }, { "epoch": 0.7503969384847128, "grad_norm": 0.3859761357307434, "learning_rate": 4.664668649715165e-05, "loss": 0.3942, "step": 4608 }, { "epoch": 0.7505597850425436, "grad_norm": 0.37634527683258057, "learning_rate": 4.6644316302409655e-05, "loss": 0.3614, "step": 4609 }, { "epoch": 0.7507226316003746, "grad_norm": 0.3921613097190857, "learning_rate": 4.6641945330571854e-05, "loss": 0.3695, "step": 4610 }, { "epoch": 0.7508854781582054, "grad_norm": 0.366414874792099, "learning_rate": 4.663957358172338e-05, "loss": 0.3802, "step": 4611 }, { "epoch": 0.7510483247160363, "grad_norm": 0.35497212409973145, "learning_rate": 4.6637201055949376e-05, "loss": 0.3498, "step": 4612 }, { "epoch": 0.7512111712738672, "grad_norm": 0.33810579776763916, "learning_rate": 4.663482775333503e-05, "loss": 0.3402, "step": 4613 }, { "epoch": 0.7513740178316981, "grad_norm": 0.32962852716445923, "learning_rate": 4.6632453673965545e-05, "loss": 0.3645, "step": 4614 }, { "epoch": 0.7515368643895289, "grad_norm": 0.4764882028102875, "learning_rate": 4.6630078817926163e-05, "loss": 0.4129, "step": 4615 }, { "epoch": 0.7516997109473599, "grad_norm": 0.3201645612716675, "learning_rate": 4.662770318530215e-05, "loss": 0.3749, "step": 4616 }, { "epoch": 0.7518625575051907, "grad_norm": 0.3077351748943329, "learning_rate": 4.6625326776178784e-05, "loss": 0.3662, "step": 4617 }, { "epoch": 0.7520254040630217, "grad_norm": 0.3629700541496277, "learning_rate": 4.6622949590641405e-05, "loss": 0.3944, "step": 4618 }, { "epoch": 0.7521882506208525, "grad_norm": 0.40412500500679016, "learning_rate": 4.6620571628775345e-05, "loss": 0.3788, "step": 4619 }, { "epoch": 0.7523510971786834, "grad_norm": 0.3036063313484192, "learning_rate": 4.661819289066599e-05, "loss": 0.3522, "step": 4620 }, { "epoch": 0.7525139437365143, "grad_norm": 0.275126576423645, "learning_rate": 4.661581337639874e-05, "loss": 0.3257, "step": 4621 }, { "epoch": 0.7526767902943452, "grad_norm": 0.33133745193481445, "learning_rate": 4.661343308605902e-05, "loss": 0.3759, "step": 4622 }, { "epoch": 0.752839636852176, "grad_norm": 0.29217803478240967, "learning_rate": 4.6611052019732304e-05, "loss": 0.3267, "step": 4623 }, { "epoch": 0.753002483410007, "grad_norm": 0.3410983085632324, "learning_rate": 4.660867017750408e-05, "loss": 0.3816, "step": 4624 }, { "epoch": 0.7531653299678378, "grad_norm": 0.359649658203125, "learning_rate": 4.660628755945984e-05, "loss": 0.3696, "step": 4625 }, { "epoch": 0.7533281765256686, "grad_norm": 0.29135647416114807, "learning_rate": 4.6603904165685154e-05, "loss": 0.3374, "step": 4626 }, { "epoch": 0.7534910230834996, "grad_norm": 0.3220513164997101, "learning_rate": 4.660151999626557e-05, "loss": 0.399, "step": 4627 }, { "epoch": 0.7536538696413304, "grad_norm": 0.3544995188713074, "learning_rate": 4.659913505128671e-05, "loss": 0.3907, "step": 4628 }, { "epoch": 0.7538167161991614, "grad_norm": 0.3386608064174652, "learning_rate": 4.659674933083418e-05, "loss": 0.3384, "step": 4629 }, { "epoch": 0.7539795627569922, "grad_norm": 0.33768510818481445, "learning_rate": 4.6594362834993646e-05, "loss": 0.3575, "step": 4630 }, { "epoch": 0.7541424093148231, "grad_norm": 0.2980666160583496, "learning_rate": 4.659197556385079e-05, "loss": 0.3498, "step": 4631 }, { "epoch": 0.754305255872654, "grad_norm": 0.3098090887069702, "learning_rate": 4.658958751749132e-05, "loss": 0.331, "step": 4632 }, { "epoch": 0.7544681024304849, "grad_norm": 0.3545854985713959, "learning_rate": 4.658719869600097e-05, "loss": 0.3544, "step": 4633 }, { "epoch": 0.7546309489883157, "grad_norm": 0.3250499665737152, "learning_rate": 4.658480909946551e-05, "loss": 0.4055, "step": 4634 }, { "epoch": 0.7547937955461467, "grad_norm": 0.3415524959564209, "learning_rate": 4.6582418727970736e-05, "loss": 0.3991, "step": 4635 }, { "epoch": 0.7549566421039775, "grad_norm": 0.3488142788410187, "learning_rate": 4.658002758160247e-05, "loss": 0.3673, "step": 4636 }, { "epoch": 0.7551194886618084, "grad_norm": 0.39328867197036743, "learning_rate": 4.657763566044655e-05, "loss": 0.3882, "step": 4637 }, { "epoch": 0.7552823352196393, "grad_norm": 0.29836612939834595, "learning_rate": 4.657524296458887e-05, "loss": 0.3349, "step": 4638 }, { "epoch": 0.7554451817774702, "grad_norm": 0.35570579767227173, "learning_rate": 4.6572849494115324e-05, "loss": 0.4104, "step": 4639 }, { "epoch": 0.755608028335301, "grad_norm": 0.41502442955970764, "learning_rate": 4.657045524911185e-05, "loss": 0.3585, "step": 4640 }, { "epoch": 0.755770874893132, "grad_norm": 0.3893769085407257, "learning_rate": 4.65680602296644e-05, "loss": 0.3633, "step": 4641 }, { "epoch": 0.7559337214509628, "grad_norm": 0.28305569291114807, "learning_rate": 4.656566443585897e-05, "loss": 0.3464, "step": 4642 }, { "epoch": 0.7560965680087938, "grad_norm": 0.4157661199569702, "learning_rate": 4.656326786778158e-05, "loss": 0.3626, "step": 4643 }, { "epoch": 0.7562594145666246, "grad_norm": 0.38817650079727173, "learning_rate": 4.656087052551826e-05, "loss": 0.3856, "step": 4644 }, { "epoch": 0.7564222611244554, "grad_norm": 0.33416426181793213, "learning_rate": 4.65584724091551e-05, "loss": 0.3663, "step": 4645 }, { "epoch": 0.7565851076822864, "grad_norm": 0.3931443691253662, "learning_rate": 4.6556073518778175e-05, "loss": 0.4209, "step": 4646 }, { "epoch": 0.7567479542401172, "grad_norm": 0.34427115321159363, "learning_rate": 4.655367385447363e-05, "loss": 0.3716, "step": 4647 }, { "epoch": 0.7569108007979481, "grad_norm": 0.3838924169540405, "learning_rate": 4.6551273416327626e-05, "loss": 0.41, "step": 4648 }, { "epoch": 0.757073647355779, "grad_norm": 0.34085965156555176, "learning_rate": 4.654887220442633e-05, "loss": 0.3925, "step": 4649 }, { "epoch": 0.7572364939136099, "grad_norm": 0.3142017722129822, "learning_rate": 4.654647021885596e-05, "loss": 0.3531, "step": 4650 }, { "epoch": 0.7573993404714408, "grad_norm": 0.33632150292396545, "learning_rate": 4.654406745970276e-05, "loss": 0.3416, "step": 4651 }, { "epoch": 0.7575621870292717, "grad_norm": 0.3541465401649475, "learning_rate": 4.654166392705299e-05, "loss": 0.3209, "step": 4652 }, { "epoch": 0.7577250335871025, "grad_norm": 0.34365227818489075, "learning_rate": 4.653925962099293e-05, "loss": 0.3467, "step": 4653 }, { "epoch": 0.7578878801449335, "grad_norm": 0.3100414574146271, "learning_rate": 4.653685454160894e-05, "loss": 0.3512, "step": 4654 }, { "epoch": 0.7580507267027643, "grad_norm": 0.3602394461631775, "learning_rate": 4.653444868898733e-05, "loss": 0.3737, "step": 4655 }, { "epoch": 0.7582135732605952, "grad_norm": 0.3564399182796478, "learning_rate": 4.65320420632145e-05, "loss": 0.3608, "step": 4656 }, { "epoch": 0.7583764198184261, "grad_norm": 0.31219664216041565, "learning_rate": 4.652963466437684e-05, "loss": 0.3263, "step": 4657 }, { "epoch": 0.758539266376257, "grad_norm": 0.2845398187637329, "learning_rate": 4.65272264925608e-05, "loss": 0.3566, "step": 4658 }, { "epoch": 0.7587021129340878, "grad_norm": 0.3759877681732178, "learning_rate": 4.6524817547852825e-05, "loss": 0.395, "step": 4659 }, { "epoch": 0.7588649594919188, "grad_norm": 0.3288711607456207, "learning_rate": 4.652240783033941e-05, "loss": 0.4047, "step": 4660 }, { "epoch": 0.7590278060497496, "grad_norm": 0.3719812035560608, "learning_rate": 4.651999734010708e-05, "loss": 0.4046, "step": 4661 }, { "epoch": 0.7591906526075805, "grad_norm": 0.31180790066719055, "learning_rate": 4.6517586077242356e-05, "loss": 0.3726, "step": 4662 }, { "epoch": 0.7593534991654114, "grad_norm": 0.29497483372688293, "learning_rate": 4.6515174041831834e-05, "loss": 0.3812, "step": 4663 }, { "epoch": 0.7595163457232422, "grad_norm": 0.3284513056278229, "learning_rate": 4.65127612339621e-05, "loss": 0.3914, "step": 4664 }, { "epoch": 0.7596791922810732, "grad_norm": 0.3615168631076813, "learning_rate": 4.651034765371979e-05, "loss": 0.4115, "step": 4665 }, { "epoch": 0.759842038838904, "grad_norm": 0.34965527057647705, "learning_rate": 4.650793330119154e-05, "loss": 0.3744, "step": 4666 }, { "epoch": 0.7600048853967349, "grad_norm": 0.30478620529174805, "learning_rate": 4.6505518176464054e-05, "loss": 0.3721, "step": 4667 }, { "epoch": 0.7601677319545658, "grad_norm": 0.3321065902709961, "learning_rate": 4.6503102279624025e-05, "loss": 0.3559, "step": 4668 }, { "epoch": 0.7603305785123967, "grad_norm": 0.33546796441078186, "learning_rate": 4.650068561075821e-05, "loss": 0.3616, "step": 4669 }, { "epoch": 0.7604934250702275, "grad_norm": 0.4622245132923126, "learning_rate": 4.6498268169953353e-05, "loss": 0.3883, "step": 4670 }, { "epoch": 0.7606562716280585, "grad_norm": 0.3384813666343689, "learning_rate": 4.6495849957296264e-05, "loss": 0.3735, "step": 4671 }, { "epoch": 0.7608191181858893, "grad_norm": 0.43761736154556274, "learning_rate": 4.649343097287376e-05, "loss": 0.3939, "step": 4672 }, { "epoch": 0.7609819647437203, "grad_norm": 0.3788510262966156, "learning_rate": 4.649101121677268e-05, "loss": 0.3433, "step": 4673 }, { "epoch": 0.7611448113015511, "grad_norm": 0.3706590533256531, "learning_rate": 4.648859068907992e-05, "loss": 0.3791, "step": 4674 }, { "epoch": 0.761307657859382, "grad_norm": 0.40189576148986816, "learning_rate": 4.6486169389882365e-05, "loss": 0.3821, "step": 4675 }, { "epoch": 0.7614705044172129, "grad_norm": 0.37639570236206055, "learning_rate": 4.648374731926696e-05, "loss": 0.3261, "step": 4676 }, { "epoch": 0.7616333509750438, "grad_norm": 0.3400759696960449, "learning_rate": 4.6481324477320654e-05, "loss": 0.3927, "step": 4677 }, { "epoch": 0.7617961975328746, "grad_norm": 0.3276215195655823, "learning_rate": 4.6478900864130436e-05, "loss": 0.3505, "step": 4678 }, { "epoch": 0.7619590440907056, "grad_norm": 0.34359246492385864, "learning_rate": 4.647647647978333e-05, "loss": 0.3363, "step": 4679 }, { "epoch": 0.7621218906485364, "grad_norm": 0.5514836311340332, "learning_rate": 4.647405132436638e-05, "loss": 0.4336, "step": 4680 }, { "epoch": 0.7622847372063672, "grad_norm": 0.35812926292419434, "learning_rate": 4.647162539796663e-05, "loss": 0.3234, "step": 4681 }, { "epoch": 0.7624475837641982, "grad_norm": 0.38130420446395874, "learning_rate": 4.646919870067122e-05, "loss": 0.3878, "step": 4682 }, { "epoch": 0.762610430322029, "grad_norm": 0.468314528465271, "learning_rate": 4.646677123256724e-05, "loss": 0.3718, "step": 4683 }, { "epoch": 0.76277327687986, "grad_norm": 0.31248030066490173, "learning_rate": 4.646434299374186e-05, "loss": 0.3668, "step": 4684 }, { "epoch": 0.7629361234376908, "grad_norm": 0.378854900598526, "learning_rate": 4.6461913984282255e-05, "loss": 0.373, "step": 4685 }, { "epoch": 0.7630989699955217, "grad_norm": 0.3205396831035614, "learning_rate": 4.645948420427564e-05, "loss": 0.3427, "step": 4686 }, { "epoch": 0.7632618165533526, "grad_norm": 0.33715662360191345, "learning_rate": 4.6457053653809246e-05, "loss": 0.3747, "step": 4687 }, { "epoch": 0.7634246631111835, "grad_norm": 0.32274654507637024, "learning_rate": 4.6454622332970347e-05, "loss": 0.3564, "step": 4688 }, { "epoch": 0.7635875096690143, "grad_norm": 0.3127436637878418, "learning_rate": 4.645219024184621e-05, "loss": 0.3481, "step": 4689 }, { "epoch": 0.7637503562268453, "grad_norm": 0.3418111205101013, "learning_rate": 4.6449757380524184e-05, "loss": 0.3556, "step": 4690 }, { "epoch": 0.7639132027846761, "grad_norm": 0.37019479274749756, "learning_rate": 4.64473237490916e-05, "loss": 0.3782, "step": 4691 }, { "epoch": 0.7640760493425071, "grad_norm": 0.3246113359928131, "learning_rate": 4.644488934763583e-05, "loss": 0.3983, "step": 4692 }, { "epoch": 0.7642388959003379, "grad_norm": 0.2988186180591583, "learning_rate": 4.644245417624429e-05, "loss": 0.3444, "step": 4693 }, { "epoch": 0.7644017424581688, "grad_norm": 0.32981422543525696, "learning_rate": 4.644001823500439e-05, "loss": 0.3877, "step": 4694 }, { "epoch": 0.7645645890159997, "grad_norm": 0.36070024967193604, "learning_rate": 4.643758152400361e-05, "loss": 0.4258, "step": 4695 }, { "epoch": 0.7647274355738306, "grad_norm": 0.30920013785362244, "learning_rate": 4.6435144043329414e-05, "loss": 0.3724, "step": 4696 }, { "epoch": 0.7648902821316614, "grad_norm": 0.2966296970844269, "learning_rate": 4.643270579306934e-05, "loss": 0.3807, "step": 4697 }, { "epoch": 0.7650531286894924, "grad_norm": 0.3866778612136841, "learning_rate": 4.64302667733109e-05, "loss": 0.4382, "step": 4698 }, { "epoch": 0.7652159752473232, "grad_norm": 0.36718833446502686, "learning_rate": 4.642782698414169e-05, "loss": 0.3404, "step": 4699 }, { "epoch": 0.765378821805154, "grad_norm": 0.3864240050315857, "learning_rate": 4.642538642564928e-05, "loss": 0.3536, "step": 4700 }, { "epoch": 0.765541668362985, "grad_norm": 0.3471173942089081, "learning_rate": 4.6422945097921314e-05, "loss": 0.4127, "step": 4701 }, { "epoch": 0.7657045149208158, "grad_norm": 0.2956347167491913, "learning_rate": 4.6420503001045424e-05, "loss": 0.3402, "step": 4702 }, { "epoch": 0.7658673614786468, "grad_norm": 0.3996664881706238, "learning_rate": 4.641806013510931e-05, "loss": 0.3803, "step": 4703 }, { "epoch": 0.7660302080364776, "grad_norm": 0.4095534086227417, "learning_rate": 4.641561650020066e-05, "loss": 0.3619, "step": 4704 }, { "epoch": 0.7661930545943085, "grad_norm": 0.3414611518383026, "learning_rate": 4.641317209640722e-05, "loss": 0.3926, "step": 4705 }, { "epoch": 0.7663559011521394, "grad_norm": 0.36677831411361694, "learning_rate": 4.641072692381674e-05, "loss": 0.3535, "step": 4706 }, { "epoch": 0.7665187477099703, "grad_norm": 0.29258954524993896, "learning_rate": 4.640828098251702e-05, "loss": 0.3026, "step": 4707 }, { "epoch": 0.7666815942678011, "grad_norm": 0.4024745523929596, "learning_rate": 4.640583427259586e-05, "loss": 0.4034, "step": 4708 }, { "epoch": 0.7668444408256321, "grad_norm": 0.3286340534687042, "learning_rate": 4.640338679414113e-05, "loss": 0.3742, "step": 4709 }, { "epoch": 0.7670072873834629, "grad_norm": 0.3671807050704956, "learning_rate": 4.640093854724068e-05, "loss": 0.3473, "step": 4710 }, { "epoch": 0.7671701339412939, "grad_norm": 0.34191203117370605, "learning_rate": 4.639848953198243e-05, "loss": 0.3551, "step": 4711 }, { "epoch": 0.7673329804991247, "grad_norm": 0.3902393579483032, "learning_rate": 4.639603974845428e-05, "loss": 0.3507, "step": 4712 }, { "epoch": 0.7674958270569556, "grad_norm": 0.38800033926963806, "learning_rate": 4.6393589196744204e-05, "loss": 0.3219, "step": 4713 }, { "epoch": 0.7676586736147865, "grad_norm": 0.34086307883262634, "learning_rate": 4.6391137876940175e-05, "loss": 0.3525, "step": 4714 }, { "epoch": 0.7678215201726174, "grad_norm": 0.40370360016822815, "learning_rate": 4.638868578913021e-05, "loss": 0.4052, "step": 4715 }, { "epoch": 0.7679843667304482, "grad_norm": 0.28659138083457947, "learning_rate": 4.638623293340234e-05, "loss": 0.3392, "step": 4716 }, { "epoch": 0.7681472132882792, "grad_norm": 0.4066431522369385, "learning_rate": 4.6383779309844636e-05, "loss": 0.3967, "step": 4717 }, { "epoch": 0.76831005984611, "grad_norm": 0.41497665643692017, "learning_rate": 4.638132491854519e-05, "loss": 0.3814, "step": 4718 }, { "epoch": 0.7684729064039408, "grad_norm": 0.34497183561325073, "learning_rate": 4.6378869759592116e-05, "loss": 0.3611, "step": 4719 }, { "epoch": 0.7686357529617718, "grad_norm": 0.4451943337917328, "learning_rate": 4.637641383307357e-05, "loss": 0.3855, "step": 4720 }, { "epoch": 0.7687985995196026, "grad_norm": 0.41443079710006714, "learning_rate": 4.637395713907771e-05, "loss": 0.3821, "step": 4721 }, { "epoch": 0.7689614460774336, "grad_norm": 0.3061927556991577, "learning_rate": 4.6371499677692764e-05, "loss": 0.412, "step": 4722 }, { "epoch": 0.7691242926352644, "grad_norm": 0.4108762741088867, "learning_rate": 4.636904144900694e-05, "loss": 0.3751, "step": 4723 }, { "epoch": 0.7692871391930953, "grad_norm": 0.38752007484436035, "learning_rate": 4.6366582453108506e-05, "loss": 0.3899, "step": 4724 }, { "epoch": 0.7694499857509262, "grad_norm": 0.3624110221862793, "learning_rate": 4.636412269008575e-05, "loss": 0.3991, "step": 4725 }, { "epoch": 0.7696128323087571, "grad_norm": 0.38971588015556335, "learning_rate": 4.6361662160026977e-05, "loss": 0.3962, "step": 4726 }, { "epoch": 0.7697756788665879, "grad_norm": 0.368242472410202, "learning_rate": 4.635920086302053e-05, "loss": 0.3289, "step": 4727 }, { "epoch": 0.7699385254244189, "grad_norm": 0.37581849098205566, "learning_rate": 4.635673879915478e-05, "loss": 0.4359, "step": 4728 }, { "epoch": 0.7701013719822497, "grad_norm": 0.28445836901664734, "learning_rate": 4.635427596851812e-05, "loss": 0.3409, "step": 4729 }, { "epoch": 0.7702642185400806, "grad_norm": 0.2988907992839813, "learning_rate": 4.6351812371198974e-05, "loss": 0.3072, "step": 4730 }, { "epoch": 0.7704270650979115, "grad_norm": 0.3290705382823944, "learning_rate": 4.6349348007285796e-05, "loss": 0.3344, "step": 4731 }, { "epoch": 0.7705899116557424, "grad_norm": 0.32825741171836853, "learning_rate": 4.634688287686706e-05, "loss": 0.3743, "step": 4732 }, { "epoch": 0.7707527582135733, "grad_norm": 0.3165305256843567, "learning_rate": 4.634441698003127e-05, "loss": 0.3943, "step": 4733 }, { "epoch": 0.7709156047714042, "grad_norm": 0.3663202226161957, "learning_rate": 4.6341950316866956e-05, "loss": 0.3673, "step": 4734 }, { "epoch": 0.771078451329235, "grad_norm": 0.29647794365882874, "learning_rate": 4.633948288746268e-05, "loss": 0.3578, "step": 4735 }, { "epoch": 0.7712412978870659, "grad_norm": 0.33056116104125977, "learning_rate": 4.633701469190704e-05, "loss": 0.3887, "step": 4736 }, { "epoch": 0.7714041444448968, "grad_norm": 0.34575122594833374, "learning_rate": 4.633454573028865e-05, "loss": 0.3943, "step": 4737 }, { "epoch": 0.7715669910027276, "grad_norm": 0.38205525279045105, "learning_rate": 4.6332076002696144e-05, "loss": 0.3751, "step": 4738 }, { "epoch": 0.7717298375605586, "grad_norm": 0.3386288285255432, "learning_rate": 4.63296055092182e-05, "loss": 0.3904, "step": 4739 }, { "epoch": 0.7718926841183894, "grad_norm": 0.3480408191680908, "learning_rate": 4.6327134249943504e-05, "loss": 0.3886, "step": 4740 }, { "epoch": 0.7720555306762203, "grad_norm": 0.39086365699768066, "learning_rate": 4.63246622249608e-05, "loss": 0.3861, "step": 4741 }, { "epoch": 0.7722183772340512, "grad_norm": 0.3405575454235077, "learning_rate": 4.632218943435883e-05, "loss": 0.3633, "step": 4742 }, { "epoch": 0.7723812237918821, "grad_norm": 0.30531030893325806, "learning_rate": 4.6319715878226376e-05, "loss": 0.3648, "step": 4743 }, { "epoch": 0.772544070349713, "grad_norm": 0.330221563577652, "learning_rate": 4.631724155665224e-05, "loss": 0.3723, "step": 4744 }, { "epoch": 0.7727069169075439, "grad_norm": 0.32250508666038513, "learning_rate": 4.631476646972527e-05, "loss": 0.3572, "step": 4745 }, { "epoch": 0.7728697634653747, "grad_norm": 0.40478500723838806, "learning_rate": 4.631229061753432e-05, "loss": 0.3573, "step": 4746 }, { "epoch": 0.7730326100232057, "grad_norm": 0.306787371635437, "learning_rate": 4.630981400016829e-05, "loss": 0.3593, "step": 4747 }, { "epoch": 0.7731954565810365, "grad_norm": 0.27674898505210876, "learning_rate": 4.6307336617716085e-05, "loss": 0.3442, "step": 4748 }, { "epoch": 0.7733583031388674, "grad_norm": 0.3989448547363281, "learning_rate": 4.630485847026666e-05, "loss": 0.356, "step": 4749 }, { "epoch": 0.7735211496966983, "grad_norm": 0.35015344619750977, "learning_rate": 4.630237955790898e-05, "loss": 0.3438, "step": 4750 }, { "epoch": 0.7736839962545292, "grad_norm": 0.3207092583179474, "learning_rate": 4.6299899880732056e-05, "loss": 0.3309, "step": 4751 }, { "epoch": 0.77384684281236, "grad_norm": 0.35585254430770874, "learning_rate": 4.629741943882491e-05, "loss": 0.3946, "step": 4752 }, { "epoch": 0.774009689370191, "grad_norm": 0.35381656885147095, "learning_rate": 4.6294938232276594e-05, "loss": 0.3769, "step": 4753 }, { "epoch": 0.7741725359280218, "grad_norm": 0.39177805185317993, "learning_rate": 4.6292456261176196e-05, "loss": 0.3643, "step": 4754 }, { "epoch": 0.7743353824858527, "grad_norm": 0.36632758378982544, "learning_rate": 4.628997352561282e-05, "loss": 0.4136, "step": 4755 }, { "epoch": 0.7744982290436836, "grad_norm": 0.31628474593162537, "learning_rate": 4.6287490025675615e-05, "loss": 0.4122, "step": 4756 }, { "epoch": 0.7746610756015144, "grad_norm": 0.4172753691673279, "learning_rate": 4.6285005761453734e-05, "loss": 0.3965, "step": 4757 }, { "epoch": 0.7748239221593454, "grad_norm": 0.4083126187324524, "learning_rate": 4.628252073303638e-05, "loss": 0.4099, "step": 4758 }, { "epoch": 0.7749867687171762, "grad_norm": 0.2862468361854553, "learning_rate": 4.628003494051276e-05, "loss": 0.3728, "step": 4759 }, { "epoch": 0.7751496152750071, "grad_norm": 0.2943032383918762, "learning_rate": 4.6277548383972134e-05, "loss": 0.3591, "step": 4760 }, { "epoch": 0.775312461832838, "grad_norm": 0.3201507031917572, "learning_rate": 4.6275061063503775e-05, "loss": 0.3329, "step": 4761 }, { "epoch": 0.7754753083906689, "grad_norm": 0.3406035006046295, "learning_rate": 4.627257297919697e-05, "loss": 0.3535, "step": 4762 }, { "epoch": 0.7756381549484997, "grad_norm": 0.3462323248386383, "learning_rate": 4.6270084131141075e-05, "loss": 0.3104, "step": 4763 }, { "epoch": 0.7758010015063307, "grad_norm": 0.33650127053260803, "learning_rate": 4.6267594519425426e-05, "loss": 0.3364, "step": 4764 }, { "epoch": 0.7759638480641615, "grad_norm": 0.3302413821220398, "learning_rate": 4.6265104144139417e-05, "loss": 0.3429, "step": 4765 }, { "epoch": 0.7761266946219925, "grad_norm": 0.3160649240016937, "learning_rate": 4.6262613005372454e-05, "loss": 0.3459, "step": 4766 }, { "epoch": 0.7762895411798233, "grad_norm": 0.35029229521751404, "learning_rate": 4.626012110321398e-05, "loss": 0.3822, "step": 4767 }, { "epoch": 0.7764523877376542, "grad_norm": 0.3283268213272095, "learning_rate": 4.6257628437753457e-05, "loss": 0.3666, "step": 4768 }, { "epoch": 0.7766152342954851, "grad_norm": 0.33720862865448, "learning_rate": 4.625513500908039e-05, "loss": 0.3387, "step": 4769 }, { "epoch": 0.776778080853316, "grad_norm": 0.29260915517807007, "learning_rate": 4.625264081728429e-05, "loss": 0.3413, "step": 4770 }, { "epoch": 0.7769409274111468, "grad_norm": 0.31219950318336487, "learning_rate": 4.6250145862454716e-05, "loss": 0.3482, "step": 4771 }, { "epoch": 0.7771037739689778, "grad_norm": 0.29907333850860596, "learning_rate": 4.6247650144681235e-05, "loss": 0.3507, "step": 4772 }, { "epoch": 0.7772666205268086, "grad_norm": 0.29915133118629456, "learning_rate": 4.6245153664053455e-05, "loss": 0.3427, "step": 4773 }, { "epoch": 0.7774294670846394, "grad_norm": 0.3987474739551544, "learning_rate": 4.6242656420661e-05, "loss": 0.3701, "step": 4774 }, { "epoch": 0.7775923136424704, "grad_norm": 0.301532506942749, "learning_rate": 4.624015841459354e-05, "loss": 0.371, "step": 4775 }, { "epoch": 0.7777551602003012, "grad_norm": 0.37523889541625977, "learning_rate": 4.623765964594075e-05, "loss": 0.3801, "step": 4776 }, { "epoch": 0.7779180067581322, "grad_norm": 0.3223176896572113, "learning_rate": 4.623516011479236e-05, "loss": 0.3824, "step": 4777 }, { "epoch": 0.778080853315963, "grad_norm": 0.31532543897628784, "learning_rate": 4.62326598212381e-05, "loss": 0.3559, "step": 4778 }, { "epoch": 0.7782436998737939, "grad_norm": 0.29966485500335693, "learning_rate": 4.623015876536773e-05, "loss": 0.3389, "step": 4779 }, { "epoch": 0.7784065464316248, "grad_norm": 0.36595210433006287, "learning_rate": 4.622765694727106e-05, "loss": 0.3775, "step": 4780 }, { "epoch": 0.7785693929894557, "grad_norm": 0.3375012278556824, "learning_rate": 4.62251543670379e-05, "loss": 0.3703, "step": 4781 }, { "epoch": 0.7787322395472865, "grad_norm": 0.3113560974597931, "learning_rate": 4.622265102475811e-05, "loss": 0.3633, "step": 4782 }, { "epoch": 0.7788950861051175, "grad_norm": 0.34801286458969116, "learning_rate": 4.6220146920521554e-05, "loss": 0.3872, "step": 4783 }, { "epoch": 0.7790579326629483, "grad_norm": 0.30348265171051025, "learning_rate": 4.621764205441815e-05, "loss": 0.3242, "step": 4784 }, { "epoch": 0.7792207792207793, "grad_norm": 0.3312409222126007, "learning_rate": 4.621513642653783e-05, "loss": 0.4016, "step": 4785 }, { "epoch": 0.7793836257786101, "grad_norm": 0.29837408661842346, "learning_rate": 4.621263003697055e-05, "loss": 0.3512, "step": 4786 }, { "epoch": 0.779546472336441, "grad_norm": 0.5813236236572266, "learning_rate": 4.62101228858063e-05, "loss": 0.4276, "step": 4787 }, { "epoch": 0.7797093188942719, "grad_norm": 0.3382926881313324, "learning_rate": 4.6207614973135085e-05, "loss": 0.4081, "step": 4788 }, { "epoch": 0.7798721654521028, "grad_norm": 0.3207986652851105, "learning_rate": 4.620510629904696e-05, "loss": 0.3618, "step": 4789 }, { "epoch": 0.7800350120099336, "grad_norm": 0.31380772590637207, "learning_rate": 4.620259686363198e-05, "loss": 0.3826, "step": 4790 }, { "epoch": 0.7801978585677645, "grad_norm": 0.28404778242111206, "learning_rate": 4.6200086666980245e-05, "loss": 0.3442, "step": 4791 }, { "epoch": 0.7803607051255954, "grad_norm": 0.3568488657474518, "learning_rate": 4.61975757091819e-05, "loss": 0.375, "step": 4792 }, { "epoch": 0.7805235516834262, "grad_norm": 0.3018733859062195, "learning_rate": 4.6195063990327056e-05, "loss": 0.331, "step": 4793 }, { "epoch": 0.7806863982412572, "grad_norm": 0.42586779594421387, "learning_rate": 4.619255151050592e-05, "loss": 0.3603, "step": 4794 }, { "epoch": 0.780849244799088, "grad_norm": 0.2819579243659973, "learning_rate": 4.619003826980869e-05, "loss": 0.3572, "step": 4795 }, { "epoch": 0.781012091356919, "grad_norm": 0.3542795181274414, "learning_rate": 4.61875242683256e-05, "loss": 0.411, "step": 4796 }, { "epoch": 0.7811749379147498, "grad_norm": 0.34447672963142395, "learning_rate": 4.6185009506146906e-05, "loss": 0.3201, "step": 4797 }, { "epoch": 0.7813377844725807, "grad_norm": 0.3715387284755707, "learning_rate": 4.618249398336291e-05, "loss": 0.3425, "step": 4798 }, { "epoch": 0.7815006310304116, "grad_norm": 0.3589710295200348, "learning_rate": 4.61799777000639e-05, "loss": 0.3714, "step": 4799 }, { "epoch": 0.7816634775882425, "grad_norm": 0.33393624424934387, "learning_rate": 4.6177460656340244e-05, "loss": 0.3478, "step": 4800 }, { "epoch": 0.7818263241460733, "grad_norm": 0.4056151509284973, "learning_rate": 4.61749428522823e-05, "loss": 0.375, "step": 4801 }, { "epoch": 0.7819891707039043, "grad_norm": 0.33325955271720886, "learning_rate": 4.6172424287980466e-05, "loss": 0.3532, "step": 4802 }, { "epoch": 0.7821520172617351, "grad_norm": 0.45954009890556335, "learning_rate": 4.6169904963525156e-05, "loss": 0.3735, "step": 4803 }, { "epoch": 0.782314863819566, "grad_norm": 0.39518705010414124, "learning_rate": 4.616738487900684e-05, "loss": 0.3592, "step": 4804 }, { "epoch": 0.7824777103773969, "grad_norm": 0.3195440471172333, "learning_rate": 4.616486403451599e-05, "loss": 0.4389, "step": 4805 }, { "epoch": 0.7826405569352278, "grad_norm": 0.3782689571380615, "learning_rate": 4.61623424301431e-05, "loss": 0.3879, "step": 4806 }, { "epoch": 0.7828034034930587, "grad_norm": 0.35152092576026917, "learning_rate": 4.615982006597872e-05, "loss": 0.3534, "step": 4807 }, { "epoch": 0.7829662500508896, "grad_norm": 0.3707939386367798, "learning_rate": 4.615729694211341e-05, "loss": 0.373, "step": 4808 }, { "epoch": 0.7831290966087204, "grad_norm": 0.32142746448516846, "learning_rate": 4.615477305863773e-05, "loss": 0.37, "step": 4809 }, { "epoch": 0.7832919431665513, "grad_norm": 0.35969895124435425, "learning_rate": 4.615224841564233e-05, "loss": 0.3652, "step": 4810 }, { "epoch": 0.7834547897243822, "grad_norm": 0.36263203620910645, "learning_rate": 4.614972301321784e-05, "loss": 0.3645, "step": 4811 }, { "epoch": 0.783617636282213, "grad_norm": 0.32590681314468384, "learning_rate": 4.614719685145492e-05, "loss": 0.3805, "step": 4812 }, { "epoch": 0.783780482840044, "grad_norm": 0.3954017758369446, "learning_rate": 4.614466993044429e-05, "loss": 0.3675, "step": 4813 }, { "epoch": 0.7839433293978748, "grad_norm": 0.36446064710617065, "learning_rate": 4.6142142250276635e-05, "loss": 0.3794, "step": 4814 }, { "epoch": 0.7841061759557058, "grad_norm": 0.3269025981426239, "learning_rate": 4.613961381104275e-05, "loss": 0.3657, "step": 4815 }, { "epoch": 0.7842690225135366, "grad_norm": 0.32806143164634705, "learning_rate": 4.6137084612833384e-05, "loss": 0.3536, "step": 4816 }, { "epoch": 0.7844318690713675, "grad_norm": 0.30737167596817017, "learning_rate": 4.613455465573935e-05, "loss": 0.3297, "step": 4817 }, { "epoch": 0.7845947156291984, "grad_norm": 0.314325749874115, "learning_rate": 4.613202393985149e-05, "loss": 0.335, "step": 4818 }, { "epoch": 0.7847575621870293, "grad_norm": 0.3970620334148407, "learning_rate": 4.612949246526065e-05, "loss": 0.3787, "step": 4819 }, { "epoch": 0.7849204087448601, "grad_norm": 0.3356247842311859, "learning_rate": 4.6126960232057725e-05, "loss": 0.3422, "step": 4820 }, { "epoch": 0.7850832553026911, "grad_norm": 0.5406286716461182, "learning_rate": 4.6124427240333634e-05, "loss": 0.4398, "step": 4821 }, { "epoch": 0.7852461018605219, "grad_norm": 0.36848798394203186, "learning_rate": 4.612189349017931e-05, "loss": 0.3705, "step": 4822 }, { "epoch": 0.7854089484183528, "grad_norm": 0.3443678021430969, "learning_rate": 4.611935898168574e-05, "loss": 0.3725, "step": 4823 }, { "epoch": 0.7855717949761837, "grad_norm": 0.37502866983413696, "learning_rate": 4.6116823714943893e-05, "loss": 0.3785, "step": 4824 }, { "epoch": 0.7857346415340146, "grad_norm": 0.34353432059288025, "learning_rate": 4.611428769004481e-05, "loss": 0.3645, "step": 4825 }, { "epoch": 0.7858974880918455, "grad_norm": 0.2834469974040985, "learning_rate": 4.611175090707953e-05, "loss": 0.2934, "step": 4826 }, { "epoch": 0.7860603346496764, "grad_norm": 0.3606857359409332, "learning_rate": 4.610921336613916e-05, "loss": 0.3572, "step": 4827 }, { "epoch": 0.7862231812075072, "grad_norm": 0.36242836713790894, "learning_rate": 4.6106675067314774e-05, "loss": 0.3756, "step": 4828 }, { "epoch": 0.7863860277653381, "grad_norm": 0.34964537620544434, "learning_rate": 4.610413601069752e-05, "loss": 0.3553, "step": 4829 }, { "epoch": 0.786548874323169, "grad_norm": 0.31934866309165955, "learning_rate": 4.6101596196378546e-05, "loss": 0.3876, "step": 4830 }, { "epoch": 0.7867117208809998, "grad_norm": 0.3212851881980896, "learning_rate": 4.609905562444905e-05, "loss": 0.3275, "step": 4831 }, { "epoch": 0.7868745674388308, "grad_norm": 0.3169311583042145, "learning_rate": 4.6096514295000244e-05, "loss": 0.3332, "step": 4832 }, { "epoch": 0.7870374139966616, "grad_norm": 0.4035259485244751, "learning_rate": 4.609397220812337e-05, "loss": 0.4166, "step": 4833 }, { "epoch": 0.7872002605544925, "grad_norm": 0.32326844334602356, "learning_rate": 4.609142936390969e-05, "loss": 0.3582, "step": 4834 }, { "epoch": 0.7873631071123234, "grad_norm": 0.3494635224342346, "learning_rate": 4.6088885762450505e-05, "loss": 0.382, "step": 4835 }, { "epoch": 0.7875259536701543, "grad_norm": 0.3022269904613495, "learning_rate": 4.608634140383714e-05, "loss": 0.3627, "step": 4836 }, { "epoch": 0.7876888002279852, "grad_norm": 0.5068188905715942, "learning_rate": 4.6083796288160944e-05, "loss": 0.3663, "step": 4837 }, { "epoch": 0.7878516467858161, "grad_norm": 0.4909426271915436, "learning_rate": 4.6081250415513285e-05, "loss": 0.3443, "step": 4838 }, { "epoch": 0.7880144933436469, "grad_norm": 0.3133639395236969, "learning_rate": 4.607870378598558e-05, "loss": 0.376, "step": 4839 }, { "epoch": 0.7881773399014779, "grad_norm": 0.4017581641674042, "learning_rate": 4.6076156399669254e-05, "loss": 0.3753, "step": 4840 }, { "epoch": 0.7883401864593087, "grad_norm": 0.2985617220401764, "learning_rate": 4.607360825665577e-05, "loss": 0.3537, "step": 4841 }, { "epoch": 0.7885030330171396, "grad_norm": 0.33306485414505005, "learning_rate": 4.6071059357036606e-05, "loss": 0.3745, "step": 4842 }, { "epoch": 0.7886658795749705, "grad_norm": 0.3584228754043579, "learning_rate": 4.606850970090328e-05, "loss": 0.3677, "step": 4843 }, { "epoch": 0.7888287261328014, "grad_norm": 0.3352604806423187, "learning_rate": 4.606595928834734e-05, "loss": 0.3903, "step": 4844 }, { "epoch": 0.7889915726906322, "grad_norm": 0.31689396500587463, "learning_rate": 4.606340811946034e-05, "loss": 0.3168, "step": 4845 }, { "epoch": 0.7891544192484632, "grad_norm": 0.34354284405708313, "learning_rate": 4.606085619433389e-05, "loss": 0.3585, "step": 4846 }, { "epoch": 0.789317265806294, "grad_norm": 0.34407737851142883, "learning_rate": 4.60583035130596e-05, "loss": 0.325, "step": 4847 }, { "epoch": 0.7894801123641249, "grad_norm": 0.3097764551639557, "learning_rate": 4.6055750075729106e-05, "loss": 0.3185, "step": 4848 }, { "epoch": 0.7896429589219558, "grad_norm": 0.32882043719291687, "learning_rate": 4.605319588243412e-05, "loss": 0.3798, "step": 4849 }, { "epoch": 0.7898058054797866, "grad_norm": 0.3549964725971222, "learning_rate": 4.605064093326631e-05, "loss": 0.3576, "step": 4850 }, { "epoch": 0.7899686520376176, "grad_norm": 0.29909688234329224, "learning_rate": 4.604808522831743e-05, "loss": 0.3197, "step": 4851 }, { "epoch": 0.7901314985954484, "grad_norm": 0.38880887627601624, "learning_rate": 4.604552876767922e-05, "loss": 0.3601, "step": 4852 }, { "epoch": 0.7902943451532793, "grad_norm": 0.3936094045639038, "learning_rate": 4.604297155144348e-05, "loss": 0.3896, "step": 4853 }, { "epoch": 0.7904571917111102, "grad_norm": 0.3551054894924164, "learning_rate": 4.604041357970202e-05, "loss": 0.3737, "step": 4854 }, { "epoch": 0.7906200382689411, "grad_norm": 0.3510969877243042, "learning_rate": 4.603785485254667e-05, "loss": 0.4312, "step": 4855 }, { "epoch": 0.790782884826772, "grad_norm": 0.386184424161911, "learning_rate": 4.60352953700693e-05, "loss": 0.3788, "step": 4856 }, { "epoch": 0.7909457313846029, "grad_norm": 0.36854344606399536, "learning_rate": 4.603273513236179e-05, "loss": 0.3398, "step": 4857 }, { "epoch": 0.7911085779424337, "grad_norm": 0.32991576194763184, "learning_rate": 4.603017413951609e-05, "loss": 0.356, "step": 4858 }, { "epoch": 0.7912714245002647, "grad_norm": 0.33113494515419006, "learning_rate": 4.602761239162412e-05, "loss": 0.3915, "step": 4859 }, { "epoch": 0.7914342710580955, "grad_norm": 0.3135838806629181, "learning_rate": 4.602504988877787e-05, "loss": 0.3704, "step": 4860 }, { "epoch": 0.7915971176159264, "grad_norm": 0.3142680525779724, "learning_rate": 4.602248663106934e-05, "loss": 0.3813, "step": 4861 }, { "epoch": 0.7917599641737573, "grad_norm": 0.3407571613788605, "learning_rate": 4.601992261859054e-05, "loss": 0.3995, "step": 4862 }, { "epoch": 0.7919228107315882, "grad_norm": 0.3661751449108124, "learning_rate": 4.6017357851433554e-05, "loss": 0.4079, "step": 4863 }, { "epoch": 0.792085657289419, "grad_norm": 0.3564651310443878, "learning_rate": 4.601479232969045e-05, "loss": 0.3826, "step": 4864 }, { "epoch": 0.7922485038472499, "grad_norm": 0.3558118939399719, "learning_rate": 4.601222605345333e-05, "loss": 0.3579, "step": 4865 }, { "epoch": 0.7924113504050808, "grad_norm": 0.29212531447410583, "learning_rate": 4.6009659022814353e-05, "loss": 0.3338, "step": 4866 }, { "epoch": 0.7925741969629116, "grad_norm": 0.34644320607185364, "learning_rate": 4.600709123786566e-05, "loss": 0.3571, "step": 4867 }, { "epoch": 0.7927370435207426, "grad_norm": 0.3182062804698944, "learning_rate": 4.600452269869946e-05, "loss": 0.3605, "step": 4868 }, { "epoch": 0.7928998900785734, "grad_norm": 0.31708869338035583, "learning_rate": 4.600195340540796e-05, "loss": 0.3543, "step": 4869 }, { "epoch": 0.7930627366364044, "grad_norm": 0.3657853901386261, "learning_rate": 4.5999383358083415e-05, "loss": 0.3657, "step": 4870 }, { "epoch": 0.7932255831942352, "grad_norm": 0.2812715470790863, "learning_rate": 4.5996812556818086e-05, "loss": 0.3488, "step": 4871 }, { "epoch": 0.7933884297520661, "grad_norm": 0.3667362332344055, "learning_rate": 4.599424100170428e-05, "loss": 0.4122, "step": 4872 }, { "epoch": 0.793551276309897, "grad_norm": 0.31991758942604065, "learning_rate": 4.599166869283432e-05, "loss": 0.377, "step": 4873 }, { "epoch": 0.7937141228677279, "grad_norm": 0.41940751671791077, "learning_rate": 4.598909563030056e-05, "loss": 0.3937, "step": 4874 }, { "epoch": 0.7938769694255587, "grad_norm": 0.3198198676109314, "learning_rate": 4.5986521814195386e-05, "loss": 0.3611, "step": 4875 }, { "epoch": 0.7940398159833897, "grad_norm": 0.3864063620567322, "learning_rate": 4.59839472446112e-05, "loss": 0.4157, "step": 4876 }, { "epoch": 0.7942026625412205, "grad_norm": 0.2757321000099182, "learning_rate": 4.5981371921640437e-05, "loss": 0.3593, "step": 4877 }, { "epoch": 0.7943655090990515, "grad_norm": 0.36390671133995056, "learning_rate": 4.597879584537556e-05, "loss": 0.4063, "step": 4878 }, { "epoch": 0.7945283556568823, "grad_norm": 0.31107082962989807, "learning_rate": 4.597621901590906e-05, "loss": 0.3484, "step": 4879 }, { "epoch": 0.7946912022147132, "grad_norm": 0.32576191425323486, "learning_rate": 4.5973641433333445e-05, "loss": 0.3763, "step": 4880 }, { "epoch": 0.7948540487725441, "grad_norm": 0.32367146015167236, "learning_rate": 4.5971063097741276e-05, "loss": 0.3727, "step": 4881 }, { "epoch": 0.795016895330375, "grad_norm": 0.2988743484020233, "learning_rate": 4.59684840092251e-05, "loss": 0.3505, "step": 4882 }, { "epoch": 0.7951797418882058, "grad_norm": 0.3101158142089844, "learning_rate": 4.596590416787753e-05, "loss": 0.3487, "step": 4883 }, { "epoch": 0.7953425884460367, "grad_norm": 0.3817078769207001, "learning_rate": 4.596332357379118e-05, "loss": 0.3834, "step": 4884 }, { "epoch": 0.7955054350038676, "grad_norm": 0.32237792015075684, "learning_rate": 4.596074222705871e-05, "loss": 0.3383, "step": 4885 }, { "epoch": 0.7956682815616984, "grad_norm": 0.34526026248931885, "learning_rate": 4.59581601277728e-05, "loss": 0.3877, "step": 4886 }, { "epoch": 0.7958311281195294, "grad_norm": 0.29862284660339355, "learning_rate": 4.595557727602614e-05, "loss": 0.4019, "step": 4887 }, { "epoch": 0.7959939746773602, "grad_norm": 0.3022404909133911, "learning_rate": 4.5952993671911473e-05, "loss": 0.3694, "step": 4888 }, { "epoch": 0.7961568212351912, "grad_norm": 0.296184778213501, "learning_rate": 4.595040931552155e-05, "loss": 0.3642, "step": 4889 }, { "epoch": 0.796319667793022, "grad_norm": 0.32681265473365784, "learning_rate": 4.5947824206949177e-05, "loss": 0.3869, "step": 4890 }, { "epoch": 0.7964825143508529, "grad_norm": 0.2828090488910675, "learning_rate": 4.5945238346287146e-05, "loss": 0.3315, "step": 4891 }, { "epoch": 0.7966453609086838, "grad_norm": 0.2835480868816376, "learning_rate": 4.59426517336283e-05, "loss": 0.3173, "step": 4892 }, { "epoch": 0.7968082074665147, "grad_norm": 0.30058926343917847, "learning_rate": 4.594006436906552e-05, "loss": 0.3261, "step": 4893 }, { "epoch": 0.7969710540243455, "grad_norm": 0.34552061557769775, "learning_rate": 4.593747625269169e-05, "loss": 0.3857, "step": 4894 }, { "epoch": 0.7971339005821765, "grad_norm": 0.33282050490379333, "learning_rate": 4.5934887384599725e-05, "loss": 0.3906, "step": 4895 }, { "epoch": 0.7972967471400073, "grad_norm": 0.3088690936565399, "learning_rate": 4.593229776488259e-05, "loss": 0.3677, "step": 4896 }, { "epoch": 0.7974595936978383, "grad_norm": 0.2708967328071594, "learning_rate": 4.592970739363324e-05, "loss": 0.3136, "step": 4897 }, { "epoch": 0.7976224402556691, "grad_norm": 0.3650391697883606, "learning_rate": 4.59271162709447e-05, "loss": 0.3793, "step": 4898 }, { "epoch": 0.7977852868135, "grad_norm": 0.31029945611953735, "learning_rate": 4.592452439690997e-05, "loss": 0.3167, "step": 4899 }, { "epoch": 0.7979481333713309, "grad_norm": 0.31368696689605713, "learning_rate": 4.592193177162214e-05, "loss": 0.3666, "step": 4900 }, { "epoch": 0.7981109799291618, "grad_norm": 0.28910592198371887, "learning_rate": 4.5919338395174267e-05, "loss": 0.3554, "step": 4901 }, { "epoch": 0.7982738264869926, "grad_norm": 0.27458474040031433, "learning_rate": 4.591674426765947e-05, "loss": 0.3377, "step": 4902 }, { "epoch": 0.7984366730448235, "grad_norm": 0.3483346700668335, "learning_rate": 4.591414938917088e-05, "loss": 0.3473, "step": 4903 }, { "epoch": 0.7985995196026544, "grad_norm": 0.3570846617221832, "learning_rate": 4.591155375980167e-05, "loss": 0.3639, "step": 4904 }, { "epoch": 0.7987623661604852, "grad_norm": 0.33778828382492065, "learning_rate": 4.590895737964503e-05, "loss": 0.3746, "step": 4905 }, { "epoch": 0.7989252127183162, "grad_norm": 0.3273330628871918, "learning_rate": 4.590636024879418e-05, "loss": 0.3469, "step": 4906 }, { "epoch": 0.799088059276147, "grad_norm": 0.3125663101673126, "learning_rate": 4.5903762367342355e-05, "loss": 0.3585, "step": 4907 }, { "epoch": 0.799250905833978, "grad_norm": 0.38011279702186584, "learning_rate": 4.5901163735382837e-05, "loss": 0.3693, "step": 4908 }, { "epoch": 0.7994137523918088, "grad_norm": 0.3873703181743622, "learning_rate": 4.589856435300891e-05, "loss": 0.3388, "step": 4909 }, { "epoch": 0.7995765989496397, "grad_norm": 0.37481802701950073, "learning_rate": 4.589596422031392e-05, "loss": 0.3782, "step": 4910 }, { "epoch": 0.7997394455074706, "grad_norm": 0.3564058244228363, "learning_rate": 4.5893363337391196e-05, "loss": 0.3941, "step": 4911 }, { "epoch": 0.7999022920653015, "grad_norm": 0.3348034918308258, "learning_rate": 4.589076170433413e-05, "loss": 0.3328, "step": 4912 }, { "epoch": 0.8000651386231323, "grad_norm": 0.2927248179912567, "learning_rate": 4.5888159321236134e-05, "loss": 0.3497, "step": 4913 }, { "epoch": 0.8002279851809633, "grad_norm": 0.31591513752937317, "learning_rate": 4.5885556188190636e-05, "loss": 0.3494, "step": 4914 }, { "epoch": 0.8003908317387941, "grad_norm": 0.32166531682014465, "learning_rate": 4.5882952305291094e-05, "loss": 0.3742, "step": 4915 }, { "epoch": 0.800553678296625, "grad_norm": 0.3487502932548523, "learning_rate": 4.5880347672631e-05, "loss": 0.3813, "step": 4916 }, { "epoch": 0.8007165248544559, "grad_norm": 0.3174673318862915, "learning_rate": 4.5877742290303864e-05, "loss": 0.361, "step": 4917 }, { "epoch": 0.8008793714122868, "grad_norm": 0.3166733384132385, "learning_rate": 4.5875136158403234e-05, "loss": 0.396, "step": 4918 }, { "epoch": 0.8010422179701177, "grad_norm": 0.344027042388916, "learning_rate": 4.5872529277022665e-05, "loss": 0.3721, "step": 4919 }, { "epoch": 0.8012050645279485, "grad_norm": 0.29716041684150696, "learning_rate": 4.586992164625576e-05, "loss": 0.3556, "step": 4920 }, { "epoch": 0.8013679110857794, "grad_norm": 0.347201943397522, "learning_rate": 4.5867313266196146e-05, "loss": 0.3544, "step": 4921 }, { "epoch": 0.8015307576436103, "grad_norm": 0.34726518392562866, "learning_rate": 4.586470413693745e-05, "loss": 0.3436, "step": 4922 }, { "epoch": 0.8016936042014412, "grad_norm": 0.2950088083744049, "learning_rate": 4.586209425857338e-05, "loss": 0.3561, "step": 4923 }, { "epoch": 0.801856450759272, "grad_norm": 0.31204307079315186, "learning_rate": 4.585948363119761e-05, "loss": 0.3327, "step": 4924 }, { "epoch": 0.802019297317103, "grad_norm": 0.3654027283191681, "learning_rate": 4.5856872254903894e-05, "loss": 0.3885, "step": 4925 }, { "epoch": 0.8021821438749338, "grad_norm": 0.319804310798645, "learning_rate": 4.585426012978596e-05, "loss": 0.3688, "step": 4926 }, { "epoch": 0.8023449904327647, "grad_norm": 0.2962057590484619, "learning_rate": 4.585164725593762e-05, "loss": 0.3548, "step": 4927 }, { "epoch": 0.8025078369905956, "grad_norm": 0.2916525602340698, "learning_rate": 4.5849033633452666e-05, "loss": 0.3401, "step": 4928 }, { "epoch": 0.8026706835484265, "grad_norm": 0.31923848390579224, "learning_rate": 4.5846419262424936e-05, "loss": 0.3399, "step": 4929 }, { "epoch": 0.8028335301062574, "grad_norm": 0.352338969707489, "learning_rate": 4.5843804142948296e-05, "loss": 0.3938, "step": 4930 }, { "epoch": 0.8029963766640883, "grad_norm": 0.32140079140663147, "learning_rate": 4.5841188275116645e-05, "loss": 0.3595, "step": 4931 }, { "epoch": 0.8031592232219191, "grad_norm": 0.31584712862968445, "learning_rate": 4.583857165902388e-05, "loss": 0.3206, "step": 4932 }, { "epoch": 0.8033220697797501, "grad_norm": 0.38638344407081604, "learning_rate": 4.583595429476397e-05, "loss": 0.3754, "step": 4933 }, { "epoch": 0.8034849163375809, "grad_norm": 0.2716386914253235, "learning_rate": 4.583333618243087e-05, "loss": 0.3149, "step": 4934 }, { "epoch": 0.8036477628954118, "grad_norm": 0.2913658916950226, "learning_rate": 4.583071732211858e-05, "loss": 0.3457, "step": 4935 }, { "epoch": 0.8038106094532427, "grad_norm": 0.30646947026252747, "learning_rate": 4.582809771392114e-05, "loss": 0.32, "step": 4936 }, { "epoch": 0.8039734560110736, "grad_norm": 0.3059285283088684, "learning_rate": 4.582547735793258e-05, "loss": 0.3307, "step": 4937 }, { "epoch": 0.8041363025689044, "grad_norm": 0.3092489540576935, "learning_rate": 4.582285625424698e-05, "loss": 0.3255, "step": 4938 }, { "epoch": 0.8042991491267353, "grad_norm": 0.2753767669200897, "learning_rate": 4.582023440295847e-05, "loss": 0.3581, "step": 4939 }, { "epoch": 0.8044619956845662, "grad_norm": 0.318362832069397, "learning_rate": 4.581761180416115e-05, "loss": 0.384, "step": 4940 }, { "epoch": 0.804624842242397, "grad_norm": 0.2651921808719635, "learning_rate": 4.581498845794921e-05, "loss": 0.3539, "step": 4941 }, { "epoch": 0.804787688800228, "grad_norm": 0.2965821921825409, "learning_rate": 4.581236436441681e-05, "loss": 0.3424, "step": 4942 }, { "epoch": 0.8049505353580588, "grad_norm": 0.2797784209251404, "learning_rate": 4.580973952365818e-05, "loss": 0.3632, "step": 4943 }, { "epoch": 0.8051133819158898, "grad_norm": 0.29721495509147644, "learning_rate": 4.5807113935767556e-05, "loss": 0.3593, "step": 4944 }, { "epoch": 0.8052762284737206, "grad_norm": 0.3110272288322449, "learning_rate": 4.58044876008392e-05, "loss": 0.3504, "step": 4945 }, { "epoch": 0.8054390750315515, "grad_norm": 0.3452276885509491, "learning_rate": 4.58018605189674e-05, "loss": 0.3947, "step": 4946 }, { "epoch": 0.8056019215893824, "grad_norm": 0.31663739681243896, "learning_rate": 4.5799232690246486e-05, "loss": 0.3751, "step": 4947 }, { "epoch": 0.8057647681472133, "grad_norm": 0.28154686093330383, "learning_rate": 4.57966041147708e-05, "loss": 0.3148, "step": 4948 }, { "epoch": 0.8059276147050441, "grad_norm": 0.395536333322525, "learning_rate": 4.579397479263473e-05, "loss": 0.4125, "step": 4949 }, { "epoch": 0.8060904612628751, "grad_norm": 0.32524821162223816, "learning_rate": 4.579134472393265e-05, "loss": 0.3651, "step": 4950 }, { "epoch": 0.8062533078207059, "grad_norm": 0.3869190216064453, "learning_rate": 4.578871390875901e-05, "loss": 0.4041, "step": 4951 }, { "epoch": 0.8064161543785369, "grad_norm": 0.3518766164779663, "learning_rate": 4.578608234720826e-05, "loss": 0.3459, "step": 4952 }, { "epoch": 0.8065790009363677, "grad_norm": 0.3104449510574341, "learning_rate": 4.578345003937487e-05, "loss": 0.3831, "step": 4953 }, { "epoch": 0.8067418474941986, "grad_norm": 0.32484620809555054, "learning_rate": 4.578081698535336e-05, "loss": 0.3527, "step": 4954 }, { "epoch": 0.8069046940520295, "grad_norm": 0.2614061236381531, "learning_rate": 4.577818318523825e-05, "loss": 0.2841, "step": 4955 }, { "epoch": 0.8070675406098604, "grad_norm": 0.31472185254096985, "learning_rate": 4.5775548639124116e-05, "loss": 0.36, "step": 4956 }, { "epoch": 0.8072303871676912, "grad_norm": 0.2868671417236328, "learning_rate": 4.577291334710554e-05, "loss": 0.3445, "step": 4957 }, { "epoch": 0.8073932337255221, "grad_norm": 0.38258394598960876, "learning_rate": 4.5770277309277135e-05, "loss": 0.39, "step": 4958 }, { "epoch": 0.807556080283353, "grad_norm": 0.3482391834259033, "learning_rate": 4.576764052573355e-05, "loss": 0.3271, "step": 4959 }, { "epoch": 0.8077189268411838, "grad_norm": 0.3352154493331909, "learning_rate": 4.576500299656945e-05, "loss": 0.3904, "step": 4960 }, { "epoch": 0.8078817733990148, "grad_norm": 0.3574593663215637, "learning_rate": 4.576236472187952e-05, "loss": 0.3499, "step": 4961 }, { "epoch": 0.8080446199568456, "grad_norm": 0.3899151682853699, "learning_rate": 4.5759725701758506e-05, "loss": 0.3806, "step": 4962 }, { "epoch": 0.8082074665146766, "grad_norm": 0.3561607897281647, "learning_rate": 4.575708593630113e-05, "loss": 0.3236, "step": 4963 }, { "epoch": 0.8083703130725074, "grad_norm": 0.3472141623497009, "learning_rate": 4.575444542560218e-05, "loss": 0.3628, "step": 4964 }, { "epoch": 0.8085331596303383, "grad_norm": 0.3840325176715851, "learning_rate": 4.5751804169756464e-05, "loss": 0.3599, "step": 4965 }, { "epoch": 0.8086960061881692, "grad_norm": 0.45214876532554626, "learning_rate": 4.574916216885879e-05, "loss": 0.4432, "step": 4966 }, { "epoch": 0.8088588527460001, "grad_norm": 0.33320289850234985, "learning_rate": 4.5746519423004044e-05, "loss": 0.3722, "step": 4967 }, { "epoch": 0.8090216993038309, "grad_norm": 0.3382868468761444, "learning_rate": 4.5743875932287084e-05, "loss": 0.3783, "step": 4968 }, { "epoch": 0.8091845458616619, "grad_norm": 0.34237346053123474, "learning_rate": 4.5741231696802824e-05, "loss": 0.3604, "step": 4969 }, { "epoch": 0.8093473924194927, "grad_norm": 0.3191688656806946, "learning_rate": 4.5738586716646216e-05, "loss": 0.3383, "step": 4970 }, { "epoch": 0.8095102389773237, "grad_norm": 0.28313833475112915, "learning_rate": 4.573594099191219e-05, "loss": 0.3371, "step": 4971 }, { "epoch": 0.8096730855351545, "grad_norm": 0.3460673391819, "learning_rate": 4.573329452269578e-05, "loss": 0.3796, "step": 4972 }, { "epoch": 0.8098359320929854, "grad_norm": 0.35020190477371216, "learning_rate": 4.5730647309091966e-05, "loss": 0.3906, "step": 4973 }, { "epoch": 0.8099987786508163, "grad_norm": 0.3793877363204956, "learning_rate": 4.57279993511958e-05, "loss": 0.4322, "step": 4974 }, { "epoch": 0.8101616252086472, "grad_norm": 0.3659198582172394, "learning_rate": 4.572535064910236e-05, "loss": 0.3951, "step": 4975 }, { "epoch": 0.810324471766478, "grad_norm": 0.48061662912368774, "learning_rate": 4.5722701202906734e-05, "loss": 0.3644, "step": 4976 }, { "epoch": 0.8104873183243089, "grad_norm": 0.35057783126831055, "learning_rate": 4.5720051012704046e-05, "loss": 0.3768, "step": 4977 }, { "epoch": 0.8106501648821398, "grad_norm": 0.31728819012641907, "learning_rate": 4.5717400078589455e-05, "loss": 0.3794, "step": 4978 }, { "epoch": 0.8108130114399706, "grad_norm": 0.31371062994003296, "learning_rate": 4.571474840065813e-05, "loss": 0.3596, "step": 4979 }, { "epoch": 0.8109758579978016, "grad_norm": 0.3346266746520996, "learning_rate": 4.5712095979005267e-05, "loss": 0.3766, "step": 4980 }, { "epoch": 0.8111387045556324, "grad_norm": 0.2918032705783844, "learning_rate": 4.570944281372611e-05, "loss": 0.3458, "step": 4981 }, { "epoch": 0.8113015511134634, "grad_norm": 0.34070953726768494, "learning_rate": 4.57067889049159e-05, "loss": 0.3565, "step": 4982 }, { "epoch": 0.8114643976712942, "grad_norm": 0.3749617636203766, "learning_rate": 4.5704134252669936e-05, "loss": 0.3614, "step": 4983 }, { "epoch": 0.8116272442291251, "grad_norm": 0.30134862661361694, "learning_rate": 4.5701478857083524e-05, "loss": 0.3544, "step": 4984 }, { "epoch": 0.811790090786956, "grad_norm": 0.329353928565979, "learning_rate": 4.5698822718251986e-05, "loss": 0.3868, "step": 4985 }, { "epoch": 0.8119529373447869, "grad_norm": 0.2987889349460602, "learning_rate": 4.569616583627071e-05, "loss": 0.3718, "step": 4986 }, { "epoch": 0.8121157839026177, "grad_norm": 0.4646824598312378, "learning_rate": 4.569350821123507e-05, "loss": 0.3952, "step": 4987 }, { "epoch": 0.8122786304604487, "grad_norm": 0.3995329737663269, "learning_rate": 4.569084984324048e-05, "loss": 0.3549, "step": 4988 }, { "epoch": 0.8124414770182795, "grad_norm": 0.31828588247299194, "learning_rate": 4.5688190732382396e-05, "loss": 0.3993, "step": 4989 }, { "epoch": 0.8126043235761105, "grad_norm": 0.3373122811317444, "learning_rate": 4.568553087875628e-05, "loss": 0.3487, "step": 4990 }, { "epoch": 0.8127671701339413, "grad_norm": 0.35885950922966003, "learning_rate": 4.5682870282457624e-05, "loss": 0.3708, "step": 4991 }, { "epoch": 0.8129300166917722, "grad_norm": 0.3070005774497986, "learning_rate": 4.5680208943581956e-05, "loss": 0.333, "step": 4992 }, { "epoch": 0.8130928632496031, "grad_norm": 0.3776920437812805, "learning_rate": 4.567754686222484e-05, "loss": 0.3808, "step": 4993 }, { "epoch": 0.8132557098074339, "grad_norm": 0.31737303733825684, "learning_rate": 4.567488403848183e-05, "loss": 0.3598, "step": 4994 }, { "epoch": 0.8134185563652648, "grad_norm": 0.28761303424835205, "learning_rate": 4.5672220472448545e-05, "loss": 0.3308, "step": 4995 }, { "epoch": 0.8135814029230957, "grad_norm": 0.6715672016143799, "learning_rate": 4.566955616422061e-05, "loss": 0.4628, "step": 4996 }, { "epoch": 0.8137442494809266, "grad_norm": 0.4177633821964264, "learning_rate": 4.566689111389367e-05, "loss": 0.3832, "step": 4997 }, { "epoch": 0.8139070960387574, "grad_norm": 0.3531343936920166, "learning_rate": 4.5664225321563425e-05, "loss": 0.4083, "step": 4998 }, { "epoch": 0.8140699425965884, "grad_norm": 0.4797876179218292, "learning_rate": 4.566155878732559e-05, "loss": 0.3934, "step": 4999 }, { "epoch": 0.8142327891544192, "grad_norm": 0.4279330372810364, "learning_rate": 4.565889151127588e-05, "loss": 0.4277, "step": 5000 }, { "epoch": 0.8143956357122502, "grad_norm": 0.3624902367591858, "learning_rate": 4.565622349351007e-05, "loss": 0.3626, "step": 5001 }, { "epoch": 0.814558482270081, "grad_norm": 0.3647470772266388, "learning_rate": 4.565355473412395e-05, "loss": 0.3922, "step": 5002 }, { "epoch": 0.8147213288279119, "grad_norm": 0.37028053402900696, "learning_rate": 4.565088523321334e-05, "loss": 0.3657, "step": 5003 }, { "epoch": 0.8148841753857428, "grad_norm": 0.32584530115127563, "learning_rate": 4.564821499087407e-05, "loss": 0.3644, "step": 5004 }, { "epoch": 0.8150470219435737, "grad_norm": 0.3846951127052307, "learning_rate": 4.5645544007202025e-05, "loss": 0.3672, "step": 5005 }, { "epoch": 0.8152098685014045, "grad_norm": 0.2970885932445526, "learning_rate": 4.564287228229308e-05, "loss": 0.3626, "step": 5006 }, { "epoch": 0.8153727150592355, "grad_norm": 0.41733160614967346, "learning_rate": 4.564019981624319e-05, "loss": 0.3619, "step": 5007 }, { "epoch": 0.8155355616170663, "grad_norm": 0.3442259430885315, "learning_rate": 4.563752660914828e-05, "loss": 0.3307, "step": 5008 }, { "epoch": 0.8156984081748972, "grad_norm": 0.3691002130508423, "learning_rate": 4.563485266110433e-05, "loss": 0.3809, "step": 5009 }, { "epoch": 0.8158612547327281, "grad_norm": 0.3543544113636017, "learning_rate": 4.563217797220736e-05, "loss": 0.3549, "step": 5010 }, { "epoch": 0.816024101290559, "grad_norm": 0.31402474641799927, "learning_rate": 4.562950254255337e-05, "loss": 0.3763, "step": 5011 }, { "epoch": 0.8161869478483899, "grad_norm": 0.35893723368644714, "learning_rate": 4.562682637223843e-05, "loss": 0.4037, "step": 5012 }, { "epoch": 0.8163497944062207, "grad_norm": 0.40275466442108154, "learning_rate": 4.5624149461358636e-05, "loss": 0.3798, "step": 5013 }, { "epoch": 0.8165126409640516, "grad_norm": 0.3002925515174866, "learning_rate": 4.5621471810010077e-05, "loss": 0.3875, "step": 5014 }, { "epoch": 0.8166754875218825, "grad_norm": 0.33183228969573975, "learning_rate": 4.5618793418288897e-05, "loss": 0.3975, "step": 5015 }, { "epoch": 0.8168383340797134, "grad_norm": 0.3639487326145172, "learning_rate": 4.561611428629126e-05, "loss": 0.3949, "step": 5016 }, { "epoch": 0.8170011806375442, "grad_norm": 0.3385205566883087, "learning_rate": 4.561343441411335e-05, "loss": 0.368, "step": 5017 }, { "epoch": 0.8171640271953752, "grad_norm": 0.3138114809989929, "learning_rate": 4.561075380185139e-05, "loss": 0.3608, "step": 5018 }, { "epoch": 0.817326873753206, "grad_norm": 0.35325005650520325, "learning_rate": 4.560807244960161e-05, "loss": 0.3549, "step": 5019 }, { "epoch": 0.817489720311037, "grad_norm": 0.32883957028388977, "learning_rate": 4.5605390357460285e-05, "loss": 0.4026, "step": 5020 }, { "epoch": 0.8176525668688678, "grad_norm": 0.3002896010875702, "learning_rate": 4.560270752552371e-05, "loss": 0.3375, "step": 5021 }, { "epoch": 0.8178154134266987, "grad_norm": 0.3588086664676666, "learning_rate": 4.5600023953888214e-05, "loss": 0.3939, "step": 5022 }, { "epoch": 0.8179782599845296, "grad_norm": 0.35436463356018066, "learning_rate": 4.5597339642650136e-05, "loss": 0.3887, "step": 5023 }, { "epoch": 0.8181411065423605, "grad_norm": 0.35787513852119446, "learning_rate": 4.559465459190586e-05, "loss": 0.3909, "step": 5024 }, { "epoch": 0.8183039531001913, "grad_norm": 0.3334681987762451, "learning_rate": 4.559196880175177e-05, "loss": 0.3807, "step": 5025 }, { "epoch": 0.8184667996580223, "grad_norm": 0.30907005071640015, "learning_rate": 4.558928227228431e-05, "loss": 0.3738, "step": 5026 }, { "epoch": 0.8186296462158531, "grad_norm": 0.3357721269130707, "learning_rate": 4.558659500359993e-05, "loss": 0.3337, "step": 5027 }, { "epoch": 0.818792492773684, "grad_norm": 0.3166705071926117, "learning_rate": 4.558390699579511e-05, "loss": 0.3667, "step": 5028 }, { "epoch": 0.8189553393315149, "grad_norm": 0.3947760760784149, "learning_rate": 4.558121824896636e-05, "loss": 0.3622, "step": 5029 }, { "epoch": 0.8191181858893458, "grad_norm": 0.3659474849700928, "learning_rate": 4.557852876321021e-05, "loss": 0.3748, "step": 5030 }, { "epoch": 0.8192810324471766, "grad_norm": 0.3363109529018402, "learning_rate": 4.5575838538623225e-05, "loss": 0.3416, "step": 5031 }, { "epoch": 0.8194438790050075, "grad_norm": 0.45703956484794617, "learning_rate": 4.557314757530198e-05, "loss": 0.3933, "step": 5032 }, { "epoch": 0.8196067255628384, "grad_norm": 0.3853871822357178, "learning_rate": 4.557045587334311e-05, "loss": 0.3429, "step": 5033 }, { "epoch": 0.8197695721206693, "grad_norm": 0.31708937883377075, "learning_rate": 4.5567763432843235e-05, "loss": 0.3358, "step": 5034 }, { "epoch": 0.8199324186785002, "grad_norm": 0.4410877227783203, "learning_rate": 4.5565070253899034e-05, "loss": 0.4244, "step": 5035 }, { "epoch": 0.820095265236331, "grad_norm": 0.436335951089859, "learning_rate": 4.556237633660719e-05, "loss": 0.3303, "step": 5036 }, { "epoch": 0.820258111794162, "grad_norm": 0.38097280263900757, "learning_rate": 4.5559681681064436e-05, "loss": 0.3786, "step": 5037 }, { "epoch": 0.8204209583519928, "grad_norm": 0.3597646653652191, "learning_rate": 4.555698628736751e-05, "loss": 0.3488, "step": 5038 }, { "epoch": 0.8205838049098237, "grad_norm": 0.4094177186489105, "learning_rate": 4.555429015561319e-05, "loss": 0.3777, "step": 5039 }, { "epoch": 0.8207466514676546, "grad_norm": 0.3430057168006897, "learning_rate": 4.5551593285898265e-05, "loss": 0.3724, "step": 5040 }, { "epoch": 0.8209094980254855, "grad_norm": 0.42858418822288513, "learning_rate": 4.554889567831957e-05, "loss": 0.385, "step": 5041 }, { "epoch": 0.8210723445833163, "grad_norm": 0.3720020353794098, "learning_rate": 4.554619733297395e-05, "loss": 0.3736, "step": 5042 }, { "epoch": 0.8212351911411473, "grad_norm": 0.31385537981987, "learning_rate": 4.5543498249958294e-05, "loss": 0.375, "step": 5043 }, { "epoch": 0.8213980376989781, "grad_norm": 0.3069608509540558, "learning_rate": 4.554079842936949e-05, "loss": 0.3516, "step": 5044 }, { "epoch": 0.8215608842568091, "grad_norm": 0.3388117849826813, "learning_rate": 4.553809787130449e-05, "loss": 0.3512, "step": 5045 }, { "epoch": 0.8217237308146399, "grad_norm": 0.32962629199028015, "learning_rate": 4.553539657586024e-05, "loss": 0.351, "step": 5046 }, { "epoch": 0.8218865773724708, "grad_norm": 0.28436997532844543, "learning_rate": 4.553269454313372e-05, "loss": 0.3522, "step": 5047 }, { "epoch": 0.8220494239303017, "grad_norm": 0.3600708246231079, "learning_rate": 4.552999177322196e-05, "loss": 0.3746, "step": 5048 }, { "epoch": 0.8222122704881325, "grad_norm": 0.3786926567554474, "learning_rate": 4.552728826622199e-05, "loss": 0.3715, "step": 5049 }, { "epoch": 0.8223751170459634, "grad_norm": 0.37048354744911194, "learning_rate": 4.552458402223085e-05, "loss": 0.362, "step": 5050 }, { "epoch": 0.8225379636037943, "grad_norm": 0.3961424231529236, "learning_rate": 4.5521879041345675e-05, "loss": 0.348, "step": 5051 }, { "epoch": 0.8227008101616252, "grad_norm": 0.34902578592300415, "learning_rate": 4.5519173323663536e-05, "loss": 0.3936, "step": 5052 }, { "epoch": 0.822863656719456, "grad_norm": 0.2984142303466797, "learning_rate": 4.5516466869281607e-05, "loss": 0.3777, "step": 5053 }, { "epoch": 0.823026503277287, "grad_norm": 0.3336603045463562, "learning_rate": 4.5513759678297044e-05, "loss": 0.3314, "step": 5054 }, { "epoch": 0.8231893498351178, "grad_norm": 0.28595536947250366, "learning_rate": 4.5511051750807055e-05, "loss": 0.3359, "step": 5055 }, { "epoch": 0.8233521963929488, "grad_norm": 0.3575030267238617, "learning_rate": 4.550834308690885e-05, "loss": 0.4029, "step": 5056 }, { "epoch": 0.8235150429507796, "grad_norm": 0.36004191637039185, "learning_rate": 4.5505633686699684e-05, "loss": 0.3732, "step": 5057 }, { "epoch": 0.8236778895086105, "grad_norm": 0.3212713897228241, "learning_rate": 4.5502923550276835e-05, "loss": 0.3425, "step": 5058 }, { "epoch": 0.8238407360664414, "grad_norm": 0.35496339201927185, "learning_rate": 4.55002126777376e-05, "loss": 0.3825, "step": 5059 }, { "epoch": 0.8240035826242723, "grad_norm": 0.327518492937088, "learning_rate": 4.549750106917931e-05, "loss": 0.3249, "step": 5060 }, { "epoch": 0.8241664291821031, "grad_norm": 0.3221465051174164, "learning_rate": 4.549478872469932e-05, "loss": 0.38, "step": 5061 }, { "epoch": 0.8243292757399341, "grad_norm": 0.334210604429245, "learning_rate": 4.5492075644395e-05, "loss": 0.3795, "step": 5062 }, { "epoch": 0.8244921222977649, "grad_norm": 0.3963739573955536, "learning_rate": 4.5489361828363784e-05, "loss": 0.3906, "step": 5063 }, { "epoch": 0.8246549688555959, "grad_norm": 0.32430192828178406, "learning_rate": 4.5486647276703086e-05, "loss": 0.3293, "step": 5064 }, { "epoch": 0.8248178154134267, "grad_norm": 0.3054184019565582, "learning_rate": 4.548393198951037e-05, "loss": 0.3408, "step": 5065 }, { "epoch": 0.8249806619712576, "grad_norm": 0.33587220311164856, "learning_rate": 4.548121596688313e-05, "loss": 0.3687, "step": 5066 }, { "epoch": 0.8251435085290885, "grad_norm": 0.38333860039711, "learning_rate": 4.547849920891886e-05, "loss": 0.3874, "step": 5067 }, { "epoch": 0.8253063550869193, "grad_norm": 0.4198225140571594, "learning_rate": 4.547578171571512e-05, "loss": 0.3795, "step": 5068 }, { "epoch": 0.8254692016447502, "grad_norm": 0.3403146266937256, "learning_rate": 4.547306348736947e-05, "loss": 0.3838, "step": 5069 }, { "epoch": 0.8256320482025811, "grad_norm": 0.3218669593334198, "learning_rate": 4.5470344523979505e-05, "loss": 0.3489, "step": 5070 }, { "epoch": 0.825794894760412, "grad_norm": 0.4604528844356537, "learning_rate": 4.546762482564284e-05, "loss": 0.3831, "step": 5071 }, { "epoch": 0.8259577413182428, "grad_norm": 0.28245124220848083, "learning_rate": 4.546490439245712e-05, "loss": 0.3293, "step": 5072 }, { "epoch": 0.8261205878760738, "grad_norm": 0.33052706718444824, "learning_rate": 4.546218322452001e-05, "loss": 0.3818, "step": 5073 }, { "epoch": 0.8262834344339046, "grad_norm": 0.33950501680374146, "learning_rate": 4.5459461321929224e-05, "loss": 0.3996, "step": 5074 }, { "epoch": 0.8264462809917356, "grad_norm": 0.3470029830932617, "learning_rate": 4.545673868478248e-05, "loss": 0.3571, "step": 5075 }, { "epoch": 0.8266091275495664, "grad_norm": 0.31886959075927734, "learning_rate": 4.545401531317752e-05, "loss": 0.3428, "step": 5076 }, { "epoch": 0.8267719741073973, "grad_norm": 0.35408514738082886, "learning_rate": 4.5451291207212135e-05, "loss": 0.3585, "step": 5077 }, { "epoch": 0.8269348206652282, "grad_norm": 0.28383171558380127, "learning_rate": 4.544856636698412e-05, "loss": 0.373, "step": 5078 }, { "epoch": 0.8270976672230591, "grad_norm": 0.44953596591949463, "learning_rate": 4.54458407925913e-05, "loss": 0.3513, "step": 5079 }, { "epoch": 0.8272605137808899, "grad_norm": 0.3980730473995209, "learning_rate": 4.544311448413155e-05, "loss": 0.4133, "step": 5080 }, { "epoch": 0.8274233603387209, "grad_norm": 0.3420586585998535, "learning_rate": 4.5440387441702736e-05, "loss": 0.3981, "step": 5081 }, { "epoch": 0.8275862068965517, "grad_norm": 0.375125914812088, "learning_rate": 4.543765966540277e-05, "loss": 0.3741, "step": 5082 }, { "epoch": 0.8277490534543827, "grad_norm": 0.34658220410346985, "learning_rate": 4.5434931155329585e-05, "loss": 0.3481, "step": 5083 }, { "epoch": 0.8279119000122135, "grad_norm": 0.37054118514060974, "learning_rate": 4.543220191158116e-05, "loss": 0.3645, "step": 5084 }, { "epoch": 0.8280747465700444, "grad_norm": 0.2966628670692444, "learning_rate": 4.542947193425546e-05, "loss": 0.3604, "step": 5085 }, { "epoch": 0.8282375931278753, "grad_norm": 0.3471291661262512, "learning_rate": 4.5426741223450506e-05, "loss": 0.3395, "step": 5086 }, { "epoch": 0.8284004396857061, "grad_norm": 0.30629485845565796, "learning_rate": 4.542400977926435e-05, "loss": 0.3495, "step": 5087 }, { "epoch": 0.828563286243537, "grad_norm": 0.3491608798503876, "learning_rate": 4.542127760179505e-05, "loss": 0.3249, "step": 5088 }, { "epoch": 0.8287261328013679, "grad_norm": 0.35732364654541016, "learning_rate": 4.541854469114069e-05, "loss": 0.3416, "step": 5089 }, { "epoch": 0.8288889793591988, "grad_norm": 0.3406224846839905, "learning_rate": 4.541581104739942e-05, "loss": 0.337, "step": 5090 }, { "epoch": 0.8290518259170296, "grad_norm": 0.3329988121986389, "learning_rate": 4.541307667066935e-05, "loss": 0.3159, "step": 5091 }, { "epoch": 0.8292146724748606, "grad_norm": 0.3054507374763489, "learning_rate": 4.541034156104868e-05, "loss": 0.3579, "step": 5092 }, { "epoch": 0.8293775190326914, "grad_norm": 0.2921214699745178, "learning_rate": 4.540760571863558e-05, "loss": 0.3159, "step": 5093 }, { "epoch": 0.8295403655905224, "grad_norm": 0.3453315496444702, "learning_rate": 4.540486914352831e-05, "loss": 0.3955, "step": 5094 }, { "epoch": 0.8297032121483532, "grad_norm": 0.332434743642807, "learning_rate": 4.540213183582509e-05, "loss": 0.3476, "step": 5095 }, { "epoch": 0.8298660587061841, "grad_norm": 0.37585288286209106, "learning_rate": 4.539939379562421e-05, "loss": 0.3599, "step": 5096 }, { "epoch": 0.830028905264015, "grad_norm": 0.3222286105155945, "learning_rate": 4.5396655023023984e-05, "loss": 0.3845, "step": 5097 }, { "epoch": 0.8301917518218459, "grad_norm": 0.3297613859176636, "learning_rate": 4.539391551812273e-05, "loss": 0.39, "step": 5098 }, { "epoch": 0.8303545983796767, "grad_norm": 0.34298813343048096, "learning_rate": 4.5391175281018805e-05, "loss": 0.3756, "step": 5099 }, { "epoch": 0.8305174449375077, "grad_norm": 0.3403162360191345, "learning_rate": 4.538843431181059e-05, "loss": 0.394, "step": 5100 }, { "epoch": 0.8306802914953385, "grad_norm": 0.4025830924510956, "learning_rate": 4.538569261059651e-05, "loss": 0.408, "step": 5101 }, { "epoch": 0.8308431380531694, "grad_norm": 0.30922943353652954, "learning_rate": 4.538295017747498e-05, "loss": 0.3468, "step": 5102 }, { "epoch": 0.8310059846110003, "grad_norm": 0.3363969624042511, "learning_rate": 4.5380207012544465e-05, "loss": 0.3682, "step": 5103 }, { "epoch": 0.8311688311688312, "grad_norm": 0.28316250443458557, "learning_rate": 4.5377463115903465e-05, "loss": 0.327, "step": 5104 }, { "epoch": 0.831331677726662, "grad_norm": 0.35884565114974976, "learning_rate": 4.537471848765048e-05, "loss": 0.3872, "step": 5105 }, { "epoch": 0.8314945242844929, "grad_norm": 0.36131832003593445, "learning_rate": 4.5371973127884063e-05, "loss": 0.3538, "step": 5106 }, { "epoch": 0.8316573708423238, "grad_norm": 0.28588688373565674, "learning_rate": 4.536922703670278e-05, "loss": 0.3277, "step": 5107 }, { "epoch": 0.8318202174001547, "grad_norm": 0.36000171303749084, "learning_rate": 4.5366480214205205e-05, "loss": 0.3938, "step": 5108 }, { "epoch": 0.8319830639579856, "grad_norm": 0.28623661398887634, "learning_rate": 4.5363732660489975e-05, "loss": 0.3616, "step": 5109 }, { "epoch": 0.8321459105158164, "grad_norm": 0.3237670958042145, "learning_rate": 4.5360984375655735e-05, "loss": 0.3944, "step": 5110 }, { "epoch": 0.8323087570736474, "grad_norm": 0.3519614040851593, "learning_rate": 4.535823535980115e-05, "loss": 0.3618, "step": 5111 }, { "epoch": 0.8324716036314782, "grad_norm": 0.27284377813339233, "learning_rate": 4.5355485613024926e-05, "loss": 0.3862, "step": 5112 }, { "epoch": 0.8326344501893091, "grad_norm": 0.35595160722732544, "learning_rate": 4.5352735135425775e-05, "loss": 0.3951, "step": 5113 }, { "epoch": 0.83279729674714, "grad_norm": 0.31150537729263306, "learning_rate": 4.5349983927102455e-05, "loss": 0.3738, "step": 5114 }, { "epoch": 0.8329601433049709, "grad_norm": 0.3198454678058624, "learning_rate": 4.534723198815375e-05, "loss": 0.3489, "step": 5115 }, { "epoch": 0.8331229898628018, "grad_norm": 0.32658398151397705, "learning_rate": 4.534447931867845e-05, "loss": 0.3781, "step": 5116 }, { "epoch": 0.8332858364206327, "grad_norm": 0.29297947883605957, "learning_rate": 4.534172591877538e-05, "loss": 0.3981, "step": 5117 }, { "epoch": 0.8334486829784635, "grad_norm": 0.3562200367450714, "learning_rate": 4.533897178854341e-05, "loss": 0.3645, "step": 5118 }, { "epoch": 0.8336115295362945, "grad_norm": 0.35511156916618347, "learning_rate": 4.533621692808142e-05, "loss": 0.3643, "step": 5119 }, { "epoch": 0.8337743760941253, "grad_norm": 0.3594464659690857, "learning_rate": 4.5333461337488306e-05, "loss": 0.3733, "step": 5120 }, { "epoch": 0.8339372226519562, "grad_norm": 0.3207613527774811, "learning_rate": 4.533070501686302e-05, "loss": 0.3247, "step": 5121 }, { "epoch": 0.8341000692097871, "grad_norm": 0.3429202437400818, "learning_rate": 4.53279479663045e-05, "loss": 0.3613, "step": 5122 }, { "epoch": 0.8342629157676179, "grad_norm": 0.3548204302787781, "learning_rate": 4.532519018591175e-05, "loss": 0.3872, "step": 5123 }, { "epoch": 0.8344257623254488, "grad_norm": 0.32788196206092834, "learning_rate": 4.532243167578377e-05, "loss": 0.3645, "step": 5124 }, { "epoch": 0.8345886088832797, "grad_norm": 0.3822527527809143, "learning_rate": 4.531967243601962e-05, "loss": 0.3928, "step": 5125 }, { "epoch": 0.8347514554411106, "grad_norm": 0.3936161398887634, "learning_rate": 4.531691246671834e-05, "loss": 0.34, "step": 5126 }, { "epoch": 0.8349143019989415, "grad_norm": 0.344277948141098, "learning_rate": 4.531415176797903e-05, "loss": 0.3531, "step": 5127 }, { "epoch": 0.8350771485567724, "grad_norm": 0.3492881655693054, "learning_rate": 4.5311390339900804e-05, "loss": 0.3758, "step": 5128 }, { "epoch": 0.8352399951146032, "grad_norm": 0.32051071524620056, "learning_rate": 4.530862818258282e-05, "loss": 0.3453, "step": 5129 }, { "epoch": 0.8354028416724342, "grad_norm": 0.3174987733364105, "learning_rate": 4.5305865296124237e-05, "loss": 0.3491, "step": 5130 }, { "epoch": 0.835565688230265, "grad_norm": 0.34560954570770264, "learning_rate": 4.530310168062425e-05, "loss": 0.3623, "step": 5131 }, { "epoch": 0.835728534788096, "grad_norm": 0.34845170378685, "learning_rate": 4.530033733618208e-05, "loss": 0.375, "step": 5132 }, { "epoch": 0.8358913813459268, "grad_norm": 0.31653058528900146, "learning_rate": 4.529757226289698e-05, "loss": 0.3638, "step": 5133 }, { "epoch": 0.8360542279037577, "grad_norm": 0.3722882866859436, "learning_rate": 4.529480646086822e-05, "loss": 0.3506, "step": 5134 }, { "epoch": 0.8362170744615885, "grad_norm": 0.3884814977645874, "learning_rate": 4.5292039930195116e-05, "loss": 0.3519, "step": 5135 }, { "epoch": 0.8363799210194195, "grad_norm": 0.3650486171245575, "learning_rate": 4.528927267097697e-05, "loss": 0.3758, "step": 5136 }, { "epoch": 0.8365427675772503, "grad_norm": 0.34486067295074463, "learning_rate": 4.5286504683313146e-05, "loss": 0.3444, "step": 5137 }, { "epoch": 0.8367056141350813, "grad_norm": 0.3353707194328308, "learning_rate": 4.528373596730303e-05, "loss": 0.3097, "step": 5138 }, { "epoch": 0.8368684606929121, "grad_norm": 0.3373611271381378, "learning_rate": 4.528096652304602e-05, "loss": 0.3399, "step": 5139 }, { "epoch": 0.837031307250743, "grad_norm": 0.32969504594802856, "learning_rate": 4.5278196350641545e-05, "loss": 0.4058, "step": 5140 }, { "epoch": 0.8371941538085739, "grad_norm": 0.29658424854278564, "learning_rate": 4.5275425450189065e-05, "loss": 0.3119, "step": 5141 }, { "epoch": 0.8373570003664047, "grad_norm": 0.3325640857219696, "learning_rate": 4.527265382178807e-05, "loss": 0.3508, "step": 5142 }, { "epoch": 0.8375198469242356, "grad_norm": 0.29019036889076233, "learning_rate": 4.526988146553806e-05, "loss": 0.3046, "step": 5143 }, { "epoch": 0.8376826934820665, "grad_norm": 0.503582775592804, "learning_rate": 4.526710838153858e-05, "loss": 0.4401, "step": 5144 }, { "epoch": 0.8378455400398974, "grad_norm": 0.2611682116985321, "learning_rate": 4.526433456988918e-05, "loss": 0.3188, "step": 5145 }, { "epoch": 0.8380083865977282, "grad_norm": 0.2757588326931, "learning_rate": 4.5261560030689465e-05, "loss": 0.3629, "step": 5146 }, { "epoch": 0.8381712331555592, "grad_norm": 0.3079615533351898, "learning_rate": 4.525878476403903e-05, "loss": 0.3304, "step": 5147 }, { "epoch": 0.83833407971339, "grad_norm": 0.2972939908504486, "learning_rate": 4.525600877003753e-05, "loss": 0.3577, "step": 5148 }, { "epoch": 0.838496926271221, "grad_norm": 0.3255792260169983, "learning_rate": 4.525323204878463e-05, "loss": 0.3519, "step": 5149 }, { "epoch": 0.8386597728290518, "grad_norm": 0.27978718280792236, "learning_rate": 4.525045460038001e-05, "loss": 0.3694, "step": 5150 }, { "epoch": 0.8388226193868827, "grad_norm": 0.3238348364830017, "learning_rate": 4.52476764249234e-05, "loss": 0.3242, "step": 5151 }, { "epoch": 0.8389854659447136, "grad_norm": 0.3447387218475342, "learning_rate": 4.524489752251455e-05, "loss": 0.4037, "step": 5152 }, { "epoch": 0.8391483125025445, "grad_norm": 0.2680559754371643, "learning_rate": 4.524211789325322e-05, "loss": 0.3209, "step": 5153 }, { "epoch": 0.8393111590603753, "grad_norm": 0.3081147074699402, "learning_rate": 4.523933753723921e-05, "loss": 0.3748, "step": 5154 }, { "epoch": 0.8394740056182063, "grad_norm": 0.28882473707199097, "learning_rate": 4.523655645457234e-05, "loss": 0.3436, "step": 5155 }, { "epoch": 0.8396368521760371, "grad_norm": 0.3104209303855896, "learning_rate": 4.523377464535247e-05, "loss": 0.351, "step": 5156 }, { "epoch": 0.8397996987338681, "grad_norm": 0.3502800762653351, "learning_rate": 4.523099210967946e-05, "loss": 0.3381, "step": 5157 }, { "epoch": 0.8399625452916989, "grad_norm": 0.33671072125434875, "learning_rate": 4.522820884765323e-05, "loss": 0.3445, "step": 5158 }, { "epoch": 0.8401253918495298, "grad_norm": 0.3235054314136505, "learning_rate": 4.522542485937369e-05, "loss": 0.3529, "step": 5159 }, { "epoch": 0.8402882384073607, "grad_norm": 0.39115846157073975, "learning_rate": 4.52226401449408e-05, "loss": 0.3913, "step": 5160 }, { "epoch": 0.8404510849651915, "grad_norm": 0.3697371184825897, "learning_rate": 4.521985470445454e-05, "loss": 0.3604, "step": 5161 }, { "epoch": 0.8406139315230224, "grad_norm": 0.322469025850296, "learning_rate": 4.5217068538014915e-05, "loss": 0.3662, "step": 5162 }, { "epoch": 0.8407767780808533, "grad_norm": 0.38894879817962646, "learning_rate": 4.521428164572197e-05, "loss": 0.3894, "step": 5163 }, { "epoch": 0.8409396246386842, "grad_norm": 0.34064528346061707, "learning_rate": 4.521149402767574e-05, "loss": 0.3646, "step": 5164 }, { "epoch": 0.841102471196515, "grad_norm": 0.40577593445777893, "learning_rate": 4.520870568397631e-05, "loss": 0.3839, "step": 5165 }, { "epoch": 0.841265317754346, "grad_norm": 0.3229261338710785, "learning_rate": 4.520591661472381e-05, "loss": 0.3493, "step": 5166 }, { "epoch": 0.8414281643121768, "grad_norm": 0.37361329793930054, "learning_rate": 4.520312682001837e-05, "loss": 0.3906, "step": 5167 }, { "epoch": 0.8415910108700078, "grad_norm": 0.33174487948417664, "learning_rate": 4.520033629996014e-05, "loss": 0.3506, "step": 5168 }, { "epoch": 0.8417538574278386, "grad_norm": 0.3242723047733307, "learning_rate": 4.519754505464932e-05, "loss": 0.3867, "step": 5169 }, { "epoch": 0.8419167039856695, "grad_norm": 0.28948312997817993, "learning_rate": 4.519475308418611e-05, "loss": 0.2972, "step": 5170 }, { "epoch": 0.8420795505435004, "grad_norm": 0.3243795335292816, "learning_rate": 4.519196038867076e-05, "loss": 0.3732, "step": 5171 }, { "epoch": 0.8422423971013313, "grad_norm": 0.32937732338905334, "learning_rate": 4.518916696820354e-05, "loss": 0.3669, "step": 5172 }, { "epoch": 0.8424052436591621, "grad_norm": 0.30086416006088257, "learning_rate": 4.518637282288474e-05, "loss": 0.381, "step": 5173 }, { "epoch": 0.8425680902169931, "grad_norm": 0.348053514957428, "learning_rate": 4.518357795281467e-05, "loss": 0.4185, "step": 5174 }, { "epoch": 0.8427309367748239, "grad_norm": 0.33741942048072815, "learning_rate": 4.5180782358093685e-05, "loss": 0.4101, "step": 5175 }, { "epoch": 0.8428937833326549, "grad_norm": 0.29478907585144043, "learning_rate": 4.517798603882215e-05, "loss": 0.362, "step": 5176 }, { "epoch": 0.8430566298904857, "grad_norm": 0.2951113283634186, "learning_rate": 4.517518899510046e-05, "loss": 0.3601, "step": 5177 }, { "epoch": 0.8432194764483165, "grad_norm": 0.3188795745372772, "learning_rate": 4.517239122702903e-05, "loss": 0.3474, "step": 5178 }, { "epoch": 0.8433823230061475, "grad_norm": 0.3399430215358734, "learning_rate": 4.516959273470833e-05, "loss": 0.3511, "step": 5179 }, { "epoch": 0.8435451695639783, "grad_norm": 0.3231152892112732, "learning_rate": 4.5166793518238816e-05, "loss": 0.2997, "step": 5180 }, { "epoch": 0.8437080161218092, "grad_norm": 0.30161264538764954, "learning_rate": 4.516399357772099e-05, "loss": 0.3399, "step": 5181 }, { "epoch": 0.8438708626796401, "grad_norm": 0.30735138058662415, "learning_rate": 4.516119291325538e-05, "loss": 0.3386, "step": 5182 }, { "epoch": 0.844033709237471, "grad_norm": 0.37455669045448303, "learning_rate": 4.515839152494254e-05, "loss": 0.3567, "step": 5183 }, { "epoch": 0.8441965557953018, "grad_norm": 0.36983054876327515, "learning_rate": 4.515558941288306e-05, "loss": 0.3757, "step": 5184 }, { "epoch": 0.8443594023531328, "grad_norm": 0.3368876576423645, "learning_rate": 4.515278657717753e-05, "loss": 0.3478, "step": 5185 }, { "epoch": 0.8445222489109636, "grad_norm": 0.4019451439380646, "learning_rate": 4.514998301792657e-05, "loss": 0.3792, "step": 5186 }, { "epoch": 0.8446850954687946, "grad_norm": 0.2720210552215576, "learning_rate": 4.514717873523085e-05, "loss": 0.3686, "step": 5187 }, { "epoch": 0.8448479420266254, "grad_norm": 0.3177706003189087, "learning_rate": 4.514437372919107e-05, "loss": 0.3071, "step": 5188 }, { "epoch": 0.8450107885844563, "grad_norm": 0.3551273047924042, "learning_rate": 4.51415679999079e-05, "loss": 0.3797, "step": 5189 }, { "epoch": 0.8451736351422872, "grad_norm": 0.339987576007843, "learning_rate": 4.51387615474821e-05, "loss": 0.4149, "step": 5190 }, { "epoch": 0.8453364817001181, "grad_norm": 0.3137454390525818, "learning_rate": 4.513595437201442e-05, "loss": 0.4069, "step": 5191 }, { "epoch": 0.8454993282579489, "grad_norm": 0.45912453532218933, "learning_rate": 4.513314647360565e-05, "loss": 0.401, "step": 5192 }, { "epoch": 0.8456621748157799, "grad_norm": 0.34322598576545715, "learning_rate": 4.513033785235661e-05, "loss": 0.3813, "step": 5193 }, { "epoch": 0.8458250213736107, "grad_norm": 0.36825552582740784, "learning_rate": 4.5127528508368115e-05, "loss": 0.3663, "step": 5194 }, { "epoch": 0.8459878679314417, "grad_norm": 0.30244243144989014, "learning_rate": 4.5124718441741046e-05, "loss": 0.3452, "step": 5195 }, { "epoch": 0.8461507144892725, "grad_norm": 0.3201048672199249, "learning_rate": 4.51219076525763e-05, "loss": 0.3401, "step": 5196 }, { "epoch": 0.8463135610471033, "grad_norm": 0.32214632630348206, "learning_rate": 4.511909614097477e-05, "loss": 0.3625, "step": 5197 }, { "epoch": 0.8464764076049343, "grad_norm": 0.31543341279029846, "learning_rate": 4.511628390703741e-05, "loss": 0.3455, "step": 5198 }, { "epoch": 0.8466392541627651, "grad_norm": 0.3297494947910309, "learning_rate": 4.51134709508652e-05, "loss": 0.3456, "step": 5199 }, { "epoch": 0.846802100720596, "grad_norm": 0.44052836298942566, "learning_rate": 4.5110657272559106e-05, "loss": 0.3319, "step": 5200 }, { "epoch": 0.8469649472784269, "grad_norm": 0.3226180076599121, "learning_rate": 4.5107842872220174e-05, "loss": 0.3519, "step": 5201 }, { "epoch": 0.8471277938362578, "grad_norm": 0.29643523693084717, "learning_rate": 4.5105027749949436e-05, "loss": 0.36, "step": 5202 }, { "epoch": 0.8472906403940886, "grad_norm": 0.4058772027492523, "learning_rate": 4.510221190584797e-05, "loss": 0.3871, "step": 5203 }, { "epoch": 0.8474534869519196, "grad_norm": 0.3475492000579834, "learning_rate": 4.509939534001686e-05, "loss": 0.3592, "step": 5204 }, { "epoch": 0.8476163335097504, "grad_norm": 0.32543954253196716, "learning_rate": 4.509657805255724e-05, "loss": 0.399, "step": 5205 }, { "epoch": 0.8477791800675814, "grad_norm": 0.30778419971466064, "learning_rate": 4.5093760043570266e-05, "loss": 0.3505, "step": 5206 }, { "epoch": 0.8479420266254122, "grad_norm": 0.3355879783630371, "learning_rate": 4.509094131315709e-05, "loss": 0.3881, "step": 5207 }, { "epoch": 0.8481048731832431, "grad_norm": 0.2880488336086273, "learning_rate": 4.508812186141893e-05, "loss": 0.3756, "step": 5208 }, { "epoch": 0.848267719741074, "grad_norm": 0.3682771623134613, "learning_rate": 4.508530168845702e-05, "loss": 0.3729, "step": 5209 }, { "epoch": 0.8484305662989049, "grad_norm": 0.3995860517024994, "learning_rate": 4.5082480794372597e-05, "loss": 0.3974, "step": 5210 }, { "epoch": 0.8485934128567357, "grad_norm": 0.31488677859306335, "learning_rate": 4.5079659179266936e-05, "loss": 0.329, "step": 5211 }, { "epoch": 0.8487562594145667, "grad_norm": 0.3309246003627777, "learning_rate": 4.507683684324136e-05, "loss": 0.349, "step": 5212 }, { "epoch": 0.8489191059723975, "grad_norm": 0.3687293827533722, "learning_rate": 4.5074013786397184e-05, "loss": 0.4212, "step": 5213 }, { "epoch": 0.8490819525302284, "grad_norm": 0.37813642621040344, "learning_rate": 4.5071190008835776e-05, "loss": 0.3848, "step": 5214 }, { "epoch": 0.8492447990880593, "grad_norm": 0.522943377494812, "learning_rate": 4.5068365510658506e-05, "loss": 0.4235, "step": 5215 }, { "epoch": 0.8494076456458901, "grad_norm": 0.3784523904323578, "learning_rate": 4.506554029196679e-05, "loss": 0.3783, "step": 5216 }, { "epoch": 0.849570492203721, "grad_norm": 0.2970886826515198, "learning_rate": 4.506271435286206e-05, "loss": 0.3651, "step": 5217 }, { "epoch": 0.8497333387615519, "grad_norm": 0.34977903962135315, "learning_rate": 4.5059887693445765e-05, "loss": 0.3582, "step": 5218 }, { "epoch": 0.8498961853193828, "grad_norm": 0.309744268655777, "learning_rate": 4.5057060313819415e-05, "loss": 0.3533, "step": 5219 }, { "epoch": 0.8500590318772137, "grad_norm": 0.30398833751678467, "learning_rate": 4.50542322140845e-05, "loss": 0.3594, "step": 5220 }, { "epoch": 0.8502218784350446, "grad_norm": 0.32773375511169434, "learning_rate": 4.5051403394342574e-05, "loss": 0.3579, "step": 5221 }, { "epoch": 0.8503847249928754, "grad_norm": 0.3048364818096161, "learning_rate": 4.504857385469518e-05, "loss": 0.3487, "step": 5222 }, { "epoch": 0.8505475715507064, "grad_norm": 0.3106783330440521, "learning_rate": 4.5045743595243926e-05, "loss": 0.3274, "step": 5223 }, { "epoch": 0.8507104181085372, "grad_norm": 0.30657559633255005, "learning_rate": 4.5042912616090405e-05, "loss": 0.3726, "step": 5224 }, { "epoch": 0.8508732646663681, "grad_norm": 0.304218590259552, "learning_rate": 4.504008091733628e-05, "loss": 0.3895, "step": 5225 }, { "epoch": 0.851036111224199, "grad_norm": 0.3073332607746124, "learning_rate": 4.5037248499083204e-05, "loss": 0.3633, "step": 5226 }, { "epoch": 0.8511989577820299, "grad_norm": 0.3105320930480957, "learning_rate": 4.503441536143288e-05, "loss": 0.3501, "step": 5227 }, { "epoch": 0.8513618043398608, "grad_norm": 0.3235380947589874, "learning_rate": 4.503158150448702e-05, "loss": 0.4049, "step": 5228 }, { "epoch": 0.8515246508976917, "grad_norm": 0.3661324679851532, "learning_rate": 4.502874692834736e-05, "loss": 0.3379, "step": 5229 }, { "epoch": 0.8516874974555225, "grad_norm": 0.2724679708480835, "learning_rate": 4.5025911633115687e-05, "loss": 0.3509, "step": 5230 }, { "epoch": 0.8518503440133535, "grad_norm": 0.3161654472351074, "learning_rate": 4.502307561889378e-05, "loss": 0.3156, "step": 5231 }, { "epoch": 0.8520131905711843, "grad_norm": 0.3793548047542572, "learning_rate": 4.5020238885783475e-05, "loss": 0.4154, "step": 5232 }, { "epoch": 0.8521760371290152, "grad_norm": 0.3245110809803009, "learning_rate": 4.50174014338866e-05, "loss": 0.3753, "step": 5233 }, { "epoch": 0.8523388836868461, "grad_norm": 0.35471591353416443, "learning_rate": 4.501456326330506e-05, "loss": 0.3684, "step": 5234 }, { "epoch": 0.8525017302446769, "grad_norm": 0.30378150939941406, "learning_rate": 4.501172437414071e-05, "loss": 0.3562, "step": 5235 }, { "epoch": 0.8526645768025078, "grad_norm": 0.4304359555244446, "learning_rate": 4.500888476649551e-05, "loss": 0.3846, "step": 5236 }, { "epoch": 0.8528274233603387, "grad_norm": 0.45608848333358765, "learning_rate": 4.500604444047139e-05, "loss": 0.3621, "step": 5237 }, { "epoch": 0.8529902699181696, "grad_norm": 0.3560023903846741, "learning_rate": 4.5003203396170345e-05, "loss": 0.3771, "step": 5238 }, { "epoch": 0.8531531164760005, "grad_norm": 0.3133832812309265, "learning_rate": 4.500036163369437e-05, "loss": 0.378, "step": 5239 }, { "epoch": 0.8533159630338314, "grad_norm": 0.42123138904571533, "learning_rate": 4.499751915314548e-05, "loss": 0.3718, "step": 5240 }, { "epoch": 0.8534788095916622, "grad_norm": 0.40142831206321716, "learning_rate": 4.4994675954625745e-05, "loss": 0.3699, "step": 5241 }, { "epoch": 0.8536416561494932, "grad_norm": 0.31866538524627686, "learning_rate": 4.499183203823723e-05, "loss": 0.3424, "step": 5242 }, { "epoch": 0.853804502707324, "grad_norm": 0.32229289412498474, "learning_rate": 4.498898740408205e-05, "loss": 0.383, "step": 5243 }, { "epoch": 0.8539673492651549, "grad_norm": 0.36602577567100525, "learning_rate": 4.4986142052262334e-05, "loss": 0.3891, "step": 5244 }, { "epoch": 0.8541301958229858, "grad_norm": 0.3895883858203888, "learning_rate": 4.498329598288025e-05, "loss": 0.3769, "step": 5245 }, { "epoch": 0.8542930423808167, "grad_norm": 0.2894751727581024, "learning_rate": 4.498044919603796e-05, "loss": 0.3653, "step": 5246 }, { "epoch": 0.8544558889386475, "grad_norm": 0.336943119764328, "learning_rate": 4.497760169183767e-05, "loss": 0.3936, "step": 5247 }, { "epoch": 0.8546187354964785, "grad_norm": 0.29725682735443115, "learning_rate": 4.4974753470381634e-05, "loss": 0.338, "step": 5248 }, { "epoch": 0.8547815820543093, "grad_norm": 0.32503655552864075, "learning_rate": 4.49719045317721e-05, "loss": 0.3644, "step": 5249 }, { "epoch": 0.8549444286121403, "grad_norm": 0.3335414528846741, "learning_rate": 4.496905487611136e-05, "loss": 0.3615, "step": 5250 }, { "epoch": 0.8551072751699711, "grad_norm": 0.38566887378692627, "learning_rate": 4.496620450350172e-05, "loss": 0.4058, "step": 5251 }, { "epoch": 0.8552701217278019, "grad_norm": 0.28780320286750793, "learning_rate": 4.4963353414045516e-05, "loss": 0.3114, "step": 5252 }, { "epoch": 0.8554329682856329, "grad_norm": 0.28960806131362915, "learning_rate": 4.496050160784512e-05, "loss": 0.3414, "step": 5253 }, { "epoch": 0.8555958148434637, "grad_norm": 0.34802669286727905, "learning_rate": 4.4957649085002906e-05, "loss": 0.4233, "step": 5254 }, { "epoch": 0.8557586614012946, "grad_norm": 0.3747927248477936, "learning_rate": 4.4954795845621294e-05, "loss": 0.3405, "step": 5255 }, { "epoch": 0.8559215079591255, "grad_norm": 0.3660227656364441, "learning_rate": 4.495194188980273e-05, "loss": 0.3658, "step": 5256 }, { "epoch": 0.8560843545169564, "grad_norm": 0.36109644174575806, "learning_rate": 4.494908721764967e-05, "loss": 0.4063, "step": 5257 }, { "epoch": 0.8562472010747872, "grad_norm": 0.3206010162830353, "learning_rate": 4.4946231829264607e-05, "loss": 0.3528, "step": 5258 }, { "epoch": 0.8564100476326182, "grad_norm": 0.371005117893219, "learning_rate": 4.494337572475007e-05, "loss": 0.3473, "step": 5259 }, { "epoch": 0.856572894190449, "grad_norm": 0.4235215187072754, "learning_rate": 4.494051890420858e-05, "loss": 0.3575, "step": 5260 }, { "epoch": 0.85673574074828, "grad_norm": 0.39047253131866455, "learning_rate": 4.493766136774274e-05, "loss": 0.3666, "step": 5261 }, { "epoch": 0.8568985873061108, "grad_norm": 0.34007754921913147, "learning_rate": 4.493480311545511e-05, "loss": 0.3727, "step": 5262 }, { "epoch": 0.8570614338639417, "grad_norm": 0.36762532591819763, "learning_rate": 4.4931944147448314e-05, "loss": 0.383, "step": 5263 }, { "epoch": 0.8572242804217726, "grad_norm": 0.29040881991386414, "learning_rate": 4.492908446382501e-05, "loss": 0.3609, "step": 5264 }, { "epoch": 0.8573871269796035, "grad_norm": 0.3844158351421356, "learning_rate": 4.492622406468786e-05, "loss": 0.359, "step": 5265 }, { "epoch": 0.8575499735374343, "grad_norm": 0.4060531556606293, "learning_rate": 4.492336295013958e-05, "loss": 0.4272, "step": 5266 }, { "epoch": 0.8577128200952653, "grad_norm": 0.30074143409729004, "learning_rate": 4.492050112028287e-05, "loss": 0.3173, "step": 5267 }, { "epoch": 0.8578756666530961, "grad_norm": 0.2875753343105316, "learning_rate": 4.491763857522048e-05, "loss": 0.3503, "step": 5268 }, { "epoch": 0.8580385132109271, "grad_norm": 0.2938607931137085, "learning_rate": 4.4914775315055196e-05, "loss": 0.3443, "step": 5269 }, { "epoch": 0.8582013597687579, "grad_norm": 0.35582128167152405, "learning_rate": 4.4911911339889815e-05, "loss": 0.3868, "step": 5270 }, { "epoch": 0.8583642063265887, "grad_norm": 0.9528025984764099, "learning_rate": 4.4909046649827156e-05, "loss": 0.3602, "step": 5271 }, { "epoch": 0.8585270528844197, "grad_norm": 0.3564146161079407, "learning_rate": 4.490618124497007e-05, "loss": 0.4088, "step": 5272 }, { "epoch": 0.8586898994422505, "grad_norm": 0.2773539423942566, "learning_rate": 4.4903315125421444e-05, "loss": 0.3597, "step": 5273 }, { "epoch": 0.8588527460000814, "grad_norm": 0.3110659420490265, "learning_rate": 4.490044829128417e-05, "loss": 0.3461, "step": 5274 }, { "epoch": 0.8590155925579123, "grad_norm": 0.3040846288204193, "learning_rate": 4.4897580742661175e-05, "loss": 0.3663, "step": 5275 }, { "epoch": 0.8591784391157432, "grad_norm": 0.33376890420913696, "learning_rate": 4.489471247965542e-05, "loss": 0.36, "step": 5276 }, { "epoch": 0.859341285673574, "grad_norm": 0.33477213978767395, "learning_rate": 4.489184350236988e-05, "loss": 0.3257, "step": 5277 }, { "epoch": 0.859504132231405, "grad_norm": 0.380470335483551, "learning_rate": 4.4888973810907564e-05, "loss": 0.3962, "step": 5278 }, { "epoch": 0.8596669787892358, "grad_norm": 0.4362548887729645, "learning_rate": 4.4886103405371495e-05, "loss": 0.3683, "step": 5279 }, { "epoch": 0.8598298253470668, "grad_norm": 0.3342142105102539, "learning_rate": 4.488323228586474e-05, "loss": 0.3464, "step": 5280 }, { "epoch": 0.8599926719048976, "grad_norm": 0.2839004397392273, "learning_rate": 4.488036045249037e-05, "loss": 0.3434, "step": 5281 }, { "epoch": 0.8601555184627285, "grad_norm": 0.30603429675102234, "learning_rate": 4.487748790535149e-05, "loss": 0.3768, "step": 5282 }, { "epoch": 0.8603183650205594, "grad_norm": 0.3127606511116028, "learning_rate": 4.487461464455125e-05, "loss": 0.3407, "step": 5283 }, { "epoch": 0.8604812115783903, "grad_norm": 0.35636335611343384, "learning_rate": 4.487174067019279e-05, "loss": 0.3703, "step": 5284 }, { "epoch": 0.8606440581362211, "grad_norm": 0.2836374342441559, "learning_rate": 4.486886598237931e-05, "loss": 0.3515, "step": 5285 }, { "epoch": 0.8608069046940521, "grad_norm": 0.3387174606323242, "learning_rate": 4.4865990581214005e-05, "loss": 0.379, "step": 5286 }, { "epoch": 0.8609697512518829, "grad_norm": 0.35213911533355713, "learning_rate": 4.486311446680013e-05, "loss": 0.333, "step": 5287 }, { "epoch": 0.8611325978097139, "grad_norm": 0.3084503412246704, "learning_rate": 4.486023763924092e-05, "loss": 0.3222, "step": 5288 }, { "epoch": 0.8612954443675447, "grad_norm": 0.30789846181869507, "learning_rate": 4.485736009863968e-05, "loss": 0.3545, "step": 5289 }, { "epoch": 0.8614582909253755, "grad_norm": 0.35710111260414124, "learning_rate": 4.485448184509973e-05, "loss": 0.4147, "step": 5290 }, { "epoch": 0.8616211374832065, "grad_norm": 0.3606579005718231, "learning_rate": 4.485160287872438e-05, "loss": 0.3899, "step": 5291 }, { "epoch": 0.8617839840410373, "grad_norm": 0.2719701826572418, "learning_rate": 4.4848723199617015e-05, "loss": 0.3615, "step": 5292 }, { "epoch": 0.8619468305988682, "grad_norm": 0.3032914996147156, "learning_rate": 4.484584280788102e-05, "loss": 0.3564, "step": 5293 }, { "epoch": 0.8621096771566991, "grad_norm": 0.3506103754043579, "learning_rate": 4.484296170361981e-05, "loss": 0.391, "step": 5294 }, { "epoch": 0.86227252371453, "grad_norm": 0.29718273878097534, "learning_rate": 4.4840079886936826e-05, "loss": 0.351, "step": 5295 }, { "epoch": 0.8624353702723608, "grad_norm": 0.2981201410293579, "learning_rate": 4.483719735793552e-05, "loss": 0.3457, "step": 5296 }, { "epoch": 0.8625982168301918, "grad_norm": 0.3290780484676361, "learning_rate": 4.483431411671941e-05, "loss": 0.3622, "step": 5297 }, { "epoch": 0.8627610633880226, "grad_norm": 0.2923310101032257, "learning_rate": 4.4831430163391986e-05, "loss": 0.3291, "step": 5298 }, { "epoch": 0.8629239099458536, "grad_norm": 0.3619934618473053, "learning_rate": 4.4828545498056805e-05, "loss": 0.3493, "step": 5299 }, { "epoch": 0.8630867565036844, "grad_norm": 0.36490076780319214, "learning_rate": 4.4825660120817436e-05, "loss": 0.3668, "step": 5300 }, { "epoch": 0.8632496030615153, "grad_norm": 0.33959320187568665, "learning_rate": 4.482277403177747e-05, "loss": 0.3429, "step": 5301 }, { "epoch": 0.8634124496193462, "grad_norm": 0.3415062427520752, "learning_rate": 4.481988723104053e-05, "loss": 0.3702, "step": 5302 }, { "epoch": 0.8635752961771771, "grad_norm": 0.3702322244644165, "learning_rate": 4.481699971871025e-05, "loss": 0.3347, "step": 5303 }, { "epoch": 0.8637381427350079, "grad_norm": 0.3779192864894867, "learning_rate": 4.48141114948903e-05, "loss": 0.3338, "step": 5304 }, { "epoch": 0.8639009892928389, "grad_norm": 0.4108777344226837, "learning_rate": 4.4811222559684395e-05, "loss": 0.3993, "step": 5305 }, { "epoch": 0.8640638358506697, "grad_norm": 0.37265318632125854, "learning_rate": 4.480833291319624e-05, "loss": 0.3945, "step": 5306 }, { "epoch": 0.8642266824085005, "grad_norm": 0.399313747882843, "learning_rate": 4.4805442555529585e-05, "loss": 0.3886, "step": 5307 }, { "epoch": 0.8643895289663315, "grad_norm": 0.3840380609035492, "learning_rate": 4.4802551486788205e-05, "loss": 0.3711, "step": 5308 }, { "epoch": 0.8645523755241623, "grad_norm": 0.38719508051872253, "learning_rate": 4.479965970707589e-05, "loss": 0.3679, "step": 5309 }, { "epoch": 0.8647152220819933, "grad_norm": 0.2706124782562256, "learning_rate": 4.4796767216496486e-05, "loss": 0.3465, "step": 5310 }, { "epoch": 0.8648780686398241, "grad_norm": 0.30670762062072754, "learning_rate": 4.4793874015153806e-05, "loss": 0.3515, "step": 5311 }, { "epoch": 0.865040915197655, "grad_norm": 0.30421993136405945, "learning_rate": 4.479098010315176e-05, "loss": 0.3328, "step": 5312 }, { "epoch": 0.8652037617554859, "grad_norm": 0.3530656695365906, "learning_rate": 4.478808548059422e-05, "loss": 0.349, "step": 5313 }, { "epoch": 0.8653666083133168, "grad_norm": 0.2787899076938629, "learning_rate": 4.4785190147585136e-05, "loss": 0.3324, "step": 5314 }, { "epoch": 0.8655294548711476, "grad_norm": 0.46363651752471924, "learning_rate": 4.4782294104228444e-05, "loss": 0.3499, "step": 5315 }, { "epoch": 0.8656923014289786, "grad_norm": 0.33250346779823303, "learning_rate": 4.477939735062813e-05, "loss": 0.3737, "step": 5316 }, { "epoch": 0.8658551479868094, "grad_norm": 0.3236272633075714, "learning_rate": 4.4776499886888176e-05, "loss": 0.3748, "step": 5317 }, { "epoch": 0.8660179945446403, "grad_norm": 0.2975015938282013, "learning_rate": 4.477360171311263e-05, "loss": 0.3512, "step": 5318 }, { "epoch": 0.8661808411024712, "grad_norm": 0.35113322734832764, "learning_rate": 4.4770702829405555e-05, "loss": 0.3853, "step": 5319 }, { "epoch": 0.8663436876603021, "grad_norm": 0.3141186833381653, "learning_rate": 4.4767803235871e-05, "loss": 0.3486, "step": 5320 }, { "epoch": 0.866506534218133, "grad_norm": 0.3422425389289856, "learning_rate": 4.476490293261308e-05, "loss": 0.3668, "step": 5321 }, { "epoch": 0.8666693807759639, "grad_norm": 0.36064767837524414, "learning_rate": 4.476200191973593e-05, "loss": 0.3764, "step": 5322 }, { "epoch": 0.8668322273337947, "grad_norm": 0.3292834162712097, "learning_rate": 4.4759100197343704e-05, "loss": 0.3736, "step": 5323 }, { "epoch": 0.8669950738916257, "grad_norm": 0.28079378604888916, "learning_rate": 4.475619776554058e-05, "loss": 0.322, "step": 5324 }, { "epoch": 0.8671579204494565, "grad_norm": 0.31684696674346924, "learning_rate": 4.4753294624430765e-05, "loss": 0.3469, "step": 5325 }, { "epoch": 0.8673207670072873, "grad_norm": 0.3441360890865326, "learning_rate": 4.475039077411849e-05, "loss": 0.3655, "step": 5326 }, { "epoch": 0.8674836135651183, "grad_norm": 0.407730370759964, "learning_rate": 4.474748621470802e-05, "loss": 0.4025, "step": 5327 }, { "epoch": 0.8676464601229491, "grad_norm": 0.33397799730300903, "learning_rate": 4.474458094630362e-05, "loss": 0.344, "step": 5328 }, { "epoch": 0.86780930668078, "grad_norm": 0.30749088525772095, "learning_rate": 4.474167496900961e-05, "loss": 0.3861, "step": 5329 }, { "epoch": 0.8679721532386109, "grad_norm": 0.2980248034000397, "learning_rate": 4.473876828293032e-05, "loss": 0.3358, "step": 5330 }, { "epoch": 0.8681349997964418, "grad_norm": 0.3477414548397064, "learning_rate": 4.4735860888170116e-05, "loss": 0.3604, "step": 5331 }, { "epoch": 0.8682978463542727, "grad_norm": 0.3367716073989868, "learning_rate": 4.473295278483337e-05, "loss": 0.3579, "step": 5332 }, { "epoch": 0.8684606929121036, "grad_norm": 0.28480952978134155, "learning_rate": 4.47300439730245e-05, "loss": 0.3399, "step": 5333 }, { "epoch": 0.8686235394699344, "grad_norm": 0.35453277826309204, "learning_rate": 4.472713445284794e-05, "loss": 0.348, "step": 5334 }, { "epoch": 0.8687863860277654, "grad_norm": 0.334118127822876, "learning_rate": 4.4724224224408145e-05, "loss": 0.3741, "step": 5335 }, { "epoch": 0.8689492325855962, "grad_norm": 0.3991023898124695, "learning_rate": 4.472131328780961e-05, "loss": 0.445, "step": 5336 }, { "epoch": 0.8691120791434271, "grad_norm": 0.35995355248451233, "learning_rate": 4.471840164315684e-05, "loss": 0.3996, "step": 5337 }, { "epoch": 0.869274925701258, "grad_norm": 0.3623158037662506, "learning_rate": 4.471548929055437e-05, "loss": 0.402, "step": 5338 }, { "epoch": 0.8694377722590889, "grad_norm": 0.2982673943042755, "learning_rate": 4.471257623010677e-05, "loss": 0.3511, "step": 5339 }, { "epoch": 0.8696006188169197, "grad_norm": 0.3411839008331299, "learning_rate": 4.4709662461918616e-05, "loss": 0.368, "step": 5340 }, { "epoch": 0.8697634653747507, "grad_norm": 0.3671233654022217, "learning_rate": 4.470674798609453e-05, "loss": 0.3931, "step": 5341 }, { "epoch": 0.8699263119325815, "grad_norm": 0.3052576780319214, "learning_rate": 4.470383280273915e-05, "loss": 0.3766, "step": 5342 }, { "epoch": 0.8700891584904125, "grad_norm": 0.3333149552345276, "learning_rate": 4.470091691195714e-05, "loss": 0.3241, "step": 5343 }, { "epoch": 0.8702520050482433, "grad_norm": 0.35137781500816345, "learning_rate": 4.469800031385318e-05, "loss": 0.3608, "step": 5344 }, { "epoch": 0.8704148516060741, "grad_norm": 0.316902220249176, "learning_rate": 4.4695083008532e-05, "loss": 0.325, "step": 5345 }, { "epoch": 0.8705776981639051, "grad_norm": 0.33199942111968994, "learning_rate": 4.469216499609833e-05, "loss": 0.3634, "step": 5346 }, { "epoch": 0.8707405447217359, "grad_norm": 0.3144994378089905, "learning_rate": 4.468924627665694e-05, "loss": 0.3539, "step": 5347 }, { "epoch": 0.8709033912795668, "grad_norm": 0.3529750108718872, "learning_rate": 4.4686326850312615e-05, "loss": 0.4198, "step": 5348 }, { "epoch": 0.8710662378373977, "grad_norm": 0.2779342830181122, "learning_rate": 4.4683406717170175e-05, "loss": 0.3266, "step": 5349 }, { "epoch": 0.8712290843952286, "grad_norm": 0.28802740573883057, "learning_rate": 4.468048587733445e-05, "loss": 0.3516, "step": 5350 }, { "epoch": 0.8713919309530594, "grad_norm": 0.3268727660179138, "learning_rate": 4.4677564330910337e-05, "loss": 0.3867, "step": 5351 }, { "epoch": 0.8715547775108904, "grad_norm": 0.30926454067230225, "learning_rate": 4.46746420780027e-05, "loss": 0.3663, "step": 5352 }, { "epoch": 0.8717176240687212, "grad_norm": 0.3158229887485504, "learning_rate": 4.467171911871646e-05, "loss": 0.3381, "step": 5353 }, { "epoch": 0.8718804706265522, "grad_norm": 0.32337239384651184, "learning_rate": 4.466879545315658e-05, "loss": 0.3626, "step": 5354 }, { "epoch": 0.872043317184383, "grad_norm": 0.28433847427368164, "learning_rate": 4.4665871081428004e-05, "loss": 0.3476, "step": 5355 }, { "epoch": 0.8722061637422139, "grad_norm": 0.38367390632629395, "learning_rate": 4.466294600363573e-05, "loss": 0.4132, "step": 5356 }, { "epoch": 0.8723690103000448, "grad_norm": 0.3467840552330017, "learning_rate": 4.466002021988479e-05, "loss": 0.3477, "step": 5357 }, { "epoch": 0.8725318568578757, "grad_norm": 0.2834518253803253, "learning_rate": 4.465709373028022e-05, "loss": 0.3672, "step": 5358 }, { "epoch": 0.8726947034157065, "grad_norm": 0.2887676954269409, "learning_rate": 4.465416653492709e-05, "loss": 0.3617, "step": 5359 }, { "epoch": 0.8728575499735375, "grad_norm": 0.4621098041534424, "learning_rate": 4.4651238633930504e-05, "loss": 0.4097, "step": 5360 }, { "epoch": 0.8730203965313683, "grad_norm": 0.3716270923614502, "learning_rate": 4.4648310027395564e-05, "loss": 0.3639, "step": 5361 }, { "epoch": 0.8731832430891993, "grad_norm": 0.3583161532878876, "learning_rate": 4.464538071542743e-05, "loss": 0.3717, "step": 5362 }, { "epoch": 0.8733460896470301, "grad_norm": 0.36505159735679626, "learning_rate": 4.4642450698131265e-05, "loss": 0.3661, "step": 5363 }, { "epoch": 0.8735089362048609, "grad_norm": 0.3473808467388153, "learning_rate": 4.4639519975612275e-05, "loss": 0.3664, "step": 5364 }, { "epoch": 0.8736717827626919, "grad_norm": 0.37208816409111023, "learning_rate": 4.463658854797567e-05, "loss": 0.3877, "step": 5365 }, { "epoch": 0.8738346293205227, "grad_norm": 0.320436030626297, "learning_rate": 4.463365641532671e-05, "loss": 0.3786, "step": 5366 }, { "epoch": 0.8739974758783536, "grad_norm": 0.342185914516449, "learning_rate": 4.463072357777066e-05, "loss": 0.3332, "step": 5367 }, { "epoch": 0.8741603224361845, "grad_norm": 0.4126918613910675, "learning_rate": 4.462779003541282e-05, "loss": 0.425, "step": 5368 }, { "epoch": 0.8743231689940154, "grad_norm": 0.34229257702827454, "learning_rate": 4.4624855788358505e-05, "loss": 0.3346, "step": 5369 }, { "epoch": 0.8744860155518462, "grad_norm": 0.28235775232315063, "learning_rate": 4.462192083671307e-05, "loss": 0.3602, "step": 5370 }, { "epoch": 0.8746488621096772, "grad_norm": 0.3569241762161255, "learning_rate": 4.4618985180581895e-05, "loss": 0.3776, "step": 5371 }, { "epoch": 0.874811708667508, "grad_norm": 0.3448449373245239, "learning_rate": 4.461604882007037e-05, "loss": 0.3686, "step": 5372 }, { "epoch": 0.874974555225339, "grad_norm": 0.31524622440338135, "learning_rate": 4.4613111755283915e-05, "loss": 0.3609, "step": 5373 }, { "epoch": 0.8751374017831698, "grad_norm": 0.3227123022079468, "learning_rate": 4.4610173986327984e-05, "loss": 0.3506, "step": 5374 }, { "epoch": 0.8753002483410007, "grad_norm": 0.32526108622550964, "learning_rate": 4.460723551330806e-05, "loss": 0.3952, "step": 5375 }, { "epoch": 0.8754630948988316, "grad_norm": 0.3736424446105957, "learning_rate": 4.460429633632964e-05, "loss": 0.418, "step": 5376 }, { "epoch": 0.8756259414566625, "grad_norm": 0.3469066917896271, "learning_rate": 4.4601356455498245e-05, "loss": 0.3898, "step": 5377 }, { "epoch": 0.8757887880144933, "grad_norm": 0.3332265019416809, "learning_rate": 4.4598415870919416e-05, "loss": 0.3738, "step": 5378 }, { "epoch": 0.8759516345723243, "grad_norm": 0.2957490384578705, "learning_rate": 4.459547458269875e-05, "loss": 0.3407, "step": 5379 }, { "epoch": 0.8761144811301551, "grad_norm": 0.35398489236831665, "learning_rate": 4.4592532590941824e-05, "loss": 0.4048, "step": 5380 }, { "epoch": 0.8762773276879859, "grad_norm": 0.33998623490333557, "learning_rate": 4.458958989575429e-05, "loss": 0.3818, "step": 5381 }, { "epoch": 0.8764401742458169, "grad_norm": 0.2957214117050171, "learning_rate": 4.458664649724178e-05, "loss": 0.3153, "step": 5382 }, { "epoch": 0.8766030208036477, "grad_norm": 0.35197049379348755, "learning_rate": 4.4583702395509977e-05, "loss": 0.3754, "step": 5383 }, { "epoch": 0.8767658673614787, "grad_norm": 0.35587072372436523, "learning_rate": 4.458075759066458e-05, "loss": 0.3856, "step": 5384 }, { "epoch": 0.8769287139193095, "grad_norm": 0.31477510929107666, "learning_rate": 4.457781208281133e-05, "loss": 0.3433, "step": 5385 }, { "epoch": 0.8770915604771404, "grad_norm": 0.32944369316101074, "learning_rate": 4.4574865872055964e-05, "loss": 0.3546, "step": 5386 }, { "epoch": 0.8772544070349713, "grad_norm": 0.29301613569259644, "learning_rate": 4.457191895850427e-05, "loss": 0.3767, "step": 5387 }, { "epoch": 0.8774172535928022, "grad_norm": 0.33303534984588623, "learning_rate": 4.456897134226205e-05, "loss": 0.3798, "step": 5388 }, { "epoch": 0.877580100150633, "grad_norm": 0.3210590183734894, "learning_rate": 4.456602302343511e-05, "loss": 0.3675, "step": 5389 }, { "epoch": 0.877742946708464, "grad_norm": 0.35971879959106445, "learning_rate": 4.4563074002129335e-05, "loss": 0.3534, "step": 5390 }, { "epoch": 0.8779057932662948, "grad_norm": 0.3251604735851288, "learning_rate": 4.456012427845059e-05, "loss": 0.3538, "step": 5391 }, { "epoch": 0.8780686398241258, "grad_norm": 0.5027142763137817, "learning_rate": 4.4557173852504774e-05, "loss": 0.3832, "step": 5392 }, { "epoch": 0.8782314863819566, "grad_norm": 0.2981778383255005, "learning_rate": 4.455422272439783e-05, "loss": 0.343, "step": 5393 }, { "epoch": 0.8783943329397875, "grad_norm": 0.3488844931125641, "learning_rate": 4.4551270894235694e-05, "loss": 0.3494, "step": 5394 }, { "epoch": 0.8785571794976184, "grad_norm": 0.30649223923683167, "learning_rate": 4.454831836212437e-05, "loss": 0.3255, "step": 5395 }, { "epoch": 0.8787200260554493, "grad_norm": 0.3499997556209564, "learning_rate": 4.454536512816984e-05, "loss": 0.3909, "step": 5396 }, { "epoch": 0.8788828726132801, "grad_norm": 0.27153658866882324, "learning_rate": 4.4542411192478146e-05, "loss": 0.3782, "step": 5397 }, { "epoch": 0.8790457191711111, "grad_norm": 0.32673004269599915, "learning_rate": 4.453945655515534e-05, "loss": 0.3313, "step": 5398 }, { "epoch": 0.8792085657289419, "grad_norm": 0.3509279489517212, "learning_rate": 4.4536501216307494e-05, "loss": 0.3213, "step": 5399 }, { "epoch": 0.8793714122867727, "grad_norm": 0.2740625739097595, "learning_rate": 4.453354517604073e-05, "loss": 0.3666, "step": 5400 }, { "epoch": 0.8795342588446037, "grad_norm": 0.34873324632644653, "learning_rate": 4.453058843446116e-05, "loss": 0.3432, "step": 5401 }, { "epoch": 0.8796971054024345, "grad_norm": 0.4076334834098816, "learning_rate": 4.452763099167496e-05, "loss": 0.3839, "step": 5402 }, { "epoch": 0.8798599519602655, "grad_norm": 0.30286577343940735, "learning_rate": 4.45246728477883e-05, "loss": 0.3855, "step": 5403 }, { "epoch": 0.8800227985180963, "grad_norm": 0.30141711235046387, "learning_rate": 4.4521714002907386e-05, "loss": 0.3505, "step": 5404 }, { "epoch": 0.8801856450759272, "grad_norm": 0.3953431248664856, "learning_rate": 4.451875445713844e-05, "loss": 0.3528, "step": 5405 }, { "epoch": 0.8803484916337581, "grad_norm": 0.4252735674381256, "learning_rate": 4.451579421058775e-05, "loss": 0.4137, "step": 5406 }, { "epoch": 0.880511338191589, "grad_norm": 0.34054145216941833, "learning_rate": 4.4512833263361566e-05, "loss": 0.4283, "step": 5407 }, { "epoch": 0.8806741847494198, "grad_norm": 0.42126429080963135, "learning_rate": 4.45098716155662e-05, "loss": 0.3567, "step": 5408 }, { "epoch": 0.8808370313072508, "grad_norm": 0.38218510150909424, "learning_rate": 4.4506909267307996e-05, "loss": 0.3693, "step": 5409 }, { "epoch": 0.8809998778650816, "grad_norm": 0.3857840299606323, "learning_rate": 4.450394621869331e-05, "loss": 0.3813, "step": 5410 }, { "epoch": 0.8811627244229125, "grad_norm": 0.3137700855731964, "learning_rate": 4.450098246982851e-05, "loss": 0.3447, "step": 5411 }, { "epoch": 0.8813255709807434, "grad_norm": 0.3705417513847351, "learning_rate": 4.449801802082002e-05, "loss": 0.3313, "step": 5412 }, { "epoch": 0.8814884175385743, "grad_norm": 0.3509885370731354, "learning_rate": 4.4495052871774253e-05, "loss": 0.3828, "step": 5413 }, { "epoch": 0.8816512640964052, "grad_norm": 0.4955592453479767, "learning_rate": 4.4492087022797693e-05, "loss": 0.3906, "step": 5414 }, { "epoch": 0.8818141106542361, "grad_norm": 0.4065827429294586, "learning_rate": 4.448912047399681e-05, "loss": 0.3693, "step": 5415 }, { "epoch": 0.8819769572120669, "grad_norm": 0.3604043126106262, "learning_rate": 4.44861532254781e-05, "loss": 0.3536, "step": 5416 }, { "epoch": 0.8821398037698979, "grad_norm": 0.3745419383049011, "learning_rate": 4.448318527734811e-05, "loss": 0.3601, "step": 5417 }, { "epoch": 0.8823026503277287, "grad_norm": 0.28224804997444153, "learning_rate": 4.44802166297134e-05, "loss": 0.3433, "step": 5418 }, { "epoch": 0.8824654968855595, "grad_norm": 0.4091125726699829, "learning_rate": 4.4477247282680546e-05, "loss": 0.4532, "step": 5419 }, { "epoch": 0.8826283434433905, "grad_norm": 0.34818387031555176, "learning_rate": 4.447427723635616e-05, "loss": 0.3687, "step": 5420 }, { "epoch": 0.8827911900012213, "grad_norm": 0.44193997979164124, "learning_rate": 4.447130649084688e-05, "loss": 0.339, "step": 5421 }, { "epoch": 0.8829540365590522, "grad_norm": 0.35992127656936646, "learning_rate": 4.446833504625935e-05, "loss": 0.4054, "step": 5422 }, { "epoch": 0.8831168831168831, "grad_norm": 0.74310702085495, "learning_rate": 4.446536290270026e-05, "loss": 0.407, "step": 5423 }, { "epoch": 0.883279729674714, "grad_norm": 0.3214717209339142, "learning_rate": 4.446239006027633e-05, "loss": 0.3673, "step": 5424 }, { "epoch": 0.8834425762325449, "grad_norm": 0.325468510389328, "learning_rate": 4.445941651909429e-05, "loss": 0.3669, "step": 5425 }, { "epoch": 0.8836054227903758, "grad_norm": 0.3469528257846832, "learning_rate": 4.445644227926089e-05, "loss": 0.3945, "step": 5426 }, { "epoch": 0.8837682693482066, "grad_norm": 0.32028305530548096, "learning_rate": 4.4453467340882914e-05, "loss": 0.3605, "step": 5427 }, { "epoch": 0.8839311159060376, "grad_norm": 0.3279324769973755, "learning_rate": 4.445049170406719e-05, "loss": 0.3275, "step": 5428 }, { "epoch": 0.8840939624638684, "grad_norm": 0.36666956543922424, "learning_rate": 4.444751536892052e-05, "loss": 0.3418, "step": 5429 }, { "epoch": 0.8842568090216993, "grad_norm": 0.2913421094417572, "learning_rate": 4.44445383355498e-05, "loss": 0.3446, "step": 5430 }, { "epoch": 0.8844196555795302, "grad_norm": 0.2898324728012085, "learning_rate": 4.444156060406188e-05, "loss": 0.3702, "step": 5431 }, { "epoch": 0.8845825021373611, "grad_norm": 0.3326168954372406, "learning_rate": 4.44385821745637e-05, "loss": 0.3757, "step": 5432 }, { "epoch": 0.884745348695192, "grad_norm": 0.34921136498451233, "learning_rate": 4.443560304716217e-05, "loss": 0.3857, "step": 5433 }, { "epoch": 0.8849081952530229, "grad_norm": 0.28244519233703613, "learning_rate": 4.443262322196427e-05, "loss": 0.3347, "step": 5434 }, { "epoch": 0.8850710418108537, "grad_norm": 0.29351818561553955, "learning_rate": 4.4429642699076966e-05, "loss": 0.3455, "step": 5435 }, { "epoch": 0.8852338883686846, "grad_norm": 0.36077773571014404, "learning_rate": 4.442666147860728e-05, "loss": 0.3738, "step": 5436 }, { "epoch": 0.8853967349265155, "grad_norm": 0.3964611887931824, "learning_rate": 4.442367956066224e-05, "loss": 0.3925, "step": 5437 }, { "epoch": 0.8855595814843463, "grad_norm": 0.34371232986450195, "learning_rate": 4.442069694534892e-05, "loss": 0.3499, "step": 5438 }, { "epoch": 0.8857224280421773, "grad_norm": 0.3547186553478241, "learning_rate": 4.441771363277438e-05, "loss": 0.3695, "step": 5439 }, { "epoch": 0.8858852746000081, "grad_norm": 0.3492700159549713, "learning_rate": 4.441472962304575e-05, "loss": 0.3777, "step": 5440 }, { "epoch": 0.886048121157839, "grad_norm": 0.28371405601501465, "learning_rate": 4.441174491627016e-05, "loss": 0.341, "step": 5441 }, { "epoch": 0.8862109677156699, "grad_norm": 0.3390637934207916, "learning_rate": 4.440875951255476e-05, "loss": 0.3345, "step": 5442 }, { "epoch": 0.8863738142735008, "grad_norm": 0.3309284746646881, "learning_rate": 4.440577341200675e-05, "loss": 0.3904, "step": 5443 }, { "epoch": 0.8865366608313316, "grad_norm": 0.28371280431747437, "learning_rate": 4.440278661473333e-05, "loss": 0.2994, "step": 5444 }, { "epoch": 0.8866995073891626, "grad_norm": 0.3796999156475067, "learning_rate": 4.4399799120841744e-05, "loss": 0.3332, "step": 5445 }, { "epoch": 0.8868623539469934, "grad_norm": 0.3875291645526886, "learning_rate": 4.439681093043924e-05, "loss": 0.3866, "step": 5446 }, { "epoch": 0.8870252005048244, "grad_norm": 0.3486345410346985, "learning_rate": 4.4393822043633115e-05, "loss": 0.3542, "step": 5447 }, { "epoch": 0.8871880470626552, "grad_norm": 0.31443631649017334, "learning_rate": 4.439083246053067e-05, "loss": 0.3418, "step": 5448 }, { "epoch": 0.8873508936204861, "grad_norm": 0.41127145290374756, "learning_rate": 4.4387842181239244e-05, "loss": 0.32, "step": 5449 }, { "epoch": 0.887513740178317, "grad_norm": 0.37515777349472046, "learning_rate": 4.4384851205866196e-05, "loss": 0.3869, "step": 5450 }, { "epoch": 0.8876765867361479, "grad_norm": 0.394599050283432, "learning_rate": 4.438185953451891e-05, "loss": 0.4059, "step": 5451 }, { "epoch": 0.8878394332939787, "grad_norm": 0.40608036518096924, "learning_rate": 4.4378867167304795e-05, "loss": 0.3805, "step": 5452 }, { "epoch": 0.8880022798518097, "grad_norm": 0.4670502543449402, "learning_rate": 4.4375874104331296e-05, "loss": 0.3891, "step": 5453 }, { "epoch": 0.8881651264096405, "grad_norm": 0.34367063641548157, "learning_rate": 4.4372880345705854e-05, "loss": 0.41, "step": 5454 }, { "epoch": 0.8883279729674713, "grad_norm": 0.3865260183811188, "learning_rate": 4.4369885891535966e-05, "loss": 0.3826, "step": 5455 }, { "epoch": 0.8884908195253023, "grad_norm": 0.3624531328678131, "learning_rate": 4.436689074192915e-05, "loss": 0.3752, "step": 5456 }, { "epoch": 0.8886536660831331, "grad_norm": 0.3281830847263336, "learning_rate": 4.436389489699293e-05, "loss": 0.3655, "step": 5457 }, { "epoch": 0.8888165126409641, "grad_norm": 0.3520941436290741, "learning_rate": 4.436089835683486e-05, "loss": 0.3432, "step": 5458 }, { "epoch": 0.8889793591987949, "grad_norm": 0.5965993404388428, "learning_rate": 4.435790112156254e-05, "loss": 0.4441, "step": 5459 }, { "epoch": 0.8891422057566258, "grad_norm": 0.328678697347641, "learning_rate": 4.435490319128357e-05, "loss": 0.3568, "step": 5460 }, { "epoch": 0.8893050523144567, "grad_norm": 0.3600690960884094, "learning_rate": 4.435190456610558e-05, "loss": 0.3843, "step": 5461 }, { "epoch": 0.8894678988722876, "grad_norm": 0.42507851123809814, "learning_rate": 4.434890524613624e-05, "loss": 0.416, "step": 5462 }, { "epoch": 0.8896307454301184, "grad_norm": 0.31359896063804626, "learning_rate": 4.4345905231483234e-05, "loss": 0.4101, "step": 5463 }, { "epoch": 0.8897935919879494, "grad_norm": 0.33366337418556213, "learning_rate": 4.434290452225427e-05, "loss": 0.3376, "step": 5464 }, { "epoch": 0.8899564385457802, "grad_norm": 0.39459481835365295, "learning_rate": 4.4339903118557074e-05, "loss": 0.3565, "step": 5465 }, { "epoch": 0.8901192851036112, "grad_norm": 0.31089407205581665, "learning_rate": 4.433690102049942e-05, "loss": 0.3516, "step": 5466 }, { "epoch": 0.890282131661442, "grad_norm": 0.3588239252567291, "learning_rate": 4.433389822818908e-05, "loss": 0.3934, "step": 5467 }, { "epoch": 0.8904449782192729, "grad_norm": 0.3183304965496063, "learning_rate": 4.4330894741733866e-05, "loss": 0.3766, "step": 5468 }, { "epoch": 0.8906078247771038, "grad_norm": 0.31233343482017517, "learning_rate": 4.432789056124162e-05, "loss": 0.3546, "step": 5469 }, { "epoch": 0.8907706713349347, "grad_norm": 0.3257986009120941, "learning_rate": 4.432488568682018e-05, "loss": 0.3475, "step": 5470 }, { "epoch": 0.8909335178927655, "grad_norm": 0.34897902607917786, "learning_rate": 4.4321880118577455e-05, "loss": 0.3592, "step": 5471 }, { "epoch": 0.8910963644505965, "grad_norm": 0.3431977927684784, "learning_rate": 4.4318873856621346e-05, "loss": 0.4053, "step": 5472 }, { "epoch": 0.8912592110084273, "grad_norm": 0.4156709313392639, "learning_rate": 4.4315866901059786e-05, "loss": 0.3757, "step": 5473 }, { "epoch": 0.8914220575662581, "grad_norm": 0.44427767395973206, "learning_rate": 4.4312859252000726e-05, "loss": 0.4062, "step": 5474 }, { "epoch": 0.8915849041240891, "grad_norm": 0.32373538613319397, "learning_rate": 4.430985090955216e-05, "loss": 0.378, "step": 5475 }, { "epoch": 0.8917477506819199, "grad_norm": 0.3087783753871918, "learning_rate": 4.430684187382209e-05, "loss": 0.3686, "step": 5476 }, { "epoch": 0.8919105972397509, "grad_norm": 0.3042818605899811, "learning_rate": 4.430383214491856e-05, "loss": 0.3263, "step": 5477 }, { "epoch": 0.8920734437975817, "grad_norm": 0.42188820242881775, "learning_rate": 4.430082172294961e-05, "loss": 0.3672, "step": 5478 }, { "epoch": 0.8922362903554126, "grad_norm": 0.329961359500885, "learning_rate": 4.429781060802334e-05, "loss": 0.4033, "step": 5479 }, { "epoch": 0.8923991369132435, "grad_norm": 0.3730613887310028, "learning_rate": 4.4294798800247844e-05, "loss": 0.3961, "step": 5480 }, { "epoch": 0.8925619834710744, "grad_norm": 0.37678107619285583, "learning_rate": 4.4291786299731265e-05, "loss": 0.3293, "step": 5481 }, { "epoch": 0.8927248300289052, "grad_norm": 0.4437981843948364, "learning_rate": 4.428877310658176e-05, "loss": 0.39, "step": 5482 }, { "epoch": 0.8928876765867362, "grad_norm": 0.3375801742076874, "learning_rate": 4.428575922090751e-05, "loss": 0.3265, "step": 5483 }, { "epoch": 0.893050523144567, "grad_norm": 0.33806583285331726, "learning_rate": 4.428274464281672e-05, "loss": 0.3785, "step": 5484 }, { "epoch": 0.893213369702398, "grad_norm": 0.35058867931365967, "learning_rate": 4.4279729372417634e-05, "loss": 0.376, "step": 5485 }, { "epoch": 0.8933762162602288, "grad_norm": 0.4057682752609253, "learning_rate": 4.4276713409818485e-05, "loss": 0.3356, "step": 5486 }, { "epoch": 0.8935390628180597, "grad_norm": 0.2867082953453064, "learning_rate": 4.427369675512758e-05, "loss": 0.3315, "step": 5487 }, { "epoch": 0.8937019093758906, "grad_norm": 0.3617401421070099, "learning_rate": 4.427067940845321e-05, "loss": 0.3813, "step": 5488 }, { "epoch": 0.8938647559337215, "grad_norm": 0.34354373812675476, "learning_rate": 4.426766136990372e-05, "loss": 0.3635, "step": 5489 }, { "epoch": 0.8940276024915523, "grad_norm": 0.3457423448562622, "learning_rate": 4.4264642639587454e-05, "loss": 0.3861, "step": 5490 }, { "epoch": 0.8941904490493833, "grad_norm": 0.3166681230068207, "learning_rate": 4.42616232176128e-05, "loss": 0.3609, "step": 5491 }, { "epoch": 0.8943532956072141, "grad_norm": 0.2911684513092041, "learning_rate": 4.425860310408817e-05, "loss": 0.3257, "step": 5492 }, { "epoch": 0.8945161421650449, "grad_norm": 0.26897335052490234, "learning_rate": 4.4255582299121975e-05, "loss": 0.3347, "step": 5493 }, { "epoch": 0.8946789887228759, "grad_norm": 0.28357675671577454, "learning_rate": 4.4252560802822694e-05, "loss": 0.39, "step": 5494 }, { "epoch": 0.8948418352807067, "grad_norm": 0.33760780096054077, "learning_rate": 4.424953861529879e-05, "loss": 0.3529, "step": 5495 }, { "epoch": 0.8950046818385377, "grad_norm": 0.316917359828949, "learning_rate": 4.4246515736658786e-05, "loss": 0.3664, "step": 5496 }, { "epoch": 0.8951675283963685, "grad_norm": 0.32467612624168396, "learning_rate": 4.42434921670112e-05, "loss": 0.3695, "step": 5497 }, { "epoch": 0.8953303749541994, "grad_norm": 0.3368988335132599, "learning_rate": 4.424046790646459e-05, "loss": 0.3903, "step": 5498 }, { "epoch": 0.8954932215120303, "grad_norm": 0.3063381612300873, "learning_rate": 4.423744295512753e-05, "loss": 0.3722, "step": 5499 }, { "epoch": 0.8956560680698612, "grad_norm": 0.36825764179229736, "learning_rate": 4.4234417313108634e-05, "loss": 0.3305, "step": 5500 }, { "epoch": 0.895818914627692, "grad_norm": 0.36179015040397644, "learning_rate": 4.4231390980516535e-05, "loss": 0.3902, "step": 5501 }, { "epoch": 0.895981761185523, "grad_norm": 0.2997303307056427, "learning_rate": 4.422836395745987e-05, "loss": 0.3599, "step": 5502 }, { "epoch": 0.8961446077433538, "grad_norm": 0.30259814858436584, "learning_rate": 4.4225336244047346e-05, "loss": 0.3483, "step": 5503 }, { "epoch": 0.8963074543011847, "grad_norm": 0.33457133173942566, "learning_rate": 4.422230784038763e-05, "loss": 0.407, "step": 5504 }, { "epoch": 0.8964703008590156, "grad_norm": 0.32483890652656555, "learning_rate": 4.421927874658949e-05, "loss": 0.402, "step": 5505 }, { "epoch": 0.8966331474168465, "grad_norm": 0.3176102936267853, "learning_rate": 4.4216248962761646e-05, "loss": 0.3532, "step": 5506 }, { "epoch": 0.8967959939746774, "grad_norm": 0.3143283426761627, "learning_rate": 4.42132184890129e-05, "loss": 0.3838, "step": 5507 }, { "epoch": 0.8969588405325083, "grad_norm": 0.362169086933136, "learning_rate": 4.421018732545204e-05, "loss": 0.3761, "step": 5508 }, { "epoch": 0.8971216870903391, "grad_norm": 0.37604638934135437, "learning_rate": 4.42071554721879e-05, "loss": 0.4052, "step": 5509 }, { "epoch": 0.89728453364817, "grad_norm": 0.29849568009376526, "learning_rate": 4.420412292932934e-05, "loss": 0.3609, "step": 5510 }, { "epoch": 0.8974473802060009, "grad_norm": 0.2900264859199524, "learning_rate": 4.4201089696985224e-05, "loss": 0.3352, "step": 5511 }, { "epoch": 0.8976102267638317, "grad_norm": 0.29264703392982483, "learning_rate": 4.419805577526446e-05, "loss": 0.3374, "step": 5512 }, { "epoch": 0.8977730733216627, "grad_norm": 0.2995876669883728, "learning_rate": 4.419502116427597e-05, "loss": 0.3554, "step": 5513 }, { "epoch": 0.8979359198794935, "grad_norm": 0.3193024694919586, "learning_rate": 4.4191985864128714e-05, "loss": 0.3673, "step": 5514 }, { "epoch": 0.8980987664373244, "grad_norm": 0.29870519042015076, "learning_rate": 4.4188949874931665e-05, "loss": 0.3656, "step": 5515 }, { "epoch": 0.8982616129951553, "grad_norm": 0.3222547471523285, "learning_rate": 4.418591319679383e-05, "loss": 0.3885, "step": 5516 }, { "epoch": 0.8984244595529862, "grad_norm": 0.2962358593940735, "learning_rate": 4.4182875829824224e-05, "loss": 0.3623, "step": 5517 }, { "epoch": 0.898587306110817, "grad_norm": 0.29746291041374207, "learning_rate": 4.417983777413189e-05, "loss": 0.378, "step": 5518 }, { "epoch": 0.898750152668648, "grad_norm": 0.2996596693992615, "learning_rate": 4.417679902982593e-05, "loss": 0.385, "step": 5519 }, { "epoch": 0.8989129992264788, "grad_norm": 0.3460424244403839, "learning_rate": 4.4173759597015427e-05, "loss": 0.371, "step": 5520 }, { "epoch": 0.8990758457843098, "grad_norm": 0.31156760454177856, "learning_rate": 4.417071947580951e-05, "loss": 0.385, "step": 5521 }, { "epoch": 0.8992386923421406, "grad_norm": 0.31753402948379517, "learning_rate": 4.416767866631732e-05, "loss": 0.3829, "step": 5522 }, { "epoch": 0.8994015388999715, "grad_norm": 0.27891600131988525, "learning_rate": 4.416463716864805e-05, "loss": 0.3439, "step": 5523 }, { "epoch": 0.8995643854578024, "grad_norm": 0.32054030895233154, "learning_rate": 4.416159498291088e-05, "loss": 0.378, "step": 5524 }, { "epoch": 0.8997272320156333, "grad_norm": 0.3266076445579529, "learning_rate": 4.415855210921504e-05, "loss": 0.3653, "step": 5525 }, { "epoch": 0.8998900785734641, "grad_norm": 0.2900645136833191, "learning_rate": 4.4155508547669775e-05, "loss": 0.3496, "step": 5526 }, { "epoch": 0.9000529251312951, "grad_norm": 0.3065005838871002, "learning_rate": 4.415246429838437e-05, "loss": 0.3679, "step": 5527 }, { "epoch": 0.9002157716891259, "grad_norm": 0.30003049969673157, "learning_rate": 4.414941936146811e-05, "loss": 0.3967, "step": 5528 }, { "epoch": 0.9003786182469568, "grad_norm": 0.27846649289131165, "learning_rate": 4.4146373737030324e-05, "loss": 0.3836, "step": 5529 }, { "epoch": 0.9005414648047877, "grad_norm": 0.30255141854286194, "learning_rate": 4.4143327425180346e-05, "loss": 0.3794, "step": 5530 }, { "epoch": 0.9007043113626185, "grad_norm": 0.3191094398498535, "learning_rate": 4.414028042602757e-05, "loss": 0.3636, "step": 5531 }, { "epoch": 0.9008671579204495, "grad_norm": 0.3295920789241791, "learning_rate": 4.4137232739681376e-05, "loss": 0.3741, "step": 5532 }, { "epoch": 0.9010300044782803, "grad_norm": 0.3143955171108246, "learning_rate": 4.4134184366251196e-05, "loss": 0.3763, "step": 5533 }, { "epoch": 0.9011928510361112, "grad_norm": 0.2667892575263977, "learning_rate": 4.4131135305846464e-05, "loss": 0.3173, "step": 5534 }, { "epoch": 0.9013556975939421, "grad_norm": 0.29044756293296814, "learning_rate": 4.412808555857666e-05, "loss": 0.3464, "step": 5535 }, { "epoch": 0.901518544151773, "grad_norm": 0.3360150456428528, "learning_rate": 4.412503512455127e-05, "loss": 0.3691, "step": 5536 }, { "epoch": 0.9016813907096038, "grad_norm": 0.2646896243095398, "learning_rate": 4.412198400387982e-05, "loss": 0.3274, "step": 5537 }, { "epoch": 0.9018442372674348, "grad_norm": 0.2977801263332367, "learning_rate": 4.411893219667186e-05, "loss": 0.3652, "step": 5538 }, { "epoch": 0.9020070838252656, "grad_norm": 0.30062538385391235, "learning_rate": 4.4115879703036945e-05, "loss": 0.3889, "step": 5539 }, { "epoch": 0.9021699303830966, "grad_norm": 0.31521445512771606, "learning_rate": 4.411282652308468e-05, "loss": 0.3597, "step": 5540 }, { "epoch": 0.9023327769409274, "grad_norm": 0.2761610448360443, "learning_rate": 4.4109772656924675e-05, "loss": 0.3523, "step": 5541 }, { "epoch": 0.9024956234987583, "grad_norm": 0.3149428963661194, "learning_rate": 4.4106718104666586e-05, "loss": 0.3789, "step": 5542 }, { "epoch": 0.9026584700565892, "grad_norm": 0.31981900334358215, "learning_rate": 4.410366286642006e-05, "loss": 0.3725, "step": 5543 }, { "epoch": 0.9028213166144201, "grad_norm": 0.277780681848526, "learning_rate": 4.410060694229481e-05, "loss": 0.3803, "step": 5544 }, { "epoch": 0.9029841631722509, "grad_norm": 0.274960458278656, "learning_rate": 4.4097550332400536e-05, "loss": 0.3111, "step": 5545 }, { "epoch": 0.9031470097300819, "grad_norm": 0.3786827027797699, "learning_rate": 4.4094493036846994e-05, "loss": 0.3767, "step": 5546 }, { "epoch": 0.9033098562879127, "grad_norm": 0.35413238406181335, "learning_rate": 4.409143505574395e-05, "loss": 0.3572, "step": 5547 }, { "epoch": 0.9034727028457435, "grad_norm": 0.3211943805217743, "learning_rate": 4.408837638920117e-05, "loss": 0.3218, "step": 5548 }, { "epoch": 0.9036355494035745, "grad_norm": 0.34663575887680054, "learning_rate": 4.40853170373285e-05, "loss": 0.3727, "step": 5549 }, { "epoch": 0.9037983959614053, "grad_norm": 0.46267881989479065, "learning_rate": 4.408225700023576e-05, "loss": 0.3413, "step": 5550 }, { "epoch": 0.9039612425192363, "grad_norm": 0.2868475317955017, "learning_rate": 4.4079196278032824e-05, "loss": 0.3386, "step": 5551 }, { "epoch": 0.9041240890770671, "grad_norm": 0.2861233353614807, "learning_rate": 4.407613487082958e-05, "loss": 0.3389, "step": 5552 }, { "epoch": 0.904286935634898, "grad_norm": 0.416370689868927, "learning_rate": 4.407307277873595e-05, "loss": 0.3778, "step": 5553 }, { "epoch": 0.9044497821927289, "grad_norm": 0.34614554047584534, "learning_rate": 4.407001000186185e-05, "loss": 0.3939, "step": 5554 }, { "epoch": 0.9046126287505598, "grad_norm": 0.3199668824672699, "learning_rate": 4.406694654031726e-05, "loss": 0.3781, "step": 5555 }, { "epoch": 0.9047754753083906, "grad_norm": 0.32528626918792725, "learning_rate": 4.406388239421216e-05, "loss": 0.3671, "step": 5556 }, { "epoch": 0.9049383218662216, "grad_norm": 0.44334548711776733, "learning_rate": 4.406081756365657e-05, "loss": 0.3467, "step": 5557 }, { "epoch": 0.9051011684240524, "grad_norm": 0.3801724314689636, "learning_rate": 4.4057752048760514e-05, "loss": 0.3416, "step": 5558 }, { "epoch": 0.9052640149818834, "grad_norm": 0.30337437987327576, "learning_rate": 4.4054685849634066e-05, "loss": 0.3149, "step": 5559 }, { "epoch": 0.9054268615397142, "grad_norm": 0.30000612139701843, "learning_rate": 4.405161896638731e-05, "loss": 0.3538, "step": 5560 }, { "epoch": 0.9055897080975451, "grad_norm": 0.32403144240379333, "learning_rate": 4.4048551399130344e-05, "loss": 0.3633, "step": 5561 }, { "epoch": 0.905752554655376, "grad_norm": 0.3078153431415558, "learning_rate": 4.404548314797332e-05, "loss": 0.3329, "step": 5562 }, { "epoch": 0.9059154012132069, "grad_norm": 0.40689828991889954, "learning_rate": 4.404241421302638e-05, "loss": 0.4067, "step": 5563 }, { "epoch": 0.9060782477710377, "grad_norm": 0.36316391825675964, "learning_rate": 4.403934459439974e-05, "loss": 0.3883, "step": 5564 }, { "epoch": 0.9062410943288686, "grad_norm": 0.3107110559940338, "learning_rate": 4.4036274292203564e-05, "loss": 0.3682, "step": 5565 }, { "epoch": 0.9064039408866995, "grad_norm": 0.2991505563259125, "learning_rate": 4.403320330654811e-05, "loss": 0.343, "step": 5566 }, { "epoch": 0.9065667874445303, "grad_norm": 0.3341379463672638, "learning_rate": 4.403013163754363e-05, "loss": 0.3532, "step": 5567 }, { "epoch": 0.9067296340023613, "grad_norm": 0.4040378928184509, "learning_rate": 4.4027059285300414e-05, "loss": 0.3773, "step": 5568 }, { "epoch": 0.9068924805601921, "grad_norm": 0.3071313202381134, "learning_rate": 4.402398624992876e-05, "loss": 0.3218, "step": 5569 }, { "epoch": 0.9070553271180231, "grad_norm": 0.28697678446769714, "learning_rate": 4.4020912531539e-05, "loss": 0.3373, "step": 5570 }, { "epoch": 0.9072181736758539, "grad_norm": 0.3255434036254883, "learning_rate": 4.401783813024151e-05, "loss": 0.3439, "step": 5571 }, { "epoch": 0.9073810202336848, "grad_norm": 0.3127719461917877, "learning_rate": 4.4014763046146636e-05, "loss": 0.3533, "step": 5572 }, { "epoch": 0.9075438667915157, "grad_norm": 0.35093966126441956, "learning_rate": 4.40116872793648e-05, "loss": 0.3808, "step": 5573 }, { "epoch": 0.9077067133493466, "grad_norm": 0.29978814721107483, "learning_rate": 4.4008610830006424e-05, "loss": 0.3542, "step": 5574 }, { "epoch": 0.9078695599071774, "grad_norm": 0.3184840977191925, "learning_rate": 4.400553369818198e-05, "loss": 0.3497, "step": 5575 }, { "epoch": 0.9080324064650084, "grad_norm": 0.3039191961288452, "learning_rate": 4.4002455884001925e-05, "loss": 0.3536, "step": 5576 }, { "epoch": 0.9081952530228392, "grad_norm": 0.3286212384700775, "learning_rate": 4.399937738757678e-05, "loss": 0.3667, "step": 5577 }, { "epoch": 0.9083580995806702, "grad_norm": 0.30367887020111084, "learning_rate": 4.3996298209017054e-05, "loss": 0.3266, "step": 5578 }, { "epoch": 0.908520946138501, "grad_norm": 0.3157542049884796, "learning_rate": 4.399321834843332e-05, "loss": 0.372, "step": 5579 }, { "epoch": 0.9086837926963319, "grad_norm": 0.29975271224975586, "learning_rate": 4.399013780593613e-05, "loss": 0.4082, "step": 5580 }, { "epoch": 0.9088466392541628, "grad_norm": 0.32248982787132263, "learning_rate": 4.39870565816361e-05, "loss": 0.3429, "step": 5581 }, { "epoch": 0.9090094858119937, "grad_norm": 0.3215705454349518, "learning_rate": 4.398397467564386e-05, "loss": 0.3509, "step": 5582 }, { "epoch": 0.9091723323698245, "grad_norm": 0.2965138554573059, "learning_rate": 4.3980892088070036e-05, "loss": 0.3394, "step": 5583 }, { "epoch": 0.9093351789276554, "grad_norm": 0.29835885763168335, "learning_rate": 4.397780881902533e-05, "loss": 0.3706, "step": 5584 }, { "epoch": 0.9094980254854863, "grad_norm": 0.3117760419845581, "learning_rate": 4.3974724868620426e-05, "loss": 0.3591, "step": 5585 }, { "epoch": 0.9096608720433171, "grad_norm": 0.34967339038848877, "learning_rate": 4.397164023696605e-05, "loss": 0.3444, "step": 5586 }, { "epoch": 0.9098237186011481, "grad_norm": 0.38977718353271484, "learning_rate": 4.396855492417294e-05, "loss": 0.3747, "step": 5587 }, { "epoch": 0.9099865651589789, "grad_norm": 0.30442294478416443, "learning_rate": 4.396546893035188e-05, "loss": 0.3781, "step": 5588 }, { "epoch": 0.9101494117168099, "grad_norm": 0.3234368562698364, "learning_rate": 4.3962382255613674e-05, "loss": 0.3349, "step": 5589 }, { "epoch": 0.9103122582746407, "grad_norm": 0.3120911419391632, "learning_rate": 4.3959294900069124e-05, "loss": 0.3866, "step": 5590 }, { "epoch": 0.9104751048324716, "grad_norm": 0.3014228343963623, "learning_rate": 4.395620686382908e-05, "loss": 0.3706, "step": 5591 }, { "epoch": 0.9106379513903025, "grad_norm": 0.3221309185028076, "learning_rate": 4.395311814700443e-05, "loss": 0.3908, "step": 5592 }, { "epoch": 0.9108007979481334, "grad_norm": 0.2917787730693817, "learning_rate": 4.3950028749706044e-05, "loss": 0.3259, "step": 5593 }, { "epoch": 0.9109636445059642, "grad_norm": 0.4951983094215393, "learning_rate": 4.3946938672044846e-05, "loss": 0.4249, "step": 5594 }, { "epoch": 0.9111264910637952, "grad_norm": 0.27126410603523254, "learning_rate": 4.3943847914131787e-05, "loss": 0.3428, "step": 5595 }, { "epoch": 0.911289337621626, "grad_norm": 0.2875135540962219, "learning_rate": 4.3940756476077833e-05, "loss": 0.3313, "step": 5596 }, { "epoch": 0.911452184179457, "grad_norm": 0.3442513644695282, "learning_rate": 4.393766435799397e-05, "loss": 0.3619, "step": 5597 }, { "epoch": 0.9116150307372878, "grad_norm": 0.27933526039123535, "learning_rate": 4.393457155999122e-05, "loss": 0.3243, "step": 5598 }, { "epoch": 0.9117778772951187, "grad_norm": 0.3841957151889801, "learning_rate": 4.3931478082180625e-05, "loss": 0.4691, "step": 5599 }, { "epoch": 0.9119407238529496, "grad_norm": 0.31964585185050964, "learning_rate": 4.392838392467324e-05, "loss": 0.3846, "step": 5600 }, { "epoch": 0.9121035704107805, "grad_norm": 0.30407997965812683, "learning_rate": 4.3925289087580165e-05, "loss": 0.3466, "step": 5601 }, { "epoch": 0.9122664169686113, "grad_norm": 0.3565138876438141, "learning_rate": 4.39221935710125e-05, "loss": 0.3615, "step": 5602 }, { "epoch": 0.9124292635264422, "grad_norm": 0.29020535945892334, "learning_rate": 4.391909737508141e-05, "loss": 0.3277, "step": 5603 }, { "epoch": 0.9125921100842731, "grad_norm": 0.4015551805496216, "learning_rate": 4.391600049989803e-05, "loss": 0.3841, "step": 5604 }, { "epoch": 0.9127549566421039, "grad_norm": 0.34946003556251526, "learning_rate": 4.3912902945573565e-05, "loss": 0.3384, "step": 5605 }, { "epoch": 0.9129178031999349, "grad_norm": 0.34117773175239563, "learning_rate": 4.390980471221921e-05, "loss": 0.3673, "step": 5606 }, { "epoch": 0.9130806497577657, "grad_norm": 0.3905399441719055, "learning_rate": 4.390670579994622e-05, "loss": 0.4085, "step": 5607 }, { "epoch": 0.9132434963155966, "grad_norm": 0.46719229221343994, "learning_rate": 4.390360620886584e-05, "loss": 0.4425, "step": 5608 }, { "epoch": 0.9134063428734275, "grad_norm": 0.35088202357292175, "learning_rate": 4.390050593908937e-05, "loss": 0.3915, "step": 5609 }, { "epoch": 0.9135691894312584, "grad_norm": 0.34519702196121216, "learning_rate": 4.38974049907281e-05, "loss": 0.3447, "step": 5610 }, { "epoch": 0.9137320359890893, "grad_norm": 0.36226141452789307, "learning_rate": 4.389430336389337e-05, "loss": 0.3239, "step": 5611 }, { "epoch": 0.9138948825469202, "grad_norm": 0.35085156559944153, "learning_rate": 4.3891201058696544e-05, "loss": 0.3809, "step": 5612 }, { "epoch": 0.914057729104751, "grad_norm": 0.3252491056919098, "learning_rate": 4.388809807524901e-05, "loss": 0.3633, "step": 5613 }, { "epoch": 0.914220575662582, "grad_norm": 0.35618820786476135, "learning_rate": 4.388499441366215e-05, "loss": 0.3667, "step": 5614 }, { "epoch": 0.9143834222204128, "grad_norm": 0.2944798767566681, "learning_rate": 4.3881890074047414e-05, "loss": 0.3885, "step": 5615 }, { "epoch": 0.9145462687782437, "grad_norm": 0.3176346719264984, "learning_rate": 4.387878505651626e-05, "loss": 0.3878, "step": 5616 }, { "epoch": 0.9147091153360746, "grad_norm": 0.32910388708114624, "learning_rate": 4.387567936118015e-05, "loss": 0.407, "step": 5617 }, { "epoch": 0.9148719618939055, "grad_norm": 0.3197587728500366, "learning_rate": 4.38725729881506e-05, "loss": 0.3773, "step": 5618 }, { "epoch": 0.9150348084517363, "grad_norm": 0.2966514229774475, "learning_rate": 4.386946593753914e-05, "loss": 0.3445, "step": 5619 }, { "epoch": 0.9151976550095673, "grad_norm": 0.26751238107681274, "learning_rate": 4.386635820945731e-05, "loss": 0.3473, "step": 5620 }, { "epoch": 0.9153605015673981, "grad_norm": 0.29612675309181213, "learning_rate": 4.386324980401671e-05, "loss": 0.3372, "step": 5621 }, { "epoch": 0.915523348125229, "grad_norm": 0.3968595862388611, "learning_rate": 4.386014072132892e-05, "loss": 0.396, "step": 5622 }, { "epoch": 0.9156861946830599, "grad_norm": 0.2834877073764801, "learning_rate": 4.3857030961505566e-05, "loss": 0.3405, "step": 5623 }, { "epoch": 0.9158490412408907, "grad_norm": 0.3693414628505707, "learning_rate": 4.385392052465831e-05, "loss": 0.3613, "step": 5624 }, { "epoch": 0.9160118877987217, "grad_norm": 0.3269020915031433, "learning_rate": 4.385080941089882e-05, "loss": 0.3408, "step": 5625 }, { "epoch": 0.9161747343565525, "grad_norm": 0.40469440817832947, "learning_rate": 4.384769762033879e-05, "loss": 0.4062, "step": 5626 }, { "epoch": 0.9163375809143834, "grad_norm": 0.3040878176689148, "learning_rate": 4.3844585153089946e-05, "loss": 0.3498, "step": 5627 }, { "epoch": 0.9165004274722143, "grad_norm": 0.3490006625652313, "learning_rate": 4.384147200926403e-05, "loss": 0.3639, "step": 5628 }, { "epoch": 0.9166632740300452, "grad_norm": 0.27488064765930176, "learning_rate": 4.383835818897283e-05, "loss": 0.3346, "step": 5629 }, { "epoch": 0.916826120587876, "grad_norm": 0.3100191354751587, "learning_rate": 4.383524369232812e-05, "loss": 0.3806, "step": 5630 }, { "epoch": 0.916988967145707, "grad_norm": 0.30575981736183167, "learning_rate": 4.3832128519441735e-05, "loss": 0.3456, "step": 5631 }, { "epoch": 0.9171518137035378, "grad_norm": 0.30500704050064087, "learning_rate": 4.382901267042551e-05, "loss": 0.373, "step": 5632 }, { "epoch": 0.9173146602613688, "grad_norm": 0.31484904885292053, "learning_rate": 4.382589614539132e-05, "loss": 0.3757, "step": 5633 }, { "epoch": 0.9174775068191996, "grad_norm": 0.9135828614234924, "learning_rate": 4.382277894445106e-05, "loss": 0.3601, "step": 5634 }, { "epoch": 0.9176403533770305, "grad_norm": 0.2894173264503479, "learning_rate": 4.381966106771663e-05, "loss": 0.3892, "step": 5635 }, { "epoch": 0.9178031999348614, "grad_norm": 0.3100378215312958, "learning_rate": 4.3816542515299986e-05, "loss": 0.3889, "step": 5636 }, { "epoch": 0.9179660464926923, "grad_norm": 0.2915002703666687, "learning_rate": 4.381342328731309e-05, "loss": 0.3372, "step": 5637 }, { "epoch": 0.9181288930505231, "grad_norm": 0.32634320855140686, "learning_rate": 4.3810303383867937e-05, "loss": 0.3931, "step": 5638 }, { "epoch": 0.918291739608354, "grad_norm": 0.29995062947273254, "learning_rate": 4.3807182805076526e-05, "loss": 0.3676, "step": 5639 }, { "epoch": 0.9184545861661849, "grad_norm": 0.40091636776924133, "learning_rate": 4.3804061551050914e-05, "loss": 0.3904, "step": 5640 }, { "epoch": 0.9186174327240157, "grad_norm": 3.084505319595337, "learning_rate": 4.3800939621903146e-05, "loss": 0.4072, "step": 5641 }, { "epoch": 0.9187802792818467, "grad_norm": 0.3132023215293884, "learning_rate": 4.3797817017745316e-05, "loss": 0.3212, "step": 5642 }, { "epoch": 0.9189431258396775, "grad_norm": 0.4454541802406311, "learning_rate": 4.379469373868954e-05, "loss": 0.4198, "step": 5643 }, { "epoch": 0.9191059723975085, "grad_norm": 1.1924580335617065, "learning_rate": 4.379156978484795e-05, "loss": 0.3698, "step": 5644 }, { "epoch": 0.9192688189553393, "grad_norm": 0.35502803325653076, "learning_rate": 4.37884451563327e-05, "loss": 0.363, "step": 5645 }, { "epoch": 0.9194316655131702, "grad_norm": 0.3563356101512909, "learning_rate": 4.378531985325598e-05, "loss": 0.3617, "step": 5646 }, { "epoch": 0.9195945120710011, "grad_norm": 0.382683664560318, "learning_rate": 4.3782193875729986e-05, "loss": 0.3753, "step": 5647 }, { "epoch": 0.919757358628832, "grad_norm": 0.49872058629989624, "learning_rate": 4.3779067223866965e-05, "loss": 0.3538, "step": 5648 }, { "epoch": 0.9199202051866628, "grad_norm": 0.5763110518455505, "learning_rate": 4.3775939897779174e-05, "loss": 0.3873, "step": 5649 }, { "epoch": 0.9200830517444938, "grad_norm": 0.3731409013271332, "learning_rate": 4.377281189757888e-05, "loss": 0.3644, "step": 5650 }, { "epoch": 0.9202458983023246, "grad_norm": 0.3147086799144745, "learning_rate": 4.376968322337839e-05, "loss": 0.3515, "step": 5651 }, { "epoch": 0.9204087448601556, "grad_norm": 0.4780142605304718, "learning_rate": 4.376655387529005e-05, "loss": 0.4005, "step": 5652 }, { "epoch": 0.9205715914179864, "grad_norm": 0.36569878458976746, "learning_rate": 4.376342385342619e-05, "loss": 0.4009, "step": 5653 }, { "epoch": 0.9207344379758173, "grad_norm": 0.4029821753501892, "learning_rate": 4.376029315789919e-05, "loss": 0.3891, "step": 5654 }, { "epoch": 0.9208972845336482, "grad_norm": 0.42143720388412476, "learning_rate": 4.375716178882147e-05, "loss": 0.3846, "step": 5655 }, { "epoch": 0.9210601310914791, "grad_norm": 0.34981146454811096, "learning_rate": 4.3754029746305443e-05, "loss": 0.3629, "step": 5656 }, { "epoch": 0.9212229776493099, "grad_norm": 0.36312398314476013, "learning_rate": 4.3750897030463553e-05, "loss": 0.4183, "step": 5657 }, { "epoch": 0.9213858242071408, "grad_norm": 0.3309888541698456, "learning_rate": 4.3747763641408286e-05, "loss": 0.3947, "step": 5658 }, { "epoch": 0.9215486707649717, "grad_norm": 0.776951014995575, "learning_rate": 4.374462957925213e-05, "loss": 0.3641, "step": 5659 }, { "epoch": 0.9217115173228025, "grad_norm": 0.36549821496009827, "learning_rate": 4.374149484410762e-05, "loss": 0.3505, "step": 5660 }, { "epoch": 0.9218743638806335, "grad_norm": 0.34254926443099976, "learning_rate": 4.3738359436087285e-05, "loss": 0.3876, "step": 5661 }, { "epoch": 0.9220372104384643, "grad_norm": 0.3341211676597595, "learning_rate": 4.3735223355303715e-05, "loss": 0.3476, "step": 5662 }, { "epoch": 0.9222000569962953, "grad_norm": 0.34707534313201904, "learning_rate": 4.373208660186948e-05, "loss": 0.3828, "step": 5663 }, { "epoch": 0.9223629035541261, "grad_norm": 0.3983815014362335, "learning_rate": 4.372894917589722e-05, "loss": 0.3347, "step": 5664 }, { "epoch": 0.922525750111957, "grad_norm": 0.3336229622364044, "learning_rate": 4.3725811077499575e-05, "loss": 0.379, "step": 5665 }, { "epoch": 0.9226885966697879, "grad_norm": 0.3488173484802246, "learning_rate": 4.3722672306789205e-05, "loss": 0.3628, "step": 5666 }, { "epoch": 0.9228514432276188, "grad_norm": 0.31741514801979065, "learning_rate": 4.37195328638788e-05, "loss": 0.3838, "step": 5667 }, { "epoch": 0.9230142897854496, "grad_norm": 0.37634968757629395, "learning_rate": 4.3716392748881085e-05, "loss": 0.3663, "step": 5668 }, { "epoch": 0.9231771363432806, "grad_norm": 0.3886381685733795, "learning_rate": 4.3713251961908795e-05, "loss": 0.379, "step": 5669 }, { "epoch": 0.9233399829011114, "grad_norm": 0.37259501218795776, "learning_rate": 4.3710110503074695e-05, "loss": 0.3534, "step": 5670 }, { "epoch": 0.9235028294589424, "grad_norm": 0.4187506437301636, "learning_rate": 4.370696837249156e-05, "loss": 0.3526, "step": 5671 }, { "epoch": 0.9236656760167732, "grad_norm": 0.30572864413261414, "learning_rate": 4.3703825570272225e-05, "loss": 0.3729, "step": 5672 }, { "epoch": 0.9238285225746041, "grad_norm": 0.35834258794784546, "learning_rate": 4.370068209652951e-05, "loss": 0.3337, "step": 5673 }, { "epoch": 0.923991369132435, "grad_norm": 0.3976307809352875, "learning_rate": 4.369753795137628e-05, "loss": 0.3986, "step": 5674 }, { "epoch": 0.9241542156902659, "grad_norm": 0.32189303636550903, "learning_rate": 4.369439313492542e-05, "loss": 0.3677, "step": 5675 }, { "epoch": 0.9243170622480967, "grad_norm": 0.35835617780685425, "learning_rate": 4.3691247647289833e-05, "loss": 0.349, "step": 5676 }, { "epoch": 0.9244799088059276, "grad_norm": 0.3809877932071686, "learning_rate": 4.368810148858246e-05, "loss": 0.3728, "step": 5677 }, { "epoch": 0.9246427553637585, "grad_norm": 0.2891649901866913, "learning_rate": 4.368495465891625e-05, "loss": 0.3571, "step": 5678 }, { "epoch": 0.9248056019215893, "grad_norm": 0.30898433923721313, "learning_rate": 4.368180715840418e-05, "loss": 0.3627, "step": 5679 }, { "epoch": 0.9249684484794203, "grad_norm": 0.35239097476005554, "learning_rate": 4.367865898715926e-05, "loss": 0.3737, "step": 5680 }, { "epoch": 0.9251312950372511, "grad_norm": 0.43111154437065125, "learning_rate": 4.367551014529453e-05, "loss": 0.3921, "step": 5681 }, { "epoch": 0.925294141595082, "grad_norm": 0.32152602076530457, "learning_rate": 4.3672360632923025e-05, "loss": 0.3688, "step": 5682 }, { "epoch": 0.9254569881529129, "grad_norm": 0.32081589102745056, "learning_rate": 4.366921045015783e-05, "loss": 0.3497, "step": 5683 }, { "epoch": 0.9256198347107438, "grad_norm": 0.403670996427536, "learning_rate": 4.366605959711205e-05, "loss": 0.3589, "step": 5684 }, { "epoch": 0.9257826812685747, "grad_norm": 0.3699170649051666, "learning_rate": 4.36629080738988e-05, "loss": 0.3948, "step": 5685 }, { "epoch": 0.9259455278264056, "grad_norm": 0.31955093145370483, "learning_rate": 4.3659755880631236e-05, "loss": 0.3389, "step": 5686 }, { "epoch": 0.9261083743842364, "grad_norm": 0.4729933440685272, "learning_rate": 4.365660301742253e-05, "loss": 0.4456, "step": 5687 }, { "epoch": 0.9262712209420674, "grad_norm": 0.3629381060600281, "learning_rate": 4.365344948438588e-05, "loss": 0.3818, "step": 5688 }, { "epoch": 0.9264340674998982, "grad_norm": 0.516542375087738, "learning_rate": 4.365029528163451e-05, "loss": 0.3684, "step": 5689 }, { "epoch": 0.9265969140577291, "grad_norm": 0.3160174489021301, "learning_rate": 4.364714040928166e-05, "loss": 0.3443, "step": 5690 }, { "epoch": 0.92675976061556, "grad_norm": 0.3142080008983612, "learning_rate": 4.3643984867440594e-05, "loss": 0.3893, "step": 5691 }, { "epoch": 0.9269226071733909, "grad_norm": 0.380508154630661, "learning_rate": 4.364082865622462e-05, "loss": 0.3857, "step": 5692 }, { "epoch": 0.9270854537312218, "grad_norm": 0.36792051792144775, "learning_rate": 4.363767177574704e-05, "loss": 0.3864, "step": 5693 }, { "epoch": 0.9272483002890526, "grad_norm": 0.3747420310974121, "learning_rate": 4.363451422612121e-05, "loss": 0.3786, "step": 5694 }, { "epoch": 0.9274111468468835, "grad_norm": 0.2853164076805115, "learning_rate": 4.363135600746049e-05, "loss": 0.3595, "step": 5695 }, { "epoch": 0.9275739934047144, "grad_norm": 0.2857332229614258, "learning_rate": 4.3628197119878264e-05, "loss": 0.3467, "step": 5696 }, { "epoch": 0.9277368399625453, "grad_norm": 0.34649235010147095, "learning_rate": 4.362503756348795e-05, "loss": 0.3877, "step": 5697 }, { "epoch": 0.9278996865203761, "grad_norm": 0.3150785565376282, "learning_rate": 4.3621877338402985e-05, "loss": 0.4022, "step": 5698 }, { "epoch": 0.9280625330782071, "grad_norm": 0.29508426785469055, "learning_rate": 4.361871644473683e-05, "loss": 0.3387, "step": 5699 }, { "epoch": 0.9282253796360379, "grad_norm": 0.29206332564353943, "learning_rate": 4.3615554882602975e-05, "loss": 0.3796, "step": 5700 }, { "epoch": 0.9283882261938688, "grad_norm": 0.28657934069633484, "learning_rate": 4.361239265211493e-05, "loss": 0.3881, "step": 5701 }, { "epoch": 0.9285510727516997, "grad_norm": 0.3555898070335388, "learning_rate": 4.360922975338621e-05, "loss": 0.454, "step": 5702 }, { "epoch": 0.9287139193095306, "grad_norm": 0.3051011264324188, "learning_rate": 4.36060661865304e-05, "loss": 0.3654, "step": 5703 }, { "epoch": 0.9288767658673615, "grad_norm": 0.269015908241272, "learning_rate": 4.360290195166107e-05, "loss": 0.3491, "step": 5704 }, { "epoch": 0.9290396124251924, "grad_norm": 0.3398534655570984, "learning_rate": 4.359973704889182e-05, "loss": 0.3881, "step": 5705 }, { "epoch": 0.9292024589830232, "grad_norm": 0.3170180320739746, "learning_rate": 4.359657147833628e-05, "loss": 0.3427, "step": 5706 }, { "epoch": 0.9293653055408542, "grad_norm": 0.37383902072906494, "learning_rate": 4.3593405240108106e-05, "loss": 0.3658, "step": 5707 }, { "epoch": 0.929528152098685, "grad_norm": 0.2643008530139923, "learning_rate": 4.359023833432099e-05, "loss": 0.3653, "step": 5708 }, { "epoch": 0.9296909986565159, "grad_norm": 0.29962974786758423, "learning_rate": 4.3587070761088606e-05, "loss": 0.3859, "step": 5709 }, { "epoch": 0.9298538452143468, "grad_norm": 0.35595378279685974, "learning_rate": 4.35839025205247e-05, "loss": 0.4033, "step": 5710 }, { "epoch": 0.9300166917721777, "grad_norm": 0.3143410384654999, "learning_rate": 4.358073361274301e-05, "loss": 0.3579, "step": 5711 }, { "epoch": 0.9301795383300085, "grad_norm": 0.32084181904792786, "learning_rate": 4.357756403785732e-05, "loss": 0.3824, "step": 5712 }, { "epoch": 0.9303423848878394, "grad_norm": 0.2962409555912018, "learning_rate": 4.357439379598142e-05, "loss": 0.3666, "step": 5713 }, { "epoch": 0.9305052314456703, "grad_norm": 0.28645139932632446, "learning_rate": 4.357122288722913e-05, "loss": 0.3416, "step": 5714 }, { "epoch": 0.9306680780035012, "grad_norm": 0.43005654215812683, "learning_rate": 4.356805131171429e-05, "loss": 0.3951, "step": 5715 }, { "epoch": 0.9308309245613321, "grad_norm": 0.30489200353622437, "learning_rate": 4.356487906955079e-05, "loss": 0.328, "step": 5716 }, { "epoch": 0.9309937711191629, "grad_norm": 0.3289393484592438, "learning_rate": 4.35617061608525e-05, "loss": 0.3754, "step": 5717 }, { "epoch": 0.9311566176769939, "grad_norm": 0.34314867854118347, "learning_rate": 4.355853258573335e-05, "loss": 0.3482, "step": 5718 }, { "epoch": 0.9313194642348247, "grad_norm": 0.29265838861465454, "learning_rate": 4.355535834430728e-05, "loss": 0.3573, "step": 5719 }, { "epoch": 0.9314823107926556, "grad_norm": 0.28622642159461975, "learning_rate": 4.355218343668825e-05, "loss": 0.3512, "step": 5720 }, { "epoch": 0.9316451573504865, "grad_norm": 0.2915026545524597, "learning_rate": 4.354900786299025e-05, "loss": 0.34, "step": 5721 }, { "epoch": 0.9318080039083174, "grad_norm": 0.2986913025379181, "learning_rate": 4.3545831623327294e-05, "loss": 0.3268, "step": 5722 }, { "epoch": 0.9319708504661482, "grad_norm": 0.2547345459461212, "learning_rate": 4.3542654717813416e-05, "loss": 0.3052, "step": 5723 }, { "epoch": 0.9321336970239792, "grad_norm": 0.3231830596923828, "learning_rate": 4.353947714656268e-05, "loss": 0.3594, "step": 5724 }, { "epoch": 0.93229654358181, "grad_norm": 0.32600849866867065, "learning_rate": 4.353629890968917e-05, "loss": 0.3302, "step": 5725 }, { "epoch": 0.932459390139641, "grad_norm": 0.31401070952415466, "learning_rate": 4.3533120007306994e-05, "loss": 0.3734, "step": 5726 }, { "epoch": 0.9326222366974718, "grad_norm": 0.2868713438510895, "learning_rate": 4.352994043953028e-05, "loss": 0.3362, "step": 5727 }, { "epoch": 0.9327850832553027, "grad_norm": 0.273628294467926, "learning_rate": 4.352676020647318e-05, "loss": 0.3302, "step": 5728 }, { "epoch": 0.9329479298131336, "grad_norm": 0.35248735547065735, "learning_rate": 4.352357930824989e-05, "loss": 0.3957, "step": 5729 }, { "epoch": 0.9331107763709645, "grad_norm": 0.3044436573982239, "learning_rate": 4.352039774497461e-05, "loss": 0.4049, "step": 5730 }, { "epoch": 0.9332736229287953, "grad_norm": 0.34131699800491333, "learning_rate": 4.351721551676156e-05, "loss": 0.3738, "step": 5731 }, { "epoch": 0.9334364694866262, "grad_norm": 0.27987146377563477, "learning_rate": 4.351403262372498e-05, "loss": 0.3014, "step": 5732 }, { "epoch": 0.9335993160444571, "grad_norm": 0.3259086310863495, "learning_rate": 4.351084906597917e-05, "loss": 0.3651, "step": 5733 }, { "epoch": 0.933762162602288, "grad_norm": 0.2898736298084259, "learning_rate": 4.350766484363842e-05, "loss": 0.3614, "step": 5734 }, { "epoch": 0.9339250091601189, "grad_norm": 0.36077407002449036, "learning_rate": 4.350447995681704e-05, "loss": 0.3936, "step": 5735 }, { "epoch": 0.9340878557179497, "grad_norm": 0.323608934879303, "learning_rate": 4.350129440562941e-05, "loss": 0.376, "step": 5736 }, { "epoch": 0.9342507022757807, "grad_norm": 0.35596269369125366, "learning_rate": 4.349810819018987e-05, "loss": 0.3751, "step": 5737 }, { "epoch": 0.9344135488336115, "grad_norm": 0.29436397552490234, "learning_rate": 4.3494921310612816e-05, "loss": 0.3584, "step": 5738 }, { "epoch": 0.9345763953914424, "grad_norm": 0.28971928358078003, "learning_rate": 4.349173376701268e-05, "loss": 0.3425, "step": 5739 }, { "epoch": 0.9347392419492733, "grad_norm": 0.2796373963356018, "learning_rate": 4.34885455595039e-05, "loss": 0.3325, "step": 5740 }, { "epoch": 0.9349020885071042, "grad_norm": 0.3032178282737732, "learning_rate": 4.348535668820094e-05, "loss": 0.367, "step": 5741 }, { "epoch": 0.935064935064935, "grad_norm": 0.2838236093521118, "learning_rate": 4.348216715321829e-05, "loss": 0.3563, "step": 5742 }, { "epoch": 0.935227781622766, "grad_norm": 0.2882901132106781, "learning_rate": 4.347897695467047e-05, "loss": 0.3527, "step": 5743 }, { "epoch": 0.9353906281805968, "grad_norm": 0.3525685966014862, "learning_rate": 4.347578609267201e-05, "loss": 0.4444, "step": 5744 }, { "epoch": 0.9355534747384278, "grad_norm": 0.31557103991508484, "learning_rate": 4.347259456733747e-05, "loss": 0.3649, "step": 5745 }, { "epoch": 0.9357163212962586, "grad_norm": 0.35247278213500977, "learning_rate": 4.3469402378781445e-05, "loss": 0.3424, "step": 5746 }, { "epoch": 0.9358791678540895, "grad_norm": 0.3231440484523773, "learning_rate": 4.346620952711854e-05, "loss": 0.3577, "step": 5747 }, { "epoch": 0.9360420144119204, "grad_norm": 0.31329983472824097, "learning_rate": 4.346301601246339e-05, "loss": 0.3608, "step": 5748 }, { "epoch": 0.9362048609697513, "grad_norm": 0.31035295128822327, "learning_rate": 4.345982183493063e-05, "loss": 0.3616, "step": 5749 }, { "epoch": 0.9363677075275821, "grad_norm": 0.34120726585388184, "learning_rate": 4.3456626994634975e-05, "loss": 0.3573, "step": 5750 }, { "epoch": 0.936530554085413, "grad_norm": 0.32376527786254883, "learning_rate": 4.345343149169111e-05, "loss": 0.3618, "step": 5751 }, { "epoch": 0.9366934006432439, "grad_norm": 0.30295032262802124, "learning_rate": 4.345023532621377e-05, "loss": 0.354, "step": 5752 }, { "epoch": 0.9368562472010747, "grad_norm": 0.310386061668396, "learning_rate": 4.34470384983177e-05, "loss": 0.3238, "step": 5753 }, { "epoch": 0.9370190937589057, "grad_norm": 0.31593966484069824, "learning_rate": 4.3443841008117684e-05, "loss": 0.3629, "step": 5754 }, { "epoch": 0.9371819403167365, "grad_norm": 0.3291451334953308, "learning_rate": 4.3440642855728507e-05, "loss": 0.3757, "step": 5755 }, { "epoch": 0.9373447868745675, "grad_norm": 0.5923417806625366, "learning_rate": 4.3437444041265005e-05, "loss": 0.4133, "step": 5756 }, { "epoch": 0.9375076334323983, "grad_norm": 0.38135892152786255, "learning_rate": 4.343424456484202e-05, "loss": 0.3844, "step": 5757 }, { "epoch": 0.9376704799902292, "grad_norm": 0.298871785402298, "learning_rate": 4.343104442657443e-05, "loss": 0.336, "step": 5758 }, { "epoch": 0.9378333265480601, "grad_norm": 0.32879507541656494, "learning_rate": 4.342784362657713e-05, "loss": 0.3824, "step": 5759 }, { "epoch": 0.937996173105891, "grad_norm": 0.36328187584877014, "learning_rate": 4.342464216496502e-05, "loss": 0.4099, "step": 5760 }, { "epoch": 0.9381590196637218, "grad_norm": 0.3132408559322357, "learning_rate": 4.342144004185306e-05, "loss": 0.3681, "step": 5761 }, { "epoch": 0.9383218662215528, "grad_norm": 0.30112946033477783, "learning_rate": 4.341823725735621e-05, "loss": 0.3353, "step": 5762 }, { "epoch": 0.9384847127793836, "grad_norm": 0.29541975259780884, "learning_rate": 4.341503381158946e-05, "loss": 0.3407, "step": 5763 }, { "epoch": 0.9386475593372146, "grad_norm": 0.4338894486427307, "learning_rate": 4.341182970466782e-05, "loss": 0.4007, "step": 5764 }, { "epoch": 0.9388104058950454, "grad_norm": 0.3020726442337036, "learning_rate": 4.3408624936706335e-05, "loss": 0.3874, "step": 5765 }, { "epoch": 0.9389732524528763, "grad_norm": 0.4041827917098999, "learning_rate": 4.340541950782006e-05, "loss": 0.3652, "step": 5766 }, { "epoch": 0.9391360990107072, "grad_norm": 0.3340458571910858, "learning_rate": 4.3402213418124084e-05, "loss": 0.3621, "step": 5767 }, { "epoch": 0.939298945568538, "grad_norm": 0.27983319759368896, "learning_rate": 4.3399006667733506e-05, "loss": 0.3576, "step": 5768 }, { "epoch": 0.9394617921263689, "grad_norm": 0.3673953413963318, "learning_rate": 4.339579925676346e-05, "loss": 0.3679, "step": 5769 }, { "epoch": 0.9396246386841998, "grad_norm": 0.3417702913284302, "learning_rate": 4.339259118532911e-05, "loss": 0.3858, "step": 5770 }, { "epoch": 0.9397874852420307, "grad_norm": 0.31991055607795715, "learning_rate": 4.3389382453545634e-05, "loss": 0.3538, "step": 5771 }, { "epoch": 0.9399503317998615, "grad_norm": 0.9456542730331421, "learning_rate": 4.338617306152823e-05, "loss": 0.3672, "step": 5772 }, { "epoch": 0.9401131783576925, "grad_norm": 0.47106534242630005, "learning_rate": 4.3382963009392125e-05, "loss": 0.4092, "step": 5773 }, { "epoch": 0.9402760249155233, "grad_norm": 0.30578967928886414, "learning_rate": 4.3379752297252575e-05, "loss": 0.3586, "step": 5774 }, { "epoch": 0.9404388714733543, "grad_norm": 0.36077505350112915, "learning_rate": 4.3376540925224846e-05, "loss": 0.3686, "step": 5775 }, { "epoch": 0.9406017180311851, "grad_norm": 0.37658846378326416, "learning_rate": 4.337332889342424e-05, "loss": 0.391, "step": 5776 }, { "epoch": 0.940764564589016, "grad_norm": 0.39690670371055603, "learning_rate": 4.337011620196608e-05, "loss": 0.3875, "step": 5777 }, { "epoch": 0.9409274111468469, "grad_norm": 0.25892898440361023, "learning_rate": 4.336690285096572e-05, "loss": 0.3114, "step": 5778 }, { "epoch": 0.9410902577046778, "grad_norm": 0.4633639454841614, "learning_rate": 4.33636888405385e-05, "loss": 0.3929, "step": 5779 }, { "epoch": 0.9412531042625086, "grad_norm": 0.5003278851509094, "learning_rate": 4.3360474170799845e-05, "loss": 0.377, "step": 5780 }, { "epoch": 0.9414159508203396, "grad_norm": 0.29295313358306885, "learning_rate": 4.335725884186515e-05, "loss": 0.3583, "step": 5781 }, { "epoch": 0.9415787973781704, "grad_norm": 0.6221680641174316, "learning_rate": 4.335404285384987e-05, "loss": 0.3295, "step": 5782 }, { "epoch": 0.9417416439360013, "grad_norm": 0.39250051975250244, "learning_rate": 4.335082620686946e-05, "loss": 0.4117, "step": 5783 }, { "epoch": 0.9419044904938322, "grad_norm": 0.29905611276626587, "learning_rate": 4.33476089010394e-05, "loss": 0.3461, "step": 5784 }, { "epoch": 0.9420673370516631, "grad_norm": 0.30421701073646545, "learning_rate": 4.334439093647522e-05, "loss": 0.3577, "step": 5785 }, { "epoch": 0.942230183609494, "grad_norm": 0.36371931433677673, "learning_rate": 4.334117231329244e-05, "loss": 0.3769, "step": 5786 }, { "epoch": 0.9423930301673248, "grad_norm": 0.345096230506897, "learning_rate": 4.333795303160662e-05, "loss": 0.3862, "step": 5787 }, { "epoch": 0.9425558767251557, "grad_norm": 0.3534215986728668, "learning_rate": 4.3334733091533356e-05, "loss": 0.402, "step": 5788 }, { "epoch": 0.9427187232829866, "grad_norm": 0.3938220739364624, "learning_rate": 4.333151249318823e-05, "loss": 0.3925, "step": 5789 }, { "epoch": 0.9428815698408175, "grad_norm": 0.3519063889980316, "learning_rate": 4.332829123668688e-05, "loss": 0.3477, "step": 5790 }, { "epoch": 0.9430444163986483, "grad_norm": 0.3269406259059906, "learning_rate": 4.3325069322144965e-05, "loss": 0.3581, "step": 5791 }, { "epoch": 0.9432072629564793, "grad_norm": 0.3414889872074127, "learning_rate": 4.332184674967816e-05, "loss": 0.3771, "step": 5792 }, { "epoch": 0.9433701095143101, "grad_norm": 0.3845081031322479, "learning_rate": 4.331862351940217e-05, "loss": 0.3876, "step": 5793 }, { "epoch": 0.943532956072141, "grad_norm": 0.33241701126098633, "learning_rate": 4.3315399631432704e-05, "loss": 0.3313, "step": 5794 }, { "epoch": 0.9436958026299719, "grad_norm": 0.3386750817298889, "learning_rate": 4.3312175085885516e-05, "loss": 0.3574, "step": 5795 }, { "epoch": 0.9438586491878028, "grad_norm": 0.40717655420303345, "learning_rate": 4.330894988287638e-05, "loss": 0.3552, "step": 5796 }, { "epoch": 0.9440214957456337, "grad_norm": 0.2957695424556732, "learning_rate": 4.330572402252109e-05, "loss": 0.3507, "step": 5797 }, { "epoch": 0.9441843423034646, "grad_norm": 0.3645738363265991, "learning_rate": 4.3302497504935455e-05, "loss": 0.4134, "step": 5798 }, { "epoch": 0.9443471888612954, "grad_norm": 0.3294065296649933, "learning_rate": 4.329927033023533e-05, "loss": 0.4044, "step": 5799 }, { "epoch": 0.9445100354191264, "grad_norm": 0.33639469742774963, "learning_rate": 4.329604249853657e-05, "loss": 0.355, "step": 5800 }, { "epoch": 0.9446728819769572, "grad_norm": 0.35277435183525085, "learning_rate": 4.329281400995507e-05, "loss": 0.3884, "step": 5801 }, { "epoch": 0.9448357285347881, "grad_norm": 0.28674790263175964, "learning_rate": 4.3289584864606735e-05, "loss": 0.3696, "step": 5802 }, { "epoch": 0.944998575092619, "grad_norm": 0.371208131313324, "learning_rate": 4.328635506260752e-05, "loss": 0.3604, "step": 5803 }, { "epoch": 0.9451614216504499, "grad_norm": 0.3689272403717041, "learning_rate": 4.328312460407336e-05, "loss": 0.4036, "step": 5804 }, { "epoch": 0.9453242682082807, "grad_norm": 0.2925618588924408, "learning_rate": 4.3279893489120246e-05, "loss": 0.3291, "step": 5805 }, { "epoch": 0.9454871147661116, "grad_norm": 0.33475935459136963, "learning_rate": 4.3276661717864196e-05, "loss": 0.347, "step": 5806 }, { "epoch": 0.9456499613239425, "grad_norm": 0.34064576029777527, "learning_rate": 4.327342929042123e-05, "loss": 0.3475, "step": 5807 }, { "epoch": 0.9458128078817734, "grad_norm": 0.3097379207611084, "learning_rate": 4.327019620690741e-05, "loss": 0.3406, "step": 5808 }, { "epoch": 0.9459756544396043, "grad_norm": 0.3455532193183899, "learning_rate": 4.32669624674388e-05, "loss": 0.3639, "step": 5809 }, { "epoch": 0.9461385009974351, "grad_norm": 0.3624323010444641, "learning_rate": 4.3263728072131503e-05, "loss": 0.3498, "step": 5810 }, { "epoch": 0.9463013475552661, "grad_norm": 0.4132993519306183, "learning_rate": 4.3260493021101655e-05, "loss": 0.3779, "step": 5811 }, { "epoch": 0.9464641941130969, "grad_norm": 0.32453083992004395, "learning_rate": 4.325725731446539e-05, "loss": 0.3859, "step": 5812 }, { "epoch": 0.9466270406709278, "grad_norm": 0.2957616150379181, "learning_rate": 4.32540209523389e-05, "loss": 0.3326, "step": 5813 }, { "epoch": 0.9467898872287587, "grad_norm": 0.3305860161781311, "learning_rate": 4.3250783934838364e-05, "loss": 0.3492, "step": 5814 }, { "epoch": 0.9469527337865896, "grad_norm": 0.3728692829608917, "learning_rate": 4.324754626208e-05, "loss": 0.3969, "step": 5815 }, { "epoch": 0.9471155803444204, "grad_norm": 0.3495302200317383, "learning_rate": 4.3244307934180055e-05, "loss": 0.3765, "step": 5816 }, { "epoch": 0.9472784269022514, "grad_norm": 0.32297176122665405, "learning_rate": 4.3241068951254796e-05, "loss": 0.3781, "step": 5817 }, { "epoch": 0.9474412734600822, "grad_norm": 0.32026365399360657, "learning_rate": 4.323782931342051e-05, "loss": 0.3512, "step": 5818 }, { "epoch": 0.9476041200179132, "grad_norm": 0.3346673846244812, "learning_rate": 4.3234589020793504e-05, "loss": 0.329, "step": 5819 }, { "epoch": 0.947766966575744, "grad_norm": 0.2754901945590973, "learning_rate": 4.323134807349012e-05, "loss": 0.3149, "step": 5820 }, { "epoch": 0.9479298131335749, "grad_norm": 0.3261314332485199, "learning_rate": 4.322810647162672e-05, "loss": 0.3605, "step": 5821 }, { "epoch": 0.9480926596914058, "grad_norm": 0.30535122752189636, "learning_rate": 4.322486421531969e-05, "loss": 0.3576, "step": 5822 }, { "epoch": 0.9482555062492366, "grad_norm": 0.30834588408470154, "learning_rate": 4.3221621304685426e-05, "loss": 0.3815, "step": 5823 }, { "epoch": 0.9484183528070675, "grad_norm": 0.3159259557723999, "learning_rate": 4.321837773984037e-05, "loss": 0.3725, "step": 5824 }, { "epoch": 0.9485811993648984, "grad_norm": 0.30077096819877625, "learning_rate": 4.3215133520900966e-05, "loss": 0.3803, "step": 5825 }, { "epoch": 0.9487440459227293, "grad_norm": 0.34845951199531555, "learning_rate": 4.321188864798369e-05, "loss": 0.3526, "step": 5826 }, { "epoch": 0.9489068924805601, "grad_norm": 0.30852487683296204, "learning_rate": 4.320864312120505e-05, "loss": 0.3575, "step": 5827 }, { "epoch": 0.9490697390383911, "grad_norm": 0.37083378434181213, "learning_rate": 4.320539694068156e-05, "loss": 0.3769, "step": 5828 }, { "epoch": 0.9492325855962219, "grad_norm": 0.3067369759082794, "learning_rate": 4.320215010652979e-05, "loss": 0.3456, "step": 5829 }, { "epoch": 0.9493954321540529, "grad_norm": 0.3217785954475403, "learning_rate": 4.319890261886629e-05, "loss": 0.3392, "step": 5830 }, { "epoch": 0.9495582787118837, "grad_norm": 0.297940194606781, "learning_rate": 4.319565447780766e-05, "loss": 0.3467, "step": 5831 }, { "epoch": 0.9497211252697146, "grad_norm": 0.3771720826625824, "learning_rate": 4.3192405683470516e-05, "loss": 0.4235, "step": 5832 }, { "epoch": 0.9498839718275455, "grad_norm": 0.28698694705963135, "learning_rate": 4.31891562359715e-05, "loss": 0.3201, "step": 5833 }, { "epoch": 0.9500468183853764, "grad_norm": 0.33452415466308594, "learning_rate": 4.318590613542729e-05, "loss": 0.3678, "step": 5834 }, { "epoch": 0.9502096649432072, "grad_norm": 0.25751039385795593, "learning_rate": 4.318265538195456e-05, "loss": 0.3209, "step": 5835 }, { "epoch": 0.9503725115010382, "grad_norm": 0.3655453622341156, "learning_rate": 4.317940397567002e-05, "loss": 0.3741, "step": 5836 }, { "epoch": 0.950535358058869, "grad_norm": 0.26248809695243835, "learning_rate": 4.317615191669041e-05, "loss": 0.3343, "step": 5837 }, { "epoch": 0.9506982046167, "grad_norm": 0.2875501215457916, "learning_rate": 4.3172899205132486e-05, "loss": 0.3586, "step": 5838 }, { "epoch": 0.9508610511745308, "grad_norm": 0.338687539100647, "learning_rate": 4.316964584111304e-05, "loss": 0.3715, "step": 5839 }, { "epoch": 0.9510238977323617, "grad_norm": 0.30752450227737427, "learning_rate": 4.316639182474887e-05, "loss": 0.3623, "step": 5840 }, { "epoch": 0.9511867442901926, "grad_norm": 0.3362523019313812, "learning_rate": 4.31631371561568e-05, "loss": 0.3638, "step": 5841 }, { "epoch": 0.9513495908480234, "grad_norm": 0.2963846027851105, "learning_rate": 4.3159881835453696e-05, "loss": 0.3418, "step": 5842 }, { "epoch": 0.9515124374058543, "grad_norm": 0.29012206196784973, "learning_rate": 4.315662586275642e-05, "loss": 0.3446, "step": 5843 }, { "epoch": 0.9516752839636852, "grad_norm": 0.31923142075538635, "learning_rate": 4.3153369238181885e-05, "loss": 0.3732, "step": 5844 }, { "epoch": 0.9518381305215161, "grad_norm": 0.3201746344566345, "learning_rate": 4.3150111961847e-05, "loss": 0.3763, "step": 5845 }, { "epoch": 0.9520009770793469, "grad_norm": 0.2851199805736542, "learning_rate": 4.3146854033868713e-05, "loss": 0.3613, "step": 5846 }, { "epoch": 0.9521638236371779, "grad_norm": 0.35580289363861084, "learning_rate": 4.3143595454363995e-05, "loss": 0.3526, "step": 5847 }, { "epoch": 0.9523266701950087, "grad_norm": 0.2935449779033661, "learning_rate": 4.314033622344984e-05, "loss": 0.3821, "step": 5848 }, { "epoch": 0.9524895167528397, "grad_norm": 0.31229016184806824, "learning_rate": 4.3137076341243274e-05, "loss": 0.366, "step": 5849 }, { "epoch": 0.9526523633106705, "grad_norm": 0.3633984923362732, "learning_rate": 4.3133815807861324e-05, "loss": 0.3754, "step": 5850 }, { "epoch": 0.9528152098685014, "grad_norm": 0.33979663252830505, "learning_rate": 4.313055462342105e-05, "loss": 0.3372, "step": 5851 }, { "epoch": 0.9529780564263323, "grad_norm": 0.38198885321617126, "learning_rate": 4.312729278803955e-05, "loss": 0.4167, "step": 5852 }, { "epoch": 0.9531409029841632, "grad_norm": 0.3272015154361725, "learning_rate": 4.312403030183393e-05, "loss": 0.3323, "step": 5853 }, { "epoch": 0.953303749541994, "grad_norm": 0.31114834547042847, "learning_rate": 4.312076716492132e-05, "loss": 0.3457, "step": 5854 }, { "epoch": 0.953466596099825, "grad_norm": 0.2943725287914276, "learning_rate": 4.311750337741888e-05, "loss": 0.3325, "step": 5855 }, { "epoch": 0.9536294426576558, "grad_norm": 0.2988816499710083, "learning_rate": 4.311423893944378e-05, "loss": 0.3662, "step": 5856 }, { "epoch": 0.9537922892154868, "grad_norm": 0.341966837644577, "learning_rate": 4.311097385111323e-05, "loss": 0.382, "step": 5857 }, { "epoch": 0.9539551357733176, "grad_norm": 0.32142454385757446, "learning_rate": 4.3107708112544464e-05, "loss": 0.3371, "step": 5858 }, { "epoch": 0.9541179823311485, "grad_norm": 0.34285563230514526, "learning_rate": 4.310444172385472e-05, "loss": 0.4001, "step": 5859 }, { "epoch": 0.9542808288889794, "grad_norm": 0.30862411856651306, "learning_rate": 4.3101174685161274e-05, "loss": 0.3406, "step": 5860 }, { "epoch": 0.9544436754468102, "grad_norm": 0.31375622749328613, "learning_rate": 4.309790699658143e-05, "loss": 0.3941, "step": 5861 }, { "epoch": 0.9546065220046411, "grad_norm": 0.3139655292034149, "learning_rate": 4.309463865823249e-05, "loss": 0.3335, "step": 5862 }, { "epoch": 0.954769368562472, "grad_norm": 0.3793458938598633, "learning_rate": 4.309136967023182e-05, "loss": 0.4165, "step": 5863 }, { "epoch": 0.9549322151203029, "grad_norm": 0.3262350559234619, "learning_rate": 4.308810003269678e-05, "loss": 0.3487, "step": 5864 }, { "epoch": 0.9550950616781337, "grad_norm": 0.3530062735080719, "learning_rate": 4.308482974574475e-05, "loss": 0.3814, "step": 5865 }, { "epoch": 0.9552579082359647, "grad_norm": 0.3255915939807892, "learning_rate": 4.308155880949315e-05, "loss": 0.3718, "step": 5866 }, { "epoch": 0.9554207547937955, "grad_norm": 0.3659856617450714, "learning_rate": 4.307828722405941e-05, "loss": 0.3701, "step": 5867 }, { "epoch": 0.9555836013516265, "grad_norm": 0.3213347792625427, "learning_rate": 4.3075014989560994e-05, "loss": 0.4573, "step": 5868 }, { "epoch": 0.9557464479094573, "grad_norm": 0.36138418316841125, "learning_rate": 4.3071742106115384e-05, "loss": 0.3412, "step": 5869 }, { "epoch": 0.9559092944672882, "grad_norm": 0.3657291829586029, "learning_rate": 4.3068468573840104e-05, "loss": 0.3657, "step": 5870 }, { "epoch": 0.9560721410251191, "grad_norm": 0.31756776571273804, "learning_rate": 4.306519439285265e-05, "loss": 0.3342, "step": 5871 }, { "epoch": 0.95623498758295, "grad_norm": 0.27279049158096313, "learning_rate": 4.30619195632706e-05, "loss": 0.3652, "step": 5872 }, { "epoch": 0.9563978341407808, "grad_norm": 0.3615072965621948, "learning_rate": 4.3058644085211516e-05, "loss": 0.3595, "step": 5873 }, { "epoch": 0.9565606806986118, "grad_norm": 0.2532474100589752, "learning_rate": 4.305536795879301e-05, "loss": 0.3307, "step": 5874 }, { "epoch": 0.9567235272564426, "grad_norm": 0.4225667715072632, "learning_rate": 4.30520911841327e-05, "loss": 0.3883, "step": 5875 }, { "epoch": 0.9568863738142735, "grad_norm": 0.34822967648506165, "learning_rate": 4.304881376134824e-05, "loss": 0.3561, "step": 5876 }, { "epoch": 0.9570492203721044, "grad_norm": 0.31541508436203003, "learning_rate": 4.3045535690557273e-05, "loss": 0.3535, "step": 5877 }, { "epoch": 0.9572120669299353, "grad_norm": 0.363912969827652, "learning_rate": 4.304225697187752e-05, "loss": 0.3791, "step": 5878 }, { "epoch": 0.9573749134877662, "grad_norm": 0.3301502764225006, "learning_rate": 4.3038977605426675e-05, "loss": 0.3476, "step": 5879 }, { "epoch": 0.957537760045597, "grad_norm": 0.32568782567977905, "learning_rate": 4.30356975913225e-05, "loss": 0.3694, "step": 5880 }, { "epoch": 0.9577006066034279, "grad_norm": 0.31105560064315796, "learning_rate": 4.303241692968274e-05, "loss": 0.3536, "step": 5881 }, { "epoch": 0.9578634531612588, "grad_norm": 0.3242177963256836, "learning_rate": 4.302913562062518e-05, "loss": 0.3542, "step": 5882 }, { "epoch": 0.9580262997190897, "grad_norm": 0.4200502336025238, "learning_rate": 4.302585366426765e-05, "loss": 0.3829, "step": 5883 }, { "epoch": 0.9581891462769205, "grad_norm": 0.32957184314727783, "learning_rate": 4.302257106072796e-05, "loss": 0.3661, "step": 5884 }, { "epoch": 0.9583519928347515, "grad_norm": 0.32823318243026733, "learning_rate": 4.301928781012396e-05, "loss": 0.3897, "step": 5885 }, { "epoch": 0.9585148393925823, "grad_norm": 0.4092177152633667, "learning_rate": 4.3016003912573556e-05, "loss": 0.3906, "step": 5886 }, { "epoch": 0.9586776859504132, "grad_norm": 0.33728840947151184, "learning_rate": 4.301271936819462e-05, "loss": 0.3461, "step": 5887 }, { "epoch": 0.9588405325082441, "grad_norm": 0.3301776051521301, "learning_rate": 4.3009434177105104e-05, "loss": 0.3692, "step": 5888 }, { "epoch": 0.959003379066075, "grad_norm": 0.3131853938102722, "learning_rate": 4.3006148339422934e-05, "loss": 0.3665, "step": 5889 }, { "epoch": 0.9591662256239059, "grad_norm": 0.2758694887161255, "learning_rate": 4.300286185526609e-05, "loss": 0.3003, "step": 5890 }, { "epoch": 0.9593290721817368, "grad_norm": 0.4008253514766693, "learning_rate": 4.299957472475258e-05, "loss": 0.4042, "step": 5891 }, { "epoch": 0.9594919187395676, "grad_norm": 0.3064764738082886, "learning_rate": 4.2996286948000395e-05, "loss": 0.365, "step": 5892 }, { "epoch": 0.9596547652973986, "grad_norm": 0.33061569929122925, "learning_rate": 4.299299852512759e-05, "loss": 0.3833, "step": 5893 }, { "epoch": 0.9598176118552294, "grad_norm": 0.35887327790260315, "learning_rate": 4.2989709456252234e-05, "loss": 0.403, "step": 5894 }, { "epoch": 0.9599804584130603, "grad_norm": 0.29461589455604553, "learning_rate": 4.2986419741492416e-05, "loss": 0.376, "step": 5895 }, { "epoch": 0.9601433049708912, "grad_norm": 0.34652242064476013, "learning_rate": 4.298312938096624e-05, "loss": 0.3574, "step": 5896 }, { "epoch": 0.960306151528722, "grad_norm": 0.3189223110675812, "learning_rate": 4.2979838374791834e-05, "loss": 0.3836, "step": 5897 }, { "epoch": 0.960468998086553, "grad_norm": 0.3231244683265686, "learning_rate": 4.2976546723087365e-05, "loss": 0.3352, "step": 5898 }, { "epoch": 0.9606318446443838, "grad_norm": 0.31982865929603577, "learning_rate": 4.297325442597101e-05, "loss": 0.3344, "step": 5899 }, { "epoch": 0.9607946912022147, "grad_norm": 0.38841813802719116, "learning_rate": 4.296996148356097e-05, "loss": 0.3646, "step": 5900 }, { "epoch": 0.9609575377600456, "grad_norm": 0.28127244114875793, "learning_rate": 4.296666789597547e-05, "loss": 0.3521, "step": 5901 }, { "epoch": 0.9611203843178765, "grad_norm": 0.3081330955028534, "learning_rate": 4.296337366333276e-05, "loss": 0.368, "step": 5902 }, { "epoch": 0.9612832308757073, "grad_norm": 0.3317902088165283, "learning_rate": 4.2960078785751125e-05, "loss": 0.3565, "step": 5903 }, { "epoch": 0.9614460774335383, "grad_norm": 0.3876509666442871, "learning_rate": 4.2956783263348854e-05, "loss": 0.3829, "step": 5904 }, { "epoch": 0.9616089239913691, "grad_norm": 0.3007161021232605, "learning_rate": 4.295348709624425e-05, "loss": 0.3075, "step": 5905 }, { "epoch": 0.9617717705492, "grad_norm": 0.3296906054019928, "learning_rate": 4.295019028455568e-05, "loss": 0.323, "step": 5906 }, { "epoch": 0.9619346171070309, "grad_norm": 0.28519919514656067, "learning_rate": 4.2946892828401494e-05, "loss": 0.3387, "step": 5907 }, { "epoch": 0.9620974636648618, "grad_norm": 0.28975263237953186, "learning_rate": 4.294359472790009e-05, "loss": 0.3581, "step": 5908 }, { "epoch": 0.9622603102226926, "grad_norm": 0.3606334924697876, "learning_rate": 4.294029598316988e-05, "loss": 0.4012, "step": 5909 }, { "epoch": 0.9624231567805236, "grad_norm": 0.29519379138946533, "learning_rate": 4.293699659432928e-05, "loss": 0.4017, "step": 5910 }, { "epoch": 0.9625860033383544, "grad_norm": 0.31148985028266907, "learning_rate": 4.2933696561496766e-05, "loss": 0.357, "step": 5911 }, { "epoch": 0.9627488498961854, "grad_norm": 0.29188722372055054, "learning_rate": 4.293039588479082e-05, "loss": 0.3592, "step": 5912 }, { "epoch": 0.9629116964540162, "grad_norm": 0.35562795400619507, "learning_rate": 4.2927094564329936e-05, "loss": 0.3932, "step": 5913 }, { "epoch": 0.9630745430118471, "grad_norm": 0.34332963824272156, "learning_rate": 4.292379260023265e-05, "loss": 0.3652, "step": 5914 }, { "epoch": 0.963237389569678, "grad_norm": 0.37211304903030396, "learning_rate": 4.29204899926175e-05, "loss": 0.4263, "step": 5915 }, { "epoch": 0.9634002361275088, "grad_norm": 0.3390045464038849, "learning_rate": 4.291718674160308e-05, "loss": 0.3436, "step": 5916 }, { "epoch": 0.9635630826853397, "grad_norm": 0.32140862941741943, "learning_rate": 4.291388284730796e-05, "loss": 0.3478, "step": 5917 }, { "epoch": 0.9637259292431706, "grad_norm": 0.30639415979385376, "learning_rate": 4.2910578309850787e-05, "loss": 0.2952, "step": 5918 }, { "epoch": 0.9638887758010015, "grad_norm": 0.3746296465396881, "learning_rate": 4.2907273129350186e-05, "loss": 0.3439, "step": 5919 }, { "epoch": 0.9640516223588323, "grad_norm": 0.36105355620384216, "learning_rate": 4.290396730592483e-05, "loss": 0.3708, "step": 5920 }, { "epoch": 0.9642144689166633, "grad_norm": 0.32264402508735657, "learning_rate": 4.2900660839693405e-05, "loss": 0.3601, "step": 5921 }, { "epoch": 0.9643773154744941, "grad_norm": 0.29759612679481506, "learning_rate": 4.289735373077462e-05, "loss": 0.3281, "step": 5922 }, { "epoch": 0.9645401620323251, "grad_norm": 0.321607381105423, "learning_rate": 4.2894045979287224e-05, "loss": 0.3951, "step": 5923 }, { "epoch": 0.9647030085901559, "grad_norm": 0.3315381407737732, "learning_rate": 4.289073758534995e-05, "loss": 0.3542, "step": 5924 }, { "epoch": 0.9648658551479868, "grad_norm": 0.3483681082725525, "learning_rate": 4.288742854908161e-05, "loss": 0.3728, "step": 5925 }, { "epoch": 0.9650287017058177, "grad_norm": 0.2942352294921875, "learning_rate": 4.288411887060099e-05, "loss": 0.3643, "step": 5926 }, { "epoch": 0.9651915482636486, "grad_norm": 0.3546657860279083, "learning_rate": 4.2880808550026916e-05, "loss": 0.3717, "step": 5927 }, { "epoch": 0.9653543948214794, "grad_norm": 0.3233741223812103, "learning_rate": 4.287749758747824e-05, "loss": 0.3994, "step": 5928 }, { "epoch": 0.9655172413793104, "grad_norm": 0.3211047649383545, "learning_rate": 4.287418598307385e-05, "loss": 0.3869, "step": 5929 }, { "epoch": 0.9656800879371412, "grad_norm": 0.3016580045223236, "learning_rate": 4.287087373693261e-05, "loss": 0.3867, "step": 5930 }, { "epoch": 0.9658429344949722, "grad_norm": 0.3471583127975464, "learning_rate": 4.286756084917348e-05, "loss": 0.3892, "step": 5931 }, { "epoch": 0.966005781052803, "grad_norm": 0.34232276678085327, "learning_rate": 4.2864247319915376e-05, "loss": 0.3734, "step": 5932 }, { "epoch": 0.9661686276106339, "grad_norm": 0.3290078341960907, "learning_rate": 4.286093314927727e-05, "loss": 0.3666, "step": 5933 }, { "epoch": 0.9663314741684648, "grad_norm": 0.3372010886669159, "learning_rate": 4.2857618337378146e-05, "loss": 0.3703, "step": 5934 }, { "epoch": 0.9664943207262956, "grad_norm": 0.30911633372306824, "learning_rate": 4.285430288433703e-05, "loss": 0.3441, "step": 5935 }, { "epoch": 0.9666571672841265, "grad_norm": 0.3773972988128662, "learning_rate": 4.2850986790272946e-05, "loss": 0.364, "step": 5936 }, { "epoch": 0.9668200138419574, "grad_norm": 0.35275858640670776, "learning_rate": 4.284767005530495e-05, "loss": 0.3508, "step": 5937 }, { "epoch": 0.9669828603997883, "grad_norm": 0.29892924427986145, "learning_rate": 4.2844352679552126e-05, "loss": 0.3057, "step": 5938 }, { "epoch": 0.9671457069576191, "grad_norm": 0.41723117232322693, "learning_rate": 4.284103466313358e-05, "loss": 0.3961, "step": 5939 }, { "epoch": 0.9673085535154501, "grad_norm": 0.28880563378334045, "learning_rate": 4.2837716006168435e-05, "loss": 0.3451, "step": 5940 }, { "epoch": 0.9674714000732809, "grad_norm": 0.3634311556816101, "learning_rate": 4.2834396708775845e-05, "loss": 0.3721, "step": 5941 }, { "epoch": 0.9676342466311119, "grad_norm": 0.39144787192344666, "learning_rate": 4.2831076771074975e-05, "loss": 0.3827, "step": 5942 }, { "epoch": 0.9677970931889427, "grad_norm": 0.3003021776676178, "learning_rate": 4.282775619318503e-05, "loss": 0.3768, "step": 5943 }, { "epoch": 0.9679599397467736, "grad_norm": 0.30369073152542114, "learning_rate": 4.282443497522523e-05, "loss": 0.3552, "step": 5944 }, { "epoch": 0.9681227863046045, "grad_norm": 0.31132933497428894, "learning_rate": 4.282111311731479e-05, "loss": 0.3221, "step": 5945 }, { "epoch": 0.9682856328624354, "grad_norm": 0.36395931243896484, "learning_rate": 4.281779061957301e-05, "loss": 0.3877, "step": 5946 }, { "epoch": 0.9684484794202662, "grad_norm": 0.2906336486339569, "learning_rate": 4.281446748211917e-05, "loss": 0.3734, "step": 5947 }, { "epoch": 0.9686113259780972, "grad_norm": 0.30525144934654236, "learning_rate": 4.281114370507257e-05, "loss": 0.3688, "step": 5948 }, { "epoch": 0.968774172535928, "grad_norm": 0.2911463975906372, "learning_rate": 4.280781928855253e-05, "loss": 0.3648, "step": 5949 }, { "epoch": 0.968937019093759, "grad_norm": 0.3251287341117859, "learning_rate": 4.280449423267844e-05, "loss": 0.4254, "step": 5950 }, { "epoch": 0.9690998656515898, "grad_norm": 0.37256920337677, "learning_rate": 4.280116853756966e-05, "loss": 0.3697, "step": 5951 }, { "epoch": 0.9692627122094206, "grad_norm": 0.3047342002391815, "learning_rate": 4.27978422033456e-05, "loss": 0.3685, "step": 5952 }, { "epoch": 0.9694255587672516, "grad_norm": 0.32563841342926025, "learning_rate": 4.279451523012568e-05, "loss": 0.3642, "step": 5953 }, { "epoch": 0.9695884053250824, "grad_norm": 0.3569439947605133, "learning_rate": 4.2791187618029346e-05, "loss": 0.3656, "step": 5954 }, { "epoch": 0.9697512518829133, "grad_norm": 0.32716912031173706, "learning_rate": 4.278785936717607e-05, "loss": 0.3756, "step": 5955 }, { "epoch": 0.9699140984407442, "grad_norm": 0.3702610731124878, "learning_rate": 4.2784530477685354e-05, "loss": 0.4168, "step": 5956 }, { "epoch": 0.9700769449985751, "grad_norm": 0.3517455756664276, "learning_rate": 4.278120094967671e-05, "loss": 0.3933, "step": 5957 }, { "epoch": 0.9702397915564059, "grad_norm": 0.299623042345047, "learning_rate": 4.2777870783269676e-05, "loss": 0.363, "step": 5958 }, { "epoch": 0.9704026381142369, "grad_norm": 0.33957168459892273, "learning_rate": 4.277453997858381e-05, "loss": 0.3508, "step": 5959 }, { "epoch": 0.9705654846720677, "grad_norm": 0.31868165731430054, "learning_rate": 4.277120853573871e-05, "loss": 0.3435, "step": 5960 }, { "epoch": 0.9707283312298987, "grad_norm": 0.3161013424396515, "learning_rate": 4.2767876454854e-05, "loss": 0.4057, "step": 5961 }, { "epoch": 0.9708911777877295, "grad_norm": 0.3387358784675598, "learning_rate": 4.276454373604927e-05, "loss": 0.3605, "step": 5962 }, { "epoch": 0.9710540243455604, "grad_norm": 0.298168808221817, "learning_rate": 4.276121037944419e-05, "loss": 0.3616, "step": 5963 }, { "epoch": 0.9712168709033913, "grad_norm": 0.31057652831077576, "learning_rate": 4.275787638515846e-05, "loss": 0.361, "step": 5964 }, { "epoch": 0.9713797174612222, "grad_norm": 0.30428287386894226, "learning_rate": 4.275454175331175e-05, "loss": 0.3577, "step": 5965 }, { "epoch": 0.971542564019053, "grad_norm": 0.3046707808971405, "learning_rate": 4.2751206484023806e-05, "loss": 0.3406, "step": 5966 }, { "epoch": 0.971705410576884, "grad_norm": 0.404706209897995, "learning_rate": 4.274787057741436e-05, "loss": 0.3688, "step": 5967 }, { "epoch": 0.9718682571347148, "grad_norm": 0.3951598107814789, "learning_rate": 4.274453403360319e-05, "loss": 0.3872, "step": 5968 }, { "epoch": 0.9720311036925458, "grad_norm": 0.3062501549720764, "learning_rate": 4.2741196852710086e-05, "loss": 0.3483, "step": 5969 }, { "epoch": 0.9721939502503766, "grad_norm": 0.42732009291648865, "learning_rate": 4.2737859034854854e-05, "loss": 0.3909, "step": 5970 }, { "epoch": 0.9723567968082074, "grad_norm": 0.4207245409488678, "learning_rate": 4.2734520580157345e-05, "loss": 0.3865, "step": 5971 }, { "epoch": 0.9725196433660384, "grad_norm": 0.3108152449131012, "learning_rate": 4.273118148873741e-05, "loss": 0.3431, "step": 5972 }, { "epoch": 0.9726824899238692, "grad_norm": 0.3848322629928589, "learning_rate": 4.2727841760714937e-05, "loss": 0.3737, "step": 5973 }, { "epoch": 0.9728453364817001, "grad_norm": 0.30020639300346375, "learning_rate": 4.272450139620983e-05, "loss": 0.3262, "step": 5974 }, { "epoch": 0.973008183039531, "grad_norm": 0.3762408494949341, "learning_rate": 4.2721160395342014e-05, "loss": 0.3922, "step": 5975 }, { "epoch": 0.9731710295973619, "grad_norm": 0.3531384766101837, "learning_rate": 4.271781875823145e-05, "loss": 0.3852, "step": 5976 }, { "epoch": 0.9733338761551927, "grad_norm": 0.32215291261672974, "learning_rate": 4.271447648499811e-05, "loss": 0.3445, "step": 5977 }, { "epoch": 0.9734967227130237, "grad_norm": 0.3038754165172577, "learning_rate": 4.271113357576199e-05, "loss": 0.3644, "step": 5978 }, { "epoch": 0.9736595692708545, "grad_norm": 0.3175259530544281, "learning_rate": 4.2707790030643115e-05, "loss": 0.3707, "step": 5979 }, { "epoch": 0.9738224158286855, "grad_norm": 0.3830743730068207, "learning_rate": 4.270444584976151e-05, "loss": 0.3983, "step": 5980 }, { "epoch": 0.9739852623865163, "grad_norm": 0.3430050313472748, "learning_rate": 4.270110103323727e-05, "loss": 0.3877, "step": 5981 }, { "epoch": 0.9741481089443472, "grad_norm": 0.31908440589904785, "learning_rate": 4.2697755581190464e-05, "loss": 0.3697, "step": 5982 }, { "epoch": 0.974310955502178, "grad_norm": 0.2991389036178589, "learning_rate": 4.2694409493741204e-05, "loss": 0.3327, "step": 5983 }, { "epoch": 0.974473802060009, "grad_norm": 0.34590843319892883, "learning_rate": 4.2691062771009624e-05, "loss": 0.3656, "step": 5984 }, { "epoch": 0.9746366486178398, "grad_norm": 0.2993263900279999, "learning_rate": 4.26877154131159e-05, "loss": 0.3414, "step": 5985 }, { "epoch": 0.9747994951756708, "grad_norm": 0.26697975397109985, "learning_rate": 4.26843674201802e-05, "loss": 0.2802, "step": 5986 }, { "epoch": 0.9749623417335016, "grad_norm": 0.33254098892211914, "learning_rate": 4.268101879232272e-05, "loss": 0.3322, "step": 5987 }, { "epoch": 0.9751251882913325, "grad_norm": 0.34186503291130066, "learning_rate": 4.267766952966369e-05, "loss": 0.3531, "step": 5988 }, { "epoch": 0.9752880348491634, "grad_norm": 0.3252999782562256, "learning_rate": 4.267431963232336e-05, "loss": 0.3563, "step": 5989 }, { "epoch": 0.9754508814069942, "grad_norm": 0.3011332154273987, "learning_rate": 4.267096910042201e-05, "loss": 0.3695, "step": 5990 }, { "epoch": 0.9756137279648252, "grad_norm": 0.3619767129421234, "learning_rate": 4.266761793407992e-05, "loss": 0.3739, "step": 5991 }, { "epoch": 0.975776574522656, "grad_norm": 0.36820968985557556, "learning_rate": 4.266426613341741e-05, "loss": 0.3668, "step": 5992 }, { "epoch": 0.9759394210804869, "grad_norm": 0.30655133724212646, "learning_rate": 4.266091369855482e-05, "loss": 0.3426, "step": 5993 }, { "epoch": 0.9761022676383178, "grad_norm": 0.37563517689704895, "learning_rate": 4.265756062961252e-05, "loss": 0.4022, "step": 5994 }, { "epoch": 0.9762651141961487, "grad_norm": 0.3160140812397003, "learning_rate": 4.265420692671089e-05, "loss": 0.3827, "step": 5995 }, { "epoch": 0.9764279607539795, "grad_norm": 0.33316245675086975, "learning_rate": 4.265085258997034e-05, "loss": 0.3517, "step": 5996 }, { "epoch": 0.9765908073118105, "grad_norm": 0.4144081473350525, "learning_rate": 4.264749761951129e-05, "loss": 0.395, "step": 5997 }, { "epoch": 0.9767536538696413, "grad_norm": 0.3679368793964386, "learning_rate": 4.264414201545421e-05, "loss": 0.4208, "step": 5998 }, { "epoch": 0.9769165004274722, "grad_norm": 0.3788396120071411, "learning_rate": 4.264078577791957e-05, "loss": 0.3914, "step": 5999 }, { "epoch": 0.9770793469853031, "grad_norm": 0.31999850273132324, "learning_rate": 4.263742890702785e-05, "loss": 0.34, "step": 6000 }, { "epoch": 0.977242193543134, "grad_norm": 0.2940697968006134, "learning_rate": 4.263407140289961e-05, "loss": 0.3523, "step": 6001 }, { "epoch": 0.9774050401009649, "grad_norm": 0.29756394028663635, "learning_rate": 4.2630713265655364e-05, "loss": 0.3294, "step": 6002 }, { "epoch": 0.9775678866587958, "grad_norm": 0.358915776014328, "learning_rate": 4.262735449541569e-05, "loss": 0.389, "step": 6003 }, { "epoch": 0.9777307332166266, "grad_norm": 0.2435058206319809, "learning_rate": 4.262399509230118e-05, "loss": 0.3284, "step": 6004 }, { "epoch": 0.9778935797744576, "grad_norm": 0.3243318200111389, "learning_rate": 4.2620635056432444e-05, "loss": 0.3364, "step": 6005 }, { "epoch": 0.9780564263322884, "grad_norm": 0.32375308871269226, "learning_rate": 4.2617274387930104e-05, "loss": 0.3803, "step": 6006 }, { "epoch": 0.9782192728901193, "grad_norm": 0.28215470910072327, "learning_rate": 4.261391308691485e-05, "loss": 0.3677, "step": 6007 }, { "epoch": 0.9783821194479502, "grad_norm": 0.28523242473602295, "learning_rate": 4.261055115350733e-05, "loss": 0.3621, "step": 6008 }, { "epoch": 0.978544966005781, "grad_norm": 0.3009653389453888, "learning_rate": 4.260718858782826e-05, "loss": 0.3626, "step": 6009 }, { "epoch": 0.978707812563612, "grad_norm": 0.3382701277732849, "learning_rate": 4.260382538999837e-05, "loss": 0.3353, "step": 6010 }, { "epoch": 0.9788706591214428, "grad_norm": 0.31035926938056946, "learning_rate": 4.260046156013841e-05, "loss": 0.3405, "step": 6011 }, { "epoch": 0.9790335056792737, "grad_norm": 0.3444042205810547, "learning_rate": 4.2597097098369146e-05, "loss": 0.3782, "step": 6012 }, { "epoch": 0.9791963522371046, "grad_norm": 0.33684858679771423, "learning_rate": 4.259373200481137e-05, "loss": 0.3343, "step": 6013 }, { "epoch": 0.9793591987949355, "grad_norm": 0.36679232120513916, "learning_rate": 4.2590366279585916e-05, "loss": 0.4017, "step": 6014 }, { "epoch": 0.9795220453527663, "grad_norm": 0.2724775969982147, "learning_rate": 4.25869999228136e-05, "loss": 0.3353, "step": 6015 }, { "epoch": 0.9796848919105973, "grad_norm": 0.40890711545944214, "learning_rate": 4.258363293461529e-05, "loss": 0.3666, "step": 6016 }, { "epoch": 0.9798477384684281, "grad_norm": 0.3067302107810974, "learning_rate": 4.2580265315111876e-05, "loss": 0.3429, "step": 6017 }, { "epoch": 0.980010585026259, "grad_norm": 0.32378333806991577, "learning_rate": 4.2576897064424276e-05, "loss": 0.364, "step": 6018 }, { "epoch": 0.9801734315840899, "grad_norm": 0.3190210461616516, "learning_rate": 4.257352818267341e-05, "loss": 0.3943, "step": 6019 }, { "epoch": 0.9803362781419208, "grad_norm": 0.3470926582813263, "learning_rate": 4.257015866998021e-05, "loss": 0.355, "step": 6020 }, { "epoch": 0.9804991246997516, "grad_norm": 0.3072376847267151, "learning_rate": 4.256678852646569e-05, "loss": 0.3755, "step": 6021 }, { "epoch": 0.9806619712575826, "grad_norm": 0.39445003867149353, "learning_rate": 4.256341775225083e-05, "loss": 0.3935, "step": 6022 }, { "epoch": 0.9808248178154134, "grad_norm": 0.3508698046207428, "learning_rate": 4.256004634745665e-05, "loss": 0.3458, "step": 6023 }, { "epoch": 0.9809876643732444, "grad_norm": 0.3740207850933075, "learning_rate": 4.2556674312204195e-05, "loss": 0.3535, "step": 6024 }, { "epoch": 0.9811505109310752, "grad_norm": 0.3264593780040741, "learning_rate": 4.2553301646614524e-05, "loss": 0.3635, "step": 6025 }, { "epoch": 0.981313357488906, "grad_norm": 0.3542894721031189, "learning_rate": 4.254992835080874e-05, "loss": 0.3697, "step": 6026 }, { "epoch": 0.981476204046737, "grad_norm": 0.31853315234184265, "learning_rate": 4.254655442490795e-05, "loss": 0.3302, "step": 6027 }, { "epoch": 0.9816390506045678, "grad_norm": 0.36307764053344727, "learning_rate": 4.254317986903327e-05, "loss": 0.3896, "step": 6028 }, { "epoch": 0.9818018971623987, "grad_norm": 0.29014045000076294, "learning_rate": 4.253980468330588e-05, "loss": 0.3491, "step": 6029 }, { "epoch": 0.9819647437202296, "grad_norm": 0.2766018509864807, "learning_rate": 4.253642886784696e-05, "loss": 0.3654, "step": 6030 }, { "epoch": 0.9821275902780605, "grad_norm": 0.3254662752151489, "learning_rate": 4.2533052422777695e-05, "loss": 0.3566, "step": 6031 }, { "epoch": 0.9822904368358913, "grad_norm": 0.3490492105484009, "learning_rate": 4.2529675348219316e-05, "loss": 0.3544, "step": 6032 }, { "epoch": 0.9824532833937223, "grad_norm": 0.3071119487285614, "learning_rate": 4.2526297644293075e-05, "loss": 0.3668, "step": 6033 }, { "epoch": 0.9826161299515531, "grad_norm": 0.2946256995201111, "learning_rate": 4.252291931112024e-05, "loss": 0.3735, "step": 6034 }, { "epoch": 0.9827789765093841, "grad_norm": 0.27317318320274353, "learning_rate": 4.2519540348822094e-05, "loss": 0.3649, "step": 6035 }, { "epoch": 0.9829418230672149, "grad_norm": 0.31890982389450073, "learning_rate": 4.251616075751997e-05, "loss": 0.3469, "step": 6036 }, { "epoch": 0.9831046696250458, "grad_norm": 0.27904602885246277, "learning_rate": 4.251278053733519e-05, "loss": 0.3591, "step": 6037 }, { "epoch": 0.9832675161828767, "grad_norm": 0.30446457862854004, "learning_rate": 4.250939968838912e-05, "loss": 0.3584, "step": 6038 }, { "epoch": 0.9834303627407076, "grad_norm": 0.2908199727535248, "learning_rate": 4.250601821080314e-05, "loss": 0.3223, "step": 6039 }, { "epoch": 0.9835932092985384, "grad_norm": 0.3007378876209259, "learning_rate": 4.250263610469866e-05, "loss": 0.375, "step": 6040 }, { "epoch": 0.9837560558563694, "grad_norm": 0.33418959379196167, "learning_rate": 4.24992533701971e-05, "loss": 0.4032, "step": 6041 }, { "epoch": 0.9839189024142002, "grad_norm": 0.47491782903671265, "learning_rate": 4.249587000741991e-05, "loss": 0.3939, "step": 6042 }, { "epoch": 0.9840817489720312, "grad_norm": 0.23256610333919525, "learning_rate": 4.249248601648858e-05, "loss": 0.3304, "step": 6043 }, { "epoch": 0.984244595529862, "grad_norm": 0.31765642762184143, "learning_rate": 4.2489101397524585e-05, "loss": 0.3399, "step": 6044 }, { "epoch": 0.9844074420876928, "grad_norm": 0.31465914845466614, "learning_rate": 4.2485716150649454e-05, "loss": 0.3535, "step": 6045 }, { "epoch": 0.9845702886455238, "grad_norm": 0.3355167806148529, "learning_rate": 4.248233027598473e-05, "loss": 0.3226, "step": 6046 }, { "epoch": 0.9847331352033546, "grad_norm": 0.31415441632270813, "learning_rate": 4.247894377365197e-05, "loss": 0.3611, "step": 6047 }, { "epoch": 0.9848959817611855, "grad_norm": 0.3176732361316681, "learning_rate": 4.247555664377275e-05, "loss": 0.3463, "step": 6048 }, { "epoch": 0.9850588283190164, "grad_norm": 0.3427828252315521, "learning_rate": 4.2472168886468696e-05, "loss": 0.3824, "step": 6049 }, { "epoch": 0.9852216748768473, "grad_norm": 0.3266282379627228, "learning_rate": 4.2468780501861425e-05, "loss": 0.3538, "step": 6050 }, { "epoch": 0.9853845214346781, "grad_norm": 0.3360629379749298, "learning_rate": 4.2465391490072606e-05, "loss": 0.37, "step": 6051 }, { "epoch": 0.9855473679925091, "grad_norm": 0.3822832405567169, "learning_rate": 4.2462001851223894e-05, "loss": 0.4201, "step": 6052 }, { "epoch": 0.9857102145503399, "grad_norm": 0.28742554783821106, "learning_rate": 4.2458611585437e-05, "loss": 0.3343, "step": 6053 }, { "epoch": 0.9858730611081709, "grad_norm": 0.34541502594947815, "learning_rate": 4.245522069283365e-05, "loss": 0.38, "step": 6054 }, { "epoch": 0.9860359076660017, "grad_norm": 0.3135303258895874, "learning_rate": 4.2451829173535575e-05, "loss": 0.389, "step": 6055 }, { "epoch": 0.9861987542238326, "grad_norm": 0.39095860719680786, "learning_rate": 4.244843702766454e-05, "loss": 0.3522, "step": 6056 }, { "epoch": 0.9863616007816635, "grad_norm": 0.33909687399864197, "learning_rate": 4.244504425534235e-05, "loss": 0.3757, "step": 6057 }, { "epoch": 0.9865244473394944, "grad_norm": 0.297107458114624, "learning_rate": 4.24416508566908e-05, "loss": 0.3394, "step": 6058 }, { "epoch": 0.9866872938973252, "grad_norm": 0.2666410803794861, "learning_rate": 4.2438256831831724e-05, "loss": 0.3421, "step": 6059 }, { "epoch": 0.9868501404551562, "grad_norm": 0.4222930371761322, "learning_rate": 4.243486218088698e-05, "loss": 0.4277, "step": 6060 }, { "epoch": 0.987012987012987, "grad_norm": 0.349702388048172, "learning_rate": 4.243146690397846e-05, "loss": 0.3506, "step": 6061 }, { "epoch": 0.987175833570818, "grad_norm": 0.32593613862991333, "learning_rate": 4.242807100122804e-05, "loss": 0.3507, "step": 6062 }, { "epoch": 0.9873386801286488, "grad_norm": 0.3509102165699005, "learning_rate": 4.242467447275765e-05, "loss": 0.3344, "step": 6063 }, { "epoch": 0.9875015266864796, "grad_norm": 0.3903157413005829, "learning_rate": 4.242127731868926e-05, "loss": 0.3525, "step": 6064 }, { "epoch": 0.9876643732443106, "grad_norm": 0.4380998909473419, "learning_rate": 4.241787953914481e-05, "loss": 0.3981, "step": 6065 }, { "epoch": 0.9878272198021414, "grad_norm": 0.35886630415916443, "learning_rate": 4.241448113424629e-05, "loss": 0.3379, "step": 6066 }, { "epoch": 0.9879900663599723, "grad_norm": 0.3094140291213989, "learning_rate": 4.2411082104115727e-05, "loss": 0.3797, "step": 6067 }, { "epoch": 0.9881529129178032, "grad_norm": 0.3634208142757416, "learning_rate": 4.240768244887516e-05, "loss": 0.3667, "step": 6068 }, { "epoch": 0.9883157594756341, "grad_norm": 0.33630165457725525, "learning_rate": 4.240428216864663e-05, "loss": 0.4049, "step": 6069 }, { "epoch": 0.9884786060334649, "grad_norm": 0.2881421148777008, "learning_rate": 4.2400881263552225e-05, "loss": 0.3412, "step": 6070 }, { "epoch": 0.9886414525912959, "grad_norm": 0.2735201418399811, "learning_rate": 4.239747973371405e-05, "loss": 0.3328, "step": 6071 }, { "epoch": 0.9888042991491267, "grad_norm": 0.3267402648925781, "learning_rate": 4.239407757925423e-05, "loss": 0.3588, "step": 6072 }, { "epoch": 0.9889671457069577, "grad_norm": 0.3521595895290375, "learning_rate": 4.239067480029491e-05, "loss": 0.3239, "step": 6073 }, { "epoch": 0.9891299922647885, "grad_norm": 0.31658613681793213, "learning_rate": 4.2387271396958264e-05, "loss": 0.3328, "step": 6074 }, { "epoch": 0.9892928388226194, "grad_norm": 0.2850629985332489, "learning_rate": 4.238386736936648e-05, "loss": 0.3486, "step": 6075 }, { "epoch": 0.9894556853804503, "grad_norm": 0.30358627438545227, "learning_rate": 4.2380462717641774e-05, "loss": 0.3454, "step": 6076 }, { "epoch": 0.9896185319382812, "grad_norm": 0.37381038069725037, "learning_rate": 4.237705744190638e-05, "loss": 0.3355, "step": 6077 }, { "epoch": 0.989781378496112, "grad_norm": 0.39812004566192627, "learning_rate": 4.2373651542282565e-05, "loss": 0.3611, "step": 6078 }, { "epoch": 0.989944225053943, "grad_norm": 0.2674860954284668, "learning_rate": 4.2370245018892595e-05, "loss": 0.3251, "step": 6079 }, { "epoch": 0.9901070716117738, "grad_norm": 0.36088597774505615, "learning_rate": 4.23668378718588e-05, "loss": 0.3438, "step": 6080 }, { "epoch": 0.9902699181696046, "grad_norm": 0.32939133048057556, "learning_rate": 4.2363430101303484e-05, "loss": 0.3392, "step": 6081 }, { "epoch": 0.9904327647274356, "grad_norm": 0.356118381023407, "learning_rate": 4.2360021707349007e-05, "loss": 0.3793, "step": 6082 }, { "epoch": 0.9905956112852664, "grad_norm": 0.32938942313194275, "learning_rate": 4.235661269011775e-05, "loss": 0.3595, "step": 6083 }, { "epoch": 0.9907584578430974, "grad_norm": 0.30197957158088684, "learning_rate": 4.2353203049732084e-05, "loss": 0.3367, "step": 6084 }, { "epoch": 0.9909213044009282, "grad_norm": 0.31348085403442383, "learning_rate": 4.234979278631444e-05, "loss": 0.3449, "step": 6085 }, { "epoch": 0.9910841509587591, "grad_norm": 0.27922818064689636, "learning_rate": 4.234638189998725e-05, "loss": 0.3096, "step": 6086 }, { "epoch": 0.99124699751659, "grad_norm": 0.28035661578178406, "learning_rate": 4.2342970390872985e-05, "loss": 0.3382, "step": 6087 }, { "epoch": 0.9914098440744209, "grad_norm": 0.2912186086177826, "learning_rate": 4.233955825909411e-05, "loss": 0.3729, "step": 6088 }, { "epoch": 0.9915726906322517, "grad_norm": 0.805427074432373, "learning_rate": 4.233614550477316e-05, "loss": 0.3777, "step": 6089 }, { "epoch": 0.9917355371900827, "grad_norm": 0.34128403663635254, "learning_rate": 4.233273212803264e-05, "loss": 0.316, "step": 6090 }, { "epoch": 0.9918983837479135, "grad_norm": 0.34407201409339905, "learning_rate": 4.23293181289951e-05, "loss": 0.3812, "step": 6091 }, { "epoch": 0.9920612303057444, "grad_norm": 0.3586196303367615, "learning_rate": 4.232590350778312e-05, "loss": 0.392, "step": 6092 }, { "epoch": 0.9922240768635753, "grad_norm": 0.3193954527378082, "learning_rate": 4.23224882645193e-05, "loss": 0.3539, "step": 6093 }, { "epoch": 0.9923869234214062, "grad_norm": 0.3472837209701538, "learning_rate": 4.2319072399326246e-05, "loss": 0.3827, "step": 6094 }, { "epoch": 0.992549769979237, "grad_norm": 0.35405078530311584, "learning_rate": 4.2315655912326606e-05, "loss": 0.3664, "step": 6095 }, { "epoch": 0.992712616537068, "grad_norm": 0.2693786323070526, "learning_rate": 4.231223880364304e-05, "loss": 0.3488, "step": 6096 }, { "epoch": 0.9928754630948988, "grad_norm": 0.27635711431503296, "learning_rate": 4.230882107339824e-05, "loss": 0.3526, "step": 6097 }, { "epoch": 0.9930383096527298, "grad_norm": 0.31753426790237427, "learning_rate": 4.230540272171489e-05, "loss": 0.3705, "step": 6098 }, { "epoch": 0.9932011562105606, "grad_norm": 0.3380202054977417, "learning_rate": 4.230198374871575e-05, "loss": 0.3472, "step": 6099 }, { "epoch": 0.9933640027683914, "grad_norm": 0.4037318229675293, "learning_rate": 4.229856415452354e-05, "loss": 0.3765, "step": 6100 }, { "epoch": 0.9935268493262224, "grad_norm": 0.34051328897476196, "learning_rate": 4.229514393926105e-05, "loss": 0.3672, "step": 6101 }, { "epoch": 0.9936896958840532, "grad_norm": 0.2932374179363251, "learning_rate": 4.229172310305108e-05, "loss": 0.3683, "step": 6102 }, { "epoch": 0.9938525424418841, "grad_norm": 0.3371785581111908, "learning_rate": 4.2288301646016446e-05, "loss": 0.3368, "step": 6103 }, { "epoch": 0.994015388999715, "grad_norm": 0.3846682608127594, "learning_rate": 4.228487956827998e-05, "loss": 0.3664, "step": 6104 }, { "epoch": 0.9941782355575459, "grad_norm": 0.32315924763679504, "learning_rate": 4.2281456869964567e-05, "loss": 0.3709, "step": 6105 }, { "epoch": 0.9943410821153768, "grad_norm": 0.3279528021812439, "learning_rate": 4.227803355119305e-05, "loss": 0.3472, "step": 6106 }, { "epoch": 0.9945039286732077, "grad_norm": 0.37156909704208374, "learning_rate": 4.227460961208838e-05, "loss": 0.3455, "step": 6107 }, { "epoch": 0.9946667752310385, "grad_norm": 0.4308210611343384, "learning_rate": 4.227118505277347e-05, "loss": 0.3768, "step": 6108 }, { "epoch": 0.9948296217888695, "grad_norm": 0.32399895787239075, "learning_rate": 4.226775987337126e-05, "loss": 0.3628, "step": 6109 }, { "epoch": 0.9949924683467003, "grad_norm": 0.3640190064907074, "learning_rate": 4.2264334074004734e-05, "loss": 0.3591, "step": 6110 }, { "epoch": 0.9951553149045312, "grad_norm": 0.41553691029548645, "learning_rate": 4.22609076547969e-05, "loss": 0.363, "step": 6111 }, { "epoch": 0.9953181614623621, "grad_norm": 0.3198367655277252, "learning_rate": 4.2257480615870756e-05, "loss": 0.3365, "step": 6112 }, { "epoch": 0.995481008020193, "grad_norm": 0.3634047508239746, "learning_rate": 4.225405295734936e-05, "loss": 0.3956, "step": 6113 }, { "epoch": 0.9956438545780238, "grad_norm": 0.3084033131599426, "learning_rate": 4.2250624679355766e-05, "loss": 0.3612, "step": 6114 }, { "epoch": 0.9958067011358548, "grad_norm": 0.3439890742301941, "learning_rate": 4.224719578201306e-05, "loss": 0.3411, "step": 6115 }, { "epoch": 0.9959695476936856, "grad_norm": 0.3911621868610382, "learning_rate": 4.224376626544436e-05, "loss": 0.386, "step": 6116 }, { "epoch": 0.9961323942515166, "grad_norm": 0.3389756977558136, "learning_rate": 4.224033612977278e-05, "loss": 0.3806, "step": 6117 }, { "epoch": 0.9962952408093474, "grad_norm": 0.4983523488044739, "learning_rate": 4.223690537512148e-05, "loss": 0.4064, "step": 6118 }, { "epoch": 0.9964580873671782, "grad_norm": 0.39478376507759094, "learning_rate": 4.2233474001613635e-05, "loss": 0.3745, "step": 6119 }, { "epoch": 0.9966209339250092, "grad_norm": 0.33295655250549316, "learning_rate": 4.223004200937244e-05, "loss": 0.3795, "step": 6120 }, { "epoch": 0.99678378048284, "grad_norm": 0.27278631925582886, "learning_rate": 4.222660939852111e-05, "loss": 0.3655, "step": 6121 }, { "epoch": 0.9969466270406709, "grad_norm": 0.3572591543197632, "learning_rate": 4.222317616918289e-05, "loss": 0.3796, "step": 6122 }, { "epoch": 0.9971094735985018, "grad_norm": 0.3572358787059784, "learning_rate": 4.2219742321481055e-05, "loss": 0.378, "step": 6123 }, { "epoch": 0.9972723201563327, "grad_norm": 0.2856338918209076, "learning_rate": 4.221630785553887e-05, "loss": 0.3535, "step": 6124 }, { "epoch": 0.9974351667141635, "grad_norm": 0.2866969108581543, "learning_rate": 4.221287277147965e-05, "loss": 0.3452, "step": 6125 }, { "epoch": 0.9975980132719945, "grad_norm": 0.3403889238834381, "learning_rate": 4.2209437069426734e-05, "loss": 0.3261, "step": 6126 }, { "epoch": 0.9977608598298253, "grad_norm": 0.3075518012046814, "learning_rate": 4.220600074950346e-05, "loss": 0.3321, "step": 6127 }, { "epoch": 0.9979237063876563, "grad_norm": 0.3865841031074524, "learning_rate": 4.220256381183321e-05, "loss": 0.372, "step": 6128 }, { "epoch": 0.9980865529454871, "grad_norm": 0.318506121635437, "learning_rate": 4.2199126256539376e-05, "loss": 0.3141, "step": 6129 }, { "epoch": 0.998249399503318, "grad_norm": 0.32128143310546875, "learning_rate": 4.219568808374539e-05, "loss": 0.3957, "step": 6130 }, { "epoch": 0.9984122460611489, "grad_norm": 0.28657227754592896, "learning_rate": 4.2192249293574674e-05, "loss": 0.352, "step": 6131 }, { "epoch": 0.9985750926189798, "grad_norm": 0.3190858066082001, "learning_rate": 4.21888098861507e-05, "loss": 0.3323, "step": 6132 }, { "epoch": 0.9987379391768106, "grad_norm": 0.32951128482818604, "learning_rate": 4.2185369861596946e-05, "loss": 0.3557, "step": 6133 }, { "epoch": 0.9989007857346416, "grad_norm": 0.3429606258869171, "learning_rate": 4.2181929220036936e-05, "loss": 0.3295, "step": 6134 }, { "epoch": 0.9990636322924724, "grad_norm": 0.2809049189090729, "learning_rate": 4.217848796159418e-05, "loss": 0.3456, "step": 6135 }, { "epoch": 0.9992264788503034, "grad_norm": 0.3353903293609619, "learning_rate": 4.2175046086392246e-05, "loss": 0.3518, "step": 6136 }, { "epoch": 0.9993893254081342, "grad_norm": 0.3273230493068695, "learning_rate": 4.2171603594554695e-05, "loss": 0.37, "step": 6137 }, { "epoch": 0.999552171965965, "grad_norm": 0.40022504329681396, "learning_rate": 4.2168160486205136e-05, "loss": 0.4017, "step": 6138 }, { "epoch": 0.999715018523796, "grad_norm": 0.3256341218948364, "learning_rate": 4.216471676146716e-05, "loss": 0.3703, "step": 6139 }, { "epoch": 0.9998778650816268, "grad_norm": 0.29119983315467834, "learning_rate": 4.216127242046444e-05, "loss": 0.3762, "step": 6140 }, { "epoch": 1.0, "grad_norm": 0.29119983315467834, "learning_rate": 4.216127242046444e-05, "loss": 0.3442, "step": 6141 }, { "epoch": 1.000162846557831, "grad_norm": 0.16794952750205994, "learning_rate": 4.215782746332062e-05, "loss": 0.3352, "step": 6142 }, { "epoch": 1.0003256931156617, "grad_norm": 0.0895671620965004, "learning_rate": 4.2154381890159385e-05, "loss": 0.3386, "step": 6143 }, { "epoch": 1.0004885396734926, "grad_norm": 0.230873703956604, "learning_rate": 4.215093570110444e-05, "loss": 0.303, "step": 6144 }, { "epoch": 1.0006513862313235, "grad_norm": 0.11649920791387558, "learning_rate": 4.214748889627953e-05, "loss": 0.282, "step": 6145 }, { "epoch": 1.0008142327891545, "grad_norm": 0.109907366335392, "learning_rate": 4.214404147580839e-05, "loss": 0.3044, "step": 6146 }, { "epoch": 1.0009770793469852, "grad_norm": 0.10484790056943893, "learning_rate": 4.214059343981479e-05, "loss": 0.2927, "step": 6147 }, { "epoch": 1.0011399259048162, "grad_norm": 0.15800972282886505, "learning_rate": 4.213714478842253e-05, "loss": 0.3291, "step": 6148 }, { "epoch": 1.001302772462647, "grad_norm": 0.10833205282688141, "learning_rate": 4.213369552175544e-05, "loss": 0.2891, "step": 6149 }, { "epoch": 1.001465619020478, "grad_norm": 0.15033948421478271, "learning_rate": 4.213024563993734e-05, "loss": 0.3045, "step": 6150 }, { "epoch": 1.0016284655783088, "grad_norm": 0.09942686557769775, "learning_rate": 4.21267951430921e-05, "loss": 0.3216, "step": 6151 }, { "epoch": 1.0017913121361397, "grad_norm": 0.14550556242465973, "learning_rate": 4.212334403134359e-05, "loss": 0.3087, "step": 6152 }, { "epoch": 1.0019541586939706, "grad_norm": 0.11179196089506149, "learning_rate": 4.211989230481574e-05, "loss": 0.3062, "step": 6153 }, { "epoch": 1.0021170052518016, "grad_norm": 0.13612203299999237, "learning_rate": 4.211643996363246e-05, "loss": 0.3237, "step": 6154 }, { "epoch": 1.0022798518096323, "grad_norm": 0.12963619828224182, "learning_rate": 4.2112987007917704e-05, "loss": 0.3111, "step": 6155 }, { "epoch": 1.0024426983674632, "grad_norm": 0.1464671492576599, "learning_rate": 4.2109533437795434e-05, "loss": 0.3486, "step": 6156 }, { "epoch": 1.0026055449252942, "grad_norm": 0.17978748679161072, "learning_rate": 4.2106079253389665e-05, "loss": 0.334, "step": 6157 }, { "epoch": 1.0027683914831251, "grad_norm": 0.1434178650379181, "learning_rate": 4.210262445482439e-05, "loss": 0.3137, "step": 6158 }, { "epoch": 1.0029312380409559, "grad_norm": 0.1119498461484909, "learning_rate": 4.209916904222366e-05, "loss": 0.3242, "step": 6159 }, { "epoch": 1.0030940845987868, "grad_norm": 0.12576034665107727, "learning_rate": 4.2095713015711526e-05, "loss": 0.3426, "step": 6160 }, { "epoch": 1.0032569311566177, "grad_norm": 0.08365581929683685, "learning_rate": 4.209225637541208e-05, "loss": 0.3405, "step": 6161 }, { "epoch": 1.0034197777144485, "grad_norm": 0.13647909462451935, "learning_rate": 4.208879912144942e-05, "loss": 0.2934, "step": 6162 }, { "epoch": 1.0035826242722794, "grad_norm": 0.11476879566907883, "learning_rate": 4.208534125394767e-05, "loss": 0.2692, "step": 6163 }, { "epoch": 1.0037454708301103, "grad_norm": 0.19494859874248505, "learning_rate": 4.208188277303098e-05, "loss": 0.3408, "step": 6164 }, { "epoch": 1.0039083173879413, "grad_norm": 0.08719773590564728, "learning_rate": 4.207842367882352e-05, "loss": 0.3544, "step": 6165 }, { "epoch": 1.004071163945772, "grad_norm": 0.12186774611473083, "learning_rate": 4.207496397144949e-05, "loss": 0.3056, "step": 6166 }, { "epoch": 1.004234010503603, "grad_norm": 0.11137384921312332, "learning_rate": 4.207150365103308e-05, "loss": 0.3242, "step": 6167 }, { "epoch": 1.004396857061434, "grad_norm": 0.19761019945144653, "learning_rate": 4.206804271769855e-05, "loss": 0.3448, "step": 6168 }, { "epoch": 1.0045597036192648, "grad_norm": 0.1486184298992157, "learning_rate": 4.206458117157015e-05, "loss": 0.3003, "step": 6169 }, { "epoch": 1.0047225501770956, "grad_norm": 0.1146242767572403, "learning_rate": 4.2061119012772154e-05, "loss": 0.3323, "step": 6170 }, { "epoch": 1.0048853967349265, "grad_norm": 0.16765308380126953, "learning_rate": 4.2057656241428874e-05, "loss": 0.3539, "step": 6171 }, { "epoch": 1.0050482432927574, "grad_norm": 0.12051648646593094, "learning_rate": 4.205419285766463e-05, "loss": 0.2967, "step": 6172 }, { "epoch": 1.0052110898505884, "grad_norm": 0.12717804312705994, "learning_rate": 4.205072886160376e-05, "loss": 0.2757, "step": 6173 }, { "epoch": 1.005373936408419, "grad_norm": 0.12273933738470078, "learning_rate": 4.204726425337064e-05, "loss": 0.2948, "step": 6174 }, { "epoch": 1.00553678296625, "grad_norm": 0.10417983680963516, "learning_rate": 4.204379903308967e-05, "loss": 0.359, "step": 6175 }, { "epoch": 1.005699629524081, "grad_norm": 0.13910819590091705, "learning_rate": 4.2040333200885236e-05, "loss": 0.3474, "step": 6176 }, { "epoch": 1.005862476081912, "grad_norm": 0.10102292895317078, "learning_rate": 4.2036866756881785e-05, "loss": 0.3405, "step": 6177 }, { "epoch": 1.0060253226397426, "grad_norm": 0.15179064869880676, "learning_rate": 4.2033399701203785e-05, "loss": 0.3144, "step": 6178 }, { "epoch": 1.0061881691975736, "grad_norm": 0.21012865006923676, "learning_rate": 4.2029932033975694e-05, "loss": 0.3986, "step": 6179 }, { "epoch": 1.0063510157554045, "grad_norm": 0.13089074194431305, "learning_rate": 4.202646375532203e-05, "loss": 0.2989, "step": 6180 }, { "epoch": 1.0065138623132353, "grad_norm": 0.09184910356998444, "learning_rate": 4.202299486536729e-05, "loss": 0.2862, "step": 6181 }, { "epoch": 1.0066767088710662, "grad_norm": 0.14890354871749878, "learning_rate": 4.201952536423605e-05, "loss": 0.2903, "step": 6182 }, { "epoch": 1.0068395554288971, "grad_norm": 0.1548161804676056, "learning_rate": 4.201605525205284e-05, "loss": 0.3186, "step": 6183 }, { "epoch": 1.007002401986728, "grad_norm": 0.0936872586607933, "learning_rate": 4.2012584528942275e-05, "loss": 0.2828, "step": 6184 }, { "epoch": 1.0071652485445588, "grad_norm": 0.10092689841985703, "learning_rate": 4.200911319502895e-05, "loss": 0.3438, "step": 6185 }, { "epoch": 1.0073280951023897, "grad_norm": 0.10849665105342865, "learning_rate": 4.20056412504375e-05, "loss": 0.3114, "step": 6186 }, { "epoch": 1.0074909416602207, "grad_norm": 0.12134731560945511, "learning_rate": 4.200216869529257e-05, "loss": 0.2987, "step": 6187 }, { "epoch": 1.0076537882180516, "grad_norm": 0.16389313340187073, "learning_rate": 4.199869552971887e-05, "loss": 0.3013, "step": 6188 }, { "epoch": 1.0078166347758823, "grad_norm": 0.10608090460300446, "learning_rate": 4.199522175384105e-05, "loss": 0.2818, "step": 6189 }, { "epoch": 1.0079794813337133, "grad_norm": 0.12017296254634857, "learning_rate": 4.1991747367783856e-05, "loss": 0.2956, "step": 6190 }, { "epoch": 1.0081423278915442, "grad_norm": 0.15806810557842255, "learning_rate": 4.198827237167202e-05, "loss": 0.3018, "step": 6191 }, { "epoch": 1.0083051744493752, "grad_norm": 0.17889921367168427, "learning_rate": 4.198479676563032e-05, "loss": 0.353, "step": 6192 }, { "epoch": 1.008468021007206, "grad_norm": 0.12309638410806656, "learning_rate": 4.198132054978351e-05, "loss": 0.2937, "step": 6193 }, { "epoch": 1.0086308675650368, "grad_norm": 0.10402143001556396, "learning_rate": 4.197784372425643e-05, "loss": 0.2953, "step": 6194 }, { "epoch": 1.0087937141228678, "grad_norm": 0.11913610249757767, "learning_rate": 4.197436628917388e-05, "loss": 0.2903, "step": 6195 }, { "epoch": 1.0089565606806987, "grad_norm": 0.12341047823429108, "learning_rate": 4.197088824466073e-05, "loss": 0.3015, "step": 6196 }, { "epoch": 1.0091194072385294, "grad_norm": 0.1295827329158783, "learning_rate": 4.196740959084185e-05, "loss": 0.3518, "step": 6197 }, { "epoch": 1.0092822537963604, "grad_norm": 0.11578652262687683, "learning_rate": 4.1963930327842125e-05, "loss": 0.321, "step": 6198 }, { "epoch": 1.0094451003541913, "grad_norm": 0.06268744170665741, "learning_rate": 4.196045045578648e-05, "loss": 0.3184, "step": 6199 }, { "epoch": 1.009607946912022, "grad_norm": 0.13106577098369598, "learning_rate": 4.1956969974799844e-05, "loss": 0.2875, "step": 6200 }, { "epoch": 1.009770793469853, "grad_norm": 0.1522214263677597, "learning_rate": 4.195348888500719e-05, "loss": 0.2923, "step": 6201 }, { "epoch": 1.009933640027684, "grad_norm": 0.10547595471143723, "learning_rate": 4.195000718653348e-05, "loss": 0.2856, "step": 6202 }, { "epoch": 1.0100964865855149, "grad_norm": 0.08843553811311722, "learning_rate": 4.1946524879503726e-05, "loss": 0.2774, "step": 6203 }, { "epoch": 1.0102593331433456, "grad_norm": 0.08673998713493347, "learning_rate": 4.1943041964042967e-05, "loss": 0.3357, "step": 6204 }, { "epoch": 1.0104221797011765, "grad_norm": 0.11558045446872711, "learning_rate": 4.193955844027623e-05, "loss": 0.3049, "step": 6205 }, { "epoch": 1.0105850262590075, "grad_norm": 0.12176476418972015, "learning_rate": 4.19360743083286e-05, "loss": 0.3449, "step": 6206 }, { "epoch": 1.0107478728168384, "grad_norm": 0.13867387175559998, "learning_rate": 4.193258956832516e-05, "loss": 0.3469, "step": 6207 }, { "epoch": 1.0109107193746691, "grad_norm": 0.12393641471862793, "learning_rate": 4.1929104220391016e-05, "loss": 0.2936, "step": 6208 }, { "epoch": 1.0110735659325, "grad_norm": 0.1255662590265274, "learning_rate": 4.192561826465132e-05, "loss": 0.2874, "step": 6209 }, { "epoch": 1.011236412490331, "grad_norm": 0.14565341174602509, "learning_rate": 4.192213170123121e-05, "loss": 0.3149, "step": 6210 }, { "epoch": 1.011399259048162, "grad_norm": 0.12804646790027618, "learning_rate": 4.1918644530255875e-05, "loss": 0.2965, "step": 6211 }, { "epoch": 1.0115621056059927, "grad_norm": 0.11657164990901947, "learning_rate": 4.1915156751850514e-05, "loss": 0.3343, "step": 6212 }, { "epoch": 1.0117249521638236, "grad_norm": 0.16194887459278107, "learning_rate": 4.191166836614034e-05, "loss": 0.3454, "step": 6213 }, { "epoch": 1.0118877987216546, "grad_norm": 0.21441450715065002, "learning_rate": 4.19081793732506e-05, "loss": 0.3274, "step": 6214 }, { "epoch": 1.0120506452794853, "grad_norm": 0.08620448410511017, "learning_rate": 4.190468977330657e-05, "loss": 0.2836, "step": 6215 }, { "epoch": 1.0122134918373162, "grad_norm": 0.1561928242444992, "learning_rate": 4.190119956643353e-05, "loss": 0.302, "step": 6216 }, { "epoch": 1.0123763383951472, "grad_norm": 0.1380760669708252, "learning_rate": 4.189770875275679e-05, "loss": 0.2997, "step": 6217 }, { "epoch": 1.0125391849529781, "grad_norm": 0.08194991946220398, "learning_rate": 4.189421733240167e-05, "loss": 0.3144, "step": 6218 }, { "epoch": 1.0127020315108088, "grad_norm": 0.1521800458431244, "learning_rate": 4.1890725305493536e-05, "loss": 0.3538, "step": 6219 }, { "epoch": 1.0128648780686398, "grad_norm": 0.07255478203296661, "learning_rate": 4.188723267215776e-05, "loss": 0.2726, "step": 6220 }, { "epoch": 1.0130277246264707, "grad_norm": 0.1326034665107727, "learning_rate": 4.188373943251973e-05, "loss": 0.3125, "step": 6221 }, { "epoch": 1.0131905711843017, "grad_norm": 0.12689988315105438, "learning_rate": 4.1880245586704875e-05, "loss": 0.3047, "step": 6222 }, { "epoch": 1.0133534177421324, "grad_norm": 0.11646435409784317, "learning_rate": 4.187675113483863e-05, "loss": 0.334, "step": 6223 }, { "epoch": 1.0135162642999633, "grad_norm": 0.16135872900485992, "learning_rate": 4.187325607704645e-05, "loss": 0.3029, "step": 6224 }, { "epoch": 1.0136791108577943, "grad_norm": 0.15225397050380707, "learning_rate": 4.186976041345382e-05, "loss": 0.3406, "step": 6225 }, { "epoch": 1.0138419574156252, "grad_norm": 0.1018940657377243, "learning_rate": 4.186626414418625e-05, "loss": 0.3395, "step": 6226 }, { "epoch": 1.014004803973456, "grad_norm": 0.09908059984445572, "learning_rate": 4.1862767269369276e-05, "loss": 0.2761, "step": 6227 }, { "epoch": 1.0141676505312869, "grad_norm": 0.11853709816932678, "learning_rate": 4.185926978912842e-05, "loss": 0.2962, "step": 6228 }, { "epoch": 1.0143304970891178, "grad_norm": 0.0959373190999031, "learning_rate": 4.1855771703589274e-05, "loss": 0.3244, "step": 6229 }, { "epoch": 1.0144933436469488, "grad_norm": 0.1184442862868309, "learning_rate": 4.185227301287743e-05, "loss": 0.3033, "step": 6230 }, { "epoch": 1.0146561902047795, "grad_norm": 0.11389324814081192, "learning_rate": 4.184877371711848e-05, "loss": 0.3119, "step": 6231 }, { "epoch": 1.0148190367626104, "grad_norm": 0.12137935310602188, "learning_rate": 4.184527381643808e-05, "loss": 0.2851, "step": 6232 }, { "epoch": 1.0149818833204414, "grad_norm": 0.1088041439652443, "learning_rate": 4.184177331096188e-05, "loss": 0.3036, "step": 6233 }, { "epoch": 1.015144729878272, "grad_norm": 0.11135740578174591, "learning_rate": 4.183827220081555e-05, "loss": 0.299, "step": 6234 }, { "epoch": 1.015307576436103, "grad_norm": 0.2472606897354126, "learning_rate": 4.183477048612481e-05, "loss": 0.3246, "step": 6235 }, { "epoch": 1.015470422993934, "grad_norm": 0.11050759255886078, "learning_rate": 4.183126816701537e-05, "loss": 0.3213, "step": 6236 }, { "epoch": 1.015633269551765, "grad_norm": 0.1666816771030426, "learning_rate": 4.1827765243612975e-05, "loss": 0.3443, "step": 6237 }, { "epoch": 1.0157961161095956, "grad_norm": 0.07197434455156326, "learning_rate": 4.182426171604339e-05, "loss": 0.307, "step": 6238 }, { "epoch": 1.0159589626674266, "grad_norm": 0.09750358760356903, "learning_rate": 4.1820757584432405e-05, "loss": 0.3321, "step": 6239 }, { "epoch": 1.0161218092252575, "grad_norm": 0.1350092589855194, "learning_rate": 4.181725284890582e-05, "loss": 0.2895, "step": 6240 }, { "epoch": 1.0162846557830885, "grad_norm": 0.12184930592775345, "learning_rate": 4.181374750958947e-05, "loss": 0.3758, "step": 6241 }, { "epoch": 1.0164475023409192, "grad_norm": 0.12484266608953476, "learning_rate": 4.181024156660922e-05, "loss": 0.3019, "step": 6242 }, { "epoch": 1.0166103488987501, "grad_norm": 0.14660365879535675, "learning_rate": 4.180673502009093e-05, "loss": 0.3398, "step": 6243 }, { "epoch": 1.016773195456581, "grad_norm": 0.1577538549900055, "learning_rate": 4.180322787016049e-05, "loss": 0.3319, "step": 6244 }, { "epoch": 1.016936042014412, "grad_norm": 0.1265946477651596, "learning_rate": 4.179972011694383e-05, "loss": 0.3121, "step": 6245 }, { "epoch": 1.0170988885722427, "grad_norm": 0.11896396428346634, "learning_rate": 4.1796211760566884e-05, "loss": 0.318, "step": 6246 }, { "epoch": 1.0172617351300737, "grad_norm": 0.08469334989786148, "learning_rate": 4.179270280115561e-05, "loss": 0.3142, "step": 6247 }, { "epoch": 1.0174245816879046, "grad_norm": 0.08091965317726135, "learning_rate": 4.1789193238836e-05, "loss": 0.2889, "step": 6248 }, { "epoch": 1.0175874282457356, "grad_norm": 0.12954473495483398, "learning_rate": 4.178568307373405e-05, "loss": 0.3429, "step": 6249 }, { "epoch": 1.0177502748035663, "grad_norm": 0.4391576647758484, "learning_rate": 4.178217230597578e-05, "loss": 0.3787, "step": 6250 }, { "epoch": 1.0179131213613972, "grad_norm": 0.08187320083379745, "learning_rate": 4.1778660935687255e-05, "loss": 0.3024, "step": 6251 }, { "epoch": 1.0180759679192282, "grad_norm": 0.12111485004425049, "learning_rate": 4.1775148962994526e-05, "loss": 0.2936, "step": 6252 }, { "epoch": 1.0182388144770589, "grad_norm": 0.1062973365187645, "learning_rate": 4.177163638802369e-05, "loss": 0.2958, "step": 6253 }, { "epoch": 1.0184016610348898, "grad_norm": 0.13636177778244019, "learning_rate": 4.176812321090086e-05, "loss": 0.3029, "step": 6254 }, { "epoch": 1.0185645075927208, "grad_norm": 0.12673896551132202, "learning_rate": 4.176460943175217e-05, "loss": 0.3386, "step": 6255 }, { "epoch": 1.0187273541505517, "grad_norm": 0.11732669174671173, "learning_rate": 4.176109505070376e-05, "loss": 0.2983, "step": 6256 }, { "epoch": 1.0188902007083824, "grad_norm": 0.11088727414608002, "learning_rate": 4.175758006788183e-05, "loss": 0.2795, "step": 6257 }, { "epoch": 1.0190530472662134, "grad_norm": 0.22392240166664124, "learning_rate": 4.175406448341257e-05, "loss": 0.3856, "step": 6258 }, { "epoch": 1.0192158938240443, "grad_norm": 0.10810667276382446, "learning_rate": 4.17505482974222e-05, "loss": 0.325, "step": 6259 }, { "epoch": 1.0193787403818753, "grad_norm": 0.1026899591088295, "learning_rate": 4.174703151003695e-05, "loss": 0.3047, "step": 6260 }, { "epoch": 1.019541586939706, "grad_norm": 0.08854638785123825, "learning_rate": 4.17435141213831e-05, "loss": 0.3025, "step": 6261 }, { "epoch": 1.019704433497537, "grad_norm": 0.10841448605060577, "learning_rate": 4.173999613158693e-05, "loss": 0.3085, "step": 6262 }, { "epoch": 1.0198672800553679, "grad_norm": 0.13713522255420685, "learning_rate": 4.173647754077474e-05, "loss": 0.2765, "step": 6263 }, { "epoch": 1.0200301266131988, "grad_norm": 0.0939662829041481, "learning_rate": 4.173295834907286e-05, "loss": 0.2903, "step": 6264 }, { "epoch": 1.0201929731710295, "grad_norm": 0.11091411113739014, "learning_rate": 4.172943855660765e-05, "loss": 0.2838, "step": 6265 }, { "epoch": 1.0203558197288605, "grad_norm": 0.14847604930400848, "learning_rate": 4.1725918163505476e-05, "loss": 0.3367, "step": 6266 }, { "epoch": 1.0205186662866914, "grad_norm": 0.14966922998428345, "learning_rate": 4.1722397169892716e-05, "loss": 0.3307, "step": 6267 }, { "epoch": 1.0206815128445224, "grad_norm": 0.06976257264614105, "learning_rate": 4.17188755758958e-05, "loss": 0.318, "step": 6268 }, { "epoch": 1.020844359402353, "grad_norm": 0.08757726848125458, "learning_rate": 4.1715353381641166e-05, "loss": 0.3335, "step": 6269 }, { "epoch": 1.021007205960184, "grad_norm": 0.1035088449716568, "learning_rate": 4.1711830587255266e-05, "loss": 0.2988, "step": 6270 }, { "epoch": 1.021170052518015, "grad_norm": 0.12872052192687988, "learning_rate": 4.1708307192864565e-05, "loss": 0.3025, "step": 6271 }, { "epoch": 1.0213328990758457, "grad_norm": 0.1418086141347885, "learning_rate": 4.170478319859559e-05, "loss": 0.3032, "step": 6272 }, { "epoch": 1.0214957456336766, "grad_norm": 0.14634309709072113, "learning_rate": 4.170125860457483e-05, "loss": 0.3276, "step": 6273 }, { "epoch": 1.0216585921915076, "grad_norm": 0.1535785049200058, "learning_rate": 4.169773341092887e-05, "loss": 0.3223, "step": 6274 }, { "epoch": 1.0218214387493385, "grad_norm": 0.11407813429832458, "learning_rate": 4.169420761778424e-05, "loss": 0.3548, "step": 6275 }, { "epoch": 1.0219842853071692, "grad_norm": 0.11818213015794754, "learning_rate": 4.169068122526755e-05, "loss": 0.2908, "step": 6276 }, { "epoch": 1.0221471318650002, "grad_norm": 0.11852046102285385, "learning_rate": 4.1687154233505385e-05, "loss": 0.3347, "step": 6277 }, { "epoch": 1.0223099784228311, "grad_norm": 0.12432296574115753, "learning_rate": 4.168362664262439e-05, "loss": 0.3359, "step": 6278 }, { "epoch": 1.022472824980662, "grad_norm": 0.14404094219207764, "learning_rate": 4.1680098452751215e-05, "loss": 0.3035, "step": 6279 }, { "epoch": 1.0226356715384928, "grad_norm": 0.16453969478607178, "learning_rate": 4.1676569664012535e-05, "loss": 0.3047, "step": 6280 }, { "epoch": 1.0227985180963237, "grad_norm": 0.07788839936256409, "learning_rate": 4.1673040276535036e-05, "loss": 0.2734, "step": 6281 }, { "epoch": 1.0229613646541547, "grad_norm": 0.15812774002552032, "learning_rate": 4.1669510290445435e-05, "loss": 0.3218, "step": 6282 }, { "epoch": 1.0231242112119856, "grad_norm": 0.08271922916173935, "learning_rate": 4.1665979705870475e-05, "loss": 0.2877, "step": 6283 }, { "epoch": 1.0232870577698163, "grad_norm": 0.16037149727344513, "learning_rate": 4.1662448522936906e-05, "loss": 0.3164, "step": 6284 }, { "epoch": 1.0234499043276473, "grad_norm": 0.12849532067775726, "learning_rate": 4.165891674177151e-05, "loss": 0.3056, "step": 6285 }, { "epoch": 1.0236127508854782, "grad_norm": 0.09576205164194107, "learning_rate": 4.16553843625011e-05, "loss": 0.3152, "step": 6286 }, { "epoch": 1.0237755974433091, "grad_norm": 0.09578805416822433, "learning_rate": 4.165185138525249e-05, "loss": 0.3017, "step": 6287 }, { "epoch": 1.0239384440011399, "grad_norm": 0.1174480989575386, "learning_rate": 4.164831781015253e-05, "loss": 0.3253, "step": 6288 }, { "epoch": 1.0241012905589708, "grad_norm": 0.1083892360329628, "learning_rate": 4.1644783637328066e-05, "loss": 0.3466, "step": 6289 }, { "epoch": 1.0242641371168018, "grad_norm": 0.1371266096830368, "learning_rate": 4.1641248866906016e-05, "loss": 0.2914, "step": 6290 }, { "epoch": 1.0244269836746325, "grad_norm": 0.11971254646778107, "learning_rate": 4.1637713499013264e-05, "loss": 0.3212, "step": 6291 }, { "epoch": 1.0245898302324634, "grad_norm": 0.12185835093259811, "learning_rate": 4.163417753377675e-05, "loss": 0.2904, "step": 6292 }, { "epoch": 1.0247526767902944, "grad_norm": 0.09983968734741211, "learning_rate": 4.163064097132343e-05, "loss": 0.266, "step": 6293 }, { "epoch": 1.0249155233481253, "grad_norm": 0.13187819719314575, "learning_rate": 4.1627103811780264e-05, "loss": 0.2952, "step": 6294 }, { "epoch": 1.025078369905956, "grad_norm": 0.11968602240085602, "learning_rate": 4.162356605527426e-05, "loss": 0.301, "step": 6295 }, { "epoch": 1.025241216463787, "grad_norm": 0.11948058009147644, "learning_rate": 4.162002770193243e-05, "loss": 0.3151, "step": 6296 }, { "epoch": 1.025404063021618, "grad_norm": 0.17095915973186493, "learning_rate": 4.161648875188181e-05, "loss": 0.3341, "step": 6297 }, { "epoch": 1.0255669095794488, "grad_norm": 0.10414610058069229, "learning_rate": 4.1612949205249456e-05, "loss": 0.2586, "step": 6298 }, { "epoch": 1.0257297561372796, "grad_norm": 0.09817184507846832, "learning_rate": 4.160940906216246e-05, "loss": 0.2777, "step": 6299 }, { "epoch": 1.0258926026951105, "grad_norm": 0.09814576804637909, "learning_rate": 4.160586832274791e-05, "loss": 0.2858, "step": 6300 }, { "epoch": 1.0260554492529415, "grad_norm": 0.16295063495635986, "learning_rate": 4.160232698713293e-05, "loss": 0.2791, "step": 6301 }, { "epoch": 1.0262182958107724, "grad_norm": 0.1566469967365265, "learning_rate": 4.159878505544468e-05, "loss": 0.3173, "step": 6302 }, { "epoch": 1.0263811423686031, "grad_norm": 0.1354741007089615, "learning_rate": 4.1595242527810305e-05, "loss": 0.276, "step": 6303 }, { "epoch": 1.026543988926434, "grad_norm": 0.13424716889858246, "learning_rate": 4.159169940435701e-05, "loss": 0.3156, "step": 6304 }, { "epoch": 1.026706835484265, "grad_norm": 0.13213443756103516, "learning_rate": 4.158815568521199e-05, "loss": 0.315, "step": 6305 }, { "epoch": 1.026869682042096, "grad_norm": 0.15212146937847137, "learning_rate": 4.158461137050249e-05, "loss": 0.3, "step": 6306 }, { "epoch": 1.0270325285999267, "grad_norm": 0.09397560358047485, "learning_rate": 4.158106646035575e-05, "loss": 0.3479, "step": 6307 }, { "epoch": 1.0271953751577576, "grad_norm": 0.1310860514640808, "learning_rate": 4.157752095489904e-05, "loss": 0.3124, "step": 6308 }, { "epoch": 1.0273582217155885, "grad_norm": 0.10343627631664276, "learning_rate": 4.157397485425967e-05, "loss": 0.2839, "step": 6309 }, { "epoch": 1.0275210682734193, "grad_norm": 0.13195376098155975, "learning_rate": 4.1570428158564935e-05, "loss": 0.2818, "step": 6310 }, { "epoch": 1.0276839148312502, "grad_norm": 0.12679411470890045, "learning_rate": 4.156688086794219e-05, "loss": 0.2811, "step": 6311 }, { "epoch": 1.0278467613890812, "grad_norm": 0.11704973131418228, "learning_rate": 4.156333298251878e-05, "loss": 0.3064, "step": 6312 }, { "epoch": 1.028009607946912, "grad_norm": 0.07966373860836029, "learning_rate": 4.1559784502422096e-05, "loss": 0.3051, "step": 6313 }, { "epoch": 1.0281724545047428, "grad_norm": 0.10410062968730927, "learning_rate": 4.155623542777953e-05, "loss": 0.3037, "step": 6314 }, { "epoch": 1.0283353010625738, "grad_norm": 0.11124390363693237, "learning_rate": 4.155268575871851e-05, "loss": 0.273, "step": 6315 }, { "epoch": 1.0284981476204047, "grad_norm": 0.11077386885881424, "learning_rate": 4.154913549536648e-05, "loss": 0.3551, "step": 6316 }, { "epoch": 1.0286609941782356, "grad_norm": 0.13740839064121246, "learning_rate": 4.1545584637850904e-05, "loss": 0.3303, "step": 6317 }, { "epoch": 1.0288238407360664, "grad_norm": 0.09114281833171844, "learning_rate": 4.1542033186299266e-05, "loss": 0.333, "step": 6318 }, { "epoch": 1.0289866872938973, "grad_norm": 0.11069668084383011, "learning_rate": 4.153848114083907e-05, "loss": 0.3088, "step": 6319 }, { "epoch": 1.0291495338517282, "grad_norm": 0.11272017657756805, "learning_rate": 4.153492850159786e-05, "loss": 0.3271, "step": 6320 }, { "epoch": 1.0293123804095592, "grad_norm": 0.11936356127262115, "learning_rate": 4.1531375268703176e-05, "loss": 0.3259, "step": 6321 }, { "epoch": 1.02947522696739, "grad_norm": 0.12169528752565384, "learning_rate": 4.152782144228258e-05, "loss": 0.2932, "step": 6322 }, { "epoch": 1.0296380735252209, "grad_norm": 0.14691251516342163, "learning_rate": 4.152426702246369e-05, "loss": 0.3008, "step": 6323 }, { "epoch": 1.0298009200830518, "grad_norm": 0.12453523278236389, "learning_rate": 4.152071200937409e-05, "loss": 0.2954, "step": 6324 }, { "epoch": 1.0299637666408827, "grad_norm": 0.13547712564468384, "learning_rate": 4.1517156403141444e-05, "loss": 0.3168, "step": 6325 }, { "epoch": 1.0301266131987135, "grad_norm": 0.09218473732471466, "learning_rate": 4.151360020389339e-05, "loss": 0.302, "step": 6326 }, { "epoch": 1.0302894597565444, "grad_norm": 0.078832246363163, "learning_rate": 4.1510043411757615e-05, "loss": 0.3208, "step": 6327 }, { "epoch": 1.0304523063143753, "grad_norm": 0.14573630690574646, "learning_rate": 4.1506486026861815e-05, "loss": 0.322, "step": 6328 }, { "epoch": 1.030615152872206, "grad_norm": 0.165574848651886, "learning_rate": 4.150292804933371e-05, "loss": 0.3262, "step": 6329 }, { "epoch": 1.030777999430037, "grad_norm": 0.13960540294647217, "learning_rate": 4.1499369479301054e-05, "loss": 0.2973, "step": 6330 }, { "epoch": 1.030940845987868, "grad_norm": 0.1506124585866928, "learning_rate": 4.149581031689159e-05, "loss": 0.3368, "step": 6331 }, { "epoch": 1.031103692545699, "grad_norm": 0.11257033795118332, "learning_rate": 4.1492250562233114e-05, "loss": 0.2995, "step": 6332 }, { "epoch": 1.0312665391035296, "grad_norm": 0.0931965634226799, "learning_rate": 4.1488690215453436e-05, "loss": 0.3553, "step": 6333 }, { "epoch": 1.0314293856613606, "grad_norm": 0.1303136795759201, "learning_rate": 4.148512927668037e-05, "loss": 0.2739, "step": 6334 }, { "epoch": 1.0315922322191915, "grad_norm": 0.1418936401605606, "learning_rate": 4.148156774604178e-05, "loss": 0.2955, "step": 6335 }, { "epoch": 1.0317550787770224, "grad_norm": 0.11926375329494476, "learning_rate": 4.147800562366552e-05, "loss": 0.3493, "step": 6336 }, { "epoch": 1.0319179253348532, "grad_norm": 0.08589069545269012, "learning_rate": 4.14744429096795e-05, "loss": 0.2469, "step": 6337 }, { "epoch": 1.032080771892684, "grad_norm": 0.1279081553220749, "learning_rate": 4.147087960421161e-05, "loss": 0.3059, "step": 6338 }, { "epoch": 1.032243618450515, "grad_norm": 0.16654300689697266, "learning_rate": 4.14673157073898e-05, "loss": 0.3271, "step": 6339 }, { "epoch": 1.032406465008346, "grad_norm": 0.07972478121519089, "learning_rate": 4.1463751219342014e-05, "loss": 0.3378, "step": 6340 }, { "epoch": 1.0325693115661767, "grad_norm": 0.11759083718061447, "learning_rate": 4.146018614019624e-05, "loss": 0.2954, "step": 6341 }, { "epoch": 1.0327321581240076, "grad_norm": 0.18566687405109406, "learning_rate": 4.145662047008046e-05, "loss": 0.3077, "step": 6342 }, { "epoch": 1.0328950046818386, "grad_norm": 0.13861125707626343, "learning_rate": 4.1453054209122696e-05, "loss": 0.3238, "step": 6343 }, { "epoch": 1.0330578512396693, "grad_norm": 0.11635569483041763, "learning_rate": 4.1449487357451e-05, "loss": 0.2971, "step": 6344 }, { "epoch": 1.0332206977975003, "grad_norm": 0.13342002034187317, "learning_rate": 4.1445919915193425e-05, "loss": 0.3112, "step": 6345 }, { "epoch": 1.0333835443553312, "grad_norm": 0.13573499023914337, "learning_rate": 4.1442351882478045e-05, "loss": 0.3231, "step": 6346 }, { "epoch": 1.0335463909131621, "grad_norm": 0.10786639153957367, "learning_rate": 4.143878325943298e-05, "loss": 0.331, "step": 6347 }, { "epoch": 1.0337092374709929, "grad_norm": 0.11128062009811401, "learning_rate": 4.143521404618633e-05, "loss": 0.2987, "step": 6348 }, { "epoch": 1.0338720840288238, "grad_norm": 0.14366762340068817, "learning_rate": 4.1431644242866266e-05, "loss": 0.3179, "step": 6349 }, { "epoch": 1.0340349305866547, "grad_norm": 0.08856307715177536, "learning_rate": 4.1428073849600945e-05, "loss": 0.2846, "step": 6350 }, { "epoch": 1.0341977771444857, "grad_norm": 0.1610044240951538, "learning_rate": 4.142450286651854e-05, "loss": 0.3113, "step": 6351 }, { "epoch": 1.0343606237023164, "grad_norm": 0.09141842275857925, "learning_rate": 4.142093129374728e-05, "loss": 0.36, "step": 6352 }, { "epoch": 1.0345234702601473, "grad_norm": 0.09506196528673172, "learning_rate": 4.141735913141539e-05, "loss": 0.2957, "step": 6353 }, { "epoch": 1.0346863168179783, "grad_norm": 0.09268748015165329, "learning_rate": 4.141378637965112e-05, "loss": 0.2887, "step": 6354 }, { "epoch": 1.0348491633758092, "grad_norm": 0.13539229333400726, "learning_rate": 4.141021303858274e-05, "loss": 0.2892, "step": 6355 }, { "epoch": 1.03501200993364, "grad_norm": 0.09478522092103958, "learning_rate": 4.140663910833854e-05, "loss": 0.2744, "step": 6356 }, { "epoch": 1.035174856491471, "grad_norm": 0.16384917497634888, "learning_rate": 4.140306458904685e-05, "loss": 0.2928, "step": 6357 }, { "epoch": 1.0353377030493018, "grad_norm": 0.0970563068985939, "learning_rate": 4.1399489480835985e-05, "loss": 0.2754, "step": 6358 }, { "epoch": 1.0355005496071328, "grad_norm": 0.08171680569648743, "learning_rate": 4.1395913783834325e-05, "loss": 0.3172, "step": 6359 }, { "epoch": 1.0356633961649635, "grad_norm": 0.16556455194950104, "learning_rate": 4.139233749817023e-05, "loss": 0.2813, "step": 6360 }, { "epoch": 1.0358262427227944, "grad_norm": 0.13884907960891724, "learning_rate": 4.138876062397211e-05, "loss": 0.3241, "step": 6361 }, { "epoch": 1.0359890892806254, "grad_norm": 0.11295422911643982, "learning_rate": 4.138518316136838e-05, "loss": 0.2924, "step": 6362 }, { "epoch": 1.0361519358384563, "grad_norm": 0.1055593490600586, "learning_rate": 4.138160511048749e-05, "loss": 0.3011, "step": 6363 }, { "epoch": 1.036314782396287, "grad_norm": 0.0955401062965393, "learning_rate": 4.137802647145788e-05, "loss": 0.2975, "step": 6364 }, { "epoch": 1.036477628954118, "grad_norm": 0.1310938447713852, "learning_rate": 4.137444724440807e-05, "loss": 0.323, "step": 6365 }, { "epoch": 1.036640475511949, "grad_norm": 0.10173720866441727, "learning_rate": 4.137086742946652e-05, "loss": 0.3073, "step": 6366 }, { "epoch": 1.0368033220697797, "grad_norm": 0.16578130424022675, "learning_rate": 4.13672870267618e-05, "loss": 0.2652, "step": 6367 }, { "epoch": 1.0369661686276106, "grad_norm": 0.06177275627851486, "learning_rate": 4.136370603642243e-05, "loss": 0.2848, "step": 6368 }, { "epoch": 1.0371290151854415, "grad_norm": 0.15951913595199585, "learning_rate": 4.1360124458576986e-05, "loss": 0.3021, "step": 6369 }, { "epoch": 1.0372918617432725, "grad_norm": 0.13223841786384583, "learning_rate": 4.1356542293354064e-05, "loss": 0.2772, "step": 6370 }, { "epoch": 1.0374547083011032, "grad_norm": 0.12002333253622055, "learning_rate": 4.135295954088226e-05, "loss": 0.3016, "step": 6371 }, { "epoch": 1.0376175548589341, "grad_norm": 0.11371214687824249, "learning_rate": 4.1349376201290226e-05, "loss": 0.3135, "step": 6372 }, { "epoch": 1.037780401416765, "grad_norm": 0.11405675858259201, "learning_rate": 4.134579227470659e-05, "loss": 0.3325, "step": 6373 }, { "epoch": 1.037943247974596, "grad_norm": 0.2748304605484009, "learning_rate": 4.1342207761260044e-05, "loss": 0.3465, "step": 6374 }, { "epoch": 1.0381060945324267, "grad_norm": 0.10753801465034485, "learning_rate": 4.1338622661079276e-05, "loss": 0.2703, "step": 6375 }, { "epoch": 1.0382689410902577, "grad_norm": 0.10854783654212952, "learning_rate": 4.133503697429301e-05, "loss": 0.2843, "step": 6376 }, { "epoch": 1.0384317876480886, "grad_norm": 0.11502925306558609, "learning_rate": 4.133145070102996e-05, "loss": 0.2919, "step": 6377 }, { "epoch": 1.0385946342059196, "grad_norm": 0.15476223826408386, "learning_rate": 4.13278638414189e-05, "loss": 0.2616, "step": 6378 }, { "epoch": 1.0387574807637503, "grad_norm": 0.16169202327728271, "learning_rate": 4.1324276395588626e-05, "loss": 0.3726, "step": 6379 }, { "epoch": 1.0389203273215812, "grad_norm": 0.1328195333480835, "learning_rate": 4.13206883636679e-05, "loss": 0.3006, "step": 6380 }, { "epoch": 1.0390831738794122, "grad_norm": 0.11852680146694183, "learning_rate": 4.131709974578558e-05, "loss": 0.3188, "step": 6381 }, { "epoch": 1.039246020437243, "grad_norm": 0.12188111990690231, "learning_rate": 4.131351054207049e-05, "loss": 0.2995, "step": 6382 }, { "epoch": 1.0394088669950738, "grad_norm": 0.16886018216609955, "learning_rate": 4.1309920752651485e-05, "loss": 0.2937, "step": 6383 }, { "epoch": 1.0395717135529048, "grad_norm": 0.18706801533699036, "learning_rate": 4.130633037765746e-05, "loss": 0.3268, "step": 6384 }, { "epoch": 1.0397345601107357, "grad_norm": 0.205872043967247, "learning_rate": 4.130273941721733e-05, "loss": 0.3477, "step": 6385 }, { "epoch": 1.0398974066685664, "grad_norm": 0.1272006779909134, "learning_rate": 4.129914787146e-05, "loss": 0.307, "step": 6386 }, { "epoch": 1.0400602532263974, "grad_norm": 0.1427989900112152, "learning_rate": 4.1295555740514436e-05, "loss": 0.3427, "step": 6387 }, { "epoch": 1.0402230997842283, "grad_norm": 0.13152049481868744, "learning_rate": 4.129196302450959e-05, "loss": 0.3469, "step": 6388 }, { "epoch": 1.0403859463420593, "grad_norm": 0.08737733215093613, "learning_rate": 4.128836972357447e-05, "loss": 0.2812, "step": 6389 }, { "epoch": 1.04054879289989, "grad_norm": 0.11211428791284561, "learning_rate": 4.128477583783807e-05, "loss": 0.3365, "step": 6390 }, { "epoch": 1.040711639457721, "grad_norm": 0.10991501063108444, "learning_rate": 4.128118136742942e-05, "loss": 0.2589, "step": 6391 }, { "epoch": 1.0408744860155519, "grad_norm": 0.10723240673542023, "learning_rate": 4.127758631247759e-05, "loss": 0.3132, "step": 6392 }, { "epoch": 1.0410373325733828, "grad_norm": 0.129766047000885, "learning_rate": 4.127399067311164e-05, "loss": 0.3194, "step": 6393 }, { "epoch": 1.0412001791312135, "grad_norm": 0.1290326863527298, "learning_rate": 4.127039444946067e-05, "loss": 0.3219, "step": 6394 }, { "epoch": 1.0413630256890445, "grad_norm": 0.12652431428432465, "learning_rate": 4.1266797641653784e-05, "loss": 0.3235, "step": 6395 }, { "epoch": 1.0415258722468754, "grad_norm": 0.15793175995349884, "learning_rate": 4.1263200249820125e-05, "loss": 0.2818, "step": 6396 }, { "epoch": 1.0416887188047064, "grad_norm": 0.11369152367115021, "learning_rate": 4.125960227408886e-05, "loss": 0.3286, "step": 6397 }, { "epoch": 1.041851565362537, "grad_norm": 0.08404883742332458, "learning_rate": 4.125600371458915e-05, "loss": 0.312, "step": 6398 }, { "epoch": 1.042014411920368, "grad_norm": 0.10711844265460968, "learning_rate": 4.125240457145021e-05, "loss": 0.3175, "step": 6399 }, { "epoch": 1.042177258478199, "grad_norm": 0.12685377895832062, "learning_rate": 4.124880484480124e-05, "loss": 0.3143, "step": 6400 }, { "epoch": 1.0423401050360297, "grad_norm": 0.15749265253543854, "learning_rate": 4.124520453477151e-05, "loss": 0.361, "step": 6401 }, { "epoch": 1.0425029515938606, "grad_norm": 0.10316163301467896, "learning_rate": 4.124160364149025e-05, "loss": 0.2766, "step": 6402 }, { "epoch": 1.0426657981516916, "grad_norm": 0.1570059210062027, "learning_rate": 4.123800216508676e-05, "loss": 0.2899, "step": 6403 }, { "epoch": 1.0428286447095225, "grad_norm": 0.13081806898117065, "learning_rate": 4.123440010569035e-05, "loss": 0.3368, "step": 6404 }, { "epoch": 1.0429914912673532, "grad_norm": 0.08678782731294632, "learning_rate": 4.1230797463430337e-05, "loss": 0.2898, "step": 6405 }, { "epoch": 1.0431543378251842, "grad_norm": 0.1847866028547287, "learning_rate": 4.122719423843606e-05, "loss": 0.3413, "step": 6406 }, { "epoch": 1.0433171843830151, "grad_norm": 0.13254418969154358, "learning_rate": 4.1223590430836894e-05, "loss": 0.2645, "step": 6407 }, { "epoch": 1.043480030940846, "grad_norm": 0.13467584550380707, "learning_rate": 4.1219986040762224e-05, "loss": 0.2698, "step": 6408 }, { "epoch": 1.0436428774986768, "grad_norm": 0.13999886810779572, "learning_rate": 4.1216381068341466e-05, "loss": 0.3229, "step": 6409 }, { "epoch": 1.0438057240565077, "grad_norm": 0.16513487696647644, "learning_rate": 4.1212775513704033e-05, "loss": 0.3159, "step": 6410 }, { "epoch": 1.0439685706143387, "grad_norm": 0.1570923924446106, "learning_rate": 4.1209169376979386e-05, "loss": 0.3186, "step": 6411 }, { "epoch": 1.0441314171721696, "grad_norm": 0.12067223340272903, "learning_rate": 4.1205562658297e-05, "loss": 0.3053, "step": 6412 }, { "epoch": 1.0442942637300003, "grad_norm": 0.14021503925323486, "learning_rate": 4.120195535778635e-05, "loss": 0.3079, "step": 6413 }, { "epoch": 1.0444571102878313, "grad_norm": 0.1584201455116272, "learning_rate": 4.119834747557697e-05, "loss": 0.3365, "step": 6414 }, { "epoch": 1.0446199568456622, "grad_norm": 0.15869571268558502, "learning_rate": 4.1194739011798383e-05, "loss": 0.3232, "step": 6415 }, { "epoch": 1.0447828034034932, "grad_norm": 0.09310127049684525, "learning_rate": 4.119112996658015e-05, "loss": 0.338, "step": 6416 }, { "epoch": 1.0449456499613239, "grad_norm": 0.16434872150421143, "learning_rate": 4.1187520340051825e-05, "loss": 0.3131, "step": 6417 }, { "epoch": 1.0451084965191548, "grad_norm": 0.13527831435203552, "learning_rate": 4.118391013234303e-05, "loss": 0.2743, "step": 6418 }, { "epoch": 1.0452713430769858, "grad_norm": 0.0744151920080185, "learning_rate": 4.118029934358337e-05, "loss": 0.3205, "step": 6419 }, { "epoch": 1.0454341896348165, "grad_norm": 0.1750897467136383, "learning_rate": 4.11766879739025e-05, "loss": 0.3352, "step": 6420 }, { "epoch": 1.0455970361926474, "grad_norm": 0.1080302819609642, "learning_rate": 4.1173076023430044e-05, "loss": 0.3172, "step": 6421 }, { "epoch": 1.0457598827504784, "grad_norm": 0.08886876702308655, "learning_rate": 4.116946349229571e-05, "loss": 0.2944, "step": 6422 }, { "epoch": 1.0459227293083093, "grad_norm": 0.1023910865187645, "learning_rate": 4.11658503806292e-05, "loss": 0.3017, "step": 6423 }, { "epoch": 1.04608557586614, "grad_norm": 0.11800073087215424, "learning_rate": 4.116223668856023e-05, "loss": 0.2769, "step": 6424 }, { "epoch": 1.046248422423971, "grad_norm": 0.11252546310424805, "learning_rate": 4.115862241621853e-05, "loss": 0.3224, "step": 6425 }, { "epoch": 1.046411268981802, "grad_norm": 0.12611719965934753, "learning_rate": 4.1155007563733875e-05, "loss": 0.3203, "step": 6426 }, { "epoch": 1.0465741155396329, "grad_norm": 0.14948336780071259, "learning_rate": 4.115139213123604e-05, "loss": 0.2939, "step": 6427 }, { "epoch": 1.0467369620974636, "grad_norm": 0.08930575847625732, "learning_rate": 4.1147776118854844e-05, "loss": 0.2878, "step": 6428 }, { "epoch": 1.0468998086552945, "grad_norm": 0.06760214269161224, "learning_rate": 4.1144159526720106e-05, "loss": 0.3127, "step": 6429 }, { "epoch": 1.0470626552131255, "grad_norm": 0.0872267335653305, "learning_rate": 4.1140542354961664e-05, "loss": 0.2765, "step": 6430 }, { "epoch": 1.0472255017709564, "grad_norm": 0.12333992123603821, "learning_rate": 4.1136924603709406e-05, "loss": 0.3546, "step": 6431 }, { "epoch": 1.0473883483287871, "grad_norm": 0.3042480945587158, "learning_rate": 4.11333062730932e-05, "loss": 0.3237, "step": 6432 }, { "epoch": 1.047551194886618, "grad_norm": 0.09535694122314453, "learning_rate": 4.1129687363242956e-05, "loss": 0.2978, "step": 6433 }, { "epoch": 1.047714041444449, "grad_norm": 0.12525075674057007, "learning_rate": 4.112606787428862e-05, "loss": 0.2999, "step": 6434 }, { "epoch": 1.0478768880022797, "grad_norm": 0.13309365510940552, "learning_rate": 4.1122447806360126e-05, "loss": 0.3039, "step": 6435 }, { "epoch": 1.0480397345601107, "grad_norm": 0.11377657204866409, "learning_rate": 4.1118827159587455e-05, "loss": 0.3137, "step": 6436 }, { "epoch": 1.0482025811179416, "grad_norm": 0.11769222468137741, "learning_rate": 4.111520593410059e-05, "loss": 0.3284, "step": 6437 }, { "epoch": 1.0483654276757726, "grad_norm": 0.12471546977758408, "learning_rate": 4.1111584130029557e-05, "loss": 0.3266, "step": 6438 }, { "epoch": 1.0485282742336033, "grad_norm": 0.14368170499801636, "learning_rate": 4.1107961747504374e-05, "loss": 0.3102, "step": 6439 }, { "epoch": 1.0486911207914342, "grad_norm": 0.16222871840000153, "learning_rate": 4.1104338786655113e-05, "loss": 0.3112, "step": 6440 }, { "epoch": 1.0488539673492652, "grad_norm": 0.0953105241060257, "learning_rate": 4.110071524761184e-05, "loss": 0.2803, "step": 6441 }, { "epoch": 1.0490168139070961, "grad_norm": 0.1306803971529007, "learning_rate": 4.109709113050464e-05, "loss": 0.2734, "step": 6442 }, { "epoch": 1.0491796604649268, "grad_norm": 0.10854443907737732, "learning_rate": 4.1093466435463645e-05, "loss": 0.3104, "step": 6443 }, { "epoch": 1.0493425070227578, "grad_norm": 0.1568976193666458, "learning_rate": 4.108984116261899e-05, "loss": 0.3219, "step": 6444 }, { "epoch": 1.0495053535805887, "grad_norm": 0.11421667039394379, "learning_rate": 4.108621531210084e-05, "loss": 0.2878, "step": 6445 }, { "epoch": 1.0496682001384197, "grad_norm": 0.13799810409545898, "learning_rate": 4.108258888403935e-05, "loss": 0.2645, "step": 6446 }, { "epoch": 1.0498310466962504, "grad_norm": 0.0947251096367836, "learning_rate": 4.1078961878564736e-05, "loss": 0.3204, "step": 6447 }, { "epoch": 1.0499938932540813, "grad_norm": 0.11089742183685303, "learning_rate": 4.1075334295807225e-05, "loss": 0.3241, "step": 6448 }, { "epoch": 1.0501567398119123, "grad_norm": 0.12044576555490494, "learning_rate": 4.1071706135897036e-05, "loss": 0.261, "step": 6449 }, { "epoch": 1.0503195863697432, "grad_norm": 0.11543828994035721, "learning_rate": 4.1068077398964456e-05, "loss": 0.3114, "step": 6450 }, { "epoch": 1.050482432927574, "grad_norm": 0.13165637850761414, "learning_rate": 4.106444808513975e-05, "loss": 0.3141, "step": 6451 }, { "epoch": 1.0506452794854049, "grad_norm": 0.11527428776025772, "learning_rate": 4.106081819455323e-05, "loss": 0.302, "step": 6452 }, { "epoch": 1.0508081260432358, "grad_norm": 0.12040796875953674, "learning_rate": 4.105718772733522e-05, "loss": 0.3041, "step": 6453 }, { "epoch": 1.0509709726010668, "grad_norm": 0.1142316535115242, "learning_rate": 4.1053556683616055e-05, "loss": 0.3191, "step": 6454 }, { "epoch": 1.0511338191588975, "grad_norm": 0.18384681642055511, "learning_rate": 4.104992506352612e-05, "loss": 0.3229, "step": 6455 }, { "epoch": 1.0512966657167284, "grad_norm": 0.11747393757104874, "learning_rate": 4.104629286719578e-05, "loss": 0.2958, "step": 6456 }, { "epoch": 1.0514595122745594, "grad_norm": 0.09339719265699387, "learning_rate": 4.1042660094755444e-05, "loss": 0.3137, "step": 6457 }, { "epoch": 1.05162235883239, "grad_norm": 0.11147086322307587, "learning_rate": 4.103902674633555e-05, "loss": 0.2875, "step": 6458 }, { "epoch": 1.051785205390221, "grad_norm": 0.134502574801445, "learning_rate": 4.103539282206655e-05, "loss": 0.3202, "step": 6459 }, { "epoch": 1.051948051948052, "grad_norm": 0.1761142462491989, "learning_rate": 4.103175832207889e-05, "loss": 0.2985, "step": 6460 }, { "epoch": 1.052110898505883, "grad_norm": 0.14567376673221588, "learning_rate": 4.102812324650309e-05, "loss": 0.3194, "step": 6461 }, { "epoch": 1.0522737450637136, "grad_norm": 0.16269059479236603, "learning_rate": 4.102448759546963e-05, "loss": 0.3177, "step": 6462 }, { "epoch": 1.0524365916215446, "grad_norm": 0.1827513724565506, "learning_rate": 4.1020851369109065e-05, "loss": 0.3278, "step": 6463 }, { "epoch": 1.0525994381793755, "grad_norm": 0.11751002818346024, "learning_rate": 4.101721456755193e-05, "loss": 0.3103, "step": 6464 }, { "epoch": 1.0527622847372065, "grad_norm": 0.08044668287038803, "learning_rate": 4.101357719092881e-05, "loss": 0.3298, "step": 6465 }, { "epoch": 1.0529251312950372, "grad_norm": 0.12837466597557068, "learning_rate": 4.100993923937028e-05, "loss": 0.2693, "step": 6466 }, { "epoch": 1.0530879778528681, "grad_norm": 0.1511927992105484, "learning_rate": 4.100630071300697e-05, "loss": 0.2753, "step": 6467 }, { "epoch": 1.053250824410699, "grad_norm": 0.10600511729717255, "learning_rate": 4.1002661611969507e-05, "loss": 0.3081, "step": 6468 }, { "epoch": 1.05341367096853, "grad_norm": 0.10941321402788162, "learning_rate": 4.099902193638855e-05, "loss": 0.2787, "step": 6469 }, { "epoch": 1.0535765175263607, "grad_norm": 0.13526354730129242, "learning_rate": 4.099538168639477e-05, "loss": 0.3272, "step": 6470 }, { "epoch": 1.0537393640841917, "grad_norm": 0.19655318558216095, "learning_rate": 4.099174086211886e-05, "loss": 0.3404, "step": 6471 }, { "epoch": 1.0539022106420226, "grad_norm": 0.17434749007225037, "learning_rate": 4.098809946369154e-05, "loss": 0.3397, "step": 6472 }, { "epoch": 1.0540650571998533, "grad_norm": 0.0990394577383995, "learning_rate": 4.098445749124356e-05, "loss": 0.2865, "step": 6473 }, { "epoch": 1.0542279037576843, "grad_norm": 0.0957668200135231, "learning_rate": 4.098081494490566e-05, "loss": 0.352, "step": 6474 }, { "epoch": 1.0543907503155152, "grad_norm": 0.10329496115446091, "learning_rate": 4.097717182480861e-05, "loss": 0.3143, "step": 6475 }, { "epoch": 1.0545535968733462, "grad_norm": 0.12195643037557602, "learning_rate": 4.097352813108324e-05, "loss": 0.3326, "step": 6476 }, { "epoch": 1.0547164434311769, "grad_norm": 0.1339833289384842, "learning_rate": 4.0969883863860337e-05, "loss": 0.32, "step": 6477 }, { "epoch": 1.0548792899890078, "grad_norm": 0.19458065927028656, "learning_rate": 4.096623902327077e-05, "loss": 0.331, "step": 6478 }, { "epoch": 1.0550421365468388, "grad_norm": 0.1023450419306755, "learning_rate": 4.0962593609445376e-05, "loss": 0.3156, "step": 6479 }, { "epoch": 1.0552049831046697, "grad_norm": 0.08181213587522507, "learning_rate": 4.095894762251505e-05, "loss": 0.3313, "step": 6480 }, { "epoch": 1.0553678296625004, "grad_norm": 0.08509378135204315, "learning_rate": 4.09553010626107e-05, "loss": 0.2928, "step": 6481 }, { "epoch": 1.0555306762203314, "grad_norm": 0.11397818475961685, "learning_rate": 4.095165392986322e-05, "loss": 0.2838, "step": 6482 }, { "epoch": 1.0556935227781623, "grad_norm": 0.13535967469215393, "learning_rate": 4.0948006224403576e-05, "loss": 0.2932, "step": 6483 }, { "epoch": 1.0558563693359933, "grad_norm": 0.18049898743629456, "learning_rate": 4.0944357946362735e-05, "loss": 0.2984, "step": 6484 }, { "epoch": 1.056019215893824, "grad_norm": 0.1585858017206192, "learning_rate": 4.094070909587168e-05, "loss": 0.3043, "step": 6485 }, { "epoch": 1.056182062451655, "grad_norm": 0.14198049902915955, "learning_rate": 4.093705967306139e-05, "loss": 0.2832, "step": 6486 }, { "epoch": 1.0563449090094859, "grad_norm": 0.13173246383666992, "learning_rate": 4.093340967806292e-05, "loss": 0.3138, "step": 6487 }, { "epoch": 1.0565077555673168, "grad_norm": 0.1766088455915451, "learning_rate": 4.09297591110073e-05, "loss": 0.339, "step": 6488 }, { "epoch": 1.0566706021251475, "grad_norm": 0.16728803515434265, "learning_rate": 4.09261079720256e-05, "loss": 0.2939, "step": 6489 }, { "epoch": 1.0568334486829785, "grad_norm": 0.08328205347061157, "learning_rate": 4.0922456261248915e-05, "loss": 0.3187, "step": 6490 }, { "epoch": 1.0569962952408094, "grad_norm": 0.18306396901607513, "learning_rate": 4.091880397880834e-05, "loss": 0.322, "step": 6491 }, { "epoch": 1.0571591417986403, "grad_norm": 0.08259943127632141, "learning_rate": 4.091515112483501e-05, "loss": 0.2828, "step": 6492 }, { "epoch": 1.057321988356471, "grad_norm": 0.16057084500789642, "learning_rate": 4.091149769946006e-05, "loss": 0.3393, "step": 6493 }, { "epoch": 1.057484834914302, "grad_norm": 0.1693112850189209, "learning_rate": 4.09078437028147e-05, "loss": 0.3605, "step": 6494 }, { "epoch": 1.057647681472133, "grad_norm": 0.15613576769828796, "learning_rate": 4.0904189135030066e-05, "loss": 0.2966, "step": 6495 }, { "epoch": 1.0578105280299637, "grad_norm": 0.12696020305156708, "learning_rate": 4.09005339962374e-05, "loss": 0.307, "step": 6496 }, { "epoch": 1.0579733745877946, "grad_norm": 0.10237325727939606, "learning_rate": 4.089687828656792e-05, "loss": 0.3125, "step": 6497 }, { "epoch": 1.0581362211456256, "grad_norm": 0.13953343033790588, "learning_rate": 4.089322200615288e-05, "loss": 0.304, "step": 6498 }, { "epoch": 1.0582990677034565, "grad_norm": 0.1863505244255066, "learning_rate": 4.0889565155123554e-05, "loss": 0.3145, "step": 6499 }, { "epoch": 1.0584619142612872, "grad_norm": 0.1892157793045044, "learning_rate": 4.088590773361123e-05, "loss": 0.339, "step": 6500 }, { "epoch": 1.0586247608191182, "grad_norm": 0.19246919453144073, "learning_rate": 4.0882249741747233e-05, "loss": 0.3264, "step": 6501 }, { "epoch": 1.058787607376949, "grad_norm": 0.12542670965194702, "learning_rate": 4.087859117966288e-05, "loss": 0.2661, "step": 6502 }, { "epoch": 1.05895045393478, "grad_norm": 0.09329681843519211, "learning_rate": 4.087493204748953e-05, "loss": 0.301, "step": 6503 }, { "epoch": 1.0591133004926108, "grad_norm": 0.1316758096218109, "learning_rate": 4.0871272345358554e-05, "loss": 0.2841, "step": 6504 }, { "epoch": 1.0592761470504417, "grad_norm": 0.11301843076944351, "learning_rate": 4.0867612073401355e-05, "loss": 0.3155, "step": 6505 }, { "epoch": 1.0594389936082726, "grad_norm": 0.12633556127548218, "learning_rate": 4.086395123174934e-05, "loss": 0.3106, "step": 6506 }, { "epoch": 1.0596018401661036, "grad_norm": 0.16430066525936127, "learning_rate": 4.0860289820533944e-05, "loss": 0.3109, "step": 6507 }, { "epoch": 1.0597646867239343, "grad_norm": 0.12076854705810547, "learning_rate": 4.0856627839886623e-05, "loss": 0.2691, "step": 6508 }, { "epoch": 1.0599275332817653, "grad_norm": 0.1052497923374176, "learning_rate": 4.085296528993886e-05, "loss": 0.2964, "step": 6509 }, { "epoch": 1.0600903798395962, "grad_norm": 0.10066922754049301, "learning_rate": 4.0849302170822145e-05, "loss": 0.3078, "step": 6510 }, { "epoch": 1.060253226397427, "grad_norm": 0.08066663146018982, "learning_rate": 4.0845638482667994e-05, "loss": 0.3344, "step": 6511 }, { "epoch": 1.0604160729552579, "grad_norm": 0.1339479684829712, "learning_rate": 4.0841974225607946e-05, "loss": 0.2861, "step": 6512 }, { "epoch": 1.0605789195130888, "grad_norm": 0.1344396024942398, "learning_rate": 4.083830939977356e-05, "loss": 0.2904, "step": 6513 }, { "epoch": 1.0607417660709197, "grad_norm": 0.11990480870008469, "learning_rate": 4.083464400529642e-05, "loss": 0.2834, "step": 6514 }, { "epoch": 1.0609046126287505, "grad_norm": 0.18928635120391846, "learning_rate": 4.083097804230811e-05, "loss": 0.3142, "step": 6515 }, { "epoch": 1.0610674591865814, "grad_norm": 0.1314372420310974, "learning_rate": 4.082731151094026e-05, "loss": 0.3302, "step": 6516 }, { "epoch": 1.0612303057444123, "grad_norm": 0.08638086915016174, "learning_rate": 4.082364441132451e-05, "loss": 0.2986, "step": 6517 }, { "epoch": 1.0613931523022433, "grad_norm": 0.18432942032814026, "learning_rate": 4.081997674359251e-05, "loss": 0.3238, "step": 6518 }, { "epoch": 1.061555998860074, "grad_norm": 0.13626977801322937, "learning_rate": 4.0816308507875954e-05, "loss": 0.2996, "step": 6519 }, { "epoch": 1.061718845417905, "grad_norm": 0.11345792561769485, "learning_rate": 4.081263970430653e-05, "loss": 0.3027, "step": 6520 }, { "epoch": 1.061881691975736, "grad_norm": 0.07169169187545776, "learning_rate": 4.080897033301597e-05, "loss": 0.3483, "step": 6521 }, { "epoch": 1.0620445385335668, "grad_norm": 0.1531982272863388, "learning_rate": 4.080530039413601e-05, "loss": 0.3328, "step": 6522 }, { "epoch": 1.0622073850913976, "grad_norm": 0.15063582360744476, "learning_rate": 4.080162988779841e-05, "loss": 0.288, "step": 6523 }, { "epoch": 1.0623702316492285, "grad_norm": 0.13270393013954163, "learning_rate": 4.079795881413495e-05, "loss": 0.2806, "step": 6524 }, { "epoch": 1.0625330782070594, "grad_norm": 0.12500202655792236, "learning_rate": 4.079428717327744e-05, "loss": 0.3184, "step": 6525 }, { "epoch": 1.0626959247648902, "grad_norm": 0.09345503151416779, "learning_rate": 4.0790614965357704e-05, "loss": 0.323, "step": 6526 }, { "epoch": 1.062858771322721, "grad_norm": 0.11650926619768143, "learning_rate": 4.078694219050758e-05, "loss": 0.3182, "step": 6527 }, { "epoch": 1.063021617880552, "grad_norm": 0.2645052373409271, "learning_rate": 4.0783268848858925e-05, "loss": 0.3132, "step": 6528 }, { "epoch": 1.063184464438383, "grad_norm": 0.1349460333585739, "learning_rate": 4.077959494054363e-05, "loss": 0.311, "step": 6529 }, { "epoch": 1.063347310996214, "grad_norm": 0.1315019279718399, "learning_rate": 4.0775920465693603e-05, "loss": 0.2871, "step": 6530 }, { "epoch": 1.0635101575540447, "grad_norm": 0.11100945621728897, "learning_rate": 4.077224542444077e-05, "loss": 0.3022, "step": 6531 }, { "epoch": 1.0636730041118756, "grad_norm": 0.17260068655014038, "learning_rate": 4.0768569816917073e-05, "loss": 0.3079, "step": 6532 }, { "epoch": 1.0638358506697065, "grad_norm": 0.11596681922674179, "learning_rate": 4.0764893643254466e-05, "loss": 0.3472, "step": 6533 }, { "epoch": 1.0639986972275373, "grad_norm": 0.12732303142547607, "learning_rate": 4.0761216903584955e-05, "loss": 0.3142, "step": 6534 }, { "epoch": 1.0641615437853682, "grad_norm": 0.06962388008832932, "learning_rate": 4.075753959804053e-05, "loss": 0.3185, "step": 6535 }, { "epoch": 1.0643243903431991, "grad_norm": 0.1829998940229416, "learning_rate": 4.0753861726753215e-05, "loss": 0.318, "step": 6536 }, { "epoch": 1.06448723690103, "grad_norm": 0.11884647607803345, "learning_rate": 4.075018328985507e-05, "loss": 0.2997, "step": 6537 }, { "epoch": 1.0646500834588608, "grad_norm": 0.08029520511627197, "learning_rate": 4.074650428747816e-05, "loss": 0.3102, "step": 6538 }, { "epoch": 1.0648129300166917, "grad_norm": 0.12709034979343414, "learning_rate": 4.074282471975457e-05, "loss": 0.3208, "step": 6539 }, { "epoch": 1.0649757765745227, "grad_norm": 0.17934712767601013, "learning_rate": 4.0739144586816394e-05, "loss": 0.3658, "step": 6540 }, { "epoch": 1.0651386231323536, "grad_norm": 0.11420386284589767, "learning_rate": 4.073546388879579e-05, "loss": 0.3481, "step": 6541 }, { "epoch": 1.0653014696901844, "grad_norm": 0.14000347256660461, "learning_rate": 4.0731782625824865e-05, "loss": 0.3261, "step": 6542 }, { "epoch": 1.0654643162480153, "grad_norm": 0.11188580840826035, "learning_rate": 4.0728100798035826e-05, "loss": 0.3185, "step": 6543 }, { "epoch": 1.0656271628058462, "grad_norm": 0.195398211479187, "learning_rate": 4.072441840556084e-05, "loss": 0.3364, "step": 6544 }, { "epoch": 1.0657900093636772, "grad_norm": 0.15750430524349213, "learning_rate": 4.072073544853213e-05, "loss": 0.2874, "step": 6545 }, { "epoch": 1.065952855921508, "grad_norm": 0.1499447226524353, "learning_rate": 4.071705192708191e-05, "loss": 0.289, "step": 6546 }, { "epoch": 1.0661157024793388, "grad_norm": 0.13188156485557556, "learning_rate": 4.071336784134243e-05, "loss": 0.3145, "step": 6547 }, { "epoch": 1.0662785490371698, "grad_norm": 0.11176210641860962, "learning_rate": 4.070968319144597e-05, "loss": 0.3273, "step": 6548 }, { "epoch": 1.0664413955950005, "grad_norm": 0.06432512402534485, "learning_rate": 4.070599797752482e-05, "loss": 0.3074, "step": 6549 }, { "epoch": 1.0666042421528314, "grad_norm": 0.11056698113679886, "learning_rate": 4.070231219971129e-05, "loss": 0.3184, "step": 6550 }, { "epoch": 1.0667670887106624, "grad_norm": 0.16015204787254333, "learning_rate": 4.06986258581377e-05, "loss": 0.298, "step": 6551 }, { "epoch": 1.0669299352684933, "grad_norm": 0.130189448595047, "learning_rate": 4.0694938952936406e-05, "loss": 0.3213, "step": 6552 }, { "epoch": 1.067092781826324, "grad_norm": 0.12403954565525055, "learning_rate": 4.069125148423978e-05, "loss": 0.2782, "step": 6553 }, { "epoch": 1.067255628384155, "grad_norm": 0.13779431581497192, "learning_rate": 4.068756345218022e-05, "loss": 0.3012, "step": 6554 }, { "epoch": 1.067418474941986, "grad_norm": 0.07281094044446945, "learning_rate": 4.068387485689013e-05, "loss": 0.3024, "step": 6555 }, { "epoch": 1.0675813214998169, "grad_norm": 0.14172105491161346, "learning_rate": 4.0680185698501935e-05, "loss": 0.3011, "step": 6556 }, { "epoch": 1.0677441680576476, "grad_norm": 0.09583024680614471, "learning_rate": 4.06764959771481e-05, "loss": 0.2846, "step": 6557 }, { "epoch": 1.0679070146154785, "grad_norm": 0.08040963113307953, "learning_rate": 4.067280569296109e-05, "loss": 0.3043, "step": 6558 }, { "epoch": 1.0680698611733095, "grad_norm": 0.1725759357213974, "learning_rate": 4.06691148460734e-05, "loss": 0.3031, "step": 6559 }, { "epoch": 1.0682327077311404, "grad_norm": 0.19880735874176025, "learning_rate": 4.0665423436617534e-05, "loss": 0.352, "step": 6560 }, { "epoch": 1.0683955542889711, "grad_norm": 0.10987142473459244, "learning_rate": 4.066173146472604e-05, "loss": 0.2891, "step": 6561 }, { "epoch": 1.068558400846802, "grad_norm": 0.06788124144077301, "learning_rate": 4.065803893053146e-05, "loss": 0.3505, "step": 6562 }, { "epoch": 1.068721247404633, "grad_norm": 0.10528304427862167, "learning_rate": 4.065434583416637e-05, "loss": 0.2945, "step": 6563 }, { "epoch": 1.0688840939624638, "grad_norm": 0.15055854618549347, "learning_rate": 4.065065217576336e-05, "loss": 0.3287, "step": 6564 }, { "epoch": 1.0690469405202947, "grad_norm": 0.1489458531141281, "learning_rate": 4.064695795545505e-05, "loss": 0.3178, "step": 6565 }, { "epoch": 1.0692097870781256, "grad_norm": 0.11166234314441681, "learning_rate": 4.064326317337407e-05, "loss": 0.2741, "step": 6566 }, { "epoch": 1.0693726336359566, "grad_norm": 0.14296841621398926, "learning_rate": 4.063956782965308e-05, "loss": 0.3798, "step": 6567 }, { "epoch": 1.0695354801937873, "grad_norm": 0.09897289425134659, "learning_rate": 4.063587192442473e-05, "loss": 0.2942, "step": 6568 }, { "epoch": 1.0696983267516182, "grad_norm": 0.12831388413906097, "learning_rate": 4.063217545782175e-05, "loss": 0.3193, "step": 6569 }, { "epoch": 1.0698611733094492, "grad_norm": 0.09432195872068405, "learning_rate": 4.062847842997683e-05, "loss": 0.2709, "step": 6570 }, { "epoch": 1.0700240198672801, "grad_norm": 0.0870201364159584, "learning_rate": 4.0624780841022696e-05, "loss": 0.3522, "step": 6571 }, { "epoch": 1.0701868664251108, "grad_norm": 0.10332697629928589, "learning_rate": 4.0621082691092136e-05, "loss": 0.2673, "step": 6572 }, { "epoch": 1.0703497129829418, "grad_norm": 0.09204105287790298, "learning_rate": 4.061738398031789e-05, "loss": 0.3082, "step": 6573 }, { "epoch": 1.0705125595407727, "grad_norm": 0.10221158713102341, "learning_rate": 4.0613684708832776e-05, "loss": 0.2922, "step": 6574 }, { "epoch": 1.0706754060986037, "grad_norm": 0.17256838083267212, "learning_rate": 4.06099848767696e-05, "loss": 0.2968, "step": 6575 }, { "epoch": 1.0708382526564344, "grad_norm": 0.10102322697639465, "learning_rate": 4.0606284484261205e-05, "loss": 0.3209, "step": 6576 }, { "epoch": 1.0710010992142653, "grad_norm": 0.15685582160949707, "learning_rate": 4.060258353144043e-05, "loss": 0.3162, "step": 6577 }, { "epoch": 1.0711639457720963, "grad_norm": 0.10646357387304306, "learning_rate": 4.0598882018440164e-05, "loss": 0.303, "step": 6578 }, { "epoch": 1.0713267923299272, "grad_norm": 0.11390911042690277, "learning_rate": 4.05951799453933e-05, "loss": 0.2888, "step": 6579 }, { "epoch": 1.071489638887758, "grad_norm": 0.12000960111618042, "learning_rate": 4.059147731243275e-05, "loss": 0.301, "step": 6580 }, { "epoch": 1.0716524854455889, "grad_norm": 0.11438936740159988, "learning_rate": 4.058777411969145e-05, "loss": 0.313, "step": 6581 }, { "epoch": 1.0718153320034198, "grad_norm": 0.1252865046262741, "learning_rate": 4.0584070367302354e-05, "loss": 0.2904, "step": 6582 }, { "epoch": 1.0719781785612508, "grad_norm": 0.14082950353622437, "learning_rate": 4.0580366055398444e-05, "loss": 0.2982, "step": 6583 }, { "epoch": 1.0721410251190815, "grad_norm": 0.08616630733013153, "learning_rate": 4.0576661184112715e-05, "loss": 0.2916, "step": 6584 }, { "epoch": 1.0723038716769124, "grad_norm": 0.10880396515130997, "learning_rate": 4.057295575357818e-05, "loss": 0.3111, "step": 6585 }, { "epoch": 1.0724667182347434, "grad_norm": 0.08262060582637787, "learning_rate": 4.056924976392787e-05, "loss": 0.3057, "step": 6586 }, { "epoch": 1.072629564792574, "grad_norm": 0.10936203598976135, "learning_rate": 4.056554321529485e-05, "loss": 0.2675, "step": 6587 }, { "epoch": 1.072792411350405, "grad_norm": 0.1027633473277092, "learning_rate": 4.056183610781219e-05, "loss": 0.2783, "step": 6588 }, { "epoch": 1.072955257908236, "grad_norm": 0.09511929005384445, "learning_rate": 4.0558128441613e-05, "loss": 0.3178, "step": 6589 }, { "epoch": 1.073118104466067, "grad_norm": 0.1194213479757309, "learning_rate": 4.0554420216830366e-05, "loss": 0.3293, "step": 6590 }, { "epoch": 1.0732809510238976, "grad_norm": 0.14969994127750397, "learning_rate": 4.055071143359746e-05, "loss": 0.296, "step": 6591 }, { "epoch": 1.0734437975817286, "grad_norm": 0.11938375234603882, "learning_rate": 4.054700209204742e-05, "loss": 0.3279, "step": 6592 }, { "epoch": 1.0736066441395595, "grad_norm": 0.11557788401842117, "learning_rate": 4.054329219231342e-05, "loss": 0.326, "step": 6593 }, { "epoch": 1.0737694906973905, "grad_norm": 0.13428978621959686, "learning_rate": 4.0539581734528664e-05, "loss": 0.2789, "step": 6594 }, { "epoch": 1.0739323372552212, "grad_norm": 0.09600310772657394, "learning_rate": 4.053587071882637e-05, "loss": 0.3437, "step": 6595 }, { "epoch": 1.0740951838130521, "grad_norm": 0.12061364203691483, "learning_rate": 4.053215914533977e-05, "loss": 0.2844, "step": 6596 }, { "epoch": 1.074258030370883, "grad_norm": 0.15220598876476288, "learning_rate": 4.052844701420211e-05, "loss": 0.3358, "step": 6597 }, { "epoch": 1.074420876928714, "grad_norm": 0.10527708381414413, "learning_rate": 4.0524734325546686e-05, "loss": 0.3047, "step": 6598 }, { "epoch": 1.0745837234865447, "grad_norm": 0.15827159583568573, "learning_rate": 4.0521021079506786e-05, "loss": 0.2806, "step": 6599 }, { "epoch": 1.0747465700443757, "grad_norm": 0.12074378877878189, "learning_rate": 4.0517307276215735e-05, "loss": 0.3137, "step": 6600 }, { "epoch": 1.0749094166022066, "grad_norm": 0.14460420608520508, "learning_rate": 4.0513592915806855e-05, "loss": 0.2968, "step": 6601 }, { "epoch": 1.0750722631600373, "grad_norm": 0.13787677884101868, "learning_rate": 4.0509877998413504e-05, "loss": 0.3418, "step": 6602 }, { "epoch": 1.0752351097178683, "grad_norm": 0.1278361678123474, "learning_rate": 4.0506162524169065e-05, "loss": 0.3165, "step": 6603 }, { "epoch": 1.0753979562756992, "grad_norm": 0.1517341136932373, "learning_rate": 4.0502446493206936e-05, "loss": 0.3557, "step": 6604 }, { "epoch": 1.0755608028335302, "grad_norm": 0.08878108114004135, "learning_rate": 4.0498729905660534e-05, "loss": 0.3054, "step": 6605 }, { "epoch": 1.075723649391361, "grad_norm": 0.11268006265163422, "learning_rate": 4.0495012761663285e-05, "loss": 0.3041, "step": 6606 }, { "epoch": 1.0758864959491918, "grad_norm": 0.10124726593494415, "learning_rate": 4.049129506134867e-05, "loss": 0.3373, "step": 6607 }, { "epoch": 1.0760493425070228, "grad_norm": 0.17668280005455017, "learning_rate": 4.048757680485013e-05, "loss": 0.3372, "step": 6608 }, { "epoch": 1.0762121890648537, "grad_norm": 0.1188269779086113, "learning_rate": 4.0483857992301194e-05, "loss": 0.3167, "step": 6609 }, { "epoch": 1.0763750356226844, "grad_norm": 0.11732067167758942, "learning_rate": 4.0480138623835364e-05, "loss": 0.3075, "step": 6610 }, { "epoch": 1.0765378821805154, "grad_norm": 0.11865805089473724, "learning_rate": 4.047641869958617e-05, "loss": 0.3176, "step": 6611 }, { "epoch": 1.0767007287383463, "grad_norm": 0.11683054268360138, "learning_rate": 4.0472698219687174e-05, "loss": 0.3121, "step": 6612 }, { "epoch": 1.0768635752961773, "grad_norm": 0.1313445270061493, "learning_rate": 4.046897718427196e-05, "loss": 0.2792, "step": 6613 }, { "epoch": 1.077026421854008, "grad_norm": 0.13145971298217773, "learning_rate": 4.0465255593474126e-05, "loss": 0.3225, "step": 6614 }, { "epoch": 1.077189268411839, "grad_norm": 0.1223006546497345, "learning_rate": 4.046153344742727e-05, "loss": 0.4112, "step": 6615 }, { "epoch": 1.0773521149696699, "grad_norm": 0.13112038373947144, "learning_rate": 4.045781074626505e-05, "loss": 0.325, "step": 6616 }, { "epoch": 1.0775149615275008, "grad_norm": 0.1128876656293869, "learning_rate": 4.04540874901211e-05, "loss": 0.3233, "step": 6617 }, { "epoch": 1.0776778080853315, "grad_norm": 0.14896835386753082, "learning_rate": 4.0450363679129117e-05, "loss": 0.3249, "step": 6618 }, { "epoch": 1.0778406546431625, "grad_norm": 0.18013732135295868, "learning_rate": 4.0446639313422784e-05, "loss": 0.3099, "step": 6619 }, { "epoch": 1.0780035012009934, "grad_norm": 0.12885098159313202, "learning_rate": 4.0442914393135825e-05, "loss": 0.3244, "step": 6620 }, { "epoch": 1.0781663477588244, "grad_norm": 0.08553294837474823, "learning_rate": 4.043918891840196e-05, "loss": 0.3028, "step": 6621 }, { "epoch": 1.078329194316655, "grad_norm": 0.18008695542812347, "learning_rate": 4.043546288935496e-05, "loss": 0.3212, "step": 6622 }, { "epoch": 1.078492040874486, "grad_norm": 0.1212550550699234, "learning_rate": 4.0431736306128596e-05, "loss": 0.3315, "step": 6623 }, { "epoch": 1.078654887432317, "grad_norm": 0.14189109206199646, "learning_rate": 4.042800916885666e-05, "loss": 0.2853, "step": 6624 }, { "epoch": 1.0788177339901477, "grad_norm": 0.11742759495973587, "learning_rate": 4.042428147767298e-05, "loss": 0.3298, "step": 6625 }, { "epoch": 1.0789805805479786, "grad_norm": 0.10258667916059494, "learning_rate": 4.042055323271138e-05, "loss": 0.3168, "step": 6626 }, { "epoch": 1.0791434271058096, "grad_norm": 0.14048340916633606, "learning_rate": 4.041682443410572e-05, "loss": 0.2741, "step": 6627 }, { "epoch": 1.0793062736636405, "grad_norm": 0.1628209948539734, "learning_rate": 4.041309508198987e-05, "loss": 0.3326, "step": 6628 }, { "epoch": 1.0794691202214712, "grad_norm": 0.08942516893148422, "learning_rate": 4.040936517649773e-05, "loss": 0.2736, "step": 6629 }, { "epoch": 1.0796319667793022, "grad_norm": 0.11073312908411026, "learning_rate": 4.040563471776321e-05, "loss": 0.3054, "step": 6630 }, { "epoch": 1.0797948133371331, "grad_norm": 0.0920577272772789, "learning_rate": 4.040190370592024e-05, "loss": 0.2807, "step": 6631 }, { "epoch": 1.079957659894964, "grad_norm": 0.1023326963186264, "learning_rate": 4.03981721411028e-05, "loss": 0.3542, "step": 6632 }, { "epoch": 1.0801205064527948, "grad_norm": 0.16963912546634674, "learning_rate": 4.0394440023444825e-05, "loss": 0.2806, "step": 6633 }, { "epoch": 1.0802833530106257, "grad_norm": 0.17356449365615845, "learning_rate": 4.039070735308034e-05, "loss": 0.3468, "step": 6634 }, { "epoch": 1.0804461995684567, "grad_norm": 0.11865149438381195, "learning_rate": 4.038697413014335e-05, "loss": 0.3061, "step": 6635 }, { "epoch": 1.0806090461262876, "grad_norm": 0.10435042530298233, "learning_rate": 4.038324035476789e-05, "loss": 0.3168, "step": 6636 }, { "epoch": 1.0807718926841183, "grad_norm": 0.18342800438404083, "learning_rate": 4.037950602708801e-05, "loss": 0.3225, "step": 6637 }, { "epoch": 1.0809347392419493, "grad_norm": 0.11828211694955826, "learning_rate": 4.037577114723779e-05, "loss": 0.3173, "step": 6638 }, { "epoch": 1.0810975857997802, "grad_norm": 0.11445783078670502, "learning_rate": 4.037203571535131e-05, "loss": 0.2799, "step": 6639 }, { "epoch": 1.081260432357611, "grad_norm": 0.1367616355419159, "learning_rate": 4.03682997315627e-05, "loss": 0.3527, "step": 6640 }, { "epoch": 1.0814232789154419, "grad_norm": 0.11533341556787491, "learning_rate": 4.0364563196006075e-05, "loss": 0.3425, "step": 6641 }, { "epoch": 1.0815861254732728, "grad_norm": 0.10437577962875366, "learning_rate": 4.03608261088156e-05, "loss": 0.3247, "step": 6642 }, { "epoch": 1.0817489720311038, "grad_norm": 0.184202641248703, "learning_rate": 4.0357088470125445e-05, "loss": 0.3154, "step": 6643 }, { "epoch": 1.0819118185889345, "grad_norm": 0.09551999717950821, "learning_rate": 4.035335028006981e-05, "loss": 0.3049, "step": 6644 }, { "epoch": 1.0820746651467654, "grad_norm": 0.14536599814891815, "learning_rate": 4.034961153878289e-05, "loss": 0.3092, "step": 6645 }, { "epoch": 1.0822375117045964, "grad_norm": 0.19001121819019318, "learning_rate": 4.034587224639894e-05, "loss": 0.2963, "step": 6646 }, { "epoch": 1.0824003582624273, "grad_norm": 0.12922322750091553, "learning_rate": 4.0342132403052194e-05, "loss": 0.2678, "step": 6647 }, { "epoch": 1.082563204820258, "grad_norm": 0.11797767877578735, "learning_rate": 4.0338392008876924e-05, "loss": 0.3125, "step": 6648 }, { "epoch": 1.082726051378089, "grad_norm": 0.15104924142360687, "learning_rate": 4.033465106400743e-05, "loss": 0.2975, "step": 6649 }, { "epoch": 1.08288889793592, "grad_norm": 0.12251697480678558, "learning_rate": 4.0330909568578017e-05, "loss": 0.3012, "step": 6650 }, { "epoch": 1.0830517444937509, "grad_norm": 0.12713077664375305, "learning_rate": 4.032716752272302e-05, "loss": 0.3204, "step": 6651 }, { "epoch": 1.0832145910515816, "grad_norm": 0.1731662154197693, "learning_rate": 4.032342492657679e-05, "loss": 0.2814, "step": 6652 }, { "epoch": 1.0833774376094125, "grad_norm": 0.12208852916955948, "learning_rate": 4.031968178027369e-05, "loss": 0.3352, "step": 6653 }, { "epoch": 1.0835402841672435, "grad_norm": 0.12833727896213531, "learning_rate": 4.0315938083948116e-05, "loss": 0.3287, "step": 6654 }, { "epoch": 1.0837031307250742, "grad_norm": 0.13394832611083984, "learning_rate": 4.031219383773447e-05, "loss": 0.2918, "step": 6655 }, { "epoch": 1.0838659772829051, "grad_norm": 0.11184832453727722, "learning_rate": 4.0308449041767204e-05, "loss": 0.2897, "step": 6656 }, { "epoch": 1.084028823840736, "grad_norm": 0.12075425684452057, "learning_rate": 4.030470369618074e-05, "loss": 0.2806, "step": 6657 }, { "epoch": 1.084191670398567, "grad_norm": 0.10457520186901093, "learning_rate": 4.030095780110956e-05, "loss": 0.3273, "step": 6658 }, { "epoch": 1.084354516956398, "grad_norm": 0.1463453322649002, "learning_rate": 4.029721135668816e-05, "loss": 0.3114, "step": 6659 }, { "epoch": 1.0845173635142287, "grad_norm": 0.1271149218082428, "learning_rate": 4.029346436305104e-05, "loss": 0.3421, "step": 6660 }, { "epoch": 1.0846802100720596, "grad_norm": 0.08544496446847916, "learning_rate": 4.0289716820332726e-05, "loss": 0.303, "step": 6661 }, { "epoch": 1.0848430566298906, "grad_norm": 0.13508006930351257, "learning_rate": 4.0285968728667756e-05, "loss": 0.329, "step": 6662 }, { "epoch": 1.0850059031877213, "grad_norm": 0.10318908840417862, "learning_rate": 4.0282220088190726e-05, "loss": 0.302, "step": 6663 }, { "epoch": 1.0851687497455522, "grad_norm": 0.08597276359796524, "learning_rate": 4.0278470899036205e-05, "loss": 0.3075, "step": 6664 }, { "epoch": 1.0853315963033832, "grad_norm": 0.08909563720226288, "learning_rate": 4.0274721161338804e-05, "loss": 0.3347, "step": 6665 }, { "epoch": 1.085494442861214, "grad_norm": 0.1831119954586029, "learning_rate": 4.0270970875233146e-05, "loss": 0.3521, "step": 6666 }, { "epoch": 1.0856572894190448, "grad_norm": 0.0966852456331253, "learning_rate": 4.0267220040853884e-05, "loss": 0.2844, "step": 6667 }, { "epoch": 1.0858201359768758, "grad_norm": 0.11308364570140839, "learning_rate": 4.026346865833568e-05, "loss": 0.331, "step": 6668 }, { "epoch": 1.0859829825347067, "grad_norm": 0.17110304534435272, "learning_rate": 4.0259716727813216e-05, "loss": 0.3482, "step": 6669 }, { "epoch": 1.0861458290925377, "grad_norm": 0.08744799345731735, "learning_rate": 4.025596424942121e-05, "loss": 0.3268, "step": 6670 }, { "epoch": 1.0863086756503684, "grad_norm": 0.11464209109544754, "learning_rate": 4.0252211223294376e-05, "loss": 0.3391, "step": 6671 }, { "epoch": 1.0864715222081993, "grad_norm": 0.13719968497753143, "learning_rate": 4.024845764956746e-05, "loss": 0.3341, "step": 6672 }, { "epoch": 1.0866343687660303, "grad_norm": 0.11521962285041809, "learning_rate": 4.024470352837523e-05, "loss": 0.3286, "step": 6673 }, { "epoch": 1.0867972153238612, "grad_norm": 0.1287059783935547, "learning_rate": 4.0240948859852467e-05, "loss": 0.328, "step": 6674 }, { "epoch": 1.086960061881692, "grad_norm": 0.13961219787597656, "learning_rate": 4.023719364413397e-05, "loss": 0.2873, "step": 6675 }, { "epoch": 1.0871229084395229, "grad_norm": 0.10662619769573212, "learning_rate": 4.023343788135459e-05, "loss": 0.3087, "step": 6676 }, { "epoch": 1.0872857549973538, "grad_norm": 0.15490439534187317, "learning_rate": 4.022968157164913e-05, "loss": 0.3072, "step": 6677 }, { "epoch": 1.0874486015551845, "grad_norm": 0.12009883671998978, "learning_rate": 4.022592471515249e-05, "loss": 0.3062, "step": 6678 }, { "epoch": 1.0876114481130155, "grad_norm": 0.12140356004238129, "learning_rate": 4.0222167311999516e-05, "loss": 0.2934, "step": 6679 }, { "epoch": 1.0877742946708464, "grad_norm": 0.11127996444702148, "learning_rate": 4.021840936232514e-05, "loss": 0.2799, "step": 6680 }, { "epoch": 1.0879371412286774, "grad_norm": 0.11111043393611908, "learning_rate": 4.021465086626427e-05, "loss": 0.3069, "step": 6681 }, { "epoch": 1.088099987786508, "grad_norm": 0.09016823023557663, "learning_rate": 4.021089182395185e-05, "loss": 0.3352, "step": 6682 }, { "epoch": 1.088262834344339, "grad_norm": 0.10828476399183273, "learning_rate": 4.0207132235522835e-05, "loss": 0.2965, "step": 6683 }, { "epoch": 1.08842568090217, "grad_norm": 0.13655923306941986, "learning_rate": 4.020337210111221e-05, "loss": 0.3139, "step": 6684 }, { "epoch": 1.088588527460001, "grad_norm": 0.09739667177200317, "learning_rate": 4.0199611420854985e-05, "loss": 0.311, "step": 6685 }, { "epoch": 1.0887513740178316, "grad_norm": 0.11722031980752945, "learning_rate": 4.019585019488617e-05, "loss": 0.2802, "step": 6686 }, { "epoch": 1.0889142205756626, "grad_norm": 0.1436564177274704, "learning_rate": 4.0192088423340804e-05, "loss": 0.3251, "step": 6687 }, { "epoch": 1.0890770671334935, "grad_norm": 0.12547385692596436, "learning_rate": 4.0188326106353945e-05, "loss": 0.3102, "step": 6688 }, { "epoch": 1.0892399136913244, "grad_norm": 0.13094080984592438, "learning_rate": 4.018456324406067e-05, "loss": 0.292, "step": 6689 }, { "epoch": 1.0894027602491552, "grad_norm": 0.15513357520103455, "learning_rate": 4.018079983659608e-05, "loss": 0.3175, "step": 6690 }, { "epoch": 1.089565606806986, "grad_norm": 0.12226534634828568, "learning_rate": 4.01770358840953e-05, "loss": 0.3105, "step": 6691 }, { "epoch": 1.089728453364817, "grad_norm": 0.1195463165640831, "learning_rate": 4.0173271386693455e-05, "loss": 0.281, "step": 6692 }, { "epoch": 1.0898912999226478, "grad_norm": 0.09657886624336243, "learning_rate": 4.016950634452571e-05, "loss": 0.3348, "step": 6693 }, { "epoch": 1.0900541464804787, "grad_norm": 0.14717811346054077, "learning_rate": 4.0165740757727245e-05, "loss": 0.3477, "step": 6694 }, { "epoch": 1.0902169930383097, "grad_norm": 0.10385659337043762, "learning_rate": 4.016197462643323e-05, "loss": 0.359, "step": 6695 }, { "epoch": 1.0903798395961406, "grad_norm": 0.1141122505068779, "learning_rate": 4.015820795077892e-05, "loss": 0.292, "step": 6696 }, { "epoch": 1.0905426861539713, "grad_norm": 0.12038485705852509, "learning_rate": 4.015444073089952e-05, "loss": 0.316, "step": 6697 }, { "epoch": 1.0907055327118023, "grad_norm": 0.10445395112037659, "learning_rate": 4.015067296693029e-05, "loss": 0.284, "step": 6698 }, { "epoch": 1.0908683792696332, "grad_norm": 0.11447638273239136, "learning_rate": 4.014690465900651e-05, "loss": 0.3188, "step": 6699 }, { "epoch": 1.0910312258274641, "grad_norm": 0.17420127987861633, "learning_rate": 4.014313580726348e-05, "loss": 0.2903, "step": 6700 }, { "epoch": 1.0911940723852949, "grad_norm": 0.19746848940849304, "learning_rate": 4.01393664118365e-05, "loss": 0.3282, "step": 6701 }, { "epoch": 1.0913569189431258, "grad_norm": 0.09692027419805527, "learning_rate": 4.01355964728609e-05, "loss": 0.3176, "step": 6702 }, { "epoch": 1.0915197655009568, "grad_norm": 0.08383660763502121, "learning_rate": 4.0131825990472044e-05, "loss": 0.325, "step": 6703 }, { "epoch": 1.0916826120587877, "grad_norm": 0.09636850655078888, "learning_rate": 4.0128054964805296e-05, "loss": 0.313, "step": 6704 }, { "epoch": 1.0918454586166184, "grad_norm": 0.16073137521743774, "learning_rate": 4.012428339599605e-05, "loss": 0.3035, "step": 6705 }, { "epoch": 1.0920083051744494, "grad_norm": 0.13769719004631042, "learning_rate": 4.0120511284179716e-05, "loss": 0.3043, "step": 6706 }, { "epoch": 1.0921711517322803, "grad_norm": 0.11738118529319763, "learning_rate": 4.011673862949174e-05, "loss": 0.3197, "step": 6707 }, { "epoch": 1.0923339982901112, "grad_norm": 0.13522858917713165, "learning_rate": 4.011296543206754e-05, "loss": 0.2735, "step": 6708 }, { "epoch": 1.092496844847942, "grad_norm": 0.2682652771472931, "learning_rate": 4.01091916920426e-05, "loss": 0.2924, "step": 6709 }, { "epoch": 1.092659691405773, "grad_norm": 0.15589918196201324, "learning_rate": 4.010541740955242e-05, "loss": 0.3125, "step": 6710 }, { "epoch": 1.0928225379636038, "grad_norm": 0.08320023864507675, "learning_rate": 4.010164258473249e-05, "loss": 0.2871, "step": 6711 }, { "epoch": 1.0929853845214348, "grad_norm": 0.13185274600982666, "learning_rate": 4.009786721771835e-05, "loss": 0.2969, "step": 6712 }, { "epoch": 1.0931482310792655, "grad_norm": 0.1341279298067093, "learning_rate": 4.009409130864554e-05, "loss": 0.3186, "step": 6713 }, { "epoch": 1.0933110776370965, "grad_norm": 0.11786653846502304, "learning_rate": 4.009031485764963e-05, "loss": 0.2867, "step": 6714 }, { "epoch": 1.0934739241949274, "grad_norm": 0.10042250901460648, "learning_rate": 4.00865378648662e-05, "loss": 0.2752, "step": 6715 }, { "epoch": 1.0936367707527581, "grad_norm": 0.13297124207019806, "learning_rate": 4.0082760330430865e-05, "loss": 0.311, "step": 6716 }, { "epoch": 1.093799617310589, "grad_norm": 0.09676435589790344, "learning_rate": 4.007898225447925e-05, "loss": 0.3279, "step": 6717 }, { "epoch": 1.09396246386842, "grad_norm": 0.1571527123451233, "learning_rate": 4.007520363714699e-05, "loss": 0.2793, "step": 6718 }, { "epoch": 1.094125310426251, "grad_norm": 0.13431189954280853, "learning_rate": 4.0071424478569756e-05, "loss": 0.3013, "step": 6719 }, { "epoch": 1.0942881569840817, "grad_norm": 0.1563415378332138, "learning_rate": 4.006764477888322e-05, "loss": 0.3467, "step": 6720 }, { "epoch": 1.0944510035419126, "grad_norm": 0.09534849971532822, "learning_rate": 4.0063864538223094e-05, "loss": 0.2868, "step": 6721 }, { "epoch": 1.0946138500997435, "grad_norm": 0.17532314360141754, "learning_rate": 4.0060083756725106e-05, "loss": 0.325, "step": 6722 }, { "epoch": 1.0947766966575745, "grad_norm": 0.10313621163368225, "learning_rate": 4.0056302434524985e-05, "loss": 0.2847, "step": 6723 }, { "epoch": 1.0949395432154052, "grad_norm": 0.12884213030338287, "learning_rate": 4.005252057175849e-05, "loss": 0.3248, "step": 6724 }, { "epoch": 1.0951023897732362, "grad_norm": 0.1448335498571396, "learning_rate": 4.004873816856142e-05, "loss": 0.3255, "step": 6725 }, { "epoch": 1.095265236331067, "grad_norm": 0.14256137609481812, "learning_rate": 4.004495522506956e-05, "loss": 0.3042, "step": 6726 }, { "epoch": 1.095428082888898, "grad_norm": 0.11141753196716309, "learning_rate": 4.0041171741418726e-05, "loss": 0.2722, "step": 6727 }, { "epoch": 1.0955909294467288, "grad_norm": 0.17556388676166534, "learning_rate": 4.003738771774477e-05, "loss": 0.2565, "step": 6728 }, { "epoch": 1.0957537760045597, "grad_norm": 0.12602615356445312, "learning_rate": 4.003360315418353e-05, "loss": 0.3091, "step": 6729 }, { "epoch": 1.0959166225623906, "grad_norm": 0.16530874371528625, "learning_rate": 4.0029818050870905e-05, "loss": 0.3229, "step": 6730 }, { "epoch": 1.0960794691202214, "grad_norm": 0.15186923742294312, "learning_rate": 4.002603240794278e-05, "loss": 0.3345, "step": 6731 }, { "epoch": 1.0962423156780523, "grad_norm": 0.10206477344036102, "learning_rate": 4.002224622553506e-05, "loss": 0.3415, "step": 6732 }, { "epoch": 1.0964051622358832, "grad_norm": 0.11261685192584991, "learning_rate": 4.0018459503783705e-05, "loss": 0.2884, "step": 6733 }, { "epoch": 1.0965680087937142, "grad_norm": 0.0731956735253334, "learning_rate": 4.001467224282466e-05, "loss": 0.3128, "step": 6734 }, { "epoch": 1.096730855351545, "grad_norm": 0.12461458891630173, "learning_rate": 4.001088444279389e-05, "loss": 0.2903, "step": 6735 }, { "epoch": 1.0968937019093759, "grad_norm": 0.10222595930099487, "learning_rate": 4.0007096103827395e-05, "loss": 0.2935, "step": 6736 }, { "epoch": 1.0970565484672068, "grad_norm": 0.13262397050857544, "learning_rate": 4.000330722606119e-05, "loss": 0.3266, "step": 6737 }, { "epoch": 1.0972193950250377, "grad_norm": 0.15939557552337646, "learning_rate": 3.99995178096313e-05, "loss": 0.3214, "step": 6738 }, { "epoch": 1.0973822415828685, "grad_norm": 0.13365188241004944, "learning_rate": 3.999572785467379e-05, "loss": 0.3159, "step": 6739 }, { "epoch": 1.0975450881406994, "grad_norm": 0.20966151356697083, "learning_rate": 3.999193736132471e-05, "loss": 0.3295, "step": 6740 }, { "epoch": 1.0977079346985303, "grad_norm": 0.1081186830997467, "learning_rate": 3.998814632972017e-05, "loss": 0.2939, "step": 6741 }, { "epoch": 1.0978707812563613, "grad_norm": 0.11545291543006897, "learning_rate": 3.998435475999627e-05, "loss": 0.3255, "step": 6742 }, { "epoch": 1.098033627814192, "grad_norm": 0.14231280982494354, "learning_rate": 3.998056265228913e-05, "loss": 0.308, "step": 6743 }, { "epoch": 1.098196474372023, "grad_norm": 0.1350947469472885, "learning_rate": 3.997677000673492e-05, "loss": 0.3114, "step": 6744 }, { "epoch": 1.0983593209298539, "grad_norm": 0.11258760094642639, "learning_rate": 3.997297682346979e-05, "loss": 0.2923, "step": 6745 }, { "epoch": 1.0985221674876848, "grad_norm": 0.13533921539783478, "learning_rate": 3.996918310262994e-05, "loss": 0.326, "step": 6746 }, { "epoch": 1.0986850140455156, "grad_norm": 0.11548975110054016, "learning_rate": 3.996538884435156e-05, "loss": 0.3016, "step": 6747 }, { "epoch": 1.0988478606033465, "grad_norm": 0.10444048047065735, "learning_rate": 3.996159404877088e-05, "loss": 0.3277, "step": 6748 }, { "epoch": 1.0990107071611774, "grad_norm": 0.12627018988132477, "learning_rate": 3.995779871602415e-05, "loss": 0.2969, "step": 6749 }, { "epoch": 1.0991735537190084, "grad_norm": 0.11549177765846252, "learning_rate": 3.995400284624763e-05, "loss": 0.3324, "step": 6750 }, { "epoch": 1.099336400276839, "grad_norm": 0.11136720329523087, "learning_rate": 3.9950206439577606e-05, "loss": 0.2593, "step": 6751 }, { "epoch": 1.09949924683467, "grad_norm": 0.09392962604761124, "learning_rate": 3.994640949615038e-05, "loss": 0.3521, "step": 6752 }, { "epoch": 1.099662093392501, "grad_norm": 0.13229608535766602, "learning_rate": 3.994261201610228e-05, "loss": 0.3226, "step": 6753 }, { "epoch": 1.0998249399503317, "grad_norm": 0.13314978778362274, "learning_rate": 3.993881399956962e-05, "loss": 0.3058, "step": 6754 }, { "epoch": 1.0999877865081626, "grad_norm": 0.10428091138601303, "learning_rate": 3.993501544668879e-05, "loss": 0.2873, "step": 6755 }, { "epoch": 1.1001506330659936, "grad_norm": 0.09714414924383163, "learning_rate": 3.993121635759617e-05, "loss": 0.3262, "step": 6756 }, { "epoch": 1.1003134796238245, "grad_norm": 0.12470025569200516, "learning_rate": 3.992741673242813e-05, "loss": 0.2968, "step": 6757 }, { "epoch": 1.1004763261816553, "grad_norm": 0.16834257543087006, "learning_rate": 3.992361657132112e-05, "loss": 0.369, "step": 6758 }, { "epoch": 1.1006391727394862, "grad_norm": 0.16151383519172668, "learning_rate": 3.991981587441156e-05, "loss": 0.3389, "step": 6759 }, { "epoch": 1.1008020192973171, "grad_norm": 0.08458676934242249, "learning_rate": 3.99160146418359e-05, "loss": 0.3083, "step": 6760 }, { "epoch": 1.100964865855148, "grad_norm": 0.1549646258354187, "learning_rate": 3.9912212873730626e-05, "loss": 0.3426, "step": 6761 }, { "epoch": 1.1011277124129788, "grad_norm": 0.136720672249794, "learning_rate": 3.990841057023224e-05, "loss": 0.295, "step": 6762 }, { "epoch": 1.1012905589708097, "grad_norm": 0.07663948088884354, "learning_rate": 3.990460773147724e-05, "loss": 0.2993, "step": 6763 }, { "epoch": 1.1014534055286407, "grad_norm": 0.12537437677383423, "learning_rate": 3.990080435760217e-05, "loss": 0.286, "step": 6764 }, { "epoch": 1.1016162520864716, "grad_norm": 0.12372639775276184, "learning_rate": 3.989700044874358e-05, "loss": 0.3183, "step": 6765 }, { "epoch": 1.1017790986443023, "grad_norm": 0.10579647123813629, "learning_rate": 3.9893196005038045e-05, "loss": 0.2529, "step": 6766 }, { "epoch": 1.1019419452021333, "grad_norm": 0.07191664725542068, "learning_rate": 3.9889391026622144e-05, "loss": 0.2761, "step": 6767 }, { "epoch": 1.1021047917599642, "grad_norm": 0.10945068299770355, "learning_rate": 3.988558551363251e-05, "loss": 0.289, "step": 6768 }, { "epoch": 1.102267638317795, "grad_norm": 0.08835913240909576, "learning_rate": 3.9881779466205746e-05, "loss": 0.3353, "step": 6769 }, { "epoch": 1.102430484875626, "grad_norm": 0.12895230948925018, "learning_rate": 3.987797288447851e-05, "loss": 0.2908, "step": 6770 }, { "epoch": 1.1025933314334568, "grad_norm": 0.11083643138408661, "learning_rate": 3.987416576858748e-05, "loss": 0.2843, "step": 6771 }, { "epoch": 1.1027561779912878, "grad_norm": 0.09030264616012573, "learning_rate": 3.9870358118669325e-05, "loss": 0.323, "step": 6772 }, { "epoch": 1.1029190245491185, "grad_norm": 0.1494063138961792, "learning_rate": 3.9866549934860766e-05, "loss": 0.3221, "step": 6773 }, { "epoch": 1.1030818711069494, "grad_norm": 0.11033959686756134, "learning_rate": 3.986274121729853e-05, "loss": 0.3054, "step": 6774 }, { "epoch": 1.1032447176647804, "grad_norm": 0.18534986674785614, "learning_rate": 3.985893196611934e-05, "loss": 0.3166, "step": 6775 }, { "epoch": 1.1034075642226113, "grad_norm": 0.1285027265548706, "learning_rate": 3.985512218145998e-05, "loss": 0.3229, "step": 6776 }, { "epoch": 1.103570410780442, "grad_norm": 0.11632563173770905, "learning_rate": 3.985131186345723e-05, "loss": 0.2874, "step": 6777 }, { "epoch": 1.103733257338273, "grad_norm": 0.09060073643922806, "learning_rate": 3.984750101224788e-05, "loss": 0.2825, "step": 6778 }, { "epoch": 1.103896103896104, "grad_norm": 0.13925780355930328, "learning_rate": 3.984368962796877e-05, "loss": 0.3145, "step": 6779 }, { "epoch": 1.1040589504539349, "grad_norm": 0.1617552489042282, "learning_rate": 3.9839877710756725e-05, "loss": 0.2978, "step": 6780 }, { "epoch": 1.1042217970117656, "grad_norm": 0.10790249705314636, "learning_rate": 3.9836065260748604e-05, "loss": 0.3105, "step": 6781 }, { "epoch": 1.1043846435695965, "grad_norm": 0.15755614638328552, "learning_rate": 3.98322522780813e-05, "loss": 0.2666, "step": 6782 }, { "epoch": 1.1045474901274275, "grad_norm": 0.09015249460935593, "learning_rate": 3.982843876289169e-05, "loss": 0.2866, "step": 6783 }, { "epoch": 1.1047103366852582, "grad_norm": 0.13880324363708496, "learning_rate": 3.9824624715316705e-05, "loss": 0.3429, "step": 6784 }, { "epoch": 1.1048731832430891, "grad_norm": 0.11504634469747543, "learning_rate": 3.9820810135493285e-05, "loss": 0.3251, "step": 6785 }, { "epoch": 1.10503602980092, "grad_norm": 0.10679813474416733, "learning_rate": 3.9816995023558366e-05, "loss": 0.2871, "step": 6786 }, { "epoch": 1.105198876358751, "grad_norm": 0.13641956448554993, "learning_rate": 3.9813179379648945e-05, "loss": 0.3496, "step": 6787 }, { "epoch": 1.105361722916582, "grad_norm": 0.1300218105316162, "learning_rate": 3.9809363203902e-05, "loss": 0.2837, "step": 6788 }, { "epoch": 1.1055245694744127, "grad_norm": 0.15312758088111877, "learning_rate": 3.980554649645453e-05, "loss": 0.3085, "step": 6789 }, { "epoch": 1.1056874160322436, "grad_norm": 0.10576466470956802, "learning_rate": 3.980172925744359e-05, "loss": 0.2953, "step": 6790 }, { "epoch": 1.1058502625900746, "grad_norm": 0.19343455135822296, "learning_rate": 3.979791148700623e-05, "loss": 0.3194, "step": 6791 }, { "epoch": 1.1060131091479053, "grad_norm": 0.08455399423837662, "learning_rate": 3.9794093185279506e-05, "loss": 0.2886, "step": 6792 }, { "epoch": 1.1061759557057362, "grad_norm": 0.1365627944469452, "learning_rate": 3.9790274352400515e-05, "loss": 0.3108, "step": 6793 }, { "epoch": 1.1063388022635672, "grad_norm": 0.12334617972373962, "learning_rate": 3.978645498850636e-05, "loss": 0.324, "step": 6794 }, { "epoch": 1.1065016488213981, "grad_norm": 0.11634446680545807, "learning_rate": 3.978263509373417e-05, "loss": 0.2788, "step": 6795 }, { "epoch": 1.1066644953792288, "grad_norm": 0.24022069573402405, "learning_rate": 3.977881466822109e-05, "loss": 0.3219, "step": 6796 }, { "epoch": 1.1068273419370598, "grad_norm": 0.07054474949836731, "learning_rate": 3.9774993712104284e-05, "loss": 0.3408, "step": 6797 }, { "epoch": 1.1069901884948907, "grad_norm": 0.1155703142285347, "learning_rate": 3.977117222552094e-05, "loss": 0.3087, "step": 6798 }, { "epoch": 1.1071530350527217, "grad_norm": 0.10515070706605911, "learning_rate": 3.9767350208608255e-05, "loss": 0.3161, "step": 6799 }, { "epoch": 1.1073158816105524, "grad_norm": 0.09936880320310593, "learning_rate": 3.976352766150345e-05, "loss": 0.315, "step": 6800 }, { "epoch": 1.1074787281683833, "grad_norm": 0.10743293911218643, "learning_rate": 3.9759704584343774e-05, "loss": 0.4198, "step": 6801 }, { "epoch": 1.1076415747262143, "grad_norm": 0.10760404169559479, "learning_rate": 3.975588097726649e-05, "loss": 0.3297, "step": 6802 }, { "epoch": 1.1078044212840452, "grad_norm": 0.14226125180721283, "learning_rate": 3.975205684040886e-05, "loss": 0.2906, "step": 6803 }, { "epoch": 1.107967267841876, "grad_norm": 0.1357550323009491, "learning_rate": 3.9748232173908184e-05, "loss": 0.3212, "step": 6804 }, { "epoch": 1.1081301143997069, "grad_norm": 0.09961352497339249, "learning_rate": 3.9744406977901796e-05, "loss": 0.3165, "step": 6805 }, { "epoch": 1.1082929609575378, "grad_norm": 0.08177007734775543, "learning_rate": 3.974058125252702e-05, "loss": 0.3275, "step": 6806 }, { "epoch": 1.1084558075153685, "grad_norm": 0.09066463261842728, "learning_rate": 3.973675499792121e-05, "loss": 0.3437, "step": 6807 }, { "epoch": 1.1086186540731995, "grad_norm": 0.16615356504917145, "learning_rate": 3.973292821422175e-05, "loss": 0.3014, "step": 6808 }, { "epoch": 1.1087815006310304, "grad_norm": 0.17623035609722137, "learning_rate": 3.972910090156602e-05, "loss": 0.3344, "step": 6809 }, { "epoch": 1.1089443471888614, "grad_norm": 0.11346274614334106, "learning_rate": 3.972527306009144e-05, "loss": 0.3321, "step": 6810 }, { "epoch": 1.109107193746692, "grad_norm": 0.13732296228408813, "learning_rate": 3.9721444689935436e-05, "loss": 0.2727, "step": 6811 }, { "epoch": 1.109270040304523, "grad_norm": 0.1181369349360466, "learning_rate": 3.971761579123546e-05, "loss": 0.2694, "step": 6812 }, { "epoch": 1.109432886862354, "grad_norm": 0.11932972818613052, "learning_rate": 3.971378636412899e-05, "loss": 0.2717, "step": 6813 }, { "epoch": 1.109595733420185, "grad_norm": 0.08052345365285873, "learning_rate": 3.97099564087535e-05, "loss": 0.3201, "step": 6814 }, { "epoch": 1.1097585799780156, "grad_norm": 0.17893780767917633, "learning_rate": 3.9706125925246503e-05, "loss": 0.3437, "step": 6815 }, { "epoch": 1.1099214265358466, "grad_norm": 0.10583271086215973, "learning_rate": 3.970229491374553e-05, "loss": 0.284, "step": 6816 }, { "epoch": 1.1100842730936775, "grad_norm": 0.13025252521038055, "learning_rate": 3.969846337438812e-05, "loss": 0.3586, "step": 6817 }, { "epoch": 1.1102471196515085, "grad_norm": 0.13280494511127472, "learning_rate": 3.969463130731183e-05, "loss": 0.3314, "step": 6818 }, { "epoch": 1.1104099662093392, "grad_norm": 0.09159359335899353, "learning_rate": 3.969079871265425e-05, "loss": 0.3156, "step": 6819 }, { "epoch": 1.1105728127671701, "grad_norm": 0.10947602242231369, "learning_rate": 3.9686965590552996e-05, "loss": 0.2878, "step": 6820 }, { "epoch": 1.110735659325001, "grad_norm": 0.11371111869812012, "learning_rate": 3.968313194114566e-05, "loss": 0.3317, "step": 6821 }, { "epoch": 1.1108985058828318, "grad_norm": 0.10016866028308868, "learning_rate": 3.9679297764569895e-05, "loss": 0.3611, "step": 6822 }, { "epoch": 1.1110613524406627, "grad_norm": 0.09550793468952179, "learning_rate": 3.9675463060963355e-05, "loss": 0.3273, "step": 6823 }, { "epoch": 1.1112241989984937, "grad_norm": 0.1497795283794403, "learning_rate": 3.9671627830463745e-05, "loss": 0.2914, "step": 6824 }, { "epoch": 1.1113870455563246, "grad_norm": 0.11513722687959671, "learning_rate": 3.966779207320872e-05, "loss": 0.2729, "step": 6825 }, { "epoch": 1.1115498921141553, "grad_norm": 0.12752963602542877, "learning_rate": 3.966395578933601e-05, "loss": 0.3004, "step": 6826 }, { "epoch": 1.1117127386719863, "grad_norm": 0.11700548976659775, "learning_rate": 3.966011897898336e-05, "loss": 0.3062, "step": 6827 }, { "epoch": 1.1118755852298172, "grad_norm": 0.16476446390151978, "learning_rate": 3.965628164228852e-05, "loss": 0.3616, "step": 6828 }, { "epoch": 1.1120384317876482, "grad_norm": 0.10705509781837463, "learning_rate": 3.965244377938925e-05, "loss": 0.3512, "step": 6829 }, { "epoch": 1.1122012783454789, "grad_norm": 0.09523449093103409, "learning_rate": 3.9648605390423356e-05, "loss": 0.2827, "step": 6830 }, { "epoch": 1.1123641249033098, "grad_norm": 0.09379976987838745, "learning_rate": 3.964476647552864e-05, "loss": 0.2986, "step": 6831 }, { "epoch": 1.1125269714611408, "grad_norm": 0.1256323903799057, "learning_rate": 3.964092703484292e-05, "loss": 0.3288, "step": 6832 }, { "epoch": 1.1126898180189717, "grad_norm": 0.14583715796470642, "learning_rate": 3.9637087068504066e-05, "loss": 0.3191, "step": 6833 }, { "epoch": 1.1128526645768024, "grad_norm": 0.14523111283779144, "learning_rate": 3.9633246576649925e-05, "loss": 0.3195, "step": 6834 }, { "epoch": 1.1130155111346334, "grad_norm": 0.16696792840957642, "learning_rate": 3.962940555941839e-05, "loss": 0.3603, "step": 6835 }, { "epoch": 1.1131783576924643, "grad_norm": 0.11631731688976288, "learning_rate": 3.962556401694737e-05, "loss": 0.3282, "step": 6836 }, { "epoch": 1.1133412042502953, "grad_norm": 0.12929539382457733, "learning_rate": 3.962172194937477e-05, "loss": 0.3145, "step": 6837 }, { "epoch": 1.113504050808126, "grad_norm": 0.09757212549448013, "learning_rate": 3.961787935683856e-05, "loss": 0.2882, "step": 6838 }, { "epoch": 1.113666897365957, "grad_norm": 0.09249593317508698, "learning_rate": 3.961403623947667e-05, "loss": 0.2747, "step": 6839 }, { "epoch": 1.1138297439237879, "grad_norm": 0.16983701288700104, "learning_rate": 3.961019259742711e-05, "loss": 0.3145, "step": 6840 }, { "epoch": 1.1139925904816188, "grad_norm": 0.08720964938402176, "learning_rate": 3.960634843082785e-05, "loss": 0.3348, "step": 6841 }, { "epoch": 1.1141554370394495, "grad_norm": 0.12235982716083527, "learning_rate": 3.960250373981693e-05, "loss": 0.2828, "step": 6842 }, { "epoch": 1.1143182835972805, "grad_norm": 0.1442495435476303, "learning_rate": 3.959865852453236e-05, "loss": 0.2964, "step": 6843 }, { "epoch": 1.1144811301551114, "grad_norm": 0.13387750089168549, "learning_rate": 3.9594812785112225e-05, "loss": 0.2887, "step": 6844 }, { "epoch": 1.1146439767129421, "grad_norm": 0.12583504617214203, "learning_rate": 3.959096652169458e-05, "loss": 0.3225, "step": 6845 }, { "epoch": 1.114806823270773, "grad_norm": 0.09604765474796295, "learning_rate": 3.9587119734417516e-05, "loss": 0.3092, "step": 6846 }, { "epoch": 1.114969669828604, "grad_norm": 0.09519507735967636, "learning_rate": 3.9583272423419154e-05, "loss": 0.3239, "step": 6847 }, { "epoch": 1.115132516386435, "grad_norm": 0.14840058982372284, "learning_rate": 3.9579424588837615e-05, "loss": 0.3071, "step": 6848 }, { "epoch": 1.1152953629442657, "grad_norm": 0.11829027533531189, "learning_rate": 3.957557623081106e-05, "loss": 0.2901, "step": 6849 }, { "epoch": 1.1154582095020966, "grad_norm": 0.17805354297161102, "learning_rate": 3.957172734947764e-05, "loss": 0.3014, "step": 6850 }, { "epoch": 1.1156210560599276, "grad_norm": 0.21031901240348816, "learning_rate": 3.9567877944975565e-05, "loss": 0.3206, "step": 6851 }, { "epoch": 1.1157839026177585, "grad_norm": 0.15702633559703827, "learning_rate": 3.9564028017443e-05, "loss": 0.301, "step": 6852 }, { "epoch": 1.1159467491755892, "grad_norm": 0.12887738645076752, "learning_rate": 3.956017756701822e-05, "loss": 0.3007, "step": 6853 }, { "epoch": 1.1161095957334202, "grad_norm": 0.07738782465457916, "learning_rate": 3.955632659383943e-05, "loss": 0.2978, "step": 6854 }, { "epoch": 1.116272442291251, "grad_norm": 0.15092165768146515, "learning_rate": 3.955247509804489e-05, "loss": 0.321, "step": 6855 }, { "epoch": 1.116435288849082, "grad_norm": 0.1470612734556198, "learning_rate": 3.954862307977292e-05, "loss": 0.3358, "step": 6856 }, { "epoch": 1.1165981354069128, "grad_norm": 0.12674874067306519, "learning_rate": 3.954477053916178e-05, "loss": 0.3235, "step": 6857 }, { "epoch": 1.1167609819647437, "grad_norm": 0.17172512412071228, "learning_rate": 3.95409174763498e-05, "loss": 0.3188, "step": 6858 }, { "epoch": 1.1169238285225747, "grad_norm": 0.1171945258975029, "learning_rate": 3.953706389147531e-05, "loss": 0.3131, "step": 6859 }, { "epoch": 1.1170866750804054, "grad_norm": 0.20807386934757233, "learning_rate": 3.9533209784676686e-05, "loss": 0.3124, "step": 6860 }, { "epoch": 1.1172495216382363, "grad_norm": 0.11018043756484985, "learning_rate": 3.952935515609227e-05, "loss": 0.3138, "step": 6861 }, { "epoch": 1.1174123681960673, "grad_norm": 0.1746813803911209, "learning_rate": 3.952550000586049e-05, "loss": 0.3258, "step": 6862 }, { "epoch": 1.1175752147538982, "grad_norm": 0.07759582996368408, "learning_rate": 3.9521644334119734e-05, "loss": 0.302, "step": 6863 }, { "epoch": 1.117738061311729, "grad_norm": 0.2293839305639267, "learning_rate": 3.9517788141008445e-05, "loss": 0.3225, "step": 6864 }, { "epoch": 1.1179009078695599, "grad_norm": 0.07704583555459976, "learning_rate": 3.951393142666506e-05, "loss": 0.317, "step": 6865 }, { "epoch": 1.1180637544273908, "grad_norm": 0.15444017946720123, "learning_rate": 3.9510074191228044e-05, "loss": 0.3421, "step": 6866 }, { "epoch": 1.1182266009852218, "grad_norm": 0.17347797751426697, "learning_rate": 3.9506216434835905e-05, "loss": 0.3564, "step": 6867 }, { "epoch": 1.1183894475430525, "grad_norm": 0.10741379857063293, "learning_rate": 3.950235815762712e-05, "loss": 0.3034, "step": 6868 }, { "epoch": 1.1185522941008834, "grad_norm": 0.14142175018787384, "learning_rate": 3.949849935974024e-05, "loss": 0.2855, "step": 6869 }, { "epoch": 1.1187151406587144, "grad_norm": 0.11971338838338852, "learning_rate": 3.9494640041313784e-05, "loss": 0.3195, "step": 6870 }, { "epoch": 1.1188779872165453, "grad_norm": 0.110755555331707, "learning_rate": 3.949078020248633e-05, "loss": 0.3108, "step": 6871 }, { "epoch": 1.119040833774376, "grad_norm": 0.15711119771003723, "learning_rate": 3.9486919843396454e-05, "loss": 0.3516, "step": 6872 }, { "epoch": 1.119203680332207, "grad_norm": 0.10414404422044754, "learning_rate": 3.948305896418275e-05, "loss": 0.2933, "step": 6873 }, { "epoch": 1.119366526890038, "grad_norm": 0.09193812310695648, "learning_rate": 3.947919756498383e-05, "loss": 0.3023, "step": 6874 }, { "epoch": 1.1195293734478688, "grad_norm": 0.10026496648788452, "learning_rate": 3.947533564593834e-05, "loss": 0.3064, "step": 6875 }, { "epoch": 1.1196922200056996, "grad_norm": 0.10857179760932922, "learning_rate": 3.9471473207184926e-05, "loss": 0.3236, "step": 6876 }, { "epoch": 1.1198550665635305, "grad_norm": 0.10646715760231018, "learning_rate": 3.946761024886227e-05, "loss": 0.3441, "step": 6877 }, { "epoch": 1.1200179131213615, "grad_norm": 0.11533447355031967, "learning_rate": 3.9463746771109066e-05, "loss": 0.2742, "step": 6878 }, { "epoch": 1.1201807596791924, "grad_norm": 0.06800375133752823, "learning_rate": 3.945988277406401e-05, "loss": 0.2825, "step": 6879 }, { "epoch": 1.1203436062370231, "grad_norm": 0.1289490908384323, "learning_rate": 3.9456018257865844e-05, "loss": 0.293, "step": 6880 }, { "epoch": 1.120506452794854, "grad_norm": 0.08103475719690323, "learning_rate": 3.945215322265331e-05, "loss": 0.2772, "step": 6881 }, { "epoch": 1.120669299352685, "grad_norm": 0.13143523037433624, "learning_rate": 3.944828766856518e-05, "loss": 0.2834, "step": 6882 }, { "epoch": 1.1208321459105157, "grad_norm": 0.1402403861284256, "learning_rate": 3.9444421595740236e-05, "loss": 0.319, "step": 6883 }, { "epoch": 1.1209949924683467, "grad_norm": 0.07640191167593002, "learning_rate": 3.944055500431727e-05, "loss": 0.293, "step": 6884 }, { "epoch": 1.1211578390261776, "grad_norm": 0.11860793828964233, "learning_rate": 3.943668789443511e-05, "loss": 0.3194, "step": 6885 }, { "epoch": 1.1213206855840085, "grad_norm": 0.1152382344007492, "learning_rate": 3.943282026623262e-05, "loss": 0.2693, "step": 6886 }, { "epoch": 1.1214835321418393, "grad_norm": 0.12660323083400726, "learning_rate": 3.942895211984863e-05, "loss": 0.3142, "step": 6887 }, { "epoch": 1.1216463786996702, "grad_norm": 0.1046823263168335, "learning_rate": 3.9425083455422026e-05, "loss": 0.2825, "step": 6888 }, { "epoch": 1.1218092252575012, "grad_norm": 0.15552650392055511, "learning_rate": 3.942121427309171e-05, "loss": 0.3328, "step": 6889 }, { "epoch": 1.121972071815332, "grad_norm": 0.15377338230609894, "learning_rate": 3.941734457299659e-05, "loss": 0.3379, "step": 6890 }, { "epoch": 1.1221349183731628, "grad_norm": 0.16210927069187164, "learning_rate": 3.941347435527562e-05, "loss": 0.314, "step": 6891 }, { "epoch": 1.1222977649309938, "grad_norm": 0.10907071828842163, "learning_rate": 3.940960362006772e-05, "loss": 0.3063, "step": 6892 }, { "epoch": 1.1224606114888247, "grad_norm": 0.1129993423819542, "learning_rate": 3.9405732367511884e-05, "loss": 0.3216, "step": 6893 }, { "epoch": 1.1226234580466556, "grad_norm": 0.09528768062591553, "learning_rate": 3.9401860597747096e-05, "loss": 0.3037, "step": 6894 }, { "epoch": 1.1227863046044864, "grad_norm": 0.10575257986783981, "learning_rate": 3.939798831091236e-05, "loss": 0.3152, "step": 6895 }, { "epoch": 1.1229491511623173, "grad_norm": 0.12589676678180695, "learning_rate": 3.939411550714671e-05, "loss": 0.3409, "step": 6896 }, { "epoch": 1.1231119977201482, "grad_norm": 0.10160799324512482, "learning_rate": 3.9390242186589184e-05, "loss": 0.3023, "step": 6897 }, { "epoch": 1.123274844277979, "grad_norm": 0.2008846253156662, "learning_rate": 3.9386368349378856e-05, "loss": 0.332, "step": 6898 }, { "epoch": 1.12343769083581, "grad_norm": 0.16494044661521912, "learning_rate": 3.938249399565479e-05, "loss": 0.2931, "step": 6899 }, { "epoch": 1.1236005373936409, "grad_norm": 0.11727753281593323, "learning_rate": 3.93786191255561e-05, "loss": 0.324, "step": 6900 }, { "epoch": 1.1237633839514718, "grad_norm": 0.09597581624984741, "learning_rate": 3.9374743739221915e-05, "loss": 0.3114, "step": 6901 }, { "epoch": 1.1239262305093025, "grad_norm": 0.1029372289776802, "learning_rate": 3.937086783679136e-05, "loss": 0.2643, "step": 6902 }, { "epoch": 1.1240890770671335, "grad_norm": 0.09369345754384995, "learning_rate": 3.9366991418403586e-05, "loss": 0.3674, "step": 6903 }, { "epoch": 1.1242519236249644, "grad_norm": 0.10343486070632935, "learning_rate": 3.936311448419778e-05, "loss": 0.3024, "step": 6904 }, { "epoch": 1.1244147701827953, "grad_norm": 0.16255588829517365, "learning_rate": 3.9359237034313124e-05, "loss": 0.339, "step": 6905 }, { "epoch": 1.124577616740626, "grad_norm": 0.14149658381938934, "learning_rate": 3.9355359068888845e-05, "loss": 0.3077, "step": 6906 }, { "epoch": 1.124740463298457, "grad_norm": 0.11133017390966415, "learning_rate": 3.935148058806416e-05, "loss": 0.3478, "step": 6907 }, { "epoch": 1.124903309856288, "grad_norm": 0.12530481815338135, "learning_rate": 3.934760159197833e-05, "loss": 0.3114, "step": 6908 }, { "epoch": 1.125066156414119, "grad_norm": 0.17584837973117828, "learning_rate": 3.9343722080770617e-05, "loss": 0.3422, "step": 6909 }, { "epoch": 1.1252290029719496, "grad_norm": 0.11204204708337784, "learning_rate": 3.93398420545803e-05, "loss": 0.2869, "step": 6910 }, { "epoch": 1.1253918495297806, "grad_norm": 0.13424737751483917, "learning_rate": 3.9335961513546695e-05, "loss": 0.2933, "step": 6911 }, { "epoch": 1.1255546960876115, "grad_norm": 0.1892455369234085, "learning_rate": 3.933208045780912e-05, "loss": 0.3614, "step": 6912 }, { "epoch": 1.1257175426454422, "grad_norm": 0.12023527175188065, "learning_rate": 3.932819888750691e-05, "loss": 0.3067, "step": 6913 }, { "epoch": 1.1258803892032732, "grad_norm": 0.15132339298725128, "learning_rate": 3.9324316802779436e-05, "loss": 0.3471, "step": 6914 }, { "epoch": 1.126043235761104, "grad_norm": 0.09699384868144989, "learning_rate": 3.932043420376607e-05, "loss": 0.3315, "step": 6915 }, { "epoch": 1.126206082318935, "grad_norm": 0.11628855764865875, "learning_rate": 3.9316551090606215e-05, "loss": 0.2708, "step": 6916 }, { "epoch": 1.126368928876766, "grad_norm": 0.12989486753940582, "learning_rate": 3.9312667463439275e-05, "loss": 0.3102, "step": 6917 }, { "epoch": 1.1265317754345967, "grad_norm": 0.15015433728694916, "learning_rate": 3.93087833224047e-05, "loss": 0.3087, "step": 6918 }, { "epoch": 1.1266946219924276, "grad_norm": 0.13037118315696716, "learning_rate": 3.9304898667641925e-05, "loss": 0.3139, "step": 6919 }, { "epoch": 1.1268574685502586, "grad_norm": 0.15924692153930664, "learning_rate": 3.930101349929044e-05, "loss": 0.286, "step": 6920 }, { "epoch": 1.1270203151080893, "grad_norm": 0.15714262425899506, "learning_rate": 3.929712781748971e-05, "loss": 0.3305, "step": 6921 }, { "epoch": 1.1271831616659203, "grad_norm": 0.09208054840564728, "learning_rate": 3.929324162237927e-05, "loss": 0.3058, "step": 6922 }, { "epoch": 1.1273460082237512, "grad_norm": 0.15922829508781433, "learning_rate": 3.928935491409862e-05, "loss": 0.3341, "step": 6923 }, { "epoch": 1.1275088547815821, "grad_norm": 0.18556807935237885, "learning_rate": 3.9285467692787324e-05, "loss": 0.3419, "step": 6924 }, { "epoch": 1.1276717013394129, "grad_norm": 0.11815615743398666, "learning_rate": 3.928157995858493e-05, "loss": 0.2991, "step": 6925 }, { "epoch": 1.1278345478972438, "grad_norm": 0.08374461531639099, "learning_rate": 3.927769171163103e-05, "loss": 0.3005, "step": 6926 }, { "epoch": 1.1279973944550747, "grad_norm": 0.16853271424770355, "learning_rate": 3.9273802952065225e-05, "loss": 0.2923, "step": 6927 }, { "epoch": 1.1281602410129057, "grad_norm": 0.15949228405952454, "learning_rate": 3.9269913680027124e-05, "loss": 0.3391, "step": 6928 }, { "epoch": 1.1283230875707364, "grad_norm": 0.16468095779418945, "learning_rate": 3.9266023895656375e-05, "loss": 0.3054, "step": 6929 }, { "epoch": 1.1284859341285673, "grad_norm": 0.136072039604187, "learning_rate": 3.926213359909262e-05, "loss": 0.3047, "step": 6930 }, { "epoch": 1.1286487806863983, "grad_norm": 0.14906154572963715, "learning_rate": 3.925824279047554e-05, "loss": 0.3153, "step": 6931 }, { "epoch": 1.1288116272442292, "grad_norm": 0.08984553068876266, "learning_rate": 3.9254351469944824e-05, "loss": 0.3083, "step": 6932 }, { "epoch": 1.12897447380206, "grad_norm": 0.09347441047430038, "learning_rate": 3.925045963764018e-05, "loss": 0.3215, "step": 6933 }, { "epoch": 1.129137320359891, "grad_norm": 0.11522213369607925, "learning_rate": 3.9246567293701344e-05, "loss": 0.2931, "step": 6934 }, { "epoch": 1.1293001669177218, "grad_norm": 0.10876055806875229, "learning_rate": 3.924267443826806e-05, "loss": 0.3412, "step": 6935 }, { "epoch": 1.1294630134755526, "grad_norm": 0.14858239889144897, "learning_rate": 3.923878107148008e-05, "loss": 0.325, "step": 6936 }, { "epoch": 1.1296258600333835, "grad_norm": 0.09885525703430176, "learning_rate": 3.923488719347721e-05, "loss": 0.3109, "step": 6937 }, { "epoch": 1.1297887065912144, "grad_norm": 0.11990194767713547, "learning_rate": 3.923099280439924e-05, "loss": 0.3219, "step": 6938 }, { "epoch": 1.1299515531490454, "grad_norm": 0.11453370004892349, "learning_rate": 3.922709790438599e-05, "loss": 0.2909, "step": 6939 }, { "epoch": 1.130114399706876, "grad_norm": 0.1330733597278595, "learning_rate": 3.9223202493577306e-05, "loss": 0.3606, "step": 6940 }, { "epoch": 1.130277246264707, "grad_norm": 0.08890077471733093, "learning_rate": 3.9219306572113024e-05, "loss": 0.2731, "step": 6941 }, { "epoch": 1.130440092822538, "grad_norm": 0.14126019179821014, "learning_rate": 3.921541014013305e-05, "loss": 0.3068, "step": 6942 }, { "epoch": 1.130602939380369, "grad_norm": 0.1352255940437317, "learning_rate": 3.921151319777725e-05, "loss": 0.328, "step": 6943 }, { "epoch": 1.1307657859381997, "grad_norm": 0.13450373709201813, "learning_rate": 3.9207615745185545e-05, "loss": 0.329, "step": 6944 }, { "epoch": 1.1309286324960306, "grad_norm": 0.11380720138549805, "learning_rate": 3.920371778249787e-05, "loss": 0.3044, "step": 6945 }, { "epoch": 1.1310914790538615, "grad_norm": 0.12661829590797424, "learning_rate": 3.919981930985418e-05, "loss": 0.3303, "step": 6946 }, { "epoch": 1.1312543256116925, "grad_norm": 0.15455110371112823, "learning_rate": 3.9195920327394426e-05, "loss": 0.3343, "step": 6947 }, { "epoch": 1.1314171721695232, "grad_norm": 0.1856488287448883, "learning_rate": 3.91920208352586e-05, "loss": 0.3109, "step": 6948 }, { "epoch": 1.1315800187273541, "grad_norm": 0.09564864635467529, "learning_rate": 3.9188120833586706e-05, "loss": 0.2982, "step": 6949 }, { "epoch": 1.131742865285185, "grad_norm": 0.15890638530254364, "learning_rate": 3.9184220322518763e-05, "loss": 0.3202, "step": 6950 }, { "epoch": 1.1319057118430158, "grad_norm": 0.1094525083899498, "learning_rate": 3.918031930219483e-05, "loss": 0.297, "step": 6951 }, { "epoch": 1.1320685584008467, "grad_norm": 0.11204992234706879, "learning_rate": 3.917641777275493e-05, "loss": 0.3096, "step": 6952 }, { "epoch": 1.1322314049586777, "grad_norm": 0.11550881713628769, "learning_rate": 3.917251573433916e-05, "loss": 0.2795, "step": 6953 }, { "epoch": 1.1323942515165086, "grad_norm": 0.09957069903612137, "learning_rate": 3.9168613187087615e-05, "loss": 0.325, "step": 6954 }, { "epoch": 1.1325570980743396, "grad_norm": 0.08625872433185577, "learning_rate": 3.9164710131140414e-05, "loss": 0.2987, "step": 6955 }, { "epoch": 1.1327199446321703, "grad_norm": 0.16601094603538513, "learning_rate": 3.916080656663767e-05, "loss": 0.3427, "step": 6956 }, { "epoch": 1.1328827911900012, "grad_norm": 0.16818919777870178, "learning_rate": 3.915690249371955e-05, "loss": 0.347, "step": 6957 }, { "epoch": 1.1330456377478322, "grad_norm": 0.1145259216427803, "learning_rate": 3.9152997912526214e-05, "loss": 0.3659, "step": 6958 }, { "epoch": 1.133208484305663, "grad_norm": 0.12519913911819458, "learning_rate": 3.9149092823197846e-05, "loss": 0.3323, "step": 6959 }, { "epoch": 1.1333713308634938, "grad_norm": 0.11996486783027649, "learning_rate": 3.9145187225874654e-05, "loss": 0.3291, "step": 6960 }, { "epoch": 1.1335341774213248, "grad_norm": 0.12048550695180893, "learning_rate": 3.9141281120696874e-05, "loss": 0.3263, "step": 6961 }, { "epoch": 1.1336970239791557, "grad_norm": 0.16503140330314636, "learning_rate": 3.913737450780471e-05, "loss": 0.3226, "step": 6962 }, { "epoch": 1.1338598705369864, "grad_norm": 0.1422426551580429, "learning_rate": 3.913346738733846e-05, "loss": 0.3038, "step": 6963 }, { "epoch": 1.1340227170948174, "grad_norm": 0.9663782715797424, "learning_rate": 3.9129559759438384e-05, "loss": 0.3594, "step": 6964 }, { "epoch": 1.1341855636526483, "grad_norm": 0.07102137804031372, "learning_rate": 3.912565162424478e-05, "loss": 0.288, "step": 6965 }, { "epoch": 1.134348410210479, "grad_norm": 0.11011911183595657, "learning_rate": 3.912174298189796e-05, "loss": 0.3241, "step": 6966 }, { "epoch": 1.13451125676831, "grad_norm": 0.10487821698188782, "learning_rate": 3.911783383253825e-05, "loss": 0.3244, "step": 6967 }, { "epoch": 1.134674103326141, "grad_norm": 0.07863858342170715, "learning_rate": 3.9113924176306016e-05, "loss": 0.3003, "step": 6968 }, { "epoch": 1.1348369498839719, "grad_norm": 0.10542497038841248, "learning_rate": 3.9110014013341624e-05, "loss": 0.3346, "step": 6969 }, { "epoch": 1.1349997964418028, "grad_norm": 0.10393291711807251, "learning_rate": 3.910610334378544e-05, "loss": 0.3476, "step": 6970 }, { "epoch": 1.1351626429996335, "grad_norm": 0.1194852739572525, "learning_rate": 3.910219216777789e-05, "loss": 0.3576, "step": 6971 }, { "epoch": 1.1353254895574645, "grad_norm": 0.06887712329626083, "learning_rate": 3.909828048545938e-05, "loss": 0.2909, "step": 6972 }, { "epoch": 1.1354883361152954, "grad_norm": 0.12452398240566254, "learning_rate": 3.909436829697038e-05, "loss": 0.3351, "step": 6973 }, { "epoch": 1.1356511826731261, "grad_norm": 0.0686010867357254, "learning_rate": 3.9090455602451314e-05, "loss": 0.3217, "step": 6974 }, { "epoch": 1.135814029230957, "grad_norm": 0.22637218236923218, "learning_rate": 3.908654240204268e-05, "loss": 0.3458, "step": 6975 }, { "epoch": 1.135976875788788, "grad_norm": 0.130682110786438, "learning_rate": 3.9082628695884974e-05, "loss": 0.3305, "step": 6976 }, { "epoch": 1.136139722346619, "grad_norm": 0.09293036162853241, "learning_rate": 3.9078714484118693e-05, "loss": 0.3642, "step": 6977 }, { "epoch": 1.1363025689044497, "grad_norm": 0.1217336654663086, "learning_rate": 3.90747997668844e-05, "loss": 0.3094, "step": 6978 }, { "epoch": 1.1364654154622806, "grad_norm": 0.10100988298654556, "learning_rate": 3.907088454432261e-05, "loss": 0.3048, "step": 6979 }, { "epoch": 1.1366282620201116, "grad_norm": 0.1906188577413559, "learning_rate": 3.9066968816573917e-05, "loss": 0.3096, "step": 6980 }, { "epoch": 1.1367911085779425, "grad_norm": 0.08514397591352463, "learning_rate": 3.906305258377889e-05, "loss": 0.3138, "step": 6981 }, { "epoch": 1.1369539551357732, "grad_norm": 0.08941109478473663, "learning_rate": 3.905913584607815e-05, "loss": 0.3137, "step": 6982 }, { "epoch": 1.1371168016936042, "grad_norm": 0.11329153180122375, "learning_rate": 3.90552186036123e-05, "loss": 0.3412, "step": 6983 }, { "epoch": 1.1372796482514351, "grad_norm": 0.23673243820667267, "learning_rate": 3.9051300856521996e-05, "loss": 0.3226, "step": 6984 }, { "epoch": 1.137442494809266, "grad_norm": 0.15360113978385925, "learning_rate": 3.90473826049479e-05, "loss": 0.3427, "step": 6985 }, { "epoch": 1.1376053413670968, "grad_norm": 0.08536788076162338, "learning_rate": 3.904346384903068e-05, "loss": 0.2974, "step": 6986 }, { "epoch": 1.1377681879249277, "grad_norm": 0.08695284277200699, "learning_rate": 3.9039544588911025e-05, "loss": 0.3443, "step": 6987 }, { "epoch": 1.1379310344827587, "grad_norm": 0.09242796897888184, "learning_rate": 3.903562482472966e-05, "loss": 0.3123, "step": 6988 }, { "epoch": 1.1380938810405894, "grad_norm": 0.1747828871011734, "learning_rate": 3.903170455662731e-05, "loss": 0.3347, "step": 6989 }, { "epoch": 1.1382567275984203, "grad_norm": 0.10430700331926346, "learning_rate": 3.902778378474472e-05, "loss": 0.3101, "step": 6990 }, { "epoch": 1.1384195741562513, "grad_norm": 0.11023345589637756, "learning_rate": 3.9023862509222676e-05, "loss": 0.2881, "step": 6991 }, { "epoch": 1.1385824207140822, "grad_norm": 0.10646756738424301, "learning_rate": 3.901994073020194e-05, "loss": 0.2974, "step": 6992 }, { "epoch": 1.1387452672719132, "grad_norm": 0.1280306577682495, "learning_rate": 3.901601844782334e-05, "loss": 0.313, "step": 6993 }, { "epoch": 1.1389081138297439, "grad_norm": 0.1565985232591629, "learning_rate": 3.901209566222768e-05, "loss": 0.3057, "step": 6994 }, { "epoch": 1.1390709603875748, "grad_norm": 0.1372694969177246, "learning_rate": 3.9008172373555794e-05, "loss": 0.2942, "step": 6995 }, { "epoch": 1.1392338069454058, "grad_norm": 0.11542823910713196, "learning_rate": 3.900424858194856e-05, "loss": 0.3088, "step": 6996 }, { "epoch": 1.1393966535032365, "grad_norm": 0.07710174471139908, "learning_rate": 3.900032428754685e-05, "loss": 0.3287, "step": 6997 }, { "epoch": 1.1395595000610674, "grad_norm": 0.09584127366542816, "learning_rate": 3.8996399490491534e-05, "loss": 0.2895, "step": 6998 }, { "epoch": 1.1397223466188984, "grad_norm": 0.1250450164079666, "learning_rate": 3.8992474190923556e-05, "loss": 0.317, "step": 6999 }, { "epoch": 1.1398851931767293, "grad_norm": 0.13058282434940338, "learning_rate": 3.898854838898383e-05, "loss": 0.3357, "step": 7000 }, { "epoch": 1.14004803973456, "grad_norm": 0.10767976194620132, "learning_rate": 3.898462208481331e-05, "loss": 0.2977, "step": 7001 }, { "epoch": 1.140210886292391, "grad_norm": 0.0906643271446228, "learning_rate": 3.898069527855294e-05, "loss": 0.2994, "step": 7002 }, { "epoch": 1.140373732850222, "grad_norm": 0.14453737437725067, "learning_rate": 3.897676797034374e-05, "loss": 0.3413, "step": 7003 }, { "epoch": 1.1405365794080526, "grad_norm": 0.16285941004753113, "learning_rate": 3.8972840160326684e-05, "loss": 0.2701, "step": 7004 }, { "epoch": 1.1406994259658836, "grad_norm": 0.09163069725036621, "learning_rate": 3.896891184864281e-05, "loss": 0.3204, "step": 7005 }, { "epoch": 1.1408622725237145, "grad_norm": 0.10477344691753387, "learning_rate": 3.896498303543315e-05, "loss": 0.353, "step": 7006 }, { "epoch": 1.1410251190815455, "grad_norm": 0.15659040212631226, "learning_rate": 3.896105372083875e-05, "loss": 0.3489, "step": 7007 }, { "epoch": 1.1411879656393764, "grad_norm": 0.1175454631447792, "learning_rate": 3.89571239050007e-05, "loss": 0.3299, "step": 7008 }, { "epoch": 1.1413508121972071, "grad_norm": 0.14087878167629242, "learning_rate": 3.8953193588060083e-05, "loss": 0.2903, "step": 7009 }, { "epoch": 1.141513658755038, "grad_norm": 0.11981191486120224, "learning_rate": 3.8949262770158005e-05, "loss": 0.3526, "step": 7010 }, { "epoch": 1.141676505312869, "grad_norm": 0.12631337344646454, "learning_rate": 3.894533145143561e-05, "loss": 0.3248, "step": 7011 }, { "epoch": 1.1418393518706997, "grad_norm": 0.1524263620376587, "learning_rate": 3.894139963203403e-05, "loss": 0.3199, "step": 7012 }, { "epoch": 1.1420021984285307, "grad_norm": 0.08731228113174438, "learning_rate": 3.8937467312094433e-05, "loss": 0.3084, "step": 7013 }, { "epoch": 1.1421650449863616, "grad_norm": 0.14469316601753235, "learning_rate": 3.8933534491758004e-05, "loss": 0.3271, "step": 7014 }, { "epoch": 1.1423278915441926, "grad_norm": 0.104697085916996, "learning_rate": 3.892960117116594e-05, "loss": 0.2923, "step": 7015 }, { "epoch": 1.1424907381020233, "grad_norm": 0.14878949522972107, "learning_rate": 3.892566735045946e-05, "loss": 0.3075, "step": 7016 }, { "epoch": 1.1426535846598542, "grad_norm": 0.10231350362300873, "learning_rate": 3.892173302977981e-05, "loss": 0.327, "step": 7017 }, { "epoch": 1.1428164312176852, "grad_norm": 0.09934926778078079, "learning_rate": 3.891779820926822e-05, "loss": 0.2768, "step": 7018 }, { "epoch": 1.1429792777755161, "grad_norm": 0.08693882077932358, "learning_rate": 3.891386288906598e-05, "loss": 0.317, "step": 7019 }, { "epoch": 1.1431421243333468, "grad_norm": 0.07669974863529205, "learning_rate": 3.8909927069314365e-05, "loss": 0.2912, "step": 7020 }, { "epoch": 1.1433049708911778, "grad_norm": 0.09755321592092514, "learning_rate": 3.890599075015471e-05, "loss": 0.3025, "step": 7021 }, { "epoch": 1.1434678174490087, "grad_norm": 0.15520842373371124, "learning_rate": 3.890205393172831e-05, "loss": 0.323, "step": 7022 }, { "epoch": 1.1436306640068397, "grad_norm": 0.14523179829120636, "learning_rate": 3.8898116614176526e-05, "loss": 0.3046, "step": 7023 }, { "epoch": 1.1437935105646704, "grad_norm": 0.14743779599666595, "learning_rate": 3.8894178797640726e-05, "loss": 0.3377, "step": 7024 }, { "epoch": 1.1439563571225013, "grad_norm": 0.13978999853134155, "learning_rate": 3.8890240482262266e-05, "loss": 0.3116, "step": 7025 }, { "epoch": 1.1441192036803323, "grad_norm": 0.10240757465362549, "learning_rate": 3.888630166818256e-05, "loss": 0.3359, "step": 7026 }, { "epoch": 1.144282050238163, "grad_norm": 0.10287371277809143, "learning_rate": 3.888236235554302e-05, "loss": 0.314, "step": 7027 }, { "epoch": 1.144444896795994, "grad_norm": 0.08958294987678528, "learning_rate": 3.887842254448508e-05, "loss": 0.3377, "step": 7028 }, { "epoch": 1.1446077433538249, "grad_norm": 0.11057340353727341, "learning_rate": 3.887448223515019e-05, "loss": 0.2926, "step": 7029 }, { "epoch": 1.1447705899116558, "grad_norm": 0.14335180819034576, "learning_rate": 3.8870541427679815e-05, "loss": 0.3279, "step": 7030 }, { "epoch": 1.1449334364694865, "grad_norm": 0.20362848043441772, "learning_rate": 3.8866600122215455e-05, "loss": 0.3237, "step": 7031 }, { "epoch": 1.1450962830273175, "grad_norm": 0.11840026080608368, "learning_rate": 3.8862658318898596e-05, "loss": 0.3028, "step": 7032 }, { "epoch": 1.1452591295851484, "grad_norm": 0.07399740815162659, "learning_rate": 3.8858716017870774e-05, "loss": 0.316, "step": 7033 }, { "epoch": 1.1454219761429794, "grad_norm": 0.1830359548330307, "learning_rate": 3.885477321927352e-05, "loss": 0.3005, "step": 7034 }, { "epoch": 1.14558482270081, "grad_norm": 0.1377776861190796, "learning_rate": 3.8850829923248406e-05, "loss": 0.3108, "step": 7035 }, { "epoch": 1.145747669258641, "grad_norm": 0.08761931210756302, "learning_rate": 3.8846886129936986e-05, "loss": 0.3083, "step": 7036 }, { "epoch": 1.145910515816472, "grad_norm": 0.13985353708267212, "learning_rate": 3.884294183948088e-05, "loss": 0.3184, "step": 7037 }, { "epoch": 1.146073362374303, "grad_norm": 0.08094584196805954, "learning_rate": 3.8838997052021684e-05, "loss": 0.3102, "step": 7038 }, { "epoch": 1.1462362089321336, "grad_norm": 0.08825278282165527, "learning_rate": 3.883505176770103e-05, "loss": 0.307, "step": 7039 }, { "epoch": 1.1463990554899646, "grad_norm": 0.1000048816204071, "learning_rate": 3.8831105986660564e-05, "loss": 0.317, "step": 7040 }, { "epoch": 1.1465619020477955, "grad_norm": 0.113631971180439, "learning_rate": 3.882715970904196e-05, "loss": 0.3411, "step": 7041 }, { "epoch": 1.1467247486056262, "grad_norm": 0.08115994185209274, "learning_rate": 3.8823212934986894e-05, "loss": 0.3215, "step": 7042 }, { "epoch": 1.1468875951634572, "grad_norm": 0.10683317482471466, "learning_rate": 3.881926566463706e-05, "loss": 0.3102, "step": 7043 }, { "epoch": 1.1470504417212881, "grad_norm": 0.13150818645954132, "learning_rate": 3.8815317898134195e-05, "loss": 0.3032, "step": 7044 }, { "epoch": 1.147213288279119, "grad_norm": 0.10263355821371078, "learning_rate": 3.881136963562002e-05, "loss": 0.317, "step": 7045 }, { "epoch": 1.14737613483695, "grad_norm": 0.14015617966651917, "learning_rate": 3.88074208772363e-05, "loss": 0.3156, "step": 7046 }, { "epoch": 1.1475389813947807, "grad_norm": 0.07787293940782547, "learning_rate": 3.88034716231248e-05, "loss": 0.3027, "step": 7047 }, { "epoch": 1.1477018279526117, "grad_norm": 0.09959712624549866, "learning_rate": 3.879952187342731e-05, "loss": 0.3288, "step": 7048 }, { "epoch": 1.1478646745104426, "grad_norm": 0.2091330885887146, "learning_rate": 3.8795571628285644e-05, "loss": 0.371, "step": 7049 }, { "epoch": 1.1480275210682733, "grad_norm": 0.11069151759147644, "learning_rate": 3.8791620887841624e-05, "loss": 0.3155, "step": 7050 }, { "epoch": 1.1481903676261043, "grad_norm": 0.14172083139419556, "learning_rate": 3.878766965223709e-05, "loss": 0.342, "step": 7051 }, { "epoch": 1.1483532141839352, "grad_norm": 0.1143002137541771, "learning_rate": 3.8783717921613906e-05, "loss": 0.343, "step": 7052 }, { "epoch": 1.1485160607417662, "grad_norm": 0.12926647067070007, "learning_rate": 3.877976569611396e-05, "loss": 0.3067, "step": 7053 }, { "epoch": 1.1486789072995969, "grad_norm": 0.099985271692276, "learning_rate": 3.8775812975879135e-05, "loss": 0.3353, "step": 7054 }, { "epoch": 1.1488417538574278, "grad_norm": 0.11950940638780594, "learning_rate": 3.8771859761051346e-05, "loss": 0.3338, "step": 7055 }, { "epoch": 1.1490046004152588, "grad_norm": 0.13964582979679108, "learning_rate": 3.8767906051772526e-05, "loss": 0.3046, "step": 7056 }, { "epoch": 1.1491674469730897, "grad_norm": 0.11814512312412262, "learning_rate": 3.8763951848184644e-05, "loss": 0.29, "step": 7057 }, { "epoch": 1.1493302935309204, "grad_norm": 0.1129593625664711, "learning_rate": 3.875999715042964e-05, "loss": 0.3128, "step": 7058 }, { "epoch": 1.1494931400887514, "grad_norm": 0.1202833503484726, "learning_rate": 3.8756041958649526e-05, "loss": 0.3238, "step": 7059 }, { "epoch": 1.1496559866465823, "grad_norm": 0.13753758370876312, "learning_rate": 3.875208627298628e-05, "loss": 0.3236, "step": 7060 }, { "epoch": 1.1498188332044132, "grad_norm": 0.1534689962863922, "learning_rate": 3.874813009358193e-05, "loss": 0.3108, "step": 7061 }, { "epoch": 1.149981679762244, "grad_norm": 0.1436232477426529, "learning_rate": 3.874417342057854e-05, "loss": 0.319, "step": 7062 }, { "epoch": 1.150144526320075, "grad_norm": 0.12764814496040344, "learning_rate": 3.874021625411812e-05, "loss": 0.2837, "step": 7063 }, { "epoch": 1.1503073728779059, "grad_norm": 0.10413126647472382, "learning_rate": 3.873625859434279e-05, "loss": 0.2904, "step": 7064 }, { "epoch": 1.1504702194357366, "grad_norm": 0.12535396218299866, "learning_rate": 3.873230044139461e-05, "loss": 0.297, "step": 7065 }, { "epoch": 1.1506330659935675, "grad_norm": 0.12932325899600983, "learning_rate": 3.872834179541571e-05, "loss": 0.3371, "step": 7066 }, { "epoch": 1.1507959125513985, "grad_norm": 0.13138914108276367, "learning_rate": 3.8724382656548196e-05, "loss": 0.3152, "step": 7067 }, { "epoch": 1.1509587591092294, "grad_norm": 0.09256550669670105, "learning_rate": 3.872042302493424e-05, "loss": 0.3294, "step": 7068 }, { "epoch": 1.1511216056670601, "grad_norm": 0.12651127576828003, "learning_rate": 3.8716462900715975e-05, "loss": 0.3024, "step": 7069 }, { "epoch": 1.151284452224891, "grad_norm": 0.10792329907417297, "learning_rate": 3.8712502284035604e-05, "loss": 0.3355, "step": 7070 }, { "epoch": 1.151447298782722, "grad_norm": 0.12013090401887894, "learning_rate": 3.8708541175035316e-05, "loss": 0.3496, "step": 7071 }, { "epoch": 1.151610145340553, "grad_norm": 0.1490122526884079, "learning_rate": 3.8704579573857325e-05, "loss": 0.314, "step": 7072 }, { "epoch": 1.1517729918983837, "grad_norm": 0.08041221648454666, "learning_rate": 3.870061748064387e-05, "loss": 0.2902, "step": 7073 }, { "epoch": 1.1519358384562146, "grad_norm": 0.15334996581077576, "learning_rate": 3.8696654895537204e-05, "loss": 0.3178, "step": 7074 }, { "epoch": 1.1520986850140456, "grad_norm": 0.1840873807668686, "learning_rate": 3.8692691818679595e-05, "loss": 0.3252, "step": 7075 }, { "epoch": 1.1522615315718765, "grad_norm": 0.13352781534194946, "learning_rate": 3.8688728250213313e-05, "loss": 0.3313, "step": 7076 }, { "epoch": 1.1524243781297072, "grad_norm": 0.17757020890712738, "learning_rate": 3.8684764190280674e-05, "loss": 0.352, "step": 7077 }, { "epoch": 1.1525872246875382, "grad_norm": 0.10559951514005661, "learning_rate": 3.8680799639024005e-05, "loss": 0.3358, "step": 7078 }, { "epoch": 1.152750071245369, "grad_norm": 0.1460423320531845, "learning_rate": 3.867683459658564e-05, "loss": 0.3149, "step": 7079 }, { "epoch": 1.1529129178031998, "grad_norm": 0.12975461781024933, "learning_rate": 3.8672869063107934e-05, "loss": 0.2759, "step": 7080 }, { "epoch": 1.1530757643610308, "grad_norm": 0.12399507313966751, "learning_rate": 3.8668903038733265e-05, "loss": 0.3708, "step": 7081 }, { "epoch": 1.1532386109188617, "grad_norm": 0.09621056914329529, "learning_rate": 3.866493652360403e-05, "loss": 0.3262, "step": 7082 }, { "epoch": 1.1534014574766926, "grad_norm": 0.10796943306922913, "learning_rate": 3.8660969517862625e-05, "loss": 0.3368, "step": 7083 }, { "epoch": 1.1535643040345236, "grad_norm": 0.15549740195274353, "learning_rate": 3.865700202165149e-05, "loss": 0.2833, "step": 7084 }, { "epoch": 1.1537271505923543, "grad_norm": 0.10274054855108261, "learning_rate": 3.865303403511306e-05, "loss": 0.3023, "step": 7085 }, { "epoch": 1.1538899971501853, "grad_norm": 0.09870759397745132, "learning_rate": 3.864906555838981e-05, "loss": 0.3264, "step": 7086 }, { "epoch": 1.1540528437080162, "grad_norm": 0.09240368753671646, "learning_rate": 3.86450965916242e-05, "loss": 0.2563, "step": 7087 }, { "epoch": 1.154215690265847, "grad_norm": 0.11079588532447815, "learning_rate": 3.864112713495875e-05, "loss": 0.3227, "step": 7088 }, { "epoch": 1.1543785368236779, "grad_norm": 0.11244133859872818, "learning_rate": 3.863715718853596e-05, "loss": 0.3034, "step": 7089 }, { "epoch": 1.1545413833815088, "grad_norm": 0.08062413334846497, "learning_rate": 3.8633186752498375e-05, "loss": 0.2921, "step": 7090 }, { "epoch": 1.1547042299393397, "grad_norm": 0.08691763877868652, "learning_rate": 3.8629215826988536e-05, "loss": 0.278, "step": 7091 }, { "epoch": 1.1548670764971705, "grad_norm": 0.12637516856193542, "learning_rate": 3.862524441214901e-05, "loss": 0.319, "step": 7092 }, { "epoch": 1.1550299230550014, "grad_norm": 0.14672048389911652, "learning_rate": 3.86212725081224e-05, "loss": 0.3526, "step": 7093 }, { "epoch": 1.1551927696128323, "grad_norm": 0.1535833328962326, "learning_rate": 3.8617300115051286e-05, "loss": 0.3383, "step": 7094 }, { "epoch": 1.155355616170663, "grad_norm": 0.12750287353992462, "learning_rate": 3.8613327233078306e-05, "loss": 0.291, "step": 7095 }, { "epoch": 1.155518462728494, "grad_norm": 0.08250446617603302, "learning_rate": 3.8609353862346085e-05, "loss": 0.305, "step": 7096 }, { "epoch": 1.155681309286325, "grad_norm": 0.11449582129716873, "learning_rate": 3.860538000299729e-05, "loss": 0.3037, "step": 7097 }, { "epoch": 1.155844155844156, "grad_norm": 0.11172791570425034, "learning_rate": 3.86014056551746e-05, "loss": 0.3132, "step": 7098 }, { "epoch": 1.1560070024019868, "grad_norm": 0.10056537389755249, "learning_rate": 3.859743081902067e-05, "loss": 0.3046, "step": 7099 }, { "epoch": 1.1561698489598176, "grad_norm": 0.09656990319490433, "learning_rate": 3.859345549467825e-05, "loss": 0.3214, "step": 7100 }, { "epoch": 1.1563326955176485, "grad_norm": 0.17246176302433014, "learning_rate": 3.858947968229005e-05, "loss": 0.3257, "step": 7101 }, { "epoch": 1.1564955420754794, "grad_norm": 0.1478244662284851, "learning_rate": 3.858550338199882e-05, "loss": 0.3352, "step": 7102 }, { "epoch": 1.1566583886333102, "grad_norm": 0.09830132871866226, "learning_rate": 3.858152659394731e-05, "loss": 0.3073, "step": 7103 }, { "epoch": 1.156821235191141, "grad_norm": 0.10066094994544983, "learning_rate": 3.8577549318278304e-05, "loss": 0.3138, "step": 7104 }, { "epoch": 1.156984081748972, "grad_norm": 0.12107227742671967, "learning_rate": 3.85735715551346e-05, "loss": 0.2847, "step": 7105 }, { "epoch": 1.157146928306803, "grad_norm": 0.09370478242635727, "learning_rate": 3.8569593304659006e-05, "loss": 0.2799, "step": 7106 }, { "epoch": 1.1573097748646337, "grad_norm": 0.10464292764663696, "learning_rate": 3.8565614566994355e-05, "loss": 0.2905, "step": 7107 }, { "epoch": 1.1574726214224647, "grad_norm": 0.15487521886825562, "learning_rate": 3.856163534228351e-05, "loss": 0.3357, "step": 7108 }, { "epoch": 1.1576354679802956, "grad_norm": 0.18124467134475708, "learning_rate": 3.8557655630669306e-05, "loss": 0.3137, "step": 7109 }, { "epoch": 1.1577983145381265, "grad_norm": 0.09607841074466705, "learning_rate": 3.855367543229466e-05, "loss": 0.2876, "step": 7110 }, { "epoch": 1.1579611610959573, "grad_norm": 0.2638610303401947, "learning_rate": 3.854969474730245e-05, "loss": 0.3286, "step": 7111 }, { "epoch": 1.1581240076537882, "grad_norm": 0.21794462203979492, "learning_rate": 3.8545713575835605e-05, "loss": 0.3282, "step": 7112 }, { "epoch": 1.1582868542116191, "grad_norm": 0.11306150257587433, "learning_rate": 3.854173191803706e-05, "loss": 0.3256, "step": 7113 }, { "epoch": 1.15844970076945, "grad_norm": 0.11756451427936554, "learning_rate": 3.8537749774049766e-05, "loss": 0.3401, "step": 7114 }, { "epoch": 1.1586125473272808, "grad_norm": 0.1435081660747528, "learning_rate": 3.85337671440167e-05, "loss": 0.2957, "step": 7115 }, { "epoch": 1.1587753938851117, "grad_norm": 0.15152281522750854, "learning_rate": 3.852978402808084e-05, "loss": 0.3307, "step": 7116 }, { "epoch": 1.1589382404429427, "grad_norm": 0.11190039664506912, "learning_rate": 3.8525800426385194e-05, "loss": 0.3293, "step": 7117 }, { "epoch": 1.1591010870007734, "grad_norm": 0.15782815217971802, "learning_rate": 3.852181633907279e-05, "loss": 0.3137, "step": 7118 }, { "epoch": 1.1592639335586044, "grad_norm": 0.1026601493358612, "learning_rate": 3.851783176628667e-05, "loss": 0.2779, "step": 7119 }, { "epoch": 1.1594267801164353, "grad_norm": 0.1609094738960266, "learning_rate": 3.851384670816989e-05, "loss": 0.3571, "step": 7120 }, { "epoch": 1.1595896266742662, "grad_norm": 0.12155023962259293, "learning_rate": 3.850986116486551e-05, "loss": 0.338, "step": 7121 }, { "epoch": 1.1597524732320972, "grad_norm": 0.0841858759522438, "learning_rate": 3.850587513651666e-05, "loss": 0.3295, "step": 7122 }, { "epoch": 1.159915319789928, "grad_norm": 0.10141344368457794, "learning_rate": 3.850188862326641e-05, "loss": 0.298, "step": 7123 }, { "epoch": 1.1600781663477588, "grad_norm": 0.15712322294712067, "learning_rate": 3.8497901625257915e-05, "loss": 0.3202, "step": 7124 }, { "epoch": 1.1602410129055898, "grad_norm": 0.10732009261846542, "learning_rate": 3.849391414263431e-05, "loss": 0.2823, "step": 7125 }, { "epoch": 1.1604038594634205, "grad_norm": 0.16608715057373047, "learning_rate": 3.848992617553876e-05, "loss": 0.3115, "step": 7126 }, { "epoch": 1.1605667060212514, "grad_norm": 0.1302962452173233, "learning_rate": 3.848593772411444e-05, "loss": 0.2814, "step": 7127 }, { "epoch": 1.1607295525790824, "grad_norm": 0.14489547908306122, "learning_rate": 3.848194878850455e-05, "loss": 0.2894, "step": 7128 }, { "epoch": 1.1608923991369133, "grad_norm": 0.08988313376903534, "learning_rate": 3.847795936885231e-05, "loss": 0.3318, "step": 7129 }, { "epoch": 1.161055245694744, "grad_norm": 0.12031236290931702, "learning_rate": 3.847396946530094e-05, "loss": 0.2676, "step": 7130 }, { "epoch": 1.161218092252575, "grad_norm": 0.16511207818984985, "learning_rate": 3.84699790779937e-05, "loss": 0.2852, "step": 7131 }, { "epoch": 1.161380938810406, "grad_norm": 0.16112130880355835, "learning_rate": 3.8465988207073854e-05, "loss": 0.3269, "step": 7132 }, { "epoch": 1.1615437853682367, "grad_norm": 0.13477030396461487, "learning_rate": 3.84619968526847e-05, "loss": 0.3229, "step": 7133 }, { "epoch": 1.1617066319260676, "grad_norm": 0.10629865527153015, "learning_rate": 3.845800501496951e-05, "loss": 0.3369, "step": 7134 }, { "epoch": 1.1618694784838985, "grad_norm": 0.15451692044734955, "learning_rate": 3.8454012694071625e-05, "loss": 0.3093, "step": 7135 }, { "epoch": 1.1620323250417295, "grad_norm": 0.10700720548629761, "learning_rate": 3.845001989013437e-05, "loss": 0.3117, "step": 7136 }, { "epoch": 1.1621951715995604, "grad_norm": 0.08030398935079575, "learning_rate": 3.844602660330111e-05, "loss": 0.3061, "step": 7137 }, { "epoch": 1.1623580181573911, "grad_norm": 0.12383197247982025, "learning_rate": 3.84420328337152e-05, "loss": 0.3049, "step": 7138 }, { "epoch": 1.162520864715222, "grad_norm": 0.11839985102415085, "learning_rate": 3.8438038581520046e-05, "loss": 0.2838, "step": 7139 }, { "epoch": 1.162683711273053, "grad_norm": 0.1728026121854782, "learning_rate": 3.8434043846859044e-05, "loss": 0.3392, "step": 7140 }, { "epoch": 1.1628465578308838, "grad_norm": 0.12858600914478302, "learning_rate": 3.8430048629875615e-05, "loss": 0.3081, "step": 7141 }, { "epoch": 1.1630094043887147, "grad_norm": 0.09298215061426163, "learning_rate": 3.84260529307132e-05, "loss": 0.2932, "step": 7142 }, { "epoch": 1.1631722509465456, "grad_norm": 0.10778893530368805, "learning_rate": 3.842205674951527e-05, "loss": 0.3177, "step": 7143 }, { "epoch": 1.1633350975043766, "grad_norm": 0.09172570705413818, "learning_rate": 3.841806008642528e-05, "loss": 0.3079, "step": 7144 }, { "epoch": 1.1634979440622073, "grad_norm": 0.11331761628389359, "learning_rate": 3.841406294158674e-05, "loss": 0.2874, "step": 7145 }, { "epoch": 1.1636607906200382, "grad_norm": 0.14184200763702393, "learning_rate": 3.841006531514314e-05, "loss": 0.2936, "step": 7146 }, { "epoch": 1.1638236371778692, "grad_norm": 0.09096471220254898, "learning_rate": 3.840606720723802e-05, "loss": 0.3367, "step": 7147 }, { "epoch": 1.1639864837357001, "grad_norm": 0.1188526526093483, "learning_rate": 3.840206861801493e-05, "loss": 0.2863, "step": 7148 }, { "epoch": 1.1641493302935308, "grad_norm": 0.08100944757461548, "learning_rate": 3.8398069547617415e-05, "loss": 0.3615, "step": 7149 }, { "epoch": 1.1643121768513618, "grad_norm": 0.10013709217309952, "learning_rate": 3.839406999618907e-05, "loss": 0.313, "step": 7150 }, { "epoch": 1.1644750234091927, "grad_norm": 0.1092619001865387, "learning_rate": 3.839006996387348e-05, "loss": 0.3034, "step": 7151 }, { "epoch": 1.1646378699670237, "grad_norm": 0.12734822928905487, "learning_rate": 3.838606945081425e-05, "loss": 0.3145, "step": 7152 }, { "epoch": 1.1648007165248544, "grad_norm": 0.15702207386493683, "learning_rate": 3.838206845715503e-05, "loss": 0.2952, "step": 7153 }, { "epoch": 1.1649635630826853, "grad_norm": 0.16041824221611023, "learning_rate": 3.8378066983039454e-05, "loss": 0.3194, "step": 7154 }, { "epoch": 1.1651264096405163, "grad_norm": 0.13367709517478943, "learning_rate": 3.83740650286112e-05, "loss": 0.3245, "step": 7155 }, { "epoch": 1.165289256198347, "grad_norm": 0.12655013799667358, "learning_rate": 3.8370062594013936e-05, "loss": 0.3401, "step": 7156 }, { "epoch": 1.165452102756178, "grad_norm": 0.24650663137435913, "learning_rate": 3.8366059679391364e-05, "loss": 0.3213, "step": 7157 }, { "epoch": 1.1656149493140089, "grad_norm": 0.08990061283111572, "learning_rate": 3.83620562848872e-05, "loss": 0.3465, "step": 7158 }, { "epoch": 1.1657777958718398, "grad_norm": 0.12480225414037704, "learning_rate": 3.8358052410645195e-05, "loss": 0.3184, "step": 7159 }, { "epoch": 1.1659406424296705, "grad_norm": 0.12063267827033997, "learning_rate": 3.835404805680908e-05, "loss": 0.3014, "step": 7160 }, { "epoch": 1.1661034889875015, "grad_norm": 0.151103213429451, "learning_rate": 3.835004322352264e-05, "loss": 0.3284, "step": 7161 }, { "epoch": 1.1662663355453324, "grad_norm": 0.14362117648124695, "learning_rate": 3.834603791092963e-05, "loss": 0.3086, "step": 7162 }, { "epoch": 1.1664291821031634, "grad_norm": 0.10595999658107758, "learning_rate": 3.8342032119173884e-05, "loss": 0.3129, "step": 7163 }, { "epoch": 1.166592028660994, "grad_norm": 0.10186131298542023, "learning_rate": 3.833802584839921e-05, "loss": 0.3222, "step": 7164 }, { "epoch": 1.166754875218825, "grad_norm": 0.11343657225370407, "learning_rate": 3.8334019098749454e-05, "loss": 0.338, "step": 7165 }, { "epoch": 1.166917721776656, "grad_norm": 0.12334061414003372, "learning_rate": 3.833001187036845e-05, "loss": 0.3099, "step": 7166 }, { "epoch": 1.167080568334487, "grad_norm": 0.1313222348690033, "learning_rate": 3.8326004163400085e-05, "loss": 0.2832, "step": 7167 }, { "epoch": 1.1672434148923176, "grad_norm": 0.13688038289546967, "learning_rate": 3.8321995977988256e-05, "loss": 0.3279, "step": 7168 }, { "epoch": 1.1674062614501486, "grad_norm": 0.2575967013835907, "learning_rate": 3.831798731427685e-05, "loss": 0.2925, "step": 7169 }, { "epoch": 1.1675691080079795, "grad_norm": 0.10216803103685379, "learning_rate": 3.8313978172409795e-05, "loss": 0.3103, "step": 7170 }, { "epoch": 1.1677319545658102, "grad_norm": 0.11761456727981567, "learning_rate": 3.830996855253103e-05, "loss": 0.3385, "step": 7171 }, { "epoch": 1.1678948011236412, "grad_norm": 0.11148444563150406, "learning_rate": 3.8305958454784526e-05, "loss": 0.3103, "step": 7172 }, { "epoch": 1.1680576476814721, "grad_norm": 0.1355520784854889, "learning_rate": 3.830194787931425e-05, "loss": 0.2703, "step": 7173 }, { "epoch": 1.168220494239303, "grad_norm": 0.1947464793920517, "learning_rate": 3.829793682626418e-05, "loss": 0.331, "step": 7174 }, { "epoch": 1.168383340797134, "grad_norm": 0.1259772777557373, "learning_rate": 3.8293925295778346e-05, "loss": 0.3114, "step": 7175 }, { "epoch": 1.1685461873549647, "grad_norm": 0.13824822008609772, "learning_rate": 3.828991328800075e-05, "loss": 0.3022, "step": 7176 }, { "epoch": 1.1687090339127957, "grad_norm": 0.10404198616743088, "learning_rate": 3.8285900803075467e-05, "loss": 0.2937, "step": 7177 }, { "epoch": 1.1688718804706266, "grad_norm": 0.15483640134334564, "learning_rate": 3.828188784114653e-05, "loss": 0.3462, "step": 7178 }, { "epoch": 1.1690347270284573, "grad_norm": 0.08351797610521317, "learning_rate": 3.827787440235803e-05, "loss": 0.3212, "step": 7179 }, { "epoch": 1.1691975735862883, "grad_norm": 0.18769749999046326, "learning_rate": 3.827386048685405e-05, "loss": 0.3322, "step": 7180 }, { "epoch": 1.1693604201441192, "grad_norm": 0.09206870943307877, "learning_rate": 3.8269846094778714e-05, "loss": 0.3163, "step": 7181 }, { "epoch": 1.1695232667019502, "grad_norm": 0.2602170705795288, "learning_rate": 3.826583122627615e-05, "loss": 0.3262, "step": 7182 }, { "epoch": 1.169686113259781, "grad_norm": 0.13705895841121674, "learning_rate": 3.826181588149049e-05, "loss": 0.2892, "step": 7183 }, { "epoch": 1.1698489598176118, "grad_norm": 0.19405487179756165, "learning_rate": 3.825780006056591e-05, "loss": 0.3464, "step": 7184 }, { "epoch": 1.1700118063754428, "grad_norm": 0.12153012305498123, "learning_rate": 3.825378376364659e-05, "loss": 0.2887, "step": 7185 }, { "epoch": 1.1701746529332737, "grad_norm": 0.12491048872470856, "learning_rate": 3.824976699087672e-05, "loss": 0.3004, "step": 7186 }, { "epoch": 1.1703374994911044, "grad_norm": 0.09545347094535828, "learning_rate": 3.824574974240052e-05, "loss": 0.301, "step": 7187 }, { "epoch": 1.1705003460489354, "grad_norm": 0.18370598554611206, "learning_rate": 3.824173201836222e-05, "loss": 0.2859, "step": 7188 }, { "epoch": 1.1706631926067663, "grad_norm": 0.1397024691104889, "learning_rate": 3.823771381890606e-05, "loss": 0.3887, "step": 7189 }, { "epoch": 1.1708260391645973, "grad_norm": 0.08699779212474823, "learning_rate": 3.823369514417633e-05, "loss": 0.3184, "step": 7190 }, { "epoch": 1.170988885722428, "grad_norm": 0.09770151972770691, "learning_rate": 3.8229675994317275e-05, "loss": 0.349, "step": 7191 }, { "epoch": 1.171151732280259, "grad_norm": 0.14628252387046814, "learning_rate": 3.8225656369473226e-05, "loss": 0.285, "step": 7192 }, { "epoch": 1.1713145788380899, "grad_norm": 0.13219426572322845, "learning_rate": 3.822163626978848e-05, "loss": 0.3098, "step": 7193 }, { "epoch": 1.1714774253959206, "grad_norm": 0.1292310655117035, "learning_rate": 3.821761569540738e-05, "loss": 0.3027, "step": 7194 }, { "epoch": 1.1716402719537515, "grad_norm": 0.1130085289478302, "learning_rate": 3.821359464647429e-05, "loss": 0.3535, "step": 7195 }, { "epoch": 1.1718031185115825, "grad_norm": 0.13783280551433563, "learning_rate": 3.8209573123133546e-05, "loss": 0.3087, "step": 7196 }, { "epoch": 1.1719659650694134, "grad_norm": 0.12739518284797668, "learning_rate": 3.820555112552956e-05, "loss": 0.2884, "step": 7197 }, { "epoch": 1.1721288116272441, "grad_norm": 0.09860855340957642, "learning_rate": 3.820152865380672e-05, "loss": 0.3084, "step": 7198 }, { "epoch": 1.172291658185075, "grad_norm": 0.08337721973657608, "learning_rate": 3.819750570810945e-05, "loss": 0.2873, "step": 7199 }, { "epoch": 1.172454504742906, "grad_norm": 0.21578483283519745, "learning_rate": 3.819348228858218e-05, "loss": 0.3514, "step": 7200 }, { "epoch": 1.172617351300737, "grad_norm": 0.14398245513439178, "learning_rate": 3.818945839536937e-05, "loss": 0.3061, "step": 7201 }, { "epoch": 1.1727801978585677, "grad_norm": 0.10803484171628952, "learning_rate": 3.8185434028615485e-05, "loss": 0.3115, "step": 7202 }, { "epoch": 1.1729430444163986, "grad_norm": 0.14416126906871796, "learning_rate": 3.818140918846501e-05, "loss": 0.3151, "step": 7203 }, { "epoch": 1.1731058909742296, "grad_norm": 0.14544819295406342, "learning_rate": 3.8177383875062465e-05, "loss": 0.3086, "step": 7204 }, { "epoch": 1.1732687375320605, "grad_norm": 0.13237272202968597, "learning_rate": 3.817335808855235e-05, "loss": 0.3641, "step": 7205 }, { "epoch": 1.1734315840898912, "grad_norm": 0.0707739070057869, "learning_rate": 3.8169331829079215e-05, "loss": 0.292, "step": 7206 }, { "epoch": 1.1735944306477222, "grad_norm": 0.12306280434131622, "learning_rate": 3.816530509678761e-05, "loss": 0.3148, "step": 7207 }, { "epoch": 1.1737572772055531, "grad_norm": 0.1427507996559143, "learning_rate": 3.8161277891822105e-05, "loss": 0.3474, "step": 7208 }, { "epoch": 1.1739201237633838, "grad_norm": 0.08371229469776154, "learning_rate": 3.815725021432729e-05, "loss": 0.3274, "step": 7209 }, { "epoch": 1.1740829703212148, "grad_norm": 0.15090389549732208, "learning_rate": 3.815322206444778e-05, "loss": 0.3255, "step": 7210 }, { "epoch": 1.1742458168790457, "grad_norm": 0.11377959698438644, "learning_rate": 3.8149193442328186e-05, "loss": 0.2839, "step": 7211 }, { "epoch": 1.1744086634368767, "grad_norm": 0.10153503715991974, "learning_rate": 3.814516434811315e-05, "loss": 0.3179, "step": 7212 }, { "epoch": 1.1745715099947076, "grad_norm": 0.11395270377397537, "learning_rate": 3.814113478194733e-05, "loss": 0.3456, "step": 7213 }, { "epoch": 1.1747343565525383, "grad_norm": 0.07907138764858246, "learning_rate": 3.813710474397539e-05, "loss": 0.3337, "step": 7214 }, { "epoch": 1.1748972031103693, "grad_norm": 0.13706764578819275, "learning_rate": 3.813307423434205e-05, "loss": 0.3265, "step": 7215 }, { "epoch": 1.1750600496682002, "grad_norm": 0.1943936049938202, "learning_rate": 3.812904325319199e-05, "loss": 0.3431, "step": 7216 }, { "epoch": 1.175222896226031, "grad_norm": 0.1258726418018341, "learning_rate": 3.812501180066994e-05, "loss": 0.289, "step": 7217 }, { "epoch": 1.1753857427838619, "grad_norm": 0.13446101546287537, "learning_rate": 3.812097987692064e-05, "loss": 0.3093, "step": 7218 }, { "epoch": 1.1755485893416928, "grad_norm": 0.1455785185098648, "learning_rate": 3.8116947482088845e-05, "loss": 0.3096, "step": 7219 }, { "epoch": 1.1757114358995238, "grad_norm": 0.10600084811449051, "learning_rate": 3.811291461631934e-05, "loss": 0.3293, "step": 7220 }, { "epoch": 1.1758742824573545, "grad_norm": 0.09968981146812439, "learning_rate": 3.810888127975692e-05, "loss": 0.3071, "step": 7221 }, { "epoch": 1.1760371290151854, "grad_norm": 0.1510791927576065, "learning_rate": 3.8104847472546376e-05, "loss": 0.2928, "step": 7222 }, { "epoch": 1.1761999755730164, "grad_norm": 0.08855212479829788, "learning_rate": 3.8100813194832555e-05, "loss": 0.2966, "step": 7223 }, { "epoch": 1.176362822130847, "grad_norm": 0.14153827726840973, "learning_rate": 3.809677844676028e-05, "loss": 0.3307, "step": 7224 }, { "epoch": 1.176525668688678, "grad_norm": 0.17365644872188568, "learning_rate": 3.809274322847443e-05, "loss": 0.3472, "step": 7225 }, { "epoch": 1.176688515246509, "grad_norm": 0.13674890995025635, "learning_rate": 3.8088707540119875e-05, "loss": 0.3343, "step": 7226 }, { "epoch": 1.17685136180434, "grad_norm": 0.11503405123949051, "learning_rate": 3.808467138184148e-05, "loss": 0.2831, "step": 7227 }, { "epoch": 1.1770142083621709, "grad_norm": 0.10516607761383057, "learning_rate": 3.8080634753784204e-05, "loss": 0.2945, "step": 7228 }, { "epoch": 1.1771770549200016, "grad_norm": 0.10994426906108856, "learning_rate": 3.807659765609294e-05, "loss": 0.2921, "step": 7229 }, { "epoch": 1.1773399014778325, "grad_norm": 0.12089565396308899, "learning_rate": 3.8072560088912645e-05, "loss": 0.2895, "step": 7230 }, { "epoch": 1.1775027480356635, "grad_norm": 0.09209978580474854, "learning_rate": 3.806852205238828e-05, "loss": 0.3288, "step": 7231 }, { "epoch": 1.1776655945934942, "grad_norm": 0.10689747333526611, "learning_rate": 3.8064483546664806e-05, "loss": 0.3048, "step": 7232 }, { "epoch": 1.1778284411513251, "grad_norm": 0.10980784893035889, "learning_rate": 3.806044457188724e-05, "loss": 0.3229, "step": 7233 }, { "epoch": 1.177991287709156, "grad_norm": 0.13746538758277893, "learning_rate": 3.8056405128200575e-05, "loss": 0.3125, "step": 7234 }, { "epoch": 1.178154134266987, "grad_norm": 0.07101239264011383, "learning_rate": 3.805236521574986e-05, "loss": 0.3276, "step": 7235 }, { "epoch": 1.1783169808248177, "grad_norm": 0.08009088039398193, "learning_rate": 3.804832483468013e-05, "loss": 0.3145, "step": 7236 }, { "epoch": 1.1784798273826487, "grad_norm": 0.06999823451042175, "learning_rate": 3.804428398513643e-05, "loss": 0.3432, "step": 7237 }, { "epoch": 1.1786426739404796, "grad_norm": 0.11930731683969498, "learning_rate": 3.804024266726386e-05, "loss": 0.3259, "step": 7238 }, { "epoch": 1.1788055204983106, "grad_norm": 0.08496122062206268, "learning_rate": 3.803620088120751e-05, "loss": 0.3216, "step": 7239 }, { "epoch": 1.1789683670561413, "grad_norm": 0.17635859549045563, "learning_rate": 3.803215862711249e-05, "loss": 0.3465, "step": 7240 }, { "epoch": 1.1791312136139722, "grad_norm": 0.13881857693195343, "learning_rate": 3.802811590512393e-05, "loss": 0.2962, "step": 7241 }, { "epoch": 1.1792940601718032, "grad_norm": 0.1137939915060997, "learning_rate": 3.802407271538697e-05, "loss": 0.3337, "step": 7242 }, { "epoch": 1.179456906729634, "grad_norm": 0.15988893806934357, "learning_rate": 3.802002905804678e-05, "loss": 0.3609, "step": 7243 }, { "epoch": 1.1796197532874648, "grad_norm": 0.1056598648428917, "learning_rate": 3.801598493324855e-05, "loss": 0.3581, "step": 7244 }, { "epoch": 1.1797825998452958, "grad_norm": 0.09494031220674515, "learning_rate": 3.8011940341137455e-05, "loss": 0.2968, "step": 7245 }, { "epoch": 1.1799454464031267, "grad_norm": 0.15041470527648926, "learning_rate": 3.8007895281858716e-05, "loss": 0.3101, "step": 7246 }, { "epoch": 1.1801082929609574, "grad_norm": 0.14378073811531067, "learning_rate": 3.800384975555756e-05, "loss": 0.2808, "step": 7247 }, { "epoch": 1.1802711395187884, "grad_norm": 0.11947829276323318, "learning_rate": 3.799980376237925e-05, "loss": 0.2927, "step": 7248 }, { "epoch": 1.1804339860766193, "grad_norm": 0.15275515615940094, "learning_rate": 3.799575730246902e-05, "loss": 0.3158, "step": 7249 }, { "epoch": 1.1805968326344503, "grad_norm": 0.12722958624362946, "learning_rate": 3.799171037597218e-05, "loss": 0.2942, "step": 7250 }, { "epoch": 1.1807596791922812, "grad_norm": 0.06864909827709198, "learning_rate": 3.798766298303401e-05, "loss": 0.2956, "step": 7251 }, { "epoch": 1.180922525750112, "grad_norm": 0.10154921561479568, "learning_rate": 3.798361512379982e-05, "loss": 0.3146, "step": 7252 }, { "epoch": 1.1810853723079429, "grad_norm": 0.07895975559949875, "learning_rate": 3.7979566798414955e-05, "loss": 0.3045, "step": 7253 }, { "epoch": 1.1812482188657738, "grad_norm": 0.13750438392162323, "learning_rate": 3.7975518007024754e-05, "loss": 0.3249, "step": 7254 }, { "epoch": 1.1814110654236045, "grad_norm": 0.0959746316075325, "learning_rate": 3.797146874977458e-05, "loss": 0.333, "step": 7255 }, { "epoch": 1.1815739119814355, "grad_norm": 0.09884960949420929, "learning_rate": 3.796741902680981e-05, "loss": 0.3274, "step": 7256 }, { "epoch": 1.1817367585392664, "grad_norm": 0.12018154561519623, "learning_rate": 3.796336883827585e-05, "loss": 0.3182, "step": 7257 }, { "epoch": 1.1818996050970973, "grad_norm": 0.12864436209201813, "learning_rate": 3.7959318184318107e-05, "loss": 0.3087, "step": 7258 }, { "epoch": 1.182062451654928, "grad_norm": 0.17330491542816162, "learning_rate": 3.795526706508201e-05, "loss": 0.3157, "step": 7259 }, { "epoch": 1.182225298212759, "grad_norm": 0.08425676822662354, "learning_rate": 3.7951215480713015e-05, "loss": 0.3268, "step": 7260 }, { "epoch": 1.18238814477059, "grad_norm": 0.13033147156238556, "learning_rate": 3.7947163431356584e-05, "loss": 0.35, "step": 7261 }, { "epoch": 1.1825509913284207, "grad_norm": 0.11617162823677063, "learning_rate": 3.794311091715819e-05, "loss": 0.3394, "step": 7262 }, { "epoch": 1.1827138378862516, "grad_norm": 0.17152179777622223, "learning_rate": 3.7939057938263335e-05, "loss": 0.3646, "step": 7263 }, { "epoch": 1.1828766844440826, "grad_norm": 0.10597594827413559, "learning_rate": 3.793500449481754e-05, "loss": 0.2906, "step": 7264 }, { "epoch": 1.1830395310019135, "grad_norm": 0.11565708369016647, "learning_rate": 3.7930950586966324e-05, "loss": 0.2918, "step": 7265 }, { "epoch": 1.1832023775597444, "grad_norm": 0.09092506766319275, "learning_rate": 3.792689621485525e-05, "loss": 0.3101, "step": 7266 }, { "epoch": 1.1833652241175752, "grad_norm": 0.1526358425617218, "learning_rate": 3.792284137862986e-05, "loss": 0.3026, "step": 7267 }, { "epoch": 1.183528070675406, "grad_norm": 0.15078215301036835, "learning_rate": 3.7918786078435754e-05, "loss": 0.2981, "step": 7268 }, { "epoch": 1.183690917233237, "grad_norm": 0.11791184544563293, "learning_rate": 3.7914730314418515e-05, "loss": 0.2985, "step": 7269 }, { "epoch": 1.1838537637910678, "grad_norm": 0.10104012489318848, "learning_rate": 3.7910674086723765e-05, "loss": 0.321, "step": 7270 }, { "epoch": 1.1840166103488987, "grad_norm": 0.1684679388999939, "learning_rate": 3.790661739549713e-05, "loss": 0.3661, "step": 7271 }, { "epoch": 1.1841794569067297, "grad_norm": 0.1632300466299057, "learning_rate": 3.790256024088428e-05, "loss": 0.3177, "step": 7272 }, { "epoch": 1.1843423034645606, "grad_norm": 0.1576767861843109, "learning_rate": 3.789850262303084e-05, "loss": 0.323, "step": 7273 }, { "epoch": 1.1845051500223913, "grad_norm": 0.1458248347043991, "learning_rate": 3.789444454208252e-05, "loss": 0.3288, "step": 7274 }, { "epoch": 1.1846679965802223, "grad_norm": 0.10005246847867966, "learning_rate": 3.7890385998185006e-05, "loss": 0.3392, "step": 7275 }, { "epoch": 1.1848308431380532, "grad_norm": 0.12218011170625687, "learning_rate": 3.788632699148401e-05, "loss": 0.2889, "step": 7276 }, { "epoch": 1.1849936896958841, "grad_norm": 0.13039016723632812, "learning_rate": 3.788226752212527e-05, "loss": 0.3381, "step": 7277 }, { "epoch": 1.1851565362537149, "grad_norm": 0.17715895175933838, "learning_rate": 3.7878207590254535e-05, "loss": 0.331, "step": 7278 }, { "epoch": 1.1853193828115458, "grad_norm": 0.11647424846887589, "learning_rate": 3.7874147196017565e-05, "loss": 0.2751, "step": 7279 }, { "epoch": 1.1854822293693767, "grad_norm": 0.11671610176563263, "learning_rate": 3.787008633956013e-05, "loss": 0.2905, "step": 7280 }, { "epoch": 1.1856450759272077, "grad_norm": 0.10744508355855942, "learning_rate": 3.786602502102804e-05, "loss": 0.3258, "step": 7281 }, { "epoch": 1.1858079224850384, "grad_norm": 0.10860410332679749, "learning_rate": 3.78619632405671e-05, "loss": 0.3319, "step": 7282 }, { "epoch": 1.1859707690428694, "grad_norm": 0.12813560664653778, "learning_rate": 3.7857900998323144e-05, "loss": 0.3247, "step": 7283 }, { "epoch": 1.1861336156007003, "grad_norm": 0.15943650901317596, "learning_rate": 3.7853838294442026e-05, "loss": 0.3159, "step": 7284 }, { "epoch": 1.186296462158531, "grad_norm": 0.2585848271846771, "learning_rate": 3.784977512906959e-05, "loss": 0.3198, "step": 7285 }, { "epoch": 1.186459308716362, "grad_norm": 0.12817074358463287, "learning_rate": 3.784571150235175e-05, "loss": 0.2995, "step": 7286 }, { "epoch": 1.186622155274193, "grad_norm": 0.10189526528120041, "learning_rate": 3.784164741443436e-05, "loss": 0.3342, "step": 7287 }, { "epoch": 1.1867850018320238, "grad_norm": 0.11738517135381699, "learning_rate": 3.7837582865463363e-05, "loss": 0.2857, "step": 7288 }, { "epoch": 1.1869478483898546, "grad_norm": 0.13368573784828186, "learning_rate": 3.783351785558467e-05, "loss": 0.3122, "step": 7289 }, { "epoch": 1.1871106949476855, "grad_norm": 0.07965478301048279, "learning_rate": 3.782945238494424e-05, "loss": 0.265, "step": 7290 }, { "epoch": 1.1872735415055164, "grad_norm": 0.15089553594589233, "learning_rate": 3.782538645368803e-05, "loss": 0.3052, "step": 7291 }, { "epoch": 1.1874363880633474, "grad_norm": 0.0965753123164177, "learning_rate": 3.782132006196203e-05, "loss": 0.292, "step": 7292 }, { "epoch": 1.1875992346211781, "grad_norm": 0.09594987332820892, "learning_rate": 3.781725320991222e-05, "loss": 0.3048, "step": 7293 }, { "epoch": 1.187762081179009, "grad_norm": 0.0747998058795929, "learning_rate": 3.7813185897684614e-05, "loss": 0.3486, "step": 7294 }, { "epoch": 1.18792492773684, "grad_norm": 0.19150814414024353, "learning_rate": 3.780911812542525e-05, "loss": 0.3044, "step": 7295 }, { "epoch": 1.188087774294671, "grad_norm": 0.0949118584394455, "learning_rate": 3.780504989328016e-05, "loss": 0.3169, "step": 7296 }, { "epoch": 1.1882506208525017, "grad_norm": 0.2158184051513672, "learning_rate": 3.780098120139541e-05, "loss": 0.3373, "step": 7297 }, { "epoch": 1.1884134674103326, "grad_norm": 0.1314736157655716, "learning_rate": 3.77969120499171e-05, "loss": 0.3341, "step": 7298 }, { "epoch": 1.1885763139681635, "grad_norm": 0.1409393846988678, "learning_rate": 3.77928424389913e-05, "loss": 0.3176, "step": 7299 }, { "epoch": 1.1887391605259943, "grad_norm": 0.1762823909521103, "learning_rate": 3.7788772368764125e-05, "loss": 0.2981, "step": 7300 }, { "epoch": 1.1889020070838252, "grad_norm": 0.14738135039806366, "learning_rate": 3.7784701839381706e-05, "loss": 0.2845, "step": 7301 }, { "epoch": 1.1890648536416561, "grad_norm": 0.12278648465871811, "learning_rate": 3.778063085099019e-05, "loss": 0.3326, "step": 7302 }, { "epoch": 1.189227700199487, "grad_norm": 0.10425470024347305, "learning_rate": 3.7776559403735735e-05, "loss": 0.3257, "step": 7303 }, { "epoch": 1.189390546757318, "grad_norm": 0.27926820516586304, "learning_rate": 3.777248749776451e-05, "loss": 0.3827, "step": 7304 }, { "epoch": 1.1895533933151488, "grad_norm": 0.13247446715831757, "learning_rate": 3.776841513322272e-05, "loss": 0.3095, "step": 7305 }, { "epoch": 1.1897162398729797, "grad_norm": 0.11147201806306839, "learning_rate": 3.776434231025658e-05, "loss": 0.3091, "step": 7306 }, { "epoch": 1.1898790864308106, "grad_norm": 0.12396162748336792, "learning_rate": 3.77602690290123e-05, "loss": 0.3415, "step": 7307 }, { "epoch": 1.1900419329886414, "grad_norm": 0.10067914426326752, "learning_rate": 3.7756195289636136e-05, "loss": 0.2607, "step": 7308 }, { "epoch": 1.1902047795464723, "grad_norm": 0.08733479678630829, "learning_rate": 3.775212109227434e-05, "loss": 0.3191, "step": 7309 }, { "epoch": 1.1903676261043032, "grad_norm": 0.14254678785800934, "learning_rate": 3.774804643707319e-05, "loss": 0.3138, "step": 7310 }, { "epoch": 1.1905304726621342, "grad_norm": 0.12376745045185089, "learning_rate": 3.774397132417898e-05, "loss": 0.2868, "step": 7311 }, { "epoch": 1.190693319219965, "grad_norm": 0.10940168797969818, "learning_rate": 3.773989575373802e-05, "loss": 0.3013, "step": 7312 }, { "epoch": 1.1908561657777958, "grad_norm": 0.10133258998394012, "learning_rate": 3.773581972589663e-05, "loss": 0.2931, "step": 7313 }, { "epoch": 1.1910190123356268, "grad_norm": 0.21311916410923004, "learning_rate": 3.773174324080115e-05, "loss": 0.3268, "step": 7314 }, { "epoch": 1.1911818588934577, "grad_norm": 0.14024096727371216, "learning_rate": 3.772766629859795e-05, "loss": 0.3644, "step": 7315 }, { "epoch": 1.1913447054512885, "grad_norm": 0.13687722384929657, "learning_rate": 3.7723588899433387e-05, "loss": 0.3176, "step": 7316 }, { "epoch": 1.1915075520091194, "grad_norm": 0.1742609143257141, "learning_rate": 3.771951104345386e-05, "loss": 0.3567, "step": 7317 }, { "epoch": 1.1916703985669503, "grad_norm": 0.11980864405632019, "learning_rate": 3.7715432730805786e-05, "loss": 0.3, "step": 7318 }, { "epoch": 1.1918332451247813, "grad_norm": 0.1385888159275055, "learning_rate": 3.771135396163559e-05, "loss": 0.338, "step": 7319 }, { "epoch": 1.191996091682612, "grad_norm": 0.12514731287956238, "learning_rate": 3.770727473608968e-05, "loss": 0.293, "step": 7320 }, { "epoch": 1.192158938240443, "grad_norm": 0.08698874711990356, "learning_rate": 3.770319505431455e-05, "loss": 0.2749, "step": 7321 }, { "epoch": 1.1923217847982739, "grad_norm": 0.1741032749414444, "learning_rate": 3.769911491645666e-05, "loss": 0.3376, "step": 7322 }, { "epoch": 1.1924846313561046, "grad_norm": 0.12505124509334564, "learning_rate": 3.7695034322662494e-05, "loss": 0.2729, "step": 7323 }, { "epoch": 1.1926474779139355, "grad_norm": 0.13051700592041016, "learning_rate": 3.769095327307856e-05, "loss": 0.3213, "step": 7324 }, { "epoch": 1.1928103244717665, "grad_norm": 0.07129335403442383, "learning_rate": 3.768687176785137e-05, "loss": 0.3058, "step": 7325 }, { "epoch": 1.1929731710295974, "grad_norm": 0.10216677933931351, "learning_rate": 3.7682789807127485e-05, "loss": 0.3573, "step": 7326 }, { "epoch": 1.1931360175874282, "grad_norm": 0.15850317478179932, "learning_rate": 3.767870739105344e-05, "loss": 0.3273, "step": 7327 }, { "epoch": 1.193298864145259, "grad_norm": 0.11491160094738007, "learning_rate": 3.767462451977582e-05, "loss": 0.305, "step": 7328 }, { "epoch": 1.19346171070309, "grad_norm": 0.14037708938121796, "learning_rate": 3.76705411934412e-05, "loss": 0.3197, "step": 7329 }, { "epoch": 1.193624557260921, "grad_norm": 0.17171233892440796, "learning_rate": 3.7666457412196195e-05, "loss": 0.3236, "step": 7330 }, { "epoch": 1.1937874038187517, "grad_norm": 0.09300728142261505, "learning_rate": 3.766237317618741e-05, "loss": 0.2778, "step": 7331 }, { "epoch": 1.1939502503765826, "grad_norm": 0.1739901304244995, "learning_rate": 3.76582884855615e-05, "loss": 0.3224, "step": 7332 }, { "epoch": 1.1941130969344136, "grad_norm": 0.11805561929941177, "learning_rate": 3.765420334046511e-05, "loss": 0.3096, "step": 7333 }, { "epoch": 1.1942759434922445, "grad_norm": 0.10432026535272598, "learning_rate": 3.7650117741044894e-05, "loss": 0.3619, "step": 7334 }, { "epoch": 1.1944387900500752, "grad_norm": 0.10167500376701355, "learning_rate": 3.764603168744756e-05, "loss": 0.3515, "step": 7335 }, { "epoch": 1.1946016366079062, "grad_norm": 0.1502653807401657, "learning_rate": 3.7641945179819795e-05, "loss": 0.32, "step": 7336 }, { "epoch": 1.1947644831657371, "grad_norm": 0.10798375308513641, "learning_rate": 3.763785821830832e-05, "loss": 0.2948, "step": 7337 }, { "epoch": 1.1949273297235679, "grad_norm": 0.13485343754291534, "learning_rate": 3.763377080305988e-05, "loss": 0.314, "step": 7338 }, { "epoch": 1.1950901762813988, "grad_norm": 0.12474925816059113, "learning_rate": 3.7629682934221214e-05, "loss": 0.3333, "step": 7339 }, { "epoch": 1.1952530228392297, "grad_norm": 0.13699248433113098, "learning_rate": 3.7625594611939086e-05, "loss": 0.2911, "step": 7340 }, { "epoch": 1.1954158693970607, "grad_norm": 0.09060008823871613, "learning_rate": 3.762150583636029e-05, "loss": 0.2962, "step": 7341 }, { "epoch": 1.1955787159548916, "grad_norm": 0.1514594554901123, "learning_rate": 3.7617416607631614e-05, "loss": 0.3209, "step": 7342 }, { "epoch": 1.1957415625127223, "grad_norm": 0.10453683137893677, "learning_rate": 3.7613326925899886e-05, "loss": 0.313, "step": 7343 }, { "epoch": 1.1959044090705533, "grad_norm": 0.11854925751686096, "learning_rate": 3.760923679131192e-05, "loss": 0.3405, "step": 7344 }, { "epoch": 1.1960672556283842, "grad_norm": 0.11088418960571289, "learning_rate": 3.760514620401458e-05, "loss": 0.3158, "step": 7345 }, { "epoch": 1.196230102186215, "grad_norm": 0.0890185683965683, "learning_rate": 3.760105516415473e-05, "loss": 0.3149, "step": 7346 }, { "epoch": 1.196392948744046, "grad_norm": 0.12606751918792725, "learning_rate": 3.759696367187924e-05, "loss": 0.3348, "step": 7347 }, { "epoch": 1.1965557953018768, "grad_norm": 0.1179322898387909, "learning_rate": 3.759287172733502e-05, "loss": 0.3114, "step": 7348 }, { "epoch": 1.1967186418597078, "grad_norm": 0.19447271525859833, "learning_rate": 3.758877933066897e-05, "loss": 0.3324, "step": 7349 }, { "epoch": 1.1968814884175385, "grad_norm": 0.2243271768093109, "learning_rate": 3.758468648202803e-05, "loss": 0.3075, "step": 7350 }, { "epoch": 1.1970443349753694, "grad_norm": 0.1529836356639862, "learning_rate": 3.758059318155913e-05, "loss": 0.3337, "step": 7351 }, { "epoch": 1.1972071815332004, "grad_norm": 0.1150282621383667, "learning_rate": 3.7576499429409254e-05, "loss": 0.3566, "step": 7352 }, { "epoch": 1.197370028091031, "grad_norm": 0.14390850067138672, "learning_rate": 3.757240522572536e-05, "loss": 0.3144, "step": 7353 }, { "epoch": 1.197532874648862, "grad_norm": 0.09941338747739792, "learning_rate": 3.756831057065445e-05, "loss": 0.2991, "step": 7354 }, { "epoch": 1.197695721206693, "grad_norm": 0.11987212300300598, "learning_rate": 3.756421546434353e-05, "loss": 0.315, "step": 7355 }, { "epoch": 1.197858567764524, "grad_norm": 0.09720643609762192, "learning_rate": 3.756011990693965e-05, "loss": 0.2662, "step": 7356 }, { "epoch": 1.1980214143223549, "grad_norm": 0.10048341006040573, "learning_rate": 3.755602389858982e-05, "loss": 0.3052, "step": 7357 }, { "epoch": 1.1981842608801856, "grad_norm": 0.20506931841373444, "learning_rate": 3.7551927439441116e-05, "loss": 0.3437, "step": 7358 }, { "epoch": 1.1983471074380165, "grad_norm": 0.10581792891025543, "learning_rate": 3.754783052964062e-05, "loss": 0.3219, "step": 7359 }, { "epoch": 1.1985099539958475, "grad_norm": 0.10780125856399536, "learning_rate": 3.754373316933539e-05, "loss": 0.2988, "step": 7360 }, { "epoch": 1.1986728005536782, "grad_norm": 0.14392265677452087, "learning_rate": 3.7539635358672576e-05, "loss": 0.3125, "step": 7361 }, { "epoch": 1.1988356471115091, "grad_norm": 0.19092236459255219, "learning_rate": 3.753553709779928e-05, "loss": 0.3093, "step": 7362 }, { "epoch": 1.19899849366934, "grad_norm": 0.10783586651086807, "learning_rate": 3.753143838686265e-05, "loss": 0.3349, "step": 7363 }, { "epoch": 1.199161340227171, "grad_norm": 0.15987920761108398, "learning_rate": 3.7527339226009826e-05, "loss": 0.3337, "step": 7364 }, { "epoch": 1.1993241867850017, "grad_norm": 0.12803258001804352, "learning_rate": 3.7523239615387997e-05, "loss": 0.3253, "step": 7365 }, { "epoch": 1.1994870333428327, "grad_norm": 0.15239600837230682, "learning_rate": 3.751913955514435e-05, "loss": 0.3101, "step": 7366 }, { "epoch": 1.1996498799006636, "grad_norm": 0.08186610043048859, "learning_rate": 3.751503904542607e-05, "loss": 0.3137, "step": 7367 }, { "epoch": 1.1998127264584946, "grad_norm": 0.1036846786737442, "learning_rate": 3.75109380863804e-05, "loss": 0.3211, "step": 7368 }, { "epoch": 1.1999755730163253, "grad_norm": 0.09470977634191513, "learning_rate": 3.7506836678154564e-05, "loss": 0.3005, "step": 7369 }, { "epoch": 1.2001384195741562, "grad_norm": 0.10050147771835327, "learning_rate": 3.750273482089584e-05, "loss": 0.2967, "step": 7370 }, { "epoch": 1.2003012661319872, "grad_norm": 0.14780192077159882, "learning_rate": 3.749863251475145e-05, "loss": 0.3355, "step": 7371 }, { "epoch": 1.2004641126898181, "grad_norm": 0.1341594010591507, "learning_rate": 3.749452975986872e-05, "loss": 0.3487, "step": 7372 }, { "epoch": 1.2006269592476488, "grad_norm": 0.11014237999916077, "learning_rate": 3.749042655639494e-05, "loss": 0.3307, "step": 7373 }, { "epoch": 1.2007898058054798, "grad_norm": 0.10191984474658966, "learning_rate": 3.748632290447741e-05, "loss": 0.3036, "step": 7374 }, { "epoch": 1.2009526523633107, "grad_norm": 0.16829273104667664, "learning_rate": 3.748221880426349e-05, "loss": 0.3441, "step": 7375 }, { "epoch": 1.2011154989211414, "grad_norm": 0.1105458214879036, "learning_rate": 3.7478114255900506e-05, "loss": 0.295, "step": 7376 }, { "epoch": 1.2012783454789724, "grad_norm": 0.107310950756073, "learning_rate": 3.747400925953585e-05, "loss": 0.3144, "step": 7377 }, { "epoch": 1.2014411920368033, "grad_norm": 0.10232996940612793, "learning_rate": 3.7469903815316876e-05, "loss": 0.3051, "step": 7378 }, { "epoch": 1.2016040385946343, "grad_norm": 0.11168745905160904, "learning_rate": 3.7465797923391004e-05, "loss": 0.3276, "step": 7379 }, { "epoch": 1.2017668851524652, "grad_norm": 0.1225365549325943, "learning_rate": 3.7461691583905635e-05, "loss": 0.2979, "step": 7380 }, { "epoch": 1.201929731710296, "grad_norm": 0.11440188437700272, "learning_rate": 3.74575847970082e-05, "loss": 0.3307, "step": 7381 }, { "epoch": 1.2020925782681269, "grad_norm": 0.13472674787044525, "learning_rate": 3.745347756284614e-05, "loss": 0.2921, "step": 7382 }, { "epoch": 1.2022554248259578, "grad_norm": 0.13028335571289062, "learning_rate": 3.7449369881566944e-05, "loss": 0.3517, "step": 7383 }, { "epoch": 1.2024182713837885, "grad_norm": 0.14808981120586395, "learning_rate": 3.744526175331805e-05, "loss": 0.3297, "step": 7384 }, { "epoch": 1.2025811179416195, "grad_norm": 0.10407974570989609, "learning_rate": 3.744115317824698e-05, "loss": 0.3117, "step": 7385 }, { "epoch": 1.2027439644994504, "grad_norm": 0.1882166564464569, "learning_rate": 3.743704415650123e-05, "loss": 0.3005, "step": 7386 }, { "epoch": 1.2029068110572814, "grad_norm": 0.14493121206760406, "learning_rate": 3.743293468822834e-05, "loss": 0.3362, "step": 7387 }, { "epoch": 1.203069657615112, "grad_norm": 0.14357781410217285, "learning_rate": 3.742882477357585e-05, "loss": 0.296, "step": 7388 }, { "epoch": 1.203232504172943, "grad_norm": 0.09283388406038284, "learning_rate": 3.74247144126913e-05, "loss": 0.311, "step": 7389 }, { "epoch": 1.203395350730774, "grad_norm": 0.1694801300764084, "learning_rate": 3.742060360572229e-05, "loss": 0.2949, "step": 7390 }, { "epoch": 1.2035581972886047, "grad_norm": 0.100748211145401, "learning_rate": 3.741649235281638e-05, "loss": 0.3152, "step": 7391 }, { "epoch": 1.2037210438464356, "grad_norm": 0.1042524129152298, "learning_rate": 3.741238065412121e-05, "loss": 0.2675, "step": 7392 }, { "epoch": 1.2038838904042666, "grad_norm": 0.145471453666687, "learning_rate": 3.740826850978438e-05, "loss": 0.3321, "step": 7393 }, { "epoch": 1.2040467369620975, "grad_norm": 0.06040129065513611, "learning_rate": 3.740415591995353e-05, "loss": 0.3352, "step": 7394 }, { "epoch": 1.2042095835199285, "grad_norm": 0.12105800211429596, "learning_rate": 3.740004288477632e-05, "loss": 0.3127, "step": 7395 }, { "epoch": 1.2043724300777592, "grad_norm": 0.1656019687652588, "learning_rate": 3.739592940440042e-05, "loss": 0.2957, "step": 7396 }, { "epoch": 1.2045352766355901, "grad_norm": 0.09549395740032196, "learning_rate": 3.739181547897352e-05, "loss": 0.3231, "step": 7397 }, { "epoch": 1.204698123193421, "grad_norm": 0.12249744683504105, "learning_rate": 3.7387701108643316e-05, "loss": 0.3426, "step": 7398 }, { "epoch": 1.2048609697512518, "grad_norm": 0.16514462232589722, "learning_rate": 3.7383586293557524e-05, "loss": 0.3352, "step": 7399 }, { "epoch": 1.2050238163090827, "grad_norm": 0.21326415240764618, "learning_rate": 3.737947103386388e-05, "loss": 0.3433, "step": 7400 }, { "epoch": 1.2051866628669137, "grad_norm": 0.1342896670103073, "learning_rate": 3.737535532971014e-05, "loss": 0.3286, "step": 7401 }, { "epoch": 1.2053495094247446, "grad_norm": 0.08414845168590546, "learning_rate": 3.7371239181244064e-05, "loss": 0.2955, "step": 7402 }, { "epoch": 1.2055123559825753, "grad_norm": 0.17194370925426483, "learning_rate": 3.7367122588613444e-05, "loss": 0.3479, "step": 7403 }, { "epoch": 1.2056752025404063, "grad_norm": 0.12344010919332504, "learning_rate": 3.736300555196606e-05, "loss": 0.316, "step": 7404 }, { "epoch": 1.2058380490982372, "grad_norm": 0.17187629640102386, "learning_rate": 3.7358888071449736e-05, "loss": 0.3237, "step": 7405 }, { "epoch": 1.2060008956560682, "grad_norm": 0.09618689864873886, "learning_rate": 3.7354770147212315e-05, "loss": 0.337, "step": 7406 }, { "epoch": 1.2061637422138989, "grad_norm": 0.11465966701507568, "learning_rate": 3.7350651779401616e-05, "loss": 0.2904, "step": 7407 }, { "epoch": 1.2063265887717298, "grad_norm": 0.11340566724538803, "learning_rate": 3.734653296816552e-05, "loss": 0.3015, "step": 7408 }, { "epoch": 1.2064894353295608, "grad_norm": 0.14276595413684845, "learning_rate": 3.73424137136519e-05, "loss": 0.2993, "step": 7409 }, { "epoch": 1.2066522818873917, "grad_norm": 0.09243939071893692, "learning_rate": 3.733829401600865e-05, "loss": 0.3177, "step": 7410 }, { "epoch": 1.2068151284452224, "grad_norm": 0.10508272051811218, "learning_rate": 3.7334173875383674e-05, "loss": 0.318, "step": 7411 }, { "epoch": 1.2069779750030534, "grad_norm": 0.10477854311466217, "learning_rate": 3.7330053291924906e-05, "loss": 0.3078, "step": 7412 }, { "epoch": 1.2071408215608843, "grad_norm": 0.17105451226234436, "learning_rate": 3.732593226578028e-05, "loss": 0.3098, "step": 7413 }, { "epoch": 1.207303668118715, "grad_norm": 0.058636389672756195, "learning_rate": 3.732181079709775e-05, "loss": 0.2831, "step": 7414 }, { "epoch": 1.207466514676546, "grad_norm": 0.115080825984478, "learning_rate": 3.731768888602531e-05, "loss": 0.3159, "step": 7415 }, { "epoch": 1.207629361234377, "grad_norm": 0.14338363707065582, "learning_rate": 3.731356653271092e-05, "loss": 0.3108, "step": 7416 }, { "epoch": 1.2077922077922079, "grad_norm": 0.0748770609498024, "learning_rate": 3.730944373730261e-05, "loss": 0.3287, "step": 7417 }, { "epoch": 1.2079550543500386, "grad_norm": 0.14463025331497192, "learning_rate": 3.730532049994838e-05, "loss": 0.29, "step": 7418 }, { "epoch": 1.2081179009078695, "grad_norm": 0.19234763085842133, "learning_rate": 3.730119682079629e-05, "loss": 0.3501, "step": 7419 }, { "epoch": 1.2082807474657005, "grad_norm": 0.10474228858947754, "learning_rate": 3.729707269999436e-05, "loss": 0.3042, "step": 7420 }, { "epoch": 1.2084435940235314, "grad_norm": 0.09936463087797165, "learning_rate": 3.729294813769069e-05, "loss": 0.3368, "step": 7421 }, { "epoch": 1.2086064405813621, "grad_norm": 0.07574386894702911, "learning_rate": 3.728882313403335e-05, "loss": 0.3239, "step": 7422 }, { "epoch": 1.208769287139193, "grad_norm": 0.1190653070807457, "learning_rate": 3.7284697689170445e-05, "loss": 0.2833, "step": 7423 }, { "epoch": 1.208932133697024, "grad_norm": 0.11415480077266693, "learning_rate": 3.7280571803250076e-05, "loss": 0.2979, "step": 7424 }, { "epoch": 1.209094980254855, "grad_norm": 0.1732494980096817, "learning_rate": 3.7276445476420394e-05, "loss": 0.33, "step": 7425 }, { "epoch": 1.2092578268126857, "grad_norm": 0.09302964061498642, "learning_rate": 3.7272318708829535e-05, "loss": 0.3164, "step": 7426 }, { "epoch": 1.2094206733705166, "grad_norm": 0.10805390775203705, "learning_rate": 3.726819150062566e-05, "loss": 0.337, "step": 7427 }, { "epoch": 1.2095835199283476, "grad_norm": 0.09235066175460815, "learning_rate": 3.7264063851956967e-05, "loss": 0.3002, "step": 7428 }, { "epoch": 1.2097463664861783, "grad_norm": 0.12774208188056946, "learning_rate": 3.7259935762971623e-05, "loss": 0.3071, "step": 7429 }, { "epoch": 1.2099092130440092, "grad_norm": 0.15633109211921692, "learning_rate": 3.725580723381787e-05, "loss": 0.3279, "step": 7430 }, { "epoch": 1.2100720596018402, "grad_norm": 0.13057459890842438, "learning_rate": 3.7251678264643905e-05, "loss": 0.3488, "step": 7431 }, { "epoch": 1.210234906159671, "grad_norm": 0.08281223475933075, "learning_rate": 3.7247548855597985e-05, "loss": 0.3173, "step": 7432 }, { "epoch": 1.210397752717502, "grad_norm": 0.09015162289142609, "learning_rate": 3.724341900682837e-05, "loss": 0.3077, "step": 7433 }, { "epoch": 1.2105605992753328, "grad_norm": 0.14467008411884308, "learning_rate": 3.7239288718483326e-05, "loss": 0.338, "step": 7434 }, { "epoch": 1.2107234458331637, "grad_norm": 0.11505783349275589, "learning_rate": 3.723515799071115e-05, "loss": 0.2891, "step": 7435 }, { "epoch": 1.2108862923909947, "grad_norm": 0.11368197947740555, "learning_rate": 3.7231026823660145e-05, "loss": 0.3152, "step": 7436 }, { "epoch": 1.2110491389488254, "grad_norm": 0.11810445785522461, "learning_rate": 3.722689521747863e-05, "loss": 0.3091, "step": 7437 }, { "epoch": 1.2112119855066563, "grad_norm": 0.14454074203968048, "learning_rate": 3.7222763172314946e-05, "loss": 0.339, "step": 7438 }, { "epoch": 1.2113748320644873, "grad_norm": 0.12792780995368958, "learning_rate": 3.7218630688317443e-05, "loss": 0.3191, "step": 7439 }, { "epoch": 1.2115376786223182, "grad_norm": 0.07818510383367538, "learning_rate": 3.721449776563449e-05, "loss": 0.3105, "step": 7440 }, { "epoch": 1.211700525180149, "grad_norm": 0.0719941109418869, "learning_rate": 3.721036440441448e-05, "loss": 0.3635, "step": 7441 }, { "epoch": 1.2118633717379799, "grad_norm": 0.10149269551038742, "learning_rate": 3.720623060480579e-05, "loss": 0.2783, "step": 7442 }, { "epoch": 1.2120262182958108, "grad_norm": 0.1424659639596939, "learning_rate": 3.720209636695686e-05, "loss": 0.3069, "step": 7443 }, { "epoch": 1.2121890648536418, "grad_norm": 0.17361606657505035, "learning_rate": 3.7197961691016104e-05, "loss": 0.3511, "step": 7444 }, { "epoch": 1.2123519114114725, "grad_norm": 0.09170565009117126, "learning_rate": 3.7193826577131984e-05, "loss": 0.3291, "step": 7445 }, { "epoch": 1.2125147579693034, "grad_norm": 0.11174549907445908, "learning_rate": 3.718969102545296e-05, "loss": 0.3187, "step": 7446 }, { "epoch": 1.2126776045271344, "grad_norm": 0.15872125327587128, "learning_rate": 3.7185555036127496e-05, "loss": 0.3111, "step": 7447 }, { "epoch": 1.2128404510849653, "grad_norm": 0.08481009304523468, "learning_rate": 3.718141860930411e-05, "loss": 0.3215, "step": 7448 }, { "epoch": 1.213003297642796, "grad_norm": 0.1420053094625473, "learning_rate": 3.717728174513129e-05, "loss": 0.2944, "step": 7449 }, { "epoch": 1.213166144200627, "grad_norm": 0.12496621906757355, "learning_rate": 3.717314444375757e-05, "loss": 0.3372, "step": 7450 }, { "epoch": 1.213328990758458, "grad_norm": 0.18997468054294586, "learning_rate": 3.71690067053315e-05, "loss": 0.3463, "step": 7451 }, { "epoch": 1.2134918373162886, "grad_norm": 0.11638102680444717, "learning_rate": 3.716486853000162e-05, "loss": 0.3622, "step": 7452 }, { "epoch": 1.2136546838741196, "grad_norm": 0.11922318488359451, "learning_rate": 3.716072991791652e-05, "loss": 0.2823, "step": 7453 }, { "epoch": 1.2138175304319505, "grad_norm": 0.1505340039730072, "learning_rate": 3.715659086922478e-05, "loss": 0.3107, "step": 7454 }, { "epoch": 1.2139803769897815, "grad_norm": 0.10444534569978714, "learning_rate": 3.715245138407501e-05, "loss": 0.3129, "step": 7455 }, { "epoch": 1.2141432235476122, "grad_norm": 0.11530444771051407, "learning_rate": 3.7148311462615814e-05, "loss": 0.2884, "step": 7456 }, { "epoch": 1.2143060701054431, "grad_norm": 0.12211234122514725, "learning_rate": 3.714417110499585e-05, "loss": 0.2962, "step": 7457 }, { "epoch": 1.214468916663274, "grad_norm": 0.08735194057226181, "learning_rate": 3.714003031136375e-05, "loss": 0.3322, "step": 7458 }, { "epoch": 1.214631763221105, "grad_norm": 0.09335336089134216, "learning_rate": 3.71358890818682e-05, "loss": 0.2833, "step": 7459 }, { "epoch": 1.2147946097789357, "grad_norm": 0.09795810282230377, "learning_rate": 3.713174741665786e-05, "loss": 0.3174, "step": 7460 }, { "epoch": 1.2149574563367667, "grad_norm": 0.1374080330133438, "learning_rate": 3.7127605315881456e-05, "loss": 0.3514, "step": 7461 }, { "epoch": 1.2151203028945976, "grad_norm": 0.11647816002368927, "learning_rate": 3.7123462779687676e-05, "loss": 0.3011, "step": 7462 }, { "epoch": 1.2152831494524285, "grad_norm": 0.1768561601638794, "learning_rate": 3.711931980822526e-05, "loss": 0.3799, "step": 7463 }, { "epoch": 1.2154459960102593, "grad_norm": 0.0892597883939743, "learning_rate": 3.7115176401642947e-05, "loss": 0.2872, "step": 7464 }, { "epoch": 1.2156088425680902, "grad_norm": 0.14092804491519928, "learning_rate": 3.7111032560089517e-05, "loss": 0.3353, "step": 7465 }, { "epoch": 1.2157716891259212, "grad_norm": 0.10854443907737732, "learning_rate": 3.7106888283713715e-05, "loss": 0.3044, "step": 7466 }, { "epoch": 1.2159345356837519, "grad_norm": 0.11441521346569061, "learning_rate": 3.710274357266436e-05, "loss": 0.3447, "step": 7467 }, { "epoch": 1.2160973822415828, "grad_norm": 0.11504770815372467, "learning_rate": 3.7098598427090253e-05, "loss": 0.296, "step": 7468 }, { "epoch": 1.2162602287994138, "grad_norm": 0.12418639659881592, "learning_rate": 3.7094452847140206e-05, "loss": 0.3431, "step": 7469 }, { "epoch": 1.2164230753572447, "grad_norm": 0.10924121737480164, "learning_rate": 3.709030683296307e-05, "loss": 0.3414, "step": 7470 }, { "epoch": 1.2165859219150756, "grad_norm": 0.13793212175369263, "learning_rate": 3.7086160384707695e-05, "loss": 0.3443, "step": 7471 }, { "epoch": 1.2167487684729064, "grad_norm": 0.14387457072734833, "learning_rate": 3.708201350252295e-05, "loss": 0.341, "step": 7472 }, { "epoch": 1.2169116150307373, "grad_norm": 0.1295987367630005, "learning_rate": 3.707786618655772e-05, "loss": 0.3333, "step": 7473 }, { "epoch": 1.2170744615885682, "grad_norm": 0.12668119370937347, "learning_rate": 3.707371843696091e-05, "loss": 0.3206, "step": 7474 }, { "epoch": 1.217237308146399, "grad_norm": 0.12865552306175232, "learning_rate": 3.7069570253881436e-05, "loss": 0.3302, "step": 7475 }, { "epoch": 1.21740015470423, "grad_norm": 0.15490326285362244, "learning_rate": 3.706542163746823e-05, "loss": 0.3052, "step": 7476 }, { "epoch": 1.2175630012620609, "grad_norm": 0.09679581969976425, "learning_rate": 3.706127258787024e-05, "loss": 0.3394, "step": 7477 }, { "epoch": 1.2177258478198918, "grad_norm": 0.13392318785190582, "learning_rate": 3.705712310523642e-05, "loss": 0.2907, "step": 7478 }, { "epoch": 1.2178886943777225, "grad_norm": 0.6588558554649353, "learning_rate": 3.705297318971576e-05, "loss": 0.2831, "step": 7479 }, { "epoch": 1.2180515409355535, "grad_norm": 0.11924969404935837, "learning_rate": 3.7048822841457245e-05, "loss": 0.3463, "step": 7480 }, { "epoch": 1.2182143874933844, "grad_norm": 0.13775813579559326, "learning_rate": 3.70446720606099e-05, "loss": 0.2988, "step": 7481 }, { "epoch": 1.2183772340512151, "grad_norm": 0.10592171549797058, "learning_rate": 3.7040520847322734e-05, "loss": 0.2919, "step": 7482 }, { "epoch": 1.218540080609046, "grad_norm": 0.13470923900604248, "learning_rate": 3.70363692017448e-05, "loss": 0.2769, "step": 7483 }, { "epoch": 1.218702927166877, "grad_norm": 0.17837676405906677, "learning_rate": 3.7032217124025153e-05, "loss": 0.3143, "step": 7484 }, { "epoch": 1.218865773724708, "grad_norm": 0.15110209584236145, "learning_rate": 3.7028064614312854e-05, "loss": 0.3552, "step": 7485 }, { "epoch": 1.2190286202825389, "grad_norm": 0.11640719324350357, "learning_rate": 3.7023911672757e-05, "loss": 0.3143, "step": 7486 }, { "epoch": 1.2191914668403696, "grad_norm": 0.11326811462640762, "learning_rate": 3.7019758299506696e-05, "loss": 0.3554, "step": 7487 }, { "epoch": 1.2193543133982006, "grad_norm": 0.11858747899532318, "learning_rate": 3.701560449471106e-05, "loss": 0.3468, "step": 7488 }, { "epoch": 1.2195171599560315, "grad_norm": 0.19343796372413635, "learning_rate": 3.701145025851921e-05, "loss": 0.3058, "step": 7489 }, { "epoch": 1.2196800065138622, "grad_norm": 0.15259209275245667, "learning_rate": 3.700729559108031e-05, "loss": 0.3284, "step": 7490 }, { "epoch": 1.2198428530716932, "grad_norm": 0.1488756537437439, "learning_rate": 3.700314049254354e-05, "loss": 0.2806, "step": 7491 }, { "epoch": 1.220005699629524, "grad_norm": 0.14366169273853302, "learning_rate": 3.699898496305805e-05, "loss": 0.3222, "step": 7492 }, { "epoch": 1.220168546187355, "grad_norm": 0.11647211760282516, "learning_rate": 3.699482900277305e-05, "loss": 0.3072, "step": 7493 }, { "epoch": 1.2203313927451858, "grad_norm": 0.1068558320403099, "learning_rate": 3.6990672611837764e-05, "loss": 0.2973, "step": 7494 }, { "epoch": 1.2204942393030167, "grad_norm": 0.10139334201812744, "learning_rate": 3.6986515790401385e-05, "loss": 0.2973, "step": 7495 }, { "epoch": 1.2206570858608476, "grad_norm": 0.1697942167520523, "learning_rate": 3.69823585386132e-05, "loss": 0.3069, "step": 7496 }, { "epoch": 1.2208199324186786, "grad_norm": 0.1775846630334854, "learning_rate": 3.697820085662242e-05, "loss": 0.3191, "step": 7497 }, { "epoch": 1.2209827789765093, "grad_norm": 0.12752781808376312, "learning_rate": 3.697404274457836e-05, "loss": 0.3305, "step": 7498 }, { "epoch": 1.2211456255343403, "grad_norm": 0.09887612611055374, "learning_rate": 3.696988420263028e-05, "loss": 0.3453, "step": 7499 }, { "epoch": 1.2213084720921712, "grad_norm": 0.6535617113113403, "learning_rate": 3.69657252309275e-05, "loss": 0.3811, "step": 7500 }, { "epoch": 1.2214713186500021, "grad_norm": 0.06452739238739014, "learning_rate": 3.696156582961934e-05, "loss": 0.2943, "step": 7501 }, { "epoch": 1.2216341652078329, "grad_norm": 0.1224408894777298, "learning_rate": 3.695740599885512e-05, "loss": 0.2907, "step": 7502 }, { "epoch": 1.2217970117656638, "grad_norm": 0.11547859013080597, "learning_rate": 3.69532457387842e-05, "loss": 0.3322, "step": 7503 }, { "epoch": 1.2219598583234947, "grad_norm": 0.11988770961761475, "learning_rate": 3.694908504955594e-05, "loss": 0.3139, "step": 7504 }, { "epoch": 1.2221227048813255, "grad_norm": 0.16990812122821808, "learning_rate": 3.6944923931319734e-05, "loss": 0.3426, "step": 7505 }, { "epoch": 1.2222855514391564, "grad_norm": 0.12422624230384827, "learning_rate": 3.6940762384224966e-05, "loss": 0.2926, "step": 7506 }, { "epoch": 1.2224483979969873, "grad_norm": 0.11811283975839615, "learning_rate": 3.693660040842106e-05, "loss": 0.324, "step": 7507 }, { "epoch": 1.2226112445548183, "grad_norm": 0.11833613365888596, "learning_rate": 3.6932438004057426e-05, "loss": 0.3219, "step": 7508 }, { "epoch": 1.2227740911126492, "grad_norm": 0.10135837644338608, "learning_rate": 3.692827517128353e-05, "loss": 0.3303, "step": 7509 }, { "epoch": 1.22293693767048, "grad_norm": 0.12132732570171356, "learning_rate": 3.69241119102488e-05, "loss": 0.3144, "step": 7510 }, { "epoch": 1.223099784228311, "grad_norm": 0.12264510244131088, "learning_rate": 3.691994822110274e-05, "loss": 0.2934, "step": 7511 }, { "epoch": 1.2232626307861418, "grad_norm": 0.11833714693784714, "learning_rate": 3.691578410399482e-05, "loss": 0.2982, "step": 7512 }, { "epoch": 1.2234254773439726, "grad_norm": 0.17505134642124176, "learning_rate": 3.6911619559074555e-05, "loss": 0.2976, "step": 7513 }, { "epoch": 1.2235883239018035, "grad_norm": 0.13987405598163605, "learning_rate": 3.690745458649146e-05, "loss": 0.3615, "step": 7514 }, { "epoch": 1.2237511704596344, "grad_norm": 0.12995867431163788, "learning_rate": 3.690328918639506e-05, "loss": 0.2883, "step": 7515 }, { "epoch": 1.2239140170174654, "grad_norm": 0.09015968441963196, "learning_rate": 3.689912335893493e-05, "loss": 0.3395, "step": 7516 }, { "epoch": 1.224076863575296, "grad_norm": 0.12817764282226562, "learning_rate": 3.6894957104260615e-05, "loss": 0.3026, "step": 7517 }, { "epoch": 1.224239710133127, "grad_norm": 0.10813894122838974, "learning_rate": 3.689079042252169e-05, "loss": 0.2754, "step": 7518 }, { "epoch": 1.224402556690958, "grad_norm": 0.17902831733226776, "learning_rate": 3.688662331386778e-05, "loss": 0.3251, "step": 7519 }, { "epoch": 1.2245654032487887, "grad_norm": 0.12551254034042358, "learning_rate": 3.6882455778448474e-05, "loss": 0.318, "step": 7520 }, { "epoch": 1.2247282498066197, "grad_norm": 0.13805978000164032, "learning_rate": 3.6878287816413404e-05, "loss": 0.3233, "step": 7521 }, { "epoch": 1.2248910963644506, "grad_norm": 0.10286975651979446, "learning_rate": 3.687411942791221e-05, "loss": 0.3309, "step": 7522 }, { "epoch": 1.2250539429222815, "grad_norm": 0.1251329779624939, "learning_rate": 3.686995061309456e-05, "loss": 0.3183, "step": 7523 }, { "epoch": 1.2252167894801125, "grad_norm": 0.11247903853654861, "learning_rate": 3.686578137211011e-05, "loss": 0.3305, "step": 7524 }, { "epoch": 1.2253796360379432, "grad_norm": 0.17579983174800873, "learning_rate": 3.686161170510857e-05, "loss": 0.3305, "step": 7525 }, { "epoch": 1.2255424825957741, "grad_norm": 0.10919274389743805, "learning_rate": 3.685744161223962e-05, "loss": 0.3015, "step": 7526 }, { "epoch": 1.225705329153605, "grad_norm": 0.08177334815263748, "learning_rate": 3.6853271093652986e-05, "loss": 0.322, "step": 7527 }, { "epoch": 1.2258681757114358, "grad_norm": 0.09683560580015182, "learning_rate": 3.684910014949842e-05, "loss": 0.2741, "step": 7528 }, { "epoch": 1.2260310222692667, "grad_norm": 0.08976127952337265, "learning_rate": 3.684492877992565e-05, "loss": 0.3, "step": 7529 }, { "epoch": 1.2261938688270977, "grad_norm": 0.15405318140983582, "learning_rate": 3.684075698508445e-05, "loss": 0.3546, "step": 7530 }, { "epoch": 1.2263567153849286, "grad_norm": 0.11045064777135849, "learning_rate": 3.68365847651246e-05, "loss": 0.3423, "step": 7531 }, { "epoch": 1.2265195619427594, "grad_norm": 0.10085548460483551, "learning_rate": 3.683241212019589e-05, "loss": 0.2978, "step": 7532 }, { "epoch": 1.2266824085005903, "grad_norm": 0.1543308049440384, "learning_rate": 3.682823905044814e-05, "loss": 0.3316, "step": 7533 }, { "epoch": 1.2268452550584212, "grad_norm": 0.10248053818941116, "learning_rate": 3.682406555603117e-05, "loss": 0.2891, "step": 7534 }, { "epoch": 1.2270081016162522, "grad_norm": 0.11616720259189606, "learning_rate": 3.681989163709481e-05, "loss": 0.2932, "step": 7535 }, { "epoch": 1.227170948174083, "grad_norm": 0.12154297530651093, "learning_rate": 3.681571729378894e-05, "loss": 0.2988, "step": 7536 }, { "epoch": 1.2273337947319138, "grad_norm": 0.15434224903583527, "learning_rate": 3.6811542526263406e-05, "loss": 0.3144, "step": 7537 }, { "epoch": 1.2274966412897448, "grad_norm": 0.12413313239812851, "learning_rate": 3.680736733466812e-05, "loss": 0.3292, "step": 7538 }, { "epoch": 1.2276594878475757, "grad_norm": 0.10006589442491531, "learning_rate": 3.6803191719152964e-05, "loss": 0.3141, "step": 7539 }, { "epoch": 1.2278223344054064, "grad_norm": 0.155913844704628, "learning_rate": 3.6799015679867857e-05, "loss": 0.3212, "step": 7540 }, { "epoch": 1.2279851809632374, "grad_norm": 0.08603902161121368, "learning_rate": 3.679483921696275e-05, "loss": 0.2807, "step": 7541 }, { "epoch": 1.2281480275210683, "grad_norm": 0.11990290880203247, "learning_rate": 3.679066233058756e-05, "loss": 0.3566, "step": 7542 }, { "epoch": 1.228310874078899, "grad_norm": 0.10208559781312943, "learning_rate": 3.678648502089228e-05, "loss": 0.3376, "step": 7543 }, { "epoch": 1.22847372063673, "grad_norm": 0.13060061633586884, "learning_rate": 3.6782307288026867e-05, "loss": 0.3572, "step": 7544 }, { "epoch": 1.228636567194561, "grad_norm": 0.12838412821292877, "learning_rate": 3.677812913214133e-05, "loss": 0.3419, "step": 7545 }, { "epoch": 1.2287994137523919, "grad_norm": 0.13709799945354462, "learning_rate": 3.677395055338566e-05, "loss": 0.3245, "step": 7546 }, { "epoch": 1.2289622603102226, "grad_norm": 0.12665028870105743, "learning_rate": 3.676977155190989e-05, "loss": 0.3429, "step": 7547 }, { "epoch": 1.2291251068680535, "grad_norm": 0.08196313679218292, "learning_rate": 3.6765592127864066e-05, "loss": 0.2997, "step": 7548 }, { "epoch": 1.2292879534258845, "grad_norm": 0.12071419507265091, "learning_rate": 3.6761412281398226e-05, "loss": 0.3267, "step": 7549 }, { "epoch": 1.2294507999837154, "grad_norm": 0.12538032233715057, "learning_rate": 3.6757232012662447e-05, "loss": 0.3227, "step": 7550 }, { "epoch": 1.2296136465415461, "grad_norm": 0.1055472120642662, "learning_rate": 3.6753051321806815e-05, "loss": 0.2944, "step": 7551 }, { "epoch": 1.229776493099377, "grad_norm": 0.11723048985004425, "learning_rate": 3.6748870208981433e-05, "loss": 0.3113, "step": 7552 }, { "epoch": 1.229939339657208, "grad_norm": 0.09022923558950424, "learning_rate": 3.67446886743364e-05, "loss": 0.305, "step": 7553 }, { "epoch": 1.230102186215039, "grad_norm": 0.09010113775730133, "learning_rate": 3.674050671802187e-05, "loss": 0.3094, "step": 7554 }, { "epoch": 1.2302650327728697, "grad_norm": 0.08547884970903397, "learning_rate": 3.6736324340187956e-05, "loss": 0.2782, "step": 7555 }, { "epoch": 1.2304278793307006, "grad_norm": 0.13660623133182526, "learning_rate": 3.6732141540984845e-05, "loss": 0.2995, "step": 7556 }, { "epoch": 1.2305907258885316, "grad_norm": 0.12163082510232925, "learning_rate": 3.67279583205627e-05, "loss": 0.3126, "step": 7557 }, { "epoch": 1.2307535724463623, "grad_norm": 0.0945514440536499, "learning_rate": 3.672377467907171e-05, "loss": 0.2859, "step": 7558 }, { "epoch": 1.2309164190041932, "grad_norm": 0.1379997581243515, "learning_rate": 3.671959061666209e-05, "loss": 0.3152, "step": 7559 }, { "epoch": 1.2310792655620242, "grad_norm": 0.10501586645841599, "learning_rate": 3.671540613348405e-05, "loss": 0.2897, "step": 7560 }, { "epoch": 1.2312421121198551, "grad_norm": 0.1602412462234497, "learning_rate": 3.6711221229687836e-05, "loss": 0.3005, "step": 7561 }, { "epoch": 1.231404958677686, "grad_norm": 0.15259139239788055, "learning_rate": 3.670703590542369e-05, "loss": 0.3293, "step": 7562 }, { "epoch": 1.2315678052355168, "grad_norm": 0.10795371979475021, "learning_rate": 3.6702850160841874e-05, "loss": 0.3499, "step": 7563 }, { "epoch": 1.2317306517933477, "grad_norm": 0.16379623115062714, "learning_rate": 3.6698663996092674e-05, "loss": 0.3443, "step": 7564 }, { "epoch": 1.2318934983511787, "grad_norm": 0.1368076503276825, "learning_rate": 3.669447741132639e-05, "loss": 0.3409, "step": 7565 }, { "epoch": 1.2320563449090094, "grad_norm": 0.20932509005069733, "learning_rate": 3.669029040669333e-05, "loss": 0.3383, "step": 7566 }, { "epoch": 1.2322191914668403, "grad_norm": 0.19059310853481293, "learning_rate": 3.668610298234382e-05, "loss": 0.3439, "step": 7567 }, { "epoch": 1.2323820380246713, "grad_norm": 0.1243685781955719, "learning_rate": 3.6681915138428195e-05, "loss": 0.3126, "step": 7568 }, { "epoch": 1.2325448845825022, "grad_norm": 0.11834879219532013, "learning_rate": 3.667772687509682e-05, "loss": 0.3116, "step": 7569 }, { "epoch": 1.232707731140333, "grad_norm": 0.08809096366167068, "learning_rate": 3.6673538192500066e-05, "loss": 0.2935, "step": 7570 }, { "epoch": 1.2328705776981639, "grad_norm": 0.0844508707523346, "learning_rate": 3.666934909078831e-05, "loss": 0.3084, "step": 7571 }, { "epoch": 1.2330334242559948, "grad_norm": 0.121590755879879, "learning_rate": 3.666515957011197e-05, "loss": 0.2811, "step": 7572 }, { "epoch": 1.2331962708138258, "grad_norm": 0.092597596347332, "learning_rate": 3.6660969630621446e-05, "loss": 0.294, "step": 7573 }, { "epoch": 1.2333591173716565, "grad_norm": 0.1281185746192932, "learning_rate": 3.6656779272467176e-05, "loss": 0.3079, "step": 7574 }, { "epoch": 1.2335219639294874, "grad_norm": 0.07556327432394028, "learning_rate": 3.66525884957996e-05, "loss": 0.301, "step": 7575 }, { "epoch": 1.2336848104873184, "grad_norm": 0.09883490204811096, "learning_rate": 3.664839730076919e-05, "loss": 0.3005, "step": 7576 }, { "epoch": 1.2338476570451493, "grad_norm": 0.16244029998779297, "learning_rate": 3.664420568752641e-05, "loss": 0.3337, "step": 7577 }, { "epoch": 1.23401050360298, "grad_norm": 0.11188361048698425, "learning_rate": 3.664001365622176e-05, "loss": 0.2979, "step": 7578 }, { "epoch": 1.234173350160811, "grad_norm": 0.1317557394504547, "learning_rate": 3.6635821207005755e-05, "loss": 0.353, "step": 7579 }, { "epoch": 1.234336196718642, "grad_norm": 0.12123703211545944, "learning_rate": 3.66316283400289e-05, "loss": 0.2853, "step": 7580 }, { "epoch": 1.2344990432764726, "grad_norm": 0.1072559505701065, "learning_rate": 3.6627435055441745e-05, "loss": 0.3213, "step": 7581 }, { "epoch": 1.2346618898343036, "grad_norm": 0.1252499222755432, "learning_rate": 3.662324135339483e-05, "loss": 0.2759, "step": 7582 }, { "epoch": 1.2348247363921345, "grad_norm": 0.10395092517137527, "learning_rate": 3.6619047234038725e-05, "loss": 0.2751, "step": 7583 }, { "epoch": 1.2349875829499655, "grad_norm": 0.12029533088207245, "learning_rate": 3.661485269752401e-05, "loss": 0.3001, "step": 7584 }, { "epoch": 1.2351504295077962, "grad_norm": 0.1371302753686905, "learning_rate": 3.661065774400129e-05, "loss": 0.2889, "step": 7585 }, { "epoch": 1.2353132760656271, "grad_norm": 0.1527612954378128, "learning_rate": 3.660646237362116e-05, "loss": 0.3112, "step": 7586 }, { "epoch": 1.235476122623458, "grad_norm": 0.08661078661680222, "learning_rate": 3.6602266586534274e-05, "loss": 0.3057, "step": 7587 }, { "epoch": 1.235638969181289, "grad_norm": 0.10878783464431763, "learning_rate": 3.6598070382891244e-05, "loss": 0.3375, "step": 7588 }, { "epoch": 1.2358018157391197, "grad_norm": 0.10016565024852753, "learning_rate": 3.659387376284273e-05, "loss": 0.3386, "step": 7589 }, { "epoch": 1.2359646622969507, "grad_norm": 0.11920873820781708, "learning_rate": 3.6589676726539435e-05, "loss": 0.2925, "step": 7590 }, { "epoch": 1.2361275088547816, "grad_norm": 0.12469342350959778, "learning_rate": 3.658547927413201e-05, "loss": 0.307, "step": 7591 }, { "epoch": 1.2362903554126126, "grad_norm": 0.12607048451900482, "learning_rate": 3.658128140577117e-05, "loss": 0.2927, "step": 7592 }, { "epoch": 1.2364532019704433, "grad_norm": 0.20310771465301514, "learning_rate": 3.6577083121607625e-05, "loss": 0.3792, "step": 7593 }, { "epoch": 1.2366160485282742, "grad_norm": 0.10566483438014984, "learning_rate": 3.657288442179211e-05, "loss": 0.3025, "step": 7594 }, { "epoch": 1.2367788950861052, "grad_norm": 0.07694268971681595, "learning_rate": 3.656868530647538e-05, "loss": 0.3306, "step": 7595 }, { "epoch": 1.2369417416439359, "grad_norm": 0.11231674998998642, "learning_rate": 3.656448577580818e-05, "loss": 0.3497, "step": 7596 }, { "epoch": 1.2371045882017668, "grad_norm": 0.1102413758635521, "learning_rate": 3.656028582994129e-05, "loss": 0.358, "step": 7597 }, { "epoch": 1.2372674347595978, "grad_norm": 0.13809114694595337, "learning_rate": 3.655608546902551e-05, "loss": 0.3234, "step": 7598 }, { "epoch": 1.2374302813174287, "grad_norm": 0.08001948148012161, "learning_rate": 3.655188469321163e-05, "loss": 0.3222, "step": 7599 }, { "epoch": 1.2375931278752597, "grad_norm": 0.1058155745267868, "learning_rate": 3.654768350265048e-05, "loss": 0.3418, "step": 7600 }, { "epoch": 1.2377559744330904, "grad_norm": 0.12825646996498108, "learning_rate": 3.6543481897492904e-05, "loss": 0.3181, "step": 7601 }, { "epoch": 1.2379188209909213, "grad_norm": 0.17525595426559448, "learning_rate": 3.6539279877889734e-05, "loss": 0.3133, "step": 7602 }, { "epoch": 1.2380816675487523, "grad_norm": 0.10268501937389374, "learning_rate": 3.653507744399185e-05, "loss": 0.3123, "step": 7603 }, { "epoch": 1.238244514106583, "grad_norm": 0.0784471184015274, "learning_rate": 3.6530874595950123e-05, "loss": 0.2805, "step": 7604 }, { "epoch": 1.238407360664414, "grad_norm": 0.13484995067119598, "learning_rate": 3.652667133391545e-05, "loss": 0.3532, "step": 7605 }, { "epoch": 1.2385702072222449, "grad_norm": 0.1084415391087532, "learning_rate": 3.652246765803874e-05, "loss": 0.3558, "step": 7606 }, { "epoch": 1.2387330537800758, "grad_norm": 0.12764482200145721, "learning_rate": 3.651826356847092e-05, "loss": 0.3504, "step": 7607 }, { "epoch": 1.2388959003379065, "grad_norm": 0.09901882708072662, "learning_rate": 3.651405906536293e-05, "loss": 0.3353, "step": 7608 }, { "epoch": 1.2390587468957375, "grad_norm": 0.06999924033880234, "learning_rate": 3.650985414886572e-05, "loss": 0.323, "step": 7609 }, { "epoch": 1.2392215934535684, "grad_norm": 0.18498367071151733, "learning_rate": 3.650564881913026e-05, "loss": 0.3174, "step": 7610 }, { "epoch": 1.2393844400113991, "grad_norm": 0.10729064792394638, "learning_rate": 3.650144307630754e-05, "loss": 0.3264, "step": 7611 }, { "epoch": 1.23954728656923, "grad_norm": 0.1612747460603714, "learning_rate": 3.649723692054855e-05, "loss": 0.3518, "step": 7612 }, { "epoch": 1.239710133127061, "grad_norm": 0.09614752978086472, "learning_rate": 3.649303035200431e-05, "loss": 0.3162, "step": 7613 }, { "epoch": 1.239872979684892, "grad_norm": 0.1139899417757988, "learning_rate": 3.648882337082585e-05, "loss": 0.2887, "step": 7614 }, { "epoch": 1.240035826242723, "grad_norm": 0.13378018140792847, "learning_rate": 3.6484615977164205e-05, "loss": 0.2962, "step": 7615 }, { "epoch": 1.2401986728005536, "grad_norm": 0.1991778314113617, "learning_rate": 3.648040817117044e-05, "loss": 0.3144, "step": 7616 }, { "epoch": 1.2403615193583846, "grad_norm": 0.12695850431919098, "learning_rate": 3.647619995299563e-05, "loss": 0.3185, "step": 7617 }, { "epoch": 1.2405243659162155, "grad_norm": 0.12598422169685364, "learning_rate": 3.647199132279085e-05, "loss": 0.3149, "step": 7618 }, { "epoch": 1.2406872124740462, "grad_norm": 0.13257426023483276, "learning_rate": 3.646778228070721e-05, "loss": 0.3178, "step": 7619 }, { "epoch": 1.2408500590318772, "grad_norm": 0.11732596158981323, "learning_rate": 3.646357282689583e-05, "loss": 0.3249, "step": 7620 }, { "epoch": 1.2410129055897081, "grad_norm": 0.10833758115768433, "learning_rate": 3.6459362961507845e-05, "loss": 0.3268, "step": 7621 }, { "epoch": 1.241175752147539, "grad_norm": 0.11660224944353104, "learning_rate": 3.6455152684694385e-05, "loss": 0.3353, "step": 7622 }, { "epoch": 1.2413385987053698, "grad_norm": 0.13462254405021667, "learning_rate": 3.645094199660664e-05, "loss": 0.2811, "step": 7623 }, { "epoch": 1.2415014452632007, "grad_norm": 0.21575424075126648, "learning_rate": 3.644673089739575e-05, "loss": 0.3518, "step": 7624 }, { "epoch": 1.2416642918210317, "grad_norm": 0.1087702289223671, "learning_rate": 3.6442519387212934e-05, "loss": 0.2975, "step": 7625 }, { "epoch": 1.2418271383788626, "grad_norm": 0.1679593026638031, "learning_rate": 3.64383074662094e-05, "loss": 0.3462, "step": 7626 }, { "epoch": 1.2419899849366933, "grad_norm": 0.13065549731254578, "learning_rate": 3.643409513453634e-05, "loss": 0.335, "step": 7627 }, { "epoch": 1.2421528314945243, "grad_norm": 0.09335894137620926, "learning_rate": 3.6429882392345016e-05, "loss": 0.3138, "step": 7628 }, { "epoch": 1.2423156780523552, "grad_norm": 0.09052948653697968, "learning_rate": 3.6425669239786667e-05, "loss": 0.3297, "step": 7629 }, { "epoch": 1.2424785246101862, "grad_norm": 0.09409837424755096, "learning_rate": 3.6421455677012564e-05, "loss": 0.3332, "step": 7630 }, { "epoch": 1.2426413711680169, "grad_norm": 0.1243070662021637, "learning_rate": 3.6417241704173976e-05, "loss": 0.3048, "step": 7631 }, { "epoch": 1.2428042177258478, "grad_norm": 0.12844914197921753, "learning_rate": 3.641302732142221e-05, "loss": 0.3223, "step": 7632 }, { "epoch": 1.2429670642836788, "grad_norm": 0.1344153732061386, "learning_rate": 3.6408812528908564e-05, "loss": 0.3009, "step": 7633 }, { "epoch": 1.2431299108415095, "grad_norm": 0.12502798438072205, "learning_rate": 3.6404597326784375e-05, "loss": 0.3753, "step": 7634 }, { "epoch": 1.2432927573993404, "grad_norm": 0.11956389248371124, "learning_rate": 3.640038171520096e-05, "loss": 0.3286, "step": 7635 }, { "epoch": 1.2434556039571714, "grad_norm": 0.11180909723043442, "learning_rate": 3.63961656943097e-05, "loss": 0.3324, "step": 7636 }, { "epoch": 1.2436184505150023, "grad_norm": 0.06654294580221176, "learning_rate": 3.639194926426194e-05, "loss": 0.3285, "step": 7637 }, { "epoch": 1.2437812970728332, "grad_norm": 0.11783236265182495, "learning_rate": 3.638773242520907e-05, "loss": 0.3044, "step": 7638 }, { "epoch": 1.243944143630664, "grad_norm": 0.11408412456512451, "learning_rate": 3.6383515177302484e-05, "loss": 0.3006, "step": 7639 }, { "epoch": 1.244106990188495, "grad_norm": 0.07972881197929382, "learning_rate": 3.63792975206936e-05, "loss": 0.323, "step": 7640 }, { "epoch": 1.2442698367463259, "grad_norm": 0.09273988008499146, "learning_rate": 3.6375079455533844e-05, "loss": 0.3149, "step": 7641 }, { "epoch": 1.2444326833041566, "grad_norm": 0.10516280680894852, "learning_rate": 3.637086098197465e-05, "loss": 0.3256, "step": 7642 }, { "epoch": 1.2445955298619875, "grad_norm": 0.16710318624973297, "learning_rate": 3.6366642100167494e-05, "loss": 0.3693, "step": 7643 }, { "epoch": 1.2447583764198185, "grad_norm": 0.10382454842329025, "learning_rate": 3.636242281026381e-05, "loss": 0.3193, "step": 7644 }, { "epoch": 1.2449212229776494, "grad_norm": 0.12511344254016876, "learning_rate": 3.635820311241511e-05, "loss": 0.3176, "step": 7645 }, { "epoch": 1.2450840695354801, "grad_norm": 0.10025377571582794, "learning_rate": 3.6353983006772894e-05, "loss": 0.3269, "step": 7646 }, { "epoch": 1.245246916093311, "grad_norm": 0.18499809503555298, "learning_rate": 3.634976249348867e-05, "loss": 0.3239, "step": 7647 }, { "epoch": 1.245409762651142, "grad_norm": 0.09913375973701477, "learning_rate": 3.634554157271397e-05, "loss": 0.323, "step": 7648 }, { "epoch": 1.2455726092089727, "grad_norm": 0.08607383817434311, "learning_rate": 3.634132024460033e-05, "loss": 0.3577, "step": 7649 }, { "epoch": 1.2457354557668037, "grad_norm": 0.12132584303617477, "learning_rate": 3.633709850929932e-05, "loss": 0.3094, "step": 7650 }, { "epoch": 1.2458983023246346, "grad_norm": 0.1625608205795288, "learning_rate": 3.633287636696249e-05, "loss": 0.3122, "step": 7651 }, { "epoch": 1.2460611488824656, "grad_norm": 0.11432356387376785, "learning_rate": 3.6328653817741465e-05, "loss": 0.3086, "step": 7652 }, { "epoch": 1.2462239954402965, "grad_norm": 0.12136948853731155, "learning_rate": 3.632443086178782e-05, "loss": 0.3382, "step": 7653 }, { "epoch": 1.2463868419981272, "grad_norm": 0.10743417590856552, "learning_rate": 3.632020749925317e-05, "loss": 0.3009, "step": 7654 }, { "epoch": 1.2465496885559582, "grad_norm": 0.13507048785686493, "learning_rate": 3.6315983730289164e-05, "loss": 0.3403, "step": 7655 }, { "epoch": 1.246712535113789, "grad_norm": 0.12774986028671265, "learning_rate": 3.631175955504744e-05, "loss": 0.3245, "step": 7656 }, { "epoch": 1.2468753816716198, "grad_norm": 0.12837785482406616, "learning_rate": 3.6307534973679644e-05, "loss": 0.3492, "step": 7657 }, { "epoch": 1.2470382282294508, "grad_norm": 0.1122523620724678, "learning_rate": 3.6303309986337476e-05, "loss": 0.3256, "step": 7658 }, { "epoch": 1.2472010747872817, "grad_norm": 0.16387119889259338, "learning_rate": 3.629908459317261e-05, "loss": 0.3481, "step": 7659 }, { "epoch": 1.2473639213451126, "grad_norm": 0.11452596634626389, "learning_rate": 3.629485879433675e-05, "loss": 0.3675, "step": 7660 }, { "epoch": 1.2475267679029434, "grad_norm": 0.12323363125324249, "learning_rate": 3.6290632589981625e-05, "loss": 0.2992, "step": 7661 }, { "epoch": 1.2476896144607743, "grad_norm": 0.10763616114854813, "learning_rate": 3.628640598025896e-05, "loss": 0.2867, "step": 7662 }, { "epoch": 1.2478524610186053, "grad_norm": 0.08016145974397659, "learning_rate": 3.6282178965320505e-05, "loss": 0.3163, "step": 7663 }, { "epoch": 1.2480153075764362, "grad_norm": 0.13434815406799316, "learning_rate": 3.627795154531802e-05, "loss": 0.3705, "step": 7664 }, { "epoch": 1.248178154134267, "grad_norm": 0.11705715954303741, "learning_rate": 3.627372372040329e-05, "loss": 0.2985, "step": 7665 }, { "epoch": 1.2483410006920979, "grad_norm": 0.11965879052877426, "learning_rate": 3.62694954907281e-05, "loss": 0.2922, "step": 7666 }, { "epoch": 1.2485038472499288, "grad_norm": 0.11245701462030411, "learning_rate": 3.6265266856444256e-05, "loss": 0.3181, "step": 7667 }, { "epoch": 1.2486666938077597, "grad_norm": 0.11099095642566681, "learning_rate": 3.626103781770358e-05, "loss": 0.3236, "step": 7668 }, { "epoch": 1.2488295403655905, "grad_norm": 0.1354755312204361, "learning_rate": 3.6256808374657916e-05, "loss": 0.3144, "step": 7669 }, { "epoch": 1.2489923869234214, "grad_norm": 0.10383068025112152, "learning_rate": 3.62525785274591e-05, "loss": 0.3008, "step": 7670 }, { "epoch": 1.2491552334812523, "grad_norm": 0.22844451665878296, "learning_rate": 3.624834827625899e-05, "loss": 0.3418, "step": 7671 }, { "epoch": 1.249318080039083, "grad_norm": 0.1240634024143219, "learning_rate": 3.624411762120949e-05, "loss": 0.307, "step": 7672 }, { "epoch": 1.249480926596914, "grad_norm": 0.09997261315584183, "learning_rate": 3.623988656246248e-05, "loss": 0.3335, "step": 7673 }, { "epoch": 1.249643773154745, "grad_norm": 0.20920470356941223, "learning_rate": 3.623565510016986e-05, "loss": 0.3262, "step": 7674 }, { "epoch": 1.249806619712576, "grad_norm": 0.0971701517701149, "learning_rate": 3.623142323448356e-05, "loss": 0.3114, "step": 7675 }, { "epoch": 1.2499694662704066, "grad_norm": 0.0856374055147171, "learning_rate": 3.622719096555551e-05, "loss": 0.3474, "step": 7676 }, { "epoch": 1.2501323128282376, "grad_norm": 0.11101995408535004, "learning_rate": 3.6222958293537676e-05, "loss": 0.3129, "step": 7677 }, { "epoch": 1.2502951593860685, "grad_norm": 0.14151570200920105, "learning_rate": 3.621872521858202e-05, "loss": 0.2885, "step": 7678 }, { "epoch": 1.2504580059438994, "grad_norm": 0.15142600238323212, "learning_rate": 3.62144917408405e-05, "loss": 0.3166, "step": 7679 }, { "epoch": 1.2506208525017302, "grad_norm": 0.10518916696310043, "learning_rate": 3.621025786046513e-05, "loss": 0.3469, "step": 7680 }, { "epoch": 1.250783699059561, "grad_norm": 0.12454141676425934, "learning_rate": 3.620602357760793e-05, "loss": 0.3661, "step": 7681 }, { "epoch": 1.250946545617392, "grad_norm": 0.16877414286136627, "learning_rate": 3.620178889242089e-05, "loss": 0.3529, "step": 7682 }, { "epoch": 1.251109392175223, "grad_norm": 0.23677262663841248, "learning_rate": 3.619755380505609e-05, "loss": 0.3251, "step": 7683 }, { "epoch": 1.2512722387330537, "grad_norm": 0.11365741491317749, "learning_rate": 3.619331831566554e-05, "loss": 0.3101, "step": 7684 }, { "epoch": 1.2514350852908847, "grad_norm": 0.11486905068159103, "learning_rate": 3.618908242440133e-05, "loss": 0.3136, "step": 7685 }, { "epoch": 1.2515979318487156, "grad_norm": 0.13284504413604736, "learning_rate": 3.618484613141554e-05, "loss": 0.3075, "step": 7686 }, { "epoch": 1.2517607784065463, "grad_norm": 0.09333518892526627, "learning_rate": 3.6180609436860267e-05, "loss": 0.2942, "step": 7687 }, { "epoch": 1.2519236249643773, "grad_norm": 0.08733302354812622, "learning_rate": 3.617637234088761e-05, "loss": 0.2869, "step": 7688 }, { "epoch": 1.2520864715222082, "grad_norm": 0.0818144753575325, "learning_rate": 3.6172134843649706e-05, "loss": 0.3367, "step": 7689 }, { "epoch": 1.2522493180800391, "grad_norm": 0.08449426293373108, "learning_rate": 3.616789694529868e-05, "loss": 0.3292, "step": 7690 }, { "epoch": 1.25241216463787, "grad_norm": 0.07899940013885498, "learning_rate": 3.616365864598671e-05, "loss": 0.3106, "step": 7691 }, { "epoch": 1.2525750111957008, "grad_norm": 0.09344217926263809, "learning_rate": 3.6159419945865934e-05, "loss": 0.2874, "step": 7692 }, { "epoch": 1.2527378577535317, "grad_norm": 0.11274499446153641, "learning_rate": 3.6155180845088554e-05, "loss": 0.3097, "step": 7693 }, { "epoch": 1.2529007043113627, "grad_norm": 0.1217207983136177, "learning_rate": 3.615094134380676e-05, "loss": 0.339, "step": 7694 }, { "epoch": 1.2530635508691934, "grad_norm": 0.1156439557671547, "learning_rate": 3.614670144217276e-05, "loss": 0.286, "step": 7695 }, { "epoch": 1.2532263974270244, "grad_norm": 0.11775372177362442, "learning_rate": 3.6142461140338785e-05, "loss": 0.3203, "step": 7696 }, { "epoch": 1.2533892439848553, "grad_norm": 0.09085803478956223, "learning_rate": 3.6138220438457065e-05, "loss": 0.3285, "step": 7697 }, { "epoch": 1.2535520905426862, "grad_norm": 0.13753321766853333, "learning_rate": 3.613397933667987e-05, "loss": 0.2906, "step": 7698 }, { "epoch": 1.253714937100517, "grad_norm": 0.13214148581027985, "learning_rate": 3.612973783515945e-05, "loss": 0.3105, "step": 7699 }, { "epoch": 1.253877783658348, "grad_norm": 0.13171668350696564, "learning_rate": 3.612549593404809e-05, "loss": 0.3391, "step": 7700 }, { "epoch": 1.2540406302161788, "grad_norm": 0.10782822221517563, "learning_rate": 3.612125363349811e-05, "loss": 0.3154, "step": 7701 }, { "epoch": 1.2542034767740096, "grad_norm": 0.1308216154575348, "learning_rate": 3.611701093366179e-05, "loss": 0.2801, "step": 7702 }, { "epoch": 1.2543663233318405, "grad_norm": 0.13574115931987762, "learning_rate": 3.6112767834691475e-05, "loss": 0.3307, "step": 7703 }, { "epoch": 1.2545291698896714, "grad_norm": 0.13500767946243286, "learning_rate": 3.610852433673951e-05, "loss": 0.3198, "step": 7704 }, { "epoch": 1.2546920164475024, "grad_norm": 0.13616140186786652, "learning_rate": 3.610428043995823e-05, "loss": 0.3051, "step": 7705 }, { "epoch": 1.2548548630053333, "grad_norm": 0.10673623532056808, "learning_rate": 3.610003614450001e-05, "loss": 0.3134, "step": 7706 }, { "epoch": 1.255017709563164, "grad_norm": 0.13592423498630524, "learning_rate": 3.609579145051724e-05, "loss": 0.3018, "step": 7707 }, { "epoch": 1.255180556120995, "grad_norm": 0.15716616809368134, "learning_rate": 3.609154635816231e-05, "loss": 0.306, "step": 7708 }, { "epoch": 1.255343402678826, "grad_norm": 0.20208710432052612, "learning_rate": 3.6087300867587634e-05, "loss": 0.3706, "step": 7709 }, { "epoch": 1.2555062492366567, "grad_norm": 0.0984034538269043, "learning_rate": 3.608305497894564e-05, "loss": 0.3395, "step": 7710 }, { "epoch": 1.2556690957944876, "grad_norm": 0.10095589607954025, "learning_rate": 3.6078808692388766e-05, "loss": 0.2915, "step": 7711 }, { "epoch": 1.2558319423523185, "grad_norm": 0.15297870337963104, "learning_rate": 3.607456200806947e-05, "loss": 0.3449, "step": 7712 }, { "epoch": 1.2559947889101495, "grad_norm": 0.13852490484714508, "learning_rate": 3.607031492614022e-05, "loss": 0.3065, "step": 7713 }, { "epoch": 1.2561576354679804, "grad_norm": 0.14650316536426544, "learning_rate": 3.6066067446753494e-05, "loss": 0.3073, "step": 7714 }, { "epoch": 1.2563204820258111, "grad_norm": 0.11508509516716003, "learning_rate": 3.6061819570061786e-05, "loss": 0.3674, "step": 7715 }, { "epoch": 1.256483328583642, "grad_norm": 0.0982925072312355, "learning_rate": 3.6057571296217626e-05, "loss": 0.3464, "step": 7716 }, { "epoch": 1.2566461751414728, "grad_norm": 0.13503965735435486, "learning_rate": 3.6053322625373515e-05, "loss": 0.344, "step": 7717 }, { "epoch": 1.2568090216993038, "grad_norm": 0.10594501346349716, "learning_rate": 3.604907355768201e-05, "loss": 0.3184, "step": 7718 }, { "epoch": 1.2569718682571347, "grad_norm": 0.15716634690761566, "learning_rate": 3.604482409329566e-05, "loss": 0.3249, "step": 7719 }, { "epoch": 1.2571347148149656, "grad_norm": 0.12901705503463745, "learning_rate": 3.6040574232367034e-05, "loss": 0.2896, "step": 7720 }, { "epoch": 1.2572975613727966, "grad_norm": 0.14361557364463806, "learning_rate": 3.603632397504871e-05, "loss": 0.3721, "step": 7721 }, { "epoch": 1.2574604079306273, "grad_norm": 0.133122980594635, "learning_rate": 3.6032073321493296e-05, "loss": 0.3062, "step": 7722 }, { "epoch": 1.2576232544884582, "grad_norm": 0.13138523697853088, "learning_rate": 3.6027822271853396e-05, "loss": 0.3048, "step": 7723 }, { "epoch": 1.2577861010462892, "grad_norm": 0.1449400782585144, "learning_rate": 3.6023570826281626e-05, "loss": 0.3081, "step": 7724 }, { "epoch": 1.25794894760412, "grad_norm": 0.13178180158138275, "learning_rate": 3.601931898493065e-05, "loss": 0.3083, "step": 7725 }, { "epoch": 1.2581117941619508, "grad_norm": 0.0792853981256485, "learning_rate": 3.6015066747953105e-05, "loss": 0.3255, "step": 7726 }, { "epoch": 1.2582746407197818, "grad_norm": 0.13632732629776, "learning_rate": 3.601081411550166e-05, "loss": 0.2994, "step": 7727 }, { "epoch": 1.2584374872776127, "grad_norm": 0.09691067039966583, "learning_rate": 3.600656108772899e-05, "loss": 0.2905, "step": 7728 }, { "epoch": 1.2586003338354437, "grad_norm": 0.10054828226566315, "learning_rate": 3.600230766478782e-05, "loss": 0.2856, "step": 7729 }, { "epoch": 1.2587631803932744, "grad_norm": 0.18703055381774902, "learning_rate": 3.5998053846830826e-05, "loss": 0.3161, "step": 7730 }, { "epoch": 1.2589260269511053, "grad_norm": 0.13550080358982086, "learning_rate": 3.5993799634010746e-05, "loss": 0.2807, "step": 7731 }, { "epoch": 1.2590888735089363, "grad_norm": 0.12693986296653748, "learning_rate": 3.598954502648033e-05, "loss": 0.346, "step": 7732 }, { "epoch": 1.259251720066767, "grad_norm": 0.1303374469280243, "learning_rate": 3.5985290024392315e-05, "loss": 0.2976, "step": 7733 }, { "epoch": 1.259414566624598, "grad_norm": 0.19550363719463348, "learning_rate": 3.598103462789948e-05, "loss": 0.3182, "step": 7734 }, { "epoch": 1.2595774131824289, "grad_norm": 0.23551012575626373, "learning_rate": 3.59767788371546e-05, "loss": 0.3667, "step": 7735 }, { "epoch": 1.2597402597402598, "grad_norm": 0.13294486701488495, "learning_rate": 3.5972522652310476e-05, "loss": 0.3147, "step": 7736 }, { "epoch": 1.2599031062980905, "grad_norm": 0.10712501406669617, "learning_rate": 3.5968266073519905e-05, "loss": 0.2812, "step": 7737 }, { "epoch": 1.2600659528559215, "grad_norm": 0.16073240339756012, "learning_rate": 3.5964009100935734e-05, "loss": 0.316, "step": 7738 }, { "epoch": 1.2602287994137524, "grad_norm": 0.12177848070859909, "learning_rate": 3.595975173471078e-05, "loss": 0.2882, "step": 7739 }, { "epoch": 1.2603916459715832, "grad_norm": 0.11251486092805862, "learning_rate": 3.595549397499789e-05, "loss": 0.2817, "step": 7740 }, { "epoch": 1.260554492529414, "grad_norm": 0.06832873076200485, "learning_rate": 3.595123582194996e-05, "loss": 0.2993, "step": 7741 }, { "epoch": 1.260717339087245, "grad_norm": 0.11771171540021896, "learning_rate": 3.594697727571985e-05, "loss": 0.3136, "step": 7742 }, { "epoch": 1.260880185645076, "grad_norm": 0.11768881231546402, "learning_rate": 3.594271833646045e-05, "loss": 0.2884, "step": 7743 }, { "epoch": 1.261043032202907, "grad_norm": 0.10941921919584274, "learning_rate": 3.593845900432468e-05, "loss": 0.3476, "step": 7744 }, { "epoch": 1.2612058787607376, "grad_norm": 0.11773987114429474, "learning_rate": 3.5934199279465466e-05, "loss": 0.3016, "step": 7745 }, { "epoch": 1.2613687253185686, "grad_norm": 0.13085685670375824, "learning_rate": 3.592993916203573e-05, "loss": 0.2898, "step": 7746 }, { "epoch": 1.2615315718763995, "grad_norm": 0.14666332304477692, "learning_rate": 3.592567865218844e-05, "loss": 0.2925, "step": 7747 }, { "epoch": 1.2616944184342302, "grad_norm": 0.16879145801067352, "learning_rate": 3.592141775007654e-05, "loss": 0.3289, "step": 7748 }, { "epoch": 1.2618572649920612, "grad_norm": 0.11827687174081802, "learning_rate": 3.5917156455853036e-05, "loss": 0.3392, "step": 7749 }, { "epoch": 1.2620201115498921, "grad_norm": 0.1325369030237198, "learning_rate": 3.591289476967089e-05, "loss": 0.2846, "step": 7750 }, { "epoch": 1.262182958107723, "grad_norm": 0.10314399749040604, "learning_rate": 3.590863269168314e-05, "loss": 0.3421, "step": 7751 }, { "epoch": 1.262345804665554, "grad_norm": 0.13455668091773987, "learning_rate": 3.590437022204279e-05, "loss": 0.314, "step": 7752 }, { "epoch": 1.2625086512233847, "grad_norm": 0.10767076909542084, "learning_rate": 3.590010736090287e-05, "loss": 0.2955, "step": 7753 }, { "epoch": 1.2626714977812157, "grad_norm": 0.1732645183801651, "learning_rate": 3.5895844108416446e-05, "loss": 0.3216, "step": 7754 }, { "epoch": 1.2628343443390464, "grad_norm": 0.10859671235084534, "learning_rate": 3.589158046473657e-05, "loss": 0.3049, "step": 7755 }, { "epoch": 1.2629971908968773, "grad_norm": 0.14404292404651642, "learning_rate": 3.588731643001633e-05, "loss": 0.3051, "step": 7756 }, { "epoch": 1.2631600374547083, "grad_norm": 0.10261618345975876, "learning_rate": 3.5883052004408804e-05, "loss": 0.3157, "step": 7757 }, { "epoch": 1.2633228840125392, "grad_norm": 0.08460938185453415, "learning_rate": 3.587878718806711e-05, "loss": 0.2989, "step": 7758 }, { "epoch": 1.2634857305703702, "grad_norm": 0.12028133124113083, "learning_rate": 3.587452198114436e-05, "loss": 0.3191, "step": 7759 }, { "epoch": 1.263648577128201, "grad_norm": 0.09428276866674423, "learning_rate": 3.5870256383793685e-05, "loss": 0.3125, "step": 7760 }, { "epoch": 1.2638114236860318, "grad_norm": 0.09455729275941849, "learning_rate": 3.5865990396168244e-05, "loss": 0.323, "step": 7761 }, { "epoch": 1.2639742702438628, "grad_norm": 0.11949906498193741, "learning_rate": 3.5861724018421186e-05, "loss": 0.3293, "step": 7762 }, { "epoch": 1.2641371168016935, "grad_norm": 0.1166180819272995, "learning_rate": 3.5857457250705695e-05, "loss": 0.3093, "step": 7763 }, { "epoch": 1.2642999633595244, "grad_norm": 0.10665778070688248, "learning_rate": 3.585319009317496e-05, "loss": 0.3099, "step": 7764 }, { "epoch": 1.2644628099173554, "grad_norm": 0.10639883577823639, "learning_rate": 3.5848922545982186e-05, "loss": 0.3026, "step": 7765 }, { "epoch": 1.2646256564751863, "grad_norm": 0.07963623106479645, "learning_rate": 3.5844654609280584e-05, "loss": 0.326, "step": 7766 }, { "epoch": 1.2647885030330173, "grad_norm": 0.13495518267154694, "learning_rate": 3.58403862832234e-05, "loss": 0.316, "step": 7767 }, { "epoch": 1.264951349590848, "grad_norm": 0.12226057052612305, "learning_rate": 3.583611756796386e-05, "loss": 0.2943, "step": 7768 }, { "epoch": 1.265114196148679, "grad_norm": 0.13069592416286469, "learning_rate": 3.583184846365524e-05, "loss": 0.3691, "step": 7769 }, { "epoch": 1.2652770427065099, "grad_norm": 0.06518727540969849, "learning_rate": 3.5827578970450807e-05, "loss": 0.3248, "step": 7770 }, { "epoch": 1.2654398892643406, "grad_norm": 0.11504512280225754, "learning_rate": 3.582330908850384e-05, "loss": 0.3234, "step": 7771 }, { "epoch": 1.2656027358221715, "grad_norm": 0.10706854611635208, "learning_rate": 3.5819038817967656e-05, "loss": 0.2922, "step": 7772 }, { "epoch": 1.2657655823800025, "grad_norm": 0.09895937144756317, "learning_rate": 3.581476815899556e-05, "loss": 0.3042, "step": 7773 }, { "epoch": 1.2659284289378334, "grad_norm": 0.11012286692857742, "learning_rate": 3.581049711174089e-05, "loss": 0.31, "step": 7774 }, { "epoch": 1.2660912754956641, "grad_norm": 0.11122129112482071, "learning_rate": 3.580622567635698e-05, "loss": 0.2916, "step": 7775 }, { "epoch": 1.266254122053495, "grad_norm": 0.1653403341770172, "learning_rate": 3.5801953852997196e-05, "loss": 0.3404, "step": 7776 }, { "epoch": 1.266416968611326, "grad_norm": 0.15769881010055542, "learning_rate": 3.57976816418149e-05, "loss": 0.3144, "step": 7777 }, { "epoch": 1.2665798151691567, "grad_norm": 0.1587487757205963, "learning_rate": 3.579340904296349e-05, "loss": 0.331, "step": 7778 }, { "epoch": 1.2667426617269877, "grad_norm": 0.09969640523195267, "learning_rate": 3.5789136056596354e-05, "loss": 0.318, "step": 7779 }, { "epoch": 1.2669055082848186, "grad_norm": 0.11258552223443985, "learning_rate": 3.5784862682866896e-05, "loss": 0.3432, "step": 7780 }, { "epoch": 1.2670683548426496, "grad_norm": 0.0967080220580101, "learning_rate": 3.578058892192857e-05, "loss": 0.3367, "step": 7781 }, { "epoch": 1.2672312014004805, "grad_norm": 0.08134926110506058, "learning_rate": 3.57763147739348e-05, "loss": 0.3088, "step": 7782 }, { "epoch": 1.2673940479583112, "grad_norm": 0.1392299085855484, "learning_rate": 3.577204023903904e-05, "loss": 0.3217, "step": 7783 }, { "epoch": 1.2675568945161422, "grad_norm": 0.16319431364536285, "learning_rate": 3.576776531739476e-05, "loss": 0.3526, "step": 7784 }, { "epoch": 1.2677197410739731, "grad_norm": 0.1341274529695511, "learning_rate": 3.5763490009155454e-05, "loss": 0.2878, "step": 7785 }, { "epoch": 1.2678825876318038, "grad_norm": 0.11441535502672195, "learning_rate": 3.57592143144746e-05, "loss": 0.3023, "step": 7786 }, { "epoch": 1.2680454341896348, "grad_norm": 0.113190196454525, "learning_rate": 3.575493823350572e-05, "loss": 0.3733, "step": 7787 }, { "epoch": 1.2682082807474657, "grad_norm": 0.15763674676418304, "learning_rate": 3.5750661766402326e-05, "loss": 0.2979, "step": 7788 }, { "epoch": 1.2683711273052967, "grad_norm": 0.14249199628829956, "learning_rate": 3.5746384913317975e-05, "loss": 0.3308, "step": 7789 }, { "epoch": 1.2685339738631276, "grad_norm": 0.11861102283000946, "learning_rate": 3.5742107674406207e-05, "loss": 0.2901, "step": 7790 }, { "epoch": 1.2686968204209583, "grad_norm": 0.12617669999599457, "learning_rate": 3.5737830049820585e-05, "loss": 0.2966, "step": 7791 }, { "epoch": 1.2688596669787893, "grad_norm": 0.1289299875497818, "learning_rate": 3.573355203971469e-05, "loss": 0.3391, "step": 7792 }, { "epoch": 1.26902251353662, "grad_norm": 0.09924594312906265, "learning_rate": 3.572927364424213e-05, "loss": 0.3515, "step": 7793 }, { "epoch": 1.269185360094451, "grad_norm": 0.13653625547885895, "learning_rate": 3.572499486355649e-05, "loss": 0.3146, "step": 7794 }, { "epoch": 1.2693482066522819, "grad_norm": 0.1748332977294922, "learning_rate": 3.572071569781141e-05, "loss": 0.3204, "step": 7795 }, { "epoch": 1.2695110532101128, "grad_norm": 0.14956817030906677, "learning_rate": 3.571643614716052e-05, "loss": 0.3096, "step": 7796 }, { "epoch": 1.2696738997679438, "grad_norm": 0.10448159277439117, "learning_rate": 3.571215621175745e-05, "loss": 0.2889, "step": 7797 }, { "epoch": 1.2698367463257745, "grad_norm": 0.13279329240322113, "learning_rate": 3.570787589175588e-05, "loss": 0.3261, "step": 7798 }, { "epoch": 1.2699995928836054, "grad_norm": 0.1000482365489006, "learning_rate": 3.570359518730949e-05, "loss": 0.2968, "step": 7799 }, { "epoch": 1.2701624394414364, "grad_norm": 0.1669948697090149, "learning_rate": 3.569931409857196e-05, "loss": 0.3447, "step": 7800 }, { "epoch": 1.270325285999267, "grad_norm": 0.11184139549732208, "learning_rate": 3.5695032625697e-05, "loss": 0.3378, "step": 7801 }, { "epoch": 1.270488132557098, "grad_norm": 0.14159151911735535, "learning_rate": 3.5690750768838325e-05, "loss": 0.3325, "step": 7802 }, { "epoch": 1.270650979114929, "grad_norm": 0.0837806984782219, "learning_rate": 3.568646852814967e-05, "loss": 0.302, "step": 7803 }, { "epoch": 1.27081382567276, "grad_norm": 0.2032589316368103, "learning_rate": 3.568218590378477e-05, "loss": 0.3285, "step": 7804 }, { "epoch": 1.2709766722305909, "grad_norm": 0.10109824687242508, "learning_rate": 3.5677902895897395e-05, "loss": 0.3114, "step": 7805 }, { "epoch": 1.2711395187884216, "grad_norm": 0.10389512777328491, "learning_rate": 3.567361950464132e-05, "loss": 0.3867, "step": 7806 }, { "epoch": 1.2713023653462525, "grad_norm": 0.1510014683008194, "learning_rate": 3.566933573017032e-05, "loss": 0.3198, "step": 7807 }, { "epoch": 1.2714652119040835, "grad_norm": 0.11588550359010696, "learning_rate": 3.56650515726382e-05, "loss": 0.3323, "step": 7808 }, { "epoch": 1.2716280584619142, "grad_norm": 0.13586090505123138, "learning_rate": 3.5660767032198784e-05, "loss": 0.3251, "step": 7809 }, { "epoch": 1.2717909050197451, "grad_norm": 0.12652260065078735, "learning_rate": 3.565648210900589e-05, "loss": 0.3443, "step": 7810 }, { "epoch": 1.271953751577576, "grad_norm": 0.11420594155788422, "learning_rate": 3.565219680321335e-05, "loss": 0.3062, "step": 7811 }, { "epoch": 1.272116598135407, "grad_norm": 0.13493449985980988, "learning_rate": 3.564791111497504e-05, "loss": 0.3573, "step": 7812 }, { "epoch": 1.2722794446932377, "grad_norm": 0.15415138006210327, "learning_rate": 3.564362504444481e-05, "loss": 0.3037, "step": 7813 }, { "epoch": 1.2724422912510687, "grad_norm": 0.09802795201539993, "learning_rate": 3.563933859177655e-05, "loss": 0.3178, "step": 7814 }, { "epoch": 1.2726051378088996, "grad_norm": 0.1453268975019455, "learning_rate": 3.563505175712417e-05, "loss": 0.3737, "step": 7815 }, { "epoch": 1.2727679843667303, "grad_norm": 0.11280351877212524, "learning_rate": 3.563076454064156e-05, "loss": 0.296, "step": 7816 }, { "epoch": 1.2729308309245613, "grad_norm": 0.1283639371395111, "learning_rate": 3.5626476942482654e-05, "loss": 0.3448, "step": 7817 }, { "epoch": 1.2730936774823922, "grad_norm": 0.12049610167741776, "learning_rate": 3.562218896280139e-05, "loss": 0.3186, "step": 7818 }, { "epoch": 1.2732565240402232, "grad_norm": 0.11394000798463821, "learning_rate": 3.561790060175172e-05, "loss": 0.3222, "step": 7819 }, { "epoch": 1.273419370598054, "grad_norm": 0.0936134085059166, "learning_rate": 3.5613611859487595e-05, "loss": 0.3009, "step": 7820 }, { "epoch": 1.2735822171558848, "grad_norm": 0.1373923271894455, "learning_rate": 3.5609322736163014e-05, "loss": 0.3187, "step": 7821 }, { "epoch": 1.2737450637137158, "grad_norm": 0.15527227520942688, "learning_rate": 3.560503323193196e-05, "loss": 0.2904, "step": 7822 }, { "epoch": 1.2739079102715467, "grad_norm": 0.11770698428153992, "learning_rate": 3.560074334694844e-05, "loss": 0.3317, "step": 7823 }, { "epoch": 1.2740707568293774, "grad_norm": 0.16858386993408203, "learning_rate": 3.559645308136647e-05, "loss": 0.3122, "step": 7824 }, { "epoch": 1.2742336033872084, "grad_norm": 0.13550598919391632, "learning_rate": 3.559216243534009e-05, "loss": 0.3316, "step": 7825 }, { "epoch": 1.2743964499450393, "grad_norm": 0.10154718160629272, "learning_rate": 3.558787140902334e-05, "loss": 0.2967, "step": 7826 }, { "epoch": 1.2745592965028703, "grad_norm": 0.17541688680648804, "learning_rate": 3.5583580002570285e-05, "loss": 0.3195, "step": 7827 }, { "epoch": 1.274722143060701, "grad_norm": 0.107160285115242, "learning_rate": 3.5579288216135e-05, "loss": 0.3326, "step": 7828 }, { "epoch": 1.274884989618532, "grad_norm": 0.12865188717842102, "learning_rate": 3.557499604987157e-05, "loss": 0.3267, "step": 7829 }, { "epoch": 1.2750478361763629, "grad_norm": 0.12026043981313705, "learning_rate": 3.5570703503934096e-05, "loss": 0.3328, "step": 7830 }, { "epoch": 1.2752106827341936, "grad_norm": 0.10640300810337067, "learning_rate": 3.5566410578476706e-05, "loss": 0.3208, "step": 7831 }, { "epoch": 1.2753735292920245, "grad_norm": 0.09787904471158981, "learning_rate": 3.556211727365351e-05, "loss": 0.305, "step": 7832 }, { "epoch": 1.2755363758498555, "grad_norm": 0.1423945426940918, "learning_rate": 3.555782358961866e-05, "loss": 0.3096, "step": 7833 }, { "epoch": 1.2756992224076864, "grad_norm": 0.1323137730360031, "learning_rate": 3.5553529526526316e-05, "loss": 0.3397, "step": 7834 }, { "epoch": 1.2758620689655173, "grad_norm": 0.12149752676486969, "learning_rate": 3.554923508453063e-05, "loss": 0.3448, "step": 7835 }, { "epoch": 1.276024915523348, "grad_norm": 0.2006502002477646, "learning_rate": 3.5544940263785815e-05, "loss": 0.3525, "step": 7836 }, { "epoch": 1.276187762081179, "grad_norm": 0.1326783448457718, "learning_rate": 3.554064506444604e-05, "loss": 0.3331, "step": 7837 }, { "epoch": 1.27635060863901, "grad_norm": 0.17879384756088257, "learning_rate": 3.5536349486665526e-05, "loss": 0.317, "step": 7838 }, { "epoch": 1.2765134551968407, "grad_norm": 0.11694169789552689, "learning_rate": 3.5532053530598504e-05, "loss": 0.3179, "step": 7839 }, { "epoch": 1.2766763017546716, "grad_norm": 0.14933423697948456, "learning_rate": 3.55277571963992e-05, "loss": 0.3496, "step": 7840 }, { "epoch": 1.2768391483125026, "grad_norm": 0.12198371440172195, "learning_rate": 3.552346048422187e-05, "loss": 0.289, "step": 7841 }, { "epoch": 1.2770019948703335, "grad_norm": 0.15071775019168854, "learning_rate": 3.551916339422079e-05, "loss": 0.3323, "step": 7842 }, { "epoch": 1.2771648414281644, "grad_norm": 0.0969613790512085, "learning_rate": 3.551486592655021e-05, "loss": 0.3212, "step": 7843 }, { "epoch": 1.2773276879859952, "grad_norm": 0.12268485128879547, "learning_rate": 3.551056808136445e-05, "loss": 0.3099, "step": 7844 }, { "epoch": 1.277490534543826, "grad_norm": 0.10177633166313171, "learning_rate": 3.550626985881781e-05, "loss": 0.3541, "step": 7845 }, { "epoch": 1.2776533811016568, "grad_norm": 0.11680255830287933, "learning_rate": 3.55019712590646e-05, "loss": 0.2993, "step": 7846 }, { "epoch": 1.2778162276594878, "grad_norm": 0.1159524917602539, "learning_rate": 3.549767228225916e-05, "loss": 0.3124, "step": 7847 }, { "epoch": 1.2779790742173187, "grad_norm": 0.13247232139110565, "learning_rate": 3.549337292855582e-05, "loss": 0.3005, "step": 7848 }, { "epoch": 1.2781419207751497, "grad_norm": 0.12655648589134216, "learning_rate": 3.548907319810898e-05, "loss": 0.317, "step": 7849 }, { "epoch": 1.2783047673329806, "grad_norm": 0.2017298936843872, "learning_rate": 3.548477309107296e-05, "loss": 0.3176, "step": 7850 }, { "epoch": 1.2784676138908113, "grad_norm": 0.11544118821620941, "learning_rate": 3.548047260760218e-05, "loss": 0.3267, "step": 7851 }, { "epoch": 1.2786304604486423, "grad_norm": 0.21875976026058197, "learning_rate": 3.547617174785104e-05, "loss": 0.349, "step": 7852 }, { "epoch": 1.2787933070064732, "grad_norm": 0.08552423119544983, "learning_rate": 3.547187051197394e-05, "loss": 0.3031, "step": 7853 }, { "epoch": 1.278956153564304, "grad_norm": 0.1338232159614563, "learning_rate": 3.5467568900125316e-05, "loss": 0.3428, "step": 7854 }, { "epoch": 1.2791190001221349, "grad_norm": 0.25197097659111023, "learning_rate": 3.5463266912459605e-05, "loss": 0.3101, "step": 7855 }, { "epoch": 1.2792818466799658, "grad_norm": 0.10575595498085022, "learning_rate": 3.5458964549131265e-05, "loss": 0.2822, "step": 7856 }, { "epoch": 1.2794446932377967, "grad_norm": 0.13546721637248993, "learning_rate": 3.545466181029476e-05, "loss": 0.2983, "step": 7857 }, { "epoch": 1.2796075397956277, "grad_norm": 0.07166314870119095, "learning_rate": 3.545035869610457e-05, "loss": 0.3257, "step": 7858 }, { "epoch": 1.2797703863534584, "grad_norm": 0.11934100836515427, "learning_rate": 3.54460552067152e-05, "loss": 0.3148, "step": 7859 }, { "epoch": 1.2799332329112894, "grad_norm": 0.11276265978813171, "learning_rate": 3.544175134228115e-05, "loss": 0.2902, "step": 7860 }, { "epoch": 1.2800960794691203, "grad_norm": 0.13678458333015442, "learning_rate": 3.543744710295694e-05, "loss": 0.3156, "step": 7861 }, { "epoch": 1.280258926026951, "grad_norm": 0.1308504045009613, "learning_rate": 3.543314248889711e-05, "loss": 0.3483, "step": 7862 }, { "epoch": 1.280421772584782, "grad_norm": 0.1405731439590454, "learning_rate": 3.54288375002562e-05, "loss": 0.3211, "step": 7863 }, { "epoch": 1.280584619142613, "grad_norm": 0.20273438096046448, "learning_rate": 3.5424532137188784e-05, "loss": 0.3502, "step": 7864 }, { "epoch": 1.2807474657004438, "grad_norm": 0.11305632442235947, "learning_rate": 3.542022639984943e-05, "loss": 0.317, "step": 7865 }, { "epoch": 1.2809103122582746, "grad_norm": 0.08871898055076599, "learning_rate": 3.541592028839273e-05, "loss": 0.3534, "step": 7866 }, { "epoch": 1.2810731588161055, "grad_norm": 0.15101216733455658, "learning_rate": 3.541161380297328e-05, "loss": 0.3049, "step": 7867 }, { "epoch": 1.2812360053739364, "grad_norm": 0.07970321923494339, "learning_rate": 3.5407306943745704e-05, "loss": 0.3075, "step": 7868 }, { "epoch": 1.2813988519317672, "grad_norm": 0.11500973254442215, "learning_rate": 3.540299971086463e-05, "loss": 0.3205, "step": 7869 }, { "epoch": 1.2815616984895981, "grad_norm": 0.10935591906309128, "learning_rate": 3.5398692104484694e-05, "loss": 0.3332, "step": 7870 }, { "epoch": 1.281724545047429, "grad_norm": 0.12141381204128265, "learning_rate": 3.539438412476056e-05, "loss": 0.3045, "step": 7871 }, { "epoch": 1.28188739160526, "grad_norm": 0.13409677147865295, "learning_rate": 3.5390075771846884e-05, "loss": 0.3129, "step": 7872 }, { "epoch": 1.282050238163091, "grad_norm": 0.14773564040660858, "learning_rate": 3.538576704589837e-05, "loss": 0.3423, "step": 7873 }, { "epoch": 1.2822130847209217, "grad_norm": 0.11516719311475754, "learning_rate": 3.5381457947069696e-05, "loss": 0.3349, "step": 7874 }, { "epoch": 1.2823759312787526, "grad_norm": 0.2453145682811737, "learning_rate": 3.5377148475515584e-05, "loss": 0.2954, "step": 7875 }, { "epoch": 1.2825387778365835, "grad_norm": 0.09995491057634354, "learning_rate": 3.537283863139075e-05, "loss": 0.315, "step": 7876 }, { "epoch": 1.2827016243944143, "grad_norm": 0.13322322070598602, "learning_rate": 3.536852841484992e-05, "loss": 0.3229, "step": 7877 }, { "epoch": 1.2828644709522452, "grad_norm": 0.31678029894828796, "learning_rate": 3.5364217826047864e-05, "loss": 0.3837, "step": 7878 }, { "epoch": 1.2830273175100761, "grad_norm": 0.12440067529678345, "learning_rate": 3.535990686513933e-05, "loss": 0.3228, "step": 7879 }, { "epoch": 1.283190164067907, "grad_norm": 0.15193256735801697, "learning_rate": 3.5355595532279106e-05, "loss": 0.3324, "step": 7880 }, { "epoch": 1.283353010625738, "grad_norm": 0.14081117510795593, "learning_rate": 3.5351283827621976e-05, "loss": 0.3432, "step": 7881 }, { "epoch": 1.2835158571835688, "grad_norm": 0.08721871674060822, "learning_rate": 3.5346971751322744e-05, "loss": 0.3205, "step": 7882 }, { "epoch": 1.2836787037413997, "grad_norm": 0.13185037672519684, "learning_rate": 3.5342659303536226e-05, "loss": 0.3253, "step": 7883 }, { "epoch": 1.2838415502992304, "grad_norm": 0.10406725853681564, "learning_rate": 3.533834648441725e-05, "loss": 0.2973, "step": 7884 }, { "epoch": 1.2840043968570614, "grad_norm": 0.09809901565313339, "learning_rate": 3.533403329412066e-05, "loss": 0.3093, "step": 7885 }, { "epoch": 1.2841672434148923, "grad_norm": 0.09903697669506073, "learning_rate": 3.532971973280131e-05, "loss": 0.313, "step": 7886 }, { "epoch": 1.2843300899727232, "grad_norm": 0.1072012335062027, "learning_rate": 3.5325405800614084e-05, "loss": 0.3365, "step": 7887 }, { "epoch": 1.2844929365305542, "grad_norm": 0.10725001245737076, "learning_rate": 3.532109149771384e-05, "loss": 0.2942, "step": 7888 }, { "epoch": 1.284655783088385, "grad_norm": 0.09760268777608871, "learning_rate": 3.53167768242555e-05, "loss": 0.3137, "step": 7889 }, { "epoch": 1.2848186296462158, "grad_norm": 0.10779441148042679, "learning_rate": 3.5312461780393956e-05, "loss": 0.3732, "step": 7890 }, { "epoch": 1.2849814762040468, "grad_norm": 0.15812651813030243, "learning_rate": 3.5308146366284145e-05, "loss": 0.2857, "step": 7891 }, { "epoch": 1.2851443227618775, "grad_norm": 0.16068197786808014, "learning_rate": 3.530383058208099e-05, "loss": 0.3136, "step": 7892 }, { "epoch": 1.2853071693197085, "grad_norm": 0.14394919574260712, "learning_rate": 3.5299514427939444e-05, "loss": 0.3194, "step": 7893 }, { "epoch": 1.2854700158775394, "grad_norm": 0.1250208616256714, "learning_rate": 3.529519790401447e-05, "loss": 0.3376, "step": 7894 }, { "epoch": 1.2856328624353703, "grad_norm": 0.1335260421037674, "learning_rate": 3.5290881010461046e-05, "loss": 0.2958, "step": 7895 }, { "epoch": 1.2857957089932013, "grad_norm": 0.10768003761768341, "learning_rate": 3.528656374743417e-05, "loss": 0.3067, "step": 7896 }, { "epoch": 1.285958555551032, "grad_norm": 0.17056556046009064, "learning_rate": 3.528224611508883e-05, "loss": 0.3181, "step": 7897 }, { "epoch": 1.286121402108863, "grad_norm": 0.140292689204216, "learning_rate": 3.5277928113580044e-05, "loss": 0.3336, "step": 7898 }, { "epoch": 1.2862842486666939, "grad_norm": 0.1845802366733551, "learning_rate": 3.527360974306285e-05, "loss": 0.3002, "step": 7899 }, { "epoch": 1.2864470952245246, "grad_norm": 0.1051495298743248, "learning_rate": 3.5269291003692284e-05, "loss": 0.302, "step": 7900 }, { "epoch": 1.2866099417823555, "grad_norm": 0.10546989738941193, "learning_rate": 3.526497189562341e-05, "loss": 0.3619, "step": 7901 }, { "epoch": 1.2867727883401865, "grad_norm": 0.14392031729221344, "learning_rate": 3.526065241901128e-05, "loss": 0.3243, "step": 7902 }, { "epoch": 1.2869356348980174, "grad_norm": 0.13939104974269867, "learning_rate": 3.525633257401099e-05, "loss": 0.3539, "step": 7903 }, { "epoch": 1.2870984814558482, "grad_norm": 0.12487416714429855, "learning_rate": 3.525201236077763e-05, "loss": 0.3131, "step": 7904 }, { "epoch": 1.287261328013679, "grad_norm": 0.1469915807247162, "learning_rate": 3.524769177946632e-05, "loss": 0.3206, "step": 7905 }, { "epoch": 1.28742417457151, "grad_norm": 0.11951641738414764, "learning_rate": 3.5243370830232164e-05, "loss": 0.3385, "step": 7906 }, { "epoch": 1.2875870211293408, "grad_norm": 0.11646910756826401, "learning_rate": 3.523904951323031e-05, "loss": 0.3117, "step": 7907 }, { "epoch": 1.2877498676871717, "grad_norm": 0.10776710510253906, "learning_rate": 3.52347278286159e-05, "loss": 0.2915, "step": 7908 }, { "epoch": 1.2879127142450026, "grad_norm": 0.09030354022979736, "learning_rate": 3.523040577654409e-05, "loss": 0.2998, "step": 7909 }, { "epoch": 1.2880755608028336, "grad_norm": 0.1483747959136963, "learning_rate": 3.522608335717007e-05, "loss": 0.3298, "step": 7910 }, { "epoch": 1.2882384073606645, "grad_norm": 0.10727153718471527, "learning_rate": 3.522176057064902e-05, "loss": 0.3278, "step": 7911 }, { "epoch": 1.2884012539184952, "grad_norm": 0.12905576825141907, "learning_rate": 3.5217437417136134e-05, "loss": 0.3246, "step": 7912 }, { "epoch": 1.2885641004763262, "grad_norm": 0.09191998094320297, "learning_rate": 3.521311389678664e-05, "loss": 0.3223, "step": 7913 }, { "epoch": 1.2887269470341571, "grad_norm": 0.11745062470436096, "learning_rate": 3.5208790009755746e-05, "loss": 0.3181, "step": 7914 }, { "epoch": 1.2888897935919879, "grad_norm": 0.09176941215991974, "learning_rate": 3.5204465756198715e-05, "loss": 0.3088, "step": 7915 }, { "epoch": 1.2890526401498188, "grad_norm": 0.149699404835701, "learning_rate": 3.520014113627078e-05, "loss": 0.3243, "step": 7916 }, { "epoch": 1.2892154867076497, "grad_norm": 0.16279137134552002, "learning_rate": 3.519581615012723e-05, "loss": 0.2959, "step": 7917 }, { "epoch": 1.2893783332654807, "grad_norm": 0.09670933336019516, "learning_rate": 3.519149079792332e-05, "loss": 0.2817, "step": 7918 }, { "epoch": 1.2895411798233116, "grad_norm": 0.06630958616733551, "learning_rate": 3.518716507981437e-05, "loss": 0.3214, "step": 7919 }, { "epoch": 1.2897040263811423, "grad_norm": 0.11825203150510788, "learning_rate": 3.518283899595566e-05, "loss": 0.3074, "step": 7920 }, { "epoch": 1.2898668729389733, "grad_norm": 0.10227825492620468, "learning_rate": 3.517851254650252e-05, "loss": 0.288, "step": 7921 }, { "epoch": 1.290029719496804, "grad_norm": 0.16014689207077026, "learning_rate": 3.51741857316103e-05, "loss": 0.3441, "step": 7922 }, { "epoch": 1.290192566054635, "grad_norm": 0.12003445625305176, "learning_rate": 3.5169858551434314e-05, "loss": 0.3213, "step": 7923 }, { "epoch": 1.290355412612466, "grad_norm": 0.14009623229503632, "learning_rate": 3.516553100612995e-05, "loss": 0.3054, "step": 7924 }, { "epoch": 1.2905182591702968, "grad_norm": 0.12301286309957504, "learning_rate": 3.516120309585255e-05, "loss": 0.3465, "step": 7925 }, { "epoch": 1.2906811057281278, "grad_norm": 0.0780138447880745, "learning_rate": 3.515687482075752e-05, "loss": 0.3575, "step": 7926 }, { "epoch": 1.2908439522859585, "grad_norm": 0.16174839437007904, "learning_rate": 3.515254618100026e-05, "loss": 0.3098, "step": 7927 }, { "epoch": 1.2910067988437894, "grad_norm": 0.08424868434667587, "learning_rate": 3.514821717673617e-05, "loss": 0.3018, "step": 7928 }, { "epoch": 1.2911696454016204, "grad_norm": 0.1290617734193802, "learning_rate": 3.514388780812068e-05, "loss": 0.3379, "step": 7929 }, { "epoch": 1.291332491959451, "grad_norm": 0.16189831495285034, "learning_rate": 3.513955807530923e-05, "loss": 0.3135, "step": 7930 }, { "epoch": 1.291495338517282, "grad_norm": 0.12932395935058594, "learning_rate": 3.513522797845726e-05, "loss": 0.3026, "step": 7931 }, { "epoch": 1.291658185075113, "grad_norm": 0.11123022437095642, "learning_rate": 3.5130897517720244e-05, "loss": 0.321, "step": 7932 }, { "epoch": 1.291821031632944, "grad_norm": 0.22991643846035004, "learning_rate": 3.512656669325365e-05, "loss": 0.3482, "step": 7933 }, { "epoch": 1.2919838781907749, "grad_norm": 0.1378743201494217, "learning_rate": 3.512223550521297e-05, "loss": 0.3137, "step": 7934 }, { "epoch": 1.2921467247486056, "grad_norm": 0.08179070800542831, "learning_rate": 3.5117903953753704e-05, "loss": 0.2774, "step": 7935 }, { "epoch": 1.2923095713064365, "grad_norm": 0.09579689055681229, "learning_rate": 3.511357203903138e-05, "loss": 0.3179, "step": 7936 }, { "epoch": 1.2924724178642675, "grad_norm": 0.1134568303823471, "learning_rate": 3.510923976120152e-05, "loss": 0.3114, "step": 7937 }, { "epoch": 1.2926352644220982, "grad_norm": 0.10915634781122208, "learning_rate": 3.510490712041966e-05, "loss": 0.2956, "step": 7938 }, { "epoch": 1.2927981109799291, "grad_norm": 0.13056626915931702, "learning_rate": 3.5100574116841354e-05, "loss": 0.3354, "step": 7939 }, { "epoch": 1.29296095753776, "grad_norm": 0.11961887776851654, "learning_rate": 3.509624075062219e-05, "loss": 0.31, "step": 7940 }, { "epoch": 1.293123804095591, "grad_norm": 0.12044595181941986, "learning_rate": 3.5091907021917724e-05, "loss": 0.3082, "step": 7941 }, { "epoch": 1.2932866506534217, "grad_norm": 0.16915199160575867, "learning_rate": 3.508757293088356e-05, "loss": 0.3153, "step": 7942 }, { "epoch": 1.2934494972112527, "grad_norm": 0.11723341792821884, "learning_rate": 3.508323847767531e-05, "loss": 0.336, "step": 7943 }, { "epoch": 1.2936123437690836, "grad_norm": 0.08114053308963776, "learning_rate": 3.5078903662448587e-05, "loss": 0.3445, "step": 7944 }, { "epoch": 1.2937751903269143, "grad_norm": 0.24819596111774445, "learning_rate": 3.5074568485359024e-05, "loss": 0.3598, "step": 7945 }, { "epoch": 1.2939380368847453, "grad_norm": 0.10326382517814636, "learning_rate": 3.507023294656226e-05, "loss": 0.3503, "step": 7946 }, { "epoch": 1.2941008834425762, "grad_norm": 0.10585645586252213, "learning_rate": 3.506589704621397e-05, "loss": 0.3444, "step": 7947 }, { "epoch": 1.2942637300004072, "grad_norm": 0.15834735333919525, "learning_rate": 3.506156078446982e-05, "loss": 0.2946, "step": 7948 }, { "epoch": 1.2944265765582381, "grad_norm": 0.09081587195396423, "learning_rate": 3.505722416148549e-05, "loss": 0.3505, "step": 7949 }, { "epoch": 1.2945894231160688, "grad_norm": 0.15966500341892242, "learning_rate": 3.5052887177416684e-05, "loss": 0.3071, "step": 7950 }, { "epoch": 1.2947522696738998, "grad_norm": 0.11722991615533829, "learning_rate": 3.5048549832419104e-05, "loss": 0.3023, "step": 7951 }, { "epoch": 1.2949151162317307, "grad_norm": 0.14273978769779205, "learning_rate": 3.504421212664848e-05, "loss": 0.3282, "step": 7952 }, { "epoch": 1.2950779627895614, "grad_norm": 0.11384616792201996, "learning_rate": 3.5039874060260545e-05, "loss": 0.2884, "step": 7953 }, { "epoch": 1.2952408093473924, "grad_norm": 0.0970718115568161, "learning_rate": 3.503553563341105e-05, "loss": 0.3197, "step": 7954 }, { "epoch": 1.2954036559052233, "grad_norm": 0.10004134476184845, "learning_rate": 3.5031196846255764e-05, "loss": 0.3024, "step": 7955 }, { "epoch": 1.2955665024630543, "grad_norm": 0.15385307371616364, "learning_rate": 3.502685769895046e-05, "loss": 0.3167, "step": 7956 }, { "epoch": 1.295729349020885, "grad_norm": 0.14782147109508514, "learning_rate": 3.502251819165091e-05, "loss": 0.3334, "step": 7957 }, { "epoch": 1.295892195578716, "grad_norm": 0.12313909828662872, "learning_rate": 3.501817832451294e-05, "loss": 0.299, "step": 7958 }, { "epoch": 1.2960550421365469, "grad_norm": 0.10740791261196136, "learning_rate": 3.5013838097692346e-05, "loss": 0.2964, "step": 7959 }, { "epoch": 1.2962178886943776, "grad_norm": 0.1534583419561386, "learning_rate": 3.500949751134497e-05, "loss": 0.3628, "step": 7960 }, { "epoch": 1.2963807352522085, "grad_norm": 0.08138928562402725, "learning_rate": 3.500515656562664e-05, "loss": 0.3067, "step": 7961 }, { "epoch": 1.2965435818100395, "grad_norm": 0.1446019858121872, "learning_rate": 3.5000815260693214e-05, "loss": 0.3258, "step": 7962 }, { "epoch": 1.2967064283678704, "grad_norm": 0.09267659485340118, "learning_rate": 3.4996473596700554e-05, "loss": 0.2975, "step": 7963 }, { "epoch": 1.2968692749257014, "grad_norm": 0.145082488656044, "learning_rate": 3.499213157380455e-05, "loss": 0.2975, "step": 7964 }, { "epoch": 1.297032121483532, "grad_norm": 0.14743800461292267, "learning_rate": 3.498778919216107e-05, "loss": 0.3508, "step": 7965 }, { "epoch": 1.297194968041363, "grad_norm": 0.16419707238674164, "learning_rate": 3.498344645192605e-05, "loss": 0.3146, "step": 7966 }, { "epoch": 1.297357814599194, "grad_norm": 0.13646939396858215, "learning_rate": 3.497910335325538e-05, "loss": 0.309, "step": 7967 }, { "epoch": 1.2975206611570247, "grad_norm": 0.17022466659545898, "learning_rate": 3.497475989630501e-05, "loss": 0.3089, "step": 7968 }, { "epoch": 1.2976835077148556, "grad_norm": 0.10006608068943024, "learning_rate": 3.497041608123087e-05, "loss": 0.2748, "step": 7969 }, { "epoch": 1.2978463542726866, "grad_norm": 0.136893630027771, "learning_rate": 3.4966071908188914e-05, "loss": 0.3168, "step": 7970 }, { "epoch": 1.2980092008305175, "grad_norm": 0.18592031300067902, "learning_rate": 3.496172737733513e-05, "loss": 0.3248, "step": 7971 }, { "epoch": 1.2981720473883485, "grad_norm": 0.11817395687103271, "learning_rate": 3.4957382488825474e-05, "loss": 0.2716, "step": 7972 }, { "epoch": 1.2983348939461792, "grad_norm": 0.15533044934272766, "learning_rate": 3.4953037242815964e-05, "loss": 0.32, "step": 7973 }, { "epoch": 1.2984977405040101, "grad_norm": 0.13799065351486206, "learning_rate": 3.494869163946259e-05, "loss": 0.2896, "step": 7974 }, { "epoch": 1.2986605870618408, "grad_norm": 0.11581708490848541, "learning_rate": 3.494434567892138e-05, "loss": 0.3602, "step": 7975 }, { "epoch": 1.2988234336196718, "grad_norm": 0.10426298528909683, "learning_rate": 3.4939999361348367e-05, "loss": 0.2893, "step": 7976 }, { "epoch": 1.2989862801775027, "grad_norm": 0.14912043511867523, "learning_rate": 3.4935652686899594e-05, "loss": 0.3136, "step": 7977 }, { "epoch": 1.2991491267353337, "grad_norm": 0.14520418643951416, "learning_rate": 3.493130565573113e-05, "loss": 0.3247, "step": 7978 }, { "epoch": 1.2993119732931646, "grad_norm": 0.09808304160833359, "learning_rate": 3.4926958267999034e-05, "loss": 0.3027, "step": 7979 }, { "epoch": 1.2994748198509953, "grad_norm": 0.10365088284015656, "learning_rate": 3.492261052385939e-05, "loss": 0.3133, "step": 7980 }, { "epoch": 1.2996376664088263, "grad_norm": 0.12569400668144226, "learning_rate": 3.49182624234683e-05, "loss": 0.3135, "step": 7981 }, { "epoch": 1.2998005129666572, "grad_norm": 0.20142215490341187, "learning_rate": 3.491391396698188e-05, "loss": 0.3407, "step": 7982 }, { "epoch": 1.299963359524488, "grad_norm": 0.12335170060396194, "learning_rate": 3.490956515455624e-05, "loss": 0.3136, "step": 7983 }, { "epoch": 1.3001262060823189, "grad_norm": 0.14236581325531006, "learning_rate": 3.4905215986347526e-05, "loss": 0.3274, "step": 7984 }, { "epoch": 1.3002890526401498, "grad_norm": 0.10418237000703812, "learning_rate": 3.4900866462511875e-05, "loss": 0.296, "step": 7985 }, { "epoch": 1.3004518991979808, "grad_norm": 0.11638972908258438, "learning_rate": 3.489651658320545e-05, "loss": 0.2927, "step": 7986 }, { "epoch": 1.3006147457558117, "grad_norm": 0.22457200288772583, "learning_rate": 3.489216634858444e-05, "loss": 0.343, "step": 7987 }, { "epoch": 1.3007775923136424, "grad_norm": 0.15649212896823883, "learning_rate": 3.4887815758805016e-05, "loss": 0.3224, "step": 7988 }, { "epoch": 1.3009404388714734, "grad_norm": 0.16727083921432495, "learning_rate": 3.488346481402338e-05, "loss": 0.3042, "step": 7989 }, { "epoch": 1.3011032854293043, "grad_norm": 0.07727284729480743, "learning_rate": 3.487911351439574e-05, "loss": 0.3236, "step": 7990 }, { "epoch": 1.301266131987135, "grad_norm": 0.10052793473005295, "learning_rate": 3.487476186007834e-05, "loss": 0.2763, "step": 7991 }, { "epoch": 1.301428978544966, "grad_norm": 0.0762394517660141, "learning_rate": 3.4870409851227395e-05, "loss": 0.293, "step": 7992 }, { "epoch": 1.301591825102797, "grad_norm": 0.1503830850124359, "learning_rate": 3.4866057487999165e-05, "loss": 0.3187, "step": 7993 }, { "epoch": 1.3017546716606279, "grad_norm": 0.1385793536901474, "learning_rate": 3.486170477054991e-05, "loss": 0.3117, "step": 7994 }, { "epoch": 1.3019175182184586, "grad_norm": 0.12629494071006775, "learning_rate": 3.485735169903591e-05, "loss": 0.2836, "step": 7995 }, { "epoch": 1.3020803647762895, "grad_norm": 0.08360772579908371, "learning_rate": 3.485299827361345e-05, "loss": 0.3132, "step": 7996 }, { "epoch": 1.3022432113341205, "grad_norm": 0.13827192783355713, "learning_rate": 3.484864449443883e-05, "loss": 0.3237, "step": 7997 }, { "epoch": 1.3024060578919512, "grad_norm": 0.10454705357551575, "learning_rate": 3.484429036166837e-05, "loss": 0.2978, "step": 7998 }, { "epoch": 1.3025689044497821, "grad_norm": 0.1518239825963974, "learning_rate": 3.4839935875458384e-05, "loss": 0.3098, "step": 7999 }, { "epoch": 1.302731751007613, "grad_norm": 0.0937374085187912, "learning_rate": 3.483558103596523e-05, "loss": 0.3154, "step": 8000 }, { "epoch": 1.302894597565444, "grad_norm": 0.12591491639614105, "learning_rate": 3.483122584334524e-05, "loss": 0.2779, "step": 8001 }, { "epoch": 1.303057444123275, "grad_norm": 0.085625059902668, "learning_rate": 3.4826870297754797e-05, "loss": 0.2939, "step": 8002 }, { "epoch": 1.3032202906811057, "grad_norm": 0.13502125442028046, "learning_rate": 3.482251439935026e-05, "loss": 0.3374, "step": 8003 }, { "epoch": 1.3033831372389366, "grad_norm": 0.15721803903579712, "learning_rate": 3.481815814828803e-05, "loss": 0.2985, "step": 8004 }, { "epoch": 1.3035459837967676, "grad_norm": 0.1599072515964508, "learning_rate": 3.4813801544724494e-05, "loss": 0.3084, "step": 8005 }, { "epoch": 1.3037088303545983, "grad_norm": 0.16831056773662567, "learning_rate": 3.4809444588816084e-05, "loss": 0.3215, "step": 8006 }, { "epoch": 1.3038716769124292, "grad_norm": 0.10285582393407822, "learning_rate": 3.480508728071924e-05, "loss": 0.3487, "step": 8007 }, { "epoch": 1.3040345234702602, "grad_norm": 0.14439299702644348, "learning_rate": 3.480072962059037e-05, "loss": 0.3329, "step": 8008 }, { "epoch": 1.304197370028091, "grad_norm": 0.11844319105148315, "learning_rate": 3.4796371608585954e-05, "loss": 0.3252, "step": 8009 }, { "epoch": 1.304360216585922, "grad_norm": 0.13606677949428558, "learning_rate": 3.4792013244862436e-05, "loss": 0.3174, "step": 8010 }, { "epoch": 1.3045230631437528, "grad_norm": 0.15624985098838806, "learning_rate": 3.478765452957631e-05, "loss": 0.3113, "step": 8011 }, { "epoch": 1.3046859097015837, "grad_norm": 0.08892000466585159, "learning_rate": 3.478329546288406e-05, "loss": 0.2907, "step": 8012 }, { "epoch": 1.3048487562594144, "grad_norm": 0.08930811285972595, "learning_rate": 3.477893604494219e-05, "loss": 0.3081, "step": 8013 }, { "epoch": 1.3050116028172454, "grad_norm": 0.09919343888759613, "learning_rate": 3.477457627590722e-05, "loss": 0.342, "step": 8014 }, { "epoch": 1.3051744493750763, "grad_norm": 0.12640278041362762, "learning_rate": 3.4770216155935674e-05, "loss": 0.3087, "step": 8015 }, { "epoch": 1.3053372959329073, "grad_norm": 0.07611576467752457, "learning_rate": 3.47658556851841e-05, "loss": 0.2753, "step": 8016 }, { "epoch": 1.3055001424907382, "grad_norm": 0.11112739145755768, "learning_rate": 3.4761494863809044e-05, "loss": 0.3503, "step": 8017 }, { "epoch": 1.305662989048569, "grad_norm": 0.14556348323822021, "learning_rate": 3.4757133691967076e-05, "loss": 0.3269, "step": 8018 }, { "epoch": 1.3058258356063999, "grad_norm": 0.10942792147397995, "learning_rate": 3.475277216981477e-05, "loss": 0.3274, "step": 8019 }, { "epoch": 1.3059886821642308, "grad_norm": 0.09624193608760834, "learning_rate": 3.474841029750874e-05, "loss": 0.3268, "step": 8020 }, { "epoch": 1.3061515287220615, "grad_norm": 0.11951916664838791, "learning_rate": 3.474404807520556e-05, "loss": 0.315, "step": 8021 }, { "epoch": 1.3063143752798925, "grad_norm": 0.0970412939786911, "learning_rate": 3.473968550306187e-05, "loss": 0.3463, "step": 8022 }, { "epoch": 1.3064772218377234, "grad_norm": 0.14342468976974487, "learning_rate": 3.473532258123427e-05, "loss": 0.3238, "step": 8023 }, { "epoch": 1.3066400683955544, "grad_norm": 0.13894608616828918, "learning_rate": 3.473095930987944e-05, "loss": 0.2734, "step": 8024 }, { "epoch": 1.3068029149533853, "grad_norm": 0.08494900166988373, "learning_rate": 3.4726595689154016e-05, "loss": 0.3014, "step": 8025 }, { "epoch": 1.306965761511216, "grad_norm": 0.14446482062339783, "learning_rate": 3.4722231719214655e-05, "loss": 0.3472, "step": 8026 }, { "epoch": 1.307128608069047, "grad_norm": 0.15359988808631897, "learning_rate": 3.471786740021805e-05, "loss": 0.3171, "step": 8027 }, { "epoch": 1.307291454626878, "grad_norm": 0.10523606836795807, "learning_rate": 3.471350273232089e-05, "loss": 0.2794, "step": 8028 }, { "epoch": 1.3074543011847086, "grad_norm": 0.14315496385097504, "learning_rate": 3.4709137715679886e-05, "loss": 0.2972, "step": 8029 }, { "epoch": 1.3076171477425396, "grad_norm": 0.16460169851779938, "learning_rate": 3.470477235045174e-05, "loss": 0.3432, "step": 8030 }, { "epoch": 1.3077799943003705, "grad_norm": 0.1031024381518364, "learning_rate": 3.47004066367932e-05, "loss": 0.3306, "step": 8031 }, { "epoch": 1.3079428408582014, "grad_norm": 0.11063302308320999, "learning_rate": 3.469604057486099e-05, "loss": 0.3306, "step": 8032 }, { "epoch": 1.3081056874160322, "grad_norm": 0.15003061294555664, "learning_rate": 3.469167416481187e-05, "loss": 0.3209, "step": 8033 }, { "epoch": 1.3082685339738631, "grad_norm": 0.0964888259768486, "learning_rate": 3.468730740680262e-05, "loss": 0.3053, "step": 8034 }, { "epoch": 1.308431380531694, "grad_norm": 0.08281005918979645, "learning_rate": 3.468294030099e-05, "loss": 0.3554, "step": 8035 }, { "epoch": 1.3085942270895248, "grad_norm": 0.08271817862987518, "learning_rate": 3.467857284753082e-05, "loss": 0.3041, "step": 8036 }, { "epoch": 1.3087570736473557, "grad_norm": 0.1280369907617569, "learning_rate": 3.467420504658187e-05, "loss": 0.3322, "step": 8037 }, { "epoch": 1.3089199202051867, "grad_norm": 0.132316455245018, "learning_rate": 3.4669836898299985e-05, "loss": 0.3584, "step": 8038 }, { "epoch": 1.3090827667630176, "grad_norm": 0.16464777290821075, "learning_rate": 3.466546840284197e-05, "loss": 0.3103, "step": 8039 }, { "epoch": 1.3092456133208485, "grad_norm": 0.08486758917570114, "learning_rate": 3.46610995603647e-05, "loss": 0.3142, "step": 8040 }, { "epoch": 1.3094084598786793, "grad_norm": 0.11884614080190659, "learning_rate": 3.465673037102499e-05, "loss": 0.3105, "step": 8041 }, { "epoch": 1.3095713064365102, "grad_norm": 0.11415300518274307, "learning_rate": 3.465236083497974e-05, "loss": 0.3471, "step": 8042 }, { "epoch": 1.3097341529943411, "grad_norm": 0.09464887529611588, "learning_rate": 3.464799095238581e-05, "loss": 0.3127, "step": 8043 }, { "epoch": 1.3098969995521719, "grad_norm": 0.0691065788269043, "learning_rate": 3.464362072340011e-05, "loss": 0.3105, "step": 8044 }, { "epoch": 1.3100598461100028, "grad_norm": 0.1344408541917801, "learning_rate": 3.4639250148179526e-05, "loss": 0.2773, "step": 8045 }, { "epoch": 1.3102226926678338, "grad_norm": 0.11922778934240341, "learning_rate": 3.463487922688098e-05, "loss": 0.3382, "step": 8046 }, { "epoch": 1.3103855392256647, "grad_norm": 0.12125030159950256, "learning_rate": 3.46305079596614e-05, "loss": 0.3002, "step": 8047 }, { "epoch": 1.3105483857834956, "grad_norm": 0.13509884476661682, "learning_rate": 3.462613634667773e-05, "loss": 0.3211, "step": 8048 }, { "epoch": 1.3107112323413264, "grad_norm": 0.10567185282707214, "learning_rate": 3.462176438808694e-05, "loss": 0.3503, "step": 8049 }, { "epoch": 1.3108740788991573, "grad_norm": 0.13711264729499817, "learning_rate": 3.4617392084045966e-05, "loss": 0.3235, "step": 8050 }, { "epoch": 1.311036925456988, "grad_norm": 0.11953110992908478, "learning_rate": 3.4613019434711804e-05, "loss": 0.2839, "step": 8051 }, { "epoch": 1.311199772014819, "grad_norm": 0.09313427656888962, "learning_rate": 3.460864644024144e-05, "loss": 0.2945, "step": 8052 }, { "epoch": 1.31136261857265, "grad_norm": 0.10881924629211426, "learning_rate": 3.460427310079188e-05, "loss": 0.3463, "step": 8053 }, { "epoch": 1.3115254651304808, "grad_norm": 0.08000917732715607, "learning_rate": 3.459989941652015e-05, "loss": 0.2663, "step": 8054 }, { "epoch": 1.3116883116883118, "grad_norm": 0.08587313443422318, "learning_rate": 3.459552538758327e-05, "loss": 0.3605, "step": 8055 }, { "epoch": 1.3118511582461425, "grad_norm": 0.09541667997837067, "learning_rate": 3.459115101413827e-05, "loss": 0.31, "step": 8056 }, { "epoch": 1.3120140048039735, "grad_norm": 0.11294061690568924, "learning_rate": 3.458677629634221e-05, "loss": 0.3226, "step": 8057 }, { "epoch": 1.3121768513618044, "grad_norm": 0.0950009748339653, "learning_rate": 3.458240123435217e-05, "loss": 0.3578, "step": 8058 }, { "epoch": 1.3123396979196351, "grad_norm": 0.1491488367319107, "learning_rate": 3.45780258283252e-05, "loss": 0.3705, "step": 8059 }, { "epoch": 1.312502544477466, "grad_norm": 0.1293085217475891, "learning_rate": 3.4573650078418426e-05, "loss": 0.2854, "step": 8060 }, { "epoch": 1.312665391035297, "grad_norm": 0.13651065528392792, "learning_rate": 3.456927398478892e-05, "loss": 0.3026, "step": 8061 }, { "epoch": 1.312828237593128, "grad_norm": 0.0867525115609169, "learning_rate": 3.4564897547593814e-05, "loss": 0.3022, "step": 8062 }, { "epoch": 1.3129910841509589, "grad_norm": 0.14697633683681488, "learning_rate": 3.456052076699022e-05, "loss": 0.2831, "step": 8063 }, { "epoch": 1.3131539307087896, "grad_norm": 0.09242957085371017, "learning_rate": 3.4556143643135295e-05, "loss": 0.3082, "step": 8064 }, { "epoch": 1.3133167772666205, "grad_norm": 0.09016141295433044, "learning_rate": 3.455176617618617e-05, "loss": 0.3382, "step": 8065 }, { "epoch": 1.3134796238244515, "grad_norm": 0.16163623332977295, "learning_rate": 3.4547388366300036e-05, "loss": 0.3588, "step": 8066 }, { "epoch": 1.3136424703822822, "grad_norm": 0.15207162499427795, "learning_rate": 3.454301021363405e-05, "loss": 0.3138, "step": 8067 }, { "epoch": 1.3138053169401132, "grad_norm": 0.1230771392583847, "learning_rate": 3.45386317183454e-05, "loss": 0.3176, "step": 8068 }, { "epoch": 1.313968163497944, "grad_norm": 0.11197307705879211, "learning_rate": 3.45342528805913e-05, "loss": 0.3014, "step": 8069 }, { "epoch": 1.314131010055775, "grad_norm": 0.1329769790172577, "learning_rate": 3.4529873700528945e-05, "loss": 0.3194, "step": 8070 }, { "epoch": 1.3142938566136058, "grad_norm": 0.10714997351169586, "learning_rate": 3.4525494178315584e-05, "loss": 0.2994, "step": 8071 }, { "epoch": 1.3144567031714367, "grad_norm": 0.14836974442005157, "learning_rate": 3.4521114314108435e-05, "loss": 0.3272, "step": 8072 }, { "epoch": 1.3146195497292676, "grad_norm": 0.14734891057014465, "learning_rate": 3.451673410806476e-05, "loss": 0.3514, "step": 8073 }, { "epoch": 1.3147823962870984, "grad_norm": 0.08857455104589462, "learning_rate": 3.4512353560341815e-05, "loss": 0.3496, "step": 8074 }, { "epoch": 1.3149452428449293, "grad_norm": 0.12554779648780823, "learning_rate": 3.4507972671096885e-05, "loss": 0.3357, "step": 8075 }, { "epoch": 1.3151080894027602, "grad_norm": 0.08971434086561203, "learning_rate": 3.450359144048724e-05, "loss": 0.3279, "step": 8076 }, { "epoch": 1.3152709359605912, "grad_norm": 0.1281438171863556, "learning_rate": 3.449920986867019e-05, "loss": 0.3382, "step": 8077 }, { "epoch": 1.3154337825184221, "grad_norm": 0.15032732486724854, "learning_rate": 3.449482795580306e-05, "loss": 0.324, "step": 8078 }, { "epoch": 1.3155966290762529, "grad_norm": 0.10315538942813873, "learning_rate": 3.449044570204314e-05, "loss": 0.3919, "step": 8079 }, { "epoch": 1.3157594756340838, "grad_norm": 0.08912499248981476, "learning_rate": 3.4486063107547794e-05, "loss": 0.3084, "step": 8080 }, { "epoch": 1.3159223221919147, "grad_norm": 0.14324986934661865, "learning_rate": 3.448168017247436e-05, "loss": 0.3119, "step": 8081 }, { "epoch": 1.3160851687497455, "grad_norm": 0.08280237764120102, "learning_rate": 3.44772968969802e-05, "loss": 0.31, "step": 8082 }, { "epoch": 1.3162480153075764, "grad_norm": 0.1607607901096344, "learning_rate": 3.4472913281222686e-05, "loss": 0.3227, "step": 8083 }, { "epoch": 1.3164108618654073, "grad_norm": 0.16818256676197052, "learning_rate": 3.446852932535921e-05, "loss": 0.3063, "step": 8084 }, { "epoch": 1.3165737084232383, "grad_norm": 0.1620110273361206, "learning_rate": 3.446414502954715e-05, "loss": 0.3089, "step": 8085 }, { "epoch": 1.316736554981069, "grad_norm": 0.09850936383008957, "learning_rate": 3.445976039394394e-05, "loss": 0.319, "step": 8086 }, { "epoch": 1.3168994015389, "grad_norm": 0.18063117563724518, "learning_rate": 3.445537541870698e-05, "loss": 0.319, "step": 8087 }, { "epoch": 1.317062248096731, "grad_norm": 0.07924376428127289, "learning_rate": 3.4450990103993717e-05, "loss": 0.3431, "step": 8088 }, { "epoch": 1.3172250946545616, "grad_norm": 0.1487416923046112, "learning_rate": 3.44466044499616e-05, "loss": 0.327, "step": 8089 }, { "epoch": 1.3173879412123926, "grad_norm": 0.1436794549226761, "learning_rate": 3.444221845676807e-05, "loss": 0.3011, "step": 8090 }, { "epoch": 1.3175507877702235, "grad_norm": 0.11777375638484955, "learning_rate": 3.443783212457061e-05, "loss": 0.3296, "step": 8091 }, { "epoch": 1.3177136343280544, "grad_norm": 0.14653870463371277, "learning_rate": 3.44334454535267e-05, "loss": 0.3192, "step": 8092 }, { "epoch": 1.3178764808858854, "grad_norm": 0.11078928411006927, "learning_rate": 3.442905844379384e-05, "loss": 0.3176, "step": 8093 }, { "epoch": 1.318039327443716, "grad_norm": 0.18801476061344147, "learning_rate": 3.442467109552953e-05, "loss": 0.3533, "step": 8094 }, { "epoch": 1.318202174001547, "grad_norm": 0.08906915038824081, "learning_rate": 3.44202834088913e-05, "loss": 0.3187, "step": 8095 }, { "epoch": 1.318365020559378, "grad_norm": 0.1433948278427124, "learning_rate": 3.441589538403666e-05, "loss": 0.3055, "step": 8096 }, { "epoch": 1.3185278671172087, "grad_norm": 0.11045390367507935, "learning_rate": 3.4411507021123165e-05, "loss": 0.3163, "step": 8097 }, { "epoch": 1.3186907136750396, "grad_norm": 0.13132795691490173, "learning_rate": 3.440711832030837e-05, "loss": 0.304, "step": 8098 }, { "epoch": 1.3188535602328706, "grad_norm": 0.11723418533802032, "learning_rate": 3.440272928174984e-05, "loss": 0.3084, "step": 8099 }, { "epoch": 1.3190164067907015, "grad_norm": 0.08038221299648285, "learning_rate": 3.4398339905605166e-05, "loss": 0.3104, "step": 8100 }, { "epoch": 1.3191792533485325, "grad_norm": 0.10033344477415085, "learning_rate": 3.439395019203192e-05, "loss": 0.3067, "step": 8101 }, { "epoch": 1.3193420999063632, "grad_norm": 0.1559758335351944, "learning_rate": 3.438956014118773e-05, "loss": 0.3219, "step": 8102 }, { "epoch": 1.3195049464641941, "grad_norm": 0.18305404484272003, "learning_rate": 3.4385169753230194e-05, "loss": 0.3322, "step": 8103 }, { "epoch": 1.3196677930220249, "grad_norm": 0.1689806580543518, "learning_rate": 3.4380779028316945e-05, "loss": 0.3535, "step": 8104 }, { "epoch": 1.3198306395798558, "grad_norm": 0.12102136015892029, "learning_rate": 3.437638796660562e-05, "loss": 0.3102, "step": 8105 }, { "epoch": 1.3199934861376867, "grad_norm": 0.06466169655323029, "learning_rate": 3.4371996568253884e-05, "loss": 0.3241, "step": 8106 }, { "epoch": 1.3201563326955177, "grad_norm": 0.10061965882778168, "learning_rate": 3.436760483341939e-05, "loss": 0.2963, "step": 8107 }, { "epoch": 1.3203191792533486, "grad_norm": 0.07284752279520035, "learning_rate": 3.4363212762259806e-05, "loss": 0.3336, "step": 8108 }, { "epoch": 1.3204820258111793, "grad_norm": 0.09539250284433365, "learning_rate": 3.4358820354932834e-05, "loss": 0.3558, "step": 8109 }, { "epoch": 1.3206448723690103, "grad_norm": 0.15527912974357605, "learning_rate": 3.4354427611596174e-05, "loss": 0.3163, "step": 8110 }, { "epoch": 1.3208077189268412, "grad_norm": 0.10409275442361832, "learning_rate": 3.435003453240754e-05, "loss": 0.3374, "step": 8111 }, { "epoch": 1.320970565484672, "grad_norm": 0.12063818424940109, "learning_rate": 3.434564111752464e-05, "loss": 0.331, "step": 8112 }, { "epoch": 1.321133412042503, "grad_norm": 0.1467190384864807, "learning_rate": 3.434124736710524e-05, "loss": 0.3396, "step": 8113 }, { "epoch": 1.3212962586003338, "grad_norm": 0.14097319543361664, "learning_rate": 3.4336853281307066e-05, "loss": 0.3074, "step": 8114 }, { "epoch": 1.3214591051581648, "grad_norm": 0.09356217831373215, "learning_rate": 3.4332458860287883e-05, "loss": 0.3338, "step": 8115 }, { "epoch": 1.3216219517159957, "grad_norm": 0.07122134417295456, "learning_rate": 3.4328064104205465e-05, "loss": 0.3224, "step": 8116 }, { "epoch": 1.3217847982738264, "grad_norm": 0.16198904812335968, "learning_rate": 3.432366901321761e-05, "loss": 0.3378, "step": 8117 }, { "epoch": 1.3219476448316574, "grad_norm": 0.12260404974222183, "learning_rate": 3.4319273587482084e-05, "loss": 0.316, "step": 8118 }, { "epoch": 1.3221104913894883, "grad_norm": 0.14558061957359314, "learning_rate": 3.4314877827156726e-05, "loss": 0.3489, "step": 8119 }, { "epoch": 1.322273337947319, "grad_norm": 0.11440908908843994, "learning_rate": 3.431048173239935e-05, "loss": 0.3314, "step": 8120 }, { "epoch": 1.32243618450515, "grad_norm": 0.15742655098438263, "learning_rate": 3.4306085303367776e-05, "loss": 0.3102, "step": 8121 }, { "epoch": 1.322599031062981, "grad_norm": 0.09374112635850906, "learning_rate": 3.4301688540219864e-05, "loss": 0.287, "step": 8122 }, { "epoch": 1.3227618776208119, "grad_norm": 0.12223901599645615, "learning_rate": 3.429729144311346e-05, "loss": 0.3198, "step": 8123 }, { "epoch": 1.3229247241786426, "grad_norm": 0.12554916739463806, "learning_rate": 3.429289401220644e-05, "loss": 0.3575, "step": 8124 }, { "epoch": 1.3230875707364735, "grad_norm": 0.11687346547842026, "learning_rate": 3.4288496247656684e-05, "loss": 0.312, "step": 8125 }, { "epoch": 1.3232504172943045, "grad_norm": 0.11522538959980011, "learning_rate": 3.428409814962208e-05, "loss": 0.3203, "step": 8126 }, { "epoch": 1.3234132638521352, "grad_norm": 0.0793076753616333, "learning_rate": 3.427969971826054e-05, "loss": 0.3381, "step": 8127 }, { "epoch": 1.3235761104099661, "grad_norm": 0.13134361803531647, "learning_rate": 3.4275300953729975e-05, "loss": 0.2968, "step": 8128 }, { "epoch": 1.323738956967797, "grad_norm": 0.13167451322078705, "learning_rate": 3.427090185618832e-05, "loss": 0.2915, "step": 8129 }, { "epoch": 1.323901803525628, "grad_norm": 0.07896406203508377, "learning_rate": 3.426650242579351e-05, "loss": 0.3244, "step": 8130 }, { "epoch": 1.324064650083459, "grad_norm": 0.16174349188804626, "learning_rate": 3.426210266270349e-05, "loss": 0.3423, "step": 8131 }, { "epoch": 1.3242274966412897, "grad_norm": 0.08508605509996414, "learning_rate": 3.425770256707625e-05, "loss": 0.3079, "step": 8132 }, { "epoch": 1.3243903431991206, "grad_norm": 0.11786386370658875, "learning_rate": 3.4253302139069744e-05, "loss": 0.3152, "step": 8133 }, { "epoch": 1.3245531897569516, "grad_norm": 0.07357139885425568, "learning_rate": 3.424890137884196e-05, "loss": 0.3462, "step": 8134 }, { "epoch": 1.3247160363147823, "grad_norm": 0.11042514443397522, "learning_rate": 3.4244500286550915e-05, "loss": 0.3734, "step": 8135 }, { "epoch": 1.3248788828726132, "grad_norm": 0.12770190834999084, "learning_rate": 3.4240098862354613e-05, "loss": 0.3318, "step": 8136 }, { "epoch": 1.3250417294304442, "grad_norm": 0.09243670105934143, "learning_rate": 3.423569710641108e-05, "loss": 0.2943, "step": 8137 }, { "epoch": 1.3252045759882751, "grad_norm": 0.15094596147537231, "learning_rate": 3.4231295018878343e-05, "loss": 0.3207, "step": 8138 }, { "epoch": 1.325367422546106, "grad_norm": 0.08763521909713745, "learning_rate": 3.422689259991446e-05, "loss": 0.3274, "step": 8139 }, { "epoch": 1.3255302691039368, "grad_norm": 0.11855442076921463, "learning_rate": 3.422248984967749e-05, "loss": 0.3542, "step": 8140 }, { "epoch": 1.3256931156617677, "grad_norm": 0.14714325964450836, "learning_rate": 3.42180867683255e-05, "loss": 0.2991, "step": 8141 }, { "epoch": 1.3258559622195984, "grad_norm": 0.11247031390666962, "learning_rate": 3.421368335601658e-05, "loss": 0.3346, "step": 8142 }, { "epoch": 1.3260188087774294, "grad_norm": 0.1940893977880478, "learning_rate": 3.4209279612908826e-05, "loss": 0.3299, "step": 8143 }, { "epoch": 1.3261816553352603, "grad_norm": 0.10744889080524445, "learning_rate": 3.420487553916034e-05, "loss": 0.3262, "step": 8144 }, { "epoch": 1.3263445018930913, "grad_norm": 0.12319882214069366, "learning_rate": 3.420047113492924e-05, "loss": 0.3392, "step": 8145 }, { "epoch": 1.3265073484509222, "grad_norm": 0.1027836948633194, "learning_rate": 3.419606640037366e-05, "loss": 0.3108, "step": 8146 }, { "epoch": 1.326670195008753, "grad_norm": 0.13109014928340912, "learning_rate": 3.419166133565174e-05, "loss": 0.2967, "step": 8147 }, { "epoch": 1.3268330415665839, "grad_norm": 0.1008012592792511, "learning_rate": 3.4187255940921644e-05, "loss": 0.2894, "step": 8148 }, { "epoch": 1.3269958881244148, "grad_norm": 0.1482018232345581, "learning_rate": 3.4182850216341536e-05, "loss": 0.3745, "step": 8149 }, { "epoch": 1.3271587346822455, "grad_norm": 0.10625477880239487, "learning_rate": 3.4178444162069595e-05, "loss": 0.321, "step": 8150 }, { "epoch": 1.3273215812400765, "grad_norm": 0.10171355307102203, "learning_rate": 3.4174037778264e-05, "loss": 0.3249, "step": 8151 }, { "epoch": 1.3274844277979074, "grad_norm": 0.09567657113075256, "learning_rate": 3.4169631065082966e-05, "loss": 0.3132, "step": 8152 }, { "epoch": 1.3276472743557384, "grad_norm": 0.11789951473474503, "learning_rate": 3.416522402268471e-05, "loss": 0.3465, "step": 8153 }, { "epoch": 1.3278101209135693, "grad_norm": 0.10104760527610779, "learning_rate": 3.416081665122745e-05, "loss": 0.3505, "step": 8154 }, { "epoch": 1.3279729674714, "grad_norm": 0.11732622981071472, "learning_rate": 3.415640895086942e-05, "loss": 0.3635, "step": 8155 }, { "epoch": 1.328135814029231, "grad_norm": 0.11324549466371536, "learning_rate": 3.415200092176888e-05, "loss": 0.2996, "step": 8156 }, { "epoch": 1.328298660587062, "grad_norm": 0.09038849920034409, "learning_rate": 3.414759256408409e-05, "loss": 0.3302, "step": 8157 }, { "epoch": 1.3284615071448926, "grad_norm": 0.1390993446111679, "learning_rate": 3.4143183877973315e-05, "loss": 0.3289, "step": 8158 }, { "epoch": 1.3286243537027236, "grad_norm": 0.09955005347728729, "learning_rate": 3.413877486359485e-05, "loss": 0.3185, "step": 8159 }, { "epoch": 1.3287872002605545, "grad_norm": 0.09133689105510712, "learning_rate": 3.413436552110698e-05, "loss": 0.2933, "step": 8160 }, { "epoch": 1.3289500468183855, "grad_norm": 0.08817355334758759, "learning_rate": 3.4129955850668023e-05, "loss": 0.3176, "step": 8161 }, { "epoch": 1.3291128933762162, "grad_norm": 0.10652194172143936, "learning_rate": 3.41255458524363e-05, "loss": 0.3542, "step": 8162 }, { "epoch": 1.3292757399340471, "grad_norm": 0.10646398365497589, "learning_rate": 3.412113552657014e-05, "loss": 0.3598, "step": 8163 }, { "epoch": 1.329438586491878, "grad_norm": 0.1371290236711502, "learning_rate": 3.411672487322788e-05, "loss": 0.3223, "step": 8164 }, { "epoch": 1.3296014330497088, "grad_norm": 0.13896974921226501, "learning_rate": 3.4112313892567884e-05, "loss": 0.3276, "step": 8165 }, { "epoch": 1.3297642796075397, "grad_norm": 0.10923212021589279, "learning_rate": 3.410790258474852e-05, "loss": 0.3158, "step": 8166 }, { "epoch": 1.3299271261653707, "grad_norm": 0.12141520529985428, "learning_rate": 3.410349094992817e-05, "loss": 0.328, "step": 8167 }, { "epoch": 1.3300899727232016, "grad_norm": 0.11221268773078918, "learning_rate": 3.4099078988265215e-05, "loss": 0.2959, "step": 8168 }, { "epoch": 1.3302528192810326, "grad_norm": 0.1709628701210022, "learning_rate": 3.409466669991806e-05, "loss": 0.3286, "step": 8169 }, { "epoch": 1.3304156658388633, "grad_norm": 0.15485785901546478, "learning_rate": 3.4090254085045125e-05, "loss": 0.3042, "step": 8170 }, { "epoch": 1.3305785123966942, "grad_norm": 0.11482854932546616, "learning_rate": 3.408584114380484e-05, "loss": 0.352, "step": 8171 }, { "epoch": 1.3307413589545252, "grad_norm": 0.1213856115937233, "learning_rate": 3.4081427876355624e-05, "loss": 0.2762, "step": 8172 }, { "epoch": 1.3309042055123559, "grad_norm": 0.13002511858940125, "learning_rate": 3.4077014282855944e-05, "loss": 0.3213, "step": 8173 }, { "epoch": 1.3310670520701868, "grad_norm": 0.13873133063316345, "learning_rate": 3.407260036346426e-05, "loss": 0.2851, "step": 8174 }, { "epoch": 1.3312298986280178, "grad_norm": 0.13941624760627747, "learning_rate": 3.406818611833904e-05, "loss": 0.3359, "step": 8175 }, { "epoch": 1.3313927451858487, "grad_norm": 0.15054762363433838, "learning_rate": 3.406377154763877e-05, "loss": 0.3229, "step": 8176 }, { "epoch": 1.3315555917436797, "grad_norm": 0.06974721699953079, "learning_rate": 3.405935665152194e-05, "loss": 0.3149, "step": 8177 }, { "epoch": 1.3317184383015104, "grad_norm": 0.1073891744017601, "learning_rate": 3.4054941430147065e-05, "loss": 0.3286, "step": 8178 }, { "epoch": 1.3318812848593413, "grad_norm": 0.15693634748458862, "learning_rate": 3.405052588367266e-05, "loss": 0.28, "step": 8179 }, { "epoch": 1.332044131417172, "grad_norm": 0.1278637945652008, "learning_rate": 3.404611001225727e-05, "loss": 0.3382, "step": 8180 }, { "epoch": 1.332206977975003, "grad_norm": 0.09714356064796448, "learning_rate": 3.4041693816059416e-05, "loss": 0.3699, "step": 8181 }, { "epoch": 1.332369824532834, "grad_norm": 0.06217228248715401, "learning_rate": 3.4037277295237675e-05, "loss": 0.3371, "step": 8182 }, { "epoch": 1.3325326710906649, "grad_norm": 0.11367792636156082, "learning_rate": 3.40328604499506e-05, "loss": 0.3073, "step": 8183 }, { "epoch": 1.3326955176484958, "grad_norm": 0.08832140266895294, "learning_rate": 3.402844328035677e-05, "loss": 0.3241, "step": 8184 }, { "epoch": 1.3328583642063265, "grad_norm": 0.12410066276788712, "learning_rate": 3.402402578661477e-05, "loss": 0.2494, "step": 8185 }, { "epoch": 1.3330212107641575, "grad_norm": 0.18575599789619446, "learning_rate": 3.401960796888322e-05, "loss": 0.3546, "step": 8186 }, { "epoch": 1.3331840573219884, "grad_norm": 0.2383831888437271, "learning_rate": 3.401518982732071e-05, "loss": 0.3236, "step": 8187 }, { "epoch": 1.3333469038798191, "grad_norm": 0.0927732065320015, "learning_rate": 3.401077136208588e-05, "loss": 0.3064, "step": 8188 }, { "epoch": 1.33350975043765, "grad_norm": 0.14466099441051483, "learning_rate": 3.400635257333735e-05, "loss": 0.3159, "step": 8189 }, { "epoch": 1.333672596995481, "grad_norm": 0.08838220685720444, "learning_rate": 3.4001933461233794e-05, "loss": 0.2784, "step": 8190 }, { "epoch": 1.333835443553312, "grad_norm": 0.13523074984550476, "learning_rate": 3.399751402593385e-05, "loss": 0.3033, "step": 8191 }, { "epoch": 1.333998290111143, "grad_norm": 0.13393108546733856, "learning_rate": 3.3993094267596195e-05, "loss": 0.318, "step": 8192 }, { "epoch": 1.3341611366689736, "grad_norm": 0.06924277544021606, "learning_rate": 3.3988674186379514e-05, "loss": 0.3353, "step": 8193 }, { "epoch": 1.3343239832268046, "grad_norm": 0.062149904668331146, "learning_rate": 3.39842537824425e-05, "loss": 0.2845, "step": 8194 }, { "epoch": 1.3344868297846355, "grad_norm": 0.1214786246418953, "learning_rate": 3.3979833055943856e-05, "loss": 0.2931, "step": 8195 }, { "epoch": 1.3346496763424662, "grad_norm": 0.1288723647594452, "learning_rate": 3.39754120070423e-05, "loss": 0.3547, "step": 8196 }, { "epoch": 1.3348125229002972, "grad_norm": 0.14749139547348022, "learning_rate": 3.3970990635896565e-05, "loss": 0.2848, "step": 8197 }, { "epoch": 1.3349753694581281, "grad_norm": 0.14927372336387634, "learning_rate": 3.3966568942665386e-05, "loss": 0.341, "step": 8198 }, { "epoch": 1.335138216015959, "grad_norm": 0.09005707502365112, "learning_rate": 3.396214692750751e-05, "loss": 0.3307, "step": 8199 }, { "epoch": 1.3353010625737898, "grad_norm": 0.10262712836265564, "learning_rate": 3.3957724590581716e-05, "loss": 0.3041, "step": 8200 }, { "epoch": 1.3354639091316207, "grad_norm": 0.11711175739765167, "learning_rate": 3.395330193204677e-05, "loss": 0.3215, "step": 8201 }, { "epoch": 1.3356267556894517, "grad_norm": 0.12093789875507355, "learning_rate": 3.394887895206147e-05, "loss": 0.3285, "step": 8202 }, { "epoch": 1.3357896022472824, "grad_norm": 0.09153088927268982, "learning_rate": 3.394445565078459e-05, "loss": 0.3055, "step": 8203 }, { "epoch": 1.3359524488051133, "grad_norm": 0.11079825460910797, "learning_rate": 3.394003202837496e-05, "loss": 0.322, "step": 8204 }, { "epoch": 1.3361152953629443, "grad_norm": 0.10936981439590454, "learning_rate": 3.393560808499139e-05, "loss": 0.3521, "step": 8205 }, { "epoch": 1.3362781419207752, "grad_norm": 0.09918401390314102, "learning_rate": 3.3931183820792725e-05, "loss": 0.3228, "step": 8206 }, { "epoch": 1.3364409884786062, "grad_norm": 0.10505110770463943, "learning_rate": 3.39267592359378e-05, "loss": 0.3234, "step": 8207 }, { "epoch": 1.3366038350364369, "grad_norm": 0.10883542895317078, "learning_rate": 3.3922334330585465e-05, "loss": 0.2806, "step": 8208 }, { "epoch": 1.3367666815942678, "grad_norm": 0.12027616053819656, "learning_rate": 3.39179091048946e-05, "loss": 0.3063, "step": 8209 }, { "epoch": 1.3369295281520988, "grad_norm": 0.10460954904556274, "learning_rate": 3.391348355902408e-05, "loss": 0.3304, "step": 8210 }, { "epoch": 1.3370923747099295, "grad_norm": 0.1483910083770752, "learning_rate": 3.39090576931328e-05, "loss": 0.3194, "step": 8211 }, { "epoch": 1.3372552212677604, "grad_norm": 0.14167407155036926, "learning_rate": 3.3904631507379646e-05, "loss": 0.318, "step": 8212 }, { "epoch": 1.3374180678255914, "grad_norm": 0.12645290791988373, "learning_rate": 3.3900205001923556e-05, "loss": 0.3141, "step": 8213 }, { "epoch": 1.3375809143834223, "grad_norm": 0.13580922782421112, "learning_rate": 3.389577817692343e-05, "loss": 0.304, "step": 8214 }, { "epoch": 1.337743760941253, "grad_norm": 0.11798664927482605, "learning_rate": 3.389135103253822e-05, "loss": 0.3303, "step": 8215 }, { "epoch": 1.337906607499084, "grad_norm": 0.09885696321725845, "learning_rate": 3.388692356892686e-05, "loss": 0.3194, "step": 8216 }, { "epoch": 1.338069454056915, "grad_norm": 0.08475261181592941, "learning_rate": 3.388249578624834e-05, "loss": 0.328, "step": 8217 }, { "epoch": 1.3382323006147456, "grad_norm": 0.1305290162563324, "learning_rate": 3.387806768466158e-05, "loss": 0.3212, "step": 8218 }, { "epoch": 1.3383951471725766, "grad_norm": 0.08953825384378433, "learning_rate": 3.387363926432561e-05, "loss": 0.3329, "step": 8219 }, { "epoch": 1.3385579937304075, "grad_norm": 0.09348955750465393, "learning_rate": 3.3869210525399404e-05, "loss": 0.2992, "step": 8220 }, { "epoch": 1.3387208402882385, "grad_norm": 0.10617349296808243, "learning_rate": 3.3864781468041956e-05, "loss": 0.3213, "step": 8221 }, { "epoch": 1.3388836868460694, "grad_norm": 0.10226203501224518, "learning_rate": 3.3860352092412305e-05, "loss": 0.3142, "step": 8222 }, { "epoch": 1.3390465334039001, "grad_norm": 0.13538816571235657, "learning_rate": 3.385592239866946e-05, "loss": 0.3157, "step": 8223 }, { "epoch": 1.339209379961731, "grad_norm": 0.09778593480587006, "learning_rate": 3.3851492386972475e-05, "loss": 0.3352, "step": 8224 }, { "epoch": 1.339372226519562, "grad_norm": 0.11115071177482605, "learning_rate": 3.384706205748038e-05, "loss": 0.3354, "step": 8225 }, { "epoch": 1.3395350730773927, "grad_norm": 0.09021738916635513, "learning_rate": 3.384263141035226e-05, "loss": 0.3559, "step": 8226 }, { "epoch": 1.3396979196352237, "grad_norm": 0.13246440887451172, "learning_rate": 3.3838200445747176e-05, "loss": 0.2967, "step": 8227 }, { "epoch": 1.3398607661930546, "grad_norm": 0.09394426643848419, "learning_rate": 3.3833769163824225e-05, "loss": 0.3126, "step": 8228 }, { "epoch": 1.3400236127508856, "grad_norm": 0.08640360087156296, "learning_rate": 3.382933756474248e-05, "loss": 0.3324, "step": 8229 }, { "epoch": 1.3401864593087165, "grad_norm": 0.11230537295341492, "learning_rate": 3.382490564866107e-05, "loss": 0.2882, "step": 8230 }, { "epoch": 1.3403493058665472, "grad_norm": 0.10993734747171402, "learning_rate": 3.382047341573911e-05, "loss": 0.2891, "step": 8231 }, { "epoch": 1.3405121524243782, "grad_norm": 0.10414277762174606, "learning_rate": 3.381604086613572e-05, "loss": 0.3044, "step": 8232 }, { "epoch": 1.3406749989822089, "grad_norm": 0.13159847259521484, "learning_rate": 3.3811608000010065e-05, "loss": 0.3336, "step": 8233 }, { "epoch": 1.3408378455400398, "grad_norm": 0.15235041081905365, "learning_rate": 3.380717481752127e-05, "loss": 0.3073, "step": 8234 }, { "epoch": 1.3410006920978708, "grad_norm": 0.1343006044626236, "learning_rate": 3.3802741318828514e-05, "loss": 0.3315, "step": 8235 }, { "epoch": 1.3411635386557017, "grad_norm": 0.12437774986028671, "learning_rate": 3.379830750409097e-05, "loss": 0.2676, "step": 8236 }, { "epoch": 1.3413263852135326, "grad_norm": 0.08465509116649628, "learning_rate": 3.379387337346783e-05, "loss": 0.3258, "step": 8237 }, { "epoch": 1.3414892317713634, "grad_norm": 0.13276997208595276, "learning_rate": 3.378943892711829e-05, "loss": 0.3764, "step": 8238 }, { "epoch": 1.3416520783291943, "grad_norm": 0.081231988966465, "learning_rate": 3.378500416520155e-05, "loss": 0.33, "step": 8239 }, { "epoch": 1.3418149248870253, "grad_norm": 0.1366698443889618, "learning_rate": 3.3780569087876844e-05, "loss": 0.2831, "step": 8240 }, { "epoch": 1.341977771444856, "grad_norm": 0.11903838813304901, "learning_rate": 3.37761336953034e-05, "loss": 0.3178, "step": 8241 }, { "epoch": 1.342140618002687, "grad_norm": 0.09413740038871765, "learning_rate": 3.377169798764046e-05, "loss": 0.3014, "step": 8242 }, { "epoch": 1.3423034645605179, "grad_norm": 0.08394119888544083, "learning_rate": 3.3767261965047294e-05, "loss": 0.3592, "step": 8243 }, { "epoch": 1.3424663111183488, "grad_norm": 0.11621849238872528, "learning_rate": 3.376282562768315e-05, "loss": 0.34, "step": 8244 }, { "epoch": 1.3426291576761797, "grad_norm": 0.7001970410346985, "learning_rate": 3.37583889757073e-05, "loss": 0.4163, "step": 8245 }, { "epoch": 1.3427920042340105, "grad_norm": 0.13912254571914673, "learning_rate": 3.3753952009279054e-05, "loss": 0.3203, "step": 8246 }, { "epoch": 1.3429548507918414, "grad_norm": 0.13087457418441772, "learning_rate": 3.37495147285577e-05, "loss": 0.3344, "step": 8247 }, { "epoch": 1.3431176973496723, "grad_norm": 0.0826994925737381, "learning_rate": 3.3745077133702565e-05, "loss": 0.319, "step": 8248 }, { "epoch": 1.343280543907503, "grad_norm": 0.09735576808452606, "learning_rate": 3.374063922487294e-05, "loss": 0.3047, "step": 8249 }, { "epoch": 1.343443390465334, "grad_norm": 0.08351722359657288, "learning_rate": 3.37362010022282e-05, "loss": 0.3053, "step": 8250 }, { "epoch": 1.343606237023165, "grad_norm": 0.09425710141658783, "learning_rate": 3.3731762465927654e-05, "loss": 0.3212, "step": 8251 }, { "epoch": 1.343769083580996, "grad_norm": 0.1419348418712616, "learning_rate": 3.372732361613068e-05, "loss": 0.3368, "step": 8252 }, { "epoch": 1.3439319301388266, "grad_norm": 0.11943938583135605, "learning_rate": 3.372288445299665e-05, "loss": 0.2782, "step": 8253 }, { "epoch": 1.3440947766966576, "grad_norm": 0.14972753822803497, "learning_rate": 3.371844497668491e-05, "loss": 0.3204, "step": 8254 }, { "epoch": 1.3442576232544885, "grad_norm": 0.322768896818161, "learning_rate": 3.3714005187354896e-05, "loss": 0.3368, "step": 8255 }, { "epoch": 1.3444204698123192, "grad_norm": 0.2183561623096466, "learning_rate": 3.370956508516598e-05, "loss": 0.3154, "step": 8256 }, { "epoch": 1.3445833163701502, "grad_norm": 0.1876828819513321, "learning_rate": 3.370512467027759e-05, "loss": 0.3209, "step": 8257 }, { "epoch": 1.344746162927981, "grad_norm": 0.13168367743492126, "learning_rate": 3.370068394284913e-05, "loss": 0.3428, "step": 8258 }, { "epoch": 1.344909009485812, "grad_norm": 0.07881609350442886, "learning_rate": 3.369624290304006e-05, "loss": 0.319, "step": 8259 }, { "epoch": 1.345071856043643, "grad_norm": 0.1332453489303589, "learning_rate": 3.3691801551009804e-05, "loss": 0.3259, "step": 8260 }, { "epoch": 1.3452347026014737, "grad_norm": 0.1138257086277008, "learning_rate": 3.368735988691784e-05, "loss": 0.2947, "step": 8261 }, { "epoch": 1.3453975491593047, "grad_norm": 0.08856577426195145, "learning_rate": 3.368291791092363e-05, "loss": 0.317, "step": 8262 }, { "epoch": 1.3455603957171356, "grad_norm": 0.15425929427146912, "learning_rate": 3.367847562318665e-05, "loss": 0.3262, "step": 8263 }, { "epoch": 1.3457232422749663, "grad_norm": 0.09422607719898224, "learning_rate": 3.367403302386639e-05, "loss": 0.3319, "step": 8264 }, { "epoch": 1.3458860888327973, "grad_norm": 0.12087452411651611, "learning_rate": 3.3669590113122354e-05, "loss": 0.3317, "step": 8265 }, { "epoch": 1.3460489353906282, "grad_norm": 0.12300708889961243, "learning_rate": 3.3665146891114056e-05, "loss": 0.2828, "step": 8266 }, { "epoch": 1.3462117819484591, "grad_norm": 0.09346264600753784, "learning_rate": 3.3660703358001026e-05, "loss": 0.3027, "step": 8267 }, { "epoch": 1.34637462850629, "grad_norm": 0.18845894932746887, "learning_rate": 3.3656259513942804e-05, "loss": 0.3643, "step": 8268 }, { "epoch": 1.3465374750641208, "grad_norm": 0.2125720977783203, "learning_rate": 3.365181535909892e-05, "loss": 0.3268, "step": 8269 }, { "epoch": 1.3467003216219517, "grad_norm": 0.10473417490720749, "learning_rate": 3.364737089362896e-05, "loss": 0.3362, "step": 8270 }, { "epoch": 1.3468631681797825, "grad_norm": 0.11153100430965424, "learning_rate": 3.3642926117692464e-05, "loss": 0.3195, "step": 8271 }, { "epoch": 1.3470260147376134, "grad_norm": 0.09905815869569778, "learning_rate": 3.363848103144903e-05, "loss": 0.3278, "step": 8272 }, { "epoch": 1.3471888612954444, "grad_norm": 0.11839693039655685, "learning_rate": 3.363403563505824e-05, "loss": 0.3318, "step": 8273 }, { "epoch": 1.3473517078532753, "grad_norm": 0.09394383430480957, "learning_rate": 3.3629589928679704e-05, "loss": 0.3279, "step": 8274 }, { "epoch": 1.3475145544111062, "grad_norm": 0.1236010193824768, "learning_rate": 3.362514391247305e-05, "loss": 0.3395, "step": 8275 }, { "epoch": 1.347677400968937, "grad_norm": 0.12509937584400177, "learning_rate": 3.362069758659787e-05, "loss": 0.3664, "step": 8276 }, { "epoch": 1.347840247526768, "grad_norm": 0.12350168079137802, "learning_rate": 3.361625095121382e-05, "loss": 0.3279, "step": 8277 }, { "epoch": 1.3480030940845988, "grad_norm": 0.17692923545837402, "learning_rate": 3.361180400648055e-05, "loss": 0.3634, "step": 8278 }, { "epoch": 1.3481659406424296, "grad_norm": 0.13145309686660767, "learning_rate": 3.360735675255771e-05, "loss": 0.3358, "step": 8279 }, { "epoch": 1.3483287872002605, "grad_norm": 0.09650865942239761, "learning_rate": 3.360290918960498e-05, "loss": 0.3306, "step": 8280 }, { "epoch": 1.3484916337580914, "grad_norm": 0.1677035391330719, "learning_rate": 3.359846131778203e-05, "loss": 0.3427, "step": 8281 }, { "epoch": 1.3486544803159224, "grad_norm": 0.1602112203836441, "learning_rate": 3.3594013137248556e-05, "loss": 0.3685, "step": 8282 }, { "epoch": 1.3488173268737533, "grad_norm": 0.1543143391609192, "learning_rate": 3.358956464816427e-05, "loss": 0.3326, "step": 8283 }, { "epoch": 1.348980173431584, "grad_norm": 0.08122837543487549, "learning_rate": 3.358511585068887e-05, "loss": 0.3242, "step": 8284 }, { "epoch": 1.349143019989415, "grad_norm": 0.1282620131969452, "learning_rate": 3.3580666744982095e-05, "loss": 0.2746, "step": 8285 }, { "epoch": 1.349305866547246, "grad_norm": 0.10664796829223633, "learning_rate": 3.357621733120368e-05, "loss": 0.3269, "step": 8286 }, { "epoch": 1.3494687131050767, "grad_norm": 0.18356360495090485, "learning_rate": 3.357176760951335e-05, "loss": 0.3328, "step": 8287 }, { "epoch": 1.3496315596629076, "grad_norm": 0.18545354902744293, "learning_rate": 3.3567317580070896e-05, "loss": 0.3253, "step": 8288 }, { "epoch": 1.3497944062207385, "grad_norm": 0.10566793382167816, "learning_rate": 3.356286724303607e-05, "loss": 0.3311, "step": 8289 }, { "epoch": 1.3499572527785695, "grad_norm": 0.1734122335910797, "learning_rate": 3.355841659856866e-05, "loss": 0.351, "step": 8290 }, { "epoch": 1.3501200993364002, "grad_norm": 0.14984460175037384, "learning_rate": 3.355396564682844e-05, "loss": 0.3352, "step": 8291 }, { "epoch": 1.3502829458942311, "grad_norm": 0.0830860584974289, "learning_rate": 3.354951438797523e-05, "loss": 0.3394, "step": 8292 }, { "epoch": 1.350445792452062, "grad_norm": 0.17859742045402527, "learning_rate": 3.354506282216884e-05, "loss": 0.342, "step": 8293 }, { "epoch": 1.3506086390098928, "grad_norm": 0.07598699629306793, "learning_rate": 3.354061094956909e-05, "loss": 0.3097, "step": 8294 }, { "epoch": 1.3507714855677238, "grad_norm": 0.1559160053730011, "learning_rate": 3.3536158770335826e-05, "loss": 0.344, "step": 8295 }, { "epoch": 1.3509343321255547, "grad_norm": 0.10316728055477142, "learning_rate": 3.353170628462888e-05, "loss": 0.2859, "step": 8296 }, { "epoch": 1.3510971786833856, "grad_norm": 0.12803061306476593, "learning_rate": 3.3527253492608116e-05, "loss": 0.3096, "step": 8297 }, { "epoch": 1.3512600252412166, "grad_norm": 0.18572932481765747, "learning_rate": 3.3522800394433405e-05, "loss": 0.3273, "step": 8298 }, { "epoch": 1.3514228717990473, "grad_norm": 0.11965760588645935, "learning_rate": 3.351834699026463e-05, "loss": 0.3371, "step": 8299 }, { "epoch": 1.3515857183568782, "grad_norm": 0.1625382900238037, "learning_rate": 3.351389328026167e-05, "loss": 0.3323, "step": 8300 }, { "epoch": 1.3517485649147092, "grad_norm": 0.1323722004890442, "learning_rate": 3.350943926458444e-05, "loss": 0.344, "step": 8301 }, { "epoch": 1.35191141147254, "grad_norm": 0.13649946451187134, "learning_rate": 3.350498494339283e-05, "loss": 0.3114, "step": 8302 }, { "epoch": 1.3520742580303708, "grad_norm": 0.1443285048007965, "learning_rate": 3.350053031684679e-05, "loss": 0.3075, "step": 8303 }, { "epoch": 1.3522371045882018, "grad_norm": 0.0852239578962326, "learning_rate": 3.349607538510625e-05, "loss": 0.297, "step": 8304 }, { "epoch": 1.3523999511460327, "grad_norm": 0.13232272863388062, "learning_rate": 3.349162014833114e-05, "loss": 0.2975, "step": 8305 }, { "epoch": 1.3525627977038637, "grad_norm": 0.15684732794761658, "learning_rate": 3.348716460668143e-05, "loss": 0.3269, "step": 8306 }, { "epoch": 1.3527256442616944, "grad_norm": 0.13976937532424927, "learning_rate": 3.3482708760317074e-05, "loss": 0.3351, "step": 8307 }, { "epoch": 1.3528884908195253, "grad_norm": 0.17109960317611694, "learning_rate": 3.347825260939807e-05, "loss": 0.3399, "step": 8308 }, { "epoch": 1.353051337377356, "grad_norm": 0.11578638106584549, "learning_rate": 3.347379615408439e-05, "loss": 0.315, "step": 8309 }, { "epoch": 1.353214183935187, "grad_norm": 0.10349968820810318, "learning_rate": 3.3469339394536036e-05, "loss": 0.289, "step": 8310 }, { "epoch": 1.353377030493018, "grad_norm": 0.10547839850187302, "learning_rate": 3.346488233091303e-05, "loss": 0.3269, "step": 8311 }, { "epoch": 1.3535398770508489, "grad_norm": 0.09530988335609436, "learning_rate": 3.3460424963375384e-05, "loss": 0.3077, "step": 8312 }, { "epoch": 1.3537027236086798, "grad_norm": 0.10181395709514618, "learning_rate": 3.3455967292083126e-05, "loss": 0.2828, "step": 8313 }, { "epoch": 1.3538655701665105, "grad_norm": 0.07269968092441559, "learning_rate": 3.3451509317196316e-05, "loss": 0.3234, "step": 8314 }, { "epoch": 1.3540284167243415, "grad_norm": 0.1563885658979416, "learning_rate": 3.3447051038875004e-05, "loss": 0.3149, "step": 8315 }, { "epoch": 1.3541912632821724, "grad_norm": 0.15152078866958618, "learning_rate": 3.344259245727924e-05, "loss": 0.3403, "step": 8316 }, { "epoch": 1.3543541098400032, "grad_norm": 0.08696313202381134, "learning_rate": 3.3438133572569123e-05, "loss": 0.3207, "step": 8317 }, { "epoch": 1.354516956397834, "grad_norm": 0.14052633941173553, "learning_rate": 3.343367438490472e-05, "loss": 0.3498, "step": 8318 }, { "epoch": 1.354679802955665, "grad_norm": 0.11248752474784851, "learning_rate": 3.3429214894446145e-05, "loss": 0.3262, "step": 8319 }, { "epoch": 1.354842649513496, "grad_norm": 0.1144183874130249, "learning_rate": 3.34247551013535e-05, "loss": 0.2924, "step": 8320 }, { "epoch": 1.355005496071327, "grad_norm": 0.1183854267001152, "learning_rate": 3.3420295005786904e-05, "loss": 0.3176, "step": 8321 }, { "epoch": 1.3551683426291576, "grad_norm": 0.14971674978733063, "learning_rate": 3.341583460790649e-05, "loss": 0.3293, "step": 8322 }, { "epoch": 1.3553311891869886, "grad_norm": 0.1840110570192337, "learning_rate": 3.34113739078724e-05, "loss": 0.3599, "step": 8323 }, { "epoch": 1.3554940357448195, "grad_norm": 0.18182149529457092, "learning_rate": 3.340691290584478e-05, "loss": 0.3399, "step": 8324 }, { "epoch": 1.3556568823026502, "grad_norm": 0.14309293031692505, "learning_rate": 3.340245160198381e-05, "loss": 0.3235, "step": 8325 }, { "epoch": 1.3558197288604812, "grad_norm": 0.10411697626113892, "learning_rate": 3.3397989996449645e-05, "loss": 0.3554, "step": 8326 }, { "epoch": 1.3559825754183121, "grad_norm": 0.1194305270910263, "learning_rate": 3.339352808940248e-05, "loss": 0.3295, "step": 8327 }, { "epoch": 1.356145421976143, "grad_norm": 0.10212865471839905, "learning_rate": 3.338906588100251e-05, "loss": 0.3356, "step": 8328 }, { "epoch": 1.3563082685339738, "grad_norm": 0.20757730305194855, "learning_rate": 3.338460337140994e-05, "loss": 0.3371, "step": 8329 }, { "epoch": 1.3564711150918047, "grad_norm": 0.13266150653362274, "learning_rate": 3.338014056078499e-05, "loss": 0.3408, "step": 8330 }, { "epoch": 1.3566339616496357, "grad_norm": 0.1025005653500557, "learning_rate": 3.337567744928788e-05, "loss": 0.3363, "step": 8331 }, { "epoch": 1.3567968082074664, "grad_norm": 0.11490997672080994, "learning_rate": 3.3371214037078855e-05, "loss": 0.3145, "step": 8332 }, { "epoch": 1.3569596547652973, "grad_norm": 0.1065979152917862, "learning_rate": 3.336675032431817e-05, "loss": 0.3251, "step": 8333 }, { "epoch": 1.3571225013231283, "grad_norm": 0.09649523347616196, "learning_rate": 3.336228631116608e-05, "loss": 0.3136, "step": 8334 }, { "epoch": 1.3572853478809592, "grad_norm": 0.17937497794628143, "learning_rate": 3.335782199778286e-05, "loss": 0.3363, "step": 8335 }, { "epoch": 1.3574481944387902, "grad_norm": 0.1443771868944168, "learning_rate": 3.3353357384328786e-05, "loss": 0.3018, "step": 8336 }, { "epoch": 1.3576110409966209, "grad_norm": 0.11400794237852097, "learning_rate": 3.334889247096416e-05, "loss": 0.3094, "step": 8337 }, { "epoch": 1.3577738875544518, "grad_norm": 0.13205885887145996, "learning_rate": 3.3344427257849276e-05, "loss": 0.3187, "step": 8338 }, { "epoch": 1.3579367341122828, "grad_norm": 0.1256372630596161, "learning_rate": 3.3339961745144454e-05, "loss": 0.2827, "step": 8339 }, { "epoch": 1.3580995806701135, "grad_norm": 0.14746543765068054, "learning_rate": 3.3335495933010016e-05, "loss": 0.3456, "step": 8340 }, { "epoch": 1.3582624272279444, "grad_norm": 0.11032578349113464, "learning_rate": 3.33310298216063e-05, "loss": 0.3173, "step": 8341 }, { "epoch": 1.3584252737857754, "grad_norm": 0.07253409177064896, "learning_rate": 3.332656341109366e-05, "loss": 0.2882, "step": 8342 }, { "epoch": 1.3585881203436063, "grad_norm": 0.11430496722459793, "learning_rate": 3.332209670163244e-05, "loss": 0.3246, "step": 8343 }, { "epoch": 1.358750966901437, "grad_norm": 0.1513206958770752, "learning_rate": 3.3317629693383014e-05, "loss": 0.3497, "step": 8344 }, { "epoch": 1.358913813459268, "grad_norm": 0.09406371414661407, "learning_rate": 3.3313162386505756e-05, "loss": 0.3434, "step": 8345 }, { "epoch": 1.359076660017099, "grad_norm": 0.07613648474216461, "learning_rate": 3.330869478116107e-05, "loss": 0.332, "step": 8346 }, { "epoch": 1.3592395065749296, "grad_norm": 0.10456852614879608, "learning_rate": 3.3304226877509345e-05, "loss": 0.3019, "step": 8347 }, { "epoch": 1.3594023531327606, "grad_norm": 0.1251852661371231, "learning_rate": 3.3299758675711e-05, "loss": 0.3014, "step": 8348 }, { "epoch": 1.3595651996905915, "grad_norm": 0.12615284323692322, "learning_rate": 3.3295290175926445e-05, "loss": 0.2813, "step": 8349 }, { "epoch": 1.3597280462484225, "grad_norm": 0.11194894462823868, "learning_rate": 3.3290821378316126e-05, "loss": 0.3329, "step": 8350 }, { "epoch": 1.3598908928062534, "grad_norm": 0.08664833754301071, "learning_rate": 3.328635228304048e-05, "loss": 0.3264, "step": 8351 }, { "epoch": 1.3600537393640841, "grad_norm": 0.1459341198205948, "learning_rate": 3.3281882890259954e-05, "loss": 0.3263, "step": 8352 }, { "epoch": 1.360216585921915, "grad_norm": 0.12495029717683792, "learning_rate": 3.3277413200135024e-05, "loss": 0.3109, "step": 8353 }, { "epoch": 1.360379432479746, "grad_norm": 0.08013114333152771, "learning_rate": 3.327294321282615e-05, "loss": 0.2941, "step": 8354 }, { "epoch": 1.3605422790375767, "grad_norm": 0.18590286374092102, "learning_rate": 3.3268472928493846e-05, "loss": 0.3405, "step": 8355 }, { "epoch": 1.3607051255954077, "grad_norm": 0.13807880878448486, "learning_rate": 3.3264002347298576e-05, "loss": 0.3294, "step": 8356 }, { "epoch": 1.3608679721532386, "grad_norm": 0.13177776336669922, "learning_rate": 3.325953146940087e-05, "loss": 0.3208, "step": 8357 }, { "epoch": 1.3610308187110696, "grad_norm": 0.1202956885099411, "learning_rate": 3.325506029496123e-05, "loss": 0.3179, "step": 8358 }, { "epoch": 1.3611936652689005, "grad_norm": 0.09084895253181458, "learning_rate": 3.3250588824140205e-05, "loss": 0.327, "step": 8359 }, { "epoch": 1.3613565118267312, "grad_norm": 0.09893506020307541, "learning_rate": 3.3246117057098315e-05, "loss": 0.3517, "step": 8360 }, { "epoch": 1.3615193583845622, "grad_norm": 0.1468164622783661, "learning_rate": 3.3241644993996115e-05, "loss": 0.3051, "step": 8361 }, { "epoch": 1.361682204942393, "grad_norm": 0.12373268604278564, "learning_rate": 3.3237172634994164e-05, "loss": 0.2961, "step": 8362 }, { "epoch": 1.3618450515002238, "grad_norm": 0.10563378036022186, "learning_rate": 3.3232699980253044e-05, "loss": 0.3354, "step": 8363 }, { "epoch": 1.3620078980580548, "grad_norm": 0.10340023040771484, "learning_rate": 3.3228227029933326e-05, "loss": 0.3305, "step": 8364 }, { "epoch": 1.3621707446158857, "grad_norm": 0.12712731957435608, "learning_rate": 3.32237537841956e-05, "loss": 0.2896, "step": 8365 }, { "epoch": 1.3623335911737167, "grad_norm": 0.14349739253520966, "learning_rate": 3.321928024320048e-05, "loss": 0.3193, "step": 8366 }, { "epoch": 1.3624964377315474, "grad_norm": 0.15983380377292633, "learning_rate": 3.321480640710857e-05, "loss": 0.3442, "step": 8367 }, { "epoch": 1.3626592842893783, "grad_norm": 0.15532323718070984, "learning_rate": 3.32103322760805e-05, "loss": 0.3141, "step": 8368 }, { "epoch": 1.3628221308472093, "grad_norm": 0.1407996565103531, "learning_rate": 3.32058578502769e-05, "loss": 0.3253, "step": 8369 }, { "epoch": 1.36298497740504, "grad_norm": 0.15536023676395416, "learning_rate": 3.320138312985843e-05, "loss": 0.3144, "step": 8370 }, { "epoch": 1.363147823962871, "grad_norm": 0.08875022083520889, "learning_rate": 3.319690811498572e-05, "loss": 0.351, "step": 8371 }, { "epoch": 1.3633106705207019, "grad_norm": 0.15631568431854248, "learning_rate": 3.319243280581946e-05, "loss": 0.3423, "step": 8372 }, { "epoch": 1.3634735170785328, "grad_norm": 0.08994085341691971, "learning_rate": 3.31879572025203e-05, "loss": 0.3194, "step": 8373 }, { "epoch": 1.3636363636363638, "grad_norm": 0.12930235266685486, "learning_rate": 3.3183481305248956e-05, "loss": 0.3589, "step": 8374 }, { "epoch": 1.3637992101941945, "grad_norm": 0.09328873455524445, "learning_rate": 3.3179005114166125e-05, "loss": 0.2982, "step": 8375 }, { "epoch": 1.3639620567520254, "grad_norm": 0.13457268476486206, "learning_rate": 3.317452862943249e-05, "loss": 0.3381, "step": 8376 }, { "epoch": 1.3641249033098564, "grad_norm": 0.10846875607967377, "learning_rate": 3.3170051851208795e-05, "loss": 0.3041, "step": 8377 }, { "epoch": 1.364287749867687, "grad_norm": 0.10717424750328064, "learning_rate": 3.316557477965575e-05, "loss": 0.3362, "step": 8378 }, { "epoch": 1.364450596425518, "grad_norm": 0.07417230308055878, "learning_rate": 3.3161097414934114e-05, "loss": 0.3435, "step": 8379 }, { "epoch": 1.364613442983349, "grad_norm": 0.20670562982559204, "learning_rate": 3.3156619757204635e-05, "loss": 0.3407, "step": 8380 }, { "epoch": 1.36477628954118, "grad_norm": 0.08776869624853134, "learning_rate": 3.3152141806628055e-05, "loss": 0.289, "step": 8381 }, { "epoch": 1.3649391360990106, "grad_norm": 0.17086704075336456, "learning_rate": 3.3147663563365164e-05, "loss": 0.366, "step": 8382 }, { "epoch": 1.3651019826568416, "grad_norm": 0.13229040801525116, "learning_rate": 3.314318502757675e-05, "loss": 0.3133, "step": 8383 }, { "epoch": 1.3652648292146725, "grad_norm": 0.15601317584514618, "learning_rate": 3.313870619942358e-05, "loss": 0.3026, "step": 8384 }, { "epoch": 1.3654276757725032, "grad_norm": 0.15519355237483978, "learning_rate": 3.313422707906648e-05, "loss": 0.3414, "step": 8385 }, { "epoch": 1.3655905223303342, "grad_norm": 0.09352108836174011, "learning_rate": 3.3129747666666264e-05, "loss": 0.3116, "step": 8386 }, { "epoch": 1.3657533688881651, "grad_norm": 0.1308119148015976, "learning_rate": 3.312526796238374e-05, "loss": 0.3175, "step": 8387 }, { "epoch": 1.365916215445996, "grad_norm": 0.109311543405056, "learning_rate": 3.3120787966379755e-05, "loss": 0.3282, "step": 8388 }, { "epoch": 1.366079062003827, "grad_norm": 0.1133076548576355, "learning_rate": 3.311630767881515e-05, "loss": 0.3336, "step": 8389 }, { "epoch": 1.3662419085616577, "grad_norm": 0.11873488128185272, "learning_rate": 3.311182709985079e-05, "loss": 0.2913, "step": 8390 }, { "epoch": 1.3664047551194887, "grad_norm": 0.08224201947450638, "learning_rate": 3.3107346229647525e-05, "loss": 0.3157, "step": 8391 }, { "epoch": 1.3665676016773196, "grad_norm": 0.15425080060958862, "learning_rate": 3.310286506836625e-05, "loss": 0.3229, "step": 8392 }, { "epoch": 1.3667304482351503, "grad_norm": 0.12394766509532928, "learning_rate": 3.309838361616783e-05, "loss": 0.3249, "step": 8393 }, { "epoch": 1.3668932947929813, "grad_norm": 0.06649252027273178, "learning_rate": 3.309390187321317e-05, "loss": 0.2908, "step": 8394 }, { "epoch": 1.3670561413508122, "grad_norm": 0.07519636303186417, "learning_rate": 3.3089419839663206e-05, "loss": 0.3232, "step": 8395 }, { "epoch": 1.3672189879086432, "grad_norm": 0.08717040717601776, "learning_rate": 3.3084937515678815e-05, "loss": 0.3003, "step": 8396 }, { "epoch": 1.367381834466474, "grad_norm": 0.10473670065402985, "learning_rate": 3.308045490142096e-05, "loss": 0.3435, "step": 8397 }, { "epoch": 1.3675446810243048, "grad_norm": 0.11165751516819, "learning_rate": 3.307597199705054e-05, "loss": 0.3147, "step": 8398 }, { "epoch": 1.3677075275821358, "grad_norm": 0.1566716581583023, "learning_rate": 3.307148880272855e-05, "loss": 0.3086, "step": 8399 }, { "epoch": 1.3678703741399665, "grad_norm": 0.09795603901147842, "learning_rate": 3.3067005318615916e-05, "loss": 0.3587, "step": 8400 }, { "epoch": 1.3680332206977974, "grad_norm": 0.11190087348222733, "learning_rate": 3.306252154487362e-05, "loss": 0.3148, "step": 8401 }, { "epoch": 1.3681960672556284, "grad_norm": 0.1107340157032013, "learning_rate": 3.305803748166266e-05, "loss": 0.3088, "step": 8402 }, { "epoch": 1.3683589138134593, "grad_norm": 0.11558032035827637, "learning_rate": 3.3053553129144e-05, "loss": 0.3064, "step": 8403 }, { "epoch": 1.3685217603712903, "grad_norm": 0.14052703976631165, "learning_rate": 3.3049068487478656e-05, "loss": 0.3336, "step": 8404 }, { "epoch": 1.368684606929121, "grad_norm": 0.13408276438713074, "learning_rate": 3.3044583556827634e-05, "loss": 0.4445, "step": 8405 }, { "epoch": 1.368847453486952, "grad_norm": 0.15623685717582703, "learning_rate": 3.304009833735197e-05, "loss": 0.4025, "step": 8406 }, { "epoch": 1.3690103000447829, "grad_norm": 0.11645558476448059, "learning_rate": 3.303561282921268e-05, "loss": 0.33, "step": 8407 }, { "epoch": 1.3691731466026136, "grad_norm": 0.11375389248132706, "learning_rate": 3.303112703257082e-05, "loss": 0.3303, "step": 8408 }, { "epoch": 1.3693359931604445, "grad_norm": 0.07745666056871414, "learning_rate": 3.3026640947587436e-05, "loss": 0.3179, "step": 8409 }, { "epoch": 1.3694988397182755, "grad_norm": 0.0844118669629097, "learning_rate": 3.302215457442359e-05, "loss": 0.3533, "step": 8410 }, { "epoch": 1.3696616862761064, "grad_norm": 0.11661950498819351, "learning_rate": 3.301766791324036e-05, "loss": 0.35, "step": 8411 }, { "epoch": 1.3698245328339373, "grad_norm": 0.1138998344540596, "learning_rate": 3.301318096419884e-05, "loss": 0.3638, "step": 8412 }, { "epoch": 1.369987379391768, "grad_norm": 0.12674298882484436, "learning_rate": 3.30086937274601e-05, "loss": 0.3373, "step": 8413 }, { "epoch": 1.370150225949599, "grad_norm": 0.1494358777999878, "learning_rate": 3.300420620318526e-05, "loss": 0.3659, "step": 8414 }, { "epoch": 1.37031307250743, "grad_norm": 0.10970519483089447, "learning_rate": 3.299971839153545e-05, "loss": 0.3404, "step": 8415 }, { "epoch": 1.3704759190652607, "grad_norm": 0.09349238872528076, "learning_rate": 3.299523029267178e-05, "loss": 0.336, "step": 8416 }, { "epoch": 1.3706387656230916, "grad_norm": 0.09623878449201584, "learning_rate": 3.299074190675538e-05, "loss": 0.3505, "step": 8417 }, { "epoch": 1.3708016121809226, "grad_norm": 0.0855538547039032, "learning_rate": 3.298625323394741e-05, "loss": 0.3257, "step": 8418 }, { "epoch": 1.3709644587387535, "grad_norm": 0.147317573428154, "learning_rate": 3.298176427440901e-05, "loss": 0.3482, "step": 8419 }, { "epoch": 1.3711273052965842, "grad_norm": 0.1008078083395958, "learning_rate": 3.2977275028301375e-05, "loss": 0.3199, "step": 8420 }, { "epoch": 1.3712901518544152, "grad_norm": 0.14359402656555176, "learning_rate": 3.297278549578565e-05, "loss": 0.3165, "step": 8421 }, { "epoch": 1.371452998412246, "grad_norm": 0.11323332786560059, "learning_rate": 3.2968295677023054e-05, "loss": 0.3459, "step": 8422 }, { "epoch": 1.3716158449700768, "grad_norm": 0.11301582306623459, "learning_rate": 3.296380557217476e-05, "loss": 0.3149, "step": 8423 }, { "epoch": 1.3717786915279078, "grad_norm": 0.38808777928352356, "learning_rate": 3.295931518140199e-05, "loss": 0.367, "step": 8424 }, { "epoch": 1.3719415380857387, "grad_norm": 0.11590030044317245, "learning_rate": 3.2954824504865956e-05, "loss": 0.2711, "step": 8425 }, { "epoch": 1.3721043846435697, "grad_norm": 0.12251552194356918, "learning_rate": 3.295033354272789e-05, "loss": 0.3219, "step": 8426 }, { "epoch": 1.3722672312014006, "grad_norm": 0.10694136470556259, "learning_rate": 3.2945842295149024e-05, "loss": 0.3004, "step": 8427 }, { "epoch": 1.3724300777592313, "grad_norm": 0.11399400979280472, "learning_rate": 3.294135076229062e-05, "loss": 0.3339, "step": 8428 }, { "epoch": 1.3725929243170623, "grad_norm": 0.1339896023273468, "learning_rate": 3.2936858944313934e-05, "loss": 0.2676, "step": 8429 }, { "epoch": 1.3727557708748932, "grad_norm": 0.11457309871912003, "learning_rate": 3.293236684138023e-05, "loss": 0.3468, "step": 8430 }, { "epoch": 1.372918617432724, "grad_norm": 0.13952341675758362, "learning_rate": 3.2927874453650785e-05, "loss": 0.3373, "step": 8431 }, { "epoch": 1.3730814639905549, "grad_norm": 0.08107377588748932, "learning_rate": 3.2923381781286906e-05, "loss": 0.3063, "step": 8432 }, { "epoch": 1.3732443105483858, "grad_norm": 0.09776850789785385, "learning_rate": 3.2918888824449864e-05, "loss": 0.3465, "step": 8433 }, { "epoch": 1.3734071571062167, "grad_norm": 0.13703034818172455, "learning_rate": 3.2914395583301003e-05, "loss": 0.3307, "step": 8434 }, { "epoch": 1.3735700036640477, "grad_norm": 0.15526139736175537, "learning_rate": 3.2909902058001625e-05, "loss": 0.2976, "step": 8435 }, { "epoch": 1.3737328502218784, "grad_norm": 0.07293584942817688, "learning_rate": 3.290540824871306e-05, "loss": 0.3033, "step": 8436 }, { "epoch": 1.3738956967797094, "grad_norm": 0.14204458892345428, "learning_rate": 3.290091415559666e-05, "loss": 0.3575, "step": 8437 }, { "epoch": 1.37405854333754, "grad_norm": 0.151052325963974, "learning_rate": 3.2896419778813765e-05, "loss": 0.359, "step": 8438 }, { "epoch": 1.374221389895371, "grad_norm": 0.1370162069797516, "learning_rate": 3.289192511852574e-05, "loss": 0.3327, "step": 8439 }, { "epoch": 1.374384236453202, "grad_norm": 0.11305761337280273, "learning_rate": 3.2887430174893955e-05, "loss": 0.2986, "step": 8440 }, { "epoch": 1.374547083011033, "grad_norm": 0.10747375339269638, "learning_rate": 3.288293494807981e-05, "loss": 0.3201, "step": 8441 }, { "epoch": 1.3747099295688638, "grad_norm": 0.14892077445983887, "learning_rate": 3.287843943824467e-05, "loss": 0.3119, "step": 8442 }, { "epoch": 1.3748727761266946, "grad_norm": 0.10107722133398056, "learning_rate": 3.287394364554996e-05, "loss": 0.297, "step": 8443 }, { "epoch": 1.3750356226845255, "grad_norm": 0.12998449802398682, "learning_rate": 3.286944757015708e-05, "loss": 0.3152, "step": 8444 }, { "epoch": 1.3751984692423564, "grad_norm": 0.0990087166428566, "learning_rate": 3.286495121222744e-05, "loss": 0.3295, "step": 8445 }, { "epoch": 1.3753613158001872, "grad_norm": 0.07441246509552002, "learning_rate": 3.28604545719225e-05, "loss": 0.3275, "step": 8446 }, { "epoch": 1.375524162358018, "grad_norm": 0.10216811299324036, "learning_rate": 3.285595764940369e-05, "loss": 0.3216, "step": 8447 }, { "epoch": 1.375687008915849, "grad_norm": 0.0938534215092659, "learning_rate": 3.2851460444832464e-05, "loss": 0.3177, "step": 8448 }, { "epoch": 1.37584985547368, "grad_norm": 0.10399039834737778, "learning_rate": 3.284696295837028e-05, "loss": 0.3353, "step": 8449 }, { "epoch": 1.376012702031511, "grad_norm": 0.11456193029880524, "learning_rate": 3.2842465190178616e-05, "loss": 0.3087, "step": 8450 }, { "epoch": 1.3761755485893417, "grad_norm": 0.11957388371229172, "learning_rate": 3.2837967140418956e-05, "loss": 0.291, "step": 8451 }, { "epoch": 1.3763383951471726, "grad_norm": 0.17997169494628906, "learning_rate": 3.28334688092528e-05, "loss": 0.3576, "step": 8452 }, { "epoch": 1.3765012417050035, "grad_norm": 0.13546989858150482, "learning_rate": 3.282897019684163e-05, "loss": 0.321, "step": 8453 }, { "epoch": 1.3766640882628343, "grad_norm": 0.09878262132406235, "learning_rate": 3.282447130334698e-05, "loss": 0.2992, "step": 8454 }, { "epoch": 1.3768269348206652, "grad_norm": 0.10096263885498047, "learning_rate": 3.281997212893038e-05, "loss": 0.3358, "step": 8455 }, { "epoch": 1.3769897813784961, "grad_norm": 0.09255257248878479, "learning_rate": 3.281547267375333e-05, "loss": 0.3234, "step": 8456 }, { "epoch": 1.377152627936327, "grad_norm": 0.14576737582683563, "learning_rate": 3.2810972937977416e-05, "loss": 0.3244, "step": 8457 }, { "epoch": 1.3773154744941578, "grad_norm": 0.13735252618789673, "learning_rate": 3.280647292176415e-05, "loss": 0.2903, "step": 8458 }, { "epoch": 1.3774783210519888, "grad_norm": 0.13970832526683807, "learning_rate": 3.2801972625275126e-05, "loss": 0.3076, "step": 8459 }, { "epoch": 1.3776411676098197, "grad_norm": 0.1822516769170761, "learning_rate": 3.279747204867191e-05, "loss": 0.3656, "step": 8460 }, { "epoch": 1.3778040141676504, "grad_norm": 0.13380545377731323, "learning_rate": 3.2792971192116095e-05, "loss": 0.3603, "step": 8461 }, { "epoch": 1.3779668607254814, "grad_norm": 0.13489720225334167, "learning_rate": 3.2788470055769257e-05, "loss": 0.3304, "step": 8462 }, { "epoch": 1.3781297072833123, "grad_norm": 0.13166016340255737, "learning_rate": 3.278396863979301e-05, "loss": 0.3448, "step": 8463 }, { "epoch": 1.3782925538411432, "grad_norm": 0.05984525382518768, "learning_rate": 3.277946694434896e-05, "loss": 0.3602, "step": 8464 }, { "epoch": 1.3784554003989742, "grad_norm": 0.09394257515668869, "learning_rate": 3.277496496959875e-05, "loss": 0.3131, "step": 8465 }, { "epoch": 1.378618246956805, "grad_norm": 0.17630980908870697, "learning_rate": 3.2770462715703996e-05, "loss": 0.3283, "step": 8466 }, { "epoch": 1.3787810935146358, "grad_norm": 0.15174497663974762, "learning_rate": 3.276596018282636e-05, "loss": 0.3435, "step": 8467 }, { "epoch": 1.3789439400724668, "grad_norm": 0.15661248564720154, "learning_rate": 3.276145737112748e-05, "loss": 0.3155, "step": 8468 }, { "epoch": 1.3791067866302975, "grad_norm": 0.09407036006450653, "learning_rate": 3.275695428076902e-05, "loss": 0.3527, "step": 8469 }, { "epoch": 1.3792696331881285, "grad_norm": 0.09112722426652908, "learning_rate": 3.275245091191267e-05, "loss": 0.2993, "step": 8470 }, { "epoch": 1.3794324797459594, "grad_norm": 0.2453629970550537, "learning_rate": 3.27479472647201e-05, "loss": 0.3514, "step": 8471 }, { "epoch": 1.3795953263037903, "grad_norm": 0.17533482611179352, "learning_rate": 3.274344333935302e-05, "loss": 0.3116, "step": 8472 }, { "epoch": 1.379758172861621, "grad_norm": 0.10426071286201477, "learning_rate": 3.273893913597311e-05, "loss": 0.334, "step": 8473 }, { "epoch": 1.379921019419452, "grad_norm": 0.11388164013624191, "learning_rate": 3.27344346547421e-05, "loss": 0.2835, "step": 8474 }, { "epoch": 1.380083865977283, "grad_norm": 0.08992429822683334, "learning_rate": 3.272992989582171e-05, "loss": 0.2804, "step": 8475 }, { "epoch": 1.3802467125351137, "grad_norm": 0.0832461267709732, "learning_rate": 3.272542485937369e-05, "loss": 0.3233, "step": 8476 }, { "epoch": 1.3804095590929446, "grad_norm": 0.10262557119131088, "learning_rate": 3.272091954555977e-05, "loss": 0.3572, "step": 8477 }, { "epoch": 1.3805724056507755, "grad_norm": 0.08254161477088928, "learning_rate": 3.2716413954541696e-05, "loss": 0.3691, "step": 8478 }, { "epoch": 1.3807352522086065, "grad_norm": 0.21631255745887756, "learning_rate": 3.271190808648125e-05, "loss": 0.3657, "step": 8479 }, { "epoch": 1.3808980987664374, "grad_norm": 0.10192310810089111, "learning_rate": 3.270740194154019e-05, "loss": 0.3162, "step": 8480 }, { "epoch": 1.3810609453242682, "grad_norm": 0.12381087243556976, "learning_rate": 3.2702895519880317e-05, "loss": 0.3113, "step": 8481 }, { "epoch": 1.381223791882099, "grad_norm": 0.11548251658678055, "learning_rate": 3.269838882166341e-05, "loss": 0.2959, "step": 8482 }, { "epoch": 1.38138663843993, "grad_norm": 0.09133439511060715, "learning_rate": 3.269388184705128e-05, "loss": 0.3076, "step": 8483 }, { "epoch": 1.3815494849977608, "grad_norm": 0.13439421355724335, "learning_rate": 3.2689374596205744e-05, "loss": 0.3062, "step": 8484 }, { "epoch": 1.3817123315555917, "grad_norm": 0.10787692666053772, "learning_rate": 3.268486706928862e-05, "loss": 0.284, "step": 8485 }, { "epoch": 1.3818751781134226, "grad_norm": 0.11784107238054276, "learning_rate": 3.2680359266461746e-05, "loss": 0.3218, "step": 8486 }, { "epoch": 1.3820380246712536, "grad_norm": 0.05729785934090614, "learning_rate": 3.267585118788696e-05, "loss": 0.3622, "step": 8487 }, { "epoch": 1.3822008712290845, "grad_norm": 0.12032128125429153, "learning_rate": 3.267134283372613e-05, "loss": 0.3792, "step": 8488 }, { "epoch": 1.3823637177869152, "grad_norm": 0.12431509792804718, "learning_rate": 3.2666834204141097e-05, "loss": 0.3426, "step": 8489 }, { "epoch": 1.3825265643447462, "grad_norm": 0.11671575158834457, "learning_rate": 3.266232529929375e-05, "loss": 0.3099, "step": 8490 }, { "epoch": 1.382689410902577, "grad_norm": 0.15049585700035095, "learning_rate": 3.2657816119345954e-05, "loss": 0.3585, "step": 8491 }, { "epoch": 1.3828522574604079, "grad_norm": 0.11025846749544144, "learning_rate": 3.2653306664459635e-05, "loss": 0.2972, "step": 8492 }, { "epoch": 1.3830151040182388, "grad_norm": 0.11149679869413376, "learning_rate": 3.264879693479667e-05, "loss": 0.3077, "step": 8493 }, { "epoch": 1.3831779505760697, "grad_norm": 0.08295775949954987, "learning_rate": 3.264428693051898e-05, "loss": 0.325, "step": 8494 }, { "epoch": 1.3833407971339007, "grad_norm": 0.13668294250965118, "learning_rate": 3.263977665178848e-05, "loss": 0.3162, "step": 8495 }, { "epoch": 1.3835036436917314, "grad_norm": 0.10129299759864807, "learning_rate": 3.2635266098767114e-05, "loss": 0.3113, "step": 8496 }, { "epoch": 1.3836664902495623, "grad_norm": 0.13798600435256958, "learning_rate": 3.263075527161681e-05, "loss": 0.3151, "step": 8497 }, { "epoch": 1.3838293368073933, "grad_norm": 0.11598284542560577, "learning_rate": 3.262624417049954e-05, "loss": 0.3024, "step": 8498 }, { "epoch": 1.383992183365224, "grad_norm": 0.09452655911445618, "learning_rate": 3.2621732795577255e-05, "loss": 0.3247, "step": 8499 }, { "epoch": 1.384155029923055, "grad_norm": 0.09634251147508621, "learning_rate": 3.261722114701192e-05, "loss": 0.2977, "step": 8500 }, { "epoch": 1.384317876480886, "grad_norm": 0.11316970735788345, "learning_rate": 3.261270922496553e-05, "loss": 0.3377, "step": 8501 }, { "epoch": 1.3844807230387168, "grad_norm": 0.13050702214241028, "learning_rate": 3.260819702960008e-05, "loss": 0.3295, "step": 8502 }, { "epoch": 1.3846435695965478, "grad_norm": 0.09734152257442474, "learning_rate": 3.260368456107755e-05, "loss": 0.3117, "step": 8503 }, { "epoch": 1.3848064161543785, "grad_norm": 0.144313782453537, "learning_rate": 3.259917181955996e-05, "loss": 0.3234, "step": 8504 }, { "epoch": 1.3849692627122094, "grad_norm": 0.10535573959350586, "learning_rate": 3.259465880520934e-05, "loss": 0.3134, "step": 8505 }, { "epoch": 1.3851321092700404, "grad_norm": 0.11179589480161667, "learning_rate": 3.2590145518187714e-05, "loss": 0.3165, "step": 8506 }, { "epoch": 1.385294955827871, "grad_norm": 0.10786767303943634, "learning_rate": 3.258563195865712e-05, "loss": 0.2983, "step": 8507 }, { "epoch": 1.385457802385702, "grad_norm": 0.1440834254026413, "learning_rate": 3.258111812677962e-05, "loss": 0.3007, "step": 8508 }, { "epoch": 1.385620648943533, "grad_norm": 0.1330927014350891, "learning_rate": 3.2576604022717254e-05, "loss": 0.3258, "step": 8509 }, { "epoch": 1.385783495501364, "grad_norm": 0.07287335395812988, "learning_rate": 3.257208964663212e-05, "loss": 0.3103, "step": 8510 }, { "epoch": 1.3859463420591946, "grad_norm": 0.1302865743637085, "learning_rate": 3.256757499868626e-05, "loss": 0.3488, "step": 8511 }, { "epoch": 1.3861091886170256, "grad_norm": 0.12820279598236084, "learning_rate": 3.25630600790418e-05, "loss": 0.3374, "step": 8512 }, { "epoch": 1.3862720351748565, "grad_norm": 0.1216789036989212, "learning_rate": 3.255854488786081e-05, "loss": 0.3606, "step": 8513 }, { "epoch": 1.3864348817326873, "grad_norm": 0.11878103017807007, "learning_rate": 3.2554029425305434e-05, "loss": 0.3022, "step": 8514 }, { "epoch": 1.3865977282905182, "grad_norm": 0.11593399196863174, "learning_rate": 3.254951369153775e-05, "loss": 0.3081, "step": 8515 }, { "epoch": 1.3867605748483491, "grad_norm": 0.12369164824485779, "learning_rate": 3.2544997686719906e-05, "loss": 0.2931, "step": 8516 }, { "epoch": 1.38692342140618, "grad_norm": 0.11285579949617386, "learning_rate": 3.254048141101404e-05, "loss": 0.3194, "step": 8517 }, { "epoch": 1.387086267964011, "grad_norm": 0.13375720381736755, "learning_rate": 3.2535964864582314e-05, "loss": 0.3323, "step": 8518 }, { "epoch": 1.3872491145218417, "grad_norm": 0.09327518939971924, "learning_rate": 3.253144804758686e-05, "loss": 0.2867, "step": 8519 }, { "epoch": 1.3874119610796727, "grad_norm": 0.10189314186573029, "learning_rate": 3.252693096018986e-05, "loss": 0.2938, "step": 8520 }, { "epoch": 1.3875748076375036, "grad_norm": 0.10898496955633163, "learning_rate": 3.252241360255348e-05, "loss": 0.3169, "step": 8521 }, { "epoch": 1.3877376541953343, "grad_norm": 0.1133275255560875, "learning_rate": 3.251789597483992e-05, "loss": 0.3079, "step": 8522 }, { "epoch": 1.3879005007531653, "grad_norm": 0.10366314649581909, "learning_rate": 3.2513378077211376e-05, "loss": 0.2923, "step": 8523 }, { "epoch": 1.3880633473109962, "grad_norm": 0.09795588999986649, "learning_rate": 3.2508859909830036e-05, "loss": 0.3293, "step": 8524 }, { "epoch": 1.3882261938688272, "grad_norm": 0.08336690068244934, "learning_rate": 3.2504341472858134e-05, "loss": 0.3637, "step": 8525 }, { "epoch": 1.3883890404266581, "grad_norm": 0.12402059137821198, "learning_rate": 3.2499822766457885e-05, "loss": 0.311, "step": 8526 }, { "epoch": 1.3885518869844888, "grad_norm": 0.12711386382579803, "learning_rate": 3.2495303790791534e-05, "loss": 0.3242, "step": 8527 }, { "epoch": 1.3887147335423198, "grad_norm": 0.0963926613330841, "learning_rate": 3.2490784546021324e-05, "loss": 0.32, "step": 8528 }, { "epoch": 1.3888775801001505, "grad_norm": 0.13548515737056732, "learning_rate": 3.24862650323095e-05, "loss": 0.3363, "step": 8529 }, { "epoch": 1.3890404266579814, "grad_norm": 0.0743439644575119, "learning_rate": 3.2481745249818335e-05, "loss": 0.3533, "step": 8530 }, { "epoch": 1.3892032732158124, "grad_norm": 0.09473580121994019, "learning_rate": 3.24772251987101e-05, "loss": 0.2903, "step": 8531 }, { "epoch": 1.3893661197736433, "grad_norm": 0.11376278847455978, "learning_rate": 3.247270487914707e-05, "loss": 0.3221, "step": 8532 }, { "epoch": 1.3895289663314743, "grad_norm": 0.10733665525913239, "learning_rate": 3.246818429129155e-05, "loss": 0.3319, "step": 8533 }, { "epoch": 1.389691812889305, "grad_norm": 0.08322536200284958, "learning_rate": 3.246366343530585e-05, "loss": 0.3495, "step": 8534 }, { "epoch": 1.389854659447136, "grad_norm": 0.13271461427211761, "learning_rate": 3.245914231135226e-05, "loss": 0.3397, "step": 8535 }, { "epoch": 1.3900175060049669, "grad_norm": 0.06805124878883362, "learning_rate": 3.245462091959311e-05, "loss": 0.3128, "step": 8536 }, { "epoch": 1.3901803525627976, "grad_norm": 0.07115628570318222, "learning_rate": 3.2450099260190734e-05, "loss": 0.3154, "step": 8537 }, { "epoch": 1.3903431991206285, "grad_norm": 0.09315421432256699, "learning_rate": 3.2445577333307475e-05, "loss": 0.3633, "step": 8538 }, { "epoch": 1.3905060456784595, "grad_norm": 0.11781881004571915, "learning_rate": 3.2441055139105694e-05, "loss": 0.3047, "step": 8539 }, { "epoch": 1.3906688922362904, "grad_norm": 0.12422754615545273, "learning_rate": 3.243653267774773e-05, "loss": 0.3394, "step": 8540 }, { "epoch": 1.3908317387941214, "grad_norm": 0.10251989215612411, "learning_rate": 3.2432009949395966e-05, "loss": 0.3179, "step": 8541 }, { "epoch": 1.390994585351952, "grad_norm": 0.11392978578805923, "learning_rate": 3.242748695421277e-05, "loss": 0.3222, "step": 8542 }, { "epoch": 1.391157431909783, "grad_norm": 0.13036668300628662, "learning_rate": 3.242296369236055e-05, "loss": 0.3121, "step": 8543 }, { "epoch": 1.391320278467614, "grad_norm": 0.15373829007148743, "learning_rate": 3.241844016400168e-05, "loss": 0.3369, "step": 8544 }, { "epoch": 1.3914831250254447, "grad_norm": 0.11814811825752258, "learning_rate": 3.2413916369298595e-05, "loss": 0.3231, "step": 8545 }, { "epoch": 1.3916459715832756, "grad_norm": 0.139099583029747, "learning_rate": 3.240939230841369e-05, "loss": 0.2828, "step": 8546 }, { "epoch": 1.3918088181411066, "grad_norm": 0.10591995716094971, "learning_rate": 3.2404867981509404e-05, "loss": 0.3328, "step": 8547 }, { "epoch": 1.3919716646989375, "grad_norm": 0.11235430091619492, "learning_rate": 3.2400343388748176e-05, "loss": 0.3244, "step": 8548 }, { "epoch": 1.3921345112567682, "grad_norm": 0.10379144549369812, "learning_rate": 3.2395818530292446e-05, "loss": 0.3295, "step": 8549 }, { "epoch": 1.3922973578145992, "grad_norm": 0.11874721199274063, "learning_rate": 3.239129340630468e-05, "loss": 0.3337, "step": 8550 }, { "epoch": 1.3924602043724301, "grad_norm": 0.12636485695838928, "learning_rate": 3.238676801694732e-05, "loss": 0.3284, "step": 8551 }, { "epoch": 1.3926230509302608, "grad_norm": 0.08289530873298645, "learning_rate": 3.238224236238288e-05, "loss": 0.3133, "step": 8552 }, { "epoch": 1.3927858974880918, "grad_norm": 0.0959138348698616, "learning_rate": 3.23777164427738e-05, "loss": 0.3175, "step": 8553 }, { "epoch": 1.3929487440459227, "grad_norm": 0.10031089931726456, "learning_rate": 3.23731902582826e-05, "loss": 0.3193, "step": 8554 }, { "epoch": 1.3931115906037537, "grad_norm": 0.2541959285736084, "learning_rate": 3.236866380907178e-05, "loss": 0.3624, "step": 8555 }, { "epoch": 1.3932744371615846, "grad_norm": 0.11391039937734604, "learning_rate": 3.2364137095303856e-05, "loss": 0.3149, "step": 8556 }, { "epoch": 1.3934372837194153, "grad_norm": 0.10360821336507797, "learning_rate": 3.235961011714134e-05, "loss": 0.3269, "step": 8557 }, { "epoch": 1.3936001302772463, "grad_norm": 0.12552914023399353, "learning_rate": 3.235508287474677e-05, "loss": 0.3286, "step": 8558 }, { "epoch": 1.3937629768350772, "grad_norm": 0.09107133746147156, "learning_rate": 3.23505553682827e-05, "loss": 0.3168, "step": 8559 }, { "epoch": 1.393925823392908, "grad_norm": 0.0888267233967781, "learning_rate": 3.234602759791166e-05, "loss": 0.2929, "step": 8560 }, { "epoch": 1.3940886699507389, "grad_norm": 0.09630149602890015, "learning_rate": 3.234149956379622e-05, "loss": 0.3212, "step": 8561 }, { "epoch": 1.3942515165085698, "grad_norm": 0.10270296037197113, "learning_rate": 3.233697126609895e-05, "loss": 0.2754, "step": 8562 }, { "epoch": 1.3944143630664008, "grad_norm": 0.1202947199344635, "learning_rate": 3.233244270498244e-05, "loss": 0.3327, "step": 8563 }, { "epoch": 1.3945772096242317, "grad_norm": 0.10795648396015167, "learning_rate": 3.232791388060926e-05, "loss": 0.3486, "step": 8564 }, { "epoch": 1.3947400561820624, "grad_norm": 0.10163564234972, "learning_rate": 3.232338479314202e-05, "loss": 0.3274, "step": 8565 }, { "epoch": 1.3949029027398934, "grad_norm": 0.08138538151979446, "learning_rate": 3.231885544274332e-05, "loss": 0.301, "step": 8566 }, { "epoch": 1.395065749297724, "grad_norm": 0.13244406878948212, "learning_rate": 3.231432582957578e-05, "loss": 0.3095, "step": 8567 }, { "epoch": 1.395228595855555, "grad_norm": 0.14208592474460602, "learning_rate": 3.230979595380203e-05, "loss": 0.3149, "step": 8568 }, { "epoch": 1.395391442413386, "grad_norm": 0.11471560597419739, "learning_rate": 3.230526581558471e-05, "loss": 0.289, "step": 8569 }, { "epoch": 1.395554288971217, "grad_norm": 0.15322239696979523, "learning_rate": 3.230073541508646e-05, "loss": 0.3169, "step": 8570 }, { "epoch": 1.3957171355290479, "grad_norm": 0.07140267640352249, "learning_rate": 3.2296204752469927e-05, "loss": 0.3021, "step": 8571 }, { "epoch": 1.3958799820868786, "grad_norm": 0.1113639548420906, "learning_rate": 3.229167382789779e-05, "loss": 0.3157, "step": 8572 }, { "epoch": 1.3960428286447095, "grad_norm": 0.14165085554122925, "learning_rate": 3.228714264153271e-05, "loss": 0.3611, "step": 8573 }, { "epoch": 1.3962056752025405, "grad_norm": 0.11415015906095505, "learning_rate": 3.2282611193537384e-05, "loss": 0.3418, "step": 8574 }, { "epoch": 1.3963685217603712, "grad_norm": 0.09577735513448715, "learning_rate": 3.2278079484074485e-05, "loss": 0.2927, "step": 8575 }, { "epoch": 1.3965313683182021, "grad_norm": 0.13876520097255707, "learning_rate": 3.227354751330674e-05, "loss": 0.3236, "step": 8576 }, { "epoch": 1.396694214876033, "grad_norm": 0.09681668132543564, "learning_rate": 3.226901528139684e-05, "loss": 0.3722, "step": 8577 }, { "epoch": 1.396857061433864, "grad_norm": 0.08584416657686234, "learning_rate": 3.226448278850751e-05, "loss": 0.3508, "step": 8578 }, { "epoch": 1.397019907991695, "grad_norm": 0.18564458191394806, "learning_rate": 3.2259950034801497e-05, "loss": 0.3313, "step": 8579 }, { "epoch": 1.3971827545495257, "grad_norm": 0.13180366158485413, "learning_rate": 3.225541702044152e-05, "loss": 0.2996, "step": 8580 }, { "epoch": 1.3973456011073566, "grad_norm": 0.11857571452856064, "learning_rate": 3.225088374559033e-05, "loss": 0.3316, "step": 8581 }, { "epoch": 1.3975084476651876, "grad_norm": 0.0756792277097702, "learning_rate": 3.224635021041069e-05, "loss": 0.3153, "step": 8582 }, { "epoch": 1.3976712942230183, "grad_norm": 0.11699234694242477, "learning_rate": 3.2241816415065366e-05, "loss": 0.3434, "step": 8583 }, { "epoch": 1.3978341407808492, "grad_norm": 0.10636379569768906, "learning_rate": 3.223728235971713e-05, "loss": 0.3296, "step": 8584 }, { "epoch": 1.3979969873386802, "grad_norm": 0.11376652121543884, "learning_rate": 3.2232748044528785e-05, "loss": 0.3121, "step": 8585 }, { "epoch": 1.398159833896511, "grad_norm": 0.08520616590976715, "learning_rate": 3.222821346966311e-05, "loss": 0.3698, "step": 8586 }, { "epoch": 1.3983226804543418, "grad_norm": 0.08598270267248154, "learning_rate": 3.222367863528292e-05, "loss": 0.2853, "step": 8587 }, { "epoch": 1.3984855270121728, "grad_norm": 0.13145874440670013, "learning_rate": 3.2219143541551025e-05, "loss": 0.2939, "step": 8588 }, { "epoch": 1.3986483735700037, "grad_norm": 0.09830012917518616, "learning_rate": 3.2214608188630236e-05, "loss": 0.3234, "step": 8589 }, { "epoch": 1.3988112201278344, "grad_norm": 0.08275850117206573, "learning_rate": 3.221007257668341e-05, "loss": 0.2998, "step": 8590 }, { "epoch": 1.3989740666856654, "grad_norm": 0.18689756095409393, "learning_rate": 3.220553670587337e-05, "loss": 0.3242, "step": 8591 }, { "epoch": 1.3991369132434963, "grad_norm": 0.05922093987464905, "learning_rate": 3.220100057636297e-05, "loss": 0.3521, "step": 8592 }, { "epoch": 1.3992997598013273, "grad_norm": 0.14099638164043427, "learning_rate": 3.2196464188315086e-05, "loss": 0.3246, "step": 8593 }, { "epoch": 1.3994626063591582, "grad_norm": 0.09685862064361572, "learning_rate": 3.219192754189257e-05, "loss": 0.3295, "step": 8594 }, { "epoch": 1.399625452916989, "grad_norm": 0.17740096151828766, "learning_rate": 3.2187390637258294e-05, "loss": 0.3157, "step": 8595 }, { "epoch": 1.3997882994748199, "grad_norm": 0.12008257955312729, "learning_rate": 3.2182853474575184e-05, "loss": 0.3213, "step": 8596 }, { "epoch": 1.3999511460326508, "grad_norm": 0.13277658820152283, "learning_rate": 3.21783160540061e-05, "loss": 0.3129, "step": 8597 }, { "epoch": 1.4001139925904815, "grad_norm": 0.08479292690753937, "learning_rate": 3.2173778375713966e-05, "loss": 0.305, "step": 8598 }, { "epoch": 1.4002768391483125, "grad_norm": 0.11018712818622589, "learning_rate": 3.2169240439861695e-05, "loss": 0.2931, "step": 8599 }, { "epoch": 1.4004396857061434, "grad_norm": 0.12377660721540451, "learning_rate": 3.216470224661222e-05, "loss": 0.3265, "step": 8600 }, { "epoch": 1.4006025322639744, "grad_norm": 0.1061646044254303, "learning_rate": 3.216016379612846e-05, "loss": 0.3212, "step": 8601 }, { "epoch": 1.400765378821805, "grad_norm": 0.09650072455406189, "learning_rate": 3.2155625088573364e-05, "loss": 0.3279, "step": 8602 }, { "epoch": 1.400928225379636, "grad_norm": 0.13723336160182953, "learning_rate": 3.2151086124109896e-05, "loss": 0.3115, "step": 8603 }, { "epoch": 1.401091071937467, "grad_norm": 0.11733657121658325, "learning_rate": 3.214654690290101e-05, "loss": 0.3306, "step": 8604 }, { "epoch": 1.4012539184952977, "grad_norm": 0.1419731229543686, "learning_rate": 3.214200742510968e-05, "loss": 0.3069, "step": 8605 }, { "epoch": 1.4014167650531286, "grad_norm": 0.11936108022928238, "learning_rate": 3.213746769089888e-05, "loss": 0.3501, "step": 8606 }, { "epoch": 1.4015796116109596, "grad_norm": 0.09960450977087021, "learning_rate": 3.21329277004316e-05, "loss": 0.3128, "step": 8607 }, { "epoch": 1.4017424581687905, "grad_norm": 0.13896788656711578, "learning_rate": 3.212838745387086e-05, "loss": 0.3188, "step": 8608 }, { "epoch": 1.4019053047266214, "grad_norm": 0.12404665350914001, "learning_rate": 3.2123846951379646e-05, "loss": 0.3188, "step": 8609 }, { "epoch": 1.4020681512844522, "grad_norm": 0.1199253723025322, "learning_rate": 3.2119306193120983e-05, "loss": 0.388, "step": 8610 }, { "epoch": 1.4022309978422831, "grad_norm": 0.13736997544765472, "learning_rate": 3.21147651792579e-05, "loss": 0.3628, "step": 8611 }, { "epoch": 1.402393844400114, "grad_norm": 0.10871170461177826, "learning_rate": 3.211022390995343e-05, "loss": 0.2934, "step": 8612 }, { "epoch": 1.4025566909579448, "grad_norm": 0.10228833556175232, "learning_rate": 3.210568238537061e-05, "loss": 0.307, "step": 8613 }, { "epoch": 1.4027195375157757, "grad_norm": 0.12301763147115707, "learning_rate": 3.210114060567251e-05, "loss": 0.3301, "step": 8614 }, { "epoch": 1.4028823840736067, "grad_norm": 0.10356549173593521, "learning_rate": 3.209659857102219e-05, "loss": 0.3478, "step": 8615 }, { "epoch": 1.4030452306314376, "grad_norm": 0.11805350333452225, "learning_rate": 3.209205628158272e-05, "loss": 0.3285, "step": 8616 }, { "epoch": 1.4032080771892685, "grad_norm": 0.10959039628505707, "learning_rate": 3.208751373751717e-05, "loss": 0.3011, "step": 8617 }, { "epoch": 1.4033709237470993, "grad_norm": 0.1230599656701088, "learning_rate": 3.208297093898864e-05, "loss": 0.3701, "step": 8618 }, { "epoch": 1.4035337703049302, "grad_norm": 0.1137784868478775, "learning_rate": 3.2078427886160253e-05, "loss": 0.2776, "step": 8619 }, { "epoch": 1.403696616862761, "grad_norm": 0.16536371409893036, "learning_rate": 3.207388457919508e-05, "loss": 0.3218, "step": 8620 }, { "epoch": 1.4038594634205919, "grad_norm": 0.11614199727773666, "learning_rate": 3.2069341018256264e-05, "loss": 0.3237, "step": 8621 }, { "epoch": 1.4040223099784228, "grad_norm": 0.12153372168540955, "learning_rate": 3.2064797203506927e-05, "loss": 0.3032, "step": 8622 }, { "epoch": 1.4041851565362538, "grad_norm": 0.11919062584638596, "learning_rate": 3.20602531351102e-05, "loss": 0.3098, "step": 8623 }, { "epoch": 1.4043480030940847, "grad_norm": 0.1087343692779541, "learning_rate": 3.205570881322923e-05, "loss": 0.3325, "step": 8624 }, { "epoch": 1.4045108496519154, "grad_norm": 0.8986861705780029, "learning_rate": 3.205116423802718e-05, "loss": 0.3379, "step": 8625 }, { "epoch": 1.4046736962097464, "grad_norm": 0.11775419116020203, "learning_rate": 3.20466194096672e-05, "loss": 0.3013, "step": 8626 }, { "epoch": 1.4048365427675773, "grad_norm": 0.10644635558128357, "learning_rate": 3.2042074328312476e-05, "loss": 0.2958, "step": 8627 }, { "epoch": 1.404999389325408, "grad_norm": 0.10286280512809753, "learning_rate": 3.2037528994126184e-05, "loss": 0.3441, "step": 8628 }, { "epoch": 1.405162235883239, "grad_norm": 0.13435089588165283, "learning_rate": 3.203298340727152e-05, "loss": 0.3325, "step": 8629 }, { "epoch": 1.40532508244107, "grad_norm": 0.14990246295928955, "learning_rate": 3.202843756791168e-05, "loss": 0.3269, "step": 8630 }, { "epoch": 1.4054879289989008, "grad_norm": 0.11160799115896225, "learning_rate": 3.2023891476209864e-05, "loss": 0.2936, "step": 8631 }, { "epoch": 1.4056507755567318, "grad_norm": 0.11630240082740784, "learning_rate": 3.2019345132329304e-05, "loss": 0.3049, "step": 8632 }, { "epoch": 1.4058136221145625, "grad_norm": 0.10050062090158463, "learning_rate": 3.2014798536433235e-05, "loss": 0.3183, "step": 8633 }, { "epoch": 1.4059764686723935, "grad_norm": 0.12190332263708115, "learning_rate": 3.201025168868487e-05, "loss": 0.2875, "step": 8634 }, { "epoch": 1.4061393152302244, "grad_norm": 0.12956781685352325, "learning_rate": 3.2005704589247464e-05, "loss": 0.3421, "step": 8635 }, { "epoch": 1.4063021617880551, "grad_norm": 0.13637977838516235, "learning_rate": 3.2001157238284277e-05, "loss": 0.3204, "step": 8636 }, { "epoch": 1.406465008345886, "grad_norm": 0.1314207911491394, "learning_rate": 3.199660963595857e-05, "loss": 0.3152, "step": 8637 }, { "epoch": 1.406627854903717, "grad_norm": 0.11353262513875961, "learning_rate": 3.1992061782433615e-05, "loss": 0.3343, "step": 8638 }, { "epoch": 1.406790701461548, "grad_norm": 0.19502952694892883, "learning_rate": 3.1987513677872696e-05, "loss": 0.3415, "step": 8639 }, { "epoch": 1.4069535480193787, "grad_norm": 0.10408427566289902, "learning_rate": 3.1982965322439096e-05, "loss": 0.3297, "step": 8640 }, { "epoch": 1.4071163945772096, "grad_norm": 0.15264515578746796, "learning_rate": 3.1978416716296125e-05, "loss": 0.3103, "step": 8641 }, { "epoch": 1.4072792411350405, "grad_norm": 0.10908644646406174, "learning_rate": 3.1973867859607075e-05, "loss": 0.3356, "step": 8642 }, { "epoch": 1.4074420876928713, "grad_norm": 0.07877050340175629, "learning_rate": 3.196931875253529e-05, "loss": 0.3065, "step": 8643 }, { "epoch": 1.4076049342507022, "grad_norm": 0.13090553879737854, "learning_rate": 3.1964769395244063e-05, "loss": 0.3257, "step": 8644 }, { "epoch": 1.4077677808085332, "grad_norm": 0.1264023780822754, "learning_rate": 3.1960219787896764e-05, "loss": 0.3383, "step": 8645 }, { "epoch": 1.407930627366364, "grad_norm": 0.12228962779045105, "learning_rate": 3.195566993065672e-05, "loss": 0.3695, "step": 8646 }, { "epoch": 1.408093473924195, "grad_norm": 0.1300865113735199, "learning_rate": 3.195111982368728e-05, "loss": 0.3739, "step": 8647 }, { "epoch": 1.4082563204820258, "grad_norm": 0.09187734127044678, "learning_rate": 3.194656946715181e-05, "loss": 0.2952, "step": 8648 }, { "epoch": 1.4084191670398567, "grad_norm": 0.10893245041370392, "learning_rate": 3.1942018861213684e-05, "loss": 0.2964, "step": 8649 }, { "epoch": 1.4085820135976876, "grad_norm": 0.09552183747291565, "learning_rate": 3.1937468006036285e-05, "loss": 0.3584, "step": 8650 }, { "epoch": 1.4087448601555184, "grad_norm": 0.16398045420646667, "learning_rate": 3.1932916901783e-05, "loss": 0.3263, "step": 8651 }, { "epoch": 1.4089077067133493, "grad_norm": 0.11862363666296005, "learning_rate": 3.1928365548617224e-05, "loss": 0.3159, "step": 8652 }, { "epoch": 1.4090705532711802, "grad_norm": 0.1115160882472992, "learning_rate": 3.192381394670237e-05, "loss": 0.3203, "step": 8653 }, { "epoch": 1.4092333998290112, "grad_norm": 0.13806092739105225, "learning_rate": 3.191926209620185e-05, "loss": 0.3383, "step": 8654 }, { "epoch": 1.4093962463868421, "grad_norm": 0.14132064580917358, "learning_rate": 3.191470999727909e-05, "loss": 0.3501, "step": 8655 }, { "epoch": 1.4095590929446729, "grad_norm": 0.11985791474580765, "learning_rate": 3.191015765009752e-05, "loss": 0.3554, "step": 8656 }, { "epoch": 1.4097219395025038, "grad_norm": 0.11188704520463943, "learning_rate": 3.190560505482059e-05, "loss": 0.3083, "step": 8657 }, { "epoch": 1.4098847860603345, "grad_norm": 0.065605029463768, "learning_rate": 3.190105221161175e-05, "loss": 0.2834, "step": 8658 }, { "epoch": 1.4100476326181655, "grad_norm": 0.11018634587526321, "learning_rate": 3.189649912063446e-05, "loss": 0.2992, "step": 8659 }, { "epoch": 1.4102104791759964, "grad_norm": 0.10569926351308823, "learning_rate": 3.1891945782052186e-05, "loss": 0.3105, "step": 8660 }, { "epoch": 1.4103733257338273, "grad_norm": 0.15004034340381622, "learning_rate": 3.1887392196028415e-05, "loss": 0.3302, "step": 8661 }, { "epoch": 1.4105361722916583, "grad_norm": 0.38109198212623596, "learning_rate": 3.188283836272662e-05, "loss": 0.3231, "step": 8662 }, { "epoch": 1.410699018849489, "grad_norm": 0.11536628007888794, "learning_rate": 3.1878284282310314e-05, "loss": 0.3343, "step": 8663 }, { "epoch": 1.41086186540732, "grad_norm": 0.1693364977836609, "learning_rate": 3.187372995494299e-05, "loss": 0.3549, "step": 8664 }, { "epoch": 1.411024711965151, "grad_norm": 0.145134836435318, "learning_rate": 3.1869175380788176e-05, "loss": 0.3351, "step": 8665 }, { "epoch": 1.4111875585229816, "grad_norm": 0.1019844338297844, "learning_rate": 3.186462056000937e-05, "loss": 0.3289, "step": 8666 }, { "epoch": 1.4113504050808126, "grad_norm": 0.12285030633211136, "learning_rate": 3.186006549277014e-05, "loss": 0.2845, "step": 8667 }, { "epoch": 1.4115132516386435, "grad_norm": 0.08264219015836716, "learning_rate": 3.185551017923398e-05, "loss": 0.3331, "step": 8668 }, { "epoch": 1.4116760981964744, "grad_norm": 0.18572795391082764, "learning_rate": 3.1850954619564477e-05, "loss": 0.2994, "step": 8669 }, { "epoch": 1.4118389447543054, "grad_norm": 0.09812522679567337, "learning_rate": 3.184639881392518e-05, "loss": 0.3618, "step": 8670 }, { "epoch": 1.412001791312136, "grad_norm": 0.1303015947341919, "learning_rate": 3.184184276247965e-05, "loss": 0.396, "step": 8671 }, { "epoch": 1.412164637869967, "grad_norm": 0.11344275623559952, "learning_rate": 3.183728646539147e-05, "loss": 0.3151, "step": 8672 }, { "epoch": 1.412327484427798, "grad_norm": 0.11285215616226196, "learning_rate": 3.183272992282421e-05, "loss": 0.2928, "step": 8673 }, { "epoch": 1.4124903309856287, "grad_norm": 0.13218508660793304, "learning_rate": 3.1828173134941484e-05, "loss": 0.356, "step": 8674 }, { "epoch": 1.4126531775434596, "grad_norm": 0.11889689415693283, "learning_rate": 3.182361610190688e-05, "loss": 0.3365, "step": 8675 }, { "epoch": 1.4128160241012906, "grad_norm": 0.12689194083213806, "learning_rate": 3.1819058823884025e-05, "loss": 0.3342, "step": 8676 }, { "epoch": 1.4129788706591215, "grad_norm": 0.17261768877506256, "learning_rate": 3.1814501301036514e-05, "loss": 0.3297, "step": 8677 }, { "epoch": 1.4131417172169523, "grad_norm": 0.1527087539434433, "learning_rate": 3.1809943533527994e-05, "loss": 0.3234, "step": 8678 }, { "epoch": 1.4133045637747832, "grad_norm": 0.09546882659196854, "learning_rate": 3.18053855215221e-05, "loss": 0.3019, "step": 8679 }, { "epoch": 1.4134674103326141, "grad_norm": 0.13053002953529358, "learning_rate": 3.180082726518248e-05, "loss": 0.3278, "step": 8680 }, { "epoch": 1.4136302568904449, "grad_norm": 0.10879544168710709, "learning_rate": 3.1796268764672774e-05, "loss": 0.3141, "step": 8681 }, { "epoch": 1.4137931034482758, "grad_norm": 0.10974185168743134, "learning_rate": 3.179171002015666e-05, "loss": 0.308, "step": 8682 }, { "epoch": 1.4139559500061067, "grad_norm": 0.0830536037683487, "learning_rate": 3.178715103179782e-05, "loss": 0.308, "step": 8683 }, { "epoch": 1.4141187965639377, "grad_norm": 0.1607673317193985, "learning_rate": 3.178259179975991e-05, "loss": 0.352, "step": 8684 }, { "epoch": 1.4142816431217686, "grad_norm": 0.11248648166656494, "learning_rate": 3.177803232420664e-05, "loss": 0.3512, "step": 8685 }, { "epoch": 1.4144444896795993, "grad_norm": 0.13420024514198303, "learning_rate": 3.1773472605301704e-05, "loss": 0.3592, "step": 8686 }, { "epoch": 1.4146073362374303, "grad_norm": 0.14801214635372162, "learning_rate": 3.1768912643208807e-05, "loss": 0.318, "step": 8687 }, { "epoch": 1.4147701827952612, "grad_norm": 0.11404596269130707, "learning_rate": 3.176435243809165e-05, "loss": 0.3072, "step": 8688 }, { "epoch": 1.414933029353092, "grad_norm": 0.15296833217144012, "learning_rate": 3.175979199011399e-05, "loss": 0.3334, "step": 8689 }, { "epoch": 1.415095875910923, "grad_norm": 0.14449645578861237, "learning_rate": 3.175523129943954e-05, "loss": 0.3549, "step": 8690 }, { "epoch": 1.4152587224687538, "grad_norm": 0.07657190412282944, "learning_rate": 3.175067036623205e-05, "loss": 0.3177, "step": 8691 }, { "epoch": 1.4154215690265848, "grad_norm": 0.10152092576026917, "learning_rate": 3.174610919065527e-05, "loss": 0.2922, "step": 8692 }, { "epoch": 1.4155844155844157, "grad_norm": 0.093739815056324, "learning_rate": 3.1741547772872946e-05, "loss": 0.3187, "step": 8693 }, { "epoch": 1.4157472621422464, "grad_norm": 0.24814465641975403, "learning_rate": 3.173698611304887e-05, "loss": 0.2613, "step": 8694 }, { "epoch": 1.4159101087000774, "grad_norm": 0.13113485276699066, "learning_rate": 3.17324242113468e-05, "loss": 0.3482, "step": 8695 }, { "epoch": 1.416072955257908, "grad_norm": 0.09546070545911789, "learning_rate": 3.172786206793054e-05, "loss": 0.2934, "step": 8696 }, { "epoch": 1.416235801815739, "grad_norm": 0.13393963873386383, "learning_rate": 3.172329968296387e-05, "loss": 0.3336, "step": 8697 }, { "epoch": 1.41639864837357, "grad_norm": 0.11292900890111923, "learning_rate": 3.17187370566106e-05, "loss": 0.2965, "step": 8698 }, { "epoch": 1.416561494931401, "grad_norm": 0.1033451110124588, "learning_rate": 3.1714174189034536e-05, "loss": 0.3208, "step": 8699 }, { "epoch": 1.4167243414892319, "grad_norm": 0.17467358708381653, "learning_rate": 3.1709611080399495e-05, "loss": 0.3743, "step": 8700 }, { "epoch": 1.4168871880470626, "grad_norm": 0.1588647961616516, "learning_rate": 3.170504773086933e-05, "loss": 0.3706, "step": 8701 }, { "epoch": 1.4170500346048935, "grad_norm": 0.0985933467745781, "learning_rate": 3.170048414060786e-05, "loss": 0.3242, "step": 8702 }, { "epoch": 1.4172128811627245, "grad_norm": 0.12156084924936295, "learning_rate": 3.1695920309778926e-05, "loss": 0.309, "step": 8703 }, { "epoch": 1.4173757277205552, "grad_norm": 0.1416712999343872, "learning_rate": 3.169135623854639e-05, "loss": 0.3153, "step": 8704 }, { "epoch": 1.4175385742783861, "grad_norm": 0.06745438277721405, "learning_rate": 3.168679192707413e-05, "loss": 0.3192, "step": 8705 }, { "epoch": 1.417701420836217, "grad_norm": 0.10605906695127487, "learning_rate": 3.168222737552599e-05, "loss": 0.3235, "step": 8706 }, { "epoch": 1.417864267394048, "grad_norm": 0.08867039531469345, "learning_rate": 3.167766258406588e-05, "loss": 0.3446, "step": 8707 }, { "epoch": 1.418027113951879, "grad_norm": 0.13040906190872192, "learning_rate": 3.1673097552857665e-05, "loss": 0.3321, "step": 8708 }, { "epoch": 1.4181899605097097, "grad_norm": 0.16988228261470795, "learning_rate": 3.166853228206526e-05, "loss": 0.328, "step": 8709 }, { "epoch": 1.4183528070675406, "grad_norm": 0.20777320861816406, "learning_rate": 3.166396677185257e-05, "loss": 0.2754, "step": 8710 }, { "epoch": 1.4185156536253716, "grad_norm": 0.13401761651039124, "learning_rate": 3.16594010223835e-05, "loss": 0.3602, "step": 8711 }, { "epoch": 1.4186785001832023, "grad_norm": 0.14359426498413086, "learning_rate": 3.165483503382198e-05, "loss": 0.3489, "step": 8712 }, { "epoch": 1.4188413467410332, "grad_norm": 0.09531626850366592, "learning_rate": 3.1650268806331954e-05, "loss": 0.3246, "step": 8713 }, { "epoch": 1.4190041932988642, "grad_norm": 0.11020021885633469, "learning_rate": 3.164570234007735e-05, "loss": 0.2788, "step": 8714 }, { "epoch": 1.4191670398566951, "grad_norm": 0.14869627356529236, "learning_rate": 3.164113563522212e-05, "loss": 0.3392, "step": 8715 }, { "epoch": 1.4193298864145258, "grad_norm": 0.16831819713115692, "learning_rate": 3.1636568691930224e-05, "loss": 0.3392, "step": 8716 }, { "epoch": 1.4194927329723568, "grad_norm": 0.15213897824287415, "learning_rate": 3.163200151036562e-05, "loss": 0.3046, "step": 8717 }, { "epoch": 1.4196555795301877, "grad_norm": 0.10950586944818497, "learning_rate": 3.162743409069231e-05, "loss": 0.3152, "step": 8718 }, { "epoch": 1.4198184260880184, "grad_norm": 0.13329139351844788, "learning_rate": 3.1622866433074236e-05, "loss": 0.2825, "step": 8719 }, { "epoch": 1.4199812726458494, "grad_norm": 0.15297579765319824, "learning_rate": 3.161829853767543e-05, "loss": 0.2982, "step": 8720 }, { "epoch": 1.4201441192036803, "grad_norm": 0.1349007487297058, "learning_rate": 3.1613730404659874e-05, "loss": 0.3261, "step": 8721 }, { "epoch": 1.4203069657615113, "grad_norm": 0.08767058700323105, "learning_rate": 3.160916203419158e-05, "loss": 0.3237, "step": 8722 }, { "epoch": 1.4204698123193422, "grad_norm": 0.1000031903386116, "learning_rate": 3.1604593426434576e-05, "loss": 0.3058, "step": 8723 }, { "epoch": 1.420632658877173, "grad_norm": 0.11536984890699387, "learning_rate": 3.160002458155287e-05, "loss": 0.3285, "step": 8724 }, { "epoch": 1.4207955054350039, "grad_norm": 0.15808264911174774, "learning_rate": 3.159545549971051e-05, "loss": 0.3036, "step": 8725 }, { "epoch": 1.4209583519928348, "grad_norm": 0.10301915556192398, "learning_rate": 3.159088618107154e-05, "loss": 0.319, "step": 8726 }, { "epoch": 1.4211211985506655, "grad_norm": 0.11307377368211746, "learning_rate": 3.1586316625800006e-05, "loss": 0.3423, "step": 8727 }, { "epoch": 1.4212840451084965, "grad_norm": 0.12759946286678314, "learning_rate": 3.158174683405997e-05, "loss": 0.3473, "step": 8728 }, { "epoch": 1.4214468916663274, "grad_norm": 0.11260484904050827, "learning_rate": 3.1577176806015506e-05, "loss": 0.3463, "step": 8729 }, { "epoch": 1.4216097382241584, "grad_norm": 0.10533454269170761, "learning_rate": 3.1572606541830686e-05, "loss": 0.2971, "step": 8730 }, { "epoch": 1.421772584781989, "grad_norm": 0.09640663117170334, "learning_rate": 3.15680360416696e-05, "loss": 0.3285, "step": 8731 }, { "epoch": 1.42193543133982, "grad_norm": 0.08336476236581802, "learning_rate": 3.156346530569635e-05, "loss": 0.3136, "step": 8732 }, { "epoch": 1.422098277897651, "grad_norm": 0.16089819371700287, "learning_rate": 3.1558894334075016e-05, "loss": 0.3354, "step": 8733 }, { "epoch": 1.4222611244554817, "grad_norm": 0.16834183037281036, "learning_rate": 3.155432312696974e-05, "loss": 0.3206, "step": 8734 }, { "epoch": 1.4224239710133126, "grad_norm": 0.1145295649766922, "learning_rate": 3.154975168454462e-05, "loss": 0.3006, "step": 8735 }, { "epoch": 1.4225868175711436, "grad_norm": 0.1277390718460083, "learning_rate": 3.1545180006963786e-05, "loss": 0.3373, "step": 8736 }, { "epoch": 1.4227496641289745, "grad_norm": 0.09223675727844238, "learning_rate": 3.154060809439138e-05, "loss": 0.3367, "step": 8737 }, { "epoch": 1.4229125106868055, "grad_norm": 0.09545841068029404, "learning_rate": 3.153603594699156e-05, "loss": 0.3103, "step": 8738 }, { "epoch": 1.4230753572446362, "grad_norm": 0.11164858937263489, "learning_rate": 3.1531463564928455e-05, "loss": 0.3416, "step": 8739 }, { "epoch": 1.4232382038024671, "grad_norm": 0.1298970729112625, "learning_rate": 3.152689094836624e-05, "loss": 0.3488, "step": 8740 }, { "epoch": 1.423401050360298, "grad_norm": 0.09157788008451462, "learning_rate": 3.1522318097469095e-05, "loss": 0.3189, "step": 8741 }, { "epoch": 1.4235638969181288, "grad_norm": 0.13165763020515442, "learning_rate": 3.151774501240118e-05, "loss": 0.3147, "step": 8742 }, { "epoch": 1.4237267434759597, "grad_norm": 0.114789679646492, "learning_rate": 3.151317169332669e-05, "loss": 0.3651, "step": 8743 }, { "epoch": 1.4238895900337907, "grad_norm": 0.14795377850532532, "learning_rate": 3.1508598140409826e-05, "loss": 0.3098, "step": 8744 }, { "epoch": 1.4240524365916216, "grad_norm": 0.16055083274841309, "learning_rate": 3.150402435381479e-05, "loss": 0.3572, "step": 8745 }, { "epoch": 1.4242152831494526, "grad_norm": 0.1403442770242691, "learning_rate": 3.149945033370579e-05, "loss": 0.3322, "step": 8746 }, { "epoch": 1.4243781297072833, "grad_norm": 0.12027008831501007, "learning_rate": 3.149487608024705e-05, "loss": 0.3097, "step": 8747 }, { "epoch": 1.4245409762651142, "grad_norm": 0.11221586912870407, "learning_rate": 3.14903015936028e-05, "loss": 0.2977, "step": 8748 }, { "epoch": 1.424703822822945, "grad_norm": 0.15110725164413452, "learning_rate": 3.1485726873937275e-05, "loss": 0.3495, "step": 8749 }, { "epoch": 1.4248666693807759, "grad_norm": 0.10615157335996628, "learning_rate": 3.1481151921414724e-05, "loss": 0.3342, "step": 8750 }, { "epoch": 1.4250295159386068, "grad_norm": 0.12822787463665009, "learning_rate": 3.14765767361994e-05, "loss": 0.3366, "step": 8751 }, { "epoch": 1.4251923624964378, "grad_norm": 0.1326281726360321, "learning_rate": 3.147200131845557e-05, "loss": 0.2923, "step": 8752 }, { "epoch": 1.4253552090542687, "grad_norm": 0.13215918838977814, "learning_rate": 3.14674256683475e-05, "loss": 0.3259, "step": 8753 }, { "epoch": 1.4255180556120994, "grad_norm": 0.12214704602956772, "learning_rate": 3.146284978603948e-05, "loss": 0.3042, "step": 8754 }, { "epoch": 1.4256809021699304, "grad_norm": 0.12136821448802948, "learning_rate": 3.145827367169578e-05, "loss": 0.322, "step": 8755 }, { "epoch": 1.4258437487277613, "grad_norm": 0.12862145900726318, "learning_rate": 3.1453697325480716e-05, "loss": 0.309, "step": 8756 }, { "epoch": 1.426006595285592, "grad_norm": 0.14550615847110748, "learning_rate": 3.144912074755857e-05, "loss": 0.3325, "step": 8757 }, { "epoch": 1.426169441843423, "grad_norm": 0.15726974606513977, "learning_rate": 3.1444543938093676e-05, "loss": 0.348, "step": 8758 }, { "epoch": 1.426332288401254, "grad_norm": 0.10691791772842407, "learning_rate": 3.1439966897250337e-05, "loss": 0.3345, "step": 8759 }, { "epoch": 1.4264951349590849, "grad_norm": 0.119109146296978, "learning_rate": 3.1435389625192895e-05, "loss": 0.3578, "step": 8760 }, { "epoch": 1.4266579815169158, "grad_norm": 0.12555354833602905, "learning_rate": 3.143081212208569e-05, "loss": 0.3044, "step": 8761 }, { "epoch": 1.4268208280747465, "grad_norm": 0.11928848922252655, "learning_rate": 3.1426234388093055e-05, "loss": 0.2947, "step": 8762 }, { "epoch": 1.4269836746325775, "grad_norm": 0.12418530136346817, "learning_rate": 3.142165642337937e-05, "loss": 0.3431, "step": 8763 }, { "epoch": 1.4271465211904084, "grad_norm": 0.17800720036029816, "learning_rate": 3.1417078228108955e-05, "loss": 0.3434, "step": 8764 }, { "epoch": 1.4273093677482391, "grad_norm": 0.13180138170719147, "learning_rate": 3.141249980244622e-05, "loss": 0.3294, "step": 8765 }, { "epoch": 1.42747221430607, "grad_norm": 0.1631559282541275, "learning_rate": 3.140792114655552e-05, "loss": 0.3518, "step": 8766 }, { "epoch": 1.427635060863901, "grad_norm": 0.08854837715625763, "learning_rate": 3.140334226060126e-05, "loss": 0.3141, "step": 8767 }, { "epoch": 1.427797907421732, "grad_norm": 0.10332029312849045, "learning_rate": 3.139876314474783e-05, "loss": 0.3468, "step": 8768 }, { "epoch": 1.4279607539795627, "grad_norm": 0.05886734277009964, "learning_rate": 3.1394183799159626e-05, "loss": 0.2901, "step": 8769 }, { "epoch": 1.4281236005373936, "grad_norm": 0.14957314729690552, "learning_rate": 3.138960422400107e-05, "loss": 0.3163, "step": 8770 }, { "epoch": 1.4282864470952246, "grad_norm": 0.06927385926246643, "learning_rate": 3.138502441943657e-05, "loss": 0.3532, "step": 8771 }, { "epoch": 1.4284492936530553, "grad_norm": 0.07865848392248154, "learning_rate": 3.138044438563057e-05, "loss": 0.2928, "step": 8772 }, { "epoch": 1.4286121402108862, "grad_norm": 0.11981457471847534, "learning_rate": 3.13758641227475e-05, "loss": 0.3246, "step": 8773 }, { "epoch": 1.4287749867687172, "grad_norm": 0.0837615504860878, "learning_rate": 3.137128363095181e-05, "loss": 0.3043, "step": 8774 }, { "epoch": 1.4289378333265481, "grad_norm": 0.14690348505973816, "learning_rate": 3.1366702910407943e-05, "loss": 0.3092, "step": 8775 }, { "epoch": 1.429100679884379, "grad_norm": 0.1536279171705246, "learning_rate": 3.1362121961280365e-05, "loss": 0.3335, "step": 8776 }, { "epoch": 1.4292635264422098, "grad_norm": 0.09449410438537598, "learning_rate": 3.1357540783733554e-05, "loss": 0.3325, "step": 8777 }, { "epoch": 1.4294263730000407, "grad_norm": 0.128980353474617, "learning_rate": 3.135295937793198e-05, "loss": 0.2769, "step": 8778 }, { "epoch": 1.4295892195578717, "grad_norm": 0.25154298543930054, "learning_rate": 3.134837774404013e-05, "loss": 0.3007, "step": 8779 }, { "epoch": 1.4297520661157024, "grad_norm": 0.09769432246685028, "learning_rate": 3.1343795882222487e-05, "loss": 0.3559, "step": 8780 }, { "epoch": 1.4299149126735333, "grad_norm": 0.15213479101657867, "learning_rate": 3.133921379264358e-05, "loss": 0.3106, "step": 8781 }, { "epoch": 1.4300777592313643, "grad_norm": 0.10167353600263596, "learning_rate": 3.13346314754679e-05, "loss": 0.3222, "step": 8782 }, { "epoch": 1.4302406057891952, "grad_norm": 0.15298303961753845, "learning_rate": 3.133004893085997e-05, "loss": 0.3018, "step": 8783 }, { "epoch": 1.4304034523470261, "grad_norm": 0.13138630986213684, "learning_rate": 3.132546615898432e-05, "loss": 0.2844, "step": 8784 }, { "epoch": 1.4305662989048569, "grad_norm": 0.07614709436893463, "learning_rate": 3.132088316000548e-05, "loss": 0.3404, "step": 8785 }, { "epoch": 1.4307291454626878, "grad_norm": 0.1034446582198143, "learning_rate": 3.131629993408799e-05, "loss": 0.3052, "step": 8786 }, { "epoch": 1.4308919920205185, "grad_norm": 0.1041291356086731, "learning_rate": 3.131171648139642e-05, "loss": 0.2743, "step": 8787 }, { "epoch": 1.4310548385783495, "grad_norm": 0.07023482024669647, "learning_rate": 3.130713280209532e-05, "loss": 0.3607, "step": 8788 }, { "epoch": 1.4312176851361804, "grad_norm": 0.11733083426952362, "learning_rate": 3.130254889634925e-05, "loss": 0.2854, "step": 8789 }, { "epoch": 1.4313805316940114, "grad_norm": 0.10215728729963303, "learning_rate": 3.129796476432279e-05, "loss": 0.3018, "step": 8790 }, { "epoch": 1.4315433782518423, "grad_norm": 0.13352523744106293, "learning_rate": 3.129338040618052e-05, "loss": 0.3259, "step": 8791 }, { "epoch": 1.431706224809673, "grad_norm": 0.10116082429885864, "learning_rate": 3.128879582208705e-05, "loss": 0.3252, "step": 8792 }, { "epoch": 1.431869071367504, "grad_norm": 0.08510084450244904, "learning_rate": 3.128421101220696e-05, "loss": 0.3157, "step": 8793 }, { "epoch": 1.432031917925335, "grad_norm": 0.1044100821018219, "learning_rate": 3.1279625976704875e-05, "loss": 0.319, "step": 8794 }, { "epoch": 1.4321947644831656, "grad_norm": 0.1339038759469986, "learning_rate": 3.12750407157454e-05, "loss": 0.2953, "step": 8795 }, { "epoch": 1.4323576110409966, "grad_norm": 0.11193744838237762, "learning_rate": 3.1270455229493164e-05, "loss": 0.3241, "step": 8796 }, { "epoch": 1.4325204575988275, "grad_norm": 0.10802242904901505, "learning_rate": 3.126586951811279e-05, "loss": 0.3321, "step": 8797 }, { "epoch": 1.4326833041566585, "grad_norm": 0.10355273634195328, "learning_rate": 3.126128358176894e-05, "loss": 0.3109, "step": 8798 }, { "epoch": 1.4328461507144894, "grad_norm": 0.12357067316770554, "learning_rate": 3.125669742062624e-05, "loss": 0.3412, "step": 8799 }, { "epoch": 1.4330089972723201, "grad_norm": 0.10784202069044113, "learning_rate": 3.1252111034849357e-05, "loss": 0.3352, "step": 8800 }, { "epoch": 1.433171843830151, "grad_norm": 0.12198309600353241, "learning_rate": 3.124752442460297e-05, "loss": 0.3354, "step": 8801 }, { "epoch": 1.433334690387982, "grad_norm": 0.1879260241985321, "learning_rate": 3.1242937590051716e-05, "loss": 0.3221, "step": 8802 }, { "epoch": 1.4334975369458127, "grad_norm": 0.13939610123634338, "learning_rate": 3.1238350531360314e-05, "loss": 0.3325, "step": 8803 }, { "epoch": 1.4336603835036437, "grad_norm": 0.15251044929027557, "learning_rate": 3.123376324869343e-05, "loss": 0.3516, "step": 8804 }, { "epoch": 1.4338232300614746, "grad_norm": 0.1280757188796997, "learning_rate": 3.122917574221578e-05, "loss": 0.3344, "step": 8805 }, { "epoch": 1.4339860766193055, "grad_norm": 0.13618475198745728, "learning_rate": 3.1224588012092046e-05, "loss": 0.3616, "step": 8806 }, { "epoch": 1.4341489231771363, "grad_norm": 0.09943591058254242, "learning_rate": 3.122000005848696e-05, "loss": 0.3178, "step": 8807 }, { "epoch": 1.4343117697349672, "grad_norm": 0.11482800543308258, "learning_rate": 3.121541188156523e-05, "loss": 0.3358, "step": 8808 }, { "epoch": 1.4344746162927982, "grad_norm": 0.1416955143213272, "learning_rate": 3.121082348149159e-05, "loss": 0.3292, "step": 8809 }, { "epoch": 1.4346374628506289, "grad_norm": 0.12716495990753174, "learning_rate": 3.1206234858430784e-05, "loss": 0.3247, "step": 8810 }, { "epoch": 1.4348003094084598, "grad_norm": 0.17724379897117615, "learning_rate": 3.120164601254755e-05, "loss": 0.3264, "step": 8811 }, { "epoch": 1.4349631559662908, "grad_norm": 0.10228648781776428, "learning_rate": 3.1197056944006645e-05, "loss": 0.3272, "step": 8812 }, { "epoch": 1.4351260025241217, "grad_norm": 0.11910361051559448, "learning_rate": 3.1192467652972827e-05, "loss": 0.3264, "step": 8813 }, { "epoch": 1.4352888490819526, "grad_norm": 0.09133603423833847, "learning_rate": 3.118787813961087e-05, "loss": 0.2809, "step": 8814 }, { "epoch": 1.4354516956397834, "grad_norm": 0.13651852309703827, "learning_rate": 3.118328840408555e-05, "loss": 0.3169, "step": 8815 }, { "epoch": 1.4356145421976143, "grad_norm": 0.10366449505090714, "learning_rate": 3.1178698446561646e-05, "loss": 0.3421, "step": 8816 }, { "epoch": 1.4357773887554452, "grad_norm": 0.13178636133670807, "learning_rate": 3.117410826720396e-05, "loss": 0.3217, "step": 8817 }, { "epoch": 1.435940235313276, "grad_norm": 0.11224755644798279, "learning_rate": 3.116951786617729e-05, "loss": 0.3305, "step": 8818 }, { "epoch": 1.436103081871107, "grad_norm": 0.14339610934257507, "learning_rate": 3.1164927243646436e-05, "loss": 0.3086, "step": 8819 }, { "epoch": 1.4362659284289379, "grad_norm": 0.11113203316926956, "learning_rate": 3.116033639977623e-05, "loss": 0.2982, "step": 8820 }, { "epoch": 1.4364287749867688, "grad_norm": 0.14177939295768738, "learning_rate": 3.115574533473149e-05, "loss": 0.2931, "step": 8821 }, { "epoch": 1.4365916215445997, "grad_norm": 0.09874728322029114, "learning_rate": 3.115115404867705e-05, "loss": 0.326, "step": 8822 }, { "epoch": 1.4367544681024305, "grad_norm": 0.10912801325321198, "learning_rate": 3.1146562541777744e-05, "loss": 0.3362, "step": 8823 }, { "epoch": 1.4369173146602614, "grad_norm": 0.0875881239771843, "learning_rate": 3.114197081419843e-05, "loss": 0.3249, "step": 8824 }, { "epoch": 1.4370801612180921, "grad_norm": 0.13489432632923126, "learning_rate": 3.113737886610397e-05, "loss": 0.3449, "step": 8825 }, { "epoch": 1.437243007775923, "grad_norm": 0.1437300592660904, "learning_rate": 3.113278669765921e-05, "loss": 0.3119, "step": 8826 }, { "epoch": 1.437405854333754, "grad_norm": 0.13582675158977509, "learning_rate": 3.1128194309029035e-05, "loss": 0.3514, "step": 8827 }, { "epoch": 1.437568700891585, "grad_norm": 0.12603658437728882, "learning_rate": 3.1123601700378324e-05, "loss": 0.335, "step": 8828 }, { "epoch": 1.437731547449416, "grad_norm": 0.13467393815517426, "learning_rate": 3.111900887187197e-05, "loss": 0.3094, "step": 8829 }, { "epoch": 1.4378943940072466, "grad_norm": 0.07684680074453354, "learning_rate": 3.1114415823674864e-05, "loss": 0.2945, "step": 8830 }, { "epoch": 1.4380572405650776, "grad_norm": 0.07610589265823364, "learning_rate": 3.110982255595191e-05, "loss": 0.2917, "step": 8831 }, { "epoch": 1.4382200871229085, "grad_norm": 0.10139331966638565, "learning_rate": 3.1105229068868015e-05, "loss": 0.3363, "step": 8832 }, { "epoch": 1.4383829336807392, "grad_norm": 0.09087435156106949, "learning_rate": 3.110063536258811e-05, "loss": 0.3499, "step": 8833 }, { "epoch": 1.4385457802385702, "grad_norm": 0.133719801902771, "learning_rate": 3.109604143727712e-05, "loss": 0.3363, "step": 8834 }, { "epoch": 1.438708626796401, "grad_norm": 0.09576338529586792, "learning_rate": 3.109144729309998e-05, "loss": 0.3218, "step": 8835 }, { "epoch": 1.438871473354232, "grad_norm": 0.09063619375228882, "learning_rate": 3.108685293022162e-05, "loss": 0.3077, "step": 8836 }, { "epoch": 1.439034319912063, "grad_norm": 0.13310816884040833, "learning_rate": 3.1082258348807006e-05, "loss": 0.2953, "step": 8837 }, { "epoch": 1.4391971664698937, "grad_norm": 0.15219253301620483, "learning_rate": 3.10776635490211e-05, "loss": 0.3569, "step": 8838 }, { "epoch": 1.4393600130277246, "grad_norm": 0.12583830952644348, "learning_rate": 3.107306853102886e-05, "loss": 0.3365, "step": 8839 }, { "epoch": 1.4395228595855556, "grad_norm": 0.2022983878850937, "learning_rate": 3.106847329499527e-05, "loss": 0.3474, "step": 8840 }, { "epoch": 1.4396857061433863, "grad_norm": 0.1305001825094223, "learning_rate": 3.1063877841085295e-05, "loss": 0.2825, "step": 8841 }, { "epoch": 1.4398485527012173, "grad_norm": 0.0840771272778511, "learning_rate": 3.105928216946395e-05, "loss": 0.3197, "step": 8842 }, { "epoch": 1.4400113992590482, "grad_norm": 0.13113896548748016, "learning_rate": 3.105468628029622e-05, "loss": 0.3269, "step": 8843 }, { "epoch": 1.4401742458168791, "grad_norm": 0.10796142369508743, "learning_rate": 3.105009017374711e-05, "loss": 0.3233, "step": 8844 }, { "epoch": 1.4403370923747099, "grad_norm": 0.10728340595960617, "learning_rate": 3.104549384998163e-05, "loss": 0.3469, "step": 8845 }, { "epoch": 1.4404999389325408, "grad_norm": 0.11600887775421143, "learning_rate": 3.104089730916481e-05, "loss": 0.3605, "step": 8846 }, { "epoch": 1.4406627854903717, "grad_norm": 0.08905090391635895, "learning_rate": 3.103630055146168e-05, "loss": 0.3321, "step": 8847 }, { "epoch": 1.4408256320482025, "grad_norm": 0.12293851375579834, "learning_rate": 3.1031703577037277e-05, "loss": 0.3021, "step": 8848 }, { "epoch": 1.4409884786060334, "grad_norm": 0.09134234488010406, "learning_rate": 3.102710638605664e-05, "loss": 0.3019, "step": 8849 }, { "epoch": 1.4411513251638643, "grad_norm": 0.17557333409786224, "learning_rate": 3.102250897868483e-05, "loss": 0.3369, "step": 8850 }, { "epoch": 1.4413141717216953, "grad_norm": 0.09700880944728851, "learning_rate": 3.10179113550869e-05, "loss": 0.3689, "step": 8851 }, { "epoch": 1.4414770182795262, "grad_norm": 0.13423199951648712, "learning_rate": 3.101331351542793e-05, "loss": 0.3333, "step": 8852 }, { "epoch": 1.441639864837357, "grad_norm": 0.11228864639997482, "learning_rate": 3.100871545987298e-05, "loss": 0.3275, "step": 8853 }, { "epoch": 1.441802711395188, "grad_norm": 0.14350451529026031, "learning_rate": 3.1004117188587146e-05, "loss": 0.3238, "step": 8854 }, { "epoch": 1.4419655579530188, "grad_norm": 0.11138849705457687, "learning_rate": 3.0999518701735514e-05, "loss": 0.3114, "step": 8855 }, { "epoch": 1.4421284045108496, "grad_norm": 0.16681034862995148, "learning_rate": 3.0994919999483186e-05, "loss": 0.348, "step": 8856 }, { "epoch": 1.4422912510686805, "grad_norm": 0.14045372605323792, "learning_rate": 3.099032108199527e-05, "loss": 0.3716, "step": 8857 }, { "epoch": 1.4424540976265114, "grad_norm": 0.07382936775684357, "learning_rate": 3.098572194943688e-05, "loss": 0.2859, "step": 8858 }, { "epoch": 1.4426169441843424, "grad_norm": 0.07992753386497498, "learning_rate": 3.0981122601973137e-05, "loss": 0.304, "step": 8859 }, { "epoch": 1.442779790742173, "grad_norm": 0.11908270418643951, "learning_rate": 3.097652303976918e-05, "loss": 0.3654, "step": 8860 }, { "epoch": 1.442942637300004, "grad_norm": 0.05949262157082558, "learning_rate": 3.097192326299013e-05, "loss": 0.2994, "step": 8861 }, { "epoch": 1.443105483857835, "grad_norm": 0.07279700040817261, "learning_rate": 3.0967323271801143e-05, "loss": 0.2842, "step": 8862 }, { "epoch": 1.4432683304156657, "grad_norm": 0.127081036567688, "learning_rate": 3.096272306636738e-05, "loss": 0.3156, "step": 8863 }, { "epoch": 1.4434311769734967, "grad_norm": 0.0996660441160202, "learning_rate": 3.095812264685398e-05, "loss": 0.3438, "step": 8864 }, { "epoch": 1.4435940235313276, "grad_norm": 0.10759758204221725, "learning_rate": 3.095352201342613e-05, "loss": 0.2728, "step": 8865 }, { "epoch": 1.4437568700891585, "grad_norm": 0.09920012950897217, "learning_rate": 3.0948921166249015e-05, "loss": 0.344, "step": 8866 }, { "epoch": 1.4439197166469895, "grad_norm": 0.10051529854536057, "learning_rate": 3.094432010548779e-05, "loss": 0.3094, "step": 8867 }, { "epoch": 1.4440825632048202, "grad_norm": 0.1477859765291214, "learning_rate": 3.093971883130767e-05, "loss": 0.295, "step": 8868 }, { "epoch": 1.4442454097626511, "grad_norm": 0.11472252756357193, "learning_rate": 3.0935117343873856e-05, "loss": 0.3684, "step": 8869 }, { "epoch": 1.444408256320482, "grad_norm": 0.12231255322694778, "learning_rate": 3.093051564335154e-05, "loss": 0.331, "step": 8870 }, { "epoch": 1.4445711028783128, "grad_norm": 0.1677795648574829, "learning_rate": 3.092591372990595e-05, "loss": 0.341, "step": 8871 }, { "epoch": 1.4447339494361437, "grad_norm": 0.1386735737323761, "learning_rate": 3.09213116037023e-05, "loss": 0.3206, "step": 8872 }, { "epoch": 1.4448967959939747, "grad_norm": 0.0777222141623497, "learning_rate": 3.091670926490582e-05, "loss": 0.3075, "step": 8873 }, { "epoch": 1.4450596425518056, "grad_norm": 0.1558419167995453, "learning_rate": 3.091210671368175e-05, "loss": 0.2788, "step": 8874 }, { "epoch": 1.4452224891096366, "grad_norm": 0.12666532397270203, "learning_rate": 3.0907503950195336e-05, "loss": 0.3333, "step": 8875 }, { "epoch": 1.4453853356674673, "grad_norm": 0.1195584088563919, "learning_rate": 3.0902900974611835e-05, "loss": 0.3342, "step": 8876 }, { "epoch": 1.4455481822252982, "grad_norm": 0.08664007484912872, "learning_rate": 3.0898297787096497e-05, "loss": 0.3235, "step": 8877 }, { "epoch": 1.445711028783129, "grad_norm": 0.1342039704322815, "learning_rate": 3.0893694387814606e-05, "loss": 0.3449, "step": 8878 }, { "epoch": 1.44587387534096, "grad_norm": 0.10250669717788696, "learning_rate": 3.088909077693142e-05, "loss": 0.3348, "step": 8879 }, { "epoch": 1.4460367218987908, "grad_norm": 0.16736316680908203, "learning_rate": 3.088448695461224e-05, "loss": 0.3653, "step": 8880 }, { "epoch": 1.4461995684566218, "grad_norm": 0.12203950434923172, "learning_rate": 3.087988292102233e-05, "loss": 0.3529, "step": 8881 }, { "epoch": 1.4463624150144527, "grad_norm": 0.11230790615081787, "learning_rate": 3.087527867632702e-05, "loss": 0.3171, "step": 8882 }, { "epoch": 1.4465252615722834, "grad_norm": 0.134280726313591, "learning_rate": 3.0870674220691607e-05, "loss": 0.3231, "step": 8883 }, { "epoch": 1.4466881081301144, "grad_norm": 0.15409033000469208, "learning_rate": 3.086606955428139e-05, "loss": 0.3315, "step": 8884 }, { "epoch": 1.4468509546879453, "grad_norm": 0.11428572982549667, "learning_rate": 3.0861464677261704e-05, "loss": 0.285, "step": 8885 }, { "epoch": 1.447013801245776, "grad_norm": 0.24477146565914154, "learning_rate": 3.085685958979788e-05, "loss": 0.3286, "step": 8886 }, { "epoch": 1.447176647803607, "grad_norm": 0.08661950379610062, "learning_rate": 3.085225429205524e-05, "loss": 0.3272, "step": 8887 }, { "epoch": 1.447339494361438, "grad_norm": 0.15782061219215393, "learning_rate": 3.084764878419914e-05, "loss": 0.3306, "step": 8888 }, { "epoch": 1.4475023409192689, "grad_norm": 0.11477380990982056, "learning_rate": 3.084304306639494e-05, "loss": 0.3247, "step": 8889 }, { "epoch": 1.4476651874770998, "grad_norm": 0.08199212700128555, "learning_rate": 3.083843713880797e-05, "loss": 0.3264, "step": 8890 }, { "epoch": 1.4478280340349305, "grad_norm": 0.158159539103508, "learning_rate": 3.083383100160363e-05, "loss": 0.3715, "step": 8891 }, { "epoch": 1.4479908805927615, "grad_norm": 0.13628104329109192, "learning_rate": 3.082922465494727e-05, "loss": 0.3486, "step": 8892 }, { "epoch": 1.4481537271505924, "grad_norm": 0.09544713795185089, "learning_rate": 3.082461809900428e-05, "loss": 0.311, "step": 8893 }, { "epoch": 1.4483165737084231, "grad_norm": 0.11466734856367111, "learning_rate": 3.082001133394005e-05, "loss": 0.3036, "step": 8894 }, { "epoch": 1.448479420266254, "grad_norm": 0.11455409228801727, "learning_rate": 3.0815404359919974e-05, "loss": 0.3397, "step": 8895 }, { "epoch": 1.448642266824085, "grad_norm": 0.14496195316314697, "learning_rate": 3.0810797177109465e-05, "loss": 0.3339, "step": 8896 }, { "epoch": 1.448805113381916, "grad_norm": 0.08588019758462906, "learning_rate": 3.0806189785673914e-05, "loss": 0.3452, "step": 8897 }, { "epoch": 1.4489679599397467, "grad_norm": 0.11491511017084122, "learning_rate": 3.080158218577877e-05, "loss": 0.2899, "step": 8898 }, { "epoch": 1.4491308064975776, "grad_norm": 0.12247306853532791, "learning_rate": 3.079697437758944e-05, "loss": 0.2925, "step": 8899 }, { "epoch": 1.4492936530554086, "grad_norm": 0.12427041679620743, "learning_rate": 3.079236636127136e-05, "loss": 0.2955, "step": 8900 }, { "epoch": 1.4494564996132393, "grad_norm": 0.16301624476909637, "learning_rate": 3.078775813698996e-05, "loss": 0.3385, "step": 8901 }, { "epoch": 1.4496193461710702, "grad_norm": 0.10589565336704254, "learning_rate": 3.078314970491072e-05, "loss": 0.3022, "step": 8902 }, { "epoch": 1.4497821927289012, "grad_norm": 0.18840153515338898, "learning_rate": 3.0778541065199076e-05, "loss": 0.3366, "step": 8903 }, { "epoch": 1.4499450392867321, "grad_norm": 0.12526364624500275, "learning_rate": 3.077393221802049e-05, "loss": 0.3291, "step": 8904 }, { "epoch": 1.450107885844563, "grad_norm": 0.09418243914842606, "learning_rate": 3.076932316354043e-05, "loss": 0.344, "step": 8905 }, { "epoch": 1.4502707324023938, "grad_norm": 0.08566326647996902, "learning_rate": 3.076471390192439e-05, "loss": 0.315, "step": 8906 }, { "epoch": 1.4504335789602247, "grad_norm": 0.10647451877593994, "learning_rate": 3.076010443333786e-05, "loss": 0.2912, "step": 8907 }, { "epoch": 1.4505964255180557, "grad_norm": 0.1013374850153923, "learning_rate": 3.0755494757946315e-05, "loss": 0.3175, "step": 8908 }, { "epoch": 1.4507592720758864, "grad_norm": 0.16576673090457916, "learning_rate": 3.075088487591527e-05, "loss": 0.3007, "step": 8909 }, { "epoch": 1.4509221186337173, "grad_norm": 0.15471026301383972, "learning_rate": 3.074627478741021e-05, "loss": 0.3326, "step": 8910 }, { "epoch": 1.4510849651915483, "grad_norm": 0.10880323499441147, "learning_rate": 3.074166449259669e-05, "loss": 0.3283, "step": 8911 }, { "epoch": 1.4512478117493792, "grad_norm": 0.13719795644283295, "learning_rate": 3.07370539916402e-05, "loss": 0.3178, "step": 8912 }, { "epoch": 1.4514106583072102, "grad_norm": 0.14044173061847687, "learning_rate": 3.073244328470628e-05, "loss": 0.339, "step": 8913 }, { "epoch": 1.4515735048650409, "grad_norm": 0.11132722347974777, "learning_rate": 3.072783237196049e-05, "loss": 0.2818, "step": 8914 }, { "epoch": 1.4517363514228718, "grad_norm": 0.11069131642580032, "learning_rate": 3.0723221253568334e-05, "loss": 0.3373, "step": 8915 }, { "epoch": 1.4518991979807025, "grad_norm": 0.12266165018081665, "learning_rate": 3.07186099296954e-05, "loss": 0.3257, "step": 8916 }, { "epoch": 1.4520620445385335, "grad_norm": 0.12262775748968124, "learning_rate": 3.0713998400507236e-05, "loss": 0.314, "step": 8917 }, { "epoch": 1.4522248910963644, "grad_norm": 0.1123422384262085, "learning_rate": 3.0709386666169407e-05, "loss": 0.3138, "step": 8918 }, { "epoch": 1.4523877376541954, "grad_norm": 0.11341623216867447, "learning_rate": 3.070477472684749e-05, "loss": 0.2695, "step": 8919 }, { "epoch": 1.4525505842120263, "grad_norm": 0.09357236325740814, "learning_rate": 3.070016258270708e-05, "loss": 0.3169, "step": 8920 }, { "epoch": 1.452713430769857, "grad_norm": 0.08036542683839798, "learning_rate": 3.069555023391374e-05, "loss": 0.3068, "step": 8921 }, { "epoch": 1.452876277327688, "grad_norm": 0.14365620911121368, "learning_rate": 3.069093768063308e-05, "loss": 0.3971, "step": 8922 }, { "epoch": 1.453039123885519, "grad_norm": 0.15667834877967834, "learning_rate": 3.0686324923030726e-05, "loss": 0.3664, "step": 8923 }, { "epoch": 1.4532019704433496, "grad_norm": 0.11548279970884323, "learning_rate": 3.068171196127225e-05, "loss": 0.3239, "step": 8924 }, { "epoch": 1.4533648170011806, "grad_norm": 0.09095333516597748, "learning_rate": 3.06770987955233e-05, "loss": 0.3063, "step": 8925 }, { "epoch": 1.4535276635590115, "grad_norm": 0.12013288587331772, "learning_rate": 3.067248542594949e-05, "loss": 0.3273, "step": 8926 }, { "epoch": 1.4536905101168425, "grad_norm": 0.10033270716667175, "learning_rate": 3.066787185271647e-05, "loss": 0.362, "step": 8927 }, { "epoch": 1.4538533566746734, "grad_norm": 0.08266104012727737, "learning_rate": 3.066325807598986e-05, "loss": 0.3235, "step": 8928 }, { "epoch": 1.4540162032325041, "grad_norm": 0.15932461619377136, "learning_rate": 3.065864409593532e-05, "loss": 0.393, "step": 8929 }, { "epoch": 1.454179049790335, "grad_norm": 0.0884760320186615, "learning_rate": 3.06540299127185e-05, "loss": 0.3307, "step": 8930 }, { "epoch": 1.454341896348166, "grad_norm": 0.14592869579792023, "learning_rate": 3.0649415526505075e-05, "loss": 0.3207, "step": 8931 }, { "epoch": 1.4545047429059967, "grad_norm": 0.11850543320178986, "learning_rate": 3.06448009374607e-05, "loss": 0.3054, "step": 8932 }, { "epoch": 1.4546675894638277, "grad_norm": 0.11741858720779419, "learning_rate": 3.064018614575105e-05, "loss": 0.3264, "step": 8933 }, { "epoch": 1.4548304360216586, "grad_norm": 0.1086580902338028, "learning_rate": 3.0635571151541834e-05, "loss": 0.3106, "step": 8934 }, { "epoch": 1.4549932825794896, "grad_norm": 0.075407475233078, "learning_rate": 3.0630955954998723e-05, "loss": 0.3038, "step": 8935 }, { "epoch": 1.4551561291373203, "grad_norm": 0.11464820802211761, "learning_rate": 3.062634055628742e-05, "loss": 0.3575, "step": 8936 }, { "epoch": 1.4553189756951512, "grad_norm": 0.10483331233263016, "learning_rate": 3.0621724955573644e-05, "loss": 0.3003, "step": 8937 }, { "epoch": 1.4554818222529822, "grad_norm": 0.08846911042928696, "learning_rate": 3.0617109153023095e-05, "loss": 0.2955, "step": 8938 }, { "epoch": 1.455644668810813, "grad_norm": 0.13234086334705353, "learning_rate": 3.061249314880149e-05, "loss": 0.3201, "step": 8939 }, { "epoch": 1.4558075153686438, "grad_norm": 0.1026817336678505, "learning_rate": 3.0607876943074574e-05, "loss": 0.3347, "step": 8940 }, { "epoch": 1.4559703619264748, "grad_norm": 0.12347757071256638, "learning_rate": 3.060326053600807e-05, "loss": 0.3042, "step": 8941 }, { "epoch": 1.4561332084843057, "grad_norm": 0.11746173352003098, "learning_rate": 3.059864392776772e-05, "loss": 0.3217, "step": 8942 }, { "epoch": 1.4562960550421367, "grad_norm": 0.10305453836917877, "learning_rate": 3.059402711851929e-05, "loss": 0.3076, "step": 8943 }, { "epoch": 1.4564589015999674, "grad_norm": 0.08059147000312805, "learning_rate": 3.058941010842852e-05, "loss": 0.3242, "step": 8944 }, { "epoch": 1.4566217481577983, "grad_norm": 0.10718879103660583, "learning_rate": 3.058479289766118e-05, "loss": 0.2987, "step": 8945 }, { "epoch": 1.4567845947156293, "grad_norm": 0.14075477421283722, "learning_rate": 3.0580175486383036e-05, "loss": 0.2966, "step": 8946 }, { "epoch": 1.45694744127346, "grad_norm": 0.13643570244312286, "learning_rate": 3.057555787475989e-05, "loss": 0.3227, "step": 8947 }, { "epoch": 1.457110287831291, "grad_norm": 0.10114886611700058, "learning_rate": 3.05709400629575e-05, "loss": 0.314, "step": 8948 }, { "epoch": 1.4572731343891219, "grad_norm": 0.15162122249603271, "learning_rate": 3.056632205114168e-05, "loss": 0.2882, "step": 8949 }, { "epoch": 1.4574359809469528, "grad_norm": 0.09907864034175873, "learning_rate": 3.0561703839478203e-05, "loss": 0.3699, "step": 8950 }, { "epoch": 1.4575988275047838, "grad_norm": 0.07181460410356522, "learning_rate": 3.0557085428132917e-05, "loss": 0.3242, "step": 8951 }, { "epoch": 1.4577616740626145, "grad_norm": 0.11832507699728012, "learning_rate": 3.05524668172716e-05, "loss": 0.3101, "step": 8952 }, { "epoch": 1.4579245206204454, "grad_norm": 0.14420801401138306, "learning_rate": 3.054784800706009e-05, "loss": 0.3326, "step": 8953 }, { "epoch": 1.4580873671782761, "grad_norm": 0.1277536004781723, "learning_rate": 3.0543228997664224e-05, "loss": 0.3259, "step": 8954 }, { "epoch": 1.458250213736107, "grad_norm": 0.15081515908241272, "learning_rate": 3.0538609789249814e-05, "loss": 0.283, "step": 8955 }, { "epoch": 1.458413060293938, "grad_norm": 0.08641234040260315, "learning_rate": 3.053399038198274e-05, "loss": 0.3052, "step": 8956 }, { "epoch": 1.458575906851769, "grad_norm": 0.15096549689769745, "learning_rate": 3.052937077602881e-05, "loss": 0.3287, "step": 8957 }, { "epoch": 1.4587387534096, "grad_norm": 0.13939635455608368, "learning_rate": 3.052475097155392e-05, "loss": 0.317, "step": 8958 }, { "epoch": 1.4589015999674306, "grad_norm": 0.13895753026008606, "learning_rate": 3.05201309687239e-05, "loss": 0.3338, "step": 8959 }, { "epoch": 1.4590644465252616, "grad_norm": 0.13945259153842926, "learning_rate": 3.0515510767704653e-05, "loss": 0.3139, "step": 8960 }, { "epoch": 1.4592272930830925, "grad_norm": 0.0944817066192627, "learning_rate": 3.0510890368662036e-05, "loss": 0.3005, "step": 8961 }, { "epoch": 1.4593901396409232, "grad_norm": 0.1735408753156662, "learning_rate": 3.050626977176195e-05, "loss": 0.3511, "step": 8962 }, { "epoch": 1.4595529861987542, "grad_norm": 0.09706512093544006, "learning_rate": 3.050164897717027e-05, "loss": 0.2994, "step": 8963 }, { "epoch": 1.4597158327565851, "grad_norm": 0.07305599004030228, "learning_rate": 3.0497027985052924e-05, "loss": 0.3249, "step": 8964 }, { "epoch": 1.459878679314416, "grad_norm": 0.16200223565101624, "learning_rate": 3.0492406795575797e-05, "loss": 0.3403, "step": 8965 }, { "epoch": 1.460041525872247, "grad_norm": 0.12543262541294098, "learning_rate": 3.0487785408904805e-05, "loss": 0.3504, "step": 8966 }, { "epoch": 1.4602043724300777, "grad_norm": 0.12111669778823853, "learning_rate": 3.0483163825205886e-05, "loss": 0.2934, "step": 8967 }, { "epoch": 1.4603672189879087, "grad_norm": 0.05621028319001198, "learning_rate": 3.0478542044644953e-05, "loss": 0.3509, "step": 8968 }, { "epoch": 1.4605300655457396, "grad_norm": 0.09420169144868851, "learning_rate": 3.0473920067387947e-05, "loss": 0.3502, "step": 8969 }, { "epoch": 1.4606929121035703, "grad_norm": 0.11720865964889526, "learning_rate": 3.0469297893600808e-05, "loss": 0.2931, "step": 8970 }, { "epoch": 1.4608557586614013, "grad_norm": 0.1358993947505951, "learning_rate": 3.0464675523449493e-05, "loss": 0.3434, "step": 8971 }, { "epoch": 1.4610186052192322, "grad_norm": 0.1570664495229721, "learning_rate": 3.0460052957099944e-05, "loss": 0.3107, "step": 8972 }, { "epoch": 1.4611814517770632, "grad_norm": 0.15244892239570618, "learning_rate": 3.045543019471814e-05, "loss": 0.3447, "step": 8973 }, { "epoch": 1.4613442983348939, "grad_norm": 0.1362084150314331, "learning_rate": 3.0450807236470053e-05, "loss": 0.3103, "step": 8974 }, { "epoch": 1.4615071448927248, "grad_norm": 0.10171998292207718, "learning_rate": 3.0446184082521646e-05, "loss": 0.2875, "step": 8975 }, { "epoch": 1.4616699914505558, "grad_norm": 0.08825471997261047, "learning_rate": 3.0441560733038922e-05, "loss": 0.3293, "step": 8976 }, { "epoch": 1.4618328380083865, "grad_norm": 0.1268589049577713, "learning_rate": 3.043693718818786e-05, "loss": 0.286, "step": 8977 }, { "epoch": 1.4619956845662174, "grad_norm": 0.18347972631454468, "learning_rate": 3.0432313448134465e-05, "loss": 0.3017, "step": 8978 }, { "epoch": 1.4621585311240484, "grad_norm": 0.15530654788017273, "learning_rate": 3.042768951304474e-05, "loss": 0.3262, "step": 8979 }, { "epoch": 1.4623213776818793, "grad_norm": 0.11346716433763504, "learning_rate": 3.0423065383084702e-05, "loss": 0.3052, "step": 8980 }, { "epoch": 1.4624842242397103, "grad_norm": 0.09991335868835449, "learning_rate": 3.0418441058420366e-05, "loss": 0.3349, "step": 8981 }, { "epoch": 1.462647070797541, "grad_norm": 0.12053681164979935, "learning_rate": 3.041381653921776e-05, "loss": 0.3086, "step": 8982 }, { "epoch": 1.462809917355372, "grad_norm": 0.136042982339859, "learning_rate": 3.0409191825642918e-05, "loss": 0.3352, "step": 8983 }, { "epoch": 1.4629727639132029, "grad_norm": 0.16274124383926392, "learning_rate": 3.0404566917861887e-05, "loss": 0.3026, "step": 8984 }, { "epoch": 1.4631356104710336, "grad_norm": 0.13212843239307404, "learning_rate": 3.0399941816040715e-05, "loss": 0.3213, "step": 8985 }, { "epoch": 1.4632984570288645, "grad_norm": 0.09539274871349335, "learning_rate": 3.0395316520345445e-05, "loss": 0.2893, "step": 8986 }, { "epoch": 1.4634613035866955, "grad_norm": 0.12351198494434357, "learning_rate": 3.039069103094216e-05, "loss": 0.3561, "step": 8987 }, { "epoch": 1.4636241501445264, "grad_norm": 0.21087104082107544, "learning_rate": 3.0386065347996905e-05, "loss": 0.3593, "step": 8988 }, { "epoch": 1.4637869967023571, "grad_norm": 0.0774773508310318, "learning_rate": 3.0381439471675772e-05, "loss": 0.3496, "step": 8989 }, { "epoch": 1.463949843260188, "grad_norm": 0.13342204689979553, "learning_rate": 3.0376813402144832e-05, "loss": 0.3527, "step": 8990 }, { "epoch": 1.464112689818019, "grad_norm": 0.09202911704778671, "learning_rate": 3.0372187139570198e-05, "loss": 0.3394, "step": 8991 }, { "epoch": 1.4642755363758497, "grad_norm": 0.13972198963165283, "learning_rate": 3.0367560684117928e-05, "loss": 0.2986, "step": 8992 }, { "epoch": 1.4644383829336807, "grad_norm": 0.12189723551273346, "learning_rate": 3.0362934035954167e-05, "loss": 0.3113, "step": 8993 }, { "epoch": 1.4646012294915116, "grad_norm": 0.09931502491235733, "learning_rate": 3.0358307195244994e-05, "loss": 0.3256, "step": 8994 }, { "epoch": 1.4647640760493426, "grad_norm": 0.13539038598537445, "learning_rate": 3.035368016215654e-05, "loss": 0.3108, "step": 8995 }, { "epoch": 1.4649269226071735, "grad_norm": 0.07686782628297806, "learning_rate": 3.0349052936854932e-05, "loss": 0.3176, "step": 8996 }, { "epoch": 1.4650897691650042, "grad_norm": 0.10459878295660019, "learning_rate": 3.0344425519506298e-05, "loss": 0.3215, "step": 8997 }, { "epoch": 1.4652526157228352, "grad_norm": 0.11270097643136978, "learning_rate": 3.0339797910276773e-05, "loss": 0.3043, "step": 8998 }, { "epoch": 1.465415462280666, "grad_norm": 0.1338588446378708, "learning_rate": 3.0335170109332507e-05, "loss": 0.3322, "step": 8999 }, { "epoch": 1.4655783088384968, "grad_norm": 0.11062729358673096, "learning_rate": 3.0330542116839645e-05, "loss": 0.3215, "step": 9000 }, { "epoch": 1.4657411553963278, "grad_norm": 0.13859258592128754, "learning_rate": 3.0325913932964354e-05, "loss": 0.3234, "step": 9001 }, { "epoch": 1.4659040019541587, "grad_norm": 0.10896041989326477, "learning_rate": 3.0321285557872792e-05, "loss": 0.3356, "step": 9002 }, { "epoch": 1.4660668485119897, "grad_norm": 0.11151010543107986, "learning_rate": 3.0316656991731135e-05, "loss": 0.2951, "step": 9003 }, { "epoch": 1.4662296950698206, "grad_norm": 0.11753802001476288, "learning_rate": 3.0312028234705563e-05, "loss": 0.2904, "step": 9004 }, { "epoch": 1.4663925416276513, "grad_norm": 0.14291784167289734, "learning_rate": 3.0307399286962268e-05, "loss": 0.3337, "step": 9005 }, { "epoch": 1.4665553881854823, "grad_norm": 0.12848591804504395, "learning_rate": 3.0302770148667437e-05, "loss": 0.3379, "step": 9006 }, { "epoch": 1.466718234743313, "grad_norm": 0.14456845819950104, "learning_rate": 3.029814081998727e-05, "loss": 0.3223, "step": 9007 }, { "epoch": 1.466881081301144, "grad_norm": 0.08268392086029053, "learning_rate": 3.029351130108797e-05, "loss": 0.3064, "step": 9008 }, { "epoch": 1.4670439278589749, "grad_norm": 0.15384335815906525, "learning_rate": 3.0288881592135755e-05, "loss": 0.3129, "step": 9009 }, { "epoch": 1.4672067744168058, "grad_norm": 0.09264634549617767, "learning_rate": 3.0284251693296843e-05, "loss": 0.3339, "step": 9010 }, { "epoch": 1.4673696209746367, "grad_norm": 0.14181190729141235, "learning_rate": 3.0279621604737473e-05, "loss": 0.3259, "step": 9011 }, { "epoch": 1.4675324675324675, "grad_norm": 0.12232425063848495, "learning_rate": 3.027499132662386e-05, "loss": 0.3136, "step": 9012 }, { "epoch": 1.4676953140902984, "grad_norm": 0.1454380750656128, "learning_rate": 3.027036085912226e-05, "loss": 0.3303, "step": 9013 }, { "epoch": 1.4678581606481294, "grad_norm": 0.06803496181964874, "learning_rate": 3.0265730202398905e-05, "loss": 0.292, "step": 9014 }, { "epoch": 1.46802100720596, "grad_norm": 0.19426244497299194, "learning_rate": 3.0261099356620066e-05, "loss": 0.3502, "step": 9015 }, { "epoch": 1.468183853763791, "grad_norm": 0.1146186962723732, "learning_rate": 3.0256468321952e-05, "loss": 0.318, "step": 9016 }, { "epoch": 1.468346700321622, "grad_norm": 0.11038639396429062, "learning_rate": 3.0251837098560966e-05, "loss": 0.2821, "step": 9017 }, { "epoch": 1.468509546879453, "grad_norm": 0.16585767269134521, "learning_rate": 3.024720568661325e-05, "loss": 0.3244, "step": 9018 }, { "epoch": 1.4686723934372838, "grad_norm": 0.09891217201948166, "learning_rate": 3.0242574086275127e-05, "loss": 0.3149, "step": 9019 }, { "epoch": 1.4688352399951146, "grad_norm": 0.08873266726732254, "learning_rate": 3.0237942297712886e-05, "loss": 0.3216, "step": 9020 }, { "epoch": 1.4689980865529455, "grad_norm": 0.11692974716424942, "learning_rate": 3.0233310321092824e-05, "loss": 0.3348, "step": 9021 }, { "epoch": 1.4691609331107764, "grad_norm": 0.10571146011352539, "learning_rate": 3.022867815658124e-05, "loss": 0.3319, "step": 9022 }, { "epoch": 1.4693237796686072, "grad_norm": 0.11948119848966599, "learning_rate": 3.022404580434445e-05, "loss": 0.3145, "step": 9023 }, { "epoch": 1.469486626226438, "grad_norm": 0.11467956751585007, "learning_rate": 3.021941326454876e-05, "loss": 0.3375, "step": 9024 }, { "epoch": 1.469649472784269, "grad_norm": 0.08629411458969116, "learning_rate": 3.02147805373605e-05, "loss": 0.3717, "step": 9025 }, { "epoch": 1.4698123193421, "grad_norm": 0.12231983989477158, "learning_rate": 3.021014762294599e-05, "loss": 0.3208, "step": 9026 }, { "epoch": 1.4699751658999307, "grad_norm": 0.08890609443187714, "learning_rate": 3.0205514521471574e-05, "loss": 0.3166, "step": 9027 }, { "epoch": 1.4701380124577617, "grad_norm": 0.1006978452205658, "learning_rate": 3.0200881233103584e-05, "loss": 0.2941, "step": 9028 }, { "epoch": 1.4703008590155926, "grad_norm": 0.08305292576551437, "learning_rate": 3.0196247758008388e-05, "loss": 0.2862, "step": 9029 }, { "epoch": 1.4704637055734233, "grad_norm": 0.06857553869485855, "learning_rate": 3.0191614096352318e-05, "loss": 0.3153, "step": 9030 }, { "epoch": 1.4706265521312543, "grad_norm": 0.13960251212120056, "learning_rate": 3.0186980248301755e-05, "loss": 0.3227, "step": 9031 }, { "epoch": 1.4707893986890852, "grad_norm": 0.08265836536884308, "learning_rate": 3.0182346214023054e-05, "loss": 0.3061, "step": 9032 }, { "epoch": 1.4709522452469161, "grad_norm": 0.08955000340938568, "learning_rate": 3.01777119936826e-05, "loss": 0.3242, "step": 9033 }, { "epoch": 1.471115091804747, "grad_norm": 0.12511053681373596, "learning_rate": 3.0173077587446773e-05, "loss": 0.3269, "step": 9034 }, { "epoch": 1.4712779383625778, "grad_norm": 0.11941447854042053, "learning_rate": 3.0168442995481962e-05, "loss": 0.3376, "step": 9035 }, { "epoch": 1.4714407849204088, "grad_norm": 0.0892416387796402, "learning_rate": 3.016380821795456e-05, "loss": 0.3113, "step": 9036 }, { "epoch": 1.4716036314782397, "grad_norm": 0.42727160453796387, "learning_rate": 3.0159173255030972e-05, "loss": 0.3855, "step": 9037 }, { "epoch": 1.4717664780360704, "grad_norm": 0.12289924174547195, "learning_rate": 3.0154538106877612e-05, "loss": 0.3137, "step": 9038 }, { "epoch": 1.4719293245939014, "grad_norm": 0.17496074736118317, "learning_rate": 3.0149902773660883e-05, "loss": 0.2937, "step": 9039 }, { "epoch": 1.4720921711517323, "grad_norm": 0.15256349742412567, "learning_rate": 3.0145267255547215e-05, "loss": 0.3429, "step": 9040 }, { "epoch": 1.4722550177095632, "grad_norm": 0.14808492362499237, "learning_rate": 3.0140631552703037e-05, "loss": 0.3256, "step": 9041 }, { "epoch": 1.4724178642673942, "grad_norm": 0.10517065972089767, "learning_rate": 3.0135995665294792e-05, "loss": 0.2988, "step": 9042 }, { "epoch": 1.472580710825225, "grad_norm": 0.15300628542900085, "learning_rate": 3.0131359593488907e-05, "loss": 0.3556, "step": 9043 }, { "epoch": 1.4727435573830558, "grad_norm": 0.11880088597536087, "learning_rate": 3.0126723337451845e-05, "loss": 0.3348, "step": 9044 }, { "epoch": 1.4729064039408866, "grad_norm": 0.11795885115861893, "learning_rate": 3.0122086897350054e-05, "loss": 0.3118, "step": 9045 }, { "epoch": 1.4730692504987175, "grad_norm": 0.1534019261598587, "learning_rate": 3.011745027334999e-05, "loss": 0.3625, "step": 9046 }, { "epoch": 1.4732320970565485, "grad_norm": 0.27373984456062317, "learning_rate": 3.011281346561814e-05, "loss": 0.3961, "step": 9047 }, { "epoch": 1.4733949436143794, "grad_norm": 0.09418148547410965, "learning_rate": 3.010817647432096e-05, "loss": 0.3161, "step": 9048 }, { "epoch": 1.4735577901722103, "grad_norm": 0.1480366736650467, "learning_rate": 3.0103539299624943e-05, "loss": 0.3327, "step": 9049 }, { "epoch": 1.473720636730041, "grad_norm": 0.15109126269817352, "learning_rate": 3.0098901941696574e-05, "loss": 0.3097, "step": 9050 }, { "epoch": 1.473883483287872, "grad_norm": 0.10531148314476013, "learning_rate": 3.0094264400702355e-05, "loss": 0.3369, "step": 9051 }, { "epoch": 1.474046329845703, "grad_norm": 0.1457006335258484, "learning_rate": 3.008962667680877e-05, "loss": 0.3273, "step": 9052 }, { "epoch": 1.4742091764035337, "grad_norm": 0.09764193743467331, "learning_rate": 3.0084988770182347e-05, "loss": 0.3061, "step": 9053 }, { "epoch": 1.4743720229613646, "grad_norm": 0.13102956116199493, "learning_rate": 3.0080350680989588e-05, "loss": 0.3166, "step": 9054 }, { "epoch": 1.4745348695191955, "grad_norm": 0.09423895925283432, "learning_rate": 3.0075712409397017e-05, "loss": 0.3188, "step": 9055 }, { "epoch": 1.4746977160770265, "grad_norm": 0.2209392637014389, "learning_rate": 3.0071073955571167e-05, "loss": 0.3425, "step": 9056 }, { "epoch": 1.4748605626348574, "grad_norm": 0.1244359090924263, "learning_rate": 3.0066435319678565e-05, "loss": 0.3009, "step": 9057 }, { "epoch": 1.4750234091926882, "grad_norm": 0.15823222696781158, "learning_rate": 3.0061796501885757e-05, "loss": 0.3461, "step": 9058 }, { "epoch": 1.475186255750519, "grad_norm": 0.18512937426567078, "learning_rate": 3.005715750235929e-05, "loss": 0.3446, "step": 9059 }, { "epoch": 1.47534910230835, "grad_norm": 0.14952784776687622, "learning_rate": 3.0052518321265716e-05, "loss": 0.3047, "step": 9060 }, { "epoch": 1.4755119488661808, "grad_norm": 0.11161292344331741, "learning_rate": 3.0047878958771593e-05, "loss": 0.3259, "step": 9061 }, { "epoch": 1.4756747954240117, "grad_norm": 0.13954420387744904, "learning_rate": 3.0043239415043502e-05, "loss": 0.3446, "step": 9062 }, { "epoch": 1.4758376419818426, "grad_norm": 0.1453065127134323, "learning_rate": 3.0038599690247994e-05, "loss": 0.3519, "step": 9063 }, { "epoch": 1.4760004885396736, "grad_norm": 0.09592578560113907, "learning_rate": 3.003395978455167e-05, "loss": 0.3286, "step": 9064 }, { "epoch": 1.4761633350975043, "grad_norm": 0.1170257031917572, "learning_rate": 3.00293196981211e-05, "loss": 0.3389, "step": 9065 }, { "epoch": 1.4763261816553352, "grad_norm": 0.08527153730392456, "learning_rate": 3.0024679431122887e-05, "loss": 0.3048, "step": 9066 }, { "epoch": 1.4764890282131662, "grad_norm": 0.1276351809501648, "learning_rate": 3.0020038983723636e-05, "loss": 0.3228, "step": 9067 }, { "epoch": 1.476651874770997, "grad_norm": 0.13134612143039703, "learning_rate": 3.0015398356089936e-05, "loss": 0.291, "step": 9068 }, { "epoch": 1.4768147213288279, "grad_norm": 0.07159581780433655, "learning_rate": 3.001075754838841e-05, "loss": 0.2966, "step": 9069 }, { "epoch": 1.4769775678866588, "grad_norm": 0.17437522113323212, "learning_rate": 3.000611656078568e-05, "loss": 0.3115, "step": 9070 }, { "epoch": 1.4771404144444897, "grad_norm": 0.14216680824756622, "learning_rate": 3.0001475393448365e-05, "loss": 0.3235, "step": 9071 }, { "epoch": 1.4773032610023207, "grad_norm": 0.08998030424118042, "learning_rate": 2.9996834046543092e-05, "loss": 0.3132, "step": 9072 }, { "epoch": 1.4774661075601514, "grad_norm": 0.13491356372833252, "learning_rate": 2.999219252023652e-05, "loss": 0.3564, "step": 9073 }, { "epoch": 1.4776289541179823, "grad_norm": 0.09640717506408691, "learning_rate": 2.9987550814695263e-05, "loss": 0.2878, "step": 9074 }, { "epoch": 1.4777918006758133, "grad_norm": 0.15387201309204102, "learning_rate": 2.9982908930085997e-05, "loss": 0.3436, "step": 9075 }, { "epoch": 1.477954647233644, "grad_norm": 0.12359809875488281, "learning_rate": 2.997826686657537e-05, "loss": 0.3008, "step": 9076 }, { "epoch": 1.478117493791475, "grad_norm": 0.0941147729754448, "learning_rate": 2.9973624624330048e-05, "loss": 0.3373, "step": 9077 }, { "epoch": 1.4782803403493059, "grad_norm": 0.10608968138694763, "learning_rate": 2.99689822035167e-05, "loss": 0.2954, "step": 9078 }, { "epoch": 1.4784431869071368, "grad_norm": 0.08788453042507172, "learning_rate": 2.9964339604302006e-05, "loss": 0.3315, "step": 9079 }, { "epoch": 1.4786060334649678, "grad_norm": 0.08494104444980621, "learning_rate": 2.995969682685264e-05, "loss": 0.3759, "step": 9080 }, { "epoch": 1.4787688800227985, "grad_norm": 0.12547820806503296, "learning_rate": 2.9955053871335297e-05, "loss": 0.3381, "step": 9081 }, { "epoch": 1.4789317265806294, "grad_norm": 0.11871542781591415, "learning_rate": 2.9950410737916683e-05, "loss": 0.3029, "step": 9082 }, { "epoch": 1.4790945731384602, "grad_norm": 0.11177239567041397, "learning_rate": 2.9945767426763487e-05, "loss": 0.3253, "step": 9083 }, { "epoch": 1.479257419696291, "grad_norm": 0.12521959841251373, "learning_rate": 2.9941123938042433e-05, "loss": 0.3207, "step": 9084 }, { "epoch": 1.479420266254122, "grad_norm": 0.0791482925415039, "learning_rate": 2.9936480271920208e-05, "loss": 0.2849, "step": 9085 }, { "epoch": 1.479583112811953, "grad_norm": 0.1299554854631424, "learning_rate": 2.993183642856356e-05, "loss": 0.3071, "step": 9086 }, { "epoch": 1.479745959369784, "grad_norm": 0.07861040532588959, "learning_rate": 2.992719240813921e-05, "loss": 0.3106, "step": 9087 }, { "epoch": 1.4799088059276146, "grad_norm": 0.18843115866184235, "learning_rate": 2.9922548210813888e-05, "loss": 0.3569, "step": 9088 }, { "epoch": 1.4800716524854456, "grad_norm": 0.19294865429401398, "learning_rate": 2.991790383675434e-05, "loss": 0.357, "step": 9089 }, { "epoch": 1.4802344990432765, "grad_norm": 0.14881591498851776, "learning_rate": 2.9913259286127304e-05, "loss": 0.3168, "step": 9090 }, { "epoch": 1.4803973456011073, "grad_norm": 0.11726152896881104, "learning_rate": 2.9908614559099545e-05, "loss": 0.331, "step": 9091 }, { "epoch": 1.4805601921589382, "grad_norm": 0.10250712931156158, "learning_rate": 2.990396965583781e-05, "loss": 0.349, "step": 9092 }, { "epoch": 1.4807230387167691, "grad_norm": 0.1273154467344284, "learning_rate": 2.9899324576508882e-05, "loss": 0.3263, "step": 9093 }, { "epoch": 1.4808858852746, "grad_norm": 0.12746211886405945, "learning_rate": 2.989467932127951e-05, "loss": 0.2909, "step": 9094 }, { "epoch": 1.481048731832431, "grad_norm": 0.08733245730400085, "learning_rate": 2.9890033890316494e-05, "loss": 0.3027, "step": 9095 }, { "epoch": 1.4812115783902617, "grad_norm": 0.13296842575073242, "learning_rate": 2.988538828378661e-05, "loss": 0.2905, "step": 9096 }, { "epoch": 1.4813744249480927, "grad_norm": 0.09205017983913422, "learning_rate": 2.988074250185664e-05, "loss": 0.3229, "step": 9097 }, { "epoch": 1.4815372715059236, "grad_norm": 0.25111034512519836, "learning_rate": 2.98760965446934e-05, "loss": 0.332, "step": 9098 }, { "epoch": 1.4817001180637543, "grad_norm": 0.10975803434848785, "learning_rate": 2.9871450412463686e-05, "loss": 0.2952, "step": 9099 }, { "epoch": 1.4818629646215853, "grad_norm": 0.07894378155469894, "learning_rate": 2.98668041053343e-05, "loss": 0.3409, "step": 9100 }, { "epoch": 1.4820258111794162, "grad_norm": 0.06881628930568695, "learning_rate": 2.986215762347207e-05, "loss": 0.342, "step": 9101 }, { "epoch": 1.4821886577372472, "grad_norm": 0.11221130937337875, "learning_rate": 2.985751096704382e-05, "loss": 0.3222, "step": 9102 }, { "epoch": 1.482351504295078, "grad_norm": 0.11905089765787125, "learning_rate": 2.9852864136216364e-05, "loss": 0.2834, "step": 9103 }, { "epoch": 1.4825143508529088, "grad_norm": 0.11294862627983093, "learning_rate": 2.9848217131156546e-05, "loss": 0.2967, "step": 9104 }, { "epoch": 1.4826771974107398, "grad_norm": 0.1503704935312271, "learning_rate": 2.9843569952031208e-05, "loss": 0.3301, "step": 9105 }, { "epoch": 1.4828400439685705, "grad_norm": 0.08872713148593903, "learning_rate": 2.9838922599007203e-05, "loss": 0.2791, "step": 9106 }, { "epoch": 1.4830028905264014, "grad_norm": 0.14328154921531677, "learning_rate": 2.9834275072251373e-05, "loss": 0.2963, "step": 9107 }, { "epoch": 1.4831657370842324, "grad_norm": 0.0749233290553093, "learning_rate": 2.9829627371930585e-05, "loss": 0.3203, "step": 9108 }, { "epoch": 1.4833285836420633, "grad_norm": 0.143926203250885, "learning_rate": 2.9824979498211707e-05, "loss": 0.3239, "step": 9109 }, { "epoch": 1.4834914301998943, "grad_norm": 0.10038807988166809, "learning_rate": 2.9820331451261608e-05, "loss": 0.3202, "step": 9110 }, { "epoch": 1.483654276757725, "grad_norm": 0.07224787771701813, "learning_rate": 2.9815683231247167e-05, "loss": 0.3165, "step": 9111 }, { "epoch": 1.483817123315556, "grad_norm": 0.13931992650032043, "learning_rate": 2.9811034838335267e-05, "loss": 0.3203, "step": 9112 }, { "epoch": 1.4839799698733869, "grad_norm": 0.12056708335876465, "learning_rate": 2.980638627269281e-05, "loss": 0.3062, "step": 9113 }, { "epoch": 1.4841428164312176, "grad_norm": 0.08537415415048599, "learning_rate": 2.9801737534486678e-05, "loss": 0.3317, "step": 9114 }, { "epoch": 1.4843056629890485, "grad_norm": 0.1142449751496315, "learning_rate": 2.9797088623883784e-05, "loss": 0.3079, "step": 9115 }, { "epoch": 1.4844685095468795, "grad_norm": 0.15908749401569366, "learning_rate": 2.9792439541051036e-05, "loss": 0.3161, "step": 9116 }, { "epoch": 1.4846313561047104, "grad_norm": 0.0781586617231369, "learning_rate": 2.978779028615535e-05, "loss": 0.311, "step": 9117 }, { "epoch": 1.4847942026625411, "grad_norm": 0.15868863463401794, "learning_rate": 2.9783140859363645e-05, "loss": 0.3467, "step": 9118 }, { "epoch": 1.484957049220372, "grad_norm": 0.13203641772270203, "learning_rate": 2.9778491260842856e-05, "loss": 0.3049, "step": 9119 }, { "epoch": 1.485119895778203, "grad_norm": 0.11151566356420517, "learning_rate": 2.9773841490759914e-05, "loss": 0.323, "step": 9120 }, { "epoch": 1.4852827423360337, "grad_norm": 0.09527257829904556, "learning_rate": 2.9769191549281762e-05, "loss": 0.3394, "step": 9121 }, { "epoch": 1.4854455888938647, "grad_norm": 0.0920921191573143, "learning_rate": 2.9764541436575345e-05, "loss": 0.3471, "step": 9122 }, { "epoch": 1.4856084354516956, "grad_norm": 0.09299101680517197, "learning_rate": 2.9759891152807605e-05, "loss": 0.2984, "step": 9123 }, { "epoch": 1.4857712820095266, "grad_norm": 0.17821462452411652, "learning_rate": 2.9755240698145526e-05, "loss": 0.3436, "step": 9124 }, { "epoch": 1.4859341285673575, "grad_norm": 0.13403046131134033, "learning_rate": 2.9750590072756046e-05, "loss": 0.3181, "step": 9125 }, { "epoch": 1.4860969751251882, "grad_norm": 0.12672211229801178, "learning_rate": 2.9745939276806155e-05, "loss": 0.332, "step": 9126 }, { "epoch": 1.4862598216830192, "grad_norm": 0.060117386281490326, "learning_rate": 2.9741288310462827e-05, "loss": 0.3119, "step": 9127 }, { "epoch": 1.4864226682408501, "grad_norm": 0.18264077603816986, "learning_rate": 2.973663717389304e-05, "loss": 0.3221, "step": 9128 }, { "epoch": 1.4865855147986808, "grad_norm": 0.12637506425380707, "learning_rate": 2.9731985867263795e-05, "loss": 0.3033, "step": 9129 }, { "epoch": 1.4867483613565118, "grad_norm": 0.12016801536083221, "learning_rate": 2.9727334390742067e-05, "loss": 0.3088, "step": 9130 }, { "epoch": 1.4869112079143427, "grad_norm": 0.07234539836645126, "learning_rate": 2.9722682744494883e-05, "loss": 0.3049, "step": 9131 }, { "epoch": 1.4870740544721737, "grad_norm": 0.09690212458372116, "learning_rate": 2.9718030928689232e-05, "loss": 0.3228, "step": 9132 }, { "epoch": 1.4872369010300046, "grad_norm": 0.14677613973617554, "learning_rate": 2.9713378943492137e-05, "loss": 0.3172, "step": 9133 }, { "epoch": 1.4873997475878353, "grad_norm": 0.09095029532909393, "learning_rate": 2.970872678907062e-05, "loss": 0.3558, "step": 9134 }, { "epoch": 1.4875625941456663, "grad_norm": 0.1932687908411026, "learning_rate": 2.970407446559169e-05, "loss": 0.3459, "step": 9135 }, { "epoch": 1.487725440703497, "grad_norm": 0.09430133551359177, "learning_rate": 2.9699421973222408e-05, "loss": 0.3267, "step": 9136 }, { "epoch": 1.487888287261328, "grad_norm": 0.16990071535110474, "learning_rate": 2.969476931212979e-05, "loss": 0.3095, "step": 9137 }, { "epoch": 1.4880511338191589, "grad_norm": 0.09951753914356232, "learning_rate": 2.9690116482480883e-05, "loss": 0.3378, "step": 9138 }, { "epoch": 1.4882139803769898, "grad_norm": 0.11146020144224167, "learning_rate": 2.9685463484442748e-05, "loss": 0.3478, "step": 9139 }, { "epoch": 1.4883768269348208, "grad_norm": 0.09671079367399216, "learning_rate": 2.968081031818244e-05, "loss": 0.352, "step": 9140 }, { "epoch": 1.4885396734926515, "grad_norm": 0.1148214340209961, "learning_rate": 2.967615698386701e-05, "loss": 0.3261, "step": 9141 }, { "epoch": 1.4887025200504824, "grad_norm": 0.10283287614583969, "learning_rate": 2.9671503481663544e-05, "loss": 0.3425, "step": 9142 }, { "epoch": 1.4888653666083134, "grad_norm": 0.14491061866283417, "learning_rate": 2.9666849811739096e-05, "loss": 0.3408, "step": 9143 }, { "epoch": 1.489028213166144, "grad_norm": 0.12916864454746246, "learning_rate": 2.9662195974260764e-05, "loss": 0.3583, "step": 9144 }, { "epoch": 1.489191059723975, "grad_norm": 0.11489920318126678, "learning_rate": 2.965754196939563e-05, "loss": 0.3261, "step": 9145 }, { "epoch": 1.489353906281806, "grad_norm": 0.07318481057882309, "learning_rate": 2.965288779731078e-05, "loss": 0.3263, "step": 9146 }, { "epoch": 1.489516752839637, "grad_norm": 0.14971554279327393, "learning_rate": 2.964823345817332e-05, "loss": 0.3468, "step": 9147 }, { "epoch": 1.4896795993974679, "grad_norm": 0.16851669549942017, "learning_rate": 2.9643578952150348e-05, "loss": 0.3325, "step": 9148 }, { "epoch": 1.4898424459552986, "grad_norm": 0.11984327435493469, "learning_rate": 2.9638924279408987e-05, "loss": 0.3265, "step": 9149 }, { "epoch": 1.4900052925131295, "grad_norm": 0.14836108684539795, "learning_rate": 2.9634269440116334e-05, "loss": 0.3236, "step": 9150 }, { "epoch": 1.4901681390709605, "grad_norm": 0.10158023983240128, "learning_rate": 2.962961443443954e-05, "loss": 0.3031, "step": 9151 }, { "epoch": 1.4903309856287912, "grad_norm": 0.1251240223646164, "learning_rate": 2.9624959262545705e-05, "loss": 0.3088, "step": 9152 }, { "epoch": 1.4904938321866221, "grad_norm": 0.12103909254074097, "learning_rate": 2.962030392460198e-05, "loss": 0.3043, "step": 9153 }, { "epoch": 1.490656678744453, "grad_norm": 0.09688921272754669, "learning_rate": 2.9615648420775498e-05, "loss": 0.3187, "step": 9154 }, { "epoch": 1.490819525302284, "grad_norm": 0.11091458797454834, "learning_rate": 2.961099275123341e-05, "loss": 0.3592, "step": 9155 }, { "epoch": 1.4909823718601147, "grad_norm": 0.14400199055671692, "learning_rate": 2.9606336916142863e-05, "loss": 0.3124, "step": 9156 }, { "epoch": 1.4911452184179457, "grad_norm": 0.06560143083333969, "learning_rate": 2.9601680915671032e-05, "loss": 0.3396, "step": 9157 }, { "epoch": 1.4913080649757766, "grad_norm": 0.0829969197511673, "learning_rate": 2.959702474998506e-05, "loss": 0.2989, "step": 9158 }, { "epoch": 1.4914709115336073, "grad_norm": 0.11360305547714233, "learning_rate": 2.9592368419252124e-05, "loss": 0.328, "step": 9159 }, { "epoch": 1.4916337580914383, "grad_norm": 0.09269708395004272, "learning_rate": 2.958771192363941e-05, "loss": 0.3316, "step": 9160 }, { "epoch": 1.4917966046492692, "grad_norm": 0.09843464940786362, "learning_rate": 2.958305526331409e-05, "loss": 0.3155, "step": 9161 }, { "epoch": 1.4919594512071002, "grad_norm": 0.08368799090385437, "learning_rate": 2.9578398438443355e-05, "loss": 0.3077, "step": 9162 }, { "epoch": 1.492122297764931, "grad_norm": 0.07290221750736237, "learning_rate": 2.9573741449194396e-05, "loss": 0.3105, "step": 9163 }, { "epoch": 1.4922851443227618, "grad_norm": 0.11703277379274368, "learning_rate": 2.9569084295734422e-05, "loss": 0.3226, "step": 9164 }, { "epoch": 1.4924479908805928, "grad_norm": 0.07581275701522827, "learning_rate": 2.956442697823063e-05, "loss": 0.3293, "step": 9165 }, { "epoch": 1.4926108374384237, "grad_norm": 0.29870548844337463, "learning_rate": 2.955976949685023e-05, "loss": 0.3293, "step": 9166 }, { "epoch": 1.4927736839962544, "grad_norm": 0.166034996509552, "learning_rate": 2.9555111851760446e-05, "loss": 0.334, "step": 9167 }, { "epoch": 1.4929365305540854, "grad_norm": 0.17333944141864777, "learning_rate": 2.95504540431285e-05, "loss": 0.3399, "step": 9168 }, { "epoch": 1.4930993771119163, "grad_norm": 0.14420637488365173, "learning_rate": 2.9545796071121623e-05, "loss": 0.3133, "step": 9169 }, { "epoch": 1.4932622236697473, "grad_norm": 0.13249504566192627, "learning_rate": 2.9541137935907044e-05, "loss": 0.3477, "step": 9170 }, { "epoch": 1.4934250702275782, "grad_norm": 0.09951229393482208, "learning_rate": 2.953647963765201e-05, "loss": 0.3253, "step": 9171 }, { "epoch": 1.493587916785409, "grad_norm": 0.09592199325561523, "learning_rate": 2.953182117652376e-05, "loss": 0.3303, "step": 9172 }, { "epoch": 1.4937507633432399, "grad_norm": 0.14638930559158325, "learning_rate": 2.9527162552689557e-05, "loss": 0.368, "step": 9173 }, { "epoch": 1.4939136099010706, "grad_norm": 0.1561962366104126, "learning_rate": 2.952250376631665e-05, "loss": 0.3465, "step": 9174 }, { "epoch": 1.4940764564589015, "grad_norm": 0.13159742951393127, "learning_rate": 2.951784481757231e-05, "loss": 0.2796, "step": 9175 }, { "epoch": 1.4942393030167325, "grad_norm": 0.09328852593898773, "learning_rate": 2.9513185706623802e-05, "loss": 0.3476, "step": 9176 }, { "epoch": 1.4944021495745634, "grad_norm": 0.1302054077386856, "learning_rate": 2.9508526433638417e-05, "loss": 0.3483, "step": 9177 }, { "epoch": 1.4945649961323944, "grad_norm": 0.1024334728717804, "learning_rate": 2.9503866998783414e-05, "loss": 0.319, "step": 9178 }, { "epoch": 1.494727842690225, "grad_norm": 0.09596119076013565, "learning_rate": 2.9499207402226093e-05, "loss": 0.3033, "step": 9179 }, { "epoch": 1.494890689248056, "grad_norm": 0.1349383145570755, "learning_rate": 2.9494547644133752e-05, "loss": 0.3609, "step": 9180 }, { "epoch": 1.495053535805887, "grad_norm": 0.11796808987855911, "learning_rate": 2.9489887724673676e-05, "loss": 0.3215, "step": 9181 }, { "epoch": 1.4952163823637177, "grad_norm": 0.13412721455097198, "learning_rate": 2.9485227644013184e-05, "loss": 0.2944, "step": 9182 }, { "epoch": 1.4953792289215486, "grad_norm": 0.15763500332832336, "learning_rate": 2.9480567402319583e-05, "loss": 0.3088, "step": 9183 }, { "epoch": 1.4955420754793796, "grad_norm": 0.13482646644115448, "learning_rate": 2.9475906999760187e-05, "loss": 0.3154, "step": 9184 }, { "epoch": 1.4957049220372105, "grad_norm": 0.09408082067966461, "learning_rate": 2.947124643650232e-05, "loss": 0.311, "step": 9185 }, { "epoch": 1.4958677685950414, "grad_norm": 0.1258539855480194, "learning_rate": 2.9466585712713314e-05, "loss": 0.3303, "step": 9186 }, { "epoch": 1.4960306151528722, "grad_norm": 0.1409103125333786, "learning_rate": 2.946192482856049e-05, "loss": 0.3424, "step": 9187 }, { "epoch": 1.496193461710703, "grad_norm": 0.11765255033969879, "learning_rate": 2.94572637842112e-05, "loss": 0.3245, "step": 9188 }, { "epoch": 1.496356308268534, "grad_norm": 0.10323627293109894, "learning_rate": 2.9452602579832784e-05, "loss": 0.2985, "step": 9189 }, { "epoch": 1.4965191548263648, "grad_norm": 0.1029001846909523, "learning_rate": 2.9447941215592594e-05, "loss": 0.2961, "step": 9190 }, { "epoch": 1.4966820013841957, "grad_norm": 0.1262088418006897, "learning_rate": 2.9443279691657993e-05, "loss": 0.3498, "step": 9191 }, { "epoch": 1.4968448479420267, "grad_norm": 0.11575964093208313, "learning_rate": 2.9438618008196334e-05, "loss": 0.3208, "step": 9192 }, { "epoch": 1.4970076944998576, "grad_norm": 0.09390699118375778, "learning_rate": 2.9433956165374993e-05, "loss": 0.3439, "step": 9193 }, { "epoch": 1.4971705410576883, "grad_norm": 0.11464785039424896, "learning_rate": 2.9429294163361342e-05, "loss": 0.2773, "step": 9194 }, { "epoch": 1.4973333876155193, "grad_norm": 0.08806897699832916, "learning_rate": 2.9424632002322754e-05, "loss": 0.3259, "step": 9195 }, { "epoch": 1.4974962341733502, "grad_norm": 0.12448421120643616, "learning_rate": 2.9419969682426624e-05, "loss": 0.3224, "step": 9196 }, { "epoch": 1.497659080731181, "grad_norm": 0.12600365281105042, "learning_rate": 2.941530720384034e-05, "loss": 0.3298, "step": 9197 }, { "epoch": 1.4978219272890119, "grad_norm": 0.10896312445402145, "learning_rate": 2.94106445667313e-05, "loss": 0.2996, "step": 9198 }, { "epoch": 1.4979847738468428, "grad_norm": 0.0947503000497818, "learning_rate": 2.9405981771266905e-05, "loss": 0.3445, "step": 9199 }, { "epoch": 1.4981476204046738, "grad_norm": 0.23730027675628662, "learning_rate": 2.9401318817614566e-05, "loss": 0.3252, "step": 9200 }, { "epoch": 1.4983104669625047, "grad_norm": 0.14276014268398285, "learning_rate": 2.939665570594169e-05, "loss": 0.3421, "step": 9201 }, { "epoch": 1.4984733135203354, "grad_norm": 0.12953977286815643, "learning_rate": 2.9391992436415705e-05, "loss": 0.3108, "step": 9202 }, { "epoch": 1.4986361600781664, "grad_norm": 0.1124650090932846, "learning_rate": 2.9387329009204028e-05, "loss": 0.3214, "step": 9203 }, { "epoch": 1.4987990066359973, "grad_norm": 0.12221943587064743, "learning_rate": 2.9382665424474105e-05, "loss": 0.3208, "step": 9204 }, { "epoch": 1.498961853193828, "grad_norm": 0.12916415929794312, "learning_rate": 2.9378001682393353e-05, "loss": 0.3441, "step": 9205 }, { "epoch": 1.499124699751659, "grad_norm": 0.11397609114646912, "learning_rate": 2.9373337783129228e-05, "loss": 0.3189, "step": 9206 }, { "epoch": 1.49928754630949, "grad_norm": 0.10345420986413956, "learning_rate": 2.9368673726849172e-05, "loss": 0.3398, "step": 9207 }, { "epoch": 1.4994503928673208, "grad_norm": 0.16286085546016693, "learning_rate": 2.9364009513720638e-05, "loss": 0.3829, "step": 9208 }, { "epoch": 1.4996132394251518, "grad_norm": 0.14398762583732605, "learning_rate": 2.9359345143911093e-05, "loss": 0.363, "step": 9209 }, { "epoch": 1.4997760859829825, "grad_norm": 0.10147196054458618, "learning_rate": 2.9354680617587992e-05, "loss": 0.3013, "step": 9210 }, { "epoch": 1.4999389325408135, "grad_norm": 0.11758329719305038, "learning_rate": 2.9350015934918812e-05, "loss": 0.3028, "step": 9211 }, { "epoch": 1.5001017790986442, "grad_norm": 0.10433077067136765, "learning_rate": 2.9345351096071027e-05, "loss": 0.3111, "step": 9212 }, { "epoch": 1.5002646256564751, "grad_norm": 0.09258101135492325, "learning_rate": 2.9340686101212113e-05, "loss": 0.3371, "step": 9213 }, { "epoch": 1.500427472214306, "grad_norm": 0.15096113085746765, "learning_rate": 2.9336020950509575e-05, "loss": 0.3341, "step": 9214 }, { "epoch": 1.500590318772137, "grad_norm": 0.11229538917541504, "learning_rate": 2.9331355644130882e-05, "loss": 0.2936, "step": 9215 }, { "epoch": 1.500753165329968, "grad_norm": 0.10711856931447983, "learning_rate": 2.9326690182243544e-05, "loss": 0.329, "step": 9216 }, { "epoch": 1.5009160118877989, "grad_norm": 0.10658066719770432, "learning_rate": 2.9322024565015076e-05, "loss": 0.3389, "step": 9217 }, { "epoch": 1.5010788584456296, "grad_norm": 0.07300041615962982, "learning_rate": 2.931735879261297e-05, "loss": 0.3685, "step": 9218 }, { "epoch": 1.5012417050034603, "grad_norm": 0.136072039604187, "learning_rate": 2.9312692865204744e-05, "loss": 0.2624, "step": 9219 }, { "epoch": 1.5014045515612913, "grad_norm": 0.15090437233448029, "learning_rate": 2.9308026782957933e-05, "loss": 0.31, "step": 9220 }, { "epoch": 1.5015673981191222, "grad_norm": 0.1342563033103943, "learning_rate": 2.930336054604005e-05, "loss": 0.3309, "step": 9221 }, { "epoch": 1.5017302446769532, "grad_norm": 0.21971480548381805, "learning_rate": 2.929869415461863e-05, "loss": 0.3745, "step": 9222 }, { "epoch": 1.501893091234784, "grad_norm": 0.07489597797393799, "learning_rate": 2.9294027608861206e-05, "loss": 0.3154, "step": 9223 }, { "epoch": 1.502055937792615, "grad_norm": 0.12599007785320282, "learning_rate": 2.9289360908935333e-05, "loss": 0.3396, "step": 9224 }, { "epoch": 1.5022187843504458, "grad_norm": 0.11149067431688309, "learning_rate": 2.9284694055008543e-05, "loss": 0.3152, "step": 9225 }, { "epoch": 1.5023816309082767, "grad_norm": 0.13129086792469025, "learning_rate": 2.928002704724841e-05, "loss": 0.3154, "step": 9226 }, { "epoch": 1.5025444774661074, "grad_norm": 0.09790923446416855, "learning_rate": 2.9275359885822474e-05, "loss": 0.3302, "step": 9227 }, { "epoch": 1.5027073240239384, "grad_norm": 0.15280407667160034, "learning_rate": 2.927069257089831e-05, "loss": 0.3468, "step": 9228 }, { "epoch": 1.5028701705817693, "grad_norm": 0.1403975635766983, "learning_rate": 2.9266025102643492e-05, "loss": 0.3117, "step": 9229 }, { "epoch": 1.5030330171396002, "grad_norm": 0.1331418752670288, "learning_rate": 2.9261357481225587e-05, "loss": 0.3174, "step": 9230 }, { "epoch": 1.5031958636974312, "grad_norm": 0.13827696442604065, "learning_rate": 2.9256689706812186e-05, "loss": 0.3698, "step": 9231 }, { "epoch": 1.5033587102552621, "grad_norm": 0.10129844397306442, "learning_rate": 2.9252021779570864e-05, "loss": 0.3169, "step": 9232 }, { "epoch": 1.5035215568130929, "grad_norm": 0.13658447563648224, "learning_rate": 2.9247353699669223e-05, "loss": 0.3093, "step": 9233 }, { "epoch": 1.5036844033709238, "grad_norm": 0.1515842229127884, "learning_rate": 2.9242685467274866e-05, "loss": 0.296, "step": 9234 }, { "epoch": 1.5038472499287545, "grad_norm": 0.08373717963695526, "learning_rate": 2.923801708255538e-05, "loss": 0.3008, "step": 9235 }, { "epoch": 1.5040100964865855, "grad_norm": 0.1837514489889145, "learning_rate": 2.9233348545678386e-05, "loss": 0.3205, "step": 9236 }, { "epoch": 1.5041729430444164, "grad_norm": 0.12413095682859421, "learning_rate": 2.9228679856811498e-05, "loss": 0.3349, "step": 9237 }, { "epoch": 1.5043357896022473, "grad_norm": 0.12150156497955322, "learning_rate": 2.9224011016122334e-05, "loss": 0.3317, "step": 9238 }, { "epoch": 1.5044986361600783, "grad_norm": 0.1293516606092453, "learning_rate": 2.9219342023778513e-05, "loss": 0.3575, "step": 9239 }, { "epoch": 1.504661482717909, "grad_norm": 0.08052036166191101, "learning_rate": 2.921467287994768e-05, "loss": 0.3482, "step": 9240 }, { "epoch": 1.50482432927574, "grad_norm": 0.12256284803152084, "learning_rate": 2.9210003584797457e-05, "loss": 0.3608, "step": 9241 }, { "epoch": 1.5049871758335707, "grad_norm": 0.13856445252895355, "learning_rate": 2.9205334138495494e-05, "loss": 0.3096, "step": 9242 }, { "epoch": 1.5051500223914016, "grad_norm": 0.09049411118030548, "learning_rate": 2.920066454120944e-05, "loss": 0.3406, "step": 9243 }, { "epoch": 1.5053128689492326, "grad_norm": 0.07562018185853958, "learning_rate": 2.919599479310694e-05, "loss": 0.2996, "step": 9244 }, { "epoch": 1.5054757155070635, "grad_norm": 0.07487229257822037, "learning_rate": 2.9191324894355653e-05, "loss": 0.3221, "step": 9245 }, { "epoch": 1.5056385620648944, "grad_norm": 0.1020907387137413, "learning_rate": 2.918665484512325e-05, "loss": 0.3328, "step": 9246 }, { "epoch": 1.5058014086227254, "grad_norm": 0.12994346022605896, "learning_rate": 2.9181984645577388e-05, "loss": 0.3288, "step": 9247 }, { "epoch": 1.505964255180556, "grad_norm": 0.14850632846355438, "learning_rate": 2.917731429588575e-05, "loss": 0.3419, "step": 9248 }, { "epoch": 1.506127101738387, "grad_norm": 0.10621239244937897, "learning_rate": 2.9172643796216015e-05, "loss": 0.2815, "step": 9249 }, { "epoch": 1.5062899482962178, "grad_norm": 0.15716996788978577, "learning_rate": 2.916797314673586e-05, "loss": 0.352, "step": 9250 }, { "epoch": 1.5064527948540487, "grad_norm": 0.13605904579162598, "learning_rate": 2.9163302347612992e-05, "loss": 0.3414, "step": 9251 }, { "epoch": 1.5066156414118796, "grad_norm": 0.09778225421905518, "learning_rate": 2.915863139901509e-05, "loss": 0.2991, "step": 9252 }, { "epoch": 1.5067784879697106, "grad_norm": 0.09216982126235962, "learning_rate": 2.915396030110986e-05, "loss": 0.3183, "step": 9253 }, { "epoch": 1.5069413345275415, "grad_norm": 0.12077051401138306, "learning_rate": 2.914928905406501e-05, "loss": 0.3093, "step": 9254 }, { "epoch": 1.5071041810853725, "grad_norm": 0.10945329815149307, "learning_rate": 2.9144617658048257e-05, "loss": 0.3275, "step": 9255 }, { "epoch": 1.5072670276432032, "grad_norm": 0.10028288513422012, "learning_rate": 2.9139946113227307e-05, "loss": 0.3426, "step": 9256 }, { "epoch": 1.507429874201034, "grad_norm": 0.21025624871253967, "learning_rate": 2.9135274419769885e-05, "loss": 0.3818, "step": 9257 }, { "epoch": 1.5075927207588649, "grad_norm": 0.11728862673044205, "learning_rate": 2.913060257784373e-05, "loss": 0.3308, "step": 9258 }, { "epoch": 1.5077555673166958, "grad_norm": 0.12222476303577423, "learning_rate": 2.9125930587616558e-05, "loss": 0.3301, "step": 9259 }, { "epoch": 1.5079184138745267, "grad_norm": 0.15053009986877441, "learning_rate": 2.9121258449256128e-05, "loss": 0.3704, "step": 9260 }, { "epoch": 1.5080812604323577, "grad_norm": 0.09132813662290573, "learning_rate": 2.911658616293016e-05, "loss": 0.2987, "step": 9261 }, { "epoch": 1.5082441069901886, "grad_norm": 0.13276918232440948, "learning_rate": 2.9111913728806423e-05, "loss": 0.3152, "step": 9262 }, { "epoch": 1.5084069535480193, "grad_norm": 0.14360737800598145, "learning_rate": 2.9107241147052662e-05, "loss": 0.3726, "step": 9263 }, { "epoch": 1.5085698001058503, "grad_norm": 0.11580207943916321, "learning_rate": 2.910256841783664e-05, "loss": 0.374, "step": 9264 }, { "epoch": 1.508732646663681, "grad_norm": 0.16282109916210175, "learning_rate": 2.9097895541326113e-05, "loss": 0.3597, "step": 9265 }, { "epoch": 1.508895493221512, "grad_norm": 0.13946770131587982, "learning_rate": 2.9093222517688866e-05, "loss": 0.3039, "step": 9266 }, { "epoch": 1.509058339779343, "grad_norm": 0.13464713096618652, "learning_rate": 2.9088549347092664e-05, "loss": 0.307, "step": 9267 }, { "epoch": 1.5092211863371738, "grad_norm": 0.12447454780340195, "learning_rate": 2.9083876029705292e-05, "loss": 0.3257, "step": 9268 }, { "epoch": 1.5093840328950048, "grad_norm": 0.12169163674116135, "learning_rate": 2.907920256569453e-05, "loss": 0.3103, "step": 9269 }, { "epoch": 1.5095468794528357, "grad_norm": 0.15429018437862396, "learning_rate": 2.9074528955228176e-05, "loss": 0.3318, "step": 9270 }, { "epoch": 1.5097097260106664, "grad_norm": 0.11070756614208221, "learning_rate": 2.9069855198474027e-05, "loss": 0.2974, "step": 9271 }, { "epoch": 1.5098725725684974, "grad_norm": 0.12235776335000992, "learning_rate": 2.9065181295599884e-05, "loss": 0.2954, "step": 9272 }, { "epoch": 1.510035419126328, "grad_norm": 0.1021125316619873, "learning_rate": 2.906050724677355e-05, "loss": 0.3323, "step": 9273 }, { "epoch": 1.510198265684159, "grad_norm": 0.10645793378353119, "learning_rate": 2.905583305216284e-05, "loss": 0.3311, "step": 9274 }, { "epoch": 1.51036111224199, "grad_norm": 0.1009112298488617, "learning_rate": 2.9051158711935572e-05, "loss": 0.307, "step": 9275 }, { "epoch": 1.510523958799821, "grad_norm": 0.16649582982063293, "learning_rate": 2.904648422625957e-05, "loss": 0.3392, "step": 9276 }, { "epoch": 1.5106868053576519, "grad_norm": 0.10789214074611664, "learning_rate": 2.9041809595302665e-05, "loss": 0.2985, "step": 9277 }, { "epoch": 1.5108496519154826, "grad_norm": 0.14557228982448578, "learning_rate": 2.9037134819232682e-05, "loss": 0.3372, "step": 9278 }, { "epoch": 1.5110124984733135, "grad_norm": 0.13765597343444824, "learning_rate": 2.9032459898217456e-05, "loss": 0.3098, "step": 9279 }, { "epoch": 1.5111753450311443, "grad_norm": 0.08423464745283127, "learning_rate": 2.9027784832424847e-05, "loss": 0.279, "step": 9280 }, { "epoch": 1.5113381915889752, "grad_norm": 0.11968866735696793, "learning_rate": 2.902310962202269e-05, "loss": 0.3784, "step": 9281 }, { "epoch": 1.5115010381468061, "grad_norm": 0.15203019976615906, "learning_rate": 2.901843426717885e-05, "loss": 0.3303, "step": 9282 }, { "epoch": 1.511663884704637, "grad_norm": 0.17898352444171906, "learning_rate": 2.9013758768061177e-05, "loss": 0.3618, "step": 9283 }, { "epoch": 1.511826731262468, "grad_norm": 0.13607558608055115, "learning_rate": 2.9009083124837545e-05, "loss": 0.3346, "step": 9284 }, { "epoch": 1.511989577820299, "grad_norm": 0.1112748309969902, "learning_rate": 2.900440733767581e-05, "loss": 0.3127, "step": 9285 }, { "epoch": 1.5121524243781297, "grad_norm": 0.10967986285686493, "learning_rate": 2.8999731406743857e-05, "loss": 0.329, "step": 9286 }, { "epoch": 1.5123152709359606, "grad_norm": 0.10663550347089767, "learning_rate": 2.8995055332209557e-05, "loss": 0.3329, "step": 9287 }, { "epoch": 1.5124781174937914, "grad_norm": 0.1589100956916809, "learning_rate": 2.8990379114240813e-05, "loss": 0.3813, "step": 9288 }, { "epoch": 1.5126409640516223, "grad_norm": 0.13062931597232819, "learning_rate": 2.8985702753005488e-05, "loss": 0.3355, "step": 9289 }, { "epoch": 1.5128038106094532, "grad_norm": 0.08777404576539993, "learning_rate": 2.89810262486715e-05, "loss": 0.3715, "step": 9290 }, { "epoch": 1.5129666571672842, "grad_norm": 0.09107454866170883, "learning_rate": 2.8976349601406738e-05, "loss": 0.3264, "step": 9291 }, { "epoch": 1.5131295037251151, "grad_norm": 0.13844537734985352, "learning_rate": 2.8971672811379124e-05, "loss": 0.3288, "step": 9292 }, { "epoch": 1.5132923502829458, "grad_norm": 0.1001070886850357, "learning_rate": 2.8966995878756542e-05, "loss": 0.3063, "step": 9293 }, { "epoch": 1.5134551968407768, "grad_norm": 0.10437746345996857, "learning_rate": 2.896231880370693e-05, "loss": 0.3225, "step": 9294 }, { "epoch": 1.5136180433986075, "grad_norm": 0.08351027220487595, "learning_rate": 2.8957641586398203e-05, "loss": 0.3345, "step": 9295 }, { "epoch": 1.5137808899564384, "grad_norm": 0.11145865172147751, "learning_rate": 2.895296422699828e-05, "loss": 0.3179, "step": 9296 }, { "epoch": 1.5139437365142694, "grad_norm": 0.0936957374215126, "learning_rate": 2.8948286725675105e-05, "loss": 0.309, "step": 9297 }, { "epoch": 1.5141065830721003, "grad_norm": 0.07541800290346146, "learning_rate": 2.8943609082596605e-05, "loss": 0.3182, "step": 9298 }, { "epoch": 1.5142694296299313, "grad_norm": 0.1888742744922638, "learning_rate": 2.893893129793072e-05, "loss": 0.3442, "step": 9299 }, { "epoch": 1.5144322761877622, "grad_norm": 0.0588415153324604, "learning_rate": 2.8934253371845404e-05, "loss": 0.3033, "step": 9300 }, { "epoch": 1.514595122745593, "grad_norm": 0.14229705929756165, "learning_rate": 2.8929575304508605e-05, "loss": 0.3383, "step": 9301 }, { "epoch": 1.5147579693034239, "grad_norm": 0.0720701813697815, "learning_rate": 2.8924897096088278e-05, "loss": 0.3219, "step": 9302 }, { "epoch": 1.5149208158612546, "grad_norm": 0.1178135871887207, "learning_rate": 2.8920218746752382e-05, "loss": 0.3395, "step": 9303 }, { "epoch": 1.5150836624190855, "grad_norm": 0.19573628902435303, "learning_rate": 2.8915540256668893e-05, "loss": 0.3052, "step": 9304 }, { "epoch": 1.5152465089769165, "grad_norm": 0.11788944154977798, "learning_rate": 2.8910861626005776e-05, "loss": 0.3132, "step": 9305 }, { "epoch": 1.5154093555347474, "grad_norm": 0.08048341423273087, "learning_rate": 2.8906182854931006e-05, "loss": 0.3047, "step": 9306 }, { "epoch": 1.5155722020925784, "grad_norm": 0.10591293126344681, "learning_rate": 2.8901503943612575e-05, "loss": 0.3549, "step": 9307 }, { "epoch": 1.5157350486504093, "grad_norm": 0.13953542709350586, "learning_rate": 2.8896824892218462e-05, "loss": 0.3258, "step": 9308 }, { "epoch": 1.51589789520824, "grad_norm": 0.12485210597515106, "learning_rate": 2.8892145700916657e-05, "loss": 0.3142, "step": 9309 }, { "epoch": 1.516060741766071, "grad_norm": 0.12700465321540833, "learning_rate": 2.8887466369875156e-05, "loss": 0.3477, "step": 9310 }, { "epoch": 1.5162235883239017, "grad_norm": 0.12672238051891327, "learning_rate": 2.8882786899261967e-05, "loss": 0.3173, "step": 9311 }, { "epoch": 1.5163864348817326, "grad_norm": 0.10883746296167374, "learning_rate": 2.88781072892451e-05, "loss": 0.3363, "step": 9312 }, { "epoch": 1.5165492814395636, "grad_norm": 0.09839123487472534, "learning_rate": 2.8873427539992558e-05, "loss": 0.2921, "step": 9313 }, { "epoch": 1.5167121279973945, "grad_norm": 0.1372932344675064, "learning_rate": 2.8868747651672362e-05, "loss": 0.313, "step": 9314 }, { "epoch": 1.5168749745552255, "grad_norm": 0.1254909485578537, "learning_rate": 2.886406762445254e-05, "loss": 0.3451, "step": 9315 }, { "epoch": 1.5170378211130562, "grad_norm": 0.09898732602596283, "learning_rate": 2.8859387458501107e-05, "loss": 0.3453, "step": 9316 }, { "epoch": 1.5172006676708871, "grad_norm": 0.09472239017486572, "learning_rate": 2.885470715398611e-05, "loss": 0.3111, "step": 9317 }, { "epoch": 1.5173635142287178, "grad_norm": 0.08686795085668564, "learning_rate": 2.8850026711075568e-05, "loss": 0.352, "step": 9318 }, { "epoch": 1.5175263607865488, "grad_norm": 0.061077117919921875, "learning_rate": 2.884534612993754e-05, "loss": 0.3604, "step": 9319 }, { "epoch": 1.5176892073443797, "grad_norm": 0.10768693685531616, "learning_rate": 2.8840665410740053e-05, "loss": 0.3029, "step": 9320 }, { "epoch": 1.5178520539022107, "grad_norm": 0.0795334130525589, "learning_rate": 2.883598455365118e-05, "loss": 0.3009, "step": 9321 }, { "epoch": 1.5180149004600416, "grad_norm": 0.08460868895053864, "learning_rate": 2.8831303558838967e-05, "loss": 0.3074, "step": 9322 }, { "epoch": 1.5181777470178726, "grad_norm": 0.10156621038913727, "learning_rate": 2.8826622426471476e-05, "loss": 0.298, "step": 9323 }, { "epoch": 1.5183405935757033, "grad_norm": 0.1280212104320526, "learning_rate": 2.882194115671678e-05, "loss": 0.2907, "step": 9324 }, { "epoch": 1.5185034401335342, "grad_norm": 0.1589098870754242, "learning_rate": 2.8817259749742937e-05, "loss": 0.3545, "step": 9325 }, { "epoch": 1.518666286691365, "grad_norm": 0.10527440905570984, "learning_rate": 2.8812578205718044e-05, "loss": 0.3202, "step": 9326 }, { "epoch": 1.5188291332491959, "grad_norm": 0.13233475387096405, "learning_rate": 2.8807896524810157e-05, "loss": 0.3424, "step": 9327 }, { "epoch": 1.5189919798070268, "grad_norm": 0.09510055184364319, "learning_rate": 2.8803214707187388e-05, "loss": 0.2787, "step": 9328 }, { "epoch": 1.5191548263648578, "grad_norm": 0.12037348747253418, "learning_rate": 2.879853275301781e-05, "loss": 0.3457, "step": 9329 }, { "epoch": 1.5193176729226887, "grad_norm": 0.12625710666179657, "learning_rate": 2.8793850662469525e-05, "loss": 0.3031, "step": 9330 }, { "epoch": 1.5194805194805194, "grad_norm": 0.2611662447452545, "learning_rate": 2.878916843571063e-05, "loss": 0.319, "step": 9331 }, { "epoch": 1.5196433660383504, "grad_norm": 0.11819341778755188, "learning_rate": 2.878448607290924e-05, "loss": 0.2777, "step": 9332 }, { "epoch": 1.519806212596181, "grad_norm": 0.11745555698871613, "learning_rate": 2.8779803574233467e-05, "loss": 0.2962, "step": 9333 }, { "epoch": 1.519969059154012, "grad_norm": 0.14595435559749603, "learning_rate": 2.8775120939851414e-05, "loss": 0.2981, "step": 9334 }, { "epoch": 1.520131905711843, "grad_norm": 0.09212230890989304, "learning_rate": 2.8770438169931213e-05, "loss": 0.3288, "step": 9335 }, { "epoch": 1.520294752269674, "grad_norm": 0.1438244730234146, "learning_rate": 2.8765755264640982e-05, "loss": 0.3264, "step": 9336 }, { "epoch": 1.5204575988275049, "grad_norm": 0.10439243912696838, "learning_rate": 2.876107222414886e-05, "loss": 0.3215, "step": 9337 }, { "epoch": 1.5206204453853358, "grad_norm": 0.08575992286205292, "learning_rate": 2.8756389048622965e-05, "loss": 0.3395, "step": 9338 }, { "epoch": 1.5207832919431665, "grad_norm": 0.10207653045654297, "learning_rate": 2.8751705738231465e-05, "loss": 0.3089, "step": 9339 }, { "epoch": 1.5209461385009975, "grad_norm": 0.08281007409095764, "learning_rate": 2.8747022293142478e-05, "loss": 0.2846, "step": 9340 }, { "epoch": 1.5211089850588282, "grad_norm": 0.13912245631217957, "learning_rate": 2.8742338713524165e-05, "loss": 0.2894, "step": 9341 }, { "epoch": 1.5212718316166591, "grad_norm": 0.12413644045591354, "learning_rate": 2.8737654999544684e-05, "loss": 0.2842, "step": 9342 }, { "epoch": 1.52143467817449, "grad_norm": 0.09714622795581818, "learning_rate": 2.8732971151372186e-05, "loss": 0.2931, "step": 9343 }, { "epoch": 1.521597524732321, "grad_norm": 0.1513437181711197, "learning_rate": 2.872828716917484e-05, "loss": 0.3185, "step": 9344 }, { "epoch": 1.521760371290152, "grad_norm": 0.08687487244606018, "learning_rate": 2.8723603053120813e-05, "loss": 0.3018, "step": 9345 }, { "epoch": 1.521923217847983, "grad_norm": 0.09491769224405289, "learning_rate": 2.8718918803378285e-05, "loss": 0.3142, "step": 9346 }, { "epoch": 1.5220860644058136, "grad_norm": 0.0852535218000412, "learning_rate": 2.8714234420115426e-05, "loss": 0.3165, "step": 9347 }, { "epoch": 1.5222489109636443, "grad_norm": 0.1543126404285431, "learning_rate": 2.870954990350042e-05, "loss": 0.3014, "step": 9348 }, { "epoch": 1.5224117575214753, "grad_norm": 0.07767289876937866, "learning_rate": 2.870486525370147e-05, "loss": 0.3171, "step": 9349 }, { "epoch": 1.5225746040793062, "grad_norm": 0.09120125323534012, "learning_rate": 2.8700180470886746e-05, "loss": 0.3155, "step": 9350 }, { "epoch": 1.5227374506371372, "grad_norm": 0.1428910791873932, "learning_rate": 2.8695495555224462e-05, "loss": 0.2984, "step": 9351 }, { "epoch": 1.5229002971949681, "grad_norm": 0.12513227760791779, "learning_rate": 2.869081050688281e-05, "loss": 0.3565, "step": 9352 }, { "epoch": 1.523063143752799, "grad_norm": 0.11262910068035126, "learning_rate": 2.868612532603001e-05, "loss": 0.3033, "step": 9353 }, { "epoch": 1.5232259903106298, "grad_norm": 0.06631166487932205, "learning_rate": 2.868144001283426e-05, "loss": 0.3448, "step": 9354 }, { "epoch": 1.5233888368684607, "grad_norm": 0.08340290188789368, "learning_rate": 2.8676754567463794e-05, "loss": 0.3572, "step": 9355 }, { "epoch": 1.5235516834262914, "grad_norm": 0.09441952407360077, "learning_rate": 2.8672068990086816e-05, "loss": 0.3398, "step": 9356 }, { "epoch": 1.5237145299841224, "grad_norm": 0.10340870171785355, "learning_rate": 2.866738328087156e-05, "loss": 0.3104, "step": 9357 }, { "epoch": 1.5238773765419533, "grad_norm": 0.17004764080047607, "learning_rate": 2.866269743998625e-05, "loss": 0.333, "step": 9358 }, { "epoch": 1.5240402230997843, "grad_norm": 0.0967104434967041, "learning_rate": 2.8658011467599144e-05, "loss": 0.273, "step": 9359 }, { "epoch": 1.5242030696576152, "grad_norm": 0.1331065595149994, "learning_rate": 2.8653325363878457e-05, "loss": 0.3133, "step": 9360 }, { "epoch": 1.5243659162154461, "grad_norm": 0.11295019090175629, "learning_rate": 2.8648639128992437e-05, "loss": 0.3381, "step": 9361 }, { "epoch": 1.5245287627732769, "grad_norm": 0.1147247925400734, "learning_rate": 2.8643952763109354e-05, "loss": 0.2973, "step": 9362 }, { "epoch": 1.5246916093311078, "grad_norm": 0.12372297793626785, "learning_rate": 2.863926626639744e-05, "loss": 0.3448, "step": 9363 }, { "epoch": 1.5248544558889385, "grad_norm": 0.11177261918783188, "learning_rate": 2.8634579639024974e-05, "loss": 0.3584, "step": 9364 }, { "epoch": 1.5250173024467695, "grad_norm": 0.1257629692554474, "learning_rate": 2.8629892881160196e-05, "loss": 0.312, "step": 9365 }, { "epoch": 1.5251801490046004, "grad_norm": 0.13483984768390656, "learning_rate": 2.86252059929714e-05, "loss": 0.3208, "step": 9366 }, { "epoch": 1.5253429955624314, "grad_norm": 0.1296701580286026, "learning_rate": 2.8620518974626844e-05, "loss": 0.2964, "step": 9367 }, { "epoch": 1.5255058421202623, "grad_norm": 0.1283799558877945, "learning_rate": 2.8615831826294802e-05, "loss": 0.3205, "step": 9368 }, { "epoch": 1.525668688678093, "grad_norm": 0.11388231813907623, "learning_rate": 2.861114454814357e-05, "loss": 0.2983, "step": 9369 }, { "epoch": 1.525831535235924, "grad_norm": 0.0783543735742569, "learning_rate": 2.860645714034143e-05, "loss": 0.3027, "step": 9370 }, { "epoch": 1.5259943817937547, "grad_norm": 0.0866178646683693, "learning_rate": 2.8601769603056667e-05, "loss": 0.3354, "step": 9371 }, { "epoch": 1.5261572283515856, "grad_norm": 0.07307881861925125, "learning_rate": 2.8597081936457586e-05, "loss": 0.3971, "step": 9372 }, { "epoch": 1.5263200749094166, "grad_norm": 0.13563033938407898, "learning_rate": 2.8592394140712496e-05, "loss": 0.3329, "step": 9373 }, { "epoch": 1.5264829214672475, "grad_norm": 0.13426901400089264, "learning_rate": 2.8587706215989685e-05, "loss": 0.3544, "step": 9374 }, { "epoch": 1.5266457680250785, "grad_norm": 0.1672828048467636, "learning_rate": 2.8583018162457477e-05, "loss": 0.3461, "step": 9375 }, { "epoch": 1.5268086145829094, "grad_norm": 0.082709401845932, "learning_rate": 2.8578329980284176e-05, "loss": 0.3001, "step": 9376 }, { "epoch": 1.5269714611407401, "grad_norm": 0.07934501022100449, "learning_rate": 2.857364166963811e-05, "loss": 0.3356, "step": 9377 }, { "epoch": 1.527134307698571, "grad_norm": 0.13881231844425201, "learning_rate": 2.8568953230687602e-05, "loss": 0.2874, "step": 9378 }, { "epoch": 1.5272971542564018, "grad_norm": 0.13335201144218445, "learning_rate": 2.856426466360098e-05, "loss": 0.3251, "step": 9379 }, { "epoch": 1.5274600008142327, "grad_norm": 0.06691563874483109, "learning_rate": 2.855957596854658e-05, "loss": 0.3215, "step": 9380 }, { "epoch": 1.5276228473720637, "grad_norm": 0.16060103476047516, "learning_rate": 2.855488714569273e-05, "loss": 0.3388, "step": 9381 }, { "epoch": 1.5277856939298946, "grad_norm": 0.08273844420909882, "learning_rate": 2.8550198195207793e-05, "loss": 0.3041, "step": 9382 }, { "epoch": 1.5279485404877255, "grad_norm": 0.14062687754631042, "learning_rate": 2.8545509117260095e-05, "loss": 0.323, "step": 9383 }, { "epoch": 1.5281113870455565, "grad_norm": 0.0995192676782608, "learning_rate": 2.8540819912018e-05, "loss": 0.3059, "step": 9384 }, { "epoch": 1.5282742336033872, "grad_norm": 0.1016082689166069, "learning_rate": 2.8536130579649862e-05, "loss": 0.3039, "step": 9385 }, { "epoch": 1.528437080161218, "grad_norm": 0.11958592385053635, "learning_rate": 2.8531441120324044e-05, "loss": 0.307, "step": 9386 }, { "epoch": 1.5285999267190489, "grad_norm": 0.08693855255842209, "learning_rate": 2.85267515342089e-05, "loss": 0.3417, "step": 9387 }, { "epoch": 1.5287627732768798, "grad_norm": 0.16857191920280457, "learning_rate": 2.8522061821472816e-05, "loss": 0.3574, "step": 9388 }, { "epoch": 1.5289256198347108, "grad_norm": 0.11177261918783188, "learning_rate": 2.851737198228416e-05, "loss": 0.3426, "step": 9389 }, { "epoch": 1.5290884663925417, "grad_norm": 0.08914846926927567, "learning_rate": 2.851268201681131e-05, "loss": 0.3133, "step": 9390 }, { "epoch": 1.5292513129503726, "grad_norm": 0.13259856402873993, "learning_rate": 2.8507991925222654e-05, "loss": 0.3198, "step": 9391 }, { "epoch": 1.5294141595082034, "grad_norm": 0.12895925343036652, "learning_rate": 2.850330170768657e-05, "loss": 0.3004, "step": 9392 }, { "epoch": 1.5295770060660343, "grad_norm": 0.0767710879445076, "learning_rate": 2.849861136437147e-05, "loss": 0.3077, "step": 9393 }, { "epoch": 1.529739852623865, "grad_norm": 0.09973394125699997, "learning_rate": 2.8493920895445732e-05, "loss": 0.3223, "step": 9394 }, { "epoch": 1.529902699181696, "grad_norm": 0.12541937828063965, "learning_rate": 2.8489230301077773e-05, "loss": 0.3204, "step": 9395 }, { "epoch": 1.530065545739527, "grad_norm": 0.07240208983421326, "learning_rate": 2.8484539581435987e-05, "loss": 0.3655, "step": 9396 }, { "epoch": 1.5302283922973579, "grad_norm": 0.1157727763056755, "learning_rate": 2.8479848736688792e-05, "loss": 0.3259, "step": 9397 }, { "epoch": 1.5303912388551888, "grad_norm": 0.15301261842250824, "learning_rate": 2.8475157767004594e-05, "loss": 0.3198, "step": 9398 }, { "epoch": 1.5305540854130197, "grad_norm": 0.06384367495775223, "learning_rate": 2.8470466672551832e-05, "loss": 0.3078, "step": 9399 }, { "epoch": 1.5307169319708505, "grad_norm": 0.10357733070850372, "learning_rate": 2.8465775453498912e-05, "loss": 0.3446, "step": 9400 }, { "epoch": 1.5308797785286814, "grad_norm": 0.09166138619184494, "learning_rate": 2.846108411001427e-05, "loss": 0.3235, "step": 9401 }, { "epoch": 1.5310426250865121, "grad_norm": 0.0944158285856247, "learning_rate": 2.845639264226634e-05, "loss": 0.343, "step": 9402 }, { "epoch": 1.531205471644343, "grad_norm": 0.09270351380109787, "learning_rate": 2.8451701050423556e-05, "loss": 0.3227, "step": 9403 }, { "epoch": 1.531368318202174, "grad_norm": 0.107051320374012, "learning_rate": 2.8447009334654368e-05, "loss": 0.2916, "step": 9404 }, { "epoch": 1.531531164760005, "grad_norm": 0.14613038301467896, "learning_rate": 2.8442317495127214e-05, "loss": 0.3148, "step": 9405 }, { "epoch": 1.531694011317836, "grad_norm": 0.09688584506511688, "learning_rate": 2.8437625532010547e-05, "loss": 0.3282, "step": 9406 }, { "epoch": 1.5318568578756666, "grad_norm": 0.14030693471431732, "learning_rate": 2.8432933445472825e-05, "loss": 0.2728, "step": 9407 }, { "epoch": 1.5320197044334976, "grad_norm": 0.12516510486602783, "learning_rate": 2.842824123568251e-05, "loss": 0.313, "step": 9408 }, { "epoch": 1.5321825509913283, "grad_norm": 0.10685066878795624, "learning_rate": 2.8423548902808055e-05, "loss": 0.3509, "step": 9409 }, { "epoch": 1.5323453975491592, "grad_norm": 0.12240488827228546, "learning_rate": 2.8418856447017944e-05, "loss": 0.2846, "step": 9410 }, { "epoch": 1.5325082441069902, "grad_norm": 0.17252862453460693, "learning_rate": 2.8414163868480643e-05, "loss": 0.3467, "step": 9411 }, { "epoch": 1.532671090664821, "grad_norm": 0.18737170100212097, "learning_rate": 2.8409471167364627e-05, "loss": 0.3508, "step": 9412 }, { "epoch": 1.532833937222652, "grad_norm": 0.06855074316263199, "learning_rate": 2.8404778343838384e-05, "loss": 0.3387, "step": 9413 }, { "epoch": 1.532996783780483, "grad_norm": 0.13112150132656097, "learning_rate": 2.8400085398070396e-05, "loss": 0.3083, "step": 9414 }, { "epoch": 1.5331596303383137, "grad_norm": 0.1384282112121582, "learning_rate": 2.8395392330229158e-05, "loss": 0.359, "step": 9415 }, { "epoch": 1.5333224768961446, "grad_norm": 0.11655545979738235, "learning_rate": 2.839069914048316e-05, "loss": 0.3521, "step": 9416 }, { "epoch": 1.5334853234539754, "grad_norm": 0.10329712182283401, "learning_rate": 2.8386005829000906e-05, "loss": 0.3383, "step": 9417 }, { "epoch": 1.5336481700118063, "grad_norm": 0.08948850631713867, "learning_rate": 2.83813123959509e-05, "loss": 0.3276, "step": 9418 }, { "epoch": 1.5338110165696373, "grad_norm": 0.09755252301692963, "learning_rate": 2.8376618841501644e-05, "loss": 0.283, "step": 9419 }, { "epoch": 1.5339738631274682, "grad_norm": 0.07537470757961273, "learning_rate": 2.8371925165821655e-05, "loss": 0.3203, "step": 9420 }, { "epoch": 1.5341367096852991, "grad_norm": 0.09506051987409592, "learning_rate": 2.836723136907945e-05, "loss": 0.354, "step": 9421 }, { "epoch": 1.5342995562431299, "grad_norm": 0.09583331644535065, "learning_rate": 2.836253745144355e-05, "loss": 0.3595, "step": 9422 }, { "epoch": 1.5344624028009608, "grad_norm": 0.08142264187335968, "learning_rate": 2.835784341308248e-05, "loss": 0.3177, "step": 9423 }, { "epoch": 1.5346252493587915, "grad_norm": 0.09972239285707474, "learning_rate": 2.835314925416478e-05, "loss": 0.3559, "step": 9424 }, { "epoch": 1.5347880959166225, "grad_norm": 0.10661208629608154, "learning_rate": 2.8348454974858963e-05, "loss": 0.3351, "step": 9425 }, { "epoch": 1.5349509424744534, "grad_norm": 0.11624015122652054, "learning_rate": 2.834376057533359e-05, "loss": 0.2843, "step": 9426 }, { "epoch": 1.5351137890322843, "grad_norm": 0.10863351076841354, "learning_rate": 2.8339066055757192e-05, "loss": 0.2777, "step": 9427 }, { "epoch": 1.5352766355901153, "grad_norm": 0.07356984168291092, "learning_rate": 2.833437141629831e-05, "loss": 0.2923, "step": 9428 }, { "epoch": 1.5354394821479462, "grad_norm": 0.09971224516630173, "learning_rate": 2.832967665712551e-05, "loss": 0.364, "step": 9429 }, { "epoch": 1.535602328705777, "grad_norm": 0.11835826188325882, "learning_rate": 2.8324981778407346e-05, "loss": 0.3054, "step": 9430 }, { "epoch": 1.535765175263608, "grad_norm": 0.10800289362668991, "learning_rate": 2.8320286780312368e-05, "loss": 0.3537, "step": 9431 }, { "epoch": 1.5359280218214386, "grad_norm": 0.0868193581700325, "learning_rate": 2.831559166300915e-05, "loss": 0.2908, "step": 9432 }, { "epoch": 1.5360908683792696, "grad_norm": 0.11494462192058563, "learning_rate": 2.831089642666626e-05, "loss": 0.3286, "step": 9433 }, { "epoch": 1.5362537149371005, "grad_norm": 0.10006947070360184, "learning_rate": 2.8306201071452267e-05, "loss": 0.3018, "step": 9434 }, { "epoch": 1.5364165614949314, "grad_norm": 0.1578715443611145, "learning_rate": 2.8301505597535747e-05, "loss": 0.3396, "step": 9435 }, { "epoch": 1.5365794080527624, "grad_norm": 0.11736463010311127, "learning_rate": 2.8296810005085285e-05, "loss": 0.321, "step": 9436 }, { "epoch": 1.5367422546105933, "grad_norm": 0.11209940165281296, "learning_rate": 2.8292114294269473e-05, "loss": 0.3362, "step": 9437 }, { "epoch": 1.536905101168424, "grad_norm": 0.07833357900381088, "learning_rate": 2.828741846525688e-05, "loss": 0.3211, "step": 9438 }, { "epoch": 1.537067947726255, "grad_norm": 0.1780005395412445, "learning_rate": 2.8282722518216132e-05, "loss": 0.338, "step": 9439 }, { "epoch": 1.5372307942840857, "grad_norm": 0.16662365198135376, "learning_rate": 2.8278026453315798e-05, "loss": 0.3513, "step": 9440 }, { "epoch": 1.5373936408419167, "grad_norm": 0.14022774994373322, "learning_rate": 2.82733302707245e-05, "loss": 0.3249, "step": 9441 }, { "epoch": 1.5375564873997476, "grad_norm": 0.08012684434652328, "learning_rate": 2.8268633970610835e-05, "loss": 0.2992, "step": 9442 }, { "epoch": 1.5377193339575785, "grad_norm": 0.09038519114255905, "learning_rate": 2.8263937553143414e-05, "loss": 0.3789, "step": 9443 }, { "epoch": 1.5378821805154095, "grad_norm": 0.15755639970302582, "learning_rate": 2.8259241018490862e-05, "loss": 0.3402, "step": 9444 }, { "epoch": 1.5380450270732402, "grad_norm": 0.11850988864898682, "learning_rate": 2.825454436682179e-05, "loss": 0.3302, "step": 9445 }, { "epoch": 1.5382078736310711, "grad_norm": 0.09078934043645859, "learning_rate": 2.8249847598304823e-05, "loss": 0.3417, "step": 9446 }, { "epoch": 1.5383707201889019, "grad_norm": 0.1146509200334549, "learning_rate": 2.8245150713108593e-05, "loss": 0.336, "step": 9447 }, { "epoch": 1.5385335667467328, "grad_norm": 0.12330369651317596, "learning_rate": 2.8240453711401725e-05, "loss": 0.3087, "step": 9448 }, { "epoch": 1.5386964133045637, "grad_norm": 0.11515755206346512, "learning_rate": 2.8235756593352862e-05, "loss": 0.3319, "step": 9449 }, { "epoch": 1.5388592598623947, "grad_norm": 0.09979831427335739, "learning_rate": 2.8231059359130646e-05, "loss": 0.2674, "step": 9450 }, { "epoch": 1.5390221064202256, "grad_norm": 0.09841741621494293, "learning_rate": 2.8226362008903716e-05, "loss": 0.3262, "step": 9451 }, { "epoch": 1.5391849529780566, "grad_norm": 0.1828593760728836, "learning_rate": 2.822166454284072e-05, "loss": 0.3184, "step": 9452 }, { "epoch": 1.5393477995358873, "grad_norm": 0.09057825803756714, "learning_rate": 2.8216966961110326e-05, "loss": 0.3334, "step": 9453 }, { "epoch": 1.5395106460937182, "grad_norm": 0.1042889654636383, "learning_rate": 2.8212269263881168e-05, "loss": 0.2971, "step": 9454 }, { "epoch": 1.539673492651549, "grad_norm": 0.09292799234390259, "learning_rate": 2.820757145132193e-05, "loss": 0.3079, "step": 9455 }, { "epoch": 1.53983633920938, "grad_norm": 0.0994790568947792, "learning_rate": 2.8202873523601253e-05, "loss": 0.3003, "step": 9456 }, { "epoch": 1.5399991857672108, "grad_norm": 0.10557515174150467, "learning_rate": 2.8198175480887833e-05, "loss": 0.3182, "step": 9457 }, { "epoch": 1.5401620323250418, "grad_norm": 0.14442649483680725, "learning_rate": 2.8193477323350326e-05, "loss": 0.2972, "step": 9458 }, { "epoch": 1.5403248788828727, "grad_norm": 0.08027383685112, "learning_rate": 2.8188779051157415e-05, "loss": 0.3311, "step": 9459 }, { "epoch": 1.5404877254407034, "grad_norm": 0.11763273179531097, "learning_rate": 2.818408066447778e-05, "loss": 0.3004, "step": 9460 }, { "epoch": 1.5406505719985344, "grad_norm": 0.11865686625242233, "learning_rate": 2.8179382163480117e-05, "loss": 0.3499, "step": 9461 }, { "epoch": 1.540813418556365, "grad_norm": 0.11405785381793976, "learning_rate": 2.8174683548333104e-05, "loss": 0.3175, "step": 9462 }, { "epoch": 1.540976265114196, "grad_norm": 0.14118878543376923, "learning_rate": 2.8169984819205437e-05, "loss": 0.3149, "step": 9463 }, { "epoch": 1.541139111672027, "grad_norm": 0.12460941821336746, "learning_rate": 2.816528597626582e-05, "loss": 0.3128, "step": 9464 }, { "epoch": 1.541301958229858, "grad_norm": 0.06539027392864227, "learning_rate": 2.816058701968295e-05, "loss": 0.3369, "step": 9465 }, { "epoch": 1.5414648047876889, "grad_norm": 0.10768627375364304, "learning_rate": 2.8155887949625538e-05, "loss": 0.3314, "step": 9466 }, { "epoch": 1.5416276513455198, "grad_norm": 0.1421060413122177, "learning_rate": 2.8151188766262294e-05, "loss": 0.339, "step": 9467 }, { "epoch": 1.5417904979033505, "grad_norm": 0.1875634640455246, "learning_rate": 2.8146489469761937e-05, "loss": 0.3035, "step": 9468 }, { "epoch": 1.5419533444611815, "grad_norm": 0.09888039529323578, "learning_rate": 2.8141790060293172e-05, "loss": 0.3206, "step": 9469 }, { "epoch": 1.5421161910190122, "grad_norm": 0.11372821778059006, "learning_rate": 2.813709053802474e-05, "loss": 0.3035, "step": 9470 }, { "epoch": 1.5422790375768431, "grad_norm": 0.07903310656547546, "learning_rate": 2.8132390903125354e-05, "loss": 0.3259, "step": 9471 }, { "epoch": 1.542441884134674, "grad_norm": 0.14587900042533875, "learning_rate": 2.8127691155763752e-05, "loss": 0.3868, "step": 9472 }, { "epoch": 1.542604730692505, "grad_norm": 0.10098598897457123, "learning_rate": 2.8122991296108665e-05, "loss": 0.3104, "step": 9473 }, { "epoch": 1.542767577250336, "grad_norm": 0.09804800897836685, "learning_rate": 2.8118291324328828e-05, "loss": 0.3176, "step": 9474 }, { "epoch": 1.542930423808167, "grad_norm": 0.11960494518280029, "learning_rate": 2.8113591240593e-05, "loss": 0.3329, "step": 9475 }, { "epoch": 1.5430932703659976, "grad_norm": 0.08477237820625305, "learning_rate": 2.810889104506991e-05, "loss": 0.2946, "step": 9476 }, { "epoch": 1.5432561169238284, "grad_norm": 0.09801109880208969, "learning_rate": 2.8104190737928328e-05, "loss": 0.3442, "step": 9477 }, { "epoch": 1.5434189634816593, "grad_norm": 0.09680110961198807, "learning_rate": 2.8099490319336985e-05, "loss": 0.2744, "step": 9478 }, { "epoch": 1.5435818100394902, "grad_norm": 0.1198873519897461, "learning_rate": 2.809478978946466e-05, "loss": 0.3305, "step": 9479 }, { "epoch": 1.5437446565973212, "grad_norm": 0.12506404519081116, "learning_rate": 2.809008914848011e-05, "loss": 0.3402, "step": 9480 }, { "epoch": 1.5439075031551521, "grad_norm": 0.10984239727258682, "learning_rate": 2.80853883965521e-05, "loss": 0.3207, "step": 9481 }, { "epoch": 1.544070349712983, "grad_norm": 0.10603474080562592, "learning_rate": 2.8080687533849398e-05, "loss": 0.3294, "step": 9482 }, { "epoch": 1.5442331962708138, "grad_norm": 0.14659489691257477, "learning_rate": 2.8075986560540785e-05, "loss": 0.376, "step": 9483 }, { "epoch": 1.5443960428286447, "grad_norm": 0.13435466587543488, "learning_rate": 2.8071285476795045e-05, "loss": 0.3526, "step": 9484 }, { "epoch": 1.5445588893864755, "grad_norm": 0.15051934123039246, "learning_rate": 2.806658428278095e-05, "loss": 0.2939, "step": 9485 }, { "epoch": 1.5447217359443064, "grad_norm": 0.09611768275499344, "learning_rate": 2.8061882978667285e-05, "loss": 0.3096, "step": 9486 }, { "epoch": 1.5448845825021373, "grad_norm": 0.10062777996063232, "learning_rate": 2.805718156462286e-05, "loss": 0.3509, "step": 9487 }, { "epoch": 1.5450474290599683, "grad_norm": 0.08896886557340622, "learning_rate": 2.8052480040816452e-05, "loss": 0.3052, "step": 9488 }, { "epoch": 1.5452102756177992, "grad_norm": 0.14798372983932495, "learning_rate": 2.8047778407416863e-05, "loss": 0.3317, "step": 9489 }, { "epoch": 1.5453731221756302, "grad_norm": 0.10433103144168854, "learning_rate": 2.80430766645929e-05, "loss": 0.3065, "step": 9490 }, { "epoch": 1.5455359687334609, "grad_norm": 0.09785410761833191, "learning_rate": 2.8038374812513368e-05, "loss": 0.3023, "step": 9491 }, { "epoch": 1.5456988152912918, "grad_norm": 0.13502837717533112, "learning_rate": 2.8033672851347075e-05, "loss": 0.3452, "step": 9492 }, { "epoch": 1.5458616618491225, "grad_norm": 0.16801908612251282, "learning_rate": 2.8028970781262838e-05, "loss": 0.3023, "step": 9493 }, { "epoch": 1.5460245084069535, "grad_norm": 0.17073020339012146, "learning_rate": 2.802426860242947e-05, "loss": 0.3259, "step": 9494 }, { "epoch": 1.5461873549647844, "grad_norm": 0.10617312788963318, "learning_rate": 2.801956631501581e-05, "loss": 0.337, "step": 9495 }, { "epoch": 1.5463502015226154, "grad_norm": 0.07387016713619232, "learning_rate": 2.8014863919190665e-05, "loss": 0.3736, "step": 9496 }, { "epoch": 1.5465130480804463, "grad_norm": 0.17410409450531006, "learning_rate": 2.8010161415122877e-05, "loss": 0.3643, "step": 9497 }, { "epoch": 1.546675894638277, "grad_norm": 0.0853879302740097, "learning_rate": 2.800545880298127e-05, "loss": 0.2779, "step": 9498 }, { "epoch": 1.546838741196108, "grad_norm": 0.1117926836013794, "learning_rate": 2.8000756082934694e-05, "loss": 0.3599, "step": 9499 }, { "epoch": 1.5470015877539387, "grad_norm": 0.11081238836050034, "learning_rate": 2.7996053255151978e-05, "loss": 0.3013, "step": 9500 }, { "epoch": 1.5471644343117696, "grad_norm": 0.10350022464990616, "learning_rate": 2.799135031980198e-05, "loss": 0.3335, "step": 9501 }, { "epoch": 1.5473272808696006, "grad_norm": 0.13816004991531372, "learning_rate": 2.7986647277053536e-05, "loss": 0.3212, "step": 9502 }, { "epoch": 1.5474901274274315, "grad_norm": 0.08031178265810013, "learning_rate": 2.7981944127075506e-05, "loss": 0.3857, "step": 9503 }, { "epoch": 1.5476529739852625, "grad_norm": 0.09559421986341476, "learning_rate": 2.7977240870036753e-05, "loss": 0.3016, "step": 9504 }, { "epoch": 1.5478158205430934, "grad_norm": 0.08927009254693985, "learning_rate": 2.7972537506106128e-05, "loss": 0.2918, "step": 9505 }, { "epoch": 1.5479786671009241, "grad_norm": 0.10987944155931473, "learning_rate": 2.79678340354525e-05, "loss": 0.3239, "step": 9506 }, { "epoch": 1.548141513658755, "grad_norm": 0.13123869895935059, "learning_rate": 2.7963130458244742e-05, "loss": 0.3145, "step": 9507 }, { "epoch": 1.5483043602165858, "grad_norm": 0.07721959799528122, "learning_rate": 2.795842677465172e-05, "loss": 0.3573, "step": 9508 }, { "epoch": 1.5484672067744167, "grad_norm": 0.10663094371557236, "learning_rate": 2.795372298484231e-05, "loss": 0.3103, "step": 9509 }, { "epoch": 1.5486300533322477, "grad_norm": 0.17902329564094543, "learning_rate": 2.7949019088985407e-05, "loss": 0.3547, "step": 9510 }, { "epoch": 1.5487928998900786, "grad_norm": 0.11664141714572906, "learning_rate": 2.794431508724988e-05, "loss": 0.318, "step": 9511 }, { "epoch": 1.5489557464479096, "grad_norm": 0.10145417600870132, "learning_rate": 2.7939610979804613e-05, "loss": 0.288, "step": 9512 }, { "epoch": 1.5491185930057405, "grad_norm": 0.12015964090824127, "learning_rate": 2.793490676681851e-05, "loss": 0.3349, "step": 9513 }, { "epoch": 1.5492814395635712, "grad_norm": 0.10277725756168365, "learning_rate": 2.7930202448460457e-05, "loss": 0.3108, "step": 9514 }, { "epoch": 1.549444286121402, "grad_norm": 0.07322125136852264, "learning_rate": 2.792549802489936e-05, "loss": 0.2823, "step": 9515 }, { "epoch": 1.549607132679233, "grad_norm": 0.0893096774816513, "learning_rate": 2.7920793496304115e-05, "loss": 0.2892, "step": 9516 }, { "epoch": 1.5497699792370638, "grad_norm": 0.18102863430976868, "learning_rate": 2.7916088862843636e-05, "loss": 0.374, "step": 9517 }, { "epoch": 1.5499328257948948, "grad_norm": 0.09951746463775635, "learning_rate": 2.791138412468683e-05, "loss": 0.3136, "step": 9518 }, { "epoch": 1.5500956723527257, "grad_norm": 0.2091970443725586, "learning_rate": 2.7906679282002612e-05, "loss": 0.3676, "step": 9519 }, { "epoch": 1.5502585189105567, "grad_norm": 0.08850834518671036, "learning_rate": 2.7901974334959898e-05, "loss": 0.3078, "step": 9520 }, { "epoch": 1.5504213654683874, "grad_norm": 0.10750266909599304, "learning_rate": 2.789726928372761e-05, "loss": 0.324, "step": 9521 }, { "epoch": 1.5505842120262183, "grad_norm": 0.11638843268156052, "learning_rate": 2.7892564128474674e-05, "loss": 0.3485, "step": 9522 }, { "epoch": 1.550747058584049, "grad_norm": 0.11205942928791046, "learning_rate": 2.7887858869370022e-05, "loss": 0.332, "step": 9523 }, { "epoch": 1.55090990514188, "grad_norm": 0.10138506442308426, "learning_rate": 2.7883153506582578e-05, "loss": 0.3168, "step": 9524 }, { "epoch": 1.551072751699711, "grad_norm": 0.1441614031791687, "learning_rate": 2.787844804028129e-05, "loss": 0.3494, "step": 9525 }, { "epoch": 1.5512355982575419, "grad_norm": 0.13888530433177948, "learning_rate": 2.7873742470635096e-05, "loss": 0.2883, "step": 9526 }, { "epoch": 1.5513984448153728, "grad_norm": 0.0955568253993988, "learning_rate": 2.7869036797812932e-05, "loss": 0.3006, "step": 9527 }, { "epoch": 1.5515612913732038, "grad_norm": 0.08259762078523636, "learning_rate": 2.7864331021983752e-05, "loss": 0.3603, "step": 9528 }, { "epoch": 1.5517241379310345, "grad_norm": 0.12047525495290756, "learning_rate": 2.785962514331651e-05, "loss": 0.3148, "step": 9529 }, { "epoch": 1.5518869844888654, "grad_norm": 0.15274731814861298, "learning_rate": 2.785491916198016e-05, "loss": 0.3002, "step": 9530 }, { "epoch": 1.5520498310466961, "grad_norm": 0.128007709980011, "learning_rate": 2.785021307814365e-05, "loss": 0.2999, "step": 9531 }, { "epoch": 1.552212677604527, "grad_norm": 0.13127955794334412, "learning_rate": 2.784550689197596e-05, "loss": 0.3592, "step": 9532 }, { "epoch": 1.552375524162358, "grad_norm": 0.12019015848636627, "learning_rate": 2.784080060364604e-05, "loss": 0.3133, "step": 9533 }, { "epoch": 1.552538370720189, "grad_norm": 0.08975199609994888, "learning_rate": 2.7836094213322866e-05, "loss": 0.3434, "step": 9534 }, { "epoch": 1.55270121727802, "grad_norm": 0.11162018030881882, "learning_rate": 2.783138772117542e-05, "loss": 0.3206, "step": 9535 }, { "epoch": 1.5528640638358506, "grad_norm": 0.11362948268651962, "learning_rate": 2.782668112737266e-05, "loss": 0.3445, "step": 9536 }, { "epoch": 1.5530269103936816, "grad_norm": 0.14353595674037933, "learning_rate": 2.7821974432083593e-05, "loss": 0.3282, "step": 9537 }, { "epoch": 1.5531897569515123, "grad_norm": 0.1486390084028244, "learning_rate": 2.781726763547718e-05, "loss": 0.3675, "step": 9538 }, { "epoch": 1.5533526035093432, "grad_norm": 0.11848443746566772, "learning_rate": 2.781256073772242e-05, "loss": 0.2981, "step": 9539 }, { "epoch": 1.5535154500671742, "grad_norm": 0.11395349353551865, "learning_rate": 2.7807853738988304e-05, "loss": 0.2856, "step": 9540 }, { "epoch": 1.5536782966250051, "grad_norm": 0.12079402059316635, "learning_rate": 2.7803146639443828e-05, "loss": 0.3168, "step": 9541 }, { "epoch": 1.553841143182836, "grad_norm": 0.1256796419620514, "learning_rate": 2.7798439439257983e-05, "loss": 0.3224, "step": 9542 }, { "epoch": 1.554003989740667, "grad_norm": 0.13586224615573883, "learning_rate": 2.7793732138599783e-05, "loss": 0.3386, "step": 9543 }, { "epoch": 1.5541668362984977, "grad_norm": 0.20906968414783478, "learning_rate": 2.7789024737638225e-05, "loss": 0.363, "step": 9544 }, { "epoch": 1.5543296828563287, "grad_norm": 0.08514275401830673, "learning_rate": 2.7784317236542323e-05, "loss": 0.3259, "step": 9545 }, { "epoch": 1.5544925294141594, "grad_norm": 0.12300635874271393, "learning_rate": 2.777960963548109e-05, "loss": 0.3312, "step": 9546 }, { "epoch": 1.5546553759719903, "grad_norm": 0.11224191635847092, "learning_rate": 2.7774901934623547e-05, "loss": 0.3034, "step": 9547 }, { "epoch": 1.5548182225298213, "grad_norm": 0.16161800920963287, "learning_rate": 2.777019413413871e-05, "loss": 0.3082, "step": 9548 }, { "epoch": 1.5549810690876522, "grad_norm": 0.13888296484947205, "learning_rate": 2.7765486234195603e-05, "loss": 0.2955, "step": 9549 }, { "epoch": 1.5551439156454832, "grad_norm": 0.08767514675855637, "learning_rate": 2.7760778234963257e-05, "loss": 0.2811, "step": 9550 }, { "epoch": 1.5553067622033139, "grad_norm": 0.1329532116651535, "learning_rate": 2.7756070136610695e-05, "loss": 0.322, "step": 9551 }, { "epoch": 1.5554696087611448, "grad_norm": 0.11829017847776413, "learning_rate": 2.7751361939306968e-05, "loss": 0.3183, "step": 9552 }, { "epoch": 1.5556324553189755, "grad_norm": 0.07895653694868088, "learning_rate": 2.7746653643221094e-05, "loss": 0.3037, "step": 9553 }, { "epoch": 1.5557953018768065, "grad_norm": 0.12809492647647858, "learning_rate": 2.774194524852213e-05, "loss": 0.3271, "step": 9554 }, { "epoch": 1.5559581484346374, "grad_norm": 0.17405562102794647, "learning_rate": 2.7737236755379115e-05, "loss": 0.32, "step": 9555 }, { "epoch": 1.5561209949924684, "grad_norm": 0.11374755948781967, "learning_rate": 2.77325281639611e-05, "loss": 0.3263, "step": 9556 }, { "epoch": 1.5562838415502993, "grad_norm": 0.12581686675548553, "learning_rate": 2.7727819474437134e-05, "loss": 0.2803, "step": 9557 }, { "epoch": 1.5564466881081302, "grad_norm": 0.1209331825375557, "learning_rate": 2.7723110686976276e-05, "loss": 0.3389, "step": 9558 }, { "epoch": 1.556609534665961, "grad_norm": 0.12335702031850815, "learning_rate": 2.7718401801747596e-05, "loss": 0.3112, "step": 9559 }, { "epoch": 1.556772381223792, "grad_norm": 0.14209908246994019, "learning_rate": 2.7713692818920134e-05, "loss": 0.2982, "step": 9560 }, { "epoch": 1.5569352277816226, "grad_norm": 0.08639208972454071, "learning_rate": 2.7708983738662975e-05, "loss": 0.337, "step": 9561 }, { "epoch": 1.5570980743394536, "grad_norm": 0.1251346617937088, "learning_rate": 2.7704274561145183e-05, "loss": 0.3257, "step": 9562 }, { "epoch": 1.5572609208972845, "grad_norm": 0.10606740415096283, "learning_rate": 2.769956528653583e-05, "loss": 0.2906, "step": 9563 }, { "epoch": 1.5574237674551155, "grad_norm": 0.12801235914230347, "learning_rate": 2.7694855915003992e-05, "loss": 0.3811, "step": 9564 }, { "epoch": 1.5575866140129464, "grad_norm": 0.08417795598506927, "learning_rate": 2.7690146446718756e-05, "loss": 0.2835, "step": 9565 }, { "epoch": 1.5577494605707773, "grad_norm": 0.08660190552473068, "learning_rate": 2.768543688184921e-05, "loss": 0.2858, "step": 9566 }, { "epoch": 1.557912307128608, "grad_norm": 0.09767420589923859, "learning_rate": 2.7680727220564423e-05, "loss": 0.326, "step": 9567 }, { "epoch": 1.558075153686439, "grad_norm": 0.1720123142004013, "learning_rate": 2.7676017463033505e-05, "loss": 0.3271, "step": 9568 }, { "epoch": 1.5582380002442697, "grad_norm": 0.15845122933387756, "learning_rate": 2.767130760942554e-05, "loss": 0.3646, "step": 9569 }, { "epoch": 1.5584008468021007, "grad_norm": 0.11741479486227036, "learning_rate": 2.7666597659909633e-05, "loss": 0.329, "step": 9570 }, { "epoch": 1.5585636933599316, "grad_norm": 0.14667320251464844, "learning_rate": 2.766188761465487e-05, "loss": 0.3104, "step": 9571 }, { "epoch": 1.5587265399177626, "grad_norm": 0.10818704962730408, "learning_rate": 2.7657177473830377e-05, "loss": 0.3327, "step": 9572 }, { "epoch": 1.5588893864755935, "grad_norm": 0.08605903387069702, "learning_rate": 2.7652467237605244e-05, "loss": 0.3386, "step": 9573 }, { "epoch": 1.5590522330334242, "grad_norm": 0.1136036291718483, "learning_rate": 2.7647756906148592e-05, "loss": 0.2843, "step": 9574 }, { "epoch": 1.5592150795912552, "grad_norm": 0.1258399486541748, "learning_rate": 2.7643046479629535e-05, "loss": 0.3274, "step": 9575 }, { "epoch": 1.5593779261490859, "grad_norm": 0.12355941534042358, "learning_rate": 2.7638335958217194e-05, "loss": 0.3273, "step": 9576 }, { "epoch": 1.5595407727069168, "grad_norm": 0.13209298253059387, "learning_rate": 2.7633625342080687e-05, "loss": 0.3375, "step": 9577 }, { "epoch": 1.5597036192647478, "grad_norm": 0.12218068540096283, "learning_rate": 2.7628914631389137e-05, "loss": 0.3451, "step": 9578 }, { "epoch": 1.5598664658225787, "grad_norm": 0.07521352916955948, "learning_rate": 2.7624203826311672e-05, "loss": 0.339, "step": 9579 }, { "epoch": 1.5600293123804096, "grad_norm": 0.1525420993566513, "learning_rate": 2.7619492927017432e-05, "loss": 0.3401, "step": 9580 }, { "epoch": 1.5601921589382406, "grad_norm": 0.12194935232400894, "learning_rate": 2.7614781933675544e-05, "loss": 0.3515, "step": 9581 }, { "epoch": 1.5603550054960713, "grad_norm": 0.14178267121315002, "learning_rate": 2.7610070846455155e-05, "loss": 0.3299, "step": 9582 }, { "epoch": 1.5605178520539023, "grad_norm": 0.09824329614639282, "learning_rate": 2.7605359665525397e-05, "loss": 0.302, "step": 9583 }, { "epoch": 1.560680698611733, "grad_norm": 0.1207236722111702, "learning_rate": 2.7600648391055423e-05, "loss": 0.3413, "step": 9584 }, { "epoch": 1.560843545169564, "grad_norm": 0.16882480680942535, "learning_rate": 2.759593702321438e-05, "loss": 0.3018, "step": 9585 }, { "epoch": 1.5610063917273949, "grad_norm": 0.09389737993478775, "learning_rate": 2.7591225562171423e-05, "loss": 0.2987, "step": 9586 }, { "epoch": 1.5611692382852258, "grad_norm": 0.1354668289422989, "learning_rate": 2.7586514008095703e-05, "loss": 0.2877, "step": 9587 }, { "epoch": 1.5613320848430567, "grad_norm": 0.11803212761878967, "learning_rate": 2.7581802361156383e-05, "loss": 0.3255, "step": 9588 }, { "epoch": 1.5614949314008875, "grad_norm": 0.07835965603590012, "learning_rate": 2.7577090621522622e-05, "loss": 0.2872, "step": 9589 }, { "epoch": 1.5616577779587184, "grad_norm": 0.12692181766033173, "learning_rate": 2.757237878936359e-05, "loss": 0.3373, "step": 9590 }, { "epoch": 1.5618206245165491, "grad_norm": 0.11076603084802628, "learning_rate": 2.7567666864848446e-05, "loss": 0.3132, "step": 9591 }, { "epoch": 1.56198347107438, "grad_norm": 0.13910037279129028, "learning_rate": 2.7562954848146373e-05, "loss": 0.3404, "step": 9592 }, { "epoch": 1.562146317632211, "grad_norm": 0.14513790607452393, "learning_rate": 2.7558242739426537e-05, "loss": 0.2966, "step": 9593 }, { "epoch": 1.562309164190042, "grad_norm": 0.09758560359477997, "learning_rate": 2.755353053885813e-05, "loss": 0.3423, "step": 9594 }, { "epoch": 1.562472010747873, "grad_norm": 0.092023566365242, "learning_rate": 2.7548818246610314e-05, "loss": 0.3107, "step": 9595 }, { "epoch": 1.5626348573057038, "grad_norm": 0.0898042544722557, "learning_rate": 2.7544105862852294e-05, "loss": 0.2843, "step": 9596 }, { "epoch": 1.5627977038635346, "grad_norm": 0.08229818940162659, "learning_rate": 2.7539393387753255e-05, "loss": 0.3431, "step": 9597 }, { "epoch": 1.5629605504213655, "grad_norm": 0.11387749761343002, "learning_rate": 2.753468082148238e-05, "loss": 0.3231, "step": 9598 }, { "epoch": 1.5631233969791962, "grad_norm": 0.08790311962366104, "learning_rate": 2.7529968164208868e-05, "loss": 0.3243, "step": 9599 }, { "epoch": 1.5632862435370272, "grad_norm": 0.09413792192935944, "learning_rate": 2.752525541610192e-05, "loss": 0.3578, "step": 9600 }, { "epoch": 1.563449090094858, "grad_norm": 0.07630365341901779, "learning_rate": 2.752054257733074e-05, "loss": 0.337, "step": 9601 }, { "epoch": 1.563611936652689, "grad_norm": 0.11729612946510315, "learning_rate": 2.7515829648064523e-05, "loss": 0.3307, "step": 9602 }, { "epoch": 1.56377478321052, "grad_norm": 0.14105112850666046, "learning_rate": 2.751111662847249e-05, "loss": 0.3419, "step": 9603 }, { "epoch": 1.563937629768351, "grad_norm": 0.12900923192501068, "learning_rate": 2.7506403518723844e-05, "loss": 0.3075, "step": 9604 }, { "epoch": 1.5641004763261817, "grad_norm": 0.18904128670692444, "learning_rate": 2.75016903189878e-05, "loss": 0.3409, "step": 9605 }, { "epoch": 1.5642633228840124, "grad_norm": 0.10748482495546341, "learning_rate": 2.7496977029433583e-05, "loss": 0.3243, "step": 9606 }, { "epoch": 1.5644261694418433, "grad_norm": 0.09946861118078232, "learning_rate": 2.749226365023041e-05, "loss": 0.2744, "step": 9607 }, { "epoch": 1.5645890159996743, "grad_norm": 0.12894496321678162, "learning_rate": 2.7487550181547506e-05, "loss": 0.353, "step": 9608 }, { "epoch": 1.5647518625575052, "grad_norm": 0.1648220270872116, "learning_rate": 2.748283662355409e-05, "loss": 0.3498, "step": 9609 }, { "epoch": 1.5649147091153361, "grad_norm": 0.08386227488517761, "learning_rate": 2.7478122976419406e-05, "loss": 0.3005, "step": 9610 }, { "epoch": 1.565077555673167, "grad_norm": 0.10154242813587189, "learning_rate": 2.747340924031268e-05, "loss": 0.3268, "step": 9611 }, { "epoch": 1.5652404022309978, "grad_norm": 0.11096500605344772, "learning_rate": 2.746869541540316e-05, "loss": 0.2859, "step": 9612 }, { "epoch": 1.5654032487888287, "grad_norm": 0.1070442795753479, "learning_rate": 2.746398150186007e-05, "loss": 0.2789, "step": 9613 }, { "epoch": 1.5655660953466595, "grad_norm": 0.12101031839847565, "learning_rate": 2.7459267499852665e-05, "loss": 0.2978, "step": 9614 }, { "epoch": 1.5657289419044904, "grad_norm": 0.10549594461917877, "learning_rate": 2.745455340955019e-05, "loss": 0.3095, "step": 9615 }, { "epoch": 1.5658917884623214, "grad_norm": 0.1460365504026413, "learning_rate": 2.7449839231121884e-05, "loss": 0.3325, "step": 9616 }, { "epoch": 1.5660546350201523, "grad_norm": 0.16545726358890533, "learning_rate": 2.7445124964737016e-05, "loss": 0.3541, "step": 9617 }, { "epoch": 1.5662174815779832, "grad_norm": 0.0920349508523941, "learning_rate": 2.7440410610564838e-05, "loss": 0.3209, "step": 9618 }, { "epoch": 1.5663803281358142, "grad_norm": 0.09561797976493835, "learning_rate": 2.7435696168774606e-05, "loss": 0.3484, "step": 9619 }, { "epoch": 1.566543174693645, "grad_norm": 0.1432243138551712, "learning_rate": 2.743098163953558e-05, "loss": 0.3212, "step": 9620 }, { "epoch": 1.5667060212514758, "grad_norm": 0.11038947850465775, "learning_rate": 2.742626702301703e-05, "loss": 0.334, "step": 9621 }, { "epoch": 1.5668688678093066, "grad_norm": 0.12288721650838852, "learning_rate": 2.7421552319388227e-05, "loss": 0.3253, "step": 9622 }, { "epoch": 1.5670317143671375, "grad_norm": 0.10033957660198212, "learning_rate": 2.7416837528818447e-05, "loss": 0.3031, "step": 9623 }, { "epoch": 1.5671945609249684, "grad_norm": 0.09906261414289474, "learning_rate": 2.741212265147695e-05, "loss": 0.3484, "step": 9624 }, { "epoch": 1.5673574074827994, "grad_norm": 0.1233249381184578, "learning_rate": 2.7407407687533025e-05, "loss": 0.3754, "step": 9625 }, { "epoch": 1.5675202540406303, "grad_norm": 0.10487710684537888, "learning_rate": 2.7402692637155956e-05, "loss": 0.3192, "step": 9626 }, { "epoch": 1.567683100598461, "grad_norm": 0.08702248334884644, "learning_rate": 2.7397977500515016e-05, "loss": 0.3329, "step": 9627 }, { "epoch": 1.567845947156292, "grad_norm": 0.10618305951356888, "learning_rate": 2.739326227777951e-05, "loss": 0.2983, "step": 9628 }, { "epoch": 1.5680087937141227, "grad_norm": 0.13908669352531433, "learning_rate": 2.73885469691187e-05, "loss": 0.3396, "step": 9629 }, { "epoch": 1.5681716402719537, "grad_norm": 0.13001467287540436, "learning_rate": 2.7383831574701912e-05, "loss": 0.3432, "step": 9630 }, { "epoch": 1.5683344868297846, "grad_norm": 0.10381217300891876, "learning_rate": 2.737911609469842e-05, "loss": 0.2905, "step": 9631 }, { "epoch": 1.5684973333876155, "grad_norm": 0.08356919884681702, "learning_rate": 2.7374400529277543e-05, "loss": 0.2789, "step": 9632 }, { "epoch": 1.5686601799454465, "grad_norm": 0.10174167901277542, "learning_rate": 2.7369684878608566e-05, "loss": 0.2999, "step": 9633 }, { "epoch": 1.5688230265032774, "grad_norm": 0.1209452673792839, "learning_rate": 2.7364969142860802e-05, "loss": 0.3157, "step": 9634 }, { "epoch": 1.5689858730611081, "grad_norm": 0.12996339797973633, "learning_rate": 2.7360253322203562e-05, "loss": 0.316, "step": 9635 }, { "epoch": 1.569148719618939, "grad_norm": 0.0815204307436943, "learning_rate": 2.7355537416806147e-05, "loss": 0.3108, "step": 9636 }, { "epoch": 1.5693115661767698, "grad_norm": 0.09818227589130402, "learning_rate": 2.7350821426837893e-05, "loss": 0.3072, "step": 9637 }, { "epoch": 1.5694744127346008, "grad_norm": 0.11508981883525848, "learning_rate": 2.7346105352468098e-05, "loss": 0.3168, "step": 9638 }, { "epoch": 1.5696372592924317, "grad_norm": 0.09003358334302902, "learning_rate": 2.73413891938661e-05, "loss": 0.3212, "step": 9639 }, { "epoch": 1.5698001058502626, "grad_norm": 0.1652512401342392, "learning_rate": 2.7336672951201214e-05, "loss": 0.3493, "step": 9640 }, { "epoch": 1.5699629524080936, "grad_norm": 0.10908998548984528, "learning_rate": 2.733195662464276e-05, "loss": 0.3281, "step": 9641 }, { "epoch": 1.5701257989659245, "grad_norm": 0.11558933556079865, "learning_rate": 2.732724021436008e-05, "loss": 0.3124, "step": 9642 }, { "epoch": 1.5702886455237552, "grad_norm": 0.07336977869272232, "learning_rate": 2.732252372052251e-05, "loss": 0.2882, "step": 9643 }, { "epoch": 1.570451492081586, "grad_norm": 0.15767060220241547, "learning_rate": 2.7317807143299374e-05, "loss": 0.3299, "step": 9644 }, { "epoch": 1.570614338639417, "grad_norm": 0.13234861195087433, "learning_rate": 2.7313090482860022e-05, "loss": 0.2974, "step": 9645 }, { "epoch": 1.5707771851972478, "grad_norm": 0.10186504572629929, "learning_rate": 2.7308373739373783e-05, "loss": 0.3716, "step": 9646 }, { "epoch": 1.5709400317550788, "grad_norm": 0.09191518276929855, "learning_rate": 2.730365691301001e-05, "loss": 0.306, "step": 9647 }, { "epoch": 1.5711028783129097, "grad_norm": 0.07641424983739853, "learning_rate": 2.7298940003938066e-05, "loss": 0.3048, "step": 9648 }, { "epoch": 1.5712657248707407, "grad_norm": 0.06880488991737366, "learning_rate": 2.7294223012327274e-05, "loss": 0.3256, "step": 9649 }, { "epoch": 1.5714285714285714, "grad_norm": 0.12632496654987335, "learning_rate": 2.7289505938347016e-05, "loss": 0.3202, "step": 9650 }, { "epoch": 1.5715914179864023, "grad_norm": 0.07377190887928009, "learning_rate": 2.728478878216662e-05, "loss": 0.3298, "step": 9651 }, { "epoch": 1.571754264544233, "grad_norm": 0.16999340057373047, "learning_rate": 2.7280071543955476e-05, "loss": 0.3249, "step": 9652 }, { "epoch": 1.571917111102064, "grad_norm": 0.11834636330604553, "learning_rate": 2.727535422388292e-05, "loss": 0.3582, "step": 9653 }, { "epoch": 1.572079957659895, "grad_norm": 0.10617437958717346, "learning_rate": 2.727063682211834e-05, "loss": 0.2964, "step": 9654 }, { "epoch": 1.5722428042177259, "grad_norm": 0.08667565137147903, "learning_rate": 2.7265919338831086e-05, "loss": 0.2962, "step": 9655 }, { "epoch": 1.5724056507755568, "grad_norm": 0.13530570268630981, "learning_rate": 2.726120177419054e-05, "loss": 0.3233, "step": 9656 }, { "epoch": 1.5725684973333878, "grad_norm": 0.08611363917589188, "learning_rate": 2.7256484128366083e-05, "loss": 0.3502, "step": 9657 }, { "epoch": 1.5727313438912185, "grad_norm": 0.1299453228712082, "learning_rate": 2.7251766401527077e-05, "loss": 0.3142, "step": 9658 }, { "epoch": 1.5728941904490494, "grad_norm": 0.17110846936702728, "learning_rate": 2.724704859384291e-05, "loss": 0.3366, "step": 9659 }, { "epoch": 1.5730570370068802, "grad_norm": 0.11798974871635437, "learning_rate": 2.7242330705482978e-05, "loss": 0.3248, "step": 9660 }, { "epoch": 1.573219883564711, "grad_norm": 0.1320081353187561, "learning_rate": 2.7237612736616647e-05, "loss": 0.3289, "step": 9661 }, { "epoch": 1.573382730122542, "grad_norm": 0.10492788255214691, "learning_rate": 2.7232894687413314e-05, "loss": 0.3597, "step": 9662 }, { "epoch": 1.573545576680373, "grad_norm": 0.12495533376932144, "learning_rate": 2.7228176558042374e-05, "loss": 0.2962, "step": 9663 }, { "epoch": 1.573708423238204, "grad_norm": 0.14268212020397186, "learning_rate": 2.7223458348673215e-05, "loss": 0.3467, "step": 9664 }, { "epoch": 1.5738712697960346, "grad_norm": 0.0982704609632492, "learning_rate": 2.7218740059475252e-05, "loss": 0.2908, "step": 9665 }, { "epoch": 1.5740341163538656, "grad_norm": 0.11052721738815308, "learning_rate": 2.721402169061786e-05, "loss": 0.2971, "step": 9666 }, { "epoch": 1.5741969629116963, "grad_norm": 0.10390108823776245, "learning_rate": 2.7209303242270456e-05, "loss": 0.3264, "step": 9667 }, { "epoch": 1.5743598094695272, "grad_norm": 0.11584027111530304, "learning_rate": 2.720458471460246e-05, "loss": 0.3802, "step": 9668 }, { "epoch": 1.5745226560273582, "grad_norm": 0.14776787161827087, "learning_rate": 2.7199866107783256e-05, "loss": 0.3239, "step": 9669 }, { "epoch": 1.5746855025851891, "grad_norm": 0.12175551801919937, "learning_rate": 2.719514742198227e-05, "loss": 0.317, "step": 9670 }, { "epoch": 1.57484834914302, "grad_norm": 0.1113697811961174, "learning_rate": 2.7190428657368917e-05, "loss": 0.3219, "step": 9671 }, { "epoch": 1.575011195700851, "grad_norm": 0.10578563809394836, "learning_rate": 2.7185709814112613e-05, "loss": 0.3549, "step": 9672 }, { "epoch": 1.5751740422586817, "grad_norm": 0.09593594819307327, "learning_rate": 2.7180990892382775e-05, "loss": 0.2769, "step": 9673 }, { "epoch": 1.5753368888165127, "grad_norm": 0.09135986119508743, "learning_rate": 2.717627189234883e-05, "loss": 0.272, "step": 9674 }, { "epoch": 1.5754997353743434, "grad_norm": 0.1351490169763565, "learning_rate": 2.7171552814180206e-05, "loss": 0.3721, "step": 9675 }, { "epoch": 1.5756625819321743, "grad_norm": 0.11526563763618469, "learning_rate": 2.7166833658046326e-05, "loss": 0.2787, "step": 9676 }, { "epoch": 1.5758254284900053, "grad_norm": 0.1103401929140091, "learning_rate": 2.716211442411663e-05, "loss": 0.3044, "step": 9677 }, { "epoch": 1.5759882750478362, "grad_norm": 0.08625182509422302, "learning_rate": 2.7157395112560546e-05, "loss": 0.2735, "step": 9678 }, { "epoch": 1.5761511216056672, "grad_norm": 0.1684996336698532, "learning_rate": 2.715267572354751e-05, "loss": 0.3288, "step": 9679 }, { "epoch": 1.576313968163498, "grad_norm": 0.13788239657878876, "learning_rate": 2.7147956257246977e-05, "loss": 0.2996, "step": 9680 }, { "epoch": 1.5764768147213288, "grad_norm": 0.1278190314769745, "learning_rate": 2.714323671382837e-05, "loss": 0.3386, "step": 9681 }, { "epoch": 1.5766396612791596, "grad_norm": 0.11721470206975937, "learning_rate": 2.7138517093461142e-05, "loss": 0.2902, "step": 9682 }, { "epoch": 1.5768025078369905, "grad_norm": 0.14808428287506104, "learning_rate": 2.713379739631475e-05, "loss": 0.3129, "step": 9683 }, { "epoch": 1.5769653543948214, "grad_norm": 0.12082075327634811, "learning_rate": 2.712907762255863e-05, "loss": 0.3218, "step": 9684 }, { "epoch": 1.5771282009526524, "grad_norm": 0.10201960057020187, "learning_rate": 2.7124357772362253e-05, "loss": 0.3084, "step": 9685 }, { "epoch": 1.5772910475104833, "grad_norm": 0.12138596177101135, "learning_rate": 2.7119637845895062e-05, "loss": 0.3133, "step": 9686 }, { "epoch": 1.5774538940683143, "grad_norm": 0.09022337943315506, "learning_rate": 2.711491784332652e-05, "loss": 0.3423, "step": 9687 }, { "epoch": 1.577616740626145, "grad_norm": 0.09020110964775085, "learning_rate": 2.71101977648261e-05, "loss": 0.279, "step": 9688 }, { "epoch": 1.577779587183976, "grad_norm": 0.09367959946393967, "learning_rate": 2.710547761056325e-05, "loss": 0.2876, "step": 9689 }, { "epoch": 1.5779424337418066, "grad_norm": 0.11670494824647903, "learning_rate": 2.7100757380707454e-05, "loss": 0.3137, "step": 9690 }, { "epoch": 1.5781052802996376, "grad_norm": 0.11279988288879395, "learning_rate": 2.7096037075428166e-05, "loss": 0.3179, "step": 9691 }, { "epoch": 1.5782681268574685, "grad_norm": 0.08119277656078339, "learning_rate": 2.709131669489487e-05, "loss": 0.3003, "step": 9692 }, { "epoch": 1.5784309734152995, "grad_norm": 0.0805777981877327, "learning_rate": 2.7086596239277034e-05, "loss": 0.3562, "step": 9693 }, { "epoch": 1.5785938199731304, "grad_norm": 0.10399779677391052, "learning_rate": 2.7081875708744147e-05, "loss": 0.3288, "step": 9694 }, { "epoch": 1.5787566665309614, "grad_norm": 0.13718654215335846, "learning_rate": 2.7077155103465683e-05, "loss": 0.3303, "step": 9695 }, { "epoch": 1.578919513088792, "grad_norm": 0.0913708284497261, "learning_rate": 2.7072434423611133e-05, "loss": 0.2873, "step": 9696 }, { "epoch": 1.579082359646623, "grad_norm": 0.11856359243392944, "learning_rate": 2.706771366934997e-05, "loss": 0.2924, "step": 9697 }, { "epoch": 1.5792452062044537, "grad_norm": 0.1485523134469986, "learning_rate": 2.7062992840851694e-05, "loss": 0.3334, "step": 9698 }, { "epoch": 1.5794080527622847, "grad_norm": 0.127004474401474, "learning_rate": 2.705827193828579e-05, "loss": 0.365, "step": 9699 }, { "epoch": 1.5795708993201156, "grad_norm": 0.0923525020480156, "learning_rate": 2.7053550961821756e-05, "loss": 0.3191, "step": 9700 }, { "epoch": 1.5797337458779466, "grad_norm": 0.1457395702600479, "learning_rate": 2.7048829911629096e-05, "loss": 0.3557, "step": 9701 }, { "epoch": 1.5798965924357775, "grad_norm": 0.07429846376180649, "learning_rate": 2.7044108787877304e-05, "loss": 0.3034, "step": 9702 }, { "epoch": 1.5800594389936082, "grad_norm": 0.18680661916732788, "learning_rate": 2.703938759073588e-05, "loss": 0.3225, "step": 9703 }, { "epoch": 1.5802222855514392, "grad_norm": 0.09042870253324509, "learning_rate": 2.703466632037433e-05, "loss": 0.2735, "step": 9704 }, { "epoch": 1.58038513210927, "grad_norm": 0.14065781235694885, "learning_rate": 2.7029944976962167e-05, "loss": 0.2892, "step": 9705 }, { "epoch": 1.5805479786671008, "grad_norm": 0.13217251002788544, "learning_rate": 2.7025223560668895e-05, "loss": 0.3261, "step": 9706 }, { "epoch": 1.5807108252249318, "grad_norm": 0.06578996777534485, "learning_rate": 2.7020502071664028e-05, "loss": 0.3305, "step": 9707 }, { "epoch": 1.5808736717827627, "grad_norm": 0.0788314938545227, "learning_rate": 2.701578051011709e-05, "loss": 0.3821, "step": 9708 }, { "epoch": 1.5810365183405937, "grad_norm": 0.1365664303302765, "learning_rate": 2.701105887619758e-05, "loss": 0.3353, "step": 9709 }, { "epoch": 1.5811993648984246, "grad_norm": 0.09842217713594437, "learning_rate": 2.700633717007504e-05, "loss": 0.3147, "step": 9710 }, { "epoch": 1.5813622114562553, "grad_norm": 0.13130290806293488, "learning_rate": 2.7001615391918984e-05, "loss": 0.3288, "step": 9711 }, { "epoch": 1.5815250580140863, "grad_norm": 0.08193254470825195, "learning_rate": 2.6996893541898937e-05, "loss": 0.3227, "step": 9712 }, { "epoch": 1.581687904571917, "grad_norm": 0.13252714276313782, "learning_rate": 2.6992171620184432e-05, "loss": 0.3278, "step": 9713 }, { "epoch": 1.581850751129748, "grad_norm": 0.13835448026657104, "learning_rate": 2.6987449626944994e-05, "loss": 0.3015, "step": 9714 }, { "epoch": 1.5820135976875789, "grad_norm": 0.10239731520414352, "learning_rate": 2.6982727562350156e-05, "loss": 0.3036, "step": 9715 }, { "epoch": 1.5821764442454098, "grad_norm": 0.13789832592010498, "learning_rate": 2.6978005426569464e-05, "loss": 0.3429, "step": 9716 }, { "epoch": 1.5823392908032408, "grad_norm": 0.07618260383605957, "learning_rate": 2.697328321977245e-05, "loss": 0.3067, "step": 9717 }, { "epoch": 1.5825021373610715, "grad_norm": 0.10640224814414978, "learning_rate": 2.696856094212865e-05, "loss": 0.3112, "step": 9718 }, { "epoch": 1.5826649839189024, "grad_norm": 0.08769915997982025, "learning_rate": 2.696383859380762e-05, "loss": 0.3132, "step": 9719 }, { "epoch": 1.5828278304767331, "grad_norm": 0.1024048924446106, "learning_rate": 2.69591161749789e-05, "loss": 0.3113, "step": 9720 }, { "epoch": 1.582990677034564, "grad_norm": 0.09486277401447296, "learning_rate": 2.695439368581204e-05, "loss": 0.3128, "step": 9721 }, { "epoch": 1.583153523592395, "grad_norm": 0.08964680135250092, "learning_rate": 2.694967112647659e-05, "loss": 0.3398, "step": 9722 }, { "epoch": 1.583316370150226, "grad_norm": 0.0911778137087822, "learning_rate": 2.6944948497142107e-05, "loss": 0.3028, "step": 9723 }, { "epoch": 1.583479216708057, "grad_norm": 0.07433587312698364, "learning_rate": 2.6940225797978147e-05, "loss": 0.3052, "step": 9724 }, { "epoch": 1.5836420632658879, "grad_norm": 0.12025418132543564, "learning_rate": 2.693550302915427e-05, "loss": 0.2915, "step": 9725 }, { "epoch": 1.5838049098237186, "grad_norm": 0.07703634351491928, "learning_rate": 2.6930780190840028e-05, "loss": 0.2739, "step": 9726 }, { "epoch": 1.5839677563815495, "grad_norm": 0.12374170124530792, "learning_rate": 2.692605728320499e-05, "loss": 0.3127, "step": 9727 }, { "epoch": 1.5841306029393802, "grad_norm": 0.09820393472909927, "learning_rate": 2.692133430641873e-05, "loss": 0.2968, "step": 9728 }, { "epoch": 1.5842934494972112, "grad_norm": 0.0797097235918045, "learning_rate": 2.6916611260650814e-05, "loss": 0.3357, "step": 9729 }, { "epoch": 1.5844562960550421, "grad_norm": 0.139956533908844, "learning_rate": 2.6911888146070814e-05, "loss": 0.3579, "step": 9730 }, { "epoch": 1.584619142612873, "grad_norm": 0.10821084678173065, "learning_rate": 2.690716496284829e-05, "loss": 0.3263, "step": 9731 }, { "epoch": 1.584781989170704, "grad_norm": 0.10711504518985748, "learning_rate": 2.6902441711152844e-05, "loss": 0.3206, "step": 9732 }, { "epoch": 1.584944835728535, "grad_norm": 0.18458949029445648, "learning_rate": 2.6897718391154032e-05, "loss": 0.3484, "step": 9733 }, { "epoch": 1.5851076822863657, "grad_norm": 0.08549225330352783, "learning_rate": 2.689299500302145e-05, "loss": 0.2988, "step": 9734 }, { "epoch": 1.5852705288441964, "grad_norm": 0.0907832607626915, "learning_rate": 2.6888271546924667e-05, "loss": 0.3113, "step": 9735 }, { "epoch": 1.5854333754020273, "grad_norm": 0.1321263611316681, "learning_rate": 2.6883548023033278e-05, "loss": 0.3449, "step": 9736 }, { "epoch": 1.5855962219598583, "grad_norm": 0.09455649554729462, "learning_rate": 2.6878824431516876e-05, "loss": 0.3234, "step": 9737 }, { "epoch": 1.5857590685176892, "grad_norm": 0.15151511132717133, "learning_rate": 2.6874100772545054e-05, "loss": 0.3564, "step": 9738 }, { "epoch": 1.5859219150755202, "grad_norm": 0.12474493682384491, "learning_rate": 2.6869377046287387e-05, "loss": 0.3659, "step": 9739 }, { "epoch": 1.586084761633351, "grad_norm": 0.09238079935312271, "learning_rate": 2.6864653252913486e-05, "loss": 0.3129, "step": 9740 }, { "epoch": 1.5862476081911818, "grad_norm": 0.14871154725551605, "learning_rate": 2.6859929392592954e-05, "loss": 0.3406, "step": 9741 }, { "epoch": 1.5864104547490128, "grad_norm": 0.10383814573287964, "learning_rate": 2.6855205465495376e-05, "loss": 0.3538, "step": 9742 }, { "epoch": 1.5865733013068435, "grad_norm": 0.1136494129896164, "learning_rate": 2.6850481471790373e-05, "loss": 0.3137, "step": 9743 }, { "epoch": 1.5867361478646744, "grad_norm": 0.14672565460205078, "learning_rate": 2.684575741164753e-05, "loss": 0.3337, "step": 9744 }, { "epoch": 1.5868989944225054, "grad_norm": 0.10898636281490326, "learning_rate": 2.6841033285236478e-05, "loss": 0.332, "step": 9745 }, { "epoch": 1.5870618409803363, "grad_norm": 0.08593150228261948, "learning_rate": 2.6836309092726808e-05, "loss": 0.3296, "step": 9746 }, { "epoch": 1.5872246875381673, "grad_norm": 0.10323978960514069, "learning_rate": 2.6831584834288136e-05, "loss": 0.3229, "step": 9747 }, { "epoch": 1.5873875340959982, "grad_norm": 0.12296193093061447, "learning_rate": 2.682686051009009e-05, "loss": 0.3561, "step": 9748 }, { "epoch": 1.587550380653829, "grad_norm": 0.08468897640705109, "learning_rate": 2.682213612030227e-05, "loss": 0.3175, "step": 9749 }, { "epoch": 1.5877132272116599, "grad_norm": 0.07483989745378494, "learning_rate": 2.681741166509431e-05, "loss": 0.2982, "step": 9750 }, { "epoch": 1.5878760737694906, "grad_norm": 0.11330423504114151, "learning_rate": 2.6812687144635822e-05, "loss": 0.2927, "step": 9751 }, { "epoch": 1.5880389203273215, "grad_norm": 0.07831180840730667, "learning_rate": 2.6807962559096437e-05, "loss": 0.2763, "step": 9752 }, { "epoch": 1.5882017668851525, "grad_norm": 0.12627245485782623, "learning_rate": 2.6803237908645777e-05, "loss": 0.3164, "step": 9753 }, { "epoch": 1.5883646134429834, "grad_norm": 0.1045302003622055, "learning_rate": 2.6798513193453485e-05, "loss": 0.3225, "step": 9754 }, { "epoch": 1.5885274600008144, "grad_norm": 0.0989309772849083, "learning_rate": 2.6793788413689165e-05, "loss": 0.3065, "step": 9755 }, { "epoch": 1.588690306558645, "grad_norm": 0.14784406125545502, "learning_rate": 2.678906356952247e-05, "loss": 0.3259, "step": 9756 }, { "epoch": 1.588853153116476, "grad_norm": 0.12288561463356018, "learning_rate": 2.6784338661123037e-05, "loss": 0.3254, "step": 9757 }, { "epoch": 1.5890159996743067, "grad_norm": 0.07687561213970184, "learning_rate": 2.67796136886605e-05, "loss": 0.3131, "step": 9758 }, { "epoch": 1.5891788462321377, "grad_norm": 0.11031360924243927, "learning_rate": 2.6774888652304498e-05, "loss": 0.3094, "step": 9759 }, { "epoch": 1.5893416927899686, "grad_norm": 0.07438039034605026, "learning_rate": 2.677016355222467e-05, "loss": 0.2827, "step": 9760 }, { "epoch": 1.5895045393477996, "grad_norm": 0.09589896351099014, "learning_rate": 2.6765438388590676e-05, "loss": 0.2773, "step": 9761 }, { "epoch": 1.5896673859056305, "grad_norm": 0.12440940737724304, "learning_rate": 2.676071316157215e-05, "loss": 0.3235, "step": 9762 }, { "epoch": 1.5898302324634614, "grad_norm": 0.06731830537319183, "learning_rate": 2.675598787133875e-05, "loss": 0.3268, "step": 9763 }, { "epoch": 1.5899930790212922, "grad_norm": 0.0928802341222763, "learning_rate": 2.675126251806012e-05, "loss": 0.3292, "step": 9764 }, { "epoch": 1.590155925579123, "grad_norm": 0.10933240503072739, "learning_rate": 2.674653710190592e-05, "loss": 0.3458, "step": 9765 }, { "epoch": 1.5903187721369538, "grad_norm": 0.14466042816638947, "learning_rate": 2.674181162304581e-05, "loss": 0.3268, "step": 9766 }, { "epoch": 1.5904816186947848, "grad_norm": 0.08599459379911423, "learning_rate": 2.6737086081649443e-05, "loss": 0.2957, "step": 9767 }, { "epoch": 1.5906444652526157, "grad_norm": 0.21874293684959412, "learning_rate": 2.6732360477886474e-05, "loss": 0.3451, "step": 9768 }, { "epoch": 1.5908073118104467, "grad_norm": 0.10955596715211868, "learning_rate": 2.6727634811926573e-05, "loss": 0.3101, "step": 9769 }, { "epoch": 1.5909701583682776, "grad_norm": 0.13361476361751556, "learning_rate": 2.672290908393942e-05, "loss": 0.326, "step": 9770 }, { "epoch": 1.5911330049261085, "grad_norm": 0.13431566953659058, "learning_rate": 2.6718183294094652e-05, "loss": 0.3282, "step": 9771 }, { "epoch": 1.5912958514839393, "grad_norm": 0.15398292243480682, "learning_rate": 2.671345744256197e-05, "loss": 0.2802, "step": 9772 }, { "epoch": 1.59145869804177, "grad_norm": 0.11214230209589005, "learning_rate": 2.670873152951102e-05, "loss": 0.3543, "step": 9773 }, { "epoch": 1.591621544599601, "grad_norm": 0.11758473515510559, "learning_rate": 2.67040055551115e-05, "loss": 0.3147, "step": 9774 }, { "epoch": 1.5917843911574319, "grad_norm": 0.06394445896148682, "learning_rate": 2.6699279519533065e-05, "loss": 0.274, "step": 9775 }, { "epoch": 1.5919472377152628, "grad_norm": 0.08859275281429291, "learning_rate": 2.6694553422945406e-05, "loss": 0.3152, "step": 9776 }, { "epoch": 1.5921100842730938, "grad_norm": 0.1245456412434578, "learning_rate": 2.6689827265518202e-05, "loss": 0.3377, "step": 9777 }, { "epoch": 1.5922729308309247, "grad_norm": 0.09775689989328384, "learning_rate": 2.6685101047421142e-05, "loss": 0.3224, "step": 9778 }, { "epoch": 1.5924357773887554, "grad_norm": 0.13793909549713135, "learning_rate": 2.66803747688239e-05, "loss": 0.3372, "step": 9779 }, { "epoch": 1.5925986239465864, "grad_norm": 0.12747299671173096, "learning_rate": 2.667564842989617e-05, "loss": 0.3268, "step": 9780 }, { "epoch": 1.592761470504417, "grad_norm": 0.11777450889348984, "learning_rate": 2.6670922030807648e-05, "loss": 0.3211, "step": 9781 }, { "epoch": 1.592924317062248, "grad_norm": 0.0951077863574028, "learning_rate": 2.6666195571728014e-05, "loss": 0.3366, "step": 9782 }, { "epoch": 1.593087163620079, "grad_norm": 0.1576302945613861, "learning_rate": 2.6661469052826972e-05, "loss": 0.3264, "step": 9783 }, { "epoch": 1.59325001017791, "grad_norm": 0.07520852237939835, "learning_rate": 2.6656742474274205e-05, "loss": 0.3266, "step": 9784 }, { "epoch": 1.5934128567357408, "grad_norm": 0.10809902101755142, "learning_rate": 2.6652015836239426e-05, "loss": 0.3102, "step": 9785 }, { "epoch": 1.5935757032935718, "grad_norm": 0.10476423054933548, "learning_rate": 2.6647289138892317e-05, "loss": 0.3228, "step": 9786 }, { "epoch": 1.5937385498514025, "grad_norm": 0.14169950783252716, "learning_rate": 2.664256238240261e-05, "loss": 0.3206, "step": 9787 }, { "epoch": 1.5939013964092335, "grad_norm": 0.12681220471858978, "learning_rate": 2.663783556693998e-05, "loss": 0.3438, "step": 9788 }, { "epoch": 1.5940642429670642, "grad_norm": 0.09735076129436493, "learning_rate": 2.6633108692674148e-05, "loss": 0.3182, "step": 9789 }, { "epoch": 1.5942270895248951, "grad_norm": 0.13287930190563202, "learning_rate": 2.6628381759774824e-05, "loss": 0.3285, "step": 9790 }, { "epoch": 1.594389936082726, "grad_norm": 0.12900787591934204, "learning_rate": 2.662365476841171e-05, "loss": 0.2898, "step": 9791 }, { "epoch": 1.594552782640557, "grad_norm": 0.1110413447022438, "learning_rate": 2.661892771875453e-05, "loss": 0.3146, "step": 9792 }, { "epoch": 1.594715629198388, "grad_norm": 0.06487580388784409, "learning_rate": 2.6614200610972993e-05, "loss": 0.3155, "step": 9793 }, { "epoch": 1.5948784757562187, "grad_norm": 0.10883165895938873, "learning_rate": 2.6609473445236817e-05, "loss": 0.3002, "step": 9794 }, { "epoch": 1.5950413223140496, "grad_norm": 0.11782006919384003, "learning_rate": 2.6604746221715728e-05, "loss": 0.329, "step": 9795 }, { "epoch": 1.5952041688718803, "grad_norm": 0.08480463922023773, "learning_rate": 2.6600018940579434e-05, "loss": 0.3075, "step": 9796 }, { "epoch": 1.5953670154297113, "grad_norm": 0.09038813412189484, "learning_rate": 2.6595291601997667e-05, "loss": 0.3113, "step": 9797 }, { "epoch": 1.5955298619875422, "grad_norm": 0.12250368297100067, "learning_rate": 2.659056420614015e-05, "loss": 0.2902, "step": 9798 }, { "epoch": 1.5956927085453732, "grad_norm": 0.09229668229818344, "learning_rate": 2.6585836753176625e-05, "loss": 0.3081, "step": 9799 }, { "epoch": 1.595855555103204, "grad_norm": 0.10309470444917679, "learning_rate": 2.6581109243276797e-05, "loss": 0.3253, "step": 9800 }, { "epoch": 1.596018401661035, "grad_norm": 0.11808820813894272, "learning_rate": 2.6576381676610416e-05, "loss": 0.2965, "step": 9801 }, { "epoch": 1.5961812482188658, "grad_norm": 0.12018903344869614, "learning_rate": 2.6571654053347205e-05, "loss": 0.3363, "step": 9802 }, { "epoch": 1.5963440947766967, "grad_norm": 0.11496032774448395, "learning_rate": 2.6566926373656915e-05, "loss": 0.3045, "step": 9803 }, { "epoch": 1.5965069413345274, "grad_norm": 0.10853485018014908, "learning_rate": 2.6562198637709262e-05, "loss": 0.3308, "step": 9804 }, { "epoch": 1.5966697878923584, "grad_norm": 0.09068361669778824, "learning_rate": 2.6557470845674004e-05, "loss": 0.3332, "step": 9805 }, { "epoch": 1.5968326344501893, "grad_norm": 0.12650969624519348, "learning_rate": 2.6552742997720877e-05, "loss": 0.3596, "step": 9806 }, { "epoch": 1.5969954810080202, "grad_norm": 0.08481636643409729, "learning_rate": 2.6548015094019622e-05, "loss": 0.2982, "step": 9807 }, { "epoch": 1.5971583275658512, "grad_norm": 0.13326390087604523, "learning_rate": 2.6543287134739986e-05, "loss": 0.2754, "step": 9808 }, { "epoch": 1.597321174123682, "grad_norm": 0.11063283681869507, "learning_rate": 2.6538559120051716e-05, "loss": 0.2918, "step": 9809 }, { "epoch": 1.5974840206815129, "grad_norm": 0.14788882434368134, "learning_rate": 2.653383105012457e-05, "loss": 0.3276, "step": 9810 }, { "epoch": 1.5976468672393436, "grad_norm": 0.10450137406587601, "learning_rate": 2.6529102925128286e-05, "loss": 0.3506, "step": 9811 }, { "epoch": 1.5978097137971745, "grad_norm": 0.0991697907447815, "learning_rate": 2.6524374745232633e-05, "loss": 0.3157, "step": 9812 }, { "epoch": 1.5979725603550055, "grad_norm": 0.087265245616436, "learning_rate": 2.6519646510607353e-05, "loss": 0.3185, "step": 9813 }, { "epoch": 1.5981354069128364, "grad_norm": 0.13959522545337677, "learning_rate": 2.6514918221422214e-05, "loss": 0.3418, "step": 9814 }, { "epoch": 1.5982982534706673, "grad_norm": 0.1575554609298706, "learning_rate": 2.6510189877846975e-05, "loss": 0.3435, "step": 9815 }, { "epoch": 1.5984611000284983, "grad_norm": 0.1488628387451172, "learning_rate": 2.6505461480051392e-05, "loss": 0.3056, "step": 9816 }, { "epoch": 1.598623946586329, "grad_norm": 0.10681865364313126, "learning_rate": 2.6500733028205227e-05, "loss": 0.3469, "step": 9817 }, { "epoch": 1.59878679314416, "grad_norm": 0.10694149881601334, "learning_rate": 2.6496004522478256e-05, "loss": 0.3303, "step": 9818 }, { "epoch": 1.5989496397019907, "grad_norm": 0.12590447068214417, "learning_rate": 2.6491275963040242e-05, "loss": 0.3254, "step": 9819 }, { "epoch": 1.5991124862598216, "grad_norm": 0.08362259715795517, "learning_rate": 2.6486547350060943e-05, "loss": 0.3598, "step": 9820 }, { "epoch": 1.5992753328176526, "grad_norm": 0.12046300619840622, "learning_rate": 2.6481818683710152e-05, "loss": 0.3385, "step": 9821 }, { "epoch": 1.5994381793754835, "grad_norm": 0.07732051610946655, "learning_rate": 2.6477089964157618e-05, "loss": 0.3051, "step": 9822 }, { "epoch": 1.5996010259333144, "grad_norm": 0.09374771267175674, "learning_rate": 2.647236119157314e-05, "loss": 0.3237, "step": 9823 }, { "epoch": 1.5997638724911454, "grad_norm": 0.10774090141057968, "learning_rate": 2.6467632366126483e-05, "loss": 0.283, "step": 9824 }, { "epoch": 1.599926719048976, "grad_norm": 0.16527850925922394, "learning_rate": 2.646290348798743e-05, "loss": 0.3315, "step": 9825 }, { "epoch": 1.600089565606807, "grad_norm": 0.12400151044130325, "learning_rate": 2.645817455732575e-05, "loss": 0.3051, "step": 9826 }, { "epoch": 1.6002524121646378, "grad_norm": 0.13807976245880127, "learning_rate": 2.6453445574311235e-05, "loss": 0.344, "step": 9827 }, { "epoch": 1.6004152587224687, "grad_norm": 0.13194721937179565, "learning_rate": 2.6448716539113672e-05, "loss": 0.3032, "step": 9828 }, { "epoch": 1.6005781052802996, "grad_norm": 0.08260326087474823, "learning_rate": 2.6443987451902842e-05, "loss": 0.3035, "step": 9829 }, { "epoch": 1.6007409518381306, "grad_norm": 0.1499956250190735, "learning_rate": 2.643925831284854e-05, "loss": 0.3592, "step": 9830 }, { "epoch": 1.6009037983959615, "grad_norm": 0.09002488851547241, "learning_rate": 2.643452912212055e-05, "loss": 0.3022, "step": 9831 }, { "epoch": 1.6010666449537923, "grad_norm": 0.09122191369533539, "learning_rate": 2.6429799879888674e-05, "loss": 0.3153, "step": 9832 }, { "epoch": 1.6012294915116232, "grad_norm": 0.12618760764598846, "learning_rate": 2.6425070586322688e-05, "loss": 0.3618, "step": 9833 }, { "epoch": 1.601392338069454, "grad_norm": 0.11286790668964386, "learning_rate": 2.64203412415924e-05, "loss": 0.2778, "step": 9834 }, { "epoch": 1.6015551846272849, "grad_norm": 0.11749707162380219, "learning_rate": 2.6415611845867604e-05, "loss": 0.3306, "step": 9835 }, { "epoch": 1.6017180311851158, "grad_norm": 0.07663802057504654, "learning_rate": 2.641088239931811e-05, "loss": 0.2921, "step": 9836 }, { "epoch": 1.6018808777429467, "grad_norm": 0.09283436089754105, "learning_rate": 2.6406152902113708e-05, "loss": 0.3217, "step": 9837 }, { "epoch": 1.6020437243007777, "grad_norm": 0.1433301568031311, "learning_rate": 2.6401423354424203e-05, "loss": 0.3365, "step": 9838 }, { "epoch": 1.6022065708586086, "grad_norm": 0.12561699748039246, "learning_rate": 2.63966937564194e-05, "loss": 0.3294, "step": 9839 }, { "epoch": 1.6023694174164393, "grad_norm": 0.10919392108917236, "learning_rate": 2.6391964108269102e-05, "loss": 0.3036, "step": 9840 }, { "epoch": 1.6025322639742703, "grad_norm": 0.10967600345611572, "learning_rate": 2.638723441014313e-05, "loss": 0.3019, "step": 9841 }, { "epoch": 1.602695110532101, "grad_norm": 0.10439492017030716, "learning_rate": 2.638250466221128e-05, "loss": 0.328, "step": 9842 }, { "epoch": 1.602857957089932, "grad_norm": 0.10982105135917664, "learning_rate": 2.6377774864643378e-05, "loss": 0.3159, "step": 9843 }, { "epoch": 1.603020803647763, "grad_norm": 0.12273478507995605, "learning_rate": 2.637304501760923e-05, "loss": 0.2894, "step": 9844 }, { "epoch": 1.6031836502055938, "grad_norm": 0.09515064209699631, "learning_rate": 2.6368315121278646e-05, "loss": 0.3325, "step": 9845 }, { "epoch": 1.6033464967634248, "grad_norm": 0.1114647313952446, "learning_rate": 2.636358517582145e-05, "loss": 0.3395, "step": 9846 }, { "epoch": 1.6035093433212555, "grad_norm": 0.1167527288198471, "learning_rate": 2.6358855181407467e-05, "loss": 0.3095, "step": 9847 }, { "epoch": 1.6036721898790864, "grad_norm": 0.08495309203863144, "learning_rate": 2.6354125138206504e-05, "loss": 0.309, "step": 9848 }, { "epoch": 1.6038350364369172, "grad_norm": 0.1029413715004921, "learning_rate": 2.634939504638839e-05, "loss": 0.331, "step": 9849 }, { "epoch": 1.603997882994748, "grad_norm": 0.08054196089506149, "learning_rate": 2.6344664906122962e-05, "loss": 0.3039, "step": 9850 }, { "epoch": 1.604160729552579, "grad_norm": 0.10073753446340561, "learning_rate": 2.633993471758003e-05, "loss": 0.3191, "step": 9851 }, { "epoch": 1.60432357611041, "grad_norm": 0.13446293771266937, "learning_rate": 2.6335204480929426e-05, "loss": 0.3089, "step": 9852 }, { "epoch": 1.604486422668241, "grad_norm": 0.15411512553691864, "learning_rate": 2.633047419634098e-05, "loss": 0.3499, "step": 9853 }, { "epoch": 1.6046492692260719, "grad_norm": 0.1483059674501419, "learning_rate": 2.632574386398452e-05, "loss": 0.3414, "step": 9854 }, { "epoch": 1.6048121157839026, "grad_norm": 0.1732982099056244, "learning_rate": 2.632101348402989e-05, "loss": 0.3035, "step": 9855 }, { "epoch": 1.6049749623417335, "grad_norm": 0.11084261536598206, "learning_rate": 2.631628305664692e-05, "loss": 0.3172, "step": 9856 }, { "epoch": 1.6051378088995643, "grad_norm": 0.12769287824630737, "learning_rate": 2.6311552582005438e-05, "loss": 0.3238, "step": 9857 }, { "epoch": 1.6053006554573952, "grad_norm": 0.4027620255947113, "learning_rate": 2.6306822060275292e-05, "loss": 0.3748, "step": 9858 }, { "epoch": 1.6054635020152261, "grad_norm": 0.1365985870361328, "learning_rate": 2.6302091491626317e-05, "loss": 0.3091, "step": 9859 }, { "epoch": 1.605626348573057, "grad_norm": 0.1073790192604065, "learning_rate": 2.6297360876228354e-05, "loss": 0.3054, "step": 9860 }, { "epoch": 1.605789195130888, "grad_norm": 0.15356093645095825, "learning_rate": 2.6292630214251256e-05, "loss": 0.3436, "step": 9861 }, { "epoch": 1.605952041688719, "grad_norm": 0.20333965122699738, "learning_rate": 2.6287899505864854e-05, "loss": 0.3262, "step": 9862 }, { "epoch": 1.6061148882465497, "grad_norm": 0.13397572934627533, "learning_rate": 2.628316875123901e-05, "loss": 0.3119, "step": 9863 }, { "epoch": 1.6062777348043804, "grad_norm": 0.09795050323009491, "learning_rate": 2.6278437950543555e-05, "loss": 0.2817, "step": 9864 }, { "epoch": 1.6064405813622114, "grad_norm": 0.08006589859724045, "learning_rate": 2.6273707103948352e-05, "loss": 0.3389, "step": 9865 }, { "epoch": 1.6066034279200423, "grad_norm": 0.09739992022514343, "learning_rate": 2.6268976211623243e-05, "loss": 0.3358, "step": 9866 }, { "epoch": 1.6067662744778732, "grad_norm": 0.13458845019340515, "learning_rate": 2.6264245273738092e-05, "loss": 0.3114, "step": 9867 }, { "epoch": 1.6069291210357042, "grad_norm": 0.14112649857997894, "learning_rate": 2.6259514290462746e-05, "loss": 0.3141, "step": 9868 }, { "epoch": 1.6070919675935351, "grad_norm": 0.09803417325019836, "learning_rate": 2.6254783261967058e-05, "loss": 0.2954, "step": 9869 }, { "epoch": 1.6072548141513658, "grad_norm": 0.11149293184280396, "learning_rate": 2.6250052188420897e-05, "loss": 0.3196, "step": 9870 }, { "epoch": 1.6074176607091968, "grad_norm": 0.1529766023159027, "learning_rate": 2.6245321069994112e-05, "loss": 0.3305, "step": 9871 }, { "epoch": 1.6075805072670275, "grad_norm": 0.10336991399526596, "learning_rate": 2.624058990685657e-05, "loss": 0.3233, "step": 9872 }, { "epoch": 1.6077433538248584, "grad_norm": 0.11137073487043381, "learning_rate": 2.623585869917814e-05, "loss": 0.2987, "step": 9873 }, { "epoch": 1.6079062003826894, "grad_norm": 0.08729777485132217, "learning_rate": 2.6231127447128673e-05, "loss": 0.3343, "step": 9874 }, { "epoch": 1.6080690469405203, "grad_norm": 0.14310507476329803, "learning_rate": 2.622639615087804e-05, "loss": 0.3253, "step": 9875 }, { "epoch": 1.6082318934983513, "grad_norm": 0.11697877943515778, "learning_rate": 2.6221664810596126e-05, "loss": 0.3093, "step": 9876 }, { "epoch": 1.6083947400561822, "grad_norm": 0.10762878507375717, "learning_rate": 2.621693342645277e-05, "loss": 0.2999, "step": 9877 }, { "epoch": 1.608557586614013, "grad_norm": 0.12243849784135818, "learning_rate": 2.6212201998617868e-05, "loss": 0.3012, "step": 9878 }, { "epoch": 1.6087204331718439, "grad_norm": 0.1071041077375412, "learning_rate": 2.6207470527261273e-05, "loss": 0.3528, "step": 9879 }, { "epoch": 1.6088832797296746, "grad_norm": 0.1320687085390091, "learning_rate": 2.6202739012552875e-05, "loss": 0.3224, "step": 9880 }, { "epoch": 1.6090461262875055, "grad_norm": 0.11705686151981354, "learning_rate": 2.6198007454662544e-05, "loss": 0.3025, "step": 9881 }, { "epoch": 1.6092089728453365, "grad_norm": 0.12056080251932144, "learning_rate": 2.6193275853760148e-05, "loss": 0.352, "step": 9882 }, { "epoch": 1.6093718194031674, "grad_norm": 0.18762363493442535, "learning_rate": 2.618854421001558e-05, "loss": 0.3142, "step": 9883 }, { "epoch": 1.6095346659609984, "grad_norm": 0.13146904110908508, "learning_rate": 2.618381252359871e-05, "loss": 0.2887, "step": 9884 }, { "epoch": 1.609697512518829, "grad_norm": 0.09497418999671936, "learning_rate": 2.6179080794679427e-05, "loss": 0.3364, "step": 9885 }, { "epoch": 1.60986035907666, "grad_norm": 0.0786193311214447, "learning_rate": 2.6174349023427612e-05, "loss": 0.3586, "step": 9886 }, { "epoch": 1.6100232056344908, "grad_norm": 0.13066264986991882, "learning_rate": 2.6169617210013146e-05, "loss": 0.3408, "step": 9887 }, { "epoch": 1.6101860521923217, "grad_norm": 0.0975787416100502, "learning_rate": 2.6164885354605918e-05, "loss": 0.3316, "step": 9888 }, { "epoch": 1.6103488987501526, "grad_norm": 0.10337896645069122, "learning_rate": 2.6160153457375818e-05, "loss": 0.2829, "step": 9889 }, { "epoch": 1.6105117453079836, "grad_norm": 0.10312298685312271, "learning_rate": 2.6155421518492727e-05, "loss": 0.3636, "step": 9890 }, { "epoch": 1.6106745918658145, "grad_norm": 0.10369555652141571, "learning_rate": 2.615068953812655e-05, "loss": 0.3028, "step": 9891 }, { "epoch": 1.6108374384236455, "grad_norm": 0.11947066336870193, "learning_rate": 2.614595751644716e-05, "loss": 0.3141, "step": 9892 }, { "epoch": 1.6110002849814762, "grad_norm": 0.09988872706890106, "learning_rate": 2.6141225453624475e-05, "loss": 0.3055, "step": 9893 }, { "epoch": 1.6111631315393071, "grad_norm": 0.34782904386520386, "learning_rate": 2.613649334982837e-05, "loss": 0.3892, "step": 9894 }, { "epoch": 1.6113259780971378, "grad_norm": 0.14125840365886688, "learning_rate": 2.6131761205228744e-05, "loss": 0.3052, "step": 9895 }, { "epoch": 1.6114888246549688, "grad_norm": 0.12445245683193207, "learning_rate": 2.612702901999551e-05, "loss": 0.3143, "step": 9896 }, { "epoch": 1.6116516712127997, "grad_norm": 0.11811802536249161, "learning_rate": 2.612229679429855e-05, "loss": 0.3306, "step": 9897 }, { "epoch": 1.6118145177706307, "grad_norm": 0.09915027022361755, "learning_rate": 2.6117564528307782e-05, "loss": 0.3319, "step": 9898 }, { "epoch": 1.6119773643284616, "grad_norm": 0.10763050615787506, "learning_rate": 2.6112832222193086e-05, "loss": 0.3282, "step": 9899 }, { "epoch": 1.6121402108862926, "grad_norm": 0.10663647204637527, "learning_rate": 2.6108099876124388e-05, "loss": 0.3188, "step": 9900 }, { "epoch": 1.6123030574441233, "grad_norm": 0.10526233911514282, "learning_rate": 2.610336749027158e-05, "loss": 0.3163, "step": 9901 }, { "epoch": 1.612465904001954, "grad_norm": 0.13750366866588593, "learning_rate": 2.6098635064804577e-05, "loss": 0.3069, "step": 9902 }, { "epoch": 1.612628750559785, "grad_norm": 0.07881748676300049, "learning_rate": 2.6093902599893283e-05, "loss": 0.3469, "step": 9903 }, { "epoch": 1.6127915971176159, "grad_norm": 0.06134743615984917, "learning_rate": 2.6089170095707603e-05, "loss": 0.308, "step": 9904 }, { "epoch": 1.6129544436754468, "grad_norm": 0.13293807208538055, "learning_rate": 2.608443755241746e-05, "loss": 0.3383, "step": 9905 }, { "epoch": 1.6131172902332778, "grad_norm": 0.11511996388435364, "learning_rate": 2.607970497019276e-05, "loss": 0.2974, "step": 9906 }, { "epoch": 1.6132801367911087, "grad_norm": 0.10688482969999313, "learning_rate": 2.6074972349203414e-05, "loss": 0.3142, "step": 9907 }, { "epoch": 1.6134429833489394, "grad_norm": 0.12484926730394363, "learning_rate": 2.607023968961934e-05, "loss": 0.3055, "step": 9908 }, { "epoch": 1.6136058299067704, "grad_norm": 0.10963375121355057, "learning_rate": 2.6065506991610456e-05, "loss": 0.3394, "step": 9909 }, { "epoch": 1.613768676464601, "grad_norm": 0.07943430542945862, "learning_rate": 2.6060774255346676e-05, "loss": 0.298, "step": 9910 }, { "epoch": 1.613931523022432, "grad_norm": 0.09444878995418549, "learning_rate": 2.6056041480997923e-05, "loss": 0.3165, "step": 9911 }, { "epoch": 1.614094369580263, "grad_norm": 0.08770059794187546, "learning_rate": 2.6051308668734115e-05, "loss": 0.3882, "step": 9912 }, { "epoch": 1.614257216138094, "grad_norm": 0.07665127515792847, "learning_rate": 2.604657581872517e-05, "loss": 0.322, "step": 9913 }, { "epoch": 1.6144200626959249, "grad_norm": 0.14246124029159546, "learning_rate": 2.6041842931141032e-05, "loss": 0.3383, "step": 9914 }, { "epoch": 1.6145829092537558, "grad_norm": 0.0878647118806839, "learning_rate": 2.6037110006151606e-05, "loss": 0.3326, "step": 9915 }, { "epoch": 1.6147457558115865, "grad_norm": 0.15741053223609924, "learning_rate": 2.6032377043926826e-05, "loss": 0.3505, "step": 9916 }, { "epoch": 1.6149086023694175, "grad_norm": 0.163288876414299, "learning_rate": 2.6027644044636605e-05, "loss": 0.3683, "step": 9917 }, { "epoch": 1.6150714489272482, "grad_norm": 0.12433529645204544, "learning_rate": 2.60229110084509e-05, "loss": 0.3283, "step": 9918 }, { "epoch": 1.6152342954850791, "grad_norm": 0.10115935653448105, "learning_rate": 2.6018177935539618e-05, "loss": 0.3401, "step": 9919 }, { "epoch": 1.61539714204291, "grad_norm": 0.16197478771209717, "learning_rate": 2.6013444826072698e-05, "loss": 0.3108, "step": 9920 }, { "epoch": 1.615559988600741, "grad_norm": 0.14217549562454224, "learning_rate": 2.6008711680220065e-05, "loss": 0.3839, "step": 9921 }, { "epoch": 1.615722835158572, "grad_norm": 0.12944160401821136, "learning_rate": 2.6003978498151664e-05, "loss": 0.3316, "step": 9922 }, { "epoch": 1.6158856817164027, "grad_norm": 0.10721138119697571, "learning_rate": 2.5999245280037437e-05, "loss": 0.3337, "step": 9923 }, { "epoch": 1.6160485282742336, "grad_norm": 0.12049145996570587, "learning_rate": 2.5994512026047303e-05, "loss": 0.3184, "step": 9924 }, { "epoch": 1.6162113748320643, "grad_norm": 0.15275803208351135, "learning_rate": 2.5989778736351213e-05, "loss": 0.3505, "step": 9925 }, { "epoch": 1.6163742213898953, "grad_norm": 0.11076989024877548, "learning_rate": 2.5985045411119092e-05, "loss": 0.3456, "step": 9926 }, { "epoch": 1.6165370679477262, "grad_norm": 0.10321332514286041, "learning_rate": 2.5980312050520893e-05, "loss": 0.3005, "step": 9927 }, { "epoch": 1.6166999145055572, "grad_norm": 0.15683741867542267, "learning_rate": 2.5975578654726557e-05, "loss": 0.3368, "step": 9928 }, { "epoch": 1.616862761063388, "grad_norm": 0.10282336175441742, "learning_rate": 2.5970845223906016e-05, "loss": 0.3201, "step": 9929 }, { "epoch": 1.617025607621219, "grad_norm": 0.13212351500988007, "learning_rate": 2.5966111758229232e-05, "loss": 0.3185, "step": 9930 }, { "epoch": 1.6171884541790498, "grad_norm": 0.1680305302143097, "learning_rate": 2.5961378257866138e-05, "loss": 0.307, "step": 9931 }, { "epoch": 1.6173513007368807, "grad_norm": 0.12539976835250854, "learning_rate": 2.5956644722986685e-05, "loss": 0.3185, "step": 9932 }, { "epoch": 1.6175141472947114, "grad_norm": 0.10839644819498062, "learning_rate": 2.5951911153760815e-05, "loss": 0.3299, "step": 9933 }, { "epoch": 1.6176769938525424, "grad_norm": 0.179538294672966, "learning_rate": 2.5947177550358494e-05, "loss": 0.336, "step": 9934 }, { "epoch": 1.6178398404103733, "grad_norm": 0.10257022827863693, "learning_rate": 2.594244391294965e-05, "loss": 0.304, "step": 9935 }, { "epoch": 1.6180026869682043, "grad_norm": 0.07997861504554749, "learning_rate": 2.5937710241704248e-05, "loss": 0.3161, "step": 9936 }, { "epoch": 1.6181655335260352, "grad_norm": 0.12233582139015198, "learning_rate": 2.593297653679224e-05, "loss": 0.3485, "step": 9937 }, { "epoch": 1.618328380083866, "grad_norm": 0.14216338098049164, "learning_rate": 2.5928242798383585e-05, "loss": 0.2936, "step": 9938 }, { "epoch": 1.6184912266416969, "grad_norm": 0.10188036412000656, "learning_rate": 2.592350902664823e-05, "loss": 0.3634, "step": 9939 }, { "epoch": 1.6186540731995276, "grad_norm": 0.13283133506774902, "learning_rate": 2.591877522175613e-05, "loss": 0.327, "step": 9940 }, { "epoch": 1.6188169197573585, "grad_norm": 0.1421259343624115, "learning_rate": 2.5914041383877248e-05, "loss": 0.3442, "step": 9941 }, { "epoch": 1.6189797663151895, "grad_norm": 0.09203369170427322, "learning_rate": 2.590930751318154e-05, "loss": 0.2961, "step": 9942 }, { "epoch": 1.6191426128730204, "grad_norm": 0.13219350576400757, "learning_rate": 2.5904573609838974e-05, "loss": 0.305, "step": 9943 }, { "epoch": 1.6193054594308514, "grad_norm": 0.13150891661643982, "learning_rate": 2.58998396740195e-05, "loss": 0.3452, "step": 9944 }, { "epoch": 1.6194683059886823, "grad_norm": 0.12295182794332504, "learning_rate": 2.589510570589309e-05, "loss": 0.3198, "step": 9945 }, { "epoch": 1.619631152546513, "grad_norm": 0.0936211422085762, "learning_rate": 2.5890371705629697e-05, "loss": 0.3343, "step": 9946 }, { "epoch": 1.619793999104344, "grad_norm": 0.09606250375509262, "learning_rate": 2.58856376733993e-05, "loss": 0.2946, "step": 9947 }, { "epoch": 1.6199568456621747, "grad_norm": 0.11664433777332306, "learning_rate": 2.588090360937185e-05, "loss": 0.3448, "step": 9948 }, { "epoch": 1.6201196922200056, "grad_norm": 0.07204356044530869, "learning_rate": 2.587616951371732e-05, "loss": 0.3309, "step": 9949 }, { "epoch": 1.6202825387778366, "grad_norm": 0.10026708245277405, "learning_rate": 2.5871435386605685e-05, "loss": 0.3159, "step": 9950 }, { "epoch": 1.6204453853356675, "grad_norm": 0.10940388590097427, "learning_rate": 2.586670122820691e-05, "loss": 0.3035, "step": 9951 }, { "epoch": 1.6206082318934985, "grad_norm": 0.16682104766368866, "learning_rate": 2.5861967038690954e-05, "loss": 0.3235, "step": 9952 }, { "epoch": 1.6207710784513294, "grad_norm": 0.1666371077299118, "learning_rate": 2.5857232818227804e-05, "loss": 0.3089, "step": 9953 }, { "epoch": 1.6209339250091601, "grad_norm": 0.09614108502864838, "learning_rate": 2.5852498566987437e-05, "loss": 0.3088, "step": 9954 }, { "epoch": 1.621096771566991, "grad_norm": 0.11446280777454376, "learning_rate": 2.584776428513981e-05, "loss": 0.2957, "step": 9955 }, { "epoch": 1.6212596181248218, "grad_norm": 0.08739941567182541, "learning_rate": 2.5843029972854903e-05, "loss": 0.3395, "step": 9956 }, { "epoch": 1.6214224646826527, "grad_norm": 0.1017431765794754, "learning_rate": 2.5838295630302694e-05, "loss": 0.3194, "step": 9957 }, { "epoch": 1.6215853112404837, "grad_norm": 0.15241704881191254, "learning_rate": 2.5833561257653165e-05, "loss": 0.2933, "step": 9958 }, { "epoch": 1.6217481577983146, "grad_norm": 0.1213894635438919, "learning_rate": 2.5828826855076288e-05, "loss": 0.3045, "step": 9959 }, { "epoch": 1.6219110043561455, "grad_norm": 0.11899427324533463, "learning_rate": 2.5824092422742042e-05, "loss": 0.2714, "step": 9960 }, { "epoch": 1.6220738509139763, "grad_norm": 0.08583199232816696, "learning_rate": 2.581935796082041e-05, "loss": 0.3162, "step": 9961 }, { "epoch": 1.6222366974718072, "grad_norm": 0.1619519740343094, "learning_rate": 2.581462346948137e-05, "loss": 0.3214, "step": 9962 }, { "epoch": 1.622399544029638, "grad_norm": 0.10665678232908249, "learning_rate": 2.5809888948894912e-05, "loss": 0.324, "step": 9963 }, { "epoch": 1.6225623905874689, "grad_norm": 0.11804959923028946, "learning_rate": 2.5805154399231008e-05, "loss": 0.3534, "step": 9964 }, { "epoch": 1.6227252371452998, "grad_norm": 0.09319955110549927, "learning_rate": 2.5800419820659655e-05, "loss": 0.3185, "step": 9965 }, { "epoch": 1.6228880837031308, "grad_norm": 0.14177922904491425, "learning_rate": 2.5795685213350828e-05, "loss": 0.278, "step": 9966 }, { "epoch": 1.6230509302609617, "grad_norm": 0.19617536664009094, "learning_rate": 2.579095057747452e-05, "loss": 0.3653, "step": 9967 }, { "epoch": 1.6232137768187926, "grad_norm": 0.0799892321228981, "learning_rate": 2.5786215913200713e-05, "loss": 0.2921, "step": 9968 }, { "epoch": 1.6233766233766234, "grad_norm": 0.10706054419279099, "learning_rate": 2.57814812206994e-05, "loss": 0.2686, "step": 9969 }, { "epoch": 1.6235394699344543, "grad_norm": 0.08917029201984406, "learning_rate": 2.577674650014057e-05, "loss": 0.3573, "step": 9970 }, { "epoch": 1.623702316492285, "grad_norm": 0.1580347716808319, "learning_rate": 2.5772011751694215e-05, "loss": 0.3208, "step": 9971 }, { "epoch": 1.623865163050116, "grad_norm": 0.12553586065769196, "learning_rate": 2.5767276975530323e-05, "loss": 0.287, "step": 9972 }, { "epoch": 1.624028009607947, "grad_norm": 0.08393968641757965, "learning_rate": 2.5762542171818886e-05, "loss": 0.2773, "step": 9973 }, { "epoch": 1.6241908561657779, "grad_norm": 0.13360553979873657, "learning_rate": 2.575780734072991e-05, "loss": 0.3298, "step": 9974 }, { "epoch": 1.6243537027236088, "grad_norm": 0.11191819608211517, "learning_rate": 2.5753072482433378e-05, "loss": 0.3048, "step": 9975 }, { "epoch": 1.6245165492814395, "grad_norm": 0.11474437266588211, "learning_rate": 2.5748337597099288e-05, "loss": 0.3451, "step": 9976 }, { "epoch": 1.6246793958392705, "grad_norm": 0.11922316253185272, "learning_rate": 2.5743602684897628e-05, "loss": 0.3012, "step": 9977 }, { "epoch": 1.6248422423971012, "grad_norm": 0.10716678947210312, "learning_rate": 2.573886774599841e-05, "loss": 0.3543, "step": 9978 }, { "epoch": 1.6250050889549321, "grad_norm": 0.11507114768028259, "learning_rate": 2.5734132780571624e-05, "loss": 0.3144, "step": 9979 }, { "epoch": 1.625167935512763, "grad_norm": 0.15634652972221375, "learning_rate": 2.5729397788787275e-05, "loss": 0.305, "step": 9980 }, { "epoch": 1.625330782070594, "grad_norm": 0.12148400396108627, "learning_rate": 2.5724662770815355e-05, "loss": 0.324, "step": 9981 }, { "epoch": 1.625493628628425, "grad_norm": 0.11040489375591278, "learning_rate": 2.571992772682587e-05, "loss": 0.3326, "step": 9982 }, { "epoch": 1.625656475186256, "grad_norm": 0.07268813997507095, "learning_rate": 2.571519265698883e-05, "loss": 0.3129, "step": 9983 }, { "epoch": 1.6258193217440866, "grad_norm": 0.11291728913784027, "learning_rate": 2.571045756147422e-05, "loss": 0.3206, "step": 9984 }, { "epoch": 1.6259821683019176, "grad_norm": 0.10811196267604828, "learning_rate": 2.5705722440452068e-05, "loss": 0.3065, "step": 9985 }, { "epoch": 1.6261450148597483, "grad_norm": 0.1202542707324028, "learning_rate": 2.5700987294092354e-05, "loss": 0.3141, "step": 9986 }, { "epoch": 1.6263078614175792, "grad_norm": 0.07546447962522507, "learning_rate": 2.5696252122565107e-05, "loss": 0.3334, "step": 9987 }, { "epoch": 1.6264707079754102, "grad_norm": 0.12909476459026337, "learning_rate": 2.5691516926040315e-05, "loss": 0.2876, "step": 9988 }, { "epoch": 1.626633554533241, "grad_norm": 0.08671820163726807, "learning_rate": 2.5686781704687994e-05, "loss": 0.3094, "step": 9989 }, { "epoch": 1.626796401091072, "grad_norm": 0.08190849423408508, "learning_rate": 2.568204645867815e-05, "loss": 0.3247, "step": 9990 }, { "epoch": 1.626959247648903, "grad_norm": 0.10503330081701279, "learning_rate": 2.5677311188180803e-05, "loss": 0.285, "step": 9991 }, { "epoch": 1.6271220942067337, "grad_norm": 0.14290687441825867, "learning_rate": 2.5672575893365948e-05, "loss": 0.3377, "step": 9992 }, { "epoch": 1.6272849407645644, "grad_norm": 0.07825461030006409, "learning_rate": 2.56678405744036e-05, "loss": 0.2838, "step": 9993 }, { "epoch": 1.6274477873223954, "grad_norm": 0.0975080281496048, "learning_rate": 2.5663105231463786e-05, "loss": 0.3362, "step": 9994 }, { "epoch": 1.6276106338802263, "grad_norm": 0.06719411909580231, "learning_rate": 2.56583698647165e-05, "loss": 0.3331, "step": 9995 }, { "epoch": 1.6277734804380573, "grad_norm": 0.09385480731725693, "learning_rate": 2.5653634474331767e-05, "loss": 0.3299, "step": 9996 }, { "epoch": 1.6279363269958882, "grad_norm": 0.07347521930932999, "learning_rate": 2.5648899060479597e-05, "loss": 0.3001, "step": 9997 }, { "epoch": 1.6280991735537191, "grad_norm": 0.09289586544036865, "learning_rate": 2.5644163623330007e-05, "loss": 0.339, "step": 9998 }, { "epoch": 1.6282620201115499, "grad_norm": 0.15794318914413452, "learning_rate": 2.563942816305301e-05, "loss": 0.3252, "step": 9999 }, { "epoch": 1.6284248666693808, "grad_norm": 0.15874022245407104, "learning_rate": 2.5634692679818627e-05, "loss": 0.3449, "step": 10000 }, { "epoch": 1.6285877132272115, "grad_norm": 0.11019433289766312, "learning_rate": 2.5629957173796876e-05, "loss": 0.3695, "step": 10001 }, { "epoch": 1.6287505597850425, "grad_norm": 0.07999714463949203, "learning_rate": 2.5625221645157776e-05, "loss": 0.2897, "step": 10002 }, { "epoch": 1.6289134063428734, "grad_norm": 0.09246951341629028, "learning_rate": 2.5620486094071345e-05, "loss": 0.3434, "step": 10003 }, { "epoch": 1.6290762529007043, "grad_norm": 0.14258669316768646, "learning_rate": 2.5615750520707605e-05, "loss": 0.341, "step": 10004 }, { "epoch": 1.6292390994585353, "grad_norm": 0.11359962821006775, "learning_rate": 2.5611014925236576e-05, "loss": 0.3219, "step": 10005 }, { "epoch": 1.6294019460163662, "grad_norm": 0.12112712115049362, "learning_rate": 2.560627930782828e-05, "loss": 0.2985, "step": 10006 }, { "epoch": 1.629564792574197, "grad_norm": 0.12797395884990692, "learning_rate": 2.5601543668652735e-05, "loss": 0.3298, "step": 10007 }, { "epoch": 1.629727639132028, "grad_norm": 0.11540616303682327, "learning_rate": 2.559680800787998e-05, "loss": 0.2809, "step": 10008 }, { "epoch": 1.6298904856898586, "grad_norm": 0.11029327660799026, "learning_rate": 2.5592072325680023e-05, "loss": 0.3222, "step": 10009 }, { "epoch": 1.6300533322476896, "grad_norm": 0.12467832863330841, "learning_rate": 2.5587336622222895e-05, "loss": 0.3161, "step": 10010 }, { "epoch": 1.6302161788055205, "grad_norm": 0.13812126219272614, "learning_rate": 2.5582600897678627e-05, "loss": 0.3596, "step": 10011 }, { "epoch": 1.6303790253633514, "grad_norm": 0.19459643959999084, "learning_rate": 2.557786515221724e-05, "loss": 0.3316, "step": 10012 }, { "epoch": 1.6305418719211824, "grad_norm": 0.1119799092411995, "learning_rate": 2.5573129386008754e-05, "loss": 0.3428, "step": 10013 }, { "epoch": 1.630704718479013, "grad_norm": 0.09151595085859299, "learning_rate": 2.5568393599223218e-05, "loss": 0.2957, "step": 10014 }, { "epoch": 1.630867565036844, "grad_norm": 0.13179020583629608, "learning_rate": 2.5563657792030642e-05, "loss": 0.2987, "step": 10015 }, { "epoch": 1.6310304115946748, "grad_norm": 0.1226087138056755, "learning_rate": 2.5558921964601067e-05, "loss": 0.3092, "step": 10016 }, { "epoch": 1.6311932581525057, "grad_norm": 0.15447227656841278, "learning_rate": 2.5554186117104516e-05, "loss": 0.3175, "step": 10017 }, { "epoch": 1.6313561047103367, "grad_norm": 0.0984019935131073, "learning_rate": 2.5549450249711032e-05, "loss": 0.3046, "step": 10018 }, { "epoch": 1.6315189512681676, "grad_norm": 0.16147352755069733, "learning_rate": 2.5544714362590628e-05, "loss": 0.3218, "step": 10019 }, { "epoch": 1.6316817978259985, "grad_norm": 0.11767614632844925, "learning_rate": 2.553997845591335e-05, "loss": 0.3425, "step": 10020 }, { "epoch": 1.6318446443838295, "grad_norm": 0.1178833618760109, "learning_rate": 2.5535242529849223e-05, "loss": 0.3035, "step": 10021 }, { "epoch": 1.6320074909416602, "grad_norm": 0.08770488202571869, "learning_rate": 2.5530506584568288e-05, "loss": 0.3194, "step": 10022 }, { "epoch": 1.6321703374994911, "grad_norm": 0.1469719558954239, "learning_rate": 2.552577062024058e-05, "loss": 0.3108, "step": 10023 }, { "epoch": 1.6323331840573219, "grad_norm": 0.1705806702375412, "learning_rate": 2.5521034637036124e-05, "loss": 0.3239, "step": 10024 }, { "epoch": 1.6324960306151528, "grad_norm": 0.0992213636636734, "learning_rate": 2.5516298635124975e-05, "loss": 0.301, "step": 10025 }, { "epoch": 1.6326588771729837, "grad_norm": 0.12064102292060852, "learning_rate": 2.5511562614677147e-05, "loss": 0.3117, "step": 10026 }, { "epoch": 1.6328217237308147, "grad_norm": 0.11501320451498032, "learning_rate": 2.550682657586269e-05, "loss": 0.2719, "step": 10027 }, { "epoch": 1.6329845702886456, "grad_norm": 0.1328309327363968, "learning_rate": 2.5502090518851645e-05, "loss": 0.2869, "step": 10028 }, { "epoch": 1.6331474168464766, "grad_norm": 0.09307187050580978, "learning_rate": 2.5497354443814048e-05, "loss": 0.3281, "step": 10029 }, { "epoch": 1.6333102634043073, "grad_norm": 0.100768081843853, "learning_rate": 2.549261835091993e-05, "loss": 0.301, "step": 10030 }, { "epoch": 1.633473109962138, "grad_norm": 0.09846817702054977, "learning_rate": 2.548788224033934e-05, "loss": 0.3057, "step": 10031 }, { "epoch": 1.633635956519969, "grad_norm": 0.11262128502130508, "learning_rate": 2.5483146112242317e-05, "loss": 0.2916, "step": 10032 }, { "epoch": 1.6337988030778, "grad_norm": 0.15165773034095764, "learning_rate": 2.5478409966798894e-05, "loss": 0.3685, "step": 10033 }, { "epoch": 1.6339616496356308, "grad_norm": 0.08822973072528839, "learning_rate": 2.5473673804179134e-05, "loss": 0.2785, "step": 10034 }, { "epoch": 1.6341244961934618, "grad_norm": 0.1413610428571701, "learning_rate": 2.5468937624553053e-05, "loss": 0.3735, "step": 10035 }, { "epoch": 1.6342873427512927, "grad_norm": 0.11343264579772949, "learning_rate": 2.546420142809071e-05, "loss": 0.3613, "step": 10036 }, { "epoch": 1.6344501893091234, "grad_norm": 0.156605526804924, "learning_rate": 2.5459465214962146e-05, "loss": 0.309, "step": 10037 }, { "epoch": 1.6346130358669544, "grad_norm": 0.12429535388946533, "learning_rate": 2.5454728985337407e-05, "loss": 0.3553, "step": 10038 }, { "epoch": 1.634775882424785, "grad_norm": 0.07861199229955673, "learning_rate": 2.544999273938653e-05, "loss": 0.3243, "step": 10039 }, { "epoch": 1.634938728982616, "grad_norm": 0.11316780745983124, "learning_rate": 2.544525647727957e-05, "loss": 0.3269, "step": 10040 }, { "epoch": 1.635101575540447, "grad_norm": 0.13812381029129028, "learning_rate": 2.5440520199186563e-05, "loss": 0.2971, "step": 10041 }, { "epoch": 1.635264422098278, "grad_norm": 0.13022276759147644, "learning_rate": 2.5435783905277567e-05, "loss": 0.3937, "step": 10042 }, { "epoch": 1.6354272686561089, "grad_norm": 0.0887155681848526, "learning_rate": 2.5431047595722618e-05, "loss": 0.2885, "step": 10043 }, { "epoch": 1.6355901152139398, "grad_norm": 0.08312329649925232, "learning_rate": 2.5426311270691765e-05, "loss": 0.3195, "step": 10044 }, { "epoch": 1.6357529617717705, "grad_norm": 0.11254527419805527, "learning_rate": 2.5421574930355073e-05, "loss": 0.3036, "step": 10045 }, { "epoch": 1.6359158083296015, "grad_norm": 0.1399937868118286, "learning_rate": 2.5416838574882567e-05, "loss": 0.3025, "step": 10046 }, { "epoch": 1.6360786548874322, "grad_norm": 0.08666073530912399, "learning_rate": 2.541210220444431e-05, "loss": 0.3143, "step": 10047 }, { "epoch": 1.6362415014452631, "grad_norm": 0.08794038742780685, "learning_rate": 2.540736581921035e-05, "loss": 0.3307, "step": 10048 }, { "epoch": 1.636404348003094, "grad_norm": 0.0996122807264328, "learning_rate": 2.5402629419350737e-05, "loss": 0.3133, "step": 10049 }, { "epoch": 1.636567194560925, "grad_norm": 0.12745001912117004, "learning_rate": 2.5397893005035516e-05, "loss": 0.3094, "step": 10050 }, { "epoch": 1.636730041118756, "grad_norm": 0.09905792772769928, "learning_rate": 2.5393156576434756e-05, "loss": 0.3355, "step": 10051 }, { "epoch": 1.6368928876765867, "grad_norm": 0.10388538986444473, "learning_rate": 2.5388420133718486e-05, "loss": 0.3421, "step": 10052 }, { "epoch": 1.6370557342344176, "grad_norm": 0.15546129643917084, "learning_rate": 2.5383683677056775e-05, "loss": 0.3582, "step": 10053 }, { "epoch": 1.6372185807922484, "grad_norm": 0.11120527237653732, "learning_rate": 2.5378947206619668e-05, "loss": 0.3072, "step": 10054 }, { "epoch": 1.6373814273500793, "grad_norm": 0.11615081131458282, "learning_rate": 2.5374210722577223e-05, "loss": 0.3223, "step": 10055 }, { "epoch": 1.6375442739079102, "grad_norm": 0.10254724323749542, "learning_rate": 2.5369474225099494e-05, "loss": 0.323, "step": 10056 }, { "epoch": 1.6377071204657412, "grad_norm": 0.09196248650550842, "learning_rate": 2.5364737714356528e-05, "loss": 0.3495, "step": 10057 }, { "epoch": 1.6378699670235721, "grad_norm": 0.11401189863681793, "learning_rate": 2.536000119051839e-05, "loss": 0.3005, "step": 10058 }, { "epoch": 1.638032813581403, "grad_norm": 0.1291869580745697, "learning_rate": 2.535526465375513e-05, "loss": 0.2842, "step": 10059 }, { "epoch": 1.6381956601392338, "grad_norm": 0.13090138137340546, "learning_rate": 2.5350528104236803e-05, "loss": 0.3248, "step": 10060 }, { "epoch": 1.6383585066970647, "grad_norm": 0.09405755251646042, "learning_rate": 2.5345791542133462e-05, "loss": 0.3124, "step": 10061 }, { "epoch": 1.6385213532548955, "grad_norm": 0.095784492790699, "learning_rate": 2.534105496761518e-05, "loss": 0.3149, "step": 10062 }, { "epoch": 1.6386841998127264, "grad_norm": 0.09337091445922852, "learning_rate": 2.5336318380851993e-05, "loss": 0.3146, "step": 10063 }, { "epoch": 1.6388470463705573, "grad_norm": 0.07164687663316727, "learning_rate": 2.5331581782013968e-05, "loss": 0.2956, "step": 10064 }, { "epoch": 1.6390098929283883, "grad_norm": 0.17286385595798492, "learning_rate": 2.532684517127117e-05, "loss": 0.3366, "step": 10065 }, { "epoch": 1.6391727394862192, "grad_norm": 0.14064434170722961, "learning_rate": 2.5322108548793644e-05, "loss": 0.3163, "step": 10066 }, { "epoch": 1.63933558604405, "grad_norm": 0.1658543348312378, "learning_rate": 2.531737191475146e-05, "loss": 0.2797, "step": 10067 }, { "epoch": 1.6394984326018809, "grad_norm": 0.11668618023395538, "learning_rate": 2.5312635269314672e-05, "loss": 0.3034, "step": 10068 }, { "epoch": 1.6396612791597116, "grad_norm": 0.08902545273303986, "learning_rate": 2.5307898612653347e-05, "loss": 0.3362, "step": 10069 }, { "epoch": 1.6398241257175425, "grad_norm": 0.14126041531562805, "learning_rate": 2.530316194493753e-05, "loss": 0.3088, "step": 10070 }, { "epoch": 1.6399869722753735, "grad_norm": 0.12217500805854797, "learning_rate": 2.52984252663373e-05, "loss": 0.3386, "step": 10071 }, { "epoch": 1.6401498188332044, "grad_norm": 0.1443561464548111, "learning_rate": 2.52936885770227e-05, "loss": 0.338, "step": 10072 }, { "epoch": 1.6403126653910354, "grad_norm": 0.12140411138534546, "learning_rate": 2.5288951877163807e-05, "loss": 0.317, "step": 10073 }, { "epoch": 1.6404755119488663, "grad_norm": 0.1145615354180336, "learning_rate": 2.5284215166930673e-05, "loss": 0.3143, "step": 10074 }, { "epoch": 1.640638358506697, "grad_norm": 0.11192367225885391, "learning_rate": 2.5279478446493364e-05, "loss": 0.2856, "step": 10075 }, { "epoch": 1.640801205064528, "grad_norm": 0.13001155853271484, "learning_rate": 2.5274741716021944e-05, "loss": 0.3456, "step": 10076 }, { "epoch": 1.6409640516223587, "grad_norm": 0.12306760996580124, "learning_rate": 2.527000497568647e-05, "loss": 0.3348, "step": 10077 }, { "epoch": 1.6411268981801896, "grad_norm": 0.08576179295778275, "learning_rate": 2.5265268225657013e-05, "loss": 0.3139, "step": 10078 }, { "epoch": 1.6412897447380206, "grad_norm": 0.0823695957660675, "learning_rate": 2.5260531466103628e-05, "loss": 0.3039, "step": 10079 }, { "epoch": 1.6414525912958515, "grad_norm": 0.11497601866722107, "learning_rate": 2.5255794697196383e-05, "loss": 0.3187, "step": 10080 }, { "epoch": 1.6416154378536825, "grad_norm": 0.12259500473737717, "learning_rate": 2.5251057919105336e-05, "loss": 0.29, "step": 10081 }, { "epoch": 1.6417782844115134, "grad_norm": 0.10394566506147385, "learning_rate": 2.5246321132000568e-05, "loss": 0.2779, "step": 10082 }, { "epoch": 1.6419411309693441, "grad_norm": 0.1198996752500534, "learning_rate": 2.5241584336052128e-05, "loss": 0.3223, "step": 10083 }, { "epoch": 1.642103977527175, "grad_norm": 0.12164110690355301, "learning_rate": 2.5236847531430084e-05, "loss": 0.3385, "step": 10084 }, { "epoch": 1.6422668240850058, "grad_norm": 0.0932430848479271, "learning_rate": 2.5232110718304508e-05, "loss": 0.3058, "step": 10085 }, { "epoch": 1.6424296706428367, "grad_norm": 0.15388433635234833, "learning_rate": 2.5227373896845462e-05, "loss": 0.2912, "step": 10086 }, { "epoch": 1.6425925172006677, "grad_norm": 0.09918906539678574, "learning_rate": 2.522263706722301e-05, "loss": 0.3433, "step": 10087 }, { "epoch": 1.6427553637584986, "grad_norm": 0.09263879805803299, "learning_rate": 2.5217900229607218e-05, "loss": 0.3152, "step": 10088 }, { "epoch": 1.6429182103163296, "grad_norm": 0.13297197222709656, "learning_rate": 2.5213163384168158e-05, "loss": 0.3695, "step": 10089 }, { "epoch": 1.6430810568741603, "grad_norm": 0.15849615633487701, "learning_rate": 2.520842653107589e-05, "loss": 0.3501, "step": 10090 }, { "epoch": 1.6432439034319912, "grad_norm": 0.12617610394954681, "learning_rate": 2.5203689670500497e-05, "loss": 0.3616, "step": 10091 }, { "epoch": 1.643406749989822, "grad_norm": 0.07613400369882584, "learning_rate": 2.5198952802612018e-05, "loss": 0.2973, "step": 10092 }, { "epoch": 1.643569596547653, "grad_norm": 0.13418953120708466, "learning_rate": 2.5194215927580557e-05, "loss": 0.3144, "step": 10093 }, { "epoch": 1.6437324431054838, "grad_norm": 0.15119844675064087, "learning_rate": 2.5189479045576142e-05, "loss": 0.3163, "step": 10094 }, { "epoch": 1.6438952896633148, "grad_norm": 0.09782149642705917, "learning_rate": 2.518474215676887e-05, "loss": 0.3035, "step": 10095 }, { "epoch": 1.6440581362211457, "grad_norm": 0.09330122172832489, "learning_rate": 2.5180005261328804e-05, "loss": 0.2983, "step": 10096 }, { "epoch": 1.6442209827789767, "grad_norm": 0.11225885897874832, "learning_rate": 2.517526835942601e-05, "loss": 0.3089, "step": 10097 }, { "epoch": 1.6443838293368074, "grad_norm": 0.1149633526802063, "learning_rate": 2.5170531451230555e-05, "loss": 0.3026, "step": 10098 }, { "epoch": 1.6445466758946383, "grad_norm": 0.12244366854429245, "learning_rate": 2.5165794536912507e-05, "loss": 0.3425, "step": 10099 }, { "epoch": 1.644709522452469, "grad_norm": 0.12964531779289246, "learning_rate": 2.516105761664194e-05, "loss": 0.3173, "step": 10100 }, { "epoch": 1.6448723690103, "grad_norm": 0.09123829752206802, "learning_rate": 2.5156320690588926e-05, "loss": 0.3304, "step": 10101 }, { "epoch": 1.645035215568131, "grad_norm": 0.10078473389148712, "learning_rate": 2.515158375892353e-05, "loss": 0.3183, "step": 10102 }, { "epoch": 1.6451980621259619, "grad_norm": 0.11473894864320755, "learning_rate": 2.5146846821815818e-05, "loss": 0.3277, "step": 10103 }, { "epoch": 1.6453609086837928, "grad_norm": 0.14943687617778778, "learning_rate": 2.5142109879435866e-05, "loss": 0.3203, "step": 10104 }, { "epoch": 1.6455237552416235, "grad_norm": 0.15242289006710052, "learning_rate": 2.513737293195374e-05, "loss": 0.3235, "step": 10105 }, { "epoch": 1.6456866017994545, "grad_norm": 0.12935511767864227, "learning_rate": 2.5132635979539522e-05, "loss": 0.3262, "step": 10106 }, { "epoch": 1.6458494483572852, "grad_norm": 0.08564958721399307, "learning_rate": 2.512789902236327e-05, "loss": 0.3029, "step": 10107 }, { "epoch": 1.6460122949151161, "grad_norm": 0.08538403362035751, "learning_rate": 2.5123162060595063e-05, "loss": 0.3446, "step": 10108 }, { "epoch": 1.646175141472947, "grad_norm": 0.08087750524282455, "learning_rate": 2.5118425094404968e-05, "loss": 0.3404, "step": 10109 }, { "epoch": 1.646337988030778, "grad_norm": 0.141726553440094, "learning_rate": 2.511368812396306e-05, "loss": 0.3365, "step": 10110 }, { "epoch": 1.646500834588609, "grad_norm": 0.11136698722839355, "learning_rate": 2.5108951149439403e-05, "loss": 0.3314, "step": 10111 }, { "epoch": 1.64666368114644, "grad_norm": 0.11497148871421814, "learning_rate": 2.5104214171004074e-05, "loss": 0.3118, "step": 10112 }, { "epoch": 1.6468265277042706, "grad_norm": 0.1115911677479744, "learning_rate": 2.5099477188827147e-05, "loss": 0.3213, "step": 10113 }, { "epoch": 1.6469893742621016, "grad_norm": 0.11287064850330353, "learning_rate": 2.509474020307869e-05, "loss": 0.3139, "step": 10114 }, { "epoch": 1.6471522208199323, "grad_norm": 0.1140795573592186, "learning_rate": 2.509000321392877e-05, "loss": 0.3058, "step": 10115 }, { "epoch": 1.6473150673777632, "grad_norm": 0.12526483833789825, "learning_rate": 2.5085266221547477e-05, "loss": 0.3212, "step": 10116 }, { "epoch": 1.6474779139355942, "grad_norm": 0.13571284711360931, "learning_rate": 2.5080529226104858e-05, "loss": 0.3646, "step": 10117 }, { "epoch": 1.6476407604934251, "grad_norm": 0.12463678419589996, "learning_rate": 2.507579222777101e-05, "loss": 0.3184, "step": 10118 }, { "epoch": 1.647803607051256, "grad_norm": 0.25501391291618347, "learning_rate": 2.507105522671599e-05, "loss": 0.3377, "step": 10119 }, { "epoch": 1.647966453609087, "grad_norm": 0.08308073878288269, "learning_rate": 2.5066318223109874e-05, "loss": 0.3164, "step": 10120 }, { "epoch": 1.6481293001669177, "grad_norm": 0.1431647539138794, "learning_rate": 2.5061581217122732e-05, "loss": 0.2628, "step": 10121 }, { "epoch": 1.6482921467247484, "grad_norm": 0.12697583436965942, "learning_rate": 2.5056844208924645e-05, "loss": 0.3206, "step": 10122 }, { "epoch": 1.6484549932825794, "grad_norm": 0.10243409872055054, "learning_rate": 2.5052107198685675e-05, "loss": 0.2742, "step": 10123 }, { "epoch": 1.6486178398404103, "grad_norm": 0.10028207302093506, "learning_rate": 2.50473701865759e-05, "loss": 0.3156, "step": 10124 }, { "epoch": 1.6487806863982413, "grad_norm": 0.092779740691185, "learning_rate": 2.5042633172765394e-05, "loss": 0.3377, "step": 10125 }, { "epoch": 1.6489435329560722, "grad_norm": 0.10634223371744156, "learning_rate": 2.5037896157424233e-05, "loss": 0.2957, "step": 10126 }, { "epoch": 1.6491063795139032, "grad_norm": 0.14320459961891174, "learning_rate": 2.503315914072249e-05, "loss": 0.3337, "step": 10127 }, { "epoch": 1.6492692260717339, "grad_norm": 0.0859302505850792, "learning_rate": 2.5028422122830226e-05, "loss": 0.3346, "step": 10128 }, { "epoch": 1.6494320726295648, "grad_norm": 0.0971306711435318, "learning_rate": 2.5023685103917527e-05, "loss": 0.3639, "step": 10129 }, { "epoch": 1.6495949191873955, "grad_norm": 0.12542949616909027, "learning_rate": 2.5018948084154458e-05, "loss": 0.3509, "step": 10130 }, { "epoch": 1.6497577657452265, "grad_norm": 0.08607657998800278, "learning_rate": 2.5014211063711106e-05, "loss": 0.3009, "step": 10131 }, { "epoch": 1.6499206123030574, "grad_norm": 0.11072875559329987, "learning_rate": 2.5009474042757526e-05, "loss": 0.3418, "step": 10132 }, { "epoch": 1.6500834588608884, "grad_norm": 0.08432147651910782, "learning_rate": 2.5004737021463804e-05, "loss": 0.3428, "step": 10133 }, { "epoch": 1.6502463054187193, "grad_norm": 0.11501938104629517, "learning_rate": 2.5e-05, "loss": 0.3387, "step": 10134 }, { "epoch": 1.6504091519765502, "grad_norm": 0.15003548562526703, "learning_rate": 2.499526297853621e-05, "loss": 0.3095, "step": 10135 }, { "epoch": 1.650571998534381, "grad_norm": 0.13752757012844086, "learning_rate": 2.4990525957242473e-05, "loss": 0.3554, "step": 10136 }, { "epoch": 1.650734845092212, "grad_norm": 0.14879049360752106, "learning_rate": 2.49857889362889e-05, "loss": 0.3126, "step": 10137 }, { "epoch": 1.6508976916500426, "grad_norm": 0.12047852575778961, "learning_rate": 2.4981051915845544e-05, "loss": 0.3304, "step": 10138 }, { "epoch": 1.6510605382078736, "grad_norm": 0.1017102599143982, "learning_rate": 2.4976314896082482e-05, "loss": 0.3326, "step": 10139 }, { "epoch": 1.6512233847657045, "grad_norm": 0.12475667893886566, "learning_rate": 2.4971577877169773e-05, "loss": 0.3557, "step": 10140 }, { "epoch": 1.6513862313235355, "grad_norm": 0.1352468729019165, "learning_rate": 2.496684085927752e-05, "loss": 0.3644, "step": 10141 }, { "epoch": 1.6515490778813664, "grad_norm": 0.0974460020661354, "learning_rate": 2.4962103842575773e-05, "loss": 0.3123, "step": 10142 }, { "epoch": 1.6517119244391971, "grad_norm": 0.12637323141098022, "learning_rate": 2.495736682723461e-05, "loss": 0.3175, "step": 10143 }, { "epoch": 1.651874770997028, "grad_norm": 0.09365633130073547, "learning_rate": 2.49526298134241e-05, "loss": 0.3171, "step": 10144 }, { "epoch": 1.6520376175548588, "grad_norm": 0.10073872655630112, "learning_rate": 2.494789280131433e-05, "loss": 0.3241, "step": 10145 }, { "epoch": 1.6522004641126897, "grad_norm": 0.1487228125333786, "learning_rate": 2.4943155791075364e-05, "loss": 0.315, "step": 10146 }, { "epoch": 1.6523633106705207, "grad_norm": 0.2092706859111786, "learning_rate": 2.4938418782877273e-05, "loss": 0.3412, "step": 10147 }, { "epoch": 1.6525261572283516, "grad_norm": 0.15900224447250366, "learning_rate": 2.493368177689013e-05, "loss": 0.3446, "step": 10148 }, { "epoch": 1.6526890037861826, "grad_norm": 0.1038845106959343, "learning_rate": 2.492894477328402e-05, "loss": 0.3518, "step": 10149 }, { "epoch": 1.6528518503440135, "grad_norm": 0.10144037008285522, "learning_rate": 2.4924207772229e-05, "loss": 0.283, "step": 10150 }, { "epoch": 1.6530146969018442, "grad_norm": 0.1405886560678482, "learning_rate": 2.491947077389514e-05, "loss": 0.3244, "step": 10151 }, { "epoch": 1.6531775434596752, "grad_norm": 0.13223931193351746, "learning_rate": 2.491473377845253e-05, "loss": 0.3307, "step": 10152 }, { "epoch": 1.6533403900175059, "grad_norm": 0.1140432208776474, "learning_rate": 2.4909996786071233e-05, "loss": 0.3006, "step": 10153 }, { "epoch": 1.6535032365753368, "grad_norm": 0.10768603533506393, "learning_rate": 2.490525979692132e-05, "loss": 0.29, "step": 10154 }, { "epoch": 1.6536660831331678, "grad_norm": 0.11904673278331757, "learning_rate": 2.4900522811172855e-05, "loss": 0.319, "step": 10155 }, { "epoch": 1.6538289296909987, "grad_norm": 0.09771377593278885, "learning_rate": 2.4895785828995928e-05, "loss": 0.2935, "step": 10156 }, { "epoch": 1.6539917762488296, "grad_norm": 0.08235158771276474, "learning_rate": 2.4891048850560606e-05, "loss": 0.2786, "step": 10157 }, { "epoch": 1.6541546228066606, "grad_norm": 0.1283239722251892, "learning_rate": 2.4886311876036944e-05, "loss": 0.341, "step": 10158 }, { "epoch": 1.6543174693644913, "grad_norm": 0.16331201791763306, "learning_rate": 2.4881574905595038e-05, "loss": 0.3279, "step": 10159 }, { "epoch": 1.654480315922322, "grad_norm": 0.09421087056398392, "learning_rate": 2.4876837939404946e-05, "loss": 0.3058, "step": 10160 }, { "epoch": 1.654643162480153, "grad_norm": 0.10793612897396088, "learning_rate": 2.4872100977636732e-05, "loss": 0.3407, "step": 10161 }, { "epoch": 1.654806009037984, "grad_norm": 0.14507953822612762, "learning_rate": 2.486736402046048e-05, "loss": 0.3086, "step": 10162 }, { "epoch": 1.6549688555958149, "grad_norm": 0.10685498267412186, "learning_rate": 2.4862627068046265e-05, "loss": 0.3411, "step": 10163 }, { "epoch": 1.6551317021536458, "grad_norm": 0.10484250634908676, "learning_rate": 2.4857890120564143e-05, "loss": 0.2697, "step": 10164 }, { "epoch": 1.6552945487114767, "grad_norm": 0.08458472043275833, "learning_rate": 2.4853153178184188e-05, "loss": 0.2857, "step": 10165 }, { "epoch": 1.6554573952693075, "grad_norm": 0.10561361163854599, "learning_rate": 2.4848416241076478e-05, "loss": 0.3104, "step": 10166 }, { "epoch": 1.6556202418271384, "grad_norm": 0.0977843701839447, "learning_rate": 2.4843679309411083e-05, "loss": 0.2921, "step": 10167 }, { "epoch": 1.6557830883849691, "grad_norm": 0.10457760095596313, "learning_rate": 2.4838942383358068e-05, "loss": 0.2975, "step": 10168 }, { "epoch": 1.6559459349428, "grad_norm": 0.10328680276870728, "learning_rate": 2.4834205463087492e-05, "loss": 0.315, "step": 10169 }, { "epoch": 1.656108781500631, "grad_norm": 0.1476839929819107, "learning_rate": 2.4829468548769454e-05, "loss": 0.3484, "step": 10170 }, { "epoch": 1.656271628058462, "grad_norm": 0.0578385554254055, "learning_rate": 2.4824731640573997e-05, "loss": 0.3237, "step": 10171 }, { "epoch": 1.656434474616293, "grad_norm": 0.07674404978752136, "learning_rate": 2.4819994738671206e-05, "loss": 0.3196, "step": 10172 }, { "epoch": 1.6565973211741238, "grad_norm": 0.08755140751600266, "learning_rate": 2.481525784323113e-05, "loss": 0.3011, "step": 10173 }, { "epoch": 1.6567601677319546, "grad_norm": 0.0763726457953453, "learning_rate": 2.481052095442386e-05, "loss": 0.2961, "step": 10174 }, { "epoch": 1.6569230142897855, "grad_norm": 0.08384585380554199, "learning_rate": 2.480578407241946e-05, "loss": 0.3104, "step": 10175 }, { "epoch": 1.6570858608476162, "grad_norm": 0.0838785395026207, "learning_rate": 2.4801047197387978e-05, "loss": 0.3033, "step": 10176 }, { "epoch": 1.6572487074054472, "grad_norm": 0.12303003668785095, "learning_rate": 2.4796310329499512e-05, "loss": 0.3202, "step": 10177 }, { "epoch": 1.657411553963278, "grad_norm": 0.16229461133480072, "learning_rate": 2.4791573468924112e-05, "loss": 0.3294, "step": 10178 }, { "epoch": 1.657574400521109, "grad_norm": 0.10505356639623642, "learning_rate": 2.478683661583185e-05, "loss": 0.3194, "step": 10179 }, { "epoch": 1.65773724707894, "grad_norm": 0.11730413883924484, "learning_rate": 2.4782099770392785e-05, "loss": 0.2992, "step": 10180 }, { "epoch": 1.6579000936367707, "grad_norm": 0.10647916048765182, "learning_rate": 2.4777362932777e-05, "loss": 0.3182, "step": 10181 }, { "epoch": 1.6580629401946017, "grad_norm": 0.08678159862756729, "learning_rate": 2.4772626103154544e-05, "loss": 0.2889, "step": 10182 }, { "epoch": 1.6582257867524324, "grad_norm": 0.06941454112529755, "learning_rate": 2.47678892816955e-05, "loss": 0.2942, "step": 10183 }, { "epoch": 1.6583886333102633, "grad_norm": 0.1185278370976448, "learning_rate": 2.476315246856992e-05, "loss": 0.3184, "step": 10184 }, { "epoch": 1.6585514798680943, "grad_norm": 0.11561319231987, "learning_rate": 2.4758415663947878e-05, "loss": 0.3341, "step": 10185 }, { "epoch": 1.6587143264259252, "grad_norm": 0.08681176602840424, "learning_rate": 2.475367886799944e-05, "loss": 0.3625, "step": 10186 }, { "epoch": 1.6588771729837561, "grad_norm": 0.08687455207109451, "learning_rate": 2.474894208089466e-05, "loss": 0.323, "step": 10187 }, { "epoch": 1.659040019541587, "grad_norm": 0.07515392452478409, "learning_rate": 2.4744205302803626e-05, "loss": 0.3585, "step": 10188 }, { "epoch": 1.6592028660994178, "grad_norm": 0.10885671526193619, "learning_rate": 2.473946853389638e-05, "loss": 0.3322, "step": 10189 }, { "epoch": 1.6593657126572487, "grad_norm": 0.10205595940351486, "learning_rate": 2.4734731774343e-05, "loss": 0.3113, "step": 10190 }, { "epoch": 1.6595285592150795, "grad_norm": 0.08057509362697601, "learning_rate": 2.4729995024313534e-05, "loss": 0.2995, "step": 10191 }, { "epoch": 1.6596914057729104, "grad_norm": 0.13232383131980896, "learning_rate": 2.4725258283978062e-05, "loss": 0.323, "step": 10192 }, { "epoch": 1.6598542523307414, "grad_norm": 0.08814921975135803, "learning_rate": 2.472052155350664e-05, "loss": 0.3002, "step": 10193 }, { "epoch": 1.6600170988885723, "grad_norm": 0.08947721868753433, "learning_rate": 2.4715784833069336e-05, "loss": 0.3537, "step": 10194 }, { "epoch": 1.6601799454464032, "grad_norm": 0.12076587229967117, "learning_rate": 2.4711048122836195e-05, "loss": 0.308, "step": 10195 }, { "epoch": 1.660342792004234, "grad_norm": 0.1256609708070755, "learning_rate": 2.47063114229773e-05, "loss": 0.3274, "step": 10196 }, { "epoch": 1.660505638562065, "grad_norm": 0.170745849609375, "learning_rate": 2.470157473366271e-05, "loss": 0.3093, "step": 10197 }, { "epoch": 1.6606684851198956, "grad_norm": 0.09690362960100174, "learning_rate": 2.469683805506247e-05, "loss": 0.3143, "step": 10198 }, { "epoch": 1.6608313316777266, "grad_norm": 0.1622890830039978, "learning_rate": 2.4692101387346662e-05, "loss": 0.3602, "step": 10199 }, { "epoch": 1.6609941782355575, "grad_norm": 0.11858173459768295, "learning_rate": 2.4687364730685333e-05, "loss": 0.3051, "step": 10200 }, { "epoch": 1.6611570247933884, "grad_norm": 0.086652971804142, "learning_rate": 2.4682628085248546e-05, "loss": 0.3089, "step": 10201 }, { "epoch": 1.6613198713512194, "grad_norm": 0.10339855402708054, "learning_rate": 2.467789145120636e-05, "loss": 0.3178, "step": 10202 }, { "epoch": 1.6614827179090503, "grad_norm": 0.05904339998960495, "learning_rate": 2.4673154828728837e-05, "loss": 0.333, "step": 10203 }, { "epoch": 1.661645564466881, "grad_norm": 0.11200587451457977, "learning_rate": 2.4668418217986038e-05, "loss": 0.3087, "step": 10204 }, { "epoch": 1.661808411024712, "grad_norm": 0.11577613651752472, "learning_rate": 2.466368161914802e-05, "loss": 0.3072, "step": 10205 }, { "epoch": 1.6619712575825427, "grad_norm": 0.09320227801799774, "learning_rate": 2.465894503238483e-05, "loss": 0.346, "step": 10206 }, { "epoch": 1.6621341041403737, "grad_norm": 0.11690761148929596, "learning_rate": 2.4654208457866543e-05, "loss": 0.2944, "step": 10207 }, { "epoch": 1.6622969506982046, "grad_norm": 0.13675490021705627, "learning_rate": 2.464947189576321e-05, "loss": 0.3449, "step": 10208 }, { "epoch": 1.6624597972560355, "grad_norm": 0.10815577208995819, "learning_rate": 2.4644735346244874e-05, "loss": 0.3578, "step": 10209 }, { "epoch": 1.6626226438138665, "grad_norm": 0.09725695848464966, "learning_rate": 2.4639998809481614e-05, "loss": 0.3107, "step": 10210 }, { "epoch": 1.6627854903716974, "grad_norm": 0.08387166261672974, "learning_rate": 2.4635262285643478e-05, "loss": 0.3512, "step": 10211 }, { "epoch": 1.6629483369295281, "grad_norm": 0.09004319459199905, "learning_rate": 2.463052577490052e-05, "loss": 0.3107, "step": 10212 }, { "epoch": 1.663111183487359, "grad_norm": 0.11297386884689331, "learning_rate": 2.462578927742278e-05, "loss": 0.3202, "step": 10213 }, { "epoch": 1.6632740300451898, "grad_norm": 0.1012098416686058, "learning_rate": 2.4621052793380338e-05, "loss": 0.3123, "step": 10214 }, { "epoch": 1.6634368766030208, "grad_norm": 0.14136174321174622, "learning_rate": 2.4616316322943235e-05, "loss": 0.3002, "step": 10215 }, { "epoch": 1.6635997231608517, "grad_norm": 0.11162499338388443, "learning_rate": 2.4611579866281513e-05, "loss": 0.2903, "step": 10216 }, { "epoch": 1.6637625697186826, "grad_norm": 0.10985115170478821, "learning_rate": 2.460684342356525e-05, "loss": 0.3306, "step": 10217 }, { "epoch": 1.6639254162765136, "grad_norm": 0.14460237324237823, "learning_rate": 2.4602106994964486e-05, "loss": 0.3408, "step": 10218 }, { "epoch": 1.6640882628343443, "grad_norm": 0.11632297933101654, "learning_rate": 2.4597370580649268e-05, "loss": 0.2949, "step": 10219 }, { "epoch": 1.6642511093921752, "grad_norm": 0.12651580572128296, "learning_rate": 2.4592634180789654e-05, "loss": 0.3055, "step": 10220 }, { "epoch": 1.664413955950006, "grad_norm": 0.1373094767332077, "learning_rate": 2.4587897795555696e-05, "loss": 0.3154, "step": 10221 }, { "epoch": 1.664576802507837, "grad_norm": 0.11655165255069733, "learning_rate": 2.458316142511744e-05, "loss": 0.3095, "step": 10222 }, { "epoch": 1.6647396490656678, "grad_norm": 0.10916424542665482, "learning_rate": 2.457842506964494e-05, "loss": 0.3023, "step": 10223 }, { "epoch": 1.6649024956234988, "grad_norm": 0.09710490703582764, "learning_rate": 2.457368872930823e-05, "loss": 0.3173, "step": 10224 }, { "epoch": 1.6650653421813297, "grad_norm": 0.13576926290988922, "learning_rate": 2.4568952404277387e-05, "loss": 0.3358, "step": 10225 }, { "epoch": 1.6652281887391607, "grad_norm": 0.16800326108932495, "learning_rate": 2.4564216094722446e-05, "loss": 0.3312, "step": 10226 }, { "epoch": 1.6653910352969914, "grad_norm": 0.07029598951339722, "learning_rate": 2.4559479800813436e-05, "loss": 0.3535, "step": 10227 }, { "epoch": 1.6655538818548223, "grad_norm": 0.0829535722732544, "learning_rate": 2.4554743522720435e-05, "loss": 0.3122, "step": 10228 }, { "epoch": 1.665716728412653, "grad_norm": 0.10196786373853683, "learning_rate": 2.4550007260613474e-05, "loss": 0.3697, "step": 10229 }, { "epoch": 1.665879574970484, "grad_norm": 0.10175962746143341, "learning_rate": 2.4545271014662605e-05, "loss": 0.3371, "step": 10230 }, { "epoch": 1.666042421528315, "grad_norm": 0.17248257994651794, "learning_rate": 2.4540534785037853e-05, "loss": 0.3454, "step": 10231 }, { "epoch": 1.6662052680861459, "grad_norm": 0.12671618163585663, "learning_rate": 2.453579857190929e-05, "loss": 0.3517, "step": 10232 }, { "epoch": 1.6663681146439768, "grad_norm": 0.10944921523332596, "learning_rate": 2.4531062375446953e-05, "loss": 0.2807, "step": 10233 }, { "epoch": 1.6665309612018075, "grad_norm": 0.11998728662729263, "learning_rate": 2.452632619582088e-05, "loss": 0.3016, "step": 10234 }, { "epoch": 1.6666938077596385, "grad_norm": 0.11521486192941666, "learning_rate": 2.45215900332011e-05, "loss": 0.3475, "step": 10235 }, { "epoch": 1.6668566543174692, "grad_norm": 0.11978866904973984, "learning_rate": 2.451685388775769e-05, "loss": 0.3019, "step": 10236 }, { "epoch": 1.6670195008753002, "grad_norm": 0.11991877853870392, "learning_rate": 2.4512117759660667e-05, "loss": 0.3218, "step": 10237 }, { "epoch": 1.667182347433131, "grad_norm": 0.11017823219299316, "learning_rate": 2.4507381649080073e-05, "loss": 0.3508, "step": 10238 }, { "epoch": 1.667345193990962, "grad_norm": 0.09219803661108017, "learning_rate": 2.450264555618596e-05, "loss": 0.3162, "step": 10239 }, { "epoch": 1.667508040548793, "grad_norm": 0.16460849344730377, "learning_rate": 2.4497909481148357e-05, "loss": 0.3176, "step": 10240 }, { "epoch": 1.667670887106624, "grad_norm": 0.11007468402385712, "learning_rate": 2.4493173424137316e-05, "loss": 0.3097, "step": 10241 }, { "epoch": 1.6678337336644546, "grad_norm": 0.10162743180990219, "learning_rate": 2.4488437385322856e-05, "loss": 0.2991, "step": 10242 }, { "epoch": 1.6679965802222856, "grad_norm": 0.10873477905988693, "learning_rate": 2.4483701364875034e-05, "loss": 0.3201, "step": 10243 }, { "epoch": 1.6681594267801163, "grad_norm": 0.1020440012216568, "learning_rate": 2.447896536296388e-05, "loss": 0.3611, "step": 10244 }, { "epoch": 1.6683222733379472, "grad_norm": 0.1123468428850174, "learning_rate": 2.4474229379759432e-05, "loss": 0.3231, "step": 10245 }, { "epoch": 1.6684851198957782, "grad_norm": 0.14027626812458038, "learning_rate": 2.4469493415431715e-05, "loss": 0.3262, "step": 10246 }, { "epoch": 1.6686479664536091, "grad_norm": 0.15068088471889496, "learning_rate": 2.4464757470150783e-05, "loss": 0.3021, "step": 10247 }, { "epoch": 1.66881081301144, "grad_norm": 0.09409716725349426, "learning_rate": 2.446002154408666e-05, "loss": 0.33, "step": 10248 }, { "epoch": 1.668973659569271, "grad_norm": 0.07113645225763321, "learning_rate": 2.4455285637409374e-05, "loss": 0.3039, "step": 10249 }, { "epoch": 1.6691365061271017, "grad_norm": 0.09909191727638245, "learning_rate": 2.4450549750288977e-05, "loss": 0.3233, "step": 10250 }, { "epoch": 1.6692993526849325, "grad_norm": 0.13527722656726837, "learning_rate": 2.4445813882895487e-05, "loss": 0.34, "step": 10251 }, { "epoch": 1.6694621992427634, "grad_norm": 0.11698701232671738, "learning_rate": 2.444107803539894e-05, "loss": 0.3088, "step": 10252 }, { "epoch": 1.6696250458005943, "grad_norm": 0.15032215416431427, "learning_rate": 2.4436342207969357e-05, "loss": 0.3345, "step": 10253 }, { "epoch": 1.6697878923584253, "grad_norm": 0.0833006352186203, "learning_rate": 2.4431606400776785e-05, "loss": 0.2906, "step": 10254 }, { "epoch": 1.6699507389162562, "grad_norm": 0.144759863615036, "learning_rate": 2.442687061399125e-05, "loss": 0.3259, "step": 10255 }, { "epoch": 1.6701135854740872, "grad_norm": 0.1125585064291954, "learning_rate": 2.442213484778277e-05, "loss": 0.3229, "step": 10256 }, { "epoch": 1.670276432031918, "grad_norm": 0.11463699489831924, "learning_rate": 2.441739910232138e-05, "loss": 0.3319, "step": 10257 }, { "epoch": 1.6704392785897488, "grad_norm": 0.09719318151473999, "learning_rate": 2.441266337777711e-05, "loss": 0.3399, "step": 10258 }, { "epoch": 1.6706021251475796, "grad_norm": 0.1453775018453598, "learning_rate": 2.4407927674319983e-05, "loss": 0.3804, "step": 10259 }, { "epoch": 1.6707649717054105, "grad_norm": 0.07913671433925629, "learning_rate": 2.4403191992120024e-05, "loss": 0.3008, "step": 10260 }, { "epoch": 1.6709278182632414, "grad_norm": 0.14312508702278137, "learning_rate": 2.4398456331347268e-05, "loss": 0.2949, "step": 10261 }, { "epoch": 1.6710906648210724, "grad_norm": 0.08425687253475189, "learning_rate": 2.4393720692171727e-05, "loss": 0.3283, "step": 10262 }, { "epoch": 1.6712535113789033, "grad_norm": 0.06676442921161652, "learning_rate": 2.4388985074763433e-05, "loss": 0.3238, "step": 10263 }, { "epoch": 1.6714163579367343, "grad_norm": 0.08432498574256897, "learning_rate": 2.4384249479292397e-05, "loss": 0.2949, "step": 10264 }, { "epoch": 1.671579204494565, "grad_norm": 0.1089143380522728, "learning_rate": 2.437951390592866e-05, "loss": 0.3862, "step": 10265 }, { "epoch": 1.671742051052396, "grad_norm": 0.12415440380573273, "learning_rate": 2.4374778354842233e-05, "loss": 0.3374, "step": 10266 }, { "epoch": 1.6719048976102266, "grad_norm": 0.12431032210588455, "learning_rate": 2.4370042826203123e-05, "loss": 0.3616, "step": 10267 }, { "epoch": 1.6720677441680576, "grad_norm": 0.14148268103599548, "learning_rate": 2.4365307320181376e-05, "loss": 0.335, "step": 10268 }, { "epoch": 1.6722305907258885, "grad_norm": 0.1017434149980545, "learning_rate": 2.4360571836946996e-05, "loss": 0.3104, "step": 10269 }, { "epoch": 1.6723934372837195, "grad_norm": 0.13736078143119812, "learning_rate": 2.4355836376670005e-05, "loss": 0.3216, "step": 10270 }, { "epoch": 1.6725562838415504, "grad_norm": 0.11901559680700302, "learning_rate": 2.4351100939520405e-05, "loss": 0.3057, "step": 10271 }, { "epoch": 1.6727191303993811, "grad_norm": 0.10979362577199936, "learning_rate": 2.434636552566824e-05, "loss": 0.3229, "step": 10272 }, { "epoch": 1.672881976957212, "grad_norm": 0.09298457950353622, "learning_rate": 2.4341630135283504e-05, "loss": 0.3762, "step": 10273 }, { "epoch": 1.6730448235150428, "grad_norm": 0.13071219623088837, "learning_rate": 2.4336894768536226e-05, "loss": 0.3144, "step": 10274 }, { "epoch": 1.6732076700728737, "grad_norm": 0.0890638679265976, "learning_rate": 2.4332159425596395e-05, "loss": 0.2886, "step": 10275 }, { "epoch": 1.6733705166307047, "grad_norm": 0.11973886936903, "learning_rate": 2.4327424106634058e-05, "loss": 0.295, "step": 10276 }, { "epoch": 1.6735333631885356, "grad_norm": 0.09599290788173676, "learning_rate": 2.4322688811819206e-05, "loss": 0.3296, "step": 10277 }, { "epoch": 1.6736962097463666, "grad_norm": 0.10595274716615677, "learning_rate": 2.431795354132185e-05, "loss": 0.316, "step": 10278 }, { "epoch": 1.6738590563041975, "grad_norm": 0.11343491077423096, "learning_rate": 2.4313218295312012e-05, "loss": 0.3092, "step": 10279 }, { "epoch": 1.6740219028620282, "grad_norm": 0.0957176685333252, "learning_rate": 2.430848307395969e-05, "loss": 0.3293, "step": 10280 }, { "epoch": 1.6741847494198592, "grad_norm": 0.1083078607916832, "learning_rate": 2.4303747877434906e-05, "loss": 0.3093, "step": 10281 }, { "epoch": 1.67434759597769, "grad_norm": 0.10603640973567963, "learning_rate": 2.4299012705907648e-05, "loss": 0.3285, "step": 10282 }, { "epoch": 1.6745104425355208, "grad_norm": 0.10318206995725632, "learning_rate": 2.4294277559547938e-05, "loss": 0.3172, "step": 10283 }, { "epoch": 1.6746732890933518, "grad_norm": 0.11968924105167389, "learning_rate": 2.4289542438525782e-05, "loss": 0.2794, "step": 10284 }, { "epoch": 1.6748361356511827, "grad_norm": 0.08113160729408264, "learning_rate": 2.428480734301118e-05, "loss": 0.3015, "step": 10285 }, { "epoch": 1.6749989822090137, "grad_norm": 0.11650922149419785, "learning_rate": 2.4280072273174127e-05, "loss": 0.3082, "step": 10286 }, { "epoch": 1.6751618287668446, "grad_norm": 0.1151566430926323, "learning_rate": 2.427533722918465e-05, "loss": 0.3194, "step": 10287 }, { "epoch": 1.6753246753246753, "grad_norm": 0.10217379778623581, "learning_rate": 2.4270602211212737e-05, "loss": 0.2924, "step": 10288 }, { "epoch": 1.675487521882506, "grad_norm": 0.10025063157081604, "learning_rate": 2.4265867219428375e-05, "loss": 0.3567, "step": 10289 }, { "epoch": 1.675650368440337, "grad_norm": 0.1569240689277649, "learning_rate": 2.4261132254001594e-05, "loss": 0.3259, "step": 10290 }, { "epoch": 1.675813214998168, "grad_norm": 0.14493438601493835, "learning_rate": 2.4256397315102378e-05, "loss": 0.3134, "step": 10291 }, { "epoch": 1.6759760615559989, "grad_norm": 0.09009310603141785, "learning_rate": 2.4251662402900728e-05, "loss": 0.3328, "step": 10292 }, { "epoch": 1.6761389081138298, "grad_norm": 0.10406235605478287, "learning_rate": 2.4246927517566625e-05, "loss": 0.334, "step": 10293 }, { "epoch": 1.6763017546716608, "grad_norm": 0.12118538469076157, "learning_rate": 2.4242192659270095e-05, "loss": 0.3195, "step": 10294 }, { "epoch": 1.6764646012294915, "grad_norm": 0.14069348573684692, "learning_rate": 2.4237457828181116e-05, "loss": 0.2919, "step": 10295 }, { "epoch": 1.6766274477873224, "grad_norm": 0.1401505023241043, "learning_rate": 2.423272302446968e-05, "loss": 0.3467, "step": 10296 }, { "epoch": 1.6767902943451531, "grad_norm": 0.127424418926239, "learning_rate": 2.4227988248305787e-05, "loss": 0.31, "step": 10297 }, { "epoch": 1.676953140902984, "grad_norm": 0.14299260079860687, "learning_rate": 2.422325349985944e-05, "loss": 0.3329, "step": 10298 }, { "epoch": 1.677115987460815, "grad_norm": 0.12138693034648895, "learning_rate": 2.4218518779300606e-05, "loss": 0.3097, "step": 10299 }, { "epoch": 1.677278834018646, "grad_norm": 0.14461804926395416, "learning_rate": 2.4213784086799293e-05, "loss": 0.318, "step": 10300 }, { "epoch": 1.677441680576477, "grad_norm": 0.08064118772745132, "learning_rate": 2.4209049422525485e-05, "loss": 0.3427, "step": 10301 }, { "epoch": 1.6776045271343079, "grad_norm": 0.19723621010780334, "learning_rate": 2.4204314786649178e-05, "loss": 0.3245, "step": 10302 }, { "epoch": 1.6777673736921386, "grad_norm": 0.1178915798664093, "learning_rate": 2.4199580179340354e-05, "loss": 0.3405, "step": 10303 }, { "epoch": 1.6779302202499695, "grad_norm": 0.09803276509046555, "learning_rate": 2.419484560076899e-05, "loss": 0.28, "step": 10304 }, { "epoch": 1.6780930668078002, "grad_norm": 0.08561902493238449, "learning_rate": 2.4190111051105093e-05, "loss": 0.3156, "step": 10305 }, { "epoch": 1.6782559133656312, "grad_norm": 0.12032630294561386, "learning_rate": 2.4185376530518637e-05, "loss": 0.313, "step": 10306 }, { "epoch": 1.6784187599234621, "grad_norm": 0.10202282667160034, "learning_rate": 2.4180642039179602e-05, "loss": 0.3065, "step": 10307 }, { "epoch": 1.678581606481293, "grad_norm": 0.1484987437725067, "learning_rate": 2.417590757725796e-05, "loss": 0.324, "step": 10308 }, { "epoch": 1.678744453039124, "grad_norm": 0.1337873637676239, "learning_rate": 2.417117314492372e-05, "loss": 0.3044, "step": 10309 }, { "epoch": 1.6789072995969547, "grad_norm": 0.07977527379989624, "learning_rate": 2.4166438742346844e-05, "loss": 0.3262, "step": 10310 }, { "epoch": 1.6790701461547857, "grad_norm": 0.12025941163301468, "learning_rate": 2.4161704369697305e-05, "loss": 0.3063, "step": 10311 }, { "epoch": 1.6792329927126164, "grad_norm": 0.12848995625972748, "learning_rate": 2.4156970027145103e-05, "loss": 0.3432, "step": 10312 }, { "epoch": 1.6793958392704473, "grad_norm": 0.14963120222091675, "learning_rate": 2.41522357148602e-05, "loss": 0.3301, "step": 10313 }, { "epoch": 1.6795586858282783, "grad_norm": 0.11554432660341263, "learning_rate": 2.4147501433012575e-05, "loss": 0.2697, "step": 10314 }, { "epoch": 1.6797215323861092, "grad_norm": 0.35905635356903076, "learning_rate": 2.4142767181772195e-05, "loss": 0.3385, "step": 10315 }, { "epoch": 1.6798843789439402, "grad_norm": 0.10672498494386673, "learning_rate": 2.4138032961309048e-05, "loss": 0.3581, "step": 10316 }, { "epoch": 1.680047225501771, "grad_norm": 0.1288895457983017, "learning_rate": 2.4133298771793097e-05, "loss": 0.2854, "step": 10317 }, { "epoch": 1.6802100720596018, "grad_norm": 0.10687997937202454, "learning_rate": 2.4128564613394318e-05, "loss": 0.2971, "step": 10318 }, { "epoch": 1.6803729186174328, "grad_norm": 0.12391676753759384, "learning_rate": 2.4123830486282682e-05, "loss": 0.3317, "step": 10319 }, { "epoch": 1.6805357651752635, "grad_norm": 0.10380537807941437, "learning_rate": 2.4119096390628156e-05, "loss": 0.3256, "step": 10320 }, { "epoch": 1.6806986117330944, "grad_norm": 0.11612941324710846, "learning_rate": 2.4114362326600712e-05, "loss": 0.3348, "step": 10321 }, { "epoch": 1.6808614582909254, "grad_norm": 0.10522574931383133, "learning_rate": 2.4109628294370302e-05, "loss": 0.3351, "step": 10322 }, { "epoch": 1.6810243048487563, "grad_norm": 0.13770875334739685, "learning_rate": 2.4104894294106916e-05, "loss": 0.2893, "step": 10323 }, { "epoch": 1.6811871514065873, "grad_norm": 0.06904636323451996, "learning_rate": 2.4100160325980505e-05, "loss": 0.2805, "step": 10324 }, { "epoch": 1.681349997964418, "grad_norm": 0.09361602365970612, "learning_rate": 2.409542639016104e-05, "loss": 0.3013, "step": 10325 }, { "epoch": 1.681512844522249, "grad_norm": 0.1696997582912445, "learning_rate": 2.4090692486818462e-05, "loss": 0.3239, "step": 10326 }, { "epoch": 1.6816756910800796, "grad_norm": 0.12127777189016342, "learning_rate": 2.4085958616122758e-05, "loss": 0.3036, "step": 10327 }, { "epoch": 1.6818385376379106, "grad_norm": 0.09585277736186981, "learning_rate": 2.408122477824388e-05, "loss": 0.2963, "step": 10328 }, { "epoch": 1.6820013841957415, "grad_norm": 0.08445856720209122, "learning_rate": 2.4076490973351772e-05, "loss": 0.3075, "step": 10329 }, { "epoch": 1.6821642307535725, "grad_norm": 0.09211339056491852, "learning_rate": 2.4071757201616418e-05, "loss": 0.3465, "step": 10330 }, { "epoch": 1.6823270773114034, "grad_norm": 0.10349525511264801, "learning_rate": 2.4067023463207765e-05, "loss": 0.268, "step": 10331 }, { "epoch": 1.6824899238692343, "grad_norm": 0.09546849876642227, "learning_rate": 2.4062289758295758e-05, "loss": 0.2933, "step": 10332 }, { "epoch": 1.682652770427065, "grad_norm": 0.11637227237224579, "learning_rate": 2.405755608705035e-05, "loss": 0.3127, "step": 10333 }, { "epoch": 1.682815616984896, "grad_norm": 0.12482544034719467, "learning_rate": 2.4052822449641516e-05, "loss": 0.3716, "step": 10334 }, { "epoch": 1.6829784635427267, "grad_norm": 0.10468748956918716, "learning_rate": 2.404808884623919e-05, "loss": 0.3096, "step": 10335 }, { "epoch": 1.6831413101005577, "grad_norm": 0.08817495405673981, "learning_rate": 2.4043355277013324e-05, "loss": 0.3408, "step": 10336 }, { "epoch": 1.6833041566583886, "grad_norm": 0.13902734220027924, "learning_rate": 2.4038621742133868e-05, "loss": 0.2844, "step": 10337 }, { "epoch": 1.6834670032162196, "grad_norm": 0.0857432633638382, "learning_rate": 2.4033888241770773e-05, "loss": 0.3222, "step": 10338 }, { "epoch": 1.6836298497740505, "grad_norm": 0.12641975283622742, "learning_rate": 2.4029154776093986e-05, "loss": 0.3149, "step": 10339 }, { "epoch": 1.6837926963318814, "grad_norm": 0.09638375788927078, "learning_rate": 2.4024421345273452e-05, "loss": 0.2852, "step": 10340 }, { "epoch": 1.6839555428897122, "grad_norm": 0.09684580564498901, "learning_rate": 2.401968794947911e-05, "loss": 0.3374, "step": 10341 }, { "epoch": 1.684118389447543, "grad_norm": 0.11701758205890656, "learning_rate": 2.4014954588880914e-05, "loss": 0.3237, "step": 10342 }, { "epoch": 1.6842812360053738, "grad_norm": 0.1315765678882599, "learning_rate": 2.4010221263648803e-05, "loss": 0.3181, "step": 10343 }, { "epoch": 1.6844440825632048, "grad_norm": 0.11437271535396576, "learning_rate": 2.4005487973952696e-05, "loss": 0.3079, "step": 10344 }, { "epoch": 1.6846069291210357, "grad_norm": 0.10284671932458878, "learning_rate": 2.400075471996257e-05, "loss": 0.3307, "step": 10345 }, { "epoch": 1.6847697756788667, "grad_norm": 0.1479032188653946, "learning_rate": 2.399602150184834e-05, "loss": 0.3013, "step": 10346 }, { "epoch": 1.6849326222366976, "grad_norm": 0.07870887219905853, "learning_rate": 2.399128831977994e-05, "loss": 0.3062, "step": 10347 }, { "epoch": 1.6850954687945283, "grad_norm": 0.11927680671215057, "learning_rate": 2.3986555173927308e-05, "loss": 0.335, "step": 10348 }, { "epoch": 1.6852583153523593, "grad_norm": 0.11214762181043625, "learning_rate": 2.398182206446039e-05, "loss": 0.3147, "step": 10349 }, { "epoch": 1.68542116191019, "grad_norm": 0.12928986549377441, "learning_rate": 2.3977088991549113e-05, "loss": 0.3165, "step": 10350 }, { "epoch": 1.685584008468021, "grad_norm": 0.09289292991161346, "learning_rate": 2.397235595536339e-05, "loss": 0.3185, "step": 10351 }, { "epoch": 1.6857468550258519, "grad_norm": 0.12001944333314896, "learning_rate": 2.3967622956073183e-05, "loss": 0.3158, "step": 10352 }, { "epoch": 1.6859097015836828, "grad_norm": 0.15267165005207062, "learning_rate": 2.3962889993848403e-05, "loss": 0.332, "step": 10353 }, { "epoch": 1.6860725481415137, "grad_norm": 0.09919147938489914, "learning_rate": 2.395815706885897e-05, "loss": 0.3037, "step": 10354 }, { "epoch": 1.6862353946993447, "grad_norm": 0.12525014579296112, "learning_rate": 2.3953424181274825e-05, "loss": 0.3523, "step": 10355 }, { "epoch": 1.6863982412571754, "grad_norm": 0.13419251143932343, "learning_rate": 2.3948691331265894e-05, "loss": 0.3144, "step": 10356 }, { "epoch": 1.6865610878150064, "grad_norm": 0.1248617097735405, "learning_rate": 2.3943958519002086e-05, "loss": 0.3353, "step": 10357 }, { "epoch": 1.686723934372837, "grad_norm": 0.12535855174064636, "learning_rate": 2.3939225744653333e-05, "loss": 0.3254, "step": 10358 }, { "epoch": 1.686886780930668, "grad_norm": 0.10543232411146164, "learning_rate": 2.393449300838955e-05, "loss": 0.3101, "step": 10359 }, { "epoch": 1.687049627488499, "grad_norm": 0.10600326955318451, "learning_rate": 2.3929760310380668e-05, "loss": 0.3217, "step": 10360 }, { "epoch": 1.68721247404633, "grad_norm": 0.14491848647594452, "learning_rate": 2.3925027650796595e-05, "loss": 0.3562, "step": 10361 }, { "epoch": 1.6873753206041608, "grad_norm": 0.17898033559322357, "learning_rate": 2.392029502980724e-05, "loss": 0.3176, "step": 10362 }, { "epoch": 1.6875381671619916, "grad_norm": 0.13021478056907654, "learning_rate": 2.3915562447582544e-05, "loss": 0.3366, "step": 10363 }, { "epoch": 1.6877010137198225, "grad_norm": 0.12082128971815109, "learning_rate": 2.39108299042924e-05, "loss": 0.2844, "step": 10364 }, { "epoch": 1.6878638602776532, "grad_norm": 0.09928227216005325, "learning_rate": 2.390609740010673e-05, "loss": 0.317, "step": 10365 }, { "epoch": 1.6880267068354842, "grad_norm": 0.09034682810306549, "learning_rate": 2.3901364935195426e-05, "loss": 0.3343, "step": 10366 }, { "epoch": 1.6881895533933151, "grad_norm": 0.09191883355379105, "learning_rate": 2.3896632509728424e-05, "loss": 0.311, "step": 10367 }, { "epoch": 1.688352399951146, "grad_norm": 0.11135590821504593, "learning_rate": 2.389190012387562e-05, "loss": 0.307, "step": 10368 }, { "epoch": 1.688515246508977, "grad_norm": 0.10250978171825409, "learning_rate": 2.3887167777806913e-05, "loss": 0.3403, "step": 10369 }, { "epoch": 1.688678093066808, "grad_norm": 0.14488831162452698, "learning_rate": 2.3882435471692227e-05, "loss": 0.3311, "step": 10370 }, { "epoch": 1.6888409396246387, "grad_norm": 0.11216509342193604, "learning_rate": 2.3877703205701455e-05, "loss": 0.3025, "step": 10371 }, { "epoch": 1.6890037861824696, "grad_norm": 0.09642834961414337, "learning_rate": 2.38729709800045e-05, "loss": 0.3111, "step": 10372 }, { "epoch": 1.6891666327403003, "grad_norm": 0.10491962730884552, "learning_rate": 2.3868238794771255e-05, "loss": 0.3143, "step": 10373 }, { "epoch": 1.6893294792981313, "grad_norm": 0.0955234244465828, "learning_rate": 2.3863506650171637e-05, "loss": 0.3024, "step": 10374 }, { "epoch": 1.6894923258559622, "grad_norm": 0.06550051271915436, "learning_rate": 2.385877454637553e-05, "loss": 0.3074, "step": 10375 }, { "epoch": 1.6896551724137931, "grad_norm": 0.11962877959012985, "learning_rate": 2.3854042483552845e-05, "loss": 0.3233, "step": 10376 }, { "epoch": 1.689818018971624, "grad_norm": 0.10136782377958298, "learning_rate": 2.384931046187346e-05, "loss": 0.313, "step": 10377 }, { "epoch": 1.689980865529455, "grad_norm": 0.08574850112199783, "learning_rate": 2.3844578481507275e-05, "loss": 0.2838, "step": 10378 }, { "epoch": 1.6901437120872858, "grad_norm": 0.09689396619796753, "learning_rate": 2.3839846542624188e-05, "loss": 0.3667, "step": 10379 }, { "epoch": 1.6903065586451165, "grad_norm": 0.0682678371667862, "learning_rate": 2.383511464539408e-05, "loss": 0.3159, "step": 10380 }, { "epoch": 1.6904694052029474, "grad_norm": 0.15167425572872162, "learning_rate": 2.3830382789986856e-05, "loss": 0.3226, "step": 10381 }, { "epoch": 1.6906322517607784, "grad_norm": 0.09616386145353317, "learning_rate": 2.3825650976572394e-05, "loss": 0.3038, "step": 10382 }, { "epoch": 1.6907950983186093, "grad_norm": 0.08134733140468597, "learning_rate": 2.382091920532058e-05, "loss": 0.3198, "step": 10383 }, { "epoch": 1.6909579448764402, "grad_norm": 0.1268724948167801, "learning_rate": 2.381618747640129e-05, "loss": 0.328, "step": 10384 }, { "epoch": 1.6911207914342712, "grad_norm": 0.14314089715480804, "learning_rate": 2.3811455789984425e-05, "loss": 0.3054, "step": 10385 }, { "epoch": 1.691283637992102, "grad_norm": 0.10762999206781387, "learning_rate": 2.3806724146239858e-05, "loss": 0.2892, "step": 10386 }, { "epoch": 1.6914464845499328, "grad_norm": 0.12215928733348846, "learning_rate": 2.3801992545337472e-05, "loss": 0.3365, "step": 10387 }, { "epoch": 1.6916093311077636, "grad_norm": 0.17407266795635223, "learning_rate": 2.3797260987447128e-05, "loss": 0.3453, "step": 10388 }, { "epoch": 1.6917721776655945, "grad_norm": 0.09621939063072205, "learning_rate": 2.379252947273873e-05, "loss": 0.2983, "step": 10389 }, { "epoch": 1.6919350242234255, "grad_norm": 0.11208242177963257, "learning_rate": 2.3787798001382145e-05, "loss": 0.2891, "step": 10390 }, { "epoch": 1.6920978707812564, "grad_norm": 0.16261930763721466, "learning_rate": 2.378306657354723e-05, "loss": 0.35, "step": 10391 }, { "epoch": 1.6922607173390873, "grad_norm": 0.19477617740631104, "learning_rate": 2.3778335189403883e-05, "loss": 0.3118, "step": 10392 }, { "epoch": 1.6924235638969183, "grad_norm": 0.08502504974603653, "learning_rate": 2.377360384912196e-05, "loss": 0.3279, "step": 10393 }, { "epoch": 1.692586410454749, "grad_norm": 0.10488593578338623, "learning_rate": 2.3768872552871333e-05, "loss": 0.2783, "step": 10394 }, { "epoch": 1.69274925701258, "grad_norm": 0.11679483950138092, "learning_rate": 2.3764141300821865e-05, "loss": 0.3413, "step": 10395 }, { "epoch": 1.6929121035704107, "grad_norm": 0.14657706022262573, "learning_rate": 2.3759410093143435e-05, "loss": 0.3125, "step": 10396 }, { "epoch": 1.6930749501282416, "grad_norm": 0.12894892692565918, "learning_rate": 2.3754678930005894e-05, "loss": 0.3157, "step": 10397 }, { "epoch": 1.6932377966860725, "grad_norm": 0.14229314029216766, "learning_rate": 2.3749947811579116e-05, "loss": 0.3893, "step": 10398 }, { "epoch": 1.6934006432439035, "grad_norm": 0.08541938662528992, "learning_rate": 2.3745216738032944e-05, "loss": 0.3107, "step": 10399 }, { "epoch": 1.6935634898017344, "grad_norm": 0.09127751737833023, "learning_rate": 2.3740485709537263e-05, "loss": 0.2768, "step": 10400 }, { "epoch": 1.6937263363595652, "grad_norm": 0.11882879585027695, "learning_rate": 2.373575472626192e-05, "loss": 0.3228, "step": 10401 }, { "epoch": 1.693889182917396, "grad_norm": 0.12429013848304749, "learning_rate": 2.3731023788376756e-05, "loss": 0.3334, "step": 10402 }, { "epoch": 1.6940520294752268, "grad_norm": 0.12100137025117874, "learning_rate": 2.3726292896051654e-05, "loss": 0.2819, "step": 10403 }, { "epoch": 1.6942148760330578, "grad_norm": 0.13809096813201904, "learning_rate": 2.372156204945645e-05, "loss": 0.3361, "step": 10404 }, { "epoch": 1.6943777225908887, "grad_norm": 0.08421525359153748, "learning_rate": 2.3716831248761e-05, "loss": 0.3105, "step": 10405 }, { "epoch": 1.6945405691487196, "grad_norm": 0.11595138162374496, "learning_rate": 2.371210049413514e-05, "loss": 0.31, "step": 10406 }, { "epoch": 1.6947034157065506, "grad_norm": 0.09771495312452316, "learning_rate": 2.370736978574875e-05, "loss": 0.2998, "step": 10407 }, { "epoch": 1.6948662622643815, "grad_norm": 0.12464769184589386, "learning_rate": 2.3702639123771648e-05, "loss": 0.3422, "step": 10408 }, { "epoch": 1.6950291088222122, "grad_norm": 0.11117804795503616, "learning_rate": 2.3697908508373682e-05, "loss": 0.318, "step": 10409 }, { "epoch": 1.6951919553800432, "grad_norm": 0.13909347355365753, "learning_rate": 2.369317793972471e-05, "loss": 0.3168, "step": 10410 }, { "epoch": 1.695354801937874, "grad_norm": 0.12985442578792572, "learning_rate": 2.3688447417994568e-05, "loss": 0.2784, "step": 10411 }, { "epoch": 1.6955176484957049, "grad_norm": 0.12458118051290512, "learning_rate": 2.3683716943353094e-05, "loss": 0.3227, "step": 10412 }, { "epoch": 1.6956804950535358, "grad_norm": 0.10982111096382141, "learning_rate": 2.3678986515970113e-05, "loss": 0.3285, "step": 10413 }, { "epoch": 1.6958433416113667, "grad_norm": 0.07818964123725891, "learning_rate": 2.367425613601548e-05, "loss": 0.2882, "step": 10414 }, { "epoch": 1.6960061881691977, "grad_norm": 0.067290298640728, "learning_rate": 2.3669525803659025e-05, "loss": 0.2937, "step": 10415 }, { "epoch": 1.6961690347270286, "grad_norm": 0.13233590126037598, "learning_rate": 2.3664795519070583e-05, "loss": 0.2856, "step": 10416 }, { "epoch": 1.6963318812848593, "grad_norm": 0.08942487835884094, "learning_rate": 2.3660065282419977e-05, "loss": 0.3361, "step": 10417 }, { "epoch": 1.69649472784269, "grad_norm": 0.150700643658638, "learning_rate": 2.3655335093877044e-05, "loss": 0.3572, "step": 10418 }, { "epoch": 1.696657574400521, "grad_norm": 0.0821961984038353, "learning_rate": 2.365060495361161e-05, "loss": 0.2948, "step": 10419 }, { "epoch": 1.696820420958352, "grad_norm": 0.0921211987733841, "learning_rate": 2.3645874861793495e-05, "loss": 0.3183, "step": 10420 }, { "epoch": 1.696983267516183, "grad_norm": 0.1424497365951538, "learning_rate": 2.364114481859254e-05, "loss": 0.2933, "step": 10421 }, { "epoch": 1.6971461140740138, "grad_norm": 0.13944068551063538, "learning_rate": 2.3636414824178553e-05, "loss": 0.3106, "step": 10422 }, { "epoch": 1.6973089606318448, "grad_norm": 0.10354991257190704, "learning_rate": 2.3631684878721364e-05, "loss": 0.2995, "step": 10423 }, { "epoch": 1.6974718071896755, "grad_norm": 0.09913904964923859, "learning_rate": 2.3626954982390774e-05, "loss": 0.3086, "step": 10424 }, { "epoch": 1.6976346537475064, "grad_norm": 0.15491613745689392, "learning_rate": 2.3622225135356628e-05, "loss": 0.2956, "step": 10425 }, { "epoch": 1.6977975003053372, "grad_norm": 0.0970209613442421, "learning_rate": 2.361749533778872e-05, "loss": 0.3293, "step": 10426 }, { "epoch": 1.697960346863168, "grad_norm": 0.11770359426736832, "learning_rate": 2.361276558985688e-05, "loss": 0.3399, "step": 10427 }, { "epoch": 1.698123193420999, "grad_norm": 0.12762649357318878, "learning_rate": 2.3608035891730897e-05, "loss": 0.293, "step": 10428 }, { "epoch": 1.69828603997883, "grad_norm": 0.1367775797843933, "learning_rate": 2.360330624358061e-05, "loss": 0.3127, "step": 10429 }, { "epoch": 1.698448886536661, "grad_norm": 0.1129504069685936, "learning_rate": 2.3598576645575806e-05, "loss": 0.2813, "step": 10430 }, { "epoch": 1.6986117330944919, "grad_norm": 0.09071774780750275, "learning_rate": 2.3593847097886295e-05, "loss": 0.3063, "step": 10431 }, { "epoch": 1.6987745796523226, "grad_norm": 0.18113885819911957, "learning_rate": 2.3589117600681896e-05, "loss": 0.3658, "step": 10432 }, { "epoch": 1.6989374262101535, "grad_norm": 0.12394798547029495, "learning_rate": 2.35843881541324e-05, "loss": 0.3, "step": 10433 }, { "epoch": 1.6991002727679843, "grad_norm": 0.12361525744199753, "learning_rate": 2.3579658758407604e-05, "loss": 0.3041, "step": 10434 }, { "epoch": 1.6992631193258152, "grad_norm": 0.1596384048461914, "learning_rate": 2.3574929413677318e-05, "loss": 0.3085, "step": 10435 }, { "epoch": 1.6994259658836461, "grad_norm": 0.11301092058420181, "learning_rate": 2.3570200120111335e-05, "loss": 0.3099, "step": 10436 }, { "epoch": 1.699588812441477, "grad_norm": 0.13530035316944122, "learning_rate": 2.3565470877879456e-05, "loss": 0.2577, "step": 10437 }, { "epoch": 1.699751658999308, "grad_norm": 0.09779661893844604, "learning_rate": 2.356074168715147e-05, "loss": 0.3017, "step": 10438 }, { "epoch": 1.6999145055571387, "grad_norm": 0.1382475346326828, "learning_rate": 2.355601254809716e-05, "loss": 0.3224, "step": 10439 }, { "epoch": 1.7000773521149697, "grad_norm": 0.10068153589963913, "learning_rate": 2.3551283460886333e-05, "loss": 0.3088, "step": 10440 }, { "epoch": 1.7002401986728004, "grad_norm": 0.1249472051858902, "learning_rate": 2.3546554425688774e-05, "loss": 0.3515, "step": 10441 }, { "epoch": 1.7004030452306313, "grad_norm": 0.13010960817337036, "learning_rate": 2.3541825442674252e-05, "loss": 0.3069, "step": 10442 }, { "epoch": 1.7005658917884623, "grad_norm": 0.14260223507881165, "learning_rate": 2.353709651201258e-05, "loss": 0.3133, "step": 10443 }, { "epoch": 1.7007287383462932, "grad_norm": 0.1337946355342865, "learning_rate": 2.3532367633873526e-05, "loss": 0.3224, "step": 10444 }, { "epoch": 1.7008915849041242, "grad_norm": 0.09065495431423187, "learning_rate": 2.3527638808426865e-05, "loss": 0.3169, "step": 10445 }, { "epoch": 1.7010544314619551, "grad_norm": 0.1226029172539711, "learning_rate": 2.3522910035842377e-05, "loss": 0.3488, "step": 10446 }, { "epoch": 1.7012172780197858, "grad_norm": 0.08669482916593552, "learning_rate": 2.3518181316289857e-05, "loss": 0.2878, "step": 10447 }, { "epoch": 1.7013801245776168, "grad_norm": 0.10836292058229446, "learning_rate": 2.351345264993906e-05, "loss": 0.3449, "step": 10448 }, { "epoch": 1.7015429711354475, "grad_norm": 0.10134243220090866, "learning_rate": 2.3508724036959774e-05, "loss": 0.3046, "step": 10449 }, { "epoch": 1.7017058176932784, "grad_norm": 0.11799916625022888, "learning_rate": 2.350399547752175e-05, "loss": 0.2857, "step": 10450 }, { "epoch": 1.7018686642511094, "grad_norm": 0.09998020529747009, "learning_rate": 2.349926697179478e-05, "loss": 0.3109, "step": 10451 }, { "epoch": 1.7020315108089403, "grad_norm": 0.093077152967453, "learning_rate": 2.3494538519948614e-05, "loss": 0.3011, "step": 10452 }, { "epoch": 1.7021943573667713, "grad_norm": 0.08168795704841614, "learning_rate": 2.3489810122153028e-05, "loss": 0.3084, "step": 10453 }, { "epoch": 1.702357203924602, "grad_norm": 0.15613512694835663, "learning_rate": 2.348508177857779e-05, "loss": 0.3071, "step": 10454 }, { "epoch": 1.702520050482433, "grad_norm": 0.16508100926876068, "learning_rate": 2.348035348939265e-05, "loss": 0.3668, "step": 10455 }, { "epoch": 1.7026828970402637, "grad_norm": 0.07390334457159042, "learning_rate": 2.3475625254767376e-05, "loss": 0.3015, "step": 10456 }, { "epoch": 1.7028457435980946, "grad_norm": 0.11523764580488205, "learning_rate": 2.3470897074871713e-05, "loss": 0.3094, "step": 10457 }, { "epoch": 1.7030085901559255, "grad_norm": 0.11573709547519684, "learning_rate": 2.3466168949875437e-05, "loss": 0.3153, "step": 10458 }, { "epoch": 1.7031714367137565, "grad_norm": 0.07506291568279266, "learning_rate": 2.346144087994829e-05, "loss": 0.3422, "step": 10459 }, { "epoch": 1.7033342832715874, "grad_norm": 0.08178498595952988, "learning_rate": 2.3456712865260013e-05, "loss": 0.3406, "step": 10460 }, { "epoch": 1.7034971298294184, "grad_norm": 0.18343086540699005, "learning_rate": 2.3451984905980384e-05, "loss": 0.3538, "step": 10461 }, { "epoch": 1.703659976387249, "grad_norm": 0.10346503555774689, "learning_rate": 2.344725700227913e-05, "loss": 0.2708, "step": 10462 }, { "epoch": 1.70382282294508, "grad_norm": 0.13996483385562897, "learning_rate": 2.3442529154326005e-05, "loss": 0.3393, "step": 10463 }, { "epoch": 1.7039856695029107, "grad_norm": 0.10257889330387115, "learning_rate": 2.3437801362290737e-05, "loss": 0.3109, "step": 10464 }, { "epoch": 1.7041485160607417, "grad_norm": 0.11464150249958038, "learning_rate": 2.343307362634309e-05, "loss": 0.3674, "step": 10465 }, { "epoch": 1.7043113626185726, "grad_norm": 0.10304854810237885, "learning_rate": 2.3428345946652798e-05, "loss": 0.3198, "step": 10466 }, { "epoch": 1.7044742091764036, "grad_norm": 0.10956943035125732, "learning_rate": 2.3423618323389593e-05, "loss": 0.3042, "step": 10467 }, { "epoch": 1.7046370557342345, "grad_norm": 0.12514807283878326, "learning_rate": 2.3418890756723202e-05, "loss": 0.3657, "step": 10468 }, { "epoch": 1.7047999022920655, "grad_norm": 0.11319614201784134, "learning_rate": 2.341416324682338e-05, "loss": 0.3392, "step": 10469 }, { "epoch": 1.7049627488498962, "grad_norm": 0.14447088539600372, "learning_rate": 2.3409435793859853e-05, "loss": 0.3094, "step": 10470 }, { "epoch": 1.7051255954077271, "grad_norm": 0.0698624849319458, "learning_rate": 2.3404708398002332e-05, "loss": 0.336, "step": 10471 }, { "epoch": 1.7052884419655578, "grad_norm": 0.13556914031505585, "learning_rate": 2.3399981059420572e-05, "loss": 0.3195, "step": 10472 }, { "epoch": 1.7054512885233888, "grad_norm": 0.11333829909563065, "learning_rate": 2.3395253778284278e-05, "loss": 0.3097, "step": 10473 }, { "epoch": 1.7056141350812197, "grad_norm": 0.10614921897649765, "learning_rate": 2.339052655476319e-05, "loss": 0.3559, "step": 10474 }, { "epoch": 1.7057769816390507, "grad_norm": 0.11863035708665848, "learning_rate": 2.3385799389027013e-05, "loss": 0.349, "step": 10475 }, { "epoch": 1.7059398281968816, "grad_norm": 0.09773198515176773, "learning_rate": 2.338107228124547e-05, "loss": 0.2913, "step": 10476 }, { "epoch": 1.7061026747547123, "grad_norm": 0.11720582842826843, "learning_rate": 2.3376345231588296e-05, "loss": 0.3183, "step": 10477 }, { "epoch": 1.7062655213125433, "grad_norm": 0.11228755116462708, "learning_rate": 2.337161824022519e-05, "loss": 0.3354, "step": 10478 }, { "epoch": 1.706428367870374, "grad_norm": 0.06671928614377975, "learning_rate": 2.3366891307325854e-05, "loss": 0.3381, "step": 10479 }, { "epoch": 1.706591214428205, "grad_norm": 0.08869504183530807, "learning_rate": 2.3362164433060027e-05, "loss": 0.3459, "step": 10480 }, { "epoch": 1.7067540609860359, "grad_norm": 0.1287924349308014, "learning_rate": 2.3357437617597406e-05, "loss": 0.3758, "step": 10481 }, { "epoch": 1.7069169075438668, "grad_norm": 0.09640783071517944, "learning_rate": 2.335271086110768e-05, "loss": 0.2879, "step": 10482 }, { "epoch": 1.7070797541016978, "grad_norm": 0.11772297322750092, "learning_rate": 2.3347984163760583e-05, "loss": 0.3143, "step": 10483 }, { "epoch": 1.7072426006595287, "grad_norm": 0.09440208226442337, "learning_rate": 2.3343257525725804e-05, "loss": 0.2995, "step": 10484 }, { "epoch": 1.7074054472173594, "grad_norm": 0.1152491420507431, "learning_rate": 2.3338530947173044e-05, "loss": 0.2751, "step": 10485 }, { "epoch": 1.7075682937751904, "grad_norm": 0.08175840228796005, "learning_rate": 2.333380442827199e-05, "loss": 0.2799, "step": 10486 }, { "epoch": 1.707731140333021, "grad_norm": 0.12058722227811813, "learning_rate": 2.3329077969192358e-05, "loss": 0.2752, "step": 10487 }, { "epoch": 1.707893986890852, "grad_norm": 0.10402216017246246, "learning_rate": 2.3324351570103832e-05, "loss": 0.3132, "step": 10488 }, { "epoch": 1.708056833448683, "grad_norm": 0.09468825161457062, "learning_rate": 2.3319625231176102e-05, "loss": 0.3333, "step": 10489 }, { "epoch": 1.708219680006514, "grad_norm": 0.13452157378196716, "learning_rate": 2.331489895257886e-05, "loss": 0.3016, "step": 10490 }, { "epoch": 1.7083825265643449, "grad_norm": 0.08981010317802429, "learning_rate": 2.3310172734481804e-05, "loss": 0.3717, "step": 10491 }, { "epoch": 1.7085453731221756, "grad_norm": 0.11414945870637894, "learning_rate": 2.33054465770546e-05, "loss": 0.3276, "step": 10492 }, { "epoch": 1.7087082196800065, "grad_norm": 0.08738762885332108, "learning_rate": 2.330072048046694e-05, "loss": 0.3229, "step": 10493 }, { "epoch": 1.7088710662378372, "grad_norm": 0.11077383905649185, "learning_rate": 2.329599444488851e-05, "loss": 0.3293, "step": 10494 }, { "epoch": 1.7090339127956682, "grad_norm": 0.12123990058898926, "learning_rate": 2.3291268470488985e-05, "loss": 0.3022, "step": 10495 }, { "epoch": 1.7091967593534991, "grad_norm": 0.13722418248653412, "learning_rate": 2.3286542557438044e-05, "loss": 0.3309, "step": 10496 }, { "epoch": 1.70935960591133, "grad_norm": 0.11417318880558014, "learning_rate": 2.3281816705905347e-05, "loss": 0.3572, "step": 10497 }, { "epoch": 1.709522452469161, "grad_norm": 0.12129285931587219, "learning_rate": 2.327709091606059e-05, "loss": 0.3017, "step": 10498 }, { "epoch": 1.709685299026992, "grad_norm": 0.13440163433551788, "learning_rate": 2.327236518807343e-05, "loss": 0.3331, "step": 10499 }, { "epoch": 1.7098481455848227, "grad_norm": 0.14351283013820648, "learning_rate": 2.326763952211354e-05, "loss": 0.3433, "step": 10500 }, { "epoch": 1.7100109921426536, "grad_norm": 0.0784144252538681, "learning_rate": 2.3262913918350566e-05, "loss": 0.2999, "step": 10501 }, { "epoch": 1.7101738387004843, "grad_norm": 0.10885559022426605, "learning_rate": 2.32581883769542e-05, "loss": 0.2866, "step": 10502 }, { "epoch": 1.7103366852583153, "grad_norm": 0.16029976308345795, "learning_rate": 2.3253462898094085e-05, "loss": 0.3146, "step": 10503 }, { "epoch": 1.7104995318161462, "grad_norm": 0.14108067750930786, "learning_rate": 2.324873748193988e-05, "loss": 0.323, "step": 10504 }, { "epoch": 1.7106623783739772, "grad_norm": 0.12085924297571182, "learning_rate": 2.3244012128661255e-05, "loss": 0.2973, "step": 10505 }, { "epoch": 1.710825224931808, "grad_norm": 0.1172255203127861, "learning_rate": 2.3239286838427856e-05, "loss": 0.3412, "step": 10506 }, { "epoch": 1.710988071489639, "grad_norm": 0.11941760033369064, "learning_rate": 2.3234561611409333e-05, "loss": 0.3229, "step": 10507 }, { "epoch": 1.7111509180474698, "grad_norm": 0.11085697263479233, "learning_rate": 2.322983644777533e-05, "loss": 0.3231, "step": 10508 }, { "epoch": 1.7113137646053005, "grad_norm": 0.13454434275627136, "learning_rate": 2.322511134769551e-05, "loss": 0.3716, "step": 10509 }, { "epoch": 1.7114766111631314, "grad_norm": 0.12121172249317169, "learning_rate": 2.3220386311339507e-05, "loss": 0.3284, "step": 10510 }, { "epoch": 1.7116394577209624, "grad_norm": 0.10275658965110779, "learning_rate": 2.3215661338876966e-05, "loss": 0.3383, "step": 10511 }, { "epoch": 1.7118023042787933, "grad_norm": 0.12484204024076462, "learning_rate": 2.3210936430477533e-05, "loss": 0.2974, "step": 10512 }, { "epoch": 1.7119651508366243, "grad_norm": 0.13891440629959106, "learning_rate": 2.3206211586310838e-05, "loss": 0.3211, "step": 10513 }, { "epoch": 1.7121279973944552, "grad_norm": 0.09248799830675125, "learning_rate": 2.320148680654653e-05, "loss": 0.3444, "step": 10514 }, { "epoch": 1.712290843952286, "grad_norm": 0.10929054766893387, "learning_rate": 2.3196762091354225e-05, "loss": 0.2972, "step": 10515 }, { "epoch": 1.7124536905101169, "grad_norm": 0.10852331668138504, "learning_rate": 2.3192037440903565e-05, "loss": 0.3191, "step": 10516 }, { "epoch": 1.7126165370679476, "grad_norm": 0.08219637721776962, "learning_rate": 2.318731285536418e-05, "loss": 0.3096, "step": 10517 }, { "epoch": 1.7127793836257785, "grad_norm": 0.10056248307228088, "learning_rate": 2.31825883349057e-05, "loss": 0.3241, "step": 10518 }, { "epoch": 1.7129422301836095, "grad_norm": 0.11835721135139465, "learning_rate": 2.317786387969773e-05, "loss": 0.2982, "step": 10519 }, { "epoch": 1.7131050767414404, "grad_norm": 0.11168183386325836, "learning_rate": 2.3173139489909915e-05, "loss": 0.3249, "step": 10520 }, { "epoch": 1.7132679232992714, "grad_norm": 0.08238008618354797, "learning_rate": 2.3168415165711867e-05, "loss": 0.3472, "step": 10521 }, { "epoch": 1.7134307698571023, "grad_norm": 0.1103520467877388, "learning_rate": 2.3163690907273194e-05, "loss": 0.3136, "step": 10522 }, { "epoch": 1.713593616414933, "grad_norm": 0.08458509296178818, "learning_rate": 2.3158966714763528e-05, "loss": 0.335, "step": 10523 }, { "epoch": 1.713756462972764, "grad_norm": 0.06900662183761597, "learning_rate": 2.3154242588352474e-05, "loss": 0.3108, "step": 10524 }, { "epoch": 1.7139193095305947, "grad_norm": 0.07153231650590897, "learning_rate": 2.314951852820964e-05, "loss": 0.3336, "step": 10525 }, { "epoch": 1.7140821560884256, "grad_norm": 0.10539258271455765, "learning_rate": 2.314479453450462e-05, "loss": 0.3177, "step": 10526 }, { "epoch": 1.7142450026462566, "grad_norm": 0.10487696528434753, "learning_rate": 2.3140070607407052e-05, "loss": 0.3061, "step": 10527 }, { "epoch": 1.7144078492040875, "grad_norm": 0.12740370631217957, "learning_rate": 2.313534674708652e-05, "loss": 0.3048, "step": 10528 }, { "epoch": 1.7145706957619185, "grad_norm": 0.11359342932701111, "learning_rate": 2.313062295371262e-05, "loss": 0.2861, "step": 10529 }, { "epoch": 1.7147335423197492, "grad_norm": 0.14581604301929474, "learning_rate": 2.3125899227454956e-05, "loss": 0.3258, "step": 10530 }, { "epoch": 1.7148963888775801, "grad_norm": 0.1265638768672943, "learning_rate": 2.312117556848313e-05, "loss": 0.3177, "step": 10531 }, { "epoch": 1.7150592354354108, "grad_norm": 0.09083937108516693, "learning_rate": 2.3116451976966728e-05, "loss": 0.3077, "step": 10532 }, { "epoch": 1.7152220819932418, "grad_norm": 0.09984508901834488, "learning_rate": 2.311172845307534e-05, "loss": 0.3139, "step": 10533 }, { "epoch": 1.7153849285510727, "grad_norm": 0.08928725868463516, "learning_rate": 2.310700499697856e-05, "loss": 0.3055, "step": 10534 }, { "epoch": 1.7155477751089037, "grad_norm": 0.13094909489154816, "learning_rate": 2.3102281608845977e-05, "loss": 0.309, "step": 10535 }, { "epoch": 1.7157106216667346, "grad_norm": 0.12038739770650864, "learning_rate": 2.309755828884717e-05, "loss": 0.3268, "step": 10536 }, { "epoch": 1.7158734682245655, "grad_norm": 0.12285733968019485, "learning_rate": 2.3092835037151708e-05, "loss": 0.2744, "step": 10537 }, { "epoch": 1.7160363147823963, "grad_norm": 0.12037305533885956, "learning_rate": 2.308811185392919e-05, "loss": 0.3106, "step": 10538 }, { "epoch": 1.7161991613402272, "grad_norm": 0.12199179083108902, "learning_rate": 2.3083388739349192e-05, "loss": 0.3269, "step": 10539 }, { "epoch": 1.716362007898058, "grad_norm": 0.13259653747081757, "learning_rate": 2.3078665693581278e-05, "loss": 0.3437, "step": 10540 }, { "epoch": 1.7165248544558889, "grad_norm": 0.13721683621406555, "learning_rate": 2.3073942716795012e-05, "loss": 0.3168, "step": 10541 }, { "epoch": 1.7166877010137198, "grad_norm": 0.13723769783973694, "learning_rate": 2.306921980915998e-05, "loss": 0.3202, "step": 10542 }, { "epoch": 1.7168505475715508, "grad_norm": 0.10482954978942871, "learning_rate": 2.3064496970845744e-05, "loss": 0.2883, "step": 10543 }, { "epoch": 1.7170133941293817, "grad_norm": 0.1290288120508194, "learning_rate": 2.3059774202021855e-05, "loss": 0.3118, "step": 10544 }, { "epoch": 1.7171762406872126, "grad_norm": 0.10251753777265549, "learning_rate": 2.3055051502857895e-05, "loss": 0.3268, "step": 10545 }, { "epoch": 1.7173390872450434, "grad_norm": 0.06429962813854218, "learning_rate": 2.3050328873523417e-05, "loss": 0.3444, "step": 10546 }, { "epoch": 1.717501933802874, "grad_norm": 0.0991022139787674, "learning_rate": 2.304560631418797e-05, "loss": 0.3003, "step": 10547 }, { "epoch": 1.717664780360705, "grad_norm": 0.13249041140079498, "learning_rate": 2.30408838250211e-05, "loss": 0.3376, "step": 10548 }, { "epoch": 1.717827626918536, "grad_norm": 0.0927204042673111, "learning_rate": 2.3036161406192385e-05, "loss": 0.3302, "step": 10549 }, { "epoch": 1.717990473476367, "grad_norm": 0.057792242616415024, "learning_rate": 2.3031439057871352e-05, "loss": 0.2953, "step": 10550 }, { "epoch": 1.7181533200341979, "grad_norm": 0.11112391203641891, "learning_rate": 2.3026716780227562e-05, "loss": 0.2768, "step": 10551 }, { "epoch": 1.7183161665920288, "grad_norm": 0.16008877754211426, "learning_rate": 2.3021994573430546e-05, "loss": 0.3464, "step": 10552 }, { "epoch": 1.7184790131498595, "grad_norm": 0.10571806132793427, "learning_rate": 2.301727243764985e-05, "loss": 0.3052, "step": 10553 }, { "epoch": 1.7186418597076905, "grad_norm": 0.13432085514068604, "learning_rate": 2.3012550373055018e-05, "loss": 0.3128, "step": 10554 }, { "epoch": 1.7188047062655212, "grad_norm": 0.1108257994055748, "learning_rate": 2.300782837981557e-05, "loss": 0.3193, "step": 10555 }, { "epoch": 1.7189675528233521, "grad_norm": 0.08087730407714844, "learning_rate": 2.3003106458101066e-05, "loss": 0.332, "step": 10556 }, { "epoch": 1.719130399381183, "grad_norm": 0.13236908614635468, "learning_rate": 2.2998384608081022e-05, "loss": 0.2731, "step": 10557 }, { "epoch": 1.719293245939014, "grad_norm": 0.11325313150882721, "learning_rate": 2.2993662829924968e-05, "loss": 0.3281, "step": 10558 }, { "epoch": 1.719456092496845, "grad_norm": 0.1437460333108902, "learning_rate": 2.298894112380242e-05, "loss": 0.3322, "step": 10559 }, { "epoch": 1.7196189390546759, "grad_norm": 0.10252202302217484, "learning_rate": 2.298421948988292e-05, "loss": 0.2897, "step": 10560 }, { "epoch": 1.7197817856125066, "grad_norm": 0.09775181114673615, "learning_rate": 2.2979497928335978e-05, "loss": 0.3748, "step": 10561 }, { "epoch": 1.7199446321703376, "grad_norm": 0.098100446164608, "learning_rate": 2.2974776439331104e-05, "loss": 0.3255, "step": 10562 }, { "epoch": 1.7201074787281683, "grad_norm": 0.08853304386138916, "learning_rate": 2.297005502303784e-05, "loss": 0.334, "step": 10563 }, { "epoch": 1.7202703252859992, "grad_norm": 0.12708577513694763, "learning_rate": 2.2965333679625676e-05, "loss": 0.3604, "step": 10564 }, { "epoch": 1.7204331718438302, "grad_norm": 0.07085821777582169, "learning_rate": 2.296061240926413e-05, "loss": 0.3491, "step": 10565 }, { "epoch": 1.720596018401661, "grad_norm": 0.1191069632768631, "learning_rate": 2.29558912121227e-05, "loss": 0.3028, "step": 10566 }, { "epoch": 1.720758864959492, "grad_norm": 0.13964903354644775, "learning_rate": 2.2951170088370906e-05, "loss": 0.3377, "step": 10567 }, { "epoch": 1.7209217115173228, "grad_norm": 0.11096081137657166, "learning_rate": 2.294644903817825e-05, "loss": 0.3214, "step": 10568 }, { "epoch": 1.7210845580751537, "grad_norm": 0.0872291699051857, "learning_rate": 2.294172806171422e-05, "loss": 0.3286, "step": 10569 }, { "epoch": 1.7212474046329844, "grad_norm": 0.18410857021808624, "learning_rate": 2.2937007159148316e-05, "loss": 0.3316, "step": 10570 }, { "epoch": 1.7214102511908154, "grad_norm": 0.13074485957622528, "learning_rate": 2.2932286330650037e-05, "loss": 0.3099, "step": 10571 }, { "epoch": 1.7215730977486463, "grad_norm": 0.10907944291830063, "learning_rate": 2.292756557638888e-05, "loss": 0.3263, "step": 10572 }, { "epoch": 1.7217359443064773, "grad_norm": 0.12907534837722778, "learning_rate": 2.2922844896534322e-05, "loss": 0.3146, "step": 10573 }, { "epoch": 1.7218987908643082, "grad_norm": 0.13012517988681793, "learning_rate": 2.2918124291255856e-05, "loss": 0.2756, "step": 10574 }, { "epoch": 1.7220616374221391, "grad_norm": 0.14906173944473267, "learning_rate": 2.291340376072297e-05, "loss": 0.3072, "step": 10575 }, { "epoch": 1.7222244839799699, "grad_norm": 0.12666605412960052, "learning_rate": 2.290868330510514e-05, "loss": 0.3241, "step": 10576 }, { "epoch": 1.7223873305378008, "grad_norm": 0.09655191004276276, "learning_rate": 2.2903962924571836e-05, "loss": 0.3387, "step": 10577 }, { "epoch": 1.7225501770956315, "grad_norm": 0.1508810967206955, "learning_rate": 2.2899242619292555e-05, "loss": 0.3185, "step": 10578 }, { "epoch": 1.7227130236534625, "grad_norm": 0.13218583166599274, "learning_rate": 2.2894522389436756e-05, "loss": 0.3178, "step": 10579 }, { "epoch": 1.7228758702112934, "grad_norm": 0.08838911354541779, "learning_rate": 2.288980223517391e-05, "loss": 0.3104, "step": 10580 }, { "epoch": 1.7230387167691243, "grad_norm": 0.1030033528804779, "learning_rate": 2.2885082156673478e-05, "loss": 0.3152, "step": 10581 }, { "epoch": 1.7232015633269553, "grad_norm": 0.10658097267150879, "learning_rate": 2.288036215410494e-05, "loss": 0.3076, "step": 10582 }, { "epoch": 1.723364409884786, "grad_norm": 0.1294788122177124, "learning_rate": 2.2875642227637756e-05, "loss": 0.3512, "step": 10583 }, { "epoch": 1.723527256442617, "grad_norm": 0.09882286936044693, "learning_rate": 2.287092237744137e-05, "loss": 0.357, "step": 10584 }, { "epoch": 1.7236901030004477, "grad_norm": 0.08723773807287216, "learning_rate": 2.2866202603685257e-05, "loss": 0.3212, "step": 10585 }, { "epoch": 1.7238529495582786, "grad_norm": 0.09890055656433105, "learning_rate": 2.2861482906538867e-05, "loss": 0.3124, "step": 10586 }, { "epoch": 1.7240157961161096, "grad_norm": 0.1255119889974594, "learning_rate": 2.285676328617164e-05, "loss": 0.3316, "step": 10587 }, { "epoch": 1.7241786426739405, "grad_norm": 0.12729643285274506, "learning_rate": 2.2852043742753032e-05, "loss": 0.3349, "step": 10588 }, { "epoch": 1.7243414892317714, "grad_norm": 0.12511007487773895, "learning_rate": 2.2847324276452495e-05, "loss": 0.3161, "step": 10589 }, { "epoch": 1.7245043357896024, "grad_norm": 0.10749948024749756, "learning_rate": 2.2842604887439463e-05, "loss": 0.3377, "step": 10590 }, { "epoch": 1.724667182347433, "grad_norm": 0.0764843225479126, "learning_rate": 2.2837885575883382e-05, "loss": 0.3459, "step": 10591 }, { "epoch": 1.724830028905264, "grad_norm": 0.13616274297237396, "learning_rate": 2.2833166341953673e-05, "loss": 0.3649, "step": 10592 }, { "epoch": 1.7249928754630948, "grad_norm": 0.094505675137043, "learning_rate": 2.2828447185819803e-05, "loss": 0.3034, "step": 10593 }, { "epoch": 1.7251557220209257, "grad_norm": 0.0704546719789505, "learning_rate": 2.2823728107651175e-05, "loss": 0.3208, "step": 10594 }, { "epoch": 1.7253185685787567, "grad_norm": 0.11403783410787582, "learning_rate": 2.2819009107617224e-05, "loss": 0.2947, "step": 10595 }, { "epoch": 1.7254814151365876, "grad_norm": 0.11578428000211716, "learning_rate": 2.281429018588739e-05, "loss": 0.3069, "step": 10596 }, { "epoch": 1.7256442616944185, "grad_norm": 0.16353870928287506, "learning_rate": 2.280957134263109e-05, "loss": 0.2961, "step": 10597 }, { "epoch": 1.7258071082522495, "grad_norm": 0.11403097212314606, "learning_rate": 2.2804852578017737e-05, "loss": 0.3121, "step": 10598 }, { "epoch": 1.7259699548100802, "grad_norm": 0.11948162317276001, "learning_rate": 2.2800133892216747e-05, "loss": 0.3463, "step": 10599 }, { "epoch": 1.7261328013679111, "grad_norm": 0.12916810810565948, "learning_rate": 2.2795415285397546e-05, "loss": 0.3513, "step": 10600 }, { "epoch": 1.7262956479257419, "grad_norm": 0.1319837123155594, "learning_rate": 2.2790696757729546e-05, "loss": 0.3629, "step": 10601 }, { "epoch": 1.7264584944835728, "grad_norm": 0.12328082323074341, "learning_rate": 2.278597830938215e-05, "loss": 0.2691, "step": 10602 }, { "epoch": 1.7266213410414037, "grad_norm": 0.049728237092494965, "learning_rate": 2.2781259940524757e-05, "loss": 0.3548, "step": 10603 }, { "epoch": 1.7267841875992347, "grad_norm": 0.13874070346355438, "learning_rate": 2.2776541651326788e-05, "loss": 0.3349, "step": 10604 }, { "epoch": 1.7269470341570656, "grad_norm": 0.16363009810447693, "learning_rate": 2.277182344195764e-05, "loss": 0.3319, "step": 10605 }, { "epoch": 1.7271098807148964, "grad_norm": 0.09991519153118134, "learning_rate": 2.276710531258669e-05, "loss": 0.3449, "step": 10606 }, { "epoch": 1.7272727272727273, "grad_norm": 0.0786910280585289, "learning_rate": 2.2762387263383362e-05, "loss": 0.3206, "step": 10607 }, { "epoch": 1.727435573830558, "grad_norm": 0.08687937259674072, "learning_rate": 2.2757669294517028e-05, "loss": 0.3301, "step": 10608 }, { "epoch": 1.727598420388389, "grad_norm": 0.13008207082748413, "learning_rate": 2.2752951406157092e-05, "loss": 0.3424, "step": 10609 }, { "epoch": 1.72776126694622, "grad_norm": 0.11183959245681763, "learning_rate": 2.2748233598472925e-05, "loss": 0.3174, "step": 10610 }, { "epoch": 1.7279241135040508, "grad_norm": 0.10446704924106598, "learning_rate": 2.2743515871633923e-05, "loss": 0.3141, "step": 10611 }, { "epoch": 1.7280869600618818, "grad_norm": 0.06910203397274017, "learning_rate": 2.2738798225809464e-05, "loss": 0.3237, "step": 10612 }, { "epoch": 1.7282498066197127, "grad_norm": 0.13000528514385223, "learning_rate": 2.2734080661168913e-05, "loss": 0.2904, "step": 10613 }, { "epoch": 1.7284126531775434, "grad_norm": 0.10437105596065521, "learning_rate": 2.2729363177881664e-05, "loss": 0.2882, "step": 10614 }, { "epoch": 1.7285754997353744, "grad_norm": 0.15720419585704803, "learning_rate": 2.2724645776117085e-05, "loss": 0.3915, "step": 10615 }, { "epoch": 1.728738346293205, "grad_norm": 0.1087966337800026, "learning_rate": 2.2719928456044537e-05, "loss": 0.3235, "step": 10616 }, { "epoch": 1.728901192851036, "grad_norm": 0.12312992662191391, "learning_rate": 2.2715211217833376e-05, "loss": 0.2879, "step": 10617 }, { "epoch": 1.729064039408867, "grad_norm": 0.144196555018425, "learning_rate": 2.2710494061652993e-05, "loss": 0.3332, "step": 10618 }, { "epoch": 1.729226885966698, "grad_norm": 0.11020752787590027, "learning_rate": 2.270577698767273e-05, "loss": 0.3314, "step": 10619 }, { "epoch": 1.7293897325245289, "grad_norm": 0.12627921998500824, "learning_rate": 2.2701059996061946e-05, "loss": 0.33, "step": 10620 }, { "epoch": 1.7295525790823596, "grad_norm": 0.07985641807317734, "learning_rate": 2.2696343086989985e-05, "loss": 0.2961, "step": 10621 }, { "epoch": 1.7297154256401905, "grad_norm": 0.1279066950082779, "learning_rate": 2.2691626260626223e-05, "loss": 0.3109, "step": 10622 }, { "epoch": 1.7298782721980213, "grad_norm": 0.11317594349384308, "learning_rate": 2.2686909517139993e-05, "loss": 0.3424, "step": 10623 }, { "epoch": 1.7300411187558522, "grad_norm": 0.13382945954799652, "learning_rate": 2.2682192856700628e-05, "loss": 0.3069, "step": 10624 }, { "epoch": 1.7302039653136831, "grad_norm": 0.10571568459272385, "learning_rate": 2.2677476279477496e-05, "loss": 0.3284, "step": 10625 }, { "epoch": 1.730366811871514, "grad_norm": 0.07744442671537399, "learning_rate": 2.2672759785639923e-05, "loss": 0.2981, "step": 10626 }, { "epoch": 1.730529658429345, "grad_norm": 0.18175660073757172, "learning_rate": 2.2668043375357245e-05, "loss": 0.3027, "step": 10627 }, { "epoch": 1.730692504987176, "grad_norm": 0.1115645319223404, "learning_rate": 2.2663327048798792e-05, "loss": 0.3347, "step": 10628 }, { "epoch": 1.7308553515450067, "grad_norm": 0.1506655514240265, "learning_rate": 2.2658610806133905e-05, "loss": 0.3128, "step": 10629 }, { "epoch": 1.7310181981028376, "grad_norm": 0.11096744239330292, "learning_rate": 2.2653894647531904e-05, "loss": 0.3118, "step": 10630 }, { "epoch": 1.7311810446606684, "grad_norm": 0.07820044457912445, "learning_rate": 2.2649178573162116e-05, "loss": 0.3029, "step": 10631 }, { "epoch": 1.7313438912184993, "grad_norm": 0.1156509518623352, "learning_rate": 2.2644462583193852e-05, "loss": 0.2813, "step": 10632 }, { "epoch": 1.7315067377763302, "grad_norm": 0.10600164532661438, "learning_rate": 2.2639746677796447e-05, "loss": 0.3172, "step": 10633 }, { "epoch": 1.7316695843341612, "grad_norm": 0.17871972918510437, "learning_rate": 2.2635030857139207e-05, "loss": 0.3985, "step": 10634 }, { "epoch": 1.7318324308919921, "grad_norm": 0.1043974757194519, "learning_rate": 2.2630315121391436e-05, "loss": 0.3469, "step": 10635 }, { "epoch": 1.731995277449823, "grad_norm": 0.14906515181064606, "learning_rate": 2.2625599470722463e-05, "loss": 0.3245, "step": 10636 }, { "epoch": 1.7321581240076538, "grad_norm": 0.09127679467201233, "learning_rate": 2.2620883905301583e-05, "loss": 0.3027, "step": 10637 }, { "epoch": 1.7323209705654845, "grad_norm": 0.1257123053073883, "learning_rate": 2.2616168425298097e-05, "loss": 0.3433, "step": 10638 }, { "epoch": 1.7324838171233155, "grad_norm": 0.09184424579143524, "learning_rate": 2.2611453030881297e-05, "loss": 0.3531, "step": 10639 }, { "epoch": 1.7326466636811464, "grad_norm": 0.08201685547828674, "learning_rate": 2.26067377222205e-05, "loss": 0.3171, "step": 10640 }, { "epoch": 1.7328095102389773, "grad_norm": 0.09267237782478333, "learning_rate": 2.260202249948499e-05, "loss": 0.3077, "step": 10641 }, { "epoch": 1.7329723567968083, "grad_norm": 0.08528792858123779, "learning_rate": 2.2597307362844056e-05, "loss": 0.2882, "step": 10642 }, { "epoch": 1.7331352033546392, "grad_norm": 0.20043013989925385, "learning_rate": 2.2592592312466977e-05, "loss": 0.3236, "step": 10643 }, { "epoch": 1.73329804991247, "grad_norm": 0.12365415692329407, "learning_rate": 2.2587877348523054e-05, "loss": 0.3054, "step": 10644 }, { "epoch": 1.7334608964703009, "grad_norm": 0.1468672901391983, "learning_rate": 2.2583162471181556e-05, "loss": 0.321, "step": 10645 }, { "epoch": 1.7336237430281316, "grad_norm": 0.10390173643827438, "learning_rate": 2.257844768061177e-05, "loss": 0.3301, "step": 10646 }, { "epoch": 1.7337865895859625, "grad_norm": 0.07141894102096558, "learning_rate": 2.257373297698297e-05, "loss": 0.3382, "step": 10647 }, { "epoch": 1.7339494361437935, "grad_norm": 0.1096360832452774, "learning_rate": 2.256901836046442e-05, "loss": 0.3036, "step": 10648 }, { "epoch": 1.7341122827016244, "grad_norm": 0.09266672283411026, "learning_rate": 2.2564303831225403e-05, "loss": 0.3068, "step": 10649 }, { "epoch": 1.7342751292594554, "grad_norm": 0.11201921105384827, "learning_rate": 2.2559589389435168e-05, "loss": 0.3371, "step": 10650 }, { "epoch": 1.7344379758172863, "grad_norm": 0.13709184527397156, "learning_rate": 2.2554875035262986e-05, "loss": 0.2803, "step": 10651 }, { "epoch": 1.734600822375117, "grad_norm": 0.10381944477558136, "learning_rate": 2.2550160768878122e-05, "loss": 0.3499, "step": 10652 }, { "epoch": 1.734763668932948, "grad_norm": 0.15164430439472198, "learning_rate": 2.2545446590449827e-05, "loss": 0.3038, "step": 10653 }, { "epoch": 1.7349265154907787, "grad_norm": 0.1216602697968483, "learning_rate": 2.254073250014734e-05, "loss": 0.3099, "step": 10654 }, { "epoch": 1.7350893620486096, "grad_norm": 0.09514116495847702, "learning_rate": 2.2536018498139936e-05, "loss": 0.3478, "step": 10655 }, { "epoch": 1.7352522086064406, "grad_norm": 0.13307522237300873, "learning_rate": 2.253130458459685e-05, "loss": 0.311, "step": 10656 }, { "epoch": 1.7354150551642715, "grad_norm": 0.13373489677906036, "learning_rate": 2.2526590759687314e-05, "loss": 0.3318, "step": 10657 }, { "epoch": 1.7355779017221025, "grad_norm": 0.09693607687950134, "learning_rate": 2.2521877023580597e-05, "loss": 0.3402, "step": 10658 }, { "epoch": 1.7357407482799332, "grad_norm": 0.14827780425548553, "learning_rate": 2.251716337644591e-05, "loss": 0.3065, "step": 10659 }, { "epoch": 1.7359035948377641, "grad_norm": 0.1439925730228424, "learning_rate": 2.2512449818452506e-05, "loss": 0.326, "step": 10660 }, { "epoch": 1.7360664413955949, "grad_norm": 0.12136055529117584, "learning_rate": 2.2507736349769594e-05, "loss": 0.3094, "step": 10661 }, { "epoch": 1.7362292879534258, "grad_norm": 0.09351269900798798, "learning_rate": 2.250302297056642e-05, "loss": 0.3238, "step": 10662 }, { "epoch": 1.7363921345112567, "grad_norm": 0.18650346994400024, "learning_rate": 2.24983096810122e-05, "loss": 0.3206, "step": 10663 }, { "epoch": 1.7365549810690877, "grad_norm": 0.08915595710277557, "learning_rate": 2.2493596481276155e-05, "loss": 0.3097, "step": 10664 }, { "epoch": 1.7367178276269186, "grad_norm": 0.08525639772415161, "learning_rate": 2.2488883371527513e-05, "loss": 0.315, "step": 10665 }, { "epoch": 1.7368806741847496, "grad_norm": 0.0984746664762497, "learning_rate": 2.2484170351935483e-05, "loss": 0.3531, "step": 10666 }, { "epoch": 1.7370435207425803, "grad_norm": 0.1682387888431549, "learning_rate": 2.247945742266927e-05, "loss": 0.2998, "step": 10667 }, { "epoch": 1.7372063673004112, "grad_norm": 0.12026336789131165, "learning_rate": 2.2474744583898083e-05, "loss": 0.3112, "step": 10668 }, { "epoch": 1.737369213858242, "grad_norm": 0.10937105119228363, "learning_rate": 2.2470031835791138e-05, "loss": 0.3357, "step": 10669 }, { "epoch": 1.7375320604160729, "grad_norm": 0.12679462134838104, "learning_rate": 2.246531917851763e-05, "loss": 0.3213, "step": 10670 }, { "epoch": 1.7376949069739038, "grad_norm": 0.10392570495605469, "learning_rate": 2.2460606612246757e-05, "loss": 0.3331, "step": 10671 }, { "epoch": 1.7378577535317348, "grad_norm": 0.0730128064751625, "learning_rate": 2.2455894137147705e-05, "loss": 0.3526, "step": 10672 }, { "epoch": 1.7380206000895657, "grad_norm": 0.09988991916179657, "learning_rate": 2.2451181753389688e-05, "loss": 0.2753, "step": 10673 }, { "epoch": 1.7381834466473967, "grad_norm": 0.08530229330062866, "learning_rate": 2.2446469461141885e-05, "loss": 0.2744, "step": 10674 }, { "epoch": 1.7383462932052274, "grad_norm": 0.10740676522254944, "learning_rate": 2.2441757260573462e-05, "loss": 0.3421, "step": 10675 }, { "epoch": 1.738509139763058, "grad_norm": 0.0941639170050621, "learning_rate": 2.2437045151853633e-05, "loss": 0.3423, "step": 10676 }, { "epoch": 1.738671986320889, "grad_norm": 0.15242135524749756, "learning_rate": 2.2432333135151563e-05, "loss": 0.3182, "step": 10677 }, { "epoch": 1.73883483287872, "grad_norm": 0.12009242177009583, "learning_rate": 2.2427621210636422e-05, "loss": 0.3298, "step": 10678 }, { "epoch": 1.738997679436551, "grad_norm": 0.07426817715167999, "learning_rate": 2.242290937847738e-05, "loss": 0.3165, "step": 10679 }, { "epoch": 1.7391605259943819, "grad_norm": 0.12025131285190582, "learning_rate": 2.2418197638843623e-05, "loss": 0.3032, "step": 10680 }, { "epoch": 1.7393233725522128, "grad_norm": 0.08174902945756912, "learning_rate": 2.24134859919043e-05, "loss": 0.2928, "step": 10681 }, { "epoch": 1.7394862191100435, "grad_norm": 0.1657835990190506, "learning_rate": 2.2408774437828586e-05, "loss": 0.328, "step": 10682 }, { "epoch": 1.7396490656678745, "grad_norm": 0.06562911719083786, "learning_rate": 2.240406297678562e-05, "loss": 0.2979, "step": 10683 }, { "epoch": 1.7398119122257052, "grad_norm": 0.08563359826803207, "learning_rate": 2.2399351608944583e-05, "loss": 0.3215, "step": 10684 }, { "epoch": 1.7399747587835361, "grad_norm": 0.1074756532907486, "learning_rate": 2.2394640334474605e-05, "loss": 0.3041, "step": 10685 }, { "epoch": 1.740137605341367, "grad_norm": 0.12830109894275665, "learning_rate": 2.238992915354485e-05, "loss": 0.2895, "step": 10686 }, { "epoch": 1.740300451899198, "grad_norm": 0.1328006088733673, "learning_rate": 2.238521806632446e-05, "loss": 0.2857, "step": 10687 }, { "epoch": 1.740463298457029, "grad_norm": 0.0925522968173027, "learning_rate": 2.2380507072982574e-05, "loss": 0.3257, "step": 10688 }, { "epoch": 1.74062614501486, "grad_norm": 0.11598984152078629, "learning_rate": 2.2375796173688333e-05, "loss": 0.3053, "step": 10689 }, { "epoch": 1.7407889915726906, "grad_norm": 0.12793447077274323, "learning_rate": 2.2371085368610865e-05, "loss": 0.3387, "step": 10690 }, { "epoch": 1.7409518381305216, "grad_norm": 0.10140075534582138, "learning_rate": 2.236637465791932e-05, "loss": 0.3132, "step": 10691 }, { "epoch": 1.7411146846883523, "grad_norm": 0.090640127658844, "learning_rate": 2.2361664041782815e-05, "loss": 0.3224, "step": 10692 }, { "epoch": 1.7412775312461832, "grad_norm": 0.12724292278289795, "learning_rate": 2.235695352037047e-05, "loss": 0.2988, "step": 10693 }, { "epoch": 1.7414403778040142, "grad_norm": 0.10434878617525101, "learning_rate": 2.2352243093851407e-05, "loss": 0.3114, "step": 10694 }, { "epoch": 1.7416032243618451, "grad_norm": 0.08231931179761887, "learning_rate": 2.234753276239476e-05, "loss": 0.3154, "step": 10695 }, { "epoch": 1.741766070919676, "grad_norm": 0.09238824993371964, "learning_rate": 2.2342822526169636e-05, "loss": 0.3199, "step": 10696 }, { "epoch": 1.7419289174775068, "grad_norm": 0.14687734842300415, "learning_rate": 2.233811238534513e-05, "loss": 0.3125, "step": 10697 }, { "epoch": 1.7420917640353377, "grad_norm": 0.11256935447454453, "learning_rate": 2.2333402340090377e-05, "loss": 0.287, "step": 10698 }, { "epoch": 1.7422546105931684, "grad_norm": 0.08030503988265991, "learning_rate": 2.2328692390574464e-05, "loss": 0.3028, "step": 10699 }, { "epoch": 1.7424174571509994, "grad_norm": 0.09389014542102814, "learning_rate": 2.2323982536966504e-05, "loss": 0.2712, "step": 10700 }, { "epoch": 1.7425803037088303, "grad_norm": 0.10254257917404175, "learning_rate": 2.2319272779435576e-05, "loss": 0.3118, "step": 10701 }, { "epoch": 1.7427431502666613, "grad_norm": 0.14346754550933838, "learning_rate": 2.2314563118150796e-05, "loss": 0.3416, "step": 10702 }, { "epoch": 1.7429059968244922, "grad_norm": 0.09408800303936005, "learning_rate": 2.230985355328125e-05, "loss": 0.3272, "step": 10703 }, { "epoch": 1.7430688433823232, "grad_norm": 0.07928569614887238, "learning_rate": 2.2305144084996014e-05, "loss": 0.3428, "step": 10704 }, { "epoch": 1.7432316899401539, "grad_norm": 0.09999124705791473, "learning_rate": 2.2300434713464177e-05, "loss": 0.3114, "step": 10705 }, { "epoch": 1.7433945364979848, "grad_norm": 0.09920404851436615, "learning_rate": 2.2295725438854823e-05, "loss": 0.3051, "step": 10706 }, { "epoch": 1.7435573830558155, "grad_norm": 0.10395261645317078, "learning_rate": 2.2291016261337035e-05, "loss": 0.3256, "step": 10707 }, { "epoch": 1.7437202296136465, "grad_norm": 0.10490046441555023, "learning_rate": 2.2286307181079872e-05, "loss": 0.3047, "step": 10708 }, { "epoch": 1.7438830761714774, "grad_norm": 0.0838976576924324, "learning_rate": 2.2281598198252413e-05, "loss": 0.3591, "step": 10709 }, { "epoch": 1.7440459227293084, "grad_norm": 0.0882437527179718, "learning_rate": 2.2276889313023726e-05, "loss": 0.3112, "step": 10710 }, { "epoch": 1.7442087692871393, "grad_norm": 0.0853036642074585, "learning_rate": 2.2272180525562875e-05, "loss": 0.3227, "step": 10711 }, { "epoch": 1.74437161584497, "grad_norm": 0.10280521214008331, "learning_rate": 2.2267471836038904e-05, "loss": 0.3277, "step": 10712 }, { "epoch": 1.744534462402801, "grad_norm": 0.1035228967666626, "learning_rate": 2.2262763244620894e-05, "loss": 0.336, "step": 10713 }, { "epoch": 1.7446973089606317, "grad_norm": 0.10254094004631042, "learning_rate": 2.225805475147788e-05, "loss": 0.3085, "step": 10714 }, { "epoch": 1.7448601555184626, "grad_norm": 0.13340497016906738, "learning_rate": 2.2253346356778905e-05, "loss": 0.2836, "step": 10715 }, { "epoch": 1.7450230020762936, "grad_norm": 0.12592363357543945, "learning_rate": 2.2248638060693038e-05, "loss": 0.3487, "step": 10716 }, { "epoch": 1.7451858486341245, "grad_norm": 0.09059519320726395, "learning_rate": 2.224392986338931e-05, "loss": 0.3291, "step": 10717 }, { "epoch": 1.7453486951919555, "grad_norm": 0.11776606738567352, "learning_rate": 2.2239221765036752e-05, "loss": 0.3036, "step": 10718 }, { "epoch": 1.7455115417497864, "grad_norm": 0.10961654037237167, "learning_rate": 2.2234513765804396e-05, "loss": 0.2933, "step": 10719 }, { "epoch": 1.7456743883076171, "grad_norm": 0.08242981135845184, "learning_rate": 2.2229805865861296e-05, "loss": 0.3208, "step": 10720 }, { "epoch": 1.745837234865448, "grad_norm": 0.0950307548046112, "learning_rate": 2.222509806537646e-05, "loss": 0.3, "step": 10721 }, { "epoch": 1.7460000814232788, "grad_norm": 0.14935126900672913, "learning_rate": 2.2220390364518912e-05, "loss": 0.2885, "step": 10722 }, { "epoch": 1.7461629279811097, "grad_norm": 0.12242524325847626, "learning_rate": 2.221568276345768e-05, "loss": 0.3591, "step": 10723 }, { "epoch": 1.7463257745389407, "grad_norm": 0.09569031745195389, "learning_rate": 2.2210975262361784e-05, "loss": 0.3026, "step": 10724 }, { "epoch": 1.7464886210967716, "grad_norm": 0.08163300156593323, "learning_rate": 2.2206267861400226e-05, "loss": 0.3431, "step": 10725 }, { "epoch": 1.7466514676546026, "grad_norm": 0.11687366664409637, "learning_rate": 2.220156056074202e-05, "loss": 0.2852, "step": 10726 }, { "epoch": 1.7468143142124335, "grad_norm": 0.09610861539840698, "learning_rate": 2.2196853360556178e-05, "loss": 0.3073, "step": 10727 }, { "epoch": 1.7469771607702642, "grad_norm": 0.12916667759418488, "learning_rate": 2.21921462610117e-05, "loss": 0.2973, "step": 10728 }, { "epoch": 1.7471400073280952, "grad_norm": 0.14063704013824463, "learning_rate": 2.2187439262277586e-05, "loss": 0.3155, "step": 10729 }, { "epoch": 1.7473028538859259, "grad_norm": 0.10144629329442978, "learning_rate": 2.2182732364522822e-05, "loss": 0.3144, "step": 10730 }, { "epoch": 1.7474657004437568, "grad_norm": 0.08692005276679993, "learning_rate": 2.2178025567916413e-05, "loss": 0.321, "step": 10731 }, { "epoch": 1.7476285470015878, "grad_norm": 0.11807706207036972, "learning_rate": 2.2173318872627345e-05, "loss": 0.3373, "step": 10732 }, { "epoch": 1.7477913935594187, "grad_norm": 0.11842615157365799, "learning_rate": 2.2168612278824594e-05, "loss": 0.3281, "step": 10733 }, { "epoch": 1.7479542401172496, "grad_norm": 0.10666539520025253, "learning_rate": 2.2163905786677137e-05, "loss": 0.304, "step": 10734 }, { "epoch": 1.7481170866750804, "grad_norm": 0.1289316713809967, "learning_rate": 2.215919939635397e-05, "loss": 0.3077, "step": 10735 }, { "epoch": 1.7482799332329113, "grad_norm": 0.12116658687591553, "learning_rate": 2.2154493108024053e-05, "loss": 0.3071, "step": 10736 }, { "epoch": 1.748442779790742, "grad_norm": 0.07879728823900223, "learning_rate": 2.2149786921856352e-05, "loss": 0.2601, "step": 10737 }, { "epoch": 1.748605626348573, "grad_norm": 0.1149279773235321, "learning_rate": 2.214508083801985e-05, "loss": 0.3186, "step": 10738 }, { "epoch": 1.748768472906404, "grad_norm": 0.09250042587518692, "learning_rate": 2.2140374856683496e-05, "loss": 0.3543, "step": 10739 }, { "epoch": 1.7489313194642349, "grad_norm": 0.12124627828598022, "learning_rate": 2.2135668978016257e-05, "loss": 0.3179, "step": 10740 }, { "epoch": 1.7490941660220658, "grad_norm": 0.14018453657627106, "learning_rate": 2.213096320218707e-05, "loss": 0.2967, "step": 10741 }, { "epoch": 1.7492570125798967, "grad_norm": 0.1112324520945549, "learning_rate": 2.212625752936491e-05, "loss": 0.3067, "step": 10742 }, { "epoch": 1.7494198591377275, "grad_norm": 0.14533483982086182, "learning_rate": 2.212155195971871e-05, "loss": 0.3654, "step": 10743 }, { "epoch": 1.7495827056955584, "grad_norm": 0.13044898211956024, "learning_rate": 2.2116846493417424e-05, "loss": 0.3346, "step": 10744 }, { "epoch": 1.7497455522533891, "grad_norm": 0.1305169314146042, "learning_rate": 2.2112141130629984e-05, "loss": 0.3218, "step": 10745 }, { "epoch": 1.74990839881122, "grad_norm": 0.10100507736206055, "learning_rate": 2.210743587152533e-05, "loss": 0.357, "step": 10746 }, { "epoch": 1.750071245369051, "grad_norm": 0.0967487022280693, "learning_rate": 2.2102730716272395e-05, "loss": 0.3035, "step": 10747 }, { "epoch": 1.750234091926882, "grad_norm": 0.11313404887914658, "learning_rate": 2.20980256650401e-05, "loss": 0.2791, "step": 10748 }, { "epoch": 1.750396938484713, "grad_norm": 0.10596083849668503, "learning_rate": 2.209332071799739e-05, "loss": 0.3264, "step": 10749 }, { "epoch": 1.7505597850425436, "grad_norm": 0.11079125851392746, "learning_rate": 2.2088615875313173e-05, "loss": 0.3091, "step": 10750 }, { "epoch": 1.7507226316003746, "grad_norm": 0.0677114948630333, "learning_rate": 2.208391113715637e-05, "loss": 0.2968, "step": 10751 }, { "epoch": 1.7508854781582053, "grad_norm": 0.0867980495095253, "learning_rate": 2.2079206503695884e-05, "loss": 0.2934, "step": 10752 }, { "epoch": 1.7510483247160362, "grad_norm": 0.08824697881937027, "learning_rate": 2.2074501975100646e-05, "loss": 0.2877, "step": 10753 }, { "epoch": 1.7512111712738672, "grad_norm": 0.09874926507472992, "learning_rate": 2.206979755153955e-05, "loss": 0.3371, "step": 10754 }, { "epoch": 1.751374017831698, "grad_norm": 0.1665446162223816, "learning_rate": 2.2065093233181503e-05, "loss": 0.3091, "step": 10755 }, { "epoch": 1.751536864389529, "grad_norm": 0.15303194522857666, "learning_rate": 2.206038902019539e-05, "loss": 0.3361, "step": 10756 }, { "epoch": 1.75169971094736, "grad_norm": 0.11800681799650192, "learning_rate": 2.205568491275013e-05, "loss": 0.3058, "step": 10757 }, { "epoch": 1.7518625575051907, "grad_norm": 0.0866079330444336, "learning_rate": 2.2050980911014602e-05, "loss": 0.3126, "step": 10758 }, { "epoch": 1.7520254040630217, "grad_norm": 0.07493962347507477, "learning_rate": 2.2046277015157685e-05, "loss": 0.3376, "step": 10759 }, { "epoch": 1.7521882506208524, "grad_norm": 0.07715747505426407, "learning_rate": 2.2041573225348283e-05, "loss": 0.3411, "step": 10760 }, { "epoch": 1.7523510971786833, "grad_norm": 0.1037219986319542, "learning_rate": 2.2036869541755267e-05, "loss": 0.3516, "step": 10761 }, { "epoch": 1.7525139437365143, "grad_norm": 0.09785868972539902, "learning_rate": 2.2032165964547506e-05, "loss": 0.3602, "step": 10762 }, { "epoch": 1.7526767902943452, "grad_norm": 0.14017872512340546, "learning_rate": 2.202746249389388e-05, "loss": 0.3175, "step": 10763 }, { "epoch": 1.7528396368521761, "grad_norm": 0.18740564584732056, "learning_rate": 2.2022759129963257e-05, "loss": 0.3424, "step": 10764 }, { "epoch": 1.753002483410007, "grad_norm": 0.1145445927977562, "learning_rate": 2.2018055872924504e-05, "loss": 0.3284, "step": 10765 }, { "epoch": 1.7531653299678378, "grad_norm": 0.12153073400259018, "learning_rate": 2.2013352722946473e-05, "loss": 0.3718, "step": 10766 }, { "epoch": 1.7533281765256685, "grad_norm": 0.06843391805887222, "learning_rate": 2.200864968019803e-05, "loss": 0.2958, "step": 10767 }, { "epoch": 1.7534910230834995, "grad_norm": 0.08656556904315948, "learning_rate": 2.200394674484803e-05, "loss": 0.2563, "step": 10768 }, { "epoch": 1.7536538696413304, "grad_norm": 0.10113009065389633, "learning_rate": 2.1999243917065318e-05, "loss": 0.3051, "step": 10769 }, { "epoch": 1.7538167161991614, "grad_norm": 0.10738028585910797, "learning_rate": 2.199454119701873e-05, "loss": 0.2847, "step": 10770 }, { "epoch": 1.7539795627569923, "grad_norm": 0.09905651211738586, "learning_rate": 2.198983858487713e-05, "loss": 0.3204, "step": 10771 }, { "epoch": 1.7541424093148232, "grad_norm": 0.12622113525867462, "learning_rate": 2.198513608080934e-05, "loss": 0.3342, "step": 10772 }, { "epoch": 1.754305255872654, "grad_norm": 0.1457095444202423, "learning_rate": 2.1980433684984202e-05, "loss": 0.3221, "step": 10773 }, { "epoch": 1.754468102430485, "grad_norm": 0.10273237526416779, "learning_rate": 2.1975731397570525e-05, "loss": 0.3469, "step": 10774 }, { "epoch": 1.7546309489883156, "grad_norm": 0.11160924285650253, "learning_rate": 2.1971029218737164e-05, "loss": 0.2935, "step": 10775 }, { "epoch": 1.7547937955461466, "grad_norm": 0.1102401614189148, "learning_rate": 2.1966327148652934e-05, "loss": 0.3461, "step": 10776 }, { "epoch": 1.7549566421039775, "grad_norm": 0.10931827872991562, "learning_rate": 2.1961625187486635e-05, "loss": 0.3161, "step": 10777 }, { "epoch": 1.7551194886618084, "grad_norm": 0.17703326046466827, "learning_rate": 2.1956923335407103e-05, "loss": 0.3645, "step": 10778 }, { "epoch": 1.7552823352196394, "grad_norm": 0.1510501652956009, "learning_rate": 2.1952221592583146e-05, "loss": 0.3072, "step": 10779 }, { "epoch": 1.7554451817774703, "grad_norm": 0.14519882202148438, "learning_rate": 2.1947519959183553e-05, "loss": 0.2904, "step": 10780 }, { "epoch": 1.755608028335301, "grad_norm": 0.13785111904144287, "learning_rate": 2.1942818435377143e-05, "loss": 0.3009, "step": 10781 }, { "epoch": 1.755770874893132, "grad_norm": 0.16355963051319122, "learning_rate": 2.1938117021332717e-05, "loss": 0.3101, "step": 10782 }, { "epoch": 1.7559337214509627, "grad_norm": 0.10280781984329224, "learning_rate": 2.1933415717219056e-05, "loss": 0.3218, "step": 10783 }, { "epoch": 1.7560965680087937, "grad_norm": 0.11355119943618774, "learning_rate": 2.1928714523204964e-05, "loss": 0.3474, "step": 10784 }, { "epoch": 1.7562594145666246, "grad_norm": 0.0927046611905098, "learning_rate": 2.1924013439459217e-05, "loss": 0.3335, "step": 10785 }, { "epoch": 1.7564222611244555, "grad_norm": 0.10969076305627823, "learning_rate": 2.1919312466150605e-05, "loss": 0.3468, "step": 10786 }, { "epoch": 1.7565851076822865, "grad_norm": 0.19698122143745422, "learning_rate": 2.191461160344791e-05, "loss": 0.3348, "step": 10787 }, { "epoch": 1.7567479542401172, "grad_norm": 0.08804914355278015, "learning_rate": 2.1909910851519893e-05, "loss": 0.2918, "step": 10788 }, { "epoch": 1.7569108007979481, "grad_norm": 0.11172935366630554, "learning_rate": 2.1905210210535344e-05, "loss": 0.325, "step": 10789 }, { "epoch": 1.7570736473557789, "grad_norm": 0.141591876745224, "learning_rate": 2.190050968066302e-05, "loss": 0.2985, "step": 10790 }, { "epoch": 1.7572364939136098, "grad_norm": 0.11218049377202988, "learning_rate": 2.1895809262071688e-05, "loss": 0.2954, "step": 10791 }, { "epoch": 1.7573993404714408, "grad_norm": 0.09384430944919586, "learning_rate": 2.189110895493009e-05, "loss": 0.295, "step": 10792 }, { "epoch": 1.7575621870292717, "grad_norm": 0.10730619728565216, "learning_rate": 2.1886408759407006e-05, "loss": 0.2933, "step": 10793 }, { "epoch": 1.7577250335871026, "grad_norm": 0.15240736305713654, "learning_rate": 2.1881708675671175e-05, "loss": 0.3791, "step": 10794 }, { "epoch": 1.7578878801449336, "grad_norm": 0.14255990087985992, "learning_rate": 2.1877008703891348e-05, "loss": 0.3598, "step": 10795 }, { "epoch": 1.7580507267027643, "grad_norm": 0.15248346328735352, "learning_rate": 2.1872308844236254e-05, "loss": 0.3142, "step": 10796 }, { "epoch": 1.7582135732605952, "grad_norm": 0.09096355736255646, "learning_rate": 2.1867609096874652e-05, "loss": 0.3397, "step": 10797 }, { "epoch": 1.758376419818426, "grad_norm": 0.09107904881238937, "learning_rate": 2.186290946197527e-05, "loss": 0.318, "step": 10798 }, { "epoch": 1.758539266376257, "grad_norm": 0.1419817954301834, "learning_rate": 2.1858209939706824e-05, "loss": 0.3545, "step": 10799 }, { "epoch": 1.7587021129340878, "grad_norm": 0.10717682540416718, "learning_rate": 2.185351053023807e-05, "loss": 0.2878, "step": 10800 }, { "epoch": 1.7588649594919188, "grad_norm": 0.15579788386821747, "learning_rate": 2.1848811233737712e-05, "loss": 0.3297, "step": 10801 }, { "epoch": 1.7590278060497497, "grad_norm": 0.08766303956508636, "learning_rate": 2.1844112050374465e-05, "loss": 0.308, "step": 10802 }, { "epoch": 1.7591906526075805, "grad_norm": 0.11372840404510498, "learning_rate": 2.1839412980317053e-05, "loss": 0.3044, "step": 10803 }, { "epoch": 1.7593534991654114, "grad_norm": 0.11992926150560379, "learning_rate": 2.1834714023734186e-05, "loss": 0.3504, "step": 10804 }, { "epoch": 1.7595163457232421, "grad_norm": 0.1219312772154808, "learning_rate": 2.1830015180794572e-05, "loss": 0.3059, "step": 10805 }, { "epoch": 1.759679192281073, "grad_norm": 0.1314520239830017, "learning_rate": 2.1825316451666912e-05, "loss": 0.3055, "step": 10806 }, { "epoch": 1.759842038838904, "grad_norm": 0.13233739137649536, "learning_rate": 2.182061783651989e-05, "loss": 0.3103, "step": 10807 }, { "epoch": 1.760004885396735, "grad_norm": 0.10324166715145111, "learning_rate": 2.1815919335522223e-05, "loss": 0.3325, "step": 10808 }, { "epoch": 1.7601677319545659, "grad_norm": 0.10841420292854309, "learning_rate": 2.1811220948842594e-05, "loss": 0.2925, "step": 10809 }, { "epoch": 1.7603305785123968, "grad_norm": 0.07978358119726181, "learning_rate": 2.1806522676649677e-05, "loss": 0.2913, "step": 10810 }, { "epoch": 1.7604934250702275, "grad_norm": 0.11573998630046844, "learning_rate": 2.1801824519112172e-05, "loss": 0.3115, "step": 10811 }, { "epoch": 1.7606562716280585, "grad_norm": 0.10656055063009262, "learning_rate": 2.179712647639875e-05, "loss": 0.3502, "step": 10812 }, { "epoch": 1.7608191181858892, "grad_norm": 0.1054009422659874, "learning_rate": 2.1792428548678085e-05, "loss": 0.3182, "step": 10813 }, { "epoch": 1.7609819647437202, "grad_norm": 0.07274787873029709, "learning_rate": 2.1787730736118835e-05, "loss": 0.3399, "step": 10814 }, { "epoch": 1.761144811301551, "grad_norm": 0.12677763402462006, "learning_rate": 2.1783033038889683e-05, "loss": 0.3209, "step": 10815 }, { "epoch": 1.761307657859382, "grad_norm": 0.10871552675962448, "learning_rate": 2.177833545715928e-05, "loss": 0.2923, "step": 10816 }, { "epoch": 1.761470504417213, "grad_norm": 0.10271716862916946, "learning_rate": 2.1773637991096286e-05, "loss": 0.3306, "step": 10817 }, { "epoch": 1.761633350975044, "grad_norm": 0.08542787283658981, "learning_rate": 2.1768940640869357e-05, "loss": 0.3429, "step": 10818 }, { "epoch": 1.7617961975328746, "grad_norm": 0.10147380083799362, "learning_rate": 2.1764243406647143e-05, "loss": 0.3092, "step": 10819 }, { "epoch": 1.7619590440907056, "grad_norm": 0.07634030282497406, "learning_rate": 2.1759546288598277e-05, "loss": 0.2706, "step": 10820 }, { "epoch": 1.7621218906485363, "grad_norm": 0.09242742508649826, "learning_rate": 2.1754849286891412e-05, "loss": 0.294, "step": 10821 }, { "epoch": 1.7622847372063672, "grad_norm": 0.09064975380897522, "learning_rate": 2.1750152401695183e-05, "loss": 0.2999, "step": 10822 }, { "epoch": 1.7624475837641982, "grad_norm": 0.10595978796482086, "learning_rate": 2.1745455633178216e-05, "loss": 0.3623, "step": 10823 }, { "epoch": 1.7626104303220291, "grad_norm": 0.0942884087562561, "learning_rate": 2.1740758981509147e-05, "loss": 0.3319, "step": 10824 }, { "epoch": 1.76277327687986, "grad_norm": 0.12444143742322922, "learning_rate": 2.1736062446856588e-05, "loss": 0.3279, "step": 10825 }, { "epoch": 1.7629361234376908, "grad_norm": 0.09754183888435364, "learning_rate": 2.173136602938917e-05, "loss": 0.3274, "step": 10826 }, { "epoch": 1.7630989699955217, "grad_norm": 0.10083980113267899, "learning_rate": 2.1726669729275508e-05, "loss": 0.328, "step": 10827 }, { "epoch": 1.7632618165533525, "grad_norm": 0.11649113893508911, "learning_rate": 2.17219735466842e-05, "loss": 0.2944, "step": 10828 }, { "epoch": 1.7634246631111834, "grad_norm": 0.11672000586986542, "learning_rate": 2.1717277481783873e-05, "loss": 0.297, "step": 10829 }, { "epoch": 1.7635875096690143, "grad_norm": 0.11582469940185547, "learning_rate": 2.1712581534743122e-05, "loss": 0.3002, "step": 10830 }, { "epoch": 1.7637503562268453, "grad_norm": 0.12768419086933136, "learning_rate": 2.170788570573054e-05, "loss": 0.3228, "step": 10831 }, { "epoch": 1.7639132027846762, "grad_norm": 0.17240314185619354, "learning_rate": 2.1703189994914718e-05, "loss": 0.3594, "step": 10832 }, { "epoch": 1.7640760493425072, "grad_norm": 0.1303970366716385, "learning_rate": 2.169849440246426e-05, "loss": 0.3406, "step": 10833 }, { "epoch": 1.764238895900338, "grad_norm": 0.06914813816547394, "learning_rate": 2.1693798928547742e-05, "loss": 0.3418, "step": 10834 }, { "epoch": 1.7644017424581688, "grad_norm": 0.10428936779499054, "learning_rate": 2.1689103573333753e-05, "loss": 0.3166, "step": 10835 }, { "epoch": 1.7645645890159996, "grad_norm": 0.14548859000205994, "learning_rate": 2.1684408336990852e-05, "loss": 0.3127, "step": 10836 }, { "epoch": 1.7647274355738305, "grad_norm": 0.11096588522195816, "learning_rate": 2.1679713219687635e-05, "loss": 0.3187, "step": 10837 }, { "epoch": 1.7648902821316614, "grad_norm": 0.14846958220005035, "learning_rate": 2.1675018221592663e-05, "loss": 0.3359, "step": 10838 }, { "epoch": 1.7650531286894924, "grad_norm": 0.10520811378955841, "learning_rate": 2.167032334287449e-05, "loss": 0.31, "step": 10839 }, { "epoch": 1.7652159752473233, "grad_norm": 0.09642253071069717, "learning_rate": 2.1665628583701695e-05, "loss": 0.3508, "step": 10840 }, { "epoch": 1.765378821805154, "grad_norm": 0.11941887438297272, "learning_rate": 2.1660933944242817e-05, "loss": 0.295, "step": 10841 }, { "epoch": 1.765541668362985, "grad_norm": 0.13155348598957062, "learning_rate": 2.165623942466642e-05, "loss": 0.3252, "step": 10842 }, { "epoch": 1.7657045149208157, "grad_norm": 0.1151197999715805, "learning_rate": 2.165154502514104e-05, "loss": 0.2852, "step": 10843 }, { "epoch": 1.7658673614786466, "grad_norm": 0.13676577806472778, "learning_rate": 2.164685074583523e-05, "loss": 0.3501, "step": 10844 }, { "epoch": 1.7660302080364776, "grad_norm": 0.17627054452896118, "learning_rate": 2.1642156586917522e-05, "loss": 0.2989, "step": 10845 }, { "epoch": 1.7661930545943085, "grad_norm": 0.15365688502788544, "learning_rate": 2.163746254855646e-05, "loss": 0.2797, "step": 10846 }, { "epoch": 1.7663559011521395, "grad_norm": 0.119951531291008, "learning_rate": 2.163276863092055e-05, "loss": 0.314, "step": 10847 }, { "epoch": 1.7665187477099704, "grad_norm": 0.11522260308265686, "learning_rate": 2.162807483417835e-05, "loss": 0.3291, "step": 10848 }, { "epoch": 1.7666815942678011, "grad_norm": 0.15862888097763062, "learning_rate": 2.162338115849837e-05, "loss": 0.3373, "step": 10849 }, { "epoch": 1.766844440825632, "grad_norm": 0.09348741173744202, "learning_rate": 2.1618687604049103e-05, "loss": 0.3271, "step": 10850 }, { "epoch": 1.7670072873834628, "grad_norm": 0.11262381076812744, "learning_rate": 2.1613994170999096e-05, "loss": 0.3309, "step": 10851 }, { "epoch": 1.7671701339412937, "grad_norm": 0.16942697763442993, "learning_rate": 2.1609300859516844e-05, "loss": 0.3473, "step": 10852 }, { "epoch": 1.7673329804991247, "grad_norm": 0.089656300842762, "learning_rate": 2.160460766977085e-05, "loss": 0.2918, "step": 10853 }, { "epoch": 1.7674958270569556, "grad_norm": 0.14181219041347504, "learning_rate": 2.1599914601929606e-05, "loss": 0.3061, "step": 10854 }, { "epoch": 1.7676586736147866, "grad_norm": 0.13257057964801788, "learning_rate": 2.159522165616162e-05, "loss": 0.303, "step": 10855 }, { "epoch": 1.7678215201726175, "grad_norm": 0.15759602189064026, "learning_rate": 2.159052883263538e-05, "loss": 0.3572, "step": 10856 }, { "epoch": 1.7679843667304482, "grad_norm": 0.10295671969652176, "learning_rate": 2.1585836131519362e-05, "loss": 0.2941, "step": 10857 }, { "epoch": 1.7681472132882792, "grad_norm": 0.09320834279060364, "learning_rate": 2.1581143552982058e-05, "loss": 0.298, "step": 10858 }, { "epoch": 1.76831005984611, "grad_norm": 0.09759388864040375, "learning_rate": 2.157645109719195e-05, "loss": 0.3293, "step": 10859 }, { "epoch": 1.7684729064039408, "grad_norm": 0.06915811449289322, "learning_rate": 2.15717587643175e-05, "loss": 0.2954, "step": 10860 }, { "epoch": 1.7686357529617718, "grad_norm": 0.07805747538805008, "learning_rate": 2.156706655452718e-05, "loss": 0.334, "step": 10861 }, { "epoch": 1.7687985995196027, "grad_norm": 0.17500784993171692, "learning_rate": 2.1562374467989455e-05, "loss": 0.337, "step": 10862 }, { "epoch": 1.7689614460774337, "grad_norm": 0.09575624763965607, "learning_rate": 2.1557682504872795e-05, "loss": 0.3107, "step": 10863 }, { "epoch": 1.7691242926352644, "grad_norm": 0.14823660254478455, "learning_rate": 2.155299066534564e-05, "loss": 0.3235, "step": 10864 }, { "epoch": 1.7692871391930953, "grad_norm": 0.16162991523742676, "learning_rate": 2.1548298949576443e-05, "loss": 0.3318, "step": 10865 }, { "epoch": 1.769449985750926, "grad_norm": 0.11169744282960892, "learning_rate": 2.1543607357733663e-05, "loss": 0.2904, "step": 10866 }, { "epoch": 1.769612832308757, "grad_norm": 0.10850945860147476, "learning_rate": 2.1538915889985736e-05, "loss": 0.3364, "step": 10867 }, { "epoch": 1.769775678866588, "grad_norm": 0.13895903527736664, "learning_rate": 2.1534224546501087e-05, "loss": 0.302, "step": 10868 }, { "epoch": 1.7699385254244189, "grad_norm": 0.11912579834461212, "learning_rate": 2.1529533327448173e-05, "loss": 0.3202, "step": 10869 }, { "epoch": 1.7701013719822498, "grad_norm": 0.10925471782684326, "learning_rate": 2.152484223299541e-05, "loss": 0.313, "step": 10870 }, { "epoch": 1.7702642185400808, "grad_norm": 0.13386668264865875, "learning_rate": 2.152015126331122e-05, "loss": 0.3005, "step": 10871 }, { "epoch": 1.7704270650979115, "grad_norm": 0.12883831560611725, "learning_rate": 2.1515460418564016e-05, "loss": 0.3189, "step": 10872 }, { "epoch": 1.7705899116557424, "grad_norm": 0.14235831797122955, "learning_rate": 2.1510769698922232e-05, "loss": 0.2998, "step": 10873 }, { "epoch": 1.7707527582135731, "grad_norm": 0.11000187695026398, "learning_rate": 2.150607910455427e-05, "loss": 0.2842, "step": 10874 }, { "epoch": 1.770915604771404, "grad_norm": 0.09927918761968613, "learning_rate": 2.150138863562854e-05, "loss": 0.3238, "step": 10875 }, { "epoch": 1.771078451329235, "grad_norm": 0.09485036879777908, "learning_rate": 2.1496698292313426e-05, "loss": 0.2925, "step": 10876 }, { "epoch": 1.771241297887066, "grad_norm": 0.09366873651742935, "learning_rate": 2.1492008074777355e-05, "loss": 0.3312, "step": 10877 }, { "epoch": 1.771404144444897, "grad_norm": 0.1332535594701767, "learning_rate": 2.1487317983188692e-05, "loss": 0.3065, "step": 10878 }, { "epoch": 1.7715669910027276, "grad_norm": 0.11623440682888031, "learning_rate": 2.1482628017715843e-05, "loss": 0.3034, "step": 10879 }, { "epoch": 1.7717298375605586, "grad_norm": 0.12037251144647598, "learning_rate": 2.147793817852719e-05, "loss": 0.2732, "step": 10880 }, { "epoch": 1.7718926841183893, "grad_norm": 0.08927466720342636, "learning_rate": 2.1473248465791103e-05, "loss": 0.3292, "step": 10881 }, { "epoch": 1.7720555306762202, "grad_norm": 0.16514161229133606, "learning_rate": 2.146855887967597e-05, "loss": 0.2826, "step": 10882 }, { "epoch": 1.7722183772340512, "grad_norm": 0.08562086522579193, "learning_rate": 2.1463869420350137e-05, "loss": 0.3069, "step": 10883 }, { "epoch": 1.7723812237918821, "grad_norm": 0.10365741699934006, "learning_rate": 2.1459180087982005e-05, "loss": 0.3202, "step": 10884 }, { "epoch": 1.772544070349713, "grad_norm": 0.09516780078411102, "learning_rate": 2.145449088273991e-05, "loss": 0.2969, "step": 10885 }, { "epoch": 1.772706916907544, "grad_norm": 0.13823695480823517, "learning_rate": 2.144980180479222e-05, "loss": 0.3479, "step": 10886 }, { "epoch": 1.7728697634653747, "grad_norm": 0.09533225744962692, "learning_rate": 2.1445112854307268e-05, "loss": 0.2918, "step": 10887 }, { "epoch": 1.7730326100232057, "grad_norm": 0.087033212184906, "learning_rate": 2.1440424031453428e-05, "loss": 0.2777, "step": 10888 }, { "epoch": 1.7731954565810364, "grad_norm": 0.09672299772500992, "learning_rate": 2.1435735336399028e-05, "loss": 0.3439, "step": 10889 }, { "epoch": 1.7733583031388673, "grad_norm": 0.15078796446323395, "learning_rate": 2.1431046769312397e-05, "loss": 0.3299, "step": 10890 }, { "epoch": 1.7735211496966983, "grad_norm": 0.10806681960821152, "learning_rate": 2.142635833036189e-05, "loss": 0.2732, "step": 10891 }, { "epoch": 1.7736839962545292, "grad_norm": 0.0833340510725975, "learning_rate": 2.1421670019715827e-05, "loss": 0.3183, "step": 10892 }, { "epoch": 1.7738468428123602, "grad_norm": 0.08891507238149643, "learning_rate": 2.1416981837542536e-05, "loss": 0.3654, "step": 10893 }, { "epoch": 1.774009689370191, "grad_norm": 0.13237090408802032, "learning_rate": 2.1412293784010314e-05, "loss": 0.3324, "step": 10894 }, { "epoch": 1.7741725359280218, "grad_norm": 0.0913180485367775, "learning_rate": 2.140760585928751e-05, "loss": 0.2963, "step": 10895 }, { "epoch": 1.7743353824858525, "grad_norm": 0.10882537811994553, "learning_rate": 2.1402918063542417e-05, "loss": 0.3157, "step": 10896 }, { "epoch": 1.7744982290436835, "grad_norm": 0.13140596449375153, "learning_rate": 2.1398230396943336e-05, "loss": 0.3268, "step": 10897 }, { "epoch": 1.7746610756015144, "grad_norm": 0.13652552664279938, "learning_rate": 2.139354285965858e-05, "loss": 0.288, "step": 10898 }, { "epoch": 1.7748239221593454, "grad_norm": 0.11601193994283676, "learning_rate": 2.1388855451856432e-05, "loss": 0.3239, "step": 10899 }, { "epoch": 1.7749867687171763, "grad_norm": 0.17140141129493713, "learning_rate": 2.1384168173705204e-05, "loss": 0.3705, "step": 10900 }, { "epoch": 1.7751496152750073, "grad_norm": 0.13990700244903564, "learning_rate": 2.1379481025373166e-05, "loss": 0.3323, "step": 10901 }, { "epoch": 1.775312461832838, "grad_norm": 0.10254868865013123, "learning_rate": 2.1374794007028606e-05, "loss": 0.3194, "step": 10902 }, { "epoch": 1.775475308390669, "grad_norm": 0.11793535202741623, "learning_rate": 2.1370107118839806e-05, "loss": 0.3433, "step": 10903 }, { "epoch": 1.7756381549484996, "grad_norm": 0.14703373610973358, "learning_rate": 2.136542036097504e-05, "loss": 0.3257, "step": 10904 }, { "epoch": 1.7758010015063306, "grad_norm": 0.1698760986328125, "learning_rate": 2.1360733733602558e-05, "loss": 0.2982, "step": 10905 }, { "epoch": 1.7759638480641615, "grad_norm": 0.11741773039102554, "learning_rate": 2.135604723689065e-05, "loss": 0.3032, "step": 10906 }, { "epoch": 1.7761266946219925, "grad_norm": 0.08923681825399399, "learning_rate": 2.1351360871007565e-05, "loss": 0.3108, "step": 10907 }, { "epoch": 1.7762895411798234, "grad_norm": 0.12218548357486725, "learning_rate": 2.1346674636121556e-05, "loss": 0.2783, "step": 10908 }, { "epoch": 1.7764523877376543, "grad_norm": 0.10519662499427795, "learning_rate": 2.1341988532400862e-05, "loss": 0.2911, "step": 10909 }, { "epoch": 1.776615234295485, "grad_norm": 0.1518537551164627, "learning_rate": 2.133730256001375e-05, "loss": 0.3288, "step": 10910 }, { "epoch": 1.776778080853316, "grad_norm": 0.17226441204547882, "learning_rate": 2.133261671912845e-05, "loss": 0.3259, "step": 10911 }, { "epoch": 1.7769409274111467, "grad_norm": 0.07892867922782898, "learning_rate": 2.1327931009913187e-05, "loss": 0.3091, "step": 10912 }, { "epoch": 1.7771037739689777, "grad_norm": 0.0840110331773758, "learning_rate": 2.132324543253621e-05, "loss": 0.3227, "step": 10913 }, { "epoch": 1.7772666205268086, "grad_norm": 0.08228075504302979, "learning_rate": 2.1318559987165747e-05, "loss": 0.3042, "step": 10914 }, { "epoch": 1.7774294670846396, "grad_norm": 0.1734311878681183, "learning_rate": 2.1313874673969996e-05, "loss": 0.2961, "step": 10915 }, { "epoch": 1.7775923136424705, "grad_norm": 0.12317235767841339, "learning_rate": 2.1309189493117194e-05, "loss": 0.304, "step": 10916 }, { "epoch": 1.7777551602003012, "grad_norm": 0.12666438519954681, "learning_rate": 2.1304504444775547e-05, "loss": 0.2871, "step": 10917 }, { "epoch": 1.7779180067581322, "grad_norm": 0.12660767138004303, "learning_rate": 2.129981952911326e-05, "loss": 0.3123, "step": 10918 }, { "epoch": 1.7780808533159629, "grad_norm": 0.13176430761814117, "learning_rate": 2.129513474629854e-05, "loss": 0.3137, "step": 10919 }, { "epoch": 1.7782436998737938, "grad_norm": 0.10789518058300018, "learning_rate": 2.1290450096499583e-05, "loss": 0.3113, "step": 10920 }, { "epoch": 1.7784065464316248, "grad_norm": 0.10375877469778061, "learning_rate": 2.128576557988458e-05, "loss": 0.3071, "step": 10921 }, { "epoch": 1.7785693929894557, "grad_norm": 0.13865043222904205, "learning_rate": 2.1281081196621724e-05, "loss": 0.3263, "step": 10922 }, { "epoch": 1.7787322395472867, "grad_norm": 0.10458599776029587, "learning_rate": 2.1276396946879186e-05, "loss": 0.3132, "step": 10923 }, { "epoch": 1.7788950861051176, "grad_norm": 0.12597249448299408, "learning_rate": 2.1271712830825163e-05, "loss": 0.3032, "step": 10924 }, { "epoch": 1.7790579326629483, "grad_norm": 0.08150374889373779, "learning_rate": 2.1267028848627824e-05, "loss": 0.3518, "step": 10925 }, { "epoch": 1.7792207792207793, "grad_norm": 0.09217327833175659, "learning_rate": 2.126234500045533e-05, "loss": 0.298, "step": 10926 }, { "epoch": 1.77938362577861, "grad_norm": 0.0992010086774826, "learning_rate": 2.1257661286475838e-05, "loss": 0.2808, "step": 10927 }, { "epoch": 1.779546472336441, "grad_norm": 0.07040320336818695, "learning_rate": 2.125297770685753e-05, "loss": 0.3224, "step": 10928 }, { "epoch": 1.7797093188942719, "grad_norm": 0.11862733960151672, "learning_rate": 2.1248294261768547e-05, "loss": 0.3068, "step": 10929 }, { "epoch": 1.7798721654521028, "grad_norm": 0.15385285019874573, "learning_rate": 2.124361095137703e-05, "loss": 0.3584, "step": 10930 }, { "epoch": 1.7800350120099337, "grad_norm": 0.07193585485219955, "learning_rate": 2.1238927775851146e-05, "loss": 0.2719, "step": 10931 }, { "epoch": 1.7801978585677645, "grad_norm": 0.10407189279794693, "learning_rate": 2.1234244735359023e-05, "loss": 0.3143, "step": 10932 }, { "epoch": 1.7803607051255954, "grad_norm": 0.08845622837543488, "learning_rate": 2.12295618300688e-05, "loss": 0.3083, "step": 10933 }, { "epoch": 1.7805235516834261, "grad_norm": 0.10600779950618744, "learning_rate": 2.122487906014859e-05, "loss": 0.3011, "step": 10934 }, { "epoch": 1.780686398241257, "grad_norm": 0.12181956321001053, "learning_rate": 2.1220196425766542e-05, "loss": 0.3178, "step": 10935 }, { "epoch": 1.780849244799088, "grad_norm": 0.09374290704727173, "learning_rate": 2.1215513927090767e-05, "loss": 0.2975, "step": 10936 }, { "epoch": 1.781012091356919, "grad_norm": 0.14737050235271454, "learning_rate": 2.1210831564289374e-05, "loss": 0.2749, "step": 10937 }, { "epoch": 1.78117493791475, "grad_norm": 0.12958133220672607, "learning_rate": 2.1206149337530484e-05, "loss": 0.2976, "step": 10938 }, { "epoch": 1.7813377844725808, "grad_norm": 0.12860862910747528, "learning_rate": 2.12014672469822e-05, "loss": 0.2975, "step": 10939 }, { "epoch": 1.7815006310304116, "grad_norm": 0.07819932699203491, "learning_rate": 2.1196785292812624e-05, "loss": 0.3211, "step": 10940 }, { "epoch": 1.7816634775882425, "grad_norm": 0.1148894727230072, "learning_rate": 2.1192103475189845e-05, "loss": 0.3364, "step": 10941 }, { "epoch": 1.7818263241460732, "grad_norm": 0.11229263991117477, "learning_rate": 2.1187421794281965e-05, "loss": 0.3341, "step": 10942 }, { "epoch": 1.7819891707039042, "grad_norm": 0.12483325600624084, "learning_rate": 2.1182740250257066e-05, "loss": 0.3135, "step": 10943 }, { "epoch": 1.782152017261735, "grad_norm": 0.08812456578016281, "learning_rate": 2.1178058843283232e-05, "loss": 0.3201, "step": 10944 }, { "epoch": 1.782314863819566, "grad_norm": 0.15477152168750763, "learning_rate": 2.1173377573528526e-05, "loss": 0.3309, "step": 10945 }, { "epoch": 1.782477710377397, "grad_norm": 0.1299603134393692, "learning_rate": 2.1168696441161036e-05, "loss": 0.2825, "step": 10946 }, { "epoch": 1.782640556935228, "grad_norm": 0.08544963598251343, "learning_rate": 2.116401544634883e-05, "loss": 0.3223, "step": 10947 }, { "epoch": 1.7828034034930587, "grad_norm": 0.05446280539035797, "learning_rate": 2.1159334589259956e-05, "loss": 0.3075, "step": 10948 }, { "epoch": 1.7829662500508896, "grad_norm": 0.11744305491447449, "learning_rate": 2.115465387006247e-05, "loss": 0.3224, "step": 10949 }, { "epoch": 1.7831290966087203, "grad_norm": 0.136376291513443, "learning_rate": 2.1149973288924438e-05, "loss": 0.3233, "step": 10950 }, { "epoch": 1.7832919431665513, "grad_norm": 0.09987569600343704, "learning_rate": 2.1145292846013903e-05, "loss": 0.3501, "step": 10951 }, { "epoch": 1.7834547897243822, "grad_norm": 0.11639517545700073, "learning_rate": 2.114061254149889e-05, "loss": 0.2672, "step": 10952 }, { "epoch": 1.7836176362822131, "grad_norm": 0.0858687311410904, "learning_rate": 2.1135932375547466e-05, "loss": 0.3158, "step": 10953 }, { "epoch": 1.783780482840044, "grad_norm": 0.12835067510604858, "learning_rate": 2.1131252348327643e-05, "loss": 0.3423, "step": 10954 }, { "epoch": 1.7839433293978748, "grad_norm": 0.09163816273212433, "learning_rate": 2.1126572460007445e-05, "loss": 0.2748, "step": 10955 }, { "epoch": 1.7841061759557058, "grad_norm": 0.1227114275097847, "learning_rate": 2.1121892710754904e-05, "loss": 0.2955, "step": 10956 }, { "epoch": 1.7842690225135365, "grad_norm": 0.11520242691040039, "learning_rate": 2.111721310073804e-05, "loss": 0.3568, "step": 10957 }, { "epoch": 1.7844318690713674, "grad_norm": 0.09799632430076599, "learning_rate": 2.111253363012485e-05, "loss": 0.2932, "step": 10958 }, { "epoch": 1.7845947156291984, "grad_norm": 0.13515117764472961, "learning_rate": 2.1107854299083352e-05, "loss": 0.3042, "step": 10959 }, { "epoch": 1.7847575621870293, "grad_norm": 0.10967721045017242, "learning_rate": 2.1103175107781544e-05, "loss": 0.336, "step": 10960 }, { "epoch": 1.7849204087448602, "grad_norm": 0.073841892182827, "learning_rate": 2.109849605638743e-05, "loss": 0.2949, "step": 10961 }, { "epoch": 1.7850832553026912, "grad_norm": 0.148007333278656, "learning_rate": 2.1093817145069e-05, "loss": 0.3762, "step": 10962 }, { "epoch": 1.785246101860522, "grad_norm": 0.09704798460006714, "learning_rate": 2.1089138373994223e-05, "loss": 0.3112, "step": 10963 }, { "epoch": 1.7854089484183528, "grad_norm": 0.14484702050685883, "learning_rate": 2.1084459743331112e-05, "loss": 0.3441, "step": 10964 }, { "epoch": 1.7855717949761836, "grad_norm": 0.12447333335876465, "learning_rate": 2.1079781253247624e-05, "loss": 0.3546, "step": 10965 }, { "epoch": 1.7857346415340145, "grad_norm": 0.11888667941093445, "learning_rate": 2.1075102903911734e-05, "loss": 0.2846, "step": 10966 }, { "epoch": 1.7858974880918455, "grad_norm": 0.12366729974746704, "learning_rate": 2.1070424695491397e-05, "loss": 0.3337, "step": 10967 }, { "epoch": 1.7860603346496764, "grad_norm": 0.10287415236234665, "learning_rate": 2.1065746628154602e-05, "loss": 0.3146, "step": 10968 }, { "epoch": 1.7862231812075073, "grad_norm": 0.1425776183605194, "learning_rate": 2.1061068702069286e-05, "loss": 0.2764, "step": 10969 }, { "epoch": 1.786386027765338, "grad_norm": 0.07710480690002441, "learning_rate": 2.1056390917403397e-05, "loss": 0.3152, "step": 10970 }, { "epoch": 1.786548874323169, "grad_norm": 0.1130797490477562, "learning_rate": 2.1051713274324897e-05, "loss": 0.3207, "step": 10971 }, { "epoch": 1.7867117208809997, "grad_norm": 0.10256320238113403, "learning_rate": 2.104703577300172e-05, "loss": 0.2929, "step": 10972 }, { "epoch": 1.7868745674388307, "grad_norm": 0.12537290155887604, "learning_rate": 2.1042358413601806e-05, "loss": 0.3565, "step": 10973 }, { "epoch": 1.7870374139966616, "grad_norm": 0.12706106901168823, "learning_rate": 2.103768119629307e-05, "loss": 0.3257, "step": 10974 }, { "epoch": 1.7872002605544925, "grad_norm": 0.10942123830318451, "learning_rate": 2.103300412124346e-05, "loss": 0.2984, "step": 10975 }, { "epoch": 1.7873631071123235, "grad_norm": 0.12747785449028015, "learning_rate": 2.1028327188620882e-05, "loss": 0.2933, "step": 10976 }, { "epoch": 1.7875259536701544, "grad_norm": 0.13494504988193512, "learning_rate": 2.1023650398593264e-05, "loss": 0.302, "step": 10977 }, { "epoch": 1.7876888002279852, "grad_norm": 0.16973821818828583, "learning_rate": 2.1018973751328507e-05, "loss": 0.3404, "step": 10978 }, { "epoch": 1.787851646785816, "grad_norm": 0.0839066207408905, "learning_rate": 2.1014297246994518e-05, "loss": 0.2931, "step": 10979 }, { "epoch": 1.7880144933436468, "grad_norm": 0.15531785786151886, "learning_rate": 2.1009620885759203e-05, "loss": 0.3184, "step": 10980 }, { "epoch": 1.7881773399014778, "grad_norm": 0.12323896586894989, "learning_rate": 2.100494466779044e-05, "loss": 0.308, "step": 10981 }, { "epoch": 1.7883401864593087, "grad_norm": 0.12407787144184113, "learning_rate": 2.100026859325615e-05, "loss": 0.3476, "step": 10982 }, { "epoch": 1.7885030330171396, "grad_norm": 0.113249272108078, "learning_rate": 2.0995592662324197e-05, "loss": 0.3354, "step": 10983 }, { "epoch": 1.7886658795749706, "grad_norm": 0.09466109424829483, "learning_rate": 2.0990916875162467e-05, "loss": 0.3214, "step": 10984 }, { "epoch": 1.7888287261328015, "grad_norm": 0.10470572859048843, "learning_rate": 2.0986241231938822e-05, "loss": 0.3277, "step": 10985 }, { "epoch": 1.7889915726906322, "grad_norm": 0.10775657743215561, "learning_rate": 2.0981565732821152e-05, "loss": 0.2712, "step": 10986 }, { "epoch": 1.7891544192484632, "grad_norm": 0.09031736105680466, "learning_rate": 2.097689037797731e-05, "loss": 0.3288, "step": 10987 }, { "epoch": 1.789317265806294, "grad_norm": 0.13974128663539886, "learning_rate": 2.0972215167575162e-05, "loss": 0.2889, "step": 10988 }, { "epoch": 1.7894801123641249, "grad_norm": 0.143843412399292, "learning_rate": 2.0967540101782543e-05, "loss": 0.2761, "step": 10989 }, { "epoch": 1.7896429589219558, "grad_norm": 0.10103969275951385, "learning_rate": 2.0962865180767327e-05, "loss": 0.3347, "step": 10990 }, { "epoch": 1.7898058054797867, "grad_norm": 0.11352342367172241, "learning_rate": 2.0958190404697348e-05, "loss": 0.3094, "step": 10991 }, { "epoch": 1.7899686520376177, "grad_norm": 0.08521733433008194, "learning_rate": 2.095351577374043e-05, "loss": 0.2952, "step": 10992 }, { "epoch": 1.7901314985954484, "grad_norm": 0.13083529472351074, "learning_rate": 2.094884128806443e-05, "loss": 0.2829, "step": 10993 }, { "epoch": 1.7902943451532793, "grad_norm": 0.13012923300266266, "learning_rate": 2.0944166947837168e-05, "loss": 0.3206, "step": 10994 }, { "epoch": 1.79045719171111, "grad_norm": 0.10087219625711441, "learning_rate": 2.0939492753226457e-05, "loss": 0.2939, "step": 10995 }, { "epoch": 1.790620038268941, "grad_norm": 0.08356429636478424, "learning_rate": 2.093481870440012e-05, "loss": 0.299, "step": 10996 }, { "epoch": 1.790782884826772, "grad_norm": 0.14642536640167236, "learning_rate": 2.0930144801525975e-05, "loss": 0.3751, "step": 10997 }, { "epoch": 1.790945731384603, "grad_norm": 0.15083788335323334, "learning_rate": 2.092547104477183e-05, "loss": 0.3017, "step": 10998 }, { "epoch": 1.7911085779424338, "grad_norm": 0.08268047124147415, "learning_rate": 2.092079743430548e-05, "loss": 0.2813, "step": 10999 }, { "epoch": 1.7912714245002648, "grad_norm": 0.0900963842868805, "learning_rate": 2.0916123970294714e-05, "loss": 0.2987, "step": 11000 }, { "epoch": 1.7914342710580955, "grad_norm": 0.14453323185443878, "learning_rate": 2.0911450652907342e-05, "loss": 0.2966, "step": 11001 }, { "epoch": 1.7915971176159264, "grad_norm": 0.12983155250549316, "learning_rate": 2.0906777482311146e-05, "loss": 0.3106, "step": 11002 }, { "epoch": 1.7917599641737572, "grad_norm": 0.07019299268722534, "learning_rate": 2.090210445867389e-05, "loss": 0.3165, "step": 11003 }, { "epoch": 1.791922810731588, "grad_norm": 0.11960747092962265, "learning_rate": 2.089743158216337e-05, "loss": 0.2903, "step": 11004 }, { "epoch": 1.792085657289419, "grad_norm": 0.11052122712135315, "learning_rate": 2.0892758852947344e-05, "loss": 0.336, "step": 11005 }, { "epoch": 1.79224850384725, "grad_norm": 0.08241261541843414, "learning_rate": 2.0888086271193586e-05, "loss": 0.2811, "step": 11006 }, { "epoch": 1.792411350405081, "grad_norm": 0.10953358560800552, "learning_rate": 2.088341383706984e-05, "loss": 0.3273, "step": 11007 }, { "epoch": 1.7925741969629116, "grad_norm": 0.10026361793279648, "learning_rate": 2.087874155074388e-05, "loss": 0.3074, "step": 11008 }, { "epoch": 1.7927370435207426, "grad_norm": 0.06953786313533783, "learning_rate": 2.0874069412383444e-05, "loss": 0.3, "step": 11009 }, { "epoch": 1.7928998900785733, "grad_norm": 0.19557563960552216, "learning_rate": 2.0869397422156273e-05, "loss": 0.3423, "step": 11010 }, { "epoch": 1.7930627366364043, "grad_norm": 0.09237309545278549, "learning_rate": 2.0864725580230117e-05, "loss": 0.2968, "step": 11011 }, { "epoch": 1.7932255831942352, "grad_norm": 0.14017322659492493, "learning_rate": 2.0860053886772703e-05, "loss": 0.2719, "step": 11012 }, { "epoch": 1.7933884297520661, "grad_norm": 0.12586244940757751, "learning_rate": 2.0855382341951752e-05, "loss": 0.3085, "step": 11013 }, { "epoch": 1.793551276309897, "grad_norm": 0.06360642611980438, "learning_rate": 2.0850710945934992e-05, "loss": 0.2968, "step": 11014 }, { "epoch": 1.793714122867728, "grad_norm": 0.12919697165489197, "learning_rate": 2.0846039698890148e-05, "loss": 0.308, "step": 11015 }, { "epoch": 1.7938769694255587, "grad_norm": 0.1131867840886116, "learning_rate": 2.0841368600984915e-05, "loss": 0.3084, "step": 11016 }, { "epoch": 1.7940398159833897, "grad_norm": 0.09005410224199295, "learning_rate": 2.0836697652387017e-05, "loss": 0.3221, "step": 11017 }, { "epoch": 1.7942026625412204, "grad_norm": 0.09945695847272873, "learning_rate": 2.0832026853264136e-05, "loss": 0.3044, "step": 11018 }, { "epoch": 1.7943655090990513, "grad_norm": 0.143274188041687, "learning_rate": 2.082735620378399e-05, "loss": 0.3187, "step": 11019 }, { "epoch": 1.7945283556568823, "grad_norm": 0.12890589237213135, "learning_rate": 2.082268570411426e-05, "loss": 0.3027, "step": 11020 }, { "epoch": 1.7946912022147132, "grad_norm": 0.1350603550672531, "learning_rate": 2.081801535442261e-05, "loss": 0.3198, "step": 11021 }, { "epoch": 1.7948540487725442, "grad_norm": 0.10630377382040024, "learning_rate": 2.0813345154876758e-05, "loss": 0.3045, "step": 11022 }, { "epoch": 1.7950168953303751, "grad_norm": 0.12070015072822571, "learning_rate": 2.0808675105644353e-05, "loss": 0.3192, "step": 11023 }, { "epoch": 1.7951797418882058, "grad_norm": 0.15747793018817902, "learning_rate": 2.0804005206893072e-05, "loss": 0.3409, "step": 11024 }, { "epoch": 1.7953425884460366, "grad_norm": 0.1341947466135025, "learning_rate": 2.0799335458790563e-05, "loss": 0.3013, "step": 11025 }, { "epoch": 1.7955054350038675, "grad_norm": 0.12882131338119507, "learning_rate": 2.079466586150451e-05, "loss": 0.3185, "step": 11026 }, { "epoch": 1.7956682815616984, "grad_norm": 0.1314970701932907, "learning_rate": 2.0789996415202552e-05, "loss": 0.3238, "step": 11027 }, { "epoch": 1.7958311281195294, "grad_norm": 0.06674382090568542, "learning_rate": 2.0785327120052334e-05, "loss": 0.2857, "step": 11028 }, { "epoch": 1.7959939746773603, "grad_norm": 0.13593930006027222, "learning_rate": 2.078065797622149e-05, "loss": 0.3072, "step": 11029 }, { "epoch": 1.7961568212351913, "grad_norm": 0.13231591880321503, "learning_rate": 2.0775988983877675e-05, "loss": 0.3126, "step": 11030 }, { "epoch": 1.796319667793022, "grad_norm": 0.12352775782346725, "learning_rate": 2.077132014318851e-05, "loss": 0.3528, "step": 11031 }, { "epoch": 1.796482514350853, "grad_norm": 0.11428839713335037, "learning_rate": 2.0766651454321613e-05, "loss": 0.3716, "step": 11032 }, { "epoch": 1.7966453609086837, "grad_norm": 0.1599101573228836, "learning_rate": 2.0761982917444624e-05, "loss": 0.3276, "step": 11033 }, { "epoch": 1.7968082074665146, "grad_norm": 0.09676001220941544, "learning_rate": 2.0757314532725143e-05, "loss": 0.3643, "step": 11034 }, { "epoch": 1.7969710540243455, "grad_norm": 0.10614770650863647, "learning_rate": 2.075264630033078e-05, "loss": 0.3055, "step": 11035 }, { "epoch": 1.7971339005821765, "grad_norm": 0.1269771158695221, "learning_rate": 2.074797822042914e-05, "loss": 0.3535, "step": 11036 }, { "epoch": 1.7972967471400074, "grad_norm": 0.09380979835987091, "learning_rate": 2.074331029318782e-05, "loss": 0.2969, "step": 11037 }, { "epoch": 1.7974595936978384, "grad_norm": 0.08891874551773071, "learning_rate": 2.073864251877442e-05, "loss": 0.3389, "step": 11038 }, { "epoch": 1.797622440255669, "grad_norm": 0.11535695940256119, "learning_rate": 2.0733974897356517e-05, "loss": 0.3244, "step": 11039 }, { "epoch": 1.7977852868135, "grad_norm": 0.09522956609725952, "learning_rate": 2.072930742910169e-05, "loss": 0.3307, "step": 11040 }, { "epoch": 1.7979481333713307, "grad_norm": 0.13063910603523254, "learning_rate": 2.0724640114177528e-05, "loss": 0.3181, "step": 11041 }, { "epoch": 1.7981109799291617, "grad_norm": 0.10697992146015167, "learning_rate": 2.0719972952751602e-05, "loss": 0.3086, "step": 11042 }, { "epoch": 1.7982738264869926, "grad_norm": 0.06672733277082443, "learning_rate": 2.0715305944991452e-05, "loss": 0.3066, "step": 11043 }, { "epoch": 1.7984366730448236, "grad_norm": 0.11298391222953796, "learning_rate": 2.0710639091064673e-05, "loss": 0.3099, "step": 11044 }, { "epoch": 1.7985995196026545, "grad_norm": 0.14907404780387878, "learning_rate": 2.07059723911388e-05, "loss": 0.3566, "step": 11045 }, { "epoch": 1.7987623661604852, "grad_norm": 0.13323874771595, "learning_rate": 2.0701305845381384e-05, "loss": 0.3394, "step": 11046 }, { "epoch": 1.7989252127183162, "grad_norm": 0.09470875561237335, "learning_rate": 2.0696639453959953e-05, "loss": 0.3376, "step": 11047 }, { "epoch": 1.799088059276147, "grad_norm": 0.10115763545036316, "learning_rate": 2.069197321704207e-05, "loss": 0.3204, "step": 11048 }, { "epoch": 1.7992509058339778, "grad_norm": 0.08257133513689041, "learning_rate": 2.0687307134795258e-05, "loss": 0.2984, "step": 11049 }, { "epoch": 1.7994137523918088, "grad_norm": 0.08017825335264206, "learning_rate": 2.0682641207387038e-05, "loss": 0.3134, "step": 11050 }, { "epoch": 1.7995765989496397, "grad_norm": 0.0741652101278305, "learning_rate": 2.067797543498493e-05, "loss": 0.2931, "step": 11051 }, { "epoch": 1.7997394455074707, "grad_norm": 0.10531003028154373, "learning_rate": 2.0673309817756458e-05, "loss": 0.3315, "step": 11052 }, { "epoch": 1.7999022920653016, "grad_norm": 0.08976273983716965, "learning_rate": 2.0668644355869123e-05, "loss": 0.3034, "step": 11053 }, { "epoch": 1.8000651386231323, "grad_norm": 0.13875412940979004, "learning_rate": 2.0663979049490434e-05, "loss": 0.3335, "step": 11054 }, { "epoch": 1.8002279851809633, "grad_norm": 0.16605180501937866, "learning_rate": 2.065931389878789e-05, "loss": 0.3173, "step": 11055 }, { "epoch": 1.800390831738794, "grad_norm": 0.10028320550918579, "learning_rate": 2.065464890392898e-05, "loss": 0.3052, "step": 11056 }, { "epoch": 1.800553678296625, "grad_norm": 0.1227787658572197, "learning_rate": 2.0649984065081197e-05, "loss": 0.2866, "step": 11057 }, { "epoch": 1.8007165248544559, "grad_norm": 0.11564145982265472, "learning_rate": 2.064531938241201e-05, "loss": 0.3004, "step": 11058 }, { "epoch": 1.8008793714122868, "grad_norm": 0.13614577054977417, "learning_rate": 2.0640654856088913e-05, "loss": 0.3278, "step": 11059 }, { "epoch": 1.8010422179701178, "grad_norm": 0.116203673183918, "learning_rate": 2.0635990486279368e-05, "loss": 0.3038, "step": 11060 }, { "epoch": 1.8012050645279485, "grad_norm": 0.06251232326030731, "learning_rate": 2.063132627315083e-05, "loss": 0.3009, "step": 11061 }, { "epoch": 1.8013679110857794, "grad_norm": 0.12683717906475067, "learning_rate": 2.0626662216870775e-05, "loss": 0.3134, "step": 11062 }, { "epoch": 1.8015307576436101, "grad_norm": 0.13714101910591125, "learning_rate": 2.0621998317606653e-05, "loss": 0.3017, "step": 11063 }, { "epoch": 1.801693604201441, "grad_norm": 0.24660247564315796, "learning_rate": 2.0617334575525907e-05, "loss": 0.3782, "step": 11064 }, { "epoch": 1.801856450759272, "grad_norm": 0.09870605915784836, "learning_rate": 2.061267099079597e-05, "loss": 0.3143, "step": 11065 }, { "epoch": 1.802019297317103, "grad_norm": 0.10828811675310135, "learning_rate": 2.06080075635843e-05, "loss": 0.3034, "step": 11066 }, { "epoch": 1.802182143874934, "grad_norm": 0.13946178555488586, "learning_rate": 2.0603344294058316e-05, "loss": 0.3313, "step": 11067 }, { "epoch": 1.8023449904327649, "grad_norm": 0.1467859148979187, "learning_rate": 2.0598681182385446e-05, "loss": 0.3293, "step": 11068 }, { "epoch": 1.8025078369905956, "grad_norm": 0.1105695590376854, "learning_rate": 2.0594018228733098e-05, "loss": 0.3242, "step": 11069 }, { "epoch": 1.8026706835484265, "grad_norm": 0.09618773311376572, "learning_rate": 2.0589355433268706e-05, "loss": 0.3012, "step": 11070 }, { "epoch": 1.8028335301062572, "grad_norm": 0.205448716878891, "learning_rate": 2.058469279615967e-05, "loss": 0.3482, "step": 11071 }, { "epoch": 1.8029963766640882, "grad_norm": 0.13001926243305206, "learning_rate": 2.0580030317573375e-05, "loss": 0.2856, "step": 11072 }, { "epoch": 1.8031592232219191, "grad_norm": 0.11552916467189789, "learning_rate": 2.057536799767725e-05, "loss": 0.2985, "step": 11073 }, { "epoch": 1.80332206977975, "grad_norm": 0.08298207074403763, "learning_rate": 2.0570705836638664e-05, "loss": 0.2861, "step": 11074 }, { "epoch": 1.803484916337581, "grad_norm": 0.0644087865948677, "learning_rate": 2.0566043834625016e-05, "loss": 0.3409, "step": 11075 }, { "epoch": 1.803647762895412, "grad_norm": 0.18487422168254852, "learning_rate": 2.0561381991803672e-05, "loss": 0.3233, "step": 11076 }, { "epoch": 1.8038106094532427, "grad_norm": 0.07940852642059326, "learning_rate": 2.0556720308342013e-05, "loss": 0.3247, "step": 11077 }, { "epoch": 1.8039734560110736, "grad_norm": 0.1461685448884964, "learning_rate": 2.055205878440741e-05, "loss": 0.3275, "step": 11078 }, { "epoch": 1.8041363025689043, "grad_norm": 0.1187078133225441, "learning_rate": 2.054739742016723e-05, "loss": 0.3231, "step": 11079 }, { "epoch": 1.8042991491267353, "grad_norm": 0.11546722799539566, "learning_rate": 2.0542736215788803e-05, "loss": 0.3102, "step": 11080 }, { "epoch": 1.8044619956845662, "grad_norm": 0.1020355075597763, "learning_rate": 2.053807517143952e-05, "loss": 0.2681, "step": 11081 }, { "epoch": 1.8046248422423972, "grad_norm": 0.08923929184675217, "learning_rate": 2.05334142872867e-05, "loss": 0.2827, "step": 11082 }, { "epoch": 1.804787688800228, "grad_norm": 0.13726457953453064, "learning_rate": 2.052875356349768e-05, "loss": 0.3327, "step": 11083 }, { "epoch": 1.8049505353580588, "grad_norm": 0.15777409076690674, "learning_rate": 2.0524093000239815e-05, "loss": 0.3286, "step": 11084 }, { "epoch": 1.8051133819158898, "grad_norm": 0.11546412855386734, "learning_rate": 2.051943259768042e-05, "loss": 0.3045, "step": 11085 }, { "epoch": 1.8052762284737205, "grad_norm": 0.09979315847158432, "learning_rate": 2.051477235598682e-05, "loss": 0.3105, "step": 11086 }, { "epoch": 1.8054390750315514, "grad_norm": 0.09958980232477188, "learning_rate": 2.0510112275326323e-05, "loss": 0.2833, "step": 11087 }, { "epoch": 1.8056019215893824, "grad_norm": 0.12238934636116028, "learning_rate": 2.0505452355866257e-05, "loss": 0.3141, "step": 11088 }, { "epoch": 1.8057647681472133, "grad_norm": 0.11269489675760269, "learning_rate": 2.0500792597773916e-05, "loss": 0.3292, "step": 11089 }, { "epoch": 1.8059276147050443, "grad_norm": 0.13741062581539154, "learning_rate": 2.0496133001216596e-05, "loss": 0.3352, "step": 11090 }, { "epoch": 1.8060904612628752, "grad_norm": 0.13006125390529633, "learning_rate": 2.0491473566361592e-05, "loss": 0.3214, "step": 11091 }, { "epoch": 1.806253307820706, "grad_norm": 0.1217714473605156, "learning_rate": 2.0486814293376204e-05, "loss": 0.2895, "step": 11092 }, { "epoch": 1.8064161543785369, "grad_norm": 0.09478597342967987, "learning_rate": 2.0482155182427694e-05, "loss": 0.313, "step": 11093 }, { "epoch": 1.8065790009363676, "grad_norm": 0.11673001945018768, "learning_rate": 2.0477496233683353e-05, "loss": 0.3128, "step": 11094 }, { "epoch": 1.8067418474941985, "grad_norm": 0.12342672049999237, "learning_rate": 2.047283744731045e-05, "loss": 0.3033, "step": 11095 }, { "epoch": 1.8069046940520295, "grad_norm": 0.1107315942645073, "learning_rate": 2.0468178823476248e-05, "loss": 0.3106, "step": 11096 }, { "epoch": 1.8070675406098604, "grad_norm": 0.07763306051492691, "learning_rate": 2.0463520362348002e-05, "loss": 0.3347, "step": 11097 }, { "epoch": 1.8072303871676914, "grad_norm": 0.0981142520904541, "learning_rate": 2.0458862064092958e-05, "loss": 0.2912, "step": 11098 }, { "epoch": 1.807393233725522, "grad_norm": 0.11413537710905075, "learning_rate": 2.0454203928878383e-05, "loss": 0.316, "step": 11099 }, { "epoch": 1.807556080283353, "grad_norm": 0.12791822850704193, "learning_rate": 2.044954595687151e-05, "loss": 0.3213, "step": 11100 }, { "epoch": 1.8077189268411837, "grad_norm": 0.10179737210273743, "learning_rate": 2.0444888148239563e-05, "loss": 0.286, "step": 11101 }, { "epoch": 1.8078817733990147, "grad_norm": 0.1276501566171646, "learning_rate": 2.044023050314977e-05, "loss": 0.3226, "step": 11102 }, { "epoch": 1.8080446199568456, "grad_norm": 0.1139887124300003, "learning_rate": 2.0435573021769378e-05, "loss": 0.3317, "step": 11103 }, { "epoch": 1.8082074665146766, "grad_norm": 0.10760431736707687, "learning_rate": 2.0430915704265587e-05, "loss": 0.2842, "step": 11104 }, { "epoch": 1.8083703130725075, "grad_norm": 0.06389840692281723, "learning_rate": 2.0426258550805603e-05, "loss": 0.309, "step": 11105 }, { "epoch": 1.8085331596303384, "grad_norm": 0.1450813114643097, "learning_rate": 2.0421601561556648e-05, "loss": 0.3353, "step": 11106 }, { "epoch": 1.8086960061881692, "grad_norm": 0.30014216899871826, "learning_rate": 2.0416944736685918e-05, "loss": 0.3718, "step": 11107 }, { "epoch": 1.8088588527460001, "grad_norm": 0.07916031777858734, "learning_rate": 2.04122880763606e-05, "loss": 0.2752, "step": 11108 }, { "epoch": 1.8090216993038308, "grad_norm": 0.07317420840263367, "learning_rate": 2.0407631580747875e-05, "loss": 0.2834, "step": 11109 }, { "epoch": 1.8091845458616618, "grad_norm": 0.11752141267061234, "learning_rate": 2.0402975250014947e-05, "loss": 0.3107, "step": 11110 }, { "epoch": 1.8093473924194927, "grad_norm": 0.09154308587312698, "learning_rate": 2.0398319084328977e-05, "loss": 0.325, "step": 11111 }, { "epoch": 1.8095102389773237, "grad_norm": 0.11342409253120422, "learning_rate": 2.0393663083857136e-05, "loss": 0.2834, "step": 11112 }, { "epoch": 1.8096730855351546, "grad_norm": 0.11309675872325897, "learning_rate": 2.0389007248766594e-05, "loss": 0.3438, "step": 11113 }, { "epoch": 1.8098359320929855, "grad_norm": 0.11750422418117523, "learning_rate": 2.0384351579224508e-05, "loss": 0.2851, "step": 11114 }, { "epoch": 1.8099987786508163, "grad_norm": 0.0847533568739891, "learning_rate": 2.037969607539803e-05, "loss": 0.3174, "step": 11115 }, { "epoch": 1.8101616252086472, "grad_norm": 0.09879618883132935, "learning_rate": 2.0375040737454297e-05, "loss": 0.2974, "step": 11116 }, { "epoch": 1.810324471766478, "grad_norm": 0.09549860656261444, "learning_rate": 2.037038556556047e-05, "loss": 0.2688, "step": 11117 }, { "epoch": 1.8104873183243089, "grad_norm": 0.11289171129465103, "learning_rate": 2.0365730559883668e-05, "loss": 0.3709, "step": 11118 }, { "epoch": 1.8106501648821398, "grad_norm": 0.1599288284778595, "learning_rate": 2.0361075720591025e-05, "loss": 0.3132, "step": 11119 }, { "epoch": 1.8108130114399708, "grad_norm": 0.09197554737329483, "learning_rate": 2.035642104784965e-05, "loss": 0.3264, "step": 11120 }, { "epoch": 1.8109758579978017, "grad_norm": 0.09263613075017929, "learning_rate": 2.0351766541826686e-05, "loss": 0.3338, "step": 11121 }, { "epoch": 1.8111387045556324, "grad_norm": 0.13806577026844025, "learning_rate": 2.0347112202689228e-05, "loss": 0.2899, "step": 11122 }, { "epoch": 1.8113015511134634, "grad_norm": 0.06569831818342209, "learning_rate": 2.0342458030604373e-05, "loss": 0.3343, "step": 11123 }, { "epoch": 1.811464397671294, "grad_norm": 0.11337853223085403, "learning_rate": 2.033780402573924e-05, "loss": 0.3226, "step": 11124 }, { "epoch": 1.811627244229125, "grad_norm": 0.0750664696097374, "learning_rate": 2.0333150188260906e-05, "loss": 0.3221, "step": 11125 }, { "epoch": 1.811790090786956, "grad_norm": 0.12617069482803345, "learning_rate": 2.0328496518336468e-05, "loss": 0.3221, "step": 11126 }, { "epoch": 1.811952937344787, "grad_norm": 0.1087617427110672, "learning_rate": 2.0323843016132987e-05, "loss": 0.3214, "step": 11127 }, { "epoch": 1.8121157839026178, "grad_norm": 0.11958545446395874, "learning_rate": 2.0319189681817565e-05, "loss": 0.2943, "step": 11128 }, { "epoch": 1.8122786304604488, "grad_norm": 0.08158410340547562, "learning_rate": 2.0314536515557258e-05, "loss": 0.3, "step": 11129 }, { "epoch": 1.8124414770182795, "grad_norm": 0.061834000051021576, "learning_rate": 2.030988351751912e-05, "loss": 0.3154, "step": 11130 }, { "epoch": 1.8126043235761105, "grad_norm": 0.11173110455274582, "learning_rate": 2.0305230687870217e-05, "loss": 0.3024, "step": 11131 }, { "epoch": 1.8127671701339412, "grad_norm": 0.12163285166025162, "learning_rate": 2.0300578026777598e-05, "loss": 0.3027, "step": 11132 }, { "epoch": 1.8129300166917721, "grad_norm": 0.09007268399000168, "learning_rate": 2.0295925534408314e-05, "loss": 0.2961, "step": 11133 }, { "epoch": 1.813092863249603, "grad_norm": 0.09965348988771439, "learning_rate": 2.029127321092939e-05, "loss": 0.3048, "step": 11134 }, { "epoch": 1.813255709807434, "grad_norm": 0.0855172798037529, "learning_rate": 2.0286621056507866e-05, "loss": 0.3091, "step": 11135 }, { "epoch": 1.813418556365265, "grad_norm": 0.07736945152282715, "learning_rate": 2.0281969071310774e-05, "loss": 0.3106, "step": 11136 }, { "epoch": 1.8135814029230957, "grad_norm": 0.14134173095226288, "learning_rate": 2.027731725550513e-05, "loss": 0.3044, "step": 11137 }, { "epoch": 1.8137442494809266, "grad_norm": 0.15028256177902222, "learning_rate": 2.0272665609257928e-05, "loss": 0.3176, "step": 11138 }, { "epoch": 1.8139070960387573, "grad_norm": 0.1255461424589157, "learning_rate": 2.0268014132736214e-05, "loss": 0.3174, "step": 11139 }, { "epoch": 1.8140699425965883, "grad_norm": 0.14870420098304749, "learning_rate": 2.0263362826106964e-05, "loss": 0.346, "step": 11140 }, { "epoch": 1.8142327891544192, "grad_norm": 0.1252133846282959, "learning_rate": 2.0258711689537182e-05, "loss": 0.3152, "step": 11141 }, { "epoch": 1.8143956357122502, "grad_norm": 0.11000523716211319, "learning_rate": 2.0254060723193844e-05, "loss": 0.291, "step": 11142 }, { "epoch": 1.814558482270081, "grad_norm": 0.11756443232297897, "learning_rate": 2.0249409927243956e-05, "loss": 0.3233, "step": 11143 }, { "epoch": 1.814721328827912, "grad_norm": 0.11683204770088196, "learning_rate": 2.0244759301854486e-05, "loss": 0.2982, "step": 11144 }, { "epoch": 1.8148841753857428, "grad_norm": 0.12131033092737198, "learning_rate": 2.0240108847192394e-05, "loss": 0.282, "step": 11145 }, { "epoch": 1.8150470219435737, "grad_norm": 0.10287060588598251, "learning_rate": 2.0235458563424664e-05, "loss": 0.3066, "step": 11146 }, { "epoch": 1.8152098685014044, "grad_norm": 0.07249169796705246, "learning_rate": 2.0230808450718247e-05, "loss": 0.326, "step": 11147 }, { "epoch": 1.8153727150592354, "grad_norm": 0.13219736516475677, "learning_rate": 2.022615850924009e-05, "loss": 0.2877, "step": 11148 }, { "epoch": 1.8155355616170663, "grad_norm": 0.1245151162147522, "learning_rate": 2.0221508739157147e-05, "loss": 0.3493, "step": 11149 }, { "epoch": 1.8156984081748972, "grad_norm": 0.07171384245157242, "learning_rate": 2.021685914063636e-05, "loss": 0.3346, "step": 11150 }, { "epoch": 1.8158612547327282, "grad_norm": 0.10110219568014145, "learning_rate": 2.0212209713844654e-05, "loss": 0.2959, "step": 11151 }, { "epoch": 1.8160241012905591, "grad_norm": 0.11542710661888123, "learning_rate": 2.0207560458948973e-05, "loss": 0.3156, "step": 11152 }, { "epoch": 1.8161869478483899, "grad_norm": 0.11463016271591187, "learning_rate": 2.0202911376116218e-05, "loss": 0.3184, "step": 11153 }, { "epoch": 1.8163497944062206, "grad_norm": 0.1242787167429924, "learning_rate": 2.0198262465513328e-05, "loss": 0.2993, "step": 11154 }, { "epoch": 1.8165126409640515, "grad_norm": 0.13499939441680908, "learning_rate": 2.01936137273072e-05, "loss": 0.2762, "step": 11155 }, { "epoch": 1.8166754875218825, "grad_norm": 0.07447607070207596, "learning_rate": 2.018896516166473e-05, "loss": 0.3013, "step": 11156 }, { "epoch": 1.8168383340797134, "grad_norm": 0.13407841324806213, "learning_rate": 2.0184316768752835e-05, "loss": 0.3081, "step": 11157 }, { "epoch": 1.8170011806375443, "grad_norm": 0.10558173805475235, "learning_rate": 2.01796685487384e-05, "loss": 0.3621, "step": 11158 }, { "epoch": 1.8171640271953753, "grad_norm": 0.12992702424526215, "learning_rate": 2.0175020501788302e-05, "loss": 0.3305, "step": 11159 }, { "epoch": 1.817326873753206, "grad_norm": 0.11499373614788055, "learning_rate": 2.0170372628069414e-05, "loss": 0.3355, "step": 11160 }, { "epoch": 1.817489720311037, "grad_norm": 0.07809407263994217, "learning_rate": 2.016572492774863e-05, "loss": 0.3152, "step": 11161 }, { "epoch": 1.8176525668688677, "grad_norm": 0.12137865275144577, "learning_rate": 2.0161077400992807e-05, "loss": 0.344, "step": 11162 }, { "epoch": 1.8178154134266986, "grad_norm": 0.11043050140142441, "learning_rate": 2.0156430047968788e-05, "loss": 0.3115, "step": 11163 }, { "epoch": 1.8179782599845296, "grad_norm": 0.12565124034881592, "learning_rate": 2.0151782868843456e-05, "loss": 0.3354, "step": 11164 }, { "epoch": 1.8181411065423605, "grad_norm": 0.07299383729696274, "learning_rate": 2.0147135863783645e-05, "loss": 0.34, "step": 11165 }, { "epoch": 1.8183039531001914, "grad_norm": 0.12888675928115845, "learning_rate": 2.0142489032956194e-05, "loss": 0.3559, "step": 11166 }, { "epoch": 1.8184667996580224, "grad_norm": 0.13338696956634521, "learning_rate": 2.0137842376527928e-05, "loss": 0.3218, "step": 11167 }, { "epoch": 1.818629646215853, "grad_norm": 0.1785985380411148, "learning_rate": 2.01331958946657e-05, "loss": 0.2939, "step": 11168 }, { "epoch": 1.818792492773684, "grad_norm": 0.13528120517730713, "learning_rate": 2.012854958753632e-05, "loss": 0.3148, "step": 11169 }, { "epoch": 1.8189553393315148, "grad_norm": 0.11583591252565384, "learning_rate": 2.0123903455306607e-05, "loss": 0.3498, "step": 11170 }, { "epoch": 1.8191181858893457, "grad_norm": 0.08164218068122864, "learning_rate": 2.011925749814336e-05, "loss": 0.3316, "step": 11171 }, { "epoch": 1.8192810324471766, "grad_norm": 0.08626056462526321, "learning_rate": 2.01146117162134e-05, "loss": 0.3272, "step": 11172 }, { "epoch": 1.8194438790050076, "grad_norm": 0.10571254044771194, "learning_rate": 2.0109966109683515e-05, "loss": 0.3263, "step": 11173 }, { "epoch": 1.8196067255628385, "grad_norm": 0.11744363605976105, "learning_rate": 2.0105320678720498e-05, "loss": 0.3439, "step": 11174 }, { "epoch": 1.8197695721206693, "grad_norm": 0.07819265872240067, "learning_rate": 2.0100675423491127e-05, "loss": 0.3075, "step": 11175 }, { "epoch": 1.8199324186785002, "grad_norm": 0.10064254701137543, "learning_rate": 2.0096030344162196e-05, "loss": 0.3192, "step": 11176 }, { "epoch": 1.820095265236331, "grad_norm": 0.12066532671451569, "learning_rate": 2.0091385440900467e-05, "loss": 0.293, "step": 11177 }, { "epoch": 1.8202581117941619, "grad_norm": 0.10173767805099487, "learning_rate": 2.0086740713872698e-05, "loss": 0.3681, "step": 11178 }, { "epoch": 1.8204209583519928, "grad_norm": 0.07567401230335236, "learning_rate": 2.0082096163245666e-05, "loss": 0.3419, "step": 11179 }, { "epoch": 1.8205838049098237, "grad_norm": 0.12511444091796875, "learning_rate": 2.0077451789186118e-05, "loss": 0.358, "step": 11180 }, { "epoch": 1.8207466514676547, "grad_norm": 0.10856479406356812, "learning_rate": 2.00728075918608e-05, "loss": 0.3501, "step": 11181 }, { "epoch": 1.8209094980254856, "grad_norm": 0.1026381328701973, "learning_rate": 2.006816357143644e-05, "loss": 0.3303, "step": 11182 }, { "epoch": 1.8210723445833163, "grad_norm": 0.11213492602109909, "learning_rate": 2.0063519728079795e-05, "loss": 0.3047, "step": 11183 }, { "epoch": 1.8212351911411473, "grad_norm": 0.13589558005332947, "learning_rate": 2.0058876061957583e-05, "loss": 0.336, "step": 11184 }, { "epoch": 1.821398037698978, "grad_norm": 0.0925951674580574, "learning_rate": 2.0054232573236512e-05, "loss": 0.3196, "step": 11185 }, { "epoch": 1.821560884256809, "grad_norm": 0.09483668208122253, "learning_rate": 2.004958926208332e-05, "loss": 0.3143, "step": 11186 }, { "epoch": 1.82172373081464, "grad_norm": 0.11213813722133636, "learning_rate": 2.0044946128664705e-05, "loss": 0.3305, "step": 11187 }, { "epoch": 1.8218865773724708, "grad_norm": 0.06752968579530716, "learning_rate": 2.0040303173147365e-05, "loss": 0.3184, "step": 11188 }, { "epoch": 1.8220494239303018, "grad_norm": 0.12495134025812149, "learning_rate": 2.0035660395698003e-05, "loss": 0.3344, "step": 11189 }, { "epoch": 1.8222122704881325, "grad_norm": 0.13155820965766907, "learning_rate": 2.003101779648331e-05, "loss": 0.2866, "step": 11190 }, { "epoch": 1.8223751170459634, "grad_norm": 0.11236627399921417, "learning_rate": 2.0026375375669958e-05, "loss": 0.3235, "step": 11191 }, { "epoch": 1.8225379636037942, "grad_norm": 0.15829654037952423, "learning_rate": 2.002173313342464e-05, "loss": 0.3001, "step": 11192 }, { "epoch": 1.822700810161625, "grad_norm": 0.09707247465848923, "learning_rate": 2.0017091069914005e-05, "loss": 0.3447, "step": 11193 }, { "epoch": 1.822863656719456, "grad_norm": 0.07212193310260773, "learning_rate": 2.001244918530474e-05, "loss": 0.3349, "step": 11194 }, { "epoch": 1.823026503277287, "grad_norm": 0.09510642290115356, "learning_rate": 2.0007807479763494e-05, "loss": 0.3304, "step": 11195 }, { "epoch": 1.823189349835118, "grad_norm": 0.11710730940103531, "learning_rate": 2.0003165953456907e-05, "loss": 0.2918, "step": 11196 }, { "epoch": 1.8233521963929489, "grad_norm": 0.12348079681396484, "learning_rate": 1.999852460655164e-05, "loss": 0.3032, "step": 11197 }, { "epoch": 1.8235150429507796, "grad_norm": 0.06980299949645996, "learning_rate": 1.9993883439214327e-05, "loss": 0.2694, "step": 11198 }, { "epoch": 1.8236778895086105, "grad_norm": 0.10385195910930634, "learning_rate": 1.9989242451611598e-05, "loss": 0.3415, "step": 11199 }, { "epoch": 1.8238407360664413, "grad_norm": 0.07934451103210449, "learning_rate": 1.9984601643910067e-05, "loss": 0.3099, "step": 11200 }, { "epoch": 1.8240035826242722, "grad_norm": 0.10937253385782242, "learning_rate": 1.997996101627637e-05, "loss": 0.2912, "step": 11201 }, { "epoch": 1.8241664291821031, "grad_norm": 0.1347101479768753, "learning_rate": 1.997532056887712e-05, "loss": 0.3053, "step": 11202 }, { "epoch": 1.824329275739934, "grad_norm": 0.1349029242992401, "learning_rate": 1.9970680301878907e-05, "loss": 0.2932, "step": 11203 }, { "epoch": 1.824492122297765, "grad_norm": 0.1113327369093895, "learning_rate": 1.9966040215448337e-05, "loss": 0.2843, "step": 11204 }, { "epoch": 1.824654968855596, "grad_norm": 0.09593656659126282, "learning_rate": 1.9961400309752012e-05, "loss": 0.3045, "step": 11205 }, { "epoch": 1.8248178154134267, "grad_norm": 0.1263638734817505, "learning_rate": 1.9956760584956514e-05, "loss": 0.3195, "step": 11206 }, { "epoch": 1.8249806619712576, "grad_norm": 0.12447168678045273, "learning_rate": 1.995212104122841e-05, "loss": 0.3532, "step": 11207 }, { "epoch": 1.8251435085290884, "grad_norm": 0.059208352118730545, "learning_rate": 1.9947481678734294e-05, "loss": 0.306, "step": 11208 }, { "epoch": 1.8253063550869193, "grad_norm": 0.11113002896308899, "learning_rate": 1.9942842497640716e-05, "loss": 0.3096, "step": 11209 }, { "epoch": 1.8254692016447502, "grad_norm": 0.12833188474178314, "learning_rate": 1.9938203498114252e-05, "loss": 0.3226, "step": 11210 }, { "epoch": 1.8256320482025812, "grad_norm": 0.0762670636177063, "learning_rate": 1.9933564680321444e-05, "loss": 0.3381, "step": 11211 }, { "epoch": 1.8257948947604121, "grad_norm": 0.10318311303853989, "learning_rate": 1.992892604442884e-05, "loss": 0.2998, "step": 11212 }, { "epoch": 1.8259577413182428, "grad_norm": 0.16036418080329895, "learning_rate": 1.9924287590602992e-05, "loss": 0.3552, "step": 11213 }, { "epoch": 1.8261205878760738, "grad_norm": 0.10959983617067337, "learning_rate": 1.9919649319010415e-05, "loss": 0.3177, "step": 11214 }, { "epoch": 1.8262834344339045, "grad_norm": 0.058233752846717834, "learning_rate": 1.991501122981766e-05, "loss": 0.3306, "step": 11215 }, { "epoch": 1.8264462809917354, "grad_norm": 0.10418707132339478, "learning_rate": 1.9910373323191233e-05, "loss": 0.306, "step": 11216 }, { "epoch": 1.8266091275495664, "grad_norm": 0.09192999452352524, "learning_rate": 1.9905735599297657e-05, "loss": 0.2932, "step": 11217 }, { "epoch": 1.8267719741073973, "grad_norm": 0.12739954888820648, "learning_rate": 1.9901098058303425e-05, "loss": 0.3183, "step": 11218 }, { "epoch": 1.8269348206652283, "grad_norm": 0.0768040344119072, "learning_rate": 1.989646070037506e-05, "loss": 0.3147, "step": 11219 }, { "epoch": 1.8270976672230592, "grad_norm": 0.18039388954639435, "learning_rate": 1.9891823525679046e-05, "loss": 0.2963, "step": 11220 }, { "epoch": 1.82726051378089, "grad_norm": 0.11227302998304367, "learning_rate": 1.988718653438187e-05, "loss": 0.317, "step": 11221 }, { "epoch": 1.8274233603387209, "grad_norm": 0.14048810303211212, "learning_rate": 1.9882549726650008e-05, "loss": 0.3225, "step": 11222 }, { "epoch": 1.8275862068965516, "grad_norm": 0.11955950409173965, "learning_rate": 1.9877913102649955e-05, "loss": 0.316, "step": 11223 }, { "epoch": 1.8277490534543825, "grad_norm": 0.07850489765405655, "learning_rate": 1.987327666254816e-05, "loss": 0.3116, "step": 11224 }, { "epoch": 1.8279119000122135, "grad_norm": 0.16585922241210938, "learning_rate": 1.9868640406511092e-05, "loss": 0.3307, "step": 11225 }, { "epoch": 1.8280747465700444, "grad_norm": 0.1135687455534935, "learning_rate": 1.9864004334705214e-05, "loss": 0.3028, "step": 11226 }, { "epoch": 1.8282375931278754, "grad_norm": 0.09936957061290741, "learning_rate": 1.9859368447296965e-05, "loss": 0.2694, "step": 11227 }, { "epoch": 1.828400439685706, "grad_norm": 0.12309364974498749, "learning_rate": 1.985473274445279e-05, "loss": 0.3081, "step": 11228 }, { "epoch": 1.828563286243537, "grad_norm": 0.11819988489151001, "learning_rate": 1.9850097226339123e-05, "loss": 0.3103, "step": 11229 }, { "epoch": 1.8287261328013678, "grad_norm": 0.09819314628839493, "learning_rate": 1.9845461893122397e-05, "loss": 0.3312, "step": 11230 }, { "epoch": 1.8288889793591987, "grad_norm": 0.10243344306945801, "learning_rate": 1.9840826744969034e-05, "loss": 0.3139, "step": 11231 }, { "epoch": 1.8290518259170296, "grad_norm": 0.10353469103574753, "learning_rate": 1.9836191782045453e-05, "loss": 0.2742, "step": 11232 }, { "epoch": 1.8292146724748606, "grad_norm": 0.08858813345432281, "learning_rate": 1.983155700451804e-05, "loss": 0.3344, "step": 11233 }, { "epoch": 1.8293775190326915, "grad_norm": 0.13857495784759521, "learning_rate": 1.9826922412553233e-05, "loss": 0.2892, "step": 11234 }, { "epoch": 1.8295403655905225, "grad_norm": 0.13840936124324799, "learning_rate": 1.9822288006317408e-05, "loss": 0.3197, "step": 11235 }, { "epoch": 1.8297032121483532, "grad_norm": 0.11247801780700684, "learning_rate": 1.981765378597695e-05, "loss": 0.3371, "step": 11236 }, { "epoch": 1.8298660587061841, "grad_norm": 0.10092712938785553, "learning_rate": 1.9813019751698254e-05, "loss": 0.2912, "step": 11237 }, { "epoch": 1.8300289052640148, "grad_norm": 0.1292242705821991, "learning_rate": 1.9808385903647688e-05, "loss": 0.3082, "step": 11238 }, { "epoch": 1.8301917518218458, "grad_norm": 0.13076315820217133, "learning_rate": 1.9803752241991625e-05, "loss": 0.3115, "step": 11239 }, { "epoch": 1.8303545983796767, "grad_norm": 0.1375250369310379, "learning_rate": 1.9799118766896415e-05, "loss": 0.323, "step": 11240 }, { "epoch": 1.8305174449375077, "grad_norm": 0.10917460173368454, "learning_rate": 1.979448547852843e-05, "loss": 0.3126, "step": 11241 }, { "epoch": 1.8306802914953386, "grad_norm": 0.13686268031597137, "learning_rate": 1.978985237705402e-05, "loss": 0.3314, "step": 11242 }, { "epoch": 1.8308431380531696, "grad_norm": 0.10595690459012985, "learning_rate": 1.9785219462639513e-05, "loss": 0.3085, "step": 11243 }, { "epoch": 1.8310059846110003, "grad_norm": 0.09697506576776505, "learning_rate": 1.9780586735451244e-05, "loss": 0.3229, "step": 11244 }, { "epoch": 1.8311688311688312, "grad_norm": 0.10081420093774796, "learning_rate": 1.9775954195655554e-05, "loss": 0.2694, "step": 11245 }, { "epoch": 1.831331677726662, "grad_norm": 0.0782802626490593, "learning_rate": 1.9771321843418762e-05, "loss": 0.3416, "step": 11246 }, { "epoch": 1.8314945242844929, "grad_norm": 0.08908490091562271, "learning_rate": 1.9766689678907178e-05, "loss": 0.3004, "step": 11247 }, { "epoch": 1.8316573708423238, "grad_norm": 0.0904770940542221, "learning_rate": 1.976205770228712e-05, "loss": 0.3378, "step": 11248 }, { "epoch": 1.8318202174001548, "grad_norm": 0.13141068816184998, "learning_rate": 1.975742591372488e-05, "loss": 0.3106, "step": 11249 }, { "epoch": 1.8319830639579857, "grad_norm": 0.10856854915618896, "learning_rate": 1.9752794313386756e-05, "loss": 0.3398, "step": 11250 }, { "epoch": 1.8321459105158164, "grad_norm": 0.11116749793291092, "learning_rate": 1.9748162901439033e-05, "loss": 0.3256, "step": 11251 }, { "epoch": 1.8323087570736474, "grad_norm": 0.15020853281021118, "learning_rate": 1.9743531678048004e-05, "loss": 0.3607, "step": 11252 }, { "epoch": 1.832471603631478, "grad_norm": 0.10778902471065521, "learning_rate": 1.973890064337994e-05, "loss": 0.3027, "step": 11253 }, { "epoch": 1.832634450189309, "grad_norm": 0.14554600417613983, "learning_rate": 1.97342697976011e-05, "loss": 0.3246, "step": 11254 }, { "epoch": 1.83279729674714, "grad_norm": 0.09145340323448181, "learning_rate": 1.972963914087775e-05, "loss": 0.3101, "step": 11255 }, { "epoch": 1.832960143304971, "grad_norm": 0.10594069212675095, "learning_rate": 1.9725008673376147e-05, "loss": 0.2965, "step": 11256 }, { "epoch": 1.8331229898628019, "grad_norm": 0.11419224739074707, "learning_rate": 1.972037839526254e-05, "loss": 0.2833, "step": 11257 }, { "epoch": 1.8332858364206328, "grad_norm": 0.10423968732357025, "learning_rate": 1.9715748306703156e-05, "loss": 0.2617, "step": 11258 }, { "epoch": 1.8334486829784635, "grad_norm": 0.11461272835731506, "learning_rate": 1.9711118407864248e-05, "loss": 0.2672, "step": 11259 }, { "epoch": 1.8336115295362945, "grad_norm": 0.12563438713550568, "learning_rate": 1.9706488698912037e-05, "loss": 0.3223, "step": 11260 }, { "epoch": 1.8337743760941252, "grad_norm": 0.15868739783763885, "learning_rate": 1.9701859180012743e-05, "loss": 0.3351, "step": 11261 }, { "epoch": 1.8339372226519561, "grad_norm": 0.13546466827392578, "learning_rate": 1.9697229851332565e-05, "loss": 0.31, "step": 11262 }, { "epoch": 1.834100069209787, "grad_norm": 0.10341916233301163, "learning_rate": 1.9692600713037734e-05, "loss": 0.3186, "step": 11263 }, { "epoch": 1.834262915767618, "grad_norm": 0.13326455652713776, "learning_rate": 1.968797176529444e-05, "loss": 0.3169, "step": 11264 }, { "epoch": 1.834425762325449, "grad_norm": 0.10693047940731049, "learning_rate": 1.9683343008268864e-05, "loss": 0.3342, "step": 11265 }, { "epoch": 1.8345886088832797, "grad_norm": 0.1683693826198578, "learning_rate": 1.9678714442127214e-05, "loss": 0.3597, "step": 11266 }, { "epoch": 1.8347514554411106, "grad_norm": 0.13411444425582886, "learning_rate": 1.967408606703565e-05, "loss": 0.3257, "step": 11267 }, { "epoch": 1.8349143019989413, "grad_norm": 0.09895365685224533, "learning_rate": 1.9669457883160364e-05, "loss": 0.2721, "step": 11268 }, { "epoch": 1.8350771485567723, "grad_norm": 0.1128535196185112, "learning_rate": 1.9664829890667502e-05, "loss": 0.3192, "step": 11269 }, { "epoch": 1.8352399951146032, "grad_norm": 0.0986553281545639, "learning_rate": 1.9660202089723233e-05, "loss": 0.3258, "step": 11270 }, { "epoch": 1.8354028416724342, "grad_norm": 0.15626509487628937, "learning_rate": 1.965557448049371e-05, "loss": 0.3408, "step": 11271 }, { "epoch": 1.8355656882302651, "grad_norm": 0.13362643122673035, "learning_rate": 1.9650947063145077e-05, "loss": 0.3206, "step": 11272 }, { "epoch": 1.835728534788096, "grad_norm": 0.09365861862897873, "learning_rate": 1.964631983784346e-05, "loss": 0.3177, "step": 11273 }, { "epoch": 1.8358913813459268, "grad_norm": 0.11726247519254684, "learning_rate": 1.9641692804755012e-05, "loss": 0.3359, "step": 11274 }, { "epoch": 1.8360542279037577, "grad_norm": 0.10363368690013885, "learning_rate": 1.9637065964045846e-05, "loss": 0.3076, "step": 11275 }, { "epoch": 1.8362170744615884, "grad_norm": 0.13207171857357025, "learning_rate": 1.9632439315882068e-05, "loss": 0.3347, "step": 11276 }, { "epoch": 1.8363799210194194, "grad_norm": 0.0971779003739357, "learning_rate": 1.962781286042981e-05, "loss": 0.3191, "step": 11277 }, { "epoch": 1.8365427675772503, "grad_norm": 0.12998203933238983, "learning_rate": 1.962318659785517e-05, "loss": 0.3036, "step": 11278 }, { "epoch": 1.8367056141350813, "grad_norm": 0.1308251917362213, "learning_rate": 1.9618560528324237e-05, "loss": 0.3266, "step": 11279 }, { "epoch": 1.8368684606929122, "grad_norm": 0.10943474620580673, "learning_rate": 1.9613934652003098e-05, "loss": 0.3341, "step": 11280 }, { "epoch": 1.8370313072507432, "grad_norm": 0.11926770210266113, "learning_rate": 1.9609308969057846e-05, "loss": 0.3648, "step": 11281 }, { "epoch": 1.8371941538085739, "grad_norm": 0.11557544767856598, "learning_rate": 1.9604683479654557e-05, "loss": 0.318, "step": 11282 }, { "epoch": 1.8373570003664046, "grad_norm": 0.12689507007598877, "learning_rate": 1.960005818395929e-05, "loss": 0.3313, "step": 11283 }, { "epoch": 1.8375198469242355, "grad_norm": 0.12932142615318298, "learning_rate": 1.9595433082138112e-05, "loss": 0.2914, "step": 11284 }, { "epoch": 1.8376826934820665, "grad_norm": 0.10174798220396042, "learning_rate": 1.9590808174357088e-05, "loss": 0.34, "step": 11285 }, { "epoch": 1.8378455400398974, "grad_norm": 0.1363050639629364, "learning_rate": 1.9586183460782246e-05, "loss": 0.3335, "step": 11286 }, { "epoch": 1.8380083865977284, "grad_norm": 0.07189270853996277, "learning_rate": 1.9581558941579643e-05, "loss": 0.3598, "step": 11287 }, { "epoch": 1.8381712331555593, "grad_norm": 0.12079538404941559, "learning_rate": 1.9576934616915304e-05, "loss": 0.3066, "step": 11288 }, { "epoch": 1.83833407971339, "grad_norm": 0.124444380402565, "learning_rate": 1.9572310486955267e-05, "loss": 0.3145, "step": 11289 }, { "epoch": 1.838496926271221, "grad_norm": 0.15804579854011536, "learning_rate": 1.9567686551865544e-05, "loss": 0.3356, "step": 11290 }, { "epoch": 1.8386597728290517, "grad_norm": 0.09253565967082977, "learning_rate": 1.956306281181214e-05, "loss": 0.3396, "step": 11291 }, { "epoch": 1.8388226193868826, "grad_norm": 0.08676803857088089, "learning_rate": 1.955843926696108e-05, "loss": 0.3258, "step": 11292 }, { "epoch": 1.8389854659447136, "grad_norm": 0.14836634695529938, "learning_rate": 1.9553815917478357e-05, "loss": 0.3135, "step": 11293 }, { "epoch": 1.8391483125025445, "grad_norm": 0.08548245579004288, "learning_rate": 1.954919276352996e-05, "loss": 0.2959, "step": 11294 }, { "epoch": 1.8393111590603755, "grad_norm": 0.10578808933496475, "learning_rate": 1.954456980528186e-05, "loss": 0.3155, "step": 11295 }, { "epoch": 1.8394740056182064, "grad_norm": 0.08351442217826843, "learning_rate": 1.9539947042900058e-05, "loss": 0.3168, "step": 11296 }, { "epoch": 1.8396368521760371, "grad_norm": 0.14793676137924194, "learning_rate": 1.953532447655052e-05, "loss": 0.3589, "step": 11297 }, { "epoch": 1.839799698733868, "grad_norm": 0.07945624738931656, "learning_rate": 1.9530702106399195e-05, "loss": 0.295, "step": 11298 }, { "epoch": 1.8399625452916988, "grad_norm": 0.13442018628120422, "learning_rate": 1.952607993261206e-05, "loss": 0.3243, "step": 11299 }, { "epoch": 1.8401253918495297, "grad_norm": 0.08796336501836777, "learning_rate": 1.9521457955355056e-05, "loss": 0.3256, "step": 11300 }, { "epoch": 1.8402882384073607, "grad_norm": 0.14530996978282928, "learning_rate": 1.9516836174794123e-05, "loss": 0.3301, "step": 11301 }, { "epoch": 1.8404510849651916, "grad_norm": 0.11907077580690384, "learning_rate": 1.9512214591095194e-05, "loss": 0.332, "step": 11302 }, { "epoch": 1.8406139315230226, "grad_norm": 0.10350676625967026, "learning_rate": 1.950759320442421e-05, "loss": 0.3179, "step": 11303 }, { "epoch": 1.8407767780808533, "grad_norm": 0.09368498623371124, "learning_rate": 1.950297201494708e-05, "loss": 0.312, "step": 11304 }, { "epoch": 1.8409396246386842, "grad_norm": 0.0963183343410492, "learning_rate": 1.9498351022829732e-05, "loss": 0.3153, "step": 11305 }, { "epoch": 1.841102471196515, "grad_norm": 0.0937325730919838, "learning_rate": 1.9493730228238057e-05, "loss": 0.3009, "step": 11306 }, { "epoch": 1.8412653177543459, "grad_norm": 0.12420285493135452, "learning_rate": 1.9489109631337966e-05, "loss": 0.3244, "step": 11307 }, { "epoch": 1.8414281643121768, "grad_norm": 0.10104110836982727, "learning_rate": 1.9484489232295356e-05, "loss": 0.3313, "step": 11308 }, { "epoch": 1.8415910108700078, "grad_norm": 0.15851479768753052, "learning_rate": 1.9479869031276104e-05, "loss": 0.318, "step": 11309 }, { "epoch": 1.8417538574278387, "grad_norm": 0.1325390636920929, "learning_rate": 1.947524902844609e-05, "loss": 0.333, "step": 11310 }, { "epoch": 1.8419167039856696, "grad_norm": 0.11737120151519775, "learning_rate": 1.9470629223971194e-05, "loss": 0.3302, "step": 11311 }, { "epoch": 1.8420795505435004, "grad_norm": 0.08740176260471344, "learning_rate": 1.9466009618017276e-05, "loss": 0.3626, "step": 11312 }, { "epoch": 1.8422423971013313, "grad_norm": 0.1280147284269333, "learning_rate": 1.946139021075018e-05, "loss": 0.3029, "step": 11313 }, { "epoch": 1.842405243659162, "grad_norm": 0.14434044063091278, "learning_rate": 1.9456771002335782e-05, "loss": 0.3477, "step": 11314 }, { "epoch": 1.842568090216993, "grad_norm": 0.1153305172920227, "learning_rate": 1.945215199293991e-05, "loss": 0.2797, "step": 11315 }, { "epoch": 1.842730936774824, "grad_norm": 0.09292026609182358, "learning_rate": 1.9447533182728402e-05, "loss": 0.3248, "step": 11316 }, { "epoch": 1.8428937833326549, "grad_norm": 0.11160574853420258, "learning_rate": 1.944291457186709e-05, "loss": 0.3016, "step": 11317 }, { "epoch": 1.8430566298904858, "grad_norm": 0.12810508906841278, "learning_rate": 1.9438296160521796e-05, "loss": 0.268, "step": 11318 }, { "epoch": 1.8432194764483165, "grad_norm": 0.1108565479516983, "learning_rate": 1.9433677948858332e-05, "loss": 0.31, "step": 11319 }, { "epoch": 1.8433823230061475, "grad_norm": 0.11718548089265823, "learning_rate": 1.94290599370425e-05, "loss": 0.3061, "step": 11320 }, { "epoch": 1.8435451695639782, "grad_norm": 0.12298724055290222, "learning_rate": 1.9424442125240118e-05, "loss": 0.3318, "step": 11321 }, { "epoch": 1.8437080161218091, "grad_norm": 0.0758819431066513, "learning_rate": 1.9419824513616966e-05, "loss": 0.3048, "step": 11322 }, { "epoch": 1.84387086267964, "grad_norm": 0.1191706508398056, "learning_rate": 1.9415207102338828e-05, "loss": 0.3106, "step": 11323 }, { "epoch": 1.844033709237471, "grad_norm": 0.11894339323043823, "learning_rate": 1.9410589891571488e-05, "loss": 0.3167, "step": 11324 }, { "epoch": 1.844196555795302, "grad_norm": 0.1336585134267807, "learning_rate": 1.940597288148072e-05, "loss": 0.3255, "step": 11325 }, { "epoch": 1.844359402353133, "grad_norm": 0.09051267057657242, "learning_rate": 1.9401356072232283e-05, "loss": 0.3244, "step": 11326 }, { "epoch": 1.8445222489109636, "grad_norm": 0.107132688164711, "learning_rate": 1.9396739463991936e-05, "loss": 0.331, "step": 11327 }, { "epoch": 1.8446850954687946, "grad_norm": 0.11479759961366653, "learning_rate": 1.9392123056925432e-05, "loss": 0.3082, "step": 11328 }, { "epoch": 1.8448479420266253, "grad_norm": 0.13218961656093597, "learning_rate": 1.9387506851198513e-05, "loss": 0.3011, "step": 11329 }, { "epoch": 1.8450107885844562, "grad_norm": 0.07576671987771988, "learning_rate": 1.9382890846976917e-05, "loss": 0.3311, "step": 11330 }, { "epoch": 1.8451736351422872, "grad_norm": 0.09955229610204697, "learning_rate": 1.937827504442636e-05, "loss": 0.2675, "step": 11331 }, { "epoch": 1.845336481700118, "grad_norm": 0.10316376388072968, "learning_rate": 1.9373659443712577e-05, "loss": 0.3076, "step": 11332 }, { "epoch": 1.845499328257949, "grad_norm": 0.14014697074890137, "learning_rate": 1.936904404500128e-05, "loss": 0.3135, "step": 11333 }, { "epoch": 1.84566217481578, "grad_norm": 0.11528614908456802, "learning_rate": 1.9364428848458175e-05, "loss": 0.329, "step": 11334 }, { "epoch": 1.8458250213736107, "grad_norm": 0.12660475075244904, "learning_rate": 1.9359813854248942e-05, "loss": 0.298, "step": 11335 }, { "epoch": 1.8459878679314417, "grad_norm": 0.16794270277023315, "learning_rate": 1.9355199062539307e-05, "loss": 0.3346, "step": 11336 }, { "epoch": 1.8461507144892724, "grad_norm": 0.146750807762146, "learning_rate": 1.9350584473494935e-05, "loss": 0.3033, "step": 11337 }, { "epoch": 1.8463135610471033, "grad_norm": 0.16034221649169922, "learning_rate": 1.9345970087281496e-05, "loss": 0.3099, "step": 11338 }, { "epoch": 1.8464764076049343, "grad_norm": 0.14567431807518005, "learning_rate": 1.9341355904064683e-05, "loss": 0.2932, "step": 11339 }, { "epoch": 1.8466392541627652, "grad_norm": 0.11151367425918579, "learning_rate": 1.9336741924010146e-05, "loss": 0.3049, "step": 11340 }, { "epoch": 1.8468021007205961, "grad_norm": 0.11343557387590408, "learning_rate": 1.933212814728354e-05, "loss": 0.3221, "step": 11341 }, { "epoch": 1.8469649472784269, "grad_norm": 0.1517791748046875, "learning_rate": 1.9327514574050507e-05, "loss": 0.3415, "step": 11342 }, { "epoch": 1.8471277938362578, "grad_norm": 0.1186985895037651, "learning_rate": 1.9322901204476705e-05, "loss": 0.279, "step": 11343 }, { "epoch": 1.8472906403940885, "grad_norm": 0.10019218921661377, "learning_rate": 1.9318288038727754e-05, "loss": 0.3222, "step": 11344 }, { "epoch": 1.8474534869519195, "grad_norm": 0.18078601360321045, "learning_rate": 1.931367507696929e-05, "loss": 0.336, "step": 11345 }, { "epoch": 1.8476163335097504, "grad_norm": 0.12888525426387787, "learning_rate": 1.9309062319366923e-05, "loss": 0.3058, "step": 11346 }, { "epoch": 1.8477791800675814, "grad_norm": 0.11689432710409164, "learning_rate": 1.9304449766086266e-05, "loss": 0.3068, "step": 11347 }, { "epoch": 1.8479420266254123, "grad_norm": 0.11149542033672333, "learning_rate": 1.9299837417292935e-05, "loss": 0.3403, "step": 11348 }, { "epoch": 1.8481048731832432, "grad_norm": 0.12382252514362335, "learning_rate": 1.9295225273152506e-05, "loss": 0.3387, "step": 11349 }, { "epoch": 1.848267719741074, "grad_norm": 0.0946303978562355, "learning_rate": 1.9290613333830596e-05, "loss": 0.3419, "step": 11350 }, { "epoch": 1.848430566298905, "grad_norm": 0.10343471169471741, "learning_rate": 1.928600159949277e-05, "loss": 0.2999, "step": 11351 }, { "epoch": 1.8485934128567356, "grad_norm": 0.14849646389484406, "learning_rate": 1.928139007030461e-05, "loss": 0.2972, "step": 11352 }, { "epoch": 1.8487562594145666, "grad_norm": 0.1158239021897316, "learning_rate": 1.9276778746431665e-05, "loss": 0.325, "step": 11353 }, { "epoch": 1.8489191059723975, "grad_norm": 0.09875929355621338, "learning_rate": 1.9272167628039522e-05, "loss": 0.3469, "step": 11354 }, { "epoch": 1.8490819525302284, "grad_norm": 0.10642834007740021, "learning_rate": 1.926755671529372e-05, "loss": 0.3589, "step": 11355 }, { "epoch": 1.8492447990880594, "grad_norm": 0.10033227503299713, "learning_rate": 1.9262946008359813e-05, "loss": 0.2767, "step": 11356 }, { "epoch": 1.84940764564589, "grad_norm": 0.12154487520456314, "learning_rate": 1.9258335507403317e-05, "loss": 0.3283, "step": 11357 }, { "epoch": 1.849570492203721, "grad_norm": 0.10623877495527267, "learning_rate": 1.925372521258979e-05, "loss": 0.2901, "step": 11358 }, { "epoch": 1.8497333387615518, "grad_norm": 0.08859894424676895, "learning_rate": 1.9249115124084746e-05, "loss": 0.3428, "step": 11359 }, { "epoch": 1.8498961853193827, "grad_norm": 0.09825434535741806, "learning_rate": 1.924450524205369e-05, "loss": 0.3066, "step": 11360 }, { "epoch": 1.8500590318772137, "grad_norm": 0.14396139979362488, "learning_rate": 1.9239895566662146e-05, "loss": 0.3667, "step": 11361 }, { "epoch": 1.8502218784350446, "grad_norm": 0.11631803214550018, "learning_rate": 1.923528609807561e-05, "loss": 0.2796, "step": 11362 }, { "epoch": 1.8503847249928755, "grad_norm": 0.11513169854879379, "learning_rate": 1.923067683645957e-05, "loss": 0.2876, "step": 11363 }, { "epoch": 1.8505475715507065, "grad_norm": 0.1876125931739807, "learning_rate": 1.9226067781979518e-05, "loss": 0.3646, "step": 11364 }, { "epoch": 1.8507104181085372, "grad_norm": 0.07534223049879074, "learning_rate": 1.9221458934800933e-05, "loss": 0.3186, "step": 11365 }, { "epoch": 1.8508732646663681, "grad_norm": 0.11090055853128433, "learning_rate": 1.921685029508929e-05, "loss": 0.2839, "step": 11366 }, { "epoch": 1.8510361112241989, "grad_norm": 0.0725753903388977, "learning_rate": 1.921224186301004e-05, "loss": 0.3009, "step": 11367 }, { "epoch": 1.8511989577820298, "grad_norm": 0.10595377534627914, "learning_rate": 1.920763363872865e-05, "loss": 0.286, "step": 11368 }, { "epoch": 1.8513618043398608, "grad_norm": 0.09388858824968338, "learning_rate": 1.920302562241057e-05, "loss": 0.3046, "step": 11369 }, { "epoch": 1.8515246508976917, "grad_norm": 0.09749888628721237, "learning_rate": 1.919841781422124e-05, "loss": 0.3319, "step": 11370 }, { "epoch": 1.8516874974555226, "grad_norm": 0.07464020699262619, "learning_rate": 1.919381021432608e-05, "loss": 0.3016, "step": 11371 }, { "epoch": 1.8518503440133536, "grad_norm": 0.09365838021039963, "learning_rate": 1.918920282289054e-05, "loss": 0.3268, "step": 11372 }, { "epoch": 1.8520131905711843, "grad_norm": 0.0953272134065628, "learning_rate": 1.9184595640080032e-05, "loss": 0.3107, "step": 11373 }, { "epoch": 1.8521760371290152, "grad_norm": 0.08559931814670563, "learning_rate": 1.917998866605996e-05, "loss": 0.301, "step": 11374 }, { "epoch": 1.852338883686846, "grad_norm": 0.11187154799699783, "learning_rate": 1.9175381900995723e-05, "loss": 0.3572, "step": 11375 }, { "epoch": 1.852501730244677, "grad_norm": 0.09602232277393341, "learning_rate": 1.917077534505274e-05, "loss": 0.2805, "step": 11376 }, { "epoch": 1.8526645768025078, "grad_norm": 0.07776059955358505, "learning_rate": 1.9166168998396382e-05, "loss": 0.3223, "step": 11377 }, { "epoch": 1.8528274233603388, "grad_norm": 0.10416052490472794, "learning_rate": 1.9161562861192027e-05, "loss": 0.3001, "step": 11378 }, { "epoch": 1.8529902699181697, "grad_norm": 0.06721233576536179, "learning_rate": 1.915695693360507e-05, "loss": 0.3038, "step": 11379 }, { "epoch": 1.8531531164760005, "grad_norm": 0.11628817766904831, "learning_rate": 1.9152351215800863e-05, "loss": 0.3261, "step": 11380 }, { "epoch": 1.8533159630338314, "grad_norm": 0.07603810727596283, "learning_rate": 1.9147745707944763e-05, "loss": 0.3111, "step": 11381 }, { "epoch": 1.8534788095916621, "grad_norm": 0.10586507618427277, "learning_rate": 1.9143140410202124e-05, "loss": 0.3201, "step": 11382 }, { "epoch": 1.853641656149493, "grad_norm": 0.14028315246105194, "learning_rate": 1.9138535322738298e-05, "loss": 0.3114, "step": 11383 }, { "epoch": 1.853804502707324, "grad_norm": 0.10981665551662445, "learning_rate": 1.9133930445718612e-05, "loss": 0.3272, "step": 11384 }, { "epoch": 1.853967349265155, "grad_norm": 0.10305441915988922, "learning_rate": 1.9129325779308406e-05, "loss": 0.3208, "step": 11385 }, { "epoch": 1.8541301958229859, "grad_norm": 0.09778366982936859, "learning_rate": 1.9124721323672977e-05, "loss": 0.3093, "step": 11386 }, { "epoch": 1.8542930423808168, "grad_norm": 0.16054409742355347, "learning_rate": 1.9120117078977667e-05, "loss": 0.3206, "step": 11387 }, { "epoch": 1.8544558889386475, "grad_norm": 0.13223034143447876, "learning_rate": 1.911551304538777e-05, "loss": 0.2908, "step": 11388 }, { "epoch": 1.8546187354964785, "grad_norm": 0.09307289123535156, "learning_rate": 1.9110909223068578e-05, "loss": 0.3103, "step": 11389 }, { "epoch": 1.8547815820543092, "grad_norm": 0.11908946931362152, "learning_rate": 1.91063056121854e-05, "loss": 0.3115, "step": 11390 }, { "epoch": 1.8549444286121402, "grad_norm": 0.10396520048379898, "learning_rate": 1.9101702212903506e-05, "loss": 0.3004, "step": 11391 }, { "epoch": 1.855107275169971, "grad_norm": 0.11193568259477615, "learning_rate": 1.9097099025388177e-05, "loss": 0.3021, "step": 11392 }, { "epoch": 1.855270121727802, "grad_norm": 0.09017781168222427, "learning_rate": 1.9092496049804666e-05, "loss": 0.324, "step": 11393 }, { "epoch": 1.855432968285633, "grad_norm": 0.09831084311008453, "learning_rate": 1.9087893286318253e-05, "loss": 0.3105, "step": 11394 }, { "epoch": 1.8555958148434637, "grad_norm": 0.07819647341966629, "learning_rate": 1.908329073509419e-05, "loss": 0.2807, "step": 11395 }, { "epoch": 1.8557586614012946, "grad_norm": 0.08524835109710693, "learning_rate": 1.907868839629771e-05, "loss": 0.3307, "step": 11396 }, { "epoch": 1.8559215079591254, "grad_norm": 0.08805578202009201, "learning_rate": 1.9074086270094056e-05, "loss": 0.3073, "step": 11397 }, { "epoch": 1.8560843545169563, "grad_norm": 0.11291658878326416, "learning_rate": 1.906948435664846e-05, "loss": 0.2891, "step": 11398 }, { "epoch": 1.8562472010747872, "grad_norm": 0.1092100590467453, "learning_rate": 1.9064882656126153e-05, "loss": 0.3332, "step": 11399 }, { "epoch": 1.8564100476326182, "grad_norm": 0.0867282971739769, "learning_rate": 1.9060281168692323e-05, "loss": 0.3403, "step": 11400 }, { "epoch": 1.8565728941904491, "grad_norm": 0.11533015966415405, "learning_rate": 1.905567989451221e-05, "loss": 0.333, "step": 11401 }, { "epoch": 1.85673574074828, "grad_norm": 0.16442923247814178, "learning_rate": 1.905107883375099e-05, "loss": 0.3147, "step": 11402 }, { "epoch": 1.8568985873061108, "grad_norm": 0.10714744031429291, "learning_rate": 1.904647798657387e-05, "loss": 0.3156, "step": 11403 }, { "epoch": 1.8570614338639417, "grad_norm": 0.09153550863265991, "learning_rate": 1.9041877353146022e-05, "loss": 0.3178, "step": 11404 }, { "epoch": 1.8572242804217725, "grad_norm": 0.10949266701936722, "learning_rate": 1.903727693363263e-05, "loss": 0.3199, "step": 11405 }, { "epoch": 1.8573871269796034, "grad_norm": 0.12227778881788254, "learning_rate": 1.9032676728198866e-05, "loss": 0.2939, "step": 11406 }, { "epoch": 1.8575499735374343, "grad_norm": 0.1124349907040596, "learning_rate": 1.9028076737009885e-05, "loss": 0.3402, "step": 11407 }, { "epoch": 1.8577128200952653, "grad_norm": 0.07228071242570877, "learning_rate": 1.902347696023083e-05, "loss": 0.2943, "step": 11408 }, { "epoch": 1.8578756666530962, "grad_norm": 0.10155273973941803, "learning_rate": 1.901887739802687e-05, "loss": 0.2938, "step": 11409 }, { "epoch": 1.8580385132109272, "grad_norm": 0.1089356318116188, "learning_rate": 1.901427805056313e-05, "loss": 0.3236, "step": 11410 }, { "epoch": 1.8582013597687579, "grad_norm": 0.1045578271150589, "learning_rate": 1.9009678918004732e-05, "loss": 0.2854, "step": 11411 }, { "epoch": 1.8583642063265886, "grad_norm": 0.13035595417022705, "learning_rate": 1.9005080000516816e-05, "loss": 0.3085, "step": 11412 }, { "epoch": 1.8585270528844196, "grad_norm": 0.11807063966989517, "learning_rate": 1.9000481298264496e-05, "loss": 0.2987, "step": 11413 }, { "epoch": 1.8586898994422505, "grad_norm": 0.08208611607551575, "learning_rate": 1.8995882811412867e-05, "loss": 0.3429, "step": 11414 }, { "epoch": 1.8588527460000814, "grad_norm": 0.09726688265800476, "learning_rate": 1.8991284540127024e-05, "loss": 0.2877, "step": 11415 }, { "epoch": 1.8590155925579124, "grad_norm": 0.0731409415602684, "learning_rate": 1.898668648457208e-05, "loss": 0.3186, "step": 11416 }, { "epoch": 1.8591784391157433, "grad_norm": 0.16987305879592896, "learning_rate": 1.8982088644913108e-05, "loss": 0.2977, "step": 11417 }, { "epoch": 1.859341285673574, "grad_norm": 0.0988643616437912, "learning_rate": 1.8977491021315172e-05, "loss": 0.3038, "step": 11418 }, { "epoch": 1.859504132231405, "grad_norm": 0.14788232743740082, "learning_rate": 1.897289361394336e-05, "loss": 0.3328, "step": 11419 }, { "epoch": 1.8596669787892357, "grad_norm": 0.130782350897789, "learning_rate": 1.8968296422962733e-05, "loss": 0.294, "step": 11420 }, { "epoch": 1.8598298253470666, "grad_norm": 0.10149307548999786, "learning_rate": 1.8963699448538325e-05, "loss": 0.2918, "step": 11421 }, { "epoch": 1.8599926719048976, "grad_norm": 0.6871122121810913, "learning_rate": 1.895910269083519e-05, "loss": 0.346, "step": 11422 }, { "epoch": 1.8601555184627285, "grad_norm": 0.14936420321464539, "learning_rate": 1.8954506150018374e-05, "loss": 0.3016, "step": 11423 }, { "epoch": 1.8603183650205595, "grad_norm": 0.1744658201932907, "learning_rate": 1.8949909826252902e-05, "loss": 0.3343, "step": 11424 }, { "epoch": 1.8604812115783904, "grad_norm": 0.08238063752651215, "learning_rate": 1.8945313719703795e-05, "loss": 0.3319, "step": 11425 }, { "epoch": 1.8606440581362211, "grad_norm": 0.11939743906259537, "learning_rate": 1.8940717830536052e-05, "loss": 0.3376, "step": 11426 }, { "epoch": 1.860806904694052, "grad_norm": 0.12367577850818634, "learning_rate": 1.8936122158914704e-05, "loss": 0.3079, "step": 11427 }, { "epoch": 1.8609697512518828, "grad_norm": 0.14464186131954193, "learning_rate": 1.893152670500474e-05, "loss": 0.3512, "step": 11428 }, { "epoch": 1.8611325978097137, "grad_norm": 0.10045013576745987, "learning_rate": 1.8926931468971136e-05, "loss": 0.3343, "step": 11429 }, { "epoch": 1.8612954443675447, "grad_norm": 0.14558333158493042, "learning_rate": 1.8922336450978903e-05, "loss": 0.2802, "step": 11430 }, { "epoch": 1.8614582909253756, "grad_norm": 0.1176527813076973, "learning_rate": 1.8917741651193e-05, "loss": 0.2824, "step": 11431 }, { "epoch": 1.8616211374832066, "grad_norm": 0.18161530792713165, "learning_rate": 1.8913147069778393e-05, "loss": 0.3133, "step": 11432 }, { "epoch": 1.8617839840410373, "grad_norm": 0.10154686868190765, "learning_rate": 1.890855270690003e-05, "loss": 0.2903, "step": 11433 }, { "epoch": 1.8619468305988682, "grad_norm": 0.0733637884259224, "learning_rate": 1.890395856272289e-05, "loss": 0.3472, "step": 11434 }, { "epoch": 1.862109677156699, "grad_norm": 0.09470280259847641, "learning_rate": 1.88993646374119e-05, "loss": 0.3126, "step": 11435 }, { "epoch": 1.86227252371453, "grad_norm": 0.1298220008611679, "learning_rate": 1.8894770931131994e-05, "loss": 0.3241, "step": 11436 }, { "epoch": 1.8624353702723608, "grad_norm": 0.09250761568546295, "learning_rate": 1.8890177444048096e-05, "loss": 0.3284, "step": 11437 }, { "epoch": 1.8625982168301918, "grad_norm": 0.1388159543275833, "learning_rate": 1.8885584176325145e-05, "loss": 0.309, "step": 11438 }, { "epoch": 1.8627610633880227, "grad_norm": 0.161599799990654, "learning_rate": 1.888099112812804e-05, "loss": 0.3222, "step": 11439 }, { "epoch": 1.8629239099458537, "grad_norm": 0.11641152203083038, "learning_rate": 1.887639829962167e-05, "loss": 0.3388, "step": 11440 }, { "epoch": 1.8630867565036844, "grad_norm": 0.14174577593803406, "learning_rate": 1.8871805690970967e-05, "loss": 0.2995, "step": 11441 }, { "epoch": 1.8632496030615153, "grad_norm": 0.13349834084510803, "learning_rate": 1.8867213302340794e-05, "loss": 0.3348, "step": 11442 }, { "epoch": 1.863412449619346, "grad_norm": 0.11911116540431976, "learning_rate": 1.886262113389604e-05, "loss": 0.3405, "step": 11443 }, { "epoch": 1.863575296177177, "grad_norm": 0.1418730616569519, "learning_rate": 1.8858029185801574e-05, "loss": 0.3407, "step": 11444 }, { "epoch": 1.863738142735008, "grad_norm": 0.10703056305646896, "learning_rate": 1.885343745822226e-05, "loss": 0.2875, "step": 11445 }, { "epoch": 1.8639009892928389, "grad_norm": 0.17545261979103088, "learning_rate": 1.8848845951322962e-05, "loss": 0.3301, "step": 11446 }, { "epoch": 1.8640638358506698, "grad_norm": 0.12968672811985016, "learning_rate": 1.8844254665268522e-05, "loss": 0.3099, "step": 11447 }, { "epoch": 1.8642266824085005, "grad_norm": 0.08341970294713974, "learning_rate": 1.8839663600223773e-05, "loss": 0.309, "step": 11448 }, { "epoch": 1.8643895289663315, "grad_norm": 0.07994160801172256, "learning_rate": 1.8835072756353566e-05, "loss": 0.3443, "step": 11449 }, { "epoch": 1.8645523755241622, "grad_norm": 0.07898379862308502, "learning_rate": 1.883048213382272e-05, "loss": 0.2888, "step": 11450 }, { "epoch": 1.8647152220819931, "grad_norm": 0.11282394826412201, "learning_rate": 1.882589173279604e-05, "loss": 0.2967, "step": 11451 }, { "epoch": 1.864878068639824, "grad_norm": 0.17353597283363342, "learning_rate": 1.8821301553438356e-05, "loss": 0.3315, "step": 11452 }, { "epoch": 1.865040915197655, "grad_norm": 0.17196601629257202, "learning_rate": 1.8816711595914457e-05, "loss": 0.316, "step": 11453 }, { "epoch": 1.865203761755486, "grad_norm": 0.07098081707954407, "learning_rate": 1.8812121860389135e-05, "loss": 0.2999, "step": 11454 }, { "epoch": 1.865366608313317, "grad_norm": 0.14621809124946594, "learning_rate": 1.8807532347027172e-05, "loss": 0.3119, "step": 11455 }, { "epoch": 1.8655294548711476, "grad_norm": 0.12695661187171936, "learning_rate": 1.880294305599336e-05, "loss": 0.3679, "step": 11456 }, { "epoch": 1.8656923014289786, "grad_norm": 0.19057531654834747, "learning_rate": 1.8798353987452457e-05, "loss": 0.334, "step": 11457 }, { "epoch": 1.8658551479868093, "grad_norm": 0.14851173758506775, "learning_rate": 1.8793765141569218e-05, "loss": 0.3463, "step": 11458 }, { "epoch": 1.8660179945446402, "grad_norm": 0.18169870972633362, "learning_rate": 1.8789176518508414e-05, "loss": 0.3353, "step": 11459 }, { "epoch": 1.8661808411024712, "grad_norm": 0.09986235201358795, "learning_rate": 1.8784588118434783e-05, "loss": 0.3336, "step": 11460 }, { "epoch": 1.8663436876603021, "grad_norm": 0.12440623342990875, "learning_rate": 1.8779999941513052e-05, "loss": 0.38, "step": 11461 }, { "epoch": 1.866506534218133, "grad_norm": 0.11904655396938324, "learning_rate": 1.877541198790796e-05, "loss": 0.275, "step": 11462 }, { "epoch": 1.866669380775964, "grad_norm": 0.16980157792568207, "learning_rate": 1.8770824257784224e-05, "loss": 0.3068, "step": 11463 }, { "epoch": 1.8668322273337947, "grad_norm": 0.10550153255462646, "learning_rate": 1.876623675130657e-05, "loss": 0.3101, "step": 11464 }, { "epoch": 1.8669950738916257, "grad_norm": 0.10310178250074387, "learning_rate": 1.8761649468639695e-05, "loss": 0.2932, "step": 11465 }, { "epoch": 1.8671579204494564, "grad_norm": 0.14521166682243347, "learning_rate": 1.875706240994828e-05, "loss": 0.3101, "step": 11466 }, { "epoch": 1.8673207670072873, "grad_norm": 0.10566739737987518, "learning_rate": 1.8752475575397042e-05, "loss": 0.3398, "step": 11467 }, { "epoch": 1.8674836135651183, "grad_norm": 0.10453805327415466, "learning_rate": 1.8747888965150646e-05, "loss": 0.355, "step": 11468 }, { "epoch": 1.8676464601229492, "grad_norm": 0.09365194290876389, "learning_rate": 1.8743302579373757e-05, "loss": 0.3119, "step": 11469 }, { "epoch": 1.8678093066807802, "grad_norm": 0.09765975922346115, "learning_rate": 1.8738716418231067e-05, "loss": 0.3048, "step": 11470 }, { "epoch": 1.8679721532386109, "grad_norm": 0.11263368278741837, "learning_rate": 1.8734130481887214e-05, "loss": 0.3173, "step": 11471 }, { "epoch": 1.8681349997964418, "grad_norm": 0.14600540697574615, "learning_rate": 1.8729544770506845e-05, "loss": 0.3169, "step": 11472 }, { "epoch": 1.8682978463542725, "grad_norm": 0.13192526996135712, "learning_rate": 1.87249592842546e-05, "loss": 0.302, "step": 11473 }, { "epoch": 1.8684606929121035, "grad_norm": 0.1467556357383728, "learning_rate": 1.8720374023295127e-05, "loss": 0.3281, "step": 11474 }, { "epoch": 1.8686235394699344, "grad_norm": 0.09129385650157928, "learning_rate": 1.8715788987793043e-05, "loss": 0.296, "step": 11475 }, { "epoch": 1.8687863860277654, "grad_norm": 0.1316036880016327, "learning_rate": 1.871120417791296e-05, "loss": 0.3365, "step": 11476 }, { "epoch": 1.8689492325855963, "grad_norm": 0.11138421297073364, "learning_rate": 1.870661959381948e-05, "loss": 0.2999, "step": 11477 }, { "epoch": 1.8691120791434273, "grad_norm": 0.09396550804376602, "learning_rate": 1.8702035235677218e-05, "loss": 0.3131, "step": 11478 }, { "epoch": 1.869274925701258, "grad_norm": 0.1698114573955536, "learning_rate": 1.869745110365076e-05, "loss": 0.3232, "step": 11479 }, { "epoch": 1.869437772259089, "grad_norm": 0.14337098598480225, "learning_rate": 1.8692867197904686e-05, "loss": 0.3595, "step": 11480 }, { "epoch": 1.8696006188169196, "grad_norm": 0.16975364089012146, "learning_rate": 1.8688283518603585e-05, "loss": 0.3329, "step": 11481 }, { "epoch": 1.8697634653747506, "grad_norm": 0.11398496478796005, "learning_rate": 1.8683700065912006e-05, "loss": 0.3021, "step": 11482 }, { "epoch": 1.8699263119325815, "grad_norm": 0.10880865156650543, "learning_rate": 1.8679116839994533e-05, "loss": 0.3464, "step": 11483 }, { "epoch": 1.8700891584904125, "grad_norm": 0.27799472212791443, "learning_rate": 1.8674533841015685e-05, "loss": 0.3048, "step": 11484 }, { "epoch": 1.8702520050482434, "grad_norm": 0.15384145081043243, "learning_rate": 1.866995106914004e-05, "loss": 0.3308, "step": 11485 }, { "epoch": 1.8704148516060741, "grad_norm": 0.11822424829006195, "learning_rate": 1.8665368524532114e-05, "loss": 0.2859, "step": 11486 }, { "epoch": 1.870577698163905, "grad_norm": 0.13576915860176086, "learning_rate": 1.8660786207356435e-05, "loss": 0.3092, "step": 11487 }, { "epoch": 1.8707405447217358, "grad_norm": 0.09972041100263596, "learning_rate": 1.8656204117777516e-05, "loss": 0.2959, "step": 11488 }, { "epoch": 1.8709033912795667, "grad_norm": 0.07337009161710739, "learning_rate": 1.865162225595988e-05, "loss": 0.2982, "step": 11489 }, { "epoch": 1.8710662378373977, "grad_norm": 0.10483898222446442, "learning_rate": 1.8647040622068036e-05, "loss": 0.3012, "step": 11490 }, { "epoch": 1.8712290843952286, "grad_norm": 0.08773095905780792, "learning_rate": 1.864245921626645e-05, "loss": 0.3097, "step": 11491 }, { "epoch": 1.8713919309530596, "grad_norm": 0.14824770390987396, "learning_rate": 1.8637878038719638e-05, "loss": 0.2729, "step": 11492 }, { "epoch": 1.8715547775108905, "grad_norm": 0.10218732804059982, "learning_rate": 1.8633297089592062e-05, "loss": 0.2818, "step": 11493 }, { "epoch": 1.8717176240687212, "grad_norm": 0.13391068577766418, "learning_rate": 1.8628716369048203e-05, "loss": 0.2986, "step": 11494 }, { "epoch": 1.8718804706265522, "grad_norm": 0.1232321709394455, "learning_rate": 1.86241358772525e-05, "loss": 0.3118, "step": 11495 }, { "epoch": 1.8720433171843829, "grad_norm": 0.2021624892950058, "learning_rate": 1.8619555614369433e-05, "loss": 0.3485, "step": 11496 }, { "epoch": 1.8722061637422138, "grad_norm": 0.08525855094194412, "learning_rate": 1.8614975580563438e-05, "loss": 0.3443, "step": 11497 }, { "epoch": 1.8723690103000448, "grad_norm": 0.12956714630126953, "learning_rate": 1.8610395775998942e-05, "loss": 0.2942, "step": 11498 }, { "epoch": 1.8725318568578757, "grad_norm": 0.1502508819103241, "learning_rate": 1.8605816200840383e-05, "loss": 0.3357, "step": 11499 }, { "epoch": 1.8726947034157067, "grad_norm": 0.13620030879974365, "learning_rate": 1.8601236855252178e-05, "loss": 0.3421, "step": 11500 }, { "epoch": 1.8728575499735376, "grad_norm": 0.08687639981508255, "learning_rate": 1.8596657739398746e-05, "loss": 0.2866, "step": 11501 }, { "epoch": 1.8730203965313683, "grad_norm": 0.09558014571666718, "learning_rate": 1.8592078853444483e-05, "loss": 0.3135, "step": 11502 }, { "epoch": 1.8731832430891993, "grad_norm": 0.10293199867010117, "learning_rate": 1.8587500197553787e-05, "loss": 0.315, "step": 11503 }, { "epoch": 1.87334608964703, "grad_norm": 0.06951083242893219, "learning_rate": 1.8582921771891047e-05, "loss": 0.2914, "step": 11504 }, { "epoch": 1.873508936204861, "grad_norm": 0.09290983527898788, "learning_rate": 1.857834357662065e-05, "loss": 0.3007, "step": 11505 }, { "epoch": 1.8736717827626919, "grad_norm": 0.0923929214477539, "learning_rate": 1.857376561190694e-05, "loss": 0.275, "step": 11506 }, { "epoch": 1.8738346293205228, "grad_norm": 0.1228678897023201, "learning_rate": 1.856918787791431e-05, "loss": 0.2832, "step": 11507 }, { "epoch": 1.8739974758783537, "grad_norm": 0.07652826607227325, "learning_rate": 1.8564610374807108e-05, "loss": 0.3185, "step": 11508 }, { "epoch": 1.8741603224361845, "grad_norm": 0.10139583796262741, "learning_rate": 1.8560033102749662e-05, "loss": 0.3018, "step": 11509 }, { "epoch": 1.8743231689940154, "grad_norm": 0.10744668543338776, "learning_rate": 1.855545606190633e-05, "loss": 0.3274, "step": 11510 }, { "epoch": 1.8744860155518461, "grad_norm": 0.09760705381631851, "learning_rate": 1.8550879252441437e-05, "loss": 0.2657, "step": 11511 }, { "epoch": 1.874648862109677, "grad_norm": 0.1345130205154419, "learning_rate": 1.8546302674519296e-05, "loss": 0.2718, "step": 11512 }, { "epoch": 1.874811708667508, "grad_norm": 0.15521647036075592, "learning_rate": 1.854172632830422e-05, "loss": 0.3044, "step": 11513 }, { "epoch": 1.874974555225339, "grad_norm": 0.10231130570173264, "learning_rate": 1.8537150213960525e-05, "loss": 0.2992, "step": 11514 }, { "epoch": 1.87513740178317, "grad_norm": 0.0936625748872757, "learning_rate": 1.8532574331652503e-05, "loss": 0.3161, "step": 11515 }, { "epoch": 1.8753002483410008, "grad_norm": 0.07102397084236145, "learning_rate": 1.852799868154443e-05, "loss": 0.3555, "step": 11516 }, { "epoch": 1.8754630948988316, "grad_norm": 0.1093483567237854, "learning_rate": 1.8523423263800598e-05, "loss": 0.2756, "step": 11517 }, { "epoch": 1.8756259414566625, "grad_norm": 0.12685149908065796, "learning_rate": 1.8518848078585282e-05, "loss": 0.2981, "step": 11518 }, { "epoch": 1.8757887880144932, "grad_norm": 0.09342458844184875, "learning_rate": 1.851427312606273e-05, "loss": 0.2882, "step": 11519 }, { "epoch": 1.8759516345723242, "grad_norm": 0.17228154838085175, "learning_rate": 1.8509698406397207e-05, "loss": 0.2962, "step": 11520 }, { "epoch": 1.876114481130155, "grad_norm": 0.11369156837463379, "learning_rate": 1.8505123919752955e-05, "loss": 0.333, "step": 11521 }, { "epoch": 1.876277327687986, "grad_norm": 0.10058332234621048, "learning_rate": 1.8500549666294217e-05, "loss": 0.2761, "step": 11522 }, { "epoch": 1.876440174245817, "grad_norm": 0.07907748967409134, "learning_rate": 1.8495975646185222e-05, "loss": 0.3216, "step": 11523 }, { "epoch": 1.8766030208036477, "grad_norm": 0.11831533908843994, "learning_rate": 1.8491401859590173e-05, "loss": 0.2859, "step": 11524 }, { "epoch": 1.8767658673614787, "grad_norm": 0.11327169090509415, "learning_rate": 1.8486828306673314e-05, "loss": 0.2709, "step": 11525 }, { "epoch": 1.8769287139193094, "grad_norm": 0.07975355535745621, "learning_rate": 1.848225498759883e-05, "loss": 0.3266, "step": 11526 }, { "epoch": 1.8770915604771403, "grad_norm": 0.11158435046672821, "learning_rate": 1.8477681902530918e-05, "loss": 0.3188, "step": 11527 }, { "epoch": 1.8772544070349713, "grad_norm": 0.09563054889440536, "learning_rate": 1.847310905163376e-05, "loss": 0.3241, "step": 11528 }, { "epoch": 1.8774172535928022, "grad_norm": 0.09363768249750137, "learning_rate": 1.8468536435071548e-05, "loss": 0.3158, "step": 11529 }, { "epoch": 1.8775801001506331, "grad_norm": 0.12931424379348755, "learning_rate": 1.8463964053008453e-05, "loss": 0.2893, "step": 11530 }, { "epoch": 1.877742946708464, "grad_norm": 0.11662864685058594, "learning_rate": 1.8459391905608613e-05, "loss": 0.3357, "step": 11531 }, { "epoch": 1.8779057932662948, "grad_norm": 0.1246732547879219, "learning_rate": 1.8454819993036216e-05, "loss": 0.3152, "step": 11532 }, { "epoch": 1.8780686398241258, "grad_norm": 0.11077218502759933, "learning_rate": 1.845024831545539e-05, "loss": 0.3246, "step": 11533 }, { "epoch": 1.8782314863819565, "grad_norm": 0.08997311443090439, "learning_rate": 1.8445676873030272e-05, "loss": 0.3169, "step": 11534 }, { "epoch": 1.8783943329397874, "grad_norm": 0.08196455240249634, "learning_rate": 1.8441105665924983e-05, "loss": 0.3199, "step": 11535 }, { "epoch": 1.8785571794976184, "grad_norm": 0.09461524337530136, "learning_rate": 1.843653469430366e-05, "loss": 0.3383, "step": 11536 }, { "epoch": 1.8787200260554493, "grad_norm": 0.14461618661880493, "learning_rate": 1.84319639583304e-05, "loss": 0.3079, "step": 11537 }, { "epoch": 1.8788828726132802, "grad_norm": 0.14336372911930084, "learning_rate": 1.842739345816932e-05, "loss": 0.3259, "step": 11538 }, { "epoch": 1.8790457191711112, "grad_norm": 0.07881443947553635, "learning_rate": 1.84228231939845e-05, "loss": 0.3306, "step": 11539 }, { "epoch": 1.879208565728942, "grad_norm": 0.1017657071352005, "learning_rate": 1.8418253165940037e-05, "loss": 0.3157, "step": 11540 }, { "epoch": 1.8793714122867726, "grad_norm": 0.09995800256729126, "learning_rate": 1.8413683374200003e-05, "loss": 0.3433, "step": 11541 }, { "epoch": 1.8795342588446036, "grad_norm": 0.13489724695682526, "learning_rate": 1.8409113818928464e-05, "loss": 0.3351, "step": 11542 }, { "epoch": 1.8796971054024345, "grad_norm": 0.10668379813432693, "learning_rate": 1.8404544500289494e-05, "loss": 0.3233, "step": 11543 }, { "epoch": 1.8798599519602655, "grad_norm": 0.11324296146631241, "learning_rate": 1.8399975418447136e-05, "loss": 0.3381, "step": 11544 }, { "epoch": 1.8800227985180964, "grad_norm": 0.10318897664546967, "learning_rate": 1.839540657356544e-05, "loss": 0.3326, "step": 11545 }, { "epoch": 1.8801856450759273, "grad_norm": 0.11894802004098892, "learning_rate": 1.8390837965808418e-05, "loss": 0.3337, "step": 11546 }, { "epoch": 1.880348491633758, "grad_norm": 0.10833054780960083, "learning_rate": 1.8386269595340128e-05, "loss": 0.3285, "step": 11547 }, { "epoch": 1.880511338191589, "grad_norm": 0.09513181447982788, "learning_rate": 1.8381701462324576e-05, "loss": 0.2687, "step": 11548 }, { "epoch": 1.8806741847494197, "grad_norm": 0.08486883342266083, "learning_rate": 1.837713356692577e-05, "loss": 0.2882, "step": 11549 }, { "epoch": 1.8808370313072507, "grad_norm": 0.13971497118473053, "learning_rate": 1.8372565909307702e-05, "loss": 0.3226, "step": 11550 }, { "epoch": 1.8809998778650816, "grad_norm": 0.09252872318029404, "learning_rate": 1.8367998489634384e-05, "loss": 0.3013, "step": 11551 }, { "epoch": 1.8811627244229125, "grad_norm": 0.14520032703876495, "learning_rate": 1.836343130806979e-05, "loss": 0.3029, "step": 11552 }, { "epoch": 1.8813255709807435, "grad_norm": 0.11071161925792694, "learning_rate": 1.8358864364777882e-05, "loss": 0.3268, "step": 11553 }, { "epoch": 1.8814884175385744, "grad_norm": 0.11371198296546936, "learning_rate": 1.8354297659922658e-05, "loss": 0.2759, "step": 11554 }, { "epoch": 1.8816512640964052, "grad_norm": 0.11892037838697433, "learning_rate": 1.8349731193668052e-05, "loss": 0.3206, "step": 11555 }, { "epoch": 1.881814110654236, "grad_norm": 0.08390042930841446, "learning_rate": 1.8345164966178018e-05, "loss": 0.3345, "step": 11556 }, { "epoch": 1.8819769572120668, "grad_norm": 0.0748319923877716, "learning_rate": 1.8340598977616503e-05, "loss": 0.2964, "step": 11557 }, { "epoch": 1.8821398037698978, "grad_norm": 0.10925975441932678, "learning_rate": 1.833603322814744e-05, "loss": 0.2989, "step": 11558 }, { "epoch": 1.8823026503277287, "grad_norm": 0.10064654052257538, "learning_rate": 1.8331467717934743e-05, "loss": 0.2904, "step": 11559 }, { "epoch": 1.8824654968855596, "grad_norm": 0.1271730214357376, "learning_rate": 1.8326902447142334e-05, "loss": 0.3057, "step": 11560 }, { "epoch": 1.8826283434433906, "grad_norm": 0.11270549893379211, "learning_rate": 1.8322337415934126e-05, "loss": 0.3015, "step": 11561 }, { "epoch": 1.8827911900012213, "grad_norm": 0.13549695909023285, "learning_rate": 1.8317772624474013e-05, "loss": 0.3291, "step": 11562 }, { "epoch": 1.8829540365590522, "grad_norm": 0.11301019042730331, "learning_rate": 1.8313208072925882e-05, "loss": 0.3066, "step": 11563 }, { "epoch": 1.883116883116883, "grad_norm": 0.15113385021686554, "learning_rate": 1.8308643761453607e-05, "loss": 0.2727, "step": 11564 }, { "epoch": 1.883279729674714, "grad_norm": 0.07554606348276138, "learning_rate": 1.830407969022108e-05, "loss": 0.3111, "step": 11565 }, { "epoch": 1.8834425762325449, "grad_norm": 0.14280200004577637, "learning_rate": 1.829951585939215e-05, "loss": 0.3012, "step": 11566 }, { "epoch": 1.8836054227903758, "grad_norm": 0.09914250671863556, "learning_rate": 1.829495226913068e-05, "loss": 0.374, "step": 11567 }, { "epoch": 1.8837682693482067, "grad_norm": 0.11018873751163483, "learning_rate": 1.82903889196005e-05, "loss": 0.3216, "step": 11568 }, { "epoch": 1.8839311159060377, "grad_norm": 0.09634512662887573, "learning_rate": 1.828582581096547e-05, "loss": 0.2978, "step": 11569 }, { "epoch": 1.8840939624638684, "grad_norm": 0.09746000170707703, "learning_rate": 1.828126294338941e-05, "loss": 0.3168, "step": 11570 }, { "epoch": 1.8842568090216993, "grad_norm": 0.09538301080465317, "learning_rate": 1.8276700317036132e-05, "loss": 0.2995, "step": 11571 }, { "epoch": 1.88441965557953, "grad_norm": 0.11808541417121887, "learning_rate": 1.8272137932069463e-05, "loss": 0.3321, "step": 11572 }, { "epoch": 1.884582502137361, "grad_norm": 0.09294930100440979, "learning_rate": 1.8267575788653203e-05, "loss": 0.3092, "step": 11573 }, { "epoch": 1.884745348695192, "grad_norm": 0.12513774633407593, "learning_rate": 1.826301388695114e-05, "loss": 0.3226, "step": 11574 }, { "epoch": 1.8849081952530229, "grad_norm": 0.10551203787326813, "learning_rate": 1.8258452227127053e-05, "loss": 0.3281, "step": 11575 }, { "epoch": 1.8850710418108538, "grad_norm": 0.10329902917146683, "learning_rate": 1.8253890809344742e-05, "loss": 0.3341, "step": 11576 }, { "epoch": 1.8852338883686846, "grad_norm": 0.12498684227466583, "learning_rate": 1.8249329633767955e-05, "loss": 0.3071, "step": 11577 }, { "epoch": 1.8853967349265155, "grad_norm": 0.11277373880147934, "learning_rate": 1.8244768700560467e-05, "loss": 0.312, "step": 11578 }, { "epoch": 1.8855595814843462, "grad_norm": 0.12773427367210388, "learning_rate": 1.8240208009886017e-05, "loss": 0.2879, "step": 11579 }, { "epoch": 1.8857224280421772, "grad_norm": 0.12088712304830551, "learning_rate": 1.823564756190835e-05, "loss": 0.3253, "step": 11580 }, { "epoch": 1.885885274600008, "grad_norm": 0.12150489538908005, "learning_rate": 1.823108735679121e-05, "loss": 0.3796, "step": 11581 }, { "epoch": 1.886048121157839, "grad_norm": 0.1354999989271164, "learning_rate": 1.82265273946983e-05, "loss": 0.3132, "step": 11582 }, { "epoch": 1.88621096771567, "grad_norm": 0.12307904660701752, "learning_rate": 1.822196767579336e-05, "loss": 0.3122, "step": 11583 }, { "epoch": 1.886373814273501, "grad_norm": 0.11354686319828033, "learning_rate": 1.8217408200240094e-05, "loss": 0.3372, "step": 11584 }, { "epoch": 1.8865366608313316, "grad_norm": 0.12214557826519012, "learning_rate": 1.821284896820219e-05, "loss": 0.3153, "step": 11585 }, { "epoch": 1.8866995073891626, "grad_norm": 0.1310705542564392, "learning_rate": 1.8208289979843336e-05, "loss": 0.3186, "step": 11586 }, { "epoch": 1.8868623539469933, "grad_norm": 0.14275473356246948, "learning_rate": 1.8203731235327225e-05, "loss": 0.323, "step": 11587 }, { "epoch": 1.8870252005048243, "grad_norm": 0.0945548266172409, "learning_rate": 1.819917273481753e-05, "loss": 0.3139, "step": 11588 }, { "epoch": 1.8871880470626552, "grad_norm": 0.13888128101825714, "learning_rate": 1.819461447847791e-05, "loss": 0.3198, "step": 11589 }, { "epoch": 1.8873508936204861, "grad_norm": 0.12823772430419922, "learning_rate": 1.8190056466472005e-05, "loss": 0.3183, "step": 11590 }, { "epoch": 1.887513740178317, "grad_norm": 0.07918277382850647, "learning_rate": 1.8185498698963492e-05, "loss": 0.2736, "step": 11591 }, { "epoch": 1.887676586736148, "grad_norm": 0.13438382744789124, "learning_rate": 1.8180941176115988e-05, "loss": 0.3091, "step": 11592 }, { "epoch": 1.8878394332939787, "grad_norm": 0.09210003167390823, "learning_rate": 1.8176383898093118e-05, "loss": 0.3171, "step": 11593 }, { "epoch": 1.8880022798518097, "grad_norm": 0.1209377646446228, "learning_rate": 1.8171826865058515e-05, "loss": 0.2955, "step": 11594 }, { "epoch": 1.8881651264096404, "grad_norm": 0.13011370599269867, "learning_rate": 1.8167270077175794e-05, "loss": 0.3265, "step": 11595 }, { "epoch": 1.8883279729674713, "grad_norm": 0.09084661304950714, "learning_rate": 1.8162713534608538e-05, "loss": 0.3196, "step": 11596 }, { "epoch": 1.8884908195253023, "grad_norm": 0.15566463768482208, "learning_rate": 1.815815723752035e-05, "loss": 0.337, "step": 11597 }, { "epoch": 1.8886536660831332, "grad_norm": 0.10785068571567535, "learning_rate": 1.815360118607482e-05, "loss": 0.2908, "step": 11598 }, { "epoch": 1.8888165126409642, "grad_norm": 0.10088231414556503, "learning_rate": 1.8149045380435526e-05, "loss": 0.2925, "step": 11599 }, { "epoch": 1.888979359198795, "grad_norm": 0.12281890958547592, "learning_rate": 1.8144489820766024e-05, "loss": 0.3195, "step": 11600 }, { "epoch": 1.8891422057566258, "grad_norm": 0.09031768888235092, "learning_rate": 1.8139934507229873e-05, "loss": 0.3049, "step": 11601 }, { "epoch": 1.8893050523144566, "grad_norm": 0.11598950624465942, "learning_rate": 1.813537943999063e-05, "loss": 0.273, "step": 11602 }, { "epoch": 1.8894678988722875, "grad_norm": 0.12254055589437485, "learning_rate": 1.8130824619211833e-05, "loss": 0.3194, "step": 11603 }, { "epoch": 1.8896307454301184, "grad_norm": 0.15567591786384583, "learning_rate": 1.8126270045057007e-05, "loss": 0.3784, "step": 11604 }, { "epoch": 1.8897935919879494, "grad_norm": 0.09059968590736389, "learning_rate": 1.8121715717689685e-05, "loss": 0.3507, "step": 11605 }, { "epoch": 1.8899564385457803, "grad_norm": 0.10747475922107697, "learning_rate": 1.8117161637273383e-05, "loss": 0.3294, "step": 11606 }, { "epoch": 1.8901192851036113, "grad_norm": 0.1269347369670868, "learning_rate": 1.8112607803971597e-05, "loss": 0.3087, "step": 11607 }, { "epoch": 1.890282131661442, "grad_norm": 0.11233825236558914, "learning_rate": 1.8108054217947816e-05, "loss": 0.3226, "step": 11608 }, { "epoch": 1.890444978219273, "grad_norm": 0.08129613101482391, "learning_rate": 1.8103500879365544e-05, "loss": 0.3169, "step": 11609 }, { "epoch": 1.8906078247771037, "grad_norm": 0.11837383359670639, "learning_rate": 1.809894778838826e-05, "loss": 0.3192, "step": 11610 }, { "epoch": 1.8907706713349346, "grad_norm": 0.13077136874198914, "learning_rate": 1.809439494517941e-05, "loss": 0.3302, "step": 11611 }, { "epoch": 1.8909335178927655, "grad_norm": 0.07783914357423782, "learning_rate": 1.8089842349902482e-05, "loss": 0.3651, "step": 11612 }, { "epoch": 1.8910963644505965, "grad_norm": 0.19836054742336273, "learning_rate": 1.808529000272092e-05, "loss": 0.3255, "step": 11613 }, { "epoch": 1.8912592110084274, "grad_norm": 0.1088390201330185, "learning_rate": 1.8080737903798157e-05, "loss": 0.2834, "step": 11614 }, { "epoch": 1.8914220575662581, "grad_norm": 0.14221785962581635, "learning_rate": 1.8076186053297634e-05, "loss": 0.2771, "step": 11615 }, { "epoch": 1.891584904124089, "grad_norm": 0.13261668384075165, "learning_rate": 1.807163445138278e-05, "loss": 0.2812, "step": 11616 }, { "epoch": 1.8917477506819198, "grad_norm": 0.10453379899263382, "learning_rate": 1.8067083098217006e-05, "loss": 0.3011, "step": 11617 }, { "epoch": 1.8919105972397507, "grad_norm": 0.1506885290145874, "learning_rate": 1.806253199396372e-05, "loss": 0.3204, "step": 11618 }, { "epoch": 1.8920734437975817, "grad_norm": 0.14647938311100006, "learning_rate": 1.8057981138786318e-05, "loss": 0.3272, "step": 11619 }, { "epoch": 1.8922362903554126, "grad_norm": 0.10468850284814835, "learning_rate": 1.8053430532848198e-05, "loss": 0.3157, "step": 11620 }, { "epoch": 1.8923991369132436, "grad_norm": 0.10825475305318832, "learning_rate": 1.804888017631273e-05, "loss": 0.3399, "step": 11621 }, { "epoch": 1.8925619834710745, "grad_norm": 0.1636563092470169, "learning_rate": 1.8044330069343285e-05, "loss": 0.3491, "step": 11622 }, { "epoch": 1.8927248300289052, "grad_norm": 0.11653606593608856, "learning_rate": 1.8039780212103242e-05, "loss": 0.3413, "step": 11623 }, { "epoch": 1.8928876765867362, "grad_norm": 0.10397735238075256, "learning_rate": 1.8035230604755936e-05, "loss": 0.3005, "step": 11624 }, { "epoch": 1.893050523144567, "grad_norm": 0.10800731182098389, "learning_rate": 1.8030681247464722e-05, "loss": 0.3113, "step": 11625 }, { "epoch": 1.8932133697023978, "grad_norm": 0.06379927694797516, "learning_rate": 1.802613214039292e-05, "loss": 0.2948, "step": 11626 }, { "epoch": 1.8933762162602288, "grad_norm": 0.11468283832073212, "learning_rate": 1.8021583283703884e-05, "loss": 0.3158, "step": 11627 }, { "epoch": 1.8935390628180597, "grad_norm": 0.14238367974758148, "learning_rate": 1.8017034677560913e-05, "loss": 0.3399, "step": 11628 }, { "epoch": 1.8937019093758907, "grad_norm": 0.08833326399326324, "learning_rate": 1.8012486322127313e-05, "loss": 0.329, "step": 11629 }, { "epoch": 1.8938647559337216, "grad_norm": 0.14855098724365234, "learning_rate": 1.8007938217566387e-05, "loss": 0.3248, "step": 11630 }, { "epoch": 1.8940276024915523, "grad_norm": 0.12200628221035004, "learning_rate": 1.8003390364041432e-05, "loss": 0.3448, "step": 11631 }, { "epoch": 1.8941904490493833, "grad_norm": 0.11145056784152985, "learning_rate": 1.7998842761715733e-05, "loss": 0.3079, "step": 11632 }, { "epoch": 1.894353295607214, "grad_norm": 0.12239202111959457, "learning_rate": 1.799429541075254e-05, "loss": 0.3145, "step": 11633 }, { "epoch": 1.894516142165045, "grad_norm": 0.13250376284122467, "learning_rate": 1.798974831131514e-05, "loss": 0.3187, "step": 11634 }, { "epoch": 1.8946789887228759, "grad_norm": 0.19328875839710236, "learning_rate": 1.7985201463566774e-05, "loss": 0.3509, "step": 11635 }, { "epoch": 1.8948418352807068, "grad_norm": 0.13962644338607788, "learning_rate": 1.7980654867670698e-05, "loss": 0.3247, "step": 11636 }, { "epoch": 1.8950046818385378, "grad_norm": 0.12322686612606049, "learning_rate": 1.797610852379014e-05, "loss": 0.2911, "step": 11637 }, { "epoch": 1.8951675283963685, "grad_norm": 0.1499435007572174, "learning_rate": 1.797156243208833e-05, "loss": 0.32, "step": 11638 }, { "epoch": 1.8953303749541994, "grad_norm": 0.13121186196804047, "learning_rate": 1.7967016592728486e-05, "loss": 0.3014, "step": 11639 }, { "epoch": 1.8954932215120301, "grad_norm": 0.0976463332772255, "learning_rate": 1.7962471005873825e-05, "loss": 0.3494, "step": 11640 }, { "epoch": 1.895656068069861, "grad_norm": 0.10619697719812393, "learning_rate": 1.7957925671687527e-05, "loss": 0.299, "step": 11641 }, { "epoch": 1.895818914627692, "grad_norm": 0.1247241422533989, "learning_rate": 1.79533805903328e-05, "loss": 0.2822, "step": 11642 }, { "epoch": 1.895981761185523, "grad_norm": 0.12649184465408325, "learning_rate": 1.794883576197283e-05, "loss": 0.3072, "step": 11643 }, { "epoch": 1.896144607743354, "grad_norm": 0.18713322281837463, "learning_rate": 1.794429118677077e-05, "loss": 0.3465, "step": 11644 }, { "epoch": 1.8963074543011849, "grad_norm": 0.10706456005573273, "learning_rate": 1.7939746864889807e-05, "loss": 0.3152, "step": 11645 }, { "epoch": 1.8964703008590156, "grad_norm": 0.15337151288986206, "learning_rate": 1.7935202796493083e-05, "loss": 0.2943, "step": 11646 }, { "epoch": 1.8966331474168465, "grad_norm": 0.10558625310659409, "learning_rate": 1.7930658981743745e-05, "loss": 0.2958, "step": 11647 }, { "epoch": 1.8967959939746772, "grad_norm": 0.1169879361987114, "learning_rate": 1.792611542080492e-05, "loss": 0.3411, "step": 11648 }, { "epoch": 1.8969588405325082, "grad_norm": 0.1170993223786354, "learning_rate": 1.792157211383976e-05, "loss": 0.3735, "step": 11649 }, { "epoch": 1.8971216870903391, "grad_norm": 0.085910864174366, "learning_rate": 1.791702906101136e-05, "loss": 0.2903, "step": 11650 }, { "epoch": 1.89728453364817, "grad_norm": 0.08952578157186508, "learning_rate": 1.7912486262482835e-05, "loss": 0.2909, "step": 11651 }, { "epoch": 1.897447380206001, "grad_norm": 0.08953403681516647, "learning_rate": 1.7907943718417292e-05, "loss": 0.3311, "step": 11652 }, { "epoch": 1.8976102267638317, "grad_norm": 0.12073899805545807, "learning_rate": 1.790340142897782e-05, "loss": 0.3595, "step": 11653 }, { "epoch": 1.8977730733216627, "grad_norm": 0.13192817568778992, "learning_rate": 1.7898859394327495e-05, "loss": 0.2749, "step": 11654 }, { "epoch": 1.8979359198794934, "grad_norm": 0.10344406217336655, "learning_rate": 1.7894317614629393e-05, "loss": 0.2989, "step": 11655 }, { "epoch": 1.8980987664373243, "grad_norm": 0.13126957416534424, "learning_rate": 1.788977609004658e-05, "loss": 0.2909, "step": 11656 }, { "epoch": 1.8982616129951553, "grad_norm": 0.05585058405995369, "learning_rate": 1.7885234820742108e-05, "loss": 0.3367, "step": 11657 }, { "epoch": 1.8984244595529862, "grad_norm": 0.1414984166622162, "learning_rate": 1.7880693806879026e-05, "loss": 0.2958, "step": 11658 }, { "epoch": 1.8985873061108172, "grad_norm": 0.10339955985546112, "learning_rate": 1.7876153048620357e-05, "loss": 0.2876, "step": 11659 }, { "epoch": 1.898750152668648, "grad_norm": 0.17297276854515076, "learning_rate": 1.7871612546129148e-05, "loss": 0.3177, "step": 11660 }, { "epoch": 1.8989129992264788, "grad_norm": 0.11774206906557083, "learning_rate": 1.7867072299568398e-05, "loss": 0.3106, "step": 11661 }, { "epoch": 1.8990758457843098, "grad_norm": 0.10846026986837387, "learning_rate": 1.786253230910112e-05, "loss": 0.3155, "step": 11662 }, { "epoch": 1.8992386923421405, "grad_norm": 0.08787602186203003, "learning_rate": 1.7857992574890327e-05, "loss": 0.314, "step": 11663 }, { "epoch": 1.8994015388999714, "grad_norm": 0.10058999806642532, "learning_rate": 1.7853453097098998e-05, "loss": 0.2778, "step": 11664 }, { "epoch": 1.8995643854578024, "grad_norm": 0.14585529267787933, "learning_rate": 1.784891387589011e-05, "loss": 0.3075, "step": 11665 }, { "epoch": 1.8997272320156333, "grad_norm": 0.1225864440202713, "learning_rate": 1.7844374911426635e-05, "loss": 0.3572, "step": 11666 }, { "epoch": 1.8998900785734643, "grad_norm": 0.12012714892625809, "learning_rate": 1.7839836203871547e-05, "loss": 0.2743, "step": 11667 }, { "epoch": 1.9000529251312952, "grad_norm": 0.15147317945957184, "learning_rate": 1.783529775338779e-05, "loss": 0.329, "step": 11668 }, { "epoch": 1.900215771689126, "grad_norm": 0.11616642773151398, "learning_rate": 1.783075956013831e-05, "loss": 0.3198, "step": 11669 }, { "epoch": 1.9003786182469566, "grad_norm": 0.1343894600868225, "learning_rate": 1.7826221624286036e-05, "loss": 0.2897, "step": 11670 }, { "epoch": 1.9005414648047876, "grad_norm": 0.11145167797803879, "learning_rate": 1.7821683945993906e-05, "loss": 0.3097, "step": 11671 }, { "epoch": 1.9007043113626185, "grad_norm": 0.18150544166564941, "learning_rate": 1.7817146525424822e-05, "loss": 0.3008, "step": 11672 }, { "epoch": 1.9008671579204495, "grad_norm": 0.2092735469341278, "learning_rate": 1.78126093627417e-05, "loss": 0.3161, "step": 11673 }, { "epoch": 1.9010300044782804, "grad_norm": 0.08331454545259476, "learning_rate": 1.7808072458107442e-05, "loss": 0.302, "step": 11674 }, { "epoch": 1.9011928510361114, "grad_norm": 0.15693199634552002, "learning_rate": 1.7803535811684923e-05, "loss": 0.3595, "step": 11675 }, { "epoch": 1.901355697593942, "grad_norm": 0.09636957198381424, "learning_rate": 1.7798999423637038e-05, "loss": 0.2948, "step": 11676 }, { "epoch": 1.901518544151773, "grad_norm": 0.11222387105226517, "learning_rate": 1.7794463294126634e-05, "loss": 0.2989, "step": 11677 }, { "epoch": 1.9016813907096037, "grad_norm": 0.10869741439819336, "learning_rate": 1.77899274233166e-05, "loss": 0.3104, "step": 11678 }, { "epoch": 1.9018442372674347, "grad_norm": 0.13497893512248993, "learning_rate": 1.778539181136977e-05, "loss": 0.3179, "step": 11679 }, { "epoch": 1.9020070838252656, "grad_norm": 0.17214660346508026, "learning_rate": 1.778085645844899e-05, "loss": 0.3563, "step": 11680 }, { "epoch": 1.9021699303830966, "grad_norm": 0.12074048072099686, "learning_rate": 1.7776321364717085e-05, "loss": 0.3184, "step": 11681 }, { "epoch": 1.9023327769409275, "grad_norm": 0.14652778208255768, "learning_rate": 1.7771786530336893e-05, "loss": 0.3512, "step": 11682 }, { "epoch": 1.9024956234987584, "grad_norm": 0.09864022582769394, "learning_rate": 1.7767251955471224e-05, "loss": 0.3623, "step": 11683 }, { "epoch": 1.9026584700565892, "grad_norm": 0.0759047269821167, "learning_rate": 1.7762717640282866e-05, "loss": 0.3115, "step": 11684 }, { "epoch": 1.90282131661442, "grad_norm": 0.11477918177843094, "learning_rate": 1.775818358493464e-05, "loss": 0.3187, "step": 11685 }, { "epoch": 1.9029841631722508, "grad_norm": 0.09625880420207977, "learning_rate": 1.775364978958932e-05, "loss": 0.3145, "step": 11686 }, { "epoch": 1.9031470097300818, "grad_norm": 0.10035127401351929, "learning_rate": 1.7749116254409686e-05, "loss": 0.3515, "step": 11687 }, { "epoch": 1.9033098562879127, "grad_norm": 0.1203756034374237, "learning_rate": 1.774458297955849e-05, "loss": 0.3059, "step": 11688 }, { "epoch": 1.9034727028457437, "grad_norm": 0.11763718724250793, "learning_rate": 1.7740049965198513e-05, "loss": 0.3104, "step": 11689 }, { "epoch": 1.9036355494035746, "grad_norm": 0.11115443706512451, "learning_rate": 1.7735517211492492e-05, "loss": 0.2905, "step": 11690 }, { "epoch": 1.9037983959614053, "grad_norm": 0.12831665575504303, "learning_rate": 1.7730984718603165e-05, "loss": 0.2856, "step": 11691 }, { "epoch": 1.9039612425192363, "grad_norm": 0.13505026698112488, "learning_rate": 1.7726452486693265e-05, "loss": 0.3499, "step": 11692 }, { "epoch": 1.904124089077067, "grad_norm": 0.14303483068943024, "learning_rate": 1.7721920515925517e-05, "loss": 0.3029, "step": 11693 }, { "epoch": 1.904286935634898, "grad_norm": 0.0852474793791771, "learning_rate": 1.7717388806462625e-05, "loss": 0.3221, "step": 11694 }, { "epoch": 1.9044497821927289, "grad_norm": 0.1034345030784607, "learning_rate": 1.7712857358467294e-05, "loss": 0.337, "step": 11695 }, { "epoch": 1.9046126287505598, "grad_norm": 0.08414818346500397, "learning_rate": 1.7708326172102213e-05, "loss": 0.3014, "step": 11696 }, { "epoch": 1.9047754753083908, "grad_norm": 0.10514597594738007, "learning_rate": 1.770379524753008e-05, "loss": 0.3234, "step": 11697 }, { "epoch": 1.9049383218662217, "grad_norm": 0.12641561031341553, "learning_rate": 1.7699264584913553e-05, "loss": 0.3298, "step": 11698 }, { "epoch": 1.9051011684240524, "grad_norm": 0.08716441690921783, "learning_rate": 1.769473418441529e-05, "loss": 0.3115, "step": 11699 }, { "epoch": 1.9052640149818834, "grad_norm": 0.1725885421037674, "learning_rate": 1.7690204046197973e-05, "loss": 0.3209, "step": 11700 }, { "epoch": 1.905426861539714, "grad_norm": 0.08970040827989578, "learning_rate": 1.7685674170424226e-05, "loss": 0.3177, "step": 11701 }, { "epoch": 1.905589708097545, "grad_norm": 0.13312821090221405, "learning_rate": 1.7681144557256694e-05, "loss": 0.2992, "step": 11702 }, { "epoch": 1.905752554655376, "grad_norm": 0.1272776871919632, "learning_rate": 1.767661520685799e-05, "loss": 0.3063, "step": 11703 }, { "epoch": 1.905915401213207, "grad_norm": 0.11696626991033554, "learning_rate": 1.7672086119390746e-05, "loss": 0.2701, "step": 11704 }, { "epoch": 1.9060782477710378, "grad_norm": 0.09492974728345871, "learning_rate": 1.766755729501757e-05, "loss": 0.3197, "step": 11705 }, { "epoch": 1.9062410943288686, "grad_norm": 0.1573186218738556, "learning_rate": 1.7663028733901045e-05, "loss": 0.3535, "step": 11706 }, { "epoch": 1.9064039408866995, "grad_norm": 0.10559627413749695, "learning_rate": 1.765850043620378e-05, "loss": 0.2929, "step": 11707 }, { "epoch": 1.9065667874445302, "grad_norm": 0.06881680339574814, "learning_rate": 1.7653972402088347e-05, "loss": 0.3153, "step": 11708 }, { "epoch": 1.9067296340023612, "grad_norm": 0.13292817771434784, "learning_rate": 1.7649444631717312e-05, "loss": 0.339, "step": 11709 }, { "epoch": 1.9068924805601921, "grad_norm": 0.08624690771102905, "learning_rate": 1.764491712525323e-05, "loss": 0.2691, "step": 11710 }, { "epoch": 1.907055327118023, "grad_norm": 0.16223885118961334, "learning_rate": 1.7640389882858664e-05, "loss": 0.3401, "step": 11711 }, { "epoch": 1.907218173675854, "grad_norm": 0.20499499142169952, "learning_rate": 1.7635862904696153e-05, "loss": 0.3174, "step": 11712 }, { "epoch": 1.907381020233685, "grad_norm": 0.12479681521654129, "learning_rate": 1.7631336190928223e-05, "loss": 0.2925, "step": 11713 }, { "epoch": 1.9075438667915157, "grad_norm": 0.07659479975700378, "learning_rate": 1.762680974171741e-05, "loss": 0.3108, "step": 11714 }, { "epoch": 1.9077067133493466, "grad_norm": 0.1258622258901596, "learning_rate": 1.762228355722621e-05, "loss": 0.3477, "step": 11715 }, { "epoch": 1.9078695599071773, "grad_norm": 0.09176552295684814, "learning_rate": 1.7617757637617138e-05, "loss": 0.2887, "step": 11716 }, { "epoch": 1.9080324064650083, "grad_norm": 0.10770301520824432, "learning_rate": 1.7613231983052673e-05, "loss": 0.3222, "step": 11717 }, { "epoch": 1.9081952530228392, "grad_norm": 0.07671777158975601, "learning_rate": 1.760870659369533e-05, "loss": 0.3068, "step": 11718 }, { "epoch": 1.9083580995806702, "grad_norm": 0.09379621595144272, "learning_rate": 1.760418146970756e-05, "loss": 0.2976, "step": 11719 }, { "epoch": 1.908520946138501, "grad_norm": 0.1010788083076477, "learning_rate": 1.759965661125183e-05, "loss": 0.2878, "step": 11720 }, { "epoch": 1.908683792696332, "grad_norm": 0.07262909412384033, "learning_rate": 1.7595132018490595e-05, "loss": 0.3031, "step": 11721 }, { "epoch": 1.9088466392541628, "grad_norm": 0.12632092833518982, "learning_rate": 1.7590607691586314e-05, "loss": 0.2995, "step": 11722 }, { "epoch": 1.9090094858119937, "grad_norm": 0.16238968074321747, "learning_rate": 1.7586083630701418e-05, "loss": 0.2929, "step": 11723 }, { "epoch": 1.9091723323698244, "grad_norm": 0.122609943151474, "learning_rate": 1.758155983599832e-05, "loss": 0.3159, "step": 11724 }, { "epoch": 1.9093351789276554, "grad_norm": 0.10674311965703964, "learning_rate": 1.757703630763946e-05, "loss": 0.3602, "step": 11725 }, { "epoch": 1.9094980254854863, "grad_norm": 0.08503088355064392, "learning_rate": 1.7572513045787237e-05, "loss": 0.2763, "step": 11726 }, { "epoch": 1.9096608720433172, "grad_norm": 0.13816408812999725, "learning_rate": 1.756799005060405e-05, "loss": 0.3394, "step": 11727 }, { "epoch": 1.9098237186011482, "grad_norm": 0.11190861463546753, "learning_rate": 1.7563467322252274e-05, "loss": 0.2942, "step": 11728 }, { "epoch": 1.909986565158979, "grad_norm": 0.08403045684099197, "learning_rate": 1.7558944860894312e-05, "loss": 0.3152, "step": 11729 }, { "epoch": 1.9101494117168099, "grad_norm": 0.09246427565813065, "learning_rate": 1.7554422666692527e-05, "loss": 0.3503, "step": 11730 }, { "epoch": 1.9103122582746406, "grad_norm": 0.09223178774118423, "learning_rate": 1.754990073980927e-05, "loss": 0.3048, "step": 11731 }, { "epoch": 1.9104751048324715, "grad_norm": 0.11711651086807251, "learning_rate": 1.7545379080406895e-05, "loss": 0.279, "step": 11732 }, { "epoch": 1.9106379513903025, "grad_norm": 0.1198967769742012, "learning_rate": 1.754085768864775e-05, "loss": 0.3198, "step": 11733 }, { "epoch": 1.9108007979481334, "grad_norm": 0.1846582442522049, "learning_rate": 1.753633656469416e-05, "loss": 0.336, "step": 11734 }, { "epoch": 1.9109636445059643, "grad_norm": 0.08881623297929764, "learning_rate": 1.753181570870845e-05, "loss": 0.3234, "step": 11735 }, { "epoch": 1.9111264910637953, "grad_norm": 0.10610169917345047, "learning_rate": 1.752729512085293e-05, "loss": 0.3107, "step": 11736 }, { "epoch": 1.911289337621626, "grad_norm": 0.09513803571462631, "learning_rate": 1.752277480128991e-05, "loss": 0.3204, "step": 11737 }, { "epoch": 1.911452184179457, "grad_norm": 0.08980356901884079, "learning_rate": 1.7518254750181674e-05, "loss": 0.2742, "step": 11738 }, { "epoch": 1.9116150307372877, "grad_norm": 0.1348404586315155, "learning_rate": 1.7513734967690498e-05, "loss": 0.2938, "step": 11739 }, { "epoch": 1.9117778772951186, "grad_norm": 0.13563896715641022, "learning_rate": 1.750921545397868e-05, "loss": 0.3066, "step": 11740 }, { "epoch": 1.9119407238529496, "grad_norm": 0.13345928490161896, "learning_rate": 1.750469620920847e-05, "loss": 0.2897, "step": 11741 }, { "epoch": 1.9121035704107805, "grad_norm": 0.08772587776184082, "learning_rate": 1.750017723354212e-05, "loss": 0.3197, "step": 11742 }, { "epoch": 1.9122664169686114, "grad_norm": 0.11195043474435806, "learning_rate": 1.749565852714187e-05, "loss": 0.3531, "step": 11743 }, { "epoch": 1.9124292635264422, "grad_norm": 0.12447302788496017, "learning_rate": 1.749114009016997e-05, "loss": 0.3151, "step": 11744 }, { "epoch": 1.912592110084273, "grad_norm": 0.14816774427890778, "learning_rate": 1.7486621922788637e-05, "loss": 0.3357, "step": 11745 }, { "epoch": 1.9127549566421038, "grad_norm": 0.17273086309432983, "learning_rate": 1.7482104025160078e-05, "loss": 0.3365, "step": 11746 }, { "epoch": 1.9129178031999348, "grad_norm": 0.07141746580600739, "learning_rate": 1.7477586397446523e-05, "loss": 0.3068, "step": 11747 }, { "epoch": 1.9130806497577657, "grad_norm": 0.11175624281167984, "learning_rate": 1.747306903981015e-05, "loss": 0.308, "step": 11748 }, { "epoch": 1.9132434963155966, "grad_norm": 0.07593166828155518, "learning_rate": 1.7468551952413146e-05, "loss": 0.3447, "step": 11749 }, { "epoch": 1.9134063428734276, "grad_norm": 0.13746362924575806, "learning_rate": 1.746403513541769e-05, "loss": 0.2698, "step": 11750 }, { "epoch": 1.9135691894312585, "grad_norm": 0.085909403860569, "learning_rate": 1.745951858898596e-05, "loss": 0.3145, "step": 11751 }, { "epoch": 1.9137320359890893, "grad_norm": 0.11818141490221024, "learning_rate": 1.7455002313280096e-05, "loss": 0.2778, "step": 11752 }, { "epoch": 1.9138948825469202, "grad_norm": 0.12141098082065582, "learning_rate": 1.745048630846226e-05, "loss": 0.3038, "step": 11753 }, { "epoch": 1.914057729104751, "grad_norm": 0.133087620139122, "learning_rate": 1.7445970574694575e-05, "loss": 0.3266, "step": 11754 }, { "epoch": 1.9142205756625819, "grad_norm": 0.11295033991336823, "learning_rate": 1.744145511213919e-05, "loss": 0.2817, "step": 11755 }, { "epoch": 1.9143834222204128, "grad_norm": 0.0806899219751358, "learning_rate": 1.743693992095821e-05, "loss": 0.28, "step": 11756 }, { "epoch": 1.9145462687782437, "grad_norm": 0.11120691150426865, "learning_rate": 1.743242500131374e-05, "loss": 0.3123, "step": 11757 }, { "epoch": 1.9147091153360747, "grad_norm": 0.13976335525512695, "learning_rate": 1.7427910353367892e-05, "loss": 0.334, "step": 11758 }, { "epoch": 1.9148719618939056, "grad_norm": 0.1274377405643463, "learning_rate": 1.742339597728275e-05, "loss": 0.3043, "step": 11759 }, { "epoch": 1.9150348084517363, "grad_norm": 0.16234776377677917, "learning_rate": 1.7418881873220395e-05, "loss": 0.3279, "step": 11760 }, { "epoch": 1.9151976550095673, "grad_norm": 0.0930294468998909, "learning_rate": 1.741436804134288e-05, "loss": 0.3144, "step": 11761 }, { "epoch": 1.915360501567398, "grad_norm": 0.05756830424070358, "learning_rate": 1.7409854481812295e-05, "loss": 0.3188, "step": 11762 }, { "epoch": 1.915523348125229, "grad_norm": 0.1401902586221695, "learning_rate": 1.740534119479067e-05, "loss": 0.3054, "step": 11763 }, { "epoch": 1.91568619468306, "grad_norm": 0.13028542697429657, "learning_rate": 1.7400828180440042e-05, "loss": 0.2976, "step": 11764 }, { "epoch": 1.9158490412408908, "grad_norm": 0.1370052695274353, "learning_rate": 1.7396315438922457e-05, "loss": 0.2935, "step": 11765 }, { "epoch": 1.9160118877987218, "grad_norm": 0.11211714148521423, "learning_rate": 1.7391802970399936e-05, "loss": 0.3032, "step": 11766 }, { "epoch": 1.9161747343565525, "grad_norm": 0.1680680215358734, "learning_rate": 1.7387290775034477e-05, "loss": 0.344, "step": 11767 }, { "epoch": 1.9163375809143834, "grad_norm": 0.11210651695728302, "learning_rate": 1.7382778852988076e-05, "loss": 0.3252, "step": 11768 }, { "epoch": 1.9165004274722142, "grad_norm": 0.10238708555698395, "learning_rate": 1.7378267204422748e-05, "loss": 0.3095, "step": 11769 }, { "epoch": 1.916663274030045, "grad_norm": 0.1166049912571907, "learning_rate": 1.737375582950046e-05, "loss": 0.3042, "step": 11770 }, { "epoch": 1.916826120587876, "grad_norm": 0.09276531636714935, "learning_rate": 1.7369244728383194e-05, "loss": 0.2662, "step": 11771 }, { "epoch": 1.916988967145707, "grad_norm": 0.10981453955173492, "learning_rate": 1.7364733901232895e-05, "loss": 0.3155, "step": 11772 }, { "epoch": 1.917151813703538, "grad_norm": 0.15785236656665802, "learning_rate": 1.7360223348211524e-05, "loss": 0.3126, "step": 11773 }, { "epoch": 1.9173146602613689, "grad_norm": 0.09904119372367859, "learning_rate": 1.7355713069481035e-05, "loss": 0.3315, "step": 11774 }, { "epoch": 1.9174775068191996, "grad_norm": 0.1001453772187233, "learning_rate": 1.735120306520333e-05, "loss": 0.3003, "step": 11775 }, { "epoch": 1.9176403533770305, "grad_norm": 0.10128377377986908, "learning_rate": 1.734669333554037e-05, "loss": 0.3077, "step": 11776 }, { "epoch": 1.9178031999348613, "grad_norm": 0.12320911139249802, "learning_rate": 1.7342183880654045e-05, "loss": 0.2945, "step": 11777 }, { "epoch": 1.9179660464926922, "grad_norm": 0.1545713245868683, "learning_rate": 1.7337674700706265e-05, "loss": 0.3107, "step": 11778 }, { "epoch": 1.9181288930505231, "grad_norm": 0.1059308648109436, "learning_rate": 1.733316579585891e-05, "loss": 0.2739, "step": 11779 }, { "epoch": 1.918291739608354, "grad_norm": 0.1029118150472641, "learning_rate": 1.732865716627388e-05, "loss": 0.3052, "step": 11780 }, { "epoch": 1.918454586166185, "grad_norm": 0.08757215738296509, "learning_rate": 1.7324148812113045e-05, "loss": 0.3214, "step": 11781 }, { "epoch": 1.9186174327240157, "grad_norm": 0.0950542613863945, "learning_rate": 1.7319640733538263e-05, "loss": 0.3283, "step": 11782 }, { "epoch": 1.9187802792818467, "grad_norm": 0.10527146607637405, "learning_rate": 1.731513293071138e-05, "loss": 0.3128, "step": 11783 }, { "epoch": 1.9189431258396774, "grad_norm": 0.15549980103969574, "learning_rate": 1.7310625403794262e-05, "loss": 0.3179, "step": 11784 }, { "epoch": 1.9191059723975084, "grad_norm": 0.10751418024301529, "learning_rate": 1.7306118152948725e-05, "loss": 0.3133, "step": 11785 }, { "epoch": 1.9192688189553393, "grad_norm": 0.16757436096668243, "learning_rate": 1.730161117833659e-05, "loss": 0.334, "step": 11786 }, { "epoch": 1.9194316655131702, "grad_norm": 0.11450694501399994, "learning_rate": 1.7297104480119693e-05, "loss": 0.3044, "step": 11787 }, { "epoch": 1.9195945120710012, "grad_norm": 0.12132222205400467, "learning_rate": 1.7292598058459818e-05, "loss": 0.3271, "step": 11788 }, { "epoch": 1.9197573586288321, "grad_norm": 0.11986882239580154, "learning_rate": 1.7288091913518762e-05, "loss": 0.2881, "step": 11789 }, { "epoch": 1.9199202051866628, "grad_norm": 0.144121453166008, "learning_rate": 1.728358604545831e-05, "loss": 0.3229, "step": 11790 }, { "epoch": 1.9200830517444938, "grad_norm": 0.13576872646808624, "learning_rate": 1.7279080454440238e-05, "loss": 0.314, "step": 11791 }, { "epoch": 1.9202458983023245, "grad_norm": 0.08850608766078949, "learning_rate": 1.7274575140626318e-05, "loss": 0.3044, "step": 11792 }, { "epoch": 1.9204087448601554, "grad_norm": 0.12247774004936218, "learning_rate": 1.7270070104178294e-05, "loss": 0.3345, "step": 11793 }, { "epoch": 1.9205715914179864, "grad_norm": 0.1273549348115921, "learning_rate": 1.7265565345257904e-05, "loss": 0.3297, "step": 11794 }, { "epoch": 1.9207344379758173, "grad_norm": 0.1075807511806488, "learning_rate": 1.7261060864026902e-05, "loss": 0.3035, "step": 11795 }, { "epoch": 1.9208972845336483, "grad_norm": 0.12953712046146393, "learning_rate": 1.7256556660647e-05, "loss": 0.3272, "step": 11796 }, { "epoch": 1.9210601310914792, "grad_norm": 0.14371520280838013, "learning_rate": 1.72520527352799e-05, "loss": 0.2991, "step": 11797 }, { "epoch": 1.92122297764931, "grad_norm": 0.09153024107217789, "learning_rate": 1.7247549088087335e-05, "loss": 0.3214, "step": 11798 }, { "epoch": 1.9213858242071407, "grad_norm": 0.11000179499387741, "learning_rate": 1.7243045719230984e-05, "loss": 0.3176, "step": 11799 }, { "epoch": 1.9215486707649716, "grad_norm": 0.1732851266860962, "learning_rate": 1.7238542628872533e-05, "loss": 0.3297, "step": 11800 }, { "epoch": 1.9217115173228025, "grad_norm": 0.15982653200626373, "learning_rate": 1.7234039817173645e-05, "loss": 0.3073, "step": 11801 }, { "epoch": 1.9218743638806335, "grad_norm": 0.19032318890094757, "learning_rate": 1.7229537284296003e-05, "loss": 0.3338, "step": 11802 }, { "epoch": 1.9220372104384644, "grad_norm": 0.11010438203811646, "learning_rate": 1.7225035030401257e-05, "loss": 0.2948, "step": 11803 }, { "epoch": 1.9222000569962954, "grad_norm": 0.11561037600040436, "learning_rate": 1.7220533055651046e-05, "loss": 0.2942, "step": 11804 }, { "epoch": 1.922362903554126, "grad_norm": 0.09405802190303802, "learning_rate": 1.7216031360206996e-05, "loss": 0.3365, "step": 11805 }, { "epoch": 1.922525750111957, "grad_norm": 0.1359473019838333, "learning_rate": 1.7211529944230753e-05, "loss": 0.2769, "step": 11806 }, { "epoch": 1.9226885966697878, "grad_norm": 0.1228596419095993, "learning_rate": 1.7207028807883914e-05, "loss": 0.3341, "step": 11807 }, { "epoch": 1.9228514432276187, "grad_norm": 0.1288251280784607, "learning_rate": 1.7202527951328088e-05, "loss": 0.3056, "step": 11808 }, { "epoch": 1.9230142897854496, "grad_norm": 0.27585428953170776, "learning_rate": 1.7198027374724873e-05, "loss": 0.3726, "step": 11809 }, { "epoch": 1.9231771363432806, "grad_norm": 0.11124221235513687, "learning_rate": 1.719352707823585e-05, "loss": 0.3103, "step": 11810 }, { "epoch": 1.9233399829011115, "grad_norm": 0.15780015289783478, "learning_rate": 1.71890270620226e-05, "loss": 0.3176, "step": 11811 }, { "epoch": 1.9235028294589425, "grad_norm": 0.12458858639001846, "learning_rate": 1.718452732624667e-05, "loss": 0.2889, "step": 11812 }, { "epoch": 1.9236656760167732, "grad_norm": 0.13057708740234375, "learning_rate": 1.718002787106963e-05, "loss": 0.2994, "step": 11813 }, { "epoch": 1.9238285225746041, "grad_norm": 0.10543674975633621, "learning_rate": 1.717552869665302e-05, "loss": 0.2904, "step": 11814 }, { "epoch": 1.9239913691324348, "grad_norm": 0.09245172888040543, "learning_rate": 1.7171029803158363e-05, "loss": 0.2595, "step": 11815 }, { "epoch": 1.9241542156902658, "grad_norm": 0.15671488642692566, "learning_rate": 1.7166531190747202e-05, "loss": 0.3128, "step": 11816 }, { "epoch": 1.9243170622480967, "grad_norm": 0.10563310980796814, "learning_rate": 1.7162032859581046e-05, "loss": 0.3071, "step": 11817 }, { "epoch": 1.9244799088059277, "grad_norm": 0.1432427614927292, "learning_rate": 1.715753480982139e-05, "loss": 0.2638, "step": 11818 }, { "epoch": 1.9246427553637586, "grad_norm": 0.10008155554533005, "learning_rate": 1.7153037041629722e-05, "loss": 0.3057, "step": 11819 }, { "epoch": 1.9248056019215893, "grad_norm": 0.16680492460727692, "learning_rate": 1.7148539555167542e-05, "loss": 0.3299, "step": 11820 }, { "epoch": 1.9249684484794203, "grad_norm": 0.12605562806129456, "learning_rate": 1.7144042350596317e-05, "loss": 0.3346, "step": 11821 }, { "epoch": 1.925131295037251, "grad_norm": 0.08729247003793716, "learning_rate": 1.713954542807751e-05, "loss": 0.3324, "step": 11822 }, { "epoch": 1.925294141595082, "grad_norm": 0.12290435284376144, "learning_rate": 1.713504878777256e-05, "loss": 0.3215, "step": 11823 }, { "epoch": 1.9254569881529129, "grad_norm": 0.14318020641803741, "learning_rate": 1.7130552429842933e-05, "loss": 0.3582, "step": 11824 }, { "epoch": 1.9256198347107438, "grad_norm": 0.07773816585540771, "learning_rate": 1.7126056354450052e-05, "loss": 0.2835, "step": 11825 }, { "epoch": 1.9257826812685748, "grad_norm": 0.14592519402503967, "learning_rate": 1.712156056175533e-05, "loss": 0.3348, "step": 11826 }, { "epoch": 1.9259455278264057, "grad_norm": 0.14270056784152985, "learning_rate": 1.7117065051920196e-05, "loss": 0.3425, "step": 11827 }, { "epoch": 1.9261083743842364, "grad_norm": 0.11925949901342392, "learning_rate": 1.7112569825106044e-05, "loss": 0.2828, "step": 11828 }, { "epoch": 1.9262712209420674, "grad_norm": 0.09815934300422668, "learning_rate": 1.7108074881474264e-05, "loss": 0.3025, "step": 11829 }, { "epoch": 1.926434067499898, "grad_norm": 0.0803106352686882, "learning_rate": 1.710358022118624e-05, "loss": 0.2871, "step": 11830 }, { "epoch": 1.926596914057729, "grad_norm": 0.09215357899665833, "learning_rate": 1.7099085844403346e-05, "loss": 0.3207, "step": 11831 }, { "epoch": 1.92675976061556, "grad_norm": 0.13118237257003784, "learning_rate": 1.7094591751286944e-05, "loss": 0.3258, "step": 11832 }, { "epoch": 1.926922607173391, "grad_norm": 0.09300114214420319, "learning_rate": 1.7090097941998384e-05, "loss": 0.2947, "step": 11833 }, { "epoch": 1.9270854537312219, "grad_norm": 0.14883553981781006, "learning_rate": 1.7085604416698996e-05, "loss": 0.2932, "step": 11834 }, { "epoch": 1.9272483002890526, "grad_norm": 0.09589625895023346, "learning_rate": 1.7081111175550135e-05, "loss": 0.3699, "step": 11835 }, { "epoch": 1.9274111468468835, "grad_norm": 0.11001838743686676, "learning_rate": 1.7076618218713107e-05, "loss": 0.2675, "step": 11836 }, { "epoch": 1.9275739934047142, "grad_norm": 0.13018658757209778, "learning_rate": 1.707212554634921e-05, "loss": 0.3142, "step": 11837 }, { "epoch": 1.9277368399625452, "grad_norm": 0.08491635322570801, "learning_rate": 1.7067633158619773e-05, "loss": 0.3053, "step": 11838 }, { "epoch": 1.9278996865203761, "grad_norm": 0.12413980811834335, "learning_rate": 1.7063141055686072e-05, "loss": 0.3238, "step": 11839 }, { "epoch": 1.928062533078207, "grad_norm": 0.1049141064286232, "learning_rate": 1.7058649237709385e-05, "loss": 0.2746, "step": 11840 }, { "epoch": 1.928225379636038, "grad_norm": 0.10855740308761597, "learning_rate": 1.705415770485097e-05, "loss": 0.321, "step": 11841 }, { "epoch": 1.928388226193869, "grad_norm": 0.10775019973516464, "learning_rate": 1.7049666457272116e-05, "loss": 0.3082, "step": 11842 }, { "epoch": 1.9285510727516997, "grad_norm": 0.11596208065748215, "learning_rate": 1.7045175495134054e-05, "loss": 0.3122, "step": 11843 }, { "epoch": 1.9287139193095306, "grad_norm": 0.06859321147203445, "learning_rate": 1.7040684818598025e-05, "loss": 0.3479, "step": 11844 }, { "epoch": 1.9288767658673613, "grad_norm": 0.08086620271205902, "learning_rate": 1.703619442782524e-05, "loss": 0.3405, "step": 11845 }, { "epoch": 1.9290396124251923, "grad_norm": 0.12447547167539597, "learning_rate": 1.7031704322976955e-05, "loss": 0.2722, "step": 11846 }, { "epoch": 1.9292024589830232, "grad_norm": 0.10182179510593414, "learning_rate": 1.702721450421435e-05, "loss": 0.3402, "step": 11847 }, { "epoch": 1.9293653055408542, "grad_norm": 0.13370636105537415, "learning_rate": 1.7022724971698634e-05, "loss": 0.3041, "step": 11848 }, { "epoch": 1.9295281520986851, "grad_norm": 0.08948229253292084, "learning_rate": 1.7018235725590993e-05, "loss": 0.2707, "step": 11849 }, { "epoch": 1.929690998656516, "grad_norm": 0.09699544310569763, "learning_rate": 1.70137467660526e-05, "loss": 0.2953, "step": 11850 }, { "epoch": 1.9298538452143468, "grad_norm": 0.12121236324310303, "learning_rate": 1.700925809324463e-05, "loss": 0.33, "step": 11851 }, { "epoch": 1.9300166917721777, "grad_norm": 0.11010488867759705, "learning_rate": 1.7004769707328228e-05, "loss": 0.2899, "step": 11852 }, { "epoch": 1.9301795383300084, "grad_norm": 0.07981991767883301, "learning_rate": 1.7000281608464554e-05, "loss": 0.3351, "step": 11853 }, { "epoch": 1.9303423848878394, "grad_norm": 0.08082617819309235, "learning_rate": 1.699579379681474e-05, "loss": 0.2957, "step": 11854 }, { "epoch": 1.9305052314456703, "grad_norm": 0.14584031701087952, "learning_rate": 1.6991306272539913e-05, "loss": 0.3005, "step": 11855 }, { "epoch": 1.9306680780035013, "grad_norm": 0.11167420446872711, "learning_rate": 1.6986819035801172e-05, "loss": 0.3018, "step": 11856 }, { "epoch": 1.9308309245613322, "grad_norm": 0.13629479706287384, "learning_rate": 1.6982332086759646e-05, "loss": 0.3353, "step": 11857 }, { "epoch": 1.930993771119163, "grad_norm": 0.08138075470924377, "learning_rate": 1.697784542557642e-05, "loss": 0.3425, "step": 11858 }, { "epoch": 1.9311566176769939, "grad_norm": 0.10519473254680634, "learning_rate": 1.697335905241257e-05, "loss": 0.302, "step": 11859 }, { "epoch": 1.9313194642348246, "grad_norm": 0.1264273226261139, "learning_rate": 1.6968872967429183e-05, "loss": 0.3661, "step": 11860 }, { "epoch": 1.9314823107926555, "grad_norm": 0.11400529742240906, "learning_rate": 1.6964387170787322e-05, "loss": 0.3202, "step": 11861 }, { "epoch": 1.9316451573504865, "grad_norm": 0.14624527096748352, "learning_rate": 1.6959901662648036e-05, "loss": 0.2825, "step": 11862 }, { "epoch": 1.9318080039083174, "grad_norm": 0.12900787591934204, "learning_rate": 1.6955416443172362e-05, "loss": 0.3024, "step": 11863 }, { "epoch": 1.9319708504661484, "grad_norm": 0.11239411681890488, "learning_rate": 1.695093151252135e-05, "loss": 0.2915, "step": 11864 }, { "epoch": 1.9321336970239793, "grad_norm": 0.09374441206455231, "learning_rate": 1.694644687085601e-05, "loss": 0.3154, "step": 11865 }, { "epoch": 1.93229654358181, "grad_norm": 0.07258298248052597, "learning_rate": 1.6941962518337344e-05, "loss": 0.3219, "step": 11866 }, { "epoch": 1.932459390139641, "grad_norm": 0.08468684554100037, "learning_rate": 1.693747845512638e-05, "loss": 0.3125, "step": 11867 }, { "epoch": 1.9326222366974717, "grad_norm": 0.12164926528930664, "learning_rate": 1.693299468138409e-05, "loss": 0.3324, "step": 11868 }, { "epoch": 1.9327850832553026, "grad_norm": 0.1716938018798828, "learning_rate": 1.6928511197271462e-05, "loss": 0.3277, "step": 11869 }, { "epoch": 1.9329479298131336, "grad_norm": 0.08481653034687042, "learning_rate": 1.6924028002949463e-05, "loss": 0.35, "step": 11870 }, { "epoch": 1.9331107763709645, "grad_norm": 0.08434104174375534, "learning_rate": 1.6919545098579052e-05, "loss": 0.33, "step": 11871 }, { "epoch": 1.9332736229287955, "grad_norm": 0.14542256295681, "learning_rate": 1.6915062484321194e-05, "loss": 0.2837, "step": 11872 }, { "epoch": 1.9334364694866262, "grad_norm": 0.0978262796998024, "learning_rate": 1.6910580160336807e-05, "loss": 0.3134, "step": 11873 }, { "epoch": 1.9335993160444571, "grad_norm": 0.12527497112751007, "learning_rate": 1.6906098126786824e-05, "loss": 0.3053, "step": 11874 }, { "epoch": 1.9337621626022878, "grad_norm": 0.08520355075597763, "learning_rate": 1.690161638383218e-05, "loss": 0.3106, "step": 11875 }, { "epoch": 1.9339250091601188, "grad_norm": 0.1278924196958542, "learning_rate": 1.6897134931633765e-05, "loss": 0.3167, "step": 11876 }, { "epoch": 1.9340878557179497, "grad_norm": 0.07808209955692291, "learning_rate": 1.6892653770352477e-05, "loss": 0.2844, "step": 11877 }, { "epoch": 1.9342507022757807, "grad_norm": 0.12821096181869507, "learning_rate": 1.688817290014922e-05, "loss": 0.3147, "step": 11878 }, { "epoch": 1.9344135488336116, "grad_norm": 0.11587554216384888, "learning_rate": 1.6883692321184855e-05, "loss": 0.2905, "step": 11879 }, { "epoch": 1.9345763953914425, "grad_norm": 0.14246755838394165, "learning_rate": 1.6879212033620258e-05, "loss": 0.2866, "step": 11880 }, { "epoch": 1.9347392419492733, "grad_norm": 0.11944877356290817, "learning_rate": 1.6874732037616263e-05, "loss": 0.366, "step": 11881 }, { "epoch": 1.9349020885071042, "grad_norm": 0.11901791393756866, "learning_rate": 1.687025233333375e-05, "loss": 0.3012, "step": 11882 }, { "epoch": 1.935064935064935, "grad_norm": 0.14828406274318695, "learning_rate": 1.6865772920933526e-05, "loss": 0.2978, "step": 11883 }, { "epoch": 1.9352277816227659, "grad_norm": 0.10180521756410599, "learning_rate": 1.6861293800576427e-05, "loss": 0.2948, "step": 11884 }, { "epoch": 1.9353906281805968, "grad_norm": 0.11545813083648682, "learning_rate": 1.6856814972423262e-05, "loss": 0.3214, "step": 11885 }, { "epoch": 1.9355534747384278, "grad_norm": 0.12922178208827972, "learning_rate": 1.6852336436634842e-05, "loss": 0.3465, "step": 11886 }, { "epoch": 1.9357163212962587, "grad_norm": 0.0616893395781517, "learning_rate": 1.684785819337195e-05, "loss": 0.3142, "step": 11887 }, { "epoch": 1.9358791678540896, "grad_norm": 0.13214357197284698, "learning_rate": 1.6843380242795375e-05, "loss": 0.2943, "step": 11888 }, { "epoch": 1.9360420144119204, "grad_norm": 0.09852975606918335, "learning_rate": 1.6838902585065885e-05, "loss": 0.3255, "step": 11889 }, { "epoch": 1.9362048609697513, "grad_norm": 0.09905978292226791, "learning_rate": 1.683442522034425e-05, "loss": 0.2755, "step": 11890 }, { "epoch": 1.936367707527582, "grad_norm": 0.11135633289813995, "learning_rate": 1.6829948148791218e-05, "loss": 0.3187, "step": 11891 }, { "epoch": 1.936530554085413, "grad_norm": 0.11294884234666824, "learning_rate": 1.682547137056751e-05, "loss": 0.3417, "step": 11892 }, { "epoch": 1.936693400643244, "grad_norm": 0.10881870985031128, "learning_rate": 1.6820994885833884e-05, "loss": 0.3279, "step": 11893 }, { "epoch": 1.9368562472010749, "grad_norm": 0.09735366702079773, "learning_rate": 1.6816518694751043e-05, "loss": 0.2964, "step": 11894 }, { "epoch": 1.9370190937589058, "grad_norm": 0.10569672286510468, "learning_rate": 1.6812042797479703e-05, "loss": 0.2929, "step": 11895 }, { "epoch": 1.9371819403167365, "grad_norm": 0.1139933243393898, "learning_rate": 1.680756719418055e-05, "loss": 0.3311, "step": 11896 }, { "epoch": 1.9373447868745675, "grad_norm": 0.06535530090332031, "learning_rate": 1.6803091885014285e-05, "loss": 0.3071, "step": 11897 }, { "epoch": 1.9375076334323982, "grad_norm": 0.08969425410032272, "learning_rate": 1.679861687014158e-05, "loss": 0.2851, "step": 11898 }, { "epoch": 1.9376704799902291, "grad_norm": 0.15137560665607452, "learning_rate": 1.6794142149723096e-05, "loss": 0.2873, "step": 11899 }, { "epoch": 1.93783332654806, "grad_norm": 0.11473938822746277, "learning_rate": 1.67896677239195e-05, "loss": 0.2875, "step": 11900 }, { "epoch": 1.937996173105891, "grad_norm": 0.08022987842559814, "learning_rate": 1.6785193592891436e-05, "loss": 0.3278, "step": 11901 }, { "epoch": 1.938159019663722, "grad_norm": 0.10360479354858398, "learning_rate": 1.678071975679953e-05, "loss": 0.3346, "step": 11902 }, { "epoch": 1.938321866221553, "grad_norm": 0.15894781053066254, "learning_rate": 1.6776246215804404e-05, "loss": 0.3267, "step": 11903 }, { "epoch": 1.9384847127793836, "grad_norm": 0.11234628409147263, "learning_rate": 1.6771772970066686e-05, "loss": 0.319, "step": 11904 }, { "epoch": 1.9386475593372146, "grad_norm": 0.12481695413589478, "learning_rate": 1.676730001974696e-05, "loss": 0.3171, "step": 11905 }, { "epoch": 1.9388104058950453, "grad_norm": 0.13959403336048126, "learning_rate": 1.6762827365005842e-05, "loss": 0.3018, "step": 11906 }, { "epoch": 1.9389732524528762, "grad_norm": 0.10308384895324707, "learning_rate": 1.6758355006003894e-05, "loss": 0.3163, "step": 11907 }, { "epoch": 1.9391360990107072, "grad_norm": 0.0937696099281311, "learning_rate": 1.6753882942901694e-05, "loss": 0.3127, "step": 11908 }, { "epoch": 1.939298945568538, "grad_norm": 0.09997352957725525, "learning_rate": 1.6749411175859804e-05, "loss": 0.2821, "step": 11909 }, { "epoch": 1.939461792126369, "grad_norm": 0.09827656298875809, "learning_rate": 1.6744939705038766e-05, "loss": 0.3059, "step": 11910 }, { "epoch": 1.9396246386841998, "grad_norm": 0.13794364035129547, "learning_rate": 1.6740468530599134e-05, "loss": 0.3247, "step": 11911 }, { "epoch": 1.9397874852420307, "grad_norm": 0.12359657883644104, "learning_rate": 1.673599765270143e-05, "loss": 0.2964, "step": 11912 }, { "epoch": 1.9399503317998614, "grad_norm": 0.09775324165821075, "learning_rate": 1.6731527071506167e-05, "loss": 0.2963, "step": 11913 }, { "epoch": 1.9401131783576924, "grad_norm": 0.08499384671449661, "learning_rate": 1.6727056787173845e-05, "loss": 0.3402, "step": 11914 }, { "epoch": 1.9402760249155233, "grad_norm": 0.11460547149181366, "learning_rate": 1.6722586799864982e-05, "loss": 0.2978, "step": 11915 }, { "epoch": 1.9404388714733543, "grad_norm": 0.1060710921883583, "learning_rate": 1.6718117109740055e-05, "loss": 0.2957, "step": 11916 }, { "epoch": 1.9406017180311852, "grad_norm": 0.07872603088617325, "learning_rate": 1.6713647716959523e-05, "loss": 0.3001, "step": 11917 }, { "epoch": 1.9407645645890161, "grad_norm": 0.11564185470342636, "learning_rate": 1.6709178621683876e-05, "loss": 0.2942, "step": 11918 }, { "epoch": 1.9409274111468469, "grad_norm": 0.11755678802728653, "learning_rate": 1.6704709824073554e-05, "loss": 0.3354, "step": 11919 }, { "epoch": 1.9410902577046778, "grad_norm": 0.08531107753515244, "learning_rate": 1.670024132428901e-05, "loss": 0.311, "step": 11920 }, { "epoch": 1.9412531042625085, "grad_norm": 0.12049046903848648, "learning_rate": 1.6695773122490654e-05, "loss": 0.3446, "step": 11921 }, { "epoch": 1.9414159508203395, "grad_norm": 0.09559465944766998, "learning_rate": 1.6691305218838932e-05, "loss": 0.305, "step": 11922 }, { "epoch": 1.9415787973781704, "grad_norm": 0.10758570581674576, "learning_rate": 1.668683761349425e-05, "loss": 0.3292, "step": 11923 }, { "epoch": 1.9417416439360013, "grad_norm": 0.08586875349283218, "learning_rate": 1.6682370306616995e-05, "loss": 0.3167, "step": 11924 }, { "epoch": 1.9419044904938323, "grad_norm": 0.130070760846138, "learning_rate": 1.667790329836757e-05, "loss": 0.3185, "step": 11925 }, { "epoch": 1.9420673370516632, "grad_norm": 0.11680477112531662, "learning_rate": 1.667343658890635e-05, "loss": 0.3453, "step": 11926 }, { "epoch": 1.942230183609494, "grad_norm": 0.08965839445590973, "learning_rate": 1.6668970178393704e-05, "loss": 0.2901, "step": 11927 }, { "epoch": 1.9423930301673247, "grad_norm": 0.10113365948200226, "learning_rate": 1.666450406698999e-05, "loss": 0.3057, "step": 11928 }, { "epoch": 1.9425558767251556, "grad_norm": 0.11497383564710617, "learning_rate": 1.6660038254855548e-05, "loss": 0.2858, "step": 11929 }, { "epoch": 1.9427187232829866, "grad_norm": 0.101193368434906, "learning_rate": 1.665557274215073e-05, "loss": 0.3304, "step": 11930 }, { "epoch": 1.9428815698408175, "grad_norm": 0.1058393344283104, "learning_rate": 1.6651107529035847e-05, "loss": 0.3395, "step": 11931 }, { "epoch": 1.9430444163986484, "grad_norm": 0.11113078147172928, "learning_rate": 1.664664261567121e-05, "loss": 0.3126, "step": 11932 }, { "epoch": 1.9432072629564794, "grad_norm": 0.07434891909360886, "learning_rate": 1.664217800221714e-05, "loss": 0.3039, "step": 11933 }, { "epoch": 1.94337010951431, "grad_norm": 0.0873650535941124, "learning_rate": 1.6637713688833922e-05, "loss": 0.2963, "step": 11934 }, { "epoch": 1.943532956072141, "grad_norm": 0.11562996357679367, "learning_rate": 1.6633249675681834e-05, "loss": 0.29, "step": 11935 }, { "epoch": 1.9436958026299718, "grad_norm": 0.10562970489263535, "learning_rate": 1.6628785962921144e-05, "loss": 0.3325, "step": 11936 }, { "epoch": 1.9438586491878027, "grad_norm": 0.1508275717496872, "learning_rate": 1.6624322550712124e-05, "loss": 0.3052, "step": 11937 }, { "epoch": 1.9440214957456337, "grad_norm": 0.13641494512557983, "learning_rate": 1.661985943921502e-05, "loss": 0.2683, "step": 11938 }, { "epoch": 1.9441843423034646, "grad_norm": 0.12012358009815216, "learning_rate": 1.6615396628590065e-05, "loss": 0.2953, "step": 11939 }, { "epoch": 1.9443471888612955, "grad_norm": 0.13541673123836517, "learning_rate": 1.6610934118997494e-05, "loss": 0.3175, "step": 11940 }, { "epoch": 1.9445100354191265, "grad_norm": 0.09232387691736221, "learning_rate": 1.6606471910597527e-05, "loss": 0.2765, "step": 11941 }, { "epoch": 1.9446728819769572, "grad_norm": 0.09673597663640976, "learning_rate": 1.6602010003550358e-05, "loss": 0.3141, "step": 11942 }, { "epoch": 1.9448357285347881, "grad_norm": 0.10250964015722275, "learning_rate": 1.6597548398016194e-05, "loss": 0.3136, "step": 11943 }, { "epoch": 1.9449985750926189, "grad_norm": 0.081905797123909, "learning_rate": 1.659308709415522e-05, "loss": 0.3754, "step": 11944 }, { "epoch": 1.9451614216504498, "grad_norm": 0.10974328219890594, "learning_rate": 1.6588626092127603e-05, "loss": 0.3317, "step": 11945 }, { "epoch": 1.9453242682082807, "grad_norm": 0.08646870404481888, "learning_rate": 1.6584165392093513e-05, "loss": 0.3026, "step": 11946 }, { "epoch": 1.9454871147661117, "grad_norm": 0.08021938055753708, "learning_rate": 1.6579704994213098e-05, "loss": 0.309, "step": 11947 }, { "epoch": 1.9456499613239426, "grad_norm": 0.12224966287612915, "learning_rate": 1.6575244898646504e-05, "loss": 0.3007, "step": 11948 }, { "epoch": 1.9458128078817734, "grad_norm": 0.09531548619270325, "learning_rate": 1.657078510555386e-05, "loss": 0.3042, "step": 11949 }, { "epoch": 1.9459756544396043, "grad_norm": 0.10215826332569122, "learning_rate": 1.6566325615095275e-05, "loss": 0.3134, "step": 11950 }, { "epoch": 1.946138500997435, "grad_norm": 0.11825734376907349, "learning_rate": 1.656186642743088e-05, "loss": 0.3305, "step": 11951 }, { "epoch": 1.946301347555266, "grad_norm": 0.1300080418586731, "learning_rate": 1.6557407542720763e-05, "loss": 0.3423, "step": 11952 }, { "epoch": 1.946464194113097, "grad_norm": 0.09714516252279282, "learning_rate": 1.655294896112501e-05, "loss": 0.2648, "step": 11953 }, { "epoch": 1.9466270406709278, "grad_norm": 0.11126753687858582, "learning_rate": 1.6548490682803683e-05, "loss": 0.2992, "step": 11954 }, { "epoch": 1.9467898872287588, "grad_norm": 0.13596010208129883, "learning_rate": 1.6544032707916873e-05, "loss": 0.2978, "step": 11955 }, { "epoch": 1.9469527337865897, "grad_norm": 0.1330583691596985, "learning_rate": 1.6539575036624625e-05, "loss": 0.3119, "step": 11956 }, { "epoch": 1.9471155803444204, "grad_norm": 0.14384089410305023, "learning_rate": 1.6535117669086975e-05, "loss": 0.3071, "step": 11957 }, { "epoch": 1.9472784269022514, "grad_norm": 0.08495409786701202, "learning_rate": 1.6530660605463967e-05, "loss": 0.3028, "step": 11958 }, { "epoch": 1.9474412734600821, "grad_norm": 0.11315208673477173, "learning_rate": 1.652620384591562e-05, "loss": 0.3128, "step": 11959 }, { "epoch": 1.947604120017913, "grad_norm": 0.09303538501262665, "learning_rate": 1.6521747390601942e-05, "loss": 0.3279, "step": 11960 }, { "epoch": 1.947766966575744, "grad_norm": 0.1039189025759697, "learning_rate": 1.6517291239682925e-05, "loss": 0.2967, "step": 11961 }, { "epoch": 1.947929813133575, "grad_norm": 0.17334304749965668, "learning_rate": 1.6512835393318578e-05, "loss": 0.3224, "step": 11962 }, { "epoch": 1.9480926596914059, "grad_norm": 0.1030326634645462, "learning_rate": 1.650837985166887e-05, "loss": 0.2679, "step": 11963 }, { "epoch": 1.9482555062492366, "grad_norm": 0.13522110879421234, "learning_rate": 1.6503924614893758e-05, "loss": 0.2925, "step": 11964 }, { "epoch": 1.9484183528070675, "grad_norm": 0.0995958000421524, "learning_rate": 1.6499469683153206e-05, "loss": 0.2894, "step": 11965 }, { "epoch": 1.9485811993648983, "grad_norm": 0.10865806043148041, "learning_rate": 1.6495015056607166e-05, "loss": 0.3181, "step": 11966 }, { "epoch": 1.9487440459227292, "grad_norm": 0.15252606570720673, "learning_rate": 1.649056073541557e-05, "loss": 0.3145, "step": 11967 }, { "epoch": 1.9489068924805601, "grad_norm": 0.11105897277593613, "learning_rate": 1.648610671973833e-05, "loss": 0.2815, "step": 11968 }, { "epoch": 1.949069739038391, "grad_norm": 0.0942128375172615, "learning_rate": 1.6481653009735373e-05, "loss": 0.283, "step": 11969 }, { "epoch": 1.949232585596222, "grad_norm": 0.12694542109966278, "learning_rate": 1.6477199605566594e-05, "loss": 0.2869, "step": 11970 }, { "epoch": 1.949395432154053, "grad_norm": 0.0970105454325676, "learning_rate": 1.647274650739189e-05, "loss": 0.2944, "step": 11971 }, { "epoch": 1.9495582787118837, "grad_norm": 0.10169120877981186, "learning_rate": 1.6468293715371122e-05, "loss": 0.3124, "step": 11972 }, { "epoch": 1.9497211252697146, "grad_norm": 0.12627393007278442, "learning_rate": 1.646384122966418e-05, "loss": 0.3161, "step": 11973 }, { "epoch": 1.9498839718275454, "grad_norm": 0.12408801168203354, "learning_rate": 1.6459389050430916e-05, "loss": 0.3178, "step": 11974 }, { "epoch": 1.9500468183853763, "grad_norm": 0.1196695864200592, "learning_rate": 1.645493717783117e-05, "loss": 0.3059, "step": 11975 }, { "epoch": 1.9502096649432072, "grad_norm": 0.10114367306232452, "learning_rate": 1.6450485612024774e-05, "loss": 0.3113, "step": 11976 }, { "epoch": 1.9503725115010382, "grad_norm": 0.101231649518013, "learning_rate": 1.6446034353171568e-05, "loss": 0.3844, "step": 11977 }, { "epoch": 1.9505353580588691, "grad_norm": 0.14841637015342712, "learning_rate": 1.6441583401431355e-05, "loss": 0.3041, "step": 11978 }, { "epoch": 1.9506982046167, "grad_norm": 0.11389967799186707, "learning_rate": 1.6437132756963932e-05, "loss": 0.2759, "step": 11979 }, { "epoch": 1.9508610511745308, "grad_norm": 0.1669287085533142, "learning_rate": 1.6432682419929106e-05, "loss": 0.3155, "step": 11980 }, { "epoch": 1.9510238977323617, "grad_norm": 0.11198905110359192, "learning_rate": 1.642823239048665e-05, "loss": 0.3042, "step": 11981 }, { "epoch": 1.9511867442901925, "grad_norm": 0.13341468572616577, "learning_rate": 1.6423782668796335e-05, "loss": 0.2962, "step": 11982 }, { "epoch": 1.9513495908480234, "grad_norm": 0.1058368906378746, "learning_rate": 1.641933325501791e-05, "loss": 0.3054, "step": 11983 }, { "epoch": 1.9515124374058543, "grad_norm": 0.128633514046669, "learning_rate": 1.6414884149311137e-05, "loss": 0.3402, "step": 11984 }, { "epoch": 1.9516752839636853, "grad_norm": 0.09898019582033157, "learning_rate": 1.6410435351835738e-05, "loss": 0.2997, "step": 11985 }, { "epoch": 1.9518381305215162, "grad_norm": 0.09360044449567795, "learning_rate": 1.640598686275145e-05, "loss": 0.3262, "step": 11986 }, { "epoch": 1.952000977079347, "grad_norm": 0.14204558730125427, "learning_rate": 1.640153868221797e-05, "loss": 0.2932, "step": 11987 }, { "epoch": 1.9521638236371779, "grad_norm": 0.08397158980369568, "learning_rate": 1.6397090810395027e-05, "loss": 0.2857, "step": 11988 }, { "epoch": 1.9523266701950086, "grad_norm": 0.08384855091571808, "learning_rate": 1.6392643247442295e-05, "loss": 0.3539, "step": 11989 }, { "epoch": 1.9524895167528395, "grad_norm": 0.0985892042517662, "learning_rate": 1.6388195993519455e-05, "loss": 0.3, "step": 11990 }, { "epoch": 1.9526523633106705, "grad_norm": 0.10011185705661774, "learning_rate": 1.6383749048786185e-05, "loss": 0.3342, "step": 11991 }, { "epoch": 1.9528152098685014, "grad_norm": 0.08572551608085632, "learning_rate": 1.637930241340214e-05, "loss": 0.3018, "step": 11992 }, { "epoch": 1.9529780564263324, "grad_norm": 0.1099909096956253, "learning_rate": 1.6374856087526968e-05, "loss": 0.3188, "step": 11993 }, { "epoch": 1.9531409029841633, "grad_norm": 0.09188774973154068, "learning_rate": 1.6370410071320295e-05, "loss": 0.2976, "step": 11994 }, { "epoch": 1.953303749541994, "grad_norm": 0.12304352223873138, "learning_rate": 1.6365964364941764e-05, "loss": 0.2942, "step": 11995 }, { "epoch": 1.953466596099825, "grad_norm": 0.0845554992556572, "learning_rate": 1.636151896855098e-05, "loss": 0.2992, "step": 11996 }, { "epoch": 1.9536294426576557, "grad_norm": 0.10019697993993759, "learning_rate": 1.6357073882307545e-05, "loss": 0.313, "step": 11997 }, { "epoch": 1.9537922892154866, "grad_norm": 0.11707551777362823, "learning_rate": 1.6352629106371044e-05, "loss": 0.2869, "step": 11998 }, { "epoch": 1.9539551357733176, "grad_norm": 0.09527058899402618, "learning_rate": 1.6348184640901076e-05, "loss": 0.3039, "step": 11999 }, { "epoch": 1.9541179823311485, "grad_norm": 0.13307109475135803, "learning_rate": 1.6343740486057206e-05, "loss": 0.3346, "step": 12000 }, { "epoch": 1.9542808288889795, "grad_norm": 0.1040816605091095, "learning_rate": 1.633929664199897e-05, "loss": 0.277, "step": 12001 }, { "epoch": 1.9544436754468102, "grad_norm": 0.10258481651544571, "learning_rate": 1.6334853108885946e-05, "loss": 0.3059, "step": 12002 }, { "epoch": 1.9546065220046411, "grad_norm": 0.13762633502483368, "learning_rate": 1.633040988687765e-05, "loss": 0.2777, "step": 12003 }, { "epoch": 1.9547693685624719, "grad_norm": 0.10318760573863983, "learning_rate": 1.6325966976133623e-05, "loss": 0.3301, "step": 12004 }, { "epoch": 1.9549322151203028, "grad_norm": 0.08531111478805542, "learning_rate": 1.6321524376813362e-05, "loss": 0.3034, "step": 12005 }, { "epoch": 1.9550950616781337, "grad_norm": 0.1000073179602623, "learning_rate": 1.631708208907638e-05, "loss": 0.305, "step": 12006 }, { "epoch": 1.9552579082359647, "grad_norm": 0.10037685930728912, "learning_rate": 1.6312640113082168e-05, "loss": 0.2982, "step": 12007 }, { "epoch": 1.9554207547937956, "grad_norm": 0.11726240068674088, "learning_rate": 1.6308198448990192e-05, "loss": 0.3038, "step": 12008 }, { "epoch": 1.9555836013516266, "grad_norm": 0.11947629600763321, "learning_rate": 1.6303757096959947e-05, "loss": 0.3154, "step": 12009 }, { "epoch": 1.9557464479094573, "grad_norm": 0.0724048763513565, "learning_rate": 1.6299316057150874e-05, "loss": 0.3181, "step": 12010 }, { "epoch": 1.9559092944672882, "grad_norm": 0.11095841974020004, "learning_rate": 1.6294875329722422e-05, "loss": 0.3314, "step": 12011 }, { "epoch": 1.956072141025119, "grad_norm": 0.07503622770309448, "learning_rate": 1.629043491483402e-05, "loss": 0.3369, "step": 12012 }, { "epoch": 1.95623498758295, "grad_norm": 0.07917357981204987, "learning_rate": 1.6285994812645107e-05, "loss": 0.3181, "step": 12013 }, { "epoch": 1.9563978341407808, "grad_norm": 0.10020974278450012, "learning_rate": 1.6281555023315087e-05, "loss": 0.3041, "step": 12014 }, { "epoch": 1.9565606806986118, "grad_norm": 0.08874433487653732, "learning_rate": 1.6277115547003365e-05, "loss": 0.3203, "step": 12015 }, { "epoch": 1.9567235272564427, "grad_norm": 0.12618568539619446, "learning_rate": 1.6272676383869316e-05, "loss": 0.3238, "step": 12016 }, { "epoch": 1.9568863738142737, "grad_norm": 0.13464520871639252, "learning_rate": 1.626823753407235e-05, "loss": 0.2764, "step": 12017 }, { "epoch": 1.9570492203721044, "grad_norm": 0.10300253331661224, "learning_rate": 1.626379899777181e-05, "loss": 0.3249, "step": 12018 }, { "epoch": 1.9572120669299353, "grad_norm": 0.09734684228897095, "learning_rate": 1.6259360775127056e-05, "loss": 0.2934, "step": 12019 }, { "epoch": 1.957374913487766, "grad_norm": 0.09401918202638626, "learning_rate": 1.6254922866297444e-05, "loss": 0.3014, "step": 12020 }, { "epoch": 1.957537760045597, "grad_norm": 0.1416628509759903, "learning_rate": 1.6250485271442305e-05, "loss": 0.2822, "step": 12021 }, { "epoch": 1.957700606603428, "grad_norm": 0.07601793110370636, "learning_rate": 1.624604799072095e-05, "loss": 0.2843, "step": 12022 }, { "epoch": 1.9578634531612589, "grad_norm": 0.13523954153060913, "learning_rate": 1.6241611024292702e-05, "loss": 0.3335, "step": 12023 }, { "epoch": 1.9580262997190898, "grad_norm": 0.09115087985992432, "learning_rate": 1.623717437231686e-05, "loss": 0.3002, "step": 12024 }, { "epoch": 1.9581891462769205, "grad_norm": 0.14745217561721802, "learning_rate": 1.6232738034952715e-05, "loss": 0.3163, "step": 12025 }, { "epoch": 1.9583519928347515, "grad_norm": 0.13209299743175507, "learning_rate": 1.6228302012359543e-05, "loss": 0.3098, "step": 12026 }, { "epoch": 1.9585148393925822, "grad_norm": 0.08490284532308578, "learning_rate": 1.62238663046966e-05, "loss": 0.2905, "step": 12027 }, { "epoch": 1.9586776859504131, "grad_norm": 0.13388735055923462, "learning_rate": 1.6219430912123158e-05, "loss": 0.3164, "step": 12028 }, { "epoch": 1.958840532508244, "grad_norm": 0.113449826836586, "learning_rate": 1.6214995834798457e-05, "loss": 0.3149, "step": 12029 }, { "epoch": 1.959003379066075, "grad_norm": 0.10994232445955276, "learning_rate": 1.6210561072881714e-05, "loss": 0.2927, "step": 12030 }, { "epoch": 1.959166225623906, "grad_norm": 0.15553103387355804, "learning_rate": 1.6206126626532177e-05, "loss": 0.3302, "step": 12031 }, { "epoch": 1.959329072181737, "grad_norm": 0.12511539459228516, "learning_rate": 1.6201692495909035e-05, "loss": 0.3349, "step": 12032 }, { "epoch": 1.9594919187395676, "grad_norm": 0.09992558509111404, "learning_rate": 1.6197258681171496e-05, "loss": 0.3255, "step": 12033 }, { "epoch": 1.9596547652973986, "grad_norm": 0.10758344829082489, "learning_rate": 1.6192825182478734e-05, "loss": 0.2772, "step": 12034 }, { "epoch": 1.9598176118552293, "grad_norm": 0.08388854563236237, "learning_rate": 1.6188391999989945e-05, "loss": 0.343, "step": 12035 }, { "epoch": 1.9599804584130602, "grad_norm": 0.08316916227340698, "learning_rate": 1.6183959133864278e-05, "loss": 0.3072, "step": 12036 }, { "epoch": 1.9601433049708912, "grad_norm": 0.11271394789218903, "learning_rate": 1.61795265842609e-05, "loss": 0.2653, "step": 12037 }, { "epoch": 1.9603061515287221, "grad_norm": 0.11982642114162445, "learning_rate": 1.617509435133893e-05, "loss": 0.3005, "step": 12038 }, { "epoch": 1.960468998086553, "grad_norm": 0.1374167650938034, "learning_rate": 1.6170662435257523e-05, "loss": 0.328, "step": 12039 }, { "epoch": 1.9606318446443838, "grad_norm": 0.08811237663030624, "learning_rate": 1.6166230836175784e-05, "loss": 0.2862, "step": 12040 }, { "epoch": 1.9607946912022147, "grad_norm": 0.09691976010799408, "learning_rate": 1.6161799554252823e-05, "loss": 0.3873, "step": 12041 }, { "epoch": 1.9609575377600454, "grad_norm": 0.10081461817026138, "learning_rate": 1.6157368589647744e-05, "loss": 0.3108, "step": 12042 }, { "epoch": 1.9611203843178764, "grad_norm": 0.10726716369390488, "learning_rate": 1.6152937942519622e-05, "loss": 0.316, "step": 12043 }, { "epoch": 1.9612832308757073, "grad_norm": 0.12410304695367813, "learning_rate": 1.6148507613027538e-05, "loss": 0.315, "step": 12044 }, { "epoch": 1.9614460774335383, "grad_norm": 0.10302873700857162, "learning_rate": 1.6144077601330543e-05, "loss": 0.3225, "step": 12045 }, { "epoch": 1.9616089239913692, "grad_norm": 0.10825788229703903, "learning_rate": 1.61396479075877e-05, "loss": 0.3189, "step": 12046 }, { "epoch": 1.9617717705492002, "grad_norm": 0.08599226921796799, "learning_rate": 1.6135218531958046e-05, "loss": 0.3366, "step": 12047 }, { "epoch": 1.9619346171070309, "grad_norm": 0.1051541268825531, "learning_rate": 1.6130789474600612e-05, "loss": 0.3175, "step": 12048 }, { "epoch": 1.9620974636648618, "grad_norm": 0.13920444250106812, "learning_rate": 1.612636073567439e-05, "loss": 0.345, "step": 12049 }, { "epoch": 1.9622603102226925, "grad_norm": 0.11736447364091873, "learning_rate": 1.6121932315338416e-05, "loss": 0.2917, "step": 12050 }, { "epoch": 1.9624231567805235, "grad_norm": 0.1649899184703827, "learning_rate": 1.6117504213751675e-05, "loss": 0.3396, "step": 12051 }, { "epoch": 1.9625860033383544, "grad_norm": 0.13623303174972534, "learning_rate": 1.6113076431073133e-05, "loss": 0.2647, "step": 12052 }, { "epoch": 1.9627488498961854, "grad_norm": 0.0882018581032753, "learning_rate": 1.6108648967461783e-05, "loss": 0.292, "step": 12053 }, { "epoch": 1.9629116964540163, "grad_norm": 0.10826429724693298, "learning_rate": 1.6104221823076573e-05, "loss": 0.2961, "step": 12054 }, { "epoch": 1.9630745430118473, "grad_norm": 0.10769210755825043, "learning_rate": 1.6099794998076457e-05, "loss": 0.33, "step": 12055 }, { "epoch": 1.963237389569678, "grad_norm": 0.09109999984502792, "learning_rate": 1.609536849262035e-05, "loss": 0.3434, "step": 12056 }, { "epoch": 1.9634002361275087, "grad_norm": 0.10043082386255264, "learning_rate": 1.6090942306867206e-05, "loss": 0.3172, "step": 12057 }, { "epoch": 1.9635630826853396, "grad_norm": 0.11116255074739456, "learning_rate": 1.6086516440975923e-05, "loss": 0.2724, "step": 12058 }, { "epoch": 1.9637259292431706, "grad_norm": 0.11459086835384369, "learning_rate": 1.60820908951054e-05, "loss": 0.3332, "step": 12059 }, { "epoch": 1.9638887758010015, "grad_norm": 0.12338228523731232, "learning_rate": 1.6077665669414537e-05, "loss": 0.3267, "step": 12060 }, { "epoch": 1.9640516223588325, "grad_norm": 0.10534678399562836, "learning_rate": 1.6073240764062208e-05, "loss": 0.2941, "step": 12061 }, { "epoch": 1.9642144689166634, "grad_norm": 0.09951820224523544, "learning_rate": 1.6068816179207284e-05, "loss": 0.2831, "step": 12062 }, { "epoch": 1.9643773154744941, "grad_norm": 0.1135384738445282, "learning_rate": 1.6064391915008615e-05, "loss": 0.3244, "step": 12063 }, { "epoch": 1.964540162032325, "grad_norm": 0.10245107859373093, "learning_rate": 1.6059967971625045e-05, "loss": 0.2806, "step": 12064 }, { "epoch": 1.9647030085901558, "grad_norm": 0.09236616641283035, "learning_rate": 1.6055544349215417e-05, "loss": 0.2988, "step": 12065 }, { "epoch": 1.9648658551479867, "grad_norm": 0.12259906530380249, "learning_rate": 1.6051121047938545e-05, "loss": 0.3045, "step": 12066 }, { "epoch": 1.9650287017058177, "grad_norm": 0.1045636236667633, "learning_rate": 1.6046698067953226e-05, "loss": 0.3654, "step": 12067 }, { "epoch": 1.9651915482636486, "grad_norm": 0.15306667983531952, "learning_rate": 1.6042275409418283e-05, "loss": 0.2903, "step": 12068 }, { "epoch": 1.9653543948214796, "grad_norm": 0.13128022849559784, "learning_rate": 1.6037853072492494e-05, "loss": 0.2946, "step": 12069 }, { "epoch": 1.9655172413793105, "grad_norm": 0.12077610939741135, "learning_rate": 1.6033431057334616e-05, "loss": 0.2891, "step": 12070 }, { "epoch": 1.9656800879371412, "grad_norm": 0.10504322499036789, "learning_rate": 1.6029009364103438e-05, "loss": 0.2694, "step": 12071 }, { "epoch": 1.9658429344949722, "grad_norm": 0.10585350543260574, "learning_rate": 1.6024587992957707e-05, "loss": 0.3144, "step": 12072 }, { "epoch": 1.9660057810528029, "grad_norm": 0.13641254603862762, "learning_rate": 1.6020166944056153e-05, "loss": 0.3119, "step": 12073 }, { "epoch": 1.9661686276106338, "grad_norm": 0.080536849796772, "learning_rate": 1.6015746217557504e-05, "loss": 0.3181, "step": 12074 }, { "epoch": 1.9663314741684648, "grad_norm": 0.12079792469739914, "learning_rate": 1.6011325813620488e-05, "loss": 0.3015, "step": 12075 }, { "epoch": 1.9664943207262957, "grad_norm": 0.09424431622028351, "learning_rate": 1.600690573240381e-05, "loss": 0.3631, "step": 12076 }, { "epoch": 1.9666571672841267, "grad_norm": 0.08285845071077347, "learning_rate": 1.6002485974066155e-05, "loss": 0.3671, "step": 12077 }, { "epoch": 1.9668200138419574, "grad_norm": 0.10564189404249191, "learning_rate": 1.599806653876621e-05, "loss": 0.2712, "step": 12078 }, { "epoch": 1.9669828603997883, "grad_norm": 0.16358418762683868, "learning_rate": 1.599364742666265e-05, "loss": 0.3561, "step": 12079 }, { "epoch": 1.967145706957619, "grad_norm": 0.09940531849861145, "learning_rate": 1.598922863791413e-05, "loss": 0.3239, "step": 12080 }, { "epoch": 1.96730855351545, "grad_norm": 0.09982281923294067, "learning_rate": 1.598481017267929e-05, "loss": 0.2963, "step": 12081 }, { "epoch": 1.967471400073281, "grad_norm": 0.10610824078321457, "learning_rate": 1.598039203111679e-05, "loss": 0.3204, "step": 12082 }, { "epoch": 1.9676342466311119, "grad_norm": 0.12156409025192261, "learning_rate": 1.597597421338523e-05, "loss": 0.2847, "step": 12083 }, { "epoch": 1.9677970931889428, "grad_norm": 0.15428969264030457, "learning_rate": 1.5971556719643245e-05, "loss": 0.3451, "step": 12084 }, { "epoch": 1.9679599397467737, "grad_norm": 0.09621518105268478, "learning_rate": 1.5967139550049405e-05, "loss": 0.3341, "step": 12085 }, { "epoch": 1.9681227863046045, "grad_norm": 0.09549718350172043, "learning_rate": 1.5962722704762328e-05, "loss": 0.3089, "step": 12086 }, { "epoch": 1.9682856328624354, "grad_norm": 0.09814943373203278, "learning_rate": 1.5958306183940586e-05, "loss": 0.3147, "step": 12087 }, { "epoch": 1.9684484794202661, "grad_norm": 0.1449660062789917, "learning_rate": 1.595388998774274e-05, "loss": 0.3638, "step": 12088 }, { "epoch": 1.968611325978097, "grad_norm": 0.11890137195587158, "learning_rate": 1.594947411632734e-05, "loss": 0.2999, "step": 12089 }, { "epoch": 1.968774172535928, "grad_norm": 0.12390299886465073, "learning_rate": 1.594505856985294e-05, "loss": 0.3613, "step": 12090 }, { "epoch": 1.968937019093759, "grad_norm": 0.05350269749760628, "learning_rate": 1.594064334847807e-05, "loss": 0.3448, "step": 12091 }, { "epoch": 1.96909986565159, "grad_norm": 0.08371645212173462, "learning_rate": 1.5936228452361234e-05, "loss": 0.2701, "step": 12092 }, { "epoch": 1.9692627122094206, "grad_norm": 0.09482710808515549, "learning_rate": 1.5931813881660963e-05, "loss": 0.3114, "step": 12093 }, { "epoch": 1.9694255587672516, "grad_norm": 0.09116095304489136, "learning_rate": 1.5927399636535746e-05, "loss": 0.3121, "step": 12094 }, { "epoch": 1.9695884053250823, "grad_norm": 0.1657751500606537, "learning_rate": 1.5922985717144062e-05, "loss": 0.3139, "step": 12095 }, { "epoch": 1.9697512518829132, "grad_norm": 0.09160862863063812, "learning_rate": 1.5918572123644378e-05, "loss": 0.3323, "step": 12096 }, { "epoch": 1.9699140984407442, "grad_norm": 0.14825047552585602, "learning_rate": 1.591415885619517e-05, "loss": 0.3908, "step": 12097 }, { "epoch": 1.970076944998575, "grad_norm": 0.07551077008247375, "learning_rate": 1.5909745914954884e-05, "loss": 0.3285, "step": 12098 }, { "epoch": 1.970239791556406, "grad_norm": 0.08601853996515274, "learning_rate": 1.5905333300081945e-05, "loss": 0.2952, "step": 12099 }, { "epoch": 1.970402638114237, "grad_norm": 0.101773701608181, "learning_rate": 1.5900921011734794e-05, "loss": 0.2999, "step": 12100 }, { "epoch": 1.9705654846720677, "grad_norm": 0.08803485333919525, "learning_rate": 1.5896509050071835e-05, "loss": 0.3062, "step": 12101 }, { "epoch": 1.9707283312298987, "grad_norm": 0.09749481081962585, "learning_rate": 1.5892097415251484e-05, "loss": 0.2938, "step": 12102 }, { "epoch": 1.9708911777877294, "grad_norm": 0.09044191241264343, "learning_rate": 1.588768610743212e-05, "loss": 0.2984, "step": 12103 }, { "epoch": 1.9710540243455603, "grad_norm": 0.13803249597549438, "learning_rate": 1.5883275126772124e-05, "loss": 0.2884, "step": 12104 }, { "epoch": 1.9712168709033913, "grad_norm": 0.1436707228422165, "learning_rate": 1.587886447342987e-05, "loss": 0.2717, "step": 12105 }, { "epoch": 1.9713797174612222, "grad_norm": 0.10230638086795807, "learning_rate": 1.587445414756371e-05, "loss": 0.299, "step": 12106 }, { "epoch": 1.9715425640190531, "grad_norm": 0.12484655529260635, "learning_rate": 1.5870044149331982e-05, "loss": 0.2686, "step": 12107 }, { "epoch": 1.971705410576884, "grad_norm": 0.1019974872469902, "learning_rate": 1.5865634478893027e-05, "loss": 0.3338, "step": 12108 }, { "epoch": 1.9718682571347148, "grad_norm": 0.14267832040786743, "learning_rate": 1.586122513640516e-05, "loss": 0.3097, "step": 12109 }, { "epoch": 1.9720311036925458, "grad_norm": 0.06566927582025528, "learning_rate": 1.5856816122026684e-05, "loss": 0.2903, "step": 12110 }, { "epoch": 1.9721939502503765, "grad_norm": 0.15716084837913513, "learning_rate": 1.5852407435915915e-05, "loss": 0.3054, "step": 12111 }, { "epoch": 1.9723567968082074, "grad_norm": 0.15388216078281403, "learning_rate": 1.5847999078231125e-05, "loss": 0.3134, "step": 12112 }, { "epoch": 1.9725196433660384, "grad_norm": 0.13271746039390564, "learning_rate": 1.5843591049130585e-05, "loss": 0.3218, "step": 12113 }, { "epoch": 1.9726824899238693, "grad_norm": 0.07585521787405014, "learning_rate": 1.583918334877255e-05, "loss": 0.2711, "step": 12114 }, { "epoch": 1.9728453364817002, "grad_norm": 0.14407281577587128, "learning_rate": 1.5834775977315293e-05, "loss": 0.3069, "step": 12115 }, { "epoch": 1.973008183039531, "grad_norm": 0.12547548115253448, "learning_rate": 1.5830368934917036e-05, "loss": 0.3065, "step": 12116 }, { "epoch": 1.973171029597362, "grad_norm": 0.10267138481140137, "learning_rate": 1.5825962221736e-05, "loss": 0.3163, "step": 12117 }, { "epoch": 1.9733338761551926, "grad_norm": 0.09208265691995621, "learning_rate": 1.5821555837930414e-05, "loss": 0.2788, "step": 12118 }, { "epoch": 1.9734967227130236, "grad_norm": 0.1132039949297905, "learning_rate": 1.581714978365847e-05, "loss": 0.288, "step": 12119 }, { "epoch": 1.9736595692708545, "grad_norm": 0.12616679072380066, "learning_rate": 1.581274405907836e-05, "loss": 0.3014, "step": 12120 }, { "epoch": 1.9738224158286855, "grad_norm": 0.10483859479427338, "learning_rate": 1.580833866434826e-05, "loss": 0.2931, "step": 12121 }, { "epoch": 1.9739852623865164, "grad_norm": 0.15973293781280518, "learning_rate": 1.5803933599626346e-05, "loss": 0.3313, "step": 12122 }, { "epoch": 1.9741481089443473, "grad_norm": 0.11854982376098633, "learning_rate": 1.579952886507077e-05, "loss": 0.2804, "step": 12123 }, { "epoch": 1.974310955502178, "grad_norm": 0.15153416991233826, "learning_rate": 1.5795124460839676e-05, "loss": 0.3415, "step": 12124 }, { "epoch": 1.974473802060009, "grad_norm": 0.08863215148448944, "learning_rate": 1.579072038709118e-05, "loss": 0.2544, "step": 12125 }, { "epoch": 1.9746366486178397, "grad_norm": 0.09632637351751328, "learning_rate": 1.5786316643983424e-05, "loss": 0.2847, "step": 12126 }, { "epoch": 1.9747994951756707, "grad_norm": 0.11672794818878174, "learning_rate": 1.5781913231674507e-05, "loss": 0.3042, "step": 12127 }, { "epoch": 1.9749623417335016, "grad_norm": 0.09121908247470856, "learning_rate": 1.577751015032252e-05, "loss": 0.2837, "step": 12128 }, { "epoch": 1.9751251882913325, "grad_norm": 0.08246620744466782, "learning_rate": 1.577310740008554e-05, "loss": 0.2965, "step": 12129 }, { "epoch": 1.9752880348491635, "grad_norm": 0.11123484373092651, "learning_rate": 1.576870498112166e-05, "loss": 0.324, "step": 12130 }, { "epoch": 1.9754508814069942, "grad_norm": 0.11519219726324081, "learning_rate": 1.5764302893588927e-05, "loss": 0.292, "step": 12131 }, { "epoch": 1.9756137279648252, "grad_norm": 0.10148653388023376, "learning_rate": 1.5759901137645386e-05, "loss": 0.2944, "step": 12132 }, { "epoch": 1.9757765745226559, "grad_norm": 0.11559589207172394, "learning_rate": 1.5755499713449084e-05, "loss": 0.3288, "step": 12133 }, { "epoch": 1.9759394210804868, "grad_norm": 0.0998731330037117, "learning_rate": 1.5751098621158043e-05, "loss": 0.3045, "step": 12134 }, { "epoch": 1.9761022676383178, "grad_norm": 0.14249677956104279, "learning_rate": 1.574669786093027e-05, "loss": 0.2872, "step": 12135 }, { "epoch": 1.9762651141961487, "grad_norm": 0.1996951848268509, "learning_rate": 1.574229743292376e-05, "loss": 0.3985, "step": 12136 }, { "epoch": 1.9764279607539796, "grad_norm": 0.13313142955303192, "learning_rate": 1.5737897337296515e-05, "loss": 0.3171, "step": 12137 }, { "epoch": 1.9765908073118106, "grad_norm": 0.10535836219787598, "learning_rate": 1.57334975742065e-05, "loss": 0.3069, "step": 12138 }, { "epoch": 1.9767536538696413, "grad_norm": 0.1412787288427353, "learning_rate": 1.5729098143811693e-05, "loss": 0.328, "step": 12139 }, { "epoch": 1.9769165004274722, "grad_norm": 0.18439823389053345, "learning_rate": 1.572469904627003e-05, "loss": 0.3673, "step": 12140 }, { "epoch": 1.977079346985303, "grad_norm": 0.1478767693042755, "learning_rate": 1.5720300281739468e-05, "loss": 0.2925, "step": 12141 }, { "epoch": 1.977242193543134, "grad_norm": 0.1362697333097458, "learning_rate": 1.571590185037793e-05, "loss": 0.2778, "step": 12142 }, { "epoch": 1.9774050401009649, "grad_norm": 0.1056286096572876, "learning_rate": 1.5711503752343322e-05, "loss": 0.3049, "step": 12143 }, { "epoch": 1.9775678866587958, "grad_norm": 0.12406084686517715, "learning_rate": 1.5707105987793565e-05, "loss": 0.2988, "step": 12144 }, { "epoch": 1.9777307332166267, "grad_norm": 0.13251255452632904, "learning_rate": 1.5702708556886548e-05, "loss": 0.3662, "step": 12145 }, { "epoch": 1.9778935797744577, "grad_norm": 0.0976443812251091, "learning_rate": 1.5698311459780145e-05, "loss": 0.3326, "step": 12146 }, { "epoch": 1.9780564263322884, "grad_norm": 0.10122852772474289, "learning_rate": 1.5693914696632226e-05, "loss": 0.3122, "step": 12147 }, { "epoch": 1.9782192728901193, "grad_norm": 0.16352377831935883, "learning_rate": 1.5689518267600658e-05, "loss": 0.2943, "step": 12148 }, { "epoch": 1.97838211944795, "grad_norm": 14.311036109924316, "learning_rate": 1.568512217284328e-05, "loss": 0.3692, "step": 12149 }, { "epoch": 1.978544966005781, "grad_norm": 0.08007432520389557, "learning_rate": 1.5680726412517925e-05, "loss": 0.3483, "step": 12150 }, { "epoch": 1.978707812563612, "grad_norm": 0.12892405688762665, "learning_rate": 1.56763309867824e-05, "loss": 0.3057, "step": 12151 }, { "epoch": 1.9788706591214429, "grad_norm": 0.09554558247327805, "learning_rate": 1.5671935895794537e-05, "loss": 0.3085, "step": 12152 }, { "epoch": 1.9790335056792738, "grad_norm": 0.1104523167014122, "learning_rate": 1.5667541139712126e-05, "loss": 0.3254, "step": 12153 }, { "epoch": 1.9791963522371046, "grad_norm": 0.10106243938207626, "learning_rate": 1.566314671869294e-05, "loss": 0.2765, "step": 12154 }, { "epoch": 1.9793591987949355, "grad_norm": 0.12938228249549866, "learning_rate": 1.5658752632894767e-05, "loss": 0.3112, "step": 12155 }, { "epoch": 1.9795220453527662, "grad_norm": 0.09835892915725708, "learning_rate": 1.5654358882475363e-05, "loss": 0.3177, "step": 12156 }, { "epoch": 1.9796848919105972, "grad_norm": 0.11754753440618515, "learning_rate": 1.564996546759247e-05, "loss": 0.3057, "step": 12157 }, { "epoch": 1.979847738468428, "grad_norm": 0.12814465165138245, "learning_rate": 1.564557238840383e-05, "loss": 0.3067, "step": 12158 }, { "epoch": 1.980010585026259, "grad_norm": 0.1406460404396057, "learning_rate": 1.5641179645067168e-05, "loss": 0.3453, "step": 12159 }, { "epoch": 1.98017343158409, "grad_norm": 0.13463649153709412, "learning_rate": 1.5636787237740204e-05, "loss": 0.3598, "step": 12160 }, { "epoch": 1.980336278141921, "grad_norm": 0.16352416574954987, "learning_rate": 1.5632395166580623e-05, "loss": 0.3128, "step": 12161 }, { "epoch": 1.9804991246997516, "grad_norm": 0.09609521925449371, "learning_rate": 1.5628003431746122e-05, "loss": 0.316, "step": 12162 }, { "epoch": 1.9806619712575826, "grad_norm": 0.13876254856586456, "learning_rate": 1.562361203339438e-05, "loss": 0.3855, "step": 12163 }, { "epoch": 1.9808248178154133, "grad_norm": 0.16739551723003387, "learning_rate": 1.5619220971683064e-05, "loss": 0.2898, "step": 12164 }, { "epoch": 1.9809876643732443, "grad_norm": 0.1005466878414154, "learning_rate": 1.561483024676981e-05, "loss": 0.3097, "step": 12165 }, { "epoch": 1.9811505109310752, "grad_norm": 0.10680747032165527, "learning_rate": 1.5610439858812274e-05, "loss": 0.3077, "step": 12166 }, { "epoch": 1.9813133574889061, "grad_norm": 0.125192791223526, "learning_rate": 1.5606049807968083e-05, "loss": 0.3002, "step": 12167 }, { "epoch": 1.981476204046737, "grad_norm": 0.112886942923069, "learning_rate": 1.5601660094394843e-05, "loss": 0.3165, "step": 12168 }, { "epoch": 1.9816390506045678, "grad_norm": 0.09222982078790665, "learning_rate": 1.5597270718250158e-05, "loss": 0.3267, "step": 12169 }, { "epoch": 1.9818018971623987, "grad_norm": 0.14850273728370667, "learning_rate": 1.5592881679691637e-05, "loss": 0.3141, "step": 12170 }, { "epoch": 1.9819647437202295, "grad_norm": 0.15907934308052063, "learning_rate": 1.5588492978876847e-05, "loss": 0.3512, "step": 12171 }, { "epoch": 1.9821275902780604, "grad_norm": 0.09728636592626572, "learning_rate": 1.5584104615963345e-05, "loss": 0.3264, "step": 12172 }, { "epoch": 1.9822904368358913, "grad_norm": 0.153087317943573, "learning_rate": 1.557971659110871e-05, "loss": 0.3152, "step": 12173 }, { "epoch": 1.9824532833937223, "grad_norm": 0.08809243142604828, "learning_rate": 1.5575328904470478e-05, "loss": 0.3177, "step": 12174 }, { "epoch": 1.9826161299515532, "grad_norm": 0.21170726418495178, "learning_rate": 1.5570941556206163e-05, "loss": 0.3645, "step": 12175 }, { "epoch": 1.9827789765093842, "grad_norm": 0.07438288629055023, "learning_rate": 1.55665545464733e-05, "loss": 0.3912, "step": 12176 }, { "epoch": 1.982941823067215, "grad_norm": 0.09125932306051254, "learning_rate": 1.5562167875429396e-05, "loss": 0.2891, "step": 12177 }, { "epoch": 1.9831046696250458, "grad_norm": 0.10007213801145554, "learning_rate": 1.5557781543231936e-05, "loss": 0.3248, "step": 12178 }, { "epoch": 1.9832675161828766, "grad_norm": 0.12254940718412399, "learning_rate": 1.5553395550038414e-05, "loss": 0.3204, "step": 12179 }, { "epoch": 1.9834303627407075, "grad_norm": 0.0813315138220787, "learning_rate": 1.5549009896006282e-05, "loss": 0.3193, "step": 12180 }, { "epoch": 1.9835932092985384, "grad_norm": 0.10840490460395813, "learning_rate": 1.5544624581293022e-05, "loss": 0.2788, "step": 12181 }, { "epoch": 1.9837560558563694, "grad_norm": 0.08196932822465897, "learning_rate": 1.5540239606056073e-05, "loss": 0.2556, "step": 12182 }, { "epoch": 1.9839189024142003, "grad_norm": 0.09967852383852005, "learning_rate": 1.553585497045285e-05, "loss": 0.3333, "step": 12183 }, { "epoch": 1.9840817489720313, "grad_norm": 0.20732402801513672, "learning_rate": 1.5531470674640797e-05, "loss": 0.3132, "step": 12184 }, { "epoch": 1.984244595529862, "grad_norm": 0.14040425419807434, "learning_rate": 1.552708671877732e-05, "loss": 0.2897, "step": 12185 }, { "epoch": 1.9844074420876927, "grad_norm": 0.11875489354133606, "learning_rate": 1.5522703103019807e-05, "loss": 0.337, "step": 12186 }, { "epoch": 1.9845702886455237, "grad_norm": 0.11094069480895996, "learning_rate": 1.5518319827525642e-05, "loss": 0.3131, "step": 12187 }, { "epoch": 1.9847331352033546, "grad_norm": 0.09350330382585526, "learning_rate": 1.551393689245221e-05, "loss": 0.2874, "step": 12188 }, { "epoch": 1.9848959817611855, "grad_norm": 0.08509882539510727, "learning_rate": 1.550955429795687e-05, "loss": 0.3558, "step": 12189 }, { "epoch": 1.9850588283190165, "grad_norm": 0.16127049922943115, "learning_rate": 1.5505172044196958e-05, "loss": 0.358, "step": 12190 }, { "epoch": 1.9852216748768474, "grad_norm": 0.1019342765212059, "learning_rate": 1.5500790131329807e-05, "loss": 0.3352, "step": 12191 }, { "epoch": 1.9853845214346781, "grad_norm": 0.1148068755865097, "learning_rate": 1.5496408559512765e-05, "loss": 0.265, "step": 12192 }, { "epoch": 1.985547367992509, "grad_norm": 0.08962740004062653, "learning_rate": 1.5492027328903124e-05, "loss": 0.3231, "step": 12193 }, { "epoch": 1.9857102145503398, "grad_norm": 0.08419322967529297, "learning_rate": 1.5487646439658184e-05, "loss": 0.2915, "step": 12194 }, { "epoch": 1.9858730611081707, "grad_norm": 0.12225914746522903, "learning_rate": 1.5483265891935243e-05, "loss": 0.3323, "step": 12195 }, { "epoch": 1.9860359076660017, "grad_norm": 0.12205569446086884, "learning_rate": 1.5478885685891567e-05, "loss": 0.3421, "step": 12196 }, { "epoch": 1.9861987542238326, "grad_norm": 0.17284829914569855, "learning_rate": 1.5474505821684425e-05, "loss": 0.3576, "step": 12197 }, { "epoch": 1.9863616007816636, "grad_norm": 0.12590749561786652, "learning_rate": 1.5470126299471054e-05, "loss": 0.3158, "step": 12198 }, { "epoch": 1.9865244473394945, "grad_norm": 0.10964298248291016, "learning_rate": 1.5465747119408706e-05, "loss": 0.2884, "step": 12199 }, { "epoch": 1.9866872938973252, "grad_norm": 0.09087157249450684, "learning_rate": 1.5461368281654606e-05, "loss": 0.3037, "step": 12200 }, { "epoch": 1.9868501404551562, "grad_norm": 0.08687902987003326, "learning_rate": 1.5456989786365962e-05, "loss": 0.323, "step": 12201 }, { "epoch": 1.987012987012987, "grad_norm": 0.09996475279331207, "learning_rate": 1.545261163369997e-05, "loss": 0.2742, "step": 12202 }, { "epoch": 1.9871758335708178, "grad_norm": 0.07199890911579132, "learning_rate": 1.544823382381383e-05, "loss": 0.2803, "step": 12203 }, { "epoch": 1.9873386801286488, "grad_norm": 0.12981493771076202, "learning_rate": 1.5443856356864717e-05, "loss": 0.285, "step": 12204 }, { "epoch": 1.9875015266864797, "grad_norm": 0.1425575464963913, "learning_rate": 1.5439479233009778e-05, "loss": 0.3375, "step": 12205 }, { "epoch": 1.9876643732443107, "grad_norm": 0.146635964512825, "learning_rate": 1.5435102452406192e-05, "loss": 0.333, "step": 12206 }, { "epoch": 1.9878272198021414, "grad_norm": 0.09491489082574844, "learning_rate": 1.5430726015211084e-05, "loss": 0.3016, "step": 12207 }, { "epoch": 1.9879900663599723, "grad_norm": 0.09489070624113083, "learning_rate": 1.5426349921581583e-05, "loss": 0.297, "step": 12208 }, { "epoch": 1.988152912917803, "grad_norm": 0.0953744575381279, "learning_rate": 1.5421974171674793e-05, "loss": 0.3788, "step": 12209 }, { "epoch": 1.988315759475634, "grad_norm": 0.10676144808530807, "learning_rate": 1.5417598765647837e-05, "loss": 0.3659, "step": 12210 }, { "epoch": 1.988478606033465, "grad_norm": 0.09988070279359818, "learning_rate": 1.5413223703657794e-05, "loss": 0.3126, "step": 12211 }, { "epoch": 1.9886414525912959, "grad_norm": 0.09852146357297897, "learning_rate": 1.5408848985861734e-05, "loss": 0.2828, "step": 12212 }, { "epoch": 1.9888042991491268, "grad_norm": 0.11532770097255707, "learning_rate": 1.540447461241674e-05, "loss": 0.2901, "step": 12213 }, { "epoch": 1.9889671457069578, "grad_norm": 0.1661946177482605, "learning_rate": 1.5400100583479857e-05, "loss": 0.3732, "step": 12214 }, { "epoch": 1.9891299922647885, "grad_norm": 0.17847663164138794, "learning_rate": 1.5395726899208117e-05, "loss": 0.3363, "step": 12215 }, { "epoch": 1.9892928388226194, "grad_norm": 0.09376015514135361, "learning_rate": 1.5391353559758563e-05, "loss": 0.2913, "step": 12216 }, { "epoch": 1.9894556853804501, "grad_norm": 0.11283505707979202, "learning_rate": 1.5386980565288205e-05, "loss": 0.2877, "step": 12217 }, { "epoch": 1.989618531938281, "grad_norm": 0.14067380130290985, "learning_rate": 1.5382607915954046e-05, "loss": 0.3285, "step": 12218 }, { "epoch": 1.989781378496112, "grad_norm": 0.09759844839572906, "learning_rate": 1.5378235611913075e-05, "loss": 0.3089, "step": 12219 }, { "epoch": 1.989944225053943, "grad_norm": 0.08337506651878357, "learning_rate": 1.537386365332227e-05, "loss": 0.2428, "step": 12220 }, { "epoch": 1.990107071611774, "grad_norm": 0.11968190968036652, "learning_rate": 1.5369492040338603e-05, "loss": 0.32, "step": 12221 }, { "epoch": 1.9902699181696046, "grad_norm": 0.07133915275335312, "learning_rate": 1.536512077311903e-05, "loss": 0.3361, "step": 12222 }, { "epoch": 1.9904327647274356, "grad_norm": 0.08130544424057007, "learning_rate": 1.5360749851820477e-05, "loss": 0.3273, "step": 12223 }, { "epoch": 1.9905956112852663, "grad_norm": 0.1329684555530548, "learning_rate": 1.5356379276599897e-05, "loss": 0.2717, "step": 12224 }, { "epoch": 1.9907584578430972, "grad_norm": 0.09525448083877563, "learning_rate": 1.535200904761419e-05, "loss": 0.2936, "step": 12225 }, { "epoch": 1.9909213044009282, "grad_norm": 0.13271833956241608, "learning_rate": 1.5347639165020265e-05, "loss": 0.3047, "step": 12226 }, { "epoch": 1.9910841509587591, "grad_norm": 0.14275231957435608, "learning_rate": 1.5343269628975006e-05, "loss": 0.3374, "step": 12227 }, { "epoch": 1.99124699751659, "grad_norm": 0.07984742522239685, "learning_rate": 1.533890043963531e-05, "loss": 0.3121, "step": 12228 }, { "epoch": 1.991409844074421, "grad_norm": 0.12843899428844452, "learning_rate": 1.533453159715803e-05, "loss": 0.3177, "step": 12229 }, { "epoch": 1.9915726906322517, "grad_norm": 0.0906943529844284, "learning_rate": 1.5330163101700027e-05, "loss": 0.3444, "step": 12230 }, { "epoch": 1.9917355371900827, "grad_norm": 0.08375216275453568, "learning_rate": 1.532579495341813e-05, "loss": 0.3221, "step": 12231 }, { "epoch": 1.9918983837479134, "grad_norm": 0.07509870082139969, "learning_rate": 1.5321427152469186e-05, "loss": 0.3389, "step": 12232 }, { "epoch": 1.9920612303057443, "grad_norm": 0.09124533087015152, "learning_rate": 1.531705969901001e-05, "loss": 0.3118, "step": 12233 }, { "epoch": 1.9922240768635753, "grad_norm": 0.11893901973962784, "learning_rate": 1.5312692593197388e-05, "loss": 0.3064, "step": 12234 }, { "epoch": 1.9923869234214062, "grad_norm": 0.07763297855854034, "learning_rate": 1.5308325835188136e-05, "loss": 0.3572, "step": 12235 }, { "epoch": 1.9925497699792372, "grad_norm": 0.11584720015525818, "learning_rate": 1.530395942513902e-05, "loss": 0.305, "step": 12236 }, { "epoch": 1.992712616537068, "grad_norm": 0.10052389651536942, "learning_rate": 1.5299593363206816e-05, "loss": 0.3182, "step": 12237 }, { "epoch": 1.9928754630948988, "grad_norm": 0.1621672809123993, "learning_rate": 1.5295227649548265e-05, "loss": 0.33, "step": 12238 }, { "epoch": 1.9930383096527298, "grad_norm": 0.10141056776046753, "learning_rate": 1.529086228432012e-05, "loss": 0.2963, "step": 12239 }, { "epoch": 1.9932011562105605, "grad_norm": 0.13031800091266632, "learning_rate": 1.5286497267679113e-05, "loss": 0.2804, "step": 12240 }, { "epoch": 1.9933640027683914, "grad_norm": 0.11853756010532379, "learning_rate": 1.5282132599781958e-05, "loss": 0.3109, "step": 12241 }, { "epoch": 1.9935268493262224, "grad_norm": 0.12653999030590057, "learning_rate": 1.5277768280785348e-05, "loss": 0.3186, "step": 12242 }, { "epoch": 1.9936896958840533, "grad_norm": 0.11308037489652634, "learning_rate": 1.5273404310845994e-05, "loss": 0.29, "step": 12243 }, { "epoch": 1.9938525424418843, "grad_norm": 0.07867887616157532, "learning_rate": 1.5269040690120567e-05, "loss": 0.3179, "step": 12244 }, { "epoch": 1.994015388999715, "grad_norm": 0.12381312251091003, "learning_rate": 1.526467741876572e-05, "loss": 0.325, "step": 12245 }, { "epoch": 1.994178235557546, "grad_norm": 0.09803437441587448, "learning_rate": 1.526031449693814e-05, "loss": 0.3186, "step": 12246 }, { "epoch": 1.9943410821153766, "grad_norm": 0.09309868514537811, "learning_rate": 1.5255951924794445e-05, "loss": 0.2865, "step": 12247 }, { "epoch": 1.9945039286732076, "grad_norm": 0.12331362068653107, "learning_rate": 1.525158970249127e-05, "loss": 0.3138, "step": 12248 }, { "epoch": 1.9946667752310385, "grad_norm": 0.1511479765176773, "learning_rate": 1.5247227830185224e-05, "loss": 0.3022, "step": 12249 }, { "epoch": 1.9948296217888695, "grad_norm": 0.10478431731462479, "learning_rate": 1.5242866308032926e-05, "loss": 0.3506, "step": 12250 }, { "epoch": 1.9949924683467004, "grad_norm": 0.10010448098182678, "learning_rate": 1.5238505136190963e-05, "loss": 0.3191, "step": 12251 }, { "epoch": 1.9951553149045314, "grad_norm": 0.091097392141819, "learning_rate": 1.5234144314815906e-05, "loss": 0.2765, "step": 12252 }, { "epoch": 1.995318161462362, "grad_norm": 0.08544652163982391, "learning_rate": 1.5229783844064327e-05, "loss": 0.2777, "step": 12253 }, { "epoch": 1.995481008020193, "grad_norm": 0.12918129563331604, "learning_rate": 1.5225423724092789e-05, "loss": 0.3092, "step": 12254 }, { "epoch": 1.9956438545780237, "grad_norm": 0.09691078215837479, "learning_rate": 1.5221063955057815e-05, "loss": 0.3144, "step": 12255 }, { "epoch": 1.9958067011358547, "grad_norm": 0.09881319105625153, "learning_rate": 1.5216704537115944e-05, "loss": 0.2901, "step": 12256 }, { "epoch": 1.9959695476936856, "grad_norm": 0.14434055984020233, "learning_rate": 1.5212345470423694e-05, "loss": 0.3025, "step": 12257 }, { "epoch": 1.9961323942515166, "grad_norm": 0.08912962675094604, "learning_rate": 1.5207986755137571e-05, "loss": 0.2959, "step": 12258 }, { "epoch": 1.9962952408093475, "grad_norm": 0.10673844814300537, "learning_rate": 1.520362839141406e-05, "loss": 0.2959, "step": 12259 }, { "epoch": 1.9964580873671782, "grad_norm": 0.1261788308620453, "learning_rate": 1.519927037940963e-05, "loss": 0.3489, "step": 12260 }, { "epoch": 1.9966209339250092, "grad_norm": 0.09855952113866806, "learning_rate": 1.5194912719280769e-05, "loss": 0.3041, "step": 12261 }, { "epoch": 1.9967837804828399, "grad_norm": 0.08792373538017273, "learning_rate": 1.5190555411183916e-05, "loss": 0.3199, "step": 12262 }, { "epoch": 1.9969466270406708, "grad_norm": 0.0830516517162323, "learning_rate": 1.5186198455275503e-05, "loss": 0.2953, "step": 12263 }, { "epoch": 1.9971094735985018, "grad_norm": 0.0938442051410675, "learning_rate": 1.518184185171198e-05, "loss": 0.3011, "step": 12264 }, { "epoch": 1.9972723201563327, "grad_norm": 0.08229301124811172, "learning_rate": 1.5177485600649752e-05, "loss": 0.2876, "step": 12265 }, { "epoch": 1.9974351667141637, "grad_norm": 0.08288964629173279, "learning_rate": 1.5173129702245218e-05, "loss": 0.3137, "step": 12266 }, { "epoch": 1.9975980132719946, "grad_norm": 0.09109948575496674, "learning_rate": 1.5168774156654758e-05, "loss": 0.3371, "step": 12267 }, { "epoch": 1.9977608598298253, "grad_norm": 0.15432822704315186, "learning_rate": 1.5164418964034776e-05, "loss": 0.2979, "step": 12268 }, { "epoch": 1.9979237063876563, "grad_norm": 0.13962259888648987, "learning_rate": 1.5160064124541618e-05, "loss": 0.3084, "step": 12269 }, { "epoch": 1.998086552945487, "grad_norm": 0.11741236597299576, "learning_rate": 1.5155709638331638e-05, "loss": 0.327, "step": 12270 }, { "epoch": 1.998249399503318, "grad_norm": 0.08453007787466049, "learning_rate": 1.5151355505561171e-05, "loss": 0.2682, "step": 12271 }, { "epoch": 1.9984122460611489, "grad_norm": 0.1184144914150238, "learning_rate": 1.5147001726386553e-05, "loss": 0.3196, "step": 12272 }, { "epoch": 1.9985750926189798, "grad_norm": 0.09066279977560043, "learning_rate": 1.514264830096409e-05, "loss": 0.2932, "step": 12273 }, { "epoch": 1.9987379391768108, "grad_norm": 0.14341074228286743, "learning_rate": 1.5138295229450089e-05, "loss": 0.3109, "step": 12274 }, { "epoch": 1.9989007857346417, "grad_norm": 0.12202441692352295, "learning_rate": 1.513394251200084e-05, "loss": 0.2837, "step": 12275 }, { "epoch": 1.9990636322924724, "grad_norm": 0.07946627587080002, "learning_rate": 1.5129590148772607e-05, "loss": 0.336, "step": 12276 }, { "epoch": 1.9992264788503034, "grad_norm": 0.08848277479410172, "learning_rate": 1.5125238139921665e-05, "loss": 0.3113, "step": 12277 }, { "epoch": 1.999389325408134, "grad_norm": 0.09832104295492172, "learning_rate": 1.5120886485604252e-05, "loss": 0.2869, "step": 12278 }, { "epoch": 1.999552171965965, "grad_norm": 0.17022216320037842, "learning_rate": 1.5116535185976621e-05, "loss": 0.3355, "step": 12279 }, { "epoch": 1.999715018523796, "grad_norm": 0.08494146913290024, "learning_rate": 1.5112184241194994e-05, "loss": 0.3069, "step": 12280 }, { "epoch": 1.999877865081627, "grad_norm": 0.12909521162509918, "learning_rate": 1.510783365141557e-05, "loss": 0.3101, "step": 12281 }, { "epoch": 2.0, "grad_norm": 0.13654659688472748, "learning_rate": 1.5103483416794548e-05, "loss": 0.317, "step": 12282 }, { "epoch": 2.000162846557831, "grad_norm": 0.15622319281101227, "learning_rate": 1.5099133537488132e-05, "loss": 0.271, "step": 12283 }, { "epoch": 2.000325693115662, "grad_norm": 0.15885408222675323, "learning_rate": 1.5094784013652485e-05, "loss": 0.282, "step": 12284 }, { "epoch": 2.000488539673493, "grad_norm": 0.13808971643447876, "learning_rate": 1.5090434845443763e-05, "loss": 0.3006, "step": 12285 }, { "epoch": 2.0006513862313233, "grad_norm": 0.1553596407175064, "learning_rate": 1.5086086033018126e-05, "loss": 0.2557, "step": 12286 }, { "epoch": 2.0008142327891543, "grad_norm": 0.1628822386264801, "learning_rate": 1.5081737576531701e-05, "loss": 0.3164, "step": 12287 }, { "epoch": 2.000977079346985, "grad_norm": 0.14909252524375916, "learning_rate": 1.5077389476140619e-05, "loss": 0.3236, "step": 12288 }, { "epoch": 2.001139925904816, "grad_norm": 0.14888536930084229, "learning_rate": 1.507304173200097e-05, "loss": 0.2458, "step": 12289 }, { "epoch": 2.001302772462647, "grad_norm": 0.16965024173259735, "learning_rate": 1.5068694344268875e-05, "loss": 0.2731, "step": 12290 }, { "epoch": 2.001465619020478, "grad_norm": 0.156681090593338, "learning_rate": 1.5064347313100407e-05, "loss": 0.2967, "step": 12291 }, { "epoch": 2.001628465578309, "grad_norm": 0.13829690217971802, "learning_rate": 1.506000063865164e-05, "loss": 0.2687, "step": 12292 }, { "epoch": 2.00179131213614, "grad_norm": 0.1251167505979538, "learning_rate": 1.5055654321078621e-05, "loss": 0.3017, "step": 12293 }, { "epoch": 2.0019541586939704, "grad_norm": 0.1401359736919403, "learning_rate": 1.5051308360537414e-05, "loss": 0.276, "step": 12294 }, { "epoch": 2.0021170052518014, "grad_norm": 0.17851537466049194, "learning_rate": 1.5046962757184047e-05, "loss": 0.2782, "step": 12295 }, { "epoch": 2.0022798518096323, "grad_norm": 0.17677654325962067, "learning_rate": 1.5042617511174528e-05, "loss": 0.303, "step": 12296 }, { "epoch": 2.0024426983674632, "grad_norm": 0.158720463514328, "learning_rate": 1.5038272622664878e-05, "loss": 0.286, "step": 12297 }, { "epoch": 2.002605544925294, "grad_norm": 0.17386330664157867, "learning_rate": 1.5033928091811089e-05, "loss": 0.33, "step": 12298 }, { "epoch": 2.002768391483125, "grad_norm": 0.16583868861198425, "learning_rate": 1.5029583918769144e-05, "loss": 0.2814, "step": 12299 }, { "epoch": 2.002931238040956, "grad_norm": 0.14200657606124878, "learning_rate": 1.5025240103694996e-05, "loss": 0.275, "step": 12300 }, { "epoch": 2.0030940845987866, "grad_norm": 0.1958594024181366, "learning_rate": 1.5020896646744625e-05, "loss": 0.2485, "step": 12301 }, { "epoch": 2.0032569311566175, "grad_norm": 0.16047456860542297, "learning_rate": 1.501655354807396e-05, "loss": 0.2593, "step": 12302 }, { "epoch": 2.0034197777144485, "grad_norm": 0.15931637585163116, "learning_rate": 1.5012210807838936e-05, "loss": 0.2597, "step": 12303 }, { "epoch": 2.0035826242722794, "grad_norm": 0.12941361963748932, "learning_rate": 1.5007868426195459e-05, "loss": 0.2639, "step": 12304 }, { "epoch": 2.0037454708301103, "grad_norm": 0.2261953055858612, "learning_rate": 1.5003526403299451e-05, "loss": 0.3164, "step": 12305 }, { "epoch": 2.0039083173879413, "grad_norm": 0.16594256460666656, "learning_rate": 1.4999184739306795e-05, "loss": 0.304, "step": 12306 }, { "epoch": 2.0040711639457722, "grad_norm": 0.20841635763645172, "learning_rate": 1.4994843434373363e-05, "loss": 0.3265, "step": 12307 }, { "epoch": 2.004234010503603, "grad_norm": 0.2065277248620987, "learning_rate": 1.4990502488655034e-05, "loss": 0.3039, "step": 12308 }, { "epoch": 2.0043968570614337, "grad_norm": 0.17300476133823395, "learning_rate": 1.4986161902307655e-05, "loss": 0.2669, "step": 12309 }, { "epoch": 2.0045597036192646, "grad_norm": 0.19103549420833588, "learning_rate": 1.4981821675487065e-05, "loss": 0.2809, "step": 12310 }, { "epoch": 2.0047225501770956, "grad_norm": 0.20994919538497925, "learning_rate": 1.497748180834909e-05, "loss": 0.2813, "step": 12311 }, { "epoch": 2.0048853967349265, "grad_norm": 0.13832902908325195, "learning_rate": 1.497314230104955e-05, "loss": 0.2478, "step": 12312 }, { "epoch": 2.0050482432927574, "grad_norm": 0.13109096884727478, "learning_rate": 1.4968803153744237e-05, "loss": 0.2601, "step": 12313 }, { "epoch": 2.0052110898505884, "grad_norm": 0.45853373408317566, "learning_rate": 1.4964464366588948e-05, "loss": 0.3408, "step": 12314 }, { "epoch": 2.0053739364084193, "grad_norm": 0.17036780714988708, "learning_rate": 1.4960125939739456e-05, "loss": 0.2967, "step": 12315 }, { "epoch": 2.0055367829662503, "grad_norm": 0.13879670202732086, "learning_rate": 1.4955787873351526e-05, "loss": 0.2774, "step": 12316 }, { "epoch": 2.0056996295240808, "grad_norm": 0.20291033387184143, "learning_rate": 1.4951450167580905e-05, "loss": 0.3028, "step": 12317 }, { "epoch": 2.0058624760819117, "grad_norm": 0.16878642141819, "learning_rate": 1.494711282258332e-05, "loss": 0.2984, "step": 12318 }, { "epoch": 2.0060253226397426, "grad_norm": 0.17533473670482635, "learning_rate": 1.4942775838514512e-05, "loss": 0.2776, "step": 12319 }, { "epoch": 2.0061881691975736, "grad_norm": 0.13532692193984985, "learning_rate": 1.4938439215530187e-05, "loss": 0.2829, "step": 12320 }, { "epoch": 2.0063510157554045, "grad_norm": 0.15875953435897827, "learning_rate": 1.4934102953786036e-05, "loss": 0.2442, "step": 12321 }, { "epoch": 2.0065138623132355, "grad_norm": 0.22102293372154236, "learning_rate": 1.4929767053437737e-05, "loss": 0.3218, "step": 12322 }, { "epoch": 2.0066767088710664, "grad_norm": 0.1277981698513031, "learning_rate": 1.4925431514640985e-05, "loss": 0.2765, "step": 12323 }, { "epoch": 2.006839555428897, "grad_norm": 0.19796399772167206, "learning_rate": 1.4921096337551424e-05, "loss": 0.2731, "step": 12324 }, { "epoch": 2.007002401986728, "grad_norm": 0.18070994317531586, "learning_rate": 1.491676152232469e-05, "loss": 0.2582, "step": 12325 }, { "epoch": 2.007165248544559, "grad_norm": 0.13809502124786377, "learning_rate": 1.4912427069116442e-05, "loss": 0.2875, "step": 12326 }, { "epoch": 2.0073280951023897, "grad_norm": 0.1374608278274536, "learning_rate": 1.4908092978082284e-05, "loss": 0.3289, "step": 12327 }, { "epoch": 2.0074909416602207, "grad_norm": 0.13711796700954437, "learning_rate": 1.4903759249377822e-05, "loss": 0.2899, "step": 12328 }, { "epoch": 2.0076537882180516, "grad_norm": 0.18665733933448792, "learning_rate": 1.4899425883158642e-05, "loss": 0.2563, "step": 12329 }, { "epoch": 2.0078166347758826, "grad_norm": 0.16471350193023682, "learning_rate": 1.4895092879580346e-05, "loss": 0.2936, "step": 12330 }, { "epoch": 2.0079794813337135, "grad_norm": 0.1704246997833252, "learning_rate": 1.4890760238798485e-05, "loss": 0.281, "step": 12331 }, { "epoch": 2.008142327891544, "grad_norm": 0.19616016745567322, "learning_rate": 1.4886427960968627e-05, "loss": 0.3258, "step": 12332 }, { "epoch": 2.008305174449375, "grad_norm": 0.14947089552879333, "learning_rate": 1.48820960462463e-05, "loss": 0.2506, "step": 12333 }, { "epoch": 2.008468021007206, "grad_norm": 0.20111557841300964, "learning_rate": 1.4877764494787039e-05, "loss": 0.2908, "step": 12334 }, { "epoch": 2.008630867565037, "grad_norm": 0.18158553540706635, "learning_rate": 1.4873433306746366e-05, "loss": 0.3137, "step": 12335 }, { "epoch": 2.008793714122868, "grad_norm": 0.1483672708272934, "learning_rate": 1.4869102482279767e-05, "loss": 0.3029, "step": 12336 }, { "epoch": 2.0089565606806987, "grad_norm": 0.15616796910762787, "learning_rate": 1.4864772021542744e-05, "loss": 0.2624, "step": 12337 }, { "epoch": 2.0091194072385297, "grad_norm": 0.11960410326719284, "learning_rate": 1.486044192469078e-05, "loss": 0.2857, "step": 12338 }, { "epoch": 2.00928225379636, "grad_norm": 0.1540840119123459, "learning_rate": 1.4856112191879328e-05, "loss": 0.2533, "step": 12339 }, { "epoch": 2.009445100354191, "grad_norm": 0.18712949752807617, "learning_rate": 1.485178282326383e-05, "loss": 0.2782, "step": 12340 }, { "epoch": 2.009607946912022, "grad_norm": 0.14731596410274506, "learning_rate": 1.4847453818999743e-05, "loss": 0.2779, "step": 12341 }, { "epoch": 2.009770793469853, "grad_norm": 0.14480875432491302, "learning_rate": 1.4843125179242484e-05, "loss": 0.2883, "step": 12342 }, { "epoch": 2.009933640027684, "grad_norm": 0.18131603300571442, "learning_rate": 1.4838796904147456e-05, "loss": 0.2804, "step": 12343 }, { "epoch": 2.010096486585515, "grad_norm": 0.15527436137199402, "learning_rate": 1.483446899387006e-05, "loss": 0.2749, "step": 12344 }, { "epoch": 2.010259333143346, "grad_norm": 0.15956498682498932, "learning_rate": 1.483014144856569e-05, "loss": 0.2465, "step": 12345 }, { "epoch": 2.0104221797011768, "grad_norm": 0.17702674865722656, "learning_rate": 1.4825814268389712e-05, "loss": 0.29, "step": 12346 }, { "epoch": 2.0105850262590073, "grad_norm": 0.16645626723766327, "learning_rate": 1.4821487453497474e-05, "loss": 0.2673, "step": 12347 }, { "epoch": 2.010747872816838, "grad_norm": 0.14213106036186218, "learning_rate": 1.4817161004044344e-05, "loss": 0.2563, "step": 12348 }, { "epoch": 2.010910719374669, "grad_norm": 0.18965739011764526, "learning_rate": 1.4812834920185643e-05, "loss": 0.2812, "step": 12349 }, { "epoch": 2.0110735659325, "grad_norm": 0.2255106270313263, "learning_rate": 1.4808509202076681e-05, "loss": 0.2585, "step": 12350 }, { "epoch": 2.011236412490331, "grad_norm": 0.14364276826381683, "learning_rate": 1.4804183849872777e-05, "loss": 0.3243, "step": 12351 }, { "epoch": 2.011399259048162, "grad_norm": 0.15395955741405487, "learning_rate": 1.4799858863729224e-05, "loss": 0.3019, "step": 12352 }, { "epoch": 2.011562105605993, "grad_norm": 0.15644432604312897, "learning_rate": 1.4795534243801292e-05, "loss": 0.3027, "step": 12353 }, { "epoch": 2.011724952163824, "grad_norm": 0.144598126411438, "learning_rate": 1.479120999024426e-05, "loss": 0.2672, "step": 12354 }, { "epoch": 2.0118877987216544, "grad_norm": 0.172099307179451, "learning_rate": 1.4786886103213366e-05, "loss": 0.2743, "step": 12355 }, { "epoch": 2.0120506452794853, "grad_norm": 0.15669766068458557, "learning_rate": 1.4782562582863872e-05, "loss": 0.2646, "step": 12356 }, { "epoch": 2.0122134918373162, "grad_norm": 0.17817428708076477, "learning_rate": 1.4778239429350991e-05, "loss": 0.2641, "step": 12357 }, { "epoch": 2.012376338395147, "grad_norm": 0.1892799735069275, "learning_rate": 1.4773916642829929e-05, "loss": 0.2741, "step": 12358 }, { "epoch": 2.012539184952978, "grad_norm": 0.21685989201068878, "learning_rate": 1.476959422345591e-05, "loss": 0.2591, "step": 12359 }, { "epoch": 2.012702031510809, "grad_norm": 0.18785974383354187, "learning_rate": 1.4765272171384109e-05, "loss": 0.2804, "step": 12360 }, { "epoch": 2.01286487806864, "grad_norm": 0.15200690925121307, "learning_rate": 1.4760950486769697e-05, "loss": 0.2728, "step": 12361 }, { "epoch": 2.0130277246264705, "grad_norm": 0.12315016984939575, "learning_rate": 1.4756629169767835e-05, "loss": 0.2618, "step": 12362 }, { "epoch": 2.0131905711843014, "grad_norm": 0.16006501019001007, "learning_rate": 1.4752308220533684e-05, "loss": 0.267, "step": 12363 }, { "epoch": 2.0133534177421324, "grad_norm": 0.1727599948644638, "learning_rate": 1.4747987639222371e-05, "loss": 0.2414, "step": 12364 }, { "epoch": 2.0135162642999633, "grad_norm": 0.1696733683347702, "learning_rate": 1.4743667425989007e-05, "loss": 0.3388, "step": 12365 }, { "epoch": 2.0136791108577943, "grad_norm": 0.14978241920471191, "learning_rate": 1.4739347580988721e-05, "loss": 0.2579, "step": 12366 }, { "epoch": 2.013841957415625, "grad_norm": 0.2167307585477829, "learning_rate": 1.4735028104376599e-05, "loss": 0.3186, "step": 12367 }, { "epoch": 2.014004803973456, "grad_norm": 0.1611703336238861, "learning_rate": 1.4730708996307723e-05, "loss": 0.2768, "step": 12368 }, { "epoch": 2.014167650531287, "grad_norm": 0.1629185825586319, "learning_rate": 1.4726390256937151e-05, "loss": 0.2973, "step": 12369 }, { "epoch": 2.0143304970891176, "grad_norm": 0.15665164589881897, "learning_rate": 1.472207188641996e-05, "loss": 0.2984, "step": 12370 }, { "epoch": 2.0144933436469485, "grad_norm": 0.18955372273921967, "learning_rate": 1.4717753884911178e-05, "loss": 0.3141, "step": 12371 }, { "epoch": 2.0146561902047795, "grad_norm": 0.14086098968982697, "learning_rate": 1.471343625256584e-05, "loss": 0.2866, "step": 12372 }, { "epoch": 2.0148190367626104, "grad_norm": 0.1636047661304474, "learning_rate": 1.4709118989538957e-05, "loss": 0.2998, "step": 12373 }, { "epoch": 2.0149818833204414, "grad_norm": 0.1423758566379547, "learning_rate": 1.4704802095985534e-05, "loss": 0.3356, "step": 12374 }, { "epoch": 2.0151447298782723, "grad_norm": 0.16782040894031525, "learning_rate": 1.4700485572060565e-05, "loss": 0.3033, "step": 12375 }, { "epoch": 2.0153075764361033, "grad_norm": 0.16278184950351715, "learning_rate": 1.4696169417919014e-05, "loss": 0.2453, "step": 12376 }, { "epoch": 2.0154704229939338, "grad_norm": 0.16505666077136993, "learning_rate": 1.4691853633715864e-05, "loss": 0.2755, "step": 12377 }, { "epoch": 2.0156332695517647, "grad_norm": 0.12632282078266144, "learning_rate": 1.4687538219606045e-05, "loss": 0.2573, "step": 12378 }, { "epoch": 2.0157961161095956, "grad_norm": 0.1429663598537445, "learning_rate": 1.4683223175744507e-05, "loss": 0.2767, "step": 12379 }, { "epoch": 2.0159589626674266, "grad_norm": 0.1815613955259323, "learning_rate": 1.4678908502286155e-05, "loss": 0.3263, "step": 12380 }, { "epoch": 2.0161218092252575, "grad_norm": 0.1498880684375763, "learning_rate": 1.4674594199385922e-05, "loss": 0.2674, "step": 12381 }, { "epoch": 2.0162846557830885, "grad_norm": 0.16322264075279236, "learning_rate": 1.467028026719869e-05, "loss": 0.2795, "step": 12382 }, { "epoch": 2.0164475023409194, "grad_norm": 0.15709233283996582, "learning_rate": 1.4665966705879347e-05, "loss": 0.2864, "step": 12383 }, { "epoch": 2.0166103488987503, "grad_norm": 0.18612752854824066, "learning_rate": 1.4661653515582752e-05, "loss": 0.2958, "step": 12384 }, { "epoch": 2.016773195456581, "grad_norm": 0.15575580298900604, "learning_rate": 1.4657340696463778e-05, "loss": 0.2991, "step": 12385 }, { "epoch": 2.016936042014412, "grad_norm": 0.15034930408000946, "learning_rate": 1.4653028248677264e-05, "loss": 0.3087, "step": 12386 }, { "epoch": 2.0170988885722427, "grad_norm": 0.19828684628009796, "learning_rate": 1.4648716172378025e-05, "loss": 0.3206, "step": 12387 }, { "epoch": 2.0172617351300737, "grad_norm": 0.1655343919992447, "learning_rate": 1.4644404467720896e-05, "loss": 0.2755, "step": 12388 }, { "epoch": 2.0174245816879046, "grad_norm": 0.15512476861476898, "learning_rate": 1.4640093134860672e-05, "loss": 0.3033, "step": 12389 }, { "epoch": 2.0175874282457356, "grad_norm": 0.17748290300369263, "learning_rate": 1.4635782173952141e-05, "loss": 0.3103, "step": 12390 }, { "epoch": 2.0177502748035665, "grad_norm": 0.1658630222082138, "learning_rate": 1.4631471585150083e-05, "loss": 0.2593, "step": 12391 }, { "epoch": 2.0179131213613974, "grad_norm": 0.16572162508964539, "learning_rate": 1.462716136860926e-05, "loss": 0.2671, "step": 12392 }, { "epoch": 2.018075967919228, "grad_norm": 0.16484183073043823, "learning_rate": 1.4622851524484427e-05, "loss": 0.2733, "step": 12393 }, { "epoch": 2.018238814477059, "grad_norm": 0.14728781580924988, "learning_rate": 1.4618542052930312e-05, "loss": 0.2787, "step": 12394 }, { "epoch": 2.01840166103489, "grad_norm": 0.19275228679180145, "learning_rate": 1.4614232954101631e-05, "loss": 0.2779, "step": 12395 }, { "epoch": 2.0185645075927208, "grad_norm": 0.1870069056749344, "learning_rate": 1.4609924228153116e-05, "loss": 0.2582, "step": 12396 }, { "epoch": 2.0187273541505517, "grad_norm": 0.21710260212421417, "learning_rate": 1.460561587523945e-05, "loss": 0.3158, "step": 12397 }, { "epoch": 2.0188902007083827, "grad_norm": 0.16095612943172455, "learning_rate": 1.4601307895515307e-05, "loss": 0.279, "step": 12398 }, { "epoch": 2.0190530472662136, "grad_norm": 0.2065102905035019, "learning_rate": 1.4597000289135377e-05, "loss": 0.3198, "step": 12399 }, { "epoch": 2.019215893824044, "grad_norm": 0.1458464413881302, "learning_rate": 1.4592693056254298e-05, "loss": 0.2876, "step": 12400 }, { "epoch": 2.019378740381875, "grad_norm": 0.16326120495796204, "learning_rate": 1.458838619702672e-05, "loss": 0.2562, "step": 12401 }, { "epoch": 2.019541586939706, "grad_norm": 0.17500725388526917, "learning_rate": 1.4584079711607279e-05, "loss": 0.2679, "step": 12402 }, { "epoch": 2.019704433497537, "grad_norm": 0.1255984902381897, "learning_rate": 1.457977360015057e-05, "loss": 0.2464, "step": 12403 }, { "epoch": 2.019867280055368, "grad_norm": 0.24061480164527893, "learning_rate": 1.4575467862811217e-05, "loss": 0.3031, "step": 12404 }, { "epoch": 2.020030126613199, "grad_norm": 0.11558794230222702, "learning_rate": 1.4571162499743804e-05, "loss": 0.2814, "step": 12405 }, { "epoch": 2.0201929731710297, "grad_norm": 0.16878461837768555, "learning_rate": 1.456685751110289e-05, "loss": 0.2549, "step": 12406 }, { "epoch": 2.0203558197288607, "grad_norm": 0.1369466930627823, "learning_rate": 1.4562552897043064e-05, "loss": 0.2962, "step": 12407 }, { "epoch": 2.020518666286691, "grad_norm": 0.17995962500572205, "learning_rate": 1.4558248657718854e-05, "loss": 0.2752, "step": 12408 }, { "epoch": 2.020681512844522, "grad_norm": 0.15951353311538696, "learning_rate": 1.4553944793284797e-05, "loss": 0.295, "step": 12409 }, { "epoch": 2.020844359402353, "grad_norm": 0.17076018452644348, "learning_rate": 1.4549641303895429e-05, "loss": 0.2477, "step": 12410 }, { "epoch": 2.021007205960184, "grad_norm": 0.18261195719242096, "learning_rate": 1.4545338189705243e-05, "loss": 0.2581, "step": 12411 }, { "epoch": 2.021170052518015, "grad_norm": 0.17448784410953522, "learning_rate": 1.4541035450868746e-05, "loss": 0.2718, "step": 12412 }, { "epoch": 2.021332899075846, "grad_norm": 0.146986722946167, "learning_rate": 1.4536733087540397e-05, "loss": 0.2581, "step": 12413 }, { "epoch": 2.021495745633677, "grad_norm": 0.16440081596374512, "learning_rate": 1.4532431099874688e-05, "loss": 0.2737, "step": 12414 }, { "epoch": 2.0216585921915073, "grad_norm": 0.13820815086364746, "learning_rate": 1.4528129488026068e-05, "loss": 0.2625, "step": 12415 }, { "epoch": 2.0218214387493383, "grad_norm": 0.1561974734067917, "learning_rate": 1.4523828252148964e-05, "loss": 0.257, "step": 12416 }, { "epoch": 2.0219842853071692, "grad_norm": 0.17415930330753326, "learning_rate": 1.4519527392397819e-05, "loss": 0.2742, "step": 12417 }, { "epoch": 2.022147131865, "grad_norm": 0.21489214897155762, "learning_rate": 1.4515226908927046e-05, "loss": 0.3094, "step": 12418 }, { "epoch": 2.022309978422831, "grad_norm": 0.202470600605011, "learning_rate": 1.451092680189104e-05, "loss": 0.2969, "step": 12419 }, { "epoch": 2.022472824980662, "grad_norm": 0.14101074635982513, "learning_rate": 1.4506627071444173e-05, "loss": 0.2743, "step": 12420 }, { "epoch": 2.022635671538493, "grad_norm": 0.1673903614282608, "learning_rate": 1.450232771774085e-05, "loss": 0.2492, "step": 12421 }, { "epoch": 2.022798518096324, "grad_norm": 0.1810591220855713, "learning_rate": 1.4498028740935407e-05, "loss": 0.3062, "step": 12422 }, { "epoch": 2.0229613646541544, "grad_norm": 0.1735767424106598, "learning_rate": 1.4493730141182198e-05, "loss": 0.273, "step": 12423 }, { "epoch": 2.0231242112119854, "grad_norm": 0.2036239504814148, "learning_rate": 1.4489431918635548e-05, "loss": 0.3074, "step": 12424 }, { "epoch": 2.0232870577698163, "grad_norm": 0.18380993604660034, "learning_rate": 1.448513407344979e-05, "loss": 0.2572, "step": 12425 }, { "epoch": 2.0234499043276473, "grad_norm": 0.17078371345996857, "learning_rate": 1.4480836605779225e-05, "loss": 0.2768, "step": 12426 }, { "epoch": 2.023612750885478, "grad_norm": 0.15895862877368927, "learning_rate": 1.447653951577813e-05, "loss": 0.2896, "step": 12427 }, { "epoch": 2.023775597443309, "grad_norm": 0.14685434103012085, "learning_rate": 1.4472242803600804e-05, "loss": 0.2863, "step": 12428 }, { "epoch": 2.02393844400114, "grad_norm": 0.14269015192985535, "learning_rate": 1.4467946469401505e-05, "loss": 0.3078, "step": 12429 }, { "epoch": 2.0241012905589706, "grad_norm": 0.19454897940158844, "learning_rate": 1.4463650513334484e-05, "loss": 0.2865, "step": 12430 }, { "epoch": 2.0242641371168015, "grad_norm": 0.15721668303012848, "learning_rate": 1.4459354935553965e-05, "loss": 0.264, "step": 12431 }, { "epoch": 2.0244269836746325, "grad_norm": 0.1436469405889511, "learning_rate": 1.4455059736214196e-05, "loss": 0.2471, "step": 12432 }, { "epoch": 2.0245898302324634, "grad_norm": 0.13664066791534424, "learning_rate": 1.4450764915469378e-05, "loss": 0.2452, "step": 12433 }, { "epoch": 2.0247526767902944, "grad_norm": 0.15009960532188416, "learning_rate": 1.44464704734737e-05, "loss": 0.3105, "step": 12434 }, { "epoch": 2.0249155233481253, "grad_norm": 0.19201616942882538, "learning_rate": 1.4442176410381347e-05, "loss": 0.2417, "step": 12435 }, { "epoch": 2.0250783699059562, "grad_norm": 0.2015380710363388, "learning_rate": 1.4437882726346499e-05, "loss": 0.3084, "step": 12436 }, { "epoch": 2.025241216463787, "grad_norm": 0.19070596992969513, "learning_rate": 1.4433589421523297e-05, "loss": 0.308, "step": 12437 }, { "epoch": 2.0254040630216177, "grad_norm": 0.16581065952777863, "learning_rate": 1.4429296496065903e-05, "loss": 0.251, "step": 12438 }, { "epoch": 2.0255669095794486, "grad_norm": 0.18028220534324646, "learning_rate": 1.4425003950128436e-05, "loss": 0.2877, "step": 12439 }, { "epoch": 2.0257297561372796, "grad_norm": 0.16180746257305145, "learning_rate": 1.4420711783865002e-05, "loss": 0.282, "step": 12440 }, { "epoch": 2.0258926026951105, "grad_norm": 0.1590324193239212, "learning_rate": 1.441641999742972e-05, "loss": 0.2662, "step": 12441 }, { "epoch": 2.0260554492529415, "grad_norm": 0.1997813582420349, "learning_rate": 1.441212859097666e-05, "loss": 0.2669, "step": 12442 }, { "epoch": 2.0262182958107724, "grad_norm": 0.22775334119796753, "learning_rate": 1.4407837564659912e-05, "loss": 0.3264, "step": 12443 }, { "epoch": 2.0263811423686033, "grad_norm": 0.15926751494407654, "learning_rate": 1.4403546918633531e-05, "loss": 0.2775, "step": 12444 }, { "epoch": 2.0265439889264343, "grad_norm": 0.20526500046253204, "learning_rate": 1.4399256653051568e-05, "loss": 0.3022, "step": 12445 }, { "epoch": 2.026706835484265, "grad_norm": 0.14775212109088898, "learning_rate": 1.439496676806804e-05, "loss": 0.2557, "step": 12446 }, { "epoch": 2.0268696820420957, "grad_norm": 0.1686701774597168, "learning_rate": 1.4390677263836988e-05, "loss": 0.2764, "step": 12447 }, { "epoch": 2.0270325285999267, "grad_norm": 0.16326701641082764, "learning_rate": 1.4386388140512407e-05, "loss": 0.2824, "step": 12448 }, { "epoch": 2.0271953751577576, "grad_norm": 0.2085215300321579, "learning_rate": 1.4382099398248283e-05, "loss": 0.2789, "step": 12449 }, { "epoch": 2.0273582217155885, "grad_norm": 0.24774184823036194, "learning_rate": 1.4377811037198612e-05, "loss": 0.3163, "step": 12450 }, { "epoch": 2.0275210682734195, "grad_norm": 0.15783528983592987, "learning_rate": 1.4373523057517352e-05, "loss": 0.2517, "step": 12451 }, { "epoch": 2.0276839148312504, "grad_norm": 0.15695515275001526, "learning_rate": 1.436923545935845e-05, "loss": 0.2457, "step": 12452 }, { "epoch": 2.027846761389081, "grad_norm": 0.14446255564689636, "learning_rate": 1.4364948242875834e-05, "loss": 0.253, "step": 12453 }, { "epoch": 2.028009607946912, "grad_norm": 0.19329677522182465, "learning_rate": 1.4360661408223453e-05, "loss": 0.3421, "step": 12454 }, { "epoch": 2.028172454504743, "grad_norm": 0.22873814404010773, "learning_rate": 1.4356374955555199e-05, "loss": 0.2755, "step": 12455 }, { "epoch": 2.0283353010625738, "grad_norm": 0.18053030967712402, "learning_rate": 1.4352088885024979e-05, "loss": 0.2687, "step": 12456 }, { "epoch": 2.0284981476204047, "grad_norm": 0.1533372402191162, "learning_rate": 1.4347803196786655e-05, "loss": 0.3114, "step": 12457 }, { "epoch": 2.0286609941782356, "grad_norm": 0.1802690029144287, "learning_rate": 1.4343517890994124e-05, "loss": 0.2754, "step": 12458 }, { "epoch": 2.0288238407360666, "grad_norm": 0.1953757256269455, "learning_rate": 1.4339232967801225e-05, "loss": 0.2786, "step": 12459 }, { "epoch": 2.0289866872938975, "grad_norm": 0.1300404965877533, "learning_rate": 1.4334948427361797e-05, "loss": 0.2958, "step": 12460 }, { "epoch": 2.029149533851728, "grad_norm": 0.1499878615140915, "learning_rate": 1.433066426982968e-05, "loss": 0.2478, "step": 12461 }, { "epoch": 2.029312380409559, "grad_norm": 0.17407789826393127, "learning_rate": 1.4326380495358687e-05, "loss": 0.32, "step": 12462 }, { "epoch": 2.02947522696739, "grad_norm": 0.17129020392894745, "learning_rate": 1.432209710410261e-05, "loss": 0.2835, "step": 12463 }, { "epoch": 2.029638073525221, "grad_norm": 0.1546015441417694, "learning_rate": 1.4317814096215228e-05, "loss": 0.2732, "step": 12464 }, { "epoch": 2.029800920083052, "grad_norm": 0.20004260540008545, "learning_rate": 1.431353147185034e-05, "loss": 0.2874, "step": 12465 }, { "epoch": 2.0299637666408827, "grad_norm": 0.1519824117422104, "learning_rate": 1.430924923116168e-05, "loss": 0.2762, "step": 12466 }, { "epoch": 2.0301266131987137, "grad_norm": 0.14682850241661072, "learning_rate": 1.4304967374303001e-05, "loss": 0.2937, "step": 12467 }, { "epoch": 2.030289459756544, "grad_norm": 0.15887953341007233, "learning_rate": 1.4300685901428044e-05, "loss": 0.3058, "step": 12468 }, { "epoch": 2.030452306314375, "grad_norm": 0.17294077575206757, "learning_rate": 1.4296404812690517e-05, "loss": 0.3, "step": 12469 }, { "epoch": 2.030615152872206, "grad_norm": 0.15504591166973114, "learning_rate": 1.4292124108244132e-05, "loss": 0.2848, "step": 12470 }, { "epoch": 2.030777999430037, "grad_norm": 0.24676479399204254, "learning_rate": 1.4287843788242555e-05, "loss": 0.3063, "step": 12471 }, { "epoch": 2.030940845987868, "grad_norm": 0.15674300491809845, "learning_rate": 1.4283563852839499e-05, "loss": 0.2762, "step": 12472 }, { "epoch": 2.031103692545699, "grad_norm": 0.18023419380187988, "learning_rate": 1.4279284302188601e-05, "loss": 0.2604, "step": 12473 }, { "epoch": 2.03126653910353, "grad_norm": 0.18236669898033142, "learning_rate": 1.427500513644351e-05, "loss": 0.2561, "step": 12474 }, { "epoch": 2.0314293856613608, "grad_norm": 0.21286918222904205, "learning_rate": 1.4270726355757876e-05, "loss": 0.3206, "step": 12475 }, { "epoch": 2.0315922322191913, "grad_norm": 0.1416264921426773, "learning_rate": 1.4266447960285311e-05, "loss": 0.2492, "step": 12476 }, { "epoch": 2.031755078777022, "grad_norm": 0.2316030114889145, "learning_rate": 1.4262169950179416e-05, "loss": 0.261, "step": 12477 }, { "epoch": 2.031917925334853, "grad_norm": 0.19702593982219696, "learning_rate": 1.42578923255938e-05, "loss": 0.2653, "step": 12478 }, { "epoch": 2.032080771892684, "grad_norm": 0.18525123596191406, "learning_rate": 1.4253615086682031e-05, "loss": 0.2709, "step": 12479 }, { "epoch": 2.032243618450515, "grad_norm": 0.17039383947849274, "learning_rate": 1.4249338233597673e-05, "loss": 0.2955, "step": 12480 }, { "epoch": 2.032406465008346, "grad_norm": 0.18656164407730103, "learning_rate": 1.4245061766494288e-05, "loss": 0.3155, "step": 12481 }, { "epoch": 2.032569311566177, "grad_norm": 0.2171458750963211, "learning_rate": 1.4240785685525399e-05, "loss": 0.2781, "step": 12482 }, { "epoch": 2.032732158124008, "grad_norm": 0.16458769142627716, "learning_rate": 1.4236509990844549e-05, "loss": 0.3267, "step": 12483 }, { "epoch": 2.0328950046818384, "grad_norm": 0.16539478302001953, "learning_rate": 1.4232234682605238e-05, "loss": 0.2885, "step": 12484 }, { "epoch": 2.0330578512396693, "grad_norm": 0.18288347125053406, "learning_rate": 1.4227959760960965e-05, "loss": 0.3045, "step": 12485 }, { "epoch": 2.0332206977975003, "grad_norm": 0.17566761374473572, "learning_rate": 1.42236852260652e-05, "loss": 0.2921, "step": 12486 }, { "epoch": 2.033383544355331, "grad_norm": 0.18913759291172028, "learning_rate": 1.4219411078071431e-05, "loss": 0.2877, "step": 12487 }, { "epoch": 2.033546390913162, "grad_norm": 0.14397019147872925, "learning_rate": 1.4215137317133103e-05, "loss": 0.277, "step": 12488 }, { "epoch": 2.033709237470993, "grad_norm": 0.17554330825805664, "learning_rate": 1.4210863943403652e-05, "loss": 0.2775, "step": 12489 }, { "epoch": 2.033872084028824, "grad_norm": 0.13642559945583344, "learning_rate": 1.4206590957036516e-05, "loss": 0.2842, "step": 12490 }, { "epoch": 2.0340349305866545, "grad_norm": 0.15823255479335785, "learning_rate": 1.4202318358185107e-05, "loss": 0.3012, "step": 12491 }, { "epoch": 2.0341977771444855, "grad_norm": 0.1480199545621872, "learning_rate": 1.4198046147002813e-05, "loss": 0.2361, "step": 12492 }, { "epoch": 2.0343606237023164, "grad_norm": 0.16821151971817017, "learning_rate": 1.4193774323643022e-05, "loss": 0.2882, "step": 12493 }, { "epoch": 2.0345234702601473, "grad_norm": 0.12388138473033905, "learning_rate": 1.4189502888259116e-05, "loss": 0.2906, "step": 12494 }, { "epoch": 2.0346863168179783, "grad_norm": 0.1799665093421936, "learning_rate": 1.4185231841004449e-05, "loss": 0.2871, "step": 12495 }, { "epoch": 2.0348491633758092, "grad_norm": 0.14483919739723206, "learning_rate": 1.4180961182032354e-05, "loss": 0.2752, "step": 12496 }, { "epoch": 2.03501200993364, "grad_norm": 0.18704377114772797, "learning_rate": 1.4176690911496163e-05, "loss": 0.2919, "step": 12497 }, { "epoch": 2.035174856491471, "grad_norm": 0.12130138278007507, "learning_rate": 1.4172421029549204e-05, "loss": 0.2707, "step": 12498 }, { "epoch": 2.0353377030493016, "grad_norm": 0.19509543478488922, "learning_rate": 1.4168151536344771e-05, "loss": 0.3166, "step": 12499 }, { "epoch": 2.0355005496071326, "grad_norm": 0.1662577986717224, "learning_rate": 1.416388243203614e-05, "loss": 0.2715, "step": 12500 }, { "epoch": 2.0356633961649635, "grad_norm": 0.1737576574087143, "learning_rate": 1.4159613716776605e-05, "loss": 0.2556, "step": 12501 }, { "epoch": 2.0358262427227944, "grad_norm": 0.14350765943527222, "learning_rate": 1.4155345390719418e-05, "loss": 0.2634, "step": 12502 }, { "epoch": 2.0359890892806254, "grad_norm": 0.12563158571720123, "learning_rate": 1.4151077454017821e-05, "loss": 0.2762, "step": 12503 }, { "epoch": 2.0361519358384563, "grad_norm": 0.1663142442703247, "learning_rate": 1.414680990682504e-05, "loss": 0.2925, "step": 12504 }, { "epoch": 2.0363147823962873, "grad_norm": 0.15001004934310913, "learning_rate": 1.4142542749294307e-05, "loss": 0.3018, "step": 12505 }, { "epoch": 2.0364776289541178, "grad_norm": 0.12068889290094376, "learning_rate": 1.4138275981578825e-05, "loss": 0.29, "step": 12506 }, { "epoch": 2.0366404755119487, "grad_norm": 0.16217325627803802, "learning_rate": 1.413400960383176e-05, "loss": 0.288, "step": 12507 }, { "epoch": 2.0368033220697797, "grad_norm": 0.24020874500274658, "learning_rate": 1.412974361620632e-05, "loss": 0.3461, "step": 12508 }, { "epoch": 2.0369661686276106, "grad_norm": 0.15743477642536163, "learning_rate": 1.4125478018855653e-05, "loss": 0.2429, "step": 12509 }, { "epoch": 2.0371290151854415, "grad_norm": 0.16660654544830322, "learning_rate": 1.4121212811932904e-05, "loss": 0.2818, "step": 12510 }, { "epoch": 2.0372918617432725, "grad_norm": 0.17545369267463684, "learning_rate": 1.41169479955912e-05, "loss": 0.285, "step": 12511 }, { "epoch": 2.0374547083011034, "grad_norm": 0.15442663431167603, "learning_rate": 1.4112683569983681e-05, "loss": 0.2741, "step": 12512 }, { "epoch": 2.0376175548589344, "grad_norm": 0.16206789016723633, "learning_rate": 1.4108419535263439e-05, "loss": 0.2842, "step": 12513 }, { "epoch": 2.037780401416765, "grad_norm": 0.15450097620487213, "learning_rate": 1.410415589158356e-05, "loss": 0.2409, "step": 12514 }, { "epoch": 2.037943247974596, "grad_norm": 0.1491178423166275, "learning_rate": 1.4099892639097135e-05, "loss": 0.24, "step": 12515 }, { "epoch": 2.0381060945324267, "grad_norm": 0.18095284700393677, "learning_rate": 1.4095629777957226e-05, "loss": 0.2716, "step": 12516 }, { "epoch": 2.0382689410902577, "grad_norm": 0.18105505406856537, "learning_rate": 1.4091367308316866e-05, "loss": 0.3063, "step": 12517 }, { "epoch": 2.0384317876480886, "grad_norm": 0.1428457349538803, "learning_rate": 1.4087105230329112e-05, "loss": 0.2856, "step": 12518 }, { "epoch": 2.0385946342059196, "grad_norm": 0.1716044694185257, "learning_rate": 1.4082843544146968e-05, "loss": 0.2727, "step": 12519 }, { "epoch": 2.0387574807637505, "grad_norm": 0.11779630184173584, "learning_rate": 1.4078582249923458e-05, "loss": 0.3037, "step": 12520 }, { "epoch": 2.0389203273215815, "grad_norm": 0.1321110874414444, "learning_rate": 1.4074321347811567e-05, "loss": 0.2656, "step": 12521 }, { "epoch": 2.039083173879412, "grad_norm": 0.16595827043056488, "learning_rate": 1.4070060837964267e-05, "loss": 0.3089, "step": 12522 }, { "epoch": 2.039246020437243, "grad_norm": 0.12708793580532074, "learning_rate": 1.4065800720534533e-05, "loss": 0.2755, "step": 12523 }, { "epoch": 2.039408866995074, "grad_norm": 0.14140111207962036, "learning_rate": 1.406154099567532e-05, "loss": 0.3024, "step": 12524 }, { "epoch": 2.039571713552905, "grad_norm": 0.15879301726818085, "learning_rate": 1.4057281663539554e-05, "loss": 0.2616, "step": 12525 }, { "epoch": 2.0397345601107357, "grad_norm": 0.19010591506958008, "learning_rate": 1.4053022724280151e-05, "loss": 0.2758, "step": 12526 }, { "epoch": 2.0398974066685667, "grad_norm": 0.1911044716835022, "learning_rate": 1.4048764178050044e-05, "loss": 0.2751, "step": 12527 }, { "epoch": 2.0400602532263976, "grad_norm": 0.17297348380088806, "learning_rate": 1.4044506025002111e-05, "loss": 0.2982, "step": 12528 }, { "epoch": 2.040223099784228, "grad_norm": 0.15201644599437714, "learning_rate": 1.4040248265289224e-05, "loss": 0.3198, "step": 12529 }, { "epoch": 2.040385946342059, "grad_norm": 0.16719506680965424, "learning_rate": 1.4035990899064272e-05, "loss": 0.2601, "step": 12530 }, { "epoch": 2.04054879289989, "grad_norm": 0.12843088805675507, "learning_rate": 1.4031733926480098e-05, "loss": 0.2336, "step": 12531 }, { "epoch": 2.040711639457721, "grad_norm": 0.1733647584915161, "learning_rate": 1.4027477347689535e-05, "loss": 0.2271, "step": 12532 }, { "epoch": 2.040874486015552, "grad_norm": 0.1477915197610855, "learning_rate": 1.4023221162845402e-05, "loss": 0.2745, "step": 12533 }, { "epoch": 2.041037332573383, "grad_norm": 0.12035148590803146, "learning_rate": 1.4018965372100523e-05, "loss": 0.3183, "step": 12534 }, { "epoch": 2.0412001791312138, "grad_norm": 0.1696641445159912, "learning_rate": 1.401470997560769e-05, "loss": 0.2793, "step": 12535 }, { "epoch": 2.0413630256890447, "grad_norm": 0.1173982322216034, "learning_rate": 1.4010454973519683e-05, "loss": 0.2671, "step": 12536 }, { "epoch": 2.041525872246875, "grad_norm": 0.1995304822921753, "learning_rate": 1.4006200365989253e-05, "loss": 0.2823, "step": 12537 }, { "epoch": 2.041688718804706, "grad_norm": 0.17116758227348328, "learning_rate": 1.4001946153169183e-05, "loss": 0.2897, "step": 12538 }, { "epoch": 2.041851565362537, "grad_norm": 0.15762458741664886, "learning_rate": 1.3997692335212196e-05, "loss": 0.3289, "step": 12539 }, { "epoch": 2.042014411920368, "grad_norm": 0.16226762533187866, "learning_rate": 1.3993438912271006e-05, "loss": 0.2724, "step": 12540 }, { "epoch": 2.042177258478199, "grad_norm": 0.17338962852954865, "learning_rate": 1.3989185884498347e-05, "loss": 0.2515, "step": 12541 }, { "epoch": 2.04234010503603, "grad_norm": 0.23676584661006927, "learning_rate": 1.3984933252046906e-05, "loss": 0.3217, "step": 12542 }, { "epoch": 2.042502951593861, "grad_norm": 0.20655295252799988, "learning_rate": 1.3980681015069358e-05, "loss": 0.3073, "step": 12543 }, { "epoch": 2.0426657981516914, "grad_norm": 0.13384610414505005, "learning_rate": 1.397642917371837e-05, "loss": 0.2618, "step": 12544 }, { "epoch": 2.0428286447095223, "grad_norm": 0.15883339941501617, "learning_rate": 1.3972177728146613e-05, "loss": 0.3077, "step": 12545 }, { "epoch": 2.0429914912673532, "grad_norm": 0.15467457473278046, "learning_rate": 1.3967926678506715e-05, "loss": 0.2801, "step": 12546 }, { "epoch": 2.043154337825184, "grad_norm": 0.19860291481018066, "learning_rate": 1.39636760249513e-05, "loss": 0.2868, "step": 12547 }, { "epoch": 2.043317184383015, "grad_norm": 0.18373866379261017, "learning_rate": 1.3959425767632972e-05, "loss": 0.3333, "step": 12548 }, { "epoch": 2.043480030940846, "grad_norm": 0.19273823499679565, "learning_rate": 1.3955175906704348e-05, "loss": 0.2802, "step": 12549 }, { "epoch": 2.043642877498677, "grad_norm": 0.14251194894313812, "learning_rate": 1.3950926442318001e-05, "loss": 0.2746, "step": 12550 }, { "epoch": 2.043805724056508, "grad_norm": 0.14344410598278046, "learning_rate": 1.3946677374626488e-05, "loss": 0.2583, "step": 12551 }, { "epoch": 2.0439685706143385, "grad_norm": 0.14503400027751923, "learning_rate": 1.3942428703782385e-05, "loss": 0.2728, "step": 12552 }, { "epoch": 2.0441314171721694, "grad_norm": 0.12926319241523743, "learning_rate": 1.3938180429938218e-05, "loss": 0.2751, "step": 12553 }, { "epoch": 2.0442942637300003, "grad_norm": 0.7152196168899536, "learning_rate": 1.393393255324651e-05, "loss": 0.334, "step": 12554 }, { "epoch": 2.0444571102878313, "grad_norm": 0.1578308790922165, "learning_rate": 1.3929685073859788e-05, "loss": 0.2838, "step": 12555 }, { "epoch": 2.0446199568456622, "grad_norm": 0.1613197773694992, "learning_rate": 1.3925437991930527e-05, "loss": 0.2902, "step": 12556 }, { "epoch": 2.044782803403493, "grad_norm": 0.16363371908664703, "learning_rate": 1.3921191307611231e-05, "loss": 0.2805, "step": 12557 }, { "epoch": 2.044945649961324, "grad_norm": 0.17781898379325867, "learning_rate": 1.3916945021054361e-05, "loss": 0.3039, "step": 12558 }, { "epoch": 2.0451084965191546, "grad_norm": 0.2033035308122635, "learning_rate": 1.391269913241236e-05, "loss": 0.2658, "step": 12559 }, { "epoch": 2.0452713430769855, "grad_norm": 0.16166965663433075, "learning_rate": 1.3908453641837693e-05, "loss": 0.3028, "step": 12560 }, { "epoch": 2.0454341896348165, "grad_norm": 0.17446784675121307, "learning_rate": 1.3904208549482767e-05, "loss": 0.2508, "step": 12561 }, { "epoch": 2.0455970361926474, "grad_norm": 0.17875045537948608, "learning_rate": 1.3899963855499986e-05, "loss": 0.2563, "step": 12562 }, { "epoch": 2.0457598827504784, "grad_norm": 0.13678869605064392, "learning_rate": 1.3895719560041776e-05, "loss": 0.2626, "step": 12563 }, { "epoch": 2.0459227293083093, "grad_norm": 0.15206483006477356, "learning_rate": 1.38914756632605e-05, "loss": 0.2956, "step": 12564 }, { "epoch": 2.0460855758661403, "grad_norm": 0.1978175938129425, "learning_rate": 1.3887232165308528e-05, "loss": 0.2717, "step": 12565 }, { "epoch": 2.046248422423971, "grad_norm": 0.17397300899028778, "learning_rate": 1.3882989066338207e-05, "loss": 0.286, "step": 12566 }, { "epoch": 2.0464112689818017, "grad_norm": 0.13256801664829254, "learning_rate": 1.3878746366501894e-05, "loss": 0.2588, "step": 12567 }, { "epoch": 2.0465741155396326, "grad_norm": 0.16228240728378296, "learning_rate": 1.387450406595191e-05, "loss": 0.2991, "step": 12568 }, { "epoch": 2.0467369620974636, "grad_norm": 0.1609354317188263, "learning_rate": 1.3870262164840553e-05, "loss": 0.2604, "step": 12569 }, { "epoch": 2.0468998086552945, "grad_norm": 0.16135968267917633, "learning_rate": 1.3866020663320136e-05, "loss": 0.2624, "step": 12570 }, { "epoch": 2.0470626552131255, "grad_norm": 0.16864106059074402, "learning_rate": 1.3861779561542936e-05, "loss": 0.2674, "step": 12571 }, { "epoch": 2.0472255017709564, "grad_norm": 0.1743621528148651, "learning_rate": 1.3857538859661226e-05, "loss": 0.281, "step": 12572 }, { "epoch": 2.0473883483287874, "grad_norm": 0.16692927479743958, "learning_rate": 1.385329855782724e-05, "loss": 0.2824, "step": 12573 }, { "epoch": 2.0475511948866183, "grad_norm": 0.16494934260845184, "learning_rate": 1.3849058656193242e-05, "loss": 0.2897, "step": 12574 }, { "epoch": 2.047714041444449, "grad_norm": 0.14222604036331177, "learning_rate": 1.384481915491145e-05, "loss": 0.3006, "step": 12575 }, { "epoch": 2.0478768880022797, "grad_norm": 0.12917208671569824, "learning_rate": 1.3840580054134072e-05, "loss": 0.255, "step": 12576 }, { "epoch": 2.0480397345601107, "grad_norm": 0.18523985147476196, "learning_rate": 1.3836341354013294e-05, "loss": 0.2689, "step": 12577 }, { "epoch": 2.0482025811179416, "grad_norm": 0.1416512131690979, "learning_rate": 1.3832103054701317e-05, "loss": 0.2763, "step": 12578 }, { "epoch": 2.0483654276757726, "grad_norm": 0.23365135490894318, "learning_rate": 1.3827865156350301e-05, "loss": 0.2676, "step": 12579 }, { "epoch": 2.0485282742336035, "grad_norm": 0.19842009246349335, "learning_rate": 1.3823627659112389e-05, "loss": 0.3293, "step": 12580 }, { "epoch": 2.0486911207914345, "grad_norm": 0.20430994033813477, "learning_rate": 1.381939056313974e-05, "loss": 0.2931, "step": 12581 }, { "epoch": 2.048853967349265, "grad_norm": 0.20190683007240295, "learning_rate": 1.3815153868584468e-05, "loss": 0.2964, "step": 12582 }, { "epoch": 2.049016813907096, "grad_norm": 0.14941249787807465, "learning_rate": 1.3810917575598681e-05, "loss": 0.2556, "step": 12583 }, { "epoch": 2.049179660464927, "grad_norm": 0.17687052488327026, "learning_rate": 1.3806681684334467e-05, "loss": 0.2832, "step": 12584 }, { "epoch": 2.0493425070227578, "grad_norm": 0.17824001610279083, "learning_rate": 1.3802446194943927e-05, "loss": 0.2692, "step": 12585 }, { "epoch": 2.0495053535805887, "grad_norm": 0.18758484721183777, "learning_rate": 1.3798211107579118e-05, "loss": 0.2935, "step": 12586 }, { "epoch": 2.0496682001384197, "grad_norm": 0.1638873964548111, "learning_rate": 1.3793976422392091e-05, "loss": 0.2931, "step": 12587 }, { "epoch": 2.0498310466962506, "grad_norm": 0.15825724601745605, "learning_rate": 1.3789742139534872e-05, "loss": 0.2667, "step": 12588 }, { "epoch": 2.0499938932540815, "grad_norm": 0.154136523604393, "learning_rate": 1.378550825915951e-05, "loss": 0.2945, "step": 12589 }, { "epoch": 2.050156739811912, "grad_norm": 0.1754479706287384, "learning_rate": 1.3781274781418003e-05, "loss": 0.2984, "step": 12590 }, { "epoch": 2.050319586369743, "grad_norm": 0.1817411482334137, "learning_rate": 1.377704170646233e-05, "loss": 0.2726, "step": 12591 }, { "epoch": 2.050482432927574, "grad_norm": 0.19982659816741943, "learning_rate": 1.3772809034444494e-05, "loss": 0.2607, "step": 12592 }, { "epoch": 2.050645279485405, "grad_norm": 0.1780419945716858, "learning_rate": 1.3768576765516445e-05, "loss": 0.2638, "step": 12593 }, { "epoch": 2.050808126043236, "grad_norm": 0.17448696494102478, "learning_rate": 1.3764344899830146e-05, "loss": 0.2974, "step": 12594 }, { "epoch": 2.0509709726010668, "grad_norm": 0.1748247891664505, "learning_rate": 1.3760113437537531e-05, "loss": 0.258, "step": 12595 }, { "epoch": 2.0511338191588977, "grad_norm": 0.21454410254955292, "learning_rate": 1.3755882378790507e-05, "loss": 0.2927, "step": 12596 }, { "epoch": 2.051296665716728, "grad_norm": 0.15509508550167084, "learning_rate": 1.3751651723741007e-05, "loss": 0.2669, "step": 12597 }, { "epoch": 2.051459512274559, "grad_norm": 0.17127260565757751, "learning_rate": 1.3747421472540912e-05, "loss": 0.2856, "step": 12598 }, { "epoch": 2.05162235883239, "grad_norm": 0.18886204063892365, "learning_rate": 1.3743191625342083e-05, "loss": 0.282, "step": 12599 }, { "epoch": 2.051785205390221, "grad_norm": 0.18822701275348663, "learning_rate": 1.3738962182296416e-05, "loss": 0.2916, "step": 12600 }, { "epoch": 2.051948051948052, "grad_norm": 0.1445099413394928, "learning_rate": 1.3734733143555745e-05, "loss": 0.2737, "step": 12601 }, { "epoch": 2.052110898505883, "grad_norm": 0.16817711293697357, "learning_rate": 1.3730504509271896e-05, "loss": 0.2631, "step": 12602 }, { "epoch": 2.052273745063714, "grad_norm": 0.18359892070293427, "learning_rate": 1.372627627959671e-05, "loss": 0.3072, "step": 12603 }, { "epoch": 2.052436591621545, "grad_norm": 0.16491585969924927, "learning_rate": 1.372204845468198e-05, "loss": 0.27, "step": 12604 }, { "epoch": 2.0525994381793753, "grad_norm": 0.15172787010669708, "learning_rate": 1.3717821034679501e-05, "loss": 0.2407, "step": 12605 }, { "epoch": 2.0527622847372062, "grad_norm": 0.14486005902290344, "learning_rate": 1.3713594019741039e-05, "loss": 0.2855, "step": 12606 }, { "epoch": 2.052925131295037, "grad_norm": 0.17229977250099182, "learning_rate": 1.3709367410018378e-05, "loss": 0.2814, "step": 12607 }, { "epoch": 2.053087977852868, "grad_norm": 0.22349439561367035, "learning_rate": 1.3705141205663254e-05, "loss": 0.2624, "step": 12608 }, { "epoch": 2.053250824410699, "grad_norm": 0.20902593433856964, "learning_rate": 1.370091540682739e-05, "loss": 0.2824, "step": 12609 }, { "epoch": 2.05341367096853, "grad_norm": 0.17531490325927734, "learning_rate": 1.3696690013662528e-05, "loss": 0.3, "step": 12610 }, { "epoch": 2.053576517526361, "grad_norm": 0.15961401164531708, "learning_rate": 1.3692465026320362e-05, "loss": 0.2894, "step": 12611 }, { "epoch": 2.053739364084192, "grad_norm": 0.19529160857200623, "learning_rate": 1.3688240444952574e-05, "loss": 0.2903, "step": 12612 }, { "epoch": 2.0539022106420224, "grad_norm": 0.1981949359178543, "learning_rate": 1.3684016269710837e-05, "loss": 0.2765, "step": 12613 }, { "epoch": 2.0540650571998533, "grad_norm": 0.606889545917511, "learning_rate": 1.3679792500746829e-05, "loss": 0.2642, "step": 12614 }, { "epoch": 2.0542279037576843, "grad_norm": 0.1743212789297104, "learning_rate": 1.367556913821219e-05, "loss": 0.2484, "step": 12615 }, { "epoch": 2.054390750315515, "grad_norm": 0.17391154170036316, "learning_rate": 1.3671346182258546e-05, "loss": 0.3004, "step": 12616 }, { "epoch": 2.054553596873346, "grad_norm": 0.15095050632953644, "learning_rate": 1.3667123633037505e-05, "loss": 0.281, "step": 12617 }, { "epoch": 2.054716443431177, "grad_norm": 0.1582988053560257, "learning_rate": 1.3662901490700691e-05, "loss": 0.2417, "step": 12618 }, { "epoch": 2.054879289989008, "grad_norm": 0.1536482572555542, "learning_rate": 1.3658679755399679e-05, "loss": 0.2791, "step": 12619 }, { "epoch": 2.0550421365468385, "grad_norm": 0.30088871717453003, "learning_rate": 1.3654458427286032e-05, "loss": 0.246, "step": 12620 }, { "epoch": 2.0552049831046695, "grad_norm": 0.4563179612159729, "learning_rate": 1.3650237506511331e-05, "loss": 0.2816, "step": 12621 }, { "epoch": 2.0553678296625004, "grad_norm": 0.18127217888832092, "learning_rate": 1.3646016993227112e-05, "loss": 0.2901, "step": 12622 }, { "epoch": 2.0555306762203314, "grad_norm": 0.1451103240251541, "learning_rate": 1.3641796887584896e-05, "loss": 0.2848, "step": 12623 }, { "epoch": 2.0556935227781623, "grad_norm": 0.22418411076068878, "learning_rate": 1.3637577189736192e-05, "loss": 0.3099, "step": 12624 }, { "epoch": 2.0558563693359933, "grad_norm": 0.15520121157169342, "learning_rate": 1.3633357899832522e-05, "loss": 0.2486, "step": 12625 }, { "epoch": 2.056019215893824, "grad_norm": 0.15114375948905945, "learning_rate": 1.3629139018025355e-05, "loss": 0.2483, "step": 12626 }, { "epoch": 2.056182062451655, "grad_norm": 0.17260119318962097, "learning_rate": 1.3624920544466169e-05, "loss": 0.3043, "step": 12627 }, { "epoch": 2.0563449090094856, "grad_norm": 0.13159328699111938, "learning_rate": 1.3620702479306402e-05, "loss": 0.2357, "step": 12628 }, { "epoch": 2.0565077555673166, "grad_norm": 0.22139830887317657, "learning_rate": 1.3616484822697523e-05, "loss": 0.3069, "step": 12629 }, { "epoch": 2.0566706021251475, "grad_norm": 0.13699302077293396, "learning_rate": 1.3612267574790944e-05, "loss": 0.2812, "step": 12630 }, { "epoch": 2.0568334486829785, "grad_norm": 0.20063325762748718, "learning_rate": 1.3608050735738065e-05, "loss": 0.2922, "step": 12631 }, { "epoch": 2.0569962952408094, "grad_norm": 0.19395385682582855, "learning_rate": 1.3603834305690311e-05, "loss": 0.2999, "step": 12632 }, { "epoch": 2.0571591417986403, "grad_norm": 0.16861218214035034, "learning_rate": 1.3599618284799035e-05, "loss": 0.2713, "step": 12633 }, { "epoch": 2.0573219883564713, "grad_norm": 0.1783229112625122, "learning_rate": 1.359540267321563e-05, "loss": 0.2925, "step": 12634 }, { "epoch": 2.057484834914302, "grad_norm": 0.14913077652454376, "learning_rate": 1.3591187471091438e-05, "loss": 0.2756, "step": 12635 }, { "epoch": 2.0576476814721327, "grad_norm": 0.1714257001876831, "learning_rate": 1.3586972678577786e-05, "loss": 0.302, "step": 12636 }, { "epoch": 2.0578105280299637, "grad_norm": 0.19200436770915985, "learning_rate": 1.3582758295826023e-05, "loss": 0.3037, "step": 12637 }, { "epoch": 2.0579733745877946, "grad_norm": 0.2099306583404541, "learning_rate": 1.3578544322987442e-05, "loss": 0.2715, "step": 12638 }, { "epoch": 2.0581362211456256, "grad_norm": 0.16729150712490082, "learning_rate": 1.357433076021333e-05, "loss": 0.3094, "step": 12639 }, { "epoch": 2.0582990677034565, "grad_norm": 0.19930677115917206, "learning_rate": 1.3570117607654986e-05, "loss": 0.3153, "step": 12640 }, { "epoch": 2.0584619142612874, "grad_norm": 0.212778240442276, "learning_rate": 1.3565904865463663e-05, "loss": 0.3027, "step": 12641 }, { "epoch": 2.0586247608191184, "grad_norm": 0.1560204178094864, "learning_rate": 1.3561692533790604e-05, "loss": 0.2805, "step": 12642 }, { "epoch": 2.058787607376949, "grad_norm": 0.17451120913028717, "learning_rate": 1.3557480612787065e-05, "loss": 0.3011, "step": 12643 }, { "epoch": 2.05895045393478, "grad_norm": 0.19332832098007202, "learning_rate": 1.3553269102604252e-05, "loss": 0.2951, "step": 12644 }, { "epoch": 2.0591133004926108, "grad_norm": 0.1720760315656662, "learning_rate": 1.3549058003393372e-05, "loss": 0.2766, "step": 12645 }, { "epoch": 2.0592761470504417, "grad_norm": 0.14316773414611816, "learning_rate": 1.3544847315305612e-05, "loss": 0.2964, "step": 12646 }, { "epoch": 2.0594389936082726, "grad_norm": 0.1547316014766693, "learning_rate": 1.354063703849216e-05, "loss": 0.3001, "step": 12647 }, { "epoch": 2.0596018401661036, "grad_norm": 0.154932901263237, "learning_rate": 1.3536427173104176e-05, "loss": 0.2257, "step": 12648 }, { "epoch": 2.0597646867239345, "grad_norm": 0.1809747815132141, "learning_rate": 1.35322177192928e-05, "loss": 0.2941, "step": 12649 }, { "epoch": 2.0599275332817655, "grad_norm": 0.17065295577049255, "learning_rate": 1.3528008677209154e-05, "loss": 0.2641, "step": 12650 }, { "epoch": 2.060090379839596, "grad_norm": 0.2077576369047165, "learning_rate": 1.3523800047004382e-05, "loss": 0.2631, "step": 12651 }, { "epoch": 2.060253226397427, "grad_norm": 0.18051677942276, "learning_rate": 1.3519591828829568e-05, "loss": 0.2931, "step": 12652 }, { "epoch": 2.060416072955258, "grad_norm": 0.13716287910938263, "learning_rate": 1.3515384022835795e-05, "loss": 0.2678, "step": 12653 }, { "epoch": 2.060578919513089, "grad_norm": 0.27290064096450806, "learning_rate": 1.3511176629174155e-05, "loss": 0.3089, "step": 12654 }, { "epoch": 2.0607417660709197, "grad_norm": 0.15909434854984283, "learning_rate": 1.3506969647995693e-05, "loss": 0.2623, "step": 12655 }, { "epoch": 2.0609046126287507, "grad_norm": 0.16356293857097626, "learning_rate": 1.3502763079451458e-05, "loss": 0.2863, "step": 12656 }, { "epoch": 2.0610674591865816, "grad_norm": 0.18631647527217865, "learning_rate": 1.3498556923692462e-05, "loss": 0.2792, "step": 12657 }, { "epoch": 2.061230305744412, "grad_norm": 0.18600404262542725, "learning_rate": 1.3494351180869744e-05, "loss": 0.2692, "step": 12658 }, { "epoch": 2.061393152302243, "grad_norm": 0.16394953429698944, "learning_rate": 1.3490145851134289e-05, "loss": 0.2658, "step": 12659 }, { "epoch": 2.061555998860074, "grad_norm": 0.1785314381122589, "learning_rate": 1.3485940934637073e-05, "loss": 0.279, "step": 12660 }, { "epoch": 2.061718845417905, "grad_norm": 0.1677570790052414, "learning_rate": 1.3481736431529085e-05, "loss": 0.3034, "step": 12661 }, { "epoch": 2.061881691975736, "grad_norm": 0.20753051340579987, "learning_rate": 1.3477532341961268e-05, "loss": 0.2782, "step": 12662 }, { "epoch": 2.062044538533567, "grad_norm": 0.16551239788532257, "learning_rate": 1.3473328666084562e-05, "loss": 0.2379, "step": 12663 }, { "epoch": 2.062207385091398, "grad_norm": 0.1402515172958374, "learning_rate": 1.3469125404049882e-05, "loss": 0.2693, "step": 12664 }, { "epoch": 2.0623702316492287, "grad_norm": 0.14522185921669006, "learning_rate": 1.3464922556008158e-05, "loss": 0.2838, "step": 12665 }, { "epoch": 2.0625330782070592, "grad_norm": 0.1377403438091278, "learning_rate": 1.3460720122110273e-05, "loss": 0.2684, "step": 12666 }, { "epoch": 2.06269592476489, "grad_norm": 0.18420802056789398, "learning_rate": 1.3456518102507109e-05, "loss": 0.2313, "step": 12667 }, { "epoch": 2.062858771322721, "grad_norm": 0.17370249330997467, "learning_rate": 1.345231649734952e-05, "loss": 0.3255, "step": 12668 }, { "epoch": 2.063021617880552, "grad_norm": 0.17912383377552032, "learning_rate": 1.3448115306788378e-05, "loss": 0.2785, "step": 12669 }, { "epoch": 2.063184464438383, "grad_norm": 0.13367322087287903, "learning_rate": 1.3443914530974497e-05, "loss": 0.2452, "step": 12670 }, { "epoch": 2.063347310996214, "grad_norm": 0.2170647382736206, "learning_rate": 1.3439714170058714e-05, "loss": 0.3383, "step": 12671 }, { "epoch": 2.063510157554045, "grad_norm": 0.1743076592683792, "learning_rate": 1.343551422419183e-05, "loss": 0.2836, "step": 12672 }, { "epoch": 2.0636730041118754, "grad_norm": 0.19682306051254272, "learning_rate": 1.3431314693524624e-05, "loss": 0.2867, "step": 12673 }, { "epoch": 2.0638358506697063, "grad_norm": 0.1605837196111679, "learning_rate": 1.3427115578207892e-05, "loss": 0.2739, "step": 12674 }, { "epoch": 2.0639986972275373, "grad_norm": 0.17210498452186584, "learning_rate": 1.3422916878392372e-05, "loss": 0.3137, "step": 12675 }, { "epoch": 2.064161543785368, "grad_norm": 0.12398342788219452, "learning_rate": 1.3418718594228832e-05, "loss": 0.2798, "step": 12676 }, { "epoch": 2.064324390343199, "grad_norm": 0.1702207773923874, "learning_rate": 1.3414520725867993e-05, "loss": 0.2852, "step": 12677 }, { "epoch": 2.06448723690103, "grad_norm": 0.1401466727256775, "learning_rate": 1.3410323273460573e-05, "loss": 0.3078, "step": 12678 }, { "epoch": 2.064650083458861, "grad_norm": 0.2405071258544922, "learning_rate": 1.3406126237157256e-05, "loss": 0.2896, "step": 12679 }, { "epoch": 2.064812930016692, "grad_norm": 0.16528020799160004, "learning_rate": 1.3401929617108761e-05, "loss": 0.2596, "step": 12680 }, { "epoch": 2.0649757765745225, "grad_norm": 0.1923544853925705, "learning_rate": 1.3397733413465736e-05, "loss": 0.3001, "step": 12681 }, { "epoch": 2.0651386231323534, "grad_norm": 0.16195940971374512, "learning_rate": 1.3393537626378833e-05, "loss": 0.2797, "step": 12682 }, { "epoch": 2.0653014696901844, "grad_norm": 0.17453332245349884, "learning_rate": 1.3389342255998716e-05, "loss": 0.2787, "step": 12683 }, { "epoch": 2.0654643162480153, "grad_norm": 0.17512460052967072, "learning_rate": 1.3385147302475994e-05, "loss": 0.2508, "step": 12684 }, { "epoch": 2.0656271628058462, "grad_norm": 0.17642641067504883, "learning_rate": 1.3380952765961286e-05, "loss": 0.2863, "step": 12685 }, { "epoch": 2.065790009363677, "grad_norm": 0.17684394121170044, "learning_rate": 1.3376758646605176e-05, "loss": 0.3102, "step": 12686 }, { "epoch": 2.065952855921508, "grad_norm": 0.18137331306934357, "learning_rate": 1.3372564944558264e-05, "loss": 0.3183, "step": 12687 }, { "epoch": 2.0661157024793386, "grad_norm": 0.12929461896419525, "learning_rate": 1.3368371659971104e-05, "loss": 0.2689, "step": 12688 }, { "epoch": 2.0662785490371696, "grad_norm": 0.14414794743061066, "learning_rate": 1.3364178792994252e-05, "loss": 0.2937, "step": 12689 }, { "epoch": 2.0664413955950005, "grad_norm": 0.16243162751197815, "learning_rate": 1.3359986343778236e-05, "loss": 0.2601, "step": 12690 }, { "epoch": 2.0666042421528314, "grad_norm": 0.20247256755828857, "learning_rate": 1.3355794312473594e-05, "loss": 0.324, "step": 12691 }, { "epoch": 2.0667670887106624, "grad_norm": 0.20114955306053162, "learning_rate": 1.3351602699230822e-05, "loss": 0.2779, "step": 12692 }, { "epoch": 2.0669299352684933, "grad_norm": 0.19676944613456726, "learning_rate": 1.3347411504200403e-05, "loss": 0.2788, "step": 12693 }, { "epoch": 2.0670927818263243, "grad_norm": 0.16545557975769043, "learning_rate": 1.3343220727532835e-05, "loss": 0.2757, "step": 12694 }, { "epoch": 2.067255628384155, "grad_norm": 0.1432277262210846, "learning_rate": 1.3339030369378564e-05, "loss": 0.2665, "step": 12695 }, { "epoch": 2.0674184749419857, "grad_norm": 0.17623861134052277, "learning_rate": 1.3334840429888041e-05, "loss": 0.2757, "step": 12696 }, { "epoch": 2.0675813214998167, "grad_norm": 0.18037600815296173, "learning_rate": 1.3330650909211687e-05, "loss": 0.292, "step": 12697 }, { "epoch": 2.0677441680576476, "grad_norm": 0.14113226532936096, "learning_rate": 1.3326461807499938e-05, "loss": 0.2909, "step": 12698 }, { "epoch": 2.0679070146154785, "grad_norm": 0.14382420480251312, "learning_rate": 1.3322273124903184e-05, "loss": 0.281, "step": 12699 }, { "epoch": 2.0680698611733095, "grad_norm": 0.12629921734333038, "learning_rate": 1.331808486157181e-05, "loss": 0.2218, "step": 12700 }, { "epoch": 2.0682327077311404, "grad_norm": 0.1757841855287552, "learning_rate": 1.3313897017656185e-05, "loss": 0.2825, "step": 12701 }, { "epoch": 2.0683955542889714, "grad_norm": 0.18431171774864197, "learning_rate": 1.3309709593306676e-05, "loss": 0.2749, "step": 12702 }, { "epoch": 2.0685584008468023, "grad_norm": 0.16483177244663239, "learning_rate": 1.330552258867362e-05, "loss": 0.2528, "step": 12703 }, { "epoch": 2.068721247404633, "grad_norm": 0.13082170486450195, "learning_rate": 1.3301336003907328e-05, "loss": 0.2669, "step": 12704 }, { "epoch": 2.0688840939624638, "grad_norm": 0.1652815341949463, "learning_rate": 1.3297149839158135e-05, "loss": 0.2179, "step": 12705 }, { "epoch": 2.0690469405202947, "grad_norm": 0.1842648684978485, "learning_rate": 1.3292964094576324e-05, "loss": 0.2761, "step": 12706 }, { "epoch": 2.0692097870781256, "grad_norm": 0.15710850059986115, "learning_rate": 1.328877877031217e-05, "loss": 0.3075, "step": 12707 }, { "epoch": 2.0693726336359566, "grad_norm": 0.1682477742433548, "learning_rate": 1.3284593866515954e-05, "loss": 0.2588, "step": 12708 }, { "epoch": 2.0695354801937875, "grad_norm": 0.13551858067512512, "learning_rate": 1.3280409383337917e-05, "loss": 0.2479, "step": 12709 }, { "epoch": 2.0696983267516185, "grad_norm": 0.24505369365215302, "learning_rate": 1.3276225320928287e-05, "loss": 0.3044, "step": 12710 }, { "epoch": 2.069861173309449, "grad_norm": 0.16457884013652802, "learning_rate": 1.3272041679437306e-05, "loss": 0.2902, "step": 12711 }, { "epoch": 2.07002401986728, "grad_norm": 0.13860957324504852, "learning_rate": 1.3267858459015154e-05, "loss": 0.2586, "step": 12712 }, { "epoch": 2.070186866425111, "grad_norm": 0.16041135787963867, "learning_rate": 1.3263675659812045e-05, "loss": 0.2872, "step": 12713 }, { "epoch": 2.070349712982942, "grad_norm": 0.15039542317390442, "learning_rate": 1.3259493281978142e-05, "loss": 0.271, "step": 12714 }, { "epoch": 2.0705125595407727, "grad_norm": 0.17453430593013763, "learning_rate": 1.3255311325663594e-05, "loss": 0.2802, "step": 12715 }, { "epoch": 2.0706754060986037, "grad_norm": 0.1574164181947708, "learning_rate": 1.3251129791018569e-05, "loss": 0.2871, "step": 12716 }, { "epoch": 2.0708382526564346, "grad_norm": 0.15475519001483917, "learning_rate": 1.3246948678193185e-05, "loss": 0.272, "step": 12717 }, { "epoch": 2.0710010992142656, "grad_norm": 0.15341345965862274, "learning_rate": 1.3242767987337556e-05, "loss": 0.2558, "step": 12718 }, { "epoch": 2.071163945772096, "grad_norm": 0.23494333028793335, "learning_rate": 1.3238587718601775e-05, "loss": 0.3132, "step": 12719 }, { "epoch": 2.071326792329927, "grad_norm": 0.17869922518730164, "learning_rate": 1.323440787213594e-05, "loss": 0.2995, "step": 12720 }, { "epoch": 2.071489638887758, "grad_norm": 0.18272797763347626, "learning_rate": 1.3230228448090115e-05, "loss": 0.2972, "step": 12721 }, { "epoch": 2.071652485445589, "grad_norm": 0.14036469161510468, "learning_rate": 1.3226049446614338e-05, "loss": 0.3236, "step": 12722 }, { "epoch": 2.07181533200342, "grad_norm": 0.19440974295139313, "learning_rate": 1.3221870867858676e-05, "loss": 0.3079, "step": 12723 }, { "epoch": 2.0719781785612508, "grad_norm": 0.14773844182491302, "learning_rate": 1.3217692711973134e-05, "loss": 0.2798, "step": 12724 }, { "epoch": 2.0721410251190817, "grad_norm": 0.1945192664861679, "learning_rate": 1.3213514979107727e-05, "loss": 0.2812, "step": 12725 }, { "epoch": 2.0723038716769127, "grad_norm": 0.1404823660850525, "learning_rate": 1.3209337669412436e-05, "loss": 0.2434, "step": 12726 }, { "epoch": 2.072466718234743, "grad_norm": 0.17344848811626434, "learning_rate": 1.3205160783037258e-05, "loss": 0.279, "step": 12727 }, { "epoch": 2.072629564792574, "grad_norm": 0.1877620816230774, "learning_rate": 1.3200984320132146e-05, "loss": 0.2484, "step": 12728 }, { "epoch": 2.072792411350405, "grad_norm": 0.18134371936321259, "learning_rate": 1.3196808280847046e-05, "loss": 0.2593, "step": 12729 }, { "epoch": 2.072955257908236, "grad_norm": 0.1774781495332718, "learning_rate": 1.3192632665331884e-05, "loss": 0.2911, "step": 12730 }, { "epoch": 2.073118104466067, "grad_norm": 0.13082051277160645, "learning_rate": 1.3188457473736593e-05, "loss": 0.2729, "step": 12731 }, { "epoch": 2.073280951023898, "grad_norm": 0.13444122672080994, "learning_rate": 1.3184282706211071e-05, "loss": 0.294, "step": 12732 }, { "epoch": 2.073443797581729, "grad_norm": 0.20020364224910736, "learning_rate": 1.3180108362905186e-05, "loss": 0.2894, "step": 12733 }, { "epoch": 2.0736066441395593, "grad_norm": 0.2011481076478958, "learning_rate": 1.3175934443968838e-05, "loss": 0.2583, "step": 12734 }, { "epoch": 2.0737694906973902, "grad_norm": 0.2020660936832428, "learning_rate": 1.3171760949551868e-05, "loss": 0.3401, "step": 12735 }, { "epoch": 2.073932337255221, "grad_norm": 0.15025119483470917, "learning_rate": 1.3167587879804117e-05, "loss": 0.2464, "step": 12736 }, { "epoch": 2.074095183813052, "grad_norm": 0.17463527619838715, "learning_rate": 1.3163415234875404e-05, "loss": 0.2736, "step": 12737 }, { "epoch": 2.074258030370883, "grad_norm": 0.20183272659778595, "learning_rate": 1.3159243014915556e-05, "loss": 0.2943, "step": 12738 }, { "epoch": 2.074420876928714, "grad_norm": 0.17722976207733154, "learning_rate": 1.3155071220074362e-05, "loss": 0.2873, "step": 12739 }, { "epoch": 2.074583723486545, "grad_norm": 0.1506938487291336, "learning_rate": 1.3150899850501593e-05, "loss": 0.2676, "step": 12740 }, { "epoch": 2.0747465700443755, "grad_norm": 0.1615949422121048, "learning_rate": 1.3146728906347016e-05, "loss": 0.2901, "step": 12741 }, { "epoch": 2.0749094166022064, "grad_norm": 0.13865941762924194, "learning_rate": 1.3142558387760396e-05, "loss": 0.3119, "step": 12742 }, { "epoch": 2.0750722631600373, "grad_norm": 0.17299486696720123, "learning_rate": 1.3138388294891451e-05, "loss": 0.2564, "step": 12743 }, { "epoch": 2.0752351097178683, "grad_norm": 0.17170168459415436, "learning_rate": 1.3134218627889897e-05, "loss": 0.2447, "step": 12744 }, { "epoch": 2.0753979562756992, "grad_norm": 0.20150737464427948, "learning_rate": 1.3130049386905452e-05, "loss": 0.3094, "step": 12745 }, { "epoch": 2.07556080283353, "grad_norm": 0.1670556217432022, "learning_rate": 1.31258805720878e-05, "loss": 0.2477, "step": 12746 }, { "epoch": 2.075723649391361, "grad_norm": 0.19292008876800537, "learning_rate": 1.3121712183586605e-05, "loss": 0.2814, "step": 12747 }, { "epoch": 2.075886495949192, "grad_norm": 0.19958199560642242, "learning_rate": 1.3117544221551537e-05, "loss": 0.261, "step": 12748 }, { "epoch": 2.0760493425070226, "grad_norm": 0.1419409215450287, "learning_rate": 1.3113376686132229e-05, "loss": 0.289, "step": 12749 }, { "epoch": 2.0762121890648535, "grad_norm": 0.1488221436738968, "learning_rate": 1.3109209577478304e-05, "loss": 0.2903, "step": 12750 }, { "epoch": 2.0763750356226844, "grad_norm": 0.18846891820430756, "learning_rate": 1.3105042895739394e-05, "loss": 0.2579, "step": 12751 }, { "epoch": 2.0765378821805154, "grad_norm": 0.1657315194606781, "learning_rate": 1.3100876641065068e-05, "loss": 0.2689, "step": 12752 }, { "epoch": 2.0767007287383463, "grad_norm": 0.16142936050891876, "learning_rate": 1.3096710813604934e-05, "loss": 0.2663, "step": 12753 }, { "epoch": 2.0768635752961773, "grad_norm": 0.13869111239910126, "learning_rate": 1.3092545413508544e-05, "loss": 0.2465, "step": 12754 }, { "epoch": 2.077026421854008, "grad_norm": 0.1953112781047821, "learning_rate": 1.308838044092544e-05, "loss": 0.2889, "step": 12755 }, { "epoch": 2.077189268411839, "grad_norm": 0.1651705503463745, "learning_rate": 1.3084215896005175e-05, "loss": 0.2932, "step": 12756 }, { "epoch": 2.0773521149696696, "grad_norm": 0.18898914754390717, "learning_rate": 1.3080051778897263e-05, "loss": 0.2924, "step": 12757 }, { "epoch": 2.0775149615275006, "grad_norm": 0.18405017256736755, "learning_rate": 1.3075888089751201e-05, "loss": 0.2744, "step": 12758 }, { "epoch": 2.0776778080853315, "grad_norm": 0.23582586646080017, "learning_rate": 1.3071724828716474e-05, "loss": 0.2821, "step": 12759 }, { "epoch": 2.0778406546431625, "grad_norm": 0.210484117269516, "learning_rate": 1.3067561995942573e-05, "loss": 0.2744, "step": 12760 }, { "epoch": 2.0780035012009934, "grad_norm": 0.12991417944431305, "learning_rate": 1.306339959157895e-05, "loss": 0.2755, "step": 12761 }, { "epoch": 2.0781663477588244, "grad_norm": 0.17556993663311005, "learning_rate": 1.3059237615775033e-05, "loss": 0.2748, "step": 12762 }, { "epoch": 2.0783291943166553, "grad_norm": 0.14197342097759247, "learning_rate": 1.305507606868027e-05, "loss": 0.3007, "step": 12763 }, { "epoch": 2.078492040874486, "grad_norm": 0.16387857496738434, "learning_rate": 1.3050914950444065e-05, "loss": 0.2848, "step": 12764 }, { "epoch": 2.0786548874323167, "grad_norm": 0.15357007086277008, "learning_rate": 1.3046754261215812e-05, "loss": 0.2767, "step": 12765 }, { "epoch": 2.0788177339901477, "grad_norm": 0.1832093894481659, "learning_rate": 1.3042594001144887e-05, "loss": 0.345, "step": 12766 }, { "epoch": 2.0789805805479786, "grad_norm": 0.19587047398090363, "learning_rate": 1.303843417038067e-05, "loss": 0.3481, "step": 12767 }, { "epoch": 2.0791434271058096, "grad_norm": 0.13630113005638123, "learning_rate": 1.3034274769072504e-05, "loss": 0.263, "step": 12768 }, { "epoch": 2.0793062736636405, "grad_norm": 0.19689849019050598, "learning_rate": 1.3030115797369724e-05, "loss": 0.279, "step": 12769 }, { "epoch": 2.0794691202214715, "grad_norm": 0.2568368911743164, "learning_rate": 1.3025957255421641e-05, "loss": 0.3206, "step": 12770 }, { "epoch": 2.0796319667793024, "grad_norm": 0.15130223333835602, "learning_rate": 1.3021799143377577e-05, "loss": 0.2686, "step": 12771 }, { "epoch": 2.079794813337133, "grad_norm": 0.18138794600963593, "learning_rate": 1.3017641461386815e-05, "loss": 0.3043, "step": 12772 }, { "epoch": 2.079957659894964, "grad_norm": 0.15017865598201752, "learning_rate": 1.3013484209598609e-05, "loss": 0.2483, "step": 12773 }, { "epoch": 2.080120506452795, "grad_norm": 0.14382869005203247, "learning_rate": 1.3009327388162247e-05, "loss": 0.2658, "step": 12774 }, { "epoch": 2.0802833530106257, "grad_norm": 0.18691033124923706, "learning_rate": 1.3005170997226954e-05, "loss": 0.2717, "step": 12775 }, { "epoch": 2.0804461995684567, "grad_norm": 0.2197365164756775, "learning_rate": 1.300101503694196e-05, "loss": 0.2631, "step": 12776 }, { "epoch": 2.0806090461262876, "grad_norm": 0.1817225217819214, "learning_rate": 1.2996859507456467e-05, "loss": 0.2983, "step": 12777 }, { "epoch": 2.0807718926841186, "grad_norm": 0.2110803723335266, "learning_rate": 1.2992704408919686e-05, "loss": 0.304, "step": 12778 }, { "epoch": 2.0809347392419495, "grad_norm": 0.12877564132213593, "learning_rate": 1.2988549741480799e-05, "loss": 0.261, "step": 12779 }, { "epoch": 2.08109758579978, "grad_norm": 0.1739543229341507, "learning_rate": 1.2984395505288957e-05, "loss": 0.2798, "step": 12780 }, { "epoch": 2.081260432357611, "grad_norm": 0.16545967757701874, "learning_rate": 1.2980241700493309e-05, "loss": 0.2978, "step": 12781 }, { "epoch": 2.081423278915442, "grad_norm": 0.15613068640232086, "learning_rate": 1.2976088327243009e-05, "loss": 0.2524, "step": 12782 }, { "epoch": 2.081586125473273, "grad_norm": 0.18293684720993042, "learning_rate": 1.2971935385687157e-05, "loss": 0.2718, "step": 12783 }, { "epoch": 2.0817489720311038, "grad_norm": 0.16925156116485596, "learning_rate": 1.2967782875974854e-05, "loss": 0.281, "step": 12784 }, { "epoch": 2.0819118185889347, "grad_norm": 0.15880168974399567, "learning_rate": 1.2963630798255206e-05, "loss": 0.2644, "step": 12785 }, { "epoch": 2.0820746651467656, "grad_norm": 0.1356089860200882, "learning_rate": 1.2959479152677273e-05, "loss": 0.2511, "step": 12786 }, { "epoch": 2.082237511704596, "grad_norm": 0.18855077028274536, "learning_rate": 1.2955327939390103e-05, "loss": 0.2834, "step": 12787 }, { "epoch": 2.082400358262427, "grad_norm": 0.15455535054206848, "learning_rate": 1.2951177158542758e-05, "loss": 0.3183, "step": 12788 }, { "epoch": 2.082563204820258, "grad_norm": 0.17234674096107483, "learning_rate": 1.2947026810284241e-05, "loss": 0.3063, "step": 12789 }, { "epoch": 2.082726051378089, "grad_norm": 0.17542262375354767, "learning_rate": 1.2942876894763583e-05, "loss": 0.2709, "step": 12790 }, { "epoch": 2.08288889793592, "grad_norm": 0.1528126299381256, "learning_rate": 1.293872741212977e-05, "loss": 0.2751, "step": 12791 }, { "epoch": 2.083051744493751, "grad_norm": 0.1460283100605011, "learning_rate": 1.2934578362531768e-05, "loss": 0.2556, "step": 12792 }, { "epoch": 2.083214591051582, "grad_norm": 0.2004663497209549, "learning_rate": 1.293042974611856e-05, "loss": 0.278, "step": 12793 }, { "epoch": 2.0833774376094127, "grad_norm": 0.18705599009990692, "learning_rate": 1.2926281563039088e-05, "loss": 0.3091, "step": 12794 }, { "epoch": 2.0835402841672432, "grad_norm": 0.1946682631969452, "learning_rate": 1.2922133813442272e-05, "loss": 0.2764, "step": 12795 }, { "epoch": 2.083703130725074, "grad_norm": 0.14181245863437653, "learning_rate": 1.2917986497477047e-05, "loss": 0.2655, "step": 12796 }, { "epoch": 2.083865977282905, "grad_norm": 0.1323392540216446, "learning_rate": 1.2913839615292309e-05, "loss": 0.2427, "step": 12797 }, { "epoch": 2.084028823840736, "grad_norm": 0.18835173547267914, "learning_rate": 1.2909693167036935e-05, "loss": 0.2722, "step": 12798 }, { "epoch": 2.084191670398567, "grad_norm": 0.17036980390548706, "learning_rate": 1.2905547152859793e-05, "loss": 0.3129, "step": 12799 }, { "epoch": 2.084354516956398, "grad_norm": 0.16426466405391693, "learning_rate": 1.2901401572909754e-05, "loss": 0.2501, "step": 12800 }, { "epoch": 2.084517363514229, "grad_norm": 0.15845562517642975, "learning_rate": 1.2897256427335646e-05, "loss": 0.2959, "step": 12801 }, { "epoch": 2.0846802100720594, "grad_norm": 0.1716640293598175, "learning_rate": 1.2893111716286294e-05, "loss": 0.2592, "step": 12802 }, { "epoch": 2.0848430566298903, "grad_norm": 0.17950734496116638, "learning_rate": 1.2888967439910493e-05, "loss": 0.2493, "step": 12803 }, { "epoch": 2.0850059031877213, "grad_norm": 0.1458687037229538, "learning_rate": 1.2884823598357058e-05, "loss": 0.2817, "step": 12804 }, { "epoch": 2.085168749745552, "grad_norm": 0.1767456978559494, "learning_rate": 1.2880680191774751e-05, "loss": 0.2927, "step": 12805 }, { "epoch": 2.085331596303383, "grad_norm": 0.13861189782619476, "learning_rate": 1.2876537220312328e-05, "loss": 0.2891, "step": 12806 }, { "epoch": 2.085494442861214, "grad_norm": 0.19023597240447998, "learning_rate": 1.2872394684118553e-05, "loss": 0.2825, "step": 12807 }, { "epoch": 2.085657289419045, "grad_norm": 0.17087022960186005, "learning_rate": 1.2868252583342142e-05, "loss": 0.3545, "step": 12808 }, { "epoch": 2.085820135976876, "grad_norm": 0.157222181558609, "learning_rate": 1.286411091813181e-05, "loss": 0.2633, "step": 12809 }, { "epoch": 2.0859829825347065, "grad_norm": 0.1620623618364334, "learning_rate": 1.2859969688636247e-05, "loss": 0.2537, "step": 12810 }, { "epoch": 2.0861458290925374, "grad_norm": 0.21501979231834412, "learning_rate": 1.2855828895004157e-05, "loss": 0.2765, "step": 12811 }, { "epoch": 2.0863086756503684, "grad_norm": 0.17518271505832672, "learning_rate": 1.2851688537384193e-05, "loss": 0.2808, "step": 12812 }, { "epoch": 2.0864715222081993, "grad_norm": 0.15343299508094788, "learning_rate": 1.2847548615924998e-05, "loss": 0.2498, "step": 12813 }, { "epoch": 2.0866343687660303, "grad_norm": 0.16207565367221832, "learning_rate": 1.2843409130775225e-05, "loss": 0.2646, "step": 12814 }, { "epoch": 2.086797215323861, "grad_norm": 0.5198413729667664, "learning_rate": 1.2839270082083488e-05, "loss": 0.3061, "step": 12815 }, { "epoch": 2.086960061881692, "grad_norm": 0.16033460199832916, "learning_rate": 1.283513146999839e-05, "loss": 0.2603, "step": 12816 }, { "epoch": 2.0871229084395226, "grad_norm": 0.126622274518013, "learning_rate": 1.2830993294668509e-05, "loss": 0.2112, "step": 12817 }, { "epoch": 2.0872857549973536, "grad_norm": 0.17669355869293213, "learning_rate": 1.282685555624244e-05, "loss": 0.2749, "step": 12818 }, { "epoch": 2.0874486015551845, "grad_norm": 0.17130228877067566, "learning_rate": 1.2822718254868727e-05, "loss": 0.237, "step": 12819 }, { "epoch": 2.0876114481130155, "grad_norm": 0.2016458511352539, "learning_rate": 1.2818581390695911e-05, "loss": 0.2633, "step": 12820 }, { "epoch": 2.0877742946708464, "grad_norm": 0.17541095614433289, "learning_rate": 1.281444496387251e-05, "loss": 0.2794, "step": 12821 }, { "epoch": 2.0879371412286774, "grad_norm": 0.1710711419582367, "learning_rate": 1.2810308974547053e-05, "loss": 0.2842, "step": 12822 }, { "epoch": 2.0880999877865083, "grad_norm": 0.19612762331962585, "learning_rate": 1.2806173422868028e-05, "loss": 0.2804, "step": 12823 }, { "epoch": 2.0882628343443392, "grad_norm": 0.16974948346614838, "learning_rate": 1.2802038308983898e-05, "loss": 0.2731, "step": 12824 }, { "epoch": 2.0884256809021697, "grad_norm": 0.1833891123533249, "learning_rate": 1.2797903633043151e-05, "loss": 0.2493, "step": 12825 }, { "epoch": 2.0885885274600007, "grad_norm": 0.16692005097866058, "learning_rate": 1.279376939519421e-05, "loss": 0.2834, "step": 12826 }, { "epoch": 2.0887513740178316, "grad_norm": 0.1847706288099289, "learning_rate": 1.2789635595585531e-05, "loss": 0.2594, "step": 12827 }, { "epoch": 2.0889142205756626, "grad_norm": 0.19242912530899048, "learning_rate": 1.2785502234365514e-05, "loss": 0.259, "step": 12828 }, { "epoch": 2.0890770671334935, "grad_norm": 0.1340983659029007, "learning_rate": 1.2781369311682556e-05, "loss": 0.2792, "step": 12829 }, { "epoch": 2.0892399136913244, "grad_norm": 0.1519225835800171, "learning_rate": 1.2777236827685057e-05, "loss": 0.3138, "step": 12830 }, { "epoch": 2.0894027602491554, "grad_norm": 0.1630094200372696, "learning_rate": 1.2773104782521372e-05, "loss": 0.3023, "step": 12831 }, { "epoch": 2.0895656068069863, "grad_norm": 0.19871918857097626, "learning_rate": 1.2768973176339854e-05, "loss": 0.3208, "step": 12832 }, { "epoch": 2.089728453364817, "grad_norm": 0.19830310344696045, "learning_rate": 1.2764842009288851e-05, "loss": 0.2667, "step": 12833 }, { "epoch": 2.0898912999226478, "grad_norm": 0.17117957770824432, "learning_rate": 1.2760711281516677e-05, "loss": 0.2445, "step": 12834 }, { "epoch": 2.0900541464804787, "grad_norm": 0.19622156023979187, "learning_rate": 1.275658099317163e-05, "loss": 0.2946, "step": 12835 }, { "epoch": 2.0902169930383097, "grad_norm": 0.16166512668132782, "learning_rate": 1.2752451144402017e-05, "loss": 0.2626, "step": 12836 }, { "epoch": 2.0903798395961406, "grad_norm": 0.2142687290906906, "learning_rate": 1.27483217353561e-05, "loss": 0.2863, "step": 12837 }, { "epoch": 2.0905426861539715, "grad_norm": 0.13505162298679352, "learning_rate": 1.2744192766182142e-05, "loss": 0.2789, "step": 12838 }, { "epoch": 2.0907055327118025, "grad_norm": 0.14692699909210205, "learning_rate": 1.274006423702837e-05, "loss": 0.2918, "step": 12839 }, { "epoch": 2.090868379269633, "grad_norm": 0.15488214790821075, "learning_rate": 1.2735936148043037e-05, "loss": 0.2462, "step": 12840 }, { "epoch": 2.091031225827464, "grad_norm": 0.19398345053195953, "learning_rate": 1.2731808499374343e-05, "loss": 0.2945, "step": 12841 }, { "epoch": 2.091194072385295, "grad_norm": 0.17033255100250244, "learning_rate": 1.2727681291170474e-05, "loss": 0.2595, "step": 12842 }, { "epoch": 2.091356918943126, "grad_norm": 0.2015538364648819, "learning_rate": 1.2723554523579612e-05, "loss": 0.2735, "step": 12843 }, { "epoch": 2.0915197655009568, "grad_norm": 0.1607324779033661, "learning_rate": 1.2719428196749928e-05, "loss": 0.2715, "step": 12844 }, { "epoch": 2.0916826120587877, "grad_norm": 0.2174219936132431, "learning_rate": 1.2715302310829569e-05, "loss": 0.2755, "step": 12845 }, { "epoch": 2.0918454586166186, "grad_norm": 0.17274053394794464, "learning_rate": 1.2711176865966652e-05, "loss": 0.2803, "step": 12846 }, { "epoch": 2.0920083051744496, "grad_norm": 0.1583327203989029, "learning_rate": 1.270705186230931e-05, "loss": 0.2764, "step": 12847 }, { "epoch": 2.09217115173228, "grad_norm": 0.17308276891708374, "learning_rate": 1.270292730000564e-05, "loss": 0.2498, "step": 12848 }, { "epoch": 2.092333998290111, "grad_norm": 0.1517096310853958, "learning_rate": 1.2698803179203724e-05, "loss": 0.2999, "step": 12849 }, { "epoch": 2.092496844847942, "grad_norm": 0.20828081667423248, "learning_rate": 1.2694679500051621e-05, "loss": 0.3095, "step": 12850 }, { "epoch": 2.092659691405773, "grad_norm": 0.17302654683589935, "learning_rate": 1.2690556262697397e-05, "loss": 0.2937, "step": 12851 }, { "epoch": 2.092822537963604, "grad_norm": 0.16037575900554657, "learning_rate": 1.2686433467289086e-05, "loss": 0.264, "step": 12852 }, { "epoch": 2.092985384521435, "grad_norm": 0.17807109653949738, "learning_rate": 1.2682311113974704e-05, "loss": 0.2911, "step": 12853 }, { "epoch": 2.0931482310792657, "grad_norm": 0.13842815160751343, "learning_rate": 1.2678189202902251e-05, "loss": 0.2897, "step": 12854 }, { "epoch": 2.0933110776370967, "grad_norm": 0.16390103101730347, "learning_rate": 1.267406773421973e-05, "loss": 0.2923, "step": 12855 }, { "epoch": 2.093473924194927, "grad_norm": 0.1718931347131729, "learning_rate": 1.2669946708075108e-05, "loss": 0.2961, "step": 12856 }, { "epoch": 2.093636770752758, "grad_norm": 0.19365158677101135, "learning_rate": 1.2665826124616332e-05, "loss": 0.3068, "step": 12857 }, { "epoch": 2.093799617310589, "grad_norm": 0.144525408744812, "learning_rate": 1.266170598399136e-05, "loss": 0.2523, "step": 12858 }, { "epoch": 2.09396246386842, "grad_norm": 0.1396816074848175, "learning_rate": 1.2657586286348111e-05, "loss": 0.2723, "step": 12859 }, { "epoch": 2.094125310426251, "grad_norm": 0.17926129698753357, "learning_rate": 1.2653467031834493e-05, "loss": 0.288, "step": 12860 }, { "epoch": 2.094288156984082, "grad_norm": 0.18115872144699097, "learning_rate": 1.264934822059839e-05, "loss": 0.2948, "step": 12861 }, { "epoch": 2.094451003541913, "grad_norm": 0.11936245858669281, "learning_rate": 1.2645229852787699e-05, "loss": 0.3402, "step": 12862 }, { "epoch": 2.0946138500997433, "grad_norm": 0.17473645508289337, "learning_rate": 1.264111192855027e-05, "loss": 0.2731, "step": 12863 }, { "epoch": 2.0947766966575743, "grad_norm": 0.1894751638174057, "learning_rate": 1.2636994448033945e-05, "loss": 0.2612, "step": 12864 }, { "epoch": 2.094939543215405, "grad_norm": 0.2482960969209671, "learning_rate": 1.2632877411386568e-05, "loss": 0.3137, "step": 12865 }, { "epoch": 2.095102389773236, "grad_norm": 0.2069508135318756, "learning_rate": 1.2628760818755934e-05, "loss": 0.2912, "step": 12866 }, { "epoch": 2.095265236331067, "grad_norm": 0.16248613595962524, "learning_rate": 1.2624644670289861e-05, "loss": 0.292, "step": 12867 }, { "epoch": 2.095428082888898, "grad_norm": 0.15927435457706451, "learning_rate": 1.2620528966136124e-05, "loss": 0.2666, "step": 12868 }, { "epoch": 2.095590929446729, "grad_norm": 0.21537932753562927, "learning_rate": 1.2616413706442475e-05, "loss": 0.2912, "step": 12869 }, { "epoch": 2.0957537760045595, "grad_norm": 0.17402997612953186, "learning_rate": 1.2612298891356688e-05, "loss": 0.2669, "step": 12870 }, { "epoch": 2.0959166225623904, "grad_norm": 0.17764361202716827, "learning_rate": 1.2608184521026487e-05, "loss": 0.2923, "step": 12871 }, { "epoch": 2.0960794691202214, "grad_norm": 0.17509090900421143, "learning_rate": 1.2604070595599577e-05, "loss": 0.259, "step": 12872 }, { "epoch": 2.0962423156780523, "grad_norm": 0.2013239860534668, "learning_rate": 1.2599957115223681e-05, "loss": 0.3041, "step": 12873 }, { "epoch": 2.0964051622358832, "grad_norm": 0.1984729766845703, "learning_rate": 1.2595844080046476e-05, "loss": 0.259, "step": 12874 }, { "epoch": 2.096568008793714, "grad_norm": 0.14485031366348267, "learning_rate": 1.2591731490215626e-05, "loss": 0.2696, "step": 12875 }, { "epoch": 2.096730855351545, "grad_norm": 0.1622832715511322, "learning_rate": 1.2587619345878796e-05, "loss": 0.2824, "step": 12876 }, { "epoch": 2.096893701909376, "grad_norm": 0.14131984114646912, "learning_rate": 1.2583507647183623e-05, "loss": 0.2653, "step": 12877 }, { "epoch": 2.0970565484672066, "grad_norm": 0.11750808358192444, "learning_rate": 1.2579396394277726e-05, "loss": 0.2788, "step": 12878 }, { "epoch": 2.0972193950250375, "grad_norm": 0.15561631321907043, "learning_rate": 1.2575285587308699e-05, "loss": 0.2841, "step": 12879 }, { "epoch": 2.0973822415828685, "grad_norm": 0.1731596440076828, "learning_rate": 1.2571175226424159e-05, "loss": 0.2624, "step": 12880 }, { "epoch": 2.0975450881406994, "grad_norm": 0.16051559150218964, "learning_rate": 1.2567065311771664e-05, "loss": 0.2594, "step": 12881 }, { "epoch": 2.0977079346985303, "grad_norm": 0.2673218548297882, "learning_rate": 1.2562955843498775e-05, "loss": 0.2575, "step": 12882 }, { "epoch": 2.0978707812563613, "grad_norm": 0.16431869566440582, "learning_rate": 1.2558846821753024e-05, "loss": 0.3485, "step": 12883 }, { "epoch": 2.0980336278141922, "grad_norm": 0.17748107016086578, "learning_rate": 1.2554738246681952e-05, "loss": 0.2637, "step": 12884 }, { "epoch": 2.098196474372023, "grad_norm": 0.1861269325017929, "learning_rate": 1.2550630118433072e-05, "loss": 0.2846, "step": 12885 }, { "epoch": 2.0983593209298537, "grad_norm": 0.133789524435997, "learning_rate": 1.2546522437153854e-05, "loss": 0.2467, "step": 12886 }, { "epoch": 2.0985221674876846, "grad_norm": 0.17894193530082703, "learning_rate": 1.2542415202991803e-05, "loss": 0.2731, "step": 12887 }, { "epoch": 2.0986850140455156, "grad_norm": 0.16549789905548096, "learning_rate": 1.2538308416094375e-05, "loss": 0.2873, "step": 12888 }, { "epoch": 2.0988478606033465, "grad_norm": 0.16071121394634247, "learning_rate": 1.2534202076609004e-05, "loss": 0.2552, "step": 12889 }, { "epoch": 2.0990107071611774, "grad_norm": 0.1527944654226303, "learning_rate": 1.2530096184683121e-05, "loss": 0.29, "step": 12890 }, { "epoch": 2.0991735537190084, "grad_norm": 0.15748941898345947, "learning_rate": 1.2525990740464155e-05, "loss": 0.299, "step": 12891 }, { "epoch": 2.0993364002768393, "grad_norm": 0.14761914312839508, "learning_rate": 1.2521885744099493e-05, "loss": 0.2779, "step": 12892 }, { "epoch": 2.09949924683467, "grad_norm": 0.19717493653297424, "learning_rate": 1.2517781195736523e-05, "loss": 0.3123, "step": 12893 }, { "epoch": 2.0996620933925008, "grad_norm": 0.15147894620895386, "learning_rate": 1.2513677095522591e-05, "loss": 0.2835, "step": 12894 }, { "epoch": 2.0998249399503317, "grad_norm": 0.19775603711605072, "learning_rate": 1.2509573443605072e-05, "loss": 0.287, "step": 12895 }, { "epoch": 2.0999877865081626, "grad_norm": 0.14731070399284363, "learning_rate": 1.250547024013129e-05, "loss": 0.2784, "step": 12896 }, { "epoch": 2.1001506330659936, "grad_norm": 0.1405358910560608, "learning_rate": 1.250136748524855e-05, "loss": 0.2483, "step": 12897 }, { "epoch": 2.1003134796238245, "grad_norm": 0.15320654213428497, "learning_rate": 1.2497265179104175e-05, "loss": 0.2537, "step": 12898 }, { "epoch": 2.1004763261816555, "grad_norm": 0.18838705122470856, "learning_rate": 1.2493163321845442e-05, "loss": 0.2879, "step": 12899 }, { "epoch": 2.1006391727394864, "grad_norm": 0.15047186613082886, "learning_rate": 1.2489061913619612e-05, "loss": 0.2648, "step": 12900 }, { "epoch": 2.100802019297317, "grad_norm": 0.16328465938568115, "learning_rate": 1.2484960954573935e-05, "loss": 0.2744, "step": 12901 }, { "epoch": 2.100964865855148, "grad_norm": 0.18875859677791595, "learning_rate": 1.2480860444855666e-05, "loss": 0.2728, "step": 12902 }, { "epoch": 2.101127712412979, "grad_norm": 0.16074131429195404, "learning_rate": 1.2476760384612008e-05, "loss": 0.2634, "step": 12903 }, { "epoch": 2.1012905589708097, "grad_norm": 0.14714273810386658, "learning_rate": 1.247266077399018e-05, "loss": 0.3047, "step": 12904 }, { "epoch": 2.1014534055286407, "grad_norm": 0.19964294135570526, "learning_rate": 1.2468561613137363e-05, "loss": 0.2763, "step": 12905 }, { "epoch": 2.1016162520864716, "grad_norm": 0.1905716359615326, "learning_rate": 1.2464462902200718e-05, "loss": 0.2789, "step": 12906 }, { "epoch": 2.1017790986443026, "grad_norm": 0.14312708377838135, "learning_rate": 1.2460364641327427e-05, "loss": 0.2935, "step": 12907 }, { "epoch": 2.1019419452021335, "grad_norm": 0.19072270393371582, "learning_rate": 1.2456266830664601e-05, "loss": 0.2856, "step": 12908 }, { "epoch": 2.102104791759964, "grad_norm": 0.15967516601085663, "learning_rate": 1.245216947035939e-05, "loss": 0.339, "step": 12909 }, { "epoch": 2.102267638317795, "grad_norm": 0.17569859325885773, "learning_rate": 1.2448072560558888e-05, "loss": 0.2512, "step": 12910 }, { "epoch": 2.102430484875626, "grad_norm": 0.15052007138729095, "learning_rate": 1.2443976101410187e-05, "loss": 0.2608, "step": 12911 }, { "epoch": 2.102593331433457, "grad_norm": 0.1301213502883911, "learning_rate": 1.2439880093060355e-05, "loss": 0.2988, "step": 12912 }, { "epoch": 2.102756177991288, "grad_norm": 0.19155164062976837, "learning_rate": 1.2435784535656467e-05, "loss": 0.2582, "step": 12913 }, { "epoch": 2.1029190245491187, "grad_norm": 0.15903539955615997, "learning_rate": 1.2431689429345558e-05, "loss": 0.2601, "step": 12914 }, { "epoch": 2.1030818711069497, "grad_norm": 0.15832743048667908, "learning_rate": 1.2427594774274645e-05, "loss": 0.2616, "step": 12915 }, { "epoch": 2.10324471766478, "grad_norm": 0.1868724822998047, "learning_rate": 1.2423500570590754e-05, "loss": 0.2694, "step": 12916 }, { "epoch": 2.103407564222611, "grad_norm": 0.18131986260414124, "learning_rate": 1.2419406818440876e-05, "loss": 0.2892, "step": 12917 }, { "epoch": 2.103570410780442, "grad_norm": 0.1644407957792282, "learning_rate": 1.2415313517971986e-05, "loss": 0.318, "step": 12918 }, { "epoch": 2.103733257338273, "grad_norm": 0.19004184007644653, "learning_rate": 1.2411220669331034e-05, "loss": 0.2762, "step": 12919 }, { "epoch": 2.103896103896104, "grad_norm": 0.2036786675453186, "learning_rate": 1.240712827266499e-05, "loss": 0.3014, "step": 12920 }, { "epoch": 2.104058950453935, "grad_norm": 0.16662321984767914, "learning_rate": 1.2403036328120765e-05, "loss": 0.2968, "step": 12921 }, { "epoch": 2.104221797011766, "grad_norm": 0.15200969576835632, "learning_rate": 1.239894483584528e-05, "loss": 0.2968, "step": 12922 }, { "epoch": 2.1043846435695968, "grad_norm": 0.17113196849822998, "learning_rate": 1.239485379598542e-05, "loss": 0.241, "step": 12923 }, { "epoch": 2.1045474901274273, "grad_norm": 0.22795379161834717, "learning_rate": 1.2390763208688083e-05, "loss": 0.2949, "step": 12924 }, { "epoch": 2.104710336685258, "grad_norm": 0.2175571769475937, "learning_rate": 1.2386673074100127e-05, "loss": 0.2987, "step": 12925 }, { "epoch": 2.104873183243089, "grad_norm": 0.12593792378902435, "learning_rate": 1.2382583392368388e-05, "loss": 0.2676, "step": 12926 }, { "epoch": 2.10503602980092, "grad_norm": 0.18599648773670197, "learning_rate": 1.2378494163639717e-05, "loss": 0.2556, "step": 12927 }, { "epoch": 2.105198876358751, "grad_norm": 0.20062051713466644, "learning_rate": 1.2374405388060923e-05, "loss": 0.2966, "step": 12928 }, { "epoch": 2.105361722916582, "grad_norm": 0.15962731838226318, "learning_rate": 1.23703170657788e-05, "loss": 0.3317, "step": 12929 }, { "epoch": 2.105524569474413, "grad_norm": 0.16569237411022186, "learning_rate": 1.2366229196940123e-05, "loss": 0.2596, "step": 12930 }, { "epoch": 2.1056874160322434, "grad_norm": 0.14097124338150024, "learning_rate": 1.2362141781691682e-05, "loss": 0.2903, "step": 12931 }, { "epoch": 2.1058502625900744, "grad_norm": 0.13404932618141174, "learning_rate": 1.2358054820180214e-05, "loss": 0.2584, "step": 12932 }, { "epoch": 2.1060131091479053, "grad_norm": 0.1660013347864151, "learning_rate": 1.2353968312552455e-05, "loss": 0.2747, "step": 12933 }, { "epoch": 2.1061759557057362, "grad_norm": 0.1772814691066742, "learning_rate": 1.2349882258955112e-05, "loss": 0.2662, "step": 12934 }, { "epoch": 2.106338802263567, "grad_norm": 0.13547766208648682, "learning_rate": 1.2345796659534905e-05, "loss": 0.2929, "step": 12935 }, { "epoch": 2.106501648821398, "grad_norm": 0.1514846533536911, "learning_rate": 1.2341711514438512e-05, "loss": 0.2487, "step": 12936 }, { "epoch": 2.106664495379229, "grad_norm": 0.16162283718585968, "learning_rate": 1.2337626823812592e-05, "loss": 0.277, "step": 12937 }, { "epoch": 2.10682734193706, "grad_norm": 0.19992490112781525, "learning_rate": 1.2333542587803814e-05, "loss": 0.2591, "step": 12938 }, { "epoch": 2.1069901884948905, "grad_norm": 0.12609773874282837, "learning_rate": 1.232945880655881e-05, "loss": 0.2633, "step": 12939 }, { "epoch": 2.1071530350527214, "grad_norm": 0.1912752091884613, "learning_rate": 1.2325375480224185e-05, "loss": 0.2682, "step": 12940 }, { "epoch": 2.1073158816105524, "grad_norm": 0.12501366436481476, "learning_rate": 1.2321292608946563e-05, "loss": 0.276, "step": 12941 }, { "epoch": 2.1074787281683833, "grad_norm": 0.17614886164665222, "learning_rate": 1.2317210192872525e-05, "loss": 0.2533, "step": 12942 }, { "epoch": 2.1076415747262143, "grad_norm": 0.15847836434841156, "learning_rate": 1.231312823214863e-05, "loss": 0.2544, "step": 12943 }, { "epoch": 2.107804421284045, "grad_norm": 0.14861945807933807, "learning_rate": 1.2309046726921452e-05, "loss": 0.2711, "step": 12944 }, { "epoch": 2.107967267841876, "grad_norm": 0.1713266670703888, "learning_rate": 1.230496567733751e-05, "loss": 0.2257, "step": 12945 }, { "epoch": 2.1081301143997067, "grad_norm": 0.1221042275428772, "learning_rate": 1.2300885083543342e-05, "loss": 0.2724, "step": 12946 }, { "epoch": 2.1082929609575376, "grad_norm": 0.201478511095047, "learning_rate": 1.229680494568545e-05, "loss": 0.2379, "step": 12947 }, { "epoch": 2.1084558075153685, "grad_norm": 0.1797793209552765, "learning_rate": 1.2292725263910313e-05, "loss": 0.2752, "step": 12948 }, { "epoch": 2.1086186540731995, "grad_norm": 0.20635077357292175, "learning_rate": 1.2288646038364415e-05, "loss": 0.263, "step": 12949 }, { "epoch": 2.1087815006310304, "grad_norm": 0.16744783520698547, "learning_rate": 1.2284567269194213e-05, "loss": 0.3144, "step": 12950 }, { "epoch": 2.1089443471888614, "grad_norm": 0.1691606342792511, "learning_rate": 1.2280488956546141e-05, "loss": 0.2613, "step": 12951 }, { "epoch": 2.1091071937466923, "grad_norm": 0.1639632135629654, "learning_rate": 1.2276411100566614e-05, "loss": 0.2839, "step": 12952 }, { "epoch": 2.1092700403045233, "grad_norm": 0.18126744031906128, "learning_rate": 1.2272333701402059e-05, "loss": 0.298, "step": 12953 }, { "epoch": 2.1094328868623538, "grad_norm": 0.20123328268527985, "learning_rate": 1.2268256759198857e-05, "loss": 0.2896, "step": 12954 }, { "epoch": 2.1095957334201847, "grad_norm": 0.18008677661418915, "learning_rate": 1.2264180274103384e-05, "loss": 0.2253, "step": 12955 }, { "epoch": 2.1097585799780156, "grad_norm": 0.15902429819107056, "learning_rate": 1.2260104246261986e-05, "loss": 0.2909, "step": 12956 }, { "epoch": 2.1099214265358466, "grad_norm": 0.20008178055286407, "learning_rate": 1.2256028675821025e-05, "loss": 0.2574, "step": 12957 }, { "epoch": 2.1100842730936775, "grad_norm": 0.42610153555870056, "learning_rate": 1.2251953562926816e-05, "loss": 0.3617, "step": 12958 }, { "epoch": 2.1102471196515085, "grad_norm": 0.15260577201843262, "learning_rate": 1.224787890772566e-05, "loss": 0.2803, "step": 12959 }, { "epoch": 2.1104099662093394, "grad_norm": 0.15922288596630096, "learning_rate": 1.2243804710363865e-05, "loss": 0.2684, "step": 12960 }, { "epoch": 2.1105728127671703, "grad_norm": 0.17220814526081085, "learning_rate": 1.2239730970987704e-05, "loss": 0.2717, "step": 12961 }, { "epoch": 2.110735659325001, "grad_norm": 0.16894514858722687, "learning_rate": 1.2235657689743426e-05, "loss": 0.2499, "step": 12962 }, { "epoch": 2.110898505882832, "grad_norm": 0.16286200284957886, "learning_rate": 1.2231584866777273e-05, "loss": 0.2721, "step": 12963 }, { "epoch": 2.1110613524406627, "grad_norm": 0.18546853959560394, "learning_rate": 1.2227512502235491e-05, "loss": 0.278, "step": 12964 }, { "epoch": 2.1112241989984937, "grad_norm": 0.16386264562606812, "learning_rate": 1.2223440596264274e-05, "loss": 0.2816, "step": 12965 }, { "epoch": 2.1113870455563246, "grad_norm": 0.18763110041618347, "learning_rate": 1.221936914900981e-05, "loss": 0.2958, "step": 12966 }, { "epoch": 2.1115498921141556, "grad_norm": 0.15397749841213226, "learning_rate": 1.2215298160618297e-05, "loss": 0.2954, "step": 12967 }, { "epoch": 2.1117127386719865, "grad_norm": 0.15316757559776306, "learning_rate": 1.2211227631235881e-05, "loss": 0.3054, "step": 12968 }, { "epoch": 2.111875585229817, "grad_norm": 0.19244417548179626, "learning_rate": 1.2207157561008711e-05, "loss": 0.3054, "step": 12969 }, { "epoch": 2.112038431787648, "grad_norm": 0.17474384605884552, "learning_rate": 1.2203087950082904e-05, "loss": 0.2705, "step": 12970 }, { "epoch": 2.112201278345479, "grad_norm": 0.1596837043762207, "learning_rate": 1.219901879860459e-05, "loss": 0.2548, "step": 12971 }, { "epoch": 2.11236412490331, "grad_norm": 0.1472437083721161, "learning_rate": 1.2194950106719849e-05, "loss": 0.2365, "step": 12972 }, { "epoch": 2.1125269714611408, "grad_norm": 0.24701419472694397, "learning_rate": 1.219088187457477e-05, "loss": 0.3371, "step": 12973 }, { "epoch": 2.1126898180189717, "grad_norm": 0.17088083922863007, "learning_rate": 1.2186814102315397e-05, "loss": 0.2394, "step": 12974 }, { "epoch": 2.1128526645768027, "grad_norm": 0.19797220826148987, "learning_rate": 1.2182746790087795e-05, "loss": 0.2797, "step": 12975 }, { "epoch": 2.1130155111346336, "grad_norm": 0.13843217492103577, "learning_rate": 1.2178679938037988e-05, "loss": 0.2516, "step": 12976 }, { "epoch": 2.113178357692464, "grad_norm": 0.17800085246562958, "learning_rate": 1.2174613546311972e-05, "loss": 0.2967, "step": 12977 }, { "epoch": 2.113341204250295, "grad_norm": 0.15426352620124817, "learning_rate": 1.217054761505577e-05, "loss": 0.2339, "step": 12978 }, { "epoch": 2.113504050808126, "grad_norm": 0.16378653049468994, "learning_rate": 1.2166482144415342e-05, "loss": 0.276, "step": 12979 }, { "epoch": 2.113666897365957, "grad_norm": 0.1656707227230072, "learning_rate": 1.2162417134536647e-05, "loss": 0.2687, "step": 12980 }, { "epoch": 2.113829743923788, "grad_norm": 0.17632430791854858, "learning_rate": 1.215835258556565e-05, "loss": 0.3019, "step": 12981 }, { "epoch": 2.113992590481619, "grad_norm": 0.1399284452199936, "learning_rate": 1.2154288497648267e-05, "loss": 0.2568, "step": 12982 }, { "epoch": 2.1141554370394497, "grad_norm": 0.15413349866867065, "learning_rate": 1.2150224870930408e-05, "loss": 0.2963, "step": 12983 }, { "epoch": 2.1143182835972807, "grad_norm": 0.1887083351612091, "learning_rate": 1.2146161705557981e-05, "loss": 0.2583, "step": 12984 }, { "epoch": 2.114481130155111, "grad_norm": 0.14686740934848785, "learning_rate": 1.2142099001676852e-05, "loss": 0.264, "step": 12985 }, { "epoch": 2.114643976712942, "grad_norm": 0.1915006935596466, "learning_rate": 1.2138036759432903e-05, "loss": 0.2336, "step": 12986 }, { "epoch": 2.114806823270773, "grad_norm": 0.14691497385501862, "learning_rate": 1.2133974978971968e-05, "loss": 0.2719, "step": 12987 }, { "epoch": 2.114969669828604, "grad_norm": 0.1359160840511322, "learning_rate": 1.212991366043987e-05, "loss": 0.2579, "step": 12988 }, { "epoch": 2.115132516386435, "grad_norm": 0.1939825862646103, "learning_rate": 1.212585280398244e-05, "loss": 0.2908, "step": 12989 }, { "epoch": 2.115295362944266, "grad_norm": 0.1856224685907364, "learning_rate": 1.2121792409745467e-05, "loss": 0.279, "step": 12990 }, { "epoch": 2.115458209502097, "grad_norm": 0.2045823633670807, "learning_rate": 1.2117732477874732e-05, "loss": 0.2991, "step": 12991 }, { "epoch": 2.1156210560599273, "grad_norm": 0.14892181754112244, "learning_rate": 1.2113673008515986e-05, "loss": 0.2911, "step": 12992 }, { "epoch": 2.1157839026177583, "grad_norm": 0.15882360935211182, "learning_rate": 1.2109614001814999e-05, "loss": 0.2785, "step": 12993 }, { "epoch": 2.1159467491755892, "grad_norm": 0.17255403101444244, "learning_rate": 1.2105555457917487e-05, "loss": 0.2745, "step": 12994 }, { "epoch": 2.11610959573342, "grad_norm": 0.15629473328590393, "learning_rate": 1.2101497376969168e-05, "loss": 0.324, "step": 12995 }, { "epoch": 2.116272442291251, "grad_norm": 0.18598458170890808, "learning_rate": 1.209743975911573e-05, "loss": 0.2937, "step": 12996 }, { "epoch": 2.116435288849082, "grad_norm": 0.2165430337190628, "learning_rate": 1.209338260450287e-05, "loss": 0.2712, "step": 12997 }, { "epoch": 2.116598135406913, "grad_norm": 0.14357692003250122, "learning_rate": 1.2089325913276244e-05, "loss": 0.2517, "step": 12998 }, { "epoch": 2.1167609819647435, "grad_norm": 0.14560285210609436, "learning_rate": 1.2085269685581487e-05, "loss": 0.2349, "step": 12999 }, { "epoch": 2.1169238285225744, "grad_norm": 0.18148523569107056, "learning_rate": 1.2081213921564255e-05, "loss": 0.3137, "step": 13000 }, { "epoch": 2.1170866750804054, "grad_norm": 0.1812438666820526, "learning_rate": 1.207715862137015e-05, "loss": 0.3031, "step": 13001 }, { "epoch": 2.1172495216382363, "grad_norm": 0.1637285351753235, "learning_rate": 1.2073103785144766e-05, "loss": 0.2978, "step": 13002 }, { "epoch": 2.1174123681960673, "grad_norm": 0.19650810956954956, "learning_rate": 1.2069049413033675e-05, "loss": 0.3006, "step": 13003 }, { "epoch": 2.117575214753898, "grad_norm": 0.19535616040229797, "learning_rate": 1.2064995505182464e-05, "loss": 0.2555, "step": 13004 }, { "epoch": 2.117738061311729, "grad_norm": 0.16472093760967255, "learning_rate": 1.2060942061736669e-05, "loss": 0.2582, "step": 13005 }, { "epoch": 2.11790090786956, "grad_norm": 0.1645928919315338, "learning_rate": 1.205688908284181e-05, "loss": 0.2695, "step": 13006 }, { "epoch": 2.1180637544273906, "grad_norm": 0.19189125299453735, "learning_rate": 1.2052836568643422e-05, "loss": 0.3174, "step": 13007 }, { "epoch": 2.1182266009852215, "grad_norm": 0.16324841976165771, "learning_rate": 1.2048784519286991e-05, "loss": 0.2318, "step": 13008 }, { "epoch": 2.1183894475430525, "grad_norm": 0.218971848487854, "learning_rate": 1.2044732934917999e-05, "loss": 0.3065, "step": 13009 }, { "epoch": 2.1185522941008834, "grad_norm": 0.18164406716823578, "learning_rate": 1.2040681815681897e-05, "loss": 0.284, "step": 13010 }, { "epoch": 2.1187151406587144, "grad_norm": 0.1282738745212555, "learning_rate": 1.203663116172416e-05, "loss": 0.2724, "step": 13011 }, { "epoch": 2.1188779872165453, "grad_norm": 0.1360360085964203, "learning_rate": 1.20325809731902e-05, "loss": 0.2151, "step": 13012 }, { "epoch": 2.1190408337743762, "grad_norm": 0.170199915766716, "learning_rate": 1.2028531250225437e-05, "loss": 0.3013, "step": 13013 }, { "epoch": 2.119203680332207, "grad_norm": 0.17493389546871185, "learning_rate": 1.2024481992975253e-05, "loss": 0.2904, "step": 13014 }, { "epoch": 2.1193665268900377, "grad_norm": 0.19254012405872345, "learning_rate": 1.2020433201585052e-05, "loss": 0.2882, "step": 13015 }, { "epoch": 2.1195293734478686, "grad_norm": 0.19696243107318878, "learning_rate": 1.2016384876200187e-05, "loss": 0.291, "step": 13016 }, { "epoch": 2.1196922200056996, "grad_norm": 0.1608564257621765, "learning_rate": 1.2012337016965997e-05, "loss": 0.3607, "step": 13017 }, { "epoch": 2.1198550665635305, "grad_norm": 0.1869172602891922, "learning_rate": 1.2008289624027827e-05, "loss": 0.2931, "step": 13018 }, { "epoch": 2.1200179131213615, "grad_norm": 0.22836188971996307, "learning_rate": 1.2004242697530984e-05, "loss": 0.2772, "step": 13019 }, { "epoch": 2.1201807596791924, "grad_norm": 0.14483685791492462, "learning_rate": 1.200019623762076e-05, "loss": 0.2649, "step": 13020 }, { "epoch": 2.1203436062370233, "grad_norm": 0.15236635506153107, "learning_rate": 1.1996150244442442e-05, "loss": 0.2676, "step": 13021 }, { "epoch": 2.120506452794854, "grad_norm": 0.13391122221946716, "learning_rate": 1.1992104718141285e-05, "loss": 0.2794, "step": 13022 }, { "epoch": 2.1206692993526848, "grad_norm": 0.1938079446554184, "learning_rate": 1.198805965886255e-05, "loss": 0.3388, "step": 13023 }, { "epoch": 2.1208321459105157, "grad_norm": 0.17105185985565186, "learning_rate": 1.1984015066751458e-05, "loss": 0.2601, "step": 13024 }, { "epoch": 2.1209949924683467, "grad_norm": 0.19576996564865112, "learning_rate": 1.1979970941953211e-05, "loss": 0.2867, "step": 13025 }, { "epoch": 2.1211578390261776, "grad_norm": 0.1859215646982193, "learning_rate": 1.1975927284613029e-05, "loss": 0.2701, "step": 13026 }, { "epoch": 2.1213206855840085, "grad_norm": 0.1622031033039093, "learning_rate": 1.1971884094876077e-05, "loss": 0.3089, "step": 13027 }, { "epoch": 2.1214835321418395, "grad_norm": 0.171758770942688, "learning_rate": 1.1967841372887508e-05, "loss": 0.2528, "step": 13028 }, { "epoch": 2.1216463786996704, "grad_norm": 0.20749931037425995, "learning_rate": 1.1963799118792493e-05, "loss": 0.2696, "step": 13029 }, { "epoch": 2.121809225257501, "grad_norm": 0.167218416929245, "learning_rate": 1.1959757332736144e-05, "loss": 0.2869, "step": 13030 }, { "epoch": 2.121972071815332, "grad_norm": 0.1539057195186615, "learning_rate": 1.1955716014863577e-05, "loss": 0.289, "step": 13031 }, { "epoch": 2.122134918373163, "grad_norm": 0.19416506588459015, "learning_rate": 1.1951675165319875e-05, "loss": 0.2761, "step": 13032 }, { "epoch": 2.1222977649309938, "grad_norm": 0.14399689435958862, "learning_rate": 1.1947634784250142e-05, "loss": 0.2642, "step": 13033 }, { "epoch": 2.1224606114888247, "grad_norm": 0.25096768140792847, "learning_rate": 1.1943594871799424e-05, "loss": 0.2832, "step": 13034 }, { "epoch": 2.1226234580466556, "grad_norm": 0.18710841238498688, "learning_rate": 1.1939555428112768e-05, "loss": 0.3167, "step": 13035 }, { "epoch": 2.1227863046044866, "grad_norm": 0.17904901504516602, "learning_rate": 1.1935516453335194e-05, "loss": 0.2785, "step": 13036 }, { "epoch": 2.1229491511623175, "grad_norm": 0.1894155889749527, "learning_rate": 1.193147794761173e-05, "loss": 0.253, "step": 13037 }, { "epoch": 2.123111997720148, "grad_norm": 0.16127604246139526, "learning_rate": 1.1927439911087363e-05, "loss": 0.3002, "step": 13038 }, { "epoch": 2.123274844277979, "grad_norm": 0.18666549026966095, "learning_rate": 1.192340234390706e-05, "loss": 0.2544, "step": 13039 }, { "epoch": 2.12343769083581, "grad_norm": 0.18648268282413483, "learning_rate": 1.1919365246215797e-05, "loss": 0.2635, "step": 13040 }, { "epoch": 2.123600537393641, "grad_norm": 0.17132888734340668, "learning_rate": 1.1915328618158517e-05, "loss": 0.2523, "step": 13041 }, { "epoch": 2.123763383951472, "grad_norm": 0.1723729372024536, "learning_rate": 1.191129245988014e-05, "loss": 0.2916, "step": 13042 }, { "epoch": 2.1239262305093027, "grad_norm": 0.1670254021883011, "learning_rate": 1.190725677152557e-05, "loss": 0.2879, "step": 13043 }, { "epoch": 2.1240890770671337, "grad_norm": 0.17250266671180725, "learning_rate": 1.190322155323972e-05, "loss": 0.272, "step": 13044 }, { "epoch": 2.124251923624964, "grad_norm": 0.1699884682893753, "learning_rate": 1.189918680516745e-05, "loss": 0.2625, "step": 13045 }, { "epoch": 2.124414770182795, "grad_norm": 0.142755925655365, "learning_rate": 1.189515252745363e-05, "loss": 0.2689, "step": 13046 }, { "epoch": 2.124577616740626, "grad_norm": 0.1665550172328949, "learning_rate": 1.1891118720243085e-05, "loss": 0.2511, "step": 13047 }, { "epoch": 2.124740463298457, "grad_norm": 0.18048471212387085, "learning_rate": 1.1887085383680666e-05, "loss": 0.2783, "step": 13048 }, { "epoch": 2.124903309856288, "grad_norm": 0.1578512191772461, "learning_rate": 1.1883052517911164e-05, "loss": 0.3124, "step": 13049 }, { "epoch": 2.125066156414119, "grad_norm": 0.170707568526268, "learning_rate": 1.187902012307937e-05, "loss": 0.2519, "step": 13050 }, { "epoch": 2.12522900297195, "grad_norm": 0.20829492807388306, "learning_rate": 1.1874988199330076e-05, "loss": 0.2774, "step": 13051 }, { "epoch": 2.1253918495297803, "grad_norm": 0.1769496649503708, "learning_rate": 1.1870956746808026e-05, "loss": 0.2905, "step": 13052 }, { "epoch": 2.1255546960876113, "grad_norm": 0.17370393872261047, "learning_rate": 1.1866925765657965e-05, "loss": 0.2975, "step": 13053 }, { "epoch": 2.125717542645442, "grad_norm": 0.1839071363210678, "learning_rate": 1.1862895256024608e-05, "loss": 0.2777, "step": 13054 }, { "epoch": 2.125880389203273, "grad_norm": 0.1599501222372055, "learning_rate": 1.1858865218052681e-05, "loss": 0.2854, "step": 13055 }, { "epoch": 2.126043235761104, "grad_norm": 0.1910182684659958, "learning_rate": 1.1854835651886864e-05, "loss": 0.2591, "step": 13056 }, { "epoch": 2.126206082318935, "grad_norm": 0.20238347351551056, "learning_rate": 1.185080655767182e-05, "loss": 0.265, "step": 13057 }, { "epoch": 2.126368928876766, "grad_norm": 0.1526806801557541, "learning_rate": 1.1846777935552227e-05, "loss": 0.3031, "step": 13058 }, { "epoch": 2.126531775434597, "grad_norm": 0.1784805804491043, "learning_rate": 1.1842749785672708e-05, "loss": 0.2541, "step": 13059 }, { "epoch": 2.126694621992428, "grad_norm": 0.18821628391742706, "learning_rate": 1.1838722108177899e-05, "loss": 0.3026, "step": 13060 }, { "epoch": 2.1268574685502584, "grad_norm": 0.19492733478546143, "learning_rate": 1.1834694903212398e-05, "loss": 0.317, "step": 13061 }, { "epoch": 2.1270203151080893, "grad_norm": 0.17493557929992676, "learning_rate": 1.1830668170920785e-05, "loss": 0.3037, "step": 13062 }, { "epoch": 2.1271831616659203, "grad_norm": 0.14837780594825745, "learning_rate": 1.182664191144765e-05, "loss": 0.248, "step": 13063 }, { "epoch": 2.127346008223751, "grad_norm": 0.1332523226737976, "learning_rate": 1.182261612493754e-05, "loss": 0.2552, "step": 13064 }, { "epoch": 2.127508854781582, "grad_norm": 0.19281254708766937, "learning_rate": 1.1818590811534982e-05, "loss": 0.2913, "step": 13065 }, { "epoch": 2.127671701339413, "grad_norm": 0.1897236406803131, "learning_rate": 1.1814565971384514e-05, "loss": 0.3165, "step": 13066 }, { "epoch": 2.127834547897244, "grad_norm": 0.1943107396364212, "learning_rate": 1.1810541604630634e-05, "loss": 0.2947, "step": 13067 }, { "epoch": 2.1279973944550745, "grad_norm": 0.16062840819358826, "learning_rate": 1.180651771141782e-05, "loss": 0.3459, "step": 13068 }, { "epoch": 2.1281602410129055, "grad_norm": 0.14875422418117523, "learning_rate": 1.1802494291890553e-05, "loss": 0.2877, "step": 13069 }, { "epoch": 2.1283230875707364, "grad_norm": 0.19380946457386017, "learning_rate": 1.1798471346193285e-05, "loss": 0.2756, "step": 13070 }, { "epoch": 2.1284859341285673, "grad_norm": 0.15990477800369263, "learning_rate": 1.1794448874470448e-05, "loss": 0.2606, "step": 13071 }, { "epoch": 2.1286487806863983, "grad_norm": 0.15648281574249268, "learning_rate": 1.179042687686645e-05, "loss": 0.2778, "step": 13072 }, { "epoch": 2.1288116272442292, "grad_norm": 0.18234528601169586, "learning_rate": 1.1786405353525718e-05, "loss": 0.2541, "step": 13073 }, { "epoch": 2.12897447380206, "grad_norm": 0.16970033943653107, "learning_rate": 1.178238430459262e-05, "loss": 0.2715, "step": 13074 }, { "epoch": 2.1291373203598907, "grad_norm": 0.1521478146314621, "learning_rate": 1.1778363730211525e-05, "loss": 0.277, "step": 13075 }, { "epoch": 2.1293001669177216, "grad_norm": 0.1521328091621399, "learning_rate": 1.1774343630526777e-05, "loss": 0.2457, "step": 13076 }, { "epoch": 2.1294630134755526, "grad_norm": 0.173697829246521, "learning_rate": 1.177032400568273e-05, "loss": 0.2596, "step": 13077 }, { "epoch": 2.1296258600333835, "grad_norm": 0.13414081931114197, "learning_rate": 1.1766304855823684e-05, "loss": 0.2377, "step": 13078 }, { "epoch": 2.1297887065912144, "grad_norm": 0.13131803274154663, "learning_rate": 1.1762286181093934e-05, "loss": 0.2492, "step": 13079 }, { "epoch": 2.1299515531490454, "grad_norm": 0.16003510355949402, "learning_rate": 1.1758267981637782e-05, "loss": 0.2725, "step": 13080 }, { "epoch": 2.1301143997068763, "grad_norm": 0.16428419947624207, "learning_rate": 1.1754250257599484e-05, "loss": 0.2866, "step": 13081 }, { "epoch": 2.1302772462647073, "grad_norm": 0.19106946885585785, "learning_rate": 1.1750233009123287e-05, "loss": 0.3397, "step": 13082 }, { "epoch": 2.1304400928225378, "grad_norm": 0.17513848841190338, "learning_rate": 1.174621623635341e-05, "loss": 0.311, "step": 13083 }, { "epoch": 2.1306029393803687, "grad_norm": 0.17266753315925598, "learning_rate": 1.1742199939434092e-05, "loss": 0.2679, "step": 13084 }, { "epoch": 2.1307657859381997, "grad_norm": 0.14508271217346191, "learning_rate": 1.1738184118509515e-05, "loss": 0.3051, "step": 13085 }, { "epoch": 2.1309286324960306, "grad_norm": 0.19861769676208496, "learning_rate": 1.1734168773723863e-05, "loss": 0.3046, "step": 13086 }, { "epoch": 2.1310914790538615, "grad_norm": 0.19989906251430511, "learning_rate": 1.1730153905221288e-05, "loss": 0.2926, "step": 13087 }, { "epoch": 2.1312543256116925, "grad_norm": 0.151119664311409, "learning_rate": 1.1726139513145956e-05, "loss": 0.2618, "step": 13088 }, { "epoch": 2.1314171721695234, "grad_norm": 0.13311703503131866, "learning_rate": 1.1722125597641983e-05, "loss": 0.2667, "step": 13089 }, { "epoch": 2.1315800187273544, "grad_norm": 0.13132505118846893, "learning_rate": 1.1718112158853475e-05, "loss": 0.2804, "step": 13090 }, { "epoch": 2.131742865285185, "grad_norm": 0.15146368741989136, "learning_rate": 1.1714099196924541e-05, "loss": 0.3001, "step": 13091 }, { "epoch": 2.131905711843016, "grad_norm": 0.15353350341320038, "learning_rate": 1.1710086711999255e-05, "loss": 0.249, "step": 13092 }, { "epoch": 2.1320685584008467, "grad_norm": 0.17865876853466034, "learning_rate": 1.170607470422167e-05, "loss": 0.3343, "step": 13093 }, { "epoch": 2.1322314049586777, "grad_norm": 0.14987927675247192, "learning_rate": 1.1702063173735825e-05, "loss": 0.2628, "step": 13094 }, { "epoch": 2.1323942515165086, "grad_norm": 0.16964754462242126, "learning_rate": 1.1698052120685765e-05, "loss": 0.2775, "step": 13095 }, { "epoch": 2.1325570980743396, "grad_norm": 0.16747869551181793, "learning_rate": 1.1694041545215476e-05, "loss": 0.2581, "step": 13096 }, { "epoch": 2.1327199446321705, "grad_norm": 0.22517527639865875, "learning_rate": 1.169003144746897e-05, "loss": 0.2885, "step": 13097 }, { "epoch": 2.132882791190001, "grad_norm": 0.12428254634141922, "learning_rate": 1.1686021827590213e-05, "loss": 0.2492, "step": 13098 }, { "epoch": 2.133045637747832, "grad_norm": 0.20776985585689545, "learning_rate": 1.1682012685723153e-05, "loss": 0.2793, "step": 13099 }, { "epoch": 2.133208484305663, "grad_norm": 0.2142394483089447, "learning_rate": 1.1678004022011752e-05, "loss": 0.2919, "step": 13100 }, { "epoch": 2.133371330863494, "grad_norm": 0.17768386006355286, "learning_rate": 1.1673995836599907e-05, "loss": 0.3177, "step": 13101 }, { "epoch": 2.133534177421325, "grad_norm": 0.16917818784713745, "learning_rate": 1.1669988129631549e-05, "loss": 0.315, "step": 13102 }, { "epoch": 2.1336970239791557, "grad_norm": 0.1500416100025177, "learning_rate": 1.1665980901250554e-05, "loss": 0.2565, "step": 13103 }, { "epoch": 2.1338598705369867, "grad_norm": 0.14777715504169464, "learning_rate": 1.1661974151600793e-05, "loss": 0.2716, "step": 13104 }, { "epoch": 2.1340227170948176, "grad_norm": 0.16448169946670532, "learning_rate": 1.1657967880826112e-05, "loss": 0.2606, "step": 13105 }, { "epoch": 2.134185563652648, "grad_norm": 0.19569288194179535, "learning_rate": 1.165396208907037e-05, "loss": 0.2769, "step": 13106 }, { "epoch": 2.134348410210479, "grad_norm": 0.20419509708881378, "learning_rate": 1.1649956776477373e-05, "loss": 0.2787, "step": 13107 }, { "epoch": 2.13451125676831, "grad_norm": 0.12966203689575195, "learning_rate": 1.1645951943190917e-05, "loss": 0.2453, "step": 13108 }, { "epoch": 2.134674103326141, "grad_norm": 0.18631093204021454, "learning_rate": 1.1641947589354804e-05, "loss": 0.2278, "step": 13109 }, { "epoch": 2.134836949883972, "grad_norm": 0.17929325997829437, "learning_rate": 1.1637943715112795e-05, "loss": 0.2678, "step": 13110 }, { "epoch": 2.134999796441803, "grad_norm": 0.21525715291500092, "learning_rate": 1.1633940320608642e-05, "loss": 0.3008, "step": 13111 }, { "epoch": 2.1351626429996338, "grad_norm": 0.1663842648267746, "learning_rate": 1.1629937405986066e-05, "loss": 0.3204, "step": 13112 }, { "epoch": 2.1353254895574647, "grad_norm": 0.18326331675052643, "learning_rate": 1.1625934971388805e-05, "loss": 0.2454, "step": 13113 }, { "epoch": 2.135488336115295, "grad_norm": 0.19592924416065216, "learning_rate": 1.1621933016960549e-05, "loss": 0.2679, "step": 13114 }, { "epoch": 2.135651182673126, "grad_norm": 0.1994594782590866, "learning_rate": 1.1617931542844979e-05, "loss": 0.2737, "step": 13115 }, { "epoch": 2.135814029230957, "grad_norm": 0.24136260151863098, "learning_rate": 1.161393054918575e-05, "loss": 0.2704, "step": 13116 }, { "epoch": 2.135976875788788, "grad_norm": 0.1726086288690567, "learning_rate": 1.1609930036126532e-05, "loss": 0.2728, "step": 13117 }, { "epoch": 2.136139722346619, "grad_norm": 0.1665239781141281, "learning_rate": 1.160593000381094e-05, "loss": 0.2673, "step": 13118 }, { "epoch": 2.13630256890445, "grad_norm": 0.17311759293079376, "learning_rate": 1.1601930452382584e-05, "loss": 0.2709, "step": 13119 }, { "epoch": 2.136465415462281, "grad_norm": 0.16314101219177246, "learning_rate": 1.1597931381985075e-05, "loss": 0.2736, "step": 13120 }, { "epoch": 2.1366282620201114, "grad_norm": 0.1586960107088089, "learning_rate": 1.1593932792761983e-05, "loss": 0.2565, "step": 13121 }, { "epoch": 2.1367911085779423, "grad_norm": 0.17782220244407654, "learning_rate": 1.1589934684856869e-05, "loss": 0.2917, "step": 13122 }, { "epoch": 2.1369539551357732, "grad_norm": 0.14387227594852448, "learning_rate": 1.1585937058413266e-05, "loss": 0.2884, "step": 13123 }, { "epoch": 2.137116801693604, "grad_norm": 0.15444380044937134, "learning_rate": 1.1581939913574724e-05, "loss": 0.2776, "step": 13124 }, { "epoch": 2.137279648251435, "grad_norm": 0.15820646286010742, "learning_rate": 1.1577943250484741e-05, "loss": 0.2508, "step": 13125 }, { "epoch": 2.137442494809266, "grad_norm": 0.1762145757675171, "learning_rate": 1.1573947069286809e-05, "loss": 0.2734, "step": 13126 }, { "epoch": 2.137605341367097, "grad_norm": 0.15158617496490479, "learning_rate": 1.156995137012439e-05, "loss": 0.2868, "step": 13127 }, { "epoch": 2.1377681879249275, "grad_norm": 0.25433632731437683, "learning_rate": 1.1565956153140967e-05, "loss": 0.3055, "step": 13128 }, { "epoch": 2.1379310344827585, "grad_norm": 0.1426524519920349, "learning_rate": 1.1561961418479967e-05, "loss": 0.2494, "step": 13129 }, { "epoch": 2.1380938810405894, "grad_norm": 0.14283843338489532, "learning_rate": 1.1557967166284805e-05, "loss": 0.2822, "step": 13130 }, { "epoch": 2.1382567275984203, "grad_norm": 0.14546406269073486, "learning_rate": 1.1553973396698903e-05, "loss": 0.2723, "step": 13131 }, { "epoch": 2.1384195741562513, "grad_norm": 0.16035370528697968, "learning_rate": 1.1549980109865642e-05, "loss": 0.2515, "step": 13132 }, { "epoch": 2.138582420714082, "grad_norm": 0.17238642275333405, "learning_rate": 1.1545987305928394e-05, "loss": 0.2692, "step": 13133 }, { "epoch": 2.138745267271913, "grad_norm": 0.18348242342472076, "learning_rate": 1.1541994985030499e-05, "loss": 0.2589, "step": 13134 }, { "epoch": 2.138908113829744, "grad_norm": 0.19741687178611755, "learning_rate": 1.1538003147315315e-05, "loss": 0.3057, "step": 13135 }, { "epoch": 2.1390709603875746, "grad_norm": 0.2550640106201172, "learning_rate": 1.1534011792926141e-05, "loss": 0.3142, "step": 13136 }, { "epoch": 2.1392338069454055, "grad_norm": 0.1614769995212555, "learning_rate": 1.1530020922006301e-05, "loss": 0.27, "step": 13137 }, { "epoch": 2.1393966535032365, "grad_norm": 0.19634433090686798, "learning_rate": 1.1526030534699065e-05, "loss": 0.2657, "step": 13138 }, { "epoch": 2.1395595000610674, "grad_norm": 0.15342243015766144, "learning_rate": 1.1522040631147693e-05, "loss": 0.2373, "step": 13139 }, { "epoch": 2.1397223466188984, "grad_norm": 0.15784938633441925, "learning_rate": 1.1518051211495453e-05, "loss": 0.2612, "step": 13140 }, { "epoch": 2.1398851931767293, "grad_norm": 0.13494214415550232, "learning_rate": 1.151406227588556e-05, "loss": 0.2536, "step": 13141 }, { "epoch": 2.1400480397345603, "grad_norm": 0.1746339499950409, "learning_rate": 1.1510073824461243e-05, "loss": 0.2531, "step": 13142 }, { "epoch": 2.140210886292391, "grad_norm": 0.14564546942710876, "learning_rate": 1.1506085857365693e-05, "loss": 0.297, "step": 13143 }, { "epoch": 2.1403737328502217, "grad_norm": 0.18001359701156616, "learning_rate": 1.150209837474209e-05, "loss": 0.2575, "step": 13144 }, { "epoch": 2.1405365794080526, "grad_norm": 0.16663619875907898, "learning_rate": 1.1498111376733586e-05, "loss": 0.2729, "step": 13145 }, { "epoch": 2.1406994259658836, "grad_norm": 0.16568273305892944, "learning_rate": 1.1494124863483347e-05, "loss": 0.277, "step": 13146 }, { "epoch": 2.1408622725237145, "grad_norm": 0.15199479460716248, "learning_rate": 1.149013883513449e-05, "loss": 0.2807, "step": 13147 }, { "epoch": 2.1410251190815455, "grad_norm": 0.15948207676410675, "learning_rate": 1.1486153291830123e-05, "loss": 0.3365, "step": 13148 }, { "epoch": 2.1411879656393764, "grad_norm": 0.16194464266300201, "learning_rate": 1.1482168233713333e-05, "loss": 0.2758, "step": 13149 }, { "epoch": 2.1413508121972074, "grad_norm": 0.16950242221355438, "learning_rate": 1.1478183660927217e-05, "loss": 0.2709, "step": 13150 }, { "epoch": 2.141513658755038, "grad_norm": 0.1726447194814682, "learning_rate": 1.1474199573614817e-05, "loss": 0.2929, "step": 13151 }, { "epoch": 2.141676505312869, "grad_norm": 0.15227212011814117, "learning_rate": 1.1470215971919165e-05, "loss": 0.3043, "step": 13152 }, { "epoch": 2.1418393518706997, "grad_norm": 0.11599738150835037, "learning_rate": 1.1466232855983309e-05, "loss": 0.2453, "step": 13153 }, { "epoch": 2.1420021984285307, "grad_norm": 0.2229309380054474, "learning_rate": 1.1462250225950242e-05, "loss": 0.2521, "step": 13154 }, { "epoch": 2.1421650449863616, "grad_norm": 0.1850815713405609, "learning_rate": 1.1458268081962951e-05, "loss": 0.2912, "step": 13155 }, { "epoch": 2.1423278915441926, "grad_norm": 0.19433283805847168, "learning_rate": 1.1454286424164396e-05, "loss": 0.2493, "step": 13156 }, { "epoch": 2.1424907381020235, "grad_norm": 0.18426713347434998, "learning_rate": 1.1450305252697555e-05, "loss": 0.2982, "step": 13157 }, { "epoch": 2.1426535846598544, "grad_norm": 0.1259365826845169, "learning_rate": 1.1446324567705349e-05, "loss": 0.3112, "step": 13158 }, { "epoch": 2.142816431217685, "grad_norm": 0.15480609238147736, "learning_rate": 1.1442344369330691e-05, "loss": 0.2842, "step": 13159 }, { "epoch": 2.142979277775516, "grad_norm": 0.17891472578048706, "learning_rate": 1.14383646577165e-05, "loss": 0.288, "step": 13160 }, { "epoch": 2.143142124333347, "grad_norm": 0.18235601484775543, "learning_rate": 1.1434385433005649e-05, "loss": 0.2756, "step": 13161 }, { "epoch": 2.1433049708911778, "grad_norm": 0.1884503960609436, "learning_rate": 1.1430406695341003e-05, "loss": 0.2982, "step": 13162 }, { "epoch": 2.1434678174490087, "grad_norm": 0.18196837604045868, "learning_rate": 1.1426428444865405e-05, "loss": 0.2966, "step": 13163 }, { "epoch": 2.1436306640068397, "grad_norm": 0.14045867323875427, "learning_rate": 1.1422450681721702e-05, "loss": 0.2494, "step": 13164 }, { "epoch": 2.1437935105646706, "grad_norm": 0.16690672934055328, "learning_rate": 1.14184734060527e-05, "loss": 0.2417, "step": 13165 }, { "epoch": 2.1439563571225015, "grad_norm": 0.18115869164466858, "learning_rate": 1.1414496618001191e-05, "loss": 0.2829, "step": 13166 }, { "epoch": 2.144119203680332, "grad_norm": 0.175628662109375, "learning_rate": 1.141052031770995e-05, "loss": 0.2378, "step": 13167 }, { "epoch": 2.144282050238163, "grad_norm": 0.16291718184947968, "learning_rate": 1.1406544505321754e-05, "loss": 0.2948, "step": 13168 }, { "epoch": 2.144444896795994, "grad_norm": 0.1413535326719284, "learning_rate": 1.1402569180979336e-05, "loss": 0.3059, "step": 13169 }, { "epoch": 2.144607743353825, "grad_norm": 0.19206903874874115, "learning_rate": 1.1398594344825414e-05, "loss": 0.2893, "step": 13170 }, { "epoch": 2.144770589911656, "grad_norm": 0.15215060114860535, "learning_rate": 1.1394619997002717e-05, "loss": 0.2564, "step": 13171 }, { "epoch": 2.1449334364694868, "grad_norm": 0.1676567792892456, "learning_rate": 1.1390646137653923e-05, "loss": 0.2521, "step": 13172 }, { "epoch": 2.1450962830273177, "grad_norm": 0.13064688444137573, "learning_rate": 1.13866727669217e-05, "loss": 0.2422, "step": 13173 }, { "epoch": 2.145259129585148, "grad_norm": 0.17192089557647705, "learning_rate": 1.138269988494872e-05, "loss": 0.2645, "step": 13174 }, { "epoch": 2.145421976142979, "grad_norm": 0.1728566586971283, "learning_rate": 1.1378727491877612e-05, "loss": 0.3139, "step": 13175 }, { "epoch": 2.14558482270081, "grad_norm": 0.18208995461463928, "learning_rate": 1.1374755587850988e-05, "loss": 0.2335, "step": 13176 }, { "epoch": 2.145747669258641, "grad_norm": 0.14946366846561432, "learning_rate": 1.137078417301147e-05, "loss": 0.2718, "step": 13177 }, { "epoch": 2.145910515816472, "grad_norm": 0.1466558873653412, "learning_rate": 1.1366813247501626e-05, "loss": 0.2587, "step": 13178 }, { "epoch": 2.146073362374303, "grad_norm": 0.17392584681510925, "learning_rate": 1.1362842811464039e-05, "loss": 0.3004, "step": 13179 }, { "epoch": 2.146236208932134, "grad_norm": 0.20646564662456512, "learning_rate": 1.1358872865041256e-05, "loss": 0.2874, "step": 13180 }, { "epoch": 2.1463990554899643, "grad_norm": 0.14030736684799194, "learning_rate": 1.1354903408375797e-05, "loss": 0.242, "step": 13181 }, { "epoch": 2.1465619020477953, "grad_norm": 0.1671227663755417, "learning_rate": 1.1350934441610198e-05, "loss": 0.2517, "step": 13182 }, { "epoch": 2.1467247486056262, "grad_norm": 0.18647009134292603, "learning_rate": 1.1346965964886944e-05, "loss": 0.3026, "step": 13183 }, { "epoch": 2.146887595163457, "grad_norm": 0.1835269033908844, "learning_rate": 1.1342997978348519e-05, "loss": 0.2751, "step": 13184 }, { "epoch": 2.147050441721288, "grad_norm": 0.16471265256404877, "learning_rate": 1.1339030482137376e-05, "loss": 0.2516, "step": 13185 }, { "epoch": 2.147213288279119, "grad_norm": 0.17605186998844147, "learning_rate": 1.1335063476395975e-05, "loss": 0.2902, "step": 13186 }, { "epoch": 2.14737613483695, "grad_norm": 0.1817743331193924, "learning_rate": 1.1331096961266736e-05, "loss": 0.2833, "step": 13187 }, { "epoch": 2.147538981394781, "grad_norm": 0.15522915124893188, "learning_rate": 1.132713093689207e-05, "loss": 0.253, "step": 13188 }, { "epoch": 2.147701827952612, "grad_norm": 0.1789834350347519, "learning_rate": 1.132316540341436e-05, "loss": 0.2761, "step": 13189 }, { "epoch": 2.1478646745104424, "grad_norm": 0.15608355402946472, "learning_rate": 1.1319200360975998e-05, "loss": 0.2325, "step": 13190 }, { "epoch": 2.1480275210682733, "grad_norm": 0.12814901769161224, "learning_rate": 1.131523580971933e-05, "loss": 0.2689, "step": 13191 }, { "epoch": 2.1481903676261043, "grad_norm": 0.18816028535366058, "learning_rate": 1.1311271749786689e-05, "loss": 0.2774, "step": 13192 }, { "epoch": 2.148353214183935, "grad_norm": 0.22321243584156036, "learning_rate": 1.1307308181320414e-05, "loss": 0.2743, "step": 13193 }, { "epoch": 2.148516060741766, "grad_norm": 0.19134269654750824, "learning_rate": 1.13033451044628e-05, "loss": 0.2584, "step": 13194 }, { "epoch": 2.148678907299597, "grad_norm": 0.17822860181331635, "learning_rate": 1.1299382519356134e-05, "loss": 0.3041, "step": 13195 }, { "epoch": 2.148841753857428, "grad_norm": 0.19285117089748383, "learning_rate": 1.1295420426142672e-05, "loss": 0.284, "step": 13196 }, { "epoch": 2.1490046004152585, "grad_norm": 0.18042990565299988, "learning_rate": 1.1291458824964687e-05, "loss": 0.2808, "step": 13197 }, { "epoch": 2.1491674469730895, "grad_norm": 0.15992344915866852, "learning_rate": 1.1287497715964404e-05, "loss": 0.2896, "step": 13198 }, { "epoch": 2.1493302935309204, "grad_norm": 0.15312328934669495, "learning_rate": 1.1283537099284033e-05, "loss": 0.2909, "step": 13199 }, { "epoch": 2.1494931400887514, "grad_norm": 0.21379424631595612, "learning_rate": 1.1279576975065769e-05, "loss": 0.2536, "step": 13200 }, { "epoch": 2.1496559866465823, "grad_norm": 0.15031225979328156, "learning_rate": 1.1275617343451808e-05, "loss": 0.2508, "step": 13201 }, { "epoch": 2.1498188332044132, "grad_norm": 0.1544826328754425, "learning_rate": 1.1271658204584304e-05, "loss": 0.2347, "step": 13202 }, { "epoch": 2.149981679762244, "grad_norm": 0.13215504586696625, "learning_rate": 1.1267699558605394e-05, "loss": 0.2673, "step": 13203 }, { "epoch": 2.1501445263200747, "grad_norm": 0.1708768755197525, "learning_rate": 1.1263741405657218e-05, "loss": 0.2702, "step": 13204 }, { "epoch": 2.1503073728779056, "grad_norm": 0.15401780605316162, "learning_rate": 1.1259783745881882e-05, "loss": 0.3172, "step": 13205 }, { "epoch": 2.1504702194357366, "grad_norm": 0.1682043969631195, "learning_rate": 1.1255826579421477e-05, "loss": 0.2688, "step": 13206 }, { "epoch": 2.1506330659935675, "grad_norm": 0.13719266653060913, "learning_rate": 1.125186990641807e-05, "loss": 0.2593, "step": 13207 }, { "epoch": 2.1507959125513985, "grad_norm": 0.20692646503448486, "learning_rate": 1.1247913727013729e-05, "loss": 0.2894, "step": 13208 }, { "epoch": 2.1509587591092294, "grad_norm": 0.17252226173877716, "learning_rate": 1.1243958041350488e-05, "loss": 0.3025, "step": 13209 }, { "epoch": 2.1511216056670603, "grad_norm": 0.15005919337272644, "learning_rate": 1.1240002849570361e-05, "loss": 0.2461, "step": 13210 }, { "epoch": 2.1512844522248913, "grad_norm": 0.25483545660972595, "learning_rate": 1.1236048151815362e-05, "loss": 0.3036, "step": 13211 }, { "epoch": 2.151447298782722, "grad_norm": 0.22455567121505737, "learning_rate": 1.1232093948227477e-05, "loss": 0.2698, "step": 13212 }, { "epoch": 2.1516101453405527, "grad_norm": 0.22089840471744537, "learning_rate": 1.122814023894866e-05, "loss": 0.297, "step": 13213 }, { "epoch": 2.1517729918983837, "grad_norm": 0.17452341318130493, "learning_rate": 1.1224187024120875e-05, "loss": 0.2691, "step": 13214 }, { "epoch": 2.1519358384562146, "grad_norm": 0.1844286173582077, "learning_rate": 1.1220234303886042e-05, "loss": 0.2758, "step": 13215 }, { "epoch": 2.1520986850140456, "grad_norm": 0.17085231840610504, "learning_rate": 1.1216282078386093e-05, "loss": 0.2555, "step": 13216 }, { "epoch": 2.1522615315718765, "grad_norm": 0.21926407516002655, "learning_rate": 1.1212330347762914e-05, "loss": 0.2775, "step": 13217 }, { "epoch": 2.1524243781297074, "grad_norm": 0.15389087796211243, "learning_rate": 1.1208379112158374e-05, "loss": 0.2571, "step": 13218 }, { "epoch": 2.1525872246875384, "grad_norm": 0.18045677244663239, "learning_rate": 1.1204428371714357e-05, "loss": 0.2421, "step": 13219 }, { "epoch": 2.152750071245369, "grad_norm": 0.15495039522647858, "learning_rate": 1.1200478126572691e-05, "loss": 0.2523, "step": 13220 }, { "epoch": 2.1529129178032, "grad_norm": 0.2019437700510025, "learning_rate": 1.1196528376875196e-05, "loss": 0.3027, "step": 13221 }, { "epoch": 2.1530757643610308, "grad_norm": 0.19416527450084686, "learning_rate": 1.11925791227637e-05, "loss": 0.2808, "step": 13222 }, { "epoch": 2.1532386109188617, "grad_norm": 0.2231137901544571, "learning_rate": 1.118863036437998e-05, "loss": 0.314, "step": 13223 }, { "epoch": 2.1534014574766926, "grad_norm": 0.13352496922016144, "learning_rate": 1.1184682101865809e-05, "loss": 0.282, "step": 13224 }, { "epoch": 2.1535643040345236, "grad_norm": 0.16637414693832397, "learning_rate": 1.1180734335362933e-05, "loss": 0.3334, "step": 13225 }, { "epoch": 2.1537271505923545, "grad_norm": 0.23244909942150116, "learning_rate": 1.1176787065013108e-05, "loss": 0.3061, "step": 13226 }, { "epoch": 2.153889997150185, "grad_norm": 0.16862329840660095, "learning_rate": 1.1172840290958045e-05, "loss": 0.2675, "step": 13227 }, { "epoch": 2.154052843708016, "grad_norm": 0.19764621555805206, "learning_rate": 1.116889401333944e-05, "loss": 0.3115, "step": 13228 }, { "epoch": 2.154215690265847, "grad_norm": 0.19669802486896515, "learning_rate": 1.1164948232298972e-05, "loss": 0.2588, "step": 13229 }, { "epoch": 2.154378536823678, "grad_norm": 0.22017118334770203, "learning_rate": 1.116100294797832e-05, "loss": 0.3149, "step": 13230 }, { "epoch": 2.154541383381509, "grad_norm": 0.14195042848587036, "learning_rate": 1.1157058160519127e-05, "loss": 0.2897, "step": 13231 }, { "epoch": 2.1547042299393397, "grad_norm": 0.17206960916519165, "learning_rate": 1.115311387006301e-05, "loss": 0.3038, "step": 13232 }, { "epoch": 2.1548670764971707, "grad_norm": 0.1818290799856186, "learning_rate": 1.11491700767516e-05, "loss": 0.2643, "step": 13233 }, { "epoch": 2.1550299230550016, "grad_norm": 0.1737167090177536, "learning_rate": 1.1145226780726484e-05, "loss": 0.3056, "step": 13234 }, { "epoch": 2.155192769612832, "grad_norm": 0.19076044857501984, "learning_rate": 1.1141283982129235e-05, "loss": 0.2975, "step": 13235 }, { "epoch": 2.155355616170663, "grad_norm": 0.16366377472877502, "learning_rate": 1.1137341681101407e-05, "loss": 0.2651, "step": 13236 }, { "epoch": 2.155518462728494, "grad_norm": 0.15314918756484985, "learning_rate": 1.1133399877784553e-05, "loss": 0.2501, "step": 13237 }, { "epoch": 2.155681309286325, "grad_norm": 0.196183443069458, "learning_rate": 1.1129458572320189e-05, "loss": 0.2662, "step": 13238 }, { "epoch": 2.155844155844156, "grad_norm": 0.18090255558490753, "learning_rate": 1.1125517764849821e-05, "loss": 0.2702, "step": 13239 }, { "epoch": 2.156007002401987, "grad_norm": 0.1553708165884018, "learning_rate": 1.1121577455514923e-05, "loss": 0.3329, "step": 13240 }, { "epoch": 2.156169848959818, "grad_norm": 0.17314045131206512, "learning_rate": 1.1117637644456986e-05, "loss": 0.2963, "step": 13241 }, { "epoch": 2.1563326955176487, "grad_norm": 0.15855923295021057, "learning_rate": 1.1113698331817449e-05, "loss": 0.2695, "step": 13242 }, { "epoch": 2.156495542075479, "grad_norm": 0.17994360625743866, "learning_rate": 1.1109759517737736e-05, "loss": 0.3048, "step": 13243 }, { "epoch": 2.15665838863331, "grad_norm": 0.14527013897895813, "learning_rate": 1.1105821202359285e-05, "loss": 0.2901, "step": 13244 }, { "epoch": 2.156821235191141, "grad_norm": 0.1372206211090088, "learning_rate": 1.1101883385823478e-05, "loss": 0.2575, "step": 13245 }, { "epoch": 2.156984081748972, "grad_norm": 0.1442599594593048, "learning_rate": 1.1097946068271701e-05, "loss": 0.2558, "step": 13246 }, { "epoch": 2.157146928306803, "grad_norm": 0.16939784586429596, "learning_rate": 1.1094009249845299e-05, "loss": 0.3116, "step": 13247 }, { "epoch": 2.157309774864634, "grad_norm": 0.1969953179359436, "learning_rate": 1.1090072930685639e-05, "loss": 0.2718, "step": 13248 }, { "epoch": 2.157472621422465, "grad_norm": 0.1493365466594696, "learning_rate": 1.1086137110934037e-05, "loss": 0.2796, "step": 13249 }, { "epoch": 2.1576354679802954, "grad_norm": 0.15485426783561707, "learning_rate": 1.1082201790731788e-05, "loss": 0.2868, "step": 13250 }, { "epoch": 2.1577983145381263, "grad_norm": 0.207968071103096, "learning_rate": 1.1078266970220205e-05, "loss": 0.2833, "step": 13251 }, { "epoch": 2.1579611610959573, "grad_norm": 0.23662668466567993, "learning_rate": 1.1074332649540547e-05, "loss": 0.2863, "step": 13252 }, { "epoch": 2.158124007653788, "grad_norm": 0.18495991826057434, "learning_rate": 1.1070398828834061e-05, "loss": 0.3037, "step": 13253 }, { "epoch": 2.158286854211619, "grad_norm": 0.17371675372123718, "learning_rate": 1.1066465508241999e-05, "loss": 0.2701, "step": 13254 }, { "epoch": 2.15844970076945, "grad_norm": 0.2180459350347519, "learning_rate": 1.1062532687905564e-05, "loss": 0.3095, "step": 13255 }, { "epoch": 2.158612547327281, "grad_norm": 0.1575106829404831, "learning_rate": 1.105860036796597e-05, "loss": 0.2603, "step": 13256 }, { "epoch": 2.1587753938851115, "grad_norm": 0.17725610733032227, "learning_rate": 1.1054668548564395e-05, "loss": 0.2727, "step": 13257 }, { "epoch": 2.1589382404429425, "grad_norm": 0.1333237588405609, "learning_rate": 1.1050737229841989e-05, "loss": 0.2531, "step": 13258 }, { "epoch": 2.1591010870007734, "grad_norm": 0.14715777337551117, "learning_rate": 1.1046806411939919e-05, "loss": 0.2586, "step": 13259 }, { "epoch": 2.1592639335586044, "grad_norm": 0.16027770936489105, "learning_rate": 1.1042876094999307e-05, "loss": 0.2837, "step": 13260 }, { "epoch": 2.1594267801164353, "grad_norm": 0.22801950573921204, "learning_rate": 1.1038946279161247e-05, "loss": 0.3214, "step": 13261 }, { "epoch": 2.1595896266742662, "grad_norm": 0.23012198507785797, "learning_rate": 1.1035016964566855e-05, "loss": 0.2738, "step": 13262 }, { "epoch": 2.159752473232097, "grad_norm": 0.17001239955425262, "learning_rate": 1.1031088151357194e-05, "loss": 0.2682, "step": 13263 }, { "epoch": 2.159915319789928, "grad_norm": 0.14385269582271576, "learning_rate": 1.102715983967332e-05, "loss": 0.2749, "step": 13264 }, { "epoch": 2.1600781663477586, "grad_norm": 0.19135549664497375, "learning_rate": 1.102323202965626e-05, "loss": 0.33, "step": 13265 }, { "epoch": 2.1602410129055896, "grad_norm": 0.2234077900648117, "learning_rate": 1.101930472144706e-05, "loss": 0.2834, "step": 13266 }, { "epoch": 2.1604038594634205, "grad_norm": 0.18453355133533478, "learning_rate": 1.1015377915186704e-05, "loss": 0.2461, "step": 13267 }, { "epoch": 2.1605667060212514, "grad_norm": 0.14589866995811462, "learning_rate": 1.101145161101618e-05, "loss": 0.2561, "step": 13268 }, { "epoch": 2.1607295525790824, "grad_norm": 0.1615808606147766, "learning_rate": 1.1007525809076446e-05, "loss": 0.2974, "step": 13269 }, { "epoch": 2.1608923991369133, "grad_norm": 0.1631925106048584, "learning_rate": 1.1003600509508465e-05, "loss": 0.2493, "step": 13270 }, { "epoch": 2.1610552456947443, "grad_norm": 0.1643354296684265, "learning_rate": 1.0999675712453163e-05, "loss": 0.2965, "step": 13271 }, { "epoch": 2.161218092252575, "grad_norm": 0.1508144587278366, "learning_rate": 1.0995751418051439e-05, "loss": 0.2895, "step": 13272 }, { "epoch": 2.1613809388104057, "grad_norm": 0.2188287377357483, "learning_rate": 1.0991827626444207e-05, "loss": 0.2759, "step": 13273 }, { "epoch": 2.1615437853682367, "grad_norm": 0.1566888391971588, "learning_rate": 1.098790433777233e-05, "loss": 0.2746, "step": 13274 }, { "epoch": 2.1617066319260676, "grad_norm": 0.19477351009845734, "learning_rate": 1.0983981552176672e-05, "loss": 0.2791, "step": 13275 }, { "epoch": 2.1618694784838985, "grad_norm": 0.18478092551231384, "learning_rate": 1.0980059269798057e-05, "loss": 0.2433, "step": 13276 }, { "epoch": 2.1620323250417295, "grad_norm": 0.14345206320285797, "learning_rate": 1.097613749077733e-05, "loss": 0.2555, "step": 13277 }, { "epoch": 2.1621951715995604, "grad_norm": 0.20078597962856293, "learning_rate": 1.0972216215255285e-05, "loss": 0.2812, "step": 13278 }, { "epoch": 2.1623580181573914, "grad_norm": 0.20868326723575592, "learning_rate": 1.0968295443372702e-05, "loss": 0.3131, "step": 13279 }, { "epoch": 2.162520864715222, "grad_norm": 0.12360714375972748, "learning_rate": 1.0964375175270347e-05, "loss": 0.2622, "step": 13280 }, { "epoch": 2.162683711273053, "grad_norm": 0.22313545644283295, "learning_rate": 1.0960455411088986e-05, "loss": 0.2905, "step": 13281 }, { "epoch": 2.1628465578308838, "grad_norm": 0.2051531970500946, "learning_rate": 1.0956536150969334e-05, "loss": 0.2908, "step": 13282 }, { "epoch": 2.1630094043887147, "grad_norm": 0.15385328233242035, "learning_rate": 1.0952617395052103e-05, "loss": 0.2671, "step": 13283 }, { "epoch": 2.1631722509465456, "grad_norm": 0.15622112154960632, "learning_rate": 1.0948699143478006e-05, "loss": 0.2949, "step": 13284 }, { "epoch": 2.1633350975043766, "grad_norm": 0.19119755923748016, "learning_rate": 1.0944781396387704e-05, "loss": 0.2973, "step": 13285 }, { "epoch": 2.1634979440622075, "grad_norm": 0.16175948083400726, "learning_rate": 1.0940864153921865e-05, "loss": 0.2754, "step": 13286 }, { "epoch": 2.1636607906200385, "grad_norm": 0.19604545831680298, "learning_rate": 1.0936947416221113e-05, "loss": 0.2907, "step": 13287 }, { "epoch": 2.163823637177869, "grad_norm": 0.17653788626194, "learning_rate": 1.0933031183426093e-05, "loss": 0.2753, "step": 13288 }, { "epoch": 2.1639864837357, "grad_norm": 0.18999901413917542, "learning_rate": 1.09291154556774e-05, "loss": 0.2857, "step": 13289 }, { "epoch": 2.164149330293531, "grad_norm": 0.14804179966449738, "learning_rate": 1.0925200233115607e-05, "loss": 0.2881, "step": 13290 }, { "epoch": 2.164312176851362, "grad_norm": 0.18689240515232086, "learning_rate": 1.0921285515881307e-05, "loss": 0.2972, "step": 13291 }, { "epoch": 2.1644750234091927, "grad_norm": 0.1665722280740738, "learning_rate": 1.0917371304115029e-05, "loss": 0.2923, "step": 13292 }, { "epoch": 2.1646378699670237, "grad_norm": 0.15241068601608276, "learning_rate": 1.0913457597957321e-05, "loss": 0.3073, "step": 13293 }, { "epoch": 2.1648007165248546, "grad_norm": 0.16706958413124084, "learning_rate": 1.0909544397548691e-05, "loss": 0.2866, "step": 13294 }, { "epoch": 2.1649635630826856, "grad_norm": 0.1469210535287857, "learning_rate": 1.0905631703029626e-05, "loss": 0.2588, "step": 13295 }, { "epoch": 2.165126409640516, "grad_norm": 0.15619812905788422, "learning_rate": 1.0901719514540618e-05, "loss": 0.2679, "step": 13296 }, { "epoch": 2.165289256198347, "grad_norm": 0.15947148203849792, "learning_rate": 1.0897807832222117e-05, "loss": 0.2518, "step": 13297 }, { "epoch": 2.165452102756178, "grad_norm": 0.17656053602695465, "learning_rate": 1.0893896656214559e-05, "loss": 0.2575, "step": 13298 }, { "epoch": 2.165614949314009, "grad_norm": 0.16660140454769135, "learning_rate": 1.0889985986658383e-05, "loss": 0.2902, "step": 13299 }, { "epoch": 2.16577779587184, "grad_norm": 0.16990409791469574, "learning_rate": 1.0886075823693987e-05, "loss": 0.2612, "step": 13300 }, { "epoch": 2.1659406424296708, "grad_norm": 0.1805448979139328, "learning_rate": 1.0882166167461752e-05, "loss": 0.273, "step": 13301 }, { "epoch": 2.1661034889875017, "grad_norm": 0.1657552570104599, "learning_rate": 1.0878257018102042e-05, "loss": 0.2795, "step": 13302 }, { "epoch": 2.166266335545332, "grad_norm": 0.17127396166324615, "learning_rate": 1.0874348375755223e-05, "loss": 0.2717, "step": 13303 }, { "epoch": 2.166429182103163, "grad_norm": 0.16657094657421112, "learning_rate": 1.0870440240561619e-05, "loss": 0.2758, "step": 13304 }, { "epoch": 2.166592028660994, "grad_norm": 0.16138644516468048, "learning_rate": 1.0866532612661537e-05, "loss": 0.2636, "step": 13305 }, { "epoch": 2.166754875218825, "grad_norm": 0.14212825894355774, "learning_rate": 1.0862625492195286e-05, "loss": 0.2666, "step": 13306 }, { "epoch": 2.166917721776656, "grad_norm": 0.15968458354473114, "learning_rate": 1.0858718879303136e-05, "loss": 0.2788, "step": 13307 }, { "epoch": 2.167080568334487, "grad_norm": 0.15707087516784668, "learning_rate": 1.085481277412535e-05, "loss": 0.255, "step": 13308 }, { "epoch": 2.167243414892318, "grad_norm": 0.1736985594034195, "learning_rate": 1.0850907176802153e-05, "loss": 0.2958, "step": 13309 }, { "epoch": 2.1674062614501484, "grad_norm": 0.21059362590312958, "learning_rate": 1.084700208747379e-05, "loss": 0.3198, "step": 13310 }, { "epoch": 2.1675691080079793, "grad_norm": 0.18298569321632385, "learning_rate": 1.0843097506280459e-05, "loss": 0.2912, "step": 13311 }, { "epoch": 2.1677319545658102, "grad_norm": 0.15732477605342865, "learning_rate": 1.0839193433362329e-05, "loss": 0.2506, "step": 13312 }, { "epoch": 2.167894801123641, "grad_norm": 0.16426832973957062, "learning_rate": 1.0835289868859593e-05, "loss": 0.3117, "step": 13313 }, { "epoch": 2.168057647681472, "grad_norm": 0.16938389837741852, "learning_rate": 1.083138681291239e-05, "loss": 0.28, "step": 13314 }, { "epoch": 2.168220494239303, "grad_norm": 0.18203461170196533, "learning_rate": 1.0827484265660848e-05, "loss": 0.2912, "step": 13315 }, { "epoch": 2.168383340797134, "grad_norm": 0.19562789797782898, "learning_rate": 1.0823582227245074e-05, "loss": 0.2112, "step": 13316 }, { "epoch": 2.168546187354965, "grad_norm": 0.12331360578536987, "learning_rate": 1.0819680697805182e-05, "loss": 0.2577, "step": 13317 }, { "epoch": 2.168709033912796, "grad_norm": 0.15615011751651764, "learning_rate": 1.0815779677481239e-05, "loss": 0.2924, "step": 13318 }, { "epoch": 2.1688718804706264, "grad_norm": 0.12432575970888138, "learning_rate": 1.0811879166413302e-05, "loss": 0.2835, "step": 13319 }, { "epoch": 2.1690347270284573, "grad_norm": 0.14258213341236115, "learning_rate": 1.0807979164741403e-05, "loss": 0.2375, "step": 13320 }, { "epoch": 2.1691975735862883, "grad_norm": 0.19597239792346954, "learning_rate": 1.0804079672605583e-05, "loss": 0.3251, "step": 13321 }, { "epoch": 2.1693604201441192, "grad_norm": 0.165462464094162, "learning_rate": 1.080018069014583e-05, "loss": 0.278, "step": 13322 }, { "epoch": 2.16952326670195, "grad_norm": 0.15995629131793976, "learning_rate": 1.079628221750213e-05, "loss": 0.2725, "step": 13323 }, { "epoch": 2.169686113259781, "grad_norm": 0.17268343269824982, "learning_rate": 1.0792384254814461e-05, "loss": 0.306, "step": 13324 }, { "epoch": 2.169848959817612, "grad_norm": 0.16351783275604248, "learning_rate": 1.0788486802222763e-05, "loss": 0.2991, "step": 13325 }, { "epoch": 2.1700118063754426, "grad_norm": 0.2350330948829651, "learning_rate": 1.078458985986697e-05, "loss": 0.3326, "step": 13326 }, { "epoch": 2.1701746529332735, "grad_norm": 0.15623149275779724, "learning_rate": 1.0780693427886982e-05, "loss": 0.2809, "step": 13327 }, { "epoch": 2.1703374994911044, "grad_norm": 0.17128606140613556, "learning_rate": 1.0776797506422708e-05, "loss": 0.287, "step": 13328 }, { "epoch": 2.1705003460489354, "grad_norm": 0.16334392130374908, "learning_rate": 1.0772902095614013e-05, "loss": 0.2893, "step": 13329 }, { "epoch": 2.1706631926067663, "grad_norm": 0.14122313261032104, "learning_rate": 1.0769007195600764e-05, "loss": 0.2921, "step": 13330 }, { "epoch": 2.1708260391645973, "grad_norm": 0.12086212635040283, "learning_rate": 1.0765112806522794e-05, "loss": 0.2591, "step": 13331 }, { "epoch": 2.170988885722428, "grad_norm": 0.18055738508701324, "learning_rate": 1.0761218928519918e-05, "loss": 0.2845, "step": 13332 }, { "epoch": 2.1711517322802587, "grad_norm": 0.1855420023202896, "learning_rate": 1.0757325561731948e-05, "loss": 0.272, "step": 13333 }, { "epoch": 2.1713145788380896, "grad_norm": 0.18740013241767883, "learning_rate": 1.0753432706298655e-05, "loss": 0.2583, "step": 13334 }, { "epoch": 2.1714774253959206, "grad_norm": 0.16941915452480316, "learning_rate": 1.074954036235982e-05, "loss": 0.2769, "step": 13335 }, { "epoch": 2.1716402719537515, "grad_norm": 0.15715394914150238, "learning_rate": 1.074564853005518e-05, "loss": 0.2667, "step": 13336 }, { "epoch": 2.1718031185115825, "grad_norm": 0.11766411364078522, "learning_rate": 1.0741757209524467e-05, "loss": 0.2579, "step": 13337 }, { "epoch": 2.1719659650694134, "grad_norm": 0.17648202180862427, "learning_rate": 1.0737866400907382e-05, "loss": 0.296, "step": 13338 }, { "epoch": 2.1721288116272444, "grad_norm": 0.16104669868946075, "learning_rate": 1.073397610434363e-05, "loss": 0.2719, "step": 13339 }, { "epoch": 2.1722916581850753, "grad_norm": 0.16470371186733246, "learning_rate": 1.0730086319972878e-05, "loss": 0.2935, "step": 13340 }, { "epoch": 2.172454504742906, "grad_norm": 0.16663219034671783, "learning_rate": 1.072619704793478e-05, "loss": 0.3011, "step": 13341 }, { "epoch": 2.1726173513007367, "grad_norm": 0.14316824078559875, "learning_rate": 1.0722308288368968e-05, "loss": 0.2611, "step": 13342 }, { "epoch": 2.1727801978585677, "grad_norm": 0.16361846029758453, "learning_rate": 1.0718420041415073e-05, "loss": 0.256, "step": 13343 }, { "epoch": 2.1729430444163986, "grad_norm": 0.21299757063388824, "learning_rate": 1.0714532307212685e-05, "loss": 0.3185, "step": 13344 }, { "epoch": 2.1731058909742296, "grad_norm": 0.16603988409042358, "learning_rate": 1.071064508590138e-05, "loss": 0.2956, "step": 13345 }, { "epoch": 2.1732687375320605, "grad_norm": 0.22121912240982056, "learning_rate": 1.0706758377620738e-05, "loss": 0.2616, "step": 13346 }, { "epoch": 2.1734315840898915, "grad_norm": 0.1624455600976944, "learning_rate": 1.0702872182510293e-05, "loss": 0.2979, "step": 13347 }, { "epoch": 2.1735944306477224, "grad_norm": 0.14684456586837769, "learning_rate": 1.0698986500709573e-05, "loss": 0.3185, "step": 13348 }, { "epoch": 2.173757277205553, "grad_norm": 0.1921507567167282, "learning_rate": 1.0695101332358076e-05, "loss": 0.3174, "step": 13349 }, { "epoch": 2.173920123763384, "grad_norm": 0.19593754410743713, "learning_rate": 1.0691216677595308e-05, "loss": 0.2813, "step": 13350 }, { "epoch": 2.174082970321215, "grad_norm": 0.1702880859375, "learning_rate": 1.0687332536560731e-05, "loss": 0.2823, "step": 13351 }, { "epoch": 2.1742458168790457, "grad_norm": 0.16986405849456787, "learning_rate": 1.0683448909393798e-05, "loss": 0.2819, "step": 13352 }, { "epoch": 2.1744086634368767, "grad_norm": 0.17031069099903107, "learning_rate": 1.0679565796233935e-05, "loss": 0.2828, "step": 13353 }, { "epoch": 2.1745715099947076, "grad_norm": 0.16515901684761047, "learning_rate": 1.0675683197220573e-05, "loss": 0.2677, "step": 13354 }, { "epoch": 2.1747343565525386, "grad_norm": 0.1894172579050064, "learning_rate": 1.0671801112493102e-05, "loss": 0.2847, "step": 13355 }, { "epoch": 2.174897203110369, "grad_norm": 0.15265759825706482, "learning_rate": 1.0667919542190887e-05, "loss": 0.2699, "step": 13356 }, { "epoch": 2.1750600496682, "grad_norm": 0.16704589128494263, "learning_rate": 1.0664038486453314e-05, "loss": 0.2736, "step": 13357 }, { "epoch": 2.175222896226031, "grad_norm": 0.14604386687278748, "learning_rate": 1.0660157945419708e-05, "loss": 0.2438, "step": 13358 }, { "epoch": 2.175385742783862, "grad_norm": 0.16926907002925873, "learning_rate": 1.0656277919229396e-05, "loss": 0.3027, "step": 13359 }, { "epoch": 2.175548589341693, "grad_norm": 0.1679646521806717, "learning_rate": 1.0652398408021674e-05, "loss": 0.3118, "step": 13360 }, { "epoch": 2.1757114358995238, "grad_norm": 0.15525805950164795, "learning_rate": 1.0648519411935843e-05, "loss": 0.29, "step": 13361 }, { "epoch": 2.1758742824573547, "grad_norm": 0.18795251846313477, "learning_rate": 1.0644640931111163e-05, "loss": 0.2554, "step": 13362 }, { "epoch": 2.1760371290151856, "grad_norm": 0.15042074024677277, "learning_rate": 1.0640762965686877e-05, "loss": 0.2832, "step": 13363 }, { "epoch": 2.176199975573016, "grad_norm": 0.156047523021698, "learning_rate": 1.0636885515802228e-05, "loss": 0.2774, "step": 13364 }, { "epoch": 2.176362822130847, "grad_norm": 0.19250817596912384, "learning_rate": 1.0633008581596423e-05, "loss": 0.3172, "step": 13365 }, { "epoch": 2.176525668688678, "grad_norm": 0.1504720151424408, "learning_rate": 1.0629132163208647e-05, "loss": 0.273, "step": 13366 }, { "epoch": 2.176688515246509, "grad_norm": 0.17246761918067932, "learning_rate": 1.0625256260778091e-05, "loss": 0.263, "step": 13367 }, { "epoch": 2.17685136180434, "grad_norm": 0.14415577054023743, "learning_rate": 1.06213808744439e-05, "loss": 0.2812, "step": 13368 }, { "epoch": 2.177014208362171, "grad_norm": 0.13035528361797333, "learning_rate": 1.0617506004345209e-05, "loss": 0.267, "step": 13369 }, { "epoch": 2.177177054920002, "grad_norm": 0.13901664316654205, "learning_rate": 1.0613631650621153e-05, "loss": 0.2836, "step": 13370 }, { "epoch": 2.1773399014778327, "grad_norm": 0.20184354484081268, "learning_rate": 1.0609757813410823e-05, "loss": 0.2836, "step": 13371 }, { "epoch": 2.1775027480356632, "grad_norm": 0.15022894740104675, "learning_rate": 1.0605884492853294e-05, "loss": 0.2696, "step": 13372 }, { "epoch": 2.177665594593494, "grad_norm": 0.18071067333221436, "learning_rate": 1.0602011689087643e-05, "loss": 0.3019, "step": 13373 }, { "epoch": 2.177828441151325, "grad_norm": 0.15494322776794434, "learning_rate": 1.0598139402252905e-05, "loss": 0.294, "step": 13374 }, { "epoch": 2.177991287709156, "grad_norm": 0.17148172855377197, "learning_rate": 1.0594267632488117e-05, "loss": 0.2302, "step": 13375 }, { "epoch": 2.178154134266987, "grad_norm": 0.21214307844638824, "learning_rate": 1.0590396379932282e-05, "loss": 0.3165, "step": 13376 }, { "epoch": 2.178316980824818, "grad_norm": 0.159828782081604, "learning_rate": 1.0586525644724394e-05, "loss": 0.2812, "step": 13377 }, { "epoch": 2.178479827382649, "grad_norm": 0.14192239940166473, "learning_rate": 1.0582655427003405e-05, "loss": 0.272, "step": 13378 }, { "epoch": 2.1786426739404794, "grad_norm": 0.22095449268817902, "learning_rate": 1.0578785726908292e-05, "loss": 0.299, "step": 13379 }, { "epoch": 2.1788055204983103, "grad_norm": 0.1576850563287735, "learning_rate": 1.0574916544577978e-05, "loss": 0.2984, "step": 13380 }, { "epoch": 2.1789683670561413, "grad_norm": 0.15585705637931824, "learning_rate": 1.057104788015138e-05, "loss": 0.2766, "step": 13381 }, { "epoch": 2.179131213613972, "grad_norm": 0.1917869746685028, "learning_rate": 1.0567179733767385e-05, "loss": 0.2822, "step": 13382 }, { "epoch": 2.179294060171803, "grad_norm": 0.15940667688846588, "learning_rate": 1.0563312105564888e-05, "loss": 0.2778, "step": 13383 }, { "epoch": 2.179456906729634, "grad_norm": 0.17922542989253998, "learning_rate": 1.0559444995682738e-05, "loss": 0.2673, "step": 13384 }, { "epoch": 2.179619753287465, "grad_norm": 0.15480779111385345, "learning_rate": 1.0555578404259772e-05, "loss": 0.2867, "step": 13385 }, { "epoch": 2.1797825998452955, "grad_norm": 0.1693418323993683, "learning_rate": 1.0551712331434824e-05, "loss": 0.2727, "step": 13386 }, { "epoch": 2.1799454464031265, "grad_norm": 0.11038249731063843, "learning_rate": 1.0547846777346693e-05, "loss": 0.2675, "step": 13387 }, { "epoch": 2.1801082929609574, "grad_norm": 0.18132132291793823, "learning_rate": 1.0543981742134162e-05, "loss": 0.3059, "step": 13388 }, { "epoch": 2.1802711395187884, "grad_norm": 0.20213961601257324, "learning_rate": 1.0540117225935989e-05, "loss": 0.3388, "step": 13389 }, { "epoch": 2.1804339860766193, "grad_norm": 0.17031744122505188, "learning_rate": 1.0536253228890941e-05, "loss": 0.246, "step": 13390 }, { "epoch": 2.1805968326344503, "grad_norm": 0.18929964303970337, "learning_rate": 1.0532389751137733e-05, "loss": 0.2838, "step": 13391 }, { "epoch": 2.180759679192281, "grad_norm": 0.19907931983470917, "learning_rate": 1.0528526792815082e-05, "loss": 0.2635, "step": 13392 }, { "epoch": 2.180922525750112, "grad_norm": 0.16862796247005463, "learning_rate": 1.0524664354061667e-05, "loss": 0.2829, "step": 13393 }, { "epoch": 2.1810853723079426, "grad_norm": 0.20540070533752441, "learning_rate": 1.052080243501618e-05, "loss": 0.2785, "step": 13394 }, { "epoch": 2.1812482188657736, "grad_norm": 0.1425146758556366, "learning_rate": 1.0516941035817266e-05, "loss": 0.2418, "step": 13395 }, { "epoch": 2.1814110654236045, "grad_norm": 0.15242701768875122, "learning_rate": 1.0513080156603552e-05, "loss": 0.2262, "step": 13396 }, { "epoch": 2.1815739119814355, "grad_norm": 0.14206519722938538, "learning_rate": 1.0509219797513674e-05, "loss": 0.2367, "step": 13397 }, { "epoch": 2.1817367585392664, "grad_norm": 0.15349529683589935, "learning_rate": 1.0505359958686222e-05, "loss": 0.2408, "step": 13398 }, { "epoch": 2.1818996050970973, "grad_norm": 0.15744788944721222, "learning_rate": 1.0501500640259773e-05, "loss": 0.2956, "step": 13399 }, { "epoch": 2.1820624516549283, "grad_norm": 0.18653403222560883, "learning_rate": 1.0497641842372882e-05, "loss": 0.2769, "step": 13400 }, { "epoch": 2.1822252982127592, "grad_norm": 0.1611001193523407, "learning_rate": 1.0493783565164106e-05, "loss": 0.2359, "step": 13401 }, { "epoch": 2.1823881447705897, "grad_norm": 0.15058879554271698, "learning_rate": 1.0489925808771964e-05, "loss": 0.2779, "step": 13402 }, { "epoch": 2.1825509913284207, "grad_norm": 0.156708762049675, "learning_rate": 1.0486068573334955e-05, "loss": 0.2706, "step": 13403 }, { "epoch": 2.1827138378862516, "grad_norm": 0.15447886288166046, "learning_rate": 1.0482211858991565e-05, "loss": 0.2934, "step": 13404 }, { "epoch": 2.1828766844440826, "grad_norm": 0.1440708041191101, "learning_rate": 1.047835566588027e-05, "loss": 0.2555, "step": 13405 }, { "epoch": 2.1830395310019135, "grad_norm": 0.17853692173957825, "learning_rate": 1.047449999413951e-05, "loss": 0.3021, "step": 13406 }, { "epoch": 2.1832023775597444, "grad_norm": 0.18588048219680786, "learning_rate": 1.0470644843907726e-05, "loss": 0.2624, "step": 13407 }, { "epoch": 2.1833652241175754, "grad_norm": 0.1774185597896576, "learning_rate": 1.0466790215323324e-05, "loss": 0.2876, "step": 13408 }, { "epoch": 2.183528070675406, "grad_norm": 0.15833032131195068, "learning_rate": 1.0462936108524688e-05, "loss": 0.2783, "step": 13409 }, { "epoch": 2.183690917233237, "grad_norm": 0.19826599955558777, "learning_rate": 1.0459082523650207e-05, "loss": 0.2988, "step": 13410 }, { "epoch": 2.1838537637910678, "grad_norm": 0.1814301460981369, "learning_rate": 1.0455229460838223e-05, "loss": 0.2843, "step": 13411 }, { "epoch": 2.1840166103488987, "grad_norm": 0.13996122777462006, "learning_rate": 1.0451376920227086e-05, "loss": 0.2496, "step": 13412 }, { "epoch": 2.1841794569067297, "grad_norm": 0.20058557391166687, "learning_rate": 1.0447524901955106e-05, "loss": 0.2681, "step": 13413 }, { "epoch": 2.1843423034645606, "grad_norm": 0.20084409415721893, "learning_rate": 1.0443673406160573e-05, "loss": 0.3259, "step": 13414 }, { "epoch": 2.1845051500223915, "grad_norm": 0.17014193534851074, "learning_rate": 1.0439822432981785e-05, "loss": 0.2687, "step": 13415 }, { "epoch": 2.1846679965802225, "grad_norm": 0.15991875529289246, "learning_rate": 1.0435971982556996e-05, "loss": 0.2595, "step": 13416 }, { "epoch": 2.184830843138053, "grad_norm": 0.18578313291072845, "learning_rate": 1.043212205502445e-05, "loss": 0.2658, "step": 13417 }, { "epoch": 2.184993689695884, "grad_norm": 0.17861883342266083, "learning_rate": 1.0428272650522359e-05, "loss": 0.3015, "step": 13418 }, { "epoch": 2.185156536253715, "grad_norm": 0.20032095909118652, "learning_rate": 1.0424423769188943e-05, "loss": 0.2604, "step": 13419 }, { "epoch": 2.185319382811546, "grad_norm": 0.16167382895946503, "learning_rate": 1.0420575411162386e-05, "loss": 0.2556, "step": 13420 }, { "epoch": 2.1854822293693767, "grad_norm": 0.16797924041748047, "learning_rate": 1.0416727576580853e-05, "loss": 0.2578, "step": 13421 }, { "epoch": 2.1856450759272077, "grad_norm": 0.15629121661186218, "learning_rate": 1.0412880265582483e-05, "loss": 0.3075, "step": 13422 }, { "epoch": 2.1858079224850386, "grad_norm": 0.1714576929807663, "learning_rate": 1.0409033478305428e-05, "loss": 0.2964, "step": 13423 }, { "epoch": 2.1859707690428696, "grad_norm": 0.2192683070898056, "learning_rate": 1.0405187214887782e-05, "loss": 0.316, "step": 13424 }, { "epoch": 2.1861336156007, "grad_norm": 0.18546995520591736, "learning_rate": 1.0401341475467636e-05, "loss": 0.2815, "step": 13425 }, { "epoch": 2.186296462158531, "grad_norm": 0.15574321150779724, "learning_rate": 1.0397496260183079e-05, "loss": 0.2933, "step": 13426 }, { "epoch": 2.186459308716362, "grad_norm": 0.1555568426847458, "learning_rate": 1.0393651569172155e-05, "loss": 0.271, "step": 13427 }, { "epoch": 2.186622155274193, "grad_norm": 0.15153954923152924, "learning_rate": 1.0389807402572902e-05, "loss": 0.2605, "step": 13428 }, { "epoch": 2.186785001832024, "grad_norm": 0.16585882008075714, "learning_rate": 1.0385963760523327e-05, "loss": 0.2577, "step": 13429 }, { "epoch": 2.186947848389855, "grad_norm": 0.1716185063123703, "learning_rate": 1.0382120643161448e-05, "loss": 0.2783, "step": 13430 }, { "epoch": 2.1871106949476857, "grad_norm": 0.14375315606594086, "learning_rate": 1.0378278050625231e-05, "loss": 0.311, "step": 13431 }, { "epoch": 2.1872735415055162, "grad_norm": 0.2781507074832916, "learning_rate": 1.0374435983052643e-05, "loss": 0.3339, "step": 13432 }, { "epoch": 2.187436388063347, "grad_norm": 0.18295159935951233, "learning_rate": 1.0370594440581613e-05, "loss": 0.2601, "step": 13433 }, { "epoch": 2.187599234621178, "grad_norm": 0.15710990130901337, "learning_rate": 1.0366753423350084e-05, "loss": 0.2585, "step": 13434 }, { "epoch": 2.187762081179009, "grad_norm": 0.14649400115013123, "learning_rate": 1.0362912931495947e-05, "loss": 0.289, "step": 13435 }, { "epoch": 2.18792492773684, "grad_norm": 0.15220697224140167, "learning_rate": 1.0359072965157083e-05, "loss": 0.293, "step": 13436 }, { "epoch": 2.188087774294671, "grad_norm": 0.14682960510253906, "learning_rate": 1.0355233524471372e-05, "loss": 0.2487, "step": 13437 }, { "epoch": 2.188250620852502, "grad_norm": 0.1804431676864624, "learning_rate": 1.0351394609576653e-05, "loss": 0.2867, "step": 13438 }, { "epoch": 2.1884134674103324, "grad_norm": 0.16985395550727844, "learning_rate": 1.034755622061076e-05, "loss": 0.3733, "step": 13439 }, { "epoch": 2.1885763139681633, "grad_norm": 0.16791728138923645, "learning_rate": 1.0343718357711485e-05, "loss": 0.2733, "step": 13440 }, { "epoch": 2.1887391605259943, "grad_norm": 0.17607910931110382, "learning_rate": 1.0339881021016645e-05, "loss": 0.2917, "step": 13441 }, { "epoch": 2.188902007083825, "grad_norm": 0.17495408654212952, "learning_rate": 1.0336044210663998e-05, "loss": 0.2567, "step": 13442 }, { "epoch": 2.189064853641656, "grad_norm": 0.17836244404315948, "learning_rate": 1.0332207926791287e-05, "loss": 0.2876, "step": 13443 }, { "epoch": 2.189227700199487, "grad_norm": 0.1829071193933487, "learning_rate": 1.0328372169536271e-05, "loss": 0.2542, "step": 13444 }, { "epoch": 2.189390546757318, "grad_norm": 0.16635604202747345, "learning_rate": 1.0324536939036647e-05, "loss": 0.3116, "step": 13445 }, { "epoch": 2.189553393315149, "grad_norm": 0.14053119719028473, "learning_rate": 1.032070223543011e-05, "loss": 0.2802, "step": 13446 }, { "epoch": 2.18971623987298, "grad_norm": 0.20817098021507263, "learning_rate": 1.0316868058854352e-05, "loss": 0.2623, "step": 13447 }, { "epoch": 2.1898790864308104, "grad_norm": 0.2753472626209259, "learning_rate": 1.0313034409447011e-05, "loss": 0.2634, "step": 13448 }, { "epoch": 2.1900419329886414, "grad_norm": 0.13026423752307892, "learning_rate": 1.0309201287345748e-05, "loss": 0.2912, "step": 13449 }, { "epoch": 2.1902047795464723, "grad_norm": 0.17619949579238892, "learning_rate": 1.0305368692688174e-05, "loss": 0.2707, "step": 13450 }, { "epoch": 2.1903676261043032, "grad_norm": 0.16892588138580322, "learning_rate": 1.0301536625611882e-05, "loss": 0.2698, "step": 13451 }, { "epoch": 2.190530472662134, "grad_norm": 0.13635696470737457, "learning_rate": 1.029770508625447e-05, "loss": 0.2416, "step": 13452 }, { "epoch": 2.190693319219965, "grad_norm": 0.18312326073646545, "learning_rate": 1.0293874074753496e-05, "loss": 0.2785, "step": 13453 }, { "epoch": 2.190856165777796, "grad_norm": 0.18143552541732788, "learning_rate": 1.0290043591246504e-05, "loss": 0.3222, "step": 13454 }, { "epoch": 2.1910190123356266, "grad_norm": 0.18423514068126678, "learning_rate": 1.0286213635871009e-05, "loss": 0.2722, "step": 13455 }, { "epoch": 2.1911818588934575, "grad_norm": 0.23275963962078094, "learning_rate": 1.0282384208764537e-05, "loss": 0.2905, "step": 13456 }, { "epoch": 2.1913447054512885, "grad_norm": 0.1988009363412857, "learning_rate": 1.0278555310064566e-05, "loss": 0.2658, "step": 13457 }, { "epoch": 2.1915075520091194, "grad_norm": 0.15251921117305756, "learning_rate": 1.0274726939908561e-05, "loss": 0.2646, "step": 13458 }, { "epoch": 2.1916703985669503, "grad_norm": 0.14008264243602753, "learning_rate": 1.0270899098433984e-05, "loss": 0.2828, "step": 13459 }, { "epoch": 2.1918332451247813, "grad_norm": 0.16582521796226501, "learning_rate": 1.0267071785778256e-05, "loss": 0.2496, "step": 13460 }, { "epoch": 2.1919960916826122, "grad_norm": 0.15511833131313324, "learning_rate": 1.0263245002078795e-05, "loss": 0.2669, "step": 13461 }, { "epoch": 2.1921589382404427, "grad_norm": 0.1855117231607437, "learning_rate": 1.0259418747472982e-05, "loss": 0.2611, "step": 13462 }, { "epoch": 2.1923217847982737, "grad_norm": 0.20617757737636566, "learning_rate": 1.0255593022098208e-05, "loss": 0.292, "step": 13463 }, { "epoch": 2.1924846313561046, "grad_norm": 0.1987105756998062, "learning_rate": 1.0251767826091822e-05, "loss": 0.2988, "step": 13464 }, { "epoch": 2.1926474779139355, "grad_norm": 0.19376033544540405, "learning_rate": 1.0247943159591144e-05, "loss": 0.2932, "step": 13465 }, { "epoch": 2.1928103244717665, "grad_norm": 0.1636366993188858, "learning_rate": 1.0244119022733517e-05, "loss": 0.2797, "step": 13466 }, { "epoch": 2.1929731710295974, "grad_norm": 0.20030082762241364, "learning_rate": 1.0240295415656229e-05, "loss": 0.266, "step": 13467 }, { "epoch": 2.1931360175874284, "grad_norm": 0.13242140412330627, "learning_rate": 1.0236472338496552e-05, "loss": 0.2658, "step": 13468 }, { "epoch": 2.1932988641452593, "grad_norm": 0.1482185572385788, "learning_rate": 1.0232649791391746e-05, "loss": 0.2438, "step": 13469 }, { "epoch": 2.19346171070309, "grad_norm": 0.1685754805803299, "learning_rate": 1.0228827774479063e-05, "loss": 0.3029, "step": 13470 }, { "epoch": 2.1936245572609208, "grad_norm": 0.12458626180887222, "learning_rate": 1.0225006287895722e-05, "loss": 0.2338, "step": 13471 }, { "epoch": 2.1937874038187517, "grad_norm": 0.18032492697238922, "learning_rate": 1.0221185331778918e-05, "loss": 0.2752, "step": 13472 }, { "epoch": 2.1939502503765826, "grad_norm": 0.13489748537540436, "learning_rate": 1.0217364906265834e-05, "loss": 0.271, "step": 13473 }, { "epoch": 2.1941130969344136, "grad_norm": 0.18866871297359467, "learning_rate": 1.0213545011493647e-05, "loss": 0.2529, "step": 13474 }, { "epoch": 2.1942759434922445, "grad_norm": 0.16153590381145477, "learning_rate": 1.0209725647599494e-05, "loss": 0.2427, "step": 13475 }, { "epoch": 2.1944387900500755, "grad_norm": 0.18114830553531647, "learning_rate": 1.0205906814720495e-05, "loss": 0.2794, "step": 13476 }, { "epoch": 2.1946016366079064, "grad_norm": 0.21412663161754608, "learning_rate": 1.0202088512993777e-05, "loss": 0.3044, "step": 13477 }, { "epoch": 2.194764483165737, "grad_norm": 0.16111141443252563, "learning_rate": 1.0198270742556415e-05, "loss": 0.292, "step": 13478 }, { "epoch": 2.194927329723568, "grad_norm": 0.1673496961593628, "learning_rate": 1.0194453503545478e-05, "loss": 0.28, "step": 13479 }, { "epoch": 2.195090176281399, "grad_norm": 0.17410483956336975, "learning_rate": 1.0190636796098014e-05, "loss": 0.3152, "step": 13480 }, { "epoch": 2.1952530228392297, "grad_norm": 0.13646486401557922, "learning_rate": 1.0186820620351067e-05, "loss": 0.2409, "step": 13481 }, { "epoch": 2.1954158693970607, "grad_norm": 0.18740396201610565, "learning_rate": 1.018300497644164e-05, "loss": 0.2709, "step": 13482 }, { "epoch": 2.1955787159548916, "grad_norm": 0.17192943394184113, "learning_rate": 1.017918986450672e-05, "loss": 0.25, "step": 13483 }, { "epoch": 2.1957415625127226, "grad_norm": 0.19641970098018646, "learning_rate": 1.0175375284683295e-05, "loss": 0.2794, "step": 13484 }, { "epoch": 2.195904409070553, "grad_norm": 0.12591323256492615, "learning_rate": 1.0171561237108308e-05, "loss": 0.2811, "step": 13485 }, { "epoch": 2.196067255628384, "grad_norm": 0.17032846808433533, "learning_rate": 1.0167747721918704e-05, "loss": 0.3171, "step": 13486 }, { "epoch": 2.196230102186215, "grad_norm": 0.1824387162923813, "learning_rate": 1.0163934739251397e-05, "loss": 0.3, "step": 13487 }, { "epoch": 2.196392948744046, "grad_norm": 0.18845853209495544, "learning_rate": 1.0160122289243274e-05, "loss": 0.2891, "step": 13488 }, { "epoch": 2.196555795301877, "grad_norm": 0.13944146037101746, "learning_rate": 1.0156310372031233e-05, "loss": 0.2591, "step": 13489 }, { "epoch": 2.1967186418597078, "grad_norm": 0.1879035383462906, "learning_rate": 1.015249898775212e-05, "loss": 0.2912, "step": 13490 }, { "epoch": 2.1968814884175387, "grad_norm": 0.2094983458518982, "learning_rate": 1.0148688136542769e-05, "loss": 0.2773, "step": 13491 }, { "epoch": 2.1970443349753697, "grad_norm": 0.16106486320495605, "learning_rate": 1.0144877818540021e-05, "loss": 0.2884, "step": 13492 }, { "epoch": 2.1972071815332, "grad_norm": 0.1474021077156067, "learning_rate": 1.0141068033880663e-05, "loss": 0.3011, "step": 13493 }, { "epoch": 2.197370028091031, "grad_norm": 0.17799216508865356, "learning_rate": 1.0137258782701483e-05, "loss": 0.2775, "step": 13494 }, { "epoch": 2.197532874648862, "grad_norm": 0.14776939153671265, "learning_rate": 1.0133450065139236e-05, "loss": 0.287, "step": 13495 }, { "epoch": 2.197695721206693, "grad_norm": 0.15580004453659058, "learning_rate": 1.012964188133068e-05, "loss": 0.2511, "step": 13496 }, { "epoch": 2.197858567764524, "grad_norm": 0.25240853428840637, "learning_rate": 1.0125834231412532e-05, "loss": 0.3085, "step": 13497 }, { "epoch": 2.198021414322355, "grad_norm": 0.1599385142326355, "learning_rate": 1.012202711552149e-05, "loss": 0.3006, "step": 13498 }, { "epoch": 2.198184260880186, "grad_norm": 0.16834083199501038, "learning_rate": 1.0118220533794262e-05, "loss": 0.2669, "step": 13499 }, { "epoch": 2.1983471074380168, "grad_norm": 0.15752838551998138, "learning_rate": 1.0114414486367502e-05, "loss": 0.289, "step": 13500 }, { "epoch": 2.1985099539958473, "grad_norm": 0.14644569158554077, "learning_rate": 1.0110608973377858e-05, "loss": 0.2501, "step": 13501 }, { "epoch": 2.198672800553678, "grad_norm": 0.1805572211742401, "learning_rate": 1.0106803994961955e-05, "loss": 0.2664, "step": 13502 }, { "epoch": 2.198835647111509, "grad_norm": 0.17341582477092743, "learning_rate": 1.010299955125642e-05, "loss": 0.2641, "step": 13503 }, { "epoch": 2.19899849366934, "grad_norm": 0.18297961354255676, "learning_rate": 1.0099195642397832e-05, "loss": 0.2457, "step": 13504 }, { "epoch": 2.199161340227171, "grad_norm": 0.14958906173706055, "learning_rate": 1.0095392268522758e-05, "loss": 0.2218, "step": 13505 }, { "epoch": 2.199324186785002, "grad_norm": 0.14762979745864868, "learning_rate": 1.0091589429767765e-05, "loss": 0.2266, "step": 13506 }, { "epoch": 2.199487033342833, "grad_norm": 0.18726810812950134, "learning_rate": 1.0087787126269375e-05, "loss": 0.2863, "step": 13507 }, { "epoch": 2.1996498799006634, "grad_norm": 0.17488490045070648, "learning_rate": 1.0083985358164108e-05, "loss": 0.2842, "step": 13508 }, { "epoch": 2.1998127264584943, "grad_norm": 0.1334068477153778, "learning_rate": 1.0080184125588447e-05, "loss": 0.2351, "step": 13509 }, { "epoch": 2.1999755730163253, "grad_norm": 0.1580098271369934, "learning_rate": 1.0076383428678884e-05, "loss": 0.2585, "step": 13510 }, { "epoch": 2.2001384195741562, "grad_norm": 0.19181674718856812, "learning_rate": 1.0072583267571873e-05, "loss": 0.2466, "step": 13511 }, { "epoch": 2.200301266131987, "grad_norm": 0.26323726773262024, "learning_rate": 1.0068783642403842e-05, "loss": 0.298, "step": 13512 }, { "epoch": 2.200464112689818, "grad_norm": 0.16815641522407532, "learning_rate": 1.0064984553311205e-05, "loss": 0.2545, "step": 13513 }, { "epoch": 2.200626959247649, "grad_norm": 0.18484948575496674, "learning_rate": 1.0061186000430379e-05, "loss": 0.3067, "step": 13514 }, { "epoch": 2.2007898058054796, "grad_norm": 0.17727874219417572, "learning_rate": 1.0057387983897734e-05, "loss": 0.2747, "step": 13515 }, { "epoch": 2.2009526523633105, "grad_norm": 0.2038653939962387, "learning_rate": 1.005359050384962e-05, "loss": 0.2613, "step": 13516 }, { "epoch": 2.2011154989211414, "grad_norm": 0.14547836780548096, "learning_rate": 1.0049793560422396e-05, "loss": 0.2847, "step": 13517 }, { "epoch": 2.2012783454789724, "grad_norm": 0.1648719161748886, "learning_rate": 1.0045997153752376e-05, "loss": 0.2657, "step": 13518 }, { "epoch": 2.2014411920368033, "grad_norm": 0.186851367354393, "learning_rate": 1.004220128397586e-05, "loss": 0.2743, "step": 13519 }, { "epoch": 2.2016040385946343, "grad_norm": 0.15804696083068848, "learning_rate": 1.0038405951229127e-05, "loss": 0.2832, "step": 13520 }, { "epoch": 2.201766885152465, "grad_norm": 0.23411160707473755, "learning_rate": 1.0034611155648452e-05, "loss": 0.3026, "step": 13521 }, { "epoch": 2.201929731710296, "grad_norm": 0.16096672415733337, "learning_rate": 1.0030816897370073e-05, "loss": 0.2658, "step": 13522 }, { "epoch": 2.2020925782681267, "grad_norm": 0.1482253074645996, "learning_rate": 1.0027023176530211e-05, "loss": 0.2309, "step": 13523 }, { "epoch": 2.2022554248259576, "grad_norm": 0.16770854592323303, "learning_rate": 1.0023229993265085e-05, "loss": 0.2974, "step": 13524 }, { "epoch": 2.2024182713837885, "grad_norm": 0.15765494108200073, "learning_rate": 1.0019437347710864e-05, "loss": 0.2622, "step": 13525 }, { "epoch": 2.2025811179416195, "grad_norm": 0.23493100702762604, "learning_rate": 1.0015645240003735e-05, "loss": 0.2754, "step": 13526 }, { "epoch": 2.2027439644994504, "grad_norm": 0.17834371328353882, "learning_rate": 1.0011853670279836e-05, "loss": 0.291, "step": 13527 }, { "epoch": 2.2029068110572814, "grad_norm": 0.15063321590423584, "learning_rate": 1.000806263867529e-05, "loss": 0.2783, "step": 13528 }, { "epoch": 2.2030696576151123, "grad_norm": 0.16756665706634521, "learning_rate": 1.0004272145326215e-05, "loss": 0.2942, "step": 13529 }, { "epoch": 2.2032325041729433, "grad_norm": 0.16532796621322632, "learning_rate": 1.0000482190368704e-05, "loss": 0.2417, "step": 13530 }, { "epoch": 2.2033953507307737, "grad_norm": 0.1978028416633606, "learning_rate": 9.99669277393881e-06, "loss": 0.2349, "step": 13531 }, { "epoch": 2.2035581972886047, "grad_norm": 0.1885668933391571, "learning_rate": 9.992903896172607e-06, "loss": 0.2896, "step": 13532 }, { "epoch": 2.2037210438464356, "grad_norm": 0.14870135486125946, "learning_rate": 9.989115557206113e-06, "loss": 0.2852, "step": 13533 }, { "epoch": 2.2038838904042666, "grad_norm": 0.14752914011478424, "learning_rate": 9.985327757175347e-06, "loss": 0.2373, "step": 13534 }, { "epoch": 2.2040467369620975, "grad_norm": 0.25096777081489563, "learning_rate": 9.981540496216294e-06, "loss": 0.2928, "step": 13535 }, { "epoch": 2.2042095835199285, "grad_norm": 0.16668365895748138, "learning_rate": 9.977753774464937e-06, "loss": 0.2491, "step": 13536 }, { "epoch": 2.2043724300777594, "grad_norm": 0.19388063251972198, "learning_rate": 9.973967592057232e-06, "loss": 0.3099, "step": 13537 }, { "epoch": 2.20453527663559, "grad_norm": 0.17066718637943268, "learning_rate": 9.970181949129097e-06, "loss": 0.2614, "step": 13538 }, { "epoch": 2.204698123193421, "grad_norm": 0.13978338241577148, "learning_rate": 9.966396845816473e-06, "loss": 0.2833, "step": 13539 }, { "epoch": 2.204860969751252, "grad_norm": 0.15698084235191345, "learning_rate": 9.96261228225524e-06, "loss": 0.2834, "step": 13540 }, { "epoch": 2.2050238163090827, "grad_norm": 0.22200638055801392, "learning_rate": 9.958828258581282e-06, "loss": 0.2683, "step": 13541 }, { "epoch": 2.2051866628669137, "grad_norm": 0.1835116744041443, "learning_rate": 9.955044774930444e-06, "loss": 0.309, "step": 13542 }, { "epoch": 2.2053495094247446, "grad_norm": 0.14306913316249847, "learning_rate": 9.951261831438585e-06, "loss": 0.2781, "step": 13543 }, { "epoch": 2.2055123559825756, "grad_norm": 0.21643030643463135, "learning_rate": 9.947479428241513e-06, "loss": 0.2604, "step": 13544 }, { "epoch": 2.2056752025404065, "grad_norm": 0.2319948524236679, "learning_rate": 9.943697565475028e-06, "loss": 0.2903, "step": 13545 }, { "epoch": 2.205838049098237, "grad_norm": 0.16815580427646637, "learning_rate": 9.9399162432749e-06, "loss": 0.256, "step": 13546 }, { "epoch": 2.206000895656068, "grad_norm": 0.21466416120529175, "learning_rate": 9.93613546177691e-06, "loss": 0.2884, "step": 13547 }, { "epoch": 2.206163742213899, "grad_norm": 0.13526774942874908, "learning_rate": 9.932355221116787e-06, "loss": 0.2475, "step": 13548 }, { "epoch": 2.20632658877173, "grad_norm": 0.16666097939014435, "learning_rate": 9.92857552143025e-06, "loss": 0.2758, "step": 13549 }, { "epoch": 2.2064894353295608, "grad_norm": 0.13244600594043732, "learning_rate": 9.924796362853015e-06, "loss": 0.2844, "step": 13550 }, { "epoch": 2.2066522818873917, "grad_norm": 0.20166444778442383, "learning_rate": 9.921017745520758e-06, "loss": 0.2704, "step": 13551 }, { "epoch": 2.2068151284452227, "grad_norm": 0.18104015290737152, "learning_rate": 9.917239669569139e-06, "loss": 0.2998, "step": 13552 }, { "epoch": 2.2069779750030536, "grad_norm": 0.17547814548015594, "learning_rate": 9.913462135133797e-06, "loss": 0.2699, "step": 13553 }, { "epoch": 2.207140821560884, "grad_norm": 0.13983409106731415, "learning_rate": 9.909685142350375e-06, "loss": 0.2728, "step": 13554 }, { "epoch": 2.207303668118715, "grad_norm": 0.6024442911148071, "learning_rate": 9.905908691354468e-06, "loss": 0.31, "step": 13555 }, { "epoch": 2.207466514676546, "grad_norm": 0.18369007110595703, "learning_rate": 9.902132782281653e-06, "loss": 0.2661, "step": 13556 }, { "epoch": 2.207629361234377, "grad_norm": 0.17416013777256012, "learning_rate": 9.898357415267515e-06, "loss": 0.2915, "step": 13557 }, { "epoch": 2.207792207792208, "grad_norm": 0.15533889830112457, "learning_rate": 9.89458259044759e-06, "loss": 0.255, "step": 13558 }, { "epoch": 2.207955054350039, "grad_norm": 0.16576746106147766, "learning_rate": 9.890808307957409e-06, "loss": 0.2762, "step": 13559 }, { "epoch": 2.2081179009078697, "grad_norm": 0.16464924812316895, "learning_rate": 9.887034567932466e-06, "loss": 0.2972, "step": 13560 }, { "epoch": 2.2082807474657002, "grad_norm": 0.15579289197921753, "learning_rate": 9.883261370508273e-06, "loss": 0.2535, "step": 13561 }, { "epoch": 2.208443594023531, "grad_norm": 0.18618643283843994, "learning_rate": 9.87948871582028e-06, "loss": 0.2761, "step": 13562 }, { "epoch": 2.208606440581362, "grad_norm": 0.2334563136100769, "learning_rate": 9.875716604003951e-06, "loss": 0.2879, "step": 13563 }, { "epoch": 2.208769287139193, "grad_norm": 0.1854349821805954, "learning_rate": 9.87194503519471e-06, "loss": 0.3002, "step": 13564 }, { "epoch": 2.208932133697024, "grad_norm": 0.1679978370666504, "learning_rate": 9.868174009527959e-06, "loss": 0.2336, "step": 13565 }, { "epoch": 2.209094980254855, "grad_norm": 0.16049036383628845, "learning_rate": 9.864403527139105e-06, "loss": 0.2743, "step": 13566 }, { "epoch": 2.209257826812686, "grad_norm": 0.20048007369041443, "learning_rate": 9.860633588163504e-06, "loss": 0.3277, "step": 13567 }, { "epoch": 2.2094206733705164, "grad_norm": 0.39343827962875366, "learning_rate": 9.856864192736523e-06, "loss": 0.2543, "step": 13568 }, { "epoch": 2.2095835199283473, "grad_norm": 0.15971232950687408, "learning_rate": 9.853095340993488e-06, "loss": 0.2848, "step": 13569 }, { "epoch": 2.2097463664861783, "grad_norm": 0.14279396831989288, "learning_rate": 9.849327033069714e-06, "loss": 0.309, "step": 13570 }, { "epoch": 2.2099092130440092, "grad_norm": 0.16926799714565277, "learning_rate": 9.845559269100479e-06, "loss": 0.2515, "step": 13571 }, { "epoch": 2.21007205960184, "grad_norm": 0.21613743901252747, "learning_rate": 9.841792049221082e-06, "loss": 0.2787, "step": 13572 }, { "epoch": 2.210234906159671, "grad_norm": 0.14623408019542694, "learning_rate": 9.838025373566768e-06, "loss": 0.2466, "step": 13573 }, { "epoch": 2.210397752717502, "grad_norm": 0.17274118959903717, "learning_rate": 9.834259242272764e-06, "loss": 0.267, "step": 13574 }, { "epoch": 2.210560599275333, "grad_norm": 0.1853676736354828, "learning_rate": 9.830493655474286e-06, "loss": 0.2916, "step": 13575 }, { "epoch": 2.210723445833164, "grad_norm": 0.1559196412563324, "learning_rate": 9.826728613306544e-06, "loss": 0.2824, "step": 13576 }, { "epoch": 2.2108862923909944, "grad_norm": 0.17389126121997833, "learning_rate": 9.822964115904704e-06, "loss": 0.3235, "step": 13577 }, { "epoch": 2.2110491389488254, "grad_norm": 0.1671593338251114, "learning_rate": 9.819200163403913e-06, "loss": 0.2662, "step": 13578 }, { "epoch": 2.2112119855066563, "grad_norm": 0.2652946710586548, "learning_rate": 9.81543675593933e-06, "loss": 0.3194, "step": 13579 }, { "epoch": 2.2113748320644873, "grad_norm": 0.14097072184085846, "learning_rate": 9.811673893646062e-06, "loss": 0.29, "step": 13580 }, { "epoch": 2.211537678622318, "grad_norm": 0.15045605599880219, "learning_rate": 9.807911576659204e-06, "loss": 0.2381, "step": 13581 }, { "epoch": 2.211700525180149, "grad_norm": 0.15414734184741974, "learning_rate": 9.80414980511383e-06, "loss": 0.2695, "step": 13582 }, { "epoch": 2.21186337173798, "grad_norm": 0.16070260107517242, "learning_rate": 9.800388579145014e-06, "loss": 0.2491, "step": 13583 }, { "epoch": 2.2120262182958106, "grad_norm": 0.2123611867427826, "learning_rate": 9.796627898887788e-06, "loss": 0.2904, "step": 13584 }, { "epoch": 2.2121890648536415, "grad_norm": 0.17790380120277405, "learning_rate": 9.792867764477173e-06, "loss": 0.2613, "step": 13585 }, { "epoch": 2.2123519114114725, "grad_norm": 0.11988482624292374, "learning_rate": 9.789108176048156e-06, "loss": 0.2878, "step": 13586 }, { "epoch": 2.2125147579693034, "grad_norm": 0.1593429446220398, "learning_rate": 9.785349133735738e-06, "loss": 0.2647, "step": 13587 }, { "epoch": 2.2126776045271344, "grad_norm": 0.15475863218307495, "learning_rate": 9.78159063767487e-06, "loss": 0.219, "step": 13588 }, { "epoch": 2.2128404510849653, "grad_norm": 0.1759970784187317, "learning_rate": 9.777832688000484e-06, "loss": 0.3315, "step": 13589 }, { "epoch": 2.2130032976427962, "grad_norm": 0.17605601251125336, "learning_rate": 9.774075284847522e-06, "loss": 0.2864, "step": 13590 }, { "epoch": 2.2131661442006267, "grad_norm": 0.17292161285877228, "learning_rate": 9.770318428350875e-06, "loss": 0.2745, "step": 13591 }, { "epoch": 2.2133289907584577, "grad_norm": 0.15195833146572113, "learning_rate": 9.766562118645425e-06, "loss": 0.2948, "step": 13592 }, { "epoch": 2.2134918373162886, "grad_norm": 0.18723012506961823, "learning_rate": 9.762806355866028e-06, "loss": 0.3037, "step": 13593 }, { "epoch": 2.2136546838741196, "grad_norm": 0.1925869584083557, "learning_rate": 9.759051140147541e-06, "loss": 0.2871, "step": 13594 }, { "epoch": 2.2138175304319505, "grad_norm": 0.15753024816513062, "learning_rate": 9.755296471624784e-06, "loss": 0.2765, "step": 13595 }, { "epoch": 2.2139803769897815, "grad_norm": 0.1657925844192505, "learning_rate": 9.751542350432555e-06, "loss": 0.2903, "step": 13596 }, { "epoch": 2.2141432235476124, "grad_norm": 0.17406319081783295, "learning_rate": 9.747788776705635e-06, "loss": 0.2596, "step": 13597 }, { "epoch": 2.2143060701054433, "grad_norm": 0.18805086612701416, "learning_rate": 9.744035750578803e-06, "loss": 0.2904, "step": 13598 }, { "epoch": 2.214468916663274, "grad_norm": 0.1699061095714569, "learning_rate": 9.740283272186788e-06, "loss": 0.2816, "step": 13599 }, { "epoch": 2.2146317632211048, "grad_norm": 0.1744246780872345, "learning_rate": 9.73653134166433e-06, "loss": 0.2896, "step": 13600 }, { "epoch": 2.2147946097789357, "grad_norm": 0.1706417053937912, "learning_rate": 9.732779959146127e-06, "loss": 0.2647, "step": 13601 }, { "epoch": 2.2149574563367667, "grad_norm": 0.19850367307662964, "learning_rate": 9.729029124766856e-06, "loss": 0.2759, "step": 13602 }, { "epoch": 2.2151203028945976, "grad_norm": 0.15634292364120483, "learning_rate": 9.725278838661204e-06, "loss": 0.2503, "step": 13603 }, { "epoch": 2.2152831494524285, "grad_norm": 0.16868890821933746, "learning_rate": 9.721529100963794e-06, "loss": 0.3003, "step": 13604 }, { "epoch": 2.2154459960102595, "grad_norm": 0.16974875330924988, "learning_rate": 9.717779911809275e-06, "loss": 0.2642, "step": 13605 }, { "epoch": 2.2156088425680904, "grad_norm": 0.17020611464977264, "learning_rate": 9.714031271332245e-06, "loss": 0.2613, "step": 13606 }, { "epoch": 2.215771689125921, "grad_norm": 0.21328428387641907, "learning_rate": 9.710283179667278e-06, "loss": 0.2779, "step": 13607 }, { "epoch": 2.215934535683752, "grad_norm": 0.16626395285129547, "learning_rate": 9.706535636948966e-06, "loss": 0.2614, "step": 13608 }, { "epoch": 2.216097382241583, "grad_norm": 0.17574864625930786, "learning_rate": 9.702788643311844e-06, "loss": 0.2613, "step": 13609 }, { "epoch": 2.2162602287994138, "grad_norm": 0.1793527454137802, "learning_rate": 9.699042198890442e-06, "loss": 0.278, "step": 13610 }, { "epoch": 2.2164230753572447, "grad_norm": 0.15945610404014587, "learning_rate": 9.69529630381926e-06, "loss": 0.2512, "step": 13611 }, { "epoch": 2.2165859219150756, "grad_norm": 0.15114770829677582, "learning_rate": 9.6915509582328e-06, "loss": 0.2806, "step": 13612 }, { "epoch": 2.2167487684729066, "grad_norm": 0.21290430426597595, "learning_rate": 9.687806162265531e-06, "loss": 0.2739, "step": 13613 }, { "epoch": 2.216911615030737, "grad_norm": 0.2049230933189392, "learning_rate": 9.684061916051895e-06, "loss": 0.2486, "step": 13614 }, { "epoch": 2.217074461588568, "grad_norm": 0.15314115583896637, "learning_rate": 9.680318219726315e-06, "loss": 0.2699, "step": 13615 }, { "epoch": 2.217237308146399, "grad_norm": 0.23384377360343933, "learning_rate": 9.67657507342322e-06, "loss": 0.3121, "step": 13616 }, { "epoch": 2.21740015470423, "grad_norm": 0.15038655698299408, "learning_rate": 9.672832477276986e-06, "loss": 0.2542, "step": 13617 }, { "epoch": 2.217563001262061, "grad_norm": 0.18875683844089508, "learning_rate": 9.66909043142198e-06, "loss": 0.2649, "step": 13618 }, { "epoch": 2.217725847819892, "grad_norm": 0.1680394411087036, "learning_rate": 9.66534893599257e-06, "loss": 0.2964, "step": 13619 }, { "epoch": 2.2178886943777227, "grad_norm": 0.15360166132450104, "learning_rate": 9.66160799112308e-06, "loss": 0.296, "step": 13620 }, { "epoch": 2.2180515409355537, "grad_norm": 0.17169035971164703, "learning_rate": 9.657867596947814e-06, "loss": 0.2807, "step": 13621 }, { "epoch": 2.218214387493384, "grad_norm": 0.14236553013324738, "learning_rate": 9.654127753601061e-06, "loss": 0.2558, "step": 13622 }, { "epoch": 2.218377234051215, "grad_norm": 0.17626996338367462, "learning_rate": 9.650388461217108e-06, "loss": 0.2845, "step": 13623 }, { "epoch": 2.218540080609046, "grad_norm": 0.16233676671981812, "learning_rate": 9.646649719930198e-06, "loss": 0.2916, "step": 13624 }, { "epoch": 2.218702927166877, "grad_norm": 0.2086867392063141, "learning_rate": 9.642911529874563e-06, "loss": 0.2776, "step": 13625 }, { "epoch": 2.218865773724708, "grad_norm": 0.15245181322097778, "learning_rate": 9.639173891184403e-06, "loss": 0.2632, "step": 13626 }, { "epoch": 2.219028620282539, "grad_norm": 0.20998133718967438, "learning_rate": 9.635436803993935e-06, "loss": 0.3065, "step": 13627 }, { "epoch": 2.21919146684037, "grad_norm": 0.17969577014446259, "learning_rate": 9.631700268437319e-06, "loss": 0.2875, "step": 13628 }, { "epoch": 2.2193543133982008, "grad_norm": 0.17870193719863892, "learning_rate": 9.627964284648696e-06, "loss": 0.2962, "step": 13629 }, { "epoch": 2.2195171599560313, "grad_norm": 0.15517187118530273, "learning_rate": 9.624228852762224e-06, "loss": 0.2769, "step": 13630 }, { "epoch": 2.219680006513862, "grad_norm": 0.17627187073230743, "learning_rate": 9.620493972912e-06, "loss": 0.2545, "step": 13631 }, { "epoch": 2.219842853071693, "grad_norm": 0.13205580413341522, "learning_rate": 9.61675964523212e-06, "loss": 0.2676, "step": 13632 }, { "epoch": 2.220005699629524, "grad_norm": 0.179857075214386, "learning_rate": 9.613025869856654e-06, "loss": 0.2863, "step": 13633 }, { "epoch": 2.220168546187355, "grad_norm": 0.20175081491470337, "learning_rate": 9.609292646919665e-06, "loss": 0.2841, "step": 13634 }, { "epoch": 2.220331392745186, "grad_norm": 0.17093312740325928, "learning_rate": 9.60555997655518e-06, "loss": 0.2477, "step": 13635 }, { "epoch": 2.220494239303017, "grad_norm": 0.2642720341682434, "learning_rate": 9.601827858897211e-06, "loss": 0.282, "step": 13636 }, { "epoch": 2.2206570858608474, "grad_norm": 0.2086404711008072, "learning_rate": 9.59809629407976e-06, "loss": 0.2818, "step": 13637 }, { "epoch": 2.2208199324186784, "grad_norm": 0.13682308793067932, "learning_rate": 9.594365282236803e-06, "loss": 0.2583, "step": 13638 }, { "epoch": 2.2209827789765093, "grad_norm": 0.16360636055469513, "learning_rate": 9.590634823502276e-06, "loss": 0.3087, "step": 13639 }, { "epoch": 2.2211456255343403, "grad_norm": 0.1866348534822464, "learning_rate": 9.586904918010136e-06, "loss": 0.2857, "step": 13640 }, { "epoch": 2.221308472092171, "grad_norm": 0.19208014011383057, "learning_rate": 9.583175565894289e-06, "loss": 0.2878, "step": 13641 }, { "epoch": 2.221471318650002, "grad_norm": 0.1713215559720993, "learning_rate": 9.57944676728862e-06, "loss": 0.2773, "step": 13642 }, { "epoch": 2.221634165207833, "grad_norm": 0.20634664595127106, "learning_rate": 9.57571852232702e-06, "loss": 0.2795, "step": 13643 }, { "epoch": 2.2217970117656636, "grad_norm": 0.1792323738336563, "learning_rate": 9.571990831143333e-06, "loss": 0.2751, "step": 13644 }, { "epoch": 2.2219598583234945, "grad_norm": 0.16969488561153412, "learning_rate": 9.568263693871404e-06, "loss": 0.2827, "step": 13645 }, { "epoch": 2.2221227048813255, "grad_norm": 0.15701043605804443, "learning_rate": 9.564537110645045e-06, "loss": 0.2862, "step": 13646 }, { "epoch": 2.2222855514391564, "grad_norm": 0.12748503684997559, "learning_rate": 9.560811081598048e-06, "loss": 0.2731, "step": 13647 }, { "epoch": 2.2224483979969873, "grad_norm": 0.14769962430000305, "learning_rate": 9.557085606864183e-06, "loss": 0.253, "step": 13648 }, { "epoch": 2.2226112445548183, "grad_norm": 0.20759648084640503, "learning_rate": 9.55336068657722e-06, "loss": 0.2763, "step": 13649 }, { "epoch": 2.2227740911126492, "grad_norm": 0.15755265951156616, "learning_rate": 9.54963632087089e-06, "loss": 0.2625, "step": 13650 }, { "epoch": 2.22293693767048, "grad_norm": 0.1782371997833252, "learning_rate": 9.545912509878897e-06, "loss": 0.2714, "step": 13651 }, { "epoch": 2.2230997842283107, "grad_norm": 0.15957024693489075, "learning_rate": 9.542189253734953e-06, "loss": 0.2644, "step": 13652 }, { "epoch": 2.2232626307861416, "grad_norm": 0.22074294090270996, "learning_rate": 9.538466552572732e-06, "loss": 0.2746, "step": 13653 }, { "epoch": 2.2234254773439726, "grad_norm": 0.1875056028366089, "learning_rate": 9.534744406525881e-06, "loss": 0.2758, "step": 13654 }, { "epoch": 2.2235883239018035, "grad_norm": 0.1409618854522705, "learning_rate": 9.531022815728035e-06, "loss": 0.2492, "step": 13655 }, { "epoch": 2.2237511704596344, "grad_norm": 0.16818949580192566, "learning_rate": 9.527301780312825e-06, "loss": 0.2569, "step": 13656 }, { "epoch": 2.2239140170174654, "grad_norm": 0.16572076082229614, "learning_rate": 9.523581300413836e-06, "loss": 0.2586, "step": 13657 }, { "epoch": 2.2240768635752963, "grad_norm": 0.2213190346956253, "learning_rate": 9.51986137616464e-06, "loss": 0.2677, "step": 13658 }, { "epoch": 2.2242397101331273, "grad_norm": 0.22825385630130768, "learning_rate": 9.516142007698811e-06, "loss": 0.2923, "step": 13659 }, { "epoch": 2.2244025566909578, "grad_norm": 0.20441125333309174, "learning_rate": 9.512423195149872e-06, "loss": 0.2886, "step": 13660 }, { "epoch": 2.2245654032487887, "grad_norm": 0.15132653713226318, "learning_rate": 9.508704938651345e-06, "loss": 0.2333, "step": 13661 }, { "epoch": 2.2247282498066197, "grad_norm": 0.17143145203590393, "learning_rate": 9.504987238336714e-06, "loss": 0.2537, "step": 13662 }, { "epoch": 2.2248910963644506, "grad_norm": 0.1901645064353943, "learning_rate": 9.501270094339473e-06, "loss": 0.2729, "step": 13663 }, { "epoch": 2.2250539429222815, "grad_norm": 0.22013463079929352, "learning_rate": 9.497553506793069e-06, "loss": 0.2784, "step": 13664 }, { "epoch": 2.2252167894801125, "grad_norm": 0.1960667222738266, "learning_rate": 9.493837475830945e-06, "loss": 0.2826, "step": 13665 }, { "epoch": 2.2253796360379434, "grad_norm": 0.18626992404460907, "learning_rate": 9.490122001586502e-06, "loss": 0.2818, "step": 13666 }, { "epoch": 2.225542482595774, "grad_norm": 0.1598718911409378, "learning_rate": 9.486407084193158e-06, "loss": 0.2646, "step": 13667 }, { "epoch": 2.225705329153605, "grad_norm": 0.207841694355011, "learning_rate": 9.482692723784278e-06, "loss": 0.3036, "step": 13668 }, { "epoch": 2.225868175711436, "grad_norm": 0.21637801826000214, "learning_rate": 9.478978920493215e-06, "loss": 0.2777, "step": 13669 }, { "epoch": 2.2260310222692667, "grad_norm": 0.18916764855384827, "learning_rate": 9.475265674453318e-06, "loss": 0.2611, "step": 13670 }, { "epoch": 2.2261938688270977, "grad_norm": 0.16291651129722595, "learning_rate": 9.471552985797896e-06, "loss": 0.2599, "step": 13671 }, { "epoch": 2.2263567153849286, "grad_norm": 0.18161354959011078, "learning_rate": 9.467840854660246e-06, "loss": 0.283, "step": 13672 }, { "epoch": 2.2265195619427596, "grad_norm": 0.17883561551570892, "learning_rate": 9.464129281173636e-06, "loss": 0.2463, "step": 13673 }, { "epoch": 2.2266824085005905, "grad_norm": 0.18672288954257965, "learning_rate": 9.460418265471341e-06, "loss": 0.297, "step": 13674 }, { "epoch": 2.226845255058421, "grad_norm": 0.20057761669158936, "learning_rate": 9.456707807686588e-06, "loss": 0.2676, "step": 13675 }, { "epoch": 2.227008101616252, "grad_norm": 0.17399151623249054, "learning_rate": 9.452997907952584e-06, "loss": 0.2907, "step": 13676 }, { "epoch": 2.227170948174083, "grad_norm": 0.2076089084148407, "learning_rate": 9.449288566402544e-06, "loss": 0.2918, "step": 13677 }, { "epoch": 2.227333794731914, "grad_norm": 0.15406163036823273, "learning_rate": 9.445579783169637e-06, "loss": 0.259, "step": 13678 }, { "epoch": 2.227496641289745, "grad_norm": 0.14608429372310638, "learning_rate": 9.441871558387008e-06, "loss": 0.2776, "step": 13679 }, { "epoch": 2.2276594878475757, "grad_norm": 0.16159969568252563, "learning_rate": 9.438163892187816e-06, "loss": 0.2747, "step": 13680 }, { "epoch": 2.2278223344054067, "grad_norm": 0.17409047484397888, "learning_rate": 9.434456784705152e-06, "loss": 0.2663, "step": 13681 }, { "epoch": 2.2279851809632376, "grad_norm": 0.18633025884628296, "learning_rate": 9.430750236072134e-06, "loss": 0.2438, "step": 13682 }, { "epoch": 2.228148027521068, "grad_norm": 0.2778683006763458, "learning_rate": 9.427044246421829e-06, "loss": 0.314, "step": 13683 }, { "epoch": 2.228310874078899, "grad_norm": 0.15560166537761688, "learning_rate": 9.423338815887287e-06, "loss": 0.2837, "step": 13684 }, { "epoch": 2.22847372063673, "grad_norm": 0.21511505544185638, "learning_rate": 9.419633944601558e-06, "loss": 0.2603, "step": 13685 }, { "epoch": 2.228636567194561, "grad_norm": 0.19014191627502441, "learning_rate": 9.415929632697648e-06, "loss": 0.2914, "step": 13686 }, { "epoch": 2.228799413752392, "grad_norm": 0.1982765942811966, "learning_rate": 9.412225880308558e-06, "loss": 0.274, "step": 13687 }, { "epoch": 2.228962260310223, "grad_norm": 0.15103770792484283, "learning_rate": 9.408522687567253e-06, "loss": 0.2707, "step": 13688 }, { "epoch": 2.2291251068680538, "grad_norm": 0.16466562449932098, "learning_rate": 9.404820054606703e-06, "loss": 0.2653, "step": 13689 }, { "epoch": 2.2292879534258843, "grad_norm": 0.22418661415576935, "learning_rate": 9.40111798155984e-06, "loss": 0.2737, "step": 13690 }, { "epoch": 2.229450799983715, "grad_norm": 0.21124249696731567, "learning_rate": 9.397416468559566e-06, "loss": 0.2562, "step": 13691 }, { "epoch": 2.229613646541546, "grad_norm": 0.20244814455509186, "learning_rate": 9.393715515738799e-06, "loss": 0.3094, "step": 13692 }, { "epoch": 2.229776493099377, "grad_norm": 0.19220110774040222, "learning_rate": 9.3900151232304e-06, "loss": 0.2532, "step": 13693 }, { "epoch": 2.229939339657208, "grad_norm": 0.16631464660167694, "learning_rate": 9.386315291167228e-06, "loss": 0.3112, "step": 13694 }, { "epoch": 2.230102186215039, "grad_norm": 0.18027199804782867, "learning_rate": 9.382616019682108e-06, "loss": 0.303, "step": 13695 }, { "epoch": 2.23026503277287, "grad_norm": 0.15798614919185638, "learning_rate": 9.378917308907872e-06, "loss": 0.2778, "step": 13696 }, { "epoch": 2.2304278793307004, "grad_norm": 0.10942501574754715, "learning_rate": 9.375219158977305e-06, "loss": 0.2929, "step": 13697 }, { "epoch": 2.2305907258885314, "grad_norm": 0.16015173494815826, "learning_rate": 9.371521570023186e-06, "loss": 0.285, "step": 13698 }, { "epoch": 2.2307535724463623, "grad_norm": 0.1458483338356018, "learning_rate": 9.367824542178259e-06, "loss": 0.2571, "step": 13699 }, { "epoch": 2.2309164190041932, "grad_norm": 0.19059965014457703, "learning_rate": 9.364128075575274e-06, "loss": 0.2427, "step": 13700 }, { "epoch": 2.231079265562024, "grad_norm": 0.14220668375492096, "learning_rate": 9.360432170346937e-06, "loss": 0.2817, "step": 13701 }, { "epoch": 2.231242112119855, "grad_norm": 0.14029882848262787, "learning_rate": 9.356736826625931e-06, "loss": 0.3026, "step": 13702 }, { "epoch": 2.231404958677686, "grad_norm": 0.15538068115711212, "learning_rate": 9.353042044544954e-06, "loss": 0.3041, "step": 13703 }, { "epoch": 2.231567805235517, "grad_norm": 0.16410505771636963, "learning_rate": 9.349347824236645e-06, "loss": 0.2601, "step": 13704 }, { "epoch": 2.231730651793348, "grad_norm": 0.18903791904449463, "learning_rate": 9.345654165833639e-06, "loss": 0.3042, "step": 13705 }, { "epoch": 2.2318934983511785, "grad_norm": 0.2014460265636444, "learning_rate": 9.341961069468545e-06, "loss": 0.2518, "step": 13706 }, { "epoch": 2.2320563449090094, "grad_norm": 0.17375457286834717, "learning_rate": 9.338268535273968e-06, "loss": 0.2636, "step": 13707 }, { "epoch": 2.2322191914668403, "grad_norm": 0.14647702872753143, "learning_rate": 9.334576563382474e-06, "loss": 0.2415, "step": 13708 }, { "epoch": 2.2323820380246713, "grad_norm": 0.1701553761959076, "learning_rate": 9.330885153926608e-06, "loss": 0.2804, "step": 13709 }, { "epoch": 2.232544884582502, "grad_norm": 0.1411495953798294, "learning_rate": 9.32719430703892e-06, "loss": 0.2973, "step": 13710 }, { "epoch": 2.232707731140333, "grad_norm": 0.13704246282577515, "learning_rate": 9.323504022851911e-06, "loss": 0.2605, "step": 13711 }, { "epoch": 2.232870577698164, "grad_norm": 0.133106529712677, "learning_rate": 9.319814301498078e-06, "loss": 0.2624, "step": 13712 }, { "epoch": 2.2330334242559946, "grad_norm": 0.1525106132030487, "learning_rate": 9.31612514310988e-06, "loss": 0.3089, "step": 13713 }, { "epoch": 2.2331962708138255, "grad_norm": 0.18942660093307495, "learning_rate": 9.31243654781979e-06, "loss": 0.2693, "step": 13714 }, { "epoch": 2.2333591173716565, "grad_norm": 0.2093682438135147, "learning_rate": 9.30874851576023e-06, "loss": 0.3097, "step": 13715 }, { "epoch": 2.2335219639294874, "grad_norm": 0.17074072360992432, "learning_rate": 9.305061047063599e-06, "loss": 0.2705, "step": 13716 }, { "epoch": 2.2336848104873184, "grad_norm": 0.20548833906650543, "learning_rate": 9.301374141862309e-06, "loss": 0.3127, "step": 13717 }, { "epoch": 2.2338476570451493, "grad_norm": 0.1771468222141266, "learning_rate": 9.297687800288716e-06, "loss": 0.2587, "step": 13718 }, { "epoch": 2.2340105036029803, "grad_norm": 0.17217989265918732, "learning_rate": 9.294002022475182e-06, "loss": 0.2713, "step": 13719 }, { "epoch": 2.2341733501608108, "grad_norm": 0.15155577659606934, "learning_rate": 9.290316808554031e-06, "loss": 0.2476, "step": 13720 }, { "epoch": 2.2343361967186417, "grad_norm": 0.22832217812538147, "learning_rate": 9.286632158657568e-06, "loss": 0.2767, "step": 13721 }, { "epoch": 2.2344990432764726, "grad_norm": 0.14676061272621155, "learning_rate": 9.282948072918097e-06, "loss": 0.3017, "step": 13722 }, { "epoch": 2.2346618898343036, "grad_norm": 0.17122669517993927, "learning_rate": 9.279264551467879e-06, "loss": 0.2601, "step": 13723 }, { "epoch": 2.2348247363921345, "grad_norm": 0.14642173051834106, "learning_rate": 9.275581594439157e-06, "loss": 0.2766, "step": 13724 }, { "epoch": 2.2349875829499655, "grad_norm": 0.2194902002811432, "learning_rate": 9.271899201964173e-06, "loss": 0.3011, "step": 13725 }, { "epoch": 2.2351504295077964, "grad_norm": 0.15349052846431732, "learning_rate": 9.268217374175134e-06, "loss": 0.2402, "step": 13726 }, { "epoch": 2.2353132760656274, "grad_norm": 0.17160075902938843, "learning_rate": 9.264536111204222e-06, "loss": 0.2728, "step": 13727 }, { "epoch": 2.235476122623458, "grad_norm": 0.22815540432929993, "learning_rate": 9.2608554131836e-06, "loss": 0.2891, "step": 13728 }, { "epoch": 2.235638969181289, "grad_norm": 0.176189124584198, "learning_rate": 9.257175280245436e-06, "loss": 0.3003, "step": 13729 }, { "epoch": 2.2358018157391197, "grad_norm": 0.23851414024829865, "learning_rate": 9.253495712521842e-06, "loss": 0.3051, "step": 13730 }, { "epoch": 2.2359646622969507, "grad_norm": 0.15509070456027985, "learning_rate": 9.249816710144923e-06, "loss": 0.2713, "step": 13731 }, { "epoch": 2.2361275088547816, "grad_norm": 0.19145233929157257, "learning_rate": 9.246138273246783e-06, "loss": 0.2766, "step": 13732 }, { "epoch": 2.2362903554126126, "grad_norm": 0.17023752629756927, "learning_rate": 9.242460401959477e-06, "loss": 0.2763, "step": 13733 }, { "epoch": 2.2364532019704435, "grad_norm": 0.18870101869106293, "learning_rate": 9.238783096415054e-06, "loss": 0.2833, "step": 13734 }, { "epoch": 2.2366160485282744, "grad_norm": 0.15166358649730682, "learning_rate": 9.235106356745533e-06, "loss": 0.2831, "step": 13735 }, { "epoch": 2.236778895086105, "grad_norm": 0.15043826401233673, "learning_rate": 9.23143018308293e-06, "loss": 0.2679, "step": 13736 }, { "epoch": 2.236941741643936, "grad_norm": 0.14875511825084686, "learning_rate": 9.227754575559232e-06, "loss": 0.2783, "step": 13737 }, { "epoch": 2.237104588201767, "grad_norm": 0.17351855337619781, "learning_rate": 9.224079534306399e-06, "loss": 0.2838, "step": 13738 }, { "epoch": 2.2372674347595978, "grad_norm": 0.14477629959583282, "learning_rate": 9.220405059456366e-06, "loss": 0.2606, "step": 13739 }, { "epoch": 2.2374302813174287, "grad_norm": 0.1730792373418808, "learning_rate": 9.21673115114108e-06, "loss": 0.2594, "step": 13740 }, { "epoch": 2.2375931278752597, "grad_norm": 0.21076150238513947, "learning_rate": 9.21305780949243e-06, "loss": 0.2599, "step": 13741 }, { "epoch": 2.2377559744330906, "grad_norm": 0.1416325867176056, "learning_rate": 9.209385034642298e-06, "loss": 0.2991, "step": 13742 }, { "epoch": 2.237918820990921, "grad_norm": 0.17606763541698456, "learning_rate": 9.20571282672256e-06, "loss": 0.2943, "step": 13743 }, { "epoch": 2.238081667548752, "grad_norm": 0.19569896161556244, "learning_rate": 9.202041185865054e-06, "loss": 0.2824, "step": 13744 }, { "epoch": 2.238244514106583, "grad_norm": 0.21606740355491638, "learning_rate": 9.198370112201602e-06, "loss": 0.287, "step": 13745 }, { "epoch": 2.238407360664414, "grad_norm": 0.22329162061214447, "learning_rate": 9.194699605863993e-06, "loss": 0.296, "step": 13746 }, { "epoch": 2.238570207222245, "grad_norm": 0.1856454461812973, "learning_rate": 9.191029666984036e-06, "loss": 0.3202, "step": 13747 }, { "epoch": 2.238733053780076, "grad_norm": 0.14377999305725098, "learning_rate": 9.187360295693476e-06, "loss": 0.2434, "step": 13748 }, { "epoch": 2.2388959003379068, "grad_norm": 0.14854471385478973, "learning_rate": 9.183691492124055e-06, "loss": 0.2504, "step": 13749 }, { "epoch": 2.2390587468957377, "grad_norm": 0.15995529294013977, "learning_rate": 9.180023256407491e-06, "loss": 0.2358, "step": 13750 }, { "epoch": 2.239221593453568, "grad_norm": 0.18136122822761536, "learning_rate": 9.176355588675497e-06, "loss": 0.2984, "step": 13751 }, { "epoch": 2.239384440011399, "grad_norm": 0.1424059420824051, "learning_rate": 9.17268848905975e-06, "loss": 0.2583, "step": 13752 }, { "epoch": 2.23954728656923, "grad_norm": 0.13839231431484222, "learning_rate": 9.169021957691895e-06, "loss": 0.2514, "step": 13753 }, { "epoch": 2.239710133127061, "grad_norm": 0.18859036266803741, "learning_rate": 9.16535599470359e-06, "loss": 0.312, "step": 13754 }, { "epoch": 2.239872979684892, "grad_norm": 0.22079485654830933, "learning_rate": 9.161690600226439e-06, "loss": 0.2783, "step": 13755 }, { "epoch": 2.240035826242723, "grad_norm": 0.24757881462574005, "learning_rate": 9.158025774392057e-06, "loss": 0.2873, "step": 13756 }, { "epoch": 2.240198672800554, "grad_norm": 0.1980895698070526, "learning_rate": 9.154361517332013e-06, "loss": 0.2694, "step": 13757 }, { "epoch": 2.240361519358385, "grad_norm": 0.16025429964065552, "learning_rate": 9.150697829177857e-06, "loss": 0.242, "step": 13758 }, { "epoch": 2.2405243659162153, "grad_norm": 0.20166973769664764, "learning_rate": 9.147034710061145e-06, "loss": 0.2616, "step": 13759 }, { "epoch": 2.2406872124740462, "grad_norm": 0.19947662949562073, "learning_rate": 9.14337216011338e-06, "loss": 0.2676, "step": 13760 }, { "epoch": 2.240850059031877, "grad_norm": 0.15987281501293182, "learning_rate": 9.139710179466057e-06, "loss": 0.254, "step": 13761 }, { "epoch": 2.241012905589708, "grad_norm": 0.160439133644104, "learning_rate": 9.136048768250665e-06, "loss": 0.2754, "step": 13762 }, { "epoch": 2.241175752147539, "grad_norm": 0.1781858205795288, "learning_rate": 9.13238792659865e-06, "loss": 0.2593, "step": 13763 }, { "epoch": 2.24133859870537, "grad_norm": 0.13694077730178833, "learning_rate": 9.128727654641441e-06, "loss": 0.2486, "step": 13764 }, { "epoch": 2.241501445263201, "grad_norm": 0.18338467180728912, "learning_rate": 9.125067952510472e-06, "loss": 0.2597, "step": 13765 }, { "epoch": 2.2416642918210314, "grad_norm": 0.15723638236522675, "learning_rate": 9.121408820337124e-06, "loss": 0.2975, "step": 13766 }, { "epoch": 2.2418271383788624, "grad_norm": 0.15969328582286835, "learning_rate": 9.117750258252774e-06, "loss": 0.2901, "step": 13767 }, { "epoch": 2.2419899849366933, "grad_norm": 0.17491397261619568, "learning_rate": 9.114092266388766e-06, "loss": 0.2693, "step": 13768 }, { "epoch": 2.2421528314945243, "grad_norm": 0.17792415618896484, "learning_rate": 9.110434844876447e-06, "loss": 0.3134, "step": 13769 }, { "epoch": 2.242315678052355, "grad_norm": 0.1535695195198059, "learning_rate": 9.106777993847126e-06, "loss": 0.2804, "step": 13770 }, { "epoch": 2.242478524610186, "grad_norm": 0.1731785088777542, "learning_rate": 9.103121713432084e-06, "loss": 0.2865, "step": 13771 }, { "epoch": 2.242641371168017, "grad_norm": 0.16560199856758118, "learning_rate": 9.099466003762609e-06, "loss": 0.2648, "step": 13772 }, { "epoch": 2.2428042177258476, "grad_norm": 0.17348062992095947, "learning_rate": 9.095810864969941e-06, "loss": 0.2865, "step": 13773 }, { "epoch": 2.2429670642836785, "grad_norm": 0.16534529626369476, "learning_rate": 9.092156297185315e-06, "loss": 0.2644, "step": 13774 }, { "epoch": 2.2431299108415095, "grad_norm": 0.14778369665145874, "learning_rate": 9.08850230053993e-06, "loss": 0.2612, "step": 13775 }, { "epoch": 2.2432927573993404, "grad_norm": 0.1363227218389511, "learning_rate": 9.084848875164995e-06, "loss": 0.2499, "step": 13776 }, { "epoch": 2.2434556039571714, "grad_norm": 0.19477802515029907, "learning_rate": 9.081196021191666e-06, "loss": 0.2988, "step": 13777 }, { "epoch": 2.2436184505150023, "grad_norm": 0.1843681037425995, "learning_rate": 9.077543738751096e-06, "loss": 0.2712, "step": 13778 }, { "epoch": 2.2437812970728332, "grad_norm": 0.15312612056732178, "learning_rate": 9.073892027974401e-06, "loss": 0.2778, "step": 13779 }, { "epoch": 2.243944143630664, "grad_norm": 0.14595885574817657, "learning_rate": 9.070240888992706e-06, "loss": 0.2515, "step": 13780 }, { "epoch": 2.2441069901884947, "grad_norm": 0.1699971705675125, "learning_rate": 9.06659032193709e-06, "loss": 0.2471, "step": 13781 }, { "epoch": 2.2442698367463256, "grad_norm": 0.18018849194049835, "learning_rate": 9.062940326938612e-06, "loss": 0.2921, "step": 13782 }, { "epoch": 2.2444326833041566, "grad_norm": 0.1859099268913269, "learning_rate": 9.059290904128334e-06, "loss": 0.2485, "step": 13783 }, { "epoch": 2.2445955298619875, "grad_norm": 0.14556002616882324, "learning_rate": 9.05564205363727e-06, "loss": 0.2596, "step": 13784 }, { "epoch": 2.2447583764198185, "grad_norm": 0.12559650838375092, "learning_rate": 9.05199377559643e-06, "loss": 0.2456, "step": 13785 }, { "epoch": 2.2449212229776494, "grad_norm": 0.18939216434955597, "learning_rate": 9.048346070136785e-06, "loss": 0.2636, "step": 13786 }, { "epoch": 2.2450840695354803, "grad_norm": 0.20559431612491608, "learning_rate": 9.044698937389316e-06, "loss": 0.2775, "step": 13787 }, { "epoch": 2.2452469160933113, "grad_norm": 0.14683276414871216, "learning_rate": 9.041052377484957e-06, "loss": 0.2675, "step": 13788 }, { "epoch": 2.245409762651142, "grad_norm": 0.16811972856521606, "learning_rate": 9.037406390554634e-06, "loss": 0.274, "step": 13789 }, { "epoch": 2.2455726092089727, "grad_norm": 0.17205089330673218, "learning_rate": 9.033760976729238e-06, "loss": 0.2727, "step": 13790 }, { "epoch": 2.2457354557668037, "grad_norm": 0.1379205584526062, "learning_rate": 9.03011613613967e-06, "loss": 0.2583, "step": 13791 }, { "epoch": 2.2458983023246346, "grad_norm": 0.16203683614730835, "learning_rate": 9.026471868916775e-06, "loss": 0.2969, "step": 13792 }, { "epoch": 2.2460611488824656, "grad_norm": 0.17392021417617798, "learning_rate": 9.022828175191392e-06, "loss": 0.2567, "step": 13793 }, { "epoch": 2.2462239954402965, "grad_norm": 0.14115947484970093, "learning_rate": 9.019185055094356e-06, "loss": 0.298, "step": 13794 }, { "epoch": 2.2463868419981274, "grad_norm": 0.14213119447231293, "learning_rate": 9.015542508756447e-06, "loss": 0.2543, "step": 13795 }, { "epoch": 2.246549688555958, "grad_norm": 0.16684502363204956, "learning_rate": 9.01190053630846e-06, "loss": 0.3102, "step": 13796 }, { "epoch": 2.246712535113789, "grad_norm": 0.17490513622760773, "learning_rate": 9.008259137881146e-06, "loss": 0.2793, "step": 13797 }, { "epoch": 2.24687538167162, "grad_norm": 0.12901897728443146, "learning_rate": 9.004618313605232e-06, "loss": 0.2423, "step": 13798 }, { "epoch": 2.2470382282294508, "grad_norm": 0.15965446829795837, "learning_rate": 9.000978063611453e-06, "loss": 0.2273, "step": 13799 }, { "epoch": 2.2472010747872817, "grad_norm": 0.16915355622768402, "learning_rate": 8.9973383880305e-06, "loss": 0.2664, "step": 13800 }, { "epoch": 2.2473639213451126, "grad_norm": 0.18665675818920135, "learning_rate": 8.993699286993029e-06, "loss": 0.264, "step": 13801 }, { "epoch": 2.2475267679029436, "grad_norm": 0.18577030301094055, "learning_rate": 8.990060760629723e-06, "loss": 0.2755, "step": 13802 }, { "epoch": 2.2476896144607745, "grad_norm": 0.19304580986499786, "learning_rate": 8.986422809071198e-06, "loss": 0.2619, "step": 13803 }, { "epoch": 2.247852461018605, "grad_norm": 0.18479742109775543, "learning_rate": 8.982785432448069e-06, "loss": 0.2408, "step": 13804 }, { "epoch": 2.248015307576436, "grad_norm": 0.1326363980770111, "learning_rate": 8.979148630890939e-06, "loss": 0.3052, "step": 13805 }, { "epoch": 2.248178154134267, "grad_norm": 0.12860247492790222, "learning_rate": 8.975512404530371e-06, "loss": 0.2939, "step": 13806 }, { "epoch": 2.248341000692098, "grad_norm": 0.1752350777387619, "learning_rate": 8.971876753496919e-06, "loss": 0.2912, "step": 13807 }, { "epoch": 2.248503847249929, "grad_norm": 0.16879183053970337, "learning_rate": 8.968241677921105e-06, "loss": 0.2544, "step": 13808 }, { "epoch": 2.2486666938077597, "grad_norm": 0.19552162289619446, "learning_rate": 8.964607177933455e-06, "loss": 0.2793, "step": 13809 }, { "epoch": 2.2488295403655907, "grad_norm": 0.1879986673593521, "learning_rate": 8.96097325366445e-06, "loss": 0.3213, "step": 13810 }, { "epoch": 2.2489923869234216, "grad_norm": 0.18234480917453766, "learning_rate": 8.957339905244553e-06, "loss": 0.3191, "step": 13811 }, { "epoch": 2.249155233481252, "grad_norm": 0.19152392446994781, "learning_rate": 8.953707132804227e-06, "loss": 0.2788, "step": 13812 }, { "epoch": 2.249318080039083, "grad_norm": 0.21010707318782806, "learning_rate": 8.95007493647389e-06, "loss": 0.2771, "step": 13813 }, { "epoch": 2.249480926596914, "grad_norm": 0.15741392970085144, "learning_rate": 8.946443316383949e-06, "loss": 0.2619, "step": 13814 }, { "epoch": 2.249643773154745, "grad_norm": 0.18006323277950287, "learning_rate": 8.942812272664783e-06, "loss": 0.2671, "step": 13815 }, { "epoch": 2.249806619712576, "grad_norm": 0.1845940351486206, "learning_rate": 8.939181805446772e-06, "loss": 0.3053, "step": 13816 }, { "epoch": 2.249969466270407, "grad_norm": 0.17370697855949402, "learning_rate": 8.935551914860255e-06, "loss": 0.3056, "step": 13817 }, { "epoch": 2.250132312828238, "grad_norm": 0.19378872215747833, "learning_rate": 8.931922601035555e-06, "loss": 0.2583, "step": 13818 }, { "epoch": 2.2502951593860683, "grad_norm": 0.2018062323331833, "learning_rate": 8.928293864102965e-06, "loss": 0.2716, "step": 13819 }, { "epoch": 2.250458005943899, "grad_norm": 0.15733632445335388, "learning_rate": 8.924665704192786e-06, "loss": 0.2798, "step": 13820 }, { "epoch": 2.25062085250173, "grad_norm": 0.19537073373794556, "learning_rate": 8.921038121435274e-06, "loss": 0.2474, "step": 13821 }, { "epoch": 2.250783699059561, "grad_norm": 0.2024754136800766, "learning_rate": 8.917411115960656e-06, "loss": 0.2865, "step": 13822 }, { "epoch": 2.250946545617392, "grad_norm": 0.14571614563465118, "learning_rate": 8.913784687899174e-06, "loss": 0.2865, "step": 13823 }, { "epoch": 2.251109392175223, "grad_norm": 0.19343633949756622, "learning_rate": 8.910158837381017e-06, "loss": 0.26, "step": 13824 }, { "epoch": 2.251272238733054, "grad_norm": 0.16798149049282074, "learning_rate": 8.906533564536362e-06, "loss": 0.234, "step": 13825 }, { "epoch": 2.2514350852908844, "grad_norm": 0.16815604269504547, "learning_rate": 8.90290886949536e-06, "loss": 0.268, "step": 13826 }, { "epoch": 2.2515979318487154, "grad_norm": 0.18424469232559204, "learning_rate": 8.89928475238817e-06, "loss": 0.3025, "step": 13827 }, { "epoch": 2.2517607784065463, "grad_norm": 0.1578761786222458, "learning_rate": 8.895661213344894e-06, "loss": 0.2746, "step": 13828 }, { "epoch": 2.2519236249643773, "grad_norm": 0.14010152220726013, "learning_rate": 8.892038252495632e-06, "loss": 0.3024, "step": 13829 }, { "epoch": 2.252086471522208, "grad_norm": 0.13190484046936035, "learning_rate": 8.88841586997045e-06, "loss": 0.2798, "step": 13830 }, { "epoch": 2.252249318080039, "grad_norm": 0.1644444465637207, "learning_rate": 8.884794065899416e-06, "loss": 0.2588, "step": 13831 }, { "epoch": 2.25241216463787, "grad_norm": 0.18276964128017426, "learning_rate": 8.881172840412549e-06, "loss": 0.2995, "step": 13832 }, { "epoch": 2.252575011195701, "grad_norm": 0.18531757593154907, "learning_rate": 8.877552193639878e-06, "loss": 0.26, "step": 13833 }, { "epoch": 2.252737857753532, "grad_norm": 0.19964046776294708, "learning_rate": 8.87393212571139e-06, "loss": 0.2649, "step": 13834 }, { "epoch": 2.2529007043113625, "grad_norm": 0.17563439905643463, "learning_rate": 8.870312636757045e-06, "loss": 0.2762, "step": 13835 }, { "epoch": 2.2530635508691934, "grad_norm": 0.15490518510341644, "learning_rate": 8.866693726906808e-06, "loss": 0.2433, "step": 13836 }, { "epoch": 2.2532263974270244, "grad_norm": 0.15447749197483063, "learning_rate": 8.863075396290597e-06, "loss": 0.259, "step": 13837 }, { "epoch": 2.2533892439848553, "grad_norm": 0.17138968408107758, "learning_rate": 8.859457645038335e-06, "loss": 0.2789, "step": 13838 }, { "epoch": 2.2535520905426862, "grad_norm": 0.1435946375131607, "learning_rate": 8.8558404732799e-06, "loss": 0.2552, "step": 13839 }, { "epoch": 2.253714937100517, "grad_norm": 0.18702758848667145, "learning_rate": 8.852223881145164e-06, "loss": 0.2903, "step": 13840 }, { "epoch": 2.253877783658348, "grad_norm": 0.1677926629781723, "learning_rate": 8.848607868763958e-06, "loss": 0.3011, "step": 13841 }, { "epoch": 2.2540406302161786, "grad_norm": 0.14824923872947693, "learning_rate": 8.844992436266133e-06, "loss": 0.2556, "step": 13842 }, { "epoch": 2.2542034767740096, "grad_norm": 0.1846870630979538, "learning_rate": 8.841377583781479e-06, "loss": 0.2761, "step": 13843 }, { "epoch": 2.2543663233318405, "grad_norm": 0.16928093135356903, "learning_rate": 8.837763311439775e-06, "loss": 0.2888, "step": 13844 }, { "epoch": 2.2545291698896714, "grad_norm": 0.1716630458831787, "learning_rate": 8.8341496193708e-06, "loss": 0.2704, "step": 13845 }, { "epoch": 2.2546920164475024, "grad_norm": 0.15242472290992737, "learning_rate": 8.830536507704288e-06, "loss": 0.2843, "step": 13846 }, { "epoch": 2.2548548630053333, "grad_norm": 0.15072229504585266, "learning_rate": 8.82692397656996e-06, "loss": 0.2378, "step": 13847 }, { "epoch": 2.2550177095631643, "grad_norm": 0.15875081717967987, "learning_rate": 8.823312026097507e-06, "loss": 0.2271, "step": 13848 }, { "epoch": 2.2551805561209948, "grad_norm": 0.16522812843322754, "learning_rate": 8.819700656416627e-06, "loss": 0.2555, "step": 13849 }, { "epoch": 2.2553434026788257, "grad_norm": 0.21430106461048126, "learning_rate": 8.816089867656973e-06, "loss": 0.2713, "step": 13850 }, { "epoch": 2.2555062492366567, "grad_norm": 0.16099834442138672, "learning_rate": 8.81247965994818e-06, "loss": 0.2763, "step": 13851 }, { "epoch": 2.2556690957944876, "grad_norm": 0.1590704321861267, "learning_rate": 8.808870033419859e-06, "loss": 0.2368, "step": 13852 }, { "epoch": 2.2558319423523185, "grad_norm": 0.18362535536289215, "learning_rate": 8.805260988201622e-06, "loss": 0.2654, "step": 13853 }, { "epoch": 2.2559947889101495, "grad_norm": 0.1745726615190506, "learning_rate": 8.801652524423035e-06, "loss": 0.2483, "step": 13854 }, { "epoch": 2.2561576354679804, "grad_norm": 0.18444706499576569, "learning_rate": 8.798044642213647e-06, "loss": 0.2588, "step": 13855 }, { "epoch": 2.2563204820258114, "grad_norm": 0.14797677099704742, "learning_rate": 8.794437341703008e-06, "loss": 0.245, "step": 13856 }, { "epoch": 2.2564833285836423, "grad_norm": 0.21138396859169006, "learning_rate": 8.790830623020618e-06, "loss": 0.2655, "step": 13857 }, { "epoch": 2.256646175141473, "grad_norm": 0.17584626376628876, "learning_rate": 8.787224486295976e-06, "loss": 0.2645, "step": 13858 }, { "epoch": 2.2568090216993038, "grad_norm": 0.15772588551044464, "learning_rate": 8.783618931658538e-06, "loss": 0.264, "step": 13859 }, { "epoch": 2.2569718682571347, "grad_norm": 0.18187502026557922, "learning_rate": 8.780013959237777e-06, "loss": 0.2526, "step": 13860 }, { "epoch": 2.2571347148149656, "grad_norm": 0.1475887894630432, "learning_rate": 8.77640956916311e-06, "loss": 0.2937, "step": 13861 }, { "epoch": 2.2572975613727966, "grad_norm": 0.16711114346981049, "learning_rate": 8.77280576156394e-06, "loss": 0.2858, "step": 13862 }, { "epoch": 2.2574604079306275, "grad_norm": 0.19585461914539337, "learning_rate": 8.769202536569667e-06, "loss": 0.2827, "step": 13863 }, { "epoch": 2.2576232544884585, "grad_norm": 0.15135261416435242, "learning_rate": 8.765599894309653e-06, "loss": 0.2407, "step": 13864 }, { "epoch": 2.257786101046289, "grad_norm": 0.14476333558559418, "learning_rate": 8.761997834913244e-06, "loss": 0.2381, "step": 13865 }, { "epoch": 2.25794894760412, "grad_norm": 0.17163066565990448, "learning_rate": 8.758396358509752e-06, "loss": 0.2729, "step": 13866 }, { "epoch": 2.258111794161951, "grad_norm": 0.16439086198806763, "learning_rate": 8.754795465228502e-06, "loss": 0.2887, "step": 13867 }, { "epoch": 2.258274640719782, "grad_norm": 0.18237629532814026, "learning_rate": 8.751195155198765e-06, "loss": 0.2611, "step": 13868 }, { "epoch": 2.2584374872776127, "grad_norm": 0.20339752733707428, "learning_rate": 8.747595428549798e-06, "loss": 0.3007, "step": 13869 }, { "epoch": 2.2586003338354437, "grad_norm": 0.14180690050125122, "learning_rate": 8.743996285410855e-06, "loss": 0.2657, "step": 13870 }, { "epoch": 2.2587631803932746, "grad_norm": 0.24061185121536255, "learning_rate": 8.740397725911151e-06, "loss": 0.3327, "step": 13871 }, { "epoch": 2.258926026951105, "grad_norm": 0.18154466152191162, "learning_rate": 8.736799750179874e-06, "loss": 0.2879, "step": 13872 }, { "epoch": 2.259088873508936, "grad_norm": 0.17105889320373535, "learning_rate": 8.733202358346223e-06, "loss": 0.2954, "step": 13873 }, { "epoch": 2.259251720066767, "grad_norm": 0.1350947767496109, "learning_rate": 8.729605550539336e-06, "loss": 0.2961, "step": 13874 }, { "epoch": 2.259414566624598, "grad_norm": 0.16013407707214355, "learning_rate": 8.726009326888363e-06, "loss": 0.3074, "step": 13875 }, { "epoch": 2.259577413182429, "grad_norm": 0.15742947161197662, "learning_rate": 8.722413687522413e-06, "loss": 0.2953, "step": 13876 }, { "epoch": 2.25974025974026, "grad_norm": 0.18077310919761658, "learning_rate": 8.718818632570573e-06, "loss": 0.2731, "step": 13877 }, { "epoch": 2.2599031062980908, "grad_norm": 0.1661640852689743, "learning_rate": 8.715224162161933e-06, "loss": 0.2628, "step": 13878 }, { "epoch": 2.2600659528559213, "grad_norm": 0.1609259992837906, "learning_rate": 8.711630276425533e-06, "loss": 0.2535, "step": 13879 }, { "epoch": 2.260228799413752, "grad_norm": 0.1918058544397354, "learning_rate": 8.70803697549041e-06, "loss": 0.2991, "step": 13880 }, { "epoch": 2.260391645971583, "grad_norm": 0.1724778264760971, "learning_rate": 8.704444259485565e-06, "loss": 0.2918, "step": 13881 }, { "epoch": 2.260554492529414, "grad_norm": 0.17533190548419952, "learning_rate": 8.70085212854e-06, "loss": 0.2651, "step": 13882 }, { "epoch": 2.260717339087245, "grad_norm": 0.180095836520195, "learning_rate": 8.697260582782677e-06, "loss": 0.2839, "step": 13883 }, { "epoch": 2.260880185645076, "grad_norm": 0.17450518906116486, "learning_rate": 8.693669622342535e-06, "loss": 0.3137, "step": 13884 }, { "epoch": 2.261043032202907, "grad_norm": 0.19688044488430023, "learning_rate": 8.690079247348517e-06, "loss": 0.2851, "step": 13885 }, { "epoch": 2.261205878760738, "grad_norm": 0.13329064846038818, "learning_rate": 8.68648945792952e-06, "loss": 0.2778, "step": 13886 }, { "epoch": 2.261368725318569, "grad_norm": 0.18133561313152313, "learning_rate": 8.682900254214426e-06, "loss": 0.2512, "step": 13887 }, { "epoch": 2.2615315718763993, "grad_norm": 0.17209254205226898, "learning_rate": 8.679311636332096e-06, "loss": 0.3385, "step": 13888 }, { "epoch": 2.2616944184342302, "grad_norm": 0.21652643382549286, "learning_rate": 8.675723604411381e-06, "loss": 0.264, "step": 13889 }, { "epoch": 2.261857264992061, "grad_norm": 0.1666075736284256, "learning_rate": 8.672136158581101e-06, "loss": 0.2752, "step": 13890 }, { "epoch": 2.262020111549892, "grad_norm": 0.13475391268730164, "learning_rate": 8.66854929897005e-06, "loss": 0.2522, "step": 13891 }, { "epoch": 2.262182958107723, "grad_norm": 0.162857323884964, "learning_rate": 8.664963025707e-06, "loss": 0.2865, "step": 13892 }, { "epoch": 2.262345804665554, "grad_norm": 0.20098039507865906, "learning_rate": 8.661377338920732e-06, "loss": 0.3021, "step": 13893 }, { "epoch": 2.262508651223385, "grad_norm": 0.1913299560546875, "learning_rate": 8.657792238739965e-06, "loss": 0.306, "step": 13894 }, { "epoch": 2.2626714977812155, "grad_norm": 0.15802812576293945, "learning_rate": 8.654207725293411e-06, "loss": 0.2749, "step": 13895 }, { "epoch": 2.2628343443390464, "grad_norm": 0.26107028126716614, "learning_rate": 8.650623798709781e-06, "loss": 0.2327, "step": 13896 }, { "epoch": 2.2629971908968773, "grad_norm": 0.1583070158958435, "learning_rate": 8.647040459117743e-06, "loss": 0.2969, "step": 13897 }, { "epoch": 2.2631600374547083, "grad_norm": 0.14514191448688507, "learning_rate": 8.643457706645947e-06, "loss": 0.2743, "step": 13898 }, { "epoch": 2.2633228840125392, "grad_norm": 0.15824653208255768, "learning_rate": 8.639875541423014e-06, "loss": 0.2548, "step": 13899 }, { "epoch": 2.26348573057037, "grad_norm": 0.14087799191474915, "learning_rate": 8.636293963577574e-06, "loss": 0.2885, "step": 13900 }, { "epoch": 2.263648577128201, "grad_norm": 0.1836087852716446, "learning_rate": 8.632712973238211e-06, "loss": 0.2728, "step": 13901 }, { "epoch": 2.2638114236860316, "grad_norm": 0.1978100687265396, "learning_rate": 8.629132570533488e-06, "loss": 0.2578, "step": 13902 }, { "epoch": 2.2639742702438626, "grad_norm": 0.1747070550918579, "learning_rate": 8.625552755591945e-06, "loss": 0.2875, "step": 13903 }, { "epoch": 2.2641371168016935, "grad_norm": 0.16379830241203308, "learning_rate": 8.621973528542126e-06, "loss": 0.2747, "step": 13904 }, { "epoch": 2.2642999633595244, "grad_norm": 0.15886299312114716, "learning_rate": 8.618394889512526e-06, "loss": 0.2529, "step": 13905 }, { "epoch": 2.2644628099173554, "grad_norm": 0.17277418076992035, "learning_rate": 8.614816838631624e-06, "loss": 0.2905, "step": 13906 }, { "epoch": 2.2646256564751863, "grad_norm": 0.14930571615695953, "learning_rate": 8.611239376027897e-06, "loss": 0.2438, "step": 13907 }, { "epoch": 2.2647885030330173, "grad_norm": 0.1590302586555481, "learning_rate": 8.607662501829777e-06, "loss": 0.3101, "step": 13908 }, { "epoch": 2.264951349590848, "grad_norm": 0.17118346691131592, "learning_rate": 8.604086216165677e-06, "loss": 0.2822, "step": 13909 }, { "epoch": 2.265114196148679, "grad_norm": 0.1889207810163498, "learning_rate": 8.600510519164016e-06, "loss": 0.3303, "step": 13910 }, { "epoch": 2.2652770427065096, "grad_norm": 0.16252057254314423, "learning_rate": 8.59693541095316e-06, "loss": 0.254, "step": 13911 }, { "epoch": 2.2654398892643406, "grad_norm": 0.12815116345882416, "learning_rate": 8.593360891661462e-06, "loss": 0.2619, "step": 13912 }, { "epoch": 2.2656027358221715, "grad_norm": 0.1441047340631485, "learning_rate": 8.589786961417268e-06, "loss": 0.2917, "step": 13913 }, { "epoch": 2.2657655823800025, "grad_norm": 0.22746865451335907, "learning_rate": 8.586213620348882e-06, "loss": 0.2395, "step": 13914 }, { "epoch": 2.2659284289378334, "grad_norm": 0.14516447484493256, "learning_rate": 8.582640868584612e-06, "loss": 0.2682, "step": 13915 }, { "epoch": 2.2660912754956644, "grad_norm": 0.20963594317436218, "learning_rate": 8.579068706252722e-06, "loss": 0.2945, "step": 13916 }, { "epoch": 2.2662541220534953, "grad_norm": 0.1399429589509964, "learning_rate": 8.575497133481455e-06, "loss": 0.2699, "step": 13917 }, { "epoch": 2.266416968611326, "grad_norm": 0.18681591749191284, "learning_rate": 8.571926150399063e-06, "loss": 0.2638, "step": 13918 }, { "epoch": 2.2665798151691567, "grad_norm": 0.21371634304523468, "learning_rate": 8.568355757133737e-06, "loss": 0.3015, "step": 13919 }, { "epoch": 2.2667426617269877, "grad_norm": 0.17287719249725342, "learning_rate": 8.56478595381367e-06, "loss": 0.2442, "step": 13920 }, { "epoch": 2.2669055082848186, "grad_norm": 0.1389186531305313, "learning_rate": 8.561216740567022e-06, "loss": 0.25, "step": 13921 }, { "epoch": 2.2670683548426496, "grad_norm": 0.16359958052635193, "learning_rate": 8.557648117521953e-06, "loss": 0.248, "step": 13922 }, { "epoch": 2.2672312014004805, "grad_norm": 0.21638695895671844, "learning_rate": 8.554080084806579e-06, "loss": 0.2691, "step": 13923 }, { "epoch": 2.2673940479583115, "grad_norm": 0.20998357236385345, "learning_rate": 8.550512642548994e-06, "loss": 0.278, "step": 13924 }, { "epoch": 2.267556894516142, "grad_norm": 0.21647489070892334, "learning_rate": 8.5469457908773e-06, "loss": 0.2863, "step": 13925 }, { "epoch": 2.267719741073973, "grad_norm": 0.17827928066253662, "learning_rate": 8.543379529919546e-06, "loss": 0.2892, "step": 13926 }, { "epoch": 2.267882587631804, "grad_norm": 0.21996000409126282, "learning_rate": 8.53981385980377e-06, "loss": 0.2685, "step": 13927 }, { "epoch": 2.268045434189635, "grad_norm": 0.16933128237724304, "learning_rate": 8.536248780657985e-06, "loss": 0.2265, "step": 13928 }, { "epoch": 2.2682082807474657, "grad_norm": 0.15754148364067078, "learning_rate": 8.532684292610205e-06, "loss": 0.2895, "step": 13929 }, { "epoch": 2.2683711273052967, "grad_norm": 0.2094559669494629, "learning_rate": 8.529120395788395e-06, "loss": 0.2874, "step": 13930 }, { "epoch": 2.2685339738631276, "grad_norm": 0.1717619001865387, "learning_rate": 8.52555709032051e-06, "loss": 0.2702, "step": 13931 }, { "epoch": 2.268696820420958, "grad_norm": 0.28553104400634766, "learning_rate": 8.52199437633448e-06, "loss": 0.2802, "step": 13932 }, { "epoch": 2.268859666978789, "grad_norm": 0.1812339425086975, "learning_rate": 8.518432253958225e-06, "loss": 0.2588, "step": 13933 }, { "epoch": 2.26902251353662, "grad_norm": 0.15986980497837067, "learning_rate": 8.514870723319635e-06, "loss": 0.2741, "step": 13934 }, { "epoch": 2.269185360094451, "grad_norm": 0.1786632090806961, "learning_rate": 8.511309784546567e-06, "loss": 0.2852, "step": 13935 }, { "epoch": 2.269348206652282, "grad_norm": 0.19789911806583405, "learning_rate": 8.507749437766887e-06, "loss": 0.2823, "step": 13936 }, { "epoch": 2.269511053210113, "grad_norm": 0.16110573709011078, "learning_rate": 8.504189683108415e-06, "loss": 0.2729, "step": 13937 }, { "epoch": 2.2696738997679438, "grad_norm": 0.1737545281648636, "learning_rate": 8.500630520698957e-06, "loss": 0.264, "step": 13938 }, { "epoch": 2.2698367463257747, "grad_norm": 0.14692793786525726, "learning_rate": 8.497071950666288e-06, "loss": 0.2785, "step": 13939 }, { "epoch": 2.2699995928836056, "grad_norm": 0.18491505086421967, "learning_rate": 8.493513973138189e-06, "loss": 0.2732, "step": 13940 }, { "epoch": 2.270162439441436, "grad_norm": 0.20934216678142548, "learning_rate": 8.489956588242393e-06, "loss": 0.2576, "step": 13941 }, { "epoch": 2.270325285999267, "grad_norm": 0.23870573937892914, "learning_rate": 8.48639979610662e-06, "loss": 0.2778, "step": 13942 }, { "epoch": 2.270488132557098, "grad_norm": 0.15523681044578552, "learning_rate": 8.482843596858562e-06, "loss": 0.2833, "step": 13943 }, { "epoch": 2.270650979114929, "grad_norm": 0.19671978056430817, "learning_rate": 8.479287990625914e-06, "loss": 0.2762, "step": 13944 }, { "epoch": 2.27081382567276, "grad_norm": 0.1913757473230362, "learning_rate": 8.475732977536325e-06, "loss": 0.2853, "step": 13945 }, { "epoch": 2.270976672230591, "grad_norm": 0.16028201580047607, "learning_rate": 8.472178557717423e-06, "loss": 0.2473, "step": 13946 }, { "epoch": 2.271139518788422, "grad_norm": 0.1641290783882141, "learning_rate": 8.468624731296835e-06, "loss": 0.2308, "step": 13947 }, { "epoch": 2.2713023653462523, "grad_norm": 0.17125873267650604, "learning_rate": 8.46507149840215e-06, "loss": 0.265, "step": 13948 }, { "epoch": 2.2714652119040832, "grad_norm": 0.17403896152973175, "learning_rate": 8.46151885916093e-06, "loss": 0.2603, "step": 13949 }, { "epoch": 2.271628058461914, "grad_norm": 0.1994960755109787, "learning_rate": 8.45796681370074e-06, "loss": 0.3318, "step": 13950 }, { "epoch": 2.271790905019745, "grad_norm": 0.15183918178081512, "learning_rate": 8.454415362149098e-06, "loss": 0.3125, "step": 13951 }, { "epoch": 2.271953751577576, "grad_norm": 0.16281376779079437, "learning_rate": 8.450864504633521e-06, "loss": 0.2551, "step": 13952 }, { "epoch": 2.272116598135407, "grad_norm": 0.1728174090385437, "learning_rate": 8.447314241281492e-06, "loss": 0.2683, "step": 13953 }, { "epoch": 2.272279444693238, "grad_norm": 0.20626498758792877, "learning_rate": 8.44376457222047e-06, "loss": 0.2555, "step": 13954 }, { "epoch": 2.2724422912510684, "grad_norm": 0.17757397890090942, "learning_rate": 8.440215497577908e-06, "loss": 0.3206, "step": 13955 }, { "epoch": 2.2726051378088994, "grad_norm": 0.20344877243041992, "learning_rate": 8.436667017481223e-06, "loss": 0.2607, "step": 13956 }, { "epoch": 2.2727679843667303, "grad_norm": 0.167478546500206, "learning_rate": 8.433119132057813e-06, "loss": 0.3328, "step": 13957 }, { "epoch": 2.2729308309245613, "grad_norm": 0.24156877398490906, "learning_rate": 8.429571841435066e-06, "loss": 0.2557, "step": 13958 }, { "epoch": 2.273093677482392, "grad_norm": 0.14361326396465302, "learning_rate": 8.42602514574034e-06, "loss": 0.2854, "step": 13959 }, { "epoch": 2.273256524040223, "grad_norm": 0.12969297170639038, "learning_rate": 8.422479045100964e-06, "loss": 0.2949, "step": 13960 }, { "epoch": 2.273419370598054, "grad_norm": 0.190165713429451, "learning_rate": 8.418933539644252e-06, "loss": 0.2726, "step": 13961 }, { "epoch": 2.273582217155885, "grad_norm": 0.16951440274715424, "learning_rate": 8.415388629497515e-06, "loss": 0.2644, "step": 13962 }, { "epoch": 2.273745063713716, "grad_norm": 0.15852823853492737, "learning_rate": 8.411844314788011e-06, "loss": 0.2425, "step": 13963 }, { "epoch": 2.2739079102715465, "grad_norm": 0.13851052522659302, "learning_rate": 8.40830059564299e-06, "loss": 0.3032, "step": 13964 }, { "epoch": 2.2740707568293774, "grad_norm": 0.1798313707113266, "learning_rate": 8.404757472189694e-06, "loss": 0.2554, "step": 13965 }, { "epoch": 2.2742336033872084, "grad_norm": 0.18712593615055084, "learning_rate": 8.401214944555327e-06, "loss": 0.258, "step": 13966 }, { "epoch": 2.2743964499450393, "grad_norm": 0.22630298137664795, "learning_rate": 8.397673012867071e-06, "loss": 0.3016, "step": 13967 }, { "epoch": 2.2745592965028703, "grad_norm": 0.17543499171733856, "learning_rate": 8.394131677252093e-06, "loss": 0.2691, "step": 13968 }, { "epoch": 2.274722143060701, "grad_norm": 0.2196406126022339, "learning_rate": 8.390590937837544e-06, "loss": 0.3012, "step": 13969 }, { "epoch": 2.274884989618532, "grad_norm": 0.1301005631685257, "learning_rate": 8.387050794750548e-06, "loss": 0.2693, "step": 13970 }, { "epoch": 2.2750478361763626, "grad_norm": 0.16312626004219055, "learning_rate": 8.383511248118197e-06, "loss": 0.2416, "step": 13971 }, { "epoch": 2.2752106827341936, "grad_norm": 0.18515415489673615, "learning_rate": 8.379972298067573e-06, "loss": 0.2856, "step": 13972 }, { "epoch": 2.2753735292920245, "grad_norm": 0.1491144448518753, "learning_rate": 8.376433944725741e-06, "loss": 0.3072, "step": 13973 }, { "epoch": 2.2755363758498555, "grad_norm": 0.19829745590686798, "learning_rate": 8.372896188219742e-06, "loss": 0.2605, "step": 13974 }, { "epoch": 2.2756992224076864, "grad_norm": 0.19382989406585693, "learning_rate": 8.369359028676574e-06, "loss": 0.2792, "step": 13975 }, { "epoch": 2.2758620689655173, "grad_norm": 0.17416058480739594, "learning_rate": 8.365822466223252e-06, "loss": 0.2967, "step": 13976 }, { "epoch": 2.2760249155233483, "grad_norm": 0.1916881799697876, "learning_rate": 8.362286500986744e-06, "loss": 0.2619, "step": 13977 }, { "epoch": 2.276187762081179, "grad_norm": 0.20376834273338318, "learning_rate": 8.358751133093995e-06, "loss": 0.3317, "step": 13978 }, { "epoch": 2.2763506086390097, "grad_norm": 0.16465993225574493, "learning_rate": 8.355216362671933e-06, "loss": 0.2568, "step": 13979 }, { "epoch": 2.2765134551968407, "grad_norm": 0.1531556397676468, "learning_rate": 8.351682189847479e-06, "loss": 0.2433, "step": 13980 }, { "epoch": 2.2766763017546716, "grad_norm": 0.16044603288173676, "learning_rate": 8.348148614747514e-06, "loss": 0.2931, "step": 13981 }, { "epoch": 2.2768391483125026, "grad_norm": 0.16663439571857452, "learning_rate": 8.344615637498906e-06, "loss": 0.2959, "step": 13982 }, { "epoch": 2.2770019948703335, "grad_norm": 0.12135230004787445, "learning_rate": 8.341083258228488e-06, "loss": 0.2765, "step": 13983 }, { "epoch": 2.2771648414281644, "grad_norm": 0.1842063069343567, "learning_rate": 8.337551477063102e-06, "loss": 0.2367, "step": 13984 }, { "epoch": 2.2773276879859954, "grad_norm": 0.19541184604167938, "learning_rate": 8.334020294129538e-06, "loss": 0.2699, "step": 13985 }, { "epoch": 2.2774905345438263, "grad_norm": 0.18696638941764832, "learning_rate": 8.330489709554571e-06, "loss": 0.3108, "step": 13986 }, { "epoch": 2.277653381101657, "grad_norm": 0.16987942159175873, "learning_rate": 8.326959723464975e-06, "loss": 0.2304, "step": 13987 }, { "epoch": 2.2778162276594878, "grad_norm": 0.17616425454616547, "learning_rate": 8.32343033598747e-06, "loss": 0.2957, "step": 13988 }, { "epoch": 2.2779790742173187, "grad_norm": 0.1537146270275116, "learning_rate": 8.319901547248788e-06, "loss": 0.2544, "step": 13989 }, { "epoch": 2.2781419207751497, "grad_norm": 0.16232776641845703, "learning_rate": 8.316373357375615e-06, "loss": 0.2651, "step": 13990 }, { "epoch": 2.2783047673329806, "grad_norm": 0.14273950457572937, "learning_rate": 8.312845766494618e-06, "loss": 0.2925, "step": 13991 }, { "epoch": 2.2784676138908115, "grad_norm": 0.207235187292099, "learning_rate": 8.30931877473246e-06, "loss": 0.2838, "step": 13992 }, { "epoch": 2.2786304604486425, "grad_norm": 0.17798852920532227, "learning_rate": 8.305792382215766e-06, "loss": 0.277, "step": 13993 }, { "epoch": 2.278793307006473, "grad_norm": 0.14155855774879456, "learning_rate": 8.302266589071134e-06, "loss": 0.2594, "step": 13994 }, { "epoch": 2.278956153564304, "grad_norm": 0.2026265561580658, "learning_rate": 8.298741395425166e-06, "loss": 0.2811, "step": 13995 }, { "epoch": 2.279119000122135, "grad_norm": 0.16150371730327606, "learning_rate": 8.295216801404421e-06, "loss": 0.2869, "step": 13996 }, { "epoch": 2.279281846679966, "grad_norm": 0.20450741052627563, "learning_rate": 8.291692807135434e-06, "loss": 0.2714, "step": 13997 }, { "epoch": 2.2794446932377967, "grad_norm": 0.20287837088108063, "learning_rate": 8.288169412744742e-06, "loss": 0.2848, "step": 13998 }, { "epoch": 2.2796075397956277, "grad_norm": 0.17679516971111298, "learning_rate": 8.28464661835884e-06, "loss": 0.2696, "step": 13999 }, { "epoch": 2.2797703863534586, "grad_norm": 0.1847851425409317, "learning_rate": 8.281124424104203e-06, "loss": 0.2331, "step": 14000 }, { "epoch": 2.279933232911289, "grad_norm": 0.16994312405586243, "learning_rate": 8.277602830107283e-06, "loss": 0.2788, "step": 14001 }, { "epoch": 2.28009607946912, "grad_norm": 0.18363335728645325, "learning_rate": 8.274081836494532e-06, "loss": 0.2788, "step": 14002 }, { "epoch": 2.280258926026951, "grad_norm": 0.18633057177066803, "learning_rate": 8.270561443392356e-06, "loss": 0.3352, "step": 14003 }, { "epoch": 2.280421772584782, "grad_norm": 0.2176559567451477, "learning_rate": 8.267041650927144e-06, "loss": 0.2678, "step": 14004 }, { "epoch": 2.280584619142613, "grad_norm": 0.1698375940322876, "learning_rate": 8.263522459225262e-06, "loss": 0.259, "step": 14005 }, { "epoch": 2.280747465700444, "grad_norm": 0.13457055389881134, "learning_rate": 8.260003868413077e-06, "loss": 0.2737, "step": 14006 }, { "epoch": 2.280910312258275, "grad_norm": 0.16260461509227753, "learning_rate": 8.256485878616907e-06, "loss": 0.2804, "step": 14007 }, { "epoch": 2.2810731588161053, "grad_norm": 0.19189274311065674, "learning_rate": 8.25296848996305e-06, "loss": 0.2783, "step": 14008 }, { "epoch": 2.2812360053739362, "grad_norm": 0.15991750359535217, "learning_rate": 8.24945170257781e-06, "loss": 0.2841, "step": 14009 }, { "epoch": 2.281398851931767, "grad_norm": 0.2631703019142151, "learning_rate": 8.245935516587438e-06, "loss": 0.3376, "step": 14010 }, { "epoch": 2.281561698489598, "grad_norm": 0.2049790769815445, "learning_rate": 8.242419932118179e-06, "loss": 0.2767, "step": 14011 }, { "epoch": 2.281724545047429, "grad_norm": 0.1482919305562973, "learning_rate": 8.238904949296242e-06, "loss": 0.2893, "step": 14012 }, { "epoch": 2.28188739160526, "grad_norm": 0.17791669070720673, "learning_rate": 8.235390568247845e-06, "loss": 0.2841, "step": 14013 }, { "epoch": 2.282050238163091, "grad_norm": 0.19641393423080444, "learning_rate": 8.231876789099152e-06, "loss": 0.2812, "step": 14014 }, { "epoch": 2.282213084720922, "grad_norm": 0.17534270882606506, "learning_rate": 8.228363611976315e-06, "loss": 0.2708, "step": 14015 }, { "epoch": 2.282375931278753, "grad_norm": 0.1526378095149994, "learning_rate": 8.22485103700548e-06, "loss": 0.2856, "step": 14016 }, { "epoch": 2.2825387778365833, "grad_norm": 0.1352141797542572, "learning_rate": 8.221339064312752e-06, "loss": 0.2876, "step": 14017 }, { "epoch": 2.2827016243944143, "grad_norm": 0.1585397720336914, "learning_rate": 8.217827694024221e-06, "loss": 0.2454, "step": 14018 }, { "epoch": 2.282864470952245, "grad_norm": 0.19313205778598785, "learning_rate": 8.214316926265953e-06, "loss": 0.2877, "step": 14019 }, { "epoch": 2.283027317510076, "grad_norm": 0.15830188989639282, "learning_rate": 8.210806761164002e-06, "loss": 0.2475, "step": 14020 }, { "epoch": 2.283190164067907, "grad_norm": 0.23218920826911926, "learning_rate": 8.207297198844393e-06, "loss": 0.2627, "step": 14021 }, { "epoch": 2.283353010625738, "grad_norm": 0.2056502252817154, "learning_rate": 8.203788239433127e-06, "loss": 0.2993, "step": 14022 }, { "epoch": 2.283515857183569, "grad_norm": 0.1937299519777298, "learning_rate": 8.200279883056175e-06, "loss": 0.2774, "step": 14023 }, { "epoch": 2.2836787037413995, "grad_norm": 0.18075378239154816, "learning_rate": 8.19677212983952e-06, "loss": 0.2953, "step": 14024 }, { "epoch": 2.2838415502992304, "grad_norm": 0.18188099563121796, "learning_rate": 8.193264979909079e-06, "loss": 0.2543, "step": 14025 }, { "epoch": 2.2840043968570614, "grad_norm": 0.23233148455619812, "learning_rate": 8.189758433390787e-06, "loss": 0.3153, "step": 14026 }, { "epoch": 2.2841672434148923, "grad_norm": 0.18541862070560455, "learning_rate": 8.186252490410535e-06, "loss": 0.2611, "step": 14027 }, { "epoch": 2.2843300899727232, "grad_norm": 0.1389061063528061, "learning_rate": 8.182747151094183e-06, "loss": 0.2503, "step": 14028 }, { "epoch": 2.284492936530554, "grad_norm": 0.1492311805486679, "learning_rate": 8.179242415567604e-06, "loss": 0.2611, "step": 14029 }, { "epoch": 2.284655783088385, "grad_norm": 0.159955695271492, "learning_rate": 8.175738283956616e-06, "loss": 0.2723, "step": 14030 }, { "epoch": 2.2848186296462156, "grad_norm": 0.21058940887451172, "learning_rate": 8.172234756387026e-06, "loss": 0.2418, "step": 14031 }, { "epoch": 2.2849814762040466, "grad_norm": 0.16082671284675598, "learning_rate": 8.16873183298463e-06, "loss": 0.2327, "step": 14032 }, { "epoch": 2.2851443227618775, "grad_norm": 0.17083729803562164, "learning_rate": 8.165229513875191e-06, "loss": 0.2339, "step": 14033 }, { "epoch": 2.2853071693197085, "grad_norm": 0.17192263901233673, "learning_rate": 8.161727799184443e-06, "loss": 0.2602, "step": 14034 }, { "epoch": 2.2854700158775394, "grad_norm": 0.19551415741443634, "learning_rate": 8.158226689038123e-06, "loss": 0.2964, "step": 14035 }, { "epoch": 2.2856328624353703, "grad_norm": 0.16930906474590302, "learning_rate": 8.154726183561926e-06, "loss": 0.2788, "step": 14036 }, { "epoch": 2.2857957089932013, "grad_norm": 0.21201148629188538, "learning_rate": 8.151226282881518e-06, "loss": 0.3077, "step": 14037 }, { "epoch": 2.2859585555510322, "grad_norm": 0.16479699313640594, "learning_rate": 8.147726987122579e-06, "loss": 0.2855, "step": 14038 }, { "epoch": 2.286121402108863, "grad_norm": 0.14045007526874542, "learning_rate": 8.144228296410727e-06, "loss": 0.259, "step": 14039 }, { "epoch": 2.2862842486666937, "grad_norm": 0.15570230782032013, "learning_rate": 8.140730210871583e-06, "loss": 0.2832, "step": 14040 }, { "epoch": 2.2864470952245246, "grad_norm": 0.17033171653747559, "learning_rate": 8.137232730630728e-06, "loss": 0.2795, "step": 14041 }, { "epoch": 2.2866099417823555, "grad_norm": 0.2068382054567337, "learning_rate": 8.133735855813749e-06, "loss": 0.292, "step": 14042 }, { "epoch": 2.2867727883401865, "grad_norm": 0.15077199041843414, "learning_rate": 8.130239586546181e-06, "loss": 0.2549, "step": 14043 }, { "epoch": 2.2869356348980174, "grad_norm": 0.15189850330352783, "learning_rate": 8.126743922953561e-06, "loss": 0.2923, "step": 14044 }, { "epoch": 2.2870984814558484, "grad_norm": 0.1522017866373062, "learning_rate": 8.123248865161375e-06, "loss": 0.2699, "step": 14045 }, { "epoch": 2.2872613280136793, "grad_norm": 0.1616068333387375, "learning_rate": 8.119754413295131e-06, "loss": 0.2718, "step": 14046 }, { "epoch": 2.28742417457151, "grad_norm": 0.15450812876224518, "learning_rate": 8.116260567480275e-06, "loss": 0.2594, "step": 14047 }, { "epoch": 2.2875870211293408, "grad_norm": 0.17835576832294464, "learning_rate": 8.112767327842242e-06, "loss": 0.2495, "step": 14048 }, { "epoch": 2.2877498676871717, "grad_norm": 0.1695723831653595, "learning_rate": 8.109274694506467e-06, "loss": 0.2345, "step": 14049 }, { "epoch": 2.2879127142450026, "grad_norm": 0.11806027591228485, "learning_rate": 8.105782667598335e-06, "loss": 0.2656, "step": 14050 }, { "epoch": 2.2880755608028336, "grad_norm": 0.19421951472759247, "learning_rate": 8.102291247243221e-06, "loss": 0.2483, "step": 14051 }, { "epoch": 2.2882384073606645, "grad_norm": 0.18994414806365967, "learning_rate": 8.098800433566473e-06, "loss": 0.2589, "step": 14052 }, { "epoch": 2.2884012539184955, "grad_norm": 0.17759911715984344, "learning_rate": 8.095310226693434e-06, "loss": 0.2988, "step": 14053 }, { "epoch": 2.288564100476326, "grad_norm": 0.18921232223510742, "learning_rate": 8.091820626749402e-06, "loss": 0.2779, "step": 14054 }, { "epoch": 2.288726947034157, "grad_norm": 0.17044153809547424, "learning_rate": 8.088331633859664e-06, "loss": 0.268, "step": 14055 }, { "epoch": 2.288889793591988, "grad_norm": 0.16568168997764587, "learning_rate": 8.084843248149496e-06, "loss": 0.266, "step": 14056 }, { "epoch": 2.289052640149819, "grad_norm": 0.18159501254558563, "learning_rate": 8.081355469744134e-06, "loss": 0.293, "step": 14057 }, { "epoch": 2.2892154867076497, "grad_norm": 0.21330276131629944, "learning_rate": 8.0778682987688e-06, "loss": 0.3036, "step": 14058 }, { "epoch": 2.2893783332654807, "grad_norm": 0.16080088913440704, "learning_rate": 8.074381735348685e-06, "loss": 0.2422, "step": 14059 }, { "epoch": 2.2895411798233116, "grad_norm": 0.15992866456508636, "learning_rate": 8.070895779608986e-06, "loss": 0.2477, "step": 14060 }, { "epoch": 2.289704026381142, "grad_norm": 0.184626966714859, "learning_rate": 8.067410431674848e-06, "loss": 0.245, "step": 14061 }, { "epoch": 2.289866872938973, "grad_norm": 0.19971689581871033, "learning_rate": 8.063925691671408e-06, "loss": 0.2833, "step": 14062 }, { "epoch": 2.290029719496804, "grad_norm": 0.22208476066589355, "learning_rate": 8.060441559723771e-06, "loss": 0.2896, "step": 14063 }, { "epoch": 2.290192566054635, "grad_norm": 0.2171751856803894, "learning_rate": 8.056958035957041e-06, "loss": 0.3132, "step": 14064 }, { "epoch": 2.290355412612466, "grad_norm": 0.15099012851715088, "learning_rate": 8.053475120496273e-06, "loss": 0.2392, "step": 14065 }, { "epoch": 2.290518259170297, "grad_norm": 0.1706400215625763, "learning_rate": 8.049992813466528e-06, "loss": 0.2844, "step": 14066 }, { "epoch": 2.2906811057281278, "grad_norm": 0.16975393891334534, "learning_rate": 8.046511114992825e-06, "loss": 0.2758, "step": 14067 }, { "epoch": 2.2908439522859587, "grad_norm": 0.17640741169452667, "learning_rate": 8.043030025200158e-06, "loss": 0.2572, "step": 14068 }, { "epoch": 2.2910067988437897, "grad_norm": 0.2313419133424759, "learning_rate": 8.039549544213526e-06, "loss": 0.272, "step": 14069 }, { "epoch": 2.29116964540162, "grad_norm": 0.18931446969509125, "learning_rate": 8.036069672157872e-06, "loss": 0.2817, "step": 14070 }, { "epoch": 2.291332491959451, "grad_norm": 0.1458805948495865, "learning_rate": 8.032590409158153e-06, "loss": 0.2633, "step": 14071 }, { "epoch": 2.291495338517282, "grad_norm": 0.206597238779068, "learning_rate": 8.029111755339268e-06, "loss": 0.2494, "step": 14072 }, { "epoch": 2.291658185075113, "grad_norm": 0.13333354890346527, "learning_rate": 8.025633710826123e-06, "loss": 0.3223, "step": 14073 }, { "epoch": 2.291821031632944, "grad_norm": 0.2156541645526886, "learning_rate": 8.022156275743575e-06, "loss": 0.2869, "step": 14074 }, { "epoch": 2.291983878190775, "grad_norm": 0.1896364539861679, "learning_rate": 8.01867945021649e-06, "loss": 0.3226, "step": 14075 }, { "epoch": 2.292146724748606, "grad_norm": 0.17836704850196838, "learning_rate": 8.015203234369692e-06, "loss": 0.2972, "step": 14076 }, { "epoch": 2.2923095713064363, "grad_norm": 0.1920025795698166, "learning_rate": 8.011727628327975e-06, "loss": 0.2606, "step": 14077 }, { "epoch": 2.2924724178642673, "grad_norm": 0.15377679467201233, "learning_rate": 8.008252632216145e-06, "loss": 0.2724, "step": 14078 }, { "epoch": 2.292635264422098, "grad_norm": 0.1700218915939331, "learning_rate": 8.004778246158953e-06, "loss": 0.2743, "step": 14079 }, { "epoch": 2.292798110979929, "grad_norm": 0.19377994537353516, "learning_rate": 8.00130447028114e-06, "loss": 0.2575, "step": 14080 }, { "epoch": 2.29296095753776, "grad_norm": 0.19574213027954102, "learning_rate": 7.99783130470742e-06, "loss": 0.2794, "step": 14081 }, { "epoch": 2.293123804095591, "grad_norm": 0.18378427624702454, "learning_rate": 7.994358749562502e-06, "loss": 0.2611, "step": 14082 }, { "epoch": 2.293286650653422, "grad_norm": 0.15861554443836212, "learning_rate": 7.990886804971056e-06, "loss": 0.283, "step": 14083 }, { "epoch": 2.2934494972112525, "grad_norm": 0.20327693223953247, "learning_rate": 7.987415471057736e-06, "loss": 0.3077, "step": 14084 }, { "epoch": 2.2936123437690834, "grad_norm": 0.15712760388851166, "learning_rate": 7.983944747947162e-06, "loss": 0.2706, "step": 14085 }, { "epoch": 2.2937751903269143, "grad_norm": 0.18327116966247559, "learning_rate": 7.980474635763963e-06, "loss": 0.2612, "step": 14086 }, { "epoch": 2.2939380368847453, "grad_norm": 0.1251046359539032, "learning_rate": 7.977005134632714e-06, "loss": 0.3074, "step": 14087 }, { "epoch": 2.2941008834425762, "grad_norm": 0.1514347344636917, "learning_rate": 7.973536244677977e-06, "loss": 0.3021, "step": 14088 }, { "epoch": 2.294263730000407, "grad_norm": 0.1680893450975418, "learning_rate": 7.970067966024305e-06, "loss": 0.3018, "step": 14089 }, { "epoch": 2.294426576558238, "grad_norm": 0.15446636080741882, "learning_rate": 7.96660029879622e-06, "loss": 0.2967, "step": 14090 }, { "epoch": 2.294589423116069, "grad_norm": 0.16341620683670044, "learning_rate": 7.963133243118217e-06, "loss": 0.2791, "step": 14091 }, { "epoch": 2.2947522696739, "grad_norm": 0.14379630982875824, "learning_rate": 7.959666799114768e-06, "loss": 0.3255, "step": 14092 }, { "epoch": 2.2949151162317305, "grad_norm": 0.16400432586669922, "learning_rate": 7.95620096691034e-06, "loss": 0.2667, "step": 14093 }, { "epoch": 2.2950779627895614, "grad_norm": 0.13902251422405243, "learning_rate": 7.952735746629363e-06, "loss": 0.2618, "step": 14094 }, { "epoch": 2.2952408093473924, "grad_norm": 0.1747092604637146, "learning_rate": 7.949271138396247e-06, "loss": 0.3062, "step": 14095 }, { "epoch": 2.2954036559052233, "grad_norm": 0.22870436310768127, "learning_rate": 7.945807142335376e-06, "loss": 0.2625, "step": 14096 }, { "epoch": 2.2955665024630543, "grad_norm": 0.15115179121494293, "learning_rate": 7.942343758571133e-06, "loss": 0.2191, "step": 14097 }, { "epoch": 2.295729349020885, "grad_norm": 0.1394350379705429, "learning_rate": 7.938880987227854e-06, "loss": 0.239, "step": 14098 }, { "epoch": 2.295892195578716, "grad_norm": 0.18997755646705627, "learning_rate": 7.935418828429855e-06, "loss": 0.288, "step": 14099 }, { "epoch": 2.2960550421365467, "grad_norm": 0.20412816107273102, "learning_rate": 7.931957282301455e-06, "loss": 0.2535, "step": 14100 }, { "epoch": 2.2962178886943776, "grad_norm": 0.13936004042625427, "learning_rate": 7.928496348966927e-06, "loss": 0.2632, "step": 14101 }, { "epoch": 2.2963807352522085, "grad_norm": 0.16802765429019928, "learning_rate": 7.925036028550519e-06, "loss": 0.2915, "step": 14102 }, { "epoch": 2.2965435818100395, "grad_norm": 0.17407497763633728, "learning_rate": 7.921576321176486e-06, "loss": 0.3007, "step": 14103 }, { "epoch": 2.2967064283678704, "grad_norm": 0.17905153334140778, "learning_rate": 7.918117226969027e-06, "loss": 0.2699, "step": 14104 }, { "epoch": 2.2968692749257014, "grad_norm": 0.22666941583156586, "learning_rate": 7.914658746052333e-06, "loss": 0.3041, "step": 14105 }, { "epoch": 2.2970321214835323, "grad_norm": 0.20938995480537415, "learning_rate": 7.911200878550587e-06, "loss": 0.2539, "step": 14106 }, { "epoch": 2.297194968041363, "grad_norm": 0.20104177296161652, "learning_rate": 7.907743624587921e-06, "loss": 0.2777, "step": 14107 }, { "epoch": 2.2973578145991937, "grad_norm": 0.19642578065395355, "learning_rate": 7.904286984288475e-06, "loss": 0.2907, "step": 14108 }, { "epoch": 2.2975206611570247, "grad_norm": 0.1545451432466507, "learning_rate": 7.900830957776348e-06, "loss": 0.2677, "step": 14109 }, { "epoch": 2.2976835077148556, "grad_norm": 0.15930116176605225, "learning_rate": 7.89737554517561e-06, "loss": 0.2911, "step": 14110 }, { "epoch": 2.2978463542726866, "grad_norm": 0.19107133150100708, "learning_rate": 7.89392074661034e-06, "loss": 0.323, "step": 14111 }, { "epoch": 2.2980092008305175, "grad_norm": 0.16271565854549408, "learning_rate": 7.890466562204568e-06, "loss": 0.2732, "step": 14112 }, { "epoch": 2.2981720473883485, "grad_norm": 0.17073126137256622, "learning_rate": 7.887012992082307e-06, "loss": 0.3052, "step": 14113 }, { "epoch": 2.2983348939461794, "grad_norm": 0.17401212453842163, "learning_rate": 7.88356003636754e-06, "loss": 0.3329, "step": 14114 }, { "epoch": 2.2984977405040103, "grad_norm": 0.1706511676311493, "learning_rate": 7.880107695184263e-06, "loss": 0.2816, "step": 14115 }, { "epoch": 2.298660587061841, "grad_norm": 0.17733626067638397, "learning_rate": 7.87665596865641e-06, "loss": 0.2522, "step": 14116 }, { "epoch": 2.298823433619672, "grad_norm": 0.15802305936813354, "learning_rate": 7.873204856907903e-06, "loss": 0.2647, "step": 14117 }, { "epoch": 2.2989862801775027, "grad_norm": 0.16474993526935577, "learning_rate": 7.869754360062662e-06, "loss": 0.2756, "step": 14118 }, { "epoch": 2.2991491267353337, "grad_norm": 0.2011934518814087, "learning_rate": 7.866304478244565e-06, "loss": 0.2635, "step": 14119 }, { "epoch": 2.2993119732931646, "grad_norm": 0.22105121612548828, "learning_rate": 7.862855211577469e-06, "loss": 0.2682, "step": 14120 }, { "epoch": 2.2994748198509956, "grad_norm": 0.12686482071876526, "learning_rate": 7.859406560185209e-06, "loss": 0.2799, "step": 14121 }, { "epoch": 2.2996376664088265, "grad_norm": 0.17677205801010132, "learning_rate": 7.855958524191614e-06, "loss": 0.2754, "step": 14122 }, { "epoch": 2.299800512966657, "grad_norm": 0.2027255892753601, "learning_rate": 7.852511103720475e-06, "loss": 0.2988, "step": 14123 }, { "epoch": 2.299963359524488, "grad_norm": 0.21091386675834656, "learning_rate": 7.849064298895561e-06, "loss": 0.2483, "step": 14124 }, { "epoch": 2.300126206082319, "grad_norm": 0.15529274940490723, "learning_rate": 7.845618109840616e-06, "loss": 0.2644, "step": 14125 }, { "epoch": 2.30028905264015, "grad_norm": 0.1643984615802765, "learning_rate": 7.842172536679387e-06, "loss": 0.2732, "step": 14126 }, { "epoch": 2.3004518991979808, "grad_norm": 0.15348167717456818, "learning_rate": 7.838727579535567e-06, "loss": 0.2688, "step": 14127 }, { "epoch": 2.3006147457558117, "grad_norm": 0.17096281051635742, "learning_rate": 7.83528323853284e-06, "loss": 0.2429, "step": 14128 }, { "epoch": 2.3007775923136426, "grad_norm": 0.13540838658809662, "learning_rate": 7.831839513794875e-06, "loss": 0.2586, "step": 14129 }, { "epoch": 2.300940438871473, "grad_norm": 0.16992506384849548, "learning_rate": 7.828396405445312e-06, "loss": 0.2682, "step": 14130 }, { "epoch": 2.301103285429304, "grad_norm": 0.18294112384319305, "learning_rate": 7.824953913607764e-06, "loss": 0.3114, "step": 14131 }, { "epoch": 2.301266131987135, "grad_norm": 0.15908801555633545, "learning_rate": 7.82151203840582e-06, "loss": 0.2719, "step": 14132 }, { "epoch": 2.301428978544966, "grad_norm": 0.16973398625850677, "learning_rate": 7.81807077996307e-06, "loss": 0.2927, "step": 14133 }, { "epoch": 2.301591825102797, "grad_norm": 0.15882764756679535, "learning_rate": 7.814630138403056e-06, "loss": 0.303, "step": 14134 }, { "epoch": 2.301754671660628, "grad_norm": 0.18248122930526733, "learning_rate": 7.811190113849312e-06, "loss": 0.2565, "step": 14135 }, { "epoch": 2.301917518218459, "grad_norm": 0.17849600315093994, "learning_rate": 7.807750706425332e-06, "loss": 0.2408, "step": 14136 }, { "epoch": 2.3020803647762893, "grad_norm": 0.18106313049793243, "learning_rate": 7.80431191625462e-06, "loss": 0.27, "step": 14137 }, { "epoch": 2.3022432113341202, "grad_norm": 0.1563299596309662, "learning_rate": 7.80087374346063e-06, "loss": 0.2638, "step": 14138 }, { "epoch": 2.302406057891951, "grad_norm": 0.1701343059539795, "learning_rate": 7.797436188166793e-06, "loss": 0.2762, "step": 14139 }, { "epoch": 2.302568904449782, "grad_norm": 0.1808495670557022, "learning_rate": 7.793999250496544e-06, "loss": 0.3171, "step": 14140 }, { "epoch": 2.302731751007613, "grad_norm": 0.15635299682617188, "learning_rate": 7.790562930573275e-06, "loss": 0.2956, "step": 14141 }, { "epoch": 2.302894597565444, "grad_norm": 0.21006609499454498, "learning_rate": 7.787127228520349e-06, "loss": 0.283, "step": 14142 }, { "epoch": 2.303057444123275, "grad_norm": 0.18486876785755157, "learning_rate": 7.783692144461135e-06, "loss": 0.2757, "step": 14143 }, { "epoch": 2.303220290681106, "grad_norm": 0.16519340872764587, "learning_rate": 7.780257678518949e-06, "loss": 0.2727, "step": 14144 }, { "epoch": 2.303383137238937, "grad_norm": 0.2001880407333374, "learning_rate": 7.776823830817107e-06, "loss": 0.2613, "step": 14145 }, { "epoch": 2.3035459837967673, "grad_norm": 0.17537425458431244, "learning_rate": 7.773390601478894e-06, "loss": 0.281, "step": 14146 }, { "epoch": 2.3037088303545983, "grad_norm": 0.22353731095790863, "learning_rate": 7.769957990627564e-06, "loss": 0.2431, "step": 14147 }, { "epoch": 2.3038716769124292, "grad_norm": 0.18062056601047516, "learning_rate": 7.76652599838637e-06, "loss": 0.3082, "step": 14148 }, { "epoch": 2.30403452347026, "grad_norm": 0.1900133192539215, "learning_rate": 7.763094624878525e-06, "loss": 0.2577, "step": 14149 }, { "epoch": 2.304197370028091, "grad_norm": 0.18928463757038116, "learning_rate": 7.759663870227222e-06, "loss": 0.2882, "step": 14150 }, { "epoch": 2.304360216585922, "grad_norm": 0.18596512079238892, "learning_rate": 7.756233734555646e-06, "loss": 0.2596, "step": 14151 }, { "epoch": 2.304523063143753, "grad_norm": 0.157062828540802, "learning_rate": 7.75280421798694e-06, "loss": 0.27, "step": 14152 }, { "epoch": 2.3046859097015835, "grad_norm": 0.18126440048217773, "learning_rate": 7.74937532064424e-06, "loss": 0.2556, "step": 14153 }, { "epoch": 2.3048487562594144, "grad_norm": 0.14553935825824738, "learning_rate": 7.74594704265064e-06, "loss": 0.3064, "step": 14154 }, { "epoch": 2.3050116028172454, "grad_norm": 0.15951669216156006, "learning_rate": 7.742519384129243e-06, "loss": 0.2808, "step": 14155 }, { "epoch": 2.3051744493750763, "grad_norm": 0.2041565477848053, "learning_rate": 7.739092345203109e-06, "loss": 0.2808, "step": 14156 }, { "epoch": 2.3053372959329073, "grad_norm": 0.17816860973834991, "learning_rate": 7.735665925995261e-06, "loss": 0.2707, "step": 14157 }, { "epoch": 2.305500142490738, "grad_norm": 0.14805901050567627, "learning_rate": 7.732240126628742e-06, "loss": 0.221, "step": 14158 }, { "epoch": 2.305662989048569, "grad_norm": 0.19155478477478027, "learning_rate": 7.72881494722654e-06, "loss": 0.2862, "step": 14159 }, { "epoch": 2.3058258356063996, "grad_norm": 0.1812281459569931, "learning_rate": 7.725390387911627e-06, "loss": 0.2719, "step": 14160 }, { "epoch": 2.3059886821642306, "grad_norm": 0.1485326737165451, "learning_rate": 7.721966448806945e-06, "loss": 0.2447, "step": 14161 }, { "epoch": 2.3061515287220615, "grad_norm": 0.14188557863235474, "learning_rate": 7.718543130035444e-06, "loss": 0.2711, "step": 14162 }, { "epoch": 2.3063143752798925, "grad_norm": 0.16504767537117004, "learning_rate": 7.715120431720018e-06, "loss": 0.2918, "step": 14163 }, { "epoch": 2.3064772218377234, "grad_norm": 0.21836082637310028, "learning_rate": 7.71169835398356e-06, "loss": 0.293, "step": 14164 }, { "epoch": 2.3066400683955544, "grad_norm": 0.1356232464313507, "learning_rate": 7.708276896948919e-06, "loss": 0.2816, "step": 14165 }, { "epoch": 2.3068029149533853, "grad_norm": 0.1846378743648529, "learning_rate": 7.70485606073895e-06, "loss": 0.2645, "step": 14166 }, { "epoch": 2.3069657615112162, "grad_norm": 0.17087236046791077, "learning_rate": 7.701435845476468e-06, "loss": 0.2695, "step": 14167 }, { "epoch": 2.307128608069047, "grad_norm": 0.14955472946166992, "learning_rate": 7.69801625128426e-06, "loss": 0.253, "step": 14168 }, { "epoch": 2.3072914546268777, "grad_norm": 0.21142049133777618, "learning_rate": 7.694597278285112e-06, "loss": 0.2803, "step": 14169 }, { "epoch": 2.3074543011847086, "grad_norm": 0.2015944868326187, "learning_rate": 7.691178926601772e-06, "loss": 0.2919, "step": 14170 }, { "epoch": 2.3076171477425396, "grad_norm": 0.15917186439037323, "learning_rate": 7.687761196356965e-06, "loss": 0.2913, "step": 14171 }, { "epoch": 2.3077799943003705, "grad_norm": 0.19627608358860016, "learning_rate": 7.684344087673394e-06, "loss": 0.291, "step": 14172 }, { "epoch": 2.3079428408582014, "grad_norm": 0.19834491610527039, "learning_rate": 7.680927600673757e-06, "loss": 0.2861, "step": 14173 }, { "epoch": 2.3081056874160324, "grad_norm": 0.14204619824886322, "learning_rate": 7.677511735480708e-06, "loss": 0.2869, "step": 14174 }, { "epoch": 2.3082685339738633, "grad_norm": 0.18108394742012024, "learning_rate": 7.674096492216889e-06, "loss": 0.2708, "step": 14175 }, { "epoch": 2.308431380531694, "grad_norm": 0.18276146054267883, "learning_rate": 7.670681871004905e-06, "loss": 0.2721, "step": 14176 }, { "epoch": 2.3085942270895248, "grad_norm": 0.18887527287006378, "learning_rate": 7.667267871967373e-06, "loss": 0.2371, "step": 14177 }, { "epoch": 2.3087570736473557, "grad_norm": 0.1267242580652237, "learning_rate": 7.663854495226853e-06, "loss": 0.242, "step": 14178 }, { "epoch": 2.3089199202051867, "grad_norm": 0.16703534126281738, "learning_rate": 7.660441740905888e-06, "loss": 0.2618, "step": 14179 }, { "epoch": 2.3090827667630176, "grad_norm": 0.15440645813941956, "learning_rate": 7.657029609127023e-06, "loss": 0.303, "step": 14180 }, { "epoch": 2.3092456133208485, "grad_norm": 0.20866616070270538, "learning_rate": 7.653618100012758e-06, "loss": 0.3316, "step": 14181 }, { "epoch": 2.3094084598786795, "grad_norm": 0.194614976644516, "learning_rate": 7.650207213685565e-06, "loss": 0.2829, "step": 14182 }, { "epoch": 2.30957130643651, "grad_norm": 0.14002494513988495, "learning_rate": 7.646796950267924e-06, "loss": 0.2185, "step": 14183 }, { "epoch": 2.309734152994341, "grad_norm": 0.17348840832710266, "learning_rate": 7.643387309882255e-06, "loss": 0.2647, "step": 14184 }, { "epoch": 2.309896999552172, "grad_norm": 0.1988847255706787, "learning_rate": 7.63997829265099e-06, "loss": 0.3022, "step": 14185 }, { "epoch": 2.310059846110003, "grad_norm": 0.1979847401380539, "learning_rate": 7.636569898696518e-06, "loss": 0.2683, "step": 14186 }, { "epoch": 2.3102226926678338, "grad_norm": 0.21370822191238403, "learning_rate": 7.6331621281412e-06, "loss": 0.2831, "step": 14187 }, { "epoch": 2.3103855392256647, "grad_norm": 0.16126495599746704, "learning_rate": 7.629754981107404e-06, "loss": 0.2561, "step": 14188 }, { "epoch": 2.3105483857834956, "grad_norm": 0.16576793789863586, "learning_rate": 7.626348457717445e-06, "loss": 0.2358, "step": 14189 }, { "epoch": 2.310711232341326, "grad_norm": 0.1424904316663742, "learning_rate": 7.622942558093621e-06, "loss": 0.3027, "step": 14190 }, { "epoch": 2.310874078899157, "grad_norm": 0.1717885583639145, "learning_rate": 7.61953728235823e-06, "loss": 0.2608, "step": 14191 }, { "epoch": 2.311036925456988, "grad_norm": 0.16446544229984283, "learning_rate": 7.616132630633524e-06, "loss": 0.252, "step": 14192 }, { "epoch": 2.311199772014819, "grad_norm": 0.15537197887897491, "learning_rate": 7.6127286030417414e-06, "loss": 0.2993, "step": 14193 }, { "epoch": 2.31136261857265, "grad_norm": 0.1581101417541504, "learning_rate": 7.609325199705086e-06, "loss": 0.3013, "step": 14194 }, { "epoch": 2.311525465130481, "grad_norm": 0.2056041657924652, "learning_rate": 7.60592242074577e-06, "loss": 0.2807, "step": 14195 }, { "epoch": 2.311688311688312, "grad_norm": 0.16840563714504242, "learning_rate": 7.602520266285951e-06, "loss": 0.2929, "step": 14196 }, { "epoch": 2.3118511582461427, "grad_norm": 0.15131564438343048, "learning_rate": 7.59911873644778e-06, "loss": 0.316, "step": 14197 }, { "epoch": 2.3120140048039737, "grad_norm": 0.15601058304309845, "learning_rate": 7.595717831353372e-06, "loss": 0.2968, "step": 14198 }, { "epoch": 2.312176851361804, "grad_norm": 0.15952156484127045, "learning_rate": 7.5923175511248475e-06, "loss": 0.2813, "step": 14199 }, { "epoch": 2.312339697919635, "grad_norm": 0.17505574226379395, "learning_rate": 7.588917895884276e-06, "loss": 0.2717, "step": 14200 }, { "epoch": 2.312502544477466, "grad_norm": 0.17237429320812225, "learning_rate": 7.58551886575371e-06, "loss": 0.2719, "step": 14201 }, { "epoch": 2.312665391035297, "grad_norm": 0.1837940514087677, "learning_rate": 7.582120460855199e-06, "loss": 0.3157, "step": 14202 }, { "epoch": 2.312828237593128, "grad_norm": 0.1490597277879715, "learning_rate": 7.578722681310749e-06, "loss": 0.2408, "step": 14203 }, { "epoch": 2.312991084150959, "grad_norm": 0.17553183436393738, "learning_rate": 7.575325527242352e-06, "loss": 0.2568, "step": 14204 }, { "epoch": 2.31315393070879, "grad_norm": 0.24285295605659485, "learning_rate": 7.571928998771965e-06, "loss": 0.3032, "step": 14205 }, { "epoch": 2.3133167772666203, "grad_norm": 0.1823919117450714, "learning_rate": 7.568533096021551e-06, "loss": 0.275, "step": 14206 }, { "epoch": 2.3134796238244513, "grad_norm": 0.17018531262874603, "learning_rate": 7.5651378191130246e-06, "loss": 0.2633, "step": 14207 }, { "epoch": 2.313642470382282, "grad_norm": 0.16128630936145782, "learning_rate": 7.561743168168278e-06, "loss": 0.2877, "step": 14208 }, { "epoch": 2.313805316940113, "grad_norm": 0.18120937049388885, "learning_rate": 7.558349143309207e-06, "loss": 0.2689, "step": 14209 }, { "epoch": 2.313968163497944, "grad_norm": 0.1876026690006256, "learning_rate": 7.55495574465766e-06, "loss": 0.2495, "step": 14210 }, { "epoch": 2.314131010055775, "grad_norm": 0.2135942131280899, "learning_rate": 7.5515629723354655e-06, "loss": 0.3108, "step": 14211 }, { "epoch": 2.314293856613606, "grad_norm": 0.15335865318775177, "learning_rate": 7.548170826464432e-06, "loss": 0.2857, "step": 14212 }, { "epoch": 2.3144567031714365, "grad_norm": 0.19335529208183289, "learning_rate": 7.544779307166358e-06, "loss": 0.3136, "step": 14213 }, { "epoch": 2.3146195497292674, "grad_norm": 0.16355463862419128, "learning_rate": 7.5413884145630084e-06, "loss": 0.2787, "step": 14214 }, { "epoch": 2.3147823962870984, "grad_norm": 0.17723864316940308, "learning_rate": 7.537998148776118e-06, "loss": 0.2856, "step": 14215 }, { "epoch": 2.3149452428449293, "grad_norm": 0.18712729215621948, "learning_rate": 7.534608509927405e-06, "loss": 0.2629, "step": 14216 }, { "epoch": 2.3151080894027602, "grad_norm": 0.19242960214614868, "learning_rate": 7.531219498138581e-06, "loss": 0.247, "step": 14217 }, { "epoch": 2.315270935960591, "grad_norm": 0.2284156084060669, "learning_rate": 7.5278311135313165e-06, "loss": 0.2799, "step": 14218 }, { "epoch": 2.315433782518422, "grad_norm": 0.17742091417312622, "learning_rate": 7.524443356227257e-06, "loss": 0.2689, "step": 14219 }, { "epoch": 2.315596629076253, "grad_norm": 0.16782553493976593, "learning_rate": 7.521056226348044e-06, "loss": 0.3092, "step": 14220 }, { "epoch": 2.315759475634084, "grad_norm": 0.15719656646251678, "learning_rate": 7.517669724015272e-06, "loss": 0.2616, "step": 14221 }, { "epoch": 2.3159223221919145, "grad_norm": 0.17203384637832642, "learning_rate": 7.514283849350545e-06, "loss": 0.2744, "step": 14222 }, { "epoch": 2.3160851687497455, "grad_norm": 0.14715710282325745, "learning_rate": 7.510898602475416e-06, "loss": 0.254, "step": 14223 }, { "epoch": 2.3162480153075764, "grad_norm": 0.18875718116760254, "learning_rate": 7.507513983511419e-06, "loss": 0.2872, "step": 14224 }, { "epoch": 2.3164108618654073, "grad_norm": 0.18339869379997253, "learning_rate": 7.5041299925800865e-06, "loss": 0.2481, "step": 14225 }, { "epoch": 2.3165737084232383, "grad_norm": 0.20753973722457886, "learning_rate": 7.500746629802905e-06, "loss": 0.3019, "step": 14226 }, { "epoch": 2.3167365549810692, "grad_norm": 0.1651344746351242, "learning_rate": 7.497363895301343e-06, "loss": 0.2647, "step": 14227 }, { "epoch": 2.3168994015389, "grad_norm": 0.14726552367210388, "learning_rate": 7.493981789196864e-06, "loss": 0.217, "step": 14228 }, { "epoch": 2.3170622480967307, "grad_norm": 0.1766723394393921, "learning_rate": 7.490600311610885e-06, "loss": 0.2691, "step": 14229 }, { "epoch": 2.3172250946545616, "grad_norm": 0.14483588933944702, "learning_rate": 7.4872194626648105e-06, "loss": 0.2483, "step": 14230 }, { "epoch": 2.3173879412123926, "grad_norm": 0.12631312012672424, "learning_rate": 7.4838392424800314e-06, "loss": 0.2922, "step": 14231 }, { "epoch": 2.3175507877702235, "grad_norm": 0.1457419991493225, "learning_rate": 7.480459651177904e-06, "loss": 0.2421, "step": 14232 }, { "epoch": 2.3177136343280544, "grad_norm": 0.20451940596103668, "learning_rate": 7.477080688879768e-06, "loss": 0.2847, "step": 14233 }, { "epoch": 2.3178764808858854, "grad_norm": 0.21238702535629272, "learning_rate": 7.473702355706924e-06, "loss": 0.3192, "step": 14234 }, { "epoch": 2.3180393274437163, "grad_norm": 0.2195204496383667, "learning_rate": 7.470324651780686e-06, "loss": 0.27, "step": 14235 }, { "epoch": 2.318202174001547, "grad_norm": 0.1437278389930725, "learning_rate": 7.466947577222311e-06, "loss": 0.3118, "step": 14236 }, { "epoch": 2.3183650205593778, "grad_norm": 0.14552845060825348, "learning_rate": 7.463571132153049e-06, "loss": 0.284, "step": 14237 }, { "epoch": 2.3185278671172087, "grad_norm": 0.15353694558143616, "learning_rate": 7.460195316694119e-06, "loss": 0.2375, "step": 14238 }, { "epoch": 2.3186907136750396, "grad_norm": 0.20232261717319489, "learning_rate": 7.4568201309667326e-06, "loss": 0.2357, "step": 14239 }, { "epoch": 2.3188535602328706, "grad_norm": 0.1588238626718521, "learning_rate": 7.453445575092066e-06, "loss": 0.2231, "step": 14240 }, { "epoch": 2.3190164067907015, "grad_norm": 0.2078489512205124, "learning_rate": 7.450071649191265e-06, "loss": 0.2302, "step": 14241 }, { "epoch": 2.3191792533485325, "grad_norm": 0.18151672184467316, "learning_rate": 7.446698353385481e-06, "loss": 0.261, "step": 14242 }, { "epoch": 2.3193420999063634, "grad_norm": 0.16400329768657684, "learning_rate": 7.4433256877958155e-06, "loss": 0.2669, "step": 14243 }, { "epoch": 2.3195049464641944, "grad_norm": 0.15099036693572998, "learning_rate": 7.4399536525433606e-06, "loss": 0.2829, "step": 14244 }, { "epoch": 2.319667793022025, "grad_norm": 0.16726164519786835, "learning_rate": 7.436582247749174e-06, "loss": 0.2833, "step": 14245 }, { "epoch": 2.319830639579856, "grad_norm": 0.15758062899112701, "learning_rate": 7.4332114735343125e-06, "loss": 0.2752, "step": 14246 }, { "epoch": 2.3199934861376867, "grad_norm": 0.16768719255924225, "learning_rate": 7.42984133001979e-06, "loss": 0.2719, "step": 14247 }, { "epoch": 2.3201563326955177, "grad_norm": 0.20921945571899414, "learning_rate": 7.426471817326608e-06, "loss": 0.2571, "step": 14248 }, { "epoch": 2.3203191792533486, "grad_norm": 0.14943553507328033, "learning_rate": 7.4231029355757294e-06, "loss": 0.233, "step": 14249 }, { "epoch": 2.3204820258111796, "grad_norm": 0.19309282302856445, "learning_rate": 7.419734684888127e-06, "loss": 0.3052, "step": 14250 }, { "epoch": 2.3206448723690105, "grad_norm": 0.13439588248729706, "learning_rate": 7.416367065384719e-06, "loss": 0.2861, "step": 14251 }, { "epoch": 2.320807718926841, "grad_norm": 0.19825513660907745, "learning_rate": 7.4130000771864076e-06, "loss": 0.3025, "step": 14252 }, { "epoch": 2.320970565484672, "grad_norm": 0.14884057641029358, "learning_rate": 7.409633720414095e-06, "loss": 0.2819, "step": 14253 }, { "epoch": 2.321133412042503, "grad_norm": 0.14236055314540863, "learning_rate": 7.406267995188634e-06, "loss": 0.2721, "step": 14254 }, { "epoch": 2.321296258600334, "grad_norm": 0.16929441690444946, "learning_rate": 7.402902901630862e-06, "loss": 0.2881, "step": 14255 }, { "epoch": 2.321459105158165, "grad_norm": 0.15546323359012604, "learning_rate": 7.399538439861592e-06, "loss": 0.2724, "step": 14256 }, { "epoch": 2.3216219517159957, "grad_norm": 0.1428815871477127, "learning_rate": 7.396174610001633e-06, "loss": 0.2797, "step": 14257 }, { "epoch": 2.3217847982738267, "grad_norm": 0.1875031739473343, "learning_rate": 7.392811412171738e-06, "loss": 0.2532, "step": 14258 }, { "epoch": 2.321947644831657, "grad_norm": 0.17846140265464783, "learning_rate": 7.3894488464926745e-06, "loss": 0.2593, "step": 14259 }, { "epoch": 2.322110491389488, "grad_norm": 0.15170346200466156, "learning_rate": 7.3860869130851604e-06, "loss": 0.3101, "step": 14260 }, { "epoch": 2.322273337947319, "grad_norm": 0.15231038630008698, "learning_rate": 7.382725612069891e-06, "loss": 0.2665, "step": 14261 }, { "epoch": 2.32243618450515, "grad_norm": 0.21121487021446228, "learning_rate": 7.3793649435675625e-06, "loss": 0.2853, "step": 14262 }, { "epoch": 2.322599031062981, "grad_norm": 0.19027003645896912, "learning_rate": 7.3760049076988194e-06, "loss": 0.2694, "step": 14263 }, { "epoch": 2.322761877620812, "grad_norm": 0.1784338802099228, "learning_rate": 7.372645504584308e-06, "loss": 0.2661, "step": 14264 }, { "epoch": 2.322924724178643, "grad_norm": 0.20905353128910065, "learning_rate": 7.3692867343446385e-06, "loss": 0.2772, "step": 14265 }, { "epoch": 2.3230875707364733, "grad_norm": 0.21805551648139954, "learning_rate": 7.365928597100394e-06, "loss": 0.2919, "step": 14266 }, { "epoch": 2.3232504172943043, "grad_norm": 0.18813453614711761, "learning_rate": 7.3625710929721425e-06, "loss": 0.2604, "step": 14267 }, { "epoch": 2.323413263852135, "grad_norm": 0.1711728274822235, "learning_rate": 7.3592142220804375e-06, "loss": 0.2457, "step": 14268 }, { "epoch": 2.323576110409966, "grad_norm": 0.14604872465133667, "learning_rate": 7.355857984545797e-06, "loss": 0.3199, "step": 14269 }, { "epoch": 2.323738956967797, "grad_norm": 0.14979252219200134, "learning_rate": 7.352502380488708e-06, "loss": 0.2169, "step": 14270 }, { "epoch": 2.323901803525628, "grad_norm": 0.19138263165950775, "learning_rate": 7.349147410029667e-06, "loss": 0.2283, "step": 14271 }, { "epoch": 2.324064650083459, "grad_norm": 0.15218189358711243, "learning_rate": 7.345793073289114e-06, "loss": 0.2471, "step": 14272 }, { "epoch": 2.32422749664129, "grad_norm": 0.17552271485328674, "learning_rate": 7.342439370387486e-06, "loss": 0.2594, "step": 14273 }, { "epoch": 2.324390343199121, "grad_norm": 0.2132667899131775, "learning_rate": 7.3390863014451776e-06, "loss": 0.3256, "step": 14274 }, { "epoch": 2.3245531897569514, "grad_norm": 0.16539296507835388, "learning_rate": 7.335733866582595e-06, "loss": 0.2646, "step": 14275 }, { "epoch": 2.3247160363147823, "grad_norm": 0.23824580013751984, "learning_rate": 7.33238206592009e-06, "loss": 0.2678, "step": 14276 }, { "epoch": 2.3248788828726132, "grad_norm": 0.18990108370780945, "learning_rate": 7.329030899578002e-06, "loss": 0.2649, "step": 14277 }, { "epoch": 2.325041729430444, "grad_norm": 0.19840244948863983, "learning_rate": 7.325680367676641e-06, "loss": 0.2729, "step": 14278 }, { "epoch": 2.325204575988275, "grad_norm": 0.16731278598308563, "learning_rate": 7.3223304703363135e-06, "loss": 0.2395, "step": 14279 }, { "epoch": 2.325367422546106, "grad_norm": 0.19106006622314453, "learning_rate": 7.318981207677289e-06, "loss": 0.2862, "step": 14280 }, { "epoch": 2.325530269103937, "grad_norm": 0.14492715895175934, "learning_rate": 7.315632579819803e-06, "loss": 0.2499, "step": 14281 }, { "epoch": 2.3256931156617675, "grad_norm": 0.20703472197055817, "learning_rate": 7.312284586884099e-06, "loss": 0.3013, "step": 14282 }, { "epoch": 2.3258559622195984, "grad_norm": 0.1738152652978897, "learning_rate": 7.308937228990373e-06, "loss": 0.254, "step": 14283 }, { "epoch": 2.3260188087774294, "grad_norm": 0.1657252013683319, "learning_rate": 7.305590506258805e-06, "loss": 0.2745, "step": 14284 }, { "epoch": 2.3261816553352603, "grad_norm": 0.22129176557064056, "learning_rate": 7.3022444188095426e-06, "loss": 0.2478, "step": 14285 }, { "epoch": 2.3263445018930913, "grad_norm": 0.19340485334396362, "learning_rate": 7.2988989667627375e-06, "loss": 0.2965, "step": 14286 }, { "epoch": 2.326507348450922, "grad_norm": 0.15574054419994354, "learning_rate": 7.295554150238496e-06, "loss": 0.2446, "step": 14287 }, { "epoch": 2.326670195008753, "grad_norm": 0.157098188996315, "learning_rate": 7.2922099693569e-06, "loss": 0.2982, "step": 14288 }, { "epoch": 2.3268330415665837, "grad_norm": 0.1515834629535675, "learning_rate": 7.288866424238014e-06, "loss": 0.2551, "step": 14289 }, { "epoch": 2.3269958881244146, "grad_norm": 0.18586045503616333, "learning_rate": 7.285523515001896e-06, "loss": 0.3222, "step": 14290 }, { "epoch": 2.3271587346822455, "grad_norm": 0.17624713480472565, "learning_rate": 7.2821812417685566e-06, "loss": 0.2811, "step": 14291 }, { "epoch": 2.3273215812400765, "grad_norm": 0.20176635682582855, "learning_rate": 7.2788396046579885e-06, "loss": 0.236, "step": 14292 }, { "epoch": 2.3274844277979074, "grad_norm": 0.1850067675113678, "learning_rate": 7.275498603790179e-06, "loss": 0.283, "step": 14293 }, { "epoch": 2.3276472743557384, "grad_norm": 0.11009956151247025, "learning_rate": 7.272158239285076e-06, "loss": 0.252, "step": 14294 }, { "epoch": 2.3278101209135693, "grad_norm": 0.17898981273174286, "learning_rate": 7.268818511262604e-06, "loss": 0.2604, "step": 14295 }, { "epoch": 2.3279729674714003, "grad_norm": 0.21921579539775848, "learning_rate": 7.265479419842663e-06, "loss": 0.2841, "step": 14296 }, { "epoch": 2.328135814029231, "grad_norm": 0.21275560557842255, "learning_rate": 7.262140965145153e-06, "loss": 0.269, "step": 14297 }, { "epoch": 2.3282986605870617, "grad_norm": 0.1838454306125641, "learning_rate": 7.258803147289919e-06, "loss": 0.2911, "step": 14298 }, { "epoch": 2.3284615071448926, "grad_norm": 0.23155701160430908, "learning_rate": 7.255465966396815e-06, "loss": 0.2871, "step": 14299 }, { "epoch": 2.3286243537027236, "grad_norm": 0.14303968846797943, "learning_rate": 7.252129422585646e-06, "loss": 0.2568, "step": 14300 }, { "epoch": 2.3287872002605545, "grad_norm": 0.14992199838161469, "learning_rate": 7.248793515976196e-06, "loss": 0.2799, "step": 14301 }, { "epoch": 2.3289500468183855, "grad_norm": 0.16888274252414703, "learning_rate": 7.245458246688252e-06, "loss": 0.2456, "step": 14302 }, { "epoch": 2.3291128933762164, "grad_norm": 0.16404740512371063, "learning_rate": 7.242123614841545e-06, "loss": 0.2323, "step": 14303 }, { "epoch": 2.3292757399340474, "grad_norm": 0.1787661612033844, "learning_rate": 7.238789620555808e-06, "loss": 0.222, "step": 14304 }, { "epoch": 2.329438586491878, "grad_norm": 0.1772363781929016, "learning_rate": 7.235456263950741e-06, "loss": 0.2618, "step": 14305 }, { "epoch": 2.329601433049709, "grad_norm": 0.15187065303325653, "learning_rate": 7.2321235451460166e-06, "loss": 0.2503, "step": 14306 }, { "epoch": 2.3297642796075397, "grad_norm": 0.17251387238502502, "learning_rate": 7.2287914642612805e-06, "loss": 0.2584, "step": 14307 }, { "epoch": 2.3299271261653707, "grad_norm": 0.16950692236423492, "learning_rate": 7.225460021416186e-06, "loss": 0.2599, "step": 14308 }, { "epoch": 2.3300899727232016, "grad_norm": 0.1986212432384491, "learning_rate": 7.22212921673033e-06, "loss": 0.2621, "step": 14309 }, { "epoch": 2.3302528192810326, "grad_norm": 0.16135606169700623, "learning_rate": 7.218799050323291e-06, "loss": 0.2547, "step": 14310 }, { "epoch": 2.3304156658388635, "grad_norm": 0.16772450506687164, "learning_rate": 7.2154695223146465e-06, "loss": 0.2388, "step": 14311 }, { "epoch": 2.330578512396694, "grad_norm": 0.1447717696428299, "learning_rate": 7.212140632823933e-06, "loss": 0.2895, "step": 14312 }, { "epoch": 2.330741358954525, "grad_norm": 0.1853741854429245, "learning_rate": 7.208812381970662e-06, "loss": 0.2952, "step": 14313 }, { "epoch": 2.330904205512356, "grad_norm": 0.20963765680789948, "learning_rate": 7.2054847698743235e-06, "loss": 0.3042, "step": 14314 }, { "epoch": 2.331067052070187, "grad_norm": 0.17057226598262787, "learning_rate": 7.202157796654405e-06, "loss": 0.267, "step": 14315 }, { "epoch": 2.3312298986280178, "grad_norm": 0.16829371452331543, "learning_rate": 7.1988314624303414e-06, "loss": 0.2874, "step": 14316 }, { "epoch": 2.3313927451858487, "grad_norm": 0.18248271942138672, "learning_rate": 7.195505767321567e-06, "loss": 0.2825, "step": 14317 }, { "epoch": 2.3315555917436797, "grad_norm": 0.16773314774036407, "learning_rate": 7.192180711447468e-06, "loss": 0.2442, "step": 14318 }, { "epoch": 2.33171843830151, "grad_norm": 0.18791605532169342, "learning_rate": 7.188856294927443e-06, "loss": 0.2702, "step": 14319 }, { "epoch": 2.331881284859341, "grad_norm": 0.208907350897789, "learning_rate": 7.185532517880842e-06, "loss": 0.2724, "step": 14320 }, { "epoch": 2.332044131417172, "grad_norm": 0.19553381204605103, "learning_rate": 7.182209380426988e-06, "loss": 0.2665, "step": 14321 }, { "epoch": 2.332206977975003, "grad_norm": 0.18166819214820862, "learning_rate": 7.1788868826852095e-06, "loss": 0.2772, "step": 14322 }, { "epoch": 2.332369824532834, "grad_norm": 0.18618053197860718, "learning_rate": 7.175565024774786e-06, "loss": 0.2607, "step": 14323 }, { "epoch": 2.332532671090665, "grad_norm": 0.14926405251026154, "learning_rate": 7.172243806814979e-06, "loss": 0.268, "step": 14324 }, { "epoch": 2.332695517648496, "grad_norm": 0.16564162075519562, "learning_rate": 7.168923228925026e-06, "loss": 0.2982, "step": 14325 }, { "epoch": 2.3328583642063268, "grad_norm": 0.15073704719543457, "learning_rate": 7.165603291224163e-06, "loss": 0.2835, "step": 14326 }, { "epoch": 2.3330212107641577, "grad_norm": 0.21895286440849304, "learning_rate": 7.162283993831573e-06, "loss": 0.3028, "step": 14327 }, { "epoch": 2.333184057321988, "grad_norm": 0.14258679747581482, "learning_rate": 7.15896533686643e-06, "loss": 0.2417, "step": 14328 }, { "epoch": 2.333346903879819, "grad_norm": 0.2086726874113083, "learning_rate": 7.155647320447878e-06, "loss": 0.2936, "step": 14329 }, { "epoch": 2.33350975043765, "grad_norm": 0.21439895033836365, "learning_rate": 7.152329944695058e-06, "loss": 0.2806, "step": 14330 }, { "epoch": 2.333672596995481, "grad_norm": 0.18384650349617004, "learning_rate": 7.149013209727065e-06, "loss": 0.3108, "step": 14331 }, { "epoch": 2.333835443553312, "grad_norm": 0.15650829672813416, "learning_rate": 7.145697115662972e-06, "loss": 0.2852, "step": 14332 }, { "epoch": 2.333998290111143, "grad_norm": 0.15656453371047974, "learning_rate": 7.142381662621855e-06, "loss": 0.2737, "step": 14333 }, { "epoch": 2.334161136668974, "grad_norm": 0.17539739608764648, "learning_rate": 7.139066850722739e-06, "loss": 0.2577, "step": 14334 }, { "epoch": 2.3343239832268043, "grad_norm": 0.18118195235729218, "learning_rate": 7.1357526800846286e-06, "loss": 0.2748, "step": 14335 }, { "epoch": 2.3344868297846353, "grad_norm": 0.17792527377605438, "learning_rate": 7.1324391508265284e-06, "loss": 0.279, "step": 14336 }, { "epoch": 2.3346496763424662, "grad_norm": 0.1422465741634369, "learning_rate": 7.129126263067393e-06, "loss": 0.2731, "step": 14337 }, { "epoch": 2.334812522900297, "grad_norm": 0.18495501577854156, "learning_rate": 7.125814016926161e-06, "loss": 0.2419, "step": 14338 }, { "epoch": 2.334975369458128, "grad_norm": 0.1869032084941864, "learning_rate": 7.1225024125217645e-06, "loss": 0.2849, "step": 14339 }, { "epoch": 2.335138216015959, "grad_norm": 0.1909761130809784, "learning_rate": 7.119191449973087e-06, "loss": 0.2703, "step": 14340 }, { "epoch": 2.33530106257379, "grad_norm": 0.1723160296678543, "learning_rate": 7.115881129399016e-06, "loss": 0.2432, "step": 14341 }, { "epoch": 2.3354639091316205, "grad_norm": 0.18785496056079865, "learning_rate": 7.112571450918395e-06, "loss": 0.2316, "step": 14342 }, { "epoch": 2.3356267556894514, "grad_norm": 0.2347005158662796, "learning_rate": 7.1092624146500445e-06, "loss": 0.3227, "step": 14343 }, { "epoch": 2.3357896022472824, "grad_norm": 0.16611026227474213, "learning_rate": 7.105954020712782e-06, "loss": 0.2987, "step": 14344 }, { "epoch": 2.3359524488051133, "grad_norm": 0.20198051631450653, "learning_rate": 7.102646269225382e-06, "loss": 0.3304, "step": 14345 }, { "epoch": 2.3361152953629443, "grad_norm": 0.17329274117946625, "learning_rate": 7.099339160306604e-06, "loss": 0.2967, "step": 14346 }, { "epoch": 2.336278141920775, "grad_norm": 0.16967898607254028, "learning_rate": 7.096032694075172e-06, "loss": 0.3134, "step": 14347 }, { "epoch": 2.336440988478606, "grad_norm": 0.17259638011455536, "learning_rate": 7.092726870649818e-06, "loss": 0.2934, "step": 14348 }, { "epoch": 2.336603835036437, "grad_norm": 0.14518313109874725, "learning_rate": 7.08942169014922e-06, "loss": 0.2734, "step": 14349 }, { "epoch": 2.336766681594268, "grad_norm": 0.17406243085861206, "learning_rate": 7.086117152692046e-06, "loss": 0.2393, "step": 14350 }, { "epoch": 2.3369295281520985, "grad_norm": 0.1691761463880539, "learning_rate": 7.082813258396928e-06, "loss": 0.2754, "step": 14351 }, { "epoch": 2.3370923747099295, "grad_norm": 0.14424142241477966, "learning_rate": 7.079510007382503e-06, "loss": 0.2544, "step": 14352 }, { "epoch": 2.3372552212677604, "grad_norm": 0.16430170834064484, "learning_rate": 7.076207399767362e-06, "loss": 0.3054, "step": 14353 }, { "epoch": 2.3374180678255914, "grad_norm": 0.14719681441783905, "learning_rate": 7.072905435670066e-06, "loss": 0.2711, "step": 14354 }, { "epoch": 2.3375809143834223, "grad_norm": 0.1700105369091034, "learning_rate": 7.069604115209186e-06, "loss": 0.3056, "step": 14355 }, { "epoch": 2.3377437609412532, "grad_norm": 0.2019866704940796, "learning_rate": 7.066303438503239e-06, "loss": 0.2814, "step": 14356 }, { "epoch": 2.337906607499084, "grad_norm": 0.16636092960834503, "learning_rate": 7.063003405670729e-06, "loss": 0.2335, "step": 14357 }, { "epoch": 2.3380694540569147, "grad_norm": 0.2034176141023636, "learning_rate": 7.059704016830129e-06, "loss": 0.3247, "step": 14358 }, { "epoch": 2.3382323006147456, "grad_norm": 0.20605970919132233, "learning_rate": 7.056405272099914e-06, "loss": 0.3105, "step": 14359 }, { "epoch": 2.3383951471725766, "grad_norm": 0.1626036912202835, "learning_rate": 7.053107171598508e-06, "loss": 0.2793, "step": 14360 }, { "epoch": 2.3385579937304075, "grad_norm": 0.21558208763599396, "learning_rate": 7.049809715444319e-06, "loss": 0.2765, "step": 14361 }, { "epoch": 2.3387208402882385, "grad_norm": 0.18788212537765503, "learning_rate": 7.04651290375575e-06, "loss": 0.2723, "step": 14362 }, { "epoch": 2.3388836868460694, "grad_norm": 0.14897498488426208, "learning_rate": 7.043216736651156e-06, "loss": 0.3043, "step": 14363 }, { "epoch": 2.3390465334039003, "grad_norm": 0.1895088255405426, "learning_rate": 7.039921214248882e-06, "loss": 0.2849, "step": 14364 }, { "epoch": 2.339209379961731, "grad_norm": 0.14291562139987946, "learning_rate": 7.036626336667238e-06, "loss": 0.2679, "step": 14365 }, { "epoch": 2.339372226519562, "grad_norm": 0.20695358514785767, "learning_rate": 7.033332104024537e-06, "loss": 0.2722, "step": 14366 }, { "epoch": 2.3395350730773927, "grad_norm": 0.20253607630729675, "learning_rate": 7.030038516439042e-06, "loss": 0.2926, "step": 14367 }, { "epoch": 2.3396979196352237, "grad_norm": 0.20934991538524628, "learning_rate": 7.026745574029004e-06, "loss": 0.2701, "step": 14368 }, { "epoch": 2.3398607661930546, "grad_norm": 0.15758968889713287, "learning_rate": 7.0234532769126425e-06, "loss": 0.2812, "step": 14369 }, { "epoch": 2.3400236127508856, "grad_norm": 0.17997558414936066, "learning_rate": 7.020161625208175e-06, "loss": 0.2637, "step": 14370 }, { "epoch": 2.3401864593087165, "grad_norm": 0.18838278949260712, "learning_rate": 7.016870619033772e-06, "loss": 0.2956, "step": 14371 }, { "epoch": 2.3403493058665474, "grad_norm": 0.1674719601869583, "learning_rate": 7.013580258507588e-06, "loss": 0.222, "step": 14372 }, { "epoch": 2.3405121524243784, "grad_norm": 0.18437837064266205, "learning_rate": 7.010290543747766e-06, "loss": 0.2666, "step": 14373 }, { "epoch": 2.340674998982209, "grad_norm": 0.1557450294494629, "learning_rate": 7.007001474872413e-06, "loss": 0.2482, "step": 14374 }, { "epoch": 2.34083784554004, "grad_norm": 0.1881650686264038, "learning_rate": 7.003713051999608e-06, "loss": 0.2469, "step": 14375 }, { "epoch": 2.3410006920978708, "grad_norm": 0.1893233358860016, "learning_rate": 7.000425275247432e-06, "loss": 0.2731, "step": 14376 }, { "epoch": 2.3411635386557017, "grad_norm": 0.16706424951553345, "learning_rate": 6.997138144733908e-06, "loss": 0.2581, "step": 14377 }, { "epoch": 2.3413263852135326, "grad_norm": 0.19169749319553375, "learning_rate": 6.99385166057707e-06, "loss": 0.3118, "step": 14378 }, { "epoch": 2.3414892317713636, "grad_norm": 0.19935554265975952, "learning_rate": 6.990565822894907e-06, "loss": 0.294, "step": 14379 }, { "epoch": 2.3416520783291945, "grad_norm": 0.18350905179977417, "learning_rate": 6.987280631805379e-06, "loss": 0.2859, "step": 14380 }, { "epoch": 2.341814924887025, "grad_norm": 0.13850945234298706, "learning_rate": 6.983996087426453e-06, "loss": 0.302, "step": 14381 }, { "epoch": 2.341977771444856, "grad_norm": 0.1922394335269928, "learning_rate": 6.980712189876043e-06, "loss": 0.3203, "step": 14382 }, { "epoch": 2.342140618002687, "grad_norm": 0.15184779465198517, "learning_rate": 6.977428939272046e-06, "loss": 0.2407, "step": 14383 }, { "epoch": 2.342303464560518, "grad_norm": 0.16529928147792816, "learning_rate": 6.974146335732354e-06, "loss": 0.2965, "step": 14384 }, { "epoch": 2.342466311118349, "grad_norm": 0.14227014780044556, "learning_rate": 6.970864379374817e-06, "loss": 0.2487, "step": 14385 }, { "epoch": 2.3426291576761797, "grad_norm": 0.1884317249059677, "learning_rate": 6.967583070317265e-06, "loss": 0.2825, "step": 14386 }, { "epoch": 2.3427920042340107, "grad_norm": 0.15870487689971924, "learning_rate": 6.964302408677497e-06, "loss": 0.2625, "step": 14387 }, { "epoch": 2.342954850791841, "grad_norm": 0.14897961914539337, "learning_rate": 6.9610223945733215e-06, "loss": 0.2625, "step": 14388 }, { "epoch": 2.343117697349672, "grad_norm": 0.217227965593338, "learning_rate": 6.9577430281224845e-06, "loss": 0.2816, "step": 14389 }, { "epoch": 2.343280543907503, "grad_norm": 0.1829788237810135, "learning_rate": 6.954464309442729e-06, "loss": 0.2809, "step": 14390 }, { "epoch": 2.343443390465334, "grad_norm": 0.13672178983688354, "learning_rate": 6.951186238651766e-06, "loss": 0.2658, "step": 14391 }, { "epoch": 2.343606237023165, "grad_norm": 0.13677671551704407, "learning_rate": 6.9479088158672985e-06, "loss": 0.2552, "step": 14392 }, { "epoch": 2.343769083580996, "grad_norm": 0.195815309882164, "learning_rate": 6.944632041206989e-06, "loss": 0.2606, "step": 14393 }, { "epoch": 2.343931930138827, "grad_norm": 0.18379223346710205, "learning_rate": 6.941355914788478e-06, "loss": 0.2747, "step": 14394 }, { "epoch": 2.3440947766966573, "grad_norm": 0.16730724275112152, "learning_rate": 6.938080436729402e-06, "loss": 0.2844, "step": 14395 }, { "epoch": 2.3442576232544883, "grad_norm": 0.1706780642271042, "learning_rate": 6.934805607147355e-06, "loss": 0.2929, "step": 14396 }, { "epoch": 2.344420469812319, "grad_norm": 0.19174742698669434, "learning_rate": 6.9315314261599096e-06, "loss": 0.2578, "step": 14397 }, { "epoch": 2.34458331637015, "grad_norm": 0.18662671744823456, "learning_rate": 6.928257893884612e-06, "loss": 0.2728, "step": 14398 }, { "epoch": 2.344746162927981, "grad_norm": 0.18496885895729065, "learning_rate": 6.924985010439008e-06, "loss": 0.2895, "step": 14399 }, { "epoch": 2.344909009485812, "grad_norm": 0.18637049198150635, "learning_rate": 6.9217127759406e-06, "loss": 0.2703, "step": 14400 }, { "epoch": 2.345071856043643, "grad_norm": 0.17245642840862274, "learning_rate": 6.918441190506864e-06, "loss": 0.2738, "step": 14401 }, { "epoch": 2.345234702601474, "grad_norm": 0.17929601669311523, "learning_rate": 6.915170254255257e-06, "loss": 0.2433, "step": 14402 }, { "epoch": 2.345397549159305, "grad_norm": 0.17520684003829956, "learning_rate": 6.911899967303229e-06, "loss": 0.3196, "step": 14403 }, { "epoch": 2.3455603957171354, "grad_norm": 0.18625091016292572, "learning_rate": 6.908630329768184e-06, "loss": 0.2557, "step": 14404 }, { "epoch": 2.3457232422749663, "grad_norm": 0.16833384335041046, "learning_rate": 6.905361341767508e-06, "loss": 0.2594, "step": 14405 }, { "epoch": 2.3458860888327973, "grad_norm": 0.22673432528972626, "learning_rate": 6.902093003418578e-06, "loss": 0.2633, "step": 14406 }, { "epoch": 2.346048935390628, "grad_norm": 0.14933091402053833, "learning_rate": 6.898825314838733e-06, "loss": 0.2268, "step": 14407 }, { "epoch": 2.346211781948459, "grad_norm": 0.17522194981575012, "learning_rate": 6.895558276145292e-06, "loss": 0.2851, "step": 14408 }, { "epoch": 2.34637462850629, "grad_norm": 0.18050824105739594, "learning_rate": 6.892291887455543e-06, "loss": 0.2681, "step": 14409 }, { "epoch": 2.346537475064121, "grad_norm": 0.17551769316196442, "learning_rate": 6.889026148886777e-06, "loss": 0.2785, "step": 14410 }, { "epoch": 2.3467003216219515, "grad_norm": 0.13971592485904694, "learning_rate": 6.885761060556231e-06, "loss": 0.3118, "step": 14411 }, { "epoch": 2.3468631681797825, "grad_norm": 0.19285845756530762, "learning_rate": 6.88249662258113e-06, "loss": 0.2753, "step": 14412 }, { "epoch": 2.3470260147376134, "grad_norm": 0.17454789578914642, "learning_rate": 6.879232835078689e-06, "loss": 0.264, "step": 14413 }, { "epoch": 2.3471888612954444, "grad_norm": 0.1981508731842041, "learning_rate": 6.875969698166079e-06, "loss": 0.2728, "step": 14414 }, { "epoch": 2.3473517078532753, "grad_norm": 0.2055361270904541, "learning_rate": 6.872707211960449e-06, "loss": 0.2918, "step": 14415 }, { "epoch": 2.3475145544111062, "grad_norm": 0.13636378943920135, "learning_rate": 6.8694453765789505e-06, "loss": 0.2721, "step": 14416 }, { "epoch": 2.347677400968937, "grad_norm": 0.2411046326160431, "learning_rate": 6.866184192138678e-06, "loss": 0.3147, "step": 14417 }, { "epoch": 2.3478402475267677, "grad_norm": 0.13240332901477814, "learning_rate": 6.862923658756728e-06, "loss": 0.2396, "step": 14418 }, { "epoch": 2.3480030940845986, "grad_norm": 0.17194794118404388, "learning_rate": 6.859663776550159e-06, "loss": 0.2688, "step": 14419 }, { "epoch": 2.3481659406424296, "grad_norm": 0.14919966459274292, "learning_rate": 6.856404545636002e-06, "loss": 0.2565, "step": 14420 }, { "epoch": 2.3483287872002605, "grad_norm": 0.2024966925382614, "learning_rate": 6.8531459661312915e-06, "loss": 0.3119, "step": 14421 }, { "epoch": 2.3484916337580914, "grad_norm": 0.16663381457328796, "learning_rate": 6.84988803815301e-06, "loss": 0.2763, "step": 14422 }, { "epoch": 2.3486544803159224, "grad_norm": 0.17275269329547882, "learning_rate": 6.846630761818118e-06, "loss": 0.2538, "step": 14423 }, { "epoch": 2.3488173268737533, "grad_norm": 0.1755254715681076, "learning_rate": 6.84337413724358e-06, "loss": 0.3177, "step": 14424 }, { "epoch": 2.3489801734315843, "grad_norm": 0.14739470183849335, "learning_rate": 6.840118164546308e-06, "loss": 0.2686, "step": 14425 }, { "epoch": 2.349143019989415, "grad_norm": 0.1979944407939911, "learning_rate": 6.836862843843201e-06, "loss": 0.2819, "step": 14426 }, { "epoch": 2.3493058665472457, "grad_norm": 0.17379257082939148, "learning_rate": 6.83360817525113e-06, "loss": 0.2639, "step": 14427 }, { "epoch": 2.3494687131050767, "grad_norm": 0.14206774532794952, "learning_rate": 6.830354158886962e-06, "loss": 0.2649, "step": 14428 }, { "epoch": 2.3496315596629076, "grad_norm": 0.17996348440647125, "learning_rate": 6.827100794867516e-06, "loss": 0.3027, "step": 14429 }, { "epoch": 2.3497944062207385, "grad_norm": 0.18250364065170288, "learning_rate": 6.823848083309597e-06, "loss": 0.2634, "step": 14430 }, { "epoch": 2.3499572527785695, "grad_norm": 0.15178319811820984, "learning_rate": 6.820596024329987e-06, "loss": 0.268, "step": 14431 }, { "epoch": 2.3501200993364004, "grad_norm": 0.17655344307422638, "learning_rate": 6.817344618045449e-06, "loss": 0.2754, "step": 14432 }, { "epoch": 2.3502829458942314, "grad_norm": 0.20870108902454376, "learning_rate": 6.814093864572718e-06, "loss": 0.3099, "step": 14433 }, { "epoch": 2.350445792452062, "grad_norm": 0.15805785357952118, "learning_rate": 6.810843764028496e-06, "loss": 0.282, "step": 14434 }, { "epoch": 2.350608639009893, "grad_norm": 0.18263618648052216, "learning_rate": 6.807594316529486e-06, "loss": 0.2656, "step": 14435 }, { "epoch": 2.3507714855677238, "grad_norm": 0.17292167246341705, "learning_rate": 6.804345522192346e-06, "loss": 0.2782, "step": 14436 }, { "epoch": 2.3509343321255547, "grad_norm": 0.18787744641304016, "learning_rate": 6.80109738113372e-06, "loss": 0.2608, "step": 14437 }, { "epoch": 2.3510971786833856, "grad_norm": 0.21563264727592468, "learning_rate": 6.7978498934702125e-06, "loss": 0.3005, "step": 14438 }, { "epoch": 2.3512600252412166, "grad_norm": 0.16470462083816528, "learning_rate": 6.794603059318438e-06, "loss": 0.2268, "step": 14439 }, { "epoch": 2.3514228717990475, "grad_norm": 0.14502209424972534, "learning_rate": 6.791356878794958e-06, "loss": 0.3103, "step": 14440 }, { "epoch": 2.351585718356878, "grad_norm": 0.19571207463741302, "learning_rate": 6.7881113520163214e-06, "loss": 0.259, "step": 14441 }, { "epoch": 2.351748564914709, "grad_norm": 0.15581074357032776, "learning_rate": 6.784866479099042e-06, "loss": 0.2639, "step": 14442 }, { "epoch": 2.35191141147254, "grad_norm": 0.1746995896100998, "learning_rate": 6.78162226015964e-06, "loss": 0.2596, "step": 14443 }, { "epoch": 2.352074258030371, "grad_norm": 0.15943004190921783, "learning_rate": 6.77837869531458e-06, "loss": 0.2549, "step": 14444 }, { "epoch": 2.352237104588202, "grad_norm": 0.17306488752365112, "learning_rate": 6.775135784680314e-06, "loss": 0.2847, "step": 14445 }, { "epoch": 2.3523999511460327, "grad_norm": 0.13063068687915802, "learning_rate": 6.771893528373283e-06, "loss": 0.2817, "step": 14446 }, { "epoch": 2.3525627977038637, "grad_norm": 0.1367645412683487, "learning_rate": 6.768651926509884e-06, "loss": 0.2434, "step": 14447 }, { "epoch": 2.352725644261694, "grad_norm": 0.16392508149147034, "learning_rate": 6.765410979206507e-06, "loss": 0.2587, "step": 14448 }, { "epoch": 2.352888490819525, "grad_norm": 0.15424194931983948, "learning_rate": 6.7621706865795e-06, "loss": 0.2557, "step": 14449 }, { "epoch": 2.353051337377356, "grad_norm": 0.24841611087322235, "learning_rate": 6.7589310487452126e-06, "loss": 0.2761, "step": 14450 }, { "epoch": 2.353214183935187, "grad_norm": 0.194005087018013, "learning_rate": 6.755692065819955e-06, "loss": 0.2358, "step": 14451 }, { "epoch": 2.353377030493018, "grad_norm": 0.1724909394979477, "learning_rate": 6.752453737920006e-06, "loss": 0.2663, "step": 14452 }, { "epoch": 2.353539877050849, "grad_norm": 0.20229586958885193, "learning_rate": 6.749216065161645e-06, "loss": 0.2538, "step": 14453 }, { "epoch": 2.35370272360868, "grad_norm": 0.17080076038837433, "learning_rate": 6.745979047661099e-06, "loss": 0.2653, "step": 14454 }, { "epoch": 2.3538655701665108, "grad_norm": 0.12787552177906036, "learning_rate": 6.742742685534606e-06, "loss": 0.2778, "step": 14455 }, { "epoch": 2.3540284167243417, "grad_norm": 0.19352392852306366, "learning_rate": 6.73950697889835e-06, "loss": 0.221, "step": 14456 }, { "epoch": 2.354191263282172, "grad_norm": 0.20488306879997253, "learning_rate": 6.736271927868496e-06, "loss": 0.2748, "step": 14457 }, { "epoch": 2.354354109840003, "grad_norm": 0.16540490090847015, "learning_rate": 6.733037532561207e-06, "loss": 0.2625, "step": 14458 }, { "epoch": 2.354516956397834, "grad_norm": 0.20768117904663086, "learning_rate": 6.7298037930925985e-06, "loss": 0.2881, "step": 14459 }, { "epoch": 2.354679802955665, "grad_norm": 0.15166649222373962, "learning_rate": 6.726570709578767e-06, "loss": 0.2683, "step": 14460 }, { "epoch": 2.354842649513496, "grad_norm": 0.1796637773513794, "learning_rate": 6.7233382821358025e-06, "loss": 0.2995, "step": 14461 }, { "epoch": 2.355005496071327, "grad_norm": 0.13812270760536194, "learning_rate": 6.720106510879753e-06, "loss": 0.2691, "step": 14462 }, { "epoch": 2.355168342629158, "grad_norm": 0.20068298280239105, "learning_rate": 6.716875395926639e-06, "loss": 0.2862, "step": 14463 }, { "epoch": 2.3553311891869884, "grad_norm": 0.15709622204303741, "learning_rate": 6.713644937392482e-06, "loss": 0.2993, "step": 14464 }, { "epoch": 2.3554940357448193, "grad_norm": 0.17232248187065125, "learning_rate": 6.7104151353932635e-06, "loss": 0.2692, "step": 14465 }, { "epoch": 2.3556568823026502, "grad_norm": 0.18554003536701202, "learning_rate": 6.707185990044937e-06, "loss": 0.2639, "step": 14466 }, { "epoch": 2.355819728860481, "grad_norm": 0.16650421917438507, "learning_rate": 6.703957501463432e-06, "loss": 0.2669, "step": 14467 }, { "epoch": 2.355982575418312, "grad_norm": 0.1626349240541458, "learning_rate": 6.700729669764674e-06, "loss": 0.2536, "step": 14468 }, { "epoch": 2.356145421976143, "grad_norm": 0.13807672262191772, "learning_rate": 6.697502495064551e-06, "loss": 0.2607, "step": 14469 }, { "epoch": 2.356308268533974, "grad_norm": 0.17734862864017487, "learning_rate": 6.694275977478923e-06, "loss": 0.2757, "step": 14470 }, { "epoch": 2.3564711150918045, "grad_norm": 0.16016799211502075, "learning_rate": 6.691050117123623e-06, "loss": 0.2608, "step": 14471 }, { "epoch": 2.3566339616496355, "grad_norm": 0.2060948610305786, "learning_rate": 6.68782491411449e-06, "loss": 0.2764, "step": 14472 }, { "epoch": 2.3567968082074664, "grad_norm": 0.16170714795589447, "learning_rate": 6.684600368567304e-06, "loss": 0.2732, "step": 14473 }, { "epoch": 2.3569596547652973, "grad_norm": 0.22337105870246887, "learning_rate": 6.681376480597834e-06, "loss": 0.2745, "step": 14474 }, { "epoch": 2.3571225013231283, "grad_norm": 0.14789336919784546, "learning_rate": 6.678153250321839e-06, "loss": 0.2552, "step": 14475 }, { "epoch": 2.3572853478809592, "grad_norm": 0.1629229038953781, "learning_rate": 6.674930677855038e-06, "loss": 0.2792, "step": 14476 }, { "epoch": 2.35744819443879, "grad_norm": 0.21841008961200714, "learning_rate": 6.671708763313125e-06, "loss": 0.2862, "step": 14477 }, { "epoch": 2.357611040996621, "grad_norm": 0.15824590623378754, "learning_rate": 6.668487506811777e-06, "loss": 0.2678, "step": 14478 }, { "epoch": 2.357773887554452, "grad_norm": 0.18631544709205627, "learning_rate": 6.665266908466655e-06, "loss": 0.2865, "step": 14479 }, { "epoch": 2.3579367341122826, "grad_norm": 0.1788068562746048, "learning_rate": 6.662046968393384e-06, "loss": 0.2638, "step": 14480 }, { "epoch": 2.3580995806701135, "grad_norm": 0.20278778672218323, "learning_rate": 6.658827686707569e-06, "loss": 0.2548, "step": 14481 }, { "epoch": 2.3582624272279444, "grad_norm": 0.19608673453330994, "learning_rate": 6.655609063524784e-06, "loss": 0.2984, "step": 14482 }, { "epoch": 2.3584252737857754, "grad_norm": 0.18082121014595032, "learning_rate": 6.652391098960603e-06, "loss": 0.2853, "step": 14483 }, { "epoch": 2.3585881203436063, "grad_norm": 0.14144368469715118, "learning_rate": 6.6491737931305506e-06, "loss": 0.2532, "step": 14484 }, { "epoch": 2.3587509669014373, "grad_norm": 0.1776283085346222, "learning_rate": 6.645957146150134e-06, "loss": 0.26, "step": 14485 }, { "epoch": 2.358913813459268, "grad_norm": 0.18012629449367523, "learning_rate": 6.642741158134852e-06, "loss": 0.2777, "step": 14486 }, { "epoch": 2.3590766600170987, "grad_norm": 0.16728521883487701, "learning_rate": 6.639525829200163e-06, "loss": 0.288, "step": 14487 }, { "epoch": 2.3592395065749296, "grad_norm": 0.1990658938884735, "learning_rate": 6.636311159461508e-06, "loss": 0.2606, "step": 14488 }, { "epoch": 2.3594023531327606, "grad_norm": 0.17662115395069122, "learning_rate": 6.633097149034293e-06, "loss": 0.2551, "step": 14489 }, { "epoch": 2.3595651996905915, "grad_norm": 0.173638254404068, "learning_rate": 6.629883798033926e-06, "loss": 0.2508, "step": 14490 }, { "epoch": 2.3597280462484225, "grad_norm": 0.19958217442035675, "learning_rate": 6.62667110657576e-06, "loss": 0.2593, "step": 14491 }, { "epoch": 2.3598908928062534, "grad_norm": 0.16716374456882477, "learning_rate": 6.623459074775157e-06, "loss": 0.2834, "step": 14492 }, { "epoch": 2.3600537393640844, "grad_norm": 0.19490453600883484, "learning_rate": 6.620247702747434e-06, "loss": 0.2422, "step": 14493 }, { "epoch": 2.360216585921915, "grad_norm": 0.12589988112449646, "learning_rate": 6.617036990607875e-06, "loss": 0.2975, "step": 14494 }, { "epoch": 2.360379432479746, "grad_norm": 0.2027909755706787, "learning_rate": 6.613826938471773e-06, "loss": 0.2866, "step": 14495 }, { "epoch": 2.3605422790375767, "grad_norm": 0.20095984637737274, "learning_rate": 6.610617546454364e-06, "loss": 0.2781, "step": 14496 }, { "epoch": 2.3607051255954077, "grad_norm": 0.13457584381103516, "learning_rate": 6.607408814670887e-06, "loss": 0.2675, "step": 14497 }, { "epoch": 2.3608679721532386, "grad_norm": 0.14203226566314697, "learning_rate": 6.604200743236541e-06, "loss": 0.2805, "step": 14498 }, { "epoch": 2.3610308187110696, "grad_norm": 0.1662093847990036, "learning_rate": 6.600993332266503e-06, "loss": 0.267, "step": 14499 }, { "epoch": 2.3611936652689005, "grad_norm": 0.19076324999332428, "learning_rate": 6.59778658187592e-06, "loss": 0.2683, "step": 14500 }, { "epoch": 2.3613565118267315, "grad_norm": 0.16895779967308044, "learning_rate": 6.594580492179944e-06, "loss": 0.2078, "step": 14501 }, { "epoch": 2.3615193583845624, "grad_norm": 0.17236098647117615, "learning_rate": 6.59137506329367e-06, "loss": 0.3061, "step": 14502 }, { "epoch": 2.361682204942393, "grad_norm": 0.18589909374713898, "learning_rate": 6.588170295332185e-06, "loss": 0.2518, "step": 14503 }, { "epoch": 2.361845051500224, "grad_norm": 0.14930734038352966, "learning_rate": 6.5849661884105435e-06, "loss": 0.2285, "step": 14504 }, { "epoch": 2.3620078980580548, "grad_norm": 0.22290466725826263, "learning_rate": 6.581762742643796e-06, "loss": 0.3122, "step": 14505 }, { "epoch": 2.3621707446158857, "grad_norm": 0.16573713719844818, "learning_rate": 6.578559958146946e-06, "loss": 0.2489, "step": 14506 }, { "epoch": 2.3623335911737167, "grad_norm": 0.15940147638320923, "learning_rate": 6.575357835034982e-06, "loss": 0.278, "step": 14507 }, { "epoch": 2.3624964377315476, "grad_norm": 0.19114136695861816, "learning_rate": 6.572156373422877e-06, "loss": 0.2785, "step": 14508 }, { "epoch": 2.3626592842893785, "grad_norm": 0.14323434233665466, "learning_rate": 6.568955573425573e-06, "loss": 0.2615, "step": 14509 }, { "epoch": 2.362822130847209, "grad_norm": 0.167485773563385, "learning_rate": 6.565755435157983e-06, "loss": 0.2658, "step": 14510 }, { "epoch": 2.36298497740504, "grad_norm": 0.17985010147094727, "learning_rate": 6.562555958734998e-06, "loss": 0.264, "step": 14511 }, { "epoch": 2.363147823962871, "grad_norm": 0.17898356914520264, "learning_rate": 6.559357144271499e-06, "loss": 0.2315, "step": 14512 }, { "epoch": 2.363310670520702, "grad_norm": 0.18213479220867157, "learning_rate": 6.556158991882327e-06, "loss": 0.2614, "step": 14513 }, { "epoch": 2.363473517078533, "grad_norm": 0.16725121438503265, "learning_rate": 6.5529615016823034e-06, "loss": 0.2754, "step": 14514 }, { "epoch": 2.3636363636363638, "grad_norm": 0.19897879660129547, "learning_rate": 6.549764673786235e-06, "loss": 0.2699, "step": 14515 }, { "epoch": 2.3637992101941947, "grad_norm": 0.19078144431114197, "learning_rate": 6.546568508308895e-06, "loss": 0.2853, "step": 14516 }, { "epoch": 2.363962056752025, "grad_norm": 0.18643003702163696, "learning_rate": 6.543373005365031e-06, "loss": 0.2513, "step": 14517 }, { "epoch": 2.364124903309856, "grad_norm": 0.1710098385810852, "learning_rate": 6.5401781650693686e-06, "loss": 0.2842, "step": 14518 }, { "epoch": 2.364287749867687, "grad_norm": 0.15376406908035278, "learning_rate": 6.536983987536624e-06, "loss": 0.2363, "step": 14519 }, { "epoch": 2.364450596425518, "grad_norm": 0.14301851391792297, "learning_rate": 6.533790472881471e-06, "loss": 0.2575, "step": 14520 }, { "epoch": 2.364613442983349, "grad_norm": 0.1850380152463913, "learning_rate": 6.5305976212185645e-06, "loss": 0.2924, "step": 14521 }, { "epoch": 2.36477628954118, "grad_norm": 0.16688284277915955, "learning_rate": 6.527405432662534e-06, "loss": 0.2374, "step": 14522 }, { "epoch": 2.364939136099011, "grad_norm": 0.208238884806633, "learning_rate": 6.524213907328e-06, "loss": 0.2877, "step": 14523 }, { "epoch": 2.3651019826568414, "grad_norm": 0.20286104083061218, "learning_rate": 6.521023045329541e-06, "loss": 0.2836, "step": 14524 }, { "epoch": 2.3652648292146723, "grad_norm": 0.1473992019891739, "learning_rate": 6.517832846781713e-06, "loss": 0.2382, "step": 14525 }, { "epoch": 2.3654276757725032, "grad_norm": 0.17750650644302368, "learning_rate": 6.5146433117990675e-06, "loss": 0.2552, "step": 14526 }, { "epoch": 2.365590522330334, "grad_norm": 0.16379965841770172, "learning_rate": 6.511454440496109e-06, "loss": 0.2644, "step": 14527 }, { "epoch": 2.365753368888165, "grad_norm": 0.15821190178394318, "learning_rate": 6.508266232987323e-06, "loss": 0.2663, "step": 14528 }, { "epoch": 2.365916215445996, "grad_norm": 0.16703172028064728, "learning_rate": 6.50507868938719e-06, "loss": 0.2779, "step": 14529 }, { "epoch": 2.366079062003827, "grad_norm": 0.13821309804916382, "learning_rate": 6.501891809810143e-06, "loss": 0.2793, "step": 14530 }, { "epoch": 2.366241908561658, "grad_norm": 0.21932026743888855, "learning_rate": 6.498705594370596e-06, "loss": 0.2318, "step": 14531 }, { "epoch": 2.366404755119489, "grad_norm": 0.16021285951137543, "learning_rate": 6.4955200431829564e-06, "loss": 0.2398, "step": 14532 }, { "epoch": 2.3665676016773194, "grad_norm": 0.17364315688610077, "learning_rate": 6.492335156361587e-06, "loss": 0.2826, "step": 14533 }, { "epoch": 2.3667304482351503, "grad_norm": 0.1636306345462799, "learning_rate": 6.489150934020829e-06, "loss": 0.256, "step": 14534 }, { "epoch": 2.3668932947929813, "grad_norm": 0.17705747485160828, "learning_rate": 6.48596737627502e-06, "loss": 0.2677, "step": 14535 }, { "epoch": 2.367056141350812, "grad_norm": 0.15701310336589813, "learning_rate": 6.482784483238446e-06, "loss": 0.2834, "step": 14536 }, { "epoch": 2.367218987908643, "grad_norm": 0.21786750853061676, "learning_rate": 6.479602255025394e-06, "loss": 0.2743, "step": 14537 }, { "epoch": 2.367381834466474, "grad_norm": 0.17130431532859802, "learning_rate": 6.47642069175011e-06, "loss": 0.2586, "step": 14538 }, { "epoch": 2.367544681024305, "grad_norm": 0.19534336030483246, "learning_rate": 6.47323979352682e-06, "loss": 0.272, "step": 14539 }, { "epoch": 2.3677075275821355, "grad_norm": 0.1760832518339157, "learning_rate": 6.470059560469721e-06, "loss": 0.2468, "step": 14540 }, { "epoch": 2.3678703741399665, "grad_norm": 0.1548098623752594, "learning_rate": 6.466879992693012e-06, "loss": 0.2504, "step": 14541 }, { "epoch": 2.3680332206977974, "grad_norm": 0.18393808603286743, "learning_rate": 6.4637010903108365e-06, "loss": 0.2657, "step": 14542 }, { "epoch": 2.3681960672556284, "grad_norm": 0.2027910053730011, "learning_rate": 6.460522853437326e-06, "loss": 0.2869, "step": 14543 }, { "epoch": 2.3683589138134593, "grad_norm": 0.1402100920677185, "learning_rate": 6.457345282186586e-06, "loss": 0.2532, "step": 14544 }, { "epoch": 2.3685217603712903, "grad_norm": 0.13576091825962067, "learning_rate": 6.4541683766727115e-06, "loss": 0.3011, "step": 14545 }, { "epoch": 2.368684606929121, "grad_norm": 0.14671120047569275, "learning_rate": 6.4509921370097564e-06, "loss": 0.2714, "step": 14546 }, { "epoch": 2.3688474534869517, "grad_norm": 0.1954043209552765, "learning_rate": 6.447816563311753e-06, "loss": 0.297, "step": 14547 }, { "epoch": 2.3690103000447826, "grad_norm": 0.15308934450149536, "learning_rate": 6.444641655692724e-06, "loss": 0.2479, "step": 14548 }, { "epoch": 2.3691731466026136, "grad_norm": 0.1662936955690384, "learning_rate": 6.441467414266655e-06, "loss": 0.2781, "step": 14549 }, { "epoch": 2.3693359931604445, "grad_norm": 0.15186934173107147, "learning_rate": 6.4382938391475074e-06, "loss": 0.2582, "step": 14550 }, { "epoch": 2.3694988397182755, "grad_norm": 0.1516696661710739, "learning_rate": 6.4351209304492145e-06, "loss": 0.2688, "step": 14551 }, { "epoch": 2.3696616862761064, "grad_norm": 0.15862460434436798, "learning_rate": 6.431948688285711e-06, "loss": 0.2564, "step": 14552 }, { "epoch": 2.3698245328339373, "grad_norm": 0.18688325583934784, "learning_rate": 6.4287771127708815e-06, "loss": 0.2837, "step": 14553 }, { "epoch": 2.3699873793917683, "grad_norm": 0.15145130455493927, "learning_rate": 6.4256062040185864e-06, "loss": 0.2466, "step": 14554 }, { "epoch": 2.3701502259495992, "grad_norm": 0.20793238282203674, "learning_rate": 6.4224359621426864e-06, "loss": 0.2811, "step": 14555 }, { "epoch": 2.3703130725074297, "grad_norm": 0.1399536430835724, "learning_rate": 6.419266387256995e-06, "loss": 0.2388, "step": 14556 }, { "epoch": 2.3704759190652607, "grad_norm": 0.1696871966123581, "learning_rate": 6.416097479475311e-06, "loss": 0.2637, "step": 14557 }, { "epoch": 2.3706387656230916, "grad_norm": 0.1577107012271881, "learning_rate": 6.412929238911397e-06, "loss": 0.2913, "step": 14558 }, { "epoch": 2.3708016121809226, "grad_norm": 0.15287823975086212, "learning_rate": 6.409761665679021e-06, "loss": 0.2296, "step": 14559 }, { "epoch": 2.3709644587387535, "grad_norm": 0.15323412418365479, "learning_rate": 6.406594759891896e-06, "loss": 0.285, "step": 14560 }, { "epoch": 2.3711273052965844, "grad_norm": 0.17401078343391418, "learning_rate": 6.403428521663729e-06, "loss": 0.2546, "step": 14561 }, { "epoch": 2.3712901518544154, "grad_norm": 0.19147826731204987, "learning_rate": 6.400262951108188e-06, "loss": 0.2959, "step": 14562 }, { "epoch": 2.371452998412246, "grad_norm": 0.2055273801088333, "learning_rate": 6.39709804833894e-06, "loss": 0.2483, "step": 14563 }, { "epoch": 2.371615844970077, "grad_norm": 0.22275486588478088, "learning_rate": 6.393933813469605e-06, "loss": 0.2593, "step": 14564 }, { "epoch": 2.3717786915279078, "grad_norm": 0.1468350887298584, "learning_rate": 6.390770246613786e-06, "loss": 0.2757, "step": 14565 }, { "epoch": 2.3719415380857387, "grad_norm": 0.16321268677711487, "learning_rate": 6.38760734788508e-06, "loss": 0.2621, "step": 14566 }, { "epoch": 2.3721043846435697, "grad_norm": 0.14570550620555878, "learning_rate": 6.384445117397031e-06, "loss": 0.2802, "step": 14567 }, { "epoch": 2.3722672312014006, "grad_norm": 0.15357181429862976, "learning_rate": 6.3812835552631706e-06, "loss": 0.2847, "step": 14568 }, { "epoch": 2.3724300777592315, "grad_norm": 0.15272273123264313, "learning_rate": 6.378122661597019e-06, "loss": 0.2488, "step": 14569 }, { "epoch": 2.372592924317062, "grad_norm": 0.1624702662229538, "learning_rate": 6.374962436512058e-06, "loss": 0.2789, "step": 14570 }, { "epoch": 2.372755770874893, "grad_norm": 0.13812068104743958, "learning_rate": 6.3718028801217415e-06, "loss": 0.2558, "step": 14571 }, { "epoch": 2.372918617432724, "grad_norm": 0.20728959143161774, "learning_rate": 6.368643992539519e-06, "loss": 0.3008, "step": 14572 }, { "epoch": 2.373081463990555, "grad_norm": 0.1660926640033722, "learning_rate": 6.36548577387879e-06, "loss": 0.2876, "step": 14573 }, { "epoch": 2.373244310548386, "grad_norm": 0.1630016714334488, "learning_rate": 6.362328224252961e-06, "loss": 0.2551, "step": 14574 }, { "epoch": 2.3734071571062167, "grad_norm": 0.15014204382896423, "learning_rate": 6.359171343775388e-06, "loss": 0.2355, "step": 14575 }, { "epoch": 2.3735700036640477, "grad_norm": 0.15266790986061096, "learning_rate": 6.356015132559406e-06, "loss": 0.2549, "step": 14576 }, { "epoch": 2.373732850221878, "grad_norm": 0.18526099622249603, "learning_rate": 6.3528595907183466e-06, "loss": 0.29, "step": 14577 }, { "epoch": 2.373895696779709, "grad_norm": 0.17089132964611053, "learning_rate": 6.3497047183654955e-06, "loss": 0.265, "step": 14578 }, { "epoch": 2.37405854333754, "grad_norm": 0.1742272973060608, "learning_rate": 6.346550515614125e-06, "loss": 0.2459, "step": 14579 }, { "epoch": 2.374221389895371, "grad_norm": 0.16603045165538788, "learning_rate": 6.343396982577468e-06, "loss": 0.287, "step": 14580 }, { "epoch": 2.374384236453202, "grad_norm": 0.1733018159866333, "learning_rate": 6.3402441193687655e-06, "loss": 0.2446, "step": 14581 }, { "epoch": 2.374547083011033, "grad_norm": 0.18344157934188843, "learning_rate": 6.337091926101205e-06, "loss": 0.2614, "step": 14582 }, { "epoch": 2.374709929568864, "grad_norm": 0.1510176807641983, "learning_rate": 6.3339404028879585e-06, "loss": 0.2835, "step": 14583 }, { "epoch": 2.374872776126695, "grad_norm": 0.2024475485086441, "learning_rate": 6.330789549842172e-06, "loss": 0.3254, "step": 14584 }, { "epoch": 2.3750356226845257, "grad_norm": 0.13886380195617676, "learning_rate": 6.327639367076979e-06, "loss": 0.2609, "step": 14585 }, { "epoch": 2.3751984692423562, "grad_norm": 0.16275011003017426, "learning_rate": 6.3244898547054785e-06, "loss": 0.3139, "step": 14586 }, { "epoch": 2.375361315800187, "grad_norm": 0.19053827226161957, "learning_rate": 6.3213410128407395e-06, "loss": 0.253, "step": 14587 }, { "epoch": 2.375524162358018, "grad_norm": 0.18255873024463654, "learning_rate": 6.318192841595827e-06, "loss": 0.3152, "step": 14588 }, { "epoch": 2.375687008915849, "grad_norm": 0.14797687530517578, "learning_rate": 6.315045341083764e-06, "loss": 0.2871, "step": 14589 }, { "epoch": 2.37584985547368, "grad_norm": 0.14814156293869019, "learning_rate": 6.311898511417552e-06, "loss": 0.2414, "step": 14590 }, { "epoch": 2.376012702031511, "grad_norm": 0.15537190437316895, "learning_rate": 6.308752352710171e-06, "loss": 0.3074, "step": 14591 }, { "epoch": 2.376175548589342, "grad_norm": 0.15092021226882935, "learning_rate": 6.305606865074587e-06, "loss": 0.2815, "step": 14592 }, { "epoch": 2.3763383951471724, "grad_norm": 0.2051839977502823, "learning_rate": 6.302462048623725e-06, "loss": 0.2554, "step": 14593 }, { "epoch": 2.3765012417050033, "grad_norm": 0.20520387589931488, "learning_rate": 6.299317903470497e-06, "loss": 0.2952, "step": 14594 }, { "epoch": 2.3766640882628343, "grad_norm": 0.19449745118618011, "learning_rate": 6.296174429727778e-06, "loss": 0.3081, "step": 14595 }, { "epoch": 2.376826934820665, "grad_norm": 0.17984265089035034, "learning_rate": 6.2930316275084385e-06, "loss": 0.2523, "step": 14596 }, { "epoch": 2.376989781378496, "grad_norm": 0.15275488793849945, "learning_rate": 6.289889496925316e-06, "loss": 0.2445, "step": 14597 }, { "epoch": 2.377152627936327, "grad_norm": 0.23512299358844757, "learning_rate": 6.286748038091209e-06, "loss": 0.274, "step": 14598 }, { "epoch": 2.377315474494158, "grad_norm": 0.18456964194774628, "learning_rate": 6.28360725111892e-06, "loss": 0.3262, "step": 14599 }, { "epoch": 2.3774783210519885, "grad_norm": 0.18916484713554382, "learning_rate": 6.280467136121204e-06, "loss": 0.2443, "step": 14600 }, { "epoch": 2.3776411676098195, "grad_norm": 0.14430370926856995, "learning_rate": 6.2773276932108074e-06, "loss": 0.2784, "step": 14601 }, { "epoch": 2.3778040141676504, "grad_norm": 0.252080500125885, "learning_rate": 6.2741889225004316e-06, "loss": 0.2609, "step": 14602 }, { "epoch": 2.3779668607254814, "grad_norm": 0.14133958518505096, "learning_rate": 6.271050824102787e-06, "loss": 0.2653, "step": 14603 }, { "epoch": 2.3781297072833123, "grad_norm": 0.2014516443014145, "learning_rate": 6.267913398130529e-06, "loss": 0.2784, "step": 14604 }, { "epoch": 2.3782925538411432, "grad_norm": 0.16403429210186005, "learning_rate": 6.264776644696296e-06, "loss": 0.2583, "step": 14605 }, { "epoch": 2.378455400398974, "grad_norm": 0.1559690684080124, "learning_rate": 6.261640563912721e-06, "loss": 0.2967, "step": 14606 }, { "epoch": 2.378618246956805, "grad_norm": 0.1673988252878189, "learning_rate": 6.258505155892391e-06, "loss": 0.2182, "step": 14607 }, { "epoch": 2.378781093514636, "grad_norm": 0.20951823890209198, "learning_rate": 6.255370420747872e-06, "loss": 0.2729, "step": 14608 }, { "epoch": 2.3789439400724666, "grad_norm": 0.181718647480011, "learning_rate": 6.25223635859172e-06, "loss": 0.2697, "step": 14609 }, { "epoch": 2.3791067866302975, "grad_norm": 0.1352790892124176, "learning_rate": 6.249102969536446e-06, "loss": 0.3001, "step": 14610 }, { "epoch": 2.3792696331881285, "grad_norm": 0.12088273465633392, "learning_rate": 6.245970253694561e-06, "loss": 0.264, "step": 14611 }, { "epoch": 2.3794324797459594, "grad_norm": 0.17695821821689606, "learning_rate": 6.242838211178534e-06, "loss": 0.2686, "step": 14612 }, { "epoch": 2.3795953263037903, "grad_norm": 0.15066976845264435, "learning_rate": 6.239706842100807e-06, "loss": 0.272, "step": 14613 }, { "epoch": 2.3797581728616213, "grad_norm": 0.1970876306295395, "learning_rate": 6.236576146573817e-06, "loss": 0.2758, "step": 14614 }, { "epoch": 2.379921019419452, "grad_norm": 0.16010920703411102, "learning_rate": 6.233446124709957e-06, "loss": 0.2829, "step": 14615 }, { "epoch": 2.3800838659772827, "grad_norm": 0.18205195665359497, "learning_rate": 6.2303167766216055e-06, "loss": 0.251, "step": 14616 }, { "epoch": 2.3802467125351137, "grad_norm": 0.17888040840625763, "learning_rate": 6.2271881024211224e-06, "loss": 0.2559, "step": 14617 }, { "epoch": 2.3804095590929446, "grad_norm": 0.1901322454214096, "learning_rate": 6.22406010222083e-06, "loss": 0.2762, "step": 14618 }, { "epoch": 2.3805724056507755, "grad_norm": 0.19137132167816162, "learning_rate": 6.2209327761330345e-06, "loss": 0.3057, "step": 14619 }, { "epoch": 2.3807352522086065, "grad_norm": 0.16206705570220947, "learning_rate": 6.217806124270009e-06, "loss": 0.258, "step": 14620 }, { "epoch": 2.3808980987664374, "grad_norm": 0.16716599464416504, "learning_rate": 6.214680146744026e-06, "loss": 0.2298, "step": 14621 }, { "epoch": 2.3810609453242684, "grad_norm": 0.15914124250411987, "learning_rate": 6.211554843667306e-06, "loss": 0.2404, "step": 14622 }, { "epoch": 2.381223791882099, "grad_norm": 0.19073115289211273, "learning_rate": 6.208430215152058e-06, "loss": 0.2389, "step": 14623 }, { "epoch": 2.38138663843993, "grad_norm": 0.21958209574222565, "learning_rate": 6.2053062613104616e-06, "loss": 0.3228, "step": 14624 }, { "epoch": 2.3815494849977608, "grad_norm": 0.17581821978092194, "learning_rate": 6.202182982254687e-06, "loss": 0.283, "step": 14625 }, { "epoch": 2.3817123315555917, "grad_norm": 0.1939261108636856, "learning_rate": 6.199060378096863e-06, "loss": 0.255, "step": 14626 }, { "epoch": 2.3818751781134226, "grad_norm": 0.136506587266922, "learning_rate": 6.195938448949093e-06, "loss": 0.3345, "step": 14627 }, { "epoch": 2.3820380246712536, "grad_norm": 0.216884583234787, "learning_rate": 6.192817194923478e-06, "loss": 0.278, "step": 14628 }, { "epoch": 2.3822008712290845, "grad_norm": 0.16951313614845276, "learning_rate": 6.189696616132073e-06, "loss": 0.2391, "step": 14629 }, { "epoch": 2.3823637177869155, "grad_norm": 0.12643137574195862, "learning_rate": 6.186576712686917e-06, "loss": 0.2518, "step": 14630 }, { "epoch": 2.3825265643447464, "grad_norm": 0.16852407157421112, "learning_rate": 6.183457484700017e-06, "loss": 0.2657, "step": 14631 }, { "epoch": 2.382689410902577, "grad_norm": 0.18255828320980072, "learning_rate": 6.1803389322833775e-06, "loss": 0.254, "step": 14632 }, { "epoch": 2.382852257460408, "grad_norm": 0.12188895791769028, "learning_rate": 6.177221055548954e-06, "loss": 0.236, "step": 14633 }, { "epoch": 2.383015104018239, "grad_norm": 0.1437647044658661, "learning_rate": 6.17410385460869e-06, "loss": 0.268, "step": 14634 }, { "epoch": 2.3831779505760697, "grad_norm": 0.13644848763942719, "learning_rate": 6.170987329574493e-06, "loss": 0.3126, "step": 14635 }, { "epoch": 2.3833407971339007, "grad_norm": 0.1644800454378128, "learning_rate": 6.167871480558271e-06, "loss": 0.2688, "step": 14636 }, { "epoch": 2.3835036436917316, "grad_norm": 0.16219274699687958, "learning_rate": 6.1647563076718875e-06, "loss": 0.2674, "step": 14637 }, { "epoch": 2.3836664902495626, "grad_norm": 0.19269394874572754, "learning_rate": 6.161641811027178e-06, "loss": 0.2544, "step": 14638 }, { "epoch": 2.383829336807393, "grad_norm": 0.16784165799617767, "learning_rate": 6.158527990735971e-06, "loss": 0.2175, "step": 14639 }, { "epoch": 2.383992183365224, "grad_norm": 0.16865617036819458, "learning_rate": 6.155414846910065e-06, "loss": 0.2592, "step": 14640 }, { "epoch": 2.384155029923055, "grad_norm": 0.14219866693019867, "learning_rate": 6.152302379661223e-06, "loss": 0.2601, "step": 14641 }, { "epoch": 2.384317876480886, "grad_norm": 0.21373777091503143, "learning_rate": 6.149190589101189e-06, "loss": 0.3018, "step": 14642 }, { "epoch": 2.384480723038717, "grad_norm": 0.2011289894580841, "learning_rate": 6.146079475341695e-06, "loss": 0.3046, "step": 14643 }, { "epoch": 2.3846435695965478, "grad_norm": 0.1652790904045105, "learning_rate": 6.142969038494439e-06, "loss": 0.2469, "step": 14644 }, { "epoch": 2.3848064161543787, "grad_norm": 0.1855514943599701, "learning_rate": 6.139859278671084e-06, "loss": 0.295, "step": 14645 }, { "epoch": 2.384969262712209, "grad_norm": 0.14754648506641388, "learning_rate": 6.136750195983296e-06, "loss": 0.2865, "step": 14646 }, { "epoch": 2.38513210927004, "grad_norm": 0.1668402999639511, "learning_rate": 6.133641790542682e-06, "loss": 0.3076, "step": 14647 }, { "epoch": 2.385294955827871, "grad_norm": 0.1961388885974884, "learning_rate": 6.130534062460863e-06, "loss": 0.2691, "step": 14648 }, { "epoch": 2.385457802385702, "grad_norm": 0.2196480929851532, "learning_rate": 6.1274270118494045e-06, "loss": 0.2752, "step": 14649 }, { "epoch": 2.385620648943533, "grad_norm": 0.2083815187215805, "learning_rate": 6.12432063881985e-06, "loss": 0.2879, "step": 14650 }, { "epoch": 2.385783495501364, "grad_norm": 0.15739233791828156, "learning_rate": 6.121214943483747e-06, "loss": 0.2515, "step": 14651 }, { "epoch": 2.385946342059195, "grad_norm": 0.16495010256767273, "learning_rate": 6.11810992595259e-06, "loss": 0.2419, "step": 14652 }, { "epoch": 2.3861091886170254, "grad_norm": 0.16308391094207764, "learning_rate": 6.115005586337852e-06, "loss": 0.2889, "step": 14653 }, { "epoch": 2.3862720351748563, "grad_norm": 0.12761075794696808, "learning_rate": 6.1119019247509985e-06, "loss": 0.2506, "step": 14654 }, { "epoch": 2.3864348817326873, "grad_norm": 0.12633660435676575, "learning_rate": 6.108798941303459e-06, "loss": 0.2326, "step": 14655 }, { "epoch": 2.386597728290518, "grad_norm": 0.1326761692762375, "learning_rate": 6.105696636106628e-06, "loss": 0.2689, "step": 14656 }, { "epoch": 2.386760574848349, "grad_norm": 0.1615253984928131, "learning_rate": 6.102595009271905e-06, "loss": 0.248, "step": 14657 }, { "epoch": 2.38692342140618, "grad_norm": 0.14482782781124115, "learning_rate": 6.099494060910641e-06, "loss": 0.266, "step": 14658 }, { "epoch": 2.387086267964011, "grad_norm": 0.18987680971622467, "learning_rate": 6.096393791134164e-06, "loss": 0.2813, "step": 14659 }, { "epoch": 2.387249114521842, "grad_norm": 0.15213488042354584, "learning_rate": 6.093294200053781e-06, "loss": 0.2656, "step": 14660 }, { "epoch": 2.387411961079673, "grad_norm": 0.16963531076908112, "learning_rate": 6.090195287780789e-06, "loss": 0.2521, "step": 14661 }, { "epoch": 2.3875748076375034, "grad_norm": 0.17149867117404938, "learning_rate": 6.087097054426444e-06, "loss": 0.2414, "step": 14662 }, { "epoch": 2.3877376541953343, "grad_norm": 0.1666407436132431, "learning_rate": 6.083999500101978e-06, "loss": 0.2479, "step": 14663 }, { "epoch": 2.3879005007531653, "grad_norm": 0.20070090889930725, "learning_rate": 6.080902624918594e-06, "loss": 0.2509, "step": 14664 }, { "epoch": 2.3880633473109962, "grad_norm": 0.1675606369972229, "learning_rate": 6.077806428987498e-06, "loss": 0.2558, "step": 14665 }, { "epoch": 2.388226193868827, "grad_norm": 0.16607466340065002, "learning_rate": 6.074710912419846e-06, "loss": 0.2736, "step": 14666 }, { "epoch": 2.388389040426658, "grad_norm": 0.1615365892648697, "learning_rate": 6.071616075326764e-06, "loss": 0.2808, "step": 14667 }, { "epoch": 2.388551886984489, "grad_norm": 0.15757319331169128, "learning_rate": 6.068521917819381e-06, "loss": 0.2591, "step": 14668 }, { "epoch": 2.3887147335423196, "grad_norm": 0.18677324056625366, "learning_rate": 6.065428440008786e-06, "loss": 0.2298, "step": 14669 }, { "epoch": 2.3888775801001505, "grad_norm": 0.1766103208065033, "learning_rate": 6.062335642006037e-06, "loss": 0.2902, "step": 14670 }, { "epoch": 2.3890404266579814, "grad_norm": 0.1581798940896988, "learning_rate": 6.05924352392217e-06, "loss": 0.2839, "step": 14671 }, { "epoch": 2.3892032732158124, "grad_norm": 0.16936559975147247, "learning_rate": 6.056152085868217e-06, "loss": 0.2547, "step": 14672 }, { "epoch": 2.3893661197736433, "grad_norm": 0.23305274546146393, "learning_rate": 6.053061327955159e-06, "loss": 0.2822, "step": 14673 }, { "epoch": 2.3895289663314743, "grad_norm": 0.16385960578918457, "learning_rate": 6.049971250293967e-06, "loss": 0.2819, "step": 14674 }, { "epoch": 2.389691812889305, "grad_norm": 0.19544072449207306, "learning_rate": 6.046881852995576e-06, "loss": 0.2837, "step": 14675 }, { "epoch": 2.3898546594471357, "grad_norm": 0.1712682545185089, "learning_rate": 6.0437931361709195e-06, "loss": 0.2775, "step": 14676 }, { "epoch": 2.3900175060049667, "grad_norm": 0.19204352796077728, "learning_rate": 6.040705099930882e-06, "loss": 0.2686, "step": 14677 }, { "epoch": 2.3901803525627976, "grad_norm": 0.15680551528930664, "learning_rate": 6.037617744386329e-06, "loss": 0.2888, "step": 14678 }, { "epoch": 2.3903431991206285, "grad_norm": 0.16500599682331085, "learning_rate": 6.0345310696481195e-06, "loss": 0.3086, "step": 14679 }, { "epoch": 2.3905060456784595, "grad_norm": 0.1665618121623993, "learning_rate": 6.031445075827064e-06, "loss": 0.2768, "step": 14680 }, { "epoch": 2.3906688922362904, "grad_norm": 0.16198796033859253, "learning_rate": 6.028359763033964e-06, "loss": 0.2802, "step": 14681 }, { "epoch": 2.3908317387941214, "grad_norm": 0.19249965250492096, "learning_rate": 6.025275131379582e-06, "loss": 0.287, "step": 14682 }, { "epoch": 2.3909945853519523, "grad_norm": 0.15059036016464233, "learning_rate": 6.022191180974679e-06, "loss": 0.2586, "step": 14683 }, { "epoch": 2.3911574319097832, "grad_norm": 0.2059933990240097, "learning_rate": 6.019107911929972e-06, "loss": 0.2467, "step": 14684 }, { "epoch": 2.3913202784676137, "grad_norm": 0.18682031333446503, "learning_rate": 6.016025324356151e-06, "loss": 0.252, "step": 14685 }, { "epoch": 2.3914831250254447, "grad_norm": 0.17951898276805878, "learning_rate": 6.012943418363906e-06, "loss": 0.2961, "step": 14686 }, { "epoch": 2.3916459715832756, "grad_norm": 0.19062991440296173, "learning_rate": 6.009862194063873e-06, "loss": 0.3201, "step": 14687 }, { "epoch": 2.3918088181411066, "grad_norm": 0.1633167862892151, "learning_rate": 6.00678165156669e-06, "loss": 0.3107, "step": 14688 }, { "epoch": 2.3919716646989375, "grad_norm": 0.15943510830402374, "learning_rate": 6.003701790982949e-06, "loss": 0.2671, "step": 14689 }, { "epoch": 2.3921345112567685, "grad_norm": 0.1657707691192627, "learning_rate": 6.000622612423221e-06, "loss": 0.3126, "step": 14690 }, { "epoch": 2.3922973578145994, "grad_norm": 0.18025068938732147, "learning_rate": 5.997544115998075e-06, "loss": 0.2583, "step": 14691 }, { "epoch": 2.39246020437243, "grad_norm": 0.19950063526630402, "learning_rate": 5.994466301818025e-06, "loss": 0.2596, "step": 14692 }, { "epoch": 2.392623050930261, "grad_norm": 0.1704069823026657, "learning_rate": 5.991389169993572e-06, "loss": 0.2444, "step": 14693 }, { "epoch": 2.392785897488092, "grad_norm": 0.16297651827335358, "learning_rate": 5.9883127206352056e-06, "loss": 0.2259, "step": 14694 }, { "epoch": 2.3929487440459227, "grad_norm": 0.1843152493238449, "learning_rate": 5.985236953853373e-06, "loss": 0.2663, "step": 14695 }, { "epoch": 2.3931115906037537, "grad_norm": 0.1854558140039444, "learning_rate": 5.982161869758504e-06, "loss": 0.298, "step": 14696 }, { "epoch": 2.3932744371615846, "grad_norm": 0.18173426389694214, "learning_rate": 5.979087468460995e-06, "loss": 0.2651, "step": 14697 }, { "epoch": 2.3934372837194156, "grad_norm": 0.15008635818958282, "learning_rate": 5.976013750071241e-06, "loss": 0.2778, "step": 14698 }, { "epoch": 2.393600130277246, "grad_norm": 0.17768655717372894, "learning_rate": 5.97294071469959e-06, "loss": 0.3078, "step": 14699 }, { "epoch": 2.393762976835077, "grad_norm": 0.17199863493442535, "learning_rate": 5.969868362456368e-06, "loss": 0.2565, "step": 14700 }, { "epoch": 2.393925823392908, "grad_norm": 0.17349666357040405, "learning_rate": 5.966796693451895e-06, "loss": 0.271, "step": 14701 }, { "epoch": 2.394088669950739, "grad_norm": 0.17978467047214508, "learning_rate": 5.963725707796444e-06, "loss": 0.2583, "step": 14702 }, { "epoch": 2.39425151650857, "grad_norm": 0.1464693248271942, "learning_rate": 5.9606554056002764e-06, "loss": 0.2486, "step": 14703 }, { "epoch": 2.3944143630664008, "grad_norm": 0.18564242124557495, "learning_rate": 5.9575857869736155e-06, "loss": 0.2706, "step": 14704 }, { "epoch": 2.3945772096242317, "grad_norm": 0.16178110241889954, "learning_rate": 5.954516852026684e-06, "loss": 0.2982, "step": 14705 }, { "epoch": 2.394740056182062, "grad_norm": 0.19510158896446228, "learning_rate": 5.951448600869661e-06, "loss": 0.2617, "step": 14706 }, { "epoch": 2.394902902739893, "grad_norm": 0.15942813456058502, "learning_rate": 5.9483810336126934e-06, "loss": 0.2552, "step": 14707 }, { "epoch": 2.395065749297724, "grad_norm": 0.15622977912425995, "learning_rate": 5.945314150365938e-06, "loss": 0.2553, "step": 14708 }, { "epoch": 2.395228595855555, "grad_norm": 0.2093154340982437, "learning_rate": 5.9422479512394905e-06, "loss": 0.2936, "step": 14709 }, { "epoch": 2.395391442413386, "grad_norm": 0.22445376217365265, "learning_rate": 5.939182436343443e-06, "loss": 0.2724, "step": 14710 }, { "epoch": 2.395554288971217, "grad_norm": 0.1396324336528778, "learning_rate": 5.936117605787844e-06, "loss": 0.2836, "step": 14711 }, { "epoch": 2.395717135529048, "grad_norm": 0.1503780335187912, "learning_rate": 5.93305345968275e-06, "loss": 0.2379, "step": 14712 }, { "epoch": 2.395879982086879, "grad_norm": 0.1591486632823944, "learning_rate": 5.929989998138161e-06, "loss": 0.2568, "step": 14713 }, { "epoch": 2.3960428286447097, "grad_norm": 0.1682310551404953, "learning_rate": 5.926927221264067e-06, "loss": 0.2834, "step": 14714 }, { "epoch": 2.3962056752025402, "grad_norm": 0.18073733150959015, "learning_rate": 5.923865129170422e-06, "loss": 0.2559, "step": 14715 }, { "epoch": 2.396368521760371, "grad_norm": 0.18921363353729248, "learning_rate": 5.92080372196718e-06, "loss": 0.2789, "step": 14716 }, { "epoch": 2.396531368318202, "grad_norm": 0.195175901055336, "learning_rate": 5.917742999764247e-06, "loss": 0.2917, "step": 14717 }, { "epoch": 2.396694214876033, "grad_norm": 0.17432644963264465, "learning_rate": 5.914682962671506e-06, "loss": 0.2838, "step": 14718 }, { "epoch": 2.396857061433864, "grad_norm": 0.1890358179807663, "learning_rate": 5.911623610798836e-06, "loss": 0.2596, "step": 14719 }, { "epoch": 2.397019907991695, "grad_norm": 0.15700063109397888, "learning_rate": 5.908564944256064e-06, "loss": 0.2766, "step": 14720 }, { "epoch": 2.397182754549526, "grad_norm": 0.15927408635616302, "learning_rate": 5.905506963153015e-06, "loss": 0.2956, "step": 14721 }, { "epoch": 2.3973456011073564, "grad_norm": 0.1481330543756485, "learning_rate": 5.902449667599466e-06, "loss": 0.233, "step": 14722 }, { "epoch": 2.3975084476651873, "grad_norm": 0.1687689572572708, "learning_rate": 5.899393057705199e-06, "loss": 0.2697, "step": 14723 }, { "epoch": 2.3976712942230183, "grad_norm": 0.18051648139953613, "learning_rate": 5.89633713357994e-06, "loss": 0.2943, "step": 14724 }, { "epoch": 2.397834140780849, "grad_norm": 0.1833534836769104, "learning_rate": 5.893281895333422e-06, "loss": 0.2876, "step": 14725 }, { "epoch": 2.39799698733868, "grad_norm": 0.1673193871974945, "learning_rate": 5.890227343075327e-06, "loss": 0.2677, "step": 14726 }, { "epoch": 2.398159833896511, "grad_norm": 0.18762388825416565, "learning_rate": 5.8871734769153225e-06, "loss": 0.2922, "step": 14727 }, { "epoch": 2.398322680454342, "grad_norm": 0.1708751916885376, "learning_rate": 5.884120296963055e-06, "loss": 0.2619, "step": 14728 }, { "epoch": 2.3984855270121725, "grad_norm": 0.15088734030723572, "learning_rate": 5.8810678033281385e-06, "loss": 0.241, "step": 14729 }, { "epoch": 2.3986483735700035, "grad_norm": 0.16293679177761078, "learning_rate": 5.878015996120176e-06, "loss": 0.2451, "step": 14730 }, { "epoch": 2.3988112201278344, "grad_norm": 0.17396333813667297, "learning_rate": 5.8749648754487295e-06, "loss": 0.2688, "step": 14731 }, { "epoch": 2.3989740666856654, "grad_norm": 0.15497010946273804, "learning_rate": 5.871914441423346e-06, "loss": 0.2527, "step": 14732 }, { "epoch": 2.3991369132434963, "grad_norm": 0.16843532025814056, "learning_rate": 5.868864694153534e-06, "loss": 0.2822, "step": 14733 }, { "epoch": 2.3992997598013273, "grad_norm": 0.16776776313781738, "learning_rate": 5.865815633748806e-06, "loss": 0.265, "step": 14734 }, { "epoch": 2.399462606359158, "grad_norm": 0.17804916203022003, "learning_rate": 5.862767260318624e-06, "loss": 0.2641, "step": 14735 }, { "epoch": 2.399625452916989, "grad_norm": 0.15517669916152954, "learning_rate": 5.859719573972433e-06, "loss": 0.2447, "step": 14736 }, { "epoch": 2.39978829947482, "grad_norm": 0.17294888198375702, "learning_rate": 5.856672574819649e-06, "loss": 0.2862, "step": 14737 }, { "epoch": 2.3999511460326506, "grad_norm": 0.17660671472549438, "learning_rate": 5.853626262969683e-06, "loss": 0.2745, "step": 14738 }, { "epoch": 2.4001139925904815, "grad_norm": 0.17266015708446503, "learning_rate": 5.850580638531897e-06, "loss": 0.27, "step": 14739 }, { "epoch": 2.4002768391483125, "grad_norm": 0.18790289759635925, "learning_rate": 5.8475357016156305e-06, "loss": 0.2753, "step": 14740 }, { "epoch": 2.4004396857061434, "grad_norm": 0.21285539865493774, "learning_rate": 5.8444914523302256e-06, "loss": 0.2942, "step": 14741 }, { "epoch": 2.4006025322639744, "grad_norm": 0.1606287956237793, "learning_rate": 5.841447890784965e-06, "loss": 0.2603, "step": 14742 }, { "epoch": 2.4007653788218053, "grad_norm": 0.18507476150989532, "learning_rate": 5.838405017089127e-06, "loss": 0.2668, "step": 14743 }, { "epoch": 2.4009282253796362, "grad_norm": 0.19003421068191528, "learning_rate": 5.835362831351954e-06, "loss": 0.2825, "step": 14744 }, { "epoch": 2.4010910719374667, "grad_norm": 0.18436914682388306, "learning_rate": 5.8323213336826775e-06, "loss": 0.2692, "step": 14745 }, { "epoch": 2.4012539184952977, "grad_norm": 0.19951221346855164, "learning_rate": 5.829280524190495e-06, "loss": 0.257, "step": 14746 }, { "epoch": 2.4014167650531286, "grad_norm": 0.17996592819690704, "learning_rate": 5.8262404029845784e-06, "loss": 0.2773, "step": 14747 }, { "epoch": 2.4015796116109596, "grad_norm": 0.1494002491235733, "learning_rate": 5.82320097017407e-06, "loss": 0.286, "step": 14748 }, { "epoch": 2.4017424581687905, "grad_norm": 0.16405117511749268, "learning_rate": 5.820162225868109e-06, "loss": 0.2539, "step": 14749 }, { "epoch": 2.4019053047266214, "grad_norm": 0.1848946064710617, "learning_rate": 5.817124170175789e-06, "loss": 0.3013, "step": 14750 }, { "epoch": 2.4020681512844524, "grad_norm": 0.21033909916877747, "learning_rate": 5.814086803206176e-06, "loss": 0.2931, "step": 14751 }, { "epoch": 2.402230997842283, "grad_norm": 0.17373977601528168, "learning_rate": 5.811050125068337e-06, "loss": 0.2816, "step": 14752 }, { "epoch": 2.402393844400114, "grad_norm": 0.16588743031024933, "learning_rate": 5.808014135871292e-06, "loss": 0.2505, "step": 14753 }, { "epoch": 2.4025566909579448, "grad_norm": 0.18004755675792694, "learning_rate": 5.804978835724039e-06, "loss": 0.3222, "step": 14754 }, { "epoch": 2.4027195375157757, "grad_norm": 0.20481504499912262, "learning_rate": 5.801944224735548e-06, "loss": 0.2811, "step": 14755 }, { "epoch": 2.4028823840736067, "grad_norm": 0.21114535629749298, "learning_rate": 5.798910303014787e-06, "loss": 0.2642, "step": 14756 }, { "epoch": 2.4030452306314376, "grad_norm": 0.16041068732738495, "learning_rate": 5.795877070670671e-06, "loss": 0.2872, "step": 14757 }, { "epoch": 2.4032080771892685, "grad_norm": 0.1914868801832199, "learning_rate": 5.792844527812102e-06, "loss": 0.2992, "step": 14758 }, { "epoch": 2.4033709237470995, "grad_norm": 0.20124471187591553, "learning_rate": 5.789812674547964e-06, "loss": 0.2579, "step": 14759 }, { "epoch": 2.4035337703049304, "grad_norm": 0.16337113082408905, "learning_rate": 5.7867815109871105e-06, "loss": 0.2373, "step": 14760 }, { "epoch": 2.403696616862761, "grad_norm": 0.16776396334171295, "learning_rate": 5.783751037238355e-06, "loss": 0.2501, "step": 14761 }, { "epoch": 2.403859463420592, "grad_norm": 0.16609734296798706, "learning_rate": 5.780721253410517e-06, "loss": 0.2778, "step": 14762 }, { "epoch": 2.404022309978423, "grad_norm": 0.2096012383699417, "learning_rate": 5.777692159612372e-06, "loss": 0.2694, "step": 14763 }, { "epoch": 2.4041851565362538, "grad_norm": 0.19897772371768951, "learning_rate": 5.774663755952661e-06, "loss": 0.2637, "step": 14764 }, { "epoch": 2.4043480030940847, "grad_norm": 0.18339192867279053, "learning_rate": 5.771636042540129e-06, "loss": 0.2618, "step": 14765 }, { "epoch": 2.4045108496519156, "grad_norm": 0.15282677114009857, "learning_rate": 5.768609019483464e-06, "loss": 0.3069, "step": 14766 }, { "epoch": 2.4046736962097466, "grad_norm": 0.16049879789352417, "learning_rate": 5.765582686891363e-06, "loss": 0.2767, "step": 14767 }, { "epoch": 2.404836542767577, "grad_norm": 0.18219567835330963, "learning_rate": 5.762557044872471e-06, "loss": 0.2352, "step": 14768 }, { "epoch": 2.404999389325408, "grad_norm": 0.19129981100559235, "learning_rate": 5.759532093535411e-06, "loss": 0.2636, "step": 14769 }, { "epoch": 2.405162235883239, "grad_norm": 0.1818382292985916, "learning_rate": 5.756507832988803e-06, "loss": 0.2612, "step": 14770 }, { "epoch": 2.40532508244107, "grad_norm": 0.1339392513036728, "learning_rate": 5.7534842633412165e-06, "loss": 0.2623, "step": 14771 }, { "epoch": 2.405487928998901, "grad_norm": 0.1823887825012207, "learning_rate": 5.750461384701211e-06, "loss": 0.3041, "step": 14772 }, { "epoch": 2.405650775556732, "grad_norm": 0.18098706007003784, "learning_rate": 5.747439197177307e-06, "loss": 0.3, "step": 14773 }, { "epoch": 2.4058136221145627, "grad_norm": 0.11997941136360168, "learning_rate": 5.744417700878024e-06, "loss": 0.3182, "step": 14774 }, { "epoch": 2.4059764686723932, "grad_norm": 0.17279408872127533, "learning_rate": 5.741396895911838e-06, "loss": 0.2621, "step": 14775 }, { "epoch": 2.406139315230224, "grad_norm": 0.19457393884658813, "learning_rate": 5.738376782387203e-06, "loss": 0.3115, "step": 14776 }, { "epoch": 2.406302161788055, "grad_norm": 0.13619451224803925, "learning_rate": 5.735357360412544e-06, "loss": 0.2643, "step": 14777 }, { "epoch": 2.406465008345886, "grad_norm": 0.1872643381357193, "learning_rate": 5.732338630096281e-06, "loss": 0.2579, "step": 14778 }, { "epoch": 2.406627854903717, "grad_norm": 0.17535708844661713, "learning_rate": 5.729320591546791e-06, "loss": 0.3037, "step": 14779 }, { "epoch": 2.406790701461548, "grad_norm": 0.16612818837165833, "learning_rate": 5.726303244872419e-06, "loss": 0.2649, "step": 14780 }, { "epoch": 2.406953548019379, "grad_norm": 0.1579783707857132, "learning_rate": 5.723286590181515e-06, "loss": 0.2622, "step": 14781 }, { "epoch": 2.4071163945772094, "grad_norm": 0.19579118490219116, "learning_rate": 5.7202706275823745e-06, "loss": 0.262, "step": 14782 }, { "epoch": 2.4072792411350403, "grad_norm": 0.17048466205596924, "learning_rate": 5.717255357183285e-06, "loss": 0.2749, "step": 14783 }, { "epoch": 2.4074420876928713, "grad_norm": 0.1503487527370453, "learning_rate": 5.714240779092492e-06, "loss": 0.2347, "step": 14784 }, { "epoch": 2.407604934250702, "grad_norm": 0.18077772855758667, "learning_rate": 5.7112268934182425e-06, "loss": 0.2434, "step": 14785 }, { "epoch": 2.407767780808533, "grad_norm": 0.16928797960281372, "learning_rate": 5.708213700268739e-06, "loss": 0.2753, "step": 14786 }, { "epoch": 2.407930627366364, "grad_norm": 0.16161507368087769, "learning_rate": 5.705201199752163e-06, "loss": 0.2695, "step": 14787 }, { "epoch": 2.408093473924195, "grad_norm": 0.15779133141040802, "learning_rate": 5.7021893919766675e-06, "loss": 0.2908, "step": 14788 }, { "epoch": 2.408256320482026, "grad_norm": 0.15617112815380096, "learning_rate": 5.699178277050396e-06, "loss": 0.245, "step": 14789 }, { "epoch": 2.408419167039857, "grad_norm": 0.19392041862010956, "learning_rate": 5.696167855081452e-06, "loss": 0.2564, "step": 14790 }, { "epoch": 2.4085820135976874, "grad_norm": 0.16781409084796906, "learning_rate": 5.693158126177911e-06, "loss": 0.2815, "step": 14791 }, { "epoch": 2.4087448601555184, "grad_norm": 0.15734238922595978, "learning_rate": 5.690149090447844e-06, "loss": 0.2365, "step": 14792 }, { "epoch": 2.4089077067133493, "grad_norm": 0.16694016754627228, "learning_rate": 5.687140747999281e-06, "loss": 0.2828, "step": 14793 }, { "epoch": 2.4090705532711802, "grad_norm": 0.1351306140422821, "learning_rate": 5.684133098940225e-06, "loss": 0.2623, "step": 14794 }, { "epoch": 2.409233399829011, "grad_norm": 0.16148728132247925, "learning_rate": 5.681126143378657e-06, "loss": 0.2633, "step": 14795 }, { "epoch": 2.409396246386842, "grad_norm": 0.22922928631305695, "learning_rate": 5.678119881422547e-06, "loss": 0.2916, "step": 14796 }, { "epoch": 2.409559092944673, "grad_norm": 0.1694638580083847, "learning_rate": 5.675114313179825e-06, "loss": 0.2916, "step": 14797 }, { "epoch": 2.4097219395025036, "grad_norm": 0.14970679581165314, "learning_rate": 5.672109438758391e-06, "loss": 0.2942, "step": 14798 }, { "epoch": 2.4098847860603345, "grad_norm": 0.1661500632762909, "learning_rate": 5.669105258266141e-06, "loss": 0.2845, "step": 14799 }, { "epoch": 2.4100476326181655, "grad_norm": 0.1439821571111679, "learning_rate": 5.66610177181093e-06, "loss": 0.2847, "step": 14800 }, { "epoch": 2.4102104791759964, "grad_norm": 0.18370164930820465, "learning_rate": 5.663098979500586e-06, "loss": 0.2528, "step": 14801 }, { "epoch": 2.4103733257338273, "grad_norm": 0.18099889159202576, "learning_rate": 5.660096881442931e-06, "loss": 0.2586, "step": 14802 }, { "epoch": 2.4105361722916583, "grad_norm": 0.1662815809249878, "learning_rate": 5.657095477745739e-06, "loss": 0.2755, "step": 14803 }, { "epoch": 2.4106990188494892, "grad_norm": 0.43028852343559265, "learning_rate": 5.654094768516766e-06, "loss": 0.2716, "step": 14804 }, { "epoch": 2.4108618654073197, "grad_norm": 0.1853851079940796, "learning_rate": 5.651094753863762e-06, "loss": 0.284, "step": 14805 }, { "epoch": 2.4110247119651507, "grad_norm": 0.13778825104236603, "learning_rate": 5.648095433894418e-06, "loss": 0.2759, "step": 14806 }, { "epoch": 2.4111875585229816, "grad_norm": 0.1598455309867859, "learning_rate": 5.6450968087164355e-06, "loss": 0.2754, "step": 14807 }, { "epoch": 2.4113504050808126, "grad_norm": 0.18453480303287506, "learning_rate": 5.6420988784374655e-06, "loss": 0.3112, "step": 14808 }, { "epoch": 2.4115132516386435, "grad_norm": 0.19036856293678284, "learning_rate": 5.639101643165138e-06, "loss": 0.2396, "step": 14809 }, { "epoch": 2.4116760981964744, "grad_norm": 0.1688438355922699, "learning_rate": 5.636105103007072e-06, "loss": 0.2724, "step": 14810 }, { "epoch": 2.4118389447543054, "grad_norm": 0.13452355563640594, "learning_rate": 5.633109258070851e-06, "loss": 0.2477, "step": 14811 }, { "epoch": 2.4120017913121363, "grad_norm": 0.18701380491256714, "learning_rate": 5.630114108464032e-06, "loss": 0.2594, "step": 14812 }, { "epoch": 2.4121646378699673, "grad_norm": 0.16985999047756195, "learning_rate": 5.627119654294144e-06, "loss": 0.2667, "step": 14813 }, { "epoch": 2.4123274844277978, "grad_norm": 0.1732475608587265, "learning_rate": 5.624125895668711e-06, "loss": 0.2814, "step": 14814 }, { "epoch": 2.4124903309856287, "grad_norm": 0.16027884185314178, "learning_rate": 5.621132832695208e-06, "loss": 0.2639, "step": 14815 }, { "epoch": 2.4126531775434596, "grad_norm": 0.22709007561206818, "learning_rate": 5.618140465481098e-06, "loss": 0.2857, "step": 14816 }, { "epoch": 2.4128160241012906, "grad_norm": 0.17893078923225403, "learning_rate": 5.61514879413381e-06, "loss": 0.2393, "step": 14817 }, { "epoch": 2.4129788706591215, "grad_norm": 0.15618425607681274, "learning_rate": 5.6121578187607604e-06, "loss": 0.2597, "step": 14818 }, { "epoch": 2.4131417172169525, "grad_norm": 0.1592283993959427, "learning_rate": 5.609167539469337e-06, "loss": 0.2847, "step": 14819 }, { "epoch": 2.4133045637747834, "grad_norm": 0.16326846182346344, "learning_rate": 5.606177956366887e-06, "loss": 0.2962, "step": 14820 }, { "epoch": 2.413467410332614, "grad_norm": 0.17886804044246674, "learning_rate": 5.60318906956076e-06, "loss": 0.318, "step": 14821 }, { "epoch": 2.413630256890445, "grad_norm": 0.1741892695426941, "learning_rate": 5.600200879158263e-06, "loss": 0.2399, "step": 14822 }, { "epoch": 2.413793103448276, "grad_norm": 0.20141981542110443, "learning_rate": 5.597213385266675e-06, "loss": 0.2774, "step": 14823 }, { "epoch": 2.4139559500061067, "grad_norm": 0.20068363845348358, "learning_rate": 5.594226587993251e-06, "loss": 0.2823, "step": 14824 }, { "epoch": 2.4141187965639377, "grad_norm": 0.19768819212913513, "learning_rate": 5.591240487445243e-06, "loss": 0.285, "step": 14825 }, { "epoch": 2.4142816431217686, "grad_norm": 0.20693325996398926, "learning_rate": 5.588255083729849e-06, "loss": 0.2785, "step": 14826 }, { "epoch": 2.4144444896795996, "grad_norm": 0.1461484134197235, "learning_rate": 5.58527037695426e-06, "loss": 0.2884, "step": 14827 }, { "epoch": 2.41460733623743, "grad_norm": 0.17322129011154175, "learning_rate": 5.5822863672256226e-06, "loss": 0.2356, "step": 14828 }, { "epoch": 2.414770182795261, "grad_norm": 0.1709786206483841, "learning_rate": 5.579303054651092e-06, "loss": 0.2661, "step": 14829 }, { "epoch": 2.414933029353092, "grad_norm": 0.16611875593662262, "learning_rate": 5.576320439337765e-06, "loss": 0.2194, "step": 14830 }, { "epoch": 2.415095875910923, "grad_norm": 0.20002473890781403, "learning_rate": 5.573338521392721e-06, "loss": 0.2772, "step": 14831 }, { "epoch": 2.415258722468754, "grad_norm": 0.17179228365421295, "learning_rate": 5.570357300923038e-06, "loss": 0.2532, "step": 14832 }, { "epoch": 2.415421569026585, "grad_norm": 0.21754413843154907, "learning_rate": 5.567376778035738e-06, "loss": 0.2745, "step": 14833 }, { "epoch": 2.4155844155844157, "grad_norm": 0.22034478187561035, "learning_rate": 5.564396952837836e-06, "loss": 0.2739, "step": 14834 }, { "epoch": 2.415747262142246, "grad_norm": 0.1841970831155777, "learning_rate": 5.5614178254363045e-06, "loss": 0.238, "step": 14835 }, { "epoch": 2.415910108700077, "grad_norm": 0.2105882167816162, "learning_rate": 5.55843939593812e-06, "loss": 0.2906, "step": 14836 }, { "epoch": 2.416072955257908, "grad_norm": 0.17879025638103485, "learning_rate": 5.555461664450212e-06, "loss": 0.2372, "step": 14837 }, { "epoch": 2.416235801815739, "grad_norm": 0.16123618185520172, "learning_rate": 5.552484631079477e-06, "loss": 0.293, "step": 14838 }, { "epoch": 2.41639864837357, "grad_norm": 0.16422730684280396, "learning_rate": 5.5495082959328205e-06, "loss": 0.2624, "step": 14839 }, { "epoch": 2.416561494931401, "grad_norm": 0.1692659705877304, "learning_rate": 5.546532659117091e-06, "loss": 0.2829, "step": 14840 }, { "epoch": 2.416724341489232, "grad_norm": 0.17191877961158752, "learning_rate": 5.543557720739115e-06, "loss": 0.2922, "step": 14841 }, { "epoch": 2.416887188047063, "grad_norm": 0.15005551278591156, "learning_rate": 5.540583480905717e-06, "loss": 0.2618, "step": 14842 }, { "epoch": 2.4170500346048938, "grad_norm": 0.19891637563705444, "learning_rate": 5.5376099397236676e-06, "loss": 0.2688, "step": 14843 }, { "epoch": 2.4172128811627243, "grad_norm": 0.16708044707775116, "learning_rate": 5.534637097299738e-06, "loss": 0.2698, "step": 14844 }, { "epoch": 2.417375727720555, "grad_norm": 0.19201092422008514, "learning_rate": 5.531664953740656e-06, "loss": 0.2748, "step": 14845 }, { "epoch": 2.417538574278386, "grad_norm": 0.17858631908893585, "learning_rate": 5.528693509153127e-06, "loss": 0.2645, "step": 14846 }, { "epoch": 2.417701420836217, "grad_norm": 0.17171001434326172, "learning_rate": 5.525722763643842e-06, "loss": 0.2482, "step": 14847 }, { "epoch": 2.417864267394048, "grad_norm": 0.15421424806118011, "learning_rate": 5.522752717319457e-06, "loss": 0.2694, "step": 14848 }, { "epoch": 2.418027113951879, "grad_norm": 0.14331240952014923, "learning_rate": 5.519783370286608e-06, "loss": 0.2589, "step": 14849 }, { "epoch": 2.41818996050971, "grad_norm": 0.19983553886413574, "learning_rate": 5.516814722651889e-06, "loss": 0.3111, "step": 14850 }, { "epoch": 2.4183528070675404, "grad_norm": 0.15063486993312836, "learning_rate": 5.513846774521905e-06, "loss": 0.2372, "step": 14851 }, { "epoch": 2.4185156536253714, "grad_norm": 0.198250874876976, "learning_rate": 5.5108795260032015e-06, "loss": 0.2808, "step": 14852 }, { "epoch": 2.4186785001832023, "grad_norm": 0.19922490417957306, "learning_rate": 5.507912977202309e-06, "loss": 0.2911, "step": 14853 }, { "epoch": 2.4188413467410332, "grad_norm": 0.16202233731746674, "learning_rate": 5.504947128225746e-06, "loss": 0.2355, "step": 14854 }, { "epoch": 2.419004193298864, "grad_norm": 0.2419888973236084, "learning_rate": 5.50198197917999e-06, "loss": 0.2938, "step": 14855 }, { "epoch": 2.419167039856695, "grad_norm": 0.19160620868206024, "learning_rate": 5.499017530171499e-06, "loss": 0.2708, "step": 14856 }, { "epoch": 2.419329886414526, "grad_norm": 0.17408236861228943, "learning_rate": 5.4960537813066985e-06, "loss": 0.2674, "step": 14857 }, { "epoch": 2.4194927329723566, "grad_norm": 0.15831413865089417, "learning_rate": 5.493090732692008e-06, "loss": 0.264, "step": 14858 }, { "epoch": 2.4196555795301875, "grad_norm": 0.17502762377262115, "learning_rate": 5.490128384433804e-06, "loss": 0.2587, "step": 14859 }, { "epoch": 2.4198184260880184, "grad_norm": 0.19958357512950897, "learning_rate": 5.487166736638441e-06, "loss": 0.2957, "step": 14860 }, { "epoch": 2.4199812726458494, "grad_norm": 0.1388930231332779, "learning_rate": 5.484205789412258e-06, "loss": 0.2663, "step": 14861 }, { "epoch": 2.4201441192036803, "grad_norm": 0.17676250636577606, "learning_rate": 5.481245542861557e-06, "loss": 0.2003, "step": 14862 }, { "epoch": 2.4203069657615113, "grad_norm": 0.17318613827228546, "learning_rate": 5.478285997092625e-06, "loss": 0.2675, "step": 14863 }, { "epoch": 2.420469812319342, "grad_norm": 0.16575823724269867, "learning_rate": 5.475327152211704e-06, "loss": 0.2634, "step": 14864 }, { "epoch": 2.420632658877173, "grad_norm": 0.1963389366865158, "learning_rate": 5.472369008325043e-06, "loss": 0.2732, "step": 14865 }, { "epoch": 2.420795505435004, "grad_norm": 0.1343948394060135, "learning_rate": 5.4694115655388426e-06, "loss": 0.2743, "step": 14866 }, { "epoch": 2.4209583519928346, "grad_norm": 0.20758293569087982, "learning_rate": 5.46645482395928e-06, "loss": 0.2592, "step": 14867 }, { "epoch": 2.4211211985506655, "grad_norm": 0.19999149441719055, "learning_rate": 5.4634987836925085e-06, "loss": 0.2754, "step": 14868 }, { "epoch": 2.4212840451084965, "grad_norm": 0.11344755440950394, "learning_rate": 5.460543444844671e-06, "loss": 0.2511, "step": 14869 }, { "epoch": 2.4214468916663274, "grad_norm": 0.167313352227211, "learning_rate": 5.457588807521863e-06, "loss": 0.2485, "step": 14870 }, { "epoch": 2.4216097382241584, "grad_norm": 0.17365343868732452, "learning_rate": 5.454634871830161e-06, "loss": 0.2448, "step": 14871 }, { "epoch": 2.4217725847819893, "grad_norm": 0.17414924502372742, "learning_rate": 5.451681637875636e-06, "loss": 0.2504, "step": 14872 }, { "epoch": 2.4219354313398203, "grad_norm": 0.19945324957370758, "learning_rate": 5.448729105764305e-06, "loss": 0.2703, "step": 14873 }, { "epoch": 2.4220982778976508, "grad_norm": 0.15868617594242096, "learning_rate": 5.445777275602179e-06, "loss": 0.2686, "step": 14874 }, { "epoch": 2.4222611244554817, "grad_norm": 0.14617186784744263, "learning_rate": 5.442826147495225e-06, "loss": 0.2776, "step": 14875 }, { "epoch": 2.4224239710133126, "grad_norm": 0.18980489671230316, "learning_rate": 5.4398757215494175e-06, "loss": 0.2598, "step": 14876 }, { "epoch": 2.4225868175711436, "grad_norm": 0.17207415401935577, "learning_rate": 5.436925997870673e-06, "loss": 0.2663, "step": 14877 }, { "epoch": 2.4227496641289745, "grad_norm": 0.13876450061798096, "learning_rate": 5.433976976564892e-06, "loss": 0.2407, "step": 14878 }, { "epoch": 2.4229125106868055, "grad_norm": 0.167927086353302, "learning_rate": 5.431028657737966e-06, "loss": 0.3063, "step": 14879 }, { "epoch": 2.4230753572446364, "grad_norm": 0.15646614134311676, "learning_rate": 5.4280810414957336e-06, "loss": 0.2847, "step": 14880 }, { "epoch": 2.423238203802467, "grad_norm": 0.18046718835830688, "learning_rate": 5.4251341279440375e-06, "loss": 0.3169, "step": 14881 }, { "epoch": 2.423401050360298, "grad_norm": 0.16557326912879944, "learning_rate": 5.422187917188673e-06, "loss": 0.2733, "step": 14882 }, { "epoch": 2.423563896918129, "grad_norm": 0.18803997337818146, "learning_rate": 5.419242409335415e-06, "loss": 0.2397, "step": 14883 }, { "epoch": 2.4237267434759597, "grad_norm": 0.11407487839460373, "learning_rate": 5.416297604490026e-06, "loss": 0.2661, "step": 14884 }, { "epoch": 2.4238895900337907, "grad_norm": 0.1852472871541977, "learning_rate": 5.413353502758226e-06, "loss": 0.3016, "step": 14885 }, { "epoch": 2.4240524365916216, "grad_norm": 0.1800108700990677, "learning_rate": 5.410410104245711e-06, "loss": 0.2687, "step": 14886 }, { "epoch": 2.4242152831494526, "grad_norm": 0.16939234733581543, "learning_rate": 5.407467409058175e-06, "loss": 0.263, "step": 14887 }, { "epoch": 2.4243781297072835, "grad_norm": 0.2105792611837387, "learning_rate": 5.404525417301259e-06, "loss": 0.2497, "step": 14888 }, { "epoch": 2.4245409762651144, "grad_norm": 0.1702221930027008, "learning_rate": 5.401584129080589e-06, "loss": 0.2641, "step": 14889 }, { "epoch": 2.424703822822945, "grad_norm": 0.18234838545322418, "learning_rate": 5.398643544501761e-06, "loss": 0.2734, "step": 14890 }, { "epoch": 2.424866669380776, "grad_norm": 0.1848021000623703, "learning_rate": 5.395703663670362e-06, "loss": 0.2903, "step": 14891 }, { "epoch": 2.425029515938607, "grad_norm": 0.18542900681495667, "learning_rate": 5.39276448669194e-06, "loss": 0.2916, "step": 14892 }, { "epoch": 2.4251923624964378, "grad_norm": 0.19250333309173584, "learning_rate": 5.389826013672009e-06, "loss": 0.2841, "step": 14893 }, { "epoch": 2.4253552090542687, "grad_norm": 0.1351359784603119, "learning_rate": 5.386888244716087e-06, "loss": 0.2594, "step": 14894 }, { "epoch": 2.4255180556120997, "grad_norm": 0.2895791530609131, "learning_rate": 5.383951179929636e-06, "loss": 0.3394, "step": 14895 }, { "epoch": 2.4256809021699306, "grad_norm": 0.1949554681777954, "learning_rate": 5.381014819418112e-06, "loss": 0.248, "step": 14896 }, { "epoch": 2.425843748727761, "grad_norm": 0.1674189567565918, "learning_rate": 5.378079163286928e-06, "loss": 0.2716, "step": 14897 }, { "epoch": 2.426006595285592, "grad_norm": 0.16617754101753235, "learning_rate": 5.375144211641497e-06, "loss": 0.2868, "step": 14898 }, { "epoch": 2.426169441843423, "grad_norm": 0.15808852016925812, "learning_rate": 5.372209964587188e-06, "loss": 0.2416, "step": 14899 }, { "epoch": 2.426332288401254, "grad_norm": 0.12894132733345032, "learning_rate": 5.369276422229347e-06, "loss": 0.2592, "step": 14900 }, { "epoch": 2.426495134959085, "grad_norm": 0.1637287586927414, "learning_rate": 5.3663435846732916e-06, "loss": 0.2511, "step": 14901 }, { "epoch": 2.426657981516916, "grad_norm": 0.18117524683475494, "learning_rate": 5.363411452024331e-06, "loss": 0.2671, "step": 14902 }, { "epoch": 2.4268208280747467, "grad_norm": 0.2161424309015274, "learning_rate": 5.360480024387732e-06, "loss": 0.282, "step": 14903 }, { "epoch": 2.4269836746325772, "grad_norm": 0.16853000223636627, "learning_rate": 5.3575493018687364e-06, "loss": 0.2856, "step": 14904 }, { "epoch": 2.427146521190408, "grad_norm": 0.13896740972995758, "learning_rate": 5.354619284572576e-06, "loss": 0.3112, "step": 14905 }, { "epoch": 2.427309367748239, "grad_norm": 0.19559121131896973, "learning_rate": 5.351689972604446e-06, "loss": 0.301, "step": 14906 }, { "epoch": 2.42747221430607, "grad_norm": 0.17315392196178436, "learning_rate": 5.34876136606951e-06, "loss": 0.2363, "step": 14907 }, { "epoch": 2.427635060863901, "grad_norm": 0.13668251037597656, "learning_rate": 5.345833465072914e-06, "loss": 0.2911, "step": 14908 }, { "epoch": 2.427797907421732, "grad_norm": 0.14025118947029114, "learning_rate": 5.342906269719786e-06, "loss": 0.2716, "step": 14909 }, { "epoch": 2.427960753979563, "grad_norm": 0.23453208804130554, "learning_rate": 5.339979780115217e-06, "loss": 0.3137, "step": 14910 }, { "epoch": 2.4281236005373934, "grad_norm": 0.19077229499816895, "learning_rate": 5.337053996364272e-06, "loss": 0.2694, "step": 14911 }, { "epoch": 2.4282864470952243, "grad_norm": 0.17818261682987213, "learning_rate": 5.334128918572007e-06, "loss": 0.2695, "step": 14912 }, { "epoch": 2.4284492936530553, "grad_norm": 0.18452101945877075, "learning_rate": 5.331204546843432e-06, "loss": 0.2355, "step": 14913 }, { "epoch": 2.4286121402108862, "grad_norm": 0.1832338571548462, "learning_rate": 5.328280881283546e-06, "loss": 0.2884, "step": 14914 }, { "epoch": 2.428774986768717, "grad_norm": 0.15172359347343445, "learning_rate": 5.325357921997307e-06, "loss": 0.2711, "step": 14915 }, { "epoch": 2.428937833326548, "grad_norm": 0.2108488231897354, "learning_rate": 5.32243566908967e-06, "loss": 0.2863, "step": 14916 }, { "epoch": 2.429100679884379, "grad_norm": 0.15678435564041138, "learning_rate": 5.319514122665545e-06, "loss": 0.2781, "step": 14917 }, { "epoch": 2.42926352644221, "grad_norm": 0.22849875688552856, "learning_rate": 5.316593282829832e-06, "loss": 0.2875, "step": 14918 }, { "epoch": 2.429426373000041, "grad_norm": 0.1799795776605606, "learning_rate": 5.313673149687393e-06, "loss": 0.2258, "step": 14919 }, { "epoch": 2.4295892195578714, "grad_norm": 0.19258800148963928, "learning_rate": 5.310753723343062e-06, "loss": 0.3018, "step": 14920 }, { "epoch": 2.4297520661157024, "grad_norm": 0.13811780512332916, "learning_rate": 5.30783500390167e-06, "loss": 0.2901, "step": 14921 }, { "epoch": 2.4299149126735333, "grad_norm": 0.1856049746274948, "learning_rate": 5.304916991468001e-06, "loss": 0.2866, "step": 14922 }, { "epoch": 2.4300777592313643, "grad_norm": 0.13576415181159973, "learning_rate": 5.301999686146814e-06, "loss": 0.2495, "step": 14923 }, { "epoch": 2.430240605789195, "grad_norm": 0.1576535850763321, "learning_rate": 5.299083088042861e-06, "loss": 0.2723, "step": 14924 }, { "epoch": 2.430403452347026, "grad_norm": 0.14400796592235565, "learning_rate": 5.296167197260851e-06, "loss": 0.2905, "step": 14925 }, { "epoch": 2.430566298904857, "grad_norm": 0.17037926614284515, "learning_rate": 5.293252013905467e-06, "loss": 0.2769, "step": 14926 }, { "epoch": 2.4307291454626876, "grad_norm": 0.16359765827655792, "learning_rate": 5.290337538081386e-06, "loss": 0.2634, "step": 14927 }, { "epoch": 2.4308919920205185, "grad_norm": 0.17501193284988403, "learning_rate": 5.2874237698932374e-06, "loss": 0.2661, "step": 14928 }, { "epoch": 2.4310548385783495, "grad_norm": 0.15935665369033813, "learning_rate": 5.2845107094456365e-06, "loss": 0.2778, "step": 14929 }, { "epoch": 2.4312176851361804, "grad_norm": 0.1828666627407074, "learning_rate": 5.281598356843162e-06, "loss": 0.2744, "step": 14930 }, { "epoch": 2.4313805316940114, "grad_norm": 0.18510635197162628, "learning_rate": 5.278686712190395e-06, "loss": 0.2646, "step": 14931 }, { "epoch": 2.4315433782518423, "grad_norm": 0.19938531517982483, "learning_rate": 5.275775775591857e-06, "loss": 0.3027, "step": 14932 }, { "epoch": 2.4317062248096732, "grad_norm": 0.19166170060634613, "learning_rate": 5.2728655471520615e-06, "loss": 0.2639, "step": 14933 }, { "epoch": 2.4318690713675037, "grad_norm": 0.18966567516326904, "learning_rate": 5.269956026975501e-06, "loss": 0.2544, "step": 14934 }, { "epoch": 2.4320319179253347, "grad_norm": 0.14062714576721191, "learning_rate": 5.267047215166631e-06, "loss": 0.2838, "step": 14935 }, { "epoch": 2.4321947644831656, "grad_norm": 0.18255040049552917, "learning_rate": 5.264139111829891e-06, "loss": 0.2518, "step": 14936 }, { "epoch": 2.4323576110409966, "grad_norm": 0.16449959576129913, "learning_rate": 5.261231717069678e-06, "loss": 0.2622, "step": 14937 }, { "epoch": 2.4325204575988275, "grad_norm": 0.1871141940355301, "learning_rate": 5.258325030990391e-06, "loss": 0.2701, "step": 14938 }, { "epoch": 2.4326833041566585, "grad_norm": 0.15705829858779907, "learning_rate": 5.2554190536963865e-06, "loss": 0.2623, "step": 14939 }, { "epoch": 2.4328461507144894, "grad_norm": 0.1881205290555954, "learning_rate": 5.2525137852919905e-06, "loss": 0.2695, "step": 14940 }, { "epoch": 2.4330089972723203, "grad_norm": 0.1880299150943756, "learning_rate": 5.24960922588151e-06, "loss": 0.254, "step": 14941 }, { "epoch": 2.4331718438301513, "grad_norm": 0.15941603481769562, "learning_rate": 5.246705375569239e-06, "loss": 0.2885, "step": 14942 }, { "epoch": 2.433334690387982, "grad_norm": 0.17394202947616577, "learning_rate": 5.243802234459427e-06, "loss": 0.2719, "step": 14943 }, { "epoch": 2.4334975369458127, "grad_norm": 0.21169449388980865, "learning_rate": 5.240899802656299e-06, "loss": 0.2392, "step": 14944 }, { "epoch": 2.4336603835036437, "grad_norm": 0.14478425681591034, "learning_rate": 5.2379980802640755e-06, "loss": 0.2435, "step": 14945 }, { "epoch": 2.4338232300614746, "grad_norm": 0.17003130912780762, "learning_rate": 5.2350970673869294e-06, "loss": 0.3005, "step": 14946 }, { "epoch": 2.4339860766193055, "grad_norm": 0.16706502437591553, "learning_rate": 5.2321967641290135e-06, "loss": 0.2919, "step": 14947 }, { "epoch": 2.4341489231771365, "grad_norm": 0.2257063090801239, "learning_rate": 5.229297170594455e-06, "loss": 0.2881, "step": 14948 }, { "epoch": 2.4343117697349674, "grad_norm": 0.16650643944740295, "learning_rate": 5.226398286887368e-06, "loss": 0.3058, "step": 14949 }, { "epoch": 2.434474616292798, "grad_norm": 0.22948279976844788, "learning_rate": 5.223500113111829e-06, "loss": 0.3077, "step": 14950 }, { "epoch": 2.434637462850629, "grad_norm": 0.19997116923332214, "learning_rate": 5.220602649371883e-06, "loss": 0.2801, "step": 14951 }, { "epoch": 2.43480030940846, "grad_norm": 0.14175184071063995, "learning_rate": 5.2177058957715605e-06, "loss": 0.2787, "step": 14952 }, { "epoch": 2.4349631559662908, "grad_norm": 0.17136815190315247, "learning_rate": 5.21480985241487e-06, "loss": 0.2532, "step": 14953 }, { "epoch": 2.4351260025241217, "grad_norm": 0.1891072392463684, "learning_rate": 5.211914519405784e-06, "loss": 0.2545, "step": 14954 }, { "epoch": 2.4352888490819526, "grad_norm": 0.17594566941261292, "learning_rate": 5.2090198968482465e-06, "loss": 0.2733, "step": 14955 }, { "epoch": 2.4354516956397836, "grad_norm": 0.16548773646354675, "learning_rate": 5.206125984846197e-06, "loss": 0.2839, "step": 14956 }, { "epoch": 2.435614542197614, "grad_norm": 0.15328697860240936, "learning_rate": 5.2032327835035226e-06, "loss": 0.2518, "step": 14957 }, { "epoch": 2.435777388755445, "grad_norm": 0.13305379450321198, "learning_rate": 5.2003402929241104e-06, "loss": 0.3304, "step": 14958 }, { "epoch": 2.435940235313276, "grad_norm": 0.18651112914085388, "learning_rate": 5.197448513211803e-06, "loss": 0.3024, "step": 14959 }, { "epoch": 2.436103081871107, "grad_norm": 0.19385772943496704, "learning_rate": 5.194557444470416e-06, "loss": 0.2636, "step": 14960 }, { "epoch": 2.436265928428938, "grad_norm": 0.17690031230449677, "learning_rate": 5.191667086803764e-06, "loss": 0.3002, "step": 14961 }, { "epoch": 2.436428774986769, "grad_norm": 0.22400949895381927, "learning_rate": 5.188777440315603e-06, "loss": 0.2799, "step": 14962 }, { "epoch": 2.4365916215445997, "grad_norm": 0.18490499258041382, "learning_rate": 5.185888505109696e-06, "loss": 0.3099, "step": 14963 }, { "epoch": 2.4367544681024302, "grad_norm": 0.1891535222530365, "learning_rate": 5.1830002812897545e-06, "loss": 0.3047, "step": 14964 }, { "epoch": 2.436917314660261, "grad_norm": 0.13067014515399933, "learning_rate": 5.180112768959478e-06, "loss": 0.2646, "step": 14965 }, { "epoch": 2.437080161218092, "grad_norm": 0.1909065544605255, "learning_rate": 5.177225968222527e-06, "loss": 0.2988, "step": 14966 }, { "epoch": 2.437243007775923, "grad_norm": 0.16702677309513092, "learning_rate": 5.174339879182563e-06, "loss": 0.2831, "step": 14967 }, { "epoch": 2.437405854333754, "grad_norm": 0.22150951623916626, "learning_rate": 5.171454501943193e-06, "loss": 0.2721, "step": 14968 }, { "epoch": 2.437568700891585, "grad_norm": 0.17401213943958282, "learning_rate": 5.168569836608017e-06, "loss": 0.3058, "step": 14969 }, { "epoch": 2.437731547449416, "grad_norm": 0.1568203717470169, "learning_rate": 5.165685883280594e-06, "loss": 0.3102, "step": 14970 }, { "epoch": 2.437894394007247, "grad_norm": 0.15415161848068237, "learning_rate": 5.1628026420644795e-06, "loss": 0.2345, "step": 14971 }, { "epoch": 2.4380572405650778, "grad_norm": 0.11403396725654602, "learning_rate": 5.159920113063183e-06, "loss": 0.2916, "step": 14972 }, { "epoch": 2.4382200871229083, "grad_norm": 0.19169655442237854, "learning_rate": 5.15703829638019e-06, "loss": 0.3464, "step": 14973 }, { "epoch": 2.438382933680739, "grad_norm": 0.17594321072101593, "learning_rate": 5.1541571921189805e-06, "loss": 0.2728, "step": 14974 }, { "epoch": 2.43854578023857, "grad_norm": 0.1873418688774109, "learning_rate": 5.151276800382987e-06, "loss": 0.2352, "step": 14975 }, { "epoch": 2.438708626796401, "grad_norm": 0.166685551404953, "learning_rate": 5.148397121275628e-06, "loss": 0.2706, "step": 14976 }, { "epoch": 2.438871473354232, "grad_norm": 0.18463678658008575, "learning_rate": 5.145518154900278e-06, "loss": 0.2508, "step": 14977 }, { "epoch": 2.439034319912063, "grad_norm": 0.21345213055610657, "learning_rate": 5.142639901360321e-06, "loss": 0.2476, "step": 14978 }, { "epoch": 2.439197166469894, "grad_norm": 0.19876588881015778, "learning_rate": 5.139762360759085e-06, "loss": 0.2703, "step": 14979 }, { "epoch": 2.4393600130277244, "grad_norm": 0.17093174159526825, "learning_rate": 5.1368855331998835e-06, "loss": 0.2296, "step": 14980 }, { "epoch": 2.4395228595855554, "grad_norm": 0.19635330140590668, "learning_rate": 5.134009418785995e-06, "loss": 0.2646, "step": 14981 }, { "epoch": 2.4396857061433863, "grad_norm": 0.23011644184589386, "learning_rate": 5.131134017620698e-06, "loss": 0.2893, "step": 14982 }, { "epoch": 2.4398485527012173, "grad_norm": 0.15962927043437958, "learning_rate": 5.128259329807217e-06, "loss": 0.2741, "step": 14983 }, { "epoch": 2.440011399259048, "grad_norm": 0.17579300701618195, "learning_rate": 5.125385355448756e-06, "loss": 0.2808, "step": 14984 }, { "epoch": 2.440174245816879, "grad_norm": 0.14914365112781525, "learning_rate": 5.122512094648513e-06, "loss": 0.2438, "step": 14985 }, { "epoch": 2.44033709237471, "grad_norm": 0.1773146390914917, "learning_rate": 5.119639547509642e-06, "loss": 0.2515, "step": 14986 }, { "epoch": 2.4404999389325406, "grad_norm": 0.18649974465370178, "learning_rate": 5.116767714135273e-06, "loss": 0.2644, "step": 14987 }, { "epoch": 2.4406627854903715, "grad_norm": 0.1607396900653839, "learning_rate": 5.1138965946285065e-06, "loss": 0.2874, "step": 14988 }, { "epoch": 2.4408256320482025, "grad_norm": 0.18283867835998535, "learning_rate": 5.111026189092441e-06, "loss": 0.2903, "step": 14989 }, { "epoch": 2.4409884786060334, "grad_norm": 0.18546824157238007, "learning_rate": 5.108156497630126e-06, "loss": 0.2756, "step": 14990 }, { "epoch": 2.4411513251638643, "grad_norm": 0.1896076798439026, "learning_rate": 5.10528752034459e-06, "loss": 0.279, "step": 14991 }, { "epoch": 2.4413141717216953, "grad_norm": 0.17935918271541595, "learning_rate": 5.102419257338828e-06, "loss": 0.3257, "step": 14992 }, { "epoch": 2.4414770182795262, "grad_norm": 0.16384653747081757, "learning_rate": 5.0995517087158386e-06, "loss": 0.2931, "step": 14993 }, { "epoch": 2.441639864837357, "grad_norm": 0.18216738104820251, "learning_rate": 5.0966848745785614e-06, "loss": 0.2724, "step": 14994 }, { "epoch": 2.441802711395188, "grad_norm": 0.16084741055965424, "learning_rate": 5.093818755029933e-06, "loss": 0.2846, "step": 14995 }, { "epoch": 2.4419655579530186, "grad_norm": 0.15609179437160492, "learning_rate": 5.090953350172853e-06, "loss": 0.2779, "step": 14996 }, { "epoch": 2.4421284045108496, "grad_norm": 0.22662420570850372, "learning_rate": 5.0880886601101875e-06, "loss": 0.3185, "step": 14997 }, { "epoch": 2.4422912510686805, "grad_norm": 0.19519184529781342, "learning_rate": 5.085224684944806e-06, "loss": 0.2585, "step": 14998 }, { "epoch": 2.4424540976265114, "grad_norm": 0.21314416825771332, "learning_rate": 5.082361424779519e-06, "loss": 0.263, "step": 14999 }, { "epoch": 2.4426169441843424, "grad_norm": 0.18209214508533478, "learning_rate": 5.079498879717137e-06, "loss": 0.279, "step": 15000 }, { "epoch": 2.4427797907421733, "grad_norm": 0.14148391783237457, "learning_rate": 5.076637049860428e-06, "loss": 0.2492, "step": 15001 }, { "epoch": 2.4429426373000043, "grad_norm": 0.19819940626621246, "learning_rate": 5.073775935312142e-06, "loss": 0.277, "step": 15002 }, { "epoch": 2.4431054838578348, "grad_norm": 0.18202169239521027, "learning_rate": 5.0709155361749915e-06, "loss": 0.2586, "step": 15003 }, { "epoch": 2.4432683304156657, "grad_norm": 0.17985016107559204, "learning_rate": 5.068055852551692e-06, "loss": 0.2296, "step": 15004 }, { "epoch": 2.4434311769734967, "grad_norm": 0.15872809290885925, "learning_rate": 5.0651968845449034e-06, "loss": 0.2725, "step": 15005 }, { "epoch": 2.4435940235313276, "grad_norm": 0.17394179105758667, "learning_rate": 5.062338632257266e-06, "loss": 0.2311, "step": 15006 }, { "epoch": 2.4437568700891585, "grad_norm": 0.18873421847820282, "learning_rate": 5.059481095791416e-06, "loss": 0.2891, "step": 15007 }, { "epoch": 2.4439197166469895, "grad_norm": 0.21475344896316528, "learning_rate": 5.056624275249936e-06, "loss": 0.2619, "step": 15008 }, { "epoch": 2.4440825632048204, "grad_norm": 0.1949414610862732, "learning_rate": 5.053768170735396e-06, "loss": 0.2831, "step": 15009 }, { "epoch": 2.444245409762651, "grad_norm": 0.18968075513839722, "learning_rate": 5.050912782350331e-06, "loss": 0.2822, "step": 15010 }, { "epoch": 2.444408256320482, "grad_norm": 0.15403705835342407, "learning_rate": 5.048058110197276e-06, "loss": 0.269, "step": 15011 }, { "epoch": 2.444571102878313, "grad_norm": 0.1627994030714035, "learning_rate": 5.04520415437871e-06, "loss": 0.2515, "step": 15012 }, { "epoch": 2.4447339494361437, "grad_norm": 0.17721378803253174, "learning_rate": 5.042350914997096e-06, "loss": 0.2646, "step": 15013 }, { "epoch": 2.4448967959939747, "grad_norm": 0.13817620277404785, "learning_rate": 5.0394983921548875e-06, "loss": 0.2519, "step": 15014 }, { "epoch": 2.4450596425518056, "grad_norm": 0.17800574004650116, "learning_rate": 5.036646585954485e-06, "loss": 0.3014, "step": 15015 }, { "epoch": 2.4452224891096366, "grad_norm": 0.12710575759410858, "learning_rate": 5.033795496498286e-06, "loss": 0.2892, "step": 15016 }, { "epoch": 2.4453853356674675, "grad_norm": 0.17495444416999817, "learning_rate": 5.030945123888639e-06, "loss": 0.2483, "step": 15017 }, { "epoch": 2.4455481822252985, "grad_norm": 0.22533971071243286, "learning_rate": 5.0280954682279e-06, "loss": 0.2749, "step": 15018 }, { "epoch": 2.445711028783129, "grad_norm": 0.16834412515163422, "learning_rate": 5.025246529618372e-06, "loss": 0.2694, "step": 15019 }, { "epoch": 2.44587387534096, "grad_norm": 0.18753333389759064, "learning_rate": 5.022398308162338e-06, "loss": 0.2867, "step": 15020 }, { "epoch": 2.446036721898791, "grad_norm": 0.165211021900177, "learning_rate": 5.019550803962053e-06, "loss": 0.292, "step": 15021 }, { "epoch": 2.446199568456622, "grad_norm": 0.1979825794696808, "learning_rate": 5.016704017119761e-06, "loss": 0.2999, "step": 15022 }, { "epoch": 2.4463624150144527, "grad_norm": 0.16505052149295807, "learning_rate": 5.013857947737671e-06, "loss": 0.2571, "step": 15023 }, { "epoch": 2.4465252615722837, "grad_norm": 0.18979741632938385, "learning_rate": 5.0110125959179525e-06, "loss": 0.265, "step": 15024 }, { "epoch": 2.4466881081301146, "grad_norm": 0.1666291356086731, "learning_rate": 5.008167961762775e-06, "loss": 0.2462, "step": 15025 }, { "epoch": 2.446850954687945, "grad_norm": 0.1885579377412796, "learning_rate": 5.005324045374266e-06, "loss": 0.2878, "step": 15026 }, { "epoch": 2.447013801245776, "grad_norm": 0.14126631617546082, "learning_rate": 5.002480846854529e-06, "loss": 0.2424, "step": 15027 }, { "epoch": 2.447176647803607, "grad_norm": 0.18269751965999603, "learning_rate": 4.999638366305639e-06, "loss": 0.2644, "step": 15028 }, { "epoch": 2.447339494361438, "grad_norm": 0.18416079878807068, "learning_rate": 4.996796603829659e-06, "loss": 0.2811, "step": 15029 }, { "epoch": 2.447502340919269, "grad_norm": 0.16817623376846313, "learning_rate": 4.993955559528612e-06, "loss": 0.2805, "step": 15030 }, { "epoch": 2.4476651874771, "grad_norm": 0.22241143882274628, "learning_rate": 4.991115233504495e-06, "loss": 0.2832, "step": 15031 }, { "epoch": 2.4478280340349308, "grad_norm": 0.17463380098342896, "learning_rate": 4.988275625859296e-06, "loss": 0.2389, "step": 15032 }, { "epoch": 2.4479908805927613, "grad_norm": 0.2339615374803543, "learning_rate": 4.985436736694957e-06, "loss": 0.2923, "step": 15033 }, { "epoch": 2.448153727150592, "grad_norm": 0.2205367386341095, "learning_rate": 4.9825985661134e-06, "loss": 0.2708, "step": 15034 }, { "epoch": 2.448316573708423, "grad_norm": 0.18387147784233093, "learning_rate": 4.979761114216533e-06, "loss": 0.2336, "step": 15035 }, { "epoch": 2.448479420266254, "grad_norm": 0.21356840431690216, "learning_rate": 4.976924381106218e-06, "loss": 0.2803, "step": 15036 }, { "epoch": 2.448642266824085, "grad_norm": 0.17678521573543549, "learning_rate": 4.974088366884317e-06, "loss": 0.2999, "step": 15037 }, { "epoch": 2.448805113381916, "grad_norm": 0.17916598916053772, "learning_rate": 4.9712530716526404e-06, "loss": 0.2928, "step": 15038 }, { "epoch": 2.448967959939747, "grad_norm": 0.15284693241119385, "learning_rate": 4.9684184955129805e-06, "loss": 0.3054, "step": 15039 }, { "epoch": 2.4491308064975774, "grad_norm": 0.16117486357688904, "learning_rate": 4.96558463856712e-06, "loss": 0.2699, "step": 15040 }, { "epoch": 2.4492936530554084, "grad_norm": 0.18850383162498474, "learning_rate": 4.962751500916793e-06, "loss": 0.2916, "step": 15041 }, { "epoch": 2.4494564996132393, "grad_norm": 0.1800023317337036, "learning_rate": 4.9599190826637224e-06, "loss": 0.2568, "step": 15042 }, { "epoch": 2.4496193461710702, "grad_norm": 0.16278421878814697, "learning_rate": 4.957087383909592e-06, "loss": 0.2895, "step": 15043 }, { "epoch": 2.449782192728901, "grad_norm": 0.14747853577136993, "learning_rate": 4.95425640475608e-06, "loss": 0.294, "step": 15044 }, { "epoch": 2.449945039286732, "grad_norm": 0.16019316017627716, "learning_rate": 4.951426145304824e-06, "loss": 0.2569, "step": 15045 }, { "epoch": 2.450107885844563, "grad_norm": 0.1489129215478897, "learning_rate": 4.9485966056574295e-06, "loss": 0.2624, "step": 15046 }, { "epoch": 2.450270732402394, "grad_norm": 0.15982554852962494, "learning_rate": 4.945767785915497e-06, "loss": 0.2699, "step": 15047 }, { "epoch": 2.450433578960225, "grad_norm": 0.20997492969036102, "learning_rate": 4.9429396861805865e-06, "loss": 0.2957, "step": 15048 }, { "epoch": 2.4505964255180555, "grad_norm": 0.20953603088855743, "learning_rate": 4.940112306554234e-06, "loss": 0.2404, "step": 15049 }, { "epoch": 2.4507592720758864, "grad_norm": 0.14578521251678467, "learning_rate": 4.937285647137943e-06, "loss": 0.2853, "step": 15050 }, { "epoch": 2.4509221186337173, "grad_norm": 0.1495213657617569, "learning_rate": 4.934459708033215e-06, "loss": 0.3024, "step": 15051 }, { "epoch": 2.4510849651915483, "grad_norm": 0.18691033124923706, "learning_rate": 4.9316344893414995e-06, "loss": 0.2836, "step": 15052 }, { "epoch": 2.4512478117493792, "grad_norm": 0.15642249584197998, "learning_rate": 4.928809991164235e-06, "loss": 0.2712, "step": 15053 }, { "epoch": 2.45141065830721, "grad_norm": 0.18945550918579102, "learning_rate": 4.925986213602818e-06, "loss": 0.2688, "step": 15054 }, { "epoch": 2.451573504865041, "grad_norm": 0.15680931508541107, "learning_rate": 4.923163156758645e-06, "loss": 0.2934, "step": 15055 }, { "epoch": 2.4517363514228716, "grad_norm": 0.14937615394592285, "learning_rate": 4.9203408207330685e-06, "loss": 0.285, "step": 15056 }, { "epoch": 2.4518991979807025, "grad_norm": 0.21301645040512085, "learning_rate": 4.917519205627411e-06, "loss": 0.2605, "step": 15057 }, { "epoch": 2.4520620445385335, "grad_norm": 0.19014880061149597, "learning_rate": 4.914698311542987e-06, "loss": 0.2791, "step": 15058 }, { "epoch": 2.4522248910963644, "grad_norm": 0.18182477355003357, "learning_rate": 4.911878138581072e-06, "loss": 0.2551, "step": 15059 }, { "epoch": 2.4523877376541954, "grad_norm": 0.17545704543590546, "learning_rate": 4.909058686842916e-06, "loss": 0.2768, "step": 15060 }, { "epoch": 2.4525505842120263, "grad_norm": 0.1882573366165161, "learning_rate": 4.906239956429742e-06, "loss": 0.2952, "step": 15061 }, { "epoch": 2.4527134307698573, "grad_norm": 0.15509329736232758, "learning_rate": 4.903421947442763e-06, "loss": 0.2534, "step": 15062 }, { "epoch": 2.4528762773276878, "grad_norm": 0.1613827347755432, "learning_rate": 4.900604659983146e-06, "loss": 0.2561, "step": 15063 }, { "epoch": 2.4530391238855187, "grad_norm": 0.1570514291524887, "learning_rate": 4.897788094152034e-06, "loss": 0.2622, "step": 15064 }, { "epoch": 2.4532019704433496, "grad_norm": 0.13316982984542847, "learning_rate": 4.894972250050564e-06, "loss": 0.2811, "step": 15065 }, { "epoch": 2.4533648170011806, "grad_norm": 0.21678535640239716, "learning_rate": 4.892157127779828e-06, "loss": 0.2603, "step": 15066 }, { "epoch": 2.4535276635590115, "grad_norm": 0.20373469591140747, "learning_rate": 4.889342727440896e-06, "loss": 0.2581, "step": 15067 }, { "epoch": 2.4536905101168425, "grad_norm": 0.16327887773513794, "learning_rate": 4.886529049134806e-06, "loss": 0.2797, "step": 15068 }, { "epoch": 2.4538533566746734, "grad_norm": 0.16955435276031494, "learning_rate": 4.883716092962589e-06, "loss": 0.2768, "step": 15069 }, { "epoch": 2.4540162032325044, "grad_norm": 0.16679725050926208, "learning_rate": 4.880903859025237e-06, "loss": 0.2809, "step": 15070 }, { "epoch": 2.4541790497903353, "grad_norm": 0.14944376051425934, "learning_rate": 4.878092347423707e-06, "loss": 0.2583, "step": 15071 }, { "epoch": 2.454341896348166, "grad_norm": 0.14593200385570526, "learning_rate": 4.8752815582589566e-06, "loss": 0.2479, "step": 15072 }, { "epoch": 2.4545047429059967, "grad_norm": 0.1653032749891281, "learning_rate": 4.872471491631891e-06, "loss": 0.2819, "step": 15073 }, { "epoch": 2.4546675894638277, "grad_norm": 0.19755572080612183, "learning_rate": 4.869662147643397e-06, "loss": 0.2419, "step": 15074 }, { "epoch": 2.4548304360216586, "grad_norm": 0.16354277729988098, "learning_rate": 4.866853526394352e-06, "loss": 0.2755, "step": 15075 }, { "epoch": 2.4549932825794896, "grad_norm": 0.15489499270915985, "learning_rate": 4.864045627985578e-06, "loss": 0.3035, "step": 15076 }, { "epoch": 2.4551561291373205, "grad_norm": 0.1671236753463745, "learning_rate": 4.8612384525179025e-06, "loss": 0.295, "step": 15077 }, { "epoch": 2.4553189756951515, "grad_norm": 0.17805726826190948, "learning_rate": 4.858432000092103e-06, "loss": 0.2647, "step": 15078 }, { "epoch": 2.455481822252982, "grad_norm": 0.17493340373039246, "learning_rate": 4.8556262708089355e-06, "loss": 0.2766, "step": 15079 }, { "epoch": 2.455644668810813, "grad_norm": 0.1724073886871338, "learning_rate": 4.8528212647691444e-06, "loss": 0.2754, "step": 15080 }, { "epoch": 2.455807515368644, "grad_norm": 0.16891975700855255, "learning_rate": 4.850016982073433e-06, "loss": 0.2777, "step": 15081 }, { "epoch": 2.4559703619264748, "grad_norm": 0.1604612022638321, "learning_rate": 4.8472134228224806e-06, "loss": 0.2628, "step": 15082 }, { "epoch": 2.4561332084843057, "grad_norm": 0.1670670062303543, "learning_rate": 4.844410587116941e-06, "loss": 0.2369, "step": 15083 }, { "epoch": 2.4562960550421367, "grad_norm": 0.14773885905742645, "learning_rate": 4.841608475057455e-06, "loss": 0.2658, "step": 15084 }, { "epoch": 2.4564589015999676, "grad_norm": 0.1938871592283249, "learning_rate": 4.8388070867446205e-06, "loss": 0.2746, "step": 15085 }, { "epoch": 2.456621748157798, "grad_norm": 0.18515442311763763, "learning_rate": 4.83600642227901e-06, "loss": 0.235, "step": 15086 }, { "epoch": 2.456784594715629, "grad_norm": 0.1579795628786087, "learning_rate": 4.83320648176119e-06, "loss": 0.263, "step": 15087 }, { "epoch": 2.45694744127346, "grad_norm": 0.20668061077594757, "learning_rate": 4.830407265291678e-06, "loss": 0.2531, "step": 15088 }, { "epoch": 2.457110287831291, "grad_norm": 0.2122403234243393, "learning_rate": 4.8276087729709736e-06, "loss": 0.3046, "step": 15089 }, { "epoch": 2.457273134389122, "grad_norm": 0.20106565952301025, "learning_rate": 4.824811004899546e-06, "loss": 0.2833, "step": 15090 }, { "epoch": 2.457435980946953, "grad_norm": 0.16666758060455322, "learning_rate": 4.822013961177857e-06, "loss": 0.2862, "step": 15091 }, { "epoch": 2.4575988275047838, "grad_norm": 0.15608133375644684, "learning_rate": 4.8192176419063205e-06, "loss": 0.3015, "step": 15092 }, { "epoch": 2.4577616740626143, "grad_norm": 0.1751995086669922, "learning_rate": 4.816422047185337e-06, "loss": 0.2602, "step": 15093 }, { "epoch": 2.457924520620445, "grad_norm": 0.21702037751674652, "learning_rate": 4.813627177115265e-06, "loss": 0.2815, "step": 15094 }, { "epoch": 2.458087367178276, "grad_norm": 0.16367828845977783, "learning_rate": 4.810833031796463e-06, "loss": 0.2685, "step": 15095 }, { "epoch": 2.458250213736107, "grad_norm": 0.14691466093063354, "learning_rate": 4.808039611329243e-06, "loss": 0.2269, "step": 15096 }, { "epoch": 2.458413060293938, "grad_norm": 0.18711549043655396, "learning_rate": 4.805246915813894e-06, "loss": 0.2578, "step": 15097 }, { "epoch": 2.458575906851769, "grad_norm": 0.18991787731647491, "learning_rate": 4.80245494535069e-06, "loss": 0.2821, "step": 15098 }, { "epoch": 2.4587387534096, "grad_norm": 0.1530718207359314, "learning_rate": 4.799663700039869e-06, "loss": 0.2484, "step": 15099 }, { "epoch": 2.458901599967431, "grad_norm": 0.15145528316497803, "learning_rate": 4.79687317998164e-06, "loss": 0.2585, "step": 15100 }, { "epoch": 2.459064446525262, "grad_norm": 0.20600594580173492, "learning_rate": 4.794083385276191e-06, "loss": 0.2713, "step": 15101 }, { "epoch": 2.4592272930830923, "grad_norm": 0.18647317588329315, "learning_rate": 4.791294316023689e-06, "loss": 0.2662, "step": 15102 }, { "epoch": 2.4593901396409232, "grad_norm": 0.1788596659898758, "learning_rate": 4.788505972324272e-06, "loss": 0.2586, "step": 15103 }, { "epoch": 2.459552986198754, "grad_norm": 0.17609363794326782, "learning_rate": 4.785718354278038e-06, "loss": 0.2932, "step": 15104 }, { "epoch": 2.459715832756585, "grad_norm": 0.2089037150144577, "learning_rate": 4.782931461985085e-06, "loss": 0.2873, "step": 15105 }, { "epoch": 2.459878679314416, "grad_norm": 0.15884485840797424, "learning_rate": 4.780145295545465e-06, "loss": 0.2649, "step": 15106 }, { "epoch": 2.460041525872247, "grad_norm": 0.16965411603450775, "learning_rate": 4.77735985505921e-06, "loss": 0.234, "step": 15107 }, { "epoch": 2.460204372430078, "grad_norm": 0.20221784710884094, "learning_rate": 4.7745751406263165e-06, "loss": 0.2351, "step": 15108 }, { "epoch": 2.4603672189879084, "grad_norm": 0.16099444031715393, "learning_rate": 4.77179115234678e-06, "loss": 0.2414, "step": 15109 }, { "epoch": 2.4605300655457394, "grad_norm": 0.2054792046546936, "learning_rate": 4.769007890320545e-06, "loss": 0.2565, "step": 15110 }, { "epoch": 2.4606929121035703, "grad_norm": 0.20089127123355865, "learning_rate": 4.766225354647535e-06, "loss": 0.2901, "step": 15111 }, { "epoch": 2.4608557586614013, "grad_norm": 0.20439505577087402, "learning_rate": 4.763443545427665e-06, "loss": 0.3053, "step": 15112 }, { "epoch": 2.461018605219232, "grad_norm": 0.17989037930965424, "learning_rate": 4.760662462760793e-06, "loss": 0.2746, "step": 15113 }, { "epoch": 2.461181451777063, "grad_norm": 0.19708044826984406, "learning_rate": 4.757882106746783e-06, "loss": 0.2897, "step": 15114 }, { "epoch": 2.461344298334894, "grad_norm": 0.17250289022922516, "learning_rate": 4.755102477485454e-06, "loss": 0.2736, "step": 15115 }, { "epoch": 2.4615071448927246, "grad_norm": 0.17432624101638794, "learning_rate": 4.7523235750765945e-06, "loss": 0.2877, "step": 15116 }, { "epoch": 2.4616699914505555, "grad_norm": 0.194960355758667, "learning_rate": 4.749545399619989e-06, "loss": 0.2869, "step": 15117 }, { "epoch": 2.4618328380083865, "grad_norm": 0.16817723214626312, "learning_rate": 4.746767951215378e-06, "loss": 0.2422, "step": 15118 }, { "epoch": 2.4619956845662174, "grad_norm": 0.14692102372646332, "learning_rate": 4.743991229962469e-06, "loss": 0.2481, "step": 15119 }, { "epoch": 2.4621585311240484, "grad_norm": 0.17940154671669006, "learning_rate": 4.741215235960969e-06, "loss": 0.2683, "step": 15120 }, { "epoch": 2.4623213776818793, "grad_norm": 0.14229914546012878, "learning_rate": 4.73843996931054e-06, "loss": 0.2229, "step": 15121 }, { "epoch": 2.4624842242397103, "grad_norm": 0.21311426162719727, "learning_rate": 4.735665430110822e-06, "loss": 0.2601, "step": 15122 }, { "epoch": 2.462647070797541, "grad_norm": 0.2509743273258209, "learning_rate": 4.732891618461421e-06, "loss": 0.2917, "step": 15123 }, { "epoch": 2.462809917355372, "grad_norm": 0.1753275990486145, "learning_rate": 4.730118534461942e-06, "loss": 0.2848, "step": 15124 }, { "epoch": 2.4629727639132026, "grad_norm": 0.13081662356853485, "learning_rate": 4.727346178211934e-06, "loss": 0.2451, "step": 15125 }, { "epoch": 2.4631356104710336, "grad_norm": 0.20953421294689178, "learning_rate": 4.724574549810931e-06, "loss": 0.2858, "step": 15126 }, { "epoch": 2.4632984570288645, "grad_norm": 0.16942688822746277, "learning_rate": 4.721803649358455e-06, "loss": 0.2835, "step": 15127 }, { "epoch": 2.4634613035866955, "grad_norm": 0.17889197170734406, "learning_rate": 4.719033476953985e-06, "loss": 0.2515, "step": 15128 }, { "epoch": 2.4636241501445264, "grad_norm": 0.20832377672195435, "learning_rate": 4.716264032696977e-06, "loss": 0.3109, "step": 15129 }, { "epoch": 2.4637869967023573, "grad_norm": 0.16657137870788574, "learning_rate": 4.7134953166868525e-06, "loss": 0.242, "step": 15130 }, { "epoch": 2.4639498432601883, "grad_norm": 0.19126762449741364, "learning_rate": 4.710727329023035e-06, "loss": 0.2595, "step": 15131 }, { "epoch": 2.464112689818019, "grad_norm": 0.1436898559331894, "learning_rate": 4.707960069804893e-06, "loss": 0.2357, "step": 15132 }, { "epoch": 2.4642755363758497, "grad_norm": 0.14338895678520203, "learning_rate": 4.70519353913178e-06, "loss": 0.2801, "step": 15133 }, { "epoch": 2.4644383829336807, "grad_norm": 0.1719459891319275, "learning_rate": 4.7024277371030214e-06, "loss": 0.2669, "step": 15134 }, { "epoch": 2.4646012294915116, "grad_norm": 0.17595931887626648, "learning_rate": 4.699662663817922e-06, "loss": 0.2554, "step": 15135 }, { "epoch": 2.4647640760493426, "grad_norm": 0.17976686358451843, "learning_rate": 4.6968983193757584e-06, "loss": 0.2651, "step": 15136 }, { "epoch": 2.4649269226071735, "grad_norm": 0.1303415447473526, "learning_rate": 4.694134703875766e-06, "loss": 0.2929, "step": 15137 }, { "epoch": 2.4650897691650044, "grad_norm": 0.19056251645088196, "learning_rate": 4.691371817417184e-06, "loss": 0.2669, "step": 15138 }, { "epoch": 2.465252615722835, "grad_norm": 0.19810965657234192, "learning_rate": 4.688609660099197e-06, "loss": 0.2952, "step": 15139 }, { "epoch": 2.465415462280666, "grad_norm": 0.16020308434963226, "learning_rate": 4.6858482320209816e-06, "loss": 0.2458, "step": 15140 }, { "epoch": 2.465578308838497, "grad_norm": 0.18312802910804749, "learning_rate": 4.683087533281669e-06, "loss": 0.2941, "step": 15141 }, { "epoch": 2.4657411553963278, "grad_norm": 0.15811416506767273, "learning_rate": 4.6803275639803915e-06, "loss": 0.2572, "step": 15142 }, { "epoch": 2.4659040019541587, "grad_norm": 0.1289721429347992, "learning_rate": 4.677568324216233e-06, "loss": 0.2511, "step": 15143 }, { "epoch": 2.4660668485119897, "grad_norm": 0.18972641229629517, "learning_rate": 4.67480981408826e-06, "loss": 0.253, "step": 15144 }, { "epoch": 2.4662296950698206, "grad_norm": 0.21055248379707336, "learning_rate": 4.6720520336955035e-06, "loss": 0.2894, "step": 15145 }, { "epoch": 2.4663925416276515, "grad_norm": 0.1794136017560959, "learning_rate": 4.66929498313699e-06, "loss": 0.2705, "step": 15146 }, { "epoch": 2.4665553881854825, "grad_norm": 0.1618914008140564, "learning_rate": 4.666538662511699e-06, "loss": 0.2784, "step": 15147 }, { "epoch": 2.466718234743313, "grad_norm": 0.1550866812467575, "learning_rate": 4.663783071918587e-06, "loss": 0.2557, "step": 15148 }, { "epoch": 2.466881081301144, "grad_norm": 0.21934925019741058, "learning_rate": 4.661028211456595e-06, "loss": 0.2873, "step": 15149 }, { "epoch": 2.467043927858975, "grad_norm": 0.1950732171535492, "learning_rate": 4.65827408122462e-06, "loss": 0.2683, "step": 15150 }, { "epoch": 2.467206774416806, "grad_norm": 0.17596514523029327, "learning_rate": 4.65552068132156e-06, "loss": 0.2724, "step": 15151 }, { "epoch": 2.4673696209746367, "grad_norm": 0.14037154614925385, "learning_rate": 4.652768011846262e-06, "loss": 0.2867, "step": 15152 }, { "epoch": 2.4675324675324677, "grad_norm": 0.15473207831382751, "learning_rate": 4.650016072897545e-06, "loss": 0.2717, "step": 15153 }, { "epoch": 2.4676953140902986, "grad_norm": 0.18566907942295074, "learning_rate": 4.647264864574227e-06, "loss": 0.2541, "step": 15154 }, { "epoch": 2.467858160648129, "grad_norm": 0.1738317757844925, "learning_rate": 4.644514386975074e-06, "loss": 0.2467, "step": 15155 }, { "epoch": 2.46802100720596, "grad_norm": 0.15894852578639984, "learning_rate": 4.6417646401988476e-06, "loss": 0.2525, "step": 15156 }, { "epoch": 2.468183853763791, "grad_norm": 0.1726563274860382, "learning_rate": 4.639015624344267e-06, "loss": 0.257, "step": 15157 }, { "epoch": 2.468346700321622, "grad_norm": 0.19764412939548492, "learning_rate": 4.636267339510028e-06, "loss": 0.279, "step": 15158 }, { "epoch": 2.468509546879453, "grad_norm": 0.15702717006206512, "learning_rate": 4.633519785794796e-06, "loss": 0.2625, "step": 15159 }, { "epoch": 2.468672393437284, "grad_norm": 0.19915343821048737, "learning_rate": 4.630772963297228e-06, "loss": 0.2753, "step": 15160 }, { "epoch": 2.468835239995115, "grad_norm": 0.18187928199768066, "learning_rate": 4.62802687211594e-06, "loss": 0.2615, "step": 15161 }, { "epoch": 2.4689980865529453, "grad_norm": 0.18970806896686554, "learning_rate": 4.625281512349522e-06, "loss": 0.2462, "step": 15162 }, { "epoch": 2.4691609331107762, "grad_norm": 0.1644868552684784, "learning_rate": 4.622536884096537e-06, "loss": 0.2575, "step": 15163 }, { "epoch": 2.469323779668607, "grad_norm": 0.1816210299730301, "learning_rate": 4.619792987455537e-06, "loss": 0.2669, "step": 15164 }, { "epoch": 2.469486626226438, "grad_norm": 0.17623648047447205, "learning_rate": 4.61704982252503e-06, "loss": 0.2625, "step": 15165 }, { "epoch": 2.469649472784269, "grad_norm": 0.19405384361743927, "learning_rate": 4.6143073894034965e-06, "loss": 0.2828, "step": 15166 }, { "epoch": 2.4698123193421, "grad_norm": 0.18464860320091248, "learning_rate": 4.611565688189409e-06, "loss": 0.2584, "step": 15167 }, { "epoch": 2.469975165899931, "grad_norm": 0.1827733814716339, "learning_rate": 4.6088247189812e-06, "loss": 0.2888, "step": 15168 }, { "epoch": 2.4701380124577614, "grad_norm": 0.21285782754421234, "learning_rate": 4.606084481877279e-06, "loss": 0.35, "step": 15169 }, { "epoch": 2.4703008590155924, "grad_norm": 0.21537432074546814, "learning_rate": 4.6033449769760176e-06, "loss": 0.2803, "step": 15170 }, { "epoch": 2.4704637055734233, "grad_norm": 0.17278434336185455, "learning_rate": 4.60060620437579e-06, "loss": 0.3021, "step": 15171 }, { "epoch": 2.4706265521312543, "grad_norm": 0.16438083350658417, "learning_rate": 4.597868164174918e-06, "loss": 0.2668, "step": 15172 }, { "epoch": 2.470789398689085, "grad_norm": 0.1636602282524109, "learning_rate": 4.5951308564717046e-06, "loss": 0.2984, "step": 15173 }, { "epoch": 2.470952245246916, "grad_norm": 0.15750813484191895, "learning_rate": 4.592394281364421e-06, "loss": 0.2442, "step": 15174 }, { "epoch": 2.471115091804747, "grad_norm": 0.19009408354759216, "learning_rate": 4.589658438951333e-06, "loss": 0.2659, "step": 15175 }, { "epoch": 2.471277938362578, "grad_norm": 0.16905412077903748, "learning_rate": 4.586923329330658e-06, "loss": 0.269, "step": 15176 }, { "epoch": 2.471440784920409, "grad_norm": 0.18285495042800903, "learning_rate": 4.584188952600588e-06, "loss": 0.2707, "step": 15177 }, { "epoch": 2.4716036314782395, "grad_norm": 0.1376044601202011, "learning_rate": 4.581455308859308e-06, "loss": 0.2898, "step": 15178 }, { "epoch": 2.4717664780360704, "grad_norm": 0.1564771682024002, "learning_rate": 4.5787223982049574e-06, "loss": 0.2356, "step": 15179 }, { "epoch": 2.4719293245939014, "grad_norm": 0.20980148017406464, "learning_rate": 4.575990220735657e-06, "loss": 0.2692, "step": 15180 }, { "epoch": 2.4720921711517323, "grad_norm": 0.17581360042095184, "learning_rate": 4.573258776549494e-06, "loss": 0.2371, "step": 15181 }, { "epoch": 2.4722550177095632, "grad_norm": 0.20578905940055847, "learning_rate": 4.570528065744547e-06, "loss": 0.2628, "step": 15182 }, { "epoch": 2.472417864267394, "grad_norm": 0.21422700583934784, "learning_rate": 4.5677980884188506e-06, "loss": 0.2677, "step": 15183 }, { "epoch": 2.472580710825225, "grad_norm": 0.14956338703632355, "learning_rate": 4.5650688446704196e-06, "loss": 0.2635, "step": 15184 }, { "epoch": 2.4727435573830556, "grad_norm": 0.18039146065711975, "learning_rate": 4.562340334597237e-06, "loss": 0.2575, "step": 15185 }, { "epoch": 2.4729064039408866, "grad_norm": 0.1431182324886322, "learning_rate": 4.559612558297272e-06, "loss": 0.2589, "step": 15186 }, { "epoch": 2.4730692504987175, "grad_norm": 0.1975347250699997, "learning_rate": 4.556885515868453e-06, "loss": 0.2911, "step": 15187 }, { "epoch": 2.4732320970565485, "grad_norm": 0.19170072674751282, "learning_rate": 4.5541592074087005e-06, "loss": 0.2659, "step": 15188 }, { "epoch": 2.4733949436143794, "grad_norm": 0.20508000254631042, "learning_rate": 4.551433633015889e-06, "loss": 0.2497, "step": 15189 }, { "epoch": 2.4735577901722103, "grad_norm": 0.16948851943016052, "learning_rate": 4.5487087927878676e-06, "loss": 0.2569, "step": 15190 }, { "epoch": 2.4737206367300413, "grad_norm": 0.25443199276924133, "learning_rate": 4.5459846868224824e-06, "loss": 0.3046, "step": 15191 }, { "epoch": 2.4738834832878718, "grad_norm": 0.186687171459198, "learning_rate": 4.543261315217529e-06, "loss": 0.2754, "step": 15192 }, { "epoch": 2.4740463298457027, "grad_norm": 0.1289072185754776, "learning_rate": 4.540538678070777e-06, "loss": 0.2711, "step": 15193 }, { "epoch": 2.4742091764035337, "grad_norm": 0.19370253384113312, "learning_rate": 4.537816775479992e-06, "loss": 0.2854, "step": 15194 }, { "epoch": 2.4743720229613646, "grad_norm": 0.1585540622472763, "learning_rate": 4.535095607542889e-06, "loss": 0.2632, "step": 15195 }, { "epoch": 2.4745348695191955, "grad_norm": 0.22689391672611237, "learning_rate": 4.532375174357165e-06, "loss": 0.2826, "step": 15196 }, { "epoch": 2.4746977160770265, "grad_norm": 0.15613795816898346, "learning_rate": 4.529655476020497e-06, "loss": 0.3465, "step": 15197 }, { "epoch": 2.4748605626348574, "grad_norm": 0.1619666963815689, "learning_rate": 4.526936512630533e-06, "loss": 0.259, "step": 15198 }, { "epoch": 2.4750234091926884, "grad_norm": 0.18616792559623718, "learning_rate": 4.524218284284879e-06, "loss": 0.2741, "step": 15199 }, { "epoch": 2.4751862557505193, "grad_norm": 0.19375993311405182, "learning_rate": 4.5215007910811405e-06, "loss": 0.2719, "step": 15200 }, { "epoch": 2.47534910230835, "grad_norm": 0.13823626935482025, "learning_rate": 4.518784033116879e-06, "loss": 0.2774, "step": 15201 }, { "epoch": 2.4755119488661808, "grad_norm": 0.2668560743331909, "learning_rate": 4.5160680104896355e-06, "loss": 0.288, "step": 15202 }, { "epoch": 2.4756747954240117, "grad_norm": 0.1686675101518631, "learning_rate": 4.513352723296915e-06, "loss": 0.2562, "step": 15203 }, { "epoch": 2.4758376419818426, "grad_norm": 0.17430396378040314, "learning_rate": 4.510638171636217e-06, "loss": 0.276, "step": 15204 }, { "epoch": 2.4760004885396736, "grad_norm": 0.17611786723136902, "learning_rate": 4.5079243556049975e-06, "loss": 0.2776, "step": 15205 }, { "epoch": 2.4761633350975045, "grad_norm": 0.21233783662319183, "learning_rate": 4.505211275300683e-06, "loss": 0.2447, "step": 15206 }, { "epoch": 2.4763261816553355, "grad_norm": 0.19015032052993774, "learning_rate": 4.502498930820695e-06, "loss": 0.2818, "step": 15207 }, { "epoch": 2.476489028213166, "grad_norm": 0.17290225625038147, "learning_rate": 4.499787322262406e-06, "loss": 0.2508, "step": 15208 }, { "epoch": 2.476651874770997, "grad_norm": 0.1830592006444931, "learning_rate": 4.497076449723172e-06, "loss": 0.2489, "step": 15209 }, { "epoch": 2.476814721328828, "grad_norm": 0.13488925993442535, "learning_rate": 4.494366313300316e-06, "loss": 0.2574, "step": 15210 }, { "epoch": 2.476977567886659, "grad_norm": 0.17253530025482178, "learning_rate": 4.491656913091152e-06, "loss": 0.2707, "step": 15211 }, { "epoch": 2.4771404144444897, "grad_norm": 0.19302845001220703, "learning_rate": 4.488948249192951e-06, "loss": 0.2519, "step": 15212 }, { "epoch": 2.4773032610023207, "grad_norm": 0.18851913511753082, "learning_rate": 4.486240321702959e-06, "loss": 0.3081, "step": 15213 }, { "epoch": 2.4774661075601516, "grad_norm": 0.19420143961906433, "learning_rate": 4.483533130718395e-06, "loss": 0.2831, "step": 15214 }, { "epoch": 2.477628954117982, "grad_norm": 0.19322651624679565, "learning_rate": 4.480826676336466e-06, "loss": 0.2642, "step": 15215 }, { "epoch": 2.477791800675813, "grad_norm": 0.18018287420272827, "learning_rate": 4.478120958654338e-06, "loss": 0.2561, "step": 15216 }, { "epoch": 2.477954647233644, "grad_norm": 0.1513078212738037, "learning_rate": 4.475415977769146e-06, "loss": 0.257, "step": 15217 }, { "epoch": 2.478117493791475, "grad_norm": 0.18817193806171417, "learning_rate": 4.472711733778018e-06, "loss": 0.2494, "step": 15218 }, { "epoch": 2.478280340349306, "grad_norm": 0.13640369474887848, "learning_rate": 4.470008226778042e-06, "loss": 0.2713, "step": 15219 }, { "epoch": 2.478443186907137, "grad_norm": 0.15766416490077972, "learning_rate": 4.46730545686628e-06, "loss": 0.236, "step": 15220 }, { "epoch": 2.4786060334649678, "grad_norm": 0.16755756735801697, "learning_rate": 4.464603424139763e-06, "loss": 0.2412, "step": 15221 }, { "epoch": 2.4787688800227983, "grad_norm": 0.1653989851474762, "learning_rate": 4.461902128695514e-06, "loss": 0.2515, "step": 15222 }, { "epoch": 2.478931726580629, "grad_norm": 0.1673355996608734, "learning_rate": 4.459201570630514e-06, "loss": 0.2905, "step": 15223 }, { "epoch": 2.47909457313846, "grad_norm": 0.16474060714244843, "learning_rate": 4.456501750041719e-06, "loss": 0.2615, "step": 15224 }, { "epoch": 2.479257419696291, "grad_norm": 0.19615209102630615, "learning_rate": 4.453802667026055e-06, "loss": 0.2874, "step": 15225 }, { "epoch": 2.479420266254122, "grad_norm": 0.15562698245048523, "learning_rate": 4.451104321680438e-06, "loss": 0.2897, "step": 15226 }, { "epoch": 2.479583112811953, "grad_norm": 0.1980506032705307, "learning_rate": 4.448406714101738e-06, "loss": 0.2495, "step": 15227 }, { "epoch": 2.479745959369784, "grad_norm": 0.15812233090400696, "learning_rate": 4.445709844386816e-06, "loss": 0.2643, "step": 15228 }, { "epoch": 2.479908805927615, "grad_norm": 0.19734622538089752, "learning_rate": 4.443013712632493e-06, "loss": 0.2783, "step": 15229 }, { "epoch": 2.480071652485446, "grad_norm": 0.20162172615528107, "learning_rate": 4.4403183189355625e-06, "loss": 0.2629, "step": 15230 }, { "epoch": 2.4802344990432763, "grad_norm": 0.15155838429927826, "learning_rate": 4.4376236633928095e-06, "loss": 0.2929, "step": 15231 }, { "epoch": 2.4803973456011073, "grad_norm": 0.1371164321899414, "learning_rate": 4.434929746100966e-06, "loss": 0.2657, "step": 15232 }, { "epoch": 2.480560192158938, "grad_norm": 0.20968645811080933, "learning_rate": 4.432236567156767e-06, "loss": 0.2936, "step": 15233 }, { "epoch": 2.480723038716769, "grad_norm": 0.17387743294239044, "learning_rate": 4.429544126656895e-06, "loss": 0.3109, "step": 15234 }, { "epoch": 2.4808858852746, "grad_norm": 0.15937954187393188, "learning_rate": 4.426852424698022e-06, "loss": 0.2301, "step": 15235 }, { "epoch": 2.481048731832431, "grad_norm": 0.21112078428268433, "learning_rate": 4.42416146137678e-06, "loss": 0.2631, "step": 15236 }, { "epoch": 2.481211578390262, "grad_norm": 0.18265365064144135, "learning_rate": 4.421471236789792e-06, "loss": 0.2906, "step": 15237 }, { "epoch": 2.4813744249480925, "grad_norm": 0.19900238513946533, "learning_rate": 4.418781751033644e-06, "loss": 0.272, "step": 15238 }, { "epoch": 2.4815372715059234, "grad_norm": 0.17533573508262634, "learning_rate": 4.416093004204888e-06, "loss": 0.2591, "step": 15239 }, { "epoch": 2.4817001180637543, "grad_norm": 0.17857599258422852, "learning_rate": 4.41340499640007e-06, "loss": 0.2708, "step": 15240 }, { "epoch": 2.4818629646215853, "grad_norm": 0.1754220724105835, "learning_rate": 4.410717727715691e-06, "loss": 0.2691, "step": 15241 }, { "epoch": 2.4820258111794162, "grad_norm": 0.17783395946025848, "learning_rate": 4.4080311982482305e-06, "loss": 0.2647, "step": 15242 }, { "epoch": 2.482188657737247, "grad_norm": 0.15957510471343994, "learning_rate": 4.405345408094141e-06, "loss": 0.3106, "step": 15243 }, { "epoch": 2.482351504295078, "grad_norm": 0.17093071341514587, "learning_rate": 4.402660357349864e-06, "loss": 0.2649, "step": 15244 }, { "epoch": 2.4825143508529086, "grad_norm": 0.1874622106552124, "learning_rate": 4.399976046111787e-06, "loss": 0.2629, "step": 15245 }, { "epoch": 2.4826771974107396, "grad_norm": 0.1858845353126526, "learning_rate": 4.3972924744762915e-06, "loss": 0.2334, "step": 15246 }, { "epoch": 2.4828400439685705, "grad_norm": 0.1524101048707962, "learning_rate": 4.394609642539715e-06, "loss": 0.2554, "step": 15247 }, { "epoch": 2.4830028905264014, "grad_norm": 0.1821020543575287, "learning_rate": 4.391927550398397e-06, "loss": 0.2366, "step": 15248 }, { "epoch": 2.4831657370842324, "grad_norm": 0.15593089163303375, "learning_rate": 4.3892461981486224e-06, "loss": 0.2854, "step": 15249 }, { "epoch": 2.4833285836420633, "grad_norm": 0.16762292385101318, "learning_rate": 4.386565585886654e-06, "loss": 0.2349, "step": 15250 }, { "epoch": 2.4834914301998943, "grad_norm": 0.17160072922706604, "learning_rate": 4.383885713708746e-06, "loss": 0.2889, "step": 15251 }, { "epoch": 2.483654276757725, "grad_norm": 0.25770148634910583, "learning_rate": 4.3812065817111084e-06, "loss": 0.2886, "step": 15252 }, { "epoch": 2.483817123315556, "grad_norm": 0.1706368774175644, "learning_rate": 4.37852818998993e-06, "loss": 0.2558, "step": 15253 }, { "epoch": 2.4839799698733867, "grad_norm": 0.20688128471374512, "learning_rate": 4.375850538641368e-06, "loss": 0.2645, "step": 15254 }, { "epoch": 2.4841428164312176, "grad_norm": 0.1839320957660675, "learning_rate": 4.373173627761571e-06, "loss": 0.2804, "step": 15255 }, { "epoch": 2.4843056629890485, "grad_norm": 0.20448032021522522, "learning_rate": 4.370497457446637e-06, "loss": 0.2872, "step": 15256 }, { "epoch": 2.4844685095468795, "grad_norm": 0.18945249915122986, "learning_rate": 4.367822027792648e-06, "loss": 0.2904, "step": 15257 }, { "epoch": 2.4846313561047104, "grad_norm": 0.18744198977947235, "learning_rate": 4.36514733889567e-06, "loss": 0.2621, "step": 15258 }, { "epoch": 2.4847942026625414, "grad_norm": 0.18053312599658966, "learning_rate": 4.362473390851726e-06, "loss": 0.2514, "step": 15259 }, { "epoch": 2.4849570492203723, "grad_norm": 0.18597173690795898, "learning_rate": 4.359800183756818e-06, "loss": 0.2752, "step": 15260 }, { "epoch": 2.485119895778203, "grad_norm": 0.23467424511909485, "learning_rate": 4.357127717706918e-06, "loss": 0.2774, "step": 15261 }, { "epoch": 2.4852827423360337, "grad_norm": 0.16873499751091003, "learning_rate": 4.354455992797985e-06, "loss": 0.2537, "step": 15262 }, { "epoch": 2.4854455888938647, "grad_norm": 0.15143071115016937, "learning_rate": 4.351785009125939e-06, "loss": 0.2972, "step": 15263 }, { "epoch": 2.4856084354516956, "grad_norm": 0.186735138297081, "learning_rate": 4.349114766786669e-06, "loss": 0.2889, "step": 15264 }, { "epoch": 2.4857712820095266, "grad_norm": 0.16854961216449738, "learning_rate": 4.346445265876056e-06, "loss": 0.2509, "step": 15265 }, { "epoch": 2.4859341285673575, "grad_norm": 0.17099635303020477, "learning_rate": 4.343776506489938e-06, "loss": 0.2655, "step": 15266 }, { "epoch": 2.4860969751251885, "grad_norm": 0.16464181244373322, "learning_rate": 4.341108488724124e-06, "loss": 0.2435, "step": 15267 }, { "epoch": 2.486259821683019, "grad_norm": 0.17939749360084534, "learning_rate": 4.3384412126744185e-06, "loss": 0.2465, "step": 15268 }, { "epoch": 2.48642266824085, "grad_norm": 0.16480448842048645, "learning_rate": 4.335774678436572e-06, "loss": 0.2608, "step": 15269 }, { "epoch": 2.486585514798681, "grad_norm": 0.16120655834674835, "learning_rate": 4.333108886106329e-06, "loss": 0.2699, "step": 15270 }, { "epoch": 2.486748361356512, "grad_norm": 0.19018739461898804, "learning_rate": 4.330443835779399e-06, "loss": 0.287, "step": 15271 }, { "epoch": 2.4869112079143427, "grad_norm": 0.16734232008457184, "learning_rate": 4.327779527551457e-06, "loss": 0.2492, "step": 15272 }, { "epoch": 2.4870740544721737, "grad_norm": 0.18572014570236206, "learning_rate": 4.32511596151817e-06, "loss": 0.2546, "step": 15273 }, { "epoch": 2.4872369010300046, "grad_norm": 0.17017951607704163, "learning_rate": 4.322453137775165e-06, "loss": 0.2525, "step": 15274 }, { "epoch": 2.4873997475878356, "grad_norm": 0.13194836676120758, "learning_rate": 4.319791056418043e-06, "loss": 0.2744, "step": 15275 }, { "epoch": 2.4875625941456665, "grad_norm": 0.20144185423851013, "learning_rate": 4.317129717542376e-06, "loss": 0.2713, "step": 15276 }, { "epoch": 2.487725440703497, "grad_norm": 0.20595207810401917, "learning_rate": 4.314469121243728e-06, "loss": 0.2788, "step": 15277 }, { "epoch": 2.487888287261328, "grad_norm": 0.16369405388832092, "learning_rate": 4.311809267617611e-06, "loss": 0.2812, "step": 15278 }, { "epoch": 2.488051133819159, "grad_norm": 0.15156030654907227, "learning_rate": 4.30915015675952e-06, "loss": 0.2968, "step": 15279 }, { "epoch": 2.48821398037699, "grad_norm": 0.14025038480758667, "learning_rate": 4.306491788764936e-06, "loss": 0.2633, "step": 15280 }, { "epoch": 2.4883768269348208, "grad_norm": 0.16774161159992218, "learning_rate": 4.303834163729295e-06, "loss": 0.2459, "step": 15281 }, { "epoch": 2.4885396734926517, "grad_norm": 0.23116415739059448, "learning_rate": 4.3011772817480144e-06, "loss": 0.2581, "step": 15282 }, { "epoch": 2.4887025200504826, "grad_norm": 0.2026083767414093, "learning_rate": 4.2985211429164814e-06, "loss": 0.2674, "step": 15283 }, { "epoch": 2.488865366608313, "grad_norm": 0.1790422648191452, "learning_rate": 4.295865747330066e-06, "loss": 0.2405, "step": 15284 }, { "epoch": 2.489028213166144, "grad_norm": 0.21810272336006165, "learning_rate": 4.293211095084102e-06, "loss": 0.2722, "step": 15285 }, { "epoch": 2.489191059723975, "grad_norm": 0.2021256983280182, "learning_rate": 4.2905571862738995e-06, "loss": 0.2717, "step": 15286 }, { "epoch": 2.489353906281806, "grad_norm": 0.2018996775150299, "learning_rate": 4.287904020994735e-06, "loss": 0.2549, "step": 15287 }, { "epoch": 2.489516752839637, "grad_norm": 0.17241200804710388, "learning_rate": 4.2852515993418775e-06, "loss": 0.2837, "step": 15288 }, { "epoch": 2.489679599397468, "grad_norm": 0.16065150499343872, "learning_rate": 4.282599921410551e-06, "loss": 0.2551, "step": 15289 }, { "epoch": 2.489842445955299, "grad_norm": 0.20354795455932617, "learning_rate": 4.279948987295951e-06, "loss": 0.2502, "step": 15290 }, { "epoch": 2.4900052925131293, "grad_norm": 0.18220645189285278, "learning_rate": 4.277298797093268e-06, "loss": 0.2739, "step": 15291 }, { "epoch": 2.4901681390709602, "grad_norm": 0.1916143000125885, "learning_rate": 4.274649350897647e-06, "loss": 0.2898, "step": 15292 }, { "epoch": 2.490330985628791, "grad_norm": 0.1391797661781311, "learning_rate": 4.272000648804206e-06, "loss": 0.2585, "step": 15293 }, { "epoch": 2.490493832186622, "grad_norm": 0.15637357532978058, "learning_rate": 4.269352690908041e-06, "loss": 0.2422, "step": 15294 }, { "epoch": 2.490656678744453, "grad_norm": 0.1662655919790268, "learning_rate": 4.266705477304228e-06, "loss": 0.2808, "step": 15295 }, { "epoch": 2.490819525302284, "grad_norm": 0.14715662598609924, "learning_rate": 4.264059008087809e-06, "loss": 0.2413, "step": 15296 }, { "epoch": 2.490982371860115, "grad_norm": 0.19795642793178558, "learning_rate": 4.261413283353799e-06, "loss": 0.2457, "step": 15297 }, { "epoch": 2.4911452184179455, "grad_norm": 0.16901515424251556, "learning_rate": 4.258768303197178e-06, "loss": 0.2705, "step": 15298 }, { "epoch": 2.4913080649757764, "grad_norm": 0.19295625388622284, "learning_rate": 4.256124067712925e-06, "loss": 0.3395, "step": 15299 }, { "epoch": 2.4914709115336073, "grad_norm": 0.15673455595970154, "learning_rate": 4.253480576995969e-06, "loss": 0.2984, "step": 15300 }, { "epoch": 2.4916337580914383, "grad_norm": 0.19411903619766235, "learning_rate": 4.25083783114121e-06, "loss": 0.2893, "step": 15301 }, { "epoch": 2.491796604649269, "grad_norm": 0.1970546394586563, "learning_rate": 4.248195830243548e-06, "loss": 0.2704, "step": 15302 }, { "epoch": 2.4919594512071, "grad_norm": 0.1811898946762085, "learning_rate": 4.245554574397828e-06, "loss": 0.2867, "step": 15303 }, { "epoch": 2.492122297764931, "grad_norm": 0.1891506165266037, "learning_rate": 4.2429140636988745e-06, "loss": 0.2451, "step": 15304 }, { "epoch": 2.492285144322762, "grad_norm": 0.15361380577087402, "learning_rate": 4.240274298241503e-06, "loss": 0.3028, "step": 15305 }, { "epoch": 2.492447990880593, "grad_norm": 0.18111568689346313, "learning_rate": 4.2376352781204745e-06, "loss": 0.276, "step": 15306 }, { "epoch": 2.4926108374384235, "grad_norm": 0.17660215497016907, "learning_rate": 4.234997003430552e-06, "loss": 0.3105, "step": 15307 }, { "epoch": 2.4927736839962544, "grad_norm": 0.19955956935882568, "learning_rate": 4.232359474266451e-06, "loss": 0.2644, "step": 15308 }, { "epoch": 2.4929365305540854, "grad_norm": 0.16443927586078644, "learning_rate": 4.2297226907228624e-06, "loss": 0.3227, "step": 15309 }, { "epoch": 2.4930993771119163, "grad_norm": 0.18756480515003204, "learning_rate": 4.22708665289446e-06, "loss": 0.295, "step": 15310 }, { "epoch": 2.4932622236697473, "grad_norm": 0.23501044511795044, "learning_rate": 4.224451360875886e-06, "loss": 0.2471, "step": 15311 }, { "epoch": 2.493425070227578, "grad_norm": 0.16537824273109436, "learning_rate": 4.2218168147617505e-06, "loss": 0.2524, "step": 15312 }, { "epoch": 2.493587916785409, "grad_norm": 0.17954468727111816, "learning_rate": 4.219183014646647e-06, "loss": 0.242, "step": 15313 }, { "epoch": 2.4937507633432396, "grad_norm": 0.16518303751945496, "learning_rate": 4.216549960625135e-06, "loss": 0.2644, "step": 15314 }, { "epoch": 2.4939136099010706, "grad_norm": 0.1788705438375473, "learning_rate": 4.213917652791749e-06, "loss": 0.2446, "step": 15315 }, { "epoch": 2.4940764564589015, "grad_norm": 0.16949772834777832, "learning_rate": 4.2112860912409905e-06, "loss": 0.2861, "step": 15316 }, { "epoch": 2.4942393030167325, "grad_norm": 0.20497030019760132, "learning_rate": 4.20865527606735e-06, "loss": 0.2849, "step": 15317 }, { "epoch": 2.4944021495745634, "grad_norm": 0.1886887550354004, "learning_rate": 4.2060252073652776e-06, "loss": 0.2863, "step": 15318 }, { "epoch": 2.4945649961323944, "grad_norm": 0.18843533098697662, "learning_rate": 4.2033958852291974e-06, "loss": 0.2628, "step": 15319 }, { "epoch": 2.4947278426902253, "grad_norm": 0.1742417961359024, "learning_rate": 4.2007673097535175e-06, "loss": 0.2688, "step": 15320 }, { "epoch": 2.494890689248056, "grad_norm": 0.1815282702445984, "learning_rate": 4.198139481032607e-06, "loss": 0.2524, "step": 15321 }, { "epoch": 2.4950535358058867, "grad_norm": 0.1919362097978592, "learning_rate": 4.195512399160811e-06, "loss": 0.2684, "step": 15322 }, { "epoch": 2.4952163823637177, "grad_norm": 0.16604910790920258, "learning_rate": 4.192886064232448e-06, "loss": 0.2568, "step": 15323 }, { "epoch": 2.4953792289215486, "grad_norm": 0.17206551134586334, "learning_rate": 4.190260476341823e-06, "loss": 0.2704, "step": 15324 }, { "epoch": 2.4955420754793796, "grad_norm": 0.17590126395225525, "learning_rate": 4.1876356355831925e-06, "loss": 0.2858, "step": 15325 }, { "epoch": 2.4957049220372105, "grad_norm": 0.19801422953605652, "learning_rate": 4.185011542050799e-06, "loss": 0.3345, "step": 15326 }, { "epoch": 2.4958677685950414, "grad_norm": 0.1862652748823166, "learning_rate": 4.182388195838846e-06, "loss": 0.277, "step": 15327 }, { "epoch": 2.4960306151528724, "grad_norm": 0.20414015650749207, "learning_rate": 4.1797655970415375e-06, "loss": 0.2719, "step": 15328 }, { "epoch": 2.4961934617107033, "grad_norm": 0.13930247724056244, "learning_rate": 4.177143745753021e-06, "loss": 0.2752, "step": 15329 }, { "epoch": 2.496356308268534, "grad_norm": 0.2644590735435486, "learning_rate": 4.174522642067427e-06, "loss": 0.2789, "step": 15330 }, { "epoch": 2.4965191548263648, "grad_norm": 0.19313985109329224, "learning_rate": 4.171902286078869e-06, "loss": 0.2453, "step": 15331 }, { "epoch": 2.4966820013841957, "grad_norm": 0.1648569256067276, "learning_rate": 4.169282677881422e-06, "loss": 0.2693, "step": 15332 }, { "epoch": 2.4968448479420267, "grad_norm": 0.20010846853256226, "learning_rate": 4.166663817569139e-06, "loss": 0.2823, "step": 15333 }, { "epoch": 2.4970076944998576, "grad_norm": 0.19033968448638916, "learning_rate": 4.164045705236033e-06, "loss": 0.2642, "step": 15334 }, { "epoch": 2.4971705410576885, "grad_norm": 0.1942058801651001, "learning_rate": 4.161428340976123e-06, "loss": 0.2528, "step": 15335 }, { "epoch": 2.4973333876155195, "grad_norm": 0.1890626847743988, "learning_rate": 4.158811724883369e-06, "loss": 0.2461, "step": 15336 }, { "epoch": 2.49749623417335, "grad_norm": 0.19151657819747925, "learning_rate": 4.156195857051712e-06, "loss": 0.2187, "step": 15337 }, { "epoch": 2.497659080731181, "grad_norm": 0.16224339604377747, "learning_rate": 4.153580737575072e-06, "loss": 0.2641, "step": 15338 }, { "epoch": 2.497821927289012, "grad_norm": 0.16754062473773956, "learning_rate": 4.150966366547343e-06, "loss": 0.2668, "step": 15339 }, { "epoch": 2.497984773846843, "grad_norm": 0.17669671773910522, "learning_rate": 4.1483527440623895e-06, "loss": 0.2644, "step": 15340 }, { "epoch": 2.4981476204046738, "grad_norm": 0.13765092194080353, "learning_rate": 4.145739870214041e-06, "loss": 0.2639, "step": 15341 }, { "epoch": 2.4983104669625047, "grad_norm": 0.16811995208263397, "learning_rate": 4.143127745096115e-06, "loss": 0.291, "step": 15342 }, { "epoch": 2.4984733135203356, "grad_norm": 0.22389933466911316, "learning_rate": 4.140516368802394e-06, "loss": 0.2798, "step": 15343 }, { "epoch": 2.498636160078166, "grad_norm": 0.2098999172449112, "learning_rate": 4.137905741426623e-06, "loss": 0.2844, "step": 15344 }, { "epoch": 2.498799006635997, "grad_norm": 0.1657029092311859, "learning_rate": 4.135295863062549e-06, "loss": 0.2655, "step": 15345 }, { "epoch": 2.498961853193828, "grad_norm": 0.17621466517448425, "learning_rate": 4.132686733803862e-06, "loss": 0.2846, "step": 15346 }, { "epoch": 2.499124699751659, "grad_norm": 0.16865792870521545, "learning_rate": 4.130078353744243e-06, "loss": 0.2666, "step": 15347 }, { "epoch": 2.49928754630949, "grad_norm": 0.1924932301044464, "learning_rate": 4.1274707229773415e-06, "loss": 0.2656, "step": 15348 }, { "epoch": 2.499450392867321, "grad_norm": 0.14343702793121338, "learning_rate": 4.124863841596768e-06, "loss": 0.2496, "step": 15349 }, { "epoch": 2.499613239425152, "grad_norm": 0.1930767148733139, "learning_rate": 4.122257709696137e-06, "loss": 0.3043, "step": 15350 }, { "epoch": 2.4997760859829823, "grad_norm": 0.16260270774364471, "learning_rate": 4.119652327369003e-06, "loss": 0.2625, "step": 15351 }, { "epoch": 2.4999389325408132, "grad_norm": 0.12194745987653732, "learning_rate": 4.117047694708903e-06, "loss": 0.2508, "step": 15352 }, { "epoch": 2.500101779098644, "grad_norm": 0.15204766392707825, "learning_rate": 4.114443811809363e-06, "loss": 0.2277, "step": 15353 }, { "epoch": 2.500264625656475, "grad_norm": 0.19223731756210327, "learning_rate": 4.1118406787638676e-06, "loss": 0.2331, "step": 15354 }, { "epoch": 2.500427472214306, "grad_norm": 0.1554867923259735, "learning_rate": 4.109238295665874e-06, "loss": 0.2566, "step": 15355 }, { "epoch": 2.500590318772137, "grad_norm": 0.19927260279655457, "learning_rate": 4.106636662608807e-06, "loss": 0.2516, "step": 15356 }, { "epoch": 2.500753165329968, "grad_norm": 0.173194020986557, "learning_rate": 4.104035779686091e-06, "loss": 0.2606, "step": 15357 }, { "epoch": 2.500916011887799, "grad_norm": 0.1626611202955246, "learning_rate": 4.101435646991095e-06, "loss": 0.2706, "step": 15358 }, { "epoch": 2.50107885844563, "grad_norm": 0.17900584638118744, "learning_rate": 4.0988362646171676e-06, "loss": 0.2933, "step": 15359 }, { "epoch": 2.5012417050034603, "grad_norm": 0.20751403272151947, "learning_rate": 4.096237632657646e-06, "loss": 0.263, "step": 15360 }, { "epoch": 2.5014045515612913, "grad_norm": 0.19612906873226166, "learning_rate": 4.093639751205821e-06, "loss": 0.2481, "step": 15361 }, { "epoch": 2.501567398119122, "grad_norm": 0.18838712573051453, "learning_rate": 4.091042620354971e-06, "loss": 0.2738, "step": 15362 }, { "epoch": 2.501730244676953, "grad_norm": 0.14800399541854858, "learning_rate": 4.088446240198324e-06, "loss": 0.2553, "step": 15363 }, { "epoch": 2.501893091234784, "grad_norm": 0.18421900272369385, "learning_rate": 4.08585061082912e-06, "loss": 0.252, "step": 15364 }, { "epoch": 2.502055937792615, "grad_norm": 0.158122256398201, "learning_rate": 4.083255732340538e-06, "loss": 0.2648, "step": 15365 }, { "epoch": 2.502218784350446, "grad_norm": 0.18243588507175446, "learning_rate": 4.080661604825742e-06, "loss": 0.2717, "step": 15366 }, { "epoch": 2.5023816309082765, "grad_norm": 0.14702460169792175, "learning_rate": 4.078068228377865e-06, "loss": 0.2243, "step": 15367 }, { "epoch": 2.5025444774661074, "grad_norm": 0.15542347729206085, "learning_rate": 4.0754756030900285e-06, "loss": 0.2442, "step": 15368 }, { "epoch": 2.5027073240239384, "grad_norm": 0.15149453282356262, "learning_rate": 4.07288372905531e-06, "loss": 0.2479, "step": 15369 }, { "epoch": 2.5028701705817693, "grad_norm": 0.16905823349952698, "learning_rate": 4.0702926063667605e-06, "loss": 0.253, "step": 15370 }, { "epoch": 2.5030330171396002, "grad_norm": 0.19476212561130524, "learning_rate": 4.0677022351174155e-06, "loss": 0.2778, "step": 15371 }, { "epoch": 2.503195863697431, "grad_norm": 0.172125443816185, "learning_rate": 4.06511261540028e-06, "loss": 0.2669, "step": 15372 }, { "epoch": 2.503358710255262, "grad_norm": 0.19494713842868805, "learning_rate": 4.0625237473083195e-06, "loss": 0.2549, "step": 15373 }, { "epoch": 2.5035215568130926, "grad_norm": 0.16736440360546112, "learning_rate": 4.059935630934483e-06, "loss": 0.2574, "step": 15374 }, { "epoch": 2.503684403370924, "grad_norm": 0.1603940725326538, "learning_rate": 4.057348266371702e-06, "loss": 0.2755, "step": 15375 }, { "epoch": 2.5038472499287545, "grad_norm": 0.18625517189502716, "learning_rate": 4.054761653712863e-06, "loss": 0.2613, "step": 15376 }, { "epoch": 2.5040100964865855, "grad_norm": 0.23680205643177032, "learning_rate": 4.052175793050833e-06, "loss": 0.2391, "step": 15377 }, { "epoch": 2.5041729430444164, "grad_norm": 0.23865553736686707, "learning_rate": 4.049590684478449e-06, "loss": 0.3041, "step": 15378 }, { "epoch": 2.5043357896022473, "grad_norm": 0.1520160734653473, "learning_rate": 4.047006328088535e-06, "loss": 0.2556, "step": 15379 }, { "epoch": 2.5044986361600783, "grad_norm": 0.16304728388786316, "learning_rate": 4.04442272397387e-06, "loss": 0.2541, "step": 15380 }, { "epoch": 2.504661482717909, "grad_norm": 0.18684130907058716, "learning_rate": 4.04183987222721e-06, "loss": 0.2445, "step": 15381 }, { "epoch": 2.50482432927574, "grad_norm": 0.21156972646713257, "learning_rate": 4.0392577729412935e-06, "loss": 0.2979, "step": 15382 }, { "epoch": 2.5049871758335707, "grad_norm": 0.17756043374538422, "learning_rate": 4.036676426208819e-06, "loss": 0.2695, "step": 15383 }, { "epoch": 2.5051500223914016, "grad_norm": 0.1591484695672989, "learning_rate": 4.034095832122473e-06, "loss": 0.2384, "step": 15384 }, { "epoch": 2.5053128689492326, "grad_norm": 0.17760570347309113, "learning_rate": 4.031515990774903e-06, "loss": 0.2598, "step": 15385 }, { "epoch": 2.5054757155070635, "grad_norm": 0.14285631477832794, "learning_rate": 4.028936902258726e-06, "loss": 0.2716, "step": 15386 }, { "epoch": 2.5056385620648944, "grad_norm": 0.19043609499931335, "learning_rate": 4.026358566666552e-06, "loss": 0.2747, "step": 15387 }, { "epoch": 2.5058014086227254, "grad_norm": 0.19356472790241241, "learning_rate": 4.023780984090944e-06, "loss": 0.3006, "step": 15388 }, { "epoch": 2.5059642551805563, "grad_norm": 0.17629064619541168, "learning_rate": 4.021204154624439e-06, "loss": 0.2636, "step": 15389 }, { "epoch": 2.506127101738387, "grad_norm": 0.20151887834072113, "learning_rate": 4.018628078359565e-06, "loss": 0.2848, "step": 15390 }, { "epoch": 2.5062899482962178, "grad_norm": 0.21566760540008545, "learning_rate": 4.016052755388805e-06, "loss": 0.2612, "step": 15391 }, { "epoch": 2.5064527948540487, "grad_norm": 0.18463446199893951, "learning_rate": 4.013478185804614e-06, "loss": 0.2925, "step": 15392 }, { "epoch": 2.5066156414118796, "grad_norm": 0.1502283662557602, "learning_rate": 4.010904369699439e-06, "loss": 0.2571, "step": 15393 }, { "epoch": 2.5067784879697106, "grad_norm": 0.20971205830574036, "learning_rate": 4.0083313071656834e-06, "loss": 0.2816, "step": 15394 }, { "epoch": 2.5069413345275415, "grad_norm": 0.19992566108703613, "learning_rate": 4.005758998295725e-06, "loss": 0.3149, "step": 15395 }, { "epoch": 2.5071041810853725, "grad_norm": 0.258852481842041, "learning_rate": 4.003187443181916e-06, "loss": 0.2925, "step": 15396 }, { "epoch": 2.507267027643203, "grad_norm": 0.14891812205314636, "learning_rate": 4.000616641916588e-06, "loss": 0.254, "step": 15397 }, { "epoch": 2.507429874201034, "grad_norm": 0.1382422149181366, "learning_rate": 3.998046594592042e-06, "loss": 0.2868, "step": 15398 }, { "epoch": 2.507592720758865, "grad_norm": 0.20650005340576172, "learning_rate": 3.995477301300543e-06, "loss": 0.2931, "step": 15399 }, { "epoch": 2.507755567316696, "grad_norm": 0.15783487260341644, "learning_rate": 3.992908762134337e-06, "loss": 0.2482, "step": 15400 }, { "epoch": 2.5079184138745267, "grad_norm": 0.1556854546070099, "learning_rate": 3.99034097718565e-06, "loss": 0.276, "step": 15401 }, { "epoch": 2.5080812604323577, "grad_norm": 0.18409323692321777, "learning_rate": 3.987773946546669e-06, "loss": 0.2483, "step": 15402 }, { "epoch": 2.5082441069901886, "grad_norm": 0.16306892037391663, "learning_rate": 3.985207670309555e-06, "loss": 0.294, "step": 15403 }, { "epoch": 2.508406953548019, "grad_norm": 0.15725044906139374, "learning_rate": 3.982642148566448e-06, "loss": 0.3264, "step": 15404 }, { "epoch": 2.5085698001058505, "grad_norm": 0.18649552762508392, "learning_rate": 3.980077381409461e-06, "loss": 0.2861, "step": 15405 }, { "epoch": 2.508732646663681, "grad_norm": 0.22505220770835876, "learning_rate": 3.977513368930671e-06, "loss": 0.2687, "step": 15406 }, { "epoch": 2.508895493221512, "grad_norm": 0.17069190740585327, "learning_rate": 3.974950111222131e-06, "loss": 0.2475, "step": 15407 }, { "epoch": 2.509058339779343, "grad_norm": 0.15755774080753326, "learning_rate": 3.972387608375883e-06, "loss": 0.2985, "step": 15408 }, { "epoch": 2.509221186337174, "grad_norm": 0.19411331415176392, "learning_rate": 3.969825860483917e-06, "loss": 0.2613, "step": 15409 }, { "epoch": 2.509384032895005, "grad_norm": 0.20570030808448792, "learning_rate": 3.967264867638207e-06, "loss": 0.2589, "step": 15410 }, { "epoch": 2.5095468794528357, "grad_norm": 0.1622946560382843, "learning_rate": 3.964704629930707e-06, "loss": 0.2908, "step": 15411 }, { "epoch": 2.5097097260106667, "grad_norm": 0.19110694527626038, "learning_rate": 3.962145147453339e-06, "loss": 0.2425, "step": 15412 }, { "epoch": 2.509872572568497, "grad_norm": 0.16773834824562073, "learning_rate": 3.959586420297987e-06, "loss": 0.3045, "step": 15413 }, { "epoch": 2.510035419126328, "grad_norm": 0.1633695363998413, "learning_rate": 3.957028448556519e-06, "loss": 0.2883, "step": 15414 }, { "epoch": 2.510198265684159, "grad_norm": 0.21641592681407928, "learning_rate": 3.954471232320778e-06, "loss": 0.2852, "step": 15415 }, { "epoch": 2.51036111224199, "grad_norm": 0.15671394765377045, "learning_rate": 3.951914771682577e-06, "loss": 0.2691, "step": 15416 }, { "epoch": 2.510523958799821, "grad_norm": 0.2389814555644989, "learning_rate": 3.949359066733696e-06, "loss": 0.2917, "step": 15417 }, { "epoch": 2.510686805357652, "grad_norm": 0.1860068440437317, "learning_rate": 3.946804117565889e-06, "loss": 0.2649, "step": 15418 }, { "epoch": 2.510849651915483, "grad_norm": 0.174067422747612, "learning_rate": 3.944249924270896e-06, "loss": 0.2736, "step": 15419 }, { "epoch": 2.5110124984733133, "grad_norm": 0.2078111171722412, "learning_rate": 3.941696486940407e-06, "loss": 0.2835, "step": 15420 }, { "epoch": 2.5111753450311443, "grad_norm": 0.1702081859111786, "learning_rate": 3.939143805666118e-06, "loss": 0.2774, "step": 15421 }, { "epoch": 2.511338191588975, "grad_norm": 0.14621847867965698, "learning_rate": 3.936591880539664e-06, "loss": 0.2466, "step": 15422 }, { "epoch": 2.511501038146806, "grad_norm": 0.21747367084026337, "learning_rate": 3.934040711652662e-06, "loss": 0.2511, "step": 15423 }, { "epoch": 2.511663884704637, "grad_norm": 0.16958533227443695, "learning_rate": 3.931490299096721e-06, "loss": 0.2276, "step": 15424 }, { "epoch": 2.511826731262468, "grad_norm": 0.1781109869480133, "learning_rate": 3.928940642963394e-06, "loss": 0.2704, "step": 15425 }, { "epoch": 2.511989577820299, "grad_norm": 0.16866637766361237, "learning_rate": 3.926391743344235e-06, "loss": 0.2536, "step": 15426 }, { "epoch": 2.5121524243781295, "grad_norm": 0.1589614748954773, "learning_rate": 3.923843600330751e-06, "loss": 0.2768, "step": 15427 }, { "epoch": 2.512315270935961, "grad_norm": 0.18512900173664093, "learning_rate": 3.921296214014428e-06, "loss": 0.2738, "step": 15428 }, { "epoch": 2.5124781174937914, "grad_norm": 0.19164958596229553, "learning_rate": 3.918749584486717e-06, "loss": 0.265, "step": 15429 }, { "epoch": 2.5126409640516223, "grad_norm": 0.1635199338197708, "learning_rate": 3.916203711839064e-06, "loss": 0.2623, "step": 15430 }, { "epoch": 2.5128038106094532, "grad_norm": 0.16675399243831635, "learning_rate": 3.913658596162867e-06, "loss": 0.2459, "step": 15431 }, { "epoch": 2.512966657167284, "grad_norm": 0.22005240619182587, "learning_rate": 3.911114237549496e-06, "loss": 0.2676, "step": 15432 }, { "epoch": 2.513129503725115, "grad_norm": 0.16223272681236267, "learning_rate": 3.908570636090314e-06, "loss": 0.2394, "step": 15433 }, { "epoch": 2.5132923502829456, "grad_norm": 0.21536193788051605, "learning_rate": 3.906027791876638e-06, "loss": 0.2564, "step": 15434 }, { "epoch": 2.513455196840777, "grad_norm": 0.14285556972026825, "learning_rate": 3.903485704999762e-06, "loss": 0.2857, "step": 15435 }, { "epoch": 2.5136180433986075, "grad_norm": 0.1992861032485962, "learning_rate": 3.9009443755509525e-06, "loss": 0.2869, "step": 15436 }, { "epoch": 2.5137808899564384, "grad_norm": 0.1485525369644165, "learning_rate": 3.898403803621459e-06, "loss": 0.2835, "step": 15437 }, { "epoch": 2.5139437365142694, "grad_norm": 0.14943447709083557, "learning_rate": 3.895863989302492e-06, "loss": 0.2642, "step": 15438 }, { "epoch": 2.5141065830721003, "grad_norm": 0.18994230031967163, "learning_rate": 3.893324932685236e-06, "loss": 0.252, "step": 15439 }, { "epoch": 2.5142694296299313, "grad_norm": 0.15644660592079163, "learning_rate": 3.8907866338608466e-06, "loss": 0.2604, "step": 15440 }, { "epoch": 2.514432276187762, "grad_norm": 0.18543341755867004, "learning_rate": 3.888249092920468e-06, "loss": 0.2493, "step": 15441 }, { "epoch": 2.514595122745593, "grad_norm": 0.19683945178985596, "learning_rate": 3.8857123099552e-06, "loss": 0.2324, "step": 15442 }, { "epoch": 2.5147579693034237, "grad_norm": 0.22050629556179047, "learning_rate": 3.883176285056114e-06, "loss": 0.2904, "step": 15443 }, { "epoch": 2.5149208158612546, "grad_norm": 0.16258259117603302, "learning_rate": 3.880641018314274e-06, "loss": 0.2835, "step": 15444 }, { "epoch": 2.5150836624190855, "grad_norm": 0.18034544587135315, "learning_rate": 3.8781065098206944e-06, "loss": 0.2332, "step": 15445 }, { "epoch": 2.5152465089769165, "grad_norm": 0.227676659822464, "learning_rate": 3.875572759666374e-06, "loss": 0.268, "step": 15446 }, { "epoch": 2.5154093555347474, "grad_norm": 0.19698701798915863, "learning_rate": 3.873039767942277e-06, "loss": 0.2904, "step": 15447 }, { "epoch": 2.5155722020925784, "grad_norm": 0.19771555066108704, "learning_rate": 3.8705075347393565e-06, "loss": 0.2784, "step": 15448 }, { "epoch": 2.5157350486504093, "grad_norm": 0.18339323997497559, "learning_rate": 3.867976060148523e-06, "loss": 0.2622, "step": 15449 }, { "epoch": 2.51589789520824, "grad_norm": 0.14780156314373016, "learning_rate": 3.86544534426066e-06, "loss": 0.2433, "step": 15450 }, { "epoch": 2.516060741766071, "grad_norm": 0.16645880043506622, "learning_rate": 3.862915387166624e-06, "loss": 0.2765, "step": 15451 }, { "epoch": 2.5162235883239017, "grad_norm": 0.17210863530635834, "learning_rate": 3.8603861889572605e-06, "loss": 0.2589, "step": 15452 }, { "epoch": 2.5163864348817326, "grad_norm": 0.17679733037948608, "learning_rate": 3.857857749723368e-06, "loss": 0.28, "step": 15453 }, { "epoch": 2.5165492814395636, "grad_norm": 0.16623607277870178, "learning_rate": 3.855330069555721e-06, "loss": 0.2756, "step": 15454 }, { "epoch": 2.5167121279973945, "grad_norm": 0.23683899641036987, "learning_rate": 3.852803148545081e-06, "loss": 0.2796, "step": 15455 }, { "epoch": 2.5168749745552255, "grad_norm": 0.181498184800148, "learning_rate": 3.850276986782167e-06, "loss": 0.2736, "step": 15456 }, { "epoch": 2.517037821113056, "grad_norm": 0.22416596114635468, "learning_rate": 3.847751584357675e-06, "loss": 0.2721, "step": 15457 }, { "epoch": 2.5172006676708873, "grad_norm": 0.1806727796792984, "learning_rate": 3.84522694136227e-06, "loss": 0.2655, "step": 15458 }, { "epoch": 2.517363514228718, "grad_norm": 0.1755143254995346, "learning_rate": 3.842703057886604e-06, "loss": 0.2737, "step": 15459 }, { "epoch": 2.517526360786549, "grad_norm": 0.19475778937339783, "learning_rate": 3.840179934021282e-06, "loss": 0.2456, "step": 15460 }, { "epoch": 2.5176892073443797, "grad_norm": 0.21871790289878845, "learning_rate": 3.8376575698569e-06, "loss": 0.2619, "step": 15461 }, { "epoch": 2.5178520539022107, "grad_norm": 0.21965017914772034, "learning_rate": 3.835135965484018e-06, "loss": 0.2685, "step": 15462 }, { "epoch": 2.5180149004600416, "grad_norm": 0.16380448639392853, "learning_rate": 3.832615120993161e-06, "loss": 0.2659, "step": 15463 }, { "epoch": 2.5181777470178726, "grad_norm": 0.18549248576164246, "learning_rate": 3.830095036474846e-06, "loss": 0.2213, "step": 15464 }, { "epoch": 2.5183405935757035, "grad_norm": 0.189262256026268, "learning_rate": 3.827575712019538e-06, "loss": 0.2454, "step": 15465 }, { "epoch": 2.518503440133534, "grad_norm": 0.145707368850708, "learning_rate": 3.825057147717703e-06, "loss": 0.2441, "step": 15466 }, { "epoch": 2.518666286691365, "grad_norm": 0.2328348457813263, "learning_rate": 3.8225393436597585e-06, "loss": 0.2445, "step": 15467 }, { "epoch": 2.518829133249196, "grad_norm": 0.17871081829071045, "learning_rate": 3.820022299936102e-06, "loss": 0.2616, "step": 15468 }, { "epoch": 2.518991979807027, "grad_norm": 0.20025698840618134, "learning_rate": 3.817506016637096e-06, "loss": 0.2803, "step": 15469 }, { "epoch": 2.5191548263648578, "grad_norm": 0.1581353098154068, "learning_rate": 3.8149904938530935e-06, "loss": 0.2911, "step": 15470 }, { "epoch": 2.5193176729226887, "grad_norm": 0.16302785277366638, "learning_rate": 3.812475731674403e-06, "loss": 0.2411, "step": 15471 }, { "epoch": 2.5194805194805197, "grad_norm": 0.1401035636663437, "learning_rate": 3.809961730191311e-06, "loss": 0.2627, "step": 15472 }, { "epoch": 2.51964336603835, "grad_norm": 0.16916443407535553, "learning_rate": 3.807448489494081e-06, "loss": 0.2849, "step": 15473 }, { "epoch": 2.519806212596181, "grad_norm": 0.22414745390415192, "learning_rate": 3.8049360096729493e-06, "loss": 0.2887, "step": 15474 }, { "epoch": 2.519969059154012, "grad_norm": 0.17255663871765137, "learning_rate": 3.8024242908181127e-06, "loss": 0.2562, "step": 15475 }, { "epoch": 2.520131905711843, "grad_norm": 0.18377237021923065, "learning_rate": 3.79991333301975e-06, "loss": 0.2961, "step": 15476 }, { "epoch": 2.520294752269674, "grad_norm": 0.1787051260471344, "learning_rate": 3.7974031363680216e-06, "loss": 0.2841, "step": 15477 }, { "epoch": 2.520457598827505, "grad_norm": 0.16409701108932495, "learning_rate": 3.794893700953048e-06, "loss": 0.2342, "step": 15478 }, { "epoch": 2.520620445385336, "grad_norm": 0.18673570454120636, "learning_rate": 3.7923850268649197e-06, "loss": 0.2891, "step": 15479 }, { "epoch": 2.5207832919431663, "grad_norm": 0.317445307970047, "learning_rate": 3.7898771141937017e-06, "loss": 0.2834, "step": 15480 }, { "epoch": 2.5209461385009977, "grad_norm": 0.19886468350887299, "learning_rate": 3.7873699630294496e-06, "loss": 0.2947, "step": 15481 }, { "epoch": 2.521108985058828, "grad_norm": 0.1274564415216446, "learning_rate": 3.784863573462172e-06, "loss": 0.253, "step": 15482 }, { "epoch": 2.521271831616659, "grad_norm": 0.20435038208961487, "learning_rate": 3.782357945581846e-06, "loss": 0.2374, "step": 15483 }, { "epoch": 2.52143467817449, "grad_norm": 0.1446809619665146, "learning_rate": 3.779853079478449e-06, "loss": 0.2976, "step": 15484 }, { "epoch": 2.521597524732321, "grad_norm": 0.20020370185375214, "learning_rate": 3.7773489752418983e-06, "loss": 0.3099, "step": 15485 }, { "epoch": 2.521760371290152, "grad_norm": 0.19114179909229279, "learning_rate": 3.7748456329621098e-06, "loss": 0.2848, "step": 15486 }, { "epoch": 2.521923217847983, "grad_norm": 0.19765648245811462, "learning_rate": 3.7723430527289466e-06, "loss": 0.3032, "step": 15487 }, { "epoch": 2.522086064405814, "grad_norm": 0.16942740976810455, "learning_rate": 3.769841234632274e-06, "loss": 0.2595, "step": 15488 }, { "epoch": 2.5222489109636443, "grad_norm": 0.19340243935585022, "learning_rate": 3.767340178761908e-06, "loss": 0.3031, "step": 15489 }, { "epoch": 2.5224117575214753, "grad_norm": 0.22351832687854767, "learning_rate": 3.764839885207644e-06, "loss": 0.264, "step": 15490 }, { "epoch": 2.5225746040793062, "grad_norm": 0.15537787973880768, "learning_rate": 3.762340354059243e-06, "loss": 0.2896, "step": 15491 }, { "epoch": 2.522737450637137, "grad_norm": 0.19846002757549286, "learning_rate": 3.7598415854064623e-06, "loss": 0.238, "step": 15492 }, { "epoch": 2.522900297194968, "grad_norm": 0.19184353947639465, "learning_rate": 3.757343579339004e-06, "loss": 0.2246, "step": 15493 }, { "epoch": 2.523063143752799, "grad_norm": 0.20609554648399353, "learning_rate": 3.7548463359465492e-06, "loss": 0.2796, "step": 15494 }, { "epoch": 2.52322599031063, "grad_norm": 0.16616007685661316, "learning_rate": 3.752349855318771e-06, "loss": 0.2896, "step": 15495 }, { "epoch": 2.5233888368684605, "grad_norm": 0.2165166735649109, "learning_rate": 3.7498541375452917e-06, "loss": 0.2791, "step": 15496 }, { "epoch": 2.5235516834262914, "grad_norm": 0.18894994258880615, "learning_rate": 3.7473591827157097e-06, "loss": 0.2406, "step": 15497 }, { "epoch": 2.5237145299841224, "grad_norm": 0.22688961029052734, "learning_rate": 3.7448649909196137e-06, "loss": 0.2621, "step": 15498 }, { "epoch": 2.5238773765419533, "grad_norm": 0.1634511649608612, "learning_rate": 3.7423715622465466e-06, "loss": 0.2658, "step": 15499 }, { "epoch": 2.5240402230997843, "grad_norm": 0.1429157555103302, "learning_rate": 3.7398788967860255e-06, "loss": 0.2508, "step": 15500 }, { "epoch": 2.524203069657615, "grad_norm": 0.171784907579422, "learning_rate": 3.7373869946275536e-06, "loss": 0.2539, "step": 15501 }, { "epoch": 2.524365916215446, "grad_norm": 0.17931431531906128, "learning_rate": 3.7348958558605875e-06, "loss": 0.2465, "step": 15502 }, { "epoch": 2.5245287627732766, "grad_norm": 0.1983090192079544, "learning_rate": 3.732405480574577e-06, "loss": 0.2874, "step": 15503 }, { "epoch": 2.524691609331108, "grad_norm": 0.20581714808940887, "learning_rate": 3.7299158688589296e-06, "loss": 0.2935, "step": 15504 }, { "epoch": 2.5248544558889385, "grad_norm": 0.2189672738313675, "learning_rate": 3.727427020803026e-06, "loss": 0.301, "step": 15505 }, { "epoch": 2.5250173024467695, "grad_norm": 0.18307188153266907, "learning_rate": 3.7249389364962308e-06, "loss": 0.2841, "step": 15506 }, { "epoch": 2.5251801490046004, "grad_norm": 0.13198579847812653, "learning_rate": 3.7224516160278693e-06, "loss": 0.2752, "step": 15507 }, { "epoch": 2.5253429955624314, "grad_norm": 0.17349834740161896, "learning_rate": 3.7199650594872434e-06, "loss": 0.2483, "step": 15508 }, { "epoch": 2.5255058421202623, "grad_norm": 0.1567748486995697, "learning_rate": 3.7174792669636223e-06, "loss": 0.2593, "step": 15509 }, { "epoch": 2.525668688678093, "grad_norm": 0.1484590470790863, "learning_rate": 3.714994238546268e-06, "loss": 0.2587, "step": 15510 }, { "epoch": 2.525831535235924, "grad_norm": 0.18261820077896118, "learning_rate": 3.7125099743243907e-06, "loss": 0.2944, "step": 15511 }, { "epoch": 2.5259943817937547, "grad_norm": 0.17427048087120056, "learning_rate": 3.7100264743871764e-06, "loss": 0.2441, "step": 15512 }, { "epoch": 2.5261572283515856, "grad_norm": 0.15930268168449402, "learning_rate": 3.707543738823807e-06, "loss": 0.2335, "step": 15513 }, { "epoch": 2.5263200749094166, "grad_norm": 0.1914900243282318, "learning_rate": 3.7050617677234113e-06, "loss": 0.297, "step": 15514 }, { "epoch": 2.5264829214672475, "grad_norm": 0.1385311633348465, "learning_rate": 3.702580561175098e-06, "loss": 0.2664, "step": 15515 }, { "epoch": 2.5266457680250785, "grad_norm": 0.15467482805252075, "learning_rate": 3.7001001192679462e-06, "loss": 0.2446, "step": 15516 }, { "epoch": 2.5268086145829094, "grad_norm": 0.1555618941783905, "learning_rate": 3.697620442091021e-06, "loss": 0.2684, "step": 15517 }, { "epoch": 2.5269714611407403, "grad_norm": 0.1993245631456375, "learning_rate": 3.6951415297333452e-06, "loss": 0.285, "step": 15518 }, { "epoch": 2.527134307698571, "grad_norm": 0.1701667606830597, "learning_rate": 3.692663382283923e-06, "loss": 0.2592, "step": 15519 }, { "epoch": 2.527297154256402, "grad_norm": 0.1654595285654068, "learning_rate": 3.6901859998317135e-06, "loss": 0.2663, "step": 15520 }, { "epoch": 2.5274600008142327, "grad_norm": 0.16630613803863525, "learning_rate": 3.6877093824656816e-06, "loss": 0.2981, "step": 15521 }, { "epoch": 2.5276228473720637, "grad_norm": 0.21685218811035156, "learning_rate": 3.6852335302747345e-06, "loss": 0.2709, "step": 15522 }, { "epoch": 2.5277856939298946, "grad_norm": 0.19238343834877014, "learning_rate": 3.6827584433477615e-06, "loss": 0.2639, "step": 15523 }, { "epoch": 2.5279485404877255, "grad_norm": 0.1832979917526245, "learning_rate": 3.6802841217736304e-06, "loss": 0.3221, "step": 15524 }, { "epoch": 2.5281113870455565, "grad_norm": 0.18704542517662048, "learning_rate": 3.6778105656411767e-06, "loss": 0.2702, "step": 15525 }, { "epoch": 2.528274233603387, "grad_norm": 0.184188112616539, "learning_rate": 3.675337775039206e-06, "loss": 0.3098, "step": 15526 }, { "epoch": 2.528437080161218, "grad_norm": 0.16585026681423187, "learning_rate": 3.672865750056495e-06, "loss": 0.2588, "step": 15527 }, { "epoch": 2.528599926719049, "grad_norm": 0.14206238090991974, "learning_rate": 3.670394490781806e-06, "loss": 0.2682, "step": 15528 }, { "epoch": 2.52876277327688, "grad_norm": 0.16600801050662994, "learning_rate": 3.6679239973038627e-06, "loss": 0.2735, "step": 15529 }, { "epoch": 2.5289256198347108, "grad_norm": 0.1894616037607193, "learning_rate": 3.6654542697113577e-06, "loss": 0.2879, "step": 15530 }, { "epoch": 2.5290884663925417, "grad_norm": 0.13589952886104584, "learning_rate": 3.662985308092959e-06, "loss": 0.248, "step": 15531 }, { "epoch": 2.5292513129503726, "grad_norm": 0.19840382039546967, "learning_rate": 3.6605171125373206e-06, "loss": 0.291, "step": 15532 }, { "epoch": 2.529414159508203, "grad_norm": 0.2001098096370697, "learning_rate": 3.6580496831330522e-06, "loss": 0.2687, "step": 15533 }, { "epoch": 2.5295770060660345, "grad_norm": 0.18495260179042816, "learning_rate": 3.655583019968739e-06, "loss": 0.2628, "step": 15534 }, { "epoch": 2.529739852623865, "grad_norm": 0.18928328156471252, "learning_rate": 3.6531171231329488e-06, "loss": 0.2887, "step": 15535 }, { "epoch": 2.529902699181696, "grad_norm": 0.17934271693229675, "learning_rate": 3.650651992714213e-06, "loss": 0.2526, "step": 15536 }, { "epoch": 2.530065545739527, "grad_norm": 0.20014409720897675, "learning_rate": 3.648187628801025e-06, "loss": 0.2554, "step": 15537 }, { "epoch": 2.530228392297358, "grad_norm": 0.1824413537979126, "learning_rate": 3.645724031481884e-06, "loss": 0.3073, "step": 15538 }, { "epoch": 2.530391238855189, "grad_norm": 0.19157132506370544, "learning_rate": 3.643261200845219e-06, "loss": 0.3051, "step": 15539 }, { "epoch": 2.5305540854130197, "grad_norm": 0.22044235467910767, "learning_rate": 3.6407991369794727e-06, "loss": 0.2642, "step": 15540 }, { "epoch": 2.5307169319708507, "grad_norm": 0.12989160418510437, "learning_rate": 3.63833783997303e-06, "loss": 0.254, "step": 15541 }, { "epoch": 2.530879778528681, "grad_norm": 0.22683461010456085, "learning_rate": 3.635877309914254e-06, "loss": 0.2815, "step": 15542 }, { "epoch": 2.531042625086512, "grad_norm": 0.183083638548851, "learning_rate": 3.633417546891496e-06, "loss": 0.2689, "step": 15543 }, { "epoch": 2.531205471644343, "grad_norm": 0.18924814462661743, "learning_rate": 3.6309585509930634e-06, "loss": 0.2592, "step": 15544 }, { "epoch": 2.531368318202174, "grad_norm": 0.21822988986968994, "learning_rate": 3.6285003223072377e-06, "loss": 0.2863, "step": 15545 }, { "epoch": 2.531531164760005, "grad_norm": 0.16140112280845642, "learning_rate": 3.626042860922288e-06, "loss": 0.282, "step": 15546 }, { "epoch": 2.531694011317836, "grad_norm": 0.16592788696289062, "learning_rate": 3.6235861669264375e-06, "loss": 0.2809, "step": 15547 }, { "epoch": 2.531856857875667, "grad_norm": 0.19217737019062042, "learning_rate": 3.6211302404078882e-06, "loss": 0.297, "step": 15548 }, { "epoch": 2.5320197044334973, "grad_norm": 0.1594468206167221, "learning_rate": 3.6186750814548117e-06, "loss": 0.2627, "step": 15549 }, { "epoch": 2.5321825509913283, "grad_norm": 0.17622148990631104, "learning_rate": 3.616220690155364e-06, "loss": 0.2629, "step": 15550 }, { "epoch": 2.532345397549159, "grad_norm": 0.20104099810123444, "learning_rate": 3.613767066597662e-06, "loss": 0.2926, "step": 15551 }, { "epoch": 2.53250824410699, "grad_norm": 0.17493382096290588, "learning_rate": 3.6113142108697954e-06, "loss": 0.2287, "step": 15552 }, { "epoch": 2.532671090664821, "grad_norm": 0.19044984877109528, "learning_rate": 3.6088621230598253e-06, "loss": 0.2653, "step": 15553 }, { "epoch": 2.532833937222652, "grad_norm": 0.14137400686740875, "learning_rate": 3.6064108032558025e-06, "loss": 0.2819, "step": 15554 }, { "epoch": 2.532996783780483, "grad_norm": 0.1956983357667923, "learning_rate": 3.6039602515457264e-06, "loss": 0.2703, "step": 15555 }, { "epoch": 2.5331596303383135, "grad_norm": 0.17357346415519714, "learning_rate": 3.6015104680175772e-06, "loss": 0.2298, "step": 15556 }, { "epoch": 2.533322476896145, "grad_norm": 0.20723970234394073, "learning_rate": 3.5990614527593196e-06, "loss": 0.2578, "step": 15557 }, { "epoch": 2.5334853234539754, "grad_norm": 0.17118191719055176, "learning_rate": 3.5966132058588757e-06, "loss": 0.2696, "step": 15558 }, { "epoch": 2.5336481700118063, "grad_norm": 0.14288319647312164, "learning_rate": 3.594165727404142e-06, "loss": 0.2811, "step": 15559 }, { "epoch": 2.5338110165696373, "grad_norm": 0.17657876014709473, "learning_rate": 3.591719017482989e-06, "loss": 0.2748, "step": 15560 }, { "epoch": 2.533973863127468, "grad_norm": 0.17814302444458008, "learning_rate": 3.5892730761832664e-06, "loss": 0.2543, "step": 15561 }, { "epoch": 2.534136709685299, "grad_norm": 0.15379908680915833, "learning_rate": 3.5868279035927903e-06, "loss": 0.2838, "step": 15562 }, { "epoch": 2.5342995562431296, "grad_norm": 0.13365475833415985, "learning_rate": 3.584383499799343e-06, "loss": 0.2779, "step": 15563 }, { "epoch": 2.534462402800961, "grad_norm": 0.24124804139137268, "learning_rate": 3.581939864890696e-06, "loss": 0.3013, "step": 15564 }, { "epoch": 2.5346252493587915, "grad_norm": 0.18134255707263947, "learning_rate": 3.579496998954579e-06, "loss": 0.2744, "step": 15565 }, { "epoch": 2.5347880959166225, "grad_norm": 0.18492357432842255, "learning_rate": 3.577054902078697e-06, "loss": 0.2769, "step": 15566 }, { "epoch": 2.5349509424744534, "grad_norm": 0.16748258471488953, "learning_rate": 3.5746135743507216e-06, "loss": 0.3009, "step": 15567 }, { "epoch": 2.5351137890322843, "grad_norm": 0.1890670508146286, "learning_rate": 3.5721730158583183e-06, "loss": 0.2847, "step": 15568 }, { "epoch": 2.5352766355901153, "grad_norm": 0.14418363571166992, "learning_rate": 3.569733226689101e-06, "loss": 0.2593, "step": 15569 }, { "epoch": 2.5354394821479462, "grad_norm": 0.2036612182855606, "learning_rate": 3.5672942069306716e-06, "loss": 0.2653, "step": 15570 }, { "epoch": 2.535602328705777, "grad_norm": 0.16719622910022736, "learning_rate": 3.564855956670585e-06, "loss": 0.2465, "step": 15571 }, { "epoch": 2.5357651752636077, "grad_norm": 0.1874343901872635, "learning_rate": 3.5624184759963985e-06, "loss": 0.2571, "step": 15572 }, { "epoch": 2.5359280218214386, "grad_norm": 0.21134254336357117, "learning_rate": 3.5599817649956154e-06, "loss": 0.2749, "step": 15573 }, { "epoch": 2.5360908683792696, "grad_norm": 0.16152596473693848, "learning_rate": 3.5575458237557204e-06, "loss": 0.283, "step": 15574 }, { "epoch": 2.5362537149371005, "grad_norm": 0.1947588324546814, "learning_rate": 3.555110652364177e-06, "loss": 0.2308, "step": 15575 }, { "epoch": 2.5364165614949314, "grad_norm": 0.1586907058954239, "learning_rate": 3.552676250908407e-06, "loss": 0.2722, "step": 15576 }, { "epoch": 2.5365794080527624, "grad_norm": 0.17954783141613007, "learning_rate": 3.5502426194758216e-06, "loss": 0.2745, "step": 15577 }, { "epoch": 2.5367422546105933, "grad_norm": 0.20174285769462585, "learning_rate": 3.5478097581537943e-06, "loss": 0.2739, "step": 15578 }, { "epoch": 2.536905101168424, "grad_norm": 0.19523100554943085, "learning_rate": 3.5453776670296618e-06, "loss": 0.2896, "step": 15579 }, { "epoch": 2.537067947726255, "grad_norm": 0.1645498275756836, "learning_rate": 3.5429463461907562e-06, "loss": 0.2988, "step": 15580 }, { "epoch": 2.5372307942840857, "grad_norm": 0.1939801275730133, "learning_rate": 3.540515795724364e-06, "loss": 0.2196, "step": 15581 }, { "epoch": 2.5373936408419167, "grad_norm": 0.1865791380405426, "learning_rate": 3.5380860157177425e-06, "loss": 0.2957, "step": 15582 }, { "epoch": 2.5375564873997476, "grad_norm": 0.18934094905853271, "learning_rate": 3.5356570062581417e-06, "loss": 0.2218, "step": 15583 }, { "epoch": 2.5377193339575785, "grad_norm": 0.20514319837093353, "learning_rate": 3.533228767432764e-06, "loss": 0.2822, "step": 15584 }, { "epoch": 2.5378821805154095, "grad_norm": 0.1491950899362564, "learning_rate": 3.5308012993287816e-06, "loss": 0.2739, "step": 15585 }, { "epoch": 2.53804502707324, "grad_norm": 0.23128360509872437, "learning_rate": 3.528374602033363e-06, "loss": 0.2751, "step": 15586 }, { "epoch": 2.5382078736310714, "grad_norm": 0.18783557415008545, "learning_rate": 3.525948675633628e-06, "loss": 0.3003, "step": 15587 }, { "epoch": 2.538370720188902, "grad_norm": 0.19478839635849, "learning_rate": 3.523523520216673e-06, "loss": 0.2565, "step": 15588 }, { "epoch": 2.538533566746733, "grad_norm": 0.12919051945209503, "learning_rate": 3.5210991358695627e-06, "loss": 0.2897, "step": 15589 }, { "epoch": 2.5386964133045637, "grad_norm": 0.18426725268363953, "learning_rate": 3.5186755226793516e-06, "loss": 0.3217, "step": 15590 }, { "epoch": 2.5388592598623947, "grad_norm": 0.19368860125541687, "learning_rate": 3.516252680733048e-06, "loss": 0.3033, "step": 15591 }, { "epoch": 2.5390221064202256, "grad_norm": 0.18425306677818298, "learning_rate": 3.513830610117641e-06, "loss": 0.256, "step": 15592 }, { "epoch": 2.5391849529780566, "grad_norm": 0.14138682186603546, "learning_rate": 3.511409310920083e-06, "loss": 0.2772, "step": 15593 }, { "epoch": 2.5393477995358875, "grad_norm": 0.15566903352737427, "learning_rate": 3.508988783227318e-06, "loss": 0.2726, "step": 15594 }, { "epoch": 2.539510646093718, "grad_norm": 0.20217928290367126, "learning_rate": 3.506569027126247e-06, "loss": 0.2911, "step": 15595 }, { "epoch": 2.539673492651549, "grad_norm": 0.17484231293201447, "learning_rate": 3.5041500427037354e-06, "loss": 0.2514, "step": 15596 }, { "epoch": 2.53983633920938, "grad_norm": 0.16799694299697876, "learning_rate": 3.5017318300466477e-06, "loss": 0.277, "step": 15597 }, { "epoch": 2.539999185767211, "grad_norm": 0.18161886930465698, "learning_rate": 3.499314389241798e-06, "loss": 0.2925, "step": 15598 }, { "epoch": 2.540162032325042, "grad_norm": 0.1441347897052765, "learning_rate": 3.4968977203759803e-06, "loss": 0.2705, "step": 15599 }, { "epoch": 2.5403248788828727, "grad_norm": 0.17264005541801453, "learning_rate": 3.49448182353595e-06, "loss": 0.2184, "step": 15600 }, { "epoch": 2.5404877254407037, "grad_norm": 0.13687817752361298, "learning_rate": 3.4920666988084634e-06, "loss": 0.3061, "step": 15601 }, { "epoch": 2.540650571998534, "grad_norm": 0.1855974644422531, "learning_rate": 3.48965234628022e-06, "loss": 0.2879, "step": 15602 }, { "epoch": 2.540813418556365, "grad_norm": 0.1599399298429489, "learning_rate": 3.487238766037901e-06, "loss": 0.2591, "step": 15603 }, { "epoch": 2.540976265114196, "grad_norm": 0.15998131036758423, "learning_rate": 3.484825958168167e-06, "loss": 0.2339, "step": 15604 }, { "epoch": 2.541139111672027, "grad_norm": 0.1504492610692978, "learning_rate": 3.4824139227576437e-06, "loss": 0.2681, "step": 15605 }, { "epoch": 2.541301958229858, "grad_norm": 0.17589640617370605, "learning_rate": 3.480002659892931e-06, "loss": 0.2775, "step": 15606 }, { "epoch": 2.541464804787689, "grad_norm": 0.20184533298015594, "learning_rate": 3.4775921696605902e-06, "loss": 0.2917, "step": 15607 }, { "epoch": 2.54162765134552, "grad_norm": 0.19529560208320618, "learning_rate": 3.475182452147177e-06, "loss": 0.2457, "step": 15608 }, { "epoch": 2.5417904979033503, "grad_norm": 0.1494363695383072, "learning_rate": 3.4727735074392088e-06, "loss": 0.2674, "step": 15609 }, { "epoch": 2.5419533444611817, "grad_norm": 0.1868310421705246, "learning_rate": 3.470365335623166e-06, "loss": 0.2413, "step": 15610 }, { "epoch": 2.542116191019012, "grad_norm": 0.17352263629436493, "learning_rate": 3.467957936785507e-06, "loss": 0.238, "step": 15611 }, { "epoch": 2.542279037576843, "grad_norm": 0.19659440219402313, "learning_rate": 3.465551311012674e-06, "loss": 0.2815, "step": 15612 }, { "epoch": 2.542441884134674, "grad_norm": 0.16685150563716888, "learning_rate": 3.4631454583910703e-06, "loss": 0.2696, "step": 15613 }, { "epoch": 2.542604730692505, "grad_norm": 0.18571561574935913, "learning_rate": 3.460740379007063e-06, "loss": 0.2672, "step": 15614 }, { "epoch": 2.542767577250336, "grad_norm": 0.16186341643333435, "learning_rate": 3.458336072947016e-06, "loss": 0.255, "step": 15615 }, { "epoch": 2.542930423808167, "grad_norm": 0.21544992923736572, "learning_rate": 3.455932540297241e-06, "loss": 0.2723, "step": 15616 }, { "epoch": 2.543093270365998, "grad_norm": 0.1739986538887024, "learning_rate": 3.453529781144038e-06, "loss": 0.2919, "step": 15617 }, { "epoch": 2.5432561169238284, "grad_norm": 0.16230683028697968, "learning_rate": 3.451127795573672e-06, "loss": 0.2557, "step": 15618 }, { "epoch": 2.5434189634816593, "grad_norm": 0.1569286286830902, "learning_rate": 3.4487265836723765e-06, "loss": 0.277, "step": 15619 }, { "epoch": 2.5435818100394902, "grad_norm": 0.18564526736736298, "learning_rate": 3.4463261455263685e-06, "loss": 0.2669, "step": 15620 }, { "epoch": 2.543744656597321, "grad_norm": 0.13772492110729218, "learning_rate": 3.443926481221829e-06, "loss": 0.2642, "step": 15621 }, { "epoch": 2.543907503155152, "grad_norm": 0.23270146548748016, "learning_rate": 3.441527590844909e-06, "loss": 0.2778, "step": 15622 }, { "epoch": 2.544070349712983, "grad_norm": 0.2091788649559021, "learning_rate": 3.439129474481742e-06, "loss": 0.2981, "step": 15623 }, { "epoch": 2.544233196270814, "grad_norm": 0.3024635314941406, "learning_rate": 3.4367321322184288e-06, "loss": 0.2793, "step": 15624 }, { "epoch": 2.5443960428286445, "grad_norm": 0.1945921778678894, "learning_rate": 3.434335564141028e-06, "loss": 0.2727, "step": 15625 }, { "epoch": 2.5445588893864755, "grad_norm": 0.15097388625144958, "learning_rate": 3.4319397703356014e-06, "loss": 0.2596, "step": 15626 }, { "epoch": 2.5447217359443064, "grad_norm": 0.2041090726852417, "learning_rate": 3.429544750888156e-06, "loss": 0.2488, "step": 15627 }, { "epoch": 2.5448845825021373, "grad_norm": 0.17622701823711395, "learning_rate": 3.4271505058846797e-06, "loss": 0.2473, "step": 15628 }, { "epoch": 2.5450474290599683, "grad_norm": 0.20343299210071564, "learning_rate": 3.4247570354111277e-06, "loss": 0.3289, "step": 15629 }, { "epoch": 2.5452102756177992, "grad_norm": 0.15298974514007568, "learning_rate": 3.4223643395534466e-06, "loss": 0.2644, "step": 15630 }, { "epoch": 2.54537312217563, "grad_norm": 0.20485906302928925, "learning_rate": 3.4199724183975324e-06, "loss": 0.2499, "step": 15631 }, { "epoch": 2.5455359687334607, "grad_norm": 0.15955539047718048, "learning_rate": 3.417581272029266e-06, "loss": 0.2605, "step": 15632 }, { "epoch": 2.545698815291292, "grad_norm": 0.16081728041172028, "learning_rate": 3.4151909005344875e-06, "loss": 0.3017, "step": 15633 }, { "epoch": 2.5458616618491225, "grad_norm": 0.20430046319961548, "learning_rate": 3.412801303999033e-06, "loss": 0.2873, "step": 15634 }, { "epoch": 2.5460245084069535, "grad_norm": 0.19036532938480377, "learning_rate": 3.4104124825086875e-06, "loss": 0.2883, "step": 15635 }, { "epoch": 2.5461873549647844, "grad_norm": 0.174308642745018, "learning_rate": 3.4080244361492124e-06, "loss": 0.2697, "step": 15636 }, { "epoch": 2.5463502015226154, "grad_norm": 0.2114754617214203, "learning_rate": 3.4056371650063562e-06, "loss": 0.2926, "step": 15637 }, { "epoch": 2.5465130480804463, "grad_norm": 0.14939665794372559, "learning_rate": 3.403250669165825e-06, "loss": 0.2712, "step": 15638 }, { "epoch": 2.546675894638277, "grad_norm": 0.15055416524410248, "learning_rate": 3.4008649487133002e-06, "loss": 0.244, "step": 15639 }, { "epoch": 2.546838741196108, "grad_norm": 0.1825432926416397, "learning_rate": 3.3984800037344306e-06, "loss": 0.2602, "step": 15640 }, { "epoch": 2.5470015877539387, "grad_norm": 0.18721415102481842, "learning_rate": 3.396095834314855e-06, "loss": 0.2494, "step": 15641 }, { "epoch": 2.5471644343117696, "grad_norm": 0.1936669647693634, "learning_rate": 3.3937124405401644e-06, "loss": 0.2544, "step": 15642 }, { "epoch": 2.5473272808696006, "grad_norm": 0.1614731252193451, "learning_rate": 3.391329822495934e-06, "loss": 0.2567, "step": 15643 }, { "epoch": 2.5474901274274315, "grad_norm": 0.21255403757095337, "learning_rate": 3.3889479802676953e-06, "loss": 0.2076, "step": 15644 }, { "epoch": 2.5476529739852625, "grad_norm": 0.1926528662443161, "learning_rate": 3.3865669139409804e-06, "loss": 0.2824, "step": 15645 }, { "epoch": 2.5478158205430934, "grad_norm": 0.20467446744441986, "learning_rate": 3.384186623601268e-06, "loss": 0.2601, "step": 15646 }, { "epoch": 2.5479786671009244, "grad_norm": 0.16042755544185638, "learning_rate": 3.381807109334015e-06, "loss": 0.2409, "step": 15647 }, { "epoch": 2.548141513658755, "grad_norm": 0.16305764019489288, "learning_rate": 3.3794283712246606e-06, "loss": 0.2753, "step": 15648 }, { "epoch": 2.548304360216586, "grad_norm": 0.1729244738817215, "learning_rate": 3.3770504093586035e-06, "loss": 0.2562, "step": 15649 }, { "epoch": 2.5484672067744167, "grad_norm": 0.22621124982833862, "learning_rate": 3.374673223821223e-06, "loss": 0.2376, "step": 15650 }, { "epoch": 2.5486300533322477, "grad_norm": 0.18145500123500824, "learning_rate": 3.372296814697859e-06, "loss": 0.2436, "step": 15651 }, { "epoch": 2.5487928998900786, "grad_norm": 0.1934400200843811, "learning_rate": 3.369921182073843e-06, "loss": 0.2804, "step": 15652 }, { "epoch": 2.5489557464479096, "grad_norm": 0.2298119217157364, "learning_rate": 3.367546326034454e-06, "loss": 0.2515, "step": 15653 }, { "epoch": 2.5491185930057405, "grad_norm": 0.22818298637866974, "learning_rate": 3.3651722466649716e-06, "loss": 0.3148, "step": 15654 }, { "epoch": 2.549281439563571, "grad_norm": 0.18414834141731262, "learning_rate": 3.362798944050627e-06, "loss": 0.2673, "step": 15655 }, { "epoch": 2.549444286121402, "grad_norm": 0.15403582155704498, "learning_rate": 3.360426418276619e-06, "loss": 0.2491, "step": 15656 }, { "epoch": 2.549607132679233, "grad_norm": 0.17770858108997345, "learning_rate": 3.358054669428143e-06, "loss": 0.2655, "step": 15657 }, { "epoch": 2.549769979237064, "grad_norm": 0.17821446061134338, "learning_rate": 3.355683697590342e-06, "loss": 0.2932, "step": 15658 }, { "epoch": 2.5499328257948948, "grad_norm": 0.18541868031024933, "learning_rate": 3.353313502848346e-06, "loss": 0.2632, "step": 15659 }, { "epoch": 2.5500956723527257, "grad_norm": 0.15186707675457, "learning_rate": 3.3509440852872527e-06, "loss": 0.2582, "step": 15660 }, { "epoch": 2.5502585189105567, "grad_norm": 0.1797901690006256, "learning_rate": 3.3485754449921304e-06, "loss": 0.3013, "step": 15661 }, { "epoch": 2.550421365468387, "grad_norm": 0.18321701884269714, "learning_rate": 3.346207582048011e-06, "loss": 0.2579, "step": 15662 }, { "epoch": 2.5505842120262185, "grad_norm": 0.23306451737880707, "learning_rate": 3.3438404965399213e-06, "loss": 0.3059, "step": 15663 }, { "epoch": 2.550747058584049, "grad_norm": 0.16900905966758728, "learning_rate": 3.341474188552843e-06, "loss": 0.2481, "step": 15664 }, { "epoch": 2.55090990514188, "grad_norm": 0.304343581199646, "learning_rate": 3.3391086581717247e-06, "loss": 0.2882, "step": 15665 }, { "epoch": 2.551072751699711, "grad_norm": 0.17033030092716217, "learning_rate": 3.3367439054815095e-06, "loss": 0.3014, "step": 15666 }, { "epoch": 2.551235598257542, "grad_norm": 0.1948775202035904, "learning_rate": 3.334379930567094e-06, "loss": 0.2799, "step": 15667 }, { "epoch": 2.551398444815373, "grad_norm": 0.19154877960681915, "learning_rate": 3.3320167335133516e-06, "loss": 0.2663, "step": 15668 }, { "epoch": 2.5515612913732038, "grad_norm": 0.15340201556682587, "learning_rate": 3.32965431440512e-06, "loss": 0.2419, "step": 15669 }, { "epoch": 2.5517241379310347, "grad_norm": 0.19685207307338715, "learning_rate": 3.3272926733272316e-06, "loss": 0.2753, "step": 15670 }, { "epoch": 2.551886984488865, "grad_norm": 0.18302850425243378, "learning_rate": 3.324931810364468e-06, "loss": 0.2607, "step": 15671 }, { "epoch": 2.552049831046696, "grad_norm": 0.15524041652679443, "learning_rate": 3.322571725601592e-06, "loss": 0.2527, "step": 15672 }, { "epoch": 2.552212677604527, "grad_norm": 0.19355906546115875, "learning_rate": 3.3202124191233363e-06, "loss": 0.2406, "step": 15673 }, { "epoch": 2.552375524162358, "grad_norm": 0.1807066649198532, "learning_rate": 3.3178538910144107e-06, "loss": 0.2471, "step": 15674 }, { "epoch": 2.552538370720189, "grad_norm": 0.15707536041736603, "learning_rate": 3.315496141359495e-06, "loss": 0.261, "step": 15675 }, { "epoch": 2.55270121727802, "grad_norm": 0.19542498886585236, "learning_rate": 3.313139170243229e-06, "loss": 0.2824, "step": 15676 }, { "epoch": 2.552864063835851, "grad_norm": 0.1432807743549347, "learning_rate": 3.3107829777502485e-06, "loss": 0.2289, "step": 15677 }, { "epoch": 2.5530269103936813, "grad_norm": 0.22323912382125854, "learning_rate": 3.308427563965141e-06, "loss": 0.2909, "step": 15678 }, { "epoch": 2.5531897569515123, "grad_norm": 0.1923568993806839, "learning_rate": 3.306072928972473e-06, "loss": 0.2887, "step": 15679 }, { "epoch": 2.5533526035093432, "grad_norm": 0.19087378680706024, "learning_rate": 3.3037190728567784e-06, "loss": 0.2647, "step": 15680 }, { "epoch": 2.553515450067174, "grad_norm": 0.2082243710756302, "learning_rate": 3.3013659957025794e-06, "loss": 0.2455, "step": 15681 }, { "epoch": 2.553678296625005, "grad_norm": 0.15244536101818085, "learning_rate": 3.2990136975943526e-06, "loss": 0.2766, "step": 15682 }, { "epoch": 2.553841143182836, "grad_norm": 0.24430260062217712, "learning_rate": 3.2966621786165497e-06, "loss": 0.2747, "step": 15683 }, { "epoch": 2.554003989740667, "grad_norm": 0.1837640255689621, "learning_rate": 3.294311438853598e-06, "loss": 0.2625, "step": 15684 }, { "epoch": 2.5541668362984975, "grad_norm": 0.16934740543365479, "learning_rate": 3.291961478389899e-06, "loss": 0.2874, "step": 15685 }, { "epoch": 2.554329682856329, "grad_norm": 0.21813169121742249, "learning_rate": 3.289612297309824e-06, "loss": 0.2966, "step": 15686 }, { "epoch": 2.5544925294141594, "grad_norm": 0.18623536825180054, "learning_rate": 3.2872638956977064e-06, "loss": 0.2576, "step": 15687 }, { "epoch": 2.5546553759719903, "grad_norm": 0.2043209820985794, "learning_rate": 3.2849162736378752e-06, "loss": 0.3512, "step": 15688 }, { "epoch": 2.5548182225298213, "grad_norm": 0.19198572635650635, "learning_rate": 3.2825694312146105e-06, "loss": 0.2887, "step": 15689 }, { "epoch": 2.554981069087652, "grad_norm": 0.14485590159893036, "learning_rate": 3.2802233685121615e-06, "loss": 0.3244, "step": 15690 }, { "epoch": 2.555143915645483, "grad_norm": 0.20166805386543274, "learning_rate": 3.2778780856147774e-06, "loss": 0.2778, "step": 15691 }, { "epoch": 2.5553067622033137, "grad_norm": 0.1632608324289322, "learning_rate": 3.275533582606649e-06, "loss": 0.3011, "step": 15692 }, { "epoch": 2.555469608761145, "grad_norm": 0.168311208486557, "learning_rate": 3.2731898595719484e-06, "loss": 0.2631, "step": 15693 }, { "epoch": 2.5556324553189755, "grad_norm": 0.18964411318302155, "learning_rate": 3.27084691659483e-06, "loss": 0.276, "step": 15694 }, { "epoch": 2.5557953018768065, "grad_norm": 0.20934490859508514, "learning_rate": 3.2685047537594077e-06, "loss": 0.2702, "step": 15695 }, { "epoch": 2.5559581484346374, "grad_norm": 0.19467765092849731, "learning_rate": 3.2661633711497774e-06, "loss": 0.2852, "step": 15696 }, { "epoch": 2.5561209949924684, "grad_norm": 0.20742489397525787, "learning_rate": 3.26382276885e-06, "loss": 0.2748, "step": 15697 }, { "epoch": 2.5562838415502993, "grad_norm": 0.18387183547019958, "learning_rate": 3.2614829469441027e-06, "loss": 0.2889, "step": 15698 }, { "epoch": 2.5564466881081302, "grad_norm": 0.18676160275936127, "learning_rate": 3.2591439055161016e-06, "loss": 0.3225, "step": 15699 }, { "epoch": 2.556609534665961, "grad_norm": 0.22159472107887268, "learning_rate": 3.256805644649974e-06, "loss": 0.3048, "step": 15700 }, { "epoch": 2.5567723812237917, "grad_norm": 0.1495189219713211, "learning_rate": 3.2544681644296664e-06, "loss": 0.2506, "step": 15701 }, { "epoch": 2.5569352277816226, "grad_norm": 0.15264250338077545, "learning_rate": 3.252131464939098e-06, "loss": 0.2906, "step": 15702 }, { "epoch": 2.5570980743394536, "grad_norm": 0.18691478669643402, "learning_rate": 3.2497955462621744e-06, "loss": 0.2876, "step": 15703 }, { "epoch": 2.5572609208972845, "grad_norm": 0.23394674062728882, "learning_rate": 3.247460408482755e-06, "loss": 0.2716, "step": 15704 }, { "epoch": 2.5574237674551155, "grad_norm": 0.17629210650920868, "learning_rate": 3.2451260516846743e-06, "loss": 0.2767, "step": 15705 }, { "epoch": 2.5575866140129464, "grad_norm": 0.20412610471248627, "learning_rate": 3.242792475951753e-06, "loss": 0.2687, "step": 15706 }, { "epoch": 2.5577494605707773, "grad_norm": 0.2122138887643814, "learning_rate": 3.240459681367769e-06, "loss": 0.3248, "step": 15707 }, { "epoch": 2.557912307128608, "grad_norm": 0.13510878384113312, "learning_rate": 3.2381276680164747e-06, "loss": 0.3008, "step": 15708 }, { "epoch": 2.5580751536864392, "grad_norm": 0.18924319744110107, "learning_rate": 3.2357964359815946e-06, "loss": 0.2691, "step": 15709 }, { "epoch": 2.5582380002442697, "grad_norm": 0.18934345245361328, "learning_rate": 3.2334659853468314e-06, "loss": 0.2505, "step": 15710 }, { "epoch": 2.5584008468021007, "grad_norm": 0.1514994204044342, "learning_rate": 3.2311363161958568e-06, "loss": 0.2667, "step": 15711 }, { "epoch": 2.5585636933599316, "grad_norm": 0.21854744851589203, "learning_rate": 3.228807428612307e-06, "loss": 0.2743, "step": 15712 }, { "epoch": 2.5587265399177626, "grad_norm": 0.1581905633211136, "learning_rate": 3.2264793226797957e-06, "loss": 0.2574, "step": 15713 }, { "epoch": 2.5588893864755935, "grad_norm": 0.14816352725028992, "learning_rate": 3.224151998481914e-06, "loss": 0.2772, "step": 15714 }, { "epoch": 2.559052233033424, "grad_norm": 0.1527245193719864, "learning_rate": 3.2218254561022206e-06, "loss": 0.2248, "step": 15715 }, { "epoch": 2.5592150795912554, "grad_norm": 0.15218360722064972, "learning_rate": 3.2194996956242367e-06, "loss": 0.2494, "step": 15716 }, { "epoch": 2.559377926149086, "grad_norm": 0.1915082335472107, "learning_rate": 3.2171747171314743e-06, "loss": 0.2785, "step": 15717 }, { "epoch": 2.559540772706917, "grad_norm": 0.18678288161754608, "learning_rate": 3.2148505207074046e-06, "loss": 0.2912, "step": 15718 }, { "epoch": 2.5597036192647478, "grad_norm": 0.15996815264225006, "learning_rate": 3.2125271064354702e-06, "loss": 0.2591, "step": 15719 }, { "epoch": 2.5598664658225787, "grad_norm": 0.15656553208827972, "learning_rate": 3.210204474399084e-06, "loss": 0.2412, "step": 15720 }, { "epoch": 2.5600293123804096, "grad_norm": 0.16421210765838623, "learning_rate": 3.2078826246816463e-06, "loss": 0.2268, "step": 15721 }, { "epoch": 2.5601921589382406, "grad_norm": 0.1700696349143982, "learning_rate": 3.2055615573665125e-06, "loss": 0.252, "step": 15722 }, { "epoch": 2.5603550054960715, "grad_norm": 0.2409752905368805, "learning_rate": 3.203241272537019e-06, "loss": 0.2936, "step": 15723 }, { "epoch": 2.560517852053902, "grad_norm": 0.2026566118001938, "learning_rate": 3.2009217702764607e-06, "loss": 0.277, "step": 15724 }, { "epoch": 2.560680698611733, "grad_norm": 0.16348335146903992, "learning_rate": 3.1986030506681307e-06, "loss": 0.2699, "step": 15725 }, { "epoch": 2.560843545169564, "grad_norm": 0.1925652027130127, "learning_rate": 3.1962851137952664e-06, "loss": 0.2922, "step": 15726 }, { "epoch": 2.561006391727395, "grad_norm": 0.20037783682346344, "learning_rate": 3.19396795974109e-06, "loss": 0.3024, "step": 15727 }, { "epoch": 2.561169238285226, "grad_norm": 0.15874816477298737, "learning_rate": 3.191651588588801e-06, "loss": 0.3077, "step": 15728 }, { "epoch": 2.5613320848430567, "grad_norm": 0.20023617148399353, "learning_rate": 3.1893360004215584e-06, "loss": 0.3107, "step": 15729 }, { "epoch": 2.5614949314008877, "grad_norm": 0.21179385483264923, "learning_rate": 3.1870211953224956e-06, "loss": 0.2902, "step": 15730 }, { "epoch": 2.561657777958718, "grad_norm": 0.14699684083461761, "learning_rate": 3.184707173374729e-06, "loss": 0.2598, "step": 15731 }, { "epoch": 2.561820624516549, "grad_norm": 0.19358251988887787, "learning_rate": 3.1823939346613375e-06, "loss": 0.2825, "step": 15732 }, { "epoch": 2.56198347107438, "grad_norm": 0.18919788300991058, "learning_rate": 3.180081479265365e-06, "loss": 0.2938, "step": 15733 }, { "epoch": 2.562146317632211, "grad_norm": 0.17626795172691345, "learning_rate": 3.177769807269848e-06, "loss": 0.237, "step": 15734 }, { "epoch": 2.562309164190042, "grad_norm": 0.14688435196876526, "learning_rate": 3.1754589187577677e-06, "loss": 0.2641, "step": 15735 }, { "epoch": 2.562472010747873, "grad_norm": 0.1627541035413742, "learning_rate": 3.1731488138121073e-06, "loss": 0.2588, "step": 15736 }, { "epoch": 2.562634857305704, "grad_norm": 0.17945155501365662, "learning_rate": 3.170839492515801e-06, "loss": 0.2403, "step": 15737 }, { "epoch": 2.5627977038635343, "grad_norm": 0.138590469956398, "learning_rate": 3.1685309549517513e-06, "loss": 0.2527, "step": 15738 }, { "epoch": 2.5629605504213657, "grad_norm": 0.17395642399787903, "learning_rate": 3.166223201202856e-06, "loss": 0.253, "step": 15739 }, { "epoch": 2.5631233969791962, "grad_norm": 0.1971951723098755, "learning_rate": 3.1639162313519625e-06, "loss": 0.284, "step": 15740 }, { "epoch": 2.563286243537027, "grad_norm": 0.19789840281009674, "learning_rate": 3.1616100454818998e-06, "loss": 0.2665, "step": 15741 }, { "epoch": 2.563449090094858, "grad_norm": 0.1455075889825821, "learning_rate": 3.159304643675459e-06, "loss": 0.2741, "step": 15742 }, { "epoch": 2.563611936652689, "grad_norm": 0.21088235080242157, "learning_rate": 3.157000026015425e-06, "loss": 0.305, "step": 15743 }, { "epoch": 2.56377478321052, "grad_norm": 0.183313250541687, "learning_rate": 3.154696192584533e-06, "loss": 0.2829, "step": 15744 }, { "epoch": 2.563937629768351, "grad_norm": 0.17983153462409973, "learning_rate": 3.1523931434654987e-06, "loss": 0.2577, "step": 15745 }, { "epoch": 2.564100476326182, "grad_norm": 0.179719939827919, "learning_rate": 3.1500908787409995e-06, "loss": 0.2661, "step": 15746 }, { "epoch": 2.5642633228840124, "grad_norm": 0.18949714303016663, "learning_rate": 3.1477893984937084e-06, "loss": 0.2127, "step": 15747 }, { "epoch": 2.5644261694418433, "grad_norm": 0.13700270652770996, "learning_rate": 3.1454887028062514e-06, "loss": 0.2906, "step": 15748 }, { "epoch": 2.5645890159996743, "grad_norm": 0.16148445010185242, "learning_rate": 3.14318879176122e-06, "loss": 0.2375, "step": 15749 }, { "epoch": 2.564751862557505, "grad_norm": 0.22414667904376984, "learning_rate": 3.1408896654412016e-06, "loss": 0.2263, "step": 15750 }, { "epoch": 2.564914709115336, "grad_norm": 0.1652567833662033, "learning_rate": 3.1385913239287355e-06, "loss": 0.2949, "step": 15751 }, { "epoch": 2.565077555673167, "grad_norm": 0.22634463012218475, "learning_rate": 3.136293767306342e-06, "loss": 0.2812, "step": 15752 }, { "epoch": 2.565240402230998, "grad_norm": 0.1708967089653015, "learning_rate": 3.133996995656499e-06, "loss": 0.2316, "step": 15753 }, { "epoch": 2.5654032487888285, "grad_norm": 0.17819905281066895, "learning_rate": 3.131701009061683e-06, "loss": 0.2661, "step": 15754 }, { "epoch": 2.5655660953466595, "grad_norm": 0.1799868941307068, "learning_rate": 3.1294058076043187e-06, "loss": 0.278, "step": 15755 }, { "epoch": 2.5657289419044904, "grad_norm": 0.2094586044549942, "learning_rate": 3.1271113913668076e-06, "loss": 0.3675, "step": 15756 }, { "epoch": 2.5658917884623214, "grad_norm": 0.16465453803539276, "learning_rate": 3.1248177604315364e-06, "loss": 0.2677, "step": 15757 }, { "epoch": 2.5660546350201523, "grad_norm": 0.1569516807794571, "learning_rate": 3.122524914880848e-06, "loss": 0.2519, "step": 15758 }, { "epoch": 2.5662174815779832, "grad_norm": 0.20792600512504578, "learning_rate": 3.120232854797059e-06, "loss": 0.2868, "step": 15759 }, { "epoch": 2.566380328135814, "grad_norm": 0.1608732044696808, "learning_rate": 3.1179415802624594e-06, "loss": 0.2637, "step": 15760 }, { "epoch": 2.5665431746936447, "grad_norm": 0.1923040747642517, "learning_rate": 3.1156510913593256e-06, "loss": 0.2542, "step": 15761 }, { "epoch": 2.566706021251476, "grad_norm": 0.12705160677433014, "learning_rate": 3.1133613881698833e-06, "loss": 0.2605, "step": 15762 }, { "epoch": 2.5668688678093066, "grad_norm": 0.1544627845287323, "learning_rate": 3.1110724707763413e-06, "loss": 0.278, "step": 15763 }, { "epoch": 2.5670317143671375, "grad_norm": 0.17763976752758026, "learning_rate": 3.108784339260873e-06, "loss": 0.2423, "step": 15764 }, { "epoch": 2.5671945609249684, "grad_norm": 0.16905798017978668, "learning_rate": 3.1064969937056376e-06, "loss": 0.2519, "step": 15765 }, { "epoch": 2.5673574074827994, "grad_norm": 0.17744740843772888, "learning_rate": 3.1042104341927587e-06, "loss": 0.2371, "step": 15766 }, { "epoch": 2.5675202540406303, "grad_norm": 0.14558812975883484, "learning_rate": 3.10192466080432e-06, "loss": 0.2529, "step": 15767 }, { "epoch": 2.567683100598461, "grad_norm": 0.15961523354053497, "learning_rate": 3.0996396736223983e-06, "loss": 0.2479, "step": 15768 }, { "epoch": 2.567845947156292, "grad_norm": 0.16562709212303162, "learning_rate": 3.09735547272903e-06, "loss": 0.2635, "step": 15769 }, { "epoch": 2.5680087937141227, "grad_norm": 0.16807620227336884, "learning_rate": 3.0950720582062164e-06, "loss": 0.2375, "step": 15770 }, { "epoch": 2.5681716402719537, "grad_norm": 0.15100789070129395, "learning_rate": 3.0927894301359478e-06, "loss": 0.27, "step": 15771 }, { "epoch": 2.5683344868297846, "grad_norm": 0.16923561692237854, "learning_rate": 3.0905075886001723e-06, "loss": 0.2592, "step": 15772 }, { "epoch": 2.5684973333876155, "grad_norm": 0.15131838619709015, "learning_rate": 3.0882265336808213e-06, "loss": 0.2671, "step": 15773 }, { "epoch": 2.5686601799454465, "grad_norm": 0.21196193993091583, "learning_rate": 3.0859462654597856e-06, "loss": 0.2699, "step": 15774 }, { "epoch": 2.5688230265032774, "grad_norm": 0.1910911351442337, "learning_rate": 3.083666784018932e-06, "loss": 0.2926, "step": 15775 }, { "epoch": 2.5689858730611084, "grad_norm": 0.18975046277046204, "learning_rate": 3.081388089440107e-06, "loss": 0.2758, "step": 15776 }, { "epoch": 2.569148719618939, "grad_norm": 0.15349625051021576, "learning_rate": 3.07911018180512e-06, "loss": 0.2278, "step": 15777 }, { "epoch": 2.56931156617677, "grad_norm": 0.1905115842819214, "learning_rate": 3.0768330611957496e-06, "loss": 0.2699, "step": 15778 }, { "epoch": 2.5694744127346008, "grad_norm": 0.14668570458889008, "learning_rate": 3.0745567276937615e-06, "loss": 0.2415, "step": 15779 }, { "epoch": 2.5696372592924317, "grad_norm": 0.20432613790035248, "learning_rate": 3.072281181380879e-06, "loss": 0.2494, "step": 15780 }, { "epoch": 2.5698001058502626, "grad_norm": 0.184594064950943, "learning_rate": 3.0700064223387946e-06, "loss": 0.3219, "step": 15781 }, { "epoch": 2.5699629524080936, "grad_norm": 0.16381612420082092, "learning_rate": 3.0677324506491826e-06, "loss": 0.2866, "step": 15782 }, { "epoch": 2.5701257989659245, "grad_norm": 0.16849713027477264, "learning_rate": 3.0654592663936886e-06, "loss": 0.2598, "step": 15783 }, { "epoch": 2.570288645523755, "grad_norm": 0.21339277923107147, "learning_rate": 3.063186869653928e-06, "loss": 0.2724, "step": 15784 }, { "epoch": 2.570451492081586, "grad_norm": 0.20702844858169556, "learning_rate": 3.060915260511482e-06, "loss": 0.2716, "step": 15785 }, { "epoch": 2.570614338639417, "grad_norm": 0.1765889823436737, "learning_rate": 3.058644439047903e-06, "loss": 0.2536, "step": 15786 }, { "epoch": 2.570777185197248, "grad_norm": 0.18506819009780884, "learning_rate": 3.056374405344731e-06, "loss": 0.2863, "step": 15787 }, { "epoch": 2.570940031755079, "grad_norm": 0.16708756983280182, "learning_rate": 3.0541051594834643e-06, "loss": 0.263, "step": 15788 }, { "epoch": 2.5711028783129097, "grad_norm": 0.15035194158554077, "learning_rate": 3.0518367015455686e-06, "loss": 0.2345, "step": 15789 }, { "epoch": 2.5712657248707407, "grad_norm": 0.12809588015079498, "learning_rate": 3.049569031612501e-06, "loss": 0.2432, "step": 15790 }, { "epoch": 2.571428571428571, "grad_norm": 0.196630597114563, "learning_rate": 3.0473021497656683e-06, "loss": 0.2757, "step": 15791 }, { "epoch": 2.5715914179864026, "grad_norm": 0.1504732072353363, "learning_rate": 3.0450360560864588e-06, "loss": 0.2436, "step": 15792 }, { "epoch": 2.571754264544233, "grad_norm": 0.14700806140899658, "learning_rate": 3.042770750656232e-06, "loss": 0.2786, "step": 15793 }, { "epoch": 2.571917111102064, "grad_norm": 0.18504218757152557, "learning_rate": 3.0405062335563228e-06, "loss": 0.2307, "step": 15794 }, { "epoch": 2.572079957659895, "grad_norm": 0.1538519710302353, "learning_rate": 3.038242504868033e-06, "loss": 0.2631, "step": 15795 }, { "epoch": 2.572242804217726, "grad_norm": 0.17749004065990448, "learning_rate": 3.0359795646726393e-06, "loss": 0.3112, "step": 15796 }, { "epoch": 2.572405650775557, "grad_norm": 0.23302766680717468, "learning_rate": 3.033717413051376e-06, "loss": 0.3049, "step": 15797 }, { "epoch": 2.5725684973333878, "grad_norm": 0.15932832658290863, "learning_rate": 3.0314560500854793e-06, "loss": 0.2663, "step": 15798 }, { "epoch": 2.5727313438912187, "grad_norm": 0.14740750193595886, "learning_rate": 3.029195475856128e-06, "loss": 0.2606, "step": 15799 }, { "epoch": 2.572894190449049, "grad_norm": 0.13724549114704132, "learning_rate": 3.0269356904444795e-06, "loss": 0.2451, "step": 15800 }, { "epoch": 2.57305703700688, "grad_norm": 0.17279157042503357, "learning_rate": 3.0246766939316775e-06, "loss": 0.284, "step": 15801 }, { "epoch": 2.573219883564711, "grad_norm": 0.18188120424747467, "learning_rate": 3.022418486398823e-06, "loss": 0.2617, "step": 15802 }, { "epoch": 2.573382730122542, "grad_norm": 0.13723063468933105, "learning_rate": 3.0201610679269935e-06, "loss": 0.2441, "step": 15803 }, { "epoch": 2.573545576680373, "grad_norm": 0.16237515211105347, "learning_rate": 3.017904438597227e-06, "loss": 0.242, "step": 15804 }, { "epoch": 2.573708423238204, "grad_norm": 0.1860424280166626, "learning_rate": 3.0156485984905586e-06, "loss": 0.2802, "step": 15805 }, { "epoch": 2.573871269796035, "grad_norm": 0.17656053602695465, "learning_rate": 3.0133935476879706e-06, "loss": 0.2702, "step": 15806 }, { "epoch": 2.5740341163538654, "grad_norm": 0.19757579267024994, "learning_rate": 3.011139286270423e-06, "loss": 0.288, "step": 15807 }, { "epoch": 2.5741969629116963, "grad_norm": 0.19625742733478546, "learning_rate": 3.0088858143188597e-06, "loss": 0.2925, "step": 15808 }, { "epoch": 2.5743598094695272, "grad_norm": 0.20548905432224274, "learning_rate": 3.00663313191418e-06, "loss": 0.3036, "step": 15809 }, { "epoch": 2.574522656027358, "grad_norm": 0.16353663802146912, "learning_rate": 3.0043812391372687e-06, "loss": 0.2425, "step": 15810 }, { "epoch": 2.574685502585189, "grad_norm": 0.19518136978149414, "learning_rate": 3.00213013606897e-06, "loss": 0.2994, "step": 15811 }, { "epoch": 2.57484834914302, "grad_norm": 0.2113698422908783, "learning_rate": 2.9998798227901053e-06, "loss": 0.2379, "step": 15812 }, { "epoch": 2.575011195700851, "grad_norm": 0.1882387399673462, "learning_rate": 2.997630299381471e-06, "loss": 0.2826, "step": 15813 }, { "epoch": 2.5751740422586815, "grad_norm": 0.1620412915945053, "learning_rate": 2.9953815659238327e-06, "loss": 0.2628, "step": 15814 }, { "epoch": 2.575336888816513, "grad_norm": 0.16064560413360596, "learning_rate": 2.993133622497915e-06, "loss": 0.2839, "step": 15815 }, { "epoch": 2.5754997353743434, "grad_norm": 0.18163105845451355, "learning_rate": 2.9908864691844422e-06, "loss": 0.2834, "step": 15816 }, { "epoch": 2.5756625819321743, "grad_norm": 0.1610085517168045, "learning_rate": 2.988640106064086e-06, "loss": 0.256, "step": 15817 }, { "epoch": 2.5758254284900053, "grad_norm": 0.18921588361263275, "learning_rate": 2.9863945332174904e-06, "loss": 0.2895, "step": 15818 }, { "epoch": 2.5759882750478362, "grad_norm": 0.1824103444814682, "learning_rate": 2.984149750725293e-06, "loss": 0.2529, "step": 15819 }, { "epoch": 2.576151121605667, "grad_norm": 0.17624913156032562, "learning_rate": 2.98190575866808e-06, "loss": 0.2435, "step": 15820 }, { "epoch": 2.5763139681634977, "grad_norm": 0.21349458396434784, "learning_rate": 2.9796625571264197e-06, "loss": 0.2599, "step": 15821 }, { "epoch": 2.576476814721329, "grad_norm": 0.17738835513591766, "learning_rate": 2.9774201461808425e-06, "loss": 0.2628, "step": 15822 }, { "epoch": 2.5766396612791596, "grad_norm": 0.17407137155532837, "learning_rate": 2.9751785259118676e-06, "loss": 0.3195, "step": 15823 }, { "epoch": 2.5768025078369905, "grad_norm": 0.18427510559558868, "learning_rate": 2.972937696399972e-06, "loss": 0.2506, "step": 15824 }, { "epoch": 2.5769653543948214, "grad_norm": 0.1749339997768402, "learning_rate": 2.9706976577256075e-06, "loss": 0.2612, "step": 15825 }, { "epoch": 2.5771282009526524, "grad_norm": 0.1650724560022354, "learning_rate": 2.9684584099691965e-06, "loss": 0.2495, "step": 15826 }, { "epoch": 2.5772910475104833, "grad_norm": 0.22069166600704193, "learning_rate": 2.9662199532111386e-06, "loss": 0.2909, "step": 15827 }, { "epoch": 2.5774538940683143, "grad_norm": 0.1699003130197525, "learning_rate": 2.9639822875317996e-06, "loss": 0.2711, "step": 15828 }, { "epoch": 2.577616740626145, "grad_norm": 0.20450283586978912, "learning_rate": 2.9617454130115157e-06, "loss": 0.2481, "step": 15829 }, { "epoch": 2.5777795871839757, "grad_norm": 0.19568228721618652, "learning_rate": 2.9595093297306032e-06, "loss": 0.2244, "step": 15830 }, { "epoch": 2.5779424337418066, "grad_norm": 0.18668343126773834, "learning_rate": 2.957274037769339e-06, "loss": 0.2306, "step": 15831 }, { "epoch": 2.5781052802996376, "grad_norm": 0.15593145787715912, "learning_rate": 2.955039537207982e-06, "loss": 0.2676, "step": 15832 }, { "epoch": 2.5782681268574685, "grad_norm": 0.14615857601165771, "learning_rate": 2.9528058281267452e-06, "loss": 0.2155, "step": 15833 }, { "epoch": 2.5784309734152995, "grad_norm": 0.16944487392902374, "learning_rate": 2.9505729106058424e-06, "loss": 0.2835, "step": 15834 }, { "epoch": 2.5785938199731304, "grad_norm": 0.18809470534324646, "learning_rate": 2.948340784725434e-06, "loss": 0.2786, "step": 15835 }, { "epoch": 2.5787566665309614, "grad_norm": 0.1598093956708908, "learning_rate": 2.9461094505656594e-06, "loss": 0.2768, "step": 15836 }, { "epoch": 2.578919513088792, "grad_norm": 0.18647564947605133, "learning_rate": 2.9438789082066236e-06, "loss": 0.2486, "step": 15837 }, { "epoch": 2.5790823596466232, "grad_norm": 0.18774493038654327, "learning_rate": 2.941649157728424e-06, "loss": 0.2827, "step": 15838 }, { "epoch": 2.5792452062044537, "grad_norm": 0.18837504088878632, "learning_rate": 2.939420199211107e-06, "loss": 0.2817, "step": 15839 }, { "epoch": 2.5794080527622847, "grad_norm": 0.18950651586055756, "learning_rate": 2.937192032734698e-06, "loss": 0.2658, "step": 15840 }, { "epoch": 2.5795708993201156, "grad_norm": 0.13200204074382782, "learning_rate": 2.9349646583791995e-06, "loss": 0.2482, "step": 15841 }, { "epoch": 2.5797337458779466, "grad_norm": 0.16831159591674805, "learning_rate": 2.932738076224578e-06, "loss": 0.2481, "step": 15842 }, { "epoch": 2.5798965924357775, "grad_norm": 0.17025460302829742, "learning_rate": 2.9305122863507754e-06, "loss": 0.2666, "step": 15843 }, { "epoch": 2.580059438993608, "grad_norm": 0.17588412761688232, "learning_rate": 2.9282872888377e-06, "loss": 0.2576, "step": 15844 }, { "epoch": 2.5802222855514394, "grad_norm": 0.1254960596561432, "learning_rate": 2.9260630837652433e-06, "loss": 0.2287, "step": 15845 }, { "epoch": 2.58038513210927, "grad_norm": 0.17592251300811768, "learning_rate": 2.9238396712132578e-06, "loss": 0.2506, "step": 15846 }, { "epoch": 2.580547978667101, "grad_norm": 0.21726812422275543, "learning_rate": 2.921617051261563e-06, "loss": 0.2511, "step": 15847 }, { "epoch": 2.580710825224932, "grad_norm": 0.1711370199918747, "learning_rate": 2.9193952239899707e-06, "loss": 0.2731, "step": 15848 }, { "epoch": 2.5808736717827627, "grad_norm": 0.1771227866411209, "learning_rate": 2.9171741894782416e-06, "loss": 0.2499, "step": 15849 }, { "epoch": 2.5810365183405937, "grad_norm": 0.18456360697746277, "learning_rate": 2.914953947806126e-06, "loss": 0.2317, "step": 15850 }, { "epoch": 2.5811993648984246, "grad_norm": 0.13870610296726227, "learning_rate": 2.9127344990533296e-06, "loss": 0.2631, "step": 15851 }, { "epoch": 2.5813622114562556, "grad_norm": 0.1508655697107315, "learning_rate": 2.9105158432995383e-06, "loss": 0.3177, "step": 15852 }, { "epoch": 2.581525058014086, "grad_norm": 0.19564303755760193, "learning_rate": 2.9082979806244137e-06, "loss": 0.2591, "step": 15853 }, { "epoch": 2.581687904571917, "grad_norm": 0.2079589068889618, "learning_rate": 2.906080911107578e-06, "loss": 0.2568, "step": 15854 }, { "epoch": 2.581850751129748, "grad_norm": 0.15106570720672607, "learning_rate": 2.903864634828629e-06, "loss": 0.2791, "step": 15855 }, { "epoch": 2.582013597687579, "grad_norm": 0.1731211394071579, "learning_rate": 2.9016491518671474e-06, "loss": 0.2629, "step": 15856 }, { "epoch": 2.58217644424541, "grad_norm": 0.17862728238105774, "learning_rate": 2.8994344623026694e-06, "loss": 0.281, "step": 15857 }, { "epoch": 2.5823392908032408, "grad_norm": 0.13088104128837585, "learning_rate": 2.897220566214703e-06, "loss": 0.2823, "step": 15858 }, { "epoch": 2.5825021373610717, "grad_norm": 0.21917420625686646, "learning_rate": 2.8950074636827474e-06, "loss": 0.2837, "step": 15859 }, { "epoch": 2.582664983918902, "grad_norm": 0.19671551883220673, "learning_rate": 2.892795154786251e-06, "loss": 0.2995, "step": 15860 }, { "epoch": 2.582827830476733, "grad_norm": 0.15356318652629852, "learning_rate": 2.890583639604644e-06, "loss": 0.2585, "step": 15861 }, { "epoch": 2.582990677034564, "grad_norm": 0.16316942870616913, "learning_rate": 2.888372918217319e-06, "loss": 0.2648, "step": 15862 }, { "epoch": 2.583153523592395, "grad_norm": 0.17814694344997406, "learning_rate": 2.8861629907036614e-06, "loss": 0.2605, "step": 15863 }, { "epoch": 2.583316370150226, "grad_norm": 0.18890297412872314, "learning_rate": 2.8839538571430067e-06, "loss": 0.3024, "step": 15864 }, { "epoch": 2.583479216708057, "grad_norm": 0.2234632521867752, "learning_rate": 2.8817455176146703e-06, "loss": 0.2544, "step": 15865 }, { "epoch": 2.583642063265888, "grad_norm": 0.22698524594306946, "learning_rate": 2.879537972197932e-06, "loss": 0.2843, "step": 15866 }, { "epoch": 2.5838049098237184, "grad_norm": 0.1626417189836502, "learning_rate": 2.87733122097206e-06, "loss": 0.3099, "step": 15867 }, { "epoch": 2.5839677563815497, "grad_norm": 0.1957516074180603, "learning_rate": 2.875125264016279e-06, "loss": 0.2711, "step": 15868 }, { "epoch": 2.5841306029393802, "grad_norm": 0.19200029969215393, "learning_rate": 2.8729201014097844e-06, "loss": 0.277, "step": 15869 }, { "epoch": 2.584293449497211, "grad_norm": 0.22006972134113312, "learning_rate": 2.8707157332317576e-06, "loss": 0.2664, "step": 15870 }, { "epoch": 2.584456296055042, "grad_norm": 0.23045961558818817, "learning_rate": 2.8685121595613346e-06, "loss": 0.2605, "step": 15871 }, { "epoch": 2.584619142612873, "grad_norm": 0.16084496676921844, "learning_rate": 2.8663093804776357e-06, "loss": 0.2539, "step": 15872 }, { "epoch": 2.584781989170704, "grad_norm": 0.16912266612052917, "learning_rate": 2.864107396059737e-06, "loss": 0.254, "step": 15873 }, { "epoch": 2.584944835728535, "grad_norm": 0.17037025094032288, "learning_rate": 2.8619062063867107e-06, "loss": 0.2614, "step": 15874 }, { "epoch": 2.585107682286366, "grad_norm": 0.19059330224990845, "learning_rate": 2.8597058115375804e-06, "loss": 0.2474, "step": 15875 }, { "epoch": 2.5852705288441964, "grad_norm": 0.15585844218730927, "learning_rate": 2.8575062115913433e-06, "loss": 0.2885, "step": 15876 }, { "epoch": 2.5854333754020273, "grad_norm": 0.15039242804050446, "learning_rate": 2.855307406626967e-06, "loss": 0.2669, "step": 15877 }, { "epoch": 2.5855962219598583, "grad_norm": 0.22080521285533905, "learning_rate": 2.8531093967234106e-06, "loss": 0.2571, "step": 15878 }, { "epoch": 2.585759068517689, "grad_norm": 0.17451079189777374, "learning_rate": 2.850912181959578e-06, "loss": 0.2835, "step": 15879 }, { "epoch": 2.58592191507552, "grad_norm": 0.22011874616146088, "learning_rate": 2.8487157624143555e-06, "loss": 0.2546, "step": 15880 }, { "epoch": 2.586084761633351, "grad_norm": 0.18550479412078857, "learning_rate": 2.8465201381666086e-06, "loss": 0.2818, "step": 15881 }, { "epoch": 2.586247608191182, "grad_norm": 0.16288666427135468, "learning_rate": 2.844325309295162e-06, "loss": 0.2733, "step": 15882 }, { "epoch": 2.5864104547490125, "grad_norm": 0.18328383564949036, "learning_rate": 2.8421312758788178e-06, "loss": 0.2484, "step": 15883 }, { "epoch": 2.5865733013068435, "grad_norm": 0.13065040111541748, "learning_rate": 2.8399380379963423e-06, "loss": 0.2015, "step": 15884 }, { "epoch": 2.5867361478646744, "grad_norm": 0.16526775062084198, "learning_rate": 2.8377455957264903e-06, "loss": 0.2439, "step": 15885 }, { "epoch": 2.5868989944225054, "grad_norm": 0.16228090226650238, "learning_rate": 2.835553949147965e-06, "loss": 0.2546, "step": 15886 }, { "epoch": 2.5870618409803363, "grad_norm": 0.16810530424118042, "learning_rate": 2.833363098339464e-06, "loss": 0.2634, "step": 15887 }, { "epoch": 2.5872246875381673, "grad_norm": 0.16301268339157104, "learning_rate": 2.8311730433796425e-06, "loss": 0.2521, "step": 15888 }, { "epoch": 2.587387534095998, "grad_norm": 0.1732432246208191, "learning_rate": 2.8289837843471196e-06, "loss": 0.3124, "step": 15889 }, { "epoch": 2.5875503806538287, "grad_norm": 0.1931713968515396, "learning_rate": 2.8267953213205135e-06, "loss": 0.2456, "step": 15890 }, { "epoch": 2.58771322721166, "grad_norm": 0.14538778364658356, "learning_rate": 2.8246076543783832e-06, "loss": 0.2375, "step": 15891 }, { "epoch": 2.5878760737694906, "grad_norm": 0.19364438951015472, "learning_rate": 2.8224207835992804e-06, "loss": 0.2596, "step": 15892 }, { "epoch": 2.5880389203273215, "grad_norm": 0.1576528698205948, "learning_rate": 2.820234709061717e-06, "loss": 0.2384, "step": 15893 }, { "epoch": 2.5882017668851525, "grad_norm": 0.17135384678840637, "learning_rate": 2.818049430844183e-06, "loss": 0.267, "step": 15894 }, { "epoch": 2.5883646134429834, "grad_norm": 0.19665448367595673, "learning_rate": 2.8158649490251266e-06, "loss": 0.3092, "step": 15895 }, { "epoch": 2.5885274600008144, "grad_norm": 0.18435023725032806, "learning_rate": 2.813681263682988e-06, "loss": 0.2663, "step": 15896 }, { "epoch": 2.588690306558645, "grad_norm": 0.17406977713108063, "learning_rate": 2.8114983748961654e-06, "loss": 0.2692, "step": 15897 }, { "epoch": 2.5888531531164762, "grad_norm": 0.18618349730968475, "learning_rate": 2.809316282743027e-06, "loss": 0.2744, "step": 15898 }, { "epoch": 2.5890159996743067, "grad_norm": 0.1871008425951004, "learning_rate": 2.8071349873019183e-06, "loss": 0.2731, "step": 15899 }, { "epoch": 2.5891788462321377, "grad_norm": 0.17182132601737976, "learning_rate": 2.804954488651157e-06, "loss": 0.2176, "step": 15900 }, { "epoch": 2.5893416927899686, "grad_norm": 0.10700148344039917, "learning_rate": 2.8027747868690277e-06, "loss": 0.2493, "step": 15901 }, { "epoch": 2.5895045393477996, "grad_norm": 0.18831762671470642, "learning_rate": 2.800595882033785e-06, "loss": 0.2655, "step": 15902 }, { "epoch": 2.5896673859056305, "grad_norm": 0.1876138150691986, "learning_rate": 2.7984177742236663e-06, "loss": 0.2684, "step": 15903 }, { "epoch": 2.5898302324634614, "grad_norm": 0.19370479881763458, "learning_rate": 2.7962404635168666e-06, "loss": 0.2851, "step": 15904 }, { "epoch": 2.5899930790212924, "grad_norm": 0.25645554065704346, "learning_rate": 2.794063949991557e-06, "loss": 0.2669, "step": 15905 }, { "epoch": 2.590155925579123, "grad_norm": 0.190135657787323, "learning_rate": 2.7918882337258783e-06, "loss": 0.2653, "step": 15906 }, { "epoch": 2.590318772136954, "grad_norm": 0.1750655472278595, "learning_rate": 2.7897133147979567e-06, "loss": 0.2683, "step": 15907 }, { "epoch": 2.5904816186947848, "grad_norm": 0.18342959880828857, "learning_rate": 2.787539193285868e-06, "loss": 0.2928, "step": 15908 }, { "epoch": 2.5906444652526157, "grad_norm": 0.18751807510852814, "learning_rate": 2.785365869267667e-06, "loss": 0.2654, "step": 15909 }, { "epoch": 2.5908073118104467, "grad_norm": 0.2073032706975937, "learning_rate": 2.7831933428213967e-06, "loss": 0.2624, "step": 15910 }, { "epoch": 2.5909701583682776, "grad_norm": 0.3872077167034149, "learning_rate": 2.781021614025045e-06, "loss": 0.2901, "step": 15911 }, { "epoch": 2.5911330049261085, "grad_norm": 0.14515277743339539, "learning_rate": 2.778850682956588e-06, "loss": 0.3022, "step": 15912 }, { "epoch": 2.591295851483939, "grad_norm": 0.1354261040687561, "learning_rate": 2.7766805496939636e-06, "loss": 0.2434, "step": 15913 }, { "epoch": 2.59145869804177, "grad_norm": 0.18169720470905304, "learning_rate": 2.7745112143150955e-06, "loss": 0.2594, "step": 15914 }, { "epoch": 2.591621544599601, "grad_norm": 0.17314749956130981, "learning_rate": 2.7723426768978654e-06, "loss": 0.2367, "step": 15915 }, { "epoch": 2.591784391157432, "grad_norm": 0.1810978651046753, "learning_rate": 2.770174937520126e-06, "loss": 0.2792, "step": 15916 }, { "epoch": 2.591947237715263, "grad_norm": 0.23423027992248535, "learning_rate": 2.7680079962597056e-06, "loss": 0.2798, "step": 15917 }, { "epoch": 2.5921100842730938, "grad_norm": 0.12776248157024384, "learning_rate": 2.765841853194412e-06, "loss": 0.2465, "step": 15918 }, { "epoch": 2.5922729308309247, "grad_norm": 0.16347239911556244, "learning_rate": 2.7636765084020105e-06, "loss": 0.2513, "step": 15919 }, { "epoch": 2.592435777388755, "grad_norm": 0.17737632989883423, "learning_rate": 2.761511961960239e-06, "loss": 0.275, "step": 15920 }, { "epoch": 2.5925986239465866, "grad_norm": 0.13291341066360474, "learning_rate": 2.7593482139468218e-06, "loss": 0.3014, "step": 15921 }, { "epoch": 2.592761470504417, "grad_norm": 0.18075329065322876, "learning_rate": 2.757185264439438e-06, "loss": 0.2641, "step": 15922 }, { "epoch": 2.592924317062248, "grad_norm": 0.20277631282806396, "learning_rate": 2.7550231135157425e-06, "loss": 0.3017, "step": 15923 }, { "epoch": 2.593087163620079, "grad_norm": 0.17755678296089172, "learning_rate": 2.752861761253367e-06, "loss": 0.2891, "step": 15924 }, { "epoch": 2.59325001017791, "grad_norm": 0.16840006411075592, "learning_rate": 2.7507012077299082e-06, "loss": 0.2588, "step": 15925 }, { "epoch": 2.593412856735741, "grad_norm": 0.20278948545455933, "learning_rate": 2.748541453022932e-06, "loss": 0.2431, "step": 15926 }, { "epoch": 2.593575703293572, "grad_norm": 0.1675555557012558, "learning_rate": 2.74638249720999e-06, "loss": 0.2716, "step": 15927 }, { "epoch": 2.5937385498514027, "grad_norm": 0.14842143654823303, "learning_rate": 2.744224340368584e-06, "loss": 0.2812, "step": 15928 }, { "epoch": 2.5939013964092332, "grad_norm": 0.18438035249710083, "learning_rate": 2.742066982576211e-06, "loss": 0.2812, "step": 15929 }, { "epoch": 2.594064242967064, "grad_norm": 0.1607995480298996, "learning_rate": 2.739910423910319e-06, "loss": 0.2886, "step": 15930 }, { "epoch": 2.594227089524895, "grad_norm": 0.17356076836585999, "learning_rate": 2.7377546644483305e-06, "loss": 0.2547, "step": 15931 }, { "epoch": 2.594389936082726, "grad_norm": 0.19146807491779327, "learning_rate": 2.735599704267655e-06, "loss": 0.2992, "step": 15932 }, { "epoch": 2.594552782640557, "grad_norm": 0.16322441399097443, "learning_rate": 2.7334455434456563e-06, "loss": 0.2839, "step": 15933 }, { "epoch": 2.594715629198388, "grad_norm": 0.17434340715408325, "learning_rate": 2.731292182059675e-06, "loss": 0.2725, "step": 15934 }, { "epoch": 2.594878475756219, "grad_norm": 0.18170244991779327, "learning_rate": 2.7291396201870155e-06, "loss": 0.2837, "step": 15935 }, { "epoch": 2.5950413223140494, "grad_norm": 0.20315515995025635, "learning_rate": 2.726987857904978e-06, "loss": 0.2632, "step": 15936 }, { "epoch": 2.5952041688718803, "grad_norm": 0.18200843036174774, "learning_rate": 2.7248368952908053e-06, "loss": 0.269, "step": 15937 }, { "epoch": 2.5953670154297113, "grad_norm": 0.178934246301651, "learning_rate": 2.7226867324217254e-06, "loss": 0.2311, "step": 15938 }, { "epoch": 2.595529861987542, "grad_norm": 0.17279528081417084, "learning_rate": 2.720537369374934e-06, "loss": 0.2993, "step": 15939 }, { "epoch": 2.595692708545373, "grad_norm": 0.1630067378282547, "learning_rate": 2.7183888062276066e-06, "loss": 0.2743, "step": 15940 }, { "epoch": 2.595855555103204, "grad_norm": 0.23840855062007904, "learning_rate": 2.7162410430568773e-06, "loss": 0.2554, "step": 15941 }, { "epoch": 2.596018401661035, "grad_norm": 0.1802629977464676, "learning_rate": 2.7140940799398553e-06, "loss": 0.2838, "step": 15942 }, { "epoch": 2.5961812482188655, "grad_norm": 0.17273928225040436, "learning_rate": 2.711947916953628e-06, "loss": 0.2526, "step": 15943 }, { "epoch": 2.596344094776697, "grad_norm": 0.13911846280097961, "learning_rate": 2.7098025541752503e-06, "loss": 0.2531, "step": 15944 }, { "epoch": 2.5965069413345274, "grad_norm": 0.15200312435626984, "learning_rate": 2.7076579916817418e-06, "loss": 0.342, "step": 15945 }, { "epoch": 2.5966697878923584, "grad_norm": 0.22429445385932922, "learning_rate": 2.7055142295500992e-06, "loss": 0.3131, "step": 15946 }, { "epoch": 2.5968326344501893, "grad_norm": 0.17167069017887115, "learning_rate": 2.7033712678572938e-06, "loss": 0.2747, "step": 15947 }, { "epoch": 2.5969954810080202, "grad_norm": 0.23566806316375732, "learning_rate": 2.701229106680264e-06, "loss": 0.2578, "step": 15948 }, { "epoch": 2.597158327565851, "grad_norm": 0.17108604311943054, "learning_rate": 2.699087746095916e-06, "loss": 0.2595, "step": 15949 }, { "epoch": 2.5973211741236817, "grad_norm": 0.1697460263967514, "learning_rate": 2.6969471861811315e-06, "loss": 0.2432, "step": 15950 }, { "epoch": 2.597484020681513, "grad_norm": 0.1585492640733719, "learning_rate": 2.694807427012769e-06, "loss": 0.2548, "step": 15951 }, { "epoch": 2.5976468672393436, "grad_norm": 0.36726462841033936, "learning_rate": 2.6926684686676445e-06, "loss": 0.2963, "step": 15952 }, { "epoch": 2.5978097137971745, "grad_norm": 0.16276822984218597, "learning_rate": 2.6905303112225556e-06, "loss": 0.2498, "step": 15953 }, { "epoch": 2.5979725603550055, "grad_norm": 0.18016545474529266, "learning_rate": 2.6883929547542735e-06, "loss": 0.2604, "step": 15954 }, { "epoch": 2.5981354069128364, "grad_norm": 0.20599676668643951, "learning_rate": 2.686256399339529e-06, "loss": 0.262, "step": 15955 }, { "epoch": 2.5982982534706673, "grad_norm": 0.1348457932472229, "learning_rate": 2.684120645055038e-06, "loss": 0.2905, "step": 15956 }, { "epoch": 2.5984611000284983, "grad_norm": 0.18237924575805664, "learning_rate": 2.6819856919774677e-06, "loss": 0.2935, "step": 15957 }, { "epoch": 2.5986239465863292, "grad_norm": 0.15214824676513672, "learning_rate": 2.6798515401834838e-06, "loss": 0.2947, "step": 15958 }, { "epoch": 2.5987867931441597, "grad_norm": 0.14568880200386047, "learning_rate": 2.6777181897497034e-06, "loss": 0.2588, "step": 15959 }, { "epoch": 2.5989496397019907, "grad_norm": 0.13430550694465637, "learning_rate": 2.675585640752712e-06, "loss": 0.2577, "step": 15960 }, { "epoch": 2.5991124862598216, "grad_norm": 0.14848357439041138, "learning_rate": 2.6734538932690874e-06, "loss": 0.3067, "step": 15961 }, { "epoch": 2.5992753328176526, "grad_norm": 0.18026623129844666, "learning_rate": 2.6713229473753627e-06, "loss": 0.2688, "step": 15962 }, { "epoch": 2.5994381793754835, "grad_norm": 0.15789151191711426, "learning_rate": 2.669192803148035e-06, "loss": 0.2269, "step": 15963 }, { "epoch": 2.5996010259333144, "grad_norm": 0.17518600821495056, "learning_rate": 2.6670634606635964e-06, "loss": 0.2502, "step": 15964 }, { "epoch": 2.5997638724911454, "grad_norm": 0.16444890201091766, "learning_rate": 2.6649349199984907e-06, "loss": 0.242, "step": 15965 }, { "epoch": 2.599926719048976, "grad_norm": 0.17787235975265503, "learning_rate": 2.662807181229135e-06, "loss": 0.2445, "step": 15966 }, { "epoch": 2.6000895656068073, "grad_norm": 0.15512238442897797, "learning_rate": 2.660680244431929e-06, "loss": 0.2972, "step": 15967 }, { "epoch": 2.6002524121646378, "grad_norm": 0.3885771930217743, "learning_rate": 2.6585541096832283e-06, "loss": 0.281, "step": 15968 }, { "epoch": 2.6004152587224687, "grad_norm": 0.18578998744487762, "learning_rate": 2.6564287770593776e-06, "loss": 0.2162, "step": 15969 }, { "epoch": 2.6005781052802996, "grad_norm": 0.15710538625717163, "learning_rate": 2.654304246636674e-06, "loss": 0.24, "step": 15970 }, { "epoch": 2.6007409518381306, "grad_norm": 0.187406525015831, "learning_rate": 2.6521805184913956e-06, "loss": 0.2656, "step": 15971 }, { "epoch": 2.6009037983959615, "grad_norm": 0.16353677213191986, "learning_rate": 2.6500575926997976e-06, "loss": 0.245, "step": 15972 }, { "epoch": 2.601066644953792, "grad_norm": 0.1713666021823883, "learning_rate": 2.647935469338092e-06, "loss": 0.2772, "step": 15973 }, { "epoch": 2.6012294915116234, "grad_norm": 0.18517036736011505, "learning_rate": 2.6458141484824727e-06, "loss": 0.2768, "step": 15974 }, { "epoch": 2.601392338069454, "grad_norm": 0.16389428079128265, "learning_rate": 2.6436936302090958e-06, "loss": 0.2719, "step": 15975 }, { "epoch": 2.601555184627285, "grad_norm": 0.17294661700725555, "learning_rate": 2.6415739145941033e-06, "loss": 0.2727, "step": 15976 }, { "epoch": 2.601718031185116, "grad_norm": 0.17824901640415192, "learning_rate": 2.6394550017135948e-06, "loss": 0.2654, "step": 15977 }, { "epoch": 2.6018808777429467, "grad_norm": 0.19557981193065643, "learning_rate": 2.6373368916436466e-06, "loss": 0.2794, "step": 15978 }, { "epoch": 2.6020437243007777, "grad_norm": 0.18531562387943268, "learning_rate": 2.635219584460297e-06, "loss": 0.2595, "step": 15979 }, { "epoch": 2.6022065708586086, "grad_norm": 0.20375066995620728, "learning_rate": 2.6331030802395794e-06, "loss": 0.2937, "step": 15980 }, { "epoch": 2.6023694174164396, "grad_norm": 0.16223277151584625, "learning_rate": 2.6309873790574703e-06, "loss": 0.2733, "step": 15981 }, { "epoch": 2.60253226397427, "grad_norm": 0.1487642079591751, "learning_rate": 2.628872480989933e-06, "loss": 0.2834, "step": 15982 }, { "epoch": 2.602695110532101, "grad_norm": 0.20117077231407166, "learning_rate": 2.6267583861129015e-06, "loss": 0.2661, "step": 15983 }, { "epoch": 2.602857957089932, "grad_norm": 0.11393435299396515, "learning_rate": 2.624645094502276e-06, "loss": 0.2486, "step": 15984 }, { "epoch": 2.603020803647763, "grad_norm": 0.18955539166927338, "learning_rate": 2.6225326062339295e-06, "loss": 0.2472, "step": 15985 }, { "epoch": 2.603183650205594, "grad_norm": 0.16760437190532684, "learning_rate": 2.6204209213837037e-06, "loss": 0.2518, "step": 15986 }, { "epoch": 2.6033464967634248, "grad_norm": 0.16873808205127716, "learning_rate": 2.618310040027422e-06, "loss": 0.2171, "step": 15987 }, { "epoch": 2.6035093433212557, "grad_norm": 0.17459413409233093, "learning_rate": 2.6161999622408675e-06, "loss": 0.2818, "step": 15988 }, { "epoch": 2.603672189879086, "grad_norm": 0.13404801487922668, "learning_rate": 2.614090688099799e-06, "loss": 0.2911, "step": 15989 }, { "epoch": 2.603835036436917, "grad_norm": 0.17275594174861908, "learning_rate": 2.611982217679937e-06, "loss": 0.2607, "step": 15990 }, { "epoch": 2.603997882994748, "grad_norm": 0.1883687973022461, "learning_rate": 2.6098745510569984e-06, "loss": 0.2627, "step": 15991 }, { "epoch": 2.604160729552579, "grad_norm": 0.1637783795595169, "learning_rate": 2.607767688306642e-06, "loss": 0.2551, "step": 15992 }, { "epoch": 2.60432357611041, "grad_norm": 0.19149309396743774, "learning_rate": 2.6056616295045135e-06, "loss": 0.2782, "step": 15993 }, { "epoch": 2.604486422668241, "grad_norm": 0.17754563689231873, "learning_rate": 2.6035563747262294e-06, "loss": 0.2619, "step": 15994 }, { "epoch": 2.604649269226072, "grad_norm": 0.21049651503562927, "learning_rate": 2.601451924047374e-06, "loss": 0.2849, "step": 15995 }, { "epoch": 2.6048121157839024, "grad_norm": 0.2074601799249649, "learning_rate": 2.5993482775435037e-06, "loss": 0.248, "step": 15996 }, { "epoch": 2.6049749623417338, "grad_norm": 0.1593116968870163, "learning_rate": 2.597245435290138e-06, "loss": 0.2541, "step": 15997 }, { "epoch": 2.6051378088995643, "grad_norm": 0.12502485513687134, "learning_rate": 2.595143397362787e-06, "loss": 0.2626, "step": 15998 }, { "epoch": 2.605300655457395, "grad_norm": 0.1959506720304489, "learning_rate": 2.5930421638369144e-06, "loss": 0.243, "step": 15999 }, { "epoch": 2.605463502015226, "grad_norm": 0.17788174748420715, "learning_rate": 2.590941734787958e-06, "loss": 0.247, "step": 16000 }, { "epoch": 2.605626348573057, "grad_norm": 0.225473091006279, "learning_rate": 2.588842110291334e-06, "loss": 0.2628, "step": 16001 }, { "epoch": 2.605789195130888, "grad_norm": 0.19973012804985046, "learning_rate": 2.5867432904224275e-06, "loss": 0.2392, "step": 16002 }, { "epoch": 2.605952041688719, "grad_norm": 0.14512762427330017, "learning_rate": 2.584645275256581e-06, "loss": 0.2447, "step": 16003 }, { "epoch": 2.60611488824655, "grad_norm": 0.19267956912517548, "learning_rate": 2.5825480648691334e-06, "loss": 0.2887, "step": 16004 }, { "epoch": 2.6062777348043804, "grad_norm": 0.17258328199386597, "learning_rate": 2.58045165933537e-06, "loss": 0.2722, "step": 16005 }, { "epoch": 2.6064405813622114, "grad_norm": 0.9304295182228088, "learning_rate": 2.5783560587305687e-06, "loss": 0.3208, "step": 16006 }, { "epoch": 2.6066034279200423, "grad_norm": 0.17427121102809906, "learning_rate": 2.5762612631299615e-06, "loss": 0.288, "step": 16007 }, { "epoch": 2.6067662744778732, "grad_norm": 0.165268674492836, "learning_rate": 2.5741672726087517e-06, "loss": 0.2354, "step": 16008 }, { "epoch": 2.606929121035704, "grad_norm": 0.21445228159427643, "learning_rate": 2.5720740872421324e-06, "loss": 0.2865, "step": 16009 }, { "epoch": 2.607091967593535, "grad_norm": 0.15609613060951233, "learning_rate": 2.5699817071052483e-06, "loss": 0.2243, "step": 16010 }, { "epoch": 2.607254814151366, "grad_norm": 0.17098358273506165, "learning_rate": 2.567890132273221e-06, "loss": 0.249, "step": 16011 }, { "epoch": 2.6074176607091966, "grad_norm": 0.20391051471233368, "learning_rate": 2.5657993628211473e-06, "loss": 0.3107, "step": 16012 }, { "epoch": 2.6075805072670275, "grad_norm": 0.15724484622478485, "learning_rate": 2.5637093988240933e-06, "loss": 0.267, "step": 16013 }, { "epoch": 2.6077433538248584, "grad_norm": 0.1948387175798416, "learning_rate": 2.5616202403570933e-06, "loss": 0.2452, "step": 16014 }, { "epoch": 2.6079062003826894, "grad_norm": 0.16182337701320648, "learning_rate": 2.559531887495148e-06, "loss": 0.2698, "step": 16015 }, { "epoch": 2.6080690469405203, "grad_norm": 0.19162720441818237, "learning_rate": 2.5574443403132474e-06, "loss": 0.2912, "step": 16016 }, { "epoch": 2.6082318934983513, "grad_norm": 0.18432122468948364, "learning_rate": 2.5553575988863347e-06, "loss": 0.2686, "step": 16017 }, { "epoch": 2.608394740056182, "grad_norm": 0.124763123691082, "learning_rate": 2.553271663289328e-06, "loss": 0.2714, "step": 16018 }, { "epoch": 2.6085575866140127, "grad_norm": 0.1994353085756302, "learning_rate": 2.5511865335971163e-06, "loss": 0.2637, "step": 16019 }, { "epoch": 2.608720433171844, "grad_norm": 0.18832539021968842, "learning_rate": 2.549102209884574e-06, "loss": 0.2789, "step": 16020 }, { "epoch": 2.6088832797296746, "grad_norm": 0.16061466932296753, "learning_rate": 2.5470186922265245e-06, "loss": 0.24, "step": 16021 }, { "epoch": 2.6090461262875055, "grad_norm": 0.16881462931632996, "learning_rate": 2.5449359806977714e-06, "loss": 0.275, "step": 16022 }, { "epoch": 2.6092089728453365, "grad_norm": 0.13868308067321777, "learning_rate": 2.5428540753730966e-06, "loss": 0.2613, "step": 16023 }, { "epoch": 2.6093718194031674, "grad_norm": 0.149278461933136, "learning_rate": 2.5407729763272466e-06, "loss": 0.2688, "step": 16024 }, { "epoch": 2.6095346659609984, "grad_norm": 0.19079343974590302, "learning_rate": 2.5386926836349357e-06, "loss": 0.2629, "step": 16025 }, { "epoch": 2.609697512518829, "grad_norm": 0.14909245073795319, "learning_rate": 2.536613197370846e-06, "loss": 0.245, "step": 16026 }, { "epoch": 2.6098603590766603, "grad_norm": 0.14931613206863403, "learning_rate": 2.5345345176096518e-06, "loss": 0.2773, "step": 16027 }, { "epoch": 2.6100232056344908, "grad_norm": 0.17065922915935516, "learning_rate": 2.532456644425976e-06, "loss": 0.2503, "step": 16028 }, { "epoch": 2.6101860521923217, "grad_norm": 0.16944105923175812, "learning_rate": 2.5303795778944235e-06, "loss": 0.2591, "step": 16029 }, { "epoch": 2.6103488987501526, "grad_norm": 0.1529504507780075, "learning_rate": 2.528303318089556e-06, "loss": 0.2394, "step": 16030 }, { "epoch": 2.6105117453079836, "grad_norm": 0.22123317420482635, "learning_rate": 2.526227865085934e-06, "loss": 0.2816, "step": 16031 }, { "epoch": 2.6106745918658145, "grad_norm": 0.20040203630924225, "learning_rate": 2.524153218958064e-06, "loss": 0.2834, "step": 16032 }, { "epoch": 2.6108374384236455, "grad_norm": 0.19249778985977173, "learning_rate": 2.522079379780429e-06, "loss": 0.2492, "step": 16033 }, { "epoch": 2.6110002849814764, "grad_norm": 0.16975857317447662, "learning_rate": 2.5200063476274933e-06, "loss": 0.2496, "step": 16034 }, { "epoch": 2.611163131539307, "grad_norm": 0.17235495150089264, "learning_rate": 2.5179341225736814e-06, "loss": 0.2188, "step": 16035 }, { "epoch": 2.611325978097138, "grad_norm": 0.15190786123275757, "learning_rate": 2.515862704693395e-06, "loss": 0.2554, "step": 16036 }, { "epoch": 2.611488824654969, "grad_norm": 0.21647948026657104, "learning_rate": 2.5137920940609935e-06, "loss": 0.273, "step": 16037 }, { "epoch": 2.6116516712127997, "grad_norm": 0.17526116967201233, "learning_rate": 2.5117222907508343e-06, "loss": 0.234, "step": 16038 }, { "epoch": 2.6118145177706307, "grad_norm": 0.23577550053596497, "learning_rate": 2.5096532948372193e-06, "loss": 0.249, "step": 16039 }, { "epoch": 2.6119773643284616, "grad_norm": 0.20172518491744995, "learning_rate": 2.5075851063944305e-06, "loss": 0.2554, "step": 16040 }, { "epoch": 2.6121402108862926, "grad_norm": 0.17202280461788177, "learning_rate": 2.5055177254967283e-06, "loss": 0.2647, "step": 16041 }, { "epoch": 2.612303057444123, "grad_norm": 0.16645529866218567, "learning_rate": 2.5034511522183334e-06, "loss": 0.2751, "step": 16042 }, { "epoch": 2.612465904001954, "grad_norm": 0.14405161142349243, "learning_rate": 2.501385386633445e-06, "loss": 0.2802, "step": 16043 }, { "epoch": 2.612628750559785, "grad_norm": 0.23526911437511444, "learning_rate": 2.499320428816232e-06, "loss": 0.2559, "step": 16044 }, { "epoch": 2.612791597117616, "grad_norm": 0.17249765992164612, "learning_rate": 2.497256278840823e-06, "loss": 0.2705, "step": 16045 }, { "epoch": 2.612954443675447, "grad_norm": 0.1421874612569809, "learning_rate": 2.4951929367813375e-06, "loss": 0.2823, "step": 16046 }, { "epoch": 2.6131172902332778, "grad_norm": 0.16472403705120087, "learning_rate": 2.493130402711852e-06, "loss": 0.235, "step": 16047 }, { "epoch": 2.6132801367911087, "grad_norm": 0.17589807510375977, "learning_rate": 2.491068676706415e-06, "loss": 0.2407, "step": 16048 }, { "epoch": 2.613442983348939, "grad_norm": 0.1635272055864334, "learning_rate": 2.489007758839054e-06, "loss": 0.2789, "step": 16049 }, { "epoch": 2.6136058299067706, "grad_norm": 0.16493578255176544, "learning_rate": 2.48694764918376e-06, "loss": 0.256, "step": 16050 }, { "epoch": 2.613768676464601, "grad_norm": 0.1493883728981018, "learning_rate": 2.4848883478144958e-06, "loss": 0.2855, "step": 16051 }, { "epoch": 2.613931523022432, "grad_norm": 0.19632422924041748, "learning_rate": 2.482829854805191e-06, "loss": 0.2839, "step": 16052 }, { "epoch": 2.614094369580263, "grad_norm": 0.17905175685882568, "learning_rate": 2.480772170229764e-06, "loss": 0.2491, "step": 16053 }, { "epoch": 2.614257216138094, "grad_norm": 0.20475323498249054, "learning_rate": 2.4787152941620843e-06, "loss": 0.2853, "step": 16054 }, { "epoch": 2.614420062695925, "grad_norm": 0.193293035030365, "learning_rate": 2.476659226675998e-06, "loss": 0.2589, "step": 16055 }, { "epoch": 2.614582909253756, "grad_norm": 0.17633551359176636, "learning_rate": 2.474603967845332e-06, "loss": 0.267, "step": 16056 }, { "epoch": 2.6147457558115867, "grad_norm": 0.14352469146251678, "learning_rate": 2.472549517743869e-06, "loss": 0.2683, "step": 16057 }, { "epoch": 2.6149086023694172, "grad_norm": 0.1906311959028244, "learning_rate": 2.470495876445375e-06, "loss": 0.2753, "step": 16058 }, { "epoch": 2.615071448927248, "grad_norm": 0.15636619925498962, "learning_rate": 2.468443044023572e-06, "loss": 0.2906, "step": 16059 }, { "epoch": 2.615234295485079, "grad_norm": 0.15639272332191467, "learning_rate": 2.4663910205521776e-06, "loss": 0.2809, "step": 16060 }, { "epoch": 2.61539714204291, "grad_norm": 0.16258962452411652, "learning_rate": 2.464339806104857e-06, "loss": 0.2869, "step": 16061 }, { "epoch": 2.615559988600741, "grad_norm": 0.16127784550189972, "learning_rate": 2.4622894007552495e-06, "loss": 0.2423, "step": 16062 }, { "epoch": 2.615722835158572, "grad_norm": 0.2034662365913391, "learning_rate": 2.460239804576983e-06, "loss": 0.2807, "step": 16063 }, { "epoch": 2.615885681716403, "grad_norm": 0.17832060158252716, "learning_rate": 2.458191017643638e-06, "loss": 0.3216, "step": 16064 }, { "epoch": 2.6160485282742334, "grad_norm": 0.18783769011497498, "learning_rate": 2.4561430400287745e-06, "loss": 0.277, "step": 16065 }, { "epoch": 2.6162113748320643, "grad_norm": 0.1694481372833252, "learning_rate": 2.4540958718059145e-06, "loss": 0.2537, "step": 16066 }, { "epoch": 2.6163742213898953, "grad_norm": 0.1803048700094223, "learning_rate": 2.4520495130485654e-06, "loss": 0.2572, "step": 16067 }, { "epoch": 2.6165370679477262, "grad_norm": 0.15986883640289307, "learning_rate": 2.4500039638301943e-06, "loss": 0.2482, "step": 16068 }, { "epoch": 2.616699914505557, "grad_norm": 0.17942337691783905, "learning_rate": 2.447959224224242e-06, "loss": 0.27, "step": 16069 }, { "epoch": 2.616862761063388, "grad_norm": 0.2038082629442215, "learning_rate": 2.4459152943041157e-06, "loss": 0.2808, "step": 16070 }, { "epoch": 2.617025607621219, "grad_norm": 0.1976698935031891, "learning_rate": 2.443872174143211e-06, "loss": 0.2853, "step": 16071 }, { "epoch": 2.6171884541790496, "grad_norm": 0.1906740814447403, "learning_rate": 2.4418298638148733e-06, "loss": 0.277, "step": 16072 }, { "epoch": 2.617351300736881, "grad_norm": 0.1609717607498169, "learning_rate": 2.439788363392426e-06, "loss": 0.2538, "step": 16073 }, { "epoch": 2.6175141472947114, "grad_norm": 0.1836545467376709, "learning_rate": 2.4377476729491725e-06, "loss": 0.2533, "step": 16074 }, { "epoch": 2.6176769938525424, "grad_norm": 0.1723761260509491, "learning_rate": 2.4357077925583782e-06, "loss": 0.2915, "step": 16075 }, { "epoch": 2.6178398404103733, "grad_norm": 0.2651531398296356, "learning_rate": 2.433668722293278e-06, "loss": 0.267, "step": 16076 }, { "epoch": 2.6180026869682043, "grad_norm": 0.16404792666435242, "learning_rate": 2.4316304622270746e-06, "loss": 0.2431, "step": 16077 }, { "epoch": 2.618165533526035, "grad_norm": 0.16776736080646515, "learning_rate": 2.4295930124329597e-06, "loss": 0.2542, "step": 16078 }, { "epoch": 2.6183283800838657, "grad_norm": 0.15765081346035004, "learning_rate": 2.4275563729840744e-06, "loss": 0.2476, "step": 16079 }, { "epoch": 2.618491226641697, "grad_norm": 0.18299025297164917, "learning_rate": 2.4255205439535465e-06, "loss": 0.2461, "step": 16080 }, { "epoch": 2.6186540731995276, "grad_norm": 0.1667768508195877, "learning_rate": 2.4234855254144707e-06, "loss": 0.225, "step": 16081 }, { "epoch": 2.6188169197573585, "grad_norm": 0.1791858673095703, "learning_rate": 2.4214513174398963e-06, "loss": 0.2616, "step": 16082 }, { "epoch": 2.6189797663151895, "grad_norm": 0.16851124167442322, "learning_rate": 2.4194179201028733e-06, "loss": 0.2506, "step": 16083 }, { "epoch": 2.6191426128730204, "grad_norm": 0.15913522243499756, "learning_rate": 2.4173853334763993e-06, "loss": 0.265, "step": 16084 }, { "epoch": 2.6193054594308514, "grad_norm": 0.17713075876235962, "learning_rate": 2.4153535576334487e-06, "loss": 0.2392, "step": 16085 }, { "epoch": 2.6194683059886823, "grad_norm": 0.15851648151874542, "learning_rate": 2.4133225926469744e-06, "loss": 0.2393, "step": 16086 }, { "epoch": 2.6196311525465132, "grad_norm": 0.17124566435813904, "learning_rate": 2.4112924385898877e-06, "loss": 0.2835, "step": 16087 }, { "epoch": 2.6197939991043437, "grad_norm": 0.19397111237049103, "learning_rate": 2.4092630955350805e-06, "loss": 0.2602, "step": 16088 }, { "epoch": 2.6199568456621747, "grad_norm": 0.2046697586774826, "learning_rate": 2.407234563555413e-06, "loss": 0.224, "step": 16089 }, { "epoch": 2.6201196922200056, "grad_norm": 0.20259007811546326, "learning_rate": 2.4052068427237146e-06, "loss": 0.2221, "step": 16090 }, { "epoch": 2.6202825387778366, "grad_norm": 0.21366487443447113, "learning_rate": 2.4031799331127867e-06, "loss": 0.3041, "step": 16091 }, { "epoch": 2.6204453853356675, "grad_norm": 0.21904751658439636, "learning_rate": 2.401153834795397e-06, "loss": 0.2909, "step": 16092 }, { "epoch": 2.6206082318934985, "grad_norm": 0.2030714750289917, "learning_rate": 2.3991285478442953e-06, "loss": 0.2765, "step": 16093 }, { "epoch": 2.6207710784513294, "grad_norm": 0.19125355780124664, "learning_rate": 2.397104072332196e-06, "loss": 0.2911, "step": 16094 }, { "epoch": 2.62093392500916, "grad_norm": 0.16760745644569397, "learning_rate": 2.3950804083317733e-06, "loss": 0.3122, "step": 16095 }, { "epoch": 2.6210967715669913, "grad_norm": 0.17630141973495483, "learning_rate": 2.3930575559156954e-06, "loss": 0.2487, "step": 16096 }, { "epoch": 2.6212596181248218, "grad_norm": 0.15373191237449646, "learning_rate": 2.391035515156584e-06, "loss": 0.2805, "step": 16097 }, { "epoch": 2.6214224646826527, "grad_norm": 0.19752255082130432, "learning_rate": 2.3890142861270366e-06, "loss": 0.3085, "step": 16098 }, { "epoch": 2.6215853112404837, "grad_norm": 0.17518745362758636, "learning_rate": 2.3869938688996146e-06, "loss": 0.2639, "step": 16099 }, { "epoch": 2.6217481577983146, "grad_norm": 0.1518184393644333, "learning_rate": 2.3849742635468686e-06, "loss": 0.2687, "step": 16100 }, { "epoch": 2.6219110043561455, "grad_norm": 0.14455191791057587, "learning_rate": 2.3829554701413015e-06, "loss": 0.2599, "step": 16101 }, { "epoch": 2.622073850913976, "grad_norm": 0.20454055070877075, "learning_rate": 2.3809374887553916e-06, "loss": 0.2745, "step": 16102 }, { "epoch": 2.6222366974718074, "grad_norm": 0.1701347827911377, "learning_rate": 2.3789203194616e-06, "loss": 0.2506, "step": 16103 }, { "epoch": 2.622399544029638, "grad_norm": 0.18429458141326904, "learning_rate": 2.3769039623323412e-06, "loss": 0.2466, "step": 16104 }, { "epoch": 2.622562390587469, "grad_norm": 0.17152202129364014, "learning_rate": 2.3748884174400134e-06, "loss": 0.2508, "step": 16105 }, { "epoch": 2.6227252371453, "grad_norm": 0.1970241516828537, "learning_rate": 2.3728736848569716e-06, "loss": 0.2659, "step": 16106 }, { "epoch": 2.6228880837031308, "grad_norm": 0.18905311822891235, "learning_rate": 2.3708597646555643e-06, "loss": 0.2585, "step": 16107 }, { "epoch": 2.6230509302609617, "grad_norm": 0.16907146573066711, "learning_rate": 2.3688466569080857e-06, "loss": 0.2774, "step": 16108 }, { "epoch": 2.6232137768187926, "grad_norm": 0.21930204331874847, "learning_rate": 2.3668343616868204e-06, "loss": 0.2755, "step": 16109 }, { "epoch": 2.6233766233766236, "grad_norm": 0.16079533100128174, "learning_rate": 2.3648228790640076e-06, "loss": 0.2782, "step": 16110 }, { "epoch": 2.623539469934454, "grad_norm": 0.1637360006570816, "learning_rate": 2.3628122091118725e-06, "loss": 0.2792, "step": 16111 }, { "epoch": 2.623702316492285, "grad_norm": 0.15860503911972046, "learning_rate": 2.360802351902605e-06, "loss": 0.2724, "step": 16112 }, { "epoch": 2.623865163050116, "grad_norm": 0.17213033139705658, "learning_rate": 2.358793307508356e-06, "loss": 0.2569, "step": 16113 }, { "epoch": 2.624028009607947, "grad_norm": 0.18228915333747864, "learning_rate": 2.356785076001267e-06, "loss": 0.2488, "step": 16114 }, { "epoch": 2.624190856165778, "grad_norm": 0.19025298953056335, "learning_rate": 2.354777657453433e-06, "loss": 0.2641, "step": 16115 }, { "epoch": 2.624353702723609, "grad_norm": 0.2235611528158188, "learning_rate": 2.352771051936928e-06, "loss": 0.249, "step": 16116 }, { "epoch": 2.6245165492814397, "grad_norm": 0.18474070727825165, "learning_rate": 2.350765259523793e-06, "loss": 0.256, "step": 16117 }, { "epoch": 2.6246793958392702, "grad_norm": 0.19214250147342682, "learning_rate": 2.348760280286047e-06, "loss": 0.2447, "step": 16118 }, { "epoch": 2.624842242397101, "grad_norm": 0.1536267101764679, "learning_rate": 2.3467561142956674e-06, "loss": 0.2507, "step": 16119 }, { "epoch": 2.625005088954932, "grad_norm": 0.14961984753608704, "learning_rate": 2.344752761624619e-06, "loss": 0.2674, "step": 16120 }, { "epoch": 2.625167935512763, "grad_norm": 0.17426232993602753, "learning_rate": 2.342750222344825e-06, "loss": 0.237, "step": 16121 }, { "epoch": 2.625330782070594, "grad_norm": 0.22009973227977753, "learning_rate": 2.340748496528172e-06, "loss": 0.2814, "step": 16122 }, { "epoch": 2.625493628628425, "grad_norm": 0.19911760091781616, "learning_rate": 2.338747584246545e-06, "loss": 0.2853, "step": 16123 }, { "epoch": 2.625656475186256, "grad_norm": 0.22655746340751648, "learning_rate": 2.336747485571769e-06, "loss": 0.2692, "step": 16124 }, { "epoch": 2.6258193217440864, "grad_norm": 0.22236992418766022, "learning_rate": 2.334748200575665e-06, "loss": 0.2659, "step": 16125 }, { "epoch": 2.6259821683019178, "grad_norm": 0.1586415022611618, "learning_rate": 2.3327497293300053e-06, "loss": 0.2713, "step": 16126 }, { "epoch": 2.6261450148597483, "grad_norm": 0.1741289496421814, "learning_rate": 2.3307520719065447e-06, "loss": 0.2776, "step": 16127 }, { "epoch": 2.626307861417579, "grad_norm": 0.18675485253334045, "learning_rate": 2.328755228377e-06, "loss": 0.2352, "step": 16128 }, { "epoch": 2.62647070797541, "grad_norm": 0.1890517920255661, "learning_rate": 2.3267591988130723e-06, "loss": 0.2717, "step": 16129 }, { "epoch": 2.626633554533241, "grad_norm": 0.15383405983448029, "learning_rate": 2.3247639832864183e-06, "loss": 0.2433, "step": 16130 }, { "epoch": 2.626796401091072, "grad_norm": 0.1934850811958313, "learning_rate": 2.322769581868672e-06, "loss": 0.2791, "step": 16131 }, { "epoch": 2.626959247648903, "grad_norm": 0.16412058472633362, "learning_rate": 2.3207759946314413e-06, "loss": 0.2525, "step": 16132 }, { "epoch": 2.627122094206734, "grad_norm": 0.20623166859149933, "learning_rate": 2.3187832216463007e-06, "loss": 0.2688, "step": 16133 }, { "epoch": 2.6272849407645644, "grad_norm": 0.14854533970355988, "learning_rate": 2.3167912629847993e-06, "loss": 0.2324, "step": 16134 }, { "epoch": 2.6274477873223954, "grad_norm": 0.17803551256656647, "learning_rate": 2.314800118718449e-06, "loss": 0.2654, "step": 16135 }, { "epoch": 2.6276106338802263, "grad_norm": 0.17217209935188293, "learning_rate": 2.3128097889187432e-06, "loss": 0.2401, "step": 16136 }, { "epoch": 2.6277734804380573, "grad_norm": 0.1628810614347458, "learning_rate": 2.310820273657141e-06, "loss": 0.2072, "step": 16137 }, { "epoch": 2.627936326995888, "grad_norm": 0.20582057535648346, "learning_rate": 2.3088315730050685e-06, "loss": 0.2505, "step": 16138 }, { "epoch": 2.628099173553719, "grad_norm": 0.21613124012947083, "learning_rate": 2.306843687033922e-06, "loss": 0.2513, "step": 16139 }, { "epoch": 2.62826202011155, "grad_norm": 0.15619315207004547, "learning_rate": 2.3048566158150846e-06, "loss": 0.2186, "step": 16140 }, { "epoch": 2.6284248666693806, "grad_norm": 0.15092213451862335, "learning_rate": 2.302870359419887e-06, "loss": 0.2548, "step": 16141 }, { "epoch": 2.6285877132272115, "grad_norm": 0.21181927621364594, "learning_rate": 2.300884917919649e-06, "loss": 0.2715, "step": 16142 }, { "epoch": 2.6287505597850425, "grad_norm": 0.16908229887485504, "learning_rate": 2.2989002913856444e-06, "loss": 0.2601, "step": 16143 }, { "epoch": 2.6289134063428734, "grad_norm": 0.1838613599538803, "learning_rate": 2.2969164798891412e-06, "loss": 0.2438, "step": 16144 }, { "epoch": 2.6290762529007043, "grad_norm": 0.21370674669742584, "learning_rate": 2.294933483501355e-06, "loss": 0.2776, "step": 16145 }, { "epoch": 2.6292390994585353, "grad_norm": 0.1534554809331894, "learning_rate": 2.292951302293478e-06, "loss": 0.2661, "step": 16146 }, { "epoch": 2.6294019460163662, "grad_norm": 0.2089403122663498, "learning_rate": 2.290969936336684e-06, "loss": 0.2817, "step": 16147 }, { "epoch": 2.6295647925741967, "grad_norm": 0.18012860417366028, "learning_rate": 2.288989385702109e-06, "loss": 0.2864, "step": 16148 }, { "epoch": 2.629727639132028, "grad_norm": 0.1878402680158615, "learning_rate": 2.2870096504608612e-06, "loss": 0.2568, "step": 16149 }, { "epoch": 2.6298904856898586, "grad_norm": 0.20553408563137054, "learning_rate": 2.2850307306840096e-06, "loss": 0.2483, "step": 16150 }, { "epoch": 2.6300533322476896, "grad_norm": 0.14448527991771698, "learning_rate": 2.2830526264426167e-06, "loss": 0.2567, "step": 16151 }, { "epoch": 2.6302161788055205, "grad_norm": 0.14743126928806305, "learning_rate": 2.281075337807695e-06, "loss": 0.2867, "step": 16152 }, { "epoch": 2.6303790253633514, "grad_norm": 0.1962258368730545, "learning_rate": 2.2790988648502322e-06, "loss": 0.2724, "step": 16153 }, { "epoch": 2.6305418719211824, "grad_norm": 0.15119701623916626, "learning_rate": 2.277123207641199e-06, "loss": 0.2454, "step": 16154 }, { "epoch": 2.630704718479013, "grad_norm": 0.18279553949832916, "learning_rate": 2.2751483662515226e-06, "loss": 0.252, "step": 16155 }, { "epoch": 2.6308675650368443, "grad_norm": 0.226128488779068, "learning_rate": 2.273174340752099e-06, "loss": 0.2811, "step": 16156 }, { "epoch": 2.6310304115946748, "grad_norm": 0.19191986322402954, "learning_rate": 2.2712011312138126e-06, "loss": 0.2424, "step": 16157 }, { "epoch": 2.6311932581525057, "grad_norm": 0.20459991693496704, "learning_rate": 2.2692287377075044e-06, "loss": 0.2789, "step": 16158 }, { "epoch": 2.6313561047103367, "grad_norm": 0.227335125207901, "learning_rate": 2.2672571603039844e-06, "loss": 0.2935, "step": 16159 }, { "epoch": 2.6315189512681676, "grad_norm": 0.14286740124225616, "learning_rate": 2.265286399074043e-06, "loss": 0.2469, "step": 16160 }, { "epoch": 2.6316817978259985, "grad_norm": 0.18153376877307892, "learning_rate": 2.263316454088432e-06, "loss": 0.2828, "step": 16161 }, { "epoch": 2.6318446443838295, "grad_norm": 0.1702825278043747, "learning_rate": 2.261347325417884e-06, "loss": 0.2975, "step": 16162 }, { "epoch": 2.6320074909416604, "grad_norm": 0.19182220101356506, "learning_rate": 2.259379013133095e-06, "loss": 0.2721, "step": 16163 }, { "epoch": 2.632170337499491, "grad_norm": 0.1529897004365921, "learning_rate": 2.2574115173047277e-06, "loss": 0.256, "step": 16164 }, { "epoch": 2.632333184057322, "grad_norm": 0.1698509156703949, "learning_rate": 2.2554448380034316e-06, "loss": 0.2725, "step": 16165 }, { "epoch": 2.632496030615153, "grad_norm": 0.23663587868213654, "learning_rate": 2.253478975299808e-06, "loss": 0.2954, "step": 16166 }, { "epoch": 2.6326588771729837, "grad_norm": 0.19634956121444702, "learning_rate": 2.25151392926444e-06, "loss": 0.2581, "step": 16167 }, { "epoch": 2.6328217237308147, "grad_norm": 0.15342408418655396, "learning_rate": 2.2495496999678734e-06, "loss": 0.2659, "step": 16168 }, { "epoch": 2.6329845702886456, "grad_norm": 0.19945743680000305, "learning_rate": 2.247586287480641e-06, "loss": 0.2797, "step": 16169 }, { "epoch": 2.6331474168464766, "grad_norm": 0.1782228797674179, "learning_rate": 2.2456236918732277e-06, "loss": 0.2576, "step": 16170 }, { "epoch": 2.633310263404307, "grad_norm": 0.1795714646577835, "learning_rate": 2.2436619132160997e-06, "loss": 0.2733, "step": 16171 }, { "epoch": 2.633473109962138, "grad_norm": 0.18171662092208862, "learning_rate": 2.241700951579681e-06, "loss": 0.2954, "step": 16172 }, { "epoch": 2.633635956519969, "grad_norm": 0.15212316811084747, "learning_rate": 2.2397408070343906e-06, "loss": 0.2665, "step": 16173 }, { "epoch": 2.6337988030778, "grad_norm": 0.16025032103061676, "learning_rate": 2.237781479650597e-06, "loss": 0.2264, "step": 16174 }, { "epoch": 2.633961649635631, "grad_norm": 0.15427470207214355, "learning_rate": 2.235822969498638e-06, "loss": 0.2969, "step": 16175 }, { "epoch": 2.634124496193462, "grad_norm": 0.20347969233989716, "learning_rate": 2.2338652766488443e-06, "loss": 0.2821, "step": 16176 }, { "epoch": 2.6342873427512927, "grad_norm": 0.19436383247375488, "learning_rate": 2.231908401171498e-06, "loss": 0.2404, "step": 16177 }, { "epoch": 2.6344501893091232, "grad_norm": 0.14794759452342987, "learning_rate": 2.229952343136854e-06, "loss": 0.2584, "step": 16178 }, { "epoch": 2.6346130358669546, "grad_norm": 0.22015610337257385, "learning_rate": 2.227997102615137e-06, "loss": 0.2771, "step": 16179 }, { "epoch": 2.634775882424785, "grad_norm": 0.17972618341445923, "learning_rate": 2.2260426796765543e-06, "loss": 0.2445, "step": 16180 }, { "epoch": 2.634938728982616, "grad_norm": 0.1509491503238678, "learning_rate": 2.2240890743912725e-06, "loss": 0.2734, "step": 16181 }, { "epoch": 2.635101575540447, "grad_norm": 0.19778670370578766, "learning_rate": 2.222136286829432e-06, "loss": 0.2373, "step": 16182 }, { "epoch": 2.635264422098278, "grad_norm": 0.1426304578781128, "learning_rate": 2.2201843170611385e-06, "loss": 0.3079, "step": 16183 }, { "epoch": 2.635427268656109, "grad_norm": 0.1708969622850418, "learning_rate": 2.218233165156483e-06, "loss": 0.216, "step": 16184 }, { "epoch": 2.63559011521394, "grad_norm": 0.19412358105182648, "learning_rate": 2.216282831185512e-06, "loss": 0.2845, "step": 16185 }, { "epoch": 2.6357529617717708, "grad_norm": 0.2130674123764038, "learning_rate": 2.2143333152182448e-06, "loss": 0.2843, "step": 16186 }, { "epoch": 2.6359158083296013, "grad_norm": 0.18344423174858093, "learning_rate": 2.2123846173246836e-06, "loss": 0.2818, "step": 16187 }, { "epoch": 2.636078654887432, "grad_norm": 0.20605090260505676, "learning_rate": 2.210436737574789e-06, "loss": 0.2815, "step": 16188 }, { "epoch": 2.636241501445263, "grad_norm": 0.22742681205272675, "learning_rate": 2.208489676038494e-06, "loss": 0.2552, "step": 16189 }, { "epoch": 2.636404348003094, "grad_norm": 0.16959556937217712, "learning_rate": 2.2065434327857014e-06, "loss": 0.2942, "step": 16190 }, { "epoch": 2.636567194560925, "grad_norm": 0.15394188463687897, "learning_rate": 2.204598007886294e-06, "loss": 0.2283, "step": 16191 }, { "epoch": 2.636730041118756, "grad_norm": 0.1645243614912033, "learning_rate": 2.202653401410115e-06, "loss": 0.2501, "step": 16192 }, { "epoch": 2.636892887676587, "grad_norm": 0.16855517029762268, "learning_rate": 2.2007096134269766e-06, "loss": 0.2975, "step": 16193 }, { "epoch": 2.6370557342344174, "grad_norm": 0.1729707568883896, "learning_rate": 2.1987666440066777e-06, "loss": 0.2566, "step": 16194 }, { "epoch": 2.6372185807922484, "grad_norm": 0.18019719421863556, "learning_rate": 2.1968244932189687e-06, "loss": 0.2428, "step": 16195 }, { "epoch": 2.6373814273500793, "grad_norm": 0.20500624179840088, "learning_rate": 2.194883161133579e-06, "loss": 0.2604, "step": 16196 }, { "epoch": 2.6375442739079102, "grad_norm": 0.16032806038856506, "learning_rate": 2.192942647820212e-06, "loss": 0.2496, "step": 16197 }, { "epoch": 2.637707120465741, "grad_norm": 0.16145072877407074, "learning_rate": 2.191002953348531e-06, "loss": 0.2629, "step": 16198 }, { "epoch": 2.637869967023572, "grad_norm": 0.26561665534973145, "learning_rate": 2.189064077788186e-06, "loss": 0.265, "step": 16199 }, { "epoch": 2.638032813581403, "grad_norm": 0.19292329251766205, "learning_rate": 2.187126021208785e-06, "loss": 0.2742, "step": 16200 }, { "epoch": 2.6381956601392336, "grad_norm": 0.16927306354045868, "learning_rate": 2.1851887836799035e-06, "loss": 0.2649, "step": 16201 }, { "epoch": 2.638358506697065, "grad_norm": 0.16029323637485504, "learning_rate": 2.1832523652711075e-06, "loss": 0.2523, "step": 16202 }, { "epoch": 2.6385213532548955, "grad_norm": 0.1711784303188324, "learning_rate": 2.181316766051908e-06, "loss": 0.289, "step": 16203 }, { "epoch": 2.6386841998127264, "grad_norm": 0.18676190078258514, "learning_rate": 2.1793819860917997e-06, "loss": 0.2917, "step": 16204 }, { "epoch": 2.6388470463705573, "grad_norm": 0.18359772861003876, "learning_rate": 2.1774480254602575e-06, "loss": 0.2437, "step": 16205 }, { "epoch": 2.6390098929283883, "grad_norm": 0.14168381690979004, "learning_rate": 2.1755148842267066e-06, "loss": 0.2491, "step": 16206 }, { "epoch": 2.639172739486219, "grad_norm": 0.15853837132453918, "learning_rate": 2.1735825624605575e-06, "loss": 0.2742, "step": 16207 }, { "epoch": 2.6393355860440497, "grad_norm": 0.19371412694454193, "learning_rate": 2.171651060231178e-06, "loss": 0.307, "step": 16208 }, { "epoch": 2.639498432601881, "grad_norm": 0.1977149397134781, "learning_rate": 2.1697203776079256e-06, "loss": 0.2951, "step": 16209 }, { "epoch": 2.6396612791597116, "grad_norm": 0.19319863617420197, "learning_rate": 2.1677905146601118e-06, "loss": 0.3273, "step": 16210 }, { "epoch": 2.6398241257175425, "grad_norm": 0.17381195724010468, "learning_rate": 2.1658614714570285e-06, "loss": 0.293, "step": 16211 }, { "epoch": 2.6399869722753735, "grad_norm": 0.1751963496208191, "learning_rate": 2.1639332480679257e-06, "loss": 0.2597, "step": 16212 }, { "epoch": 2.6401498188332044, "grad_norm": 0.1808290034532547, "learning_rate": 2.1620058445620426e-06, "loss": 0.2595, "step": 16213 }, { "epoch": 2.6403126653910354, "grad_norm": 0.15625815093517303, "learning_rate": 2.160079261008574e-06, "loss": 0.286, "step": 16214 }, { "epoch": 2.6404755119488663, "grad_norm": 0.18075700104236603, "learning_rate": 2.1581534974766836e-06, "loss": 0.2319, "step": 16215 }, { "epoch": 2.6406383585066973, "grad_norm": 0.18575263023376465, "learning_rate": 2.156228554035525e-06, "loss": 0.2727, "step": 16216 }, { "epoch": 2.6408012050645278, "grad_norm": 0.19427216053009033, "learning_rate": 2.1543044307542033e-06, "loss": 0.3013, "step": 16217 }, { "epoch": 2.6409640516223587, "grad_norm": 0.17797839641571045, "learning_rate": 2.1523811277017996e-06, "loss": 0.2842, "step": 16218 }, { "epoch": 2.6411268981801896, "grad_norm": 0.20025813579559326, "learning_rate": 2.1504586449473647e-06, "loss": 0.3153, "step": 16219 }, { "epoch": 2.6412897447380206, "grad_norm": 0.19609007239341736, "learning_rate": 2.1485369825599235e-06, "loss": 0.2552, "step": 16220 }, { "epoch": 2.6414525912958515, "grad_norm": 0.16707967221736908, "learning_rate": 2.1466161406084733e-06, "loss": 0.2637, "step": 16221 }, { "epoch": 2.6416154378536825, "grad_norm": 0.1919861137866974, "learning_rate": 2.144696119161971e-06, "loss": 0.2721, "step": 16222 }, { "epoch": 2.6417782844115134, "grad_norm": 0.14308324456214905, "learning_rate": 2.142776918289352e-06, "loss": 0.2385, "step": 16223 }, { "epoch": 2.641941130969344, "grad_norm": 0.15099535882472992, "learning_rate": 2.1408585380595285e-06, "loss": 0.2758, "step": 16224 }, { "epoch": 2.6421039775271753, "grad_norm": 0.16823454201221466, "learning_rate": 2.1389409785413707e-06, "loss": 0.3018, "step": 16225 }, { "epoch": 2.642266824085006, "grad_norm": 0.22351951897144318, "learning_rate": 2.13702423980372e-06, "loss": 0.2923, "step": 16226 }, { "epoch": 2.6424296706428367, "grad_norm": 0.15603426098823547, "learning_rate": 2.135108321915402e-06, "loss": 0.2244, "step": 16227 }, { "epoch": 2.6425925172006677, "grad_norm": 0.1519814431667328, "learning_rate": 2.1331932249452013e-06, "loss": 0.2938, "step": 16228 }, { "epoch": 2.6427553637584986, "grad_norm": 0.18943800032138824, "learning_rate": 2.1312789489618763e-06, "loss": 0.2607, "step": 16229 }, { "epoch": 2.6429182103163296, "grad_norm": 0.1701716184616089, "learning_rate": 2.1293654940341467e-06, "loss": 0.2626, "step": 16230 }, { "epoch": 2.64308105687416, "grad_norm": 0.14814972877502441, "learning_rate": 2.1274528602307246e-06, "loss": 0.2688, "step": 16231 }, { "epoch": 2.6432439034319914, "grad_norm": 0.11621103435754776, "learning_rate": 2.125541047620272e-06, "loss": 0.2555, "step": 16232 }, { "epoch": 2.643406749989822, "grad_norm": 0.16473856568336487, "learning_rate": 2.123630056271425e-06, "loss": 0.2327, "step": 16233 }, { "epoch": 2.643569596547653, "grad_norm": 0.18755093216896057, "learning_rate": 2.1217198862528015e-06, "loss": 0.2629, "step": 16234 }, { "epoch": 2.643732443105484, "grad_norm": 0.19901388883590698, "learning_rate": 2.1198105376329822e-06, "loss": 0.266, "step": 16235 }, { "epoch": 2.6438952896633148, "grad_norm": 0.1681259572505951, "learning_rate": 2.1179020104805074e-06, "loss": 0.2752, "step": 16236 }, { "epoch": 2.6440581362211457, "grad_norm": 0.19126415252685547, "learning_rate": 2.1159943048639137e-06, "loss": 0.259, "step": 16237 }, { "epoch": 2.6442209827789767, "grad_norm": 0.17703384160995483, "learning_rate": 2.114087420851682e-06, "loss": 0.2779, "step": 16238 }, { "epoch": 2.6443838293368076, "grad_norm": 0.19178397953510284, "learning_rate": 2.1121813585122857e-06, "loss": 0.2819, "step": 16239 }, { "epoch": 2.644546675894638, "grad_norm": 0.21391628682613373, "learning_rate": 2.1102761179141506e-06, "loss": 0.2823, "step": 16240 }, { "epoch": 2.644709522452469, "grad_norm": 0.22956401109695435, "learning_rate": 2.1083716991256776e-06, "loss": 0.2772, "step": 16241 }, { "epoch": 2.6448723690103, "grad_norm": 0.15007486939430237, "learning_rate": 2.1064681022152503e-06, "loss": 0.2592, "step": 16242 }, { "epoch": 2.645035215568131, "grad_norm": 0.19400185346603394, "learning_rate": 2.1045653272512124e-06, "loss": 0.2579, "step": 16243 }, { "epoch": 2.645198062125962, "grad_norm": 0.18624581396579742, "learning_rate": 2.102663374301872e-06, "loss": 0.3025, "step": 16244 }, { "epoch": 2.645360908683793, "grad_norm": 0.1944485306739807, "learning_rate": 2.100762243435514e-06, "loss": 0.2534, "step": 16245 }, { "epoch": 2.6455237552416238, "grad_norm": 0.18994832038879395, "learning_rate": 2.098861934720406e-06, "loss": 0.2588, "step": 16246 }, { "epoch": 2.6456866017994543, "grad_norm": 0.19172367453575134, "learning_rate": 2.096962448224768e-06, "loss": 0.2966, "step": 16247 }, { "epoch": 2.645849448357285, "grad_norm": 0.13181695342063904, "learning_rate": 2.0950637840167903e-06, "loss": 0.2494, "step": 16248 }, { "epoch": 2.646012294915116, "grad_norm": 0.1994381844997406, "learning_rate": 2.0931659421646544e-06, "loss": 0.2428, "step": 16249 }, { "epoch": 2.646175141472947, "grad_norm": 0.16169941425323486, "learning_rate": 2.0912689227364917e-06, "loss": 0.2685, "step": 16250 }, { "epoch": 2.646337988030778, "grad_norm": 0.195636585354805, "learning_rate": 2.089372725800412e-06, "loss": 0.2756, "step": 16251 }, { "epoch": 2.646500834588609, "grad_norm": 0.18662433326244354, "learning_rate": 2.0874773514244885e-06, "loss": 0.2822, "step": 16252 }, { "epoch": 2.64666368114644, "grad_norm": 0.17800745368003845, "learning_rate": 2.08558279967678e-06, "loss": 0.2819, "step": 16253 }, { "epoch": 2.6468265277042704, "grad_norm": 0.1706145852804184, "learning_rate": 2.0836890706253026e-06, "loss": 0.2397, "step": 16254 }, { "epoch": 2.646989374262102, "grad_norm": 0.1805364489555359, "learning_rate": 2.08179616433804e-06, "loss": 0.2578, "step": 16255 }, { "epoch": 2.6471522208199323, "grad_norm": 0.2522757947444916, "learning_rate": 2.0799040808829664e-06, "loss": 0.2964, "step": 16256 }, { "epoch": 2.6473150673777632, "grad_norm": 0.15168361365795135, "learning_rate": 2.078012820328007e-06, "loss": 0.2818, "step": 16257 }, { "epoch": 2.647477913935594, "grad_norm": 0.18318335711956024, "learning_rate": 2.0761223827410636e-06, "loss": 0.2792, "step": 16258 }, { "epoch": 2.647640760493425, "grad_norm": 0.1867290884256363, "learning_rate": 2.0742327681900016e-06, "loss": 0.2975, "step": 16259 }, { "epoch": 2.647803607051256, "grad_norm": 0.18172377347946167, "learning_rate": 2.072343976742677e-06, "loss": 0.2462, "step": 16260 }, { "epoch": 2.647966453609087, "grad_norm": 0.22654591500759125, "learning_rate": 2.0704560084668977e-06, "loss": 0.2575, "step": 16261 }, { "epoch": 2.648129300166918, "grad_norm": 0.1869095414876938, "learning_rate": 2.068568863430445e-06, "loss": 0.2095, "step": 16262 }, { "epoch": 2.6482921467247484, "grad_norm": 0.17971032857894897, "learning_rate": 2.0666825417010703e-06, "loss": 0.2422, "step": 16263 }, { "epoch": 2.6484549932825794, "grad_norm": 0.17843161523342133, "learning_rate": 2.064797043346506e-06, "loss": 0.238, "step": 16264 }, { "epoch": 2.6486178398404103, "grad_norm": 0.21336829662322998, "learning_rate": 2.062912368434444e-06, "loss": 0.2609, "step": 16265 }, { "epoch": 2.6487806863982413, "grad_norm": 0.16357924044132233, "learning_rate": 2.0610285170325454e-06, "loss": 0.2842, "step": 16266 }, { "epoch": 2.648943532956072, "grad_norm": 0.16859501600265503, "learning_rate": 2.0591454892084557e-06, "loss": 0.3365, "step": 16267 }, { "epoch": 2.649106379513903, "grad_norm": 0.15639325976371765, "learning_rate": 2.0572632850297756e-06, "loss": 0.2524, "step": 16268 }, { "epoch": 2.649269226071734, "grad_norm": 0.17616364359855652, "learning_rate": 2.0553819045640797e-06, "loss": 0.2834, "step": 16269 }, { "epoch": 2.6494320726295646, "grad_norm": 0.16270911693572998, "learning_rate": 2.053501347878914e-06, "loss": 0.2717, "step": 16270 }, { "epoch": 2.6495949191873955, "grad_norm": 0.2174839973449707, "learning_rate": 2.0516216150418016e-06, "loss": 0.2688, "step": 16271 }, { "epoch": 2.6497577657452265, "grad_norm": 0.17776454985141754, "learning_rate": 2.0497427061202306e-06, "loss": 0.27, "step": 16272 }, { "epoch": 2.6499206123030574, "grad_norm": 0.16824980080127716, "learning_rate": 2.0478646211816528e-06, "loss": 0.2415, "step": 16273 }, { "epoch": 2.6500834588608884, "grad_norm": 0.15666750073432922, "learning_rate": 2.0459873602935053e-06, "loss": 0.2415, "step": 16274 }, { "epoch": 2.6502463054187193, "grad_norm": 0.18657925724983215, "learning_rate": 2.044110923523182e-06, "loss": 0.2553, "step": 16275 }, { "epoch": 2.6504091519765502, "grad_norm": 0.18642573058605194, "learning_rate": 2.0422353109380565e-06, "loss": 0.2386, "step": 16276 }, { "epoch": 2.6505719985343807, "grad_norm": 0.15936040878295898, "learning_rate": 2.040360522605464e-06, "loss": 0.2506, "step": 16277 }, { "epoch": 2.650734845092212, "grad_norm": 0.1747284084558487, "learning_rate": 2.038486558592717e-06, "loss": 0.2805, "step": 16278 }, { "epoch": 2.6508976916500426, "grad_norm": 0.1811968982219696, "learning_rate": 2.036613418967101e-06, "loss": 0.2585, "step": 16279 }, { "epoch": 2.6510605382078736, "grad_norm": 0.1827581375837326, "learning_rate": 2.034741103795862e-06, "loss": 0.2621, "step": 16280 }, { "epoch": 2.6512233847657045, "grad_norm": 0.13200442492961884, "learning_rate": 2.0328696131462185e-06, "loss": 0.2691, "step": 16281 }, { "epoch": 2.6513862313235355, "grad_norm": 0.18764807283878326, "learning_rate": 2.030998947085372e-06, "loss": 0.2892, "step": 16282 }, { "epoch": 2.6515490778813664, "grad_norm": 0.17888621985912323, "learning_rate": 2.0291291056804828e-06, "loss": 0.2754, "step": 16283 }, { "epoch": 2.651711924439197, "grad_norm": 0.1544369012117386, "learning_rate": 2.0272600889986782e-06, "loss": 0.2742, "step": 16284 }, { "epoch": 2.6518747709970283, "grad_norm": 0.18926160037517548, "learning_rate": 2.0253918971070597e-06, "loss": 0.2878, "step": 16285 }, { "epoch": 2.652037617554859, "grad_norm": 0.1887810230255127, "learning_rate": 2.0235245300727123e-06, "loss": 0.3103, "step": 16286 }, { "epoch": 2.6522004641126897, "grad_norm": 0.15529905259609222, "learning_rate": 2.0216579879626748e-06, "loss": 0.2722, "step": 16287 }, { "epoch": 2.6523633106705207, "grad_norm": 0.1640201359987259, "learning_rate": 2.019792270843954e-06, "loss": 0.2522, "step": 16288 }, { "epoch": 2.6525261572283516, "grad_norm": 0.17490221560001373, "learning_rate": 2.0179273787835463e-06, "loss": 0.2876, "step": 16289 }, { "epoch": 2.6526890037861826, "grad_norm": 0.21401451528072357, "learning_rate": 2.0160633118484014e-06, "loss": 0.2788, "step": 16290 }, { "epoch": 2.6528518503440135, "grad_norm": 0.21084199845790863, "learning_rate": 2.014200070105443e-06, "loss": 0.2735, "step": 16291 }, { "epoch": 2.6530146969018444, "grad_norm": 0.24216000735759735, "learning_rate": 2.0123376536215683e-06, "loss": 0.2886, "step": 16292 }, { "epoch": 2.653177543459675, "grad_norm": 0.14560623466968536, "learning_rate": 2.0104760624636483e-06, "loss": 0.253, "step": 16293 }, { "epoch": 2.653340390017506, "grad_norm": 0.19675235450267792, "learning_rate": 2.0086152966985156e-06, "loss": 0.2437, "step": 16294 }, { "epoch": 2.653503236575337, "grad_norm": 0.19214504957199097, "learning_rate": 2.0067553563929782e-06, "loss": 0.2616, "step": 16295 }, { "epoch": 2.6536660831331678, "grad_norm": 0.20404569804668427, "learning_rate": 2.00489624161381e-06, "loss": 0.2965, "step": 16296 }, { "epoch": 2.6538289296909987, "grad_norm": 0.1736006736755371, "learning_rate": 2.0030379524277637e-06, "loss": 0.3037, "step": 16297 }, { "epoch": 2.6539917762488296, "grad_norm": 0.23092882335186005, "learning_rate": 2.0011804889015577e-06, "loss": 0.2994, "step": 16298 }, { "epoch": 2.6541546228066606, "grad_norm": 0.17510022222995758, "learning_rate": 1.9993238511018725e-06, "loss": 0.2743, "step": 16299 }, { "epoch": 2.654317469364491, "grad_norm": 0.18077926337718964, "learning_rate": 1.9974680390953765e-06, "loss": 0.2706, "step": 16300 }, { "epoch": 2.654480315922322, "grad_norm": 0.17609833180904388, "learning_rate": 1.995613052948697e-06, "loss": 0.2546, "step": 16301 }, { "epoch": 2.654643162480153, "grad_norm": 0.27296552062034607, "learning_rate": 1.9937588927284338e-06, "loss": 0.3005, "step": 16302 }, { "epoch": 2.654806009037984, "grad_norm": 0.16305483877658844, "learning_rate": 1.9919055585011474e-06, "loss": 0.288, "step": 16303 }, { "epoch": 2.654968855595815, "grad_norm": 0.20621678233146667, "learning_rate": 1.9900530503333904e-06, "loss": 0.2967, "step": 16304 }, { "epoch": 2.655131702153646, "grad_norm": 0.20434731245040894, "learning_rate": 1.98820136829167e-06, "loss": 0.2695, "step": 16305 }, { "epoch": 2.6552945487114767, "grad_norm": 0.21135377883911133, "learning_rate": 1.9863505124424585e-06, "loss": 0.2985, "step": 16306 }, { "epoch": 2.6554573952693072, "grad_norm": 0.24158084392547607, "learning_rate": 1.9845004828522224e-06, "loss": 0.2594, "step": 16307 }, { "epoch": 2.6556202418271386, "grad_norm": 0.1624230444431305, "learning_rate": 1.982651279587372e-06, "loss": 0.3031, "step": 16308 }, { "epoch": 2.655783088384969, "grad_norm": 0.1695455014705658, "learning_rate": 1.9808029027143045e-06, "loss": 0.2734, "step": 16309 }, { "epoch": 2.6559459349428, "grad_norm": 0.15239198505878448, "learning_rate": 1.9789553522993748e-06, "loss": 0.2746, "step": 16310 }, { "epoch": 2.656108781500631, "grad_norm": 0.1405424028635025, "learning_rate": 1.9771086284089247e-06, "loss": 0.2942, "step": 16311 }, { "epoch": 2.656271628058462, "grad_norm": 0.19330641627311707, "learning_rate": 1.9752627311092507e-06, "loss": 0.261, "step": 16312 }, { "epoch": 2.656434474616293, "grad_norm": 0.1715540587902069, "learning_rate": 1.973417660466631e-06, "loss": 0.2758, "step": 16313 }, { "epoch": 2.656597321174124, "grad_norm": 0.1876111775636673, "learning_rate": 1.9715734165473095e-06, "loss": 0.2994, "step": 16314 }, { "epoch": 2.656760167731955, "grad_norm": 0.17359843850135803, "learning_rate": 1.969729999417491e-06, "loss": 0.2881, "step": 16315 }, { "epoch": 2.6569230142897853, "grad_norm": 0.14335761964321136, "learning_rate": 1.9678874091433707e-06, "loss": 0.2476, "step": 16316 }, { "epoch": 2.657085860847616, "grad_norm": 0.14914511144161224, "learning_rate": 1.9660456457910925e-06, "loss": 0.2662, "step": 16317 }, { "epoch": 2.657248707405447, "grad_norm": 0.18231983482837677, "learning_rate": 1.964204709426795e-06, "loss": 0.2837, "step": 16318 }, { "epoch": 2.657411553963278, "grad_norm": 0.1445889174938202, "learning_rate": 1.962364600116562e-06, "loss": 0.2329, "step": 16319 }, { "epoch": 2.657574400521109, "grad_norm": 0.15159885585308075, "learning_rate": 1.9605253179264647e-06, "loss": 0.2458, "step": 16320 }, { "epoch": 2.65773724707894, "grad_norm": 0.21879079937934875, "learning_rate": 1.958686862922532e-06, "loss": 0.2381, "step": 16321 }, { "epoch": 2.657900093636771, "grad_norm": 0.16512173414230347, "learning_rate": 1.9568492351707797e-06, "loss": 0.2571, "step": 16322 }, { "epoch": 2.6580629401946014, "grad_norm": 0.18657080829143524, "learning_rate": 1.9550124347371773e-06, "loss": 0.2714, "step": 16323 }, { "epoch": 2.6582257867524324, "grad_norm": 0.13996927440166473, "learning_rate": 1.9531764616876724e-06, "loss": 0.2289, "step": 16324 }, { "epoch": 2.6583886333102633, "grad_norm": 0.18974275887012482, "learning_rate": 1.951341316088179e-06, "loss": 0.268, "step": 16325 }, { "epoch": 2.6585514798680943, "grad_norm": 0.17036403715610504, "learning_rate": 1.9495069980045938e-06, "loss": 0.2799, "step": 16326 }, { "epoch": 2.658714326425925, "grad_norm": 0.15385739505290985, "learning_rate": 1.9476735075027676e-06, "loss": 0.2866, "step": 16327 }, { "epoch": 2.658877172983756, "grad_norm": 0.18714579939842224, "learning_rate": 1.945840844648525e-06, "loss": 0.2785, "step": 16328 }, { "epoch": 2.659040019541587, "grad_norm": 0.15763157606124878, "learning_rate": 1.9440090095076716e-06, "loss": 0.2766, "step": 16329 }, { "epoch": 2.6592028660994176, "grad_norm": 0.1652274876832962, "learning_rate": 1.9421780021459724e-06, "loss": 0.2675, "step": 16330 }, { "epoch": 2.659365712657249, "grad_norm": 0.2072853446006775, "learning_rate": 1.9403478226291684e-06, "loss": 0.2674, "step": 16331 }, { "epoch": 2.6595285592150795, "grad_norm": 0.14203456044197083, "learning_rate": 1.9385184710229596e-06, "loss": 0.2532, "step": 16332 }, { "epoch": 2.6596914057729104, "grad_norm": 0.1953849345445633, "learning_rate": 1.936689947393036e-06, "loss": 0.2726, "step": 16333 }, { "epoch": 2.6598542523307414, "grad_norm": 0.2038402259349823, "learning_rate": 1.934862251805042e-06, "loss": 0.2808, "step": 16334 }, { "epoch": 2.6600170988885723, "grad_norm": 0.16952790319919586, "learning_rate": 1.9330353843245995e-06, "loss": 0.2776, "step": 16335 }, { "epoch": 2.6601799454464032, "grad_norm": 0.1213829517364502, "learning_rate": 1.931209345017293e-06, "loss": 0.2446, "step": 16336 }, { "epoch": 2.6603427920042337, "grad_norm": 0.17206226289272308, "learning_rate": 1.9293841339486918e-06, "loss": 0.291, "step": 16337 }, { "epoch": 2.660505638562065, "grad_norm": 0.17279621958732605, "learning_rate": 1.9275597511843184e-06, "loss": 0.2755, "step": 16338 }, { "epoch": 2.6606684851198956, "grad_norm": 0.17053383588790894, "learning_rate": 1.9257361967896766e-06, "loss": 0.2728, "step": 16339 }, { "epoch": 2.6608313316777266, "grad_norm": 0.1740466058254242, "learning_rate": 1.923913470830241e-06, "loss": 0.3144, "step": 16340 }, { "epoch": 2.6609941782355575, "grad_norm": 0.147789865732193, "learning_rate": 1.9220915733714485e-06, "loss": 0.2627, "step": 16341 }, { "epoch": 2.6611570247933884, "grad_norm": 0.20477239787578583, "learning_rate": 1.9202705044787106e-06, "loss": 0.2641, "step": 16342 }, { "epoch": 2.6613198713512194, "grad_norm": 0.19472859799861908, "learning_rate": 1.9184502642174083e-06, "loss": 0.2724, "step": 16343 }, { "epoch": 2.6614827179090503, "grad_norm": 0.2000766396522522, "learning_rate": 1.916630852652898e-06, "loss": 0.2615, "step": 16344 }, { "epoch": 2.6616455644668813, "grad_norm": 0.18388886749744415, "learning_rate": 1.914812269850502e-06, "loss": 0.3187, "step": 16345 }, { "epoch": 2.6618084110247118, "grad_norm": 0.1965174823999405, "learning_rate": 1.9129945158755105e-06, "loss": 0.2997, "step": 16346 }, { "epoch": 2.6619712575825427, "grad_norm": 0.1922743171453476, "learning_rate": 1.911177590793181e-06, "loss": 0.231, "step": 16347 }, { "epoch": 2.6621341041403737, "grad_norm": 0.21461591124534607, "learning_rate": 1.9093614946687543e-06, "loss": 0.2964, "step": 16348 }, { "epoch": 2.6622969506982046, "grad_norm": 0.19457164406776428, "learning_rate": 1.9075462275674304e-06, "loss": 0.239, "step": 16349 }, { "epoch": 2.6624597972560355, "grad_norm": 0.18284101784229279, "learning_rate": 1.9057317895543852e-06, "loss": 0.2469, "step": 16350 }, { "epoch": 2.6626226438138665, "grad_norm": 0.17762133479118347, "learning_rate": 1.9039181806947637e-06, "loss": 0.3113, "step": 16351 }, { "epoch": 2.6627854903716974, "grad_norm": 0.1726323962211609, "learning_rate": 1.902105401053672e-06, "loss": 0.2445, "step": 16352 }, { "epoch": 2.662948336929528, "grad_norm": 0.16676941514015198, "learning_rate": 1.9002934506962028e-06, "loss": 0.2968, "step": 16353 }, { "epoch": 2.6631111834873593, "grad_norm": 0.15643753111362457, "learning_rate": 1.8984823296874095e-06, "loss": 0.2364, "step": 16354 }, { "epoch": 2.66327403004519, "grad_norm": 0.2162485122680664, "learning_rate": 1.8966720380923065e-06, "loss": 0.2629, "step": 16355 }, { "epoch": 2.6634368766030208, "grad_norm": 0.1786918342113495, "learning_rate": 1.894862575975906e-06, "loss": 0.2726, "step": 16356 }, { "epoch": 2.6635997231608517, "grad_norm": 0.1616268754005432, "learning_rate": 1.8930539434031557e-06, "loss": 0.2688, "step": 16357 }, { "epoch": 2.6637625697186826, "grad_norm": 0.15160071849822998, "learning_rate": 1.8912461404390041e-06, "loss": 0.2549, "step": 16358 }, { "epoch": 2.6639254162765136, "grad_norm": 0.19645635783672333, "learning_rate": 1.8894391671483518e-06, "loss": 0.2515, "step": 16359 }, { "epoch": 2.664088262834344, "grad_norm": 0.16207826137542725, "learning_rate": 1.8876330235960748e-06, "loss": 0.2958, "step": 16360 }, { "epoch": 2.6642511093921755, "grad_norm": 0.177451029419899, "learning_rate": 1.8858277098470156e-06, "loss": 0.2483, "step": 16361 }, { "epoch": 2.664413955950006, "grad_norm": 0.169669508934021, "learning_rate": 1.8840232259659974e-06, "loss": 0.2665, "step": 16362 }, { "epoch": 2.664576802507837, "grad_norm": 0.17214958369731903, "learning_rate": 1.8822195720178015e-06, "loss": 0.2525, "step": 16363 }, { "epoch": 2.664739649065668, "grad_norm": 0.1820257157087326, "learning_rate": 1.8804167480671848e-06, "loss": 0.2512, "step": 16364 }, { "epoch": 2.664902495623499, "grad_norm": 0.17354434728622437, "learning_rate": 1.8786147541788729e-06, "loss": 0.2476, "step": 16365 }, { "epoch": 2.6650653421813297, "grad_norm": 0.20544393360614777, "learning_rate": 1.8768135904175698e-06, "loss": 0.3299, "step": 16366 }, { "epoch": 2.6652281887391607, "grad_norm": 0.19450876116752625, "learning_rate": 1.875013256847935e-06, "loss": 0.2933, "step": 16367 }, { "epoch": 2.6653910352969916, "grad_norm": 0.1947191059589386, "learning_rate": 1.8732137535346056e-06, "loss": 0.295, "step": 16368 }, { "epoch": 2.665553881854822, "grad_norm": 0.17466461658477783, "learning_rate": 1.8714150805421964e-06, "loss": 0.2508, "step": 16369 }, { "epoch": 2.665716728412653, "grad_norm": 0.16181078553199768, "learning_rate": 1.869617237935281e-06, "loss": 0.2485, "step": 16370 }, { "epoch": 2.665879574970484, "grad_norm": 0.16185234487056732, "learning_rate": 1.8678202257784077e-06, "loss": 0.2787, "step": 16371 }, { "epoch": 2.666042421528315, "grad_norm": 0.22311754524707794, "learning_rate": 1.8660240441360887e-06, "loss": 0.2562, "step": 16372 }, { "epoch": 2.666205268086146, "grad_norm": 0.17576134204864502, "learning_rate": 1.8642286930728225e-06, "loss": 0.2978, "step": 16373 }, { "epoch": 2.666368114643977, "grad_norm": 0.17212119698524475, "learning_rate": 1.862434172653063e-06, "loss": 0.2572, "step": 16374 }, { "epoch": 2.6665309612018078, "grad_norm": 0.15663929283618927, "learning_rate": 1.8606404829412394e-06, "loss": 0.2474, "step": 16375 }, { "epoch": 2.6666938077596383, "grad_norm": 0.23332136869430542, "learning_rate": 1.8588476240017443e-06, "loss": 0.2733, "step": 16376 }, { "epoch": 2.666856654317469, "grad_norm": 0.15908077359199524, "learning_rate": 1.857055595898957e-06, "loss": 0.2603, "step": 16377 }, { "epoch": 2.6670195008753, "grad_norm": 0.18117032945156097, "learning_rate": 1.8552643986972124e-06, "loss": 0.2349, "step": 16378 }, { "epoch": 2.667182347433131, "grad_norm": 0.16293488442897797, "learning_rate": 1.853474032460814e-06, "loss": 0.2547, "step": 16379 }, { "epoch": 2.667345193990962, "grad_norm": 0.13296177983283997, "learning_rate": 1.8516844972540525e-06, "loss": 0.2278, "step": 16380 }, { "epoch": 2.667508040548793, "grad_norm": 0.19696517288684845, "learning_rate": 1.849895793141171e-06, "loss": 0.2501, "step": 16381 }, { "epoch": 2.667670887106624, "grad_norm": 0.18647149205207825, "learning_rate": 1.8481079201863899e-06, "loss": 0.2677, "step": 16382 }, { "epoch": 2.6678337336644544, "grad_norm": 0.2497899979352951, "learning_rate": 1.8463208784538944e-06, "loss": 0.2637, "step": 16383 }, { "epoch": 2.667996580222286, "grad_norm": 0.18925105035305023, "learning_rate": 1.8445346680078552e-06, "loss": 0.2847, "step": 16384 }, { "epoch": 2.6681594267801163, "grad_norm": 0.18392135202884674, "learning_rate": 1.842749288912396e-06, "loss": 0.2787, "step": 16385 }, { "epoch": 2.6683222733379472, "grad_norm": 0.1628040075302124, "learning_rate": 1.8409647412316183e-06, "loss": 0.2599, "step": 16386 }, { "epoch": 2.668485119895778, "grad_norm": 0.20647594332695007, "learning_rate": 1.8391810250295905e-06, "loss": 0.2436, "step": 16387 }, { "epoch": 2.668647966453609, "grad_norm": 0.18587453663349152, "learning_rate": 1.8373981403703584e-06, "loss": 0.2346, "step": 16388 }, { "epoch": 2.66881081301144, "grad_norm": 0.18502174317836761, "learning_rate": 1.8356160873179235e-06, "loss": 0.2384, "step": 16389 }, { "epoch": 2.668973659569271, "grad_norm": 0.15171630680561066, "learning_rate": 1.833834865936282e-06, "loss": 0.2691, "step": 16390 }, { "epoch": 2.669136506127102, "grad_norm": 0.1737573742866516, "learning_rate": 1.8320544762893742e-06, "loss": 0.2598, "step": 16391 }, { "epoch": 2.6692993526849325, "grad_norm": 0.20032620429992676, "learning_rate": 1.8302749184411188e-06, "loss": 0.2395, "step": 16392 }, { "epoch": 2.6694621992427634, "grad_norm": 0.16606935858726501, "learning_rate": 1.8284961924554173e-06, "loss": 0.2971, "step": 16393 }, { "epoch": 2.6696250458005943, "grad_norm": 0.15389764308929443, "learning_rate": 1.8267182983961212e-06, "loss": 0.2808, "step": 16394 }, { "epoch": 2.6697878923584253, "grad_norm": 0.1807326376438141, "learning_rate": 1.8249412363270714e-06, "loss": 0.2636, "step": 16395 }, { "epoch": 2.6699507389162562, "grad_norm": 0.15595778822898865, "learning_rate": 1.8231650063120665e-06, "loss": 0.2646, "step": 16396 }, { "epoch": 2.670113585474087, "grad_norm": 0.29315292835235596, "learning_rate": 1.8213896084148778e-06, "loss": 0.3116, "step": 16397 }, { "epoch": 2.670276432031918, "grad_norm": 0.15916284918785095, "learning_rate": 1.8196150426992437e-06, "loss": 0.2582, "step": 16398 }, { "epoch": 2.6704392785897486, "grad_norm": 0.16138802468776703, "learning_rate": 1.817841309228882e-06, "loss": 0.2855, "step": 16399 }, { "epoch": 2.6706021251475796, "grad_norm": 0.15807455778121948, "learning_rate": 1.8160684080674728e-06, "loss": 0.2321, "step": 16400 }, { "epoch": 2.6707649717054105, "grad_norm": 0.2042308747768402, "learning_rate": 1.8142963392786648e-06, "loss": 0.2892, "step": 16401 }, { "epoch": 2.6709278182632414, "grad_norm": 0.17495682835578918, "learning_rate": 1.812525102926091e-06, "loss": 0.2309, "step": 16402 }, { "epoch": 2.6710906648210724, "grad_norm": 0.13957276940345764, "learning_rate": 1.8107546990733331e-06, "loss": 0.2845, "step": 16403 }, { "epoch": 2.6712535113789033, "grad_norm": 0.1361522078514099, "learning_rate": 1.8089851277839632e-06, "loss": 0.2527, "step": 16404 }, { "epoch": 2.6714163579367343, "grad_norm": 0.18540705740451813, "learning_rate": 1.807216389121502e-06, "loss": 0.279, "step": 16405 }, { "epoch": 2.6715792044945648, "grad_norm": 0.18135079741477966, "learning_rate": 1.8054484831494633e-06, "loss": 0.2632, "step": 16406 }, { "epoch": 2.671742051052396, "grad_norm": 0.17422275245189667, "learning_rate": 1.803681409931321e-06, "loss": 0.252, "step": 16407 }, { "epoch": 2.6719048976102266, "grad_norm": 0.18458452820777893, "learning_rate": 1.8019151695305047e-06, "loss": 0.2595, "step": 16408 }, { "epoch": 2.6720677441680576, "grad_norm": 0.18843133747577667, "learning_rate": 1.8001497620104446e-06, "loss": 0.296, "step": 16409 }, { "epoch": 2.6722305907258885, "grad_norm": 0.18124240636825562, "learning_rate": 1.7983851874345175e-06, "loss": 0.2461, "step": 16410 }, { "epoch": 2.6723934372837195, "grad_norm": 0.17118941247463226, "learning_rate": 1.796621445866073e-06, "loss": 0.2473, "step": 16411 }, { "epoch": 2.6725562838415504, "grad_norm": 0.21331796050071716, "learning_rate": 1.7948585373684352e-06, "loss": 0.277, "step": 16412 }, { "epoch": 2.672719130399381, "grad_norm": 0.20645196735858917, "learning_rate": 1.7930964620049034e-06, "loss": 0.2599, "step": 16413 }, { "epoch": 2.6728819769572123, "grad_norm": 0.2053312212228775, "learning_rate": 1.7913352198387412e-06, "loss": 0.2482, "step": 16414 }, { "epoch": 2.673044823515043, "grad_norm": 0.15588217973709106, "learning_rate": 1.7895748109331784e-06, "loss": 0.2496, "step": 16415 }, { "epoch": 2.6732076700728737, "grad_norm": 0.16428634524345398, "learning_rate": 1.7878152353514144e-06, "loss": 0.276, "step": 16416 }, { "epoch": 2.6733705166307047, "grad_norm": 0.16844750940799713, "learning_rate": 1.7860564931566348e-06, "loss": 0.2922, "step": 16417 }, { "epoch": 2.6735333631885356, "grad_norm": 0.14839528501033783, "learning_rate": 1.7842985844119752e-06, "loss": 0.2667, "step": 16418 }, { "epoch": 2.6736962097463666, "grad_norm": 0.21714933216571808, "learning_rate": 1.782541509180552e-06, "loss": 0.2643, "step": 16419 }, { "epoch": 2.6738590563041975, "grad_norm": 0.14168338477611542, "learning_rate": 1.7807852675254533e-06, "loss": 0.2455, "step": 16420 }, { "epoch": 2.6740219028620285, "grad_norm": 0.19104816019535065, "learning_rate": 1.7790298595097288e-06, "loss": 0.2447, "step": 16421 }, { "epoch": 2.674184749419859, "grad_norm": 0.1717926561832428, "learning_rate": 1.7772752851964032e-06, "loss": 0.2688, "step": 16422 }, { "epoch": 2.67434759597769, "grad_norm": 0.17280279099941254, "learning_rate": 1.7755215446484702e-06, "loss": 0.2359, "step": 16423 }, { "epoch": 2.674510442535521, "grad_norm": 0.20282764732837677, "learning_rate": 1.7737686379288992e-06, "loss": 0.2578, "step": 16424 }, { "epoch": 2.674673289093352, "grad_norm": 0.15718506276607513, "learning_rate": 1.7720165651006233e-06, "loss": 0.264, "step": 16425 }, { "epoch": 2.6748361356511827, "grad_norm": 0.15041546523571014, "learning_rate": 1.770265326226539e-06, "loss": 0.2801, "step": 16426 }, { "epoch": 2.6749989822090137, "grad_norm": 0.17095771431922913, "learning_rate": 1.7685149213695352e-06, "loss": 0.2684, "step": 16427 }, { "epoch": 2.6751618287668446, "grad_norm": 0.14777658879756927, "learning_rate": 1.7667653505924448e-06, "loss": 0.2581, "step": 16428 }, { "epoch": 2.675324675324675, "grad_norm": 0.21142618358135223, "learning_rate": 1.765016613958087e-06, "loss": 0.2576, "step": 16429 }, { "epoch": 2.675487521882506, "grad_norm": 0.15033908188343048, "learning_rate": 1.7632687115292478e-06, "loss": 0.2351, "step": 16430 }, { "epoch": 2.675650368440337, "grad_norm": 0.21265754103660583, "learning_rate": 1.761521643368677e-06, "loss": 0.2855, "step": 16431 }, { "epoch": 2.675813214998168, "grad_norm": 0.15857042372226715, "learning_rate": 1.7597754095391078e-06, "loss": 0.2742, "step": 16432 }, { "epoch": 2.675976061555999, "grad_norm": 0.19759522378444672, "learning_rate": 1.7580300101032315e-06, "loss": 0.2446, "step": 16433 }, { "epoch": 2.67613890811383, "grad_norm": 0.20920240879058838, "learning_rate": 1.7562854451237092e-06, "loss": 0.2441, "step": 16434 }, { "epoch": 2.6763017546716608, "grad_norm": 0.1268995851278305, "learning_rate": 1.7545417146631826e-06, "loss": 0.2356, "step": 16435 }, { "epoch": 2.6764646012294913, "grad_norm": 0.18465568125247955, "learning_rate": 1.7527988187842542e-06, "loss": 0.3015, "step": 16436 }, { "epoch": 2.6766274477873226, "grad_norm": 0.2078535258769989, "learning_rate": 1.751056757549499e-06, "loss": 0.2682, "step": 16437 }, { "epoch": 2.676790294345153, "grad_norm": 0.1593671292066574, "learning_rate": 1.7493155310214588e-06, "loss": 0.263, "step": 16438 }, { "epoch": 2.676953140902984, "grad_norm": 0.19088774919509888, "learning_rate": 1.7475751392626528e-06, "loss": 0.2869, "step": 16439 }, { "epoch": 2.677115987460815, "grad_norm": 0.1712539941072464, "learning_rate": 1.7458355823355671e-06, "loss": 0.2751, "step": 16440 }, { "epoch": 2.677278834018646, "grad_norm": 0.16685925424098969, "learning_rate": 1.7440968603026519e-06, "loss": 0.2574, "step": 16441 }, { "epoch": 2.677441680576477, "grad_norm": 0.18077512085437775, "learning_rate": 1.742358973226338e-06, "loss": 0.2721, "step": 16442 }, { "epoch": 2.677604527134308, "grad_norm": 0.16074243187904358, "learning_rate": 1.7406219211690222e-06, "loss": 0.2947, "step": 16443 }, { "epoch": 2.677767373692139, "grad_norm": 0.16921748220920563, "learning_rate": 1.7388857041930634e-06, "loss": 0.2832, "step": 16444 }, { "epoch": 2.6779302202499693, "grad_norm": 0.1827036440372467, "learning_rate": 1.7371503223607976e-06, "loss": 0.2398, "step": 16445 }, { "epoch": 2.6780930668078002, "grad_norm": 0.21513472497463226, "learning_rate": 1.7354157757345363e-06, "loss": 0.2678, "step": 16446 }, { "epoch": 2.678255913365631, "grad_norm": 0.16506391763687134, "learning_rate": 1.7336820643765489e-06, "loss": 0.2624, "step": 16447 }, { "epoch": 2.678418759923462, "grad_norm": 0.13904523849487305, "learning_rate": 1.7319491883490858e-06, "loss": 0.2793, "step": 16448 }, { "epoch": 2.678581606481293, "grad_norm": 0.16997602581977844, "learning_rate": 1.7302171477143552e-06, "loss": 0.2574, "step": 16449 }, { "epoch": 2.678744453039124, "grad_norm": 0.18349821865558624, "learning_rate": 1.7284859425345495e-06, "loss": 0.3453, "step": 16450 }, { "epoch": 2.678907299596955, "grad_norm": 0.19405190646648407, "learning_rate": 1.7267555728718242e-06, "loss": 0.2643, "step": 16451 }, { "epoch": 2.6790701461547854, "grad_norm": 0.1669773906469345, "learning_rate": 1.7250260387882965e-06, "loss": 0.2431, "step": 16452 }, { "epoch": 2.6792329927126164, "grad_norm": 0.21642518043518066, "learning_rate": 1.7232973403460722e-06, "loss": 0.2755, "step": 16453 }, { "epoch": 2.6793958392704473, "grad_norm": 0.17381951212882996, "learning_rate": 1.7215694776072128e-06, "loss": 0.2666, "step": 16454 }, { "epoch": 2.6795586858282783, "grad_norm": 0.1824360489845276, "learning_rate": 1.7198424506337546e-06, "loss": 0.2672, "step": 16455 }, { "epoch": 2.679721532386109, "grad_norm": 0.18199078738689423, "learning_rate": 1.7181162594876954e-06, "loss": 0.2226, "step": 16456 }, { "epoch": 2.67988437894394, "grad_norm": 0.2182982861995697, "learning_rate": 1.7163909042310217e-06, "loss": 0.3084, "step": 16457 }, { "epoch": 2.680047225501771, "grad_norm": 0.1209711804986, "learning_rate": 1.7146663849256729e-06, "loss": 0.2901, "step": 16458 }, { "epoch": 2.6802100720596016, "grad_norm": 0.18999075889587402, "learning_rate": 1.712942701633563e-06, "loss": 0.2568, "step": 16459 }, { "epoch": 2.680372918617433, "grad_norm": 0.16135428845882416, "learning_rate": 1.7112198544165848e-06, "loss": 0.2365, "step": 16460 }, { "epoch": 2.6805357651752635, "grad_norm": 0.2110091596841812, "learning_rate": 1.7094978433365855e-06, "loss": 0.2688, "step": 16461 }, { "epoch": 2.6806986117330944, "grad_norm": 0.18494273722171783, "learning_rate": 1.7077766684553965e-06, "loss": 0.2506, "step": 16462 }, { "epoch": 2.6808614582909254, "grad_norm": 0.23136195540428162, "learning_rate": 1.706056329834807e-06, "loss": 0.2912, "step": 16463 }, { "epoch": 2.6810243048487563, "grad_norm": 0.19398446381092072, "learning_rate": 1.7043368275365873e-06, "loss": 0.2497, "step": 16464 }, { "epoch": 2.6811871514065873, "grad_norm": 0.23024709522724152, "learning_rate": 1.702618161622474e-06, "loss": 0.313, "step": 16465 }, { "epoch": 2.6813499979644178, "grad_norm": 0.1730271279811859, "learning_rate": 1.7009003321541621e-06, "loss": 0.281, "step": 16466 }, { "epoch": 2.681512844522249, "grad_norm": 0.17260652780532837, "learning_rate": 1.6991833391933414e-06, "loss": 0.2842, "step": 16467 }, { "epoch": 2.6816756910800796, "grad_norm": 0.18493229150772095, "learning_rate": 1.697467182801643e-06, "loss": 0.2941, "step": 16468 }, { "epoch": 2.6818385376379106, "grad_norm": 0.15543700754642487, "learning_rate": 1.6957518630406954e-06, "loss": 0.2708, "step": 16469 }, { "epoch": 2.6820013841957415, "grad_norm": 0.1736605316400528, "learning_rate": 1.6940373799720772e-06, "loss": 0.2697, "step": 16470 }, { "epoch": 2.6821642307535725, "grad_norm": 0.2217109352350235, "learning_rate": 1.692323733657339e-06, "loss": 0.2377, "step": 16471 }, { "epoch": 2.6823270773114034, "grad_norm": 0.15745383501052856, "learning_rate": 1.6906109241580148e-06, "loss": 0.2692, "step": 16472 }, { "epoch": 2.6824899238692343, "grad_norm": 0.20877736806869507, "learning_rate": 1.6888989515355945e-06, "loss": 0.3024, "step": 16473 }, { "epoch": 2.6826527704270653, "grad_norm": 0.18820469081401825, "learning_rate": 1.6871878158515403e-06, "loss": 0.289, "step": 16474 }, { "epoch": 2.682815616984896, "grad_norm": 0.15089204907417297, "learning_rate": 1.6854775171672944e-06, "loss": 0.2714, "step": 16475 }, { "epoch": 2.6829784635427267, "grad_norm": 0.1621631532907486, "learning_rate": 1.6837680555442576e-06, "loss": 0.2502, "step": 16476 }, { "epoch": 2.6831413101005577, "grad_norm": 0.14608199894428253, "learning_rate": 1.6820594310438059e-06, "loss": 0.2749, "step": 16477 }, { "epoch": 2.6833041566583886, "grad_norm": 0.18833249807357788, "learning_rate": 1.6803516437272765e-06, "loss": 0.2392, "step": 16478 }, { "epoch": 2.6834670032162196, "grad_norm": 0.21919561922550201, "learning_rate": 1.678644693655998e-06, "loss": 0.2961, "step": 16479 }, { "epoch": 2.6836298497740505, "grad_norm": 0.1705714613199234, "learning_rate": 1.6769385808912436e-06, "loss": 0.2308, "step": 16480 }, { "epoch": 2.6837926963318814, "grad_norm": 0.21545705199241638, "learning_rate": 1.6752333054942698e-06, "loss": 0.2758, "step": 16481 }, { "epoch": 2.683955542889712, "grad_norm": 0.16707555949687958, "learning_rate": 1.6735288675263083e-06, "loss": 0.2357, "step": 16482 }, { "epoch": 2.6841183894475433, "grad_norm": 0.1855689287185669, "learning_rate": 1.671825267048549e-06, "loss": 0.2739, "step": 16483 }, { "epoch": 2.684281236005374, "grad_norm": 0.173119455575943, "learning_rate": 1.670122504122154e-06, "loss": 0.2524, "step": 16484 }, { "epoch": 2.6844440825632048, "grad_norm": 0.1709887832403183, "learning_rate": 1.6684205788082552e-06, "loss": 0.2488, "step": 16485 }, { "epoch": 2.6846069291210357, "grad_norm": 0.19189532101154327, "learning_rate": 1.6667194911679645e-06, "loss": 0.2688, "step": 16486 }, { "epoch": 2.6847697756788667, "grad_norm": 0.19221307337284088, "learning_rate": 1.6650192412623555e-06, "loss": 0.2525, "step": 16487 }, { "epoch": 2.6849326222366976, "grad_norm": 0.21061651408672333, "learning_rate": 1.6633198291524654e-06, "loss": 0.2745, "step": 16488 }, { "epoch": 2.685095468794528, "grad_norm": 0.1769949197769165, "learning_rate": 1.6616212548993094e-06, "loss": 0.2736, "step": 16489 }, { "epoch": 2.6852583153523595, "grad_norm": 0.17961615324020386, "learning_rate": 1.6599235185638802e-06, "loss": 0.274, "step": 16490 }, { "epoch": 2.68542116191019, "grad_norm": 0.1626172810792923, "learning_rate": 1.6582266202071233e-06, "loss": 0.2689, "step": 16491 }, { "epoch": 2.685584008468021, "grad_norm": 0.1411558985710144, "learning_rate": 1.6565305598899599e-06, "loss": 0.2492, "step": 16492 }, { "epoch": 2.685746855025852, "grad_norm": 0.19125862419605255, "learning_rate": 1.654835337673294e-06, "loss": 0.2212, "step": 16493 }, { "epoch": 2.685909701583683, "grad_norm": 0.16854488849639893, "learning_rate": 1.653140953617982e-06, "loss": 0.2618, "step": 16494 }, { "epoch": 2.6860725481415137, "grad_norm": 0.19389936327934265, "learning_rate": 1.6514474077848591e-06, "loss": 0.2616, "step": 16495 }, { "epoch": 2.6862353946993447, "grad_norm": 0.15835589170455933, "learning_rate": 1.6497547002347235e-06, "loss": 0.2912, "step": 16496 }, { "epoch": 2.6863982412571756, "grad_norm": 0.2054927498102188, "learning_rate": 1.6480628310283603e-06, "loss": 0.3075, "step": 16497 }, { "epoch": 2.686561087815006, "grad_norm": 0.2493295967578888, "learning_rate": 1.6463718002265038e-06, "loss": 0.2888, "step": 16498 }, { "epoch": 2.686723934372837, "grad_norm": 0.17360219359397888, "learning_rate": 1.644681607889867e-06, "loss": 0.28, "step": 16499 }, { "epoch": 2.686886780930668, "grad_norm": 0.18252068758010864, "learning_rate": 1.6429922540791343e-06, "loss": 0.2551, "step": 16500 }, { "epoch": 2.687049627488499, "grad_norm": 0.15846924483776093, "learning_rate": 1.641303738854963e-06, "loss": 0.2444, "step": 16501 }, { "epoch": 2.68721247404633, "grad_norm": 0.15294909477233887, "learning_rate": 1.639616062277971e-06, "loss": 0.2308, "step": 16502 }, { "epoch": 2.687375320604161, "grad_norm": 0.17200852930545807, "learning_rate": 1.6379292244087463e-06, "loss": 0.2497, "step": 16503 }, { "epoch": 2.687538167161992, "grad_norm": 0.1991649568080902, "learning_rate": 1.6362432253078625e-06, "loss": 0.2923, "step": 16504 }, { "epoch": 2.6877010137198223, "grad_norm": 0.1483941376209259, "learning_rate": 1.634558065035846e-06, "loss": 0.2393, "step": 16505 }, { "epoch": 2.6878638602776532, "grad_norm": 0.1887020468711853, "learning_rate": 1.632873743653196e-06, "loss": 0.2837, "step": 16506 }, { "epoch": 2.688026706835484, "grad_norm": 0.14366039633750916, "learning_rate": 1.6311902612203945e-06, "loss": 0.2683, "step": 16507 }, { "epoch": 2.688189553393315, "grad_norm": 0.15994296967983246, "learning_rate": 1.6295076177978708e-06, "loss": 0.247, "step": 16508 }, { "epoch": 2.688352399951146, "grad_norm": 0.1679593175649643, "learning_rate": 1.6278258134460489e-06, "loss": 0.2705, "step": 16509 }, { "epoch": 2.688515246508977, "grad_norm": 0.1607663929462433, "learning_rate": 1.6261448482253055e-06, "loss": 0.2232, "step": 16510 }, { "epoch": 2.688678093066808, "grad_norm": 0.16482336819171906, "learning_rate": 1.624464722195987e-06, "loss": 0.2494, "step": 16511 }, { "epoch": 2.6888409396246384, "grad_norm": 0.1959947943687439, "learning_rate": 1.6227854354184224e-06, "loss": 0.2398, "step": 16512 }, { "epoch": 2.68900378618247, "grad_norm": 0.19804182648658752, "learning_rate": 1.6211069879529028e-06, "loss": 0.2693, "step": 16513 }, { "epoch": 2.6891666327403003, "grad_norm": 0.17457610368728638, "learning_rate": 1.6194293798596826e-06, "loss": 0.2704, "step": 16514 }, { "epoch": 2.6893294792981313, "grad_norm": 0.20340262353420258, "learning_rate": 1.6177526111990027e-06, "loss": 0.2561, "step": 16515 }, { "epoch": 2.689492325855962, "grad_norm": 0.18090063333511353, "learning_rate": 1.6160766820310592e-06, "loss": 0.2826, "step": 16516 }, { "epoch": 2.689655172413793, "grad_norm": 0.13865987956523895, "learning_rate": 1.6144015924160205e-06, "loss": 0.2374, "step": 16517 }, { "epoch": 2.689818018971624, "grad_norm": 0.16636502742767334, "learning_rate": 1.6127273424140277e-06, "loss": 0.2604, "step": 16518 }, { "epoch": 2.689980865529455, "grad_norm": 0.18550817668437958, "learning_rate": 1.6110539320851991e-06, "loss": 0.2573, "step": 16519 }, { "epoch": 2.690143712087286, "grad_norm": 0.18704786896705627, "learning_rate": 1.6093813614896065e-06, "loss": 0.2562, "step": 16520 }, { "epoch": 2.6903065586451165, "grad_norm": 0.15514519810676575, "learning_rate": 1.6077096306873012e-06, "loss": 0.2493, "step": 16521 }, { "epoch": 2.6904694052029474, "grad_norm": 0.13577105104923248, "learning_rate": 1.6060387397383082e-06, "loss": 0.2605, "step": 16522 }, { "epoch": 2.6906322517607784, "grad_norm": 0.19645830988883972, "learning_rate": 1.6043686887026149e-06, "loss": 0.265, "step": 16523 }, { "epoch": 2.6907950983186093, "grad_norm": 0.15045878291130066, "learning_rate": 1.6026994776401793e-06, "loss": 0.2926, "step": 16524 }, { "epoch": 2.6909579448764402, "grad_norm": 0.16356267035007477, "learning_rate": 1.6010311066109312e-06, "loss": 0.2708, "step": 16525 }, { "epoch": 2.691120791434271, "grad_norm": 0.1695115864276886, "learning_rate": 1.5993635756747727e-06, "loss": 0.3094, "step": 16526 }, { "epoch": 2.691283637992102, "grad_norm": 0.15760192275047302, "learning_rate": 1.5976968848915751e-06, "loss": 0.2675, "step": 16527 }, { "epoch": 2.6914464845499326, "grad_norm": 0.1555716097354889, "learning_rate": 1.5960310343211716e-06, "loss": 0.2971, "step": 16528 }, { "epoch": 2.6916093311077636, "grad_norm": 0.18484902381896973, "learning_rate": 1.5943660240233721e-06, "loss": 0.2465, "step": 16529 }, { "epoch": 2.6917721776655945, "grad_norm": 0.13631977140903473, "learning_rate": 1.5927018540579624e-06, "loss": 0.2408, "step": 16530 }, { "epoch": 2.6919350242234255, "grad_norm": 0.19055317342281342, "learning_rate": 1.5910385244846838e-06, "loss": 0.2533, "step": 16531 }, { "epoch": 2.6920978707812564, "grad_norm": 0.2222004383802414, "learning_rate": 1.5893760353632547e-06, "loss": 0.3012, "step": 16532 }, { "epoch": 2.6922607173390873, "grad_norm": 0.20498570799827576, "learning_rate": 1.5877143867533695e-06, "loss": 0.2846, "step": 16533 }, { "epoch": 2.6924235638969183, "grad_norm": 0.14316460490226746, "learning_rate": 1.5860535787146829e-06, "loss": 0.2466, "step": 16534 }, { "epoch": 2.692586410454749, "grad_norm": 0.2022794932126999, "learning_rate": 1.5843936113068225e-06, "loss": 0.2944, "step": 16535 }, { "epoch": 2.69274925701258, "grad_norm": 0.15893399715423584, "learning_rate": 1.5827344845893848e-06, "loss": 0.2585, "step": 16536 }, { "epoch": 2.6929121035704107, "grad_norm": 0.1460282951593399, "learning_rate": 1.5810761986219418e-06, "loss": 0.274, "step": 16537 }, { "epoch": 2.6930749501282416, "grad_norm": 0.17775163054466248, "learning_rate": 1.5794187534640265e-06, "loss": 0.2724, "step": 16538 }, { "epoch": 2.6932377966860725, "grad_norm": 0.1922309249639511, "learning_rate": 1.5777621491751492e-06, "loss": 0.295, "step": 16539 }, { "epoch": 2.6934006432439035, "grad_norm": 0.17632074654102325, "learning_rate": 1.5761063858147824e-06, "loss": 0.2309, "step": 16540 }, { "epoch": 2.6935634898017344, "grad_norm": 0.13535653054714203, "learning_rate": 1.5744514634423807e-06, "loss": 0.2807, "step": 16541 }, { "epoch": 2.693726336359565, "grad_norm": 0.17589205503463745, "learning_rate": 1.5727973821173524e-06, "loss": 0.2943, "step": 16542 }, { "epoch": 2.6938891829173963, "grad_norm": 0.18770140409469604, "learning_rate": 1.571144141899089e-06, "loss": 0.3358, "step": 16543 }, { "epoch": 2.694052029475227, "grad_norm": 0.17805224657058716, "learning_rate": 1.5694917428469458e-06, "loss": 0.2576, "step": 16544 }, { "epoch": 2.6942148760330578, "grad_norm": 0.18690314888954163, "learning_rate": 1.5678401850202473e-06, "loss": 0.2705, "step": 16545 }, { "epoch": 2.6943777225908887, "grad_norm": 0.1649184674024582, "learning_rate": 1.5661894684782935e-06, "loss": 0.2984, "step": 16546 }, { "epoch": 2.6945405691487196, "grad_norm": 0.20275317132472992, "learning_rate": 1.5645395932803452e-06, "loss": 0.2618, "step": 16547 }, { "epoch": 2.6947034157065506, "grad_norm": 0.19813504815101624, "learning_rate": 1.5628905594856386e-06, "loss": 0.2802, "step": 16548 }, { "epoch": 2.6948662622643815, "grad_norm": 0.25993314385414124, "learning_rate": 1.5612423671533844e-06, "loss": 0.2873, "step": 16549 }, { "epoch": 2.6950291088222125, "grad_norm": 0.1555178016424179, "learning_rate": 1.5595950163427519e-06, "loss": 0.2833, "step": 16550 }, { "epoch": 2.695191955380043, "grad_norm": 0.2333715558052063, "learning_rate": 1.5579485071128858e-06, "loss": 0.3115, "step": 16551 }, { "epoch": 2.695354801937874, "grad_norm": 0.25829172134399414, "learning_rate": 1.556302839522905e-06, "loss": 0.2663, "step": 16552 }, { "epoch": 2.695517648495705, "grad_norm": 0.1764134019613266, "learning_rate": 1.5546580136318933e-06, "loss": 0.2737, "step": 16553 }, { "epoch": 2.695680495053536, "grad_norm": 0.1781623363494873, "learning_rate": 1.5530140294988977e-06, "loss": 0.3008, "step": 16554 }, { "epoch": 2.6958433416113667, "grad_norm": 0.20278145372867584, "learning_rate": 1.5513708871829513e-06, "loss": 0.2692, "step": 16555 }, { "epoch": 2.6960061881691977, "grad_norm": 0.18805554509162903, "learning_rate": 1.5497285867430432e-06, "loss": 0.2929, "step": 16556 }, { "epoch": 2.6961690347270286, "grad_norm": 0.17119841277599335, "learning_rate": 1.5480871282381403e-06, "loss": 0.2905, "step": 16557 }, { "epoch": 2.696331881284859, "grad_norm": 0.22175046801567078, "learning_rate": 1.5464465117271676e-06, "loss": 0.2692, "step": 16558 }, { "epoch": 2.69649472784269, "grad_norm": 0.14995920658111572, "learning_rate": 1.5448067372690388e-06, "loss": 0.2869, "step": 16559 }, { "epoch": 2.696657574400521, "grad_norm": 0.19280602037906647, "learning_rate": 1.543167804922621e-06, "loss": 0.2672, "step": 16560 }, { "epoch": 2.696820420958352, "grad_norm": 0.18157097697257996, "learning_rate": 1.5415297147467533e-06, "loss": 0.2503, "step": 16561 }, { "epoch": 2.696983267516183, "grad_norm": 0.21060946583747864, "learning_rate": 1.5398924668002552e-06, "loss": 0.2605, "step": 16562 }, { "epoch": 2.697146114074014, "grad_norm": 0.21466992795467377, "learning_rate": 1.5382560611419072e-06, "loss": 0.3014, "step": 16563 }, { "epoch": 2.6973089606318448, "grad_norm": 0.20937921106815338, "learning_rate": 1.5366204978304599e-06, "loss": 0.2834, "step": 16564 }, { "epoch": 2.6974718071896753, "grad_norm": 0.17303669452667236, "learning_rate": 1.5349857769246328e-06, "loss": 0.2292, "step": 16565 }, { "epoch": 2.6976346537475067, "grad_norm": 0.1927996277809143, "learning_rate": 1.5333518984831207e-06, "loss": 0.2804, "step": 16566 }, { "epoch": 2.697797500305337, "grad_norm": 0.25244924426078796, "learning_rate": 1.531718862564585e-06, "loss": 0.2638, "step": 16567 }, { "epoch": 2.697960346863168, "grad_norm": 0.18208782374858856, "learning_rate": 1.5300866692276539e-06, "loss": 0.253, "step": 16568 }, { "epoch": 2.698123193420999, "grad_norm": 0.23166094720363617, "learning_rate": 1.5284553185309247e-06, "loss": 0.2898, "step": 16569 }, { "epoch": 2.69828603997883, "grad_norm": 0.15323790907859802, "learning_rate": 1.5268248105329757e-06, "loss": 0.2478, "step": 16570 }, { "epoch": 2.698448886536661, "grad_norm": 0.1736622303724289, "learning_rate": 1.5251951452923463e-06, "loss": 0.2677, "step": 16571 }, { "epoch": 2.698611733094492, "grad_norm": 0.18528777360916138, "learning_rate": 1.5235663228675368e-06, "loss": 0.2627, "step": 16572 }, { "epoch": 2.698774579652323, "grad_norm": 0.19750310480594635, "learning_rate": 1.5219383433170392e-06, "loss": 0.2312, "step": 16573 }, { "epoch": 2.6989374262101533, "grad_norm": 0.19003409147262573, "learning_rate": 1.5203112066992959e-06, "loss": 0.2779, "step": 16574 }, { "epoch": 2.6991002727679843, "grad_norm": 0.20541886985301971, "learning_rate": 1.5186849130727294e-06, "loss": 0.267, "step": 16575 }, { "epoch": 2.699263119325815, "grad_norm": 0.16387738287448883, "learning_rate": 1.5170594624957207e-06, "loss": 0.2124, "step": 16576 }, { "epoch": 2.699425965883646, "grad_norm": 0.20246684551239014, "learning_rate": 1.5154348550266373e-06, "loss": 0.2738, "step": 16577 }, { "epoch": 2.699588812441477, "grad_norm": 0.19734346866607666, "learning_rate": 1.5138110907238072e-06, "loss": 0.2602, "step": 16578 }, { "epoch": 2.699751658999308, "grad_norm": 0.15739275515079498, "learning_rate": 1.512188169645523e-06, "loss": 0.254, "step": 16579 }, { "epoch": 2.699914505557139, "grad_norm": 0.14704743027687073, "learning_rate": 1.5105660918500519e-06, "loss": 0.2616, "step": 16580 }, { "epoch": 2.7000773521149695, "grad_norm": 0.19061243534088135, "learning_rate": 1.5089448573956388e-06, "loss": 0.2869, "step": 16581 }, { "epoch": 2.7002401986728004, "grad_norm": 0.15866532921791077, "learning_rate": 1.5073244663404818e-06, "loss": 0.2713, "step": 16582 }, { "epoch": 2.7004030452306313, "grad_norm": 0.22022387385368347, "learning_rate": 1.5057049187427646e-06, "loss": 0.2862, "step": 16583 }, { "epoch": 2.7005658917884623, "grad_norm": 0.19027674198150635, "learning_rate": 1.5040862146606326e-06, "loss": 0.219, "step": 16584 }, { "epoch": 2.7007287383462932, "grad_norm": 0.18111957609653473, "learning_rate": 1.5024683541521978e-06, "loss": 0.2638, "step": 16585 }, { "epoch": 2.700891584904124, "grad_norm": 0.16572371125221252, "learning_rate": 1.5008513372755551e-06, "loss": 0.2477, "step": 16586 }, { "epoch": 2.701054431461955, "grad_norm": 0.1646125614643097, "learning_rate": 1.4992351640887469e-06, "loss": 0.243, "step": 16587 }, { "epoch": 2.7012172780197856, "grad_norm": 0.19264784455299377, "learning_rate": 1.4976198346498132e-06, "loss": 0.3058, "step": 16588 }, { "epoch": 2.701380124577617, "grad_norm": 0.1718471795320511, "learning_rate": 1.4960053490167437e-06, "loss": 0.2497, "step": 16589 }, { "epoch": 2.7015429711354475, "grad_norm": 0.1961871087551117, "learning_rate": 1.4943917072475e-06, "loss": 0.2831, "step": 16590 }, { "epoch": 2.7017058176932784, "grad_norm": 0.16300468146800995, "learning_rate": 1.4927789094000138e-06, "loss": 0.2721, "step": 16591 }, { "epoch": 2.7018686642511094, "grad_norm": 0.19837592542171478, "learning_rate": 1.4911669555322e-06, "loss": 0.2812, "step": 16592 }, { "epoch": 2.7020315108089403, "grad_norm": 0.14973516762256622, "learning_rate": 1.489555845701926e-06, "loss": 0.2489, "step": 16593 }, { "epoch": 2.7021943573667713, "grad_norm": 0.22422310709953308, "learning_rate": 1.4879455799670317e-06, "loss": 0.2474, "step": 16594 }, { "epoch": 2.7023572039246018, "grad_norm": 0.23921240866184235, "learning_rate": 1.4863361583853402e-06, "loss": 0.2745, "step": 16595 }, { "epoch": 2.702520050482433, "grad_norm": 0.182396799325943, "learning_rate": 1.4847275810146305e-06, "loss": 0.2799, "step": 16596 }, { "epoch": 2.7026828970402637, "grad_norm": 0.1581973284482956, "learning_rate": 1.4831198479126506e-06, "loss": 0.238, "step": 16597 }, { "epoch": 2.7028457435980946, "grad_norm": 0.16203907132148743, "learning_rate": 1.4815129591371268e-06, "loss": 0.3024, "step": 16598 }, { "epoch": 2.7030085901559255, "grad_norm": 0.2111230343580246, "learning_rate": 1.4799069147457516e-06, "loss": 0.276, "step": 16599 }, { "epoch": 2.7031714367137565, "grad_norm": 0.1601678729057312, "learning_rate": 1.4783017147961874e-06, "loss": 0.2543, "step": 16600 }, { "epoch": 2.7033342832715874, "grad_norm": 0.12550100684165955, "learning_rate": 1.476697359346066e-06, "loss": 0.2881, "step": 16601 }, { "epoch": 2.7034971298294184, "grad_norm": 0.22847333550453186, "learning_rate": 1.4750938484529824e-06, "loss": 0.3015, "step": 16602 }, { "epoch": 2.7036599763872493, "grad_norm": 0.2297677993774414, "learning_rate": 1.4734911821745163e-06, "loss": 0.2682, "step": 16603 }, { "epoch": 2.70382282294508, "grad_norm": 0.14167547225952148, "learning_rate": 1.4718893605682015e-06, "loss": 0.2471, "step": 16604 }, { "epoch": 2.7039856695029107, "grad_norm": 0.1728304773569107, "learning_rate": 1.470288383691551e-06, "loss": 0.2802, "step": 16605 }, { "epoch": 2.7041485160607417, "grad_norm": 0.15386627614498138, "learning_rate": 1.468688251602046e-06, "loss": 0.2432, "step": 16606 }, { "epoch": 2.7043113626185726, "grad_norm": 0.16065166890621185, "learning_rate": 1.467088964357133e-06, "loss": 0.3146, "step": 16607 }, { "epoch": 2.7044742091764036, "grad_norm": 0.19281642138957977, "learning_rate": 1.4654905220142345e-06, "loss": 0.2745, "step": 16608 }, { "epoch": 2.7046370557342345, "grad_norm": 0.1645796298980713, "learning_rate": 1.4638929246307332e-06, "loss": 0.3435, "step": 16609 }, { "epoch": 2.7047999022920655, "grad_norm": 0.18197211623191833, "learning_rate": 1.4622961722639967e-06, "loss": 0.2896, "step": 16610 }, { "epoch": 2.704962748849896, "grad_norm": 0.22572538256645203, "learning_rate": 1.4607002649713485e-06, "loss": 0.2479, "step": 16611 }, { "epoch": 2.7051255954077273, "grad_norm": 0.16535750031471252, "learning_rate": 1.4591052028100794e-06, "loss": 0.3066, "step": 16612 }, { "epoch": 2.705288441965558, "grad_norm": 0.2043856382369995, "learning_rate": 1.4575109858374707e-06, "loss": 0.251, "step": 16613 }, { "epoch": 2.705451288523389, "grad_norm": 0.19777534902095795, "learning_rate": 1.4559176141107522e-06, "loss": 0.2786, "step": 16614 }, { "epoch": 2.7056141350812197, "grad_norm": 0.1669735312461853, "learning_rate": 1.4543250876871333e-06, "loss": 0.2332, "step": 16615 }, { "epoch": 2.7057769816390507, "grad_norm": 0.21252669394016266, "learning_rate": 1.4527334066237825e-06, "loss": 0.3366, "step": 16616 }, { "epoch": 2.7059398281968816, "grad_norm": 0.1739211231470108, "learning_rate": 1.4511425709778536e-06, "loss": 0.2501, "step": 16617 }, { "epoch": 2.706102674754712, "grad_norm": 0.1628400981426239, "learning_rate": 1.4495525808064654e-06, "loss": 0.2747, "step": 16618 }, { "epoch": 2.7062655213125435, "grad_norm": 0.1906941533088684, "learning_rate": 1.447963436166691e-06, "loss": 0.2839, "step": 16619 }, { "epoch": 2.706428367870374, "grad_norm": 0.15332169830799103, "learning_rate": 1.4463751371155992e-06, "loss": 0.2797, "step": 16620 }, { "epoch": 2.706591214428205, "grad_norm": 0.2307489663362503, "learning_rate": 1.4447876837102054e-06, "loss": 0.3117, "step": 16621 }, { "epoch": 2.706754060986036, "grad_norm": 0.17176201939582825, "learning_rate": 1.4432010760075054e-06, "loss": 0.2649, "step": 16622 }, { "epoch": 2.706916907543867, "grad_norm": 0.21594899892807007, "learning_rate": 1.4416153140644706e-06, "loss": 0.2842, "step": 16623 }, { "epoch": 2.7070797541016978, "grad_norm": 0.16367579996585846, "learning_rate": 1.4400303979380248e-06, "loss": 0.2651, "step": 16624 }, { "epoch": 2.7072426006595287, "grad_norm": 0.17735017836093903, "learning_rate": 1.438446327685075e-06, "loss": 0.2785, "step": 16625 }, { "epoch": 2.7074054472173597, "grad_norm": 0.16498105227947235, "learning_rate": 1.4368631033624958e-06, "loss": 0.239, "step": 16626 }, { "epoch": 2.70756829377519, "grad_norm": 0.24165655672550201, "learning_rate": 1.4352807250271244e-06, "loss": 0.2569, "step": 16627 }, { "epoch": 2.707731140333021, "grad_norm": 0.16082873940467834, "learning_rate": 1.4336991927357823e-06, "loss": 0.2454, "step": 16628 }, { "epoch": 2.707893986890852, "grad_norm": 0.18569768965244293, "learning_rate": 1.4321185065452464e-06, "loss": 0.2356, "step": 16629 }, { "epoch": 2.708056833448683, "grad_norm": 0.17242836952209473, "learning_rate": 1.4305386665122656e-06, "loss": 0.2663, "step": 16630 }, { "epoch": 2.708219680006514, "grad_norm": 0.1827026903629303, "learning_rate": 1.4289596726935584e-06, "loss": 0.2493, "step": 16631 }, { "epoch": 2.708382526564345, "grad_norm": 0.19847621023654938, "learning_rate": 1.4273815251458238e-06, "loss": 0.2389, "step": 16632 }, { "epoch": 2.708545373122176, "grad_norm": 0.18728868663311005, "learning_rate": 1.425804223925717e-06, "loss": 0.2839, "step": 16633 }, { "epoch": 2.7087082196800063, "grad_norm": 0.18987694382667542, "learning_rate": 1.424227769089867e-06, "loss": 0.2996, "step": 16634 }, { "epoch": 2.7088710662378372, "grad_norm": 0.16502326726913452, "learning_rate": 1.422652160694879e-06, "loss": 0.2786, "step": 16635 }, { "epoch": 2.709033912795668, "grad_norm": 0.14428232610225677, "learning_rate": 1.421077398797316e-06, "loss": 0.2589, "step": 16636 }, { "epoch": 2.709196759353499, "grad_norm": 0.2121686190366745, "learning_rate": 1.4195034834537184e-06, "loss": 0.2851, "step": 16637 }, { "epoch": 2.70935960591133, "grad_norm": 0.15717720985412598, "learning_rate": 1.4179304147205947e-06, "loss": 0.3003, "step": 16638 }, { "epoch": 2.709522452469161, "grad_norm": 0.14890311658382416, "learning_rate": 1.4163581926544239e-06, "loss": 0.3012, "step": 16639 }, { "epoch": 2.709685299026992, "grad_norm": 0.15072643756866455, "learning_rate": 1.414786817311653e-06, "loss": 0.2504, "step": 16640 }, { "epoch": 2.7098481455848225, "grad_norm": 0.17465908825397491, "learning_rate": 1.4132162887486978e-06, "loss": 0.2785, "step": 16641 }, { "epoch": 2.710010992142654, "grad_norm": 0.16503670811653137, "learning_rate": 1.4116466070219408e-06, "loss": 0.2453, "step": 16642 }, { "epoch": 2.7101738387004843, "grad_norm": 0.15206998586654663, "learning_rate": 1.4100777721877483e-06, "loss": 0.2793, "step": 16643 }, { "epoch": 2.7103366852583153, "grad_norm": 0.1649392694234848, "learning_rate": 1.4085097843024419e-06, "loss": 0.2404, "step": 16644 }, { "epoch": 2.7104995318161462, "grad_norm": 0.1704077571630478, "learning_rate": 1.4069426434223127e-06, "loss": 0.2854, "step": 16645 }, { "epoch": 2.710662378373977, "grad_norm": 0.19677488505840302, "learning_rate": 1.405376349603632e-06, "loss": 0.2861, "step": 16646 }, { "epoch": 2.710825224931808, "grad_norm": 0.23875492811203003, "learning_rate": 1.403810902902633e-06, "loss": 0.2453, "step": 16647 }, { "epoch": 2.710988071489639, "grad_norm": 0.21330857276916504, "learning_rate": 1.402246303375518e-06, "loss": 0.2667, "step": 16648 }, { "epoch": 2.71115091804747, "grad_norm": 0.1642547845840454, "learning_rate": 1.4006825510784555e-06, "loss": 0.2362, "step": 16649 }, { "epoch": 2.7113137646053005, "grad_norm": 0.1764487624168396, "learning_rate": 1.3991196460676008e-06, "loss": 0.2502, "step": 16650 }, { "epoch": 2.7114766111631314, "grad_norm": 0.19861002266407013, "learning_rate": 1.397557588399062e-06, "loss": 0.2714, "step": 16651 }, { "epoch": 2.7116394577209624, "grad_norm": 0.168007954955101, "learning_rate": 1.3959963781289138e-06, "loss": 0.2599, "step": 16652 }, { "epoch": 2.7118023042787933, "grad_norm": 0.18443803489208221, "learning_rate": 1.3944360153132218e-06, "loss": 0.2773, "step": 16653 }, { "epoch": 2.7119651508366243, "grad_norm": 0.17111270129680634, "learning_rate": 1.3928765000080001e-06, "loss": 0.2837, "step": 16654 }, { "epoch": 2.712127997394455, "grad_norm": 0.18360505998134613, "learning_rate": 1.39131783226924e-06, "loss": 0.2711, "step": 16655 }, { "epoch": 2.712290843952286, "grad_norm": 0.18887709081172943, "learning_rate": 1.3897600121528992e-06, "loss": 0.2456, "step": 16656 }, { "epoch": 2.7124536905101166, "grad_norm": 0.1670873761177063, "learning_rate": 1.3882030397149166e-06, "loss": 0.2694, "step": 16657 }, { "epoch": 2.7126165370679476, "grad_norm": 0.17280082404613495, "learning_rate": 1.386646915011189e-06, "loss": 0.2667, "step": 16658 }, { "epoch": 2.7127793836257785, "grad_norm": 0.15727810561656952, "learning_rate": 1.3850916380975797e-06, "loss": 0.2852, "step": 16659 }, { "epoch": 2.7129422301836095, "grad_norm": 0.18898069858551025, "learning_rate": 1.3835372090299364e-06, "loss": 0.2814, "step": 16660 }, { "epoch": 2.7131050767414404, "grad_norm": 0.17417463660240173, "learning_rate": 1.381983627864064e-06, "loss": 0.2518, "step": 16661 }, { "epoch": 2.7132679232992714, "grad_norm": 0.20093664526939392, "learning_rate": 1.3804308946557375e-06, "loss": 0.2936, "step": 16662 }, { "epoch": 2.7134307698571023, "grad_norm": 0.12964807450771332, "learning_rate": 1.3788790094607095e-06, "loss": 0.2622, "step": 16663 }, { "epoch": 2.713593616414933, "grad_norm": 0.1819463074207306, "learning_rate": 1.3773279723346938e-06, "loss": 0.3006, "step": 16664 }, { "epoch": 2.713756462972764, "grad_norm": 0.14621639251708984, "learning_rate": 1.3757777833333846e-06, "loss": 0.2471, "step": 16665 }, { "epoch": 2.7139193095305947, "grad_norm": 0.15400008857250214, "learning_rate": 1.3742284425124318e-06, "loss": 0.2945, "step": 16666 }, { "epoch": 2.7140821560884256, "grad_norm": 0.2134730964899063, "learning_rate": 1.3726799499274579e-06, "loss": 0.2763, "step": 16667 }, { "epoch": 2.7142450026462566, "grad_norm": 0.16171491146087646, "learning_rate": 1.371132305634068e-06, "loss": 0.2449, "step": 16668 }, { "epoch": 2.7144078492040875, "grad_norm": 0.16477859020233154, "learning_rate": 1.3695855096878236e-06, "loss": 0.271, "step": 16669 }, { "epoch": 2.7145706957619185, "grad_norm": 0.21705196797847748, "learning_rate": 1.3680395621442548e-06, "loss": 0.2681, "step": 16670 }, { "epoch": 2.714733542319749, "grad_norm": 0.19377321004867554, "learning_rate": 1.3664944630588678e-06, "loss": 0.259, "step": 16671 }, { "epoch": 2.7148963888775803, "grad_norm": 0.1593606024980545, "learning_rate": 1.36495021248714e-06, "loss": 0.2805, "step": 16672 }, { "epoch": 2.715059235435411, "grad_norm": 0.15059122443199158, "learning_rate": 1.3634068104845133e-06, "loss": 0.2679, "step": 16673 }, { "epoch": 2.7152220819932418, "grad_norm": 0.16712990403175354, "learning_rate": 1.3618642571063934e-06, "loss": 0.2869, "step": 16674 }, { "epoch": 2.7153849285510727, "grad_norm": 0.1671086847782135, "learning_rate": 1.360322552408172e-06, "loss": 0.2629, "step": 16675 }, { "epoch": 2.7155477751089037, "grad_norm": 0.16873694956302643, "learning_rate": 1.3587816964451994e-06, "loss": 0.2368, "step": 16676 }, { "epoch": 2.7157106216667346, "grad_norm": 0.22827011346817017, "learning_rate": 1.3572416892727953e-06, "loss": 0.3172, "step": 16677 }, { "epoch": 2.7158734682245655, "grad_norm": 0.1415119767189026, "learning_rate": 1.3557025309462456e-06, "loss": 0.2827, "step": 16678 }, { "epoch": 2.7160363147823965, "grad_norm": 0.20177866518497467, "learning_rate": 1.3541642215208177e-06, "loss": 0.245, "step": 16679 }, { "epoch": 2.716199161340227, "grad_norm": 0.22402134537696838, "learning_rate": 1.3526267610517395e-06, "loss": 0.2979, "step": 16680 }, { "epoch": 2.716362007898058, "grad_norm": 0.17115531861782074, "learning_rate": 1.3510901495942114e-06, "loss": 0.2418, "step": 16681 }, { "epoch": 2.716524854455889, "grad_norm": 0.17724822461605072, "learning_rate": 1.3495543872033944e-06, "loss": 0.3089, "step": 16682 }, { "epoch": 2.71668770101372, "grad_norm": 0.17246203124523163, "learning_rate": 1.3480194739344392e-06, "loss": 0.288, "step": 16683 }, { "epoch": 2.7168505475715508, "grad_norm": 0.24746499955654144, "learning_rate": 1.346485409842449e-06, "loss": 0.2573, "step": 16684 }, { "epoch": 2.7170133941293817, "grad_norm": 0.15565899014472961, "learning_rate": 1.3449521949824962e-06, "loss": 0.2743, "step": 16685 }, { "epoch": 2.7171762406872126, "grad_norm": 0.16416674852371216, "learning_rate": 1.343419829409634e-06, "loss": 0.2476, "step": 16686 }, { "epoch": 2.717339087245043, "grad_norm": 0.1937076300382614, "learning_rate": 1.3418883131788795e-06, "loss": 0.2551, "step": 16687 }, { "epoch": 2.717501933802874, "grad_norm": 0.2070130705833435, "learning_rate": 1.3403576463452138e-06, "loss": 0.2681, "step": 16688 }, { "epoch": 2.717664780360705, "grad_norm": 0.18716038763523102, "learning_rate": 1.338827828963593e-06, "loss": 0.287, "step": 16689 }, { "epoch": 2.717827626918536, "grad_norm": 0.22862562537193298, "learning_rate": 1.3372988610889453e-06, "loss": 0.2825, "step": 16690 }, { "epoch": 2.717990473476367, "grad_norm": 0.15728722512722015, "learning_rate": 1.3357707427761657e-06, "loss": 0.2618, "step": 16691 }, { "epoch": 2.718153320034198, "grad_norm": 0.2022000253200531, "learning_rate": 1.3342434740801158e-06, "loss": 0.2913, "step": 16692 }, { "epoch": 2.718316166592029, "grad_norm": 0.2046690285205841, "learning_rate": 1.3327170550556267e-06, "loss": 0.2503, "step": 16693 }, { "epoch": 2.7184790131498593, "grad_norm": 0.2129102200269699, "learning_rate": 1.3311914857575075e-06, "loss": 0.308, "step": 16694 }, { "epoch": 2.7186418597076907, "grad_norm": 0.18278197944164276, "learning_rate": 1.3296667662405255e-06, "loss": 0.246, "step": 16695 }, { "epoch": 2.718804706265521, "grad_norm": 0.19709409773349762, "learning_rate": 1.3281428965594257e-06, "loss": 0.2854, "step": 16696 }, { "epoch": 2.718967552823352, "grad_norm": 0.2140934318304062, "learning_rate": 1.326619876768917e-06, "loss": 0.258, "step": 16697 }, { "epoch": 2.719130399381183, "grad_norm": 0.16791072487831116, "learning_rate": 1.3250977069236864e-06, "loss": 0.2677, "step": 16698 }, { "epoch": 2.719293245939014, "grad_norm": 0.1639719307422638, "learning_rate": 1.3235763870783735e-06, "loss": 0.2751, "step": 16699 }, { "epoch": 2.719456092496845, "grad_norm": 0.18273231387138367, "learning_rate": 1.3220559172876095e-06, "loss": 0.2623, "step": 16700 }, { "epoch": 2.719618939054676, "grad_norm": 0.18575498461723328, "learning_rate": 1.320536297605976e-06, "loss": 0.2481, "step": 16701 }, { "epoch": 2.719781785612507, "grad_norm": 0.14037436246871948, "learning_rate": 1.3190175280880402e-06, "loss": 0.2674, "step": 16702 }, { "epoch": 2.7199446321703373, "grad_norm": 0.2077021300792694, "learning_rate": 1.3174996087883223e-06, "loss": 0.2629, "step": 16703 }, { "epoch": 2.7201074787281683, "grad_norm": 0.16154485940933228, "learning_rate": 1.3159825397613207e-06, "loss": 0.3111, "step": 16704 }, { "epoch": 2.720270325285999, "grad_norm": 0.16006909310817719, "learning_rate": 1.3144663210615083e-06, "loss": 0.2592, "step": 16705 }, { "epoch": 2.72043317184383, "grad_norm": 0.16480396687984467, "learning_rate": 1.3129509527433192e-06, "loss": 0.3047, "step": 16706 }, { "epoch": 2.720596018401661, "grad_norm": 0.21968883275985718, "learning_rate": 1.3114364348611546e-06, "loss": 0.2839, "step": 16707 }, { "epoch": 2.720758864959492, "grad_norm": 0.1780027151107788, "learning_rate": 1.3099227674693986e-06, "loss": 0.2282, "step": 16708 }, { "epoch": 2.720921711517323, "grad_norm": 0.1605425924062729, "learning_rate": 1.308409950622394e-06, "loss": 0.2588, "step": 16709 }, { "epoch": 2.7210845580751535, "grad_norm": 0.16619127988815308, "learning_rate": 1.3068979843744528e-06, "loss": 0.2502, "step": 16710 }, { "epoch": 2.7212474046329844, "grad_norm": 0.16015362739562988, "learning_rate": 1.3053868687798592e-06, "loss": 0.2505, "step": 16711 }, { "epoch": 2.7214102511908154, "grad_norm": 0.1840205043554306, "learning_rate": 1.3038766038928674e-06, "loss": 0.2469, "step": 16712 }, { "epoch": 2.7215730977486463, "grad_norm": 0.17962689697742462, "learning_rate": 1.3023671897677031e-06, "loss": 0.275, "step": 16713 }, { "epoch": 2.7217359443064773, "grad_norm": 0.17967402935028076, "learning_rate": 1.3008586264585537e-06, "loss": 0.2539, "step": 16714 }, { "epoch": 2.721898790864308, "grad_norm": 0.18495239317417145, "learning_rate": 1.299350914019587e-06, "loss": 0.2382, "step": 16715 }, { "epoch": 2.722061637422139, "grad_norm": 0.18585623800754547, "learning_rate": 1.2978440525049318e-06, "loss": 0.2674, "step": 16716 }, { "epoch": 2.7222244839799696, "grad_norm": 0.15094123780727386, "learning_rate": 1.2963380419686893e-06, "loss": 0.272, "step": 16717 }, { "epoch": 2.722387330537801, "grad_norm": 0.19372820854187012, "learning_rate": 1.2948328824649247e-06, "loss": 0.2481, "step": 16718 }, { "epoch": 2.7225501770956315, "grad_norm": 0.1848393976688385, "learning_rate": 1.2933285740476863e-06, "loss": 0.2787, "step": 16719 }, { "epoch": 2.7227130236534625, "grad_norm": 0.16518519818782806, "learning_rate": 1.2918251167709782e-06, "loss": 0.2594, "step": 16720 }, { "epoch": 2.7228758702112934, "grad_norm": 0.16384975612163544, "learning_rate": 1.2903225106887823e-06, "loss": 0.2631, "step": 16721 }, { "epoch": 2.7230387167691243, "grad_norm": 0.1728079915046692, "learning_rate": 1.2888207558550385e-06, "loss": 0.2732, "step": 16722 }, { "epoch": 2.7232015633269553, "grad_norm": 0.18037447333335876, "learning_rate": 1.2873198523236735e-06, "loss": 0.2866, "step": 16723 }, { "epoch": 2.723364409884786, "grad_norm": 0.20956672728061676, "learning_rate": 1.2858198001485716e-06, "loss": 0.3009, "step": 16724 }, { "epoch": 2.723527256442617, "grad_norm": 0.1755371391773224, "learning_rate": 1.2843205993835844e-06, "loss": 0.2645, "step": 16725 }, { "epoch": 2.7236901030004477, "grad_norm": 0.1972401887178421, "learning_rate": 1.2828222500825466e-06, "loss": 0.2593, "step": 16726 }, { "epoch": 2.7238529495582786, "grad_norm": 0.14798498153686523, "learning_rate": 1.2813247522992484e-06, "loss": 0.2753, "step": 16727 }, { "epoch": 2.7240157961161096, "grad_norm": 0.17259487509727478, "learning_rate": 1.279828106087455e-06, "loss": 0.2312, "step": 16728 }, { "epoch": 2.7241786426739405, "grad_norm": 0.20909476280212402, "learning_rate": 1.278332311500896e-06, "loss": 0.2717, "step": 16729 }, { "epoch": 2.7243414892317714, "grad_norm": 0.15181675553321838, "learning_rate": 1.2768373685932812e-06, "loss": 0.2302, "step": 16730 }, { "epoch": 2.7245043357896024, "grad_norm": 0.17081955075263977, "learning_rate": 1.275343277418281e-06, "loss": 0.2376, "step": 16731 }, { "epoch": 2.7246671823474333, "grad_norm": 0.15237683057785034, "learning_rate": 1.273850038029542e-06, "loss": 0.24, "step": 16732 }, { "epoch": 2.724830028905264, "grad_norm": 0.17852631211280823, "learning_rate": 1.2723576504806656e-06, "loss": 0.2563, "step": 16733 }, { "epoch": 2.7249928754630948, "grad_norm": 0.14761322736740112, "learning_rate": 1.270866114825242e-06, "loss": 0.2706, "step": 16734 }, { "epoch": 2.7251557220209257, "grad_norm": 0.2296009361743927, "learning_rate": 1.2693754311168205e-06, "loss": 0.266, "step": 16735 }, { "epoch": 2.7253185685787567, "grad_norm": 0.13134890794754028, "learning_rate": 1.267885599408916e-06, "loss": 0.2467, "step": 16736 }, { "epoch": 2.7254814151365876, "grad_norm": 0.20506466925144196, "learning_rate": 1.2663966197550282e-06, "loss": 0.2656, "step": 16737 }, { "epoch": 2.7256442616944185, "grad_norm": 0.17590869963169098, "learning_rate": 1.2649084922086025e-06, "loss": 0.2886, "step": 16738 }, { "epoch": 2.7258071082522495, "grad_norm": 0.1638314127922058, "learning_rate": 1.2634212168230796e-06, "loss": 0.3007, "step": 16739 }, { "epoch": 2.72596995481008, "grad_norm": 0.19515115022659302, "learning_rate": 1.2619347936518506e-06, "loss": 0.2783, "step": 16740 }, { "epoch": 2.7261328013679114, "grad_norm": 0.17412029206752777, "learning_rate": 1.2604492227482806e-06, "loss": 0.3095, "step": 16741 }, { "epoch": 2.726295647925742, "grad_norm": 0.15282028913497925, "learning_rate": 1.2589645041657132e-06, "loss": 0.3099, "step": 16742 }, { "epoch": 2.726458494483573, "grad_norm": 0.17724379897117615, "learning_rate": 1.2574806379574477e-06, "loss": 0.2739, "step": 16743 }, { "epoch": 2.7266213410414037, "grad_norm": 0.21056240797042847, "learning_rate": 1.2559976241767607e-06, "loss": 0.2553, "step": 16744 }, { "epoch": 2.7267841875992347, "grad_norm": 0.1810699999332428, "learning_rate": 1.2545154628769012e-06, "loss": 0.2651, "step": 16745 }, { "epoch": 2.7269470341570656, "grad_norm": 0.1708795577287674, "learning_rate": 1.2530341541110796e-06, "loss": 0.2521, "step": 16746 }, { "epoch": 2.727109880714896, "grad_norm": 0.2162630259990692, "learning_rate": 1.2515536979324754e-06, "loss": 0.2547, "step": 16747 }, { "epoch": 2.7272727272727275, "grad_norm": 0.15415547788143158, "learning_rate": 1.250074094394249e-06, "loss": 0.2607, "step": 16748 }, { "epoch": 2.727435573830558, "grad_norm": 0.1815587282180786, "learning_rate": 1.2485953435495219e-06, "loss": 0.2565, "step": 16749 }, { "epoch": 2.727598420388389, "grad_norm": 0.3052556812763214, "learning_rate": 1.2471174454513817e-06, "loss": 0.3313, "step": 16750 }, { "epoch": 2.72776126694622, "grad_norm": 0.185648113489151, "learning_rate": 1.2456404001528865e-06, "loss": 0.288, "step": 16751 }, { "epoch": 2.727924113504051, "grad_norm": 0.18209007382392883, "learning_rate": 1.2441642077070743e-06, "loss": 0.2698, "step": 16752 }, { "epoch": 2.728086960061882, "grad_norm": 0.18107999861240387, "learning_rate": 1.2426888681669413e-06, "loss": 0.2575, "step": 16753 }, { "epoch": 2.7282498066197127, "grad_norm": 0.2107105404138565, "learning_rate": 1.2412143815854538e-06, "loss": 0.2939, "step": 16754 }, { "epoch": 2.7284126531775437, "grad_norm": 0.18210294842720032, "learning_rate": 1.2397407480155553e-06, "loss": 0.2747, "step": 16755 }, { "epoch": 2.728575499735374, "grad_norm": 0.17043407261371613, "learning_rate": 1.2382679675101538e-06, "loss": 0.2738, "step": 16756 }, { "epoch": 2.728738346293205, "grad_norm": 0.14997132122516632, "learning_rate": 1.2367960401221235e-06, "loss": 0.254, "step": 16757 }, { "epoch": 2.728901192851036, "grad_norm": 0.15838778018951416, "learning_rate": 1.2353249659043081e-06, "loss": 0.2706, "step": 16758 }, { "epoch": 2.729064039408867, "grad_norm": 0.19976675510406494, "learning_rate": 1.2338547449095322e-06, "loss": 0.2696, "step": 16759 }, { "epoch": 2.729226885966698, "grad_norm": 0.20694245398044586, "learning_rate": 1.2323853771905757e-06, "loss": 0.298, "step": 16760 }, { "epoch": 2.729389732524529, "grad_norm": 0.19169367849826813, "learning_rate": 1.230916862800191e-06, "loss": 0.2605, "step": 16761 }, { "epoch": 2.72955257908236, "grad_norm": 0.1526167243719101, "learning_rate": 1.2294492017911052e-06, "loss": 0.2416, "step": 16762 }, { "epoch": 2.7297154256401903, "grad_norm": 0.1483415812253952, "learning_rate": 1.2279823942160123e-06, "loss": 0.2479, "step": 16763 }, { "epoch": 2.7298782721980213, "grad_norm": 0.17840000987052917, "learning_rate": 1.2265164401275758e-06, "loss": 0.2477, "step": 16764 }, { "epoch": 2.730041118755852, "grad_norm": 0.18527978658676147, "learning_rate": 1.225051339578423e-06, "loss": 0.3009, "step": 16765 }, { "epoch": 2.730203965313683, "grad_norm": 0.1964409053325653, "learning_rate": 1.2235870926211619e-06, "loss": 0.261, "step": 16766 }, { "epoch": 2.730366811871514, "grad_norm": 0.18641474843025208, "learning_rate": 1.2221236993083585e-06, "loss": 0.2547, "step": 16767 }, { "epoch": 2.730529658429345, "grad_norm": 0.16895751655101776, "learning_rate": 1.220661159692557e-06, "loss": 0.2393, "step": 16768 }, { "epoch": 2.730692504987176, "grad_norm": 0.17910265922546387, "learning_rate": 1.21919947382626e-06, "loss": 0.2668, "step": 16769 }, { "epoch": 2.7308553515450065, "grad_norm": 0.15628401935100555, "learning_rate": 1.2177386417619562e-06, "loss": 0.2916, "step": 16770 }, { "epoch": 2.731018198102838, "grad_norm": 0.2046244591474533, "learning_rate": 1.2162786635520867e-06, "loss": 0.2639, "step": 16771 }, { "epoch": 2.7311810446606684, "grad_norm": 0.19438791275024414, "learning_rate": 1.2148195392490735e-06, "loss": 0.308, "step": 16772 }, { "epoch": 2.7313438912184993, "grad_norm": 0.18949830532073975, "learning_rate": 1.2133612689052942e-06, "loss": 0.238, "step": 16773 }, { "epoch": 2.7315067377763302, "grad_norm": 0.20131367444992065, "learning_rate": 1.211903852573118e-06, "loss": 0.2808, "step": 16774 }, { "epoch": 2.731669584334161, "grad_norm": 0.16339291632175446, "learning_rate": 1.2104472903048642e-06, "loss": 0.2717, "step": 16775 }, { "epoch": 2.731832430891992, "grad_norm": 0.18018774688243866, "learning_rate": 1.2089915821528242e-06, "loss": 0.2781, "step": 16776 }, { "epoch": 2.731995277449823, "grad_norm": 0.1970699578523636, "learning_rate": 1.2075367281692701e-06, "loss": 0.2919, "step": 16777 }, { "epoch": 2.732158124007654, "grad_norm": 0.16754674911499023, "learning_rate": 1.206082728406427e-06, "loss": 0.2569, "step": 16778 }, { "epoch": 2.7323209705654845, "grad_norm": 0.19931471347808838, "learning_rate": 1.2046295829165083e-06, "loss": 0.2339, "step": 16779 }, { "epoch": 2.7324838171233155, "grad_norm": 0.17366379499435425, "learning_rate": 1.203177291751678e-06, "loss": 0.2453, "step": 16780 }, { "epoch": 2.7326466636811464, "grad_norm": 0.16376914083957672, "learning_rate": 1.2017258549640781e-06, "loss": 0.2442, "step": 16781 }, { "epoch": 2.7328095102389773, "grad_norm": 0.1719106137752533, "learning_rate": 1.2002752726058247e-06, "loss": 0.2467, "step": 16782 }, { "epoch": 2.7329723567968083, "grad_norm": 0.1577145904302597, "learning_rate": 1.1988255447289932e-06, "loss": 0.2636, "step": 16783 }, { "epoch": 2.733135203354639, "grad_norm": 0.1559130847454071, "learning_rate": 1.1973766713856333e-06, "loss": 0.3081, "step": 16784 }, { "epoch": 2.73329804991247, "grad_norm": 0.15490688383579254, "learning_rate": 1.1959286526277675e-06, "loss": 0.253, "step": 16785 }, { "epoch": 2.7334608964703007, "grad_norm": 0.17749153077602386, "learning_rate": 1.1944814885073818e-06, "loss": 0.2447, "step": 16786 }, { "epoch": 2.7336237430281316, "grad_norm": 0.18568874895572662, "learning_rate": 1.193035179076432e-06, "loss": 0.2569, "step": 16787 }, { "epoch": 2.7337865895859625, "grad_norm": 0.13022594153881073, "learning_rate": 1.1915897243868517e-06, "loss": 0.2572, "step": 16788 }, { "epoch": 2.7339494361437935, "grad_norm": 0.1959606260061264, "learning_rate": 1.1901451244905297e-06, "loss": 0.2887, "step": 16789 }, { "epoch": 2.7341122827016244, "grad_norm": 0.1930149793624878, "learning_rate": 1.188701379439333e-06, "loss": 0.2877, "step": 16790 }, { "epoch": 2.7342751292594554, "grad_norm": 0.23936063051223755, "learning_rate": 1.1872584892850951e-06, "loss": 0.2915, "step": 16791 }, { "epoch": 2.7344379758172863, "grad_norm": 0.19278985261917114, "learning_rate": 1.1858164540796274e-06, "loss": 0.2489, "step": 16792 }, { "epoch": 2.734600822375117, "grad_norm": 0.18383240699768066, "learning_rate": 1.1843752738746966e-06, "loss": 0.2325, "step": 16793 }, { "epoch": 2.734763668932948, "grad_norm": 0.2228815257549286, "learning_rate": 1.1829349487220476e-06, "loss": 0.2459, "step": 16794 }, { "epoch": 2.7349265154907787, "grad_norm": 0.1792159229516983, "learning_rate": 1.1814954786733863e-06, "loss": 0.271, "step": 16795 }, { "epoch": 2.7350893620486096, "grad_norm": 0.16187097132205963, "learning_rate": 1.1800568637804043e-06, "loss": 0.2531, "step": 16796 }, { "epoch": 2.7352522086064406, "grad_norm": 0.1277378648519516, "learning_rate": 1.1786191040947498e-06, "loss": 0.2744, "step": 16797 }, { "epoch": 2.7354150551642715, "grad_norm": 0.17801308631896973, "learning_rate": 1.1771821996680338e-06, "loss": 0.2666, "step": 16798 }, { "epoch": 2.7355779017221025, "grad_norm": 0.1524932086467743, "learning_rate": 1.175746150551857e-06, "loss": 0.2321, "step": 16799 }, { "epoch": 2.735740748279933, "grad_norm": 0.18820840120315552, "learning_rate": 1.1743109567977696e-06, "loss": 0.2888, "step": 16800 }, { "epoch": 2.7359035948377644, "grad_norm": 0.18729938566684723, "learning_rate": 1.1728766184573054e-06, "loss": 0.314, "step": 16801 }, { "epoch": 2.736066441395595, "grad_norm": 0.19290325045585632, "learning_rate": 1.1714431355819538e-06, "loss": 0.318, "step": 16802 }, { "epoch": 2.736229287953426, "grad_norm": 0.1763676404953003, "learning_rate": 1.1700105082231905e-06, "loss": 0.2372, "step": 16803 }, { "epoch": 2.7363921345112567, "grad_norm": 0.1831701695919037, "learning_rate": 1.1685787364324464e-06, "loss": 0.2798, "step": 16804 }, { "epoch": 2.7365549810690877, "grad_norm": 0.19647659361362457, "learning_rate": 1.1671478202611219e-06, "loss": 0.2998, "step": 16805 }, { "epoch": 2.7367178276269186, "grad_norm": 0.2067338079214096, "learning_rate": 1.1657177597606011e-06, "loss": 0.2535, "step": 16806 }, { "epoch": 2.7368806741847496, "grad_norm": 0.21701419353485107, "learning_rate": 1.1642885549822207e-06, "loss": 0.303, "step": 16807 }, { "epoch": 2.7370435207425805, "grad_norm": 0.20555374026298523, "learning_rate": 1.1628602059772953e-06, "loss": 0.3102, "step": 16808 }, { "epoch": 2.737206367300411, "grad_norm": 0.19969652593135834, "learning_rate": 1.161432712797103e-06, "loss": 0.3005, "step": 16809 }, { "epoch": 2.737369213858242, "grad_norm": 0.13606934249401093, "learning_rate": 1.1600060754929031e-06, "loss": 0.2423, "step": 16810 }, { "epoch": 2.737532060416073, "grad_norm": 0.17854171991348267, "learning_rate": 1.1585802941159102e-06, "loss": 0.2398, "step": 16811 }, { "epoch": 2.737694906973904, "grad_norm": 0.15701225399971008, "learning_rate": 1.1571553687173137e-06, "loss": 0.2784, "step": 16812 }, { "epoch": 2.7378577535317348, "grad_norm": 0.18011710047721863, "learning_rate": 1.1557312993482755e-06, "loss": 0.2715, "step": 16813 }, { "epoch": 2.7380206000895657, "grad_norm": 0.16649127006530762, "learning_rate": 1.1543080860599242e-06, "loss": 0.3146, "step": 16814 }, { "epoch": 2.7381834466473967, "grad_norm": 0.18347109854221344, "learning_rate": 1.1528857289033523e-06, "loss": 0.276, "step": 16815 }, { "epoch": 2.738346293205227, "grad_norm": 0.1942090392112732, "learning_rate": 1.151464227929633e-06, "loss": 0.2703, "step": 16816 }, { "epoch": 2.738509139763058, "grad_norm": 0.1992902010679245, "learning_rate": 1.1500435831898027e-06, "loss": 0.2465, "step": 16817 }, { "epoch": 2.738671986320889, "grad_norm": 0.2007330358028412, "learning_rate": 1.14862379473486e-06, "loss": 0.2595, "step": 16818 }, { "epoch": 2.73883483287872, "grad_norm": 0.17551341652870178, "learning_rate": 1.147204862615786e-06, "loss": 0.3061, "step": 16819 }, { "epoch": 2.738997679436551, "grad_norm": 0.18801026046276093, "learning_rate": 1.145786786883518e-06, "loss": 0.2757, "step": 16820 }, { "epoch": 2.739160525994382, "grad_norm": 0.1649509072303772, "learning_rate": 1.1443695675889793e-06, "loss": 0.2661, "step": 16821 }, { "epoch": 2.739323372552213, "grad_norm": 0.1832326054573059, "learning_rate": 1.1429532047830455e-06, "loss": 0.2875, "step": 16822 }, { "epoch": 2.7394862191100433, "grad_norm": 0.1594320386648178, "learning_rate": 1.1415376985165704e-06, "loss": 0.2685, "step": 16823 }, { "epoch": 2.7396490656678747, "grad_norm": 0.17079424858093262, "learning_rate": 1.1401230488403692e-06, "loss": 0.2673, "step": 16824 }, { "epoch": 2.739811912225705, "grad_norm": 0.192348912358284, "learning_rate": 1.1387092558052397e-06, "loss": 0.2339, "step": 16825 }, { "epoch": 2.739974758783536, "grad_norm": 0.19784428179264069, "learning_rate": 1.1372963194619391e-06, "loss": 0.2377, "step": 16826 }, { "epoch": 2.740137605341367, "grad_norm": 0.17072021961212158, "learning_rate": 1.1358842398611902e-06, "loss": 0.268, "step": 16827 }, { "epoch": 2.740300451899198, "grad_norm": 0.15561404824256897, "learning_rate": 1.1344730170537e-06, "loss": 0.2456, "step": 16828 }, { "epoch": 2.740463298457029, "grad_norm": 0.16505683958530426, "learning_rate": 1.1330626510901331e-06, "loss": 0.281, "step": 16829 }, { "epoch": 2.74062614501486, "grad_norm": 0.16131451725959778, "learning_rate": 1.1316531420211218e-06, "loss": 0.2591, "step": 16830 }, { "epoch": 2.740788991572691, "grad_norm": 0.1602059006690979, "learning_rate": 1.1302444898972725e-06, "loss": 0.3093, "step": 16831 }, { "epoch": 2.7409518381305213, "grad_norm": 0.21414026618003845, "learning_rate": 1.1288366947691642e-06, "loss": 0.2999, "step": 16832 }, { "epoch": 2.7411146846883523, "grad_norm": 0.1396505981683731, "learning_rate": 1.1274297566873399e-06, "loss": 0.2332, "step": 16833 }, { "epoch": 2.7412775312461832, "grad_norm": 0.19530411064624786, "learning_rate": 1.1260236757023118e-06, "loss": 0.2553, "step": 16834 }, { "epoch": 2.741440377804014, "grad_norm": 0.20852980017662048, "learning_rate": 1.1246184518645565e-06, "loss": 0.2743, "step": 16835 }, { "epoch": 2.741603224361845, "grad_norm": 0.23981235921382904, "learning_rate": 1.123214085224536e-06, "loss": 0.2338, "step": 16836 }, { "epoch": 2.741766070919676, "grad_norm": 0.19202525913715363, "learning_rate": 1.1218105758326657e-06, "loss": 0.2617, "step": 16837 }, { "epoch": 2.741928917477507, "grad_norm": 0.1409621387720108, "learning_rate": 1.1204079237393333e-06, "loss": 0.2468, "step": 16838 }, { "epoch": 2.7420917640353375, "grad_norm": 0.16025832295417786, "learning_rate": 1.1190061289949067e-06, "loss": 0.2351, "step": 16839 }, { "epoch": 2.7422546105931684, "grad_norm": 0.18417507410049438, "learning_rate": 1.1176051916497067e-06, "loss": 0.2572, "step": 16840 }, { "epoch": 2.7424174571509994, "grad_norm": 0.1767052561044693, "learning_rate": 1.1162051117540372e-06, "loss": 0.2517, "step": 16841 }, { "epoch": 2.7425803037088303, "grad_norm": 0.1647920310497284, "learning_rate": 1.1148058893581558e-06, "loss": 0.2405, "step": 16842 }, { "epoch": 2.7427431502666613, "grad_norm": 0.19565977156162262, "learning_rate": 1.1134075245123082e-06, "loss": 0.2571, "step": 16843 }, { "epoch": 2.742905996824492, "grad_norm": 0.20453940331935883, "learning_rate": 1.1120100172666986e-06, "loss": 0.2941, "step": 16844 }, { "epoch": 2.743068843382323, "grad_norm": 0.18562282621860504, "learning_rate": 1.110613367671498e-06, "loss": 0.2803, "step": 16845 }, { "epoch": 2.7432316899401537, "grad_norm": 0.1501121073961258, "learning_rate": 1.1092175757768497e-06, "loss": 0.2364, "step": 16846 }, { "epoch": 2.743394536497985, "grad_norm": 0.203340545296669, "learning_rate": 1.1078226416328747e-06, "loss": 0.2795, "step": 16847 }, { "epoch": 2.7435573830558155, "grad_norm": 0.19285142421722412, "learning_rate": 1.1064285652896466e-06, "loss": 0.2444, "step": 16848 }, { "epoch": 2.7437202296136465, "grad_norm": 0.20022712647914886, "learning_rate": 1.1050353467972174e-06, "loss": 0.2933, "step": 16849 }, { "epoch": 2.7438830761714774, "grad_norm": 0.13234291970729828, "learning_rate": 1.1036429862056136e-06, "loss": 0.2856, "step": 16850 }, { "epoch": 2.7440459227293084, "grad_norm": 0.20528006553649902, "learning_rate": 1.1022514835648228e-06, "loss": 0.2862, "step": 16851 }, { "epoch": 2.7442087692871393, "grad_norm": 0.19191715121269226, "learning_rate": 1.100860838924797e-06, "loss": 0.2842, "step": 16852 }, { "epoch": 2.74437161584497, "grad_norm": 0.17386408150196075, "learning_rate": 1.0994710523354768e-06, "loss": 0.2812, "step": 16853 }, { "epoch": 2.744534462402801, "grad_norm": 0.145732119679451, "learning_rate": 1.0980821238467553e-06, "loss": 0.2535, "step": 16854 }, { "epoch": 2.7446973089606317, "grad_norm": 0.19336514174938202, "learning_rate": 1.0966940535084929e-06, "loss": 0.2819, "step": 16855 }, { "epoch": 2.7448601555184626, "grad_norm": 0.19388015568256378, "learning_rate": 1.0953068413705331e-06, "loss": 0.224, "step": 16856 }, { "epoch": 2.7450230020762936, "grad_norm": 0.15383613109588623, "learning_rate": 1.0939204874826719e-06, "loss": 0.2774, "step": 16857 }, { "epoch": 2.7451858486341245, "grad_norm": 0.17329367995262146, "learning_rate": 1.0925349918946975e-06, "loss": 0.2622, "step": 16858 }, { "epoch": 2.7453486951919555, "grad_norm": 0.2070489525794983, "learning_rate": 1.0911503546563423e-06, "loss": 0.2596, "step": 16859 }, { "epoch": 2.7455115417497864, "grad_norm": 0.20587095618247986, "learning_rate": 1.089766575817322e-06, "loss": 0.2503, "step": 16860 }, { "epoch": 2.7456743883076173, "grad_norm": 0.18068790435791016, "learning_rate": 1.0883836554273192e-06, "loss": 0.2105, "step": 16861 }, { "epoch": 2.745837234865448, "grad_norm": 0.2020432949066162, "learning_rate": 1.0870015935359856e-06, "loss": 0.2979, "step": 16862 }, { "epoch": 2.746000081423279, "grad_norm": 0.1858728528022766, "learning_rate": 1.0856203901929402e-06, "loss": 0.2571, "step": 16863 }, { "epoch": 2.7461629279811097, "grad_norm": 0.1897733360528946, "learning_rate": 1.0842400454477681e-06, "loss": 0.2496, "step": 16864 }, { "epoch": 2.7463257745389407, "grad_norm": 0.1747560203075409, "learning_rate": 1.0828605593500379e-06, "loss": 0.2657, "step": 16865 }, { "epoch": 2.7464886210967716, "grad_norm": 0.1681809425354004, "learning_rate": 1.0814819319492687e-06, "loss": 0.247, "step": 16866 }, { "epoch": 2.7466514676546026, "grad_norm": 0.16556991636753082, "learning_rate": 1.0801041632949566e-06, "loss": 0.2632, "step": 16867 }, { "epoch": 2.7468143142124335, "grad_norm": 0.19505834579467773, "learning_rate": 1.0787272534365733e-06, "loss": 0.2415, "step": 16868 }, { "epoch": 2.746977160770264, "grad_norm": 0.17148953676223755, "learning_rate": 1.0773512024235544e-06, "loss": 0.2506, "step": 16869 }, { "epoch": 2.7471400073280954, "grad_norm": 0.20389261841773987, "learning_rate": 1.075976010305299e-06, "loss": 0.2519, "step": 16870 }, { "epoch": 2.747302853885926, "grad_norm": 0.15366315841674805, "learning_rate": 1.0746016771311818e-06, "loss": 0.2298, "step": 16871 }, { "epoch": 2.747465700443757, "grad_norm": 0.20512951910495758, "learning_rate": 1.0732282029505492e-06, "loss": 0.2553, "step": 16872 }, { "epoch": 2.7476285470015878, "grad_norm": 0.17791618406772614, "learning_rate": 1.071855587812709e-06, "loss": 0.2663, "step": 16873 }, { "epoch": 2.7477913935594187, "grad_norm": 0.21026338636875153, "learning_rate": 1.070483831766947e-06, "loss": 0.2913, "step": 16874 }, { "epoch": 2.7479542401172496, "grad_norm": 0.19207537174224854, "learning_rate": 1.069112934862504e-06, "loss": 0.2574, "step": 16875 }, { "epoch": 2.74811708667508, "grad_norm": 0.20977294445037842, "learning_rate": 1.0677428971486103e-06, "loss": 0.2402, "step": 16876 }, { "epoch": 2.7482799332329115, "grad_norm": 0.19642183184623718, "learning_rate": 1.0663737186744487e-06, "loss": 0.2882, "step": 16877 }, { "epoch": 2.748442779790742, "grad_norm": 0.19401998817920685, "learning_rate": 1.0650053994891774e-06, "loss": 0.2885, "step": 16878 }, { "epoch": 2.748605626348573, "grad_norm": 0.18452337384223938, "learning_rate": 1.0636379396419232e-06, "loss": 0.2724, "step": 16879 }, { "epoch": 2.748768472906404, "grad_norm": 0.17519043385982513, "learning_rate": 1.0622713391817835e-06, "loss": 0.2814, "step": 16880 }, { "epoch": 2.748931319464235, "grad_norm": 0.20205143094062805, "learning_rate": 1.0609055981578214e-06, "loss": 0.2831, "step": 16881 }, { "epoch": 2.749094166022066, "grad_norm": 0.2054138481616974, "learning_rate": 1.0595407166190702e-06, "loss": 0.295, "step": 16882 }, { "epoch": 2.7492570125798967, "grad_norm": 0.2199345827102661, "learning_rate": 1.058176694614535e-06, "loss": 0.2737, "step": 16883 }, { "epoch": 2.7494198591377277, "grad_norm": 0.22228054702281952, "learning_rate": 1.0568135321931904e-06, "loss": 0.2759, "step": 16884 }, { "epoch": 2.749582705695558, "grad_norm": 0.15621306002140045, "learning_rate": 1.0554512294039753e-06, "loss": 0.2554, "step": 16885 }, { "epoch": 2.749745552253389, "grad_norm": 0.18795287609100342, "learning_rate": 1.0540897862957977e-06, "loss": 0.2185, "step": 16886 }, { "epoch": 2.74990839881122, "grad_norm": 0.1298491209745407, "learning_rate": 1.0527292029175433e-06, "loss": 0.2705, "step": 16887 }, { "epoch": 2.750071245369051, "grad_norm": 0.1687203198671341, "learning_rate": 1.0513694793180595e-06, "loss": 0.2352, "step": 16888 }, { "epoch": 2.750234091926882, "grad_norm": 0.19639067351818085, "learning_rate": 1.0500106155461598e-06, "loss": 0.2403, "step": 16889 }, { "epoch": 2.750396938484713, "grad_norm": 0.19712892174720764, "learning_rate": 1.0486526116506384e-06, "loss": 0.2855, "step": 16890 }, { "epoch": 2.750559785042544, "grad_norm": 0.1948256492614746, "learning_rate": 1.0472954676802482e-06, "loss": 0.3505, "step": 16891 }, { "epoch": 2.7507226316003743, "grad_norm": 0.16477026045322418, "learning_rate": 1.0459391836837112e-06, "loss": 0.2753, "step": 16892 }, { "epoch": 2.7508854781582053, "grad_norm": 0.20125031471252441, "learning_rate": 1.0445837597097302e-06, "loss": 0.2852, "step": 16893 }, { "epoch": 2.751048324716036, "grad_norm": 0.16714932024478912, "learning_rate": 1.0432291958069634e-06, "loss": 0.2382, "step": 16894 }, { "epoch": 2.751211171273867, "grad_norm": 0.23046277463436127, "learning_rate": 1.0418754920240415e-06, "loss": 0.3071, "step": 16895 }, { "epoch": 2.751374017831698, "grad_norm": 0.18277175724506378, "learning_rate": 1.0405226484095726e-06, "loss": 0.2562, "step": 16896 }, { "epoch": 2.751536864389529, "grad_norm": 0.13070838153362274, "learning_rate": 1.0391706650121235e-06, "loss": 0.27, "step": 16897 }, { "epoch": 2.75169971094736, "grad_norm": 0.1748049557209015, "learning_rate": 1.0378195418802388e-06, "loss": 0.2612, "step": 16898 }, { "epoch": 2.7518625575051905, "grad_norm": 0.15994924306869507, "learning_rate": 1.0364692790624242e-06, "loss": 0.2628, "step": 16899 }, { "epoch": 2.752025404063022, "grad_norm": 0.203939750790596, "learning_rate": 1.0351198766071574e-06, "loss": 0.2879, "step": 16900 }, { "epoch": 2.7521882506208524, "grad_norm": 0.15117788314819336, "learning_rate": 1.0337713345628912e-06, "loss": 0.2383, "step": 16901 }, { "epoch": 2.7523510971786833, "grad_norm": 0.23593083024024963, "learning_rate": 1.0324236529780373e-06, "loss": 0.2696, "step": 16902 }, { "epoch": 2.7525139437365143, "grad_norm": 0.16870586574077606, "learning_rate": 1.0310768319009844e-06, "loss": 0.2799, "step": 16903 }, { "epoch": 2.752676790294345, "grad_norm": 0.19976496696472168, "learning_rate": 1.0297308713800801e-06, "loss": 0.3031, "step": 16904 }, { "epoch": 2.752839636852176, "grad_norm": 0.20640979707241058, "learning_rate": 1.0283857714636607e-06, "loss": 0.2581, "step": 16905 }, { "epoch": 2.753002483410007, "grad_norm": 0.16980527341365814, "learning_rate": 1.0270415322000126e-06, "loss": 0.2623, "step": 16906 }, { "epoch": 2.753165329967838, "grad_norm": 0.20453476905822754, "learning_rate": 1.0256981536373944e-06, "loss": 0.2447, "step": 16907 }, { "epoch": 2.7533281765256685, "grad_norm": 0.23544161021709442, "learning_rate": 1.0243556358240426e-06, "loss": 0.2858, "step": 16908 }, { "epoch": 2.7534910230834995, "grad_norm": 0.17399868369102478, "learning_rate": 1.02301397880816e-06, "loss": 0.2444, "step": 16909 }, { "epoch": 2.7536538696413304, "grad_norm": 0.17205701768398285, "learning_rate": 1.0216731826379111e-06, "loss": 0.2786, "step": 16910 }, { "epoch": 2.7538167161991614, "grad_norm": 0.17782779037952423, "learning_rate": 1.0203332473614324e-06, "loss": 0.2233, "step": 16911 }, { "epoch": 2.7539795627569923, "grad_norm": 0.20861320197582245, "learning_rate": 1.0189941730268383e-06, "loss": 0.2821, "step": 16912 }, { "epoch": 2.7541424093148232, "grad_norm": 0.1803075671195984, "learning_rate": 1.017655959682201e-06, "loss": 0.2859, "step": 16913 }, { "epoch": 2.754305255872654, "grad_norm": 0.1815078854560852, "learning_rate": 1.0163186073755714e-06, "loss": 0.3042, "step": 16914 }, { "epoch": 2.7544681024304847, "grad_norm": 0.1665075123310089, "learning_rate": 1.0149821161549555e-06, "loss": 0.2353, "step": 16915 }, { "epoch": 2.7546309489883156, "grad_norm": 0.12932659685611725, "learning_rate": 1.0136464860683454e-06, "loss": 0.2727, "step": 16916 }, { "epoch": 2.7547937955461466, "grad_norm": 0.20059709250926971, "learning_rate": 1.0123117171636915e-06, "loss": 0.2602, "step": 16917 }, { "epoch": 2.7549566421039775, "grad_norm": 0.17670419812202454, "learning_rate": 1.0109778094889138e-06, "loss": 0.3055, "step": 16918 }, { "epoch": 2.7551194886618084, "grad_norm": 0.17579388618469238, "learning_rate": 1.0096447630919076e-06, "loss": 0.273, "step": 16919 }, { "epoch": 2.7552823352196394, "grad_norm": 0.19603084027767181, "learning_rate": 1.0083125780205315e-06, "loss": 0.2538, "step": 16920 }, { "epoch": 2.7554451817774703, "grad_norm": 0.1998162418603897, "learning_rate": 1.006981254322617e-06, "loss": 0.2722, "step": 16921 }, { "epoch": 2.755608028335301, "grad_norm": 0.1665838062763214, "learning_rate": 1.0056507920459562e-06, "loss": 0.2815, "step": 16922 }, { "epoch": 2.755770874893132, "grad_norm": 0.16676649451255798, "learning_rate": 1.0043211912383222e-06, "loss": 0.2717, "step": 16923 }, { "epoch": 2.7559337214509627, "grad_norm": 0.23118901252746582, "learning_rate": 1.0029924519474542e-06, "loss": 0.2525, "step": 16924 }, { "epoch": 2.7560965680087937, "grad_norm": 0.2110823094844818, "learning_rate": 1.0016645742210507e-06, "loss": 0.2957, "step": 16925 }, { "epoch": 2.7562594145666246, "grad_norm": 0.16789382696151733, "learning_rate": 1.000337558106787e-06, "loss": 0.2716, "step": 16926 }, { "epoch": 2.7564222611244555, "grad_norm": 0.18865971267223358, "learning_rate": 9.99011403652314e-07, "loss": 0.2587, "step": 16927 }, { "epoch": 2.7565851076822865, "grad_norm": 0.20975983142852783, "learning_rate": 9.97686110905241e-07, "loss": 0.2767, "step": 16928 }, { "epoch": 2.756747954240117, "grad_norm": 0.15901121497154236, "learning_rate": 9.963616799131465e-07, "loss": 0.2563, "step": 16929 }, { "epoch": 2.7569108007979484, "grad_norm": 0.18085747957229614, "learning_rate": 9.950381107235868e-07, "loss": 0.2453, "step": 16930 }, { "epoch": 2.757073647355779, "grad_norm": 0.14864473044872284, "learning_rate": 9.937154033840794e-07, "loss": 0.2838, "step": 16931 }, { "epoch": 2.75723649391361, "grad_norm": 0.18952229619026184, "learning_rate": 9.923935579421118e-07, "loss": 0.3053, "step": 16932 }, { "epoch": 2.7573993404714408, "grad_norm": 0.19194073975086212, "learning_rate": 9.910725744451482e-07, "loss": 0.254, "step": 16933 }, { "epoch": 2.7575621870292717, "grad_norm": 0.19935794174671173, "learning_rate": 9.897524529406065e-07, "loss": 0.2352, "step": 16934 }, { "epoch": 2.7577250335871026, "grad_norm": 0.1347072422504425, "learning_rate": 9.884331934758906e-07, "loss": 0.279, "step": 16935 }, { "epoch": 2.7578878801449336, "grad_norm": 0.1796979010105133, "learning_rate": 9.87114796098365e-07, "loss": 0.2356, "step": 16936 }, { "epoch": 2.7580507267027645, "grad_norm": 0.1718117743730545, "learning_rate": 9.857972608553618e-07, "loss": 0.2471, "step": 16937 }, { "epoch": 2.758213573260595, "grad_norm": 0.1578943431377411, "learning_rate": 9.84480587794187e-07, "loss": 0.2732, "step": 16938 }, { "epoch": 2.758376419818426, "grad_norm": 0.15197321772575378, "learning_rate": 9.831647769621088e-07, "loss": 0.2463, "step": 16939 }, { "epoch": 2.758539266376257, "grad_norm": 0.22967039048671722, "learning_rate": 9.818498284063727e-07, "loss": 0.2976, "step": 16940 }, { "epoch": 2.758702112934088, "grad_norm": 0.18133530020713806, "learning_rate": 9.80535742174188e-07, "loss": 0.2753, "step": 16941 }, { "epoch": 2.758864959491919, "grad_norm": 0.19622647762298584, "learning_rate": 9.792225183127362e-07, "loss": 0.2355, "step": 16942 }, { "epoch": 2.7590278060497497, "grad_norm": 0.15922103822231293, "learning_rate": 9.779101568691634e-07, "loss": 0.2667, "step": 16943 }, { "epoch": 2.7591906526075807, "grad_norm": 0.2177416831254959, "learning_rate": 9.765986578905844e-07, "loss": 0.2853, "step": 16944 }, { "epoch": 2.759353499165411, "grad_norm": 0.1789499819278717, "learning_rate": 9.752880214240923e-07, "loss": 0.3111, "step": 16945 }, { "epoch": 2.759516345723242, "grad_norm": 0.17637820541858673, "learning_rate": 9.739782475167408e-07, "loss": 0.2636, "step": 16946 }, { "epoch": 2.759679192281073, "grad_norm": 0.1825983226299286, "learning_rate": 9.726693362155565e-07, "loss": 0.2594, "step": 16947 }, { "epoch": 2.759842038838904, "grad_norm": 0.1785127818584442, "learning_rate": 9.713612875675237e-07, "loss": 0.2581, "step": 16948 }, { "epoch": 2.760004885396735, "grad_norm": 0.19477462768554688, "learning_rate": 9.70054101619619e-07, "loss": 0.2911, "step": 16949 }, { "epoch": 2.760167731954566, "grad_norm": 0.19256417453289032, "learning_rate": 9.68747778418766e-07, "loss": 0.2818, "step": 16950 }, { "epoch": 2.760330578512397, "grad_norm": 0.16966202855110168, "learning_rate": 9.67442318011863e-07, "loss": 0.2398, "step": 16951 }, { "epoch": 2.7604934250702273, "grad_norm": 0.18867139518260956, "learning_rate": 9.661377204457894e-07, "loss": 0.218, "step": 16952 }, { "epoch": 2.7606562716280587, "grad_norm": 0.18554021418094635, "learning_rate": 9.648339857673799e-07, "loss": 0.3153, "step": 16953 }, { "epoch": 2.760819118185889, "grad_norm": 0.1869792491197586, "learning_rate": 9.635311140234388e-07, "loss": 0.2686, "step": 16954 }, { "epoch": 2.76098196474372, "grad_norm": 0.1623300313949585, "learning_rate": 9.622291052607452e-07, "loss": 0.2365, "step": 16955 }, { "epoch": 2.761144811301551, "grad_norm": 0.17323333024978638, "learning_rate": 9.60927959526045e-07, "loss": 0.2396, "step": 16956 }, { "epoch": 2.761307657859382, "grad_norm": 0.1493619978427887, "learning_rate": 9.596276768660595e-07, "loss": 0.2795, "step": 16957 }, { "epoch": 2.761470504417213, "grad_norm": 0.19507841765880585, "learning_rate": 9.583282573274592e-07, "loss": 0.2771, "step": 16958 }, { "epoch": 2.761633350975044, "grad_norm": 0.1799451857805252, "learning_rate": 9.570297009569101e-07, "loss": 0.2261, "step": 16959 }, { "epoch": 2.761796197532875, "grad_norm": 0.1598111093044281, "learning_rate": 9.557320078010301e-07, "loss": 0.2336, "step": 16960 }, { "epoch": 2.7619590440907054, "grad_norm": 0.1417122632265091, "learning_rate": 9.544351779064099e-07, "loss": 0.2673, "step": 16961 }, { "epoch": 2.7621218906485363, "grad_norm": 0.15309002995491028, "learning_rate": 9.531392113196064e-07, "loss": 0.268, "step": 16962 }, { "epoch": 2.7622847372063672, "grad_norm": 0.1796911507844925, "learning_rate": 9.518441080871521e-07, "loss": 0.2495, "step": 16963 }, { "epoch": 2.762447583764198, "grad_norm": 0.2079642415046692, "learning_rate": 9.505498682555486e-07, "loss": 0.2351, "step": 16964 }, { "epoch": 2.762610430322029, "grad_norm": 0.16180969774723053, "learning_rate": 9.49256491871256e-07, "loss": 0.2364, "step": 16965 }, { "epoch": 2.76277327687986, "grad_norm": 0.1961883157491684, "learning_rate": 9.479639789807093e-07, "loss": 0.2524, "step": 16966 }, { "epoch": 2.762936123437691, "grad_norm": 0.16702675819396973, "learning_rate": 9.466723296303214e-07, "loss": 0.274, "step": 16967 }, { "epoch": 2.7630989699955215, "grad_norm": 0.18573787808418274, "learning_rate": 9.453815438664637e-07, "loss": 0.3034, "step": 16968 }, { "epoch": 2.7632618165533525, "grad_norm": 0.17428050935268402, "learning_rate": 9.440916217354739e-07, "loss": 0.2502, "step": 16969 }, { "epoch": 2.7634246631111834, "grad_norm": 0.1352839469909668, "learning_rate": 9.428025632836734e-07, "loss": 0.2731, "step": 16970 }, { "epoch": 2.7635875096690143, "grad_norm": 0.17752835154533386, "learning_rate": 9.415143685573335e-07, "loss": 0.2733, "step": 16971 }, { "epoch": 2.7637503562268453, "grad_norm": 0.1816217601299286, "learning_rate": 9.402270376027117e-07, "loss": 0.2626, "step": 16972 }, { "epoch": 2.7639132027846762, "grad_norm": 0.19862954318523407, "learning_rate": 9.389405704660264e-07, "loss": 0.2542, "step": 16973 }, { "epoch": 2.764076049342507, "grad_norm": 0.1781257838010788, "learning_rate": 9.376549671934576e-07, "loss": 0.2427, "step": 16974 }, { "epoch": 2.7642388959003377, "grad_norm": 0.2217012345790863, "learning_rate": 9.363702278311737e-07, "loss": 0.2888, "step": 16975 }, { "epoch": 2.764401742458169, "grad_norm": 0.15076857805252075, "learning_rate": 9.350863524252934e-07, "loss": 0.2239, "step": 16976 }, { "epoch": 2.7645645890159996, "grad_norm": 0.18374967575073242, "learning_rate": 9.338033410219104e-07, "loss": 0.2731, "step": 16977 }, { "epoch": 2.7647274355738305, "grad_norm": 0.18509021401405334, "learning_rate": 9.325211936670963e-07, "loss": 0.268, "step": 16978 }, { "epoch": 2.7648902821316614, "grad_norm": 0.1894369274377823, "learning_rate": 9.312399104068781e-07, "loss": 0.2385, "step": 16979 }, { "epoch": 2.7650531286894924, "grad_norm": 0.1801947057247162, "learning_rate": 9.29959491287255e-07, "loss": 0.3005, "step": 16980 }, { "epoch": 2.7652159752473233, "grad_norm": 0.19085903465747833, "learning_rate": 9.28679936354207e-07, "loss": 0.2501, "step": 16981 }, { "epoch": 2.765378821805154, "grad_norm": 0.22984744608402252, "learning_rate": 9.274012456536668e-07, "loss": 0.2948, "step": 16982 }, { "epoch": 2.765541668362985, "grad_norm": 0.18437525629997253, "learning_rate": 9.261234192315476e-07, "loss": 0.2648, "step": 16983 }, { "epoch": 2.7657045149208157, "grad_norm": 0.1817968636751175, "learning_rate": 9.248464571337212e-07, "loss": 0.2573, "step": 16984 }, { "epoch": 2.7658673614786466, "grad_norm": 0.20165473222732544, "learning_rate": 9.235703594060396e-07, "loss": 0.2249, "step": 16985 }, { "epoch": 2.7660302080364776, "grad_norm": 0.19655880331993103, "learning_rate": 9.222951260943191e-07, "loss": 0.2695, "step": 16986 }, { "epoch": 2.7661930545943085, "grad_norm": 0.16218975186347961, "learning_rate": 9.210207572443424e-07, "loss": 0.2734, "step": 16987 }, { "epoch": 2.7663559011521395, "grad_norm": 0.15140961110591888, "learning_rate": 9.197472529018591e-07, "loss": 0.2736, "step": 16988 }, { "epoch": 2.7665187477099704, "grad_norm": 0.22781161963939667, "learning_rate": 9.184746131125965e-07, "loss": 0.2781, "step": 16989 }, { "epoch": 2.7666815942678014, "grad_norm": 0.13540488481521606, "learning_rate": 9.172028379222486e-07, "loss": 0.2632, "step": 16990 }, { "epoch": 2.766844440825632, "grad_norm": 0.15102028846740723, "learning_rate": 9.159319273764704e-07, "loss": 0.2709, "step": 16991 }, { "epoch": 2.767007287383463, "grad_norm": 0.16983938217163086, "learning_rate": 9.146618815208952e-07, "loss": 0.2612, "step": 16992 }, { "epoch": 2.7671701339412937, "grad_norm": 0.19310680031776428, "learning_rate": 9.133927004011194e-07, "loss": 0.2893, "step": 16993 }, { "epoch": 2.7673329804991247, "grad_norm": 0.17460590600967407, "learning_rate": 9.121243840627125e-07, "loss": 0.2336, "step": 16994 }, { "epoch": 2.7674958270569556, "grad_norm": 0.2690132260322571, "learning_rate": 9.108569325512046e-07, "loss": 0.2547, "step": 16995 }, { "epoch": 2.7676586736147866, "grad_norm": 0.18335123360157013, "learning_rate": 9.095903459121091e-07, "loss": 0.2974, "step": 16996 }, { "epoch": 2.7678215201726175, "grad_norm": 0.14510120451450348, "learning_rate": 9.083246241908983e-07, "loss": 0.2462, "step": 16997 }, { "epoch": 2.767984366730448, "grad_norm": 0.1560554951429367, "learning_rate": 9.070597674330133e-07, "loss": 0.286, "step": 16998 }, { "epoch": 2.7681472132882794, "grad_norm": 0.18655335903167725, "learning_rate": 9.057957756838653e-07, "loss": 0.2793, "step": 16999 }, { "epoch": 2.76831005984611, "grad_norm": 0.18333809077739716, "learning_rate": 9.045326489888401e-07, "loss": 0.2834, "step": 17000 }, { "epoch": 2.768472906403941, "grad_norm": 0.18943102657794952, "learning_rate": 9.032703873932819e-07, "loss": 0.2583, "step": 17001 }, { "epoch": 2.768635752961772, "grad_norm": 0.16171635687351227, "learning_rate": 9.020089909425128e-07, "loss": 0.2801, "step": 17002 }, { "epoch": 2.7687985995196027, "grad_norm": 0.1764492392539978, "learning_rate": 9.007484596818216e-07, "loss": 0.2674, "step": 17003 }, { "epoch": 2.7689614460774337, "grad_norm": 0.18234771490097046, "learning_rate": 8.994887936564639e-07, "loss": 0.2857, "step": 17004 }, { "epoch": 2.769124292635264, "grad_norm": 0.15847693383693695, "learning_rate": 8.982299929116672e-07, "loss": 0.2722, "step": 17005 }, { "epoch": 2.7692871391930955, "grad_norm": 0.18886582553386688, "learning_rate": 8.969720574926205e-07, "loss": 0.2764, "step": 17006 }, { "epoch": 2.769449985750926, "grad_norm": 0.202693372964859, "learning_rate": 8.957149874444931e-07, "loss": 0.2708, "step": 17007 }, { "epoch": 2.769612832308757, "grad_norm": 0.18760190904140472, "learning_rate": 8.944587828124185e-07, "loss": 0.2372, "step": 17008 }, { "epoch": 2.769775678866588, "grad_norm": 0.1578368842601776, "learning_rate": 8.93203443641491e-07, "loss": 0.2377, "step": 17009 }, { "epoch": 2.769938525424419, "grad_norm": 0.19327720999717712, "learning_rate": 8.919489699767886e-07, "loss": 0.2675, "step": 17010 }, { "epoch": 2.77010137198225, "grad_norm": 0.19063661992549896, "learning_rate": 8.906953618633473e-07, "loss": 0.2766, "step": 17011 }, { "epoch": 2.7702642185400808, "grad_norm": 0.18685521185398102, "learning_rate": 8.894426193461758e-07, "loss": 0.2902, "step": 17012 }, { "epoch": 2.7704270650979117, "grad_norm": 0.18678726255893707, "learning_rate": 8.881907424702518e-07, "loss": 0.2767, "step": 17013 }, { "epoch": 2.770589911655742, "grad_norm": 0.17847706377506256, "learning_rate": 8.8693973128052e-07, "loss": 0.2675, "step": 17014 }, { "epoch": 2.770752758213573, "grad_norm": 0.1577034890651703, "learning_rate": 8.856895858219e-07, "loss": 0.2536, "step": 17015 }, { "epoch": 2.770915604771404, "grad_norm": 0.20804089307785034, "learning_rate": 8.844403061392697e-07, "loss": 0.2591, "step": 17016 }, { "epoch": 2.771078451329235, "grad_norm": 0.15405365824699402, "learning_rate": 8.831918922774824e-07, "loss": 0.2711, "step": 17017 }, { "epoch": 2.771241297887066, "grad_norm": 0.17661437392234802, "learning_rate": 8.819443442813657e-07, "loss": 0.2937, "step": 17018 }, { "epoch": 2.771404144444897, "grad_norm": 0.1549343764781952, "learning_rate": 8.806976621957064e-07, "loss": 0.3057, "step": 17019 }, { "epoch": 2.771566991002728, "grad_norm": 0.20899061858654022, "learning_rate": 8.794518460652601e-07, "loss": 0.2599, "step": 17020 }, { "epoch": 2.7717298375605584, "grad_norm": 0.14782363176345825, "learning_rate": 8.782068959347634e-07, "loss": 0.2525, "step": 17021 }, { "epoch": 2.7718926841183893, "grad_norm": 0.21907855570316315, "learning_rate": 8.769628118489109e-07, "loss": 0.2915, "step": 17022 }, { "epoch": 2.7720555306762202, "grad_norm": 0.19403882324695587, "learning_rate": 8.757195938523671e-07, "loss": 0.2807, "step": 17023 }, { "epoch": 2.772218377234051, "grad_norm": 0.18339131772518158, "learning_rate": 8.744772419897657e-07, "loss": 0.3369, "step": 17024 }, { "epoch": 2.772381223791882, "grad_norm": 0.18551643192768097, "learning_rate": 8.732357563057181e-07, "loss": 0.2755, "step": 17025 }, { "epoch": 2.772544070349713, "grad_norm": 0.17000773549079895, "learning_rate": 8.719951368447887e-07, "loss": 0.2618, "step": 17026 }, { "epoch": 2.772706916907544, "grad_norm": 0.23840148746967316, "learning_rate": 8.707553836515253e-07, "loss": 0.2539, "step": 17027 }, { "epoch": 2.7728697634653745, "grad_norm": 0.22735419869422913, "learning_rate": 8.695164967704339e-07, "loss": 0.2676, "step": 17028 }, { "epoch": 2.773032610023206, "grad_norm": 0.18275117874145508, "learning_rate": 8.682784762460011e-07, "loss": 0.2646, "step": 17029 }, { "epoch": 2.7731954565810364, "grad_norm": 0.16250477731227875, "learning_rate": 8.670413221226692e-07, "loss": 0.2325, "step": 17030 }, { "epoch": 2.7733583031388673, "grad_norm": 0.1724524199962616, "learning_rate": 8.658050344448554e-07, "loss": 0.2579, "step": 17031 }, { "epoch": 2.7735211496966983, "grad_norm": 0.19530411064624786, "learning_rate": 8.645696132569547e-07, "loss": 0.2772, "step": 17032 }, { "epoch": 2.773683996254529, "grad_norm": 0.1855006217956543, "learning_rate": 8.633350586033123e-07, "loss": 0.2722, "step": 17033 }, { "epoch": 2.77384684281236, "grad_norm": 0.1565006971359253, "learning_rate": 8.621013705282594e-07, "loss": 0.2174, "step": 17034 }, { "epoch": 2.774009689370191, "grad_norm": 0.18188023567199707, "learning_rate": 8.608685490760827e-07, "loss": 0.2836, "step": 17035 }, { "epoch": 2.774172535928022, "grad_norm": 0.17397357523441315, "learning_rate": 8.596365942910522e-07, "loss": 0.2745, "step": 17036 }, { "epoch": 2.7743353824858525, "grad_norm": 0.17791245877742767, "learning_rate": 8.58405506217394e-07, "loss": 0.2401, "step": 17037 }, { "epoch": 2.7744982290436835, "grad_norm": 0.19143666326999664, "learning_rate": 8.571752848993058e-07, "loss": 0.252, "step": 17038 }, { "epoch": 2.7746610756015144, "grad_norm": 0.18181663751602173, "learning_rate": 8.55945930380958e-07, "loss": 0.2615, "step": 17039 }, { "epoch": 2.7748239221593454, "grad_norm": 0.17269763350486755, "learning_rate": 8.547174427064903e-07, "loss": 0.2709, "step": 17040 }, { "epoch": 2.7749867687171763, "grad_norm": 0.16372540593147278, "learning_rate": 8.534898219200088e-07, "loss": 0.2464, "step": 17041 }, { "epoch": 2.7751496152750073, "grad_norm": 0.17781157791614532, "learning_rate": 8.522630680655841e-07, "loss": 0.283, "step": 17042 }, { "epoch": 2.775312461832838, "grad_norm": 0.1784026026725769, "learning_rate": 8.51037181187267e-07, "loss": 0.261, "step": 17043 }, { "epoch": 2.7754753083906687, "grad_norm": 0.1798379123210907, "learning_rate": 8.498121613290694e-07, "loss": 0.2701, "step": 17044 }, { "epoch": 2.7756381549484996, "grad_norm": 0.21271909773349762, "learning_rate": 8.485880085349673e-07, "loss": 0.2881, "step": 17045 }, { "epoch": 2.7758010015063306, "grad_norm": 0.18706190586090088, "learning_rate": 8.473647228489146e-07, "loss": 0.26, "step": 17046 }, { "epoch": 2.7759638480641615, "grad_norm": 0.20537027716636658, "learning_rate": 8.461423043148342e-07, "loss": 0.3001, "step": 17047 }, { "epoch": 2.7761266946219925, "grad_norm": 0.1818206012248993, "learning_rate": 8.449207529766079e-07, "loss": 0.2792, "step": 17048 }, { "epoch": 2.7762895411798234, "grad_norm": 0.14737945795059204, "learning_rate": 8.437000688781005e-07, "loss": 0.2685, "step": 17049 }, { "epoch": 2.7764523877376543, "grad_norm": 0.20568186044692993, "learning_rate": 8.424802520631353e-07, "loss": 0.2838, "step": 17050 }, { "epoch": 2.776615234295485, "grad_norm": 0.15758216381072998, "learning_rate": 8.41261302575505e-07, "loss": 0.2525, "step": 17051 }, { "epoch": 2.7767780808533162, "grad_norm": 0.18586653470993042, "learning_rate": 8.400432204589776e-07, "loss": 0.2872, "step": 17052 }, { "epoch": 2.7769409274111467, "grad_norm": 0.1710326373577118, "learning_rate": 8.388260057572789e-07, "loss": 0.2762, "step": 17053 }, { "epoch": 2.7771037739689777, "grad_norm": 0.2106277197599411, "learning_rate": 8.376096585141213e-07, "loss": 0.2701, "step": 17054 }, { "epoch": 2.7772666205268086, "grad_norm": 0.19515030086040497, "learning_rate": 8.363941787731671e-07, "loss": 0.2833, "step": 17055 }, { "epoch": 2.7774294670846396, "grad_norm": 0.20029866695404053, "learning_rate": 8.351795665780593e-07, "loss": 0.2596, "step": 17056 }, { "epoch": 2.7775923136424705, "grad_norm": 0.21300671994686127, "learning_rate": 8.339658219724017e-07, "loss": 0.2892, "step": 17057 }, { "epoch": 2.777755160200301, "grad_norm": 0.1318810135126114, "learning_rate": 8.327529449997761e-07, "loss": 0.2471, "step": 17058 }, { "epoch": 2.7779180067581324, "grad_norm": 0.18941816687583923, "learning_rate": 8.315409357037285e-07, "loss": 0.2497, "step": 17059 }, { "epoch": 2.778080853315963, "grad_norm": 0.21539784967899323, "learning_rate": 8.303297941277682e-07, "loss": 0.2518, "step": 17060 }, { "epoch": 2.778243699873794, "grad_norm": 0.1875184327363968, "learning_rate": 8.291195203153856e-07, "loss": 0.2718, "step": 17061 }, { "epoch": 2.7784065464316248, "grad_norm": 0.1291627138853073, "learning_rate": 8.279101143100321e-07, "loss": 0.2842, "step": 17062 }, { "epoch": 2.7785693929894557, "grad_norm": 0.17870792746543884, "learning_rate": 8.267015761551228e-07, "loss": 0.2444, "step": 17063 }, { "epoch": 2.7787322395472867, "grad_norm": 0.14473415911197662, "learning_rate": 8.254939058940536e-07, "loss": 0.2423, "step": 17064 }, { "epoch": 2.7788950861051176, "grad_norm": 0.20014336705207825, "learning_rate": 8.242871035701816e-07, "loss": 0.261, "step": 17065 }, { "epoch": 2.7790579326629485, "grad_norm": 0.18980014324188232, "learning_rate": 8.230811692268358e-07, "loss": 0.2776, "step": 17066 }, { "epoch": 2.779220779220779, "grad_norm": 0.17094972729682922, "learning_rate": 8.218761029073124e-07, "loss": 0.2787, "step": 17067 }, { "epoch": 2.77938362577861, "grad_norm": 0.18111135065555573, "learning_rate": 8.206719046548739e-07, "loss": 0.2599, "step": 17068 }, { "epoch": 2.779546472336441, "grad_norm": 0.1936051845550537, "learning_rate": 8.194685745127578e-07, "loss": 0.2776, "step": 17069 }, { "epoch": 2.779709318894272, "grad_norm": 0.15303876996040344, "learning_rate": 8.182661125241686e-07, "loss": 0.2775, "step": 17070 }, { "epoch": 2.779872165452103, "grad_norm": 0.19843773543834686, "learning_rate": 8.170645187322745e-07, "loss": 0.2303, "step": 17071 }, { "epoch": 2.7800350120099337, "grad_norm": 0.15866073966026306, "learning_rate": 8.158637931802188e-07, "loss": 0.2574, "step": 17072 }, { "epoch": 2.7801978585677647, "grad_norm": 0.22036157548427582, "learning_rate": 8.146639359111114e-07, "loss": 0.2462, "step": 17073 }, { "epoch": 2.780360705125595, "grad_norm": 0.20246799290180206, "learning_rate": 8.134649469680316e-07, "loss": 0.2287, "step": 17074 }, { "epoch": 2.780523551683426, "grad_norm": 0.17085495591163635, "learning_rate": 8.122668263940203e-07, "loss": 0.2452, "step": 17075 }, { "epoch": 2.780686398241257, "grad_norm": 0.21249710023403168, "learning_rate": 8.110695742321011e-07, "loss": 0.292, "step": 17076 }, { "epoch": 2.780849244799088, "grad_norm": 0.17104654014110565, "learning_rate": 8.098731905252566e-07, "loss": 0.2709, "step": 17077 }, { "epoch": 2.781012091356919, "grad_norm": 0.1771952360868454, "learning_rate": 8.08677675316441e-07, "loss": 0.2418, "step": 17078 }, { "epoch": 2.78117493791475, "grad_norm": 0.1448463797569275, "learning_rate": 8.074830286485702e-07, "loss": 0.2613, "step": 17079 }, { "epoch": 2.781337784472581, "grad_norm": 0.1903747171163559, "learning_rate": 8.062892505645486e-07, "loss": 0.2615, "step": 17080 }, { "epoch": 2.7815006310304113, "grad_norm": 0.18880528211593628, "learning_rate": 8.050963411072255e-07, "loss": 0.2698, "step": 17081 }, { "epoch": 2.7816634775882427, "grad_norm": 0.16133685410022736, "learning_rate": 8.039043003194329e-07, "loss": 0.2406, "step": 17082 }, { "epoch": 2.7818263241460732, "grad_norm": 0.15781129896640778, "learning_rate": 8.027131282439704e-07, "loss": 0.3046, "step": 17083 }, { "epoch": 2.781989170703904, "grad_norm": 0.19958710670471191, "learning_rate": 8.015228249236062e-07, "loss": 0.25, "step": 17084 }, { "epoch": 2.782152017261735, "grad_norm": 0.16532281041145325, "learning_rate": 8.003333904010673e-07, "loss": 0.249, "step": 17085 }, { "epoch": 2.782314863819566, "grad_norm": 0.22109702229499817, "learning_rate": 7.991448247190697e-07, "loss": 0.3023, "step": 17086 }, { "epoch": 2.782477710377397, "grad_norm": 0.2131834328174591, "learning_rate": 7.979571279202791e-07, "loss": 0.287, "step": 17087 }, { "epoch": 2.782640556935228, "grad_norm": 0.19832676649093628, "learning_rate": 7.967703000473337e-07, "loss": 0.2381, "step": 17088 }, { "epoch": 2.782803403493059, "grad_norm": 0.18116173148155212, "learning_rate": 7.955843411428549e-07, "loss": 0.2459, "step": 17089 }, { "epoch": 2.7829662500508894, "grad_norm": 0.1981237828731537, "learning_rate": 7.943992512494141e-07, "loss": 0.2634, "step": 17090 }, { "epoch": 2.7831290966087203, "grad_norm": 0.17526526749134064, "learning_rate": 7.932150304095637e-07, "loss": 0.2775, "step": 17091 }, { "epoch": 2.7832919431665513, "grad_norm": 0.1910914182662964, "learning_rate": 7.920316786658194e-07, "loss": 0.2692, "step": 17092 }, { "epoch": 2.783454789724382, "grad_norm": 0.17273731529712677, "learning_rate": 7.90849196060664e-07, "loss": 0.2606, "step": 17093 }, { "epoch": 2.783617636282213, "grad_norm": 0.18462714552879333, "learning_rate": 7.896675826365579e-07, "loss": 0.3021, "step": 17094 }, { "epoch": 2.783780482840044, "grad_norm": 0.18245747685432434, "learning_rate": 7.884868384359228e-07, "loss": 0.2628, "step": 17095 }, { "epoch": 2.783943329397875, "grad_norm": 0.19529661536216736, "learning_rate": 7.873069635011471e-07, "loss": 0.2619, "step": 17096 }, { "epoch": 2.7841061759557055, "grad_norm": 0.21873389184474945, "learning_rate": 7.861279578745939e-07, "loss": 0.2665, "step": 17097 }, { "epoch": 2.7842690225135365, "grad_norm": 0.18694809079170227, "learning_rate": 7.849498215985962e-07, "loss": 0.2802, "step": 17098 }, { "epoch": 2.7844318690713674, "grad_norm": 0.16431035101413727, "learning_rate": 7.837725547154506e-07, "loss": 0.2672, "step": 17099 }, { "epoch": 2.7845947156291984, "grad_norm": 0.14408457279205322, "learning_rate": 7.825961572674234e-07, "loss": 0.2284, "step": 17100 }, { "epoch": 2.7847575621870293, "grad_norm": 0.20635336637496948, "learning_rate": 7.814206292967474e-07, "loss": 0.2901, "step": 17101 }, { "epoch": 2.7849204087448602, "grad_norm": 0.13228216767311096, "learning_rate": 7.802459708456361e-07, "loss": 0.285, "step": 17102 }, { "epoch": 2.785083255302691, "grad_norm": 0.17159965634346008, "learning_rate": 7.790721819562586e-07, "loss": 0.231, "step": 17103 }, { "epoch": 2.7852461018605217, "grad_norm": 0.15518708527088165, "learning_rate": 7.778992626707559e-07, "loss": 0.2552, "step": 17104 }, { "epoch": 2.785408948418353, "grad_norm": 0.2103845179080963, "learning_rate": 7.76727213031242e-07, "loss": 0.2279, "step": 17105 }, { "epoch": 2.7855717949761836, "grad_norm": 0.18468894064426422, "learning_rate": 7.755560330797968e-07, "loss": 0.2689, "step": 17106 }, { "epoch": 2.7857346415340145, "grad_norm": 0.1891648769378662, "learning_rate": 7.743857228584672e-07, "loss": 0.2835, "step": 17107 }, { "epoch": 2.7858974880918455, "grad_norm": 0.149269238114357, "learning_rate": 7.732162824092697e-07, "loss": 0.2295, "step": 17108 }, { "epoch": 2.7860603346496764, "grad_norm": 0.19389699399471283, "learning_rate": 7.720477117741959e-07, "loss": 0.2701, "step": 17109 }, { "epoch": 2.7862231812075073, "grad_norm": 0.16257411241531372, "learning_rate": 7.70880010995198e-07, "loss": 0.2619, "step": 17110 }, { "epoch": 2.786386027765338, "grad_norm": 0.18498322367668152, "learning_rate": 7.697131801141982e-07, "loss": 0.2772, "step": 17111 }, { "epoch": 2.7865488743231692, "grad_norm": 0.20324952900409698, "learning_rate": 7.685472191730936e-07, "loss": 0.2749, "step": 17112 }, { "epoch": 2.7867117208809997, "grad_norm": 0.18431608378887177, "learning_rate": 7.673821282137422e-07, "loss": 0.2684, "step": 17113 }, { "epoch": 2.7868745674388307, "grad_norm": 0.13629217445850372, "learning_rate": 7.662179072779774e-07, "loss": 0.2232, "step": 17114 }, { "epoch": 2.7870374139966616, "grad_norm": 0.16128937900066376, "learning_rate": 7.650545564075906e-07, "loss": 0.263, "step": 17115 }, { "epoch": 2.7872002605544925, "grad_norm": 0.2192179411649704, "learning_rate": 7.638920756443596e-07, "loss": 0.3041, "step": 17116 }, { "epoch": 2.7873631071123235, "grad_norm": 0.17378707230091095, "learning_rate": 7.627304650300177e-07, "loss": 0.2654, "step": 17117 }, { "epoch": 2.7875259536701544, "grad_norm": 0.16928108036518097, "learning_rate": 7.615697246062676e-07, "loss": 0.262, "step": 17118 }, { "epoch": 2.7876888002279854, "grad_norm": 0.19019928574562073, "learning_rate": 7.604098544147814e-07, "loss": 0.2945, "step": 17119 }, { "epoch": 2.787851646785816, "grad_norm": 0.16609713435173035, "learning_rate": 7.592508544972065e-07, "loss": 0.263, "step": 17120 }, { "epoch": 2.788014493343647, "grad_norm": 0.20324374735355377, "learning_rate": 7.580927248951541e-07, "loss": 0.2751, "step": 17121 }, { "epoch": 2.7881773399014778, "grad_norm": 0.21617838740348816, "learning_rate": 7.56935465650202e-07, "loss": 0.3051, "step": 17122 }, { "epoch": 2.7883401864593087, "grad_norm": 0.17837417125701904, "learning_rate": 7.55779076803903e-07, "loss": 0.2267, "step": 17123 }, { "epoch": 2.7885030330171396, "grad_norm": 0.18741397559642792, "learning_rate": 7.54623558397774e-07, "loss": 0.3007, "step": 17124 }, { "epoch": 2.7886658795749706, "grad_norm": 0.16875335574150085, "learning_rate": 7.534689104732956e-07, "loss": 0.2641, "step": 17125 }, { "epoch": 2.7888287261328015, "grad_norm": 0.14920799434185028, "learning_rate": 7.523151330719319e-07, "loss": 0.2922, "step": 17126 }, { "epoch": 2.788991572690632, "grad_norm": 0.16996854543685913, "learning_rate": 7.511622262351026e-07, "loss": 0.261, "step": 17127 }, { "epoch": 2.7891544192484634, "grad_norm": 0.1650959849357605, "learning_rate": 7.500101900041967e-07, "loss": 0.2837, "step": 17128 }, { "epoch": 2.789317265806294, "grad_norm": 0.20728135108947754, "learning_rate": 7.48859024420584e-07, "loss": 0.2576, "step": 17129 }, { "epoch": 2.789480112364125, "grad_norm": 0.20945370197296143, "learning_rate": 7.477087295255869e-07, "loss": 0.2885, "step": 17130 }, { "epoch": 2.789642958921956, "grad_norm": 0.1412220001220703, "learning_rate": 7.465593053605114e-07, "loss": 0.2481, "step": 17131 }, { "epoch": 2.7898058054797867, "grad_norm": 0.1796531230211258, "learning_rate": 7.454107519666243e-07, "loss": 0.2939, "step": 17132 }, { "epoch": 2.7899686520376177, "grad_norm": 0.41412320733070374, "learning_rate": 7.442630693851538e-07, "loss": 0.2894, "step": 17133 }, { "epoch": 2.790131498595448, "grad_norm": 0.22006995975971222, "learning_rate": 7.43116257657317e-07, "loss": 0.2676, "step": 17134 }, { "epoch": 2.7902943451532796, "grad_norm": 0.16586488485336304, "learning_rate": 7.419703168242808e-07, "loss": 0.241, "step": 17135 }, { "epoch": 2.79045719171111, "grad_norm": 0.16485579311847687, "learning_rate": 7.408252469271875e-07, "loss": 0.2408, "step": 17136 }, { "epoch": 2.790620038268941, "grad_norm": 0.19957977533340454, "learning_rate": 7.396810480071487e-07, "loss": 0.2335, "step": 17137 }, { "epoch": 2.790782884826772, "grad_norm": 0.1658906191587448, "learning_rate": 7.385377201052507e-07, "loss": 0.2649, "step": 17138 }, { "epoch": 2.790945731384603, "grad_norm": 0.1344386637210846, "learning_rate": 7.373952632625358e-07, "loss": 0.2194, "step": 17139 }, { "epoch": 2.791108577942434, "grad_norm": 0.16407248377799988, "learning_rate": 7.362536775200241e-07, "loss": 0.2615, "step": 17140 }, { "epoch": 2.7912714245002648, "grad_norm": 0.17444369196891785, "learning_rate": 7.351129629186992e-07, "loss": 0.2698, "step": 17141 }, { "epoch": 2.7914342710580957, "grad_norm": 0.20345532894134521, "learning_rate": 7.339731194995203e-07, "loss": 0.2594, "step": 17142 }, { "epoch": 2.791597117615926, "grad_norm": 0.2002142369747162, "learning_rate": 7.3283414730341e-07, "loss": 0.2689, "step": 17143 }, { "epoch": 2.791759964173757, "grad_norm": 0.14460444450378418, "learning_rate": 7.31696046371258e-07, "loss": 0.2572, "step": 17144 }, { "epoch": 2.791922810731588, "grad_norm": 0.19196100533008575, "learning_rate": 7.305588167439286e-07, "loss": 0.2533, "step": 17145 }, { "epoch": 2.792085657289419, "grad_norm": 0.17868800461292267, "learning_rate": 7.294224584622533e-07, "loss": 0.2601, "step": 17146 }, { "epoch": 2.79224850384725, "grad_norm": 0.16813845932483673, "learning_rate": 7.282869715670271e-07, "loss": 0.2554, "step": 17147 }, { "epoch": 2.792411350405081, "grad_norm": 0.1810643970966339, "learning_rate": 7.271523560990145e-07, "loss": 0.2641, "step": 17148 }, { "epoch": 2.792574196962912, "grad_norm": 0.20348162949085236, "learning_rate": 7.26018612098961e-07, "loss": 0.2726, "step": 17149 }, { "epoch": 2.7927370435207424, "grad_norm": 0.21543926000595093, "learning_rate": 7.248857396075642e-07, "loss": 0.2456, "step": 17150 }, { "epoch": 2.7928998900785733, "grad_norm": 0.17893017828464508, "learning_rate": 7.237537386654974e-07, "loss": 0.2821, "step": 17151 }, { "epoch": 2.7930627366364043, "grad_norm": 0.17652596533298492, "learning_rate": 7.226226093134087e-07, "loss": 0.2529, "step": 17152 }, { "epoch": 2.793225583194235, "grad_norm": 0.18248498439788818, "learning_rate": 7.214923515919042e-07, "loss": 0.2996, "step": 17153 }, { "epoch": 2.793388429752066, "grad_norm": 0.19963912665843964, "learning_rate": 7.203629655415628e-07, "loss": 0.2549, "step": 17154 }, { "epoch": 2.793551276309897, "grad_norm": 0.17307262122631073, "learning_rate": 7.192344512029353e-07, "loss": 0.288, "step": 17155 }, { "epoch": 2.793714122867728, "grad_norm": 0.1967819631099701, "learning_rate": 7.181068086165394e-07, "loss": 0.277, "step": 17156 }, { "epoch": 2.7938769694255585, "grad_norm": 0.17160218954086304, "learning_rate": 7.169800378228591e-07, "loss": 0.2347, "step": 17157 }, { "epoch": 2.79403981598339, "grad_norm": 0.16284239292144775, "learning_rate": 7.158541388623513e-07, "loss": 0.2452, "step": 17158 }, { "epoch": 2.7942026625412204, "grad_norm": 0.20237073302268982, "learning_rate": 7.147291117754307e-07, "loss": 0.2562, "step": 17159 }, { "epoch": 2.7943655090990513, "grad_norm": 0.1661868691444397, "learning_rate": 7.13604956602501e-07, "loss": 0.2675, "step": 17160 }, { "epoch": 2.7945283556568823, "grad_norm": 0.17328481376171112, "learning_rate": 7.124816733839163e-07, "loss": 0.2577, "step": 17161 }, { "epoch": 2.7946912022147132, "grad_norm": 0.20122647285461426, "learning_rate": 7.113592621600052e-07, "loss": 0.2745, "step": 17162 }, { "epoch": 2.794854048772544, "grad_norm": 0.15029393136501312, "learning_rate": 7.102377229710716e-07, "loss": 0.2412, "step": 17163 }, { "epoch": 2.795016895330375, "grad_norm": 0.1689123511314392, "learning_rate": 7.091170558573751e-07, "loss": 0.302, "step": 17164 }, { "epoch": 2.795179741888206, "grad_norm": 0.15618948638439178, "learning_rate": 7.079972608591501e-07, "loss": 0.2494, "step": 17165 }, { "epoch": 2.7953425884460366, "grad_norm": 0.14346101880073547, "learning_rate": 7.068783380166089e-07, "loss": 0.2653, "step": 17166 }, { "epoch": 2.7955054350038675, "grad_norm": 0.18847697973251343, "learning_rate": 7.057602873699165e-07, "loss": 0.2348, "step": 17167 }, { "epoch": 2.7956682815616984, "grad_norm": 0.1624954640865326, "learning_rate": 7.046431089592215e-07, "loss": 0.2731, "step": 17168 }, { "epoch": 2.7958311281195294, "grad_norm": 0.14011774957180023, "learning_rate": 7.035268028246278e-07, "loss": 0.2817, "step": 17169 }, { "epoch": 2.7959939746773603, "grad_norm": 0.16003070771694183, "learning_rate": 7.024113690062146e-07, "loss": 0.2829, "step": 17170 }, { "epoch": 2.7961568212351913, "grad_norm": 0.2623598277568817, "learning_rate": 7.012968075440301e-07, "loss": 0.2587, "step": 17171 }, { "epoch": 2.796319667793022, "grad_norm": 0.17223389446735382, "learning_rate": 7.001831184780955e-07, "loss": 0.2658, "step": 17172 }, { "epoch": 2.7964825143508527, "grad_norm": 0.16558164358139038, "learning_rate": 6.990703018483869e-07, "loss": 0.2787, "step": 17173 }, { "epoch": 2.7966453609086837, "grad_norm": 0.19471584260463715, "learning_rate": 6.97958357694864e-07, "loss": 0.2658, "step": 17174 }, { "epoch": 2.7968082074665146, "grad_norm": 0.20016902685165405, "learning_rate": 6.968472860574449e-07, "loss": 0.2858, "step": 17175 }, { "epoch": 2.7969710540243455, "grad_norm": 0.22310416400432587, "learning_rate": 6.957370869760255e-07, "loss": 0.2619, "step": 17176 }, { "epoch": 2.7971339005821765, "grad_norm": 0.17375312745571136, "learning_rate": 6.946277604904599e-07, "loss": 0.2742, "step": 17177 }, { "epoch": 2.7972967471400074, "grad_norm": 0.16187219321727753, "learning_rate": 6.935193066405776e-07, "loss": 0.2486, "step": 17178 }, { "epoch": 2.7974595936978384, "grad_norm": 0.19034047424793243, "learning_rate": 6.924117254661799e-07, "loss": 0.2439, "step": 17179 }, { "epoch": 2.797622440255669, "grad_norm": 0.15778818726539612, "learning_rate": 6.913050170070268e-07, "loss": 0.3001, "step": 17180 }, { "epoch": 2.7977852868135002, "grad_norm": 0.17129069566726685, "learning_rate": 6.90199181302853e-07, "loss": 0.2368, "step": 17181 }, { "epoch": 2.7979481333713307, "grad_norm": 0.15181121230125427, "learning_rate": 6.890942183933658e-07, "loss": 0.2539, "step": 17182 }, { "epoch": 2.7981109799291617, "grad_norm": 0.17717903852462769, "learning_rate": 6.879901283182305e-07, "loss": 0.2857, "step": 17183 }, { "epoch": 2.7982738264869926, "grad_norm": 0.1700747013092041, "learning_rate": 6.868869111170905e-07, "loss": 0.2209, "step": 17184 }, { "epoch": 2.7984366730448236, "grad_norm": 0.16315889358520508, "learning_rate": 6.857845668295559e-07, "loss": 0.2527, "step": 17185 }, { "epoch": 2.7985995196026545, "grad_norm": 0.14936913549900055, "learning_rate": 6.846830954952033e-07, "loss": 0.2797, "step": 17186 }, { "epoch": 2.798762366160485, "grad_norm": 0.217774897813797, "learning_rate": 6.835824971535787e-07, "loss": 0.274, "step": 17187 }, { "epoch": 2.7989252127183164, "grad_norm": 0.1916843205690384, "learning_rate": 6.824827718441923e-07, "loss": 0.2605, "step": 17188 }, { "epoch": 2.799088059276147, "grad_norm": 0.1635189801454544, "learning_rate": 6.813839196065347e-07, "loss": 0.2583, "step": 17189 }, { "epoch": 2.799250905833978, "grad_norm": 0.22970406711101532, "learning_rate": 6.802859404800576e-07, "loss": 0.292, "step": 17190 }, { "epoch": 2.799413752391809, "grad_norm": 0.18461798131465912, "learning_rate": 6.791888345041742e-07, "loss": 0.2832, "step": 17191 }, { "epoch": 2.7995765989496397, "grad_norm": 0.1547493040561676, "learning_rate": 6.780926017182804e-07, "loss": 0.2549, "step": 17192 }, { "epoch": 2.7997394455074707, "grad_norm": 0.172359436750412, "learning_rate": 6.769972421617337e-07, "loss": 0.2677, "step": 17193 }, { "epoch": 2.7999022920653016, "grad_norm": 0.21147270500659943, "learning_rate": 6.759027558738585e-07, "loss": 0.2509, "step": 17194 }, { "epoch": 2.8000651386231326, "grad_norm": 0.20980122685432434, "learning_rate": 6.748091428939479e-07, "loss": 0.2328, "step": 17195 }, { "epoch": 2.800227985180963, "grad_norm": 0.20150882005691528, "learning_rate": 6.737164032612736e-07, "loss": 0.2892, "step": 17196 }, { "epoch": 2.800390831738794, "grad_norm": 0.2124544382095337, "learning_rate": 6.726245370150652e-07, "loss": 0.294, "step": 17197 }, { "epoch": 2.800553678296625, "grad_norm": 0.15491439402103424, "learning_rate": 6.715335441945219e-07, "loss": 0.2753, "step": 17198 }, { "epoch": 2.800716524854456, "grad_norm": 0.14593924582004547, "learning_rate": 6.704434248388097e-07, "loss": 0.2693, "step": 17199 }, { "epoch": 2.800879371412287, "grad_norm": 0.23240871727466583, "learning_rate": 6.693541789870778e-07, "loss": 0.2238, "step": 17200 }, { "epoch": 2.8010422179701178, "grad_norm": 0.1962585300207138, "learning_rate": 6.682658066784253e-07, "loss": 0.25, "step": 17201 }, { "epoch": 2.8012050645279487, "grad_norm": 0.2159583866596222, "learning_rate": 6.671783079519267e-07, "loss": 0.2472, "step": 17202 }, { "epoch": 2.801367911085779, "grad_norm": 0.18171627819538116, "learning_rate": 6.660916828466341e-07, "loss": 0.2659, "step": 17203 }, { "epoch": 2.80153075764361, "grad_norm": 0.16340234875679016, "learning_rate": 6.650059314015522e-07, "loss": 0.2857, "step": 17204 }, { "epoch": 2.801693604201441, "grad_norm": 0.22845619916915894, "learning_rate": 6.639210536556723e-07, "loss": 0.2632, "step": 17205 }, { "epoch": 2.801856450759272, "grad_norm": 0.15585510432720184, "learning_rate": 6.62837049647938e-07, "loss": 0.2622, "step": 17206 }, { "epoch": 2.802019297317103, "grad_norm": 0.19756175577640533, "learning_rate": 6.617539194172656e-07, "loss": 0.2393, "step": 17207 }, { "epoch": 2.802182143874934, "grad_norm": 0.17288242280483246, "learning_rate": 6.606716630025517e-07, "loss": 0.2477, "step": 17208 }, { "epoch": 2.802344990432765, "grad_norm": 0.1980534791946411, "learning_rate": 6.595902804426457e-07, "loss": 0.2546, "step": 17209 }, { "epoch": 2.8025078369905954, "grad_norm": 0.1778448075056076, "learning_rate": 6.585097717763722e-07, "loss": 0.2652, "step": 17210 }, { "epoch": 2.8026706835484267, "grad_norm": 0.22523698210716248, "learning_rate": 6.574301370425307e-07, "loss": 0.2968, "step": 17211 }, { "epoch": 2.8028335301062572, "grad_norm": 0.1569608449935913, "learning_rate": 6.563513762798789e-07, "loss": 0.253, "step": 17212 }, { "epoch": 2.802996376664088, "grad_norm": 0.16041870415210724, "learning_rate": 6.552734895271473e-07, "loss": 0.2453, "step": 17213 }, { "epoch": 2.803159223221919, "grad_norm": 0.18986451625823975, "learning_rate": 6.541964768230352e-07, "loss": 0.2714, "step": 17214 }, { "epoch": 2.80332206977975, "grad_norm": 0.20685938000679016, "learning_rate": 6.531203382062145e-07, "loss": 0.276, "step": 17215 }, { "epoch": 2.803484916337581, "grad_norm": 0.19427460432052612, "learning_rate": 6.520450737153184e-07, "loss": 0.2548, "step": 17216 }, { "epoch": 2.803647762895412, "grad_norm": 0.17342424392700195, "learning_rate": 6.509706833889489e-07, "loss": 0.2698, "step": 17217 }, { "epoch": 2.803810609453243, "grad_norm": 0.1646186262369156, "learning_rate": 6.498971672656867e-07, "loss": 0.2312, "step": 17218 }, { "epoch": 2.8039734560110734, "grad_norm": 0.16444824635982513, "learning_rate": 6.488245253840702e-07, "loss": 0.2688, "step": 17219 }, { "epoch": 2.8041363025689043, "grad_norm": 0.21860730648040771, "learning_rate": 6.477527577826131e-07, "loss": 0.2828, "step": 17220 }, { "epoch": 2.8042991491267353, "grad_norm": 0.16556310653686523, "learning_rate": 6.466818644997902e-07, "loss": 0.222, "step": 17221 }, { "epoch": 2.8044619956845662, "grad_norm": 0.15886013209819794, "learning_rate": 6.456118455740539e-07, "loss": 0.266, "step": 17222 }, { "epoch": 2.804624842242397, "grad_norm": 0.21346639096736908, "learning_rate": 6.44542701043821e-07, "loss": 0.2357, "step": 17223 }, { "epoch": 2.804787688800228, "grad_norm": 0.15601152181625366, "learning_rate": 6.434744309474744e-07, "loss": 0.2687, "step": 17224 }, { "epoch": 2.804950535358059, "grad_norm": 0.17662663757801056, "learning_rate": 6.424070353233724e-07, "loss": 0.2846, "step": 17225 }, { "epoch": 2.8051133819158895, "grad_norm": 0.17224453389644623, "learning_rate": 6.413405142098344e-07, "loss": 0.2762, "step": 17226 }, { "epoch": 2.8052762284737205, "grad_norm": 0.21563668549060822, "learning_rate": 6.40274867645152e-07, "loss": 0.2433, "step": 17227 }, { "epoch": 2.8054390750315514, "grad_norm": 0.21891328692436218, "learning_rate": 6.392100956675833e-07, "loss": 0.2505, "step": 17228 }, { "epoch": 2.8056019215893824, "grad_norm": 0.1650383174419403, "learning_rate": 6.381461983153591e-07, "loss": 0.3097, "step": 17229 }, { "epoch": 2.8057647681472133, "grad_norm": 0.17864440381526947, "learning_rate": 6.370831756266793e-07, "loss": 0.236, "step": 17230 }, { "epoch": 2.8059276147050443, "grad_norm": 0.2100190371274948, "learning_rate": 6.360210276397077e-07, "loss": 0.239, "step": 17231 }, { "epoch": 2.806090461262875, "grad_norm": 0.20475295186042786, "learning_rate": 6.349597543925723e-07, "loss": 0.3203, "step": 17232 }, { "epoch": 2.8062533078207057, "grad_norm": 0.22840555012226105, "learning_rate": 6.338993559233869e-07, "loss": 0.2653, "step": 17233 }, { "epoch": 2.806416154378537, "grad_norm": 0.1657525599002838, "learning_rate": 6.328398322702155e-07, "loss": 0.3024, "step": 17234 }, { "epoch": 2.8065790009363676, "grad_norm": 0.1638934165239334, "learning_rate": 6.317811834710973e-07, "loss": 0.271, "step": 17235 }, { "epoch": 2.8067418474941985, "grad_norm": 0.22817450761795044, "learning_rate": 6.307234095640463e-07, "loss": 0.2989, "step": 17236 }, { "epoch": 2.8069046940520295, "grad_norm": 0.2521979808807373, "learning_rate": 6.296665105870375e-07, "loss": 0.3169, "step": 17237 }, { "epoch": 2.8070675406098604, "grad_norm": 0.16902019083499908, "learning_rate": 6.286104865780185e-07, "loss": 0.2547, "step": 17238 }, { "epoch": 2.8072303871676914, "grad_norm": 0.16413657367229462, "learning_rate": 6.275553375748977e-07, "loss": 0.2848, "step": 17239 }, { "epoch": 2.807393233725522, "grad_norm": 0.17926326394081116, "learning_rate": 6.265010636155616e-07, "loss": 0.2385, "step": 17240 }, { "epoch": 2.8075560802833532, "grad_norm": 0.16707727313041687, "learning_rate": 6.254476647378632e-07, "loss": 0.2915, "step": 17241 }, { "epoch": 2.8077189268411837, "grad_norm": 0.2086363434791565, "learning_rate": 6.24395140979625e-07, "loss": 0.2928, "step": 17242 }, { "epoch": 2.8078817733990147, "grad_norm": 0.21971334517002106, "learning_rate": 6.233434923786308e-07, "loss": 0.2867, "step": 17243 }, { "epoch": 2.8080446199568456, "grad_norm": 0.16618692874908447, "learning_rate": 6.222927189726363e-07, "loss": 0.2378, "step": 17244 }, { "epoch": 2.8082074665146766, "grad_norm": 0.18188683688640594, "learning_rate": 6.212428207993726e-07, "loss": 0.2561, "step": 17245 }, { "epoch": 2.8083703130725075, "grad_norm": 0.2100667953491211, "learning_rate": 6.201937978965316e-07, "loss": 0.2891, "step": 17246 }, { "epoch": 2.8085331596303384, "grad_norm": 0.19188064336776733, "learning_rate": 6.191456503017806e-07, "loss": 0.2921, "step": 17247 }, { "epoch": 2.8086960061881694, "grad_norm": 0.164246067404747, "learning_rate": 6.180983780527477e-07, "loss": 0.24, "step": 17248 }, { "epoch": 2.808858852746, "grad_norm": 0.17417116463184357, "learning_rate": 6.170519811870307e-07, "loss": 0.2565, "step": 17249 }, { "epoch": 2.809021699303831, "grad_norm": 0.2220769077539444, "learning_rate": 6.160064597421994e-07, "loss": 0.2815, "step": 17250 }, { "epoch": 2.8091845458616618, "grad_norm": 0.1668715476989746, "learning_rate": 6.149618137557961e-07, "loss": 0.3017, "step": 17251 }, { "epoch": 2.8093473924194927, "grad_norm": 0.19768017530441284, "learning_rate": 6.139180432653213e-07, "loss": 0.2702, "step": 17252 }, { "epoch": 2.8095102389773237, "grad_norm": 0.1926693469285965, "learning_rate": 6.128751483082507e-07, "loss": 0.2539, "step": 17253 }, { "epoch": 2.8096730855351546, "grad_norm": 0.16678370535373688, "learning_rate": 6.118331289220291e-07, "loss": 0.2767, "step": 17254 }, { "epoch": 2.8098359320929855, "grad_norm": 0.22496068477630615, "learning_rate": 6.107919851440685e-07, "loss": 0.2501, "step": 17255 }, { "epoch": 2.809998778650816, "grad_norm": 0.19073902070522308, "learning_rate": 6.097517170117472e-07, "loss": 0.2956, "step": 17256 }, { "epoch": 2.8101616252086474, "grad_norm": 0.19489429891109467, "learning_rate": 6.087123245624104e-07, "loss": 0.2749, "step": 17257 }, { "epoch": 2.810324471766478, "grad_norm": 0.1749172955751419, "learning_rate": 6.076738078333838e-07, "loss": 0.2511, "step": 17258 }, { "epoch": 2.810487318324309, "grad_norm": 0.219797745347023, "learning_rate": 6.066361668619486e-07, "loss": 0.3054, "step": 17259 }, { "epoch": 2.81065016488214, "grad_norm": 0.21680279076099396, "learning_rate": 6.055994016853583e-07, "loss": 0.2816, "step": 17260 }, { "epoch": 2.8108130114399708, "grad_norm": 0.15903285145759583, "learning_rate": 6.045635123408361e-07, "loss": 0.2621, "step": 17261 }, { "epoch": 2.8109758579978017, "grad_norm": 0.1947595477104187, "learning_rate": 6.035284988655771e-07, "loss": 0.269, "step": 17262 }, { "epoch": 2.811138704555632, "grad_norm": 0.16734421253204346, "learning_rate": 6.024943612967376e-07, "loss": 0.2486, "step": 17263 }, { "epoch": 2.8113015511134636, "grad_norm": 0.1705225557088852, "learning_rate": 6.014610996714437e-07, "loss": 0.2739, "step": 17264 }, { "epoch": 2.811464397671294, "grad_norm": 0.1753927320241928, "learning_rate": 6.004287140268017e-07, "loss": 0.2566, "step": 17265 }, { "epoch": 2.811627244229125, "grad_norm": 0.15736043453216553, "learning_rate": 5.993972043998708e-07, "loss": 0.2467, "step": 17266 }, { "epoch": 2.811790090786956, "grad_norm": 0.19349728524684906, "learning_rate": 5.983665708276854e-07, "loss": 0.2852, "step": 17267 }, { "epoch": 2.811952937344787, "grad_norm": 0.15276429057121277, "learning_rate": 5.973368133472462e-07, "loss": 0.2431, "step": 17268 }, { "epoch": 2.812115783902618, "grad_norm": 0.17945408821105957, "learning_rate": 5.963079319955322e-07, "loss": 0.3019, "step": 17269 }, { "epoch": 2.812278630460449, "grad_norm": 0.19368192553520203, "learning_rate": 5.952799268094778e-07, "loss": 0.2851, "step": 17270 }, { "epoch": 2.8124414770182797, "grad_norm": 0.1794026792049408, "learning_rate": 5.942527978259948e-07, "loss": 0.2694, "step": 17271 }, { "epoch": 2.8126043235761102, "grad_norm": 0.1989540159702301, "learning_rate": 5.932265450819513e-07, "loss": 0.3029, "step": 17272 }, { "epoch": 2.812767170133941, "grad_norm": 0.1332867592573166, "learning_rate": 5.922011686142065e-07, "loss": 0.2663, "step": 17273 }, { "epoch": 2.812930016691772, "grad_norm": 0.20069490373134613, "learning_rate": 5.911766684595643e-07, "loss": 0.2362, "step": 17274 }, { "epoch": 2.813092863249603, "grad_norm": 0.1880926489830017, "learning_rate": 5.901530446548092e-07, "loss": 0.2658, "step": 17275 }, { "epoch": 2.813255709807434, "grad_norm": 0.1835739016532898, "learning_rate": 5.891302972366952e-07, "loss": 0.2432, "step": 17276 }, { "epoch": 2.813418556365265, "grad_norm": 0.17658807337284088, "learning_rate": 5.8810842624194e-07, "loss": 0.291, "step": 17277 }, { "epoch": 2.813581402923096, "grad_norm": 0.1614377200603485, "learning_rate": 5.870874317072339e-07, "loss": 0.2499, "step": 17278 }, { "epoch": 2.8137442494809264, "grad_norm": 0.15891824662685394, "learning_rate": 5.860673136692308e-07, "loss": 0.2908, "step": 17279 }, { "epoch": 2.8139070960387573, "grad_norm": 0.1946312040090561, "learning_rate": 5.850480721645568e-07, "loss": 0.3003, "step": 17280 }, { "epoch": 2.8140699425965883, "grad_norm": 0.17766080796718597, "learning_rate": 5.840297072298051e-07, "loss": 0.25, "step": 17281 }, { "epoch": 2.814232789154419, "grad_norm": 0.17318537831306458, "learning_rate": 5.830122189015408e-07, "loss": 0.2669, "step": 17282 }, { "epoch": 2.81439563571225, "grad_norm": 0.1817905157804489, "learning_rate": 5.81995607216293e-07, "loss": 0.2377, "step": 17283 }, { "epoch": 2.814558482270081, "grad_norm": 0.18429048359394073, "learning_rate": 5.809798722105603e-07, "loss": 0.2611, "step": 17284 }, { "epoch": 2.814721328827912, "grad_norm": 0.17416156828403473, "learning_rate": 5.799650139208135e-07, "loss": 0.2344, "step": 17285 }, { "epoch": 2.8148841753857425, "grad_norm": 0.16780132055282593, "learning_rate": 5.789510323834845e-07, "loss": 0.2625, "step": 17286 }, { "epoch": 2.815047021943574, "grad_norm": 0.2149088978767395, "learning_rate": 5.779379276349834e-07, "loss": 0.2847, "step": 17287 }, { "epoch": 2.8152098685014044, "grad_norm": 0.18887700140476227, "learning_rate": 5.769256997116779e-07, "loss": 0.2666, "step": 17288 }, { "epoch": 2.8153727150592354, "grad_norm": 0.16395504772663116, "learning_rate": 5.759143486499169e-07, "loss": 0.2199, "step": 17289 }, { "epoch": 2.8155355616170663, "grad_norm": 0.20611844956874847, "learning_rate": 5.749038744860047e-07, "loss": 0.2696, "step": 17290 }, { "epoch": 2.8156984081748972, "grad_norm": 0.20357593894004822, "learning_rate": 5.738942772562234e-07, "loss": 0.2739, "step": 17291 }, { "epoch": 2.815861254732728, "grad_norm": 0.18093308806419373, "learning_rate": 5.728855569968217e-07, "loss": 0.2869, "step": 17292 }, { "epoch": 2.816024101290559, "grad_norm": 0.18422412872314453, "learning_rate": 5.718777137440151e-07, "loss": 0.2379, "step": 17293 }, { "epoch": 2.81618694784839, "grad_norm": 0.18489684164524078, "learning_rate": 5.708707475339831e-07, "loss": 0.252, "step": 17294 }, { "epoch": 2.8163497944062206, "grad_norm": 0.1959390938282013, "learning_rate": 5.698646584028855e-07, "loss": 0.3284, "step": 17295 }, { "epoch": 2.8165126409640515, "grad_norm": 0.20337188243865967, "learning_rate": 5.688594463868407e-07, "loss": 0.2583, "step": 17296 }, { "epoch": 2.8166754875218825, "grad_norm": 0.18963825702667236, "learning_rate": 5.678551115219394e-07, "loss": 0.2399, "step": 17297 }, { "epoch": 2.8168383340797134, "grad_norm": 0.1865091472864151, "learning_rate": 5.668516538442387e-07, "loss": 0.2641, "step": 17298 }, { "epoch": 2.8170011806375443, "grad_norm": 0.20120036602020264, "learning_rate": 5.658490733897709e-07, "loss": 0.2607, "step": 17299 }, { "epoch": 2.8171640271953753, "grad_norm": 0.16861556470394135, "learning_rate": 5.648473701945267e-07, "loss": 0.2558, "step": 17300 }, { "epoch": 2.8173268737532062, "grad_norm": 0.1547107696533203, "learning_rate": 5.638465442944691e-07, "loss": 0.2824, "step": 17301 }, { "epoch": 2.8174897203110367, "grad_norm": 0.18286409974098206, "learning_rate": 5.628465957255358e-07, "loss": 0.2621, "step": 17302 }, { "epoch": 2.8176525668688677, "grad_norm": 0.1823817640542984, "learning_rate": 5.61847524523626e-07, "loss": 0.2391, "step": 17303 }, { "epoch": 2.8178154134266986, "grad_norm": 0.27809804677963257, "learning_rate": 5.608493307246055e-07, "loss": 0.2924, "step": 17304 }, { "epoch": 2.8179782599845296, "grad_norm": 0.16675810515880585, "learning_rate": 5.598520143643177e-07, "loss": 0.2486, "step": 17305 }, { "epoch": 2.8181411065423605, "grad_norm": 0.17658454179763794, "learning_rate": 5.588555754785673e-07, "loss": 0.2773, "step": 17306 }, { "epoch": 2.8183039531001914, "grad_norm": 0.21589140594005585, "learning_rate": 5.578600141031287e-07, "loss": 0.2604, "step": 17307 }, { "epoch": 2.8184667996580224, "grad_norm": 0.19296857714653015, "learning_rate": 5.568653302737453e-07, "loss": 0.2798, "step": 17308 }, { "epoch": 2.818629646215853, "grad_norm": 0.22605182230472565, "learning_rate": 5.558715240261331e-07, "loss": 0.2732, "step": 17309 }, { "epoch": 2.8187924927736843, "grad_norm": 0.17516930401325226, "learning_rate": 5.54878595395969e-07, "loss": 0.2687, "step": 17310 }, { "epoch": 2.8189553393315148, "grad_norm": 0.21590730547904968, "learning_rate": 5.538865444189023e-07, "loss": 0.2654, "step": 17311 }, { "epoch": 2.8191181858893457, "grad_norm": 0.17013762891292572, "learning_rate": 5.52895371130549e-07, "loss": 0.2784, "step": 17312 }, { "epoch": 2.8192810324471766, "grad_norm": 0.21232075989246368, "learning_rate": 5.519050755665e-07, "loss": 0.274, "step": 17313 }, { "epoch": 2.8194438790050076, "grad_norm": 0.17146489024162292, "learning_rate": 5.509156577623076e-07, "loss": 0.2624, "step": 17314 }, { "epoch": 2.8196067255628385, "grad_norm": 0.2545278072357178, "learning_rate": 5.499271177534904e-07, "loss": 0.258, "step": 17315 }, { "epoch": 2.819769572120669, "grad_norm": 0.19064828753471375, "learning_rate": 5.489394555755478e-07, "loss": 0.2868, "step": 17316 }, { "epoch": 2.8199324186785004, "grad_norm": 0.19088514149188995, "learning_rate": 5.479526712639349e-07, "loss": 0.2418, "step": 17317 }, { "epoch": 2.820095265236331, "grad_norm": 0.1952372044324875, "learning_rate": 5.469667648540816e-07, "loss": 0.2352, "step": 17318 }, { "epoch": 2.820258111794162, "grad_norm": 0.17956383526325226, "learning_rate": 5.459817363813846e-07, "loss": 0.2349, "step": 17319 }, { "epoch": 2.820420958351993, "grad_norm": 0.1883140206336975, "learning_rate": 5.449975858812101e-07, "loss": 0.2613, "step": 17320 }, { "epoch": 2.8205838049098237, "grad_norm": 0.15524153411388397, "learning_rate": 5.440143133888881e-07, "loss": 0.2848, "step": 17321 }, { "epoch": 2.8207466514676547, "grad_norm": 0.1715157926082611, "learning_rate": 5.430319189397293e-07, "loss": 0.2839, "step": 17322 }, { "epoch": 2.8209094980254856, "grad_norm": 0.20829592645168304, "learning_rate": 5.420504025689943e-07, "loss": 0.2638, "step": 17323 }, { "epoch": 2.8210723445833166, "grad_norm": 0.18040437996387482, "learning_rate": 5.410697643119328e-07, "loss": 0.2445, "step": 17324 }, { "epoch": 2.821235191141147, "grad_norm": 0.157992884516716, "learning_rate": 5.400900042037471e-07, "loss": 0.2879, "step": 17325 }, { "epoch": 2.821398037698978, "grad_norm": 0.167124405503273, "learning_rate": 5.391111222796091e-07, "loss": 0.2505, "step": 17326 }, { "epoch": 2.821560884256809, "grad_norm": 0.1970318853855133, "learning_rate": 5.381331185746741e-07, "loss": 0.265, "step": 17327 }, { "epoch": 2.82172373081464, "grad_norm": 0.1388709545135498, "learning_rate": 5.371559931240499e-07, "loss": 0.2761, "step": 17328 }, { "epoch": 2.821886577372471, "grad_norm": 0.19789087772369385, "learning_rate": 5.36179745962817e-07, "loss": 0.2534, "step": 17329 }, { "epoch": 2.822049423930302, "grad_norm": 0.1578737199306488, "learning_rate": 5.352043771260224e-07, "loss": 0.2111, "step": 17330 }, { "epoch": 2.8222122704881327, "grad_norm": 0.17441311478614807, "learning_rate": 5.342298866486933e-07, "loss": 0.2476, "step": 17331 }, { "epoch": 2.822375117045963, "grad_norm": 0.2863922417163849, "learning_rate": 5.332562745658132e-07, "loss": 0.2621, "step": 17332 }, { "epoch": 2.822537963603794, "grad_norm": 0.18567733466625214, "learning_rate": 5.322835409123373e-07, "loss": 0.2625, "step": 17333 }, { "epoch": 2.822700810161625, "grad_norm": 0.16473491489887238, "learning_rate": 5.313116857231876e-07, "loss": 0.2333, "step": 17334 }, { "epoch": 2.822863656719456, "grad_norm": 0.1675369143486023, "learning_rate": 5.303407090332585e-07, "loss": 0.2506, "step": 17335 }, { "epoch": 2.823026503277287, "grad_norm": 0.16757944226264954, "learning_rate": 5.293706108774138e-07, "loss": 0.2829, "step": 17336 }, { "epoch": 2.823189349835118, "grad_norm": 0.2184773087501526, "learning_rate": 5.284013912904756e-07, "loss": 0.2852, "step": 17337 }, { "epoch": 2.823352196392949, "grad_norm": 0.17670750617980957, "learning_rate": 5.274330503072494e-07, "loss": 0.2773, "step": 17338 }, { "epoch": 2.8235150429507794, "grad_norm": 0.18628863990306854, "learning_rate": 5.264655879624992e-07, "loss": 0.2603, "step": 17339 }, { "epoch": 2.8236778895086108, "grad_norm": 0.1853054165840149, "learning_rate": 5.25499004290958e-07, "loss": 0.2539, "step": 17340 }, { "epoch": 2.8238407360664413, "grad_norm": 0.17697229981422424, "learning_rate": 5.245332993273289e-07, "loss": 0.2835, "step": 17341 }, { "epoch": 2.824003582624272, "grad_norm": 0.17490635812282562, "learning_rate": 5.235684731062868e-07, "loss": 0.2683, "step": 17342 }, { "epoch": 2.824166429182103, "grad_norm": 0.16754461824893951, "learning_rate": 5.226045256624706e-07, "loss": 0.2596, "step": 17343 }, { "epoch": 2.824329275739934, "grad_norm": 0.1436263769865036, "learning_rate": 5.216414570304861e-07, "loss": 0.2659, "step": 17344 }, { "epoch": 2.824492122297765, "grad_norm": 0.2138856202363968, "learning_rate": 5.206792672449112e-07, "loss": 0.318, "step": 17345 }, { "epoch": 2.824654968855596, "grad_norm": 0.22138845920562744, "learning_rate": 5.197179563402932e-07, "loss": 0.2581, "step": 17346 }, { "epoch": 2.824817815413427, "grad_norm": 0.16794946789741516, "learning_rate": 5.187575243511489e-07, "loss": 0.261, "step": 17347 }, { "epoch": 2.8249806619712574, "grad_norm": 0.16076964139938354, "learning_rate": 5.177979713119535e-07, "loss": 0.2478, "step": 17348 }, { "epoch": 2.8251435085290884, "grad_norm": 0.2089911699295044, "learning_rate": 5.168392972571628e-07, "loss": 0.2793, "step": 17349 }, { "epoch": 2.8253063550869193, "grad_norm": 0.18901242315769196, "learning_rate": 5.158815022211938e-07, "loss": 0.2575, "step": 17350 }, { "epoch": 2.8254692016447502, "grad_norm": 0.1473526656627655, "learning_rate": 5.149245862384383e-07, "loss": 0.2375, "step": 17351 }, { "epoch": 2.825632048202581, "grad_norm": 0.16634011268615723, "learning_rate": 5.139685493432467e-07, "loss": 0.2738, "step": 17352 }, { "epoch": 2.825794894760412, "grad_norm": 0.20230691134929657, "learning_rate": 5.13013391569947e-07, "loss": 0.263, "step": 17353 }, { "epoch": 2.825957741318243, "grad_norm": 0.2359922081232071, "learning_rate": 5.120591129528312e-07, "loss": 0.2716, "step": 17354 }, { "epoch": 2.8261205878760736, "grad_norm": 0.17382602393627167, "learning_rate": 5.11105713526161e-07, "loss": 0.2833, "step": 17355 }, { "epoch": 2.8262834344339045, "grad_norm": 0.17522075772285461, "learning_rate": 5.101531933241671e-07, "loss": 0.256, "step": 17356 }, { "epoch": 2.8264462809917354, "grad_norm": 0.1861804723739624, "learning_rate": 5.092015523810472e-07, "loss": 0.2777, "step": 17357 }, { "epoch": 2.8266091275495664, "grad_norm": 0.1776752769947052, "learning_rate": 5.082507907309658e-07, "loss": 0.2711, "step": 17358 }, { "epoch": 2.8267719741073973, "grad_norm": 0.17413228750228882, "learning_rate": 5.073009084080622e-07, "loss": 0.2676, "step": 17359 }, { "epoch": 2.8269348206652283, "grad_norm": 0.1800931692123413, "learning_rate": 5.063519054464366e-07, "loss": 0.2666, "step": 17360 }, { "epoch": 2.827097667223059, "grad_norm": 0.1569720059633255, "learning_rate": 5.054037818801649e-07, "loss": 0.2716, "step": 17361 }, { "epoch": 2.8272605137808897, "grad_norm": 0.19349758327007294, "learning_rate": 5.044565377432836e-07, "loss": 0.2689, "step": 17362 }, { "epoch": 2.827423360338721, "grad_norm": 0.19093327224254608, "learning_rate": 5.035101730698016e-07, "loss": 0.3019, "step": 17363 }, { "epoch": 2.8275862068965516, "grad_norm": 0.2522314190864563, "learning_rate": 5.025646878937001e-07, "loss": 0.2842, "step": 17364 }, { "epoch": 2.8277490534543825, "grad_norm": 0.19295485317707062, "learning_rate": 5.016200822489242e-07, "loss": 0.2634, "step": 17365 }, { "epoch": 2.8279119000122135, "grad_norm": 0.1749453842639923, "learning_rate": 5.006763561693828e-07, "loss": 0.2785, "step": 17366 }, { "epoch": 2.8280747465700444, "grad_norm": 0.1929541975259781, "learning_rate": 4.997335096889627e-07, "loss": 0.2531, "step": 17367 }, { "epoch": 2.8282375931278754, "grad_norm": 0.22778135538101196, "learning_rate": 4.987915428415174e-07, "loss": 0.2687, "step": 17368 }, { "epoch": 2.828400439685706, "grad_norm": 0.14931681752204895, "learning_rate": 4.978504556608615e-07, "loss": 0.2328, "step": 17369 }, { "epoch": 2.8285632862435373, "grad_norm": 0.17541375756263733, "learning_rate": 4.969102481807846e-07, "loss": 0.2534, "step": 17370 }, { "epoch": 2.8287261328013678, "grad_norm": 0.17009185254573822, "learning_rate": 4.959709204350432e-07, "loss": 0.2365, "step": 17371 }, { "epoch": 2.8288889793591987, "grad_norm": 0.16331014037132263, "learning_rate": 4.950324724573602e-07, "loss": 0.2811, "step": 17372 }, { "epoch": 2.8290518259170296, "grad_norm": 0.18912948668003082, "learning_rate": 4.940949042814336e-07, "loss": 0.2881, "step": 17373 }, { "epoch": 2.8292146724748606, "grad_norm": 0.20085269212722778, "learning_rate": 4.931582159409171e-07, "loss": 0.2412, "step": 17374 }, { "epoch": 2.8293775190326915, "grad_norm": 0.17113138735294342, "learning_rate": 4.922224074694503e-07, "loss": 0.2796, "step": 17375 }, { "epoch": 2.8295403655905225, "grad_norm": 0.18347303569316864, "learning_rate": 4.912874789006233e-07, "loss": 0.3021, "step": 17376 }, { "epoch": 2.8297032121483534, "grad_norm": 0.21014241874217987, "learning_rate": 4.903534302680035e-07, "loss": 0.25, "step": 17377 }, { "epoch": 2.829866058706184, "grad_norm": 0.19221282005310059, "learning_rate": 4.894202616051335e-07, "loss": 0.2934, "step": 17378 }, { "epoch": 2.830028905264015, "grad_norm": 0.17284347116947174, "learning_rate": 4.884879729455088e-07, "loss": 0.3032, "step": 17379 }, { "epoch": 2.830191751821846, "grad_norm": 0.23180612921714783, "learning_rate": 4.875565643226054e-07, "loss": 0.3425, "step": 17380 }, { "epoch": 2.8303545983796767, "grad_norm": 0.15091998875141144, "learning_rate": 4.866260357698604e-07, "loss": 0.2654, "step": 17381 }, { "epoch": 2.8305174449375077, "grad_norm": 0.1622130572795868, "learning_rate": 4.856963873206888e-07, "loss": 0.2739, "step": 17382 }, { "epoch": 2.8306802914953386, "grad_norm": 0.18519392609596252, "learning_rate": 4.84767619008461e-07, "loss": 0.2725, "step": 17383 }, { "epoch": 2.8308431380531696, "grad_norm": 0.23028770089149475, "learning_rate": 4.838397308665254e-07, "loss": 0.2651, "step": 17384 }, { "epoch": 2.831005984611, "grad_norm": 0.17287473380565643, "learning_rate": 4.829127229281943e-07, "loss": 0.2393, "step": 17385 }, { "epoch": 2.8311688311688314, "grad_norm": 0.14864152669906616, "learning_rate": 4.819865952267549e-07, "loss": 0.2747, "step": 17386 }, { "epoch": 2.831331677726662, "grad_norm": 0.19388435781002045, "learning_rate": 4.810613477954529e-07, "loss": 0.2761, "step": 17387 }, { "epoch": 2.831494524284493, "grad_norm": 0.16396762430667877, "learning_rate": 4.801369806675088e-07, "loss": 0.2499, "step": 17388 }, { "epoch": 2.831657370842324, "grad_norm": 0.1568882316350937, "learning_rate": 4.7921349387611e-07, "loss": 0.2874, "step": 17389 }, { "epoch": 2.8318202174001548, "grad_norm": 0.17332366108894348, "learning_rate": 4.782908874544161e-07, "loss": 0.2659, "step": 17390 }, { "epoch": 2.8319830639579857, "grad_norm": 0.2569730877876282, "learning_rate": 4.77369161435548e-07, "loss": 0.2638, "step": 17391 }, { "epoch": 2.832145910515816, "grad_norm": 0.17814035713672638, "learning_rate": 4.7644831585259566e-07, "loss": 0.2344, "step": 17392 }, { "epoch": 2.8323087570736476, "grad_norm": 0.20093421638011932, "learning_rate": 4.7552835073862434e-07, "loss": 0.3053, "step": 17393 }, { "epoch": 2.832471603631478, "grad_norm": 0.19700053334236145, "learning_rate": 4.746092661266632e-07, "loss": 0.2532, "step": 17394 }, { "epoch": 2.832634450189309, "grad_norm": 0.17233797907829285, "learning_rate": 4.7369106204970817e-07, "loss": 0.2353, "step": 17395 }, { "epoch": 2.83279729674714, "grad_norm": 0.16826660931110382, "learning_rate": 4.7277373854072717e-07, "loss": 0.2758, "step": 17396 }, { "epoch": 2.832960143304971, "grad_norm": 0.17172691226005554, "learning_rate": 4.718572956326578e-07, "loss": 0.2306, "step": 17397 }, { "epoch": 2.833122989862802, "grad_norm": 0.21896302700042725, "learning_rate": 4.70941733358396e-07, "loss": 0.2993, "step": 17398 }, { "epoch": 2.833285836420633, "grad_norm": 0.16898898780345917, "learning_rate": 4.700270517508182e-07, "loss": 0.2887, "step": 17399 }, { "epoch": 2.8334486829784638, "grad_norm": 0.15736259520053864, "learning_rate": 4.691132508427593e-07, "loss": 0.2536, "step": 17400 }, { "epoch": 2.8336115295362942, "grad_norm": 0.1767459660768509, "learning_rate": 4.682003306670374e-07, "loss": 0.257, "step": 17401 }, { "epoch": 2.833774376094125, "grad_norm": 0.18846365809440613, "learning_rate": 4.672882912564208e-07, "loss": 0.2586, "step": 17402 }, { "epoch": 2.833937222651956, "grad_norm": 0.19452889263629913, "learning_rate": 4.663771326436528e-07, "loss": 0.27, "step": 17403 }, { "epoch": 2.834100069209787, "grad_norm": 0.15191027522087097, "learning_rate": 4.6546685486145434e-07, "loss": 0.2426, "step": 17404 }, { "epoch": 2.834262915767618, "grad_norm": 0.15409478545188904, "learning_rate": 4.6455745794250495e-07, "loss": 0.238, "step": 17405 }, { "epoch": 2.834425762325449, "grad_norm": 0.1962069869041443, "learning_rate": 4.636489419194479e-07, "loss": 0.2835, "step": 17406 }, { "epoch": 2.83458860888328, "grad_norm": 0.18376362323760986, "learning_rate": 4.6274130682490977e-07, "loss": 0.2672, "step": 17407 }, { "epoch": 2.8347514554411104, "grad_norm": 0.19443492591381073, "learning_rate": 4.618345526914758e-07, "loss": 0.3034, "step": 17408 }, { "epoch": 2.8349143019989413, "grad_norm": 0.17330707609653473, "learning_rate": 4.609286795516976e-07, "loss": 0.2518, "step": 17409 }, { "epoch": 2.8350771485567723, "grad_norm": 0.19037972390651703, "learning_rate": 4.600236874380992e-07, "loss": 0.2552, "step": 17410 }, { "epoch": 2.8352399951146032, "grad_norm": 0.19520875811576843, "learning_rate": 4.5911957638317684e-07, "loss": 0.2706, "step": 17411 }, { "epoch": 2.835402841672434, "grad_norm": 0.19511272013187408, "learning_rate": 4.5821634641938784e-07, "loss": 0.2434, "step": 17412 }, { "epoch": 2.835565688230265, "grad_norm": 0.15447521209716797, "learning_rate": 4.573139975791618e-07, "loss": 0.2674, "step": 17413 }, { "epoch": 2.835728534788096, "grad_norm": 0.17730912566184998, "learning_rate": 4.564125298948896e-07, "loss": 0.2892, "step": 17414 }, { "epoch": 2.8358913813459266, "grad_norm": 0.18109916150569916, "learning_rate": 4.55511943398948e-07, "loss": 0.287, "step": 17415 }, { "epoch": 2.836054227903758, "grad_norm": 0.18320545554161072, "learning_rate": 4.5461223812366394e-07, "loss": 0.2438, "step": 17416 }, { "epoch": 2.8362170744615884, "grad_norm": 0.1890343576669693, "learning_rate": 4.537134141013394e-07, "loss": 0.2788, "step": 17417 }, { "epoch": 2.8363799210194194, "grad_norm": 0.20277824997901917, "learning_rate": 4.5281547136424575e-07, "loss": 0.277, "step": 17418 }, { "epoch": 2.8365427675772503, "grad_norm": 0.21349254250526428, "learning_rate": 4.519184099446239e-07, "loss": 0.2637, "step": 17419 }, { "epoch": 2.8367056141350813, "grad_norm": 0.17933663725852966, "learning_rate": 4.510222298746786e-07, "loss": 0.2501, "step": 17420 }, { "epoch": 2.836868460692912, "grad_norm": 0.16719655692577362, "learning_rate": 4.5012693118658135e-07, "loss": 0.2864, "step": 17421 }, { "epoch": 2.837031307250743, "grad_norm": 0.1860337257385254, "learning_rate": 4.49232513912487e-07, "loss": 0.259, "step": 17422 }, { "epoch": 2.837194153808574, "grad_norm": 0.24815811216831207, "learning_rate": 4.483389780844976e-07, "loss": 0.299, "step": 17423 }, { "epoch": 2.8373570003664046, "grad_norm": 0.14140410721302032, "learning_rate": 4.474463237347015e-07, "loss": 0.2514, "step": 17424 }, { "epoch": 2.8375198469242355, "grad_norm": 0.16452203691005707, "learning_rate": 4.4655455089513963e-07, "loss": 0.2553, "step": 17425 }, { "epoch": 2.8376826934820665, "grad_norm": 0.1810709536075592, "learning_rate": 4.4566365959783365e-07, "loss": 0.2656, "step": 17426 }, { "epoch": 2.8378455400398974, "grad_norm": 0.25519946217536926, "learning_rate": 4.447736498747718e-07, "loss": 0.2216, "step": 17427 }, { "epoch": 2.8380083865977284, "grad_norm": 0.18748484551906586, "learning_rate": 4.438845217579035e-07, "loss": 0.2263, "step": 17428 }, { "epoch": 2.8381712331555593, "grad_norm": 0.15970222651958466, "learning_rate": 4.4299627527915334e-07, "loss": 0.3039, "step": 17429 }, { "epoch": 2.8383340797133902, "grad_norm": 0.2077999860048294, "learning_rate": 4.4210891047041234e-07, "loss": 0.285, "step": 17430 }, { "epoch": 2.8384969262712207, "grad_norm": 0.1707332283258438, "learning_rate": 4.412224273635385e-07, "loss": 0.2635, "step": 17431 }, { "epoch": 2.8386597728290517, "grad_norm": 0.1482459008693695, "learning_rate": 4.4033682599035617e-07, "loss": 0.2271, "step": 17432 }, { "epoch": 2.8388226193868826, "grad_norm": 0.1525624394416809, "learning_rate": 4.3945210638267064e-07, "loss": 0.2922, "step": 17433 }, { "epoch": 2.8389854659447136, "grad_norm": 0.1491483598947525, "learning_rate": 4.38568268572237e-07, "loss": 0.2785, "step": 17434 }, { "epoch": 2.8391483125025445, "grad_norm": 0.1749461591243744, "learning_rate": 4.3768531259078815e-07, "loss": 0.283, "step": 17435 }, { "epoch": 2.8393111590603755, "grad_norm": 0.1591978371143341, "learning_rate": 4.3680323847002937e-07, "loss": 0.2991, "step": 17436 }, { "epoch": 2.8394740056182064, "grad_norm": 0.15094788372516632, "learning_rate": 4.359220462416269e-07, "loss": 0.2505, "step": 17437 }, { "epoch": 2.839636852176037, "grad_norm": 0.20543330907821655, "learning_rate": 4.350417359372194e-07, "loss": 0.2764, "step": 17438 }, { "epoch": 2.8397996987338683, "grad_norm": 0.19737359881401062, "learning_rate": 4.341623075884149e-07, "loss": 0.2599, "step": 17439 }, { "epoch": 2.839962545291699, "grad_norm": 0.23645621538162231, "learning_rate": 4.3328376122677985e-07, "loss": 0.328, "step": 17440 }, { "epoch": 2.8401253918495297, "grad_norm": 0.1672177016735077, "learning_rate": 4.324060968838639e-07, "loss": 0.2483, "step": 17441 }, { "epoch": 2.8402882384073607, "grad_norm": 0.15533867478370667, "learning_rate": 4.3152931459117807e-07, "loss": 0.2452, "step": 17442 }, { "epoch": 2.8404510849651916, "grad_norm": 0.17851325869560242, "learning_rate": 4.3065341438019434e-07, "loss": 0.2707, "step": 17443 }, { "epoch": 2.8406139315230226, "grad_norm": 0.20031467080116272, "learning_rate": 4.2977839628236815e-07, "loss": 0.2439, "step": 17444 }, { "epoch": 2.840776778080853, "grad_norm": 0.19117571413516998, "learning_rate": 4.2890426032911045e-07, "loss": 0.2504, "step": 17445 }, { "epoch": 2.8409396246386844, "grad_norm": 0.16416911780834198, "learning_rate": 4.280310065518073e-07, "loss": 0.2233, "step": 17446 }, { "epoch": 2.841102471196515, "grad_norm": 0.19880282878875732, "learning_rate": 4.2715863498180853e-07, "loss": 0.235, "step": 17447 }, { "epoch": 2.841265317754346, "grad_norm": 0.1819666475057602, "learning_rate": 4.2628714565043915e-07, "loss": 0.2638, "step": 17448 }, { "epoch": 2.841428164312177, "grad_norm": 0.1749960482120514, "learning_rate": 4.25416538588988e-07, "loss": 0.2643, "step": 17449 }, { "epoch": 2.8415910108700078, "grad_norm": 0.19836728274822235, "learning_rate": 4.2454681382870507e-07, "loss": 0.2693, "step": 17450 }, { "epoch": 2.8417538574278387, "grad_norm": 0.17737142741680145, "learning_rate": 4.2367797140082657e-07, "loss": 0.263, "step": 17451 }, { "epoch": 2.8419167039856696, "grad_norm": 0.16501036286354065, "learning_rate": 4.228100113365385e-07, "loss": 0.2816, "step": 17452 }, { "epoch": 2.8420795505435006, "grad_norm": 0.16455791890621185, "learning_rate": 4.2194293366700777e-07, "loss": 0.2522, "step": 17453 }, { "epoch": 2.842242397101331, "grad_norm": 0.21011647582054138, "learning_rate": 4.2107673842336214e-07, "loss": 0.2389, "step": 17454 }, { "epoch": 2.842405243659162, "grad_norm": 0.22106508910655975, "learning_rate": 4.2021142563670456e-07, "loss": 0.3121, "step": 17455 }, { "epoch": 2.842568090216993, "grad_norm": 0.1790269911289215, "learning_rate": 4.193469953380991e-07, "loss": 0.2556, "step": 17456 }, { "epoch": 2.842730936774824, "grad_norm": 0.1642875373363495, "learning_rate": 4.1848344755857914e-07, "loss": 0.3132, "step": 17457 }, { "epoch": 2.842893783332655, "grad_norm": 0.19259700179100037, "learning_rate": 4.176207823291534e-07, "loss": 0.2755, "step": 17458 }, { "epoch": 2.843056629890486, "grad_norm": 0.17004911601543427, "learning_rate": 4.167589996807941e-07, "loss": 0.2635, "step": 17459 }, { "epoch": 2.8432194764483167, "grad_norm": 0.14567755162715912, "learning_rate": 4.158980996444406e-07, "loss": 0.2682, "step": 17460 }, { "epoch": 2.8433823230061472, "grad_norm": 0.18710725009441376, "learning_rate": 4.1503808225099584e-07, "loss": 0.2492, "step": 17461 }, { "epoch": 2.843545169563978, "grad_norm": 0.13529224693775177, "learning_rate": 4.141789475313462e-07, "loss": 0.2544, "step": 17462 }, { "epoch": 2.843708016121809, "grad_norm": 0.19523660838603973, "learning_rate": 4.133206955163338e-07, "loss": 0.2994, "step": 17463 }, { "epoch": 2.84387086267964, "grad_norm": 0.19582679867744446, "learning_rate": 4.1246332623677273e-07, "loss": 0.2509, "step": 17464 }, { "epoch": 2.844033709237471, "grad_norm": 0.18299128115177155, "learning_rate": 4.116068397234413e-07, "loss": 0.2523, "step": 17465 }, { "epoch": 2.844196555795302, "grad_norm": 0.16264957189559937, "learning_rate": 4.107512360070953e-07, "loss": 0.2728, "step": 17466 }, { "epoch": 2.844359402353133, "grad_norm": 0.15483301877975464, "learning_rate": 4.0989651511845194e-07, "loss": 0.2481, "step": 17467 }, { "epoch": 2.8445222489109634, "grad_norm": 0.21613088250160217, "learning_rate": 4.09042677088195e-07, "loss": 0.2537, "step": 17468 }, { "epoch": 2.8446850954687948, "grad_norm": 0.1621837615966797, "learning_rate": 4.08189721946986e-07, "loss": 0.2656, "step": 17469 }, { "epoch": 2.8448479420266253, "grad_norm": 0.17030633985996246, "learning_rate": 4.073376497254422e-07, "loss": 0.2569, "step": 17470 }, { "epoch": 2.845010788584456, "grad_norm": 0.18743520975112915, "learning_rate": 4.064864604541613e-07, "loss": 0.2908, "step": 17471 }, { "epoch": 2.845173635142287, "grad_norm": 0.165554940700531, "learning_rate": 4.056361541636966e-07, "loss": 0.2847, "step": 17472 }, { "epoch": 2.845336481700118, "grad_norm": 0.17073753476142883, "learning_rate": 4.047867308845821e-07, "loss": 0.2136, "step": 17473 }, { "epoch": 2.845499328257949, "grad_norm": 0.19097597897052765, "learning_rate": 4.0393819064731274e-07, "loss": 0.2613, "step": 17474 }, { "epoch": 2.84566217481578, "grad_norm": 0.17637072503566742, "learning_rate": 4.030905334823559e-07, "loss": 0.2845, "step": 17475 }, { "epoch": 2.845825021373611, "grad_norm": 0.17994728684425354, "learning_rate": 4.022437594201428e-07, "loss": 0.2736, "step": 17476 }, { "epoch": 2.8459878679314414, "grad_norm": 0.19244588911533356, "learning_rate": 4.013978684910741e-07, "loss": 0.2685, "step": 17477 }, { "epoch": 2.8461507144892724, "grad_norm": 0.23021355271339417, "learning_rate": 4.0055286072552265e-07, "loss": 0.3152, "step": 17478 }, { "epoch": 2.8463135610471033, "grad_norm": 0.18064208328723907, "learning_rate": 3.9970873615382266e-07, "loss": 0.2585, "step": 17479 }, { "epoch": 2.8464764076049343, "grad_norm": 0.20159782469272614, "learning_rate": 3.988654948062859e-07, "loss": 0.2293, "step": 17480 }, { "epoch": 2.846639254162765, "grad_norm": 0.16553330421447754, "learning_rate": 3.9802313671318534e-07, "loss": 0.2433, "step": 17481 }, { "epoch": 2.846802100720596, "grad_norm": 0.18893995881080627, "learning_rate": 3.971816619047636e-07, "loss": 0.2975, "step": 17482 }, { "epoch": 2.846964947278427, "grad_norm": 0.1667158454656601, "learning_rate": 3.9634107041123246e-07, "loss": 0.2638, "step": 17483 }, { "epoch": 2.8471277938362576, "grad_norm": 0.18599487841129303, "learning_rate": 3.9550136226277067e-07, "loss": 0.2556, "step": 17484 }, { "epoch": 2.8472906403940885, "grad_norm": 0.19383031129837036, "learning_rate": 3.946625374895263e-07, "loss": 0.2991, "step": 17485 }, { "epoch": 2.8474534869519195, "grad_norm": 0.231297567486763, "learning_rate": 3.9382459612161695e-07, "loss": 0.2783, "step": 17486 }, { "epoch": 2.8476163335097504, "grad_norm": 0.20057973265647888, "learning_rate": 3.9298753818912683e-07, "loss": 0.2838, "step": 17487 }, { "epoch": 2.8477791800675814, "grad_norm": 0.2616986930370331, "learning_rate": 3.921513637221097e-07, "loss": 0.2624, "step": 17488 }, { "epoch": 2.8479420266254123, "grad_norm": 0.13640369474887848, "learning_rate": 3.9131607275058324e-07, "loss": 0.2732, "step": 17489 }, { "epoch": 2.8481048731832432, "grad_norm": 0.16938015818595886, "learning_rate": 3.904816653045401e-07, "loss": 0.2478, "step": 17490 }, { "epoch": 2.8482677197410737, "grad_norm": 0.22834371030330658, "learning_rate": 3.896481414139397e-07, "loss": 0.2902, "step": 17491 }, { "epoch": 2.848430566298905, "grad_norm": 0.20466992259025574, "learning_rate": 3.888155011087025e-07, "loss": 0.2893, "step": 17492 }, { "epoch": 2.8485934128567356, "grad_norm": 0.23002150654792786, "learning_rate": 3.8798374441872956e-07, "loss": 0.2649, "step": 17493 }, { "epoch": 2.8487562594145666, "grad_norm": 0.16804078221321106, "learning_rate": 3.871528713738748e-07, "loss": 0.2499, "step": 17494 }, { "epoch": 2.8489191059723975, "grad_norm": 0.17203423380851746, "learning_rate": 3.8632288200397826e-07, "loss": 0.2464, "step": 17495 }, { "epoch": 2.8490819525302284, "grad_norm": 0.2040746808052063, "learning_rate": 3.8549377633883275e-07, "loss": 0.2693, "step": 17496 }, { "epoch": 2.8492447990880594, "grad_norm": 0.16102907061576843, "learning_rate": 3.8466555440820893e-07, "loss": 0.2461, "step": 17497 }, { "epoch": 2.84940764564589, "grad_norm": 0.1632944792509079, "learning_rate": 3.8383821624183856e-07, "loss": 0.2719, "step": 17498 }, { "epoch": 2.8495704922037213, "grad_norm": 0.1772356778383255, "learning_rate": 3.830117618694312e-07, "loss": 0.2627, "step": 17499 }, { "epoch": 2.8497333387615518, "grad_norm": 0.15501397848129272, "learning_rate": 3.821861913206548e-07, "loss": 0.2671, "step": 17500 }, { "epoch": 2.8498961853193827, "grad_norm": 0.18391314148902893, "learning_rate": 3.813615046251495e-07, "loss": 0.2963, "step": 17501 }, { "epoch": 2.8500590318772137, "grad_norm": 0.15022143721580505, "learning_rate": 3.805377018125278e-07, "loss": 0.2869, "step": 17502 }, { "epoch": 2.8502218784350446, "grad_norm": 0.1621645838022232, "learning_rate": 3.7971478291236596e-07, "loss": 0.2678, "step": 17503 }, { "epoch": 2.8503847249928755, "grad_norm": 0.18652494251728058, "learning_rate": 3.7889274795420425e-07, "loss": 0.2383, "step": 17504 }, { "epoch": 2.8505475715507065, "grad_norm": 0.18008437752723694, "learning_rate": 3.780715969675608e-07, "loss": 0.2876, "step": 17505 }, { "epoch": 2.8507104181085374, "grad_norm": 0.1569024920463562, "learning_rate": 3.7725132998191746e-07, "loss": 0.3084, "step": 17506 }, { "epoch": 2.850873264666368, "grad_norm": 0.1493591070175171, "learning_rate": 3.76431947026723e-07, "loss": 0.2592, "step": 17507 }, { "epoch": 2.851036111224199, "grad_norm": 0.16576381027698517, "learning_rate": 3.756134481313928e-07, "loss": 0.2555, "step": 17508 }, { "epoch": 2.85119895778203, "grad_norm": 0.21147261559963226, "learning_rate": 3.7479583332531986e-07, "loss": 0.2651, "step": 17509 }, { "epoch": 2.8513618043398608, "grad_norm": 0.14174103736877441, "learning_rate": 3.739791026378559e-07, "loss": 0.3211, "step": 17510 }, { "epoch": 2.8515246508976917, "grad_norm": 0.18019521236419678, "learning_rate": 3.731632560983189e-07, "loss": 0.268, "step": 17511 }, { "epoch": 2.8516874974555226, "grad_norm": 0.2100287526845932, "learning_rate": 3.7234829373601067e-07, "loss": 0.2532, "step": 17512 }, { "epoch": 2.8518503440133536, "grad_norm": 0.16851414740085602, "learning_rate": 3.7153421558018266e-07, "loss": 0.2607, "step": 17513 }, { "epoch": 2.852013190571184, "grad_norm": 0.19283798336982727, "learning_rate": 3.7072102166006427e-07, "loss": 0.2704, "step": 17514 }, { "epoch": 2.8521760371290155, "grad_norm": 0.24770872294902802, "learning_rate": 3.6990871200485445e-07, "loss": 0.2934, "step": 17515 }, { "epoch": 2.852338883686846, "grad_norm": 0.17250938713550568, "learning_rate": 3.690972866437131e-07, "loss": 0.2277, "step": 17516 }, { "epoch": 2.852501730244677, "grad_norm": 0.18571680784225464, "learning_rate": 3.682867456057754e-07, "loss": 0.2582, "step": 17517 }, { "epoch": 2.852664576802508, "grad_norm": 0.12892667949199677, "learning_rate": 3.6747708892014563e-07, "loss": 0.2813, "step": 17518 }, { "epoch": 2.852827423360339, "grad_norm": 0.15672743320465088, "learning_rate": 3.666683166158841e-07, "loss": 0.2806, "step": 17519 }, { "epoch": 2.8529902699181697, "grad_norm": 0.16698525846004486, "learning_rate": 3.6586042872203686e-07, "loss": 0.2848, "step": 17520 }, { "epoch": 2.8531531164760002, "grad_norm": 0.21794162690639496, "learning_rate": 3.6505342526760856e-07, "loss": 0.2499, "step": 17521 }, { "epoch": 2.8533159630338316, "grad_norm": 0.1602819263935089, "learning_rate": 3.642473062815677e-07, "loss": 0.2717, "step": 17522 }, { "epoch": 2.853478809591662, "grad_norm": 0.20784412324428558, "learning_rate": 3.634420717928577e-07, "loss": 0.2712, "step": 17523 }, { "epoch": 2.853641656149493, "grad_norm": 0.17857420444488525, "learning_rate": 3.626377218303917e-07, "loss": 0.2447, "step": 17524 }, { "epoch": 2.853804502707324, "grad_norm": 0.21321581304073334, "learning_rate": 3.6183425642304927e-07, "loss": 0.2467, "step": 17525 }, { "epoch": 2.853967349265155, "grad_norm": 0.2341412454843521, "learning_rate": 3.61031675599674e-07, "loss": 0.2722, "step": 17526 }, { "epoch": 2.854130195822986, "grad_norm": 0.1871667057275772, "learning_rate": 3.60229979389079e-07, "loss": 0.2271, "step": 17527 }, { "epoch": 2.854293042380817, "grad_norm": 0.150689035654068, "learning_rate": 3.5942916782005233e-07, "loss": 0.2595, "step": 17528 }, { "epoch": 2.8544558889386478, "grad_norm": 0.14362281560897827, "learning_rate": 3.586292409213432e-07, "loss": 0.3103, "step": 17529 }, { "epoch": 2.8546187354964783, "grad_norm": 0.20394523441791534, "learning_rate": 3.5783019872167313e-07, "loss": 0.2733, "step": 17530 }, { "epoch": 2.854781582054309, "grad_norm": 0.21889567375183105, "learning_rate": 3.5703204124972745e-07, "loss": 0.2564, "step": 17531 }, { "epoch": 2.85494442861214, "grad_norm": 0.20249804854393005, "learning_rate": 3.562347685341638e-07, "loss": 0.2603, "step": 17532 }, { "epoch": 2.855107275169971, "grad_norm": 0.1651190221309662, "learning_rate": 3.5543838060360935e-07, "loss": 0.2733, "step": 17533 }, { "epoch": 2.855270121727802, "grad_norm": 0.1763876974582672, "learning_rate": 3.546428774866495e-07, "loss": 0.2513, "step": 17534 }, { "epoch": 2.855432968285633, "grad_norm": 0.16159725189208984, "learning_rate": 3.5384825921185304e-07, "loss": 0.258, "step": 17535 }, { "epoch": 2.855595814843464, "grad_norm": 0.18903084099292755, "learning_rate": 3.5305452580774725e-07, "loss": 0.2567, "step": 17536 }, { "epoch": 2.8557586614012944, "grad_norm": 0.16424447298049927, "learning_rate": 3.5226167730282587e-07, "loss": 0.2347, "step": 17537 }, { "epoch": 2.8559215079591254, "grad_norm": 0.2000497579574585, "learning_rate": 3.5146971372555516e-07, "loss": 0.2776, "step": 17538 }, { "epoch": 2.8560843545169563, "grad_norm": 0.19080697000026703, "learning_rate": 3.506786351043734e-07, "loss": 0.2302, "step": 17539 }, { "epoch": 2.8562472010747872, "grad_norm": 0.16940169036388397, "learning_rate": 3.4988844146768006e-07, "loss": 0.2639, "step": 17540 }, { "epoch": 2.856410047632618, "grad_norm": 0.17589910328388214, "learning_rate": 3.4909913284384143e-07, "loss": 0.263, "step": 17541 }, { "epoch": 2.856572894190449, "grad_norm": 0.16988402605056763, "learning_rate": 3.483107092612042e-07, "loss": 0.2964, "step": 17542 }, { "epoch": 2.85673574074828, "grad_norm": 0.1730775237083435, "learning_rate": 3.4752317074807085e-07, "loss": 0.2283, "step": 17543 }, { "epoch": 2.8568985873061106, "grad_norm": 0.1892615109682083, "learning_rate": 3.467365173327158e-07, "loss": 0.2484, "step": 17544 }, { "epoch": 2.857061433863942, "grad_norm": 0.14670464396476746, "learning_rate": 3.4595074904338054e-07, "loss": 0.2445, "step": 17545 }, { "epoch": 2.8572242804217725, "grad_norm": 0.1833905726671219, "learning_rate": 3.4516586590828126e-07, "loss": 0.2862, "step": 17546 }, { "epoch": 2.8573871269796034, "grad_norm": 0.1648797243833542, "learning_rate": 3.443818679555955e-07, "loss": 0.2985, "step": 17547 }, { "epoch": 2.8575499735374343, "grad_norm": 0.18015064299106598, "learning_rate": 3.4359875521347286e-07, "loss": 0.2575, "step": 17548 }, { "epoch": 2.8577128200952653, "grad_norm": 0.13632576167583466, "learning_rate": 3.428165277100215e-07, "loss": 0.2372, "step": 17549 }, { "epoch": 2.8578756666530962, "grad_norm": 0.16310058534145355, "learning_rate": 3.420351854733356e-07, "loss": 0.2735, "step": 17550 }, { "epoch": 2.858038513210927, "grad_norm": 0.22444355487823486, "learning_rate": 3.412547285314621e-07, "loss": 0.2681, "step": 17551 }, { "epoch": 2.858201359768758, "grad_norm": 0.18237803876399994, "learning_rate": 3.4047515691242595e-07, "loss": 0.2685, "step": 17552 }, { "epoch": 2.8583642063265886, "grad_norm": 0.1536843478679657, "learning_rate": 3.3969647064421293e-07, "loss": 0.2861, "step": 17553 }, { "epoch": 2.8585270528844196, "grad_norm": 0.22674746811389923, "learning_rate": 3.389186697547786e-07, "loss": 0.2701, "step": 17554 }, { "epoch": 2.8586898994422505, "grad_norm": 0.17031444609165192, "learning_rate": 3.381417542720533e-07, "loss": 0.2369, "step": 17555 }, { "epoch": 2.8588527460000814, "grad_norm": 0.14986911416053772, "learning_rate": 3.373657242239259e-07, "loss": 0.2377, "step": 17556 }, { "epoch": 2.8590155925579124, "grad_norm": 0.16236720979213715, "learning_rate": 3.3659057963826014e-07, "loss": 0.2817, "step": 17557 }, { "epoch": 2.8591784391157433, "grad_norm": 0.20597702264785767, "learning_rate": 3.358163205428866e-07, "loss": 0.2445, "step": 17558 }, { "epoch": 2.8593412856735743, "grad_norm": 0.159471794962883, "learning_rate": 3.350429469656025e-07, "loss": 0.2329, "step": 17559 }, { "epoch": 2.8595041322314048, "grad_norm": 0.17112164199352264, "learning_rate": 3.342704589341772e-07, "loss": 0.2585, "step": 17560 }, { "epoch": 2.8596669787892357, "grad_norm": 0.1204267218708992, "learning_rate": 3.3349885647634136e-07, "loss": 0.2461, "step": 17561 }, { "epoch": 2.8598298253470666, "grad_norm": 0.15245157480239868, "learning_rate": 3.3272813961980065e-07, "loss": 0.257, "step": 17562 }, { "epoch": 2.8599926719048976, "grad_norm": 0.1745193600654602, "learning_rate": 3.3195830839222175e-07, "loss": 0.2751, "step": 17563 }, { "epoch": 2.8601555184627285, "grad_norm": 0.1523178517818451, "learning_rate": 3.311893628212492e-07, "loss": 0.2564, "step": 17564 }, { "epoch": 2.8603183650205595, "grad_norm": 0.2179357260465622, "learning_rate": 3.304213029344888e-07, "loss": 0.2663, "step": 17565 }, { "epoch": 2.8604812115783904, "grad_norm": 0.13488143682479858, "learning_rate": 3.2965412875951563e-07, "loss": 0.2551, "step": 17566 }, { "epoch": 2.860644058136221, "grad_norm": 0.19885514676570892, "learning_rate": 3.2888784032387155e-07, "loss": 0.3123, "step": 17567 }, { "epoch": 2.8608069046940523, "grad_norm": 0.16961631178855896, "learning_rate": 3.2812243765507067e-07, "loss": 0.2629, "step": 17568 }, { "epoch": 2.860969751251883, "grad_norm": 0.1713850200176239, "learning_rate": 3.2735792078059657e-07, "loss": 0.3036, "step": 17569 }, { "epoch": 2.8611325978097137, "grad_norm": 0.17484015226364136, "learning_rate": 3.265942897278912e-07, "loss": 0.2709, "step": 17570 }, { "epoch": 2.8612954443675447, "grad_norm": 0.18357117474079132, "learning_rate": 3.2583154452437426e-07, "loss": 0.2692, "step": 17571 }, { "epoch": 2.8614582909253756, "grad_norm": 0.20093825459480286, "learning_rate": 3.250696851974322e-07, "loss": 0.2663, "step": 17572 }, { "epoch": 2.8616211374832066, "grad_norm": 0.20853137969970703, "learning_rate": 3.2430871177441534e-07, "loss": 0.2925, "step": 17573 }, { "epoch": 2.861783984041037, "grad_norm": 0.16916358470916748, "learning_rate": 3.235486242826463e-07, "loss": 0.2509, "step": 17574 }, { "epoch": 2.8619468305988685, "grad_norm": 0.1493946760892868, "learning_rate": 3.227894227494144e-07, "loss": 0.2527, "step": 17575 }, { "epoch": 2.862109677156699, "grad_norm": 0.16825290024280548, "learning_rate": 3.2203110720197826e-07, "loss": 0.2915, "step": 17576 }, { "epoch": 2.86227252371453, "grad_norm": 0.1854640543460846, "learning_rate": 3.2127367766756346e-07, "loss": 0.2649, "step": 17577 }, { "epoch": 2.862435370272361, "grad_norm": 0.185267373919487, "learning_rate": 3.2051713417335926e-07, "loss": 0.2565, "step": 17578 }, { "epoch": 2.8625982168301918, "grad_norm": 0.19030676782131195, "learning_rate": 3.197614767465329e-07, "loss": 0.2544, "step": 17579 }, { "epoch": 2.8627610633880227, "grad_norm": 0.17357952892780304, "learning_rate": 3.190067054142154e-07, "loss": 0.217, "step": 17580 }, { "epoch": 2.8629239099458537, "grad_norm": 0.17230477929115295, "learning_rate": 3.182528202035018e-07, "loss": 0.2858, "step": 17581 }, { "epoch": 2.8630867565036846, "grad_norm": 0.18656058609485626, "learning_rate": 3.174998211414593e-07, "loss": 0.2741, "step": 17582 }, { "epoch": 2.863249603061515, "grad_norm": 0.1729615032672882, "learning_rate": 3.1674770825512735e-07, "loss": 0.2663, "step": 17583 }, { "epoch": 2.863412449619346, "grad_norm": 0.20361030101776123, "learning_rate": 3.1599648157150384e-07, "loss": 0.2606, "step": 17584 }, { "epoch": 2.863575296177177, "grad_norm": 0.16156956553459167, "learning_rate": 3.152461411175589e-07, "loss": 0.2612, "step": 17585 }, { "epoch": 2.863738142735008, "grad_norm": 0.18204030394554138, "learning_rate": 3.144966869202404e-07, "loss": 0.2319, "step": 17586 }, { "epoch": 2.863900989292839, "grad_norm": 0.19318565726280212, "learning_rate": 3.1374811900644896e-07, "loss": 0.2708, "step": 17587 }, { "epoch": 2.86406383585067, "grad_norm": 0.1933377981185913, "learning_rate": 3.130004374030576e-07, "loss": 0.2811, "step": 17588 }, { "epoch": 2.8642266824085008, "grad_norm": 0.17432105541229248, "learning_rate": 3.122536421369199e-07, "loss": 0.2461, "step": 17589 }, { "epoch": 2.8643895289663313, "grad_norm": 0.2020028531551361, "learning_rate": 3.1150773323484205e-07, "loss": 0.2755, "step": 17590 }, { "epoch": 2.864552375524162, "grad_norm": 0.16643312573432922, "learning_rate": 3.1076271072360277e-07, "loss": 0.272, "step": 17591 }, { "epoch": 2.864715222081993, "grad_norm": 0.17362499237060547, "learning_rate": 3.1001857462995554e-07, "loss": 0.2552, "step": 17592 }, { "epoch": 2.864878068639824, "grad_norm": 0.16557438671588898, "learning_rate": 3.092753249806124e-07, "loss": 0.2514, "step": 17593 }, { "epoch": 2.865040915197655, "grad_norm": 0.19585953652858734, "learning_rate": 3.085329618022631e-07, "loss": 0.2632, "step": 17594 }, { "epoch": 2.865203761755486, "grad_norm": 0.20740357041358948, "learning_rate": 3.077914851215585e-07, "loss": 0.2683, "step": 17595 }, { "epoch": 2.865366608313317, "grad_norm": 0.1824677288532257, "learning_rate": 3.070508949651163e-07, "loss": 0.2638, "step": 17596 }, { "epoch": 2.8655294548711474, "grad_norm": 0.13190706074237823, "learning_rate": 3.0631119135952904e-07, "loss": 0.2656, "step": 17597 }, { "epoch": 2.865692301428979, "grad_norm": 0.15596814453601837, "learning_rate": 3.0557237433135874e-07, "loss": 0.2794, "step": 17598 }, { "epoch": 2.8658551479868093, "grad_norm": 0.25183799862861633, "learning_rate": 3.0483444390712325e-07, "loss": 0.237, "step": 17599 }, { "epoch": 2.8660179945446402, "grad_norm": 0.14827193319797516, "learning_rate": 3.040974001133179e-07, "loss": 0.2849, "step": 17600 }, { "epoch": 2.866180841102471, "grad_norm": 0.1899234652519226, "learning_rate": 3.0336124297641044e-07, "loss": 0.2631, "step": 17601 }, { "epoch": 2.866343687660302, "grad_norm": 0.18212993443012238, "learning_rate": 3.0262597252282696e-07, "loss": 0.2419, "step": 17602 }, { "epoch": 2.866506534218133, "grad_norm": 0.20068606734275818, "learning_rate": 3.0189158877896586e-07, "loss": 0.2398, "step": 17603 }, { "epoch": 2.866669380775964, "grad_norm": 0.14193043112754822, "learning_rate": 3.011580917711948e-07, "loss": 0.2768, "step": 17604 }, { "epoch": 2.866832227333795, "grad_norm": 0.1681443154811859, "learning_rate": 3.004254815258456e-07, "loss": 0.2741, "step": 17605 }, { "epoch": 2.8669950738916254, "grad_norm": 0.15863136947155, "learning_rate": 2.996937580692277e-07, "loss": 0.2669, "step": 17606 }, { "epoch": 2.8671579204494564, "grad_norm": 0.20106108486652374, "learning_rate": 2.989629214276035e-07, "loss": 0.2799, "step": 17607 }, { "epoch": 2.8673207670072873, "grad_norm": 0.17049194872379303, "learning_rate": 2.9823297162721865e-07, "loss": 0.2529, "step": 17608 }, { "epoch": 2.8674836135651183, "grad_norm": 0.17457029223442078, "learning_rate": 2.975039086942799e-07, "loss": 0.3002, "step": 17609 }, { "epoch": 2.867646460122949, "grad_norm": 0.22439788281917572, "learning_rate": 2.967757326549581e-07, "loss": 0.3126, "step": 17610 }, { "epoch": 2.86780930668078, "grad_norm": 0.26689037680625916, "learning_rate": 2.960484435354044e-07, "loss": 0.2879, "step": 17611 }, { "epoch": 2.867972153238611, "grad_norm": 0.21067026257514954, "learning_rate": 2.9532204136172306e-07, "loss": 0.2459, "step": 17612 }, { "epoch": 2.8681349997964416, "grad_norm": 0.14628687500953674, "learning_rate": 2.9459652616000146e-07, "loss": 0.2683, "step": 17613 }, { "epoch": 2.8682978463542725, "grad_norm": 0.1571284532546997, "learning_rate": 2.938718979562799e-07, "loss": 0.2447, "step": 17614 }, { "epoch": 2.8684606929121035, "grad_norm": 0.19465023279190063, "learning_rate": 2.9314815677657927e-07, "loss": 0.2894, "step": 17615 }, { "epoch": 2.8686235394699344, "grad_norm": 0.16155342757701874, "learning_rate": 2.9242530264688704e-07, "loss": 0.2776, "step": 17616 }, { "epoch": 2.8687863860277654, "grad_norm": 0.12941059470176697, "learning_rate": 2.917033355931492e-07, "loss": 0.2791, "step": 17617 }, { "epoch": 2.8689492325855963, "grad_norm": 0.13704946637153625, "learning_rate": 2.9098225564128944e-07, "loss": 0.2397, "step": 17618 }, { "epoch": 2.8691120791434273, "grad_norm": 0.15908215939998627, "learning_rate": 2.9026206281719814e-07, "loss": 0.2551, "step": 17619 }, { "epoch": 2.8692749257012578, "grad_norm": 0.2011527419090271, "learning_rate": 2.895427571467324e-07, "loss": 0.2644, "step": 17620 }, { "epoch": 2.869437772259089, "grad_norm": 0.1807330697774887, "learning_rate": 2.888243386557132e-07, "loss": 0.2454, "step": 17621 }, { "epoch": 2.8696006188169196, "grad_norm": 0.1905236393213272, "learning_rate": 2.8810680736993943e-07, "loss": 0.2781, "step": 17622 }, { "epoch": 2.8697634653747506, "grad_norm": 0.15679268538951874, "learning_rate": 2.873901633151682e-07, "loss": 0.2365, "step": 17623 }, { "epoch": 2.8699263119325815, "grad_norm": 0.15133197605609894, "learning_rate": 2.8667440651713173e-07, "loss": 0.2799, "step": 17624 }, { "epoch": 2.8700891584904125, "grad_norm": 0.22509774565696716, "learning_rate": 2.859595370015261e-07, "loss": 0.2803, "step": 17625 }, { "epoch": 2.8702520050482434, "grad_norm": 0.16782523691654205, "learning_rate": 2.852455547940225e-07, "loss": 0.3266, "step": 17626 }, { "epoch": 2.870414851606074, "grad_norm": 0.17146523296833038, "learning_rate": 2.8453245992024756e-07, "loss": 0.2748, "step": 17627 }, { "epoch": 2.8705776981639053, "grad_norm": 0.15954098105430603, "learning_rate": 2.838202524058059e-07, "loss": 0.2616, "step": 17628 }, { "epoch": 2.870740544721736, "grad_norm": 0.18696482479572296, "learning_rate": 2.831089322762687e-07, "loss": 0.2512, "step": 17629 }, { "epoch": 2.8709033912795667, "grad_norm": 0.1725226640701294, "learning_rate": 2.823984995571766e-07, "loss": 0.2385, "step": 17630 }, { "epoch": 2.8710662378373977, "grad_norm": 0.14023703336715698, "learning_rate": 2.816889542740342e-07, "loss": 0.2978, "step": 17631 }, { "epoch": 2.8712290843952286, "grad_norm": 0.16515806317329407, "learning_rate": 2.8098029645231563e-07, "loss": 0.259, "step": 17632 }, { "epoch": 2.8713919309530596, "grad_norm": 0.19176121056079865, "learning_rate": 2.8027252611746437e-07, "loss": 0.3131, "step": 17633 }, { "epoch": 2.8715547775108905, "grad_norm": 0.1914045810699463, "learning_rate": 2.7956564329489063e-07, "loss": 0.2671, "step": 17634 }, { "epoch": 2.8717176240687214, "grad_norm": 0.1353704035282135, "learning_rate": 2.78859648009977e-07, "loss": 0.2383, "step": 17635 }, { "epoch": 2.871880470626552, "grad_norm": 0.19097302854061127, "learning_rate": 2.78154540288067e-07, "loss": 0.2915, "step": 17636 }, { "epoch": 2.872043317184383, "grad_norm": 0.19522114098072052, "learning_rate": 2.7745032015447926e-07, "loss": 0.2657, "step": 17637 }, { "epoch": 2.872206163742214, "grad_norm": 0.20865881443023682, "learning_rate": 2.7674698763449636e-07, "loss": 0.2606, "step": 17638 }, { "epoch": 2.8723690103000448, "grad_norm": 0.18114064633846283, "learning_rate": 2.7604454275336756e-07, "loss": 0.2833, "step": 17639 }, { "epoch": 2.8725318568578757, "grad_norm": 0.20342840254306793, "learning_rate": 2.753429855363143e-07, "loss": 0.286, "step": 17640 }, { "epoch": 2.8726947034157067, "grad_norm": 0.15889112651348114, "learning_rate": 2.7464231600852487e-07, "loss": 0.2793, "step": 17641 }, { "epoch": 2.8728575499735376, "grad_norm": 0.1906178742647171, "learning_rate": 2.739425341951568e-07, "loss": 0.28, "step": 17642 }, { "epoch": 2.873020396531368, "grad_norm": 0.19504034519195557, "learning_rate": 2.7324364012133175e-07, "loss": 0.2568, "step": 17643 }, { "epoch": 2.8731832430891995, "grad_norm": 0.17911851406097412, "learning_rate": 2.725456338121435e-07, "loss": 0.2902, "step": 17644 }, { "epoch": 2.87334608964703, "grad_norm": 0.21156203746795654, "learning_rate": 2.7184851529265253e-07, "loss": 0.2502, "step": 17645 }, { "epoch": 2.873508936204861, "grad_norm": 0.16912537813186646, "learning_rate": 2.711522845878889e-07, "loss": 0.2904, "step": 17646 }, { "epoch": 2.873671782762692, "grad_norm": 0.17045670747756958, "learning_rate": 2.704569417228464e-07, "loss": 0.2628, "step": 17647 }, { "epoch": 2.873834629320523, "grad_norm": 0.18189622461795807, "learning_rate": 2.697624867224913e-07, "loss": 0.2526, "step": 17648 }, { "epoch": 2.8739974758783537, "grad_norm": 0.14390617609024048, "learning_rate": 2.6906891961175627e-07, "loss": 0.2411, "step": 17649 }, { "epoch": 2.8741603224361842, "grad_norm": 0.18675917387008667, "learning_rate": 2.6837624041554377e-07, "loss": 0.2768, "step": 17650 }, { "epoch": 2.8743231689940156, "grad_norm": 0.19287370145320892, "learning_rate": 2.676844491587227e-07, "loss": 0.2635, "step": 17651 }, { "epoch": 2.874486015551846, "grad_norm": 0.19102972745895386, "learning_rate": 2.669935458661288e-07, "loss": 0.2801, "step": 17652 }, { "epoch": 2.874648862109677, "grad_norm": 0.1555243730545044, "learning_rate": 2.6630353056256994e-07, "loss": 0.2387, "step": 17653 }, { "epoch": 2.874811708667508, "grad_norm": 0.17810380458831787, "learning_rate": 2.656144032728153e-07, "loss": 0.2803, "step": 17654 }, { "epoch": 2.874974555225339, "grad_norm": 0.18621249496936798, "learning_rate": 2.649261640216144e-07, "loss": 0.3265, "step": 17655 }, { "epoch": 2.87513740178317, "grad_norm": 0.1789543330669403, "learning_rate": 2.642388128336698e-07, "loss": 0.2567, "step": 17656 }, { "epoch": 2.875300248341001, "grad_norm": 0.1636045277118683, "learning_rate": 2.6355234973366447e-07, "loss": 0.2707, "step": 17657 }, { "epoch": 2.875463094898832, "grad_norm": 0.216147780418396, "learning_rate": 2.628667747462371e-07, "loss": 0.2845, "step": 17658 }, { "epoch": 2.8756259414566623, "grad_norm": 0.1662359982728958, "learning_rate": 2.621820878960124e-07, "loss": 0.2376, "step": 17659 }, { "epoch": 2.8757887880144932, "grad_norm": 0.1996295154094696, "learning_rate": 2.6149828920756515e-07, "loss": 0.2604, "step": 17660 }, { "epoch": 2.875951634572324, "grad_norm": 0.1651531159877777, "learning_rate": 2.6081537870544513e-07, "loss": 0.261, "step": 17661 }, { "epoch": 2.876114481130155, "grad_norm": 0.1667405217885971, "learning_rate": 2.6013335641417723e-07, "loss": 0.2283, "step": 17662 }, { "epoch": 2.876277327687986, "grad_norm": 0.19466198980808258, "learning_rate": 2.5945222235824186e-07, "loss": 0.2737, "step": 17663 }, { "epoch": 2.876440174245817, "grad_norm": 0.1868138164281845, "learning_rate": 2.5877197656209996e-07, "loss": 0.3061, "step": 17664 }, { "epoch": 2.876603020803648, "grad_norm": 0.1721695214509964, "learning_rate": 2.5809261905016534e-07, "loss": 0.2334, "step": 17665 }, { "epoch": 2.8767658673614784, "grad_norm": 0.1852407306432724, "learning_rate": 2.5741414984683796e-07, "loss": 0.2545, "step": 17666 }, { "epoch": 2.8769287139193094, "grad_norm": 0.18259483575820923, "learning_rate": 2.5673656897647615e-07, "loss": 0.2472, "step": 17667 }, { "epoch": 2.8770915604771403, "grad_norm": 0.2040420025587082, "learning_rate": 2.5605987646339924e-07, "loss": 0.2992, "step": 17668 }, { "epoch": 2.8772544070349713, "grad_norm": 0.20069517195224762, "learning_rate": 2.553840723319101e-07, "loss": 0.2733, "step": 17669 }, { "epoch": 2.877417253592802, "grad_norm": 0.21758249402046204, "learning_rate": 2.5470915660626705e-07, "loss": 0.2729, "step": 17670 }, { "epoch": 2.877580100150633, "grad_norm": 0.15219251811504364, "learning_rate": 2.5403512931070627e-07, "loss": 0.2515, "step": 17671 }, { "epoch": 2.877742946708464, "grad_norm": 0.20543010532855988, "learning_rate": 2.533619904694251e-07, "loss": 0.2804, "step": 17672 }, { "epoch": 2.8779057932662946, "grad_norm": 0.21185992658138275, "learning_rate": 2.526897401065903e-07, "loss": 0.2742, "step": 17673 }, { "epoch": 2.878068639824126, "grad_norm": 0.1718752533197403, "learning_rate": 2.520183782463409e-07, "loss": 0.271, "step": 17674 }, { "epoch": 2.8782314863819565, "grad_norm": 0.16842298209667206, "learning_rate": 2.5134790491277705e-07, "loss": 0.2887, "step": 17675 }, { "epoch": 2.8783943329397874, "grad_norm": 0.14346462488174438, "learning_rate": 2.5067832012997394e-07, "loss": 0.2531, "step": 17676 }, { "epoch": 2.8785571794976184, "grad_norm": 0.16027647256851196, "learning_rate": 2.500096239219679e-07, "loss": 0.2375, "step": 17677 }, { "epoch": 2.8787200260554493, "grad_norm": 0.17608681321144104, "learning_rate": 2.493418163127703e-07, "loss": 0.2608, "step": 17678 }, { "epoch": 2.8788828726132802, "grad_norm": 0.17396287620067596, "learning_rate": 2.486748973263592e-07, "loss": 0.2325, "step": 17679 }, { "epoch": 2.879045719171111, "grad_norm": 0.14213815331459045, "learning_rate": 2.480088669866709e-07, "loss": 0.229, "step": 17680 }, { "epoch": 2.879208565728942, "grad_norm": 0.1737702190876007, "learning_rate": 2.4734372531762804e-07, "loss": 0.2252, "step": 17681 }, { "epoch": 2.8793714122867726, "grad_norm": 0.17014998197555542, "learning_rate": 2.4667947234310316e-07, "loss": 0.2676, "step": 17682 }, { "epoch": 2.8795342588446036, "grad_norm": 0.16166846454143524, "learning_rate": 2.460161080869494e-07, "loss": 0.2663, "step": 17683 }, { "epoch": 2.8796971054024345, "grad_norm": 0.14971886575222015, "learning_rate": 2.4535363257298105e-07, "loss": 0.2458, "step": 17684 }, { "epoch": 2.8798599519602655, "grad_norm": 0.16278859972953796, "learning_rate": 2.446920458249874e-07, "loss": 0.2533, "step": 17685 }, { "epoch": 2.8800227985180964, "grad_norm": 0.2165241539478302, "learning_rate": 2.440313478667161e-07, "loss": 0.2597, "step": 17686 }, { "epoch": 2.8801856450759273, "grad_norm": 0.16727003455162048, "learning_rate": 2.433715387218871e-07, "loss": 0.2507, "step": 17687 }, { "epoch": 2.8803484916337583, "grad_norm": 0.18813344836235046, "learning_rate": 2.427126184141981e-07, "loss": 0.2389, "step": 17688 }, { "epoch": 2.8805113381915888, "grad_norm": 0.1742069125175476, "learning_rate": 2.420545869672969e-07, "loss": 0.2477, "step": 17689 }, { "epoch": 2.8806741847494197, "grad_norm": 0.16249850392341614, "learning_rate": 2.4139744440481724e-07, "loss": 0.2622, "step": 17690 }, { "epoch": 2.8808370313072507, "grad_norm": 0.18778806924819946, "learning_rate": 2.407411907503432e-07, "loss": 0.2816, "step": 17691 }, { "epoch": 2.8809998778650816, "grad_norm": 0.21254253387451172, "learning_rate": 2.400858260274447e-07, "loss": 0.3147, "step": 17692 }, { "epoch": 2.8811627244229125, "grad_norm": 0.20569875836372375, "learning_rate": 2.394313502596446e-07, "loss": 0.2539, "step": 17693 }, { "epoch": 2.8813255709807435, "grad_norm": 0.16205406188964844, "learning_rate": 2.3877776347044634e-07, "loss": 0.2508, "step": 17694 }, { "epoch": 2.8814884175385744, "grad_norm": 0.16833184659481049, "learning_rate": 2.3812506568331172e-07, "loss": 0.2469, "step": 17695 }, { "epoch": 2.881651264096405, "grad_norm": 0.1732691079378128, "learning_rate": 2.3747325692167476e-07, "loss": 0.3051, "step": 17696 }, { "epoch": 2.8818141106542363, "grad_norm": 0.18039138615131378, "learning_rate": 2.3682233720894175e-07, "loss": 0.2417, "step": 17697 }, { "epoch": 2.881976957212067, "grad_norm": 0.17999449372291565, "learning_rate": 2.3617230656847455e-07, "loss": 0.2534, "step": 17698 }, { "epoch": 2.8821398037698978, "grad_norm": 0.16353261470794678, "learning_rate": 2.3552316502362116e-07, "loss": 0.2974, "step": 17699 }, { "epoch": 2.8823026503277287, "grad_norm": 0.18218907713890076, "learning_rate": 2.3487491259767957e-07, "loss": 0.2936, "step": 17700 }, { "epoch": 2.8824654968855596, "grad_norm": 0.1932162344455719, "learning_rate": 2.3422754931392566e-07, "loss": 0.2585, "step": 17701 }, { "epoch": 2.8826283434433906, "grad_norm": 0.14870616793632507, "learning_rate": 2.3358107519560467e-07, "loss": 0.2865, "step": 17702 }, { "epoch": 2.882791190001221, "grad_norm": 0.1795320063829422, "learning_rate": 2.3293549026592588e-07, "loss": 0.2955, "step": 17703 }, { "epoch": 2.8829540365590525, "grad_norm": 0.17050231993198395, "learning_rate": 2.322907945480679e-07, "loss": 0.2858, "step": 17704 }, { "epoch": 2.883116883116883, "grad_norm": 0.22614967823028564, "learning_rate": 2.3164698806517338e-07, "loss": 0.2522, "step": 17705 }, { "epoch": 2.883279729674714, "grad_norm": 0.22032558917999268, "learning_rate": 2.3100407084036268e-07, "loss": 0.315, "step": 17706 }, { "epoch": 2.883442576232545, "grad_norm": 0.18831580877304077, "learning_rate": 2.3036204289671183e-07, "loss": 0.2684, "step": 17707 }, { "epoch": 2.883605422790376, "grad_norm": 0.18798844516277313, "learning_rate": 2.297209042572801e-07, "loss": 0.2521, "step": 17708 }, { "epoch": 2.8837682693482067, "grad_norm": 0.1843399703502655, "learning_rate": 2.2908065494507968e-07, "loss": 0.2635, "step": 17709 }, { "epoch": 2.8839311159060377, "grad_norm": 0.20851649343967438, "learning_rate": 2.2844129498309773e-07, "loss": 0.281, "step": 17710 }, { "epoch": 2.8840939624638686, "grad_norm": 0.21693117916584015, "learning_rate": 2.2780282439429367e-07, "loss": 0.2483, "step": 17711 }, { "epoch": 2.884256809021699, "grad_norm": 0.17145167291164398, "learning_rate": 2.2716524320158528e-07, "loss": 0.2654, "step": 17712 }, { "epoch": 2.88441965557953, "grad_norm": 0.1682845950126648, "learning_rate": 2.265285514278681e-07, "loss": 0.2658, "step": 17713 }, { "epoch": 2.884582502137361, "grad_norm": 0.18764273822307587, "learning_rate": 2.258927490959989e-07, "loss": 0.239, "step": 17714 }, { "epoch": 2.884745348695192, "grad_norm": 0.21087205410003662, "learning_rate": 2.252578362288038e-07, "loss": 0.2431, "step": 17715 }, { "epoch": 2.884908195253023, "grad_norm": 0.18427257239818573, "learning_rate": 2.246238128490813e-07, "loss": 0.257, "step": 17716 }, { "epoch": 2.885071041810854, "grad_norm": 0.18536101281642914, "learning_rate": 2.239906789795909e-07, "loss": 0.2404, "step": 17717 }, { "epoch": 2.8852338883686848, "grad_norm": 0.2001621127128601, "learning_rate": 2.2335843464306727e-07, "loss": 0.3275, "step": 17718 }, { "epoch": 2.8853967349265153, "grad_norm": 0.1466934233903885, "learning_rate": 2.2272707986220887e-07, "loss": 0.2633, "step": 17719 }, { "epoch": 2.885559581484346, "grad_norm": 0.18911682069301605, "learning_rate": 2.2209661465968367e-07, "loss": 0.2557, "step": 17720 }, { "epoch": 2.885722428042177, "grad_norm": 0.21187010407447815, "learning_rate": 2.2146703905812638e-07, "loss": 0.2621, "step": 17721 }, { "epoch": 2.885885274600008, "grad_norm": 0.1522437483072281, "learning_rate": 2.2083835308013834e-07, "loss": 0.2768, "step": 17722 }, { "epoch": 2.886048121157839, "grad_norm": 0.1517256796360016, "learning_rate": 2.2021055674829595e-07, "loss": 0.2401, "step": 17723 }, { "epoch": 2.88621096771567, "grad_norm": 0.22331328690052032, "learning_rate": 2.195836500851367e-07, "loss": 0.2905, "step": 17724 }, { "epoch": 2.886373814273501, "grad_norm": 0.2078131139278412, "learning_rate": 2.1895763311316764e-07, "loss": 0.2478, "step": 17725 }, { "epoch": 2.8865366608313314, "grad_norm": 0.1919635385274887, "learning_rate": 2.1833250585486798e-07, "loss": 0.2913, "step": 17726 }, { "epoch": 2.886699507389163, "grad_norm": 0.22804808616638184, "learning_rate": 2.177082683326753e-07, "loss": 0.2598, "step": 17727 }, { "epoch": 2.8868623539469933, "grad_norm": 0.1707768738269806, "learning_rate": 2.1708492056900787e-07, "loss": 0.2739, "step": 17728 }, { "epoch": 2.8870252005048243, "grad_norm": 0.1264285296201706, "learning_rate": 2.1646246258624215e-07, "loss": 0.2482, "step": 17729 }, { "epoch": 2.887188047062655, "grad_norm": 0.1850917488336563, "learning_rate": 2.1584089440672973e-07, "loss": 0.3031, "step": 17730 }, { "epoch": 2.887350893620486, "grad_norm": 0.1529558002948761, "learning_rate": 2.1522021605278331e-07, "loss": 0.2736, "step": 17731 }, { "epoch": 2.887513740178317, "grad_norm": 0.16467751562595367, "learning_rate": 2.1460042754668787e-07, "loss": 0.2343, "step": 17732 }, { "epoch": 2.887676586736148, "grad_norm": 0.14734819531440735, "learning_rate": 2.1398152891069502e-07, "loss": 0.2614, "step": 17733 }, { "epoch": 2.887839433293979, "grad_norm": 0.17167715728282928, "learning_rate": 2.1336352016702866e-07, "loss": 0.2648, "step": 17734 }, { "epoch": 2.8880022798518095, "grad_norm": 0.21759262681007385, "learning_rate": 2.1274640133787384e-07, "loss": 0.2458, "step": 17735 }, { "epoch": 2.8881651264096404, "grad_norm": 0.13132788240909576, "learning_rate": 2.121301724453878e-07, "loss": 0.2539, "step": 17736 }, { "epoch": 2.8883279729674713, "grad_norm": 0.1568693369626999, "learning_rate": 2.1151483351169454e-07, "loss": 0.2686, "step": 17737 }, { "epoch": 2.8884908195253023, "grad_norm": 0.18069030344486237, "learning_rate": 2.1090038455888749e-07, "loss": 0.2616, "step": 17738 }, { "epoch": 2.8886536660831332, "grad_norm": 0.19528119266033173, "learning_rate": 2.1028682560902678e-07, "loss": 0.2755, "step": 17739 }, { "epoch": 2.888816512640964, "grad_norm": 0.16860145330429077, "learning_rate": 2.0967415668413926e-07, "loss": 0.2307, "step": 17740 }, { "epoch": 2.888979359198795, "grad_norm": 0.1381574422121048, "learning_rate": 2.0906237780622673e-07, "loss": 0.2687, "step": 17741 }, { "epoch": 2.8891422057566256, "grad_norm": 0.20075392723083496, "learning_rate": 2.0845148899724666e-07, "loss": 0.2822, "step": 17742 }, { "epoch": 2.8893050523144566, "grad_norm": 0.1572137475013733, "learning_rate": 2.0784149027913703e-07, "loss": 0.2677, "step": 17743 }, { "epoch": 2.8894678988722875, "grad_norm": 0.1856735646724701, "learning_rate": 2.07232381673797e-07, "loss": 0.2513, "step": 17744 }, { "epoch": 2.8896307454301184, "grad_norm": 0.13221997022628784, "learning_rate": 2.0662416320309795e-07, "loss": 0.2644, "step": 17745 }, { "epoch": 2.8897935919879494, "grad_norm": 0.1554349660873413, "learning_rate": 2.0601683488887247e-07, "loss": 0.3137, "step": 17746 }, { "epoch": 2.8899564385457803, "grad_norm": 0.18812701106071472, "learning_rate": 2.0541039675292528e-07, "loss": 0.2839, "step": 17747 }, { "epoch": 2.8901192851036113, "grad_norm": 0.1726892590522766, "learning_rate": 2.0480484881703344e-07, "loss": 0.296, "step": 17748 }, { "epoch": 2.8902821316614418, "grad_norm": 0.1905289888381958, "learning_rate": 2.042001911029351e-07, "loss": 0.2648, "step": 17749 }, { "epoch": 2.890444978219273, "grad_norm": 0.19851379096508026, "learning_rate": 2.0359642363234344e-07, "loss": 0.2622, "step": 17750 }, { "epoch": 2.8906078247771037, "grad_norm": 0.1489008516073227, "learning_rate": 2.0299354642693003e-07, "loss": 0.2655, "step": 17751 }, { "epoch": 2.8907706713349346, "grad_norm": 0.18184097111225128, "learning_rate": 2.0239155950833866e-07, "loss": 0.2647, "step": 17752 }, { "epoch": 2.8909335178927655, "grad_norm": 0.16322390735149384, "learning_rate": 2.0179046289819092e-07, "loss": 0.2233, "step": 17753 }, { "epoch": 2.8910963644505965, "grad_norm": 0.1628393828868866, "learning_rate": 2.0119025661806125e-07, "loss": 0.2859, "step": 17754 }, { "epoch": 2.8912592110084274, "grad_norm": 0.1964350789785385, "learning_rate": 2.005909406895018e-07, "loss": 0.2563, "step": 17755 }, { "epoch": 2.891422057566258, "grad_norm": 0.16628728806972504, "learning_rate": 1.9999251513402594e-07, "loss": 0.262, "step": 17756 }, { "epoch": 2.8915849041240893, "grad_norm": 0.15932480990886688, "learning_rate": 1.9939497997312206e-07, "loss": 0.2577, "step": 17757 }, { "epoch": 2.89174775068192, "grad_norm": 0.21115073561668396, "learning_rate": 1.987983352282452e-07, "loss": 0.2772, "step": 17758 }, { "epoch": 2.8919105972397507, "grad_norm": 0.17711327970027924, "learning_rate": 1.9820258092081434e-07, "loss": 0.2346, "step": 17759 }, { "epoch": 2.8920734437975817, "grad_norm": 0.17918352782726288, "learning_rate": 1.9760771707221515e-07, "loss": 0.2542, "step": 17760 }, { "epoch": 2.8922362903554126, "grad_norm": 0.2059723436832428, "learning_rate": 1.9701374370381388e-07, "loss": 0.3028, "step": 17761 }, { "epoch": 2.8923991369132436, "grad_norm": 0.1766074299812317, "learning_rate": 1.9642066083692956e-07, "loss": 0.2624, "step": 17762 }, { "epoch": 2.8925619834710745, "grad_norm": 0.17019568383693695, "learning_rate": 1.9582846849285353e-07, "loss": 0.2572, "step": 17763 }, { "epoch": 2.8927248300289055, "grad_norm": 0.12966743111610413, "learning_rate": 1.9523716669285486e-07, "loss": 0.2463, "step": 17764 }, { "epoch": 2.892887676586736, "grad_norm": 0.1447702944278717, "learning_rate": 1.9464675545815824e-07, "loss": 0.245, "step": 17765 }, { "epoch": 2.893050523144567, "grad_norm": 0.20937004685401917, "learning_rate": 1.9405723480996062e-07, "loss": 0.2848, "step": 17766 }, { "epoch": 2.893213369702398, "grad_norm": 0.17027537524700165, "learning_rate": 1.9346860476942841e-07, "loss": 0.2384, "step": 17767 }, { "epoch": 2.893376216260229, "grad_norm": 0.18381398916244507, "learning_rate": 1.9288086535769744e-07, "loss": 0.2723, "step": 17768 }, { "epoch": 2.8935390628180597, "grad_norm": 0.16732117533683777, "learning_rate": 1.9229401659586478e-07, "loss": 0.2815, "step": 17769 }, { "epoch": 2.8937019093758907, "grad_norm": 0.15760932862758636, "learning_rate": 1.9170805850500518e-07, "loss": 0.2752, "step": 17770 }, { "epoch": 2.8938647559337216, "grad_norm": 0.18836519122123718, "learning_rate": 1.9112299110615462e-07, "loss": 0.3239, "step": 17771 }, { "epoch": 2.894027602491552, "grad_norm": 0.17970439791679382, "learning_rate": 1.9053881442031574e-07, "loss": 0.2805, "step": 17772 }, { "epoch": 2.8941904490493835, "grad_norm": 0.16707615554332733, "learning_rate": 1.899555284684662e-07, "loss": 0.2959, "step": 17773 }, { "epoch": 2.894353295607214, "grad_norm": 0.20187678933143616, "learning_rate": 1.8937313327154483e-07, "loss": 0.25, "step": 17774 }, { "epoch": 2.894516142165045, "grad_norm": 0.16141992807388306, "learning_rate": 1.8879162885046265e-07, "loss": 0.2482, "step": 17775 }, { "epoch": 2.894678988722876, "grad_norm": 0.12537969648838043, "learning_rate": 1.882110152261002e-07, "loss": 0.2427, "step": 17776 }, { "epoch": 2.894841835280707, "grad_norm": 0.1803325116634369, "learning_rate": 1.8763129241929634e-07, "loss": 0.266, "step": 17777 }, { "epoch": 2.8950046818385378, "grad_norm": 0.17160676419734955, "learning_rate": 1.8705246045087056e-07, "loss": 0.2428, "step": 17778 }, { "epoch": 2.8951675283963683, "grad_norm": 0.22205233573913574, "learning_rate": 1.8647451934160342e-07, "loss": 0.2634, "step": 17779 }, { "epoch": 2.8953303749541996, "grad_norm": 0.1334187537431717, "learning_rate": 1.8589746911224504e-07, "loss": 0.26, "step": 17780 }, { "epoch": 2.89549322151203, "grad_norm": 0.2082894742488861, "learning_rate": 1.8532130978350937e-07, "loss": 0.2597, "step": 17781 }, { "epoch": 2.895656068069861, "grad_norm": 0.16425086557865143, "learning_rate": 1.847460413760882e-07, "loss": 0.2545, "step": 17782 }, { "epoch": 2.895818914627692, "grad_norm": 0.1744047999382019, "learning_rate": 1.8417166391063444e-07, "loss": 0.2892, "step": 17783 }, { "epoch": 2.895981761185523, "grad_norm": 0.17421142756938934, "learning_rate": 1.835981774077622e-07, "loss": 0.2415, "step": 17784 }, { "epoch": 2.896144607743354, "grad_norm": 0.1353888213634491, "learning_rate": 1.8302558188807162e-07, "loss": 0.2187, "step": 17785 }, { "epoch": 2.896307454301185, "grad_norm": 0.17473842203617096, "learning_rate": 1.824538773721157e-07, "loss": 0.2497, "step": 17786 }, { "epoch": 2.896470300859016, "grad_norm": 0.216492161154747, "learning_rate": 1.8188306388041976e-07, "loss": 0.266, "step": 17787 }, { "epoch": 2.8966331474168463, "grad_norm": 0.16243602335453033, "learning_rate": 1.8131314143348122e-07, "loss": 0.2675, "step": 17788 }, { "epoch": 2.8967959939746772, "grad_norm": 0.18015246093273163, "learning_rate": 1.8074411005175874e-07, "loss": 0.2787, "step": 17789 }, { "epoch": 2.896958840532508, "grad_norm": 0.1984226554632187, "learning_rate": 1.8017596975568318e-07, "loss": 0.2419, "step": 17790 }, { "epoch": 2.897121687090339, "grad_norm": 0.17992162704467773, "learning_rate": 1.7960872056565214e-07, "loss": 0.2869, "step": 17791 }, { "epoch": 2.89728453364817, "grad_norm": 0.17829400300979614, "learning_rate": 1.7904236250203265e-07, "loss": 0.2471, "step": 17792 }, { "epoch": 2.897447380206001, "grad_norm": 0.24530930817127228, "learning_rate": 1.7847689558515567e-07, "loss": 0.3208, "step": 17793 }, { "epoch": 2.897610226763832, "grad_norm": 0.17323900759220123, "learning_rate": 1.779123198353272e-07, "loss": 0.2545, "step": 17794 }, { "epoch": 2.8977730733216625, "grad_norm": 0.25539425015449524, "learning_rate": 1.773486352728143e-07, "loss": 0.3106, "step": 17795 }, { "epoch": 2.8979359198794934, "grad_norm": 0.19601291418075562, "learning_rate": 1.767858419178564e-07, "loss": 0.2784, "step": 17796 }, { "epoch": 2.8980987664373243, "grad_norm": 0.1946970522403717, "learning_rate": 1.7622393979065954e-07, "loss": 0.3011, "step": 17797 }, { "epoch": 2.8982616129951553, "grad_norm": 0.19700448215007782, "learning_rate": 1.7566292891139923e-07, "loss": 0.2906, "step": 17798 }, { "epoch": 2.898424459552986, "grad_norm": 0.18947787582874298, "learning_rate": 1.7510280930021217e-07, "loss": 0.2592, "step": 17799 }, { "epoch": 2.898587306110817, "grad_norm": 0.19168777763843536, "learning_rate": 1.745435809772128e-07, "loss": 0.3048, "step": 17800 }, { "epoch": 2.898750152668648, "grad_norm": 0.154625803232193, "learning_rate": 1.739852439624795e-07, "loss": 0.2836, "step": 17801 }, { "epoch": 2.8989129992264786, "grad_norm": 0.14998547732830048, "learning_rate": 1.734277982760546e-07, "loss": 0.2692, "step": 17802 }, { "epoch": 2.89907584578431, "grad_norm": 0.16818012297153473, "learning_rate": 1.728712439379526e-07, "loss": 0.2679, "step": 17803 }, { "epoch": 2.8992386923421405, "grad_norm": 0.1674560159444809, "learning_rate": 1.7231558096816036e-07, "loss": 0.2861, "step": 17804 }, { "epoch": 2.8994015388999714, "grad_norm": 0.15340974926948547, "learning_rate": 1.7176080938662297e-07, "loss": 0.2597, "step": 17805 }, { "epoch": 2.8995643854578024, "grad_norm": 0.1270681917667389, "learning_rate": 1.7120692921326065e-07, "loss": 0.2623, "step": 17806 }, { "epoch": 2.8997272320156333, "grad_norm": 0.1830175668001175, "learning_rate": 1.7065394046795747e-07, "loss": 0.2753, "step": 17807 }, { "epoch": 2.8998900785734643, "grad_norm": 0.18352951109409332, "learning_rate": 1.7010184317056976e-07, "loss": 0.2669, "step": 17808 }, { "epoch": 2.900052925131295, "grad_norm": 0.1803596317768097, "learning_rate": 1.695506373409178e-07, "loss": 0.2758, "step": 17809 }, { "epoch": 2.900215771689126, "grad_norm": 0.16741326451301575, "learning_rate": 1.690003229987941e-07, "loss": 0.2829, "step": 17810 }, { "epoch": 2.9003786182469566, "grad_norm": 0.2556489109992981, "learning_rate": 1.6845090016394949e-07, "loss": 0.2761, "step": 17811 }, { "epoch": 2.9005414648047876, "grad_norm": 0.15746496617794037, "learning_rate": 1.679023688561182e-07, "loss": 0.2896, "step": 17812 }, { "epoch": 2.9007043113626185, "grad_norm": 0.17365692555904388, "learning_rate": 1.6735472909499284e-07, "loss": 0.2775, "step": 17813 }, { "epoch": 2.9008671579204495, "grad_norm": 0.18866609036922455, "learning_rate": 1.6680798090022987e-07, "loss": 0.2555, "step": 17814 }, { "epoch": 2.9010300044782804, "grad_norm": 0.21883997321128845, "learning_rate": 1.662621242914636e-07, "loss": 0.2477, "step": 17815 }, { "epoch": 2.9011928510361114, "grad_norm": 0.1753060221672058, "learning_rate": 1.6571715928828946e-07, "loss": 0.287, "step": 17816 }, { "epoch": 2.9013556975939423, "grad_norm": 0.1471451222896576, "learning_rate": 1.6517308591027792e-07, "loss": 0.2702, "step": 17817 }, { "epoch": 2.901518544151773, "grad_norm": 0.16181409358978271, "learning_rate": 1.64629904176955e-07, "loss": 0.2808, "step": 17818 }, { "epoch": 2.9016813907096037, "grad_norm": 0.22022639214992523, "learning_rate": 1.6408761410783292e-07, "loss": 0.2394, "step": 17819 }, { "epoch": 2.9018442372674347, "grad_norm": 0.1619419902563095, "learning_rate": 1.6354621572237382e-07, "loss": 0.2822, "step": 17820 }, { "epoch": 2.9020070838252656, "grad_norm": 0.20600903034210205, "learning_rate": 1.63005709040015e-07, "loss": 0.247, "step": 17821 }, { "epoch": 2.9021699303830966, "grad_norm": 0.17725726962089539, "learning_rate": 1.624660940801659e-07, "loss": 0.2954, "step": 17822 }, { "epoch": 2.9023327769409275, "grad_norm": 0.1856337934732437, "learning_rate": 1.6192737086220266e-07, "loss": 0.2685, "step": 17823 }, { "epoch": 2.9024956234987584, "grad_norm": 0.17452870309352875, "learning_rate": 1.6138953940545986e-07, "loss": 0.2827, "step": 17824 }, { "epoch": 2.902658470056589, "grad_norm": 0.2081277072429657, "learning_rate": 1.6085259972925258e-07, "loss": 0.2696, "step": 17825 }, { "epoch": 2.9028213166144203, "grad_norm": 0.17834840714931488, "learning_rate": 1.6031655185285432e-07, "loss": 0.2985, "step": 17826 }, { "epoch": 2.902984163172251, "grad_norm": 0.2003364861011505, "learning_rate": 1.5978139579551632e-07, "loss": 0.2543, "step": 17827 }, { "epoch": 2.9031470097300818, "grad_norm": 0.20060531795024872, "learning_rate": 1.59247131576451e-07, "loss": 0.2985, "step": 17828 }, { "epoch": 2.9033098562879127, "grad_norm": 0.18962447345256805, "learning_rate": 1.5871375921483466e-07, "loss": 0.233, "step": 17829 }, { "epoch": 2.9034727028457437, "grad_norm": 0.1816984862089157, "learning_rate": 1.5818127872982424e-07, "loss": 0.2599, "step": 17830 }, { "epoch": 2.9036355494035746, "grad_norm": 0.16649797558784485, "learning_rate": 1.5764969014053221e-07, "loss": 0.2921, "step": 17831 }, { "epoch": 2.903798395961405, "grad_norm": 0.16798900067806244, "learning_rate": 1.5711899346604607e-07, "loss": 0.2776, "step": 17832 }, { "epoch": 2.9039612425192365, "grad_norm": 0.15264251828193665, "learning_rate": 1.5658918872542005e-07, "loss": 0.2646, "step": 17833 }, { "epoch": 2.904124089077067, "grad_norm": 0.19549688696861267, "learning_rate": 1.5606027593767503e-07, "loss": 0.2532, "step": 17834 }, { "epoch": 2.904286935634898, "grad_norm": 0.23172546923160553, "learning_rate": 1.555322551217986e-07, "loss": 0.2473, "step": 17835 }, { "epoch": 2.904449782192729, "grad_norm": 0.25807473063468933, "learning_rate": 1.5500512629675057e-07, "loss": 0.2997, "step": 17836 }, { "epoch": 2.90461262875056, "grad_norm": 0.19304077327251434, "learning_rate": 1.544788894814575e-07, "loss": 0.2725, "step": 17837 }, { "epoch": 2.9047754753083908, "grad_norm": 0.21729305386543274, "learning_rate": 1.539535446948126e-07, "loss": 0.2706, "step": 17838 }, { "epoch": 2.9049383218662217, "grad_norm": 0.17339953780174255, "learning_rate": 1.5342909195567578e-07, "loss": 0.2619, "step": 17839 }, { "epoch": 2.9051011684240526, "grad_norm": 0.16262805461883545, "learning_rate": 1.5290553128287367e-07, "loss": 0.2801, "step": 17840 }, { "epoch": 2.905264014981883, "grad_norm": 0.17962835729122162, "learning_rate": 1.5238286269521063e-07, "loss": 0.2431, "step": 17841 }, { "epoch": 2.905426861539714, "grad_norm": 0.18079139292240143, "learning_rate": 1.5186108621144667e-07, "loss": 0.2508, "step": 17842 }, { "epoch": 2.905589708097545, "grad_norm": 0.1729051172733307, "learning_rate": 1.5134020185031683e-07, "loss": 0.2595, "step": 17843 }, { "epoch": 2.905752554655376, "grad_norm": 0.18523170053958893, "learning_rate": 1.5082020963052e-07, "loss": 0.3118, "step": 17844 }, { "epoch": 2.905915401213207, "grad_norm": 0.18784038722515106, "learning_rate": 1.5030110957073018e-07, "loss": 0.2571, "step": 17845 }, { "epoch": 2.906078247771038, "grad_norm": 0.1890716701745987, "learning_rate": 1.497829016895824e-07, "loss": 0.2918, "step": 17846 }, { "epoch": 2.906241094328869, "grad_norm": 0.18429161608219147, "learning_rate": 1.492655860056813e-07, "loss": 0.3045, "step": 17847 }, { "epoch": 2.9064039408866993, "grad_norm": 0.16847927868366241, "learning_rate": 1.4874916253760086e-07, "loss": 0.2393, "step": 17848 }, { "epoch": 2.9065667874445302, "grad_norm": 0.1447426676750183, "learning_rate": 1.482336313038818e-07, "loss": 0.3025, "step": 17849 }, { "epoch": 2.906729634002361, "grad_norm": 0.1416369378566742, "learning_rate": 1.4771899232303433e-07, "loss": 0.28, "step": 17850 }, { "epoch": 2.906892480560192, "grad_norm": 0.16873657703399658, "learning_rate": 1.4720524561352978e-07, "loss": 0.2724, "step": 17851 }, { "epoch": 2.907055327118023, "grad_norm": 0.17690353095531464, "learning_rate": 1.4669239119382283e-07, "loss": 0.2516, "step": 17852 }, { "epoch": 2.907218173675854, "grad_norm": 0.26805347204208374, "learning_rate": 1.4618042908232099e-07, "loss": 0.3078, "step": 17853 }, { "epoch": 2.907381020233685, "grad_norm": 0.13743442296981812, "learning_rate": 1.4566935929740122e-07, "loss": 0.2867, "step": 17854 }, { "epoch": 2.9075438667915154, "grad_norm": 0.17859026789665222, "learning_rate": 1.4515918185742104e-07, "loss": 0.2516, "step": 17855 }, { "epoch": 2.907706713349347, "grad_norm": 0.1872246414422989, "learning_rate": 1.4464989678069363e-07, "loss": 0.2746, "step": 17856 }, { "epoch": 2.9078695599071773, "grad_norm": 0.23296065628528595, "learning_rate": 1.4414150408550432e-07, "loss": 0.2633, "step": 17857 }, { "epoch": 2.9080324064650083, "grad_norm": 0.15841113030910492, "learning_rate": 1.436340037901024e-07, "loss": 0.2319, "step": 17858 }, { "epoch": 2.908195253022839, "grad_norm": 0.16338153183460236, "learning_rate": 1.4312739591271219e-07, "loss": 0.2305, "step": 17859 }, { "epoch": 2.90835809958067, "grad_norm": 0.17123156785964966, "learning_rate": 1.426216804715219e-07, "loss": 0.2576, "step": 17860 }, { "epoch": 2.908520946138501, "grad_norm": 0.16432268917560577, "learning_rate": 1.421168574846865e-07, "loss": 0.2085, "step": 17861 }, { "epoch": 2.908683792696332, "grad_norm": 0.2002553939819336, "learning_rate": 1.4161292697033313e-07, "loss": 0.2759, "step": 17862 }, { "epoch": 2.908846639254163, "grad_norm": 0.16103801131248474, "learning_rate": 1.4110988894655285e-07, "loss": 0.2998, "step": 17863 }, { "epoch": 2.9090094858119935, "grad_norm": 0.18545429408550262, "learning_rate": 1.4060774343140903e-07, "loss": 0.2364, "step": 17864 }, { "epoch": 2.9091723323698244, "grad_norm": 0.18210603296756744, "learning_rate": 1.4010649044292613e-07, "loss": 0.2361, "step": 17865 }, { "epoch": 2.9093351789276554, "grad_norm": 0.16817866265773773, "learning_rate": 1.396061299991036e-07, "loss": 0.2448, "step": 17866 }, { "epoch": 2.9094980254854863, "grad_norm": 0.15828552842140198, "learning_rate": 1.3910666211790214e-07, "loss": 0.2741, "step": 17867 }, { "epoch": 2.9096608720433172, "grad_norm": 0.22176088392734528, "learning_rate": 1.3860808681726012e-07, "loss": 0.2433, "step": 17868 }, { "epoch": 2.909823718601148, "grad_norm": 0.180885910987854, "learning_rate": 1.381104041150716e-07, "loss": 0.266, "step": 17869 }, { "epoch": 2.909986565158979, "grad_norm": 0.1762767732143402, "learning_rate": 1.3761361402921113e-07, "loss": 0.2214, "step": 17870 }, { "epoch": 2.9101494117168096, "grad_norm": 0.17195962369441986, "learning_rate": 1.3711771657750894e-07, "loss": 0.2404, "step": 17871 }, { "epoch": 2.9103122582746406, "grad_norm": 0.16438427567481995, "learning_rate": 1.3662271177777297e-07, "loss": 0.2508, "step": 17872 }, { "epoch": 2.9104751048324715, "grad_norm": 0.20849980413913727, "learning_rate": 1.3612859964777235e-07, "loss": 0.2711, "step": 17873 }, { "epoch": 2.9106379513903025, "grad_norm": 0.155149444937706, "learning_rate": 1.3563538020525124e-07, "loss": 0.2658, "step": 17874 }, { "epoch": 2.9108007979481334, "grad_norm": 0.15823233127593994, "learning_rate": 1.3514305346791489e-07, "loss": 0.2661, "step": 17875 }, { "epoch": 2.9109636445059643, "grad_norm": 0.1739356815814972, "learning_rate": 1.3465161945344085e-07, "loss": 0.3111, "step": 17876 }, { "epoch": 2.9111264910637953, "grad_norm": 0.18618708848953247, "learning_rate": 1.3416107817947056e-07, "loss": 0.2528, "step": 17877 }, { "epoch": 2.911289337621626, "grad_norm": 0.20764365792274475, "learning_rate": 1.3367142966362046e-07, "loss": 0.2785, "step": 17878 }, { "epoch": 2.911452184179457, "grad_norm": 0.1896679401397705, "learning_rate": 1.331826739234654e-07, "loss": 0.2817, "step": 17879 }, { "epoch": 2.9116150307372877, "grad_norm": 0.15305732190608978, "learning_rate": 1.3269481097655523e-07, "loss": 0.2569, "step": 17880 }, { "epoch": 2.9117778772951186, "grad_norm": 0.20396788418293, "learning_rate": 1.322078408404065e-07, "loss": 0.2932, "step": 17881 }, { "epoch": 2.9119407238529496, "grad_norm": 0.13620823621749878, "learning_rate": 1.3172176353250244e-07, "loss": 0.2311, "step": 17882 }, { "epoch": 2.9121035704107805, "grad_norm": 0.18331216275691986, "learning_rate": 1.3123657907029297e-07, "loss": 0.2634, "step": 17883 }, { "epoch": 2.9122664169686114, "grad_norm": 0.20081016421318054, "learning_rate": 1.3075228747120029e-07, "loss": 0.2607, "step": 17884 }, { "epoch": 2.912429263526442, "grad_norm": 0.1818314641714096, "learning_rate": 1.3026888875261046e-07, "loss": 0.2664, "step": 17885 }, { "epoch": 2.9125921100842733, "grad_norm": 0.1885581612586975, "learning_rate": 1.2978638293187905e-07, "loss": 0.2707, "step": 17886 }, { "epoch": 2.912754956642104, "grad_norm": 0.1739625632762909, "learning_rate": 1.2930477002632834e-07, "loss": 0.3207, "step": 17887 }, { "epoch": 2.9129178031999348, "grad_norm": 0.1764914095401764, "learning_rate": 1.2882405005325281e-07, "loss": 0.2295, "step": 17888 }, { "epoch": 2.9130806497577657, "grad_norm": 0.1673179715871811, "learning_rate": 1.283442230299109e-07, "loss": 0.2643, "step": 17889 }, { "epoch": 2.9132434963155966, "grad_norm": 0.1831657439470291, "learning_rate": 1.2786528897352768e-07, "loss": 0.2566, "step": 17890 }, { "epoch": 2.9134063428734276, "grad_norm": 0.21686242520809174, "learning_rate": 1.2738724790129774e-07, "loss": 0.2854, "step": 17891 }, { "epoch": 2.9135691894312585, "grad_norm": 0.14073315262794495, "learning_rate": 1.2691009983038793e-07, "loss": 0.2617, "step": 17892 }, { "epoch": 2.9137320359890895, "grad_norm": 0.15464910864830017, "learning_rate": 1.2643384477792898e-07, "loss": 0.2442, "step": 17893 }, { "epoch": 2.91389488254692, "grad_norm": 0.15461920201778412, "learning_rate": 1.259584827610155e-07, "loss": 0.2762, "step": 17894 }, { "epoch": 2.914057729104751, "grad_norm": 0.19252362847328186, "learning_rate": 1.2548401379671727e-07, "loss": 0.2807, "step": 17895 }, { "epoch": 2.914220575662582, "grad_norm": 0.22477561235427856, "learning_rate": 1.2501043790207056e-07, "loss": 0.3063, "step": 17896 }, { "epoch": 2.914383422220413, "grad_norm": 0.19669216871261597, "learning_rate": 1.2453775509407573e-07, "loss": 0.2845, "step": 17897 }, { "epoch": 2.9145462687782437, "grad_norm": 0.19799649715423584, "learning_rate": 1.240659653897025e-07, "loss": 0.2788, "step": 17898 }, { "epoch": 2.9147091153360747, "grad_norm": 0.17712844908237457, "learning_rate": 1.235950688058929e-07, "loss": 0.2949, "step": 17899 }, { "epoch": 2.9148719618939056, "grad_norm": 0.1817813217639923, "learning_rate": 1.2312506535955004e-07, "loss": 0.2691, "step": 17900 }, { "epoch": 2.915034808451736, "grad_norm": 0.16436699032783508, "learning_rate": 1.226559550675549e-07, "loss": 0.2401, "step": 17901 }, { "epoch": 2.9151976550095675, "grad_norm": 0.19944638013839722, "learning_rate": 1.221877379467412e-07, "loss": 0.233, "step": 17902 }, { "epoch": 2.915360501567398, "grad_norm": 0.18401630222797394, "learning_rate": 1.217204140139261e-07, "loss": 0.2728, "step": 17903 }, { "epoch": 2.915523348125229, "grad_norm": 0.19335737824440002, "learning_rate": 1.21253983285885e-07, "loss": 0.2917, "step": 17904 }, { "epoch": 2.91568619468306, "grad_norm": 0.15692853927612305, "learning_rate": 1.2078844577936566e-07, "loss": 0.2389, "step": 17905 }, { "epoch": 2.915849041240891, "grad_norm": 0.1788489669561386, "learning_rate": 1.2032380151107692e-07, "loss": 0.264, "step": 17906 }, { "epoch": 2.916011887798722, "grad_norm": 0.1607067883014679, "learning_rate": 1.1986005049771099e-07, "loss": 0.2807, "step": 17907 }, { "epoch": 2.9161747343565523, "grad_norm": 0.16108548641204834, "learning_rate": 1.1939719275590732e-07, "loss": 0.2598, "step": 17908 }, { "epoch": 2.9163375809143837, "grad_norm": 0.15973034501075745, "learning_rate": 1.1893522830229153e-07, "loss": 0.2699, "step": 17909 }, { "epoch": 2.916500427472214, "grad_norm": 0.19562828540802002, "learning_rate": 1.1847415715344479e-07, "loss": 0.2239, "step": 17910 }, { "epoch": 2.916663274030045, "grad_norm": 0.16087248921394348, "learning_rate": 1.1801397932592607e-07, "loss": 0.2278, "step": 17911 }, { "epoch": 2.916826120587876, "grad_norm": 0.1776711493730545, "learning_rate": 1.175546948362527e-07, "loss": 0.2426, "step": 17912 }, { "epoch": 2.916988967145707, "grad_norm": 0.23529985547065735, "learning_rate": 1.1709630370091429e-07, "loss": 0.2887, "step": 17913 }, { "epoch": 2.917151813703538, "grad_norm": 0.18479083478450775, "learning_rate": 1.1663880593636989e-07, "loss": 0.2805, "step": 17914 }, { "epoch": 2.917314660261369, "grad_norm": 0.17825688421726227, "learning_rate": 1.1618220155904524e-07, "loss": 0.2481, "step": 17915 }, { "epoch": 2.9174775068192, "grad_norm": 0.18338090181350708, "learning_rate": 1.157264905853328e-07, "loss": 0.2718, "step": 17916 }, { "epoch": 2.9176403533770303, "grad_norm": 0.162345752120018, "learning_rate": 1.1527167303159447e-07, "loss": 0.2693, "step": 17917 }, { "epoch": 2.9178031999348613, "grad_norm": 0.21025590598583221, "learning_rate": 1.1481774891416163e-07, "loss": 0.3093, "step": 17918 }, { "epoch": 2.917966046492692, "grad_norm": 0.17564791440963745, "learning_rate": 1.1436471824932959e-07, "loss": 0.2477, "step": 17919 }, { "epoch": 2.918128893050523, "grad_norm": 0.19067934155464172, "learning_rate": 1.1391258105336033e-07, "loss": 0.2368, "step": 17920 }, { "epoch": 2.918291739608354, "grad_norm": 0.2172662764787674, "learning_rate": 1.1346133734249365e-07, "loss": 0.229, "step": 17921 }, { "epoch": 2.918454586166185, "grad_norm": 0.20555584132671356, "learning_rate": 1.130109871329249e-07, "loss": 0.2867, "step": 17922 }, { "epoch": 2.918617432724016, "grad_norm": 0.13978393375873566, "learning_rate": 1.1256153044082451e-07, "loss": 0.2184, "step": 17923 }, { "epoch": 2.9187802792818465, "grad_norm": 0.1518307328224182, "learning_rate": 1.1211296728233234e-07, "loss": 0.2731, "step": 17924 }, { "epoch": 2.9189431258396774, "grad_norm": 0.17359943687915802, "learning_rate": 1.1166529767354938e-07, "loss": 0.2728, "step": 17925 }, { "epoch": 2.9191059723975084, "grad_norm": 0.1407046616077423, "learning_rate": 1.1121852163054891e-07, "loss": 0.2545, "step": 17926 }, { "epoch": 2.9192688189553393, "grad_norm": 0.2027541995048523, "learning_rate": 1.1077263916937364e-07, "loss": 0.2719, "step": 17927 }, { "epoch": 2.9194316655131702, "grad_norm": 0.19341307878494263, "learning_rate": 1.103276503060302e-07, "loss": 0.2464, "step": 17928 }, { "epoch": 2.919594512071001, "grad_norm": 0.16410070657730103, "learning_rate": 1.098835550564975e-07, "loss": 0.2451, "step": 17929 }, { "epoch": 2.919757358628832, "grad_norm": 0.22025027871131897, "learning_rate": 1.0944035343671555e-07, "loss": 0.3313, "step": 17930 }, { "epoch": 2.9199202051866626, "grad_norm": 0.14556241035461426, "learning_rate": 1.0899804546259939e-07, "loss": 0.2745, "step": 17931 }, { "epoch": 2.920083051744494, "grad_norm": 0.212999165058136, "learning_rate": 1.08556631150028e-07, "loss": 0.2553, "step": 17932 }, { "epoch": 2.9202458983023245, "grad_norm": 0.14431673288345337, "learning_rate": 1.081161105148526e-07, "loss": 0.2933, "step": 17933 }, { "epoch": 2.9204087448601554, "grad_norm": 0.17156080901622772, "learning_rate": 1.0767648357288552e-07, "loss": 0.2694, "step": 17934 }, { "epoch": 2.9205715914179864, "grad_norm": 0.21641406416893005, "learning_rate": 1.0723775033991135e-07, "loss": 0.2742, "step": 17935 }, { "epoch": 2.9207344379758173, "grad_norm": 0.20192179083824158, "learning_rate": 1.0679991083168418e-07, "loss": 0.2688, "step": 17936 }, { "epoch": 2.9208972845336483, "grad_norm": 0.21105578541755676, "learning_rate": 1.0636296506392197e-07, "loss": 0.2721, "step": 17937 }, { "epoch": 2.921060131091479, "grad_norm": 0.5537495613098145, "learning_rate": 1.0592691305231217e-07, "loss": 0.301, "step": 17938 }, { "epoch": 2.92122297764931, "grad_norm": 0.20374006032943726, "learning_rate": 1.0549175481250895e-07, "loss": 0.2563, "step": 17939 }, { "epoch": 2.9213858242071407, "grad_norm": 0.21105030179023743, "learning_rate": 1.0505749036013868e-07, "loss": 0.2597, "step": 17940 }, { "epoch": 2.9215486707649716, "grad_norm": 0.1797781139612198, "learning_rate": 1.0462411971079444e-07, "loss": 0.2622, "step": 17941 }, { "epoch": 2.9217115173228025, "grad_norm": 0.2376088947057724, "learning_rate": 1.0419164288003047e-07, "loss": 0.2686, "step": 17942 }, { "epoch": 2.9218743638806335, "grad_norm": 0.18108485639095306, "learning_rate": 1.0376005988337601e-07, "loss": 0.2923, "step": 17943 }, { "epoch": 2.9220372104384644, "grad_norm": 0.12205532938241959, "learning_rate": 1.0332937073632698e-07, "loss": 0.2511, "step": 17944 }, { "epoch": 2.9222000569962954, "grad_norm": 0.18148604035377502, "learning_rate": 1.0289957545434881e-07, "loss": 0.2297, "step": 17945 }, { "epoch": 2.9223629035541263, "grad_norm": 0.18855179846286774, "learning_rate": 1.0247067405286526e-07, "loss": 0.2825, "step": 17946 }, { "epoch": 2.922525750111957, "grad_norm": 0.14315079152584076, "learning_rate": 1.0204266654728067e-07, "loss": 0.2732, "step": 17947 }, { "epoch": 2.9226885966697878, "grad_norm": 0.184539332985878, "learning_rate": 1.0161555295296333e-07, "loss": 0.2719, "step": 17948 }, { "epoch": 2.9228514432276187, "grad_norm": 0.1466420590877533, "learning_rate": 1.011893332852426e-07, "loss": 0.2658, "step": 17949 }, { "epoch": 2.9230142897854496, "grad_norm": 0.13709120452404022, "learning_rate": 1.0076400755942295e-07, "loss": 0.2758, "step": 17950 }, { "epoch": 2.9231771363432806, "grad_norm": 0.17826594412326813, "learning_rate": 1.0033957579077824e-07, "loss": 0.2575, "step": 17951 }, { "epoch": 2.9233399829011115, "grad_norm": 0.20910073816776276, "learning_rate": 9.991603799454352e-08, "loss": 0.2915, "step": 17952 }, { "epoch": 2.9235028294589425, "grad_norm": 0.1600368618965149, "learning_rate": 9.94933941859233e-08, "loss": 0.309, "step": 17953 }, { "epoch": 2.923665676016773, "grad_norm": 0.16208221018314362, "learning_rate": 9.907164438009708e-08, "loss": 0.2435, "step": 17954 }, { "epoch": 2.9238285225746043, "grad_norm": 0.20994573831558228, "learning_rate": 9.865078859220278e-08, "loss": 0.2778, "step": 17955 }, { "epoch": 2.923991369132435, "grad_norm": 0.19428902864456177, "learning_rate": 9.823082683735052e-08, "loss": 0.2481, "step": 17956 }, { "epoch": 2.924154215690266, "grad_norm": 0.2296476513147354, "learning_rate": 9.781175913061991e-08, "loss": 0.2989, "step": 17957 }, { "epoch": 2.9243170622480967, "grad_norm": 0.19490903615951538, "learning_rate": 9.739358548705724e-08, "loss": 0.2847, "step": 17958 }, { "epoch": 2.9244799088059277, "grad_norm": 0.1706024557352066, "learning_rate": 9.697630592167273e-08, "loss": 0.2187, "step": 17959 }, { "epoch": 2.9246427553637586, "grad_norm": 0.1897812783718109, "learning_rate": 9.655992044945162e-08, "loss": 0.2664, "step": 17960 }, { "epoch": 2.924805601921589, "grad_norm": 0.19758233428001404, "learning_rate": 9.614442908534304e-08, "loss": 0.2818, "step": 17961 }, { "epoch": 2.9249684484794205, "grad_norm": 0.1823899745941162, "learning_rate": 9.572983184426288e-08, "loss": 0.291, "step": 17962 }, { "epoch": 2.925131295037251, "grad_norm": 0.18764854967594147, "learning_rate": 9.531612874109641e-08, "loss": 0.238, "step": 17963 }, { "epoch": 2.925294141595082, "grad_norm": 0.15686440467834473, "learning_rate": 9.490331979069567e-08, "loss": 0.2755, "step": 17964 }, { "epoch": 2.925456988152913, "grad_norm": 0.18351882696151733, "learning_rate": 9.449140500788767e-08, "loss": 0.2798, "step": 17965 }, { "epoch": 2.925619834710744, "grad_norm": 0.19019602239131927, "learning_rate": 9.408038440745503e-08, "loss": 0.3048, "step": 17966 }, { "epoch": 2.9257826812685748, "grad_norm": 0.15201173722743988, "learning_rate": 9.367025800415541e-08, "loss": 0.2709, "step": 17967 }, { "epoch": 2.9259455278264057, "grad_norm": 0.1808438003063202, "learning_rate": 9.32610258127159e-08, "loss": 0.2436, "step": 17968 }, { "epoch": 2.9261083743842367, "grad_norm": 0.15310481190681458, "learning_rate": 9.285268784782752e-08, "loss": 0.2587, "step": 17969 }, { "epoch": 2.926271220942067, "grad_norm": 0.19685542583465576, "learning_rate": 9.244524412415357e-08, "loss": 0.2698, "step": 17970 }, { "epoch": 2.926434067499898, "grad_norm": 0.19622942805290222, "learning_rate": 9.203869465631842e-08, "loss": 0.3118, "step": 17971 }, { "epoch": 2.926596914057729, "grad_norm": 0.20978151261806488, "learning_rate": 9.163303945891876e-08, "loss": 0.2893, "step": 17972 }, { "epoch": 2.92675976061556, "grad_norm": 0.13928133249282837, "learning_rate": 9.122827854652349e-08, "loss": 0.2683, "step": 17973 }, { "epoch": 2.926922607173391, "grad_norm": 0.16449123620986938, "learning_rate": 9.082441193365987e-08, "loss": 0.2746, "step": 17974 }, { "epoch": 2.927085453731222, "grad_norm": 0.21136145293712616, "learning_rate": 9.042143963483018e-08, "loss": 0.2657, "step": 17975 }, { "epoch": 2.927248300289053, "grad_norm": 0.18287543952465057, "learning_rate": 9.001936166450341e-08, "loss": 0.2592, "step": 17976 }, { "epoch": 2.9274111468468833, "grad_norm": 0.14439760148525238, "learning_rate": 8.961817803711248e-08, "loss": 0.2906, "step": 17977 }, { "epoch": 2.9275739934047142, "grad_norm": 0.1704351305961609, "learning_rate": 8.921788876706249e-08, "loss": 0.2449, "step": 17978 }, { "epoch": 2.927736839962545, "grad_norm": 0.15425507724285126, "learning_rate": 8.881849386872531e-08, "loss": 0.2459, "step": 17979 }, { "epoch": 2.927899686520376, "grad_norm": 0.15205539762973785, "learning_rate": 8.841999335643947e-08, "loss": 0.2405, "step": 17980 }, { "epoch": 2.928062533078207, "grad_norm": 0.1683875322341919, "learning_rate": 8.802238724451295e-08, "loss": 0.2562, "step": 17981 }, { "epoch": 2.928225379636038, "grad_norm": 0.2042251080274582, "learning_rate": 8.762567554722045e-08, "loss": 0.2617, "step": 17982 }, { "epoch": 2.928388226193869, "grad_norm": 0.17615589499473572, "learning_rate": 8.722985827880892e-08, "loss": 0.262, "step": 17983 }, { "epoch": 2.9285510727516995, "grad_norm": 0.1646536886692047, "learning_rate": 8.683493545348365e-08, "loss": 0.2602, "step": 17984 }, { "epoch": 2.928713919309531, "grad_norm": 0.1727379560470581, "learning_rate": 8.644090708542496e-08, "loss": 0.2754, "step": 17985 }, { "epoch": 2.9288767658673613, "grad_norm": 0.19462114572525024, "learning_rate": 8.604777318878265e-08, "loss": 0.3184, "step": 17986 }, { "epoch": 2.9290396124251923, "grad_norm": 0.1615336686372757, "learning_rate": 8.565553377766767e-08, "loss": 0.2809, "step": 17987 }, { "epoch": 2.9292024589830232, "grad_norm": 0.16069410741329193, "learning_rate": 8.526418886616593e-08, "loss": 0.2613, "step": 17988 }, { "epoch": 2.929365305540854, "grad_norm": 0.19574415683746338, "learning_rate": 8.487373846832736e-08, "loss": 0.2626, "step": 17989 }, { "epoch": 2.929528152098685, "grad_norm": 0.21879011392593384, "learning_rate": 8.448418259816571e-08, "loss": 0.2777, "step": 17990 }, { "epoch": 2.929690998656516, "grad_norm": 0.1919446587562561, "learning_rate": 8.409552126967534e-08, "loss": 0.2476, "step": 17991 }, { "epoch": 2.929853845214347, "grad_norm": 0.19003939628601074, "learning_rate": 8.370775449680346e-08, "loss": 0.2993, "step": 17992 }, { "epoch": 2.9300166917721775, "grad_norm": 0.19381330907344818, "learning_rate": 8.332088229347224e-08, "loss": 0.2248, "step": 17993 }, { "epoch": 2.9301795383300084, "grad_norm": 0.16585996747016907, "learning_rate": 8.293490467357611e-08, "loss": 0.2688, "step": 17994 }, { "epoch": 2.9303423848878394, "grad_norm": 0.22082261741161346, "learning_rate": 8.254982165097069e-08, "loss": 0.2822, "step": 17995 }, { "epoch": 2.9305052314456703, "grad_norm": 0.21827954053878784, "learning_rate": 8.2165633239481e-08, "loss": 0.247, "step": 17996 }, { "epoch": 2.9306680780035013, "grad_norm": 0.16029231250286102, "learning_rate": 8.178233945290159e-08, "loss": 0.2314, "step": 17997 }, { "epoch": 2.930830924561332, "grad_norm": 0.3067714273929596, "learning_rate": 8.139994030499087e-08, "loss": 0.2551, "step": 17998 }, { "epoch": 2.930993771119163, "grad_norm": 0.16754062473773956, "learning_rate": 8.101843580948232e-08, "loss": 0.2517, "step": 17999 }, { "epoch": 2.9311566176769936, "grad_norm": 0.1482720822095871, "learning_rate": 8.063782598007052e-08, "loss": 0.295, "step": 18000 }, { "epoch": 2.9313194642348246, "grad_norm": 0.19777680933475494, "learning_rate": 8.025811083042235e-08, "loss": 0.2478, "step": 18001 }, { "epoch": 2.9314823107926555, "grad_norm": 0.1488533467054367, "learning_rate": 7.987929037416852e-08, "loss": 0.2936, "step": 18002 }, { "epoch": 2.9316451573504865, "grad_norm": 0.2024259865283966, "learning_rate": 7.950136462490932e-08, "loss": 0.2781, "step": 18003 }, { "epoch": 2.9318080039083174, "grad_norm": 0.1934460699558258, "learning_rate": 7.912433359621718e-08, "loss": 0.2724, "step": 18004 }, { "epoch": 2.9319708504661484, "grad_norm": 0.17409758269786835, "learning_rate": 7.874819730162574e-08, "loss": 0.239, "step": 18005 }, { "epoch": 2.9321336970239793, "grad_norm": 0.1792655736207962, "learning_rate": 7.837295575464088e-08, "loss": 0.2653, "step": 18006 }, { "epoch": 2.93229654358181, "grad_norm": 0.1563330590724945, "learning_rate": 7.799860896873234e-08, "loss": 0.2814, "step": 18007 }, { "epoch": 2.932459390139641, "grad_norm": 0.17299070954322815, "learning_rate": 7.762515695734219e-08, "loss": 0.297, "step": 18008 }, { "epoch": 2.9326222366974717, "grad_norm": 0.19582930207252502, "learning_rate": 7.725259973387911e-08, "loss": 0.3035, "step": 18009 }, { "epoch": 2.9327850832553026, "grad_norm": 0.17845773696899414, "learning_rate": 7.688093731171852e-08, "loss": 0.2501, "step": 18010 }, { "epoch": 2.9329479298131336, "grad_norm": 0.15273243188858032, "learning_rate": 7.651016970420255e-08, "loss": 0.2571, "step": 18011 }, { "epoch": 2.9331107763709645, "grad_norm": 0.18564216792583466, "learning_rate": 7.614029692464552e-08, "loss": 0.2529, "step": 18012 }, { "epoch": 2.9332736229287955, "grad_norm": 0.15745556354522705, "learning_rate": 7.577131898632573e-08, "loss": 0.2345, "step": 18013 }, { "epoch": 2.933436469486626, "grad_norm": 0.19809256494045258, "learning_rate": 7.540323590249088e-08, "loss": 0.2752, "step": 18014 }, { "epoch": 2.9335993160444573, "grad_norm": 0.18087653815746307, "learning_rate": 7.503604768635541e-08, "loss": 0.2559, "step": 18015 }, { "epoch": 2.933762162602288, "grad_norm": 0.1840267777442932, "learning_rate": 7.466975435110324e-08, "loss": 0.2364, "step": 18016 }, { "epoch": 2.933925009160119, "grad_norm": 0.16766950488090515, "learning_rate": 7.430435590988216e-08, "loss": 0.2959, "step": 18017 }, { "epoch": 2.9340878557179497, "grad_norm": 0.15003910660743713, "learning_rate": 7.39398523758178e-08, "loss": 0.2559, "step": 18018 }, { "epoch": 2.9342507022757807, "grad_norm": 0.20845413208007812, "learning_rate": 7.357624376199135e-08, "loss": 0.2568, "step": 18019 }, { "epoch": 2.9344135488336116, "grad_norm": 0.21401499211788177, "learning_rate": 7.321353008146182e-08, "loss": 0.2567, "step": 18020 }, { "epoch": 2.9345763953914425, "grad_norm": 0.21568268537521362, "learning_rate": 7.285171134724655e-08, "loss": 0.2931, "step": 18021 }, { "epoch": 2.9347392419492735, "grad_norm": 0.18822385370731354, "learning_rate": 7.249078757234074e-08, "loss": 0.266, "step": 18022 }, { "epoch": 2.934902088507104, "grad_norm": 0.1424555629491806, "learning_rate": 7.213075876969788e-08, "loss": 0.242, "step": 18023 }, { "epoch": 2.935064935064935, "grad_norm": 0.19269663095474243, "learning_rate": 7.177162495224932e-08, "loss": 0.2452, "step": 18024 }, { "epoch": 2.935227781622766, "grad_norm": 0.1815825253725052, "learning_rate": 7.141338613288473e-08, "loss": 0.2796, "step": 18025 }, { "epoch": 2.935390628180597, "grad_norm": 0.21265475451946259, "learning_rate": 7.105604232446606e-08, "loss": 0.2309, "step": 18026 }, { "epoch": 2.9355534747384278, "grad_norm": 0.21116913855075836, "learning_rate": 7.069959353982747e-08, "loss": 0.2897, "step": 18027 }, { "epoch": 2.9357163212962587, "grad_norm": 0.16563573479652405, "learning_rate": 7.034403979176429e-08, "loss": 0.2622, "step": 18028 }, { "epoch": 2.9358791678540896, "grad_norm": 0.1997418850660324, "learning_rate": 6.998938109303855e-08, "loss": 0.2693, "step": 18029 }, { "epoch": 2.93604201441192, "grad_norm": 0.1938038021326065, "learning_rate": 6.963561745638724e-08, "loss": 0.2605, "step": 18030 }, { "epoch": 2.9362048609697515, "grad_norm": 0.17084677517414093, "learning_rate": 6.928274889451137e-08, "loss": 0.277, "step": 18031 }, { "epoch": 2.936367707527582, "grad_norm": 0.18853634595870972, "learning_rate": 6.893077542007853e-08, "loss": 0.3097, "step": 18032 }, { "epoch": 2.936530554085413, "grad_norm": 0.1908130943775177, "learning_rate": 6.857969704572586e-08, "loss": 0.2451, "step": 18033 }, { "epoch": 2.936693400643244, "grad_norm": 0.20997254550457, "learning_rate": 6.822951378405996e-08, "loss": 0.2766, "step": 18034 }, { "epoch": 2.936856247201075, "grad_norm": 0.1827007383108139, "learning_rate": 6.788022564765128e-08, "loss": 0.2593, "step": 18035 }, { "epoch": 2.937019093758906, "grad_norm": 0.19362397491931915, "learning_rate": 6.753183264904262e-08, "loss": 0.274, "step": 18036 }, { "epoch": 2.9371819403167363, "grad_norm": 0.26637259125709534, "learning_rate": 6.718433480073783e-08, "loss": 0.2552, "step": 18037 }, { "epoch": 2.9373447868745677, "grad_norm": 0.1961422860622406, "learning_rate": 6.68377321152186e-08, "loss": 0.2989, "step": 18038 }, { "epoch": 2.937507633432398, "grad_norm": 0.22202853858470917, "learning_rate": 6.649202460492498e-08, "loss": 0.2794, "step": 18039 }, { "epoch": 2.937670479990229, "grad_norm": 0.18981821835041046, "learning_rate": 6.614721228227205e-08, "loss": 0.2784, "step": 18040 }, { "epoch": 2.93783332654806, "grad_norm": 0.16503101587295532, "learning_rate": 6.580329515963602e-08, "loss": 0.24, "step": 18041 }, { "epoch": 2.937996173105891, "grad_norm": 0.16488875448703766, "learning_rate": 6.54602732493681e-08, "loss": 0.2443, "step": 18042 }, { "epoch": 2.938159019663722, "grad_norm": 0.1771288365125656, "learning_rate": 6.511814656378345e-08, "loss": 0.2713, "step": 18043 }, { "epoch": 2.938321866221553, "grad_norm": 0.1677350550889969, "learning_rate": 6.477691511516115e-08, "loss": 0.2441, "step": 18044 }, { "epoch": 2.938484712779384, "grad_norm": 0.2134358286857605, "learning_rate": 6.443657891575805e-08, "loss": 0.2777, "step": 18045 }, { "epoch": 2.9386475593372143, "grad_norm": 0.17586837708950043, "learning_rate": 6.409713797778937e-08, "loss": 0.2646, "step": 18046 }, { "epoch": 2.9388104058950453, "grad_norm": 0.1901659369468689, "learning_rate": 6.375859231344261e-08, "loss": 0.2734, "step": 18047 }, { "epoch": 2.938973252452876, "grad_norm": 0.16618448495864868, "learning_rate": 6.342094193487469e-08, "loss": 0.2495, "step": 18048 }, { "epoch": 2.939136099010707, "grad_norm": 0.1590796411037445, "learning_rate": 6.308418685420647e-08, "loss": 0.2662, "step": 18049 }, { "epoch": 2.939298945568538, "grad_norm": 0.17585329711437225, "learning_rate": 6.274832708353106e-08, "loss": 0.2724, "step": 18050 }, { "epoch": 2.939461792126369, "grad_norm": 0.2124401181936264, "learning_rate": 6.241336263489995e-08, "loss": 0.2632, "step": 18051 }, { "epoch": 2.9396246386842, "grad_norm": 0.13556206226348877, "learning_rate": 6.207929352034792e-08, "loss": 0.2588, "step": 18052 }, { "epoch": 2.9397874852420305, "grad_norm": 0.20664316415786743, "learning_rate": 6.174611975186539e-08, "loss": 0.265, "step": 18053 }, { "epoch": 2.9399503317998614, "grad_norm": 0.17310281097888947, "learning_rate": 6.141384134141226e-08, "loss": 0.2764, "step": 18054 }, { "epoch": 2.9401131783576924, "grad_norm": 0.15964621305465698, "learning_rate": 6.108245830092063e-08, "loss": 0.24, "step": 18055 }, { "epoch": 2.9402760249155233, "grad_norm": 0.19864797592163086, "learning_rate": 6.075197064228932e-08, "loss": 0.2668, "step": 18056 }, { "epoch": 2.9404388714733543, "grad_norm": 0.19225695729255676, "learning_rate": 6.04223783773783e-08, "loss": 0.2344, "step": 18057 }, { "epoch": 2.940601718031185, "grad_norm": 0.1983325481414795, "learning_rate": 6.009368151802808e-08, "loss": 0.2807, "step": 18058 }, { "epoch": 2.940764564589016, "grad_norm": 0.19044965505599976, "learning_rate": 5.976588007603756e-08, "loss": 0.2688, "step": 18059 }, { "epoch": 2.9409274111468466, "grad_norm": 0.22439312934875488, "learning_rate": 5.943897406317234e-08, "loss": 0.3171, "step": 18060 }, { "epoch": 2.941090257704678, "grad_norm": 0.19248566031455994, "learning_rate": 5.911296349117301e-08, "loss": 0.2848, "step": 18061 }, { "epoch": 2.9412531042625085, "grad_norm": 0.15524284541606903, "learning_rate": 5.8787848371744114e-08, "loss": 0.2801, "step": 18062 }, { "epoch": 2.9414159508203395, "grad_norm": 0.20175911486148834, "learning_rate": 5.846362871655409e-08, "loss": 0.2772, "step": 18063 }, { "epoch": 2.9415787973781704, "grad_norm": 0.1733473688364029, "learning_rate": 5.814030453724917e-08, "loss": 0.2776, "step": 18064 }, { "epoch": 2.9417416439360013, "grad_norm": 0.21283839643001556, "learning_rate": 5.7817875845436745e-08, "loss": 0.2964, "step": 18065 }, { "epoch": 2.9419044904938323, "grad_norm": 0.17022563517093658, "learning_rate": 5.7496342652688104e-08, "loss": 0.2503, "step": 18066 }, { "epoch": 2.9420673370516632, "grad_norm": 0.18544118106365204, "learning_rate": 5.7175704970552354e-08, "loss": 0.2799, "step": 18067 }, { "epoch": 2.942230183609494, "grad_norm": 0.18763047456741333, "learning_rate": 5.685596281053973e-08, "loss": 0.2786, "step": 18068 }, { "epoch": 2.9423930301673247, "grad_norm": 0.1784161478281021, "learning_rate": 5.653711618412716e-08, "loss": 0.2601, "step": 18069 }, { "epoch": 2.9425558767251556, "grad_norm": 0.18357157707214355, "learning_rate": 5.621916510276659e-08, "loss": 0.2655, "step": 18070 }, { "epoch": 2.9427187232829866, "grad_norm": 0.20289236307144165, "learning_rate": 5.590210957787112e-08, "loss": 0.2812, "step": 18071 }, { "epoch": 2.9428815698408175, "grad_norm": 0.15568023920059204, "learning_rate": 5.558594962082608e-08, "loss": 0.2272, "step": 18072 }, { "epoch": 2.9430444163986484, "grad_norm": 0.1955135464668274, "learning_rate": 5.527068524297796e-08, "loss": 0.2545, "step": 18073 }, { "epoch": 2.9432072629564794, "grad_norm": 0.17979247868061066, "learning_rate": 5.4956316455651025e-08, "loss": 0.2414, "step": 18074 }, { "epoch": 2.9433701095143103, "grad_norm": 0.19597062468528748, "learning_rate": 5.464284327013069e-08, "loss": 0.2709, "step": 18075 }, { "epoch": 2.943532956072141, "grad_norm": 0.14277400076389313, "learning_rate": 5.433026569766908e-08, "loss": 0.2169, "step": 18076 }, { "epoch": 2.9436958026299718, "grad_norm": 0.20341640710830688, "learning_rate": 5.401858374948776e-08, "loss": 0.2754, "step": 18077 }, { "epoch": 2.9438586491878027, "grad_norm": 0.19948509335517883, "learning_rate": 5.370779743678334e-08, "loss": 0.2857, "step": 18078 }, { "epoch": 2.9440214957456337, "grad_norm": 0.18199901282787323, "learning_rate": 5.3397906770707995e-08, "loss": 0.2336, "step": 18079 }, { "epoch": 2.9441843423034646, "grad_norm": 0.21097402274608612, "learning_rate": 5.3088911762388946e-08, "loss": 0.2639, "step": 18080 }, { "epoch": 2.9443471888612955, "grad_norm": 0.2431836575269699, "learning_rate": 5.2780812422922876e-08, "loss": 0.2731, "step": 18081 }, { "epoch": 2.9445100354191265, "grad_norm": 0.19540667533874512, "learning_rate": 5.24736087633676e-08, "loss": 0.2591, "step": 18082 }, { "epoch": 2.944672881976957, "grad_norm": 0.15629726648330688, "learning_rate": 5.2167300794755956e-08, "loss": 0.2936, "step": 18083 }, { "epoch": 2.9448357285347884, "grad_norm": 0.18663053214550018, "learning_rate": 5.186188852808471e-08, "loss": 0.2657, "step": 18084 }, { "epoch": 2.944998575092619, "grad_norm": 0.1468013972043991, "learning_rate": 5.1557371974317315e-08, "loss": 0.256, "step": 18085 }, { "epoch": 2.94516142165045, "grad_norm": 0.17859621345996857, "learning_rate": 5.1253751144386686e-08, "loss": 0.2677, "step": 18086 }, { "epoch": 2.9453242682082807, "grad_norm": 0.1865404099225998, "learning_rate": 5.0951026049195216e-08, "loss": 0.2627, "step": 18087 }, { "epoch": 2.9454871147661117, "grad_norm": 0.18639585375785828, "learning_rate": 5.064919669961199e-08, "loss": 0.2892, "step": 18088 }, { "epoch": 2.9456499613239426, "grad_norm": 0.19693109393119812, "learning_rate": 5.034826310647278e-08, "loss": 0.2655, "step": 18089 }, { "epoch": 2.945812807881773, "grad_norm": 0.21755222976207733, "learning_rate": 5.004822528058284e-08, "loss": 0.2779, "step": 18090 }, { "epoch": 2.9459756544396045, "grad_norm": 0.15940317511558533, "learning_rate": 4.9749083232711325e-08, "loss": 0.2645, "step": 18091 }, { "epoch": 2.946138500997435, "grad_norm": 0.18791000545024872, "learning_rate": 4.945083697360242e-08, "loss": 0.2845, "step": 18092 }, { "epoch": 2.946301347555266, "grad_norm": 0.213485985994339, "learning_rate": 4.9153486513961454e-08, "loss": 0.2837, "step": 18093 }, { "epoch": 2.946464194113097, "grad_norm": 0.1736929565668106, "learning_rate": 4.8857031864465996e-08, "loss": 0.2955, "step": 18094 }, { "epoch": 2.946627040670928, "grad_norm": 0.16401998698711395, "learning_rate": 4.856147303575753e-08, "loss": 0.2671, "step": 18095 }, { "epoch": 2.946789887228759, "grad_norm": 0.1731797456741333, "learning_rate": 4.8266810038449794e-08, "loss": 0.2956, "step": 18096 }, { "epoch": 2.9469527337865897, "grad_norm": 0.18132485449314117, "learning_rate": 4.797304288312043e-08, "loss": 0.2855, "step": 18097 }, { "epoch": 2.9471155803444207, "grad_norm": 0.17699289321899414, "learning_rate": 4.768017158031657e-08, "loss": 0.247, "step": 18098 }, { "epoch": 2.947278426902251, "grad_norm": 0.18747232854366302, "learning_rate": 4.7388196140554785e-08, "loss": 0.2692, "step": 18099 }, { "epoch": 2.947441273460082, "grad_norm": 0.1530875712633133, "learning_rate": 4.7097116574315594e-08, "loss": 0.2272, "step": 18100 }, { "epoch": 2.947604120017913, "grad_norm": 0.15842559933662415, "learning_rate": 4.680693289205174e-08, "loss": 0.2142, "step": 18101 }, { "epoch": 2.947766966575744, "grad_norm": 0.21427161991596222, "learning_rate": 4.651764510418266e-08, "loss": 0.2702, "step": 18102 }, { "epoch": 2.947929813133575, "grad_norm": 0.16176387667655945, "learning_rate": 4.6229253221088954e-08, "loss": 0.2591, "step": 18103 }, { "epoch": 2.948092659691406, "grad_norm": 0.1950945407152176, "learning_rate": 4.594175725313177e-08, "loss": 0.2912, "step": 18104 }, { "epoch": 2.948255506249237, "grad_norm": 0.16984716057777405, "learning_rate": 4.565515721062785e-08, "loss": 0.2436, "step": 18105 }, { "epoch": 2.9484183528070673, "grad_norm": 0.13403205573558807, "learning_rate": 4.536945310386897e-08, "loss": 0.2914, "step": 18106 }, { "epoch": 2.9485811993648983, "grad_norm": 0.1766064167022705, "learning_rate": 4.50846449431136e-08, "loss": 0.2613, "step": 18107 }, { "epoch": 2.948744045922729, "grad_norm": 0.16785456240177155, "learning_rate": 4.480073273858687e-08, "loss": 0.2613, "step": 18108 }, { "epoch": 2.94890689248056, "grad_norm": 0.19455371797084808, "learning_rate": 4.4517716500480644e-08, "loss": 0.2835, "step": 18109 }, { "epoch": 2.949069739038391, "grad_norm": 0.13867545127868652, "learning_rate": 4.4235596238956237e-08, "loss": 0.282, "step": 18110 }, { "epoch": 2.949232585596222, "grad_norm": 0.21854832768440247, "learning_rate": 4.3954371964144424e-08, "loss": 0.3168, "step": 18111 }, { "epoch": 2.949395432154053, "grad_norm": 0.1668606847524643, "learning_rate": 4.3674043686142693e-08, "loss": 0.2804, "step": 18112 }, { "epoch": 2.9495582787118835, "grad_norm": 0.19124886393547058, "learning_rate": 4.3394611415009666e-08, "loss": 0.2825, "step": 18113 }, { "epoch": 2.949721125269715, "grad_norm": 0.20573025941848755, "learning_rate": 4.311607516078453e-08, "loss": 0.2676, "step": 18114 }, { "epoch": 2.9498839718275454, "grad_norm": 0.14237810671329498, "learning_rate": 4.283843493346762e-08, "loss": 0.281, "step": 18115 }, { "epoch": 2.9500468183853763, "grad_norm": 0.27377697825431824, "learning_rate": 4.256169074302041e-08, "loss": 0.2694, "step": 18116 }, { "epoch": 2.9502096649432072, "grad_norm": 0.16799594461917877, "learning_rate": 4.228584259938495e-08, "loss": 0.2529, "step": 18117 }, { "epoch": 2.950372511501038, "grad_norm": 0.13014087080955505, "learning_rate": 4.201089051246165e-08, "loss": 0.2491, "step": 18118 }, { "epoch": 2.950535358058869, "grad_norm": 0.20986682176589966, "learning_rate": 4.1736834492125956e-08, "loss": 0.2819, "step": 18119 }, { "epoch": 2.9506982046167, "grad_norm": 0.17960131168365479, "learning_rate": 4.146367454821443e-08, "loss": 0.2797, "step": 18120 }, { "epoch": 2.950861051174531, "grad_norm": 0.15678779780864716, "learning_rate": 4.1191410690533115e-08, "loss": 0.2711, "step": 18121 }, { "epoch": 2.9510238977323615, "grad_norm": 0.1685411036014557, "learning_rate": 4.092004292886031e-08, "loss": 0.2936, "step": 18122 }, { "epoch": 2.9511867442901925, "grad_norm": 0.20709700882434845, "learning_rate": 4.064957127293545e-08, "loss": 0.2882, "step": 18123 }, { "epoch": 2.9513495908480234, "grad_norm": 0.17878016829490662, "learning_rate": 4.0379995732472975e-08, "loss": 0.2459, "step": 18124 }, { "epoch": 2.9515124374058543, "grad_norm": 0.19013872742652893, "learning_rate": 4.011131631714849e-08, "loss": 0.2498, "step": 18125 }, { "epoch": 2.9516752839636853, "grad_norm": 0.1623237133026123, "learning_rate": 3.984353303660981e-08, "loss": 0.2354, "step": 18126 }, { "epoch": 2.9518381305215162, "grad_norm": 0.20174473524093628, "learning_rate": 3.9576645900471495e-08, "loss": 0.2612, "step": 18127 }, { "epoch": 2.952000977079347, "grad_norm": 0.174010768532753, "learning_rate": 3.931065491831476e-08, "loss": 0.2159, "step": 18128 }, { "epoch": 2.9521638236371777, "grad_norm": 0.17705677449703217, "learning_rate": 3.9045560099687516e-08, "loss": 0.2504, "step": 18129 }, { "epoch": 2.9523266701950086, "grad_norm": 0.13616864383220673, "learning_rate": 3.878136145411271e-08, "loss": 0.2681, "step": 18130 }, { "epoch": 2.9524895167528395, "grad_norm": 0.19066570699214935, "learning_rate": 3.8518058991071634e-08, "loss": 0.2914, "step": 18131 }, { "epoch": 2.9526523633106705, "grad_norm": 0.15666574239730835, "learning_rate": 3.825565272002063e-08, "loss": 0.2466, "step": 18132 }, { "epoch": 2.9528152098685014, "grad_norm": 0.15999247133731842, "learning_rate": 3.799414265037715e-08, "loss": 0.2107, "step": 18133 }, { "epoch": 2.9529780564263324, "grad_norm": 0.19195066392421722, "learning_rate": 3.7733528791530916e-08, "loss": 0.2596, "step": 18134 }, { "epoch": 2.9531409029841633, "grad_norm": 0.2192281186580658, "learning_rate": 3.74738111528411e-08, "loss": 0.2975, "step": 18135 }, { "epoch": 2.953303749541994, "grad_norm": 0.20481829345226288, "learning_rate": 3.7214989743633577e-08, "loss": 0.2552, "step": 18136 }, { "epoch": 2.953466596099825, "grad_norm": 0.1495012491941452, "learning_rate": 3.6957064573195367e-08, "loss": 0.2527, "step": 18137 }, { "epoch": 2.9536294426576557, "grad_norm": 0.17201553285121918, "learning_rate": 3.6700035650791274e-08, "loss": 0.2497, "step": 18138 }, { "epoch": 2.9537922892154866, "grad_norm": 0.13840843737125397, "learning_rate": 3.644390298564726e-08, "loss": 0.2522, "step": 18139 }, { "epoch": 2.9539551357733176, "grad_norm": 0.16926869750022888, "learning_rate": 3.618866658695874e-08, "loss": 0.2421, "step": 18140 }, { "epoch": 2.9541179823311485, "grad_norm": 0.20011138916015625, "learning_rate": 3.5934326463893384e-08, "loss": 0.2927, "step": 18141 }, { "epoch": 2.9542808288889795, "grad_norm": 0.20332081615924835, "learning_rate": 3.5680882625580004e-08, "loss": 0.2543, "step": 18142 }, { "epoch": 2.95444367544681, "grad_norm": 0.20111966133117676, "learning_rate": 3.5428335081116873e-08, "loss": 0.2765, "step": 18143 }, { "epoch": 2.9546065220046414, "grad_norm": 0.14788596332073212, "learning_rate": 3.517668383957173e-08, "loss": 0.2805, "step": 18144 }, { "epoch": 2.954769368562472, "grad_norm": 0.16125881671905518, "learning_rate": 3.492592890998181e-08, "loss": 0.2644, "step": 18145 }, { "epoch": 2.954932215120303, "grad_norm": 0.2024383842945099, "learning_rate": 3.467607030134545e-08, "loss": 0.2606, "step": 18146 }, { "epoch": 2.9550950616781337, "grad_norm": 0.17958307266235352, "learning_rate": 3.4427108022638824e-08, "loss": 0.3045, "step": 18147 }, { "epoch": 2.9552579082359647, "grad_norm": 0.16737313568592072, "learning_rate": 3.417904208279643e-08, "loss": 0.2564, "step": 18148 }, { "epoch": 2.9554207547937956, "grad_norm": 0.19181643426418304, "learning_rate": 3.3931872490727825e-08, "loss": 0.2812, "step": 18149 }, { "epoch": 2.9555836013516266, "grad_norm": 0.1920517086982727, "learning_rate": 3.368559925530368e-08, "loss": 0.2392, "step": 18150 }, { "epoch": 2.9557464479094575, "grad_norm": 0.165144681930542, "learning_rate": 3.34402223853697e-08, "loss": 0.2636, "step": 18151 }, { "epoch": 2.955909294467288, "grad_norm": 0.24800603091716766, "learning_rate": 3.319574188972996e-08, "loss": 0.2761, "step": 18152 }, { "epoch": 2.956072141025119, "grad_norm": 0.1422317922115326, "learning_rate": 3.295215777716909e-08, "loss": 0.2982, "step": 18153 }, { "epoch": 2.95623498758295, "grad_norm": 0.21663102507591248, "learning_rate": 3.2709470056427326e-08, "loss": 0.2492, "step": 18154 }, { "epoch": 2.956397834140781, "grad_norm": 0.17067104578018188, "learning_rate": 3.246767873621992e-08, "loss": 0.2645, "step": 18155 }, { "epoch": 2.9565606806986118, "grad_norm": 0.16449715197086334, "learning_rate": 3.222678382522881e-08, "loss": 0.2631, "step": 18156 }, { "epoch": 2.9567235272564427, "grad_norm": 0.17070989310741425, "learning_rate": 3.198678533209987e-08, "loss": 0.2759, "step": 18157 }, { "epoch": 2.9568863738142737, "grad_norm": 0.2165667563676834, "learning_rate": 3.174768326545119e-08, "loss": 0.2518, "step": 18158 }, { "epoch": 2.957049220372104, "grad_norm": 0.24437952041625977, "learning_rate": 3.1509477633870354e-08, "loss": 0.2623, "step": 18159 }, { "epoch": 2.9572120669299355, "grad_norm": 0.21696196496486664, "learning_rate": 3.127216844590331e-08, "loss": 0.3055, "step": 18160 }, { "epoch": 2.957374913487766, "grad_norm": 0.23812468349933624, "learning_rate": 3.103575571007655e-08, "loss": 0.3077, "step": 18161 }, { "epoch": 2.957537760045597, "grad_norm": 0.20286531746387482, "learning_rate": 3.0800239434872206e-08, "loss": 0.2418, "step": 18162 }, { "epoch": 2.957700606603428, "grad_norm": 0.18737150728702545, "learning_rate": 3.0565619628752926e-08, "loss": 0.269, "step": 18163 }, { "epoch": 2.957863453161259, "grad_norm": 0.1417667418718338, "learning_rate": 3.033189630013422e-08, "loss": 0.308, "step": 18164 }, { "epoch": 2.95802629971909, "grad_norm": 0.19742333889007568, "learning_rate": 3.009906945741492e-08, "loss": 0.3311, "step": 18165 }, { "epoch": 2.9581891462769203, "grad_norm": 0.2056322544813156, "learning_rate": 2.986713910894945e-08, "loss": 0.2802, "step": 18166 }, { "epoch": 2.9583519928347517, "grad_norm": 0.19323833286762238, "learning_rate": 2.9636105263064483e-08, "loss": 0.2794, "step": 18167 }, { "epoch": 2.958514839392582, "grad_norm": 0.19306573271751404, "learning_rate": 2.9405967928058943e-08, "loss": 0.2595, "step": 18168 }, { "epoch": 2.958677685950413, "grad_norm": 0.17767925560474396, "learning_rate": 2.917672711219288e-08, "loss": 0.2716, "step": 18169 }, { "epoch": 2.958840532508244, "grad_norm": 0.21225999295711517, "learning_rate": 2.894838282369583e-08, "loss": 0.2452, "step": 18170 }, { "epoch": 2.959003379066075, "grad_norm": 0.16843371093273163, "learning_rate": 2.8720935070766786e-08, "loss": 0.2905, "step": 18171 }, { "epoch": 2.959166225623906, "grad_norm": 0.19839073717594147, "learning_rate": 2.8494383861574213e-08, "loss": 0.2629, "step": 18172 }, { "epoch": 2.959329072181737, "grad_norm": 0.1787991225719452, "learning_rate": 2.8268729204247724e-08, "loss": 0.2735, "step": 18173 }, { "epoch": 2.959491918739568, "grad_norm": 0.22629797458648682, "learning_rate": 2.8043971106891942e-08, "loss": 0.3019, "step": 18174 }, { "epoch": 2.9596547652973983, "grad_norm": 0.17249436676502228, "learning_rate": 2.7820109577575414e-08, "loss": 0.2647, "step": 18175 }, { "epoch": 2.9598176118552293, "grad_norm": 0.15325123071670532, "learning_rate": 2.759714462433616e-08, "loss": 0.2307, "step": 18176 }, { "epoch": 2.9599804584130602, "grad_norm": 0.15794414281845093, "learning_rate": 2.737507625517888e-08, "loss": 0.2753, "step": 18177 }, { "epoch": 2.960143304970891, "grad_norm": 0.19815589487552643, "learning_rate": 2.7153904478074976e-08, "loss": 0.2816, "step": 18178 }, { "epoch": 2.960306151528722, "grad_norm": 0.19896039366722107, "learning_rate": 2.69336293009681e-08, "loss": 0.26, "step": 18179 }, { "epoch": 2.960468998086553, "grad_norm": 0.15923017263412476, "learning_rate": 2.6714250731763036e-08, "loss": 0.2503, "step": 18180 }, { "epoch": 2.960631844644384, "grad_norm": 0.16458509862422943, "learning_rate": 2.649576877833959e-08, "loss": 0.2636, "step": 18181 }, { "epoch": 2.9607946912022145, "grad_norm": 0.1880371868610382, "learning_rate": 2.6278183448541494e-08, "loss": 0.2649, "step": 18182 }, { "epoch": 2.9609575377600454, "grad_norm": 0.1897340565919876, "learning_rate": 2.606149475017916e-08, "loss": 0.2669, "step": 18183 }, { "epoch": 2.9611203843178764, "grad_norm": 0.1614224910736084, "learning_rate": 2.5845702691035256e-08, "loss": 0.2876, "step": 18184 }, { "epoch": 2.9612832308757073, "grad_norm": 0.16171583533287048, "learning_rate": 2.563080727885081e-08, "loss": 0.2586, "step": 18185 }, { "epoch": 2.9614460774335383, "grad_norm": 0.18563102185726166, "learning_rate": 2.5416808521347423e-08, "loss": 0.2309, "step": 18186 }, { "epoch": 2.961608923991369, "grad_norm": 0.19415749609470367, "learning_rate": 2.520370642620784e-08, "loss": 0.2545, "step": 18187 }, { "epoch": 2.9617717705492, "grad_norm": 0.142021045088768, "learning_rate": 2.4991501001081495e-08, "loss": 0.3015, "step": 18188 }, { "epoch": 2.9619346171070307, "grad_norm": 0.19611823558807373, "learning_rate": 2.4780192253587297e-08, "loss": 0.2731, "step": 18189 }, { "epoch": 2.962097463664862, "grad_norm": 0.22052177786827087, "learning_rate": 2.4569780191310842e-08, "loss": 0.2848, "step": 18190 }, { "epoch": 2.9622603102226925, "grad_norm": 0.1691523790359497, "learning_rate": 2.43602648218072e-08, "loss": 0.261, "step": 18191 }, { "epoch": 2.9624231567805235, "grad_norm": 0.19435976445674896, "learning_rate": 2.4151646152600903e-08, "loss": 0.2807, "step": 18192 }, { "epoch": 2.9625860033383544, "grad_norm": 0.1545051485300064, "learning_rate": 2.3943924191177637e-08, "loss": 0.2701, "step": 18193 }, { "epoch": 2.9627488498961854, "grad_norm": 0.17483434081077576, "learning_rate": 2.3737098945000868e-08, "loss": 0.2648, "step": 18194 }, { "epoch": 2.9629116964540163, "grad_norm": 0.1525787115097046, "learning_rate": 2.3531170421492444e-08, "loss": 0.2193, "step": 18195 }, { "epoch": 2.9630745430118473, "grad_norm": 0.15799374878406525, "learning_rate": 2.332613862804367e-08, "loss": 0.2872, "step": 18196 }, { "epoch": 2.963237389569678, "grad_norm": 0.16812506318092346, "learning_rate": 2.312200357202088e-08, "loss": 0.3001, "step": 18197 }, { "epoch": 2.9634002361275087, "grad_norm": 0.1616968959569931, "learning_rate": 2.2918765260751542e-08, "loss": 0.279, "step": 18198 }, { "epoch": 2.9635630826853396, "grad_norm": 0.17929938435554504, "learning_rate": 2.2716423701529822e-08, "loss": 0.2775, "step": 18199 }, { "epoch": 2.9637259292431706, "grad_norm": 0.16949914395809174, "learning_rate": 2.2514978901624905e-08, "loss": 0.2573, "step": 18200 }, { "epoch": 2.9638887758010015, "grad_norm": 0.18831925094127655, "learning_rate": 2.2314430868264346e-08, "loss": 0.2745, "step": 18201 }, { "epoch": 2.9640516223588325, "grad_norm": 0.18232198059558868, "learning_rate": 2.211477960865349e-08, "loss": 0.2618, "step": 18202 }, { "epoch": 2.9642144689166634, "grad_norm": 0.15132509171962738, "learning_rate": 2.1916025129956054e-08, "loss": 0.2146, "step": 18203 }, { "epoch": 2.9643773154744943, "grad_norm": 0.16522373259067535, "learning_rate": 2.171816743930799e-08, "loss": 0.2495, "step": 18204 }, { "epoch": 2.964540162032325, "grad_norm": 0.20376507937908173, "learning_rate": 2.1521206543817507e-08, "loss": 0.2452, "step": 18205 }, { "epoch": 2.964703008590156, "grad_norm": 0.19141033291816711, "learning_rate": 2.1325142450553947e-08, "loss": 0.2755, "step": 18206 }, { "epoch": 2.9648658551479867, "grad_norm": 0.1460002362728119, "learning_rate": 2.1129975166553352e-08, "loss": 0.2899, "step": 18207 }, { "epoch": 2.9650287017058177, "grad_norm": 0.16529573500156403, "learning_rate": 2.0935704698826773e-08, "loss": 0.2314, "step": 18208 }, { "epoch": 2.9651915482636486, "grad_norm": 0.16306735575199127, "learning_rate": 2.074233105434642e-08, "loss": 0.2596, "step": 18209 }, { "epoch": 2.9653543948214796, "grad_norm": 0.16807173192501068, "learning_rate": 2.0549854240056733e-08, "loss": 0.2913, "step": 18210 }, { "epoch": 2.9655172413793105, "grad_norm": 0.15620261430740356, "learning_rate": 2.035827426286607e-08, "loss": 0.2746, "step": 18211 }, { "epoch": 2.965680087937141, "grad_norm": 0.14697884023189545, "learning_rate": 2.0167591129657826e-08, "loss": 0.2392, "step": 18212 }, { "epoch": 2.9658429344949724, "grad_norm": 0.15177230536937714, "learning_rate": 1.9977804847270966e-08, "loss": 0.2464, "step": 18213 }, { "epoch": 2.966005781052803, "grad_norm": 0.18334904313087463, "learning_rate": 1.9788915422525034e-08, "loss": 0.2643, "step": 18214 }, { "epoch": 2.966168627610634, "grad_norm": 0.15007977187633514, "learning_rate": 1.9600922862200723e-08, "loss": 0.2631, "step": 18215 }, { "epoch": 2.9663314741684648, "grad_norm": 0.1770053207874298, "learning_rate": 1.9413827173045405e-08, "loss": 0.2834, "step": 18216 }, { "epoch": 2.9664943207262957, "grad_norm": 0.13162797689437866, "learning_rate": 1.9227628361778716e-08, "loss": 0.2698, "step": 18217 }, { "epoch": 2.9666571672841267, "grad_norm": 0.13598483800888062, "learning_rate": 1.9042326435081416e-08, "loss": 0.2848, "step": 18218 }, { "epoch": 2.966820013841957, "grad_norm": 0.18208110332489014, "learning_rate": 1.8857921399612066e-08, "loss": 0.2406, "step": 18219 }, { "epoch": 2.9669828603997885, "grad_norm": 0.1770639419555664, "learning_rate": 1.8674413261990376e-08, "loss": 0.2562, "step": 18220 }, { "epoch": 2.967145706957619, "grad_norm": 0.11770778894424438, "learning_rate": 1.8491802028799965e-08, "loss": 0.2364, "step": 18221 }, { "epoch": 2.96730855351545, "grad_norm": 0.12480232119560242, "learning_rate": 1.831008770660225e-08, "loss": 0.2754, "step": 18222 }, { "epoch": 2.967471400073281, "grad_norm": 0.2333698570728302, "learning_rate": 1.81292703019198e-08, "loss": 0.243, "step": 18223 }, { "epoch": 2.967634246631112, "grad_norm": 0.1584460437297821, "learning_rate": 1.7949349821241856e-08, "loss": 0.2517, "step": 18224 }, { "epoch": 2.967797093188943, "grad_norm": 0.1867617964744568, "learning_rate": 1.77703262710327e-08, "loss": 0.256, "step": 18225 }, { "epoch": 2.9679599397467737, "grad_norm": 0.13631361722946167, "learning_rate": 1.7592199657717745e-08, "loss": 0.2678, "step": 18226 }, { "epoch": 2.9681227863046047, "grad_norm": 0.13524702191352844, "learning_rate": 1.7414969987689102e-08, "loss": 0.2672, "step": 18227 }, { "epoch": 2.968285632862435, "grad_norm": 0.19956238567829132, "learning_rate": 1.723863726731667e-08, "loss": 0.3063, "step": 18228 }, { "epoch": 2.968448479420266, "grad_norm": 0.16881658136844635, "learning_rate": 1.706320150292595e-08, "loss": 0.2681, "step": 18229 }, { "epoch": 2.968611325978097, "grad_norm": 0.1880345493555069, "learning_rate": 1.688866270081746e-08, "loss": 0.2775, "step": 18230 }, { "epoch": 2.968774172535928, "grad_norm": 0.16462640464305878, "learning_rate": 1.6715020867255625e-08, "loss": 0.2479, "step": 18231 }, { "epoch": 2.968937019093759, "grad_norm": 0.17698313295841217, "learning_rate": 1.6542276008479907e-08, "loss": 0.2616, "step": 18232 }, { "epoch": 2.96909986565159, "grad_norm": 0.17171664535999298, "learning_rate": 1.6370428130685346e-08, "loss": 0.2333, "step": 18233 }, { "epoch": 2.969262712209421, "grad_norm": 0.18928182125091553, "learning_rate": 1.6199477240047557e-08, "loss": 0.2774, "step": 18234 }, { "epoch": 2.9694255587672513, "grad_norm": 0.16772176325321198, "learning_rate": 1.6029423342700524e-08, "loss": 0.2971, "step": 18235 }, { "epoch": 2.9695884053250823, "grad_norm": 0.2065553069114685, "learning_rate": 1.5860266444753247e-08, "loss": 0.2471, "step": 18236 }, { "epoch": 2.9697512518829132, "grad_norm": 0.15552447736263275, "learning_rate": 1.5692006552275872e-08, "loss": 0.2697, "step": 18237 }, { "epoch": 2.969914098440744, "grad_norm": 0.18202857673168182, "learning_rate": 1.552464367131079e-08, "loss": 0.2733, "step": 18238 }, { "epoch": 2.970076944998575, "grad_norm": 0.1752808541059494, "learning_rate": 1.5358177807864305e-08, "loss": 0.2687, "step": 18239 }, { "epoch": 2.970239791556406, "grad_norm": 0.17174403369426727, "learning_rate": 1.5192608967917744e-08, "loss": 0.2734, "step": 18240 }, { "epoch": 2.970402638114237, "grad_norm": 0.18312475085258484, "learning_rate": 1.50279371574108e-08, "loss": 0.2798, "step": 18241 }, { "epoch": 2.9705654846720675, "grad_norm": 0.1848241239786148, "learning_rate": 1.486416238226096e-08, "loss": 0.2713, "step": 18242 }, { "epoch": 2.970728331229899, "grad_norm": 0.16491834819316864, "learning_rate": 1.4701284648341307e-08, "loss": 0.2604, "step": 18243 }, { "epoch": 2.9708911777877294, "grad_norm": 0.23155640065670013, "learning_rate": 1.453930396150549e-08, "loss": 0.2718, "step": 18244 }, { "epoch": 2.9710540243455603, "grad_norm": 0.15794342756271362, "learning_rate": 1.4378220327568304e-08, "loss": 0.2129, "step": 18245 }, { "epoch": 2.9712168709033913, "grad_norm": 0.16660498082637787, "learning_rate": 1.4218033752311232e-08, "loss": 0.2559, "step": 18246 }, { "epoch": 2.971379717461222, "grad_norm": 0.15415161848068237, "learning_rate": 1.4058744241485233e-08, "loss": 0.2618, "step": 18247 }, { "epoch": 2.971542564019053, "grad_norm": 0.18782399594783783, "learning_rate": 1.390035180081073e-08, "loss": 0.2747, "step": 18248 }, { "epoch": 2.971705410576884, "grad_norm": 0.1760873645544052, "learning_rate": 1.3742856435974838e-08, "loss": 0.2742, "step": 18249 }, { "epoch": 2.971868257134715, "grad_norm": 0.16492123901844025, "learning_rate": 1.3586258152628595e-08, "loss": 0.2621, "step": 18250 }, { "epoch": 2.9720311036925455, "grad_norm": 0.21098043024539948, "learning_rate": 1.3430556956398055e-08, "loss": 0.2755, "step": 18251 }, { "epoch": 2.9721939502503765, "grad_norm": 0.19423674046993256, "learning_rate": 1.3275752852873191e-08, "loss": 0.2393, "step": 18252 }, { "epoch": 2.9723567968082074, "grad_norm": 0.15877416729927063, "learning_rate": 1.312184584761067e-08, "loss": 0.2796, "step": 18253 }, { "epoch": 2.9725196433660384, "grad_norm": 0.19366252422332764, "learning_rate": 1.2968835946136626e-08, "loss": 0.2653, "step": 18254 }, { "epoch": 2.9726824899238693, "grad_norm": 0.13163331151008606, "learning_rate": 1.2816723153943888e-08, "loss": 0.2739, "step": 18255 }, { "epoch": 2.9728453364817002, "grad_norm": 0.21946343779563904, "learning_rate": 1.2665507476494754e-08, "loss": 0.2528, "step": 18256 }, { "epoch": 2.973008183039531, "grad_norm": 0.175210103392601, "learning_rate": 1.2515188919218212e-08, "loss": 0.2212, "step": 18257 }, { "epoch": 2.9731710295973617, "grad_norm": 0.17652995884418488, "learning_rate": 1.2365767487512724e-08, "loss": 0.2895, "step": 18258 }, { "epoch": 2.9733338761551926, "grad_norm": 0.23722952604293823, "learning_rate": 1.2217243186737892e-08, "loss": 0.2843, "step": 18259 }, { "epoch": 2.9734967227130236, "grad_norm": 0.18573161959648132, "learning_rate": 1.2069616022231111e-08, "loss": 0.238, "step": 18260 }, { "epoch": 2.9736595692708545, "grad_norm": 0.21786676347255707, "learning_rate": 1.1922885999290923e-08, "loss": 0.2824, "step": 18261 }, { "epoch": 2.9738224158286855, "grad_norm": 0.18437877297401428, "learning_rate": 1.1777053123185334e-08, "loss": 0.2761, "step": 18262 }, { "epoch": 2.9739852623865164, "grad_norm": 0.16034261882305145, "learning_rate": 1.1632117399149045e-08, "loss": 0.2743, "step": 18263 }, { "epoch": 2.9741481089443473, "grad_norm": 0.11903581023216248, "learning_rate": 1.1488078832389005e-08, "loss": 0.2675, "step": 18264 }, { "epoch": 2.974310955502178, "grad_norm": 0.13548643887043, "learning_rate": 1.1344937428073299e-08, "loss": 0.2512, "step": 18265 }, { "epoch": 2.974473802060009, "grad_norm": 0.20860010385513306, "learning_rate": 1.1202693191342262e-08, "loss": 0.2776, "step": 18266 }, { "epoch": 2.9746366486178397, "grad_norm": 0.15235185623168945, "learning_rate": 1.1061346127302918e-08, "loss": 0.2682, "step": 18267 }, { "epoch": 2.9747994951756707, "grad_norm": 0.15882903337478638, "learning_rate": 1.0920896241028988e-08, "loss": 0.2759, "step": 18268 }, { "epoch": 2.9749623417335016, "grad_norm": 0.16631224751472473, "learning_rate": 1.0781343537566435e-08, "loss": 0.273, "step": 18269 }, { "epoch": 2.9751251882913325, "grad_norm": 0.17744998633861542, "learning_rate": 1.0642688021922365e-08, "loss": 0.2673, "step": 18270 }, { "epoch": 2.9752880348491635, "grad_norm": 0.1807941347360611, "learning_rate": 1.0504929699076126e-08, "loss": 0.2763, "step": 18271 }, { "epoch": 2.975450881406994, "grad_norm": 0.19053590297698975, "learning_rate": 1.0368068573973766e-08, "loss": 0.2643, "step": 18272 }, { "epoch": 2.9756137279648254, "grad_norm": 0.1519966721534729, "learning_rate": 1.0232104651528019e-08, "loss": 0.2667, "step": 18273 }, { "epoch": 2.975776574522656, "grad_norm": 0.17858481407165527, "learning_rate": 1.009703793662109e-08, "loss": 0.2661, "step": 18274 }, { "epoch": 2.975939421080487, "grad_norm": 0.2157897800207138, "learning_rate": 9.96286843410188e-09, "loss": 0.3193, "step": 18275 }, { "epoch": 2.9761022676383178, "grad_norm": 0.1391235589981079, "learning_rate": 9.829596148788755e-09, "loss": 0.2822, "step": 18276 }, { "epoch": 2.9762651141961487, "grad_norm": 0.17895713448524475, "learning_rate": 9.697221085464003e-09, "loss": 0.2658, "step": 18277 }, { "epoch": 2.9764279607539796, "grad_norm": 0.14718860387802124, "learning_rate": 9.565743248882153e-09, "loss": 0.2724, "step": 18278 }, { "epoch": 2.9765908073118106, "grad_norm": 0.1472516506910324, "learning_rate": 9.435162643764428e-09, "loss": 0.2229, "step": 18279 }, { "epoch": 2.9767536538696415, "grad_norm": 0.21051546931266785, "learning_rate": 9.305479274798745e-09, "loss": 0.2619, "step": 18280 }, { "epoch": 2.976916500427472, "grad_norm": 0.188534215092659, "learning_rate": 9.176693146639715e-09, "loss": 0.2606, "step": 18281 }, { "epoch": 2.977079346985303, "grad_norm": 0.19506455957889557, "learning_rate": 9.048804263911414e-09, "loss": 0.3075, "step": 18282 }, { "epoch": 2.977242193543134, "grad_norm": 0.2562916576862335, "learning_rate": 8.921812631207394e-09, "loss": 0.307, "step": 18283 }, { "epoch": 2.977405040100965, "grad_norm": 0.16579055786132812, "learning_rate": 8.795718253085117e-09, "loss": 0.2611, "step": 18284 }, { "epoch": 2.977567886658796, "grad_norm": 0.22067677974700928, "learning_rate": 8.670521134071518e-09, "loss": 0.299, "step": 18285 }, { "epoch": 2.9777307332166267, "grad_norm": 0.16137069463729858, "learning_rate": 8.546221278663002e-09, "loss": 0.2602, "step": 18286 }, { "epoch": 2.9778935797744577, "grad_norm": 0.14561185240745544, "learning_rate": 8.42281869131989e-09, "loss": 0.239, "step": 18287 }, { "epoch": 2.978056426332288, "grad_norm": 0.16685841977596283, "learning_rate": 8.300313376474744e-09, "loss": 0.3011, "step": 18288 }, { "epoch": 2.9782192728901196, "grad_norm": 0.16206789016723633, "learning_rate": 8.178705338526826e-09, "loss": 0.2187, "step": 18289 }, { "epoch": 2.97838211944795, "grad_norm": 0.15920303761959076, "learning_rate": 8.057994581839313e-09, "loss": 0.2186, "step": 18290 }, { "epoch": 2.978544966005781, "grad_norm": 0.1523638814687729, "learning_rate": 7.938181110750398e-09, "loss": 0.3007, "step": 18291 }, { "epoch": 2.978707812563612, "grad_norm": 0.19075456261634827, "learning_rate": 7.819264929556646e-09, "loss": 0.2977, "step": 18292 }, { "epoch": 2.978870659121443, "grad_norm": 0.18885719776153564, "learning_rate": 7.701246042529641e-09, "loss": 0.2333, "step": 18293 }, { "epoch": 2.979033505679274, "grad_norm": 0.17746292054653168, "learning_rate": 7.58412445390766e-09, "loss": 0.2511, "step": 18294 }, { "epoch": 2.9791963522371043, "grad_norm": 0.1681508868932724, "learning_rate": 7.46790016789567e-09, "loss": 0.2921, "step": 18295 }, { "epoch": 2.9793591987949357, "grad_norm": 0.18781030178070068, "learning_rate": 7.3525731886653345e-09, "loss": 0.2372, "step": 18296 }, { "epoch": 2.979522045352766, "grad_norm": 0.1514512449502945, "learning_rate": 7.238143520355012e-09, "loss": 0.2995, "step": 18297 }, { "epoch": 2.979684891910597, "grad_norm": 0.1829044073820114, "learning_rate": 7.124611167078077e-09, "loss": 0.2682, "step": 18298 }, { "epoch": 2.979847738468428, "grad_norm": 0.1821814477443695, "learning_rate": 7.011976132909048e-09, "loss": 0.2325, "step": 18299 }, { "epoch": 2.980010585026259, "grad_norm": 0.15505331754684448, "learning_rate": 6.900238421889138e-09, "loss": 0.283, "step": 18300 }, { "epoch": 2.98017343158409, "grad_norm": 0.15147311985492706, "learning_rate": 6.789398038034578e-09, "loss": 0.283, "step": 18301 }, { "epoch": 2.980336278141921, "grad_norm": 0.20281225442886353, "learning_rate": 6.679454985319966e-09, "loss": 0.2538, "step": 18302 }, { "epoch": 2.980499124699752, "grad_norm": 0.19370995461940765, "learning_rate": 6.570409267694921e-09, "loss": 0.2579, "step": 18303 }, { "epoch": 2.9806619712575824, "grad_norm": 0.21548667550086975, "learning_rate": 6.4622608890757555e-09, "loss": 0.3125, "step": 18304 }, { "epoch": 2.9808248178154133, "grad_norm": 0.19456584751605988, "learning_rate": 6.3550098533426976e-09, "loss": 0.2536, "step": 18305 }, { "epoch": 2.9809876643732443, "grad_norm": 0.18150439858436584, "learning_rate": 6.248656164348221e-09, "loss": 0.2441, "step": 18306 }, { "epoch": 2.981150510931075, "grad_norm": 0.1982237994670868, "learning_rate": 6.143199825908719e-09, "loss": 0.2964, "step": 18307 }, { "epoch": 2.981313357488906, "grad_norm": 0.20912528038024902, "learning_rate": 6.038640841815601e-09, "loss": 0.3121, "step": 18308 }, { "epoch": 2.981476204046737, "grad_norm": 0.1743614226579666, "learning_rate": 5.934979215815873e-09, "loss": 0.247, "step": 18309 }, { "epoch": 2.981639050604568, "grad_norm": 0.17582294344902039, "learning_rate": 5.832214951637105e-09, "loss": 0.2751, "step": 18310 }, { "epoch": 2.9818018971623985, "grad_norm": 0.15166239440441132, "learning_rate": 5.73034805296524e-09, "loss": 0.2674, "step": 18311 }, { "epoch": 2.9819647437202295, "grad_norm": 0.1952436864376068, "learning_rate": 5.629378523458462e-09, "loss": 0.2801, "step": 18312 }, { "epoch": 2.9821275902780604, "grad_norm": 0.14411285519599915, "learning_rate": 5.529306366741649e-09, "loss": 0.2635, "step": 18313 }, { "epoch": 2.9822904368358913, "grad_norm": 0.20639806985855103, "learning_rate": 5.430131586409148e-09, "loss": 0.3046, "step": 18314 }, { "epoch": 2.9824532833937223, "grad_norm": 0.15199613571166992, "learning_rate": 5.331854186019225e-09, "loss": 0.2482, "step": 18315 }, { "epoch": 2.9826161299515532, "grad_norm": 0.1299588978290558, "learning_rate": 5.234474169102388e-09, "loss": 0.2837, "step": 18316 }, { "epoch": 2.982778976509384, "grad_norm": 0.15883220732212067, "learning_rate": 5.13799153915584e-09, "loss": 0.2723, "step": 18317 }, { "epoch": 2.9829418230672147, "grad_norm": 0.16495905816555023, "learning_rate": 5.0424062996407e-09, "loss": 0.2725, "step": 18318 }, { "epoch": 2.983104669625046, "grad_norm": 0.17286135256290436, "learning_rate": 4.947718453990335e-09, "loss": 0.2501, "step": 18319 }, { "epoch": 2.9832675161828766, "grad_norm": 0.14790920913219452, "learning_rate": 4.853928005602026e-09, "loss": 0.3035, "step": 18320 }, { "epoch": 2.9834303627407075, "grad_norm": 0.19096703827381134, "learning_rate": 4.761034957848076e-09, "loss": 0.268, "step": 18321 }, { "epoch": 2.9835932092985384, "grad_norm": 0.14762072265148163, "learning_rate": 4.669039314059153e-09, "loss": 0.2232, "step": 18322 }, { "epoch": 2.9837560558563694, "grad_norm": 0.1903219074010849, "learning_rate": 4.577941077540948e-09, "loss": 0.2623, "step": 18323 }, { "epoch": 2.9839189024142003, "grad_norm": 0.1800784021615982, "learning_rate": 4.487740251563066e-09, "loss": 0.2796, "step": 18324 }, { "epoch": 2.9840817489720313, "grad_norm": 0.17911124229431152, "learning_rate": 4.398436839361808e-09, "loss": 0.2748, "step": 18325 }, { "epoch": 2.984244595529862, "grad_norm": 0.179494708776474, "learning_rate": 4.310030844145718e-09, "loss": 0.2844, "step": 18326 }, { "epoch": 2.9844074420876927, "grad_norm": 0.16784130036830902, "learning_rate": 4.222522269090035e-09, "loss": 0.2741, "step": 18327 }, { "epoch": 2.9845702886455237, "grad_norm": 0.1922093629837036, "learning_rate": 4.1359111173339126e-09, "loss": 0.2804, "step": 18328 }, { "epoch": 2.9847331352033546, "grad_norm": 0.198909729719162, "learning_rate": 4.050197391988752e-09, "loss": 0.2554, "step": 18329 }, { "epoch": 2.9848959817611855, "grad_norm": 0.25423407554626465, "learning_rate": 3.965381096132648e-09, "loss": 0.2948, "step": 18330 }, { "epoch": 2.9850588283190165, "grad_norm": 0.17412903904914856, "learning_rate": 3.881462232807609e-09, "loss": 0.263, "step": 18331 }, { "epoch": 2.9852216748768474, "grad_norm": 0.14489682018756866, "learning_rate": 3.798440805030668e-09, "loss": 0.2514, "step": 18332 }, { "epoch": 2.9853845214346784, "grad_norm": 0.19567181169986725, "learning_rate": 3.716316815777221e-09, "loss": 0.2578, "step": 18333 }, { "epoch": 2.985547367992509, "grad_norm": 0.1806316077709198, "learning_rate": 3.6350902680004628e-09, "loss": 0.2493, "step": 18334 }, { "epoch": 2.98571021455034, "grad_norm": 0.20568707585334778, "learning_rate": 3.5547611646175038e-09, "loss": 0.2578, "step": 18335 }, { "epoch": 2.9858730611081707, "grad_norm": 0.1722942739725113, "learning_rate": 3.4753295085065973e-09, "loss": 0.2724, "step": 18336 }, { "epoch": 2.9860359076660017, "grad_norm": 0.1579410880804062, "learning_rate": 3.396795302523792e-09, "loss": 0.2399, "step": 18337 }, { "epoch": 2.9861987542238326, "grad_norm": 0.15101000666618347, "learning_rate": 3.319158549489054e-09, "loss": 0.2519, "step": 18338 }, { "epoch": 2.9863616007816636, "grad_norm": 0.20369106531143188, "learning_rate": 3.2424192521862684e-09, "loss": 0.2459, "step": 18339 }, { "epoch": 2.9865244473394945, "grad_norm": 0.1755034178495407, "learning_rate": 3.166577413374339e-09, "loss": 0.2463, "step": 18340 }, { "epoch": 2.986687293897325, "grad_norm": 0.1867510974407196, "learning_rate": 3.0916330357733113e-09, "loss": 0.2946, "step": 18341 }, { "epoch": 2.9868501404551564, "grad_norm": 0.15653319656848907, "learning_rate": 3.017586122078253e-09, "loss": 0.2723, "step": 18342 }, { "epoch": 2.987012987012987, "grad_norm": 0.21392112970352173, "learning_rate": 2.9444366749425966e-09, "loss": 0.2428, "step": 18343 }, { "epoch": 2.987175833570818, "grad_norm": 0.20327450335025787, "learning_rate": 2.872184696994795e-09, "loss": 0.3072, "step": 18344 }, { "epoch": 2.987338680128649, "grad_norm": 0.176986426115036, "learning_rate": 2.800830190829995e-09, "loss": 0.23, "step": 18345 }, { "epoch": 2.9875015266864797, "grad_norm": 0.16736893355846405, "learning_rate": 2.7303731590100356e-09, "loss": 0.2573, "step": 18346 }, { "epoch": 2.9876643732443107, "grad_norm": 0.18710942566394806, "learning_rate": 2.6608136040606747e-09, "loss": 0.2792, "step": 18347 }, { "epoch": 2.987827219802141, "grad_norm": 0.18474017083644867, "learning_rate": 2.5921515284854645e-09, "loss": 0.2272, "step": 18348 }, { "epoch": 2.9879900663599726, "grad_norm": 0.18850025534629822, "learning_rate": 2.52438693474355e-09, "loss": 0.2791, "step": 18349 }, { "epoch": 2.988152912917803, "grad_norm": 0.1718531847000122, "learning_rate": 2.4575198252718702e-09, "loss": 0.241, "step": 18350 }, { "epoch": 2.988315759475634, "grad_norm": 0.20083150267601013, "learning_rate": 2.3915502024685064e-09, "loss": 0.2782, "step": 18351 }, { "epoch": 2.988478606033465, "grad_norm": 0.15592417120933533, "learning_rate": 2.3264780687037855e-09, "loss": 0.2663, "step": 18352 }, { "epoch": 2.988641452591296, "grad_norm": 0.16007418930530548, "learning_rate": 2.262303426314727e-09, "loss": 0.247, "step": 18353 }, { "epoch": 2.988804299149127, "grad_norm": 0.20094165205955505, "learning_rate": 2.1990262776022673e-09, "loss": 0.2776, "step": 18354 }, { "epoch": 2.9889671457069578, "grad_norm": 0.21187986433506012, "learning_rate": 2.1366466248423644e-09, "loss": 0.3196, "step": 18355 }, { "epoch": 2.9891299922647887, "grad_norm": 0.1910051852464676, "learning_rate": 2.075164470269342e-09, "loss": 0.2339, "step": 18356 }, { "epoch": 2.989292838822619, "grad_norm": 0.13589848577976227, "learning_rate": 2.0145798160980942e-09, "loss": 0.2619, "step": 18357 }, { "epoch": 2.98945568538045, "grad_norm": 0.19654971361160278, "learning_rate": 1.954892664496333e-09, "loss": 0.2869, "step": 18358 }, { "epoch": 2.989618531938281, "grad_norm": 0.1850905567407608, "learning_rate": 1.896103017609563e-09, "loss": 0.2877, "step": 18359 }, { "epoch": 2.989781378496112, "grad_norm": 0.17161163687705994, "learning_rate": 1.838210877549984e-09, "loss": 0.2606, "step": 18360 }, { "epoch": 2.989944225053943, "grad_norm": 0.1595127135515213, "learning_rate": 1.7812162463964888e-09, "loss": 0.2537, "step": 18361 }, { "epoch": 2.990107071611774, "grad_norm": 0.19256795942783356, "learning_rate": 1.7251191261918875e-09, "loss": 0.305, "step": 18362 }, { "epoch": 2.990269918169605, "grad_norm": 0.19330695271492004, "learning_rate": 1.6699195189540106e-09, "loss": 0.2686, "step": 18363 }, { "epoch": 2.9904327647274354, "grad_norm": 0.15776778757572174, "learning_rate": 1.6156174266618307e-09, "loss": 0.2507, "step": 18364 }, { "epoch": 2.9905956112852663, "grad_norm": 0.16203735768795013, "learning_rate": 1.5622128512665645e-09, "loss": 0.2664, "step": 18365 }, { "epoch": 2.9907584578430972, "grad_norm": 0.17320890724658966, "learning_rate": 1.5097057946861225e-09, "loss": 0.2698, "step": 18366 }, { "epoch": 2.990921304400928, "grad_norm": 0.16364754736423492, "learning_rate": 1.4580962588023329e-09, "loss": 0.2506, "step": 18367 }, { "epoch": 2.991084150958759, "grad_norm": 0.15698006749153137, "learning_rate": 1.4073842454720432e-09, "loss": 0.2931, "step": 18368 }, { "epoch": 2.99124699751659, "grad_norm": 0.1720704734325409, "learning_rate": 1.3575697565160195e-09, "loss": 0.2668, "step": 18369 }, { "epoch": 2.991409844074421, "grad_norm": 0.20006497204303741, "learning_rate": 1.3086527937189451e-09, "loss": 0.2386, "step": 18370 }, { "epoch": 2.9915726906322515, "grad_norm": 0.1589651107788086, "learning_rate": 1.2606333588405239e-09, "loss": 0.2519, "step": 18371 }, { "epoch": 2.991735537190083, "grad_norm": 0.17292644083499908, "learning_rate": 1.2135114536016012e-09, "loss": 0.2307, "step": 18372 }, { "epoch": 2.9918983837479134, "grad_norm": 0.1533481925725937, "learning_rate": 1.167287079698043e-09, "loss": 0.2723, "step": 18373 }, { "epoch": 2.9920612303057443, "grad_norm": 0.17664524912834167, "learning_rate": 1.1219602387868567e-09, "loss": 0.2613, "step": 18374 }, { "epoch": 2.9922240768635753, "grad_norm": 0.14166153967380524, "learning_rate": 1.0775309324945194e-09, "loss": 0.2455, "step": 18375 }, { "epoch": 2.992386923421406, "grad_norm": 0.18995828926563263, "learning_rate": 1.033999162419752e-09, "loss": 0.2742, "step": 18376 }, { "epoch": 2.992549769979237, "grad_norm": 0.16855424642562866, "learning_rate": 9.913649301224182e-10, "loss": 0.2441, "step": 18377 }, { "epoch": 2.992712616537068, "grad_norm": 0.19544537365436554, "learning_rate": 9.496282371346254e-10, "loss": 0.2475, "step": 18378 }, { "epoch": 2.992875463094899, "grad_norm": 0.20766758918762207, "learning_rate": 9.087890849523995e-10, "loss": 0.3033, "step": 18379 }, { "epoch": 2.9930383096527295, "grad_norm": 0.18584291636943817, "learning_rate": 8.688474750440101e-10, "loss": 0.2656, "step": 18380 }, { "epoch": 2.9932011562105605, "grad_norm": 0.20510053634643555, "learning_rate": 8.298034088471962e-10, "loss": 0.2593, "step": 18381 }, { "epoch": 2.9933640027683914, "grad_norm": 0.14903832972049713, "learning_rate": 7.916568877552877e-10, "loss": 0.3024, "step": 18382 }, { "epoch": 2.9935268493262224, "grad_norm": 0.15596559643745422, "learning_rate": 7.54407913144961e-10, "loss": 0.2733, "step": 18383 }, { "epoch": 2.9936896958840533, "grad_norm": 0.1619231104850769, "learning_rate": 7.180564863512596e-10, "loss": 0.2516, "step": 18384 }, { "epoch": 2.9938525424418843, "grad_norm": 0.15402840077877045, "learning_rate": 6.826026086786952e-10, "loss": 0.2775, "step": 18385 }, { "epoch": 2.994015388999715, "grad_norm": 0.17519018054008484, "learning_rate": 6.480462814012489e-10, "loss": 0.2559, "step": 18386 }, { "epoch": 2.9941782355575457, "grad_norm": 0.15230636298656464, "learning_rate": 6.143875057568193e-10, "loss": 0.2591, "step": 18387 }, { "epoch": 2.9943410821153766, "grad_norm": 0.20089974999427795, "learning_rate": 5.816262829583252e-10, "loss": 0.2904, "step": 18388 }, { "epoch": 2.9945039286732076, "grad_norm": 0.21014490723609924, "learning_rate": 5.497626141798273e-10, "loss": 0.2685, "step": 18389 }, { "epoch": 2.9946667752310385, "grad_norm": 0.22575442492961884, "learning_rate": 5.187965005648554e-10, "loss": 0.3216, "step": 18390 }, { "epoch": 2.9948296217888695, "grad_norm": 0.16874238848686218, "learning_rate": 4.887279432236325e-10, "loss": 0.2796, "step": 18391 }, { "epoch": 2.9949924683467004, "grad_norm": 0.16655504703521729, "learning_rate": 4.5955694323862596e-10, "loss": 0.2266, "step": 18392 }, { "epoch": 2.9951553149045314, "grad_norm": 0.18338875472545624, "learning_rate": 4.312835016562211e-10, "loss": 0.2762, "step": 18393 }, { "epoch": 2.995318161462362, "grad_norm": 0.1966954469680786, "learning_rate": 4.0390761949227194e-10, "loss": 0.2591, "step": 18394 }, { "epoch": 2.9954810080201932, "grad_norm": 0.18893428146839142, "learning_rate": 3.774292977293259e-10, "loss": 0.2981, "step": 18395 }, { "epoch": 2.9956438545780237, "grad_norm": 0.1469869166612625, "learning_rate": 3.518485373166236e-10, "loss": 0.2606, "step": 18396 }, { "epoch": 2.9958067011358547, "grad_norm": 0.1901504397392273, "learning_rate": 3.271653391728746e-10, "loss": 0.2664, "step": 18397 }, { "epoch": 2.9959695476936856, "grad_norm": 0.20473451912403107, "learning_rate": 3.0337970418625737e-10, "loss": 0.2629, "step": 18398 }, { "epoch": 2.9961323942515166, "grad_norm": 0.21719814836978912, "learning_rate": 2.8049163320886805e-10, "loss": 0.2627, "step": 18399 }, { "epoch": 2.9962952408093475, "grad_norm": 0.1757599413394928, "learning_rate": 2.585011270622717e-10, "loss": 0.2697, "step": 18400 }, { "epoch": 2.996458087367178, "grad_norm": 0.1958523392677307, "learning_rate": 2.374081865375022e-10, "loss": 0.2989, "step": 18401 }, { "epoch": 2.9966209339250094, "grad_norm": 0.19104747474193573, "learning_rate": 2.172128123895112e-10, "loss": 0.2754, "step": 18402 }, { "epoch": 2.99678378048284, "grad_norm": 0.1716107428073883, "learning_rate": 1.979150053454948e-10, "loss": 0.2514, "step": 18403 }, { "epoch": 2.996946627040671, "grad_norm": 0.145446315407753, "learning_rate": 1.7951476609934236e-10, "loss": 0.266, "step": 18404 }, { "epoch": 2.9971094735985018, "grad_norm": 0.1603689342737198, "learning_rate": 1.620120953088611e-10, "loss": 0.2854, "step": 18405 }, { "epoch": 2.9972723201563327, "grad_norm": 0.18240191042423248, "learning_rate": 1.454069936041025e-10, "loss": 0.2627, "step": 18406 }, { "epoch": 2.9974351667141637, "grad_norm": 0.18689370155334473, "learning_rate": 1.2969946157903588e-10, "loss": 0.2638, "step": 18407 }, { "epoch": 2.9975980132719946, "grad_norm": 0.17211709916591644, "learning_rate": 1.1488949980265063e-10, "loss": 0.2301, "step": 18408 }, { "epoch": 2.9977608598298255, "grad_norm": 0.158354252576828, "learning_rate": 1.0097710879952704e-10, "loss": 0.2733, "step": 18409 }, { "epoch": 2.997923706387656, "grad_norm": 0.17968913912773132, "learning_rate": 8.796228907759219e-11, "loss": 0.2921, "step": 18410 }, { "epoch": 2.998086552945487, "grad_norm": 0.16669918596744537, "learning_rate": 7.584504109481305e-11, "loss": 0.2624, "step": 18411 }, { "epoch": 2.998249399503318, "grad_norm": 0.20876352488994598, "learning_rate": 6.462536529527885e-11, "loss": 0.285, "step": 18412 }, { "epoch": 2.998412246061149, "grad_norm": 0.18627439439296722, "learning_rate": 5.430326207311875e-11, "loss": 0.2789, "step": 18413 }, { "epoch": 2.99857509261898, "grad_norm": 0.2085745930671692, "learning_rate": 4.4878731805808594e-11, "loss": 0.247, "step": 18414 }, { "epoch": 2.9987379391768108, "grad_norm": 0.22021101415157318, "learning_rate": 3.6351774829190835e-11, "loss": 0.2878, "step": 18415 }, { "epoch": 2.9989007857346417, "grad_norm": 0.1868390440940857, "learning_rate": 2.87223914485768e-11, "loss": 0.2747, "step": 18416 }, { "epoch": 2.999063632292472, "grad_norm": 0.2202432006597519, "learning_rate": 2.199058194152226e-11, "loss": 0.2359, "step": 18417 }, { "epoch": 2.9992264788503036, "grad_norm": 0.17941707372665405, "learning_rate": 1.6156346543949595e-11, "loss": 0.2556, "step": 18418 }, { "epoch": 2.999389325408134, "grad_norm": 0.18112149834632874, "learning_rate": 1.1219685469576747e-11, "loss": 0.2515, "step": 18419 }, { "epoch": 2.999552171965965, "grad_norm": 0.13845261931419373, "learning_rate": 7.180598893263835e-12, "loss": 0.2587, "step": 18420 }, { "epoch": 2.999715018523796, "grad_norm": 0.17315414547920227, "learning_rate": 4.0390869621154125e-12, "loss": 0.2689, "step": 18421 }, { "epoch": 2.999877865081627, "grad_norm": 0.17593155801296234, "learning_rate": 1.7951497871537827e-12, "loss": 0.2432, "step": 18422 }, { "epoch": 3.0, "grad_norm": 0.24592043459415436, "learning_rate": 4.487874516456714e-13, "loss": 0.2463, "step": 18423 }, { "epoch": 3.0, "step": 18423, "total_flos": 1.1894348430401392e+20, "train_loss": 0.3277732414296426, "train_runtime": 225192.7747, "train_samples_per_second": 2.618, "train_steps_per_second": 0.082 } ], "logging_steps": 1, "max_steps": 18423, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1894348430401392e+20, "train_batch_size": 1, "trial_name": null, "trial_params": null }