| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.18681840472650563, |
| "eval_steps": 500, |
| "global_step": 12000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 7.784100196937735e-05, |
| "grad_norm": 18.677021026611328, |
| "learning_rate": 1.2453300124533002e-09, |
| "loss": 1.0606, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0001556820039387547, |
| "grad_norm": 8.49953842163086, |
| "learning_rate": 2.801992528019925e-09, |
| "loss": 1.0495, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00023352300590813205, |
| "grad_norm": 4.764372825622559, |
| "learning_rate": 4.358655043586551e-09, |
| "loss": 1.0324, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0003113640078775094, |
| "grad_norm": 6.350065231323242, |
| "learning_rate": 5.915317559153175e-09, |
| "loss": 1.0236, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00038920500984688676, |
| "grad_norm": 21.48935317993164, |
| "learning_rate": 7.471980074719801e-09, |
| "loss": 1.0156, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0004670460118162641, |
| "grad_norm": 11.964753150939941, |
| "learning_rate": 9.028642590286426e-09, |
| "loss": 1.1045, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0005448870137856414, |
| "grad_norm": 16.780696868896484, |
| "learning_rate": 1.0585305105853052e-08, |
| "loss": 0.9926, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0006227280157550188, |
| "grad_norm": 9.311758041381836, |
| "learning_rate": 1.2141967621419675e-08, |
| "loss": 1.0672, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0007005690177243961, |
| "grad_norm": 10.671490669250488, |
| "learning_rate": 1.36986301369863e-08, |
| "loss": 1.1432, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0007784100196937735, |
| "grad_norm": 6.056899547576904, |
| "learning_rate": 1.5255292652552926e-08, |
| "loss": 1.0207, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0008562510216631508, |
| "grad_norm": 12.727471351623535, |
| "learning_rate": 1.6811955168119553e-08, |
| "loss": 1.0626, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0009340920236325282, |
| "grad_norm": 3.2297894954681396, |
| "learning_rate": 1.8368617683686178e-08, |
| "loss": 1.0551, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0010119330256019056, |
| "grad_norm": 5.09151554107666, |
| "learning_rate": 1.9925280199252803e-08, |
| "loss": 1.0051, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.0010897740275712829, |
| "grad_norm": 9.829240798950195, |
| "learning_rate": 2.1481942714819424e-08, |
| "loss": 1.0936, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0011676150295406602, |
| "grad_norm": 4.1169023513793945, |
| "learning_rate": 2.3038605230386048e-08, |
| "loss": 0.9367, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0012454560315100377, |
| "grad_norm": 22.784198760986328, |
| "learning_rate": 2.4595267745952676e-08, |
| "loss": 1.1475, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.001323297033479415, |
| "grad_norm": 6.216701507568359, |
| "learning_rate": 2.61519302615193e-08, |
| "loss": 1.1738, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0014011380354487922, |
| "grad_norm": 8.767633438110352, |
| "learning_rate": 2.7708592777085925e-08, |
| "loss": 1.0995, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0014789790374181697, |
| "grad_norm": 6.444150447845459, |
| "learning_rate": 2.926525529265255e-08, |
| "loss": 1.1674, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.001556820039387547, |
| "grad_norm": 21.939842224121094, |
| "learning_rate": 3.082191780821918e-08, |
| "loss": 0.9658, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0016346610413569243, |
| "grad_norm": 6.07455587387085, |
| "learning_rate": 3.23785803237858e-08, |
| "loss": 1.3969, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0017125020433263016, |
| "grad_norm": 7.66893196105957, |
| "learning_rate": 3.3935242839352427e-08, |
| "loss": 1.0274, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0017903430452956791, |
| "grad_norm": 6.411283016204834, |
| "learning_rate": 3.549190535491906e-08, |
| "loss": 1.0922, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0018681840472650564, |
| "grad_norm": 15.535103797912598, |
| "learning_rate": 3.704856787048568e-08, |
| "loss": 1.058, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0019460250492344337, |
| "grad_norm": 13.108068466186523, |
| "learning_rate": 3.860523038605231e-08, |
| "loss": 1.1347, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.002023866051203811, |
| "grad_norm": 5.452599048614502, |
| "learning_rate": 4.016189290161893e-08, |
| "loss": 1.07, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0021017070531731885, |
| "grad_norm": 13.57522964477539, |
| "learning_rate": 4.1718555417185556e-08, |
| "loss": 1.0556, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.0021795480551425658, |
| "grad_norm": 4.844541072845459, |
| "learning_rate": 4.3275217932752174e-08, |
| "loss": 1.0538, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.002257389057111943, |
| "grad_norm": 7.6000800132751465, |
| "learning_rate": 4.48318804483188e-08, |
| "loss": 1.0884, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.0023352300590813203, |
| "grad_norm": 6.445258617401123, |
| "learning_rate": 4.638854296388542e-08, |
| "loss": 1.0247, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.002413071061050698, |
| "grad_norm": 4.861091136932373, |
| "learning_rate": 4.794520547945205e-08, |
| "loss": 1.0553, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.0024909120630200753, |
| "grad_norm": 6.040435314178467, |
| "learning_rate": 4.950186799501867e-08, |
| "loss": 1.0682, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0025687530649894526, |
| "grad_norm": 10.561899185180664, |
| "learning_rate": 5.10585305105853e-08, |
| "loss": 0.9904, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.00264659406695883, |
| "grad_norm": 8.238300323486328, |
| "learning_rate": 5.261519302615193e-08, |
| "loss": 1.0576, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.002724435068928207, |
| "grad_norm": 10.821751594543457, |
| "learning_rate": 5.417185554171855e-08, |
| "loss": 1.0507, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0028022760708975845, |
| "grad_norm": 20.215164184570312, |
| "learning_rate": 5.5728518057285177e-08, |
| "loss": 1.2059, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0028801170728669618, |
| "grad_norm": 15.447042465209961, |
| "learning_rate": 5.72851805728518e-08, |
| "loss": 1.1047, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.0029579580748363395, |
| "grad_norm": 13.472341537475586, |
| "learning_rate": 5.8841843088418426e-08, |
| "loss": 1.0508, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.003035799076805717, |
| "grad_norm": 16.09784507751465, |
| "learning_rate": 6.039850560398505e-08, |
| "loss": 1.1144, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.003113640078775094, |
| "grad_norm": 5.519948959350586, |
| "learning_rate": 6.195516811955167e-08, |
| "loss": 1.1127, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0031914810807444714, |
| "grad_norm": 9.467545509338379, |
| "learning_rate": 6.351183063511831e-08, |
| "loss": 1.0882, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.0032693220827138486, |
| "grad_norm": 8.895452499389648, |
| "learning_rate": 6.506849315068492e-08, |
| "loss": 1.0845, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.003347163084683226, |
| "grad_norm": 10.007709503173828, |
| "learning_rate": 6.662515566625156e-08, |
| "loss": 1.1585, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.0034250040866526032, |
| "grad_norm": 10.499605178833008, |
| "learning_rate": 6.818181818181817e-08, |
| "loss": 1.103, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.003502845088621981, |
| "grad_norm": 5.367983818054199, |
| "learning_rate": 6.973848069738481e-08, |
| "loss": 1.1586, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.0035806860905913582, |
| "grad_norm": 19.27895164489746, |
| "learning_rate": 7.129514321295142e-08, |
| "loss": 1.0898, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0036585270925607355, |
| "grad_norm": 3.7263176441192627, |
| "learning_rate": 7.285180572851806e-08, |
| "loss": 1.0541, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.003736368094530113, |
| "grad_norm": 21.48790740966797, |
| "learning_rate": 7.440846824408468e-08, |
| "loss": 1.2173, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.00381420909649949, |
| "grad_norm": 5.5661702156066895, |
| "learning_rate": 7.596513075965131e-08, |
| "loss": 1.1816, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.0038920500984688674, |
| "grad_norm": 13.601526260375977, |
| "learning_rate": 7.752179327521793e-08, |
| "loss": 1.0989, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.003969891100438245, |
| "grad_norm": 9.873005867004395, |
| "learning_rate": 7.907845579078456e-08, |
| "loss": 1.1859, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.004047732102407622, |
| "grad_norm": 4.8417277336120605, |
| "learning_rate": 8.063511830635118e-08, |
| "loss": 0.9859, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.004125573104377, |
| "grad_norm": 3.8291945457458496, |
| "learning_rate": 8.21917808219178e-08, |
| "loss": 0.9709, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.004203414106346377, |
| "grad_norm": 5.504295349121094, |
| "learning_rate": 8.374844333748443e-08, |
| "loss": 1.1271, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.004281255108315754, |
| "grad_norm": 10.665711402893066, |
| "learning_rate": 8.530510585305104e-08, |
| "loss": 1.1773, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.0043590961102851315, |
| "grad_norm": 8.259835243225098, |
| "learning_rate": 8.686176836861768e-08, |
| "loss": 1.1428, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.004436937112254509, |
| "grad_norm": 15.531925201416016, |
| "learning_rate": 8.84184308841843e-08, |
| "loss": 1.1423, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.004514778114223886, |
| "grad_norm": 17.920616149902344, |
| "learning_rate": 8.997509339975093e-08, |
| "loss": 1.3731, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.004592619116193263, |
| "grad_norm": 5.740132808685303, |
| "learning_rate": 9.153175591531755e-08, |
| "loss": 0.9373, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.004670460118162641, |
| "grad_norm": 6.698586463928223, |
| "learning_rate": 9.308841843088418e-08, |
| "loss": 1.0603, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.004748301120132018, |
| "grad_norm": 4.851785182952881, |
| "learning_rate": 9.46450809464508e-08, |
| "loss": 1.004, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.004826142122101396, |
| "grad_norm": 7.876951217651367, |
| "learning_rate": 9.620174346201743e-08, |
| "loss": 1.1607, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.004903983124070773, |
| "grad_norm": 9.093779563903809, |
| "learning_rate": 9.775840597758405e-08, |
| "loss": 1.1045, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.004981824126040151, |
| "grad_norm": 16.582103729248047, |
| "learning_rate": 9.931506849315068e-08, |
| "loss": 1.1154, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.005059665128009528, |
| "grad_norm": 13.140198707580566, |
| "learning_rate": 1.008717310087173e-07, |
| "loss": 1.17, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.005137506129978905, |
| "grad_norm": 3.4895646572113037, |
| "learning_rate": 1.0242839352428394e-07, |
| "loss": 0.9535, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0052153471319482825, |
| "grad_norm": 6.645687103271484, |
| "learning_rate": 1.0398505603985055e-07, |
| "loss": 1.05, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.00529318813391766, |
| "grad_norm": 7.615957736968994, |
| "learning_rate": 1.0554171855541719e-07, |
| "loss": 0.9174, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.005371029135887037, |
| "grad_norm": 8.536812782287598, |
| "learning_rate": 1.070983810709838e-07, |
| "loss": 1.1338, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.005448870137856414, |
| "grad_norm": 4.573184967041016, |
| "learning_rate": 1.0865504358655044e-07, |
| "loss": 1.2018, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.005526711139825792, |
| "grad_norm": 11.614198684692383, |
| "learning_rate": 1.1021170610211705e-07, |
| "loss": 1.1853, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.005604552141795169, |
| "grad_norm": 12.930988311767578, |
| "learning_rate": 1.1176836861768369e-07, |
| "loss": 1.1772, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.005682393143764546, |
| "grad_norm": 5.334465980529785, |
| "learning_rate": 1.133250311332503e-07, |
| "loss": 1.1402, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.0057602341457339236, |
| "grad_norm": 19.55135726928711, |
| "learning_rate": 1.1488169364881693e-07, |
| "loss": 0.9569, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.005838075147703301, |
| "grad_norm": 14.209831237792969, |
| "learning_rate": 1.1643835616438355e-07, |
| "loss": 1.1239, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.005915916149672679, |
| "grad_norm": 5.5656352043151855, |
| "learning_rate": 1.1799501867995018e-07, |
| "loss": 1.1074, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.005993757151642056, |
| "grad_norm": 10.571775436401367, |
| "learning_rate": 1.1955168119551682e-07, |
| "loss": 1.1663, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.006071598153611434, |
| "grad_norm": 5.807967662811279, |
| "learning_rate": 1.2110834371108342e-07, |
| "loss": 1.1868, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.006149439155580811, |
| "grad_norm": 7.003355503082275, |
| "learning_rate": 1.2266500622665007e-07, |
| "loss": 1.0249, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.006227280157550188, |
| "grad_norm": 14.337294578552246, |
| "learning_rate": 1.2422166874221667e-07, |
| "loss": 1.0405, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.006305121159519565, |
| "grad_norm": 12.388212203979492, |
| "learning_rate": 1.2577833125778332e-07, |
| "loss": 1.1801, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.006382962161488943, |
| "grad_norm": 11.25795841217041, |
| "learning_rate": 1.2733499377334994e-07, |
| "loss": 1.1672, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.00646080316345832, |
| "grad_norm": 15.970906257629395, |
| "learning_rate": 1.2889165628891654e-07, |
| "loss": 1.0815, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.006538644165427697, |
| "grad_norm": 16.4951114654541, |
| "learning_rate": 1.3044831880448317e-07, |
| "loss": 1.039, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.006616485167397075, |
| "grad_norm": 16.199981689453125, |
| "learning_rate": 1.3200498132004982e-07, |
| "loss": 1.1636, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.006694326169366452, |
| "grad_norm": 7.787930965423584, |
| "learning_rate": 1.3356164383561644e-07, |
| "loss": 1.0949, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.006772167171335829, |
| "grad_norm": 4.226932525634766, |
| "learning_rate": 1.3511830635118307e-07, |
| "loss": 1.0409, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.0068500081733052064, |
| "grad_norm": 19.068387985229492, |
| "learning_rate": 1.3667496886674967e-07, |
| "loss": 0.9881, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.006927849175274584, |
| "grad_norm": 3.8829450607299805, |
| "learning_rate": 1.3823163138231632e-07, |
| "loss": 0.9989, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.007005690177243962, |
| "grad_norm": 5.948785305023193, |
| "learning_rate": 1.3978829389788294e-07, |
| "loss": 1.007, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.007083531179213339, |
| "grad_norm": 5.125, |
| "learning_rate": 1.4134495641344957e-07, |
| "loss": 0.8301, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.0071613721811827164, |
| "grad_norm": 12.499361038208008, |
| "learning_rate": 1.4290161892901616e-07, |
| "loss": 1.0923, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.007239213183152094, |
| "grad_norm": 6.266834735870361, |
| "learning_rate": 1.4445828144458281e-07, |
| "loss": 0.88, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.007317054185121471, |
| "grad_norm": 9.417441368103027, |
| "learning_rate": 1.4601494396014944e-07, |
| "loss": 0.9669, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.007394895187090848, |
| "grad_norm": 9.376644134521484, |
| "learning_rate": 1.4757160647571606e-07, |
| "loss": 1.0241, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.007472736189060226, |
| "grad_norm": 10.515301704406738, |
| "learning_rate": 1.491282689912827e-07, |
| "loss": 1.0468, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.007550577191029603, |
| "grad_norm": 8.439921379089355, |
| "learning_rate": 1.506849315068493e-07, |
| "loss": 0.9356, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.00762841819299898, |
| "grad_norm": 8.198512077331543, |
| "learning_rate": 1.5224159402241594e-07, |
| "loss": 1.1591, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0077062591949683575, |
| "grad_norm": 6.289046287536621, |
| "learning_rate": 1.5379825653798256e-07, |
| "loss": 1.0259, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.007784100196937735, |
| "grad_norm": 18.078012466430664, |
| "learning_rate": 1.5535491905354919e-07, |
| "loss": 1.1059, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.007861941198907112, |
| "grad_norm": 8.508400917053223, |
| "learning_rate": 1.569115815691158e-07, |
| "loss": 1.001, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.00793978220087649, |
| "grad_norm": 6.552981853485107, |
| "learning_rate": 1.5846824408468243e-07, |
| "loss": 1.0819, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.008017623202845867, |
| "grad_norm": 5.941412925720215, |
| "learning_rate": 1.6002490660024906e-07, |
| "loss": 1.0143, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.008095464204815245, |
| "grad_norm": 10.764496803283691, |
| "learning_rate": 1.6158156911581568e-07, |
| "loss": 1.0228, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.008173305206784621, |
| "grad_norm": 5.186371326446533, |
| "learning_rate": 1.6313823163138233e-07, |
| "loss": 0.9794, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.008251146208754, |
| "grad_norm": 11.401899337768555, |
| "learning_rate": 1.6469489414694893e-07, |
| "loss": 1.0773, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.008328987210723376, |
| "grad_norm": 5.4313788414001465, |
| "learning_rate": 1.6625155666251556e-07, |
| "loss": 0.9984, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.008406828212692754, |
| "grad_norm": 7.18859338760376, |
| "learning_rate": 1.6780821917808218e-07, |
| "loss": 1.0894, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.00848466921466213, |
| "grad_norm": 5.814337253570557, |
| "learning_rate": 1.6936488169364883e-07, |
| "loss": 0.983, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.008562510216631508, |
| "grad_norm": 11.842198371887207, |
| "learning_rate": 1.7092154420921543e-07, |
| "loss": 0.994, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.008640351218600887, |
| "grad_norm": 10.12619400024414, |
| "learning_rate": 1.7247820672478206e-07, |
| "loss": 1.015, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.008718192220570263, |
| "grad_norm": 7.895757675170898, |
| "learning_rate": 1.7403486924034868e-07, |
| "loss": 1.1194, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.008796033222539641, |
| "grad_norm": 5.340054512023926, |
| "learning_rate": 1.755915317559153e-07, |
| "loss": 1.0283, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.008873874224509018, |
| "grad_norm": 13.950590133666992, |
| "learning_rate": 1.7714819427148193e-07, |
| "loss": 1.155, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.008951715226478396, |
| "grad_norm": 10.90434741973877, |
| "learning_rate": 1.7870485678704855e-07, |
| "loss": 1.0135, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.009029556228447772, |
| "grad_norm": 4.94070291519165, |
| "learning_rate": 1.8026151930261518e-07, |
| "loss": 0.9843, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.00910739723041715, |
| "grad_norm": 9.50981616973877, |
| "learning_rate": 1.818181818181818e-07, |
| "loss": 1.0777, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.009185238232386527, |
| "grad_norm": 9.218316078186035, |
| "learning_rate": 1.8337484433374845e-07, |
| "loss": 1.1192, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.009263079234355905, |
| "grad_norm": 17.782791137695312, |
| "learning_rate": 1.8493150684931505e-07, |
| "loss": 1.0513, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.009340920236325281, |
| "grad_norm": 5.774691581726074, |
| "learning_rate": 1.8648816936488168e-07, |
| "loss": 0.872, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.00941876123829466, |
| "grad_norm": 6.310098171234131, |
| "learning_rate": 1.880448318804483e-07, |
| "loss": 0.9395, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.009496602240264036, |
| "grad_norm": 6.68503999710083, |
| "learning_rate": 1.8960149439601495e-07, |
| "loss": 1.0394, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.009574443242233414, |
| "grad_norm": 6.972198486328125, |
| "learning_rate": 1.9115815691158155e-07, |
| "loss": 1.066, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.009652284244202792, |
| "grad_norm": 6.581061363220215, |
| "learning_rate": 1.9271481942714817e-07, |
| "loss": 0.9428, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.009730125246172169, |
| "grad_norm": 10.010781288146973, |
| "learning_rate": 1.942714819427148e-07, |
| "loss": 1.097, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.009807966248141547, |
| "grad_norm": 10.270834922790527, |
| "learning_rate": 1.9582814445828145e-07, |
| "loss": 1.0373, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.009885807250110923, |
| "grad_norm": 7.189127445220947, |
| "learning_rate": 1.9738480697384807e-07, |
| "loss": 1.2356, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.009963648252080301, |
| "grad_norm": 11.2526216506958, |
| "learning_rate": 1.9894146948941467e-07, |
| "loss": 0.8422, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.010041489254049678, |
| "grad_norm": 5.1716203689575195, |
| "learning_rate": 2.004981320049813e-07, |
| "loss": 1.0132, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.010119330256019056, |
| "grad_norm": 4.592648983001709, |
| "learning_rate": 2.0205479452054795e-07, |
| "loss": 1.0677, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.010197171257988432, |
| "grad_norm": 4.74710750579834, |
| "learning_rate": 2.0361145703611457e-07, |
| "loss": 0.9573, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.01027501225995781, |
| "grad_norm": 5.075165748596191, |
| "learning_rate": 2.0516811955168117e-07, |
| "loss": 0.919, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.010352853261927187, |
| "grad_norm": 5.705000400543213, |
| "learning_rate": 2.067247820672478e-07, |
| "loss": 0.9722, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.010430694263896565, |
| "grad_norm": 8.337606430053711, |
| "learning_rate": 2.0828144458281445e-07, |
| "loss": 1.0511, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.010508535265865942, |
| "grad_norm": 7.93868350982666, |
| "learning_rate": 2.0983810709838107e-07, |
| "loss": 1.0511, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.01058637626783532, |
| "grad_norm": 7.5352325439453125, |
| "learning_rate": 2.113947696139477e-07, |
| "loss": 1.0529, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.010664217269804696, |
| "grad_norm": 12.067502975463867, |
| "learning_rate": 2.129514321295143e-07, |
| "loss": 0.9818, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.010742058271774074, |
| "grad_norm": 4.793339729309082, |
| "learning_rate": 2.1450809464508094e-07, |
| "loss": 0.9757, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.010819899273743452, |
| "grad_norm": 5.648492336273193, |
| "learning_rate": 2.1606475716064757e-07, |
| "loss": 0.9838, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.010897740275712829, |
| "grad_norm": 14.50791072845459, |
| "learning_rate": 2.176214196762142e-07, |
| "loss": 1.025, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.010975581277682207, |
| "grad_norm": 6.976552486419678, |
| "learning_rate": 2.191780821917808e-07, |
| "loss": 1.0636, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.011053422279651583, |
| "grad_norm": 8.440703392028809, |
| "learning_rate": 2.2073474470734744e-07, |
| "loss": 0.8979, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.011131263281620962, |
| "grad_norm": 17.822824478149414, |
| "learning_rate": 2.2229140722291407e-07, |
| "loss": 1.126, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.011209104283590338, |
| "grad_norm": 3.6384825706481934, |
| "learning_rate": 2.238480697384807e-07, |
| "loss": 0.9756, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.011286945285559716, |
| "grad_norm": 9.758706092834473, |
| "learning_rate": 2.2540473225404732e-07, |
| "loss": 1.0776, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.011364786287529093, |
| "grad_norm": 6.821314334869385, |
| "learning_rate": 2.2696139476961394e-07, |
| "loss": 1.0809, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.01144262728949847, |
| "grad_norm": 5.796785831451416, |
| "learning_rate": 2.2851805728518056e-07, |
| "loss": 1.0388, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.011520468291467847, |
| "grad_norm": 14.487456321716309, |
| "learning_rate": 2.300747198007472e-07, |
| "loss": 0.9906, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.011598309293437225, |
| "grad_norm": 5.587100505828857, |
| "learning_rate": 2.3163138231631381e-07, |
| "loss": 0.955, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.011676150295406602, |
| "grad_norm": 5.029387474060059, |
| "learning_rate": 2.3318804483188044e-07, |
| "loss": 0.9509, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.01175399129737598, |
| "grad_norm": 16.782621383666992, |
| "learning_rate": 2.3474470734744706e-07, |
| "loss": 1.0278, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.011831832299345358, |
| "grad_norm": 8.211995124816895, |
| "learning_rate": 2.363013698630137e-07, |
| "loss": 1.1128, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.011909673301314734, |
| "grad_norm": 8.179312705993652, |
| "learning_rate": 2.378580323785803e-07, |
| "loss": 0.9175, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.011987514303284113, |
| "grad_norm": 3.8183233737945557, |
| "learning_rate": 2.3941469489414696e-07, |
| "loss": 1.0116, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.012065355305253489, |
| "grad_norm": 11.87375545501709, |
| "learning_rate": 2.4097135740971356e-07, |
| "loss": 0.975, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.012143196307222867, |
| "grad_norm": 20.000045776367188, |
| "learning_rate": 2.425280199252802e-07, |
| "loss": 1.0561, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.012221037309192244, |
| "grad_norm": 4.025638103485107, |
| "learning_rate": 2.440846824408468e-07, |
| "loss": 0.977, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.012298878311161622, |
| "grad_norm": 8.214958190917969, |
| "learning_rate": 2.4564134495641346e-07, |
| "loss": 0.9728, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.012376719313130998, |
| "grad_norm": 13.562061309814453, |
| "learning_rate": 2.4719800747198006e-07, |
| "loss": 0.8595, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.012454560315100376, |
| "grad_norm": 4.473455905914307, |
| "learning_rate": 2.4875466998754666e-07, |
| "loss": 0.8972, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.012532401317069753, |
| "grad_norm": 5.311202049255371, |
| "learning_rate": 2.503113325031133e-07, |
| "loss": 0.8578, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.01261024231903913, |
| "grad_norm": 11.063155174255371, |
| "learning_rate": 2.5186799501867996e-07, |
| "loss": 0.9938, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.012688083321008507, |
| "grad_norm": 7.260047435760498, |
| "learning_rate": 2.5342465753424656e-07, |
| "loss": 0.9745, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.012765924322977885, |
| "grad_norm": 5.3101067543029785, |
| "learning_rate": 2.549813200498132e-07, |
| "loss": 1.0001, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.012843765324947264, |
| "grad_norm": 4.430516719818115, |
| "learning_rate": 2.5653798256537986e-07, |
| "loss": 0.9283, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.01292160632691664, |
| "grad_norm": 4.081624507904053, |
| "learning_rate": 2.580946450809464e-07, |
| "loss": 0.9694, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.012999447328886018, |
| "grad_norm": 6.339404106140137, |
| "learning_rate": 2.5965130759651306e-07, |
| "loss": 1.0485, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.013077288330855395, |
| "grad_norm": 8.773398399353027, |
| "learning_rate": 2.6120797011207965e-07, |
| "loss": 0.8494, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.013155129332824773, |
| "grad_norm": 9.235841751098633, |
| "learning_rate": 2.627646326276463e-07, |
| "loss": 0.9223, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.01323297033479415, |
| "grad_norm": 5.350943565368652, |
| "learning_rate": 2.6432129514321296e-07, |
| "loss": 1.0852, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.013310811336763527, |
| "grad_norm": 5.6170268058776855, |
| "learning_rate": 2.6587795765877955e-07, |
| "loss": 0.9529, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.013388652338732904, |
| "grad_norm": 6.033858776092529, |
| "learning_rate": 2.674346201743462e-07, |
| "loss": 0.9264, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.013466493340702282, |
| "grad_norm": 10.408087730407715, |
| "learning_rate": 2.6899128268991286e-07, |
| "loss": 0.9435, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.013544334342671658, |
| "grad_norm": 3.902411460876465, |
| "learning_rate": 2.7054794520547945e-07, |
| "loss": 1.0717, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.013622175344641036, |
| "grad_norm": 6.315438270568848, |
| "learning_rate": 2.7210460772104605e-07, |
| "loss": 1.0222, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.013700016346610413, |
| "grad_norm": 9.6283540725708, |
| "learning_rate": 2.7366127023661265e-07, |
| "loss": 0.9607, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.013777857348579791, |
| "grad_norm": 8.017468452453613, |
| "learning_rate": 2.752179327521793e-07, |
| "loss": 0.908, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.013855698350549167, |
| "grad_norm": 4.06109094619751, |
| "learning_rate": 2.7677459526774595e-07, |
| "loss": 0.9779, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.013933539352518546, |
| "grad_norm": 4.540249347686768, |
| "learning_rate": 2.7833125778331255e-07, |
| "loss": 1.0685, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.014011380354487924, |
| "grad_norm": 5.971028804779053, |
| "learning_rate": 2.798879202988792e-07, |
| "loss": 1.0487, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.0140892213564573, |
| "grad_norm": 7.365455150604248, |
| "learning_rate": 2.8144458281444585e-07, |
| "loss": 1.0255, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.014167062358426678, |
| "grad_norm": 5.49646520614624, |
| "learning_rate": 2.8300124533001245e-07, |
| "loss": 1.0066, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.014244903360396055, |
| "grad_norm": 5.0211615562438965, |
| "learning_rate": 2.845579078455791e-07, |
| "loss": 0.9018, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.014322744362365433, |
| "grad_norm": 3.7670419216156006, |
| "learning_rate": 2.8611457036114565e-07, |
| "loss": 1.0793, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.01440058536433481, |
| "grad_norm": 10.098974227905273, |
| "learning_rate": 2.876712328767123e-07, |
| "loss": 1.0537, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.014478426366304187, |
| "grad_norm": 8.83332633972168, |
| "learning_rate": 2.8922789539227895e-07, |
| "loss": 0.9274, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.014556267368273564, |
| "grad_norm": 13.259550094604492, |
| "learning_rate": 2.9078455790784555e-07, |
| "loss": 0.9684, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.014634108370242942, |
| "grad_norm": 9.241827964782715, |
| "learning_rate": 2.923412204234122e-07, |
| "loss": 1.031, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.014711949372212318, |
| "grad_norm": 7.292890548706055, |
| "learning_rate": 2.9389788293897885e-07, |
| "loss": 0.9006, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.014789790374181697, |
| "grad_norm": 4.794684886932373, |
| "learning_rate": 2.9545454545454545e-07, |
| "loss": 1.0308, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.014867631376151073, |
| "grad_norm": 4.3201518058776855, |
| "learning_rate": 2.970112079701121e-07, |
| "loss": 0.9706, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.014945472378120451, |
| "grad_norm": 3.5388669967651367, |
| "learning_rate": 2.985678704856787e-07, |
| "loss": 0.9782, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.01502331338008983, |
| "grad_norm": 10.980652809143066, |
| "learning_rate": 3.001245330012453e-07, |
| "loss": 1.1015, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.015101154382059206, |
| "grad_norm": 7.639592170715332, |
| "learning_rate": 3.0168119551681194e-07, |
| "loss": 0.9046, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.015178995384028584, |
| "grad_norm": 5.50681734085083, |
| "learning_rate": 3.0323785803237854e-07, |
| "loss": 1.0192, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.01525683638599796, |
| "grad_norm": 4.924655437469482, |
| "learning_rate": 3.047945205479452e-07, |
| "loss": 1.0545, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.015334677387967338, |
| "grad_norm": 6.294414043426514, |
| "learning_rate": 3.0635118306351184e-07, |
| "loss": 0.9807, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.015412518389936715, |
| "grad_norm": 4.609034538269043, |
| "learning_rate": 3.0790784557907844e-07, |
| "loss": 1.0205, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.015490359391906093, |
| "grad_norm": 3.4544599056243896, |
| "learning_rate": 3.094645080946451e-07, |
| "loss": 0.9761, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.01556820039387547, |
| "grad_norm": 5.186591148376465, |
| "learning_rate": 3.110211706102117e-07, |
| "loss": 0.9933, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.015646041395844846, |
| "grad_norm": 4.516424179077148, |
| "learning_rate": 3.125778331257783e-07, |
| "loss": 0.9057, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.015723882397814224, |
| "grad_norm": 4.458924293518066, |
| "learning_rate": 3.1413449564134494e-07, |
| "loss": 0.9526, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.015801723399783602, |
| "grad_norm": 5.840490341186523, |
| "learning_rate": 3.1569115815691154e-07, |
| "loss": 1.0032, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.01587956440175298, |
| "grad_norm": 13.803277015686035, |
| "learning_rate": 3.172478206724782e-07, |
| "loss": 1.0162, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.015957405403722355, |
| "grad_norm": 3.742831230163574, |
| "learning_rate": 3.1880448318804484e-07, |
| "loss": 0.9714, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.016035246405691733, |
| "grad_norm": 5.748800277709961, |
| "learning_rate": 3.2036114570361144e-07, |
| "loss": 0.9545, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.01611308740766111, |
| "grad_norm": 4.5021491050720215, |
| "learning_rate": 3.219178082191781e-07, |
| "loss": 0.9503, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.01619092840963049, |
| "grad_norm": 6.095613956451416, |
| "learning_rate": 3.234744707347447e-07, |
| "loss": 0.991, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.016268769411599868, |
| "grad_norm": 4.993571758270264, |
| "learning_rate": 3.2503113325031134e-07, |
| "loss": 0.9221, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.016346610413569242, |
| "grad_norm": 5.949316501617432, |
| "learning_rate": 3.2658779576587794e-07, |
| "loss": 0.9897, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.01642445141553862, |
| "grad_norm": 5.225283622741699, |
| "learning_rate": 3.2814445828144453e-07, |
| "loss": 0.9719, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.016502292417508, |
| "grad_norm": 15.378800392150879, |
| "learning_rate": 3.297011207970112e-07, |
| "loss": 0.8633, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.016580133419477377, |
| "grad_norm": 4.347599506378174, |
| "learning_rate": 3.312577833125778e-07, |
| "loss": 0.9054, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.01665797442144675, |
| "grad_norm": 5.208911895751953, |
| "learning_rate": 3.3281444582814443e-07, |
| "loss": 0.899, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.01673581542341613, |
| "grad_norm": 6.316863059997559, |
| "learning_rate": 3.343711083437111e-07, |
| "loss": 1.0165, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.016813656425385508, |
| "grad_norm": 5.477814197540283, |
| "learning_rate": 3.359277708592777e-07, |
| "loss": 0.96, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.016891497427354886, |
| "grad_norm": 4.848371505737305, |
| "learning_rate": 3.3748443337484433e-07, |
| "loss": 0.9703, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.01696933842932426, |
| "grad_norm": 9.025872230529785, |
| "learning_rate": 3.39041095890411e-07, |
| "loss": 0.9239, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.01704717943129364, |
| "grad_norm": 3.3916220664978027, |
| "learning_rate": 3.4059775840597753e-07, |
| "loss": 1.0136, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.017125020433263017, |
| "grad_norm": 9.25607967376709, |
| "learning_rate": 3.421544209215442e-07, |
| "loss": 0.9626, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.017202861435232395, |
| "grad_norm": 7.245452880859375, |
| "learning_rate": 3.437110834371108e-07, |
| "loss": 1.0026, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.017280702437201773, |
| "grad_norm": 3.3463306427001953, |
| "learning_rate": 3.4526774595267743e-07, |
| "loss": 0.9243, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.017358543439171148, |
| "grad_norm": 5.334697723388672, |
| "learning_rate": 3.468244084682441e-07, |
| "loss": 1.1001, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.017436384441140526, |
| "grad_norm": 4.7469305992126465, |
| "learning_rate": 3.483810709838107e-07, |
| "loss": 1.0421, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.017514225443109904, |
| "grad_norm": 4.398116111755371, |
| "learning_rate": 3.4993773349937733e-07, |
| "loss": 0.9502, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.017592066445079282, |
| "grad_norm": 3.972031831741333, |
| "learning_rate": 3.51494396014944e-07, |
| "loss": 0.8974, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.017669907447048657, |
| "grad_norm": 5.13526725769043, |
| "learning_rate": 3.530510585305106e-07, |
| "loss": 0.9668, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.017747748449018035, |
| "grad_norm": 3.752171754837036, |
| "learning_rate": 3.546077210460772e-07, |
| "loss": 1.0589, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.017825589450987413, |
| "grad_norm": 6.005197048187256, |
| "learning_rate": 3.561643835616438e-07, |
| "loss": 0.9786, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.01790343045295679, |
| "grad_norm": 5.12382173538208, |
| "learning_rate": 3.5772104607721043e-07, |
| "loss": 0.936, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.017981271454926166, |
| "grad_norm": 7.456275939941406, |
| "learning_rate": 3.592777085927771e-07, |
| "loss": 0.9261, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.018059112456895544, |
| "grad_norm": 3.7287797927856445, |
| "learning_rate": 3.608343711083437e-07, |
| "loss": 0.9291, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.018136953458864923, |
| "grad_norm": 3.916651725769043, |
| "learning_rate": 3.6239103362391033e-07, |
| "loss": 0.9193, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.0182147944608343, |
| "grad_norm": 4.2813720703125, |
| "learning_rate": 3.63947696139477e-07, |
| "loss": 1.0079, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.01829263546280368, |
| "grad_norm": 8.352608680725098, |
| "learning_rate": 3.655043586550436e-07, |
| "loss": 0.9901, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.018370476464773054, |
| "grad_norm": 5.297429084777832, |
| "learning_rate": 3.6706102117061023e-07, |
| "loss": 0.9049, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.01844831746674243, |
| "grad_norm": 4.064713478088379, |
| "learning_rate": 3.6861768368617677e-07, |
| "loss": 1.1307, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.01852615846871181, |
| "grad_norm": 6.08450174331665, |
| "learning_rate": 3.701743462017434e-07, |
| "loss": 1.0267, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.018603999470681188, |
| "grad_norm": 4.351869106292725, |
| "learning_rate": 3.717310087173101e-07, |
| "loss": 1.0315, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.018681840472650563, |
| "grad_norm": 7.120603084564209, |
| "learning_rate": 3.7328767123287667e-07, |
| "loss": 1.0092, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.01875968147461994, |
| "grad_norm": 4.8134660720825195, |
| "learning_rate": 3.748443337484433e-07, |
| "loss": 0.9491, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.01883752247658932, |
| "grad_norm": 5.852837085723877, |
| "learning_rate": 3.7640099626401e-07, |
| "loss": 1.0029, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.018915363478558697, |
| "grad_norm": 5.291375160217285, |
| "learning_rate": 3.7795765877957657e-07, |
| "loss": 0.8198, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.018993204480528072, |
| "grad_norm": 3.2667717933654785, |
| "learning_rate": 3.795143212951432e-07, |
| "loss": 0.9155, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.01907104548249745, |
| "grad_norm": 4.952467918395996, |
| "learning_rate": 3.810709838107098e-07, |
| "loss": 0.9144, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.019148886484466828, |
| "grad_norm": 4.495504379272461, |
| "learning_rate": 3.826276463262764e-07, |
| "loss": 0.9236, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.019226727486436206, |
| "grad_norm": 5.554149627685547, |
| "learning_rate": 3.8418430884184307e-07, |
| "loss": 0.7856, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.019304568488405584, |
| "grad_norm": 6.092937469482422, |
| "learning_rate": 3.8574097135740967e-07, |
| "loss": 0.8681, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.01938240949037496, |
| "grad_norm": 3.9643170833587646, |
| "learning_rate": 3.872976338729763e-07, |
| "loss": 0.836, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.019460250492344337, |
| "grad_norm": 3.9617724418640137, |
| "learning_rate": 3.8885429638854297e-07, |
| "loss": 1.015, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.019538091494313715, |
| "grad_norm": 8.572834014892578, |
| "learning_rate": 3.9041095890410957e-07, |
| "loss": 0.9308, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.019615932496283094, |
| "grad_norm": 6.380552291870117, |
| "learning_rate": 3.919676214196762e-07, |
| "loss": 0.9056, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.01969377349825247, |
| "grad_norm": 5.703736782073975, |
| "learning_rate": 3.935242839352428e-07, |
| "loss": 0.9864, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.019771614500221846, |
| "grad_norm": 4.2661824226379395, |
| "learning_rate": 3.9508094645080947e-07, |
| "loss": 0.8801, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.019849455502191225, |
| "grad_norm": 3.4654171466827393, |
| "learning_rate": 3.9663760896637607e-07, |
| "loss": 0.8954, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.019927296504160603, |
| "grad_norm": 5.910457611083984, |
| "learning_rate": 3.9819427148194266e-07, |
| "loss": 0.8456, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.020005137506129977, |
| "grad_norm": 6.345880031585693, |
| "learning_rate": 3.997509339975093e-07, |
| "loss": 0.9825, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.020082978508099356, |
| "grad_norm": 11.178544044494629, |
| "learning_rate": 4.0130759651307597e-07, |
| "loss": 0.9716, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.020160819510068734, |
| "grad_norm": 3.9438936710357666, |
| "learning_rate": 4.0286425902864256e-07, |
| "loss": 0.9629, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.020238660512038112, |
| "grad_norm": 7.4510273933410645, |
| "learning_rate": 4.044209215442092e-07, |
| "loss": 0.9815, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.020316501514007487, |
| "grad_norm": 6.15594482421875, |
| "learning_rate": 4.059775840597758e-07, |
| "loss": 0.9805, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.020394342515976865, |
| "grad_norm": 5.105663776397705, |
| "learning_rate": 4.0753424657534246e-07, |
| "loss": 1.0047, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.020472183517946243, |
| "grad_norm": 4.2579779624938965, |
| "learning_rate": 4.090909090909091e-07, |
| "loss": 0.797, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.02055002451991562, |
| "grad_norm": 3.6263747215270996, |
| "learning_rate": 4.1064757160647566e-07, |
| "loss": 0.9526, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.020627865521885, |
| "grad_norm": 4.003891944885254, |
| "learning_rate": 4.122042341220423e-07, |
| "loss": 0.862, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.020705706523854374, |
| "grad_norm": 4.833682060241699, |
| "learning_rate": 4.137608966376089e-07, |
| "loss": 1.0438, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.020783547525823752, |
| "grad_norm": 8.875425338745117, |
| "learning_rate": 4.1531755915317556e-07, |
| "loss": 1.0013, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.02086138852779313, |
| "grad_norm": 5.356649398803711, |
| "learning_rate": 4.168742216687422e-07, |
| "loss": 0.8956, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.02093922952976251, |
| "grad_norm": 5.640366554260254, |
| "learning_rate": 4.184308841843088e-07, |
| "loss": 0.8346, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.021017070531731883, |
| "grad_norm": 3.717663288116455, |
| "learning_rate": 4.1998754669987546e-07, |
| "loss": 0.8512, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.02109491153370126, |
| "grad_norm": 3.557542324066162, |
| "learning_rate": 4.215442092154421e-07, |
| "loss": 0.9313, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.02117275253567064, |
| "grad_norm": 5.178566932678223, |
| "learning_rate": 4.231008717310087e-07, |
| "loss": 0.9086, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.021250593537640017, |
| "grad_norm": 5.773383140563965, |
| "learning_rate": 4.246575342465753e-07, |
| "loss": 0.9678, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.021328434539609392, |
| "grad_norm": 4.725634574890137, |
| "learning_rate": 4.262141967621419e-07, |
| "loss": 0.9356, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.02140627554157877, |
| "grad_norm": 3.0198757648468018, |
| "learning_rate": 4.2777085927770856e-07, |
| "loss": 0.9342, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.02148411654354815, |
| "grad_norm": 5.704006195068359, |
| "learning_rate": 4.293275217932752e-07, |
| "loss": 1.0469, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.021561957545517527, |
| "grad_norm": 4.559571743011475, |
| "learning_rate": 4.308841843088418e-07, |
| "loss": 0.845, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.021639798547486905, |
| "grad_norm": 9.018213272094727, |
| "learning_rate": 4.3244084682440846e-07, |
| "loss": 0.9221, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.02171763954945628, |
| "grad_norm": 6.414641380310059, |
| "learning_rate": 4.339975093399751e-07, |
| "loss": 1.0877, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.021795480551425658, |
| "grad_norm": 4.217600345611572, |
| "learning_rate": 4.355541718555417e-07, |
| "loss": 0.8761, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.021873321553395036, |
| "grad_norm": 5.274855136871338, |
| "learning_rate": 4.3711083437110836e-07, |
| "loss": 0.9046, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.021951162555364414, |
| "grad_norm": 11.607494354248047, |
| "learning_rate": 4.386674968866749e-07, |
| "loss": 0.981, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.02202900355733379, |
| "grad_norm": 5.442785263061523, |
| "learning_rate": 4.4022415940224155e-07, |
| "loss": 0.9357, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.022106844559303167, |
| "grad_norm": 4.934208869934082, |
| "learning_rate": 4.417808219178082e-07, |
| "loss": 0.9124, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.022184685561272545, |
| "grad_norm": 5.2812933921813965, |
| "learning_rate": 4.433374844333748e-07, |
| "loss": 0.9655, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.022262526563241923, |
| "grad_norm": 3.1578216552734375, |
| "learning_rate": 4.4489414694894145e-07, |
| "loss": 0.9452, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.022340367565211298, |
| "grad_norm": 10.148691177368164, |
| "learning_rate": 4.464508094645081e-07, |
| "loss": 0.9015, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.022418208567180676, |
| "grad_norm": 3.150479793548584, |
| "learning_rate": 4.480074719800747e-07, |
| "loss": 0.8744, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.022496049569150054, |
| "grad_norm": 5.963056564331055, |
| "learning_rate": 4.4956413449564135e-07, |
| "loss": 1.0312, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.022573890571119432, |
| "grad_norm": 5.098721981048584, |
| "learning_rate": 4.5112079701120795e-07, |
| "loss": 0.8031, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.02265173157308881, |
| "grad_norm": 5.2625017166137695, |
| "learning_rate": 4.5267745952677455e-07, |
| "loss": 0.8132, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.022729572575058185, |
| "grad_norm": 8.537793159484863, |
| "learning_rate": 4.542341220423412e-07, |
| "loss": 0.8296, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.022807413577027563, |
| "grad_norm": 6.819812774658203, |
| "learning_rate": 4.557907845579078e-07, |
| "loss": 0.9476, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.02288525457899694, |
| "grad_norm": 4.941056251525879, |
| "learning_rate": 4.5734744707347445e-07, |
| "loss": 0.8785, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.02296309558096632, |
| "grad_norm": 5.378219127655029, |
| "learning_rate": 4.589041095890411e-07, |
| "loss": 0.8987, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.023040936582935694, |
| "grad_norm": 4.793314456939697, |
| "learning_rate": 4.604607721046077e-07, |
| "loss": 0.8309, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.023118777584905072, |
| "grad_norm": 7.7251434326171875, |
| "learning_rate": 4.6201743462017435e-07, |
| "loss": 0.959, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.02319661858687445, |
| "grad_norm": 3.7208149433135986, |
| "learning_rate": 4.6357409713574095e-07, |
| "loss": 0.9126, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.02327445958884383, |
| "grad_norm": 4.322316646575928, |
| "learning_rate": 4.651307596513076e-07, |
| "loss": 0.9567, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.023352300590813203, |
| "grad_norm": 5.451142311096191, |
| "learning_rate": 4.666874221668742e-07, |
| "loss": 0.9943, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.02343014159278258, |
| "grad_norm": 6.478999614715576, |
| "learning_rate": 4.682440846824408e-07, |
| "loss": 0.8577, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.02350798259475196, |
| "grad_norm": 5.626023292541504, |
| "learning_rate": 4.6980074719800745e-07, |
| "loss": 0.9176, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.023585823596721338, |
| "grad_norm": 9.153360366821289, |
| "learning_rate": 4.713574097135741e-07, |
| "loss": 1.0269, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.023663664598690716, |
| "grad_norm": 11.129598617553711, |
| "learning_rate": 4.729140722291407e-07, |
| "loss": 0.9428, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.02374150560066009, |
| "grad_norm": 5.8177313804626465, |
| "learning_rate": 4.7447073474470735e-07, |
| "loss": 1.0107, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.02381934660262947, |
| "grad_norm": 6.537820816040039, |
| "learning_rate": 4.7602739726027394e-07, |
| "loss": 0.8104, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.023897187604598847, |
| "grad_norm": 4.420594692230225, |
| "learning_rate": 4.775840597758406e-07, |
| "loss": 0.8484, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.023975028606568225, |
| "grad_norm": 6.306564807891846, |
| "learning_rate": 4.791407222914072e-07, |
| "loss": 0.796, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.0240528696085376, |
| "grad_norm": 11.836288452148438, |
| "learning_rate": 4.806973848069738e-07, |
| "loss": 0.8949, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.024130710610506978, |
| "grad_norm": 4.565202713012695, |
| "learning_rate": 4.822540473225404e-07, |
| "loss": 0.8881, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.024208551612476356, |
| "grad_norm": 4.610184669494629, |
| "learning_rate": 4.83810709838107e-07, |
| "loss": 1.0267, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.024286392614445734, |
| "grad_norm": 4.136282444000244, |
| "learning_rate": 4.853673723536737e-07, |
| "loss": 0.9593, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.02436423361641511, |
| "grad_norm": 4.203325271606445, |
| "learning_rate": 4.869240348692403e-07, |
| "loss": 0.9377, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.024442074618384487, |
| "grad_norm": 8.468722343444824, |
| "learning_rate": 4.88480697384807e-07, |
| "loss": 0.9949, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.024519915620353865, |
| "grad_norm": 7.116949558258057, |
| "learning_rate": 4.900373599003736e-07, |
| "loss": 0.92, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.024597756622323243, |
| "grad_norm": 4.857876777648926, |
| "learning_rate": 4.915940224159402e-07, |
| "loss": 0.8945, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.02467559762429262, |
| "grad_norm": 7.421228408813477, |
| "learning_rate": 4.931506849315068e-07, |
| "loss": 0.8807, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.024753438626261996, |
| "grad_norm": 7.203330993652344, |
| "learning_rate": 4.947073474470734e-07, |
| "loss": 0.8998, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.024831279628231374, |
| "grad_norm": 12.598939895629883, |
| "learning_rate": 4.9626400996264e-07, |
| "loss": 0.8843, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.024909120630200753, |
| "grad_norm": 6.573790073394775, |
| "learning_rate": 4.978206724782067e-07, |
| "loss": 0.8954, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.02498696163217013, |
| "grad_norm": 5.063882350921631, |
| "learning_rate": 4.993773349937733e-07, |
| "loss": 0.8539, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.025064802634139505, |
| "grad_norm": 5.859914779663086, |
| "learning_rate": 5.0093399750934e-07, |
| "loss": 0.9102, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.025142643636108884, |
| "grad_norm": 4.542943954467773, |
| "learning_rate": 5.024906600249066e-07, |
| "loss": 0.876, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.02522048463807826, |
| "grad_norm": 6.943472862243652, |
| "learning_rate": 5.040473225404732e-07, |
| "loss": 0.8886, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.02529832564004764, |
| "grad_norm": 5.794211387634277, |
| "learning_rate": 5.056039850560398e-07, |
| "loss": 0.9136, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.025376166642017015, |
| "grad_norm": 3.58612322807312, |
| "learning_rate": 5.071606475716065e-07, |
| "loss": 0.8483, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.025454007643986393, |
| "grad_norm": 8.513461112976074, |
| "learning_rate": 5.087173100871731e-07, |
| "loss": 0.9484, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.02553184864595577, |
| "grad_norm": 3.152209997177124, |
| "learning_rate": 5.102739726027398e-07, |
| "loss": 0.8564, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.02560968964792515, |
| "grad_norm": 11.711703300476074, |
| "learning_rate": 5.118306351183063e-07, |
| "loss": 0.7842, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.025687530649894527, |
| "grad_norm": 4.101468086242676, |
| "learning_rate": 5.13387297633873e-07, |
| "loss": 0.8646, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.025765371651863902, |
| "grad_norm": 3.844512462615967, |
| "learning_rate": 5.149439601494395e-07, |
| "loss": 0.8094, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.02584321265383328, |
| "grad_norm": 3.546029567718506, |
| "learning_rate": 5.165006226650062e-07, |
| "loss": 0.87, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.025921053655802658, |
| "grad_norm": 3.3729195594787598, |
| "learning_rate": 5.180572851805728e-07, |
| "loss": 0.9467, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.025998894657772036, |
| "grad_norm": 3.984131336212158, |
| "learning_rate": 5.196139476961394e-07, |
| "loss": 0.8571, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.02607673565974141, |
| "grad_norm": 5.9442291259765625, |
| "learning_rate": 5.21170610211706e-07, |
| "loss": 0.892, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.02615457666171079, |
| "grad_norm": 6.404414653778076, |
| "learning_rate": 5.227272727272727e-07, |
| "loss": 0.8978, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.026232417663680167, |
| "grad_norm": 8.53201961517334, |
| "learning_rate": 5.242839352428393e-07, |
| "loss": 0.8403, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.026310258665649545, |
| "grad_norm": 7.944653511047363, |
| "learning_rate": 5.25840597758406e-07, |
| "loss": 0.7752, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.02638809966761892, |
| "grad_norm": 4.13915491104126, |
| "learning_rate": 5.273972602739725e-07, |
| "loss": 1.053, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.0264659406695883, |
| "grad_norm": 9.199925422668457, |
| "learning_rate": 5.289539227895392e-07, |
| "loss": 1.0808, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.026543781671557676, |
| "grad_norm": 4.507978439331055, |
| "learning_rate": 5.305105853051058e-07, |
| "loss": 0.9654, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.026621622673527055, |
| "grad_norm": 5.004615783691406, |
| "learning_rate": 5.320672478206725e-07, |
| "loss": 0.9003, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.02669946367549643, |
| "grad_norm": 9.572540283203125, |
| "learning_rate": 5.336239103362391e-07, |
| "loss": 1.0496, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.026777304677465807, |
| "grad_norm": 6.494607925415039, |
| "learning_rate": 5.351805728518058e-07, |
| "loss": 0.9283, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.026855145679435186, |
| "grad_norm": 6.419877529144287, |
| "learning_rate": 5.367372353673723e-07, |
| "loss": 0.9118, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.026932986681404564, |
| "grad_norm": 8.065162658691406, |
| "learning_rate": 5.38293897882939e-07, |
| "loss": 0.9376, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.027010827683373942, |
| "grad_norm": 11.1658935546875, |
| "learning_rate": 5.398505603985056e-07, |
| "loss": 0.9734, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.027088668685343317, |
| "grad_norm": 8.80482006072998, |
| "learning_rate": 5.414072229140723e-07, |
| "loss": 0.9357, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.027166509687312695, |
| "grad_norm": 9.545907974243164, |
| "learning_rate": 5.429638854296388e-07, |
| "loss": 1.0168, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.027244350689282073, |
| "grad_norm": 3.2502315044403076, |
| "learning_rate": 5.445205479452054e-07, |
| "loss": 0.8814, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.02732219169125145, |
| "grad_norm": 7.160440921783447, |
| "learning_rate": 5.46077210460772e-07, |
| "loss": 0.8058, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.027400032693220826, |
| "grad_norm": 4.625821113586426, |
| "learning_rate": 5.476338729763387e-07, |
| "loss": 0.8834, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.027477873695190204, |
| "grad_norm": 6.714595317840576, |
| "learning_rate": 5.491905354919053e-07, |
| "loss": 0.9709, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.027555714697159582, |
| "grad_norm": 5.669415473937988, |
| "learning_rate": 5.50747198007472e-07, |
| "loss": 0.9646, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.02763355569912896, |
| "grad_norm": 6.046622276306152, |
| "learning_rate": 5.523038605230385e-07, |
| "loss": 0.8469, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.027711396701098335, |
| "grad_norm": 16.526947021484375, |
| "learning_rate": 5.538605230386052e-07, |
| "loss": 0.877, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.027789237703067713, |
| "grad_norm": 4.415500164031982, |
| "learning_rate": 5.554171855541718e-07, |
| "loss": 0.8356, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.02786707870503709, |
| "grad_norm": 4.823260307312012, |
| "learning_rate": 5.569738480697385e-07, |
| "loss": 0.9628, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.02794491970700647, |
| "grad_norm": 8.501585006713867, |
| "learning_rate": 5.585305105853051e-07, |
| "loss": 0.7859, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.028022760708975848, |
| "grad_norm": 10.616768836975098, |
| "learning_rate": 5.600871731008718e-07, |
| "loss": 0.9145, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.028100601710945222, |
| "grad_norm": 6.610407829284668, |
| "learning_rate": 5.616438356164383e-07, |
| "loss": 0.8058, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.0281784427129146, |
| "grad_norm": 4.978299617767334, |
| "learning_rate": 5.63200498132005e-07, |
| "loss": 0.8, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.02825628371488398, |
| "grad_norm": 4.922807693481445, |
| "learning_rate": 5.647571606475716e-07, |
| "loss": 1.0388, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.028334124716853357, |
| "grad_norm": 6.71333122253418, |
| "learning_rate": 5.663138231631383e-07, |
| "loss": 0.9221, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.02841196571882273, |
| "grad_norm": 4.787428379058838, |
| "learning_rate": 5.678704856787049e-07, |
| "loss": 0.8824, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.02848980672079211, |
| "grad_norm": 8.047598838806152, |
| "learning_rate": 5.694271481942715e-07, |
| "loss": 0.8187, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.028567647722761488, |
| "grad_norm": 6.064495086669922, |
| "learning_rate": 5.70983810709838e-07, |
| "loss": 0.7595, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.028645488724730866, |
| "grad_norm": 4.46295690536499, |
| "learning_rate": 5.725404732254047e-07, |
| "loss": 0.9397, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.02872332972670024, |
| "grad_norm": 7.761974334716797, |
| "learning_rate": 5.740971357409713e-07, |
| "loss": 0.915, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.02880117072866962, |
| "grad_norm": 5.134248733520508, |
| "learning_rate": 5.75653798256538e-07, |
| "loss": 0.862, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.028879011730638997, |
| "grad_norm": 5.424485206604004, |
| "learning_rate": 5.772104607721045e-07, |
| "loss": 0.8359, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.028956852732608375, |
| "grad_norm": 2.9714298248291016, |
| "learning_rate": 5.787671232876712e-07, |
| "loss": 0.7999, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.029034693734577753, |
| "grad_norm": 6.131465911865234, |
| "learning_rate": 5.803237858032378e-07, |
| "loss": 0.8619, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.029112534736547128, |
| "grad_norm": 7.894665241241455, |
| "learning_rate": 5.818804483188045e-07, |
| "loss": 0.771, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.029190375738516506, |
| "grad_norm": 3.163548469543457, |
| "learning_rate": 5.834371108343711e-07, |
| "loss": 0.7482, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.029268216740485884, |
| "grad_norm": 5.383469581604004, |
| "learning_rate": 5.849937733499378e-07, |
| "loss": 0.895, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.029346057742455262, |
| "grad_norm": 6.841033935546875, |
| "learning_rate": 5.865504358655043e-07, |
| "loss": 0.8822, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.029423898744424637, |
| "grad_norm": 9.069436073303223, |
| "learning_rate": 5.88107098381071e-07, |
| "loss": 0.8947, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.029501739746394015, |
| "grad_norm": 5.3066725730896, |
| "learning_rate": 5.896637608966376e-07, |
| "loss": 0.8046, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.029579580748363393, |
| "grad_norm": 5.761783599853516, |
| "learning_rate": 5.912204234122043e-07, |
| "loss": 0.9087, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.02965742175033277, |
| "grad_norm": 3.4487996101379395, |
| "learning_rate": 5.927770859277709e-07, |
| "loss": 0.9291, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.029735262752302146, |
| "grad_norm": 5.8793816566467285, |
| "learning_rate": 5.943337484433375e-07, |
| "loss": 0.8244, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.029813103754271524, |
| "grad_norm": 6.812746047973633, |
| "learning_rate": 5.958904109589041e-07, |
| "loss": 0.8169, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.029890944756240902, |
| "grad_norm": 5.695523738861084, |
| "learning_rate": 5.974470734744707e-07, |
| "loss": 0.9072, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.02996878575821028, |
| "grad_norm": 3.443061590194702, |
| "learning_rate": 5.990037359900373e-07, |
| "loss": 0.8709, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.03004662676017966, |
| "grad_norm": 6.014828681945801, |
| "learning_rate": 6.00560398505604e-07, |
| "loss": 0.917, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.030124467762149033, |
| "grad_norm": 4.14946985244751, |
| "learning_rate": 6.021170610211705e-07, |
| "loss": 0.8827, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.03020230876411841, |
| "grad_norm": 4.128273963928223, |
| "learning_rate": 6.036737235367372e-07, |
| "loss": 0.8062, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.03028014976608779, |
| "grad_norm": 5.5036115646362305, |
| "learning_rate": 6.052303860523038e-07, |
| "loss": 0.9832, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.030357990768057168, |
| "grad_norm": 5.694386005401611, |
| "learning_rate": 6.067870485678705e-07, |
| "loss": 0.7735, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.030435831770026543, |
| "grad_norm": 3.861293315887451, |
| "learning_rate": 6.083437110834371e-07, |
| "loss": 0.8782, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.03051367277199592, |
| "grad_norm": 5.179184436798096, |
| "learning_rate": 6.099003735990037e-07, |
| "loss": 0.908, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.0305915137739653, |
| "grad_norm": 4.929222106933594, |
| "learning_rate": 6.114570361145703e-07, |
| "loss": 0.8967, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.030669354775934677, |
| "grad_norm": 3.300053596496582, |
| "learning_rate": 6.13013698630137e-07, |
| "loss": 0.9517, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.03074719577790405, |
| "grad_norm": 4.976810932159424, |
| "learning_rate": 6.145703611457036e-07, |
| "loss": 0.8676, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.03082503677987343, |
| "grad_norm": 3.866328477859497, |
| "learning_rate": 6.161270236612703e-07, |
| "loss": 0.9735, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.030902877781842808, |
| "grad_norm": 4.272680759429932, |
| "learning_rate": 6.176836861768369e-07, |
| "loss": 0.9716, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.030980718783812186, |
| "grad_norm": 6.74641752243042, |
| "learning_rate": 6.192403486924035e-07, |
| "loss": 0.815, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.031058559785781564, |
| "grad_norm": 3.4278452396392822, |
| "learning_rate": 6.207970112079701e-07, |
| "loss": 0.9165, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.03113640078775094, |
| "grad_norm": 7.538846492767334, |
| "learning_rate": 6.223536737235368e-07, |
| "loss": 0.9513, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.031214241789720317, |
| "grad_norm": 4.969770431518555, |
| "learning_rate": 6.239103362391034e-07, |
| "loss": 0.816, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.03129208279168969, |
| "grad_norm": 9.244134902954102, |
| "learning_rate": 6.2546699875467e-07, |
| "loss": 0.9293, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.03136992379365907, |
| "grad_norm": 5.617055416107178, |
| "learning_rate": 6.270236612702365e-07, |
| "loss": 0.8553, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.03144776479562845, |
| "grad_norm": 4.888432502746582, |
| "learning_rate": 6.285803237858031e-07, |
| "loss": 0.8679, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.031525605797597826, |
| "grad_norm": 4.528554916381836, |
| "learning_rate": 6.301369863013698e-07, |
| "loss": 0.9086, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.031603446799567204, |
| "grad_norm": 6.504762172698975, |
| "learning_rate": 6.316936488169364e-07, |
| "loss": 0.9501, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.03168128780153658, |
| "grad_norm": 3.974257230758667, |
| "learning_rate": 6.332503113325031e-07, |
| "loss": 0.888, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.03175912880350596, |
| "grad_norm": 8.628198623657227, |
| "learning_rate": 6.348069738480696e-07, |
| "loss": 0.8664, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.03183696980547534, |
| "grad_norm": 4.1892805099487305, |
| "learning_rate": 6.363636363636363e-07, |
| "loss": 0.9896, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.03191481080744471, |
| "grad_norm": 5.350588321685791, |
| "learning_rate": 6.37920298879203e-07, |
| "loss": 0.8815, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.03199265180941409, |
| "grad_norm": 5.569740295410156, |
| "learning_rate": 6.394769613947696e-07, |
| "loss": 0.8785, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.032070492811383466, |
| "grad_norm": 7.358509063720703, |
| "learning_rate": 6.410336239103362e-07, |
| "loss": 0.8415, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.032148333813352845, |
| "grad_norm": 5.384446144104004, |
| "learning_rate": 6.425902864259029e-07, |
| "loss": 0.9108, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.03222617481532222, |
| "grad_norm": 4.48892068862915, |
| "learning_rate": 6.441469489414694e-07, |
| "loss": 0.9423, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.0323040158172916, |
| "grad_norm": 4.302936553955078, |
| "learning_rate": 6.457036114570361e-07, |
| "loss": 0.8849, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.03238185681926098, |
| "grad_norm": 5.185121536254883, |
| "learning_rate": 6.472602739726027e-07, |
| "loss": 0.8177, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.03245969782123036, |
| "grad_norm": 3.2999234199523926, |
| "learning_rate": 6.488169364881694e-07, |
| "loss": 0.9199, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.032537538823199735, |
| "grad_norm": 19.133163452148438, |
| "learning_rate": 6.50373599003736e-07, |
| "loss": 0.9283, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.03261537982516911, |
| "grad_norm": 3.4535083770751953, |
| "learning_rate": 6.519302615193026e-07, |
| "loss": 0.9707, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.032693220827138485, |
| "grad_norm": 11.507316589355469, |
| "learning_rate": 6.534869240348691e-07, |
| "loss": 0.8878, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.03277106182910786, |
| "grad_norm": 21.363101959228516, |
| "learning_rate": 6.550435865504358e-07, |
| "loss": 0.9761, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.03284890283107724, |
| "grad_norm": 4.29213285446167, |
| "learning_rate": 6.566002490660024e-07, |
| "loss": 0.7789, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.03292674383304662, |
| "grad_norm": 7.540319442749023, |
| "learning_rate": 6.581569115815691e-07, |
| "loss": 0.7991, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.033004584835016, |
| "grad_norm": 3.658780097961426, |
| "learning_rate": 6.597135740971356e-07, |
| "loss": 0.9211, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.033082425836985375, |
| "grad_norm": 8.205567359924316, |
| "learning_rate": 6.612702366127023e-07, |
| "loss": 0.8083, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.033160266838954754, |
| "grad_norm": 6.272342681884766, |
| "learning_rate": 6.628268991282689e-07, |
| "loss": 0.9724, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.03323810784092413, |
| "grad_norm": 7.037917137145996, |
| "learning_rate": 6.643835616438356e-07, |
| "loss": 0.8821, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.0333159488428935, |
| "grad_norm": 6.3946027755737305, |
| "learning_rate": 6.659402241594022e-07, |
| "loss": 0.9065, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.03339378984486288, |
| "grad_norm": 7.079307556152344, |
| "learning_rate": 6.674968866749689e-07, |
| "loss": 0.993, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.03347163084683226, |
| "grad_norm": 6.372123718261719, |
| "learning_rate": 6.690535491905354e-07, |
| "loss": 0.9372, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.03354947184880164, |
| "grad_norm": 2.9949862957000732, |
| "learning_rate": 6.706102117061021e-07, |
| "loss": 0.7785, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.033627312850771016, |
| "grad_norm": 5.278440475463867, |
| "learning_rate": 6.721668742216687e-07, |
| "loss": 0.8646, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.033705153852740394, |
| "grad_norm": 3.972559928894043, |
| "learning_rate": 6.737235367372354e-07, |
| "loss": 0.8816, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.03378299485470977, |
| "grad_norm": 7.038811683654785, |
| "learning_rate": 6.75280199252802e-07, |
| "loss": 0.8586, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.03386083585667915, |
| "grad_norm": 4.659327507019043, |
| "learning_rate": 6.768368617683686e-07, |
| "loss": 0.8671, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.03393867685864852, |
| "grad_norm": 3.272244453430176, |
| "learning_rate": 6.783935242839352e-07, |
| "loss": 0.8553, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.0340165178606179, |
| "grad_norm": 4.486519813537598, |
| "learning_rate": 6.799501867995019e-07, |
| "loss": 0.9569, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.03409435886258728, |
| "grad_norm": 3.5172436237335205, |
| "learning_rate": 6.815068493150684e-07, |
| "loss": 0.9172, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.034172199864556656, |
| "grad_norm": 8.919556617736816, |
| "learning_rate": 6.830635118306351e-07, |
| "loss": 0.9005, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.034250040866526034, |
| "grad_norm": 3.1688411235809326, |
| "learning_rate": 6.846201743462016e-07, |
| "loss": 0.9052, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.03432788186849541, |
| "grad_norm": 8.181324005126953, |
| "learning_rate": 6.861768368617683e-07, |
| "loss": 0.9344, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.03440572287046479, |
| "grad_norm": 14.188647270202637, |
| "learning_rate": 6.877334993773349e-07, |
| "loss": 0.8221, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.03448356387243417, |
| "grad_norm": 2.8779571056365967, |
| "learning_rate": 6.892901618929016e-07, |
| "loss": 0.8213, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.034561404874403547, |
| "grad_norm": 4.762483596801758, |
| "learning_rate": 6.908468244084682e-07, |
| "loss": 0.9539, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.03463924587637292, |
| "grad_norm": 5.372674942016602, |
| "learning_rate": 6.924034869240348e-07, |
| "loss": 0.9323, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.034717086878342296, |
| "grad_norm": 4.73727560043335, |
| "learning_rate": 6.939601494396014e-07, |
| "loss": 0.9555, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.034794927880311674, |
| "grad_norm": 2.479062557220459, |
| "learning_rate": 6.955168119551681e-07, |
| "loss": 0.8026, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.03487276888228105, |
| "grad_norm": 4.98023796081543, |
| "learning_rate": 6.970734744707347e-07, |
| "loss": 0.9514, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.03495060988425043, |
| "grad_norm": 4.072389125823975, |
| "learning_rate": 6.986301369863014e-07, |
| "loss": 0.9739, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.03502845088621981, |
| "grad_norm": 3.26598858833313, |
| "learning_rate": 7.00186799501868e-07, |
| "loss": 0.8112, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.03510629188818919, |
| "grad_norm": 10.324394226074219, |
| "learning_rate": 7.017434620174346e-07, |
| "loss": 0.8578, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.035184132890158565, |
| "grad_norm": 7.579793453216553, |
| "learning_rate": 7.033001245330012e-07, |
| "loss": 0.8586, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.035261973892127936, |
| "grad_norm": 3.6266613006591797, |
| "learning_rate": 7.048567870485679e-07, |
| "loss": 0.8904, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.035339814894097314, |
| "grad_norm": 4.336295127868652, |
| "learning_rate": 7.064134495641345e-07, |
| "loss": 0.84, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.03541765589606669, |
| "grad_norm": 3.5872817039489746, |
| "learning_rate": 7.079701120797012e-07, |
| "loss": 0.7951, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.03549549689803607, |
| "grad_norm": 4.598228454589844, |
| "learning_rate": 7.095267745952676e-07, |
| "loss": 0.7467, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.03557333790000545, |
| "grad_norm": 3.560222625732422, |
| "learning_rate": 7.110834371108343e-07, |
| "loss": 0.9047, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.03565117890197483, |
| "grad_norm": 2.8487563133239746, |
| "learning_rate": 7.126400996264009e-07, |
| "loss": 0.9243, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.035729019903944205, |
| "grad_norm": 5.525490760803223, |
| "learning_rate": 7.141967621419676e-07, |
| "loss": 0.8549, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.03580686090591358, |
| "grad_norm": 3.5428950786590576, |
| "learning_rate": 7.157534246575342e-07, |
| "loss": 0.9336, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.03588470190788296, |
| "grad_norm": 8.396724700927734, |
| "learning_rate": 7.173100871731008e-07, |
| "loss": 0.9101, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.03596254290985233, |
| "grad_norm": 6.355068206787109, |
| "learning_rate": 7.188667496886674e-07, |
| "loss": 0.8673, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.03604038391182171, |
| "grad_norm": 8.388739585876465, |
| "learning_rate": 7.204234122042341e-07, |
| "loss": 0.9225, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.03611822491379109, |
| "grad_norm": 4.088027477264404, |
| "learning_rate": 7.219800747198007e-07, |
| "loss": 0.8003, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.03619606591576047, |
| "grad_norm": 3.6764137744903564, |
| "learning_rate": 7.235367372353674e-07, |
| "loss": 0.782, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.036273906917729845, |
| "grad_norm": 3.6554110050201416, |
| "learning_rate": 7.25093399750934e-07, |
| "loss": 0.9257, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.03635174791969922, |
| "grad_norm": 6.99379301071167, |
| "learning_rate": 7.266500622665006e-07, |
| "loss": 0.9329, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.0364295889216686, |
| "grad_norm": 3.984800100326538, |
| "learning_rate": 7.282067247820672e-07, |
| "loss": 0.9596, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.03650742992363798, |
| "grad_norm": 7.992112159729004, |
| "learning_rate": 7.297633872976339e-07, |
| "loss": 0.8945, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.03658527092560736, |
| "grad_norm": 3.314192295074463, |
| "learning_rate": 7.313200498132005e-07, |
| "loss": 0.817, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.03666311192757673, |
| "grad_norm": 5.738452434539795, |
| "learning_rate": 7.328767123287672e-07, |
| "loss": 0.8133, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.03674095292954611, |
| "grad_norm": 4.364063739776611, |
| "learning_rate": 7.344333748443337e-07, |
| "loss": 0.7555, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.036818793931515485, |
| "grad_norm": 6.397834777832031, |
| "learning_rate": 7.359900373599004e-07, |
| "loss": 0.9289, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.03689663493348486, |
| "grad_norm": 4.602386951446533, |
| "learning_rate": 7.375466998754669e-07, |
| "loss": 0.8466, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.03697447593545424, |
| "grad_norm": 4.438021659851074, |
| "learning_rate": 7.391033623910336e-07, |
| "loss": 0.8155, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.03705231693742362, |
| "grad_norm": 5.829861164093018, |
| "learning_rate": 7.406600249066002e-07, |
| "loss": 0.9119, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.037130157939393, |
| "grad_norm": 3.999397039413452, |
| "learning_rate": 7.422166874221668e-07, |
| "loss": 0.9544, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.037207998941362376, |
| "grad_norm": 7.094069480895996, |
| "learning_rate": 7.437733499377334e-07, |
| "loss": 0.8562, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.03728583994333175, |
| "grad_norm": 7.502668857574463, |
| "learning_rate": 7.453300124533001e-07, |
| "loss": 0.7626, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.037363680945301125, |
| "grad_norm": 4.224865913391113, |
| "learning_rate": 7.468866749688667e-07, |
| "loss": 0.8287, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.037441521947270504, |
| "grad_norm": 4.2678046226501465, |
| "learning_rate": 7.484433374844334e-07, |
| "loss": 0.9631, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.03751936294923988, |
| "grad_norm": 4.143566608428955, |
| "learning_rate": 7.5e-07, |
| "loss": 0.9013, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.03759720395120926, |
| "grad_norm": 3.8706650733947754, |
| "learning_rate": 7.515566625155666e-07, |
| "loss": 0.8463, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.03767504495317864, |
| "grad_norm": 6.372035503387451, |
| "learning_rate": 7.531133250311332e-07, |
| "loss": 0.8966, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.037752885955148016, |
| "grad_norm": 4.3398613929748535, |
| "learning_rate": 7.546699875466999e-07, |
| "loss": 0.8045, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.037830726957117394, |
| "grad_norm": 2.7824904918670654, |
| "learning_rate": 7.562266500622665e-07, |
| "loss": 0.8311, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.03790856795908677, |
| "grad_norm": 3.9570069313049316, |
| "learning_rate": 7.577833125778332e-07, |
| "loss": 0.9548, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.037986408961056144, |
| "grad_norm": 4.316530227661133, |
| "learning_rate": 7.593399750933997e-07, |
| "loss": 0.7945, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.03806424996302552, |
| "grad_norm": 4.4045844078063965, |
| "learning_rate": 7.608966376089664e-07, |
| "loss": 0.9145, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.0381420909649949, |
| "grad_norm": 3.736820697784424, |
| "learning_rate": 7.62453300124533e-07, |
| "loss": 0.9292, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.03821993196696428, |
| "grad_norm": 3.8448410034179688, |
| "learning_rate": 7.640099626400996e-07, |
| "loss": 0.8863, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.038297772968933656, |
| "grad_norm": 7.468678951263428, |
| "learning_rate": 7.655666251556662e-07, |
| "loss": 0.8776, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.038375613970903034, |
| "grad_norm": 4.066128253936768, |
| "learning_rate": 7.671232876712328e-07, |
| "loss": 0.906, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.03845345497287241, |
| "grad_norm": 8.009504318237305, |
| "learning_rate": 7.686799501867994e-07, |
| "loss": 0.9295, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.03853129597484179, |
| "grad_norm": 3.9662601947784424, |
| "learning_rate": 7.702366127023661e-07, |
| "loss": 0.927, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.03860913697681117, |
| "grad_norm": 3.94587779045105, |
| "learning_rate": 7.717932752179327e-07, |
| "loss": 0.9679, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.03868697797878054, |
| "grad_norm": 3.856196641921997, |
| "learning_rate": 7.733499377334994e-07, |
| "loss": 0.981, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.03876481898074992, |
| "grad_norm": 12.542234420776367, |
| "learning_rate": 7.749066002490659e-07, |
| "loss": 0.8065, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.038842659982719296, |
| "grad_norm": 5.717936038970947, |
| "learning_rate": 7.764632627646326e-07, |
| "loss": 0.8912, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.038920500984688675, |
| "grad_norm": 9.94604206085205, |
| "learning_rate": 7.780199252801992e-07, |
| "loss": 0.9442, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.03899834198665805, |
| "grad_norm": 5.26216983795166, |
| "learning_rate": 7.795765877957659e-07, |
| "loss": 0.9552, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.03907618298862743, |
| "grad_norm": 6.468954563140869, |
| "learning_rate": 7.811332503113325e-07, |
| "loss": 0.9372, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.03915402399059681, |
| "grad_norm": 2.9301857948303223, |
| "learning_rate": 7.826899128268992e-07, |
| "loss": 0.8479, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.03923186499256619, |
| "grad_norm": 6.389108657836914, |
| "learning_rate": 7.842465753424657e-07, |
| "loss": 0.9255, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.03930970599453556, |
| "grad_norm": 4.842959880828857, |
| "learning_rate": 7.858032378580324e-07, |
| "loss": 0.8478, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.03938754699650494, |
| "grad_norm": 3.118706464767456, |
| "learning_rate": 7.87359900373599e-07, |
| "loss": 0.8668, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.039465387998474315, |
| "grad_norm": 6.257364273071289, |
| "learning_rate": 7.889165628891657e-07, |
| "loss": 0.873, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.03954322900044369, |
| "grad_norm": 4.405180931091309, |
| "learning_rate": 7.904732254047323e-07, |
| "loss": 0.841, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.03962107000241307, |
| "grad_norm": 9.870434761047363, |
| "learning_rate": 7.920298879202987e-07, |
| "loss": 0.937, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.03969891100438245, |
| "grad_norm": 3.4615135192871094, |
| "learning_rate": 7.935865504358654e-07, |
| "loss": 0.9465, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.03977675200635183, |
| "grad_norm": 4.9833760261535645, |
| "learning_rate": 7.95143212951432e-07, |
| "loss": 0.8261, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.039854593008321205, |
| "grad_norm": 4.042236804962158, |
| "learning_rate": 7.966998754669987e-07, |
| "loss": 0.9602, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.039932434010290584, |
| "grad_norm": 4.549630641937256, |
| "learning_rate": 7.982565379825654e-07, |
| "loss": 0.8674, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.040010275012259955, |
| "grad_norm": 3.67543363571167, |
| "learning_rate": 7.998132004981319e-07, |
| "loss": 0.9073, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.04008811601422933, |
| "grad_norm": 6.078221321105957, |
| "learning_rate": 8.013698630136985e-07, |
| "loss": 0.9359, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.04016595701619871, |
| "grad_norm": 5.599534034729004, |
| "learning_rate": 8.029265255292652e-07, |
| "loss": 0.8628, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.04024379801816809, |
| "grad_norm": 5.098958492279053, |
| "learning_rate": 8.044831880448319e-07, |
| "loss": 0.9412, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.04032163902013747, |
| "grad_norm": 7.108897686004639, |
| "learning_rate": 8.060398505603985e-07, |
| "loss": 0.9517, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.040399480022106846, |
| "grad_norm": 4.495419979095459, |
| "learning_rate": 8.075965130759652e-07, |
| "loss": 0.8605, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.040477321024076224, |
| "grad_norm": 4.583033084869385, |
| "learning_rate": 8.091531755915317e-07, |
| "loss": 0.8956, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.0405551620260456, |
| "grad_norm": 5.067065238952637, |
| "learning_rate": 8.107098381070983e-07, |
| "loss": 0.8306, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.04063300302801497, |
| "grad_norm": 7.724658012390137, |
| "learning_rate": 8.12266500622665e-07, |
| "loss": 0.8716, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.04071084402998435, |
| "grad_norm": 2.8972911834716797, |
| "learning_rate": 8.138231631382317e-07, |
| "loss": 0.8987, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.04078868503195373, |
| "grad_norm": 7.840747833251953, |
| "learning_rate": 8.153798256537983e-07, |
| "loss": 0.8418, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.04086652603392311, |
| "grad_norm": 7.727685928344727, |
| "learning_rate": 8.169364881693648e-07, |
| "loss": 0.9107, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.040944367035892486, |
| "grad_norm": 3.801807165145874, |
| "learning_rate": 8.184931506849315e-07, |
| "loss": 0.8083, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.041022208037861864, |
| "grad_norm": 12.985006332397461, |
| "learning_rate": 8.20049813200498e-07, |
| "loss": 0.9866, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.04110004903983124, |
| "grad_norm": 3.2062785625457764, |
| "learning_rate": 8.216064757160647e-07, |
| "loss": 0.8945, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.04117789004180062, |
| "grad_norm": 4.6915459632873535, |
| "learning_rate": 8.231631382316313e-07, |
| "loss": 0.8236, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.04125573104377, |
| "grad_norm": 3.5803701877593994, |
| "learning_rate": 8.247198007471979e-07, |
| "loss": 0.8776, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.04133357204573937, |
| "grad_norm": 12.053580284118652, |
| "learning_rate": 8.262764632627645e-07, |
| "loss": 0.9105, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.04141141304770875, |
| "grad_norm": 6.285280227661133, |
| "learning_rate": 8.278331257783312e-07, |
| "loss": 0.9066, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.041489254049678126, |
| "grad_norm": 5.232326984405518, |
| "learning_rate": 8.293897882938978e-07, |
| "loss": 0.8508, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.041567095051647504, |
| "grad_norm": 3.038318395614624, |
| "learning_rate": 8.309464508094645e-07, |
| "loss": 0.8356, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.04164493605361688, |
| "grad_norm": 7.6262335777282715, |
| "learning_rate": 8.32503113325031e-07, |
| "loss": 0.9568, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.04172277705558626, |
| "grad_norm": 3.0321080684661865, |
| "learning_rate": 8.340597758405977e-07, |
| "loss": 0.7881, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.04180061805755564, |
| "grad_norm": 9.739387512207031, |
| "learning_rate": 8.356164383561643e-07, |
| "loss": 0.74, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.04187845905952502, |
| "grad_norm": 8.000276565551758, |
| "learning_rate": 8.37173100871731e-07, |
| "loss": 0.9413, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.041956300061494395, |
| "grad_norm": 6.706925868988037, |
| "learning_rate": 8.387297633872976e-07, |
| "loss": 0.7788, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.042034141063463766, |
| "grad_norm": 6.568419933319092, |
| "learning_rate": 8.402864259028643e-07, |
| "loss": 0.8554, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.042111982065433144, |
| "grad_norm": 3.8879165649414062, |
| "learning_rate": 8.418430884184308e-07, |
| "loss": 0.9076, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.04218982306740252, |
| "grad_norm": 5.89036226272583, |
| "learning_rate": 8.433997509339975e-07, |
| "loss": 0.91, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.0422676640693719, |
| "grad_norm": 5.522625923156738, |
| "learning_rate": 8.449564134495641e-07, |
| "loss": 0.9102, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.04234550507134128, |
| "grad_norm": 4.862393379211426, |
| "learning_rate": 8.465130759651308e-07, |
| "loss": 0.8503, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.04242334607331066, |
| "grad_norm": 8.545342445373535, |
| "learning_rate": 8.480697384806973e-07, |
| "loss": 0.7882, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.042501187075280035, |
| "grad_norm": 3.1325466632843018, |
| "learning_rate": 8.496264009962639e-07, |
| "loss": 0.7993, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.04257902807724941, |
| "grad_norm": 3.6244635581970215, |
| "learning_rate": 8.511830635118305e-07, |
| "loss": 0.7498, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.042656869079218784, |
| "grad_norm": 7.154248237609863, |
| "learning_rate": 8.527397260273972e-07, |
| "loss": 0.856, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.04273471008118816, |
| "grad_norm": 4.3253960609436035, |
| "learning_rate": 8.542963885429638e-07, |
| "loss": 1.0214, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.04281255108315754, |
| "grad_norm": 4.56231164932251, |
| "learning_rate": 8.558530510585305e-07, |
| "loss": 0.8501, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.04289039208512692, |
| "grad_norm": 3.396204710006714, |
| "learning_rate": 8.57409713574097e-07, |
| "loss": 0.9002, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.0429682330870963, |
| "grad_norm": 5.2896952629089355, |
| "learning_rate": 8.589663760896637e-07, |
| "loss": 0.8824, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.043046074089065675, |
| "grad_norm": 2.9441330432891846, |
| "learning_rate": 8.605230386052303e-07, |
| "loss": 0.915, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.04312391509103505, |
| "grad_norm": 3.7935092449188232, |
| "learning_rate": 8.62079701120797e-07, |
| "loss": 0.9273, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.04320175609300443, |
| "grad_norm": 2.87821102142334, |
| "learning_rate": 8.636363636363636e-07, |
| "loss": 0.7991, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.04327959709497381, |
| "grad_norm": 6.359185218811035, |
| "learning_rate": 8.651930261519303e-07, |
| "loss": 0.8009, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.04335743809694318, |
| "grad_norm": 4.339592456817627, |
| "learning_rate": 8.667496886674968e-07, |
| "loss": 0.9357, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.04343527909891256, |
| "grad_norm": 5.373045921325684, |
| "learning_rate": 8.683063511830635e-07, |
| "loss": 0.9278, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.04351312010088194, |
| "grad_norm": 4.687058448791504, |
| "learning_rate": 8.698630136986301e-07, |
| "loss": 0.8102, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.043590961102851315, |
| "grad_norm": 3.0270180702209473, |
| "learning_rate": 8.714196762141968e-07, |
| "loss": 0.7115, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.04366880210482069, |
| "grad_norm": 4.379403114318848, |
| "learning_rate": 8.729763387297634e-07, |
| "loss": 0.9258, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.04374664310679007, |
| "grad_norm": 5.339996814727783, |
| "learning_rate": 8.7453300124533e-07, |
| "loss": 0.8254, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.04382448410875945, |
| "grad_norm": 8.269269943237305, |
| "learning_rate": 8.760896637608965e-07, |
| "loss": 0.8743, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.04390232511072883, |
| "grad_norm": 3.314060688018799, |
| "learning_rate": 8.776463262764632e-07, |
| "loss": 0.8462, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.043980166112698206, |
| "grad_norm": 5.628077507019043, |
| "learning_rate": 8.792029887920298e-07, |
| "loss": 0.8659, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.04405800711466758, |
| "grad_norm": 5.050654888153076, |
| "learning_rate": 8.807596513075965e-07, |
| "loss": 0.9523, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.044135848116636955, |
| "grad_norm": 3.8180229663848877, |
| "learning_rate": 8.82316313823163e-07, |
| "loss": 0.8504, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.044213689118606334, |
| "grad_norm": 3.3269238471984863, |
| "learning_rate": 8.838729763387297e-07, |
| "loss": 0.8544, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.04429153012057571, |
| "grad_norm": 5.524733066558838, |
| "learning_rate": 8.854296388542963e-07, |
| "loss": 0.8963, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.04436937112254509, |
| "grad_norm": 4.741829872131348, |
| "learning_rate": 8.86986301369863e-07, |
| "loss": 0.8262, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.04444721212451447, |
| "grad_norm": 4.213449001312256, |
| "learning_rate": 8.885429638854296e-07, |
| "loss": 0.8722, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.044525053126483846, |
| "grad_norm": 3.3883323669433594, |
| "learning_rate": 8.900996264009963e-07, |
| "loss": 0.8378, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.044602894128453224, |
| "grad_norm": 5.192069053649902, |
| "learning_rate": 8.916562889165628e-07, |
| "loss": 0.8204, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.044680735130422596, |
| "grad_norm": 3.5852484703063965, |
| "learning_rate": 8.932129514321295e-07, |
| "loss": 0.9008, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.044758576132391974, |
| "grad_norm": 5.334090709686279, |
| "learning_rate": 8.947696139476961e-07, |
| "loss": 0.7885, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.04483641713436135, |
| "grad_norm": 5.502117156982422, |
| "learning_rate": 8.963262764632628e-07, |
| "loss": 0.8715, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.04491425813633073, |
| "grad_norm": 3.577226400375366, |
| "learning_rate": 8.978829389788294e-07, |
| "loss": 0.8076, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.04499209913830011, |
| "grad_norm": 2.6925950050354004, |
| "learning_rate": 8.99439601494396e-07, |
| "loss": 0.8624, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.045069940140269486, |
| "grad_norm": 15.992047309875488, |
| "learning_rate": 9.009962640099626e-07, |
| "loss": 0.8628, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.045147781142238864, |
| "grad_norm": 4.682984352111816, |
| "learning_rate": 9.025529265255293e-07, |
| "loss": 0.876, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.04522562214420824, |
| "grad_norm": 3.694166421890259, |
| "learning_rate": 9.041095890410958e-07, |
| "loss": 0.7707, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.04530346314617762, |
| "grad_norm": 6.382852077484131, |
| "learning_rate": 9.056662515566625e-07, |
| "loss": 0.8372, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.04538130414814699, |
| "grad_norm": 21.609174728393555, |
| "learning_rate": 9.07222914072229e-07, |
| "loss": 0.9099, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.04545914515011637, |
| "grad_norm": 2.4359121322631836, |
| "learning_rate": 9.087795765877957e-07, |
| "loss": 0.9096, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.04553698615208575, |
| "grad_norm": 3.6651458740234375, |
| "learning_rate": 9.103362391033623e-07, |
| "loss": 0.9408, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.045614827154055126, |
| "grad_norm": 5.385332107543945, |
| "learning_rate": 9.11892901618929e-07, |
| "loss": 0.9312, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.045692668156024505, |
| "grad_norm": 3.9548020362854004, |
| "learning_rate": 9.134495641344956e-07, |
| "loss": 0.8158, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.04577050915799388, |
| "grad_norm": 2.7402524948120117, |
| "learning_rate": 9.150062266500622e-07, |
| "loss": 0.7451, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.04584835015996326, |
| "grad_norm": 6.259926795959473, |
| "learning_rate": 9.165628891656288e-07, |
| "loss": 0.7399, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.04592619116193264, |
| "grad_norm": 2.590571403503418, |
| "learning_rate": 9.181195516811955e-07, |
| "loss": 0.9088, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.04600403216390201, |
| "grad_norm": 4.331900596618652, |
| "learning_rate": 9.196762141967621e-07, |
| "loss": 0.8636, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.04608187316587139, |
| "grad_norm": 5.567667007446289, |
| "learning_rate": 9.212328767123288e-07, |
| "loss": 0.9059, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.04615971416784077, |
| "grad_norm": 4.826610565185547, |
| "learning_rate": 9.227895392278954e-07, |
| "loss": 0.7942, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.046237555169810145, |
| "grad_norm": 7.561775207519531, |
| "learning_rate": 9.24346201743462e-07, |
| "loss": 0.8461, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.04631539617177952, |
| "grad_norm": 4.251841068267822, |
| "learning_rate": 9.259028642590286e-07, |
| "loss": 0.8541, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.0463932371737489, |
| "grad_norm": 5.157746315002441, |
| "learning_rate": 9.274595267745953e-07, |
| "loss": 0.8863, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.04647107817571828, |
| "grad_norm": 4.906675815582275, |
| "learning_rate": 9.290161892901619e-07, |
| "loss": 0.8432, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.04654891917768766, |
| "grad_norm": 4.339780807495117, |
| "learning_rate": 9.305728518057285e-07, |
| "loss": 0.8448, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.046626760179657036, |
| "grad_norm": 7.889379024505615, |
| "learning_rate": 9.32129514321295e-07, |
| "loss": 0.9431, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.04670460118162641, |
| "grad_norm": 3.7697620391845703, |
| "learning_rate": 9.336861768368617e-07, |
| "loss": 0.8569, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.046782442183595785, |
| "grad_norm": 8.277153968811035, |
| "learning_rate": 9.352428393524283e-07, |
| "loss": 0.7687, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.04686028318556516, |
| "grad_norm": 6.6820149421691895, |
| "learning_rate": 9.36799501867995e-07, |
| "loss": 0.8283, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.04693812418753454, |
| "grad_norm": 6.457581996917725, |
| "learning_rate": 9.383561643835616e-07, |
| "loss": 0.8447, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.04701596518950392, |
| "grad_norm": 8.55042552947998, |
| "learning_rate": 9.399128268991282e-07, |
| "loss": 0.8986, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.0470938061914733, |
| "grad_norm": 8.297921180725098, |
| "learning_rate": 9.414694894146948e-07, |
| "loss": 0.9012, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.047171647193442676, |
| "grad_norm": 7.09883975982666, |
| "learning_rate": 9.430261519302615e-07, |
| "loss": 0.7454, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.047249488195412054, |
| "grad_norm": 8.166378021240234, |
| "learning_rate": 9.445828144458281e-07, |
| "loss": 0.8405, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.04732732919738143, |
| "grad_norm": 4.509795665740967, |
| "learning_rate": 9.461394769613948e-07, |
| "loss": 0.8076, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.0474051701993508, |
| "grad_norm": 8.811022758483887, |
| "learning_rate": 9.476961394769614e-07, |
| "loss": 0.8387, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.04748301120132018, |
| "grad_norm": 3.122080087661743, |
| "learning_rate": 9.49252801992528e-07, |
| "loss": 0.7069, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.04756085220328956, |
| "grad_norm": 6.84942626953125, |
| "learning_rate": 9.508094645080946e-07, |
| "loss": 0.7412, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.04763869320525894, |
| "grad_norm": 7.2728424072265625, |
| "learning_rate": 9.523661270236613e-07, |
| "loss": 0.8156, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.047716534207228316, |
| "grad_norm": 4.008538246154785, |
| "learning_rate": 9.539227895392278e-07, |
| "loss": 0.8618, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.047794375209197694, |
| "grad_norm": 3.9167230129241943, |
| "learning_rate": 9.554794520547946e-07, |
| "loss": 0.8026, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.04787221621116707, |
| "grad_norm": 3.683629274368286, |
| "learning_rate": 9.570361145703611e-07, |
| "loss": 0.9918, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.04795005721313645, |
| "grad_norm": 4.745260238647461, |
| "learning_rate": 9.585927770859277e-07, |
| "loss": 0.8148, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.04802789821510582, |
| "grad_norm": 2.839996814727783, |
| "learning_rate": 9.601494396014944e-07, |
| "loss": 0.8176, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.0481057392170752, |
| "grad_norm": 5.715896129608154, |
| "learning_rate": 9.61706102117061e-07, |
| "loss": 0.9266, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.04818358021904458, |
| "grad_norm": 3.44376540184021, |
| "learning_rate": 9.632627646326275e-07, |
| "loss": 0.7851, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.048261421221013956, |
| "grad_norm": 4.266874313354492, |
| "learning_rate": 9.648194271481943e-07, |
| "loss": 0.8701, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.048339262222983334, |
| "grad_norm": 3.6180949211120605, |
| "learning_rate": 9.663760896637608e-07, |
| "loss": 0.9206, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.04841710322495271, |
| "grad_norm": 4.103425979614258, |
| "learning_rate": 9.679327521793276e-07, |
| "loss": 0.9025, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.04849494422692209, |
| "grad_norm": 3.339601516723633, |
| "learning_rate": 9.69489414694894e-07, |
| "loss": 0.7176, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.04857278522889147, |
| "grad_norm": 5.723580360412598, |
| "learning_rate": 9.710460772104606e-07, |
| "loss": 0.7712, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.04865062623086085, |
| "grad_norm": 9.84174919128418, |
| "learning_rate": 9.726027397260274e-07, |
| "loss": 0.8301, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.04872846723283022, |
| "grad_norm": 4.004911422729492, |
| "learning_rate": 9.74159402241594e-07, |
| "loss": 0.8232, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.048806308234799596, |
| "grad_norm": 4.22821044921875, |
| "learning_rate": 9.757160647571607e-07, |
| "loss": 0.8408, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.048884149236768974, |
| "grad_norm": 3.268477439880371, |
| "learning_rate": 9.772727272727273e-07, |
| "loss": 0.854, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.04896199023873835, |
| "grad_norm": 3.3312723636627197, |
| "learning_rate": 9.788293897882938e-07, |
| "loss": 0.7584, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.04903983124070773, |
| "grad_norm": 2.6721420288085938, |
| "learning_rate": 9.803860523038606e-07, |
| "loss": 0.7101, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.04911767224267711, |
| "grad_norm": 3.1036221981048584, |
| "learning_rate": 9.81942714819427e-07, |
| "loss": 0.8943, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.04919551324464649, |
| "grad_norm": 4.581750869750977, |
| "learning_rate": 9.834993773349939e-07, |
| "loss": 0.956, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.049273354246615865, |
| "grad_norm": 5.273120880126953, |
| "learning_rate": 9.850560398505604e-07, |
| "loss": 0.8077, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.04935119524858524, |
| "grad_norm": 7.310013771057129, |
| "learning_rate": 9.86612702366127e-07, |
| "loss": 0.8506, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.049429036250554614, |
| "grad_norm": 5.949068069458008, |
| "learning_rate": 9.881693648816935e-07, |
| "loss": 0.8132, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.04950687725252399, |
| "grad_norm": 5.228186130523682, |
| "learning_rate": 9.897260273972602e-07, |
| "loss": 0.9303, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.04958471825449337, |
| "grad_norm": 4.407035827636719, |
| "learning_rate": 9.912826899128268e-07, |
| "loss": 0.8184, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.04966255925646275, |
| "grad_norm": 4.605864524841309, |
| "learning_rate": 9.928393524283936e-07, |
| "loss": 0.9336, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.04974040025843213, |
| "grad_norm": 3.0708847045898438, |
| "learning_rate": 9.9439601494396e-07, |
| "loss": 0.8725, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.049818241260401505, |
| "grad_norm": 3.3742926120758057, |
| "learning_rate": 9.959526774595266e-07, |
| "loss": 0.8121, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.04989608226237088, |
| "grad_norm": 2.685382843017578, |
| "learning_rate": 9.975093399750934e-07, |
| "loss": 0.7798, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.04997392326434026, |
| "grad_norm": 4.932633876800537, |
| "learning_rate": 9.9906600249066e-07, |
| "loss": 0.8492, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.05005176426630963, |
| "grad_norm": 8.489307403564453, |
| "learning_rate": 9.999672243981579e-07, |
| "loss": 0.8355, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.05012960526827901, |
| "grad_norm": 4.679005146026611, |
| "learning_rate": 9.99885285393553e-07, |
| "loss": 0.9012, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.05020744627024839, |
| "grad_norm": 6.65717887878418, |
| "learning_rate": 9.99803346388948e-07, |
| "loss": 1.0277, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.05028528727221777, |
| "grad_norm": 5.373363494873047, |
| "learning_rate": 9.99721407384343e-07, |
| "loss": 1.0007, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.050363128274187145, |
| "grad_norm": 3.9103312492370605, |
| "learning_rate": 9.996394683797382e-07, |
| "loss": 0.8015, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.05044096927615652, |
| "grad_norm": 6.019688606262207, |
| "learning_rate": 9.995575293751332e-07, |
| "loss": 0.8575, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.0505188102781259, |
| "grad_norm": 17.253416061401367, |
| "learning_rate": 9.99475590370528e-07, |
| "loss": 0.7818, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.05059665128009528, |
| "grad_norm": 9.291438102722168, |
| "learning_rate": 9.993936513659232e-07, |
| "loss": 0.9093, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.05067449228206466, |
| "grad_norm": 4.7031121253967285, |
| "learning_rate": 9.993117123613182e-07, |
| "loss": 0.792, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.05075233328403403, |
| "grad_norm": 3.9141600131988525, |
| "learning_rate": 9.992297733567131e-07, |
| "loss": 0.8803, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.05083017428600341, |
| "grad_norm": 5.731180191040039, |
| "learning_rate": 9.991478343521082e-07, |
| "loss": 0.9267, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.050908015287972785, |
| "grad_norm": 4.963929653167725, |
| "learning_rate": 9.990658953475033e-07, |
| "loss": 0.8762, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.050985856289942164, |
| "grad_norm": 5.126701831817627, |
| "learning_rate": 9.989839563428983e-07, |
| "loss": 0.9762, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.05106369729191154, |
| "grad_norm": 5.1071953773498535, |
| "learning_rate": 9.989020173382934e-07, |
| "loss": 0.9504, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.05114153829388092, |
| "grad_norm": 4.061114311218262, |
| "learning_rate": 9.988200783336883e-07, |
| "loss": 0.821, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.0512193792958503, |
| "grad_norm": 3.604483127593994, |
| "learning_rate": 9.987381393290833e-07, |
| "loss": 0.9565, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.051297220297819676, |
| "grad_norm": 4.070693016052246, |
| "learning_rate": 9.986562003244784e-07, |
| "loss": 0.7469, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.051375061299789054, |
| "grad_norm": 3.4125092029571533, |
| "learning_rate": 9.985742613198735e-07, |
| "loss": 0.7926, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.051452902301758426, |
| "grad_norm": 7.950231075286865, |
| "learning_rate": 9.984923223152686e-07, |
| "loss": 0.8422, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.051530743303727804, |
| "grad_norm": 3.185955762863159, |
| "learning_rate": 9.984103833106634e-07, |
| "loss": 0.7959, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.05160858430569718, |
| "grad_norm": 4.626750946044922, |
| "learning_rate": 9.983284443060585e-07, |
| "loss": 0.9932, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.05168642530766656, |
| "grad_norm": 2.5758249759674072, |
| "learning_rate": 9.982465053014536e-07, |
| "loss": 0.7739, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.05176426630963594, |
| "grad_norm": 3.6274349689483643, |
| "learning_rate": 9.981645662968484e-07, |
| "loss": 0.8351, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.051842107311605316, |
| "grad_norm": 3.520857572555542, |
| "learning_rate": 9.980826272922435e-07, |
| "loss": 0.8815, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.051919948313574694, |
| "grad_norm": 4.665640354156494, |
| "learning_rate": 9.980006882876386e-07, |
| "loss": 0.8575, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.05199778931554407, |
| "grad_norm": 5.597052574157715, |
| "learning_rate": 9.979187492830337e-07, |
| "loss": 0.8373, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.052075630317513444, |
| "grad_norm": 5.660586357116699, |
| "learning_rate": 9.978368102784287e-07, |
| "loss": 0.9164, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.05215347131948282, |
| "grad_norm": 11.376925468444824, |
| "learning_rate": 9.977548712738238e-07, |
| "loss": 0.8779, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.0522313123214522, |
| "grad_norm": 3.930678606033325, |
| "learning_rate": 9.976729322692187e-07, |
| "loss": 0.8638, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.05230915332342158, |
| "grad_norm": 4.059145450592041, |
| "learning_rate": 9.975909932646138e-07, |
| "loss": 0.7965, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.052386994325390956, |
| "grad_norm": 4.585720539093018, |
| "learning_rate": 9.975090542600088e-07, |
| "loss": 0.8034, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.052464835327360335, |
| "grad_norm": 5.015563488006592, |
| "learning_rate": 9.974271152554037e-07, |
| "loss": 0.8109, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.05254267632932971, |
| "grad_norm": 3.2969090938568115, |
| "learning_rate": 9.973451762507988e-07, |
| "loss": 0.9502, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.05262051733129909, |
| "grad_norm": 3.2702388763427734, |
| "learning_rate": 9.972632372461938e-07, |
| "loss": 0.8148, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.05269835833326847, |
| "grad_norm": 2.95889949798584, |
| "learning_rate": 9.97181298241589e-07, |
| "loss": 0.8935, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.05277619933523784, |
| "grad_norm": 5.157326698303223, |
| "learning_rate": 9.97099359236984e-07, |
| "loss": 0.9001, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.05285404033720722, |
| "grad_norm": 3.6577107906341553, |
| "learning_rate": 9.97017420232379e-07, |
| "loss": 0.7983, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.0529318813391766, |
| "grad_norm": 2.539867401123047, |
| "learning_rate": 9.969354812277741e-07, |
| "loss": 0.732, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.053009722341145975, |
| "grad_norm": 6.6847076416015625, |
| "learning_rate": 9.96853542223169e-07, |
| "loss": 0.8909, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.05308756334311535, |
| "grad_norm": 3.6293387413024902, |
| "learning_rate": 9.96771603218564e-07, |
| "loss": 0.7757, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.05316540434508473, |
| "grad_norm": 9.500846862792969, |
| "learning_rate": 9.966896642139592e-07, |
| "loss": 0.8709, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.05324324534705411, |
| "grad_norm": 8.317655563354492, |
| "learning_rate": 9.96607725209354e-07, |
| "loss": 0.8833, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.05332108634902349, |
| "grad_norm": 6.386698246002197, |
| "learning_rate": 9.96525786204749e-07, |
| "loss": 0.9136, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.05339892735099286, |
| "grad_norm": 3.567600965499878, |
| "learning_rate": 9.964438472001442e-07, |
| "loss": 0.8465, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.05347676835296224, |
| "grad_norm": 7.062701225280762, |
| "learning_rate": 9.963619081955392e-07, |
| "loss": 0.8179, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.053554609354931615, |
| "grad_norm": 3.983492851257324, |
| "learning_rate": 9.962799691909343e-07, |
| "loss": 0.899, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.05363245035690099, |
| "grad_norm": 7.150521278381348, |
| "learning_rate": 9.961980301863292e-07, |
| "loss": 0.7949, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.05371029135887037, |
| "grad_norm": 5.3643107414245605, |
| "learning_rate": 9.961160911817243e-07, |
| "loss": 0.893, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.05378813236083975, |
| "grad_norm": 7.8569440841674805, |
| "learning_rate": 9.960341521771193e-07, |
| "loss": 0.7597, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.05386597336280913, |
| "grad_norm": 2.990384817123413, |
| "learning_rate": 9.959522131725144e-07, |
| "loss": 0.8968, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.053943814364778506, |
| "grad_norm": 11.023333549499512, |
| "learning_rate": 9.958702741679093e-07, |
| "loss": 0.8577, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.054021655366747884, |
| "grad_norm": 3.8599610328674316, |
| "learning_rate": 9.957883351633043e-07, |
| "loss": 0.8187, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.054099496368717255, |
| "grad_norm": 4.514223575592041, |
| "learning_rate": 9.957063961586994e-07, |
| "loss": 0.8948, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.05417733737068663, |
| "grad_norm": 5.561735153198242, |
| "learning_rate": 9.956244571540945e-07, |
| "loss": 0.7144, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.05425517837265601, |
| "grad_norm": 2.5921874046325684, |
| "learning_rate": 9.955425181494896e-07, |
| "loss": 0.8599, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.05433301937462539, |
| "grad_norm": 4.871161937713623, |
| "learning_rate": 9.954605791448844e-07, |
| "loss": 0.9644, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.05441086037659477, |
| "grad_norm": 6.471960544586182, |
| "learning_rate": 9.953786401402795e-07, |
| "loss": 0.764, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.054488701378564146, |
| "grad_norm": 5.133829593658447, |
| "learning_rate": 9.952967011356746e-07, |
| "loss": 0.8484, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.054566542380533524, |
| "grad_norm": 15.294747352600098, |
| "learning_rate": 9.952147621310697e-07, |
| "loss": 0.9278, |
| "step": 3505 |
| }, |
| { |
| "epoch": 0.0546443833825029, |
| "grad_norm": 4.0458526611328125, |
| "learning_rate": 9.951328231264645e-07, |
| "loss": 0.8015, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.05472222438447228, |
| "grad_norm": 3.96840238571167, |
| "learning_rate": 9.950508841218596e-07, |
| "loss": 0.9182, |
| "step": 3515 |
| }, |
| { |
| "epoch": 0.05480006538644165, |
| "grad_norm": 3.493230104446411, |
| "learning_rate": 9.949689451172547e-07, |
| "loss": 0.7351, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.05487790638841103, |
| "grad_norm": 6.453081130981445, |
| "learning_rate": 9.948870061126497e-07, |
| "loss": 0.6706, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.05495574739038041, |
| "grad_norm": 4.883228302001953, |
| "learning_rate": 9.948050671080446e-07, |
| "loss": 0.8926, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.055033588392349786, |
| "grad_norm": 8.88487434387207, |
| "learning_rate": 9.947231281034397e-07, |
| "loss": 0.7815, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.055111429394319164, |
| "grad_norm": 3.5414915084838867, |
| "learning_rate": 9.946411890988348e-07, |
| "loss": 0.8066, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.05518927039628854, |
| "grad_norm": 3.3924477100372314, |
| "learning_rate": 9.945592500942298e-07, |
| "loss": 0.8942, |
| "step": 3545 |
| }, |
| { |
| "epoch": 0.05526711139825792, |
| "grad_norm": 8.606155395507812, |
| "learning_rate": 9.94477311089625e-07, |
| "loss": 0.8062, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.0553449524002273, |
| "grad_norm": 3.5798611640930176, |
| "learning_rate": 9.9439537208502e-07, |
| "loss": 0.8145, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.05542279340219667, |
| "grad_norm": 4.816424369812012, |
| "learning_rate": 9.943134330804148e-07, |
| "loss": 0.9767, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.05550063440416605, |
| "grad_norm": 3.161212682723999, |
| "learning_rate": 9.9423149407581e-07, |
| "loss": 0.7526, |
| "step": 3565 |
| }, |
| { |
| "epoch": 0.055578475406135426, |
| "grad_norm": 5.3241143226623535, |
| "learning_rate": 9.94149555071205e-07, |
| "loss": 0.8756, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.055656316408104804, |
| "grad_norm": 4.702089786529541, |
| "learning_rate": 9.940676160665999e-07, |
| "loss": 0.7844, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.05573415741007418, |
| "grad_norm": 3.6324615478515625, |
| "learning_rate": 9.93985677061995e-07, |
| "loss": 0.9757, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.05581199841204356, |
| "grad_norm": 5.574779510498047, |
| "learning_rate": 9.9390373805739e-07, |
| "loss": 0.8368, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.05588983941401294, |
| "grad_norm": 3.3760433197021484, |
| "learning_rate": 9.93821799052785e-07, |
| "loss": 0.9753, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.05596768041598232, |
| "grad_norm": 3.6447086334228516, |
| "learning_rate": 9.937398600481802e-07, |
| "loss": 0.8654, |
| "step": 3595 |
| }, |
| { |
| "epoch": 0.056045521417951695, |
| "grad_norm": 10.935750007629395, |
| "learning_rate": 9.936579210435752e-07, |
| "loss": 0.8504, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.056123362419921066, |
| "grad_norm": 5.356347560882568, |
| "learning_rate": 9.9357598203897e-07, |
| "loss": 0.8439, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.056201203421890444, |
| "grad_norm": 7.737555027008057, |
| "learning_rate": 9.934940430343652e-07, |
| "loss": 0.8997, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.05627904442385982, |
| "grad_norm": 4.059571266174316, |
| "learning_rate": 9.934121040297602e-07, |
| "loss": 0.736, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.0563568854258292, |
| "grad_norm": 10.28212833404541, |
| "learning_rate": 9.933301650251551e-07, |
| "loss": 0.8219, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.05643472642779858, |
| "grad_norm": 7.522468090057373, |
| "learning_rate": 9.932482260205502e-07, |
| "loss": 0.7058, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.05651256742976796, |
| "grad_norm": 4.0811872482299805, |
| "learning_rate": 9.931662870159453e-07, |
| "loss": 0.8334, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.056590408431737335, |
| "grad_norm": 2.533539295196533, |
| "learning_rate": 9.930843480113403e-07, |
| "loss": 0.8185, |
| "step": 3635 |
| }, |
| { |
| "epoch": 0.05666824943370671, |
| "grad_norm": 2.272587776184082, |
| "learning_rate": 9.930024090067354e-07, |
| "loss": 0.8294, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.05674609043567609, |
| "grad_norm": 4.402963638305664, |
| "learning_rate": 9.929204700021305e-07, |
| "loss": 0.8253, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.05682393143764546, |
| "grad_norm": 4.450977802276611, |
| "learning_rate": 9.928385309975253e-07, |
| "loss": 0.7287, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.05690177243961484, |
| "grad_norm": 4.995216369628906, |
| "learning_rate": 9.927565919929204e-07, |
| "loss": 0.7744, |
| "step": 3655 |
| }, |
| { |
| "epoch": 0.05697961344158422, |
| "grad_norm": 4.42352294921875, |
| "learning_rate": 9.926746529883155e-07, |
| "loss": 0.8216, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.0570574544435536, |
| "grad_norm": 5.005922317504883, |
| "learning_rate": 9.925927139837106e-07, |
| "loss": 0.88, |
| "step": 3665 |
| }, |
| { |
| "epoch": 0.057135295445522975, |
| "grad_norm": 4.319427013397217, |
| "learning_rate": 9.925107749791054e-07, |
| "loss": 0.9386, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.05721313644749235, |
| "grad_norm": 4.61904239654541, |
| "learning_rate": 9.924288359745005e-07, |
| "loss": 0.8248, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.05729097744946173, |
| "grad_norm": 3.656996250152588, |
| "learning_rate": 9.923468969698956e-07, |
| "loss": 0.8898, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.05736881845143111, |
| "grad_norm": 10.73847484588623, |
| "learning_rate": 9.922649579652907e-07, |
| "loss": 0.7295, |
| "step": 3685 |
| }, |
| { |
| "epoch": 0.05744665945340048, |
| "grad_norm": 3.2956910133361816, |
| "learning_rate": 9.921830189606855e-07, |
| "loss": 0.7937, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.05752450045536986, |
| "grad_norm": 3.310476541519165, |
| "learning_rate": 9.921010799560806e-07, |
| "loss": 0.7597, |
| "step": 3695 |
| }, |
| { |
| "epoch": 0.05760234145733924, |
| "grad_norm": 6.073892116546631, |
| "learning_rate": 9.920191409514757e-07, |
| "loss": 0.775, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.057680182459308615, |
| "grad_norm": 4.651096820831299, |
| "learning_rate": 9.919372019468707e-07, |
| "loss": 0.9085, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.057758023461277994, |
| "grad_norm": 5.112009048461914, |
| "learning_rate": 9.918552629422658e-07, |
| "loss": 0.854, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.05783586446324737, |
| "grad_norm": 3.9226460456848145, |
| "learning_rate": 9.917733239376607e-07, |
| "loss": 0.8815, |
| "step": 3715 |
| }, |
| { |
| "epoch": 0.05791370546521675, |
| "grad_norm": 5.9531707763671875, |
| "learning_rate": 9.916913849330558e-07, |
| "loss": 0.8794, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.05799154646718613, |
| "grad_norm": 7.749881744384766, |
| "learning_rate": 9.916094459284508e-07, |
| "loss": 0.8347, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.058069387469155506, |
| "grad_norm": 3.2161874771118164, |
| "learning_rate": 9.915275069238457e-07, |
| "loss": 0.8297, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.05814722847112488, |
| "grad_norm": 3.4381978511810303, |
| "learning_rate": 9.914455679192408e-07, |
| "loss": 0.8016, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.058225069473094256, |
| "grad_norm": 6.175289630889893, |
| "learning_rate": 9.913636289146358e-07, |
| "loss": 0.7378, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.058302910475063634, |
| "grad_norm": 7.808245658874512, |
| "learning_rate": 9.91281689910031e-07, |
| "loss": 0.8631, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.05838075147703301, |
| "grad_norm": 8.13048267364502, |
| "learning_rate": 9.91199750905426e-07, |
| "loss": 0.7241, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.05845859247900239, |
| "grad_norm": 14.47769546508789, |
| "learning_rate": 9.91117811900821e-07, |
| "loss": 0.7604, |
| "step": 3755 |
| }, |
| { |
| "epoch": 0.05853643348097177, |
| "grad_norm": 13.544578552246094, |
| "learning_rate": 9.91035872896216e-07, |
| "loss": 0.9168, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.058614274482941146, |
| "grad_norm": 3.012338638305664, |
| "learning_rate": 9.90953933891611e-07, |
| "loss": 0.8438, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.058692115484910524, |
| "grad_norm": 8.543879508972168, |
| "learning_rate": 9.90871994887006e-07, |
| "loss": 0.8027, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.058769956486879896, |
| "grad_norm": 3.5552265644073486, |
| "learning_rate": 9.907900558824012e-07, |
| "loss": 0.9394, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.058847797488849274, |
| "grad_norm": 2.7634129524230957, |
| "learning_rate": 9.90708116877796e-07, |
| "loss": 0.8544, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.05892563849081865, |
| "grad_norm": 4.050414085388184, |
| "learning_rate": 9.90626177873191e-07, |
| "loss": 0.8405, |
| "step": 3785 |
| }, |
| { |
| "epoch": 0.05900347949278803, |
| "grad_norm": 3.3038461208343506, |
| "learning_rate": 9.905442388685862e-07, |
| "loss": 0.763, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.05908132049475741, |
| "grad_norm": 5.79196834564209, |
| "learning_rate": 9.904622998639812e-07, |
| "loss": 0.8174, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.059159161496726786, |
| "grad_norm": 4.359936714172363, |
| "learning_rate": 9.903803608593763e-07, |
| "loss": 0.8229, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.059237002498696165, |
| "grad_norm": 6.546017169952393, |
| "learning_rate": 9.902984218547714e-07, |
| "loss": 0.835, |
| "step": 3805 |
| }, |
| { |
| "epoch": 0.05931484350066554, |
| "grad_norm": 6.203246593475342, |
| "learning_rate": 9.902164828501663e-07, |
| "loss": 0.9859, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.05939268450263492, |
| "grad_norm": 3.92028546333313, |
| "learning_rate": 9.901345438455613e-07, |
| "loss": 0.84, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.05947052550460429, |
| "grad_norm": 4.098803520202637, |
| "learning_rate": 9.900526048409564e-07, |
| "loss": 0.8088, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.05954836650657367, |
| "grad_norm": 4.060965061187744, |
| "learning_rate": 9.899706658363513e-07, |
| "loss": 0.8048, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.05962620750854305, |
| "grad_norm": 7.130313873291016, |
| "learning_rate": 9.898887268317463e-07, |
| "loss": 0.991, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.05970404851051243, |
| "grad_norm": 4.355027198791504, |
| "learning_rate": 9.898067878271414e-07, |
| "loss": 0.9168, |
| "step": 3835 |
| }, |
| { |
| "epoch": 0.059781889512481805, |
| "grad_norm": 4.409844398498535, |
| "learning_rate": 9.897248488225365e-07, |
| "loss": 0.7811, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.05985973051445118, |
| "grad_norm": 4.593713283538818, |
| "learning_rate": 9.896429098179316e-07, |
| "loss": 0.9282, |
| "step": 3845 |
| }, |
| { |
| "epoch": 0.05993757151642056, |
| "grad_norm": 3.813417911529541, |
| "learning_rate": 9.895609708133266e-07, |
| "loss": 0.8671, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.06001541251838994, |
| "grad_norm": 9.554966926574707, |
| "learning_rate": 9.894790318087215e-07, |
| "loss": 0.8516, |
| "step": 3855 |
| }, |
| { |
| "epoch": 0.06009325352035932, |
| "grad_norm": 3.616415500640869, |
| "learning_rate": 9.893970928041166e-07, |
| "loss": 0.8382, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.06017109452232869, |
| "grad_norm": 3.379333019256592, |
| "learning_rate": 9.893151537995117e-07, |
| "loss": 0.9661, |
| "step": 3865 |
| }, |
| { |
| "epoch": 0.06024893552429807, |
| "grad_norm": 2.6693906784057617, |
| "learning_rate": 9.892332147949065e-07, |
| "loss": 0.8133, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.060326776526267445, |
| "grad_norm": 4.557685375213623, |
| "learning_rate": 9.891512757903016e-07, |
| "loss": 0.8617, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.06040461752823682, |
| "grad_norm": 2.69423770904541, |
| "learning_rate": 9.890693367856967e-07, |
| "loss": 0.7904, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.0604824585302062, |
| "grad_norm": 3.213026762008667, |
| "learning_rate": 9.889873977810917e-07, |
| "loss": 0.7852, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.06056029953217558, |
| "grad_norm": 3.25534725189209, |
| "learning_rate": 9.889054587764868e-07, |
| "loss": 0.8165, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.06063814053414496, |
| "grad_norm": 5.834784984588623, |
| "learning_rate": 9.888235197718817e-07, |
| "loss": 0.9304, |
| "step": 3895 |
| }, |
| { |
| "epoch": 0.060715981536114336, |
| "grad_norm": 3.369537353515625, |
| "learning_rate": 9.887415807672768e-07, |
| "loss": 0.7562, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.06079382253808371, |
| "grad_norm": 5.367571830749512, |
| "learning_rate": 9.886596417626718e-07, |
| "loss": 0.8158, |
| "step": 3905 |
| }, |
| { |
| "epoch": 0.060871663540053085, |
| "grad_norm": 4.397671222686768, |
| "learning_rate": 9.88577702758067e-07, |
| "loss": 0.8699, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.06094950454202246, |
| "grad_norm": 3.270768404006958, |
| "learning_rate": 9.88495763753462e-07, |
| "loss": 0.9022, |
| "step": 3915 |
| }, |
| { |
| "epoch": 0.06102734554399184, |
| "grad_norm": 4.194687366485596, |
| "learning_rate": 9.884138247488568e-07, |
| "loss": 0.937, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.06110518654596122, |
| "grad_norm": 3.5028905868530273, |
| "learning_rate": 9.88331885744252e-07, |
| "loss": 0.9853, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.0611830275479306, |
| "grad_norm": 9.81811237335205, |
| "learning_rate": 9.88249946739647e-07, |
| "loss": 0.9332, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.061260868549899976, |
| "grad_norm": 9.531314849853516, |
| "learning_rate": 9.881680077350419e-07, |
| "loss": 0.8402, |
| "step": 3935 |
| }, |
| { |
| "epoch": 0.061338709551869354, |
| "grad_norm": 6.465907096862793, |
| "learning_rate": 9.88086068730437e-07, |
| "loss": 0.9443, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.06141655055383873, |
| "grad_norm": 9.462715148925781, |
| "learning_rate": 9.88004129725832e-07, |
| "loss": 0.7544, |
| "step": 3945 |
| }, |
| { |
| "epoch": 0.0614943915558081, |
| "grad_norm": 4.005988121032715, |
| "learning_rate": 9.87922190721227e-07, |
| "loss": 0.8655, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.06157223255777748, |
| "grad_norm": 6.533730983734131, |
| "learning_rate": 9.878402517166222e-07, |
| "loss": 0.9202, |
| "step": 3955 |
| }, |
| { |
| "epoch": 0.06165007355974686, |
| "grad_norm": 4.695230484008789, |
| "learning_rate": 9.877583127120172e-07, |
| "loss": 0.7832, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.06172791456171624, |
| "grad_norm": 4.281477451324463, |
| "learning_rate": 9.87676373707412e-07, |
| "loss": 0.8885, |
| "step": 3965 |
| }, |
| { |
| "epoch": 0.061805755563685616, |
| "grad_norm": 4.162761688232422, |
| "learning_rate": 9.875944347028072e-07, |
| "loss": 0.8782, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.061883596565654994, |
| "grad_norm": 3.2788217067718506, |
| "learning_rate": 9.875124956982022e-07, |
| "loss": 0.901, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.06196143756762437, |
| "grad_norm": 3.823699951171875, |
| "learning_rate": 9.874305566935971e-07, |
| "loss": 0.8811, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.06203927856959375, |
| "grad_norm": 5.366037368774414, |
| "learning_rate": 9.873486176889922e-07, |
| "loss": 0.9297, |
| "step": 3985 |
| }, |
| { |
| "epoch": 0.06211711957156313, |
| "grad_norm": 3.4064414501190186, |
| "learning_rate": 9.872666786843873e-07, |
| "loss": 0.8329, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.0621949605735325, |
| "grad_norm": 6.189504146575928, |
| "learning_rate": 9.871847396797823e-07, |
| "loss": 0.8325, |
| "step": 3995 |
| }, |
| { |
| "epoch": 0.06227280157550188, |
| "grad_norm": 2.825984001159668, |
| "learning_rate": 9.871028006751774e-07, |
| "loss": 0.7901, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.062350642577471256, |
| "grad_norm": 3.610321521759033, |
| "learning_rate": 9.870208616705725e-07, |
| "loss": 0.7974, |
| "step": 4005 |
| }, |
| { |
| "epoch": 0.062428483579440634, |
| "grad_norm": 4.4487128257751465, |
| "learning_rate": 9.869389226659676e-07, |
| "loss": 0.9165, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.06250632458141, |
| "grad_norm": 3.201486110687256, |
| "learning_rate": 9.868569836613624e-07, |
| "loss": 0.9165, |
| "step": 4015 |
| }, |
| { |
| "epoch": 0.06258416558337938, |
| "grad_norm": 6.013232231140137, |
| "learning_rate": 9.867750446567575e-07, |
| "loss": 0.7316, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.06266200658534876, |
| "grad_norm": 4.562684535980225, |
| "learning_rate": 9.866931056521526e-07, |
| "loss": 0.8648, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.06273984758731814, |
| "grad_norm": 3.915780544281006, |
| "learning_rate": 9.866111666475474e-07, |
| "loss": 0.7684, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.06281768858928752, |
| "grad_norm": 13.098698616027832, |
| "learning_rate": 9.865292276429425e-07, |
| "loss": 0.8222, |
| "step": 4035 |
| }, |
| { |
| "epoch": 0.0628955295912569, |
| "grad_norm": 5.85524320602417, |
| "learning_rate": 9.864472886383376e-07, |
| "loss": 0.8593, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.06297337059322627, |
| "grad_norm": 12.446966171264648, |
| "learning_rate": 9.863653496337327e-07, |
| "loss": 0.6881, |
| "step": 4045 |
| }, |
| { |
| "epoch": 0.06305121159519565, |
| "grad_norm": 3.663348436355591, |
| "learning_rate": 9.862834106291277e-07, |
| "loss": 0.6791, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.06312905259716503, |
| "grad_norm": 5.9468159675598145, |
| "learning_rate": 9.862014716245226e-07, |
| "loss": 0.883, |
| "step": 4055 |
| }, |
| { |
| "epoch": 0.06320689359913441, |
| "grad_norm": 4.544028282165527, |
| "learning_rate": 9.861195326199177e-07, |
| "loss": 0.6979, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.06328473460110379, |
| "grad_norm": 4.25548791885376, |
| "learning_rate": 9.860375936153127e-07, |
| "loss": 0.757, |
| "step": 4065 |
| }, |
| { |
| "epoch": 0.06336257560307317, |
| "grad_norm": 4.892475128173828, |
| "learning_rate": 9.859556546107078e-07, |
| "loss": 0.8346, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.06344041660504254, |
| "grad_norm": 3.967132091522217, |
| "learning_rate": 9.858737156061027e-07, |
| "loss": 0.7614, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.06351825760701192, |
| "grad_norm": 9.065237998962402, |
| "learning_rate": 9.857917766014978e-07, |
| "loss": 0.8471, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.0635960986089813, |
| "grad_norm": 5.109429359436035, |
| "learning_rate": 9.857098375968928e-07, |
| "loss": 0.7441, |
| "step": 4085 |
| }, |
| { |
| "epoch": 0.06367393961095068, |
| "grad_norm": 13.242950439453125, |
| "learning_rate": 9.85627898592288e-07, |
| "loss": 0.7784, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.06375178061292006, |
| "grad_norm": 7.870430946350098, |
| "learning_rate": 9.855459595876828e-07, |
| "loss": 0.9225, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.06382962161488942, |
| "grad_norm": 6.2109761238098145, |
| "learning_rate": 9.854640205830778e-07, |
| "loss": 0.8741, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.0639074626168588, |
| "grad_norm": 4.566768169403076, |
| "learning_rate": 9.85382081578473e-07, |
| "loss": 0.7312, |
| "step": 4105 |
| }, |
| { |
| "epoch": 0.06398530361882818, |
| "grad_norm": 4.343275547027588, |
| "learning_rate": 9.85300142573868e-07, |
| "loss": 0.8077, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.06406314462079755, |
| "grad_norm": 3.710590124130249, |
| "learning_rate": 9.85218203569263e-07, |
| "loss": 0.8512, |
| "step": 4115 |
| }, |
| { |
| "epoch": 0.06414098562276693, |
| "grad_norm": 5.875495433807373, |
| "learning_rate": 9.85136264564658e-07, |
| "loss": 0.8588, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.06421882662473631, |
| "grad_norm": 5.609859943389893, |
| "learning_rate": 9.85054325560053e-07, |
| "loss": 0.9756, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.06429666762670569, |
| "grad_norm": 3.695260763168335, |
| "learning_rate": 9.84972386555448e-07, |
| "loss": 0.8677, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.06437450862867507, |
| "grad_norm": 4.265758991241455, |
| "learning_rate": 9.848904475508432e-07, |
| "loss": 0.868, |
| "step": 4135 |
| }, |
| { |
| "epoch": 0.06445234963064445, |
| "grad_norm": 5.0540361404418945, |
| "learning_rate": 9.84808508546238e-07, |
| "loss": 0.7448, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.06453019063261382, |
| "grad_norm": 3.1422078609466553, |
| "learning_rate": 9.84726569541633e-07, |
| "loss": 0.8085, |
| "step": 4145 |
| }, |
| { |
| "epoch": 0.0646080316345832, |
| "grad_norm": 3.257333755493164, |
| "learning_rate": 9.846446305370282e-07, |
| "loss": 0.904, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.06468587263655258, |
| "grad_norm": 6.303824424743652, |
| "learning_rate": 9.845626915324232e-07, |
| "loss": 0.6844, |
| "step": 4155 |
| }, |
| { |
| "epoch": 0.06476371363852196, |
| "grad_norm": 7.541611194610596, |
| "learning_rate": 9.844807525278183e-07, |
| "loss": 0.9385, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.06484155464049134, |
| "grad_norm": 3.217496633529663, |
| "learning_rate": 9.843988135232134e-07, |
| "loss": 0.8201, |
| "step": 4165 |
| }, |
| { |
| "epoch": 0.06491939564246071, |
| "grad_norm": 4.375589370727539, |
| "learning_rate": 9.843168745186083e-07, |
| "loss": 0.8042, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.06499723664443009, |
| "grad_norm": 6.62051248550415, |
| "learning_rate": 9.842349355140033e-07, |
| "loss": 0.7035, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.06507507764639947, |
| "grad_norm": 4.503577709197998, |
| "learning_rate": 9.841529965093984e-07, |
| "loss": 0.8564, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.06515291864836883, |
| "grad_norm": 3.583695411682129, |
| "learning_rate": 9.840710575047933e-07, |
| "loss": 0.9069, |
| "step": 4185 |
| }, |
| { |
| "epoch": 0.06523075965033821, |
| "grad_norm": 4.029445648193359, |
| "learning_rate": 9.839891185001883e-07, |
| "loss": 0.8835, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.06530860065230759, |
| "grad_norm": 3.6656410694122314, |
| "learning_rate": 9.839071794955834e-07, |
| "loss": 0.7814, |
| "step": 4195 |
| }, |
| { |
| "epoch": 0.06538644165427697, |
| "grad_norm": 3.0505213737487793, |
| "learning_rate": 9.838252404909785e-07, |
| "loss": 0.7942, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.06546428265624635, |
| "grad_norm": 4.775297164916992, |
| "learning_rate": 9.837433014863736e-07, |
| "loss": 0.8875, |
| "step": 4205 |
| }, |
| { |
| "epoch": 0.06554212365821573, |
| "grad_norm": 5.490566253662109, |
| "learning_rate": 9.836613624817686e-07, |
| "loss": 0.7635, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.0656199646601851, |
| "grad_norm": 3.202033519744873, |
| "learning_rate": 9.835794234771635e-07, |
| "loss": 0.7558, |
| "step": 4215 |
| }, |
| { |
| "epoch": 0.06569780566215448, |
| "grad_norm": 5.484325408935547, |
| "learning_rate": 9.834974844725586e-07, |
| "loss": 0.8066, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.06577564666412386, |
| "grad_norm": 2.903610944747925, |
| "learning_rate": 9.834155454679537e-07, |
| "loss": 0.7833, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.06585348766609324, |
| "grad_norm": 3.188546895980835, |
| "learning_rate": 9.833336064633485e-07, |
| "loss": 0.7774, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.06593132866806262, |
| "grad_norm": 3.055574655532837, |
| "learning_rate": 9.832516674587436e-07, |
| "loss": 0.6551, |
| "step": 4235 |
| }, |
| { |
| "epoch": 0.066009169670032, |
| "grad_norm": 4.439972877502441, |
| "learning_rate": 9.831697284541387e-07, |
| "loss": 0.7456, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.06608701067200137, |
| "grad_norm": 2.4513139724731445, |
| "learning_rate": 9.830877894495337e-07, |
| "loss": 0.7752, |
| "step": 4245 |
| }, |
| { |
| "epoch": 0.06616485167397075, |
| "grad_norm": 4.66846227645874, |
| "learning_rate": 9.830058504449288e-07, |
| "loss": 0.9322, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.06624269267594013, |
| "grad_norm": 4.819527626037598, |
| "learning_rate": 9.82923911440324e-07, |
| "loss": 0.9361, |
| "step": 4255 |
| }, |
| { |
| "epoch": 0.06632053367790951, |
| "grad_norm": 8.028414726257324, |
| "learning_rate": 9.828419724357188e-07, |
| "loss": 0.8214, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.06639837467987889, |
| "grad_norm": 3.565459728240967, |
| "learning_rate": 9.827600334311138e-07, |
| "loss": 0.7668, |
| "step": 4265 |
| }, |
| { |
| "epoch": 0.06647621568184826, |
| "grad_norm": 2.9492602348327637, |
| "learning_rate": 9.82678094426509e-07, |
| "loss": 0.7513, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.06655405668381763, |
| "grad_norm": 4.8683576583862305, |
| "learning_rate": 9.82596155421904e-07, |
| "loss": 0.8725, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.066631897685787, |
| "grad_norm": 4.162265300750732, |
| "learning_rate": 9.825142164172989e-07, |
| "loss": 0.8275, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.06670973868775638, |
| "grad_norm": 3.6537702083587646, |
| "learning_rate": 9.82432277412694e-07, |
| "loss": 0.7264, |
| "step": 4285 |
| }, |
| { |
| "epoch": 0.06678757968972576, |
| "grad_norm": 3.9282073974609375, |
| "learning_rate": 9.82350338408089e-07, |
| "loss": 0.977, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.06686542069169514, |
| "grad_norm": 5.129037857055664, |
| "learning_rate": 9.82268399403484e-07, |
| "loss": 0.8609, |
| "step": 4295 |
| }, |
| { |
| "epoch": 0.06694326169366452, |
| "grad_norm": 4.563994884490967, |
| "learning_rate": 9.82186460398879e-07, |
| "loss": 0.8303, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.0670211026956339, |
| "grad_norm": 3.177889585494995, |
| "learning_rate": 9.82104521394274e-07, |
| "loss": 0.9134, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.06709894369760327, |
| "grad_norm": 4.675817966461182, |
| "learning_rate": 9.82022582389669e-07, |
| "loss": 0.7188, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.06717678469957265, |
| "grad_norm": 6.9661173820495605, |
| "learning_rate": 9.819406433850642e-07, |
| "loss": 0.7871, |
| "step": 4315 |
| }, |
| { |
| "epoch": 0.06725462570154203, |
| "grad_norm": 6.177728176116943, |
| "learning_rate": 9.818587043804592e-07, |
| "loss": 0.7438, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.06733246670351141, |
| "grad_norm": 3.9021103382110596, |
| "learning_rate": 9.81776765375854e-07, |
| "loss": 0.8456, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.06741030770548079, |
| "grad_norm": 6.576573371887207, |
| "learning_rate": 9.816948263712492e-07, |
| "loss": 0.8173, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.06748814870745017, |
| "grad_norm": 3.117799997329712, |
| "learning_rate": 9.816128873666442e-07, |
| "loss": 0.8552, |
| "step": 4335 |
| }, |
| { |
| "epoch": 0.06756598970941954, |
| "grad_norm": 5.52931022644043, |
| "learning_rate": 9.815309483620391e-07, |
| "loss": 0.7353, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.06764383071138892, |
| "grad_norm": 3.3571298122406006, |
| "learning_rate": 9.814490093574342e-07, |
| "loss": 0.7253, |
| "step": 4345 |
| }, |
| { |
| "epoch": 0.0677216717133583, |
| "grad_norm": 4.7125468254089355, |
| "learning_rate": 9.813670703528293e-07, |
| "loss": 0.8708, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.06779951271532768, |
| "grad_norm": 3.7811620235443115, |
| "learning_rate": 9.812851313482243e-07, |
| "loss": 0.744, |
| "step": 4355 |
| }, |
| { |
| "epoch": 0.06787735371729704, |
| "grad_norm": 4.079869270324707, |
| "learning_rate": 9.812031923436194e-07, |
| "loss": 0.8291, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.06795519471926642, |
| "grad_norm": 2.9714179039001465, |
| "learning_rate": 9.811212533390145e-07, |
| "loss": 0.879, |
| "step": 4365 |
| }, |
| { |
| "epoch": 0.0680330357212358, |
| "grad_norm": 4.301975250244141, |
| "learning_rate": 9.810393143344094e-07, |
| "loss": 0.7528, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.06811087672320518, |
| "grad_norm": 4.707742214202881, |
| "learning_rate": 9.809573753298044e-07, |
| "loss": 0.7686, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.06818871772517456, |
| "grad_norm": 2.911092758178711, |
| "learning_rate": 9.808754363251995e-07, |
| "loss": 0.8224, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.06826655872714393, |
| "grad_norm": 3.809354543685913, |
| "learning_rate": 9.807934973205944e-07, |
| "loss": 0.9337, |
| "step": 4385 |
| }, |
| { |
| "epoch": 0.06834439972911331, |
| "grad_norm": 3.0105934143066406, |
| "learning_rate": 9.807115583159894e-07, |
| "loss": 0.7952, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.06842224073108269, |
| "grad_norm": 4.267519474029541, |
| "learning_rate": 9.806296193113845e-07, |
| "loss": 0.9312, |
| "step": 4395 |
| }, |
| { |
| "epoch": 0.06850008173305207, |
| "grad_norm": 13.714824676513672, |
| "learning_rate": 9.805476803067796e-07, |
| "loss": 0.7266, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.06857792273502145, |
| "grad_norm": 5.861302852630615, |
| "learning_rate": 9.804657413021747e-07, |
| "loss": 0.8267, |
| "step": 4405 |
| }, |
| { |
| "epoch": 0.06865576373699082, |
| "grad_norm": 4.226170539855957, |
| "learning_rate": 9.803838022975697e-07, |
| "loss": 0.6108, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.0687336047389602, |
| "grad_norm": 4.260887145996094, |
| "learning_rate": 9.803018632929648e-07, |
| "loss": 0.7917, |
| "step": 4415 |
| }, |
| { |
| "epoch": 0.06881144574092958, |
| "grad_norm": 2.1800050735473633, |
| "learning_rate": 9.802199242883597e-07, |
| "loss": 0.7279, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.06888928674289896, |
| "grad_norm": 4.386568069458008, |
| "learning_rate": 9.801379852837548e-07, |
| "loss": 0.7997, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.06896712774486834, |
| "grad_norm": 7.1831135749816895, |
| "learning_rate": 9.800560462791498e-07, |
| "loss": 0.9703, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.06904496874683771, |
| "grad_norm": 7.631860733032227, |
| "learning_rate": 9.799741072745447e-07, |
| "loss": 0.7836, |
| "step": 4435 |
| }, |
| { |
| "epoch": 0.06912280974880709, |
| "grad_norm": 3.6150078773498535, |
| "learning_rate": 9.798921682699398e-07, |
| "loss": 0.6982, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.06920065075077646, |
| "grad_norm": 5.267273902893066, |
| "learning_rate": 9.798102292653348e-07, |
| "loss": 0.9292, |
| "step": 4445 |
| }, |
| { |
| "epoch": 0.06927849175274584, |
| "grad_norm": 6.139009952545166, |
| "learning_rate": 9.7972829026073e-07, |
| "loss": 0.776, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.06935633275471521, |
| "grad_norm": 6.20229959487915, |
| "learning_rate": 9.79646351256125e-07, |
| "loss": 0.8239, |
| "step": 4455 |
| }, |
| { |
| "epoch": 0.06943417375668459, |
| "grad_norm": 3.204371929168701, |
| "learning_rate": 9.7956441225152e-07, |
| "loss": 0.8123, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.06951201475865397, |
| "grad_norm": 4.521599769592285, |
| "learning_rate": 9.79482473246915e-07, |
| "loss": 0.7049, |
| "step": 4465 |
| }, |
| { |
| "epoch": 0.06958985576062335, |
| "grad_norm": 5.0935750007629395, |
| "learning_rate": 9.7940053424231e-07, |
| "loss": 0.8673, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.06966769676259273, |
| "grad_norm": 7.926290512084961, |
| "learning_rate": 9.79318595237705e-07, |
| "loss": 0.8195, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.0697455377645621, |
| "grad_norm": 4.315165042877197, |
| "learning_rate": 9.792366562331e-07, |
| "loss": 0.8674, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.06982337876653148, |
| "grad_norm": 3.775836706161499, |
| "learning_rate": 9.79154717228495e-07, |
| "loss": 0.8334, |
| "step": 4485 |
| }, |
| { |
| "epoch": 0.06990121976850086, |
| "grad_norm": 2.560904026031494, |
| "learning_rate": 9.7907277822389e-07, |
| "loss": 0.8271, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.06997906077047024, |
| "grad_norm": 11.29925537109375, |
| "learning_rate": 9.789908392192852e-07, |
| "loss": 0.9633, |
| "step": 4495 |
| }, |
| { |
| "epoch": 0.07005690177243962, |
| "grad_norm": 4.101975917816162, |
| "learning_rate": 9.789089002146802e-07, |
| "loss": 0.8858, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.070134742774409, |
| "grad_norm": 2.970782518386841, |
| "learning_rate": 9.78826961210075e-07, |
| "loss": 0.8608, |
| "step": 4505 |
| }, |
| { |
| "epoch": 0.07021258377637837, |
| "grad_norm": 7.289088726043701, |
| "learning_rate": 9.787450222054702e-07, |
| "loss": 0.8347, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.07029042477834775, |
| "grad_norm": 7.107760429382324, |
| "learning_rate": 9.786630832008653e-07, |
| "loss": 0.771, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.07036826578031713, |
| "grad_norm": 3.630275249481201, |
| "learning_rate": 9.785811441962603e-07, |
| "loss": 0.7113, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.07044610678228651, |
| "grad_norm": 4.681270122528076, |
| "learning_rate": 9.784992051916554e-07, |
| "loss": 0.8278, |
| "step": 4525 |
| }, |
| { |
| "epoch": 0.07052394778425587, |
| "grad_norm": 3.6923000812530518, |
| "learning_rate": 9.784172661870503e-07, |
| "loss": 0.8067, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.07060178878622525, |
| "grad_norm": 3.538496255874634, |
| "learning_rate": 9.783353271824453e-07, |
| "loss": 0.7577, |
| "step": 4535 |
| }, |
| { |
| "epoch": 0.07067962978819463, |
| "grad_norm": 3.3996520042419434, |
| "learning_rate": 9.782533881778404e-07, |
| "loss": 0.9051, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.070757470790164, |
| "grad_norm": 4.107473850250244, |
| "learning_rate": 9.781714491732353e-07, |
| "loss": 0.8982, |
| "step": 4545 |
| }, |
| { |
| "epoch": 0.07083531179213338, |
| "grad_norm": 2.9986937046051025, |
| "learning_rate": 9.780895101686304e-07, |
| "loss": 0.9025, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.07091315279410276, |
| "grad_norm": 3.413224697113037, |
| "learning_rate": 9.780075711640254e-07, |
| "loss": 0.7699, |
| "step": 4555 |
| }, |
| { |
| "epoch": 0.07099099379607214, |
| "grad_norm": 3.332380771636963, |
| "learning_rate": 9.779256321594205e-07, |
| "loss": 0.6913, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.07106883479804152, |
| "grad_norm": 3.161701202392578, |
| "learning_rate": 9.778436931548156e-07, |
| "loss": 0.7502, |
| "step": 4565 |
| }, |
| { |
| "epoch": 0.0711466758000109, |
| "grad_norm": 3.6863913536071777, |
| "learning_rate": 9.777617541502107e-07, |
| "loss": 0.7959, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.07122451680198028, |
| "grad_norm": 4.537403583526611, |
| "learning_rate": 9.776798151456055e-07, |
| "loss": 0.8646, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.07130235780394965, |
| "grad_norm": 4.111873149871826, |
| "learning_rate": 9.775978761410006e-07, |
| "loss": 0.8503, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.07138019880591903, |
| "grad_norm": 8.788448333740234, |
| "learning_rate": 9.775159371363957e-07, |
| "loss": 0.8046, |
| "step": 4585 |
| }, |
| { |
| "epoch": 0.07145803980788841, |
| "grad_norm": 5.538233757019043, |
| "learning_rate": 9.774339981317905e-07, |
| "loss": 0.8018, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.07153588080985779, |
| "grad_norm": 6.06341028213501, |
| "learning_rate": 9.773520591271856e-07, |
| "loss": 0.8073, |
| "step": 4595 |
| }, |
| { |
| "epoch": 0.07161372181182717, |
| "grad_norm": 3.6553616523742676, |
| "learning_rate": 9.772701201225807e-07, |
| "loss": 0.8142, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.07169156281379654, |
| "grad_norm": 4.252196311950684, |
| "learning_rate": 9.771881811179758e-07, |
| "loss": 0.7505, |
| "step": 4605 |
| }, |
| { |
| "epoch": 0.07176940381576592, |
| "grad_norm": 3.3813109397888184, |
| "learning_rate": 9.771062421133708e-07, |
| "loss": 0.7076, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.0718472448177353, |
| "grad_norm": 9.012163162231445, |
| "learning_rate": 9.77024303108766e-07, |
| "loss": 0.8812, |
| "step": 4615 |
| }, |
| { |
| "epoch": 0.07192508581970466, |
| "grad_norm": 6.12354040145874, |
| "learning_rate": 9.769423641041608e-07, |
| "loss": 0.8218, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.07200292682167404, |
| "grad_norm": 3.364898681640625, |
| "learning_rate": 9.768604250995558e-07, |
| "loss": 0.8775, |
| "step": 4625 |
| }, |
| { |
| "epoch": 0.07208076782364342, |
| "grad_norm": 13.047234535217285, |
| "learning_rate": 9.76778486094951e-07, |
| "loss": 0.7578, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.0721586088256128, |
| "grad_norm": 6.722197532653809, |
| "learning_rate": 9.766965470903458e-07, |
| "loss": 0.8446, |
| "step": 4635 |
| }, |
| { |
| "epoch": 0.07223644982758218, |
| "grad_norm": 4.028960227966309, |
| "learning_rate": 9.766146080857409e-07, |
| "loss": 0.8284, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.07231429082955156, |
| "grad_norm": 3.668736219406128, |
| "learning_rate": 9.76532669081136e-07, |
| "loss": 0.8298, |
| "step": 4645 |
| }, |
| { |
| "epoch": 0.07239213183152093, |
| "grad_norm": 3.391463041305542, |
| "learning_rate": 9.76450730076531e-07, |
| "loss": 0.8038, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.07246997283349031, |
| "grad_norm": 2.8080356121063232, |
| "learning_rate": 9.76368791071926e-07, |
| "loss": 0.8614, |
| "step": 4655 |
| }, |
| { |
| "epoch": 0.07254781383545969, |
| "grad_norm": 3.9080796241760254, |
| "learning_rate": 9.762868520673212e-07, |
| "loss": 0.8239, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.07262565483742907, |
| "grad_norm": 3.0968992710113525, |
| "learning_rate": 9.76204913062716e-07, |
| "loss": 0.8751, |
| "step": 4665 |
| }, |
| { |
| "epoch": 0.07270349583939845, |
| "grad_norm": 6.975797176361084, |
| "learning_rate": 9.76122974058111e-07, |
| "loss": 0.7877, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.07278133684136782, |
| "grad_norm": 5.175839424133301, |
| "learning_rate": 9.760410350535062e-07, |
| "loss": 0.7242, |
| "step": 4675 |
| }, |
| { |
| "epoch": 0.0728591778433372, |
| "grad_norm": 3.86811900138855, |
| "learning_rate": 9.759590960489012e-07, |
| "loss": 0.8628, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.07293701884530658, |
| "grad_norm": 4.670974254608154, |
| "learning_rate": 9.75877157044296e-07, |
| "loss": 0.7741, |
| "step": 4685 |
| }, |
| { |
| "epoch": 0.07301485984727596, |
| "grad_norm": 3.4863369464874268, |
| "learning_rate": 9.757952180396912e-07, |
| "loss": 0.9401, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.07309270084924534, |
| "grad_norm": 4.012441158294678, |
| "learning_rate": 9.757132790350863e-07, |
| "loss": 0.8949, |
| "step": 4695 |
| }, |
| { |
| "epoch": 0.07317054185121472, |
| "grad_norm": 3.7120773792266846, |
| "learning_rate": 9.756313400304813e-07, |
| "loss": 0.912, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.07324838285318408, |
| "grad_norm": 4.149153232574463, |
| "learning_rate": 9.755494010258762e-07, |
| "loss": 0.7284, |
| "step": 4705 |
| }, |
| { |
| "epoch": 0.07332622385515346, |
| "grad_norm": 3.724862813949585, |
| "learning_rate": 9.754674620212713e-07, |
| "loss": 0.8486, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.07340406485712284, |
| "grad_norm": 5.275464057922363, |
| "learning_rate": 9.753855230166663e-07, |
| "loss": 0.7661, |
| "step": 4715 |
| }, |
| { |
| "epoch": 0.07348190585909221, |
| "grad_norm": 8.389967918395996, |
| "learning_rate": 9.753035840120614e-07, |
| "loss": 0.8646, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.07355974686106159, |
| "grad_norm": 4.1537017822265625, |
| "learning_rate": 9.752216450074565e-07, |
| "loss": 0.8596, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.07363758786303097, |
| "grad_norm": 3.4971349239349365, |
| "learning_rate": 9.751397060028514e-07, |
| "loss": 0.7888, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.07371542886500035, |
| "grad_norm": 2.9952375888824463, |
| "learning_rate": 9.750577669982464e-07, |
| "loss": 0.8127, |
| "step": 4735 |
| }, |
| { |
| "epoch": 0.07379326986696973, |
| "grad_norm": 3.0156424045562744, |
| "learning_rate": 9.749758279936415e-07, |
| "loss": 0.6775, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.0738711108689391, |
| "grad_norm": 4.386186122894287, |
| "learning_rate": 9.748938889890366e-07, |
| "loss": 0.8813, |
| "step": 4745 |
| }, |
| { |
| "epoch": 0.07394895187090848, |
| "grad_norm": 8.352777481079102, |
| "learning_rate": 9.748119499844314e-07, |
| "loss": 0.841, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.07402679287287786, |
| "grad_norm": 3.9071156978607178, |
| "learning_rate": 9.747300109798265e-07, |
| "loss": 0.8124, |
| "step": 4755 |
| }, |
| { |
| "epoch": 0.07410463387484724, |
| "grad_norm": 6.337040901184082, |
| "learning_rate": 9.746480719752216e-07, |
| "loss": 0.7601, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.07418247487681662, |
| "grad_norm": 4.740725040435791, |
| "learning_rate": 9.745661329706167e-07, |
| "loss": 0.8621, |
| "step": 4765 |
| }, |
| { |
| "epoch": 0.074260315878786, |
| "grad_norm": 3.6366703510284424, |
| "learning_rate": 9.744841939660117e-07, |
| "loss": 0.7521, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.07433815688075537, |
| "grad_norm": 5.869968891143799, |
| "learning_rate": 9.744022549614068e-07, |
| "loss": 0.8219, |
| "step": 4775 |
| }, |
| { |
| "epoch": 0.07441599788272475, |
| "grad_norm": 4.8249006271362305, |
| "learning_rate": 9.743203159568017e-07, |
| "loss": 0.8733, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.07449383888469413, |
| "grad_norm": 3.9930624961853027, |
| "learning_rate": 9.742383769521968e-07, |
| "loss": 0.7968, |
| "step": 4785 |
| }, |
| { |
| "epoch": 0.0745716798866635, |
| "grad_norm": 5.8335418701171875, |
| "learning_rate": 9.741564379475918e-07, |
| "loss": 0.8224, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.07464952088863287, |
| "grad_norm": 5.657021522521973, |
| "learning_rate": 9.740744989429867e-07, |
| "loss": 0.7896, |
| "step": 4795 |
| }, |
| { |
| "epoch": 0.07472736189060225, |
| "grad_norm": 6.225119590759277, |
| "learning_rate": 9.739925599383818e-07, |
| "loss": 0.8297, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.07480520289257163, |
| "grad_norm": 3.373596429824829, |
| "learning_rate": 9.739106209337768e-07, |
| "loss": 0.8255, |
| "step": 4805 |
| }, |
| { |
| "epoch": 0.07488304389454101, |
| "grad_norm": 2.2436752319335938, |
| "learning_rate": 9.73828681929172e-07, |
| "loss": 0.7097, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.07496088489651039, |
| "grad_norm": 3.6879262924194336, |
| "learning_rate": 9.73746742924567e-07, |
| "loss": 0.6912, |
| "step": 4815 |
| }, |
| { |
| "epoch": 0.07503872589847976, |
| "grad_norm": 3.399632692337036, |
| "learning_rate": 9.73664803919962e-07, |
| "loss": 0.8371, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.07511656690044914, |
| "grad_norm": 9.186985969543457, |
| "learning_rate": 9.73582864915357e-07, |
| "loss": 0.8157, |
| "step": 4825 |
| }, |
| { |
| "epoch": 0.07519440790241852, |
| "grad_norm": 3.6295411586761475, |
| "learning_rate": 9.73500925910752e-07, |
| "loss": 0.7785, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.0752722489043879, |
| "grad_norm": 3.534175395965576, |
| "learning_rate": 9.73418986906147e-07, |
| "loss": 0.878, |
| "step": 4835 |
| }, |
| { |
| "epoch": 0.07535008990635728, |
| "grad_norm": 3.0165436267852783, |
| "learning_rate": 9.73337047901542e-07, |
| "loss": 0.7593, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.07542793090832665, |
| "grad_norm": 4.980969429016113, |
| "learning_rate": 9.73255108896937e-07, |
| "loss": 0.7592, |
| "step": 4845 |
| }, |
| { |
| "epoch": 0.07550577191029603, |
| "grad_norm": 3.377429723739624, |
| "learning_rate": 9.73173169892332e-07, |
| "loss": 0.7849, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.07558361291226541, |
| "grad_norm": 5.916225910186768, |
| "learning_rate": 9.730912308877272e-07, |
| "loss": 0.8283, |
| "step": 4855 |
| }, |
| { |
| "epoch": 0.07566145391423479, |
| "grad_norm": 6.396664619445801, |
| "learning_rate": 9.730092918831222e-07, |
| "loss": 0.8801, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.07573929491620417, |
| "grad_norm": 3.2279000282287598, |
| "learning_rate": 9.729273528785173e-07, |
| "loss": 0.7746, |
| "step": 4865 |
| }, |
| { |
| "epoch": 0.07581713591817354, |
| "grad_norm": 3.3522236347198486, |
| "learning_rate": 9.728454138739122e-07, |
| "loss": 1.0139, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.07589497692014291, |
| "grad_norm": 7.16496467590332, |
| "learning_rate": 9.727634748693073e-07, |
| "loss": 0.8203, |
| "step": 4875 |
| }, |
| { |
| "epoch": 0.07597281792211229, |
| "grad_norm": 3.7520346641540527, |
| "learning_rate": 9.726815358647023e-07, |
| "loss": 0.7859, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.07605065892408167, |
| "grad_norm": 5.511653900146484, |
| "learning_rate": 9.725995968600972e-07, |
| "loss": 0.8594, |
| "step": 4885 |
| }, |
| { |
| "epoch": 0.07612849992605104, |
| "grad_norm": 5.89841365814209, |
| "learning_rate": 9.725176578554923e-07, |
| "loss": 0.6535, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.07620634092802042, |
| "grad_norm": 4.694098949432373, |
| "learning_rate": 9.724357188508873e-07, |
| "loss": 0.8221, |
| "step": 4895 |
| }, |
| { |
| "epoch": 0.0762841819299898, |
| "grad_norm": 4.192508220672607, |
| "learning_rate": 9.723537798462824e-07, |
| "loss": 0.8594, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.07636202293195918, |
| "grad_norm": 4.4052534103393555, |
| "learning_rate": 9.722718408416775e-07, |
| "loss": 0.7326, |
| "step": 4905 |
| }, |
| { |
| "epoch": 0.07643986393392856, |
| "grad_norm": 4.1674299240112305, |
| "learning_rate": 9.721899018370724e-07, |
| "loss": 0.7525, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.07651770493589793, |
| "grad_norm": 4.367162227630615, |
| "learning_rate": 9.721079628324674e-07, |
| "loss": 0.9038, |
| "step": 4915 |
| }, |
| { |
| "epoch": 0.07659554593786731, |
| "grad_norm": 3.0957272052764893, |
| "learning_rate": 9.720260238278625e-07, |
| "loss": 0.8247, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.07667338693983669, |
| "grad_norm": 6.5740532875061035, |
| "learning_rate": 9.719440848232576e-07, |
| "loss": 0.7707, |
| "step": 4925 |
| }, |
| { |
| "epoch": 0.07675122794180607, |
| "grad_norm": 5.08697509765625, |
| "learning_rate": 9.718621458186527e-07, |
| "loss": 0.7561, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.07682906894377545, |
| "grad_norm": 6.9134063720703125, |
| "learning_rate": 9.717802068140475e-07, |
| "loss": 0.7645, |
| "step": 4935 |
| }, |
| { |
| "epoch": 0.07690690994574483, |
| "grad_norm": 3.2047367095947266, |
| "learning_rate": 9.716982678094426e-07, |
| "loss": 0.854, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.0769847509477142, |
| "grad_norm": 3.4643442630767822, |
| "learning_rate": 9.716163288048377e-07, |
| "loss": 0.7598, |
| "step": 4945 |
| }, |
| { |
| "epoch": 0.07706259194968358, |
| "grad_norm": 5.208106517791748, |
| "learning_rate": 9.715343898002325e-07, |
| "loss": 0.7699, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.07714043295165296, |
| "grad_norm": 4.935080051422119, |
| "learning_rate": 9.714524507956276e-07, |
| "loss": 0.8839, |
| "step": 4955 |
| }, |
| { |
| "epoch": 0.07721827395362234, |
| "grad_norm": 4.052170753479004, |
| "learning_rate": 9.713705117910227e-07, |
| "loss": 0.841, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.0772961149555917, |
| "grad_norm": 3.409742593765259, |
| "learning_rate": 9.712885727864178e-07, |
| "loss": 0.8768, |
| "step": 4965 |
| }, |
| { |
| "epoch": 0.07737395595756108, |
| "grad_norm": 5.577835559844971, |
| "learning_rate": 9.712066337818128e-07, |
| "loss": 0.7609, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.07745179695953046, |
| "grad_norm": 4.966437816619873, |
| "learning_rate": 9.71124694777208e-07, |
| "loss": 0.9478, |
| "step": 4975 |
| }, |
| { |
| "epoch": 0.07752963796149984, |
| "grad_norm": 5.092791557312012, |
| "learning_rate": 9.710427557726028e-07, |
| "loss": 0.8237, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.07760747896346921, |
| "grad_norm": 2.992233991622925, |
| "learning_rate": 9.709608167679978e-07, |
| "loss": 0.8486, |
| "step": 4985 |
| }, |
| { |
| "epoch": 0.07768531996543859, |
| "grad_norm": 3.947547197341919, |
| "learning_rate": 9.70878877763393e-07, |
| "loss": 1.0368, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.07776316096740797, |
| "grad_norm": 4.660312652587891, |
| "learning_rate": 9.707969387587878e-07, |
| "loss": 0.7254, |
| "step": 4995 |
| }, |
| { |
| "epoch": 0.07784100196937735, |
| "grad_norm": 5.542099475860596, |
| "learning_rate": 9.707149997541829e-07, |
| "loss": 0.7871, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.07791884297134673, |
| "grad_norm": 2.8513717651367188, |
| "learning_rate": 9.70633060749578e-07, |
| "loss": 0.7405, |
| "step": 5005 |
| }, |
| { |
| "epoch": 0.0779966839733161, |
| "grad_norm": 3.067697286605835, |
| "learning_rate": 9.70551121744973e-07, |
| "loss": 0.8251, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.07807452497528548, |
| "grad_norm": 4.706809043884277, |
| "learning_rate": 9.70469182740368e-07, |
| "loss": 0.894, |
| "step": 5015 |
| }, |
| { |
| "epoch": 0.07815236597725486, |
| "grad_norm": 3.1183722019195557, |
| "learning_rate": 9.703872437357632e-07, |
| "loss": 0.7985, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.07823020697922424, |
| "grad_norm": 3.95314884185791, |
| "learning_rate": 9.703053047311582e-07, |
| "loss": 0.8673, |
| "step": 5025 |
| }, |
| { |
| "epoch": 0.07830804798119362, |
| "grad_norm": 5.186405658721924, |
| "learning_rate": 9.70223365726553e-07, |
| "loss": 0.8834, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.078385888983163, |
| "grad_norm": 6.825442790985107, |
| "learning_rate": 9.701414267219482e-07, |
| "loss": 0.8638, |
| "step": 5035 |
| }, |
| { |
| "epoch": 0.07846372998513237, |
| "grad_norm": 4.547275543212891, |
| "learning_rate": 9.700594877173432e-07, |
| "loss": 0.7591, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.07854157098710175, |
| "grad_norm": 3.055347204208374, |
| "learning_rate": 9.69977548712738e-07, |
| "loss": 0.7955, |
| "step": 5045 |
| }, |
| { |
| "epoch": 0.07861941198907112, |
| "grad_norm": 6.120547294616699, |
| "learning_rate": 9.698956097081332e-07, |
| "loss": 0.8606, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.0786972529910405, |
| "grad_norm": 8.125998497009277, |
| "learning_rate": 9.698136707035283e-07, |
| "loss": 0.7676, |
| "step": 5055 |
| }, |
| { |
| "epoch": 0.07877509399300987, |
| "grad_norm": 3.794414758682251, |
| "learning_rate": 9.697317316989233e-07, |
| "loss": 0.8594, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.07885293499497925, |
| "grad_norm": 4.892978191375732, |
| "learning_rate": 9.696497926943184e-07, |
| "loss": 0.774, |
| "step": 5065 |
| }, |
| { |
| "epoch": 0.07893077599694863, |
| "grad_norm": 4.139584064483643, |
| "learning_rate": 9.695678536897135e-07, |
| "loss": 0.83, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.07900861699891801, |
| "grad_norm": 7.144068241119385, |
| "learning_rate": 9.694859146851083e-07, |
| "loss": 0.8541, |
| "step": 5075 |
| }, |
| { |
| "epoch": 0.07908645800088739, |
| "grad_norm": 10.283439636230469, |
| "learning_rate": 9.694039756805034e-07, |
| "loss": 0.9355, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.07916429900285676, |
| "grad_norm": 3.0185656547546387, |
| "learning_rate": 9.693220366758985e-07, |
| "loss": 0.8761, |
| "step": 5085 |
| }, |
| { |
| "epoch": 0.07924214000482614, |
| "grad_norm": 3.299808979034424, |
| "learning_rate": 9.692400976712934e-07, |
| "loss": 0.715, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.07931998100679552, |
| "grad_norm": 7.163717746734619, |
| "learning_rate": 9.691581586666884e-07, |
| "loss": 0.6974, |
| "step": 5095 |
| }, |
| { |
| "epoch": 0.0793978220087649, |
| "grad_norm": 3.0995216369628906, |
| "learning_rate": 9.690762196620835e-07, |
| "loss": 0.8159, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.07947566301073428, |
| "grad_norm": 2.7312302589416504, |
| "learning_rate": 9.689942806574786e-07, |
| "loss": 0.8009, |
| "step": 5105 |
| }, |
| { |
| "epoch": 0.07955350401270365, |
| "grad_norm": 4.884325981140137, |
| "learning_rate": 9.689123416528737e-07, |
| "loss": 0.8839, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.07963134501467303, |
| "grad_norm": 3.1511213779449463, |
| "learning_rate": 9.688304026482685e-07, |
| "loss": 0.6633, |
| "step": 5115 |
| }, |
| { |
| "epoch": 0.07970918601664241, |
| "grad_norm": 3.034996271133423, |
| "learning_rate": 9.687484636436636e-07, |
| "loss": 0.8636, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.07978702701861179, |
| "grad_norm": 6.756342887878418, |
| "learning_rate": 9.686665246390587e-07, |
| "loss": 0.844, |
| "step": 5125 |
| }, |
| { |
| "epoch": 0.07986486802058117, |
| "grad_norm": 4.012609958648682, |
| "learning_rate": 9.685845856344537e-07, |
| "loss": 1.0347, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.07994270902255053, |
| "grad_norm": 4.039714336395264, |
| "learning_rate": 9.685026466298486e-07, |
| "loss": 0.8549, |
| "step": 5135 |
| }, |
| { |
| "epoch": 0.08002055002451991, |
| "grad_norm": 4.654749393463135, |
| "learning_rate": 9.684207076252437e-07, |
| "loss": 0.7294, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.08009839102648929, |
| "grad_norm": 5.652122497558594, |
| "learning_rate": 9.683387686206388e-07, |
| "loss": 0.8808, |
| "step": 5145 |
| }, |
| { |
| "epoch": 0.08017623202845867, |
| "grad_norm": 5.13718318939209, |
| "learning_rate": 9.682568296160338e-07, |
| "loss": 0.8012, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.08025407303042804, |
| "grad_norm": 4.274785995483398, |
| "learning_rate": 9.681748906114287e-07, |
| "loss": 0.9497, |
| "step": 5155 |
| }, |
| { |
| "epoch": 0.08033191403239742, |
| "grad_norm": 3.5715765953063965, |
| "learning_rate": 9.680929516068238e-07, |
| "loss": 0.6932, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.0804097550343668, |
| "grad_norm": 3.721369504928589, |
| "learning_rate": 9.680110126022188e-07, |
| "loss": 0.7604, |
| "step": 5165 |
| }, |
| { |
| "epoch": 0.08048759603633618, |
| "grad_norm": 4.815948486328125, |
| "learning_rate": 9.67929073597614e-07, |
| "loss": 0.8056, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.08056543703830556, |
| "grad_norm": 3.9973649978637695, |
| "learning_rate": 9.67847134593009e-07, |
| "loss": 0.8245, |
| "step": 5175 |
| }, |
| { |
| "epoch": 0.08064327804027493, |
| "grad_norm": 6.30864143371582, |
| "learning_rate": 9.67765195588404e-07, |
| "loss": 0.8158, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.08072111904224431, |
| "grad_norm": 3.627049207687378, |
| "learning_rate": 9.67683256583799e-07, |
| "loss": 0.6924, |
| "step": 5185 |
| }, |
| { |
| "epoch": 0.08079896004421369, |
| "grad_norm": 3.445680618286133, |
| "learning_rate": 9.67601317579194e-07, |
| "loss": 0.7619, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.08087680104618307, |
| "grad_norm": 5.6612868309021, |
| "learning_rate": 9.67519378574589e-07, |
| "loss": 0.8712, |
| "step": 5195 |
| }, |
| { |
| "epoch": 0.08095464204815245, |
| "grad_norm": 8.172099113464355, |
| "learning_rate": 9.67437439569984e-07, |
| "loss": 0.875, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.08103248305012183, |
| "grad_norm": 3.6549482345581055, |
| "learning_rate": 9.67355500565379e-07, |
| "loss": 0.7416, |
| "step": 5205 |
| }, |
| { |
| "epoch": 0.0811103240520912, |
| "grad_norm": 4.237252712249756, |
| "learning_rate": 9.67273561560774e-07, |
| "loss": 0.7864, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.08118816505406058, |
| "grad_norm": 3.6416895389556885, |
| "learning_rate": 9.671916225561692e-07, |
| "loss": 0.8346, |
| "step": 5215 |
| }, |
| { |
| "epoch": 0.08126600605602995, |
| "grad_norm": 7.055088996887207, |
| "learning_rate": 9.671096835515642e-07, |
| "loss": 0.8257, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.08134384705799932, |
| "grad_norm": 4.3031511306762695, |
| "learning_rate": 9.670277445469593e-07, |
| "loss": 0.9243, |
| "step": 5225 |
| }, |
| { |
| "epoch": 0.0814216880599687, |
| "grad_norm": 12.051529884338379, |
| "learning_rate": 9.669458055423542e-07, |
| "loss": 0.8012, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.08149952906193808, |
| "grad_norm": 3.5274226665496826, |
| "learning_rate": 9.668638665377493e-07, |
| "loss": 0.8752, |
| "step": 5235 |
| }, |
| { |
| "epoch": 0.08157737006390746, |
| "grad_norm": 3.1642568111419678, |
| "learning_rate": 9.667819275331443e-07, |
| "loss": 0.7385, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.08165521106587684, |
| "grad_norm": 3.645951271057129, |
| "learning_rate": 9.666999885285392e-07, |
| "loss": 0.7538, |
| "step": 5245 |
| }, |
| { |
| "epoch": 0.08173305206784622, |
| "grad_norm": 5.045301914215088, |
| "learning_rate": 9.666180495239343e-07, |
| "loss": 0.8496, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.0818108930698156, |
| "grad_norm": 3.8335864543914795, |
| "learning_rate": 9.665361105193293e-07, |
| "loss": 0.8149, |
| "step": 5255 |
| }, |
| { |
| "epoch": 0.08188873407178497, |
| "grad_norm": 5.525310516357422, |
| "learning_rate": 9.664541715147244e-07, |
| "loss": 0.8061, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.08196657507375435, |
| "grad_norm": 3.721007823944092, |
| "learning_rate": 9.663722325101195e-07, |
| "loss": 0.7835, |
| "step": 5265 |
| }, |
| { |
| "epoch": 0.08204441607572373, |
| "grad_norm": 4.0820393562316895, |
| "learning_rate": 9.662902935055146e-07, |
| "loss": 0.8629, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.0821222570776931, |
| "grad_norm": 2.5007712841033936, |
| "learning_rate": 9.662083545009094e-07, |
| "loss": 0.8987, |
| "step": 5275 |
| }, |
| { |
| "epoch": 0.08220009807966248, |
| "grad_norm": 5.49976110458374, |
| "learning_rate": 9.661264154963045e-07, |
| "loss": 0.8664, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.08227793908163186, |
| "grad_norm": 3.953249931335449, |
| "learning_rate": 9.660444764916996e-07, |
| "loss": 0.8364, |
| "step": 5285 |
| }, |
| { |
| "epoch": 0.08235578008360124, |
| "grad_norm": 5.422050476074219, |
| "learning_rate": 9.659625374870947e-07, |
| "loss": 0.825, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.08243362108557062, |
| "grad_norm": 6.019737720489502, |
| "learning_rate": 9.658805984824895e-07, |
| "loss": 0.7429, |
| "step": 5295 |
| }, |
| { |
| "epoch": 0.08251146208754, |
| "grad_norm": 4.360890865325928, |
| "learning_rate": 9.657986594778846e-07, |
| "loss": 0.8384, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.08258930308950937, |
| "grad_norm": 2.676135540008545, |
| "learning_rate": 9.657167204732797e-07, |
| "loss": 0.7928, |
| "step": 5305 |
| }, |
| { |
| "epoch": 0.08266714409147874, |
| "grad_norm": 2.602173328399658, |
| "learning_rate": 9.656347814686747e-07, |
| "loss": 0.8301, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.08274498509344812, |
| "grad_norm": 3.2521512508392334, |
| "learning_rate": 9.655528424640696e-07, |
| "loss": 0.8491, |
| "step": 5315 |
| }, |
| { |
| "epoch": 0.0828228260954175, |
| "grad_norm": 3.9603660106658936, |
| "learning_rate": 9.654709034594647e-07, |
| "loss": 0.9349, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.08290066709738687, |
| "grad_norm": 3.615999698638916, |
| "learning_rate": 9.653889644548598e-07, |
| "loss": 0.7493, |
| "step": 5325 |
| }, |
| { |
| "epoch": 0.08297850809935625, |
| "grad_norm": 4.19753360748291, |
| "learning_rate": 9.653070254502548e-07, |
| "loss": 0.8548, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.08305634910132563, |
| "grad_norm": 3.5472726821899414, |
| "learning_rate": 9.6522508644565e-07, |
| "loss": 0.6982, |
| "step": 5335 |
| }, |
| { |
| "epoch": 0.08313419010329501, |
| "grad_norm": 8.160552024841309, |
| "learning_rate": 9.651431474410448e-07, |
| "loss": 0.801, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.08321203110526439, |
| "grad_norm": 5.538876056671143, |
| "learning_rate": 9.650612084364398e-07, |
| "loss": 0.7735, |
| "step": 5345 |
| }, |
| { |
| "epoch": 0.08328987210723376, |
| "grad_norm": 5.047536849975586, |
| "learning_rate": 9.64979269431835e-07, |
| "loss": 0.9173, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.08336771310920314, |
| "grad_norm": 3.526073932647705, |
| "learning_rate": 9.648973304272298e-07, |
| "loss": 0.9703, |
| "step": 5355 |
| }, |
| { |
| "epoch": 0.08344555411117252, |
| "grad_norm": 12.305222511291504, |
| "learning_rate": 9.648153914226249e-07, |
| "loss": 0.8025, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.0835233951131419, |
| "grad_norm": 4.003148078918457, |
| "learning_rate": 9.6473345241802e-07, |
| "loss": 0.8206, |
| "step": 5365 |
| }, |
| { |
| "epoch": 0.08360123611511128, |
| "grad_norm": 3.3531124591827393, |
| "learning_rate": 9.64651513413415e-07, |
| "loss": 0.7922, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.08367907711708066, |
| "grad_norm": 3.3483853340148926, |
| "learning_rate": 9.6456957440881e-07, |
| "loss": 0.805, |
| "step": 5375 |
| }, |
| { |
| "epoch": 0.08375691811905003, |
| "grad_norm": 3.580211639404297, |
| "learning_rate": 9.644876354042052e-07, |
| "loss": 0.8038, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.08383475912101941, |
| "grad_norm": 4.441928863525391, |
| "learning_rate": 9.644056963996e-07, |
| "loss": 0.8188, |
| "step": 5385 |
| }, |
| { |
| "epoch": 0.08391260012298879, |
| "grad_norm": 4.342660903930664, |
| "learning_rate": 9.64323757394995e-07, |
| "loss": 0.9376, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.08399044112495815, |
| "grad_norm": 3.5513997077941895, |
| "learning_rate": 9.642418183903902e-07, |
| "loss": 0.8407, |
| "step": 5395 |
| }, |
| { |
| "epoch": 0.08406828212692753, |
| "grad_norm": 3.7131507396698, |
| "learning_rate": 9.64159879385785e-07, |
| "loss": 0.8832, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.08414612312889691, |
| "grad_norm": 4.675576686859131, |
| "learning_rate": 9.6407794038118e-07, |
| "loss": 0.8137, |
| "step": 5405 |
| }, |
| { |
| "epoch": 0.08422396413086629, |
| "grad_norm": 5.775442600250244, |
| "learning_rate": 9.639960013765752e-07, |
| "loss": 0.8705, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.08430180513283567, |
| "grad_norm": 3.2232508659362793, |
| "learning_rate": 9.639140623719703e-07, |
| "loss": 0.8266, |
| "step": 5415 |
| }, |
| { |
| "epoch": 0.08437964613480504, |
| "grad_norm": 3.5636298656463623, |
| "learning_rate": 9.638321233673653e-07, |
| "loss": 0.739, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.08445748713677442, |
| "grad_norm": 6.0133442878723145, |
| "learning_rate": 9.637501843627604e-07, |
| "loss": 0.7714, |
| "step": 5425 |
| }, |
| { |
| "epoch": 0.0845353281387438, |
| "grad_norm": 3.2928476333618164, |
| "learning_rate": 9.636682453581555e-07, |
| "loss": 0.7455, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.08461316914071318, |
| "grad_norm": 3.734174966812134, |
| "learning_rate": 9.635863063535503e-07, |
| "loss": 0.8668, |
| "step": 5435 |
| }, |
| { |
| "epoch": 0.08469101014268256, |
| "grad_norm": 3.125318765640259, |
| "learning_rate": 9.635043673489454e-07, |
| "loss": 0.8308, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.08476885114465194, |
| "grad_norm": 4.821923732757568, |
| "learning_rate": 9.634224283443405e-07, |
| "loss": 0.7993, |
| "step": 5445 |
| }, |
| { |
| "epoch": 0.08484669214662131, |
| "grad_norm": 3.52372407913208, |
| "learning_rate": 9.633404893397354e-07, |
| "loss": 0.9816, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.08492453314859069, |
| "grad_norm": 4.727661609649658, |
| "learning_rate": 9.632585503351304e-07, |
| "loss": 0.8819, |
| "step": 5455 |
| }, |
| { |
| "epoch": 0.08500237415056007, |
| "grad_norm": 6.401661396026611, |
| "learning_rate": 9.631766113305255e-07, |
| "loss": 0.879, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.08508021515252945, |
| "grad_norm": 3.203312873840332, |
| "learning_rate": 9.630946723259206e-07, |
| "loss": 0.7982, |
| "step": 5465 |
| }, |
| { |
| "epoch": 0.08515805615449883, |
| "grad_norm": 4.19862174987793, |
| "learning_rate": 9.630127333213157e-07, |
| "loss": 0.8441, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.0852358971564682, |
| "grad_norm": 3.9910812377929688, |
| "learning_rate": 9.629307943167107e-07, |
| "loss": 0.9595, |
| "step": 5475 |
| }, |
| { |
| "epoch": 0.08531373815843757, |
| "grad_norm": 3.739917755126953, |
| "learning_rate": 9.628488553121056e-07, |
| "loss": 0.9027, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.08539157916040695, |
| "grad_norm": 3.8963537216186523, |
| "learning_rate": 9.627669163075007e-07, |
| "loss": 0.7635, |
| "step": 5485 |
| }, |
| { |
| "epoch": 0.08546942016237633, |
| "grad_norm": 11.066873550415039, |
| "learning_rate": 9.626849773028957e-07, |
| "loss": 0.828, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.0855472611643457, |
| "grad_norm": 5.069997310638428, |
| "learning_rate": 9.626030382982906e-07, |
| "loss": 0.892, |
| "step": 5495 |
| }, |
| { |
| "epoch": 0.08562510216631508, |
| "grad_norm": 2.1011128425598145, |
| "learning_rate": 9.625210992936857e-07, |
| "loss": 0.7157, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.08570294316828446, |
| "grad_norm": 5.490849494934082, |
| "learning_rate": 9.624391602890808e-07, |
| "loss": 0.7912, |
| "step": 5505 |
| }, |
| { |
| "epoch": 0.08578078417025384, |
| "grad_norm": 5.189328670501709, |
| "learning_rate": 9.623572212844758e-07, |
| "loss": 0.6751, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.08585862517222322, |
| "grad_norm": 3.257615089416504, |
| "learning_rate": 9.62275282279871e-07, |
| "loss": 0.7616, |
| "step": 5515 |
| }, |
| { |
| "epoch": 0.0859364661741926, |
| "grad_norm": 3.8244619369506836, |
| "learning_rate": 9.621933432752658e-07, |
| "loss": 0.7356, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.08601430717616197, |
| "grad_norm": 4.616507530212402, |
| "learning_rate": 9.621114042706608e-07, |
| "loss": 0.8844, |
| "step": 5525 |
| }, |
| { |
| "epoch": 0.08609214817813135, |
| "grad_norm": 8.950932502746582, |
| "learning_rate": 9.62029465266056e-07, |
| "loss": 0.8493, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.08616998918010073, |
| "grad_norm": 3.257582187652588, |
| "learning_rate": 9.61947526261451e-07, |
| "loss": 0.7523, |
| "step": 5535 |
| }, |
| { |
| "epoch": 0.0862478301820701, |
| "grad_norm": 9.792999267578125, |
| "learning_rate": 9.61865587256846e-07, |
| "loss": 0.8037, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.08632567118403948, |
| "grad_norm": 3.294633626937866, |
| "learning_rate": 9.61783648252241e-07, |
| "loss": 0.7864, |
| "step": 5545 |
| }, |
| { |
| "epoch": 0.08640351218600886, |
| "grad_norm": 3.527974843978882, |
| "learning_rate": 9.61701709247636e-07, |
| "loss": 0.7226, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.08648135318797824, |
| "grad_norm": 10.905069351196289, |
| "learning_rate": 9.61619770243031e-07, |
| "loss": 0.8916, |
| "step": 5555 |
| }, |
| { |
| "epoch": 0.08655919418994762, |
| "grad_norm": 5.191342830657959, |
| "learning_rate": 9.61537831238426e-07, |
| "loss": 0.7855, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.08663703519191698, |
| "grad_norm": 4.45928430557251, |
| "learning_rate": 9.61455892233821e-07, |
| "loss": 0.6989, |
| "step": 5565 |
| }, |
| { |
| "epoch": 0.08671487619388636, |
| "grad_norm": 3.573596954345703, |
| "learning_rate": 9.61373953229216e-07, |
| "loss": 0.718, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.08679271719585574, |
| "grad_norm": 3.490968942642212, |
| "learning_rate": 9.612920142246112e-07, |
| "loss": 0.9475, |
| "step": 5575 |
| }, |
| { |
| "epoch": 0.08687055819782512, |
| "grad_norm": 5.315331935882568, |
| "learning_rate": 9.612100752200062e-07, |
| "loss": 0.7643, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.0869483991997945, |
| "grad_norm": 5.576305389404297, |
| "learning_rate": 9.611281362154013e-07, |
| "loss": 0.8273, |
| "step": 5585 |
| }, |
| { |
| "epoch": 0.08702624020176387, |
| "grad_norm": 3.3249528408050537, |
| "learning_rate": 9.610461972107962e-07, |
| "loss": 0.7327, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.08710408120373325, |
| "grad_norm": 5.021561622619629, |
| "learning_rate": 9.609642582061913e-07, |
| "loss": 0.7693, |
| "step": 5595 |
| }, |
| { |
| "epoch": 0.08718192220570263, |
| "grad_norm": 4.7560834884643555, |
| "learning_rate": 9.608823192015863e-07, |
| "loss": 0.7056, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.08725976320767201, |
| "grad_norm": 4.182785987854004, |
| "learning_rate": 9.608003801969812e-07, |
| "loss": 0.7807, |
| "step": 5605 |
| }, |
| { |
| "epoch": 0.08733760420964139, |
| "grad_norm": 3.104510545730591, |
| "learning_rate": 9.607184411923763e-07, |
| "loss": 0.771, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.08741544521161076, |
| "grad_norm": 4.316323280334473, |
| "learning_rate": 9.606365021877713e-07, |
| "loss": 0.8881, |
| "step": 5615 |
| }, |
| { |
| "epoch": 0.08749328621358014, |
| "grad_norm": 4.004445552825928, |
| "learning_rate": 9.605545631831664e-07, |
| "loss": 0.8125, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.08757112721554952, |
| "grad_norm": 5.998608112335205, |
| "learning_rate": 9.604726241785615e-07, |
| "loss": 0.8896, |
| "step": 5625 |
| }, |
| { |
| "epoch": 0.0876489682175189, |
| "grad_norm": 6.251708507537842, |
| "learning_rate": 9.603906851739566e-07, |
| "loss": 0.97, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.08772680921948828, |
| "grad_norm": 4.202377796173096, |
| "learning_rate": 9.603087461693514e-07, |
| "loss": 0.9106, |
| "step": 5635 |
| }, |
| { |
| "epoch": 0.08780465022145766, |
| "grad_norm": 7.748138427734375, |
| "learning_rate": 9.602268071647465e-07, |
| "loss": 0.8634, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.08788249122342703, |
| "grad_norm": 5.498707294464111, |
| "learning_rate": 9.601448681601416e-07, |
| "loss": 0.7508, |
| "step": 5645 |
| }, |
| { |
| "epoch": 0.08796033222539641, |
| "grad_norm": 3.504171133041382, |
| "learning_rate": 9.600629291555364e-07, |
| "loss": 0.8638, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.08803817322736578, |
| "grad_norm": 4.243772983551025, |
| "learning_rate": 9.599809901509315e-07, |
| "loss": 0.8651, |
| "step": 5655 |
| }, |
| { |
| "epoch": 0.08811601422933515, |
| "grad_norm": 2.332878589630127, |
| "learning_rate": 9.598990511463266e-07, |
| "loss": 0.6884, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.08819385523130453, |
| "grad_norm": 5.470850944519043, |
| "learning_rate": 9.598171121417217e-07, |
| "loss": 0.9159, |
| "step": 5665 |
| }, |
| { |
| "epoch": 0.08827169623327391, |
| "grad_norm": 3.167588710784912, |
| "learning_rate": 9.597351731371167e-07, |
| "loss": 0.7925, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.08834953723524329, |
| "grad_norm": 8.463876724243164, |
| "learning_rate": 9.596532341325118e-07, |
| "loss": 0.773, |
| "step": 5675 |
| }, |
| { |
| "epoch": 0.08842737823721267, |
| "grad_norm": 5.318755626678467, |
| "learning_rate": 9.595712951279067e-07, |
| "loss": 0.8523, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.08850521923918205, |
| "grad_norm": 8.276546478271484, |
| "learning_rate": 9.594893561233018e-07, |
| "loss": 0.7573, |
| "step": 5685 |
| }, |
| { |
| "epoch": 0.08858306024115142, |
| "grad_norm": 3.6410884857177734, |
| "learning_rate": 9.594074171186968e-07, |
| "loss": 1.0501, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.0886609012431208, |
| "grad_norm": 4.722231388092041, |
| "learning_rate": 9.59325478114092e-07, |
| "loss": 0.8005, |
| "step": 5695 |
| }, |
| { |
| "epoch": 0.08873874224509018, |
| "grad_norm": 4.808355808258057, |
| "learning_rate": 9.592435391094868e-07, |
| "loss": 0.8858, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.08881658324705956, |
| "grad_norm": 3.3222663402557373, |
| "learning_rate": 9.591616001048818e-07, |
| "loss": 0.8279, |
| "step": 5705 |
| }, |
| { |
| "epoch": 0.08889442424902894, |
| "grad_norm": 6.019637584686279, |
| "learning_rate": 9.59079661100277e-07, |
| "loss": 0.8425, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.08897226525099831, |
| "grad_norm": 3.4430840015411377, |
| "learning_rate": 9.58997722095672e-07, |
| "loss": 0.8277, |
| "step": 5715 |
| }, |
| { |
| "epoch": 0.08905010625296769, |
| "grad_norm": 2.4599595069885254, |
| "learning_rate": 9.58915783091067e-07, |
| "loss": 0.8047, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.08912794725493707, |
| "grad_norm": 5.123390197753906, |
| "learning_rate": 9.58833844086462e-07, |
| "loss": 0.7134, |
| "step": 5725 |
| }, |
| { |
| "epoch": 0.08920578825690645, |
| "grad_norm": 5.264007568359375, |
| "learning_rate": 9.58751905081857e-07, |
| "loss": 0.914, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.08928362925887583, |
| "grad_norm": 2.6615512371063232, |
| "learning_rate": 9.58669966077252e-07, |
| "loss": 0.8613, |
| "step": 5735 |
| }, |
| { |
| "epoch": 0.08936147026084519, |
| "grad_norm": 2.7306411266326904, |
| "learning_rate": 9.585880270726472e-07, |
| "loss": 0.6556, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.08943931126281457, |
| "grad_norm": 3.124546766281128, |
| "learning_rate": 9.58506088068042e-07, |
| "loss": 0.7402, |
| "step": 5745 |
| }, |
| { |
| "epoch": 0.08951715226478395, |
| "grad_norm": 3.2253921031951904, |
| "learning_rate": 9.58424149063437e-07, |
| "loss": 0.8324, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.08959499326675333, |
| "grad_norm": 4.765871524810791, |
| "learning_rate": 9.583422100588322e-07, |
| "loss": 0.8187, |
| "step": 5755 |
| }, |
| { |
| "epoch": 0.0896728342687227, |
| "grad_norm": 5.348093509674072, |
| "learning_rate": 9.582602710542272e-07, |
| "loss": 0.8607, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.08975067527069208, |
| "grad_norm": 3.306044340133667, |
| "learning_rate": 9.581783320496221e-07, |
| "loss": 0.8503, |
| "step": 5765 |
| }, |
| { |
| "epoch": 0.08982851627266146, |
| "grad_norm": 5.045707702636719, |
| "learning_rate": 9.580963930450172e-07, |
| "loss": 0.8423, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.08990635727463084, |
| "grad_norm": 6.576409816741943, |
| "learning_rate": 9.580144540404123e-07, |
| "loss": 0.9193, |
| "step": 5775 |
| }, |
| { |
| "epoch": 0.08998419827660022, |
| "grad_norm": 7.775379180908203, |
| "learning_rate": 9.579325150358073e-07, |
| "loss": 0.8257, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.0900620392785696, |
| "grad_norm": 8.183690071105957, |
| "learning_rate": 9.578505760312024e-07, |
| "loss": 0.7978, |
| "step": 5785 |
| }, |
| { |
| "epoch": 0.09013988028053897, |
| "grad_norm": 5.8509087562561035, |
| "learning_rate": 9.577686370265975e-07, |
| "loss": 0.7486, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.09021772128250835, |
| "grad_norm": 7.354578971862793, |
| "learning_rate": 9.576866980219923e-07, |
| "loss": 0.8432, |
| "step": 5795 |
| }, |
| { |
| "epoch": 0.09029556228447773, |
| "grad_norm": 3.6449766159057617, |
| "learning_rate": 9.576047590173874e-07, |
| "loss": 0.8296, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.09037340328644711, |
| "grad_norm": 9.557231903076172, |
| "learning_rate": 9.575228200127825e-07, |
| "loss": 0.7974, |
| "step": 5805 |
| }, |
| { |
| "epoch": 0.09045124428841649, |
| "grad_norm": 4.868302345275879, |
| "learning_rate": 9.574408810081774e-07, |
| "loss": 0.9218, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.09052908529038586, |
| "grad_norm": 4.260608196258545, |
| "learning_rate": 9.573589420035724e-07, |
| "loss": 0.7428, |
| "step": 5815 |
| }, |
| { |
| "epoch": 0.09060692629235524, |
| "grad_norm": 3.023204803466797, |
| "learning_rate": 9.572770029989675e-07, |
| "loss": 0.8903, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.0906847672943246, |
| "grad_norm": 3.036348819732666, |
| "learning_rate": 9.571950639943626e-07, |
| "loss": 0.89, |
| "step": 5825 |
| }, |
| { |
| "epoch": 0.09076260829629398, |
| "grad_norm": 4.273719310760498, |
| "learning_rate": 9.571131249897577e-07, |
| "loss": 0.8611, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.09084044929826336, |
| "grad_norm": 3.51576828956604, |
| "learning_rate": 9.570311859851527e-07, |
| "loss": 0.829, |
| "step": 5835 |
| }, |
| { |
| "epoch": 0.09091829030023274, |
| "grad_norm": 3.904651641845703, |
| "learning_rate": 9.569492469805476e-07, |
| "loss": 0.9588, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.09099613130220212, |
| "grad_norm": 3.824842691421509, |
| "learning_rate": 9.568673079759427e-07, |
| "loss": 0.7191, |
| "step": 5845 |
| }, |
| { |
| "epoch": 0.0910739723041715, |
| "grad_norm": 5.210089683532715, |
| "learning_rate": 9.567853689713377e-07, |
| "loss": 0.8695, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.09115181330614087, |
| "grad_norm": 2.847330093383789, |
| "learning_rate": 9.567034299667326e-07, |
| "loss": 0.8151, |
| "step": 5855 |
| }, |
| { |
| "epoch": 0.09122965430811025, |
| "grad_norm": 4.297481060028076, |
| "learning_rate": 9.566214909621277e-07, |
| "loss": 0.91, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.09130749531007963, |
| "grad_norm": 5.124939918518066, |
| "learning_rate": 9.565395519575228e-07, |
| "loss": 0.7646, |
| "step": 5865 |
| }, |
| { |
| "epoch": 0.09138533631204901, |
| "grad_norm": 4.084904193878174, |
| "learning_rate": 9.564576129529178e-07, |
| "loss": 0.8639, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.09146317731401839, |
| "grad_norm": 7.2979254722595215, |
| "learning_rate": 9.56375673948313e-07, |
| "loss": 0.909, |
| "step": 5875 |
| }, |
| { |
| "epoch": 0.09154101831598777, |
| "grad_norm": 3.893127202987671, |
| "learning_rate": 9.56293734943708e-07, |
| "loss": 0.7701, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.09161885931795714, |
| "grad_norm": 3.9665653705596924, |
| "learning_rate": 9.562117959391028e-07, |
| "loss": 0.8257, |
| "step": 5885 |
| }, |
| { |
| "epoch": 0.09169670031992652, |
| "grad_norm": 3.298375129699707, |
| "learning_rate": 9.56129856934498e-07, |
| "loss": 0.8005, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.0917745413218959, |
| "grad_norm": 3.643336057662964, |
| "learning_rate": 9.56047917929893e-07, |
| "loss": 0.8725, |
| "step": 5895 |
| }, |
| { |
| "epoch": 0.09185238232386528, |
| "grad_norm": 11.060576438903809, |
| "learning_rate": 9.559659789252879e-07, |
| "loss": 0.8681, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.09193022332583466, |
| "grad_norm": 4.433795928955078, |
| "learning_rate": 9.55884039920683e-07, |
| "loss": 0.8007, |
| "step": 5905 |
| }, |
| { |
| "epoch": 0.09200806432780402, |
| "grad_norm": 6.115171909332275, |
| "learning_rate": 9.55802100916078e-07, |
| "loss": 0.73, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.0920859053297734, |
| "grad_norm": 4.329387187957764, |
| "learning_rate": 9.55720161911473e-07, |
| "loss": 0.7698, |
| "step": 5915 |
| }, |
| { |
| "epoch": 0.09216374633174278, |
| "grad_norm": 4.206638813018799, |
| "learning_rate": 9.556382229068682e-07, |
| "loss": 0.7973, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.09224158733371216, |
| "grad_norm": 3.0813913345336914, |
| "learning_rate": 9.55556283902263e-07, |
| "loss": 0.7804, |
| "step": 5925 |
| }, |
| { |
| "epoch": 0.09231942833568153, |
| "grad_norm": 6.411551475524902, |
| "learning_rate": 9.55474344897658e-07, |
| "loss": 0.8881, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.09239726933765091, |
| "grad_norm": 2.5208792686462402, |
| "learning_rate": 9.553924058930532e-07, |
| "loss": 0.841, |
| "step": 5935 |
| }, |
| { |
| "epoch": 0.09247511033962029, |
| "grad_norm": 2.8447041511535645, |
| "learning_rate": 9.553104668884482e-07, |
| "loss": 0.791, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.09255295134158967, |
| "grad_norm": 4.374822616577148, |
| "learning_rate": 9.552285278838433e-07, |
| "loss": 0.8153, |
| "step": 5945 |
| }, |
| { |
| "epoch": 0.09263079234355905, |
| "grad_norm": 6.252150058746338, |
| "learning_rate": 9.551465888792382e-07, |
| "loss": 0.8223, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.09270863334552842, |
| "grad_norm": 3.3018994331359863, |
| "learning_rate": 9.550646498746333e-07, |
| "loss": 0.9067, |
| "step": 5955 |
| }, |
| { |
| "epoch": 0.0927864743474978, |
| "grad_norm": 4.026679515838623, |
| "learning_rate": 9.549827108700283e-07, |
| "loss": 0.7962, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.09286431534946718, |
| "grad_norm": 3.2476413249969482, |
| "learning_rate": 9.549007718654232e-07, |
| "loss": 0.8074, |
| "step": 5965 |
| }, |
| { |
| "epoch": 0.09294215635143656, |
| "grad_norm": 2.852954149246216, |
| "learning_rate": 9.548188328608183e-07, |
| "loss": 0.8071, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.09301999735340594, |
| "grad_norm": 3.4490416049957275, |
| "learning_rate": 9.547368938562133e-07, |
| "loss": 0.9519, |
| "step": 5975 |
| }, |
| { |
| "epoch": 0.09309783835537531, |
| "grad_norm": 2.473008155822754, |
| "learning_rate": 9.546549548516084e-07, |
| "loss": 0.8446, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.09317567935734469, |
| "grad_norm": 7.381313800811768, |
| "learning_rate": 9.545730158470035e-07, |
| "loss": 0.802, |
| "step": 5985 |
| }, |
| { |
| "epoch": 0.09325352035931407, |
| "grad_norm": 4.133596897125244, |
| "learning_rate": 9.544910768423986e-07, |
| "loss": 0.7167, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.09333136136128345, |
| "grad_norm": 4.466127872467041, |
| "learning_rate": 9.544091378377934e-07, |
| "loss": 0.9329, |
| "step": 5995 |
| }, |
| { |
| "epoch": 0.09340920236325281, |
| "grad_norm": 4.252684593200684, |
| "learning_rate": 9.543271988331885e-07, |
| "loss": 0.856, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.09348704336522219, |
| "grad_norm": 3.630127429962158, |
| "learning_rate": 9.542452598285836e-07, |
| "loss": 0.8738, |
| "step": 6005 |
| }, |
| { |
| "epoch": 0.09356488436719157, |
| "grad_norm": 8.133733749389648, |
| "learning_rate": 9.541633208239784e-07, |
| "loss": 0.8339, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.09364272536916095, |
| "grad_norm": 11.18271541595459, |
| "learning_rate": 9.540813818193735e-07, |
| "loss": 0.7978, |
| "step": 6015 |
| }, |
| { |
| "epoch": 0.09372056637113033, |
| "grad_norm": 6.3515214920043945, |
| "learning_rate": 9.539994428147686e-07, |
| "loss": 0.7742, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.0937984073730997, |
| "grad_norm": 3.030446767807007, |
| "learning_rate": 9.539175038101637e-07, |
| "loss": 0.6648, |
| "step": 6025 |
| }, |
| { |
| "epoch": 0.09387624837506908, |
| "grad_norm": 5.10403299331665, |
| "learning_rate": 9.538355648055587e-07, |
| "loss": 0.9393, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.09395408937703846, |
| "grad_norm": 2.5483992099761963, |
| "learning_rate": 9.537536258009538e-07, |
| "loss": 0.8155, |
| "step": 6035 |
| }, |
| { |
| "epoch": 0.09403193037900784, |
| "grad_norm": 6.4216814041137695, |
| "learning_rate": 9.536716867963488e-07, |
| "loss": 0.9805, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.09410977138097722, |
| "grad_norm": 8.188408851623535, |
| "learning_rate": 9.535897477917439e-07, |
| "loss": 0.8092, |
| "step": 6045 |
| }, |
| { |
| "epoch": 0.0941876123829466, |
| "grad_norm": 4.022781848907471, |
| "learning_rate": 9.535078087871387e-07, |
| "loss": 0.8009, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.09426545338491597, |
| "grad_norm": 3.303135871887207, |
| "learning_rate": 9.534258697825338e-07, |
| "loss": 0.7763, |
| "step": 6055 |
| }, |
| { |
| "epoch": 0.09434329438688535, |
| "grad_norm": 5.49419641494751, |
| "learning_rate": 9.533439307779289e-07, |
| "loss": 0.8246, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.09442113538885473, |
| "grad_norm": 4.208410739898682, |
| "learning_rate": 9.532619917733238e-07, |
| "loss": 0.8875, |
| "step": 6065 |
| }, |
| { |
| "epoch": 0.09449897639082411, |
| "grad_norm": 2.5194616317749023, |
| "learning_rate": 9.531800527687189e-07, |
| "loss": 0.8782, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.09457681739279349, |
| "grad_norm": 3.4753055572509766, |
| "learning_rate": 9.53098113764114e-07, |
| "loss": 0.7719, |
| "step": 6075 |
| }, |
| { |
| "epoch": 0.09465465839476286, |
| "grad_norm": 4.319244861602783, |
| "learning_rate": 9.53016174759509e-07, |
| "loss": 0.9141, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.09473249939673223, |
| "grad_norm": 2.9613096714019775, |
| "learning_rate": 9.52934235754904e-07, |
| "loss": 0.7196, |
| "step": 6085 |
| }, |
| { |
| "epoch": 0.0948103403987016, |
| "grad_norm": 6.506518840789795, |
| "learning_rate": 9.52852296750299e-07, |
| "loss": 0.8589, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.09488818140067098, |
| "grad_norm": 7.105751037597656, |
| "learning_rate": 9.527703577456941e-07, |
| "loss": 0.8392, |
| "step": 6095 |
| }, |
| { |
| "epoch": 0.09496602240264036, |
| "grad_norm": 3.0105667114257812, |
| "learning_rate": 9.52688418741089e-07, |
| "loss": 0.6962, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.09504386340460974, |
| "grad_norm": 7.148667812347412, |
| "learning_rate": 9.526064797364841e-07, |
| "loss": 0.7958, |
| "step": 6105 |
| }, |
| { |
| "epoch": 0.09512170440657912, |
| "grad_norm": 11.00757122039795, |
| "learning_rate": 9.525245407318792e-07, |
| "loss": 0.8516, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.0951995454085485, |
| "grad_norm": 5.520313739776611, |
| "learning_rate": 9.524426017272742e-07, |
| "loss": 0.9142, |
| "step": 6115 |
| }, |
| { |
| "epoch": 0.09527738641051788, |
| "grad_norm": 3.418109893798828, |
| "learning_rate": 9.523606627226692e-07, |
| "loss": 0.7326, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.09535522741248725, |
| "grad_norm": 3.9674932956695557, |
| "learning_rate": 9.522787237180643e-07, |
| "loss": 0.8561, |
| "step": 6125 |
| }, |
| { |
| "epoch": 0.09543306841445663, |
| "grad_norm": 3.5758800506591797, |
| "learning_rate": 9.521967847134592e-07, |
| "loss": 0.8119, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.09551090941642601, |
| "grad_norm": 4.39679479598999, |
| "learning_rate": 9.521148457088543e-07, |
| "loss": 0.8427, |
| "step": 6135 |
| }, |
| { |
| "epoch": 0.09558875041839539, |
| "grad_norm": 5.498544216156006, |
| "learning_rate": 9.520329067042493e-07, |
| "loss": 0.8786, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.09566659142036477, |
| "grad_norm": 3.5914194583892822, |
| "learning_rate": 9.519509676996443e-07, |
| "loss": 0.8465, |
| "step": 6145 |
| }, |
| { |
| "epoch": 0.09574443242233414, |
| "grad_norm": 5.639887809753418, |
| "learning_rate": 9.518690286950394e-07, |
| "loss": 0.764, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.09582227342430352, |
| "grad_norm": 7.638282299041748, |
| "learning_rate": 9.517870896904345e-07, |
| "loss": 0.858, |
| "step": 6155 |
| }, |
| { |
| "epoch": 0.0959001144262729, |
| "grad_norm": 3.7768096923828125, |
| "learning_rate": 9.517051506858294e-07, |
| "loss": 0.8739, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.09597795542824228, |
| "grad_norm": 3.388122081756592, |
| "learning_rate": 9.516232116812245e-07, |
| "loss": 0.7646, |
| "step": 6165 |
| }, |
| { |
| "epoch": 0.09605579643021164, |
| "grad_norm": 3.0460891723632812, |
| "learning_rate": 9.515412726766195e-07, |
| "loss": 0.9288, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.09613363743218102, |
| "grad_norm": 3.9041805267333984, |
| "learning_rate": 9.514593336720144e-07, |
| "loss": 0.735, |
| "step": 6175 |
| }, |
| { |
| "epoch": 0.0962114784341504, |
| "grad_norm": 3.894850254058838, |
| "learning_rate": 9.513773946674095e-07, |
| "loss": 0.7104, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.09628931943611978, |
| "grad_norm": 3.6872172355651855, |
| "learning_rate": 9.512954556628046e-07, |
| "loss": 0.7743, |
| "step": 6185 |
| }, |
| { |
| "epoch": 0.09636716043808916, |
| "grad_norm": 2.9574503898620605, |
| "learning_rate": 9.512135166581996e-07, |
| "loss": 0.8923, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.09644500144005853, |
| "grad_norm": 3.6874301433563232, |
| "learning_rate": 9.511315776535946e-07, |
| "loss": 0.7798, |
| "step": 6195 |
| }, |
| { |
| "epoch": 0.09652284244202791, |
| "grad_norm": 5.110114574432373, |
| "learning_rate": 9.510496386489897e-07, |
| "loss": 0.8202, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.09660068344399729, |
| "grad_norm": 4.243130683898926, |
| "learning_rate": 9.509676996443848e-07, |
| "loss": 0.9956, |
| "step": 6205 |
| }, |
| { |
| "epoch": 0.09667852444596667, |
| "grad_norm": 9.388118743896484, |
| "learning_rate": 9.508857606397796e-07, |
| "loss": 0.8103, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.09675636544793605, |
| "grad_norm": 4.399020671844482, |
| "learning_rate": 9.508038216351747e-07, |
| "loss": 0.9899, |
| "step": 6215 |
| }, |
| { |
| "epoch": 0.09683420644990542, |
| "grad_norm": 5.260294437408447, |
| "learning_rate": 9.507218826305698e-07, |
| "loss": 0.7776, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.0969120474518748, |
| "grad_norm": 2.903243064880371, |
| "learning_rate": 9.506399436259648e-07, |
| "loss": 0.8065, |
| "step": 6225 |
| }, |
| { |
| "epoch": 0.09698988845384418, |
| "grad_norm": 7.704418182373047, |
| "learning_rate": 9.505580046213598e-07, |
| "loss": 0.6625, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.09706772945581356, |
| "grad_norm": 3.7152814865112305, |
| "learning_rate": 9.504760656167549e-07, |
| "loss": 0.8702, |
| "step": 6235 |
| }, |
| { |
| "epoch": 0.09714557045778294, |
| "grad_norm": 6.636418342590332, |
| "learning_rate": 9.503941266121499e-07, |
| "loss": 0.7592, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.09722341145975232, |
| "grad_norm": 5.01901388168335, |
| "learning_rate": 9.50312187607545e-07, |
| "loss": 0.8377, |
| "step": 6245 |
| }, |
| { |
| "epoch": 0.0973012524617217, |
| "grad_norm": 6.149816989898682, |
| "learning_rate": 9.502302486029399e-07, |
| "loss": 0.7698, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.09737909346369107, |
| "grad_norm": 4.017423152923584, |
| "learning_rate": 9.501483095983349e-07, |
| "loss": 0.7522, |
| "step": 6255 |
| }, |
| { |
| "epoch": 0.09745693446566044, |
| "grad_norm": 3.395038366317749, |
| "learning_rate": 9.5006637059373e-07, |
| "loss": 0.7814, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.09753477546762981, |
| "grad_norm": 8.359529495239258, |
| "learning_rate": 9.49984431589125e-07, |
| "loss": 0.8817, |
| "step": 6265 |
| }, |
| { |
| "epoch": 0.09761261646959919, |
| "grad_norm": 5.801593780517578, |
| "learning_rate": 9.4990249258452e-07, |
| "loss": 0.8346, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.09769045747156857, |
| "grad_norm": 3.440136432647705, |
| "learning_rate": 9.498205535799151e-07, |
| "loss": 0.7793, |
| "step": 6275 |
| }, |
| { |
| "epoch": 0.09776829847353795, |
| "grad_norm": 3.3891918659210205, |
| "learning_rate": 9.497386145753102e-07, |
| "loss": 0.7846, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.09784613947550733, |
| "grad_norm": 3.6862120628356934, |
| "learning_rate": 9.496566755707051e-07, |
| "loss": 0.8316, |
| "step": 6285 |
| }, |
| { |
| "epoch": 0.0979239804774767, |
| "grad_norm": 4.2608642578125, |
| "learning_rate": 9.495747365661001e-07, |
| "loss": 0.8244, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.09800182147944608, |
| "grad_norm": 4.8404459953308105, |
| "learning_rate": 9.494927975614952e-07, |
| "loss": 0.7919, |
| "step": 6295 |
| }, |
| { |
| "epoch": 0.09807966248141546, |
| "grad_norm": 2.3203227519989014, |
| "learning_rate": 9.494108585568901e-07, |
| "loss": 0.7593, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.09815750348338484, |
| "grad_norm": 2.870492935180664, |
| "learning_rate": 9.493289195522852e-07, |
| "loss": 0.7388, |
| "step": 6305 |
| }, |
| { |
| "epoch": 0.09823534448535422, |
| "grad_norm": 3.6634552478790283, |
| "learning_rate": 9.492469805476803e-07, |
| "loss": 0.6632, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.0983131854873236, |
| "grad_norm": 4.569755554199219, |
| "learning_rate": 9.491650415430753e-07, |
| "loss": 0.866, |
| "step": 6315 |
| }, |
| { |
| "epoch": 0.09839102648929297, |
| "grad_norm": 3.319843053817749, |
| "learning_rate": 9.490831025384703e-07, |
| "loss": 0.8812, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.09846886749126235, |
| "grad_norm": 5.647189140319824, |
| "learning_rate": 9.490011635338654e-07, |
| "loss": 0.893, |
| "step": 6325 |
| }, |
| { |
| "epoch": 0.09854670849323173, |
| "grad_norm": 4.285895347595215, |
| "learning_rate": 9.489192245292603e-07, |
| "loss": 0.9623, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.09862454949520111, |
| "grad_norm": 4.257463455200195, |
| "learning_rate": 9.488372855246553e-07, |
| "loss": 0.7959, |
| "step": 6335 |
| }, |
| { |
| "epoch": 0.09870239049717049, |
| "grad_norm": 4.747158050537109, |
| "learning_rate": 9.487553465200504e-07, |
| "loss": 0.6869, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.09878023149913985, |
| "grad_norm": 4.191068172454834, |
| "learning_rate": 9.486734075154455e-07, |
| "loss": 0.8163, |
| "step": 6345 |
| }, |
| { |
| "epoch": 0.09885807250110923, |
| "grad_norm": 4.583565711975098, |
| "learning_rate": 9.485914685108405e-07, |
| "loss": 0.7348, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.0989359135030786, |
| "grad_norm": 3.9108059406280518, |
| "learning_rate": 9.485095295062355e-07, |
| "loss": 0.7968, |
| "step": 6355 |
| }, |
| { |
| "epoch": 0.09901375450504799, |
| "grad_norm": 5.722688674926758, |
| "learning_rate": 9.484275905016306e-07, |
| "loss": 0.8138, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.09909159550701736, |
| "grad_norm": 2.972755193710327, |
| "learning_rate": 9.483456514970256e-07, |
| "loss": 0.795, |
| "step": 6365 |
| }, |
| { |
| "epoch": 0.09916943650898674, |
| "grad_norm": 8.901226997375488, |
| "learning_rate": 9.482637124924207e-07, |
| "loss": 0.7491, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.09924727751095612, |
| "grad_norm": 7.961559772491455, |
| "learning_rate": 9.481817734878156e-07, |
| "loss": 1.0205, |
| "step": 6375 |
| }, |
| { |
| "epoch": 0.0993251185129255, |
| "grad_norm": 6.042298316955566, |
| "learning_rate": 9.480998344832106e-07, |
| "loss": 0.7984, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.09940295951489488, |
| "grad_norm": 10.556697845458984, |
| "learning_rate": 9.480178954786057e-07, |
| "loss": 0.8639, |
| "step": 6385 |
| }, |
| { |
| "epoch": 0.09948080051686425, |
| "grad_norm": 2.7401647567749023, |
| "learning_rate": 9.479359564740007e-07, |
| "loss": 0.799, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.09955864151883363, |
| "grad_norm": 6.288196086883545, |
| "learning_rate": 9.478540174693957e-07, |
| "loss": 0.9338, |
| "step": 6395 |
| }, |
| { |
| "epoch": 0.09963648252080301, |
| "grad_norm": 4.34282112121582, |
| "learning_rate": 9.477720784647908e-07, |
| "loss": 0.8355, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.09971432352277239, |
| "grad_norm": 3.5038483142852783, |
| "learning_rate": 9.476901394601859e-07, |
| "loss": 0.7796, |
| "step": 6405 |
| }, |
| { |
| "epoch": 0.09979216452474177, |
| "grad_norm": 4.715381622314453, |
| "learning_rate": 9.476082004555808e-07, |
| "loss": 0.815, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.09987000552671114, |
| "grad_norm": 3.7928483486175537, |
| "learning_rate": 9.475262614509758e-07, |
| "loss": 0.8346, |
| "step": 6415 |
| }, |
| { |
| "epoch": 0.09994784652868052, |
| "grad_norm": 5.622752666473389, |
| "learning_rate": 9.474443224463709e-07, |
| "loss": 0.8558, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.1000256875306499, |
| "grad_norm": 5.325289726257324, |
| "learning_rate": 9.473623834417658e-07, |
| "loss": 0.8078, |
| "step": 6425 |
| }, |
| { |
| "epoch": 0.10010352853261927, |
| "grad_norm": 3.5389554500579834, |
| "learning_rate": 9.472804444371609e-07, |
| "loss": 0.7461, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.10018136953458864, |
| "grad_norm": 8.74923038482666, |
| "learning_rate": 9.47198505432556e-07, |
| "loss": 0.7609, |
| "step": 6435 |
| }, |
| { |
| "epoch": 0.10025921053655802, |
| "grad_norm": 4.170187473297119, |
| "learning_rate": 9.47116566427951e-07, |
| "loss": 0.8243, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.1003370515385274, |
| "grad_norm": 12.243910789489746, |
| "learning_rate": 9.47034627423346e-07, |
| "loss": 0.7365, |
| "step": 6445 |
| }, |
| { |
| "epoch": 0.10041489254049678, |
| "grad_norm": 6.56355094909668, |
| "learning_rate": 9.469526884187411e-07, |
| "loss": 0.8693, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.10049273354246616, |
| "grad_norm": 4.917191982269287, |
| "learning_rate": 9.46870749414136e-07, |
| "loss": 0.8323, |
| "step": 6455 |
| }, |
| { |
| "epoch": 0.10057057454443553, |
| "grad_norm": 4.455476760864258, |
| "learning_rate": 9.467888104095311e-07, |
| "loss": 0.7539, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.10064841554640491, |
| "grad_norm": 4.135006904602051, |
| "learning_rate": 9.467068714049261e-07, |
| "loss": 0.772, |
| "step": 6465 |
| }, |
| { |
| "epoch": 0.10072625654837429, |
| "grad_norm": 5.814565658569336, |
| "learning_rate": 9.466249324003212e-07, |
| "loss": 0.8389, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.10080409755034367, |
| "grad_norm": 3.4807469844818115, |
| "learning_rate": 9.465429933957162e-07, |
| "loss": 0.8365, |
| "step": 6475 |
| }, |
| { |
| "epoch": 0.10088193855231305, |
| "grad_norm": 5.241673946380615, |
| "learning_rate": 9.464610543911112e-07, |
| "loss": 0.6689, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.10095977955428243, |
| "grad_norm": 6.0900678634643555, |
| "learning_rate": 9.463791153865063e-07, |
| "loss": 0.813, |
| "step": 6485 |
| }, |
| { |
| "epoch": 0.1010376205562518, |
| "grad_norm": 4.659064769744873, |
| "learning_rate": 9.462971763819013e-07, |
| "loss": 0.8624, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.10111546155822118, |
| "grad_norm": 7.9358320236206055, |
| "learning_rate": 9.462152373772963e-07, |
| "loss": 0.7209, |
| "step": 6495 |
| }, |
| { |
| "epoch": 0.10119330256019056, |
| "grad_norm": 3.9600491523742676, |
| "learning_rate": 9.461332983726913e-07, |
| "loss": 0.7995, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.10127114356215994, |
| "grad_norm": 4.832655906677246, |
| "learning_rate": 9.460513593680863e-07, |
| "loss": 0.8935, |
| "step": 6505 |
| }, |
| { |
| "epoch": 0.10134898456412932, |
| "grad_norm": 4.184332370758057, |
| "learning_rate": 9.459694203634814e-07, |
| "loss": 0.7468, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.10142682556609868, |
| "grad_norm": 3.2403645515441895, |
| "learning_rate": 9.458874813588765e-07, |
| "loss": 0.7502, |
| "step": 6515 |
| }, |
| { |
| "epoch": 0.10150466656806806, |
| "grad_norm": 6.45439338684082, |
| "learning_rate": 9.458055423542714e-07, |
| "loss": 0.8359, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.10158250757003744, |
| "grad_norm": 3.9225785732269287, |
| "learning_rate": 9.457236033496665e-07, |
| "loss": 0.822, |
| "step": 6525 |
| }, |
| { |
| "epoch": 0.10166034857200681, |
| "grad_norm": 6.211043834686279, |
| "learning_rate": 9.456416643450616e-07, |
| "loss": 0.7675, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.10173818957397619, |
| "grad_norm": 5.109851360321045, |
| "learning_rate": 9.455597253404564e-07, |
| "loss": 0.8539, |
| "step": 6535 |
| }, |
| { |
| "epoch": 0.10181603057594557, |
| "grad_norm": 3.1654608249664307, |
| "learning_rate": 9.454777863358515e-07, |
| "loss": 0.851, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.10189387157791495, |
| "grad_norm": 6.30355167388916, |
| "learning_rate": 9.453958473312466e-07, |
| "loss": 0.8668, |
| "step": 6545 |
| }, |
| { |
| "epoch": 0.10197171257988433, |
| "grad_norm": 2.9073293209075928, |
| "learning_rate": 9.453139083266416e-07, |
| "loss": 0.8354, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.1020495535818537, |
| "grad_norm": 4.239645481109619, |
| "learning_rate": 9.452319693220366e-07, |
| "loss": 0.8394, |
| "step": 6555 |
| }, |
| { |
| "epoch": 0.10212739458382308, |
| "grad_norm": 4.341432094573975, |
| "learning_rate": 9.451500303174317e-07, |
| "loss": 0.8248, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.10220523558579246, |
| "grad_norm": 5.958523273468018, |
| "learning_rate": 9.450680913128267e-07, |
| "loss": 0.7696, |
| "step": 6565 |
| }, |
| { |
| "epoch": 0.10228307658776184, |
| "grad_norm": 2.9546141624450684, |
| "learning_rate": 9.449861523082217e-07, |
| "loss": 0.8008, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.10236091758973122, |
| "grad_norm": 3.216296672821045, |
| "learning_rate": 9.449042133036167e-07, |
| "loss": 0.8286, |
| "step": 6575 |
| }, |
| { |
| "epoch": 0.1024387585917006, |
| "grad_norm": 5.784662246704102, |
| "learning_rate": 9.448222742990117e-07, |
| "loss": 0.8118, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.10251659959366997, |
| "grad_norm": 18.038049697875977, |
| "learning_rate": 9.447403352944068e-07, |
| "loss": 0.7844, |
| "step": 6585 |
| }, |
| { |
| "epoch": 0.10259444059563935, |
| "grad_norm": 3.434221029281616, |
| "learning_rate": 9.446583962898018e-07, |
| "loss": 0.7745, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.10267228159760873, |
| "grad_norm": 3.0332863330841064, |
| "learning_rate": 9.445764572851969e-07, |
| "loss": 0.9026, |
| "step": 6595 |
| }, |
| { |
| "epoch": 0.10275012259957811, |
| "grad_norm": 4.459526538848877, |
| "learning_rate": 9.444945182805919e-07, |
| "loss": 0.8231, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.10282796360154747, |
| "grad_norm": 11.914100646972656, |
| "learning_rate": 9.44412579275987e-07, |
| "loss": 0.7191, |
| "step": 6605 |
| }, |
| { |
| "epoch": 0.10290580460351685, |
| "grad_norm": 9.491118431091309, |
| "learning_rate": 9.44330640271382e-07, |
| "loss": 0.8438, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.10298364560548623, |
| "grad_norm": 3.312546968460083, |
| "learning_rate": 9.442487012667769e-07, |
| "loss": 0.7754, |
| "step": 6615 |
| }, |
| { |
| "epoch": 0.10306148660745561, |
| "grad_norm": 2.4198150634765625, |
| "learning_rate": 9.44166762262172e-07, |
| "loss": 0.7373, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.10313932760942499, |
| "grad_norm": 3.8953001499176025, |
| "learning_rate": 9.44084823257567e-07, |
| "loss": 0.6972, |
| "step": 6625 |
| }, |
| { |
| "epoch": 0.10321716861139436, |
| "grad_norm": 3.971245050430298, |
| "learning_rate": 9.44002884252962e-07, |
| "loss": 0.7959, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.10329500961336374, |
| "grad_norm": 3.9119505882263184, |
| "learning_rate": 9.439209452483571e-07, |
| "loss": 0.8072, |
| "step": 6635 |
| }, |
| { |
| "epoch": 0.10337285061533312, |
| "grad_norm": 3.6322784423828125, |
| "learning_rate": 9.438390062437522e-07, |
| "loss": 0.8389, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.1034506916173025, |
| "grad_norm": 3.221548557281494, |
| "learning_rate": 9.437570672391471e-07, |
| "loss": 0.8113, |
| "step": 6645 |
| }, |
| { |
| "epoch": 0.10352853261927188, |
| "grad_norm": 3.768453598022461, |
| "learning_rate": 9.436751282345422e-07, |
| "loss": 0.8859, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.10360637362124125, |
| "grad_norm": 3.436704635620117, |
| "learning_rate": 9.435931892299372e-07, |
| "loss": 0.9308, |
| "step": 6655 |
| }, |
| { |
| "epoch": 0.10368421462321063, |
| "grad_norm": 3.5479848384857178, |
| "learning_rate": 9.435112502253321e-07, |
| "loss": 0.8624, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.10376205562518001, |
| "grad_norm": 5.35614538192749, |
| "learning_rate": 9.434293112207272e-07, |
| "loss": 0.8095, |
| "step": 6665 |
| }, |
| { |
| "epoch": 0.10383989662714939, |
| "grad_norm": 3.0815038681030273, |
| "learning_rate": 9.433473722161223e-07, |
| "loss": 0.6846, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.10391773762911877, |
| "grad_norm": 5.047412872314453, |
| "learning_rate": 9.432654332115173e-07, |
| "loss": 0.7785, |
| "step": 6675 |
| }, |
| { |
| "epoch": 0.10399557863108815, |
| "grad_norm": 4.322173595428467, |
| "learning_rate": 9.431834942069123e-07, |
| "loss": 0.8362, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.10407341963305752, |
| "grad_norm": 4.15039587020874, |
| "learning_rate": 9.431015552023074e-07, |
| "loss": 0.7475, |
| "step": 6685 |
| }, |
| { |
| "epoch": 0.10415126063502689, |
| "grad_norm": 3.9758059978485107, |
| "learning_rate": 9.430196161977024e-07, |
| "loss": 0.8058, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.10422910163699627, |
| "grad_norm": 3.778308629989624, |
| "learning_rate": 9.429376771930975e-07, |
| "loss": 0.9209, |
| "step": 6695 |
| }, |
| { |
| "epoch": 0.10430694263896564, |
| "grad_norm": 7.6523566246032715, |
| "learning_rate": 9.428557381884924e-07, |
| "loss": 0.765, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.10438478364093502, |
| "grad_norm": 4.295438289642334, |
| "learning_rate": 9.427737991838874e-07, |
| "loss": 0.8556, |
| "step": 6705 |
| }, |
| { |
| "epoch": 0.1044626246429044, |
| "grad_norm": 9.01634407043457, |
| "learning_rate": 9.426918601792825e-07, |
| "loss": 0.7078, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.10454046564487378, |
| "grad_norm": 4.1538987159729, |
| "learning_rate": 9.426099211746775e-07, |
| "loss": 1.0486, |
| "step": 6715 |
| }, |
| { |
| "epoch": 0.10461830664684316, |
| "grad_norm": 5.460824489593506, |
| "learning_rate": 9.425279821700726e-07, |
| "loss": 0.7296, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.10469614764881253, |
| "grad_norm": 8.995347023010254, |
| "learning_rate": 9.424460431654676e-07, |
| "loss": 0.7228, |
| "step": 6725 |
| }, |
| { |
| "epoch": 0.10477398865078191, |
| "grad_norm": 3.254420042037964, |
| "learning_rate": 9.423641041608627e-07, |
| "loss": 0.8683, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.10485182965275129, |
| "grad_norm": 3.987894058227539, |
| "learning_rate": 9.422821651562577e-07, |
| "loss": 0.7663, |
| "step": 6735 |
| }, |
| { |
| "epoch": 0.10492967065472067, |
| "grad_norm": 3.244363307952881, |
| "learning_rate": 9.422002261516526e-07, |
| "loss": 0.8443, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.10500751165669005, |
| "grad_norm": 4.715000152587891, |
| "learning_rate": 9.421182871470477e-07, |
| "loss": 0.8059, |
| "step": 6745 |
| }, |
| { |
| "epoch": 0.10508535265865943, |
| "grad_norm": 5.014405727386475, |
| "learning_rate": 9.420363481424427e-07, |
| "loss": 0.8381, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.1051631936606288, |
| "grad_norm": 4.085587978363037, |
| "learning_rate": 9.419544091378377e-07, |
| "loss": 0.8104, |
| "step": 6755 |
| }, |
| { |
| "epoch": 0.10524103466259818, |
| "grad_norm": 12.998879432678223, |
| "learning_rate": 9.418724701332328e-07, |
| "loss": 0.8814, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.10531887566456756, |
| "grad_norm": 5.057702541351318, |
| "learning_rate": 9.417905311286279e-07, |
| "loss": 0.7355, |
| "step": 6765 |
| }, |
| { |
| "epoch": 0.10539671666653694, |
| "grad_norm": 4.116156578063965, |
| "learning_rate": 9.417085921240228e-07, |
| "loss": 0.907, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.1054745576685063, |
| "grad_norm": 3.0526468753814697, |
| "learning_rate": 9.416266531194179e-07, |
| "loss": 0.832, |
| "step": 6775 |
| }, |
| { |
| "epoch": 0.10555239867047568, |
| "grad_norm": 5.218168258666992, |
| "learning_rate": 9.415447141148129e-07, |
| "loss": 0.7835, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.10563023967244506, |
| "grad_norm": 7.534468650817871, |
| "learning_rate": 9.414627751102078e-07, |
| "loss": 0.8966, |
| "step": 6785 |
| }, |
| { |
| "epoch": 0.10570808067441444, |
| "grad_norm": 14.327566146850586, |
| "learning_rate": 9.413808361056029e-07, |
| "loss": 0.789, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.10578592167638382, |
| "grad_norm": 4.58953332901001, |
| "learning_rate": 9.41298897100998e-07, |
| "loss": 0.8134, |
| "step": 6795 |
| }, |
| { |
| "epoch": 0.1058637626783532, |
| "grad_norm": 3.4943652153015137, |
| "learning_rate": 9.41216958096393e-07, |
| "loss": 0.9615, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.10594160368032257, |
| "grad_norm": 5.815013408660889, |
| "learning_rate": 9.41135019091788e-07, |
| "loss": 0.7671, |
| "step": 6805 |
| }, |
| { |
| "epoch": 0.10601944468229195, |
| "grad_norm": 4.9490580558776855, |
| "learning_rate": 9.410530800871831e-07, |
| "loss": 0.7993, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.10609728568426133, |
| "grad_norm": 3.030304193496704, |
| "learning_rate": 9.409711410825781e-07, |
| "loss": 0.7573, |
| "step": 6815 |
| }, |
| { |
| "epoch": 0.1061751266862307, |
| "grad_norm": 3.3977646827697754, |
| "learning_rate": 9.408892020779731e-07, |
| "loss": 0.7966, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.10625296768820008, |
| "grad_norm": 9.117260932922363, |
| "learning_rate": 9.408072630733681e-07, |
| "loss": 0.8123, |
| "step": 6825 |
| }, |
| { |
| "epoch": 0.10633080869016946, |
| "grad_norm": 3.8861453533172607, |
| "learning_rate": 9.407253240687631e-07, |
| "loss": 0.8245, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.10640864969213884, |
| "grad_norm": 3.4242775440216064, |
| "learning_rate": 9.406433850641582e-07, |
| "loss": 0.7318, |
| "step": 6835 |
| }, |
| { |
| "epoch": 0.10648649069410822, |
| "grad_norm": 4.729854106903076, |
| "learning_rate": 9.405614460595532e-07, |
| "loss": 0.7959, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.1065643316960776, |
| "grad_norm": 3.1164026260375977, |
| "learning_rate": 9.404795070549483e-07, |
| "loss": 0.8327, |
| "step": 6845 |
| }, |
| { |
| "epoch": 0.10664217269804697, |
| "grad_norm": 4.031877040863037, |
| "learning_rate": 9.403975680503433e-07, |
| "loss": 0.807, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.10672001370001635, |
| "grad_norm": 3.205714702606201, |
| "learning_rate": 9.403156290457384e-07, |
| "loss": 0.7962, |
| "step": 6855 |
| }, |
| { |
| "epoch": 0.10679785470198572, |
| "grad_norm": 3.2358205318450928, |
| "learning_rate": 9.402336900411333e-07, |
| "loss": 0.8694, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.1068756957039551, |
| "grad_norm": 2.7498748302459717, |
| "learning_rate": 9.401517510365283e-07, |
| "loss": 0.7554, |
| "step": 6865 |
| }, |
| { |
| "epoch": 0.10695353670592447, |
| "grad_norm": 7.2536420822143555, |
| "learning_rate": 9.400698120319234e-07, |
| "loss": 0.9135, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.10703137770789385, |
| "grad_norm": 5.090606689453125, |
| "learning_rate": 9.399878730273185e-07, |
| "loss": 0.8358, |
| "step": 6875 |
| }, |
| { |
| "epoch": 0.10710921870986323, |
| "grad_norm": 3.6972696781158447, |
| "learning_rate": 9.399059340227134e-07, |
| "loss": 0.9106, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.10718705971183261, |
| "grad_norm": 3.833972692489624, |
| "learning_rate": 9.398239950181085e-07, |
| "loss": 0.9021, |
| "step": 6885 |
| }, |
| { |
| "epoch": 0.10726490071380199, |
| "grad_norm": 6.692166805267334, |
| "learning_rate": 9.397420560135036e-07, |
| "loss": 0.9014, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.10734274171577136, |
| "grad_norm": 3.5323872566223145, |
| "learning_rate": 9.396601170088985e-07, |
| "loss": 0.8122, |
| "step": 6895 |
| }, |
| { |
| "epoch": 0.10742058271774074, |
| "grad_norm": 5.148552894592285, |
| "learning_rate": 9.395781780042935e-07, |
| "loss": 0.7712, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.10749842371971012, |
| "grad_norm": 4.791245460510254, |
| "learning_rate": 9.394962389996886e-07, |
| "loss": 0.8229, |
| "step": 6905 |
| }, |
| { |
| "epoch": 0.1075762647216795, |
| "grad_norm": 7.922582149505615, |
| "learning_rate": 9.394142999950836e-07, |
| "loss": 0.8641, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.10765410572364888, |
| "grad_norm": 4.787046432495117, |
| "learning_rate": 9.393323609904786e-07, |
| "loss": 0.8567, |
| "step": 6915 |
| }, |
| { |
| "epoch": 0.10773194672561826, |
| "grad_norm": 7.581035137176514, |
| "learning_rate": 9.392504219858737e-07, |
| "loss": 0.9418, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.10780978772758763, |
| "grad_norm": 3.7408881187438965, |
| "learning_rate": 9.391684829812687e-07, |
| "loss": 0.7569, |
| "step": 6925 |
| }, |
| { |
| "epoch": 0.10788762872955701, |
| "grad_norm": 4.957324981689453, |
| "learning_rate": 9.390865439766637e-07, |
| "loss": 0.9865, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.10796546973152639, |
| "grad_norm": 4.249368190765381, |
| "learning_rate": 9.390046049720588e-07, |
| "loss": 0.7513, |
| "step": 6935 |
| }, |
| { |
| "epoch": 0.10804331073349577, |
| "grad_norm": 4.029480934143066, |
| "learning_rate": 9.389226659674537e-07, |
| "loss": 0.8097, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.10812115173546515, |
| "grad_norm": 3.9717726707458496, |
| "learning_rate": 9.388407269628488e-07, |
| "loss": 0.8199, |
| "step": 6945 |
| }, |
| { |
| "epoch": 0.10819899273743451, |
| "grad_norm": 4.825889587402344, |
| "learning_rate": 9.387587879582438e-07, |
| "loss": 0.8086, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.10827683373940389, |
| "grad_norm": 6.288622856140137, |
| "learning_rate": 9.386768489536388e-07, |
| "loss": 0.764, |
| "step": 6955 |
| }, |
| { |
| "epoch": 0.10835467474137327, |
| "grad_norm": 4.316305637359619, |
| "learning_rate": 9.385949099490339e-07, |
| "loss": 0.8855, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.10843251574334264, |
| "grad_norm": 2.9733645915985107, |
| "learning_rate": 9.38512970944429e-07, |
| "loss": 0.8997, |
| "step": 6965 |
| }, |
| { |
| "epoch": 0.10851035674531202, |
| "grad_norm": 7.586787700653076, |
| "learning_rate": 9.38431031939824e-07, |
| "loss": 0.7612, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.1085881977472814, |
| "grad_norm": 6.496944904327393, |
| "learning_rate": 9.38349092935219e-07, |
| "loss": 0.8064, |
| "step": 6975 |
| }, |
| { |
| "epoch": 0.10866603874925078, |
| "grad_norm": 10.352307319641113, |
| "learning_rate": 9.382671539306141e-07, |
| "loss": 0.8054, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.10874387975122016, |
| "grad_norm": 3.3039493560791016, |
| "learning_rate": 9.38185214926009e-07, |
| "loss": 0.7777, |
| "step": 6985 |
| }, |
| { |
| "epoch": 0.10882172075318954, |
| "grad_norm": 2.823133945465088, |
| "learning_rate": 9.38103275921404e-07, |
| "loss": 0.8755, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.10889956175515891, |
| "grad_norm": 5.029725074768066, |
| "learning_rate": 9.380213369167991e-07, |
| "loss": 0.7765, |
| "step": 6995 |
| }, |
| { |
| "epoch": 0.10897740275712829, |
| "grad_norm": 5.5392889976501465, |
| "learning_rate": 9.379393979121942e-07, |
| "loss": 0.8166, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.10905524375909767, |
| "grad_norm": 5.657524585723877, |
| "learning_rate": 9.378574589075891e-07, |
| "loss": 0.7781, |
| "step": 7005 |
| }, |
| { |
| "epoch": 0.10913308476106705, |
| "grad_norm": 5.030917167663574, |
| "learning_rate": 9.377755199029842e-07, |
| "loss": 0.8928, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.10921092576303643, |
| "grad_norm": 4.488454341888428, |
| "learning_rate": 9.376935808983793e-07, |
| "loss": 0.835, |
| "step": 7015 |
| }, |
| { |
| "epoch": 0.1092887667650058, |
| "grad_norm": 3.592827081680298, |
| "learning_rate": 9.376116418937743e-07, |
| "loss": 0.7534, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.10936660776697518, |
| "grad_norm": 6.4170331954956055, |
| "learning_rate": 9.375297028891692e-07, |
| "loss": 0.7978, |
| "step": 7025 |
| }, |
| { |
| "epoch": 0.10944444876894456, |
| "grad_norm": 3.166126251220703, |
| "learning_rate": 9.374477638845643e-07, |
| "loss": 0.8483, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.10952228977091392, |
| "grad_norm": 2.965501070022583, |
| "learning_rate": 9.373658248799593e-07, |
| "loss": 0.7824, |
| "step": 7035 |
| }, |
| { |
| "epoch": 0.1096001307728833, |
| "grad_norm": 4.2378058433532715, |
| "learning_rate": 9.372838858753543e-07, |
| "loss": 0.9217, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.10967797177485268, |
| "grad_norm": 5.209421634674072, |
| "learning_rate": 9.372019468707494e-07, |
| "loss": 0.835, |
| "step": 7045 |
| }, |
| { |
| "epoch": 0.10975581277682206, |
| "grad_norm": 4.27461576461792, |
| "learning_rate": 9.371200078661444e-07, |
| "loss": 0.9258, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.10983365377879144, |
| "grad_norm": 2.5676474571228027, |
| "learning_rate": 9.370380688615395e-07, |
| "loss": 0.8691, |
| "step": 7055 |
| }, |
| { |
| "epoch": 0.10991149478076082, |
| "grad_norm": 3.422879934310913, |
| "learning_rate": 9.369561298569345e-07, |
| "loss": 0.7527, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.1099893357827302, |
| "grad_norm": 4.083531379699707, |
| "learning_rate": 9.368741908523294e-07, |
| "loss": 0.7531, |
| "step": 7065 |
| }, |
| { |
| "epoch": 0.11006717678469957, |
| "grad_norm": 4.684252738952637, |
| "learning_rate": 9.367922518477245e-07, |
| "loss": 0.7772, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.11014501778666895, |
| "grad_norm": 3.0496606826782227, |
| "learning_rate": 9.367103128431195e-07, |
| "loss": 0.7993, |
| "step": 7075 |
| }, |
| { |
| "epoch": 0.11022285878863833, |
| "grad_norm": 3.641996145248413, |
| "learning_rate": 9.366283738385145e-07, |
| "loss": 0.7938, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.1103006997906077, |
| "grad_norm": 3.0637736320495605, |
| "learning_rate": 9.365464348339096e-07, |
| "loss": 0.8944, |
| "step": 7085 |
| }, |
| { |
| "epoch": 0.11037854079257708, |
| "grad_norm": 4.9412455558776855, |
| "learning_rate": 9.364644958293047e-07, |
| "loss": 0.7653, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.11045638179454646, |
| "grad_norm": 4.0071516036987305, |
| "learning_rate": 9.363825568246997e-07, |
| "loss": 0.8582, |
| "step": 7095 |
| }, |
| { |
| "epoch": 0.11053422279651584, |
| "grad_norm": 3.297551155090332, |
| "learning_rate": 9.363006178200947e-07, |
| "loss": 0.6726, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.11061206379848522, |
| "grad_norm": 6.013480186462402, |
| "learning_rate": 9.362186788154897e-07, |
| "loss": 0.777, |
| "step": 7105 |
| }, |
| { |
| "epoch": 0.1106899048004546, |
| "grad_norm": 4.557566165924072, |
| "learning_rate": 9.361367398108848e-07, |
| "loss": 0.9073, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.11076774580242398, |
| "grad_norm": 3.922395706176758, |
| "learning_rate": 9.360548008062797e-07, |
| "loss": 0.7608, |
| "step": 7115 |
| }, |
| { |
| "epoch": 0.11084558680439334, |
| "grad_norm": 4.4782867431640625, |
| "learning_rate": 9.359728618016748e-07, |
| "loss": 0.8138, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.11092342780636272, |
| "grad_norm": 3.320688486099243, |
| "learning_rate": 9.358909227970699e-07, |
| "loss": 0.7954, |
| "step": 7125 |
| }, |
| { |
| "epoch": 0.1110012688083321, |
| "grad_norm": 14.582179069519043, |
| "learning_rate": 9.358089837924648e-07, |
| "loss": 0.8295, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.11107910981030147, |
| "grad_norm": 5.56791877746582, |
| "learning_rate": 9.357270447878599e-07, |
| "loss": 0.7778, |
| "step": 7135 |
| }, |
| { |
| "epoch": 0.11115695081227085, |
| "grad_norm": 4.387538909912109, |
| "learning_rate": 9.35645105783255e-07, |
| "loss": 0.7354, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.11123479181424023, |
| "grad_norm": 3.590179443359375, |
| "learning_rate": 9.355631667786499e-07, |
| "loss": 0.7492, |
| "step": 7145 |
| }, |
| { |
| "epoch": 0.11131263281620961, |
| "grad_norm": 2.9361941814422607, |
| "learning_rate": 9.354812277740449e-07, |
| "loss": 0.9192, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.11139047381817899, |
| "grad_norm": 4.104539394378662, |
| "learning_rate": 9.3539928876944e-07, |
| "loss": 0.7818, |
| "step": 7155 |
| }, |
| { |
| "epoch": 0.11146831482014836, |
| "grad_norm": 3.3516862392425537, |
| "learning_rate": 9.35317349764835e-07, |
| "loss": 0.7937, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.11154615582211774, |
| "grad_norm": 3.5534565448760986, |
| "learning_rate": 9.3523541076023e-07, |
| "loss": 0.7168, |
| "step": 7165 |
| }, |
| { |
| "epoch": 0.11162399682408712, |
| "grad_norm": 3.8620402812957764, |
| "learning_rate": 9.351534717556251e-07, |
| "loss": 0.7666, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.1117018378260565, |
| "grad_norm": 5.330255031585693, |
| "learning_rate": 9.350715327510201e-07, |
| "loss": 0.7302, |
| "step": 7175 |
| }, |
| { |
| "epoch": 0.11177967882802588, |
| "grad_norm": 3.8225488662719727, |
| "learning_rate": 9.349895937464152e-07, |
| "loss": 0.7087, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.11185751982999526, |
| "grad_norm": 4.536187648773193, |
| "learning_rate": 9.349076547418101e-07, |
| "loss": 0.6757, |
| "step": 7185 |
| }, |
| { |
| "epoch": 0.11193536083196463, |
| "grad_norm": 3.3316333293914795, |
| "learning_rate": 9.348257157372051e-07, |
| "loss": 0.8506, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.11201320183393401, |
| "grad_norm": 4.451030731201172, |
| "learning_rate": 9.347437767326002e-07, |
| "loss": 0.8501, |
| "step": 7195 |
| }, |
| { |
| "epoch": 0.11209104283590339, |
| "grad_norm": 6.453036308288574, |
| "learning_rate": 9.346618377279953e-07, |
| "loss": 0.7665, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.11216888383787275, |
| "grad_norm": 3.7804341316223145, |
| "learning_rate": 9.345798987233902e-07, |
| "loss": 0.8482, |
| "step": 7205 |
| }, |
| { |
| "epoch": 0.11224672483984213, |
| "grad_norm": 4.703028678894043, |
| "learning_rate": 9.344979597187853e-07, |
| "loss": 0.8001, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.11232456584181151, |
| "grad_norm": 3.5996668338775635, |
| "learning_rate": 9.344160207141804e-07, |
| "loss": 0.8508, |
| "step": 7215 |
| }, |
| { |
| "epoch": 0.11240240684378089, |
| "grad_norm": 3.470485210418701, |
| "learning_rate": 9.343340817095754e-07, |
| "loss": 0.8929, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.11248024784575027, |
| "grad_norm": 9.36178970336914, |
| "learning_rate": 9.342521427049703e-07, |
| "loss": 0.7707, |
| "step": 7225 |
| }, |
| { |
| "epoch": 0.11255808884771965, |
| "grad_norm": 7.091135025024414, |
| "learning_rate": 9.341702037003654e-07, |
| "loss": 0.9335, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.11263592984968902, |
| "grad_norm": 4.552675247192383, |
| "learning_rate": 9.340882646957605e-07, |
| "loss": 0.9234, |
| "step": 7235 |
| }, |
| { |
| "epoch": 0.1127137708516584, |
| "grad_norm": 2.9877877235412598, |
| "learning_rate": 9.340063256911554e-07, |
| "loss": 0.8213, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.11279161185362778, |
| "grad_norm": 3.49109148979187, |
| "learning_rate": 9.339243866865505e-07, |
| "loss": 0.7713, |
| "step": 7245 |
| }, |
| { |
| "epoch": 0.11286945285559716, |
| "grad_norm": 3.662997245788574, |
| "learning_rate": 9.338424476819456e-07, |
| "loss": 0.8424, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.11294729385756654, |
| "grad_norm": 3.4681499004364014, |
| "learning_rate": 9.337605086773405e-07, |
| "loss": 0.9026, |
| "step": 7255 |
| }, |
| { |
| "epoch": 0.11302513485953591, |
| "grad_norm": 3.360700845718384, |
| "learning_rate": 9.336785696727356e-07, |
| "loss": 0.9131, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.11310297586150529, |
| "grad_norm": 3.470808982849121, |
| "learning_rate": 9.335966306681306e-07, |
| "loss": 0.8477, |
| "step": 7265 |
| }, |
| { |
| "epoch": 0.11318081686347467, |
| "grad_norm": 4.002136707305908, |
| "learning_rate": 9.335146916635256e-07, |
| "loss": 0.8753, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.11325865786544405, |
| "grad_norm": 3.301177978515625, |
| "learning_rate": 9.334327526589206e-07, |
| "loss": 0.8266, |
| "step": 7275 |
| }, |
| { |
| "epoch": 0.11333649886741343, |
| "grad_norm": 3.6634960174560547, |
| "learning_rate": 9.333508136543157e-07, |
| "loss": 0.6648, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.1134143398693828, |
| "grad_norm": 4.124035835266113, |
| "learning_rate": 9.332688746497107e-07, |
| "loss": 0.8151, |
| "step": 7285 |
| }, |
| { |
| "epoch": 0.11349218087135218, |
| "grad_norm": 5.273459434509277, |
| "learning_rate": 9.331869356451058e-07, |
| "loss": 0.7747, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.11357002187332155, |
| "grad_norm": 3.6614978313446045, |
| "learning_rate": 9.331049966405008e-07, |
| "loss": 0.8157, |
| "step": 7295 |
| }, |
| { |
| "epoch": 0.11364786287529093, |
| "grad_norm": 5.9759368896484375, |
| "learning_rate": 9.330230576358958e-07, |
| "loss": 0.7475, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.1137257038772603, |
| "grad_norm": 3.271934747695923, |
| "learning_rate": 9.329411186312909e-07, |
| "loss": 0.8176, |
| "step": 7305 |
| }, |
| { |
| "epoch": 0.11380354487922968, |
| "grad_norm": 6.224942207336426, |
| "learning_rate": 9.328591796266858e-07, |
| "loss": 0.7874, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.11388138588119906, |
| "grad_norm": 4.060842990875244, |
| "learning_rate": 9.327772406220808e-07, |
| "loss": 0.7104, |
| "step": 7315 |
| }, |
| { |
| "epoch": 0.11395922688316844, |
| "grad_norm": 3.273303985595703, |
| "learning_rate": 9.326953016174759e-07, |
| "loss": 0.9288, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.11403706788513782, |
| "grad_norm": 5.721134662628174, |
| "learning_rate": 9.32613362612871e-07, |
| "loss": 0.9447, |
| "step": 7325 |
| }, |
| { |
| "epoch": 0.1141149088871072, |
| "grad_norm": 3.942401885986328, |
| "learning_rate": 9.325314236082659e-07, |
| "loss": 0.6934, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.11419274988907657, |
| "grad_norm": 8.272555351257324, |
| "learning_rate": 9.32449484603661e-07, |
| "loss": 0.7917, |
| "step": 7335 |
| }, |
| { |
| "epoch": 0.11427059089104595, |
| "grad_norm": 9.704336166381836, |
| "learning_rate": 9.323675455990561e-07, |
| "loss": 0.816, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.11434843189301533, |
| "grad_norm": 3.2745420932769775, |
| "learning_rate": 9.322856065944512e-07, |
| "loss": 0.8432, |
| "step": 7345 |
| }, |
| { |
| "epoch": 0.1144262728949847, |
| "grad_norm": 4.300100803375244, |
| "learning_rate": 9.32203667589846e-07, |
| "loss": 0.8954, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.11450411389695409, |
| "grad_norm": 3.520085334777832, |
| "learning_rate": 9.321217285852411e-07, |
| "loss": 0.6849, |
| "step": 7355 |
| }, |
| { |
| "epoch": 0.11458195489892346, |
| "grad_norm": 3.1472392082214355, |
| "learning_rate": 9.320397895806362e-07, |
| "loss": 0.9081, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.11465979590089284, |
| "grad_norm": 5.217727184295654, |
| "learning_rate": 9.319578505760311e-07, |
| "loss": 0.9009, |
| "step": 7365 |
| }, |
| { |
| "epoch": 0.11473763690286222, |
| "grad_norm": 2.461811065673828, |
| "learning_rate": 9.318759115714262e-07, |
| "loss": 0.8018, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.1148154779048316, |
| "grad_norm": 4.464178562164307, |
| "learning_rate": 9.317939725668213e-07, |
| "loss": 0.7952, |
| "step": 7375 |
| }, |
| { |
| "epoch": 0.11489331890680096, |
| "grad_norm": 8.113191604614258, |
| "learning_rate": 9.317120335622163e-07, |
| "loss": 0.8338, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.11497115990877034, |
| "grad_norm": 3.7389204502105713, |
| "learning_rate": 9.316300945576113e-07, |
| "loss": 0.7343, |
| "step": 7385 |
| }, |
| { |
| "epoch": 0.11504900091073972, |
| "grad_norm": 3.955479145050049, |
| "learning_rate": 9.315481555530063e-07, |
| "loss": 0.8868, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.1151268419127091, |
| "grad_norm": 3.5472121238708496, |
| "learning_rate": 9.314662165484013e-07, |
| "loss": 0.7299, |
| "step": 7395 |
| }, |
| { |
| "epoch": 0.11520468291467847, |
| "grad_norm": 3.3682563304901123, |
| "learning_rate": 9.313842775437963e-07, |
| "loss": 0.8079, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.11528252391664785, |
| "grad_norm": 4.840713977813721, |
| "learning_rate": 9.313023385391914e-07, |
| "loss": 0.8041, |
| "step": 7405 |
| }, |
| { |
| "epoch": 0.11536036491861723, |
| "grad_norm": 6.368528366088867, |
| "learning_rate": 9.312203995345864e-07, |
| "loss": 0.8727, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.11543820592058661, |
| "grad_norm": 8.412065505981445, |
| "learning_rate": 9.311384605299815e-07, |
| "loss": 0.7993, |
| "step": 7415 |
| }, |
| { |
| "epoch": 0.11551604692255599, |
| "grad_norm": 4.714696884155273, |
| "learning_rate": 9.310565215253765e-07, |
| "loss": 0.7354, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.11559388792452537, |
| "grad_norm": 5.104187488555908, |
| "learning_rate": 9.309745825207715e-07, |
| "loss": 0.8301, |
| "step": 7425 |
| }, |
| { |
| "epoch": 0.11567172892649474, |
| "grad_norm": 2.960247039794922, |
| "learning_rate": 9.308926435161665e-07, |
| "loss": 0.7949, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.11574956992846412, |
| "grad_norm": 5.484702110290527, |
| "learning_rate": 9.308107045115615e-07, |
| "loss": 0.7747, |
| "step": 7435 |
| }, |
| { |
| "epoch": 0.1158274109304335, |
| "grad_norm": 3.192422866821289, |
| "learning_rate": 9.307287655069565e-07, |
| "loss": 0.8138, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.11590525193240288, |
| "grad_norm": 4.892508029937744, |
| "learning_rate": 9.306468265023516e-07, |
| "loss": 0.8059, |
| "step": 7445 |
| }, |
| { |
| "epoch": 0.11598309293437226, |
| "grad_norm": 3.488111972808838, |
| "learning_rate": 9.305648874977467e-07, |
| "loss": 0.7975, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.11606093393634163, |
| "grad_norm": 4.7030029296875, |
| "learning_rate": 9.304829484931416e-07, |
| "loss": 0.752, |
| "step": 7455 |
| }, |
| { |
| "epoch": 0.11613877493831101, |
| "grad_norm": 5.344095706939697, |
| "learning_rate": 9.304010094885367e-07, |
| "loss": 0.774, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.11621661594028038, |
| "grad_norm": 2.944584846496582, |
| "learning_rate": 9.303190704839318e-07, |
| "loss": 0.7948, |
| "step": 7465 |
| }, |
| { |
| "epoch": 0.11629445694224975, |
| "grad_norm": 2.8411998748779297, |
| "learning_rate": 9.302371314793266e-07, |
| "loss": 0.7698, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.11637229794421913, |
| "grad_norm": 6.780023097991943, |
| "learning_rate": 9.301551924747217e-07, |
| "loss": 0.8843, |
| "step": 7475 |
| }, |
| { |
| "epoch": 0.11645013894618851, |
| "grad_norm": 4.495726108551025, |
| "learning_rate": 9.300732534701168e-07, |
| "loss": 0.9019, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.11652797994815789, |
| "grad_norm": 4.61745023727417, |
| "learning_rate": 9.299913144655119e-07, |
| "loss": 0.8717, |
| "step": 7485 |
| }, |
| { |
| "epoch": 0.11660582095012727, |
| "grad_norm": 3.0337278842926025, |
| "learning_rate": 9.299093754609068e-07, |
| "loss": 0.791, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.11668366195209665, |
| "grad_norm": 6.76452112197876, |
| "learning_rate": 9.298274364563019e-07, |
| "loss": 0.7373, |
| "step": 7495 |
| }, |
| { |
| "epoch": 0.11676150295406602, |
| "grad_norm": 2.7412028312683105, |
| "learning_rate": 9.29745497451697e-07, |
| "loss": 0.7638, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.1168393439560354, |
| "grad_norm": 4.742910861968994, |
| "learning_rate": 9.29663558447092e-07, |
| "loss": 0.8951, |
| "step": 7505 |
| }, |
| { |
| "epoch": 0.11691718495800478, |
| "grad_norm": 2.368957042694092, |
| "learning_rate": 9.295816194424869e-07, |
| "loss": 0.7237, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.11699502595997416, |
| "grad_norm": 4.270255088806152, |
| "learning_rate": 9.29499680437882e-07, |
| "loss": 0.789, |
| "step": 7515 |
| }, |
| { |
| "epoch": 0.11707286696194354, |
| "grad_norm": 4.186617851257324, |
| "learning_rate": 9.29417741433277e-07, |
| "loss": 0.8655, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.11715070796391291, |
| "grad_norm": 2.4891741275787354, |
| "learning_rate": 9.29335802428672e-07, |
| "loss": 0.8008, |
| "step": 7525 |
| }, |
| { |
| "epoch": 0.11722854896588229, |
| "grad_norm": 3.5113582611083984, |
| "learning_rate": 9.292538634240671e-07, |
| "loss": 0.8465, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.11730638996785167, |
| "grad_norm": 4.365012168884277, |
| "learning_rate": 9.291719244194621e-07, |
| "loss": 0.8378, |
| "step": 7535 |
| }, |
| { |
| "epoch": 0.11738423096982105, |
| "grad_norm": 4.17802619934082, |
| "learning_rate": 9.290899854148572e-07, |
| "loss": 0.7605, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.11746207197179043, |
| "grad_norm": 3.046833038330078, |
| "learning_rate": 9.290080464102522e-07, |
| "loss": 0.6796, |
| "step": 7545 |
| }, |
| { |
| "epoch": 0.11753991297375979, |
| "grad_norm": 2.9827167987823486, |
| "learning_rate": 9.289261074056471e-07, |
| "loss": 0.7666, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.11761775397572917, |
| "grad_norm": 2.8555498123168945, |
| "learning_rate": 9.288441684010422e-07, |
| "loss": 0.8337, |
| "step": 7555 |
| }, |
| { |
| "epoch": 0.11769559497769855, |
| "grad_norm": 4.013402462005615, |
| "learning_rate": 9.287622293964373e-07, |
| "loss": 0.9353, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.11777343597966793, |
| "grad_norm": 6.579110622406006, |
| "learning_rate": 9.286802903918322e-07, |
| "loss": 0.7464, |
| "step": 7565 |
| }, |
| { |
| "epoch": 0.1178512769816373, |
| "grad_norm": 6.643786907196045, |
| "learning_rate": 9.285983513872273e-07, |
| "loss": 0.8852, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.11792911798360668, |
| "grad_norm": 3.837496042251587, |
| "learning_rate": 9.285164123826224e-07, |
| "loss": 0.7909, |
| "step": 7575 |
| }, |
| { |
| "epoch": 0.11800695898557606, |
| "grad_norm": 3.4954562187194824, |
| "learning_rate": 9.284344733780173e-07, |
| "loss": 0.8559, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.11808479998754544, |
| "grad_norm": 3.962890148162842, |
| "learning_rate": 9.283525343734124e-07, |
| "loss": 0.833, |
| "step": 7585 |
| }, |
| { |
| "epoch": 0.11816264098951482, |
| "grad_norm": 7.724937438964844, |
| "learning_rate": 9.282705953688074e-07, |
| "loss": 0.9339, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.1182404819914842, |
| "grad_norm": 5.779001235961914, |
| "learning_rate": 9.281886563642024e-07, |
| "loss": 0.6968, |
| "step": 7595 |
| }, |
| { |
| "epoch": 0.11831832299345357, |
| "grad_norm": 6.85791540145874, |
| "learning_rate": 9.281067173595974e-07, |
| "loss": 0.8279, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.11839616399542295, |
| "grad_norm": 3.720306158065796, |
| "learning_rate": 9.280247783549925e-07, |
| "loss": 0.7751, |
| "step": 7605 |
| }, |
| { |
| "epoch": 0.11847400499739233, |
| "grad_norm": 4.524914741516113, |
| "learning_rate": 9.279428393503876e-07, |
| "loss": 0.7227, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.11855184599936171, |
| "grad_norm": 3.6757309436798096, |
| "learning_rate": 9.278609003457825e-07, |
| "loss": 0.8816, |
| "step": 7615 |
| }, |
| { |
| "epoch": 0.11862968700133109, |
| "grad_norm": 3.6419453620910645, |
| "learning_rate": 9.277789613411776e-07, |
| "loss": 0.7741, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.11870752800330046, |
| "grad_norm": 3.0829155445098877, |
| "learning_rate": 9.276970223365727e-07, |
| "loss": 0.7712, |
| "step": 7625 |
| }, |
| { |
| "epoch": 0.11878536900526984, |
| "grad_norm": 6.524753570556641, |
| "learning_rate": 9.276150833319677e-07, |
| "loss": 0.8025, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.11886321000723922, |
| "grad_norm": 2.959907054901123, |
| "learning_rate": 9.275331443273626e-07, |
| "loss": 0.932, |
| "step": 7635 |
| }, |
| { |
| "epoch": 0.11894105100920858, |
| "grad_norm": 3.6245338916778564, |
| "learning_rate": 9.274512053227577e-07, |
| "loss": 0.6997, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.11901889201117796, |
| "grad_norm": 4.328200340270996, |
| "learning_rate": 9.273692663181527e-07, |
| "loss": 0.8786, |
| "step": 7645 |
| }, |
| { |
| "epoch": 0.11909673301314734, |
| "grad_norm": 4.048555374145508, |
| "learning_rate": 9.272873273135478e-07, |
| "loss": 0.6936, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.11917457401511672, |
| "grad_norm": 3.2797539234161377, |
| "learning_rate": 9.272053883089428e-07, |
| "loss": 0.7915, |
| "step": 7655 |
| }, |
| { |
| "epoch": 0.1192524150170861, |
| "grad_norm": 9.63089656829834, |
| "learning_rate": 9.271234493043378e-07, |
| "loss": 0.8417, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.11933025601905548, |
| "grad_norm": 4.521198272705078, |
| "learning_rate": 9.270415102997329e-07, |
| "loss": 0.8831, |
| "step": 7665 |
| }, |
| { |
| "epoch": 0.11940809702102485, |
| "grad_norm": 16.66404151916504, |
| "learning_rate": 9.269595712951279e-07, |
| "loss": 0.8582, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.11948593802299423, |
| "grad_norm": 8.25177001953125, |
| "learning_rate": 9.268776322905228e-07, |
| "loss": 0.818, |
| "step": 7675 |
| }, |
| { |
| "epoch": 0.11956377902496361, |
| "grad_norm": 3.3690059185028076, |
| "learning_rate": 9.267956932859179e-07, |
| "loss": 0.689, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.11964162002693299, |
| "grad_norm": 4.987194061279297, |
| "learning_rate": 9.26713754281313e-07, |
| "loss": 0.8771, |
| "step": 7685 |
| }, |
| { |
| "epoch": 0.11971946102890237, |
| "grad_norm": 5.207099914550781, |
| "learning_rate": 9.266318152767079e-07, |
| "loss": 0.898, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.11979730203087174, |
| "grad_norm": 4.396019458770752, |
| "learning_rate": 9.26549876272103e-07, |
| "loss": 0.7181, |
| "step": 7695 |
| }, |
| { |
| "epoch": 0.11987514303284112, |
| "grad_norm": 3.1832330226898193, |
| "learning_rate": 9.264679372674981e-07, |
| "loss": 0.8264, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.1199529840348105, |
| "grad_norm": 3.0821919441223145, |
| "learning_rate": 9.26385998262893e-07, |
| "loss": 0.8412, |
| "step": 7705 |
| }, |
| { |
| "epoch": 0.12003082503677988, |
| "grad_norm": 4.4039626121521, |
| "learning_rate": 9.263040592582881e-07, |
| "loss": 0.7891, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.12010866603874926, |
| "grad_norm": 3.0394608974456787, |
| "learning_rate": 9.262221202536831e-07, |
| "loss": 0.7594, |
| "step": 7715 |
| }, |
| { |
| "epoch": 0.12018650704071863, |
| "grad_norm": 3.430525779724121, |
| "learning_rate": 9.261401812490781e-07, |
| "loss": 0.9487, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.120264348042688, |
| "grad_norm": 3.6181464195251465, |
| "learning_rate": 9.260582422444731e-07, |
| "loss": 0.881, |
| "step": 7725 |
| }, |
| { |
| "epoch": 0.12034218904465738, |
| "grad_norm": 9.917842864990234, |
| "learning_rate": 9.259763032398682e-07, |
| "loss": 0.7738, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.12042003004662676, |
| "grad_norm": 5.7242255210876465, |
| "learning_rate": 9.258943642352633e-07, |
| "loss": 0.9353, |
| "step": 7735 |
| }, |
| { |
| "epoch": 0.12049787104859613, |
| "grad_norm": 5.354982852935791, |
| "learning_rate": 9.258124252306583e-07, |
| "loss": 0.7719, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.12057571205056551, |
| "grad_norm": 5.967920780181885, |
| "learning_rate": 9.257304862260533e-07, |
| "loss": 0.7304, |
| "step": 7745 |
| }, |
| { |
| "epoch": 0.12065355305253489, |
| "grad_norm": 3.245041847229004, |
| "learning_rate": 9.256485472214484e-07, |
| "loss": 0.6752, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.12073139405450427, |
| "grad_norm": 6.362682342529297, |
| "learning_rate": 9.255666082168433e-07, |
| "loss": 0.8159, |
| "step": 7755 |
| }, |
| { |
| "epoch": 0.12080923505647365, |
| "grad_norm": 5.759777069091797, |
| "learning_rate": 9.254846692122383e-07, |
| "loss": 0.6947, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.12088707605844302, |
| "grad_norm": 3.7885186672210693, |
| "learning_rate": 9.254027302076334e-07, |
| "loss": 0.8279, |
| "step": 7765 |
| }, |
| { |
| "epoch": 0.1209649170604124, |
| "grad_norm": 2.970975399017334, |
| "learning_rate": 9.253207912030284e-07, |
| "loss": 0.8366, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.12104275806238178, |
| "grad_norm": 3.9208946228027344, |
| "learning_rate": 9.252388521984235e-07, |
| "loss": 0.8122, |
| "step": 7775 |
| }, |
| { |
| "epoch": 0.12112059906435116, |
| "grad_norm": 3.411137580871582, |
| "learning_rate": 9.251569131938185e-07, |
| "loss": 0.736, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.12119844006632054, |
| "grad_norm": 10.479853630065918, |
| "learning_rate": 9.250749741892135e-07, |
| "loss": 0.9075, |
| "step": 7785 |
| }, |
| { |
| "epoch": 0.12127628106828992, |
| "grad_norm": 4.920512676239014, |
| "learning_rate": 9.249930351846086e-07, |
| "loss": 0.8346, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.1213541220702593, |
| "grad_norm": 5.350609302520752, |
| "learning_rate": 9.249110961800035e-07, |
| "loss": 0.8034, |
| "step": 7795 |
| }, |
| { |
| "epoch": 0.12143196307222867, |
| "grad_norm": 6.461511611938477, |
| "learning_rate": 9.248291571753985e-07, |
| "loss": 0.6603, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.12150980407419805, |
| "grad_norm": 4.6280837059021, |
| "learning_rate": 9.247472181707936e-07, |
| "loss": 0.801, |
| "step": 7805 |
| }, |
| { |
| "epoch": 0.12158764507616741, |
| "grad_norm": 8.22148323059082, |
| "learning_rate": 9.246652791661887e-07, |
| "loss": 0.8335, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.12166548607813679, |
| "grad_norm": 7.433074951171875, |
| "learning_rate": 9.245833401615836e-07, |
| "loss": 0.7171, |
| "step": 7815 |
| }, |
| { |
| "epoch": 0.12174332708010617, |
| "grad_norm": 5.692368507385254, |
| "learning_rate": 9.245014011569787e-07, |
| "loss": 0.8649, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.12182116808207555, |
| "grad_norm": 3.0104875564575195, |
| "learning_rate": 9.244194621523738e-07, |
| "loss": 0.6021, |
| "step": 7825 |
| }, |
| { |
| "epoch": 0.12189900908404493, |
| "grad_norm": 6.160313606262207, |
| "learning_rate": 9.243375231477688e-07, |
| "loss": 0.8652, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.1219768500860143, |
| "grad_norm": 4.373056411743164, |
| "learning_rate": 9.242555841431637e-07, |
| "loss": 0.7689, |
| "step": 7835 |
| }, |
| { |
| "epoch": 0.12205469108798368, |
| "grad_norm": 5.760363578796387, |
| "learning_rate": 9.241736451385588e-07, |
| "loss": 0.8888, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.12213253208995306, |
| "grad_norm": 4.07029914855957, |
| "learning_rate": 9.240917061339538e-07, |
| "loss": 0.7693, |
| "step": 7845 |
| }, |
| { |
| "epoch": 0.12221037309192244, |
| "grad_norm": 3.7920992374420166, |
| "learning_rate": 9.240097671293488e-07, |
| "loss": 0.8221, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.12228821409389182, |
| "grad_norm": 7.572727203369141, |
| "learning_rate": 9.239278281247439e-07, |
| "loss": 0.8255, |
| "step": 7855 |
| }, |
| { |
| "epoch": 0.1223660550958612, |
| "grad_norm": 5.047962188720703, |
| "learning_rate": 9.23845889120139e-07, |
| "loss": 0.8189, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.12244389609783057, |
| "grad_norm": 6.445250988006592, |
| "learning_rate": 9.23763950115534e-07, |
| "loss": 0.9137, |
| "step": 7865 |
| }, |
| { |
| "epoch": 0.12252173709979995, |
| "grad_norm": 2.9722390174865723, |
| "learning_rate": 9.23682011110929e-07, |
| "loss": 0.895, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.12259957810176933, |
| "grad_norm": 3.0519466400146484, |
| "learning_rate": 9.23600072106324e-07, |
| "loss": 0.8048, |
| "step": 7875 |
| }, |
| { |
| "epoch": 0.12267741910373871, |
| "grad_norm": 5.44154167175293, |
| "learning_rate": 9.23518133101719e-07, |
| "loss": 0.6992, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.12275526010570809, |
| "grad_norm": 11.307611465454102, |
| "learning_rate": 9.23436194097114e-07, |
| "loss": 0.8595, |
| "step": 7885 |
| }, |
| { |
| "epoch": 0.12283310110767746, |
| "grad_norm": 6.504110336303711, |
| "learning_rate": 9.233542550925091e-07, |
| "loss": 0.7882, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.12291094210964683, |
| "grad_norm": 2.5726773738861084, |
| "learning_rate": 9.232723160879041e-07, |
| "loss": 0.8535, |
| "step": 7895 |
| }, |
| { |
| "epoch": 0.1229887831116162, |
| "grad_norm": 3.1990294456481934, |
| "learning_rate": 9.231903770832992e-07, |
| "loss": 0.7941, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.12306662411358558, |
| "grad_norm": 3.614875555038452, |
| "learning_rate": 9.231084380786942e-07, |
| "loss": 0.6803, |
| "step": 7905 |
| }, |
| { |
| "epoch": 0.12314446511555496, |
| "grad_norm": 5.973575592041016, |
| "learning_rate": 9.230264990740892e-07, |
| "loss": 0.7994, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.12322230611752434, |
| "grad_norm": 3.558375358581543, |
| "learning_rate": 9.229445600694842e-07, |
| "loss": 0.8292, |
| "step": 7915 |
| }, |
| { |
| "epoch": 0.12330014711949372, |
| "grad_norm": 4.37818717956543, |
| "learning_rate": 9.228626210648793e-07, |
| "loss": 0.8512, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.1233779881214631, |
| "grad_norm": 7.052612781524658, |
| "learning_rate": 9.227806820602742e-07, |
| "loss": 0.7453, |
| "step": 7925 |
| }, |
| { |
| "epoch": 0.12345582912343248, |
| "grad_norm": 9.181270599365234, |
| "learning_rate": 9.226987430556693e-07, |
| "loss": 0.6847, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.12353367012540185, |
| "grad_norm": 4.499727249145508, |
| "learning_rate": 9.226168040510644e-07, |
| "loss": 0.7718, |
| "step": 7935 |
| }, |
| { |
| "epoch": 0.12361151112737123, |
| "grad_norm": 3.2490360736846924, |
| "learning_rate": 9.225348650464593e-07, |
| "loss": 0.7631, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.12368935212934061, |
| "grad_norm": 3.26198410987854, |
| "learning_rate": 9.224529260418544e-07, |
| "loss": 0.8257, |
| "step": 7945 |
| }, |
| { |
| "epoch": 0.12376719313130999, |
| "grad_norm": 7.192509174346924, |
| "learning_rate": 9.223709870372495e-07, |
| "loss": 0.809, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.12384503413327937, |
| "grad_norm": 3.1407008171081543, |
| "learning_rate": 9.222890480326445e-07, |
| "loss": 0.8444, |
| "step": 7955 |
| }, |
| { |
| "epoch": 0.12392287513524874, |
| "grad_norm": 4.12625789642334, |
| "learning_rate": 9.222071090280394e-07, |
| "loss": 0.8275, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.12400071613721812, |
| "grad_norm": 5.525479316711426, |
| "learning_rate": 9.221251700234345e-07, |
| "loss": 0.8999, |
| "step": 7965 |
| }, |
| { |
| "epoch": 0.1240785571391875, |
| "grad_norm": 6.302455902099609, |
| "learning_rate": 9.220432310188295e-07, |
| "loss": 0.7642, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.12415639814115688, |
| "grad_norm": 4.649979114532471, |
| "learning_rate": 9.219612920142245e-07, |
| "loss": 0.7002, |
| "step": 7975 |
| }, |
| { |
| "epoch": 0.12423423914312626, |
| "grad_norm": 5.463395595550537, |
| "learning_rate": 9.218793530096196e-07, |
| "loss": 0.8473, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.12431208014509562, |
| "grad_norm": 6.704756736755371, |
| "learning_rate": 9.217974140050147e-07, |
| "loss": 0.6967, |
| "step": 7985 |
| }, |
| { |
| "epoch": 0.124389921147065, |
| "grad_norm": 4.5808539390563965, |
| "learning_rate": 9.217154750004097e-07, |
| "loss": 0.7898, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.12446776214903438, |
| "grad_norm": 2.9757680892944336, |
| "learning_rate": 9.216335359958047e-07, |
| "loss": 0.698, |
| "step": 7995 |
| }, |
| { |
| "epoch": 0.12454560315100376, |
| "grad_norm": 5.5388617515563965, |
| "learning_rate": 9.215515969911997e-07, |
| "loss": 0.8798, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.12462344415297313, |
| "grad_norm": 5.424650192260742, |
| "learning_rate": 9.214696579865947e-07, |
| "loss": 0.814, |
| "step": 8005 |
| }, |
| { |
| "epoch": 0.12470128515494251, |
| "grad_norm": 4.0730109214782715, |
| "learning_rate": 9.213877189819898e-07, |
| "loss": 0.6547, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.12477912615691189, |
| "grad_norm": 5.436715126037598, |
| "learning_rate": 9.213057799773848e-07, |
| "loss": 0.776, |
| "step": 8015 |
| }, |
| { |
| "epoch": 0.12485696715888127, |
| "grad_norm": 3.6354191303253174, |
| "learning_rate": 9.212238409727798e-07, |
| "loss": 0.7196, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.12493480816085065, |
| "grad_norm": 3.5990068912506104, |
| "learning_rate": 9.211419019681749e-07, |
| "loss": 0.8079, |
| "step": 8025 |
| }, |
| { |
| "epoch": 0.12501264916282, |
| "grad_norm": 4.007763385772705, |
| "learning_rate": 9.2105996296357e-07, |
| "loss": 0.7711, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.1250904901647894, |
| "grad_norm": 4.223349571228027, |
| "learning_rate": 9.209780239589649e-07, |
| "loss": 0.8481, |
| "step": 8035 |
| }, |
| { |
| "epoch": 0.12516833116675877, |
| "grad_norm": 4.40108060836792, |
| "learning_rate": 9.208960849543599e-07, |
| "loss": 0.7981, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.12524617216872816, |
| "grad_norm": 2.7487142086029053, |
| "learning_rate": 9.20814145949755e-07, |
| "loss": 0.6693, |
| "step": 8045 |
| }, |
| { |
| "epoch": 0.12532401317069752, |
| "grad_norm": 3.568763017654419, |
| "learning_rate": 9.207322069451499e-07, |
| "loss": 0.7716, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.12540185417266692, |
| "grad_norm": 3.7271010875701904, |
| "learning_rate": 9.20650267940545e-07, |
| "loss": 0.7969, |
| "step": 8055 |
| }, |
| { |
| "epoch": 0.12547969517463628, |
| "grad_norm": 4.352176189422607, |
| "learning_rate": 9.205683289359401e-07, |
| "loss": 0.8513, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.12555753617660567, |
| "grad_norm": 3.4279236793518066, |
| "learning_rate": 9.20486389931335e-07, |
| "loss": 0.8315, |
| "step": 8065 |
| }, |
| { |
| "epoch": 0.12563537717857504, |
| "grad_norm": 5.192807197570801, |
| "learning_rate": 9.204044509267301e-07, |
| "loss": 0.6887, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.12571321818054443, |
| "grad_norm": 7.44572114944458, |
| "learning_rate": 9.203225119221252e-07, |
| "loss": 0.8179, |
| "step": 8075 |
| }, |
| { |
| "epoch": 0.1257910591825138, |
| "grad_norm": 2.792656183242798, |
| "learning_rate": 9.202405729175201e-07, |
| "loss": 0.8136, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.12586890018448318, |
| "grad_norm": 3.840090036392212, |
| "learning_rate": 9.201586339129151e-07, |
| "loss": 0.7216, |
| "step": 8085 |
| }, |
| { |
| "epoch": 0.12594674118645255, |
| "grad_norm": 6.609809398651123, |
| "learning_rate": 9.200766949083102e-07, |
| "loss": 0.8109, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.12602458218842194, |
| "grad_norm": 3.8064773082733154, |
| "learning_rate": 9.199947559037052e-07, |
| "loss": 0.7874, |
| "step": 8095 |
| }, |
| { |
| "epoch": 0.1261024231903913, |
| "grad_norm": 4.612837791442871, |
| "learning_rate": 9.199128168991003e-07, |
| "loss": 0.8989, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.1261802641923607, |
| "grad_norm": 3.837954044342041, |
| "learning_rate": 9.198308778944953e-07, |
| "loss": 0.7964, |
| "step": 8105 |
| }, |
| { |
| "epoch": 0.12625810519433006, |
| "grad_norm": 5.080657482147217, |
| "learning_rate": 9.197489388898904e-07, |
| "loss": 0.7835, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.12633594619629943, |
| "grad_norm": 5.589513301849365, |
| "learning_rate": 9.196669998852854e-07, |
| "loss": 0.8027, |
| "step": 8115 |
| }, |
| { |
| "epoch": 0.12641378719826882, |
| "grad_norm": 5.392527103424072, |
| "learning_rate": 9.195850608806803e-07, |
| "loss": 0.7721, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.12649162820023818, |
| "grad_norm": 8.648225784301758, |
| "learning_rate": 9.195031218760754e-07, |
| "loss": 0.8396, |
| "step": 8125 |
| }, |
| { |
| "epoch": 0.12656946920220757, |
| "grad_norm": 3.0926201343536377, |
| "learning_rate": 9.194211828714704e-07, |
| "loss": 0.8282, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.12664731020417694, |
| "grad_norm": 4.515932083129883, |
| "learning_rate": 9.193392438668655e-07, |
| "loss": 0.8238, |
| "step": 8135 |
| }, |
| { |
| "epoch": 0.12672515120614633, |
| "grad_norm": 6.131096839904785, |
| "learning_rate": 9.192573048622605e-07, |
| "loss": 0.8638, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.1268029922081157, |
| "grad_norm": 3.634267807006836, |
| "learning_rate": 9.191753658576555e-07, |
| "loss": 0.716, |
| "step": 8145 |
| }, |
| { |
| "epoch": 0.1268808332100851, |
| "grad_norm": 4.676586627960205, |
| "learning_rate": 9.190934268530506e-07, |
| "loss": 0.8273, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.12695867421205445, |
| "grad_norm": 3.584019422531128, |
| "learning_rate": 9.190114878484457e-07, |
| "loss": 0.805, |
| "step": 8155 |
| }, |
| { |
| "epoch": 0.12703651521402384, |
| "grad_norm": 3.7464358806610107, |
| "learning_rate": 9.189295488438405e-07, |
| "loss": 0.7724, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.1271143562159932, |
| "grad_norm": 3.974726438522339, |
| "learning_rate": 9.188476098392356e-07, |
| "loss": 0.807, |
| "step": 8165 |
| }, |
| { |
| "epoch": 0.1271921972179626, |
| "grad_norm": 5.144652843475342, |
| "learning_rate": 9.187656708346307e-07, |
| "loss": 0.8721, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.12727003821993196, |
| "grad_norm": 7.679945945739746, |
| "learning_rate": 9.186837318300256e-07, |
| "loss": 0.7945, |
| "step": 8175 |
| }, |
| { |
| "epoch": 0.12734787922190136, |
| "grad_norm": 5.530436992645264, |
| "learning_rate": 9.186017928254207e-07, |
| "loss": 0.9354, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.12742572022387072, |
| "grad_norm": 3.981515884399414, |
| "learning_rate": 9.185198538208158e-07, |
| "loss": 0.6024, |
| "step": 8185 |
| }, |
| { |
| "epoch": 0.1275035612258401, |
| "grad_norm": 3.5425384044647217, |
| "learning_rate": 9.184379148162108e-07, |
| "loss": 0.8061, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.12758140222780948, |
| "grad_norm": 3.2570059299468994, |
| "learning_rate": 9.183559758116058e-07, |
| "loss": 0.7803, |
| "step": 8195 |
| }, |
| { |
| "epoch": 0.12765924322977884, |
| "grad_norm": 3.267265558242798, |
| "learning_rate": 9.182740368070008e-07, |
| "loss": 0.7927, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.12773708423174823, |
| "grad_norm": 3.4302942752838135, |
| "learning_rate": 9.181920978023958e-07, |
| "loss": 0.8104, |
| "step": 8205 |
| }, |
| { |
| "epoch": 0.1278149252337176, |
| "grad_norm": 4.691220760345459, |
| "learning_rate": 9.181101587977908e-07, |
| "loss": 0.7826, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.127892766235687, |
| "grad_norm": 4.321291446685791, |
| "learning_rate": 9.180282197931859e-07, |
| "loss": 0.8503, |
| "step": 8215 |
| }, |
| { |
| "epoch": 0.12797060723765635, |
| "grad_norm": 6.356113433837891, |
| "learning_rate": 9.179462807885809e-07, |
| "loss": 0.8026, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.12804844823962575, |
| "grad_norm": 6.146854400634766, |
| "learning_rate": 9.17864341783976e-07, |
| "loss": 0.7779, |
| "step": 8225 |
| }, |
| { |
| "epoch": 0.1281262892415951, |
| "grad_norm": 4.845507621765137, |
| "learning_rate": 9.17782402779371e-07, |
| "loss": 0.855, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.1282041302435645, |
| "grad_norm": 4.020603179931641, |
| "learning_rate": 9.177004637747661e-07, |
| "loss": 0.8117, |
| "step": 8235 |
| }, |
| { |
| "epoch": 0.12828197124553387, |
| "grad_norm": 3.1681554317474365, |
| "learning_rate": 9.17618524770161e-07, |
| "loss": 1.0093, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.12835981224750326, |
| "grad_norm": 12.957433700561523, |
| "learning_rate": 9.17536585765556e-07, |
| "loss": 0.8047, |
| "step": 8245 |
| }, |
| { |
| "epoch": 0.12843765324947262, |
| "grad_norm": 3.9429116249084473, |
| "learning_rate": 9.174546467609511e-07, |
| "loss": 0.7415, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.12851549425144201, |
| "grad_norm": 5.232780933380127, |
| "learning_rate": 9.173727077563461e-07, |
| "loss": 0.9999, |
| "step": 8255 |
| }, |
| { |
| "epoch": 0.12859333525341138, |
| "grad_norm": 5.215351104736328, |
| "learning_rate": 9.172907687517412e-07, |
| "loss": 0.7994, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.12867117625538077, |
| "grad_norm": 3.917405366897583, |
| "learning_rate": 9.172088297471362e-07, |
| "loss": 0.7882, |
| "step": 8265 |
| }, |
| { |
| "epoch": 0.12874901725735013, |
| "grad_norm": 5.005404949188232, |
| "learning_rate": 9.171268907425312e-07, |
| "loss": 0.8112, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.12882685825931953, |
| "grad_norm": 3.008211851119995, |
| "learning_rate": 9.170449517379263e-07, |
| "loss": 0.8249, |
| "step": 8275 |
| }, |
| { |
| "epoch": 0.1289046992612889, |
| "grad_norm": 9.127318382263184, |
| "learning_rate": 9.169630127333214e-07, |
| "loss": 0.8224, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.12898254026325826, |
| "grad_norm": 4.283410549163818, |
| "learning_rate": 9.168810737287162e-07, |
| "loss": 0.8319, |
| "step": 8285 |
| }, |
| { |
| "epoch": 0.12906038126522765, |
| "grad_norm": 9.957722663879395, |
| "learning_rate": 9.167991347241113e-07, |
| "loss": 0.7951, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.129138222267197, |
| "grad_norm": 3.6089370250701904, |
| "learning_rate": 9.167171957195064e-07, |
| "loss": 0.7409, |
| "step": 8295 |
| }, |
| { |
| "epoch": 0.1292160632691664, |
| "grad_norm": 6.038301467895508, |
| "learning_rate": 9.166352567149013e-07, |
| "loss": 0.802, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.12929390427113577, |
| "grad_norm": 5.123950481414795, |
| "learning_rate": 9.165533177102964e-07, |
| "loss": 0.723, |
| "step": 8305 |
| }, |
| { |
| "epoch": 0.12937174527310516, |
| "grad_norm": 3.86824369430542, |
| "learning_rate": 9.164713787056915e-07, |
| "loss": 0.811, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.12944958627507452, |
| "grad_norm": 4.312297821044922, |
| "learning_rate": 9.163894397010865e-07, |
| "loss": 0.7, |
| "step": 8315 |
| }, |
| { |
| "epoch": 0.12952742727704392, |
| "grad_norm": 2.920485258102417, |
| "learning_rate": 9.163075006964815e-07, |
| "loss": 0.8012, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.12960526827901328, |
| "grad_norm": 3.8033828735351562, |
| "learning_rate": 9.162255616918765e-07, |
| "loss": 0.8236, |
| "step": 8325 |
| }, |
| { |
| "epoch": 0.12968310928098267, |
| "grad_norm": 7.963630199432373, |
| "learning_rate": 9.161436226872715e-07, |
| "loss": 0.8041, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.12976095028295204, |
| "grad_norm": 3.964761734008789, |
| "learning_rate": 9.160616836826666e-07, |
| "loss": 0.8897, |
| "step": 8335 |
| }, |
| { |
| "epoch": 0.12983879128492143, |
| "grad_norm": 3.360156297683716, |
| "learning_rate": 9.159797446780616e-07, |
| "loss": 0.7087, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.1299166322868908, |
| "grad_norm": 4.731776237487793, |
| "learning_rate": 9.158978056734567e-07, |
| "loss": 0.7926, |
| "step": 8345 |
| }, |
| { |
| "epoch": 0.12999447328886019, |
| "grad_norm": 3.076554775238037, |
| "learning_rate": 9.158158666688517e-07, |
| "loss": 0.7662, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.13007231429082955, |
| "grad_norm": 3.2537529468536377, |
| "learning_rate": 9.157339276642467e-07, |
| "loss": 0.8568, |
| "step": 8355 |
| }, |
| { |
| "epoch": 0.13015015529279894, |
| "grad_norm": 4.160289287567139, |
| "learning_rate": 9.156519886596418e-07, |
| "loss": 0.7748, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.1302279962947683, |
| "grad_norm": 3.388763666152954, |
| "learning_rate": 9.155700496550367e-07, |
| "loss": 0.7411, |
| "step": 8365 |
| }, |
| { |
| "epoch": 0.13030583729673767, |
| "grad_norm": 3.7193074226379395, |
| "learning_rate": 9.154881106504318e-07, |
| "loss": 0.7553, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.13038367829870706, |
| "grad_norm": 4.124868392944336, |
| "learning_rate": 9.154061716458268e-07, |
| "loss": 0.8061, |
| "step": 8375 |
| }, |
| { |
| "epoch": 0.13046151930067643, |
| "grad_norm": 3.1243176460266113, |
| "learning_rate": 9.153242326412218e-07, |
| "loss": 0.7673, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.13053936030264582, |
| "grad_norm": 3.175187587738037, |
| "learning_rate": 9.152422936366169e-07, |
| "loss": 0.636, |
| "step": 8385 |
| }, |
| { |
| "epoch": 0.13061720130461518, |
| "grad_norm": 3.486941337585449, |
| "learning_rate": 9.15160354632012e-07, |
| "loss": 0.768, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.13069504230658457, |
| "grad_norm": 3.408848762512207, |
| "learning_rate": 9.150784156274069e-07, |
| "loss": 0.8326, |
| "step": 8395 |
| }, |
| { |
| "epoch": 0.13077288330855394, |
| "grad_norm": 5.37129545211792, |
| "learning_rate": 9.14996476622802e-07, |
| "loss": 0.7712, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.13085072431052333, |
| "grad_norm": 2.642165422439575, |
| "learning_rate": 9.14914537618197e-07, |
| "loss": 0.8611, |
| "step": 8405 |
| }, |
| { |
| "epoch": 0.1309285653124927, |
| "grad_norm": 3.8213489055633545, |
| "learning_rate": 9.148325986135919e-07, |
| "loss": 0.7938, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.1310064063144621, |
| "grad_norm": 3.893542528152466, |
| "learning_rate": 9.14750659608987e-07, |
| "loss": 0.7761, |
| "step": 8415 |
| }, |
| { |
| "epoch": 0.13108424731643145, |
| "grad_norm": 3.6185567378997803, |
| "learning_rate": 9.146687206043821e-07, |
| "loss": 0.776, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.13116208831840084, |
| "grad_norm": 7.76255989074707, |
| "learning_rate": 9.14586781599777e-07, |
| "loss": 0.8723, |
| "step": 8425 |
| }, |
| { |
| "epoch": 0.1312399293203702, |
| "grad_norm": 3.2361936569213867, |
| "learning_rate": 9.145048425951721e-07, |
| "loss": 0.7151, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.1313177703223396, |
| "grad_norm": 7.259923458099365, |
| "learning_rate": 9.144229035905672e-07, |
| "loss": 0.7714, |
| "step": 8435 |
| }, |
| { |
| "epoch": 0.13139561132430896, |
| "grad_norm": 8.949355125427246, |
| "learning_rate": 9.143409645859622e-07, |
| "loss": 0.8829, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.13147345232627836, |
| "grad_norm": 5.755862712860107, |
| "learning_rate": 9.142590255813571e-07, |
| "loss": 0.8672, |
| "step": 8445 |
| }, |
| { |
| "epoch": 0.13155129332824772, |
| "grad_norm": 4.543202877044678, |
| "learning_rate": 9.141770865767522e-07, |
| "loss": 0.8165, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.1316291343302171, |
| "grad_norm": 3.6541123390197754, |
| "learning_rate": 9.140951475721472e-07, |
| "loss": 0.8017, |
| "step": 8455 |
| }, |
| { |
| "epoch": 0.13170697533218648, |
| "grad_norm": 3.5702321529388428, |
| "learning_rate": 9.140132085675423e-07, |
| "loss": 0.8063, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.13178481633415584, |
| "grad_norm": 3.831411361694336, |
| "learning_rate": 9.139312695629373e-07, |
| "loss": 0.832, |
| "step": 8465 |
| }, |
| { |
| "epoch": 0.13186265733612523, |
| "grad_norm": 3.0359880924224854, |
| "learning_rate": 9.138493305583324e-07, |
| "loss": 0.6776, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.1319404983380946, |
| "grad_norm": 4.264082908630371, |
| "learning_rate": 9.137673915537274e-07, |
| "loss": 0.6582, |
| "step": 8475 |
| }, |
| { |
| "epoch": 0.132018339340064, |
| "grad_norm": 4.2727508544921875, |
| "learning_rate": 9.136854525491225e-07, |
| "loss": 0.7432, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.13209618034203335, |
| "grad_norm": 7.4130539894104, |
| "learning_rate": 9.136035135445174e-07, |
| "loss": 0.7692, |
| "step": 8485 |
| }, |
| { |
| "epoch": 0.13217402134400275, |
| "grad_norm": 5.631756782531738, |
| "learning_rate": 9.135215745399124e-07, |
| "loss": 0.7297, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.1322518623459721, |
| "grad_norm": 4.975935459136963, |
| "learning_rate": 9.134396355353075e-07, |
| "loss": 0.8036, |
| "step": 8495 |
| }, |
| { |
| "epoch": 0.1323297033479415, |
| "grad_norm": 3.5903608798980713, |
| "learning_rate": 9.133576965307025e-07, |
| "loss": 0.8833, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.13240754434991087, |
| "grad_norm": 4.563084602355957, |
| "learning_rate": 9.132757575260975e-07, |
| "loss": 0.7386, |
| "step": 8505 |
| }, |
| { |
| "epoch": 0.13248538535188026, |
| "grad_norm": 5.170467853546143, |
| "learning_rate": 9.131938185214926e-07, |
| "loss": 0.8154, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.13256322635384962, |
| "grad_norm": 6.0783233642578125, |
| "learning_rate": 9.131118795168877e-07, |
| "loss": 0.8525, |
| "step": 8515 |
| }, |
| { |
| "epoch": 0.13264106735581901, |
| "grad_norm": 3.2167932987213135, |
| "learning_rate": 9.130299405122826e-07, |
| "loss": 0.7605, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.13271890835778838, |
| "grad_norm": 4.35626745223999, |
| "learning_rate": 9.129480015076776e-07, |
| "loss": 0.7577, |
| "step": 8525 |
| }, |
| { |
| "epoch": 0.13279674935975777, |
| "grad_norm": 6.491021633148193, |
| "learning_rate": 9.128660625030727e-07, |
| "loss": 0.8463, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.13287459036172714, |
| "grad_norm": 3.174940347671509, |
| "learning_rate": 9.127841234984676e-07, |
| "loss": 0.7382, |
| "step": 8535 |
| }, |
| { |
| "epoch": 0.13295243136369653, |
| "grad_norm": 4.856359004974365, |
| "learning_rate": 9.127021844938627e-07, |
| "loss": 0.7025, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.1330302723656659, |
| "grad_norm": 3.6859755516052246, |
| "learning_rate": 9.126202454892578e-07, |
| "loss": 0.642, |
| "step": 8545 |
| }, |
| { |
| "epoch": 0.13310811336763526, |
| "grad_norm": 3.6811821460723877, |
| "learning_rate": 9.125383064846528e-07, |
| "loss": 0.828, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.13318595436960465, |
| "grad_norm": 7.908188819885254, |
| "learning_rate": 9.124563674800478e-07, |
| "loss": 0.8043, |
| "step": 8555 |
| }, |
| { |
| "epoch": 0.133263795371574, |
| "grad_norm": 3.508521795272827, |
| "learning_rate": 9.123744284754429e-07, |
| "loss": 0.8052, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.1333416363735434, |
| "grad_norm": 4.004594326019287, |
| "learning_rate": 9.122924894708378e-07, |
| "loss": 0.8126, |
| "step": 8565 |
| }, |
| { |
| "epoch": 0.13341947737551277, |
| "grad_norm": 4.229026794433594, |
| "learning_rate": 9.122105504662328e-07, |
| "loss": 0.7793, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.13349731837748216, |
| "grad_norm": 5.319640159606934, |
| "learning_rate": 9.121286114616279e-07, |
| "loss": 0.765, |
| "step": 8575 |
| }, |
| { |
| "epoch": 0.13357515937945152, |
| "grad_norm": 3.8783299922943115, |
| "learning_rate": 9.120466724570229e-07, |
| "loss": 0.7036, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.13365300038142092, |
| "grad_norm": 5.243676662445068, |
| "learning_rate": 9.11964733452418e-07, |
| "loss": 0.8525, |
| "step": 8585 |
| }, |
| { |
| "epoch": 0.13373084138339028, |
| "grad_norm": 6.118826866149902, |
| "learning_rate": 9.11882794447813e-07, |
| "loss": 0.8391, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.13380868238535967, |
| "grad_norm": 2.1617751121520996, |
| "learning_rate": 9.118008554432081e-07, |
| "loss": 0.7353, |
| "step": 8595 |
| }, |
| { |
| "epoch": 0.13388652338732904, |
| "grad_norm": 7.590507507324219, |
| "learning_rate": 9.117189164386031e-07, |
| "loss": 0.8919, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.13396436438929843, |
| "grad_norm": 3.345130443572998, |
| "learning_rate": 9.116369774339982e-07, |
| "loss": 0.7615, |
| "step": 8605 |
| }, |
| { |
| "epoch": 0.1340422053912678, |
| "grad_norm": 3.144171953201294, |
| "learning_rate": 9.115550384293931e-07, |
| "loss": 0.7714, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.13412004639323719, |
| "grad_norm": 5.711132049560547, |
| "learning_rate": 9.114730994247881e-07, |
| "loss": 0.9119, |
| "step": 8615 |
| }, |
| { |
| "epoch": 0.13419788739520655, |
| "grad_norm": 3.997664451599121, |
| "learning_rate": 9.113911604201832e-07, |
| "loss": 0.7821, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.13427572839717594, |
| "grad_norm": 5.540621757507324, |
| "learning_rate": 9.113092214155782e-07, |
| "loss": 0.8796, |
| "step": 8625 |
| }, |
| { |
| "epoch": 0.1343535693991453, |
| "grad_norm": 4.296466827392578, |
| "learning_rate": 9.112272824109732e-07, |
| "loss": 0.839, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.13443141040111467, |
| "grad_norm": 5.527231693267822, |
| "learning_rate": 9.111453434063683e-07, |
| "loss": 0.7966, |
| "step": 8635 |
| }, |
| { |
| "epoch": 0.13450925140308406, |
| "grad_norm": 4.798453330993652, |
| "learning_rate": 9.110634044017634e-07, |
| "loss": 0.8808, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.13458709240505343, |
| "grad_norm": 4.989645957946777, |
| "learning_rate": 9.109814653971583e-07, |
| "loss": 0.7845, |
| "step": 8645 |
| }, |
| { |
| "epoch": 0.13466493340702282, |
| "grad_norm": 5.687048435211182, |
| "learning_rate": 9.108995263925533e-07, |
| "loss": 0.8627, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.13474277440899218, |
| "grad_norm": 3.1098756790161133, |
| "learning_rate": 9.108175873879484e-07, |
| "loss": 0.7594, |
| "step": 8655 |
| }, |
| { |
| "epoch": 0.13482061541096158, |
| "grad_norm": 8.40995979309082, |
| "learning_rate": 9.107356483833433e-07, |
| "loss": 0.7599, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.13489845641293094, |
| "grad_norm": 6.420483589172363, |
| "learning_rate": 9.106537093787384e-07, |
| "loss": 0.8199, |
| "step": 8665 |
| }, |
| { |
| "epoch": 0.13497629741490033, |
| "grad_norm": 5.545563697814941, |
| "learning_rate": 9.105717703741335e-07, |
| "loss": 0.7403, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.1350541384168697, |
| "grad_norm": 8.34080696105957, |
| "learning_rate": 9.104898313695285e-07, |
| "loss": 0.7979, |
| "step": 8675 |
| }, |
| { |
| "epoch": 0.1351319794188391, |
| "grad_norm": 6.4696550369262695, |
| "learning_rate": 9.104078923649235e-07, |
| "loss": 0.6935, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.13520982042080845, |
| "grad_norm": 7.9759521484375, |
| "learning_rate": 9.103259533603186e-07, |
| "loss": 0.7174, |
| "step": 8685 |
| }, |
| { |
| "epoch": 0.13528766142277784, |
| "grad_norm": 4.094228267669678, |
| "learning_rate": 9.102440143557135e-07, |
| "loss": 0.9191, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.1353655024247472, |
| "grad_norm": 3.2784035205841064, |
| "learning_rate": 9.101620753511086e-07, |
| "loss": 0.8157, |
| "step": 8695 |
| }, |
| { |
| "epoch": 0.1354433434267166, |
| "grad_norm": 3.680067300796509, |
| "learning_rate": 9.100801363465036e-07, |
| "loss": 0.8478, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.13552118442868596, |
| "grad_norm": 3.991107225418091, |
| "learning_rate": 9.099981973418986e-07, |
| "loss": 0.9003, |
| "step": 8705 |
| }, |
| { |
| "epoch": 0.13559902543065536, |
| "grad_norm": 2.9558584690093994, |
| "learning_rate": 9.099162583372937e-07, |
| "loss": 0.7571, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.13567686643262472, |
| "grad_norm": 2.952221632003784, |
| "learning_rate": 9.098343193326887e-07, |
| "loss": 0.8597, |
| "step": 8715 |
| }, |
| { |
| "epoch": 0.13575470743459409, |
| "grad_norm": 8.548612594604492, |
| "learning_rate": 9.097523803280838e-07, |
| "loss": 0.7225, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.13583254843656348, |
| "grad_norm": 9.161630630493164, |
| "learning_rate": 9.096704413234788e-07, |
| "loss": 0.6646, |
| "step": 8725 |
| }, |
| { |
| "epoch": 0.13591038943853284, |
| "grad_norm": 4.948508262634277, |
| "learning_rate": 9.095885023188738e-07, |
| "loss": 0.6339, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.13598823044050223, |
| "grad_norm": 6.353402614593506, |
| "learning_rate": 9.095065633142688e-07, |
| "loss": 0.7525, |
| "step": 8735 |
| }, |
| { |
| "epoch": 0.1360660714424716, |
| "grad_norm": 2.8166439533233643, |
| "learning_rate": 9.094246243096638e-07, |
| "loss": 0.7942, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.136143912444441, |
| "grad_norm": 3.2891948223114014, |
| "learning_rate": 9.093426853050589e-07, |
| "loss": 0.7832, |
| "step": 8745 |
| }, |
| { |
| "epoch": 0.13622175344641035, |
| "grad_norm": 6.029998779296875, |
| "learning_rate": 9.09260746300454e-07, |
| "loss": 0.9606, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.13629959444837975, |
| "grad_norm": 3.9812381267547607, |
| "learning_rate": 9.091788072958489e-07, |
| "loss": 0.8119, |
| "step": 8755 |
| }, |
| { |
| "epoch": 0.1363774354503491, |
| "grad_norm": 2.964101791381836, |
| "learning_rate": 9.09096868291244e-07, |
| "loss": 0.7792, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.1364552764523185, |
| "grad_norm": 5.025110244750977, |
| "learning_rate": 9.090149292866391e-07, |
| "loss": 0.9316, |
| "step": 8765 |
| }, |
| { |
| "epoch": 0.13653311745428787, |
| "grad_norm": 8.131609916687012, |
| "learning_rate": 9.089329902820339e-07, |
| "loss": 0.8358, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.13661095845625726, |
| "grad_norm": 6.380354881286621, |
| "learning_rate": 9.08851051277429e-07, |
| "loss": 0.8002, |
| "step": 8775 |
| }, |
| { |
| "epoch": 0.13668879945822662, |
| "grad_norm": 3.922022581100464, |
| "learning_rate": 9.087691122728241e-07, |
| "loss": 0.7736, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.13676664046019602, |
| "grad_norm": 7.275602340698242, |
| "learning_rate": 9.08687173268219e-07, |
| "loss": 0.8083, |
| "step": 8785 |
| }, |
| { |
| "epoch": 0.13684448146216538, |
| "grad_norm": 4.583987236022949, |
| "learning_rate": 9.086052342636141e-07, |
| "loss": 0.7587, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.13692232246413477, |
| "grad_norm": 2.945908784866333, |
| "learning_rate": 9.085232952590092e-07, |
| "loss": 0.7746, |
| "step": 8795 |
| }, |
| { |
| "epoch": 0.13700016346610414, |
| "grad_norm": 3.0301320552825928, |
| "learning_rate": 9.084413562544042e-07, |
| "loss": 0.7701, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.1370780044680735, |
| "grad_norm": 2.6927056312561035, |
| "learning_rate": 9.083594172497992e-07, |
| "loss": 0.6471, |
| "step": 8805 |
| }, |
| { |
| "epoch": 0.1371558454700429, |
| "grad_norm": 3.8082404136657715, |
| "learning_rate": 9.082774782451942e-07, |
| "loss": 0.8069, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.13723368647201226, |
| "grad_norm": 3.635481595993042, |
| "learning_rate": 9.081955392405892e-07, |
| "loss": 0.7559, |
| "step": 8815 |
| }, |
| { |
| "epoch": 0.13731152747398165, |
| "grad_norm": 3.12910795211792, |
| "learning_rate": 9.081136002359843e-07, |
| "loss": 0.7618, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.137389368475951, |
| "grad_norm": 4.329128742218018, |
| "learning_rate": 9.080316612313793e-07, |
| "loss": 0.7491, |
| "step": 8825 |
| }, |
| { |
| "epoch": 0.1374672094779204, |
| "grad_norm": 3.8912899494171143, |
| "learning_rate": 9.079497222267743e-07, |
| "loss": 0.8315, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.13754505047988977, |
| "grad_norm": 5.182728290557861, |
| "learning_rate": 9.078677832221694e-07, |
| "loss": 0.8504, |
| "step": 8835 |
| }, |
| { |
| "epoch": 0.13762289148185916, |
| "grad_norm": 2.6342267990112305, |
| "learning_rate": 9.077858442175645e-07, |
| "loss": 0.8111, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.13770073248382853, |
| "grad_norm": 5.059619903564453, |
| "learning_rate": 9.077039052129595e-07, |
| "loss": 0.9168, |
| "step": 8845 |
| }, |
| { |
| "epoch": 0.13777857348579792, |
| "grad_norm": 3.4851198196411133, |
| "learning_rate": 9.076219662083544e-07, |
| "loss": 0.8299, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.13785641448776728, |
| "grad_norm": 8.638031959533691, |
| "learning_rate": 9.075400272037495e-07, |
| "loss": 0.6855, |
| "step": 8855 |
| }, |
| { |
| "epoch": 0.13793425548973667, |
| "grad_norm": 4.0812811851501465, |
| "learning_rate": 9.074580881991445e-07, |
| "loss": 0.7596, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.13801209649170604, |
| "grad_norm": 3.0092952251434326, |
| "learning_rate": 9.073761491945395e-07, |
| "loss": 0.7839, |
| "step": 8865 |
| }, |
| { |
| "epoch": 0.13808993749367543, |
| "grad_norm": 3.5875368118286133, |
| "learning_rate": 9.072942101899346e-07, |
| "loss": 0.8173, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.1381677784956448, |
| "grad_norm": 6.807621955871582, |
| "learning_rate": 9.072122711853297e-07, |
| "loss": 0.8204, |
| "step": 8875 |
| }, |
| { |
| "epoch": 0.13824561949761419, |
| "grad_norm": 5.541608810424805, |
| "learning_rate": 9.071303321807246e-07, |
| "loss": 0.7516, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.13832346049958355, |
| "grad_norm": 4.05478048324585, |
| "learning_rate": 9.070483931761197e-07, |
| "loss": 0.841, |
| "step": 8885 |
| }, |
| { |
| "epoch": 0.13840130150155291, |
| "grad_norm": 3.128432512283325, |
| "learning_rate": 9.069664541715148e-07, |
| "loss": 0.694, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.1384791425035223, |
| "grad_norm": 4.354421138763428, |
| "learning_rate": 9.068845151669096e-07, |
| "loss": 0.885, |
| "step": 8895 |
| }, |
| { |
| "epoch": 0.13855698350549167, |
| "grad_norm": 4.6781134605407715, |
| "learning_rate": 9.068025761623047e-07, |
| "loss": 0.9041, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.13863482450746106, |
| "grad_norm": 12.059392929077148, |
| "learning_rate": 9.067206371576998e-07, |
| "loss": 0.7997, |
| "step": 8905 |
| }, |
| { |
| "epoch": 0.13871266550943043, |
| "grad_norm": 2.994907855987549, |
| "learning_rate": 9.066386981530948e-07, |
| "loss": 0.7988, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.13879050651139982, |
| "grad_norm": 4.420156478881836, |
| "learning_rate": 9.065567591484898e-07, |
| "loss": 0.8653, |
| "step": 8915 |
| }, |
| { |
| "epoch": 0.13886834751336918, |
| "grad_norm": 8.454998016357422, |
| "learning_rate": 9.064748201438849e-07, |
| "loss": 0.8991, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.13894618851533858, |
| "grad_norm": 3.3839731216430664, |
| "learning_rate": 9.063928811392799e-07, |
| "loss": 0.7173, |
| "step": 8925 |
| }, |
| { |
| "epoch": 0.13902402951730794, |
| "grad_norm": 5.453253746032715, |
| "learning_rate": 9.06310942134675e-07, |
| "loss": 0.7141, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.13910187051927733, |
| "grad_norm": 9.229926109313965, |
| "learning_rate": 9.062290031300699e-07, |
| "loss": 0.7292, |
| "step": 8935 |
| }, |
| { |
| "epoch": 0.1391797115212467, |
| "grad_norm": 5.860846042633057, |
| "learning_rate": 9.061470641254649e-07, |
| "loss": 0.8563, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.1392575525232161, |
| "grad_norm": 4.182551383972168, |
| "learning_rate": 9.0606512512086e-07, |
| "loss": 0.7552, |
| "step": 8945 |
| }, |
| { |
| "epoch": 0.13933539352518545, |
| "grad_norm": 3.165614604949951, |
| "learning_rate": 9.05983186116255e-07, |
| "loss": 0.7887, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.13941323452715484, |
| "grad_norm": 3.805906057357788, |
| "learning_rate": 9.0590124711165e-07, |
| "loss": 0.8226, |
| "step": 8955 |
| }, |
| { |
| "epoch": 0.1394910755291242, |
| "grad_norm": 4.190842151641846, |
| "learning_rate": 9.058193081070451e-07, |
| "loss": 0.8951, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.1395689165310936, |
| "grad_norm": 3.0468575954437256, |
| "learning_rate": 9.057373691024402e-07, |
| "loss": 0.7346, |
| "step": 8965 |
| }, |
| { |
| "epoch": 0.13964675753306297, |
| "grad_norm": 4.917840003967285, |
| "learning_rate": 9.056554300978352e-07, |
| "loss": 0.8478, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.13972459853503233, |
| "grad_norm": 3.5989246368408203, |
| "learning_rate": 9.055734910932301e-07, |
| "loss": 0.702, |
| "step": 8975 |
| }, |
| { |
| "epoch": 0.13980243953700172, |
| "grad_norm": 3.706799030303955, |
| "learning_rate": 9.054915520886252e-07, |
| "loss": 0.6731, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.13988028053897109, |
| "grad_norm": 2.194471836090088, |
| "learning_rate": 9.054096130840202e-07, |
| "loss": 0.7718, |
| "step": 8985 |
| }, |
| { |
| "epoch": 0.13995812154094048, |
| "grad_norm": 4.610592842102051, |
| "learning_rate": 9.053276740794152e-07, |
| "loss": 0.657, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.14003596254290984, |
| "grad_norm": 9.641939163208008, |
| "learning_rate": 9.052457350748103e-07, |
| "loss": 0.7753, |
| "step": 8995 |
| }, |
| { |
| "epoch": 0.14011380354487923, |
| "grad_norm": 4.634001731872559, |
| "learning_rate": 9.051637960702054e-07, |
| "loss": 0.8575, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.1401916445468486, |
| "grad_norm": 4.204237461090088, |
| "learning_rate": 9.050818570656003e-07, |
| "loss": 0.8538, |
| "step": 9005 |
| }, |
| { |
| "epoch": 0.140269485548818, |
| "grad_norm": 4.357415199279785, |
| "learning_rate": 9.049999180609954e-07, |
| "loss": 0.7938, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.14034732655078735, |
| "grad_norm": 6.758501052856445, |
| "learning_rate": 9.049179790563904e-07, |
| "loss": 0.867, |
| "step": 9015 |
| }, |
| { |
| "epoch": 0.14042516755275675, |
| "grad_norm": 4.51594877243042, |
| "learning_rate": 9.048360400517853e-07, |
| "loss": 0.7706, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.1405030085547261, |
| "grad_norm": 3.3741414546966553, |
| "learning_rate": 9.047541010471804e-07, |
| "loss": 0.8655, |
| "step": 9025 |
| }, |
| { |
| "epoch": 0.1405808495566955, |
| "grad_norm": 5.02528190612793, |
| "learning_rate": 9.046721620425755e-07, |
| "loss": 0.7714, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.14065869055866487, |
| "grad_norm": 3.8374040126800537, |
| "learning_rate": 9.045902230379705e-07, |
| "loss": 0.8834, |
| "step": 9035 |
| }, |
| { |
| "epoch": 0.14073653156063426, |
| "grad_norm": 3.829531192779541, |
| "learning_rate": 9.045082840333655e-07, |
| "loss": 0.8477, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.14081437256260362, |
| "grad_norm": 7.790329456329346, |
| "learning_rate": 9.044263450287606e-07, |
| "loss": 0.8359, |
| "step": 9045 |
| }, |
| { |
| "epoch": 0.14089221356457302, |
| "grad_norm": 4.2702460289001465, |
| "learning_rate": 9.043444060241556e-07, |
| "loss": 0.8458, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.14097005456654238, |
| "grad_norm": 5.226367950439453, |
| "learning_rate": 9.042624670195506e-07, |
| "loss": 0.9577, |
| "step": 9055 |
| }, |
| { |
| "epoch": 0.14104789556851174, |
| "grad_norm": 4.627621650695801, |
| "learning_rate": 9.041805280149456e-07, |
| "loss": 0.8082, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.14112573657048114, |
| "grad_norm": 4.152894973754883, |
| "learning_rate": 9.040985890103406e-07, |
| "loss": 0.6948, |
| "step": 9065 |
| }, |
| { |
| "epoch": 0.1412035775724505, |
| "grad_norm": 6.219531059265137, |
| "learning_rate": 9.040166500057357e-07, |
| "loss": 0.8201, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.1412814185744199, |
| "grad_norm": 4.419485569000244, |
| "learning_rate": 9.039347110011307e-07, |
| "loss": 0.8803, |
| "step": 9075 |
| }, |
| { |
| "epoch": 0.14135925957638926, |
| "grad_norm": 4.359714508056641, |
| "learning_rate": 9.038527719965257e-07, |
| "loss": 0.7647, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.14143710057835865, |
| "grad_norm": 6.366148948669434, |
| "learning_rate": 9.037708329919208e-07, |
| "loss": 0.8826, |
| "step": 9085 |
| }, |
| { |
| "epoch": 0.141514941580328, |
| "grad_norm": 3.5685646533966064, |
| "learning_rate": 9.036888939873159e-07, |
| "loss": 0.7997, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.1415927825822974, |
| "grad_norm": 3.0451033115386963, |
| "learning_rate": 9.036069549827107e-07, |
| "loss": 0.7774, |
| "step": 9095 |
| }, |
| { |
| "epoch": 0.14167062358426677, |
| "grad_norm": 4.7669291496276855, |
| "learning_rate": 9.035250159781058e-07, |
| "loss": 0.8111, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.14174846458623616, |
| "grad_norm": 3.8222289085388184, |
| "learning_rate": 9.034430769735009e-07, |
| "loss": 0.8369, |
| "step": 9105 |
| }, |
| { |
| "epoch": 0.14182630558820553, |
| "grad_norm": 9.232769966125488, |
| "learning_rate": 9.03361137968896e-07, |
| "loss": 0.7547, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.14190414659017492, |
| "grad_norm": 2.8610751628875732, |
| "learning_rate": 9.032791989642909e-07, |
| "loss": 0.6919, |
| "step": 9115 |
| }, |
| { |
| "epoch": 0.14198198759214428, |
| "grad_norm": 5.900112152099609, |
| "learning_rate": 9.03197259959686e-07, |
| "loss": 0.8343, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.14205982859411367, |
| "grad_norm": 4.022780418395996, |
| "learning_rate": 9.031153209550811e-07, |
| "loss": 0.867, |
| "step": 9125 |
| }, |
| { |
| "epoch": 0.14213766959608304, |
| "grad_norm": 3.068723201751709, |
| "learning_rate": 9.03033381950476e-07, |
| "loss": 0.7645, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.14221551059805243, |
| "grad_norm": 3.156970977783203, |
| "learning_rate": 9.02951442945871e-07, |
| "loss": 0.6631, |
| "step": 9135 |
| }, |
| { |
| "epoch": 0.1422933516000218, |
| "grad_norm": 3.7697770595550537, |
| "learning_rate": 9.028695039412661e-07, |
| "loss": 0.7945, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.1423711926019912, |
| "grad_norm": 9.904714584350586, |
| "learning_rate": 9.027875649366611e-07, |
| "loss": 0.8339, |
| "step": 9145 |
| }, |
| { |
| "epoch": 0.14244903360396055, |
| "grad_norm": 3.7701642513275146, |
| "learning_rate": 9.027056259320561e-07, |
| "loss": 0.7597, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.14252687460592992, |
| "grad_norm": 4.5266828536987305, |
| "learning_rate": 9.026236869274512e-07, |
| "loss": 0.7383, |
| "step": 9155 |
| }, |
| { |
| "epoch": 0.1426047156078993, |
| "grad_norm": 5.614555358886719, |
| "learning_rate": 9.025417479228462e-07, |
| "loss": 0.7889, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.14268255660986867, |
| "grad_norm": 7.812586784362793, |
| "learning_rate": 9.024598089182412e-07, |
| "loss": 0.8535, |
| "step": 9165 |
| }, |
| { |
| "epoch": 0.14276039761183806, |
| "grad_norm": 5.218164443969727, |
| "learning_rate": 9.023778699136363e-07, |
| "loss": 0.814, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.14283823861380743, |
| "grad_norm": 2.824345588684082, |
| "learning_rate": 9.022959309090312e-07, |
| "loss": 0.8305, |
| "step": 9175 |
| }, |
| { |
| "epoch": 0.14291607961577682, |
| "grad_norm": 3.954688787460327, |
| "learning_rate": 9.022139919044263e-07, |
| "loss": 0.8707, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.14299392061774618, |
| "grad_norm": 4.215836048126221, |
| "learning_rate": 9.021320528998213e-07, |
| "loss": 0.9382, |
| "step": 9185 |
| }, |
| { |
| "epoch": 0.14307176161971558, |
| "grad_norm": 9.17077350616455, |
| "learning_rate": 9.020501138952163e-07, |
| "loss": 0.8275, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.14314960262168494, |
| "grad_norm": 6.527652740478516, |
| "learning_rate": 9.019681748906114e-07, |
| "loss": 0.7504, |
| "step": 9195 |
| }, |
| { |
| "epoch": 0.14322744362365433, |
| "grad_norm": 3.313584089279175, |
| "learning_rate": 9.018862358860065e-07, |
| "loss": 0.8984, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.1433052846256237, |
| "grad_norm": 4.872673988342285, |
| "learning_rate": 9.018042968814014e-07, |
| "loss": 0.9078, |
| "step": 9205 |
| }, |
| { |
| "epoch": 0.1433831256275931, |
| "grad_norm": 4.736050128936768, |
| "learning_rate": 9.017223578767965e-07, |
| "loss": 0.7167, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.14346096662956245, |
| "grad_norm": 3.0113682746887207, |
| "learning_rate": 9.016404188721916e-07, |
| "loss": 0.901, |
| "step": 9215 |
| }, |
| { |
| "epoch": 0.14353880763153185, |
| "grad_norm": 4.565042495727539, |
| "learning_rate": 9.015584798675864e-07, |
| "loss": 0.7495, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.1436166486335012, |
| "grad_norm": 3.0236637592315674, |
| "learning_rate": 9.014765408629815e-07, |
| "loss": 0.7655, |
| "step": 9225 |
| }, |
| { |
| "epoch": 0.1436944896354706, |
| "grad_norm": 5.903986930847168, |
| "learning_rate": 9.013946018583766e-07, |
| "loss": 0.8993, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.14377233063743997, |
| "grad_norm": 4.84224796295166, |
| "learning_rate": 9.013126628537717e-07, |
| "loss": 0.7633, |
| "step": 9235 |
| }, |
| { |
| "epoch": 0.14385017163940933, |
| "grad_norm": 3.5455453395843506, |
| "learning_rate": 9.012307238491666e-07, |
| "loss": 0.7984, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.14392801264137872, |
| "grad_norm": 9.098531723022461, |
| "learning_rate": 9.011487848445617e-07, |
| "loss": 0.8721, |
| "step": 9245 |
| }, |
| { |
| "epoch": 0.1440058536433481, |
| "grad_norm": 3.4469218254089355, |
| "learning_rate": 9.010668458399568e-07, |
| "loss": 0.7364, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.14408369464531748, |
| "grad_norm": 3.0461971759796143, |
| "learning_rate": 9.009849068353517e-07, |
| "loss": 0.8158, |
| "step": 9255 |
| }, |
| { |
| "epoch": 0.14416153564728684, |
| "grad_norm": 5.753521919250488, |
| "learning_rate": 9.009029678307467e-07, |
| "loss": 0.7637, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.14423937664925623, |
| "grad_norm": 10.035242080688477, |
| "learning_rate": 9.008210288261418e-07, |
| "loss": 0.8554, |
| "step": 9265 |
| }, |
| { |
| "epoch": 0.1443172176512256, |
| "grad_norm": 3.796072483062744, |
| "learning_rate": 9.007390898215368e-07, |
| "loss": 0.8241, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.144395058653195, |
| "grad_norm": 4.330554962158203, |
| "learning_rate": 9.006571508169318e-07, |
| "loss": 0.7456, |
| "step": 9275 |
| }, |
| { |
| "epoch": 0.14447289965516436, |
| "grad_norm": 3.3564655780792236, |
| "learning_rate": 9.005752118123269e-07, |
| "loss": 0.8016, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.14455074065713375, |
| "grad_norm": 2.9985568523406982, |
| "learning_rate": 9.004932728077219e-07, |
| "loss": 0.8099, |
| "step": 9285 |
| }, |
| { |
| "epoch": 0.1446285816591031, |
| "grad_norm": 2.705263376235962, |
| "learning_rate": 9.00411333803117e-07, |
| "loss": 0.7769, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.1447064226610725, |
| "grad_norm": 3.700831890106201, |
| "learning_rate": 9.00329394798512e-07, |
| "loss": 0.7327, |
| "step": 9295 |
| }, |
| { |
| "epoch": 0.14478426366304187, |
| "grad_norm": 6.56169319152832, |
| "learning_rate": 9.002474557939069e-07, |
| "loss": 0.7674, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.14486210466501126, |
| "grad_norm": 4.469850063323975, |
| "learning_rate": 9.00165516789302e-07, |
| "loss": 0.8658, |
| "step": 9305 |
| }, |
| { |
| "epoch": 0.14493994566698062, |
| "grad_norm": 8.878783226013184, |
| "learning_rate": 9.00083577784697e-07, |
| "loss": 0.7462, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.14501778666895002, |
| "grad_norm": 2.435040235519409, |
| "learning_rate": 9.00001638780092e-07, |
| "loss": 0.8363, |
| "step": 9315 |
| }, |
| { |
| "epoch": 0.14509562767091938, |
| "grad_norm": 3.0248606204986572, |
| "learning_rate": 8.999196997754871e-07, |
| "loss": 0.8078, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.14517346867288874, |
| "grad_norm": 3.466975450515747, |
| "learning_rate": 8.998377607708822e-07, |
| "loss": 0.7242, |
| "step": 9325 |
| }, |
| { |
| "epoch": 0.14525130967485814, |
| "grad_norm": 3.6273536682128906, |
| "learning_rate": 8.997558217662771e-07, |
| "loss": 0.8677, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.1453291506768275, |
| "grad_norm": 6.970489025115967, |
| "learning_rate": 8.996738827616722e-07, |
| "loss": 0.8854, |
| "step": 9335 |
| }, |
| { |
| "epoch": 0.1454069916787969, |
| "grad_norm": 3.735153913497925, |
| "learning_rate": 8.995919437570672e-07, |
| "loss": 0.7732, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.14548483268076626, |
| "grad_norm": 8.700521469116211, |
| "learning_rate": 8.995100047524621e-07, |
| "loss": 0.9042, |
| "step": 9345 |
| }, |
| { |
| "epoch": 0.14556267368273565, |
| "grad_norm": 7.9652276039123535, |
| "learning_rate": 8.994280657478572e-07, |
| "loss": 0.867, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.145640514684705, |
| "grad_norm": 3.8336143493652344, |
| "learning_rate": 8.993461267432523e-07, |
| "loss": 0.8283, |
| "step": 9355 |
| }, |
| { |
| "epoch": 0.1457183556866744, |
| "grad_norm": 7.037674903869629, |
| "learning_rate": 8.992641877386474e-07, |
| "loss": 0.7827, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.14579619668864377, |
| "grad_norm": 6.455174922943115, |
| "learning_rate": 8.991822487340423e-07, |
| "loss": 0.7666, |
| "step": 9365 |
| }, |
| { |
| "epoch": 0.14587403769061316, |
| "grad_norm": 3.610822916030884, |
| "learning_rate": 8.991003097294374e-07, |
| "loss": 0.8234, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.14595187869258253, |
| "grad_norm": 4.823882102966309, |
| "learning_rate": 8.990183707248325e-07, |
| "loss": 0.7568, |
| "step": 9375 |
| }, |
| { |
| "epoch": 0.14602971969455192, |
| "grad_norm": 5.625290393829346, |
| "learning_rate": 8.989364317202273e-07, |
| "loss": 0.7957, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.14610756069652128, |
| "grad_norm": 5.454258441925049, |
| "learning_rate": 8.988544927156224e-07, |
| "loss": 0.6412, |
| "step": 9385 |
| }, |
| { |
| "epoch": 0.14618540169849067, |
| "grad_norm": 5.896919250488281, |
| "learning_rate": 8.987725537110175e-07, |
| "loss": 0.7889, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.14626324270046004, |
| "grad_norm": 3.3774161338806152, |
| "learning_rate": 8.986906147064125e-07, |
| "loss": 0.9058, |
| "step": 9395 |
| }, |
| { |
| "epoch": 0.14634108370242943, |
| "grad_norm": 3.2311689853668213, |
| "learning_rate": 8.986086757018075e-07, |
| "loss": 0.7764, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.1464189247043988, |
| "grad_norm": 5.298305511474609, |
| "learning_rate": 8.985267366972026e-07, |
| "loss": 0.7435, |
| "step": 9405 |
| }, |
| { |
| "epoch": 0.14649676570636816, |
| "grad_norm": 4.45401668548584, |
| "learning_rate": 8.984447976925976e-07, |
| "loss": 0.7631, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.14657460670833755, |
| "grad_norm": 3.3566505908966064, |
| "learning_rate": 8.983628586879927e-07, |
| "loss": 0.8497, |
| "step": 9415 |
| }, |
| { |
| "epoch": 0.14665244771030692, |
| "grad_norm": 3.440115213394165, |
| "learning_rate": 8.982809196833876e-07, |
| "loss": 0.7614, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.1467302887122763, |
| "grad_norm": 3.5758979320526123, |
| "learning_rate": 8.981989806787826e-07, |
| "loss": 0.651, |
| "step": 9425 |
| }, |
| { |
| "epoch": 0.14680812971424567, |
| "grad_norm": 4.706757545471191, |
| "learning_rate": 8.981170416741777e-07, |
| "loss": 0.9389, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.14688597071621506, |
| "grad_norm": 4.142934799194336, |
| "learning_rate": 8.980351026695727e-07, |
| "loss": 0.9398, |
| "step": 9435 |
| }, |
| { |
| "epoch": 0.14696381171818443, |
| "grad_norm": 3.549654483795166, |
| "learning_rate": 8.979531636649677e-07, |
| "loss": 0.7552, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.14704165272015382, |
| "grad_norm": 3.9092509746551514, |
| "learning_rate": 8.978712246603628e-07, |
| "loss": 0.8207, |
| "step": 9445 |
| }, |
| { |
| "epoch": 0.14711949372212318, |
| "grad_norm": 4.208457946777344, |
| "learning_rate": 8.977892856557579e-07, |
| "loss": 0.7046, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.14719733472409258, |
| "grad_norm": 8.339073181152344, |
| "learning_rate": 8.977073466511528e-07, |
| "loss": 0.7736, |
| "step": 9455 |
| }, |
| { |
| "epoch": 0.14727517572606194, |
| "grad_norm": 2.834137439727783, |
| "learning_rate": 8.976254076465478e-07, |
| "loss": 0.893, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.14735301672803133, |
| "grad_norm": 4.870460033416748, |
| "learning_rate": 8.975434686419429e-07, |
| "loss": 0.819, |
| "step": 9465 |
| }, |
| { |
| "epoch": 0.1474308577300007, |
| "grad_norm": 4.1774582862854, |
| "learning_rate": 8.974615296373378e-07, |
| "loss": 0.7164, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.1475086987319701, |
| "grad_norm": 3.573882818222046, |
| "learning_rate": 8.973795906327329e-07, |
| "loss": 0.7982, |
| "step": 9475 |
| }, |
| { |
| "epoch": 0.14758653973393945, |
| "grad_norm": 3.5654754638671875, |
| "learning_rate": 8.97297651628128e-07, |
| "loss": 0.8293, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.14766438073590885, |
| "grad_norm": 3.387242078781128, |
| "learning_rate": 8.972157126235231e-07, |
| "loss": 0.7351, |
| "step": 9485 |
| }, |
| { |
| "epoch": 0.1477422217378782, |
| "grad_norm": 7.289836883544922, |
| "learning_rate": 8.97133773618918e-07, |
| "loss": 0.8013, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.14782006273984757, |
| "grad_norm": 4.657222270965576, |
| "learning_rate": 8.970518346143131e-07, |
| "loss": 0.8237, |
| "step": 9495 |
| }, |
| { |
| "epoch": 0.14789790374181697, |
| "grad_norm": 4.462035179138184, |
| "learning_rate": 8.969698956097081e-07, |
| "loss": 0.7025, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.14797574474378633, |
| "grad_norm": 4.082338809967041, |
| "learning_rate": 8.968879566051031e-07, |
| "loss": 0.7665, |
| "step": 9505 |
| }, |
| { |
| "epoch": 0.14805358574575572, |
| "grad_norm": 4.678539276123047, |
| "learning_rate": 8.968060176004981e-07, |
| "loss": 0.8325, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.1481314267477251, |
| "grad_norm": 4.395145893096924, |
| "learning_rate": 8.967240785958932e-07, |
| "loss": 0.7172, |
| "step": 9515 |
| }, |
| { |
| "epoch": 0.14820926774969448, |
| "grad_norm": 3.5010783672332764, |
| "learning_rate": 8.966421395912882e-07, |
| "loss": 0.7459, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.14828710875166384, |
| "grad_norm": 5.013630390167236, |
| "learning_rate": 8.965602005866832e-07, |
| "loss": 0.7536, |
| "step": 9525 |
| }, |
| { |
| "epoch": 0.14836494975363324, |
| "grad_norm": 3.0716755390167236, |
| "learning_rate": 8.964782615820783e-07, |
| "loss": 0.8559, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.1484427907556026, |
| "grad_norm": 8.94082260131836, |
| "learning_rate": 8.963963225774733e-07, |
| "loss": 0.8765, |
| "step": 9535 |
| }, |
| { |
| "epoch": 0.148520631757572, |
| "grad_norm": 5.629791736602783, |
| "learning_rate": 8.963143835728684e-07, |
| "loss": 0.9076, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.14859847275954136, |
| "grad_norm": 4.679615020751953, |
| "learning_rate": 8.962324445682633e-07, |
| "loss": 0.7733, |
| "step": 9545 |
| }, |
| { |
| "epoch": 0.14867631376151075, |
| "grad_norm": 2.7258129119873047, |
| "learning_rate": 8.961505055636583e-07, |
| "loss": 0.7082, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.1487541547634801, |
| "grad_norm": 4.701254844665527, |
| "learning_rate": 8.960685665590534e-07, |
| "loss": 0.6848, |
| "step": 9555 |
| }, |
| { |
| "epoch": 0.1488319957654495, |
| "grad_norm": 3.3973538875579834, |
| "learning_rate": 8.959866275544485e-07, |
| "loss": 0.87, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.14890983676741887, |
| "grad_norm": 5.04672384262085, |
| "learning_rate": 8.959046885498434e-07, |
| "loss": 0.7941, |
| "step": 9565 |
| }, |
| { |
| "epoch": 0.14898767776938826, |
| "grad_norm": 5.399540901184082, |
| "learning_rate": 8.958227495452385e-07, |
| "loss": 0.7677, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.14906551877135762, |
| "grad_norm": 3.8206582069396973, |
| "learning_rate": 8.957408105406336e-07, |
| "loss": 0.7326, |
| "step": 9575 |
| }, |
| { |
| "epoch": 0.149143359773327, |
| "grad_norm": 2.07293963432312, |
| "learning_rate": 8.956588715360285e-07, |
| "loss": 0.7492, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.14922120077529638, |
| "grad_norm": 8.431818008422852, |
| "learning_rate": 8.955769325314235e-07, |
| "loss": 0.7455, |
| "step": 9585 |
| }, |
| { |
| "epoch": 0.14929904177726575, |
| "grad_norm": 7.549500465393066, |
| "learning_rate": 8.954949935268186e-07, |
| "loss": 0.7746, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.14937688277923514, |
| "grad_norm": 5.316057205200195, |
| "learning_rate": 8.954130545222136e-07, |
| "loss": 0.7876, |
| "step": 9595 |
| }, |
| { |
| "epoch": 0.1494547237812045, |
| "grad_norm": 7.522194862365723, |
| "learning_rate": 8.953311155176086e-07, |
| "loss": 0.802, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.1495325647831739, |
| "grad_norm": 3.6271915435791016, |
| "learning_rate": 8.952491765130037e-07, |
| "loss": 0.7795, |
| "step": 9605 |
| }, |
| { |
| "epoch": 0.14961040578514326, |
| "grad_norm": 5.297993183135986, |
| "learning_rate": 8.951672375083988e-07, |
| "loss": 0.7834, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.14968824678711265, |
| "grad_norm": 3.3175265789031982, |
| "learning_rate": 8.950852985037937e-07, |
| "loss": 0.6759, |
| "step": 9615 |
| }, |
| { |
| "epoch": 0.14976608778908201, |
| "grad_norm": 3.857908010482788, |
| "learning_rate": 8.950033594991888e-07, |
| "loss": 0.7961, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.1498439287910514, |
| "grad_norm": 6.590905666351318, |
| "learning_rate": 8.949214204945838e-07, |
| "loss": 0.8008, |
| "step": 9625 |
| }, |
| { |
| "epoch": 0.14992176979302077, |
| "grad_norm": 3.9845802783966064, |
| "learning_rate": 8.948394814899788e-07, |
| "loss": 0.8439, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.14999961079499016, |
| "grad_norm": 4.234025001525879, |
| "learning_rate": 8.947575424853738e-07, |
| "loss": 0.829, |
| "step": 9635 |
| }, |
| { |
| "epoch": 0.15007745179695953, |
| "grad_norm": 4.937478542327881, |
| "learning_rate": 8.946756034807689e-07, |
| "loss": 0.8498, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.15015529279892892, |
| "grad_norm": 5.999597072601318, |
| "learning_rate": 8.945936644761639e-07, |
| "loss": 0.7593, |
| "step": 9645 |
| }, |
| { |
| "epoch": 0.15023313380089828, |
| "grad_norm": 5.461806774139404, |
| "learning_rate": 8.94511725471559e-07, |
| "loss": 0.8598, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.15031097480286768, |
| "grad_norm": 5.157901763916016, |
| "learning_rate": 8.94429786466954e-07, |
| "loss": 0.8738, |
| "step": 9655 |
| }, |
| { |
| "epoch": 0.15038881580483704, |
| "grad_norm": 3.0504953861236572, |
| "learning_rate": 8.94347847462349e-07, |
| "loss": 0.7486, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.1504666568068064, |
| "grad_norm": 10.305487632751465, |
| "learning_rate": 8.94265908457744e-07, |
| "loss": 0.7844, |
| "step": 9665 |
| }, |
| { |
| "epoch": 0.1505444978087758, |
| "grad_norm": 3.5192887783050537, |
| "learning_rate": 8.94183969453139e-07, |
| "loss": 0.8386, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.15062233881074516, |
| "grad_norm": 4.425221920013428, |
| "learning_rate": 8.94102030448534e-07, |
| "loss": 0.7428, |
| "step": 9675 |
| }, |
| { |
| "epoch": 0.15070017981271455, |
| "grad_norm": 5.125747203826904, |
| "learning_rate": 8.940200914439291e-07, |
| "loss": 0.7855, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.15077802081468392, |
| "grad_norm": 5.673059463500977, |
| "learning_rate": 8.939381524393242e-07, |
| "loss": 0.9193, |
| "step": 9685 |
| }, |
| { |
| "epoch": 0.1508558618166533, |
| "grad_norm": 2.934014320373535, |
| "learning_rate": 8.938562134347191e-07, |
| "loss": 0.6473, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.15093370281862267, |
| "grad_norm": 6.39523983001709, |
| "learning_rate": 8.937742744301142e-07, |
| "loss": 0.7309, |
| "step": 9695 |
| }, |
| { |
| "epoch": 0.15101154382059206, |
| "grad_norm": 3.2489750385284424, |
| "learning_rate": 8.936923354255093e-07, |
| "loss": 0.8391, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.15108938482256143, |
| "grad_norm": 3.2904443740844727, |
| "learning_rate": 8.936103964209041e-07, |
| "loss": 0.6639, |
| "step": 9705 |
| }, |
| { |
| "epoch": 0.15116722582453082, |
| "grad_norm": 5.773887634277344, |
| "learning_rate": 8.935284574162992e-07, |
| "loss": 0.7859, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.15124506682650019, |
| "grad_norm": 4.7919697761535645, |
| "learning_rate": 8.934465184116943e-07, |
| "loss": 0.7519, |
| "step": 9715 |
| }, |
| { |
| "epoch": 0.15132290782846958, |
| "grad_norm": 4.2467942237854, |
| "learning_rate": 8.933645794070893e-07, |
| "loss": 0.8234, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.15140074883043894, |
| "grad_norm": 3.2644877433776855, |
| "learning_rate": 8.932826404024843e-07, |
| "loss": 0.6703, |
| "step": 9725 |
| }, |
| { |
| "epoch": 0.15147858983240833, |
| "grad_norm": 4.7553534507751465, |
| "learning_rate": 8.932007013978794e-07, |
| "loss": 0.7273, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.1515564308343777, |
| "grad_norm": 3.009917974472046, |
| "learning_rate": 8.931187623932745e-07, |
| "loss": 0.8056, |
| "step": 9735 |
| }, |
| { |
| "epoch": 0.1516342718363471, |
| "grad_norm": 2.296459436416626, |
| "learning_rate": 8.930368233886695e-07, |
| "loss": 0.7561, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.15171211283831645, |
| "grad_norm": 9.049659729003906, |
| "learning_rate": 8.929548843840644e-07, |
| "loss": 0.7144, |
| "step": 9745 |
| }, |
| { |
| "epoch": 0.15178995384028582, |
| "grad_norm": 6.223394393920898, |
| "learning_rate": 8.928729453794595e-07, |
| "loss": 0.737, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.1518677948422552, |
| "grad_norm": 7.632771968841553, |
| "learning_rate": 8.927910063748545e-07, |
| "loss": 0.8498, |
| "step": 9755 |
| }, |
| { |
| "epoch": 0.15194563584422457, |
| "grad_norm": 6.848038196563721, |
| "learning_rate": 8.927090673702495e-07, |
| "loss": 0.806, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.15202347684619397, |
| "grad_norm": 4.97011137008667, |
| "learning_rate": 8.926271283656446e-07, |
| "loss": 0.7157, |
| "step": 9765 |
| }, |
| { |
| "epoch": 0.15210131784816333, |
| "grad_norm": 3.5741093158721924, |
| "learning_rate": 8.925451893610396e-07, |
| "loss": 0.8259, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.15217915885013272, |
| "grad_norm": 3.7775120735168457, |
| "learning_rate": 8.924632503564347e-07, |
| "loss": 0.7166, |
| "step": 9775 |
| }, |
| { |
| "epoch": 0.1522569998521021, |
| "grad_norm": 3.5487003326416016, |
| "learning_rate": 8.923813113518297e-07, |
| "loss": 0.7971, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.15233484085407148, |
| "grad_norm": 4.203275203704834, |
| "learning_rate": 8.922993723472246e-07, |
| "loss": 0.8493, |
| "step": 9785 |
| }, |
| { |
| "epoch": 0.15241268185604084, |
| "grad_norm": 3.4415955543518066, |
| "learning_rate": 8.922174333426197e-07, |
| "loss": 0.7878, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.15249052285801024, |
| "grad_norm": 3.142867088317871, |
| "learning_rate": 8.921354943380148e-07, |
| "loss": 0.7424, |
| "step": 9795 |
| }, |
| { |
| "epoch": 0.1525683638599796, |
| "grad_norm": 4.120011806488037, |
| "learning_rate": 8.920535553334097e-07, |
| "loss": 0.8535, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.152646204861949, |
| "grad_norm": 3.5447614192962646, |
| "learning_rate": 8.919716163288048e-07, |
| "loss": 0.7767, |
| "step": 9805 |
| }, |
| { |
| "epoch": 0.15272404586391836, |
| "grad_norm": 3.2715671062469482, |
| "learning_rate": 8.918896773241999e-07, |
| "loss": 0.8062, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.15280188686588775, |
| "grad_norm": 2.4431989192962646, |
| "learning_rate": 8.918077383195948e-07, |
| "loss": 0.7578, |
| "step": 9815 |
| }, |
| { |
| "epoch": 0.1528797278678571, |
| "grad_norm": 3.4583024978637695, |
| "learning_rate": 8.917257993149899e-07, |
| "loss": 0.7531, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.1529575688698265, |
| "grad_norm": 2.993116617202759, |
| "learning_rate": 8.916438603103849e-07, |
| "loss": 0.7672, |
| "step": 9825 |
| }, |
| { |
| "epoch": 0.15303540987179587, |
| "grad_norm": 3.1958775520324707, |
| "learning_rate": 8.915619213057799e-07, |
| "loss": 0.8598, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.15311325087376526, |
| "grad_norm": 6.617530345916748, |
| "learning_rate": 8.914799823011749e-07, |
| "loss": 0.6993, |
| "step": 9835 |
| }, |
| { |
| "epoch": 0.15319109187573463, |
| "grad_norm": 3.9187653064727783, |
| "learning_rate": 8.9139804329657e-07, |
| "loss": 0.7635, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.153268932877704, |
| "grad_norm": 3.482074499130249, |
| "learning_rate": 8.91316104291965e-07, |
| "loss": 0.7737, |
| "step": 9845 |
| }, |
| { |
| "epoch": 0.15334677387967338, |
| "grad_norm": 9.398933410644531, |
| "learning_rate": 8.9123416528736e-07, |
| "loss": 0.8737, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.15342461488164275, |
| "grad_norm": 3.637568473815918, |
| "learning_rate": 8.911522262827551e-07, |
| "loss": 0.7667, |
| "step": 9855 |
| }, |
| { |
| "epoch": 0.15350245588361214, |
| "grad_norm": 3.3821427822113037, |
| "learning_rate": 8.910702872781502e-07, |
| "loss": 0.7836, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.1535802968855815, |
| "grad_norm": 2.9914300441741943, |
| "learning_rate": 8.909883482735452e-07, |
| "loss": 0.8564, |
| "step": 9865 |
| }, |
| { |
| "epoch": 0.1536581378875509, |
| "grad_norm": 6.171716690063477, |
| "learning_rate": 8.909064092689401e-07, |
| "loss": 0.8064, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.15373597888952026, |
| "grad_norm": 3.4968278408050537, |
| "learning_rate": 8.908244702643352e-07, |
| "loss": 0.7524, |
| "step": 9875 |
| }, |
| { |
| "epoch": 0.15381381989148965, |
| "grad_norm": 5.08829402923584, |
| "learning_rate": 8.907425312597302e-07, |
| "loss": 0.8207, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.15389166089345901, |
| "grad_norm": 3.5643150806427, |
| "learning_rate": 8.906605922551253e-07, |
| "loss": 0.7776, |
| "step": 9885 |
| }, |
| { |
| "epoch": 0.1539695018954284, |
| "grad_norm": 3.4858274459838867, |
| "learning_rate": 8.905786532505203e-07, |
| "loss": 0.8979, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.15404734289739777, |
| "grad_norm": 3.9792239665985107, |
| "learning_rate": 8.904967142459153e-07, |
| "loss": 0.797, |
| "step": 9895 |
| }, |
| { |
| "epoch": 0.15412518389936716, |
| "grad_norm": 9.95738410949707, |
| "learning_rate": 8.904147752413104e-07, |
| "loss": 0.7492, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.15420302490133653, |
| "grad_norm": 4.636844158172607, |
| "learning_rate": 8.903328362367054e-07, |
| "loss": 0.7867, |
| "step": 9905 |
| }, |
| { |
| "epoch": 0.15428086590330592, |
| "grad_norm": 7.289409637451172, |
| "learning_rate": 8.902508972321003e-07, |
| "loss": 0.7982, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.15435870690527528, |
| "grad_norm": 3.7324790954589844, |
| "learning_rate": 8.901689582274954e-07, |
| "loss": 0.7001, |
| "step": 9915 |
| }, |
| { |
| "epoch": 0.15443654790724468, |
| "grad_norm": 4.735339641571045, |
| "learning_rate": 8.900870192228905e-07, |
| "loss": 0.7155, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.15451438890921404, |
| "grad_norm": 8.172463417053223, |
| "learning_rate": 8.900050802182854e-07, |
| "loss": 0.8489, |
| "step": 9925 |
| }, |
| { |
| "epoch": 0.1545922299111834, |
| "grad_norm": 3.3313515186309814, |
| "learning_rate": 8.899231412136805e-07, |
| "loss": 0.8064, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.1546700709131528, |
| "grad_norm": 9.073691368103027, |
| "learning_rate": 8.898412022090756e-07, |
| "loss": 0.8577, |
| "step": 9935 |
| }, |
| { |
| "epoch": 0.15474791191512216, |
| "grad_norm": 7.126258373260498, |
| "learning_rate": 8.897592632044705e-07, |
| "loss": 0.7679, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.15482575291709155, |
| "grad_norm": 11.254960060119629, |
| "learning_rate": 8.896773241998656e-07, |
| "loss": 0.7725, |
| "step": 9945 |
| }, |
| { |
| "epoch": 0.15490359391906092, |
| "grad_norm": 3.706859827041626, |
| "learning_rate": 8.895953851952606e-07, |
| "loss": 0.9131, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.1549814349210303, |
| "grad_norm": 3.0554311275482178, |
| "learning_rate": 8.895134461906556e-07, |
| "loss": 0.7683, |
| "step": 9955 |
| }, |
| { |
| "epoch": 0.15505927592299967, |
| "grad_norm": 3.64799165725708, |
| "learning_rate": 8.894315071860506e-07, |
| "loss": 0.7275, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.15513711692496907, |
| "grad_norm": 3.383768320083618, |
| "learning_rate": 8.893495681814457e-07, |
| "loss": 0.8155, |
| "step": 9965 |
| }, |
| { |
| "epoch": 0.15521495792693843, |
| "grad_norm": 3.561455726623535, |
| "learning_rate": 8.892676291768407e-07, |
| "loss": 0.7256, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.15529279892890782, |
| "grad_norm": 3.7967913150787354, |
| "learning_rate": 8.891856901722358e-07, |
| "loss": 0.8299, |
| "step": 9975 |
| }, |
| { |
| "epoch": 0.15537063993087719, |
| "grad_norm": 3.6479313373565674, |
| "learning_rate": 8.891037511676308e-07, |
| "loss": 0.7851, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.15544848093284658, |
| "grad_norm": 4.59113883972168, |
| "learning_rate": 8.890218121630259e-07, |
| "loss": 0.8115, |
| "step": 9985 |
| }, |
| { |
| "epoch": 0.15552632193481594, |
| "grad_norm": 5.832945823669434, |
| "learning_rate": 8.889398731584208e-07, |
| "loss": 0.9049, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.15560416293678533, |
| "grad_norm": 3.0644237995147705, |
| "learning_rate": 8.888579341538158e-07, |
| "loss": 0.6787, |
| "step": 9995 |
| }, |
| { |
| "epoch": 0.1556820039387547, |
| "grad_norm": 4.14565372467041, |
| "learning_rate": 8.887759951492109e-07, |
| "loss": 0.8273, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.1557598449407241, |
| "grad_norm": 10.58462142944336, |
| "learning_rate": 8.886940561446059e-07, |
| "loss": 0.7526, |
| "step": 10005 |
| }, |
| { |
| "epoch": 0.15583768594269345, |
| "grad_norm": 4.249096870422363, |
| "learning_rate": 8.88612117140001e-07, |
| "loss": 0.7715, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.15591552694466282, |
| "grad_norm": 6.341519355773926, |
| "learning_rate": 8.88530178135396e-07, |
| "loss": 0.7869, |
| "step": 10015 |
| }, |
| { |
| "epoch": 0.1559933679466322, |
| "grad_norm": 2.831510543823242, |
| "learning_rate": 8.88448239130791e-07, |
| "loss": 0.807, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.15607120894860158, |
| "grad_norm": 3.297983169555664, |
| "learning_rate": 8.883663001261861e-07, |
| "loss": 0.7433, |
| "step": 10025 |
| }, |
| { |
| "epoch": 0.15614904995057097, |
| "grad_norm": 3.2775771617889404, |
| "learning_rate": 8.88284361121581e-07, |
| "loss": 0.7378, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.15622689095254033, |
| "grad_norm": 4.8905768394470215, |
| "learning_rate": 8.88202422116976e-07, |
| "loss": 0.9689, |
| "step": 10035 |
| }, |
| { |
| "epoch": 0.15630473195450972, |
| "grad_norm": 5.260064601898193, |
| "learning_rate": 8.881204831123711e-07, |
| "loss": 0.8044, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.1563825729564791, |
| "grad_norm": 3.0201990604400635, |
| "learning_rate": 8.880385441077662e-07, |
| "loss": 0.7943, |
| "step": 10045 |
| }, |
| { |
| "epoch": 0.15646041395844848, |
| "grad_norm": 3.7755088806152344, |
| "learning_rate": 8.879566051031611e-07, |
| "loss": 0.7369, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.15653825496041784, |
| "grad_norm": 4.074087619781494, |
| "learning_rate": 8.878746660985562e-07, |
| "loss": 0.8229, |
| "step": 10055 |
| }, |
| { |
| "epoch": 0.15661609596238724, |
| "grad_norm": 4.723263263702393, |
| "learning_rate": 8.877927270939513e-07, |
| "loss": 0.6489, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.1566939369643566, |
| "grad_norm": 4.477192401885986, |
| "learning_rate": 8.877107880893463e-07, |
| "loss": 0.8427, |
| "step": 10065 |
| }, |
| { |
| "epoch": 0.156771777966326, |
| "grad_norm": 2.5688400268554688, |
| "learning_rate": 8.876288490847412e-07, |
| "loss": 0.8412, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.15684961896829536, |
| "grad_norm": 4.53289794921875, |
| "learning_rate": 8.875469100801363e-07, |
| "loss": 0.8295, |
| "step": 10075 |
| }, |
| { |
| "epoch": 0.15692745997026475, |
| "grad_norm": 7.105226993560791, |
| "learning_rate": 8.874649710755313e-07, |
| "loss": 0.8648, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.1570053009722341, |
| "grad_norm": 2.8844571113586426, |
| "learning_rate": 8.873830320709263e-07, |
| "loss": 0.7657, |
| "step": 10085 |
| }, |
| { |
| "epoch": 0.1570831419742035, |
| "grad_norm": 4.569636344909668, |
| "learning_rate": 8.873010930663214e-07, |
| "loss": 0.6868, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.15716098297617287, |
| "grad_norm": 3.5974533557891846, |
| "learning_rate": 8.872191540617164e-07, |
| "loss": 0.9193, |
| "step": 10095 |
| }, |
| { |
| "epoch": 0.15723882397814223, |
| "grad_norm": 4.02683687210083, |
| "learning_rate": 8.871372150571115e-07, |
| "loss": 0.8309, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.15731666498011163, |
| "grad_norm": 3.6681370735168457, |
| "learning_rate": 8.870552760525065e-07, |
| "loss": 0.7995, |
| "step": 10105 |
| }, |
| { |
| "epoch": 0.157394505982081, |
| "grad_norm": 3.227896213531494, |
| "learning_rate": 8.869733370479014e-07, |
| "loss": 0.7712, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.15747234698405038, |
| "grad_norm": 7.877114295959473, |
| "learning_rate": 8.868913980432965e-07, |
| "loss": 0.7099, |
| "step": 10115 |
| }, |
| { |
| "epoch": 0.15755018798601975, |
| "grad_norm": 4.573225021362305, |
| "learning_rate": 8.868094590386915e-07, |
| "loss": 0.6932, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.15762802898798914, |
| "grad_norm": 4.980703353881836, |
| "learning_rate": 8.867275200340866e-07, |
| "loss": 0.9058, |
| "step": 10125 |
| }, |
| { |
| "epoch": 0.1577058699899585, |
| "grad_norm": 4.040239334106445, |
| "learning_rate": 8.866455810294816e-07, |
| "loss": 0.813, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.1577837109919279, |
| "grad_norm": 3.489124059677124, |
| "learning_rate": 8.865636420248767e-07, |
| "loss": 0.7237, |
| "step": 10135 |
| }, |
| { |
| "epoch": 0.15786155199389726, |
| "grad_norm": 2.5940945148468018, |
| "learning_rate": 8.864817030202717e-07, |
| "loss": 0.7494, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.15793939299586665, |
| "grad_norm": 3.312206745147705, |
| "learning_rate": 8.863997640156667e-07, |
| "loss": 0.8447, |
| "step": 10145 |
| }, |
| { |
| "epoch": 0.15801723399783602, |
| "grad_norm": 3.279050350189209, |
| "learning_rate": 8.863178250110617e-07, |
| "loss": 0.8192, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.1580950749998054, |
| "grad_norm": 3.221031904220581, |
| "learning_rate": 8.862358860064568e-07, |
| "loss": 0.8593, |
| "step": 10155 |
| }, |
| { |
| "epoch": 0.15817291600177477, |
| "grad_norm": 5.654365062713623, |
| "learning_rate": 8.861539470018517e-07, |
| "loss": 0.8396, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.15825075700374416, |
| "grad_norm": 3.0688130855560303, |
| "learning_rate": 8.860720079972468e-07, |
| "loss": 0.6908, |
| "step": 10165 |
| }, |
| { |
| "epoch": 0.15832859800571353, |
| "grad_norm": 4.515695571899414, |
| "learning_rate": 8.859900689926419e-07, |
| "loss": 0.8116, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.15840643900768292, |
| "grad_norm": 5.167048454284668, |
| "learning_rate": 8.859081299880368e-07, |
| "loss": 0.8373, |
| "step": 10175 |
| }, |
| { |
| "epoch": 0.15848428000965228, |
| "grad_norm": 8.9721097946167, |
| "learning_rate": 8.858261909834319e-07, |
| "loss": 0.9478, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.15856212101162165, |
| "grad_norm": 3.8844072818756104, |
| "learning_rate": 8.85744251978827e-07, |
| "loss": 0.8713, |
| "step": 10185 |
| }, |
| { |
| "epoch": 0.15863996201359104, |
| "grad_norm": 3.705676794052124, |
| "learning_rate": 8.85662312974222e-07, |
| "loss": 0.817, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.1587178030155604, |
| "grad_norm": 4.741096496582031, |
| "learning_rate": 8.855803739696169e-07, |
| "loss": 0.8058, |
| "step": 10195 |
| }, |
| { |
| "epoch": 0.1587956440175298, |
| "grad_norm": 6.481576442718506, |
| "learning_rate": 8.85498434965012e-07, |
| "loss": 0.9077, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.15887348501949916, |
| "grad_norm": 5.847426414489746, |
| "learning_rate": 8.85416495960407e-07, |
| "loss": 0.7257, |
| "step": 10205 |
| }, |
| { |
| "epoch": 0.15895132602146855, |
| "grad_norm": 3.6452407836914062, |
| "learning_rate": 8.85334556955802e-07, |
| "loss": 0.7282, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.15902916702343792, |
| "grad_norm": 3.5716402530670166, |
| "learning_rate": 8.852526179511971e-07, |
| "loss": 0.7504, |
| "step": 10215 |
| }, |
| { |
| "epoch": 0.1591070080254073, |
| "grad_norm": 4.702507495880127, |
| "learning_rate": 8.851706789465921e-07, |
| "loss": 0.8345, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.15918484902737667, |
| "grad_norm": 3.6276495456695557, |
| "learning_rate": 8.850887399419872e-07, |
| "loss": 0.8267, |
| "step": 10225 |
| }, |
| { |
| "epoch": 0.15926269002934607, |
| "grad_norm": 3.432413101196289, |
| "learning_rate": 8.850068009373822e-07, |
| "loss": 0.9476, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.15934053103131543, |
| "grad_norm": 4.081861972808838, |
| "learning_rate": 8.849248619327771e-07, |
| "loss": 0.7688, |
| "step": 10235 |
| }, |
| { |
| "epoch": 0.15941837203328482, |
| "grad_norm": 6.799472332000732, |
| "learning_rate": 8.848429229281722e-07, |
| "loss": 0.7084, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.1594962130352542, |
| "grad_norm": 2.736294984817505, |
| "learning_rate": 8.847609839235673e-07, |
| "loss": 0.7835, |
| "step": 10245 |
| }, |
| { |
| "epoch": 0.15957405403722358, |
| "grad_norm": 4.227167129516602, |
| "learning_rate": 8.846790449189623e-07, |
| "loss": 0.7833, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.15965189503919294, |
| "grad_norm": 3.3678765296936035, |
| "learning_rate": 8.845971059143573e-07, |
| "loss": 0.9005, |
| "step": 10255 |
| }, |
| { |
| "epoch": 0.15972973604116233, |
| "grad_norm": 3.6415300369262695, |
| "learning_rate": 8.845151669097524e-07, |
| "loss": 0.8214, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.1598075770431317, |
| "grad_norm": 4.28493595123291, |
| "learning_rate": 8.844332279051474e-07, |
| "loss": 0.7597, |
| "step": 10265 |
| }, |
| { |
| "epoch": 0.15988541804510106, |
| "grad_norm": 3.2147064208984375, |
| "learning_rate": 8.843512889005424e-07, |
| "loss": 0.7652, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.15996325904707046, |
| "grad_norm": 7.0327959060668945, |
| "learning_rate": 8.842693498959374e-07, |
| "loss": 0.7703, |
| "step": 10275 |
| }, |
| { |
| "epoch": 0.16004110004903982, |
| "grad_norm": 3.714036464691162, |
| "learning_rate": 8.841874108913325e-07, |
| "loss": 0.8864, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.1601189410510092, |
| "grad_norm": 3.1920130252838135, |
| "learning_rate": 8.841054718867274e-07, |
| "loss": 0.8203, |
| "step": 10285 |
| }, |
| { |
| "epoch": 0.16019678205297858, |
| "grad_norm": 4.356620788574219, |
| "learning_rate": 8.840235328821225e-07, |
| "loss": 0.8215, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.16027462305494797, |
| "grad_norm": 3.031799793243408, |
| "learning_rate": 8.839415938775176e-07, |
| "loss": 0.6909, |
| "step": 10295 |
| }, |
| { |
| "epoch": 0.16035246405691733, |
| "grad_norm": 3.817898750305176, |
| "learning_rate": 8.838596548729125e-07, |
| "loss": 0.7237, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.16043030505888672, |
| "grad_norm": 4.679711818695068, |
| "learning_rate": 8.837777158683076e-07, |
| "loss": 0.7296, |
| "step": 10305 |
| }, |
| { |
| "epoch": 0.1605081460608561, |
| "grad_norm": 3.7384095191955566, |
| "learning_rate": 8.836957768637027e-07, |
| "loss": 0.9011, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.16058598706282548, |
| "grad_norm": 4.7621541023254395, |
| "learning_rate": 8.836138378590976e-07, |
| "loss": 0.9026, |
| "step": 10315 |
| }, |
| { |
| "epoch": 0.16066382806479484, |
| "grad_norm": 6.402016639709473, |
| "learning_rate": 8.835318988544926e-07, |
| "loss": 0.8081, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.16074166906676424, |
| "grad_norm": 4.472290515899658, |
| "learning_rate": 8.834499598498877e-07, |
| "loss": 0.7606, |
| "step": 10325 |
| }, |
| { |
| "epoch": 0.1608195100687336, |
| "grad_norm": 5.197412967681885, |
| "learning_rate": 8.833680208452827e-07, |
| "loss": 0.7824, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.160897351070703, |
| "grad_norm": 3.40364408493042, |
| "learning_rate": 8.832860818406778e-07, |
| "loss": 0.7884, |
| "step": 10335 |
| }, |
| { |
| "epoch": 0.16097519207267236, |
| "grad_norm": 6.786139965057373, |
| "learning_rate": 8.832041428360728e-07, |
| "loss": 0.8743, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.16105303307464175, |
| "grad_norm": 4.048831462860107, |
| "learning_rate": 8.831222038314678e-07, |
| "loss": 0.8958, |
| "step": 10345 |
| }, |
| { |
| "epoch": 0.1611308740766111, |
| "grad_norm": 4.102180480957031, |
| "learning_rate": 8.830402648268629e-07, |
| "loss": 0.7835, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.16120871507858048, |
| "grad_norm": 3.135587453842163, |
| "learning_rate": 8.829583258222578e-07, |
| "loss": 0.6362, |
| "step": 10355 |
| }, |
| { |
| "epoch": 0.16128655608054987, |
| "grad_norm": 3.9217071533203125, |
| "learning_rate": 8.828763868176528e-07, |
| "loss": 0.7069, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.16136439708251923, |
| "grad_norm": 3.276561737060547, |
| "learning_rate": 8.827944478130479e-07, |
| "loss": 0.8588, |
| "step": 10365 |
| }, |
| { |
| "epoch": 0.16144223808448863, |
| "grad_norm": 3.9652113914489746, |
| "learning_rate": 8.82712508808443e-07, |
| "loss": 0.8276, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.161520079086458, |
| "grad_norm": 5.678191184997559, |
| "learning_rate": 8.82630569803838e-07, |
| "loss": 0.7526, |
| "step": 10375 |
| }, |
| { |
| "epoch": 0.16159792008842738, |
| "grad_norm": 5.475320816040039, |
| "learning_rate": 8.82548630799233e-07, |
| "loss": 0.8913, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.16167576109039675, |
| "grad_norm": 15.008077621459961, |
| "learning_rate": 8.824666917946281e-07, |
| "loss": 0.7912, |
| "step": 10385 |
| }, |
| { |
| "epoch": 0.16175360209236614, |
| "grad_norm": 3.8481225967407227, |
| "learning_rate": 8.823847527900232e-07, |
| "loss": 0.7949, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.1618314430943355, |
| "grad_norm": 2.759436845779419, |
| "learning_rate": 8.82302813785418e-07, |
| "loss": 0.7449, |
| "step": 10395 |
| }, |
| { |
| "epoch": 0.1619092840963049, |
| "grad_norm": 8.001230239868164, |
| "learning_rate": 8.822208747808131e-07, |
| "loss": 0.853, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.16198712509827426, |
| "grad_norm": 5.410604953765869, |
| "learning_rate": 8.821389357762082e-07, |
| "loss": 0.8032, |
| "step": 10405 |
| }, |
| { |
| "epoch": 0.16206496610024365, |
| "grad_norm": 17.351768493652344, |
| "learning_rate": 8.820569967716031e-07, |
| "loss": 0.7418, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.16214280710221302, |
| "grad_norm": 3.534604549407959, |
| "learning_rate": 8.819750577669982e-07, |
| "loss": 0.7653, |
| "step": 10415 |
| }, |
| { |
| "epoch": 0.1622206481041824, |
| "grad_norm": 8.916220664978027, |
| "learning_rate": 8.818931187623933e-07, |
| "loss": 0.837, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.16229848910615177, |
| "grad_norm": 4.538114547729492, |
| "learning_rate": 8.818111797577883e-07, |
| "loss": 0.7677, |
| "step": 10425 |
| }, |
| { |
| "epoch": 0.16237633010812116, |
| "grad_norm": 3.6164751052856445, |
| "learning_rate": 8.817292407531833e-07, |
| "loss": 0.7675, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.16245417111009053, |
| "grad_norm": 4.763157844543457, |
| "learning_rate": 8.816473017485783e-07, |
| "loss": 0.7237, |
| "step": 10435 |
| }, |
| { |
| "epoch": 0.1625320121120599, |
| "grad_norm": 4.728906631469727, |
| "learning_rate": 8.815653627439733e-07, |
| "loss": 0.8593, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.16260985311402928, |
| "grad_norm": 3.878848075866699, |
| "learning_rate": 8.814834237393683e-07, |
| "loss": 0.8701, |
| "step": 10445 |
| }, |
| { |
| "epoch": 0.16268769411599865, |
| "grad_norm": 5.231772422790527, |
| "learning_rate": 8.814014847347634e-07, |
| "loss": 0.8108, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.16276553511796804, |
| "grad_norm": 9.139507293701172, |
| "learning_rate": 8.813195457301584e-07, |
| "loss": 0.7747, |
| "step": 10455 |
| }, |
| { |
| "epoch": 0.1628433761199374, |
| "grad_norm": 4.208261013031006, |
| "learning_rate": 8.812376067255535e-07, |
| "loss": 0.7136, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.1629212171219068, |
| "grad_norm": 4.353900909423828, |
| "learning_rate": 8.811556677209485e-07, |
| "loss": 0.7753, |
| "step": 10465 |
| }, |
| { |
| "epoch": 0.16299905812387616, |
| "grad_norm": 2.729976177215576, |
| "learning_rate": 8.810737287163435e-07, |
| "loss": 0.7926, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.16307689912584555, |
| "grad_norm": 2.928906202316284, |
| "learning_rate": 8.809917897117385e-07, |
| "loss": 0.7747, |
| "step": 10475 |
| }, |
| { |
| "epoch": 0.16315474012781492, |
| "grad_norm": 4.206002712249756, |
| "learning_rate": 8.809098507071335e-07, |
| "loss": 0.8576, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.1632325811297843, |
| "grad_norm": 7.724112510681152, |
| "learning_rate": 8.808279117025286e-07, |
| "loss": 0.8393, |
| "step": 10485 |
| }, |
| { |
| "epoch": 0.16331042213175367, |
| "grad_norm": 3.295132637023926, |
| "learning_rate": 8.807459726979236e-07, |
| "loss": 0.778, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.16338826313372307, |
| "grad_norm": 3.3869988918304443, |
| "learning_rate": 8.806640336933187e-07, |
| "loss": 0.7569, |
| "step": 10495 |
| }, |
| { |
| "epoch": 0.16346610413569243, |
| "grad_norm": 3.718599319458008, |
| "learning_rate": 8.805820946887137e-07, |
| "loss": 0.8747, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.16354394513766182, |
| "grad_norm": 3.8476598262786865, |
| "learning_rate": 8.805001556841087e-07, |
| "loss": 0.6924, |
| "step": 10505 |
| }, |
| { |
| "epoch": 0.1636217861396312, |
| "grad_norm": 3.9861888885498047, |
| "learning_rate": 8.804182166795038e-07, |
| "loss": 0.8145, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.16369962714160058, |
| "grad_norm": 3.1306610107421875, |
| "learning_rate": 8.803362776748989e-07, |
| "loss": 0.6579, |
| "step": 10515 |
| }, |
| { |
| "epoch": 0.16377746814356994, |
| "grad_norm": 4.904189586639404, |
| "learning_rate": 8.802543386702937e-07, |
| "loss": 0.8639, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.16385530914553934, |
| "grad_norm": 5.17719841003418, |
| "learning_rate": 8.801723996656888e-07, |
| "loss": 0.8722, |
| "step": 10525 |
| }, |
| { |
| "epoch": 0.1639331501475087, |
| "grad_norm": 2.9521892070770264, |
| "learning_rate": 8.800904606610839e-07, |
| "loss": 0.7665, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.16401099114947806, |
| "grad_norm": 3.255908966064453, |
| "learning_rate": 8.800085216564788e-07, |
| "loss": 0.7702, |
| "step": 10535 |
| }, |
| { |
| "epoch": 0.16408883215144746, |
| "grad_norm": 6.015521049499512, |
| "learning_rate": 8.799265826518739e-07, |
| "loss": 0.7845, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.16416667315341682, |
| "grad_norm": 4.091128826141357, |
| "learning_rate": 8.79844643647269e-07, |
| "loss": 0.9369, |
| "step": 10545 |
| }, |
| { |
| "epoch": 0.1642445141553862, |
| "grad_norm": 4.375763416290283, |
| "learning_rate": 8.79762704642664e-07, |
| "loss": 0.8318, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.16432235515735558, |
| "grad_norm": 6.054563522338867, |
| "learning_rate": 8.79680765638059e-07, |
| "loss": 0.8422, |
| "step": 10555 |
| }, |
| { |
| "epoch": 0.16440019615932497, |
| "grad_norm": 4.5666303634643555, |
| "learning_rate": 8.79598826633454e-07, |
| "loss": 0.8438, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.16447803716129433, |
| "grad_norm": 3.462005615234375, |
| "learning_rate": 8.79516887628849e-07, |
| "loss": 0.7772, |
| "step": 10565 |
| }, |
| { |
| "epoch": 0.16455587816326372, |
| "grad_norm": 3.66213321685791, |
| "learning_rate": 8.79434948624244e-07, |
| "loss": 0.9158, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.1646337191652331, |
| "grad_norm": 3.454305648803711, |
| "learning_rate": 8.793530096196391e-07, |
| "loss": 0.7452, |
| "step": 10575 |
| }, |
| { |
| "epoch": 0.16471156016720248, |
| "grad_norm": 4.568371295928955, |
| "learning_rate": 8.792710706150341e-07, |
| "loss": 0.855, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.16478940116917185, |
| "grad_norm": 5.541522979736328, |
| "learning_rate": 8.791891316104292e-07, |
| "loss": 0.9156, |
| "step": 10585 |
| }, |
| { |
| "epoch": 0.16486724217114124, |
| "grad_norm": 4.69566011428833, |
| "learning_rate": 8.791071926058242e-07, |
| "loss": 0.7321, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.1649450831731106, |
| "grad_norm": 7.592962741851807, |
| "learning_rate": 8.790252536012192e-07, |
| "loss": 0.7888, |
| "step": 10595 |
| }, |
| { |
| "epoch": 0.16502292417508, |
| "grad_norm": 5.3040080070495605, |
| "learning_rate": 8.789433145966142e-07, |
| "loss": 0.7643, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.16510076517704936, |
| "grad_norm": 2.616908073425293, |
| "learning_rate": 8.788613755920093e-07, |
| "loss": 0.7614, |
| "step": 10605 |
| }, |
| { |
| "epoch": 0.16517860617901875, |
| "grad_norm": 3.683166980743408, |
| "learning_rate": 8.787794365874043e-07, |
| "loss": 0.715, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.16525644718098811, |
| "grad_norm": 4.2956132888793945, |
| "learning_rate": 8.786974975827993e-07, |
| "loss": 0.9153, |
| "step": 10615 |
| }, |
| { |
| "epoch": 0.16533428818295748, |
| "grad_norm": 3.76724910736084, |
| "learning_rate": 8.786155585781944e-07, |
| "loss": 0.86, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.16541212918492687, |
| "grad_norm": 3.4380204677581787, |
| "learning_rate": 8.785336195735894e-07, |
| "loss": 0.8662, |
| "step": 10625 |
| }, |
| { |
| "epoch": 0.16548997018689623, |
| "grad_norm": 8.281620025634766, |
| "learning_rate": 8.784516805689844e-07, |
| "loss": 0.8607, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.16556781118886563, |
| "grad_norm": 7.735804557800293, |
| "learning_rate": 8.783697415643795e-07, |
| "loss": 0.7513, |
| "step": 10635 |
| }, |
| { |
| "epoch": 0.165645652190835, |
| "grad_norm": 6.491322040557861, |
| "learning_rate": 8.782878025597745e-07, |
| "loss": 0.824, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.16572349319280438, |
| "grad_norm": 2.7615108489990234, |
| "learning_rate": 8.782058635551694e-07, |
| "loss": 0.9231, |
| "step": 10645 |
| }, |
| { |
| "epoch": 0.16580133419477375, |
| "grad_norm": 5.219547748565674, |
| "learning_rate": 8.781239245505645e-07, |
| "loss": 0.7178, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.16587917519674314, |
| "grad_norm": 8.346015930175781, |
| "learning_rate": 8.780419855459596e-07, |
| "loss": 0.8779, |
| "step": 10655 |
| }, |
| { |
| "epoch": 0.1659570161987125, |
| "grad_norm": 9.989750862121582, |
| "learning_rate": 8.779600465413545e-07, |
| "loss": 0.7609, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.1660348572006819, |
| "grad_norm": 6.761223793029785, |
| "learning_rate": 8.778781075367496e-07, |
| "loss": 0.9394, |
| "step": 10665 |
| }, |
| { |
| "epoch": 0.16611269820265126, |
| "grad_norm": 3.055026054382324, |
| "learning_rate": 8.777961685321447e-07, |
| "loss": 0.8548, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.16619053920462065, |
| "grad_norm": 7.2378950119018555, |
| "learning_rate": 8.777142295275397e-07, |
| "loss": 0.8538, |
| "step": 10675 |
| }, |
| { |
| "epoch": 0.16626838020659002, |
| "grad_norm": 11.686674118041992, |
| "learning_rate": 8.776322905229346e-07, |
| "loss": 0.7245, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.1663462212085594, |
| "grad_norm": 3.3093395233154297, |
| "learning_rate": 8.775503515183297e-07, |
| "loss": 0.7876, |
| "step": 10685 |
| }, |
| { |
| "epoch": 0.16642406221052877, |
| "grad_norm": 5.579384803771973, |
| "learning_rate": 8.774684125137247e-07, |
| "loss": 0.8294, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.16650190321249816, |
| "grad_norm": 3.092195749282837, |
| "learning_rate": 8.773864735091198e-07, |
| "loss": 0.8116, |
| "step": 10695 |
| }, |
| { |
| "epoch": 0.16657974421446753, |
| "grad_norm": 5.8653130531311035, |
| "learning_rate": 8.773045345045148e-07, |
| "loss": 0.7827, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.1666575852164369, |
| "grad_norm": 8.496586799621582, |
| "learning_rate": 8.772225954999098e-07, |
| "loss": 0.871, |
| "step": 10705 |
| }, |
| { |
| "epoch": 0.16673542621840629, |
| "grad_norm": 3.554830312728882, |
| "learning_rate": 8.771406564953049e-07, |
| "loss": 0.7876, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.16681326722037565, |
| "grad_norm": 4.988160610198975, |
| "learning_rate": 8.770587174907e-07, |
| "loss": 0.8173, |
| "step": 10715 |
| }, |
| { |
| "epoch": 0.16689110822234504, |
| "grad_norm": 4.899982929229736, |
| "learning_rate": 8.769767784860948e-07, |
| "loss": 0.7582, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.1669689492243144, |
| "grad_norm": 3.1257996559143066, |
| "learning_rate": 8.768948394814899e-07, |
| "loss": 0.7396, |
| "step": 10725 |
| }, |
| { |
| "epoch": 0.1670467902262838, |
| "grad_norm": 7.962161064147949, |
| "learning_rate": 8.76812900476885e-07, |
| "loss": 0.8768, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.16712463122825316, |
| "grad_norm": 9.329161643981934, |
| "learning_rate": 8.7673096147228e-07, |
| "loss": 0.7469, |
| "step": 10735 |
| }, |
| { |
| "epoch": 0.16720247223022255, |
| "grad_norm": 2.5543742179870605, |
| "learning_rate": 8.76649022467675e-07, |
| "loss": 0.8198, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.16728031323219192, |
| "grad_norm": 3.192782402038574, |
| "learning_rate": 8.765670834630701e-07, |
| "loss": 0.7747, |
| "step": 10745 |
| }, |
| { |
| "epoch": 0.1673581542341613, |
| "grad_norm": 3.3908746242523193, |
| "learning_rate": 8.764851444584652e-07, |
| "loss": 0.8149, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.16743599523613067, |
| "grad_norm": 3.8879611492156982, |
| "learning_rate": 8.764032054538601e-07, |
| "loss": 0.774, |
| "step": 10755 |
| }, |
| { |
| "epoch": 0.16751383623810007, |
| "grad_norm": 5.076687335968018, |
| "learning_rate": 8.763212664492551e-07, |
| "loss": 0.799, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.16759167724006943, |
| "grad_norm": 5.339931964874268, |
| "learning_rate": 8.762393274446502e-07, |
| "loss": 0.756, |
| "step": 10765 |
| }, |
| { |
| "epoch": 0.16766951824203882, |
| "grad_norm": 4.076517581939697, |
| "learning_rate": 8.761573884400451e-07, |
| "loss": 0.8323, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.1677473592440082, |
| "grad_norm": 4.2539777755737305, |
| "learning_rate": 8.760754494354402e-07, |
| "loss": 0.6913, |
| "step": 10775 |
| }, |
| { |
| "epoch": 0.16782520024597758, |
| "grad_norm": 3.196856737136841, |
| "learning_rate": 8.759935104308353e-07, |
| "loss": 0.7324, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.16790304124794694, |
| "grad_norm": 13.658886909484863, |
| "learning_rate": 8.759115714262303e-07, |
| "loss": 0.7811, |
| "step": 10785 |
| }, |
| { |
| "epoch": 0.1679808822499163, |
| "grad_norm": 3.423370838165283, |
| "learning_rate": 8.758296324216253e-07, |
| "loss": 0.7902, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.1680587232518857, |
| "grad_norm": 4.14307165145874, |
| "learning_rate": 8.757476934170204e-07, |
| "loss": 0.7751, |
| "step": 10795 |
| }, |
| { |
| "epoch": 0.16813656425385506, |
| "grad_norm": 3.6597414016723633, |
| "learning_rate": 8.756657544124153e-07, |
| "loss": 0.7519, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.16821440525582446, |
| "grad_norm": 3.839320182800293, |
| "learning_rate": 8.755838154078103e-07, |
| "loss": 0.6788, |
| "step": 10805 |
| }, |
| { |
| "epoch": 0.16829224625779382, |
| "grad_norm": 5.377815246582031, |
| "learning_rate": 8.755018764032054e-07, |
| "loss": 0.8277, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.1683700872597632, |
| "grad_norm": 2.5293519496917725, |
| "learning_rate": 8.754199373986004e-07, |
| "loss": 0.7754, |
| "step": 10815 |
| }, |
| { |
| "epoch": 0.16844792826173258, |
| "grad_norm": 3.5276260375976562, |
| "learning_rate": 8.753379983939955e-07, |
| "loss": 0.6878, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.16852576926370197, |
| "grad_norm": 4.435384273529053, |
| "learning_rate": 8.752560593893905e-07, |
| "loss": 0.7848, |
| "step": 10825 |
| }, |
| { |
| "epoch": 0.16860361026567133, |
| "grad_norm": 4.338689804077148, |
| "learning_rate": 8.751741203847855e-07, |
| "loss": 0.7934, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.16868145126764073, |
| "grad_norm": 5.802286148071289, |
| "learning_rate": 8.750921813801806e-07, |
| "loss": 0.6343, |
| "step": 10835 |
| }, |
| { |
| "epoch": 0.1687592922696101, |
| "grad_norm": 5.789932727813721, |
| "learning_rate": 8.750102423755757e-07, |
| "loss": 0.8002, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.16883713327157948, |
| "grad_norm": 3.8039424419403076, |
| "learning_rate": 8.749283033709705e-07, |
| "loss": 0.7334, |
| "step": 10845 |
| }, |
| { |
| "epoch": 0.16891497427354885, |
| "grad_norm": 3.930701971054077, |
| "learning_rate": 8.748463643663656e-07, |
| "loss": 0.8077, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.16899281527551824, |
| "grad_norm": 15.397842407226562, |
| "learning_rate": 8.747644253617607e-07, |
| "loss": 0.9553, |
| "step": 10855 |
| }, |
| { |
| "epoch": 0.1690706562774876, |
| "grad_norm": 7.080071926116943, |
| "learning_rate": 8.746824863571557e-07, |
| "loss": 0.7472, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.169148497279457, |
| "grad_norm": 3.849839687347412, |
| "learning_rate": 8.746005473525507e-07, |
| "loss": 0.7191, |
| "step": 10865 |
| }, |
| { |
| "epoch": 0.16922633828142636, |
| "grad_norm": 3.95355486869812, |
| "learning_rate": 8.745186083479458e-07, |
| "loss": 0.661, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.16930417928339572, |
| "grad_norm": 2.4966964721679688, |
| "learning_rate": 8.744366693433409e-07, |
| "loss": 0.8823, |
| "step": 10875 |
| }, |
| { |
| "epoch": 0.16938202028536511, |
| "grad_norm": 6.468384742736816, |
| "learning_rate": 8.743547303387358e-07, |
| "loss": 0.7943, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.16945986128733448, |
| "grad_norm": 4.023189544677734, |
| "learning_rate": 8.742727913341308e-07, |
| "loss": 0.7107, |
| "step": 10885 |
| }, |
| { |
| "epoch": 0.16953770228930387, |
| "grad_norm": 3.180260419845581, |
| "learning_rate": 8.741908523295259e-07, |
| "loss": 0.7373, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.16961554329127324, |
| "grad_norm": 4.347747802734375, |
| "learning_rate": 8.741089133249208e-07, |
| "loss": 0.7309, |
| "step": 10895 |
| }, |
| { |
| "epoch": 0.16969338429324263, |
| "grad_norm": 6.062042713165283, |
| "learning_rate": 8.740269743203159e-07, |
| "loss": 0.7572, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.169771225295212, |
| "grad_norm": 2.9714925289154053, |
| "learning_rate": 8.73945035315711e-07, |
| "loss": 0.7924, |
| "step": 10905 |
| }, |
| { |
| "epoch": 0.16984906629718138, |
| "grad_norm": 3.827422618865967, |
| "learning_rate": 8.73863096311106e-07, |
| "loss": 0.912, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.16992690729915075, |
| "grad_norm": 5.133169174194336, |
| "learning_rate": 8.73781157306501e-07, |
| "loss": 0.7078, |
| "step": 10915 |
| }, |
| { |
| "epoch": 0.17000474830112014, |
| "grad_norm": 2.856977939605713, |
| "learning_rate": 8.736992183018961e-07, |
| "loss": 0.8076, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.1700825893030895, |
| "grad_norm": 4.684539318084717, |
| "learning_rate": 8.73617279297291e-07, |
| "loss": 0.7045, |
| "step": 10925 |
| }, |
| { |
| "epoch": 0.1701604303050589, |
| "grad_norm": 4.420496940612793, |
| "learning_rate": 8.73535340292686e-07, |
| "loss": 0.8248, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.17023827130702826, |
| "grad_norm": 3.429518461227417, |
| "learning_rate": 8.734534012880811e-07, |
| "loss": 0.7643, |
| "step": 10935 |
| }, |
| { |
| "epoch": 0.17031611230899765, |
| "grad_norm": 3.1888604164123535, |
| "learning_rate": 8.733714622834761e-07, |
| "loss": 0.7827, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.17039395331096702, |
| "grad_norm": 3.491792917251587, |
| "learning_rate": 8.732895232788712e-07, |
| "loss": 0.8594, |
| "step": 10945 |
| }, |
| { |
| "epoch": 0.1704717943129364, |
| "grad_norm": 4.644289493560791, |
| "learning_rate": 8.732075842742662e-07, |
| "loss": 0.7508, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.17054963531490577, |
| "grad_norm": 3.222562313079834, |
| "learning_rate": 8.731256452696612e-07, |
| "loss": 0.7755, |
| "step": 10955 |
| }, |
| { |
| "epoch": 0.17062747631687514, |
| "grad_norm": 7.415604591369629, |
| "learning_rate": 8.730437062650563e-07, |
| "loss": 0.677, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.17070531731884453, |
| "grad_norm": 3.6360597610473633, |
| "learning_rate": 8.729617672604513e-07, |
| "loss": 0.8338, |
| "step": 10965 |
| }, |
| { |
| "epoch": 0.1707831583208139, |
| "grad_norm": 9.364253044128418, |
| "learning_rate": 8.728798282558462e-07, |
| "loss": 0.7644, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.17086099932278329, |
| "grad_norm": 3.551168441772461, |
| "learning_rate": 8.727978892512413e-07, |
| "loss": 0.7808, |
| "step": 10975 |
| }, |
| { |
| "epoch": 0.17093884032475265, |
| "grad_norm": 6.72664213180542, |
| "learning_rate": 8.727159502466364e-07, |
| "loss": 0.7789, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.17101668132672204, |
| "grad_norm": 3.8760182857513428, |
| "learning_rate": 8.726340112420314e-07, |
| "loss": 0.7943, |
| "step": 10985 |
| }, |
| { |
| "epoch": 0.1710945223286914, |
| "grad_norm": 4.414963722229004, |
| "learning_rate": 8.725520722374264e-07, |
| "loss": 0.753, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.1711723633306608, |
| "grad_norm": 8.448708534240723, |
| "learning_rate": 8.724701332328215e-07, |
| "loss": 0.704, |
| "step": 10995 |
| }, |
| { |
| "epoch": 0.17125020433263016, |
| "grad_norm": 3.2329261302948, |
| "learning_rate": 8.723881942282166e-07, |
| "loss": 0.7565, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.17132804533459955, |
| "grad_norm": 7.1403303146362305, |
| "learning_rate": 8.723062552236114e-07, |
| "loss": 0.7924, |
| "step": 11005 |
| }, |
| { |
| "epoch": 0.17140588633656892, |
| "grad_norm": 2.50464129447937, |
| "learning_rate": 8.722243162190065e-07, |
| "loss": 0.8038, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.1714837273385383, |
| "grad_norm": 3.236157178878784, |
| "learning_rate": 8.721423772144016e-07, |
| "loss": 0.7118, |
| "step": 11015 |
| }, |
| { |
| "epoch": 0.17156156834050768, |
| "grad_norm": 4.607430934906006, |
| "learning_rate": 8.720604382097966e-07, |
| "loss": 0.8654, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.17163940934247707, |
| "grad_norm": 3.002549171447754, |
| "learning_rate": 8.719784992051916e-07, |
| "loss": 0.7738, |
| "step": 11025 |
| }, |
| { |
| "epoch": 0.17171725034444643, |
| "grad_norm": 3.545802116394043, |
| "learning_rate": 8.718965602005867e-07, |
| "loss": 0.7659, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.17179509134641582, |
| "grad_norm": 4.228000640869141, |
| "learning_rate": 8.718146211959817e-07, |
| "loss": 0.89, |
| "step": 11035 |
| }, |
| { |
| "epoch": 0.1718729323483852, |
| "grad_norm": 10.417171478271484, |
| "learning_rate": 8.717326821913767e-07, |
| "loss": 0.7852, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.17195077335035455, |
| "grad_norm": 5.213564872741699, |
| "learning_rate": 8.716507431867717e-07, |
| "loss": 0.8346, |
| "step": 11045 |
| }, |
| { |
| "epoch": 0.17202861435232394, |
| "grad_norm": 3.869044780731201, |
| "learning_rate": 8.715688041821667e-07, |
| "loss": 0.7275, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.1721064553542933, |
| "grad_norm": 3.7918996810913086, |
| "learning_rate": 8.714868651775618e-07, |
| "loss": 0.9288, |
| "step": 11055 |
| }, |
| { |
| "epoch": 0.1721842963562627, |
| "grad_norm": 3.620591640472412, |
| "learning_rate": 8.714049261729568e-07, |
| "loss": 0.8667, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.17226213735823206, |
| "grad_norm": 3.3272628784179688, |
| "learning_rate": 8.713229871683518e-07, |
| "loss": 0.7839, |
| "step": 11065 |
| }, |
| { |
| "epoch": 0.17233997836020146, |
| "grad_norm": 3.3981659412384033, |
| "learning_rate": 8.712410481637469e-07, |
| "loss": 0.8703, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.17241781936217082, |
| "grad_norm": 2.7353670597076416, |
| "learning_rate": 8.71159109159142e-07, |
| "loss": 0.8323, |
| "step": 11075 |
| }, |
| { |
| "epoch": 0.1724956603641402, |
| "grad_norm": 3.0876946449279785, |
| "learning_rate": 8.710771701545369e-07, |
| "loss": 0.7806, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.17257350136610958, |
| "grad_norm": 4.163149833679199, |
| "learning_rate": 8.709952311499319e-07, |
| "loss": 0.738, |
| "step": 11085 |
| }, |
| { |
| "epoch": 0.17265134236807897, |
| "grad_norm": 3.454596519470215, |
| "learning_rate": 8.70913292145327e-07, |
| "loss": 0.7692, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.17272918337004833, |
| "grad_norm": 5.42886209487915, |
| "learning_rate": 8.708313531407219e-07, |
| "loss": 0.8061, |
| "step": 11095 |
| }, |
| { |
| "epoch": 0.17280702437201773, |
| "grad_norm": 6.216919898986816, |
| "learning_rate": 8.70749414136117e-07, |
| "loss": 0.7982, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.1728848653739871, |
| "grad_norm": 3.19822359085083, |
| "learning_rate": 8.706674751315121e-07, |
| "loss": 0.7365, |
| "step": 11105 |
| }, |
| { |
| "epoch": 0.17296270637595648, |
| "grad_norm": 6.890682220458984, |
| "learning_rate": 8.705855361269072e-07, |
| "loss": 0.8121, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.17304054737792585, |
| "grad_norm": 3.8538219928741455, |
| "learning_rate": 8.705035971223021e-07, |
| "loss": 0.8005, |
| "step": 11115 |
| }, |
| { |
| "epoch": 0.17311838837989524, |
| "grad_norm": 4.351493835449219, |
| "learning_rate": 8.704216581176972e-07, |
| "loss": 0.7921, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.1731962293818646, |
| "grad_norm": 4.173168182373047, |
| "learning_rate": 8.703397191130923e-07, |
| "loss": 0.7759, |
| "step": 11125 |
| }, |
| { |
| "epoch": 0.17327407038383397, |
| "grad_norm": 3.0455987453460693, |
| "learning_rate": 8.702577801084871e-07, |
| "loss": 0.7061, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.17335191138580336, |
| "grad_norm": 8.838937759399414, |
| "learning_rate": 8.701758411038822e-07, |
| "loss": 0.6888, |
| "step": 11135 |
| }, |
| { |
| "epoch": 0.17342975238777272, |
| "grad_norm": 10.514293670654297, |
| "learning_rate": 8.700939020992773e-07, |
| "loss": 0.8403, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.17350759338974212, |
| "grad_norm": 2.9291927814483643, |
| "learning_rate": 8.700119630946723e-07, |
| "loss": 0.8609, |
| "step": 11145 |
| }, |
| { |
| "epoch": 0.17358543439171148, |
| "grad_norm": 4.258464336395264, |
| "learning_rate": 8.699300240900673e-07, |
| "loss": 0.8436, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.17366327539368087, |
| "grad_norm": 4.499458312988281, |
| "learning_rate": 8.698480850854624e-07, |
| "loss": 0.883, |
| "step": 11155 |
| }, |
| { |
| "epoch": 0.17374111639565024, |
| "grad_norm": 4.2294745445251465, |
| "learning_rate": 8.697661460808574e-07, |
| "loss": 0.711, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.17381895739761963, |
| "grad_norm": 2.956054925918579, |
| "learning_rate": 8.696842070762525e-07, |
| "loss": 0.8658, |
| "step": 11165 |
| }, |
| { |
| "epoch": 0.173896798399589, |
| "grad_norm": 6.596224308013916, |
| "learning_rate": 8.696022680716474e-07, |
| "loss": 0.8289, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.17397463940155838, |
| "grad_norm": 4.582150936126709, |
| "learning_rate": 8.695203290670424e-07, |
| "loss": 0.7524, |
| "step": 11175 |
| }, |
| { |
| "epoch": 0.17405248040352775, |
| "grad_norm": 2.815945625305176, |
| "learning_rate": 8.694383900624375e-07, |
| "loss": 0.8105, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.17413032140549714, |
| "grad_norm": 7.399906635284424, |
| "learning_rate": 8.693564510578325e-07, |
| "loss": 0.628, |
| "step": 11185 |
| }, |
| { |
| "epoch": 0.1742081624074665, |
| "grad_norm": 2.3162524700164795, |
| "learning_rate": 8.692745120532275e-07, |
| "loss": 0.6802, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.1742860034094359, |
| "grad_norm": 4.691956043243408, |
| "learning_rate": 8.691925730486226e-07, |
| "loss": 0.9559, |
| "step": 11195 |
| }, |
| { |
| "epoch": 0.17436384441140526, |
| "grad_norm": 5.132546901702881, |
| "learning_rate": 8.691106340440177e-07, |
| "loss": 0.8418, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.17444168541337465, |
| "grad_norm": 6.407838344573975, |
| "learning_rate": 8.690286950394126e-07, |
| "loss": 0.9149, |
| "step": 11205 |
| }, |
| { |
| "epoch": 0.17451952641534402, |
| "grad_norm": 2.8634774684906006, |
| "learning_rate": 8.689467560348076e-07, |
| "loss": 0.8361, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.1745973674173134, |
| "grad_norm": 5.304955959320068, |
| "learning_rate": 8.688648170302027e-07, |
| "loss": 0.7911, |
| "step": 11215 |
| }, |
| { |
| "epoch": 0.17467520841928277, |
| "grad_norm": 6.025475978851318, |
| "learning_rate": 8.687828780255976e-07, |
| "loss": 0.7571, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.17475304942125214, |
| "grad_norm": 4.901416301727295, |
| "learning_rate": 8.687009390209927e-07, |
| "loss": 0.7321, |
| "step": 11225 |
| }, |
| { |
| "epoch": 0.17483089042322153, |
| "grad_norm": 2.951046943664551, |
| "learning_rate": 8.686190000163878e-07, |
| "loss": 0.756, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.1749087314251909, |
| "grad_norm": 3.0569169521331787, |
| "learning_rate": 8.685370610117829e-07, |
| "loss": 0.7937, |
| "step": 11235 |
| }, |
| { |
| "epoch": 0.1749865724271603, |
| "grad_norm": 3.96439528465271, |
| "learning_rate": 8.684551220071778e-07, |
| "loss": 0.7065, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.17506441342912965, |
| "grad_norm": 5.546309947967529, |
| "learning_rate": 8.683731830025729e-07, |
| "loss": 0.7787, |
| "step": 11245 |
| }, |
| { |
| "epoch": 0.17514225443109904, |
| "grad_norm": 4.913859844207764, |
| "learning_rate": 8.682912439979679e-07, |
| "loss": 0.7689, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.1752200954330684, |
| "grad_norm": 3.5462117195129395, |
| "learning_rate": 8.682093049933628e-07, |
| "loss": 0.7766, |
| "step": 11255 |
| }, |
| { |
| "epoch": 0.1752979364350378, |
| "grad_norm": 5.14613676071167, |
| "learning_rate": 8.681273659887579e-07, |
| "loss": 0.8672, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.17537577743700716, |
| "grad_norm": 3.5507776737213135, |
| "learning_rate": 8.68045426984153e-07, |
| "loss": 0.8519, |
| "step": 11265 |
| }, |
| { |
| "epoch": 0.17545361843897656, |
| "grad_norm": 5.96008825302124, |
| "learning_rate": 8.67963487979548e-07, |
| "loss": 0.8072, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.17553145944094592, |
| "grad_norm": 3.9602410793304443, |
| "learning_rate": 8.67881548974943e-07, |
| "loss": 0.7046, |
| "step": 11275 |
| }, |
| { |
| "epoch": 0.1756093004429153, |
| "grad_norm": 9.932755470275879, |
| "learning_rate": 8.677996099703381e-07, |
| "loss": 0.6901, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.17568714144488468, |
| "grad_norm": 3.0417749881744385, |
| "learning_rate": 8.677176709657331e-07, |
| "loss": 0.8085, |
| "step": 11285 |
| }, |
| { |
| "epoch": 0.17576498244685407, |
| "grad_norm": 8.406500816345215, |
| "learning_rate": 8.67635731961128e-07, |
| "loss": 0.8751, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.17584282344882343, |
| "grad_norm": 3.1526453495025635, |
| "learning_rate": 8.675537929565231e-07, |
| "loss": 0.779, |
| "step": 11295 |
| }, |
| { |
| "epoch": 0.17592066445079282, |
| "grad_norm": 8.264491081237793, |
| "learning_rate": 8.674718539519181e-07, |
| "loss": 0.6696, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.1759985054527622, |
| "grad_norm": 4.0117011070251465, |
| "learning_rate": 8.673899149473132e-07, |
| "loss": 0.7622, |
| "step": 11305 |
| }, |
| { |
| "epoch": 0.17607634645473155, |
| "grad_norm": 5.8008527755737305, |
| "learning_rate": 8.673079759427082e-07, |
| "loss": 0.9383, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.17615418745670094, |
| "grad_norm": 3.362180709838867, |
| "learning_rate": 8.672260369381032e-07, |
| "loss": 0.7315, |
| "step": 11315 |
| }, |
| { |
| "epoch": 0.1762320284586703, |
| "grad_norm": 3.2175207138061523, |
| "learning_rate": 8.671440979334983e-07, |
| "loss": 0.7064, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.1763098694606397, |
| "grad_norm": 3.180907964706421, |
| "learning_rate": 8.670621589288934e-07, |
| "loss": 0.7622, |
| "step": 11325 |
| }, |
| { |
| "epoch": 0.17638771046260907, |
| "grad_norm": 4.250461578369141, |
| "learning_rate": 8.669802199242882e-07, |
| "loss": 0.9496, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.17646555146457846, |
| "grad_norm": 3.719573497772217, |
| "learning_rate": 8.668982809196833e-07, |
| "loss": 0.7536, |
| "step": 11335 |
| }, |
| { |
| "epoch": 0.17654339246654782, |
| "grad_norm": 4.155417442321777, |
| "learning_rate": 8.668163419150784e-07, |
| "loss": 0.7122, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.1766212334685172, |
| "grad_norm": 5.191723346710205, |
| "learning_rate": 8.667344029104733e-07, |
| "loss": 0.6908, |
| "step": 11345 |
| }, |
| { |
| "epoch": 0.17669907447048658, |
| "grad_norm": 6.202426910400391, |
| "learning_rate": 8.666524639058684e-07, |
| "loss": 0.8704, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.17677691547245597, |
| "grad_norm": 3.2510268688201904, |
| "learning_rate": 8.665705249012635e-07, |
| "loss": 0.702, |
| "step": 11355 |
| }, |
| { |
| "epoch": 0.17685475647442533, |
| "grad_norm": 7.769083023071289, |
| "learning_rate": 8.664885858966586e-07, |
| "loss": 0.8048, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.17693259747639473, |
| "grad_norm": 3.8902037143707275, |
| "learning_rate": 8.664066468920535e-07, |
| "loss": 0.7719, |
| "step": 11365 |
| }, |
| { |
| "epoch": 0.1770104384783641, |
| "grad_norm": 4.6013407707214355, |
| "learning_rate": 8.663247078874485e-07, |
| "loss": 0.7691, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.17708827948033348, |
| "grad_norm": 7.063052654266357, |
| "learning_rate": 8.662427688828436e-07, |
| "loss": 0.8091, |
| "step": 11375 |
| }, |
| { |
| "epoch": 0.17716612048230285, |
| "grad_norm": 4.544634819030762, |
| "learning_rate": 8.661608298782386e-07, |
| "loss": 0.9371, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.17724396148427224, |
| "grad_norm": 4.195473670959473, |
| "learning_rate": 8.660788908736336e-07, |
| "loss": 0.7634, |
| "step": 11385 |
| }, |
| { |
| "epoch": 0.1773218024862416, |
| "grad_norm": 18.608367919921875, |
| "learning_rate": 8.659969518690287e-07, |
| "loss": 0.7902, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.17739964348821097, |
| "grad_norm": 3.536041498184204, |
| "learning_rate": 8.659150128644237e-07, |
| "loss": 0.7095, |
| "step": 11395 |
| }, |
| { |
| "epoch": 0.17747748449018036, |
| "grad_norm": 3.1353678703308105, |
| "learning_rate": 8.658330738598187e-07, |
| "loss": 0.688, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.17755532549214972, |
| "grad_norm": 2.603710889816284, |
| "learning_rate": 8.657511348552138e-07, |
| "loss": 0.8218, |
| "step": 11405 |
| }, |
| { |
| "epoch": 0.17763316649411912, |
| "grad_norm": 3.3999760150909424, |
| "learning_rate": 8.656691958506087e-07, |
| "loss": 0.7279, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.17771100749608848, |
| "grad_norm": 3.0341594219207764, |
| "learning_rate": 8.655872568460038e-07, |
| "loss": 0.754, |
| "step": 11415 |
| }, |
| { |
| "epoch": 0.17778884849805787, |
| "grad_norm": 5.157776355743408, |
| "learning_rate": 8.655053178413988e-07, |
| "loss": 0.8347, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.17786668950002724, |
| "grad_norm": 5.881651878356934, |
| "learning_rate": 8.654233788367938e-07, |
| "loss": 0.868, |
| "step": 11425 |
| }, |
| { |
| "epoch": 0.17794453050199663, |
| "grad_norm": 7.980086326599121, |
| "learning_rate": 8.653414398321889e-07, |
| "loss": 0.8467, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.178022371503966, |
| "grad_norm": 3.4080753326416016, |
| "learning_rate": 8.65259500827584e-07, |
| "loss": 0.641, |
| "step": 11435 |
| }, |
| { |
| "epoch": 0.17810021250593538, |
| "grad_norm": 8.840136528015137, |
| "learning_rate": 8.651775618229789e-07, |
| "loss": 0.8391, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.17817805350790475, |
| "grad_norm": 6.058139324188232, |
| "learning_rate": 8.65095622818374e-07, |
| "loss": 0.6896, |
| "step": 11445 |
| }, |
| { |
| "epoch": 0.17825589450987414, |
| "grad_norm": 3.771533250808716, |
| "learning_rate": 8.650136838137691e-07, |
| "loss": 0.7656, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.1783337355118435, |
| "grad_norm": 5.53726053237915, |
| "learning_rate": 8.649317448091639e-07, |
| "loss": 0.7161, |
| "step": 11455 |
| }, |
| { |
| "epoch": 0.1784115765138129, |
| "grad_norm": 4.151732444763184, |
| "learning_rate": 8.64849805804559e-07, |
| "loss": 0.7258, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.17848941751578226, |
| "grad_norm": 5.9917731285095215, |
| "learning_rate": 8.647678667999541e-07, |
| "loss": 0.8906, |
| "step": 11465 |
| }, |
| { |
| "epoch": 0.17856725851775165, |
| "grad_norm": 3.5444159507751465, |
| "learning_rate": 8.64685927795349e-07, |
| "loss": 0.8006, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.17864509951972102, |
| "grad_norm": 7.527516841888428, |
| "learning_rate": 8.646039887907441e-07, |
| "loss": 0.837, |
| "step": 11475 |
| }, |
| { |
| "epoch": 0.17872294052169038, |
| "grad_norm": 3.505826711654663, |
| "learning_rate": 8.645220497861392e-07, |
| "loss": 0.7962, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.17880078152365977, |
| "grad_norm": 4.634178161621094, |
| "learning_rate": 8.644401107815343e-07, |
| "loss": 0.7389, |
| "step": 11485 |
| }, |
| { |
| "epoch": 0.17887862252562914, |
| "grad_norm": 2.834869623184204, |
| "learning_rate": 8.643581717769292e-07, |
| "loss": 0.837, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.17895646352759853, |
| "grad_norm": 4.649651527404785, |
| "learning_rate": 8.642762327723242e-07, |
| "loss": 0.8676, |
| "step": 11495 |
| }, |
| { |
| "epoch": 0.1790343045295679, |
| "grad_norm": 4.7943925857543945, |
| "learning_rate": 8.641942937677193e-07, |
| "loss": 0.7031, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.1791121455315373, |
| "grad_norm": 3.4490435123443604, |
| "learning_rate": 8.641123547631143e-07, |
| "loss": 0.8332, |
| "step": 11505 |
| }, |
| { |
| "epoch": 0.17918998653350665, |
| "grad_norm": 2.6124467849731445, |
| "learning_rate": 8.640304157585093e-07, |
| "loss": 0.8277, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.17926782753547604, |
| "grad_norm": 3.379868507385254, |
| "learning_rate": 8.639484767539044e-07, |
| "loss": 0.8617, |
| "step": 11515 |
| }, |
| { |
| "epoch": 0.1793456685374454, |
| "grad_norm": 4.773552417755127, |
| "learning_rate": 8.638665377492994e-07, |
| "loss": 0.8771, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.1794235095394148, |
| "grad_norm": 8.424163818359375, |
| "learning_rate": 8.637845987446945e-07, |
| "loss": 0.8067, |
| "step": 11525 |
| }, |
| { |
| "epoch": 0.17950135054138416, |
| "grad_norm": 4.11102294921875, |
| "learning_rate": 8.637026597400895e-07, |
| "loss": 0.7825, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.17957919154335356, |
| "grad_norm": 3.056248188018799, |
| "learning_rate": 8.636207207354844e-07, |
| "loss": 0.805, |
| "step": 11535 |
| }, |
| { |
| "epoch": 0.17965703254532292, |
| "grad_norm": 6.354325294494629, |
| "learning_rate": 8.635387817308795e-07, |
| "loss": 0.697, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.1797348735472923, |
| "grad_norm": 4.208707809448242, |
| "learning_rate": 8.634568427262745e-07, |
| "loss": 0.7442, |
| "step": 11545 |
| }, |
| { |
| "epoch": 0.17981271454926168, |
| "grad_norm": 3.256444215774536, |
| "learning_rate": 8.633749037216695e-07, |
| "loss": 0.7955, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.17989055555123107, |
| "grad_norm": 2.9025378227233887, |
| "learning_rate": 8.632929647170646e-07, |
| "loss": 0.8062, |
| "step": 11555 |
| }, |
| { |
| "epoch": 0.17996839655320043, |
| "grad_norm": 5.104341506958008, |
| "learning_rate": 8.632110257124597e-07, |
| "loss": 0.9282, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.1800462375551698, |
| "grad_norm": 3.7267444133758545, |
| "learning_rate": 8.631290867078546e-07, |
| "loss": 0.7416, |
| "step": 11565 |
| }, |
| { |
| "epoch": 0.1801240785571392, |
| "grad_norm": 2.9586052894592285, |
| "learning_rate": 8.630471477032497e-07, |
| "loss": 0.7961, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.18020191955910855, |
| "grad_norm": 4.650422096252441, |
| "learning_rate": 8.629652086986447e-07, |
| "loss": 0.7148, |
| "step": 11575 |
| }, |
| { |
| "epoch": 0.18027976056107795, |
| "grad_norm": 3.344991445541382, |
| "learning_rate": 8.628832696940396e-07, |
| "loss": 0.7804, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.1803576015630473, |
| "grad_norm": 3.1043858528137207, |
| "learning_rate": 8.628013306894347e-07, |
| "loss": 0.7004, |
| "step": 11585 |
| }, |
| { |
| "epoch": 0.1804354425650167, |
| "grad_norm": 4.275179862976074, |
| "learning_rate": 8.627193916848298e-07, |
| "loss": 0.702, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.18051328356698607, |
| "grad_norm": 5.884884357452393, |
| "learning_rate": 8.626374526802248e-07, |
| "loss": 0.7196, |
| "step": 11595 |
| }, |
| { |
| "epoch": 0.18059112456895546, |
| "grad_norm": 5.935474395751953, |
| "learning_rate": 8.625555136756198e-07, |
| "loss": 0.8548, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.18066896557092482, |
| "grad_norm": 3.8844761848449707, |
| "learning_rate": 8.624735746710149e-07, |
| "loss": 0.7467, |
| "step": 11605 |
| }, |
| { |
| "epoch": 0.18074680657289421, |
| "grad_norm": 3.4248223304748535, |
| "learning_rate": 8.6239163566641e-07, |
| "loss": 0.8664, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.18082464757486358, |
| "grad_norm": 4.0504679679870605, |
| "learning_rate": 8.623096966618048e-07, |
| "loss": 0.8649, |
| "step": 11615 |
| }, |
| { |
| "epoch": 0.18090248857683297, |
| "grad_norm": 3.0257060527801514, |
| "learning_rate": 8.622277576571999e-07, |
| "loss": 0.7091, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.18098032957880233, |
| "grad_norm": 4.030515193939209, |
| "learning_rate": 8.62145818652595e-07, |
| "loss": 0.8485, |
| "step": 11625 |
| }, |
| { |
| "epoch": 0.18105817058077173, |
| "grad_norm": 3.375437021255493, |
| "learning_rate": 8.6206387964799e-07, |
| "loss": 0.6832, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.1811360115827411, |
| "grad_norm": 4.179788112640381, |
| "learning_rate": 8.61981940643385e-07, |
| "loss": 0.8009, |
| "step": 11635 |
| }, |
| { |
| "epoch": 0.18121385258471048, |
| "grad_norm": 5.095760822296143, |
| "learning_rate": 8.619000016387801e-07, |
| "loss": 0.7984, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.18129169358667985, |
| "grad_norm": 3.096256732940674, |
| "learning_rate": 8.618180626341751e-07, |
| "loss": 0.8468, |
| "step": 11645 |
| }, |
| { |
| "epoch": 0.1813695345886492, |
| "grad_norm": 3.6533854007720947, |
| "learning_rate": 8.617361236295702e-07, |
| "loss": 0.7336, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.1814473755906186, |
| "grad_norm": 3.505079984664917, |
| "learning_rate": 8.616541846249651e-07, |
| "loss": 0.7025, |
| "step": 11655 |
| }, |
| { |
| "epoch": 0.18152521659258797, |
| "grad_norm": 3.6542341709136963, |
| "learning_rate": 8.615722456203601e-07, |
| "loss": 0.8489, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.18160305759455736, |
| "grad_norm": 6.4186811447143555, |
| "learning_rate": 8.614903066157552e-07, |
| "loss": 0.7712, |
| "step": 11665 |
| }, |
| { |
| "epoch": 0.18168089859652672, |
| "grad_norm": 5.470929145812988, |
| "learning_rate": 8.614083676111502e-07, |
| "loss": 0.819, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.18175873959849612, |
| "grad_norm": 3.3178632259368896, |
| "learning_rate": 8.613264286065452e-07, |
| "loss": 0.813, |
| "step": 11675 |
| }, |
| { |
| "epoch": 0.18183658060046548, |
| "grad_norm": 3.1316630840301514, |
| "learning_rate": 8.612444896019403e-07, |
| "loss": 0.7939, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.18191442160243487, |
| "grad_norm": 5.289381980895996, |
| "learning_rate": 8.611625505973354e-07, |
| "loss": 0.7354, |
| "step": 11685 |
| }, |
| { |
| "epoch": 0.18199226260440424, |
| "grad_norm": 2.2420859336853027, |
| "learning_rate": 8.610806115927303e-07, |
| "loss": 0.7458, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.18207010360637363, |
| "grad_norm": 5.712198257446289, |
| "learning_rate": 8.609986725881253e-07, |
| "loss": 0.7175, |
| "step": 11695 |
| }, |
| { |
| "epoch": 0.182147944608343, |
| "grad_norm": 4.2332353591918945, |
| "learning_rate": 8.609167335835204e-07, |
| "loss": 0.7712, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.18222578561031239, |
| "grad_norm": 3.4181714057922363, |
| "learning_rate": 8.608347945789153e-07, |
| "loss": 0.7699, |
| "step": 11705 |
| }, |
| { |
| "epoch": 0.18230362661228175, |
| "grad_norm": 4.554285049438477, |
| "learning_rate": 8.607528555743104e-07, |
| "loss": 0.7718, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.18238146761425114, |
| "grad_norm": 4.693836212158203, |
| "learning_rate": 8.606709165697055e-07, |
| "loss": 0.8118, |
| "step": 11715 |
| }, |
| { |
| "epoch": 0.1824593086162205, |
| "grad_norm": 5.117660999298096, |
| "learning_rate": 8.605889775651005e-07, |
| "loss": 0.7083, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.1825371496181899, |
| "grad_norm": 7.209866046905518, |
| "learning_rate": 8.605070385604955e-07, |
| "loss": 0.832, |
| "step": 11725 |
| }, |
| { |
| "epoch": 0.18261499062015926, |
| "grad_norm": 3.6614935398101807, |
| "learning_rate": 8.604250995558906e-07, |
| "loss": 0.7817, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.18269283162212863, |
| "grad_norm": 3.863678216934204, |
| "learning_rate": 8.603431605512855e-07, |
| "loss": 0.8006, |
| "step": 11735 |
| }, |
| { |
| "epoch": 0.18277067262409802, |
| "grad_norm": 3.742063522338867, |
| "learning_rate": 8.602612215466806e-07, |
| "loss": 0.7947, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.18284851362606738, |
| "grad_norm": 2.856513261795044, |
| "learning_rate": 8.601792825420756e-07, |
| "loss": 0.745, |
| "step": 11745 |
| }, |
| { |
| "epoch": 0.18292635462803677, |
| "grad_norm": 2.845435619354248, |
| "learning_rate": 8.600973435374707e-07, |
| "loss": 0.794, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.18300419563000614, |
| "grad_norm": 8.055949211120605, |
| "learning_rate": 8.600154045328657e-07, |
| "loss": 0.7383, |
| "step": 11755 |
| }, |
| { |
| "epoch": 0.18308203663197553, |
| "grad_norm": 4.0927510261535645, |
| "learning_rate": 8.599334655282607e-07, |
| "loss": 0.6759, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.1831598776339449, |
| "grad_norm": 3.1355292797088623, |
| "learning_rate": 8.598515265236558e-07, |
| "loss": 0.771, |
| "step": 11765 |
| }, |
| { |
| "epoch": 0.1832377186359143, |
| "grad_norm": 9.864462852478027, |
| "learning_rate": 8.597695875190508e-07, |
| "loss": 0.709, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.18331555963788365, |
| "grad_norm": 5.377257823944092, |
| "learning_rate": 8.596876485144459e-07, |
| "loss": 0.903, |
| "step": 11775 |
| }, |
| { |
| "epoch": 0.18339340063985304, |
| "grad_norm": 3.909209728240967, |
| "learning_rate": 8.596057095098408e-07, |
| "loss": 0.7645, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.1834712416418224, |
| "grad_norm": 4.229231834411621, |
| "learning_rate": 8.595237705052358e-07, |
| "loss": 0.8469, |
| "step": 11785 |
| }, |
| { |
| "epoch": 0.1835490826437918, |
| "grad_norm": 9.634142875671387, |
| "learning_rate": 8.594418315006309e-07, |
| "loss": 0.88, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.18362692364576116, |
| "grad_norm": 2.9738218784332275, |
| "learning_rate": 8.59359892496026e-07, |
| "loss": 0.689, |
| "step": 11795 |
| }, |
| { |
| "epoch": 0.18370476464773056, |
| "grad_norm": 3.0469038486480713, |
| "learning_rate": 8.592779534914209e-07, |
| "loss": 0.8383, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.18378260564969992, |
| "grad_norm": 4.129268646240234, |
| "learning_rate": 8.59196014486816e-07, |
| "loss": 0.6902, |
| "step": 11805 |
| }, |
| { |
| "epoch": 0.1838604466516693, |
| "grad_norm": 4.603461265563965, |
| "learning_rate": 8.591140754822111e-07, |
| "loss": 0.813, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.18393828765363868, |
| "grad_norm": 4.814962387084961, |
| "learning_rate": 8.59032136477606e-07, |
| "loss": 0.8819, |
| "step": 11815 |
| }, |
| { |
| "epoch": 0.18401612865560804, |
| "grad_norm": 7.185861110687256, |
| "learning_rate": 8.58950197473001e-07, |
| "loss": 0.8366, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.18409396965757743, |
| "grad_norm": 7.243460655212402, |
| "learning_rate": 8.588682584683961e-07, |
| "loss": 0.9382, |
| "step": 11825 |
| }, |
| { |
| "epoch": 0.1841718106595468, |
| "grad_norm": 6.482030391693115, |
| "learning_rate": 8.587863194637911e-07, |
| "loss": 0.8316, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.1842496516615162, |
| "grad_norm": 12.124528884887695, |
| "learning_rate": 8.587043804591861e-07, |
| "loss": 0.8205, |
| "step": 11835 |
| }, |
| { |
| "epoch": 0.18432749266348555, |
| "grad_norm": 4.70367431640625, |
| "learning_rate": 8.586224414545812e-07, |
| "loss": 0.6944, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.18440533366545495, |
| "grad_norm": 6.1089558601379395, |
| "learning_rate": 8.585405024499762e-07, |
| "loss": 0.8094, |
| "step": 11845 |
| }, |
| { |
| "epoch": 0.1844831746674243, |
| "grad_norm": 3.807187557220459, |
| "learning_rate": 8.584585634453712e-07, |
| "loss": 0.8715, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.1845610156693937, |
| "grad_norm": 4.469877243041992, |
| "learning_rate": 8.583766244407663e-07, |
| "loss": 0.7794, |
| "step": 11855 |
| }, |
| { |
| "epoch": 0.18463885667136307, |
| "grad_norm": 5.07852029800415, |
| "learning_rate": 8.582946854361612e-07, |
| "loss": 0.7603, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.18471669767333246, |
| "grad_norm": 3.7635741233825684, |
| "learning_rate": 8.582127464315563e-07, |
| "loss": 0.817, |
| "step": 11865 |
| }, |
| { |
| "epoch": 0.18479453867530182, |
| "grad_norm": 5.198869228363037, |
| "learning_rate": 8.581308074269513e-07, |
| "loss": 0.8442, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.18487237967727121, |
| "grad_norm": 3.176208019256592, |
| "learning_rate": 8.580488684223464e-07, |
| "loss": 0.8135, |
| "step": 11875 |
| }, |
| { |
| "epoch": 0.18495022067924058, |
| "grad_norm": 5.479611396789551, |
| "learning_rate": 8.579669294177414e-07, |
| "loss": 0.686, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.18502806168120997, |
| "grad_norm": 5.823884010314941, |
| "learning_rate": 8.578849904131365e-07, |
| "loss": 0.7851, |
| "step": 11885 |
| }, |
| { |
| "epoch": 0.18510590268317934, |
| "grad_norm": 4.050318717956543, |
| "learning_rate": 8.578030514085315e-07, |
| "loss": 0.7321, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.18518374368514873, |
| "grad_norm": 2.703254461288452, |
| "learning_rate": 8.577211124039265e-07, |
| "loss": 0.7569, |
| "step": 11895 |
| }, |
| { |
| "epoch": 0.1852615846871181, |
| "grad_norm": 6.201961517333984, |
| "learning_rate": 8.576391733993215e-07, |
| "loss": 0.8172, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.18533942568908748, |
| "grad_norm": 4.213263511657715, |
| "learning_rate": 8.575572343947165e-07, |
| "loss": 0.8344, |
| "step": 11905 |
| }, |
| { |
| "epoch": 0.18541726669105685, |
| "grad_norm": 6.157925128936768, |
| "learning_rate": 8.574752953901115e-07, |
| "loss": 0.7343, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.1854951076930262, |
| "grad_norm": 4.5453691482543945, |
| "learning_rate": 8.573933563855066e-07, |
| "loss": 0.7038, |
| "step": 11915 |
| }, |
| { |
| "epoch": 0.1855729486949956, |
| "grad_norm": 4.831943035125732, |
| "learning_rate": 8.573114173809017e-07, |
| "loss": 0.8805, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.18565078969696497, |
| "grad_norm": 2.977743625640869, |
| "learning_rate": 8.572294783762966e-07, |
| "loss": 0.784, |
| "step": 11925 |
| }, |
| { |
| "epoch": 0.18572863069893436, |
| "grad_norm": 5.694007396697998, |
| "learning_rate": 8.571475393716917e-07, |
| "loss": 0.6915, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.18580647170090372, |
| "grad_norm": 4.1263017654418945, |
| "learning_rate": 8.570656003670868e-07, |
| "loss": 0.7476, |
| "step": 11935 |
| }, |
| { |
| "epoch": 0.18588431270287312, |
| "grad_norm": 5.350509166717529, |
| "learning_rate": 8.569836613624816e-07, |
| "loss": 0.6784, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.18596215370484248, |
| "grad_norm": 3.7994115352630615, |
| "learning_rate": 8.569017223578767e-07, |
| "loss": 0.7191, |
| "step": 11945 |
| }, |
| { |
| "epoch": 0.18603999470681187, |
| "grad_norm": 5.086226940155029, |
| "learning_rate": 8.568197833532718e-07, |
| "loss": 0.7841, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.18611783570878124, |
| "grad_norm": 4.248946189880371, |
| "learning_rate": 8.567378443486668e-07, |
| "loss": 0.8489, |
| "step": 11955 |
| }, |
| { |
| "epoch": 0.18619567671075063, |
| "grad_norm": 2.877885580062866, |
| "learning_rate": 8.566559053440618e-07, |
| "loss": 0.66, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.18627351771272, |
| "grad_norm": 6.250997066497803, |
| "learning_rate": 8.565739663394569e-07, |
| "loss": 0.7974, |
| "step": 11965 |
| }, |
| { |
| "epoch": 0.18635135871468939, |
| "grad_norm": 5.646812915802002, |
| "learning_rate": 8.564920273348519e-07, |
| "loss": 0.87, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.18642919971665875, |
| "grad_norm": 3.944369316101074, |
| "learning_rate": 8.56410088330247e-07, |
| "loss": 0.8484, |
| "step": 11975 |
| }, |
| { |
| "epoch": 0.18650704071862814, |
| "grad_norm": 3.154167413711548, |
| "learning_rate": 8.563281493256419e-07, |
| "loss": 0.728, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.1865848817205975, |
| "grad_norm": 5.012053489685059, |
| "learning_rate": 8.562462103210369e-07, |
| "loss": 0.7125, |
| "step": 11985 |
| }, |
| { |
| "epoch": 0.1866627227225669, |
| "grad_norm": 5.746982097625732, |
| "learning_rate": 8.56164271316432e-07, |
| "loss": 0.7059, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.18674056372453626, |
| "grad_norm": 3.114208698272705, |
| "learning_rate": 8.56082332311827e-07, |
| "loss": 0.76, |
| "step": 11995 |
| }, |
| { |
| "epoch": 0.18681840472650563, |
| "grad_norm": 3.1285858154296875, |
| "learning_rate": 8.560003933072221e-07, |
| "loss": 0.8049, |
| "step": 12000 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 64233, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 3000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.805456425474064e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |