{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1098, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00546448087431694, "grad_norm": 0.23017044365406036, "learning_rate": 3.6363636363636366e-07, "loss": 1.9264214038848877, "step": 2 }, { "epoch": 0.01092896174863388, "grad_norm": 0.29104915261268616, "learning_rate": 1.090909090909091e-06, "loss": 2.0267927646636963, "step": 4 }, { "epoch": 0.01639344262295082, "grad_norm": 0.17760822176933289, "learning_rate": 1.8181818181818183e-06, "loss": 1.8153414726257324, "step": 6 }, { "epoch": 0.02185792349726776, "grad_norm": 0.17689602077007294, "learning_rate": 2.5454545454545456e-06, "loss": 1.998296856880188, "step": 8 }, { "epoch": 0.0273224043715847, "grad_norm": 0.3168371915817261, "learning_rate": 3.272727272727273e-06, "loss": 2.2566328048706055, "step": 10 }, { "epoch": 0.03278688524590164, "grad_norm": 6.834624290466309, "learning_rate": 4.000000000000001e-06, "loss": 3.0779101848602295, "step": 12 }, { "epoch": 0.03825136612021858, "grad_norm": 0.23897646367549896, "learning_rate": 4.727272727272728e-06, "loss": 1.984060525894165, "step": 14 }, { "epoch": 0.04371584699453552, "grad_norm": 0.17694684863090515, "learning_rate": 5.4545454545454545e-06, "loss": 1.8395605087280273, "step": 16 }, { "epoch": 0.04918032786885246, "grad_norm": 0.5123416185379028, "learning_rate": 6.181818181818182e-06, "loss": 2.626169443130493, "step": 18 }, { "epoch": 0.0546448087431694, "grad_norm": 0.5538182258605957, "learning_rate": 6.90909090909091e-06, "loss": 2.009780168533325, "step": 20 }, { "epoch": 0.060109289617486336, "grad_norm": 1.7161580324172974, "learning_rate": 7.636363636363638e-06, "loss": 2.2599620819091797, "step": 22 }, { "epoch": 0.06557377049180328, "grad_norm": 0.20273339748382568, "learning_rate": 8.363636363636365e-06, "loss": 1.8350398540496826, "step": 24 }, { "epoch": 0.07103825136612021, "grad_norm": 0.1498890072107315, "learning_rate": 9.090909090909091e-06, "loss": 1.871530294418335, "step": 26 }, { "epoch": 0.07650273224043716, "grad_norm": 0.16071414947509766, "learning_rate": 9.81818181818182e-06, "loss": 1.820814847946167, "step": 28 }, { "epoch": 0.08196721311475409, "grad_norm": 0.3428778350353241, "learning_rate": 1.0545454545454546e-05, "loss": 1.6833339929580688, "step": 30 }, { "epoch": 0.08743169398907104, "grad_norm": 1.4728891849517822, "learning_rate": 1.1272727272727272e-05, "loss": 2.2020010948181152, "step": 32 }, { "epoch": 0.09289617486338798, "grad_norm": 0.1522647738456726, "learning_rate": 1.2e-05, "loss": 1.7234824895858765, "step": 34 }, { "epoch": 0.09836065573770492, "grad_norm": 0.3773060142993927, "learning_rate": 1.2727272727272728e-05, "loss": 2.4320225715637207, "step": 36 }, { "epoch": 0.10382513661202186, "grad_norm": 0.489437073469162, "learning_rate": 1.3454545454545455e-05, "loss": 1.9707622528076172, "step": 38 }, { "epoch": 0.1092896174863388, "grad_norm": 0.17363367974758148, "learning_rate": 1.4181818181818183e-05, "loss": 1.6727865934371948, "step": 40 }, { "epoch": 0.11475409836065574, "grad_norm": 0.19784805178642273, "learning_rate": 1.4909090909090911e-05, "loss": 1.621885061264038, "step": 42 }, { "epoch": 0.12021857923497267, "grad_norm": 0.16416551172733307, "learning_rate": 1.563636363636364e-05, "loss": 1.6609282493591309, "step": 44 }, { "epoch": 0.12568306010928962, "grad_norm": 0.1397552490234375, "learning_rate": 1.6363636363636366e-05, "loss": 1.3601855039596558, "step": 46 }, { "epoch": 0.13114754098360656, "grad_norm": 0.16565820574760437, "learning_rate": 1.7090909090909092e-05, "loss": 1.6233359575271606, "step": 48 }, { "epoch": 0.1366120218579235, "grad_norm": 0.18556559085845947, "learning_rate": 1.781818181818182e-05, "loss": 1.619398832321167, "step": 50 }, { "epoch": 0.14207650273224043, "grad_norm": 0.13526372611522675, "learning_rate": 1.8545454545454545e-05, "loss": 1.6659531593322754, "step": 52 }, { "epoch": 0.14754098360655737, "grad_norm": 0.14406871795654297, "learning_rate": 1.9272727272727275e-05, "loss": 1.7703707218170166, "step": 54 }, { "epoch": 0.15300546448087432, "grad_norm": 0.13732993602752686, "learning_rate": 2e-05, "loss": 1.5436078310012817, "step": 56 }, { "epoch": 0.15846994535519127, "grad_norm": 0.5554673671722412, "learning_rate": 1.9998327792599505e-05, "loss": 1.1738401651382446, "step": 58 }, { "epoch": 0.16393442622950818, "grad_norm": 0.2894555330276489, "learning_rate": 1.999331179179304e-05, "loss": 1.1613794565200806, "step": 60 }, { "epoch": 0.16939890710382513, "grad_norm": 0.2021346539258957, "learning_rate": 1.9984953861534752e-05, "loss": 1.5094225406646729, "step": 62 }, { "epoch": 0.17486338797814208, "grad_norm": 0.13112854957580566, "learning_rate": 1.997325710764527e-05, "loss": 1.2485407590866089, "step": 64 }, { "epoch": 0.18032786885245902, "grad_norm": 0.37511691451072693, "learning_rate": 1.9958225876657575e-05, "loss": 1.0438565015792847, "step": 66 }, { "epoch": 0.18579234972677597, "grad_norm": 0.26088741421699524, "learning_rate": 1.9939865754201825e-05, "loss": 1.4915159940719604, "step": 68 }, { "epoch": 0.1912568306010929, "grad_norm": 0.3718518018722534, "learning_rate": 1.9918183562929717e-05, "loss": 1.3824762105941772, "step": 70 }, { "epoch": 0.19672131147540983, "grad_norm": 0.7932000160217285, "learning_rate": 1.9893187359979183e-05, "loss": 0.9995588660240173, "step": 72 }, { "epoch": 0.20218579234972678, "grad_norm": 0.33288639783859253, "learning_rate": 1.986488643398035e-05, "loss": 1.1461442708969116, "step": 74 }, { "epoch": 0.20765027322404372, "grad_norm": 1.125511646270752, "learning_rate": 1.9833291301603863e-05, "loss": 1.3429020643234253, "step": 76 }, { "epoch": 0.21311475409836064, "grad_norm": 0.1951577067375183, "learning_rate": 1.9798413703652867e-05, "loss": 1.4258100986480713, "step": 78 }, { "epoch": 0.2185792349726776, "grad_norm": 0.2255648821592331, "learning_rate": 1.976026660070012e-05, "loss": 1.4055513143539429, "step": 80 }, { "epoch": 0.22404371584699453, "grad_norm": 0.15050630271434784, "learning_rate": 1.9718864168271823e-05, "loss": 1.4016798734664917, "step": 82 }, { "epoch": 0.22950819672131148, "grad_norm": 0.1393994390964508, "learning_rate": 1.9674221791579946e-05, "loss": 1.362338662147522, "step": 84 }, { "epoch": 0.23497267759562843, "grad_norm": 0.13078755140304565, "learning_rate": 1.9626356059805085e-05, "loss": 1.2792019844055176, "step": 86 }, { "epoch": 0.24043715846994534, "grad_norm": 0.12456239014863968, "learning_rate": 1.957528475993189e-05, "loss": 0.8783624768257141, "step": 88 }, { "epoch": 0.2459016393442623, "grad_norm": 0.45559144020080566, "learning_rate": 1.952102687013938e-05, "loss": 0.9341011643409729, "step": 90 }, { "epoch": 0.25136612021857924, "grad_norm": 0.2474871426820755, "learning_rate": 1.946360255274863e-05, "loss": 1.2796369791030884, "step": 92 }, { "epoch": 0.2568306010928962, "grad_norm": 0.1871662735939026, "learning_rate": 1.9403033146730424e-05, "loss": 0.7436278462409973, "step": 94 }, { "epoch": 0.26229508196721313, "grad_norm": 0.1684049814939499, "learning_rate": 1.9339341159775647e-05, "loss": 1.3334097862243652, "step": 96 }, { "epoch": 0.2677595628415301, "grad_norm": 0.08828236162662506, "learning_rate": 1.9272550259931398e-05, "loss": 1.3062154054641724, "step": 98 }, { "epoch": 0.273224043715847, "grad_norm": 0.12633274495601654, "learning_rate": 1.9202685266805896e-05, "loss": 1.183910846710205, "step": 100 }, { "epoch": 0.2786885245901639, "grad_norm": 0.14135880768299103, "learning_rate": 1.9129772142345484e-05, "loss": 0.8230882883071899, "step": 102 }, { "epoch": 0.28415300546448086, "grad_norm": 0.20387791097164154, "learning_rate": 1.9053837981187125e-05, "loss": 1.356655478477478, "step": 104 }, { "epoch": 0.2896174863387978, "grad_norm": 0.09430485963821411, "learning_rate": 1.897491100058998e-05, "loss": 1.318677306175232, "step": 106 }, { "epoch": 0.29508196721311475, "grad_norm": 0.21191561222076416, "learning_rate": 1.8893020529949838e-05, "loss": 1.4180920124053955, "step": 108 }, { "epoch": 0.3005464480874317, "grad_norm": 0.1686364710330963, "learning_rate": 1.880819699990027e-05, "loss": 1.3188916444778442, "step": 110 }, { "epoch": 0.30601092896174864, "grad_norm": 0.17423325777053833, "learning_rate": 1.8720471931004526e-05, "loss": 1.3028515577316284, "step": 112 }, { "epoch": 0.3114754098360656, "grad_norm": 0.16852694749832153, "learning_rate": 1.8629877922042485e-05, "loss": 1.0075663328170776, "step": 114 }, { "epoch": 0.31693989071038253, "grad_norm": 0.16135092079639435, "learning_rate": 1.8536448637896866e-05, "loss": 1.2840803861618042, "step": 116 }, { "epoch": 0.3224043715846995, "grad_norm": 0.14016355574131012, "learning_rate": 1.84402187970433e-05, "loss": 1.2989857196807861, "step": 118 }, { "epoch": 0.32786885245901637, "grad_norm": 1.949554204940796, "learning_rate": 1.834122415864891e-05, "loss": 1.011613130569458, "step": 120 }, { "epoch": 0.3333333333333333, "grad_norm": 0.3110104203224182, "learning_rate": 1.8239501509284123e-05, "loss": 1.6019999980926514, "step": 122 }, { "epoch": 0.33879781420765026, "grad_norm": 0.12443661689758301, "learning_rate": 1.8135088649252725e-05, "loss": 1.2844353914260864, "step": 124 }, { "epoch": 0.3442622950819672, "grad_norm": 0.15131407976150513, "learning_rate": 1.8028024378545224e-05, "loss": 1.2516601085662842, "step": 126 }, { "epoch": 0.34972677595628415, "grad_norm": 0.19532890617847443, "learning_rate": 1.7918348482420692e-05, "loss": 0.8701586723327637, "step": 128 }, { "epoch": 0.3551912568306011, "grad_norm": 0.14577443897724152, "learning_rate": 1.7806101716622486e-05, "loss": 1.0323655605316162, "step": 130 }, { "epoch": 0.36065573770491804, "grad_norm": 0.21451306343078613, "learning_rate": 1.7691325792233378e-05, "loss": 1.0055516958236694, "step": 132 }, { "epoch": 0.366120218579235, "grad_norm": 0.14420630037784576, "learning_rate": 1.7574063360175625e-05, "loss": 1.2558668851852417, "step": 134 }, { "epoch": 0.37158469945355194, "grad_norm": 0.1369626522064209, "learning_rate": 1.745435799536183e-05, "loss": 1.25625741481781, "step": 136 }, { "epoch": 0.3770491803278688, "grad_norm": 0.23049037158489227, "learning_rate": 1.7332254180502407e-05, "loss": 1.2635902166366577, "step": 138 }, { "epoch": 0.3825136612021858, "grad_norm": 0.1365140974521637, "learning_rate": 1.7207797289575777e-05, "loss": 0.7525888085365295, "step": 140 }, { "epoch": 0.3879781420765027, "grad_norm": 0.2103356122970581, "learning_rate": 1.708103357096728e-05, "loss": 0.8429027199745178, "step": 142 }, { "epoch": 0.39344262295081966, "grad_norm": 0.4111814796924591, "learning_rate": 1.695201013028322e-05, "loss": 0.8284322619438171, "step": 144 }, { "epoch": 0.3989071038251366, "grad_norm": 0.7463256120681763, "learning_rate": 1.6820774912846335e-05, "loss": 0.7427368760108948, "step": 146 }, { "epoch": 0.40437158469945356, "grad_norm": 0.1264885663986206, "learning_rate": 1.668737668587926e-05, "loss": 1.304437518119812, "step": 148 }, { "epoch": 0.4098360655737705, "grad_norm": 0.16682182252407074, "learning_rate": 1.655186502038251e-05, "loss": 1.2201122045516968, "step": 150 }, { "epoch": 0.41530054644808745, "grad_norm": 0.10499599575996399, "learning_rate": 1.641429027271384e-05, "loss": 1.2546273469924927, "step": 152 }, { "epoch": 0.4207650273224044, "grad_norm": 0.12318555265665054, "learning_rate": 1.6274703565875736e-05, "loss": 1.2278828620910645, "step": 154 }, { "epoch": 0.4262295081967213, "grad_norm": 1.7749344110488892, "learning_rate": 1.613315677051801e-05, "loss": 1.2571786642074585, "step": 156 }, { "epoch": 0.43169398907103823, "grad_norm": 0.3517024517059326, "learning_rate": 1.598970248566261e-05, "loss": 1.0339151620864868, "step": 158 }, { "epoch": 0.4371584699453552, "grad_norm": 0.21650607883930206, "learning_rate": 1.5844394019157697e-05, "loss": 1.2541024684906006, "step": 160 }, { "epoch": 0.4426229508196721, "grad_norm": 0.4434749186038971, "learning_rate": 1.5697285367868393e-05, "loss": 0.8460209965705872, "step": 162 }, { "epoch": 0.44808743169398907, "grad_norm": 0.13023316860198975, "learning_rate": 1.5548431197611448e-05, "loss": 1.2656488418579102, "step": 164 }, { "epoch": 0.453551912568306, "grad_norm": 5.6986212730407715, "learning_rate": 1.539788682284133e-05, "loss": 0.8476435542106628, "step": 166 }, { "epoch": 0.45901639344262296, "grad_norm": 0.09535890817642212, "learning_rate": 1.5245708186095275e-05, "loss": 1.2499439716339111, "step": 168 }, { "epoch": 0.4644808743169399, "grad_norm": 0.4119236469268799, "learning_rate": 1.5091951837204973e-05, "loss": 1.5728163719177246, "step": 170 }, { "epoch": 0.46994535519125685, "grad_norm": 0.41598179936408997, "learning_rate": 1.4936674912282525e-05, "loss": 0.907516360282898, "step": 172 }, { "epoch": 0.47540983606557374, "grad_norm": 0.16133107244968414, "learning_rate": 1.4779935112488597e-05, "loss": 1.2518316507339478, "step": 174 }, { "epoch": 0.4808743169398907, "grad_norm": 0.12417590618133545, "learning_rate": 1.4621790682590556e-05, "loss": 1.2215498685836792, "step": 176 }, { "epoch": 0.48633879781420764, "grad_norm": 0.37595632672309875, "learning_rate": 1.4462300389318635e-05, "loss": 1.3558915853500366, "step": 178 }, { "epoch": 0.4918032786885246, "grad_norm": 0.4406963288784027, "learning_rate": 1.4301523499528099e-05, "loss": 0.9912778735160828, "step": 180 }, { "epoch": 0.4972677595628415, "grad_norm": 0.5479982495307922, "learning_rate": 1.4139519758175602e-05, "loss": 1.2772217988967896, "step": 182 }, { "epoch": 0.5027322404371585, "grad_norm": 0.11196983605623245, "learning_rate": 1.3976349366117861e-05, "loss": 1.2128099203109741, "step": 184 }, { "epoch": 0.5081967213114754, "grad_norm": 0.11452614516019821, "learning_rate": 1.3812072957740898e-05, "loss": 1.2313976287841797, "step": 186 }, { "epoch": 0.5136612021857924, "grad_norm": 0.7617996335029602, "learning_rate": 1.3646751578428231e-05, "loss": 0.4828013777732849, "step": 188 }, { "epoch": 0.5191256830601093, "grad_norm": 0.16669736802577972, "learning_rate": 1.3480446661876295e-05, "loss": 1.2047618627548218, "step": 190 }, { "epoch": 0.5245901639344263, "grad_norm": 0.1152682974934578, "learning_rate": 1.3313220007265572e-05, "loss": 1.2386715412139893, "step": 192 }, { "epoch": 0.5300546448087432, "grad_norm": 0.10782720148563385, "learning_rate": 1.3145133756295936e-05, "loss": 1.2308696508407593, "step": 194 }, { "epoch": 0.5355191256830601, "grad_norm": 0.19120632112026215, "learning_rate": 1.2976250370094668e-05, "loss": 0.9248079657554626, "step": 196 }, { "epoch": 0.5409836065573771, "grad_norm": 0.14845414459705353, "learning_rate": 1.2806632606005822e-05, "loss": 1.0146936178207397, "step": 198 }, { "epoch": 0.546448087431694, "grad_norm": 0.26761510968208313, "learning_rate": 1.2636343494269479e-05, "loss": 1.045541763305664, "step": 200 }, { "epoch": 0.5519125683060109, "grad_norm": 0.08655080199241638, "learning_rate": 1.2465446314599609e-05, "loss": 1.0569703578948975, "step": 202 }, { "epoch": 0.5573770491803278, "grad_norm": 0.17617474496364594, "learning_rate": 1.2294004572669228e-05, "loss": 1.194622278213501, "step": 204 }, { "epoch": 0.5628415300546448, "grad_norm": 0.22285908460617065, "learning_rate": 1.2122081976511581e-05, "loss": 1.2368446588516235, "step": 206 }, { "epoch": 0.5683060109289617, "grad_norm": 0.18463526666164398, "learning_rate": 1.1949742412846142e-05, "loss": 1.119215488433838, "step": 208 }, { "epoch": 0.5737704918032787, "grad_norm": 0.14102661609649658, "learning_rate": 1.177704992333818e-05, "loss": 1.2999699115753174, "step": 210 }, { "epoch": 0.5792349726775956, "grad_norm": 0.14098992943763733, "learning_rate": 1.1604068680800809e-05, "loss": 1.216047763824463, "step": 212 }, { "epoch": 0.5846994535519126, "grad_norm": 0.1435597836971283, "learning_rate": 1.1430862965348224e-05, "loss": 1.3120372295379639, "step": 214 }, { "epoch": 0.5901639344262295, "grad_norm": 0.0981784462928772, "learning_rate": 1.1257497140509141e-05, "loss": 1.212526798248291, "step": 216 }, { "epoch": 0.5956284153005464, "grad_norm": 0.1765955686569214, "learning_rate": 1.1084035629309176e-05, "loss": 1.2513571977615356, "step": 218 }, { "epoch": 0.6010928961748634, "grad_norm": 0.1305796355009079, "learning_rate": 1.0910542890331162e-05, "loss": 1.1725019216537476, "step": 220 }, { "epoch": 0.6065573770491803, "grad_norm": 0.14715325832366943, "learning_rate": 1.0737083393762213e-05, "loss": 0.7729817032814026, "step": 222 }, { "epoch": 0.6120218579234973, "grad_norm": 0.183350071310997, "learning_rate": 1.0563721597436525e-05, "loss": 1.1960976123809814, "step": 224 }, { "epoch": 0.6174863387978142, "grad_norm": 0.12949278950691223, "learning_rate": 1.039052192288271e-05, "loss": 1.2234892845153809, "step": 226 }, { "epoch": 0.6229508196721312, "grad_norm": 0.3930734694004059, "learning_rate": 1.0217548731384677e-05, "loss": 0.7811821699142456, "step": 228 }, { "epoch": 0.6284153005464481, "grad_norm": 0.24914288520812988, "learning_rate": 1.0044866300064842e-05, "loss": 1.1955829858779907, "step": 230 }, { "epoch": 0.6338797814207651, "grad_norm": 0.13359300792217255, "learning_rate": 9.872538797998672e-06, "loss": 1.229300856590271, "step": 232 }, { "epoch": 0.639344262295082, "grad_norm": 0.13108184933662415, "learning_rate": 9.700630262369337e-06, "loss": 0.9536800980567932, "step": 234 }, { "epoch": 0.644808743169399, "grad_norm": 0.20512312650680542, "learning_rate": 9.529204574671391e-06, "loss": 1.5177414417266846, "step": 236 }, { "epoch": 0.6502732240437158, "grad_norm": 0.13552652299404144, "learning_rate": 9.3583254369723e-06, "loss": 1.1782840490341187, "step": 238 }, { "epoch": 0.6557377049180327, "grad_norm": 0.14942114055156708, "learning_rate": 9.188056348240655e-06, "loss": 1.1067190170288086, "step": 240 }, { "epoch": 0.6612021857923497, "grad_norm": 0.15014563500881195, "learning_rate": 9.018460580749842e-06, "loss": 0.7160718441009521, "step": 242 }, { "epoch": 0.6666666666666666, "grad_norm": 0.25949665904045105, "learning_rate": 8.849601156565972e-06, "loss": 1.3130247592926025, "step": 244 }, { "epoch": 0.6721311475409836, "grad_norm": 1.2000055313110352, "learning_rate": 8.68154082412877e-06, "loss": 1.1773432493209839, "step": 246 }, { "epoch": 0.6775956284153005, "grad_norm": 0.12505550682544708, "learning_rate": 8.514342034934159e-06, "loss": 1.1854091882705688, "step": 248 }, { "epoch": 0.6830601092896175, "grad_norm": 0.2346983551979065, "learning_rate": 8.348066920327163e-06, "loss": 0.3909367322921753, "step": 250 }, { "epoch": 0.6885245901639344, "grad_norm": 0.10142967849969864, "learning_rate": 8.182777268413822e-06, "loss": 1.246644377708435, "step": 252 }, { "epoch": 0.6939890710382514, "grad_norm": 0.10150952637195587, "learning_rate": 8.018534501100611e-06, "loss": 1.2059601545333862, "step": 254 }, { "epoch": 0.6994535519125683, "grad_norm": 0.1962408721446991, "learning_rate": 7.855399651269982e-06, "loss": 0.9686606526374817, "step": 256 }, { "epoch": 0.7049180327868853, "grad_norm": 0.2061769962310791, "learning_rate": 7.6934333401004e-06, "loss": 1.2205626964569092, "step": 258 }, { "epoch": 0.7103825136612022, "grad_norm": 0.15705908834934235, "learning_rate": 7.53269575453947e-06, "loss": 1.035886526107788, "step": 260 }, { "epoch": 0.7158469945355191, "grad_norm": 0.4774817228317261, "learning_rate": 7.373246624938324e-06, "loss": 1.2100485563278198, "step": 262 }, { "epoch": 0.7213114754098361, "grad_norm": 0.14761529862880707, "learning_rate": 7.215145202855746e-06, "loss": 1.1908841133117676, "step": 264 }, { "epoch": 0.726775956284153, "grad_norm": 0.1933698058128357, "learning_rate": 7.0584502390401865e-06, "loss": 0.7691932916641235, "step": 266 }, { "epoch": 0.73224043715847, "grad_norm": 0.16835708916187286, "learning_rate": 6.903219961597891e-06, "loss": 1.1964633464813232, "step": 268 }, { "epoch": 0.7377049180327869, "grad_norm": 0.2084718942642212, "learning_rate": 6.7495120543552475e-06, "loss": 1.2518548965454102, "step": 270 }, { "epoch": 0.7431693989071039, "grad_norm": 0.1384730041027069, "learning_rate": 6.59738363542336e-06, "loss": 1.2805969715118408, "step": 272 }, { "epoch": 0.7486338797814208, "grad_norm": 0.14290660619735718, "learning_rate": 6.446891235972894e-06, "loss": 1.3189456462860107, "step": 274 }, { "epoch": 0.7540983606557377, "grad_norm": 0.31153976917266846, "learning_rate": 6.298090779226977e-06, "loss": 1.2968159914016724, "step": 276 }, { "epoch": 0.7595628415300546, "grad_norm": 0.11012361198663712, "learning_rate": 6.151037559680047e-06, "loss": 0.846051037311554, "step": 278 }, { "epoch": 0.7650273224043715, "grad_norm": 0.17122408747673035, "learning_rate": 6.005786222550319e-06, "loss": 1.2251654863357544, "step": 280 }, { "epoch": 0.7704918032786885, "grad_norm": 0.11512715369462967, "learning_rate": 5.8623907434735515e-06, "loss": 1.1258071660995483, "step": 282 }, { "epoch": 0.7759562841530054, "grad_norm": 0.18557684123516083, "learning_rate": 5.720904408445589e-06, "loss": 1.2396138906478882, "step": 284 }, { "epoch": 0.7814207650273224, "grad_norm": 0.10340467095375061, "learning_rate": 5.581379794021202e-06, "loss": 1.2666516304016113, "step": 286 }, { "epoch": 0.7868852459016393, "grad_norm": 0.32197245955467224, "learning_rate": 5.443868747776579e-06, "loss": 0.7289301156997681, "step": 288 }, { "epoch": 0.7923497267759563, "grad_norm": 0.11029759049415588, "learning_rate": 5.308422369042644e-06, "loss": 0.7729415893554688, "step": 290 }, { "epoch": 0.7978142076502732, "grad_norm": 0.1230657696723938, "learning_rate": 5.175090989916483e-06, "loss": 0.9814428091049194, "step": 292 }, { "epoch": 0.8032786885245902, "grad_norm": 0.1301792412996292, "learning_rate": 5.043924156557844e-06, "loss": 1.2187029123306274, "step": 294 }, { "epoch": 0.8087431693989071, "grad_norm": 0.13773725926876068, "learning_rate": 4.914970610777725e-06, "loss": 1.2197258472442627, "step": 296 }, { "epoch": 0.8142076502732241, "grad_norm": 0.14241813123226166, "learning_rate": 4.788278271925802e-06, "loss": 1.2182695865631104, "step": 298 }, { "epoch": 0.819672131147541, "grad_norm": 0.14574883878231049, "learning_rate": 4.663894219083548e-06, "loss": 0.8696047067642212, "step": 300 }, { "epoch": 0.825136612021858, "grad_norm": 0.16248951852321625, "learning_rate": 4.541864673569551e-06, "loss": 1.220901370048523, "step": 302 }, { "epoch": 0.8306010928961749, "grad_norm": 0.17606770992279053, "learning_rate": 4.422234981763613e-06, "loss": 1.1022499799728394, "step": 304 }, { "epoch": 0.8360655737704918, "grad_norm": 0.16798973083496094, "learning_rate": 4.305049598255946e-06, "loss": 1.2149680852890015, "step": 306 }, { "epoch": 0.8415300546448088, "grad_norm": 0.15118278563022614, "learning_rate": 4.190352069327777e-06, "loss": 1.2510839700698853, "step": 308 }, { "epoch": 0.8469945355191257, "grad_norm": 0.18143419921398163, "learning_rate": 4.078185016769484e-06, "loss": 1.1982481479644775, "step": 310 }, { "epoch": 0.8524590163934426, "grad_norm": 0.1455654352903366, "learning_rate": 3.968590122042265e-06, "loss": 1.2104380130767822, "step": 312 }, { "epoch": 0.8579234972677595, "grad_norm": 0.13336078822612762, "learning_rate": 3.861608110789228e-06, "loss": 1.232424259185791, "step": 314 }, { "epoch": 0.8633879781420765, "grad_norm": 0.172093465924263, "learning_rate": 3.757278737701697e-06, "loss": 1.2476005554199219, "step": 316 }, { "epoch": 0.8688524590163934, "grad_norm": 0.4827375113964081, "learning_rate": 3.6556407717462856e-06, "loss": 0.7419775128364563, "step": 318 }, { "epoch": 0.8743169398907104, "grad_norm": 0.2118140161037445, "learning_rate": 3.5567319817582944e-06, "loss": 1.1996129751205444, "step": 320 }, { "epoch": 0.8797814207650273, "grad_norm": 0.10368747264146805, "learning_rate": 3.4605891224067423e-06, "loss": 0.7767283916473389, "step": 322 }, { "epoch": 0.8852459016393442, "grad_norm": 0.18676143884658813, "learning_rate": 3.3672479205362764e-06, "loss": 1.491099238395691, "step": 324 }, { "epoch": 0.8907103825136612, "grad_norm": 0.1528121680021286, "learning_rate": 3.276743061891014e-06, "loss": 1.1976739168167114, "step": 326 }, { "epoch": 0.8961748633879781, "grad_norm": 0.1898827999830246, "learning_rate": 3.1891081782252726e-06, "loss": 1.144290566444397, "step": 328 }, { "epoch": 0.9016393442622951, "grad_norm": 0.18415604531764984, "learning_rate": 3.1043758348059384e-06, "loss": 1.2545756101608276, "step": 330 }, { "epoch": 0.907103825136612, "grad_norm": 0.141061931848526, "learning_rate": 3.0225775183111784e-06, "loss": 1.2385872602462769, "step": 332 }, { "epoch": 0.912568306010929, "grad_norm": 0.3184133470058441, "learning_rate": 2.943743625129917e-06, "loss": 1.4611538648605347, "step": 334 }, { "epoch": 0.9180327868852459, "grad_norm": 0.35060685873031616, "learning_rate": 2.867903450066513e-06, "loss": 1.186466932296753, "step": 336 }, { "epoch": 0.9234972677595629, "grad_norm": 0.13374803960323334, "learning_rate": 2.795085175454741e-06, "loss": 1.2442353963851929, "step": 338 }, { "epoch": 0.9289617486338798, "grad_norm": 0.8894411325454712, "learning_rate": 2.7253158606851983e-06, "loss": 0.7970354557037354, "step": 340 }, { "epoch": 0.9344262295081968, "grad_norm": 0.39505577087402344, "learning_rate": 2.6586214321499952e-06, "loss": 1.1527299880981445, "step": 342 }, { "epoch": 0.9398907103825137, "grad_norm": 0.21840043365955353, "learning_rate": 2.5950266736084558e-06, "loss": 0.7329099774360657, "step": 344 }, { "epoch": 0.9453551912568307, "grad_norm": 0.18624389171600342, "learning_rate": 2.5345552169774413e-06, "loss": 1.213990569114685, "step": 346 }, { "epoch": 0.9508196721311475, "grad_norm": 0.20502322912216187, "learning_rate": 2.477229533549685e-06, "loss": 1.0040937662124634, "step": 348 }, { "epoch": 0.9562841530054644, "grad_norm": 0.13913527131080627, "learning_rate": 2.423070925643422e-06, "loss": 1.195319652557373, "step": 350 }, { "epoch": 0.9617486338797814, "grad_norm": 0.25502294301986694, "learning_rate": 2.372099518686416e-06, "loss": 1.3571830987930298, "step": 352 }, { "epoch": 0.9672131147540983, "grad_norm": 0.48177042603492737, "learning_rate": 2.324334253737321e-06, "loss": 0.7296788692474365, "step": 354 }, { "epoch": 0.9726775956284153, "grad_norm": 0.2695556581020355, "learning_rate": 2.2797928804471413e-06, "loss": 0.7443707585334778, "step": 356 }, { "epoch": 0.9781420765027322, "grad_norm": 0.2975868880748749, "learning_rate": 2.2384919504634465e-06, "loss": 1.2335455417633057, "step": 358 }, { "epoch": 0.9836065573770492, "grad_norm": 0.13546700775623322, "learning_rate": 2.2004468112797345e-06, "loss": 0.8478338718414307, "step": 360 }, { "epoch": 0.9890710382513661, "grad_norm": 0.15997223556041718, "learning_rate": 2.165671600532298e-06, "loss": 1.1819065809249878, "step": 362 }, { "epoch": 0.994535519125683, "grad_norm": 0.26818767189979553, "learning_rate": 2.134179240746638e-06, "loss": 1.3250752687454224, "step": 364 }, { "epoch": 1.0, "grad_norm": 0.16711430251598358, "learning_rate": 2.1059814345354434e-06, "loss": 1.2777149677276611, "step": 366 }, { "epoch": 1.005464480874317, "grad_norm": 0.13456079363822937, "learning_rate": 2.0810886602498733e-06, "loss": 0.9346177577972412, "step": 368 }, { "epoch": 1.010928961748634, "grad_norm": 0.2345515638589859, "learning_rate": 2.059510168085791e-06, "loss": 1.343198537826538, "step": 370 }, { "epoch": 1.0163934426229508, "grad_norm": 0.18461638689041138, "learning_rate": 2.0412539766463697e-06, "loss": 1.2866058349609375, "step": 372 }, { "epoch": 1.0218579234972678, "grad_norm": 0.1437111347913742, "learning_rate": 2.0263268699623746e-06, "loss": 1.1869018077850342, "step": 374 }, { "epoch": 1.0273224043715847, "grad_norm": 0.13092809915542603, "learning_rate": 2.0147343949711965e-06, "loss": 1.1603018045425415, "step": 376 }, { "epoch": 1.0327868852459017, "grad_norm": 0.24336589872837067, "learning_rate": 2.0064808594556066e-06, "loss": 1.1444275379180908, "step": 378 }, { "epoch": 1.0382513661202186, "grad_norm": 0.13655312359333038, "learning_rate": 2.0015693304429757e-06, "loss": 1.1514266729354858, "step": 380 }, { "epoch": 1.0437158469945356, "grad_norm": 0.09100303798913956, "learning_rate": 2.000001633065562e-06, "loss": 0.7742247581481934, "step": 382 }, { "epoch": 1.0491803278688525, "grad_norm": 0.18667501211166382, "learning_rate": 2.0017783498822896e-06, "loss": 1.1750892400741577, "step": 384 }, { "epoch": 1.0546448087431695, "grad_norm": 0.14683479070663452, "learning_rate": 2.006898820662268e-06, "loss": 1.1899375915527344, "step": 386 }, { "epoch": 1.0601092896174864, "grad_norm": 0.17781662940979004, "learning_rate": 2.0153611426301325e-06, "loss": 1.5731885433197021, "step": 388 }, { "epoch": 1.0655737704918034, "grad_norm": 0.09566520154476166, "learning_rate": 2.027162171173126e-06, "loss": 0.9452205300331116, "step": 390 }, { "epoch": 1.0710382513661203, "grad_norm": 0.1786738634109497, "learning_rate": 2.0422975210096317e-06, "loss": 0.6096203327178955, "step": 392 }, { "epoch": 1.0765027322404372, "grad_norm": 0.15426206588745117, "learning_rate": 2.0607615678187605e-06, "loss": 1.1949257850646973, "step": 394 }, { "epoch": 1.0819672131147542, "grad_norm": 0.1298629641532898, "learning_rate": 2.082547450330353e-06, "loss": 1.1203322410583496, "step": 396 }, { "epoch": 1.0874316939890711, "grad_norm": 0.1290188431739807, "learning_rate": 2.1076470728746407e-06, "loss": 1.1237056255340576, "step": 398 }, { "epoch": 1.092896174863388, "grad_norm": 0.2040422558784485, "learning_rate": 2.136051108390608e-06, "loss": 1.2583763599395752, "step": 400 }, { "epoch": 1.098360655737705, "grad_norm": 0.10099250823259354, "learning_rate": 2.167749001891944e-06, "loss": 1.1252448558807373, "step": 402 }, { "epoch": 1.1038251366120218, "grad_norm": 0.11583796888589859, "learning_rate": 2.202728974389296e-06, "loss": 1.1236039400100708, "step": 404 }, { "epoch": 1.1092896174863387, "grad_norm": 0.13602401316165924, "learning_rate": 2.240978027267357e-06, "loss": 1.15111243724823, "step": 406 }, { "epoch": 1.1147540983606556, "grad_norm": 0.09002802520990372, "learning_rate": 2.2824819471151736e-06, "loss": 1.4592684507369995, "step": 408 }, { "epoch": 1.1202185792349726, "grad_norm": 0.2053132951259613, "learning_rate": 2.327225311007878e-06, "loss": 1.1027615070343018, "step": 410 }, { "epoch": 1.1256830601092895, "grad_norm": 0.16059550642967224, "learning_rate": 2.3751914922378623e-06, "loss": 1.101325273513794, "step": 412 }, { "epoch": 1.1311475409836065, "grad_norm": 0.13088057935237885, "learning_rate": 2.4263626664932998e-06, "loss": 0.57912278175354, "step": 414 }, { "epoch": 1.1366120218579234, "grad_norm": 0.1548115313053131, "learning_rate": 2.4807198184816817e-06, "loss": 1.3488638401031494, "step": 416 }, { "epoch": 1.1420765027322404, "grad_norm": 0.4698297083377838, "learning_rate": 2.5382427489959373e-06, "loss": 1.2834604978561401, "step": 418 }, { "epoch": 1.1475409836065573, "grad_norm": 0.1722850799560547, "learning_rate": 2.5989100824204876e-06, "loss": 1.126566767692566, "step": 420 }, { "epoch": 1.1530054644808743, "grad_norm": 0.1067054495215416, "learning_rate": 2.662699274674462e-06, "loss": 1.2055656909942627, "step": 422 }, { "epoch": 1.1584699453551912, "grad_norm": 0.10827223211526871, "learning_rate": 2.7295866215891107e-06, "loss": 1.1322380304336548, "step": 424 }, { "epoch": 1.1639344262295082, "grad_norm": 0.22720572352409363, "learning_rate": 2.799547267716326e-06, "loss": 0.7926866412162781, "step": 426 }, { "epoch": 1.169398907103825, "grad_norm": 0.14912304282188416, "learning_rate": 2.872555215564946e-06, "loss": 1.257475733757019, "step": 428 }, { "epoch": 1.174863387978142, "grad_norm": 0.12470504641532898, "learning_rate": 2.9485833352614895e-06, "loss": 1.196222186088562, "step": 430 }, { "epoch": 1.180327868852459, "grad_norm": 0.2074936479330063, "learning_rate": 3.027603374631647e-06, "loss": 1.1812493801116943, "step": 432 }, { "epoch": 1.185792349726776, "grad_norm": 0.15828382968902588, "learning_rate": 3.1095859696988273e-06, "loss": 1.1702839136123657, "step": 434 }, { "epoch": 1.1912568306010929, "grad_norm": 0.1257786899805069, "learning_rate": 3.1945006555958885e-06, "loss": 0.592043399810791, "step": 436 }, { "epoch": 1.1967213114754098, "grad_norm": 0.0843435600399971, "learning_rate": 3.2823158778858976e-06, "loss": 0.6085972785949707, "step": 438 }, { "epoch": 1.2021857923497268, "grad_norm": 0.1315852850675583, "learning_rate": 3.372999004287839e-06, "loss": 1.0785596370697021, "step": 440 }, { "epoch": 1.2076502732240437, "grad_norm": 0.15027481317520142, "learning_rate": 3.4665163368028044e-06, "loss": 1.4450383186340332, "step": 442 }, { "epoch": 1.2131147540983607, "grad_norm": 0.3036656379699707, "learning_rate": 3.562833124236238e-06, "loss": 1.367746353149414, "step": 444 }, { "epoch": 1.2185792349726776, "grad_norm": 0.13871243596076965, "learning_rate": 3.6619135751115325e-06, "loss": 0.6442332863807678, "step": 446 }, { "epoch": 1.2240437158469946, "grad_norm": 0.22103987634181976, "learning_rate": 3.763720870970201e-06, "loss": 0.9271941184997559, "step": 448 }, { "epoch": 1.2295081967213115, "grad_norm": 0.13876283168792725, "learning_rate": 3.86821718005367e-06, "loss": 1.1712263822555542, "step": 450 }, { "epoch": 1.2349726775956285, "grad_norm": 0.1687919646501541, "learning_rate": 3.975363671361641e-06, "loss": 0.7494930028915405, "step": 452 }, { "epoch": 1.2404371584699454, "grad_norm": 0.16324791312217712, "learning_rate": 4.0851205290817254e-06, "loss": 1.1281697750091553, "step": 454 }, { "epoch": 1.2459016393442623, "grad_norm": 0.11219220608472824, "learning_rate": 4.197446967385105e-06, "loss": 1.198438286781311, "step": 456 }, { "epoch": 1.2513661202185793, "grad_norm": 0.1335090547800064, "learning_rate": 4.312301245582571e-06, "loss": 0.6554253697395325, "step": 458 }, { "epoch": 1.2568306010928962, "grad_norm": 0.2589283883571625, "learning_rate": 4.429640683635466e-06, "loss": 1.20187246799469, "step": 460 }, { "epoch": 1.2622950819672132, "grad_norm": 0.1348496526479721, "learning_rate": 4.549421678015633e-06, "loss": 1.147897720336914, "step": 462 }, { "epoch": 1.2677595628415301, "grad_norm": 0.28325796127319336, "learning_rate": 4.671599717908582e-06, "loss": 0.9092267155647278, "step": 464 }, { "epoch": 1.273224043715847, "grad_norm": 0.16627341508865356, "learning_rate": 4.796129401753752e-06, "loss": 1.2192769050598145, "step": 466 }, { "epoch": 1.278688524590164, "grad_norm": 0.23972608149051666, "learning_rate": 4.922964454115837e-06, "loss": 0.45847344398498535, "step": 468 }, { "epoch": 1.2841530054644807, "grad_norm": 0.5658842921257019, "learning_rate": 5.0520577428807835e-06, "loss": 0.4286736845970154, "step": 470 }, { "epoch": 1.289617486338798, "grad_norm": 0.46968233585357666, "learning_rate": 5.183361296770197e-06, "loss": 1.0588371753692627, "step": 472 }, { "epoch": 1.2950819672131146, "grad_norm": 0.38157007098197937, "learning_rate": 5.316826323167505e-06, "loss": 0.9430091977119446, "step": 474 }, { "epoch": 1.3005464480874318, "grad_norm": 0.15258565545082092, "learning_rate": 5.4524032262494175e-06, "loss": 1.13564932346344, "step": 476 }, { "epoch": 1.3060109289617485, "grad_norm": 0.2158811241388321, "learning_rate": 5.590041625415783e-06, "loss": 1.1955578327178955, "step": 478 }, { "epoch": 1.3114754098360657, "grad_norm": 0.4422401189804077, "learning_rate": 5.7296903740111076e-06, "loss": 1.2549294233322144, "step": 480 }, { "epoch": 1.3169398907103824, "grad_norm": 0.11041804403066635, "learning_rate": 5.87129757833077e-06, "loss": 1.1942386627197266, "step": 482 }, { "epoch": 1.3224043715846996, "grad_norm": 0.14040009677410126, "learning_rate": 6.014810616904747e-06, "loss": 1.1555407047271729, "step": 484 }, { "epoch": 1.3278688524590163, "grad_norm": 0.09884827584028244, "learning_rate": 6.160176160051906e-06, "loss": 0.1766074001789093, "step": 486 }, { "epoch": 1.3333333333333333, "grad_norm": 0.18465925753116608, "learning_rate": 6.307340189697344e-06, "loss": 1.1929932832717896, "step": 488 }, { "epoch": 1.3387978142076502, "grad_norm": 0.1754603087902069, "learning_rate": 6.456248019445626e-06, "loss": 1.0935235023498535, "step": 490 }, { "epoch": 1.3442622950819672, "grad_norm": 0.18355412781238556, "learning_rate": 6.606844314902321e-06, "loss": 1.175545334815979, "step": 492 }, { "epoch": 1.349726775956284, "grad_norm": 0.2099565863609314, "learning_rate": 6.7590731142363915e-06, "loss": 0.6870489716529846, "step": 494 }, { "epoch": 1.355191256830601, "grad_norm": 0.1687641441822052, "learning_rate": 6.912877848975638e-06, "loss": 1.1621768474578857, "step": 496 }, { "epoch": 1.360655737704918, "grad_norm": 0.24288491904735565, "learning_rate": 7.068201365027712e-06, "loss": 0.9009864330291748, "step": 498 }, { "epoch": 1.366120218579235, "grad_norm": 0.164619579911232, "learning_rate": 7.2249859439185875e-06, "loss": 1.132088541984558, "step": 500 }, { "epoch": 1.3715846994535519, "grad_norm": 0.12828585505485535, "learning_rate": 7.3831733242409285e-06, "loss": 1.2778956890106201, "step": 502 }, { "epoch": 1.3770491803278688, "grad_norm": 0.2682889997959137, "learning_rate": 7.5427047233040485e-06, "loss": 1.1193040609359741, "step": 504 }, { "epoch": 1.3825136612021858, "grad_norm": 0.12167899310588837, "learning_rate": 7.703520858977702e-06, "loss": 0.73407381772995, "step": 506 }, { "epoch": 1.3879781420765027, "grad_norm": 0.15630660951137543, "learning_rate": 7.865561971721389e-06, "loss": 0.7426860928535461, "step": 508 }, { "epoch": 1.3934426229508197, "grad_norm": 0.19107486307621002, "learning_rate": 8.02876784679115e-06, "loss": 1.1898062229156494, "step": 510 }, { "epoch": 1.3989071038251366, "grad_norm": 0.176279217004776, "learning_rate": 8.193077836615386e-06, "loss": 1.1608000993728638, "step": 512 }, { "epoch": 1.4043715846994536, "grad_norm": 0.18996112048625946, "learning_rate": 8.35843088333168e-06, "loss": 0.6805540323257446, "step": 514 }, { "epoch": 1.4098360655737705, "grad_norm": 0.3888643682003021, "learning_rate": 8.524765541475935e-06, "loss": 1.573140025138855, "step": 516 }, { "epoch": 1.4153005464480874, "grad_norm": 0.1736215204000473, "learning_rate": 8.692020000815627e-06, "loss": 0.8413932919502258, "step": 518 }, { "epoch": 1.4207650273224044, "grad_norm": 0.1383085995912552, "learning_rate": 8.860132109318622e-06, "loss": 0.7804769277572632, "step": 520 }, { "epoch": 1.4262295081967213, "grad_norm": 0.18307553231716156, "learning_rate": 9.029039396248916e-06, "loss": 0.7059910893440247, "step": 522 }, { "epoch": 1.4316939890710383, "grad_norm": 0.15533241629600525, "learning_rate": 9.198679095380924e-06, "loss": 0.8162409663200378, "step": 524 }, { "epoch": 1.4371584699453552, "grad_norm": 0.3435671329498291, "learning_rate": 9.368988168323451e-06, "loss": 1.0322041511535645, "step": 526 }, { "epoch": 1.4426229508196722, "grad_norm": 0.10851337015628815, "learning_rate": 9.539903327944926e-06, "loss": 1.1319749355316162, "step": 528 }, { "epoch": 1.4480874316939891, "grad_norm": 0.13830029964447021, "learning_rate": 9.711361061890942e-06, "loss": 0.7779232263565063, "step": 530 }, { "epoch": 1.453551912568306, "grad_norm": 0.187329962849617, "learning_rate": 9.8832976561856e-06, "loss": 1.1993160247802734, "step": 532 }, { "epoch": 1.459016393442623, "grad_norm": 0.1746772676706314, "learning_rate": 1.0055649218907688e-05, "loss": 1.1548646688461304, "step": 534 }, { "epoch": 1.46448087431694, "grad_norm": 0.10917941480875015, "learning_rate": 1.0228351703933075e-05, "loss": 1.146438479423523, "step": 536 }, { "epoch": 1.469945355191257, "grad_norm": 0.13628044724464417, "learning_rate": 1.0401340934734287e-05, "loss": 0.6834872364997864, "step": 538 }, { "epoch": 1.4754098360655736, "grad_norm": 0.1356225311756134, "learning_rate": 1.0574552628228691e-05, "loss": 0.9159919619560242, "step": 540 }, { "epoch": 1.4808743169398908, "grad_norm": 0.40020471811294556, "learning_rate": 1.0747922418666115e-05, "loss": 1.231278896331787, "step": 542 }, { "epoch": 1.4863387978142075, "grad_norm": 0.13868877291679382, "learning_rate": 1.0921385881547311e-05, "loss": 1.125664234161377, "step": 544 }, { "epoch": 1.4918032786885247, "grad_norm": 0.19291168451309204, "learning_rate": 1.1094878557564217e-05, "loss": 0.7880281805992126, "step": 546 }, { "epoch": 1.4972677595628414, "grad_norm": 0.22060799598693848, "learning_rate": 1.1268335976553098e-05, "loss": 0.9573584198951721, "step": 548 }, { "epoch": 1.5027322404371586, "grad_norm": 0.11164124310016632, "learning_rate": 1.144169368145179e-05, "loss": 1.1322665214538574, "step": 550 }, { "epoch": 1.5081967213114753, "grad_norm": 0.5197082757949829, "learning_rate": 1.1614887252252076e-05, "loss": 0.8590179085731506, "step": 552 }, { "epoch": 1.5136612021857925, "grad_norm": 0.15628303587436676, "learning_rate": 1.1787852329938198e-05, "loss": 1.131445288658142, "step": 554 }, { "epoch": 1.5191256830601092, "grad_norm": 0.18890385329723358, "learning_rate": 1.1960524640402862e-05, "loss": 0.8027105927467346, "step": 556 }, { "epoch": 1.5245901639344264, "grad_norm": 0.2331659346818924, "learning_rate": 1.2132840018331514e-05, "loss": 1.1426656246185303, "step": 558 }, { "epoch": 1.530054644808743, "grad_norm": 0.15703825652599335, "learning_rate": 1.2304734431046335e-05, "loss": 0.3221997618675232, "step": 560 }, { "epoch": 1.5355191256830603, "grad_norm": 0.10984613001346588, "learning_rate": 1.2476144002300864e-05, "loss": 1.136183500289917, "step": 562 }, { "epoch": 1.540983606557377, "grad_norm": 0.14987057447433472, "learning_rate": 1.264700503601655e-05, "loss": 0.7029743194580078, "step": 564 }, { "epoch": 1.5464480874316942, "grad_norm": 0.13446514308452606, "learning_rate": 1.2817254039952253e-05, "loss": 1.243178367614746, "step": 566 }, { "epoch": 1.5519125683060109, "grad_norm": 0.13616947829723358, "learning_rate": 1.2986827749298138e-05, "loss": 1.218723177909851, "step": 568 }, { "epoch": 1.5573770491803278, "grad_norm": 0.11935320496559143, "learning_rate": 1.3155663150184942e-05, "loss": 1.1722185611724854, "step": 570 }, { "epoch": 1.5628415300546448, "grad_norm": 0.10870077461004257, "learning_rate": 1.3323697503100035e-05, "loss": 0.7070199251174927, "step": 572 }, { "epoch": 1.5683060109289617, "grad_norm": 0.4351557791233063, "learning_rate": 1.3490868366201527e-05, "loss": 1.0434682369232178, "step": 574 }, { "epoch": 1.5737704918032787, "grad_norm": 0.10035145282745361, "learning_rate": 1.3657113618521763e-05, "loss": 1.1506720781326294, "step": 576 }, { "epoch": 1.5792349726775956, "grad_norm": 0.13236872851848602, "learning_rate": 1.3822371483051593e-05, "loss": 1.1399495601654053, "step": 578 }, { "epoch": 1.5846994535519126, "grad_norm": 0.11729606240987778, "learning_rate": 1.3986580549696777e-05, "loss": 1.114902138710022, "step": 580 }, { "epoch": 1.5901639344262295, "grad_norm": 0.16270145773887634, "learning_rate": 1.4149679798098097e-05, "loss": 1.1003979444503784, "step": 582 }, { "epoch": 1.5956284153005464, "grad_norm": 0.3030289113521576, "learning_rate": 1.4311608620306626e-05, "loss": 0.6834750771522522, "step": 584 }, { "epoch": 1.6010928961748634, "grad_norm": 0.11277345567941666, "learning_rate": 1.447230684330573e-05, "loss": 1.1540107727050781, "step": 586 }, { "epoch": 1.6065573770491803, "grad_norm": 0.1363985538482666, "learning_rate": 1.4631714751371456e-05, "loss": 1.2158739566802979, "step": 588 }, { "epoch": 1.6120218579234973, "grad_norm": 0.12835562229156494, "learning_rate": 1.4789773108263016e-05, "loss": 1.4278290271759033, "step": 590 }, { "epoch": 1.6174863387978142, "grad_norm": 0.1124146431684494, "learning_rate": 1.4946423179235068e-05, "loss": 1.1424548625946045, "step": 592 }, { "epoch": 1.6229508196721312, "grad_norm": 0.15784600377082825, "learning_rate": 1.5101606752863606e-05, "loss": 1.1741244792938232, "step": 594 }, { "epoch": 1.6284153005464481, "grad_norm": 0.1587875932455063, "learning_rate": 1.5255266162677466e-05, "loss": 1.113938570022583, "step": 596 }, { "epoch": 1.633879781420765, "grad_norm": 0.1339414268732071, "learning_rate": 1.540734430858725e-05, "loss": 1.3817849159240723, "step": 598 }, { "epoch": 1.639344262295082, "grad_norm": 0.24032869935035706, "learning_rate": 1.5557784678103852e-05, "loss": 1.0632057189941406, "step": 600 }, { "epoch": 1.644808743169399, "grad_norm": 0.14984670281410217, "learning_rate": 1.5706531367338546e-05, "loss": 1.4408550262451172, "step": 602 }, { "epoch": 1.650273224043716, "grad_norm": 0.11051689833402634, "learning_rate": 1.5853529101776985e-05, "loss": 1.1191422939300537, "step": 604 }, { "epoch": 1.6557377049180326, "grad_norm": 0.2666247487068176, "learning_rate": 1.5998723256819298e-05, "loss": 1.1819491386413574, "step": 606 }, { "epoch": 1.6612021857923498, "grad_norm": 0.1419493705034256, "learning_rate": 1.614205987807872e-05, "loss": 1.1393964290618896, "step": 608 }, { "epoch": 1.6666666666666665, "grad_norm": 0.20164312422275543, "learning_rate": 1.628348570143105e-05, "loss": 1.1867141723632812, "step": 610 }, { "epoch": 1.6721311475409837, "grad_norm": 0.4470498561859131, "learning_rate": 1.6422948172807745e-05, "loss": 0.6968726515769958, "step": 612 }, { "epoch": 1.6775956284153004, "grad_norm": 0.3151567876338959, "learning_rate": 1.6560395467725086e-05, "loss": 0.984643816947937, "step": 614 }, { "epoch": 1.6830601092896176, "grad_norm": 0.1973286122083664, "learning_rate": 1.6695776510542253e-05, "loss": 0.73722904920578, "step": 616 }, { "epoch": 1.6885245901639343, "grad_norm": 0.11708299070596695, "learning_rate": 1.6829040993441085e-05, "loss": 1.1374552249908447, "step": 618 }, { "epoch": 1.6939890710382515, "grad_norm": 0.12260973453521729, "learning_rate": 1.696013939512057e-05, "loss": 1.111509084701538, "step": 620 }, { "epoch": 1.6994535519125682, "grad_norm": 0.13327494263648987, "learning_rate": 1.7089022999199064e-05, "loss": 1.0331177711486816, "step": 622 }, { "epoch": 1.7049180327868854, "grad_norm": 2.275272846221924, "learning_rate": 1.7215643912317323e-05, "loss": 0.7297571301460266, "step": 624 }, { "epoch": 1.710382513661202, "grad_norm": 0.16397327184677124, "learning_rate": 1.73399550819358e-05, "loss": 1.1528923511505127, "step": 626 }, { "epoch": 1.7158469945355193, "grad_norm": 0.1343916803598404, "learning_rate": 1.746191031381943e-05, "loss": 1.107448935508728, "step": 628 }, { "epoch": 1.721311475409836, "grad_norm": 0.2460424154996872, "learning_rate": 1.7581464289203475e-05, "loss": 0.7108749151229858, "step": 630 }, { "epoch": 1.7267759562841531, "grad_norm": 0.46268230676651, "learning_rate": 1.7698572581634083e-05, "loss": 0.9818768501281738, "step": 632 }, { "epoch": 1.7322404371584699, "grad_norm": 0.15652601420879364, "learning_rate": 1.781319167347718e-05, "loss": 1.3066364526748657, "step": 634 }, { "epoch": 1.737704918032787, "grad_norm": 0.24166584014892578, "learning_rate": 1.7925278972089748e-05, "loss": 1.037507176399231, "step": 636 }, { "epoch": 1.7431693989071038, "grad_norm": 0.12377048283815384, "learning_rate": 1.8034792825647287e-05, "loss": 1.14212965965271, "step": 638 }, { "epoch": 1.748633879781421, "grad_norm": 0.10295464843511581, "learning_rate": 1.8141692538621716e-05, "loss": 1.1561766862869263, "step": 640 }, { "epoch": 1.7540983606557377, "grad_norm": 0.17102456092834473, "learning_rate": 1.8245938386903896e-05, "loss": 0.7420101761817932, "step": 642 }, { "epoch": 1.7595628415300546, "grad_norm": 0.19951768219470978, "learning_rate": 1.8347491632565156e-05, "loss": 0.43516218662261963, "step": 644 }, { "epoch": 1.7650273224043715, "grad_norm": 0.667060136795044, "learning_rate": 1.8446314538252407e-05, "loss": 1.194848656654358, "step": 646 }, { "epoch": 1.7704918032786885, "grad_norm": 0.1322481632232666, "learning_rate": 1.8542370381211374e-05, "loss": 0.982461154460907, "step": 648 }, { "epoch": 1.7759562841530054, "grad_norm": 0.1529482752084732, "learning_rate": 1.8635623466932843e-05, "loss": 0.8924828767776489, "step": 650 }, { "epoch": 1.7814207650273224, "grad_norm": 0.3255729079246521, "learning_rate": 1.8726039142416796e-05, "loss": 0.7710011601448059, "step": 652 }, { "epoch": 1.7868852459016393, "grad_norm": 0.14727091789245605, "learning_rate": 1.881358380904954e-05, "loss": 1.1044501066207886, "step": 654 }, { "epoch": 1.7923497267759563, "grad_norm": 0.1614963561296463, "learning_rate": 1.889822493508897e-05, "loss": 1.1497408151626587, "step": 656 }, { "epoch": 1.7978142076502732, "grad_norm": 0.26955926418304443, "learning_rate": 1.897993106775346e-05, "loss": 0.9794219136238098, "step": 658 }, { "epoch": 1.8032786885245902, "grad_norm": 0.2158735692501068, "learning_rate": 1.9058671844909742e-05, "loss": 1.1939728260040283, "step": 660 }, { "epoch": 1.8087431693989071, "grad_norm": 0.11255908012390137, "learning_rate": 1.9134418006355532e-05, "loss": 0.9392801523208618, "step": 662 }, { "epoch": 1.814207650273224, "grad_norm": 0.17491623759269714, "learning_rate": 1.9207141404692667e-05, "loss": 0.7158020734786987, "step": 664 }, { "epoch": 1.819672131147541, "grad_norm": 0.12073390185832977, "learning_rate": 1.927681501578672e-05, "loss": 1.1663721799850464, "step": 666 }, { "epoch": 1.825136612021858, "grad_norm": 0.09961558878421783, "learning_rate": 1.934341294880924e-05, "loss": 1.1403790712356567, "step": 668 }, { "epoch": 1.830601092896175, "grad_norm": 0.13533425331115723, "learning_rate": 1.9406910455858783e-05, "loss": 1.110253095626831, "step": 670 }, { "epoch": 1.8360655737704918, "grad_norm": 0.3127456605434418, "learning_rate": 1.9467283941157304e-05, "loss": 1.1353299617767334, "step": 672 }, { "epoch": 1.8415300546448088, "grad_norm": 2.787572145462036, "learning_rate": 1.952451096981838e-05, "loss": 0.8424018025398254, "step": 674 }, { "epoch": 1.8469945355191257, "grad_norm": 0.33079859614372253, "learning_rate": 1.957857027618405e-05, "loss": 1.183168888092041, "step": 676 }, { "epoch": 1.8524590163934427, "grad_norm": 0.1199263259768486, "learning_rate": 1.9629441771727166e-05, "loss": 1.0803476572036743, "step": 678 }, { "epoch": 1.8579234972677594, "grad_norm": 0.1475621610879898, "learning_rate": 1.9677106552516317e-05, "loss": 1.1051766872406006, "step": 680 }, { "epoch": 1.8633879781420766, "grad_norm": 0.1131962314248085, "learning_rate": 1.9721546906240577e-05, "loss": 1.1643602848052979, "step": 682 }, { "epoch": 1.8688524590163933, "grad_norm": 0.23215758800506592, "learning_rate": 1.976274631879142e-05, "loss": 0.8713716268539429, "step": 684 }, { "epoch": 1.8743169398907105, "grad_norm": 0.2560221254825592, "learning_rate": 1.9800689480399383e-05, "loss": 0.7212733626365662, "step": 686 }, { "epoch": 1.8797814207650272, "grad_norm": 0.17682117223739624, "learning_rate": 1.9835362291323222e-05, "loss": 1.1008837223052979, "step": 688 }, { "epoch": 1.8852459016393444, "grad_norm": 0.1772257536649704, "learning_rate": 1.9866751867089363e-05, "loss": 0.8719238638877869, "step": 690 }, { "epoch": 1.890710382513661, "grad_norm": 0.15099941194057465, "learning_rate": 1.9894846543279838e-05, "loss": 1.1498489379882812, "step": 692 }, { "epoch": 1.8961748633879782, "grad_norm": 0.321058452129364, "learning_rate": 1.991963587986677e-05, "loss": 0.842879593372345, "step": 694 }, { "epoch": 1.901639344262295, "grad_norm": 0.11210023611783981, "learning_rate": 1.9941110665091922e-05, "loss": 1.1328097581863403, "step": 696 }, { "epoch": 1.9071038251366121, "grad_norm": 0.1561606526374817, "learning_rate": 1.9959262918889774e-05, "loss": 1.4744820594787598, "step": 698 }, { "epoch": 1.9125683060109289, "grad_norm": 0.13894771039485931, "learning_rate": 1.9974085895852973e-05, "loss": 1.1326099634170532, "step": 700 }, { "epoch": 1.918032786885246, "grad_norm": 0.09440940618515015, "learning_rate": 1.99855740877389e-05, "loss": 1.1104779243469238, "step": 702 }, { "epoch": 1.9234972677595628, "grad_norm": 0.11499873548746109, "learning_rate": 1.9993723225516553e-05, "loss": 1.1850953102111816, "step": 704 }, { "epoch": 1.92896174863388, "grad_norm": 0.2087257206439972, "learning_rate": 1.9998530280952938e-05, "loss": 1.1748231649398804, "step": 706 }, { "epoch": 1.9344262295081966, "grad_norm": 0.16280730068683624, "learning_rate": 1.9999993467738345e-05, "loss": 1.1230883598327637, "step": 708 }, { "epoch": 1.9398907103825138, "grad_norm": 0.5092416405677795, "learning_rate": 1.9998112242150162e-05, "loss": 1.339207649230957, "step": 710 }, { "epoch": 1.9453551912568305, "grad_norm": 0.12435825169086456, "learning_rate": 1.999288730325491e-05, "loss": 1.1549785137176514, "step": 712 }, { "epoch": 1.9508196721311475, "grad_norm": 0.19420410692691803, "learning_rate": 1.9984320592648474e-05, "loss": 0.9229910373687744, "step": 714 }, { "epoch": 1.9562841530054644, "grad_norm": 0.15405113995075226, "learning_rate": 1.9972415293734607e-05, "loss": 1.2319244146347046, "step": 716 }, { "epoch": 1.9617486338797814, "grad_norm": 0.14918380975723267, "learning_rate": 1.995717583054196e-05, "loss": 0.707750678062439, "step": 718 }, { "epoch": 1.9672131147540983, "grad_norm": 0.11523278057575226, "learning_rate": 1.9938607866080114e-05, "loss": 0.7229039669036865, "step": 720 }, { "epoch": 1.9726775956284153, "grad_norm": 0.1472606509923935, "learning_rate": 1.991671830023521e-05, "loss": 1.039513349533081, "step": 722 }, { "epoch": 1.9781420765027322, "grad_norm": 0.2472400814294815, "learning_rate": 1.989151526720591e-05, "loss": 1.1361088752746582, "step": 724 }, { "epoch": 1.9836065573770492, "grad_norm": 0.3535289764404297, "learning_rate": 1.986300813248073e-05, "loss": 0.7156858444213867, "step": 726 }, { "epoch": 1.989071038251366, "grad_norm": 0.14543037116527557, "learning_rate": 1.9831207489357825e-05, "loss": 0.9188486933708191, "step": 728 }, { "epoch": 1.994535519125683, "grad_norm": 0.12422552704811096, "learning_rate": 1.979612515500847e-05, "loss": 1.148845911026001, "step": 730 }, { "epoch": 2.0, "grad_norm": 0.19218623638153076, "learning_rate": 1.97577741660858e-05, "loss": 1.157519817352295, "step": 732 }, { "epoch": 2.0054644808743167, "grad_norm": 0.10968166589736938, "learning_rate": 1.9716168773880382e-05, "loss": 0.7050259113311768, "step": 734 }, { "epoch": 2.010928961748634, "grad_norm": 0.14246150851249695, "learning_rate": 1.9671324439024374e-05, "loss": 0.9686898589134216, "step": 736 }, { "epoch": 2.0163934426229506, "grad_norm": 0.17498300969600677, "learning_rate": 1.9623257825746357e-05, "loss": 1.0406219959259033, "step": 738 }, { "epoch": 2.021857923497268, "grad_norm": 0.11363784968852997, "learning_rate": 1.9571986795678878e-05, "loss": 1.0660529136657715, "step": 740 }, { "epoch": 2.0273224043715845, "grad_norm": 0.14233727753162384, "learning_rate": 1.951753040122102e-05, "loss": 1.067291498184204, "step": 742 }, { "epoch": 2.0327868852459017, "grad_norm": 0.33568593859672546, "learning_rate": 1.9459908878458532e-05, "loss": 1.3619149923324585, "step": 744 }, { "epoch": 2.0382513661202184, "grad_norm": 0.11692183464765549, "learning_rate": 1.939914363964402e-05, "loss": 1.4234706163406372, "step": 746 }, { "epoch": 2.0437158469945356, "grad_norm": 0.13339044153690338, "learning_rate": 1.9335257265240168e-05, "loss": 1.137938380241394, "step": 748 }, { "epoch": 2.0491803278688523, "grad_norm": 0.1429172307252884, "learning_rate": 1.9268273495528768e-05, "loss": 1.0415153503417969, "step": 750 }, { "epoch": 2.0546448087431695, "grad_norm": 0.1754903793334961, "learning_rate": 1.9198217221788806e-05, "loss": 1.0955044031143188, "step": 752 }, { "epoch": 2.060109289617486, "grad_norm": 0.13161030411720276, "learning_rate": 1.9125114477046807e-05, "loss": 0.6989483833312988, "step": 754 }, { "epoch": 2.0655737704918034, "grad_norm": 0.23866786062717438, "learning_rate": 1.9048992426402947e-05, "loss": 0.20047175884246826, "step": 756 }, { "epoch": 2.07103825136612, "grad_norm": 0.2189582735300064, "learning_rate": 1.896987935693643e-05, "loss": 1.1039602756500244, "step": 758 }, { "epoch": 2.0765027322404372, "grad_norm": 0.14158737659454346, "learning_rate": 1.888780466719397e-05, "loss": 0.8127365708351135, "step": 760 }, { "epoch": 2.081967213114754, "grad_norm": 0.16718794405460358, "learning_rate": 1.8802798856265254e-05, "loss": 1.090496301651001, "step": 762 }, { "epoch": 2.087431693989071, "grad_norm": 0.646533727645874, "learning_rate": 1.8714893512449424e-05, "loss": 0.9807750582695007, "step": 764 }, { "epoch": 2.092896174863388, "grad_norm": 0.16664795577526093, "learning_rate": 1.8624121301516808e-05, "loss": 1.148633599281311, "step": 766 }, { "epoch": 2.098360655737705, "grad_norm": 0.15415580570697784, "learning_rate": 1.853051595457026e-05, "loss": 1.0199898481369019, "step": 768 }, { "epoch": 2.1038251366120218, "grad_norm": 0.12932369112968445, "learning_rate": 1.843411225551065e-05, "loss": 1.1014589071273804, "step": 770 }, { "epoch": 2.109289617486339, "grad_norm": 0.234808549284935, "learning_rate": 1.8334946028111088e-05, "loss": 1.0307773351669312, "step": 772 }, { "epoch": 2.1147540983606556, "grad_norm": 0.15817847847938538, "learning_rate": 1.8233054122704765e-05, "loss": 0.892197847366333, "step": 774 }, { "epoch": 2.120218579234973, "grad_norm": 0.18785926699638367, "learning_rate": 1.8128474402491286e-05, "loss": 0.5686047673225403, "step": 776 }, { "epoch": 2.1256830601092895, "grad_norm": 0.20626573264598846, "learning_rate": 1.802124572946668e-05, "loss": 1.1014513969421387, "step": 778 }, { "epoch": 2.1311475409836067, "grad_norm": 0.1809910535812378, "learning_rate": 1.791140794998219e-05, "loss": 0.5480612516403198, "step": 780 }, { "epoch": 2.1366120218579234, "grad_norm": 0.1974882036447525, "learning_rate": 1.7799001879937294e-05, "loss": 0.7820758819580078, "step": 782 }, { "epoch": 2.1420765027322406, "grad_norm": 0.2597973346710205, "learning_rate": 1.768406928961248e-05, "loss": 0.8670818209648132, "step": 784 }, { "epoch": 2.1475409836065573, "grad_norm": 0.12401880323886871, "learning_rate": 1.7566652888147328e-05, "loss": 0.637206494808197, "step": 786 }, { "epoch": 2.1530054644808745, "grad_norm": 0.22445400059223175, "learning_rate": 1.7446796307669725e-05, "loss": 0.6058897972106934, "step": 788 }, { "epoch": 2.158469945355191, "grad_norm": 0.14324024319648743, "learning_rate": 1.732454408708209e-05, "loss": 1.1209547519683838, "step": 790 }, { "epoch": 2.1639344262295084, "grad_norm": 0.1719355434179306, "learning_rate": 1.719994165551063e-05, "loss": 1.0784960985183716, "step": 792 }, { "epoch": 2.169398907103825, "grad_norm": 0.4398317337036133, "learning_rate": 1.7073035315423838e-05, "loss": 0.8808025121688843, "step": 794 }, { "epoch": 2.1748633879781423, "grad_norm": 0.14957763254642487, "learning_rate": 1.6943872225426396e-05, "loss": 0.6589257717132568, "step": 796 }, { "epoch": 2.180327868852459, "grad_norm": 0.223003551363945, "learning_rate": 1.6812500382734977e-05, "loss": 0.740198016166687, "step": 798 }, { "epoch": 2.185792349726776, "grad_norm": 0.1016513854265213, "learning_rate": 1.6678968605342348e-05, "loss": 1.0570908784866333, "step": 800 }, { "epoch": 2.191256830601093, "grad_norm": 0.16483862698078156, "learning_rate": 1.6543326513876602e-05, "loss": 1.1871097087860107, "step": 802 }, { "epoch": 2.19672131147541, "grad_norm": 0.14547647535800934, "learning_rate": 1.6405624513162002e-05, "loss": 1.3363229036331177, "step": 804 }, { "epoch": 2.202185792349727, "grad_norm": 0.12882763147354126, "learning_rate": 1.6265913773488456e-05, "loss": 1.0369070768356323, "step": 806 }, { "epoch": 2.2076502732240435, "grad_norm": 0.12072896212339401, "learning_rate": 1.6124246211596606e-05, "loss": 0.9998791217803955, "step": 808 }, { "epoch": 2.2131147540983607, "grad_norm": 0.16041557490825653, "learning_rate": 1.598067447138542e-05, "loss": 1.0319167375564575, "step": 810 }, { "epoch": 2.2185792349726774, "grad_norm": 0.12052586674690247, "learning_rate": 1.5835251904349688e-05, "loss": 1.079565405845642, "step": 812 }, { "epoch": 2.2240437158469946, "grad_norm": 0.2981847822666168, "learning_rate": 1.5688032549754453e-05, "loss": 0.6331281661987305, "step": 814 }, { "epoch": 2.2295081967213113, "grad_norm": 0.21407558023929596, "learning_rate": 1.553907111455401e-05, "loss": 0.9996432662010193, "step": 816 }, { "epoch": 2.2349726775956285, "grad_norm": 0.38344627618789673, "learning_rate": 1.538842295306264e-05, "loss": 0.7279675006866455, "step": 818 }, { "epoch": 2.240437158469945, "grad_norm": 0.23144850134849548, "learning_rate": 1.5236144046384917e-05, "loss": 1.0548104047775269, "step": 820 }, { "epoch": 2.2459016393442623, "grad_norm": 0.16722913086414337, "learning_rate": 1.5082290981612987e-05, "loss": 1.0882266759872437, "step": 822 }, { "epoch": 2.251366120218579, "grad_norm": 0.19076433777809143, "learning_rate": 1.4926920930798736e-05, "loss": 0.9657437801361084, "step": 824 }, { "epoch": 2.2568306010928962, "grad_norm": 0.17660953104496002, "learning_rate": 1.4770091629708562e-05, "loss": 1.0023385286331177, "step": 826 }, { "epoch": 2.262295081967213, "grad_norm": 0.3156924247741699, "learning_rate": 1.461186135636868e-05, "loss": 0.49919602274894714, "step": 828 }, { "epoch": 2.26775956284153, "grad_norm": 0.13933399319648743, "learning_rate": 1.4452288909408864e-05, "loss": 0.9852725267410278, "step": 830 }, { "epoch": 2.273224043715847, "grad_norm": 0.20533017814159393, "learning_rate": 1.4291433586212831e-05, "loss": 0.7310548424720764, "step": 832 }, { "epoch": 2.278688524590164, "grad_norm": 0.21601639688014984, "learning_rate": 1.4129355160883216e-05, "loss": 0.7628719210624695, "step": 834 }, { "epoch": 2.2841530054644807, "grad_norm": 0.150185227394104, "learning_rate": 1.3966113862029429e-05, "loss": 1.0764801502227783, "step": 836 }, { "epoch": 2.289617486338798, "grad_norm": 0.1973620504140854, "learning_rate": 1.3801770350386568e-05, "loss": 1.0453038215637207, "step": 838 }, { "epoch": 2.2950819672131146, "grad_norm": 0.12196861952543259, "learning_rate": 1.363638569627384e-05, "loss": 0.5716758370399475, "step": 840 }, { "epoch": 2.300546448087432, "grad_norm": 0.2215929925441742, "learning_rate": 1.3470021356900696e-05, "loss": 0.8548388481140137, "step": 842 }, { "epoch": 2.3060109289617485, "grad_norm": 0.0590071864426136, "learning_rate": 1.3302739153529252e-05, "loss": 0.6019071936607361, "step": 844 }, { "epoch": 2.3114754098360657, "grad_norm": 0.41101735830307007, "learning_rate": 1.3134601248501366e-05, "loss": 1.1974279880523682, "step": 846 }, { "epoch": 2.3169398907103824, "grad_norm": 0.13029514253139496, "learning_rate": 1.2965670122139071e-05, "loss": 1.1063951253890991, "step": 848 }, { "epoch": 2.3224043715846996, "grad_norm": 0.129970520734787, "learning_rate": 1.2796008549526752e-05, "loss": 1.0337756872177124, "step": 850 }, { "epoch": 2.3278688524590163, "grad_norm": 0.13291127979755402, "learning_rate": 1.262567957718378e-05, "loss": 0.9729894399642944, "step": 852 }, { "epoch": 2.3333333333333335, "grad_norm": 0.18364933133125305, "learning_rate": 1.2454746499636408e-05, "loss": 0.6949310302734375, "step": 854 }, { "epoch": 2.33879781420765, "grad_norm": 0.11144661903381348, "learning_rate": 1.2283272835897359e-05, "loss": 0.7358170747756958, "step": 856 }, { "epoch": 2.3442622950819674, "grad_norm": 0.09950226545333862, "learning_rate": 1.2111322305862088e-05, "loss": 1.0024327039718628, "step": 858 }, { "epoch": 2.349726775956284, "grad_norm": 0.13857939839363098, "learning_rate": 1.1938958806630322e-05, "loss": 0.7280409336090088, "step": 860 }, { "epoch": 2.3551912568306013, "grad_norm": 0.20425045490264893, "learning_rate": 1.1766246388761841e-05, "loss": 0.9389795660972595, "step": 862 }, { "epoch": 2.360655737704918, "grad_norm": 0.1896969974040985, "learning_rate": 1.1593249232475162e-05, "loss": 1.030674695968628, "step": 864 }, { "epoch": 2.366120218579235, "grad_norm": 0.1484568864107132, "learning_rate": 1.142003162379808e-05, "loss": 0.6855581402778625, "step": 866 }, { "epoch": 2.371584699453552, "grad_norm": 0.2438308596611023, "learning_rate": 1.1246657930678817e-05, "loss": 1.3209505081176758, "step": 868 }, { "epoch": 2.3770491803278686, "grad_norm": 0.19440999627113342, "learning_rate": 1.1073192579066867e-05, "loss": 0.9309288263320923, "step": 870 }, { "epoch": 2.3825136612021858, "grad_norm": 0.29453930258750916, "learning_rate": 1.0899700028972169e-05, "loss": 0.5769140124320984, "step": 872 }, { "epoch": 2.387978142076503, "grad_norm": 0.25008222460746765, "learning_rate": 1.072624475051166e-05, "loss": 1.0771747827529907, "step": 874 }, { "epoch": 2.3934426229508197, "grad_norm": 0.09936109930276871, "learning_rate": 1.055289119995206e-05, "loss": 1.0050560235977173, "step": 876 }, { "epoch": 2.3989071038251364, "grad_norm": 0.4316991865634918, "learning_rate": 1.0379703795757853e-05, "loss": 0.5357435941696167, "step": 878 }, { "epoch": 2.4043715846994536, "grad_norm": 0.23820410668849945, "learning_rate": 1.0206746894653252e-05, "loss": 1.1548678874969482, "step": 880 }, { "epoch": 2.4098360655737707, "grad_norm": 0.1519540250301361, "learning_rate": 1.0034084767707164e-05, "loss": 0.9099193811416626, "step": 882 }, { "epoch": 2.4153005464480874, "grad_norm": 0.14186547696590424, "learning_rate": 9.861781576449879e-06, "loss": 0.9032131433486938, "step": 884 }, { "epoch": 2.420765027322404, "grad_norm": 0.744184672832489, "learning_rate": 9.689901349030646e-06, "loss": 1.0177228450775146, "step": 886 }, { "epoch": 2.4262295081967213, "grad_norm": 0.17870917916297913, "learning_rate": 9.518507956424643e-06, "loss": 1.0855988264083862, "step": 888 }, { "epoch": 2.431693989071038, "grad_norm": 0.42778703570365906, "learning_rate": 9.347665088698444e-06, "loss": 0.6894425749778748, "step": 890 }, { "epoch": 2.4371584699453552, "grad_norm": 0.24986915290355682, "learning_rate": 9.177436231342623e-06, "loss": 1.3268741369247437, "step": 892 }, { "epoch": 2.442622950819672, "grad_norm": 0.40933093428611755, "learning_rate": 9.00788464168054e-06, "loss": 0.9357931017875671, "step": 894 }, { "epoch": 2.448087431693989, "grad_norm": 0.3109062910079956, "learning_rate": 8.839073325361751e-06, "loss": 0.6064870953559875, "step": 896 }, { "epoch": 2.453551912568306, "grad_norm": 0.12668392062187195, "learning_rate": 8.67106501294902e-06, "loss": 0.20398537814617157, "step": 898 }, { "epoch": 2.459016393442623, "grad_norm": 0.14268121123313904, "learning_rate": 8.503922136607536e-06, "loss": 1.065590739250183, "step": 900 }, { "epoch": 2.4644808743169397, "grad_norm": 0.17221775650978088, "learning_rate": 8.337706806905029e-06, "loss": 0.495491087436676, "step": 902 }, { "epoch": 2.469945355191257, "grad_norm": 0.15687991678714752, "learning_rate": 8.172480789731374e-06, "loss": 1.2099788188934326, "step": 904 }, { "epoch": 2.4754098360655736, "grad_norm": 0.6032702326774597, "learning_rate": 8.00830548334625e-06, "loss": 0.4631669223308563, "step": 906 }, { "epoch": 2.480874316939891, "grad_norm": 0.5850704312324524, "learning_rate": 7.84524189556352e-06, "loss": 0.6608245968818665, "step": 908 }, { "epoch": 2.4863387978142075, "grad_norm": 0.12262270599603653, "learning_rate": 7.68335062108057e-06, "loss": 1.0410443544387817, "step": 910 }, { "epoch": 2.4918032786885247, "grad_norm": 0.8080756068229675, "learning_rate": 7.522691818961252e-06, "loss": 0.5971605181694031, "step": 912 }, { "epoch": 2.4972677595628414, "grad_norm": 0.20784160494804382, "learning_rate": 7.3633251902806165e-06, "loss": 1.0929663181304932, "step": 914 }, { "epoch": 2.5027322404371586, "grad_norm": 0.1316247284412384, "learning_rate": 7.205309955939983e-06, "loss": 1.1377352476119995, "step": 916 }, { "epoch": 2.5081967213114753, "grad_norm": 0.13229675590991974, "learning_rate": 7.048704834660296e-06, "loss": 1.0250879526138306, "step": 918 }, { "epoch": 2.5136612021857925, "grad_norm": 0.17270365357398987, "learning_rate": 6.8935680211621715e-06, "loss": 0.9176226854324341, "step": 920 }, { "epoch": 2.519125683060109, "grad_norm": 0.16042472422122955, "learning_rate": 6.739957164540634e-06, "loss": 1.0503426790237427, "step": 922 }, { "epoch": 2.5245901639344264, "grad_norm": 0.3965965509414673, "learning_rate": 6.587929346842625e-06, "loss": 0.4507668912410736, "step": 924 }, { "epoch": 2.530054644808743, "grad_norm": 0.14760476350784302, "learning_rate": 6.437541061855222e-06, "loss": 1.0362180471420288, "step": 926 }, { "epoch": 2.5355191256830603, "grad_norm": 0.7348153591156006, "learning_rate": 6.288848194112459e-06, "loss": 1.0108616352081299, "step": 928 }, { "epoch": 2.540983606557377, "grad_norm": 0.23355959355831146, "learning_rate": 6.141905998128495e-06, "loss": 0.7464023232460022, "step": 930 }, { "epoch": 2.546448087431694, "grad_norm": 0.7200878858566284, "learning_rate": 5.996769077865029e-06, "loss": 0.6252878308296204, "step": 932 }, { "epoch": 2.551912568306011, "grad_norm": 0.23962250351905823, "learning_rate": 5.853491366440313e-06, "loss": 0.9192193150520325, "step": 934 }, { "epoch": 2.557377049180328, "grad_norm": 0.23657557368278503, "learning_rate": 5.712126106087557e-06, "loss": 1.0096158981323242, "step": 936 }, { "epoch": 2.5628415300546448, "grad_norm": 0.14654363691806793, "learning_rate": 5.572725828369961e-06, "loss": 1.076252818107605, "step": 938 }, { "epoch": 2.5683060109289615, "grad_norm": 0.1286906749010086, "learning_rate": 5.4353423346599944e-06, "loss": 1.0361932516098022, "step": 940 }, { "epoch": 2.5737704918032787, "grad_norm": 0.14579473435878754, "learning_rate": 5.30002667688986e-06, "loss": 1.0813875198364258, "step": 942 }, { "epoch": 2.579234972677596, "grad_norm": 0.10800908505916595, "learning_rate": 5.1668291385804995e-06, "loss": 1.1076337099075317, "step": 944 }, { "epoch": 2.5846994535519126, "grad_norm": 0.31755056977272034, "learning_rate": 5.03579921615621e-06, "loss": 1.0581474304199219, "step": 946 }, { "epoch": 2.5901639344262293, "grad_norm": 0.14950014650821686, "learning_rate": 4.906985600551651e-06, "loss": 0.858165979385376, "step": 948 }, { "epoch": 2.5956284153005464, "grad_norm": 0.5994306802749634, "learning_rate": 4.780436159118221e-06, "loss": 0.8401349186897278, "step": 950 }, { "epoch": 2.6010928961748636, "grad_norm": 0.12480851262807846, "learning_rate": 4.656197917836474e-06, "loss": 1.0394039154052734, "step": 952 }, { "epoch": 2.6065573770491803, "grad_norm": 0.19965514540672302, "learning_rate": 4.5343170438411885e-06, "loss": 0.9946895241737366, "step": 954 }, { "epoch": 2.612021857923497, "grad_norm": 0.1912984848022461, "learning_rate": 4.414838828265581e-06, "loss": 0.5789214372634888, "step": 956 }, { "epoch": 2.6174863387978142, "grad_norm": 0.12282451242208481, "learning_rate": 4.297807669411057e-06, "loss": 0.7763662338256836, "step": 958 }, { "epoch": 2.6229508196721314, "grad_norm": 0.10073775053024292, "learning_rate": 4.183267056248689e-06, "loss": 0.9413790106773376, "step": 960 }, { "epoch": 2.628415300546448, "grad_norm": 0.7957872748374939, "learning_rate": 4.071259552258709e-06, "loss": 1.1018624305725098, "step": 962 }, { "epoch": 2.633879781420765, "grad_norm": 0.12228737026453018, "learning_rate": 3.961826779613801e-06, "loss": 0.9796239733695984, "step": 964 }, { "epoch": 2.639344262295082, "grad_norm": 0.16614055633544922, "learning_rate": 3.85500940371226e-06, "loss": 0.9639811515808105, "step": 966 }, { "epoch": 2.644808743169399, "grad_norm": 0.2138717919588089, "learning_rate": 3.750847118066614e-06, "loss": 1.065807819366455, "step": 968 }, { "epoch": 2.650273224043716, "grad_norm": 0.18565969169139862, "learning_rate": 3.6493786295535234e-06, "loss": 1.111021876335144, "step": 970 }, { "epoch": 2.6557377049180326, "grad_norm": 0.32286983728408813, "learning_rate": 3.5506416440301885e-06, "loss": 1.085739254951477, "step": 972 }, { "epoch": 2.66120218579235, "grad_norm": 0.15602485835552216, "learning_rate": 3.4546728523228067e-06, "loss": 1.0275617837905884, "step": 974 }, { "epoch": 2.6666666666666665, "grad_norm": 0.2214060127735138, "learning_rate": 3.361507916592206e-06, "loss": 1.0728002786636353, "step": 976 }, { "epoch": 2.6721311475409837, "grad_norm": 0.19005049765110016, "learning_rate": 3.271181457081715e-06, "loss": 0.47213196754455566, "step": 978 }, { "epoch": 2.6775956284153004, "grad_norm": 0.10323573648929596, "learning_rate": 3.1837270392522456e-06, "loss": 0.9167113304138184, "step": 980 }, { "epoch": 2.6830601092896176, "grad_norm": 0.7527140378952026, "learning_rate": 3.0991771613092686e-06, "loss": 1.1480381488800049, "step": 982 }, { "epoch": 2.6885245901639343, "grad_norm": 0.1350458264350891, "learning_rate": 3.017563242126483e-06, "loss": 1.1025961637496948, "step": 984 }, { "epoch": 2.6939890710382515, "grad_norm": 0.2891963720321655, "learning_rate": 2.9389156095704764e-06, "loss": 1.15847909450531, "step": 986 }, { "epoch": 2.699453551912568, "grad_norm": 0.11150713264942169, "learning_rate": 2.8632634892308535e-06, "loss": 1.0358167886734009, "step": 988 }, { "epoch": 2.7049180327868854, "grad_norm": 0.18181155622005463, "learning_rate": 2.7906349935599326e-06, "loss": 0.9927688837051392, "step": 990 }, { "epoch": 2.710382513661202, "grad_norm": 0.14437374472618103, "learning_rate": 2.721057111426154e-06, "loss": 0.9766374230384827, "step": 992 }, { "epoch": 2.7158469945355193, "grad_norm": 0.32675376534461975, "learning_rate": 2.6545556980849417e-06, "loss": 1.0123059749603271, "step": 994 }, { "epoch": 2.721311475409836, "grad_norm": 0.3038513958454132, "learning_rate": 2.591155465570866e-06, "loss": 1.0865612030029297, "step": 996 }, { "epoch": 2.726775956284153, "grad_norm": 0.17611156404018402, "learning_rate": 2.5308799735145813e-06, "loss": 1.0625133514404297, "step": 998 }, { "epoch": 2.73224043715847, "grad_norm": 0.12994976341724396, "learning_rate": 2.473751620388069e-06, "loss": 1.0845409631729126, "step": 1000 }, { "epoch": 2.737704918032787, "grad_norm": 2.454756736755371, "learning_rate": 2.419791635181301e-06, "loss": 1.0169895887374878, "step": 1002 }, { "epoch": 2.7431693989071038, "grad_norm": 0.14442019164562225, "learning_rate": 2.369020069513521e-06, "loss": 0.7047387361526489, "step": 1004 }, { "epoch": 2.748633879781421, "grad_norm": 0.2404894083738327, "learning_rate": 2.3214557901820258e-06, "loss": 0.9643245339393616, "step": 1006 }, { "epoch": 2.7540983606557377, "grad_norm": 0.18125297129154205, "learning_rate": 2.27711647215124e-06, "loss": 1.286293387413025, "step": 1008 }, { "epoch": 2.7595628415300544, "grad_norm": 0.24531304836273193, "learning_rate": 2.2360185919846593e-06, "loss": 0.6167261004447937, "step": 1010 }, { "epoch": 2.7650273224043715, "grad_norm": 0.12282934784889221, "learning_rate": 2.1981774217221474e-06, "loss": 1.0038611888885498, "step": 1012 }, { "epoch": 2.7704918032786887, "grad_norm": 0.5574485659599304, "learning_rate": 2.1636070232047966e-06, "loss": 0.9050815105438232, "step": 1014 }, { "epoch": 2.7759562841530054, "grad_norm": 0.16559119522571564, "learning_rate": 2.1323202428495544e-06, "loss": 0.986128568649292, "step": 1016 }, { "epoch": 2.781420765027322, "grad_norm": 0.3175508677959442, "learning_rate": 2.104328706875452e-06, "loss": 0.5148718953132629, "step": 1018 }, { "epoch": 2.7868852459016393, "grad_norm": 0.15311799943447113, "learning_rate": 2.079642816983293e-06, "loss": 1.0339350700378418, "step": 1020 }, { "epoch": 2.7923497267759565, "grad_norm": 0.20977604389190674, "learning_rate": 2.0582717464903546e-06, "loss": 1.0123640298843384, "step": 1022 }, { "epoch": 2.797814207650273, "grad_norm": 0.23887036740779877, "learning_rate": 2.040223436921581e-06, "loss": 0.9732429385185242, "step": 1024 }, { "epoch": 2.80327868852459, "grad_norm": 0.17824672162532806, "learning_rate": 2.025504595058489e-06, "loss": 1.0330421924591064, "step": 1026 }, { "epoch": 2.808743169398907, "grad_norm": 0.13717371225357056, "learning_rate": 2.0141206904469206e-06, "loss": 1.1064571142196655, "step": 1028 }, { "epoch": 2.8142076502732243, "grad_norm": 0.1265968233346939, "learning_rate": 2.006075953364551e-06, "loss": 1.0096856355667114, "step": 1030 }, { "epoch": 2.819672131147541, "grad_norm": 0.12728005647659302, "learning_rate": 2.0013733732489103e-06, "loss": 0.9550838470458984, "step": 1032 }, { "epoch": 2.8251366120218577, "grad_norm": 0.17370416224002838, "learning_rate": 2.000014697586502e-06, "loss": 1.2471691370010376, "step": 1034 }, { "epoch": 2.830601092896175, "grad_norm": 0.3669784367084503, "learning_rate": 2.0020004312634374e-06, "loss": 1.1650069952011108, "step": 1036 }, { "epoch": 2.836065573770492, "grad_norm": 0.13446620106697083, "learning_rate": 2.0073298363778166e-06, "loss": 0.3134404122829437, "step": 1038 }, { "epoch": 2.841530054644809, "grad_norm": 0.6104834675788879, "learning_rate": 2.016000932513934e-06, "loss": 1.1606090068817139, "step": 1040 }, { "epoch": 2.8469945355191255, "grad_norm": 0.11602997034788132, "learning_rate": 2.0280104974782058e-06, "loss": 0.9772266149520874, "step": 1042 }, { "epoch": 2.8524590163934427, "grad_norm": 0.2954210638999939, "learning_rate": 2.043354068496541e-06, "loss": 1.0794169902801514, "step": 1044 }, { "epoch": 2.8579234972677594, "grad_norm": 0.1831609308719635, "learning_rate": 2.0620259438727168e-06, "loss": 1.1897934675216675, "step": 1046 }, { "epoch": 2.8633879781420766, "grad_norm": 0.14778214693069458, "learning_rate": 2.084019185107135e-06, "loss": 1.0881187915802002, "step": 1048 }, { "epoch": 2.8688524590163933, "grad_norm": 0.13310196995735168, "learning_rate": 2.1093256194751822e-06, "loss": 1.029591679573059, "step": 1050 }, { "epoch": 2.8743169398907105, "grad_norm": 0.08348928391933441, "learning_rate": 2.137935843064233e-06, "loss": 0.6022316813468933, "step": 1052 }, { "epoch": 2.879781420765027, "grad_norm": 0.11385923624038696, "learning_rate": 2.1698392242681502e-06, "loss": 1.1325082778930664, "step": 1054 }, { "epoch": 2.8852459016393444, "grad_norm": 0.1381348967552185, "learning_rate": 2.2050239077380097e-06, "loss": 1.0211372375488281, "step": 1056 }, { "epoch": 2.890710382513661, "grad_norm": 0.17021217942237854, "learning_rate": 2.2434768187875723e-06, "loss": 0.8306444883346558, "step": 1058 }, { "epoch": 2.8961748633879782, "grad_norm": 0.16226349771022797, "learning_rate": 2.285183668251853e-06, "loss": 1.0502756834030151, "step": 1060 }, { "epoch": 2.901639344262295, "grad_norm": 0.16681478917598724, "learning_rate": 2.3301289577970028e-06, "loss": 0.8215010762214661, "step": 1062 }, { "epoch": 2.907103825136612, "grad_norm": 0.4038049876689911, "learning_rate": 2.3782959856795113e-06, "loss": 0.5339687466621399, "step": 1064 }, { "epoch": 2.912568306010929, "grad_norm": 0.19572515785694122, "learning_rate": 2.4296668529525998e-06, "loss": 0.8967607617378235, "step": 1066 }, { "epoch": 2.918032786885246, "grad_norm": 0.1520427167415619, "learning_rate": 2.4842224701175147e-06, "loss": 0.7001104950904846, "step": 1068 }, { "epoch": 2.9234972677595628, "grad_norm": 0.12228185683488846, "learning_rate": 2.541942564217196e-06, "loss": 1.1667863130569458, "step": 1070 }, { "epoch": 2.92896174863388, "grad_norm": 0.1114974319934845, "learning_rate": 2.6028056863697506e-06, "loss": 1.008376955986023, "step": 1072 }, { "epoch": 2.9344262295081966, "grad_norm": 0.163143128156662, "learning_rate": 2.6667892197388884e-06, "loss": 1.4090521335601807, "step": 1074 }, { "epoch": 2.939890710382514, "grad_norm": 0.11509400606155396, "learning_rate": 2.7338693879383967e-06, "loss": 0.6322512030601501, "step": 1076 }, { "epoch": 2.9453551912568305, "grad_norm": 0.10466722398996353, "learning_rate": 2.8040212638674506e-06, "loss": 1.0424083471298218, "step": 1078 }, { "epoch": 2.9508196721311473, "grad_norm": 0.42490720748901367, "learning_rate": 2.877218778973578e-06, "loss": 0.6144481301307678, "step": 1080 }, { "epoch": 2.9562841530054644, "grad_norm": 0.09882606565952301, "learning_rate": 2.9534347329398027e-06, "loss": 0.17148929834365845, "step": 1082 }, { "epoch": 2.9617486338797816, "grad_norm": 0.22523100674152374, "learning_rate": 3.0326408037922827e-06, "loss": 1.109324336051941, "step": 1084 }, { "epoch": 2.9672131147540983, "grad_norm": 0.09981999546289444, "learning_rate": 3.1148075584248306e-06, "loss": 0.7445070147514343, "step": 1086 }, { "epoch": 2.972677595628415, "grad_norm": 1.5596518516540527, "learning_rate": 3.199904463536296e-06, "loss": 0.970592737197876, "step": 1088 }, { "epoch": 2.978142076502732, "grad_norm": 0.1070963516831398, "learning_rate": 3.2878998969767954e-06, "loss": 1.2897882461547852, "step": 1090 }, { "epoch": 2.9836065573770494, "grad_norm": 0.3974516987800598, "learning_rate": 3.378761159498547e-06, "loss": 1.1402771472930908, "step": 1092 }, { "epoch": 2.989071038251366, "grad_norm": 0.4319957196712494, "learning_rate": 3.472454486906972e-06, "loss": 0.5495699644088745, "step": 1094 }, { "epoch": 2.994535519125683, "grad_norm": 0.1675819605588913, "learning_rate": 3.5689450626075132e-06, "loss": 1.0523113012313843, "step": 1096 }, { "epoch": 3.0, "grad_norm": 0.25202980637550354, "learning_rate": 3.668197030543573e-06, "loss": 0.6573507785797119, "step": 1098 }, { "epoch": 3.0, "step": 1098, "total_flos": 4.845424164514824e+18, "train_loss": 1.0781439861150388, "train_runtime": 38170.6856, "train_samples_per_second": 1.726, "train_steps_per_second": 0.029 } ], "logging_steps": 2, "max_steps": 1098, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 99999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.845424164514824e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }