{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1098, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00546448087431694, "grad_norm": 0.9973115921020508, "learning_rate": 5.454545454545455e-07, "loss": 2.5618793964385986, "step": 2 }, { "epoch": 0.01092896174863388, "grad_norm": 1.3840559720993042, "learning_rate": 1.6363636363636363e-06, "loss": 1.9354113340377808, "step": 4 }, { "epoch": 0.01639344262295082, "grad_norm": 2.0087080001831055, "learning_rate": 2.7272727272727272e-06, "loss": 1.8657492399215698, "step": 6 }, { "epoch": 0.02185792349726776, "grad_norm": 0.9125573635101318, "learning_rate": 3.818181818181818e-06, "loss": 1.7251954078674316, "step": 8 }, { "epoch": 0.0273224043715847, "grad_norm": 0.5344090461730957, "learning_rate": 4.90909090909091e-06, "loss": 1.6777604818344116, "step": 10 }, { "epoch": 0.03278688524590164, "grad_norm": 0.604454755783081, "learning_rate": 6e-06, "loss": 1.5453071594238281, "step": 12 }, { "epoch": 0.03825136612021858, "grad_norm": 0.7670559883117676, "learning_rate": 7.090909090909091e-06, "loss": 1.4508954286575317, "step": 14 }, { "epoch": 0.04371584699453552, "grad_norm": 0.4149356782436371, "learning_rate": 8.181818181818181e-06, "loss": 1.1633912324905396, "step": 16 }, { "epoch": 0.04918032786885246, "grad_norm": 0.7089836001396179, "learning_rate": 9.272727272727273e-06, "loss": 1.164050579071045, "step": 18 }, { "epoch": 0.0546448087431694, "grad_norm": 0.39713212847709656, "learning_rate": 1.0363636363636364e-05, "loss": 1.3879873752593994, "step": 20 }, { "epoch": 0.060109289617486336, "grad_norm": 0.5368804335594177, "learning_rate": 1.1454545454545455e-05, "loss": 1.3274301290512085, "step": 22 }, { "epoch": 0.06557377049180328, "grad_norm": 0.30250489711761475, "learning_rate": 1.2545454545454545e-05, "loss": 1.385084867477417, "step": 24 }, { "epoch": 0.07103825136612021, "grad_norm": 0.9990394115447998, "learning_rate": 1.3636363636363637e-05, "loss": 1.5793037414550781, "step": 26 }, { "epoch": 0.07650273224043716, "grad_norm": 0.3947621285915375, "learning_rate": 1.4727272727272728e-05, "loss": 1.3561811447143555, "step": 28 }, { "epoch": 0.08196721311475409, "grad_norm": 0.49388208985328674, "learning_rate": 1.5818181818181818e-05, "loss": 1.273718237876892, "step": 30 }, { "epoch": 0.08743169398907104, "grad_norm": 0.32329627871513367, "learning_rate": 1.6909090909090907e-05, "loss": 1.3053655624389648, "step": 32 }, { "epoch": 0.09289617486338798, "grad_norm": 0.718172013759613, "learning_rate": 1.8e-05, "loss": 1.0134309530258179, "step": 34 }, { "epoch": 0.09836065573770492, "grad_norm": 0.27196696400642395, "learning_rate": 1.909090909090909e-05, "loss": 1.1276304721832275, "step": 36 }, { "epoch": 0.10382513661202186, "grad_norm": 0.4729017913341522, "learning_rate": 2.0181818181818183e-05, "loss": 1.3713783025741577, "step": 38 }, { "epoch": 0.1092896174863388, "grad_norm": 0.337189644575119, "learning_rate": 2.1272727272727273e-05, "loss": 1.3217278718948364, "step": 40 }, { "epoch": 0.11475409836065574, "grad_norm": 0.8202743530273438, "learning_rate": 2.2363636363636366e-05, "loss": 1.0108166933059692, "step": 42 }, { "epoch": 0.12021857923497267, "grad_norm": 0.3670739233493805, "learning_rate": 2.3454545454545456e-05, "loss": 0.988700807094574, "step": 44 }, { "epoch": 0.12568306010928962, "grad_norm": 0.3262913227081299, "learning_rate": 2.454545454545455e-05, "loss": 1.3549995422363281, "step": 46 }, { "epoch": 0.13114754098360656, "grad_norm": 0.3616645336151123, "learning_rate": 2.5636363636363635e-05, "loss": 1.3315670490264893, "step": 48 }, { "epoch": 0.1366120218579235, "grad_norm": 0.3672925531864166, "learning_rate": 2.6727272727272728e-05, "loss": 1.3015525341033936, "step": 50 }, { "epoch": 0.14207650273224043, "grad_norm": 0.42094483971595764, "learning_rate": 2.7818181818181818e-05, "loss": 1.2684640884399414, "step": 52 }, { "epoch": 0.14754098360655737, "grad_norm": 2.618896722793579, "learning_rate": 2.890909090909091e-05, "loss": 0.8703136444091797, "step": 54 }, { "epoch": 0.15300546448087432, "grad_norm": 0.3619275391101837, "learning_rate": 3e-05, "loss": 1.4717642068862915, "step": 56 }, { "epoch": 0.15846994535519127, "grad_norm": 0.3370303213596344, "learning_rate": 2.9997491688899256e-05, "loss": 1.451904535293579, "step": 58 }, { "epoch": 0.16393442622950818, "grad_norm": 0.35009288787841797, "learning_rate": 2.998996768768956e-05, "loss": 1.3383069038391113, "step": 60 }, { "epoch": 0.16939890710382513, "grad_norm": 0.4444884955883026, "learning_rate": 2.9977430792302124e-05, "loss": 1.5286966562271118, "step": 62 }, { "epoch": 0.17486338797814208, "grad_norm": 0.7974561452865601, "learning_rate": 2.9959885661467903e-05, "loss": 1.2279592752456665, "step": 64 }, { "epoch": 0.18032786885245902, "grad_norm": 0.3576641082763672, "learning_rate": 2.993733881498636e-05, "loss": 1.3408796787261963, "step": 66 }, { "epoch": 0.18579234972677597, "grad_norm": 0.2858317494392395, "learning_rate": 2.9909798631302736e-05, "loss": 1.3054747581481934, "step": 68 }, { "epoch": 0.1912568306010929, "grad_norm": 0.6741045117378235, "learning_rate": 2.987727534439457e-05, "loss": 1.3402438163757324, "step": 70 }, { "epoch": 0.19672131147540983, "grad_norm": 4.621718406677246, "learning_rate": 2.983978103996877e-05, "loss": 1.189091444015503, "step": 72 }, { "epoch": 0.20218579234972678, "grad_norm": 0.3123322129249573, "learning_rate": 2.9797329650970525e-05, "loss": 1.1878712177276611, "step": 74 }, { "epoch": 0.20765027322404372, "grad_norm": 0.34585925936698914, "learning_rate": 2.974993695240579e-05, "loss": 1.38385808467865, "step": 76 }, { "epoch": 0.21311475409836064, "grad_norm": 2.415900468826294, "learning_rate": 2.9697620555479297e-05, "loss": 0.7725762128829956, "step": 78 }, { "epoch": 0.2185792349726776, "grad_norm": 0.48625677824020386, "learning_rate": 2.9640399901050182e-05, "loss": 0.8228914737701416, "step": 80 }, { "epoch": 0.22404371584699453, "grad_norm": 0.5314300060272217, "learning_rate": 2.9578296252407734e-05, "loss": 1.5916510820388794, "step": 82 }, { "epoch": 0.22950819672131148, "grad_norm": 0.7009768486022949, "learning_rate": 2.9511332687369917e-05, "loss": 0.8212544918060303, "step": 84 }, { "epoch": 0.23497267759562843, "grad_norm": 0.2534927427768707, "learning_rate": 2.9439534089707624e-05, "loss": 1.3987239599227905, "step": 86 }, { "epoch": 0.24043715846994534, "grad_norm": 0.3312060534954071, "learning_rate": 2.9362927139897832e-05, "loss": 1.3285937309265137, "step": 88 }, { "epoch": 0.2459016393442623, "grad_norm": 0.9822537899017334, "learning_rate": 2.9281540305209068e-05, "loss": 0.9706141352653503, "step": 90 }, { "epoch": 0.25136612021857924, "grad_norm": 0.6968846321105957, "learning_rate": 2.919540382912294e-05, "loss": 1.4107171297073364, "step": 92 }, { "epoch": 0.2568306010928962, "grad_norm": 0.4691411256790161, "learning_rate": 2.9104549720095634e-05, "loss": 1.2292362451553345, "step": 94 }, { "epoch": 0.26229508196721313, "grad_norm": 0.8305396437644958, "learning_rate": 2.9009011739663467e-05, "loss": 1.6303619146347046, "step": 96 }, { "epoch": 0.2677595628415301, "grad_norm": 0.4047655761241913, "learning_rate": 2.8908825389897094e-05, "loss": 1.355845332145691, "step": 98 }, { "epoch": 0.273224043715847, "grad_norm": 0.2581457793712616, "learning_rate": 2.8804027900208843e-05, "loss": 1.1548889875411987, "step": 100 }, { "epoch": 0.2786885245901639, "grad_norm": 0.41052544116973877, "learning_rate": 2.8694658213518226e-05, "loss": 1.3467285633087158, "step": 102 }, { "epoch": 0.28415300546448086, "grad_norm": 0.6879177689552307, "learning_rate": 2.8580756971780686e-05, "loss": 1.0806087255477905, "step": 104 }, { "epoch": 0.2896174863387978, "grad_norm": 0.27630922198295593, "learning_rate": 2.846236650088497e-05, "loss": 1.3213335275650024, "step": 106 }, { "epoch": 0.29508196721311475, "grad_norm": 0.2548673152923584, "learning_rate": 2.833953079492476e-05, "loss": 1.4522829055786133, "step": 108 }, { "epoch": 0.3005464480874317, "grad_norm": 0.6334999203681946, "learning_rate": 2.82122954998504e-05, "loss": 1.3047692775726318, "step": 110 }, { "epoch": 0.30601092896174864, "grad_norm": 0.2597161531448364, "learning_rate": 2.808070789650679e-05, "loss": 1.3145617246627808, "step": 112 }, { "epoch": 0.3114754098360656, "grad_norm": 0.5719535946846008, "learning_rate": 2.7944816883063727e-05, "loss": 1.4522080421447754, "step": 114 }, { "epoch": 0.31693989071038253, "grad_norm": 0.3497866690158844, "learning_rate": 2.7804672956845295e-05, "loss": 1.2907500267028809, "step": 116 }, { "epoch": 0.3224043715846995, "grad_norm": 0.5994348526000977, "learning_rate": 2.766032819556495e-05, "loss": 1.1124215126037598, "step": 118 }, { "epoch": 0.32786885245901637, "grad_norm": 3.034389019012451, "learning_rate": 2.7511836237973366e-05, "loss": 1.3056573867797852, "step": 120 }, { "epoch": 0.3333333333333333, "grad_norm": 0.41870880126953125, "learning_rate": 2.735925226392618e-05, "loss": 1.3432774543762207, "step": 122 }, { "epoch": 0.33879781420765026, "grad_norm": 0.3683644235134125, "learning_rate": 2.7202632973879086e-05, "loss": 1.0805763006210327, "step": 124 }, { "epoch": 0.3442622950819672, "grad_norm": 0.34786108136177063, "learning_rate": 2.7042036567817838e-05, "loss": 1.1195154190063477, "step": 126 }, { "epoch": 0.34972677595628415, "grad_norm": 1.6386656761169434, "learning_rate": 2.6877522723631036e-05, "loss": 1.3379027843475342, "step": 128 }, { "epoch": 0.3551912568306011, "grad_norm": 0.6847893595695496, "learning_rate": 2.6709152574933727e-05, "loss": 1.282078742980957, "step": 130 }, { "epoch": 0.36065573770491804, "grad_norm": 0.2398746758699417, "learning_rate": 2.6536988688350067e-05, "loss": 1.2325066328048706, "step": 132 }, { "epoch": 0.366120218579235, "grad_norm": 1.0792237520217896, "learning_rate": 2.6361095040263437e-05, "loss": 1.1317086219787598, "step": 134 }, { "epoch": 0.37158469945355194, "grad_norm": 1.1165977716445923, "learning_rate": 2.618153699304274e-05, "loss": 1.0417739152908325, "step": 136 }, { "epoch": 0.3770491803278688, "grad_norm": 0.18367436528205872, "learning_rate": 2.599838127075361e-05, "loss": 1.01582932472229, "step": 138 }, { "epoch": 0.3825136612021858, "grad_norm": 0.4686935842037201, "learning_rate": 2.5811695934363666e-05, "loss": 0.6732921004295349, "step": 140 }, { "epoch": 0.3879781420765027, "grad_norm": 0.2868647277355194, "learning_rate": 2.5621550356450914e-05, "loss": 1.253921389579773, "step": 142 }, { "epoch": 0.39344262295081966, "grad_norm": 0.22526639699935913, "learning_rate": 2.5428015195424825e-05, "loss": 1.2781373262405396, "step": 144 }, { "epoch": 0.3989071038251366, "grad_norm": 0.2401130348443985, "learning_rate": 2.5231162369269498e-05, "loss": 1.240103006362915, "step": 146 }, { "epoch": 0.40437158469945356, "grad_norm": 0.34567099809646606, "learning_rate": 2.503106502881889e-05, "loss": 1.2351547479629517, "step": 148 }, { "epoch": 0.4098360655737705, "grad_norm": 0.38193589448928833, "learning_rate": 2.4827797530573762e-05, "loss": 1.1751936674118042, "step": 150 }, { "epoch": 0.41530054644808745, "grad_norm": 0.2586861848831177, "learning_rate": 2.4621435409070757e-05, "loss": 1.2725920677185059, "step": 152 }, { "epoch": 0.4207650273224044, "grad_norm": 0.3629944920539856, "learning_rate": 2.4412055348813602e-05, "loss": 1.2570769786834717, "step": 154 }, { "epoch": 0.4262295081967213, "grad_norm": 0.4387210011482239, "learning_rate": 2.4199735155777017e-05, "loss": 1.3026829957962036, "step": 156 }, { "epoch": 0.43169398907103823, "grad_norm": 0.7413071393966675, "learning_rate": 2.3984553728493914e-05, "loss": 1.008730173110962, "step": 158 }, { "epoch": 0.4371584699453552, "grad_norm": 0.2501511573791504, "learning_rate": 2.3766591028736547e-05, "loss": 1.2349580526351929, "step": 160 }, { "epoch": 0.4426229508196721, "grad_norm": 1.2075088024139404, "learning_rate": 2.3545928051802588e-05, "loss": 0.7488419413566589, "step": 162 }, { "epoch": 0.44808743169398907, "grad_norm": 0.5644823312759399, "learning_rate": 2.332264679641717e-05, "loss": 1.6321135759353638, "step": 164 }, { "epoch": 0.453551912568306, "grad_norm": 1.0617799758911133, "learning_rate": 2.3096830234261996e-05, "loss": 0.7753019332885742, "step": 166 }, { "epoch": 0.45901639344262296, "grad_norm": 0.28146958351135254, "learning_rate": 2.2868562279142912e-05, "loss": 1.1729671955108643, "step": 168 }, { "epoch": 0.4644808743169399, "grad_norm": 0.29739731550216675, "learning_rate": 2.2637927755807458e-05, "loss": 1.268522024154663, "step": 170 }, { "epoch": 0.46994535519125685, "grad_norm": 1.04466712474823, "learning_rate": 2.2405012368423786e-05, "loss": 1.330442190170288, "step": 172 }, { "epoch": 0.47540983606557374, "grad_norm": 0.46707776188850403, "learning_rate": 2.2169902668732893e-05, "loss": 1.4469293355941772, "step": 174 }, { "epoch": 0.4808743169398907, "grad_norm": 0.4159034192562103, "learning_rate": 2.193268602388583e-05, "loss": 1.2424243688583374, "step": 176 }, { "epoch": 0.48633879781420764, "grad_norm": 0.5740254521369934, "learning_rate": 2.1693450583977953e-05, "loss": 1.2044943571090698, "step": 178 }, { "epoch": 0.4918032786885246, "grad_norm": 0.35157784819602966, "learning_rate": 2.1452285249292147e-05, "loss": 1.2033029794692993, "step": 180 }, { "epoch": 0.4972677595628415, "grad_norm": 0.5813264846801758, "learning_rate": 2.12092796372634e-05, "loss": 1.2133545875549316, "step": 182 }, { "epoch": 0.5027322404371585, "grad_norm": 1.1736332178115845, "learning_rate": 2.096452404917679e-05, "loss": 1.3634682893753052, "step": 184 }, { "epoch": 0.5081967213114754, "grad_norm": 0.48712271451950073, "learning_rate": 2.0718109436611348e-05, "loss": 1.239789605140686, "step": 186 }, { "epoch": 0.5136612021857924, "grad_norm": 0.7002618908882141, "learning_rate": 2.0470127367642345e-05, "loss": 1.2088130712509155, "step": 188 }, { "epoch": 0.5191256830601093, "grad_norm": 0.3209463059902191, "learning_rate": 2.022066999281444e-05, "loss": 1.2543022632598877, "step": 190 }, { "epoch": 0.5245901639344263, "grad_norm": 0.5514739155769348, "learning_rate": 1.9969830010898358e-05, "loss": 1.2285394668579102, "step": 192 }, { "epoch": 0.5300546448087432, "grad_norm": 0.41652482748031616, "learning_rate": 1.9717700634443903e-05, "loss": 1.2374595403671265, "step": 194 }, { "epoch": 0.5355191256830601, "grad_norm": 0.3611076772212982, "learning_rate": 1.9464375555142e-05, "loss": 1.288169503211975, "step": 196 }, { "epoch": 0.5409836065573771, "grad_norm": 0.3940355181694031, "learning_rate": 1.9209948909008734e-05, "loss": 1.1237924098968506, "step": 198 }, { "epoch": 0.546448087431694, "grad_norm": 0.3027671277523041, "learning_rate": 1.8954515241404218e-05, "loss": 1.2617192268371582, "step": 200 }, { "epoch": 0.5519125683060109, "grad_norm": 0.3746492266654968, "learning_rate": 1.8698169471899414e-05, "loss": 1.1039533615112305, "step": 202 }, { "epoch": 0.5573770491803278, "grad_norm": 0.8580636382102966, "learning_rate": 1.8441006859003842e-05, "loss": 0.9195830821990967, "step": 204 }, { "epoch": 0.5628415300546448, "grad_norm": 0.33917945623397827, "learning_rate": 1.818312296476737e-05, "loss": 1.270058274269104, "step": 206 }, { "epoch": 0.5683060109289617, "grad_norm": 0.3017415404319763, "learning_rate": 1.792461361926921e-05, "loss": 1.0368424654006958, "step": 208 }, { "epoch": 0.5737704918032787, "grad_norm": 0.3447563052177429, "learning_rate": 1.766557488500727e-05, "loss": 1.216840386390686, "step": 210 }, { "epoch": 0.5792349726775956, "grad_norm": 0.3713569641113281, "learning_rate": 1.7406103021201212e-05, "loss": 1.5115100145339966, "step": 212 }, { "epoch": 0.5846994535519126, "grad_norm": 0.19028295576572418, "learning_rate": 1.7146294448022335e-05, "loss": 1.2231677770614624, "step": 214 }, { "epoch": 0.5901639344262295, "grad_norm": 0.9744280576705933, "learning_rate": 1.688624571076371e-05, "loss": 1.5730527639389038, "step": 216 }, { "epoch": 0.5956284153005464, "grad_norm": 0.2510036528110504, "learning_rate": 1.6626053443963762e-05, "loss": 1.2018402814865112, "step": 218 }, { "epoch": 0.6010928961748634, "grad_norm": 0.26169928908348083, "learning_rate": 1.636581433549674e-05, "loss": 1.2398658990859985, "step": 220 }, { "epoch": 0.6065573770491803, "grad_norm": 0.34778648614883423, "learning_rate": 1.610562509064332e-05, "loss": 1.107602596282959, "step": 222 }, { "epoch": 0.6120218579234973, "grad_norm": 0.5183984637260437, "learning_rate": 1.5845582396154786e-05, "loss": 1.040785551071167, "step": 224 }, { "epoch": 0.6174863387978142, "grad_norm": 0.21732346713542938, "learning_rate": 1.5585782884324064e-05, "loss": 1.2164437770843506, "step": 226 }, { "epoch": 0.6229508196721312, "grad_norm": 0.8948683142662048, "learning_rate": 1.5326323097077015e-05, "loss": 1.229734182357788, "step": 228 }, { "epoch": 0.6284153005464481, "grad_norm": 0.2247624546289444, "learning_rate": 1.5067299450097261e-05, "loss": 0.7190737128257751, "step": 230 }, { "epoch": 0.6338797814207651, "grad_norm": 0.305253267288208, "learning_rate": 1.4808808196998006e-05, "loss": 1.22504460811615, "step": 232 }, { "epoch": 0.639344262295082, "grad_norm": 1.4114190340042114, "learning_rate": 1.4550945393554004e-05, "loss": 0.8749437928199768, "step": 234 }, { "epoch": 0.644808743169399, "grad_norm": 1.5399497747421265, "learning_rate": 1.4293806862007085e-05, "loss": 0.9087380766868591, "step": 236 }, { "epoch": 0.6502732240437158, "grad_norm": 0.6150895357131958, "learning_rate": 1.4037488155458448e-05, "loss": 1.303091287612915, "step": 238 }, { "epoch": 0.6557377049180327, "grad_norm": 0.5875135064125061, "learning_rate": 1.3782084522360981e-05, "loss": 1.1951395273208618, "step": 240 }, { "epoch": 0.6612021857923497, "grad_norm": 0.7042756676673889, "learning_rate": 1.3527690871124762e-05, "loss": 1.1392256021499634, "step": 242 }, { "epoch": 0.6666666666666666, "grad_norm": 0.628507673740387, "learning_rate": 1.3274401734848958e-05, "loss": 1.0318241119384766, "step": 244 }, { "epoch": 0.6721311475409836, "grad_norm": 0.24752730131149292, "learning_rate": 1.3022311236193156e-05, "loss": 1.1875934600830078, "step": 246 }, { "epoch": 0.6775956284153005, "grad_norm": 0.5493347644805908, "learning_rate": 1.2771513052401236e-05, "loss": 1.2356622219085693, "step": 248 }, { "epoch": 0.6830601092896175, "grad_norm": 0.3261685371398926, "learning_rate": 1.2522100380490744e-05, "loss": 1.2128081321716309, "step": 250 }, { "epoch": 0.6885245901639344, "grad_norm": 0.4376692771911621, "learning_rate": 1.2274165902620732e-05, "loss": 1.1218929290771484, "step": 252 }, { "epoch": 0.6939890710382514, "grad_norm": 0.335615336894989, "learning_rate": 1.2027801751650918e-05, "loss": 1.1654932498931885, "step": 254 }, { "epoch": 0.6994535519125683, "grad_norm": 0.4503217041492462, "learning_rate": 1.1783099476904972e-05, "loss": 1.1567964553833008, "step": 256 }, { "epoch": 0.7049180327868853, "grad_norm": 0.3116782605648041, "learning_rate": 1.1540150010150599e-05, "loss": 1.0923233032226562, "step": 258 }, { "epoch": 0.7103825136612022, "grad_norm": 0.2738410234451294, "learning_rate": 1.1299043631809205e-05, "loss": 1.159534215927124, "step": 260 }, { "epoch": 0.7158469945355191, "grad_norm": 0.8347621560096741, "learning_rate": 1.1059869937407486e-05, "loss": 1.2569533586502075, "step": 262 }, { "epoch": 0.7213114754098361, "grad_norm": 0.2674843370914459, "learning_rate": 1.082271780428362e-05, "loss": 1.1951040029525757, "step": 264 }, { "epoch": 0.726775956284153, "grad_norm": 0.3185964524745941, "learning_rate": 1.0587675358560278e-05, "loss": 1.0627436637878418, "step": 266 }, { "epoch": 0.73224043715847, "grad_norm": 0.5771196484565735, "learning_rate": 1.0354829942396837e-05, "loss": 1.1797475814819336, "step": 268 }, { "epoch": 0.7377049180327869, "grad_norm": 0.2694435119628906, "learning_rate": 1.012426808153287e-05, "loss": 1.1862552165985107, "step": 270 }, { "epoch": 0.7431693989071039, "grad_norm": 0.3423052132129669, "learning_rate": 9.896075453135039e-06, "loss": 1.1492632627487183, "step": 272 }, { "epoch": 0.7486338797814208, "grad_norm": 0.38316285610198975, "learning_rate": 9.67033685395934e-06, "loss": 1.167414903640747, "step": 274 }, { "epoch": 0.7540983606557377, "grad_norm": 0.3846551477909088, "learning_rate": 9.447136168840466e-06, "loss": 1.2184109687805176, "step": 276 }, { "epoch": 0.7595628415300546, "grad_norm": 0.44415712356567383, "learning_rate": 9.226556339520069e-06, "loss": 1.1867570877075195, "step": 278 }, { "epoch": 0.7650273224043715, "grad_norm": 0.8969565033912659, "learning_rate": 9.008679333825478e-06, "loss": 0.8917623162269592, "step": 280 }, { "epoch": 0.7704918032786885, "grad_norm": 0.28591468930244446, "learning_rate": 8.793586115210326e-06, "loss": 1.188754677772522, "step": 282 }, { "epoch": 0.7759562841530054, "grad_norm": 0.3005872666835785, "learning_rate": 8.581356612668382e-06, "loss": 1.173933744430542, "step": 284 }, { "epoch": 0.7814207650273224, "grad_norm": 0.2967031002044678, "learning_rate": 8.372069691031804e-06, "loss": 1.1837879419326782, "step": 286 }, { "epoch": 0.7868852459016393, "grad_norm": 0.2758060395717621, "learning_rate": 8.165803121664869e-06, "loss": 1.1517661809921265, "step": 288 }, { "epoch": 0.7923497267759563, "grad_norm": 0.29872316122055054, "learning_rate": 7.962633553563965e-06, "loss": 1.1168982982635498, "step": 290 }, { "epoch": 0.7978142076502732, "grad_norm": 0.3943071961402893, "learning_rate": 7.762636484874723e-06, "loss": 1.1435798406600952, "step": 292 }, { "epoch": 0.8032786885245902, "grad_norm": 0.4497228264808655, "learning_rate": 7.565886234836767e-06, "loss": 1.1566179990768433, "step": 294 }, { "epoch": 0.8087431693989071, "grad_norm": 0.354144424200058, "learning_rate": 7.3724559161665876e-06, "loss": 1.5012860298156738, "step": 296 }, { "epoch": 0.8142076502732241, "grad_norm": 1.309401273727417, "learning_rate": 7.182417407888703e-06, "loss": 0.9079286456108093, "step": 298 }, { "epoch": 0.819672131147541, "grad_norm": 0.3585984706878662, "learning_rate": 6.995841328625321e-06, "loss": 1.5736669301986694, "step": 300 }, { "epoch": 0.825136612021858, "grad_norm": 0.5723401308059692, "learning_rate": 6.812797010354325e-06, "loss": 1.192858099937439, "step": 302 }, { "epoch": 0.8306010928961749, "grad_norm": 2.9767942428588867, "learning_rate": 6.63335247264542e-06, "loss": 0.7880658507347107, "step": 304 }, { "epoch": 0.8360655737704918, "grad_norm": 1.3081823587417603, "learning_rate": 6.457574397383919e-06, "loss": 1.5271462202072144, "step": 306 }, { "epoch": 0.8415300546448088, "grad_norm": 0.34140118956565857, "learning_rate": 6.285528103991665e-06, "loss": 0.8412893414497375, "step": 308 }, { "epoch": 0.8469945355191257, "grad_norm": 0.3129923939704895, "learning_rate": 6.117277525154225e-06, "loss": 1.20363187789917, "step": 310 }, { "epoch": 0.8524590163934426, "grad_norm": 0.383305162191391, "learning_rate": 5.952885183063397e-06, "loss": 1.1824288368225098, "step": 312 }, { "epoch": 0.8579234972677595, "grad_norm": 0.3238297402858734, "learning_rate": 5.792412166183841e-06, "loss": 1.1946678161621094, "step": 314 }, { "epoch": 0.8633879781420765, "grad_norm": 2.138742208480835, "learning_rate": 5.635918106552546e-06, "loss": 1.201963186264038, "step": 316 }, { "epoch": 0.8688524590163934, "grad_norm": 0.3680674433708191, "learning_rate": 5.483461157619428e-06, "loss": 1.141302227973938, "step": 318 }, { "epoch": 0.8743169398907104, "grad_norm": 0.2648313641548157, "learning_rate": 5.335097972637441e-06, "loss": 1.3266459703445435, "step": 320 }, { "epoch": 0.8797814207650273, "grad_norm": 0.2844366431236267, "learning_rate": 5.1908836836101135e-06, "loss": 1.123349666595459, "step": 322 }, { "epoch": 0.8852459016393442, "grad_norm": 0.5806925296783447, "learning_rate": 5.050871880804414e-06, "loss": 0.5679168701171875, "step": 324 }, { "epoch": 0.8907103825136612, "grad_norm": 0.2976682484149933, "learning_rate": 4.915114592836521e-06, "loss": 1.1642491817474365, "step": 326 }, { "epoch": 0.8961748633879781, "grad_norm": 1.0322918891906738, "learning_rate": 4.783662267337909e-06, "loss": 1.0334808826446533, "step": 328 }, { "epoch": 0.9016393442622951, "grad_norm": 0.48543456196784973, "learning_rate": 4.656563752208907e-06, "loss": 1.186019778251648, "step": 330 }, { "epoch": 0.907103825136612, "grad_norm": 0.8842418789863586, "learning_rate": 4.533866277466767e-06, "loss": 1.249828815460205, "step": 332 }, { "epoch": 0.912568306010929, "grad_norm": 0.45925524830818176, "learning_rate": 4.415615437694876e-06, "loss": 1.0486047267913818, "step": 334 }, { "epoch": 0.9180327868852459, "grad_norm": 0.49386680126190186, "learning_rate": 4.3018551750997694e-06, "loss": 1.14175283908844, "step": 336 }, { "epoch": 0.9234972677595629, "grad_norm": 0.4923037886619568, "learning_rate": 4.192627763182111e-06, "loss": 1.1671501398086548, "step": 338 }, { "epoch": 0.9289617486338798, "grad_norm": 0.31538236141204834, "learning_rate": 4.087973791027797e-06, "loss": 1.1591607332229614, "step": 340 }, { "epoch": 0.9344262295081968, "grad_norm": 0.31360623240470886, "learning_rate": 3.987932148224993e-06, "loss": 1.1940336227416992, "step": 342 }, { "epoch": 0.9398907103825137, "grad_norm": 1.0204191207885742, "learning_rate": 3.8925400104126834e-06, "loss": 1.1359769105911255, "step": 344 }, { "epoch": 0.9453551912568307, "grad_norm": 0.6455877423286438, "learning_rate": 3.8018328254661618e-06, "loss": 1.5209176540374756, "step": 346 }, { "epoch": 0.9508196721311475, "grad_norm": 0.6131592392921448, "learning_rate": 3.715844300324527e-06, "loss": 0.8120126724243164, "step": 348 }, { "epoch": 0.9562841530054644, "grad_norm": 0.3298521339893341, "learning_rate": 3.6346063884651327e-06, "loss": 0.8144136071205139, "step": 350 }, { "epoch": 0.9617486338797814, "grad_norm": 0.35196903347969055, "learning_rate": 3.558149278029624e-06, "loss": 1.1486433744430542, "step": 352 }, { "epoch": 0.9672131147540983, "grad_norm": 1.0103577375411987, "learning_rate": 3.4865013806059817e-06, "loss": 0.8401246666908264, "step": 354 }, { "epoch": 0.9726775956284153, "grad_norm": 0.42854589223861694, "learning_rate": 3.419689320670712e-06, "loss": 1.042449951171875, "step": 356 }, { "epoch": 0.9781420765027322, "grad_norm": 0.37466299533843994, "learning_rate": 3.35773792569517e-06, "loss": 0.7528899312019348, "step": 358 }, { "epoch": 0.9836065573770492, "grad_norm": 0.2885702848434448, "learning_rate": 3.300670216919602e-06, "loss": 1.155380129814148, "step": 360 }, { "epoch": 0.9890710382513661, "grad_norm": 0.33391743898391724, "learning_rate": 3.2485074007984468e-06, "loss": 0.9418554306030273, "step": 362 }, { "epoch": 0.994535519125683, "grad_norm": 0.2690609097480774, "learning_rate": 3.2012688611199566e-06, "loss": 1.1393744945526123, "step": 364 }, { "epoch": 1.0, "grad_norm": 0.4000090956687927, "learning_rate": 3.158972151803165e-06, "loss": 1.1187925338745117, "step": 366 }, { "epoch": 1.005464480874317, "grad_norm": 0.40801742672920227, "learning_rate": 3.1216329903748095e-06, "loss": 0.981923520565033, "step": 368 }, { "epoch": 1.010928961748634, "grad_norm": 0.39118003845214844, "learning_rate": 3.089265252128686e-06, "loss": 1.0384995937347412, "step": 370 }, { "epoch": 1.0163934426229508, "grad_norm": 0.3082128167152405, "learning_rate": 3.061880964969555e-06, "loss": 0.9732365608215332, "step": 372 }, { "epoch": 1.0218579234972678, "grad_norm": 0.41411662101745605, "learning_rate": 3.039490304943562e-06, "loss": 1.0013678073883057, "step": 374 }, { "epoch": 1.0273224043715847, "grad_norm": 0.6949254870414734, "learning_rate": 3.022101592456795e-06, "loss": 0.848197877407074, "step": 376 }, { "epoch": 1.0327868852459017, "grad_norm": 0.4030425250530243, "learning_rate": 3.0097212891834095e-06, "loss": 0.886638879776001, "step": 378 }, { "epoch": 1.0382513661202186, "grad_norm": 0.37187013030052185, "learning_rate": 3.0023539956644634e-06, "loss": 0.60066819190979, "step": 380 }, { "epoch": 1.0437158469945356, "grad_norm": 0.7314932942390442, "learning_rate": 3.0000024495983428e-06, "loss": 0.9156309962272644, "step": 382 }, { "epoch": 1.0491803278688525, "grad_norm": 0.4109429121017456, "learning_rate": 3.002667524823434e-06, "loss": 0.9761273860931396, "step": 384 }, { "epoch": 1.0546448087431695, "grad_norm": 0.7147516012191772, "learning_rate": 3.010348230993402e-06, "loss": 0.6097055077552795, "step": 386 }, { "epoch": 1.0601092896174864, "grad_norm": 0.5150865316390991, "learning_rate": 3.0230417139451987e-06, "loss": 0.5354875326156616, "step": 388 }, { "epoch": 1.0655737704918034, "grad_norm": 0.5118747353553772, "learning_rate": 3.0407432567596883e-06, "loss": 1.1215546131134033, "step": 390 }, { "epoch": 1.0710382513661203, "grad_norm": 0.3243095874786377, "learning_rate": 3.0634462815144474e-06, "loss": 0.8853414058685303, "step": 392 }, { "epoch": 1.0765027322404372, "grad_norm": 1.0932362079620361, "learning_rate": 3.0911423517281404e-06, "loss": 0.5655322670936584, "step": 394 }, { "epoch": 1.0819672131147542, "grad_norm": 0.5749414563179016, "learning_rate": 3.1238211754955294e-06, "loss": 0.8961263298988342, "step": 396 }, { "epoch": 1.0874316939890711, "grad_norm": 0.4210751950740814, "learning_rate": 3.161470609311961e-06, "loss": 0.9246914386749268, "step": 398 }, { "epoch": 1.092896174863388, "grad_norm": 0.4931108057498932, "learning_rate": 3.2040766625859115e-06, "loss": 1.0521095991134644, "step": 400 }, { "epoch": 1.098360655737705, "grad_norm": 3.348501205444336, "learning_rate": 3.2516235028379157e-06, "loss": 0.9077386260032654, "step": 402 }, { "epoch": 1.1038251366120218, "grad_norm": 0.3238441050052643, "learning_rate": 3.304093461583944e-06, "loss": 0.970809817314148, "step": 404 }, { "epoch": 1.1092896174863387, "grad_norm": 1.0212552547454834, "learning_rate": 3.3614670409010353e-06, "loss": 1.094781756401062, "step": 406 }, { "epoch": 1.1147540983606556, "grad_norm": 0.5059043765068054, "learning_rate": 3.4237229206727602e-06, "loss": 0.643450140953064, "step": 408 }, { "epoch": 1.1202185792349726, "grad_norm": 0.34191587567329407, "learning_rate": 3.490837966511817e-06, "loss": 1.1581542491912842, "step": 410 }, { "epoch": 1.1256830601092895, "grad_norm": 1.0587300062179565, "learning_rate": 3.5627872383567937e-06, "loss": 0.7080036997795105, "step": 412 }, { "epoch": 1.1311475409836065, "grad_norm": 0.2536488175392151, "learning_rate": 3.6395439997399494e-06, "loss": 0.6488229632377625, "step": 414 }, { "epoch": 1.1366120218579234, "grad_norm": 0.8057353496551514, "learning_rate": 3.721079727722522e-06, "loss": 0.6402009129524231, "step": 416 }, { "epoch": 1.1420765027322404, "grad_norm": 0.29355666041374207, "learning_rate": 3.8073641234939055e-06, "loss": 0.9499251842498779, "step": 418 }, { "epoch": 1.1475409836065573, "grad_norm": 0.3825172185897827, "learning_rate": 3.898365123630732e-06, "loss": 0.8552459478378296, "step": 420 }, { "epoch": 1.1530054644808743, "grad_norm": 0.3960343897342682, "learning_rate": 3.994048912011692e-06, "loss": 0.9106036424636841, "step": 422 }, { "epoch": 1.1584699453551912, "grad_norm": 1.2985180616378784, "learning_rate": 4.094379932383666e-06, "loss": 0.7228477597236633, "step": 424 }, { "epoch": 1.1639344262295082, "grad_norm": 0.29699158668518066, "learning_rate": 4.199320901574489e-06, "loss": 0.6496458053588867, "step": 426 }, { "epoch": 1.169398907103825, "grad_norm": 0.3441046476364136, "learning_rate": 4.3088328233474185e-06, "loss": 1.1220229864120483, "step": 428 }, { "epoch": 1.174863387978142, "grad_norm": 0.38102617859840393, "learning_rate": 4.422875002892234e-06, "loss": 0.9543809294700623, "step": 430 }, { "epoch": 1.180327868852459, "grad_norm": 1.1369144916534424, "learning_rate": 4.54140506194747e-06, "loss": 0.8527680039405823, "step": 432 }, { "epoch": 1.185792349726776, "grad_norm": 3.073438882827759, "learning_rate": 4.664378954548241e-06, "loss": 0.7841230630874634, "step": 434 }, { "epoch": 1.1912568306010929, "grad_norm": 0.28754952549934387, "learning_rate": 4.791750983393832e-06, "loss": 0.9805347919464111, "step": 436 }, { "epoch": 1.1967213114754098, "grad_norm": 0.23602163791656494, "learning_rate": 4.9234738168288466e-06, "loss": 0.8651551008224487, "step": 438 }, { "epoch": 1.2021857923497268, "grad_norm": 0.3787078857421875, "learning_rate": 5.059498506431758e-06, "loss": 0.9147478342056274, "step": 440 }, { "epoch": 1.2076502732240437, "grad_norm": 0.513213038444519, "learning_rate": 5.199774505204206e-06, "loss": 0.9759296774864197, "step": 442 }, { "epoch": 1.2131147540983607, "grad_norm": 0.38409918546676636, "learning_rate": 5.344249686354357e-06, "loss": 0.9177553653717041, "step": 444 }, { "epoch": 1.2185792349726776, "grad_norm": 0.296101450920105, "learning_rate": 5.492870362667299e-06, "loss": 0.5355038046836853, "step": 446 }, { "epoch": 1.2240437158469946, "grad_norm": 0.6465612053871155, "learning_rate": 5.645581306455302e-06, "loss": 0.8347874879837036, "step": 448 }, { "epoch": 1.2295081967213115, "grad_norm": 0.378653347492218, "learning_rate": 5.802325770080506e-06, "loss": 0.9033167362213135, "step": 450 }, { "epoch": 1.2349726775956285, "grad_norm": 0.48362499475479126, "learning_rate": 5.96304550704246e-06, "loss": 0.971287727355957, "step": 452 }, { "epoch": 1.2404371584699454, "grad_norm": 0.3260844647884369, "learning_rate": 6.127680793622588e-06, "loss": 0.27598556876182556, "step": 454 }, { "epoch": 1.2459016393442623, "grad_norm": 4.79307746887207, "learning_rate": 6.296170451077657e-06, "loss": 0.7720631957054138, "step": 456 }, { "epoch": 1.2513661202185793, "grad_norm": 0.5071384906768799, "learning_rate": 6.468451868373856e-06, "loss": 0.6973821520805359, "step": 458 }, { "epoch": 1.2568306010928962, "grad_norm": 0.4039640426635742, "learning_rate": 6.6444610254532e-06, "loss": 0.9609543085098267, "step": 460 }, { "epoch": 1.2622950819672132, "grad_norm": 0.5991283059120178, "learning_rate": 6.824132517023449e-06, "loss": 0.6427564024925232, "step": 462 }, { "epoch": 1.2677595628415301, "grad_norm": 1.2359015941619873, "learning_rate": 7.007399576862872e-06, "loss": 1.003535509109497, "step": 464 }, { "epoch": 1.273224043715847, "grad_norm": 0.6143922209739685, "learning_rate": 7.1941941026306275e-06, "loss": 0.9678149819374084, "step": 466 }, { "epoch": 1.278688524590164, "grad_norm": 0.19856023788452148, "learning_rate": 7.3844466811737555e-06, "loss": 0.8354513645172119, "step": 468 }, { "epoch": 1.2841530054644807, "grad_norm": 0.4483174979686737, "learning_rate": 7.578086614321175e-06, "loss": 1.0250217914581299, "step": 470 }, { "epoch": 1.289617486338798, "grad_norm": 0.2900082468986511, "learning_rate": 7.775041945155295e-06, "loss": 1.206943154335022, "step": 472 }, { "epoch": 1.2950819672131146, "grad_norm": 0.25206395983695984, "learning_rate": 7.975239484751258e-06, "loss": 0.8465134501457214, "step": 474 }, { "epoch": 1.3005464480874318, "grad_norm": 0.44423073530197144, "learning_rate": 8.178604839374125e-06, "loss": 0.4682804346084595, "step": 476 }, { "epoch": 1.3060109289617485, "grad_norm": 0.40745142102241516, "learning_rate": 8.385062438123673e-06, "loss": 1.0709359645843506, "step": 478 }, { "epoch": 1.3114754098360657, "grad_norm": 0.5735993385314941, "learning_rate": 8.594535561016661e-06, "loss": 1.0265499353408813, "step": 480 }, { "epoch": 1.3169398907103824, "grad_norm": 0.25811538100242615, "learning_rate": 8.806946367496155e-06, "loss": 0.9305548667907715, "step": 482 }, { "epoch": 1.3224043715846996, "grad_norm": 0.5877478122711182, "learning_rate": 9.02221592535712e-06, "loss": 0.6250959634780884, "step": 484 }, { "epoch": 1.3278688524590163, "grad_norm": 0.31082847714424133, "learning_rate": 9.240264240077859e-06, "loss": 0.8635732531547546, "step": 486 }, { "epoch": 1.3333333333333333, "grad_norm": 0.4074012041091919, "learning_rate": 9.461010284546016e-06, "loss": 1.0225374698638916, "step": 488 }, { "epoch": 1.3387978142076502, "grad_norm": 0.7163453102111816, "learning_rate": 9.684372029168438e-06, "loss": 0.6194128394126892, "step": 490 }, { "epoch": 1.3442622950819672, "grad_norm": 0.3905765414237976, "learning_rate": 9.91026647235348e-06, "loss": 1.0774719715118408, "step": 492 }, { "epoch": 1.349726775956284, "grad_norm": 0.6559311151504517, "learning_rate": 1.0138609671354586e-05, "loss": 0.877537727355957, "step": 494 }, { "epoch": 1.355191256830601, "grad_norm": 0.4489327371120453, "learning_rate": 1.0369316773463458e-05, "loss": 0.9511969685554504, "step": 496 }, { "epoch": 1.360655737704918, "grad_norm": 0.5031542778015137, "learning_rate": 1.0602302047541566e-05, "loss": 0.83577960729599, "step": 498 }, { "epoch": 1.366120218579235, "grad_norm": 0.40769895911216736, "learning_rate": 1.083747891587788e-05, "loss": 1.0597424507141113, "step": 500 }, { "epoch": 1.3715846994535519, "grad_norm": 0.4040791690349579, "learning_rate": 1.1074759986361392e-05, "loss": 0.9861813187599182, "step": 502 }, { "epoch": 1.3770491803278688, "grad_norm": 0.4598991870880127, "learning_rate": 1.1314057084956073e-05, "loss": 0.5206344723701477, "step": 504 }, { "epoch": 1.3825136612021858, "grad_norm": 0.2866649627685547, "learning_rate": 1.1555281288466553e-05, "loss": 0.7465002536773682, "step": 506 }, { "epoch": 1.3879781420765027, "grad_norm": 0.230438232421875, "learning_rate": 1.1798342957582084e-05, "loss": 0.9311132431030273, "step": 508 }, { "epoch": 1.3934426229508197, "grad_norm": 0.28892356157302856, "learning_rate": 1.2043151770186725e-05, "loss": 0.964007556438446, "step": 510 }, { "epoch": 1.3989071038251366, "grad_norm": 0.2942189872264862, "learning_rate": 1.2289616754923078e-05, "loss": 1.0333213806152344, "step": 512 }, { "epoch": 1.4043715846994536, "grad_norm": 0.29071956872940063, "learning_rate": 1.253764632499752e-05, "loss": 0.6722708344459534, "step": 514 }, { "epoch": 1.4098360655737705, "grad_norm": 0.2511497437953949, "learning_rate": 1.2787148312213901e-05, "loss": 0.7008714079856873, "step": 516 }, { "epoch": 1.4153005464480874, "grad_norm": 0.30298787355422974, "learning_rate": 1.3038030001223439e-05, "loss": 0.9937577843666077, "step": 518 }, { "epoch": 1.4207650273224044, "grad_norm": 0.8531264066696167, "learning_rate": 1.3290198163977933e-05, "loss": 0.8048759698867798, "step": 520 }, { "epoch": 1.4262295081967213, "grad_norm": 0.24621592462062836, "learning_rate": 1.3543559094373372e-05, "loss": 1.039415955543518, "step": 522 }, { "epoch": 1.4316939890710383, "grad_norm": 0.6622775793075562, "learning_rate": 1.3798018643071386e-05, "loss": 0.996492862701416, "step": 524 }, { "epoch": 1.4371584699453552, "grad_norm": 0.8804835677146912, "learning_rate": 1.4053482252485178e-05, "loss": 0.9746305346488953, "step": 526 }, { "epoch": 1.4426229508196722, "grad_norm": 0.5442449450492859, "learning_rate": 1.4309854991917388e-05, "loss": 0.7142194509506226, "step": 528 }, { "epoch": 1.4480874316939891, "grad_norm": 0.4819408059120178, "learning_rate": 1.4567041592836413e-05, "loss": 0.7606571316719055, "step": 530 }, { "epoch": 1.453551912568306, "grad_norm": 0.6655029654502869, "learning_rate": 1.48249464842784e-05, "loss": 0.7583669424057007, "step": 532 }, { "epoch": 1.459016393442623, "grad_norm": 0.30279040336608887, "learning_rate": 1.508347382836153e-05, "loss": 1.036142349243164, "step": 534 }, { "epoch": 1.46448087431694, "grad_norm": 0.6906257271766663, "learning_rate": 1.534252755589961e-05, "loss": 0.963323712348938, "step": 536 }, { "epoch": 1.469945355191257, "grad_norm": 0.3248192369937897, "learning_rate": 1.5602011402101432e-05, "loss": 1.0202207565307617, "step": 538 }, { "epoch": 1.4754098360655736, "grad_norm": 0.5597788691520691, "learning_rate": 1.5861828942343037e-05, "loss": 1.0704936981201172, "step": 540 }, { "epoch": 1.4808743169398908, "grad_norm": 0.3918614089488983, "learning_rate": 1.612188362799917e-05, "loss": 0.9736133217811584, "step": 542 }, { "epoch": 1.4863387978142075, "grad_norm": 0.2836878299713135, "learning_rate": 1.6382078822320964e-05, "loss": 0.8202962875366211, "step": 544 }, { "epoch": 1.4918032786885247, "grad_norm": 1.0198228359222412, "learning_rate": 1.6642317836346324e-05, "loss": 1.0485470294952393, "step": 546 }, { "epoch": 1.4972677595628414, "grad_norm": 0.48356863856315613, "learning_rate": 1.6902503964829644e-05, "loss": 0.4609794318675995, "step": 548 }, { "epoch": 1.5027322404371586, "grad_norm": 0.30593419075012207, "learning_rate": 1.7162540522177685e-05, "loss": 0.9458433985710144, "step": 550 }, { "epoch": 1.5081967213114753, "grad_norm": 0.22826838493347168, "learning_rate": 1.7422330878378113e-05, "loss": 0.9808536171913147, "step": 552 }, { "epoch": 1.5136612021857925, "grad_norm": 0.24512672424316406, "learning_rate": 1.7681778494907298e-05, "loss": 1.0151616334915161, "step": 554 }, { "epoch": 1.5191256830601092, "grad_norm": 0.26424238085746765, "learning_rate": 1.794078696060429e-05, "loss": 1.1077044010162354, "step": 556 }, { "epoch": 1.5245901639344264, "grad_norm": 0.2831886410713196, "learning_rate": 1.819926002749727e-05, "loss": 0.777061939239502, "step": 558 }, { "epoch": 1.530054644808743, "grad_norm": 0.46841201186180115, "learning_rate": 1.84571016465695e-05, "loss": 0.5536693334579468, "step": 560 }, { "epoch": 1.5355191256830603, "grad_norm": 0.40938982367515564, "learning_rate": 1.8714216003451295e-05, "loss": 0.966119110584259, "step": 562 }, { "epoch": 1.540983606557377, "grad_norm": 0.24322885274887085, "learning_rate": 1.8970507554024827e-05, "loss": 0.8420946002006531, "step": 564 }, { "epoch": 1.5464480874316942, "grad_norm": 0.31624189019203186, "learning_rate": 1.922588105992838e-05, "loss": 0.9916543364524841, "step": 566 }, { "epoch": 1.5519125683060109, "grad_norm": 0.8164126873016357, "learning_rate": 1.9480241623947206e-05, "loss": 0.8523349761962891, "step": 568 }, { "epoch": 1.5573770491803278, "grad_norm": 0.3358438014984131, "learning_rate": 1.9733494725277413e-05, "loss": 1.1619439125061035, "step": 570 }, { "epoch": 1.5628415300546448, "grad_norm": 0.2056051790714264, "learning_rate": 1.998554625465005e-05, "loss": 0.9285228848457336, "step": 572 }, { "epoch": 1.5683060109289617, "grad_norm": 0.28299570083618164, "learning_rate": 2.0236302549302293e-05, "loss": 1.0506218671798706, "step": 574 }, { "epoch": 1.5737704918032787, "grad_norm": 0.7570331692695618, "learning_rate": 2.0485670427782644e-05, "loss": 0.8511103391647339, "step": 576 }, { "epoch": 1.5792349726775956, "grad_norm": 0.34417569637298584, "learning_rate": 2.073355722457739e-05, "loss": 1.080989956855774, "step": 578 }, { "epoch": 1.5846994535519126, "grad_norm": 0.7556970715522766, "learning_rate": 2.0979870824545165e-05, "loss": 0.5906158089637756, "step": 580 }, { "epoch": 1.5901639344262295, "grad_norm": 0.2745168209075928, "learning_rate": 2.1224519697147145e-05, "loss": 0.7238953113555908, "step": 582 }, { "epoch": 1.5956284153005464, "grad_norm": 0.4340955913066864, "learning_rate": 2.1467412930459936e-05, "loss": 0.6840563416481018, "step": 584 }, { "epoch": 1.6010928961748634, "grad_norm": 0.2478933185338974, "learning_rate": 2.1708460264958595e-05, "loss": 1.0408371686935425, "step": 586 }, { "epoch": 1.6065573770491803, "grad_norm": 0.2583276033401489, "learning_rate": 2.194757212705718e-05, "loss": 1.0583165884017944, "step": 588 }, { "epoch": 1.6120218579234973, "grad_norm": 0.31770971417427063, "learning_rate": 2.2184659662394522e-05, "loss": 0.8676682710647583, "step": 590 }, { "epoch": 1.6174863387978142, "grad_norm": 0.1893642693758011, "learning_rate": 2.24196347688526e-05, "loss": 1.0388176441192627, "step": 592 }, { "epoch": 1.6229508196721312, "grad_norm": 1.554314374923706, "learning_rate": 2.265241012929541e-05, "loss": 0.8767275214195251, "step": 594 }, { "epoch": 1.6284153005464481, "grad_norm": 0.3525921404361725, "learning_rate": 2.28828992440162e-05, "loss": 0.40555280447006226, "step": 596 }, { "epoch": 1.633879781420765, "grad_norm": 0.24459926784038544, "learning_rate": 2.3111016462880873e-05, "loss": 1.2219781875610352, "step": 598 }, { "epoch": 1.639344262295082, "grad_norm": 0.24944064021110535, "learning_rate": 2.333667701715578e-05, "loss": 1.16285240650177, "step": 600 }, { "epoch": 1.644808743169399, "grad_norm": 0.21063530445098877, "learning_rate": 2.3559797051007815e-05, "loss": 1.0140953063964844, "step": 602 }, { "epoch": 1.650273224043716, "grad_norm": 0.23877571523189545, "learning_rate": 2.3780293652665477e-05, "loss": 1.088266134262085, "step": 604 }, { "epoch": 1.6557377049180326, "grad_norm": 0.33986350893974304, "learning_rate": 2.399808488522895e-05, "loss": 1.1801600456237793, "step": 606 }, { "epoch": 1.6612021857923498, "grad_norm": 0.26012417674064636, "learning_rate": 2.4213089817118078e-05, "loss": 0.8795619606971741, "step": 608 }, { "epoch": 1.6666666666666665, "grad_norm": 0.2159816473722458, "learning_rate": 2.4425228552146573e-05, "loss": 1.19132661819458, "step": 610 }, { "epoch": 1.6721311475409837, "grad_norm": 1.038149118423462, "learning_rate": 2.4634422259211614e-05, "loss": 0.7393221259117126, "step": 612 }, { "epoch": 1.6775956284153004, "grad_norm": 0.21248742938041687, "learning_rate": 2.4840593201587626e-05, "loss": 1.010488748550415, "step": 614 }, { "epoch": 1.6830601092896176, "grad_norm": 0.21196357905864716, "learning_rate": 2.5043664765813377e-05, "loss": 1.0944840908050537, "step": 616 }, { "epoch": 1.6885245901639343, "grad_norm": 0.35981178283691406, "learning_rate": 2.524356149016163e-05, "loss": 0.5883587598800659, "step": 618 }, { "epoch": 1.6939890710382515, "grad_norm": 0.3057361841201782, "learning_rate": 2.544020909268085e-05, "loss": 1.1578078269958496, "step": 620 }, { "epoch": 1.6994535519125682, "grad_norm": 0.5280173420906067, "learning_rate": 2.5633534498798598e-05, "loss": 1.2335985898971558, "step": 622 }, { "epoch": 1.7049180327868854, "grad_norm": 0.1886816918849945, "learning_rate": 2.5823465868475985e-05, "loss": 0.8703290820121765, "step": 624 }, { "epoch": 1.710382513661202, "grad_norm": 0.16325658559799194, "learning_rate": 2.60099326229037e-05, "loss": 0.7182884216308594, "step": 626 }, { "epoch": 1.7158469945355193, "grad_norm": 0.17951571941375732, "learning_rate": 2.619286547072914e-05, "loss": 1.076783299446106, "step": 628 }, { "epoch": 1.721311475409836, "grad_norm": 0.49342671036720276, "learning_rate": 2.6372196433805214e-05, "loss": 0.7743050456047058, "step": 630 }, { "epoch": 1.7267759562841531, "grad_norm": 0.3581479787826538, "learning_rate": 2.654785887245112e-05, "loss": 1.046736478805542, "step": 632 }, { "epoch": 1.7322404371584699, "grad_norm": 0.19438432157039642, "learning_rate": 2.671978751021577e-05, "loss": 1.142443060874939, "step": 634 }, { "epoch": 1.737704918032787, "grad_norm": 0.20565520226955414, "learning_rate": 2.6887918458134622e-05, "loss": 1.021065354347229, "step": 636 }, { "epoch": 1.7431693989071038, "grad_norm": 0.16628266870975494, "learning_rate": 2.705218923847093e-05, "loss": 1.1118640899658203, "step": 638 }, { "epoch": 1.748633879781421, "grad_norm": 0.35274869203567505, "learning_rate": 2.7212538807932576e-05, "loss": 1.0129691362380981, "step": 640 }, { "epoch": 1.7540983606557377, "grad_norm": 0.19920802116394043, "learning_rate": 2.7368907580355843e-05, "loss": 1.0399836301803589, "step": 642 }, { "epoch": 1.7595628415300546, "grad_norm": 0.2887803018093109, "learning_rate": 2.7521237448847734e-05, "loss": 1.0195159912109375, "step": 644 }, { "epoch": 1.7650273224043715, "grad_norm": 0.2489539533853531, "learning_rate": 2.766947180737861e-05, "loss": 0.6208384037017822, "step": 646 }, { "epoch": 1.7704918032786885, "grad_norm": 0.2961595058441162, "learning_rate": 2.781355557181706e-05, "loss": 1.0299938917160034, "step": 648 }, { "epoch": 1.7759562841530054, "grad_norm": 0.2027096301317215, "learning_rate": 2.7953435200399262e-05, "loss": 1.0568764209747314, "step": 650 }, { "epoch": 1.7814207650273224, "grad_norm": 0.19219909608364105, "learning_rate": 2.8089058713625194e-05, "loss": 1.142011284828186, "step": 652 }, { "epoch": 1.7868852459016393, "grad_norm": 0.17235226929187775, "learning_rate": 2.8220375713574307e-05, "loss": 1.0625348091125488, "step": 654 }, { "epoch": 1.7923497267759563, "grad_norm": 1.209734559059143, "learning_rate": 2.8347337402633456e-05, "loss": 1.0341904163360596, "step": 656 }, { "epoch": 1.7978142076502732, "grad_norm": 0.4596608579158783, "learning_rate": 2.846989660163019e-05, "loss": 1.0296437740325928, "step": 658 }, { "epoch": 1.8032786885245902, "grad_norm": 0.36770763993263245, "learning_rate": 2.858800776736461e-05, "loss": 1.1404849290847778, "step": 660 }, { "epoch": 1.8087431693989071, "grad_norm": 0.2144036591053009, "learning_rate": 2.87016270095333e-05, "loss": 1.0944205522537231, "step": 662 }, { "epoch": 1.814207650273224, "grad_norm": 0.2577725350856781, "learning_rate": 2.8810712107039e-05, "loss": 1.1880568265914917, "step": 664 }, { "epoch": 1.819672131147541, "grad_norm": 1.9875679016113281, "learning_rate": 2.8915222523680082e-05, "loss": 1.3935034275054932, "step": 666 }, { "epoch": 1.825136612021858, "grad_norm": 0.17387168109416962, "learning_rate": 2.9015119423213857e-05, "loss": 1.1480551958084106, "step": 668 }, { "epoch": 1.830601092896175, "grad_norm": 0.2092132717370987, "learning_rate": 2.9110365683788173e-05, "loss": 0.7491433620452881, "step": 670 }, { "epoch": 1.8360655737704918, "grad_norm": 0.49205076694488525, "learning_rate": 2.9200925911735956e-05, "loss": 1.3666801452636719, "step": 672 }, { "epoch": 1.8415300546448088, "grad_norm": 0.21502411365509033, "learning_rate": 2.9286766454727563e-05, "loss": 1.0823509693145752, "step": 674 }, { "epoch": 1.8469945355191257, "grad_norm": 0.19282038509845734, "learning_rate": 2.9367855414276073e-05, "loss": 1.1538946628570557, "step": 676 }, { "epoch": 1.8524590163934427, "grad_norm": 0.1708725392818451, "learning_rate": 2.9444162657590747e-05, "loss": 0.36654239892959595, "step": 678 }, { "epoch": 1.8579234972677594, "grad_norm": 0.18482287228107452, "learning_rate": 2.951565982877447e-05, "loss": 1.1071395874023438, "step": 680 }, { "epoch": 1.8633879781420766, "grad_norm": 0.1782202124595642, "learning_rate": 2.9582320359360864e-05, "loss": 1.1017282009124756, "step": 682 }, { "epoch": 1.8688524590163933, "grad_norm": 0.251668244600296, "learning_rate": 2.9644119478187126e-05, "loss": 1.1279935836791992, "step": 684 }, { "epoch": 1.8743169398907105, "grad_norm": 9.041706085205078, "learning_rate": 2.9701034220599074e-05, "loss": 1.0460294485092163, "step": 686 }, { "epoch": 1.8797814207650272, "grad_norm": 0.18143460154533386, "learning_rate": 2.975304343698483e-05, "loss": 1.1729539632797241, "step": 688 }, { "epoch": 1.8852459016393444, "grad_norm": 0.19067686796188354, "learning_rate": 2.980012780063404e-05, "loss": 1.1014466285705566, "step": 690 }, { "epoch": 1.890710382513661, "grad_norm": 0.23826494812965393, "learning_rate": 2.9842269814919755e-05, "loss": 0.8912120461463928, "step": 692 }, { "epoch": 1.8961748633879782, "grad_norm": 0.23750869929790497, "learning_rate": 2.9879453819800156e-05, "loss": 1.1922415494918823, "step": 694 }, { "epoch": 1.901639344262295, "grad_norm": 0.18918921053409576, "learning_rate": 2.991166599763788e-05, "loss": 1.1193846464157104, "step": 696 }, { "epoch": 1.9071038251366121, "grad_norm": 0.3542817533016205, "learning_rate": 2.993889437833466e-05, "loss": 1.0432852506637573, "step": 698 }, { "epoch": 1.9125683060109289, "grad_norm": 0.17179521918296814, "learning_rate": 2.9961128843779457e-05, "loss": 1.1211903095245361, "step": 700 }, { "epoch": 1.918032786885246, "grad_norm": 0.2006503790616989, "learning_rate": 2.9978361131608348e-05, "loss": 1.0682426691055298, "step": 702 }, { "epoch": 1.9234972677595628, "grad_norm": 0.204038605093956, "learning_rate": 2.999058483827483e-05, "loss": 1.1585360765457153, "step": 704 }, { "epoch": 1.92896174863388, "grad_norm": 0.28496795892715454, "learning_rate": 2.9997795421429404e-05, "loss": 1.1962625980377197, "step": 706 }, { "epoch": 1.9344262295081966, "grad_norm": 0.18176399171352386, "learning_rate": 2.9999990201607516e-05, "loss": 1.1941583156585693, "step": 708 }, { "epoch": 1.9398907103825138, "grad_norm": 0.2823626399040222, "learning_rate": 2.999716836322524e-05, "loss": 0.9332343339920044, "step": 710 }, { "epoch": 1.9453551912568305, "grad_norm": 0.19465626776218414, "learning_rate": 2.9989330954882366e-05, "loss": 0.6781972050666809, "step": 712 }, { "epoch": 1.9508196721311475, "grad_norm": 0.45259955525398254, "learning_rate": 2.9976480888972708e-05, "loss": 1.159096598625183, "step": 714 }, { "epoch": 1.9562841530054644, "grad_norm": 0.23192590475082397, "learning_rate": 2.9958622940601907e-05, "loss": 0.8674803376197815, "step": 716 }, { "epoch": 1.9617486338797814, "grad_norm": 0.8216494917869568, "learning_rate": 2.9935763745812935e-05, "loss": 1.116053581237793, "step": 718 }, { "epoch": 1.9672131147540983, "grad_norm": 0.38614794611930847, "learning_rate": 2.990791179912017e-05, "loss": 1.1189167499542236, "step": 720 }, { "epoch": 1.9726775956284153, "grad_norm": 0.20481540262699127, "learning_rate": 2.9875077450352817e-05, "loss": 1.139620065689087, "step": 722 }, { "epoch": 1.9781420765027322, "grad_norm": 0.17928683757781982, "learning_rate": 2.9837272900808863e-05, "loss": 1.0577901601791382, "step": 724 }, { "epoch": 1.9836065573770492, "grad_norm": 0.38278234004974365, "learning_rate": 2.9794512198721092e-05, "loss": 1.0994060039520264, "step": 726 }, { "epoch": 1.989071038251366, "grad_norm": 0.39787301421165466, "learning_rate": 2.9746811234036736e-05, "loss": 1.0890076160430908, "step": 728 }, { "epoch": 1.994535519125683, "grad_norm": 0.3581365942955017, "learning_rate": 2.9694187732512702e-05, "loss": 1.0212537050247192, "step": 730 }, { "epoch": 2.0, "grad_norm": 0.21458348631858826, "learning_rate": 2.96366612491287e-05, "loss": 1.1633673906326294, "step": 732 }, { "epoch": 2.0054644808743167, "grad_norm": 0.1837082952260971, "learning_rate": 2.9574253160820573e-05, "loss": 0.7306780219078064, "step": 734 }, { "epoch": 2.010928961748634, "grad_norm": 0.2729966342449188, "learning_rate": 2.9506986658536562e-05, "loss": 0.7522571682929993, "step": 736 }, { "epoch": 2.0163934426229506, "grad_norm": 0.34518009424209595, "learning_rate": 2.9434886738619537e-05, "loss": 0.8162837624549866, "step": 738 }, { "epoch": 2.021857923497268, "grad_norm": 0.31829017400741577, "learning_rate": 2.9357980193518312e-05, "loss": 0.8290322422981262, "step": 740 }, { "epoch": 2.0273224043715845, "grad_norm": 0.9189549684524536, "learning_rate": 2.927629560183153e-05, "loss": 0.7381850481033325, "step": 742 }, { "epoch": 2.0327868852459017, "grad_norm": 0.4974863529205322, "learning_rate": 2.91898633176878e-05, "loss": 0.9779095649719238, "step": 744 }, { "epoch": 2.0382513661202184, "grad_norm": 0.22083698213100433, "learning_rate": 2.909871545946603e-05, "loss": 0.7365862131118774, "step": 746 }, { "epoch": 2.0437158469945356, "grad_norm": 0.2540753483772278, "learning_rate": 2.9002885897860252e-05, "loss": 0.9322983026504517, "step": 748 }, { "epoch": 2.0491803278688523, "grad_norm": 0.41510656476020813, "learning_rate": 2.8902410243293152e-05, "loss": 0.60332190990448, "step": 750 }, { "epoch": 2.0546448087431695, "grad_norm": 0.34592926502227783, "learning_rate": 2.8797325832683208e-05, "loss": 0.523179829120636, "step": 752 }, { "epoch": 2.060109289617486, "grad_norm": 0.8202958703041077, "learning_rate": 2.868767171557021e-05, "loss": 0.6999484896659851, "step": 754 }, { "epoch": 2.0655737704918034, "grad_norm": 0.5787069201469421, "learning_rate": 2.8573488639604418e-05, "loss": 1.0504649877548218, "step": 756 }, { "epoch": 2.07103825136612, "grad_norm": 0.36719974875450134, "learning_rate": 2.845481903540464e-05, "loss": 1.0751094818115234, "step": 758 }, { "epoch": 2.0765027322404372, "grad_norm": 0.242353156208992, "learning_rate": 2.8331707000790954e-05, "loss": 0.8690615296363831, "step": 760 }, { "epoch": 2.081967213114754, "grad_norm": 0.3211243152618408, "learning_rate": 2.820419828439788e-05, "loss": 0.9909636974334717, "step": 762 }, { "epoch": 2.087431693989071, "grad_norm": 0.19196948409080505, "learning_rate": 2.8072340268674133e-05, "loss": 0.8517364263534546, "step": 764 }, { "epoch": 2.092896174863388, "grad_norm": 0.1766076534986496, "learning_rate": 2.793618195227521e-05, "loss": 1.0775508880615234, "step": 766 }, { "epoch": 2.098360655737705, "grad_norm": 0.5555011630058289, "learning_rate": 2.779577393185539e-05, "loss": 0.7964032888412476, "step": 768 }, { "epoch": 2.1038251366120218, "grad_norm": 0.24965371191501617, "learning_rate": 2.765116838326597e-05, "loss": 0.90714031457901, "step": 770 }, { "epoch": 2.109289617486339, "grad_norm": 0.38016757369041443, "learning_rate": 2.750241904216663e-05, "loss": 0.9129889607429504, "step": 772 }, { "epoch": 2.1147540983606556, "grad_norm": 0.21674519777297974, "learning_rate": 2.7349581184057144e-05, "loss": 0.7319467663764954, "step": 774 }, { "epoch": 2.120218579234973, "grad_norm": 0.28484922647476196, "learning_rate": 2.719271160373693e-05, "loss": 0.9362223744392395, "step": 776 }, { "epoch": 2.1256830601092895, "grad_norm": 0.3728918731212616, "learning_rate": 2.703186859420002e-05, "loss": 0.7095181941986084, "step": 778 }, { "epoch": 2.1311475409836067, "grad_norm": 0.2245863527059555, "learning_rate": 2.6867111924973283e-05, "loss": 0.7740026116371155, "step": 780 }, { "epoch": 2.1366120218579234, "grad_norm": 0.1715814769268036, "learning_rate": 2.6698502819905935e-05, "loss": 0.8628339171409607, "step": 782 }, { "epoch": 2.1420765027322406, "grad_norm": 0.1785881519317627, "learning_rate": 2.652610393441872e-05, "loss": 1.0129051208496094, "step": 784 }, { "epoch": 2.1475409836065573, "grad_norm": 0.16147422790527344, "learning_rate": 2.6349979332220992e-05, "loss": 0.6580853462219238, "step": 786 }, { "epoch": 2.1530054644808745, "grad_norm": 0.6977578401565552, "learning_rate": 2.6170194461504586e-05, "loss": 0.8348934054374695, "step": 788 }, { "epoch": 2.158469945355191, "grad_norm": 0.2910391390323639, "learning_rate": 2.5986816130623133e-05, "loss": 1.0212098360061646, "step": 790 }, { "epoch": 2.1639344262295084, "grad_norm": 0.23879674077033997, "learning_rate": 2.579991248326594e-05, "loss": 0.6800191402435303, "step": 792 }, { "epoch": 2.169398907103825, "grad_norm": 0.7448290586471558, "learning_rate": 2.560955297313575e-05, "loss": 0.8883236646652222, "step": 794 }, { "epoch": 2.1748633879781423, "grad_norm": 0.2625773251056671, "learning_rate": 2.5415808338139595e-05, "loss": 0.7563661336898804, "step": 796 }, { "epoch": 2.180327868852459, "grad_norm": 0.21875064074993134, "learning_rate": 2.5218750574102465e-05, "loss": 0.8323014974594116, "step": 798 }, { "epoch": 2.185792349726776, "grad_norm": 0.535250723361969, "learning_rate": 2.5018452908013522e-05, "loss": 0.9481061697006226, "step": 800 }, { "epoch": 2.191256830601093, "grad_norm": 0.3556676506996155, "learning_rate": 2.48149897708149e-05, "loss": 0.8436076641082764, "step": 802 }, { "epoch": 2.19672131147541, "grad_norm": 0.23821324110031128, "learning_rate": 2.4608436769743e-05, "loss": 0.8622503876686096, "step": 804 }, { "epoch": 2.202185792349727, "grad_norm": 0.2570849657058716, "learning_rate": 2.4398870660232684e-05, "loss": 0.8588972091674805, "step": 806 }, { "epoch": 2.2076502732240435, "grad_norm": 2.3321070671081543, "learning_rate": 2.418636931739491e-05, "loss": 0.48147663474082947, "step": 808 }, { "epoch": 2.2131147540983607, "grad_norm": 0.26416775584220886, "learning_rate": 2.3971011707078125e-05, "loss": 0.6585652232170105, "step": 810 }, { "epoch": 2.2185792349726774, "grad_norm": 0.21810820698738098, "learning_rate": 2.3752877856524532e-05, "loss": 0.8585912585258484, "step": 812 }, { "epoch": 2.2240437158469946, "grad_norm": 0.2282879799604416, "learning_rate": 2.353204882463168e-05, "loss": 0.6812423467636108, "step": 814 }, { "epoch": 2.2295081967213113, "grad_norm": 0.4175258278846741, "learning_rate": 2.330860667183101e-05, "loss": 0.7331743240356445, "step": 816 }, { "epoch": 2.2349726775956285, "grad_norm": 0.27887681126594543, "learning_rate": 2.308263442959396e-05, "loss": 0.9462811350822449, "step": 818 }, { "epoch": 2.240437158469945, "grad_norm": 0.24890534579753876, "learning_rate": 2.2854216069577376e-05, "loss": 0.9054920673370361, "step": 820 }, { "epoch": 2.2459016393442623, "grad_norm": 0.6545159816741943, "learning_rate": 2.2623436472419476e-05, "loss": 1.0144051313400269, "step": 822 }, { "epoch": 2.251366120218579, "grad_norm": 0.48057249188423157, "learning_rate": 2.2390381396198102e-05, "loss": 0.7360069751739502, "step": 824 }, { "epoch": 2.2568306010928962, "grad_norm": 2.75382399559021, "learning_rate": 2.2155137444562842e-05, "loss": 0.6045563220977783, "step": 826 }, { "epoch": 2.262295081967213, "grad_norm": 0.6408010125160217, "learning_rate": 2.191779203455302e-05, "loss": 0.5113135576248169, "step": 828 }, { "epoch": 2.26775956284153, "grad_norm": 0.23403555154800415, "learning_rate": 2.1678433364113297e-05, "loss": 0.8575177192687988, "step": 830 }, { "epoch": 2.273224043715847, "grad_norm": 0.49736300110816956, "learning_rate": 2.1437150379319245e-05, "loss": 0.5503892302513123, "step": 832 }, { "epoch": 2.278688524590164, "grad_norm": 0.48317691683769226, "learning_rate": 2.1194032741324823e-05, "loss": 0.6569101810455322, "step": 834 }, { "epoch": 2.2841530054644807, "grad_norm": 0.5925542712211609, "learning_rate": 2.0949170793044142e-05, "loss": 0.9202378988265991, "step": 836 }, { "epoch": 2.289617486338798, "grad_norm": 0.41072195768356323, "learning_rate": 2.070265552557985e-05, "loss": 0.9517123699188232, "step": 838 }, { "epoch": 2.2950819672131146, "grad_norm": 0.22094306349754333, "learning_rate": 2.0454578544410758e-05, "loss": 0.7035161852836609, "step": 840 }, { "epoch": 2.300546448087432, "grad_norm": 0.29024970531463623, "learning_rate": 2.0205032035351043e-05, "loss": 0.8569685220718384, "step": 842 }, { "epoch": 2.3060109289617485, "grad_norm": 0.19582630693912506, "learning_rate": 1.9954108730293875e-05, "loss": 0.975737452507019, "step": 844 }, { "epoch": 2.3114754098360657, "grad_norm": 0.37361687421798706, "learning_rate": 1.9701901872752047e-05, "loss": 0.5019787549972534, "step": 846 }, { "epoch": 2.3169398907103824, "grad_norm": 0.1886816918849945, "learning_rate": 1.9448505183208607e-05, "loss": 0.9394426345825195, "step": 848 }, { "epoch": 2.3224043715846996, "grad_norm": 0.6417620182037354, "learning_rate": 1.919401282429013e-05, "loss": 0.8482524156570435, "step": 850 }, { "epoch": 2.3278688524590163, "grad_norm": 0.4325665831565857, "learning_rate": 1.893851936577567e-05, "loss": 0.8246769905090332, "step": 852 }, { "epoch": 2.3333333333333335, "grad_norm": 0.23205913603305817, "learning_rate": 1.868211974945461e-05, "loss": 0.9343006610870361, "step": 854 }, { "epoch": 2.33879781420765, "grad_norm": 0.2003387063741684, "learning_rate": 1.842490925384604e-05, "loss": 0.9182968735694885, "step": 856 }, { "epoch": 2.3442622950819674, "grad_norm": 0.2010021060705185, "learning_rate": 1.816698345879313e-05, "loss": 0.6269927620887756, "step": 858 }, { "epoch": 2.349726775956284, "grad_norm": 0.21301890909671783, "learning_rate": 1.790843820994548e-05, "loss": 0.7751089334487915, "step": 860 }, { "epoch": 2.3551912568306013, "grad_norm": 0.20366328954696655, "learning_rate": 1.7649369583142763e-05, "loss": 0.897426962852478, "step": 862 }, { "epoch": 2.360655737704918, "grad_norm": 0.21869035065174103, "learning_rate": 1.738987384871274e-05, "loss": 0.6120696067810059, "step": 864 }, { "epoch": 2.366120218579235, "grad_norm": 0.5016121864318848, "learning_rate": 1.7130047435697118e-05, "loss": 1.1808212995529175, "step": 866 }, { "epoch": 2.371584699453552, "grad_norm": 0.20122063159942627, "learning_rate": 1.6869986896018226e-05, "loss": 0.8688129782676697, "step": 868 }, { "epoch": 2.3770491803278686, "grad_norm": 0.18649625778198242, "learning_rate": 1.66097888686003e-05, "loss": 0.7019689083099365, "step": 870 }, { "epoch": 2.3825136612021858, "grad_norm": 0.2085297703742981, "learning_rate": 1.6349550043458252e-05, "loss": 0.7158989310264587, "step": 872 }, { "epoch": 2.387978142076503, "grad_norm": 0.26028934121131897, "learning_rate": 1.608936712576749e-05, "loss": 0.8539477586746216, "step": 874 }, { "epoch": 2.3934426229508197, "grad_norm": 0.5020867586135864, "learning_rate": 1.582933679992809e-05, "loss": 0.7443297505378723, "step": 876 }, { "epoch": 2.3989071038251364, "grad_norm": 0.2335505485534668, "learning_rate": 1.556955569363678e-05, "loss": 0.760352611541748, "step": 878 }, { "epoch": 2.4043715846994536, "grad_norm": 0.2085113674402237, "learning_rate": 1.531012034197988e-05, "loss": 0.9256618022918701, "step": 880 }, { "epoch": 2.4098360655737707, "grad_norm": 0.23211164772510529, "learning_rate": 1.5051127151560745e-05, "loss": 0.4930667281150818, "step": 882 }, { "epoch": 2.4153005464480874, "grad_norm": 0.2237941026687622, "learning_rate": 1.4792672364674816e-05, "loss": 0.7172381281852722, "step": 884 }, { "epoch": 2.420765027322404, "grad_norm": 0.21497218310832977, "learning_rate": 1.4534852023545968e-05, "loss": 0.823451578617096, "step": 886 }, { "epoch": 2.4262295081967213, "grad_norm": 0.23108679056167603, "learning_rate": 1.4277761934636963e-05, "loss": 0.6176611185073853, "step": 888 }, { "epoch": 2.431693989071038, "grad_norm": 0.2470959722995758, "learning_rate": 1.4021497633047664e-05, "loss": 0.509010374546051, "step": 890 }, { "epoch": 2.4371584699453552, "grad_norm": 0.21755096316337585, "learning_rate": 1.3766154347013933e-05, "loss": 0.7690192461013794, "step": 892 }, { "epoch": 2.442622950819672, "grad_norm": 0.19507208466529846, "learning_rate": 1.3511826962520809e-05, "loss": 0.4858570992946625, "step": 894 }, { "epoch": 2.448087431693989, "grad_norm": 0.19376493990421295, "learning_rate": 1.3258609988042627e-05, "loss": 0.7765929698944092, "step": 896 }, { "epoch": 2.453551912568306, "grad_norm": 0.15309995412826538, "learning_rate": 1.300659751942353e-05, "loss": 0.7383701205253601, "step": 898 }, { "epoch": 2.459016393442623, "grad_norm": 0.20970426499843597, "learning_rate": 1.2755883204911305e-05, "loss": 0.8502997159957886, "step": 900 }, { "epoch": 2.4644808743169397, "grad_norm": 0.20087599754333496, "learning_rate": 1.2506560210357541e-05, "loss": 0.8558724522590637, "step": 902 }, { "epoch": 2.469945355191257, "grad_norm": 0.15894219279289246, "learning_rate": 1.225872118459706e-05, "loss": 0.6488800048828125, "step": 904 }, { "epoch": 2.4754098360655736, "grad_norm": 0.3884161412715912, "learning_rate": 1.2012458225019375e-05, "loss": 0.9488789439201355, "step": 906 }, { "epoch": 2.480874316939891, "grad_norm": 0.1707250028848648, "learning_rate": 1.176786284334528e-05, "loss": 0.5347706079483032, "step": 908 }, { "epoch": 2.4863387978142075, "grad_norm": 0.11856268346309662, "learning_rate": 1.1525025931620855e-05, "loss": 0.6109844446182251, "step": 910 }, { "epoch": 2.4918032786885247, "grad_norm": 0.16962800920009613, "learning_rate": 1.1284037728441877e-05, "loss": 0.44916611909866333, "step": 912 }, { "epoch": 2.4972677595628414, "grad_norm": 0.1900777965784073, "learning_rate": 1.1044987785420924e-05, "loss": 0.8547566533088684, "step": 914 }, { "epoch": 2.5027322404371586, "grad_norm": 0.20876799523830414, "learning_rate": 1.0807964933909975e-05, "loss": 0.7574434876441956, "step": 916 }, { "epoch": 2.5081967213114753, "grad_norm": 0.21594154834747314, "learning_rate": 1.0573057251990443e-05, "loss": 0.7612369060516357, "step": 918 }, { "epoch": 2.5136612021857925, "grad_norm": 0.17668958008289337, "learning_rate": 1.0340352031743256e-05, "loss": 0.7049843668937683, "step": 920 }, { "epoch": 2.519125683060109, "grad_norm": 0.22894662618637085, "learning_rate": 1.010993574681095e-05, "loss": 0.754566490650177, "step": 922 }, { "epoch": 2.5245901639344264, "grad_norm": 0.3702336847782135, "learning_rate": 9.881894020263938e-06, "loss": 0.48762306571006775, "step": 924 }, { "epoch": 2.530054644808743, "grad_norm": 0.30566778779029846, "learning_rate": 9.656311592782831e-06, "loss": 0.5707772374153137, "step": 926 }, { "epoch": 2.5355191256830603, "grad_norm": 0.2742585837841034, "learning_rate": 9.433272291168689e-06, "loss": 0.6204325556755066, "step": 928 }, { "epoch": 2.540983606557377, "grad_norm": 0.29101449251174927, "learning_rate": 9.212858997192744e-06, "loss": 1.0284860134124756, "step": 930 }, { "epoch": 2.546448087431694, "grad_norm": 0.2771863341331482, "learning_rate": 8.995153616797544e-06, "loss": 0.6103931069374084, "step": 932 }, { "epoch": 2.551912568306011, "grad_norm": 0.20665957033634186, "learning_rate": 8.78023704966047e-06, "loss": 0.7804996371269226, "step": 934 }, { "epoch": 2.557377049180328, "grad_norm": 0.5313201546669006, "learning_rate": 8.568189159131336e-06, "loss": 0.6838847398757935, "step": 936 }, { "epoch": 2.5628415300546448, "grad_norm": 0.8041807413101196, "learning_rate": 8.359088742554941e-06, "loss": 0.5385434031486511, "step": 938 }, { "epoch": 2.5683060109289615, "grad_norm": 0.26967042684555054, "learning_rate": 8.15301350198999e-06, "loss": 0.7957769632339478, "step": 940 }, { "epoch": 2.5737704918032787, "grad_norm": 1.993246078491211, "learning_rate": 7.950040015334789e-06, "loss": 0.5354985594749451, "step": 942 }, { "epoch": 2.579234972677596, "grad_norm": 0.19206559658050537, "learning_rate": 7.750243707870748e-06, "loss": 0.8785912394523621, "step": 944 }, { "epoch": 2.5846994535519126, "grad_norm": 0.2833126187324524, "learning_rate": 7.553698824234314e-06, "loss": 0.25215014815330505, "step": 946 }, { "epoch": 2.5901639344262293, "grad_norm": 0.1929856687784195, "learning_rate": 7.360478400827475e-06, "loss": 0.6651497483253479, "step": 948 }, { "epoch": 2.5956284153005464, "grad_norm": 0.20867817103862762, "learning_rate": 7.170654238677331e-06, "loss": 0.6793198585510254, "step": 950 }, { "epoch": 2.6010928961748636, "grad_norm": 0.32787761092185974, "learning_rate": 6.984296876754711e-06, "loss": 0.737807035446167, "step": 952 }, { "epoch": 2.6065573770491803, "grad_norm": 1.5362244844436646, "learning_rate": 6.801475565761783e-06, "loss": 0.4789329171180725, "step": 954 }, { "epoch": 2.612021857923497, "grad_norm": 0.14147597551345825, "learning_rate": 6.622258242398371e-06, "loss": 0.24011307954788208, "step": 956 }, { "epoch": 2.6174863387978142, "grad_norm": 0.17159943282604218, "learning_rate": 6.4467115041165855e-06, "loss": 0.5085139274597168, "step": 958 }, { "epoch": 2.6229508196721314, "grad_norm": 0.17652657628059387, "learning_rate": 6.2749005843730336e-06, "loss": 0.6372896432876587, "step": 960 }, { "epoch": 2.628415300546448, "grad_norm": 0.8388151526451111, "learning_rate": 6.106889328388064e-06, "loss": 0.6449273824691772, "step": 962 }, { "epoch": 2.633879781420765, "grad_norm": 0.25947555899620056, "learning_rate": 5.942740169420701e-06, "loss": 1.000860333442688, "step": 964 }, { "epoch": 2.639344262295082, "grad_norm": 0.15862314403057098, "learning_rate": 5.7825141055683895e-06, "loss": 0.6797659993171692, "step": 966 }, { "epoch": 2.644808743169399, "grad_norm": 0.2170058637857437, "learning_rate": 5.62627067709992e-06, "loss": 0.7449020743370056, "step": 968 }, { "epoch": 2.650273224043716, "grad_norm": 0.5447753071784973, "learning_rate": 5.474067944330285e-06, "loss": 0.7707789540290833, "step": 970 }, { "epoch": 2.6557377049180326, "grad_norm": 0.2564184069633484, "learning_rate": 5.325962466045282e-06, "loss": 1.0045585632324219, "step": 972 }, { "epoch": 2.66120218579235, "grad_norm": 0.15954433381557465, "learning_rate": 5.18200927848421e-06, "loss": 0.5719258785247803, "step": 974 }, { "epoch": 2.6666666666666665, "grad_norm": 0.15210166573524475, "learning_rate": 5.042261874888308e-06, "loss": 0.7093988060951233, "step": 976 }, { "epoch": 2.6721311475409837, "grad_norm": 0.13644114136695862, "learning_rate": 4.906772185622572e-06, "loss": 0.7509814500808716, "step": 978 }, { "epoch": 2.6775956284153004, "grad_norm": 0.15326020121574402, "learning_rate": 4.775590558878368e-06, "loss": 0.6545107960700989, "step": 980 }, { "epoch": 2.6830601092896176, "grad_norm": 0.17753270268440247, "learning_rate": 4.648765741963903e-06, "loss": 0.8449227213859558, "step": 982 }, { "epoch": 2.6885245901639343, "grad_norm": 0.3523927927017212, "learning_rate": 4.526344863189724e-06, "loss": 0.6510394811630249, "step": 984 }, { "epoch": 2.6939890710382515, "grad_norm": 0.2865069508552551, "learning_rate": 4.408373414355714e-06, "loss": 0.9356023669242859, "step": 986 }, { "epoch": 2.699453551912568, "grad_norm": 0.1558021456003189, "learning_rate": 4.29489523384628e-06, "loss": 0.8081143498420715, "step": 988 }, { "epoch": 2.7049180327868854, "grad_norm": 0.2865149974822998, "learning_rate": 4.185952490339899e-06, "loss": 0.6692199110984802, "step": 990 }, { "epoch": 2.710382513661202, "grad_norm": 0.19687670469284058, "learning_rate": 4.081585667139231e-06, "loss": 0.5164180397987366, "step": 992 }, { "epoch": 2.7158469945355193, "grad_norm": 0.18878105282783508, "learning_rate": 3.981833547127413e-06, "loss": 0.811371922492981, "step": 994 }, { "epoch": 2.721311475409836, "grad_norm": 0.16217704117298126, "learning_rate": 3.886733198356298e-06, "loss": 0.8148671388626099, "step": 996 }, { "epoch": 2.726775956284153, "grad_norm": 0.15565072000026703, "learning_rate": 3.7963199602718717e-06, "loss": 0.6758864521980286, "step": 998 }, { "epoch": 2.73224043715847, "grad_norm": 0.6546015739440918, "learning_rate": 3.7106274305821034e-06, "loss": 0.9574772119522095, "step": 1000 }, { "epoch": 2.737704918032787, "grad_norm": 0.18078891932964325, "learning_rate": 3.6296874527719515e-06, "loss": 0.8926464915275574, "step": 1002 }, { "epoch": 2.7431693989071038, "grad_norm": 0.22077669203281403, "learning_rate": 3.553530104270281e-06, "loss": 0.6632700562477112, "step": 1004 }, { "epoch": 2.748633879781421, "grad_norm": 0.381185919046402, "learning_rate": 3.4821836852730384e-06, "loss": 0.35226285457611084, "step": 1006 }, { "epoch": 2.7540983606557377, "grad_norm": 0.18125414848327637, "learning_rate": 3.41567470822686e-06, "loss": 0.9504106044769287, "step": 1008 }, { "epoch": 2.7595628415300544, "grad_norm": 0.25593459606170654, "learning_rate": 3.354027887976989e-06, "loss": 0.9168705344200134, "step": 1010 }, { "epoch": 2.7650273224043715, "grad_norm": 0.6389570832252502, "learning_rate": 3.297266132583221e-06, "loss": 0.6682818531990051, "step": 1012 }, { "epoch": 2.7704918032786887, "grad_norm": 0.32402268052101135, "learning_rate": 3.245410534807195e-06, "loss": 0.9942286610603333, "step": 1014 }, { "epoch": 2.7759562841530054, "grad_norm": 0.20764689147472382, "learning_rate": 3.1984803642743314e-06, "loss": 0.6048266291618347, "step": 1016 }, { "epoch": 2.781420765027322, "grad_norm": 0.1485147923231125, "learning_rate": 3.1564930603131777e-06, "loss": 0.8052763342857361, "step": 1018 }, { "epoch": 2.7868852459016393, "grad_norm": 0.38492631912231445, "learning_rate": 3.1194642254749395e-06, "loss": 0.6152138113975525, "step": 1020 }, { "epoch": 2.7923497267759565, "grad_norm": 0.19493553042411804, "learning_rate": 3.0874076197355317e-06, "loss": 0.8494120836257935, "step": 1022 }, { "epoch": 2.797814207650273, "grad_norm": 0.4454888701438904, "learning_rate": 3.0603351553823717e-06, "loss": 0.4298951029777527, "step": 1024 }, { "epoch": 2.80327868852459, "grad_norm": 0.19092072546482086, "learning_rate": 3.038256892587734e-06, "loss": 0.8518891930580139, "step": 1026 }, { "epoch": 2.808743169398907, "grad_norm": 0.6577255725860596, "learning_rate": 3.0211810356703803e-06, "loss": 0.7430834174156189, "step": 1028 }, { "epoch": 2.8142076502732243, "grad_norm": 0.1505049765110016, "learning_rate": 3.0091139300468266e-06, "loss": 0.6131287813186646, "step": 1030 }, { "epoch": 2.819672131147541, "grad_norm": 0.1536058932542801, "learning_rate": 3.0020600598733656e-06, "loss": 0.6610476970672607, "step": 1032 }, { "epoch": 2.8251366120218577, "grad_norm": 1.4271342754364014, "learning_rate": 3.000022046379753e-06, "loss": 0.7583919763565063, "step": 1034 }, { "epoch": 2.830601092896175, "grad_norm": 0.24797333776950836, "learning_rate": 3.0030006468951557e-06, "loss": 0.9907567501068115, "step": 1036 }, { "epoch": 2.836065573770492, "grad_norm": 0.18040958046913147, "learning_rate": 3.0109947545667246e-06, "loss": 0.7043365240097046, "step": 1038 }, { "epoch": 2.841530054644809, "grad_norm": 0.38979649543762207, "learning_rate": 3.024001398770901e-06, "loss": 0.7790292501449585, "step": 1040 }, { "epoch": 2.8469945355191255, "grad_norm": 0.1695755124092102, "learning_rate": 3.042015746217308e-06, "loss": 0.7965599298477173, "step": 1042 }, { "epoch": 2.8524590163934427, "grad_norm": 0.16300812363624573, "learning_rate": 3.0650311027448116e-06, "loss": 0.7110670804977417, "step": 1044 }, { "epoch": 2.8579234972677594, "grad_norm": 0.18763573467731476, "learning_rate": 3.0930389158090754e-06, "loss": 0.9114327430725098, "step": 1046 }, { "epoch": 2.8633879781420766, "grad_norm": 0.7911413311958313, "learning_rate": 3.1260287776607025e-06, "loss": 0.3735189735889435, "step": 1048 }, { "epoch": 2.8688524590163933, "grad_norm": 0.20250028371810913, "learning_rate": 3.163988429212773e-06, "loss": 0.7022408246994019, "step": 1050 }, { "epoch": 2.8743169398907105, "grad_norm": 0.21102067828178406, "learning_rate": 3.206903764596349e-06, "loss": 0.6459388732910156, "step": 1052 }, { "epoch": 2.879781420765027, "grad_norm": 0.21323570609092712, "learning_rate": 3.254758836402225e-06, "loss": 0.9086355566978455, "step": 1054 }, { "epoch": 2.8852459016393444, "grad_norm": 1.73580002784729, "learning_rate": 3.3075358616070144e-06, "loss": 0.7384663820266724, "step": 1056 }, { "epoch": 2.890710382513661, "grad_norm": 1.4918919801712036, "learning_rate": 3.365215228181358e-06, "loss": 0.6115441918373108, "step": 1058 }, { "epoch": 2.8961748633879782, "grad_norm": 0.22330603003501892, "learning_rate": 3.4277755023777795e-06, "loss": 0.7794143557548523, "step": 1060 }, { "epoch": 2.901639344262295, "grad_norm": 0.19227337837219238, "learning_rate": 3.495193436695504e-06, "loss": 0.7703116536140442, "step": 1062 }, { "epoch": 2.907103825136612, "grad_norm": 0.3918929398059845, "learning_rate": 3.567443978519267e-06, "loss": 0.876200795173645, "step": 1064 }, { "epoch": 2.912568306010929, "grad_norm": 0.18335311114788055, "learning_rate": 3.6445002794288992e-06, "loss": 0.431761234998703, "step": 1066 }, { "epoch": 2.918032786885246, "grad_norm": 0.227519690990448, "learning_rate": 3.7263337051762718e-06, "loss": 0.5974105000495911, "step": 1068 }, { "epoch": 2.9234972677595628, "grad_norm": 0.43164753913879395, "learning_rate": 3.8129138463257943e-06, "loss": 0.6901212930679321, "step": 1070 }, { "epoch": 2.92896174863388, "grad_norm": 0.22106696665287018, "learning_rate": 3.904208529554625e-06, "loss": 0.9134948253631592, "step": 1072 }, { "epoch": 2.9344262295081966, "grad_norm": 0.17417727410793304, "learning_rate": 4.000183829608332e-06, "loss": 0.49246451258659363, "step": 1074 }, { "epoch": 2.939890710382514, "grad_norm": 1.0001611709594727, "learning_rate": 4.100804081907595e-06, "loss": 0.5228325128555298, "step": 1076 }, { "epoch": 2.9453551912568305, "grad_norm": 0.38090279698371887, "learning_rate": 4.206031895801176e-06, "loss": 0.5014840364456177, "step": 1078 }, { "epoch": 2.9508196721311473, "grad_norm": 0.2635762691497803, "learning_rate": 4.315828168460367e-06, "loss": 0.9488551020622253, "step": 1080 }, { "epoch": 2.9562841530054644, "grad_norm": 0.23455342650413513, "learning_rate": 4.430152099409704e-06, "loss": 0.5974031686782837, "step": 1082 }, { "epoch": 2.9617486338797816, "grad_norm": 0.306372731924057, "learning_rate": 4.548961205688424e-06, "loss": 0.748331606388092, "step": 1084 }, { "epoch": 2.9672131147540983, "grad_norm": 0.1417221575975418, "learning_rate": 4.672211337637246e-06, "loss": 0.52958744764328, "step": 1086 }, { "epoch": 2.972677595628415, "grad_norm": 0.18579013645648956, "learning_rate": 4.7998566953044445e-06, "loss": 0.995449960231781, "step": 1088 }, { "epoch": 2.978142076502732, "grad_norm": 0.31965434551239014, "learning_rate": 4.931849845465193e-06, "loss": 0.767081081867218, "step": 1090 }, { "epoch": 2.9836065573770494, "grad_norm": 0.15356819331645966, "learning_rate": 5.06814173924782e-06, "loss": 0.6917383670806885, "step": 1092 }, { "epoch": 2.989071038251366, "grad_norm": 0.17739875614643097, "learning_rate": 5.208681730360458e-06, "loss": 0.7700910568237305, "step": 1094 }, { "epoch": 2.994535519125683, "grad_norm": 0.5607284903526306, "learning_rate": 5.3534175939112694e-06, "loss": 0.5115733742713928, "step": 1096 }, { "epoch": 3.0, "grad_norm": 0.22396568953990936, "learning_rate": 5.50229554581536e-06, "loss": 0.8061965107917786, "step": 1098 }, { "epoch": 3.0, "step": 1098, "total_flos": 4.957143256761631e+18, "train_loss": 0.9666990600322765, "train_runtime": 11531.2166, "train_samples_per_second": 5.713, "train_steps_per_second": 0.095 } ], "logging_steps": 2, "max_steps": 1098, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 99999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.957143256761631e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }