{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998881390059287, "eval_steps": 500, "global_step": 6704, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001491479920951564, "grad_norm": 39.87211990356445, "learning_rate": 9.900990099009901e-08, "loss": 1.7939, "step": 1 }, { "epoch": 0.0002982959841903128, "grad_norm": 34.2124137878418, "learning_rate": 1.9801980198019803e-07, "loss": 1.7819, "step": 2 }, { "epoch": 0.00044744397628546925, "grad_norm": 33.1609992980957, "learning_rate": 2.9702970297029703e-07, "loss": 1.7116, "step": 3 }, { "epoch": 0.0005965919683806256, "grad_norm": 36.20302200317383, "learning_rate": 3.9603960396039606e-07, "loss": 1.7646, "step": 4 }, { "epoch": 0.0007457399604757821, "grad_norm": 46.00297927856445, "learning_rate": 4.950495049504951e-07, "loss": 1.8565, "step": 5 }, { "epoch": 0.0008948879525709385, "grad_norm": 43.027008056640625, "learning_rate": 5.940594059405941e-07, "loss": 1.808, "step": 6 }, { "epoch": 0.001044035944666095, "grad_norm": 35.75727081298828, "learning_rate": 6.930693069306931e-07, "loss": 1.7707, "step": 7 }, { "epoch": 0.0011931839367612513, "grad_norm": 38.25413131713867, "learning_rate": 7.920792079207921e-07, "loss": 1.632, "step": 8 }, { "epoch": 0.0013423319288564078, "grad_norm": 26.776020050048828, "learning_rate": 8.910891089108911e-07, "loss": 1.5778, "step": 9 }, { "epoch": 0.0014914799209515641, "grad_norm": 33.86367416381836, "learning_rate": 9.900990099009902e-07, "loss": 1.6822, "step": 10 }, { "epoch": 0.0016406279130467207, "grad_norm": 19.060773849487305, "learning_rate": 1.0891089108910893e-06, "loss": 1.2922, "step": 11 }, { "epoch": 0.001789775905141877, "grad_norm": 22.195920944213867, "learning_rate": 1.1881188118811881e-06, "loss": 1.2501, "step": 12 }, { "epoch": 0.0019389238972370335, "grad_norm": 19.902982711791992, "learning_rate": 1.2871287128712872e-06, "loss": 1.1384, "step": 13 }, { "epoch": 0.00208807188933219, "grad_norm": 13.879121780395508, "learning_rate": 1.3861386138613863e-06, "loss": 1.1368, "step": 14 }, { "epoch": 0.0022372198814273464, "grad_norm": 7.1763410568237305, "learning_rate": 1.4851485148514852e-06, "loss": 1.0728, "step": 15 }, { "epoch": 0.0023863678735225025, "grad_norm": 8.752571105957031, "learning_rate": 1.5841584158415842e-06, "loss": 0.8854, "step": 16 }, { "epoch": 0.002535515865617659, "grad_norm": 6.839611530303955, "learning_rate": 1.6831683168316833e-06, "loss": 0.9015, "step": 17 }, { "epoch": 0.0026846638577128156, "grad_norm": 5.589047431945801, "learning_rate": 1.7821782178217822e-06, "loss": 0.9546, "step": 18 }, { "epoch": 0.002833811849807972, "grad_norm": 3.8366494178771973, "learning_rate": 1.8811881188118813e-06, "loss": 0.8883, "step": 19 }, { "epoch": 0.0029829598419031282, "grad_norm": 2.5588526725769043, "learning_rate": 1.9801980198019803e-06, "loss": 0.8879, "step": 20 }, { "epoch": 0.003132107833998285, "grad_norm": 2.650646924972534, "learning_rate": 2.0792079207920794e-06, "loss": 0.9721, "step": 21 }, { "epoch": 0.0032812558260934413, "grad_norm": 2.7643964290618896, "learning_rate": 2.1782178217821785e-06, "loss": 0.8556, "step": 22 }, { "epoch": 0.0034304038181885974, "grad_norm": 2.5791187286376953, "learning_rate": 2.2772277227722776e-06, "loss": 0.8791, "step": 23 }, { "epoch": 0.003579551810283754, "grad_norm": 2.717031955718994, "learning_rate": 2.3762376237623762e-06, "loss": 0.9118, "step": 24 }, { "epoch": 0.0037286998023789105, "grad_norm": 2.634051561355591, "learning_rate": 2.4752475247524753e-06, "loss": 0.8872, "step": 25 }, { "epoch": 0.003877847794474067, "grad_norm": 2.904601812362671, "learning_rate": 2.5742574257425744e-06, "loss": 0.9932, "step": 26 }, { "epoch": 0.004026995786569223, "grad_norm": 2.9366304874420166, "learning_rate": 2.6732673267326735e-06, "loss": 0.8834, "step": 27 }, { "epoch": 0.00417614377866438, "grad_norm": 2.1792824268341064, "learning_rate": 2.7722772277227726e-06, "loss": 0.8968, "step": 28 }, { "epoch": 0.004325291770759536, "grad_norm": 2.1688127517700195, "learning_rate": 2.8712871287128712e-06, "loss": 0.8731, "step": 29 }, { "epoch": 0.004474439762854693, "grad_norm": 2.314924716949463, "learning_rate": 2.9702970297029703e-06, "loss": 0.9096, "step": 30 }, { "epoch": 0.004623587754949849, "grad_norm": 2.1087639331817627, "learning_rate": 3.0693069306930694e-06, "loss": 0.9367, "step": 31 }, { "epoch": 0.004772735747045005, "grad_norm": 2.051417350769043, "learning_rate": 3.1683168316831685e-06, "loss": 0.9332, "step": 32 }, { "epoch": 0.0049218837391401616, "grad_norm": 2.0115177631378174, "learning_rate": 3.2673267326732676e-06, "loss": 0.8109, "step": 33 }, { "epoch": 0.005071031731235318, "grad_norm": 1.874586582183838, "learning_rate": 3.3663366336633666e-06, "loss": 0.8555, "step": 34 }, { "epoch": 0.005220179723330475, "grad_norm": 2.069634437561035, "learning_rate": 3.4653465346534653e-06, "loss": 0.8479, "step": 35 }, { "epoch": 0.005369327715425631, "grad_norm": 2.0437252521514893, "learning_rate": 3.5643564356435644e-06, "loss": 0.7454, "step": 36 }, { "epoch": 0.005518475707520788, "grad_norm": 2.21240234375, "learning_rate": 3.6633663366336635e-06, "loss": 0.9335, "step": 37 }, { "epoch": 0.005667623699615944, "grad_norm": 2.05131196975708, "learning_rate": 3.7623762376237625e-06, "loss": 0.899, "step": 38 }, { "epoch": 0.0058167716917111, "grad_norm": 1.9264792203903198, "learning_rate": 3.861386138613862e-06, "loss": 0.8257, "step": 39 }, { "epoch": 0.0059659196838062565, "grad_norm": 1.8269070386886597, "learning_rate": 3.960396039603961e-06, "loss": 0.817, "step": 40 }, { "epoch": 0.006115067675901413, "grad_norm": 2.078799247741699, "learning_rate": 4.05940594059406e-06, "loss": 0.8694, "step": 41 }, { "epoch": 0.00626421566799657, "grad_norm": 2.094686269760132, "learning_rate": 4.158415841584159e-06, "loss": 0.8616, "step": 42 }, { "epoch": 0.006413363660091726, "grad_norm": 1.7922865152359009, "learning_rate": 4.2574257425742575e-06, "loss": 0.7327, "step": 43 }, { "epoch": 0.006562511652186883, "grad_norm": 1.7799499034881592, "learning_rate": 4.356435643564357e-06, "loss": 0.7854, "step": 44 }, { "epoch": 0.006711659644282039, "grad_norm": 1.7169584035873413, "learning_rate": 4.455445544554456e-06, "loss": 0.7882, "step": 45 }, { "epoch": 0.006860807636377195, "grad_norm": 2.9824090003967285, "learning_rate": 4.554455445544555e-06, "loss": 0.6767, "step": 46 }, { "epoch": 0.007009955628472351, "grad_norm": 1.946439504623413, "learning_rate": 4.653465346534654e-06, "loss": 0.831, "step": 47 }, { "epoch": 0.007159103620567508, "grad_norm": 1.736792802810669, "learning_rate": 4.7524752475247525e-06, "loss": 0.8143, "step": 48 }, { "epoch": 0.0073082516126626645, "grad_norm": 1.9736931324005127, "learning_rate": 4.851485148514852e-06, "loss": 0.8388, "step": 49 }, { "epoch": 0.007457399604757821, "grad_norm": 2.3758277893066406, "learning_rate": 4.950495049504951e-06, "loss": 0.6613, "step": 50 }, { "epoch": 0.007606547596852978, "grad_norm": 1.6204915046691895, "learning_rate": 5.04950495049505e-06, "loss": 0.731, "step": 51 }, { "epoch": 0.007755695588948134, "grad_norm": 1.9464293718338013, "learning_rate": 5.148514851485149e-06, "loss": 0.796, "step": 52 }, { "epoch": 0.00790484358104329, "grad_norm": 1.9661284685134888, "learning_rate": 5.247524752475248e-06, "loss": 0.9096, "step": 53 }, { "epoch": 0.008053991573138446, "grad_norm": 1.7220773696899414, "learning_rate": 5.346534653465347e-06, "loss": 0.8, "step": 54 }, { "epoch": 0.008203139565233603, "grad_norm": 1.8329463005065918, "learning_rate": 5.4455445544554465e-06, "loss": 0.7437, "step": 55 }, { "epoch": 0.00835228755732876, "grad_norm": 1.5822452306747437, "learning_rate": 5.544554455445545e-06, "loss": 0.6622, "step": 56 }, { "epoch": 0.008501435549423916, "grad_norm": 1.6702386140823364, "learning_rate": 5.643564356435644e-06, "loss": 0.7189, "step": 57 }, { "epoch": 0.008650583541519073, "grad_norm": 1.669589877128601, "learning_rate": 5.7425742574257425e-06, "loss": 0.7844, "step": 58 }, { "epoch": 0.008799731533614229, "grad_norm": 1.7205700874328613, "learning_rate": 5.841584158415842e-06, "loss": 0.8011, "step": 59 }, { "epoch": 0.008948879525709386, "grad_norm": 1.7440885305404663, "learning_rate": 5.940594059405941e-06, "loss": 0.7389, "step": 60 }, { "epoch": 0.009098027517804542, "grad_norm": 1.8718650341033936, "learning_rate": 6.03960396039604e-06, "loss": 0.7925, "step": 61 }, { "epoch": 0.009247175509899699, "grad_norm": 1.274830937385559, "learning_rate": 6.138613861386139e-06, "loss": 0.6563, "step": 62 }, { "epoch": 0.009396323501994855, "grad_norm": 1.8190218210220337, "learning_rate": 6.237623762376238e-06, "loss": 0.808, "step": 63 }, { "epoch": 0.00954547149409001, "grad_norm": 1.7734014987945557, "learning_rate": 6.336633663366337e-06, "loss": 0.6811, "step": 64 }, { "epoch": 0.009694619486185167, "grad_norm": 1.5249180793762207, "learning_rate": 6.4356435643564364e-06, "loss": 0.7204, "step": 65 }, { "epoch": 0.009843767478280323, "grad_norm": 1.526959776878357, "learning_rate": 6.534653465346535e-06, "loss": 0.7527, "step": 66 }, { "epoch": 0.00999291547037548, "grad_norm": 1.8352864980697632, "learning_rate": 6.633663366336635e-06, "loss": 0.8801, "step": 67 }, { "epoch": 0.010142063462470636, "grad_norm": 1.6300268173217773, "learning_rate": 6.732673267326733e-06, "loss": 0.7354, "step": 68 }, { "epoch": 0.010291211454565793, "grad_norm": 1.5928200483322144, "learning_rate": 6.831683168316833e-06, "loss": 0.8142, "step": 69 }, { "epoch": 0.01044035944666095, "grad_norm": 1.6016854047775269, "learning_rate": 6.930693069306931e-06, "loss": 0.6921, "step": 70 }, { "epoch": 0.010589507438756106, "grad_norm": 1.9445266723632812, "learning_rate": 7.02970297029703e-06, "loss": 0.8098, "step": 71 }, { "epoch": 0.010738655430851262, "grad_norm": 1.7588914632797241, "learning_rate": 7.128712871287129e-06, "loss": 0.8664, "step": 72 }, { "epoch": 0.010887803422946419, "grad_norm": 1.1182153224945068, "learning_rate": 7.227722772277228e-06, "loss": 0.6319, "step": 73 }, { "epoch": 0.011036951415041575, "grad_norm": 1.7638026475906372, "learning_rate": 7.326732673267327e-06, "loss": 0.8069, "step": 74 }, { "epoch": 0.011186099407136732, "grad_norm": 1.78876531124115, "learning_rate": 7.425742574257426e-06, "loss": 0.8048, "step": 75 }, { "epoch": 0.011335247399231889, "grad_norm": 1.7826611995697021, "learning_rate": 7.524752475247525e-06, "loss": 0.8946, "step": 76 }, { "epoch": 0.011484395391327045, "grad_norm": 1.1206309795379639, "learning_rate": 7.6237623762376246e-06, "loss": 0.6165, "step": 77 }, { "epoch": 0.0116335433834222, "grad_norm": 1.6246232986450195, "learning_rate": 7.722772277227724e-06, "loss": 0.6913, "step": 78 }, { "epoch": 0.011782691375517356, "grad_norm": 1.651530146598816, "learning_rate": 7.821782178217822e-06, "loss": 0.7417, "step": 79 }, { "epoch": 0.011931839367612513, "grad_norm": 1.6715677976608276, "learning_rate": 7.920792079207921e-06, "loss": 0.7067, "step": 80 }, { "epoch": 0.01208098735970767, "grad_norm": 1.6198248863220215, "learning_rate": 8.019801980198021e-06, "loss": 0.7301, "step": 81 }, { "epoch": 0.012230135351802826, "grad_norm": 2.0230417251586914, "learning_rate": 8.11881188118812e-06, "loss": 0.7959, "step": 82 }, { "epoch": 0.012379283343897983, "grad_norm": 1.6732629537582397, "learning_rate": 8.217821782178218e-06, "loss": 0.6898, "step": 83 }, { "epoch": 0.01252843133599314, "grad_norm": 1.9621461629867554, "learning_rate": 8.316831683168318e-06, "loss": 0.7592, "step": 84 }, { "epoch": 0.012677579328088296, "grad_norm": 1.8207122087478638, "learning_rate": 8.415841584158416e-06, "loss": 0.7493, "step": 85 }, { "epoch": 0.012826727320183452, "grad_norm": 1.6612868309020996, "learning_rate": 8.514851485148515e-06, "loss": 0.7232, "step": 86 }, { "epoch": 0.012975875312278609, "grad_norm": 1.8430603742599487, "learning_rate": 8.613861386138615e-06, "loss": 0.762, "step": 87 }, { "epoch": 0.013125023304373765, "grad_norm": 1.8745126724243164, "learning_rate": 8.712871287128714e-06, "loss": 0.7455, "step": 88 }, { "epoch": 0.013274171296468922, "grad_norm": 7.309123992919922, "learning_rate": 8.811881188118812e-06, "loss": 0.7287, "step": 89 }, { "epoch": 0.013423319288564078, "grad_norm": 1.6257158517837524, "learning_rate": 8.910891089108911e-06, "loss": 0.7853, "step": 90 }, { "epoch": 0.013572467280659235, "grad_norm": 1.8892756700515747, "learning_rate": 9.009900990099011e-06, "loss": 0.7435, "step": 91 }, { "epoch": 0.01372161527275439, "grad_norm": 1.8047070503234863, "learning_rate": 9.10891089108911e-06, "loss": 0.7072, "step": 92 }, { "epoch": 0.013870763264849546, "grad_norm": 1.696405053138733, "learning_rate": 9.20792079207921e-06, "loss": 0.7186, "step": 93 }, { "epoch": 0.014019911256944703, "grad_norm": 1.8157339096069336, "learning_rate": 9.306930693069308e-06, "loss": 0.7085, "step": 94 }, { "epoch": 0.01416905924903986, "grad_norm": 1.5440287590026855, "learning_rate": 9.405940594059405e-06, "loss": 0.648, "step": 95 }, { "epoch": 0.014318207241135016, "grad_norm": 1.6597669124603271, "learning_rate": 9.504950495049505e-06, "loss": 0.7196, "step": 96 }, { "epoch": 0.014467355233230172, "grad_norm": 1.763654351234436, "learning_rate": 9.603960396039604e-06, "loss": 0.771, "step": 97 }, { "epoch": 0.014616503225325329, "grad_norm": 1.773114800453186, "learning_rate": 9.702970297029704e-06, "loss": 0.6951, "step": 98 }, { "epoch": 0.014765651217420486, "grad_norm": 1.6714224815368652, "learning_rate": 9.801980198019802e-06, "loss": 0.729, "step": 99 }, { "epoch": 0.014914799209515642, "grad_norm": 1.9162660837173462, "learning_rate": 9.900990099009901e-06, "loss": 0.8237, "step": 100 }, { "epoch": 0.015063947201610799, "grad_norm": 1.7358973026275635, "learning_rate": 1e-05, "loss": 0.7469, "step": 101 }, { "epoch": 0.015213095193705955, "grad_norm": 1.5846261978149414, "learning_rate": 1.00990099009901e-05, "loss": 0.7217, "step": 102 }, { "epoch": 0.015362243185801112, "grad_norm": 1.7096412181854248, "learning_rate": 1.01980198019802e-05, "loss": 0.7363, "step": 103 }, { "epoch": 0.015511391177896268, "grad_norm": 1.7389696836471558, "learning_rate": 1.0297029702970298e-05, "loss": 0.7553, "step": 104 }, { "epoch": 0.015660539169991425, "grad_norm": 1.6424856185913086, "learning_rate": 1.0396039603960397e-05, "loss": 0.6756, "step": 105 }, { "epoch": 0.01580968716208658, "grad_norm": 1.7579039335250854, "learning_rate": 1.0495049504950497e-05, "loss": 0.7663, "step": 106 }, { "epoch": 0.015958835154181738, "grad_norm": 1.7045707702636719, "learning_rate": 1.0594059405940596e-05, "loss": 0.7531, "step": 107 }, { "epoch": 0.016107983146276893, "grad_norm": 1.6645679473876953, "learning_rate": 1.0693069306930694e-05, "loss": 0.7042, "step": 108 }, { "epoch": 0.01625713113837205, "grad_norm": 1.6268208026885986, "learning_rate": 1.0792079207920793e-05, "loss": 0.7415, "step": 109 }, { "epoch": 0.016406279130467206, "grad_norm": 1.446374535560608, "learning_rate": 1.0891089108910893e-05, "loss": 0.7147, "step": 110 }, { "epoch": 0.016555427122562364, "grad_norm": 1.6723965406417847, "learning_rate": 1.0990099009900992e-05, "loss": 0.7538, "step": 111 }, { "epoch": 0.01670457511465752, "grad_norm": 1.6058119535446167, "learning_rate": 1.108910891089109e-05, "loss": 0.7426, "step": 112 }, { "epoch": 0.016853723106752674, "grad_norm": 1.9413471221923828, "learning_rate": 1.118811881188119e-05, "loss": 0.7472, "step": 113 }, { "epoch": 0.017002871098847832, "grad_norm": 1.0029256343841553, "learning_rate": 1.1287128712871288e-05, "loss": 0.613, "step": 114 }, { "epoch": 0.017152019090942987, "grad_norm": 1.6287275552749634, "learning_rate": 1.1386138613861385e-05, "loss": 0.774, "step": 115 }, { "epoch": 0.017301167083038145, "grad_norm": 1.9319134950637817, "learning_rate": 1.1485148514851485e-05, "loss": 0.7675, "step": 116 }, { "epoch": 0.0174503150751333, "grad_norm": 1.7820221185684204, "learning_rate": 1.1584158415841584e-05, "loss": 0.7769, "step": 117 }, { "epoch": 0.017599463067228458, "grad_norm": 1.8961478471755981, "learning_rate": 1.1683168316831684e-05, "loss": 0.811, "step": 118 }, { "epoch": 0.017748611059323613, "grad_norm": 1.7435803413391113, "learning_rate": 1.1782178217821782e-05, "loss": 0.784, "step": 119 }, { "epoch": 0.01789775905141877, "grad_norm": 1.6413477659225464, "learning_rate": 1.1881188118811881e-05, "loss": 0.7616, "step": 120 }, { "epoch": 0.018046907043513926, "grad_norm": 1.7701083421707153, "learning_rate": 1.198019801980198e-05, "loss": 0.6919, "step": 121 }, { "epoch": 0.018196055035609084, "grad_norm": 1.6069620847702026, "learning_rate": 1.207920792079208e-05, "loss": 0.7153, "step": 122 }, { "epoch": 0.01834520302770424, "grad_norm": 1.460310459136963, "learning_rate": 1.217821782178218e-05, "loss": 0.7061, "step": 123 }, { "epoch": 0.018494351019799397, "grad_norm": 1.7387101650238037, "learning_rate": 1.2277227722772278e-05, "loss": 0.6762, "step": 124 }, { "epoch": 0.018643499011894552, "grad_norm": 1.6819206476211548, "learning_rate": 1.2376237623762377e-05, "loss": 0.7339, "step": 125 }, { "epoch": 0.01879264700398971, "grad_norm": 1.662119746208191, "learning_rate": 1.2475247524752477e-05, "loss": 0.799, "step": 126 }, { "epoch": 0.018941794996084865, "grad_norm": 1.8131217956542969, "learning_rate": 1.2574257425742576e-05, "loss": 0.7419, "step": 127 }, { "epoch": 0.01909094298818002, "grad_norm": 1.7437299489974976, "learning_rate": 1.2673267326732674e-05, "loss": 0.8482, "step": 128 }, { "epoch": 0.01924009098027518, "grad_norm": 1.709039330482483, "learning_rate": 1.2772277227722773e-05, "loss": 0.7595, "step": 129 }, { "epoch": 0.019389238972370333, "grad_norm": 1.8217540979385376, "learning_rate": 1.2871287128712873e-05, "loss": 0.7869, "step": 130 }, { "epoch": 0.01953838696446549, "grad_norm": 1.773653268814087, "learning_rate": 1.2970297029702972e-05, "loss": 0.8442, "step": 131 }, { "epoch": 0.019687534956560646, "grad_norm": 1.6657646894454956, "learning_rate": 1.306930693069307e-05, "loss": 0.7066, "step": 132 }, { "epoch": 0.019836682948655805, "grad_norm": 1.8044629096984863, "learning_rate": 1.316831683168317e-05, "loss": 0.8073, "step": 133 }, { "epoch": 0.01998583094075096, "grad_norm": 1.6769359111785889, "learning_rate": 1.326732673267327e-05, "loss": 0.7185, "step": 134 }, { "epoch": 0.020134978932846118, "grad_norm": 1.6461859941482544, "learning_rate": 1.3366336633663369e-05, "loss": 0.801, "step": 135 }, { "epoch": 0.020284126924941272, "grad_norm": 1.7334429025650024, "learning_rate": 1.3465346534653467e-05, "loss": 0.7856, "step": 136 }, { "epoch": 0.02043327491703643, "grad_norm": 1.5652496814727783, "learning_rate": 1.3564356435643566e-05, "loss": 0.6864, "step": 137 }, { "epoch": 0.020582422909131586, "grad_norm": 1.838726282119751, "learning_rate": 1.3663366336633666e-05, "loss": 0.6964, "step": 138 }, { "epoch": 0.020731570901226744, "grad_norm": 1.6988335847854614, "learning_rate": 1.3762376237623762e-05, "loss": 0.8088, "step": 139 }, { "epoch": 0.0208807188933219, "grad_norm": 1.6078284978866577, "learning_rate": 1.3861386138613861e-05, "loss": 0.6968, "step": 140 }, { "epoch": 0.021029866885417053, "grad_norm": 1.8192890882492065, "learning_rate": 1.396039603960396e-05, "loss": 0.7082, "step": 141 }, { "epoch": 0.02117901487751221, "grad_norm": 1.9122427701950073, "learning_rate": 1.405940594059406e-05, "loss": 0.7945, "step": 142 }, { "epoch": 0.021328162869607366, "grad_norm": 1.4321414232254028, "learning_rate": 1.4158415841584158e-05, "loss": 0.5992, "step": 143 }, { "epoch": 0.021477310861702525, "grad_norm": 1.7918022871017456, "learning_rate": 1.4257425742574257e-05, "loss": 0.6672, "step": 144 }, { "epoch": 0.02162645885379768, "grad_norm": 1.8519387245178223, "learning_rate": 1.4356435643564357e-05, "loss": 0.7522, "step": 145 }, { "epoch": 0.021775606845892838, "grad_norm": 1.8438019752502441, "learning_rate": 1.4455445544554456e-05, "loss": 0.7505, "step": 146 }, { "epoch": 0.021924754837987993, "grad_norm": 1.6194432973861694, "learning_rate": 1.4554455445544556e-05, "loss": 0.6713, "step": 147 }, { "epoch": 0.02207390283008315, "grad_norm": 1.9215517044067383, "learning_rate": 1.4653465346534654e-05, "loss": 0.7964, "step": 148 }, { "epoch": 0.022223050822178306, "grad_norm": 1.6668695211410522, "learning_rate": 1.4752475247524753e-05, "loss": 0.7374, "step": 149 }, { "epoch": 0.022372198814273464, "grad_norm": 1.754957675933838, "learning_rate": 1.4851485148514853e-05, "loss": 0.7552, "step": 150 }, { "epoch": 0.02252134680636862, "grad_norm": 1.642839789390564, "learning_rate": 1.4950495049504952e-05, "loss": 0.614, "step": 151 }, { "epoch": 0.022670494798463777, "grad_norm": 1.6388604640960693, "learning_rate": 1.504950495049505e-05, "loss": 0.7252, "step": 152 }, { "epoch": 0.022819642790558932, "grad_norm": 1.639191746711731, "learning_rate": 1.514851485148515e-05, "loss": 0.7199, "step": 153 }, { "epoch": 0.02296879078265409, "grad_norm": 0.9572929739952087, "learning_rate": 1.5247524752475249e-05, "loss": 0.5738, "step": 154 }, { "epoch": 0.023117938774749245, "grad_norm": 1.8391454219818115, "learning_rate": 1.534653465346535e-05, "loss": 0.7065, "step": 155 }, { "epoch": 0.0232670867668444, "grad_norm": 1.7684880495071411, "learning_rate": 1.5445544554455448e-05, "loss": 0.8145, "step": 156 }, { "epoch": 0.023416234758939558, "grad_norm": 1.9761751890182495, "learning_rate": 1.5544554455445548e-05, "loss": 0.8589, "step": 157 }, { "epoch": 0.023565382751034713, "grad_norm": 1.582116723060608, "learning_rate": 1.5643564356435644e-05, "loss": 0.7536, "step": 158 }, { "epoch": 0.02371453074312987, "grad_norm": 1.7217851877212524, "learning_rate": 1.5742574257425743e-05, "loss": 0.7458, "step": 159 }, { "epoch": 0.023863678735225026, "grad_norm": 1.6243512630462646, "learning_rate": 1.5841584158415843e-05, "loss": 0.6789, "step": 160 }, { "epoch": 0.024012826727320184, "grad_norm": 1.6033480167388916, "learning_rate": 1.5940594059405942e-05, "loss": 0.721, "step": 161 }, { "epoch": 0.02416197471941534, "grad_norm": 1.7328108549118042, "learning_rate": 1.6039603960396042e-05, "loss": 0.6663, "step": 162 }, { "epoch": 0.024311122711510497, "grad_norm": 1.6065844297409058, "learning_rate": 1.613861386138614e-05, "loss": 0.7136, "step": 163 }, { "epoch": 0.024460270703605652, "grad_norm": 1.911989450454712, "learning_rate": 1.623762376237624e-05, "loss": 0.7731, "step": 164 }, { "epoch": 0.02460941869570081, "grad_norm": 0.9822590351104736, "learning_rate": 1.6336633663366337e-05, "loss": 0.6647, "step": 165 }, { "epoch": 0.024758566687795965, "grad_norm": 1.8122568130493164, "learning_rate": 1.6435643564356436e-05, "loss": 0.7422, "step": 166 }, { "epoch": 0.024907714679891124, "grad_norm": 1.6496695280075073, "learning_rate": 1.6534653465346536e-05, "loss": 0.7706, "step": 167 }, { "epoch": 0.02505686267198628, "grad_norm": 1.7313069105148315, "learning_rate": 1.6633663366336635e-05, "loss": 0.7832, "step": 168 }, { "epoch": 0.025206010664081433, "grad_norm": 1.888084888458252, "learning_rate": 1.6732673267326735e-05, "loss": 0.7671, "step": 169 }, { "epoch": 0.02535515865617659, "grad_norm": 0.9465843439102173, "learning_rate": 1.683168316831683e-05, "loss": 0.6133, "step": 170 }, { "epoch": 0.025504306648271746, "grad_norm": 1.7396955490112305, "learning_rate": 1.693069306930693e-05, "loss": 0.668, "step": 171 }, { "epoch": 0.025653454640366904, "grad_norm": 1.8010923862457275, "learning_rate": 1.702970297029703e-05, "loss": 0.7886, "step": 172 }, { "epoch": 0.02580260263246206, "grad_norm": 1.8005412817001343, "learning_rate": 1.712871287128713e-05, "loss": 0.7766, "step": 173 }, { "epoch": 0.025951750624557218, "grad_norm": 1.7587385177612305, "learning_rate": 1.722772277227723e-05, "loss": 0.7631, "step": 174 }, { "epoch": 0.026100898616652372, "grad_norm": 1.814207673072815, "learning_rate": 1.732673267326733e-05, "loss": 0.6994, "step": 175 }, { "epoch": 0.02625004660874753, "grad_norm": 0.9923914074897766, "learning_rate": 1.7425742574257428e-05, "loss": 0.6472, "step": 176 }, { "epoch": 0.026399194600842685, "grad_norm": 1.9012458324432373, "learning_rate": 1.7524752475247528e-05, "loss": 0.7604, "step": 177 }, { "epoch": 0.026548342592937844, "grad_norm": 1.8085814714431763, "learning_rate": 1.7623762376237624e-05, "loss": 0.6983, "step": 178 }, { "epoch": 0.026697490585033, "grad_norm": 1.6659190654754639, "learning_rate": 1.7722772277227723e-05, "loss": 0.7027, "step": 179 }, { "epoch": 0.026846638577128157, "grad_norm": 1.7098112106323242, "learning_rate": 1.7821782178217823e-05, "loss": 0.6367, "step": 180 }, { "epoch": 0.02699578656922331, "grad_norm": 1.9318565130233765, "learning_rate": 1.7920792079207922e-05, "loss": 0.8169, "step": 181 }, { "epoch": 0.02714493456131847, "grad_norm": 0.9997861385345459, "learning_rate": 1.8019801980198022e-05, "loss": 0.6111, "step": 182 }, { "epoch": 0.027294082553413625, "grad_norm": 1.6954238414764404, "learning_rate": 1.811881188118812e-05, "loss": 0.7263, "step": 183 }, { "epoch": 0.02744323054550878, "grad_norm": 1.8744195699691772, "learning_rate": 1.821782178217822e-05, "loss": 0.7932, "step": 184 }, { "epoch": 0.027592378537603938, "grad_norm": 1.919986605644226, "learning_rate": 1.831683168316832e-05, "loss": 0.7712, "step": 185 }, { "epoch": 0.027741526529699093, "grad_norm": 1.8210453987121582, "learning_rate": 1.841584158415842e-05, "loss": 0.7089, "step": 186 }, { "epoch": 0.02789067452179425, "grad_norm": 1.8532724380493164, "learning_rate": 1.8514851485148516e-05, "loss": 0.7618, "step": 187 }, { "epoch": 0.028039822513889406, "grad_norm": 1.6459122896194458, "learning_rate": 1.8613861386138615e-05, "loss": 0.7971, "step": 188 }, { "epoch": 0.028188970505984564, "grad_norm": 1.4749879837036133, "learning_rate": 1.8712871287128715e-05, "loss": 0.7061, "step": 189 }, { "epoch": 0.02833811849807972, "grad_norm": 1.7577928304672241, "learning_rate": 1.881188118811881e-05, "loss": 0.745, "step": 190 }, { "epoch": 0.028487266490174877, "grad_norm": 1.7507456541061401, "learning_rate": 1.891089108910891e-05, "loss": 0.7124, "step": 191 }, { "epoch": 0.028636414482270032, "grad_norm": 1.548930287361145, "learning_rate": 1.900990099009901e-05, "loss": 0.6308, "step": 192 }, { "epoch": 0.02878556247436519, "grad_norm": 1.563779354095459, "learning_rate": 1.910891089108911e-05, "loss": 0.7173, "step": 193 }, { "epoch": 0.028934710466460345, "grad_norm": 1.4497593641281128, "learning_rate": 1.920792079207921e-05, "loss": 0.6688, "step": 194 }, { "epoch": 0.029083858458555503, "grad_norm": 1.7159051895141602, "learning_rate": 1.930693069306931e-05, "loss": 0.7166, "step": 195 }, { "epoch": 0.029233006450650658, "grad_norm": 1.0151840448379517, "learning_rate": 1.9405940594059408e-05, "loss": 0.6496, "step": 196 }, { "epoch": 0.029382154442745813, "grad_norm": 1.9960867166519165, "learning_rate": 1.9504950495049508e-05, "loss": 0.746, "step": 197 }, { "epoch": 0.02953130243484097, "grad_norm": 1.710082769393921, "learning_rate": 1.9603960396039604e-05, "loss": 0.7379, "step": 198 }, { "epoch": 0.029680450426936126, "grad_norm": 1.9930142164230347, "learning_rate": 1.9702970297029703e-05, "loss": 0.7984, "step": 199 }, { "epoch": 0.029829598419031284, "grad_norm": 1.6484274864196777, "learning_rate": 1.9801980198019803e-05, "loss": 0.7327, "step": 200 }, { "epoch": 0.02997874641112644, "grad_norm": 1.6819043159484863, "learning_rate": 1.9900990099009902e-05, "loss": 0.6708, "step": 201 }, { "epoch": 0.030127894403221597, "grad_norm": 1.551066279411316, "learning_rate": 2e-05, "loss": 0.7036, "step": 202 }, { "epoch": 0.030277042395316752, "grad_norm": 1.6419272422790527, "learning_rate": 1.999999883271794e-05, "loss": 0.6899, "step": 203 }, { "epoch": 0.03042619038741191, "grad_norm": 1.6170799732208252, "learning_rate": 1.9999995330872033e-05, "loss": 0.7261, "step": 204 }, { "epoch": 0.030575338379507065, "grad_norm": 1.6682153940200806, "learning_rate": 1.9999989494463094e-05, "loss": 0.8176, "step": 205 }, { "epoch": 0.030724486371602223, "grad_norm": 1.6111252307891846, "learning_rate": 1.9999981323492487e-05, "loss": 0.7418, "step": 206 }, { "epoch": 0.030873634363697378, "grad_norm": 0.9125475287437439, "learning_rate": 1.9999970817962122e-05, "loss": 0.5985, "step": 207 }, { "epoch": 0.031022782355792537, "grad_norm": 1.711775779724121, "learning_rate": 1.999995797787445e-05, "loss": 0.6546, "step": 208 }, { "epoch": 0.03117193034788769, "grad_norm": 1.523328423500061, "learning_rate": 1.9999942803232467e-05, "loss": 0.7757, "step": 209 }, { "epoch": 0.03132107833998285, "grad_norm": 1.7331674098968506, "learning_rate": 1.999992529403971e-05, "loss": 0.7528, "step": 210 }, { "epoch": 0.031470226332078, "grad_norm": 1.4765695333480835, "learning_rate": 1.9999905450300284e-05, "loss": 0.6895, "step": 211 }, { "epoch": 0.03161937432417316, "grad_norm": 1.4427309036254883, "learning_rate": 1.9999883272018805e-05, "loss": 0.7723, "step": 212 }, { "epoch": 0.03176852231626832, "grad_norm": 1.6306421756744385, "learning_rate": 1.9999858759200455e-05, "loss": 0.7432, "step": 213 }, { "epoch": 0.031917670308363476, "grad_norm": 1.8211294412612915, "learning_rate": 1.999983191185096e-05, "loss": 0.7295, "step": 214 }, { "epoch": 0.03206681830045863, "grad_norm": 1.5802382230758667, "learning_rate": 1.999980272997659e-05, "loss": 0.6879, "step": 215 }, { "epoch": 0.032215966292553785, "grad_norm": 1.3854180574417114, "learning_rate": 1.9999771213584147e-05, "loss": 0.673, "step": 216 }, { "epoch": 0.032365114284648944, "grad_norm": 1.8963450193405151, "learning_rate": 1.9999737362680997e-05, "loss": 0.7707, "step": 217 }, { "epoch": 0.0325142622767441, "grad_norm": 1.615785837173462, "learning_rate": 1.9999701177275045e-05, "loss": 0.8296, "step": 218 }, { "epoch": 0.03266341026883925, "grad_norm": 1.6357091665267944, "learning_rate": 1.9999662657374732e-05, "loss": 0.7082, "step": 219 }, { "epoch": 0.03281255826093441, "grad_norm": 1.629034399986267, "learning_rate": 1.999962180298905e-05, "loss": 0.6377, "step": 220 }, { "epoch": 0.03296170625302957, "grad_norm": 1.6643588542938232, "learning_rate": 1.9999578614127544e-05, "loss": 0.7727, "step": 221 }, { "epoch": 0.03311085424512473, "grad_norm": 1.4784526824951172, "learning_rate": 1.9999533090800293e-05, "loss": 0.7138, "step": 222 }, { "epoch": 0.03326000223721988, "grad_norm": 1.6167758703231812, "learning_rate": 1.9999485233017926e-05, "loss": 0.6904, "step": 223 }, { "epoch": 0.03340915022931504, "grad_norm": 1.5103248357772827, "learning_rate": 1.9999435040791612e-05, "loss": 0.591, "step": 224 }, { "epoch": 0.033558298221410196, "grad_norm": 1.5189889669418335, "learning_rate": 1.999938251413307e-05, "loss": 0.7554, "step": 225 }, { "epoch": 0.03370744621350535, "grad_norm": 1.5767608880996704, "learning_rate": 1.9999327653054563e-05, "loss": 0.6459, "step": 226 }, { "epoch": 0.033856594205600506, "grad_norm": 1.5449451208114624, "learning_rate": 1.9999270457568904e-05, "loss": 0.663, "step": 227 }, { "epoch": 0.034005742197695664, "grad_norm": 0.9470979571342468, "learning_rate": 1.9999210927689438e-05, "loss": 0.5989, "step": 228 }, { "epoch": 0.03415489018979082, "grad_norm": 1.4691171646118164, "learning_rate": 1.9999149063430066e-05, "loss": 0.769, "step": 229 }, { "epoch": 0.034304038181885974, "grad_norm": 1.0067752599716187, "learning_rate": 1.999908486480523e-05, "loss": 0.6361, "step": 230 }, { "epoch": 0.03445318617398113, "grad_norm": 1.6395866870880127, "learning_rate": 1.9999018331829916e-05, "loss": 0.7291, "step": 231 }, { "epoch": 0.03460233416607629, "grad_norm": 1.6291648149490356, "learning_rate": 1.999894946451966e-05, "loss": 0.7628, "step": 232 }, { "epoch": 0.03475148215817145, "grad_norm": 1.7023979425430298, "learning_rate": 1.999887826289054e-05, "loss": 0.7519, "step": 233 }, { "epoch": 0.0349006301502666, "grad_norm": 1.5916025638580322, "learning_rate": 1.9998804726959173e-05, "loss": 0.6652, "step": 234 }, { "epoch": 0.03504977814236176, "grad_norm": 1.4315685033798218, "learning_rate": 1.9998728856742732e-05, "loss": 0.6641, "step": 235 }, { "epoch": 0.035198926134456916, "grad_norm": 1.490493655204773, "learning_rate": 1.9998650652258926e-05, "loss": 0.692, "step": 236 }, { "epoch": 0.035348074126552075, "grad_norm": 1.5938440561294556, "learning_rate": 1.9998570113526013e-05, "loss": 0.7817, "step": 237 }, { "epoch": 0.035497222118647226, "grad_norm": 1.6600455045700073, "learning_rate": 1.9998487240562798e-05, "loss": 0.6654, "step": 238 }, { "epoch": 0.035646370110742384, "grad_norm": 1.5784664154052734, "learning_rate": 1.9998402033388626e-05, "loss": 0.6787, "step": 239 }, { "epoch": 0.03579551810283754, "grad_norm": 1.5787382125854492, "learning_rate": 1.9998314492023387e-05, "loss": 0.6989, "step": 240 }, { "epoch": 0.035944666094932694, "grad_norm": 1.5429885387420654, "learning_rate": 1.9998224616487523e-05, "loss": 0.694, "step": 241 }, { "epoch": 0.03609381408702785, "grad_norm": 0.966249406337738, "learning_rate": 1.9998132406802008e-05, "loss": 0.6155, "step": 242 }, { "epoch": 0.03624296207912301, "grad_norm": 1.9597301483154297, "learning_rate": 1.999803786298838e-05, "loss": 0.8097, "step": 243 }, { "epoch": 0.03639211007121817, "grad_norm": 1.4222015142440796, "learning_rate": 1.9997940985068702e-05, "loss": 0.6982, "step": 244 }, { "epoch": 0.03654125806331332, "grad_norm": 1.5456483364105225, "learning_rate": 1.9997841773065594e-05, "loss": 0.763, "step": 245 }, { "epoch": 0.03669040605540848, "grad_norm": 1.6217448711395264, "learning_rate": 1.9997740227002217e-05, "loss": 0.8325, "step": 246 }, { "epoch": 0.036839554047503636, "grad_norm": 1.4367305040359497, "learning_rate": 1.9997636346902284e-05, "loss": 0.615, "step": 247 }, { "epoch": 0.036988702039598795, "grad_norm": 1.5776287317276, "learning_rate": 1.9997530132790034e-05, "loss": 0.7564, "step": 248 }, { "epoch": 0.037137850031693946, "grad_norm": 1.5807666778564453, "learning_rate": 1.9997421584690272e-05, "loss": 0.7369, "step": 249 }, { "epoch": 0.037286998023789104, "grad_norm": 0.9507095813751221, "learning_rate": 1.9997310702628338e-05, "loss": 0.6599, "step": 250 }, { "epoch": 0.03743614601588426, "grad_norm": 1.6004399061203003, "learning_rate": 1.9997197486630116e-05, "loss": 0.6659, "step": 251 }, { "epoch": 0.03758529400797942, "grad_norm": 1.5122284889221191, "learning_rate": 1.9997081936722037e-05, "loss": 0.8033, "step": 252 }, { "epoch": 0.03773444200007457, "grad_norm": 1.5609434843063354, "learning_rate": 1.9996964052931082e-05, "loss": 0.6849, "step": 253 }, { "epoch": 0.03788358999216973, "grad_norm": 1.595179796218872, "learning_rate": 1.9996843835284765e-05, "loss": 0.7449, "step": 254 }, { "epoch": 0.03803273798426489, "grad_norm": 1.5428959131240845, "learning_rate": 1.9996721283811157e-05, "loss": 0.7566, "step": 255 }, { "epoch": 0.03818188597636004, "grad_norm": 1.6424975395202637, "learning_rate": 1.9996596398538865e-05, "loss": 0.7668, "step": 256 }, { "epoch": 0.0383310339684552, "grad_norm": 1.7203565835952759, "learning_rate": 1.9996469179497045e-05, "loss": 0.7288, "step": 257 }, { "epoch": 0.03848018196055036, "grad_norm": 1.6912214756011963, "learning_rate": 1.99963396267154e-05, "loss": 0.7566, "step": 258 }, { "epoch": 0.038629329952645515, "grad_norm": 1.505427360534668, "learning_rate": 1.999620774022417e-05, "loss": 0.7211, "step": 259 }, { "epoch": 0.038778477944740666, "grad_norm": 1.3943287134170532, "learning_rate": 1.9996073520054143e-05, "loss": 0.6966, "step": 260 }, { "epoch": 0.038927625936835825, "grad_norm": 1.5639848709106445, "learning_rate": 1.999593696623666e-05, "loss": 0.7475, "step": 261 }, { "epoch": 0.03907677392893098, "grad_norm": 1.490462064743042, "learning_rate": 1.99957980788036e-05, "loss": 0.723, "step": 262 }, { "epoch": 0.03922592192102614, "grad_norm": 1.5346022844314575, "learning_rate": 1.9995656857787384e-05, "loss": 0.6107, "step": 263 }, { "epoch": 0.03937506991312129, "grad_norm": 0.9769139885902405, "learning_rate": 1.999551330322098e-05, "loss": 0.6176, "step": 264 }, { "epoch": 0.03952421790521645, "grad_norm": 1.4978301525115967, "learning_rate": 1.9995367415137906e-05, "loss": 0.6497, "step": 265 }, { "epoch": 0.03967336589731161, "grad_norm": 1.6162197589874268, "learning_rate": 1.9995219193572216e-05, "loss": 0.7296, "step": 266 }, { "epoch": 0.03982251388940676, "grad_norm": 1.7493878602981567, "learning_rate": 1.9995068638558522e-05, "loss": 0.7148, "step": 267 }, { "epoch": 0.03997166188150192, "grad_norm": 1.604631781578064, "learning_rate": 1.999491575013196e-05, "loss": 0.7478, "step": 268 }, { "epoch": 0.04012080987359708, "grad_norm": 0.9143502116203308, "learning_rate": 1.9994760528328226e-05, "loss": 0.6402, "step": 269 }, { "epoch": 0.040269957865692235, "grad_norm": 1.4994713068008423, "learning_rate": 1.999460297318357e-05, "loss": 0.6771, "step": 270 }, { "epoch": 0.04041910585778739, "grad_norm": 0.9124842882156372, "learning_rate": 1.9994443084734754e-05, "loss": 0.5914, "step": 271 }, { "epoch": 0.040568253849882545, "grad_norm": 1.600705862045288, "learning_rate": 1.999428086301912e-05, "loss": 0.6924, "step": 272 }, { "epoch": 0.0407174018419777, "grad_norm": 1.435002088546753, "learning_rate": 1.9994116308074532e-05, "loss": 0.6325, "step": 273 }, { "epoch": 0.04086654983407286, "grad_norm": 1.598605990409851, "learning_rate": 1.9993949419939412e-05, "loss": 0.7782, "step": 274 }, { "epoch": 0.04101569782616801, "grad_norm": 1.5829216241836548, "learning_rate": 1.9993780198652716e-05, "loss": 0.71, "step": 275 }, { "epoch": 0.04116484581826317, "grad_norm": 1.4694468975067139, "learning_rate": 1.9993608644253954e-05, "loss": 0.7084, "step": 276 }, { "epoch": 0.04131399381035833, "grad_norm": 1.6179119348526, "learning_rate": 1.9993434756783173e-05, "loss": 0.7469, "step": 277 }, { "epoch": 0.04146314180245349, "grad_norm": 1.5919044017791748, "learning_rate": 1.999325853628097e-05, "loss": 0.642, "step": 278 }, { "epoch": 0.04161228979454864, "grad_norm": 1.4427070617675781, "learning_rate": 1.9993079982788486e-05, "loss": 0.705, "step": 279 }, { "epoch": 0.0417614377866438, "grad_norm": 1.6810849905014038, "learning_rate": 1.9992899096347403e-05, "loss": 0.7483, "step": 280 }, { "epoch": 0.041910585778738955, "grad_norm": 1.6644163131713867, "learning_rate": 1.9992715876999953e-05, "loss": 0.7444, "step": 281 }, { "epoch": 0.04205973377083411, "grad_norm": 1.608256459236145, "learning_rate": 1.9992530324788903e-05, "loss": 0.7744, "step": 282 }, { "epoch": 0.042208881762929265, "grad_norm": 1.6687290668487549, "learning_rate": 1.999234243975758e-05, "loss": 0.6523, "step": 283 }, { "epoch": 0.04235802975502442, "grad_norm": 1.633078694343567, "learning_rate": 1.9992152221949842e-05, "loss": 0.8036, "step": 284 }, { "epoch": 0.04250717774711958, "grad_norm": 1.4474937915802002, "learning_rate": 1.99919596714101e-05, "loss": 0.7255, "step": 285 }, { "epoch": 0.04265632573921473, "grad_norm": 1.5881614685058594, "learning_rate": 1.9991764788183303e-05, "loss": 0.7402, "step": 286 }, { "epoch": 0.04280547373130989, "grad_norm": 1.4106978178024292, "learning_rate": 1.9991567572314948e-05, "loss": 0.7233, "step": 287 }, { "epoch": 0.04295462172340505, "grad_norm": 1.4634579420089722, "learning_rate": 1.9991368023851078e-05, "loss": 0.6687, "step": 288 }, { "epoch": 0.04310376971550021, "grad_norm": 1.5051909685134888, "learning_rate": 1.9991166142838276e-05, "loss": 0.6419, "step": 289 }, { "epoch": 0.04325291770759536, "grad_norm": 1.5277020931243896, "learning_rate": 1.9990961929323674e-05, "loss": 0.7297, "step": 290 }, { "epoch": 0.04340206569969052, "grad_norm": 1.5958130359649658, "learning_rate": 1.999075538335495e-05, "loss": 0.769, "step": 291 }, { "epoch": 0.043551213691785676, "grad_norm": 1.5844252109527588, "learning_rate": 1.9990546504980318e-05, "loss": 0.8003, "step": 292 }, { "epoch": 0.043700361683880834, "grad_norm": 1.4733326435089111, "learning_rate": 1.9990335294248543e-05, "loss": 0.7027, "step": 293 }, { "epoch": 0.043849509675975985, "grad_norm": 1.5161259174346924, "learning_rate": 1.999012175120894e-05, "loss": 0.7283, "step": 294 }, { "epoch": 0.043998657668071144, "grad_norm": 1.4129836559295654, "learning_rate": 1.9989905875911353e-05, "loss": 0.6848, "step": 295 }, { "epoch": 0.0441478056601663, "grad_norm": 1.4780118465423584, "learning_rate": 1.9989687668406184e-05, "loss": 0.7338, "step": 296 }, { "epoch": 0.04429695365226145, "grad_norm": 1.5172386169433594, "learning_rate": 1.998946712874438e-05, "loss": 0.6831, "step": 297 }, { "epoch": 0.04444610164435661, "grad_norm": 1.5392571687698364, "learning_rate": 1.9989244256977415e-05, "loss": 0.7426, "step": 298 }, { "epoch": 0.04459524963645177, "grad_norm": 1.4433679580688477, "learning_rate": 1.998901905315733e-05, "loss": 0.7212, "step": 299 }, { "epoch": 0.04474439762854693, "grad_norm": 0.953732430934906, "learning_rate": 1.99887915173367e-05, "loss": 0.6459, "step": 300 }, { "epoch": 0.04489354562064208, "grad_norm": 1.4539735317230225, "learning_rate": 1.9988561649568636e-05, "loss": 0.6629, "step": 301 }, { "epoch": 0.04504269361273724, "grad_norm": 1.4923568964004517, "learning_rate": 1.998832944990681e-05, "loss": 0.7008, "step": 302 }, { "epoch": 0.045191841604832396, "grad_norm": 1.4665173292160034, "learning_rate": 1.9988094918405427e-05, "loss": 0.7121, "step": 303 }, { "epoch": 0.045340989596927554, "grad_norm": 1.518250823020935, "learning_rate": 1.9987858055119243e-05, "loss": 0.731, "step": 304 }, { "epoch": 0.045490137589022706, "grad_norm": 1.7140804529190063, "learning_rate": 1.9987618860103554e-05, "loss": 0.7699, "step": 305 }, { "epoch": 0.045639285581117864, "grad_norm": 1.4344278573989868, "learning_rate": 1.9987377333414203e-05, "loss": 0.6986, "step": 306 }, { "epoch": 0.04578843357321302, "grad_norm": 1.3703956604003906, "learning_rate": 1.998713347510757e-05, "loss": 0.6882, "step": 307 }, { "epoch": 0.04593758156530818, "grad_norm": 1.5024700164794922, "learning_rate": 1.9986887285240592e-05, "loss": 0.7568, "step": 308 }, { "epoch": 0.04608672955740333, "grad_norm": 1.4860122203826904, "learning_rate": 1.998663876387074e-05, "loss": 0.7178, "step": 309 }, { "epoch": 0.04623587754949849, "grad_norm": 1.6480766534805298, "learning_rate": 1.9986387911056034e-05, "loss": 0.7759, "step": 310 }, { "epoch": 0.04638502554159365, "grad_norm": 1.5034713745117188, "learning_rate": 1.9986134726855036e-05, "loss": 0.7831, "step": 311 }, { "epoch": 0.0465341735336888, "grad_norm": 1.5134673118591309, "learning_rate": 1.9985879211326857e-05, "loss": 0.7635, "step": 312 }, { "epoch": 0.04668332152578396, "grad_norm": 1.5605199337005615, "learning_rate": 1.9985621364531144e-05, "loss": 0.748, "step": 313 }, { "epoch": 0.046832469517879116, "grad_norm": 1.3718481063842773, "learning_rate": 1.9985361186528097e-05, "loss": 0.6423, "step": 314 }, { "epoch": 0.046981617509974274, "grad_norm": 1.5439130067825317, "learning_rate": 1.9985098677378456e-05, "loss": 0.7371, "step": 315 }, { "epoch": 0.047130765502069426, "grad_norm": 1.642765760421753, "learning_rate": 1.99848338371435e-05, "loss": 0.7539, "step": 316 }, { "epoch": 0.047279913494164584, "grad_norm": 1.475643515586853, "learning_rate": 1.9984566665885064e-05, "loss": 0.584, "step": 317 }, { "epoch": 0.04742906148625974, "grad_norm": 1.4803212881088257, "learning_rate": 1.9984297163665518e-05, "loss": 0.685, "step": 318 }, { "epoch": 0.0475782094783549, "grad_norm": 1.5185027122497559, "learning_rate": 1.998402533054778e-05, "loss": 0.7217, "step": 319 }, { "epoch": 0.04772735747045005, "grad_norm": 1.7957311868667603, "learning_rate": 1.998375116659531e-05, "loss": 0.8231, "step": 320 }, { "epoch": 0.04787650546254521, "grad_norm": 1.5849497318267822, "learning_rate": 1.9983474671872112e-05, "loss": 0.7545, "step": 321 }, { "epoch": 0.04802565345464037, "grad_norm": 1.4357719421386719, "learning_rate": 1.998319584644274e-05, "loss": 0.7084, "step": 322 }, { "epoch": 0.04817480144673553, "grad_norm": 1.387579321861267, "learning_rate": 1.9982914690372282e-05, "loss": 0.6821, "step": 323 }, { "epoch": 0.04832394943883068, "grad_norm": 1.5337121486663818, "learning_rate": 1.9982631203726385e-05, "loss": 0.7127, "step": 324 }, { "epoch": 0.048473097430925836, "grad_norm": 1.6455159187316895, "learning_rate": 1.9982345386571217e-05, "loss": 0.7478, "step": 325 }, { "epoch": 0.048622245423020995, "grad_norm": 1.4124482870101929, "learning_rate": 1.9982057238973516e-05, "loss": 0.6782, "step": 326 }, { "epoch": 0.048771393415116146, "grad_norm": 1.5444083213806152, "learning_rate": 1.998176676100055e-05, "loss": 0.6427, "step": 327 }, { "epoch": 0.048920541407211304, "grad_norm": 1.3563377857208252, "learning_rate": 1.9981473952720122e-05, "loss": 0.6535, "step": 328 }, { "epoch": 0.04906968939930646, "grad_norm": 1.5892703533172607, "learning_rate": 1.9981178814200603e-05, "loss": 0.7005, "step": 329 }, { "epoch": 0.04921883739140162, "grad_norm": 1.336524486541748, "learning_rate": 1.998088134551089e-05, "loss": 0.6549, "step": 330 }, { "epoch": 0.04936798538349677, "grad_norm": 1.662729263305664, "learning_rate": 1.998058154672043e-05, "loss": 0.7666, "step": 331 }, { "epoch": 0.04951713337559193, "grad_norm": 1.4783719778060913, "learning_rate": 1.998027941789921e-05, "loss": 0.6956, "step": 332 }, { "epoch": 0.04966628136768709, "grad_norm": 1.5683987140655518, "learning_rate": 1.997997495911777e-05, "loss": 0.7867, "step": 333 }, { "epoch": 0.04981542935978225, "grad_norm": 1.5841721296310425, "learning_rate": 1.9979668170447176e-05, "loss": 0.7796, "step": 334 }, { "epoch": 0.0499645773518774, "grad_norm": 1.4431957006454468, "learning_rate": 1.9979359051959063e-05, "loss": 0.7299, "step": 335 }, { "epoch": 0.05011372534397256, "grad_norm": 1.3350664377212524, "learning_rate": 1.997904760372559e-05, "loss": 0.67, "step": 336 }, { "epoch": 0.050262873336067715, "grad_norm": 1.5165947675704956, "learning_rate": 1.997873382581947e-05, "loss": 0.6917, "step": 337 }, { "epoch": 0.050412021328162866, "grad_norm": 1.4161449670791626, "learning_rate": 1.9978417718313953e-05, "loss": 0.6867, "step": 338 }, { "epoch": 0.050561169320258025, "grad_norm": 1.5938138961791992, "learning_rate": 1.997809928128284e-05, "loss": 0.7404, "step": 339 }, { "epoch": 0.05071031731235318, "grad_norm": 1.4800961017608643, "learning_rate": 1.9977778514800462e-05, "loss": 0.7595, "step": 340 }, { "epoch": 0.05085946530444834, "grad_norm": 1.6088716983795166, "learning_rate": 1.997745541894172e-05, "loss": 0.784, "step": 341 }, { "epoch": 0.05100861329654349, "grad_norm": 1.5642619132995605, "learning_rate": 1.997712999378203e-05, "loss": 0.7725, "step": 342 }, { "epoch": 0.05115776128863865, "grad_norm": 1.4439892768859863, "learning_rate": 1.9976802239397373e-05, "loss": 0.7304, "step": 343 }, { "epoch": 0.05130690928073381, "grad_norm": 1.534026026725769, "learning_rate": 1.9976472155864258e-05, "loss": 0.7891, "step": 344 }, { "epoch": 0.05145605727282897, "grad_norm": 1.524537444114685, "learning_rate": 1.997613974325975e-05, "loss": 0.7515, "step": 345 }, { "epoch": 0.05160520526492412, "grad_norm": 1.35983145236969, "learning_rate": 1.997580500166145e-05, "loss": 0.708, "step": 346 }, { "epoch": 0.05175435325701928, "grad_norm": 1.6361169815063477, "learning_rate": 1.9975467931147512e-05, "loss": 0.7723, "step": 347 }, { "epoch": 0.051903501249114435, "grad_norm": 1.5152329206466675, "learning_rate": 1.997512853179662e-05, "loss": 0.7241, "step": 348 }, { "epoch": 0.05205264924120959, "grad_norm": 1.7356202602386475, "learning_rate": 1.997478680368801e-05, "loss": 0.7737, "step": 349 }, { "epoch": 0.052201797233304745, "grad_norm": 1.4605746269226074, "learning_rate": 1.9974442746901464e-05, "loss": 0.7447, "step": 350 }, { "epoch": 0.0523509452253999, "grad_norm": 1.429103136062622, "learning_rate": 1.9974096361517302e-05, "loss": 0.6663, "step": 351 }, { "epoch": 0.05250009321749506, "grad_norm": 1.5587043762207031, "learning_rate": 1.9973747647616387e-05, "loss": 0.7475, "step": 352 }, { "epoch": 0.05264924120959021, "grad_norm": 1.5931293964385986, "learning_rate": 1.9973396605280135e-05, "loss": 0.7936, "step": 353 }, { "epoch": 0.05279838920168537, "grad_norm": 1.5888980627059937, "learning_rate": 1.9973043234590495e-05, "loss": 0.7165, "step": 354 }, { "epoch": 0.05294753719378053, "grad_norm": 1.6030563116073608, "learning_rate": 1.9972687535629962e-05, "loss": 0.7604, "step": 355 }, { "epoch": 0.05309668518587569, "grad_norm": 1.5393563508987427, "learning_rate": 1.997232950848158e-05, "loss": 0.678, "step": 356 }, { "epoch": 0.05324583317797084, "grad_norm": 1.4526506662368774, "learning_rate": 1.9971969153228934e-05, "loss": 0.6826, "step": 357 }, { "epoch": 0.053394981170066, "grad_norm": 1.430482029914856, "learning_rate": 1.9971606469956146e-05, "loss": 0.7058, "step": 358 }, { "epoch": 0.053544129162161155, "grad_norm": 1.4082790613174438, "learning_rate": 1.997124145874789e-05, "loss": 0.6276, "step": 359 }, { "epoch": 0.053693277154256314, "grad_norm": 1.4117693901062012, "learning_rate": 1.997087411968938e-05, "loss": 0.6656, "step": 360 }, { "epoch": 0.053842425146351465, "grad_norm": 1.3124408721923828, "learning_rate": 1.997050445286637e-05, "loss": 0.7401, "step": 361 }, { "epoch": 0.05399157313844662, "grad_norm": 1.5571537017822266, "learning_rate": 1.9970132458365165e-05, "loss": 0.7382, "step": 362 }, { "epoch": 0.05414072113054178, "grad_norm": 1.416812777519226, "learning_rate": 1.9969758136272614e-05, "loss": 0.7133, "step": 363 }, { "epoch": 0.05428986912263694, "grad_norm": 1.3400804996490479, "learning_rate": 1.9969381486676092e-05, "loss": 0.6257, "step": 364 }, { "epoch": 0.05443901711473209, "grad_norm": 1.5426405668258667, "learning_rate": 1.9969002509663543e-05, "loss": 0.6811, "step": 365 }, { "epoch": 0.05458816510682725, "grad_norm": 1.3529584407806396, "learning_rate": 1.9968621205323434e-05, "loss": 0.6636, "step": 366 }, { "epoch": 0.05473731309892241, "grad_norm": 1.5180954933166504, "learning_rate": 1.9968237573744788e-05, "loss": 0.718, "step": 367 }, { "epoch": 0.05488646109101756, "grad_norm": 1.577424168586731, "learning_rate": 1.9967851615017164e-05, "loss": 0.6508, "step": 368 }, { "epoch": 0.05503560908311272, "grad_norm": 1.401491403579712, "learning_rate": 1.9967463329230665e-05, "loss": 0.6907, "step": 369 }, { "epoch": 0.055184757075207876, "grad_norm": 1.5277283191680908, "learning_rate": 1.9967072716475938e-05, "loss": 0.7521, "step": 370 }, { "epoch": 0.055333905067303034, "grad_norm": 1.3720965385437012, "learning_rate": 1.996667977684418e-05, "loss": 0.631, "step": 371 }, { "epoch": 0.055483053059398185, "grad_norm": 1.3860142230987549, "learning_rate": 1.9966284510427118e-05, "loss": 0.6654, "step": 372 }, { "epoch": 0.055632201051493343, "grad_norm": 1.50922429561615, "learning_rate": 1.9965886917317034e-05, "loss": 0.758, "step": 373 }, { "epoch": 0.0557813490435885, "grad_norm": 0.9881088137626648, "learning_rate": 1.9965486997606747e-05, "loss": 0.6243, "step": 374 }, { "epoch": 0.05593049703568366, "grad_norm": 1.5098276138305664, "learning_rate": 1.996508475138962e-05, "loss": 0.6996, "step": 375 }, { "epoch": 0.05607964502777881, "grad_norm": 1.4448219537734985, "learning_rate": 1.9964680178759565e-05, "loss": 0.6784, "step": 376 }, { "epoch": 0.05622879301987397, "grad_norm": 1.5891748666763306, "learning_rate": 1.9964273279811026e-05, "loss": 0.7079, "step": 377 }, { "epoch": 0.05637794101196913, "grad_norm": 1.6885796785354614, "learning_rate": 1.9963864054639e-05, "loss": 0.7941, "step": 378 }, { "epoch": 0.056527089004064286, "grad_norm": 0.9266157746315002, "learning_rate": 1.996345250333902e-05, "loss": 0.6254, "step": 379 }, { "epoch": 0.05667623699615944, "grad_norm": 1.5008624792099, "learning_rate": 1.996303862600717e-05, "loss": 0.7249, "step": 380 }, { "epoch": 0.056825384988254596, "grad_norm": 1.5259833335876465, "learning_rate": 1.9962622422740067e-05, "loss": 0.7147, "step": 381 }, { "epoch": 0.056974532980349754, "grad_norm": 1.4623348712921143, "learning_rate": 1.996220389363488e-05, "loss": 0.7132, "step": 382 }, { "epoch": 0.057123680972444905, "grad_norm": 1.327634334564209, "learning_rate": 1.9961783038789314e-05, "loss": 0.6302, "step": 383 }, { "epoch": 0.057272828964540064, "grad_norm": 1.474143624305725, "learning_rate": 1.9961359858301622e-05, "loss": 0.7115, "step": 384 }, { "epoch": 0.05742197695663522, "grad_norm": 1.4162652492523193, "learning_rate": 1.99609343522706e-05, "loss": 0.6206, "step": 385 }, { "epoch": 0.05757112494873038, "grad_norm": 1.572583556175232, "learning_rate": 1.9960506520795585e-05, "loss": 0.7242, "step": 386 }, { "epoch": 0.05772027294082553, "grad_norm": 1.4912124872207642, "learning_rate": 1.9960076363976454e-05, "loss": 0.7523, "step": 387 }, { "epoch": 0.05786942093292069, "grad_norm": 1.5169504880905151, "learning_rate": 1.995964388191363e-05, "loss": 0.686, "step": 388 }, { "epoch": 0.05801856892501585, "grad_norm": 1.498563528060913, "learning_rate": 1.9959209074708084e-05, "loss": 0.7134, "step": 389 }, { "epoch": 0.058167716917111006, "grad_norm": 1.499510645866394, "learning_rate": 1.995877194246132e-05, "loss": 0.6465, "step": 390 }, { "epoch": 0.05831686490920616, "grad_norm": 1.4588828086853027, "learning_rate": 1.9958332485275386e-05, "loss": 0.7138, "step": 391 }, { "epoch": 0.058466012901301316, "grad_norm": 1.6011950969696045, "learning_rate": 1.9957890703252882e-05, "loss": 0.6521, "step": 392 }, { "epoch": 0.058615160893396474, "grad_norm": 1.3632590770721436, "learning_rate": 1.9957446596496945e-05, "loss": 0.6662, "step": 393 }, { "epoch": 0.058764308885491626, "grad_norm": 1.3973928689956665, "learning_rate": 1.995700016511125e-05, "loss": 0.6432, "step": 394 }, { "epoch": 0.058913456877586784, "grad_norm": 1.5284686088562012, "learning_rate": 1.995655140920002e-05, "loss": 0.7232, "step": 395 }, { "epoch": 0.05906260486968194, "grad_norm": 1.359071969985962, "learning_rate": 1.995610032886803e-05, "loss": 0.6781, "step": 396 }, { "epoch": 0.0592117528617771, "grad_norm": 1.589806079864502, "learning_rate": 1.995564692422057e-05, "loss": 0.71, "step": 397 }, { "epoch": 0.05936090085387225, "grad_norm": 1.4379953145980835, "learning_rate": 1.9955191195363505e-05, "loss": 0.689, "step": 398 }, { "epoch": 0.05951004884596741, "grad_norm": 1.7089396715164185, "learning_rate": 1.995473314240322e-05, "loss": 0.7406, "step": 399 }, { "epoch": 0.05965919683806257, "grad_norm": 1.3907313346862793, "learning_rate": 1.9954272765446656e-05, "loss": 0.6081, "step": 400 }, { "epoch": 0.05980834483015773, "grad_norm": 1.559873342514038, "learning_rate": 1.9953810064601284e-05, "loss": 0.7266, "step": 401 }, { "epoch": 0.05995749282225288, "grad_norm": 1.2699562311172485, "learning_rate": 1.995334503997513e-05, "loss": 0.6851, "step": 402 }, { "epoch": 0.060106640814348036, "grad_norm": 1.3944945335388184, "learning_rate": 1.9952877691676754e-05, "loss": 0.7256, "step": 403 }, { "epoch": 0.060255788806443195, "grad_norm": 1.4753215312957764, "learning_rate": 1.9952408019815266e-05, "loss": 0.5983, "step": 404 }, { "epoch": 0.06040493679853835, "grad_norm": 1.4626191854476929, "learning_rate": 1.9951936024500306e-05, "loss": 0.6975, "step": 405 }, { "epoch": 0.060554084790633504, "grad_norm": 1.4448440074920654, "learning_rate": 1.9951461705842073e-05, "loss": 0.751, "step": 406 }, { "epoch": 0.06070323278272866, "grad_norm": 1.5187348127365112, "learning_rate": 1.995098506395129e-05, "loss": 0.7219, "step": 407 }, { "epoch": 0.06085238077482382, "grad_norm": 1.5211695432662964, "learning_rate": 1.9950506098939243e-05, "loss": 0.7179, "step": 408 }, { "epoch": 0.06100152876691897, "grad_norm": 1.4152644872665405, "learning_rate": 1.9950024810917745e-05, "loss": 0.6817, "step": 409 }, { "epoch": 0.06115067675901413, "grad_norm": 1.4730937480926514, "learning_rate": 1.994954119999915e-05, "loss": 0.6868, "step": 410 }, { "epoch": 0.06129982475110929, "grad_norm": 1.4570707082748413, "learning_rate": 1.994905526629637e-05, "loss": 0.6767, "step": 411 }, { "epoch": 0.06144897274320445, "grad_norm": 1.5362976789474487, "learning_rate": 1.9948567009922842e-05, "loss": 0.683, "step": 412 }, { "epoch": 0.0615981207352996, "grad_norm": 1.4322980642318726, "learning_rate": 1.9948076430992557e-05, "loss": 0.7198, "step": 413 }, { "epoch": 0.061747268727394757, "grad_norm": 1.3514204025268555, "learning_rate": 1.9947583529620038e-05, "loss": 0.7064, "step": 414 }, { "epoch": 0.061896416719489915, "grad_norm": 1.3673690557479858, "learning_rate": 1.994708830592036e-05, "loss": 0.6074, "step": 415 }, { "epoch": 0.06204556471158507, "grad_norm": 1.4490498304367065, "learning_rate": 1.9946590760009137e-05, "loss": 0.6452, "step": 416 }, { "epoch": 0.062194712703680224, "grad_norm": 1.4640109539031982, "learning_rate": 1.9946090892002524e-05, "loss": 0.7157, "step": 417 }, { "epoch": 0.06234386069577538, "grad_norm": 1.6533218622207642, "learning_rate": 1.9945588702017215e-05, "loss": 0.7762, "step": 418 }, { "epoch": 0.06249300868787054, "grad_norm": 1.4364066123962402, "learning_rate": 1.9945084190170456e-05, "loss": 0.6685, "step": 419 }, { "epoch": 0.0626421566799657, "grad_norm": 1.469367504119873, "learning_rate": 1.9944577356580023e-05, "loss": 0.6545, "step": 420 }, { "epoch": 0.06279130467206086, "grad_norm": 1.3193655014038086, "learning_rate": 1.9944068201364238e-05, "loss": 0.6848, "step": 421 }, { "epoch": 0.062940452664156, "grad_norm": 1.3906612396240234, "learning_rate": 1.9943556724641975e-05, "loss": 0.7399, "step": 422 }, { "epoch": 0.06308960065625116, "grad_norm": 0.9286881685256958, "learning_rate": 1.9943042926532634e-05, "loss": 0.5844, "step": 423 }, { "epoch": 0.06323874864834632, "grad_norm": 1.4755446910858154, "learning_rate": 1.9942526807156166e-05, "loss": 0.7329, "step": 424 }, { "epoch": 0.06338789664044148, "grad_norm": 0.9194155335426331, "learning_rate": 1.9942008366633063e-05, "loss": 0.6326, "step": 425 }, { "epoch": 0.06353704463253664, "grad_norm": 1.3949514627456665, "learning_rate": 1.994148760508436e-05, "loss": 0.6005, "step": 426 }, { "epoch": 0.0636861926246318, "grad_norm": 1.594756841659546, "learning_rate": 1.994096452263163e-05, "loss": 0.769, "step": 427 }, { "epoch": 0.06383534061672695, "grad_norm": 1.4583789110183716, "learning_rate": 1.9940439119396985e-05, "loss": 0.667, "step": 428 }, { "epoch": 0.06398448860882211, "grad_norm": 1.3322975635528564, "learning_rate": 1.9939911395503094e-05, "loss": 0.6522, "step": 429 }, { "epoch": 0.06413363660091725, "grad_norm": 1.4323118925094604, "learning_rate": 1.9939381351073153e-05, "loss": 0.6703, "step": 430 }, { "epoch": 0.06428278459301241, "grad_norm": 1.5324490070343018, "learning_rate": 1.9938848986230904e-05, "loss": 0.666, "step": 431 }, { "epoch": 0.06443193258510757, "grad_norm": 1.4571603536605835, "learning_rate": 1.993831430110063e-05, "loss": 0.6511, "step": 432 }, { "epoch": 0.06458108057720273, "grad_norm": 1.5020426511764526, "learning_rate": 1.9937777295807156e-05, "loss": 0.7667, "step": 433 }, { "epoch": 0.06473022856929789, "grad_norm": 1.6153708696365356, "learning_rate": 1.9937237970475857e-05, "loss": 0.725, "step": 434 }, { "epoch": 0.06487937656139305, "grad_norm": 0.9078688025474548, "learning_rate": 1.993669632523263e-05, "loss": 0.5727, "step": 435 }, { "epoch": 0.0650285245534882, "grad_norm": 0.9583187103271484, "learning_rate": 1.993615236020393e-05, "loss": 0.6365, "step": 436 }, { "epoch": 0.06517767254558335, "grad_norm": 1.6068991422653198, "learning_rate": 1.9935606075516754e-05, "loss": 0.7401, "step": 437 }, { "epoch": 0.0653268205376785, "grad_norm": 0.9717398285865784, "learning_rate": 1.9935057471298633e-05, "loss": 0.6206, "step": 438 }, { "epoch": 0.06547596852977366, "grad_norm": 1.5897763967514038, "learning_rate": 1.993450654767764e-05, "loss": 0.6982, "step": 439 }, { "epoch": 0.06562511652186882, "grad_norm": 1.5132336616516113, "learning_rate": 1.993395330478239e-05, "loss": 0.8286, "step": 440 }, { "epoch": 0.06577426451396398, "grad_norm": 1.2519779205322266, "learning_rate": 1.993339774274205e-05, "loss": 0.6721, "step": 441 }, { "epoch": 0.06592341250605914, "grad_norm": 1.4578869342803955, "learning_rate": 1.993283986168631e-05, "loss": 0.7374, "step": 442 }, { "epoch": 0.0660725604981543, "grad_norm": 1.4773293733596802, "learning_rate": 1.9932279661745416e-05, "loss": 0.6737, "step": 443 }, { "epoch": 0.06622170849024946, "grad_norm": 1.2991313934326172, "learning_rate": 1.9931717143050147e-05, "loss": 0.5319, "step": 444 }, { "epoch": 0.0663708564823446, "grad_norm": 1.5397777557373047, "learning_rate": 1.9931152305731828e-05, "loss": 0.6801, "step": 445 }, { "epoch": 0.06652000447443976, "grad_norm": 1.4145373106002808, "learning_rate": 1.9930585149922325e-05, "loss": 0.6379, "step": 446 }, { "epoch": 0.06666915246653492, "grad_norm": 1.4230809211730957, "learning_rate": 1.9930015675754047e-05, "loss": 0.6991, "step": 447 }, { "epoch": 0.06681830045863008, "grad_norm": 1.0263575315475464, "learning_rate": 1.9929443883359934e-05, "loss": 0.632, "step": 448 }, { "epoch": 0.06696744845072523, "grad_norm": 1.4798200130462646, "learning_rate": 1.992886977287348e-05, "loss": 0.632, "step": 449 }, { "epoch": 0.06711659644282039, "grad_norm": 1.4818575382232666, "learning_rate": 1.9928293344428714e-05, "loss": 0.705, "step": 450 }, { "epoch": 0.06726574443491555, "grad_norm": 1.356042504310608, "learning_rate": 1.9927714598160204e-05, "loss": 0.6625, "step": 451 }, { "epoch": 0.0674148924270107, "grad_norm": 1.369890809059143, "learning_rate": 1.9927133534203064e-05, "loss": 0.6876, "step": 452 }, { "epoch": 0.06756404041910585, "grad_norm": 1.4438085556030273, "learning_rate": 1.992655015269295e-05, "loss": 0.6402, "step": 453 }, { "epoch": 0.06771318841120101, "grad_norm": 1.5097054243087769, "learning_rate": 1.992596445376605e-05, "loss": 0.7235, "step": 454 }, { "epoch": 0.06786233640329617, "grad_norm": 1.2713172435760498, "learning_rate": 1.9925376437559106e-05, "loss": 0.6958, "step": 455 }, { "epoch": 0.06801148439539133, "grad_norm": 1.627169132232666, "learning_rate": 1.992478610420939e-05, "loss": 0.7416, "step": 456 }, { "epoch": 0.06816063238748649, "grad_norm": 1.4097493886947632, "learning_rate": 1.992419345385472e-05, "loss": 0.6163, "step": 457 }, { "epoch": 0.06830978037958164, "grad_norm": 1.3728270530700684, "learning_rate": 1.992359848663345e-05, "loss": 0.6905, "step": 458 }, { "epoch": 0.0684589283716768, "grad_norm": 1.357750654220581, "learning_rate": 1.992300120268449e-05, "loss": 0.6507, "step": 459 }, { "epoch": 0.06860807636377195, "grad_norm": 1.4132955074310303, "learning_rate": 1.9922401602147266e-05, "loss": 0.7079, "step": 460 }, { "epoch": 0.0687572243558671, "grad_norm": 1.6018445491790771, "learning_rate": 1.992179968516177e-05, "loss": 0.7384, "step": 461 }, { "epoch": 0.06890637234796226, "grad_norm": 1.5601996183395386, "learning_rate": 1.9921195451868514e-05, "loss": 0.7038, "step": 462 }, { "epoch": 0.06905552034005742, "grad_norm": 1.3391180038452148, "learning_rate": 1.9920588902408567e-05, "loss": 0.6908, "step": 463 }, { "epoch": 0.06920466833215258, "grad_norm": 1.3740544319152832, "learning_rate": 1.991998003692353e-05, "loss": 0.7419, "step": 464 }, { "epoch": 0.06935381632424774, "grad_norm": 1.3112659454345703, "learning_rate": 1.9919368855555546e-05, "loss": 0.6708, "step": 465 }, { "epoch": 0.0695029643163429, "grad_norm": 1.490883469581604, "learning_rate": 1.9918755358447298e-05, "loss": 0.6098, "step": 466 }, { "epoch": 0.06965211230843804, "grad_norm": 1.552575707435608, "learning_rate": 1.991813954574201e-05, "loss": 0.7192, "step": 467 }, { "epoch": 0.0698012603005332, "grad_norm": 1.4074641466140747, "learning_rate": 1.9917521417583456e-05, "loss": 0.6514, "step": 468 }, { "epoch": 0.06995040829262836, "grad_norm": 1.4167221784591675, "learning_rate": 1.9916900974115932e-05, "loss": 0.6407, "step": 469 }, { "epoch": 0.07009955628472352, "grad_norm": 1.3386727571487427, "learning_rate": 1.9916278215484288e-05, "loss": 0.6463, "step": 470 }, { "epoch": 0.07024870427681867, "grad_norm": 1.4450346231460571, "learning_rate": 1.991565314183391e-05, "loss": 0.6969, "step": 471 }, { "epoch": 0.07039785226891383, "grad_norm": 0.9034586548805237, "learning_rate": 1.9915025753310727e-05, "loss": 0.607, "step": 472 }, { "epoch": 0.07054700026100899, "grad_norm": 1.5095510482788086, "learning_rate": 1.9914396050061212e-05, "loss": 0.7191, "step": 473 }, { "epoch": 0.07069614825310415, "grad_norm": 1.4253267049789429, "learning_rate": 1.9913764032232362e-05, "loss": 0.6654, "step": 474 }, { "epoch": 0.0708452962451993, "grad_norm": 1.4287805557250977, "learning_rate": 1.991312969997173e-05, "loss": 0.6645, "step": 475 }, { "epoch": 0.07099444423729445, "grad_norm": 1.5091814994812012, "learning_rate": 1.991249305342741e-05, "loss": 0.651, "step": 476 }, { "epoch": 0.07114359222938961, "grad_norm": 1.3835203647613525, "learning_rate": 1.9911854092748023e-05, "loss": 0.7111, "step": 477 }, { "epoch": 0.07129274022148477, "grad_norm": 1.3509793281555176, "learning_rate": 1.9911212818082746e-05, "loss": 0.6876, "step": 478 }, { "epoch": 0.07144188821357993, "grad_norm": 1.418982744216919, "learning_rate": 1.9910569229581288e-05, "loss": 0.6645, "step": 479 }, { "epoch": 0.07159103620567508, "grad_norm": 1.4616916179656982, "learning_rate": 1.990992332739389e-05, "loss": 0.6899, "step": 480 }, { "epoch": 0.07174018419777024, "grad_norm": 1.0137674808502197, "learning_rate": 1.9909275111671354e-05, "loss": 0.6424, "step": 481 }, { "epoch": 0.07188933218986539, "grad_norm": 0.9747961163520813, "learning_rate": 1.9908624582565002e-05, "loss": 0.6349, "step": 482 }, { "epoch": 0.07203848018196055, "grad_norm": 1.4299708604812622, "learning_rate": 1.9907971740226708e-05, "loss": 0.6231, "step": 483 }, { "epoch": 0.0721876281740557, "grad_norm": 1.3647220134735107, "learning_rate": 1.990731658480888e-05, "loss": 0.6442, "step": 484 }, { "epoch": 0.07233677616615086, "grad_norm": 1.3832165002822876, "learning_rate": 1.9906659116464467e-05, "loss": 0.6722, "step": 485 }, { "epoch": 0.07248592415824602, "grad_norm": 1.441433310508728, "learning_rate": 1.9905999335346967e-05, "loss": 0.716, "step": 486 }, { "epoch": 0.07263507215034118, "grad_norm": 1.3555541038513184, "learning_rate": 1.99053372416104e-05, "loss": 0.729, "step": 487 }, { "epoch": 0.07278422014243634, "grad_norm": 1.5130459070205688, "learning_rate": 1.990467283540934e-05, "loss": 0.7531, "step": 488 }, { "epoch": 0.0729333681345315, "grad_norm": 1.3544138669967651, "learning_rate": 1.9904006116898903e-05, "loss": 0.7057, "step": 489 }, { "epoch": 0.07308251612662664, "grad_norm": 1.4001933336257935, "learning_rate": 1.990333708623473e-05, "loss": 0.7031, "step": 490 }, { "epoch": 0.0732316641187218, "grad_norm": 1.398870825767517, "learning_rate": 1.9902665743573012e-05, "loss": 0.6675, "step": 491 }, { "epoch": 0.07338081211081696, "grad_norm": 1.4098583459854126, "learning_rate": 1.9901992089070483e-05, "loss": 0.6667, "step": 492 }, { "epoch": 0.07352996010291211, "grad_norm": 1.411223292350769, "learning_rate": 1.9901316122884405e-05, "loss": 0.6852, "step": 493 }, { "epoch": 0.07367910809500727, "grad_norm": 1.4521292448043823, "learning_rate": 1.9900637845172594e-05, "loss": 0.6395, "step": 494 }, { "epoch": 0.07382825608710243, "grad_norm": 1.4672410488128662, "learning_rate": 1.9899957256093393e-05, "loss": 0.6269, "step": 495 }, { "epoch": 0.07397740407919759, "grad_norm": 1.34549081325531, "learning_rate": 1.989927435580569e-05, "loss": 0.6265, "step": 496 }, { "epoch": 0.07412655207129273, "grad_norm": 1.4881229400634766, "learning_rate": 1.9898589144468916e-05, "loss": 0.7047, "step": 497 }, { "epoch": 0.07427570006338789, "grad_norm": 1.3553810119628906, "learning_rate": 1.9897901622243038e-05, "loss": 0.7186, "step": 498 }, { "epoch": 0.07442484805548305, "grad_norm": 1.4388889074325562, "learning_rate": 1.9897211789288556e-05, "loss": 0.7829, "step": 499 }, { "epoch": 0.07457399604757821, "grad_norm": 1.34690523147583, "learning_rate": 1.989651964576653e-05, "loss": 0.6478, "step": 500 }, { "epoch": 0.07472314403967337, "grad_norm": 1.44814133644104, "learning_rate": 1.9895825191838524e-05, "loss": 0.6664, "step": 501 }, { "epoch": 0.07487229203176853, "grad_norm": 1.3790717124938965, "learning_rate": 1.989512842766668e-05, "loss": 0.7647, "step": 502 }, { "epoch": 0.07502144002386368, "grad_norm": 1.1913613080978394, "learning_rate": 1.989442935341366e-05, "loss": 0.615, "step": 503 }, { "epoch": 0.07517058801595884, "grad_norm": 1.3310141563415527, "learning_rate": 1.9893727969242657e-05, "loss": 0.5981, "step": 504 }, { "epoch": 0.07531973600805399, "grad_norm": 1.3407601118087769, "learning_rate": 1.9893024275317424e-05, "loss": 0.6554, "step": 505 }, { "epoch": 0.07546888400014914, "grad_norm": 1.564221739768982, "learning_rate": 1.989231827180224e-05, "loss": 0.691, "step": 506 }, { "epoch": 0.0756180319922443, "grad_norm": 1.3603754043579102, "learning_rate": 1.9891609958861926e-05, "loss": 0.6854, "step": 507 }, { "epoch": 0.07576717998433946, "grad_norm": 1.4390084743499756, "learning_rate": 1.989089933666184e-05, "loss": 0.7363, "step": 508 }, { "epoch": 0.07591632797643462, "grad_norm": 1.5135220289230347, "learning_rate": 1.9890186405367884e-05, "loss": 0.8167, "step": 509 }, { "epoch": 0.07606547596852978, "grad_norm": 1.4937891960144043, "learning_rate": 1.9889471165146495e-05, "loss": 0.696, "step": 510 }, { "epoch": 0.07621462396062494, "grad_norm": 1.4724220037460327, "learning_rate": 1.988875361616465e-05, "loss": 0.7214, "step": 511 }, { "epoch": 0.07636377195272008, "grad_norm": 1.2966573238372803, "learning_rate": 1.988803375858987e-05, "loss": 0.6816, "step": 512 }, { "epoch": 0.07651291994481524, "grad_norm": 1.4509222507476807, "learning_rate": 1.9887311592590205e-05, "loss": 0.7002, "step": 513 }, { "epoch": 0.0766620679369104, "grad_norm": 1.416860580444336, "learning_rate": 1.9886587118334248e-05, "loss": 0.687, "step": 514 }, { "epoch": 0.07681121592900556, "grad_norm": 1.2932041883468628, "learning_rate": 1.9885860335991136e-05, "loss": 0.6407, "step": 515 }, { "epoch": 0.07696036392110071, "grad_norm": 1.3358943462371826, "learning_rate": 1.988513124573054e-05, "loss": 0.7119, "step": 516 }, { "epoch": 0.07710951191319587, "grad_norm": 1.3129233121871948, "learning_rate": 1.9884399847722676e-05, "loss": 0.6669, "step": 517 }, { "epoch": 0.07725865990529103, "grad_norm": 1.5207852125167847, "learning_rate": 1.9883666142138282e-05, "loss": 0.7676, "step": 518 }, { "epoch": 0.07740780789738619, "grad_norm": 1.6633069515228271, "learning_rate": 1.9882930129148653e-05, "loss": 0.7105, "step": 519 }, { "epoch": 0.07755695588948133, "grad_norm": 1.4260364770889282, "learning_rate": 1.988219180892562e-05, "loss": 0.6477, "step": 520 }, { "epoch": 0.07770610388157649, "grad_norm": 1.3805980682373047, "learning_rate": 1.9881451181641542e-05, "loss": 0.751, "step": 521 }, { "epoch": 0.07785525187367165, "grad_norm": 1.290549397468567, "learning_rate": 1.9880708247469328e-05, "loss": 0.7118, "step": 522 }, { "epoch": 0.07800439986576681, "grad_norm": 1.3223586082458496, "learning_rate": 1.9879963006582413e-05, "loss": 0.6356, "step": 523 }, { "epoch": 0.07815354785786197, "grad_norm": 1.3826876878738403, "learning_rate": 1.9879215459154787e-05, "loss": 0.6728, "step": 524 }, { "epoch": 0.07830269584995712, "grad_norm": 1.2269251346588135, "learning_rate": 1.9878465605360963e-05, "loss": 0.6373, "step": 525 }, { "epoch": 0.07845184384205228, "grad_norm": 1.3211681842803955, "learning_rate": 1.9877713445376005e-05, "loss": 0.6811, "step": 526 }, { "epoch": 0.07860099183414743, "grad_norm": 1.351428508758545, "learning_rate": 1.9876958979375507e-05, "loss": 0.6985, "step": 527 }, { "epoch": 0.07875013982624259, "grad_norm": 1.4462181329727173, "learning_rate": 1.98762022075356e-05, "loss": 0.6136, "step": 528 }, { "epoch": 0.07889928781833774, "grad_norm": 1.4354945421218872, "learning_rate": 1.9875443130032968e-05, "loss": 0.7172, "step": 529 }, { "epoch": 0.0790484358104329, "grad_norm": 1.6366629600524902, "learning_rate": 1.987468174704481e-05, "loss": 0.7858, "step": 530 }, { "epoch": 0.07919758380252806, "grad_norm": 1.2945959568023682, "learning_rate": 1.9873918058748886e-05, "loss": 0.6856, "step": 531 }, { "epoch": 0.07934673179462322, "grad_norm": 1.4620195627212524, "learning_rate": 1.9873152065323476e-05, "loss": 0.6403, "step": 532 }, { "epoch": 0.07949587978671838, "grad_norm": 1.4839584827423096, "learning_rate": 1.987238376694741e-05, "loss": 0.6555, "step": 533 }, { "epoch": 0.07964502777881352, "grad_norm": 1.5114924907684326, "learning_rate": 1.987161316380005e-05, "loss": 0.7609, "step": 534 }, { "epoch": 0.07979417577090868, "grad_norm": 1.401930809020996, "learning_rate": 1.98708402560613e-05, "loss": 0.6313, "step": 535 }, { "epoch": 0.07994332376300384, "grad_norm": 1.376880168914795, "learning_rate": 1.9870065043911603e-05, "loss": 0.7574, "step": 536 }, { "epoch": 0.080092471755099, "grad_norm": 1.4327081441879272, "learning_rate": 1.986928752753193e-05, "loss": 0.6728, "step": 537 }, { "epoch": 0.08024161974719415, "grad_norm": 1.3105218410491943, "learning_rate": 1.9868507707103806e-05, "loss": 0.6682, "step": 538 }, { "epoch": 0.08039076773928931, "grad_norm": 1.3209797143936157, "learning_rate": 1.9867725582809278e-05, "loss": 0.6129, "step": 539 }, { "epoch": 0.08053991573138447, "grad_norm": 1.4043629169464111, "learning_rate": 1.986694115483094e-05, "loss": 0.576, "step": 540 }, { "epoch": 0.08068906372347963, "grad_norm": 1.2837392091751099, "learning_rate": 1.9866154423351923e-05, "loss": 0.6613, "step": 541 }, { "epoch": 0.08083821171557477, "grad_norm": 1.4034463167190552, "learning_rate": 1.9865365388555896e-05, "loss": 0.6747, "step": 542 }, { "epoch": 0.08098735970766993, "grad_norm": 1.29253351688385, "learning_rate": 1.986457405062706e-05, "loss": 0.7137, "step": 543 }, { "epoch": 0.08113650769976509, "grad_norm": 1.1478804349899292, "learning_rate": 1.986378040975016e-05, "loss": 0.6541, "step": 544 }, { "epoch": 0.08128565569186025, "grad_norm": 1.4917393922805786, "learning_rate": 1.9862984466110476e-05, "loss": 0.7647, "step": 545 }, { "epoch": 0.0814348036839554, "grad_norm": 1.4741984605789185, "learning_rate": 1.9862186219893825e-05, "loss": 0.7022, "step": 546 }, { "epoch": 0.08158395167605056, "grad_norm": 1.53390371799469, "learning_rate": 1.9861385671286565e-05, "loss": 0.6647, "step": 547 }, { "epoch": 0.08173309966814572, "grad_norm": 1.4157136678695679, "learning_rate": 1.9860582820475593e-05, "loss": 0.7054, "step": 548 }, { "epoch": 0.08188224766024087, "grad_norm": 1.451453447341919, "learning_rate": 1.9859777667648326e-05, "loss": 0.7454, "step": 549 }, { "epoch": 0.08203139565233603, "grad_norm": 1.4964988231658936, "learning_rate": 1.985897021299275e-05, "loss": 0.7334, "step": 550 }, { "epoch": 0.08218054364443118, "grad_norm": 1.3696935176849365, "learning_rate": 1.985816045669736e-05, "loss": 0.6494, "step": 551 }, { "epoch": 0.08232969163652634, "grad_norm": 1.5071958303451538, "learning_rate": 1.98573483989512e-05, "loss": 0.8213, "step": 552 }, { "epoch": 0.0824788396286215, "grad_norm": 0.9122848510742188, "learning_rate": 1.985653403994385e-05, "loss": 0.6453, "step": 553 }, { "epoch": 0.08262798762071666, "grad_norm": 1.437438726425171, "learning_rate": 1.9855717379865424e-05, "loss": 0.734, "step": 554 }, { "epoch": 0.08277713561281182, "grad_norm": 1.3649293184280396, "learning_rate": 1.9854898418906585e-05, "loss": 0.6842, "step": 555 }, { "epoch": 0.08292628360490698, "grad_norm": 0.8457880020141602, "learning_rate": 1.985407715725852e-05, "loss": 0.5879, "step": 556 }, { "epoch": 0.08307543159700212, "grad_norm": 0.8851473331451416, "learning_rate": 1.9853253595112955e-05, "loss": 0.6262, "step": 557 }, { "epoch": 0.08322457958909728, "grad_norm": 1.346664547920227, "learning_rate": 1.985242773266216e-05, "loss": 0.676, "step": 558 }, { "epoch": 0.08337372758119244, "grad_norm": 1.4687330722808838, "learning_rate": 1.985159957009894e-05, "loss": 0.6886, "step": 559 }, { "epoch": 0.0835228755732876, "grad_norm": 1.3652839660644531, "learning_rate": 1.985076910761663e-05, "loss": 0.7122, "step": 560 }, { "epoch": 0.08367202356538275, "grad_norm": 1.575640320777893, "learning_rate": 1.9849936345409105e-05, "loss": 0.7961, "step": 561 }, { "epoch": 0.08382117155747791, "grad_norm": 1.4297093152999878, "learning_rate": 1.9849101283670787e-05, "loss": 0.7402, "step": 562 }, { "epoch": 0.08397031954957307, "grad_norm": 1.3761506080627441, "learning_rate": 1.9848263922596617e-05, "loss": 0.6961, "step": 563 }, { "epoch": 0.08411946754166821, "grad_norm": 1.41793954372406, "learning_rate": 1.9847424262382087e-05, "loss": 0.7112, "step": 564 }, { "epoch": 0.08426861553376337, "grad_norm": 1.5674684047698975, "learning_rate": 1.9846582303223224e-05, "loss": 0.7871, "step": 565 }, { "epoch": 0.08441776352585853, "grad_norm": 1.3514225482940674, "learning_rate": 1.9845738045316584e-05, "loss": 0.7137, "step": 566 }, { "epoch": 0.08456691151795369, "grad_norm": 1.4870209693908691, "learning_rate": 1.9844891488859267e-05, "loss": 0.6786, "step": 567 }, { "epoch": 0.08471605951004885, "grad_norm": 1.3521257638931274, "learning_rate": 1.9844042634048905e-05, "loss": 0.6632, "step": 568 }, { "epoch": 0.084865207502144, "grad_norm": 1.4606928825378418, "learning_rate": 1.984319148108367e-05, "loss": 0.7471, "step": 569 }, { "epoch": 0.08501435549423916, "grad_norm": 1.4577715396881104, "learning_rate": 1.9842338030162273e-05, "loss": 0.7914, "step": 570 }, { "epoch": 0.08516350348633432, "grad_norm": 1.4870964288711548, "learning_rate": 1.9841482281483946e-05, "loss": 0.7037, "step": 571 }, { "epoch": 0.08531265147842947, "grad_norm": 1.3589367866516113, "learning_rate": 1.984062423524848e-05, "loss": 0.5749, "step": 572 }, { "epoch": 0.08546179947052462, "grad_norm": 1.6226365566253662, "learning_rate": 1.9839763891656186e-05, "loss": 0.7776, "step": 573 }, { "epoch": 0.08561094746261978, "grad_norm": 1.2981324195861816, "learning_rate": 1.9838901250907924e-05, "loss": 0.6913, "step": 574 }, { "epoch": 0.08576009545471494, "grad_norm": 1.424639105796814, "learning_rate": 1.9838036313205073e-05, "loss": 0.7251, "step": 575 }, { "epoch": 0.0859092434468101, "grad_norm": 1.292096495628357, "learning_rate": 1.9837169078749567e-05, "loss": 0.6749, "step": 576 }, { "epoch": 0.08605839143890526, "grad_norm": 1.3551112413406372, "learning_rate": 1.9836299547743856e-05, "loss": 0.7203, "step": 577 }, { "epoch": 0.08620753943100042, "grad_norm": 1.452678918838501, "learning_rate": 1.983542772039095e-05, "loss": 0.7083, "step": 578 }, { "epoch": 0.08635668742309556, "grad_norm": 1.4596916437149048, "learning_rate": 1.9834553596894377e-05, "loss": 0.7152, "step": 579 }, { "epoch": 0.08650583541519072, "grad_norm": 1.3097752332687378, "learning_rate": 1.9833677177458207e-05, "loss": 0.6672, "step": 580 }, { "epoch": 0.08665498340728588, "grad_norm": 1.646790623664856, "learning_rate": 1.9832798462287047e-05, "loss": 0.7808, "step": 581 }, { "epoch": 0.08680413139938103, "grad_norm": 1.3622236251831055, "learning_rate": 1.9831917451586036e-05, "loss": 0.6093, "step": 582 }, { "epoch": 0.0869532793914762, "grad_norm": 1.4442791938781738, "learning_rate": 1.9831034145560854e-05, "loss": 0.7325, "step": 583 }, { "epoch": 0.08710242738357135, "grad_norm": 1.389676809310913, "learning_rate": 1.983014854441771e-05, "loss": 0.7126, "step": 584 }, { "epoch": 0.08725157537566651, "grad_norm": 1.4169607162475586, "learning_rate": 1.9829260648363366e-05, "loss": 0.5754, "step": 585 }, { "epoch": 0.08740072336776167, "grad_norm": 1.418156385421753, "learning_rate": 1.982837045760509e-05, "loss": 0.6915, "step": 586 }, { "epoch": 0.08754987135985681, "grad_norm": 1.4895095825195312, "learning_rate": 1.9827477972350713e-05, "loss": 0.6877, "step": 587 }, { "epoch": 0.08769901935195197, "grad_norm": 1.4068762063980103, "learning_rate": 1.982658319280859e-05, "loss": 0.6517, "step": 588 }, { "epoch": 0.08784816734404713, "grad_norm": 1.465804100036621, "learning_rate": 1.9825686119187613e-05, "loss": 0.6924, "step": 589 }, { "epoch": 0.08799731533614229, "grad_norm": 1.3557233810424805, "learning_rate": 1.9824786751697206e-05, "loss": 0.6835, "step": 590 }, { "epoch": 0.08814646332823745, "grad_norm": 1.403544306755066, "learning_rate": 1.9823885090547338e-05, "loss": 0.6703, "step": 591 }, { "epoch": 0.0882956113203326, "grad_norm": 1.4711475372314453, "learning_rate": 1.98229811359485e-05, "loss": 0.6446, "step": 592 }, { "epoch": 0.08844475931242776, "grad_norm": 1.4785858392715454, "learning_rate": 1.9822074888111738e-05, "loss": 0.7026, "step": 593 }, { "epoch": 0.0885939073045229, "grad_norm": 1.380642056465149, "learning_rate": 1.9821166347248607e-05, "loss": 0.7152, "step": 594 }, { "epoch": 0.08874305529661806, "grad_norm": 1.5329349040985107, "learning_rate": 1.9820255513571222e-05, "loss": 0.787, "step": 595 }, { "epoch": 0.08889220328871322, "grad_norm": 1.4418367147445679, "learning_rate": 1.981934238729222e-05, "loss": 0.6476, "step": 596 }, { "epoch": 0.08904135128080838, "grad_norm": 1.2764959335327148, "learning_rate": 1.9818426968624772e-05, "loss": 0.5813, "step": 597 }, { "epoch": 0.08919049927290354, "grad_norm": 1.3263152837753296, "learning_rate": 1.9817509257782595e-05, "loss": 0.6686, "step": 598 }, { "epoch": 0.0893396472649987, "grad_norm": 1.4063124656677246, "learning_rate": 1.9816589254979932e-05, "loss": 0.7448, "step": 599 }, { "epoch": 0.08948879525709386, "grad_norm": 0.9709582924842834, "learning_rate": 1.981566696043156e-05, "loss": 0.6321, "step": 600 }, { "epoch": 0.08963794324918901, "grad_norm": 1.4408869743347168, "learning_rate": 1.98147423743528e-05, "loss": 0.8181, "step": 601 }, { "epoch": 0.08978709124128416, "grad_norm": 1.4252870082855225, "learning_rate": 1.98138154969595e-05, "loss": 0.8385, "step": 602 }, { "epoch": 0.08993623923337932, "grad_norm": 1.421216368675232, "learning_rate": 1.9812886328468047e-05, "loss": 0.7252, "step": 603 }, { "epoch": 0.09008538722547448, "grad_norm": 1.5908063650131226, "learning_rate": 1.981195486909536e-05, "loss": 0.6755, "step": 604 }, { "epoch": 0.09023453521756963, "grad_norm": 1.313175082206726, "learning_rate": 1.981102111905889e-05, "loss": 0.6685, "step": 605 }, { "epoch": 0.09038368320966479, "grad_norm": 1.4039186239242554, "learning_rate": 1.981008507857664e-05, "loss": 0.7203, "step": 606 }, { "epoch": 0.09053283120175995, "grad_norm": 1.3813837766647339, "learning_rate": 1.9809146747867116e-05, "loss": 0.7677, "step": 607 }, { "epoch": 0.09068197919385511, "grad_norm": 1.382076621055603, "learning_rate": 1.980820612714939e-05, "loss": 0.6411, "step": 608 }, { "epoch": 0.09083112718595025, "grad_norm": 1.3834515810012817, "learning_rate": 1.9807263216643053e-05, "loss": 0.7498, "step": 609 }, { "epoch": 0.09098027517804541, "grad_norm": 1.3986544609069824, "learning_rate": 1.9806318016568235e-05, "loss": 0.7271, "step": 610 }, { "epoch": 0.09112942317014057, "grad_norm": 1.4248244762420654, "learning_rate": 1.98053705271456e-05, "loss": 0.7542, "step": 611 }, { "epoch": 0.09127857116223573, "grad_norm": 1.244462490081787, "learning_rate": 1.980442074859634e-05, "loss": 0.6628, "step": 612 }, { "epoch": 0.09142771915433089, "grad_norm": 1.6059437990188599, "learning_rate": 1.980346868114219e-05, "loss": 0.691, "step": 613 }, { "epoch": 0.09157686714642604, "grad_norm": 1.4226166009902954, "learning_rate": 1.9802514325005414e-05, "loss": 0.6563, "step": 614 }, { "epoch": 0.0917260151385212, "grad_norm": 1.4865089654922485, "learning_rate": 1.980155768040882e-05, "loss": 0.7577, "step": 615 }, { "epoch": 0.09187516313061636, "grad_norm": 1.3345626592636108, "learning_rate": 1.9800598747575734e-05, "loss": 0.6281, "step": 616 }, { "epoch": 0.0920243111227115, "grad_norm": 1.3329777717590332, "learning_rate": 1.9799637526730027e-05, "loss": 0.6503, "step": 617 }, { "epoch": 0.09217345911480666, "grad_norm": 1.31232750415802, "learning_rate": 1.9798674018096106e-05, "loss": 0.6663, "step": 618 }, { "epoch": 0.09232260710690182, "grad_norm": 1.3917348384857178, "learning_rate": 1.9797708221898906e-05, "loss": 0.6275, "step": 619 }, { "epoch": 0.09247175509899698, "grad_norm": 1.4341371059417725, "learning_rate": 1.97967401383639e-05, "loss": 0.6422, "step": 620 }, { "epoch": 0.09262090309109214, "grad_norm": 1.2797279357910156, "learning_rate": 1.9795769767717087e-05, "loss": 0.7399, "step": 621 }, { "epoch": 0.0927700510831873, "grad_norm": 1.2982444763183594, "learning_rate": 1.9794797110185015e-05, "loss": 0.6294, "step": 622 }, { "epoch": 0.09291919907528245, "grad_norm": 1.3231711387634277, "learning_rate": 1.979382216599475e-05, "loss": 0.7157, "step": 623 }, { "epoch": 0.0930683470673776, "grad_norm": 1.3612334728240967, "learning_rate": 1.9792844935373905e-05, "loss": 0.6685, "step": 624 }, { "epoch": 0.09321749505947276, "grad_norm": 1.3421387672424316, "learning_rate": 1.9791865418550618e-05, "loss": 0.63, "step": 625 }, { "epoch": 0.09336664305156792, "grad_norm": 1.3504610061645508, "learning_rate": 1.979088361575356e-05, "loss": 0.6877, "step": 626 }, { "epoch": 0.09351579104366307, "grad_norm": 1.3385058641433716, "learning_rate": 1.9789899527211943e-05, "loss": 0.7454, "step": 627 }, { "epoch": 0.09366493903575823, "grad_norm": 0.9490055441856384, "learning_rate": 1.9788913153155512e-05, "loss": 0.6505, "step": 628 }, { "epoch": 0.09381408702785339, "grad_norm": 1.3354918956756592, "learning_rate": 1.9787924493814533e-05, "loss": 0.6627, "step": 629 }, { "epoch": 0.09396323501994855, "grad_norm": 1.4169657230377197, "learning_rate": 1.9786933549419826e-05, "loss": 0.7038, "step": 630 }, { "epoch": 0.09411238301204371, "grad_norm": 1.5382839441299438, "learning_rate": 1.9785940320202726e-05, "loss": 0.761, "step": 631 }, { "epoch": 0.09426153100413885, "grad_norm": 0.9157195091247559, "learning_rate": 1.978494480639511e-05, "loss": 0.6429, "step": 632 }, { "epoch": 0.09441067899623401, "grad_norm": 1.2479710578918457, "learning_rate": 1.9783947008229387e-05, "loss": 0.5882, "step": 633 }, { "epoch": 0.09455982698832917, "grad_norm": 1.407273530960083, "learning_rate": 1.97829469259385e-05, "loss": 0.7508, "step": 634 }, { "epoch": 0.09470897498042433, "grad_norm": 1.6132398843765259, "learning_rate": 1.9781944559755924e-05, "loss": 0.6914, "step": 635 }, { "epoch": 0.09485812297251948, "grad_norm": 1.4635274410247803, "learning_rate": 1.9780939909915666e-05, "loss": 0.7742, "step": 636 }, { "epoch": 0.09500727096461464, "grad_norm": 1.2261035442352295, "learning_rate": 1.9779932976652272e-05, "loss": 0.6526, "step": 637 }, { "epoch": 0.0951564189567098, "grad_norm": 1.4382413625717163, "learning_rate": 1.977892376020082e-05, "loss": 0.7685, "step": 638 }, { "epoch": 0.09530556694880495, "grad_norm": 1.2554737329483032, "learning_rate": 1.977791226079691e-05, "loss": 0.6302, "step": 639 }, { "epoch": 0.0954547149409001, "grad_norm": 1.4683496952056885, "learning_rate": 1.9776898478676684e-05, "loss": 0.6863, "step": 640 }, { "epoch": 0.09560386293299526, "grad_norm": 1.3476452827453613, "learning_rate": 1.9775882414076822e-05, "loss": 0.6501, "step": 641 }, { "epoch": 0.09575301092509042, "grad_norm": 1.3334113359451294, "learning_rate": 1.9774864067234525e-05, "loss": 0.6717, "step": 642 }, { "epoch": 0.09590215891718558, "grad_norm": 1.2692784070968628, "learning_rate": 1.9773843438387534e-05, "loss": 0.6966, "step": 643 }, { "epoch": 0.09605130690928074, "grad_norm": 1.4701213836669922, "learning_rate": 1.9772820527774127e-05, "loss": 0.6478, "step": 644 }, { "epoch": 0.0962004549013759, "grad_norm": 1.18522047996521, "learning_rate": 1.9771795335633098e-05, "loss": 0.5936, "step": 645 }, { "epoch": 0.09634960289347105, "grad_norm": 1.278627634048462, "learning_rate": 1.9770767862203795e-05, "loss": 0.7085, "step": 646 }, { "epoch": 0.0964987508855662, "grad_norm": 1.4272873401641846, "learning_rate": 1.976973810772608e-05, "loss": 0.6835, "step": 647 }, { "epoch": 0.09664789887766136, "grad_norm": 1.4288580417633057, "learning_rate": 1.976870607244036e-05, "loss": 0.689, "step": 648 }, { "epoch": 0.09679704686975651, "grad_norm": 1.432155728340149, "learning_rate": 1.9767671756587577e-05, "loss": 0.7694, "step": 649 }, { "epoch": 0.09694619486185167, "grad_norm": 1.4689449071884155, "learning_rate": 1.9766635160409186e-05, "loss": 0.6822, "step": 650 }, { "epoch": 0.09709534285394683, "grad_norm": 1.3712331056594849, "learning_rate": 1.9765596284147192e-05, "loss": 0.679, "step": 651 }, { "epoch": 0.09724449084604199, "grad_norm": 1.5921995639801025, "learning_rate": 1.9764555128044128e-05, "loss": 0.7202, "step": 652 }, { "epoch": 0.09739363883813715, "grad_norm": 1.5189801454544067, "learning_rate": 1.9763511692343062e-05, "loss": 0.7546, "step": 653 }, { "epoch": 0.09754278683023229, "grad_norm": 1.4728947877883911, "learning_rate": 1.9762465977287587e-05, "loss": 0.7338, "step": 654 }, { "epoch": 0.09769193482232745, "grad_norm": 1.3383415937423706, "learning_rate": 1.976141798312183e-05, "loss": 0.6836, "step": 655 }, { "epoch": 0.09784108281442261, "grad_norm": 1.2036539316177368, "learning_rate": 1.976036771009046e-05, "loss": 0.6303, "step": 656 }, { "epoch": 0.09799023080651777, "grad_norm": 1.3923852443695068, "learning_rate": 1.9759315158438658e-05, "loss": 0.7243, "step": 657 }, { "epoch": 0.09813937879861293, "grad_norm": 1.3831360340118408, "learning_rate": 1.9758260328412154e-05, "loss": 0.7205, "step": 658 }, { "epoch": 0.09828852679070808, "grad_norm": 1.3895988464355469, "learning_rate": 1.975720322025721e-05, "loss": 0.7393, "step": 659 }, { "epoch": 0.09843767478280324, "grad_norm": 1.3852249383926392, "learning_rate": 1.975614383422061e-05, "loss": 0.7689, "step": 660 }, { "epoch": 0.09858682277489839, "grad_norm": 1.026003122329712, "learning_rate": 1.9755082170549675e-05, "loss": 0.6304, "step": 661 }, { "epoch": 0.09873597076699354, "grad_norm": 1.3526712656021118, "learning_rate": 1.9754018229492254e-05, "loss": 0.6894, "step": 662 }, { "epoch": 0.0988851187590887, "grad_norm": 1.4950733184814453, "learning_rate": 1.975295201129674e-05, "loss": 0.7889, "step": 663 }, { "epoch": 0.09903426675118386, "grad_norm": 1.1639695167541504, "learning_rate": 1.975188351621204e-05, "loss": 0.6232, "step": 664 }, { "epoch": 0.09918341474327902, "grad_norm": 1.4106346368789673, "learning_rate": 1.9750812744487605e-05, "loss": 0.733, "step": 665 }, { "epoch": 0.09933256273537418, "grad_norm": 1.3857417106628418, "learning_rate": 1.974973969637341e-05, "loss": 0.7143, "step": 666 }, { "epoch": 0.09948171072746934, "grad_norm": 1.299926996231079, "learning_rate": 1.974866437211997e-05, "loss": 0.6869, "step": 667 }, { "epoch": 0.0996308587195645, "grad_norm": 1.370554804801941, "learning_rate": 1.974758677197832e-05, "loss": 0.7318, "step": 668 }, { "epoch": 0.09978000671165964, "grad_norm": 1.5953242778778076, "learning_rate": 1.974650689620004e-05, "loss": 0.7452, "step": 669 }, { "epoch": 0.0999291547037548, "grad_norm": 1.4662151336669922, "learning_rate": 1.9745424745037226e-05, "loss": 0.6831, "step": 670 }, { "epoch": 0.10007830269584995, "grad_norm": 1.3317526578903198, "learning_rate": 1.974434031874252e-05, "loss": 0.6255, "step": 671 }, { "epoch": 0.10022745068794511, "grad_norm": 1.4076536893844604, "learning_rate": 1.9743253617569085e-05, "loss": 0.7145, "step": 672 }, { "epoch": 0.10037659868004027, "grad_norm": 1.3610775470733643, "learning_rate": 1.9742164641770617e-05, "loss": 0.65, "step": 673 }, { "epoch": 0.10052574667213543, "grad_norm": 1.3004302978515625, "learning_rate": 1.974107339160135e-05, "loss": 0.6376, "step": 674 }, { "epoch": 0.10067489466423059, "grad_norm": 1.4523828029632568, "learning_rate": 1.9739979867316035e-05, "loss": 0.7084, "step": 675 }, { "epoch": 0.10082404265632573, "grad_norm": 1.3890825510025024, "learning_rate": 1.9738884069169972e-05, "loss": 0.7709, "step": 676 }, { "epoch": 0.10097319064842089, "grad_norm": 1.4630684852600098, "learning_rate": 1.9737785997418973e-05, "loss": 0.7198, "step": 677 }, { "epoch": 0.10112233864051605, "grad_norm": 1.4137647151947021, "learning_rate": 1.9736685652319398e-05, "loss": 0.7029, "step": 678 }, { "epoch": 0.10127148663261121, "grad_norm": 1.211076021194458, "learning_rate": 1.973558303412812e-05, "loss": 0.6696, "step": 679 }, { "epoch": 0.10142063462470637, "grad_norm": 1.4327468872070312, "learning_rate": 1.973447814310256e-05, "loss": 0.6927, "step": 680 }, { "epoch": 0.10156978261680152, "grad_norm": 1.3704813718795776, "learning_rate": 1.973337097950066e-05, "loss": 0.6999, "step": 681 }, { "epoch": 0.10171893060889668, "grad_norm": 1.2564438581466675, "learning_rate": 1.9732261543580894e-05, "loss": 0.7088, "step": 682 }, { "epoch": 0.10186807860099184, "grad_norm": 1.4072786569595337, "learning_rate": 1.973114983560227e-05, "loss": 0.7167, "step": 683 }, { "epoch": 0.10201722659308698, "grad_norm": 1.2757017612457275, "learning_rate": 1.9730035855824317e-05, "loss": 0.683, "step": 684 }, { "epoch": 0.10216637458518214, "grad_norm": 1.3769382238388062, "learning_rate": 1.9728919604507105e-05, "loss": 0.6842, "step": 685 }, { "epoch": 0.1023155225772773, "grad_norm": 1.3506128787994385, "learning_rate": 1.9727801081911227e-05, "loss": 0.7376, "step": 686 }, { "epoch": 0.10246467056937246, "grad_norm": 1.344068169593811, "learning_rate": 1.9726680288297815e-05, "loss": 0.7574, "step": 687 }, { "epoch": 0.10261381856146762, "grad_norm": 1.409104347229004, "learning_rate": 1.972555722392852e-05, "loss": 0.7306, "step": 688 }, { "epoch": 0.10276296655356278, "grad_norm": 1.3124096393585205, "learning_rate": 1.972443188906553e-05, "loss": 0.6712, "step": 689 }, { "epoch": 0.10291211454565793, "grad_norm": 1.3444781303405762, "learning_rate": 1.9723304283971566e-05, "loss": 0.6508, "step": 690 }, { "epoch": 0.10306126253775308, "grad_norm": 1.2069485187530518, "learning_rate": 1.9722174408909866e-05, "loss": 0.6586, "step": 691 }, { "epoch": 0.10321041052984824, "grad_norm": 1.3540881872177124, "learning_rate": 1.9721042264144214e-05, "loss": 0.6975, "step": 692 }, { "epoch": 0.1033595585219434, "grad_norm": 1.2178974151611328, "learning_rate": 1.971990784993891e-05, "loss": 0.6112, "step": 693 }, { "epoch": 0.10350870651403855, "grad_norm": 1.396640419960022, "learning_rate": 1.9718771166558796e-05, "loss": 0.676, "step": 694 }, { "epoch": 0.10365785450613371, "grad_norm": 1.4486823081970215, "learning_rate": 1.971763221426924e-05, "loss": 0.6816, "step": 695 }, { "epoch": 0.10380700249822887, "grad_norm": 1.4555363655090332, "learning_rate": 1.971649099333613e-05, "loss": 0.7821, "step": 696 }, { "epoch": 0.10395615049032403, "grad_norm": 1.4266726970672607, "learning_rate": 1.971534750402589e-05, "loss": 0.7366, "step": 697 }, { "epoch": 0.10410529848241919, "grad_norm": 1.367936372756958, "learning_rate": 1.971420174660549e-05, "loss": 0.6927, "step": 698 }, { "epoch": 0.10425444647451433, "grad_norm": 1.1357744932174683, "learning_rate": 1.97130537213424e-05, "loss": 0.6591, "step": 699 }, { "epoch": 0.10440359446660949, "grad_norm": 1.460582971572876, "learning_rate": 1.971190342850464e-05, "loss": 0.6961, "step": 700 }, { "epoch": 0.10455274245870465, "grad_norm": 1.5253517627716064, "learning_rate": 1.971075086836075e-05, "loss": 0.7155, "step": 701 }, { "epoch": 0.1047018904507998, "grad_norm": 1.5601388216018677, "learning_rate": 1.9709596041179802e-05, "loss": 0.751, "step": 702 }, { "epoch": 0.10485103844289496, "grad_norm": 1.3004646301269531, "learning_rate": 1.9708438947231402e-05, "loss": 0.6571, "step": 703 }, { "epoch": 0.10500018643499012, "grad_norm": 1.3236724138259888, "learning_rate": 1.970727958678568e-05, "loss": 0.7365, "step": 704 }, { "epoch": 0.10514933442708528, "grad_norm": 1.3792473077774048, "learning_rate": 1.970611796011329e-05, "loss": 0.7096, "step": 705 }, { "epoch": 0.10529848241918043, "grad_norm": 1.3850637674331665, "learning_rate": 1.9704954067485432e-05, "loss": 0.7152, "step": 706 }, { "epoch": 0.10544763041127558, "grad_norm": 1.3878155946731567, "learning_rate": 1.9703787909173816e-05, "loss": 0.705, "step": 707 }, { "epoch": 0.10559677840337074, "grad_norm": 1.2280652523040771, "learning_rate": 1.970261948545069e-05, "loss": 0.6796, "step": 708 }, { "epoch": 0.1057459263954659, "grad_norm": 1.1791716814041138, "learning_rate": 1.9701448796588837e-05, "loss": 0.6945, "step": 709 }, { "epoch": 0.10589507438756106, "grad_norm": 1.3561251163482666, "learning_rate": 1.970027584286155e-05, "loss": 0.8395, "step": 710 }, { "epoch": 0.10604422237965622, "grad_norm": 1.2734272480010986, "learning_rate": 1.9699100624542673e-05, "loss": 0.6512, "step": 711 }, { "epoch": 0.10619337037175137, "grad_norm": 1.3573683500289917, "learning_rate": 1.9697923141906563e-05, "loss": 0.6242, "step": 712 }, { "epoch": 0.10634251836384653, "grad_norm": 1.3593908548355103, "learning_rate": 1.9696743395228113e-05, "loss": 0.6752, "step": 713 }, { "epoch": 0.10649166635594168, "grad_norm": 1.3404539823532104, "learning_rate": 1.9695561384782743e-05, "loss": 0.6023, "step": 714 }, { "epoch": 0.10664081434803684, "grad_norm": 1.2719773054122925, "learning_rate": 1.9694377110846393e-05, "loss": 0.676, "step": 715 }, { "epoch": 0.106789962340132, "grad_norm": 1.2355461120605469, "learning_rate": 1.969319057369555e-05, "loss": 0.6491, "step": 716 }, { "epoch": 0.10693911033222715, "grad_norm": 1.2035669088363647, "learning_rate": 1.9692001773607215e-05, "loss": 0.5958, "step": 717 }, { "epoch": 0.10708825832432231, "grad_norm": 1.3102304935455322, "learning_rate": 1.969081071085892e-05, "loss": 0.6361, "step": 718 }, { "epoch": 0.10723740631641747, "grad_norm": 1.4239424467086792, "learning_rate": 1.9689617385728726e-05, "loss": 0.7066, "step": 719 }, { "epoch": 0.10738655430851263, "grad_norm": 1.3335833549499512, "learning_rate": 1.9688421798495225e-05, "loss": 0.6911, "step": 720 }, { "epoch": 0.10753570230060777, "grad_norm": 1.2149713039398193, "learning_rate": 1.968722394943753e-05, "loss": 0.6385, "step": 721 }, { "epoch": 0.10768485029270293, "grad_norm": 1.5062472820281982, "learning_rate": 1.9686023838835292e-05, "loss": 0.7433, "step": 722 }, { "epoch": 0.10783399828479809, "grad_norm": 1.3684359788894653, "learning_rate": 1.968482146696868e-05, "loss": 0.6501, "step": 723 }, { "epoch": 0.10798314627689325, "grad_norm": 1.4466464519500732, "learning_rate": 1.9683616834118398e-05, "loss": 0.7034, "step": 724 }, { "epoch": 0.1081322942689884, "grad_norm": 1.3493512868881226, "learning_rate": 1.968240994056567e-05, "loss": 0.6241, "step": 725 }, { "epoch": 0.10828144226108356, "grad_norm": 1.3636741638183594, "learning_rate": 1.9681200786592265e-05, "loss": 0.6599, "step": 726 }, { "epoch": 0.10843059025317872, "grad_norm": 1.3029905557632446, "learning_rate": 1.9679989372480456e-05, "loss": 0.5789, "step": 727 }, { "epoch": 0.10857973824527388, "grad_norm": 1.3864917755126953, "learning_rate": 1.967877569851306e-05, "loss": 0.743, "step": 728 }, { "epoch": 0.10872888623736902, "grad_norm": 1.369247317314148, "learning_rate": 1.9677559764973416e-05, "loss": 0.6851, "step": 729 }, { "epoch": 0.10887803422946418, "grad_norm": 1.3597941398620605, "learning_rate": 1.967634157214539e-05, "loss": 0.6864, "step": 730 }, { "epoch": 0.10902718222155934, "grad_norm": 1.3137824535369873, "learning_rate": 1.967512112031338e-05, "loss": 0.6345, "step": 731 }, { "epoch": 0.1091763302136545, "grad_norm": 1.46244478225708, "learning_rate": 1.9673898409762315e-05, "loss": 0.7616, "step": 732 }, { "epoch": 0.10932547820574966, "grad_norm": 1.350715160369873, "learning_rate": 1.9672673440777628e-05, "loss": 0.7385, "step": 733 }, { "epoch": 0.10947462619784482, "grad_norm": 1.3459699153900146, "learning_rate": 1.9671446213645306e-05, "loss": 0.6114, "step": 734 }, { "epoch": 0.10962377418993997, "grad_norm": 1.247901439666748, "learning_rate": 1.9670216728651854e-05, "loss": 0.6333, "step": 735 }, { "epoch": 0.10977292218203512, "grad_norm": 1.4462181329727173, "learning_rate": 1.96689849860843e-05, "loss": 0.6024, "step": 736 }, { "epoch": 0.10992207017413028, "grad_norm": 1.3231359720230103, "learning_rate": 1.9667750986230203e-05, "loss": 0.6806, "step": 737 }, { "epoch": 0.11007121816622543, "grad_norm": 1.3312753438949585, "learning_rate": 1.966651472937765e-05, "loss": 0.654, "step": 738 }, { "epoch": 0.11022036615832059, "grad_norm": 1.2960315942764282, "learning_rate": 1.9665276215815247e-05, "loss": 0.6315, "step": 739 }, { "epoch": 0.11036951415041575, "grad_norm": 1.359475016593933, "learning_rate": 1.966403544583214e-05, "loss": 0.6985, "step": 740 }, { "epoch": 0.11051866214251091, "grad_norm": 1.4797704219818115, "learning_rate": 1.966279241971799e-05, "loss": 0.7071, "step": 741 }, { "epoch": 0.11066781013460607, "grad_norm": 1.450571894645691, "learning_rate": 1.9661547137762994e-05, "loss": 0.7562, "step": 742 }, { "epoch": 0.11081695812670123, "grad_norm": 1.344611644744873, "learning_rate": 1.966029960025787e-05, "loss": 0.6993, "step": 743 }, { "epoch": 0.11096610611879637, "grad_norm": 0.9606993198394775, "learning_rate": 1.965904980749386e-05, "loss": 0.6271, "step": 744 }, { "epoch": 0.11111525411089153, "grad_norm": 1.3998295068740845, "learning_rate": 1.9657797759762735e-05, "loss": 0.6845, "step": 745 }, { "epoch": 0.11126440210298669, "grad_norm": 1.487614393234253, "learning_rate": 1.96565434573568e-05, "loss": 0.7093, "step": 746 }, { "epoch": 0.11141355009508185, "grad_norm": 1.30966317653656, "learning_rate": 1.965528690056888e-05, "loss": 0.6753, "step": 747 }, { "epoch": 0.111562698087177, "grad_norm": 1.4931304454803467, "learning_rate": 1.9654028089692317e-05, "loss": 0.7371, "step": 748 }, { "epoch": 0.11171184607927216, "grad_norm": 1.380167841911316, "learning_rate": 1.9652767025020997e-05, "loss": 0.6355, "step": 749 }, { "epoch": 0.11186099407136732, "grad_norm": 1.3413898944854736, "learning_rate": 1.965150370684932e-05, "loss": 0.6932, "step": 750 }, { "epoch": 0.11201014206346246, "grad_norm": 1.4494924545288086, "learning_rate": 1.965023813547222e-05, "loss": 0.6831, "step": 751 }, { "epoch": 0.11215929005555762, "grad_norm": 1.2674747705459595, "learning_rate": 1.964897031118515e-05, "loss": 0.6386, "step": 752 }, { "epoch": 0.11230843804765278, "grad_norm": 1.370074987411499, "learning_rate": 1.9647700234284087e-05, "loss": 0.6672, "step": 753 }, { "epoch": 0.11245758603974794, "grad_norm": 1.2853118181228638, "learning_rate": 1.9646427905065545e-05, "loss": 0.6191, "step": 754 }, { "epoch": 0.1126067340318431, "grad_norm": 1.3100342750549316, "learning_rate": 1.9645153323826558e-05, "loss": 0.6163, "step": 755 }, { "epoch": 0.11275588202393826, "grad_norm": 1.3914382457733154, "learning_rate": 1.9643876490864678e-05, "loss": 0.7246, "step": 756 }, { "epoch": 0.11290503001603341, "grad_norm": 1.369560956954956, "learning_rate": 1.9642597406478e-05, "loss": 0.7316, "step": 757 }, { "epoch": 0.11305417800812857, "grad_norm": 1.3172763586044312, "learning_rate": 1.9641316070965123e-05, "loss": 0.7134, "step": 758 }, { "epoch": 0.11320332600022372, "grad_norm": 0.9358607530593872, "learning_rate": 1.964003248462519e-05, "loss": 0.6771, "step": 759 }, { "epoch": 0.11335247399231888, "grad_norm": 1.272871971130371, "learning_rate": 1.963874664775786e-05, "loss": 0.6832, "step": 760 }, { "epoch": 0.11350162198441403, "grad_norm": 1.4783827066421509, "learning_rate": 1.9637458560663323e-05, "loss": 0.7102, "step": 761 }, { "epoch": 0.11365076997650919, "grad_norm": 1.4270137548446655, "learning_rate": 1.9636168223642288e-05, "loss": 0.7601, "step": 762 }, { "epoch": 0.11379991796860435, "grad_norm": 1.394257664680481, "learning_rate": 1.9634875636996e-05, "loss": 0.7167, "step": 763 }, { "epoch": 0.11394906596069951, "grad_norm": 0.8898937106132507, "learning_rate": 1.9633580801026207e-05, "loss": 0.656, "step": 764 }, { "epoch": 0.11409821395279467, "grad_norm": 1.4020154476165771, "learning_rate": 1.963228371603521e-05, "loss": 0.7084, "step": 765 }, { "epoch": 0.11424736194488981, "grad_norm": 1.2963043451309204, "learning_rate": 1.9630984382325816e-05, "loss": 0.6402, "step": 766 }, { "epoch": 0.11439650993698497, "grad_norm": 1.2392805814743042, "learning_rate": 1.9629682800201363e-05, "loss": 0.6356, "step": 767 }, { "epoch": 0.11454565792908013, "grad_norm": 1.4551833868026733, "learning_rate": 1.9628378969965712e-05, "loss": 0.7184, "step": 768 }, { "epoch": 0.11469480592117529, "grad_norm": 1.4706906080245972, "learning_rate": 1.9627072891923258e-05, "loss": 0.6772, "step": 769 }, { "epoch": 0.11484395391327044, "grad_norm": 1.3338313102722168, "learning_rate": 1.9625764566378903e-05, "loss": 0.6513, "step": 770 }, { "epoch": 0.1149931019053656, "grad_norm": 1.3213180303573608, "learning_rate": 1.9624453993638094e-05, "loss": 0.5235, "step": 771 }, { "epoch": 0.11514224989746076, "grad_norm": 1.172197699546814, "learning_rate": 1.9623141174006785e-05, "loss": 0.6472, "step": 772 }, { "epoch": 0.1152913978895559, "grad_norm": 1.405665636062622, "learning_rate": 1.9621826107791465e-05, "loss": 0.7049, "step": 773 }, { "epoch": 0.11544054588165106, "grad_norm": 1.270817756652832, "learning_rate": 1.9620508795299148e-05, "loss": 0.6925, "step": 774 }, { "epoch": 0.11558969387374622, "grad_norm": 1.4664912223815918, "learning_rate": 1.961918923683736e-05, "loss": 0.7677, "step": 775 }, { "epoch": 0.11573884186584138, "grad_norm": 1.2670247554779053, "learning_rate": 1.961786743271417e-05, "loss": 0.6323, "step": 776 }, { "epoch": 0.11588798985793654, "grad_norm": 1.2836498022079468, "learning_rate": 1.9616543383238158e-05, "loss": 0.663, "step": 777 }, { "epoch": 0.1160371378500317, "grad_norm": 0.8548898100852966, "learning_rate": 1.961521708871843e-05, "loss": 0.5848, "step": 778 }, { "epoch": 0.11618628584212685, "grad_norm": 1.2503951787948608, "learning_rate": 1.961388854946462e-05, "loss": 0.5825, "step": 779 }, { "epoch": 0.11633543383422201, "grad_norm": 1.4000861644744873, "learning_rate": 1.9612557765786884e-05, "loss": 0.7422, "step": 780 }, { "epoch": 0.11648458182631716, "grad_norm": 1.2070746421813965, "learning_rate": 1.96112247379959e-05, "loss": 0.6752, "step": 781 }, { "epoch": 0.11663372981841232, "grad_norm": 1.1804975271224976, "learning_rate": 1.9609889466402877e-05, "loss": 0.6055, "step": 782 }, { "epoch": 0.11678287781050747, "grad_norm": 1.3156377077102661, "learning_rate": 1.9608551951319535e-05, "loss": 0.6719, "step": 783 }, { "epoch": 0.11693202580260263, "grad_norm": 1.362822413444519, "learning_rate": 1.960721219305813e-05, "loss": 0.6769, "step": 784 }, { "epoch": 0.11708117379469779, "grad_norm": 1.3152116537094116, "learning_rate": 1.960587019193144e-05, "loss": 0.6989, "step": 785 }, { "epoch": 0.11723032178679295, "grad_norm": 1.3212100267410278, "learning_rate": 1.9604525948252758e-05, "loss": 0.684, "step": 786 }, { "epoch": 0.11737946977888811, "grad_norm": 1.2260056734085083, "learning_rate": 1.9603179462335907e-05, "loss": 0.6696, "step": 787 }, { "epoch": 0.11752861777098325, "grad_norm": 1.4266750812530518, "learning_rate": 1.9601830734495236e-05, "loss": 0.7705, "step": 788 }, { "epoch": 0.11767776576307841, "grad_norm": 1.2299734354019165, "learning_rate": 1.9600479765045615e-05, "loss": 0.7262, "step": 789 }, { "epoch": 0.11782691375517357, "grad_norm": 1.1967610120773315, "learning_rate": 1.959912655430243e-05, "loss": 0.6142, "step": 790 }, { "epoch": 0.11797606174726873, "grad_norm": 1.3208119869232178, "learning_rate": 1.9597771102581607e-05, "loss": 0.6848, "step": 791 }, { "epoch": 0.11812520973936388, "grad_norm": 0.9656818509101868, "learning_rate": 1.9596413410199574e-05, "loss": 0.6561, "step": 792 }, { "epoch": 0.11827435773145904, "grad_norm": 1.3461511135101318, "learning_rate": 1.9595053477473302e-05, "loss": 0.7262, "step": 793 }, { "epoch": 0.1184235057235542, "grad_norm": 0.895524263381958, "learning_rate": 1.959369130472027e-05, "loss": 0.6635, "step": 794 }, { "epoch": 0.11857265371564936, "grad_norm": 1.2401317358016968, "learning_rate": 1.9592326892258486e-05, "loss": 0.6517, "step": 795 }, { "epoch": 0.1187218017077445, "grad_norm": 1.3785104751586914, "learning_rate": 1.9590960240406483e-05, "loss": 0.6723, "step": 796 }, { "epoch": 0.11887094969983966, "grad_norm": 1.4349597692489624, "learning_rate": 1.9589591349483316e-05, "loss": 0.6997, "step": 797 }, { "epoch": 0.11902009769193482, "grad_norm": 1.405426025390625, "learning_rate": 1.9588220219808554e-05, "loss": 0.6552, "step": 798 }, { "epoch": 0.11916924568402998, "grad_norm": 1.378907561302185, "learning_rate": 1.9586846851702307e-05, "loss": 0.7104, "step": 799 }, { "epoch": 0.11931839367612514, "grad_norm": 1.375394582748413, "learning_rate": 1.9585471245485193e-05, "loss": 0.6895, "step": 800 }, { "epoch": 0.1194675416682203, "grad_norm": 1.363815188407898, "learning_rate": 1.958409340147835e-05, "loss": 0.6088, "step": 801 }, { "epoch": 0.11961668966031545, "grad_norm": 1.3352717161178589, "learning_rate": 1.9582713320003454e-05, "loss": 0.6724, "step": 802 }, { "epoch": 0.1197658376524106, "grad_norm": 1.4232860803604126, "learning_rate": 1.9581331001382683e-05, "loss": 0.7989, "step": 803 }, { "epoch": 0.11991498564450576, "grad_norm": 1.244587779045105, "learning_rate": 1.9579946445938755e-05, "loss": 0.6639, "step": 804 }, { "epoch": 0.12006413363660091, "grad_norm": 1.3743140697479248, "learning_rate": 1.9578559653994905e-05, "loss": 0.712, "step": 805 }, { "epoch": 0.12021328162869607, "grad_norm": 1.3567711114883423, "learning_rate": 1.9577170625874885e-05, "loss": 0.7102, "step": 806 }, { "epoch": 0.12036242962079123, "grad_norm": 1.3812695741653442, "learning_rate": 1.957577936190297e-05, "loss": 0.6586, "step": 807 }, { "epoch": 0.12051157761288639, "grad_norm": 1.4265440702438354, "learning_rate": 1.9574385862403965e-05, "loss": 0.7348, "step": 808 }, { "epoch": 0.12066072560498155, "grad_norm": 1.247331976890564, "learning_rate": 1.957299012770319e-05, "loss": 0.6772, "step": 809 }, { "epoch": 0.1208098735970767, "grad_norm": 1.3000813722610474, "learning_rate": 1.9571592158126488e-05, "loss": 0.6851, "step": 810 }, { "epoch": 0.12095902158917185, "grad_norm": 1.3035870790481567, "learning_rate": 1.9570191954000225e-05, "loss": 0.676, "step": 811 }, { "epoch": 0.12110816958126701, "grad_norm": 1.4446022510528564, "learning_rate": 1.956878951565128e-05, "loss": 0.6591, "step": 812 }, { "epoch": 0.12125731757336217, "grad_norm": 1.3499093055725098, "learning_rate": 1.9567384843407068e-05, "loss": 0.7455, "step": 813 }, { "epoch": 0.12140646556545732, "grad_norm": 1.2775938510894775, "learning_rate": 1.9565977937595524e-05, "loss": 0.6994, "step": 814 }, { "epoch": 0.12155561355755248, "grad_norm": 1.4673991203308105, "learning_rate": 1.9564568798545086e-05, "loss": 0.7194, "step": 815 }, { "epoch": 0.12170476154964764, "grad_norm": 1.3061347007751465, "learning_rate": 1.9563157426584737e-05, "loss": 0.6688, "step": 816 }, { "epoch": 0.1218539095417428, "grad_norm": 1.2674907445907593, "learning_rate": 1.9561743822043968e-05, "loss": 0.6932, "step": 817 }, { "epoch": 0.12200305753383794, "grad_norm": 1.3799397945404053, "learning_rate": 1.9560327985252794e-05, "loss": 0.6782, "step": 818 }, { "epoch": 0.1221522055259331, "grad_norm": 0.9195637702941895, "learning_rate": 1.9558909916541746e-05, "loss": 0.6733, "step": 819 }, { "epoch": 0.12230135351802826, "grad_norm": 1.4278827905654907, "learning_rate": 1.955748961624189e-05, "loss": 0.7767, "step": 820 }, { "epoch": 0.12245050151012342, "grad_norm": 1.274914264678955, "learning_rate": 1.95560670846848e-05, "loss": 0.5989, "step": 821 }, { "epoch": 0.12259964950221858, "grad_norm": 1.3790324926376343, "learning_rate": 1.9554642322202574e-05, "loss": 0.6834, "step": 822 }, { "epoch": 0.12274879749431374, "grad_norm": 1.2665892839431763, "learning_rate": 1.9553215329127834e-05, "loss": 0.6692, "step": 823 }, { "epoch": 0.1228979454864089, "grad_norm": 1.433585524559021, "learning_rate": 1.955178610579372e-05, "loss": 0.6646, "step": 824 }, { "epoch": 0.12304709347850405, "grad_norm": 1.2951849699020386, "learning_rate": 1.955035465253389e-05, "loss": 0.6699, "step": 825 }, { "epoch": 0.1231962414705992, "grad_norm": 1.311026930809021, "learning_rate": 1.9548920969682535e-05, "loss": 0.7156, "step": 826 }, { "epoch": 0.12334538946269435, "grad_norm": 1.3271642923355103, "learning_rate": 1.954748505757435e-05, "loss": 0.7186, "step": 827 }, { "epoch": 0.12349453745478951, "grad_norm": 1.2433968782424927, "learning_rate": 1.9546046916544555e-05, "loss": 0.6393, "step": 828 }, { "epoch": 0.12364368544688467, "grad_norm": 0.9285369515419006, "learning_rate": 1.95446065469289e-05, "loss": 0.6405, "step": 829 }, { "epoch": 0.12379283343897983, "grad_norm": 1.3506027460098267, "learning_rate": 1.9543163949063648e-05, "loss": 0.6432, "step": 830 }, { "epoch": 0.12394198143107499, "grad_norm": 1.3003337383270264, "learning_rate": 1.954171912328558e-05, "loss": 0.7157, "step": 831 }, { "epoch": 0.12409112942317015, "grad_norm": 1.2241594791412354, "learning_rate": 1.9540272069932e-05, "loss": 0.645, "step": 832 }, { "epoch": 0.12424027741526529, "grad_norm": 1.1959466934204102, "learning_rate": 1.9538822789340734e-05, "loss": 0.6527, "step": 833 }, { "epoch": 0.12438942540736045, "grad_norm": 1.306870460510254, "learning_rate": 1.9537371281850123e-05, "loss": 0.6979, "step": 834 }, { "epoch": 0.12453857339945561, "grad_norm": 1.3886468410491943, "learning_rate": 1.9535917547799036e-05, "loss": 0.7372, "step": 835 }, { "epoch": 0.12468772139155077, "grad_norm": 1.2888375520706177, "learning_rate": 1.9534461587526847e-05, "loss": 0.717, "step": 836 }, { "epoch": 0.12483686938364592, "grad_norm": 1.3145325183868408, "learning_rate": 1.953300340137347e-05, "loss": 0.6733, "step": 837 }, { "epoch": 0.12498601737574108, "grad_norm": 1.2833771705627441, "learning_rate": 1.953154298967932e-05, "loss": 0.7082, "step": 838 }, { "epoch": 0.12513516536783623, "grad_norm": 1.4823951721191406, "learning_rate": 1.9530080352785343e-05, "loss": 0.6414, "step": 839 }, { "epoch": 0.1252843133599314, "grad_norm": 1.4237847328186035, "learning_rate": 1.9528615491033e-05, "loss": 0.7688, "step": 840 }, { "epoch": 0.12543346135202654, "grad_norm": 1.348519206047058, "learning_rate": 1.9527148404764275e-05, "loss": 0.6855, "step": 841 }, { "epoch": 0.12558260934412172, "grad_norm": 1.335147500038147, "learning_rate": 1.9525679094321667e-05, "loss": 0.6749, "step": 842 }, { "epoch": 0.12573175733621686, "grad_norm": 1.4166579246520996, "learning_rate": 1.952420756004819e-05, "loss": 0.7192, "step": 843 }, { "epoch": 0.125880905328312, "grad_norm": 1.1331673860549927, "learning_rate": 1.9522733802287394e-05, "loss": 0.6108, "step": 844 }, { "epoch": 0.12603005332040718, "grad_norm": 1.1790995597839355, "learning_rate": 1.952125782138333e-05, "loss": 0.5491, "step": 845 }, { "epoch": 0.12617920131250232, "grad_norm": 1.2530874013900757, "learning_rate": 1.9519779617680577e-05, "loss": 0.6752, "step": 846 }, { "epoch": 0.1263283493045975, "grad_norm": 1.3632198572158813, "learning_rate": 1.9518299191524232e-05, "loss": 0.8021, "step": 847 }, { "epoch": 0.12647749729669264, "grad_norm": 1.4827886819839478, "learning_rate": 1.9516816543259908e-05, "loss": 0.7166, "step": 848 }, { "epoch": 0.1266266452887878, "grad_norm": 1.418825387954712, "learning_rate": 1.951533167323374e-05, "loss": 0.622, "step": 849 }, { "epoch": 0.12677579328088295, "grad_norm": 1.339999794960022, "learning_rate": 1.951384458179238e-05, "loss": 0.692, "step": 850 }, { "epoch": 0.12692494127297813, "grad_norm": 1.3233944177627563, "learning_rate": 1.9512355269283e-05, "loss": 0.7024, "step": 851 }, { "epoch": 0.12707408926507327, "grad_norm": 1.2006932497024536, "learning_rate": 1.9510863736053286e-05, "loss": 0.651, "step": 852 }, { "epoch": 0.12722323725716841, "grad_norm": 1.2887763977050781, "learning_rate": 1.950936998245145e-05, "loss": 0.6836, "step": 853 }, { "epoch": 0.1273723852492636, "grad_norm": 1.5510444641113281, "learning_rate": 1.950787400882622e-05, "loss": 0.7281, "step": 854 }, { "epoch": 0.12752153324135873, "grad_norm": 1.357564926147461, "learning_rate": 1.9506375815526833e-05, "loss": 0.679, "step": 855 }, { "epoch": 0.1276706812334539, "grad_norm": 1.3898813724517822, "learning_rate": 1.950487540290306e-05, "loss": 0.6955, "step": 856 }, { "epoch": 0.12781982922554905, "grad_norm": 1.3547906875610352, "learning_rate": 1.950337277130518e-05, "loss": 0.692, "step": 857 }, { "epoch": 0.12796897721764422, "grad_norm": 1.3396705389022827, "learning_rate": 1.950186792108399e-05, "loss": 0.6829, "step": 858 }, { "epoch": 0.12811812520973936, "grad_norm": 1.228514552116394, "learning_rate": 1.9500360852590806e-05, "loss": 0.6069, "step": 859 }, { "epoch": 0.1282672732018345, "grad_norm": 1.4801928997039795, "learning_rate": 1.9498851566177462e-05, "loss": 0.7001, "step": 860 }, { "epoch": 0.12841642119392968, "grad_norm": 1.3552526235580444, "learning_rate": 1.9497340062196318e-05, "loss": 0.711, "step": 861 }, { "epoch": 0.12856556918602483, "grad_norm": 0.961524486541748, "learning_rate": 1.9495826341000237e-05, "loss": 0.6623, "step": 862 }, { "epoch": 0.12871471717812, "grad_norm": 1.2613317966461182, "learning_rate": 1.9494310402942607e-05, "loss": 0.6566, "step": 863 }, { "epoch": 0.12886386517021514, "grad_norm": 1.2809287309646606, "learning_rate": 1.9492792248377337e-05, "loss": 0.6471, "step": 864 }, { "epoch": 0.1290130131623103, "grad_norm": 1.3850784301757812, "learning_rate": 1.949127187765885e-05, "loss": 0.7024, "step": 865 }, { "epoch": 0.12916216115440546, "grad_norm": 1.273875117301941, "learning_rate": 1.948974929114208e-05, "loss": 0.6991, "step": 866 }, { "epoch": 0.1293113091465006, "grad_norm": 1.4043158292770386, "learning_rate": 1.9488224489182496e-05, "loss": 0.762, "step": 867 }, { "epoch": 0.12946045713859577, "grad_norm": 1.2380902767181396, "learning_rate": 1.9486697472136063e-05, "loss": 0.66, "step": 868 }, { "epoch": 0.12960960513069092, "grad_norm": 1.3367929458618164, "learning_rate": 1.9485168240359277e-05, "loss": 0.685, "step": 869 }, { "epoch": 0.1297587531227861, "grad_norm": 1.342427372932434, "learning_rate": 1.9483636794209143e-05, "loss": 0.6855, "step": 870 }, { "epoch": 0.12990790111488124, "grad_norm": 0.9306960701942444, "learning_rate": 1.9482103134043194e-05, "loss": 0.6737, "step": 871 }, { "epoch": 0.1300570491069764, "grad_norm": 1.316677212715149, "learning_rate": 1.9480567260219466e-05, "loss": 0.6222, "step": 872 }, { "epoch": 0.13020619709907155, "grad_norm": 1.3289095163345337, "learning_rate": 1.9479029173096523e-05, "loss": 0.6907, "step": 873 }, { "epoch": 0.1303553450911667, "grad_norm": 1.4271719455718994, "learning_rate": 1.9477488873033435e-05, "loss": 0.7137, "step": 874 }, { "epoch": 0.13050449308326187, "grad_norm": 1.3215421438217163, "learning_rate": 1.947594636038981e-05, "loss": 0.6529, "step": 875 }, { "epoch": 0.130653641075357, "grad_norm": 1.3691563606262207, "learning_rate": 1.9474401635525738e-05, "loss": 0.7253, "step": 876 }, { "epoch": 0.13080278906745219, "grad_norm": 1.340117335319519, "learning_rate": 1.9472854698801855e-05, "loss": 0.646, "step": 877 }, { "epoch": 0.13095193705954733, "grad_norm": 1.1758575439453125, "learning_rate": 1.9471305550579305e-05, "loss": 0.5561, "step": 878 }, { "epoch": 0.1311010850516425, "grad_norm": 0.8812705874443054, "learning_rate": 1.9469754191219743e-05, "loss": 0.66, "step": 879 }, { "epoch": 0.13125023304373765, "grad_norm": 1.2965463399887085, "learning_rate": 1.946820062108534e-05, "loss": 0.6176, "step": 880 }, { "epoch": 0.13139938103583282, "grad_norm": 1.1895122528076172, "learning_rate": 1.94666448405388e-05, "loss": 0.5959, "step": 881 }, { "epoch": 0.13154852902792796, "grad_norm": 1.5519888401031494, "learning_rate": 1.9465086849943318e-05, "loss": 0.7425, "step": 882 }, { "epoch": 0.1316976770200231, "grad_norm": 1.2604820728302002, "learning_rate": 1.9463526649662617e-05, "loss": 0.6336, "step": 883 }, { "epoch": 0.13184682501211828, "grad_norm": 1.1524332761764526, "learning_rate": 1.9461964240060944e-05, "loss": 0.5391, "step": 884 }, { "epoch": 0.13199597300421342, "grad_norm": 1.4283257722854614, "learning_rate": 1.9460399621503047e-05, "loss": 0.6849, "step": 885 }, { "epoch": 0.1321451209963086, "grad_norm": 1.219477653503418, "learning_rate": 1.9458832794354198e-05, "loss": 0.6759, "step": 886 }, { "epoch": 0.13229426898840374, "grad_norm": 1.3900070190429688, "learning_rate": 1.9457263758980182e-05, "loss": 0.7257, "step": 887 }, { "epoch": 0.1324434169804989, "grad_norm": 1.3553730249404907, "learning_rate": 1.9455692515747298e-05, "loss": 0.6373, "step": 888 }, { "epoch": 0.13259256497259406, "grad_norm": 1.2047041654586792, "learning_rate": 1.945411906502237e-05, "loss": 0.6547, "step": 889 }, { "epoch": 0.1327417129646892, "grad_norm": 1.25103759765625, "learning_rate": 1.9452543407172727e-05, "loss": 0.7305, "step": 890 }, { "epoch": 0.13289086095678437, "grad_norm": 1.3325084447860718, "learning_rate": 1.9450965542566217e-05, "loss": 0.685, "step": 891 }, { "epoch": 0.13304000894887952, "grad_norm": 1.1969677209854126, "learning_rate": 1.9449385471571197e-05, "loss": 0.6528, "step": 892 }, { "epoch": 0.1331891569409747, "grad_norm": 1.2723166942596436, "learning_rate": 1.9447803194556548e-05, "loss": 0.5973, "step": 893 }, { "epoch": 0.13333830493306983, "grad_norm": 1.3783345222473145, "learning_rate": 1.9446218711891666e-05, "loss": 0.6699, "step": 894 }, { "epoch": 0.133487452925165, "grad_norm": 0.9102293252944946, "learning_rate": 1.9444632023946456e-05, "loss": 0.6651, "step": 895 }, { "epoch": 0.13363660091726015, "grad_norm": 1.249232292175293, "learning_rate": 1.9443043131091343e-05, "loss": 0.6119, "step": 896 }, { "epoch": 0.1337857489093553, "grad_norm": 1.2109609842300415, "learning_rate": 1.944145203369726e-05, "loss": 0.5506, "step": 897 }, { "epoch": 0.13393489690145047, "grad_norm": 0.8616218566894531, "learning_rate": 1.9439858732135657e-05, "loss": 0.663, "step": 898 }, { "epoch": 0.1340840448935456, "grad_norm": 1.3403348922729492, "learning_rate": 1.9438263226778508e-05, "loss": 0.7167, "step": 899 }, { "epoch": 0.13423319288564078, "grad_norm": 1.501212477684021, "learning_rate": 1.943666551799829e-05, "loss": 0.7379, "step": 900 }, { "epoch": 0.13438234087773593, "grad_norm": 1.203760027885437, "learning_rate": 1.9435065606168e-05, "loss": 0.6649, "step": 901 }, { "epoch": 0.1345314888698311, "grad_norm": 1.3020741939544678, "learning_rate": 1.9433463491661143e-05, "loss": 0.6414, "step": 902 }, { "epoch": 0.13468063686192625, "grad_norm": 1.3040475845336914, "learning_rate": 1.9431859174851748e-05, "loss": 0.7342, "step": 903 }, { "epoch": 0.1348297848540214, "grad_norm": 1.3444937467575073, "learning_rate": 1.943025265611435e-05, "loss": 0.6799, "step": 904 }, { "epoch": 0.13497893284611656, "grad_norm": 1.2262442111968994, "learning_rate": 1.9428643935824006e-05, "loss": 0.6464, "step": 905 }, { "epoch": 0.1351280808382117, "grad_norm": 1.2683295011520386, "learning_rate": 1.9427033014356276e-05, "loss": 0.6579, "step": 906 }, { "epoch": 0.13527722883030688, "grad_norm": 1.3079136610031128, "learning_rate": 1.942541989208724e-05, "loss": 0.6807, "step": 907 }, { "epoch": 0.13542637682240202, "grad_norm": 1.3322042226791382, "learning_rate": 1.9423804569393497e-05, "loss": 0.7567, "step": 908 }, { "epoch": 0.1355755248144972, "grad_norm": 1.2910107374191284, "learning_rate": 1.942218704665215e-05, "loss": 0.5867, "step": 909 }, { "epoch": 0.13572467280659234, "grad_norm": 1.36272132396698, "learning_rate": 1.9420567324240822e-05, "loss": 0.6246, "step": 910 }, { "epoch": 0.1358738207986875, "grad_norm": 1.4627289772033691, "learning_rate": 1.9418945402537647e-05, "loss": 0.692, "step": 911 }, { "epoch": 0.13602296879078266, "grad_norm": 1.3874510526657104, "learning_rate": 1.9417321281921275e-05, "loss": 0.6679, "step": 912 }, { "epoch": 0.1361721167828778, "grad_norm": 1.2760919332504272, "learning_rate": 1.941569496277086e-05, "loss": 0.6226, "step": 913 }, { "epoch": 0.13632126477497297, "grad_norm": 1.3843663930892944, "learning_rate": 1.941406644546609e-05, "loss": 0.7467, "step": 914 }, { "epoch": 0.13647041276706812, "grad_norm": 1.3922626972198486, "learning_rate": 1.941243573038714e-05, "loss": 0.7605, "step": 915 }, { "epoch": 0.1366195607591633, "grad_norm": 1.058244228363037, "learning_rate": 1.9410802817914715e-05, "loss": 0.7088, "step": 916 }, { "epoch": 0.13676870875125843, "grad_norm": 1.2984232902526855, "learning_rate": 1.9409167708430036e-05, "loss": 0.6333, "step": 917 }, { "epoch": 0.1369178567433536, "grad_norm": 1.190583348274231, "learning_rate": 1.9407530402314818e-05, "loss": 0.5917, "step": 918 }, { "epoch": 0.13706700473544875, "grad_norm": 1.249495029449463, "learning_rate": 1.9405890899951306e-05, "loss": 0.6868, "step": 919 }, { "epoch": 0.1372161527275439, "grad_norm": 1.2492367029190063, "learning_rate": 1.9404249201722255e-05, "loss": 0.6858, "step": 920 }, { "epoch": 0.13736530071963907, "grad_norm": 1.305873155593872, "learning_rate": 1.9402605308010924e-05, "loss": 0.6424, "step": 921 }, { "epoch": 0.1375144487117342, "grad_norm": 1.454755187034607, "learning_rate": 1.9400959219201096e-05, "loss": 0.7232, "step": 922 }, { "epoch": 0.13766359670382938, "grad_norm": 1.2820184230804443, "learning_rate": 1.939931093567706e-05, "loss": 0.6731, "step": 923 }, { "epoch": 0.13781274469592453, "grad_norm": 1.3873790502548218, "learning_rate": 1.9397660457823618e-05, "loss": 0.6701, "step": 924 }, { "epoch": 0.1379618926880197, "grad_norm": 1.511277437210083, "learning_rate": 1.9396007786026085e-05, "loss": 0.7393, "step": 925 }, { "epoch": 0.13811104068011484, "grad_norm": 1.2310296297073364, "learning_rate": 1.9394352920670282e-05, "loss": 0.6799, "step": 926 }, { "epoch": 0.13826018867221, "grad_norm": 1.2505367994308472, "learning_rate": 1.9392695862142556e-05, "loss": 0.6882, "step": 927 }, { "epoch": 0.13840933666430516, "grad_norm": 1.3563116788864136, "learning_rate": 1.9391036610829753e-05, "loss": 0.7112, "step": 928 }, { "epoch": 0.1385584846564003, "grad_norm": 1.3019468784332275, "learning_rate": 1.9389375167119237e-05, "loss": 0.7245, "step": 929 }, { "epoch": 0.13870763264849548, "grad_norm": 1.3906844854354858, "learning_rate": 1.9387711531398883e-05, "loss": 0.6731, "step": 930 }, { "epoch": 0.13885678064059062, "grad_norm": 1.345935344696045, "learning_rate": 1.9386045704057083e-05, "loss": 0.6425, "step": 931 }, { "epoch": 0.1390059286326858, "grad_norm": 1.358036756515503, "learning_rate": 1.9384377685482725e-05, "loss": 0.7372, "step": 932 }, { "epoch": 0.13915507662478094, "grad_norm": 1.348948359489441, "learning_rate": 1.9382707476065224e-05, "loss": 0.6876, "step": 933 }, { "epoch": 0.13930422461687608, "grad_norm": 1.4479775428771973, "learning_rate": 1.9381035076194502e-05, "loss": 0.7093, "step": 934 }, { "epoch": 0.13945337260897125, "grad_norm": 1.3593740463256836, "learning_rate": 1.9379360486260988e-05, "loss": 0.7425, "step": 935 }, { "epoch": 0.1396025206010664, "grad_norm": 1.4537951946258545, "learning_rate": 1.9377683706655626e-05, "loss": 0.7149, "step": 936 }, { "epoch": 0.13975166859316157, "grad_norm": 1.3866628408432007, "learning_rate": 1.9376004737769878e-05, "loss": 0.703, "step": 937 }, { "epoch": 0.13990081658525672, "grad_norm": 1.289941430091858, "learning_rate": 1.93743235799957e-05, "loss": 0.6535, "step": 938 }, { "epoch": 0.1400499645773519, "grad_norm": 1.279964566230774, "learning_rate": 1.9372640233725576e-05, "loss": 0.728, "step": 939 }, { "epoch": 0.14019911256944703, "grad_norm": 1.2165162563323975, "learning_rate": 1.937095469935249e-05, "loss": 0.6069, "step": 940 }, { "epoch": 0.1403482605615422, "grad_norm": 1.2755948305130005, "learning_rate": 1.9369266977269946e-05, "loss": 0.6775, "step": 941 }, { "epoch": 0.14049740855363735, "grad_norm": 1.0643441677093506, "learning_rate": 1.9367577067871948e-05, "loss": 0.6593, "step": 942 }, { "epoch": 0.1406465565457325, "grad_norm": 1.2930901050567627, "learning_rate": 1.9365884971553014e-05, "loss": 0.6314, "step": 943 }, { "epoch": 0.14079570453782767, "grad_norm": 1.3559253215789795, "learning_rate": 1.9364190688708184e-05, "loss": 0.7484, "step": 944 }, { "epoch": 0.1409448525299228, "grad_norm": 1.3770711421966553, "learning_rate": 1.9362494219732994e-05, "loss": 0.6895, "step": 945 }, { "epoch": 0.14109400052201798, "grad_norm": 1.199790120124817, "learning_rate": 1.9360795565023494e-05, "loss": 0.6433, "step": 946 }, { "epoch": 0.14124314851411313, "grad_norm": 1.4475568532943726, "learning_rate": 1.9359094724976248e-05, "loss": 0.7982, "step": 947 }, { "epoch": 0.1413922965062083, "grad_norm": 1.3090274333953857, "learning_rate": 1.935739169998833e-05, "loss": 0.6286, "step": 948 }, { "epoch": 0.14154144449830344, "grad_norm": 1.1853325366973877, "learning_rate": 1.9355686490457318e-05, "loss": 0.6232, "step": 949 }, { "epoch": 0.1416905924903986, "grad_norm": 1.2680484056472778, "learning_rate": 1.9353979096781304e-05, "loss": 0.6996, "step": 950 }, { "epoch": 0.14183974048249376, "grad_norm": 1.3034193515777588, "learning_rate": 1.9352269519358895e-05, "loss": 0.7274, "step": 951 }, { "epoch": 0.1419888884745889, "grad_norm": 1.2887804508209229, "learning_rate": 1.9350557758589195e-05, "loss": 0.6198, "step": 952 }, { "epoch": 0.14213803646668408, "grad_norm": 1.4671082496643066, "learning_rate": 1.9348843814871836e-05, "loss": 0.7028, "step": 953 }, { "epoch": 0.14228718445877922, "grad_norm": 1.2405176162719727, "learning_rate": 1.934712768860694e-05, "loss": 0.5963, "step": 954 }, { "epoch": 0.1424363324508744, "grad_norm": 1.3898197412490845, "learning_rate": 1.9345409380195154e-05, "loss": 0.6138, "step": 955 }, { "epoch": 0.14258548044296954, "grad_norm": 1.2751951217651367, "learning_rate": 1.934368889003762e-05, "loss": 0.6399, "step": 956 }, { "epoch": 0.14273462843506468, "grad_norm": 1.17288076877594, "learning_rate": 1.9341966218536007e-05, "loss": 0.6163, "step": 957 }, { "epoch": 0.14288377642715985, "grad_norm": 1.2076432704925537, "learning_rate": 1.9340241366092475e-05, "loss": 0.7027, "step": 958 }, { "epoch": 0.143032924419255, "grad_norm": 1.2463736534118652, "learning_rate": 1.933851433310971e-05, "loss": 0.5922, "step": 959 }, { "epoch": 0.14318207241135017, "grad_norm": 1.1335736513137817, "learning_rate": 1.9336785119990894e-05, "loss": 0.6239, "step": 960 }, { "epoch": 0.14333122040344531, "grad_norm": 1.2310543060302734, "learning_rate": 1.933505372713972e-05, "loss": 0.5942, "step": 961 }, { "epoch": 0.1434803683955405, "grad_norm": 1.4322258234024048, "learning_rate": 1.9333320154960403e-05, "loss": 0.7201, "step": 962 }, { "epoch": 0.14362951638763563, "grad_norm": 1.2974315881729126, "learning_rate": 1.9331584403857645e-05, "loss": 0.7654, "step": 963 }, { "epoch": 0.14377866437973078, "grad_norm": 1.3637787103652954, "learning_rate": 1.9329846474236676e-05, "loss": 0.7342, "step": 964 }, { "epoch": 0.14392781237182595, "grad_norm": 1.2545559406280518, "learning_rate": 1.9328106366503227e-05, "loss": 0.6705, "step": 965 }, { "epoch": 0.1440769603639211, "grad_norm": 1.2893340587615967, "learning_rate": 1.932636408106353e-05, "loss": 0.6132, "step": 966 }, { "epoch": 0.14422610835601626, "grad_norm": 1.2430473566055298, "learning_rate": 1.9324619618324338e-05, "loss": 0.6196, "step": 967 }, { "epoch": 0.1443752563481114, "grad_norm": 1.2282116413116455, "learning_rate": 1.9322872978692907e-05, "loss": 0.6819, "step": 968 }, { "epoch": 0.14452440434020658, "grad_norm": 1.1722882986068726, "learning_rate": 1.9321124162577e-05, "loss": 0.6385, "step": 969 }, { "epoch": 0.14467355233230172, "grad_norm": 1.3537169694900513, "learning_rate": 1.9319373170384895e-05, "loss": 0.7404, "step": 970 }, { "epoch": 0.14482270032439687, "grad_norm": 1.3026299476623535, "learning_rate": 1.931762000252536e-05, "loss": 0.6475, "step": 971 }, { "epoch": 0.14497184831649204, "grad_norm": 1.20765221118927, "learning_rate": 1.9315864659407696e-05, "loss": 0.6968, "step": 972 }, { "epoch": 0.14512099630858719, "grad_norm": 1.1841531991958618, "learning_rate": 1.931410714144169e-05, "loss": 0.6497, "step": 973 }, { "epoch": 0.14527014430068236, "grad_norm": 1.3897104263305664, "learning_rate": 1.931234744903765e-05, "loss": 0.7359, "step": 974 }, { "epoch": 0.1454192922927775, "grad_norm": 1.5097330808639526, "learning_rate": 1.9310585582606385e-05, "loss": 0.7092, "step": 975 }, { "epoch": 0.14556844028487267, "grad_norm": 1.3557618856430054, "learning_rate": 1.930882154255922e-05, "loss": 0.7336, "step": 976 }, { "epoch": 0.14571758827696782, "grad_norm": 1.4081840515136719, "learning_rate": 1.9307055329307975e-05, "loss": 0.6383, "step": 977 }, { "epoch": 0.145866736269063, "grad_norm": 1.393591046333313, "learning_rate": 1.930528694326499e-05, "loss": 0.7095, "step": 978 }, { "epoch": 0.14601588426115814, "grad_norm": 1.3611457347869873, "learning_rate": 1.9303516384843093e-05, "loss": 0.7761, "step": 979 }, { "epoch": 0.14616503225325328, "grad_norm": 1.2637887001037598, "learning_rate": 1.9301743654455652e-05, "loss": 0.7053, "step": 980 }, { "epoch": 0.14631418024534845, "grad_norm": 1.0342658758163452, "learning_rate": 1.9299968752516505e-05, "loss": 0.6362, "step": 981 }, { "epoch": 0.1464633282374436, "grad_norm": 1.4216244220733643, "learning_rate": 1.9298191679440024e-05, "loss": 0.7371, "step": 982 }, { "epoch": 0.14661247622953877, "grad_norm": 1.4086053371429443, "learning_rate": 1.9296412435641073e-05, "loss": 0.6792, "step": 983 }, { "epoch": 0.1467616242216339, "grad_norm": 1.3320086002349854, "learning_rate": 1.929463102153503e-05, "loss": 0.6617, "step": 984 }, { "epoch": 0.14691077221372909, "grad_norm": 1.2891366481781006, "learning_rate": 1.9292847437537784e-05, "loss": 0.6855, "step": 985 }, { "epoch": 0.14705992020582423, "grad_norm": 1.293965458869934, "learning_rate": 1.929106168406571e-05, "loss": 0.6287, "step": 986 }, { "epoch": 0.14720906819791937, "grad_norm": 1.4440292119979858, "learning_rate": 1.9289273761535713e-05, "loss": 0.7146, "step": 987 }, { "epoch": 0.14735821619001455, "grad_norm": 1.4215255975723267, "learning_rate": 1.9287483670365193e-05, "loss": 0.7037, "step": 988 }, { "epoch": 0.1475073641821097, "grad_norm": 1.4938700199127197, "learning_rate": 1.928569141097206e-05, "loss": 0.7899, "step": 989 }, { "epoch": 0.14765651217420486, "grad_norm": 1.4163293838500977, "learning_rate": 1.9283896983774727e-05, "loss": 0.7094, "step": 990 }, { "epoch": 0.1478056601663, "grad_norm": 1.1974486112594604, "learning_rate": 1.9282100389192116e-05, "loss": 0.6045, "step": 991 }, { "epoch": 0.14795480815839518, "grad_norm": 1.2064433097839355, "learning_rate": 1.9280301627643647e-05, "loss": 0.6688, "step": 992 }, { "epoch": 0.14810395615049032, "grad_norm": 1.3315480947494507, "learning_rate": 1.927850069954926e-05, "loss": 0.6798, "step": 993 }, { "epoch": 0.14825310414258547, "grad_norm": 1.2050062417984009, "learning_rate": 1.9276697605329392e-05, "loss": 0.6715, "step": 994 }, { "epoch": 0.14840225213468064, "grad_norm": 1.212624430656433, "learning_rate": 1.9274892345404985e-05, "loss": 0.662, "step": 995 }, { "epoch": 0.14855140012677578, "grad_norm": 1.2929469347000122, "learning_rate": 1.9273084920197488e-05, "loss": 0.6779, "step": 996 }, { "epoch": 0.14870054811887096, "grad_norm": 1.2993645668029785, "learning_rate": 1.9271275330128856e-05, "loss": 0.7397, "step": 997 }, { "epoch": 0.1488496961109661, "grad_norm": 1.3198974132537842, "learning_rate": 1.9269463575621552e-05, "loss": 0.6838, "step": 998 }, { "epoch": 0.14899884410306127, "grad_norm": 1.4739305973052979, "learning_rate": 1.926764965709854e-05, "loss": 0.7127, "step": 999 }, { "epoch": 0.14914799209515642, "grad_norm": 1.3708794116973877, "learning_rate": 1.926583357498329e-05, "loss": 0.6727, "step": 1000 }, { "epoch": 0.14929714008725156, "grad_norm": 1.3286653757095337, "learning_rate": 1.926401532969978e-05, "loss": 0.631, "step": 1001 }, { "epoch": 0.14944628807934673, "grad_norm": 1.2117761373519897, "learning_rate": 1.926219492167249e-05, "loss": 0.625, "step": 1002 }, { "epoch": 0.14959543607144188, "grad_norm": 1.356370449066162, "learning_rate": 1.9260372351326406e-05, "loss": 0.6797, "step": 1003 }, { "epoch": 0.14974458406353705, "grad_norm": 1.3226449489593506, "learning_rate": 1.9258547619087017e-05, "loss": 0.7363, "step": 1004 }, { "epoch": 0.1498937320556322, "grad_norm": 1.3250395059585571, "learning_rate": 1.9256720725380323e-05, "loss": 0.749, "step": 1005 }, { "epoch": 0.15004288004772737, "grad_norm": 1.236847162246704, "learning_rate": 1.9254891670632823e-05, "loss": 0.6582, "step": 1006 }, { "epoch": 0.1501920280398225, "grad_norm": 1.265785574913025, "learning_rate": 1.9253060455271516e-05, "loss": 0.6298, "step": 1007 }, { "epoch": 0.15034117603191768, "grad_norm": 1.198799729347229, "learning_rate": 1.9251227079723917e-05, "loss": 0.6589, "step": 1008 }, { "epoch": 0.15049032402401283, "grad_norm": 1.2166225910186768, "learning_rate": 1.924939154441803e-05, "loss": 0.6039, "step": 1009 }, { "epoch": 0.15063947201610797, "grad_norm": 1.3836963176727295, "learning_rate": 1.924755384978239e-05, "loss": 0.6685, "step": 1010 }, { "epoch": 0.15078862000820314, "grad_norm": 1.2406208515167236, "learning_rate": 1.9245713996246e-05, "loss": 0.6446, "step": 1011 }, { "epoch": 0.1509377680002983, "grad_norm": 1.3404641151428223, "learning_rate": 1.92438719842384e-05, "loss": 0.646, "step": 1012 }, { "epoch": 0.15108691599239346, "grad_norm": 1.2213985919952393, "learning_rate": 1.924202781418961e-05, "loss": 0.6621, "step": 1013 }, { "epoch": 0.1512360639844886, "grad_norm": 1.3507589101791382, "learning_rate": 1.9240181486530166e-05, "loss": 0.6993, "step": 1014 }, { "epoch": 0.15138521197658378, "grad_norm": 1.4339412450790405, "learning_rate": 1.9238333001691107e-05, "loss": 0.6638, "step": 1015 }, { "epoch": 0.15153435996867892, "grad_norm": 1.2528456449508667, "learning_rate": 1.923648236010397e-05, "loss": 0.66, "step": 1016 }, { "epoch": 0.15168350796077407, "grad_norm": 1.2071731090545654, "learning_rate": 1.9234629562200805e-05, "loss": 0.6299, "step": 1017 }, { "epoch": 0.15183265595286924, "grad_norm": 1.344678282737732, "learning_rate": 1.9232774608414153e-05, "loss": 0.6545, "step": 1018 }, { "epoch": 0.15198180394496438, "grad_norm": 0.9764267802238464, "learning_rate": 1.9230917499177067e-05, "loss": 0.6412, "step": 1019 }, { "epoch": 0.15213095193705956, "grad_norm": 1.145087718963623, "learning_rate": 1.9229058234923104e-05, "loss": 0.621, "step": 1020 }, { "epoch": 0.1522800999291547, "grad_norm": 1.2425833940505981, "learning_rate": 1.922719681608632e-05, "loss": 0.6579, "step": 1021 }, { "epoch": 0.15242924792124987, "grad_norm": 1.369154691696167, "learning_rate": 1.9225333243101275e-05, "loss": 0.7395, "step": 1022 }, { "epoch": 0.15257839591334502, "grad_norm": 1.1488451957702637, "learning_rate": 1.9223467516403028e-05, "loss": 0.7046, "step": 1023 }, { "epoch": 0.15272754390544016, "grad_norm": 1.3414983749389648, "learning_rate": 1.922159963642715e-05, "loss": 0.6935, "step": 1024 }, { "epoch": 0.15287669189753533, "grad_norm": 1.1903114318847656, "learning_rate": 1.9219729603609706e-05, "loss": 0.6071, "step": 1025 }, { "epoch": 0.15302583988963048, "grad_norm": 1.2508041858673096, "learning_rate": 1.921785741838727e-05, "loss": 0.6313, "step": 1026 }, { "epoch": 0.15317498788172565, "grad_norm": 1.1845442056655884, "learning_rate": 1.921598308119691e-05, "loss": 0.5237, "step": 1027 }, { "epoch": 0.1533241358738208, "grad_norm": 1.3092690706253052, "learning_rate": 1.9214106592476215e-05, "loss": 0.6979, "step": 1028 }, { "epoch": 0.15347328386591597, "grad_norm": 1.3195267915725708, "learning_rate": 1.9212227952663247e-05, "loss": 0.6313, "step": 1029 }, { "epoch": 0.1536224318580111, "grad_norm": 1.3267848491668701, "learning_rate": 1.9210347162196598e-05, "loss": 0.7104, "step": 1030 }, { "epoch": 0.15377157985010625, "grad_norm": 1.2225236892700195, "learning_rate": 1.9208464221515347e-05, "loss": 0.7472, "step": 1031 }, { "epoch": 0.15392072784220143, "grad_norm": 1.328966498374939, "learning_rate": 1.9206579131059076e-05, "loss": 0.69, "step": 1032 }, { "epoch": 0.15406987583429657, "grad_norm": 1.302394986152649, "learning_rate": 1.9204691891267875e-05, "loss": 0.7371, "step": 1033 }, { "epoch": 0.15421902382639174, "grad_norm": 1.2681622505187988, "learning_rate": 1.9202802502582334e-05, "loss": 0.7014, "step": 1034 }, { "epoch": 0.1543681718184869, "grad_norm": 1.4580904245376587, "learning_rate": 1.9200910965443537e-05, "loss": 0.7027, "step": 1035 }, { "epoch": 0.15451731981058206, "grad_norm": 1.254655361175537, "learning_rate": 1.919901728029308e-05, "loss": 0.7297, "step": 1036 }, { "epoch": 0.1546664678026772, "grad_norm": 1.3661012649536133, "learning_rate": 1.9197121447573053e-05, "loss": 0.7499, "step": 1037 }, { "epoch": 0.15481561579477238, "grad_norm": 1.1932525634765625, "learning_rate": 1.9195223467726056e-05, "loss": 0.6274, "step": 1038 }, { "epoch": 0.15496476378686752, "grad_norm": 1.2381222248077393, "learning_rate": 1.9193323341195175e-05, "loss": 0.6128, "step": 1039 }, { "epoch": 0.15511391177896267, "grad_norm": 1.3562649488449097, "learning_rate": 1.9191421068424017e-05, "loss": 0.6395, "step": 1040 }, { "epoch": 0.15526305977105784, "grad_norm": 1.2645279169082642, "learning_rate": 1.918951664985667e-05, "loss": 0.7125, "step": 1041 }, { "epoch": 0.15541220776315298, "grad_norm": 1.2761634588241577, "learning_rate": 1.918761008593774e-05, "loss": 0.5979, "step": 1042 }, { "epoch": 0.15556135575524815, "grad_norm": 1.2835561037063599, "learning_rate": 1.9185701377112324e-05, "loss": 0.6566, "step": 1043 }, { "epoch": 0.1557105037473433, "grad_norm": 1.2360000610351562, "learning_rate": 1.9183790523826022e-05, "loss": 0.663, "step": 1044 }, { "epoch": 0.15585965173943847, "grad_norm": 0.9551177024841309, "learning_rate": 1.9181877526524937e-05, "loss": 0.6545, "step": 1045 }, { "epoch": 0.15600879973153362, "grad_norm": 1.3260152339935303, "learning_rate": 1.9179962385655665e-05, "loss": 0.6563, "step": 1046 }, { "epoch": 0.15615794772362876, "grad_norm": 1.1986775398254395, "learning_rate": 1.9178045101665317e-05, "loss": 0.63, "step": 1047 }, { "epoch": 0.15630709571572393, "grad_norm": 1.2433664798736572, "learning_rate": 1.9176125675001487e-05, "loss": 0.6652, "step": 1048 }, { "epoch": 0.15645624370781908, "grad_norm": 1.3717856407165527, "learning_rate": 1.917420410611228e-05, "loss": 0.671, "step": 1049 }, { "epoch": 0.15660539169991425, "grad_norm": 1.2839144468307495, "learning_rate": 1.91722803954463e-05, "loss": 0.7419, "step": 1050 }, { "epoch": 0.1567545396920094, "grad_norm": 1.2850242853164673, "learning_rate": 1.917035454345265e-05, "loss": 0.6527, "step": 1051 }, { "epoch": 0.15690368768410456, "grad_norm": 1.210608959197998, "learning_rate": 1.916842655058093e-05, "loss": 0.5977, "step": 1052 }, { "epoch": 0.1570528356761997, "grad_norm": 1.2655625343322754, "learning_rate": 1.9166496417281243e-05, "loss": 0.683, "step": 1053 }, { "epoch": 0.15720198366829485, "grad_norm": 1.3280987739562988, "learning_rate": 1.916456414400419e-05, "loss": 0.7206, "step": 1054 }, { "epoch": 0.15735113166039003, "grad_norm": 1.3113071918487549, "learning_rate": 1.9162629731200878e-05, "loss": 0.6866, "step": 1055 }, { "epoch": 0.15750027965248517, "grad_norm": 1.493241548538208, "learning_rate": 1.91606931793229e-05, "loss": 0.739, "step": 1056 }, { "epoch": 0.15764942764458034, "grad_norm": 1.2883027791976929, "learning_rate": 1.9158754488822366e-05, "loss": 0.7384, "step": 1057 }, { "epoch": 0.1577985756366755, "grad_norm": 1.1960649490356445, "learning_rate": 1.9156813660151866e-05, "loss": 0.5851, "step": 1058 }, { "epoch": 0.15794772362877066, "grad_norm": 1.3004871606826782, "learning_rate": 1.9154870693764504e-05, "loss": 0.5944, "step": 1059 }, { "epoch": 0.1580968716208658, "grad_norm": 1.2642871141433716, "learning_rate": 1.9152925590113878e-05, "loss": 0.7439, "step": 1060 }, { "epoch": 0.15824601961296095, "grad_norm": 1.3726983070373535, "learning_rate": 1.9150978349654082e-05, "loss": 0.6984, "step": 1061 }, { "epoch": 0.15839516760505612, "grad_norm": 1.30672025680542, "learning_rate": 1.9149028972839718e-05, "loss": 0.6781, "step": 1062 }, { "epoch": 0.15854431559715126, "grad_norm": 1.1871055364608765, "learning_rate": 1.9147077460125873e-05, "loss": 0.7192, "step": 1063 }, { "epoch": 0.15869346358924644, "grad_norm": 1.2567960023880005, "learning_rate": 1.9145123811968145e-05, "loss": 0.6475, "step": 1064 }, { "epoch": 0.15884261158134158, "grad_norm": 1.2058396339416504, "learning_rate": 1.9143168028822623e-05, "loss": 0.5973, "step": 1065 }, { "epoch": 0.15899175957343675, "grad_norm": 1.3184914588928223, "learning_rate": 1.9141210111145898e-05, "loss": 0.673, "step": 1066 }, { "epoch": 0.1591409075655319, "grad_norm": 1.2837327718734741, "learning_rate": 1.913925005939506e-05, "loss": 0.6379, "step": 1067 }, { "epoch": 0.15929005555762704, "grad_norm": 1.2625125646591187, "learning_rate": 1.9137287874027696e-05, "loss": 0.7084, "step": 1068 }, { "epoch": 0.1594392035497222, "grad_norm": 1.2468477487564087, "learning_rate": 1.9135323555501885e-05, "loss": 0.6568, "step": 1069 }, { "epoch": 0.15958835154181736, "grad_norm": 1.2418804168701172, "learning_rate": 1.9133357104276218e-05, "loss": 0.6758, "step": 1070 }, { "epoch": 0.15973749953391253, "grad_norm": 1.2995269298553467, "learning_rate": 1.913138852080977e-05, "loss": 0.7318, "step": 1071 }, { "epoch": 0.15988664752600767, "grad_norm": 1.3734304904937744, "learning_rate": 1.912941780556212e-05, "loss": 0.6857, "step": 1072 }, { "epoch": 0.16003579551810285, "grad_norm": 1.2231746912002563, "learning_rate": 1.9127444958993345e-05, "loss": 0.6599, "step": 1073 }, { "epoch": 0.160184943510198, "grad_norm": 1.2443691492080688, "learning_rate": 1.912546998156402e-05, "loss": 0.6117, "step": 1074 }, { "epoch": 0.16033409150229316, "grad_norm": 1.2997140884399414, "learning_rate": 1.9123492873735216e-05, "loss": 0.6855, "step": 1075 }, { "epoch": 0.1604832394943883, "grad_norm": 1.1965854167938232, "learning_rate": 1.9121513635968497e-05, "loss": 0.6011, "step": 1076 }, { "epoch": 0.16063238748648345, "grad_norm": 1.3550115823745728, "learning_rate": 1.9119532268725935e-05, "loss": 0.7039, "step": 1077 }, { "epoch": 0.16078153547857862, "grad_norm": 1.0766410827636719, "learning_rate": 1.9117548772470093e-05, "loss": 0.5439, "step": 1078 }, { "epoch": 0.16093068347067377, "grad_norm": 1.230507254600525, "learning_rate": 1.9115563147664022e-05, "loss": 0.678, "step": 1079 }, { "epoch": 0.16107983146276894, "grad_norm": 1.3385862112045288, "learning_rate": 1.9113575394771287e-05, "loss": 0.7262, "step": 1080 }, { "epoch": 0.16122897945486409, "grad_norm": 1.27250337600708, "learning_rate": 1.9111585514255943e-05, "loss": 0.7121, "step": 1081 }, { "epoch": 0.16137812744695926, "grad_norm": 1.106202244758606, "learning_rate": 1.910959350658253e-05, "loss": 0.6184, "step": 1082 }, { "epoch": 0.1615272754390544, "grad_norm": 1.279125452041626, "learning_rate": 1.9107599372216107e-05, "loss": 0.6735, "step": 1083 }, { "epoch": 0.16167642343114955, "grad_norm": 1.2146966457366943, "learning_rate": 1.9105603111622212e-05, "loss": 0.6975, "step": 1084 }, { "epoch": 0.16182557142324472, "grad_norm": 1.4121757745742798, "learning_rate": 1.910360472526688e-05, "loss": 0.6513, "step": 1085 }, { "epoch": 0.16197471941533986, "grad_norm": 1.6029887199401855, "learning_rate": 1.910160421361666e-05, "loss": 0.7107, "step": 1086 }, { "epoch": 0.16212386740743503, "grad_norm": 1.1876448392868042, "learning_rate": 1.9099601577138572e-05, "loss": 0.6337, "step": 1087 }, { "epoch": 0.16227301539953018, "grad_norm": 1.2823827266693115, "learning_rate": 1.909759681630015e-05, "loss": 0.7199, "step": 1088 }, { "epoch": 0.16242216339162535, "grad_norm": 1.2664463520050049, "learning_rate": 1.9095589931569418e-05, "loss": 0.6185, "step": 1089 }, { "epoch": 0.1625713113837205, "grad_norm": 1.475132703781128, "learning_rate": 1.909358092341489e-05, "loss": 0.7904, "step": 1090 }, { "epoch": 0.16272045937581564, "grad_norm": 1.39704430103302, "learning_rate": 1.9091569792305593e-05, "loss": 0.7584, "step": 1091 }, { "epoch": 0.1628696073679108, "grad_norm": 1.2276639938354492, "learning_rate": 1.908955653871103e-05, "loss": 0.6516, "step": 1092 }, { "epoch": 0.16301875536000596, "grad_norm": 1.3837952613830566, "learning_rate": 1.908754116310121e-05, "loss": 0.6574, "step": 1093 }, { "epoch": 0.16316790335210113, "grad_norm": 1.417701005935669, "learning_rate": 1.908552366594664e-05, "loss": 0.7074, "step": 1094 }, { "epoch": 0.16331705134419627, "grad_norm": 1.2228000164031982, "learning_rate": 1.9083504047718308e-05, "loss": 0.6644, "step": 1095 }, { "epoch": 0.16346619933629145, "grad_norm": 1.3300058841705322, "learning_rate": 1.9081482308887716e-05, "loss": 0.6989, "step": 1096 }, { "epoch": 0.1636153473283866, "grad_norm": 1.2333998680114746, "learning_rate": 1.9079458449926847e-05, "loss": 0.6226, "step": 1097 }, { "epoch": 0.16376449532048173, "grad_norm": 1.223961591720581, "learning_rate": 1.9077432471308182e-05, "loss": 0.6721, "step": 1098 }, { "epoch": 0.1639136433125769, "grad_norm": 1.422964096069336, "learning_rate": 1.9075404373504705e-05, "loss": 0.6863, "step": 1099 }, { "epoch": 0.16406279130467205, "grad_norm": 1.180919885635376, "learning_rate": 1.9073374156989888e-05, "loss": 0.6667, "step": 1100 }, { "epoch": 0.16421193929676722, "grad_norm": 1.2067362070083618, "learning_rate": 1.907134182223769e-05, "loss": 0.6515, "step": 1101 }, { "epoch": 0.16436108728886237, "grad_norm": 1.2444000244140625, "learning_rate": 1.906930736972258e-05, "loss": 0.5766, "step": 1102 }, { "epoch": 0.16451023528095754, "grad_norm": 1.2912579774856567, "learning_rate": 1.9067270799919512e-05, "loss": 0.6559, "step": 1103 }, { "epoch": 0.16465938327305268, "grad_norm": 1.365612268447876, "learning_rate": 1.9065232113303934e-05, "loss": 0.6709, "step": 1104 }, { "epoch": 0.16480853126514786, "grad_norm": 1.2823219299316406, "learning_rate": 1.9063191310351797e-05, "loss": 0.6795, "step": 1105 }, { "epoch": 0.164957679257243, "grad_norm": 1.2815334796905518, "learning_rate": 1.9061148391539534e-05, "loss": 0.7445, "step": 1106 }, { "epoch": 0.16510682724933815, "grad_norm": 0.891916811466217, "learning_rate": 1.9059103357344075e-05, "loss": 0.6564, "step": 1107 }, { "epoch": 0.16525597524143332, "grad_norm": 1.2493418455123901, "learning_rate": 1.905705620824285e-05, "loss": 0.604, "step": 1108 }, { "epoch": 0.16540512323352846, "grad_norm": 1.2630980014801025, "learning_rate": 1.9055006944713782e-05, "loss": 0.6895, "step": 1109 }, { "epoch": 0.16555427122562363, "grad_norm": 1.3991402387619019, "learning_rate": 1.905295556723528e-05, "loss": 0.679, "step": 1110 }, { "epoch": 0.16570341921771878, "grad_norm": 1.2150359153747559, "learning_rate": 1.9050902076286253e-05, "loss": 0.6533, "step": 1111 }, { "epoch": 0.16585256720981395, "grad_norm": 1.4057906866073608, "learning_rate": 1.9048846472346102e-05, "loss": 0.6572, "step": 1112 }, { "epoch": 0.1660017152019091, "grad_norm": 1.3630772829055786, "learning_rate": 1.9046788755894722e-05, "loss": 0.6509, "step": 1113 }, { "epoch": 0.16615086319400424, "grad_norm": 1.2395527362823486, "learning_rate": 1.9044728927412495e-05, "loss": 0.6581, "step": 1114 }, { "epoch": 0.1663000111860994, "grad_norm": 1.4584426879882812, "learning_rate": 1.904266698738031e-05, "loss": 0.7329, "step": 1115 }, { "epoch": 0.16644915917819456, "grad_norm": 1.2691479921340942, "learning_rate": 1.904060293627953e-05, "loss": 0.654, "step": 1116 }, { "epoch": 0.16659830717028973, "grad_norm": 1.262557864189148, "learning_rate": 1.903853677459203e-05, "loss": 0.692, "step": 1117 }, { "epoch": 0.16674745516238487, "grad_norm": 1.2883243560791016, "learning_rate": 1.903646850280016e-05, "loss": 0.6881, "step": 1118 }, { "epoch": 0.16689660315448004, "grad_norm": 1.3146823644638062, "learning_rate": 1.903439812138678e-05, "loss": 0.7559, "step": 1119 }, { "epoch": 0.1670457511465752, "grad_norm": 1.2775931358337402, "learning_rate": 1.9032325630835227e-05, "loss": 0.6748, "step": 1120 }, { "epoch": 0.16719489913867033, "grad_norm": 1.294083595275879, "learning_rate": 1.9030251031629338e-05, "loss": 0.609, "step": 1121 }, { "epoch": 0.1673440471307655, "grad_norm": 1.4273483753204346, "learning_rate": 1.902817432425345e-05, "loss": 0.7164, "step": 1122 }, { "epoch": 0.16749319512286065, "grad_norm": 1.2987107038497925, "learning_rate": 1.902609550919237e-05, "loss": 0.6405, "step": 1123 }, { "epoch": 0.16764234311495582, "grad_norm": 1.1375882625579834, "learning_rate": 1.902401458693142e-05, "loss": 0.6063, "step": 1124 }, { "epoch": 0.16779149110705097, "grad_norm": 1.2279057502746582, "learning_rate": 1.9021931557956404e-05, "loss": 0.6979, "step": 1125 }, { "epoch": 0.16794063909914614, "grad_norm": 1.2913306951522827, "learning_rate": 1.9019846422753615e-05, "loss": 0.7285, "step": 1126 }, { "epoch": 0.16808978709124128, "grad_norm": 1.3522192239761353, "learning_rate": 1.9017759181809846e-05, "loss": 0.7422, "step": 1127 }, { "epoch": 0.16823893508333643, "grad_norm": 1.3651543855667114, "learning_rate": 1.9015669835612375e-05, "loss": 0.6929, "step": 1128 }, { "epoch": 0.1683880830754316, "grad_norm": 0.9301030039787292, "learning_rate": 1.9013578384648968e-05, "loss": 0.6652, "step": 1129 }, { "epoch": 0.16853723106752674, "grad_norm": 1.5012904405593872, "learning_rate": 1.901148482940789e-05, "loss": 0.7555, "step": 1130 }, { "epoch": 0.16868637905962192, "grad_norm": 1.3647011518478394, "learning_rate": 1.90093891703779e-05, "loss": 0.6867, "step": 1131 }, { "epoch": 0.16883552705171706, "grad_norm": 1.3146623373031616, "learning_rate": 1.9007291408048238e-05, "loss": 0.6448, "step": 1132 }, { "epoch": 0.16898467504381223, "grad_norm": 1.2541567087173462, "learning_rate": 1.900519154290864e-05, "loss": 0.6506, "step": 1133 }, { "epoch": 0.16913382303590738, "grad_norm": 1.2715733051300049, "learning_rate": 1.900308957544934e-05, "loss": 0.644, "step": 1134 }, { "epoch": 0.16928297102800255, "grad_norm": 1.47920560836792, "learning_rate": 1.9000985506161047e-05, "loss": 0.7061, "step": 1135 }, { "epoch": 0.1694321190200977, "grad_norm": 1.3700486421585083, "learning_rate": 1.8998879335534973e-05, "loss": 0.7257, "step": 1136 }, { "epoch": 0.16958126701219284, "grad_norm": 1.3187255859375, "learning_rate": 1.899677106406282e-05, "loss": 0.6672, "step": 1137 }, { "epoch": 0.169730415004288, "grad_norm": 1.2195208072662354, "learning_rate": 1.899466069223677e-05, "loss": 0.6788, "step": 1138 }, { "epoch": 0.16987956299638315, "grad_norm": 1.2290998697280884, "learning_rate": 1.899254822054951e-05, "loss": 0.5822, "step": 1139 }, { "epoch": 0.17002871098847833, "grad_norm": 1.3596251010894775, "learning_rate": 1.899043364949421e-05, "loss": 0.7099, "step": 1140 }, { "epoch": 0.17017785898057347, "grad_norm": 1.386335849761963, "learning_rate": 1.8988316979564523e-05, "loss": 0.6615, "step": 1141 }, { "epoch": 0.17032700697266864, "grad_norm": 1.1815781593322754, "learning_rate": 1.8986198211254604e-05, "loss": 0.657, "step": 1142 }, { "epoch": 0.1704761549647638, "grad_norm": 1.31804358959198, "learning_rate": 1.8984077345059092e-05, "loss": 0.7201, "step": 1143 }, { "epoch": 0.17062530295685893, "grad_norm": 1.2035835981369019, "learning_rate": 1.8981954381473122e-05, "loss": 0.6637, "step": 1144 }, { "epoch": 0.1707744509489541, "grad_norm": 1.1567769050598145, "learning_rate": 1.8979829320992307e-05, "loss": 0.5253, "step": 1145 }, { "epoch": 0.17092359894104925, "grad_norm": 1.2730638980865479, "learning_rate": 1.8977702164112757e-05, "loss": 0.7449, "step": 1146 }, { "epoch": 0.17107274693314442, "grad_norm": 1.2289682626724243, "learning_rate": 1.897557291133107e-05, "loss": 0.6636, "step": 1147 }, { "epoch": 0.17122189492523956, "grad_norm": 1.1545565128326416, "learning_rate": 1.8973441563144338e-05, "loss": 0.6504, "step": 1148 }, { "epoch": 0.17137104291733474, "grad_norm": 1.325537919998169, "learning_rate": 1.8971308120050133e-05, "loss": 0.6291, "step": 1149 }, { "epoch": 0.17152019090942988, "grad_norm": 1.2470264434814453, "learning_rate": 1.8969172582546528e-05, "loss": 0.6797, "step": 1150 }, { "epoch": 0.17166933890152503, "grad_norm": 0.9316661953926086, "learning_rate": 1.8967034951132066e-05, "loss": 0.7196, "step": 1151 }, { "epoch": 0.1718184868936202, "grad_norm": 1.186293601989746, "learning_rate": 1.8964895226305802e-05, "loss": 0.6256, "step": 1152 }, { "epoch": 0.17196763488571534, "grad_norm": 1.345491647720337, "learning_rate": 1.8962753408567263e-05, "loss": 0.737, "step": 1153 }, { "epoch": 0.17211678287781051, "grad_norm": 1.297027826309204, "learning_rate": 1.896060949841647e-05, "loss": 0.6926, "step": 1154 }, { "epoch": 0.17226593086990566, "grad_norm": 1.1817659139633179, "learning_rate": 1.8958463496353935e-05, "loss": 0.5907, "step": 1155 }, { "epoch": 0.17241507886200083, "grad_norm": 1.2525031566619873, "learning_rate": 1.8956315402880655e-05, "loss": 0.6845, "step": 1156 }, { "epoch": 0.17256422685409598, "grad_norm": 1.250381350517273, "learning_rate": 1.8954165218498115e-05, "loss": 0.6666, "step": 1157 }, { "epoch": 0.17271337484619112, "grad_norm": 1.4021921157836914, "learning_rate": 1.895201294370829e-05, "loss": 0.7594, "step": 1158 }, { "epoch": 0.1728625228382863, "grad_norm": 1.3466781377792358, "learning_rate": 1.8949858579013645e-05, "loss": 0.6442, "step": 1159 }, { "epoch": 0.17301167083038144, "grad_norm": 1.2683426141738892, "learning_rate": 1.8947702124917126e-05, "loss": 0.6258, "step": 1160 }, { "epoch": 0.1731608188224766, "grad_norm": 1.2182300090789795, "learning_rate": 1.8945543581922173e-05, "loss": 0.6854, "step": 1161 }, { "epoch": 0.17330996681457175, "grad_norm": 1.2714022397994995, "learning_rate": 1.8943382950532713e-05, "loss": 0.6295, "step": 1162 }, { "epoch": 0.17345911480666693, "grad_norm": 1.2960405349731445, "learning_rate": 1.894122023125316e-05, "loss": 0.6605, "step": 1163 }, { "epoch": 0.17360826279876207, "grad_norm": 1.1702824831008911, "learning_rate": 1.8939055424588407e-05, "loss": 0.6432, "step": 1164 }, { "epoch": 0.17375741079085724, "grad_norm": 1.3189047574996948, "learning_rate": 1.8936888531043853e-05, "loss": 0.6896, "step": 1165 }, { "epoch": 0.1739065587829524, "grad_norm": 1.402740716934204, "learning_rate": 1.8934719551125364e-05, "loss": 0.7229, "step": 1166 }, { "epoch": 0.17405570677504753, "grad_norm": 1.3719226121902466, "learning_rate": 1.8932548485339304e-05, "loss": 0.6766, "step": 1167 }, { "epoch": 0.1742048547671427, "grad_norm": 1.3453019857406616, "learning_rate": 1.893037533419253e-05, "loss": 0.6795, "step": 1168 }, { "epoch": 0.17435400275923785, "grad_norm": 1.3188197612762451, "learning_rate": 1.8928200098192372e-05, "loss": 0.6996, "step": 1169 }, { "epoch": 0.17450315075133302, "grad_norm": 1.3408838510513306, "learning_rate": 1.8926022777846647e-05, "loss": 0.7123, "step": 1170 }, { "epoch": 0.17465229874342816, "grad_norm": 1.3334662914276123, "learning_rate": 1.8923843373663676e-05, "loss": 0.7231, "step": 1171 }, { "epoch": 0.17480144673552334, "grad_norm": 1.2855347394943237, "learning_rate": 1.8921661886152248e-05, "loss": 0.6455, "step": 1172 }, { "epoch": 0.17495059472761848, "grad_norm": 1.4144079685211182, "learning_rate": 1.8919478315821646e-05, "loss": 0.634, "step": 1173 }, { "epoch": 0.17509974271971362, "grad_norm": 1.3207191228866577, "learning_rate": 1.8917292663181638e-05, "loss": 0.7224, "step": 1174 }, { "epoch": 0.1752488907118088, "grad_norm": 1.3029683828353882, "learning_rate": 1.8915104928742484e-05, "loss": 0.5655, "step": 1175 }, { "epoch": 0.17539803870390394, "grad_norm": 1.2150758504867554, "learning_rate": 1.8912915113014918e-05, "loss": 0.6832, "step": 1176 }, { "epoch": 0.1755471866959991, "grad_norm": 1.3190016746520996, "learning_rate": 1.8910723216510168e-05, "loss": 0.752, "step": 1177 }, { "epoch": 0.17569633468809426, "grad_norm": 1.273057460784912, "learning_rate": 1.8908529239739946e-05, "loss": 0.5888, "step": 1178 }, { "epoch": 0.17584548268018943, "grad_norm": 1.162975549697876, "learning_rate": 1.8906333183216455e-05, "loss": 0.629, "step": 1179 }, { "epoch": 0.17599463067228457, "grad_norm": 1.3441118001937866, "learning_rate": 1.890413504745237e-05, "loss": 0.6794, "step": 1180 }, { "epoch": 0.17614377866437972, "grad_norm": 1.1924995183944702, "learning_rate": 1.890193483296087e-05, "loss": 0.6718, "step": 1181 }, { "epoch": 0.1762929266564749, "grad_norm": 1.2965853214263916, "learning_rate": 1.88997325402556e-05, "loss": 0.6899, "step": 1182 }, { "epoch": 0.17644207464857004, "grad_norm": 1.2131726741790771, "learning_rate": 1.8897528169850706e-05, "loss": 0.5692, "step": 1183 }, { "epoch": 0.1765912226406652, "grad_norm": 1.3027894496917725, "learning_rate": 1.8895321722260806e-05, "loss": 0.7187, "step": 1184 }, { "epoch": 0.17674037063276035, "grad_norm": 1.2953801155090332, "learning_rate": 1.8893113198001015e-05, "loss": 0.658, "step": 1185 }, { "epoch": 0.17688951862485552, "grad_norm": 1.2833045721054077, "learning_rate": 1.8890902597586926e-05, "loss": 0.6689, "step": 1186 }, { "epoch": 0.17703866661695067, "grad_norm": 1.327085256576538, "learning_rate": 1.8888689921534612e-05, "loss": 0.6898, "step": 1187 }, { "epoch": 0.1771878146090458, "grad_norm": 1.2384374141693115, "learning_rate": 1.8886475170360644e-05, "loss": 0.6207, "step": 1188 }, { "epoch": 0.17733696260114098, "grad_norm": 1.2570382356643677, "learning_rate": 1.888425834458207e-05, "loss": 0.6749, "step": 1189 }, { "epoch": 0.17748611059323613, "grad_norm": 1.2874305248260498, "learning_rate": 1.8882039444716417e-05, "loss": 0.6771, "step": 1190 }, { "epoch": 0.1776352585853313, "grad_norm": 1.17072594165802, "learning_rate": 1.8879818471281703e-05, "loss": 0.6345, "step": 1191 }, { "epoch": 0.17778440657742645, "grad_norm": 1.1973817348480225, "learning_rate": 1.8877595424796425e-05, "loss": 0.6164, "step": 1192 }, { "epoch": 0.17793355456952162, "grad_norm": 1.456977128982544, "learning_rate": 1.887537030577958e-05, "loss": 0.6948, "step": 1193 }, { "epoch": 0.17808270256161676, "grad_norm": 1.1592960357666016, "learning_rate": 1.887314311475062e-05, "loss": 0.6238, "step": 1194 }, { "epoch": 0.1782318505537119, "grad_norm": 1.2766308784484863, "learning_rate": 1.8870913852229513e-05, "loss": 0.5889, "step": 1195 }, { "epoch": 0.17838099854580708, "grad_norm": 1.33330500125885, "learning_rate": 1.886868251873668e-05, "loss": 0.7195, "step": 1196 }, { "epoch": 0.17853014653790222, "grad_norm": 1.2340140342712402, "learning_rate": 1.886644911479305e-05, "loss": 0.6943, "step": 1197 }, { "epoch": 0.1786792945299974, "grad_norm": 1.1250505447387695, "learning_rate": 1.8864213640920023e-05, "loss": 0.5683, "step": 1198 }, { "epoch": 0.17882844252209254, "grad_norm": 1.0277516841888428, "learning_rate": 1.886197609763948e-05, "loss": 0.6665, "step": 1199 }, { "epoch": 0.1789775905141877, "grad_norm": 1.22882878780365, "learning_rate": 1.88597364854738e-05, "loss": 0.6003, "step": 1200 }, { "epoch": 0.17912673850628286, "grad_norm": 1.240781307220459, "learning_rate": 1.8857494804945822e-05, "loss": 0.6447, "step": 1201 }, { "epoch": 0.17927588649837803, "grad_norm": 1.290142297744751, "learning_rate": 1.885525105657889e-05, "loss": 0.6642, "step": 1202 }, { "epoch": 0.17942503449047317, "grad_norm": 1.32684326171875, "learning_rate": 1.8853005240896818e-05, "loss": 0.7154, "step": 1203 }, { "epoch": 0.17957418248256832, "grad_norm": 1.3226685523986816, "learning_rate": 1.8850757358423907e-05, "loss": 0.6496, "step": 1204 }, { "epoch": 0.1797233304746635, "grad_norm": 0.8623102903366089, "learning_rate": 1.884850740968494e-05, "loss": 0.6279, "step": 1205 }, { "epoch": 0.17987247846675863, "grad_norm": 1.1237870454788208, "learning_rate": 1.884625539520518e-05, "loss": 0.6318, "step": 1206 }, { "epoch": 0.1800216264588538, "grad_norm": 1.4173915386199951, "learning_rate": 1.8844001315510375e-05, "loss": 0.6943, "step": 1207 }, { "epoch": 0.18017077445094895, "grad_norm": 1.368360161781311, "learning_rate": 1.8841745171126757e-05, "loss": 0.7045, "step": 1208 }, { "epoch": 0.18031992244304412, "grad_norm": 1.3219306468963623, "learning_rate": 1.8839486962581035e-05, "loss": 0.6413, "step": 1209 }, { "epoch": 0.18046907043513927, "grad_norm": 1.405928373336792, "learning_rate": 1.88372266904004e-05, "loss": 0.73, "step": 1210 }, { "epoch": 0.1806182184272344, "grad_norm": 0.9790107011795044, "learning_rate": 1.8834964355112532e-05, "loss": 0.6441, "step": 1211 }, { "epoch": 0.18076736641932958, "grad_norm": 1.3402612209320068, "learning_rate": 1.8832699957245585e-05, "loss": 0.6117, "step": 1212 }, { "epoch": 0.18091651441142473, "grad_norm": 1.381625771522522, "learning_rate": 1.8830433497328194e-05, "loss": 0.7055, "step": 1213 }, { "epoch": 0.1810656624035199, "grad_norm": 1.293513298034668, "learning_rate": 1.8828164975889486e-05, "loss": 0.7007, "step": 1214 }, { "epoch": 0.18121481039561504, "grad_norm": 1.3099662065505981, "learning_rate": 1.8825894393459058e-05, "loss": 0.6722, "step": 1215 }, { "epoch": 0.18136395838771022, "grad_norm": 1.2959606647491455, "learning_rate": 1.882362175056699e-05, "loss": 0.6562, "step": 1216 }, { "epoch": 0.18151310637980536, "grad_norm": 1.283426284790039, "learning_rate": 1.8821347047743846e-05, "loss": 0.622, "step": 1217 }, { "epoch": 0.1816622543719005, "grad_norm": 1.25002920627594, "learning_rate": 1.8819070285520673e-05, "loss": 0.6144, "step": 1218 }, { "epoch": 0.18181140236399568, "grad_norm": 0.9062471985816956, "learning_rate": 1.8816791464428993e-05, "loss": 0.6597, "step": 1219 }, { "epoch": 0.18196055035609082, "grad_norm": 1.228664755821228, "learning_rate": 1.8814510585000813e-05, "loss": 0.6473, "step": 1220 }, { "epoch": 0.182109698348186, "grad_norm": 1.1811703443527222, "learning_rate": 1.8812227647768616e-05, "loss": 0.6711, "step": 1221 }, { "epoch": 0.18225884634028114, "grad_norm": 1.1835057735443115, "learning_rate": 1.880994265326537e-05, "loss": 0.6301, "step": 1222 }, { "epoch": 0.1824079943323763, "grad_norm": 1.3634670972824097, "learning_rate": 1.8807655602024523e-05, "loss": 0.7316, "step": 1223 }, { "epoch": 0.18255714232447146, "grad_norm": 1.1439709663391113, "learning_rate": 1.8805366494580002e-05, "loss": 0.6138, "step": 1224 }, { "epoch": 0.1827062903165666, "grad_norm": 1.1977349519729614, "learning_rate": 1.880307533146621e-05, "loss": 0.6481, "step": 1225 }, { "epoch": 0.18285543830866177, "grad_norm": 1.4529204368591309, "learning_rate": 1.8800782113218038e-05, "loss": 0.6004, "step": 1226 }, { "epoch": 0.18300458630075692, "grad_norm": 1.3875341415405273, "learning_rate": 1.879848684037085e-05, "loss": 0.733, "step": 1227 }, { "epoch": 0.1831537342928521, "grad_norm": 1.2535732984542847, "learning_rate": 1.8796189513460495e-05, "loss": 0.7015, "step": 1228 }, { "epoch": 0.18330288228494723, "grad_norm": 1.2448205947875977, "learning_rate": 1.8793890133023295e-05, "loss": 0.6413, "step": 1229 }, { "epoch": 0.1834520302770424, "grad_norm": 1.2631036043167114, "learning_rate": 1.8791588699596057e-05, "loss": 0.6724, "step": 1230 }, { "epoch": 0.18360117826913755, "grad_norm": 1.3123592138290405, "learning_rate": 1.878928521371606e-05, "loss": 0.6929, "step": 1231 }, { "epoch": 0.18375032626123272, "grad_norm": 1.1540993452072144, "learning_rate": 1.878697967592108e-05, "loss": 0.6591, "step": 1232 }, { "epoch": 0.18389947425332787, "grad_norm": 1.3805643320083618, "learning_rate": 1.878467208674935e-05, "loss": 0.7189, "step": 1233 }, { "epoch": 0.184048622245423, "grad_norm": 1.2383253574371338, "learning_rate": 1.8782362446739594e-05, "loss": 0.7843, "step": 1234 }, { "epoch": 0.18419777023751818, "grad_norm": 1.1435985565185547, "learning_rate": 1.878005075643101e-05, "loss": 0.6396, "step": 1235 }, { "epoch": 0.18434691822961333, "grad_norm": 0.8718738555908203, "learning_rate": 1.877773701636328e-05, "loss": 0.6392, "step": 1236 }, { "epoch": 0.1844960662217085, "grad_norm": 1.3599838018417358, "learning_rate": 1.877542122707656e-05, "loss": 0.6875, "step": 1237 }, { "epoch": 0.18464521421380364, "grad_norm": 1.3243695497512817, "learning_rate": 1.8773103389111486e-05, "loss": 0.6087, "step": 1238 }, { "epoch": 0.18479436220589882, "grad_norm": 1.2169716358184814, "learning_rate": 1.8770783503009174e-05, "loss": 0.625, "step": 1239 }, { "epoch": 0.18494351019799396, "grad_norm": 1.2082395553588867, "learning_rate": 1.8768461569311215e-05, "loss": 0.6844, "step": 1240 }, { "epoch": 0.1850926581900891, "grad_norm": 1.274035930633545, "learning_rate": 1.8766137588559676e-05, "loss": 0.7038, "step": 1241 }, { "epoch": 0.18524180618218428, "grad_norm": 1.227041244506836, "learning_rate": 1.876381156129711e-05, "loss": 0.5923, "step": 1242 }, { "epoch": 0.18539095417427942, "grad_norm": 1.1181453466415405, "learning_rate": 1.876148348806654e-05, "loss": 0.6031, "step": 1243 }, { "epoch": 0.1855401021663746, "grad_norm": 1.3253579139709473, "learning_rate": 1.875915336941147e-05, "loss": 0.6154, "step": 1244 }, { "epoch": 0.18568925015846974, "grad_norm": 1.2323263883590698, "learning_rate": 1.8756821205875885e-05, "loss": 0.6668, "step": 1245 }, { "epoch": 0.1858383981505649, "grad_norm": 1.1987683773040771, "learning_rate": 1.875448699800424e-05, "loss": 0.6831, "step": 1246 }, { "epoch": 0.18598754614266005, "grad_norm": 1.2852225303649902, "learning_rate": 1.8752150746341468e-05, "loss": 0.6604, "step": 1247 }, { "epoch": 0.1861366941347552, "grad_norm": 1.1464412212371826, "learning_rate": 1.874981245143299e-05, "loss": 0.6501, "step": 1248 }, { "epoch": 0.18628584212685037, "grad_norm": 1.357778549194336, "learning_rate": 1.8747472113824687e-05, "loss": 0.6708, "step": 1249 }, { "epoch": 0.18643499011894551, "grad_norm": 1.347765326499939, "learning_rate": 1.8745129734062934e-05, "loss": 0.7731, "step": 1250 }, { "epoch": 0.1865841381110407, "grad_norm": 1.1788069009780884, "learning_rate": 1.8742785312694564e-05, "loss": 0.5919, "step": 1251 }, { "epoch": 0.18673328610313583, "grad_norm": 1.2953357696533203, "learning_rate": 1.8740438850266907e-05, "loss": 0.7066, "step": 1252 }, { "epoch": 0.186882434095231, "grad_norm": 1.1821653842926025, "learning_rate": 1.873809034732776e-05, "loss": 0.5357, "step": 1253 }, { "epoch": 0.18703158208732615, "grad_norm": 1.223190426826477, "learning_rate": 1.873573980442539e-05, "loss": 0.6719, "step": 1254 }, { "epoch": 0.1871807300794213, "grad_norm": 1.205953598022461, "learning_rate": 1.8733387222108546e-05, "loss": 0.6363, "step": 1255 }, { "epoch": 0.18732987807151646, "grad_norm": 1.3508793115615845, "learning_rate": 1.873103260092646e-05, "loss": 0.7173, "step": 1256 }, { "epoch": 0.1874790260636116, "grad_norm": 1.4176173210144043, "learning_rate": 1.8728675941428827e-05, "loss": 0.7246, "step": 1257 }, { "epoch": 0.18762817405570678, "grad_norm": 1.2368173599243164, "learning_rate": 1.872631724416583e-05, "loss": 0.6431, "step": 1258 }, { "epoch": 0.18777732204780193, "grad_norm": 1.131060004234314, "learning_rate": 1.8723956509688115e-05, "loss": 0.6627, "step": 1259 }, { "epoch": 0.1879264700398971, "grad_norm": 1.5161124467849731, "learning_rate": 1.8721593738546815e-05, "loss": 0.7257, "step": 1260 }, { "epoch": 0.18807561803199224, "grad_norm": 1.2149955034255981, "learning_rate": 1.8719228931293537e-05, "loss": 0.6211, "step": 1261 }, { "epoch": 0.18822476602408741, "grad_norm": 1.3826031684875488, "learning_rate": 1.8716862088480353e-05, "loss": 0.7404, "step": 1262 }, { "epoch": 0.18837391401618256, "grad_norm": 1.2875319719314575, "learning_rate": 1.8714493210659824e-05, "loss": 0.6218, "step": 1263 }, { "epoch": 0.1885230620082777, "grad_norm": 1.2624551057815552, "learning_rate": 1.8712122298384977e-05, "loss": 0.6621, "step": 1264 }, { "epoch": 0.18867221000037288, "grad_norm": 1.2557015419006348, "learning_rate": 1.8709749352209315e-05, "loss": 0.6683, "step": 1265 }, { "epoch": 0.18882135799246802, "grad_norm": 1.2865291833877563, "learning_rate": 1.8707374372686825e-05, "loss": 0.7123, "step": 1266 }, { "epoch": 0.1889705059845632, "grad_norm": 0.915653645992279, "learning_rate": 1.870499736037195e-05, "loss": 0.6663, "step": 1267 }, { "epoch": 0.18911965397665834, "grad_norm": 1.2908941507339478, "learning_rate": 1.8702618315819628e-05, "loss": 0.7311, "step": 1268 }, { "epoch": 0.1892688019687535, "grad_norm": 1.195971965789795, "learning_rate": 1.8700237239585253e-05, "loss": 0.6069, "step": 1269 }, { "epoch": 0.18941794996084865, "grad_norm": 1.2524992227554321, "learning_rate": 1.8697854132224713e-05, "loss": 0.6393, "step": 1270 }, { "epoch": 0.1895670979529438, "grad_norm": 1.1754839420318604, "learning_rate": 1.8695468994294355e-05, "loss": 0.6521, "step": 1271 }, { "epoch": 0.18971624594503897, "grad_norm": 1.2767118215560913, "learning_rate": 1.8693081826351002e-05, "loss": 0.5821, "step": 1272 }, { "epoch": 0.1898653939371341, "grad_norm": 1.2981126308441162, "learning_rate": 1.869069262895196e-05, "loss": 0.625, "step": 1273 }, { "epoch": 0.19001454192922929, "grad_norm": 1.2321557998657227, "learning_rate": 1.8688301402654995e-05, "loss": 0.6104, "step": 1274 }, { "epoch": 0.19016368992132443, "grad_norm": 1.2929730415344238, "learning_rate": 1.8685908148018362e-05, "loss": 0.6071, "step": 1275 }, { "epoch": 0.1903128379134196, "grad_norm": 1.3465652465820312, "learning_rate": 1.868351286560077e-05, "loss": 0.7961, "step": 1276 }, { "epoch": 0.19046198590551475, "grad_norm": 1.188463568687439, "learning_rate": 1.868111555596143e-05, "loss": 0.646, "step": 1277 }, { "epoch": 0.1906111338976099, "grad_norm": 1.456647276878357, "learning_rate": 1.8678716219659992e-05, "loss": 0.731, "step": 1278 }, { "epoch": 0.19076028188970506, "grad_norm": 1.1935629844665527, "learning_rate": 1.867631485725661e-05, "loss": 0.5779, "step": 1279 }, { "epoch": 0.1909094298818002, "grad_norm": 1.3039791584014893, "learning_rate": 1.867391146931189e-05, "loss": 0.6533, "step": 1280 }, { "epoch": 0.19105857787389538, "grad_norm": 1.258102536201477, "learning_rate": 1.8671506056386918e-05, "loss": 0.6478, "step": 1281 }, { "epoch": 0.19120772586599052, "grad_norm": 1.256650686264038, "learning_rate": 1.866909861904326e-05, "loss": 0.7597, "step": 1282 }, { "epoch": 0.1913568738580857, "grad_norm": 0.8550626635551453, "learning_rate": 1.8666689157842935e-05, "loss": 0.6253, "step": 1283 }, { "epoch": 0.19150602185018084, "grad_norm": 1.1962740421295166, "learning_rate": 1.8664277673348463e-05, "loss": 0.6519, "step": 1284 }, { "epoch": 0.19165516984227599, "grad_norm": 1.1922811269760132, "learning_rate": 1.866186416612281e-05, "loss": 0.674, "step": 1285 }, { "epoch": 0.19180431783437116, "grad_norm": 1.1486196517944336, "learning_rate": 1.8659448636729426e-05, "loss": 0.6199, "step": 1286 }, { "epoch": 0.1919534658264663, "grad_norm": 1.1846733093261719, "learning_rate": 1.865703108573223e-05, "loss": 0.5832, "step": 1287 }, { "epoch": 0.19210261381856147, "grad_norm": 1.3946806192398071, "learning_rate": 1.8654611513695622e-05, "loss": 0.7349, "step": 1288 }, { "epoch": 0.19225176181065662, "grad_norm": 1.247755765914917, "learning_rate": 1.8652189921184462e-05, "loss": 0.7292, "step": 1289 }, { "epoch": 0.1924009098027518, "grad_norm": 1.3507592678070068, "learning_rate": 1.8649766308764085e-05, "loss": 0.6495, "step": 1290 }, { "epoch": 0.19255005779484693, "grad_norm": 1.2331656217575073, "learning_rate": 1.8647340677000302e-05, "loss": 0.6004, "step": 1291 }, { "epoch": 0.1926992057869421, "grad_norm": 0.962810754776001, "learning_rate": 1.864491302645939e-05, "loss": 0.6563, "step": 1292 }, { "epoch": 0.19284835377903725, "grad_norm": 1.2304083108901978, "learning_rate": 1.8642483357708102e-05, "loss": 0.67, "step": 1293 }, { "epoch": 0.1929975017711324, "grad_norm": 1.2344094514846802, "learning_rate": 1.8640051671313656e-05, "loss": 0.7167, "step": 1294 }, { "epoch": 0.19314664976322757, "grad_norm": 1.2754974365234375, "learning_rate": 1.8637617967843748e-05, "loss": 0.7139, "step": 1295 }, { "epoch": 0.1932957977553227, "grad_norm": 1.138656497001648, "learning_rate": 1.8635182247866545e-05, "loss": 0.6713, "step": 1296 }, { "epoch": 0.19344494574741788, "grad_norm": 1.1317057609558105, "learning_rate": 1.863274451195067e-05, "loss": 0.6092, "step": 1297 }, { "epoch": 0.19359409373951303, "grad_norm": 1.3214174509048462, "learning_rate": 1.8630304760665237e-05, "loss": 0.6918, "step": 1298 }, { "epoch": 0.1937432417316082, "grad_norm": 1.188106656074524, "learning_rate": 1.8627862994579823e-05, "loss": 0.6351, "step": 1299 }, { "epoch": 0.19389238972370335, "grad_norm": 0.8883368372917175, "learning_rate": 1.862541921426447e-05, "loss": 0.6781, "step": 1300 }, { "epoch": 0.1940415377157985, "grad_norm": 1.2200433015823364, "learning_rate": 1.8622973420289692e-05, "loss": 0.6542, "step": 1301 }, { "epoch": 0.19419068570789366, "grad_norm": 1.3861056566238403, "learning_rate": 1.862052561322648e-05, "loss": 0.7306, "step": 1302 }, { "epoch": 0.1943398336999888, "grad_norm": 1.1913501024246216, "learning_rate": 1.8618075793646292e-05, "loss": 0.6579, "step": 1303 }, { "epoch": 0.19448898169208398, "grad_norm": 1.2580019235610962, "learning_rate": 1.8615623962121043e-05, "loss": 0.754, "step": 1304 }, { "epoch": 0.19463812968417912, "grad_norm": 1.1542595624923706, "learning_rate": 1.861317011922314e-05, "loss": 0.593, "step": 1305 }, { "epoch": 0.1947872776762743, "grad_norm": 1.2725908756256104, "learning_rate": 1.861071426552545e-05, "loss": 0.6838, "step": 1306 }, { "epoch": 0.19493642566836944, "grad_norm": 1.1621066331863403, "learning_rate": 1.8608256401601294e-05, "loss": 0.6495, "step": 1307 }, { "epoch": 0.19508557366046458, "grad_norm": 1.2553623914718628, "learning_rate": 1.860579652802449e-05, "loss": 0.758, "step": 1308 }, { "epoch": 0.19523472165255976, "grad_norm": 1.4298962354660034, "learning_rate": 1.8603334645369302e-05, "loss": 0.7452, "step": 1309 }, { "epoch": 0.1953838696446549, "grad_norm": 1.094214677810669, "learning_rate": 1.8600870754210477e-05, "loss": 0.622, "step": 1310 }, { "epoch": 0.19553301763675007, "grad_norm": 1.2186905145645142, "learning_rate": 1.859840485512323e-05, "loss": 0.662, "step": 1311 }, { "epoch": 0.19568216562884522, "grad_norm": 1.2142183780670166, "learning_rate": 1.8595936948683234e-05, "loss": 0.6499, "step": 1312 }, { "epoch": 0.1958313136209404, "grad_norm": 1.2059392929077148, "learning_rate": 1.8593467035466635e-05, "loss": 0.675, "step": 1313 }, { "epoch": 0.19598046161303553, "grad_norm": 1.257549524307251, "learning_rate": 1.859099511605006e-05, "loss": 0.6798, "step": 1314 }, { "epoch": 0.19612960960513068, "grad_norm": 1.2369314432144165, "learning_rate": 1.8588521191010586e-05, "loss": 0.6663, "step": 1315 }, { "epoch": 0.19627875759722585, "grad_norm": 1.3046118021011353, "learning_rate": 1.8586045260925773e-05, "loss": 0.7144, "step": 1316 }, { "epoch": 0.196427905589321, "grad_norm": 1.3991259336471558, "learning_rate": 1.858356732637364e-05, "loss": 0.6622, "step": 1317 }, { "epoch": 0.19657705358141617, "grad_norm": 1.1855661869049072, "learning_rate": 1.8581087387932676e-05, "loss": 0.6255, "step": 1318 }, { "epoch": 0.1967262015735113, "grad_norm": 1.2207093238830566, "learning_rate": 1.8578605446181838e-05, "loss": 0.6013, "step": 1319 }, { "epoch": 0.19687534956560648, "grad_norm": 1.2570966482162476, "learning_rate": 1.8576121501700553e-05, "loss": 0.6111, "step": 1320 }, { "epoch": 0.19702449755770163, "grad_norm": 1.2286419868469238, "learning_rate": 1.857363555506871e-05, "loss": 0.6138, "step": 1321 }, { "epoch": 0.19717364554979677, "grad_norm": 1.297150731086731, "learning_rate": 1.8571147606866677e-05, "loss": 0.7098, "step": 1322 }, { "epoch": 0.19732279354189194, "grad_norm": 1.2984563112258911, "learning_rate": 1.8568657657675272e-05, "loss": 0.668, "step": 1323 }, { "epoch": 0.1974719415339871, "grad_norm": 1.3900433778762817, "learning_rate": 1.85661657080758e-05, "loss": 0.7269, "step": 1324 }, { "epoch": 0.19762108952608226, "grad_norm": 1.183046579360962, "learning_rate": 1.8563671758650013e-05, "loss": 0.6715, "step": 1325 }, { "epoch": 0.1977702375181774, "grad_norm": 1.2472126483917236, "learning_rate": 1.8561175809980144e-05, "loss": 0.6495, "step": 1326 }, { "epoch": 0.19791938551027258, "grad_norm": 0.9567558169364929, "learning_rate": 1.8558677862648887e-05, "loss": 0.6883, "step": 1327 }, { "epoch": 0.19806853350236772, "grad_norm": 1.2453522682189941, "learning_rate": 1.8556177917239406e-05, "loss": 0.6132, "step": 1328 }, { "epoch": 0.1982176814944629, "grad_norm": 1.2495241165161133, "learning_rate": 1.8553675974335328e-05, "loss": 0.6739, "step": 1329 }, { "epoch": 0.19836682948655804, "grad_norm": 1.3261890411376953, "learning_rate": 1.8551172034520746e-05, "loss": 0.691, "step": 1330 }, { "epoch": 0.19851597747865318, "grad_norm": 1.183061957359314, "learning_rate": 1.854866609838022e-05, "loss": 0.6601, "step": 1331 }, { "epoch": 0.19866512547074835, "grad_norm": 0.9166485071182251, "learning_rate": 1.8546158166498783e-05, "loss": 0.6682, "step": 1332 }, { "epoch": 0.1988142734628435, "grad_norm": 1.1780014038085938, "learning_rate": 1.854364823946192e-05, "loss": 0.6495, "step": 1333 }, { "epoch": 0.19896342145493867, "grad_norm": 1.3327997922897339, "learning_rate": 1.8541136317855598e-05, "loss": 0.721, "step": 1334 }, { "epoch": 0.19911256944703382, "grad_norm": 1.3039010763168335, "learning_rate": 1.8538622402266232e-05, "loss": 0.7311, "step": 1335 }, { "epoch": 0.199261717439129, "grad_norm": 1.1980865001678467, "learning_rate": 1.853610649328072e-05, "loss": 0.6615, "step": 1336 }, { "epoch": 0.19941086543122413, "grad_norm": 1.1713671684265137, "learning_rate": 1.853358859148641e-05, "loss": 0.7104, "step": 1337 }, { "epoch": 0.19956001342331928, "grad_norm": 1.372649073600769, "learning_rate": 1.8531068697471125e-05, "loss": 0.6502, "step": 1338 }, { "epoch": 0.19970916141541445, "grad_norm": 1.1419477462768555, "learning_rate": 1.8528546811823156e-05, "loss": 0.537, "step": 1339 }, { "epoch": 0.1998583094075096, "grad_norm": 1.1521636247634888, "learning_rate": 1.8526022935131244e-05, "loss": 0.6788, "step": 1340 }, { "epoch": 0.20000745739960477, "grad_norm": 1.273037075996399, "learning_rate": 1.852349706798461e-05, "loss": 0.7021, "step": 1341 }, { "epoch": 0.2001566053916999, "grad_norm": 1.0940316915512085, "learning_rate": 1.8520969210972932e-05, "loss": 0.5971, "step": 1342 }, { "epoch": 0.20030575338379508, "grad_norm": 1.2608495950698853, "learning_rate": 1.8518439364686358e-05, "loss": 0.7169, "step": 1343 }, { "epoch": 0.20045490137589023, "grad_norm": 1.1679201126098633, "learning_rate": 1.8515907529715492e-05, "loss": 0.6384, "step": 1344 }, { "epoch": 0.20060404936798537, "grad_norm": 1.092212200164795, "learning_rate": 1.8513373706651406e-05, "loss": 0.6505, "step": 1345 }, { "epoch": 0.20075319736008054, "grad_norm": 1.2613786458969116, "learning_rate": 1.8510837896085642e-05, "loss": 0.6759, "step": 1346 }, { "epoch": 0.2009023453521757, "grad_norm": 1.3638575077056885, "learning_rate": 1.85083000986102e-05, "loss": 0.6978, "step": 1347 }, { "epoch": 0.20105149334427086, "grad_norm": 1.1635503768920898, "learning_rate": 1.8505760314817544e-05, "loss": 0.6879, "step": 1348 }, { "epoch": 0.201200641336366, "grad_norm": 1.2587816715240479, "learning_rate": 1.8503218545300603e-05, "loss": 0.6028, "step": 1349 }, { "epoch": 0.20134978932846118, "grad_norm": 1.4151403903961182, "learning_rate": 1.850067479065277e-05, "loss": 0.6929, "step": 1350 }, { "epoch": 0.20149893732055632, "grad_norm": 1.2400846481323242, "learning_rate": 1.84981290514679e-05, "loss": 0.6987, "step": 1351 }, { "epoch": 0.20164808531265146, "grad_norm": 1.1730631589889526, "learning_rate": 1.8495581328340315e-05, "loss": 0.6597, "step": 1352 }, { "epoch": 0.20179723330474664, "grad_norm": 1.1204549074172974, "learning_rate": 1.8493031621864792e-05, "loss": 0.6622, "step": 1353 }, { "epoch": 0.20194638129684178, "grad_norm": 1.3051378726959229, "learning_rate": 1.849047993263658e-05, "loss": 0.6919, "step": 1354 }, { "epoch": 0.20209552928893695, "grad_norm": 1.3176286220550537, "learning_rate": 1.8487926261251386e-05, "loss": 0.6674, "step": 1355 }, { "epoch": 0.2022446772810321, "grad_norm": 1.2007055282592773, "learning_rate": 1.8485370608305384e-05, "loss": 0.6057, "step": 1356 }, { "epoch": 0.20239382527312727, "grad_norm": 1.1351161003112793, "learning_rate": 1.8482812974395205e-05, "loss": 0.6307, "step": 1357 }, { "epoch": 0.20254297326522241, "grad_norm": 1.1887325048446655, "learning_rate": 1.848025336011794e-05, "loss": 0.6667, "step": 1358 }, { "epoch": 0.2026921212573176, "grad_norm": 1.3913487195968628, "learning_rate": 1.8477691766071156e-05, "loss": 0.7095, "step": 1359 }, { "epoch": 0.20284126924941273, "grad_norm": 1.4137318134307861, "learning_rate": 1.847512819285287e-05, "loss": 0.7218, "step": 1360 }, { "epoch": 0.20299041724150788, "grad_norm": 1.2400778532028198, "learning_rate": 1.8472562641061564e-05, "loss": 0.6121, "step": 1361 }, { "epoch": 0.20313956523360305, "grad_norm": 1.2147746086120605, "learning_rate": 1.8469995111296183e-05, "loss": 0.6177, "step": 1362 }, { "epoch": 0.2032887132256982, "grad_norm": 1.185765027999878, "learning_rate": 1.8467425604156133e-05, "loss": 0.7412, "step": 1363 }, { "epoch": 0.20343786121779336, "grad_norm": 1.4098436832427979, "learning_rate": 1.846485412024128e-05, "loss": 0.6799, "step": 1364 }, { "epoch": 0.2035870092098885, "grad_norm": 1.2702680826187134, "learning_rate": 1.8462280660151963e-05, "loss": 0.6304, "step": 1365 }, { "epoch": 0.20373615720198368, "grad_norm": 1.1970350742340088, "learning_rate": 1.8459705224488958e-05, "loss": 0.6784, "step": 1366 }, { "epoch": 0.20388530519407883, "grad_norm": 1.1990994215011597, "learning_rate": 1.845712781385353e-05, "loss": 0.6081, "step": 1367 }, { "epoch": 0.20403445318617397, "grad_norm": 1.3000037670135498, "learning_rate": 1.8454548428847383e-05, "loss": 0.6652, "step": 1368 }, { "epoch": 0.20418360117826914, "grad_norm": 1.1904083490371704, "learning_rate": 1.8451967070072693e-05, "loss": 0.6361, "step": 1369 }, { "epoch": 0.2043327491703643, "grad_norm": 1.4293203353881836, "learning_rate": 1.84493837381321e-05, "loss": 0.6703, "step": 1370 }, { "epoch": 0.20448189716245946, "grad_norm": 1.2332234382629395, "learning_rate": 1.844679843362869e-05, "loss": 0.5801, "step": 1371 }, { "epoch": 0.2046310451545546, "grad_norm": 1.293208122253418, "learning_rate": 1.844421115716603e-05, "loss": 0.6552, "step": 1372 }, { "epoch": 0.20478019314664977, "grad_norm": 1.260692834854126, "learning_rate": 1.8441621909348132e-05, "loss": 0.7204, "step": 1373 }, { "epoch": 0.20492934113874492, "grad_norm": 1.1903538703918457, "learning_rate": 1.8439030690779468e-05, "loss": 0.7121, "step": 1374 }, { "epoch": 0.20507848913084006, "grad_norm": 1.1062513589859009, "learning_rate": 1.8436437502064976e-05, "loss": 0.6295, "step": 1375 }, { "epoch": 0.20522763712293524, "grad_norm": 1.323517918586731, "learning_rate": 1.8433842343810058e-05, "loss": 0.6506, "step": 1376 }, { "epoch": 0.20537678511503038, "grad_norm": 1.4293711185455322, "learning_rate": 1.8431245216620562e-05, "loss": 0.7381, "step": 1377 }, { "epoch": 0.20552593310712555, "grad_norm": 1.324733018875122, "learning_rate": 1.8428646121102815e-05, "loss": 0.619, "step": 1378 }, { "epoch": 0.2056750810992207, "grad_norm": 1.2142924070358276, "learning_rate": 1.8426045057863585e-05, "loss": 0.6034, "step": 1379 }, { "epoch": 0.20582422909131587, "grad_norm": 1.1989132165908813, "learning_rate": 1.8423442027510104e-05, "loss": 0.6253, "step": 1380 }, { "epoch": 0.205973377083411, "grad_norm": 1.1932308673858643, "learning_rate": 1.8420837030650073e-05, "loss": 0.6986, "step": 1381 }, { "epoch": 0.20612252507550616, "grad_norm": 1.2196433544158936, "learning_rate": 1.8418230067891644e-05, "loss": 0.6325, "step": 1382 }, { "epoch": 0.20627167306760133, "grad_norm": 1.276810646057129, "learning_rate": 1.8415621139843426e-05, "loss": 0.7124, "step": 1383 }, { "epoch": 0.20642082105969647, "grad_norm": 1.1931428909301758, "learning_rate": 1.8413010247114492e-05, "loss": 0.6444, "step": 1384 }, { "epoch": 0.20656996905179165, "grad_norm": 1.1451082229614258, "learning_rate": 1.841039739031437e-05, "loss": 0.5959, "step": 1385 }, { "epoch": 0.2067191170438868, "grad_norm": 1.1998062133789062, "learning_rate": 1.840778257005305e-05, "loss": 0.6596, "step": 1386 }, { "epoch": 0.20686826503598196, "grad_norm": 1.2076616287231445, "learning_rate": 1.8405165786940976e-05, "loss": 0.7158, "step": 1387 }, { "epoch": 0.2070174130280771, "grad_norm": 1.3531197309494019, "learning_rate": 1.8402547041589057e-05, "loss": 0.7319, "step": 1388 }, { "epoch": 0.20716656102017228, "grad_norm": 1.2820454835891724, "learning_rate": 1.8399926334608654e-05, "loss": 0.6633, "step": 1389 }, { "epoch": 0.20731570901226742, "grad_norm": 1.3683836460113525, "learning_rate": 1.8397303666611588e-05, "loss": 0.7041, "step": 1390 }, { "epoch": 0.20746485700436257, "grad_norm": 1.2800101041793823, "learning_rate": 1.839467903821014e-05, "loss": 0.6454, "step": 1391 }, { "epoch": 0.20761400499645774, "grad_norm": 1.4214273691177368, "learning_rate": 1.8392052450017036e-05, "loss": 0.7151, "step": 1392 }, { "epoch": 0.20776315298855288, "grad_norm": 1.3491350412368774, "learning_rate": 1.838942390264548e-05, "loss": 0.6156, "step": 1393 }, { "epoch": 0.20791230098064806, "grad_norm": 1.2230280637741089, "learning_rate": 1.8386793396709123e-05, "loss": 0.699, "step": 1394 }, { "epoch": 0.2080614489727432, "grad_norm": 1.2768402099609375, "learning_rate": 1.838416093282207e-05, "loss": 0.6641, "step": 1395 }, { "epoch": 0.20821059696483837, "grad_norm": 1.3270107507705688, "learning_rate": 1.838152651159889e-05, "loss": 0.6539, "step": 1396 }, { "epoch": 0.20835974495693352, "grad_norm": 1.2856131792068481, "learning_rate": 1.83788901336546e-05, "loss": 0.682, "step": 1397 }, { "epoch": 0.20850889294902866, "grad_norm": 1.2248469591140747, "learning_rate": 1.8376251799604684e-05, "loss": 0.7423, "step": 1398 }, { "epoch": 0.20865804094112383, "grad_norm": 1.0862212181091309, "learning_rate": 1.8373611510065077e-05, "loss": 0.5947, "step": 1399 }, { "epoch": 0.20880718893321898, "grad_norm": 1.2741128206253052, "learning_rate": 1.837096926565217e-05, "loss": 0.6409, "step": 1400 }, { "epoch": 0.20895633692531415, "grad_norm": 1.1456025838851929, "learning_rate": 1.8368325066982817e-05, "loss": 0.7023, "step": 1401 }, { "epoch": 0.2091054849174093, "grad_norm": 1.1300121545791626, "learning_rate": 1.836567891467431e-05, "loss": 0.6791, "step": 1402 }, { "epoch": 0.20925463290950447, "grad_norm": 1.4069874286651611, "learning_rate": 1.8363030809344425e-05, "loss": 0.6732, "step": 1403 }, { "epoch": 0.2094037809015996, "grad_norm": 1.264992356300354, "learning_rate": 1.8360380751611375e-05, "loss": 0.7253, "step": 1404 }, { "epoch": 0.20955292889369476, "grad_norm": 1.260876178741455, "learning_rate": 1.8357728742093827e-05, "loss": 0.6457, "step": 1405 }, { "epoch": 0.20970207688578993, "grad_norm": 1.269580602645874, "learning_rate": 1.8355074781410918e-05, "loss": 0.701, "step": 1406 }, { "epoch": 0.20985122487788507, "grad_norm": 0.9341393113136292, "learning_rate": 1.835241887018223e-05, "loss": 0.7012, "step": 1407 }, { "epoch": 0.21000037286998025, "grad_norm": 1.1515933275222778, "learning_rate": 1.8349761009027794e-05, "loss": 0.6107, "step": 1408 }, { "epoch": 0.2101495208620754, "grad_norm": 1.1052286624908447, "learning_rate": 1.8347101198568116e-05, "loss": 0.5848, "step": 1409 }, { "epoch": 0.21029866885417056, "grad_norm": 1.2097163200378418, "learning_rate": 1.8344439439424142e-05, "loss": 0.6702, "step": 1410 }, { "epoch": 0.2104478168462657, "grad_norm": 1.1457406282424927, "learning_rate": 1.8341775732217275e-05, "loss": 0.611, "step": 1411 }, { "epoch": 0.21059696483836085, "grad_norm": 1.3098134994506836, "learning_rate": 1.8339110077569376e-05, "loss": 0.6768, "step": 1412 }, { "epoch": 0.21074611283045602, "grad_norm": 1.3204673528671265, "learning_rate": 1.8336442476102757e-05, "loss": 0.6847, "step": 1413 }, { "epoch": 0.21089526082255117, "grad_norm": 0.876653790473938, "learning_rate": 1.8333772928440187e-05, "loss": 0.6235, "step": 1414 }, { "epoch": 0.21104440881464634, "grad_norm": 1.2857518196105957, "learning_rate": 1.8331101435204896e-05, "loss": 0.6551, "step": 1415 }, { "epoch": 0.21119355680674148, "grad_norm": 1.3104336261749268, "learning_rate": 1.832842799702055e-05, "loss": 0.7651, "step": 1416 }, { "epoch": 0.21134270479883666, "grad_norm": 1.2191275358200073, "learning_rate": 1.832575261451129e-05, "loss": 0.7358, "step": 1417 }, { "epoch": 0.2114918527909318, "grad_norm": 1.193668246269226, "learning_rate": 1.8323075288301693e-05, "loss": 0.6027, "step": 1418 }, { "epoch": 0.21164100078302694, "grad_norm": 1.2305941581726074, "learning_rate": 1.8320396019016805e-05, "loss": 0.6622, "step": 1419 }, { "epoch": 0.21179014877512212, "grad_norm": 1.1798638105392456, "learning_rate": 1.8317714807282115e-05, "loss": 0.6504, "step": 1420 }, { "epoch": 0.21193929676721726, "grad_norm": 1.156719446182251, "learning_rate": 1.831503165372357e-05, "loss": 0.6883, "step": 1421 }, { "epoch": 0.21208844475931243, "grad_norm": 1.2019075155258179, "learning_rate": 1.831234655896757e-05, "loss": 0.6618, "step": 1422 }, { "epoch": 0.21223759275140758, "grad_norm": 1.290226697921753, "learning_rate": 1.8309659523640965e-05, "loss": 0.6949, "step": 1423 }, { "epoch": 0.21238674074350275, "grad_norm": 1.258439064025879, "learning_rate": 1.8306970548371062e-05, "loss": 0.7372, "step": 1424 }, { "epoch": 0.2125358887355979, "grad_norm": 1.0936964750289917, "learning_rate": 1.830427963378562e-05, "loss": 0.5916, "step": 1425 }, { "epoch": 0.21268503672769307, "grad_norm": 1.2285113334655762, "learning_rate": 1.830158678051285e-05, "loss": 0.6693, "step": 1426 }, { "epoch": 0.2128341847197882, "grad_norm": 1.2015728950500488, "learning_rate": 1.8298891989181417e-05, "loss": 0.6645, "step": 1427 }, { "epoch": 0.21298333271188336, "grad_norm": 1.3523821830749512, "learning_rate": 1.8296195260420438e-05, "loss": 0.6446, "step": 1428 }, { "epoch": 0.21313248070397853, "grad_norm": 1.2347209453582764, "learning_rate": 1.8293496594859478e-05, "loss": 0.796, "step": 1429 }, { "epoch": 0.21328162869607367, "grad_norm": 1.16396963596344, "learning_rate": 1.829079599312856e-05, "loss": 0.6127, "step": 1430 }, { "epoch": 0.21343077668816884, "grad_norm": 1.2499302625656128, "learning_rate": 1.828809345585816e-05, "loss": 0.6502, "step": 1431 }, { "epoch": 0.213579924680264, "grad_norm": 1.1687771081924438, "learning_rate": 1.8285388983679192e-05, "loss": 0.7031, "step": 1432 }, { "epoch": 0.21372907267235916, "grad_norm": 1.1917413473129272, "learning_rate": 1.8282682577223044e-05, "loss": 0.6811, "step": 1433 }, { "epoch": 0.2138782206644543, "grad_norm": 0.8736386895179749, "learning_rate": 1.8279974237121537e-05, "loss": 0.6462, "step": 1434 }, { "epoch": 0.21402736865654945, "grad_norm": 1.2580533027648926, "learning_rate": 1.8277263964006958e-05, "loss": 0.672, "step": 1435 }, { "epoch": 0.21417651664864462, "grad_norm": 1.2917382717132568, "learning_rate": 1.8274551758512026e-05, "loss": 0.7136, "step": 1436 }, { "epoch": 0.21432566464073977, "grad_norm": 1.202175498008728, "learning_rate": 1.8271837621269933e-05, "loss": 0.6924, "step": 1437 }, { "epoch": 0.21447481263283494, "grad_norm": 1.0752456188201904, "learning_rate": 1.8269121552914307e-05, "loss": 0.5308, "step": 1438 }, { "epoch": 0.21462396062493008, "grad_norm": 1.223292589187622, "learning_rate": 1.8266403554079237e-05, "loss": 0.6499, "step": 1439 }, { "epoch": 0.21477310861702525, "grad_norm": 0.9763250946998596, "learning_rate": 1.8263683625399244e-05, "loss": 0.6986, "step": 1440 }, { "epoch": 0.2149222566091204, "grad_norm": 1.3701043128967285, "learning_rate": 1.826096176750933e-05, "loss": 0.7291, "step": 1441 }, { "epoch": 0.21507140460121554, "grad_norm": 1.2797218561172485, "learning_rate": 1.8258237981044915e-05, "loss": 0.6533, "step": 1442 }, { "epoch": 0.21522055259331072, "grad_norm": 1.2760350704193115, "learning_rate": 1.8255512266641894e-05, "loss": 0.6349, "step": 1443 }, { "epoch": 0.21536970058540586, "grad_norm": 1.2005974054336548, "learning_rate": 1.82527846249366e-05, "loss": 0.6144, "step": 1444 }, { "epoch": 0.21551884857750103, "grad_norm": 1.215786337852478, "learning_rate": 1.825005505656582e-05, "loss": 0.6612, "step": 1445 }, { "epoch": 0.21566799656959618, "grad_norm": 1.3942633867263794, "learning_rate": 1.8247323562166785e-05, "loss": 0.7877, "step": 1446 }, { "epoch": 0.21581714456169135, "grad_norm": 1.2621136903762817, "learning_rate": 1.8244590142377183e-05, "loss": 0.6656, "step": 1447 }, { "epoch": 0.2159662925537865, "grad_norm": 1.283348560333252, "learning_rate": 1.824185479783515e-05, "loss": 0.6573, "step": 1448 }, { "epoch": 0.21611544054588164, "grad_norm": 1.2444207668304443, "learning_rate": 1.8239117529179263e-05, "loss": 0.6967, "step": 1449 }, { "epoch": 0.2162645885379768, "grad_norm": 1.2032597064971924, "learning_rate": 1.8236378337048562e-05, "loss": 0.6549, "step": 1450 }, { "epoch": 0.21641373653007195, "grad_norm": 1.234570026397705, "learning_rate": 1.8233637222082524e-05, "loss": 0.6348, "step": 1451 }, { "epoch": 0.21656288452216713, "grad_norm": 1.1828887462615967, "learning_rate": 1.823089418492108e-05, "loss": 0.6045, "step": 1452 }, { "epoch": 0.21671203251426227, "grad_norm": 0.9443606734275818, "learning_rate": 1.8228149226204617e-05, "loss": 0.6856, "step": 1453 }, { "epoch": 0.21686118050635744, "grad_norm": 1.3937400579452515, "learning_rate": 1.8225402346573958e-05, "loss": 0.6217, "step": 1454 }, { "epoch": 0.2170103284984526, "grad_norm": 1.2764183282852173, "learning_rate": 1.8222653546670377e-05, "loss": 0.7187, "step": 1455 }, { "epoch": 0.21715947649054776, "grad_norm": 0.9112249612808228, "learning_rate": 1.82199028271356e-05, "loss": 0.6585, "step": 1456 }, { "epoch": 0.2173086244826429, "grad_norm": 1.341169834136963, "learning_rate": 1.8217150188611807e-05, "loss": 0.7507, "step": 1457 }, { "epoch": 0.21745777247473805, "grad_norm": 1.2811932563781738, "learning_rate": 1.821439563174161e-05, "loss": 0.5751, "step": 1458 }, { "epoch": 0.21760692046683322, "grad_norm": 1.2685012817382812, "learning_rate": 1.8211639157168082e-05, "loss": 0.657, "step": 1459 }, { "epoch": 0.21775606845892836, "grad_norm": 1.2673906087875366, "learning_rate": 1.820888076553474e-05, "loss": 0.6341, "step": 1460 }, { "epoch": 0.21790521645102354, "grad_norm": 1.1899490356445312, "learning_rate": 1.820612045748555e-05, "loss": 0.642, "step": 1461 }, { "epoch": 0.21805436444311868, "grad_norm": 1.1698509454727173, "learning_rate": 1.8203358233664915e-05, "loss": 0.65, "step": 1462 }, { "epoch": 0.21820351243521385, "grad_norm": 1.2526006698608398, "learning_rate": 1.8200594094717708e-05, "loss": 0.6554, "step": 1463 }, { "epoch": 0.218352660427309, "grad_norm": 1.319230079650879, "learning_rate": 1.819782804128922e-05, "loss": 0.7224, "step": 1464 }, { "epoch": 0.21850180841940414, "grad_norm": 1.1862995624542236, "learning_rate": 1.8195060074025216e-05, "loss": 0.6335, "step": 1465 }, { "epoch": 0.21865095641149931, "grad_norm": 1.2423272132873535, "learning_rate": 1.819229019357189e-05, "loss": 0.6261, "step": 1466 }, { "epoch": 0.21880010440359446, "grad_norm": 1.2165107727050781, "learning_rate": 1.8189518400575886e-05, "loss": 0.6019, "step": 1467 }, { "epoch": 0.21894925239568963, "grad_norm": 1.180345892906189, "learning_rate": 1.81867446956843e-05, "loss": 0.6194, "step": 1468 }, { "epoch": 0.21909840038778478, "grad_norm": 1.2163774967193604, "learning_rate": 1.8183969079544677e-05, "loss": 0.6148, "step": 1469 }, { "epoch": 0.21924754837987995, "grad_norm": 1.279948353767395, "learning_rate": 1.818119155280499e-05, "loss": 0.6741, "step": 1470 }, { "epoch": 0.2193966963719751, "grad_norm": 1.1098873615264893, "learning_rate": 1.817841211611368e-05, "loss": 0.5308, "step": 1471 }, { "epoch": 0.21954584436407024, "grad_norm": 1.1428066492080688, "learning_rate": 1.817563077011962e-05, "loss": 0.5803, "step": 1472 }, { "epoch": 0.2196949923561654, "grad_norm": 1.0198824405670166, "learning_rate": 1.8172847515472134e-05, "loss": 0.5099, "step": 1473 }, { "epoch": 0.21984414034826055, "grad_norm": 1.1953487396240234, "learning_rate": 1.8170062352820993e-05, "loss": 0.6505, "step": 1474 }, { "epoch": 0.21999328834035572, "grad_norm": 1.1787863969802856, "learning_rate": 1.8167275282816406e-05, "loss": 0.6187, "step": 1475 }, { "epoch": 0.22014243633245087, "grad_norm": 1.4078902006149292, "learning_rate": 1.816448630610904e-05, "loss": 0.7167, "step": 1476 }, { "epoch": 0.22029158432454604, "grad_norm": 1.2152661085128784, "learning_rate": 1.816169542334999e-05, "loss": 0.6737, "step": 1477 }, { "epoch": 0.22044073231664119, "grad_norm": 1.1843079328536987, "learning_rate": 1.8158902635190812e-05, "loss": 0.6232, "step": 1478 }, { "epoch": 0.22058988030873633, "grad_norm": 1.1873644590377808, "learning_rate": 1.81561079422835e-05, "loss": 0.7057, "step": 1479 }, { "epoch": 0.2207390283008315, "grad_norm": 0.9732791781425476, "learning_rate": 1.815331134528049e-05, "loss": 0.6617, "step": 1480 }, { "epoch": 0.22088817629292665, "grad_norm": 0.9537947773933411, "learning_rate": 1.8150512844834668e-05, "loss": 0.6613, "step": 1481 }, { "epoch": 0.22103732428502182, "grad_norm": 1.1160311698913574, "learning_rate": 1.814771244159936e-05, "loss": 0.5539, "step": 1482 }, { "epoch": 0.22118647227711696, "grad_norm": 1.2228963375091553, "learning_rate": 1.814491013622834e-05, "loss": 0.6733, "step": 1483 }, { "epoch": 0.22133562026921214, "grad_norm": 1.2216593027114868, "learning_rate": 1.8142105929375823e-05, "loss": 0.6398, "step": 1484 }, { "epoch": 0.22148476826130728, "grad_norm": 1.322572112083435, "learning_rate": 1.813929982169647e-05, "loss": 0.7321, "step": 1485 }, { "epoch": 0.22163391625340245, "grad_norm": 0.9074899554252625, "learning_rate": 1.813649181384538e-05, "loss": 0.6249, "step": 1486 }, { "epoch": 0.2217830642454976, "grad_norm": 1.273016333580017, "learning_rate": 1.813368190647811e-05, "loss": 0.6839, "step": 1487 }, { "epoch": 0.22193221223759274, "grad_norm": 1.0671683549880981, "learning_rate": 1.8130870100250643e-05, "loss": 0.5842, "step": 1488 }, { "epoch": 0.2220813602296879, "grad_norm": 1.4392787218093872, "learning_rate": 1.8128056395819414e-05, "loss": 0.6555, "step": 1489 }, { "epoch": 0.22223050822178306, "grad_norm": 1.1974475383758545, "learning_rate": 1.8125240793841304e-05, "loss": 0.6687, "step": 1490 }, { "epoch": 0.22237965621387823, "grad_norm": 1.3338539600372314, "learning_rate": 1.812242329497363e-05, "loss": 0.6741, "step": 1491 }, { "epoch": 0.22252880420597337, "grad_norm": 1.1931002140045166, "learning_rate": 1.8119603899874163e-05, "loss": 0.6594, "step": 1492 }, { "epoch": 0.22267795219806855, "grad_norm": 1.357224702835083, "learning_rate": 1.8116782609201095e-05, "loss": 0.7189, "step": 1493 }, { "epoch": 0.2228271001901637, "grad_norm": 1.243970274925232, "learning_rate": 1.8113959423613084e-05, "loss": 0.6886, "step": 1494 }, { "epoch": 0.22297624818225883, "grad_norm": 1.377856731414795, "learning_rate": 1.811113434376922e-05, "loss": 0.6301, "step": 1495 }, { "epoch": 0.223125396174354, "grad_norm": 1.4103121757507324, "learning_rate": 1.8108307370329032e-05, "loss": 0.6628, "step": 1496 }, { "epoch": 0.22327454416644915, "grad_norm": 1.3958708047866821, "learning_rate": 1.81054785039525e-05, "loss": 0.6898, "step": 1497 }, { "epoch": 0.22342369215854432, "grad_norm": 1.2326754331588745, "learning_rate": 1.810264774530004e-05, "loss": 0.641, "step": 1498 }, { "epoch": 0.22357284015063947, "grad_norm": 1.1094112396240234, "learning_rate": 1.8099815095032502e-05, "loss": 0.5428, "step": 1499 }, { "epoch": 0.22372198814273464, "grad_norm": 1.11064875125885, "learning_rate": 1.80969805538112e-05, "loss": 0.6193, "step": 1500 }, { "epoch": 0.22387113613482978, "grad_norm": 1.159264326095581, "learning_rate": 1.8094144122297867e-05, "loss": 0.6748, "step": 1501 }, { "epoch": 0.22402028412692493, "grad_norm": 1.0186710357666016, "learning_rate": 1.809130580115469e-05, "loss": 0.7079, "step": 1502 }, { "epoch": 0.2241694321190201, "grad_norm": 1.190588116645813, "learning_rate": 1.8088465591044292e-05, "loss": 0.6535, "step": 1503 }, { "epoch": 0.22431858011111525, "grad_norm": 1.3528140783309937, "learning_rate": 1.808562349262974e-05, "loss": 0.6281, "step": 1504 }, { "epoch": 0.22446772810321042, "grad_norm": 1.291980504989624, "learning_rate": 1.8082779506574534e-05, "loss": 0.6938, "step": 1505 }, { "epoch": 0.22461687609530556, "grad_norm": 1.1682201623916626, "learning_rate": 1.807993363354263e-05, "loss": 0.6581, "step": 1506 }, { "epoch": 0.22476602408740073, "grad_norm": 1.2340359687805176, "learning_rate": 1.8077085874198404e-05, "loss": 0.6366, "step": 1507 }, { "epoch": 0.22491517207949588, "grad_norm": 1.299695372581482, "learning_rate": 1.8074236229206694e-05, "loss": 0.7204, "step": 1508 }, { "epoch": 0.22506432007159102, "grad_norm": 1.179503321647644, "learning_rate": 1.8071384699232766e-05, "loss": 0.6559, "step": 1509 }, { "epoch": 0.2252134680636862, "grad_norm": 1.2251368761062622, "learning_rate": 1.8068531284942324e-05, "loss": 0.6288, "step": 1510 }, { "epoch": 0.22536261605578134, "grad_norm": 1.269327998161316, "learning_rate": 1.8065675987001517e-05, "loss": 0.6871, "step": 1511 }, { "epoch": 0.2255117640478765, "grad_norm": 1.1893024444580078, "learning_rate": 1.8062818806076934e-05, "loss": 0.7078, "step": 1512 }, { "epoch": 0.22566091203997166, "grad_norm": 1.2018343210220337, "learning_rate": 1.8059959742835604e-05, "loss": 0.697, "step": 1513 }, { "epoch": 0.22581006003206683, "grad_norm": 1.2469828128814697, "learning_rate": 1.8057098797944987e-05, "loss": 0.7394, "step": 1514 }, { "epoch": 0.22595920802416197, "grad_norm": 1.2198987007141113, "learning_rate": 1.8054235972072994e-05, "loss": 0.6614, "step": 1515 }, { "epoch": 0.22610835601625714, "grad_norm": 1.2016226053237915, "learning_rate": 1.805137126588797e-05, "loss": 0.6371, "step": 1516 }, { "epoch": 0.2262575040083523, "grad_norm": 1.1595717668533325, "learning_rate": 1.8048504680058704e-05, "loss": 0.7011, "step": 1517 }, { "epoch": 0.22640665200044743, "grad_norm": 1.2469896078109741, "learning_rate": 1.8045636215254407e-05, "loss": 0.6182, "step": 1518 }, { "epoch": 0.2265557999925426, "grad_norm": 1.122747778892517, "learning_rate": 1.8042765872144747e-05, "loss": 0.6358, "step": 1519 }, { "epoch": 0.22670494798463775, "grad_norm": 1.2225083112716675, "learning_rate": 1.8039893651399823e-05, "loss": 0.6564, "step": 1520 }, { "epoch": 0.22685409597673292, "grad_norm": 1.1546891927719116, "learning_rate": 1.8037019553690176e-05, "loss": 0.6657, "step": 1521 }, { "epoch": 0.22700324396882807, "grad_norm": 1.1484553813934326, "learning_rate": 1.803414357968678e-05, "loss": 0.6472, "step": 1522 }, { "epoch": 0.22715239196092324, "grad_norm": 1.2025514841079712, "learning_rate": 1.803126573006105e-05, "loss": 0.5941, "step": 1523 }, { "epoch": 0.22730153995301838, "grad_norm": 1.2469310760498047, "learning_rate": 1.8028386005484837e-05, "loss": 0.7352, "step": 1524 }, { "epoch": 0.22745068794511353, "grad_norm": 1.2746546268463135, "learning_rate": 1.8025504406630434e-05, "loss": 0.6732, "step": 1525 }, { "epoch": 0.2275998359372087, "grad_norm": 1.3633203506469727, "learning_rate": 1.8022620934170568e-05, "loss": 0.6866, "step": 1526 }, { "epoch": 0.22774898392930384, "grad_norm": 1.1298909187316895, "learning_rate": 1.8019735588778404e-05, "loss": 0.6043, "step": 1527 }, { "epoch": 0.22789813192139902, "grad_norm": 1.1426095962524414, "learning_rate": 1.801684837112754e-05, "loss": 0.6244, "step": 1528 }, { "epoch": 0.22804727991349416, "grad_norm": 1.2885534763336182, "learning_rate": 1.8013959281892025e-05, "loss": 0.6253, "step": 1529 }, { "epoch": 0.22819642790558933, "grad_norm": 1.1940888166427612, "learning_rate": 1.801106832174633e-05, "loss": 0.6541, "step": 1530 }, { "epoch": 0.22834557589768448, "grad_norm": 1.076991319656372, "learning_rate": 1.8008175491365364e-05, "loss": 0.6481, "step": 1531 }, { "epoch": 0.22849472388977962, "grad_norm": 1.2502959966659546, "learning_rate": 1.8005280791424483e-05, "loss": 0.6414, "step": 1532 }, { "epoch": 0.2286438718818748, "grad_norm": 1.2022483348846436, "learning_rate": 1.800238422259947e-05, "loss": 0.6228, "step": 1533 }, { "epoch": 0.22879301987396994, "grad_norm": 1.352810025215149, "learning_rate": 1.799948578556655e-05, "loss": 0.7425, "step": 1534 }, { "epoch": 0.2289421678660651, "grad_norm": 1.1999223232269287, "learning_rate": 1.799658548100238e-05, "loss": 0.6816, "step": 1535 }, { "epoch": 0.22909131585816025, "grad_norm": 1.1947327852249146, "learning_rate": 1.799368330958405e-05, "loss": 0.6303, "step": 1536 }, { "epoch": 0.22924046385025543, "grad_norm": 1.2738924026489258, "learning_rate": 1.7990779271989103e-05, "loss": 0.673, "step": 1537 }, { "epoch": 0.22938961184235057, "grad_norm": 1.295430064201355, "learning_rate": 1.7987873368895494e-05, "loss": 0.6652, "step": 1538 }, { "epoch": 0.22953875983444572, "grad_norm": 1.175157904624939, "learning_rate": 1.798496560098163e-05, "loss": 0.7055, "step": 1539 }, { "epoch": 0.2296879078265409, "grad_norm": 1.202556848526001, "learning_rate": 1.7982055968926344e-05, "loss": 0.5702, "step": 1540 }, { "epoch": 0.22983705581863603, "grad_norm": 1.1708495616912842, "learning_rate": 1.7979144473408912e-05, "loss": 0.6329, "step": 1541 }, { "epoch": 0.2299862038107312, "grad_norm": 1.2325658798217773, "learning_rate": 1.797623111510904e-05, "loss": 0.6454, "step": 1542 }, { "epoch": 0.23013535180282635, "grad_norm": 1.305070161819458, "learning_rate": 1.7973315894706872e-05, "loss": 0.6109, "step": 1543 }, { "epoch": 0.23028449979492152, "grad_norm": 1.1555161476135254, "learning_rate": 1.7970398812882982e-05, "loss": 0.557, "step": 1544 }, { "epoch": 0.23043364778701667, "grad_norm": 1.2498972415924072, "learning_rate": 1.7967479870318384e-05, "loss": 0.6644, "step": 1545 }, { "epoch": 0.2305827957791118, "grad_norm": 1.162426233291626, "learning_rate": 1.796455906769452e-05, "loss": 0.5968, "step": 1546 }, { "epoch": 0.23073194377120698, "grad_norm": 1.3791956901550293, "learning_rate": 1.7961636405693274e-05, "loss": 0.6427, "step": 1547 }, { "epoch": 0.23088109176330213, "grad_norm": 1.2169655561447144, "learning_rate": 1.795871188499696e-05, "loss": 0.6974, "step": 1548 }, { "epoch": 0.2310302397553973, "grad_norm": 1.3254038095474243, "learning_rate": 1.7955785506288324e-05, "loss": 0.7629, "step": 1549 }, { "epoch": 0.23117938774749244, "grad_norm": 1.1799728870391846, "learning_rate": 1.795285727025055e-05, "loss": 0.6174, "step": 1550 }, { "epoch": 0.23132853573958762, "grad_norm": 1.2852067947387695, "learning_rate": 1.794992717756725e-05, "loss": 0.6635, "step": 1551 }, { "epoch": 0.23147768373168276, "grad_norm": 1.2128543853759766, "learning_rate": 1.7946995228922474e-05, "loss": 0.6602, "step": 1552 }, { "epoch": 0.23162683172377793, "grad_norm": 1.2759724855422974, "learning_rate": 1.794406142500071e-05, "loss": 0.6473, "step": 1553 }, { "epoch": 0.23177597971587308, "grad_norm": 0.8878532648086548, "learning_rate": 1.7941125766486865e-05, "loss": 0.6597, "step": 1554 }, { "epoch": 0.23192512770796822, "grad_norm": 1.1790708303451538, "learning_rate": 1.7938188254066293e-05, "loss": 0.6829, "step": 1555 }, { "epoch": 0.2320742757000634, "grad_norm": 1.3595967292785645, "learning_rate": 1.793524888842477e-05, "loss": 0.7355, "step": 1556 }, { "epoch": 0.23222342369215854, "grad_norm": 1.272783875465393, "learning_rate": 1.7932307670248518e-05, "loss": 0.6914, "step": 1557 }, { "epoch": 0.2323725716842537, "grad_norm": 1.319305181503296, "learning_rate": 1.792936460022417e-05, "loss": 0.7014, "step": 1558 }, { "epoch": 0.23252171967634885, "grad_norm": 1.175911784172058, "learning_rate": 1.7926419679038823e-05, "loss": 0.6362, "step": 1559 }, { "epoch": 0.23267086766844403, "grad_norm": 1.184370994567871, "learning_rate": 1.7923472907379968e-05, "loss": 0.6763, "step": 1560 }, { "epoch": 0.23282001566053917, "grad_norm": 1.1413496732711792, "learning_rate": 1.7920524285935563e-05, "loss": 0.6274, "step": 1561 }, { "epoch": 0.23296916365263431, "grad_norm": 1.3978804349899292, "learning_rate": 1.7917573815393975e-05, "loss": 0.751, "step": 1562 }, { "epoch": 0.2331183116447295, "grad_norm": 1.2732652425765991, "learning_rate": 1.7914621496444015e-05, "loss": 0.6666, "step": 1563 }, { "epoch": 0.23326745963682463, "grad_norm": 1.1217628717422485, "learning_rate": 1.7911667329774914e-05, "loss": 0.5483, "step": 1564 }, { "epoch": 0.2334166076289198, "grad_norm": 0.8935086727142334, "learning_rate": 1.7908711316076345e-05, "loss": 0.655, "step": 1565 }, { "epoch": 0.23356575562101495, "grad_norm": 1.1561706066131592, "learning_rate": 1.790575345603841e-05, "loss": 0.6265, "step": 1566 }, { "epoch": 0.23371490361311012, "grad_norm": 1.2806761264801025, "learning_rate": 1.790279375035164e-05, "loss": 0.7585, "step": 1567 }, { "epoch": 0.23386405160520526, "grad_norm": 1.3403931856155396, "learning_rate": 1.7899832199706993e-05, "loss": 0.6975, "step": 1568 }, { "epoch": 0.2340131995973004, "grad_norm": 1.253089427947998, "learning_rate": 1.7896868804795863e-05, "loss": 0.7122, "step": 1569 }, { "epoch": 0.23416234758939558, "grad_norm": 1.3972244262695312, "learning_rate": 1.789390356631008e-05, "loss": 0.6826, "step": 1570 }, { "epoch": 0.23431149558149073, "grad_norm": 1.111948013305664, "learning_rate": 1.7890936484941894e-05, "loss": 0.6158, "step": 1571 }, { "epoch": 0.2344606435735859, "grad_norm": 1.309623122215271, "learning_rate": 1.7887967561383986e-05, "loss": 0.6105, "step": 1572 }, { "epoch": 0.23460979156568104, "grad_norm": 1.2203096151351929, "learning_rate": 1.7884996796329472e-05, "loss": 0.6155, "step": 1573 }, { "epoch": 0.23475893955777621, "grad_norm": 1.236954689025879, "learning_rate": 1.78820241904719e-05, "loss": 0.6416, "step": 1574 }, { "epoch": 0.23490808754987136, "grad_norm": 1.2138941287994385, "learning_rate": 1.787904974450524e-05, "loss": 0.6883, "step": 1575 }, { "epoch": 0.2350572355419665, "grad_norm": 1.2276016473770142, "learning_rate": 1.7876073459123895e-05, "loss": 0.7083, "step": 1576 }, { "epoch": 0.23520638353406167, "grad_norm": 1.1837352514266968, "learning_rate": 1.78730953350227e-05, "loss": 0.5513, "step": 1577 }, { "epoch": 0.23535553152615682, "grad_norm": 1.2232201099395752, "learning_rate": 1.7870115372896915e-05, "loss": 0.6256, "step": 1578 }, { "epoch": 0.235504679518252, "grad_norm": 1.4317564964294434, "learning_rate": 1.7867133573442234e-05, "loss": 0.7501, "step": 1579 }, { "epoch": 0.23565382751034714, "grad_norm": 1.1194424629211426, "learning_rate": 1.786414993735478e-05, "loss": 0.6271, "step": 1580 }, { "epoch": 0.2358029755024423, "grad_norm": 1.3275007009506226, "learning_rate": 1.786116446533109e-05, "loss": 0.6865, "step": 1581 }, { "epoch": 0.23595212349453745, "grad_norm": 1.3048512935638428, "learning_rate": 1.7858177158068154e-05, "loss": 0.7062, "step": 1582 }, { "epoch": 0.23610127148663262, "grad_norm": 1.2108690738677979, "learning_rate": 1.7855188016263377e-05, "loss": 0.6596, "step": 1583 }, { "epoch": 0.23625041947872777, "grad_norm": 1.178743600845337, "learning_rate": 1.7852197040614583e-05, "loss": 0.5727, "step": 1584 }, { "epoch": 0.2363995674708229, "grad_norm": 1.2200438976287842, "learning_rate": 1.7849204231820042e-05, "loss": 0.6485, "step": 1585 }, { "epoch": 0.23654871546291809, "grad_norm": 1.3196816444396973, "learning_rate": 1.784620959057845e-05, "loss": 0.7136, "step": 1586 }, { "epoch": 0.23669786345501323, "grad_norm": 0.9017396569252014, "learning_rate": 1.7843213117588913e-05, "loss": 0.6531, "step": 1587 }, { "epoch": 0.2368470114471084, "grad_norm": 1.3245561122894287, "learning_rate": 1.7840214813550986e-05, "loss": 0.7475, "step": 1588 }, { "epoch": 0.23699615943920355, "grad_norm": 1.0997300148010254, "learning_rate": 1.7837214679164635e-05, "loss": 0.6596, "step": 1589 }, { "epoch": 0.23714530743129872, "grad_norm": 1.1823358535766602, "learning_rate": 1.783421271513027e-05, "loss": 0.6189, "step": 1590 }, { "epoch": 0.23729445542339386, "grad_norm": 1.291284441947937, "learning_rate": 1.7831208922148708e-05, "loss": 0.6979, "step": 1591 }, { "epoch": 0.237443603415489, "grad_norm": 1.3724466562271118, "learning_rate": 1.7828203300921216e-05, "loss": 0.7248, "step": 1592 }, { "epoch": 0.23759275140758418, "grad_norm": 1.2765698432922363, "learning_rate": 1.7825195852149463e-05, "loss": 0.7027, "step": 1593 }, { "epoch": 0.23774189939967932, "grad_norm": 1.1081347465515137, "learning_rate": 1.7822186576535566e-05, "loss": 0.6671, "step": 1594 }, { "epoch": 0.2378910473917745, "grad_norm": 0.9985323548316956, "learning_rate": 1.781917547478205e-05, "loss": 0.5639, "step": 1595 }, { "epoch": 0.23804019538386964, "grad_norm": 1.1895158290863037, "learning_rate": 1.781616254759189e-05, "loss": 0.7159, "step": 1596 }, { "epoch": 0.2381893433759648, "grad_norm": 1.2327927350997925, "learning_rate": 1.7813147795668465e-05, "loss": 0.5756, "step": 1597 }, { "epoch": 0.23833849136805996, "grad_norm": 1.2423759698867798, "learning_rate": 1.7810131219715585e-05, "loss": 0.6819, "step": 1598 }, { "epoch": 0.2384876393601551, "grad_norm": 1.3502336740493774, "learning_rate": 1.7807112820437496e-05, "loss": 0.6467, "step": 1599 }, { "epoch": 0.23863678735225027, "grad_norm": 1.1248784065246582, "learning_rate": 1.7804092598538857e-05, "loss": 0.6858, "step": 1600 }, { "epoch": 0.23878593534434542, "grad_norm": 1.095640778541565, "learning_rate": 1.7801070554724763e-05, "loss": 0.5827, "step": 1601 }, { "epoch": 0.2389350833364406, "grad_norm": 1.1582920551300049, "learning_rate": 1.7798046689700728e-05, "loss": 0.7156, "step": 1602 }, { "epoch": 0.23908423132853573, "grad_norm": 1.2633405923843384, "learning_rate": 1.779502100417269e-05, "loss": 0.6875, "step": 1603 }, { "epoch": 0.2392333793206309, "grad_norm": 1.3188165426254272, "learning_rate": 1.7791993498847016e-05, "loss": 0.628, "step": 1604 }, { "epoch": 0.23938252731272605, "grad_norm": 1.116984486579895, "learning_rate": 1.77889641744305e-05, "loss": 0.6818, "step": 1605 }, { "epoch": 0.2395316753048212, "grad_norm": 1.3238016366958618, "learning_rate": 1.778593303163035e-05, "loss": 0.7411, "step": 1606 }, { "epoch": 0.23968082329691637, "grad_norm": 1.2340675592422485, "learning_rate": 1.7782900071154215e-05, "loss": 0.7353, "step": 1607 }, { "epoch": 0.2398299712890115, "grad_norm": 1.1577489376068115, "learning_rate": 1.777986529371015e-05, "loss": 0.6244, "step": 1608 }, { "epoch": 0.23997911928110668, "grad_norm": 1.2205421924591064, "learning_rate": 1.777682870000665e-05, "loss": 0.6563, "step": 1609 }, { "epoch": 0.24012826727320183, "grad_norm": 1.0802232027053833, "learning_rate": 1.7773790290752626e-05, "loss": 0.6315, "step": 1610 }, { "epoch": 0.240277415265297, "grad_norm": 1.235665202140808, "learning_rate": 1.777075006665741e-05, "loss": 0.6269, "step": 1611 }, { "epoch": 0.24042656325739215, "grad_norm": 1.2065547704696655, "learning_rate": 1.7767708028430767e-05, "loss": 0.6079, "step": 1612 }, { "epoch": 0.24057571124948732, "grad_norm": 1.1530283689498901, "learning_rate": 1.7764664176782872e-05, "loss": 0.632, "step": 1613 }, { "epoch": 0.24072485924158246, "grad_norm": 1.2215440273284912, "learning_rate": 1.7761618512424347e-05, "loss": 0.5364, "step": 1614 }, { "epoch": 0.2408740072336776, "grad_norm": 1.2843683958053589, "learning_rate": 1.7758571036066206e-05, "loss": 0.6545, "step": 1615 }, { "epoch": 0.24102315522577278, "grad_norm": 1.2547012567520142, "learning_rate": 1.7755521748419912e-05, "loss": 0.6728, "step": 1616 }, { "epoch": 0.24117230321786792, "grad_norm": 1.1116889715194702, "learning_rate": 1.775247065019733e-05, "loss": 0.5698, "step": 1617 }, { "epoch": 0.2413214512099631, "grad_norm": 1.2116234302520752, "learning_rate": 1.7749417742110772e-05, "loss": 0.6209, "step": 1618 }, { "epoch": 0.24147059920205824, "grad_norm": 1.1419504880905151, "learning_rate": 1.774636302487295e-05, "loss": 0.6498, "step": 1619 }, { "epoch": 0.2416197471941534, "grad_norm": 1.255040168762207, "learning_rate": 1.7743306499197014e-05, "loss": 0.6299, "step": 1620 }, { "epoch": 0.24176889518624856, "grad_norm": 1.2007966041564941, "learning_rate": 1.774024816579652e-05, "loss": 0.5917, "step": 1621 }, { "epoch": 0.2419180431783437, "grad_norm": 1.244764804840088, "learning_rate": 1.7737188025385466e-05, "loss": 0.6558, "step": 1622 }, { "epoch": 0.24206719117043887, "grad_norm": 1.2576268911361694, "learning_rate": 1.7734126078678252e-05, "loss": 0.707, "step": 1623 }, { "epoch": 0.24221633916253402, "grad_norm": 1.0552327632904053, "learning_rate": 1.7731062326389716e-05, "loss": 0.5651, "step": 1624 }, { "epoch": 0.2423654871546292, "grad_norm": 1.0115259885787964, "learning_rate": 1.772799676923511e-05, "loss": 0.7048, "step": 1625 }, { "epoch": 0.24251463514672433, "grad_norm": 1.1748731136322021, "learning_rate": 1.77249294079301e-05, "loss": 0.6998, "step": 1626 }, { "epoch": 0.2426637831388195, "grad_norm": 1.168723464012146, "learning_rate": 1.772186024319079e-05, "loss": 0.5837, "step": 1627 }, { "epoch": 0.24281293113091465, "grad_norm": 1.188806414604187, "learning_rate": 1.7718789275733694e-05, "loss": 0.6509, "step": 1628 }, { "epoch": 0.2429620791230098, "grad_norm": 1.162414789199829, "learning_rate": 1.7715716506275747e-05, "loss": 0.5947, "step": 1629 }, { "epoch": 0.24311122711510497, "grad_norm": 1.2359960079193115, "learning_rate": 1.771264193553431e-05, "loss": 0.6916, "step": 1630 }, { "epoch": 0.2432603751072001, "grad_norm": 1.1776933670043945, "learning_rate": 1.770956556422716e-05, "loss": 0.6351, "step": 1631 }, { "epoch": 0.24340952309929528, "grad_norm": 1.2121528387069702, "learning_rate": 1.7706487393072492e-05, "loss": 0.6034, "step": 1632 }, { "epoch": 0.24355867109139043, "grad_norm": 1.1317113637924194, "learning_rate": 1.7703407422788933e-05, "loss": 0.6666, "step": 1633 }, { "epoch": 0.2437078190834856, "grad_norm": 1.3414638042449951, "learning_rate": 1.770032565409551e-05, "loss": 0.6888, "step": 1634 }, { "epoch": 0.24385696707558074, "grad_norm": 1.1961077451705933, "learning_rate": 1.769724208771169e-05, "loss": 0.6828, "step": 1635 }, { "epoch": 0.2440061150676759, "grad_norm": 1.1706042289733887, "learning_rate": 1.7694156724357352e-05, "loss": 0.5987, "step": 1636 }, { "epoch": 0.24415526305977106, "grad_norm": 1.2892911434173584, "learning_rate": 1.7691069564752793e-05, "loss": 0.7123, "step": 1637 }, { "epoch": 0.2443044110518662, "grad_norm": 1.1724326610565186, "learning_rate": 1.7687980609618726e-05, "loss": 0.6284, "step": 1638 }, { "epoch": 0.24445355904396138, "grad_norm": 1.281762719154358, "learning_rate": 1.768488985967629e-05, "loss": 0.6461, "step": 1639 }, { "epoch": 0.24460270703605652, "grad_norm": 1.3140584230422974, "learning_rate": 1.768179731564704e-05, "loss": 0.6974, "step": 1640 }, { "epoch": 0.2447518550281517, "grad_norm": 1.3266756534576416, "learning_rate": 1.767870297825295e-05, "loss": 0.7188, "step": 1641 }, { "epoch": 0.24490100302024684, "grad_norm": 1.3110613822937012, "learning_rate": 1.767560684821642e-05, "loss": 0.702, "step": 1642 }, { "epoch": 0.245050151012342, "grad_norm": 1.2093586921691895, "learning_rate": 1.7672508926260244e-05, "loss": 0.6411, "step": 1643 }, { "epoch": 0.24519929900443715, "grad_norm": 1.2707339525222778, "learning_rate": 1.7669409213107674e-05, "loss": 0.6881, "step": 1644 }, { "epoch": 0.2453484469965323, "grad_norm": 1.3458865880966187, "learning_rate": 1.766630770948234e-05, "loss": 0.6931, "step": 1645 }, { "epoch": 0.24549759498862747, "grad_norm": 1.199762225151062, "learning_rate": 1.7663204416108315e-05, "loss": 0.7635, "step": 1646 }, { "epoch": 0.24564674298072262, "grad_norm": 1.1838222742080688, "learning_rate": 1.7660099333710084e-05, "loss": 0.5833, "step": 1647 }, { "epoch": 0.2457958909728178, "grad_norm": 1.064180850982666, "learning_rate": 1.7656992463012548e-05, "loss": 0.6313, "step": 1648 }, { "epoch": 0.24594503896491293, "grad_norm": 1.1747335195541382, "learning_rate": 1.765388380474102e-05, "loss": 0.6158, "step": 1649 }, { "epoch": 0.2460941869570081, "grad_norm": 1.1638092994689941, "learning_rate": 1.765077335962124e-05, "loss": 0.591, "step": 1650 }, { "epoch": 0.24624333494910325, "grad_norm": 1.1071240901947021, "learning_rate": 1.7647661128379373e-05, "loss": 0.5917, "step": 1651 }, { "epoch": 0.2463924829411984, "grad_norm": 1.1813530921936035, "learning_rate": 1.7644547111741968e-05, "loss": 0.6064, "step": 1652 }, { "epoch": 0.24654163093329357, "grad_norm": 1.1278386116027832, "learning_rate": 1.7641431310436025e-05, "loss": 0.5818, "step": 1653 }, { "epoch": 0.2466907789253887, "grad_norm": 1.2481660842895508, "learning_rate": 1.7638313725188948e-05, "loss": 0.7411, "step": 1654 }, { "epoch": 0.24683992691748388, "grad_norm": 1.2620304822921753, "learning_rate": 1.7635194356728553e-05, "loss": 0.7338, "step": 1655 }, { "epoch": 0.24698907490957903, "grad_norm": 1.4927356243133545, "learning_rate": 1.7632073205783076e-05, "loss": 0.7335, "step": 1656 }, { "epoch": 0.2471382229016742, "grad_norm": 1.1201560497283936, "learning_rate": 1.7628950273081176e-05, "loss": 0.6525, "step": 1657 }, { "epoch": 0.24728737089376934, "grad_norm": 1.2022793292999268, "learning_rate": 1.7625825559351917e-05, "loss": 0.6442, "step": 1658 }, { "epoch": 0.2474365188858645, "grad_norm": 1.2114933729171753, "learning_rate": 1.762269906532478e-05, "loss": 0.7212, "step": 1659 }, { "epoch": 0.24758566687795966, "grad_norm": 1.24093496799469, "learning_rate": 1.7619570791729676e-05, "loss": 0.6924, "step": 1660 }, { "epoch": 0.2477348148700548, "grad_norm": 1.3061223030090332, "learning_rate": 1.7616440739296908e-05, "loss": 0.5868, "step": 1661 }, { "epoch": 0.24788396286214998, "grad_norm": 1.2284728288650513, "learning_rate": 1.7613308908757215e-05, "loss": 0.6182, "step": 1662 }, { "epoch": 0.24803311085424512, "grad_norm": 1.2090684175491333, "learning_rate": 1.761017530084174e-05, "loss": 0.6588, "step": 1663 }, { "epoch": 0.2481822588463403, "grad_norm": 1.240246295928955, "learning_rate": 1.7607039916282044e-05, "loss": 0.623, "step": 1664 }, { "epoch": 0.24833140683843544, "grad_norm": 1.2338807582855225, "learning_rate": 1.7603902755810102e-05, "loss": 0.6421, "step": 1665 }, { "epoch": 0.24848055483053058, "grad_norm": 1.0621659755706787, "learning_rate": 1.7600763820158308e-05, "loss": 0.5203, "step": 1666 }, { "epoch": 0.24862970282262575, "grad_norm": 1.126279592514038, "learning_rate": 1.7597623110059462e-05, "loss": 0.5702, "step": 1667 }, { "epoch": 0.2487788508147209, "grad_norm": 1.2404447793960571, "learning_rate": 1.7594480626246784e-05, "loss": 0.7035, "step": 1668 }, { "epoch": 0.24892799880681607, "grad_norm": 0.946708083152771, "learning_rate": 1.759133636945391e-05, "loss": 0.6551, "step": 1669 }, { "epoch": 0.24907714679891121, "grad_norm": 1.1504871845245361, "learning_rate": 1.7588190340414882e-05, "loss": 0.6546, "step": 1670 }, { "epoch": 0.2492262947910064, "grad_norm": 1.1966075897216797, "learning_rate": 1.7585042539864164e-05, "loss": 0.6614, "step": 1671 }, { "epoch": 0.24937544278310153, "grad_norm": 1.157460331916809, "learning_rate": 1.758189296853663e-05, "loss": 0.7057, "step": 1672 }, { "epoch": 0.24952459077519668, "grad_norm": 0.766019880771637, "learning_rate": 1.757874162716757e-05, "loss": 0.5528, "step": 1673 }, { "epoch": 0.24967373876729185, "grad_norm": 1.2577844858169556, "learning_rate": 1.7575588516492677e-05, "loss": 0.6556, "step": 1674 }, { "epoch": 0.249822886759387, "grad_norm": 1.198656439781189, "learning_rate": 1.757243363724807e-05, "loss": 0.6879, "step": 1675 }, { "epoch": 0.24997203475148216, "grad_norm": 1.2788581848144531, "learning_rate": 1.7569276990170276e-05, "loss": 0.6824, "step": 1676 }, { "epoch": 0.2501211827435773, "grad_norm": 0.9435068368911743, "learning_rate": 1.7566118575996238e-05, "loss": 0.6929, "step": 1677 }, { "epoch": 0.25027033073567245, "grad_norm": 1.1642608642578125, "learning_rate": 1.75629583954633e-05, "loss": 0.6485, "step": 1678 }, { "epoch": 0.25041947872776765, "grad_norm": 1.2771519422531128, "learning_rate": 1.7559796449309233e-05, "loss": 0.6444, "step": 1679 }, { "epoch": 0.2505686267198628, "grad_norm": 1.1928108930587769, "learning_rate": 1.755663273827221e-05, "loss": 0.6553, "step": 1680 }, { "epoch": 0.25071777471195794, "grad_norm": 1.2181358337402344, "learning_rate": 1.7553467263090822e-05, "loss": 0.6901, "step": 1681 }, { "epoch": 0.2508669227040531, "grad_norm": 1.2247344255447388, "learning_rate": 1.7550300024504067e-05, "loss": 0.7227, "step": 1682 }, { "epoch": 0.25101607069614823, "grad_norm": 1.1633230447769165, "learning_rate": 1.754713102325136e-05, "loss": 0.6645, "step": 1683 }, { "epoch": 0.25116521868824343, "grad_norm": 1.2573188543319702, "learning_rate": 1.7543960260072522e-05, "loss": 0.7093, "step": 1684 }, { "epoch": 0.2513143666803386, "grad_norm": 1.209058165550232, "learning_rate": 1.754078773570779e-05, "loss": 0.6322, "step": 1685 }, { "epoch": 0.2514635146724337, "grad_norm": 1.146430253982544, "learning_rate": 1.753761345089781e-05, "loss": 0.5279, "step": 1686 }, { "epoch": 0.25161266266452886, "grad_norm": 0.8813087344169617, "learning_rate": 1.7534437406383637e-05, "loss": 0.6338, "step": 1687 }, { "epoch": 0.251761810656624, "grad_norm": 1.1190674304962158, "learning_rate": 1.753125960290674e-05, "loss": 0.6527, "step": 1688 }, { "epoch": 0.2519109586487192, "grad_norm": 1.2717103958129883, "learning_rate": 1.7528080041209e-05, "loss": 0.6748, "step": 1689 }, { "epoch": 0.25206010664081435, "grad_norm": 1.2045786380767822, "learning_rate": 1.7524898722032704e-05, "loss": 0.7345, "step": 1690 }, { "epoch": 0.2522092546329095, "grad_norm": 1.2609467506408691, "learning_rate": 1.7521715646120547e-05, "loss": 0.6432, "step": 1691 }, { "epoch": 0.25235840262500464, "grad_norm": 1.2453371286392212, "learning_rate": 1.751853081421565e-05, "loss": 0.6681, "step": 1692 }, { "epoch": 0.25250755061709984, "grad_norm": 0.871311366558075, "learning_rate": 1.751534422706152e-05, "loss": 0.648, "step": 1693 }, { "epoch": 0.252656698609195, "grad_norm": 1.0264196395874023, "learning_rate": 1.7512155885402095e-05, "loss": 0.5555, "step": 1694 }, { "epoch": 0.25280584660129013, "grad_norm": 1.1753394603729248, "learning_rate": 1.7508965789981706e-05, "loss": 0.6657, "step": 1695 }, { "epoch": 0.2529549945933853, "grad_norm": 1.2547543048858643, "learning_rate": 1.7505773941545108e-05, "loss": 0.6221, "step": 1696 }, { "epoch": 0.2531041425854804, "grad_norm": 1.260514736175537, "learning_rate": 1.7502580340837455e-05, "loss": 0.6448, "step": 1697 }, { "epoch": 0.2532532905775756, "grad_norm": 1.2080104351043701, "learning_rate": 1.7499384988604316e-05, "loss": 0.6561, "step": 1698 }, { "epoch": 0.25340243856967076, "grad_norm": 1.1615724563598633, "learning_rate": 1.7496187885591664e-05, "loss": 0.688, "step": 1699 }, { "epoch": 0.2535515865617659, "grad_norm": 1.278199553489685, "learning_rate": 1.7492989032545886e-05, "loss": 0.7025, "step": 1700 }, { "epoch": 0.25370073455386105, "grad_norm": 1.28776216506958, "learning_rate": 1.7489788430213774e-05, "loss": 0.6008, "step": 1701 }, { "epoch": 0.25384988254595625, "grad_norm": 1.1997824907302856, "learning_rate": 1.7486586079342523e-05, "loss": 0.6081, "step": 1702 }, { "epoch": 0.2539990305380514, "grad_norm": 1.1424463987350464, "learning_rate": 1.748338198067975e-05, "loss": 0.5653, "step": 1703 }, { "epoch": 0.25414817853014654, "grad_norm": 1.3267173767089844, "learning_rate": 1.7480176134973474e-05, "loss": 0.643, "step": 1704 }, { "epoch": 0.2542973265222417, "grad_norm": 1.3844417333602905, "learning_rate": 1.7476968542972112e-05, "loss": 0.679, "step": 1705 }, { "epoch": 0.25444647451433683, "grad_norm": 1.1899747848510742, "learning_rate": 1.74737592054245e-05, "loss": 0.633, "step": 1706 }, { "epoch": 0.25459562250643203, "grad_norm": 1.211830735206604, "learning_rate": 1.7470548123079884e-05, "loss": 0.5491, "step": 1707 }, { "epoch": 0.2547447704985272, "grad_norm": 1.2600278854370117, "learning_rate": 1.7467335296687903e-05, "loss": 0.6683, "step": 1708 }, { "epoch": 0.2548939184906223, "grad_norm": 1.240298867225647, "learning_rate": 1.7464120726998616e-05, "loss": 0.7149, "step": 1709 }, { "epoch": 0.25504306648271746, "grad_norm": 1.2809852361679077, "learning_rate": 1.7460904414762488e-05, "loss": 0.7427, "step": 1710 }, { "epoch": 0.2551922144748126, "grad_norm": 1.3140288591384888, "learning_rate": 1.7457686360730382e-05, "loss": 0.6217, "step": 1711 }, { "epoch": 0.2553413624669078, "grad_norm": 1.2761589288711548, "learning_rate": 1.745446656565358e-05, "loss": 0.7375, "step": 1712 }, { "epoch": 0.25549051045900295, "grad_norm": 1.2127811908721924, "learning_rate": 1.7451245030283755e-05, "loss": 0.6795, "step": 1713 }, { "epoch": 0.2556396584510981, "grad_norm": 1.2355821132659912, "learning_rate": 1.7448021755373005e-05, "loss": 0.6313, "step": 1714 }, { "epoch": 0.25578880644319324, "grad_norm": 1.1672168970108032, "learning_rate": 1.7444796741673814e-05, "loss": 0.6098, "step": 1715 }, { "epoch": 0.25593795443528844, "grad_norm": 0.9266926050186157, "learning_rate": 1.7441569989939092e-05, "loss": 0.6779, "step": 1716 }, { "epoch": 0.2560871024273836, "grad_norm": 1.3198788166046143, "learning_rate": 1.7438341500922137e-05, "loss": 0.6775, "step": 1717 }, { "epoch": 0.25623625041947873, "grad_norm": 1.1728142499923706, "learning_rate": 1.7435111275376668e-05, "loss": 0.6092, "step": 1718 }, { "epoch": 0.2563853984115739, "grad_norm": 1.2118343114852905, "learning_rate": 1.7431879314056792e-05, "loss": 0.6776, "step": 1719 }, { "epoch": 0.256534546403669, "grad_norm": 1.1980693340301514, "learning_rate": 1.742864561771704e-05, "loss": 0.6497, "step": 1720 }, { "epoch": 0.2566836943957642, "grad_norm": 1.1691961288452148, "learning_rate": 1.7425410187112334e-05, "loss": 0.6349, "step": 1721 }, { "epoch": 0.25683284238785936, "grad_norm": 1.2958966493606567, "learning_rate": 1.742217302299801e-05, "loss": 0.7232, "step": 1722 }, { "epoch": 0.2569819903799545, "grad_norm": 1.2630891799926758, "learning_rate": 1.74189341261298e-05, "loss": 0.7222, "step": 1723 }, { "epoch": 0.25713113837204965, "grad_norm": 1.2874016761779785, "learning_rate": 1.741569349726385e-05, "loss": 0.6306, "step": 1724 }, { "epoch": 0.25728028636414485, "grad_norm": 1.2585132122039795, "learning_rate": 1.74124511371567e-05, "loss": 0.6366, "step": 1725 }, { "epoch": 0.25742943435624, "grad_norm": 1.1373804807662964, "learning_rate": 1.7409207046565306e-05, "loss": 0.6229, "step": 1726 }, { "epoch": 0.25757858234833514, "grad_norm": 1.2103432416915894, "learning_rate": 1.7405961226247022e-05, "loss": 0.6028, "step": 1727 }, { "epoch": 0.2577277303404303, "grad_norm": 1.2651399374008179, "learning_rate": 1.7402713676959598e-05, "loss": 0.6787, "step": 1728 }, { "epoch": 0.2578768783325254, "grad_norm": 1.1436799764633179, "learning_rate": 1.73994643994612e-05, "loss": 0.5859, "step": 1729 }, { "epoch": 0.2580260263246206, "grad_norm": 1.1558573246002197, "learning_rate": 1.7396213394510393e-05, "loss": 0.6329, "step": 1730 }, { "epoch": 0.25817517431671577, "grad_norm": 1.1607996225357056, "learning_rate": 1.7392960662866143e-05, "loss": 0.6352, "step": 1731 }, { "epoch": 0.2583243223088109, "grad_norm": 0.9049787521362305, "learning_rate": 1.7389706205287824e-05, "loss": 0.6764, "step": 1732 }, { "epoch": 0.25847347030090606, "grad_norm": 1.422385573387146, "learning_rate": 1.7386450022535207e-05, "loss": 0.6473, "step": 1733 }, { "epoch": 0.2586226182930012, "grad_norm": 1.1219661235809326, "learning_rate": 1.738319211536847e-05, "loss": 0.6517, "step": 1734 }, { "epoch": 0.2587717662850964, "grad_norm": 1.2043720483779907, "learning_rate": 1.7379932484548193e-05, "loss": 0.6509, "step": 1735 }, { "epoch": 0.25892091427719155, "grad_norm": 1.221616268157959, "learning_rate": 1.7376671130835362e-05, "loss": 0.6384, "step": 1736 }, { "epoch": 0.2590700622692867, "grad_norm": 1.1437373161315918, "learning_rate": 1.7373408054991348e-05, "loss": 0.5944, "step": 1737 }, { "epoch": 0.25921921026138184, "grad_norm": 1.2047526836395264, "learning_rate": 1.737014325777795e-05, "loss": 0.6381, "step": 1738 }, { "epoch": 0.25936835825347704, "grad_norm": 1.260296106338501, "learning_rate": 1.7366876739957346e-05, "loss": 0.6733, "step": 1739 }, { "epoch": 0.2595175062455722, "grad_norm": 1.116493821144104, "learning_rate": 1.7363608502292136e-05, "loss": 0.6068, "step": 1740 }, { "epoch": 0.2596666542376673, "grad_norm": 1.1480324268341064, "learning_rate": 1.7360338545545303e-05, "loss": 0.649, "step": 1741 }, { "epoch": 0.25981580222976247, "grad_norm": 1.2227944135665894, "learning_rate": 1.735706687048024e-05, "loss": 0.6836, "step": 1742 }, { "epoch": 0.2599649502218576, "grad_norm": 1.1798640489578247, "learning_rate": 1.7353793477860746e-05, "loss": 0.7205, "step": 1743 }, { "epoch": 0.2601140982139528, "grad_norm": 0.9486715793609619, "learning_rate": 1.735051836845101e-05, "loss": 0.6627, "step": 1744 }, { "epoch": 0.26026324620604796, "grad_norm": 1.169922947883606, "learning_rate": 1.734724154301563e-05, "loss": 0.6428, "step": 1745 }, { "epoch": 0.2604123941981431, "grad_norm": 1.1930066347122192, "learning_rate": 1.7343963002319597e-05, "loss": 0.5932, "step": 1746 }, { "epoch": 0.26056154219023825, "grad_norm": 1.2015354633331299, "learning_rate": 1.7340682747128314e-05, "loss": 0.6564, "step": 1747 }, { "epoch": 0.2607106901823334, "grad_norm": 1.1406705379486084, "learning_rate": 1.7337400778207578e-05, "loss": 0.6206, "step": 1748 }, { "epoch": 0.2608598381744286, "grad_norm": 1.1238248348236084, "learning_rate": 1.7334117096323578e-05, "loss": 0.6637, "step": 1749 }, { "epoch": 0.26100898616652374, "grad_norm": 1.191270112991333, "learning_rate": 1.733083170224292e-05, "loss": 0.6926, "step": 1750 }, { "epoch": 0.2611581341586189, "grad_norm": 1.145451307296753, "learning_rate": 1.732754459673259e-05, "loss": 0.7023, "step": 1751 }, { "epoch": 0.261307282150714, "grad_norm": 1.2031219005584717, "learning_rate": 1.7324255780559993e-05, "loss": 0.6841, "step": 1752 }, { "epoch": 0.2614564301428092, "grad_norm": 1.2157055139541626, "learning_rate": 1.732096525449292e-05, "loss": 0.708, "step": 1753 }, { "epoch": 0.26160557813490437, "grad_norm": 1.1216436624526978, "learning_rate": 1.7317673019299566e-05, "loss": 0.6593, "step": 1754 }, { "epoch": 0.2617547261269995, "grad_norm": 1.153321385383606, "learning_rate": 1.7314379075748524e-05, "loss": 0.6445, "step": 1755 }, { "epoch": 0.26190387411909466, "grad_norm": 1.3354092836380005, "learning_rate": 1.7311083424608785e-05, "loss": 0.5893, "step": 1756 }, { "epoch": 0.2620530221111898, "grad_norm": 1.1826887130737305, "learning_rate": 1.7307786066649742e-05, "loss": 0.6367, "step": 1757 }, { "epoch": 0.262202170103285, "grad_norm": 1.0088722705841064, "learning_rate": 1.730448700264119e-05, "loss": 0.5944, "step": 1758 }, { "epoch": 0.26235131809538015, "grad_norm": 1.291614055633545, "learning_rate": 1.7301186233353303e-05, "loss": 0.6368, "step": 1759 }, { "epoch": 0.2625004660874753, "grad_norm": 1.2902599573135376, "learning_rate": 1.7297883759556676e-05, "loss": 0.647, "step": 1760 }, { "epoch": 0.26264961407957044, "grad_norm": 1.2517753839492798, "learning_rate": 1.7294579582022296e-05, "loss": 0.6797, "step": 1761 }, { "epoch": 0.26279876207166564, "grad_norm": 1.112047791481018, "learning_rate": 1.7291273701521534e-05, "loss": 0.6041, "step": 1762 }, { "epoch": 0.2629479100637608, "grad_norm": 1.2411757707595825, "learning_rate": 1.7287966118826174e-05, "loss": 0.7283, "step": 1763 }, { "epoch": 0.2630970580558559, "grad_norm": 1.1682233810424805, "learning_rate": 1.72846568347084e-05, "loss": 0.6856, "step": 1764 }, { "epoch": 0.26324620604795107, "grad_norm": 1.2433886528015137, "learning_rate": 1.728134584994077e-05, "loss": 0.6095, "step": 1765 }, { "epoch": 0.2633953540400462, "grad_norm": 1.1890336275100708, "learning_rate": 1.7278033165296267e-05, "loss": 0.635, "step": 1766 }, { "epoch": 0.2635445020321414, "grad_norm": 1.3002066612243652, "learning_rate": 1.7274718781548256e-05, "loss": 0.6849, "step": 1767 }, { "epoch": 0.26369365002423656, "grad_norm": 1.139360785484314, "learning_rate": 1.7271402699470498e-05, "loss": 0.5844, "step": 1768 }, { "epoch": 0.2638427980163317, "grad_norm": 1.202312707901001, "learning_rate": 1.7268084919837155e-05, "loss": 0.6548, "step": 1769 }, { "epoch": 0.26399194600842685, "grad_norm": 1.3387713432312012, "learning_rate": 1.7264765443422783e-05, "loss": 0.7931, "step": 1770 }, { "epoch": 0.264141094000522, "grad_norm": 1.1218315362930298, "learning_rate": 1.726144427100234e-05, "loss": 0.6193, "step": 1771 }, { "epoch": 0.2642902419926172, "grad_norm": 0.9416771531105042, "learning_rate": 1.7258121403351168e-05, "loss": 0.7137, "step": 1772 }, { "epoch": 0.26443938998471234, "grad_norm": 1.1242603063583374, "learning_rate": 1.7254796841245017e-05, "loss": 0.558, "step": 1773 }, { "epoch": 0.2645885379768075, "grad_norm": 1.3657147884368896, "learning_rate": 1.7251470585460026e-05, "loss": 0.6792, "step": 1774 }, { "epoch": 0.2647376859689026, "grad_norm": 0.9067908525466919, "learning_rate": 1.724814263677273e-05, "loss": 0.6842, "step": 1775 }, { "epoch": 0.2648868339609978, "grad_norm": 1.1146438121795654, "learning_rate": 1.7244812995960056e-05, "loss": 0.6341, "step": 1776 }, { "epoch": 0.26503598195309297, "grad_norm": 1.1330249309539795, "learning_rate": 1.7241481663799337e-05, "loss": 0.669, "step": 1777 }, { "epoch": 0.2651851299451881, "grad_norm": 1.2585062980651855, "learning_rate": 1.7238148641068292e-05, "loss": 0.6877, "step": 1778 }, { "epoch": 0.26533427793728326, "grad_norm": 1.3056082725524902, "learning_rate": 1.7234813928545034e-05, "loss": 0.7393, "step": 1779 }, { "epoch": 0.2654834259293784, "grad_norm": 1.1765590906143188, "learning_rate": 1.7231477527008074e-05, "loss": 0.6322, "step": 1780 }, { "epoch": 0.2656325739214736, "grad_norm": 1.2189912796020508, "learning_rate": 1.722813943723632e-05, "loss": 0.682, "step": 1781 }, { "epoch": 0.26578172191356875, "grad_norm": 1.2540764808654785, "learning_rate": 1.7224799660009064e-05, "loss": 0.7141, "step": 1782 }, { "epoch": 0.2659308699056639, "grad_norm": 1.2106246948242188, "learning_rate": 1.7221458196106003e-05, "loss": 0.6405, "step": 1783 }, { "epoch": 0.26608001789775904, "grad_norm": 1.1659451723098755, "learning_rate": 1.721811504630722e-05, "loss": 0.6322, "step": 1784 }, { "epoch": 0.2662291658898542, "grad_norm": 1.3689993619918823, "learning_rate": 1.72147702113932e-05, "loss": 0.6121, "step": 1785 }, { "epoch": 0.2663783138819494, "grad_norm": 1.133482575416565, "learning_rate": 1.721142369214481e-05, "loss": 0.6281, "step": 1786 }, { "epoch": 0.2665274618740445, "grad_norm": 1.1333646774291992, "learning_rate": 1.7208075489343318e-05, "loss": 0.6806, "step": 1787 }, { "epoch": 0.26667660986613967, "grad_norm": 1.1234184503555298, "learning_rate": 1.7204725603770387e-05, "loss": 0.713, "step": 1788 }, { "epoch": 0.2668257578582348, "grad_norm": 1.2714111804962158, "learning_rate": 1.7201374036208066e-05, "loss": 0.7358, "step": 1789 }, { "epoch": 0.26697490585033, "grad_norm": 1.2322715520858765, "learning_rate": 1.71980207874388e-05, "loss": 0.5569, "step": 1790 }, { "epoch": 0.26712405384242516, "grad_norm": 1.2791780233383179, "learning_rate": 1.7194665858245428e-05, "loss": 0.6493, "step": 1791 }, { "epoch": 0.2672732018345203, "grad_norm": 1.169500470161438, "learning_rate": 1.719130924941118e-05, "loss": 0.5954, "step": 1792 }, { "epoch": 0.26742234982661545, "grad_norm": 1.2855952978134155, "learning_rate": 1.718795096171968e-05, "loss": 0.6206, "step": 1793 }, { "epoch": 0.2675714978187106, "grad_norm": 1.3565901517868042, "learning_rate": 1.718459099595493e-05, "loss": 0.6845, "step": 1794 }, { "epoch": 0.2677206458108058, "grad_norm": 1.2369558811187744, "learning_rate": 1.718122935290135e-05, "loss": 0.7051, "step": 1795 }, { "epoch": 0.26786979380290094, "grad_norm": 1.3869770765304565, "learning_rate": 1.717786603334373e-05, "loss": 0.7366, "step": 1796 }, { "epoch": 0.2680189417949961, "grad_norm": 1.2304410934448242, "learning_rate": 1.717450103806726e-05, "loss": 0.7172, "step": 1797 }, { "epoch": 0.2681680897870912, "grad_norm": 1.1988401412963867, "learning_rate": 1.717113436785752e-05, "loss": 0.5571, "step": 1798 }, { "epoch": 0.2683172377791864, "grad_norm": 1.1962413787841797, "learning_rate": 1.716776602350048e-05, "loss": 0.6799, "step": 1799 }, { "epoch": 0.26846638577128157, "grad_norm": 1.1995253562927246, "learning_rate": 1.71643960057825e-05, "loss": 0.6071, "step": 1800 }, { "epoch": 0.2686155337633767, "grad_norm": 1.229780912399292, "learning_rate": 1.7161024315490336e-05, "loss": 0.7058, "step": 1801 }, { "epoch": 0.26876468175547186, "grad_norm": 1.2760694026947021, "learning_rate": 1.715765095341113e-05, "loss": 0.6584, "step": 1802 }, { "epoch": 0.268913829747567, "grad_norm": 1.097308874130249, "learning_rate": 1.715427592033241e-05, "loss": 0.6021, "step": 1803 }, { "epoch": 0.2690629777396622, "grad_norm": 1.3191635608673096, "learning_rate": 1.715089921704211e-05, "loss": 0.7064, "step": 1804 }, { "epoch": 0.26921212573175735, "grad_norm": 1.2423380613327026, "learning_rate": 1.7147520844328526e-05, "loss": 0.6547, "step": 1805 }, { "epoch": 0.2693612737238525, "grad_norm": 1.1260634660720825, "learning_rate": 1.7144140802980377e-05, "loss": 0.6314, "step": 1806 }, { "epoch": 0.26951042171594763, "grad_norm": 1.1369701623916626, "learning_rate": 1.714075909378675e-05, "loss": 0.6101, "step": 1807 }, { "epoch": 0.2696595697080428, "grad_norm": 1.3089529275894165, "learning_rate": 1.7137375717537122e-05, "loss": 0.6788, "step": 1808 }, { "epoch": 0.269808717700138, "grad_norm": 1.2882227897644043, "learning_rate": 1.7133990675021367e-05, "loss": 0.6937, "step": 1809 }, { "epoch": 0.2699578656922331, "grad_norm": 1.1917266845703125, "learning_rate": 1.713060396702975e-05, "loss": 0.6077, "step": 1810 }, { "epoch": 0.27010701368432827, "grad_norm": 1.1208295822143555, "learning_rate": 1.7127215594352914e-05, "loss": 0.6265, "step": 1811 }, { "epoch": 0.2702561616764234, "grad_norm": 1.2962764501571655, "learning_rate": 1.7123825557781894e-05, "loss": 0.7384, "step": 1812 }, { "epoch": 0.2704053096685186, "grad_norm": 1.2103972434997559, "learning_rate": 1.7120433858108123e-05, "loss": 0.6905, "step": 1813 }, { "epoch": 0.27055445766061376, "grad_norm": 1.2964078187942505, "learning_rate": 1.7117040496123408e-05, "loss": 0.5948, "step": 1814 }, { "epoch": 0.2707036056527089, "grad_norm": 1.2195066213607788, "learning_rate": 1.711364547261996e-05, "loss": 0.6434, "step": 1815 }, { "epoch": 0.27085275364480405, "grad_norm": 1.2215718030929565, "learning_rate": 1.7110248788390358e-05, "loss": 0.7005, "step": 1816 }, { "epoch": 0.2710019016368992, "grad_norm": 1.1240499019622803, "learning_rate": 1.7106850444227588e-05, "loss": 0.6498, "step": 1817 }, { "epoch": 0.2711510496289944, "grad_norm": 1.2587807178497314, "learning_rate": 1.7103450440925013e-05, "loss": 0.6933, "step": 1818 }, { "epoch": 0.27130019762108953, "grad_norm": 1.2365666627883911, "learning_rate": 1.710004877927638e-05, "loss": 0.7614, "step": 1819 }, { "epoch": 0.2714493456131847, "grad_norm": 1.1118208169937134, "learning_rate": 1.7096645460075837e-05, "loss": 0.6307, "step": 1820 }, { "epoch": 0.2715984936052798, "grad_norm": 1.2219115495681763, "learning_rate": 1.7093240484117907e-05, "loss": 0.7253, "step": 1821 }, { "epoch": 0.271747641597375, "grad_norm": 1.1320210695266724, "learning_rate": 1.7089833852197508e-05, "loss": 0.5857, "step": 1822 }, { "epoch": 0.27189678958947017, "grad_norm": 1.1706202030181885, "learning_rate": 1.708642556510993e-05, "loss": 0.6828, "step": 1823 }, { "epoch": 0.2720459375815653, "grad_norm": 1.2667720317840576, "learning_rate": 1.7083015623650867e-05, "loss": 0.6352, "step": 1824 }, { "epoch": 0.27219508557366046, "grad_norm": 1.2360317707061768, "learning_rate": 1.707960402861639e-05, "loss": 0.6397, "step": 1825 }, { "epoch": 0.2723442335657556, "grad_norm": 1.1350665092468262, "learning_rate": 1.707619078080296e-05, "loss": 0.7003, "step": 1826 }, { "epoch": 0.2724933815578508, "grad_norm": 1.2619926929473877, "learning_rate": 1.707277588100742e-05, "loss": 0.7112, "step": 1827 }, { "epoch": 0.27264252954994594, "grad_norm": 1.1773428916931152, "learning_rate": 1.7069359330027e-05, "loss": 0.6631, "step": 1828 }, { "epoch": 0.2727916775420411, "grad_norm": 1.2034990787506104, "learning_rate": 1.706594112865931e-05, "loss": 0.6201, "step": 1829 }, { "epoch": 0.27294082553413623, "grad_norm": 1.0904669761657715, "learning_rate": 1.706252127770236e-05, "loss": 0.6339, "step": 1830 }, { "epoch": 0.2730899735262314, "grad_norm": 1.1710742712020874, "learning_rate": 1.7059099777954532e-05, "loss": 0.5935, "step": 1831 }, { "epoch": 0.2732391215183266, "grad_norm": 1.3610039949417114, "learning_rate": 1.7055676630214598e-05, "loss": 0.6468, "step": 1832 }, { "epoch": 0.2733882695104217, "grad_norm": 1.340254783630371, "learning_rate": 1.7052251835281716e-05, "loss": 0.6548, "step": 1833 }, { "epoch": 0.27353741750251687, "grad_norm": 1.182204008102417, "learning_rate": 1.704882539395542e-05, "loss": 0.6324, "step": 1834 }, { "epoch": 0.273686565494612, "grad_norm": 1.1912124156951904, "learning_rate": 1.704539730703564e-05, "loss": 0.7379, "step": 1835 }, { "epoch": 0.2738357134867072, "grad_norm": 1.2636033296585083, "learning_rate": 1.704196757532268e-05, "loss": 0.7357, "step": 1836 }, { "epoch": 0.27398486147880236, "grad_norm": 1.0547915697097778, "learning_rate": 1.703853619961724e-05, "loss": 0.5558, "step": 1837 }, { "epoch": 0.2741340094708975, "grad_norm": 1.2724015712738037, "learning_rate": 1.7035103180720392e-05, "loss": 0.7217, "step": 1838 }, { "epoch": 0.27428315746299264, "grad_norm": 1.2213484048843384, "learning_rate": 1.70316685194336e-05, "loss": 0.6728, "step": 1839 }, { "epoch": 0.2744323054550878, "grad_norm": 1.252753734588623, "learning_rate": 1.70282322165587e-05, "loss": 0.6609, "step": 1840 }, { "epoch": 0.274581453447183, "grad_norm": 1.196608066558838, "learning_rate": 1.7024794272897926e-05, "loss": 0.6581, "step": 1841 }, { "epoch": 0.27473060143927813, "grad_norm": 1.2433165311813354, "learning_rate": 1.7021354689253888e-05, "loss": 0.663, "step": 1842 }, { "epoch": 0.2748797494313733, "grad_norm": 1.1577775478363037, "learning_rate": 1.7017913466429572e-05, "loss": 0.5888, "step": 1843 }, { "epoch": 0.2750288974234684, "grad_norm": 1.2608187198638916, "learning_rate": 1.701447060522836e-05, "loss": 0.6894, "step": 1844 }, { "epoch": 0.27517804541556357, "grad_norm": 1.3826055526733398, "learning_rate": 1.7011026106454008e-05, "loss": 0.7593, "step": 1845 }, { "epoch": 0.27532719340765877, "grad_norm": 1.1624242067337036, "learning_rate": 1.7007579970910657e-05, "loss": 0.6128, "step": 1846 }, { "epoch": 0.2754763413997539, "grad_norm": 1.1358338594436646, "learning_rate": 1.700413219940283e-05, "loss": 0.6541, "step": 1847 }, { "epoch": 0.27562548939184905, "grad_norm": 1.2102652788162231, "learning_rate": 1.7000682792735427e-05, "loss": 0.7363, "step": 1848 }, { "epoch": 0.2757746373839442, "grad_norm": 1.289608359336853, "learning_rate": 1.699723175171374e-05, "loss": 0.6827, "step": 1849 }, { "epoch": 0.2759237853760394, "grad_norm": 1.085961103439331, "learning_rate": 1.6993779077143437e-05, "loss": 0.5151, "step": 1850 }, { "epoch": 0.27607293336813454, "grad_norm": 1.265276551246643, "learning_rate": 1.6990324769830557e-05, "loss": 0.654, "step": 1851 }, { "epoch": 0.2762220813602297, "grad_norm": 1.1580480337142944, "learning_rate": 1.6986868830581542e-05, "loss": 0.5653, "step": 1852 }, { "epoch": 0.27637122935232483, "grad_norm": 0.9812148809432983, "learning_rate": 1.6983411260203196e-05, "loss": 0.6451, "step": 1853 }, { "epoch": 0.27652037734442, "grad_norm": 1.0985432863235474, "learning_rate": 1.6979952059502715e-05, "loss": 0.5858, "step": 1854 }, { "epoch": 0.2766695253365152, "grad_norm": 1.2397221326828003, "learning_rate": 1.697649122928767e-05, "loss": 0.6629, "step": 1855 }, { "epoch": 0.2768186733286103, "grad_norm": 1.2655658721923828, "learning_rate": 1.6973028770366015e-05, "loss": 0.6938, "step": 1856 }, { "epoch": 0.27696782132070547, "grad_norm": 1.266351580619812, "learning_rate": 1.6969564683546077e-05, "loss": 0.7087, "step": 1857 }, { "epoch": 0.2771169693128006, "grad_norm": 1.2190797328948975, "learning_rate": 1.6966098969636583e-05, "loss": 0.5667, "step": 1858 }, { "epoch": 0.2772661173048958, "grad_norm": 1.3050237894058228, "learning_rate": 1.696263162944661e-05, "loss": 0.7484, "step": 1859 }, { "epoch": 0.27741526529699095, "grad_norm": 1.2013524770736694, "learning_rate": 1.695916266378564e-05, "loss": 0.6393, "step": 1860 }, { "epoch": 0.2775644132890861, "grad_norm": 1.267709493637085, "learning_rate": 1.695569207346353e-05, "loss": 0.7113, "step": 1861 }, { "epoch": 0.27771356128118124, "grad_norm": 1.1462839841842651, "learning_rate": 1.69522198592905e-05, "loss": 0.6361, "step": 1862 }, { "epoch": 0.2778627092732764, "grad_norm": 1.3092927932739258, "learning_rate": 1.6948746022077167e-05, "loss": 0.6496, "step": 1863 }, { "epoch": 0.2780118572653716, "grad_norm": 1.148301362991333, "learning_rate": 1.694527056263452e-05, "loss": 0.633, "step": 1864 }, { "epoch": 0.27816100525746673, "grad_norm": 0.9127328991889954, "learning_rate": 1.6941793481773924e-05, "loss": 0.6691, "step": 1865 }, { "epoch": 0.2783101532495619, "grad_norm": 1.2814834117889404, "learning_rate": 1.693831478030713e-05, "loss": 0.7503, "step": 1866 }, { "epoch": 0.278459301241657, "grad_norm": 1.1434221267700195, "learning_rate": 1.6934834459046262e-05, "loss": 0.6415, "step": 1867 }, { "epoch": 0.27860844923375216, "grad_norm": 1.3430933952331543, "learning_rate": 1.6931352518803825e-05, "loss": 0.6802, "step": 1868 }, { "epoch": 0.27875759722584736, "grad_norm": 1.3516209125518799, "learning_rate": 1.6927868960392698e-05, "loss": 0.7344, "step": 1869 }, { "epoch": 0.2789067452179425, "grad_norm": 1.1695646047592163, "learning_rate": 1.692438378462614e-05, "loss": 0.6254, "step": 1870 }, { "epoch": 0.27905589321003765, "grad_norm": 1.2928824424743652, "learning_rate": 1.6920896992317785e-05, "loss": 0.6702, "step": 1871 }, { "epoch": 0.2792050412021328, "grad_norm": 1.1908149719238281, "learning_rate": 1.6917408584281654e-05, "loss": 0.6525, "step": 1872 }, { "epoch": 0.279354189194228, "grad_norm": 1.2228859663009644, "learning_rate": 1.6913918561332132e-05, "loss": 0.642, "step": 1873 }, { "epoch": 0.27950333718632314, "grad_norm": 1.2636531591415405, "learning_rate": 1.6910426924283993e-05, "loss": 0.61, "step": 1874 }, { "epoch": 0.2796524851784183, "grad_norm": 1.1871291399002075, "learning_rate": 1.6906933673952375e-05, "loss": 0.6125, "step": 1875 }, { "epoch": 0.27980163317051343, "grad_norm": 1.168679118156433, "learning_rate": 1.6903438811152803e-05, "loss": 0.5914, "step": 1876 }, { "epoch": 0.2799507811626086, "grad_norm": 1.1882373094558716, "learning_rate": 1.6899942336701176e-05, "loss": 0.7177, "step": 1877 }, { "epoch": 0.2800999291547038, "grad_norm": 1.2292972803115845, "learning_rate": 1.6896444251413768e-05, "loss": 0.6518, "step": 1878 }, { "epoch": 0.2802490771467989, "grad_norm": 1.1782054901123047, "learning_rate": 1.6892944556107233e-05, "loss": 0.6018, "step": 1879 }, { "epoch": 0.28039822513889406, "grad_norm": 1.1640385389328003, "learning_rate": 1.688944325159859e-05, "loss": 0.6249, "step": 1880 }, { "epoch": 0.2805473731309892, "grad_norm": 1.14742112159729, "learning_rate": 1.6885940338705243e-05, "loss": 0.5984, "step": 1881 }, { "epoch": 0.2806965211230844, "grad_norm": 1.269797682762146, "learning_rate": 1.6882435818244976e-05, "loss": 0.6249, "step": 1882 }, { "epoch": 0.28084566911517955, "grad_norm": 0.9315639734268188, "learning_rate": 1.687892969103593e-05, "loss": 0.6584, "step": 1883 }, { "epoch": 0.2809948171072747, "grad_norm": 1.0713640451431274, "learning_rate": 1.6875421957896646e-05, "loss": 0.6076, "step": 1884 }, { "epoch": 0.28114396509936984, "grad_norm": 1.275266170501709, "learning_rate": 1.6871912619646017e-05, "loss": 0.67, "step": 1885 }, { "epoch": 0.281293113091465, "grad_norm": 1.2852250337600708, "learning_rate": 1.6868401677103324e-05, "loss": 0.6553, "step": 1886 }, { "epoch": 0.2814422610835602, "grad_norm": 1.2178142070770264, "learning_rate": 1.6864889131088223e-05, "loss": 0.6483, "step": 1887 }, { "epoch": 0.28159140907565533, "grad_norm": 1.1829043626785278, "learning_rate": 1.686137498242073e-05, "loss": 0.619, "step": 1888 }, { "epoch": 0.2817405570677505, "grad_norm": 1.3142180442810059, "learning_rate": 1.6857859231921258e-05, "loss": 0.6823, "step": 1889 }, { "epoch": 0.2818897050598456, "grad_norm": 1.1780891418457031, "learning_rate": 1.6854341880410573e-05, "loss": 0.6844, "step": 1890 }, { "epoch": 0.28203885305194076, "grad_norm": 1.1965874433517456, "learning_rate": 1.6850822928709825e-05, "loss": 0.6792, "step": 1891 }, { "epoch": 0.28218800104403596, "grad_norm": 1.1624606847763062, "learning_rate": 1.6847302377640538e-05, "loss": 0.6352, "step": 1892 }, { "epoch": 0.2823371490361311, "grad_norm": 1.2286205291748047, "learning_rate": 1.6843780228024605e-05, "loss": 0.6743, "step": 1893 }, { "epoch": 0.28248629702822625, "grad_norm": 1.2738559246063232, "learning_rate": 1.6840256480684294e-05, "loss": 0.6768, "step": 1894 }, { "epoch": 0.2826354450203214, "grad_norm": 1.274094581604004, "learning_rate": 1.683673113644225e-05, "loss": 0.645, "step": 1895 }, { "epoch": 0.2827845930124166, "grad_norm": 1.1850746870040894, "learning_rate": 1.683320419612148e-05, "loss": 0.5928, "step": 1896 }, { "epoch": 0.28293374100451174, "grad_norm": 1.2989360094070435, "learning_rate": 1.682967566054538e-05, "loss": 0.6141, "step": 1897 }, { "epoch": 0.2830828889966069, "grad_norm": 1.0942033529281616, "learning_rate": 1.6826145530537705e-05, "loss": 0.5571, "step": 1898 }, { "epoch": 0.28323203698870203, "grad_norm": 1.1780894994735718, "learning_rate": 1.682261380692259e-05, "loss": 0.6403, "step": 1899 }, { "epoch": 0.2833811849807972, "grad_norm": 1.163030743598938, "learning_rate": 1.6819080490524527e-05, "loss": 0.6262, "step": 1900 }, { "epoch": 0.2835303329728924, "grad_norm": 1.222395420074463, "learning_rate": 1.6815545582168403e-05, "loss": 0.6956, "step": 1901 }, { "epoch": 0.2836794809649875, "grad_norm": 1.1703157424926758, "learning_rate": 1.681200908267946e-05, "loss": 0.6043, "step": 1902 }, { "epoch": 0.28382862895708266, "grad_norm": 1.3106616735458374, "learning_rate": 1.680847099288332e-05, "loss": 0.7192, "step": 1903 }, { "epoch": 0.2839777769491778, "grad_norm": 1.16339910030365, "learning_rate": 1.680493131360597e-05, "loss": 0.6773, "step": 1904 }, { "epoch": 0.28412692494127295, "grad_norm": 1.1292383670806885, "learning_rate": 1.680139004567377e-05, "loss": 0.6383, "step": 1905 }, { "epoch": 0.28427607293336815, "grad_norm": 1.243338942527771, "learning_rate": 1.6797847189913456e-05, "loss": 0.6304, "step": 1906 }, { "epoch": 0.2844252209254633, "grad_norm": 1.237883448600769, "learning_rate": 1.6794302747152125e-05, "loss": 0.6178, "step": 1907 }, { "epoch": 0.28457436891755844, "grad_norm": 1.2124580144882202, "learning_rate": 1.6790756718217252e-05, "loss": 0.6013, "step": 1908 }, { "epoch": 0.2847235169096536, "grad_norm": 1.1135786771774292, "learning_rate": 1.6787209103936677e-05, "loss": 0.5705, "step": 1909 }, { "epoch": 0.2848726649017488, "grad_norm": 1.2147399187088013, "learning_rate": 1.6783659905138626e-05, "loss": 0.6492, "step": 1910 }, { "epoch": 0.28502181289384393, "grad_norm": 1.1957935094833374, "learning_rate": 1.6780109122651665e-05, "loss": 0.6028, "step": 1911 }, { "epoch": 0.2851709608859391, "grad_norm": 1.2556028366088867, "learning_rate": 1.677655675730476e-05, "loss": 0.7442, "step": 1912 }, { "epoch": 0.2853201088780342, "grad_norm": 1.15304696559906, "learning_rate": 1.6773002809927228e-05, "loss": 0.6151, "step": 1913 }, { "epoch": 0.28546925687012936, "grad_norm": 1.2725099325180054, "learning_rate": 1.6769447281348757e-05, "loss": 0.713, "step": 1914 }, { "epoch": 0.28561840486222456, "grad_norm": 1.2029634714126587, "learning_rate": 1.676589017239942e-05, "loss": 0.6148, "step": 1915 }, { "epoch": 0.2857675528543197, "grad_norm": 1.182120680809021, "learning_rate": 1.676233148390963e-05, "loss": 0.6678, "step": 1916 }, { "epoch": 0.28591670084641485, "grad_norm": 1.0975844860076904, "learning_rate": 1.6758771216710205e-05, "loss": 0.5816, "step": 1917 }, { "epoch": 0.28606584883851, "grad_norm": 1.223564863204956, "learning_rate": 1.675520937163229e-05, "loss": 0.6948, "step": 1918 }, { "epoch": 0.2862149968306052, "grad_norm": 1.1636247634887695, "learning_rate": 1.675164594950744e-05, "loss": 0.611, "step": 1919 }, { "epoch": 0.28636414482270034, "grad_norm": 1.1005092859268188, "learning_rate": 1.6748080951167552e-05, "loss": 0.663, "step": 1920 }, { "epoch": 0.2865132928147955, "grad_norm": 1.1910526752471924, "learning_rate": 1.6744514377444895e-05, "loss": 0.7071, "step": 1921 }, { "epoch": 0.28666244080689063, "grad_norm": 1.227667212486267, "learning_rate": 1.674094622917211e-05, "loss": 0.6558, "step": 1922 }, { "epoch": 0.2868115887989858, "grad_norm": 1.1635770797729492, "learning_rate": 1.6737376507182205e-05, "loss": 0.6715, "step": 1923 }, { "epoch": 0.286960736791081, "grad_norm": 1.326275110244751, "learning_rate": 1.6733805212308553e-05, "loss": 0.7066, "step": 1924 }, { "epoch": 0.2871098847831761, "grad_norm": 1.2623032331466675, "learning_rate": 1.67302323453849e-05, "loss": 0.6508, "step": 1925 }, { "epoch": 0.28725903277527126, "grad_norm": 1.2393200397491455, "learning_rate": 1.6726657907245348e-05, "loss": 0.6523, "step": 1926 }, { "epoch": 0.2874081807673664, "grad_norm": 1.1626958847045898, "learning_rate": 1.6723081898724377e-05, "loss": 0.6023, "step": 1927 }, { "epoch": 0.28755732875946155, "grad_norm": 1.2307543754577637, "learning_rate": 1.6719504320656827e-05, "loss": 0.6697, "step": 1928 }, { "epoch": 0.28770647675155675, "grad_norm": 1.1519428491592407, "learning_rate": 1.671592517387791e-05, "loss": 0.6538, "step": 1929 }, { "epoch": 0.2878556247436519, "grad_norm": 1.1997557878494263, "learning_rate": 1.6712344459223198e-05, "loss": 0.7217, "step": 1930 }, { "epoch": 0.28800477273574704, "grad_norm": 1.0593386888504028, "learning_rate": 1.6708762177528634e-05, "loss": 0.5899, "step": 1931 }, { "epoch": 0.2881539207278422, "grad_norm": 1.1833240985870361, "learning_rate": 1.670517832963052e-05, "loss": 0.6716, "step": 1932 }, { "epoch": 0.2883030687199374, "grad_norm": 1.1253337860107422, "learning_rate": 1.670159291636553e-05, "loss": 0.5368, "step": 1933 }, { "epoch": 0.2884522167120325, "grad_norm": 1.2433786392211914, "learning_rate": 1.6698005938570702e-05, "loss": 0.6369, "step": 1934 }, { "epoch": 0.28860136470412767, "grad_norm": 1.1496217250823975, "learning_rate": 1.6694417397083446e-05, "loss": 0.6511, "step": 1935 }, { "epoch": 0.2887505126962228, "grad_norm": 1.1414586305618286, "learning_rate": 1.669082729274152e-05, "loss": 0.6701, "step": 1936 }, { "epoch": 0.28889966068831796, "grad_norm": 1.2495090961456299, "learning_rate": 1.6687235626383057e-05, "loss": 0.5642, "step": 1937 }, { "epoch": 0.28904880868041316, "grad_norm": 1.1886833906173706, "learning_rate": 1.6683642398846563e-05, "loss": 0.6758, "step": 1938 }, { "epoch": 0.2891979566725083, "grad_norm": 1.3606771230697632, "learning_rate": 1.6680047610970894e-05, "loss": 0.6572, "step": 1939 }, { "epoch": 0.28934710466460345, "grad_norm": 1.2371063232421875, "learning_rate": 1.6676451263595276e-05, "loss": 0.6726, "step": 1940 }, { "epoch": 0.2894962526566986, "grad_norm": 1.1987515687942505, "learning_rate": 1.6672853357559304e-05, "loss": 0.6412, "step": 1941 }, { "epoch": 0.28964540064879374, "grad_norm": 1.1612144708633423, "learning_rate": 1.666925389370293e-05, "loss": 0.6157, "step": 1942 }, { "epoch": 0.28979454864088894, "grad_norm": 1.2979209423065186, "learning_rate": 1.666565287286647e-05, "loss": 0.7051, "step": 1943 }, { "epoch": 0.2899436966329841, "grad_norm": 1.2228338718414307, "learning_rate": 1.6662050295890605e-05, "loss": 0.7079, "step": 1944 }, { "epoch": 0.2900928446250792, "grad_norm": 1.2042278051376343, "learning_rate": 1.6658446163616376e-05, "loss": 0.6348, "step": 1945 }, { "epoch": 0.29024199261717437, "grad_norm": 1.1794925928115845, "learning_rate": 1.6654840476885205e-05, "loss": 0.6557, "step": 1946 }, { "epoch": 0.29039114060926957, "grad_norm": 0.9931629300117493, "learning_rate": 1.665123323653885e-05, "loss": 0.6638, "step": 1947 }, { "epoch": 0.2905402886013647, "grad_norm": 1.1077033281326294, "learning_rate": 1.6647624443419446e-05, "loss": 0.6865, "step": 1948 }, { "epoch": 0.29068943659345986, "grad_norm": 1.133957862854004, "learning_rate": 1.664401409836949e-05, "loss": 0.6036, "step": 1949 }, { "epoch": 0.290838584585555, "grad_norm": 1.2205184698104858, "learning_rate": 1.6640402202231847e-05, "loss": 0.683, "step": 1950 }, { "epoch": 0.29098773257765015, "grad_norm": 1.157766342163086, "learning_rate": 1.6636788755849725e-05, "loss": 0.6683, "step": 1951 }, { "epoch": 0.29113688056974535, "grad_norm": 1.0826164484024048, "learning_rate": 1.6633173760066717e-05, "loss": 0.5955, "step": 1952 }, { "epoch": 0.2912860285618405, "grad_norm": 1.2395155429840088, "learning_rate": 1.6629557215726762e-05, "loss": 0.6585, "step": 1953 }, { "epoch": 0.29143517655393564, "grad_norm": 1.153218388557434, "learning_rate": 1.6625939123674165e-05, "loss": 0.6835, "step": 1954 }, { "epoch": 0.2915843245460308, "grad_norm": 1.1024078130722046, "learning_rate": 1.6622319484753595e-05, "loss": 0.6474, "step": 1955 }, { "epoch": 0.291733472538126, "grad_norm": 1.202724814414978, "learning_rate": 1.6618698299810078e-05, "loss": 0.6351, "step": 1956 }, { "epoch": 0.2918826205302211, "grad_norm": 1.2760374546051025, "learning_rate": 1.6615075569689005e-05, "loss": 0.6885, "step": 1957 }, { "epoch": 0.29203176852231627, "grad_norm": 1.141248106956482, "learning_rate": 1.661145129523612e-05, "loss": 0.6544, "step": 1958 }, { "epoch": 0.2921809165144114, "grad_norm": 1.2226407527923584, "learning_rate": 1.660782547729754e-05, "loss": 0.669, "step": 1959 }, { "epoch": 0.29233006450650656, "grad_norm": 0.937691330909729, "learning_rate": 1.6604198116719735e-05, "loss": 0.7026, "step": 1960 }, { "epoch": 0.29247921249860176, "grad_norm": 1.2583891153335571, "learning_rate": 1.6600569214349528e-05, "loss": 0.7115, "step": 1961 }, { "epoch": 0.2926283604906969, "grad_norm": 1.1331427097320557, "learning_rate": 1.6596938771034116e-05, "loss": 0.6692, "step": 1962 }, { "epoch": 0.29277750848279205, "grad_norm": 1.0084739923477173, "learning_rate": 1.6593306787621052e-05, "loss": 0.5745, "step": 1963 }, { "epoch": 0.2929266564748872, "grad_norm": 1.1234569549560547, "learning_rate": 1.658967326495824e-05, "loss": 0.6525, "step": 1964 }, { "epoch": 0.29307580446698234, "grad_norm": 1.1871984004974365, "learning_rate": 1.658603820389395e-05, "loss": 0.6215, "step": 1965 }, { "epoch": 0.29322495245907754, "grad_norm": 1.181831955909729, "learning_rate": 1.6582401605276813e-05, "loss": 0.6132, "step": 1966 }, { "epoch": 0.2933741004511727, "grad_norm": 1.2404823303222656, "learning_rate": 1.657876346995581e-05, "loss": 0.6829, "step": 1967 }, { "epoch": 0.2935232484432678, "grad_norm": 1.272614598274231, "learning_rate": 1.65751237987803e-05, "loss": 0.7121, "step": 1968 }, { "epoch": 0.29367239643536297, "grad_norm": 1.1728522777557373, "learning_rate": 1.6571482592599974e-05, "loss": 0.729, "step": 1969 }, { "epoch": 0.29382154442745817, "grad_norm": 1.140467643737793, "learning_rate": 1.6567839852264898e-05, "loss": 0.5815, "step": 1970 }, { "epoch": 0.2939706924195533, "grad_norm": 1.1367402076721191, "learning_rate": 1.65641955786255e-05, "loss": 0.6502, "step": 1971 }, { "epoch": 0.29411984041164846, "grad_norm": 1.204301118850708, "learning_rate": 1.656054977253255e-05, "loss": 0.6903, "step": 1972 }, { "epoch": 0.2942689884037436, "grad_norm": 1.0943002700805664, "learning_rate": 1.655690243483719e-05, "loss": 0.6179, "step": 1973 }, { "epoch": 0.29441813639583875, "grad_norm": 1.1874165534973145, "learning_rate": 1.6553253566390916e-05, "loss": 0.6193, "step": 1974 }, { "epoch": 0.29456728438793395, "grad_norm": 1.0937016010284424, "learning_rate": 1.6549603168045577e-05, "loss": 0.6373, "step": 1975 }, { "epoch": 0.2947164323800291, "grad_norm": 1.2174162864685059, "learning_rate": 1.6545951240653383e-05, "loss": 0.6622, "step": 1976 }, { "epoch": 0.29486558037212424, "grad_norm": 1.2804512977600098, "learning_rate": 1.6542297785066898e-05, "loss": 0.6457, "step": 1977 }, { "epoch": 0.2950147283642194, "grad_norm": 1.2989658117294312, "learning_rate": 1.6538642802139042e-05, "loss": 0.7068, "step": 1978 }, { "epoch": 0.2951638763563146, "grad_norm": 1.2323285341262817, "learning_rate": 1.65349862927231e-05, "loss": 0.6422, "step": 1979 }, { "epoch": 0.2953130243484097, "grad_norm": 1.13369882106781, "learning_rate": 1.6531328257672707e-05, "loss": 0.6087, "step": 1980 }, { "epoch": 0.29546217234050487, "grad_norm": 1.3072060346603394, "learning_rate": 1.6527668697841853e-05, "loss": 0.6875, "step": 1981 }, { "epoch": 0.2956113203326, "grad_norm": 1.125993013381958, "learning_rate": 1.6524007614084886e-05, "loss": 0.5645, "step": 1982 }, { "epoch": 0.29576046832469516, "grad_norm": 1.3010677099227905, "learning_rate": 1.652034500725651e-05, "loss": 0.6581, "step": 1983 }, { "epoch": 0.29590961631679036, "grad_norm": 1.3272514343261719, "learning_rate": 1.651668087821178e-05, "loss": 0.6787, "step": 1984 }, { "epoch": 0.2960587643088855, "grad_norm": 1.1076488494873047, "learning_rate": 1.6513015227806117e-05, "loss": 0.5855, "step": 1985 }, { "epoch": 0.29620791230098065, "grad_norm": 1.328736662864685, "learning_rate": 1.6509348056895284e-05, "loss": 0.6427, "step": 1986 }, { "epoch": 0.2963570602930758, "grad_norm": 1.1798760890960693, "learning_rate": 1.650567936633541e-05, "loss": 0.6352, "step": 1987 }, { "epoch": 0.29650620828517094, "grad_norm": 1.1100220680236816, "learning_rate": 1.6502009156982974e-05, "loss": 0.5883, "step": 1988 }, { "epoch": 0.29665535627726614, "grad_norm": 1.28891921043396, "learning_rate": 1.649833742969481e-05, "loss": 0.7241, "step": 1989 }, { "epoch": 0.2968045042693613, "grad_norm": 1.221808910369873, "learning_rate": 1.6494664185328103e-05, "loss": 0.6461, "step": 1990 }, { "epoch": 0.2969536522614564, "grad_norm": 1.099137544631958, "learning_rate": 1.64909894247404e-05, "loss": 0.6164, "step": 1991 }, { "epoch": 0.29710280025355157, "grad_norm": 1.236623764038086, "learning_rate": 1.6487313148789597e-05, "loss": 0.6703, "step": 1992 }, { "epoch": 0.29725194824564677, "grad_norm": 1.234484314918518, "learning_rate": 1.648363535833394e-05, "loss": 0.7388, "step": 1993 }, { "epoch": 0.2974010962377419, "grad_norm": 1.2011796236038208, "learning_rate": 1.6479956054232034e-05, "loss": 0.6056, "step": 1994 }, { "epoch": 0.29755024422983706, "grad_norm": 1.1726055145263672, "learning_rate": 1.647627523734284e-05, "loss": 0.5851, "step": 1995 }, { "epoch": 0.2976993922219322, "grad_norm": 1.3197784423828125, "learning_rate": 1.6472592908525666e-05, "loss": 0.7191, "step": 1996 }, { "epoch": 0.29784854021402735, "grad_norm": 1.2442140579223633, "learning_rate": 1.6468909068640174e-05, "loss": 0.6921, "step": 1997 }, { "epoch": 0.29799768820612255, "grad_norm": 1.222663164138794, "learning_rate": 1.6465223718546383e-05, "loss": 0.6378, "step": 1998 }, { "epoch": 0.2981468361982177, "grad_norm": 1.2160981893539429, "learning_rate": 1.6461536859104658e-05, "loss": 0.6961, "step": 1999 }, { "epoch": 0.29829598419031284, "grad_norm": 1.1003015041351318, "learning_rate": 1.645784849117572e-05, "loss": 0.6771, "step": 2000 }, { "epoch": 0.298445132182408, "grad_norm": 1.1014111042022705, "learning_rate": 1.6454158615620643e-05, "loss": 0.6821, "step": 2001 }, { "epoch": 0.2985942801745031, "grad_norm": 1.2005938291549683, "learning_rate": 1.6450467233300854e-05, "loss": 0.6332, "step": 2002 }, { "epoch": 0.2987434281665983, "grad_norm": 1.146350622177124, "learning_rate": 1.644677434507813e-05, "loss": 0.6284, "step": 2003 }, { "epoch": 0.29889257615869347, "grad_norm": 1.1609022617340088, "learning_rate": 1.64430799518146e-05, "loss": 0.6651, "step": 2004 }, { "epoch": 0.2990417241507886, "grad_norm": 1.2231494188308716, "learning_rate": 1.643938405437274e-05, "loss": 0.7213, "step": 2005 }, { "epoch": 0.29919087214288376, "grad_norm": 1.070125699043274, "learning_rate": 1.643568665361538e-05, "loss": 0.5797, "step": 2006 }, { "epoch": 0.29934002013497896, "grad_norm": 1.1940997838974, "learning_rate": 1.6431987750405708e-05, "loss": 0.6588, "step": 2007 }, { "epoch": 0.2994891681270741, "grad_norm": 1.0727101564407349, "learning_rate": 1.6428287345607255e-05, "loss": 0.6568, "step": 2008 }, { "epoch": 0.29963831611916925, "grad_norm": 1.3406003713607788, "learning_rate": 1.64245854400839e-05, "loss": 0.643, "step": 2009 }, { "epoch": 0.2997874641112644, "grad_norm": 1.2415130138397217, "learning_rate": 1.6420882034699882e-05, "loss": 0.6977, "step": 2010 }, { "epoch": 0.29993661210335953, "grad_norm": 1.2453913688659668, "learning_rate": 1.641717713031978e-05, "loss": 0.6712, "step": 2011 }, { "epoch": 0.30008576009545473, "grad_norm": 1.003412127494812, "learning_rate": 1.6413470727808533e-05, "loss": 0.698, "step": 2012 }, { "epoch": 0.3002349080875499, "grad_norm": 1.2184940576553345, "learning_rate": 1.6409762828031416e-05, "loss": 0.5764, "step": 2013 }, { "epoch": 0.300384056079645, "grad_norm": 1.155010461807251, "learning_rate": 1.6406053431854066e-05, "loss": 0.6567, "step": 2014 }, { "epoch": 0.30053320407174017, "grad_norm": 1.2037984132766724, "learning_rate": 1.6402342540142474e-05, "loss": 0.6573, "step": 2015 }, { "epoch": 0.30068235206383537, "grad_norm": 1.1588367223739624, "learning_rate": 1.639863015376296e-05, "loss": 0.6267, "step": 2016 }, { "epoch": 0.3008315000559305, "grad_norm": 1.2615313529968262, "learning_rate": 1.6394916273582208e-05, "loss": 0.6508, "step": 2017 }, { "epoch": 0.30098064804802566, "grad_norm": 1.1784847974777222, "learning_rate": 1.6391200900467245e-05, "loss": 0.5697, "step": 2018 }, { "epoch": 0.3011297960401208, "grad_norm": 1.1945478916168213, "learning_rate": 1.6387484035285456e-05, "loss": 0.6674, "step": 2019 }, { "epoch": 0.30127894403221595, "grad_norm": 1.1301203966140747, "learning_rate": 1.6383765678904563e-05, "loss": 0.6132, "step": 2020 }, { "epoch": 0.30142809202431115, "grad_norm": 1.37343168258667, "learning_rate": 1.6380045832192634e-05, "loss": 0.6973, "step": 2021 }, { "epoch": 0.3015772400164063, "grad_norm": 1.0828701257705688, "learning_rate": 1.6376324496018096e-05, "loss": 0.6127, "step": 2022 }, { "epoch": 0.30172638800850143, "grad_norm": 1.2714097499847412, "learning_rate": 1.6372601671249724e-05, "loss": 0.6484, "step": 2023 }, { "epoch": 0.3018755360005966, "grad_norm": 1.178075909614563, "learning_rate": 1.636887735875663e-05, "loss": 0.6595, "step": 2024 }, { "epoch": 0.3020246839926917, "grad_norm": 0.9100092053413391, "learning_rate": 1.6365151559408276e-05, "loss": 0.6607, "step": 2025 }, { "epoch": 0.3021738319847869, "grad_norm": 1.2099359035491943, "learning_rate": 1.636142427407448e-05, "loss": 0.6416, "step": 2026 }, { "epoch": 0.30232297997688207, "grad_norm": 1.2056974172592163, "learning_rate": 1.6357695503625394e-05, "loss": 0.5602, "step": 2027 }, { "epoch": 0.3024721279689772, "grad_norm": 1.192659616470337, "learning_rate": 1.635396524893153e-05, "loss": 0.6221, "step": 2028 }, { "epoch": 0.30262127596107236, "grad_norm": 1.159165859222412, "learning_rate": 1.6350233510863736e-05, "loss": 0.6407, "step": 2029 }, { "epoch": 0.30277042395316756, "grad_norm": 1.17678964138031, "learning_rate": 1.634650029029321e-05, "loss": 0.7243, "step": 2030 }, { "epoch": 0.3029195719452627, "grad_norm": 1.2728220224380493, "learning_rate": 1.63427655880915e-05, "loss": 0.6863, "step": 2031 }, { "epoch": 0.30306871993735784, "grad_norm": 0.8615176677703857, "learning_rate": 1.633902940513049e-05, "loss": 0.6439, "step": 2032 }, { "epoch": 0.303217867929453, "grad_norm": 1.2527785301208496, "learning_rate": 1.633529174228242e-05, "loss": 0.7115, "step": 2033 }, { "epoch": 0.30336701592154813, "grad_norm": 1.3033037185668945, "learning_rate": 1.633155260041987e-05, "loss": 0.7074, "step": 2034 }, { "epoch": 0.30351616391364333, "grad_norm": 1.179593563079834, "learning_rate": 1.632781198041577e-05, "loss": 0.6512, "step": 2035 }, { "epoch": 0.3036653119057385, "grad_norm": 1.293797254562378, "learning_rate": 1.632406988314339e-05, "loss": 0.6911, "step": 2036 }, { "epoch": 0.3038144598978336, "grad_norm": 1.2679553031921387, "learning_rate": 1.632032630947634e-05, "loss": 0.6416, "step": 2037 }, { "epoch": 0.30396360788992877, "grad_norm": 1.111017107963562, "learning_rate": 1.631658126028859e-05, "loss": 0.6084, "step": 2038 }, { "epoch": 0.3041127558820239, "grad_norm": 1.1904078722000122, "learning_rate": 1.6312834736454446e-05, "loss": 0.679, "step": 2039 }, { "epoch": 0.3042619038741191, "grad_norm": 1.314630150794983, "learning_rate": 1.630908673884855e-05, "loss": 0.6353, "step": 2040 }, { "epoch": 0.30441105186621426, "grad_norm": 0.8559300303459167, "learning_rate": 1.63053372683459e-05, "loss": 0.6865, "step": 2041 }, { "epoch": 0.3045601998583094, "grad_norm": 1.308696985244751, "learning_rate": 1.630158632582184e-05, "loss": 0.6599, "step": 2042 }, { "epoch": 0.30470934785040454, "grad_norm": 1.2612452507019043, "learning_rate": 1.6297833912152043e-05, "loss": 0.6228, "step": 2043 }, { "epoch": 0.30485849584249974, "grad_norm": 0.8416270017623901, "learning_rate": 1.6294080028212532e-05, "loss": 0.6721, "step": 2044 }, { "epoch": 0.3050076438345949, "grad_norm": 1.2424815893173218, "learning_rate": 1.629032467487969e-05, "loss": 0.6326, "step": 2045 }, { "epoch": 0.30515679182669003, "grad_norm": 1.1940721273422241, "learning_rate": 1.6286567853030212e-05, "loss": 0.6594, "step": 2046 }, { "epoch": 0.3053059398187852, "grad_norm": 1.3554210662841797, "learning_rate": 1.628280956354116e-05, "loss": 0.6591, "step": 2047 }, { "epoch": 0.3054550878108803, "grad_norm": 1.0833969116210938, "learning_rate": 1.6279049807289936e-05, "loss": 0.588, "step": 2048 }, { "epoch": 0.3056042358029755, "grad_norm": 0.8935872316360474, "learning_rate": 1.6275288585154267e-05, "loss": 0.6544, "step": 2049 }, { "epoch": 0.30575338379507067, "grad_norm": 1.3092774152755737, "learning_rate": 1.6271525898012242e-05, "loss": 0.6676, "step": 2050 }, { "epoch": 0.3059025317871658, "grad_norm": 1.1297099590301514, "learning_rate": 1.626776174674228e-05, "loss": 0.6377, "step": 2051 }, { "epoch": 0.30605167977926095, "grad_norm": 1.2094101905822754, "learning_rate": 1.6263996132223155e-05, "loss": 0.5991, "step": 2052 }, { "epoch": 0.30620082777135615, "grad_norm": 1.208993911743164, "learning_rate": 1.6260229055333962e-05, "loss": 0.6437, "step": 2053 }, { "epoch": 0.3063499757634513, "grad_norm": 1.1506412029266357, "learning_rate": 1.625646051695416e-05, "loss": 0.5949, "step": 2054 }, { "epoch": 0.30649912375554644, "grad_norm": 1.2688711881637573, "learning_rate": 1.625269051796353e-05, "loss": 0.6764, "step": 2055 }, { "epoch": 0.3066482717476416, "grad_norm": 1.241438627243042, "learning_rate": 1.624891905924221e-05, "loss": 0.6805, "step": 2056 }, { "epoch": 0.30679741973973673, "grad_norm": 1.1025031805038452, "learning_rate": 1.6245146141670662e-05, "loss": 0.7072, "step": 2057 }, { "epoch": 0.30694656773183193, "grad_norm": 1.184335708618164, "learning_rate": 1.6241371766129707e-05, "loss": 0.6551, "step": 2058 }, { "epoch": 0.3070957157239271, "grad_norm": 1.1710909605026245, "learning_rate": 1.6237595933500495e-05, "loss": 0.6687, "step": 2059 }, { "epoch": 0.3072448637160222, "grad_norm": 1.3440632820129395, "learning_rate": 1.6233818644664514e-05, "loss": 0.6871, "step": 2060 }, { "epoch": 0.30739401170811737, "grad_norm": 1.2165311574935913, "learning_rate": 1.6230039900503598e-05, "loss": 0.6234, "step": 2061 }, { "epoch": 0.3075431597002125, "grad_norm": 1.2622190713882446, "learning_rate": 1.6226259701899922e-05, "loss": 0.6619, "step": 2062 }, { "epoch": 0.3076923076923077, "grad_norm": 1.1703895330429077, "learning_rate": 1.622247804973599e-05, "loss": 0.6541, "step": 2063 }, { "epoch": 0.30784145568440285, "grad_norm": 1.0583775043487549, "learning_rate": 1.6218694944894666e-05, "loss": 0.5173, "step": 2064 }, { "epoch": 0.307990603676498, "grad_norm": 1.2205349206924438, "learning_rate": 1.621491038825913e-05, "loss": 0.6347, "step": 2065 }, { "epoch": 0.30813975166859314, "grad_norm": 1.2239184379577637, "learning_rate": 1.6211124380712914e-05, "loss": 0.6333, "step": 2066 }, { "epoch": 0.30828889966068834, "grad_norm": 1.1100009679794312, "learning_rate": 1.6207336923139886e-05, "loss": 0.5781, "step": 2067 }, { "epoch": 0.3084380476527835, "grad_norm": 1.1355198621749878, "learning_rate": 1.620354801642425e-05, "loss": 0.5788, "step": 2068 }, { "epoch": 0.30858719564487863, "grad_norm": 1.1769410371780396, "learning_rate": 1.6199757661450552e-05, "loss": 0.6177, "step": 2069 }, { "epoch": 0.3087363436369738, "grad_norm": 1.1669634580612183, "learning_rate": 1.6195965859103675e-05, "loss": 0.609, "step": 2070 }, { "epoch": 0.3088854916290689, "grad_norm": 1.2049531936645508, "learning_rate": 1.6192172610268838e-05, "loss": 0.7127, "step": 2071 }, { "epoch": 0.3090346396211641, "grad_norm": 1.2118808031082153, "learning_rate": 1.6188377915831605e-05, "loss": 0.628, "step": 2072 }, { "epoch": 0.30918378761325926, "grad_norm": 1.2838126420974731, "learning_rate": 1.6184581776677864e-05, "loss": 0.7571, "step": 2073 }, { "epoch": 0.3093329356053544, "grad_norm": 1.167529821395874, "learning_rate": 1.6180784193693852e-05, "loss": 0.5636, "step": 2074 }, { "epoch": 0.30948208359744955, "grad_norm": 1.2590097188949585, "learning_rate": 1.617698516776614e-05, "loss": 0.7105, "step": 2075 }, { "epoch": 0.30963123158954475, "grad_norm": 1.2990187406539917, "learning_rate": 1.6173184699781632e-05, "loss": 0.7656, "step": 2076 }, { "epoch": 0.3097803795816399, "grad_norm": 1.1401755809783936, "learning_rate": 1.6169382790627575e-05, "loss": 0.6418, "step": 2077 }, { "epoch": 0.30992952757373504, "grad_norm": 1.2188471555709839, "learning_rate": 1.6165579441191546e-05, "loss": 0.6358, "step": 2078 }, { "epoch": 0.3100786755658302, "grad_norm": 1.1716419458389282, "learning_rate": 1.6161774652361463e-05, "loss": 0.6349, "step": 2079 }, { "epoch": 0.31022782355792533, "grad_norm": 1.2680160999298096, "learning_rate": 1.6157968425025577e-05, "loss": 0.663, "step": 2080 }, { "epoch": 0.31037697155002053, "grad_norm": 1.1172993183135986, "learning_rate": 1.6154160760072478e-05, "loss": 0.5438, "step": 2081 }, { "epoch": 0.3105261195421157, "grad_norm": 1.2245469093322754, "learning_rate": 1.6150351658391086e-05, "loss": 0.6457, "step": 2082 }, { "epoch": 0.3106752675342108, "grad_norm": 1.1050134897232056, "learning_rate": 1.6146541120870667e-05, "loss": 0.5469, "step": 2083 }, { "epoch": 0.31082441552630596, "grad_norm": 1.2204471826553345, "learning_rate": 1.614272914840081e-05, "loss": 0.5898, "step": 2084 }, { "epoch": 0.3109735635184011, "grad_norm": 1.1395822763442993, "learning_rate": 1.6138915741871445e-05, "loss": 0.6441, "step": 2085 }, { "epoch": 0.3111227115104963, "grad_norm": 1.0844967365264893, "learning_rate": 1.6135100902172838e-05, "loss": 0.6127, "step": 2086 }, { "epoch": 0.31127185950259145, "grad_norm": 1.1134495735168457, "learning_rate": 1.6131284630195588e-05, "loss": 0.5898, "step": 2087 }, { "epoch": 0.3114210074946866, "grad_norm": 1.1402288675308228, "learning_rate": 1.6127466926830625e-05, "loss": 0.6516, "step": 2088 }, { "epoch": 0.31157015548678174, "grad_norm": 1.0498480796813965, "learning_rate": 1.6123647792969217e-05, "loss": 0.5858, "step": 2089 }, { "epoch": 0.31171930347887694, "grad_norm": 1.1158579587936401, "learning_rate": 1.6119827229502972e-05, "loss": 0.6248, "step": 2090 }, { "epoch": 0.3118684514709721, "grad_norm": 1.2917678356170654, "learning_rate": 1.611600523732382e-05, "loss": 0.7241, "step": 2091 }, { "epoch": 0.31201759946306723, "grad_norm": 1.1707185506820679, "learning_rate": 1.611218181732402e-05, "loss": 0.7459, "step": 2092 }, { "epoch": 0.3121667474551624, "grad_norm": 1.1883299350738525, "learning_rate": 1.6108356970396187e-05, "loss": 0.6586, "step": 2093 }, { "epoch": 0.3123158954472575, "grad_norm": 1.2308090925216675, "learning_rate": 1.6104530697433258e-05, "loss": 0.6231, "step": 2094 }, { "epoch": 0.3124650434393527, "grad_norm": 1.1567398309707642, "learning_rate": 1.6100702999328494e-05, "loss": 0.639, "step": 2095 }, { "epoch": 0.31261419143144786, "grad_norm": 1.1670440435409546, "learning_rate": 1.6096873876975492e-05, "loss": 0.6447, "step": 2096 }, { "epoch": 0.312763339423543, "grad_norm": 1.391643762588501, "learning_rate": 1.6093043331268193e-05, "loss": 0.677, "step": 2097 }, { "epoch": 0.31291248741563815, "grad_norm": 1.2140512466430664, "learning_rate": 1.6089211363100858e-05, "loss": 0.647, "step": 2098 }, { "epoch": 0.3130616354077333, "grad_norm": 1.173197627067566, "learning_rate": 1.6085377973368088e-05, "loss": 0.6195, "step": 2099 }, { "epoch": 0.3132107833998285, "grad_norm": 1.2742228507995605, "learning_rate": 1.608154316296481e-05, "loss": 0.6956, "step": 2100 }, { "epoch": 0.31335993139192364, "grad_norm": 1.1090202331542969, "learning_rate": 1.6077706932786285e-05, "loss": 0.6334, "step": 2101 }, { "epoch": 0.3135090793840188, "grad_norm": 1.2125886678695679, "learning_rate": 1.6073869283728103e-05, "loss": 0.661, "step": 2102 }, { "epoch": 0.31365822737611393, "grad_norm": 1.1310163736343384, "learning_rate": 1.6070030216686196e-05, "loss": 0.6101, "step": 2103 }, { "epoch": 0.31380737536820913, "grad_norm": 1.153494954109192, "learning_rate": 1.6066189732556812e-05, "loss": 0.6294, "step": 2104 }, { "epoch": 0.3139565233603043, "grad_norm": 1.1229090690612793, "learning_rate": 1.6062347832236538e-05, "loss": 0.6686, "step": 2105 }, { "epoch": 0.3141056713523994, "grad_norm": 1.2093182802200317, "learning_rate": 1.6058504516622288e-05, "loss": 0.6503, "step": 2106 }, { "epoch": 0.31425481934449456, "grad_norm": 1.2260127067565918, "learning_rate": 1.6054659786611314e-05, "loss": 0.6341, "step": 2107 }, { "epoch": 0.3144039673365897, "grad_norm": 1.3115582466125488, "learning_rate": 1.6050813643101194e-05, "loss": 0.6628, "step": 2108 }, { "epoch": 0.3145531153286849, "grad_norm": 1.2393251657485962, "learning_rate": 1.6046966086989827e-05, "loss": 0.6277, "step": 2109 }, { "epoch": 0.31470226332078005, "grad_norm": 1.2819409370422363, "learning_rate": 1.604311711917545e-05, "loss": 0.5963, "step": 2110 }, { "epoch": 0.3148514113128752, "grad_norm": 1.172933578491211, "learning_rate": 1.6039266740556638e-05, "loss": 0.5618, "step": 2111 }, { "epoch": 0.31500055930497034, "grad_norm": 1.2777860164642334, "learning_rate": 1.6035414952032277e-05, "loss": 0.7445, "step": 2112 }, { "epoch": 0.31514970729706554, "grad_norm": 1.2389544248580933, "learning_rate": 1.6031561754501602e-05, "loss": 0.6492, "step": 2113 }, { "epoch": 0.3152988552891607, "grad_norm": 1.1018747091293335, "learning_rate": 1.6027707148864155e-05, "loss": 0.6403, "step": 2114 }, { "epoch": 0.31544800328125583, "grad_norm": 1.2383112907409668, "learning_rate": 1.6023851136019827e-05, "loss": 0.5855, "step": 2115 }, { "epoch": 0.315597151273351, "grad_norm": 1.2115190029144287, "learning_rate": 1.601999371686883e-05, "loss": 0.7239, "step": 2116 }, { "epoch": 0.3157462992654461, "grad_norm": 1.2138153314590454, "learning_rate": 1.6016134892311694e-05, "loss": 0.6195, "step": 2117 }, { "epoch": 0.3158954472575413, "grad_norm": 1.1455374956130981, "learning_rate": 1.6012274663249293e-05, "loss": 0.6223, "step": 2118 }, { "epoch": 0.31604459524963646, "grad_norm": 1.2982068061828613, "learning_rate": 1.600841303058282e-05, "loss": 0.7264, "step": 2119 }, { "epoch": 0.3161937432417316, "grad_norm": 1.3576301336288452, "learning_rate": 1.60045499952138e-05, "loss": 0.65, "step": 2120 }, { "epoch": 0.31634289123382675, "grad_norm": 1.134490728378296, "learning_rate": 1.6000685558044082e-05, "loss": 0.5773, "step": 2121 }, { "epoch": 0.3164920392259219, "grad_norm": 0.9444700479507446, "learning_rate": 1.599681971997584e-05, "loss": 0.6553, "step": 2122 }, { "epoch": 0.3166411872180171, "grad_norm": 1.0615439414978027, "learning_rate": 1.599295248191159e-05, "loss": 0.4913, "step": 2123 }, { "epoch": 0.31679033521011224, "grad_norm": 1.0958478450775146, "learning_rate": 1.5989083844754153e-05, "loss": 0.525, "step": 2124 }, { "epoch": 0.3169394832022074, "grad_norm": 1.2231652736663818, "learning_rate": 1.5985213809406686e-05, "loss": 0.6238, "step": 2125 }, { "epoch": 0.31708863119430253, "grad_norm": 1.2361091375350952, "learning_rate": 1.5981342376772687e-05, "loss": 0.6344, "step": 2126 }, { "epoch": 0.31723777918639773, "grad_norm": 1.2742063999176025, "learning_rate": 1.597746954775595e-05, "loss": 0.7137, "step": 2127 }, { "epoch": 0.3173869271784929, "grad_norm": 1.2496131658554077, "learning_rate": 1.597359532326062e-05, "loss": 0.6422, "step": 2128 }, { "epoch": 0.317536075170588, "grad_norm": 1.1558709144592285, "learning_rate": 1.5969719704191164e-05, "loss": 0.6517, "step": 2129 }, { "epoch": 0.31768522316268316, "grad_norm": 1.2468665838241577, "learning_rate": 1.596584269145236e-05, "loss": 0.6187, "step": 2130 }, { "epoch": 0.3178343711547783, "grad_norm": 0.9300780296325684, "learning_rate": 1.5961964285949326e-05, "loss": 0.6694, "step": 2131 }, { "epoch": 0.3179835191468735, "grad_norm": 1.3759478330612183, "learning_rate": 1.59580844885875e-05, "loss": 0.701, "step": 2132 }, { "epoch": 0.31813266713896865, "grad_norm": 1.2023802995681763, "learning_rate": 1.5954203300272653e-05, "loss": 0.6111, "step": 2133 }, { "epoch": 0.3182818151310638, "grad_norm": 1.061553716659546, "learning_rate": 1.5950320721910863e-05, "loss": 0.6106, "step": 2134 }, { "epoch": 0.31843096312315894, "grad_norm": 1.0880705118179321, "learning_rate": 1.5946436754408548e-05, "loss": 0.5629, "step": 2135 }, { "epoch": 0.3185801111152541, "grad_norm": 1.1036192178726196, "learning_rate": 1.5942551398672443e-05, "loss": 0.6287, "step": 2136 }, { "epoch": 0.3187292591073493, "grad_norm": 1.228055477142334, "learning_rate": 1.5938664655609612e-05, "loss": 0.5891, "step": 2137 }, { "epoch": 0.3188784070994444, "grad_norm": 1.178999662399292, "learning_rate": 1.5934776526127437e-05, "loss": 0.5802, "step": 2138 }, { "epoch": 0.31902755509153957, "grad_norm": 1.1317517757415771, "learning_rate": 1.5930887011133626e-05, "loss": 0.6798, "step": 2139 }, { "epoch": 0.3191767030836347, "grad_norm": 1.3747447729110718, "learning_rate": 1.5926996111536212e-05, "loss": 0.7632, "step": 2140 }, { "epoch": 0.3193258510757299, "grad_norm": 1.139053225517273, "learning_rate": 1.592310382824356e-05, "loss": 0.6162, "step": 2141 }, { "epoch": 0.31947499906782506, "grad_norm": 1.0545692443847656, "learning_rate": 1.591921016216433e-05, "loss": 0.6714, "step": 2142 }, { "epoch": 0.3196241470599202, "grad_norm": 1.0350514650344849, "learning_rate": 1.591531511420754e-05, "loss": 0.6245, "step": 2143 }, { "epoch": 0.31977329505201535, "grad_norm": 1.3381661176681519, "learning_rate": 1.5911418685282506e-05, "loss": 0.7371, "step": 2144 }, { "epoch": 0.3199224430441105, "grad_norm": 1.1377837657928467, "learning_rate": 1.5907520876298872e-05, "loss": 0.6513, "step": 2145 }, { "epoch": 0.3200715910362057, "grad_norm": 1.161911129951477, "learning_rate": 1.5903621688166614e-05, "loss": 0.6842, "step": 2146 }, { "epoch": 0.32022073902830084, "grad_norm": 1.161150336265564, "learning_rate": 1.589972112179602e-05, "loss": 0.6701, "step": 2147 }, { "epoch": 0.320369887020396, "grad_norm": 0.9165359735488892, "learning_rate": 1.58958191780977e-05, "loss": 0.6764, "step": 2148 }, { "epoch": 0.3205190350124911, "grad_norm": 1.27543306350708, "learning_rate": 1.5891915857982583e-05, "loss": 0.6427, "step": 2149 }, { "epoch": 0.3206681830045863, "grad_norm": 1.1985312700271606, "learning_rate": 1.588801116236194e-05, "loss": 0.7101, "step": 2150 }, { "epoch": 0.32081733099668147, "grad_norm": 1.2151747941970825, "learning_rate": 1.5884105092147328e-05, "loss": 0.7199, "step": 2151 }, { "epoch": 0.3209664789887766, "grad_norm": 1.0965577363967896, "learning_rate": 1.5880197648250658e-05, "loss": 0.6095, "step": 2152 }, { "epoch": 0.32111562698087176, "grad_norm": 1.0873743295669556, "learning_rate": 1.587628883158414e-05, "loss": 0.6896, "step": 2153 }, { "epoch": 0.3212647749729669, "grad_norm": 0.9409453868865967, "learning_rate": 1.587237864306032e-05, "loss": 0.665, "step": 2154 }, { "epoch": 0.3214139229650621, "grad_norm": 1.2378159761428833, "learning_rate": 1.5868467083592044e-05, "loss": 0.695, "step": 2155 }, { "epoch": 0.32156307095715725, "grad_norm": 1.2160775661468506, "learning_rate": 1.5864554154092503e-05, "loss": 0.5875, "step": 2156 }, { "epoch": 0.3217122189492524, "grad_norm": 1.1697388887405396, "learning_rate": 1.5860639855475194e-05, "loss": 0.6299, "step": 2157 }, { "epoch": 0.32186136694134754, "grad_norm": 1.2510745525360107, "learning_rate": 1.5856724188653928e-05, "loss": 0.6863, "step": 2158 }, { "epoch": 0.3220105149334427, "grad_norm": 1.1545181274414062, "learning_rate": 1.585280715454285e-05, "loss": 0.6952, "step": 2159 }, { "epoch": 0.3221596629255379, "grad_norm": 1.1665242910385132, "learning_rate": 1.5848888754056408e-05, "loss": 0.5976, "step": 2160 }, { "epoch": 0.322308810917633, "grad_norm": 1.243480920791626, "learning_rate": 1.584496898810939e-05, "loss": 0.6501, "step": 2161 }, { "epoch": 0.32245795890972817, "grad_norm": 1.1953293085098267, "learning_rate": 1.5841047857616876e-05, "loss": 0.6588, "step": 2162 }, { "epoch": 0.3226071069018233, "grad_norm": 1.1099915504455566, "learning_rate": 1.583712536349429e-05, "loss": 0.6755, "step": 2163 }, { "epoch": 0.3227562548939185, "grad_norm": 1.15560781955719, "learning_rate": 1.583320150665736e-05, "loss": 0.6273, "step": 2164 }, { "epoch": 0.32290540288601366, "grad_norm": 1.1337441205978394, "learning_rate": 1.5829276288022138e-05, "loss": 0.6055, "step": 2165 }, { "epoch": 0.3230545508781088, "grad_norm": 1.2517818212509155, "learning_rate": 1.5825349708504988e-05, "loss": 0.5963, "step": 2166 }, { "epoch": 0.32320369887020395, "grad_norm": 1.2926287651062012, "learning_rate": 1.5821421769022593e-05, "loss": 0.6716, "step": 2167 }, { "epoch": 0.3233528468622991, "grad_norm": 1.1206529140472412, "learning_rate": 1.5817492470491962e-05, "loss": 0.6742, "step": 2168 }, { "epoch": 0.3235019948543943, "grad_norm": 1.1647748947143555, "learning_rate": 1.581356181383041e-05, "loss": 0.6562, "step": 2169 }, { "epoch": 0.32365114284648944, "grad_norm": 1.331260085105896, "learning_rate": 1.5809629799955576e-05, "loss": 0.7246, "step": 2170 }, { "epoch": 0.3238002908385846, "grad_norm": 1.2036666870117188, "learning_rate": 1.5805696429785414e-05, "loss": 0.6018, "step": 2171 }, { "epoch": 0.3239494388306797, "grad_norm": 1.1351863145828247, "learning_rate": 1.5801761704238197e-05, "loss": 0.6217, "step": 2172 }, { "epoch": 0.3240985868227749, "grad_norm": 1.2159912586212158, "learning_rate": 1.5797825624232506e-05, "loss": 0.6001, "step": 2173 }, { "epoch": 0.32424773481487007, "grad_norm": 1.314862608909607, "learning_rate": 1.5793888190687247e-05, "loss": 0.6957, "step": 2174 }, { "epoch": 0.3243968828069652, "grad_norm": 1.2229810953140259, "learning_rate": 1.578994940452164e-05, "loss": 0.6625, "step": 2175 }, { "epoch": 0.32454603079906036, "grad_norm": 1.3131756782531738, "learning_rate": 1.578600926665522e-05, "loss": 0.7117, "step": 2176 }, { "epoch": 0.3246951787911555, "grad_norm": 1.1675958633422852, "learning_rate": 1.5782067778007835e-05, "loss": 0.5919, "step": 2177 }, { "epoch": 0.3248443267832507, "grad_norm": 0.9816223978996277, "learning_rate": 1.5778124939499654e-05, "loss": 0.6614, "step": 2178 }, { "epoch": 0.32499347477534585, "grad_norm": 1.1725636720657349, "learning_rate": 1.5774180752051152e-05, "loss": 0.5909, "step": 2179 }, { "epoch": 0.325142622767441, "grad_norm": 1.3165509700775146, "learning_rate": 1.5770235216583136e-05, "loss": 0.7236, "step": 2180 }, { "epoch": 0.32529177075953614, "grad_norm": 1.314494013786316, "learning_rate": 1.5766288334016705e-05, "loss": 0.737, "step": 2181 }, { "epoch": 0.3254409187516313, "grad_norm": 1.156119465827942, "learning_rate": 1.576234010527329e-05, "loss": 0.5851, "step": 2182 }, { "epoch": 0.3255900667437265, "grad_norm": 1.1662036180496216, "learning_rate": 1.575839053127463e-05, "loss": 0.6339, "step": 2183 }, { "epoch": 0.3257392147358216, "grad_norm": 1.1972496509552002, "learning_rate": 1.5754439612942774e-05, "loss": 0.587, "step": 2184 }, { "epoch": 0.32588836272791677, "grad_norm": 1.2408905029296875, "learning_rate": 1.5750487351200096e-05, "loss": 0.6131, "step": 2185 }, { "epoch": 0.3260375107200119, "grad_norm": 0.9778432846069336, "learning_rate": 1.5746533746969275e-05, "loss": 0.6655, "step": 2186 }, { "epoch": 0.3261866587121071, "grad_norm": 1.4220619201660156, "learning_rate": 1.57425788011733e-05, "loss": 0.6714, "step": 2187 }, { "epoch": 0.32633580670420226, "grad_norm": 1.1707950830459595, "learning_rate": 1.5738622514735483e-05, "loss": 0.6319, "step": 2188 }, { "epoch": 0.3264849546962974, "grad_norm": 1.17977774143219, "learning_rate": 1.5734664888579448e-05, "loss": 0.6195, "step": 2189 }, { "epoch": 0.32663410268839255, "grad_norm": 1.231858491897583, "learning_rate": 1.5730705923629116e-05, "loss": 0.6755, "step": 2190 }, { "epoch": 0.3267832506804877, "grad_norm": 1.170711874961853, "learning_rate": 1.572674562080875e-05, "loss": 0.6205, "step": 2191 }, { "epoch": 0.3269323986725829, "grad_norm": 1.1594570875167847, "learning_rate": 1.5722783981042892e-05, "loss": 0.6041, "step": 2192 }, { "epoch": 0.32708154666467804, "grad_norm": 1.0838236808776855, "learning_rate": 1.571882100525642e-05, "loss": 0.6603, "step": 2193 }, { "epoch": 0.3272306946567732, "grad_norm": 1.1644095182418823, "learning_rate": 1.5714856694374514e-05, "loss": 0.5882, "step": 2194 }, { "epoch": 0.3273798426488683, "grad_norm": 1.2347066402435303, "learning_rate": 1.5710891049322672e-05, "loss": 0.6505, "step": 2195 }, { "epoch": 0.32752899064096347, "grad_norm": 1.1458405256271362, "learning_rate": 1.5706924071026693e-05, "loss": 0.7187, "step": 2196 }, { "epoch": 0.32767813863305867, "grad_norm": 1.2239104509353638, "learning_rate": 1.57029557604127e-05, "loss": 0.6719, "step": 2197 }, { "epoch": 0.3278272866251538, "grad_norm": 1.1864399909973145, "learning_rate": 1.5698986118407113e-05, "loss": 0.6962, "step": 2198 }, { "epoch": 0.32797643461724896, "grad_norm": 1.1158112287521362, "learning_rate": 1.569501514593668e-05, "loss": 0.6678, "step": 2199 }, { "epoch": 0.3281255826093441, "grad_norm": 1.1572883129119873, "learning_rate": 1.569104284392844e-05, "loss": 0.6699, "step": 2200 }, { "epoch": 0.3282747306014393, "grad_norm": 1.1665360927581787, "learning_rate": 1.568706921330976e-05, "loss": 0.6423, "step": 2201 }, { "epoch": 0.32842387859353445, "grad_norm": 1.1024820804595947, "learning_rate": 1.5683094255008304e-05, "loss": 0.5952, "step": 2202 }, { "epoch": 0.3285730265856296, "grad_norm": 1.153855562210083, "learning_rate": 1.5679117969952055e-05, "loss": 0.6278, "step": 2203 }, { "epoch": 0.32872217457772474, "grad_norm": 1.119799256324768, "learning_rate": 1.5675140359069302e-05, "loss": 0.567, "step": 2204 }, { "epoch": 0.3288713225698199, "grad_norm": 1.1798193454742432, "learning_rate": 1.5671161423288642e-05, "loss": 0.6847, "step": 2205 }, { "epoch": 0.3290204705619151, "grad_norm": 1.1474840641021729, "learning_rate": 1.566718116353898e-05, "loss": 0.6351, "step": 2206 }, { "epoch": 0.3291696185540102, "grad_norm": 1.179066777229309, "learning_rate": 1.5663199580749543e-05, "loss": 0.6433, "step": 2207 }, { "epoch": 0.32931876654610537, "grad_norm": 1.068157434463501, "learning_rate": 1.565921667584985e-05, "loss": 0.5797, "step": 2208 }, { "epoch": 0.3294679145382005, "grad_norm": 1.0737882852554321, "learning_rate": 1.5655232449769738e-05, "loss": 0.6368, "step": 2209 }, { "epoch": 0.3296170625302957, "grad_norm": 1.0470657348632812, "learning_rate": 1.5651246903439344e-05, "loss": 0.6014, "step": 2210 }, { "epoch": 0.32976621052239086, "grad_norm": 1.2052483558654785, "learning_rate": 1.564726003778913e-05, "loss": 0.6224, "step": 2211 }, { "epoch": 0.329915358514486, "grad_norm": 1.2331315279006958, "learning_rate": 1.5643271853749848e-05, "loss": 0.6397, "step": 2212 }, { "epoch": 0.33006450650658115, "grad_norm": 1.1075570583343506, "learning_rate": 1.5639282352252568e-05, "loss": 0.6376, "step": 2213 }, { "epoch": 0.3302136544986763, "grad_norm": 1.2509896755218506, "learning_rate": 1.563529153422866e-05, "loss": 0.7139, "step": 2214 }, { "epoch": 0.3303628024907715, "grad_norm": 1.1792452335357666, "learning_rate": 1.563129940060981e-05, "loss": 0.6661, "step": 2215 }, { "epoch": 0.33051195048286663, "grad_norm": 1.2216027975082397, "learning_rate": 1.562730595232801e-05, "loss": 0.5441, "step": 2216 }, { "epoch": 0.3306610984749618, "grad_norm": 1.171568512916565, "learning_rate": 1.5623311190315554e-05, "loss": 0.5925, "step": 2217 }, { "epoch": 0.3308102464670569, "grad_norm": 1.2486094236373901, "learning_rate": 1.5619315115505037e-05, "loss": 0.6792, "step": 2218 }, { "epoch": 0.33095939445915207, "grad_norm": 1.2786320447921753, "learning_rate": 1.5615317728829383e-05, "loss": 0.7239, "step": 2219 }, { "epoch": 0.33110854245124727, "grad_norm": 1.206309199333191, "learning_rate": 1.5611319031221793e-05, "loss": 0.6119, "step": 2220 }, { "epoch": 0.3312576904433424, "grad_norm": 1.2067378759384155, "learning_rate": 1.5607319023615798e-05, "loss": 0.6242, "step": 2221 }, { "epoch": 0.33140683843543756, "grad_norm": 1.2042487859725952, "learning_rate": 1.5603317706945224e-05, "loss": 0.605, "step": 2222 }, { "epoch": 0.3315559864275327, "grad_norm": 1.273868203163147, "learning_rate": 1.55993150821442e-05, "loss": 0.7067, "step": 2223 }, { "epoch": 0.3317051344196279, "grad_norm": 1.2111995220184326, "learning_rate": 1.5595311150147167e-05, "loss": 0.7008, "step": 2224 }, { "epoch": 0.33185428241172304, "grad_norm": 1.0881820917129517, "learning_rate": 1.5591305911888876e-05, "loss": 0.5993, "step": 2225 }, { "epoch": 0.3320034304038182, "grad_norm": 1.119744896888733, "learning_rate": 1.5587299368304362e-05, "loss": 0.5887, "step": 2226 }, { "epoch": 0.33215257839591333, "grad_norm": 1.2647368907928467, "learning_rate": 1.558329152032898e-05, "loss": 0.7365, "step": 2227 }, { "epoch": 0.3323017263880085, "grad_norm": 1.1934727430343628, "learning_rate": 1.55792823688984e-05, "loss": 0.6663, "step": 2228 }, { "epoch": 0.3324508743801037, "grad_norm": 1.1525017023086548, "learning_rate": 1.5575271914948575e-05, "loss": 0.6791, "step": 2229 }, { "epoch": 0.3326000223721988, "grad_norm": 1.1620858907699585, "learning_rate": 1.557126015941577e-05, "loss": 0.5894, "step": 2230 }, { "epoch": 0.33274917036429397, "grad_norm": 1.1191250085830688, "learning_rate": 1.5567247103236556e-05, "loss": 0.5828, "step": 2231 }, { "epoch": 0.3328983183563891, "grad_norm": 1.176908254623413, "learning_rate": 1.5563232747347813e-05, "loss": 0.6307, "step": 2232 }, { "epoch": 0.3330474663484843, "grad_norm": 1.2204564809799194, "learning_rate": 1.555921709268671e-05, "loss": 0.6308, "step": 2233 }, { "epoch": 0.33319661434057946, "grad_norm": 1.2287763357162476, "learning_rate": 1.5555200140190732e-05, "loss": 0.5498, "step": 2234 }, { "epoch": 0.3333457623326746, "grad_norm": 1.1086399555206299, "learning_rate": 1.555118189079766e-05, "loss": 0.5295, "step": 2235 }, { "epoch": 0.33349491032476974, "grad_norm": 1.1697911024093628, "learning_rate": 1.5547162345445584e-05, "loss": 0.6236, "step": 2236 }, { "epoch": 0.3336440583168649, "grad_norm": 1.1163662672042847, "learning_rate": 1.5543141505072888e-05, "loss": 0.5943, "step": 2237 }, { "epoch": 0.3337932063089601, "grad_norm": 1.2284818887710571, "learning_rate": 1.5539119370618267e-05, "loss": 0.6467, "step": 2238 }, { "epoch": 0.33394235430105523, "grad_norm": 1.1574106216430664, "learning_rate": 1.553509594302071e-05, "loss": 0.7118, "step": 2239 }, { "epoch": 0.3340915022931504, "grad_norm": 1.084969401359558, "learning_rate": 1.5531071223219513e-05, "loss": 0.6575, "step": 2240 }, { "epoch": 0.3342406502852455, "grad_norm": 1.1511814594268799, "learning_rate": 1.5527045212154274e-05, "loss": 0.63, "step": 2241 }, { "epoch": 0.33438979827734067, "grad_norm": 1.2282367944717407, "learning_rate": 1.5523017910764892e-05, "loss": 0.6241, "step": 2242 }, { "epoch": 0.33453894626943587, "grad_norm": 1.120333194732666, "learning_rate": 1.5518989319991563e-05, "loss": 0.7256, "step": 2243 }, { "epoch": 0.334688094261531, "grad_norm": 1.1434431076049805, "learning_rate": 1.551495944077479e-05, "loss": 0.5652, "step": 2244 }, { "epoch": 0.33483724225362616, "grad_norm": 1.153017520904541, "learning_rate": 1.5510928274055373e-05, "loss": 0.6549, "step": 2245 }, { "epoch": 0.3349863902457213, "grad_norm": 1.2255792617797852, "learning_rate": 1.5506895820774416e-05, "loss": 0.6618, "step": 2246 }, { "epoch": 0.3351355382378165, "grad_norm": 1.1766265630722046, "learning_rate": 1.550286208187332e-05, "loss": 0.6894, "step": 2247 }, { "epoch": 0.33528468622991164, "grad_norm": 1.2210493087768555, "learning_rate": 1.5498827058293785e-05, "loss": 0.6538, "step": 2248 }, { "epoch": 0.3354338342220068, "grad_norm": 1.1792250871658325, "learning_rate": 1.5494790750977814e-05, "loss": 0.603, "step": 2249 }, { "epoch": 0.33558298221410193, "grad_norm": 1.1191729307174683, "learning_rate": 1.549075316086771e-05, "loss": 0.6154, "step": 2250 }, { "epoch": 0.3357321302061971, "grad_norm": 1.0622849464416504, "learning_rate": 1.5486714288906072e-05, "loss": 0.5685, "step": 2251 }, { "epoch": 0.3358812781982923, "grad_norm": 1.2974958419799805, "learning_rate": 1.5482674136035804e-05, "loss": 0.6578, "step": 2252 }, { "epoch": 0.3360304261903874, "grad_norm": 1.0299549102783203, "learning_rate": 1.5478632703200104e-05, "loss": 0.6607, "step": 2253 }, { "epoch": 0.33617957418248257, "grad_norm": 1.1874017715454102, "learning_rate": 1.5474589991342468e-05, "loss": 0.6524, "step": 2254 }, { "epoch": 0.3363287221745777, "grad_norm": 1.2122920751571655, "learning_rate": 1.5470546001406698e-05, "loss": 0.6712, "step": 2255 }, { "epoch": 0.33647787016667285, "grad_norm": 1.2136603593826294, "learning_rate": 1.5466500734336886e-05, "loss": 0.667, "step": 2256 }, { "epoch": 0.33662701815876805, "grad_norm": 1.3172355890274048, "learning_rate": 1.5462454191077427e-05, "loss": 0.7632, "step": 2257 }, { "epoch": 0.3367761661508632, "grad_norm": 1.199366569519043, "learning_rate": 1.5458406372573006e-05, "loss": 0.6216, "step": 2258 }, { "epoch": 0.33692531414295834, "grad_norm": 1.203533411026001, "learning_rate": 1.5454357279768624e-05, "loss": 0.6943, "step": 2259 }, { "epoch": 0.3370744621350535, "grad_norm": 1.119197130203247, "learning_rate": 1.5450306913609557e-05, "loss": 0.5923, "step": 2260 }, { "epoch": 0.3372236101271487, "grad_norm": 1.1143519878387451, "learning_rate": 1.5446255275041398e-05, "loss": 0.6096, "step": 2261 }, { "epoch": 0.33737275811924383, "grad_norm": 1.2123372554779053, "learning_rate": 1.5442202365010022e-05, "loss": 0.6173, "step": 2262 }, { "epoch": 0.337521906111339, "grad_norm": 1.2098814249038696, "learning_rate": 1.5438148184461606e-05, "loss": 0.6434, "step": 2263 }, { "epoch": 0.3376710541034341, "grad_norm": 1.2663953304290771, "learning_rate": 1.543409273434263e-05, "loss": 0.6311, "step": 2264 }, { "epoch": 0.33782020209552927, "grad_norm": 1.254465937614441, "learning_rate": 1.543003601559986e-05, "loss": 0.7255, "step": 2265 }, { "epoch": 0.33796935008762446, "grad_norm": 1.113221287727356, "learning_rate": 1.5425978029180367e-05, "loss": 0.6789, "step": 2266 }, { "epoch": 0.3381184980797196, "grad_norm": 1.2205177545547485, "learning_rate": 1.5421918776031506e-05, "loss": 0.6206, "step": 2267 }, { "epoch": 0.33826764607181475, "grad_norm": 1.0888317823410034, "learning_rate": 1.5417858257100946e-05, "loss": 0.6612, "step": 2268 }, { "epoch": 0.3384167940639099, "grad_norm": 1.1513725519180298, "learning_rate": 1.5413796473336635e-05, "loss": 0.6003, "step": 2269 }, { "epoch": 0.3385659420560051, "grad_norm": 1.1398320198059082, "learning_rate": 1.5409733425686822e-05, "loss": 0.6579, "step": 2270 }, { "epoch": 0.33871509004810024, "grad_norm": 1.2265492677688599, "learning_rate": 1.5405669115100057e-05, "loss": 0.6596, "step": 2271 }, { "epoch": 0.3388642380401954, "grad_norm": 1.165356159210205, "learning_rate": 1.5401603542525172e-05, "loss": 0.6579, "step": 2272 }, { "epoch": 0.33901338603229053, "grad_norm": 1.344971776008606, "learning_rate": 1.5397536708911308e-05, "loss": 0.7372, "step": 2273 }, { "epoch": 0.3391625340243857, "grad_norm": 1.2133064270019531, "learning_rate": 1.5393468615207887e-05, "loss": 0.6893, "step": 2274 }, { "epoch": 0.3393116820164809, "grad_norm": 1.110683560371399, "learning_rate": 1.5389399262364636e-05, "loss": 0.6062, "step": 2275 }, { "epoch": 0.339460830008576, "grad_norm": 1.1346160173416138, "learning_rate": 1.538532865133157e-05, "loss": 0.7577, "step": 2276 }, { "epoch": 0.33960997800067116, "grad_norm": 1.0189944505691528, "learning_rate": 1.5381256783059e-05, "loss": 0.6271, "step": 2277 }, { "epoch": 0.3397591259927663, "grad_norm": 1.141427755355835, "learning_rate": 1.537718365849753e-05, "loss": 0.7155, "step": 2278 }, { "epoch": 0.33990827398486145, "grad_norm": 1.1448885202407837, "learning_rate": 1.5373109278598055e-05, "loss": 0.6446, "step": 2279 }, { "epoch": 0.34005742197695665, "grad_norm": 1.0645650625228882, "learning_rate": 1.5369033644311768e-05, "loss": 0.5787, "step": 2280 }, { "epoch": 0.3402065699690518, "grad_norm": 1.2595055103302002, "learning_rate": 1.536495675659015e-05, "loss": 0.5425, "step": 2281 }, { "epoch": 0.34035571796114694, "grad_norm": 1.0983937978744507, "learning_rate": 1.5360878616384975e-05, "loss": 0.6179, "step": 2282 }, { "epoch": 0.3405048659532421, "grad_norm": 1.1530348062515259, "learning_rate": 1.5356799224648312e-05, "loss": 0.651, "step": 2283 }, { "epoch": 0.3406540139453373, "grad_norm": 1.1905772686004639, "learning_rate": 1.5352718582332524e-05, "loss": 0.6414, "step": 2284 }, { "epoch": 0.34080316193743243, "grad_norm": 1.116839051246643, "learning_rate": 1.534863669039026e-05, "loss": 0.6228, "step": 2285 }, { "epoch": 0.3409523099295276, "grad_norm": 1.0524928569793701, "learning_rate": 1.5344553549774466e-05, "loss": 0.5746, "step": 2286 }, { "epoch": 0.3411014579216227, "grad_norm": 1.1747674942016602, "learning_rate": 1.534046916143838e-05, "loss": 0.6688, "step": 2287 }, { "epoch": 0.34125060591371786, "grad_norm": 1.0891857147216797, "learning_rate": 1.5336383526335517e-05, "loss": 0.5677, "step": 2288 }, { "epoch": 0.34139975390581306, "grad_norm": 1.2533519268035889, "learning_rate": 1.5332296645419707e-05, "loss": 0.6632, "step": 2289 }, { "epoch": 0.3415489018979082, "grad_norm": 1.3376941680908203, "learning_rate": 1.5328208519645052e-05, "loss": 0.7358, "step": 2290 }, { "epoch": 0.34169804989000335, "grad_norm": 1.2354905605316162, "learning_rate": 1.5324119149965957e-05, "loss": 0.6386, "step": 2291 }, { "epoch": 0.3418471978820985, "grad_norm": 1.1956661939620972, "learning_rate": 1.5320028537337108e-05, "loss": 0.6443, "step": 2292 }, { "epoch": 0.34199634587419364, "grad_norm": 1.2448432445526123, "learning_rate": 1.531593668271348e-05, "loss": 0.6656, "step": 2293 }, { "epoch": 0.34214549386628884, "grad_norm": 1.1170761585235596, "learning_rate": 1.5311843587050352e-05, "loss": 0.5968, "step": 2294 }, { "epoch": 0.342294641858384, "grad_norm": 1.2353310585021973, "learning_rate": 1.5307749251303278e-05, "loss": 0.6629, "step": 2295 }, { "epoch": 0.34244378985047913, "grad_norm": 1.1350088119506836, "learning_rate": 1.5303653676428106e-05, "loss": 0.5465, "step": 2296 }, { "epoch": 0.3425929378425743, "grad_norm": 1.1247081756591797, "learning_rate": 1.529955686338098e-05, "loss": 0.6321, "step": 2297 }, { "epoch": 0.3427420858346695, "grad_norm": 1.1135681867599487, "learning_rate": 1.529545881311832e-05, "loss": 0.5572, "step": 2298 }, { "epoch": 0.3428912338267646, "grad_norm": 1.0871063470840454, "learning_rate": 1.529135952659684e-05, "loss": 0.567, "step": 2299 }, { "epoch": 0.34304038181885976, "grad_norm": 1.2127866744995117, "learning_rate": 1.528725900477356e-05, "loss": 0.5946, "step": 2300 }, { "epoch": 0.3431895298109549, "grad_norm": 1.1318833827972412, "learning_rate": 1.5283157248605758e-05, "loss": 0.6051, "step": 2301 }, { "epoch": 0.34333867780305005, "grad_norm": 1.1660903692245483, "learning_rate": 1.5279054259051022e-05, "loss": 0.6348, "step": 2302 }, { "epoch": 0.34348782579514525, "grad_norm": 1.2559043169021606, "learning_rate": 1.527495003706722e-05, "loss": 0.6248, "step": 2303 }, { "epoch": 0.3436369737872404, "grad_norm": 1.1739825010299683, "learning_rate": 1.5270844583612507e-05, "loss": 0.6217, "step": 2304 }, { "epoch": 0.34378612177933554, "grad_norm": 1.236143708229065, "learning_rate": 1.5266737899645333e-05, "loss": 0.7091, "step": 2305 }, { "epoch": 0.3439352697714307, "grad_norm": 1.1490556001663208, "learning_rate": 1.5262629986124422e-05, "loss": 0.5766, "step": 2306 }, { "epoch": 0.3440844177635259, "grad_norm": 1.1768697500228882, "learning_rate": 1.5258520844008797e-05, "loss": 0.6207, "step": 2307 }, { "epoch": 0.34423356575562103, "grad_norm": 1.2127114534378052, "learning_rate": 1.5254410474257765e-05, "loss": 0.6702, "step": 2308 }, { "epoch": 0.3443827137477162, "grad_norm": 1.1681288480758667, "learning_rate": 1.5250298877830916e-05, "loss": 0.5769, "step": 2309 }, { "epoch": 0.3445318617398113, "grad_norm": 1.20762300491333, "learning_rate": 1.5246186055688128e-05, "loss": 0.6092, "step": 2310 }, { "epoch": 0.34468100973190646, "grad_norm": 0.8544571399688721, "learning_rate": 1.5242072008789564e-05, "loss": 0.6531, "step": 2311 }, { "epoch": 0.34483015772400166, "grad_norm": 1.2345815896987915, "learning_rate": 1.5237956738095681e-05, "loss": 0.7637, "step": 2312 }, { "epoch": 0.3449793057160968, "grad_norm": 1.0287842750549316, "learning_rate": 1.5233840244567208e-05, "loss": 0.5777, "step": 2313 }, { "epoch": 0.34512845370819195, "grad_norm": 1.2676687240600586, "learning_rate": 1.5229722529165175e-05, "loss": 0.6994, "step": 2314 }, { "epoch": 0.3452776017002871, "grad_norm": 1.2982680797576904, "learning_rate": 1.5225603592850881e-05, "loss": 0.7047, "step": 2315 }, { "epoch": 0.34542674969238224, "grad_norm": 1.2922625541687012, "learning_rate": 1.5221483436585923e-05, "loss": 0.7628, "step": 2316 }, { "epoch": 0.34557589768447744, "grad_norm": 1.170249581336975, "learning_rate": 1.5217362061332176e-05, "loss": 0.6379, "step": 2317 }, { "epoch": 0.3457250456765726, "grad_norm": 1.209556221961975, "learning_rate": 1.5213239468051801e-05, "loss": 0.6148, "step": 2318 }, { "epoch": 0.34587419366866773, "grad_norm": 1.1083110570907593, "learning_rate": 1.5209115657707247e-05, "loss": 0.6505, "step": 2319 }, { "epoch": 0.3460233416607629, "grad_norm": 1.1288535594940186, "learning_rate": 1.520499063126124e-05, "loss": 0.5837, "step": 2320 }, { "epoch": 0.3461724896528581, "grad_norm": 1.1741398572921753, "learning_rate": 1.5200864389676793e-05, "loss": 0.6619, "step": 2321 }, { "epoch": 0.3463216376449532, "grad_norm": 1.1180373430252075, "learning_rate": 1.5196736933917211e-05, "loss": 0.6801, "step": 2322 }, { "epoch": 0.34647078563704836, "grad_norm": 1.0537831783294678, "learning_rate": 1.519260826494607e-05, "loss": 0.6431, "step": 2323 }, { "epoch": 0.3466199336291435, "grad_norm": 1.2319738864898682, "learning_rate": 1.518847838372723e-05, "loss": 0.6607, "step": 2324 }, { "epoch": 0.34676908162123865, "grad_norm": 1.1626718044281006, "learning_rate": 1.5184347291224843e-05, "loss": 0.674, "step": 2325 }, { "epoch": 0.34691822961333385, "grad_norm": 1.1153368949890137, "learning_rate": 1.5180214988403343e-05, "loss": 0.647, "step": 2326 }, { "epoch": 0.347067377605429, "grad_norm": 1.0389964580535889, "learning_rate": 1.5176081476227436e-05, "loss": 0.5188, "step": 2327 }, { "epoch": 0.34721652559752414, "grad_norm": 1.1456348896026611, "learning_rate": 1.5171946755662116e-05, "loss": 0.6278, "step": 2328 }, { "epoch": 0.3473656735896193, "grad_norm": 1.1270474195480347, "learning_rate": 1.5167810827672669e-05, "loss": 0.5064, "step": 2329 }, { "epoch": 0.3475148215817145, "grad_norm": 1.1507807970046997, "learning_rate": 1.5163673693224644e-05, "loss": 0.6169, "step": 2330 }, { "epoch": 0.34766396957380963, "grad_norm": 1.0895922183990479, "learning_rate": 1.5159535353283887e-05, "loss": 0.5664, "step": 2331 }, { "epoch": 0.3478131175659048, "grad_norm": 1.1593928337097168, "learning_rate": 1.5155395808816518e-05, "loss": 0.6728, "step": 2332 }, { "epoch": 0.3479622655579999, "grad_norm": 1.1057273149490356, "learning_rate": 1.5151255060788941e-05, "loss": 0.6779, "step": 2333 }, { "epoch": 0.34811141355009506, "grad_norm": 1.177842617034912, "learning_rate": 1.5147113110167841e-05, "loss": 0.6237, "step": 2334 }, { "epoch": 0.34826056154219026, "grad_norm": 1.0341589450836182, "learning_rate": 1.5142969957920181e-05, "loss": 0.6027, "step": 2335 }, { "epoch": 0.3484097095342854, "grad_norm": 1.1401914358139038, "learning_rate": 1.5138825605013208e-05, "loss": 0.6076, "step": 2336 }, { "epoch": 0.34855885752638055, "grad_norm": 1.051011562347412, "learning_rate": 1.5134680052414446e-05, "loss": 0.5552, "step": 2337 }, { "epoch": 0.3487080055184757, "grad_norm": 1.2088452577590942, "learning_rate": 1.51305333010917e-05, "loss": 0.674, "step": 2338 }, { "epoch": 0.34885715351057084, "grad_norm": 1.1596211194992065, "learning_rate": 1.5126385352013065e-05, "loss": 0.6775, "step": 2339 }, { "epoch": 0.34900630150266604, "grad_norm": 1.2481306791305542, "learning_rate": 1.5122236206146892e-05, "loss": 0.6823, "step": 2340 }, { "epoch": 0.3491554494947612, "grad_norm": 1.2777230739593506, "learning_rate": 1.5118085864461835e-05, "loss": 0.6676, "step": 2341 }, { "epoch": 0.3493045974868563, "grad_norm": 1.0291556119918823, "learning_rate": 1.5113934327926817e-05, "loss": 0.5167, "step": 2342 }, { "epoch": 0.34945374547895147, "grad_norm": 1.1682544946670532, "learning_rate": 1.5109781597511038e-05, "loss": 0.6122, "step": 2343 }, { "epoch": 0.34960289347104667, "grad_norm": 1.2814738750457764, "learning_rate": 1.510562767418398e-05, "loss": 0.6255, "step": 2344 }, { "epoch": 0.3497520414631418, "grad_norm": 1.2234506607055664, "learning_rate": 1.5101472558915408e-05, "loss": 0.6873, "step": 2345 }, { "epoch": 0.34990118945523696, "grad_norm": 1.206945776939392, "learning_rate": 1.5097316252675352e-05, "loss": 0.7023, "step": 2346 }, { "epoch": 0.3500503374473321, "grad_norm": 1.207877278327942, "learning_rate": 1.5093158756434134e-05, "loss": 0.6296, "step": 2347 }, { "epoch": 0.35019948543942725, "grad_norm": 1.1911588907241821, "learning_rate": 1.5089000071162347e-05, "loss": 0.6542, "step": 2348 }, { "epoch": 0.35034863343152245, "grad_norm": 1.1968457698822021, "learning_rate": 1.5084840197830861e-05, "loss": 0.639, "step": 2349 }, { "epoch": 0.3504977814236176, "grad_norm": 1.13497793674469, "learning_rate": 1.508067913741083e-05, "loss": 0.5881, "step": 2350 }, { "epoch": 0.35064692941571274, "grad_norm": 1.1960557699203491, "learning_rate": 1.5076516890873674e-05, "loss": 0.6804, "step": 2351 }, { "epoch": 0.3507960774078079, "grad_norm": 1.034600853919983, "learning_rate": 1.50723534591911e-05, "loss": 0.5243, "step": 2352 }, { "epoch": 0.350945225399903, "grad_norm": 1.2506364583969116, "learning_rate": 1.5068188843335087e-05, "loss": 0.5983, "step": 2353 }, { "epoch": 0.3510943733919982, "grad_norm": 1.1664483547210693, "learning_rate": 1.5064023044277891e-05, "loss": 0.6377, "step": 2354 }, { "epoch": 0.35124352138409337, "grad_norm": 1.18622887134552, "learning_rate": 1.5059856062992042e-05, "loss": 0.6983, "step": 2355 }, { "epoch": 0.3513926693761885, "grad_norm": 1.1094342470169067, "learning_rate": 1.5055687900450355e-05, "loss": 0.547, "step": 2356 }, { "epoch": 0.35154181736828366, "grad_norm": 1.2184512615203857, "learning_rate": 1.5051518557625908e-05, "loss": 0.66, "step": 2357 }, { "epoch": 0.35169096536037886, "grad_norm": 1.2000094652175903, "learning_rate": 1.5047348035492067e-05, "loss": 0.6279, "step": 2358 }, { "epoch": 0.351840113352474, "grad_norm": 1.128127098083496, "learning_rate": 1.504317633502246e-05, "loss": 0.5701, "step": 2359 }, { "epoch": 0.35198926134456915, "grad_norm": 1.2833284139633179, "learning_rate": 1.5039003457191e-05, "loss": 0.6508, "step": 2360 }, { "epoch": 0.3521384093366643, "grad_norm": 1.0958935022354126, "learning_rate": 1.5034829402971874e-05, "loss": 0.5437, "step": 2361 }, { "epoch": 0.35228755732875944, "grad_norm": 1.124182939529419, "learning_rate": 1.503065417333954e-05, "loss": 0.6539, "step": 2362 }, { "epoch": 0.35243670532085464, "grad_norm": 1.160143256187439, "learning_rate": 1.5026477769268732e-05, "loss": 0.6654, "step": 2363 }, { "epoch": 0.3525858533129498, "grad_norm": 1.157051682472229, "learning_rate": 1.502230019173446e-05, "loss": 0.5942, "step": 2364 }, { "epoch": 0.3527350013050449, "grad_norm": 1.1962699890136719, "learning_rate": 1.5018121441712005e-05, "loss": 0.7129, "step": 2365 }, { "epoch": 0.35288414929714007, "grad_norm": 1.2849602699279785, "learning_rate": 1.5013941520176922e-05, "loss": 0.6823, "step": 2366 }, { "epoch": 0.35303329728923527, "grad_norm": 1.1351858377456665, "learning_rate": 1.5009760428105045e-05, "loss": 0.5648, "step": 2367 }, { "epoch": 0.3531824452813304, "grad_norm": 1.2729465961456299, "learning_rate": 1.500557816647247e-05, "loss": 0.5773, "step": 2368 }, { "epoch": 0.35333159327342556, "grad_norm": 1.2773815393447876, "learning_rate": 1.5001394736255575e-05, "loss": 0.665, "step": 2369 }, { "epoch": 0.3534807412655207, "grad_norm": 1.1714000701904297, "learning_rate": 1.4997210138431011e-05, "loss": 0.6519, "step": 2370 }, { "epoch": 0.35362988925761585, "grad_norm": 1.1150767803192139, "learning_rate": 1.4993024373975698e-05, "loss": 0.637, "step": 2371 }, { "epoch": 0.35377903724971105, "grad_norm": 1.1919207572937012, "learning_rate": 1.4988837443866829e-05, "loss": 0.7161, "step": 2372 }, { "epoch": 0.3539281852418062, "grad_norm": 1.0508713722229004, "learning_rate": 1.4984649349081872e-05, "loss": 0.575, "step": 2373 }, { "epoch": 0.35407733323390134, "grad_norm": 1.3018360137939453, "learning_rate": 1.4980460090598562e-05, "loss": 0.7263, "step": 2374 }, { "epoch": 0.3542264812259965, "grad_norm": 1.1726030111312866, "learning_rate": 1.4976269669394908e-05, "loss": 0.6335, "step": 2375 }, { "epoch": 0.3543756292180916, "grad_norm": 1.1904913187026978, "learning_rate": 1.497207808644919e-05, "loss": 0.7019, "step": 2376 }, { "epoch": 0.3545247772101868, "grad_norm": 0.8625919222831726, "learning_rate": 1.4967885342739963e-05, "loss": 0.642, "step": 2377 }, { "epoch": 0.35467392520228197, "grad_norm": 1.2405176162719727, "learning_rate": 1.496369143924605e-05, "loss": 0.6817, "step": 2378 }, { "epoch": 0.3548230731943771, "grad_norm": 1.2021788358688354, "learning_rate": 1.495949637694654e-05, "loss": 0.6228, "step": 2379 }, { "epoch": 0.35497222118647226, "grad_norm": 1.1254299879074097, "learning_rate": 1.4955300156820805e-05, "loss": 0.6362, "step": 2380 }, { "epoch": 0.35512136917856746, "grad_norm": 0.8253349661827087, "learning_rate": 1.4951102779848473e-05, "loss": 0.6413, "step": 2381 }, { "epoch": 0.3552705171706626, "grad_norm": 1.1780520677566528, "learning_rate": 1.4946904247009446e-05, "loss": 0.6207, "step": 2382 }, { "epoch": 0.35541966516275775, "grad_norm": 1.062159538269043, "learning_rate": 1.494270455928391e-05, "loss": 0.5091, "step": 2383 }, { "epoch": 0.3555688131548529, "grad_norm": 1.0805145502090454, "learning_rate": 1.49385037176523e-05, "loss": 0.6872, "step": 2384 }, { "epoch": 0.35571796114694804, "grad_norm": 1.0512770414352417, "learning_rate": 1.4934301723095325e-05, "loss": 0.5585, "step": 2385 }, { "epoch": 0.35586710913904324, "grad_norm": 1.1995317935943604, "learning_rate": 1.4930098576593978e-05, "loss": 0.6229, "step": 2386 }, { "epoch": 0.3560162571311384, "grad_norm": 0.8476085662841797, "learning_rate": 1.4925894279129509e-05, "loss": 0.6352, "step": 2387 }, { "epoch": 0.3561654051232335, "grad_norm": 1.2612664699554443, "learning_rate": 1.4921688831683433e-05, "loss": 0.5196, "step": 2388 }, { "epoch": 0.35631455311532867, "grad_norm": 1.1277649402618408, "learning_rate": 1.4917482235237541e-05, "loss": 0.6829, "step": 2389 }, { "epoch": 0.3564637011074238, "grad_norm": 1.187063217163086, "learning_rate": 1.491327449077389e-05, "loss": 0.5822, "step": 2390 }, { "epoch": 0.356612849099519, "grad_norm": 1.18647038936615, "learning_rate": 1.4909065599274806e-05, "loss": 0.6008, "step": 2391 }, { "epoch": 0.35676199709161416, "grad_norm": 1.138887643814087, "learning_rate": 1.4904855561722881e-05, "loss": 0.6351, "step": 2392 }, { "epoch": 0.3569111450837093, "grad_norm": 1.2665749788284302, "learning_rate": 1.4900644379100974e-05, "loss": 0.5837, "step": 2393 }, { "epoch": 0.35706029307580445, "grad_norm": 1.2242523431777954, "learning_rate": 1.4896432052392213e-05, "loss": 0.6733, "step": 2394 }, { "epoch": 0.35720944106789965, "grad_norm": 1.1438907384872437, "learning_rate": 1.4892218582579997e-05, "loss": 0.6348, "step": 2395 }, { "epoch": 0.3573585890599948, "grad_norm": 1.097273588180542, "learning_rate": 1.4888003970647979e-05, "loss": 0.5815, "step": 2396 }, { "epoch": 0.35750773705208994, "grad_norm": 1.09188711643219, "learning_rate": 1.4883788217580093e-05, "loss": 0.6443, "step": 2397 }, { "epoch": 0.3576568850441851, "grad_norm": 1.1342017650604248, "learning_rate": 1.4879571324360533e-05, "loss": 0.5462, "step": 2398 }, { "epoch": 0.3578060330362802, "grad_norm": 1.1547940969467163, "learning_rate": 1.487535329197376e-05, "loss": 0.6752, "step": 2399 }, { "epoch": 0.3579551810283754, "grad_norm": 1.1404073238372803, "learning_rate": 1.4871134121404503e-05, "loss": 0.682, "step": 2400 }, { "epoch": 0.35810432902047057, "grad_norm": 1.1451327800750732, "learning_rate": 1.4866913813637749e-05, "loss": 0.6723, "step": 2401 }, { "epoch": 0.3582534770125657, "grad_norm": 1.1540638208389282, "learning_rate": 1.4862692369658755e-05, "loss": 0.6269, "step": 2402 }, { "epoch": 0.35840262500466086, "grad_norm": 1.1139609813690186, "learning_rate": 1.4858469790453049e-05, "loss": 0.6239, "step": 2403 }, { "epoch": 0.35855177299675606, "grad_norm": 0.9384128451347351, "learning_rate": 1.485424607700642e-05, "loss": 0.6839, "step": 2404 }, { "epoch": 0.3587009209888512, "grad_norm": 1.2513329982757568, "learning_rate": 1.4850021230304919e-05, "loss": 0.6386, "step": 2405 }, { "epoch": 0.35885006898094635, "grad_norm": 1.1992979049682617, "learning_rate": 1.4845795251334863e-05, "loss": 0.6751, "step": 2406 }, { "epoch": 0.3589992169730415, "grad_norm": 1.1437397003173828, "learning_rate": 1.4841568141082832e-05, "loss": 0.59, "step": 2407 }, { "epoch": 0.35914836496513663, "grad_norm": 1.2020580768585205, "learning_rate": 1.4837339900535674e-05, "loss": 0.6346, "step": 2408 }, { "epoch": 0.35929751295723183, "grad_norm": 1.095108985900879, "learning_rate": 1.4833110530680501e-05, "loss": 0.5362, "step": 2409 }, { "epoch": 0.359446660949327, "grad_norm": 1.1916958093643188, "learning_rate": 1.4828880032504684e-05, "loss": 0.6813, "step": 2410 }, { "epoch": 0.3595958089414221, "grad_norm": 1.1776877641677856, "learning_rate": 1.4824648406995858e-05, "loss": 0.6417, "step": 2411 }, { "epoch": 0.35974495693351727, "grad_norm": 1.0987616777420044, "learning_rate": 1.4820415655141932e-05, "loss": 0.6337, "step": 2412 }, { "epoch": 0.3598941049256124, "grad_norm": 1.1350202560424805, "learning_rate": 1.4816181777931056e-05, "loss": 0.6171, "step": 2413 }, { "epoch": 0.3600432529177076, "grad_norm": 1.1496028900146484, "learning_rate": 1.4811946776351667e-05, "loss": 0.7287, "step": 2414 }, { "epoch": 0.36019240090980276, "grad_norm": 1.1220872402191162, "learning_rate": 1.4807710651392446e-05, "loss": 0.6507, "step": 2415 }, { "epoch": 0.3603415489018979, "grad_norm": 1.1599910259246826, "learning_rate": 1.4803473404042345e-05, "loss": 0.6264, "step": 2416 }, { "epoch": 0.36049069689399305, "grad_norm": 1.2054933309555054, "learning_rate": 1.479923503529058e-05, "loss": 0.7003, "step": 2417 }, { "epoch": 0.36063984488608825, "grad_norm": 1.1370302438735962, "learning_rate": 1.4794995546126625e-05, "loss": 0.6166, "step": 2418 }, { "epoch": 0.3607889928781834, "grad_norm": 0.913709819316864, "learning_rate": 1.479075493754021e-05, "loss": 0.6803, "step": 2419 }, { "epoch": 0.36093814087027853, "grad_norm": 1.1656221151351929, "learning_rate": 1.4786513210521339e-05, "loss": 0.6206, "step": 2420 }, { "epoch": 0.3610872888623737, "grad_norm": 1.2734084129333496, "learning_rate": 1.4782270366060266e-05, "loss": 0.6766, "step": 2421 }, { "epoch": 0.3612364368544688, "grad_norm": 1.2036527395248413, "learning_rate": 1.4778026405147515e-05, "loss": 0.634, "step": 2422 }, { "epoch": 0.361385584846564, "grad_norm": 1.2226759195327759, "learning_rate": 1.477378132877386e-05, "loss": 0.6736, "step": 2423 }, { "epoch": 0.36153473283865917, "grad_norm": 1.07655930519104, "learning_rate": 1.4769535137930343e-05, "loss": 0.5639, "step": 2424 }, { "epoch": 0.3616838808307543, "grad_norm": 1.0317821502685547, "learning_rate": 1.4765287833608268e-05, "loss": 0.6264, "step": 2425 }, { "epoch": 0.36183302882284946, "grad_norm": 1.090095043182373, "learning_rate": 1.4761039416799192e-05, "loss": 0.6456, "step": 2426 }, { "epoch": 0.36198217681494466, "grad_norm": 1.1736441850662231, "learning_rate": 1.4756789888494938e-05, "loss": 0.5805, "step": 2427 }, { "epoch": 0.3621313248070398, "grad_norm": 1.0792051553726196, "learning_rate": 1.4752539249687583e-05, "loss": 0.6208, "step": 2428 }, { "epoch": 0.36228047279913494, "grad_norm": 1.1745307445526123, "learning_rate": 1.4748287501369464e-05, "loss": 0.5955, "step": 2429 }, { "epoch": 0.3624296207912301, "grad_norm": 1.1064412593841553, "learning_rate": 1.4744034644533185e-05, "loss": 0.692, "step": 2430 }, { "epoch": 0.36257876878332523, "grad_norm": 1.0955291986465454, "learning_rate": 1.4739780680171598e-05, "loss": 0.6543, "step": 2431 }, { "epoch": 0.36272791677542043, "grad_norm": 1.2435084581375122, "learning_rate": 1.4735525609277819e-05, "loss": 0.5788, "step": 2432 }, { "epoch": 0.3628770647675156, "grad_norm": 1.2891736030578613, "learning_rate": 1.4731269432845223e-05, "loss": 0.6665, "step": 2433 }, { "epoch": 0.3630262127596107, "grad_norm": 1.0186350345611572, "learning_rate": 1.4727012151867442e-05, "loss": 0.6118, "step": 2434 }, { "epoch": 0.36317536075170587, "grad_norm": 1.1585482358932495, "learning_rate": 1.472275376733836e-05, "loss": 0.6628, "step": 2435 }, { "epoch": 0.363324508743801, "grad_norm": 1.1640887260437012, "learning_rate": 1.4718494280252133e-05, "loss": 0.6727, "step": 2436 }, { "epoch": 0.3634736567358962, "grad_norm": 1.0916013717651367, "learning_rate": 1.4714233691603161e-05, "loss": 0.6368, "step": 2437 }, { "epoch": 0.36362280472799136, "grad_norm": 1.1571252346038818, "learning_rate": 1.4709972002386104e-05, "loss": 0.638, "step": 2438 }, { "epoch": 0.3637719527200865, "grad_norm": 1.0557003021240234, "learning_rate": 1.4705709213595882e-05, "loss": 0.53, "step": 2439 }, { "epoch": 0.36392110071218164, "grad_norm": 1.215585470199585, "learning_rate": 1.4701445326227675e-05, "loss": 0.6391, "step": 2440 }, { "epoch": 0.36407024870427684, "grad_norm": 1.2459467649459839, "learning_rate": 1.4697180341276907e-05, "loss": 0.6896, "step": 2441 }, { "epoch": 0.364219396696372, "grad_norm": 1.0817655324935913, "learning_rate": 1.4692914259739268e-05, "loss": 0.6541, "step": 2442 }, { "epoch": 0.36436854468846713, "grad_norm": 1.3136605024337769, "learning_rate": 1.4688647082610707e-05, "loss": 0.6554, "step": 2443 }, { "epoch": 0.3645176926805623, "grad_norm": 1.2122713327407837, "learning_rate": 1.4684378810887422e-05, "loss": 0.592, "step": 2444 }, { "epoch": 0.3646668406726574, "grad_norm": 1.224599003791809, "learning_rate": 1.4680109445565864e-05, "loss": 0.6772, "step": 2445 }, { "epoch": 0.3648159886647526, "grad_norm": 1.26401686668396, "learning_rate": 1.467583898764275e-05, "loss": 0.6492, "step": 2446 }, { "epoch": 0.36496513665684777, "grad_norm": 1.0650560855865479, "learning_rate": 1.4671567438115039e-05, "loss": 0.6093, "step": 2447 }, { "epoch": 0.3651142846489429, "grad_norm": 1.1972862482070923, "learning_rate": 1.4667294797979958e-05, "loss": 0.6843, "step": 2448 }, { "epoch": 0.36526343264103805, "grad_norm": 1.0943607091903687, "learning_rate": 1.466302106823498e-05, "loss": 0.6258, "step": 2449 }, { "epoch": 0.3654125806331332, "grad_norm": 1.1011862754821777, "learning_rate": 1.4658746249877833e-05, "loss": 0.6573, "step": 2450 }, { "epoch": 0.3655617286252284, "grad_norm": 1.1226743459701538, "learning_rate": 1.4654470343906501e-05, "loss": 0.6012, "step": 2451 }, { "epoch": 0.36571087661732354, "grad_norm": 1.107357144355774, "learning_rate": 1.4650193351319224e-05, "loss": 0.6686, "step": 2452 }, { "epoch": 0.3658600246094187, "grad_norm": 1.2051340341567993, "learning_rate": 1.4645915273114492e-05, "loss": 0.6158, "step": 2453 }, { "epoch": 0.36600917260151383, "grad_norm": 1.3171552419662476, "learning_rate": 1.4641636110291051e-05, "loss": 0.669, "step": 2454 }, { "epoch": 0.36615832059360903, "grad_norm": 1.2568854093551636, "learning_rate": 1.4637355863847893e-05, "loss": 0.6942, "step": 2455 }, { "epoch": 0.3663074685857042, "grad_norm": 1.2294825315475464, "learning_rate": 1.4633074534784278e-05, "loss": 0.692, "step": 2456 }, { "epoch": 0.3664566165777993, "grad_norm": 1.226195216178894, "learning_rate": 1.4628792124099704e-05, "loss": 0.6429, "step": 2457 }, { "epoch": 0.36660576456989447, "grad_norm": 1.077444314956665, "learning_rate": 1.4624508632793928e-05, "loss": 0.6199, "step": 2458 }, { "epoch": 0.3667549125619896, "grad_norm": 1.1633046865463257, "learning_rate": 1.462022406186696e-05, "loss": 0.6502, "step": 2459 }, { "epoch": 0.3669040605540848, "grad_norm": 1.179376482963562, "learning_rate": 1.461593841231906e-05, "loss": 0.6951, "step": 2460 }, { "epoch": 0.36705320854617995, "grad_norm": 1.0881109237670898, "learning_rate": 1.4611651685150738e-05, "loss": 0.5681, "step": 2461 }, { "epoch": 0.3672023565382751, "grad_norm": 1.0869508981704712, "learning_rate": 1.4607363881362765e-05, "loss": 0.6983, "step": 2462 }, { "epoch": 0.36735150453037024, "grad_norm": 1.143589973449707, "learning_rate": 1.4603075001956145e-05, "loss": 0.6477, "step": 2463 }, { "epoch": 0.36750065252246544, "grad_norm": 0.9943081140518188, "learning_rate": 1.4598785047932153e-05, "loss": 0.547, "step": 2464 }, { "epoch": 0.3676498005145606, "grad_norm": 1.1536625623703003, "learning_rate": 1.4594494020292307e-05, "loss": 0.6746, "step": 2465 }, { "epoch": 0.36779894850665573, "grad_norm": 1.2940510511398315, "learning_rate": 1.4590201920038367e-05, "loss": 0.6448, "step": 2466 }, { "epoch": 0.3679480964987509, "grad_norm": 1.1569350957870483, "learning_rate": 1.4585908748172361e-05, "loss": 0.6971, "step": 2467 }, { "epoch": 0.368097244490846, "grad_norm": 1.1893161535263062, "learning_rate": 1.4581614505696551e-05, "loss": 0.6987, "step": 2468 }, { "epoch": 0.3682463924829412, "grad_norm": 1.1677331924438477, "learning_rate": 1.4577319193613455e-05, "loss": 0.6274, "step": 2469 }, { "epoch": 0.36839554047503636, "grad_norm": 1.0877685546875, "learning_rate": 1.4573022812925845e-05, "loss": 0.6571, "step": 2470 }, { "epoch": 0.3685446884671315, "grad_norm": 0.894294798374176, "learning_rate": 1.4568725364636738e-05, "loss": 0.6908, "step": 2471 }, { "epoch": 0.36869383645922665, "grad_norm": 1.094233751296997, "learning_rate": 1.45644268497494e-05, "loss": 0.6422, "step": 2472 }, { "epoch": 0.3688429844513218, "grad_norm": 1.2482166290283203, "learning_rate": 1.4560127269267344e-05, "loss": 0.612, "step": 2473 }, { "epoch": 0.368992132443417, "grad_norm": 1.1260907649993896, "learning_rate": 1.4555826624194339e-05, "loss": 0.6405, "step": 2474 }, { "epoch": 0.36914128043551214, "grad_norm": 1.3105825185775757, "learning_rate": 1.4551524915534396e-05, "loss": 0.6682, "step": 2475 }, { "epoch": 0.3692904284276073, "grad_norm": 1.0918803215026855, "learning_rate": 1.4547222144291777e-05, "loss": 0.6206, "step": 2476 }, { "epoch": 0.36943957641970243, "grad_norm": 1.2358671426773071, "learning_rate": 1.4542918311470988e-05, "loss": 0.6631, "step": 2477 }, { "epoch": 0.36958872441179763, "grad_norm": 1.136886715888977, "learning_rate": 1.4538613418076795e-05, "loss": 0.6222, "step": 2478 }, { "epoch": 0.3697378724038928, "grad_norm": 1.062239408493042, "learning_rate": 1.4534307465114199e-05, "loss": 0.5253, "step": 2479 }, { "epoch": 0.3698870203959879, "grad_norm": 1.2510097026824951, "learning_rate": 1.4530000453588447e-05, "loss": 0.6903, "step": 2480 }, { "epoch": 0.37003616838808306, "grad_norm": 1.132807970046997, "learning_rate": 1.4525692384505043e-05, "loss": 0.6211, "step": 2481 }, { "epoch": 0.3701853163801782, "grad_norm": 1.1154229640960693, "learning_rate": 1.4521383258869735e-05, "loss": 0.6409, "step": 2482 }, { "epoch": 0.3703344643722734, "grad_norm": 1.1730159521102905, "learning_rate": 1.4517073077688513e-05, "loss": 0.6573, "step": 2483 }, { "epoch": 0.37048361236436855, "grad_norm": 1.2206953763961792, "learning_rate": 1.4512761841967615e-05, "loss": 0.613, "step": 2484 }, { "epoch": 0.3706327603564637, "grad_norm": 1.1736737489700317, "learning_rate": 1.4508449552713532e-05, "loss": 0.6591, "step": 2485 }, { "epoch": 0.37078190834855884, "grad_norm": 1.1418514251708984, "learning_rate": 1.450413621093299e-05, "loss": 0.5874, "step": 2486 }, { "epoch": 0.370931056340654, "grad_norm": 1.0758670568466187, "learning_rate": 1.4499821817632973e-05, "loss": 0.5518, "step": 2487 }, { "epoch": 0.3710802043327492, "grad_norm": 1.3015093803405762, "learning_rate": 1.4495506373820695e-05, "loss": 0.6504, "step": 2488 }, { "epoch": 0.37122935232484433, "grad_norm": 1.2582341432571411, "learning_rate": 1.4491189880503633e-05, "loss": 0.6912, "step": 2489 }, { "epoch": 0.3713785003169395, "grad_norm": 1.1303396224975586, "learning_rate": 1.4486872338689492e-05, "loss": 0.6337, "step": 2490 }, { "epoch": 0.3715276483090346, "grad_norm": 1.2301287651062012, "learning_rate": 1.4482553749386234e-05, "loss": 0.6526, "step": 2491 }, { "epoch": 0.3716767963011298, "grad_norm": 1.2032397985458374, "learning_rate": 1.4478234113602063e-05, "loss": 0.607, "step": 2492 }, { "epoch": 0.37182594429322496, "grad_norm": 1.0841890573501587, "learning_rate": 1.4473913432345426e-05, "loss": 0.5831, "step": 2493 }, { "epoch": 0.3719750922853201, "grad_norm": 1.2780532836914062, "learning_rate": 1.4469591706625003e-05, "loss": 0.6153, "step": 2494 }, { "epoch": 0.37212424027741525, "grad_norm": 1.2151846885681152, "learning_rate": 1.4465268937449743e-05, "loss": 0.6681, "step": 2495 }, { "epoch": 0.3722733882695104, "grad_norm": 1.194664478302002, "learning_rate": 1.446094512582882e-05, "loss": 0.6127, "step": 2496 }, { "epoch": 0.3724225362616056, "grad_norm": 1.1600722074508667, "learning_rate": 1.445662027277165e-05, "loss": 0.6267, "step": 2497 }, { "epoch": 0.37257168425370074, "grad_norm": 1.1818941831588745, "learning_rate": 1.44522943792879e-05, "loss": 0.6382, "step": 2498 }, { "epoch": 0.3727208322457959, "grad_norm": 1.0924981832504272, "learning_rate": 1.4447967446387482e-05, "loss": 0.6308, "step": 2499 }, { "epoch": 0.37286998023789103, "grad_norm": 1.1907544136047363, "learning_rate": 1.444363947508054e-05, "loss": 0.6487, "step": 2500 }, { "epoch": 0.37301912822998623, "grad_norm": 1.1232455968856812, "learning_rate": 1.4439310466377474e-05, "loss": 0.6446, "step": 2501 }, { "epoch": 0.3731682762220814, "grad_norm": 1.2410924434661865, "learning_rate": 1.4434980421288911e-05, "loss": 0.6853, "step": 2502 }, { "epoch": 0.3733174242141765, "grad_norm": 1.231178641319275, "learning_rate": 1.443064934082573e-05, "loss": 0.6047, "step": 2503 }, { "epoch": 0.37346657220627166, "grad_norm": 1.1369218826293945, "learning_rate": 1.4426317225999055e-05, "loss": 0.7485, "step": 2504 }, { "epoch": 0.3736157201983668, "grad_norm": 1.327988862991333, "learning_rate": 1.4421984077820242e-05, "loss": 0.6756, "step": 2505 }, { "epoch": 0.373764868190462, "grad_norm": 1.257147192955017, "learning_rate": 1.4417649897300891e-05, "loss": 0.7114, "step": 2506 }, { "epoch": 0.37391401618255715, "grad_norm": 1.069991946220398, "learning_rate": 1.4413314685452844e-05, "loss": 0.6602, "step": 2507 }, { "epoch": 0.3740631641746523, "grad_norm": 1.245141863822937, "learning_rate": 1.4408978443288186e-05, "loss": 0.7239, "step": 2508 }, { "epoch": 0.37421231216674744, "grad_norm": 1.1302036046981812, "learning_rate": 1.440464117181924e-05, "loss": 0.6721, "step": 2509 }, { "epoch": 0.3743614601588426, "grad_norm": 1.09298837184906, "learning_rate": 1.4400302872058568e-05, "loss": 0.6303, "step": 2510 }, { "epoch": 0.3745106081509378, "grad_norm": 1.2541240453720093, "learning_rate": 1.439596354501898e-05, "loss": 0.6744, "step": 2511 }, { "epoch": 0.37465975614303293, "grad_norm": 1.054716944694519, "learning_rate": 1.4391623191713513e-05, "loss": 0.6421, "step": 2512 }, { "epoch": 0.3748089041351281, "grad_norm": 1.1350510120391846, "learning_rate": 1.4387281813155451e-05, "loss": 0.5899, "step": 2513 }, { "epoch": 0.3749580521272232, "grad_norm": 1.155248999595642, "learning_rate": 1.438293941035832e-05, "loss": 0.677, "step": 2514 }, { "epoch": 0.3751072001193184, "grad_norm": 1.1380879878997803, "learning_rate": 1.4378595984335881e-05, "loss": 0.6861, "step": 2515 }, { "epoch": 0.37525634811141356, "grad_norm": 1.1584175825119019, "learning_rate": 1.4374251536102131e-05, "loss": 0.6528, "step": 2516 }, { "epoch": 0.3754054961035087, "grad_norm": 1.238633632659912, "learning_rate": 1.4369906066671313e-05, "loss": 0.5578, "step": 2517 }, { "epoch": 0.37555464409560385, "grad_norm": 1.1792998313903809, "learning_rate": 1.4365559577057905e-05, "loss": 0.6607, "step": 2518 }, { "epoch": 0.375703792087699, "grad_norm": 1.2787050008773804, "learning_rate": 1.4361212068276622e-05, "loss": 0.741, "step": 2519 }, { "epoch": 0.3758529400797942, "grad_norm": 1.0522730350494385, "learning_rate": 1.4356863541342416e-05, "loss": 0.602, "step": 2520 }, { "epoch": 0.37600208807188934, "grad_norm": 1.1818456649780273, "learning_rate": 1.435251399727048e-05, "loss": 0.7179, "step": 2521 }, { "epoch": 0.3761512360639845, "grad_norm": 1.1337130069732666, "learning_rate": 1.4348163437076243e-05, "loss": 0.569, "step": 2522 }, { "epoch": 0.37630038405607963, "grad_norm": 0.9337676167488098, "learning_rate": 1.4343811861775373e-05, "loss": 0.6853, "step": 2523 }, { "epoch": 0.37644953204817483, "grad_norm": 1.0946545600891113, "learning_rate": 1.4339459272383766e-05, "loss": 0.5193, "step": 2524 }, { "epoch": 0.37659868004027, "grad_norm": 1.1546049118041992, "learning_rate": 1.433510566991757e-05, "loss": 0.6163, "step": 2525 }, { "epoch": 0.3767478280323651, "grad_norm": 1.1320608854293823, "learning_rate": 1.4330751055393162e-05, "loss": 0.6018, "step": 2526 }, { "epoch": 0.37689697602446026, "grad_norm": 1.0993597507476807, "learning_rate": 1.4326395429827147e-05, "loss": 0.6128, "step": 2527 }, { "epoch": 0.3770461240165554, "grad_norm": 1.1415115594863892, "learning_rate": 1.4322038794236379e-05, "loss": 0.6844, "step": 2528 }, { "epoch": 0.3771952720086506, "grad_norm": 1.0982061624526978, "learning_rate": 1.4317681149637941e-05, "loss": 0.5559, "step": 2529 }, { "epoch": 0.37734442000074575, "grad_norm": 1.1134703159332275, "learning_rate": 1.4313322497049153e-05, "loss": 0.59, "step": 2530 }, { "epoch": 0.3774935679928409, "grad_norm": 1.168164849281311, "learning_rate": 1.4308962837487573e-05, "loss": 0.5602, "step": 2531 }, { "epoch": 0.37764271598493604, "grad_norm": 1.2293918132781982, "learning_rate": 1.430460217197099e-05, "loss": 0.6845, "step": 2532 }, { "epoch": 0.3777918639770312, "grad_norm": 1.1074397563934326, "learning_rate": 1.4300240501517424e-05, "loss": 0.628, "step": 2533 }, { "epoch": 0.3779410119691264, "grad_norm": 1.1269826889038086, "learning_rate": 1.4295877827145144e-05, "loss": 0.6155, "step": 2534 }, { "epoch": 0.37809015996122153, "grad_norm": 1.1453746557235718, "learning_rate": 1.4291514149872638e-05, "loss": 0.6145, "step": 2535 }, { "epoch": 0.3782393079533167, "grad_norm": 1.0699477195739746, "learning_rate": 1.4287149470718635e-05, "loss": 0.5821, "step": 2536 }, { "epoch": 0.3783884559454118, "grad_norm": 1.0749107599258423, "learning_rate": 1.4282783790702102e-05, "loss": 0.6257, "step": 2537 }, { "epoch": 0.378537603937507, "grad_norm": 1.139765977859497, "learning_rate": 1.427841711084223e-05, "loss": 0.6194, "step": 2538 }, { "epoch": 0.37868675192960216, "grad_norm": 1.049310326576233, "learning_rate": 1.427404943215845e-05, "loss": 0.5625, "step": 2539 }, { "epoch": 0.3788358999216973, "grad_norm": 0.9915666580200195, "learning_rate": 1.4269680755670425e-05, "loss": 0.6088, "step": 2540 }, { "epoch": 0.37898504791379245, "grad_norm": 1.2334954738616943, "learning_rate": 1.426531108239805e-05, "loss": 0.5992, "step": 2541 }, { "epoch": 0.3791341959058876, "grad_norm": 1.154932975769043, "learning_rate": 1.4260940413361452e-05, "loss": 0.6304, "step": 2542 }, { "epoch": 0.3792833438979828, "grad_norm": 1.4365710020065308, "learning_rate": 1.4256568749580996e-05, "loss": 0.678, "step": 2543 }, { "epoch": 0.37943249189007794, "grad_norm": 1.2050710916519165, "learning_rate": 1.425219609207727e-05, "loss": 0.6771, "step": 2544 }, { "epoch": 0.3795816398821731, "grad_norm": 1.2339609861373901, "learning_rate": 1.4247822441871105e-05, "loss": 0.6399, "step": 2545 }, { "epoch": 0.3797307878742682, "grad_norm": 1.0822374820709229, "learning_rate": 1.424344779998355e-05, "loss": 0.5751, "step": 2546 }, { "epoch": 0.37987993586636337, "grad_norm": 1.3397822380065918, "learning_rate": 1.4239072167435897e-05, "loss": 0.6585, "step": 2547 }, { "epoch": 0.38002908385845857, "grad_norm": 1.2038640975952148, "learning_rate": 1.4234695545249666e-05, "loss": 0.5629, "step": 2548 }, { "epoch": 0.3801782318505537, "grad_norm": 1.2296594381332397, "learning_rate": 1.4230317934446607e-05, "loss": 0.5773, "step": 2549 }, { "epoch": 0.38032737984264886, "grad_norm": 1.2118661403656006, "learning_rate": 1.4225939336048703e-05, "loss": 0.6774, "step": 2550 }, { "epoch": 0.380476527834744, "grad_norm": 1.0997288227081299, "learning_rate": 1.422155975107816e-05, "loss": 0.6047, "step": 2551 }, { "epoch": 0.3806256758268392, "grad_norm": 1.1665229797363281, "learning_rate": 1.4217179180557428e-05, "loss": 0.6841, "step": 2552 }, { "epoch": 0.38077482381893435, "grad_norm": 1.2034410238265991, "learning_rate": 1.4212797625509173e-05, "loss": 0.552, "step": 2553 }, { "epoch": 0.3809239718110295, "grad_norm": 1.2834223508834839, "learning_rate": 1.4208415086956305e-05, "loss": 0.6434, "step": 2554 }, { "epoch": 0.38107311980312464, "grad_norm": 1.2012877464294434, "learning_rate": 1.4204031565921944e-05, "loss": 0.6497, "step": 2555 }, { "epoch": 0.3812222677952198, "grad_norm": 1.1488008499145508, "learning_rate": 1.419964706342946e-05, "loss": 0.6316, "step": 2556 }, { "epoch": 0.381371415787315, "grad_norm": 1.2622568607330322, "learning_rate": 1.4195261580502442e-05, "loss": 0.6941, "step": 2557 }, { "epoch": 0.3815205637794101, "grad_norm": 1.1453534364700317, "learning_rate": 1.4190875118164706e-05, "loss": 0.7083, "step": 2558 }, { "epoch": 0.38166971177150527, "grad_norm": 1.0970566272735596, "learning_rate": 1.4186487677440304e-05, "loss": 0.6895, "step": 2559 }, { "epoch": 0.3818188597636004, "grad_norm": 1.1614669561386108, "learning_rate": 1.4182099259353508e-05, "loss": 0.6096, "step": 2560 }, { "epoch": 0.3819680077556956, "grad_norm": 1.1430466175079346, "learning_rate": 1.4177709864928822e-05, "loss": 0.671, "step": 2561 }, { "epoch": 0.38211715574779076, "grad_norm": 1.1221363544464111, "learning_rate": 1.4173319495190984e-05, "loss": 0.6116, "step": 2562 }, { "epoch": 0.3822663037398859, "grad_norm": 1.0543774366378784, "learning_rate": 1.416892815116495e-05, "loss": 0.5815, "step": 2563 }, { "epoch": 0.38241545173198105, "grad_norm": 1.2204867601394653, "learning_rate": 1.4164535833875905e-05, "loss": 0.6311, "step": 2564 }, { "epoch": 0.3825645997240762, "grad_norm": 1.1108311414718628, "learning_rate": 1.416014254434927e-05, "loss": 0.5131, "step": 2565 }, { "epoch": 0.3827137477161714, "grad_norm": 1.1831071376800537, "learning_rate": 1.415574828361068e-05, "loss": 0.7114, "step": 2566 }, { "epoch": 0.38286289570826654, "grad_norm": 1.1686744689941406, "learning_rate": 1.4151353052686008e-05, "loss": 0.656, "step": 2567 }, { "epoch": 0.3830120437003617, "grad_norm": 1.1614011526107788, "learning_rate": 1.4146956852601349e-05, "loss": 0.6858, "step": 2568 }, { "epoch": 0.3831611916924568, "grad_norm": 1.1084784269332886, "learning_rate": 1.4142559684383018e-05, "loss": 0.6471, "step": 2569 }, { "epoch": 0.38331033968455197, "grad_norm": 1.1120423078536987, "learning_rate": 1.413816154905757e-05, "loss": 0.6129, "step": 2570 }, { "epoch": 0.38345948767664717, "grad_norm": 1.3428999185562134, "learning_rate": 1.4133762447651774e-05, "loss": 0.6416, "step": 2571 }, { "epoch": 0.3836086356687423, "grad_norm": 1.221336007118225, "learning_rate": 1.4129362381192626e-05, "loss": 0.5765, "step": 2572 }, { "epoch": 0.38375778366083746, "grad_norm": 1.002447485923767, "learning_rate": 1.4124961350707354e-05, "loss": 0.5686, "step": 2573 }, { "epoch": 0.3839069316529326, "grad_norm": 1.1218842267990112, "learning_rate": 1.4120559357223407e-05, "loss": 0.636, "step": 2574 }, { "epoch": 0.3840560796450278, "grad_norm": 1.247597575187683, "learning_rate": 1.4116156401768452e-05, "loss": 0.6565, "step": 2575 }, { "epoch": 0.38420522763712295, "grad_norm": 1.3182353973388672, "learning_rate": 1.4111752485370399e-05, "loss": 0.6789, "step": 2576 }, { "epoch": 0.3843543756292181, "grad_norm": 1.1791224479675293, "learning_rate": 1.4107347609057358e-05, "loss": 0.6474, "step": 2577 }, { "epoch": 0.38450352362131324, "grad_norm": 1.0973304510116577, "learning_rate": 1.4102941773857683e-05, "loss": 0.5815, "step": 2578 }, { "epoch": 0.3846526716134084, "grad_norm": 1.1801602840423584, "learning_rate": 1.4098534980799943e-05, "loss": 0.616, "step": 2579 }, { "epoch": 0.3848018196055036, "grad_norm": 1.083550214767456, "learning_rate": 1.4094127230912931e-05, "loss": 0.6689, "step": 2580 }, { "epoch": 0.3849509675975987, "grad_norm": 1.1558889150619507, "learning_rate": 1.4089718525225667e-05, "loss": 0.6097, "step": 2581 }, { "epoch": 0.38510011558969387, "grad_norm": 1.0824357271194458, "learning_rate": 1.4085308864767389e-05, "loss": 0.5887, "step": 2582 }, { "epoch": 0.385249263581789, "grad_norm": 1.2718303203582764, "learning_rate": 1.4080898250567559e-05, "loss": 0.6366, "step": 2583 }, { "epoch": 0.3853984115738842, "grad_norm": 1.1554316282272339, "learning_rate": 1.407648668365587e-05, "loss": 0.6754, "step": 2584 }, { "epoch": 0.38554755956597936, "grad_norm": 1.1498593091964722, "learning_rate": 1.4072074165062224e-05, "loss": 0.5893, "step": 2585 }, { "epoch": 0.3856967075580745, "grad_norm": 1.2291748523712158, "learning_rate": 1.4067660695816751e-05, "loss": 0.6318, "step": 2586 }, { "epoch": 0.38584585555016965, "grad_norm": 1.2928751707077026, "learning_rate": 1.4063246276949811e-05, "loss": 0.6936, "step": 2587 }, { "epoch": 0.3859950035422648, "grad_norm": 1.0974886417388916, "learning_rate": 1.4058830909491971e-05, "loss": 0.5083, "step": 2588 }, { "epoch": 0.38614415153436, "grad_norm": 1.163084626197815, "learning_rate": 1.4054414594474033e-05, "loss": 0.697, "step": 2589 }, { "epoch": 0.38629329952645514, "grad_norm": 1.08879554271698, "learning_rate": 1.4049997332927007e-05, "loss": 0.6037, "step": 2590 }, { "epoch": 0.3864424475185503, "grad_norm": 1.2651269435882568, "learning_rate": 1.4045579125882136e-05, "loss": 0.6738, "step": 2591 }, { "epoch": 0.3865915955106454, "grad_norm": 1.1823219060897827, "learning_rate": 1.4041159974370881e-05, "loss": 0.6506, "step": 2592 }, { "epoch": 0.38674074350274057, "grad_norm": 1.2279115915298462, "learning_rate": 1.4036739879424916e-05, "loss": 0.6843, "step": 2593 }, { "epoch": 0.38688989149483577, "grad_norm": 0.9218177199363708, "learning_rate": 1.403231884207614e-05, "loss": 0.6811, "step": 2594 }, { "epoch": 0.3870390394869309, "grad_norm": 1.170396089553833, "learning_rate": 1.4027896863356679e-05, "loss": 0.6911, "step": 2595 }, { "epoch": 0.38718818747902606, "grad_norm": 1.243364691734314, "learning_rate": 1.4023473944298864e-05, "loss": 0.6541, "step": 2596 }, { "epoch": 0.3873373354711212, "grad_norm": 1.1150484085083008, "learning_rate": 1.401905008593526e-05, "loss": 0.6369, "step": 2597 }, { "epoch": 0.3874864834632164, "grad_norm": 0.9607396721839905, "learning_rate": 1.4014625289298645e-05, "loss": 0.7108, "step": 2598 }, { "epoch": 0.38763563145531155, "grad_norm": 1.2415446043014526, "learning_rate": 1.401019955542201e-05, "loss": 0.6312, "step": 2599 }, { "epoch": 0.3877847794474067, "grad_norm": 0.8622448444366455, "learning_rate": 1.4005772885338578e-05, "loss": 0.6223, "step": 2600 }, { "epoch": 0.38793392743950184, "grad_norm": 1.143079161643982, "learning_rate": 1.4001345280081782e-05, "loss": 0.6207, "step": 2601 }, { "epoch": 0.388083075431597, "grad_norm": 1.0804195404052734, "learning_rate": 1.399691674068527e-05, "loss": 0.6029, "step": 2602 }, { "epoch": 0.3882322234236922, "grad_norm": 1.191677212715149, "learning_rate": 1.399248726818292e-05, "loss": 0.6142, "step": 2603 }, { "epoch": 0.3883813714157873, "grad_norm": 1.2122994661331177, "learning_rate": 1.3988056863608815e-05, "loss": 0.6647, "step": 2604 }, { "epoch": 0.38853051940788247, "grad_norm": 1.2542264461517334, "learning_rate": 1.3983625527997264e-05, "loss": 0.7139, "step": 2605 }, { "epoch": 0.3886796673999776, "grad_norm": 1.1736431121826172, "learning_rate": 1.3979193262382791e-05, "loss": 0.6518, "step": 2606 }, { "epoch": 0.38882881539207276, "grad_norm": 1.0621994733810425, "learning_rate": 1.3974760067800137e-05, "loss": 0.6296, "step": 2607 }, { "epoch": 0.38897796338416796, "grad_norm": 1.164101004600525, "learning_rate": 1.3970325945284255e-05, "loss": 0.6376, "step": 2608 }, { "epoch": 0.3891271113762631, "grad_norm": 1.2306678295135498, "learning_rate": 1.3965890895870328e-05, "loss": 0.7073, "step": 2609 }, { "epoch": 0.38927625936835825, "grad_norm": 1.143444299697876, "learning_rate": 1.3961454920593743e-05, "loss": 0.6834, "step": 2610 }, { "epoch": 0.3894254073604534, "grad_norm": 1.1534868478775024, "learning_rate": 1.3957018020490101e-05, "loss": 0.6563, "step": 2611 }, { "epoch": 0.3895745553525486, "grad_norm": 1.2061253786087036, "learning_rate": 1.3952580196595232e-05, "loss": 0.7918, "step": 2612 }, { "epoch": 0.38972370334464373, "grad_norm": 1.2574783563613892, "learning_rate": 1.3948141449945172e-05, "loss": 0.6667, "step": 2613 }, { "epoch": 0.3898728513367389, "grad_norm": 1.226928472518921, "learning_rate": 1.3943701781576172e-05, "loss": 0.6253, "step": 2614 }, { "epoch": 0.390021999328834, "grad_norm": 1.1780203580856323, "learning_rate": 1.3939261192524708e-05, "loss": 0.7411, "step": 2615 }, { "epoch": 0.39017114732092917, "grad_norm": 1.3115057945251465, "learning_rate": 1.3934819683827457e-05, "loss": 0.6235, "step": 2616 }, { "epoch": 0.39032029531302437, "grad_norm": 1.2242225408554077, "learning_rate": 1.393037725652132e-05, "loss": 0.6747, "step": 2617 }, { "epoch": 0.3904694433051195, "grad_norm": 1.1372981071472168, "learning_rate": 1.3925933911643415e-05, "loss": 0.646, "step": 2618 }, { "epoch": 0.39061859129721466, "grad_norm": 1.1222646236419678, "learning_rate": 1.3921489650231061e-05, "loss": 0.6628, "step": 2619 }, { "epoch": 0.3907677392893098, "grad_norm": 1.014543056488037, "learning_rate": 1.3917044473321805e-05, "loss": 0.5768, "step": 2620 }, { "epoch": 0.390916887281405, "grad_norm": 1.2270069122314453, "learning_rate": 1.39125983819534e-05, "loss": 0.6214, "step": 2621 }, { "epoch": 0.39106603527350015, "grad_norm": 1.1071840524673462, "learning_rate": 1.3908151377163815e-05, "loss": 0.5813, "step": 2622 }, { "epoch": 0.3912151832655953, "grad_norm": 1.1206560134887695, "learning_rate": 1.3903703459991234e-05, "loss": 0.5779, "step": 2623 }, { "epoch": 0.39136433125769043, "grad_norm": 1.1620360612869263, "learning_rate": 1.3899254631474048e-05, "loss": 0.6387, "step": 2624 }, { "epoch": 0.3915134792497856, "grad_norm": 1.0899052619934082, "learning_rate": 1.3894804892650864e-05, "loss": 0.6692, "step": 2625 }, { "epoch": 0.3916626272418808, "grad_norm": 1.2726293802261353, "learning_rate": 1.3890354244560507e-05, "loss": 0.6643, "step": 2626 }, { "epoch": 0.3918117752339759, "grad_norm": 1.0857048034667969, "learning_rate": 1.3885902688242006e-05, "loss": 0.6521, "step": 2627 }, { "epoch": 0.39196092322607107, "grad_norm": 1.2424027919769287, "learning_rate": 1.3881450224734604e-05, "loss": 0.5756, "step": 2628 }, { "epoch": 0.3921100712181662, "grad_norm": 1.064763069152832, "learning_rate": 1.3876996855077763e-05, "loss": 0.6821, "step": 2629 }, { "epoch": 0.39225921921026136, "grad_norm": 1.1164865493774414, "learning_rate": 1.3872542580311144e-05, "loss": 0.5928, "step": 2630 }, { "epoch": 0.39240836720235656, "grad_norm": 1.1487741470336914, "learning_rate": 1.3868087401474628e-05, "loss": 0.6894, "step": 2631 }, { "epoch": 0.3925575151944517, "grad_norm": 1.2034475803375244, "learning_rate": 1.3863631319608306e-05, "loss": 0.6679, "step": 2632 }, { "epoch": 0.39270666318654684, "grad_norm": 1.1341185569763184, "learning_rate": 1.385917433575248e-05, "loss": 0.6299, "step": 2633 }, { "epoch": 0.392855811178642, "grad_norm": 1.207779884338379, "learning_rate": 1.3854716450947658e-05, "loss": 0.5773, "step": 2634 }, { "epoch": 0.3930049591707372, "grad_norm": 1.275665283203125, "learning_rate": 1.3850257666234569e-05, "loss": 0.72, "step": 2635 }, { "epoch": 0.39315410716283233, "grad_norm": 1.2095438241958618, "learning_rate": 1.3845797982654134e-05, "loss": 0.6485, "step": 2636 }, { "epoch": 0.3933032551549275, "grad_norm": 1.2540085315704346, "learning_rate": 1.3841337401247503e-05, "loss": 0.6475, "step": 2637 }, { "epoch": 0.3934524031470226, "grad_norm": 1.0936925411224365, "learning_rate": 1.3836875923056026e-05, "loss": 0.5743, "step": 2638 }, { "epoch": 0.39360155113911777, "grad_norm": 1.2680846452713013, "learning_rate": 1.383241354912126e-05, "loss": 0.6774, "step": 2639 }, { "epoch": 0.39375069913121297, "grad_norm": 1.0613982677459717, "learning_rate": 1.3827950280484981e-05, "loss": 0.7038, "step": 2640 }, { "epoch": 0.3938998471233081, "grad_norm": 1.2616044282913208, "learning_rate": 1.382348611818916e-05, "loss": 0.7076, "step": 2641 }, { "epoch": 0.39404899511540326, "grad_norm": 1.2372543811798096, "learning_rate": 1.381902106327599e-05, "loss": 0.63, "step": 2642 }, { "epoch": 0.3941981431074984, "grad_norm": 1.168812870979309, "learning_rate": 1.3814555116787864e-05, "loss": 0.6321, "step": 2643 }, { "epoch": 0.39434729109959354, "grad_norm": 1.1229941844940186, "learning_rate": 1.3810088279767389e-05, "loss": 0.5958, "step": 2644 }, { "epoch": 0.39449643909168874, "grad_norm": 1.151611089706421, "learning_rate": 1.3805620553257374e-05, "loss": 0.6221, "step": 2645 }, { "epoch": 0.3946455870837839, "grad_norm": 1.085677981376648, "learning_rate": 1.380115193830084e-05, "loss": 0.5413, "step": 2646 }, { "epoch": 0.39479473507587903, "grad_norm": 1.2232959270477295, "learning_rate": 1.379668243594101e-05, "loss": 0.6442, "step": 2647 }, { "epoch": 0.3949438830679742, "grad_norm": 1.11297607421875, "learning_rate": 1.3792212047221326e-05, "loss": 0.5418, "step": 2648 }, { "epoch": 0.3950930310600694, "grad_norm": 1.1837431192398071, "learning_rate": 1.3787740773185418e-05, "loss": 0.6705, "step": 2649 }, { "epoch": 0.3952421790521645, "grad_norm": 1.2123085260391235, "learning_rate": 1.3783268614877144e-05, "loss": 0.6454, "step": 2650 }, { "epoch": 0.39539132704425967, "grad_norm": 1.1495585441589355, "learning_rate": 1.3778795573340551e-05, "loss": 0.5635, "step": 2651 }, { "epoch": 0.3955404750363548, "grad_norm": 1.1881585121154785, "learning_rate": 1.3774321649619902e-05, "loss": 0.6682, "step": 2652 }, { "epoch": 0.39568962302844995, "grad_norm": 1.1660761833190918, "learning_rate": 1.376984684475966e-05, "loss": 0.6266, "step": 2653 }, { "epoch": 0.39583877102054515, "grad_norm": 1.1481776237487793, "learning_rate": 1.3765371159804503e-05, "loss": 0.5815, "step": 2654 }, { "epoch": 0.3959879190126403, "grad_norm": 1.1217690706253052, "learning_rate": 1.3760894595799305e-05, "loss": 0.6345, "step": 2655 }, { "epoch": 0.39613706700473544, "grad_norm": 1.2413102388381958, "learning_rate": 1.3756417153789148e-05, "loss": 0.6882, "step": 2656 }, { "epoch": 0.3962862149968306, "grad_norm": 1.1933958530426025, "learning_rate": 1.375193883481932e-05, "loss": 0.6168, "step": 2657 }, { "epoch": 0.3964353629889258, "grad_norm": 1.147037148475647, "learning_rate": 1.3747459639935312e-05, "loss": 0.6502, "step": 2658 }, { "epoch": 0.39658451098102093, "grad_norm": 1.119443655014038, "learning_rate": 1.3742979570182827e-05, "loss": 0.5785, "step": 2659 }, { "epoch": 0.3967336589731161, "grad_norm": 1.0058622360229492, "learning_rate": 1.3738498626607758e-05, "loss": 0.5445, "step": 2660 }, { "epoch": 0.3968828069652112, "grad_norm": 1.0943766832351685, "learning_rate": 1.3734016810256213e-05, "loss": 0.6207, "step": 2661 }, { "epoch": 0.39703195495730637, "grad_norm": 1.1404999494552612, "learning_rate": 1.37295341221745e-05, "loss": 0.5986, "step": 2662 }, { "epoch": 0.39718110294940157, "grad_norm": 1.1112430095672607, "learning_rate": 1.3725050563409135e-05, "loss": 0.6572, "step": 2663 }, { "epoch": 0.3973302509414967, "grad_norm": 1.1220402717590332, "learning_rate": 1.372056613500683e-05, "loss": 0.6073, "step": 2664 }, { "epoch": 0.39747939893359185, "grad_norm": 1.1394833326339722, "learning_rate": 1.37160808380145e-05, "loss": 0.5777, "step": 2665 }, { "epoch": 0.397628546925687, "grad_norm": 1.2720999717712402, "learning_rate": 1.3711594673479279e-05, "loss": 0.6444, "step": 2666 }, { "epoch": 0.39777769491778214, "grad_norm": 1.1464563608169556, "learning_rate": 1.3707107642448477e-05, "loss": 0.6121, "step": 2667 }, { "epoch": 0.39792684290987734, "grad_norm": 1.1436861753463745, "learning_rate": 1.3702619745969628e-05, "loss": 0.6439, "step": 2668 }, { "epoch": 0.3980759909019725, "grad_norm": 1.1457159519195557, "learning_rate": 1.3698130985090455e-05, "loss": 0.6807, "step": 2669 }, { "epoch": 0.39822513889406763, "grad_norm": 1.2271497249603271, "learning_rate": 1.3693641360858891e-05, "loss": 0.7108, "step": 2670 }, { "epoch": 0.3983742868861628, "grad_norm": 1.0271364450454712, "learning_rate": 1.3689150874323072e-05, "loss": 0.5718, "step": 2671 }, { "epoch": 0.398523434878258, "grad_norm": 1.132089376449585, "learning_rate": 1.368465952653132e-05, "loss": 0.5926, "step": 2672 }, { "epoch": 0.3986725828703531, "grad_norm": 1.1663624048233032, "learning_rate": 1.3680167318532182e-05, "loss": 0.6913, "step": 2673 }, { "epoch": 0.39882173086244826, "grad_norm": 1.0733455419540405, "learning_rate": 1.3675674251374382e-05, "loss": 0.6646, "step": 2674 }, { "epoch": 0.3989708788545434, "grad_norm": 1.1819349527359009, "learning_rate": 1.367118032610686e-05, "loss": 0.5948, "step": 2675 }, { "epoch": 0.39912002684663855, "grad_norm": 0.9154718518257141, "learning_rate": 1.3666685543778755e-05, "loss": 0.6962, "step": 2676 }, { "epoch": 0.39926917483873375, "grad_norm": 1.292445182800293, "learning_rate": 1.3662189905439394e-05, "loss": 0.6731, "step": 2677 }, { "epoch": 0.3994183228308289, "grad_norm": 1.1271700859069824, "learning_rate": 1.3657693412138318e-05, "loss": 0.6507, "step": 2678 }, { "epoch": 0.39956747082292404, "grad_norm": 1.1279582977294922, "learning_rate": 1.3653196064925264e-05, "loss": 0.6979, "step": 2679 }, { "epoch": 0.3997166188150192, "grad_norm": 1.147855520248413, "learning_rate": 1.3648697864850162e-05, "loss": 0.5907, "step": 2680 }, { "epoch": 0.3998657668071144, "grad_norm": 1.2072046995162964, "learning_rate": 1.364419881296315e-05, "loss": 0.6826, "step": 2681 }, { "epoch": 0.40001491479920953, "grad_norm": 1.2071969509124756, "learning_rate": 1.3639698910314556e-05, "loss": 0.5798, "step": 2682 }, { "epoch": 0.4001640627913047, "grad_norm": 1.2008306980133057, "learning_rate": 1.3635198157954915e-05, "loss": 0.6756, "step": 2683 }, { "epoch": 0.4003132107833998, "grad_norm": 1.274742841720581, "learning_rate": 1.3630696556934955e-05, "loss": 0.6478, "step": 2684 }, { "epoch": 0.40046235877549496, "grad_norm": 1.0371531248092651, "learning_rate": 1.3626194108305606e-05, "loss": 0.5809, "step": 2685 }, { "epoch": 0.40061150676759016, "grad_norm": 1.1434272527694702, "learning_rate": 1.3621690813117987e-05, "loss": 0.6423, "step": 2686 }, { "epoch": 0.4007606547596853, "grad_norm": 1.1356005668640137, "learning_rate": 1.3617186672423426e-05, "loss": 0.6291, "step": 2687 }, { "epoch": 0.40090980275178045, "grad_norm": 1.1539394855499268, "learning_rate": 1.3612681687273445e-05, "loss": 0.6004, "step": 2688 }, { "epoch": 0.4010589507438756, "grad_norm": 1.1591640710830688, "learning_rate": 1.3608175858719757e-05, "loss": 0.6662, "step": 2689 }, { "epoch": 0.40120809873597074, "grad_norm": 1.191286325454712, "learning_rate": 1.360366918781428e-05, "loss": 0.6412, "step": 2690 }, { "epoch": 0.40135724672806594, "grad_norm": 1.13279390335083, "learning_rate": 1.3599161675609125e-05, "loss": 0.5141, "step": 2691 }, { "epoch": 0.4015063947201611, "grad_norm": 1.2754040956497192, "learning_rate": 1.3594653323156597e-05, "loss": 0.7088, "step": 2692 }, { "epoch": 0.40165554271225623, "grad_norm": 1.096866250038147, "learning_rate": 1.3590144131509205e-05, "loss": 0.6356, "step": 2693 }, { "epoch": 0.4018046907043514, "grad_norm": 1.069510817527771, "learning_rate": 1.3585634101719642e-05, "loss": 0.5963, "step": 2694 }, { "epoch": 0.4019538386964466, "grad_norm": 1.1979148387908936, "learning_rate": 1.3581123234840807e-05, "loss": 0.68, "step": 2695 }, { "epoch": 0.4021029866885417, "grad_norm": 1.1792247295379639, "learning_rate": 1.3576611531925791e-05, "loss": 0.6626, "step": 2696 }, { "epoch": 0.40225213468063686, "grad_norm": 0.8662236928939819, "learning_rate": 1.357209899402788e-05, "loss": 0.658, "step": 2697 }, { "epoch": 0.402401282672732, "grad_norm": 1.2063201665878296, "learning_rate": 1.3567585622200556e-05, "loss": 0.6603, "step": 2698 }, { "epoch": 0.40255043066482715, "grad_norm": 1.2433335781097412, "learning_rate": 1.3563071417497493e-05, "loss": 0.707, "step": 2699 }, { "epoch": 0.40269957865692235, "grad_norm": 1.0500015020370483, "learning_rate": 1.3558556380972555e-05, "loss": 0.573, "step": 2700 }, { "epoch": 0.4028487266490175, "grad_norm": 1.1494256258010864, "learning_rate": 1.3554040513679821e-05, "loss": 0.5904, "step": 2701 }, { "epoch": 0.40299787464111264, "grad_norm": 1.0925239324569702, "learning_rate": 1.3549523816673536e-05, "loss": 0.6293, "step": 2702 }, { "epoch": 0.4031470226332078, "grad_norm": 1.1304666996002197, "learning_rate": 1.3545006291008155e-05, "loss": 0.617, "step": 2703 }, { "epoch": 0.40329617062530293, "grad_norm": 1.205000400543213, "learning_rate": 1.3540487937738327e-05, "loss": 0.7013, "step": 2704 }, { "epoch": 0.40344531861739813, "grad_norm": 1.1739140748977661, "learning_rate": 1.3535968757918887e-05, "loss": 0.6855, "step": 2705 }, { "epoch": 0.4035944666094933, "grad_norm": 0.858688235282898, "learning_rate": 1.3531448752604867e-05, "loss": 0.6691, "step": 2706 }, { "epoch": 0.4037436146015884, "grad_norm": 1.0970391035079956, "learning_rate": 1.3526927922851495e-05, "loss": 0.6513, "step": 2707 }, { "epoch": 0.40389276259368356, "grad_norm": 1.132868766784668, "learning_rate": 1.3522406269714182e-05, "loss": 0.5849, "step": 2708 }, { "epoch": 0.40404191058577876, "grad_norm": 1.2077170610427856, "learning_rate": 1.3517883794248539e-05, "loss": 0.6569, "step": 2709 }, { "epoch": 0.4041910585778739, "grad_norm": 1.2094563245773315, "learning_rate": 1.351336049751037e-05, "loss": 0.6922, "step": 2710 }, { "epoch": 0.40434020656996905, "grad_norm": 1.1551799774169922, "learning_rate": 1.3508836380555662e-05, "loss": 0.6453, "step": 2711 }, { "epoch": 0.4044893545620642, "grad_norm": 1.1323758363723755, "learning_rate": 1.3504311444440605e-05, "loss": 0.6774, "step": 2712 }, { "epoch": 0.40463850255415934, "grad_norm": 1.103938341140747, "learning_rate": 1.3499785690221571e-05, "loss": 0.6346, "step": 2713 }, { "epoch": 0.40478765054625454, "grad_norm": 1.0516952276229858, "learning_rate": 1.3495259118955124e-05, "loss": 0.5453, "step": 2714 }, { "epoch": 0.4049367985383497, "grad_norm": 1.0655690431594849, "learning_rate": 1.3490731731698028e-05, "loss": 0.5576, "step": 2715 }, { "epoch": 0.40508594653044483, "grad_norm": 1.1895304918289185, "learning_rate": 1.3486203529507225e-05, "loss": 0.6129, "step": 2716 }, { "epoch": 0.40523509452254, "grad_norm": 1.2137123346328735, "learning_rate": 1.3481674513439853e-05, "loss": 0.6579, "step": 2717 }, { "epoch": 0.4053842425146352, "grad_norm": 1.1581144332885742, "learning_rate": 1.3477144684553243e-05, "loss": 0.6893, "step": 2718 }, { "epoch": 0.4055333905067303, "grad_norm": 1.1402089595794678, "learning_rate": 1.347261404390491e-05, "loss": 0.6658, "step": 2719 }, { "epoch": 0.40568253849882546, "grad_norm": 1.1403015851974487, "learning_rate": 1.3468082592552562e-05, "loss": 0.5851, "step": 2720 }, { "epoch": 0.4058316864909206, "grad_norm": 1.1170674562454224, "learning_rate": 1.3463550331554096e-05, "loss": 0.6365, "step": 2721 }, { "epoch": 0.40598083448301575, "grad_norm": 1.1616612672805786, "learning_rate": 1.3459017261967593e-05, "loss": 0.6751, "step": 2722 }, { "epoch": 0.40612998247511095, "grad_norm": 1.0751584768295288, "learning_rate": 1.3454483384851335e-05, "loss": 0.616, "step": 2723 }, { "epoch": 0.4062791304672061, "grad_norm": 0.9845238924026489, "learning_rate": 1.3449948701263782e-05, "loss": 0.5581, "step": 2724 }, { "epoch": 0.40642827845930124, "grad_norm": 1.1994224786758423, "learning_rate": 1.344541321226358e-05, "loss": 0.6309, "step": 2725 }, { "epoch": 0.4065774264513964, "grad_norm": 1.1371371746063232, "learning_rate": 1.3440876918909571e-05, "loss": 0.6334, "step": 2726 }, { "epoch": 0.40672657444349153, "grad_norm": 1.1129734516143799, "learning_rate": 1.3436339822260785e-05, "loss": 0.6657, "step": 2727 }, { "epoch": 0.40687572243558673, "grad_norm": 1.1903830766677856, "learning_rate": 1.343180192337643e-05, "loss": 0.6431, "step": 2728 }, { "epoch": 0.4070248704276819, "grad_norm": 1.1068309545516968, "learning_rate": 1.3427263223315916e-05, "loss": 0.5825, "step": 2729 }, { "epoch": 0.407174018419777, "grad_norm": 1.194705843925476, "learning_rate": 1.3422723723138824e-05, "loss": 0.6336, "step": 2730 }, { "epoch": 0.40732316641187216, "grad_norm": 1.2734681367874146, "learning_rate": 1.3418183423904931e-05, "loss": 0.5736, "step": 2731 }, { "epoch": 0.40747231440396736, "grad_norm": 1.1762350797653198, "learning_rate": 1.34136423266742e-05, "loss": 0.5931, "step": 2732 }, { "epoch": 0.4076214623960625, "grad_norm": 0.9467999339103699, "learning_rate": 1.3409100432506783e-05, "loss": 0.6907, "step": 2733 }, { "epoch": 0.40777061038815765, "grad_norm": 1.221010684967041, "learning_rate": 1.3404557742463009e-05, "loss": 0.6644, "step": 2734 }, { "epoch": 0.4079197583802528, "grad_norm": 1.1409149169921875, "learning_rate": 1.3400014257603399e-05, "loss": 0.5821, "step": 2735 }, { "epoch": 0.40806890637234794, "grad_norm": 1.1009470224380493, "learning_rate": 1.339546997898866e-05, "loss": 0.5308, "step": 2736 }, { "epoch": 0.40821805436444314, "grad_norm": 1.1585248708724976, "learning_rate": 1.3390924907679683e-05, "loss": 0.6058, "step": 2737 }, { "epoch": 0.4083672023565383, "grad_norm": 1.2022593021392822, "learning_rate": 1.3386379044737545e-05, "loss": 0.6592, "step": 2738 }, { "epoch": 0.40851635034863343, "grad_norm": 1.1025727987289429, "learning_rate": 1.3381832391223499e-05, "loss": 0.6632, "step": 2739 }, { "epoch": 0.4086654983407286, "grad_norm": 1.236230731010437, "learning_rate": 1.3377284948199006e-05, "loss": 0.6578, "step": 2740 }, { "epoch": 0.4088146463328237, "grad_norm": 1.1814161539077759, "learning_rate": 1.337273671672568e-05, "loss": 0.6474, "step": 2741 }, { "epoch": 0.4089637943249189, "grad_norm": 1.1664996147155762, "learning_rate": 1.3368187697865342e-05, "loss": 0.5889, "step": 2742 }, { "epoch": 0.40911294231701406, "grad_norm": 1.108533263206482, "learning_rate": 1.336363789267999e-05, "loss": 0.6015, "step": 2743 }, { "epoch": 0.4092620903091092, "grad_norm": 1.1105120182037354, "learning_rate": 1.3359087302231806e-05, "loss": 0.6463, "step": 2744 }, { "epoch": 0.40941123830120435, "grad_norm": 1.1415694952011108, "learning_rate": 1.3354535927583153e-05, "loss": 0.6687, "step": 2745 }, { "epoch": 0.40956038629329955, "grad_norm": 1.1428035497665405, "learning_rate": 1.3349983769796574e-05, "loss": 0.6237, "step": 2746 }, { "epoch": 0.4097095342853947, "grad_norm": 1.1793622970581055, "learning_rate": 1.3345430829934806e-05, "loss": 0.6298, "step": 2747 }, { "epoch": 0.40985868227748984, "grad_norm": 1.1007035970687866, "learning_rate": 1.3340877109060762e-05, "loss": 0.5504, "step": 2748 }, { "epoch": 0.410007830269585, "grad_norm": 1.1099934577941895, "learning_rate": 1.3336322608237534e-05, "loss": 0.6175, "step": 2749 }, { "epoch": 0.4101569782616801, "grad_norm": 1.1484631299972534, "learning_rate": 1.3331767328528398e-05, "loss": 0.6237, "step": 2750 }, { "epoch": 0.4103061262537753, "grad_norm": 1.1624534130096436, "learning_rate": 1.3327211270996818e-05, "loss": 0.6075, "step": 2751 }, { "epoch": 0.41045527424587047, "grad_norm": 1.1330138444900513, "learning_rate": 1.332265443670643e-05, "loss": 0.6715, "step": 2752 }, { "epoch": 0.4106044222379656, "grad_norm": 1.2682702541351318, "learning_rate": 1.3318096826721061e-05, "loss": 0.6431, "step": 2753 }, { "epoch": 0.41075357023006076, "grad_norm": 1.0363925695419312, "learning_rate": 1.3313538442104714e-05, "loss": 0.606, "step": 2754 }, { "epoch": 0.41090271822215596, "grad_norm": 1.2262723445892334, "learning_rate": 1.3308979283921568e-05, "loss": 0.5989, "step": 2755 }, { "epoch": 0.4110518662142511, "grad_norm": 1.1269012689590454, "learning_rate": 1.3304419353235991e-05, "loss": 0.6402, "step": 2756 }, { "epoch": 0.41120101420634625, "grad_norm": 1.1283692121505737, "learning_rate": 1.3299858651112529e-05, "loss": 0.5996, "step": 2757 }, { "epoch": 0.4113501621984414, "grad_norm": 1.2738401889801025, "learning_rate": 1.3295297178615904e-05, "loss": 0.595, "step": 2758 }, { "epoch": 0.41149931019053654, "grad_norm": 1.1489758491516113, "learning_rate": 1.3290734936811027e-05, "loss": 0.5369, "step": 2759 }, { "epoch": 0.41164845818263174, "grad_norm": 1.1557055711746216, "learning_rate": 1.3286171926762977e-05, "loss": 0.6957, "step": 2760 }, { "epoch": 0.4117976061747269, "grad_norm": 1.1923238039016724, "learning_rate": 1.3281608149537018e-05, "loss": 0.6301, "step": 2761 }, { "epoch": 0.411946754166822, "grad_norm": 1.2482448816299438, "learning_rate": 1.3277043606198596e-05, "loss": 0.6762, "step": 2762 }, { "epoch": 0.41209590215891717, "grad_norm": 1.1712357997894287, "learning_rate": 1.3272478297813334e-05, "loss": 0.7079, "step": 2763 }, { "epoch": 0.4122450501510123, "grad_norm": 1.46941077709198, "learning_rate": 1.3267912225447026e-05, "loss": 0.7382, "step": 2764 }, { "epoch": 0.4123941981431075, "grad_norm": 1.1502411365509033, "learning_rate": 1.3263345390165654e-05, "loss": 0.6525, "step": 2765 }, { "epoch": 0.41254334613520266, "grad_norm": 1.1115278005599976, "learning_rate": 1.325877779303538e-05, "loss": 0.6495, "step": 2766 }, { "epoch": 0.4126924941272978, "grad_norm": 1.2524322271347046, "learning_rate": 1.3254209435122533e-05, "loss": 0.6559, "step": 2767 }, { "epoch": 0.41284164211939295, "grad_norm": 1.2133234739303589, "learning_rate": 1.3249640317493628e-05, "loss": 0.6739, "step": 2768 }, { "epoch": 0.41299079011148815, "grad_norm": 1.1261759996414185, "learning_rate": 1.3245070441215355e-05, "loss": 0.5488, "step": 2769 }, { "epoch": 0.4131399381035833, "grad_norm": 1.0990790128707886, "learning_rate": 1.3240499807354577e-05, "loss": 0.6278, "step": 2770 }, { "epoch": 0.41328908609567844, "grad_norm": 1.0823009014129639, "learning_rate": 1.3235928416978343e-05, "loss": 0.6107, "step": 2771 }, { "epoch": 0.4134382340877736, "grad_norm": 1.1976172924041748, "learning_rate": 1.323135627115387e-05, "loss": 0.7264, "step": 2772 }, { "epoch": 0.4135873820798687, "grad_norm": 1.224666714668274, "learning_rate": 1.3226783370948559e-05, "loss": 0.664, "step": 2773 }, { "epoch": 0.4137365300719639, "grad_norm": 1.176262617111206, "learning_rate": 1.3222209717429974e-05, "loss": 0.634, "step": 2774 }, { "epoch": 0.41388567806405907, "grad_norm": 1.1673316955566406, "learning_rate": 1.3217635311665876e-05, "loss": 0.6496, "step": 2775 }, { "epoch": 0.4140348260561542, "grad_norm": 1.2695319652557373, "learning_rate": 1.3213060154724179e-05, "loss": 0.6921, "step": 2776 }, { "epoch": 0.41418397404824936, "grad_norm": 1.2603856325149536, "learning_rate": 1.3208484247672988e-05, "loss": 0.6708, "step": 2777 }, { "epoch": 0.41433312204034456, "grad_norm": 1.2237294912338257, "learning_rate": 1.3203907591580573e-05, "loss": 0.6647, "step": 2778 }, { "epoch": 0.4144822700324397, "grad_norm": 0.881639301776886, "learning_rate": 1.3199330187515391e-05, "loss": 0.686, "step": 2779 }, { "epoch": 0.41463141802453485, "grad_norm": 1.203134298324585, "learning_rate": 1.3194752036546063e-05, "loss": 0.611, "step": 2780 }, { "epoch": 0.41478056601663, "grad_norm": 1.2935711145401, "learning_rate": 1.3190173139741384e-05, "loss": 0.6455, "step": 2781 }, { "epoch": 0.41492971400872514, "grad_norm": 1.1075915098190308, "learning_rate": 1.3185593498170334e-05, "loss": 0.5896, "step": 2782 }, { "epoch": 0.41507886200082034, "grad_norm": 1.086112380027771, "learning_rate": 1.3181013112902052e-05, "loss": 0.5648, "step": 2783 }, { "epoch": 0.4152280099929155, "grad_norm": 1.0549660921096802, "learning_rate": 1.3176431985005864e-05, "loss": 0.5716, "step": 2784 }, { "epoch": 0.4153771579850106, "grad_norm": 1.1145230531692505, "learning_rate": 1.317185011555126e-05, "loss": 0.6352, "step": 2785 }, { "epoch": 0.41552630597710577, "grad_norm": 1.1578134298324585, "learning_rate": 1.316726750560791e-05, "loss": 0.6345, "step": 2786 }, { "epoch": 0.4156754539692009, "grad_norm": 1.2144527435302734, "learning_rate": 1.3162684156245654e-05, "loss": 0.6366, "step": 2787 }, { "epoch": 0.4158246019612961, "grad_norm": 1.0715336799621582, "learning_rate": 1.31581000685345e-05, "loss": 0.554, "step": 2788 }, { "epoch": 0.41597374995339126, "grad_norm": 1.2632266283035278, "learning_rate": 1.3153515243544635e-05, "loss": 0.6894, "step": 2789 }, { "epoch": 0.4161228979454864, "grad_norm": 1.2070763111114502, "learning_rate": 1.3148929682346418e-05, "loss": 0.616, "step": 2790 }, { "epoch": 0.41627204593758155, "grad_norm": 1.0507214069366455, "learning_rate": 1.3144343386010375e-05, "loss": 0.5572, "step": 2791 }, { "epoch": 0.41642119392967675, "grad_norm": 1.1685025691986084, "learning_rate": 1.3139756355607203e-05, "loss": 0.6491, "step": 2792 }, { "epoch": 0.4165703419217719, "grad_norm": 1.1607590913772583, "learning_rate": 1.3135168592207781e-05, "loss": 0.6314, "step": 2793 }, { "epoch": 0.41671948991386704, "grad_norm": 1.01885187625885, "learning_rate": 1.313058009688315e-05, "loss": 0.5688, "step": 2794 }, { "epoch": 0.4168686379059622, "grad_norm": 1.0586568117141724, "learning_rate": 1.312599087070452e-05, "loss": 0.5702, "step": 2795 }, { "epoch": 0.4170177858980573, "grad_norm": 1.210342288017273, "learning_rate": 1.3121400914743275e-05, "loss": 0.6068, "step": 2796 }, { "epoch": 0.4171669338901525, "grad_norm": 1.1694966554641724, "learning_rate": 1.3116810230070976e-05, "loss": 0.5827, "step": 2797 }, { "epoch": 0.41731608188224767, "grad_norm": 0.840811550617218, "learning_rate": 1.3112218817759338e-05, "loss": 0.6493, "step": 2798 }, { "epoch": 0.4174652298743428, "grad_norm": 0.9089367985725403, "learning_rate": 1.3107626678880267e-05, "loss": 0.6518, "step": 2799 }, { "epoch": 0.41761437786643796, "grad_norm": 1.266210675239563, "learning_rate": 1.3103033814505817e-05, "loss": 0.6279, "step": 2800 }, { "epoch": 0.4177635258585331, "grad_norm": 0.9365933537483215, "learning_rate": 1.3098440225708232e-05, "loss": 0.7033, "step": 2801 }, { "epoch": 0.4179126738506283, "grad_norm": 1.1387639045715332, "learning_rate": 1.3093845913559906e-05, "loss": 0.5774, "step": 2802 }, { "epoch": 0.41806182184272345, "grad_norm": 1.2393710613250732, "learning_rate": 1.3089250879133412e-05, "loss": 0.5936, "step": 2803 }, { "epoch": 0.4182109698348186, "grad_norm": 1.0776851177215576, "learning_rate": 1.3084655123501495e-05, "loss": 0.5902, "step": 2804 }, { "epoch": 0.41836011782691374, "grad_norm": 1.1289807558059692, "learning_rate": 1.3080058647737058e-05, "loss": 0.6267, "step": 2805 }, { "epoch": 0.41850926581900894, "grad_norm": 0.9054809212684631, "learning_rate": 1.3075461452913181e-05, "loss": 0.71, "step": 2806 }, { "epoch": 0.4186584138111041, "grad_norm": 1.1026588678359985, "learning_rate": 1.307086354010311e-05, "loss": 0.5938, "step": 2807 }, { "epoch": 0.4188075618031992, "grad_norm": 1.1507338285446167, "learning_rate": 1.3066264910380251e-05, "loss": 0.5904, "step": 2808 }, { "epoch": 0.41895670979529437, "grad_norm": 1.0812772512435913, "learning_rate": 1.306166556481819e-05, "loss": 0.5761, "step": 2809 }, { "epoch": 0.4191058577873895, "grad_norm": 1.2146797180175781, "learning_rate": 1.3057065504490672e-05, "loss": 0.6484, "step": 2810 }, { "epoch": 0.4192550057794847, "grad_norm": 1.202652931213379, "learning_rate": 1.3052464730471607e-05, "loss": 0.6605, "step": 2811 }, { "epoch": 0.41940415377157986, "grad_norm": 1.2270705699920654, "learning_rate": 1.3047863243835081e-05, "loss": 0.7308, "step": 2812 }, { "epoch": 0.419553301763675, "grad_norm": 1.1313707828521729, "learning_rate": 1.3043261045655338e-05, "loss": 0.5912, "step": 2813 }, { "epoch": 0.41970244975577015, "grad_norm": 1.1841850280761719, "learning_rate": 1.3038658137006788e-05, "loss": 0.6297, "step": 2814 }, { "epoch": 0.41985159774786535, "grad_norm": 1.1772907972335815, "learning_rate": 1.3034054518964014e-05, "loss": 0.7033, "step": 2815 }, { "epoch": 0.4200007457399605, "grad_norm": 1.1156443357467651, "learning_rate": 1.3029450192601758e-05, "loss": 0.5964, "step": 2816 }, { "epoch": 0.42014989373205563, "grad_norm": 1.034260869026184, "learning_rate": 1.3024845158994927e-05, "loss": 0.6033, "step": 2817 }, { "epoch": 0.4202990417241508, "grad_norm": 1.1588321924209595, "learning_rate": 1.30202394192186e-05, "loss": 0.6397, "step": 2818 }, { "epoch": 0.4204481897162459, "grad_norm": 1.079904317855835, "learning_rate": 1.3015632974348015e-05, "loss": 0.615, "step": 2819 }, { "epoch": 0.4205973377083411, "grad_norm": 1.1452605724334717, "learning_rate": 1.3011025825458576e-05, "loss": 0.6761, "step": 2820 }, { "epoch": 0.42074648570043627, "grad_norm": 1.1991779804229736, "learning_rate": 1.3006417973625853e-05, "loss": 0.6222, "step": 2821 }, { "epoch": 0.4208956336925314, "grad_norm": 1.1321083307266235, "learning_rate": 1.3001809419925575e-05, "loss": 0.6611, "step": 2822 }, { "epoch": 0.42104478168462656, "grad_norm": 1.0601966381072998, "learning_rate": 1.2997200165433639e-05, "loss": 0.524, "step": 2823 }, { "epoch": 0.4211939296767217, "grad_norm": 1.2302392721176147, "learning_rate": 1.2992590211226106e-05, "loss": 0.6791, "step": 2824 }, { "epoch": 0.4213430776688169, "grad_norm": 1.257686972618103, "learning_rate": 1.29879795583792e-05, "loss": 0.6664, "step": 2825 }, { "epoch": 0.42149222566091205, "grad_norm": 1.0976985692977905, "learning_rate": 1.2983368207969309e-05, "loss": 0.6297, "step": 2826 }, { "epoch": 0.4216413736530072, "grad_norm": 1.0654337406158447, "learning_rate": 1.2978756161072978e-05, "loss": 0.5736, "step": 2827 }, { "epoch": 0.42179052164510233, "grad_norm": 1.1600754261016846, "learning_rate": 1.2974143418766922e-05, "loss": 0.6371, "step": 2828 }, { "epoch": 0.42193966963719753, "grad_norm": 1.2581008672714233, "learning_rate": 1.2969529982128017e-05, "loss": 0.6385, "step": 2829 }, { "epoch": 0.4220888176292927, "grad_norm": 0.936370849609375, "learning_rate": 1.2964915852233295e-05, "loss": 0.7335, "step": 2830 }, { "epoch": 0.4222379656213878, "grad_norm": 1.1698598861694336, "learning_rate": 1.2960301030159955e-05, "loss": 0.6881, "step": 2831 }, { "epoch": 0.42238711361348297, "grad_norm": 1.3556371927261353, "learning_rate": 1.295568551698536e-05, "loss": 0.602, "step": 2832 }, { "epoch": 0.4225362616055781, "grad_norm": 1.1698532104492188, "learning_rate": 1.2951069313787029e-05, "loss": 0.5435, "step": 2833 }, { "epoch": 0.4226854095976733, "grad_norm": 1.3734822273254395, "learning_rate": 1.2946452421642643e-05, "loss": 0.6392, "step": 2834 }, { "epoch": 0.42283455758976846, "grad_norm": 1.2181295156478882, "learning_rate": 1.2941834841630046e-05, "loss": 0.6375, "step": 2835 }, { "epoch": 0.4229837055818636, "grad_norm": 1.120451807975769, "learning_rate": 1.2937216574827245e-05, "loss": 0.6607, "step": 2836 }, { "epoch": 0.42313285357395874, "grad_norm": 1.2448307275772095, "learning_rate": 1.2932597622312396e-05, "loss": 0.6713, "step": 2837 }, { "epoch": 0.4232820015660539, "grad_norm": 1.056740403175354, "learning_rate": 1.2927977985163834e-05, "loss": 0.6406, "step": 2838 }, { "epoch": 0.4234311495581491, "grad_norm": 1.1620842218399048, "learning_rate": 1.2923357664460032e-05, "loss": 0.6455, "step": 2839 }, { "epoch": 0.42358029755024423, "grad_norm": 1.172660231590271, "learning_rate": 1.291873666127964e-05, "loss": 0.6355, "step": 2840 }, { "epoch": 0.4237294455423394, "grad_norm": 1.105292797088623, "learning_rate": 1.2914114976701463e-05, "loss": 0.6651, "step": 2841 }, { "epoch": 0.4238785935344345, "grad_norm": 1.133104920387268, "learning_rate": 1.2909492611804455e-05, "loss": 0.5715, "step": 2842 }, { "epoch": 0.4240277415265297, "grad_norm": 1.167047142982483, "learning_rate": 1.2904869567667743e-05, "loss": 0.6833, "step": 2843 }, { "epoch": 0.42417688951862487, "grad_norm": 1.1402703523635864, "learning_rate": 1.2900245845370603e-05, "loss": 0.6071, "step": 2844 }, { "epoch": 0.42432603751072, "grad_norm": 1.318824052810669, "learning_rate": 1.2895621445992474e-05, "loss": 0.8035, "step": 2845 }, { "epoch": 0.42447518550281516, "grad_norm": 1.2714303731918335, "learning_rate": 1.2890996370612954e-05, "loss": 0.6278, "step": 2846 }, { "epoch": 0.4246243334949103, "grad_norm": 1.2076126337051392, "learning_rate": 1.2886370620311789e-05, "loss": 0.7047, "step": 2847 }, { "epoch": 0.4247734814870055, "grad_norm": 1.251643419265747, "learning_rate": 1.28817441961689e-05, "loss": 0.6316, "step": 2848 }, { "epoch": 0.42492262947910064, "grad_norm": 1.1678434610366821, "learning_rate": 1.2877117099264349e-05, "loss": 0.6226, "step": 2849 }, { "epoch": 0.4250717774711958, "grad_norm": 1.1750421524047852, "learning_rate": 1.2872489330678363e-05, "loss": 0.5937, "step": 2850 }, { "epoch": 0.42522092546329093, "grad_norm": 1.1955037117004395, "learning_rate": 1.2867860891491326e-05, "loss": 0.5837, "step": 2851 }, { "epoch": 0.42537007345538613, "grad_norm": 1.0381823778152466, "learning_rate": 1.2863231782783774e-05, "loss": 0.513, "step": 2852 }, { "epoch": 0.4255192214474813, "grad_norm": 0.8781862258911133, "learning_rate": 1.28586020056364e-05, "loss": 0.6279, "step": 2853 }, { "epoch": 0.4256683694395764, "grad_norm": 1.1398133039474487, "learning_rate": 1.2853971561130062e-05, "loss": 0.6729, "step": 2854 }, { "epoch": 0.42581751743167157, "grad_norm": 1.1518101692199707, "learning_rate": 1.2849340450345765e-05, "loss": 0.6142, "step": 2855 }, { "epoch": 0.4259666654237667, "grad_norm": 1.0827678442001343, "learning_rate": 1.2844708674364665e-05, "loss": 0.672, "step": 2856 }, { "epoch": 0.4261158134158619, "grad_norm": 1.2062536478042603, "learning_rate": 1.2840076234268083e-05, "loss": 0.687, "step": 2857 }, { "epoch": 0.42626496140795705, "grad_norm": 1.163020133972168, "learning_rate": 1.2835443131137502e-05, "loss": 0.6249, "step": 2858 }, { "epoch": 0.4264141094000522, "grad_norm": 1.1932309865951538, "learning_rate": 1.2830809366054533e-05, "loss": 0.702, "step": 2859 }, { "epoch": 0.42656325739214734, "grad_norm": 1.1957988739013672, "learning_rate": 1.282617494010097e-05, "loss": 0.6271, "step": 2860 }, { "epoch": 0.4267124053842425, "grad_norm": 1.182133436203003, "learning_rate": 1.2821539854358745e-05, "loss": 0.704, "step": 2861 }, { "epoch": 0.4268615533763377, "grad_norm": 0.9864089488983154, "learning_rate": 1.2816904109909948e-05, "loss": 0.6038, "step": 2862 }, { "epoch": 0.42701070136843283, "grad_norm": 1.1932127475738525, "learning_rate": 1.2812267707836827e-05, "loss": 0.7478, "step": 2863 }, { "epoch": 0.427159849360528, "grad_norm": 1.1137391328811646, "learning_rate": 1.2807630649221777e-05, "loss": 0.5707, "step": 2864 }, { "epoch": 0.4273089973526231, "grad_norm": 1.1665878295898438, "learning_rate": 1.2802992935147348e-05, "loss": 0.6148, "step": 2865 }, { "epoch": 0.4274581453447183, "grad_norm": 1.099535346031189, "learning_rate": 1.2798354566696245e-05, "loss": 0.6356, "step": 2866 }, { "epoch": 0.42760729333681347, "grad_norm": 1.1975643634796143, "learning_rate": 1.2793715544951324e-05, "loss": 0.6831, "step": 2867 }, { "epoch": 0.4277564413289086, "grad_norm": 1.1831380128860474, "learning_rate": 1.27890758709956e-05, "loss": 0.6721, "step": 2868 }, { "epoch": 0.42790558932100375, "grad_norm": 1.009342908859253, "learning_rate": 1.2784435545912228e-05, "loss": 0.5266, "step": 2869 }, { "epoch": 0.4280547373130989, "grad_norm": 1.2015217542648315, "learning_rate": 1.277979457078452e-05, "loss": 0.6541, "step": 2870 }, { "epoch": 0.4282038853051941, "grad_norm": 1.1832650899887085, "learning_rate": 1.2775152946695953e-05, "loss": 0.6782, "step": 2871 }, { "epoch": 0.42835303329728924, "grad_norm": 1.0327504873275757, "learning_rate": 1.2770510674730132e-05, "loss": 0.6178, "step": 2872 }, { "epoch": 0.4285021812893844, "grad_norm": 1.1504664421081543, "learning_rate": 1.276586775597083e-05, "loss": 0.5743, "step": 2873 }, { "epoch": 0.42865132928147953, "grad_norm": 1.0793957710266113, "learning_rate": 1.2761224191501964e-05, "loss": 0.5824, "step": 2874 }, { "epoch": 0.42880047727357473, "grad_norm": 1.0434967279434204, "learning_rate": 1.2756579982407606e-05, "loss": 0.5654, "step": 2875 }, { "epoch": 0.4289496252656699, "grad_norm": 0.8463302850723267, "learning_rate": 1.2751935129771974e-05, "loss": 0.6456, "step": 2876 }, { "epoch": 0.429098773257765, "grad_norm": 1.2986271381378174, "learning_rate": 1.2747289634679445e-05, "loss": 0.7483, "step": 2877 }, { "epoch": 0.42924792124986016, "grad_norm": 0.8126690983772278, "learning_rate": 1.2742643498214534e-05, "loss": 0.6424, "step": 2878 }, { "epoch": 0.4293970692419553, "grad_norm": 1.150968074798584, "learning_rate": 1.2737996721461907e-05, "loss": 0.5401, "step": 2879 }, { "epoch": 0.4295462172340505, "grad_norm": 1.2417106628417969, "learning_rate": 1.2733349305506395e-05, "loss": 0.6346, "step": 2880 }, { "epoch": 0.42969536522614565, "grad_norm": 1.0305942296981812, "learning_rate": 1.272870125143296e-05, "loss": 0.5527, "step": 2881 }, { "epoch": 0.4298445132182408, "grad_norm": 1.054836630821228, "learning_rate": 1.2724052560326722e-05, "loss": 0.5653, "step": 2882 }, { "epoch": 0.42999366121033594, "grad_norm": 1.144228219985962, "learning_rate": 1.2719403233272947e-05, "loss": 0.6856, "step": 2883 }, { "epoch": 0.4301428092024311, "grad_norm": 1.1494051218032837, "learning_rate": 1.2714753271357047e-05, "loss": 0.5662, "step": 2884 }, { "epoch": 0.4302919571945263, "grad_norm": 1.0577934980392456, "learning_rate": 1.2710102675664593e-05, "loss": 0.6153, "step": 2885 }, { "epoch": 0.43044110518662143, "grad_norm": 1.0732706785202026, "learning_rate": 1.2705451447281289e-05, "loss": 0.6977, "step": 2886 }, { "epoch": 0.4305902531787166, "grad_norm": 1.2259365320205688, "learning_rate": 1.2700799587293e-05, "loss": 0.6834, "step": 2887 }, { "epoch": 0.4307394011708117, "grad_norm": 1.2308543920516968, "learning_rate": 1.2696147096785727e-05, "loss": 0.6359, "step": 2888 }, { "epoch": 0.4308885491629069, "grad_norm": 1.0853989124298096, "learning_rate": 1.2691493976845627e-05, "loss": 0.6385, "step": 2889 }, { "epoch": 0.43103769715500206, "grad_norm": 1.0514012575149536, "learning_rate": 1.2686840228559001e-05, "loss": 0.5199, "step": 2890 }, { "epoch": 0.4311868451470972, "grad_norm": 1.1150141954421997, "learning_rate": 1.2682185853012296e-05, "loss": 0.5583, "step": 2891 }, { "epoch": 0.43133599313919235, "grad_norm": 1.102432131767273, "learning_rate": 1.26775308512921e-05, "loss": 0.6393, "step": 2892 }, { "epoch": 0.4314851411312875, "grad_norm": 1.1331894397735596, "learning_rate": 1.2672875224485166e-05, "loss": 0.5876, "step": 2893 }, { "epoch": 0.4316342891233827, "grad_norm": 1.0195492506027222, "learning_rate": 1.266821897367837e-05, "loss": 0.5564, "step": 2894 }, { "epoch": 0.43178343711547784, "grad_norm": 1.1636563539505005, "learning_rate": 1.2663562099958746e-05, "loss": 0.7175, "step": 2895 }, { "epoch": 0.431932585107573, "grad_norm": 1.1075160503387451, "learning_rate": 1.2658904604413468e-05, "loss": 0.636, "step": 2896 }, { "epoch": 0.43208173309966813, "grad_norm": 1.1654101610183716, "learning_rate": 1.2654246488129864e-05, "loss": 0.6204, "step": 2897 }, { "epoch": 0.4322308810917633, "grad_norm": 1.254489541053772, "learning_rate": 1.2649587752195397e-05, "loss": 0.5617, "step": 2898 }, { "epoch": 0.4323800290838585, "grad_norm": 1.2075040340423584, "learning_rate": 1.2644928397697683e-05, "loss": 0.7446, "step": 2899 }, { "epoch": 0.4325291770759536, "grad_norm": 1.0458447933197021, "learning_rate": 1.2640268425724469e-05, "loss": 0.5866, "step": 2900 }, { "epoch": 0.43267832506804876, "grad_norm": 1.1141711473464966, "learning_rate": 1.2635607837363665e-05, "loss": 0.6269, "step": 2901 }, { "epoch": 0.4328274730601439, "grad_norm": 1.1975734233856201, "learning_rate": 1.2630946633703314e-05, "loss": 0.7223, "step": 2902 }, { "epoch": 0.4329766210522391, "grad_norm": 1.026450753211975, "learning_rate": 1.2626284815831597e-05, "loss": 0.5691, "step": 2903 }, { "epoch": 0.43312576904433425, "grad_norm": 1.1041579246520996, "learning_rate": 1.2621622384836853e-05, "loss": 0.5981, "step": 2904 }, { "epoch": 0.4332749170364294, "grad_norm": 1.1812926530838013, "learning_rate": 1.2616959341807553e-05, "loss": 0.7166, "step": 2905 }, { "epoch": 0.43342406502852454, "grad_norm": 0.9351599216461182, "learning_rate": 1.2612295687832315e-05, "loss": 0.6628, "step": 2906 }, { "epoch": 0.4335732130206197, "grad_norm": 1.2195736169815063, "learning_rate": 1.2607631423999898e-05, "loss": 0.6834, "step": 2907 }, { "epoch": 0.4337223610127149, "grad_norm": 1.224656343460083, "learning_rate": 1.2602966551399206e-05, "loss": 0.5057, "step": 2908 }, { "epoch": 0.43387150900481003, "grad_norm": 1.1665632724761963, "learning_rate": 1.2598301071119277e-05, "loss": 0.5985, "step": 2909 }, { "epoch": 0.4340206569969052, "grad_norm": 1.2752909660339355, "learning_rate": 1.2593634984249307e-05, "loss": 0.6563, "step": 2910 }, { "epoch": 0.4341698049890003, "grad_norm": 1.0690184831619263, "learning_rate": 1.2588968291878621e-05, "loss": 0.6112, "step": 2911 }, { "epoch": 0.4343189529810955, "grad_norm": 1.159042477607727, "learning_rate": 1.2584300995096684e-05, "loss": 0.5978, "step": 2912 }, { "epoch": 0.43446810097319066, "grad_norm": 1.1336004734039307, "learning_rate": 1.257963309499311e-05, "loss": 0.6462, "step": 2913 }, { "epoch": 0.4346172489652858, "grad_norm": 1.210214376449585, "learning_rate": 1.2574964592657648e-05, "loss": 0.6173, "step": 2914 }, { "epoch": 0.43476639695738095, "grad_norm": 1.2671409845352173, "learning_rate": 1.257029548918019e-05, "loss": 0.6189, "step": 2915 }, { "epoch": 0.4349155449494761, "grad_norm": 1.0937750339508057, "learning_rate": 1.2565625785650774e-05, "loss": 0.5948, "step": 2916 }, { "epoch": 0.4350646929415713, "grad_norm": 1.1975926160812378, "learning_rate": 1.2560955483159562e-05, "loss": 0.6396, "step": 2917 }, { "epoch": 0.43521384093366644, "grad_norm": 1.1460976600646973, "learning_rate": 1.2556284582796874e-05, "loss": 0.6534, "step": 2918 }, { "epoch": 0.4353629889257616, "grad_norm": 1.0292314291000366, "learning_rate": 1.255161308565316e-05, "loss": 0.5355, "step": 2919 }, { "epoch": 0.43551213691785673, "grad_norm": 1.023389220237732, "learning_rate": 1.254694099281901e-05, "loss": 0.6565, "step": 2920 }, { "epoch": 0.4356612849099519, "grad_norm": 1.056631326675415, "learning_rate": 1.2542268305385155e-05, "loss": 0.5803, "step": 2921 }, { "epoch": 0.4358104329020471, "grad_norm": 1.1590481996536255, "learning_rate": 1.2537595024442462e-05, "loss": 0.6249, "step": 2922 }, { "epoch": 0.4359595808941422, "grad_norm": 1.1351369619369507, "learning_rate": 1.2532921151081935e-05, "loss": 0.6396, "step": 2923 }, { "epoch": 0.43610872888623736, "grad_norm": 0.9221516251564026, "learning_rate": 1.2528246686394732e-05, "loss": 0.6682, "step": 2924 }, { "epoch": 0.4362578768783325, "grad_norm": 1.1321744918823242, "learning_rate": 1.2523571631472123e-05, "loss": 0.542, "step": 2925 }, { "epoch": 0.4364070248704277, "grad_norm": 1.2107460498809814, "learning_rate": 1.2518895987405539e-05, "loss": 0.6167, "step": 2926 }, { "epoch": 0.43655617286252285, "grad_norm": 1.1164675951004028, "learning_rate": 1.2514219755286531e-05, "loss": 0.6357, "step": 2927 }, { "epoch": 0.436705320854618, "grad_norm": 1.1669681072235107, "learning_rate": 1.2509542936206802e-05, "loss": 0.6504, "step": 2928 }, { "epoch": 0.43685446884671314, "grad_norm": 1.0508382320404053, "learning_rate": 1.2504865531258186e-05, "loss": 0.5975, "step": 2929 }, { "epoch": 0.4370036168388083, "grad_norm": 1.1183252334594727, "learning_rate": 1.250018754153265e-05, "loss": 0.6591, "step": 2930 }, { "epoch": 0.4371527648309035, "grad_norm": 1.1130917072296143, "learning_rate": 1.2495508968122297e-05, "loss": 0.6455, "step": 2931 }, { "epoch": 0.43730191282299863, "grad_norm": 1.1454057693481445, "learning_rate": 1.2490829812119376e-05, "loss": 0.6066, "step": 2932 }, { "epoch": 0.4374510608150938, "grad_norm": 1.1047269105911255, "learning_rate": 1.2486150074616268e-05, "loss": 0.6269, "step": 2933 }, { "epoch": 0.4376002088071889, "grad_norm": 1.3107606172561646, "learning_rate": 1.2481469756705478e-05, "loss": 0.6441, "step": 2934 }, { "epoch": 0.4377493567992841, "grad_norm": 1.201521873474121, "learning_rate": 1.2476788859479667e-05, "loss": 0.6836, "step": 2935 }, { "epoch": 0.43789850479137926, "grad_norm": 1.201822280883789, "learning_rate": 1.247210738403161e-05, "loss": 0.5853, "step": 2936 }, { "epoch": 0.4380476527834744, "grad_norm": 1.228279709815979, "learning_rate": 1.2467425331454237e-05, "loss": 0.6928, "step": 2937 }, { "epoch": 0.43819680077556955, "grad_norm": 1.1430326700210571, "learning_rate": 1.2462742702840597e-05, "loss": 0.6741, "step": 2938 }, { "epoch": 0.4383459487676647, "grad_norm": 1.0864412784576416, "learning_rate": 1.2458059499283884e-05, "loss": 0.646, "step": 2939 }, { "epoch": 0.4384950967597599, "grad_norm": 1.1392465829849243, "learning_rate": 1.2453375721877417e-05, "loss": 0.6456, "step": 2940 }, { "epoch": 0.43864424475185504, "grad_norm": 1.136502981185913, "learning_rate": 1.2448691371714661e-05, "loss": 0.6402, "step": 2941 }, { "epoch": 0.4387933927439502, "grad_norm": 0.8108587265014648, "learning_rate": 1.2444006449889198e-05, "loss": 0.6471, "step": 2942 }, { "epoch": 0.43894254073604533, "grad_norm": 1.2052351236343384, "learning_rate": 1.2439320957494762e-05, "loss": 0.6518, "step": 2943 }, { "epoch": 0.4390916887281405, "grad_norm": 1.2633134126663208, "learning_rate": 1.2434634895625206e-05, "loss": 0.6161, "step": 2944 }, { "epoch": 0.4392408367202357, "grad_norm": 1.1551672220230103, "learning_rate": 1.242994826537452e-05, "loss": 0.7031, "step": 2945 }, { "epoch": 0.4393899847123308, "grad_norm": 1.2092610597610474, "learning_rate": 1.2425261067836835e-05, "loss": 0.6818, "step": 2946 }, { "epoch": 0.43953913270442596, "grad_norm": 1.0876123905181885, "learning_rate": 1.2420573304106402e-05, "loss": 0.5751, "step": 2947 }, { "epoch": 0.4396882806965211, "grad_norm": 1.1294690370559692, "learning_rate": 1.241588497527761e-05, "loss": 0.5875, "step": 2948 }, { "epoch": 0.4398374286886163, "grad_norm": 1.1532515287399292, "learning_rate": 1.2411196082444978e-05, "loss": 0.6669, "step": 2949 }, { "epoch": 0.43998657668071145, "grad_norm": 1.1522043943405151, "learning_rate": 1.2406506626703163e-05, "loss": 0.6318, "step": 2950 }, { "epoch": 0.4401357246728066, "grad_norm": 0.9952991604804993, "learning_rate": 1.2401816609146942e-05, "loss": 0.7205, "step": 2951 }, { "epoch": 0.44028487266490174, "grad_norm": 1.2243983745574951, "learning_rate": 1.2397126030871235e-05, "loss": 0.5978, "step": 2952 }, { "epoch": 0.4404340206569969, "grad_norm": 1.100624442100525, "learning_rate": 1.2392434892971086e-05, "loss": 0.6365, "step": 2953 }, { "epoch": 0.4405831686490921, "grad_norm": 1.234767198562622, "learning_rate": 1.2387743196541669e-05, "loss": 0.5983, "step": 2954 }, { "epoch": 0.4407323166411872, "grad_norm": 1.2241379022598267, "learning_rate": 1.2383050942678295e-05, "loss": 0.7233, "step": 2955 }, { "epoch": 0.44088146463328237, "grad_norm": 1.307571291923523, "learning_rate": 1.2378358132476395e-05, "loss": 0.6113, "step": 2956 }, { "epoch": 0.4410306126253775, "grad_norm": 1.1074409484863281, "learning_rate": 1.237366476703154e-05, "loss": 0.6189, "step": 2957 }, { "epoch": 0.44117976061747266, "grad_norm": 1.1237530708312988, "learning_rate": 1.2368970847439426e-05, "loss": 0.6526, "step": 2958 }, { "epoch": 0.44132890860956786, "grad_norm": 1.1615022420883179, "learning_rate": 1.2364276374795878e-05, "loss": 0.6848, "step": 2959 }, { "epoch": 0.441478056601663, "grad_norm": 1.087801218032837, "learning_rate": 1.235958135019685e-05, "loss": 0.623, "step": 2960 }, { "epoch": 0.44162720459375815, "grad_norm": 1.2163633108139038, "learning_rate": 1.2354885774738428e-05, "loss": 0.6619, "step": 2961 }, { "epoch": 0.4417763525858533, "grad_norm": 0.8659238815307617, "learning_rate": 1.2350189649516818e-05, "loss": 0.6356, "step": 2962 }, { "epoch": 0.4419255005779485, "grad_norm": 1.166087031364441, "learning_rate": 1.2345492975628368e-05, "loss": 0.6334, "step": 2963 }, { "epoch": 0.44207464857004364, "grad_norm": 1.1239033937454224, "learning_rate": 1.2340795754169544e-05, "loss": 0.6034, "step": 2964 }, { "epoch": 0.4422237965621388, "grad_norm": 1.2263009548187256, "learning_rate": 1.233609798623694e-05, "loss": 0.5939, "step": 2965 }, { "epoch": 0.4423729445542339, "grad_norm": 1.168184518814087, "learning_rate": 1.233139967292728e-05, "loss": 0.6862, "step": 2966 }, { "epoch": 0.44252209254632907, "grad_norm": 1.194433331489563, "learning_rate": 1.2326700815337422e-05, "loss": 0.7106, "step": 2967 }, { "epoch": 0.44267124053842427, "grad_norm": 1.1805522441864014, "learning_rate": 1.2322001414564336e-05, "loss": 0.6701, "step": 2968 }, { "epoch": 0.4428203885305194, "grad_norm": 1.234705924987793, "learning_rate": 1.2317301471705134e-05, "loss": 0.6077, "step": 2969 }, { "epoch": 0.44296953652261456, "grad_norm": 1.1411901712417603, "learning_rate": 1.2312600987857041e-05, "loss": 0.5964, "step": 2970 }, { "epoch": 0.4431186845147097, "grad_norm": 1.1865615844726562, "learning_rate": 1.2307899964117422e-05, "loss": 0.6891, "step": 2971 }, { "epoch": 0.4432678325068049, "grad_norm": 1.1147712469100952, "learning_rate": 1.2303198401583759e-05, "loss": 0.607, "step": 2972 }, { "epoch": 0.44341698049890005, "grad_norm": 1.2610357999801636, "learning_rate": 1.2298496301353657e-05, "loss": 0.6224, "step": 2973 }, { "epoch": 0.4435661284909952, "grad_norm": 1.1321910619735718, "learning_rate": 1.229379366452486e-05, "loss": 0.5255, "step": 2974 }, { "epoch": 0.44371527648309034, "grad_norm": 1.0618503093719482, "learning_rate": 1.228909049219522e-05, "loss": 0.5661, "step": 2975 }, { "epoch": 0.4438644244751855, "grad_norm": 1.1206871271133423, "learning_rate": 1.2284386785462728e-05, "loss": 0.5671, "step": 2976 }, { "epoch": 0.4440135724672807, "grad_norm": 1.118672490119934, "learning_rate": 1.2279682545425495e-05, "loss": 0.5894, "step": 2977 }, { "epoch": 0.4441627204593758, "grad_norm": 1.1939576864242554, "learning_rate": 1.2274977773181753e-05, "loss": 0.5967, "step": 2978 }, { "epoch": 0.44431186845147097, "grad_norm": 1.225242257118225, "learning_rate": 1.2270272469829862e-05, "loss": 0.6681, "step": 2979 }, { "epoch": 0.4444610164435661, "grad_norm": 1.0799388885498047, "learning_rate": 1.2265566636468309e-05, "loss": 0.5587, "step": 2980 }, { "epoch": 0.44461016443566126, "grad_norm": 1.2204015254974365, "learning_rate": 1.2260860274195694e-05, "loss": 0.7293, "step": 2981 }, { "epoch": 0.44475931242775646, "grad_norm": 1.177132248878479, "learning_rate": 1.2256153384110754e-05, "loss": 0.5803, "step": 2982 }, { "epoch": 0.4449084604198516, "grad_norm": 1.0392035245895386, "learning_rate": 1.2251445967312341e-05, "loss": 0.6122, "step": 2983 }, { "epoch": 0.44505760841194675, "grad_norm": 1.1229174137115479, "learning_rate": 1.2246738024899424e-05, "loss": 0.6567, "step": 2984 }, { "epoch": 0.4452067564040419, "grad_norm": 1.1938340663909912, "learning_rate": 1.2242029557971116e-05, "loss": 0.6708, "step": 2985 }, { "epoch": 0.4453559043961371, "grad_norm": 1.1545846462249756, "learning_rate": 1.223732056762663e-05, "loss": 0.6412, "step": 2986 }, { "epoch": 0.44550505238823224, "grad_norm": 1.0428177118301392, "learning_rate": 1.2232611054965308e-05, "loss": 0.5895, "step": 2987 }, { "epoch": 0.4456542003803274, "grad_norm": 1.1783576011657715, "learning_rate": 1.2227901021086624e-05, "loss": 0.6186, "step": 2988 }, { "epoch": 0.4458033483724225, "grad_norm": 1.233903169631958, "learning_rate": 1.222319046709016e-05, "loss": 0.6191, "step": 2989 }, { "epoch": 0.44595249636451767, "grad_norm": 1.1563762426376343, "learning_rate": 1.2218479394075624e-05, "loss": 0.6264, "step": 2990 }, { "epoch": 0.44610164435661287, "grad_norm": 1.045460820198059, "learning_rate": 1.2213767803142854e-05, "loss": 0.6504, "step": 2991 }, { "epoch": 0.446250792348708, "grad_norm": 1.1446738243103027, "learning_rate": 1.220905569539179e-05, "loss": 0.6324, "step": 2992 }, { "epoch": 0.44639994034080316, "grad_norm": 1.1758981943130493, "learning_rate": 1.2204343071922511e-05, "loss": 0.6655, "step": 2993 }, { "epoch": 0.4465490883328983, "grad_norm": 1.1670901775360107, "learning_rate": 1.2199629933835208e-05, "loss": 0.6699, "step": 2994 }, { "epoch": 0.44669823632499345, "grad_norm": 1.1470566987991333, "learning_rate": 1.2194916282230192e-05, "loss": 0.6838, "step": 2995 }, { "epoch": 0.44684738431708865, "grad_norm": 1.0955371856689453, "learning_rate": 1.21902021182079e-05, "loss": 0.6207, "step": 2996 }, { "epoch": 0.4469965323091838, "grad_norm": 1.113419771194458, "learning_rate": 1.2185487442868876e-05, "loss": 0.6673, "step": 2997 }, { "epoch": 0.44714568030127894, "grad_norm": 1.1968328952789307, "learning_rate": 1.2180772257313793e-05, "loss": 0.6254, "step": 2998 }, { "epoch": 0.4472948282933741, "grad_norm": 1.175281047821045, "learning_rate": 1.2176056562643448e-05, "loss": 0.6881, "step": 2999 }, { "epoch": 0.4474439762854693, "grad_norm": 1.2572530508041382, "learning_rate": 1.2171340359958742e-05, "loss": 0.7033, "step": 3000 }, { "epoch": 0.4475931242775644, "grad_norm": 1.1849809885025024, "learning_rate": 1.2166623650360707e-05, "loss": 0.6389, "step": 3001 }, { "epoch": 0.44774227226965957, "grad_norm": 1.18790864944458, "learning_rate": 1.216190643495049e-05, "loss": 0.6488, "step": 3002 }, { "epoch": 0.4478914202617547, "grad_norm": 1.0804438591003418, "learning_rate": 1.2157188714829353e-05, "loss": 0.621, "step": 3003 }, { "epoch": 0.44804056825384986, "grad_norm": 1.119820237159729, "learning_rate": 1.2152470491098678e-05, "loss": 0.6156, "step": 3004 }, { "epoch": 0.44818971624594506, "grad_norm": 1.121809720993042, "learning_rate": 1.2147751764859967e-05, "loss": 0.6068, "step": 3005 }, { "epoch": 0.4483388642380402, "grad_norm": 1.0970762968063354, "learning_rate": 1.2143032537214832e-05, "loss": 0.618, "step": 3006 }, { "epoch": 0.44848801223013535, "grad_norm": 1.1699639558792114, "learning_rate": 1.2138312809265012e-05, "loss": 0.7299, "step": 3007 }, { "epoch": 0.4486371602222305, "grad_norm": 1.0686297416687012, "learning_rate": 1.2133592582112354e-05, "loss": 0.5719, "step": 3008 }, { "epoch": 0.4487863082143257, "grad_norm": 1.2716200351715088, "learning_rate": 1.2128871856858828e-05, "loss": 0.6866, "step": 3009 }, { "epoch": 0.44893545620642084, "grad_norm": 1.0872379541397095, "learning_rate": 1.2124150634606515e-05, "loss": 0.6305, "step": 3010 }, { "epoch": 0.449084604198516, "grad_norm": 1.1946172714233398, "learning_rate": 1.211942891645762e-05, "loss": 0.6628, "step": 3011 }, { "epoch": 0.4492337521906111, "grad_norm": 1.0899285078048706, "learning_rate": 1.2114706703514452e-05, "loss": 0.5619, "step": 3012 }, { "epoch": 0.44938290018270627, "grad_norm": 1.156241536140442, "learning_rate": 1.2109983996879446e-05, "loss": 0.5796, "step": 3013 }, { "epoch": 0.44953204817480147, "grad_norm": 1.1845649480819702, "learning_rate": 1.2105260797655144e-05, "loss": 0.6637, "step": 3014 }, { "epoch": 0.4496811961668966, "grad_norm": 1.0339666604995728, "learning_rate": 1.2100537106944213e-05, "loss": 0.5934, "step": 3015 }, { "epoch": 0.44983034415899176, "grad_norm": 1.2756143808364868, "learning_rate": 1.2095812925849424e-05, "loss": 0.6791, "step": 3016 }, { "epoch": 0.4499794921510869, "grad_norm": 1.220398187637329, "learning_rate": 1.2091088255473669e-05, "loss": 0.7464, "step": 3017 }, { "epoch": 0.45012864014318205, "grad_norm": 1.1792060136795044, "learning_rate": 1.2086363096919953e-05, "loss": 0.5865, "step": 3018 }, { "epoch": 0.45027778813527725, "grad_norm": 1.1301500797271729, "learning_rate": 1.2081637451291393e-05, "loss": 0.6363, "step": 3019 }, { "epoch": 0.4504269361273724, "grad_norm": 1.0641247034072876, "learning_rate": 1.2076911319691222e-05, "loss": 0.5997, "step": 3020 }, { "epoch": 0.45057608411946753, "grad_norm": 1.200486421585083, "learning_rate": 1.2072184703222791e-05, "loss": 0.706, "step": 3021 }, { "epoch": 0.4507252321115627, "grad_norm": 0.7750304937362671, "learning_rate": 1.2067457602989552e-05, "loss": 0.5739, "step": 3022 }, { "epoch": 0.4508743801036579, "grad_norm": 1.098364233970642, "learning_rate": 1.2062730020095073e-05, "loss": 0.6222, "step": 3023 }, { "epoch": 0.451023528095753, "grad_norm": 1.1955901384353638, "learning_rate": 1.205800195564305e-05, "loss": 0.6062, "step": 3024 }, { "epoch": 0.45117267608784817, "grad_norm": 1.1208730936050415, "learning_rate": 1.2053273410737275e-05, "loss": 0.6569, "step": 3025 }, { "epoch": 0.4513218240799433, "grad_norm": 1.1300145387649536, "learning_rate": 1.2048544386481656e-05, "loss": 0.6377, "step": 3026 }, { "epoch": 0.45147097207203846, "grad_norm": 1.1268329620361328, "learning_rate": 1.204381488398021e-05, "loss": 0.6927, "step": 3027 }, { "epoch": 0.45162012006413366, "grad_norm": 1.1799503564834595, "learning_rate": 1.2039084904337082e-05, "loss": 0.5928, "step": 3028 }, { "epoch": 0.4517692680562288, "grad_norm": 1.1182266473770142, "learning_rate": 1.2034354448656505e-05, "loss": 0.6209, "step": 3029 }, { "epoch": 0.45191841604832395, "grad_norm": 1.318830966949463, "learning_rate": 1.2029623518042837e-05, "loss": 0.679, "step": 3030 }, { "epoch": 0.4520675640404191, "grad_norm": 1.1041959524154663, "learning_rate": 1.2024892113600544e-05, "loss": 0.6109, "step": 3031 }, { "epoch": 0.4522167120325143, "grad_norm": 1.2294394969940186, "learning_rate": 1.2020160236434203e-05, "loss": 0.6705, "step": 3032 }, { "epoch": 0.45236586002460943, "grad_norm": 1.2231441736221313, "learning_rate": 1.2015427887648505e-05, "loss": 0.6524, "step": 3033 }, { "epoch": 0.4525150080167046, "grad_norm": 1.0998133420944214, "learning_rate": 1.2010695068348238e-05, "loss": 0.6669, "step": 3034 }, { "epoch": 0.4526641560087997, "grad_norm": 1.1736096143722534, "learning_rate": 1.2005961779638322e-05, "loss": 0.7293, "step": 3035 }, { "epoch": 0.45281330400089487, "grad_norm": 1.2057074308395386, "learning_rate": 1.2001228022623762e-05, "loss": 0.5968, "step": 3036 }, { "epoch": 0.45296245199299007, "grad_norm": 1.1461360454559326, "learning_rate": 1.1996493798409687e-05, "loss": 0.5965, "step": 3037 }, { "epoch": 0.4531115999850852, "grad_norm": 1.0881246328353882, "learning_rate": 1.1991759108101335e-05, "loss": 0.5983, "step": 3038 }, { "epoch": 0.45326074797718036, "grad_norm": 1.2362555265426636, "learning_rate": 1.1987023952804049e-05, "loss": 0.5548, "step": 3039 }, { "epoch": 0.4534098959692755, "grad_norm": 1.0910131931304932, "learning_rate": 1.1982288333623277e-05, "loss": 0.5731, "step": 3040 }, { "epoch": 0.45355904396137064, "grad_norm": 1.1485872268676758, "learning_rate": 1.1977552251664585e-05, "loss": 0.6544, "step": 3041 }, { "epoch": 0.45370819195346584, "grad_norm": 1.1565160751342773, "learning_rate": 1.197281570803364e-05, "loss": 0.5813, "step": 3042 }, { "epoch": 0.453857339945561, "grad_norm": 1.0753810405731201, "learning_rate": 1.1968078703836218e-05, "loss": 0.6061, "step": 3043 }, { "epoch": 0.45400648793765613, "grad_norm": 1.158522605895996, "learning_rate": 1.1963341240178206e-05, "loss": 0.5752, "step": 3044 }, { "epoch": 0.4541556359297513, "grad_norm": 1.144905924797058, "learning_rate": 1.1958603318165586e-05, "loss": 0.5857, "step": 3045 }, { "epoch": 0.4543047839218465, "grad_norm": 1.1726137399673462, "learning_rate": 1.1953864938904467e-05, "loss": 0.696, "step": 3046 }, { "epoch": 0.4544539319139416, "grad_norm": 1.2068499326705933, "learning_rate": 1.194912610350105e-05, "loss": 0.6285, "step": 3047 }, { "epoch": 0.45460307990603677, "grad_norm": 1.1486698389053345, "learning_rate": 1.1944386813061644e-05, "loss": 0.6079, "step": 3048 }, { "epoch": 0.4547522278981319, "grad_norm": 1.1360812187194824, "learning_rate": 1.193964706869267e-05, "loss": 0.6235, "step": 3049 }, { "epoch": 0.45490137589022706, "grad_norm": 1.1739420890808105, "learning_rate": 1.1934906871500654e-05, "loss": 0.6552, "step": 3050 }, { "epoch": 0.45505052388232226, "grad_norm": 1.2470780611038208, "learning_rate": 1.1930166222592217e-05, "loss": 0.6382, "step": 3051 }, { "epoch": 0.4551996718744174, "grad_norm": 1.0542241334915161, "learning_rate": 1.1925425123074102e-05, "loss": 0.6447, "step": 3052 }, { "epoch": 0.45534881986651254, "grad_norm": 1.109288215637207, "learning_rate": 1.1920683574053145e-05, "loss": 0.5744, "step": 3053 }, { "epoch": 0.4554979678586077, "grad_norm": 1.1931591033935547, "learning_rate": 1.1915941576636293e-05, "loss": 0.6841, "step": 3054 }, { "epoch": 0.45564711585070283, "grad_norm": 1.3081530332565308, "learning_rate": 1.1911199131930593e-05, "loss": 0.69, "step": 3055 }, { "epoch": 0.45579626384279803, "grad_norm": 1.1320393085479736, "learning_rate": 1.1906456241043203e-05, "loss": 0.6561, "step": 3056 }, { "epoch": 0.4559454118348932, "grad_norm": 0.8310832977294922, "learning_rate": 1.190171290508138e-05, "loss": 0.6714, "step": 3057 }, { "epoch": 0.4560945598269883, "grad_norm": 1.1462637186050415, "learning_rate": 1.1896969125152482e-05, "loss": 0.7067, "step": 3058 }, { "epoch": 0.45624370781908347, "grad_norm": 1.1112347841262817, "learning_rate": 1.189222490236398e-05, "loss": 0.6025, "step": 3059 }, { "epoch": 0.45639285581117867, "grad_norm": 1.0716160535812378, "learning_rate": 1.1887480237823443e-05, "loss": 0.5492, "step": 3060 }, { "epoch": 0.4565420038032738, "grad_norm": 1.0495229959487915, "learning_rate": 1.1882735132638544e-05, "loss": 0.6176, "step": 3061 }, { "epoch": 0.45669115179536895, "grad_norm": 1.189881443977356, "learning_rate": 1.1877989587917046e-05, "loss": 0.6391, "step": 3062 }, { "epoch": 0.4568402997874641, "grad_norm": 1.0628529787063599, "learning_rate": 1.1873243604766846e-05, "loss": 0.6744, "step": 3063 }, { "epoch": 0.45698944777955924, "grad_norm": 1.2292251586914062, "learning_rate": 1.1868497184295916e-05, "loss": 0.6556, "step": 3064 }, { "epoch": 0.45713859577165444, "grad_norm": 1.1026482582092285, "learning_rate": 1.1863750327612333e-05, "loss": 0.593, "step": 3065 }, { "epoch": 0.4572877437637496, "grad_norm": 1.066650629043579, "learning_rate": 1.1859003035824289e-05, "loss": 0.6476, "step": 3066 }, { "epoch": 0.45743689175584473, "grad_norm": 1.1124372482299805, "learning_rate": 1.1854255310040062e-05, "loss": 0.554, "step": 3067 }, { "epoch": 0.4575860397479399, "grad_norm": 1.2158337831497192, "learning_rate": 1.1849507151368045e-05, "loss": 0.5833, "step": 3068 }, { "epoch": 0.4577351877400351, "grad_norm": 1.2194527387619019, "learning_rate": 1.1844758560916728e-05, "loss": 0.6122, "step": 3069 }, { "epoch": 0.4578843357321302, "grad_norm": 1.2229712009429932, "learning_rate": 1.184000953979469e-05, "loss": 0.6374, "step": 3070 }, { "epoch": 0.45803348372422537, "grad_norm": 1.207123875617981, "learning_rate": 1.183526008911063e-05, "loss": 0.6284, "step": 3071 }, { "epoch": 0.4581826317163205, "grad_norm": 1.1858960390090942, "learning_rate": 1.1830510209973335e-05, "loss": 0.6253, "step": 3072 }, { "epoch": 0.45833177970841565, "grad_norm": 1.1538602113723755, "learning_rate": 1.1825759903491694e-05, "loss": 0.6412, "step": 3073 }, { "epoch": 0.45848092770051085, "grad_norm": 1.0256778001785278, "learning_rate": 1.1821009170774697e-05, "loss": 0.5512, "step": 3074 }, { "epoch": 0.458630075692606, "grad_norm": 1.1508073806762695, "learning_rate": 1.1816258012931434e-05, "loss": 0.6933, "step": 3075 }, { "epoch": 0.45877922368470114, "grad_norm": 1.0396305322647095, "learning_rate": 1.1811506431071088e-05, "loss": 0.5752, "step": 3076 }, { "epoch": 0.4589283716767963, "grad_norm": 0.8572307229042053, "learning_rate": 1.1806754426302954e-05, "loss": 0.6374, "step": 3077 }, { "epoch": 0.45907751966889143, "grad_norm": 1.1610366106033325, "learning_rate": 1.1802001999736412e-05, "loss": 0.6401, "step": 3078 }, { "epoch": 0.45922666766098663, "grad_norm": 1.152527093887329, "learning_rate": 1.179724915248095e-05, "loss": 0.6104, "step": 3079 }, { "epoch": 0.4593758156530818, "grad_norm": 1.0943204164505005, "learning_rate": 1.1792495885646148e-05, "loss": 0.6164, "step": 3080 }, { "epoch": 0.4595249636451769, "grad_norm": 0.8200857639312744, "learning_rate": 1.1787742200341687e-05, "loss": 0.6336, "step": 3081 }, { "epoch": 0.45967411163727206, "grad_norm": 1.105716586112976, "learning_rate": 1.1782988097677349e-05, "loss": 0.5951, "step": 3082 }, { "epoch": 0.45982325962936726, "grad_norm": 1.2109073400497437, "learning_rate": 1.1778233578763005e-05, "loss": 0.6186, "step": 3083 }, { "epoch": 0.4599724076214624, "grad_norm": 1.1954867839813232, "learning_rate": 1.1773478644708631e-05, "loss": 0.6564, "step": 3084 }, { "epoch": 0.46012155561355755, "grad_norm": 1.160833477973938, "learning_rate": 1.1768723296624293e-05, "loss": 0.6449, "step": 3085 }, { "epoch": 0.4602707036056527, "grad_norm": 1.1026674509048462, "learning_rate": 1.1763967535620164e-05, "loss": 0.6001, "step": 3086 }, { "epoch": 0.46041985159774784, "grad_norm": 1.0042153596878052, "learning_rate": 1.1759211362806501e-05, "loss": 0.5236, "step": 3087 }, { "epoch": 0.46056899958984304, "grad_norm": 1.1431300640106201, "learning_rate": 1.1754454779293669e-05, "loss": 0.6058, "step": 3088 }, { "epoch": 0.4607181475819382, "grad_norm": 1.139866828918457, "learning_rate": 1.1749697786192113e-05, "loss": 0.5987, "step": 3089 }, { "epoch": 0.46086729557403333, "grad_norm": 1.117830514907837, "learning_rate": 1.1744940384612394e-05, "loss": 0.5885, "step": 3090 }, { "epoch": 0.4610164435661285, "grad_norm": 1.4014936685562134, "learning_rate": 1.1740182575665154e-05, "loss": 0.6713, "step": 3091 }, { "epoch": 0.4611655915582236, "grad_norm": 1.1400036811828613, "learning_rate": 1.1735424360461134e-05, "loss": 0.6533, "step": 3092 }, { "epoch": 0.4613147395503188, "grad_norm": 1.0111945867538452, "learning_rate": 1.1730665740111164e-05, "loss": 0.5353, "step": 3093 }, { "epoch": 0.46146388754241396, "grad_norm": 1.1144993305206299, "learning_rate": 1.1725906715726185e-05, "loss": 0.5931, "step": 3094 }, { "epoch": 0.4616130355345091, "grad_norm": 1.096091866493225, "learning_rate": 1.1721147288417214e-05, "loss": 0.6106, "step": 3095 }, { "epoch": 0.46176218352660425, "grad_norm": 1.154346227645874, "learning_rate": 1.1716387459295375e-05, "loss": 0.6551, "step": 3096 }, { "epoch": 0.46191133151869945, "grad_norm": 1.0659222602844238, "learning_rate": 1.1711627229471876e-05, "loss": 0.6088, "step": 3097 }, { "epoch": 0.4620604795107946, "grad_norm": 1.0469505786895752, "learning_rate": 1.1706866600058025e-05, "loss": 0.5759, "step": 3098 }, { "epoch": 0.46220962750288974, "grad_norm": 1.1664707660675049, "learning_rate": 1.1702105572165223e-05, "loss": 0.6453, "step": 3099 }, { "epoch": 0.4623587754949849, "grad_norm": 1.2844983339309692, "learning_rate": 1.1697344146904964e-05, "loss": 0.7221, "step": 3100 }, { "epoch": 0.46250792348708003, "grad_norm": 1.1359450817108154, "learning_rate": 1.1692582325388824e-05, "loss": 0.6206, "step": 3101 }, { "epoch": 0.46265707147917523, "grad_norm": 1.0834643840789795, "learning_rate": 1.1687820108728491e-05, "loss": 0.5326, "step": 3102 }, { "epoch": 0.4628062194712704, "grad_norm": 1.128377079963684, "learning_rate": 1.1683057498035733e-05, "loss": 0.6344, "step": 3103 }, { "epoch": 0.4629553674633655, "grad_norm": 1.1372318267822266, "learning_rate": 1.1678294494422406e-05, "loss": 0.6662, "step": 3104 }, { "epoch": 0.46310451545546066, "grad_norm": 1.2236981391906738, "learning_rate": 1.167353109900047e-05, "loss": 0.6505, "step": 3105 }, { "epoch": 0.46325366344755586, "grad_norm": 1.448420763015747, "learning_rate": 1.1668767312881967e-05, "loss": 0.6753, "step": 3106 }, { "epoch": 0.463402811439651, "grad_norm": 1.1594942808151245, "learning_rate": 1.1664003137179036e-05, "loss": 0.6155, "step": 3107 }, { "epoch": 0.46355195943174615, "grad_norm": 1.1536935567855835, "learning_rate": 1.1659238573003903e-05, "loss": 0.6457, "step": 3108 }, { "epoch": 0.4637011074238413, "grad_norm": 1.197396993637085, "learning_rate": 1.1654473621468888e-05, "loss": 0.5855, "step": 3109 }, { "epoch": 0.46385025541593644, "grad_norm": 1.237223505973816, "learning_rate": 1.1649708283686394e-05, "loss": 0.7483, "step": 3110 }, { "epoch": 0.46399940340803164, "grad_norm": 1.2125524282455444, "learning_rate": 1.1644942560768926e-05, "loss": 0.6767, "step": 3111 }, { "epoch": 0.4641485514001268, "grad_norm": 1.058627963066101, "learning_rate": 1.1640176453829066e-05, "loss": 0.6334, "step": 3112 }, { "epoch": 0.46429769939222193, "grad_norm": 1.2420603036880493, "learning_rate": 1.16354099639795e-05, "loss": 0.6891, "step": 3113 }, { "epoch": 0.4644468473843171, "grad_norm": 1.2212750911712646, "learning_rate": 1.163064309233299e-05, "loss": 0.6498, "step": 3114 }, { "epoch": 0.4645959953764122, "grad_norm": 1.0733259916305542, "learning_rate": 1.1625875840002392e-05, "loss": 0.5146, "step": 3115 }, { "epoch": 0.4647451433685074, "grad_norm": 1.0728179216384888, "learning_rate": 1.1621108208100657e-05, "loss": 0.6221, "step": 3116 }, { "epoch": 0.46489429136060256, "grad_norm": 1.0829768180847168, "learning_rate": 1.161634019774082e-05, "loss": 0.6332, "step": 3117 }, { "epoch": 0.4650434393526977, "grad_norm": 1.1148277521133423, "learning_rate": 1.1611571810035991e-05, "loss": 0.6938, "step": 3118 }, { "epoch": 0.46519258734479285, "grad_norm": 1.0546423196792603, "learning_rate": 1.1606803046099392e-05, "loss": 0.5345, "step": 3119 }, { "epoch": 0.46534173533688805, "grad_norm": 1.1712952852249146, "learning_rate": 1.1602033907044324e-05, "loss": 0.6335, "step": 3120 }, { "epoch": 0.4654908833289832, "grad_norm": 1.2098103761672974, "learning_rate": 1.1597264393984165e-05, "loss": 0.5557, "step": 3121 }, { "epoch": 0.46564003132107834, "grad_norm": 1.0068613290786743, "learning_rate": 1.1592494508032393e-05, "loss": 0.5821, "step": 3122 }, { "epoch": 0.4657891793131735, "grad_norm": 0.9477585554122925, "learning_rate": 1.1587724250302564e-05, "loss": 0.5431, "step": 3123 }, { "epoch": 0.46593832730526863, "grad_norm": 1.1610305309295654, "learning_rate": 1.1582953621908328e-05, "loss": 0.6949, "step": 3124 }, { "epoch": 0.46608747529736383, "grad_norm": 1.1220722198486328, "learning_rate": 1.1578182623963422e-05, "loss": 0.6661, "step": 3125 }, { "epoch": 0.466236623289459, "grad_norm": 1.1461364030838013, "learning_rate": 1.1573411257581659e-05, "loss": 0.6144, "step": 3126 }, { "epoch": 0.4663857712815541, "grad_norm": 1.1700605154037476, "learning_rate": 1.1568639523876955e-05, "loss": 0.629, "step": 3127 }, { "epoch": 0.46653491927364926, "grad_norm": 1.136199951171875, "learning_rate": 1.1563867423963291e-05, "loss": 0.6628, "step": 3128 }, { "epoch": 0.46668406726574446, "grad_norm": 1.1534909009933472, "learning_rate": 1.155909495895475e-05, "loss": 0.6193, "step": 3129 }, { "epoch": 0.4668332152578396, "grad_norm": 1.1149805784225464, "learning_rate": 1.1554322129965495e-05, "loss": 0.5983, "step": 3130 }, { "epoch": 0.46698236324993475, "grad_norm": 1.1752732992172241, "learning_rate": 1.1549548938109775e-05, "loss": 0.6105, "step": 3131 }, { "epoch": 0.4671315112420299, "grad_norm": 1.1522854566574097, "learning_rate": 1.1544775384501914e-05, "loss": 0.6389, "step": 3132 }, { "epoch": 0.46728065923412504, "grad_norm": 1.177073359489441, "learning_rate": 1.1540001470256339e-05, "loss": 0.7554, "step": 3133 }, { "epoch": 0.46742980722622024, "grad_norm": 0.8707496523857117, "learning_rate": 1.1535227196487545e-05, "loss": 0.6705, "step": 3134 }, { "epoch": 0.4675789552183154, "grad_norm": 1.1055747270584106, "learning_rate": 1.1530452564310117e-05, "loss": 0.5939, "step": 3135 }, { "epoch": 0.46772810321041053, "grad_norm": 1.1769230365753174, "learning_rate": 1.1525677574838728e-05, "loss": 0.6478, "step": 3136 }, { "epoch": 0.4678772512025057, "grad_norm": 1.1602038145065308, "learning_rate": 1.1520902229188122e-05, "loss": 0.6254, "step": 3137 }, { "epoch": 0.4680263991946008, "grad_norm": 1.2693730592727661, "learning_rate": 1.151612652847314e-05, "loss": 0.667, "step": 3138 }, { "epoch": 0.468175547186696, "grad_norm": 1.0845359563827515, "learning_rate": 1.1511350473808699e-05, "loss": 0.576, "step": 3139 }, { "epoch": 0.46832469517879116, "grad_norm": 1.1878427267074585, "learning_rate": 1.1506574066309796e-05, "loss": 0.6171, "step": 3140 }, { "epoch": 0.4684738431708863, "grad_norm": 1.189881443977356, "learning_rate": 1.150179730709152e-05, "loss": 0.6525, "step": 3141 }, { "epoch": 0.46862299116298145, "grad_norm": 1.1677618026733398, "learning_rate": 1.1497020197269033e-05, "loss": 0.6508, "step": 3142 }, { "epoch": 0.46877213915507665, "grad_norm": 1.1352565288543701, "learning_rate": 1.1492242737957582e-05, "loss": 0.619, "step": 3143 }, { "epoch": 0.4689212871471718, "grad_norm": 1.2903090715408325, "learning_rate": 1.1487464930272496e-05, "loss": 0.5949, "step": 3144 }, { "epoch": 0.46907043513926694, "grad_norm": 1.0222455263137817, "learning_rate": 1.1482686775329183e-05, "loss": 0.5552, "step": 3145 }, { "epoch": 0.4692195831313621, "grad_norm": 1.0697073936462402, "learning_rate": 1.1477908274243135e-05, "loss": 0.6389, "step": 3146 }, { "epoch": 0.46936873112345723, "grad_norm": 1.1159851551055908, "learning_rate": 1.1473129428129926e-05, "loss": 0.6253, "step": 3147 }, { "epoch": 0.46951787911555243, "grad_norm": 1.1076356172561646, "learning_rate": 1.1468350238105203e-05, "loss": 0.6483, "step": 3148 }, { "epoch": 0.46966702710764757, "grad_norm": 1.111504077911377, "learning_rate": 1.1463570705284705e-05, "loss": 0.652, "step": 3149 }, { "epoch": 0.4698161750997427, "grad_norm": 1.0274854898452759, "learning_rate": 1.1458790830784242e-05, "loss": 0.5048, "step": 3150 }, { "epoch": 0.46996532309183786, "grad_norm": 1.30229651927948, "learning_rate": 1.1454010615719701e-05, "loss": 0.6557, "step": 3151 }, { "epoch": 0.470114471083933, "grad_norm": 1.213752269744873, "learning_rate": 1.1449230061207064e-05, "loss": 0.6018, "step": 3152 }, { "epoch": 0.4702636190760282, "grad_norm": 1.200314998626709, "learning_rate": 1.1444449168362375e-05, "loss": 0.6251, "step": 3153 }, { "epoch": 0.47041276706812335, "grad_norm": 1.1262574195861816, "learning_rate": 1.1439667938301762e-05, "loss": 0.6484, "step": 3154 }, { "epoch": 0.4705619150602185, "grad_norm": 1.1962833404541016, "learning_rate": 1.1434886372141443e-05, "loss": 0.6239, "step": 3155 }, { "epoch": 0.47071106305231364, "grad_norm": 1.0976792573928833, "learning_rate": 1.1430104470997698e-05, "loss": 0.6578, "step": 3156 }, { "epoch": 0.47086021104440884, "grad_norm": 1.0573968887329102, "learning_rate": 1.1425322235986893e-05, "loss": 0.638, "step": 3157 }, { "epoch": 0.471009359036504, "grad_norm": 0.9091238379478455, "learning_rate": 1.1420539668225475e-05, "loss": 0.6725, "step": 3158 }, { "epoch": 0.4711585070285991, "grad_norm": 1.1525791883468628, "learning_rate": 1.141575676882996e-05, "loss": 0.5528, "step": 3159 }, { "epoch": 0.47130765502069427, "grad_norm": 1.1787089109420776, "learning_rate": 1.141097353891695e-05, "loss": 0.6075, "step": 3160 }, { "epoch": 0.4714568030127894, "grad_norm": 1.1376711130142212, "learning_rate": 1.1406189979603122e-05, "loss": 0.6688, "step": 3161 }, { "epoch": 0.4716059510048846, "grad_norm": 1.1687155961990356, "learning_rate": 1.1401406092005226e-05, "loss": 0.6388, "step": 3162 }, { "epoch": 0.47175509899697976, "grad_norm": 1.1079355478286743, "learning_rate": 1.139662187724009e-05, "loss": 0.6143, "step": 3163 }, { "epoch": 0.4719042469890749, "grad_norm": 1.3064547777175903, "learning_rate": 1.1391837336424625e-05, "loss": 0.6405, "step": 3164 }, { "epoch": 0.47205339498117005, "grad_norm": 1.159406304359436, "learning_rate": 1.1387052470675806e-05, "loss": 0.6596, "step": 3165 }, { "epoch": 0.47220254297326525, "grad_norm": 1.0912067890167236, "learning_rate": 1.1382267281110697e-05, "loss": 0.6214, "step": 3166 }, { "epoch": 0.4723516909653604, "grad_norm": 1.1458607912063599, "learning_rate": 1.1377481768846426e-05, "loss": 0.6549, "step": 3167 }, { "epoch": 0.47250083895745554, "grad_norm": 1.0854204893112183, "learning_rate": 1.1372695935000204e-05, "loss": 0.6196, "step": 3168 }, { "epoch": 0.4726499869495507, "grad_norm": 1.047344446182251, "learning_rate": 1.1367909780689315e-05, "loss": 0.4899, "step": 3169 }, { "epoch": 0.4727991349416458, "grad_norm": 1.1290240287780762, "learning_rate": 1.1363123307031118e-05, "loss": 0.6422, "step": 3170 }, { "epoch": 0.472948282933741, "grad_norm": 1.0750579833984375, "learning_rate": 1.135833651514304e-05, "loss": 0.5654, "step": 3171 }, { "epoch": 0.47309743092583617, "grad_norm": 1.2260431051254272, "learning_rate": 1.1353549406142596e-05, "loss": 0.6922, "step": 3172 }, { "epoch": 0.4732465789179313, "grad_norm": 1.1159805059432983, "learning_rate": 1.1348761981147366e-05, "loss": 0.591, "step": 3173 }, { "epoch": 0.47339572691002646, "grad_norm": 1.2733081579208374, "learning_rate": 1.1343974241274998e-05, "loss": 0.6497, "step": 3174 }, { "epoch": 0.4735448749021216, "grad_norm": 1.2307339906692505, "learning_rate": 1.1339186187643229e-05, "loss": 0.6376, "step": 3175 }, { "epoch": 0.4736940228942168, "grad_norm": 1.1738002300262451, "learning_rate": 1.1334397821369858e-05, "loss": 0.696, "step": 3176 }, { "epoch": 0.47384317088631195, "grad_norm": 1.0645349025726318, "learning_rate": 1.1329609143572757e-05, "loss": 0.5764, "step": 3177 }, { "epoch": 0.4739923188784071, "grad_norm": 0.9858606457710266, "learning_rate": 1.1324820155369878e-05, "loss": 0.5435, "step": 3178 }, { "epoch": 0.47414146687050224, "grad_norm": 1.0858163833618164, "learning_rate": 1.1320030857879238e-05, "loss": 0.6961, "step": 3179 }, { "epoch": 0.47429061486259744, "grad_norm": 1.1870840787887573, "learning_rate": 1.1315241252218929e-05, "loss": 0.6743, "step": 3180 }, { "epoch": 0.4744397628546926, "grad_norm": 1.106196641921997, "learning_rate": 1.131045133950712e-05, "loss": 0.6097, "step": 3181 }, { "epoch": 0.4745889108467877, "grad_norm": 1.293498158454895, "learning_rate": 1.130566112086204e-05, "loss": 0.7232, "step": 3182 }, { "epoch": 0.47473805883888287, "grad_norm": 1.216238260269165, "learning_rate": 1.1300870597402e-05, "loss": 0.6134, "step": 3183 }, { "epoch": 0.474887206830978, "grad_norm": 1.0662513971328735, "learning_rate": 1.1296079770245378e-05, "loss": 0.6897, "step": 3184 }, { "epoch": 0.4750363548230732, "grad_norm": 1.167089819908142, "learning_rate": 1.1291288640510623e-05, "loss": 0.676, "step": 3185 }, { "epoch": 0.47518550281516836, "grad_norm": 1.1401900053024292, "learning_rate": 1.1286497209316256e-05, "loss": 0.5975, "step": 3186 }, { "epoch": 0.4753346508072635, "grad_norm": 1.2175426483154297, "learning_rate": 1.1281705477780866e-05, "loss": 0.644, "step": 3187 }, { "epoch": 0.47548379879935865, "grad_norm": 1.0687097311019897, "learning_rate": 1.1276913447023114e-05, "loss": 0.6131, "step": 3188 }, { "epoch": 0.47563294679145385, "grad_norm": 1.1967830657958984, "learning_rate": 1.1272121118161729e-05, "loss": 0.6121, "step": 3189 }, { "epoch": 0.475782094783549, "grad_norm": 1.0639495849609375, "learning_rate": 1.1267328492315513e-05, "loss": 0.572, "step": 3190 }, { "epoch": 0.47593124277564414, "grad_norm": 0.9870564341545105, "learning_rate": 1.1262535570603335e-05, "loss": 0.5076, "step": 3191 }, { "epoch": 0.4760803907677393, "grad_norm": 1.2091808319091797, "learning_rate": 1.1257742354144132e-05, "loss": 0.6517, "step": 3192 }, { "epoch": 0.4762295387598344, "grad_norm": 1.061057686805725, "learning_rate": 1.1252948844056912e-05, "loss": 0.5865, "step": 3193 }, { "epoch": 0.4763786867519296, "grad_norm": 1.243703842163086, "learning_rate": 1.1248155041460749e-05, "loss": 0.6536, "step": 3194 }, { "epoch": 0.47652783474402477, "grad_norm": 1.1932868957519531, "learning_rate": 1.124336094747479e-05, "loss": 0.6817, "step": 3195 }, { "epoch": 0.4766769827361199, "grad_norm": 0.9059649109840393, "learning_rate": 1.1238566563218244e-05, "loss": 0.6676, "step": 3196 }, { "epoch": 0.47682613072821506, "grad_norm": 1.2557858228683472, "learning_rate": 1.1233771889810394e-05, "loss": 0.6821, "step": 3197 }, { "epoch": 0.4769752787203102, "grad_norm": 1.2089132070541382, "learning_rate": 1.1228976928370583e-05, "loss": 0.6152, "step": 3198 }, { "epoch": 0.4771244267124054, "grad_norm": 1.1420172452926636, "learning_rate": 1.122418168001823e-05, "loss": 0.5626, "step": 3199 }, { "epoch": 0.47727357470450055, "grad_norm": 1.0292072296142578, "learning_rate": 1.1219386145872812e-05, "loss": 0.5298, "step": 3200 }, { "epoch": 0.4774227226965957, "grad_norm": 1.212363839149475, "learning_rate": 1.121459032705388e-05, "loss": 0.6193, "step": 3201 }, { "epoch": 0.47757187068869084, "grad_norm": 1.159781813621521, "learning_rate": 1.1209794224681048e-05, "loss": 0.6204, "step": 3202 }, { "epoch": 0.47772101868078604, "grad_norm": 1.113775372505188, "learning_rate": 1.1204997839874e-05, "loss": 0.5296, "step": 3203 }, { "epoch": 0.4778701666728812, "grad_norm": 1.1176356077194214, "learning_rate": 1.1200201173752476e-05, "loss": 0.5528, "step": 3204 }, { "epoch": 0.4780193146649763, "grad_norm": 1.118090271949768, "learning_rate": 1.1195404227436295e-05, "loss": 0.605, "step": 3205 }, { "epoch": 0.47816846265707147, "grad_norm": 1.1715017557144165, "learning_rate": 1.1190607002045332e-05, "loss": 0.5746, "step": 3206 }, { "epoch": 0.4783176106491666, "grad_norm": 1.1937909126281738, "learning_rate": 1.1185809498699526e-05, "loss": 0.5713, "step": 3207 }, { "epoch": 0.4784667586412618, "grad_norm": 1.1263803243637085, "learning_rate": 1.1181011718518895e-05, "loss": 0.6437, "step": 3208 }, { "epoch": 0.47861590663335696, "grad_norm": 1.0860395431518555, "learning_rate": 1.1176213662623502e-05, "loss": 0.6637, "step": 3209 }, { "epoch": 0.4787650546254521, "grad_norm": 1.1763381958007812, "learning_rate": 1.1171415332133488e-05, "loss": 0.6734, "step": 3210 }, { "epoch": 0.47891420261754725, "grad_norm": 1.1227147579193115, "learning_rate": 1.1166616728169052e-05, "loss": 0.5975, "step": 3211 }, { "epoch": 0.4790633506096424, "grad_norm": 1.1276628971099854, "learning_rate": 1.1161817851850464e-05, "loss": 0.6383, "step": 3212 }, { "epoch": 0.4792124986017376, "grad_norm": 1.169210433959961, "learning_rate": 1.1157018704298049e-05, "loss": 0.6348, "step": 3213 }, { "epoch": 0.47936164659383274, "grad_norm": 1.1310299634933472, "learning_rate": 1.1152219286632197e-05, "loss": 0.635, "step": 3214 }, { "epoch": 0.4795107945859279, "grad_norm": 1.2364180088043213, "learning_rate": 1.1147419599973364e-05, "loss": 0.7227, "step": 3215 }, { "epoch": 0.479659942578023, "grad_norm": 1.2077875137329102, "learning_rate": 1.1142619645442068e-05, "loss": 0.6208, "step": 3216 }, { "epoch": 0.4798090905701182, "grad_norm": 1.1086328029632568, "learning_rate": 1.1137819424158891e-05, "loss": 0.5736, "step": 3217 }, { "epoch": 0.47995823856221337, "grad_norm": 1.1048386096954346, "learning_rate": 1.1133018937244471e-05, "loss": 0.6176, "step": 3218 }, { "epoch": 0.4801073865543085, "grad_norm": 1.091813087463379, "learning_rate": 1.1128218185819517e-05, "loss": 0.589, "step": 3219 }, { "epoch": 0.48025653454640366, "grad_norm": 1.144848346710205, "learning_rate": 1.1123417171004794e-05, "loss": 0.6895, "step": 3220 }, { "epoch": 0.4804056825384988, "grad_norm": 1.1491613388061523, "learning_rate": 1.1118615893921125e-05, "loss": 0.6181, "step": 3221 }, { "epoch": 0.480554830530594, "grad_norm": 1.1594823598861694, "learning_rate": 1.1113814355689408e-05, "loss": 0.5767, "step": 3222 }, { "epoch": 0.48070397852268915, "grad_norm": 1.0895435810089111, "learning_rate": 1.1109012557430585e-05, "loss": 0.5785, "step": 3223 }, { "epoch": 0.4808531265147843, "grad_norm": 1.1087104082107544, "learning_rate": 1.1104210500265668e-05, "loss": 0.6087, "step": 3224 }, { "epoch": 0.48100227450687943, "grad_norm": 1.2220271825790405, "learning_rate": 1.1099408185315734e-05, "loss": 0.603, "step": 3225 }, { "epoch": 0.48115142249897463, "grad_norm": 1.055877685546875, "learning_rate": 1.1094605613701905e-05, "loss": 0.5351, "step": 3226 }, { "epoch": 0.4813005704910698, "grad_norm": 1.1875317096710205, "learning_rate": 1.108980278654538e-05, "loss": 0.5594, "step": 3227 }, { "epoch": 0.4814497184831649, "grad_norm": 1.208817481994629, "learning_rate": 1.1084999704967406e-05, "loss": 0.6361, "step": 3228 }, { "epoch": 0.48159886647526007, "grad_norm": 1.1120402812957764, "learning_rate": 1.1080196370089293e-05, "loss": 0.6171, "step": 3229 }, { "epoch": 0.4817480144673552, "grad_norm": 1.1644670963287354, "learning_rate": 1.1075392783032412e-05, "loss": 0.5873, "step": 3230 }, { "epoch": 0.4818971624594504, "grad_norm": 1.1983065605163574, "learning_rate": 1.1070588944918193e-05, "loss": 0.6562, "step": 3231 }, { "epoch": 0.48204631045154556, "grad_norm": 1.1223604679107666, "learning_rate": 1.1065784856868116e-05, "loss": 0.6406, "step": 3232 }, { "epoch": 0.4821954584436407, "grad_norm": 1.1602396965026855, "learning_rate": 1.106098052000373e-05, "loss": 0.6004, "step": 3233 }, { "epoch": 0.48234460643573585, "grad_norm": 1.1851359605789185, "learning_rate": 1.1056175935446642e-05, "loss": 0.6331, "step": 3234 }, { "epoch": 0.482493754427831, "grad_norm": 1.0904619693756104, "learning_rate": 1.1051371104318507e-05, "loss": 0.5939, "step": 3235 }, { "epoch": 0.4826429024199262, "grad_norm": 1.209891676902771, "learning_rate": 1.1046566027741048e-05, "loss": 0.6719, "step": 3236 }, { "epoch": 0.48279205041202133, "grad_norm": 1.1104406118392944, "learning_rate": 1.1041760706836037e-05, "loss": 0.5399, "step": 3237 }, { "epoch": 0.4829411984041165, "grad_norm": 1.2481348514556885, "learning_rate": 1.1036955142725309e-05, "loss": 0.7163, "step": 3238 }, { "epoch": 0.4830903463962116, "grad_norm": 1.1709803342819214, "learning_rate": 1.1032149336530757e-05, "loss": 0.5698, "step": 3239 }, { "epoch": 0.4832394943883068, "grad_norm": 1.0603584051132202, "learning_rate": 1.1027343289374322e-05, "loss": 0.6588, "step": 3240 }, { "epoch": 0.48338864238040197, "grad_norm": 1.2980889081954956, "learning_rate": 1.102253700237801e-05, "loss": 0.6507, "step": 3241 }, { "epoch": 0.4835377903724971, "grad_norm": 1.1051379442214966, "learning_rate": 1.1017730476663878e-05, "loss": 0.6374, "step": 3242 }, { "epoch": 0.48368693836459226, "grad_norm": 0.951265275478363, "learning_rate": 1.1012923713354039e-05, "loss": 0.6524, "step": 3243 }, { "epoch": 0.4838360863566874, "grad_norm": 1.322110891342163, "learning_rate": 1.1008116713570664e-05, "loss": 0.5729, "step": 3244 }, { "epoch": 0.4839852343487826, "grad_norm": 1.2485028505325317, "learning_rate": 1.1003309478435982e-05, "loss": 0.6301, "step": 3245 }, { "epoch": 0.48413438234087774, "grad_norm": 1.1262024641036987, "learning_rate": 1.0998502009072264e-05, "loss": 0.6133, "step": 3246 }, { "epoch": 0.4842835303329729, "grad_norm": 1.1784305572509766, "learning_rate": 1.0993694306601852e-05, "loss": 0.6462, "step": 3247 }, { "epoch": 0.48443267832506803, "grad_norm": 1.169173002243042, "learning_rate": 1.0988886372147135e-05, "loss": 0.6356, "step": 3248 }, { "epoch": 0.4845818263171632, "grad_norm": 1.1615897417068481, "learning_rate": 1.0984078206830548e-05, "loss": 0.6658, "step": 3249 }, { "epoch": 0.4847309743092584, "grad_norm": 1.196884036064148, "learning_rate": 1.0979269811774598e-05, "loss": 0.6173, "step": 3250 }, { "epoch": 0.4848801223013535, "grad_norm": 1.3049912452697754, "learning_rate": 1.0974461188101831e-05, "loss": 0.6954, "step": 3251 }, { "epoch": 0.48502927029344867, "grad_norm": 1.1524686813354492, "learning_rate": 1.096965233693485e-05, "loss": 0.6578, "step": 3252 }, { "epoch": 0.4851784182855438, "grad_norm": 1.2523623704910278, "learning_rate": 1.0964843259396313e-05, "loss": 0.5984, "step": 3253 }, { "epoch": 0.485327566277639, "grad_norm": 0.8548996448516846, "learning_rate": 1.0960033956608931e-05, "loss": 0.6866, "step": 3254 }, { "epoch": 0.48547671426973416, "grad_norm": 1.1354871988296509, "learning_rate": 1.0955224429695466e-05, "loss": 0.6606, "step": 3255 }, { "epoch": 0.4856258622618293, "grad_norm": 1.1372778415679932, "learning_rate": 1.0950414679778736e-05, "loss": 0.5798, "step": 3256 }, { "epoch": 0.48577501025392444, "grad_norm": 1.1153963804244995, "learning_rate": 1.0945604707981601e-05, "loss": 0.5871, "step": 3257 }, { "epoch": 0.4859241582460196, "grad_norm": 1.2521588802337646, "learning_rate": 1.0940794515426986e-05, "loss": 0.626, "step": 3258 }, { "epoch": 0.4860733062381148, "grad_norm": 1.2235559225082397, "learning_rate": 1.0935984103237857e-05, "loss": 0.6431, "step": 3259 }, { "epoch": 0.48622245423020993, "grad_norm": 1.2160193920135498, "learning_rate": 1.0931173472537237e-05, "loss": 0.6891, "step": 3260 }, { "epoch": 0.4863716022223051, "grad_norm": 1.0944178104400635, "learning_rate": 1.0926362624448202e-05, "loss": 0.6619, "step": 3261 }, { "epoch": 0.4865207502144002, "grad_norm": 1.0973466634750366, "learning_rate": 1.0921551560093872e-05, "loss": 0.5332, "step": 3262 }, { "epoch": 0.4866698982064954, "grad_norm": 1.1913585662841797, "learning_rate": 1.0916740280597417e-05, "loss": 0.6397, "step": 3263 }, { "epoch": 0.48681904619859057, "grad_norm": 1.0708825588226318, "learning_rate": 1.091192878708207e-05, "loss": 0.6504, "step": 3264 }, { "epoch": 0.4869681941906857, "grad_norm": 1.2471431493759155, "learning_rate": 1.0907117080671099e-05, "loss": 0.6373, "step": 3265 }, { "epoch": 0.48711734218278085, "grad_norm": 1.1805082559585571, "learning_rate": 1.0902305162487829e-05, "loss": 0.5911, "step": 3266 }, { "epoch": 0.487266490174876, "grad_norm": 1.150673747062683, "learning_rate": 1.0897493033655636e-05, "loss": 0.6074, "step": 3267 }, { "epoch": 0.4874156381669712, "grad_norm": 1.1490817070007324, "learning_rate": 1.0892680695297932e-05, "loss": 0.5754, "step": 3268 }, { "epoch": 0.48756478615906634, "grad_norm": 1.1982073783874512, "learning_rate": 1.0887868148538204e-05, "loss": 0.6479, "step": 3269 }, { "epoch": 0.4877139341511615, "grad_norm": 1.1249825954437256, "learning_rate": 1.0883055394499962e-05, "loss": 0.6019, "step": 3270 }, { "epoch": 0.48786308214325663, "grad_norm": 1.1543270349502563, "learning_rate": 1.0878242434306772e-05, "loss": 0.73, "step": 3271 }, { "epoch": 0.4880122301353518, "grad_norm": 1.2243183851242065, "learning_rate": 1.0873429269082256e-05, "loss": 0.7215, "step": 3272 }, { "epoch": 0.488161378127447, "grad_norm": 1.1000109910964966, "learning_rate": 1.086861589995008e-05, "loss": 0.5951, "step": 3273 }, { "epoch": 0.4883105261195421, "grad_norm": 1.1853290796279907, "learning_rate": 1.0863802328033947e-05, "loss": 0.6127, "step": 3274 }, { "epoch": 0.48845967411163727, "grad_norm": 1.1965060234069824, "learning_rate": 1.0858988554457626e-05, "loss": 0.6, "step": 3275 }, { "epoch": 0.4886088221037324, "grad_norm": 1.1722029447555542, "learning_rate": 1.0854174580344918e-05, "loss": 0.6313, "step": 3276 }, { "epoch": 0.4887579700958276, "grad_norm": 1.2913087606430054, "learning_rate": 1.0849360406819676e-05, "loss": 0.644, "step": 3277 }, { "epoch": 0.48890711808792275, "grad_norm": 1.1876214742660522, "learning_rate": 1.0844546035005803e-05, "loss": 0.6288, "step": 3278 }, { "epoch": 0.4890562660800179, "grad_norm": 1.0972989797592163, "learning_rate": 1.0839731466027242e-05, "loss": 0.5894, "step": 3279 }, { "epoch": 0.48920541407211304, "grad_norm": 1.1353589296340942, "learning_rate": 1.0834916701007985e-05, "loss": 0.6844, "step": 3280 }, { "epoch": 0.4893545620642082, "grad_norm": 1.2045475244522095, "learning_rate": 1.0830101741072069e-05, "loss": 0.6388, "step": 3281 }, { "epoch": 0.4895037100563034, "grad_norm": 1.2776455879211426, "learning_rate": 1.0825286587343582e-05, "loss": 0.6318, "step": 3282 }, { "epoch": 0.48965285804839853, "grad_norm": 1.0653250217437744, "learning_rate": 1.082047124094665e-05, "loss": 0.5993, "step": 3283 }, { "epoch": 0.4898020060404937, "grad_norm": 1.1788231134414673, "learning_rate": 1.0815655703005446e-05, "loss": 0.6948, "step": 3284 }, { "epoch": 0.4899511540325888, "grad_norm": 1.202825903892517, "learning_rate": 1.0810839974644183e-05, "loss": 0.6077, "step": 3285 }, { "epoch": 0.490100302024684, "grad_norm": 1.1523864269256592, "learning_rate": 1.0806024056987132e-05, "loss": 0.6228, "step": 3286 }, { "epoch": 0.49024945001677916, "grad_norm": 1.05517578125, "learning_rate": 1.0801207951158599e-05, "loss": 0.5513, "step": 3287 }, { "epoch": 0.4903985980088743, "grad_norm": 1.1537123918533325, "learning_rate": 1.079639165828293e-05, "loss": 0.5969, "step": 3288 }, { "epoch": 0.49054774600096945, "grad_norm": 1.2090120315551758, "learning_rate": 1.0791575179484523e-05, "loss": 0.611, "step": 3289 }, { "epoch": 0.4906968939930646, "grad_norm": 1.1577467918395996, "learning_rate": 1.0786758515887814e-05, "loss": 0.5939, "step": 3290 }, { "epoch": 0.4908460419851598, "grad_norm": 1.0052720308303833, "learning_rate": 1.0781941668617285e-05, "loss": 0.5613, "step": 3291 }, { "epoch": 0.49099518997725494, "grad_norm": 1.1717809438705444, "learning_rate": 1.077712463879746e-05, "loss": 0.6428, "step": 3292 }, { "epoch": 0.4911443379693501, "grad_norm": 1.1110461950302124, "learning_rate": 1.0772307427552903e-05, "loss": 0.5366, "step": 3293 }, { "epoch": 0.49129348596144523, "grad_norm": 1.0062803030014038, "learning_rate": 1.0767490036008225e-05, "loss": 0.5679, "step": 3294 }, { "epoch": 0.4914426339535404, "grad_norm": 1.1414395570755005, "learning_rate": 1.0762672465288079e-05, "loss": 0.6128, "step": 3295 }, { "epoch": 0.4915917819456356, "grad_norm": 1.1568862199783325, "learning_rate": 1.0757854716517156e-05, "loss": 0.6719, "step": 3296 }, { "epoch": 0.4917409299377307, "grad_norm": 1.1296136379241943, "learning_rate": 1.075303679082019e-05, "loss": 0.5748, "step": 3297 }, { "epoch": 0.49189007792982586, "grad_norm": 1.2486367225646973, "learning_rate": 1.0748218689321954e-05, "loss": 0.5282, "step": 3298 }, { "epoch": 0.492039225921921, "grad_norm": 1.3058205842971802, "learning_rate": 1.0743400413147269e-05, "loss": 0.6777, "step": 3299 }, { "epoch": 0.4921883739140162, "grad_norm": 1.025781273841858, "learning_rate": 1.0738581963420994e-05, "loss": 0.6087, "step": 3300 }, { "epoch": 0.49233752190611135, "grad_norm": 1.2273023128509521, "learning_rate": 1.073376334126802e-05, "loss": 0.5858, "step": 3301 }, { "epoch": 0.4924866698982065, "grad_norm": 1.2475558519363403, "learning_rate": 1.0728944547813289e-05, "loss": 0.6197, "step": 3302 }, { "epoch": 0.49263581789030164, "grad_norm": 1.1121315956115723, "learning_rate": 1.072412558418178e-05, "loss": 0.5832, "step": 3303 }, { "epoch": 0.4927849658823968, "grad_norm": 1.1717820167541504, "learning_rate": 1.0719306451498513e-05, "loss": 0.6619, "step": 3304 }, { "epoch": 0.492934113874492, "grad_norm": 1.1067827939987183, "learning_rate": 1.0714487150888537e-05, "loss": 0.5657, "step": 3305 }, { "epoch": 0.49308326186658713, "grad_norm": 1.2487419843673706, "learning_rate": 1.0709667683476962e-05, "loss": 0.6243, "step": 3306 }, { "epoch": 0.4932324098586823, "grad_norm": 1.0411467552185059, "learning_rate": 1.0704848050388905e-05, "loss": 0.5397, "step": 3307 }, { "epoch": 0.4933815578507774, "grad_norm": 1.1758074760437012, "learning_rate": 1.0700028252749559e-05, "loss": 0.6411, "step": 3308 }, { "epoch": 0.49353070584287256, "grad_norm": 1.0658329725265503, "learning_rate": 1.069520829168413e-05, "loss": 0.5274, "step": 3309 }, { "epoch": 0.49367985383496776, "grad_norm": 1.135272741317749, "learning_rate": 1.0690388168317863e-05, "loss": 0.6663, "step": 3310 }, { "epoch": 0.4938290018270629, "grad_norm": 1.0717352628707886, "learning_rate": 1.0685567883776054e-05, "loss": 0.6362, "step": 3311 }, { "epoch": 0.49397814981915805, "grad_norm": 1.2899351119995117, "learning_rate": 1.0680747439184025e-05, "loss": 0.6518, "step": 3312 }, { "epoch": 0.4941272978112532, "grad_norm": 1.2032660245895386, "learning_rate": 1.0675926835667142e-05, "loss": 0.5814, "step": 3313 }, { "epoch": 0.4942764458033484, "grad_norm": 1.1532167196273804, "learning_rate": 1.0671106074350805e-05, "loss": 0.5674, "step": 3314 }, { "epoch": 0.49442559379544354, "grad_norm": 1.132130742073059, "learning_rate": 1.0666285156360451e-05, "loss": 0.6436, "step": 3315 }, { "epoch": 0.4945747417875387, "grad_norm": 1.2020263671875, "learning_rate": 1.0661464082821558e-05, "loss": 0.664, "step": 3316 }, { "epoch": 0.49472388977963383, "grad_norm": 1.0650843381881714, "learning_rate": 1.065664285485963e-05, "loss": 0.5751, "step": 3317 }, { "epoch": 0.494873037771729, "grad_norm": 1.1429837942123413, "learning_rate": 1.0651821473600218e-05, "loss": 0.5654, "step": 3318 }, { "epoch": 0.4950221857638242, "grad_norm": 1.1465572118759155, "learning_rate": 1.0646999940168908e-05, "loss": 0.5944, "step": 3319 }, { "epoch": 0.4951713337559193, "grad_norm": 1.211782455444336, "learning_rate": 1.064217825569131e-05, "loss": 0.6243, "step": 3320 }, { "epoch": 0.49532048174801446, "grad_norm": 1.1764492988586426, "learning_rate": 1.0637356421293077e-05, "loss": 0.5783, "step": 3321 }, { "epoch": 0.4954696297401096, "grad_norm": 1.117879033088684, "learning_rate": 1.0632534438099906e-05, "loss": 0.5772, "step": 3322 }, { "epoch": 0.4956187777322048, "grad_norm": 1.0456784963607788, "learning_rate": 1.0627712307237513e-05, "loss": 0.6082, "step": 3323 }, { "epoch": 0.49576792572429995, "grad_norm": 0.8833367824554443, "learning_rate": 1.0622890029831656e-05, "loss": 0.693, "step": 3324 }, { "epoch": 0.4959170737163951, "grad_norm": 1.1456711292266846, "learning_rate": 1.0618067607008127e-05, "loss": 0.6655, "step": 3325 }, { "epoch": 0.49606622170849024, "grad_norm": 1.2094155550003052, "learning_rate": 1.0613245039892755e-05, "loss": 0.5563, "step": 3326 }, { "epoch": 0.4962153697005854, "grad_norm": 1.133793592453003, "learning_rate": 1.0608422329611393e-05, "loss": 0.6103, "step": 3327 }, { "epoch": 0.4963645176926806, "grad_norm": 1.1191765069961548, "learning_rate": 1.0603599477289939e-05, "loss": 0.6145, "step": 3328 }, { "epoch": 0.49651366568477573, "grad_norm": 1.051607370376587, "learning_rate": 1.0598776484054313e-05, "loss": 0.5872, "step": 3329 }, { "epoch": 0.4966628136768709, "grad_norm": 1.1579209566116333, "learning_rate": 1.0593953351030481e-05, "loss": 0.673, "step": 3330 }, { "epoch": 0.496811961668966, "grad_norm": 1.1712790727615356, "learning_rate": 1.0589130079344431e-05, "loss": 0.6992, "step": 3331 }, { "epoch": 0.49696110966106116, "grad_norm": 1.1659058332443237, "learning_rate": 1.0584306670122186e-05, "loss": 0.6725, "step": 3332 }, { "epoch": 0.49711025765315636, "grad_norm": 1.1043035984039307, "learning_rate": 1.05794831244898e-05, "loss": 0.6187, "step": 3333 }, { "epoch": 0.4972594056452515, "grad_norm": 1.15399968624115, "learning_rate": 1.0574659443573367e-05, "loss": 0.6782, "step": 3334 }, { "epoch": 0.49740855363734665, "grad_norm": 0.8679347634315491, "learning_rate": 1.0569835628498998e-05, "loss": 0.6494, "step": 3335 }, { "epoch": 0.4975577016294418, "grad_norm": 1.1126214265823364, "learning_rate": 1.0565011680392852e-05, "loss": 0.5973, "step": 3336 }, { "epoch": 0.497706849621537, "grad_norm": 1.1838756799697876, "learning_rate": 1.0560187600381104e-05, "loss": 0.5865, "step": 3337 }, { "epoch": 0.49785599761363214, "grad_norm": 1.15571129322052, "learning_rate": 1.0555363389589966e-05, "loss": 0.6677, "step": 3338 }, { "epoch": 0.4980051456057273, "grad_norm": 1.1414746046066284, "learning_rate": 1.0550539049145687e-05, "loss": 0.6532, "step": 3339 }, { "epoch": 0.49815429359782243, "grad_norm": 1.2612096071243286, "learning_rate": 1.054571458017454e-05, "loss": 0.6145, "step": 3340 }, { "epoch": 0.4983034415899176, "grad_norm": 1.0205719470977783, "learning_rate": 1.054088998380282e-05, "loss": 0.559, "step": 3341 }, { "epoch": 0.4984525895820128, "grad_norm": 0.9393166303634644, "learning_rate": 1.0536065261156864e-05, "loss": 0.6993, "step": 3342 }, { "epoch": 0.4986017375741079, "grad_norm": 1.1593719720840454, "learning_rate": 1.053124041336304e-05, "loss": 0.6514, "step": 3343 }, { "epoch": 0.49875088556620306, "grad_norm": 1.167070746421814, "learning_rate": 1.0526415441547732e-05, "loss": 0.5366, "step": 3344 }, { "epoch": 0.4989000335582982, "grad_norm": 1.140015959739685, "learning_rate": 1.0521590346837366e-05, "loss": 0.6889, "step": 3345 }, { "epoch": 0.49904918155039335, "grad_norm": 1.0211353302001953, "learning_rate": 1.0516765130358389e-05, "loss": 0.5276, "step": 3346 }, { "epoch": 0.49919832954248855, "grad_norm": 1.1962296962738037, "learning_rate": 1.0511939793237275e-05, "loss": 0.6385, "step": 3347 }, { "epoch": 0.4993474775345837, "grad_norm": 0.9963889718055725, "learning_rate": 1.0507114336600539e-05, "loss": 0.559, "step": 3348 }, { "epoch": 0.49949662552667884, "grad_norm": 1.0900012254714966, "learning_rate": 1.0502288761574706e-05, "loss": 0.6904, "step": 3349 }, { "epoch": 0.499645773518774, "grad_norm": 1.1372665166854858, "learning_rate": 1.0497463069286343e-05, "loss": 0.6854, "step": 3350 }, { "epoch": 0.4997949215108692, "grad_norm": 1.1484181880950928, "learning_rate": 1.0492637260862036e-05, "loss": 0.6512, "step": 3351 }, { "epoch": 0.49994406950296433, "grad_norm": 1.0698271989822388, "learning_rate": 1.04878113374284e-05, "loss": 0.5623, "step": 3352 }, { "epoch": 0.5000932174950595, "grad_norm": 1.1398138999938965, "learning_rate": 1.0482985300112081e-05, "loss": 0.5666, "step": 3353 }, { "epoch": 0.5002423654871546, "grad_norm": 1.0956355333328247, "learning_rate": 1.0478159150039745e-05, "loss": 0.6287, "step": 3354 }, { "epoch": 0.5003915134792498, "grad_norm": 1.0505266189575195, "learning_rate": 1.047333288833809e-05, "loss": 0.4768, "step": 3355 }, { "epoch": 0.5005406614713449, "grad_norm": 1.1167562007904053, "learning_rate": 1.046850651613384e-05, "loss": 0.6005, "step": 3356 }, { "epoch": 0.5006898094634401, "grad_norm": 1.0685821771621704, "learning_rate": 1.0463680034553738e-05, "loss": 0.5477, "step": 3357 }, { "epoch": 0.5008389574555353, "grad_norm": 1.2429670095443726, "learning_rate": 1.045885344472456e-05, "loss": 0.6423, "step": 3358 }, { "epoch": 0.5009881054476304, "grad_norm": 1.1279290914535522, "learning_rate": 1.0454026747773103e-05, "loss": 0.5963, "step": 3359 }, { "epoch": 0.5011372534397256, "grad_norm": 1.1912225484848022, "learning_rate": 1.0449199944826185e-05, "loss": 0.6489, "step": 3360 }, { "epoch": 0.5012864014318207, "grad_norm": 1.1952263116836548, "learning_rate": 1.0444373037010667e-05, "loss": 0.6686, "step": 3361 }, { "epoch": 0.5014355494239159, "grad_norm": 1.1902726888656616, "learning_rate": 1.0439546025453411e-05, "loss": 0.6392, "step": 3362 }, { "epoch": 0.5015846974160111, "grad_norm": 1.2506473064422607, "learning_rate": 1.0434718911281316e-05, "loss": 0.7171, "step": 3363 }, { "epoch": 0.5017338454081062, "grad_norm": 1.2365741729736328, "learning_rate": 1.0429891695621304e-05, "loss": 0.7199, "step": 3364 }, { "epoch": 0.5018829934002014, "grad_norm": 1.071617603302002, "learning_rate": 1.042506437960032e-05, "loss": 0.5457, "step": 3365 }, { "epoch": 0.5020321413922965, "grad_norm": 1.1395224332809448, "learning_rate": 1.0420236964345332e-05, "loss": 0.5501, "step": 3366 }, { "epoch": 0.5021812893843917, "grad_norm": 1.0747876167297363, "learning_rate": 1.041540945098333e-05, "loss": 0.5478, "step": 3367 }, { "epoch": 0.5023304373764869, "grad_norm": 1.154970407485962, "learning_rate": 1.0410581840641324e-05, "loss": 0.6007, "step": 3368 }, { "epoch": 0.502479585368582, "grad_norm": 1.1992632150650024, "learning_rate": 1.040575413444636e-05, "loss": 0.6473, "step": 3369 }, { "epoch": 0.5026287333606771, "grad_norm": 1.0249354839324951, "learning_rate": 1.040092633352549e-05, "loss": 0.5491, "step": 3370 }, { "epoch": 0.5027778813527722, "grad_norm": 1.205824851989746, "learning_rate": 1.0396098439005796e-05, "loss": 0.6717, "step": 3371 }, { "epoch": 0.5029270293448674, "grad_norm": 1.1325596570968628, "learning_rate": 1.0391270452014382e-05, "loss": 0.59, "step": 3372 }, { "epoch": 0.5030761773369626, "grad_norm": 1.1587392091751099, "learning_rate": 1.0386442373678372e-05, "loss": 0.6812, "step": 3373 }, { "epoch": 0.5032253253290577, "grad_norm": 1.2882481813430786, "learning_rate": 1.038161420512491e-05, "loss": 0.6431, "step": 3374 }, { "epoch": 0.5033744733211529, "grad_norm": 1.213577389717102, "learning_rate": 1.0376785947481168e-05, "loss": 0.6246, "step": 3375 }, { "epoch": 0.503523621313248, "grad_norm": 1.188665509223938, "learning_rate": 1.037195760187433e-05, "loss": 0.6524, "step": 3376 }, { "epoch": 0.5036727693053432, "grad_norm": 1.0980191230773926, "learning_rate": 1.03671291694316e-05, "loss": 0.6156, "step": 3377 }, { "epoch": 0.5038219172974384, "grad_norm": 1.132833480834961, "learning_rate": 1.0362300651280217e-05, "loss": 0.6154, "step": 3378 }, { "epoch": 0.5039710652895335, "grad_norm": 1.2190299034118652, "learning_rate": 1.0357472048547423e-05, "loss": 0.6063, "step": 3379 }, { "epoch": 0.5041202132816287, "grad_norm": 1.0819165706634521, "learning_rate": 1.0352643362360486e-05, "loss": 0.548, "step": 3380 }, { "epoch": 0.5042693612737239, "grad_norm": 1.1371408700942993, "learning_rate": 1.0347814593846694e-05, "loss": 0.6161, "step": 3381 }, { "epoch": 0.504418509265819, "grad_norm": 1.1080373525619507, "learning_rate": 1.0342985744133358e-05, "loss": 0.5586, "step": 3382 }, { "epoch": 0.5045676572579142, "grad_norm": 1.0142276287078857, "learning_rate": 1.0338156814347799e-05, "loss": 0.4968, "step": 3383 }, { "epoch": 0.5047168052500093, "grad_norm": 1.247387170791626, "learning_rate": 1.0333327805617367e-05, "loss": 0.5792, "step": 3384 }, { "epoch": 0.5048659532421045, "grad_norm": 1.1162232160568237, "learning_rate": 1.0328498719069416e-05, "loss": 0.6067, "step": 3385 }, { "epoch": 0.5050151012341997, "grad_norm": 1.2029566764831543, "learning_rate": 1.0323669555831332e-05, "loss": 0.6343, "step": 3386 }, { "epoch": 0.5051642492262948, "grad_norm": 1.161299467086792, "learning_rate": 1.0318840317030518e-05, "loss": 0.6055, "step": 3387 }, { "epoch": 0.50531339721839, "grad_norm": 1.1611725091934204, "learning_rate": 1.0314011003794386e-05, "loss": 0.6856, "step": 3388 }, { "epoch": 0.5054625452104851, "grad_norm": 1.1791132688522339, "learning_rate": 1.0309181617250374e-05, "loss": 0.6357, "step": 3389 }, { "epoch": 0.5056116932025803, "grad_norm": 1.2149795293807983, "learning_rate": 1.030435215852593e-05, "loss": 0.6662, "step": 3390 }, { "epoch": 0.5057608411946755, "grad_norm": 1.2058404684066772, "learning_rate": 1.0299522628748522e-05, "loss": 0.6018, "step": 3391 }, { "epoch": 0.5059099891867705, "grad_norm": 1.1825661659240723, "learning_rate": 1.0294693029045636e-05, "loss": 0.6035, "step": 3392 }, { "epoch": 0.5060591371788657, "grad_norm": 1.1809464693069458, "learning_rate": 1.0289863360544775e-05, "loss": 0.5957, "step": 3393 }, { "epoch": 0.5062082851709608, "grad_norm": 1.1470078229904175, "learning_rate": 1.0285033624373453e-05, "loss": 0.6349, "step": 3394 }, { "epoch": 0.506357433163056, "grad_norm": 1.198467493057251, "learning_rate": 1.0280203821659203e-05, "loss": 0.6415, "step": 3395 }, { "epoch": 0.5065065811551512, "grad_norm": 1.266183853149414, "learning_rate": 1.0275373953529572e-05, "loss": 0.6926, "step": 3396 }, { "epoch": 0.5066557291472463, "grad_norm": 1.1842331886291504, "learning_rate": 1.027054402111213e-05, "loss": 0.6571, "step": 3397 }, { "epoch": 0.5068048771393415, "grad_norm": 1.127766489982605, "learning_rate": 1.0265714025534451e-05, "loss": 0.5802, "step": 3398 }, { "epoch": 0.5069540251314366, "grad_norm": 1.127149224281311, "learning_rate": 1.0260883967924123e-05, "loss": 0.6128, "step": 3399 }, { "epoch": 0.5071031731235318, "grad_norm": 1.1468976736068726, "learning_rate": 1.0256053849408768e-05, "loss": 0.6202, "step": 3400 }, { "epoch": 0.507252321115627, "grad_norm": 1.0135716199874878, "learning_rate": 1.0251223671115995e-05, "loss": 0.5839, "step": 3401 }, { "epoch": 0.5074014691077221, "grad_norm": 1.0857785940170288, "learning_rate": 1.0246393434173446e-05, "loss": 0.5444, "step": 3402 }, { "epoch": 0.5075506170998173, "grad_norm": 1.2690538167953491, "learning_rate": 1.024156313970877e-05, "loss": 0.6479, "step": 3403 }, { "epoch": 0.5076997650919125, "grad_norm": 1.2790607213974, "learning_rate": 1.023673278884963e-05, "loss": 0.6994, "step": 3404 }, { "epoch": 0.5078489130840076, "grad_norm": 1.1085753440856934, "learning_rate": 1.0231902382723704e-05, "loss": 0.5841, "step": 3405 }, { "epoch": 0.5079980610761028, "grad_norm": 1.1809300184249878, "learning_rate": 1.022707192245868e-05, "loss": 0.5881, "step": 3406 }, { "epoch": 0.5081472090681979, "grad_norm": 1.086830735206604, "learning_rate": 1.0222241409182256e-05, "loss": 0.5918, "step": 3407 }, { "epoch": 0.5082963570602931, "grad_norm": 1.090157389640808, "learning_rate": 1.0217410844022154e-05, "loss": 0.5919, "step": 3408 }, { "epoch": 0.5084455050523883, "grad_norm": 1.0796082019805908, "learning_rate": 1.0212580228106094e-05, "loss": 0.5935, "step": 3409 }, { "epoch": 0.5085946530444834, "grad_norm": 1.0829477310180664, "learning_rate": 1.0207749562561817e-05, "loss": 0.6022, "step": 3410 }, { "epoch": 0.5087438010365786, "grad_norm": 1.1552993059158325, "learning_rate": 1.0202918848517075e-05, "loss": 0.6335, "step": 3411 }, { "epoch": 0.5088929490286737, "grad_norm": 1.065228819847107, "learning_rate": 1.0198088087099624e-05, "loss": 0.4843, "step": 3412 }, { "epoch": 0.5090420970207689, "grad_norm": 1.1277549266815186, "learning_rate": 1.0193257279437238e-05, "loss": 0.6592, "step": 3413 }, { "epoch": 0.5091912450128641, "grad_norm": 1.086295247077942, "learning_rate": 1.0188426426657705e-05, "loss": 0.5718, "step": 3414 }, { "epoch": 0.5093403930049591, "grad_norm": 1.2363958358764648, "learning_rate": 1.0183595529888812e-05, "loss": 0.5954, "step": 3415 }, { "epoch": 0.5094895409970543, "grad_norm": 1.1052767038345337, "learning_rate": 1.0178764590258363e-05, "loss": 0.613, "step": 3416 }, { "epoch": 0.5096386889891494, "grad_norm": 1.187432050704956, "learning_rate": 1.0173933608894177e-05, "loss": 0.5984, "step": 3417 }, { "epoch": 0.5097878369812446, "grad_norm": 1.195149540901184, "learning_rate": 1.0169102586924077e-05, "loss": 0.6117, "step": 3418 }, { "epoch": 0.5099369849733398, "grad_norm": 1.1563210487365723, "learning_rate": 1.016427152547589e-05, "loss": 0.6253, "step": 3419 }, { "epoch": 0.5100861329654349, "grad_norm": 1.1781470775604248, "learning_rate": 1.0159440425677466e-05, "loss": 0.6548, "step": 3420 }, { "epoch": 0.5102352809575301, "grad_norm": 1.157505750656128, "learning_rate": 1.0154609288656647e-05, "loss": 0.6163, "step": 3421 }, { "epoch": 0.5103844289496252, "grad_norm": 1.1597366333007812, "learning_rate": 1.01497781155413e-05, "loss": 0.6204, "step": 3422 }, { "epoch": 0.5105335769417204, "grad_norm": 1.1409029960632324, "learning_rate": 1.0144946907459294e-05, "loss": 0.6007, "step": 3423 }, { "epoch": 0.5106827249338156, "grad_norm": 1.1560112237930298, "learning_rate": 1.0140115665538502e-05, "loss": 0.6527, "step": 3424 }, { "epoch": 0.5108318729259107, "grad_norm": 1.1049169301986694, "learning_rate": 1.0135284390906805e-05, "loss": 0.6241, "step": 3425 }, { "epoch": 0.5109810209180059, "grad_norm": 1.1827656030654907, "learning_rate": 1.0130453084692108e-05, "loss": 0.685, "step": 3426 }, { "epoch": 0.5111301689101011, "grad_norm": 1.205468773841858, "learning_rate": 1.0125621748022295e-05, "loss": 0.648, "step": 3427 }, { "epoch": 0.5112793169021962, "grad_norm": 1.091343879699707, "learning_rate": 1.0120790382025282e-05, "loss": 0.6516, "step": 3428 }, { "epoch": 0.5114284648942914, "grad_norm": 1.2110403776168823, "learning_rate": 1.0115958987828977e-05, "loss": 0.6564, "step": 3429 }, { "epoch": 0.5115776128863865, "grad_norm": 1.216809868812561, "learning_rate": 1.0111127566561305e-05, "loss": 0.5695, "step": 3430 }, { "epoch": 0.5117267608784817, "grad_norm": 1.1682714223861694, "learning_rate": 1.0106296119350192e-05, "loss": 0.7098, "step": 3431 }, { "epoch": 0.5118759088705769, "grad_norm": 1.1391018629074097, "learning_rate": 1.0101464647323567e-05, "loss": 0.5637, "step": 3432 }, { "epoch": 0.512025056862672, "grad_norm": 1.1749440431594849, "learning_rate": 1.009663315160937e-05, "loss": 0.6563, "step": 3433 }, { "epoch": 0.5121742048547672, "grad_norm": 1.1556930541992188, "learning_rate": 1.0091801633335544e-05, "loss": 0.6383, "step": 3434 }, { "epoch": 0.5123233528468623, "grad_norm": 1.1587823629379272, "learning_rate": 1.0086970093630036e-05, "loss": 0.5943, "step": 3435 }, { "epoch": 0.5124725008389575, "grad_norm": 0.8854230046272278, "learning_rate": 1.0082138533620803e-05, "loss": 0.6812, "step": 3436 }, { "epoch": 0.5126216488310527, "grad_norm": 1.1431361436843872, "learning_rate": 1.0077306954435804e-05, "loss": 0.5767, "step": 3437 }, { "epoch": 0.5127707968231477, "grad_norm": 1.1581790447235107, "learning_rate": 1.0072475357202998e-05, "loss": 0.6229, "step": 3438 }, { "epoch": 0.512919944815243, "grad_norm": 1.0433828830718994, "learning_rate": 1.0067643743050357e-05, "loss": 0.5301, "step": 3439 }, { "epoch": 0.513069092807338, "grad_norm": 1.2129356861114502, "learning_rate": 1.006281211310585e-05, "loss": 0.7134, "step": 3440 }, { "epoch": 0.5132182407994332, "grad_norm": 1.1844213008880615, "learning_rate": 1.0057980468497453e-05, "loss": 0.5457, "step": 3441 }, { "epoch": 0.5133673887915284, "grad_norm": 1.0615838766098022, "learning_rate": 1.0053148810353146e-05, "loss": 0.503, "step": 3442 }, { "epoch": 0.5135165367836235, "grad_norm": 1.2967177629470825, "learning_rate": 1.0048317139800906e-05, "loss": 0.6893, "step": 3443 }, { "epoch": 0.5136656847757187, "grad_norm": 1.1766116619110107, "learning_rate": 1.0043485457968717e-05, "loss": 0.629, "step": 3444 }, { "epoch": 0.5138148327678138, "grad_norm": 1.084014892578125, "learning_rate": 1.0038653765984573e-05, "loss": 0.5145, "step": 3445 }, { "epoch": 0.513963980759909, "grad_norm": 1.143600344657898, "learning_rate": 1.0033822064976457e-05, "loss": 0.6386, "step": 3446 }, { "epoch": 0.5141131287520042, "grad_norm": 1.1767503023147583, "learning_rate": 1.0028990356072364e-05, "loss": 0.6165, "step": 3447 }, { "epoch": 0.5142622767440993, "grad_norm": 1.2110852003097534, "learning_rate": 1.0024158640400288e-05, "loss": 0.6328, "step": 3448 }, { "epoch": 0.5144114247361945, "grad_norm": 1.1396961212158203, "learning_rate": 1.0019326919088221e-05, "loss": 0.5849, "step": 3449 }, { "epoch": 0.5145605727282897, "grad_norm": 1.1801837682724, "learning_rate": 1.0014495193264162e-05, "loss": 0.6154, "step": 3450 }, { "epoch": 0.5147097207203848, "grad_norm": 1.1244757175445557, "learning_rate": 1.0009663464056108e-05, "loss": 0.5761, "step": 3451 }, { "epoch": 0.51485886871248, "grad_norm": 1.3542472124099731, "learning_rate": 1.0004831732592053e-05, "loss": 0.6621, "step": 3452 }, { "epoch": 0.5150080167045751, "grad_norm": 1.1632084846496582, "learning_rate": 1e-05, "loss": 0.6332, "step": 3453 }, { "epoch": 0.5151571646966703, "grad_norm": 1.2121202945709229, "learning_rate": 9.99516826740795e-06, "loss": 0.6431, "step": 3454 }, { "epoch": 0.5153063126887655, "grad_norm": 1.1381028890609741, "learning_rate": 9.990336535943897e-06, "loss": 0.6344, "step": 3455 }, { "epoch": 0.5154554606808606, "grad_norm": 1.093508005142212, "learning_rate": 9.985504806735841e-06, "loss": 0.6005, "step": 3456 }, { "epoch": 0.5156046086729558, "grad_norm": 1.1268192529678345, "learning_rate": 9.98067308091178e-06, "loss": 0.5877, "step": 3457 }, { "epoch": 0.5157537566650509, "grad_norm": 1.2459641695022583, "learning_rate": 9.975841359599712e-06, "loss": 0.6798, "step": 3458 }, { "epoch": 0.515902904657146, "grad_norm": 1.186470866203308, "learning_rate": 9.971009643927636e-06, "loss": 0.5141, "step": 3459 }, { "epoch": 0.5160520526492413, "grad_norm": 1.0631085634231567, "learning_rate": 9.966177935023545e-06, "loss": 0.6377, "step": 3460 }, { "epoch": 0.5162012006413363, "grad_norm": 1.3106721639633179, "learning_rate": 9.96134623401543e-06, "loss": 0.622, "step": 3461 }, { "epoch": 0.5163503486334315, "grad_norm": 1.1872702836990356, "learning_rate": 9.956514542031286e-06, "loss": 0.6049, "step": 3462 }, { "epoch": 0.5164994966255266, "grad_norm": 1.1802653074264526, "learning_rate": 9.9516828601991e-06, "loss": 0.6122, "step": 3463 }, { "epoch": 0.5166486446176218, "grad_norm": 1.2069756984710693, "learning_rate": 9.94685118964686e-06, "loss": 0.6814, "step": 3464 }, { "epoch": 0.516797792609717, "grad_norm": 1.1615475416183472, "learning_rate": 9.942019531502552e-06, "loss": 0.6509, "step": 3465 }, { "epoch": 0.5169469406018121, "grad_norm": 1.1632604598999023, "learning_rate": 9.937187886894153e-06, "loss": 0.5745, "step": 3466 }, { "epoch": 0.5170960885939073, "grad_norm": 1.2088065147399902, "learning_rate": 9.932356256949643e-06, "loss": 0.6954, "step": 3467 }, { "epoch": 0.5172452365860024, "grad_norm": 1.2269186973571777, "learning_rate": 9.927524642797003e-06, "loss": 0.6476, "step": 3468 }, { "epoch": 0.5173943845780976, "grad_norm": 1.128551721572876, "learning_rate": 9.9226930455642e-06, "loss": 0.6629, "step": 3469 }, { "epoch": 0.5175435325701928, "grad_norm": 1.2438509464263916, "learning_rate": 9.9178614663792e-06, "loss": 0.6773, "step": 3470 }, { "epoch": 0.5176926805622879, "grad_norm": 1.2193905115127563, "learning_rate": 9.91302990636997e-06, "loss": 0.5922, "step": 3471 }, { "epoch": 0.5178418285543831, "grad_norm": 0.8587470650672913, "learning_rate": 9.908198366664461e-06, "loss": 0.6804, "step": 3472 }, { "epoch": 0.5179909765464782, "grad_norm": 1.067629337310791, "learning_rate": 9.903366848390635e-06, "loss": 0.5646, "step": 3473 }, { "epoch": 0.5181401245385734, "grad_norm": 1.18639075756073, "learning_rate": 9.898535352676438e-06, "loss": 0.617, "step": 3474 }, { "epoch": 0.5182892725306686, "grad_norm": 1.0667093992233276, "learning_rate": 9.893703880649808e-06, "loss": 0.5571, "step": 3475 }, { "epoch": 0.5184384205227637, "grad_norm": 1.1090656518936157, "learning_rate": 9.888872433438695e-06, "loss": 0.5998, "step": 3476 }, { "epoch": 0.5185875685148589, "grad_norm": 1.14238703250885, "learning_rate": 9.884041012171023e-06, "loss": 0.6356, "step": 3477 }, { "epoch": 0.5187367165069541, "grad_norm": 1.1378806829452515, "learning_rate": 9.879209617974721e-06, "loss": 0.5856, "step": 3478 }, { "epoch": 0.5188858644990492, "grad_norm": 1.2013061046600342, "learning_rate": 9.874378251977709e-06, "loss": 0.7126, "step": 3479 }, { "epoch": 0.5190350124911444, "grad_norm": 1.145403265953064, "learning_rate": 9.869546915307897e-06, "loss": 0.6379, "step": 3480 }, { "epoch": 0.5191841604832395, "grad_norm": 1.166269063949585, "learning_rate": 9.864715609093196e-06, "loss": 0.6505, "step": 3481 }, { "epoch": 0.5193333084753347, "grad_norm": 1.1262315511703491, "learning_rate": 9.859884334461503e-06, "loss": 0.6868, "step": 3482 }, { "epoch": 0.5194824564674299, "grad_norm": 1.0910593271255493, "learning_rate": 9.85505309254071e-06, "loss": 0.5998, "step": 3483 }, { "epoch": 0.5196316044595249, "grad_norm": 1.034791350364685, "learning_rate": 9.8502218844587e-06, "loss": 0.4956, "step": 3484 }, { "epoch": 0.5197807524516201, "grad_norm": 1.0798858404159546, "learning_rate": 9.845390711343356e-06, "loss": 0.5668, "step": 3485 }, { "epoch": 0.5199299004437152, "grad_norm": 1.1961348056793213, "learning_rate": 9.840559574322538e-06, "loss": 0.6016, "step": 3486 }, { "epoch": 0.5200790484358104, "grad_norm": 1.1739336252212524, "learning_rate": 9.835728474524113e-06, "loss": 0.5691, "step": 3487 }, { "epoch": 0.5202281964279056, "grad_norm": 1.1675879955291748, "learning_rate": 9.830897413075926e-06, "loss": 0.7021, "step": 3488 }, { "epoch": 0.5203773444200007, "grad_norm": 1.1905648708343506, "learning_rate": 9.826066391105824e-06, "loss": 0.6038, "step": 3489 }, { "epoch": 0.5205264924120959, "grad_norm": 1.2461981773376465, "learning_rate": 9.82123540974164e-06, "loss": 0.6545, "step": 3490 }, { "epoch": 0.520675640404191, "grad_norm": 1.1091901063919067, "learning_rate": 9.816404470111191e-06, "loss": 0.6585, "step": 3491 }, { "epoch": 0.5208247883962862, "grad_norm": 1.2071126699447632, "learning_rate": 9.8115735733423e-06, "loss": 0.6475, "step": 3492 }, { "epoch": 0.5209739363883814, "grad_norm": 1.1821632385253906, "learning_rate": 9.806742720562762e-06, "loss": 0.6351, "step": 3493 }, { "epoch": 0.5211230843804765, "grad_norm": 1.3154983520507812, "learning_rate": 9.801911912900378e-06, "loss": 0.6614, "step": 3494 }, { "epoch": 0.5212722323725717, "grad_norm": 0.9852285981178284, "learning_rate": 9.797081151482928e-06, "loss": 0.5439, "step": 3495 }, { "epoch": 0.5214213803646668, "grad_norm": 1.2605128288269043, "learning_rate": 9.792250437438186e-06, "loss": 0.5887, "step": 3496 }, { "epoch": 0.521570528356762, "grad_norm": 1.0396076440811157, "learning_rate": 9.787419771893907e-06, "loss": 0.5692, "step": 3497 }, { "epoch": 0.5217196763488572, "grad_norm": 1.1076364517211914, "learning_rate": 9.78258915597785e-06, "loss": 0.6162, "step": 3498 }, { "epoch": 0.5218688243409523, "grad_norm": 1.175223708152771, "learning_rate": 9.777758590817746e-06, "loss": 0.6597, "step": 3499 }, { "epoch": 0.5220179723330475, "grad_norm": 1.233789324760437, "learning_rate": 9.772928077541325e-06, "loss": 0.6357, "step": 3500 }, { "epoch": 0.5221671203251427, "grad_norm": 1.2555897235870361, "learning_rate": 9.768097617276303e-06, "loss": 0.6803, "step": 3501 }, { "epoch": 0.5223162683172378, "grad_norm": 1.0117361545562744, "learning_rate": 9.763267211150372e-06, "loss": 0.5491, "step": 3502 }, { "epoch": 0.522465416309333, "grad_norm": 1.0790002346038818, "learning_rate": 9.758436860291232e-06, "loss": 0.5565, "step": 3503 }, { "epoch": 0.522614564301428, "grad_norm": 1.1204315423965454, "learning_rate": 9.753606565826556e-06, "loss": 0.5661, "step": 3504 }, { "epoch": 0.5227637122935233, "grad_norm": 1.2363835573196411, "learning_rate": 9.748776328884008e-06, "loss": 0.6578, "step": 3505 }, { "epoch": 0.5229128602856185, "grad_norm": 1.0749415159225464, "learning_rate": 9.743946150591237e-06, "loss": 0.5937, "step": 3506 }, { "epoch": 0.5230620082777135, "grad_norm": 1.0853941440582275, "learning_rate": 9.739116032075879e-06, "loss": 0.5927, "step": 3507 }, { "epoch": 0.5232111562698087, "grad_norm": 1.1529350280761719, "learning_rate": 9.734285974465554e-06, "loss": 0.6118, "step": 3508 }, { "epoch": 0.5233603042619038, "grad_norm": 1.2236303091049194, "learning_rate": 9.729455978887877e-06, "loss": 0.6711, "step": 3509 }, { "epoch": 0.523509452253999, "grad_norm": 1.183010458946228, "learning_rate": 9.72462604647043e-06, "loss": 0.6832, "step": 3510 }, { "epoch": 0.5236586002460942, "grad_norm": 1.1944551467895508, "learning_rate": 9.719796178340799e-06, "loss": 0.6844, "step": 3511 }, { "epoch": 0.5238077482381893, "grad_norm": 0.9828901886940002, "learning_rate": 9.71496637562655e-06, "loss": 0.5135, "step": 3512 }, { "epoch": 0.5239568962302845, "grad_norm": 1.0289738178253174, "learning_rate": 9.710136639455229e-06, "loss": 0.5653, "step": 3513 }, { "epoch": 0.5241060442223796, "grad_norm": 1.1070133447647095, "learning_rate": 9.705306970954365e-06, "loss": 0.6399, "step": 3514 }, { "epoch": 0.5242551922144748, "grad_norm": 1.18930983543396, "learning_rate": 9.700477371251481e-06, "loss": 0.6852, "step": 3515 }, { "epoch": 0.52440434020657, "grad_norm": 1.099847674369812, "learning_rate": 9.695647841474073e-06, "loss": 0.5879, "step": 3516 }, { "epoch": 0.5245534881986651, "grad_norm": 1.1479343175888062, "learning_rate": 9.69081838274963e-06, "loss": 0.6527, "step": 3517 }, { "epoch": 0.5247026361907603, "grad_norm": 1.2493524551391602, "learning_rate": 9.685988996205616e-06, "loss": 0.6179, "step": 3518 }, { "epoch": 0.5248517841828554, "grad_norm": 1.2001408338546753, "learning_rate": 9.681159682969483e-06, "loss": 0.58, "step": 3519 }, { "epoch": 0.5250009321749506, "grad_norm": 1.0806388854980469, "learning_rate": 9.676330444168668e-06, "loss": 0.5387, "step": 3520 }, { "epoch": 0.5251500801670458, "grad_norm": 1.1219720840454102, "learning_rate": 9.671501280930588e-06, "loss": 0.6262, "step": 3521 }, { "epoch": 0.5252992281591409, "grad_norm": 1.1642589569091797, "learning_rate": 9.666672194382639e-06, "loss": 0.6321, "step": 3522 }, { "epoch": 0.5254483761512361, "grad_norm": 1.1139737367630005, "learning_rate": 9.661843185652202e-06, "loss": 0.6298, "step": 3523 }, { "epoch": 0.5255975241433313, "grad_norm": 1.1653289794921875, "learning_rate": 9.657014255866643e-06, "loss": 0.6823, "step": 3524 }, { "epoch": 0.5257466721354264, "grad_norm": 1.1906754970550537, "learning_rate": 9.652185406153307e-06, "loss": 0.6689, "step": 3525 }, { "epoch": 0.5258958201275216, "grad_norm": 1.1186766624450684, "learning_rate": 9.647356637639518e-06, "loss": 0.558, "step": 3526 }, { "epoch": 0.5260449681196167, "grad_norm": 1.1343728303909302, "learning_rate": 9.64252795145258e-06, "loss": 0.6663, "step": 3527 }, { "epoch": 0.5261941161117119, "grad_norm": 1.0943961143493652, "learning_rate": 9.637699348719783e-06, "loss": 0.4882, "step": 3528 }, { "epoch": 0.526343264103807, "grad_norm": 1.195364236831665, "learning_rate": 9.632870830568399e-06, "loss": 0.6995, "step": 3529 }, { "epoch": 0.5264924120959021, "grad_norm": 1.1601088047027588, "learning_rate": 9.628042398125673e-06, "loss": 0.6174, "step": 3530 }, { "epoch": 0.5266415600879973, "grad_norm": 1.0992292165756226, "learning_rate": 9.623214052518836e-06, "loss": 0.6271, "step": 3531 }, { "epoch": 0.5267907080800924, "grad_norm": 1.0850685834884644, "learning_rate": 9.618385794875094e-06, "loss": 0.575, "step": 3532 }, { "epoch": 0.5269398560721876, "grad_norm": 1.1383205652236938, "learning_rate": 9.613557626321633e-06, "loss": 0.6298, "step": 3533 }, { "epoch": 0.5270890040642828, "grad_norm": 1.2354857921600342, "learning_rate": 9.608729547985623e-06, "loss": 0.7073, "step": 3534 }, { "epoch": 0.5272381520563779, "grad_norm": 1.1406766176223755, "learning_rate": 9.60390156099421e-06, "loss": 0.6526, "step": 3535 }, { "epoch": 0.5273873000484731, "grad_norm": 1.2182358503341675, "learning_rate": 9.599073666474516e-06, "loss": 0.6307, "step": 3536 }, { "epoch": 0.5275364480405682, "grad_norm": 1.088078498840332, "learning_rate": 9.594245865553641e-06, "loss": 0.6041, "step": 3537 }, { "epoch": 0.5276855960326634, "grad_norm": 1.1351583003997803, "learning_rate": 9.589418159358677e-06, "loss": 0.6184, "step": 3538 }, { "epoch": 0.5278347440247586, "grad_norm": 1.0556316375732422, "learning_rate": 9.584590549016674e-06, "loss": 0.6223, "step": 3539 }, { "epoch": 0.5279838920168537, "grad_norm": 1.203648567199707, "learning_rate": 9.579763035654671e-06, "loss": 0.6265, "step": 3540 }, { "epoch": 0.5281330400089489, "grad_norm": 1.1789177656173706, "learning_rate": 9.574935620399681e-06, "loss": 0.5967, "step": 3541 }, { "epoch": 0.528282188001044, "grad_norm": 1.093766689300537, "learning_rate": 9.5701083043787e-06, "loss": 0.589, "step": 3542 }, { "epoch": 0.5284313359931392, "grad_norm": 1.2140885591506958, "learning_rate": 9.56528108871869e-06, "loss": 0.6324, "step": 3543 }, { "epoch": 0.5285804839852344, "grad_norm": 0.9097411632537842, "learning_rate": 9.560453974546594e-06, "loss": 0.6593, "step": 3544 }, { "epoch": 0.5287296319773295, "grad_norm": 1.0834729671478271, "learning_rate": 9.555626962989335e-06, "loss": 0.5959, "step": 3545 }, { "epoch": 0.5288787799694247, "grad_norm": 1.0202232599258423, "learning_rate": 9.550800055173815e-06, "loss": 0.5997, "step": 3546 }, { "epoch": 0.5290279279615199, "grad_norm": 1.134353518486023, "learning_rate": 9.5459732522269e-06, "loss": 0.5755, "step": 3547 }, { "epoch": 0.529177075953615, "grad_norm": 1.1197446584701538, "learning_rate": 9.541146555275444e-06, "loss": 0.5868, "step": 3548 }, { "epoch": 0.5293262239457102, "grad_norm": 1.202101469039917, "learning_rate": 9.536319965446265e-06, "loss": 0.6751, "step": 3549 }, { "epoch": 0.5294753719378053, "grad_norm": 1.3330215215682983, "learning_rate": 9.531493483866163e-06, "loss": 0.7002, "step": 3550 }, { "epoch": 0.5296245199299005, "grad_norm": 0.8415202498435974, "learning_rate": 9.526667111661912e-06, "loss": 0.6462, "step": 3551 }, { "epoch": 0.5297736679219957, "grad_norm": 1.1487681865692139, "learning_rate": 9.521840849960256e-06, "loss": 0.6062, "step": 3552 }, { "epoch": 0.5299228159140907, "grad_norm": 1.0470647811889648, "learning_rate": 9.517014699887924e-06, "loss": 0.5762, "step": 3553 }, { "epoch": 0.5300719639061859, "grad_norm": 1.1450673341751099, "learning_rate": 9.512188662571601e-06, "loss": 0.6027, "step": 3554 }, { "epoch": 0.530221111898281, "grad_norm": 1.09403657913208, "learning_rate": 9.50736273913797e-06, "loss": 0.6498, "step": 3555 }, { "epoch": 0.5303702598903762, "grad_norm": 1.2918412685394287, "learning_rate": 9.502536930713659e-06, "loss": 0.7218, "step": 3556 }, { "epoch": 0.5305194078824714, "grad_norm": 1.0826259851455688, "learning_rate": 9.497711238425296e-06, "loss": 0.568, "step": 3557 }, { "epoch": 0.5306685558745665, "grad_norm": 1.0850590467453003, "learning_rate": 9.492885663399465e-06, "loss": 0.6156, "step": 3558 }, { "epoch": 0.5308177038666617, "grad_norm": 1.176753282546997, "learning_rate": 9.488060206762727e-06, "loss": 0.6716, "step": 3559 }, { "epoch": 0.5309668518587568, "grad_norm": 1.038549542427063, "learning_rate": 9.483234869641616e-06, "loss": 0.56, "step": 3560 }, { "epoch": 0.531115999850852, "grad_norm": 0.8350116610527039, "learning_rate": 9.478409653162639e-06, "loss": 0.6634, "step": 3561 }, { "epoch": 0.5312651478429472, "grad_norm": 1.3607068061828613, "learning_rate": 9.473584558452273e-06, "loss": 0.6587, "step": 3562 }, { "epoch": 0.5314142958350423, "grad_norm": 1.2463942766189575, "learning_rate": 9.468759586636963e-06, "loss": 0.6813, "step": 3563 }, { "epoch": 0.5315634438271375, "grad_norm": 1.264937162399292, "learning_rate": 9.463934738843135e-06, "loss": 0.6428, "step": 3564 }, { "epoch": 0.5317125918192326, "grad_norm": 1.2060205936431885, "learning_rate": 9.459110016197184e-06, "loss": 0.6538, "step": 3565 }, { "epoch": 0.5318617398113278, "grad_norm": 1.1665301322937012, "learning_rate": 9.454285419825464e-06, "loss": 0.6521, "step": 3566 }, { "epoch": 0.532010887803423, "grad_norm": 1.1185643672943115, "learning_rate": 9.449460950854315e-06, "loss": 0.6698, "step": 3567 }, { "epoch": 0.5321600357955181, "grad_norm": 1.1986662149429321, "learning_rate": 9.444636610410036e-06, "loss": 0.6696, "step": 3568 }, { "epoch": 0.5323091837876133, "grad_norm": 1.1431660652160645, "learning_rate": 9.439812399618901e-06, "loss": 0.6267, "step": 3569 }, { "epoch": 0.5324583317797084, "grad_norm": 1.1510459184646606, "learning_rate": 9.434988319607153e-06, "loss": 0.6111, "step": 3570 }, { "epoch": 0.5326074797718036, "grad_norm": 1.142813801765442, "learning_rate": 9.430164371501002e-06, "loss": 0.6207, "step": 3571 }, { "epoch": 0.5327566277638988, "grad_norm": 1.0803918838500977, "learning_rate": 9.425340556426635e-06, "loss": 0.5742, "step": 3572 }, { "epoch": 0.5329057757559938, "grad_norm": 1.2199331521987915, "learning_rate": 9.420516875510201e-06, "loss": 0.7163, "step": 3573 }, { "epoch": 0.533054923748089, "grad_norm": 1.1372822523117065, "learning_rate": 9.415693329877818e-06, "loss": 0.6346, "step": 3574 }, { "epoch": 0.5332040717401842, "grad_norm": 1.1204822063446045, "learning_rate": 9.41086992065557e-06, "loss": 0.5946, "step": 3575 }, { "epoch": 0.5333532197322793, "grad_norm": 1.109255075454712, "learning_rate": 9.40604664896952e-06, "loss": 0.5881, "step": 3576 }, { "epoch": 0.5335023677243745, "grad_norm": 1.0735478401184082, "learning_rate": 9.401223515945688e-06, "loss": 0.6263, "step": 3577 }, { "epoch": 0.5336515157164696, "grad_norm": 0.900059700012207, "learning_rate": 9.396400522710066e-06, "loss": 0.7, "step": 3578 }, { "epoch": 0.5338006637085648, "grad_norm": 1.0882220268249512, "learning_rate": 9.391577670388612e-06, "loss": 0.5388, "step": 3579 }, { "epoch": 0.53394981170066, "grad_norm": 1.1495438814163208, "learning_rate": 9.38675496010725e-06, "loss": 0.5831, "step": 3580 }, { "epoch": 0.5340989596927551, "grad_norm": 1.031186580657959, "learning_rate": 9.381932392991874e-06, "loss": 0.5922, "step": 3581 }, { "epoch": 0.5342481076848503, "grad_norm": 1.2105731964111328, "learning_rate": 9.377109970168348e-06, "loss": 0.6676, "step": 3582 }, { "epoch": 0.5343972556769454, "grad_norm": 1.1439993381500244, "learning_rate": 9.372287692762489e-06, "loss": 0.6797, "step": 3583 }, { "epoch": 0.5345464036690406, "grad_norm": 1.1583712100982666, "learning_rate": 9.367465561900097e-06, "loss": 0.6312, "step": 3584 }, { "epoch": 0.5346955516611358, "grad_norm": 1.1027483940124512, "learning_rate": 9.362643578706926e-06, "loss": 0.5456, "step": 3585 }, { "epoch": 0.5348446996532309, "grad_norm": 1.1490662097930908, "learning_rate": 9.357821744308696e-06, "loss": 0.644, "step": 3586 }, { "epoch": 0.5349938476453261, "grad_norm": 1.1251839399337769, "learning_rate": 9.353000059831097e-06, "loss": 0.5677, "step": 3587 }, { "epoch": 0.5351429956374212, "grad_norm": 1.1396478414535522, "learning_rate": 9.348178526399783e-06, "loss": 0.5911, "step": 3588 }, { "epoch": 0.5352921436295164, "grad_norm": 0.8505375981330872, "learning_rate": 9.343357145140368e-06, "loss": 0.6332, "step": 3589 }, { "epoch": 0.5354412916216116, "grad_norm": 1.1713327169418335, "learning_rate": 9.338535917178444e-06, "loss": 0.5769, "step": 3590 }, { "epoch": 0.5355904396137067, "grad_norm": 1.144457459449768, "learning_rate": 9.33371484363955e-06, "loss": 0.6535, "step": 3591 }, { "epoch": 0.5357395876058019, "grad_norm": 1.1148552894592285, "learning_rate": 9.328893925649196e-06, "loss": 0.6133, "step": 3592 }, { "epoch": 0.535888735597897, "grad_norm": 1.0582029819488525, "learning_rate": 9.324073164332861e-06, "loss": 0.5598, "step": 3593 }, { "epoch": 0.5360378835899922, "grad_norm": 1.1903302669525146, "learning_rate": 9.319252560815977e-06, "loss": 0.6525, "step": 3594 }, { "epoch": 0.5361870315820874, "grad_norm": 0.8538107872009277, "learning_rate": 9.31443211622395e-06, "loss": 0.6217, "step": 3595 }, { "epoch": 0.5363361795741824, "grad_norm": 1.113427758216858, "learning_rate": 9.309611831682142e-06, "loss": 0.5465, "step": 3596 }, { "epoch": 0.5364853275662776, "grad_norm": 1.2128769159317017, "learning_rate": 9.304791708315876e-06, "loss": 0.6551, "step": 3597 }, { "epoch": 0.5366344755583728, "grad_norm": 1.2402074337005615, "learning_rate": 9.29997174725044e-06, "loss": 0.6588, "step": 3598 }, { "epoch": 0.5367836235504679, "grad_norm": 1.157031536102295, "learning_rate": 9.295151949611095e-06, "loss": 0.5686, "step": 3599 }, { "epoch": 0.5369327715425631, "grad_norm": 1.1040087938308716, "learning_rate": 9.290332316523043e-06, "loss": 0.4908, "step": 3600 }, { "epoch": 0.5370819195346582, "grad_norm": 1.1869724988937378, "learning_rate": 9.285512849111465e-06, "loss": 0.6293, "step": 3601 }, { "epoch": 0.5372310675267534, "grad_norm": 1.1399543285369873, "learning_rate": 9.28069354850149e-06, "loss": 0.6559, "step": 3602 }, { "epoch": 0.5373802155188486, "grad_norm": 0.8691954612731934, "learning_rate": 9.275874415818222e-06, "loss": 0.7032, "step": 3603 }, { "epoch": 0.5375293635109437, "grad_norm": 1.191177487373352, "learning_rate": 9.271055452186716e-06, "loss": 0.6599, "step": 3604 }, { "epoch": 0.5376785115030389, "grad_norm": 1.2156389951705933, "learning_rate": 9.266236658731985e-06, "loss": 0.6311, "step": 3605 }, { "epoch": 0.537827659495134, "grad_norm": 1.1595264673233032, "learning_rate": 9.261418036579008e-06, "loss": 0.6294, "step": 3606 }, { "epoch": 0.5379768074872292, "grad_norm": 1.0996317863464355, "learning_rate": 9.256599586852731e-06, "loss": 0.6463, "step": 3607 }, { "epoch": 0.5381259554793244, "grad_norm": 1.0471829175949097, "learning_rate": 9.251781310678046e-06, "loss": 0.6082, "step": 3608 }, { "epoch": 0.5382751034714195, "grad_norm": 1.2371078729629517, "learning_rate": 9.246963209179813e-06, "loss": 0.5993, "step": 3609 }, { "epoch": 0.5384242514635147, "grad_norm": 1.1251091957092285, "learning_rate": 9.242145283482848e-06, "loss": 0.6219, "step": 3610 }, { "epoch": 0.5385733994556098, "grad_norm": 1.1395931243896484, "learning_rate": 9.237327534711922e-06, "loss": 0.6621, "step": 3611 }, { "epoch": 0.538722547447705, "grad_norm": 1.129940152168274, "learning_rate": 9.232509963991776e-06, "loss": 0.6644, "step": 3612 }, { "epoch": 0.5388716954398002, "grad_norm": 1.2395613193511963, "learning_rate": 9.2276925724471e-06, "loss": 0.6954, "step": 3613 }, { "epoch": 0.5390208434318953, "grad_norm": 1.1054906845092773, "learning_rate": 9.222875361202546e-06, "loss": 0.5594, "step": 3614 }, { "epoch": 0.5391699914239905, "grad_norm": 1.0405462980270386, "learning_rate": 9.218058331382717e-06, "loss": 0.6326, "step": 3615 }, { "epoch": 0.5393191394160856, "grad_norm": 0.8165274262428284, "learning_rate": 9.213241484112188e-06, "loss": 0.6206, "step": 3616 }, { "epoch": 0.5394682874081808, "grad_norm": 1.1209365129470825, "learning_rate": 9.208424820515478e-06, "loss": 0.6253, "step": 3617 }, { "epoch": 0.539617435400276, "grad_norm": 1.1800175905227661, "learning_rate": 9.203608341717073e-06, "loss": 0.6756, "step": 3618 }, { "epoch": 0.539766583392371, "grad_norm": 1.0325924158096313, "learning_rate": 9.198792048841403e-06, "loss": 0.5698, "step": 3619 }, { "epoch": 0.5399157313844662, "grad_norm": 1.0962759256362915, "learning_rate": 9.19397594301287e-06, "loss": 0.5422, "step": 3620 }, { "epoch": 0.5400648793765614, "grad_norm": 1.0393917560577393, "learning_rate": 9.18916002535582e-06, "loss": 0.603, "step": 3621 }, { "epoch": 0.5402140273686565, "grad_norm": 1.1833280324935913, "learning_rate": 9.184344296994559e-06, "loss": 0.6091, "step": 3622 }, { "epoch": 0.5403631753607517, "grad_norm": 1.1131764650344849, "learning_rate": 9.179528759053355e-06, "loss": 0.6308, "step": 3623 }, { "epoch": 0.5405123233528468, "grad_norm": 1.2207355499267578, "learning_rate": 9.174713412656418e-06, "loss": 0.6636, "step": 3624 }, { "epoch": 0.540661471344942, "grad_norm": 1.0271111726760864, "learning_rate": 9.16989825892793e-06, "loss": 0.5959, "step": 3625 }, { "epoch": 0.5408106193370372, "grad_norm": 1.1853318214416504, "learning_rate": 9.165083298992019e-06, "loss": 0.6844, "step": 3626 }, { "epoch": 0.5409597673291323, "grad_norm": 1.3023149967193604, "learning_rate": 9.160268533972763e-06, "loss": 0.7101, "step": 3627 }, { "epoch": 0.5411089153212275, "grad_norm": 1.0826787948608398, "learning_rate": 9.155453964994202e-06, "loss": 0.6488, "step": 3628 }, { "epoch": 0.5412580633133226, "grad_norm": 1.0227910280227661, "learning_rate": 9.150639593180327e-06, "loss": 0.5598, "step": 3629 }, { "epoch": 0.5414072113054178, "grad_norm": 1.1782565116882324, "learning_rate": 9.145825419655086e-06, "loss": 0.608, "step": 3630 }, { "epoch": 0.541556359297513, "grad_norm": 1.0819591283798218, "learning_rate": 9.141011445542377e-06, "loss": 0.5763, "step": 3631 }, { "epoch": 0.5417055072896081, "grad_norm": 1.1564035415649414, "learning_rate": 9.136197671966058e-06, "loss": 0.6594, "step": 3632 }, { "epoch": 0.5418546552817033, "grad_norm": 1.1305917501449585, "learning_rate": 9.131384100049924e-06, "loss": 0.6374, "step": 3633 }, { "epoch": 0.5420038032737984, "grad_norm": 1.2350025177001953, "learning_rate": 9.126570730917744e-06, "loss": 0.7285, "step": 3634 }, { "epoch": 0.5421529512658936, "grad_norm": 1.1364421844482422, "learning_rate": 9.12175756569323e-06, "loss": 0.6461, "step": 3635 }, { "epoch": 0.5423020992579888, "grad_norm": 1.1051145792007446, "learning_rate": 9.116944605500041e-06, "loss": 0.6083, "step": 3636 }, { "epoch": 0.5424512472500839, "grad_norm": 1.1356216669082642, "learning_rate": 9.1121318514618e-06, "loss": 0.6261, "step": 3637 }, { "epoch": 0.5426003952421791, "grad_norm": 1.1888740062713623, "learning_rate": 9.10731930470207e-06, "loss": 0.666, "step": 3638 }, { "epoch": 0.5427495432342742, "grad_norm": 1.0054242610931396, "learning_rate": 9.10250696634437e-06, "loss": 0.5235, "step": 3639 }, { "epoch": 0.5428986912263694, "grad_norm": 1.2801896333694458, "learning_rate": 9.097694837512175e-06, "loss": 0.6118, "step": 3640 }, { "epoch": 0.5430478392184646, "grad_norm": 1.0837514400482178, "learning_rate": 9.092882919328901e-06, "loss": 0.5587, "step": 3641 }, { "epoch": 0.5431969872105596, "grad_norm": 1.062689185142517, "learning_rate": 9.08807121291793e-06, "loss": 0.5951, "step": 3642 }, { "epoch": 0.5433461352026548, "grad_norm": 1.0429446697235107, "learning_rate": 9.083259719402583e-06, "loss": 0.6196, "step": 3643 }, { "epoch": 0.54349528319475, "grad_norm": 0.8925557732582092, "learning_rate": 9.07844843990613e-06, "loss": 0.6591, "step": 3644 }, { "epoch": 0.5436444311868451, "grad_norm": 1.147267460823059, "learning_rate": 9.0736373755518e-06, "loss": 0.5824, "step": 3645 }, { "epoch": 0.5437935791789403, "grad_norm": 1.0179404020309448, "learning_rate": 9.068826527462766e-06, "loss": 0.5325, "step": 3646 }, { "epoch": 0.5439427271710354, "grad_norm": 1.077741026878357, "learning_rate": 9.064015896762146e-06, "loss": 0.6225, "step": 3647 }, { "epoch": 0.5440918751631306, "grad_norm": 1.2130661010742188, "learning_rate": 9.059205484573019e-06, "loss": 0.6366, "step": 3648 }, { "epoch": 0.5442410231552258, "grad_norm": 1.1442148685455322, "learning_rate": 9.054395292018402e-06, "loss": 0.6444, "step": 3649 }, { "epoch": 0.5443901711473209, "grad_norm": 1.0480005741119385, "learning_rate": 9.049585320221266e-06, "loss": 0.5907, "step": 3650 }, { "epoch": 0.5445393191394161, "grad_norm": 1.0259028673171997, "learning_rate": 9.044775570304534e-06, "loss": 0.4998, "step": 3651 }, { "epoch": 0.5446884671315112, "grad_norm": 1.1001945734024048, "learning_rate": 9.03996604339107e-06, "loss": 0.6424, "step": 3652 }, { "epoch": 0.5448376151236064, "grad_norm": 1.1290788650512695, "learning_rate": 9.035156740603689e-06, "loss": 0.536, "step": 3653 }, { "epoch": 0.5449867631157016, "grad_norm": 1.2440533638000488, "learning_rate": 9.030347663065152e-06, "loss": 0.5918, "step": 3654 }, { "epoch": 0.5451359111077967, "grad_norm": 1.2562568187713623, "learning_rate": 9.025538811898172e-06, "loss": 0.6188, "step": 3655 }, { "epoch": 0.5452850590998919, "grad_norm": 1.0588583946228027, "learning_rate": 9.020730188225405e-06, "loss": 0.5182, "step": 3656 }, { "epoch": 0.545434207091987, "grad_norm": 1.0992182493209839, "learning_rate": 9.015921793169455e-06, "loss": 0.5827, "step": 3657 }, { "epoch": 0.5455833550840822, "grad_norm": 1.1457178592681885, "learning_rate": 9.01111362785287e-06, "loss": 0.6397, "step": 3658 }, { "epoch": 0.5457325030761774, "grad_norm": 1.2496800422668457, "learning_rate": 9.006305693398148e-06, "loss": 0.6249, "step": 3659 }, { "epoch": 0.5458816510682725, "grad_norm": 1.127684473991394, "learning_rate": 9.001497990927738e-06, "loss": 0.6003, "step": 3660 }, { "epoch": 0.5460307990603677, "grad_norm": 1.0588269233703613, "learning_rate": 8.996690521564021e-06, "loss": 0.5448, "step": 3661 }, { "epoch": 0.5461799470524628, "grad_norm": 1.1883889436721802, "learning_rate": 8.991883286429337e-06, "loss": 0.6288, "step": 3662 }, { "epoch": 0.546329095044558, "grad_norm": 1.0803335905075073, "learning_rate": 8.987076286645965e-06, "loss": 0.539, "step": 3663 }, { "epoch": 0.5464782430366532, "grad_norm": 1.2336441278457642, "learning_rate": 8.982269523336126e-06, "loss": 0.6068, "step": 3664 }, { "epoch": 0.5466273910287482, "grad_norm": 1.1852455139160156, "learning_rate": 8.977462997621994e-06, "loss": 0.7026, "step": 3665 }, { "epoch": 0.5467765390208434, "grad_norm": 1.147918939590454, "learning_rate": 8.972656710625682e-06, "loss": 0.6567, "step": 3666 }, { "epoch": 0.5469256870129385, "grad_norm": 1.274816632270813, "learning_rate": 8.967850663469248e-06, "loss": 0.7304, "step": 3667 }, { "epoch": 0.5470748350050337, "grad_norm": 1.042371153831482, "learning_rate": 8.963044857274691e-06, "loss": 0.5601, "step": 3668 }, { "epoch": 0.5472239829971289, "grad_norm": 1.1387884616851807, "learning_rate": 8.958239293163966e-06, "loss": 0.6265, "step": 3669 }, { "epoch": 0.547373130989224, "grad_norm": 1.1073968410491943, "learning_rate": 8.953433972258955e-06, "loss": 0.6277, "step": 3670 }, { "epoch": 0.5475222789813192, "grad_norm": 1.0447553396224976, "learning_rate": 8.948628895681498e-06, "loss": 0.5795, "step": 3671 }, { "epoch": 0.5476714269734144, "grad_norm": 0.8719123005867004, "learning_rate": 8.943824064553361e-06, "loss": 0.6453, "step": 3672 }, { "epoch": 0.5478205749655095, "grad_norm": 1.2186036109924316, "learning_rate": 8.939019479996272e-06, "loss": 0.6974, "step": 3673 }, { "epoch": 0.5479697229576047, "grad_norm": 1.1304644346237183, "learning_rate": 8.934215143131891e-06, "loss": 0.6201, "step": 3674 }, { "epoch": 0.5481188709496998, "grad_norm": 1.2019449472427368, "learning_rate": 8.929411055081812e-06, "loss": 0.6733, "step": 3675 }, { "epoch": 0.548268018941795, "grad_norm": 1.146378993988037, "learning_rate": 8.924607216967588e-06, "loss": 0.661, "step": 3676 }, { "epoch": 0.5484171669338902, "grad_norm": 1.0302261114120483, "learning_rate": 8.919803629910709e-06, "loss": 0.5275, "step": 3677 }, { "epoch": 0.5485663149259853, "grad_norm": 0.8535958528518677, "learning_rate": 8.915000295032594e-06, "loss": 0.6318, "step": 3678 }, { "epoch": 0.5487154629180805, "grad_norm": 1.2166374921798706, "learning_rate": 8.910197213454622e-06, "loss": 0.6223, "step": 3679 }, { "epoch": 0.5488646109101756, "grad_norm": 1.0890127420425415, "learning_rate": 8.905394386298098e-06, "loss": 0.5339, "step": 3680 }, { "epoch": 0.5490137589022708, "grad_norm": 1.1683578491210938, "learning_rate": 8.900591814684269e-06, "loss": 0.6735, "step": 3681 }, { "epoch": 0.549162906894366, "grad_norm": 1.2114685773849487, "learning_rate": 8.895789499734335e-06, "loss": 0.6365, "step": 3682 }, { "epoch": 0.5493120548864611, "grad_norm": 1.1153218746185303, "learning_rate": 8.890987442569419e-06, "loss": 0.6109, "step": 3683 }, { "epoch": 0.5494612028785563, "grad_norm": 1.1337571144104004, "learning_rate": 8.886185644310597e-06, "loss": 0.5899, "step": 3684 }, { "epoch": 0.5496103508706514, "grad_norm": 1.1764247417449951, "learning_rate": 8.881384106078875e-06, "loss": 0.6342, "step": 3685 }, { "epoch": 0.5497594988627466, "grad_norm": 1.2455899715423584, "learning_rate": 8.876582828995211e-06, "loss": 0.6717, "step": 3686 }, { "epoch": 0.5499086468548418, "grad_norm": 1.1627331972122192, "learning_rate": 8.871781814180486e-06, "loss": 0.6057, "step": 3687 }, { "epoch": 0.5500577948469368, "grad_norm": 1.2122199535369873, "learning_rate": 8.866981062755532e-06, "loss": 0.6512, "step": 3688 }, { "epoch": 0.550206942839032, "grad_norm": 1.2025703191757202, "learning_rate": 8.862180575841112e-06, "loss": 0.5582, "step": 3689 }, { "epoch": 0.5503560908311271, "grad_norm": 1.0538201332092285, "learning_rate": 8.857380354557937e-06, "loss": 0.5814, "step": 3690 }, { "epoch": 0.5505052388232223, "grad_norm": 1.0956900119781494, "learning_rate": 8.85258040002664e-06, "loss": 0.5831, "step": 3691 }, { "epoch": 0.5506543868153175, "grad_norm": 1.2007699012756348, "learning_rate": 8.847780713367808e-06, "loss": 0.6541, "step": 3692 }, { "epoch": 0.5508035348074126, "grad_norm": 1.17496919631958, "learning_rate": 8.842981295701956e-06, "loss": 0.5432, "step": 3693 }, { "epoch": 0.5509526827995078, "grad_norm": 1.1801514625549316, "learning_rate": 8.838182148149537e-06, "loss": 0.6217, "step": 3694 }, { "epoch": 0.551101830791603, "grad_norm": 1.1854795217514038, "learning_rate": 8.833383271830946e-06, "loss": 0.571, "step": 3695 }, { "epoch": 0.5512509787836981, "grad_norm": 1.2110098600387573, "learning_rate": 8.828584667866514e-06, "loss": 0.6235, "step": 3696 }, { "epoch": 0.5514001267757933, "grad_norm": 1.0800281763076782, "learning_rate": 8.8237863373765e-06, "loss": 0.6065, "step": 3697 }, { "epoch": 0.5515492747678884, "grad_norm": 1.0904148817062378, "learning_rate": 8.818988281481109e-06, "loss": 0.5461, "step": 3698 }, { "epoch": 0.5516984227599836, "grad_norm": 1.08951997756958, "learning_rate": 8.814190501300475e-06, "loss": 0.5727, "step": 3699 }, { "epoch": 0.5518475707520788, "grad_norm": 1.1515381336212158, "learning_rate": 8.809392997954673e-06, "loss": 0.6072, "step": 3700 }, { "epoch": 0.5519967187441739, "grad_norm": 1.1118584871292114, "learning_rate": 8.80459577256371e-06, "loss": 0.6791, "step": 3701 }, { "epoch": 0.5521458667362691, "grad_norm": 1.1066385507583618, "learning_rate": 8.799798826247526e-06, "loss": 0.6115, "step": 3702 }, { "epoch": 0.5522950147283642, "grad_norm": 1.3113312721252441, "learning_rate": 8.795002160126002e-06, "loss": 0.7241, "step": 3703 }, { "epoch": 0.5524441627204594, "grad_norm": 1.1351836919784546, "learning_rate": 8.790205775318952e-06, "loss": 0.6016, "step": 3704 }, { "epoch": 0.5525933107125546, "grad_norm": 1.1264153718948364, "learning_rate": 8.785409672946123e-06, "loss": 0.5917, "step": 3705 }, { "epoch": 0.5527424587046497, "grad_norm": 1.2174850702285767, "learning_rate": 8.78061385412719e-06, "loss": 0.6336, "step": 3706 }, { "epoch": 0.5528916066967449, "grad_norm": 1.1798094511032104, "learning_rate": 8.775818319981776e-06, "loss": 0.5691, "step": 3707 }, { "epoch": 0.55304075468884, "grad_norm": 1.1622788906097412, "learning_rate": 8.77102307162942e-06, "loss": 0.6495, "step": 3708 }, { "epoch": 0.5531899026809352, "grad_norm": 1.27125084400177, "learning_rate": 8.76622811018961e-06, "loss": 0.6818, "step": 3709 }, { "epoch": 0.5533390506730304, "grad_norm": 1.1812429428100586, "learning_rate": 8.76143343678176e-06, "loss": 0.6361, "step": 3710 }, { "epoch": 0.5534881986651254, "grad_norm": 1.1294745206832886, "learning_rate": 8.756639052525213e-06, "loss": 0.634, "step": 3711 }, { "epoch": 0.5536373466572206, "grad_norm": 1.131447434425354, "learning_rate": 8.751844958539251e-06, "loss": 0.5971, "step": 3712 }, { "epoch": 0.5537864946493157, "grad_norm": 1.2105692625045776, "learning_rate": 8.747051155943091e-06, "loss": 0.6121, "step": 3713 }, { "epoch": 0.5539356426414109, "grad_norm": 1.248047947883606, "learning_rate": 8.74225764585587e-06, "loss": 0.7422, "step": 3714 }, { "epoch": 0.5540847906335061, "grad_norm": 1.1628419160842896, "learning_rate": 8.737464429396668e-06, "loss": 0.648, "step": 3715 }, { "epoch": 0.5542339386256012, "grad_norm": 1.183563470840454, "learning_rate": 8.73267150768449e-06, "loss": 0.5901, "step": 3716 }, { "epoch": 0.5543830866176964, "grad_norm": 1.2117198705673218, "learning_rate": 8.727878881838273e-06, "loss": 0.6579, "step": 3717 }, { "epoch": 0.5545322346097916, "grad_norm": 1.3520736694335938, "learning_rate": 8.72308655297689e-06, "loss": 0.6848, "step": 3718 }, { "epoch": 0.5546813826018867, "grad_norm": 1.1715087890625, "learning_rate": 8.718294522219137e-06, "loss": 0.6055, "step": 3719 }, { "epoch": 0.5548305305939819, "grad_norm": 1.1534558534622192, "learning_rate": 8.713502790683743e-06, "loss": 0.572, "step": 3720 }, { "epoch": 0.554979678586077, "grad_norm": 0.9281258583068848, "learning_rate": 8.708711359489377e-06, "loss": 0.644, "step": 3721 }, { "epoch": 0.5551288265781722, "grad_norm": 1.1817066669464111, "learning_rate": 8.703920229754624e-06, "loss": 0.6221, "step": 3722 }, { "epoch": 0.5552779745702674, "grad_norm": 1.1275975704193115, "learning_rate": 8.699129402598001e-06, "loss": 0.6142, "step": 3723 }, { "epoch": 0.5554271225623625, "grad_norm": 1.0488560199737549, "learning_rate": 8.694338879137962e-06, "loss": 0.546, "step": 3724 }, { "epoch": 0.5555762705544577, "grad_norm": 1.1548465490341187, "learning_rate": 8.689548660492882e-06, "loss": 0.6041, "step": 3725 }, { "epoch": 0.5557254185465528, "grad_norm": 1.3608218431472778, "learning_rate": 8.684758747781073e-06, "loss": 0.7052, "step": 3726 }, { "epoch": 0.555874566538648, "grad_norm": 1.2237476110458374, "learning_rate": 8.679969142120765e-06, "loss": 0.6178, "step": 3727 }, { "epoch": 0.5560237145307432, "grad_norm": 1.245278000831604, "learning_rate": 8.675179844630125e-06, "loss": 0.6371, "step": 3728 }, { "epoch": 0.5561728625228383, "grad_norm": 1.1623404026031494, "learning_rate": 8.670390856427242e-06, "loss": 0.6306, "step": 3729 }, { "epoch": 0.5563220105149335, "grad_norm": 1.168493628501892, "learning_rate": 8.665602178630146e-06, "loss": 0.6397, "step": 3730 }, { "epoch": 0.5564711585070286, "grad_norm": 1.1207616329193115, "learning_rate": 8.660813812356773e-06, "loss": 0.5238, "step": 3731 }, { "epoch": 0.5566203064991238, "grad_norm": 1.2248669862747192, "learning_rate": 8.656025758725004e-06, "loss": 0.6488, "step": 3732 }, { "epoch": 0.556769454491219, "grad_norm": 1.138083815574646, "learning_rate": 8.651238018852638e-06, "loss": 0.6096, "step": 3733 }, { "epoch": 0.556918602483314, "grad_norm": 1.1909239292144775, "learning_rate": 8.646450593857407e-06, "loss": 0.6993, "step": 3734 }, { "epoch": 0.5570677504754092, "grad_norm": 1.1205322742462158, "learning_rate": 8.641663484856964e-06, "loss": 0.5375, "step": 3735 }, { "epoch": 0.5572168984675043, "grad_norm": 1.2751944065093994, "learning_rate": 8.636876692968887e-06, "loss": 0.6173, "step": 3736 }, { "epoch": 0.5573660464595995, "grad_norm": 1.0656242370605469, "learning_rate": 8.632090219310688e-06, "loss": 0.5954, "step": 3737 }, { "epoch": 0.5575151944516947, "grad_norm": 1.145231008529663, "learning_rate": 8.627304064999798e-06, "loss": 0.625, "step": 3738 }, { "epoch": 0.5576643424437898, "grad_norm": 0.8480390906333923, "learning_rate": 8.622518231153574e-06, "loss": 0.663, "step": 3739 }, { "epoch": 0.557813490435885, "grad_norm": 1.215307593345642, "learning_rate": 8.617732718889305e-06, "loss": 0.6044, "step": 3740 }, { "epoch": 0.5579626384279802, "grad_norm": 1.0882854461669922, "learning_rate": 8.612947529324196e-06, "loss": 0.5932, "step": 3741 }, { "epoch": 0.5581117864200753, "grad_norm": 1.206236481666565, "learning_rate": 8.608162663575378e-06, "loss": 0.6792, "step": 3742 }, { "epoch": 0.5582609344121705, "grad_norm": 1.1890778541564941, "learning_rate": 8.603378122759912e-06, "loss": 0.587, "step": 3743 }, { "epoch": 0.5584100824042656, "grad_norm": 1.2114545106887817, "learning_rate": 8.598593907994778e-06, "loss": 0.6774, "step": 3744 }, { "epoch": 0.5585592303963608, "grad_norm": 1.044012427330017, "learning_rate": 8.593810020396882e-06, "loss": 0.551, "step": 3745 }, { "epoch": 0.558708378388456, "grad_norm": 1.097816824913025, "learning_rate": 8.58902646108305e-06, "loss": 0.5771, "step": 3746 }, { "epoch": 0.5588575263805511, "grad_norm": 1.1836237907409668, "learning_rate": 8.584243231170042e-06, "loss": 0.6862, "step": 3747 }, { "epoch": 0.5590066743726463, "grad_norm": 1.1471501588821411, "learning_rate": 8.579460331774529e-06, "loss": 0.6056, "step": 3748 }, { "epoch": 0.5591558223647414, "grad_norm": 0.8958494067192078, "learning_rate": 8.57467776401311e-06, "loss": 0.6759, "step": 3749 }, { "epoch": 0.5593049703568366, "grad_norm": 1.1144005060195923, "learning_rate": 8.569895529002305e-06, "loss": 0.5032, "step": 3750 }, { "epoch": 0.5594541183489318, "grad_norm": 1.1269437074661255, "learning_rate": 8.565113627858562e-06, "loss": 0.6269, "step": 3751 }, { "epoch": 0.5596032663410269, "grad_norm": 1.1364997625350952, "learning_rate": 8.560332061698242e-06, "loss": 0.6346, "step": 3752 }, { "epoch": 0.5597524143331221, "grad_norm": 1.1448532342910767, "learning_rate": 8.55555083163763e-06, "loss": 0.577, "step": 3753 }, { "epoch": 0.5599015623252172, "grad_norm": 1.1328831911087036, "learning_rate": 8.550769938792943e-06, "loss": 0.6059, "step": 3754 }, { "epoch": 0.5600507103173124, "grad_norm": 1.0258458852767944, "learning_rate": 8.5459893842803e-06, "loss": 0.5408, "step": 3755 }, { "epoch": 0.5601998583094076, "grad_norm": 1.0509518384933472, "learning_rate": 8.54120916921576e-06, "loss": 0.5242, "step": 3756 }, { "epoch": 0.5603490063015026, "grad_norm": 1.2065634727478027, "learning_rate": 8.536429294715296e-06, "loss": 0.6027, "step": 3757 }, { "epoch": 0.5604981542935978, "grad_norm": 1.1341465711593628, "learning_rate": 8.5316497618948e-06, "loss": 0.6094, "step": 3758 }, { "epoch": 0.5606473022856929, "grad_norm": 1.1319752931594849, "learning_rate": 8.526870571870077e-06, "loss": 0.6283, "step": 3759 }, { "epoch": 0.5607964502777881, "grad_norm": 1.0760676860809326, "learning_rate": 8.522091725756868e-06, "loss": 0.6345, "step": 3760 }, { "epoch": 0.5609455982698833, "grad_norm": 1.192779302597046, "learning_rate": 8.51731322467082e-06, "loss": 0.583, "step": 3761 }, { "epoch": 0.5610947462619784, "grad_norm": 1.2683593034744263, "learning_rate": 8.51253506972751e-06, "loss": 0.6792, "step": 3762 }, { "epoch": 0.5612438942540736, "grad_norm": 1.171372890472412, "learning_rate": 8.507757262042423e-06, "loss": 0.6034, "step": 3763 }, { "epoch": 0.5613930422461688, "grad_norm": 0.8997403979301453, "learning_rate": 8.502979802730968e-06, "loss": 0.6542, "step": 3764 }, { "epoch": 0.5615421902382639, "grad_norm": 1.0456258058547974, "learning_rate": 8.49820269290848e-06, "loss": 0.5117, "step": 3765 }, { "epoch": 0.5616913382303591, "grad_norm": 1.0778394937515259, "learning_rate": 8.493425933690205e-06, "loss": 0.662, "step": 3766 }, { "epoch": 0.5618404862224542, "grad_norm": 1.0551354885101318, "learning_rate": 8.488649526191303e-06, "loss": 0.6214, "step": 3767 }, { "epoch": 0.5619896342145494, "grad_norm": 1.1306418180465698, "learning_rate": 8.483873471526865e-06, "loss": 0.6219, "step": 3768 }, { "epoch": 0.5621387822066446, "grad_norm": 1.0734412670135498, "learning_rate": 8.479097770811881e-06, "loss": 0.5758, "step": 3769 }, { "epoch": 0.5622879301987397, "grad_norm": 1.052053451538086, "learning_rate": 8.474322425161279e-06, "loss": 0.6339, "step": 3770 }, { "epoch": 0.5624370781908349, "grad_norm": 0.8805792331695557, "learning_rate": 8.469547435689888e-06, "loss": 0.6935, "step": 3771 }, { "epoch": 0.56258622618293, "grad_norm": 1.151658296585083, "learning_rate": 8.464772803512458e-06, "loss": 0.6353, "step": 3772 }, { "epoch": 0.5627353741750252, "grad_norm": 1.1787053346633911, "learning_rate": 8.459998529743661e-06, "loss": 0.6599, "step": 3773 }, { "epoch": 0.5628845221671204, "grad_norm": 1.314498782157898, "learning_rate": 8.455224615498086e-06, "loss": 0.6534, "step": 3774 }, { "epoch": 0.5630336701592155, "grad_norm": 1.0560548305511475, "learning_rate": 8.450451061890228e-06, "loss": 0.6049, "step": 3775 }, { "epoch": 0.5631828181513107, "grad_norm": 0.8265531659126282, "learning_rate": 8.445677870034506e-06, "loss": 0.6355, "step": 3776 }, { "epoch": 0.5633319661434057, "grad_norm": 1.2182379961013794, "learning_rate": 8.440905041045253e-06, "loss": 0.5937, "step": 3777 }, { "epoch": 0.563481114135501, "grad_norm": 1.1213334798812866, "learning_rate": 8.43613257603671e-06, "loss": 0.6194, "step": 3778 }, { "epoch": 0.5636302621275961, "grad_norm": 1.108445405960083, "learning_rate": 8.43136047612305e-06, "loss": 0.6102, "step": 3779 }, { "epoch": 0.5637794101196912, "grad_norm": 1.306890606880188, "learning_rate": 8.426588742418343e-06, "loss": 0.6599, "step": 3780 }, { "epoch": 0.5639285581117864, "grad_norm": 1.1372454166412354, "learning_rate": 8.421817376036578e-06, "loss": 0.5614, "step": 3781 }, { "epoch": 0.5640777061038815, "grad_norm": 1.1105612516403198, "learning_rate": 8.417046378091674e-06, "loss": 0.5611, "step": 3782 }, { "epoch": 0.5642268540959767, "grad_norm": 1.1346150636672974, "learning_rate": 8.41227574969744e-06, "loss": 0.6615, "step": 3783 }, { "epoch": 0.5643760020880719, "grad_norm": 1.2107160091400146, "learning_rate": 8.40750549196761e-06, "loss": 0.6441, "step": 3784 }, { "epoch": 0.564525150080167, "grad_norm": 1.1584985256195068, "learning_rate": 8.40273560601584e-06, "loss": 0.6494, "step": 3785 }, { "epoch": 0.5646742980722622, "grad_norm": 1.06426203250885, "learning_rate": 8.397966092955678e-06, "loss": 0.6081, "step": 3786 }, { "epoch": 0.5648234460643573, "grad_norm": 1.1765871047973633, "learning_rate": 8.39319695390061e-06, "loss": 0.6427, "step": 3787 }, { "epoch": 0.5649725940564525, "grad_norm": 1.1073169708251953, "learning_rate": 8.388428189964014e-06, "loss": 0.6248, "step": 3788 }, { "epoch": 0.5651217420485477, "grad_norm": 0.9215810894966125, "learning_rate": 8.383659802259187e-06, "loss": 0.6811, "step": 3789 }, { "epoch": 0.5652708900406428, "grad_norm": 1.0800864696502686, "learning_rate": 8.378891791899343e-06, "loss": 0.5665, "step": 3790 }, { "epoch": 0.565420038032738, "grad_norm": 1.1160476207733154, "learning_rate": 8.37412415999761e-06, "loss": 0.6283, "step": 3791 }, { "epoch": 0.5655691860248332, "grad_norm": 1.2748377323150635, "learning_rate": 8.369356907667013e-06, "loss": 0.6455, "step": 3792 }, { "epoch": 0.5657183340169283, "grad_norm": 1.3462668657302856, "learning_rate": 8.364590036020503e-06, "loss": 0.6247, "step": 3793 }, { "epoch": 0.5658674820090235, "grad_norm": 1.152126431465149, "learning_rate": 8.359823546170936e-06, "loss": 0.6466, "step": 3794 }, { "epoch": 0.5660166300011186, "grad_norm": 1.1569666862487793, "learning_rate": 8.355057439231078e-06, "loss": 0.5903, "step": 3795 }, { "epoch": 0.5661657779932138, "grad_norm": 0.8614338636398315, "learning_rate": 8.35029171631361e-06, "loss": 0.6675, "step": 3796 }, { "epoch": 0.566314925985309, "grad_norm": 1.0310933589935303, "learning_rate": 8.345526378531117e-06, "loss": 0.5749, "step": 3797 }, { "epoch": 0.5664640739774041, "grad_norm": 1.1697591543197632, "learning_rate": 8.3407614269961e-06, "loss": 0.5878, "step": 3798 }, { "epoch": 0.5666132219694993, "grad_norm": 1.204111933708191, "learning_rate": 8.335996862820964e-06, "loss": 0.6086, "step": 3799 }, { "epoch": 0.5667623699615943, "grad_norm": 1.1141735315322876, "learning_rate": 8.331232687118035e-06, "loss": 0.5553, "step": 3800 }, { "epoch": 0.5669115179536895, "grad_norm": 1.1724704504013062, "learning_rate": 8.326468900999532e-06, "loss": 0.703, "step": 3801 }, { "epoch": 0.5670606659457847, "grad_norm": 1.2266862392425537, "learning_rate": 8.321705505577597e-06, "loss": 0.5889, "step": 3802 }, { "epoch": 0.5672098139378798, "grad_norm": 1.1432788372039795, "learning_rate": 8.31694250196427e-06, "loss": 0.622, "step": 3803 }, { "epoch": 0.567358961929975, "grad_norm": 1.1431881189346313, "learning_rate": 8.312179891271512e-06, "loss": 0.5769, "step": 3804 }, { "epoch": 0.5675081099220701, "grad_norm": 1.2056248188018799, "learning_rate": 8.30741767461118e-06, "loss": 0.6592, "step": 3805 }, { "epoch": 0.5676572579141653, "grad_norm": 1.0613341331481934, "learning_rate": 8.302655853095043e-06, "loss": 0.5406, "step": 3806 }, { "epoch": 0.5678064059062605, "grad_norm": 1.1231955289840698, "learning_rate": 8.297894427834777e-06, "loss": 0.5607, "step": 3807 }, { "epoch": 0.5679555538983556, "grad_norm": 1.1205803155899048, "learning_rate": 8.293133399941977e-06, "loss": 0.5375, "step": 3808 }, { "epoch": 0.5681047018904508, "grad_norm": 1.0987225770950317, "learning_rate": 8.288372770528125e-06, "loss": 0.5532, "step": 3809 }, { "epoch": 0.5682538498825459, "grad_norm": 1.1043227910995483, "learning_rate": 8.283612540704628e-06, "loss": 0.5873, "step": 3810 }, { "epoch": 0.5684029978746411, "grad_norm": 1.1855496168136597, "learning_rate": 8.27885271158279e-06, "loss": 0.6383, "step": 3811 }, { "epoch": 0.5685521458667363, "grad_norm": 1.1050951480865479, "learning_rate": 8.274093284273819e-06, "loss": 0.5725, "step": 3812 }, { "epoch": 0.5687012938588314, "grad_norm": 1.206198811531067, "learning_rate": 8.26933425988884e-06, "loss": 0.6136, "step": 3813 }, { "epoch": 0.5688504418509266, "grad_norm": 1.1202706098556519, "learning_rate": 8.264575639538873e-06, "loss": 0.5674, "step": 3814 }, { "epoch": 0.5689995898430218, "grad_norm": 1.1725823879241943, "learning_rate": 8.259817424334851e-06, "loss": 0.6128, "step": 3815 }, { "epoch": 0.5691487378351169, "grad_norm": 1.2654056549072266, "learning_rate": 8.255059615387606e-06, "loss": 0.7271, "step": 3816 }, { "epoch": 0.5692978858272121, "grad_norm": 1.1507766246795654, "learning_rate": 8.250302213807886e-06, "loss": 0.6652, "step": 3817 }, { "epoch": 0.5694470338193072, "grad_norm": 1.0962694883346558, "learning_rate": 8.245545220706334e-06, "loss": 0.6251, "step": 3818 }, { "epoch": 0.5695961818114024, "grad_norm": 1.0910288095474243, "learning_rate": 8.2407886371935e-06, "loss": 0.5579, "step": 3819 }, { "epoch": 0.5697453298034976, "grad_norm": 1.0547477006912231, "learning_rate": 8.236032464379838e-06, "loss": 0.5074, "step": 3820 }, { "epoch": 0.5698944777955927, "grad_norm": 1.0956308841705322, "learning_rate": 8.231276703375708e-06, "loss": 0.4323, "step": 3821 }, { "epoch": 0.5700436257876879, "grad_norm": 1.071157693862915, "learning_rate": 8.226521355291372e-06, "loss": 0.5942, "step": 3822 }, { "epoch": 0.570192773779783, "grad_norm": 1.080344319343567, "learning_rate": 8.221766421237e-06, "loss": 0.5705, "step": 3823 }, { "epoch": 0.5703419217718781, "grad_norm": 1.234458088874817, "learning_rate": 8.217011902322656e-06, "loss": 0.6342, "step": 3824 }, { "epoch": 0.5704910697639733, "grad_norm": 1.2787678241729736, "learning_rate": 8.212257799658315e-06, "loss": 0.5875, "step": 3825 }, { "epoch": 0.5706402177560684, "grad_norm": 1.1371394395828247, "learning_rate": 8.207504114353854e-06, "loss": 0.5789, "step": 3826 }, { "epoch": 0.5707893657481636, "grad_norm": 1.1868544816970825, "learning_rate": 8.202750847519055e-06, "loss": 0.6642, "step": 3827 }, { "epoch": 0.5709385137402587, "grad_norm": 1.188139796257019, "learning_rate": 8.197998000263591e-06, "loss": 0.6141, "step": 3828 }, { "epoch": 0.5710876617323539, "grad_norm": 1.265506386756897, "learning_rate": 8.193245573697051e-06, "loss": 0.5887, "step": 3829 }, { "epoch": 0.5712368097244491, "grad_norm": 1.1749922037124634, "learning_rate": 8.188493568928916e-06, "loss": 0.576, "step": 3830 }, { "epoch": 0.5713859577165442, "grad_norm": 1.0286263227462769, "learning_rate": 8.18374198706857e-06, "loss": 0.4944, "step": 3831 }, { "epoch": 0.5715351057086394, "grad_norm": 1.0586867332458496, "learning_rate": 8.178990829225308e-06, "loss": 0.4987, "step": 3832 }, { "epoch": 0.5716842537007345, "grad_norm": 1.1669528484344482, "learning_rate": 8.17424009650831e-06, "loss": 0.6672, "step": 3833 }, { "epoch": 0.5718334016928297, "grad_norm": 1.1644419431686401, "learning_rate": 8.169489790026664e-06, "loss": 0.5606, "step": 3834 }, { "epoch": 0.5719825496849249, "grad_norm": 1.150164246559143, "learning_rate": 8.16473991088937e-06, "loss": 0.6667, "step": 3835 }, { "epoch": 0.57213169767702, "grad_norm": 1.0686770677566528, "learning_rate": 8.159990460205312e-06, "loss": 0.591, "step": 3836 }, { "epoch": 0.5722808456691152, "grad_norm": 1.2003039121627808, "learning_rate": 8.155241439083277e-06, "loss": 0.6158, "step": 3837 }, { "epoch": 0.5724299936612104, "grad_norm": 1.1814144849777222, "learning_rate": 8.150492848631958e-06, "loss": 0.6589, "step": 3838 }, { "epoch": 0.5725791416533055, "grad_norm": 1.0650376081466675, "learning_rate": 8.14574468995994e-06, "loss": 0.5323, "step": 3839 }, { "epoch": 0.5727282896454007, "grad_norm": 1.0385091304779053, "learning_rate": 8.140996964175716e-06, "loss": 0.5331, "step": 3840 }, { "epoch": 0.5728774376374958, "grad_norm": 1.0932166576385498, "learning_rate": 8.136249672387673e-06, "loss": 0.5953, "step": 3841 }, { "epoch": 0.573026585629591, "grad_norm": 1.1057945489883423, "learning_rate": 8.131502815704087e-06, "loss": 0.5881, "step": 3842 }, { "epoch": 0.5731757336216862, "grad_norm": 1.0978939533233643, "learning_rate": 8.126756395233154e-06, "loss": 0.6145, "step": 3843 }, { "epoch": 0.5733248816137813, "grad_norm": 0.9274131059646606, "learning_rate": 8.122010412082952e-06, "loss": 0.6501, "step": 3844 }, { "epoch": 0.5734740296058765, "grad_norm": 1.1548324823379517, "learning_rate": 8.117264867361461e-06, "loss": 0.6987, "step": 3845 }, { "epoch": 0.5736231775979715, "grad_norm": 1.2023895978927612, "learning_rate": 8.112519762176559e-06, "loss": 0.6371, "step": 3846 }, { "epoch": 0.5737723255900667, "grad_norm": 1.1649556159973145, "learning_rate": 8.107775097636023e-06, "loss": 0.6636, "step": 3847 }, { "epoch": 0.573921473582162, "grad_norm": 1.2455049753189087, "learning_rate": 8.103030874847521e-06, "loss": 0.6133, "step": 3848 }, { "epoch": 0.574070621574257, "grad_norm": 1.161386489868164, "learning_rate": 8.098287094918625e-06, "loss": 0.5735, "step": 3849 }, { "epoch": 0.5742197695663522, "grad_norm": 1.1161298751831055, "learning_rate": 8.093543758956802e-06, "loss": 0.6191, "step": 3850 }, { "epoch": 0.5743689175584473, "grad_norm": 1.1454282999038696, "learning_rate": 8.088800868069406e-06, "loss": 0.6192, "step": 3851 }, { "epoch": 0.5745180655505425, "grad_norm": 1.1644244194030762, "learning_rate": 8.084058423363709e-06, "loss": 0.6265, "step": 3852 }, { "epoch": 0.5746672135426377, "grad_norm": 1.272648572921753, "learning_rate": 8.079316425946858e-06, "loss": 0.6688, "step": 3853 }, { "epoch": 0.5748163615347328, "grad_norm": 1.1224762201309204, "learning_rate": 8.0745748769259e-06, "loss": 0.6367, "step": 3854 }, { "epoch": 0.574965509526828, "grad_norm": 1.1767408847808838, "learning_rate": 8.069833777407786e-06, "loss": 0.6589, "step": 3855 }, { "epoch": 0.5751146575189231, "grad_norm": 1.179999589920044, "learning_rate": 8.065093128499351e-06, "loss": 0.6789, "step": 3856 }, { "epoch": 0.5752638055110183, "grad_norm": 1.0767662525177002, "learning_rate": 8.060352931307332e-06, "loss": 0.5988, "step": 3857 }, { "epoch": 0.5754129535031135, "grad_norm": 0.8319959044456482, "learning_rate": 8.055613186938357e-06, "loss": 0.6395, "step": 3858 }, { "epoch": 0.5755621014952086, "grad_norm": 1.064532995223999, "learning_rate": 8.050873896498955e-06, "loss": 0.6327, "step": 3859 }, { "epoch": 0.5757112494873038, "grad_norm": 1.1713765859603882, "learning_rate": 8.046135061095534e-06, "loss": 0.5712, "step": 3860 }, { "epoch": 0.575860397479399, "grad_norm": 1.133490800857544, "learning_rate": 8.041396681834415e-06, "loss": 0.6479, "step": 3861 }, { "epoch": 0.5760095454714941, "grad_norm": 1.0157641172409058, "learning_rate": 8.036658759821799e-06, "loss": 0.5334, "step": 3862 }, { "epoch": 0.5761586934635893, "grad_norm": 0.9997193813323975, "learning_rate": 8.031921296163785e-06, "loss": 0.5485, "step": 3863 }, { "epoch": 0.5763078414556844, "grad_norm": 1.1041606664657593, "learning_rate": 8.027184291966361e-06, "loss": 0.523, "step": 3864 }, { "epoch": 0.5764569894477796, "grad_norm": 1.2831313610076904, "learning_rate": 8.022447748335418e-06, "loss": 0.6295, "step": 3865 }, { "epoch": 0.5766061374398748, "grad_norm": 1.151151418685913, "learning_rate": 8.017711666376726e-06, "loss": 0.5718, "step": 3866 }, { "epoch": 0.5767552854319699, "grad_norm": 1.0864007472991943, "learning_rate": 8.012976047195955e-06, "loss": 0.5875, "step": 3867 }, { "epoch": 0.576904433424065, "grad_norm": 1.1089614629745483, "learning_rate": 8.00824089189867e-06, "loss": 0.5668, "step": 3868 }, { "epoch": 0.5770535814161601, "grad_norm": 1.046291708946228, "learning_rate": 8.003506201590315e-06, "loss": 0.547, "step": 3869 }, { "epoch": 0.5772027294082553, "grad_norm": 0.864772379398346, "learning_rate": 7.99877197737624e-06, "loss": 0.6591, "step": 3870 }, { "epoch": 0.5773518774003505, "grad_norm": 1.1665382385253906, "learning_rate": 7.994038220361682e-06, "loss": 0.6373, "step": 3871 }, { "epoch": 0.5775010253924456, "grad_norm": 1.081602692604065, "learning_rate": 7.989304931651763e-06, "loss": 0.6124, "step": 3872 }, { "epoch": 0.5776501733845408, "grad_norm": 1.1589362621307373, "learning_rate": 7.984572112351499e-06, "loss": 0.6785, "step": 3873 }, { "epoch": 0.5777993213766359, "grad_norm": 1.1323167085647583, "learning_rate": 7.9798397635658e-06, "loss": 0.6521, "step": 3874 }, { "epoch": 0.5779484693687311, "grad_norm": 1.0586284399032593, "learning_rate": 7.975107886399457e-06, "loss": 0.5015, "step": 3875 }, { "epoch": 0.5780976173608263, "grad_norm": 1.2074848413467407, "learning_rate": 7.970376481957166e-06, "loss": 0.6648, "step": 3876 }, { "epoch": 0.5782467653529214, "grad_norm": 1.1752480268478394, "learning_rate": 7.965645551343497e-06, "loss": 0.5398, "step": 3877 }, { "epoch": 0.5783959133450166, "grad_norm": 1.3339977264404297, "learning_rate": 7.960915095662922e-06, "loss": 0.5294, "step": 3878 }, { "epoch": 0.5785450613371117, "grad_norm": 1.1407597064971924, "learning_rate": 7.956185116019787e-06, "loss": 0.7189, "step": 3879 }, { "epoch": 0.5786942093292069, "grad_norm": 1.0175455808639526, "learning_rate": 7.951455613518348e-06, "loss": 0.5323, "step": 3880 }, { "epoch": 0.5788433573213021, "grad_norm": 1.143682599067688, "learning_rate": 7.946726589262726e-06, "loss": 0.6404, "step": 3881 }, { "epoch": 0.5789925053133972, "grad_norm": 1.1223080158233643, "learning_rate": 7.941998044356951e-06, "loss": 0.6576, "step": 3882 }, { "epoch": 0.5791416533054924, "grad_norm": 1.160165548324585, "learning_rate": 7.937269979904928e-06, "loss": 0.6211, "step": 3883 }, { "epoch": 0.5792908012975875, "grad_norm": 1.235412836074829, "learning_rate": 7.932542397010453e-06, "loss": 0.6551, "step": 3884 }, { "epoch": 0.5794399492896827, "grad_norm": 1.2848478555679321, "learning_rate": 7.927815296777216e-06, "loss": 0.6231, "step": 3885 }, { "epoch": 0.5795890972817779, "grad_norm": 0.8611690402030945, "learning_rate": 7.923088680308777e-06, "loss": 0.661, "step": 3886 }, { "epoch": 0.579738245273873, "grad_norm": 1.2271528244018555, "learning_rate": 7.918362548708607e-06, "loss": 0.6024, "step": 3887 }, { "epoch": 0.5798873932659682, "grad_norm": 1.129400610923767, "learning_rate": 7.91363690308005e-06, "loss": 0.4859, "step": 3888 }, { "epoch": 0.5800365412580634, "grad_norm": 1.1516706943511963, "learning_rate": 7.908911744526334e-06, "loss": 0.6341, "step": 3889 }, { "epoch": 0.5801856892501585, "grad_norm": 1.119991660118103, "learning_rate": 7.90418707415058e-06, "loss": 0.5907, "step": 3890 }, { "epoch": 0.5803348372422537, "grad_norm": 1.2021514177322388, "learning_rate": 7.899462893055792e-06, "loss": 0.7074, "step": 3891 }, { "epoch": 0.5804839852343487, "grad_norm": 1.092086911201477, "learning_rate": 7.894739202344857e-06, "loss": 0.5165, "step": 3892 }, { "epoch": 0.5806331332264439, "grad_norm": 1.0690243244171143, "learning_rate": 7.890016003120559e-06, "loss": 0.6172, "step": 3893 }, { "epoch": 0.5807822812185391, "grad_norm": 1.2871007919311523, "learning_rate": 7.885293296485551e-06, "loss": 0.6126, "step": 3894 }, { "epoch": 0.5809314292106342, "grad_norm": 1.1247738599777222, "learning_rate": 7.880571083542381e-06, "loss": 0.5858, "step": 3895 }, { "epoch": 0.5810805772027294, "grad_norm": 1.2247745990753174, "learning_rate": 7.875849365393484e-06, "loss": 0.6127, "step": 3896 }, { "epoch": 0.5812297251948245, "grad_norm": 1.1555805206298828, "learning_rate": 7.871128143141175e-06, "loss": 0.5718, "step": 3897 }, { "epoch": 0.5813788731869197, "grad_norm": 1.221490740776062, "learning_rate": 7.866407417887647e-06, "loss": 0.5446, "step": 3898 }, { "epoch": 0.5815280211790149, "grad_norm": 1.114571452140808, "learning_rate": 7.861687190734992e-06, "loss": 0.6015, "step": 3899 }, { "epoch": 0.58167716917111, "grad_norm": 1.26194429397583, "learning_rate": 7.85696746278517e-06, "loss": 0.5989, "step": 3900 }, { "epoch": 0.5818263171632052, "grad_norm": 1.2645481824874878, "learning_rate": 7.852248235140038e-06, "loss": 0.6615, "step": 3901 }, { "epoch": 0.5819754651553003, "grad_norm": 1.1066887378692627, "learning_rate": 7.847529508901327e-06, "loss": 0.6025, "step": 3902 }, { "epoch": 0.5821246131473955, "grad_norm": 1.1356124877929688, "learning_rate": 7.84281128517065e-06, "loss": 0.5649, "step": 3903 }, { "epoch": 0.5822737611394907, "grad_norm": 1.151094913482666, "learning_rate": 7.83809356504951e-06, "loss": 0.5803, "step": 3904 }, { "epoch": 0.5824229091315858, "grad_norm": 1.174700379371643, "learning_rate": 7.833376349639295e-06, "loss": 0.6379, "step": 3905 }, { "epoch": 0.582572057123681, "grad_norm": 0.8378615379333496, "learning_rate": 7.82865964004126e-06, "loss": 0.6581, "step": 3906 }, { "epoch": 0.5827212051157761, "grad_norm": 1.08833909034729, "learning_rate": 7.823943437356556e-06, "loss": 0.5642, "step": 3907 }, { "epoch": 0.5828703531078713, "grad_norm": 1.0293127298355103, "learning_rate": 7.81922774268621e-06, "loss": 0.5478, "step": 3908 }, { "epoch": 0.5830195010999665, "grad_norm": 1.1173186302185059, "learning_rate": 7.81451255713113e-06, "loss": 0.6588, "step": 3909 }, { "epoch": 0.5831686490920616, "grad_norm": 1.163211464881897, "learning_rate": 7.809797881792108e-06, "loss": 0.5571, "step": 3910 }, { "epoch": 0.5833177970841568, "grad_norm": 0.8534988760948181, "learning_rate": 7.80508371776981e-06, "loss": 0.611, "step": 3911 }, { "epoch": 0.583466945076252, "grad_norm": 1.2006889581680298, "learning_rate": 7.800370066164793e-06, "loss": 0.6033, "step": 3912 }, { "epoch": 0.583616093068347, "grad_norm": 1.184321403503418, "learning_rate": 7.79565692807749e-06, "loss": 0.6258, "step": 3913 }, { "epoch": 0.5837652410604423, "grad_norm": 1.142616629600525, "learning_rate": 7.790944304608214e-06, "loss": 0.6658, "step": 3914 }, { "epoch": 0.5839143890525373, "grad_norm": 1.1371588706970215, "learning_rate": 7.786232196857151e-06, "loss": 0.6455, "step": 3915 }, { "epoch": 0.5840635370446325, "grad_norm": 1.044364094734192, "learning_rate": 7.781520605924378e-06, "loss": 0.5896, "step": 3916 }, { "epoch": 0.5842126850367277, "grad_norm": 1.1275649070739746, "learning_rate": 7.776809532909843e-06, "loss": 0.5396, "step": 3917 }, { "epoch": 0.5843618330288228, "grad_norm": 1.108587384223938, "learning_rate": 7.772098978913381e-06, "loss": 0.6026, "step": 3918 }, { "epoch": 0.584510981020918, "grad_norm": 1.160726547241211, "learning_rate": 7.767388945034695e-06, "loss": 0.5235, "step": 3919 }, { "epoch": 0.5846601290130131, "grad_norm": 1.1629133224487305, "learning_rate": 7.762679432373376e-06, "loss": 0.663, "step": 3920 }, { "epoch": 0.5848092770051083, "grad_norm": 1.0960050821304321, "learning_rate": 7.757970442028886e-06, "loss": 0.6387, "step": 3921 }, { "epoch": 0.5849584249972035, "grad_norm": 1.1348955631256104, "learning_rate": 7.753261975100577e-06, "loss": 0.5207, "step": 3922 }, { "epoch": 0.5851075729892986, "grad_norm": 1.0866622924804688, "learning_rate": 7.748554032687664e-06, "loss": 0.5901, "step": 3923 }, { "epoch": 0.5852567209813938, "grad_norm": 1.093990683555603, "learning_rate": 7.74384661588925e-06, "loss": 0.4821, "step": 3924 }, { "epoch": 0.5854058689734889, "grad_norm": 1.0522929430007935, "learning_rate": 7.73913972580431e-06, "loss": 0.5691, "step": 3925 }, { "epoch": 0.5855550169655841, "grad_norm": 0.8807864189147949, "learning_rate": 7.734433363531694e-06, "loss": 0.6813, "step": 3926 }, { "epoch": 0.5857041649576793, "grad_norm": 1.1335885524749756, "learning_rate": 7.729727530170141e-06, "loss": 0.5401, "step": 3927 }, { "epoch": 0.5858533129497744, "grad_norm": 1.143875002861023, "learning_rate": 7.72502222681825e-06, "loss": 0.6301, "step": 3928 }, { "epoch": 0.5860024609418696, "grad_norm": 1.1773251295089722, "learning_rate": 7.72031745457451e-06, "loss": 0.5994, "step": 3929 }, { "epoch": 0.5861516089339647, "grad_norm": 1.0070558786392212, "learning_rate": 7.715613214537272e-06, "loss": 0.5317, "step": 3930 }, { "epoch": 0.5863007569260599, "grad_norm": 1.1799283027648926, "learning_rate": 7.710909507804782e-06, "loss": 0.6534, "step": 3931 }, { "epoch": 0.5864499049181551, "grad_norm": 1.0367368459701538, "learning_rate": 7.706206335475143e-06, "loss": 0.5823, "step": 3932 }, { "epoch": 0.5865990529102502, "grad_norm": 1.0531402826309204, "learning_rate": 7.701503698646345e-06, "loss": 0.5957, "step": 3933 }, { "epoch": 0.5867482009023454, "grad_norm": 1.0287489891052246, "learning_rate": 7.696801598416245e-06, "loss": 0.596, "step": 3934 }, { "epoch": 0.5868973488944406, "grad_norm": 1.1266674995422363, "learning_rate": 7.692100035882581e-06, "loss": 0.6216, "step": 3935 }, { "epoch": 0.5870464968865357, "grad_norm": 1.2132233381271362, "learning_rate": 7.687399012142964e-06, "loss": 0.5743, "step": 3936 }, { "epoch": 0.5871956448786309, "grad_norm": 1.1811861991882324, "learning_rate": 7.682698528294872e-06, "loss": 0.6065, "step": 3937 }, { "epoch": 0.5873447928707259, "grad_norm": 1.1017237901687622, "learning_rate": 7.677998585435669e-06, "loss": 0.6482, "step": 3938 }, { "epoch": 0.5874939408628211, "grad_norm": 1.029917597770691, "learning_rate": 7.673299184662582e-06, "loss": 0.5107, "step": 3939 }, { "epoch": 0.5876430888549163, "grad_norm": 1.0776052474975586, "learning_rate": 7.668600327072721e-06, "loss": 0.6159, "step": 3940 }, { "epoch": 0.5877922368470114, "grad_norm": 1.1516824960708618, "learning_rate": 7.663902013763064e-06, "loss": 0.5956, "step": 3941 }, { "epoch": 0.5879413848391066, "grad_norm": 1.1934388875961304, "learning_rate": 7.65920424583046e-06, "loss": 0.5523, "step": 3942 }, { "epoch": 0.5880905328312017, "grad_norm": 1.2779773473739624, "learning_rate": 7.654507024371635e-06, "loss": 0.6737, "step": 3943 }, { "epoch": 0.5882396808232969, "grad_norm": 1.1619040966033936, "learning_rate": 7.649810350483187e-06, "loss": 0.6518, "step": 3944 }, { "epoch": 0.5883888288153921, "grad_norm": 1.1768851280212402, "learning_rate": 7.645114225261577e-06, "loss": 0.6169, "step": 3945 }, { "epoch": 0.5885379768074872, "grad_norm": 1.1495054960250854, "learning_rate": 7.640418649803155e-06, "loss": 0.6686, "step": 3946 }, { "epoch": 0.5886871247995824, "grad_norm": 1.1597462892532349, "learning_rate": 7.635723625204124e-06, "loss": 0.5718, "step": 3947 }, { "epoch": 0.5888362727916775, "grad_norm": 1.2948602437973022, "learning_rate": 7.631029152560574e-06, "loss": 0.6325, "step": 3948 }, { "epoch": 0.5889854207837727, "grad_norm": 0.824634313583374, "learning_rate": 7.62633523296846e-06, "loss": 0.6339, "step": 3949 }, { "epoch": 0.5891345687758679, "grad_norm": 1.113020658493042, "learning_rate": 7.621641867523608e-06, "loss": 0.5914, "step": 3950 }, { "epoch": 0.589283716767963, "grad_norm": 1.1414813995361328, "learning_rate": 7.6169490573217085e-06, "loss": 0.6368, "step": 3951 }, { "epoch": 0.5894328647600582, "grad_norm": 1.2085216045379639, "learning_rate": 7.612256803458335e-06, "loss": 0.6282, "step": 3952 }, { "epoch": 0.5895820127521533, "grad_norm": 1.310848593711853, "learning_rate": 7.607565107028918e-06, "loss": 0.6867, "step": 3953 }, { "epoch": 0.5897311607442485, "grad_norm": 1.1988461017608643, "learning_rate": 7.602873969128769e-06, "loss": 0.6684, "step": 3954 }, { "epoch": 0.5898803087363437, "grad_norm": 1.0265733003616333, "learning_rate": 7.598183390853063e-06, "loss": 0.4836, "step": 3955 }, { "epoch": 0.5900294567284388, "grad_norm": 1.0987571477890015, "learning_rate": 7.593493373296841e-06, "loss": 0.5309, "step": 3956 }, { "epoch": 0.590178604720534, "grad_norm": 0.8924896121025085, "learning_rate": 7.588803917555023e-06, "loss": 0.6474, "step": 3957 }, { "epoch": 0.5903277527126292, "grad_norm": 1.1123193502426147, "learning_rate": 7.584115024722392e-06, "loss": 0.5677, "step": 3958 }, { "epoch": 0.5904769007047243, "grad_norm": 1.20082688331604, "learning_rate": 7.579426695893599e-06, "loss": 0.5688, "step": 3959 }, { "epoch": 0.5906260486968195, "grad_norm": 1.109950065612793, "learning_rate": 7.574738932163167e-06, "loss": 0.6444, "step": 3960 }, { "epoch": 0.5907751966889145, "grad_norm": 1.04970383644104, "learning_rate": 7.570051734625481e-06, "loss": 0.5951, "step": 3961 }, { "epoch": 0.5909243446810097, "grad_norm": 1.1475194692611694, "learning_rate": 7.565365104374798e-06, "loss": 0.5886, "step": 3962 }, { "epoch": 0.5910734926731049, "grad_norm": 1.1710361242294312, "learning_rate": 7.560679042505242e-06, "loss": 0.6039, "step": 3963 }, { "epoch": 0.5912226406652, "grad_norm": 1.0672677755355835, "learning_rate": 7.555993550110805e-06, "loss": 0.6112, "step": 3964 }, { "epoch": 0.5913717886572952, "grad_norm": 1.0975812673568726, "learning_rate": 7.551308628285341e-06, "loss": 0.6642, "step": 3965 }, { "epoch": 0.5915209366493903, "grad_norm": 1.1820857524871826, "learning_rate": 7.546624278122583e-06, "loss": 0.5493, "step": 3966 }, { "epoch": 0.5916700846414855, "grad_norm": 1.1086384057998657, "learning_rate": 7.5419405007161195e-06, "loss": 0.6698, "step": 3967 }, { "epoch": 0.5918192326335807, "grad_norm": 1.1902034282684326, "learning_rate": 7.537257297159404e-06, "loss": 0.6445, "step": 3968 }, { "epoch": 0.5919683806256758, "grad_norm": 1.224690318107605, "learning_rate": 7.532574668545767e-06, "loss": 0.5627, "step": 3969 }, { "epoch": 0.592117528617771, "grad_norm": 1.032734751701355, "learning_rate": 7.527892615968392e-06, "loss": 0.5346, "step": 3970 }, { "epoch": 0.5922666766098661, "grad_norm": 1.1526670455932617, "learning_rate": 7.523211140520339e-06, "loss": 0.6464, "step": 3971 }, { "epoch": 0.5924158246019613, "grad_norm": 1.1495434045791626, "learning_rate": 7.518530243294526e-06, "loss": 0.5573, "step": 3972 }, { "epoch": 0.5925649725940565, "grad_norm": 1.02426016330719, "learning_rate": 7.513849925383736e-06, "loss": 0.5135, "step": 3973 }, { "epoch": 0.5927141205861516, "grad_norm": 1.2047600746154785, "learning_rate": 7.509170187880623e-06, "loss": 0.5426, "step": 3974 }, { "epoch": 0.5928632685782468, "grad_norm": 1.1691503524780273, "learning_rate": 7.504491031877704e-06, "loss": 0.6455, "step": 3975 }, { "epoch": 0.5930124165703419, "grad_norm": 1.2100361585617065, "learning_rate": 7.499812458467353e-06, "loss": 0.5989, "step": 3976 }, { "epoch": 0.5931615645624371, "grad_norm": 1.0804789066314697, "learning_rate": 7.495134468741816e-06, "loss": 0.6025, "step": 3977 }, { "epoch": 0.5933107125545323, "grad_norm": 1.183240294456482, "learning_rate": 7.490457063793199e-06, "loss": 0.488, "step": 3978 }, { "epoch": 0.5934598605466274, "grad_norm": 1.1406620740890503, "learning_rate": 7.4857802447134706e-06, "loss": 0.6603, "step": 3979 }, { "epoch": 0.5936090085387226, "grad_norm": 0.9086306691169739, "learning_rate": 7.481104012594466e-06, "loss": 0.7004, "step": 3980 }, { "epoch": 0.5937581565308176, "grad_norm": 1.1846930980682373, "learning_rate": 7.476428368527879e-06, "loss": 0.569, "step": 3981 }, { "epoch": 0.5939073045229128, "grad_norm": 1.1957571506500244, "learning_rate": 7.47175331360527e-06, "loss": 0.699, "step": 3982 }, { "epoch": 0.594056452515008, "grad_norm": 0.8377857804298401, "learning_rate": 7.467078848918065e-06, "loss": 0.6479, "step": 3983 }, { "epoch": 0.5942056005071031, "grad_norm": 1.169468641281128, "learning_rate": 7.46240497555754e-06, "loss": 0.6598, "step": 3984 }, { "epoch": 0.5943547484991983, "grad_norm": 1.132412075996399, "learning_rate": 7.457731694614848e-06, "loss": 0.662, "step": 3985 }, { "epoch": 0.5945038964912935, "grad_norm": 1.137346863746643, "learning_rate": 7.453059007180994e-06, "loss": 0.6207, "step": 3986 }, { "epoch": 0.5946530444833886, "grad_norm": 1.130329966545105, "learning_rate": 7.448386914346842e-06, "loss": 0.5292, "step": 3987 }, { "epoch": 0.5948021924754838, "grad_norm": 1.1795578002929688, "learning_rate": 7.443715417203128e-06, "loss": 0.613, "step": 3988 }, { "epoch": 0.5949513404675789, "grad_norm": 1.092433214187622, "learning_rate": 7.439044516840439e-06, "loss": 0.6308, "step": 3989 }, { "epoch": 0.5951004884596741, "grad_norm": 1.2939746379852295, "learning_rate": 7.434374214349232e-06, "loss": 0.6602, "step": 3990 }, { "epoch": 0.5952496364517693, "grad_norm": 1.1426079273223877, "learning_rate": 7.42970451081981e-06, "loss": 0.5678, "step": 3991 }, { "epoch": 0.5953987844438644, "grad_norm": 1.1997789144515991, "learning_rate": 7.425035407342355e-06, "loss": 0.595, "step": 3992 }, { "epoch": 0.5955479324359596, "grad_norm": 1.3309251070022583, "learning_rate": 7.420366905006893e-06, "loss": 0.6324, "step": 3993 }, { "epoch": 0.5956970804280547, "grad_norm": 1.0090464353561401, "learning_rate": 7.415699004903319e-06, "loss": 0.5745, "step": 3994 }, { "epoch": 0.5958462284201499, "grad_norm": 1.0574908256530762, "learning_rate": 7.4110317081213825e-06, "loss": 0.5691, "step": 3995 }, { "epoch": 0.5959953764122451, "grad_norm": 1.258916974067688, "learning_rate": 7.406365015750696e-06, "loss": 0.6705, "step": 3996 }, { "epoch": 0.5961445244043402, "grad_norm": 1.2310230731964111, "learning_rate": 7.401698928880726e-06, "loss": 0.667, "step": 3997 }, { "epoch": 0.5962936723964354, "grad_norm": 1.2212635278701782, "learning_rate": 7.3970334486008e-06, "loss": 0.6345, "step": 3998 }, { "epoch": 0.5964428203885305, "grad_norm": 1.210963249206543, "learning_rate": 7.3923685760001085e-06, "loss": 0.5893, "step": 3999 }, { "epoch": 0.5965919683806257, "grad_norm": 1.150609016418457, "learning_rate": 7.387704312167687e-06, "loss": 0.5943, "step": 4000 }, { "epoch": 0.5967411163727209, "grad_norm": 1.0869704484939575, "learning_rate": 7.383040658192449e-06, "loss": 0.6462, "step": 4001 }, { "epoch": 0.596890264364816, "grad_norm": 1.1502959728240967, "learning_rate": 7.378377615163148e-06, "loss": 0.5812, "step": 4002 }, { "epoch": 0.5970394123569112, "grad_norm": 1.1354371309280396, "learning_rate": 7.373715184168405e-06, "loss": 0.633, "step": 4003 }, { "epoch": 0.5971885603490062, "grad_norm": 1.1043769121170044, "learning_rate": 7.36905336629669e-06, "loss": 0.5553, "step": 4004 }, { "epoch": 0.5973377083411014, "grad_norm": 1.177720308303833, "learning_rate": 7.364392162636338e-06, "loss": 0.645, "step": 4005 }, { "epoch": 0.5974868563331966, "grad_norm": 1.101753830909729, "learning_rate": 7.359731574275533e-06, "loss": 0.6614, "step": 4006 }, { "epoch": 0.5976360043252917, "grad_norm": 1.1403741836547852, "learning_rate": 7.355071602302324e-06, "loss": 0.6045, "step": 4007 }, { "epoch": 0.5977851523173869, "grad_norm": 1.2174495458602905, "learning_rate": 7.350412247804603e-06, "loss": 0.6859, "step": 4008 }, { "epoch": 0.5979343003094821, "grad_norm": 1.0562764406204224, "learning_rate": 7.345753511870139e-06, "loss": 0.5657, "step": 4009 }, { "epoch": 0.5980834483015772, "grad_norm": 1.1686865091323853, "learning_rate": 7.3410953955865324e-06, "loss": 0.6233, "step": 4010 }, { "epoch": 0.5982325962936724, "grad_norm": 1.1494314670562744, "learning_rate": 7.336437900041258e-06, "loss": 0.6465, "step": 4011 }, { "epoch": 0.5983817442857675, "grad_norm": 1.0593432188034058, "learning_rate": 7.331781026321631e-06, "loss": 0.5946, "step": 4012 }, { "epoch": 0.5985308922778627, "grad_norm": 1.156538724899292, "learning_rate": 7.327124775514837e-06, "loss": 0.5937, "step": 4013 }, { "epoch": 0.5986800402699579, "grad_norm": 1.148645281791687, "learning_rate": 7.3224691487079e-06, "loss": 0.5879, "step": 4014 }, { "epoch": 0.598829188262053, "grad_norm": 1.153030276298523, "learning_rate": 7.317814146987708e-06, "loss": 0.6219, "step": 4015 }, { "epoch": 0.5989783362541482, "grad_norm": 1.082298994064331, "learning_rate": 7.313159771441003e-06, "loss": 0.5526, "step": 4016 }, { "epoch": 0.5991274842462433, "grad_norm": 1.0818836688995361, "learning_rate": 7.308506023154375e-06, "loss": 0.6145, "step": 4017 }, { "epoch": 0.5992766322383385, "grad_norm": 1.1735543012619019, "learning_rate": 7.303852903214274e-06, "loss": 0.5955, "step": 4018 }, { "epoch": 0.5994257802304337, "grad_norm": 0.8796026706695557, "learning_rate": 7.299200412707004e-06, "loss": 0.6574, "step": 4019 }, { "epoch": 0.5995749282225288, "grad_norm": 1.1563063859939575, "learning_rate": 7.294548552718714e-06, "loss": 0.5579, "step": 4020 }, { "epoch": 0.599724076214624, "grad_norm": 1.3252537250518799, "learning_rate": 7.289897324335411e-06, "loss": 0.6185, "step": 4021 }, { "epoch": 0.5998732242067191, "grad_norm": 1.1313049793243408, "learning_rate": 7.285246728642956e-06, "loss": 0.634, "step": 4022 }, { "epoch": 0.6000223721988143, "grad_norm": 1.1370927095413208, "learning_rate": 7.280596766727057e-06, "loss": 0.6297, "step": 4023 }, { "epoch": 0.6001715201909095, "grad_norm": 1.0920677185058594, "learning_rate": 7.2759474396732835e-06, "loss": 0.5342, "step": 4024 }, { "epoch": 0.6003206681830046, "grad_norm": 1.1404757499694824, "learning_rate": 7.271298748567043e-06, "loss": 0.6435, "step": 4025 }, { "epoch": 0.6004698161750998, "grad_norm": 1.1969703435897827, "learning_rate": 7.2666506944936045e-06, "loss": 0.6391, "step": 4026 }, { "epoch": 0.6006189641671948, "grad_norm": 1.2306853532791138, "learning_rate": 7.262003278538092e-06, "loss": 0.5167, "step": 4027 }, { "epoch": 0.60076811215929, "grad_norm": 1.085423231124878, "learning_rate": 7.25735650178547e-06, "loss": 0.5844, "step": 4028 }, { "epoch": 0.6009172601513852, "grad_norm": 1.0932354927062988, "learning_rate": 7.252710365320557e-06, "loss": 0.5315, "step": 4029 }, { "epoch": 0.6010664081434803, "grad_norm": 1.1614065170288086, "learning_rate": 7.248064870228028e-06, "loss": 0.6047, "step": 4030 }, { "epoch": 0.6012155561355755, "grad_norm": 1.2583837509155273, "learning_rate": 7.243420017592397e-06, "loss": 0.5887, "step": 4031 }, { "epoch": 0.6013647041276707, "grad_norm": 1.180088996887207, "learning_rate": 7.2387758084980405e-06, "loss": 0.5418, "step": 4032 }, { "epoch": 0.6015138521197658, "grad_norm": 1.1244184970855713, "learning_rate": 7.234132244029177e-06, "loss": 0.5614, "step": 4033 }, { "epoch": 0.601663000111861, "grad_norm": 1.0514410734176636, "learning_rate": 7.229489325269874e-06, "loss": 0.5934, "step": 4034 }, { "epoch": 0.6018121481039561, "grad_norm": 1.1411540508270264, "learning_rate": 7.224847053304049e-06, "loss": 0.6144, "step": 4035 }, { "epoch": 0.6019612960960513, "grad_norm": 1.1242998838424683, "learning_rate": 7.22020542921548e-06, "loss": 0.5967, "step": 4036 }, { "epoch": 0.6021104440881465, "grad_norm": 1.1104342937469482, "learning_rate": 7.215564454087775e-06, "loss": 0.5924, "step": 4037 }, { "epoch": 0.6022595920802416, "grad_norm": 1.1810003519058228, "learning_rate": 7.210924129004404e-06, "loss": 0.5821, "step": 4038 }, { "epoch": 0.6024087400723368, "grad_norm": 1.1707369089126587, "learning_rate": 7.206284455048677e-06, "loss": 0.5797, "step": 4039 }, { "epoch": 0.6025578880644319, "grad_norm": 1.216003656387329, "learning_rate": 7.2016454333037585e-06, "loss": 0.6148, "step": 4040 }, { "epoch": 0.6027070360565271, "grad_norm": 0.9006454944610596, "learning_rate": 7.1970070648526565e-06, "loss": 0.6412, "step": 4041 }, { "epoch": 0.6028561840486223, "grad_norm": 1.030618667602539, "learning_rate": 7.1923693507782276e-06, "loss": 0.582, "step": 4042 }, { "epoch": 0.6030053320407174, "grad_norm": 1.228219747543335, "learning_rate": 7.187732292163173e-06, "loss": 0.6421, "step": 4043 }, { "epoch": 0.6031544800328126, "grad_norm": 1.0938186645507812, "learning_rate": 7.183095890090052e-06, "loss": 0.534, "step": 4044 }, { "epoch": 0.6033036280249077, "grad_norm": 1.1500869989395142, "learning_rate": 7.178460145641257e-06, "loss": 0.5835, "step": 4045 }, { "epoch": 0.6034527760170029, "grad_norm": 1.243517518043518, "learning_rate": 7.173825059899031e-06, "loss": 0.5463, "step": 4046 }, { "epoch": 0.6036019240090981, "grad_norm": 1.2240769863128662, "learning_rate": 7.1691906339454685e-06, "loss": 0.6086, "step": 4047 }, { "epoch": 0.6037510720011932, "grad_norm": 1.1711032390594482, "learning_rate": 7.164556868862502e-06, "loss": 0.5858, "step": 4048 }, { "epoch": 0.6039002199932884, "grad_norm": 0.8209977746009827, "learning_rate": 7.159923765731917e-06, "loss": 0.6242, "step": 4049 }, { "epoch": 0.6040493679853834, "grad_norm": 1.201837182044983, "learning_rate": 7.1552913256353405e-06, "loss": 0.581, "step": 4050 }, { "epoch": 0.6041985159774786, "grad_norm": 1.197959065437317, "learning_rate": 7.150659549654242e-06, "loss": 0.617, "step": 4051 }, { "epoch": 0.6043476639695738, "grad_norm": 1.0844398736953735, "learning_rate": 7.146028438869938e-06, "loss": 0.6242, "step": 4052 }, { "epoch": 0.6044968119616689, "grad_norm": 1.185402750968933, "learning_rate": 7.141397994363602e-06, "loss": 0.6393, "step": 4053 }, { "epoch": 0.6046459599537641, "grad_norm": 1.2296521663665771, "learning_rate": 7.136768217216227e-06, "loss": 0.6877, "step": 4054 }, { "epoch": 0.6047951079458593, "grad_norm": 1.1604886054992676, "learning_rate": 7.132139108508678e-06, "loss": 0.6833, "step": 4055 }, { "epoch": 0.6049442559379544, "grad_norm": 1.1475696563720703, "learning_rate": 7.12751066932164e-06, "loss": 0.6028, "step": 4056 }, { "epoch": 0.6050934039300496, "grad_norm": 1.2041285037994385, "learning_rate": 7.122882900735653e-06, "loss": 0.6918, "step": 4057 }, { "epoch": 0.6052425519221447, "grad_norm": 1.126793622970581, "learning_rate": 7.118255803831104e-06, "loss": 0.6413, "step": 4058 }, { "epoch": 0.6053916999142399, "grad_norm": 1.2504161596298218, "learning_rate": 7.113629379688212e-06, "loss": 0.6635, "step": 4059 }, { "epoch": 0.6055408479063351, "grad_norm": 1.186137080192566, "learning_rate": 7.109003629387052e-06, "loss": 0.6279, "step": 4060 }, { "epoch": 0.6056899958984302, "grad_norm": 1.0945991277694702, "learning_rate": 7.104378554007527e-06, "loss": 0.5765, "step": 4061 }, { "epoch": 0.6058391438905254, "grad_norm": 1.092051386833191, "learning_rate": 7.099754154629399e-06, "loss": 0.5794, "step": 4062 }, { "epoch": 0.6059882918826205, "grad_norm": 1.1683764457702637, "learning_rate": 7.09513043233226e-06, "loss": 0.579, "step": 4063 }, { "epoch": 0.6061374398747157, "grad_norm": 1.1800446510314941, "learning_rate": 7.090507388195549e-06, "loss": 0.5718, "step": 4064 }, { "epoch": 0.6062865878668109, "grad_norm": 1.1725947856903076, "learning_rate": 7.085885023298541e-06, "loss": 0.6636, "step": 4065 }, { "epoch": 0.606435735858906, "grad_norm": 1.2753671407699585, "learning_rate": 7.081263338720362e-06, "loss": 0.6475, "step": 4066 }, { "epoch": 0.6065848838510012, "grad_norm": 1.2310484647750854, "learning_rate": 7.076642335539969e-06, "loss": 0.7001, "step": 4067 }, { "epoch": 0.6067340318430963, "grad_norm": 1.2415623664855957, "learning_rate": 7.072022014836172e-06, "loss": 0.6338, "step": 4068 }, { "epoch": 0.6068831798351915, "grad_norm": 1.0991805791854858, "learning_rate": 7.0674023776876086e-06, "loss": 0.5337, "step": 4069 }, { "epoch": 0.6070323278272867, "grad_norm": 1.0800890922546387, "learning_rate": 7.062783425172759e-06, "loss": 0.624, "step": 4070 }, { "epoch": 0.6071814758193818, "grad_norm": 1.1021597385406494, "learning_rate": 7.058165158369955e-06, "loss": 0.6038, "step": 4071 }, { "epoch": 0.607330623811477, "grad_norm": 1.2189574241638184, "learning_rate": 7.0535475783573606e-06, "loss": 0.5573, "step": 4072 }, { "epoch": 0.607479771803572, "grad_norm": 1.151618242263794, "learning_rate": 7.048930686212974e-06, "loss": 0.589, "step": 4073 }, { "epoch": 0.6076289197956672, "grad_norm": 1.0805754661560059, "learning_rate": 7.044314483014642e-06, "loss": 0.6035, "step": 4074 }, { "epoch": 0.6077780677877624, "grad_norm": 1.241652250289917, "learning_rate": 7.039698969840049e-06, "loss": 0.6426, "step": 4075 }, { "epoch": 0.6079272157798575, "grad_norm": 1.155410885810852, "learning_rate": 7.035084147766709e-06, "loss": 0.6137, "step": 4076 }, { "epoch": 0.6080763637719527, "grad_norm": 1.1380633115768433, "learning_rate": 7.030470017871989e-06, "loss": 0.6095, "step": 4077 }, { "epoch": 0.6082255117640478, "grad_norm": 1.1721347570419312, "learning_rate": 7.025856581233078e-06, "loss": 0.585, "step": 4078 }, { "epoch": 0.608374659756143, "grad_norm": 1.1281230449676514, "learning_rate": 7.021243838927021e-06, "loss": 0.563, "step": 4079 }, { "epoch": 0.6085238077482382, "grad_norm": 1.1936787366867065, "learning_rate": 7.016631792030692e-06, "loss": 0.6608, "step": 4080 }, { "epoch": 0.6086729557403333, "grad_norm": 1.192101240158081, "learning_rate": 7.012020441620801e-06, "loss": 0.6423, "step": 4081 }, { "epoch": 0.6088221037324285, "grad_norm": 1.1728458404541016, "learning_rate": 7.007409788773895e-06, "loss": 0.6026, "step": 4082 }, { "epoch": 0.6089712517245237, "grad_norm": 1.243391513824463, "learning_rate": 7.002799834566365e-06, "loss": 0.651, "step": 4083 }, { "epoch": 0.6091203997166188, "grad_norm": 1.1735103130340576, "learning_rate": 6.998190580074429e-06, "loss": 0.5897, "step": 4084 }, { "epoch": 0.609269547708714, "grad_norm": 1.0534321069717407, "learning_rate": 6.993582026374152e-06, "loss": 0.5765, "step": 4085 }, { "epoch": 0.6094186957008091, "grad_norm": 1.0140705108642578, "learning_rate": 6.988974174541428e-06, "loss": 0.5394, "step": 4086 }, { "epoch": 0.6095678436929043, "grad_norm": 1.0925463438034058, "learning_rate": 6.9843670256519855e-06, "loss": 0.6014, "step": 4087 }, { "epoch": 0.6097169916849995, "grad_norm": 1.333536982536316, "learning_rate": 6.979760580781399e-06, "loss": 0.6271, "step": 4088 }, { "epoch": 0.6098661396770946, "grad_norm": 1.1592082977294922, "learning_rate": 6.975154841005074e-06, "loss": 0.5987, "step": 4089 }, { "epoch": 0.6100152876691898, "grad_norm": 1.2606751918792725, "learning_rate": 6.970549807398244e-06, "loss": 0.6364, "step": 4090 }, { "epoch": 0.6101644356612849, "grad_norm": 1.1661620140075684, "learning_rate": 6.965945481035989e-06, "loss": 0.6148, "step": 4091 }, { "epoch": 0.6103135836533801, "grad_norm": 1.1042499542236328, "learning_rate": 6.961341862993215e-06, "loss": 0.7241, "step": 4092 }, { "epoch": 0.6104627316454753, "grad_norm": 1.0927627086639404, "learning_rate": 6.9567389543446665e-06, "loss": 0.6462, "step": 4093 }, { "epoch": 0.6106118796375704, "grad_norm": 1.29819655418396, "learning_rate": 6.952136756164922e-06, "loss": 0.7144, "step": 4094 }, { "epoch": 0.6107610276296656, "grad_norm": 1.3124638795852661, "learning_rate": 6.947535269528396e-06, "loss": 0.6341, "step": 4095 }, { "epoch": 0.6109101756217606, "grad_norm": 1.105712652206421, "learning_rate": 6.942934495509329e-06, "loss": 0.6407, "step": 4096 }, { "epoch": 0.6110593236138558, "grad_norm": 1.139723539352417, "learning_rate": 6.938334435181812e-06, "loss": 0.6268, "step": 4097 }, { "epoch": 0.611208471605951, "grad_norm": 1.1889961957931519, "learning_rate": 6.933735089619751e-06, "loss": 0.5954, "step": 4098 }, { "epoch": 0.6113576195980461, "grad_norm": 1.2212237119674683, "learning_rate": 6.929136459896893e-06, "loss": 0.6621, "step": 4099 }, { "epoch": 0.6115067675901413, "grad_norm": 1.113594651222229, "learning_rate": 6.924538547086822e-06, "loss": 0.6172, "step": 4100 }, { "epoch": 0.6116559155822364, "grad_norm": 1.1723895072937012, "learning_rate": 6.919941352262944e-06, "loss": 0.5887, "step": 4101 }, { "epoch": 0.6118050635743316, "grad_norm": 1.26467764377594, "learning_rate": 6.915344876498509e-06, "loss": 0.6462, "step": 4102 }, { "epoch": 0.6119542115664268, "grad_norm": 1.2005565166473389, "learning_rate": 6.910749120866592e-06, "loss": 0.6648, "step": 4103 }, { "epoch": 0.6121033595585219, "grad_norm": 1.2168599367141724, "learning_rate": 6.9061540864400986e-06, "loss": 0.5662, "step": 4104 }, { "epoch": 0.6122525075506171, "grad_norm": 1.2981282472610474, "learning_rate": 6.901559774291769e-06, "loss": 0.6956, "step": 4105 }, { "epoch": 0.6124016555427123, "grad_norm": 1.284963846206665, "learning_rate": 6.8969661854941826e-06, "loss": 0.6137, "step": 4106 }, { "epoch": 0.6125508035348074, "grad_norm": 1.026064157485962, "learning_rate": 6.892373321119734e-06, "loss": 0.521, "step": 4107 }, { "epoch": 0.6126999515269026, "grad_norm": 1.1240333318710327, "learning_rate": 6.8877811822406625e-06, "loss": 0.6377, "step": 4108 }, { "epoch": 0.6128490995189977, "grad_norm": 1.1938505172729492, "learning_rate": 6.883189769929028e-06, "loss": 0.5752, "step": 4109 }, { "epoch": 0.6129982475110929, "grad_norm": 1.1932766437530518, "learning_rate": 6.878599085256728e-06, "loss": 0.5864, "step": 4110 }, { "epoch": 0.6131473955031881, "grad_norm": 1.1134767532348633, "learning_rate": 6.874009129295487e-06, "loss": 0.6263, "step": 4111 }, { "epoch": 0.6132965434952832, "grad_norm": 1.2663421630859375, "learning_rate": 6.8694199031168555e-06, "loss": 0.622, "step": 4112 }, { "epoch": 0.6134456914873784, "grad_norm": 1.0819233655929565, "learning_rate": 6.864831407792218e-06, "loss": 0.5762, "step": 4113 }, { "epoch": 0.6135948394794735, "grad_norm": 1.0929878950119019, "learning_rate": 6.8602436443927975e-06, "loss": 0.6052, "step": 4114 }, { "epoch": 0.6137439874715687, "grad_norm": 1.1538455486297607, "learning_rate": 6.855656613989627e-06, "loss": 0.5622, "step": 4115 }, { "epoch": 0.6138931354636639, "grad_norm": 1.155116081237793, "learning_rate": 6.851070317653585e-06, "loss": 0.6334, "step": 4116 }, { "epoch": 0.614042283455759, "grad_norm": 1.0773866176605225, "learning_rate": 6.846484756455368e-06, "loss": 0.5649, "step": 4117 }, { "epoch": 0.6141914314478542, "grad_norm": 1.2287710905075073, "learning_rate": 6.841899931465503e-06, "loss": 0.6879, "step": 4118 }, { "epoch": 0.6143405794399492, "grad_norm": 1.1817806959152222, "learning_rate": 6.837315843754351e-06, "loss": 0.5906, "step": 4119 }, { "epoch": 0.6144897274320444, "grad_norm": 0.8422433137893677, "learning_rate": 6.832732494392092e-06, "loss": 0.6348, "step": 4120 }, { "epoch": 0.6146388754241396, "grad_norm": 1.053523302078247, "learning_rate": 6.828149884448743e-06, "loss": 0.5875, "step": 4121 }, { "epoch": 0.6147880234162347, "grad_norm": 1.227353572845459, "learning_rate": 6.823568014994138e-06, "loss": 0.6137, "step": 4122 }, { "epoch": 0.6149371714083299, "grad_norm": 1.2161586284637451, "learning_rate": 6.818986887097949e-06, "loss": 0.7093, "step": 4123 }, { "epoch": 0.615086319400425, "grad_norm": 1.128535270690918, "learning_rate": 6.814406501829668e-06, "loss": 0.5894, "step": 4124 }, { "epoch": 0.6152354673925202, "grad_norm": 1.1240389347076416, "learning_rate": 6.809826860258617e-06, "loss": 0.6111, "step": 4125 }, { "epoch": 0.6153846153846154, "grad_norm": 1.1382412910461426, "learning_rate": 6.8052479634539395e-06, "loss": 0.5569, "step": 4126 }, { "epoch": 0.6155337633767105, "grad_norm": 0.8707185387611389, "learning_rate": 6.8006698124846106e-06, "loss": 0.6643, "step": 4127 }, { "epoch": 0.6156829113688057, "grad_norm": 1.1216237545013428, "learning_rate": 6.796092408419429e-06, "loss": 0.6135, "step": 4128 }, { "epoch": 0.6158320593609009, "grad_norm": 1.103313684463501, "learning_rate": 6.791515752327016e-06, "loss": 0.5737, "step": 4129 }, { "epoch": 0.615981207352996, "grad_norm": 1.1228697299957275, "learning_rate": 6.786939845275826e-06, "loss": 0.6061, "step": 4130 }, { "epoch": 0.6161303553450912, "grad_norm": 1.268357515335083, "learning_rate": 6.782364688334127e-06, "loss": 0.5917, "step": 4131 }, { "epoch": 0.6162795033371863, "grad_norm": 1.276932716369629, "learning_rate": 6.777790282570025e-06, "loss": 0.6912, "step": 4132 }, { "epoch": 0.6164286513292815, "grad_norm": 1.1846678256988525, "learning_rate": 6.773216629051444e-06, "loss": 0.7042, "step": 4133 }, { "epoch": 0.6165777993213767, "grad_norm": 1.1056969165802002, "learning_rate": 6.768643728846132e-06, "loss": 0.6052, "step": 4134 }, { "epoch": 0.6167269473134718, "grad_norm": 1.007955551147461, "learning_rate": 6.764071583021659e-06, "loss": 0.5219, "step": 4135 }, { "epoch": 0.616876095305567, "grad_norm": 1.0907988548278809, "learning_rate": 6.759500192645425e-06, "loss": 0.6353, "step": 4136 }, { "epoch": 0.6170252432976621, "grad_norm": 1.113669514656067, "learning_rate": 6.754929558784648e-06, "loss": 0.5911, "step": 4137 }, { "epoch": 0.6171743912897573, "grad_norm": 1.1397196054458618, "learning_rate": 6.750359682506376e-06, "loss": 0.6715, "step": 4138 }, { "epoch": 0.6173235392818525, "grad_norm": 1.1070427894592285, "learning_rate": 6.745790564877471e-06, "loss": 0.5391, "step": 4139 }, { "epoch": 0.6174726872739476, "grad_norm": 1.1594847440719604, "learning_rate": 6.741222206964622e-06, "loss": 0.5748, "step": 4140 }, { "epoch": 0.6176218352660428, "grad_norm": 1.1938743591308594, "learning_rate": 6.7366546098343455e-06, "loss": 0.5908, "step": 4141 }, { "epoch": 0.6177709832581378, "grad_norm": 1.2193189859390259, "learning_rate": 6.732087774552978e-06, "loss": 0.6631, "step": 4142 }, { "epoch": 0.617920131250233, "grad_norm": 1.1017249822616577, "learning_rate": 6.7275217021866705e-06, "loss": 0.556, "step": 4143 }, { "epoch": 0.6180692792423282, "grad_norm": 1.2085331678390503, "learning_rate": 6.722956393801408e-06, "loss": 0.6508, "step": 4144 }, { "epoch": 0.6182184272344233, "grad_norm": 0.7766823172569275, "learning_rate": 6.718391850462986e-06, "loss": 0.6159, "step": 4145 }, { "epoch": 0.6183675752265185, "grad_norm": 1.160956621170044, "learning_rate": 6.7138280732370274e-06, "loss": 0.6217, "step": 4146 }, { "epoch": 0.6185167232186136, "grad_norm": 1.1928826570510864, "learning_rate": 6.709265063188978e-06, "loss": 0.6322, "step": 4147 }, { "epoch": 0.6186658712107088, "grad_norm": 1.1706790924072266, "learning_rate": 6.704702821384096e-06, "loss": 0.6166, "step": 4148 }, { "epoch": 0.618815019202804, "grad_norm": 1.068593144416809, "learning_rate": 6.700141348887472e-06, "loss": 0.516, "step": 4149 }, { "epoch": 0.6189641671948991, "grad_norm": 1.1107361316680908, "learning_rate": 6.69558064676401e-06, "loss": 0.5545, "step": 4150 }, { "epoch": 0.6191133151869943, "grad_norm": 1.1856143474578857, "learning_rate": 6.691020716078434e-06, "loss": 0.6569, "step": 4151 }, { "epoch": 0.6192624631790895, "grad_norm": 1.0995944738388062, "learning_rate": 6.68646155789529e-06, "loss": 0.5992, "step": 4152 }, { "epoch": 0.6194116111711846, "grad_norm": 1.1536650657653809, "learning_rate": 6.6819031732789405e-06, "loss": 0.654, "step": 4153 }, { "epoch": 0.6195607591632798, "grad_norm": 1.2778555154800415, "learning_rate": 6.677345563293571e-06, "loss": 0.6578, "step": 4154 }, { "epoch": 0.6197099071553749, "grad_norm": 1.200490117073059, "learning_rate": 6.6727887290031865e-06, "loss": 0.6193, "step": 4155 }, { "epoch": 0.6198590551474701, "grad_norm": 1.0903170108795166, "learning_rate": 6.668232671471605e-06, "loss": 0.4855, "step": 4156 }, { "epoch": 0.6200082031395653, "grad_norm": 1.2303956747055054, "learning_rate": 6.663677391762468e-06, "loss": 0.6068, "step": 4157 }, { "epoch": 0.6201573511316604, "grad_norm": 1.1688419580459595, "learning_rate": 6.65912289093924e-06, "loss": 0.5768, "step": 4158 }, { "epoch": 0.6203064991237556, "grad_norm": 1.2644885778427124, "learning_rate": 6.654569170065195e-06, "loss": 0.6047, "step": 4159 }, { "epoch": 0.6204556471158507, "grad_norm": 1.0642993450164795, "learning_rate": 6.6500162302034265e-06, "loss": 0.6178, "step": 4160 }, { "epoch": 0.6206047951079459, "grad_norm": 1.2264796495437622, "learning_rate": 6.6454640724168514e-06, "loss": 0.6736, "step": 4161 }, { "epoch": 0.6207539431000411, "grad_norm": 1.3141261339187622, "learning_rate": 6.640912697768196e-06, "loss": 0.6562, "step": 4162 }, { "epoch": 0.6209030910921362, "grad_norm": 1.3194160461425781, "learning_rate": 6.636362107320011e-06, "loss": 0.7139, "step": 4163 }, { "epoch": 0.6210522390842314, "grad_norm": 1.1284503936767578, "learning_rate": 6.631812302134662e-06, "loss": 0.5835, "step": 4164 }, { "epoch": 0.6212013870763264, "grad_norm": 1.145762324333191, "learning_rate": 6.6272632832743234e-06, "loss": 0.6609, "step": 4165 }, { "epoch": 0.6213505350684216, "grad_norm": 1.2550379037857056, "learning_rate": 6.6227150518009965e-06, "loss": 0.6175, "step": 4166 }, { "epoch": 0.6214996830605168, "grad_norm": 1.1776145696640015, "learning_rate": 6.6181676087765e-06, "loss": 0.5553, "step": 4167 }, { "epoch": 0.6216488310526119, "grad_norm": 1.1010571718215942, "learning_rate": 6.613620955262459e-06, "loss": 0.522, "step": 4168 }, { "epoch": 0.6217979790447071, "grad_norm": 1.190529227256775, "learning_rate": 6.60907509232032e-06, "loss": 0.6235, "step": 4169 }, { "epoch": 0.6219471270368022, "grad_norm": 1.185420274734497, "learning_rate": 6.604530021011344e-06, "loss": 0.5891, "step": 4170 }, { "epoch": 0.6220962750288974, "grad_norm": 1.186896562576294, "learning_rate": 6.599985742396604e-06, "loss": 0.5727, "step": 4171 }, { "epoch": 0.6222454230209926, "grad_norm": 1.2071378231048584, "learning_rate": 6.595442257536995e-06, "loss": 0.6648, "step": 4172 }, { "epoch": 0.6223945710130877, "grad_norm": 1.2922279834747314, "learning_rate": 6.590899567493221e-06, "loss": 0.6734, "step": 4173 }, { "epoch": 0.6225437190051829, "grad_norm": 1.1610636711120605, "learning_rate": 6.586357673325798e-06, "loss": 0.6859, "step": 4174 }, { "epoch": 0.622692866997278, "grad_norm": 1.0851774215698242, "learning_rate": 6.58181657609507e-06, "loss": 0.5929, "step": 4175 }, { "epoch": 0.6228420149893732, "grad_norm": 1.1975210905075073, "learning_rate": 6.57727627686118e-06, "loss": 0.5925, "step": 4176 }, { "epoch": 0.6229911629814684, "grad_norm": 1.2190359830856323, "learning_rate": 6.572736776684087e-06, "loss": 0.6014, "step": 4177 }, { "epoch": 0.6231403109735635, "grad_norm": 1.2146178483963013, "learning_rate": 6.568198076623571e-06, "loss": 0.6343, "step": 4178 }, { "epoch": 0.6232894589656587, "grad_norm": 1.246361255645752, "learning_rate": 6.563660177739217e-06, "loss": 0.6072, "step": 4179 }, { "epoch": 0.6234386069577539, "grad_norm": 1.2898324728012085, "learning_rate": 6.5591230810904316e-06, "loss": 0.6336, "step": 4180 }, { "epoch": 0.623587754949849, "grad_norm": 1.0443202257156372, "learning_rate": 6.554586787736425e-06, "loss": 0.5642, "step": 4181 }, { "epoch": 0.6237369029419442, "grad_norm": 1.3150115013122559, "learning_rate": 6.550051298736223e-06, "loss": 0.6731, "step": 4182 }, { "epoch": 0.6238860509340393, "grad_norm": 1.1597124338150024, "learning_rate": 6.5455166151486645e-06, "loss": 0.6181, "step": 4183 }, { "epoch": 0.6240351989261345, "grad_norm": 1.2323293685913086, "learning_rate": 6.540982738032406e-06, "loss": 0.6337, "step": 4184 }, { "epoch": 0.6241843469182297, "grad_norm": 1.1933834552764893, "learning_rate": 6.536449668445905e-06, "loss": 0.7068, "step": 4185 }, { "epoch": 0.6243334949103247, "grad_norm": 1.2256548404693604, "learning_rate": 6.531917407447441e-06, "loss": 0.5618, "step": 4186 }, { "epoch": 0.62448264290242, "grad_norm": 1.1474223136901855, "learning_rate": 6.527385956095094e-06, "loss": 0.574, "step": 4187 }, { "epoch": 0.624631790894515, "grad_norm": 1.1631064414978027, "learning_rate": 6.52285531544676e-06, "loss": 0.5492, "step": 4188 }, { "epoch": 0.6247809388866102, "grad_norm": 0.7839139699935913, "learning_rate": 6.518325486560151e-06, "loss": 0.6408, "step": 4189 }, { "epoch": 0.6249300868787054, "grad_norm": 1.1279927492141724, "learning_rate": 6.5137964704927795e-06, "loss": 0.5854, "step": 4190 }, { "epoch": 0.6250792348708005, "grad_norm": 1.0131911039352417, "learning_rate": 6.509268268301976e-06, "loss": 0.547, "step": 4191 }, { "epoch": 0.6252283828628957, "grad_norm": 1.183897614479065, "learning_rate": 6.504740881044875e-06, "loss": 0.6048, "step": 4192 }, { "epoch": 0.6253775308549908, "grad_norm": 1.329383373260498, "learning_rate": 6.500214309778432e-06, "loss": 0.6748, "step": 4193 }, { "epoch": 0.625526678847086, "grad_norm": 1.1555566787719727, "learning_rate": 6.495688555559396e-06, "loss": 0.5692, "step": 4194 }, { "epoch": 0.6256758268391812, "grad_norm": 1.2739076614379883, "learning_rate": 6.491163619444341e-06, "loss": 0.6539, "step": 4195 }, { "epoch": 0.6258249748312763, "grad_norm": 1.1178979873657227, "learning_rate": 6.4866395024896335e-06, "loss": 0.586, "step": 4196 }, { "epoch": 0.6259741228233715, "grad_norm": 1.1309788227081299, "learning_rate": 6.4821162057514635e-06, "loss": 0.5963, "step": 4197 }, { "epoch": 0.6261232708154666, "grad_norm": 1.0661935806274414, "learning_rate": 6.477593730285821e-06, "loss": 0.5655, "step": 4198 }, { "epoch": 0.6262724188075618, "grad_norm": 1.237443208694458, "learning_rate": 6.4730720771485104e-06, "loss": 0.6499, "step": 4199 }, { "epoch": 0.626421566799657, "grad_norm": 1.0024603605270386, "learning_rate": 6.468551247395136e-06, "loss": 0.5037, "step": 4200 }, { "epoch": 0.6265707147917521, "grad_norm": 1.1652617454528809, "learning_rate": 6.464031242081114e-06, "loss": 0.5967, "step": 4201 }, { "epoch": 0.6267198627838473, "grad_norm": 1.158070683479309, "learning_rate": 6.459512062261674e-06, "loss": 0.6262, "step": 4202 }, { "epoch": 0.6268690107759425, "grad_norm": 1.4067840576171875, "learning_rate": 6.4549937089918464e-06, "loss": 0.6328, "step": 4203 }, { "epoch": 0.6270181587680376, "grad_norm": 1.1754347085952759, "learning_rate": 6.450476183326466e-06, "loss": 0.6017, "step": 4204 }, { "epoch": 0.6271673067601328, "grad_norm": 1.250186800956726, "learning_rate": 6.445959486320184e-06, "loss": 0.6084, "step": 4205 }, { "epoch": 0.6273164547522279, "grad_norm": 1.1281884908676147, "learning_rate": 6.441443619027445e-06, "loss": 0.6186, "step": 4206 }, { "epoch": 0.6274656027443231, "grad_norm": 1.0631994009017944, "learning_rate": 6.4369285825025115e-06, "loss": 0.5137, "step": 4207 }, { "epoch": 0.6276147507364183, "grad_norm": 1.1622523069381714, "learning_rate": 6.432414377799449e-06, "loss": 0.6655, "step": 4208 }, { "epoch": 0.6277638987285133, "grad_norm": 1.1254209280014038, "learning_rate": 6.4279010059721194e-06, "loss": 0.5503, "step": 4209 }, { "epoch": 0.6279130467206085, "grad_norm": 1.0588116645812988, "learning_rate": 6.423388468074207e-06, "loss": 0.5847, "step": 4210 }, { "epoch": 0.6280621947127036, "grad_norm": 1.1016252040863037, "learning_rate": 6.418876765159195e-06, "loss": 0.5601, "step": 4211 }, { "epoch": 0.6282113427047988, "grad_norm": 1.0910134315490723, "learning_rate": 6.414365898280362e-06, "loss": 0.6099, "step": 4212 }, { "epoch": 0.628360490696894, "grad_norm": 1.1509674787521362, "learning_rate": 6.409855868490799e-06, "loss": 0.5862, "step": 4213 }, { "epoch": 0.6285096386889891, "grad_norm": 1.1466894149780273, "learning_rate": 6.405346676843406e-06, "loss": 0.5963, "step": 4214 }, { "epoch": 0.6286587866810843, "grad_norm": 1.0832338333129883, "learning_rate": 6.400838324390878e-06, "loss": 0.5454, "step": 4215 }, { "epoch": 0.6288079346731794, "grad_norm": 1.2041187286376953, "learning_rate": 6.3963308121857234e-06, "loss": 0.6228, "step": 4216 }, { "epoch": 0.6289570826652746, "grad_norm": 1.1370872259140015, "learning_rate": 6.391824141280247e-06, "loss": 0.5668, "step": 4217 }, { "epoch": 0.6291062306573698, "grad_norm": 1.211699366569519, "learning_rate": 6.387318312726558e-06, "loss": 0.6559, "step": 4218 }, { "epoch": 0.6292553786494649, "grad_norm": 1.18429696559906, "learning_rate": 6.382813327576574e-06, "loss": 0.5835, "step": 4219 }, { "epoch": 0.6294045266415601, "grad_norm": 1.249714732170105, "learning_rate": 6.378309186882016e-06, "loss": 0.6109, "step": 4220 }, { "epoch": 0.6295536746336552, "grad_norm": 1.1834440231323242, "learning_rate": 6.373805891694398e-06, "loss": 0.5631, "step": 4221 }, { "epoch": 0.6297028226257504, "grad_norm": 1.2193135023117065, "learning_rate": 6.369303443065047e-06, "loss": 0.634, "step": 4222 }, { "epoch": 0.6298519706178456, "grad_norm": 1.08097505569458, "learning_rate": 6.364801842045088e-06, "loss": 0.5635, "step": 4223 }, { "epoch": 0.6300011186099407, "grad_norm": 1.1487908363342285, "learning_rate": 6.360301089685445e-06, "loss": 0.5728, "step": 4224 }, { "epoch": 0.6301502666020359, "grad_norm": 1.1769016981124878, "learning_rate": 6.355801187036854e-06, "loss": 0.6313, "step": 4225 }, { "epoch": 0.6302994145941311, "grad_norm": 1.1913657188415527, "learning_rate": 6.3513021351498404e-06, "loss": 0.5827, "step": 4226 }, { "epoch": 0.6304485625862262, "grad_norm": 1.1520053148269653, "learning_rate": 6.346803935074737e-06, "loss": 0.5348, "step": 4227 }, { "epoch": 0.6305977105783214, "grad_norm": 1.2139596939086914, "learning_rate": 6.342306587861683e-06, "loss": 0.5855, "step": 4228 }, { "epoch": 0.6307468585704165, "grad_norm": 1.2148988246917725, "learning_rate": 6.337810094560609e-06, "loss": 0.6278, "step": 4229 }, { "epoch": 0.6308960065625117, "grad_norm": 1.098937749862671, "learning_rate": 6.333314456221249e-06, "loss": 0.6197, "step": 4230 }, { "epoch": 0.6310451545546069, "grad_norm": 1.071350336074829, "learning_rate": 6.328819673893143e-06, "loss": 0.5803, "step": 4231 }, { "epoch": 0.631194302546702, "grad_norm": 1.1280704736709595, "learning_rate": 6.324325748625619e-06, "loss": 0.609, "step": 4232 }, { "epoch": 0.6313434505387971, "grad_norm": 1.146059274673462, "learning_rate": 6.3198326814678225e-06, "loss": 0.5844, "step": 4233 }, { "epoch": 0.6314925985308922, "grad_norm": 1.1338534355163574, "learning_rate": 6.31534047346868e-06, "loss": 0.6698, "step": 4234 }, { "epoch": 0.6316417465229874, "grad_norm": 1.0910754203796387, "learning_rate": 6.310849125676934e-06, "loss": 0.6314, "step": 4235 }, { "epoch": 0.6317908945150826, "grad_norm": 1.1973451375961304, "learning_rate": 6.306358639141109e-06, "loss": 0.5956, "step": 4236 }, { "epoch": 0.6319400425071777, "grad_norm": 1.2311497926712036, "learning_rate": 6.301869014909548e-06, "loss": 0.6964, "step": 4237 }, { "epoch": 0.6320891904992729, "grad_norm": 1.0521860122680664, "learning_rate": 6.297380254030376e-06, "loss": 0.4808, "step": 4238 }, { "epoch": 0.632238338491368, "grad_norm": 1.1500927209854126, "learning_rate": 6.292892357551527e-06, "loss": 0.5628, "step": 4239 }, { "epoch": 0.6323874864834632, "grad_norm": 1.17449152469635, "learning_rate": 6.288405326520726e-06, "loss": 0.6401, "step": 4240 }, { "epoch": 0.6325366344755584, "grad_norm": 1.1867114305496216, "learning_rate": 6.283919161985501e-06, "loss": 0.5071, "step": 4241 }, { "epoch": 0.6326857824676535, "grad_norm": 1.145133137702942, "learning_rate": 6.279433864993176e-06, "loss": 0.5771, "step": 4242 }, { "epoch": 0.6328349304597487, "grad_norm": 1.1437534093856812, "learning_rate": 6.274949436590869e-06, "loss": 0.6287, "step": 4243 }, { "epoch": 0.6329840784518438, "grad_norm": 1.180616855621338, "learning_rate": 6.2704658778255e-06, "loss": 0.6037, "step": 4244 }, { "epoch": 0.633133226443939, "grad_norm": 1.0649313926696777, "learning_rate": 6.2659831897437895e-06, "loss": 0.5834, "step": 4245 }, { "epoch": 0.6332823744360342, "grad_norm": 1.2374104261398315, "learning_rate": 6.261501373392245e-06, "loss": 0.5511, "step": 4246 }, { "epoch": 0.6334315224281293, "grad_norm": 1.223200798034668, "learning_rate": 6.257020429817177e-06, "loss": 0.6178, "step": 4247 }, { "epoch": 0.6335806704202245, "grad_norm": 1.133970856666565, "learning_rate": 6.252540360064689e-06, "loss": 0.6149, "step": 4248 }, { "epoch": 0.6337298184123197, "grad_norm": 1.1683799028396606, "learning_rate": 6.248061165180682e-06, "loss": 0.5818, "step": 4249 }, { "epoch": 0.6338789664044148, "grad_norm": 1.224802017211914, "learning_rate": 6.243582846210856e-06, "loss": 0.6811, "step": 4250 }, { "epoch": 0.63402811439651, "grad_norm": 1.1752634048461914, "learning_rate": 6.239105404200698e-06, "loss": 0.6051, "step": 4251 }, { "epoch": 0.6341772623886051, "grad_norm": 1.0827823877334595, "learning_rate": 6.2346288401955e-06, "loss": 0.6017, "step": 4252 }, { "epoch": 0.6343264103807003, "grad_norm": 1.1317812204360962, "learning_rate": 6.230153155240339e-06, "loss": 0.5719, "step": 4253 }, { "epoch": 0.6344755583727955, "grad_norm": 1.2362850904464722, "learning_rate": 6.225678350380102e-06, "loss": 0.652, "step": 4254 }, { "epoch": 0.6346247063648905, "grad_norm": 1.1278860569000244, "learning_rate": 6.221204426659452e-06, "loss": 0.5532, "step": 4255 }, { "epoch": 0.6347738543569857, "grad_norm": 1.224298357963562, "learning_rate": 6.21673138512286e-06, "loss": 0.5853, "step": 4256 }, { "epoch": 0.6349230023490808, "grad_norm": 1.1008528470993042, "learning_rate": 6.212259226814583e-06, "loss": 0.4865, "step": 4257 }, { "epoch": 0.635072150341176, "grad_norm": 1.1359153985977173, "learning_rate": 6.207787952778679e-06, "loss": 0.632, "step": 4258 }, { "epoch": 0.6352212983332712, "grad_norm": 1.211444616317749, "learning_rate": 6.203317564058993e-06, "loss": 0.6051, "step": 4259 }, { "epoch": 0.6353704463253663, "grad_norm": 1.2036323547363281, "learning_rate": 6.1988480616991635e-06, "loss": 0.5661, "step": 4260 }, { "epoch": 0.6355195943174615, "grad_norm": 1.0998445749282837, "learning_rate": 6.19437944674263e-06, "loss": 0.5245, "step": 4261 }, { "epoch": 0.6356687423095566, "grad_norm": 1.025225281715393, "learning_rate": 6.189911720232612e-06, "loss": 0.5618, "step": 4262 }, { "epoch": 0.6358178903016518, "grad_norm": 1.0831985473632812, "learning_rate": 6.185444883212135e-06, "loss": 0.6305, "step": 4263 }, { "epoch": 0.635967038293747, "grad_norm": 1.1452654600143433, "learning_rate": 6.180978936724011e-06, "loss": 0.5531, "step": 4264 }, { "epoch": 0.6361161862858421, "grad_norm": 1.0964947938919067, "learning_rate": 6.176513881810844e-06, "loss": 0.5779, "step": 4265 }, { "epoch": 0.6362653342779373, "grad_norm": 1.1076241731643677, "learning_rate": 6.172049719515023e-06, "loss": 0.5714, "step": 4266 }, { "epoch": 0.6364144822700324, "grad_norm": 1.0952494144439697, "learning_rate": 6.167586450878743e-06, "loss": 0.4994, "step": 4267 }, { "epoch": 0.6365636302621276, "grad_norm": 1.1801142692565918, "learning_rate": 6.163124076943978e-06, "loss": 0.6751, "step": 4268 }, { "epoch": 0.6367127782542228, "grad_norm": 1.0924152135849, "learning_rate": 6.158662598752501e-06, "loss": 0.563, "step": 4269 }, { "epoch": 0.6368619262463179, "grad_norm": 1.2074286937713623, "learning_rate": 6.154202017345872e-06, "loss": 0.5529, "step": 4270 }, { "epoch": 0.6370110742384131, "grad_norm": 1.144215703010559, "learning_rate": 6.1497423337654365e-06, "loss": 0.5715, "step": 4271 }, { "epoch": 0.6371602222305082, "grad_norm": 1.146043062210083, "learning_rate": 6.145283549052342e-06, "loss": 0.5531, "step": 4272 }, { "epoch": 0.6373093702226034, "grad_norm": 1.2444308996200562, "learning_rate": 6.140825664247523e-06, "loss": 0.7306, "step": 4273 }, { "epoch": 0.6374585182146986, "grad_norm": 1.2859879732131958, "learning_rate": 6.136368680391695e-06, "loss": 0.6536, "step": 4274 }, { "epoch": 0.6376076662067937, "grad_norm": 1.241695761680603, "learning_rate": 6.1319125985253754e-06, "loss": 0.6714, "step": 4275 }, { "epoch": 0.6377568141988889, "grad_norm": 1.1798995733261108, "learning_rate": 6.1274574196888606e-06, "loss": 0.6452, "step": 4276 }, { "epoch": 0.637905962190984, "grad_norm": 1.0188349485397339, "learning_rate": 6.123003144922242e-06, "loss": 0.5304, "step": 4277 }, { "epoch": 0.6380551101830791, "grad_norm": 1.0988917350769043, "learning_rate": 6.1185497752654e-06, "loss": 0.6431, "step": 4278 }, { "epoch": 0.6382042581751743, "grad_norm": 1.1353845596313477, "learning_rate": 6.114097311757996e-06, "loss": 0.6583, "step": 4279 }, { "epoch": 0.6383534061672694, "grad_norm": 0.8614224791526794, "learning_rate": 6.109645755439495e-06, "loss": 0.6591, "step": 4280 }, { "epoch": 0.6385025541593646, "grad_norm": 1.2512317895889282, "learning_rate": 6.105195107349137e-06, "loss": 0.6302, "step": 4281 }, { "epoch": 0.6386517021514598, "grad_norm": 1.0820077657699585, "learning_rate": 6.100745368525955e-06, "loss": 0.5355, "step": 4282 }, { "epoch": 0.6388008501435549, "grad_norm": 1.1078770160675049, "learning_rate": 6.09629654000877e-06, "loss": 0.6406, "step": 4283 }, { "epoch": 0.6389499981356501, "grad_norm": 1.149470329284668, "learning_rate": 6.091848622836187e-06, "loss": 0.5344, "step": 4284 }, { "epoch": 0.6390991461277452, "grad_norm": 1.252278208732605, "learning_rate": 6.087401618046602e-06, "loss": 0.6412, "step": 4285 }, { "epoch": 0.6392482941198404, "grad_norm": 1.1070407629013062, "learning_rate": 6.082955526678199e-06, "loss": 0.4696, "step": 4286 }, { "epoch": 0.6393974421119356, "grad_norm": 1.1477299928665161, "learning_rate": 6.078510349768942e-06, "loss": 0.6258, "step": 4287 }, { "epoch": 0.6395465901040307, "grad_norm": 1.2290700674057007, "learning_rate": 6.074066088356587e-06, "loss": 0.613, "step": 4288 }, { "epoch": 0.6396957380961259, "grad_norm": 1.1571074724197388, "learning_rate": 6.069622743478681e-06, "loss": 0.5454, "step": 4289 }, { "epoch": 0.639844886088221, "grad_norm": 1.0566142797470093, "learning_rate": 6.065180316172547e-06, "loss": 0.5087, "step": 4290 }, { "epoch": 0.6399940340803162, "grad_norm": 1.078545093536377, "learning_rate": 6.060738807475295e-06, "loss": 0.5343, "step": 4291 }, { "epoch": 0.6401431820724114, "grad_norm": 1.0220768451690674, "learning_rate": 6.056298218423831e-06, "loss": 0.6135, "step": 4292 }, { "epoch": 0.6402923300645065, "grad_norm": 0.8754919171333313, "learning_rate": 6.051858550054832e-06, "loss": 0.635, "step": 4293 }, { "epoch": 0.6404414780566017, "grad_norm": 1.2517526149749756, "learning_rate": 6.047419803404772e-06, "loss": 0.6136, "step": 4294 }, { "epoch": 0.6405906260486968, "grad_norm": 1.136526346206665, "learning_rate": 6.042981979509904e-06, "loss": 0.5507, "step": 4295 }, { "epoch": 0.640739774040792, "grad_norm": 0.83910071849823, "learning_rate": 6.038545079406264e-06, "loss": 0.6645, "step": 4296 }, { "epoch": 0.6408889220328872, "grad_norm": 1.146599531173706, "learning_rate": 6.034109104129673e-06, "loss": 0.5343, "step": 4297 }, { "epoch": 0.6410380700249823, "grad_norm": 1.240498423576355, "learning_rate": 6.029674054715744e-06, "loss": 0.6252, "step": 4298 }, { "epoch": 0.6411872180170775, "grad_norm": 1.3548674583435059, "learning_rate": 6.025239932199864e-06, "loss": 0.6473, "step": 4299 }, { "epoch": 0.6413363660091727, "grad_norm": 1.1593854427337646, "learning_rate": 6.020806737617211e-06, "loss": 0.5868, "step": 4300 }, { "epoch": 0.6414855140012677, "grad_norm": 1.234110713005066, "learning_rate": 6.016374472002739e-06, "loss": 0.6444, "step": 4301 }, { "epoch": 0.6416346619933629, "grad_norm": 1.1142460107803345, "learning_rate": 6.0119431363911875e-06, "loss": 0.5848, "step": 4302 }, { "epoch": 0.641783809985458, "grad_norm": 1.1131653785705566, "learning_rate": 6.007512731817085e-06, "loss": 0.5941, "step": 4303 }, { "epoch": 0.6419329579775532, "grad_norm": 1.1739295721054077, "learning_rate": 6.0030832593147326e-06, "loss": 0.5865, "step": 4304 }, { "epoch": 0.6420821059696484, "grad_norm": 1.1090158224105835, "learning_rate": 5.998654719918223e-06, "loss": 0.664, "step": 4305 }, { "epoch": 0.6422312539617435, "grad_norm": 1.0235313177108765, "learning_rate": 5.994227114661423e-06, "loss": 0.4893, "step": 4306 }, { "epoch": 0.6423804019538387, "grad_norm": 1.1069759130477905, "learning_rate": 5.989800444577991e-06, "loss": 0.5822, "step": 4307 }, { "epoch": 0.6425295499459338, "grad_norm": 1.283277988433838, "learning_rate": 5.985374710701358e-06, "loss": 0.6828, "step": 4308 }, { "epoch": 0.642678697938029, "grad_norm": 1.190895676612854, "learning_rate": 5.980949914064742e-06, "loss": 0.5957, "step": 4309 }, { "epoch": 0.6428278459301242, "grad_norm": 1.0434857606887817, "learning_rate": 5.976526055701137e-06, "loss": 0.5595, "step": 4310 }, { "epoch": 0.6429769939222193, "grad_norm": 1.2765274047851562, "learning_rate": 5.972103136643326e-06, "loss": 0.6427, "step": 4311 }, { "epoch": 0.6431261419143145, "grad_norm": 1.209745168685913, "learning_rate": 5.967681157923864e-06, "loss": 0.5805, "step": 4312 }, { "epoch": 0.6432752899064096, "grad_norm": 1.2943834066390991, "learning_rate": 5.963260120575089e-06, "loss": 0.5878, "step": 4313 }, { "epoch": 0.6434244378985048, "grad_norm": 1.2930961847305298, "learning_rate": 5.9588400256291204e-06, "loss": 0.6113, "step": 4314 }, { "epoch": 0.6435735858906, "grad_norm": 1.1370503902435303, "learning_rate": 5.954420874117864e-06, "loss": 0.6216, "step": 4315 }, { "epoch": 0.6437227338826951, "grad_norm": 1.1539653539657593, "learning_rate": 5.950002667072994e-06, "loss": 0.6193, "step": 4316 }, { "epoch": 0.6438718818747903, "grad_norm": 1.1230820417404175, "learning_rate": 5.945585405525971e-06, "loss": 0.6293, "step": 4317 }, { "epoch": 0.6440210298668854, "grad_norm": 1.2556356191635132, "learning_rate": 5.941169090508032e-06, "loss": 0.6767, "step": 4318 }, { "epoch": 0.6441701778589806, "grad_norm": 1.1768821477890015, "learning_rate": 5.936753723050192e-06, "loss": 0.6205, "step": 4319 }, { "epoch": 0.6443193258510758, "grad_norm": 0.8993271589279175, "learning_rate": 5.932339304183251e-06, "loss": 0.6683, "step": 4320 }, { "epoch": 0.6444684738431709, "grad_norm": 1.191219449043274, "learning_rate": 5.92792583493778e-06, "loss": 0.6094, "step": 4321 }, { "epoch": 0.644617621835266, "grad_norm": 1.197219729423523, "learning_rate": 5.923513316344135e-06, "loss": 0.6031, "step": 4322 }, { "epoch": 0.6447667698273613, "grad_norm": 1.0357521772384644, "learning_rate": 5.919101749432441e-06, "loss": 0.5169, "step": 4323 }, { "epoch": 0.6449159178194563, "grad_norm": 1.2523280382156372, "learning_rate": 5.914691135232613e-06, "loss": 0.6173, "step": 4324 }, { "epoch": 0.6450650658115515, "grad_norm": 1.1612790822982788, "learning_rate": 5.910281474774335e-06, "loss": 0.5691, "step": 4325 }, { "epoch": 0.6452142138036466, "grad_norm": 1.2217786312103271, "learning_rate": 5.905872769087071e-06, "loss": 0.6392, "step": 4326 }, { "epoch": 0.6453633617957418, "grad_norm": 1.2139338254928589, "learning_rate": 5.901465019200059e-06, "loss": 0.6228, "step": 4327 }, { "epoch": 0.645512509787837, "grad_norm": 1.1334593296051025, "learning_rate": 5.897058226142321e-06, "loss": 0.5424, "step": 4328 }, { "epoch": 0.6456616577799321, "grad_norm": 1.1986563205718994, "learning_rate": 5.892652390942645e-06, "loss": 0.6172, "step": 4329 }, { "epoch": 0.6458108057720273, "grad_norm": 1.0990105867385864, "learning_rate": 5.888247514629607e-06, "loss": 0.6064, "step": 4330 }, { "epoch": 0.6459599537641224, "grad_norm": 1.0943466424942017, "learning_rate": 5.883843598231551e-06, "loss": 0.5361, "step": 4331 }, { "epoch": 0.6461091017562176, "grad_norm": 1.2717018127441406, "learning_rate": 5.879440642776597e-06, "loss": 0.5775, "step": 4332 }, { "epoch": 0.6462582497483128, "grad_norm": 1.1758239269256592, "learning_rate": 5.875038649292648e-06, "loss": 0.5294, "step": 4333 }, { "epoch": 0.6464073977404079, "grad_norm": 1.1507349014282227, "learning_rate": 5.8706376188073775e-06, "loss": 0.5075, "step": 4334 }, { "epoch": 0.6465565457325031, "grad_norm": 1.1365299224853516, "learning_rate": 5.866237552348231e-06, "loss": 0.5863, "step": 4335 }, { "epoch": 0.6467056937245982, "grad_norm": 1.105562448501587, "learning_rate": 5.861838450942434e-06, "loss": 0.6286, "step": 4336 }, { "epoch": 0.6468548417166934, "grad_norm": 1.2766180038452148, "learning_rate": 5.857440315616987e-06, "loss": 0.6373, "step": 4337 }, { "epoch": 0.6470039897087886, "grad_norm": 1.103217363357544, "learning_rate": 5.853043147398656e-06, "loss": 0.5663, "step": 4338 }, { "epoch": 0.6471531377008837, "grad_norm": 1.038797378540039, "learning_rate": 5.848646947313996e-06, "loss": 0.5323, "step": 4339 }, { "epoch": 0.6473022856929789, "grad_norm": 1.1418856382369995, "learning_rate": 5.844251716389324e-06, "loss": 0.5795, "step": 4340 }, { "epoch": 0.647451433685074, "grad_norm": 0.8807725310325623, "learning_rate": 5.839857455650732e-06, "loss": 0.6767, "step": 4341 }, { "epoch": 0.6476005816771692, "grad_norm": 1.1574450731277466, "learning_rate": 5.835464166124096e-06, "loss": 0.5984, "step": 4342 }, { "epoch": 0.6477497296692644, "grad_norm": 1.1842210292816162, "learning_rate": 5.831071848835053e-06, "loss": 0.5752, "step": 4343 }, { "epoch": 0.6478988776613595, "grad_norm": 1.2988605499267578, "learning_rate": 5.8266805048090216e-06, "loss": 0.7016, "step": 4344 }, { "epoch": 0.6480480256534547, "grad_norm": 0.8109025955200195, "learning_rate": 5.82229013507118e-06, "loss": 0.594, "step": 4345 }, { "epoch": 0.6481971736455499, "grad_norm": 1.0413521528244019, "learning_rate": 5.817900740646496e-06, "loss": 0.57, "step": 4346 }, { "epoch": 0.6483463216376449, "grad_norm": 1.1454757452011108, "learning_rate": 5.813512322559699e-06, "loss": 0.5729, "step": 4347 }, { "epoch": 0.6484954696297401, "grad_norm": 1.1363998651504517, "learning_rate": 5.809124881835299e-06, "loss": 0.5703, "step": 4348 }, { "epoch": 0.6486446176218352, "grad_norm": 1.131221890449524, "learning_rate": 5.804738419497558e-06, "loss": 0.597, "step": 4349 }, { "epoch": 0.6487937656139304, "grad_norm": 1.1656506061553955, "learning_rate": 5.800352936570543e-06, "loss": 0.5989, "step": 4350 }, { "epoch": 0.6489429136060256, "grad_norm": 1.0529392957687378, "learning_rate": 5.795968434078059e-06, "loss": 0.5593, "step": 4351 }, { "epoch": 0.6490920615981207, "grad_norm": 1.1509709358215332, "learning_rate": 5.791584913043699e-06, "loss": 0.615, "step": 4352 }, { "epoch": 0.6492412095902159, "grad_norm": 1.2225569486618042, "learning_rate": 5.787202374490826e-06, "loss": 0.598, "step": 4353 }, { "epoch": 0.649390357582311, "grad_norm": 1.2330580949783325, "learning_rate": 5.782820819442576e-06, "loss": 0.5694, "step": 4354 }, { "epoch": 0.6495395055744062, "grad_norm": 1.2596427202224731, "learning_rate": 5.778440248921842e-06, "loss": 0.6583, "step": 4355 }, { "epoch": 0.6496886535665014, "grad_norm": 1.203565239906311, "learning_rate": 5.7740606639513e-06, "loss": 0.6239, "step": 4356 }, { "epoch": 0.6498378015585965, "grad_norm": 1.107398271560669, "learning_rate": 5.7696820655533984e-06, "loss": 0.58, "step": 4357 }, { "epoch": 0.6499869495506917, "grad_norm": 1.173837423324585, "learning_rate": 5.765304454750333e-06, "loss": 0.6582, "step": 4358 }, { "epoch": 0.6501360975427868, "grad_norm": 1.1938506364822388, "learning_rate": 5.760927832564103e-06, "loss": 0.6335, "step": 4359 }, { "epoch": 0.650285245534882, "grad_norm": 1.1226588487625122, "learning_rate": 5.756552200016454e-06, "loss": 0.5457, "step": 4360 }, { "epoch": 0.6504343935269772, "grad_norm": 1.0876660346984863, "learning_rate": 5.752177558128899e-06, "loss": 0.5659, "step": 4361 }, { "epoch": 0.6505835415190723, "grad_norm": 0.9672759175300598, "learning_rate": 5.74780390792273e-06, "loss": 0.506, "step": 4362 }, { "epoch": 0.6507326895111675, "grad_norm": 1.0881586074829102, "learning_rate": 5.743431250419007e-06, "loss": 0.6307, "step": 4363 }, { "epoch": 0.6508818375032626, "grad_norm": 1.28812837600708, "learning_rate": 5.73905958663855e-06, "loss": 0.6658, "step": 4364 }, { "epoch": 0.6510309854953578, "grad_norm": 1.099823236465454, "learning_rate": 5.734688917601952e-06, "loss": 0.5801, "step": 4365 }, { "epoch": 0.651180133487453, "grad_norm": 1.1203523874282837, "learning_rate": 5.7303192443295805e-06, "loss": 0.5398, "step": 4366 }, { "epoch": 0.651329281479548, "grad_norm": 1.1549559831619263, "learning_rate": 5.725950567841552e-06, "loss": 0.5918, "step": 4367 }, { "epoch": 0.6514784294716433, "grad_norm": 1.0919415950775146, "learning_rate": 5.7215828891577705e-06, "loss": 0.5922, "step": 4368 }, { "epoch": 0.6516275774637384, "grad_norm": 1.2694722414016724, "learning_rate": 5.717216209297902e-06, "loss": 0.6501, "step": 4369 }, { "epoch": 0.6517767254558335, "grad_norm": 0.8938738703727722, "learning_rate": 5.712850529281366e-06, "loss": 0.6772, "step": 4370 }, { "epoch": 0.6519258734479287, "grad_norm": 1.1835752725601196, "learning_rate": 5.708485850127365e-06, "loss": 0.5691, "step": 4371 }, { "epoch": 0.6520750214400238, "grad_norm": 1.0841566324234009, "learning_rate": 5.704122172854863e-06, "loss": 0.5617, "step": 4372 }, { "epoch": 0.652224169432119, "grad_norm": 1.1540184020996094, "learning_rate": 5.6997594984825795e-06, "loss": 0.5385, "step": 4373 }, { "epoch": 0.6523733174242142, "grad_norm": 1.4050647020339966, "learning_rate": 5.695397828029016e-06, "loss": 0.642, "step": 4374 }, { "epoch": 0.6525224654163093, "grad_norm": 1.193285584449768, "learning_rate": 5.69103716251243e-06, "loss": 0.6075, "step": 4375 }, { "epoch": 0.6526716134084045, "grad_norm": 0.8413925766944885, "learning_rate": 5.686677502950848e-06, "loss": 0.644, "step": 4376 }, { "epoch": 0.6528207614004996, "grad_norm": 1.1921428442001343, "learning_rate": 5.682318850362061e-06, "loss": 0.6037, "step": 4377 }, { "epoch": 0.6529699093925948, "grad_norm": 1.1355417966842651, "learning_rate": 5.677961205763626e-06, "loss": 0.6278, "step": 4378 }, { "epoch": 0.65311905738469, "grad_norm": 1.1136194467544556, "learning_rate": 5.673604570172857e-06, "loss": 0.5826, "step": 4379 }, { "epoch": 0.6532682053767851, "grad_norm": 1.0884215831756592, "learning_rate": 5.669248944606842e-06, "loss": 0.6274, "step": 4380 }, { "epoch": 0.6534173533688803, "grad_norm": 0.8220637440681458, "learning_rate": 5.66489433008243e-06, "loss": 0.6288, "step": 4381 }, { "epoch": 0.6535665013609754, "grad_norm": 1.1734261512756348, "learning_rate": 5.660540727616237e-06, "loss": 0.6253, "step": 4382 }, { "epoch": 0.6537156493530706, "grad_norm": 1.243483543395996, "learning_rate": 5.656188138224633e-06, "loss": 0.6419, "step": 4383 }, { "epoch": 0.6538647973451658, "grad_norm": 1.2784051895141602, "learning_rate": 5.651836562923761e-06, "loss": 0.6006, "step": 4384 }, { "epoch": 0.6540139453372609, "grad_norm": 1.1346310377120972, "learning_rate": 5.647486002729523e-06, "loss": 0.6103, "step": 4385 }, { "epoch": 0.6541630933293561, "grad_norm": 1.2233997583389282, "learning_rate": 5.643136458657586e-06, "loss": 0.5719, "step": 4386 }, { "epoch": 0.6543122413214512, "grad_norm": 1.1061698198318481, "learning_rate": 5.638787931723379e-06, "loss": 0.5818, "step": 4387 }, { "epoch": 0.6544613893135464, "grad_norm": 1.0303951501846313, "learning_rate": 5.634440422942098e-06, "loss": 0.4761, "step": 4388 }, { "epoch": 0.6546105373056416, "grad_norm": 1.1810805797576904, "learning_rate": 5.630093933328688e-06, "loss": 0.5705, "step": 4389 }, { "epoch": 0.6547596852977366, "grad_norm": 1.1064696311950684, "learning_rate": 5.625748463897871e-06, "loss": 0.5779, "step": 4390 }, { "epoch": 0.6549088332898318, "grad_norm": 1.3161470890045166, "learning_rate": 5.621404015664125e-06, "loss": 0.6573, "step": 4391 }, { "epoch": 0.6550579812819269, "grad_norm": 1.1786038875579834, "learning_rate": 5.617060589641685e-06, "loss": 0.5813, "step": 4392 }, { "epoch": 0.6552071292740221, "grad_norm": 1.204799771308899, "learning_rate": 5.612718186844548e-06, "loss": 0.5941, "step": 4393 }, { "epoch": 0.6553562772661173, "grad_norm": 1.0969277620315552, "learning_rate": 5.608376808286491e-06, "loss": 0.6175, "step": 4394 }, { "epoch": 0.6555054252582124, "grad_norm": 1.1591252088546753, "learning_rate": 5.604036454981024e-06, "loss": 0.611, "step": 4395 }, { "epoch": 0.6556545732503076, "grad_norm": 1.1743966341018677, "learning_rate": 5.599697127941432e-06, "loss": 0.567, "step": 4396 }, { "epoch": 0.6558037212424028, "grad_norm": 1.1965676546096802, "learning_rate": 5.5953588281807644e-06, "loss": 0.5856, "step": 4397 }, { "epoch": 0.6559528692344979, "grad_norm": 1.201110601425171, "learning_rate": 5.591021556711818e-06, "loss": 0.7254, "step": 4398 }, { "epoch": 0.6561020172265931, "grad_norm": 1.1213369369506836, "learning_rate": 5.586685314547159e-06, "loss": 0.6298, "step": 4399 }, { "epoch": 0.6562511652186882, "grad_norm": 1.0733122825622559, "learning_rate": 5.582350102699112e-06, "loss": 0.601, "step": 4400 }, { "epoch": 0.6564003132107834, "grad_norm": 1.164581537246704, "learning_rate": 5.578015922179764e-06, "loss": 0.5284, "step": 4401 }, { "epoch": 0.6565494612028786, "grad_norm": 1.0907400846481323, "learning_rate": 5.573682774000944e-06, "loss": 0.6244, "step": 4402 }, { "epoch": 0.6566986091949737, "grad_norm": 1.104278564453125, "learning_rate": 5.5693506591742705e-06, "loss": 0.5392, "step": 4403 }, { "epoch": 0.6568477571870689, "grad_norm": 0.919524610042572, "learning_rate": 5.5650195787110915e-06, "loss": 0.6542, "step": 4404 }, { "epoch": 0.656996905179164, "grad_norm": 1.0537225008010864, "learning_rate": 5.560689533622529e-06, "loss": 0.5558, "step": 4405 }, { "epoch": 0.6571460531712592, "grad_norm": 1.0582407712936401, "learning_rate": 5.55636052491946e-06, "loss": 0.6227, "step": 4406 }, { "epoch": 0.6572952011633544, "grad_norm": 1.2146327495574951, "learning_rate": 5.552032553612523e-06, "loss": 0.5983, "step": 4407 }, { "epoch": 0.6574443491554495, "grad_norm": 1.2369362115859985, "learning_rate": 5.547705620712103e-06, "loss": 0.6289, "step": 4408 }, { "epoch": 0.6575934971475447, "grad_norm": 1.148102879524231, "learning_rate": 5.543379727228354e-06, "loss": 0.5984, "step": 4409 }, { "epoch": 0.6577426451396398, "grad_norm": 1.0823092460632324, "learning_rate": 5.539054874171183e-06, "loss": 0.6182, "step": 4410 }, { "epoch": 0.657891793131735, "grad_norm": 1.057714819908142, "learning_rate": 5.534731062550257e-06, "loss": 0.5167, "step": 4411 }, { "epoch": 0.6580409411238302, "grad_norm": 1.092989206314087, "learning_rate": 5.530408293374995e-06, "loss": 0.5059, "step": 4412 }, { "epoch": 0.6581900891159252, "grad_norm": 1.201097011566162, "learning_rate": 5.526086567654581e-06, "loss": 0.585, "step": 4413 }, { "epoch": 0.6583392371080204, "grad_norm": 1.237109661102295, "learning_rate": 5.521765886397938e-06, "loss": 0.5672, "step": 4414 }, { "epoch": 0.6584883851001155, "grad_norm": 1.1643630266189575, "learning_rate": 5.517446250613766e-06, "loss": 0.6562, "step": 4415 }, { "epoch": 0.6586375330922107, "grad_norm": 1.0629860162734985, "learning_rate": 5.513127661310512e-06, "loss": 0.5645, "step": 4416 }, { "epoch": 0.6587866810843059, "grad_norm": 1.0849688053131104, "learning_rate": 5.508810119496372e-06, "loss": 0.542, "step": 4417 }, { "epoch": 0.658935829076401, "grad_norm": 1.2068934440612793, "learning_rate": 5.504493626179307e-06, "loss": 0.57, "step": 4418 }, { "epoch": 0.6590849770684962, "grad_norm": 1.177473783493042, "learning_rate": 5.5001781823670305e-06, "loss": 0.6074, "step": 4419 }, { "epoch": 0.6592341250605914, "grad_norm": 1.1491384506225586, "learning_rate": 5.4958637890670105e-06, "loss": 0.5873, "step": 4420 }, { "epoch": 0.6593832730526865, "grad_norm": 1.1546132564544678, "learning_rate": 5.491550447286469e-06, "loss": 0.5483, "step": 4421 }, { "epoch": 0.6595324210447817, "grad_norm": 1.1368640661239624, "learning_rate": 5.487238158032388e-06, "loss": 0.6285, "step": 4422 }, { "epoch": 0.6596815690368768, "grad_norm": 1.1371244192123413, "learning_rate": 5.482926922311491e-06, "loss": 0.5624, "step": 4423 }, { "epoch": 0.659830717028972, "grad_norm": 1.1156752109527588, "learning_rate": 5.478616741130269e-06, "loss": 0.4986, "step": 4424 }, { "epoch": 0.6599798650210672, "grad_norm": 1.279273271560669, "learning_rate": 5.474307615494958e-06, "loss": 0.6209, "step": 4425 }, { "epoch": 0.6601290130131623, "grad_norm": 1.176820993423462, "learning_rate": 5.469999546411557e-06, "loss": 0.6041, "step": 4426 }, { "epoch": 0.6602781610052575, "grad_norm": 1.1832551956176758, "learning_rate": 5.465692534885807e-06, "loss": 0.58, "step": 4427 }, { "epoch": 0.6604273089973526, "grad_norm": 1.2059966325759888, "learning_rate": 5.461386581923207e-06, "loss": 0.53, "step": 4428 }, { "epoch": 0.6605764569894478, "grad_norm": 1.2029027938842773, "learning_rate": 5.457081688529011e-06, "loss": 0.5557, "step": 4429 }, { "epoch": 0.660725604981543, "grad_norm": 1.183924913406372, "learning_rate": 5.452777855708224e-06, "loss": 0.5893, "step": 4430 }, { "epoch": 0.6608747529736381, "grad_norm": 1.2532116174697876, "learning_rate": 5.448475084465605e-06, "loss": 0.5854, "step": 4431 }, { "epoch": 0.6610239009657333, "grad_norm": 1.2913986444473267, "learning_rate": 5.4441733758056655e-06, "loss": 0.6641, "step": 4432 }, { "epoch": 0.6611730489578284, "grad_norm": 1.2286771535873413, "learning_rate": 5.439872730732659e-06, "loss": 0.6103, "step": 4433 }, { "epoch": 0.6613221969499236, "grad_norm": 1.1694142818450928, "learning_rate": 5.4355731502506035e-06, "loss": 0.6314, "step": 4434 }, { "epoch": 0.6614713449420188, "grad_norm": 1.1646559238433838, "learning_rate": 5.431274635363268e-06, "loss": 0.5895, "step": 4435 }, { "epoch": 0.6616204929341138, "grad_norm": 1.1110262870788574, "learning_rate": 5.426977187074158e-06, "loss": 0.5854, "step": 4436 }, { "epoch": 0.661769640926209, "grad_norm": 1.2091366052627563, "learning_rate": 5.422680806386544e-06, "loss": 0.6303, "step": 4437 }, { "epoch": 0.6619187889183041, "grad_norm": 1.2600061893463135, "learning_rate": 5.418385494303453e-06, "loss": 0.6547, "step": 4438 }, { "epoch": 0.6620679369103993, "grad_norm": 1.1721272468566895, "learning_rate": 5.414091251827642e-06, "loss": 0.5526, "step": 4439 }, { "epoch": 0.6622170849024945, "grad_norm": 1.0951157808303833, "learning_rate": 5.409798079961632e-06, "loss": 0.4862, "step": 4440 }, { "epoch": 0.6623662328945896, "grad_norm": 1.1646112203598022, "learning_rate": 5.405505979707698e-06, "loss": 0.6128, "step": 4441 }, { "epoch": 0.6625153808866848, "grad_norm": 1.195068597793579, "learning_rate": 5.401214952067849e-06, "loss": 0.6289, "step": 4442 }, { "epoch": 0.66266452887878, "grad_norm": 1.2310229539871216, "learning_rate": 5.396924998043858e-06, "loss": 0.5914, "step": 4443 }, { "epoch": 0.6628136768708751, "grad_norm": 1.1498205661773682, "learning_rate": 5.392636118637242e-06, "loss": 0.5832, "step": 4444 }, { "epoch": 0.6629628248629703, "grad_norm": 1.350528359413147, "learning_rate": 5.388348314849261e-06, "loss": 0.6909, "step": 4445 }, { "epoch": 0.6631119728550654, "grad_norm": 1.1314327716827393, "learning_rate": 5.38406158768094e-06, "loss": 0.6097, "step": 4446 }, { "epoch": 0.6632611208471606, "grad_norm": 1.1955586671829224, "learning_rate": 5.379775938133043e-06, "loss": 0.6778, "step": 4447 }, { "epoch": 0.6634102688392558, "grad_norm": 1.2506000995635986, "learning_rate": 5.375491367206074e-06, "loss": 0.5788, "step": 4448 }, { "epoch": 0.6635594168313509, "grad_norm": 1.1556490659713745, "learning_rate": 5.371207875900298e-06, "loss": 0.4905, "step": 4449 }, { "epoch": 0.6637085648234461, "grad_norm": 1.1464427709579468, "learning_rate": 5.366925465215728e-06, "loss": 0.6483, "step": 4450 }, { "epoch": 0.6638577128155412, "grad_norm": 1.062254548072815, "learning_rate": 5.362644136152111e-06, "loss": 0.6177, "step": 4451 }, { "epoch": 0.6640068608076364, "grad_norm": 1.1006489992141724, "learning_rate": 5.358363889708954e-06, "loss": 0.6167, "step": 4452 }, { "epoch": 0.6641560087997316, "grad_norm": 1.2999281883239746, "learning_rate": 5.354084726885511e-06, "loss": 0.6952, "step": 4453 }, { "epoch": 0.6643051567918267, "grad_norm": 1.1252400875091553, "learning_rate": 5.349806648680778e-06, "loss": 0.5806, "step": 4454 }, { "epoch": 0.6644543047839219, "grad_norm": 1.223066806793213, "learning_rate": 5.3455296560935e-06, "loss": 0.6429, "step": 4455 }, { "epoch": 0.664603452776017, "grad_norm": 1.2199599742889404, "learning_rate": 5.34125375012217e-06, "loss": 0.654, "step": 4456 }, { "epoch": 0.6647526007681122, "grad_norm": 1.0905555486679077, "learning_rate": 5.336978931765023e-06, "loss": 0.558, "step": 4457 }, { "epoch": 0.6649017487602074, "grad_norm": 1.166114091873169, "learning_rate": 5.332705202020043e-06, "loss": 0.6195, "step": 4458 }, { "epoch": 0.6650508967523024, "grad_norm": 1.0617932081222534, "learning_rate": 5.328432561884962e-06, "loss": 0.5861, "step": 4459 }, { "epoch": 0.6652000447443976, "grad_norm": 1.137660264968872, "learning_rate": 5.324161012357256e-06, "loss": 0.6094, "step": 4460 }, { "epoch": 0.6653491927364927, "grad_norm": 1.0711692571640015, "learning_rate": 5.31989055443414e-06, "loss": 0.5972, "step": 4461 }, { "epoch": 0.6654983407285879, "grad_norm": 1.1283260583877563, "learning_rate": 5.315621189112582e-06, "loss": 0.5743, "step": 4462 }, { "epoch": 0.6656474887206831, "grad_norm": 1.1823077201843262, "learning_rate": 5.3113529173892945e-06, "loss": 0.5668, "step": 4463 }, { "epoch": 0.6657966367127782, "grad_norm": 1.2132458686828613, "learning_rate": 5.307085740260731e-06, "loss": 0.5715, "step": 4464 }, { "epoch": 0.6659457847048734, "grad_norm": 1.2196952104568481, "learning_rate": 5.302819658723095e-06, "loss": 0.5989, "step": 4465 }, { "epoch": 0.6660949326969686, "grad_norm": 1.1903295516967773, "learning_rate": 5.29855467377233e-06, "loss": 0.62, "step": 4466 }, { "epoch": 0.6662440806890637, "grad_norm": 1.1232150793075562, "learning_rate": 5.294290786404119e-06, "loss": 0.622, "step": 4467 }, { "epoch": 0.6663932286811589, "grad_norm": 1.0468331575393677, "learning_rate": 5.290027997613898e-06, "loss": 0.6047, "step": 4468 }, { "epoch": 0.666542376673254, "grad_norm": 1.177526831626892, "learning_rate": 5.285766308396845e-06, "loss": 0.5623, "step": 4469 }, { "epoch": 0.6666915246653492, "grad_norm": 1.1874808073043823, "learning_rate": 5.28150571974787e-06, "loss": 0.5917, "step": 4470 }, { "epoch": 0.6668406726574444, "grad_norm": 1.1966017484664917, "learning_rate": 5.277246232661641e-06, "loss": 0.6271, "step": 4471 }, { "epoch": 0.6669898206495395, "grad_norm": 1.2017019987106323, "learning_rate": 5.272987848132562e-06, "loss": 0.5699, "step": 4472 }, { "epoch": 0.6671389686416347, "grad_norm": 1.0581754446029663, "learning_rate": 5.268730567154778e-06, "loss": 0.5465, "step": 4473 }, { "epoch": 0.6672881166337298, "grad_norm": 1.1238113641738892, "learning_rate": 5.264474390722181e-06, "loss": 0.6357, "step": 4474 }, { "epoch": 0.667437264625825, "grad_norm": 1.1360409259796143, "learning_rate": 5.260219319828405e-06, "loss": 0.5715, "step": 4475 }, { "epoch": 0.6675864126179202, "grad_norm": 1.1780048608779907, "learning_rate": 5.2559653554668184e-06, "loss": 0.6245, "step": 4476 }, { "epoch": 0.6677355606100153, "grad_norm": 1.302392601966858, "learning_rate": 5.251712498630537e-06, "loss": 0.6449, "step": 4477 }, { "epoch": 0.6678847086021105, "grad_norm": 1.1052616834640503, "learning_rate": 5.24746075031242e-06, "loss": 0.5602, "step": 4478 }, { "epoch": 0.6680338565942056, "grad_norm": 1.4252296686172485, "learning_rate": 5.243210111505068e-06, "loss": 0.7083, "step": 4479 }, { "epoch": 0.6681830045863008, "grad_norm": 1.1215037107467651, "learning_rate": 5.238960583200807e-06, "loss": 0.5501, "step": 4480 }, { "epoch": 0.668332152578396, "grad_norm": 1.103541374206543, "learning_rate": 5.234712166391735e-06, "loss": 0.6022, "step": 4481 }, { "epoch": 0.668481300570491, "grad_norm": 1.2176748514175415, "learning_rate": 5.230464862069658e-06, "loss": 0.5376, "step": 4482 }, { "epoch": 0.6686304485625862, "grad_norm": 1.16739022731781, "learning_rate": 5.226218671226142e-06, "loss": 0.6015, "step": 4483 }, { "epoch": 0.6687795965546813, "grad_norm": 1.217729091644287, "learning_rate": 5.221973594852488e-06, "loss": 0.5942, "step": 4484 }, { "epoch": 0.6689287445467765, "grad_norm": 1.26436448097229, "learning_rate": 5.217729633939737e-06, "loss": 0.6839, "step": 4485 }, { "epoch": 0.6690778925388717, "grad_norm": 1.2844364643096924, "learning_rate": 5.213486789478665e-06, "loss": 0.5993, "step": 4486 }, { "epoch": 0.6692270405309668, "grad_norm": 0.8491200804710388, "learning_rate": 5.209245062459791e-06, "loss": 0.6216, "step": 4487 }, { "epoch": 0.669376188523062, "grad_norm": 1.2078295946121216, "learning_rate": 5.205004453873381e-06, "loss": 0.5609, "step": 4488 }, { "epoch": 0.6695253365151571, "grad_norm": 1.151663064956665, "learning_rate": 5.2007649647094195e-06, "loss": 0.5925, "step": 4489 }, { "epoch": 0.6696744845072523, "grad_norm": 1.20433509349823, "learning_rate": 5.196526595957654e-06, "loss": 0.6413, "step": 4490 }, { "epoch": 0.6698236324993475, "grad_norm": 1.241039752960205, "learning_rate": 5.192289348607557e-06, "loss": 0.6014, "step": 4491 }, { "epoch": 0.6699727804914426, "grad_norm": 1.196219801902771, "learning_rate": 5.188053223648337e-06, "loss": 0.6338, "step": 4492 }, { "epoch": 0.6701219284835378, "grad_norm": 1.2101390361785889, "learning_rate": 5.183818222068944e-06, "loss": 0.5776, "step": 4493 }, { "epoch": 0.670271076475633, "grad_norm": 1.1327911615371704, "learning_rate": 5.179584344858074e-06, "loss": 0.5676, "step": 4494 }, { "epoch": 0.6704202244677281, "grad_norm": 1.1162092685699463, "learning_rate": 5.175351593004143e-06, "loss": 0.5493, "step": 4495 }, { "epoch": 0.6705693724598233, "grad_norm": 1.123409628868103, "learning_rate": 5.171119967495319e-06, "loss": 0.5956, "step": 4496 }, { "epoch": 0.6707185204519184, "grad_norm": 1.1001670360565186, "learning_rate": 5.1668894693195045e-06, "loss": 0.5704, "step": 4497 }, { "epoch": 0.6708676684440136, "grad_norm": 1.2049310207366943, "learning_rate": 5.162660099464327e-06, "loss": 0.6463, "step": 4498 }, { "epoch": 0.6710168164361088, "grad_norm": 1.2515294551849365, "learning_rate": 5.158431858917169e-06, "loss": 0.6838, "step": 4499 }, { "epoch": 0.6711659644282039, "grad_norm": 0.8488227725028992, "learning_rate": 5.1542047486651415e-06, "loss": 0.6546, "step": 4500 }, { "epoch": 0.6713151124202991, "grad_norm": 1.1854311227798462, "learning_rate": 5.149978769695084e-06, "loss": 0.6057, "step": 4501 }, { "epoch": 0.6714642604123942, "grad_norm": 1.1330771446228027, "learning_rate": 5.145753922993582e-06, "loss": 0.5521, "step": 4502 }, { "epoch": 0.6716134084044894, "grad_norm": 0.7964829802513123, "learning_rate": 5.141530209546954e-06, "loss": 0.5951, "step": 4503 }, { "epoch": 0.6717625563965846, "grad_norm": 1.2126924991607666, "learning_rate": 5.137307630341248e-06, "loss": 0.5955, "step": 4504 }, { "epoch": 0.6719117043886796, "grad_norm": 1.1678798198699951, "learning_rate": 5.133086186362257e-06, "loss": 0.5715, "step": 4505 }, { "epoch": 0.6720608523807748, "grad_norm": 1.3438255786895752, "learning_rate": 5.128865878595502e-06, "loss": 0.6477, "step": 4506 }, { "epoch": 0.6722100003728699, "grad_norm": 1.1484813690185547, "learning_rate": 5.124646708026241e-06, "loss": 0.5634, "step": 4507 }, { "epoch": 0.6723591483649651, "grad_norm": 1.0416024923324585, "learning_rate": 5.120428675639466e-06, "loss": 0.6144, "step": 4508 }, { "epoch": 0.6725082963570603, "grad_norm": 1.1691728830337524, "learning_rate": 5.116211782419911e-06, "loss": 0.4735, "step": 4509 }, { "epoch": 0.6726574443491554, "grad_norm": 1.2568752765655518, "learning_rate": 5.111996029352025e-06, "loss": 0.6265, "step": 4510 }, { "epoch": 0.6728065923412506, "grad_norm": 1.1012482643127441, "learning_rate": 5.107781417420008e-06, "loss": 0.482, "step": 4511 }, { "epoch": 0.6729557403333457, "grad_norm": 1.1966474056243896, "learning_rate": 5.103567947607788e-06, "loss": 0.6097, "step": 4512 }, { "epoch": 0.6731048883254409, "grad_norm": 1.0323171615600586, "learning_rate": 5.099355620899032e-06, "loss": 0.555, "step": 4513 }, { "epoch": 0.6732540363175361, "grad_norm": 1.236633062362671, "learning_rate": 5.095144438277124e-06, "loss": 0.5835, "step": 4514 }, { "epoch": 0.6734031843096312, "grad_norm": 1.197718620300293, "learning_rate": 5.090934400725194e-06, "loss": 0.6002, "step": 4515 }, { "epoch": 0.6735523323017264, "grad_norm": 1.166609525680542, "learning_rate": 5.086725509226111e-06, "loss": 0.6332, "step": 4516 }, { "epoch": 0.6737014802938216, "grad_norm": 1.102784514427185, "learning_rate": 5.08251776476246e-06, "loss": 0.6072, "step": 4517 }, { "epoch": 0.6738506282859167, "grad_norm": 1.1311932802200317, "learning_rate": 5.0783111683165676e-06, "loss": 0.5542, "step": 4518 }, { "epoch": 0.6739997762780119, "grad_norm": 1.2665385007858276, "learning_rate": 5.074105720870495e-06, "loss": 0.6112, "step": 4519 }, { "epoch": 0.674148924270107, "grad_norm": 0.8923628330230713, "learning_rate": 5.069901423406023e-06, "loss": 0.6417, "step": 4520 }, { "epoch": 0.6742980722622022, "grad_norm": 1.2330546379089355, "learning_rate": 5.065698276904676e-06, "loss": 0.5847, "step": 4521 }, { "epoch": 0.6744472202542974, "grad_norm": 1.1996500492095947, "learning_rate": 5.061496282347709e-06, "loss": 0.4857, "step": 4522 }, { "epoch": 0.6745963682463925, "grad_norm": 1.1844964027404785, "learning_rate": 5.0572954407160954e-06, "loss": 0.5639, "step": 4523 }, { "epoch": 0.6747455162384877, "grad_norm": 1.1045905351638794, "learning_rate": 5.0530957529905515e-06, "loss": 0.5435, "step": 4524 }, { "epoch": 0.6748946642305828, "grad_norm": 1.2402286529541016, "learning_rate": 5.048897220151532e-06, "loss": 0.6103, "step": 4525 }, { "epoch": 0.675043812222678, "grad_norm": 1.2683649063110352, "learning_rate": 5.044699843179197e-06, "loss": 0.6987, "step": 4526 }, { "epoch": 0.6751929602147732, "grad_norm": 1.0840331315994263, "learning_rate": 5.040503623053458e-06, "loss": 0.5135, "step": 4527 }, { "epoch": 0.6753421082068682, "grad_norm": 1.2006202936172485, "learning_rate": 5.036308560753955e-06, "loss": 0.6182, "step": 4528 }, { "epoch": 0.6754912561989634, "grad_norm": 1.1057122945785522, "learning_rate": 5.03211465726004e-06, "loss": 0.5352, "step": 4529 }, { "epoch": 0.6756404041910585, "grad_norm": 1.1277711391448975, "learning_rate": 5.027921913550813e-06, "loss": 0.638, "step": 4530 }, { "epoch": 0.6757895521831537, "grad_norm": 1.1061714887619019, "learning_rate": 5.023730330605095e-06, "loss": 0.5674, "step": 4531 }, { "epoch": 0.6759387001752489, "grad_norm": 1.1109873056411743, "learning_rate": 5.019539909401445e-06, "loss": 0.5498, "step": 4532 }, { "epoch": 0.676087848167344, "grad_norm": 0.8227382302284241, "learning_rate": 5.015350650918129e-06, "loss": 0.667, "step": 4533 }, { "epoch": 0.6762369961594392, "grad_norm": 1.1566224098205566, "learning_rate": 5.011162556133174e-06, "loss": 0.5586, "step": 4534 }, { "epoch": 0.6763861441515343, "grad_norm": 1.1143016815185547, "learning_rate": 5.006975626024304e-06, "loss": 0.5787, "step": 4535 }, { "epoch": 0.6765352921436295, "grad_norm": 0.8538509607315063, "learning_rate": 5.00278986156899e-06, "loss": 0.6355, "step": 4536 }, { "epoch": 0.6766844401357247, "grad_norm": 1.3284767866134644, "learning_rate": 4.998605263744426e-06, "loss": 0.631, "step": 4537 }, { "epoch": 0.6768335881278198, "grad_norm": 1.1405274868011475, "learning_rate": 4.994421833527536e-06, "loss": 0.5186, "step": 4538 }, { "epoch": 0.676982736119915, "grad_norm": 1.2483799457550049, "learning_rate": 4.99023957189496e-06, "loss": 0.6453, "step": 4539 }, { "epoch": 0.6771318841120102, "grad_norm": 1.0733426809310913, "learning_rate": 4.986058479823079e-06, "loss": 0.5454, "step": 4540 }, { "epoch": 0.6772810321041053, "grad_norm": 1.0303285121917725, "learning_rate": 4.9818785582880006e-06, "loss": 0.465, "step": 4541 }, { "epoch": 0.6774301800962005, "grad_norm": 1.2345937490463257, "learning_rate": 4.9776998082655405e-06, "loss": 0.6012, "step": 4542 }, { "epoch": 0.6775793280882956, "grad_norm": 1.2775410413742065, "learning_rate": 4.973522230731267e-06, "loss": 0.6314, "step": 4543 }, { "epoch": 0.6777284760803908, "grad_norm": 1.1454492807388306, "learning_rate": 4.969345826660462e-06, "loss": 0.6694, "step": 4544 }, { "epoch": 0.677877624072486, "grad_norm": 1.2050602436065674, "learning_rate": 4.9651705970281286e-06, "loss": 0.6505, "step": 4545 }, { "epoch": 0.6780267720645811, "grad_norm": 1.2925056219100952, "learning_rate": 4.960996542809001e-06, "loss": 0.5573, "step": 4546 }, { "epoch": 0.6781759200566763, "grad_norm": 1.145829439163208, "learning_rate": 4.956823664977545e-06, "loss": 0.5617, "step": 4547 }, { "epoch": 0.6783250680487714, "grad_norm": 1.1386644840240479, "learning_rate": 4.9526519645079376e-06, "loss": 0.5401, "step": 4548 }, { "epoch": 0.6784742160408666, "grad_norm": 1.154524326324463, "learning_rate": 4.948481442374093e-06, "loss": 0.5463, "step": 4549 }, { "epoch": 0.6786233640329618, "grad_norm": 1.2001270055770874, "learning_rate": 4.944312099549647e-06, "loss": 0.5736, "step": 4550 }, { "epoch": 0.6787725120250568, "grad_norm": 1.0664136409759521, "learning_rate": 4.940143937007957e-06, "loss": 0.5547, "step": 4551 }, { "epoch": 0.678921660017152, "grad_norm": 1.2962263822555542, "learning_rate": 4.935976955722109e-06, "loss": 0.585, "step": 4552 }, { "epoch": 0.6790708080092471, "grad_norm": 1.2267484664916992, "learning_rate": 4.931811156664916e-06, "loss": 0.5907, "step": 4553 }, { "epoch": 0.6792199560013423, "grad_norm": 1.2502740621566772, "learning_rate": 4.927646540808903e-06, "loss": 0.5961, "step": 4554 }, { "epoch": 0.6793691039934375, "grad_norm": 1.1407561302185059, "learning_rate": 4.923483109126328e-06, "loss": 0.5723, "step": 4555 }, { "epoch": 0.6795182519855326, "grad_norm": 0.8568358421325684, "learning_rate": 4.919320862589172e-06, "loss": 0.6338, "step": 4556 }, { "epoch": 0.6796673999776278, "grad_norm": 1.1560707092285156, "learning_rate": 4.915159802169143e-06, "loss": 0.6407, "step": 4557 }, { "epoch": 0.6798165479697229, "grad_norm": 1.1649090051651, "learning_rate": 4.910999928837656e-06, "loss": 0.6112, "step": 4558 }, { "epoch": 0.6799656959618181, "grad_norm": 1.1629127264022827, "learning_rate": 4.906841243565869e-06, "loss": 0.6151, "step": 4559 }, { "epoch": 0.6801148439539133, "grad_norm": 1.1497483253479004, "learning_rate": 4.90268374732465e-06, "loss": 0.6018, "step": 4560 }, { "epoch": 0.6802639919460084, "grad_norm": 1.203366994857788, "learning_rate": 4.898527441084595e-06, "loss": 0.6457, "step": 4561 }, { "epoch": 0.6804131399381036, "grad_norm": 1.2814174890518188, "learning_rate": 4.894372325816019e-06, "loss": 0.5872, "step": 4562 }, { "epoch": 0.6805622879301988, "grad_norm": 1.221393346786499, "learning_rate": 4.890218402488966e-06, "loss": 0.6135, "step": 4563 }, { "epoch": 0.6807114359222939, "grad_norm": 1.130811095237732, "learning_rate": 4.886065672073186e-06, "loss": 0.6356, "step": 4564 }, { "epoch": 0.6808605839143891, "grad_norm": 1.1818197965621948, "learning_rate": 4.881914135538166e-06, "loss": 0.678, "step": 4565 }, { "epoch": 0.6810097319064842, "grad_norm": 1.1366040706634521, "learning_rate": 4.877763793853112e-06, "loss": 0.613, "step": 4566 }, { "epoch": 0.6811588798985794, "grad_norm": 1.1133453845977783, "learning_rate": 4.8736146479869404e-06, "loss": 0.5551, "step": 4567 }, { "epoch": 0.6813080278906746, "grad_norm": 1.1842107772827148, "learning_rate": 4.869466698908297e-06, "loss": 0.5002, "step": 4568 }, { "epoch": 0.6814571758827697, "grad_norm": 1.2618929147720337, "learning_rate": 4.865319947585556e-06, "loss": 0.6458, "step": 4569 }, { "epoch": 0.6816063238748649, "grad_norm": 1.1279622316360474, "learning_rate": 4.861174394986795e-06, "loss": 0.5736, "step": 4570 }, { "epoch": 0.68175547186696, "grad_norm": 1.1558626890182495, "learning_rate": 4.857030042079821e-06, "loss": 0.5279, "step": 4571 }, { "epoch": 0.6819046198590551, "grad_norm": 1.1837997436523438, "learning_rate": 4.852886889832163e-06, "loss": 0.5476, "step": 4572 }, { "epoch": 0.6820537678511503, "grad_norm": 1.2098784446716309, "learning_rate": 4.848744939211062e-06, "loss": 0.5477, "step": 4573 }, { "epoch": 0.6822029158432454, "grad_norm": 1.1142791509628296, "learning_rate": 4.844604191183485e-06, "loss": 0.6369, "step": 4574 }, { "epoch": 0.6823520638353406, "grad_norm": 1.055177092552185, "learning_rate": 4.8404646467161184e-06, "loss": 0.4863, "step": 4575 }, { "epoch": 0.6825012118274357, "grad_norm": 1.3716024160385132, "learning_rate": 4.836326306775357e-06, "loss": 0.6725, "step": 4576 }, { "epoch": 0.6826503598195309, "grad_norm": 1.2545936107635498, "learning_rate": 4.832189172327333e-06, "loss": 0.5201, "step": 4577 }, { "epoch": 0.6827995078116261, "grad_norm": 1.175665259361267, "learning_rate": 4.8280532443378855e-06, "loss": 0.5939, "step": 4578 }, { "epoch": 0.6829486558037212, "grad_norm": 1.097838282585144, "learning_rate": 4.823918523772567e-06, "loss": 0.5702, "step": 4579 }, { "epoch": 0.6830978037958164, "grad_norm": 1.1019110679626465, "learning_rate": 4.819785011596659e-06, "loss": 0.5763, "step": 4580 }, { "epoch": 0.6832469517879115, "grad_norm": 1.1551662683486938, "learning_rate": 4.81565270877516e-06, "loss": 0.5602, "step": 4581 }, { "epoch": 0.6833960997800067, "grad_norm": 1.1552989482879639, "learning_rate": 4.811521616272774e-06, "loss": 0.6212, "step": 4582 }, { "epoch": 0.6835452477721019, "grad_norm": 1.1038957834243774, "learning_rate": 4.807391735053936e-06, "loss": 0.5037, "step": 4583 }, { "epoch": 0.683694395764197, "grad_norm": 1.1245678663253784, "learning_rate": 4.8032630660827914e-06, "loss": 0.632, "step": 4584 }, { "epoch": 0.6838435437562922, "grad_norm": 1.2146985530853271, "learning_rate": 4.799135610323207e-06, "loss": 0.6245, "step": 4585 }, { "epoch": 0.6839926917483873, "grad_norm": 1.1418646574020386, "learning_rate": 4.795009368738761e-06, "loss": 0.5904, "step": 4586 }, { "epoch": 0.6841418397404825, "grad_norm": 1.1824188232421875, "learning_rate": 4.790884342292758e-06, "loss": 0.6323, "step": 4587 }, { "epoch": 0.6842909877325777, "grad_norm": 1.1313115358352661, "learning_rate": 4.7867605319482014e-06, "loss": 0.5609, "step": 4588 }, { "epoch": 0.6844401357246728, "grad_norm": 1.138644814491272, "learning_rate": 4.782637938667825e-06, "loss": 0.5925, "step": 4589 }, { "epoch": 0.684589283716768, "grad_norm": 1.1801663637161255, "learning_rate": 4.778516563414078e-06, "loss": 0.6445, "step": 4590 }, { "epoch": 0.6847384317088632, "grad_norm": 1.0447837114334106, "learning_rate": 4.7743964071491224e-06, "loss": 0.4919, "step": 4591 }, { "epoch": 0.6848875797009583, "grad_norm": 1.066903829574585, "learning_rate": 4.770277470834829e-06, "loss": 0.4749, "step": 4592 }, { "epoch": 0.6850367276930535, "grad_norm": 1.1797895431518555, "learning_rate": 4.766159755432793e-06, "loss": 0.6057, "step": 4593 }, { "epoch": 0.6851858756851485, "grad_norm": 1.1916897296905518, "learning_rate": 4.762043261904321e-06, "loss": 0.6025, "step": 4594 }, { "epoch": 0.6853350236772437, "grad_norm": 1.2428162097930908, "learning_rate": 4.757927991210436e-06, "loss": 0.5584, "step": 4595 }, { "epoch": 0.685484171669339, "grad_norm": 1.2499257326126099, "learning_rate": 4.753813944311873e-06, "loss": 0.5436, "step": 4596 }, { "epoch": 0.685633319661434, "grad_norm": 1.1204156875610352, "learning_rate": 4.749701122169089e-06, "loss": 0.5887, "step": 4597 }, { "epoch": 0.6857824676535292, "grad_norm": 1.1277775764465332, "learning_rate": 4.745589525742238e-06, "loss": 0.5598, "step": 4598 }, { "epoch": 0.6859316156456243, "grad_norm": 1.1962370872497559, "learning_rate": 4.741479155991204e-06, "loss": 0.6191, "step": 4599 }, { "epoch": 0.6860807636377195, "grad_norm": 1.1131188869476318, "learning_rate": 4.737370013875583e-06, "loss": 0.7012, "step": 4600 }, { "epoch": 0.6862299116298147, "grad_norm": 1.1397022008895874, "learning_rate": 4.7332621003546716e-06, "loss": 0.6147, "step": 4601 }, { "epoch": 0.6863790596219098, "grad_norm": 1.2213411331176758, "learning_rate": 4.729155416387495e-06, "loss": 0.6174, "step": 4602 }, { "epoch": 0.686528207614005, "grad_norm": 1.1016181707382202, "learning_rate": 4.725049962932782e-06, "loss": 0.5772, "step": 4603 }, { "epoch": 0.6866773556061001, "grad_norm": 1.3054413795471191, "learning_rate": 4.720945740948979e-06, "loss": 0.6159, "step": 4604 }, { "epoch": 0.6868265035981953, "grad_norm": 1.1525484323501587, "learning_rate": 4.716842751394241e-06, "loss": 0.5941, "step": 4605 }, { "epoch": 0.6869756515902905, "grad_norm": 0.8614673614501953, "learning_rate": 4.7127409952264445e-06, "loss": 0.6516, "step": 4606 }, { "epoch": 0.6871247995823856, "grad_norm": 1.0677436590194702, "learning_rate": 4.70864047340316e-06, "loss": 0.5334, "step": 4607 }, { "epoch": 0.6872739475744808, "grad_norm": 1.1668440103530884, "learning_rate": 4.704541186881685e-06, "loss": 0.604, "step": 4608 }, { "epoch": 0.6874230955665759, "grad_norm": 1.227146863937378, "learning_rate": 4.700443136619024e-06, "loss": 0.6329, "step": 4609 }, { "epoch": 0.6875722435586711, "grad_norm": 1.2191230058670044, "learning_rate": 4.696346323571899e-06, "loss": 0.5607, "step": 4610 }, { "epoch": 0.6877213915507663, "grad_norm": 1.052797555923462, "learning_rate": 4.692250748696723e-06, "loss": 0.5228, "step": 4611 }, { "epoch": 0.6878705395428614, "grad_norm": 1.167108178138733, "learning_rate": 4.688156412949651e-06, "loss": 0.5946, "step": 4612 }, { "epoch": 0.6880196875349566, "grad_norm": 1.2659995555877686, "learning_rate": 4.684063317286521e-06, "loss": 0.6252, "step": 4613 }, { "epoch": 0.6881688355270518, "grad_norm": 1.1299726963043213, "learning_rate": 4.679971462662896e-06, "loss": 0.5294, "step": 4614 }, { "epoch": 0.6883179835191469, "grad_norm": 1.1680541038513184, "learning_rate": 4.675880850034045e-06, "loss": 0.5963, "step": 4615 }, { "epoch": 0.6884671315112421, "grad_norm": 1.2288559675216675, "learning_rate": 4.67179148035495e-06, "loss": 0.6503, "step": 4616 }, { "epoch": 0.6886162795033371, "grad_norm": 1.1791006326675415, "learning_rate": 4.667703354580297e-06, "loss": 0.7085, "step": 4617 }, { "epoch": 0.6887654274954323, "grad_norm": 1.1301515102386475, "learning_rate": 4.663616473664485e-06, "loss": 0.5989, "step": 4618 }, { "epoch": 0.6889145754875275, "grad_norm": 1.2242048978805542, "learning_rate": 4.659530838561629e-06, "loss": 0.6651, "step": 4619 }, { "epoch": 0.6890637234796226, "grad_norm": 1.198386549949646, "learning_rate": 4.6554464502255345e-06, "loss": 0.6273, "step": 4620 }, { "epoch": 0.6892128714717178, "grad_norm": 1.1123608350753784, "learning_rate": 4.65136330960974e-06, "loss": 0.5467, "step": 4621 }, { "epoch": 0.6893620194638129, "grad_norm": 1.0692660808563232, "learning_rate": 4.64728141766748e-06, "loss": 0.5874, "step": 4622 }, { "epoch": 0.6895111674559081, "grad_norm": 1.0916082859039307, "learning_rate": 4.6432007753516904e-06, "loss": 0.591, "step": 4623 }, { "epoch": 0.6896603154480033, "grad_norm": 1.141681432723999, "learning_rate": 4.6391213836150284e-06, "loss": 0.5949, "step": 4624 }, { "epoch": 0.6898094634400984, "grad_norm": 0.8955248594284058, "learning_rate": 4.635043243409857e-06, "loss": 0.6619, "step": 4625 }, { "epoch": 0.6899586114321936, "grad_norm": 0.8681362271308899, "learning_rate": 4.6309663556882365e-06, "loss": 0.6235, "step": 4626 }, { "epoch": 0.6901077594242887, "grad_norm": 1.1986907720565796, "learning_rate": 4.626890721401948e-06, "loss": 0.5869, "step": 4627 }, { "epoch": 0.6902569074163839, "grad_norm": 1.1627693176269531, "learning_rate": 4.622816341502475e-06, "loss": 0.6103, "step": 4628 }, { "epoch": 0.6904060554084791, "grad_norm": 1.233422040939331, "learning_rate": 4.618743216941e-06, "loss": 0.658, "step": 4629 }, { "epoch": 0.6905552034005742, "grad_norm": 1.114574670791626, "learning_rate": 4.614671348668429e-06, "loss": 0.613, "step": 4630 }, { "epoch": 0.6907043513926694, "grad_norm": 1.1606107950210571, "learning_rate": 4.610600737635367e-06, "loss": 0.5558, "step": 4631 }, { "epoch": 0.6908534993847645, "grad_norm": 1.192320466041565, "learning_rate": 4.606531384792114e-06, "loss": 0.5744, "step": 4632 }, { "epoch": 0.6910026473768597, "grad_norm": 1.1826887130737305, "learning_rate": 4.602463291088695e-06, "loss": 0.6135, "step": 4633 }, { "epoch": 0.6911517953689549, "grad_norm": 1.1771423816680908, "learning_rate": 4.5983964574748315e-06, "loss": 0.5781, "step": 4634 }, { "epoch": 0.69130094336105, "grad_norm": 1.1391550302505493, "learning_rate": 4.594330884899948e-06, "loss": 0.6035, "step": 4635 }, { "epoch": 0.6914500913531452, "grad_norm": 1.1792465448379517, "learning_rate": 4.59026657431318e-06, "loss": 0.5834, "step": 4636 }, { "epoch": 0.6915992393452404, "grad_norm": 1.1430134773254395, "learning_rate": 4.586203526663368e-06, "loss": 0.5573, "step": 4637 }, { "epoch": 0.6917483873373355, "grad_norm": 1.1240687370300293, "learning_rate": 4.582141742899056e-06, "loss": 0.6272, "step": 4638 }, { "epoch": 0.6918975353294307, "grad_norm": 1.1258562803268433, "learning_rate": 4.578081223968494e-06, "loss": 0.6581, "step": 4639 }, { "epoch": 0.6920466833215257, "grad_norm": 1.14785897731781, "learning_rate": 4.574021970819635e-06, "loss": 0.5885, "step": 4640 }, { "epoch": 0.692195831313621, "grad_norm": 1.2562083005905151, "learning_rate": 4.569963984400143e-06, "loss": 0.5682, "step": 4641 }, { "epoch": 0.6923449793057161, "grad_norm": 1.228699803352356, "learning_rate": 4.565907265657372e-06, "loss": 0.6585, "step": 4642 }, { "epoch": 0.6924941272978112, "grad_norm": 1.357157588005066, "learning_rate": 4.561851815538394e-06, "loss": 0.6482, "step": 4643 }, { "epoch": 0.6926432752899064, "grad_norm": 1.35426926612854, "learning_rate": 4.557797634989982e-06, "loss": 0.6167, "step": 4644 }, { "epoch": 0.6927924232820015, "grad_norm": 1.2337061166763306, "learning_rate": 4.553744724958605e-06, "loss": 0.6023, "step": 4645 }, { "epoch": 0.6929415712740967, "grad_norm": 1.3174333572387695, "learning_rate": 4.54969308639044e-06, "loss": 0.6121, "step": 4646 }, { "epoch": 0.6930907192661919, "grad_norm": 1.141324758529663, "learning_rate": 4.545642720231378e-06, "loss": 0.6851, "step": 4647 }, { "epoch": 0.693239867258287, "grad_norm": 1.1311397552490234, "learning_rate": 4.541593627426993e-06, "loss": 0.5153, "step": 4648 }, { "epoch": 0.6933890152503822, "grad_norm": 1.3252507448196411, "learning_rate": 4.537545808922577e-06, "loss": 0.6548, "step": 4649 }, { "epoch": 0.6935381632424773, "grad_norm": 1.1416232585906982, "learning_rate": 4.5334992656631184e-06, "loss": 0.6725, "step": 4650 }, { "epoch": 0.6936873112345725, "grad_norm": 1.2789664268493652, "learning_rate": 4.529453998593305e-06, "loss": 0.5582, "step": 4651 }, { "epoch": 0.6938364592266677, "grad_norm": 1.2729068994522095, "learning_rate": 4.525410008657534e-06, "loss": 0.6563, "step": 4652 }, { "epoch": 0.6939856072187628, "grad_norm": 1.3403857946395874, "learning_rate": 4.521367296799902e-06, "loss": 0.6199, "step": 4653 }, { "epoch": 0.694134755210858, "grad_norm": 1.200751543045044, "learning_rate": 4.517325863964201e-06, "loss": 0.6383, "step": 4654 }, { "epoch": 0.6942839032029531, "grad_norm": 1.1545851230621338, "learning_rate": 4.5132857110939275e-06, "loss": 0.6515, "step": 4655 }, { "epoch": 0.6944330511950483, "grad_norm": 0.8524675369262695, "learning_rate": 4.509246839132294e-06, "loss": 0.6019, "step": 4656 }, { "epoch": 0.6945821991871435, "grad_norm": 1.0721161365509033, "learning_rate": 4.5052092490221885e-06, "loss": 0.5209, "step": 4657 }, { "epoch": 0.6947313471792386, "grad_norm": 1.2225912809371948, "learning_rate": 4.501172941706218e-06, "loss": 0.6481, "step": 4658 }, { "epoch": 0.6948804951713338, "grad_norm": 1.2082359790802002, "learning_rate": 4.497137918126685e-06, "loss": 0.5904, "step": 4659 }, { "epoch": 0.695029643163429, "grad_norm": 1.1718449592590332, "learning_rate": 4.4931041792255855e-06, "loss": 0.5729, "step": 4660 }, { "epoch": 0.6951787911555241, "grad_norm": 1.0891951322555542, "learning_rate": 4.489071725944627e-06, "loss": 0.5872, "step": 4661 }, { "epoch": 0.6953279391476193, "grad_norm": 0.8308393359184265, "learning_rate": 4.485040559225211e-06, "loss": 0.6296, "step": 4662 }, { "epoch": 0.6954770871397143, "grad_norm": 1.138563632965088, "learning_rate": 4.48101068000844e-06, "loss": 0.583, "step": 4663 }, { "epoch": 0.6956262351318095, "grad_norm": 1.1007881164550781, "learning_rate": 4.476982089235109e-06, "loss": 0.5434, "step": 4664 }, { "epoch": 0.6957753831239047, "grad_norm": 0.822973370552063, "learning_rate": 4.472954787845729e-06, "loss": 0.6435, "step": 4665 }, { "epoch": 0.6959245311159998, "grad_norm": 1.1138176918029785, "learning_rate": 4.468928776780489e-06, "loss": 0.5508, "step": 4666 }, { "epoch": 0.696073679108095, "grad_norm": 1.245082974433899, "learning_rate": 4.464904056979293e-06, "loss": 0.6221, "step": 4667 }, { "epoch": 0.6962228271001901, "grad_norm": 1.2746226787567139, "learning_rate": 4.460880629381736e-06, "loss": 0.643, "step": 4668 }, { "epoch": 0.6963719750922853, "grad_norm": 1.0983999967575073, "learning_rate": 4.456858494927116e-06, "loss": 0.5834, "step": 4669 }, { "epoch": 0.6965211230843805, "grad_norm": 1.1729364395141602, "learning_rate": 4.452837654554419e-06, "loss": 0.5223, "step": 4670 }, { "epoch": 0.6966702710764756, "grad_norm": 1.0727912187576294, "learning_rate": 4.448818109202341e-06, "loss": 0.5587, "step": 4671 }, { "epoch": 0.6968194190685708, "grad_norm": 1.2402125597000122, "learning_rate": 4.444799859809274e-06, "loss": 0.6378, "step": 4672 }, { "epoch": 0.6969685670606659, "grad_norm": 1.252654790878296, "learning_rate": 4.440782907313291e-06, "loss": 0.5829, "step": 4673 }, { "epoch": 0.6971177150527611, "grad_norm": 1.1795783042907715, "learning_rate": 4.436767252652189e-06, "loss": 0.5248, "step": 4674 }, { "epoch": 0.6972668630448563, "grad_norm": 1.144845724105835, "learning_rate": 4.432752896763447e-06, "loss": 0.5793, "step": 4675 }, { "epoch": 0.6974160110369514, "grad_norm": 1.0710941553115845, "learning_rate": 4.428739840584235e-06, "loss": 0.61, "step": 4676 }, { "epoch": 0.6975651590290466, "grad_norm": 1.1360013484954834, "learning_rate": 4.42472808505143e-06, "loss": 0.5763, "step": 4677 }, { "epoch": 0.6977143070211417, "grad_norm": 1.1737796068191528, "learning_rate": 4.420717631101607e-06, "loss": 0.6346, "step": 4678 }, { "epoch": 0.6978634550132369, "grad_norm": 1.22300386428833, "learning_rate": 4.416708479671022e-06, "loss": 0.6412, "step": 4679 }, { "epoch": 0.6980126030053321, "grad_norm": 1.2667038440704346, "learning_rate": 4.412700631695645e-06, "loss": 0.632, "step": 4680 }, { "epoch": 0.6981617509974272, "grad_norm": 1.1010662317276, "learning_rate": 4.4086940881111294e-06, "loss": 0.6279, "step": 4681 }, { "epoch": 0.6983108989895224, "grad_norm": 1.1451748609542847, "learning_rate": 4.404688849852832e-06, "loss": 0.5761, "step": 4682 }, { "epoch": 0.6984600469816175, "grad_norm": 1.1765916347503662, "learning_rate": 4.4006849178558e-06, "loss": 0.5751, "step": 4683 }, { "epoch": 0.6986091949737127, "grad_norm": 1.2075896263122559, "learning_rate": 4.396682293054779e-06, "loss": 0.6084, "step": 4684 }, { "epoch": 0.6987583429658079, "grad_norm": 1.2131643295288086, "learning_rate": 4.392680976384204e-06, "loss": 0.6106, "step": 4685 }, { "epoch": 0.6989074909579029, "grad_norm": 1.1098800897598267, "learning_rate": 4.388680968778207e-06, "loss": 0.5923, "step": 4686 }, { "epoch": 0.6990566389499981, "grad_norm": 1.194676160812378, "learning_rate": 4.384682271170619e-06, "loss": 0.5998, "step": 4687 }, { "epoch": 0.6992057869420933, "grad_norm": 1.1228023767471313, "learning_rate": 4.380684884494965e-06, "loss": 0.5857, "step": 4688 }, { "epoch": 0.6993549349341884, "grad_norm": 1.1471285820007324, "learning_rate": 4.376688809684452e-06, "loss": 0.5747, "step": 4689 }, { "epoch": 0.6995040829262836, "grad_norm": 1.2427692413330078, "learning_rate": 4.3726940476719925e-06, "loss": 0.5901, "step": 4690 }, { "epoch": 0.6996532309183787, "grad_norm": 1.1488617658615112, "learning_rate": 4.3687005993901895e-06, "loss": 0.641, "step": 4691 }, { "epoch": 0.6998023789104739, "grad_norm": 1.2819359302520752, "learning_rate": 4.364708465771341e-06, "loss": 0.6194, "step": 4692 }, { "epoch": 0.6999515269025691, "grad_norm": 1.0942200422286987, "learning_rate": 4.360717647747434e-06, "loss": 0.5074, "step": 4693 }, { "epoch": 0.7001006748946642, "grad_norm": 1.2057924270629883, "learning_rate": 4.3567281462501555e-06, "loss": 0.5849, "step": 4694 }, { "epoch": 0.7002498228867594, "grad_norm": 1.131862998008728, "learning_rate": 4.352739962210872e-06, "loss": 0.5594, "step": 4695 }, { "epoch": 0.7003989708788545, "grad_norm": 1.1021589040756226, "learning_rate": 4.348753096560655e-06, "loss": 0.5806, "step": 4696 }, { "epoch": 0.7005481188709497, "grad_norm": 1.2582430839538574, "learning_rate": 4.344767550230268e-06, "loss": 0.629, "step": 4697 }, { "epoch": 0.7006972668630449, "grad_norm": 1.18870210647583, "learning_rate": 4.340783324150153e-06, "loss": 0.6593, "step": 4698 }, { "epoch": 0.70084641485514, "grad_norm": 1.1528798341751099, "learning_rate": 4.3368004192504554e-06, "loss": 0.5483, "step": 4699 }, { "epoch": 0.7009955628472352, "grad_norm": 1.19119131565094, "learning_rate": 4.332818836461019e-06, "loss": 0.5816, "step": 4700 }, { "epoch": 0.7011447108393303, "grad_norm": 1.1978033781051636, "learning_rate": 4.32883857671136e-06, "loss": 0.6057, "step": 4701 }, { "epoch": 0.7012938588314255, "grad_norm": 1.0719757080078125, "learning_rate": 4.3248596409306995e-06, "loss": 0.4696, "step": 4702 }, { "epoch": 0.7014430068235207, "grad_norm": 1.1595854759216309, "learning_rate": 4.3208820300479495e-06, "loss": 0.6088, "step": 4703 }, { "epoch": 0.7015921548156158, "grad_norm": 1.1879312992095947, "learning_rate": 4.316905744991699e-06, "loss": 0.5722, "step": 4704 }, { "epoch": 0.701741302807711, "grad_norm": 1.2254834175109863, "learning_rate": 4.312930786690244e-06, "loss": 0.6114, "step": 4705 }, { "epoch": 0.701890450799806, "grad_norm": 1.3610371351242065, "learning_rate": 4.308957156071565e-06, "loss": 0.583, "step": 4706 }, { "epoch": 0.7020395987919013, "grad_norm": 1.179653286933899, "learning_rate": 4.304984854063326e-06, "loss": 0.585, "step": 4707 }, { "epoch": 0.7021887467839965, "grad_norm": 1.2779004573822021, "learning_rate": 4.301013881592885e-06, "loss": 0.6492, "step": 4708 }, { "epoch": 0.7023378947760915, "grad_norm": 1.195906162261963, "learning_rate": 4.297044239587304e-06, "loss": 0.5431, "step": 4709 }, { "epoch": 0.7024870427681867, "grad_norm": 1.1321227550506592, "learning_rate": 4.293075928973308e-06, "loss": 0.5712, "step": 4710 }, { "epoch": 0.7026361907602819, "grad_norm": 1.0413086414337158, "learning_rate": 4.28910895067733e-06, "loss": 0.485, "step": 4711 }, { "epoch": 0.702785338752377, "grad_norm": 1.170344352722168, "learning_rate": 4.285143305625489e-06, "loss": 0.5873, "step": 4712 }, { "epoch": 0.7029344867444722, "grad_norm": 1.2702404260635376, "learning_rate": 4.281178994743584e-06, "loss": 0.6072, "step": 4713 }, { "epoch": 0.7030836347365673, "grad_norm": 1.2409613132476807, "learning_rate": 4.277216018957112e-06, "loss": 0.6736, "step": 4714 }, { "epoch": 0.7032327827286625, "grad_norm": 1.2229801416397095, "learning_rate": 4.273254379191255e-06, "loss": 0.5732, "step": 4715 }, { "epoch": 0.7033819307207577, "grad_norm": 0.8491092324256897, "learning_rate": 4.269294076370884e-06, "loss": 0.6464, "step": 4716 }, { "epoch": 0.7035310787128528, "grad_norm": 1.163631796836853, "learning_rate": 4.265335111420554e-06, "loss": 0.6222, "step": 4717 }, { "epoch": 0.703680226704948, "grad_norm": 1.1773173809051514, "learning_rate": 4.26137748526452e-06, "loss": 0.6196, "step": 4718 }, { "epoch": 0.7038293746970431, "grad_norm": 1.1594411134719849, "learning_rate": 4.257421198826703e-06, "loss": 0.6355, "step": 4719 }, { "epoch": 0.7039785226891383, "grad_norm": 1.2234302759170532, "learning_rate": 4.253466253030728e-06, "loss": 0.5999, "step": 4720 }, { "epoch": 0.7041276706812335, "grad_norm": 1.2658653259277344, "learning_rate": 4.249512648799904e-06, "loss": 0.642, "step": 4721 }, { "epoch": 0.7042768186733286, "grad_norm": 1.2345919609069824, "learning_rate": 4.245560387057228e-06, "loss": 0.618, "step": 4722 }, { "epoch": 0.7044259666654238, "grad_norm": 1.1144177913665771, "learning_rate": 4.241609468725374e-06, "loss": 0.5413, "step": 4723 }, { "epoch": 0.7045751146575189, "grad_norm": 1.166223168373108, "learning_rate": 4.2376598947267124e-06, "loss": 0.5384, "step": 4724 }, { "epoch": 0.7047242626496141, "grad_norm": 1.1023294925689697, "learning_rate": 4.233711665983297e-06, "loss": 0.5164, "step": 4725 }, { "epoch": 0.7048734106417093, "grad_norm": 1.3307528495788574, "learning_rate": 4.229764783416867e-06, "loss": 0.6108, "step": 4726 }, { "epoch": 0.7050225586338044, "grad_norm": 1.3126835823059082, "learning_rate": 4.225819247948846e-06, "loss": 0.6081, "step": 4727 }, { "epoch": 0.7051717066258996, "grad_norm": 1.167709231376648, "learning_rate": 4.22187506050035e-06, "loss": 0.5509, "step": 4728 }, { "epoch": 0.7053208546179947, "grad_norm": 1.0522016286849976, "learning_rate": 4.2179322219921684e-06, "loss": 0.5378, "step": 4729 }, { "epoch": 0.7054700026100899, "grad_norm": 1.1184958219528198, "learning_rate": 4.213990733344783e-06, "loss": 0.5787, "step": 4730 }, { "epoch": 0.705619150602185, "grad_norm": 1.033262014389038, "learning_rate": 4.210050595478365e-06, "loss": 0.5677, "step": 4731 }, { "epoch": 0.7057682985942801, "grad_norm": 1.196907877922058, "learning_rate": 4.206111809312757e-06, "loss": 0.5391, "step": 4732 }, { "epoch": 0.7059174465863753, "grad_norm": 1.206258773803711, "learning_rate": 4.202174375767498e-06, "loss": 0.5626, "step": 4733 }, { "epoch": 0.7060665945784705, "grad_norm": 1.2271226644515991, "learning_rate": 4.198238295761807e-06, "loss": 0.6945, "step": 4734 }, { "epoch": 0.7062157425705656, "grad_norm": 1.2013053894042969, "learning_rate": 4.194303570214586e-06, "loss": 0.5889, "step": 4735 }, { "epoch": 0.7063648905626608, "grad_norm": 1.3024832010269165, "learning_rate": 4.1903702000444235e-06, "loss": 0.7537, "step": 4736 }, { "epoch": 0.7065140385547559, "grad_norm": 1.1314244270324707, "learning_rate": 4.1864381861695934e-06, "loss": 0.5599, "step": 4737 }, { "epoch": 0.7066631865468511, "grad_norm": 1.1418601274490356, "learning_rate": 4.182507529508042e-06, "loss": 0.5319, "step": 4738 }, { "epoch": 0.7068123345389463, "grad_norm": 1.0825227499008179, "learning_rate": 4.178578230977409e-06, "loss": 0.5687, "step": 4739 }, { "epoch": 0.7069614825310414, "grad_norm": 1.0722463130950928, "learning_rate": 4.174650291495015e-06, "loss": 0.536, "step": 4740 }, { "epoch": 0.7071106305231366, "grad_norm": 0.8079906702041626, "learning_rate": 4.170723711977867e-06, "loss": 0.626, "step": 4741 }, { "epoch": 0.7072597785152317, "grad_norm": 1.2130184173583984, "learning_rate": 4.166798493342642e-06, "loss": 0.6636, "step": 4742 }, { "epoch": 0.7074089265073269, "grad_norm": 1.1149083375930786, "learning_rate": 4.162874636505713e-06, "loss": 0.5481, "step": 4743 }, { "epoch": 0.7075580744994221, "grad_norm": 1.211108684539795, "learning_rate": 4.1589521423831254e-06, "loss": 0.6181, "step": 4744 }, { "epoch": 0.7077072224915172, "grad_norm": 1.0970863103866577, "learning_rate": 4.1550310118906145e-06, "loss": 0.5535, "step": 4745 }, { "epoch": 0.7078563704836124, "grad_norm": 1.1549712419509888, "learning_rate": 4.151111245943592e-06, "loss": 0.5869, "step": 4746 }, { "epoch": 0.7080055184757075, "grad_norm": 1.1141456365585327, "learning_rate": 4.1471928454571565e-06, "loss": 0.5347, "step": 4747 }, { "epoch": 0.7081546664678027, "grad_norm": 1.2203755378723145, "learning_rate": 4.143275811346076e-06, "loss": 0.5848, "step": 4748 }, { "epoch": 0.7083038144598979, "grad_norm": 1.15027916431427, "learning_rate": 4.13936014452481e-06, "loss": 0.5571, "step": 4749 }, { "epoch": 0.708452962451993, "grad_norm": 1.0707913637161255, "learning_rate": 4.1354458459075005e-06, "loss": 0.6009, "step": 4750 }, { "epoch": 0.7086021104440882, "grad_norm": 1.1279473304748535, "learning_rate": 4.131532916407955e-06, "loss": 0.5686, "step": 4751 }, { "epoch": 0.7087512584361833, "grad_norm": 1.203014612197876, "learning_rate": 4.127621356939683e-06, "loss": 0.6337, "step": 4752 }, { "epoch": 0.7089004064282785, "grad_norm": 0.8470929861068726, "learning_rate": 4.1237111684158625e-06, "loss": 0.6523, "step": 4753 }, { "epoch": 0.7090495544203737, "grad_norm": 1.1211565732955933, "learning_rate": 4.119802351749346e-06, "loss": 0.5724, "step": 4754 }, { "epoch": 0.7091987024124687, "grad_norm": 1.0658836364746094, "learning_rate": 4.1158949078526734e-06, "loss": 0.5633, "step": 4755 }, { "epoch": 0.7093478504045639, "grad_norm": 1.2818477153778076, "learning_rate": 4.111988837638067e-06, "loss": 0.6798, "step": 4756 }, { "epoch": 0.7094969983966591, "grad_norm": 1.208962321281433, "learning_rate": 4.1080841420174175e-06, "loss": 0.6423, "step": 4757 }, { "epoch": 0.7096461463887542, "grad_norm": 1.074265718460083, "learning_rate": 4.104180821902305e-06, "loss": 0.4852, "step": 4758 }, { "epoch": 0.7097952943808494, "grad_norm": 1.2138890027999878, "learning_rate": 4.100278878203986e-06, "loss": 0.6141, "step": 4759 }, { "epoch": 0.7099444423729445, "grad_norm": 1.13804030418396, "learning_rate": 4.096378311833386e-06, "loss": 0.5699, "step": 4760 }, { "epoch": 0.7100935903650397, "grad_norm": 1.2103214263916016, "learning_rate": 4.092479123701126e-06, "loss": 0.6192, "step": 4761 }, { "epoch": 0.7102427383571349, "grad_norm": 1.0567705631256104, "learning_rate": 4.088581314717498e-06, "loss": 0.5727, "step": 4762 }, { "epoch": 0.71039188634923, "grad_norm": 1.2135839462280273, "learning_rate": 4.084684885792462e-06, "loss": 0.6152, "step": 4763 }, { "epoch": 0.7105410343413252, "grad_norm": 1.1450544595718384, "learning_rate": 4.08078983783567e-06, "loss": 0.5405, "step": 4764 }, { "epoch": 0.7106901823334203, "grad_norm": 1.2284793853759766, "learning_rate": 4.076896171756444e-06, "loss": 0.5517, "step": 4765 }, { "epoch": 0.7108393303255155, "grad_norm": 1.1393284797668457, "learning_rate": 4.073003888463789e-06, "loss": 0.5397, "step": 4766 }, { "epoch": 0.7109884783176107, "grad_norm": 1.135493278503418, "learning_rate": 4.069112988866377e-06, "loss": 0.5686, "step": 4767 }, { "epoch": 0.7111376263097058, "grad_norm": 1.109712839126587, "learning_rate": 4.065223473872567e-06, "loss": 0.5189, "step": 4768 }, { "epoch": 0.711286774301801, "grad_norm": 1.1969093084335327, "learning_rate": 4.061335344390391e-06, "loss": 0.6676, "step": 4769 }, { "epoch": 0.7114359222938961, "grad_norm": 1.2108217477798462, "learning_rate": 4.0574486013275586e-06, "loss": 0.6486, "step": 4770 }, { "epoch": 0.7115850702859913, "grad_norm": 1.3037625551223755, "learning_rate": 4.053563245591452e-06, "loss": 0.6424, "step": 4771 }, { "epoch": 0.7117342182780865, "grad_norm": 1.277572512626648, "learning_rate": 4.049679278089139e-06, "loss": 0.7092, "step": 4772 }, { "epoch": 0.7118833662701816, "grad_norm": 1.1282145977020264, "learning_rate": 4.045796699727349e-06, "loss": 0.5542, "step": 4773 }, { "epoch": 0.7120325142622768, "grad_norm": 1.1828968524932861, "learning_rate": 4.0419155114124985e-06, "loss": 0.6341, "step": 4774 }, { "epoch": 0.7121816622543719, "grad_norm": 1.1949057579040527, "learning_rate": 4.038035714050678e-06, "loss": 0.5721, "step": 4775 }, { "epoch": 0.712330810246467, "grad_norm": 1.0994548797607422, "learning_rate": 4.034157308547645e-06, "loss": 0.5824, "step": 4776 }, { "epoch": 0.7124799582385622, "grad_norm": 1.188994288444519, "learning_rate": 4.030280295808838e-06, "loss": 0.6415, "step": 4777 }, { "epoch": 0.7126291062306573, "grad_norm": 1.120288610458374, "learning_rate": 4.0264046767393815e-06, "loss": 0.5865, "step": 4778 }, { "epoch": 0.7127782542227525, "grad_norm": 1.2023857831954956, "learning_rate": 4.022530452244052e-06, "loss": 0.541, "step": 4779 }, { "epoch": 0.7129274022148476, "grad_norm": 1.1326724290847778, "learning_rate": 4.018657623227317e-06, "loss": 0.6217, "step": 4780 }, { "epoch": 0.7130765502069428, "grad_norm": 1.161019206047058, "learning_rate": 4.0147861905933146e-06, "loss": 0.5767, "step": 4781 }, { "epoch": 0.713225698199038, "grad_norm": 1.189510703086853, "learning_rate": 4.010916155245851e-06, "loss": 0.6436, "step": 4782 }, { "epoch": 0.7133748461911331, "grad_norm": 1.0685893297195435, "learning_rate": 4.007047518088413e-06, "loss": 0.5715, "step": 4783 }, { "epoch": 0.7135239941832283, "grad_norm": 1.1123192310333252, "learning_rate": 4.003180280024163e-06, "loss": 0.5448, "step": 4784 }, { "epoch": 0.7136731421753235, "grad_norm": 1.1409542560577393, "learning_rate": 3.9993144419559234e-06, "loss": 0.5911, "step": 4785 }, { "epoch": 0.7138222901674186, "grad_norm": 1.276262879371643, "learning_rate": 3.995450004786201e-06, "loss": 0.6499, "step": 4786 }, { "epoch": 0.7139714381595138, "grad_norm": 1.220332145690918, "learning_rate": 3.991586969417184e-06, "loss": 0.5438, "step": 4787 }, { "epoch": 0.7141205861516089, "grad_norm": 1.093166708946228, "learning_rate": 3.9877253367507104e-06, "loss": 0.5909, "step": 4788 }, { "epoch": 0.7142697341437041, "grad_norm": 1.1779210567474365, "learning_rate": 3.98386510768831e-06, "loss": 0.5711, "step": 4789 }, { "epoch": 0.7144188821357993, "grad_norm": 1.1091822385787964, "learning_rate": 3.980006283131178e-06, "loss": 0.5177, "step": 4790 }, { "epoch": 0.7145680301278944, "grad_norm": 1.1613103151321411, "learning_rate": 3.976148863980176e-06, "loss": 0.6619, "step": 4791 }, { "epoch": 0.7147171781199896, "grad_norm": 1.1271023750305176, "learning_rate": 3.972292851135847e-06, "loss": 0.5541, "step": 4792 }, { "epoch": 0.7148663261120847, "grad_norm": 1.05609929561615, "learning_rate": 3.9684382454984015e-06, "loss": 0.5655, "step": 4793 }, { "epoch": 0.7150154741041799, "grad_norm": 1.1958469152450562, "learning_rate": 3.9645850479677264e-06, "loss": 0.6567, "step": 4794 }, { "epoch": 0.7151646220962751, "grad_norm": 1.137203574180603, "learning_rate": 3.960733259443365e-06, "loss": 0.5202, "step": 4795 }, { "epoch": 0.7153137700883702, "grad_norm": 1.1319998502731323, "learning_rate": 3.956882880824553e-06, "loss": 0.5657, "step": 4796 }, { "epoch": 0.7154629180804654, "grad_norm": 1.162359595298767, "learning_rate": 3.953033913010179e-06, "loss": 0.5554, "step": 4797 }, { "epoch": 0.7156120660725604, "grad_norm": 1.276747465133667, "learning_rate": 3.949186356898811e-06, "loss": 0.6293, "step": 4798 }, { "epoch": 0.7157612140646556, "grad_norm": 1.2002699375152588, "learning_rate": 3.945340213388687e-06, "loss": 0.5592, "step": 4799 }, { "epoch": 0.7159103620567508, "grad_norm": 1.268013834953308, "learning_rate": 3.941495483377714e-06, "loss": 0.6241, "step": 4800 }, { "epoch": 0.7160595100488459, "grad_norm": 0.8430142998695374, "learning_rate": 3.937652167763466e-06, "loss": 0.6056, "step": 4801 }, { "epoch": 0.7162086580409411, "grad_norm": 0.8419899940490723, "learning_rate": 3.933810267443191e-06, "loss": 0.6331, "step": 4802 }, { "epoch": 0.7163578060330362, "grad_norm": 1.2022889852523804, "learning_rate": 3.9299697833138094e-06, "loss": 0.5347, "step": 4803 }, { "epoch": 0.7165069540251314, "grad_norm": 1.1645019054412842, "learning_rate": 3.926130716271896e-06, "loss": 0.5952, "step": 4804 }, { "epoch": 0.7166561020172266, "grad_norm": 1.1679356098175049, "learning_rate": 3.9222930672137175e-06, "loss": 0.5488, "step": 4805 }, { "epoch": 0.7168052500093217, "grad_norm": 1.1810270547866821, "learning_rate": 3.918456837035195e-06, "loss": 0.6291, "step": 4806 }, { "epoch": 0.7169543980014169, "grad_norm": 1.1354161500930786, "learning_rate": 3.914622026631916e-06, "loss": 0.6159, "step": 4807 }, { "epoch": 0.7171035459935121, "grad_norm": 1.1857877969741821, "learning_rate": 3.910788636899143e-06, "loss": 0.635, "step": 4808 }, { "epoch": 0.7172526939856072, "grad_norm": 1.1537978649139404, "learning_rate": 3.906956668731813e-06, "loss": 0.5979, "step": 4809 }, { "epoch": 0.7174018419777024, "grad_norm": 1.2197222709655762, "learning_rate": 3.903126123024512e-06, "loss": 0.5669, "step": 4810 }, { "epoch": 0.7175509899697975, "grad_norm": 1.1534051895141602, "learning_rate": 3.899297000671511e-06, "loss": 0.6112, "step": 4811 }, { "epoch": 0.7177001379618927, "grad_norm": 1.0700410604476929, "learning_rate": 3.895469302566745e-06, "loss": 0.5547, "step": 4812 }, { "epoch": 0.7178492859539879, "grad_norm": 1.183259129524231, "learning_rate": 3.891643029603811e-06, "loss": 0.6166, "step": 4813 }, { "epoch": 0.717998433946083, "grad_norm": 1.2083783149719238, "learning_rate": 3.88781818267598e-06, "loss": 0.6418, "step": 4814 }, { "epoch": 0.7181475819381782, "grad_norm": 1.2410105466842651, "learning_rate": 3.883994762676189e-06, "loss": 0.6239, "step": 4815 }, { "epoch": 0.7182967299302733, "grad_norm": 0.8127601146697998, "learning_rate": 3.880172770497033e-06, "loss": 0.6433, "step": 4816 }, { "epoch": 0.7184458779223685, "grad_norm": 1.092099666595459, "learning_rate": 3.8763522070307835e-06, "loss": 0.57, "step": 4817 }, { "epoch": 0.7185950259144637, "grad_norm": 1.1565940380096436, "learning_rate": 3.872533073169377e-06, "loss": 0.6126, "step": 4818 }, { "epoch": 0.7187441739065588, "grad_norm": 1.2387839555740356, "learning_rate": 3.868715369804418e-06, "loss": 0.6403, "step": 4819 }, { "epoch": 0.718893321898654, "grad_norm": 1.1682031154632568, "learning_rate": 3.8648990978271646e-06, "loss": 0.5661, "step": 4820 }, { "epoch": 0.719042469890749, "grad_norm": 0.852453887462616, "learning_rate": 3.861084258128558e-06, "loss": 0.6197, "step": 4821 }, { "epoch": 0.7191916178828442, "grad_norm": 1.0887677669525146, "learning_rate": 3.857270851599193e-06, "loss": 0.5509, "step": 4822 }, { "epoch": 0.7193407658749394, "grad_norm": 1.1575602293014526, "learning_rate": 3.853458879129335e-06, "loss": 0.5794, "step": 4823 }, { "epoch": 0.7194899138670345, "grad_norm": 1.189289927482605, "learning_rate": 3.849648341608914e-06, "loss": 0.5647, "step": 4824 }, { "epoch": 0.7196390618591297, "grad_norm": 1.1600843667984009, "learning_rate": 3.845839239927527e-06, "loss": 0.6937, "step": 4825 }, { "epoch": 0.7197882098512248, "grad_norm": 1.2276676893234253, "learning_rate": 3.842031574974426e-06, "loss": 0.6213, "step": 4826 }, { "epoch": 0.71993735784332, "grad_norm": 1.1735488176345825, "learning_rate": 3.83822534763854e-06, "loss": 0.5757, "step": 4827 }, { "epoch": 0.7200865058354152, "grad_norm": 1.2753063440322876, "learning_rate": 3.834420558808459e-06, "loss": 0.5643, "step": 4828 }, { "epoch": 0.7202356538275103, "grad_norm": 1.079551100730896, "learning_rate": 3.830617209372429e-06, "loss": 0.5338, "step": 4829 }, { "epoch": 0.7203848018196055, "grad_norm": 1.1704779863357544, "learning_rate": 3.826815300218367e-06, "loss": 0.5963, "step": 4830 }, { "epoch": 0.7205339498117007, "grad_norm": 1.1802184581756592, "learning_rate": 3.8230148322338625e-06, "loss": 0.509, "step": 4831 }, { "epoch": 0.7206830978037958, "grad_norm": 1.0979735851287842, "learning_rate": 3.819215806306148e-06, "loss": 0.5552, "step": 4832 }, { "epoch": 0.720832245795891, "grad_norm": 1.1489280462265015, "learning_rate": 3.815418223322136e-06, "loss": 0.6075, "step": 4833 }, { "epoch": 0.7209813937879861, "grad_norm": 1.2211694717407227, "learning_rate": 3.811622084168399e-06, "loss": 0.5332, "step": 4834 }, { "epoch": 0.7211305417800813, "grad_norm": 1.0974595546722412, "learning_rate": 3.8078273897311626e-06, "loss": 0.5655, "step": 4835 }, { "epoch": 0.7212796897721765, "grad_norm": 1.248794436454773, "learning_rate": 3.8040341408963265e-06, "loss": 0.6162, "step": 4836 }, { "epoch": 0.7214288377642716, "grad_norm": 1.1325066089630127, "learning_rate": 3.8002423385494534e-06, "loss": 0.5426, "step": 4837 }, { "epoch": 0.7215779857563668, "grad_norm": 1.177669644355774, "learning_rate": 3.7964519835757554e-06, "loss": 0.5507, "step": 4838 }, { "epoch": 0.7217271337484619, "grad_norm": 1.128082036972046, "learning_rate": 3.792663076860116e-06, "loss": 0.5879, "step": 4839 }, { "epoch": 0.7218762817405571, "grad_norm": 1.231388807296753, "learning_rate": 3.788875619287089e-06, "loss": 0.6698, "step": 4840 }, { "epoch": 0.7220254297326523, "grad_norm": 1.1650872230529785, "learning_rate": 3.785089611740872e-06, "loss": 0.6162, "step": 4841 }, { "epoch": 0.7221745777247474, "grad_norm": 1.1355066299438477, "learning_rate": 3.7813050551053344e-06, "loss": 0.6075, "step": 4842 }, { "epoch": 0.7223237257168426, "grad_norm": 1.299243450164795, "learning_rate": 3.7775219502640105e-06, "loss": 0.6536, "step": 4843 }, { "epoch": 0.7224728737089376, "grad_norm": 1.1629501581192017, "learning_rate": 3.7737402981000827e-06, "loss": 0.5288, "step": 4844 }, { "epoch": 0.7226220217010328, "grad_norm": 1.1838665008544922, "learning_rate": 3.7699600994964046e-06, "loss": 0.5636, "step": 4845 }, { "epoch": 0.722771169693128, "grad_norm": 1.222150206565857, "learning_rate": 3.766181355335489e-06, "loss": 0.6293, "step": 4846 }, { "epoch": 0.7229203176852231, "grad_norm": 1.221805214881897, "learning_rate": 3.7624040664995075e-06, "loss": 0.5975, "step": 4847 }, { "epoch": 0.7230694656773183, "grad_norm": 1.2272223234176636, "learning_rate": 3.7586282338702918e-06, "loss": 0.5998, "step": 4848 }, { "epoch": 0.7232186136694134, "grad_norm": 1.124964952468872, "learning_rate": 3.754853858329336e-06, "loss": 0.581, "step": 4849 }, { "epoch": 0.7233677616615086, "grad_norm": 1.0305650234222412, "learning_rate": 3.7510809407577932e-06, "loss": 0.5044, "step": 4850 }, { "epoch": 0.7235169096536038, "grad_norm": 0.8230129480361938, "learning_rate": 3.7473094820364707e-06, "loss": 0.6338, "step": 4851 }, { "epoch": 0.7236660576456989, "grad_norm": 1.1496211290359497, "learning_rate": 3.7435394830458414e-06, "loss": 0.5624, "step": 4852 }, { "epoch": 0.7238152056377941, "grad_norm": 1.1172676086425781, "learning_rate": 3.73977094466604e-06, "loss": 0.4934, "step": 4853 }, { "epoch": 0.7239643536298893, "grad_norm": 1.124045968055725, "learning_rate": 3.7360038677768495e-06, "loss": 0.5898, "step": 4854 }, { "epoch": 0.7241135016219844, "grad_norm": 0.8316000699996948, "learning_rate": 3.7322382532577206e-06, "loss": 0.6276, "step": 4855 }, { "epoch": 0.7242626496140796, "grad_norm": 0.8156747221946716, "learning_rate": 3.72847410198776e-06, "loss": 0.6334, "step": 4856 }, { "epoch": 0.7244117976061747, "grad_norm": 1.167657732963562, "learning_rate": 3.7247114148457342e-06, "loss": 0.6148, "step": 4857 }, { "epoch": 0.7245609455982699, "grad_norm": 1.2166526317596436, "learning_rate": 3.7209501927100666e-06, "loss": 0.594, "step": 4858 }, { "epoch": 0.7247100935903651, "grad_norm": 1.2711364030838013, "learning_rate": 3.7171904364588405e-06, "loss": 0.6444, "step": 4859 }, { "epoch": 0.7248592415824602, "grad_norm": 1.148309588432312, "learning_rate": 3.7134321469697886e-06, "loss": 0.594, "step": 4860 }, { "epoch": 0.7250083895745554, "grad_norm": 1.0696635246276855, "learning_rate": 3.7096753251203134e-06, "loss": 0.5554, "step": 4861 }, { "epoch": 0.7251575375666505, "grad_norm": 1.1627804040908813, "learning_rate": 3.7059199717874693e-06, "loss": 0.6006, "step": 4862 }, { "epoch": 0.7253066855587457, "grad_norm": 1.1820751428604126, "learning_rate": 3.7021660878479628e-06, "loss": 0.591, "step": 4863 }, { "epoch": 0.7254558335508409, "grad_norm": 1.2837074995040894, "learning_rate": 3.698413674178165e-06, "loss": 0.6413, "step": 4864 }, { "epoch": 0.725604981542936, "grad_norm": 1.1815223693847656, "learning_rate": 3.6946627316541017e-06, "loss": 0.6157, "step": 4865 }, { "epoch": 0.7257541295350312, "grad_norm": 1.2650132179260254, "learning_rate": 3.690913261151453e-06, "loss": 0.6449, "step": 4866 }, { "epoch": 0.7259032775271262, "grad_norm": 1.1081922054290771, "learning_rate": 3.6871652635455577e-06, "loss": 0.5502, "step": 4867 }, { "epoch": 0.7260524255192214, "grad_norm": 1.1109639406204224, "learning_rate": 3.683418739711413e-06, "loss": 0.5337, "step": 4868 }, { "epoch": 0.7262015735113166, "grad_norm": 1.1174677610397339, "learning_rate": 3.6796736905236618e-06, "loss": 0.5289, "step": 4869 }, { "epoch": 0.7263507215034117, "grad_norm": 1.2143580913543701, "learning_rate": 3.6759301168566152e-06, "loss": 0.6551, "step": 4870 }, { "epoch": 0.7264998694955069, "grad_norm": 1.2404273748397827, "learning_rate": 3.6721880195842317e-06, "loss": 0.5711, "step": 4871 }, { "epoch": 0.726649017487602, "grad_norm": 1.2975833415985107, "learning_rate": 3.668447399580133e-06, "loss": 0.5527, "step": 4872 }, { "epoch": 0.7267981654796972, "grad_norm": 1.1079386472702026, "learning_rate": 3.664708257717583e-06, "loss": 0.6625, "step": 4873 }, { "epoch": 0.7269473134717924, "grad_norm": 1.1482073068618774, "learning_rate": 3.660970594869513e-06, "loss": 0.556, "step": 4874 }, { "epoch": 0.7270964614638875, "grad_norm": 1.3499141931533813, "learning_rate": 3.6572344119085033e-06, "loss": 0.6255, "step": 4875 }, { "epoch": 0.7272456094559827, "grad_norm": 1.2361699342727661, "learning_rate": 3.6534997097067913e-06, "loss": 0.5894, "step": 4876 }, { "epoch": 0.7273947574480778, "grad_norm": 1.1751947402954102, "learning_rate": 3.649766489136265e-06, "loss": 0.6032, "step": 4877 }, { "epoch": 0.727543905440173, "grad_norm": 1.2580987215042114, "learning_rate": 3.6460347510684736e-06, "loss": 0.6332, "step": 4878 }, { "epoch": 0.7276930534322682, "grad_norm": 1.2911988496780396, "learning_rate": 3.642304496374608e-06, "loss": 0.6136, "step": 4879 }, { "epoch": 0.7278422014243633, "grad_norm": 1.0997370481491089, "learning_rate": 3.638575725925523e-06, "loss": 0.5897, "step": 4880 }, { "epoch": 0.7279913494164585, "grad_norm": 1.2638368606567383, "learning_rate": 3.634848440591728e-06, "loss": 0.5584, "step": 4881 }, { "epoch": 0.7281404974085537, "grad_norm": 1.206134557723999, "learning_rate": 3.631122641243372e-06, "loss": 0.5732, "step": 4882 }, { "epoch": 0.7282896454006488, "grad_norm": 1.1432430744171143, "learning_rate": 3.6273983287502756e-06, "loss": 0.5466, "step": 4883 }, { "epoch": 0.728438793392744, "grad_norm": 1.0863145589828491, "learning_rate": 3.623675503981905e-06, "loss": 0.566, "step": 4884 }, { "epoch": 0.7285879413848391, "grad_norm": 1.2344690561294556, "learning_rate": 3.619954167807369e-06, "loss": 0.5941, "step": 4885 }, { "epoch": 0.7287370893769343, "grad_norm": 1.148627758026123, "learning_rate": 3.616234321095441e-06, "loss": 0.5728, "step": 4886 }, { "epoch": 0.7288862373690295, "grad_norm": 1.1670286655426025, "learning_rate": 3.612515964714548e-06, "loss": 0.5493, "step": 4887 }, { "epoch": 0.7290353853611246, "grad_norm": 1.1125764846801758, "learning_rate": 3.608799099532757e-06, "loss": 0.544, "step": 4888 }, { "epoch": 0.7291845333532198, "grad_norm": 1.1407939195632935, "learning_rate": 3.6050837264177952e-06, "loss": 0.6018, "step": 4889 }, { "epoch": 0.7293336813453148, "grad_norm": 1.3980306386947632, "learning_rate": 3.6013698462370426e-06, "loss": 0.6489, "step": 4890 }, { "epoch": 0.72948282933741, "grad_norm": 1.2288792133331299, "learning_rate": 3.5976574598575288e-06, "loss": 0.6856, "step": 4891 }, { "epoch": 0.7296319773295052, "grad_norm": 1.1118234395980835, "learning_rate": 3.593946568145932e-06, "loss": 0.5845, "step": 4892 }, { "epoch": 0.7297811253216003, "grad_norm": 1.1915596723556519, "learning_rate": 3.590237171968588e-06, "loss": 0.6023, "step": 4893 }, { "epoch": 0.7299302733136955, "grad_norm": 1.068679928779602, "learning_rate": 3.5865292721914724e-06, "loss": 0.5651, "step": 4894 }, { "epoch": 0.7300794213057906, "grad_norm": 1.1185020208358765, "learning_rate": 3.5828228696802226e-06, "loss": 0.5203, "step": 4895 }, { "epoch": 0.7302285692978858, "grad_norm": 1.1301345825195312, "learning_rate": 3.5791179653001195e-06, "loss": 0.5753, "step": 4896 }, { "epoch": 0.730377717289981, "grad_norm": 1.1539329290390015, "learning_rate": 3.5754145599161026e-06, "loss": 0.5749, "step": 4897 }, { "epoch": 0.7305268652820761, "grad_norm": 1.172937273979187, "learning_rate": 3.5717126543927484e-06, "loss": 0.5508, "step": 4898 }, { "epoch": 0.7306760132741713, "grad_norm": 1.077087640762329, "learning_rate": 3.5680122495942925e-06, "loss": 0.5179, "step": 4899 }, { "epoch": 0.7308251612662664, "grad_norm": 1.2563122510910034, "learning_rate": 3.5643133463846193e-06, "loss": 0.6768, "step": 4900 }, { "epoch": 0.7309743092583616, "grad_norm": 1.1861274242401123, "learning_rate": 3.5606159456272613e-06, "loss": 0.6049, "step": 4901 }, { "epoch": 0.7311234572504568, "grad_norm": 1.1913914680480957, "learning_rate": 3.5569200481854003e-06, "loss": 0.6411, "step": 4902 }, { "epoch": 0.7312726052425519, "grad_norm": 1.177161455154419, "learning_rate": 3.5532256549218715e-06, "loss": 0.6051, "step": 4903 }, { "epoch": 0.7314217532346471, "grad_norm": 1.096659779548645, "learning_rate": 3.549532766699146e-06, "loss": 0.5446, "step": 4904 }, { "epoch": 0.7315709012267423, "grad_norm": 1.0707014799118042, "learning_rate": 3.5458413843793583e-06, "loss": 0.594, "step": 4905 }, { "epoch": 0.7317200492188374, "grad_norm": 1.1189191341400146, "learning_rate": 3.5421515088242855e-06, "loss": 0.4988, "step": 4906 }, { "epoch": 0.7318691972109326, "grad_norm": 1.1265244483947754, "learning_rate": 3.5384631408953483e-06, "loss": 0.603, "step": 4907 }, { "epoch": 0.7320183452030277, "grad_norm": 1.1948596239089966, "learning_rate": 3.5347762814536224e-06, "loss": 0.5222, "step": 4908 }, { "epoch": 0.7321674931951229, "grad_norm": 1.2436039447784424, "learning_rate": 3.5310909313598287e-06, "loss": 0.6192, "step": 4909 }, { "epoch": 0.7323166411872181, "grad_norm": 1.1072673797607422, "learning_rate": 3.5274070914743362e-06, "loss": 0.5642, "step": 4910 }, { "epoch": 0.7324657891793132, "grad_norm": 1.1534022092819214, "learning_rate": 3.5237247626571604e-06, "loss": 0.5519, "step": 4911 }, { "epoch": 0.7326149371714084, "grad_norm": 1.2166026830673218, "learning_rate": 3.520043945767968e-06, "loss": 0.6019, "step": 4912 }, { "epoch": 0.7327640851635034, "grad_norm": 1.1556402444839478, "learning_rate": 3.5163646416660634e-06, "loss": 0.6345, "step": 4913 }, { "epoch": 0.7329132331555986, "grad_norm": 1.2118874788284302, "learning_rate": 3.512686851210406e-06, "loss": 0.624, "step": 4914 }, { "epoch": 0.7330623811476938, "grad_norm": 1.0992424488067627, "learning_rate": 3.509010575259604e-06, "loss": 0.5472, "step": 4915 }, { "epoch": 0.7332115291397889, "grad_norm": 1.2960197925567627, "learning_rate": 3.5053358146719e-06, "loss": 0.5653, "step": 4916 }, { "epoch": 0.7333606771318841, "grad_norm": 1.0521470308303833, "learning_rate": 3.501662570305191e-06, "loss": 0.5989, "step": 4917 }, { "epoch": 0.7335098251239792, "grad_norm": 1.113340139389038, "learning_rate": 3.4979908430170285e-06, "loss": 0.6147, "step": 4918 }, { "epoch": 0.7336589731160744, "grad_norm": 1.1486023664474487, "learning_rate": 3.4943206336645917e-06, "loss": 0.5497, "step": 4919 }, { "epoch": 0.7338081211081696, "grad_norm": 1.0247150659561157, "learning_rate": 3.490651943104718e-06, "loss": 0.538, "step": 4920 }, { "epoch": 0.7339572691002647, "grad_norm": 1.2039884328842163, "learning_rate": 3.4869847721938897e-06, "loss": 0.6251, "step": 4921 }, { "epoch": 0.7341064170923599, "grad_norm": 1.1332697868347168, "learning_rate": 3.4833191217882247e-06, "loss": 0.6112, "step": 4922 }, { "epoch": 0.734255565084455, "grad_norm": 1.1917794942855835, "learning_rate": 3.479654992743495e-06, "loss": 0.6326, "step": 4923 }, { "epoch": 0.7344047130765502, "grad_norm": 1.1416200399398804, "learning_rate": 3.4759923859151167e-06, "loss": 0.5918, "step": 4924 }, { "epoch": 0.7345538610686454, "grad_norm": 1.2515106201171875, "learning_rate": 3.4723313021581517e-06, "loss": 0.6997, "step": 4925 }, { "epoch": 0.7347030090607405, "grad_norm": 1.1419085264205933, "learning_rate": 3.4686717423272932e-06, "loss": 0.564, "step": 4926 }, { "epoch": 0.7348521570528357, "grad_norm": 1.1829309463500977, "learning_rate": 3.465013707276902e-06, "loss": 0.519, "step": 4927 }, { "epoch": 0.7350013050449309, "grad_norm": 1.3042278289794922, "learning_rate": 3.4613571978609595e-06, "loss": 0.5049, "step": 4928 }, { "epoch": 0.735150453037026, "grad_norm": 1.1769232749938965, "learning_rate": 3.4577022149331065e-06, "loss": 0.6265, "step": 4929 }, { "epoch": 0.7352996010291212, "grad_norm": 1.1533907651901245, "learning_rate": 3.4540487593466197e-06, "loss": 0.5508, "step": 4930 }, { "epoch": 0.7354487490212163, "grad_norm": 1.1750755310058594, "learning_rate": 3.4503968319544266e-06, "loss": 0.5774, "step": 4931 }, { "epoch": 0.7355978970133115, "grad_norm": 1.1936522722244263, "learning_rate": 3.4467464336090863e-06, "loss": 0.555, "step": 4932 }, { "epoch": 0.7357470450054067, "grad_norm": 1.207975149154663, "learning_rate": 3.443097565162811e-06, "loss": 0.5522, "step": 4933 }, { "epoch": 0.7358961929975018, "grad_norm": 1.2122342586517334, "learning_rate": 3.4394502274674544e-06, "loss": 0.5881, "step": 4934 }, { "epoch": 0.736045340989597, "grad_norm": 1.2242838144302368, "learning_rate": 3.435804421374502e-06, "loss": 0.6329, "step": 4935 }, { "epoch": 0.736194488981692, "grad_norm": 1.1325429677963257, "learning_rate": 3.4321601477351017e-06, "loss": 0.5645, "step": 4936 }, { "epoch": 0.7363436369737872, "grad_norm": 1.2377760410308838, "learning_rate": 3.4285174074000317e-06, "loss": 0.586, "step": 4937 }, { "epoch": 0.7364927849658824, "grad_norm": 1.1322846412658691, "learning_rate": 3.4248762012197047e-06, "loss": 0.5381, "step": 4938 }, { "epoch": 0.7366419329579775, "grad_norm": 1.2231864929199219, "learning_rate": 3.42123653004419e-06, "loss": 0.5454, "step": 4939 }, { "epoch": 0.7367910809500727, "grad_norm": 1.0376437902450562, "learning_rate": 3.417598394723193e-06, "loss": 0.525, "step": 4940 }, { "epoch": 0.7369402289421678, "grad_norm": 1.0843861103057861, "learning_rate": 3.4139617961060546e-06, "loss": 0.5448, "step": 4941 }, { "epoch": 0.737089376934263, "grad_norm": 1.4246342182159424, "learning_rate": 3.4103267350417645e-06, "loss": 0.6484, "step": 4942 }, { "epoch": 0.7372385249263582, "grad_norm": 1.140423059463501, "learning_rate": 3.406693212378951e-06, "loss": 0.5265, "step": 4943 }, { "epoch": 0.7373876729184533, "grad_norm": 1.1331708431243896, "learning_rate": 3.4030612289658836e-06, "loss": 0.5768, "step": 4944 }, { "epoch": 0.7375368209105485, "grad_norm": 1.0861769914627075, "learning_rate": 3.399430785650473e-06, "loss": 0.5579, "step": 4945 }, { "epoch": 0.7376859689026436, "grad_norm": 1.2548737525939941, "learning_rate": 3.395801883280271e-06, "loss": 0.6009, "step": 4946 }, { "epoch": 0.7378351168947388, "grad_norm": 1.2342389822006226, "learning_rate": 3.3921745227024626e-06, "loss": 0.5895, "step": 4947 }, { "epoch": 0.737984264886834, "grad_norm": 1.1884714365005493, "learning_rate": 3.388548704763882e-06, "loss": 0.5663, "step": 4948 }, { "epoch": 0.7381334128789291, "grad_norm": 1.1724573373794556, "learning_rate": 3.3849244303109986e-06, "loss": 0.5941, "step": 4949 }, { "epoch": 0.7382825608710243, "grad_norm": 1.1964155435562134, "learning_rate": 3.381301700189927e-06, "loss": 0.5931, "step": 4950 }, { "epoch": 0.7384317088631195, "grad_norm": 1.200560212135315, "learning_rate": 3.3776805152464087e-06, "loss": 0.6225, "step": 4951 }, { "epoch": 0.7385808568552146, "grad_norm": 1.1096714735031128, "learning_rate": 3.3740608763258375e-06, "loss": 0.5359, "step": 4952 }, { "epoch": 0.7387300048473098, "grad_norm": 1.1673321723937988, "learning_rate": 3.3704427842732403e-06, "loss": 0.6194, "step": 4953 }, { "epoch": 0.7388791528394049, "grad_norm": 1.0586072206497192, "learning_rate": 3.366826239933283e-06, "loss": 0.5908, "step": 4954 }, { "epoch": 0.7390283008315001, "grad_norm": 1.0851680040359497, "learning_rate": 3.363211244150273e-06, "loss": 0.5746, "step": 4955 }, { "epoch": 0.7391774488235953, "grad_norm": 1.1869876384735107, "learning_rate": 3.359597797768157e-06, "loss": 0.6359, "step": 4956 }, { "epoch": 0.7393265968156904, "grad_norm": 1.1538418531417847, "learning_rate": 3.3559859016305094e-06, "loss": 0.6122, "step": 4957 }, { "epoch": 0.7394757448077856, "grad_norm": 1.1656231880187988, "learning_rate": 3.352375556580556e-06, "loss": 0.6024, "step": 4958 }, { "epoch": 0.7396248927998806, "grad_norm": 1.1596252918243408, "learning_rate": 3.3487667634611555e-06, "loss": 0.562, "step": 4959 }, { "epoch": 0.7397740407919758, "grad_norm": 1.1424400806427002, "learning_rate": 3.3451595231148005e-06, "loss": 0.6016, "step": 4960 }, { "epoch": 0.739923188784071, "grad_norm": 1.1869287490844727, "learning_rate": 3.341553836383621e-06, "loss": 0.5879, "step": 4961 }, { "epoch": 0.7400723367761661, "grad_norm": 1.1601345539093018, "learning_rate": 3.3379497041094e-06, "loss": 0.5459, "step": 4962 }, { "epoch": 0.7402214847682613, "grad_norm": 1.3493207693099976, "learning_rate": 3.334347127133534e-06, "loss": 0.6684, "step": 4963 }, { "epoch": 0.7403706327603564, "grad_norm": 1.1728942394256592, "learning_rate": 3.3307461062970726e-06, "loss": 0.56, "step": 4964 }, { "epoch": 0.7405197807524516, "grad_norm": 1.209539532661438, "learning_rate": 3.3271466424406984e-06, "loss": 0.5898, "step": 4965 }, { "epoch": 0.7406689287445468, "grad_norm": 1.1315135955810547, "learning_rate": 3.323548736404725e-06, "loss": 0.6165, "step": 4966 }, { "epoch": 0.7408180767366419, "grad_norm": 1.2669470310211182, "learning_rate": 3.3199523890291074e-06, "loss": 0.662, "step": 4967 }, { "epoch": 0.7409672247287371, "grad_norm": 1.1435173749923706, "learning_rate": 3.3163576011534417e-06, "loss": 0.5859, "step": 4968 }, { "epoch": 0.7411163727208322, "grad_norm": 1.2066835165023804, "learning_rate": 3.312764373616946e-06, "loss": 0.6204, "step": 4969 }, { "epoch": 0.7412655207129274, "grad_norm": 1.2284276485443115, "learning_rate": 3.3091727072584825e-06, "loss": 0.6447, "step": 4970 }, { "epoch": 0.7414146687050226, "grad_norm": 1.076941967010498, "learning_rate": 3.305582602916558e-06, "loss": 0.5585, "step": 4971 }, { "epoch": 0.7415638166971177, "grad_norm": 1.1244829893112183, "learning_rate": 3.3019940614292977e-06, "loss": 0.6309, "step": 4972 }, { "epoch": 0.7417129646892129, "grad_norm": 1.2280257940292358, "learning_rate": 3.2984070836344717e-06, "loss": 0.6146, "step": 4973 }, { "epoch": 0.741862112681308, "grad_norm": 1.0376865863800049, "learning_rate": 3.2948216703694836e-06, "loss": 0.5777, "step": 4974 }, { "epoch": 0.7420112606734032, "grad_norm": 1.14107084274292, "learning_rate": 3.2912378224713727e-06, "loss": 0.5553, "step": 4975 }, { "epoch": 0.7421604086654984, "grad_norm": 1.1756192445755005, "learning_rate": 3.287655540776805e-06, "loss": 0.576, "step": 4976 }, { "epoch": 0.7423095566575935, "grad_norm": 1.1032239198684692, "learning_rate": 3.284074826122092e-06, "loss": 0.5758, "step": 4977 }, { "epoch": 0.7424587046496887, "grad_norm": 1.1689451932907104, "learning_rate": 3.280495679343173e-06, "loss": 0.5794, "step": 4978 }, { "epoch": 0.7426078526417839, "grad_norm": 0.8233368396759033, "learning_rate": 3.2769181012756248e-06, "loss": 0.6238, "step": 4979 }, { "epoch": 0.742757000633879, "grad_norm": 1.196665644645691, "learning_rate": 3.2733420927546533e-06, "loss": 0.5864, "step": 4980 }, { "epoch": 0.7429061486259741, "grad_norm": 1.1136757135391235, "learning_rate": 3.2697676546151045e-06, "loss": 0.5505, "step": 4981 }, { "epoch": 0.7430552966180692, "grad_norm": 1.1182307004928589, "learning_rate": 3.266194787691449e-06, "loss": 0.5833, "step": 4982 }, { "epoch": 0.7432044446101644, "grad_norm": 1.2317662239074707, "learning_rate": 3.262623492817798e-06, "loss": 0.5933, "step": 4983 }, { "epoch": 0.7433535926022596, "grad_norm": 1.235518455505371, "learning_rate": 3.2590537708278956e-06, "loss": 0.6022, "step": 4984 }, { "epoch": 0.7435027405943547, "grad_norm": 1.1542795896530151, "learning_rate": 3.25548562255511e-06, "loss": 0.5764, "step": 4985 }, { "epoch": 0.7436518885864499, "grad_norm": 1.1861978769302368, "learning_rate": 3.2519190488324528e-06, "loss": 0.5188, "step": 4986 }, { "epoch": 0.743801036578545, "grad_norm": 1.2847269773483276, "learning_rate": 3.2483540504925616e-06, "loss": 0.6704, "step": 4987 }, { "epoch": 0.7439501845706402, "grad_norm": 1.186396837234497, "learning_rate": 3.24479062836771e-06, "loss": 0.5431, "step": 4988 }, { "epoch": 0.7440993325627354, "grad_norm": 1.0649510622024536, "learning_rate": 3.2412287832898004e-06, "loss": 0.5569, "step": 4989 }, { "epoch": 0.7442484805548305, "grad_norm": 1.0770115852355957, "learning_rate": 3.237668516090372e-06, "loss": 0.5432, "step": 4990 }, { "epoch": 0.7443976285469257, "grad_norm": 1.1602827310562134, "learning_rate": 3.2341098276005856e-06, "loss": 0.6098, "step": 4991 }, { "epoch": 0.7445467765390208, "grad_norm": 1.1938482522964478, "learning_rate": 3.2305527186512432e-06, "loss": 0.6068, "step": 4992 }, { "epoch": 0.744695924531116, "grad_norm": 0.9972236156463623, "learning_rate": 3.226997190072777e-06, "loss": 0.5665, "step": 4993 }, { "epoch": 0.7448450725232112, "grad_norm": 1.1186940670013428, "learning_rate": 3.2234432426952432e-06, "loss": 0.5956, "step": 4994 }, { "epoch": 0.7449942205153063, "grad_norm": 1.1673647165298462, "learning_rate": 3.219890877348336e-06, "loss": 0.5912, "step": 4995 }, { "epoch": 0.7451433685074015, "grad_norm": 1.1643438339233398, "learning_rate": 3.216340094861378e-06, "loss": 0.587, "step": 4996 }, { "epoch": 0.7452925164994966, "grad_norm": 1.2544695138931274, "learning_rate": 3.212790896063321e-06, "loss": 0.5371, "step": 4997 }, { "epoch": 0.7454416644915918, "grad_norm": 1.1236636638641357, "learning_rate": 3.2092432817827502e-06, "loss": 0.5945, "step": 4998 }, { "epoch": 0.745590812483687, "grad_norm": 1.2114312648773193, "learning_rate": 3.2056972528478802e-06, "loss": 0.7074, "step": 4999 }, { "epoch": 0.7457399604757821, "grad_norm": 1.1504416465759277, "learning_rate": 3.2021528100865483e-06, "loss": 0.5966, "step": 5000 }, { "epoch": 0.7458891084678773, "grad_norm": 1.1988548040390015, "learning_rate": 3.198609954326232e-06, "loss": 0.6069, "step": 5001 }, { "epoch": 0.7460382564599725, "grad_norm": 1.2530186176300049, "learning_rate": 3.1950686863940315e-06, "loss": 0.6207, "step": 5002 }, { "epoch": 0.7461874044520675, "grad_norm": 0.9032281041145325, "learning_rate": 3.1915290071166836e-06, "loss": 0.6289, "step": 5003 }, { "epoch": 0.7463365524441627, "grad_norm": 1.2735663652420044, "learning_rate": 3.1879909173205425e-06, "loss": 0.5929, "step": 5004 }, { "epoch": 0.7464857004362578, "grad_norm": 1.075796127319336, "learning_rate": 3.1844544178315995e-06, "loss": 0.5083, "step": 5005 }, { "epoch": 0.746634848428353, "grad_norm": 1.044739007949829, "learning_rate": 3.1809195094754754e-06, "loss": 0.524, "step": 5006 }, { "epoch": 0.7467839964204482, "grad_norm": 1.336177110671997, "learning_rate": 3.177386193077415e-06, "loss": 0.6183, "step": 5007 }, { "epoch": 0.7469331444125433, "grad_norm": 1.1491799354553223, "learning_rate": 3.1738544694622955e-06, "loss": 0.565, "step": 5008 }, { "epoch": 0.7470822924046385, "grad_norm": 1.2491871118545532, "learning_rate": 3.170324339454621e-06, "loss": 0.5931, "step": 5009 }, { "epoch": 0.7472314403967336, "grad_norm": 1.1815574169158936, "learning_rate": 3.1667958038785206e-06, "loss": 0.5834, "step": 5010 }, { "epoch": 0.7473805883888288, "grad_norm": 1.2288731336593628, "learning_rate": 3.1632688635577535e-06, "loss": 0.5637, "step": 5011 }, { "epoch": 0.747529736380924, "grad_norm": 1.1677882671356201, "learning_rate": 3.15974351931571e-06, "loss": 0.5733, "step": 5012 }, { "epoch": 0.7476788843730191, "grad_norm": 1.080022931098938, "learning_rate": 3.156219771975397e-06, "loss": 0.5331, "step": 5013 }, { "epoch": 0.7478280323651143, "grad_norm": 1.262965202331543, "learning_rate": 3.152697622359463e-06, "loss": 0.6092, "step": 5014 }, { "epoch": 0.7479771803572094, "grad_norm": 1.1230727434158325, "learning_rate": 3.149177071290178e-06, "loss": 0.5965, "step": 5015 }, { "epoch": 0.7481263283493046, "grad_norm": 1.1888182163238525, "learning_rate": 3.14565811958943e-06, "loss": 0.5633, "step": 5016 }, { "epoch": 0.7482754763413998, "grad_norm": 1.2053829431533813, "learning_rate": 3.142140768078744e-06, "loss": 0.6642, "step": 5017 }, { "epoch": 0.7484246243334949, "grad_norm": 1.099575161933899, "learning_rate": 3.138625017579272e-06, "loss": 0.5996, "step": 5018 }, { "epoch": 0.7485737723255901, "grad_norm": 1.2361260652542114, "learning_rate": 3.1351108689117813e-06, "loss": 0.6353, "step": 5019 }, { "epoch": 0.7487229203176852, "grad_norm": 1.1621160507202148, "learning_rate": 3.1315983228966774e-06, "loss": 0.6428, "step": 5020 }, { "epoch": 0.7488720683097804, "grad_norm": 1.1385654211044312, "learning_rate": 3.1280873803539845e-06, "loss": 0.5876, "step": 5021 }, { "epoch": 0.7490212163018756, "grad_norm": 1.1624897718429565, "learning_rate": 3.1245780421033557e-06, "loss": 0.6125, "step": 5022 }, { "epoch": 0.7491703642939707, "grad_norm": 1.120621681213379, "learning_rate": 3.121070308964069e-06, "loss": 0.6064, "step": 5023 }, { "epoch": 0.7493195122860659, "grad_norm": 1.2461167573928833, "learning_rate": 3.1175641817550295e-06, "loss": 0.5863, "step": 5024 }, { "epoch": 0.7494686602781611, "grad_norm": 1.2118347883224487, "learning_rate": 3.1140596612947582e-06, "loss": 0.5963, "step": 5025 }, { "epoch": 0.7496178082702561, "grad_norm": 1.1726393699645996, "learning_rate": 3.1105567484014133e-06, "loss": 0.5715, "step": 5026 }, { "epoch": 0.7497669562623513, "grad_norm": 1.1212010383605957, "learning_rate": 3.1070554438927703e-06, "loss": 0.5623, "step": 5027 }, { "epoch": 0.7499161042544464, "grad_norm": 1.2048773765563965, "learning_rate": 3.1035557485862343e-06, "loss": 0.5938, "step": 5028 }, { "epoch": 0.7500652522465416, "grad_norm": 1.2859388589859009, "learning_rate": 3.1000576632988265e-06, "loss": 0.6231, "step": 5029 }, { "epoch": 0.7502144002386368, "grad_norm": 1.1520575284957886, "learning_rate": 3.0965611888471993e-06, "loss": 0.5492, "step": 5030 }, { "epoch": 0.7503635482307319, "grad_norm": 1.276855230331421, "learning_rate": 3.093066326047628e-06, "loss": 0.5498, "step": 5031 }, { "epoch": 0.7505126962228271, "grad_norm": 1.227986454963684, "learning_rate": 3.0895730757160104e-06, "loss": 0.621, "step": 5032 }, { "epoch": 0.7506618442149222, "grad_norm": 1.1588362455368042, "learning_rate": 3.0860814386678683e-06, "loss": 0.6364, "step": 5033 }, { "epoch": 0.7508109922070174, "grad_norm": 1.1177570819854736, "learning_rate": 3.0825914157183502e-06, "loss": 0.6125, "step": 5034 }, { "epoch": 0.7509601401991126, "grad_norm": 0.8009832501411438, "learning_rate": 3.079103007682217e-06, "loss": 0.5906, "step": 5035 }, { "epoch": 0.7511092881912077, "grad_norm": 1.2672942876815796, "learning_rate": 3.0756162153738633e-06, "loss": 0.6484, "step": 5036 }, { "epoch": 0.7512584361833029, "grad_norm": 1.2340139150619507, "learning_rate": 3.072131039607308e-06, "loss": 0.5981, "step": 5037 }, { "epoch": 0.751407584175398, "grad_norm": 1.1936362981796265, "learning_rate": 3.0686474811961787e-06, "loss": 0.6532, "step": 5038 }, { "epoch": 0.7515567321674932, "grad_norm": 1.1561179161071777, "learning_rate": 3.0651655409537394e-06, "loss": 0.5895, "step": 5039 }, { "epoch": 0.7517058801595884, "grad_norm": 1.2426141500473022, "learning_rate": 3.0616852196928714e-06, "loss": 0.6494, "step": 5040 }, { "epoch": 0.7518550281516835, "grad_norm": 0.8950293660163879, "learning_rate": 3.0582065182260777e-06, "loss": 0.6666, "step": 5041 }, { "epoch": 0.7520041761437787, "grad_norm": 1.1804593801498413, "learning_rate": 3.054729437365482e-06, "loss": 0.6044, "step": 5042 }, { "epoch": 0.7521533241358738, "grad_norm": 1.0531672239303589, "learning_rate": 3.051253977922838e-06, "loss": 0.5602, "step": 5043 }, { "epoch": 0.752302472127969, "grad_norm": 1.124578833580017, "learning_rate": 3.047780140709503e-06, "loss": 0.5161, "step": 5044 }, { "epoch": 0.7524516201200642, "grad_norm": 0.8925403952598572, "learning_rate": 3.044307926536474e-06, "loss": 0.6517, "step": 5045 }, { "epoch": 0.7526007681121593, "grad_norm": 1.1132258176803589, "learning_rate": 3.0408373362143617e-06, "loss": 0.5966, "step": 5046 }, { "epoch": 0.7527499161042545, "grad_norm": 0.971733570098877, "learning_rate": 3.037368370553393e-06, "loss": 0.397, "step": 5047 }, { "epoch": 0.7528990640963497, "grad_norm": 1.3379276990890503, "learning_rate": 3.0339010303634186e-06, "loss": 0.6261, "step": 5048 }, { "epoch": 0.7530482120884447, "grad_norm": 1.1231112480163574, "learning_rate": 3.0304353164539224e-06, "loss": 0.5118, "step": 5049 }, { "epoch": 0.75319736008054, "grad_norm": 1.2760570049285889, "learning_rate": 3.026971229633988e-06, "loss": 0.5433, "step": 5050 }, { "epoch": 0.753346508072635, "grad_norm": 1.1016372442245483, "learning_rate": 3.023508770712331e-06, "loss": 0.5846, "step": 5051 }, { "epoch": 0.7534956560647302, "grad_norm": 1.235161304473877, "learning_rate": 3.020047940497288e-06, "loss": 0.5845, "step": 5052 }, { "epoch": 0.7536448040568254, "grad_norm": 1.145775556564331, "learning_rate": 3.0165887397968064e-06, "loss": 0.526, "step": 5053 }, { "epoch": 0.7537939520489205, "grad_norm": 1.0920664072036743, "learning_rate": 3.0131311694184617e-06, "loss": 0.5413, "step": 5054 }, { "epoch": 0.7539431000410157, "grad_norm": 1.1858505010604858, "learning_rate": 3.009675230169444e-06, "loss": 0.5519, "step": 5055 }, { "epoch": 0.7540922480331108, "grad_norm": 1.1449555158615112, "learning_rate": 3.006220922856571e-06, "loss": 0.5299, "step": 5056 }, { "epoch": 0.754241396025206, "grad_norm": 1.1287293434143066, "learning_rate": 3.0027682482862606e-06, "loss": 0.5324, "step": 5057 }, { "epoch": 0.7543905440173012, "grad_norm": 1.1904211044311523, "learning_rate": 2.999317207264575e-06, "loss": 0.5708, "step": 5058 }, { "epoch": 0.7545396920093963, "grad_norm": 1.115506887435913, "learning_rate": 2.9958678005971744e-06, "loss": 0.5564, "step": 5059 }, { "epoch": 0.7546888400014915, "grad_norm": 1.1972384452819824, "learning_rate": 2.9924200290893447e-06, "loss": 0.6384, "step": 5060 }, { "epoch": 0.7548379879935866, "grad_norm": 1.1899688243865967, "learning_rate": 2.9889738935459934e-06, "loss": 0.596, "step": 5061 }, { "epoch": 0.7549871359856818, "grad_norm": 1.254374384880066, "learning_rate": 2.9855293947716446e-06, "loss": 0.5694, "step": 5062 }, { "epoch": 0.755136283977777, "grad_norm": 1.246324062347412, "learning_rate": 2.9820865335704318e-06, "loss": 0.6329, "step": 5063 }, { "epoch": 0.7552854319698721, "grad_norm": 1.199471354484558, "learning_rate": 2.9786453107461166e-06, "loss": 0.6382, "step": 5064 }, { "epoch": 0.7554345799619673, "grad_norm": 1.0673198699951172, "learning_rate": 2.9752057271020785e-06, "loss": 0.4945, "step": 5065 }, { "epoch": 0.7555837279540624, "grad_norm": 1.1340584754943848, "learning_rate": 2.9717677834413006e-06, "loss": 0.61, "step": 5066 }, { "epoch": 0.7557328759461576, "grad_norm": 1.1723331212997437, "learning_rate": 2.968331480566402e-06, "loss": 0.4979, "step": 5067 }, { "epoch": 0.7558820239382528, "grad_norm": 1.1640321016311646, "learning_rate": 2.96489681927961e-06, "loss": 0.5706, "step": 5068 }, { "epoch": 0.7560311719303479, "grad_norm": 1.1125491857528687, "learning_rate": 2.961463800382761e-06, "loss": 0.5555, "step": 5069 }, { "epoch": 0.7561803199224431, "grad_norm": 0.88401859998703, "learning_rate": 2.9580324246773195e-06, "loss": 0.6568, "step": 5070 }, { "epoch": 0.7563294679145383, "grad_norm": 1.2604175806045532, "learning_rate": 2.9546026929643645e-06, "loss": 0.5946, "step": 5071 }, { "epoch": 0.7564786159066333, "grad_norm": 1.1491551399230957, "learning_rate": 2.9511746060445834e-06, "loss": 0.568, "step": 5072 }, { "epoch": 0.7566277638987285, "grad_norm": 1.2436519861221313, "learning_rate": 2.947748164718288e-06, "loss": 0.6399, "step": 5073 }, { "epoch": 0.7567769118908236, "grad_norm": 1.3244495391845703, "learning_rate": 2.9443233697854036e-06, "loss": 0.6297, "step": 5074 }, { "epoch": 0.7569260598829188, "grad_norm": 1.2465704679489136, "learning_rate": 2.9409002220454686e-06, "loss": 0.5802, "step": 5075 }, { "epoch": 0.757075207875014, "grad_norm": 1.2302448749542236, "learning_rate": 2.9374787222976397e-06, "loss": 0.5917, "step": 5076 }, { "epoch": 0.7572243558671091, "grad_norm": 1.1106218099594116, "learning_rate": 2.9340588713406927e-06, "loss": 0.5178, "step": 5077 }, { "epoch": 0.7573735038592043, "grad_norm": 1.1398664712905884, "learning_rate": 2.930640669973005e-06, "loss": 0.6151, "step": 5078 }, { "epoch": 0.7575226518512994, "grad_norm": 0.8512532711029053, "learning_rate": 2.927224118992582e-06, "loss": 0.6593, "step": 5079 }, { "epoch": 0.7576717998433946, "grad_norm": 1.1288281679153442, "learning_rate": 2.92380921919704e-06, "loss": 0.5291, "step": 5080 }, { "epoch": 0.7578209478354898, "grad_norm": 1.1927101612091064, "learning_rate": 2.920395971383612e-06, "loss": 0.6127, "step": 5081 }, { "epoch": 0.7579700958275849, "grad_norm": 1.0593911409378052, "learning_rate": 2.9169843763491345e-06, "loss": 0.5462, "step": 5082 }, { "epoch": 0.7581192438196801, "grad_norm": 1.1576589345932007, "learning_rate": 2.9135744348900717e-06, "loss": 0.6112, "step": 5083 }, { "epoch": 0.7582683918117752, "grad_norm": 1.2327383756637573, "learning_rate": 2.9101661478024958e-06, "loss": 0.6369, "step": 5084 }, { "epoch": 0.7584175398038704, "grad_norm": 1.2286874055862427, "learning_rate": 2.9067595158820925e-06, "loss": 0.5066, "step": 5085 }, { "epoch": 0.7585666877959656, "grad_norm": 1.2980302572250366, "learning_rate": 2.9033545399241625e-06, "loss": 0.6828, "step": 5086 }, { "epoch": 0.7587158357880607, "grad_norm": 1.279102087020874, "learning_rate": 2.8999512207236226e-06, "loss": 0.643, "step": 5087 }, { "epoch": 0.7588649837801559, "grad_norm": 1.2361998558044434, "learning_rate": 2.8965495590749925e-06, "loss": 0.5464, "step": 5088 }, { "epoch": 0.759014131772251, "grad_norm": 1.168002724647522, "learning_rate": 2.8931495557724154e-06, "loss": 0.5767, "step": 5089 }, { "epoch": 0.7591632797643462, "grad_norm": 1.328494668006897, "learning_rate": 2.8897512116096473e-06, "loss": 0.6462, "step": 5090 }, { "epoch": 0.7593124277564414, "grad_norm": 1.0902788639068604, "learning_rate": 2.8863545273800462e-06, "loss": 0.5146, "step": 5091 }, { "epoch": 0.7594615757485365, "grad_norm": 1.2154734134674072, "learning_rate": 2.8829595038765914e-06, "loss": 0.5689, "step": 5092 }, { "epoch": 0.7596107237406317, "grad_norm": 1.128636121749878, "learning_rate": 2.8795661418918806e-06, "loss": 0.569, "step": 5093 }, { "epoch": 0.7597598717327267, "grad_norm": 1.32023024559021, "learning_rate": 2.876174442218107e-06, "loss": 0.6033, "step": 5094 }, { "epoch": 0.7599090197248219, "grad_norm": 1.2423183917999268, "learning_rate": 2.8727844056470886e-06, "loss": 0.6119, "step": 5095 }, { "epoch": 0.7600581677169171, "grad_norm": 1.1482245922088623, "learning_rate": 2.8693960329702542e-06, "loss": 0.5846, "step": 5096 }, { "epoch": 0.7602073157090122, "grad_norm": 1.3049678802490234, "learning_rate": 2.866009324978635e-06, "loss": 0.632, "step": 5097 }, { "epoch": 0.7603564637011074, "grad_norm": 1.143864393234253, "learning_rate": 2.862624282462881e-06, "loss": 0.5771, "step": 5098 }, { "epoch": 0.7605056116932026, "grad_norm": 1.2308363914489746, "learning_rate": 2.859240906213254e-06, "loss": 0.6519, "step": 5099 }, { "epoch": 0.7606547596852977, "grad_norm": 1.1679495573043823, "learning_rate": 2.855859197019627e-06, "loss": 0.5616, "step": 5100 }, { "epoch": 0.7608039076773929, "grad_norm": 1.129961609840393, "learning_rate": 2.8524791556714736e-06, "loss": 0.5721, "step": 5101 }, { "epoch": 0.760953055669488, "grad_norm": 1.0968623161315918, "learning_rate": 2.8491007829578965e-06, "loss": 0.5875, "step": 5102 }, { "epoch": 0.7611022036615832, "grad_norm": 1.3543713092803955, "learning_rate": 2.845724079667591e-06, "loss": 0.6704, "step": 5103 }, { "epoch": 0.7612513516536784, "grad_norm": 1.0463558435440063, "learning_rate": 2.8423490465888727e-06, "loss": 0.5833, "step": 5104 }, { "epoch": 0.7614004996457735, "grad_norm": 1.0582494735717773, "learning_rate": 2.8389756845096637e-06, "loss": 0.5511, "step": 5105 }, { "epoch": 0.7615496476378687, "grad_norm": 1.1136093139648438, "learning_rate": 2.835603994217502e-06, "loss": 0.5725, "step": 5106 }, { "epoch": 0.7616987956299638, "grad_norm": 1.1564536094665527, "learning_rate": 2.8322339764995235e-06, "loss": 0.5524, "step": 5107 }, { "epoch": 0.761847943622059, "grad_norm": 0.8869603276252747, "learning_rate": 2.8288656321424824e-06, "loss": 0.6341, "step": 5108 }, { "epoch": 0.7619970916141542, "grad_norm": 1.1611573696136475, "learning_rate": 2.825498961932743e-06, "loss": 0.5865, "step": 5109 }, { "epoch": 0.7621462396062493, "grad_norm": 1.200107455253601, "learning_rate": 2.8221339666562695e-06, "loss": 0.579, "step": 5110 }, { "epoch": 0.7622953875983445, "grad_norm": 1.3452973365783691, "learning_rate": 2.8187706470986496e-06, "loss": 0.6385, "step": 5111 }, { "epoch": 0.7624445355904396, "grad_norm": 1.2239843606948853, "learning_rate": 2.815409004045071e-06, "loss": 0.6237, "step": 5112 }, { "epoch": 0.7625936835825348, "grad_norm": 1.2070486545562744, "learning_rate": 2.8120490382803244e-06, "loss": 0.5862, "step": 5113 }, { "epoch": 0.76274283157463, "grad_norm": 1.3074933290481567, "learning_rate": 2.8086907505888205e-06, "loss": 0.6446, "step": 5114 }, { "epoch": 0.762891979566725, "grad_norm": 1.1458698511123657, "learning_rate": 2.8053341417545744e-06, "loss": 0.619, "step": 5115 }, { "epoch": 0.7630411275588203, "grad_norm": 1.2149301767349243, "learning_rate": 2.801979212561202e-06, "loss": 0.6541, "step": 5116 }, { "epoch": 0.7631902755509153, "grad_norm": 1.1882609128952026, "learning_rate": 2.7986259637919365e-06, "loss": 0.6123, "step": 5117 }, { "epoch": 0.7633394235430105, "grad_norm": 1.118178129196167, "learning_rate": 2.7952743962296146e-06, "loss": 0.5558, "step": 5118 }, { "epoch": 0.7634885715351057, "grad_norm": 1.0861937999725342, "learning_rate": 2.7919245106566827e-06, "loss": 0.5379, "step": 5119 }, { "epoch": 0.7636377195272008, "grad_norm": 1.1413761377334595, "learning_rate": 2.788576307855192e-06, "loss": 0.5495, "step": 5120 }, { "epoch": 0.763786867519296, "grad_norm": 1.1804033517837524, "learning_rate": 2.785229788606806e-06, "loss": 0.5798, "step": 5121 }, { "epoch": 0.7639360155113912, "grad_norm": 1.2721983194351196, "learning_rate": 2.7818849536927827e-06, "loss": 0.6374, "step": 5122 }, { "epoch": 0.7640851635034863, "grad_norm": 1.1123391389846802, "learning_rate": 2.7785418038940004e-06, "loss": 0.5138, "step": 5123 }, { "epoch": 0.7642343114955815, "grad_norm": 1.146031141281128, "learning_rate": 2.7752003399909423e-06, "loss": 0.6501, "step": 5124 }, { "epoch": 0.7643834594876766, "grad_norm": 1.1257483959197998, "learning_rate": 2.771860562763686e-06, "loss": 0.5243, "step": 5125 }, { "epoch": 0.7645326074797718, "grad_norm": 0.841446578502655, "learning_rate": 2.768522472991929e-06, "loss": 0.587, "step": 5126 }, { "epoch": 0.764681755471867, "grad_norm": 1.0836765766143799, "learning_rate": 2.7651860714549695e-06, "loss": 0.4881, "step": 5127 }, { "epoch": 0.7648309034639621, "grad_norm": 1.1406923532485962, "learning_rate": 2.761851358931711e-06, "loss": 0.525, "step": 5128 }, { "epoch": 0.7649800514560573, "grad_norm": 1.1433438062667847, "learning_rate": 2.758518336200664e-06, "loss": 0.6086, "step": 5129 }, { "epoch": 0.7651291994481524, "grad_norm": 1.0757761001586914, "learning_rate": 2.7551870040399475e-06, "loss": 0.5756, "step": 5130 }, { "epoch": 0.7652783474402476, "grad_norm": 1.1324094533920288, "learning_rate": 2.751857363227276e-06, "loss": 0.5689, "step": 5131 }, { "epoch": 0.7654274954323428, "grad_norm": 1.230970859527588, "learning_rate": 2.7485294145399778e-06, "loss": 0.6008, "step": 5132 }, { "epoch": 0.7655766434244379, "grad_norm": 1.1427052021026611, "learning_rate": 2.7452031587549844e-06, "loss": 0.5682, "step": 5133 }, { "epoch": 0.7657257914165331, "grad_norm": 1.0773521661758423, "learning_rate": 2.7418785966488347e-06, "loss": 0.5314, "step": 5134 }, { "epoch": 0.7658749394086282, "grad_norm": 1.1494683027267456, "learning_rate": 2.738555728997664e-06, "loss": 0.5061, "step": 5135 }, { "epoch": 0.7660240874007234, "grad_norm": 1.170017123222351, "learning_rate": 2.7352345565772175e-06, "loss": 0.5778, "step": 5136 }, { "epoch": 0.7661732353928186, "grad_norm": 1.3051637411117554, "learning_rate": 2.731915080162847e-06, "loss": 0.6096, "step": 5137 }, { "epoch": 0.7663223833849137, "grad_norm": 1.0783976316452026, "learning_rate": 2.728597300529503e-06, "loss": 0.4888, "step": 5138 }, { "epoch": 0.7664715313770089, "grad_norm": 1.1853575706481934, "learning_rate": 2.7252812184517454e-06, "loss": 0.6132, "step": 5139 }, { "epoch": 0.7666206793691039, "grad_norm": 1.2317478656768799, "learning_rate": 2.721966834703734e-06, "loss": 0.6541, "step": 5140 }, { "epoch": 0.7667698273611991, "grad_norm": 1.1013104915618896, "learning_rate": 2.718654150059231e-06, "loss": 0.5879, "step": 5141 }, { "epoch": 0.7669189753532943, "grad_norm": 1.189701795578003, "learning_rate": 2.715343165291604e-06, "loss": 0.5884, "step": 5142 }, { "epoch": 0.7670681233453894, "grad_norm": 0.8580636978149414, "learning_rate": 2.7120338811738277e-06, "loss": 0.6214, "step": 5143 }, { "epoch": 0.7672172713374846, "grad_norm": 1.164116621017456, "learning_rate": 2.708726298478469e-06, "loss": 0.5213, "step": 5144 }, { "epoch": 0.7673664193295798, "grad_norm": 1.1972782611846924, "learning_rate": 2.7054204179777054e-06, "loss": 0.593, "step": 5145 }, { "epoch": 0.7675155673216749, "grad_norm": 1.1596181392669678, "learning_rate": 2.7021162404433243e-06, "loss": 0.633, "step": 5146 }, { "epoch": 0.7676647153137701, "grad_norm": 1.2387073040008545, "learning_rate": 2.6988137666466983e-06, "loss": 0.6301, "step": 5147 }, { "epoch": 0.7678138633058652, "grad_norm": 1.4198377132415771, "learning_rate": 2.6955129973588136e-06, "loss": 0.5738, "step": 5148 }, { "epoch": 0.7679630112979604, "grad_norm": 1.075485348701477, "learning_rate": 2.6922139333502594e-06, "loss": 0.5421, "step": 5149 }, { "epoch": 0.7681121592900556, "grad_norm": 1.264413833618164, "learning_rate": 2.6889165753912173e-06, "loss": 0.5882, "step": 5150 }, { "epoch": 0.7682613072821507, "grad_norm": 1.2360360622406006, "learning_rate": 2.6856209242514797e-06, "loss": 0.5838, "step": 5151 }, { "epoch": 0.7684104552742459, "grad_norm": 1.1335313320159912, "learning_rate": 2.682326980700437e-06, "loss": 0.629, "step": 5152 }, { "epoch": 0.768559603266341, "grad_norm": 1.2598035335540771, "learning_rate": 2.679034745507082e-06, "loss": 0.6517, "step": 5153 }, { "epoch": 0.7687087512584362, "grad_norm": 1.2068147659301758, "learning_rate": 2.6757442194400087e-06, "loss": 0.5713, "step": 5154 }, { "epoch": 0.7688578992505314, "grad_norm": 1.1647647619247437, "learning_rate": 2.6724554032674133e-06, "loss": 0.6062, "step": 5155 }, { "epoch": 0.7690070472426265, "grad_norm": 1.3129092454910278, "learning_rate": 2.6691682977570855e-06, "loss": 0.6911, "step": 5156 }, { "epoch": 0.7691561952347217, "grad_norm": 1.3552404642105103, "learning_rate": 2.6658829036764232e-06, "loss": 0.6797, "step": 5157 }, { "epoch": 0.7693053432268168, "grad_norm": 1.0601156949996948, "learning_rate": 2.6625992217924245e-06, "loss": 0.5336, "step": 5158 }, { "epoch": 0.769454491218912, "grad_norm": 1.258553385734558, "learning_rate": 2.6593172528716884e-06, "loss": 0.5574, "step": 5159 }, { "epoch": 0.7696036392110072, "grad_norm": 1.1616469621658325, "learning_rate": 2.6560369976804045e-06, "loss": 0.6504, "step": 5160 }, { "epoch": 0.7697527872031023, "grad_norm": 1.2061216831207275, "learning_rate": 2.6527584569843746e-06, "loss": 0.5436, "step": 5161 }, { "epoch": 0.7699019351951975, "grad_norm": 1.1138334274291992, "learning_rate": 2.6494816315489923e-06, "loss": 0.5291, "step": 5162 }, { "epoch": 0.7700510831872925, "grad_norm": 1.2415766716003418, "learning_rate": 2.6462065221392564e-06, "loss": 0.5905, "step": 5163 }, { "epoch": 0.7702002311793877, "grad_norm": 1.1147550344467163, "learning_rate": 2.6429331295197593e-06, "loss": 0.5577, "step": 5164 }, { "epoch": 0.7703493791714829, "grad_norm": 1.12725031375885, "learning_rate": 2.6396614544547005e-06, "loss": 0.558, "step": 5165 }, { "epoch": 0.770498527163578, "grad_norm": 1.1427397727966309, "learning_rate": 2.6363914977078665e-06, "loss": 0.5934, "step": 5166 }, { "epoch": 0.7706476751556732, "grad_norm": 1.3539410829544067, "learning_rate": 2.6331232600426535e-06, "loss": 0.6776, "step": 5167 }, { "epoch": 0.7707968231477684, "grad_norm": 1.2036877870559692, "learning_rate": 2.6298567422220556e-06, "loss": 0.5278, "step": 5168 }, { "epoch": 0.7709459711398635, "grad_norm": 1.2038577795028687, "learning_rate": 2.6265919450086553e-06, "loss": 0.5756, "step": 5169 }, { "epoch": 0.7710951191319587, "grad_norm": 0.7989102005958557, "learning_rate": 2.623328869164644e-06, "loss": 0.6428, "step": 5170 }, { "epoch": 0.7712442671240538, "grad_norm": 1.2363293170928955, "learning_rate": 2.6200675154518075e-06, "loss": 0.6005, "step": 5171 }, { "epoch": 0.771393415116149, "grad_norm": 1.2463326454162598, "learning_rate": 2.6168078846315303e-06, "loss": 0.6019, "step": 5172 }, { "epoch": 0.7715425631082442, "grad_norm": 1.069054365158081, "learning_rate": 2.613549977464793e-06, "loss": 0.5805, "step": 5173 }, { "epoch": 0.7716917111003393, "grad_norm": 1.2149289846420288, "learning_rate": 2.6102937947121798e-06, "loss": 0.5701, "step": 5174 }, { "epoch": 0.7718408590924345, "grad_norm": 1.1098142862319946, "learning_rate": 2.607039337133859e-06, "loss": 0.5816, "step": 5175 }, { "epoch": 0.7719900070845296, "grad_norm": 1.2051295042037964, "learning_rate": 2.60378660548961e-06, "loss": 0.6325, "step": 5176 }, { "epoch": 0.7721391550766248, "grad_norm": 1.2270591259002686, "learning_rate": 2.6005356005388047e-06, "loss": 0.6447, "step": 5177 }, { "epoch": 0.77228830306872, "grad_norm": 1.2551442384719849, "learning_rate": 2.5972863230404066e-06, "loss": 0.6534, "step": 5178 }, { "epoch": 0.7724374510608151, "grad_norm": 1.246364951133728, "learning_rate": 2.594038773752984e-06, "loss": 0.6011, "step": 5179 }, { "epoch": 0.7725865990529103, "grad_norm": 1.1597388982772827, "learning_rate": 2.590792953434695e-06, "loss": 0.5699, "step": 5180 }, { "epoch": 0.7727357470450054, "grad_norm": 1.1810532808303833, "learning_rate": 2.5875488628433e-06, "loss": 0.5894, "step": 5181 }, { "epoch": 0.7728848950371006, "grad_norm": 1.194411039352417, "learning_rate": 2.5843065027361526e-06, "loss": 0.6022, "step": 5182 }, { "epoch": 0.7730340430291958, "grad_norm": 1.2361607551574707, "learning_rate": 2.581065873870203e-06, "loss": 0.5896, "step": 5183 }, { "epoch": 0.7731831910212908, "grad_norm": 1.2211146354675293, "learning_rate": 2.577826977001995e-06, "loss": 0.5687, "step": 5184 }, { "epoch": 0.773332339013386, "grad_norm": 1.123509168624878, "learning_rate": 2.574589812887669e-06, "loss": 0.5582, "step": 5185 }, { "epoch": 0.7734814870054811, "grad_norm": 1.246232509613037, "learning_rate": 2.5713543822829636e-06, "loss": 0.5202, "step": 5186 }, { "epoch": 0.7736306349975763, "grad_norm": 1.2590912580490112, "learning_rate": 2.5681206859432127e-06, "loss": 0.598, "step": 5187 }, { "epoch": 0.7737797829896715, "grad_norm": 1.158335566520691, "learning_rate": 2.5648887246233357e-06, "loss": 0.5812, "step": 5188 }, { "epoch": 0.7739289309817666, "grad_norm": 1.1729538440704346, "learning_rate": 2.5616584990778625e-06, "loss": 0.5856, "step": 5189 }, { "epoch": 0.7740780789738618, "grad_norm": 1.214111089706421, "learning_rate": 2.5584300100609116e-06, "loss": 0.6474, "step": 5190 }, { "epoch": 0.7742272269659569, "grad_norm": 1.1687190532684326, "learning_rate": 2.5552032583261867e-06, "loss": 0.6184, "step": 5191 }, { "epoch": 0.7743763749580521, "grad_norm": 1.15581476688385, "learning_rate": 2.551978244626998e-06, "loss": 0.5187, "step": 5192 }, { "epoch": 0.7745255229501473, "grad_norm": 1.242380142211914, "learning_rate": 2.548754969716248e-06, "loss": 0.6552, "step": 5193 }, { "epoch": 0.7746746709422424, "grad_norm": 1.180883526802063, "learning_rate": 2.5455334343464246e-06, "loss": 0.5631, "step": 5194 }, { "epoch": 0.7748238189343376, "grad_norm": 1.2642775774002075, "learning_rate": 2.54231363926962e-06, "loss": 0.5592, "step": 5195 }, { "epoch": 0.7749729669264328, "grad_norm": 1.2224149703979492, "learning_rate": 2.5390955852375177e-06, "loss": 0.6251, "step": 5196 }, { "epoch": 0.7751221149185279, "grad_norm": 1.192692518234253, "learning_rate": 2.5358792730013847e-06, "loss": 0.6081, "step": 5197 }, { "epoch": 0.7752712629106231, "grad_norm": 1.3166922330856323, "learning_rate": 2.532664703312099e-06, "loss": 0.6325, "step": 5198 }, { "epoch": 0.7754204109027182, "grad_norm": 1.1555507183074951, "learning_rate": 2.5294518769201213e-06, "loss": 0.5753, "step": 5199 }, { "epoch": 0.7755695588948134, "grad_norm": 1.1868237257003784, "learning_rate": 2.5262407945755017e-06, "loss": 0.6862, "step": 5200 }, { "epoch": 0.7757187068869086, "grad_norm": 1.120808720588684, "learning_rate": 2.5230314570278914e-06, "loss": 0.5292, "step": 5201 }, { "epoch": 0.7758678548790037, "grad_norm": 1.165928840637207, "learning_rate": 2.5198238650265317e-06, "loss": 0.5774, "step": 5202 }, { "epoch": 0.7760170028710989, "grad_norm": 1.22242271900177, "learning_rate": 2.5166180193202517e-06, "loss": 0.5978, "step": 5203 }, { "epoch": 0.776166150863194, "grad_norm": 1.052012324333191, "learning_rate": 2.5134139206574793e-06, "loss": 0.5048, "step": 5204 }, { "epoch": 0.7763152988552892, "grad_norm": 0.8102617859840393, "learning_rate": 2.5102115697862304e-06, "loss": 0.621, "step": 5205 }, { "epoch": 0.7764644468473844, "grad_norm": 1.2114529609680176, "learning_rate": 2.5070109674541155e-06, "loss": 0.6091, "step": 5206 }, { "epoch": 0.7766135948394794, "grad_norm": 1.2024325132369995, "learning_rate": 2.503812114408336e-06, "loss": 0.6013, "step": 5207 }, { "epoch": 0.7767627428315746, "grad_norm": 1.1367061138153076, "learning_rate": 2.5006150113956874e-06, "loss": 0.6202, "step": 5208 }, { "epoch": 0.7769118908236697, "grad_norm": 0.8141079545021057, "learning_rate": 2.4974196591625467e-06, "loss": 0.604, "step": 5209 }, { "epoch": 0.7770610388157649, "grad_norm": 1.1892476081848145, "learning_rate": 2.494226058454894e-06, "loss": 0.6165, "step": 5210 }, { "epoch": 0.7772101868078601, "grad_norm": 1.2278798818588257, "learning_rate": 2.491034210018295e-06, "loss": 0.6121, "step": 5211 }, { "epoch": 0.7773593347999552, "grad_norm": 1.1243653297424316, "learning_rate": 2.4878441145979115e-06, "loss": 0.5835, "step": 5212 }, { "epoch": 0.7775084827920504, "grad_norm": 1.1706527471542358, "learning_rate": 2.4846557729384835e-06, "loss": 0.4813, "step": 5213 }, { "epoch": 0.7776576307841455, "grad_norm": 1.025405764579773, "learning_rate": 2.4814691857843544e-06, "loss": 0.5025, "step": 5214 }, { "epoch": 0.7778067787762407, "grad_norm": 1.272582769393921, "learning_rate": 2.478284353879453e-06, "loss": 0.5962, "step": 5215 }, { "epoch": 0.7779559267683359, "grad_norm": 1.2087132930755615, "learning_rate": 2.475101277967299e-06, "loss": 0.6225, "step": 5216 }, { "epoch": 0.778105074760431, "grad_norm": 1.0463218688964844, "learning_rate": 2.471919958791e-06, "loss": 0.4552, "step": 5217 }, { "epoch": 0.7782542227525262, "grad_norm": 1.149179458618164, "learning_rate": 2.4687403970932622e-06, "loss": 0.5956, "step": 5218 }, { "epoch": 0.7784033707446214, "grad_norm": 1.3218426704406738, "learning_rate": 2.465562593616365e-06, "loss": 0.6272, "step": 5219 }, { "epoch": 0.7785525187367165, "grad_norm": 1.0687147378921509, "learning_rate": 2.4623865491021913e-06, "loss": 0.517, "step": 5220 }, { "epoch": 0.7787016667288117, "grad_norm": 1.1256271600723267, "learning_rate": 2.4592122642922134e-06, "loss": 0.5338, "step": 5221 }, { "epoch": 0.7788508147209068, "grad_norm": 1.1502922773361206, "learning_rate": 2.456039739927479e-06, "loss": 0.6162, "step": 5222 }, { "epoch": 0.778999962713002, "grad_norm": 1.024222493171692, "learning_rate": 2.452868976748639e-06, "loss": 0.5614, "step": 5223 }, { "epoch": 0.7791491107050972, "grad_norm": 1.2246328592300415, "learning_rate": 2.449699975495934e-06, "loss": 0.596, "step": 5224 }, { "epoch": 0.7792982586971923, "grad_norm": 1.216117024421692, "learning_rate": 2.4465327369091784e-06, "loss": 0.6018, "step": 5225 }, { "epoch": 0.7794474066892875, "grad_norm": 1.211552381515503, "learning_rate": 2.4433672617277892e-06, "loss": 0.5278, "step": 5226 }, { "epoch": 0.7795965546813826, "grad_norm": 1.1493215560913086, "learning_rate": 2.4402035506907697e-06, "loss": 0.6384, "step": 5227 }, { "epoch": 0.7797457026734778, "grad_norm": 1.059765338897705, "learning_rate": 2.437041604536702e-06, "loss": 0.5365, "step": 5228 }, { "epoch": 0.779894850665573, "grad_norm": 1.180930495262146, "learning_rate": 2.4338814240037643e-06, "loss": 0.6219, "step": 5229 }, { "epoch": 0.780043998657668, "grad_norm": 1.258105993270874, "learning_rate": 2.430723009829724e-06, "loss": 0.5238, "step": 5230 }, { "epoch": 0.7801931466497632, "grad_norm": 1.1262003183364868, "learning_rate": 2.427566362751934e-06, "loss": 0.5368, "step": 5231 }, { "epoch": 0.7803422946418583, "grad_norm": 0.859350323677063, "learning_rate": 2.424411483507325e-06, "loss": 0.631, "step": 5232 }, { "epoch": 0.7804914426339535, "grad_norm": 1.1449012756347656, "learning_rate": 2.4212583728324367e-06, "loss": 0.5306, "step": 5233 }, { "epoch": 0.7806405906260487, "grad_norm": 0.8615925908088684, "learning_rate": 2.4181070314633727e-06, "loss": 0.6358, "step": 5234 }, { "epoch": 0.7807897386181438, "grad_norm": 1.1792912483215332, "learning_rate": 2.4149574601358383e-06, "loss": 0.5938, "step": 5235 }, { "epoch": 0.780938886610239, "grad_norm": 1.2892372608184814, "learning_rate": 2.4118096595851205e-06, "loss": 0.6248, "step": 5236 }, { "epoch": 0.7810880346023341, "grad_norm": 1.1948531866073608, "learning_rate": 2.408663630546095e-06, "loss": 0.5745, "step": 5237 }, { "epoch": 0.7812371825944293, "grad_norm": 1.1118272542953491, "learning_rate": 2.405519373753219e-06, "loss": 0.5415, "step": 5238 }, { "epoch": 0.7813863305865245, "grad_norm": 1.2147146463394165, "learning_rate": 2.4023768899405407e-06, "loss": 0.5831, "step": 5239 }, { "epoch": 0.7815354785786196, "grad_norm": 1.1634199619293213, "learning_rate": 2.3992361798416974e-06, "loss": 0.5596, "step": 5240 }, { "epoch": 0.7816846265707148, "grad_norm": 1.3046298027038574, "learning_rate": 2.3960972441898976e-06, "loss": 0.5774, "step": 5241 }, { "epoch": 0.78183377456281, "grad_norm": 1.1961654424667358, "learning_rate": 2.392960083717957e-06, "loss": 0.5371, "step": 5242 }, { "epoch": 0.7819829225549051, "grad_norm": 1.2491499185562134, "learning_rate": 2.389824699158263e-06, "loss": 0.5185, "step": 5243 }, { "epoch": 0.7821320705470003, "grad_norm": 1.2297991514205933, "learning_rate": 2.3866910912427875e-06, "loss": 0.6466, "step": 5244 }, { "epoch": 0.7822812185390954, "grad_norm": 1.2831003665924072, "learning_rate": 2.383559260703093e-06, "loss": 0.6286, "step": 5245 }, { "epoch": 0.7824303665311906, "grad_norm": 0.8135766983032227, "learning_rate": 2.3804292082703295e-06, "loss": 0.6319, "step": 5246 }, { "epoch": 0.7825795145232858, "grad_norm": 1.1922500133514404, "learning_rate": 2.3773009346752207e-06, "loss": 0.6168, "step": 5247 }, { "epoch": 0.7827286625153809, "grad_norm": 1.2814596891403198, "learning_rate": 2.374174440648086e-06, "loss": 0.5759, "step": 5248 }, { "epoch": 0.7828778105074761, "grad_norm": 1.3175718784332275, "learning_rate": 2.3710497269188258e-06, "loss": 0.6321, "step": 5249 }, { "epoch": 0.7830269584995712, "grad_norm": 1.2037323713302612, "learning_rate": 2.3679267942169237e-06, "loss": 0.6259, "step": 5250 }, { "epoch": 0.7831761064916664, "grad_norm": 1.1714954376220703, "learning_rate": 2.3648056432714483e-06, "loss": 0.6004, "step": 5251 }, { "epoch": 0.7833252544837616, "grad_norm": 1.2195775508880615, "learning_rate": 2.361686274811056e-06, "loss": 0.548, "step": 5252 }, { "epoch": 0.7834744024758566, "grad_norm": 1.1811585426330566, "learning_rate": 2.3585686895639757e-06, "loss": 0.5516, "step": 5253 }, { "epoch": 0.7836235504679518, "grad_norm": 1.2175768613815308, "learning_rate": 2.355452888258033e-06, "loss": 0.5869, "step": 5254 }, { "epoch": 0.7837726984600469, "grad_norm": 1.1575920581817627, "learning_rate": 2.352338871620634e-06, "loss": 0.5791, "step": 5255 }, { "epoch": 0.7839218464521421, "grad_norm": 1.0917435884475708, "learning_rate": 2.349226640378759e-06, "loss": 0.5847, "step": 5256 }, { "epoch": 0.7840709944442373, "grad_norm": 1.231919527053833, "learning_rate": 2.346116195258982e-06, "loss": 0.6216, "step": 5257 }, { "epoch": 0.7842201424363324, "grad_norm": 1.037202000617981, "learning_rate": 2.3430075369874563e-06, "loss": 0.5133, "step": 5258 }, { "epoch": 0.7843692904284276, "grad_norm": 1.2152619361877441, "learning_rate": 2.339900666289918e-06, "loss": 0.6353, "step": 5259 }, { "epoch": 0.7845184384205227, "grad_norm": 1.2588852643966675, "learning_rate": 2.3367955838916855e-06, "loss": 0.6042, "step": 5260 }, { "epoch": 0.7846675864126179, "grad_norm": 1.3180984258651733, "learning_rate": 2.333692290517664e-06, "loss": 0.6001, "step": 5261 }, { "epoch": 0.7848167344047131, "grad_norm": 1.2390670776367188, "learning_rate": 2.3305907868923306e-06, "loss": 0.5506, "step": 5262 }, { "epoch": 0.7849658823968082, "grad_norm": 1.1336369514465332, "learning_rate": 2.327491073739755e-06, "loss": 0.557, "step": 5263 }, { "epoch": 0.7851150303889034, "grad_norm": 1.215159296989441, "learning_rate": 2.324393151783585e-06, "loss": 0.6115, "step": 5264 }, { "epoch": 0.7852641783809986, "grad_norm": 1.2483335733413696, "learning_rate": 2.321297021747052e-06, "loss": 0.7112, "step": 5265 }, { "epoch": 0.7854133263730937, "grad_norm": 1.1540101766586304, "learning_rate": 2.318202684352964e-06, "loss": 0.5902, "step": 5266 }, { "epoch": 0.7855624743651889, "grad_norm": 1.1756082773208618, "learning_rate": 2.315110140323713e-06, "loss": 0.6382, "step": 5267 }, { "epoch": 0.785711622357284, "grad_norm": 1.2334821224212646, "learning_rate": 2.312019390381277e-06, "loss": 0.5724, "step": 5268 }, { "epoch": 0.7858607703493792, "grad_norm": 1.1960868835449219, "learning_rate": 2.3089304352472095e-06, "loss": 0.6447, "step": 5269 }, { "epoch": 0.7860099183414744, "grad_norm": 1.339708685874939, "learning_rate": 2.3058432756426473e-06, "loss": 0.6056, "step": 5270 }, { "epoch": 0.7861590663335695, "grad_norm": 1.1074867248535156, "learning_rate": 2.3027579122883114e-06, "loss": 0.5354, "step": 5271 }, { "epoch": 0.7863082143256647, "grad_norm": 1.244795560836792, "learning_rate": 2.2996743459044925e-06, "loss": 0.5622, "step": 5272 }, { "epoch": 0.7864573623177598, "grad_norm": 1.258207082748413, "learning_rate": 2.296592577211072e-06, "loss": 0.6312, "step": 5273 }, { "epoch": 0.786606510309855, "grad_norm": 1.211887240409851, "learning_rate": 2.2935126069275116e-06, "loss": 0.5724, "step": 5274 }, { "epoch": 0.7867556583019502, "grad_norm": 1.1604562997817993, "learning_rate": 2.290434435772845e-06, "loss": 0.5717, "step": 5275 }, { "epoch": 0.7869048062940452, "grad_norm": 1.175411343574524, "learning_rate": 2.28735806446569e-06, "loss": 0.6051, "step": 5276 }, { "epoch": 0.7870539542861404, "grad_norm": 1.1421666145324707, "learning_rate": 2.284283493724255e-06, "loss": 0.543, "step": 5277 }, { "epoch": 0.7872031022782355, "grad_norm": 1.2385197877883911, "learning_rate": 2.2812107242663082e-06, "loss": 0.5531, "step": 5278 }, { "epoch": 0.7873522502703307, "grad_norm": 1.2352230548858643, "learning_rate": 2.2781397568092113e-06, "loss": 0.6441, "step": 5279 }, { "epoch": 0.7875013982624259, "grad_norm": 1.2176098823547363, "learning_rate": 2.2750705920699044e-06, "loss": 0.6389, "step": 5280 }, { "epoch": 0.787650546254521, "grad_norm": 1.1642996072769165, "learning_rate": 2.2720032307648967e-06, "loss": 0.6202, "step": 5281 }, { "epoch": 0.7877996942466162, "grad_norm": 1.1139878034591675, "learning_rate": 2.2689376736102874e-06, "loss": 0.6162, "step": 5282 }, { "epoch": 0.7879488422387113, "grad_norm": 1.098741054534912, "learning_rate": 2.2658739213217496e-06, "loss": 0.585, "step": 5283 }, { "epoch": 0.7880979902308065, "grad_norm": 1.1668888330459595, "learning_rate": 2.262811974614537e-06, "loss": 0.5376, "step": 5284 }, { "epoch": 0.7882471382229017, "grad_norm": 1.2178361415863037, "learning_rate": 2.2597518342034797e-06, "loss": 0.6085, "step": 5285 }, { "epoch": 0.7883962862149968, "grad_norm": 1.0126498937606812, "learning_rate": 2.25669350080299e-06, "loss": 0.5048, "step": 5286 }, { "epoch": 0.788545434207092, "grad_norm": 1.1883598566055298, "learning_rate": 2.2536369751270514e-06, "loss": 0.6228, "step": 5287 }, { "epoch": 0.7886945821991871, "grad_norm": 1.136285424232483, "learning_rate": 2.25058225788923e-06, "loss": 0.5996, "step": 5288 }, { "epoch": 0.7888437301912823, "grad_norm": 1.0605933666229248, "learning_rate": 2.2475293498026697e-06, "loss": 0.539, "step": 5289 }, { "epoch": 0.7889928781833775, "grad_norm": 1.1993001699447632, "learning_rate": 2.2444782515800946e-06, "loss": 0.6133, "step": 5290 }, { "epoch": 0.7891420261754726, "grad_norm": 1.1140413284301758, "learning_rate": 2.2414289639337983e-06, "loss": 0.5017, "step": 5291 }, { "epoch": 0.7892911741675678, "grad_norm": 1.0962672233581543, "learning_rate": 2.2383814875756583e-06, "loss": 0.5506, "step": 5292 }, { "epoch": 0.789440322159663, "grad_norm": 1.1401886940002441, "learning_rate": 2.235335823217127e-06, "loss": 0.5505, "step": 5293 }, { "epoch": 0.7895894701517581, "grad_norm": 1.2247322797775269, "learning_rate": 2.2322919715692358e-06, "loss": 0.5994, "step": 5294 }, { "epoch": 0.7897386181438533, "grad_norm": 1.228216290473938, "learning_rate": 2.229249933342591e-06, "loss": 0.572, "step": 5295 }, { "epoch": 0.7898877661359484, "grad_norm": 1.1851608753204346, "learning_rate": 2.2262097092473776e-06, "loss": 0.5174, "step": 5296 }, { "epoch": 0.7900369141280436, "grad_norm": 1.2861021757125854, "learning_rate": 2.2231712999933506e-06, "loss": 0.5907, "step": 5297 }, { "epoch": 0.7901860621201388, "grad_norm": 1.2193305492401123, "learning_rate": 2.2201347062898505e-06, "loss": 0.6303, "step": 5298 }, { "epoch": 0.7903352101122338, "grad_norm": 1.121227502822876, "learning_rate": 2.2170999288457896e-06, "loss": 0.5032, "step": 5299 }, { "epoch": 0.790484358104329, "grad_norm": 1.2995245456695557, "learning_rate": 2.2140669683696513e-06, "loss": 0.6311, "step": 5300 }, { "epoch": 0.7906335060964241, "grad_norm": 1.1582279205322266, "learning_rate": 2.211035825569503e-06, "loss": 0.5204, "step": 5301 }, { "epoch": 0.7907826540885193, "grad_norm": 0.8514735102653503, "learning_rate": 2.2080065011529848e-06, "loss": 0.62, "step": 5302 }, { "epoch": 0.7909318020806145, "grad_norm": 1.2791533470153809, "learning_rate": 2.2049789958273117e-06, "loss": 0.5728, "step": 5303 }, { "epoch": 0.7910809500727096, "grad_norm": 1.1586865186691284, "learning_rate": 2.201953310299274e-06, "loss": 0.5445, "step": 5304 }, { "epoch": 0.7912300980648048, "grad_norm": 1.224170446395874, "learning_rate": 2.1989294452752398e-06, "loss": 0.6405, "step": 5305 }, { "epoch": 0.7913792460568999, "grad_norm": 1.145415186882019, "learning_rate": 2.1959074014611447e-06, "loss": 0.5644, "step": 5306 }, { "epoch": 0.7915283940489951, "grad_norm": 1.1029068231582642, "learning_rate": 2.192887179562506e-06, "loss": 0.4921, "step": 5307 }, { "epoch": 0.7916775420410903, "grad_norm": 1.2571160793304443, "learning_rate": 2.1898687802844187e-06, "loss": 0.6306, "step": 5308 }, { "epoch": 0.7918266900331854, "grad_norm": 1.18130624294281, "learning_rate": 2.186852204331541e-06, "loss": 0.5982, "step": 5309 }, { "epoch": 0.7919758380252806, "grad_norm": 1.3406672477722168, "learning_rate": 2.183837452408113e-06, "loss": 0.6158, "step": 5310 }, { "epoch": 0.7921249860173757, "grad_norm": 1.1290818452835083, "learning_rate": 2.1808245252179503e-06, "loss": 0.5646, "step": 5311 }, { "epoch": 0.7922741340094709, "grad_norm": 1.1963481903076172, "learning_rate": 2.177813423464439e-06, "loss": 0.5815, "step": 5312 }, { "epoch": 0.7924232820015661, "grad_norm": 1.084618330001831, "learning_rate": 2.1748041478505386e-06, "loss": 0.5745, "step": 5313 }, { "epoch": 0.7925724299936612, "grad_norm": 1.260748028755188, "learning_rate": 2.1717966990787877e-06, "loss": 0.5611, "step": 5314 }, { "epoch": 0.7927215779857564, "grad_norm": 1.1675128936767578, "learning_rate": 2.168791077851293e-06, "loss": 0.5641, "step": 5315 }, { "epoch": 0.7928707259778516, "grad_norm": 1.1987758874893188, "learning_rate": 2.1657872848697336e-06, "loss": 0.6037, "step": 5316 }, { "epoch": 0.7930198739699467, "grad_norm": 1.2193691730499268, "learning_rate": 2.1627853208353655e-06, "loss": 0.5171, "step": 5317 }, { "epoch": 0.7931690219620419, "grad_norm": 1.2453259229660034, "learning_rate": 2.1597851864490193e-06, "loss": 0.6215, "step": 5318 }, { "epoch": 0.793318169954137, "grad_norm": 1.2896581888198853, "learning_rate": 2.156786882411087e-06, "loss": 0.5498, "step": 5319 }, { "epoch": 0.7934673179462322, "grad_norm": 1.1910346746444702, "learning_rate": 2.1537904094215512e-06, "loss": 0.5815, "step": 5320 }, { "epoch": 0.7936164659383274, "grad_norm": 1.2101773023605347, "learning_rate": 2.1507957681799574e-06, "loss": 0.5528, "step": 5321 }, { "epoch": 0.7937656139304224, "grad_norm": 1.2189395427703857, "learning_rate": 2.147802959385419e-06, "loss": 0.5598, "step": 5322 }, { "epoch": 0.7939147619225176, "grad_norm": 1.1820728778839111, "learning_rate": 2.1448119837366266e-06, "loss": 0.5542, "step": 5323 }, { "epoch": 0.7940639099146127, "grad_norm": 1.2289183139801025, "learning_rate": 2.1418228419318486e-06, "loss": 0.5821, "step": 5324 }, { "epoch": 0.7942130579067079, "grad_norm": 1.2654838562011719, "learning_rate": 2.1388355346689118e-06, "loss": 0.5814, "step": 5325 }, { "epoch": 0.7943622058988031, "grad_norm": 1.2571699619293213, "learning_rate": 2.135850062645225e-06, "loss": 0.5534, "step": 5326 }, { "epoch": 0.7945113538908982, "grad_norm": 1.2184401750564575, "learning_rate": 2.1328664265577694e-06, "loss": 0.572, "step": 5327 }, { "epoch": 0.7946605018829934, "grad_norm": 1.2781262397766113, "learning_rate": 2.1298846271030847e-06, "loss": 0.5466, "step": 5328 }, { "epoch": 0.7948096498750885, "grad_norm": 1.2985758781433105, "learning_rate": 2.126904664977302e-06, "loss": 0.4996, "step": 5329 }, { "epoch": 0.7949587978671837, "grad_norm": 1.2998247146606445, "learning_rate": 2.123926540876109e-06, "loss": 0.5742, "step": 5330 }, { "epoch": 0.7951079458592789, "grad_norm": 1.2758477926254272, "learning_rate": 2.1209502554947636e-06, "loss": 0.5922, "step": 5331 }, { "epoch": 0.795257093851374, "grad_norm": 1.2147129774093628, "learning_rate": 2.1179758095281023e-06, "loss": 0.6175, "step": 5332 }, { "epoch": 0.7954062418434692, "grad_norm": 1.2646623849868774, "learning_rate": 2.1150032036705316e-06, "loss": 0.6381, "step": 5333 }, { "epoch": 0.7955553898355643, "grad_norm": 0.8360782861709595, "learning_rate": 2.1120324386160187e-06, "loss": 0.6542, "step": 5334 }, { "epoch": 0.7957045378276595, "grad_norm": 1.352407455444336, "learning_rate": 2.109063515058111e-06, "loss": 0.6819, "step": 5335 }, { "epoch": 0.7958536858197547, "grad_norm": 1.1155022382736206, "learning_rate": 2.1060964336899216e-06, "loss": 0.4781, "step": 5336 }, { "epoch": 0.7960028338118498, "grad_norm": 0.7740480899810791, "learning_rate": 2.1031311952041366e-06, "loss": 0.581, "step": 5337 }, { "epoch": 0.796151981803945, "grad_norm": 1.1982979774475098, "learning_rate": 2.1001678002930093e-06, "loss": 0.5799, "step": 5338 }, { "epoch": 0.7963011297960402, "grad_norm": 1.1452447175979614, "learning_rate": 2.0972062496483657e-06, "loss": 0.5385, "step": 5339 }, { "epoch": 0.7964502777881353, "grad_norm": 1.1509513854980469, "learning_rate": 2.0942465439615935e-06, "loss": 0.5482, "step": 5340 }, { "epoch": 0.7965994257802305, "grad_norm": 1.337584137916565, "learning_rate": 2.0912886839236567e-06, "loss": 0.6481, "step": 5341 }, { "epoch": 0.7967485737723256, "grad_norm": 1.2190907001495361, "learning_rate": 2.0883326702250885e-06, "loss": 0.6018, "step": 5342 }, { "epoch": 0.7968977217644208, "grad_norm": 0.7645917534828186, "learning_rate": 2.0853785035559903e-06, "loss": 0.595, "step": 5343 }, { "epoch": 0.797046869756516, "grad_norm": 1.0614594221115112, "learning_rate": 2.082426184606027e-06, "loss": 0.6098, "step": 5344 }, { "epoch": 0.797196017748611, "grad_norm": 1.11899733543396, "learning_rate": 2.0794757140644397e-06, "loss": 0.5512, "step": 5345 }, { "epoch": 0.7973451657407062, "grad_norm": 1.18121337890625, "learning_rate": 2.076527092620032e-06, "loss": 0.5458, "step": 5346 }, { "epoch": 0.7974943137328013, "grad_norm": 1.149079442024231, "learning_rate": 2.0735803209611805e-06, "loss": 0.5567, "step": 5347 }, { "epoch": 0.7976434617248965, "grad_norm": 1.2046449184417725, "learning_rate": 2.070635399775828e-06, "loss": 0.5884, "step": 5348 }, { "epoch": 0.7977926097169917, "grad_norm": 1.2192145586013794, "learning_rate": 2.0676923297514874e-06, "loss": 0.5664, "step": 5349 }, { "epoch": 0.7979417577090868, "grad_norm": 1.3119981288909912, "learning_rate": 2.064751111575232e-06, "loss": 0.6486, "step": 5350 }, { "epoch": 0.798090905701182, "grad_norm": 1.2311196327209473, "learning_rate": 2.0618117459337107e-06, "loss": 0.5704, "step": 5351 }, { "epoch": 0.7982400536932771, "grad_norm": 1.06354820728302, "learning_rate": 2.0588742335131397e-06, "loss": 0.5773, "step": 5352 }, { "epoch": 0.7983892016853723, "grad_norm": 1.1751501560211182, "learning_rate": 2.0559385749992956e-06, "loss": 0.5944, "step": 5353 }, { "epoch": 0.7985383496774675, "grad_norm": 1.0964734554290771, "learning_rate": 2.053004771077525e-06, "loss": 0.5197, "step": 5354 }, { "epoch": 0.7986874976695626, "grad_norm": 1.242840051651001, "learning_rate": 2.0500728224327537e-06, "loss": 0.5479, "step": 5355 }, { "epoch": 0.7988366456616578, "grad_norm": 1.1232553720474243, "learning_rate": 2.047142729749454e-06, "loss": 0.5502, "step": 5356 }, { "epoch": 0.7989857936537529, "grad_norm": 1.1914697885513306, "learning_rate": 2.044214493711677e-06, "loss": 0.5658, "step": 5357 }, { "epoch": 0.7991349416458481, "grad_norm": 1.2291548252105713, "learning_rate": 2.041288115003043e-06, "loss": 0.561, "step": 5358 }, { "epoch": 0.7992840896379433, "grad_norm": 1.2512133121490479, "learning_rate": 2.038363594306727e-06, "loss": 0.6418, "step": 5359 }, { "epoch": 0.7994332376300384, "grad_norm": 1.0971219539642334, "learning_rate": 2.0354409323054814e-06, "loss": 0.526, "step": 5360 }, { "epoch": 0.7995823856221336, "grad_norm": 1.2686368227005005, "learning_rate": 2.0325201296816177e-06, "loss": 0.5942, "step": 5361 }, { "epoch": 0.7997315336142288, "grad_norm": 1.2364904880523682, "learning_rate": 2.0296011871170208e-06, "loss": 0.668, "step": 5362 }, { "epoch": 0.7998806816063239, "grad_norm": 1.1121623516082764, "learning_rate": 2.0266841052931275e-06, "loss": 0.5585, "step": 5363 }, { "epoch": 0.8000298295984191, "grad_norm": 1.2070835828781128, "learning_rate": 2.0237688848909607e-06, "loss": 0.5426, "step": 5364 }, { "epoch": 0.8001789775905142, "grad_norm": 1.301594614982605, "learning_rate": 2.020855526591089e-06, "loss": 0.5785, "step": 5365 }, { "epoch": 0.8003281255826094, "grad_norm": 1.2209845781326294, "learning_rate": 2.0179440310736575e-06, "loss": 0.6314, "step": 5366 }, { "epoch": 0.8004772735747046, "grad_norm": 1.2014493942260742, "learning_rate": 2.015034399018373e-06, "loss": 0.6028, "step": 5367 }, { "epoch": 0.8006264215667996, "grad_norm": 1.3100489377975464, "learning_rate": 2.0121266311045106e-06, "loss": 0.6197, "step": 5368 }, { "epoch": 0.8007755695588948, "grad_norm": 1.2435928583145142, "learning_rate": 2.009220728010901e-06, "loss": 0.6166, "step": 5369 }, { "epoch": 0.8009247175509899, "grad_norm": 1.2369002103805542, "learning_rate": 2.0063166904159516e-06, "loss": 0.5807, "step": 5370 }, { "epoch": 0.8010738655430851, "grad_norm": 1.1451987028121948, "learning_rate": 2.0034145189976275e-06, "loss": 0.5794, "step": 5371 }, { "epoch": 0.8012230135351803, "grad_norm": 1.198885440826416, "learning_rate": 2.0005142144334533e-06, "loss": 0.6243, "step": 5372 }, { "epoch": 0.8013721615272754, "grad_norm": 1.2458237409591675, "learning_rate": 1.9976157774005323e-06, "loss": 0.5889, "step": 5373 }, { "epoch": 0.8015213095193706, "grad_norm": 1.2346004247665405, "learning_rate": 1.994719208575522e-06, "loss": 0.6633, "step": 5374 }, { "epoch": 0.8016704575114657, "grad_norm": 1.2382704019546509, "learning_rate": 1.9918245086346387e-06, "loss": 0.5641, "step": 5375 }, { "epoch": 0.8018196055035609, "grad_norm": 1.141903281211853, "learning_rate": 1.9889316782536737e-06, "loss": 0.5772, "step": 5376 }, { "epoch": 0.8019687534956561, "grad_norm": 1.3814808130264282, "learning_rate": 1.9860407181079787e-06, "loss": 0.6884, "step": 5377 }, { "epoch": 0.8021179014877512, "grad_norm": 1.1554739475250244, "learning_rate": 1.9831516288724607e-06, "loss": 0.5192, "step": 5378 }, { "epoch": 0.8022670494798464, "grad_norm": 1.1393495798110962, "learning_rate": 1.9802644112215996e-06, "loss": 0.5706, "step": 5379 }, { "epoch": 0.8024161974719415, "grad_norm": 1.2304822206497192, "learning_rate": 1.9773790658294368e-06, "loss": 0.554, "step": 5380 }, { "epoch": 0.8025653454640367, "grad_norm": 1.1503117084503174, "learning_rate": 1.9744955933695663e-06, "loss": 0.5758, "step": 5381 }, { "epoch": 0.8027144934561319, "grad_norm": 1.1889744997024536, "learning_rate": 1.9716139945151634e-06, "loss": 0.5767, "step": 5382 }, { "epoch": 0.802863641448227, "grad_norm": 1.1579774618148804, "learning_rate": 1.9687342699389542e-06, "loss": 0.562, "step": 5383 }, { "epoch": 0.8030127894403222, "grad_norm": 1.2313345670700073, "learning_rate": 1.9658564203132235e-06, "loss": 0.5891, "step": 5384 }, { "epoch": 0.8031619374324173, "grad_norm": 1.1448005437850952, "learning_rate": 1.962980446309827e-06, "loss": 0.5557, "step": 5385 }, { "epoch": 0.8033110854245125, "grad_norm": 1.1569465398788452, "learning_rate": 1.9601063486001815e-06, "loss": 0.5662, "step": 5386 }, { "epoch": 0.8034602334166077, "grad_norm": 1.1608850955963135, "learning_rate": 1.9572341278552575e-06, "loss": 0.5792, "step": 5387 }, { "epoch": 0.8036093814087027, "grad_norm": 1.1372625827789307, "learning_rate": 1.9543637847455976e-06, "loss": 0.5283, "step": 5388 }, { "epoch": 0.803758529400798, "grad_norm": 1.2365871667861938, "learning_rate": 1.9514953199413013e-06, "loss": 0.6389, "step": 5389 }, { "epoch": 0.8039076773928931, "grad_norm": 1.2038960456848145, "learning_rate": 1.948628734112029e-06, "loss": 0.5419, "step": 5390 }, { "epoch": 0.8040568253849882, "grad_norm": 1.1919004917144775, "learning_rate": 1.9457640279270053e-06, "loss": 0.5299, "step": 5391 }, { "epoch": 0.8042059733770834, "grad_norm": 1.3174171447753906, "learning_rate": 1.942901202055015e-06, "loss": 0.653, "step": 5392 }, { "epoch": 0.8043551213691785, "grad_norm": 1.246513843536377, "learning_rate": 1.9400402571644005e-06, "loss": 0.5886, "step": 5393 }, { "epoch": 0.8045042693612737, "grad_norm": 1.2365264892578125, "learning_rate": 1.937181193923068e-06, "loss": 0.5563, "step": 5394 }, { "epoch": 0.8046534173533689, "grad_norm": 1.261883020401001, "learning_rate": 1.9343240129984843e-06, "loss": 0.5869, "step": 5395 }, { "epoch": 0.804802565345464, "grad_norm": 1.294022560119629, "learning_rate": 1.9314687150576806e-06, "loss": 0.5515, "step": 5396 }, { "epoch": 0.8049517133375592, "grad_norm": 1.245898962020874, "learning_rate": 1.928615300767237e-06, "loss": 0.6423, "step": 5397 }, { "epoch": 0.8051008613296543, "grad_norm": 1.1567866802215576, "learning_rate": 1.9257637707933043e-06, "loss": 0.5526, "step": 5398 }, { "epoch": 0.8052500093217495, "grad_norm": 1.1071441173553467, "learning_rate": 1.922914125801596e-06, "loss": 0.5961, "step": 5399 }, { "epoch": 0.8053991573138447, "grad_norm": 0.8199513554573059, "learning_rate": 1.920066366457374e-06, "loss": 0.6626, "step": 5400 }, { "epoch": 0.8055483053059398, "grad_norm": 1.1784154176712036, "learning_rate": 1.917220493425467e-06, "loss": 0.6605, "step": 5401 }, { "epoch": 0.805697453298035, "grad_norm": 1.1311407089233398, "learning_rate": 1.9143765073702646e-06, "loss": 0.5194, "step": 5402 }, { "epoch": 0.8058466012901301, "grad_norm": 1.3184326887130737, "learning_rate": 1.911534408955711e-06, "loss": 0.5684, "step": 5403 }, { "epoch": 0.8059957492822253, "grad_norm": 1.1880618333816528, "learning_rate": 1.908694198845312e-06, "loss": 0.5724, "step": 5404 }, { "epoch": 0.8061448972743205, "grad_norm": 1.168643593788147, "learning_rate": 1.9058558777021363e-06, "loss": 0.6377, "step": 5405 }, { "epoch": 0.8062940452664156, "grad_norm": 1.2967512607574463, "learning_rate": 1.9030194461888041e-06, "loss": 0.5838, "step": 5406 }, { "epoch": 0.8064431932585108, "grad_norm": 1.1148955821990967, "learning_rate": 1.900184904967498e-06, "loss": 0.5144, "step": 5407 }, { "epoch": 0.8065923412506059, "grad_norm": 0.7895510196685791, "learning_rate": 1.8973522546999667e-06, "loss": 0.6055, "step": 5408 }, { "epoch": 0.8067414892427011, "grad_norm": 1.0647985935211182, "learning_rate": 1.8945214960475034e-06, "loss": 0.5114, "step": 5409 }, { "epoch": 0.8068906372347963, "grad_norm": 1.1006759405136108, "learning_rate": 1.8916926296709692e-06, "loss": 0.6281, "step": 5410 }, { "epoch": 0.8070397852268913, "grad_norm": 1.2133090496063232, "learning_rate": 1.8888656562307849e-06, "loss": 0.606, "step": 5411 }, { "epoch": 0.8071889332189865, "grad_norm": 1.103794813156128, "learning_rate": 1.8860405763869183e-06, "loss": 0.6121, "step": 5412 }, { "epoch": 0.8073380812110817, "grad_norm": 1.2157467603683472, "learning_rate": 1.883217390798907e-06, "loss": 0.5368, "step": 5413 }, { "epoch": 0.8074872292031768, "grad_norm": 1.1664432287216187, "learning_rate": 1.8803961001258408e-06, "loss": 0.6232, "step": 5414 }, { "epoch": 0.807636377195272, "grad_norm": 1.2139590978622437, "learning_rate": 1.8775767050263683e-06, "loss": 0.571, "step": 5415 }, { "epoch": 0.8077855251873671, "grad_norm": 1.10991370677948, "learning_rate": 1.874759206158695e-06, "loss": 0.4924, "step": 5416 }, { "epoch": 0.8079346731794623, "grad_norm": 1.1812081336975098, "learning_rate": 1.8719436041805872e-06, "loss": 0.5817, "step": 5417 }, { "epoch": 0.8080838211715575, "grad_norm": 1.131568431854248, "learning_rate": 1.869129899749359e-06, "loss": 0.549, "step": 5418 }, { "epoch": 0.8082329691636526, "grad_norm": 0.8220629096031189, "learning_rate": 1.8663180935218927e-06, "loss": 0.5935, "step": 5419 }, { "epoch": 0.8083821171557478, "grad_norm": 1.1181074380874634, "learning_rate": 1.86350818615462e-06, "loss": 0.5214, "step": 5420 }, { "epoch": 0.8085312651478429, "grad_norm": 1.2419297695159912, "learning_rate": 1.860700178303535e-06, "loss": 0.5836, "step": 5421 }, { "epoch": 0.8086804131399381, "grad_norm": 1.2812894582748413, "learning_rate": 1.857894070624181e-06, "loss": 0.5926, "step": 5422 }, { "epoch": 0.8088295611320333, "grad_norm": 1.2251750230789185, "learning_rate": 1.855089863771663e-06, "loss": 0.5756, "step": 5423 }, { "epoch": 0.8089787091241284, "grad_norm": 1.1005566120147705, "learning_rate": 1.8522875584006417e-06, "loss": 0.6405, "step": 5424 }, { "epoch": 0.8091278571162236, "grad_norm": 1.1422277688980103, "learning_rate": 1.8494871551653338e-06, "loss": 0.4798, "step": 5425 }, { "epoch": 0.8092770051083187, "grad_norm": 1.1285408735275269, "learning_rate": 1.8466886547195106e-06, "loss": 0.6134, "step": 5426 }, { "epoch": 0.8094261531004139, "grad_norm": 1.2055773735046387, "learning_rate": 1.8438920577165032e-06, "loss": 0.5574, "step": 5427 }, { "epoch": 0.8095753010925091, "grad_norm": 1.2172677516937256, "learning_rate": 1.84109736480919e-06, "loss": 0.6035, "step": 5428 }, { "epoch": 0.8097244490846042, "grad_norm": 1.2601714134216309, "learning_rate": 1.8383045766500117e-06, "loss": 0.5457, "step": 5429 }, { "epoch": 0.8098735970766994, "grad_norm": 1.0733985900878906, "learning_rate": 1.8355136938909656e-06, "loss": 0.5159, "step": 5430 }, { "epoch": 0.8100227450687945, "grad_norm": 1.16970694065094, "learning_rate": 1.8327247171835961e-06, "loss": 0.5799, "step": 5431 }, { "epoch": 0.8101718930608897, "grad_norm": 1.137251853942871, "learning_rate": 1.8299376471790097e-06, "loss": 0.4923, "step": 5432 }, { "epoch": 0.8103210410529849, "grad_norm": 1.147110939025879, "learning_rate": 1.8271524845278676e-06, "loss": 0.5955, "step": 5433 }, { "epoch": 0.81047018904508, "grad_norm": 1.1523300409317017, "learning_rate": 1.8243692298803816e-06, "loss": 0.6105, "step": 5434 }, { "epoch": 0.8106193370371751, "grad_norm": 1.1820709705352783, "learning_rate": 1.821587883886321e-06, "loss": 0.5319, "step": 5435 }, { "epoch": 0.8107684850292703, "grad_norm": 1.29080069065094, "learning_rate": 1.818808447195013e-06, "loss": 0.5728, "step": 5436 }, { "epoch": 0.8109176330213654, "grad_norm": 1.092890739440918, "learning_rate": 1.8160309204553272e-06, "loss": 0.5825, "step": 5437 }, { "epoch": 0.8110667810134606, "grad_norm": 1.2062734365463257, "learning_rate": 1.8132553043156997e-06, "loss": 0.5519, "step": 5438 }, { "epoch": 0.8112159290055557, "grad_norm": 1.1975023746490479, "learning_rate": 1.8104815994241155e-06, "loss": 0.6356, "step": 5439 }, { "epoch": 0.8113650769976509, "grad_norm": 1.2563533782958984, "learning_rate": 1.807709806428115e-06, "loss": 0.6137, "step": 5440 }, { "epoch": 0.8115142249897461, "grad_norm": 1.116798996925354, "learning_rate": 1.8049399259747869e-06, "loss": 0.542, "step": 5441 }, { "epoch": 0.8116633729818412, "grad_norm": 1.3139369487762451, "learning_rate": 1.8021719587107811e-06, "loss": 0.5672, "step": 5442 }, { "epoch": 0.8118125209739364, "grad_norm": 1.292975902557373, "learning_rate": 1.7994059052822953e-06, "loss": 0.5998, "step": 5443 }, { "epoch": 0.8119616689660315, "grad_norm": 1.2622257471084595, "learning_rate": 1.7966417663350843e-06, "loss": 0.5399, "step": 5444 }, { "epoch": 0.8121108169581267, "grad_norm": 1.3081001043319702, "learning_rate": 1.7938795425144529e-06, "loss": 0.5861, "step": 5445 }, { "epoch": 0.8122599649502219, "grad_norm": 1.1709805727005005, "learning_rate": 1.7911192344652616e-06, "loss": 0.5419, "step": 5446 }, { "epoch": 0.812409112942317, "grad_norm": 1.107656478881836, "learning_rate": 1.78836084283192e-06, "loss": 0.5629, "step": 5447 }, { "epoch": 0.8125582609344122, "grad_norm": 1.1954476833343506, "learning_rate": 1.7856043682583913e-06, "loss": 0.5989, "step": 5448 }, { "epoch": 0.8127074089265073, "grad_norm": 1.1824145317077637, "learning_rate": 1.7828498113881976e-06, "loss": 0.5362, "step": 5449 }, { "epoch": 0.8128565569186025, "grad_norm": 1.186036467552185, "learning_rate": 1.780097172864399e-06, "loss": 0.6317, "step": 5450 }, { "epoch": 0.8130057049106977, "grad_norm": 1.2250308990478516, "learning_rate": 1.7773464533296237e-06, "loss": 0.5351, "step": 5451 }, { "epoch": 0.8131548529027928, "grad_norm": 0.7919328212738037, "learning_rate": 1.7745976534260457e-06, "loss": 0.5734, "step": 5452 }, { "epoch": 0.813304000894888, "grad_norm": 1.3660168647766113, "learning_rate": 1.7718507737953838e-06, "loss": 0.6281, "step": 5453 }, { "epoch": 0.8134531488869831, "grad_norm": 1.1067304611206055, "learning_rate": 1.7691058150789186e-06, "loss": 0.5762, "step": 5454 }, { "epoch": 0.8136022968790783, "grad_norm": 1.1743899583816528, "learning_rate": 1.7663627779174797e-06, "loss": 0.6517, "step": 5455 }, { "epoch": 0.8137514448711735, "grad_norm": 1.1751776933670044, "learning_rate": 1.7636216629514435e-06, "loss": 0.6083, "step": 5456 }, { "epoch": 0.8139005928632685, "grad_norm": 1.2415682077407837, "learning_rate": 1.7608824708207405e-06, "loss": 0.6341, "step": 5457 }, { "epoch": 0.8140497408553637, "grad_norm": 1.301798701286316, "learning_rate": 1.758145202164857e-06, "loss": 0.6595, "step": 5458 }, { "epoch": 0.814198888847459, "grad_norm": 1.1331344842910767, "learning_rate": 1.7554098576228185e-06, "loss": 0.67, "step": 5459 }, { "epoch": 0.814348036839554, "grad_norm": 1.293369174003601, "learning_rate": 1.752676437833216e-06, "loss": 0.6168, "step": 5460 }, { "epoch": 0.8144971848316492, "grad_norm": 1.1821285486221313, "learning_rate": 1.7499449434341843e-06, "loss": 0.529, "step": 5461 }, { "epoch": 0.8146463328237443, "grad_norm": 1.0347541570663452, "learning_rate": 1.7472153750634014e-06, "loss": 0.545, "step": 5462 }, { "epoch": 0.8147954808158395, "grad_norm": 1.0964715480804443, "learning_rate": 1.7444877333581067e-06, "loss": 0.5531, "step": 5463 }, { "epoch": 0.8149446288079347, "grad_norm": 1.281811237335205, "learning_rate": 1.7417620189550877e-06, "loss": 0.5647, "step": 5464 }, { "epoch": 0.8150937768000298, "grad_norm": 1.1972124576568604, "learning_rate": 1.7390382324906752e-06, "loss": 0.6154, "step": 5465 }, { "epoch": 0.815242924792125, "grad_norm": 1.167380690574646, "learning_rate": 1.7363163746007572e-06, "loss": 0.5974, "step": 5466 }, { "epoch": 0.8153920727842201, "grad_norm": 1.2842949628829956, "learning_rate": 1.7335964459207688e-06, "loss": 0.597, "step": 5467 }, { "epoch": 0.8155412207763153, "grad_norm": 1.1331971883773804, "learning_rate": 1.7308784470856944e-06, "loss": 0.5868, "step": 5468 }, { "epoch": 0.8156903687684105, "grad_norm": 1.2419166564941406, "learning_rate": 1.7281623787300672e-06, "loss": 0.5928, "step": 5469 }, { "epoch": 0.8158395167605056, "grad_norm": 1.2902815341949463, "learning_rate": 1.725448241487976e-06, "loss": 0.5473, "step": 5470 }, { "epoch": 0.8159886647526008, "grad_norm": 1.268829584121704, "learning_rate": 1.7227360359930468e-06, "loss": 0.605, "step": 5471 }, { "epoch": 0.8161378127446959, "grad_norm": 1.2387453317642212, "learning_rate": 1.7200257628784633e-06, "loss": 0.6294, "step": 5472 }, { "epoch": 0.8162869607367911, "grad_norm": 1.1583967208862305, "learning_rate": 1.7173174227769574e-06, "loss": 0.6312, "step": 5473 }, { "epoch": 0.8164361087288863, "grad_norm": 1.195160984992981, "learning_rate": 1.7146110163208108e-06, "loss": 0.6317, "step": 5474 }, { "epoch": 0.8165852567209814, "grad_norm": 0.816421389579773, "learning_rate": 1.711906544141846e-06, "loss": 0.6422, "step": 5475 }, { "epoch": 0.8167344047130766, "grad_norm": 1.1906324625015259, "learning_rate": 1.7092040068714421e-06, "loss": 0.5489, "step": 5476 }, { "epoch": 0.8168835527051717, "grad_norm": 1.3912488222122192, "learning_rate": 1.7065034051405239e-06, "loss": 0.6076, "step": 5477 }, { "epoch": 0.8170327006972669, "grad_norm": 1.231142520904541, "learning_rate": 1.703804739579563e-06, "loss": 0.649, "step": 5478 }, { "epoch": 0.8171818486893621, "grad_norm": 1.1141196489334106, "learning_rate": 1.701108010818583e-06, "loss": 0.4814, "step": 5479 }, { "epoch": 0.8173309966814571, "grad_norm": 1.2012351751327515, "learning_rate": 1.6984132194871516e-06, "loss": 0.5515, "step": 5480 }, { "epoch": 0.8174801446735523, "grad_norm": 1.2508078813552856, "learning_rate": 1.6957203662143818e-06, "loss": 0.5683, "step": 5481 }, { "epoch": 0.8176292926656474, "grad_norm": 1.0978127717971802, "learning_rate": 1.6930294516289403e-06, "loss": 0.5805, "step": 5482 }, { "epoch": 0.8177784406577426, "grad_norm": 1.2522835731506348, "learning_rate": 1.6903404763590403e-06, "loss": 0.5773, "step": 5483 }, { "epoch": 0.8179275886498378, "grad_norm": 1.2436823844909668, "learning_rate": 1.6876534410324352e-06, "loss": 0.5552, "step": 5484 }, { "epoch": 0.8180767366419329, "grad_norm": 1.3085827827453613, "learning_rate": 1.684968346276431e-06, "loss": 0.5938, "step": 5485 }, { "epoch": 0.8182258846340281, "grad_norm": 1.1672295331954956, "learning_rate": 1.6822851927178874e-06, "loss": 0.5864, "step": 5486 }, { "epoch": 0.8183750326261233, "grad_norm": 1.2914059162139893, "learning_rate": 1.6796039809831977e-06, "loss": 0.5324, "step": 5487 }, { "epoch": 0.8185241806182184, "grad_norm": 1.2615877389907837, "learning_rate": 1.6769247116983079e-06, "loss": 0.5648, "step": 5488 }, { "epoch": 0.8186733286103136, "grad_norm": 1.3150362968444824, "learning_rate": 1.6742473854887154e-06, "loss": 0.6565, "step": 5489 }, { "epoch": 0.8188224766024087, "grad_norm": 1.2637923955917358, "learning_rate": 1.6715720029794525e-06, "loss": 0.5481, "step": 5490 }, { "epoch": 0.8189716245945039, "grad_norm": 1.2355529069900513, "learning_rate": 1.6688985647951085e-06, "loss": 0.6246, "step": 5491 }, { "epoch": 0.8191207725865991, "grad_norm": 0.9113160371780396, "learning_rate": 1.666227071559814e-06, "loss": 0.693, "step": 5492 }, { "epoch": 0.8192699205786942, "grad_norm": 1.2626157999038696, "learning_rate": 1.6635575238972478e-06, "loss": 0.6184, "step": 5493 }, { "epoch": 0.8194190685707894, "grad_norm": 1.2516865730285645, "learning_rate": 1.6608899224306264e-06, "loss": 0.6004, "step": 5494 }, { "epoch": 0.8195682165628845, "grad_norm": 1.1257100105285645, "learning_rate": 1.6582242677827286e-06, "loss": 0.5404, "step": 5495 }, { "epoch": 0.8197173645549797, "grad_norm": 1.1606707572937012, "learning_rate": 1.6555605605758606e-06, "loss": 0.5665, "step": 5496 }, { "epoch": 0.8198665125470749, "grad_norm": 0.8545289635658264, "learning_rate": 1.6528988014318848e-06, "loss": 0.6771, "step": 5497 }, { "epoch": 0.82001566053917, "grad_norm": 1.1246281862258911, "learning_rate": 1.650238990972205e-06, "loss": 0.5955, "step": 5498 }, { "epoch": 0.8201648085312652, "grad_norm": 1.2260947227478027, "learning_rate": 1.6475811298177747e-06, "loss": 0.6446, "step": 5499 }, { "epoch": 0.8203139565233603, "grad_norm": 1.2441062927246094, "learning_rate": 1.644925218589083e-06, "loss": 0.5667, "step": 5500 }, { "epoch": 0.8204631045154555, "grad_norm": 1.058296799659729, "learning_rate": 1.6422712579061727e-06, "loss": 0.5027, "step": 5501 }, { "epoch": 0.8206122525075507, "grad_norm": 1.352378010749817, "learning_rate": 1.6396192483886285e-06, "loss": 0.6403, "step": 5502 }, { "epoch": 0.8207614004996457, "grad_norm": 1.1345163583755493, "learning_rate": 1.636969190655574e-06, "loss": 0.5702, "step": 5503 }, { "epoch": 0.8209105484917409, "grad_norm": 1.2150729894638062, "learning_rate": 1.6343210853256885e-06, "loss": 0.5424, "step": 5504 }, { "epoch": 0.821059696483836, "grad_norm": 1.115371823310852, "learning_rate": 1.6316749330171888e-06, "loss": 0.5025, "step": 5505 }, { "epoch": 0.8212088444759312, "grad_norm": 1.1311907768249512, "learning_rate": 1.6290307343478318e-06, "loss": 0.5832, "step": 5506 }, { "epoch": 0.8213579924680264, "grad_norm": 1.2157506942749023, "learning_rate": 1.6263884899349248e-06, "loss": 0.5712, "step": 5507 }, { "epoch": 0.8215071404601215, "grad_norm": 1.2850816249847412, "learning_rate": 1.6237482003953187e-06, "loss": 0.5856, "step": 5508 }, { "epoch": 0.8216562884522167, "grad_norm": 1.196205973625183, "learning_rate": 1.6211098663454016e-06, "loss": 0.5533, "step": 5509 }, { "epoch": 0.8218054364443119, "grad_norm": 1.3171502351760864, "learning_rate": 1.6184734884011123e-06, "loss": 0.5776, "step": 5510 }, { "epoch": 0.821954584436407, "grad_norm": 1.2195088863372803, "learning_rate": 1.6158390671779322e-06, "loss": 0.579, "step": 5511 }, { "epoch": 0.8221037324285022, "grad_norm": 1.2551167011260986, "learning_rate": 1.6132066032908766e-06, "loss": 0.5912, "step": 5512 }, { "epoch": 0.8222528804205973, "grad_norm": 1.1776981353759766, "learning_rate": 1.6105760973545181e-06, "loss": 0.6264, "step": 5513 }, { "epoch": 0.8224020284126925, "grad_norm": 1.3173803091049194, "learning_rate": 1.6079475499829655e-06, "loss": 0.631, "step": 5514 }, { "epoch": 0.8225511764047877, "grad_norm": 0.8025732040405273, "learning_rate": 1.6053209617898646e-06, "loss": 0.6083, "step": 5515 }, { "epoch": 0.8227003243968828, "grad_norm": 1.1592826843261719, "learning_rate": 1.6026963333884127e-06, "loss": 0.5849, "step": 5516 }, { "epoch": 0.822849472388978, "grad_norm": 1.210675597190857, "learning_rate": 1.6000736653913485e-06, "loss": 0.5958, "step": 5517 }, { "epoch": 0.8229986203810731, "grad_norm": 1.1932010650634766, "learning_rate": 1.5974529584109444e-06, "loss": 0.5455, "step": 5518 }, { "epoch": 0.8231477683731683, "grad_norm": 1.1795070171356201, "learning_rate": 1.5948342130590256e-06, "loss": 0.5536, "step": 5519 }, { "epoch": 0.8232969163652635, "grad_norm": 1.1878715753555298, "learning_rate": 1.5922174299469528e-06, "loss": 0.6542, "step": 5520 }, { "epoch": 0.8234460643573586, "grad_norm": 0.8436922430992126, "learning_rate": 1.5896026096856321e-06, "loss": 0.6745, "step": 5521 }, { "epoch": 0.8235952123494538, "grad_norm": 1.2461800575256348, "learning_rate": 1.5869897528855106e-06, "loss": 0.5803, "step": 5522 }, { "epoch": 0.8237443603415489, "grad_norm": 0.8661707043647766, "learning_rate": 1.5843788601565757e-06, "loss": 0.6067, "step": 5523 }, { "epoch": 0.823893508333644, "grad_norm": 1.2561675310134888, "learning_rate": 1.58176993210836e-06, "loss": 0.6015, "step": 5524 }, { "epoch": 0.8240426563257393, "grad_norm": 1.1914883852005005, "learning_rate": 1.5791629693499289e-06, "loss": 0.5839, "step": 5525 }, { "epoch": 0.8241918043178343, "grad_norm": 1.184877872467041, "learning_rate": 1.5765579724898973e-06, "loss": 0.5797, "step": 5526 }, { "epoch": 0.8243409523099295, "grad_norm": 1.1115115880966187, "learning_rate": 1.5739549421364196e-06, "loss": 0.5399, "step": 5527 }, { "epoch": 0.8244901003020246, "grad_norm": 1.2986806631088257, "learning_rate": 1.5713538788971882e-06, "loss": 0.6997, "step": 5528 }, { "epoch": 0.8246392482941198, "grad_norm": 1.2315781116485596, "learning_rate": 1.5687547833794349e-06, "loss": 0.623, "step": 5529 }, { "epoch": 0.824788396286215, "grad_norm": 1.2458086013793945, "learning_rate": 1.5661576561899438e-06, "loss": 0.5908, "step": 5530 }, { "epoch": 0.8249375442783101, "grad_norm": 1.1956076622009277, "learning_rate": 1.563562497935025e-06, "loss": 0.5675, "step": 5531 }, { "epoch": 0.8250866922704053, "grad_norm": 1.1339764595031738, "learning_rate": 1.5609693092205347e-06, "loss": 0.5762, "step": 5532 }, { "epoch": 0.8252358402625005, "grad_norm": 1.1517527103424072, "learning_rate": 1.558378090651872e-06, "loss": 0.5752, "step": 5533 }, { "epoch": 0.8253849882545956, "grad_norm": 0.9760810136795044, "learning_rate": 1.5557888428339706e-06, "loss": 0.4412, "step": 5534 }, { "epoch": 0.8255341362466908, "grad_norm": 1.264962077140808, "learning_rate": 1.5532015663713085e-06, "loss": 0.5881, "step": 5535 }, { "epoch": 0.8256832842387859, "grad_norm": 1.1403037309646606, "learning_rate": 1.5506162618679043e-06, "loss": 0.5288, "step": 5536 }, { "epoch": 0.8258324322308811, "grad_norm": 1.169880986213684, "learning_rate": 1.548032929927309e-06, "loss": 0.5655, "step": 5537 }, { "epoch": 0.8259815802229763, "grad_norm": 1.2561252117156982, "learning_rate": 1.5454515711526187e-06, "loss": 0.5877, "step": 5538 }, { "epoch": 0.8261307282150714, "grad_norm": 1.19745671749115, "learning_rate": 1.5428721861464746e-06, "loss": 0.607, "step": 5539 }, { "epoch": 0.8262798762071666, "grad_norm": 1.1389155387878418, "learning_rate": 1.540294775511043e-06, "loss": 0.5345, "step": 5540 }, { "epoch": 0.8264290241992617, "grad_norm": 1.2302446365356445, "learning_rate": 1.5377193398480406e-06, "loss": 0.6014, "step": 5541 }, { "epoch": 0.8265781721913569, "grad_norm": 1.1996334791183472, "learning_rate": 1.5351458797587205e-06, "loss": 0.6149, "step": 5542 }, { "epoch": 0.8267273201834521, "grad_norm": 1.2747734785079956, "learning_rate": 1.5325743958438698e-06, "loss": 0.5999, "step": 5543 }, { "epoch": 0.8268764681755472, "grad_norm": 1.0798962116241455, "learning_rate": 1.53000488870382e-06, "loss": 0.4794, "step": 5544 }, { "epoch": 0.8270256161676424, "grad_norm": 1.186616063117981, "learning_rate": 1.5274373589384384e-06, "loss": 0.583, "step": 5545 }, { "epoch": 0.8271747641597375, "grad_norm": 1.1441360712051392, "learning_rate": 1.5248718071471346e-06, "loss": 0.6323, "step": 5546 }, { "epoch": 0.8273239121518327, "grad_norm": 1.295761227607727, "learning_rate": 1.5223082339288452e-06, "loss": 0.6469, "step": 5547 }, { "epoch": 0.8274730601439279, "grad_norm": 1.1855427026748657, "learning_rate": 1.5197466398820625e-06, "loss": 0.6415, "step": 5548 }, { "epoch": 0.8276222081360229, "grad_norm": 1.2297041416168213, "learning_rate": 1.5171870256048005e-06, "loss": 0.6206, "step": 5549 }, { "epoch": 0.8277713561281181, "grad_norm": 1.3640727996826172, "learning_rate": 1.5146293916946185e-06, "loss": 0.6411, "step": 5550 }, { "epoch": 0.8279205041202132, "grad_norm": 1.0957828760147095, "learning_rate": 1.512073738748614e-06, "loss": 0.6269, "step": 5551 }, { "epoch": 0.8280696521123084, "grad_norm": 1.2698136568069458, "learning_rate": 1.5095200673634224e-06, "loss": 0.6188, "step": 5552 }, { "epoch": 0.8282188001044036, "grad_norm": 1.1407541036605835, "learning_rate": 1.5069683781352106e-06, "loss": 0.5974, "step": 5553 }, { "epoch": 0.8283679480964987, "grad_norm": 1.1579315662384033, "learning_rate": 1.5044186716596888e-06, "loss": 0.6152, "step": 5554 }, { "epoch": 0.8285170960885939, "grad_norm": 1.3148115873336792, "learning_rate": 1.5018709485321004e-06, "loss": 0.6959, "step": 5555 }, { "epoch": 0.8286662440806891, "grad_norm": 0.8190675377845764, "learning_rate": 1.499325209347231e-06, "loss": 0.6012, "step": 5556 }, { "epoch": 0.8288153920727842, "grad_norm": 1.1647233963012695, "learning_rate": 1.4967814546993975e-06, "loss": 0.6729, "step": 5557 }, { "epoch": 0.8289645400648794, "grad_norm": 1.2497293949127197, "learning_rate": 1.4942396851824582e-06, "loss": 0.6321, "step": 5558 }, { "epoch": 0.8291136880569745, "grad_norm": 1.2352222204208374, "learning_rate": 1.4916999013898027e-06, "loss": 0.591, "step": 5559 }, { "epoch": 0.8292628360490697, "grad_norm": 1.270321249961853, "learning_rate": 1.4891621039143589e-06, "loss": 0.6031, "step": 5560 }, { "epoch": 0.8294119840411649, "grad_norm": 1.0990225076675415, "learning_rate": 1.4866262933485975e-06, "loss": 0.5754, "step": 5561 }, { "epoch": 0.82956113203326, "grad_norm": 1.181523084640503, "learning_rate": 1.4840924702845128e-06, "loss": 0.5837, "step": 5562 }, { "epoch": 0.8297102800253552, "grad_norm": 1.2680048942565918, "learning_rate": 1.4815606353136459e-06, "loss": 0.5995, "step": 5563 }, { "epoch": 0.8298594280174503, "grad_norm": 1.1524237394332886, "learning_rate": 1.4790307890270694e-06, "loss": 0.6222, "step": 5564 }, { "epoch": 0.8300085760095455, "grad_norm": 1.2134593725204468, "learning_rate": 1.4765029320153912e-06, "loss": 0.5931, "step": 5565 }, { "epoch": 0.8301577240016407, "grad_norm": 1.2067877054214478, "learning_rate": 1.4739770648687568e-06, "loss": 0.5379, "step": 5566 }, { "epoch": 0.8303068719937358, "grad_norm": 1.2050379514694214, "learning_rate": 1.4714531881768478e-06, "loss": 0.5595, "step": 5567 }, { "epoch": 0.830456019985831, "grad_norm": 1.2587257623672485, "learning_rate": 1.4689313025288754e-06, "loss": 0.6015, "step": 5568 }, { "epoch": 0.830605167977926, "grad_norm": 0.7754291892051697, "learning_rate": 1.4664114085135916e-06, "loss": 0.6175, "step": 5569 }, { "epoch": 0.8307543159700213, "grad_norm": 1.2550355195999146, "learning_rate": 1.4638935067192828e-06, "loss": 0.6047, "step": 5570 }, { "epoch": 0.8309034639621165, "grad_norm": 1.114356279373169, "learning_rate": 1.4613775977337707e-06, "loss": 0.6129, "step": 5571 }, { "epoch": 0.8310526119542115, "grad_norm": 1.2113916873931885, "learning_rate": 1.4588636821444059e-06, "loss": 0.605, "step": 5572 }, { "epoch": 0.8312017599463067, "grad_norm": 1.0444962978363037, "learning_rate": 1.4563517605380805e-06, "loss": 0.5044, "step": 5573 }, { "epoch": 0.8313509079384018, "grad_norm": 1.2012605667114258, "learning_rate": 1.4538418335012194e-06, "loss": 0.5639, "step": 5574 }, { "epoch": 0.831500055930497, "grad_norm": 1.252685546875, "learning_rate": 1.4513339016197802e-06, "loss": 0.626, "step": 5575 }, { "epoch": 0.8316492039225922, "grad_norm": 1.161568522453308, "learning_rate": 1.4488279654792558e-06, "loss": 0.5752, "step": 5576 }, { "epoch": 0.8317983519146873, "grad_norm": 1.1532703638076782, "learning_rate": 1.446324025664676e-06, "loss": 0.5028, "step": 5577 }, { "epoch": 0.8319474999067825, "grad_norm": 1.1966630220413208, "learning_rate": 1.4438220827605965e-06, "loss": 0.5878, "step": 5578 }, { "epoch": 0.8320966478988776, "grad_norm": 1.177732229232788, "learning_rate": 1.4413221373511132e-06, "loss": 0.4948, "step": 5579 }, { "epoch": 0.8322457958909728, "grad_norm": 1.369682788848877, "learning_rate": 1.4388241900198597e-06, "loss": 0.6164, "step": 5580 }, { "epoch": 0.832394943883068, "grad_norm": 1.1727592945098877, "learning_rate": 1.4363282413499902e-06, "loss": 0.6057, "step": 5581 }, { "epoch": 0.8325440918751631, "grad_norm": 1.2491146326065063, "learning_rate": 1.433834291924201e-06, "loss": 0.5877, "step": 5582 }, { "epoch": 0.8326932398672583, "grad_norm": 1.114404320716858, "learning_rate": 1.4313423423247275e-06, "loss": 0.58, "step": 5583 }, { "epoch": 0.8328423878593535, "grad_norm": 1.1957075595855713, "learning_rate": 1.4288523931333242e-06, "loss": 0.5907, "step": 5584 }, { "epoch": 0.8329915358514486, "grad_norm": 1.1965268850326538, "learning_rate": 1.4263644449312896e-06, "loss": 0.569, "step": 5585 }, { "epoch": 0.8331406838435438, "grad_norm": 1.1422020196914673, "learning_rate": 1.4238784982994503e-06, "loss": 0.5703, "step": 5586 }, { "epoch": 0.8332898318356389, "grad_norm": 1.0042577981948853, "learning_rate": 1.4213945538181651e-06, "loss": 0.5547, "step": 5587 }, { "epoch": 0.8334389798277341, "grad_norm": 1.106259822845459, "learning_rate": 1.418912612067327e-06, "loss": 0.5828, "step": 5588 }, { "epoch": 0.8335881278198293, "grad_norm": 1.1845057010650635, "learning_rate": 1.4164326736263645e-06, "loss": 0.5098, "step": 5589 }, { "epoch": 0.8337372758119244, "grad_norm": 1.2343361377716064, "learning_rate": 1.413954739074227e-06, "loss": 0.5818, "step": 5590 }, { "epoch": 0.8338864238040196, "grad_norm": 1.2032499313354492, "learning_rate": 1.4114788089894128e-06, "loss": 0.5574, "step": 5591 }, { "epoch": 0.8340355717961146, "grad_norm": 0.8185061812400818, "learning_rate": 1.4090048839499426e-06, "loss": 0.6259, "step": 5592 }, { "epoch": 0.8341847197882098, "grad_norm": 1.177548885345459, "learning_rate": 1.4065329645333658e-06, "loss": 0.6076, "step": 5593 }, { "epoch": 0.834333867780305, "grad_norm": 1.1480118036270142, "learning_rate": 1.4040630513167697e-06, "loss": 0.555, "step": 5594 }, { "epoch": 0.8344830157724001, "grad_norm": 1.2765494585037231, "learning_rate": 1.401595144876775e-06, "loss": 0.6088, "step": 5595 }, { "epoch": 0.8346321637644953, "grad_norm": 1.1945668458938599, "learning_rate": 1.3991292457895234e-06, "loss": 0.5762, "step": 5596 }, { "epoch": 0.8347813117565904, "grad_norm": 1.244062066078186, "learning_rate": 1.3966653546306997e-06, "loss": 0.579, "step": 5597 }, { "epoch": 0.8349304597486856, "grad_norm": 1.1488240957260132, "learning_rate": 1.3942034719755127e-06, "loss": 0.6293, "step": 5598 }, { "epoch": 0.8350796077407808, "grad_norm": 1.0878485441207886, "learning_rate": 1.391743598398707e-06, "loss": 0.5183, "step": 5599 }, { "epoch": 0.8352287557328759, "grad_norm": 1.2140158414840698, "learning_rate": 1.3892857344745537e-06, "loss": 0.6076, "step": 5600 }, { "epoch": 0.8353779037249711, "grad_norm": 0.8195977807044983, "learning_rate": 1.386829880776861e-06, "loss": 0.5873, "step": 5601 }, { "epoch": 0.8355270517170662, "grad_norm": 1.2076694965362549, "learning_rate": 1.3843760378789583e-06, "loss": 0.5897, "step": 5602 }, { "epoch": 0.8356761997091614, "grad_norm": 1.2187371253967285, "learning_rate": 1.3819242063537131e-06, "loss": 0.6515, "step": 5603 }, { "epoch": 0.8358253477012566, "grad_norm": 1.1683802604675293, "learning_rate": 1.3794743867735206e-06, "loss": 0.5735, "step": 5604 }, { "epoch": 0.8359744956933517, "grad_norm": 1.2712417840957642, "learning_rate": 1.37702657971031e-06, "loss": 0.6292, "step": 5605 }, { "epoch": 0.8361236436854469, "grad_norm": 1.2142971754074097, "learning_rate": 1.3745807857355342e-06, "loss": 0.5684, "step": 5606 }, { "epoch": 0.8362727916775421, "grad_norm": 1.10639488697052, "learning_rate": 1.3721370054201788e-06, "loss": 0.5579, "step": 5607 }, { "epoch": 0.8364219396696372, "grad_norm": 1.2088122367858887, "learning_rate": 1.3696952393347629e-06, "loss": 0.5702, "step": 5608 }, { "epoch": 0.8365710876617324, "grad_norm": 1.2953553199768066, "learning_rate": 1.3672554880493305e-06, "loss": 0.6211, "step": 5609 }, { "epoch": 0.8367202356538275, "grad_norm": 1.0149829387664795, "learning_rate": 1.3648177521334582e-06, "loss": 0.4692, "step": 5610 }, { "epoch": 0.8368693836459227, "grad_norm": 1.115106225013733, "learning_rate": 1.3623820321562531e-06, "loss": 0.6305, "step": 5611 }, { "epoch": 0.8370185316380179, "grad_norm": 1.2238515615463257, "learning_rate": 1.3599483286863458e-06, "loss": 0.5524, "step": 5612 }, { "epoch": 0.837167679630113, "grad_norm": 1.1552469730377197, "learning_rate": 1.3575166422919006e-06, "loss": 0.6124, "step": 5613 }, { "epoch": 0.8373168276222082, "grad_norm": 1.1337056159973145, "learning_rate": 1.3550869735406124e-06, "loss": 0.5525, "step": 5614 }, { "epoch": 0.8374659756143032, "grad_norm": 0.850429117679596, "learning_rate": 1.352659322999701e-06, "loss": 0.6173, "step": 5615 }, { "epoch": 0.8376151236063984, "grad_norm": 1.2752571105957031, "learning_rate": 1.350233691235915e-06, "loss": 0.589, "step": 5616 }, { "epoch": 0.8377642715984936, "grad_norm": 1.2738878726959229, "learning_rate": 1.3478100788155413e-06, "loss": 0.574, "step": 5617 }, { "epoch": 0.8379134195905887, "grad_norm": 1.2842719554901123, "learning_rate": 1.3453884863043798e-06, "loss": 0.6492, "step": 5618 }, { "epoch": 0.8380625675826839, "grad_norm": 0.8276585936546326, "learning_rate": 1.3429689142677704e-06, "loss": 0.6278, "step": 5619 }, { "epoch": 0.838211715574779, "grad_norm": 1.1576423645019531, "learning_rate": 1.3405513632705792e-06, "loss": 0.5257, "step": 5620 }, { "epoch": 0.8383608635668742, "grad_norm": 1.3078689575195312, "learning_rate": 1.3381358338771954e-06, "loss": 0.6005, "step": 5621 }, { "epoch": 0.8385100115589694, "grad_norm": 1.1295043230056763, "learning_rate": 1.3357223266515406e-06, "loss": 0.602, "step": 5622 }, { "epoch": 0.8386591595510645, "grad_norm": 1.0842266082763672, "learning_rate": 1.333310842157064e-06, "loss": 0.5483, "step": 5623 }, { "epoch": 0.8388083075431597, "grad_norm": 1.2156641483306885, "learning_rate": 1.330901380956745e-06, "loss": 0.5473, "step": 5624 }, { "epoch": 0.8389574555352548, "grad_norm": 1.2690577507019043, "learning_rate": 1.3284939436130816e-06, "loss": 0.584, "step": 5625 }, { "epoch": 0.83910660352735, "grad_norm": 1.2993682622909546, "learning_rate": 1.3260885306881122e-06, "loss": 0.6323, "step": 5626 }, { "epoch": 0.8392557515194452, "grad_norm": 1.0494853258132935, "learning_rate": 1.3236851427433917e-06, "loss": 0.5239, "step": 5627 }, { "epoch": 0.8394048995115403, "grad_norm": 0.8618642687797546, "learning_rate": 1.3212837803400068e-06, "loss": 0.6438, "step": 5628 }, { "epoch": 0.8395540475036355, "grad_norm": 1.0820026397705078, "learning_rate": 1.3188844440385716e-06, "loss": 0.4903, "step": 5629 }, { "epoch": 0.8397031954957307, "grad_norm": 1.2175202369689941, "learning_rate": 1.3164871343992292e-06, "loss": 0.5921, "step": 5630 }, { "epoch": 0.8398523434878258, "grad_norm": 1.2458380460739136, "learning_rate": 1.3140918519816415e-06, "loss": 0.5978, "step": 5631 }, { "epoch": 0.840001491479921, "grad_norm": 1.2405219078063965, "learning_rate": 1.3116985973450058e-06, "loss": 0.6019, "step": 5632 }, { "epoch": 0.8401506394720161, "grad_norm": 1.109967589378357, "learning_rate": 1.3093073710480442e-06, "loss": 0.5586, "step": 5633 }, { "epoch": 0.8402997874641113, "grad_norm": 1.1130454540252686, "learning_rate": 1.3069181736489978e-06, "loss": 0.5725, "step": 5634 }, { "epoch": 0.8404489354562065, "grad_norm": 1.1959396600723267, "learning_rate": 1.3045310057056459e-06, "loss": 0.5746, "step": 5635 }, { "epoch": 0.8405980834483016, "grad_norm": 1.1133822202682495, "learning_rate": 1.3021458677752884e-06, "loss": 0.516, "step": 5636 }, { "epoch": 0.8407472314403968, "grad_norm": 1.236522912979126, "learning_rate": 1.2997627604147468e-06, "loss": 0.6007, "step": 5637 }, { "epoch": 0.8408963794324918, "grad_norm": 1.078040599822998, "learning_rate": 1.2973816841803756e-06, "loss": 0.616, "step": 5638 }, { "epoch": 0.841045527424587, "grad_norm": 1.250964641571045, "learning_rate": 1.2950026396280536e-06, "loss": 0.6039, "step": 5639 }, { "epoch": 0.8411946754166822, "grad_norm": 1.2028231620788574, "learning_rate": 1.2926256273131799e-06, "loss": 0.532, "step": 5640 }, { "epoch": 0.8413438234087773, "grad_norm": 1.1905592679977417, "learning_rate": 1.2902506477906862e-06, "loss": 0.5096, "step": 5641 }, { "epoch": 0.8414929714008725, "grad_norm": 0.8365747332572937, "learning_rate": 1.2878777016150267e-06, "loss": 0.653, "step": 5642 }, { "epoch": 0.8416421193929676, "grad_norm": 1.1823142766952515, "learning_rate": 1.2855067893401773e-06, "loss": 0.5248, "step": 5643 }, { "epoch": 0.8417912673850628, "grad_norm": 1.0909199714660645, "learning_rate": 1.2831379115196473e-06, "loss": 0.4791, "step": 5644 }, { "epoch": 0.841940415377158, "grad_norm": 1.1486619710922241, "learning_rate": 1.2807710687064667e-06, "loss": 0.5776, "step": 5645 }, { "epoch": 0.8420895633692531, "grad_norm": 1.2232073545455933, "learning_rate": 1.2784062614531866e-06, "loss": 0.5721, "step": 5646 }, { "epoch": 0.8422387113613483, "grad_norm": 1.2072478532791138, "learning_rate": 1.2760434903118868e-06, "loss": 0.5102, "step": 5647 }, { "epoch": 0.8423878593534434, "grad_norm": 1.227023720741272, "learning_rate": 1.2736827558341735e-06, "loss": 0.5801, "step": 5648 }, { "epoch": 0.8425370073455386, "grad_norm": 1.2539502382278442, "learning_rate": 1.2713240585711762e-06, "loss": 0.674, "step": 5649 }, { "epoch": 0.8426861553376338, "grad_norm": 1.0937995910644531, "learning_rate": 1.2689673990735428e-06, "loss": 0.4835, "step": 5650 }, { "epoch": 0.8428353033297289, "grad_norm": 1.2308714389801025, "learning_rate": 1.266612777891455e-06, "loss": 0.6144, "step": 5651 }, { "epoch": 0.8429844513218241, "grad_norm": 1.2197831869125366, "learning_rate": 1.2642601955746126e-06, "loss": 0.6569, "step": 5652 }, { "epoch": 0.8431335993139193, "grad_norm": 1.2599270343780518, "learning_rate": 1.2619096526722418e-06, "loss": 0.607, "step": 5653 }, { "epoch": 0.8432827473060144, "grad_norm": 1.3190168142318726, "learning_rate": 1.2595611497330917e-06, "loss": 0.5583, "step": 5654 }, { "epoch": 0.8434318952981096, "grad_norm": 0.8130264282226562, "learning_rate": 1.257214687305437e-06, "loss": 0.6268, "step": 5655 }, { "epoch": 0.8435810432902047, "grad_norm": 1.1327426433563232, "learning_rate": 1.2548702659370703e-06, "loss": 0.5487, "step": 5656 }, { "epoch": 0.8437301912822999, "grad_norm": 1.1547586917877197, "learning_rate": 1.2525278861753142e-06, "loss": 0.6039, "step": 5657 }, { "epoch": 0.8438793392743951, "grad_norm": 1.1572232246398926, "learning_rate": 1.2501875485670145e-06, "loss": 0.5779, "step": 5658 }, { "epoch": 0.8440284872664902, "grad_norm": 1.256182312965393, "learning_rate": 1.247849253658533e-06, "loss": 0.5033, "step": 5659 }, { "epoch": 0.8441776352585854, "grad_norm": 1.1430400609970093, "learning_rate": 1.2455130019957607e-06, "loss": 0.5355, "step": 5660 }, { "epoch": 0.8443267832506804, "grad_norm": 1.2387865781784058, "learning_rate": 1.2431787941241157e-06, "loss": 0.5505, "step": 5661 }, { "epoch": 0.8444759312427756, "grad_norm": 0.8754222989082336, "learning_rate": 1.240846630588529e-06, "loss": 0.6196, "step": 5662 }, { "epoch": 0.8446250792348708, "grad_norm": 1.2822012901306152, "learning_rate": 1.2385165119334607e-06, "loss": 0.5975, "step": 5663 }, { "epoch": 0.8447742272269659, "grad_norm": 1.126091480255127, "learning_rate": 1.2361884387028933e-06, "loss": 0.5214, "step": 5664 }, { "epoch": 0.8449233752190611, "grad_norm": 1.312314748764038, "learning_rate": 1.2338624114403263e-06, "loss": 0.5794, "step": 5665 }, { "epoch": 0.8450725232111562, "grad_norm": 1.2447084188461304, "learning_rate": 1.231538430688789e-06, "loss": 0.5713, "step": 5666 }, { "epoch": 0.8452216712032514, "grad_norm": 1.200161337852478, "learning_rate": 1.2292164969908294e-06, "loss": 0.5489, "step": 5667 }, { "epoch": 0.8453708191953466, "grad_norm": 1.162123680114746, "learning_rate": 1.226896610888516e-06, "loss": 0.5721, "step": 5668 }, { "epoch": 0.8455199671874417, "grad_norm": 1.1261727809906006, "learning_rate": 1.2245787729234404e-06, "loss": 0.5676, "step": 5669 }, { "epoch": 0.8456691151795369, "grad_norm": 1.235496163368225, "learning_rate": 1.2222629836367227e-06, "loss": 0.545, "step": 5670 }, { "epoch": 0.845818263171632, "grad_norm": 1.1741663217544556, "learning_rate": 1.2199492435689918e-06, "loss": 0.6271, "step": 5671 }, { "epoch": 0.8459674111637272, "grad_norm": 1.0985851287841797, "learning_rate": 1.217637553260409e-06, "loss": 0.5387, "step": 5672 }, { "epoch": 0.8461165591558224, "grad_norm": 1.0758216381072998, "learning_rate": 1.2153279132506535e-06, "loss": 0.5488, "step": 5673 }, { "epoch": 0.8462657071479175, "grad_norm": 1.2606511116027832, "learning_rate": 1.2130203240789228e-06, "loss": 0.5842, "step": 5674 }, { "epoch": 0.8464148551400127, "grad_norm": 1.1670249700546265, "learning_rate": 1.2107147862839396e-06, "loss": 0.5868, "step": 5675 }, { "epoch": 0.8465640031321078, "grad_norm": 1.1894845962524414, "learning_rate": 1.2084113004039467e-06, "loss": 0.5377, "step": 5676 }, { "epoch": 0.846713151124203, "grad_norm": 1.2133057117462158, "learning_rate": 1.20610986697671e-06, "loss": 0.5504, "step": 5677 }, { "epoch": 0.8468622991162982, "grad_norm": 1.108642816543579, "learning_rate": 1.2038104865395072e-06, "loss": 0.5563, "step": 5678 }, { "epoch": 0.8470114471083933, "grad_norm": 1.2545204162597656, "learning_rate": 1.2015131596291518e-06, "loss": 0.5974, "step": 5679 }, { "epoch": 0.8471605951004885, "grad_norm": 1.1055926084518433, "learning_rate": 1.1992178867819636e-06, "loss": 0.5489, "step": 5680 }, { "epoch": 0.8473097430925837, "grad_norm": 1.2874630689620972, "learning_rate": 1.1969246685337909e-06, "loss": 0.5752, "step": 5681 }, { "epoch": 0.8474588910846788, "grad_norm": 1.2038631439208984, "learning_rate": 1.1946335054199999e-06, "loss": 0.5741, "step": 5682 }, { "epoch": 0.847608039076774, "grad_norm": 1.1338770389556885, "learning_rate": 1.192344397975479e-06, "loss": 0.5688, "step": 5683 }, { "epoch": 0.847757187068869, "grad_norm": 1.1068757772445679, "learning_rate": 1.1900573467346322e-06, "loss": 0.5266, "step": 5684 }, { "epoch": 0.8479063350609642, "grad_norm": 1.1560535430908203, "learning_rate": 1.1877723522313867e-06, "loss": 0.5599, "step": 5685 }, { "epoch": 0.8480554830530594, "grad_norm": 1.194203495979309, "learning_rate": 1.1854894149991902e-06, "loss": 0.5715, "step": 5686 }, { "epoch": 0.8482046310451545, "grad_norm": 1.1397984027862549, "learning_rate": 1.1832085355710087e-06, "loss": 0.5199, "step": 5687 }, { "epoch": 0.8483537790372497, "grad_norm": 1.1506437063217163, "learning_rate": 1.1809297144793285e-06, "loss": 0.5795, "step": 5688 }, { "epoch": 0.8485029270293448, "grad_norm": 1.1986644268035889, "learning_rate": 1.1786529522561564e-06, "loss": 0.5578, "step": 5689 }, { "epoch": 0.84865207502144, "grad_norm": 1.2238051891326904, "learning_rate": 1.1763782494330135e-06, "loss": 0.5988, "step": 5690 }, { "epoch": 0.8488012230135352, "grad_norm": 1.2290977239608765, "learning_rate": 1.174105606540945e-06, "loss": 0.5698, "step": 5691 }, { "epoch": 0.8489503710056303, "grad_norm": 1.151793122291565, "learning_rate": 1.171835024110517e-06, "loss": 0.5481, "step": 5692 }, { "epoch": 0.8490995189977255, "grad_norm": 1.2892696857452393, "learning_rate": 1.1695665026718073e-06, "loss": 0.6092, "step": 5693 }, { "epoch": 0.8492486669898206, "grad_norm": 1.270787239074707, "learning_rate": 1.167300042754419e-06, "loss": 0.6861, "step": 5694 }, { "epoch": 0.8493978149819158, "grad_norm": 1.152498483657837, "learning_rate": 1.16503564488747e-06, "loss": 0.5947, "step": 5695 }, { "epoch": 0.849546962974011, "grad_norm": 1.1608744859695435, "learning_rate": 1.1627733095996008e-06, "loss": 0.562, "step": 5696 }, { "epoch": 0.8496961109661061, "grad_norm": 1.1923898458480835, "learning_rate": 1.1605130374189676e-06, "loss": 0.6224, "step": 5697 }, { "epoch": 0.8498452589582013, "grad_norm": 1.1780306100845337, "learning_rate": 1.1582548288732465e-06, "loss": 0.5431, "step": 5698 }, { "epoch": 0.8499944069502964, "grad_norm": 1.24868643283844, "learning_rate": 1.1559986844896265e-06, "loss": 0.5653, "step": 5699 }, { "epoch": 0.8501435549423916, "grad_norm": 1.309256672859192, "learning_rate": 1.153744604794822e-06, "loss": 0.6395, "step": 5700 }, { "epoch": 0.8502927029344868, "grad_norm": 1.1641677618026733, "learning_rate": 1.151492590315062e-06, "loss": 0.5201, "step": 5701 }, { "epoch": 0.8504418509265819, "grad_norm": 1.0970529317855835, "learning_rate": 1.149242641576096e-06, "loss": 0.5217, "step": 5702 }, { "epoch": 0.8505909989186771, "grad_norm": 1.1987098455429077, "learning_rate": 1.1469947591031848e-06, "loss": 0.6338, "step": 5703 }, { "epoch": 0.8507401469107723, "grad_norm": 1.2865179777145386, "learning_rate": 1.1447489434211124e-06, "loss": 0.5928, "step": 5704 }, { "epoch": 0.8508892949028674, "grad_norm": 1.1695051193237305, "learning_rate": 1.1425051950541798e-06, "loss": 0.5573, "step": 5705 }, { "epoch": 0.8510384428949626, "grad_norm": 1.0393847227096558, "learning_rate": 1.1402635145262043e-06, "loss": 0.5735, "step": 5706 }, { "epoch": 0.8511875908870576, "grad_norm": 0.8451313972473145, "learning_rate": 1.13802390236052e-06, "loss": 0.6381, "step": 5707 }, { "epoch": 0.8513367388791528, "grad_norm": 1.2093634605407715, "learning_rate": 1.13578635907998e-06, "loss": 0.5226, "step": 5708 }, { "epoch": 0.851485886871248, "grad_norm": 1.1297417879104614, "learning_rate": 1.133550885206951e-06, "loss": 0.5597, "step": 5709 }, { "epoch": 0.8516350348633431, "grad_norm": 1.296160340309143, "learning_rate": 1.13131748126332e-06, "loss": 0.6309, "step": 5710 }, { "epoch": 0.8517841828554383, "grad_norm": 1.2147340774536133, "learning_rate": 1.1290861477704918e-06, "loss": 0.5508, "step": 5711 }, { "epoch": 0.8519333308475334, "grad_norm": 1.1048099994659424, "learning_rate": 1.12685688524938e-06, "loss": 0.6319, "step": 5712 }, { "epoch": 0.8520824788396286, "grad_norm": 1.1801940202713013, "learning_rate": 1.1246296942204216e-06, "loss": 0.5877, "step": 5713 }, { "epoch": 0.8522316268317238, "grad_norm": 1.186392068862915, "learning_rate": 1.122404575203574e-06, "loss": 0.5706, "step": 5714 }, { "epoch": 0.8523807748238189, "grad_norm": 1.2834199666976929, "learning_rate": 1.1201815287183005e-06, "loss": 0.6176, "step": 5715 }, { "epoch": 0.8525299228159141, "grad_norm": 1.3230476379394531, "learning_rate": 1.1179605552835859e-06, "loss": 0.6431, "step": 5716 }, { "epoch": 0.8526790708080092, "grad_norm": 1.1937782764434814, "learning_rate": 1.1157416554179345e-06, "loss": 0.4994, "step": 5717 }, { "epoch": 0.8528282188001044, "grad_norm": 1.3135669231414795, "learning_rate": 1.1135248296393574e-06, "loss": 0.6092, "step": 5718 }, { "epoch": 0.8529773667921996, "grad_norm": 1.1070177555084229, "learning_rate": 1.1113100784653895e-06, "loss": 0.5229, "step": 5719 }, { "epoch": 0.8531265147842947, "grad_norm": 1.2030432224273682, "learning_rate": 1.1090974024130795e-06, "loss": 0.6256, "step": 5720 }, { "epoch": 0.8532756627763899, "grad_norm": 1.121352195739746, "learning_rate": 1.1068868019989864e-06, "loss": 0.5802, "step": 5721 }, { "epoch": 0.853424810768485, "grad_norm": 1.1975610256195068, "learning_rate": 1.1046782777391951e-06, "loss": 0.5651, "step": 5722 }, { "epoch": 0.8535739587605802, "grad_norm": 1.2710343599319458, "learning_rate": 1.1024718301492975e-06, "loss": 0.6014, "step": 5723 }, { "epoch": 0.8537231067526754, "grad_norm": 1.2076154947280884, "learning_rate": 1.1002674597444019e-06, "loss": 0.5965, "step": 5724 }, { "epoch": 0.8538722547447705, "grad_norm": 1.1667304039001465, "learning_rate": 1.0980651670391317e-06, "loss": 0.5774, "step": 5725 }, { "epoch": 0.8540214027368657, "grad_norm": 0.8355014324188232, "learning_rate": 1.0958649525476306e-06, "loss": 0.6327, "step": 5726 }, { "epoch": 0.8541705507289609, "grad_norm": 1.1073698997497559, "learning_rate": 1.0936668167835484e-06, "loss": 0.5603, "step": 5727 }, { "epoch": 0.854319698721056, "grad_norm": 1.1864107847213745, "learning_rate": 1.0914707602600549e-06, "loss": 0.5808, "step": 5728 }, { "epoch": 0.8544688467131512, "grad_norm": 1.1615321636199951, "learning_rate": 1.0892767834898343e-06, "loss": 0.6306, "step": 5729 }, { "epoch": 0.8546179947052462, "grad_norm": 1.26863431930542, "learning_rate": 1.0870848869850847e-06, "loss": 0.6085, "step": 5730 }, { "epoch": 0.8547671426973414, "grad_norm": 1.2379307746887207, "learning_rate": 1.084895071257518e-06, "loss": 0.5642, "step": 5731 }, { "epoch": 0.8549162906894366, "grad_norm": 1.225091814994812, "learning_rate": 1.0827073368183627e-06, "loss": 0.5615, "step": 5732 }, { "epoch": 0.8550654386815317, "grad_norm": 1.1378129720687866, "learning_rate": 1.080521684178356e-06, "loss": 0.5324, "step": 5733 }, { "epoch": 0.8552145866736269, "grad_norm": 1.0950040817260742, "learning_rate": 1.0783381138477544e-06, "loss": 0.607, "step": 5734 }, { "epoch": 0.855363734665722, "grad_norm": 1.2458995580673218, "learning_rate": 1.0761566263363254e-06, "loss": 0.654, "step": 5735 }, { "epoch": 0.8555128826578172, "grad_norm": 1.197131633758545, "learning_rate": 1.073977222153355e-06, "loss": 0.6584, "step": 5736 }, { "epoch": 0.8556620306499124, "grad_norm": 1.285779356956482, "learning_rate": 1.071799901807633e-06, "loss": 0.5796, "step": 5737 }, { "epoch": 0.8558111786420075, "grad_norm": 1.2834126949310303, "learning_rate": 1.0696246658074728e-06, "loss": 0.6644, "step": 5738 }, { "epoch": 0.8559603266341027, "grad_norm": 1.1726936101913452, "learning_rate": 1.0674515146606957e-06, "loss": 0.541, "step": 5739 }, { "epoch": 0.8561094746261978, "grad_norm": 1.1508183479309082, "learning_rate": 1.0652804488746382e-06, "loss": 0.5136, "step": 5740 }, { "epoch": 0.856258622618293, "grad_norm": 0.8369187116622925, "learning_rate": 1.0631114689561496e-06, "loss": 0.5963, "step": 5741 }, { "epoch": 0.8564077706103882, "grad_norm": 1.2315618991851807, "learning_rate": 1.0609445754115944e-06, "loss": 0.6608, "step": 5742 }, { "epoch": 0.8565569186024833, "grad_norm": 1.22354257106781, "learning_rate": 1.0587797687468438e-06, "loss": 0.5357, "step": 5743 }, { "epoch": 0.8567060665945785, "grad_norm": 1.1704339981079102, "learning_rate": 1.0566170494672878e-06, "loss": 0.5657, "step": 5744 }, { "epoch": 0.8568552145866736, "grad_norm": 1.1515579223632812, "learning_rate": 1.0544564180778283e-06, "loss": 0.483, "step": 5745 }, { "epoch": 0.8570043625787688, "grad_norm": 1.1535167694091797, "learning_rate": 1.0522978750828761e-06, "loss": 0.5846, "step": 5746 }, { "epoch": 0.857153510570864, "grad_norm": 1.184295415878296, "learning_rate": 1.050141420986357e-06, "loss": 0.5335, "step": 5747 }, { "epoch": 0.8573026585629591, "grad_norm": 1.1527299880981445, "learning_rate": 1.0479870562917105e-06, "loss": 0.6367, "step": 5748 }, { "epoch": 0.8574518065550543, "grad_norm": 1.2022831439971924, "learning_rate": 1.0458347815018855e-06, "loss": 0.5608, "step": 5749 }, { "epoch": 0.8576009545471495, "grad_norm": 1.2741776704788208, "learning_rate": 1.0436845971193465e-06, "loss": 0.5987, "step": 5750 }, { "epoch": 0.8577501025392446, "grad_norm": 0.8747479915618896, "learning_rate": 1.0415365036460679e-06, "loss": 0.6246, "step": 5751 }, { "epoch": 0.8578992505313398, "grad_norm": 1.282515287399292, "learning_rate": 1.0393905015835325e-06, "loss": 0.6, "step": 5752 }, { "epoch": 0.8580483985234348, "grad_norm": 1.1903188228607178, "learning_rate": 1.0372465914327402e-06, "loss": 0.6268, "step": 5753 }, { "epoch": 0.85819754651553, "grad_norm": 1.1781517267227173, "learning_rate": 1.0351047736942e-06, "loss": 0.5964, "step": 5754 }, { "epoch": 0.8583466945076252, "grad_norm": 1.215907335281372, "learning_rate": 1.0329650488679366e-06, "loss": 0.5445, "step": 5755 }, { "epoch": 0.8584958424997203, "grad_norm": 1.19926118850708, "learning_rate": 1.030827417453475e-06, "loss": 0.6077, "step": 5756 }, { "epoch": 0.8586449904918155, "grad_norm": 1.1707825660705566, "learning_rate": 1.028691879949868e-06, "loss": 0.5026, "step": 5757 }, { "epoch": 0.8587941384839106, "grad_norm": 1.3341832160949707, "learning_rate": 1.0265584368556636e-06, "loss": 0.6488, "step": 5758 }, { "epoch": 0.8589432864760058, "grad_norm": 1.3064590692520142, "learning_rate": 1.02442708866893e-06, "loss": 0.6014, "step": 5759 }, { "epoch": 0.859092434468101, "grad_norm": 1.2142189741134644, "learning_rate": 1.0222978358872448e-06, "loss": 0.5527, "step": 5760 }, { "epoch": 0.8592415824601961, "grad_norm": 1.2586095333099365, "learning_rate": 1.020170679007697e-06, "loss": 0.5819, "step": 5761 }, { "epoch": 0.8593907304522913, "grad_norm": 1.1947425603866577, "learning_rate": 1.0180456185268805e-06, "loss": 0.6231, "step": 5762 }, { "epoch": 0.8595398784443864, "grad_norm": 1.2557384967803955, "learning_rate": 1.0159226549409074e-06, "loss": 0.5007, "step": 5763 }, { "epoch": 0.8596890264364816, "grad_norm": 1.1131794452667236, "learning_rate": 1.0138017887453988e-06, "loss": 0.5809, "step": 5764 }, { "epoch": 0.8598381744285768, "grad_norm": 1.224048137664795, "learning_rate": 1.011683020435479e-06, "loss": 0.5997, "step": 5765 }, { "epoch": 0.8599873224206719, "grad_norm": 1.1688963174819946, "learning_rate": 1.009566350505793e-06, "loss": 0.5458, "step": 5766 }, { "epoch": 0.8601364704127671, "grad_norm": 1.3572118282318115, "learning_rate": 1.0074517794504913e-06, "loss": 0.622, "step": 5767 }, { "epoch": 0.8602856184048622, "grad_norm": 1.2232671976089478, "learning_rate": 1.0053393077632302e-06, "loss": 0.565, "step": 5768 }, { "epoch": 0.8604347663969574, "grad_norm": 1.3246995210647583, "learning_rate": 1.0032289359371816e-06, "loss": 0.697, "step": 5769 }, { "epoch": 0.8605839143890526, "grad_norm": 0.8547030687332153, "learning_rate": 1.0011206644650273e-06, "loss": 0.6254, "step": 5770 }, { "epoch": 0.8607330623811477, "grad_norm": 1.1383857727050781, "learning_rate": 9.990144938389546e-07, "loss": 0.5662, "step": 5771 }, { "epoch": 0.8608822103732429, "grad_norm": 1.1781556606292725, "learning_rate": 9.96910424550661e-07, "loss": 0.5984, "step": 5772 }, { "epoch": 0.8610313583653381, "grad_norm": 1.2882709503173828, "learning_rate": 9.948084570913585e-07, "loss": 0.6628, "step": 5773 }, { "epoch": 0.8611805063574332, "grad_norm": 1.2582745552062988, "learning_rate": 9.92708591951762e-07, "loss": 0.5949, "step": 5774 }, { "epoch": 0.8613296543495284, "grad_norm": 1.145880103111267, "learning_rate": 9.906108296221007e-07, "loss": 0.5678, "step": 5775 }, { "epoch": 0.8614788023416234, "grad_norm": 1.1759259700775146, "learning_rate": 9.885151705921115e-07, "loss": 0.6231, "step": 5776 }, { "epoch": 0.8616279503337186, "grad_norm": 1.2138330936431885, "learning_rate": 9.864216153510364e-07, "loss": 0.5854, "step": 5777 }, { "epoch": 0.8617770983258138, "grad_norm": 1.2644691467285156, "learning_rate": 9.843301643876292e-07, "loss": 0.6163, "step": 5778 }, { "epoch": 0.8619262463179089, "grad_norm": 1.17783522605896, "learning_rate": 9.822408181901544e-07, "loss": 0.4945, "step": 5779 }, { "epoch": 0.8620753943100041, "grad_norm": 1.2142058610916138, "learning_rate": 9.801535772463856e-07, "loss": 0.622, "step": 5780 }, { "epoch": 0.8622245423020992, "grad_norm": 1.2884553670883179, "learning_rate": 9.78068442043597e-07, "loss": 0.662, "step": 5781 }, { "epoch": 0.8623736902941944, "grad_norm": 1.1888762712478638, "learning_rate": 9.759854130685798e-07, "loss": 0.5999, "step": 5782 }, { "epoch": 0.8625228382862896, "grad_norm": 1.2243396043777466, "learning_rate": 9.739044908076301e-07, "loss": 0.596, "step": 5783 }, { "epoch": 0.8626719862783847, "grad_norm": 1.1696271896362305, "learning_rate": 9.718256757465526e-07, "loss": 0.5364, "step": 5784 }, { "epoch": 0.8628211342704799, "grad_norm": 0.8609404563903809, "learning_rate": 9.697489683706607e-07, "loss": 0.6124, "step": 5785 }, { "epoch": 0.862970282262575, "grad_norm": 1.1488126516342163, "learning_rate": 9.67674369164776e-07, "loss": 0.5229, "step": 5786 }, { "epoch": 0.8631194302546702, "grad_norm": 1.418496012687683, "learning_rate": 9.656018786132236e-07, "loss": 0.727, "step": 5787 }, { "epoch": 0.8632685782467654, "grad_norm": 1.2322062253952026, "learning_rate": 9.63531497199841e-07, "loss": 0.6005, "step": 5788 }, { "epoch": 0.8634177262388605, "grad_norm": 1.2076349258422852, "learning_rate": 9.614632254079748e-07, "loss": 0.5882, "step": 5789 }, { "epoch": 0.8635668742309557, "grad_norm": 1.2540109157562256, "learning_rate": 9.59397063720472e-07, "loss": 0.5754, "step": 5790 }, { "epoch": 0.8637160222230508, "grad_norm": 1.3294235467910767, "learning_rate": 9.573330126196912e-07, "loss": 0.5515, "step": 5791 }, { "epoch": 0.863865170215146, "grad_norm": 1.286733627319336, "learning_rate": 9.552710725875047e-07, "loss": 0.5879, "step": 5792 }, { "epoch": 0.8640143182072412, "grad_norm": 1.3409323692321777, "learning_rate": 9.532112441052799e-07, "loss": 0.6362, "step": 5793 }, { "epoch": 0.8641634661993363, "grad_norm": 1.1330102682113647, "learning_rate": 9.511535276538986e-07, "loss": 0.5248, "step": 5794 }, { "epoch": 0.8643126141914315, "grad_norm": 1.1991902589797974, "learning_rate": 9.490979237137487e-07, "loss": 0.5551, "step": 5795 }, { "epoch": 0.8644617621835265, "grad_norm": 1.2362803220748901, "learning_rate": 9.470444327647221e-07, "loss": 0.5788, "step": 5796 }, { "epoch": 0.8646109101756217, "grad_norm": 1.1601685285568237, "learning_rate": 9.449930552862208e-07, "loss": 0.5776, "step": 5797 }, { "epoch": 0.864760058167717, "grad_norm": 1.157646656036377, "learning_rate": 9.429437917571526e-07, "loss": 0.5415, "step": 5798 }, { "epoch": 0.864909206159812, "grad_norm": 1.1511952877044678, "learning_rate": 9.408966426559296e-07, "loss": 0.5487, "step": 5799 }, { "epoch": 0.8650583541519072, "grad_norm": 1.2398180961608887, "learning_rate": 9.388516084604704e-07, "loss": 0.494, "step": 5800 }, { "epoch": 0.8652075021440024, "grad_norm": 1.2684519290924072, "learning_rate": 9.368086896482065e-07, "loss": 0.5945, "step": 5801 }, { "epoch": 0.8653566501360975, "grad_norm": 1.2751305103302002, "learning_rate": 9.347678866960664e-07, "loss": 0.6429, "step": 5802 }, { "epoch": 0.8655057981281927, "grad_norm": 1.2497453689575195, "learning_rate": 9.3272920008049e-07, "loss": 0.5756, "step": 5803 }, { "epoch": 0.8656549461202878, "grad_norm": 1.1410086154937744, "learning_rate": 9.306926302774233e-07, "loss": 0.6319, "step": 5804 }, { "epoch": 0.865804094112383, "grad_norm": 1.171292781829834, "learning_rate": 9.286581777623127e-07, "loss": 0.5973, "step": 5805 }, { "epoch": 0.8659532421044782, "grad_norm": 1.172917366027832, "learning_rate": 9.26625843010116e-07, "loss": 0.6409, "step": 5806 }, { "epoch": 0.8661023900965733, "grad_norm": 1.267665147781372, "learning_rate": 9.24595626495296e-07, "loss": 0.5241, "step": 5807 }, { "epoch": 0.8662515380886685, "grad_norm": 1.2021121978759766, "learning_rate": 9.225675286918201e-07, "loss": 0.6264, "step": 5808 }, { "epoch": 0.8664006860807636, "grad_norm": 1.2620391845703125, "learning_rate": 9.205415500731551e-07, "loss": 0.6369, "step": 5809 }, { "epoch": 0.8665498340728588, "grad_norm": 1.0880961418151855, "learning_rate": 9.185176911122873e-07, "loss": 0.5739, "step": 5810 }, { "epoch": 0.866698982064954, "grad_norm": 1.1092993021011353, "learning_rate": 9.164959522816941e-07, "loss": 0.5793, "step": 5811 }, { "epoch": 0.8668481300570491, "grad_norm": 1.3260711431503296, "learning_rate": 9.144763340533635e-07, "loss": 0.6142, "step": 5812 }, { "epoch": 0.8669972780491443, "grad_norm": 0.8390570282936096, "learning_rate": 9.124588368987896e-07, "loss": 0.645, "step": 5813 }, { "epoch": 0.8671464260412394, "grad_norm": 1.3293288946151733, "learning_rate": 9.104434612889723e-07, "loss": 0.6494, "step": 5814 }, { "epoch": 0.8672955740333346, "grad_norm": 1.2639734745025635, "learning_rate": 9.084302076944096e-07, "loss": 0.5993, "step": 5815 }, { "epoch": 0.8674447220254298, "grad_norm": 1.2349292039871216, "learning_rate": 9.0641907658511e-07, "loss": 0.5976, "step": 5816 }, { "epoch": 0.8675938700175249, "grad_norm": 1.0700432062149048, "learning_rate": 9.044100684305857e-07, "loss": 0.4927, "step": 5817 }, { "epoch": 0.8677430180096201, "grad_norm": 1.2224708795547485, "learning_rate": 9.024031836998525e-07, "loss": 0.5731, "step": 5818 }, { "epoch": 0.8678921660017151, "grad_norm": 1.0478614568710327, "learning_rate": 9.003984228614293e-07, "loss": 0.5242, "step": 5819 }, { "epoch": 0.8680413139938103, "grad_norm": 1.1113027334213257, "learning_rate": 8.983957863833437e-07, "loss": 0.5136, "step": 5820 }, { "epoch": 0.8681904619859055, "grad_norm": 1.280051350593567, "learning_rate": 8.963952747331195e-07, "loss": 0.5804, "step": 5821 }, { "epoch": 0.8683396099780006, "grad_norm": 1.1294691562652588, "learning_rate": 8.943968883777909e-07, "loss": 0.558, "step": 5822 }, { "epoch": 0.8684887579700958, "grad_norm": 1.2977622747421265, "learning_rate": 8.92400627783897e-07, "loss": 0.5831, "step": 5823 }, { "epoch": 0.868637905962191, "grad_norm": 1.2041701078414917, "learning_rate": 8.904064934174717e-07, "loss": 0.5524, "step": 5824 }, { "epoch": 0.8687870539542861, "grad_norm": 1.1696796417236328, "learning_rate": 8.884144857440624e-07, "loss": 0.5046, "step": 5825 }, { "epoch": 0.8689362019463813, "grad_norm": 1.1917240619659424, "learning_rate": 8.864246052287151e-07, "loss": 0.4991, "step": 5826 }, { "epoch": 0.8690853499384764, "grad_norm": 1.149425983428955, "learning_rate": 8.844368523359803e-07, "loss": 0.5066, "step": 5827 }, { "epoch": 0.8692344979305716, "grad_norm": 1.1162340641021729, "learning_rate": 8.824512275299114e-07, "loss": 0.5568, "step": 5828 }, { "epoch": 0.8693836459226668, "grad_norm": 1.229017972946167, "learning_rate": 8.804677312740673e-07, "loss": 0.6703, "step": 5829 }, { "epoch": 0.8695327939147619, "grad_norm": 1.2014507055282593, "learning_rate": 8.784863640315045e-07, "loss": 0.5896, "step": 5830 }, { "epoch": 0.8696819419068571, "grad_norm": 1.178226351737976, "learning_rate": 8.765071262647873e-07, "loss": 0.5692, "step": 5831 }, { "epoch": 0.8698310898989522, "grad_norm": 1.2459919452667236, "learning_rate": 8.745300184359817e-07, "loss": 0.6136, "step": 5832 }, { "epoch": 0.8699802378910474, "grad_norm": 1.316787838935852, "learning_rate": 8.725550410066575e-07, "loss": 0.5559, "step": 5833 }, { "epoch": 0.8701293858831426, "grad_norm": 1.1776701211929321, "learning_rate": 8.705821944378834e-07, "loss": 0.5705, "step": 5834 }, { "epoch": 0.8702785338752377, "grad_norm": 1.205305814743042, "learning_rate": 8.686114791902334e-07, "loss": 0.5742, "step": 5835 }, { "epoch": 0.8704276818673329, "grad_norm": 1.3159409761428833, "learning_rate": 8.666428957237849e-07, "loss": 0.6018, "step": 5836 }, { "epoch": 0.870576829859428, "grad_norm": 1.2308826446533203, "learning_rate": 8.64676444498116e-07, "loss": 0.6295, "step": 5837 }, { "epoch": 0.8707259778515232, "grad_norm": 1.2740387916564941, "learning_rate": 8.627121259723071e-07, "loss": 0.6527, "step": 5838 }, { "epoch": 0.8708751258436184, "grad_norm": 0.7497671842575073, "learning_rate": 8.607499406049424e-07, "loss": 0.5624, "step": 5839 }, { "epoch": 0.8710242738357135, "grad_norm": 1.315882682800293, "learning_rate": 8.587898888541035e-07, "loss": 0.6141, "step": 5840 }, { "epoch": 0.8711734218278087, "grad_norm": 1.1032918691635132, "learning_rate": 8.568319711773787e-07, "loss": 0.5492, "step": 5841 }, { "epoch": 0.8713225698199037, "grad_norm": 1.2948633432388306, "learning_rate": 8.54876188031859e-07, "loss": 0.5954, "step": 5842 }, { "epoch": 0.871471717811999, "grad_norm": 1.21697199344635, "learning_rate": 8.529225398741303e-07, "loss": 0.586, "step": 5843 }, { "epoch": 0.8716208658040941, "grad_norm": 1.3574879169464111, "learning_rate": 8.509710271602833e-07, "loss": 0.5959, "step": 5844 }, { "epoch": 0.8717700137961892, "grad_norm": 1.2977607250213623, "learning_rate": 8.490216503459181e-07, "loss": 0.6081, "step": 5845 }, { "epoch": 0.8719191617882844, "grad_norm": 1.3251711130142212, "learning_rate": 8.470744098861239e-07, "loss": 0.6832, "step": 5846 }, { "epoch": 0.8720683097803796, "grad_norm": 1.3965340852737427, "learning_rate": 8.45129306235497e-07, "loss": 0.6753, "step": 5847 }, { "epoch": 0.8722174577724747, "grad_norm": 1.2986104488372803, "learning_rate": 8.431863398481366e-07, "loss": 0.6275, "step": 5848 }, { "epoch": 0.8723666057645699, "grad_norm": 1.2301530838012695, "learning_rate": 8.412455111776374e-07, "loss": 0.655, "step": 5849 }, { "epoch": 0.872515753756665, "grad_norm": 1.1059075593948364, "learning_rate": 8.393068206770993e-07, "loss": 0.6085, "step": 5850 }, { "epoch": 0.8726649017487602, "grad_norm": 1.2669777870178223, "learning_rate": 8.373702687991247e-07, "loss": 0.6313, "step": 5851 }, { "epoch": 0.8728140497408554, "grad_norm": 1.1727728843688965, "learning_rate": 8.354358559958087e-07, "loss": 0.5433, "step": 5852 }, { "epoch": 0.8729631977329505, "grad_norm": 1.164023995399475, "learning_rate": 8.335035827187577e-07, "loss": 0.4803, "step": 5853 }, { "epoch": 0.8731123457250457, "grad_norm": 1.3207603693008423, "learning_rate": 8.31573449419073e-07, "loss": 0.6243, "step": 5854 }, { "epoch": 0.8732614937171408, "grad_norm": 1.3047453165054321, "learning_rate": 8.296454565473522e-07, "loss": 0.6023, "step": 5855 }, { "epoch": 0.873410641709236, "grad_norm": 1.2717400789260864, "learning_rate": 8.277196045537006e-07, "loss": 0.5718, "step": 5856 }, { "epoch": 0.8735597897013312, "grad_norm": 1.2402061223983765, "learning_rate": 8.25795893887722e-07, "loss": 0.5973, "step": 5857 }, { "epoch": 0.8737089376934263, "grad_norm": 1.2575279474258423, "learning_rate": 8.238743249985159e-07, "loss": 0.5962, "step": 5858 }, { "epoch": 0.8738580856855215, "grad_norm": 1.2135552167892456, "learning_rate": 8.219548983346859e-07, "loss": 0.6001, "step": 5859 }, { "epoch": 0.8740072336776166, "grad_norm": 1.1008753776550293, "learning_rate": 8.200376143443356e-07, "loss": 0.4979, "step": 5860 }, { "epoch": 0.8741563816697118, "grad_norm": 1.163009524345398, "learning_rate": 8.181224734750659e-07, "loss": 0.4535, "step": 5861 }, { "epoch": 0.874305529661807, "grad_norm": 1.1429098844528198, "learning_rate": 8.162094761739792e-07, "loss": 0.5572, "step": 5862 }, { "epoch": 0.8744546776539021, "grad_norm": 1.2359293699264526, "learning_rate": 8.14298622887677e-07, "loss": 0.5791, "step": 5863 }, { "epoch": 0.8746038256459973, "grad_norm": 1.2405436038970947, "learning_rate": 8.123899140622616e-07, "loss": 0.5496, "step": 5864 }, { "epoch": 0.8747529736380923, "grad_norm": 1.148794412612915, "learning_rate": 8.104833501433318e-07, "loss": 0.5407, "step": 5865 }, { "epoch": 0.8749021216301875, "grad_norm": 1.209547758102417, "learning_rate": 8.085789315759862e-07, "loss": 0.5519, "step": 5866 }, { "epoch": 0.8750512696222827, "grad_norm": 1.266993522644043, "learning_rate": 8.06676658804827e-07, "loss": 0.58, "step": 5867 }, { "epoch": 0.8752004176143778, "grad_norm": 1.193725824356079, "learning_rate": 8.047765322739476e-07, "loss": 0.6004, "step": 5868 }, { "epoch": 0.875349565606473, "grad_norm": 1.2068301439285278, "learning_rate": 8.028785524269466e-07, "loss": 0.5404, "step": 5869 }, { "epoch": 0.8754987135985682, "grad_norm": 1.2254191637039185, "learning_rate": 8.009827197069209e-07, "loss": 0.6181, "step": 5870 }, { "epoch": 0.8756478615906633, "grad_norm": 1.023799180984497, "learning_rate": 7.990890345564628e-07, "loss": 0.5165, "step": 5871 }, { "epoch": 0.8757970095827585, "grad_norm": 1.1289548873901367, "learning_rate": 7.971974974176666e-07, "loss": 0.5857, "step": 5872 }, { "epoch": 0.8759461575748536, "grad_norm": 1.1279622316360474, "learning_rate": 7.953081087321257e-07, "loss": 0.5106, "step": 5873 }, { "epoch": 0.8760953055669488, "grad_norm": 1.0998921394348145, "learning_rate": 7.934208689409251e-07, "loss": 0.4845, "step": 5874 }, { "epoch": 0.876244453559044, "grad_norm": 1.3520772457122803, "learning_rate": 7.915357784846556e-07, "loss": 0.5673, "step": 5875 }, { "epoch": 0.8763936015511391, "grad_norm": 1.1532621383666992, "learning_rate": 7.896528378034052e-07, "loss": 0.5742, "step": 5876 }, { "epoch": 0.8765427495432343, "grad_norm": 0.7272390723228455, "learning_rate": 7.877720473367556e-07, "loss": 0.624, "step": 5877 }, { "epoch": 0.8766918975353294, "grad_norm": 1.1908864974975586, "learning_rate": 7.858934075237901e-07, "loss": 0.5726, "step": 5878 }, { "epoch": 0.8768410455274246, "grad_norm": 0.8181572556495667, "learning_rate": 7.840169188030899e-07, "loss": 0.5888, "step": 5879 }, { "epoch": 0.8769901935195198, "grad_norm": 1.1684904098510742, "learning_rate": 7.821425816127337e-07, "loss": 0.5412, "step": 5880 }, { "epoch": 0.8771393415116149, "grad_norm": 1.180802583694458, "learning_rate": 7.802703963902968e-07, "loss": 0.5599, "step": 5881 }, { "epoch": 0.8772884895037101, "grad_norm": 1.2553706169128418, "learning_rate": 7.784003635728555e-07, "loss": 0.5927, "step": 5882 }, { "epoch": 0.8774376374958052, "grad_norm": 0.8845263123512268, "learning_rate": 7.765324835969757e-07, "loss": 0.6514, "step": 5883 }, { "epoch": 0.8775867854879004, "grad_norm": 1.1351600885391235, "learning_rate": 7.746667568987287e-07, "loss": 0.5933, "step": 5884 }, { "epoch": 0.8777359334799956, "grad_norm": 1.114383578300476, "learning_rate": 7.728031839136818e-07, "loss": 0.4991, "step": 5885 }, { "epoch": 0.8778850814720907, "grad_norm": 1.2223302125930786, "learning_rate": 7.70941765076898e-07, "loss": 0.6078, "step": 5886 }, { "epoch": 0.8780342294641859, "grad_norm": 1.2117950916290283, "learning_rate": 7.690825008229319e-07, "loss": 0.5842, "step": 5887 }, { "epoch": 0.878183377456281, "grad_norm": 1.2516731023788452, "learning_rate": 7.672253915858496e-07, "loss": 0.6076, "step": 5888 }, { "epoch": 0.8783325254483761, "grad_norm": 1.0811614990234375, "learning_rate": 7.653704377991977e-07, "loss": 0.5441, "step": 5889 }, { "epoch": 0.8784816734404713, "grad_norm": 1.1618225574493408, "learning_rate": 7.635176398960308e-07, "loss": 0.5616, "step": 5890 }, { "epoch": 0.8786308214325664, "grad_norm": 1.4240037202835083, "learning_rate": 7.616669983088953e-07, "loss": 0.5875, "step": 5891 }, { "epoch": 0.8787799694246616, "grad_norm": 1.237128496170044, "learning_rate": 7.598185134698366e-07, "loss": 0.5591, "step": 5892 }, { "epoch": 0.8789291174167567, "grad_norm": 1.1507283449172974, "learning_rate": 7.579721858103928e-07, "loss": 0.5652, "step": 5893 }, { "epoch": 0.8790782654088519, "grad_norm": 1.1320244073867798, "learning_rate": 7.561280157616036e-07, "loss": 0.5393, "step": 5894 }, { "epoch": 0.8792274134009471, "grad_norm": 1.1810939311981201, "learning_rate": 7.542860037540012e-07, "loss": 0.5161, "step": 5895 }, { "epoch": 0.8793765613930422, "grad_norm": 1.2930690050125122, "learning_rate": 7.524461502176128e-07, "loss": 0.5799, "step": 5896 }, { "epoch": 0.8795257093851374, "grad_norm": 1.1421420574188232, "learning_rate": 7.506084555819682e-07, "loss": 0.6057, "step": 5897 }, { "epoch": 0.8796748573772326, "grad_norm": 1.1819101572036743, "learning_rate": 7.487729202760874e-07, "loss": 0.5901, "step": 5898 }, { "epoch": 0.8798240053693277, "grad_norm": 1.1921502351760864, "learning_rate": 7.469395447284866e-07, "loss": 0.5549, "step": 5899 }, { "epoch": 0.8799731533614229, "grad_norm": 1.2295515537261963, "learning_rate": 7.451083293671801e-07, "loss": 0.6042, "step": 5900 }, { "epoch": 0.880122301353518, "grad_norm": 1.2936080694198608, "learning_rate": 7.432792746196793e-07, "loss": 0.6554, "step": 5901 }, { "epoch": 0.8802714493456132, "grad_norm": 1.3180358409881592, "learning_rate": 7.414523809129836e-07, "loss": 0.6073, "step": 5902 }, { "epoch": 0.8804205973377084, "grad_norm": 1.178034782409668, "learning_rate": 7.396276486735965e-07, "loss": 0.5434, "step": 5903 }, { "epoch": 0.8805697453298035, "grad_norm": 1.2095857858657837, "learning_rate": 7.378050783275115e-07, "loss": 0.4699, "step": 5904 }, { "epoch": 0.8807188933218987, "grad_norm": 1.1616305112838745, "learning_rate": 7.359846703002216e-07, "loss": 0.5189, "step": 5905 }, { "epoch": 0.8808680413139938, "grad_norm": 1.1935912370681763, "learning_rate": 7.341664250167113e-07, "loss": 0.6397, "step": 5906 }, { "epoch": 0.881017189306089, "grad_norm": 1.0348145961761475, "learning_rate": 7.323503429014633e-07, "loss": 0.4315, "step": 5907 }, { "epoch": 0.8811663372981842, "grad_norm": 1.1763794422149658, "learning_rate": 7.305364243784507e-07, "loss": 0.5807, "step": 5908 }, { "epoch": 0.8813154852902793, "grad_norm": 1.3643635511398315, "learning_rate": 7.287246698711459e-07, "loss": 0.5784, "step": 5909 }, { "epoch": 0.8814646332823745, "grad_norm": 1.218613862991333, "learning_rate": 7.269150798025148e-07, "loss": 0.6426, "step": 5910 }, { "epoch": 0.8816137812744695, "grad_norm": 1.150098204612732, "learning_rate": 7.251076545950198e-07, "loss": 0.5771, "step": 5911 }, { "epoch": 0.8817629292665647, "grad_norm": 1.1567964553833008, "learning_rate": 7.233023946706108e-07, "loss": 0.5956, "step": 5912 }, { "epoch": 0.8819120772586599, "grad_norm": 0.8306707739830017, "learning_rate": 7.214993004507409e-07, "loss": 0.6191, "step": 5913 }, { "epoch": 0.882061225250755, "grad_norm": 1.2101054191589355, "learning_rate": 7.196983723563544e-07, "loss": 0.5921, "step": 5914 }, { "epoch": 0.8822103732428502, "grad_norm": 1.2049700021743774, "learning_rate": 7.178996108078873e-07, "loss": 0.6155, "step": 5915 }, { "epoch": 0.8823595212349453, "grad_norm": 1.1830500364303589, "learning_rate": 7.161030162252735e-07, "loss": 0.5471, "step": 5916 }, { "epoch": 0.8825086692270405, "grad_norm": 1.0649844408035278, "learning_rate": 7.143085890279411e-07, "loss": 0.4592, "step": 5917 }, { "epoch": 0.8826578172191357, "grad_norm": 1.174309253692627, "learning_rate": 7.12516329634807e-07, "loss": 0.5027, "step": 5918 }, { "epoch": 0.8828069652112308, "grad_norm": 1.3072619438171387, "learning_rate": 7.107262384642877e-07, "loss": 0.6067, "step": 5919 }, { "epoch": 0.882956113203326, "grad_norm": 1.2858710289001465, "learning_rate": 7.089383159342933e-07, "loss": 0.5736, "step": 5920 }, { "epoch": 0.8831052611954212, "grad_norm": 0.8153188824653625, "learning_rate": 7.071525624622211e-07, "loss": 0.6236, "step": 5921 }, { "epoch": 0.8832544091875163, "grad_norm": 1.171504020690918, "learning_rate": 7.053689784649676e-07, "loss": 0.5389, "step": 5922 }, { "epoch": 0.8834035571796115, "grad_norm": 1.2704106569290161, "learning_rate": 7.035875643589274e-07, "loss": 0.5742, "step": 5923 }, { "epoch": 0.8835527051717066, "grad_norm": 0.8934979438781738, "learning_rate": 7.018083205599779e-07, "loss": 0.6415, "step": 5924 }, { "epoch": 0.8837018531638018, "grad_norm": 1.1548430919647217, "learning_rate": 7.000312474834959e-07, "loss": 0.6174, "step": 5925 }, { "epoch": 0.883851001155897, "grad_norm": 1.0693918466567993, "learning_rate": 6.982563455443525e-07, "loss": 0.5053, "step": 5926 }, { "epoch": 0.8840001491479921, "grad_norm": 0.8827353715896606, "learning_rate": 6.964836151569066e-07, "loss": 0.6349, "step": 5927 }, { "epoch": 0.8841492971400873, "grad_norm": 1.2318556308746338, "learning_rate": 6.947130567350147e-07, "loss": 0.6089, "step": 5928 }, { "epoch": 0.8842984451321824, "grad_norm": 1.1470282077789307, "learning_rate": 6.929446706920285e-07, "loss": 0.5578, "step": 5929 }, { "epoch": 0.8844475931242776, "grad_norm": 1.1814889907836914, "learning_rate": 6.911784574407832e-07, "loss": 0.5315, "step": 5930 }, { "epoch": 0.8845967411163728, "grad_norm": 1.176499605178833, "learning_rate": 6.894144173936146e-07, "loss": 0.6141, "step": 5931 }, { "epoch": 0.8847458891084679, "grad_norm": 1.2689087390899658, "learning_rate": 6.876525509623532e-07, "loss": 0.583, "step": 5932 }, { "epoch": 0.884895037100563, "grad_norm": 1.095292329788208, "learning_rate": 6.858928585583135e-07, "loss": 0.5115, "step": 5933 }, { "epoch": 0.8850441850926581, "grad_norm": 1.1766115427017212, "learning_rate": 6.841353405923079e-07, "loss": 0.6291, "step": 5934 }, { "epoch": 0.8851933330847533, "grad_norm": 1.1412019729614258, "learning_rate": 6.823799974746425e-07, "loss": 0.5483, "step": 5935 }, { "epoch": 0.8853424810768485, "grad_norm": 1.0760712623596191, "learning_rate": 6.806268296151097e-07, "loss": 0.52, "step": 5936 }, { "epoch": 0.8854916290689436, "grad_norm": 1.1674013137817383, "learning_rate": 6.788758374229998e-07, "loss": 0.5837, "step": 5937 }, { "epoch": 0.8856407770610388, "grad_norm": 1.0944842100143433, "learning_rate": 6.771270213070935e-07, "loss": 0.5507, "step": 5938 }, { "epoch": 0.8857899250531339, "grad_norm": 1.172945499420166, "learning_rate": 6.753803816756643e-07, "loss": 0.6069, "step": 5939 }, { "epoch": 0.8859390730452291, "grad_norm": 1.3185794353485107, "learning_rate": 6.736359189364716e-07, "loss": 0.5792, "step": 5940 }, { "epoch": 0.8860882210373243, "grad_norm": 1.2346692085266113, "learning_rate": 6.718936334967774e-07, "loss": 0.6036, "step": 5941 }, { "epoch": 0.8862373690294194, "grad_norm": 1.1996679306030273, "learning_rate": 6.701535257633252e-07, "loss": 0.5641, "step": 5942 }, { "epoch": 0.8863865170215146, "grad_norm": 1.2749309539794922, "learning_rate": 6.684155961423555e-07, "loss": 0.6112, "step": 5943 }, { "epoch": 0.8865356650136098, "grad_norm": 1.1778703927993774, "learning_rate": 6.666798450395995e-07, "loss": 0.4469, "step": 5944 }, { "epoch": 0.8866848130057049, "grad_norm": 1.1060172319412231, "learning_rate": 6.649462728602807e-07, "loss": 0.5818, "step": 5945 }, { "epoch": 0.8868339609978001, "grad_norm": 1.200190782546997, "learning_rate": 6.632148800091099e-07, "loss": 0.6249, "step": 5946 }, { "epoch": 0.8869831089898952, "grad_norm": 1.0634015798568726, "learning_rate": 6.614856668902924e-07, "loss": 0.6064, "step": 5947 }, { "epoch": 0.8871322569819904, "grad_norm": 1.1398506164550781, "learning_rate": 6.597586339075279e-07, "loss": 0.5574, "step": 5948 }, { "epoch": 0.8872814049740856, "grad_norm": 1.2795426845550537, "learning_rate": 6.580337814639959e-07, "loss": 0.5827, "step": 5949 }, { "epoch": 0.8874305529661807, "grad_norm": 1.1892739534378052, "learning_rate": 6.563111099623809e-07, "loss": 0.5868, "step": 5950 }, { "epoch": 0.8875797009582759, "grad_norm": 1.0990657806396484, "learning_rate": 6.545906198048502e-07, "loss": 0.586, "step": 5951 }, { "epoch": 0.887728848950371, "grad_norm": 1.434255599975586, "learning_rate": 6.528723113930613e-07, "loss": 0.6329, "step": 5952 }, { "epoch": 0.8878779969424662, "grad_norm": 1.3559709787368774, "learning_rate": 6.511561851281656e-07, "loss": 0.6599, "step": 5953 }, { "epoch": 0.8880271449345614, "grad_norm": 1.1729429960250854, "learning_rate": 6.494422414108048e-07, "loss": 0.5441, "step": 5954 }, { "epoch": 0.8881762929266565, "grad_norm": 1.1497721672058105, "learning_rate": 6.477304806411078e-07, "loss": 0.5512, "step": 5955 }, { "epoch": 0.8883254409187517, "grad_norm": 1.190177083015442, "learning_rate": 6.460209032186971e-07, "loss": 0.5874, "step": 5956 }, { "epoch": 0.8884745889108467, "grad_norm": 1.191208839416504, "learning_rate": 6.443135095426845e-07, "loss": 0.6091, "step": 5957 }, { "epoch": 0.8886237369029419, "grad_norm": 0.8417494893074036, "learning_rate": 6.426083000116723e-07, "loss": 0.6233, "step": 5958 }, { "epoch": 0.8887728848950371, "grad_norm": 1.1973347663879395, "learning_rate": 6.409052750237521e-07, "loss": 0.5304, "step": 5959 }, { "epoch": 0.8889220328871322, "grad_norm": 1.2687381505966187, "learning_rate": 6.39204434976507e-07, "loss": 0.5876, "step": 5960 }, { "epoch": 0.8890711808792274, "grad_norm": 1.1672130823135376, "learning_rate": 6.375057802670081e-07, "loss": 0.5691, "step": 5961 }, { "epoch": 0.8892203288713225, "grad_norm": 1.1973495483398438, "learning_rate": 6.358093112918174e-07, "loss": 0.553, "step": 5962 }, { "epoch": 0.8893694768634177, "grad_norm": 1.1300901174545288, "learning_rate": 6.341150284469855e-07, "loss": 0.5132, "step": 5963 }, { "epoch": 0.8895186248555129, "grad_norm": 1.2878973484039307, "learning_rate": 6.324229321280572e-07, "loss": 0.473, "step": 5964 }, { "epoch": 0.889667772847608, "grad_norm": 1.264086127281189, "learning_rate": 6.307330227300579e-07, "loss": 0.6018, "step": 5965 }, { "epoch": 0.8898169208397032, "grad_norm": 1.2610397338867188, "learning_rate": 6.290453006475117e-07, "loss": 0.6325, "step": 5966 }, { "epoch": 0.8899660688317984, "grad_norm": 1.1983500719070435, "learning_rate": 6.273597662744269e-07, "loss": 0.5517, "step": 5967 }, { "epoch": 0.8901152168238935, "grad_norm": 1.2768462896347046, "learning_rate": 6.25676420004302e-07, "loss": 0.6269, "step": 5968 }, { "epoch": 0.8902643648159887, "grad_norm": 1.2377707958221436, "learning_rate": 6.239952622301248e-07, "loss": 0.6303, "step": 5969 }, { "epoch": 0.8904135128080838, "grad_norm": 1.2588990926742554, "learning_rate": 6.22316293344375e-07, "loss": 0.588, "step": 5970 }, { "epoch": 0.890562660800179, "grad_norm": 1.1478761434555054, "learning_rate": 6.206395137390153e-07, "loss": 0.6155, "step": 5971 }, { "epoch": 0.8907118087922742, "grad_norm": 1.2324450016021729, "learning_rate": 6.189649238055018e-07, "loss": 0.5166, "step": 5972 }, { "epoch": 0.8908609567843693, "grad_norm": 1.1807212829589844, "learning_rate": 6.172925239347793e-07, "loss": 0.5791, "step": 5973 }, { "epoch": 0.8910101047764645, "grad_norm": 1.202286720275879, "learning_rate": 6.15622314517278e-07, "loss": 0.5603, "step": 5974 }, { "epoch": 0.8911592527685596, "grad_norm": 1.1879483461380005, "learning_rate": 6.13954295942919e-07, "loss": 0.5771, "step": 5975 }, { "epoch": 0.8913084007606548, "grad_norm": 1.2739654779434204, "learning_rate": 6.122884686011166e-07, "loss": 0.5676, "step": 5976 }, { "epoch": 0.89145754875275, "grad_norm": 1.3494418859481812, "learning_rate": 6.10624832880764e-07, "loss": 0.6503, "step": 5977 }, { "epoch": 0.891606696744845, "grad_norm": 1.1143475770950317, "learning_rate": 6.089633891702496e-07, "loss": 0.5613, "step": 5978 }, { "epoch": 0.8917558447369403, "grad_norm": 1.1736634969711304, "learning_rate": 6.073041378574485e-07, "loss": 0.5929, "step": 5979 }, { "epoch": 0.8919049927290353, "grad_norm": 1.0849310159683228, "learning_rate": 6.056470793297209e-07, "loss": 0.5452, "step": 5980 }, { "epoch": 0.8920541407211305, "grad_norm": 1.122978687286377, "learning_rate": 6.039922139739196e-07, "loss": 0.493, "step": 5981 }, { "epoch": 0.8922032887132257, "grad_norm": 1.1329271793365479, "learning_rate": 6.023395421763856e-07, "loss": 0.5534, "step": 5982 }, { "epoch": 0.8923524367053208, "grad_norm": 1.2406738996505737, "learning_rate": 6.006890643229424e-07, "loss": 0.5602, "step": 5983 }, { "epoch": 0.892501584697416, "grad_norm": 1.2995095252990723, "learning_rate": 5.99040780798904e-07, "loss": 0.6308, "step": 5984 }, { "epoch": 0.8926507326895111, "grad_norm": 1.2341444492340088, "learning_rate": 5.973946919890772e-07, "loss": 0.6103, "step": 5985 }, { "epoch": 0.8927998806816063, "grad_norm": 1.125593662261963, "learning_rate": 5.957507982777477e-07, "loss": 0.5115, "step": 5986 }, { "epoch": 0.8929490286737015, "grad_norm": 1.2085909843444824, "learning_rate": 5.941091000486953e-07, "loss": 0.5599, "step": 5987 }, { "epoch": 0.8930981766657966, "grad_norm": 1.1698784828186035, "learning_rate": 5.924695976851846e-07, "loss": 0.5024, "step": 5988 }, { "epoch": 0.8932473246578918, "grad_norm": 1.2822229862213135, "learning_rate": 5.908322915699694e-07, "loss": 0.6098, "step": 5989 }, { "epoch": 0.8933964726499869, "grad_norm": 1.1913257837295532, "learning_rate": 5.89197182085286e-07, "loss": 0.6037, "step": 5990 }, { "epoch": 0.8935456206420821, "grad_norm": 1.2288105487823486, "learning_rate": 5.875642696128625e-07, "loss": 0.5987, "step": 5991 }, { "epoch": 0.8936947686341773, "grad_norm": 1.2339996099472046, "learning_rate": 5.859335545339129e-07, "loss": 0.6346, "step": 5992 }, { "epoch": 0.8938439166262724, "grad_norm": 1.3531386852264404, "learning_rate": 5.843050372291381e-07, "loss": 0.5693, "step": 5993 }, { "epoch": 0.8939930646183676, "grad_norm": 1.269781231880188, "learning_rate": 5.826787180787274e-07, "loss": 0.5886, "step": 5994 }, { "epoch": 0.8941422126104628, "grad_norm": 1.2117952108383179, "learning_rate": 5.810545974623549e-07, "loss": 0.5654, "step": 5995 }, { "epoch": 0.8942913606025579, "grad_norm": 0.8498607873916626, "learning_rate": 5.794326757591795e-07, "loss": 0.6167, "step": 5996 }, { "epoch": 0.8944405085946531, "grad_norm": 1.3042010068893433, "learning_rate": 5.778129533478516e-07, "loss": 0.5975, "step": 5997 }, { "epoch": 0.8945896565867482, "grad_norm": 1.2656300067901611, "learning_rate": 5.761954306065065e-07, "loss": 0.5766, "step": 5998 }, { "epoch": 0.8947388045788434, "grad_norm": 1.2052507400512695, "learning_rate": 5.745801079127622e-07, "loss": 0.5436, "step": 5999 }, { "epoch": 0.8948879525709386, "grad_norm": 1.1104305982589722, "learning_rate": 5.729669856437281e-07, "loss": 0.4954, "step": 6000 }, { "epoch": 0.8950371005630336, "grad_norm": 1.197804570198059, "learning_rate": 5.713560641759975e-07, "loss": 0.614, "step": 6001 }, { "epoch": 0.8951862485551288, "grad_norm": 1.2555711269378662, "learning_rate": 5.697473438856505e-07, "loss": 0.5807, "step": 6002 }, { "epoch": 0.8953353965472239, "grad_norm": 1.0670573711395264, "learning_rate": 5.681408251482523e-07, "loss": 0.4867, "step": 6003 }, { "epoch": 0.8954845445393191, "grad_norm": 1.096103310585022, "learning_rate": 5.665365083388586e-07, "loss": 0.5355, "step": 6004 }, { "epoch": 0.8956336925314143, "grad_norm": 1.2324990034103394, "learning_rate": 5.649343938320029e-07, "loss": 0.6506, "step": 6005 }, { "epoch": 0.8957828405235094, "grad_norm": 1.1895283460617065, "learning_rate": 5.633344820017106e-07, "loss": 0.5541, "step": 6006 }, { "epoch": 0.8959319885156046, "grad_norm": 1.176539659500122, "learning_rate": 5.61736773221494e-07, "loss": 0.5614, "step": 6007 }, { "epoch": 0.8960811365076997, "grad_norm": 1.2576942443847656, "learning_rate": 5.601412678643447e-07, "loss": 0.713, "step": 6008 }, { "epoch": 0.8962302844997949, "grad_norm": 1.2412663698196411, "learning_rate": 5.585479663027437e-07, "loss": 0.5661, "step": 6009 }, { "epoch": 0.8963794324918901, "grad_norm": 1.207014560699463, "learning_rate": 5.569568689086602e-07, "loss": 0.6336, "step": 6010 }, { "epoch": 0.8965285804839852, "grad_norm": 1.2306076288223267, "learning_rate": 5.553679760535447e-07, "loss": 0.6215, "step": 6011 }, { "epoch": 0.8966777284760804, "grad_norm": 1.1191956996917725, "learning_rate": 5.537812881083349e-07, "loss": 0.6137, "step": 6012 }, { "epoch": 0.8968268764681755, "grad_norm": 1.1493059396743774, "learning_rate": 5.521968054434534e-07, "loss": 0.5485, "step": 6013 }, { "epoch": 0.8969760244602707, "grad_norm": 1.1503548622131348, "learning_rate": 5.506145284288056e-07, "loss": 0.5536, "step": 6014 }, { "epoch": 0.8971251724523659, "grad_norm": 1.2220767736434937, "learning_rate": 5.49034457433787e-07, "loss": 0.6155, "step": 6015 }, { "epoch": 0.897274320444461, "grad_norm": 1.3042386770248413, "learning_rate": 5.474565928272735e-07, "loss": 0.5673, "step": 6016 }, { "epoch": 0.8974234684365562, "grad_norm": 1.1389758586883545, "learning_rate": 5.458809349776306e-07, "loss": 0.4882, "step": 6017 }, { "epoch": 0.8975726164286514, "grad_norm": 1.1842060089111328, "learning_rate": 5.443074842527007e-07, "loss": 0.575, "step": 6018 }, { "epoch": 0.8977217644207465, "grad_norm": 1.1747335195541382, "learning_rate": 5.427362410198212e-07, "loss": 0.5568, "step": 6019 }, { "epoch": 0.8978709124128417, "grad_norm": 1.2202168703079224, "learning_rate": 5.411672056458051e-07, "loss": 0.6902, "step": 6020 }, { "epoch": 0.8980200604049368, "grad_norm": 1.2522430419921875, "learning_rate": 5.396003784969551e-07, "loss": 0.5522, "step": 6021 }, { "epoch": 0.898169208397032, "grad_norm": 1.2705497741699219, "learning_rate": 5.380357599390573e-07, "loss": 0.5321, "step": 6022 }, { "epoch": 0.8983183563891272, "grad_norm": 1.1442172527313232, "learning_rate": 5.364733503373842e-07, "loss": 0.563, "step": 6023 }, { "epoch": 0.8984675043812222, "grad_norm": 1.2001885175704956, "learning_rate": 5.34913150056685e-07, "loss": 0.5712, "step": 6024 }, { "epoch": 0.8986166523733174, "grad_norm": 1.2916009426116943, "learning_rate": 5.333551594612018e-07, "loss": 0.6322, "step": 6025 }, { "epoch": 0.8987658003654125, "grad_norm": 1.2357996702194214, "learning_rate": 5.317993789146591e-07, "loss": 0.5877, "step": 6026 }, { "epoch": 0.8989149483575077, "grad_norm": 1.1379765272140503, "learning_rate": 5.302458087802587e-07, "loss": 0.5265, "step": 6027 }, { "epoch": 0.8990640963496029, "grad_norm": 1.2348682880401611, "learning_rate": 5.286944494206969e-07, "loss": 0.6761, "step": 6028 }, { "epoch": 0.899213244341698, "grad_norm": 1.206345796585083, "learning_rate": 5.271453011981464e-07, "loss": 0.5628, "step": 6029 }, { "epoch": 0.8993623923337932, "grad_norm": 1.1753813028335571, "learning_rate": 5.255983644742646e-07, "loss": 0.5145, "step": 6030 }, { "epoch": 0.8995115403258883, "grad_norm": 1.139802098274231, "learning_rate": 5.240536396101948e-07, "loss": 0.5616, "step": 6031 }, { "epoch": 0.8996606883179835, "grad_norm": 1.2898025512695312, "learning_rate": 5.225111269665651e-07, "loss": 0.5946, "step": 6032 }, { "epoch": 0.8998098363100787, "grad_norm": 1.334494709968567, "learning_rate": 5.209708269034797e-07, "loss": 0.6435, "step": 6033 }, { "epoch": 0.8999589843021738, "grad_norm": 0.799480676651001, "learning_rate": 5.194327397805365e-07, "loss": 0.604, "step": 6034 }, { "epoch": 0.900108132294269, "grad_norm": 1.3077055215835571, "learning_rate": 5.178968659568084e-07, "loss": 0.5995, "step": 6035 }, { "epoch": 0.9002572802863641, "grad_norm": 1.1894910335540771, "learning_rate": 5.163632057908574e-07, "loss": 0.6538, "step": 6036 }, { "epoch": 0.9004064282784593, "grad_norm": 0.8853569030761719, "learning_rate": 5.148317596407259e-07, "loss": 0.6474, "step": 6037 }, { "epoch": 0.9005555762705545, "grad_norm": 1.2514539957046509, "learning_rate": 5.133025278639403e-07, "loss": 0.6614, "step": 6038 }, { "epoch": 0.9007047242626496, "grad_norm": 1.1754705905914307, "learning_rate": 5.117755108175071e-07, "loss": 0.5738, "step": 6039 }, { "epoch": 0.9008538722547448, "grad_norm": 1.0902889966964722, "learning_rate": 5.102507088579189e-07, "loss": 0.5056, "step": 6040 }, { "epoch": 0.90100302024684, "grad_norm": 1.0999972820281982, "learning_rate": 5.087281223411522e-07, "loss": 0.4707, "step": 6041 }, { "epoch": 0.9011521682389351, "grad_norm": 1.2234247922897339, "learning_rate": 5.072077516226648e-07, "loss": 0.5644, "step": 6042 }, { "epoch": 0.9013013162310303, "grad_norm": 1.22075355052948, "learning_rate": 5.05689597057395e-07, "loss": 0.6349, "step": 6043 }, { "epoch": 0.9014504642231254, "grad_norm": 1.3056310415267944, "learning_rate": 5.04173658999767e-07, "loss": 0.5489, "step": 6044 }, { "epoch": 0.9015996122152206, "grad_norm": 1.1634107828140259, "learning_rate": 5.026599378036845e-07, "loss": 0.531, "step": 6045 }, { "epoch": 0.9017487602073158, "grad_norm": 1.3030624389648438, "learning_rate": 5.011484338225381e-07, "loss": 0.6008, "step": 6046 }, { "epoch": 0.9018979081994108, "grad_norm": 1.2337566614151, "learning_rate": 4.996391474091966e-07, "loss": 0.5398, "step": 6047 }, { "epoch": 0.902047056191506, "grad_norm": 1.1529954671859741, "learning_rate": 4.981320789160138e-07, "loss": 0.4776, "step": 6048 }, { "epoch": 0.9021962041836011, "grad_norm": 0.7281879782676697, "learning_rate": 4.966272286948215e-07, "loss": 0.6062, "step": 6049 }, { "epoch": 0.9023453521756963, "grad_norm": 1.2676938772201538, "learning_rate": 4.951245970969399e-07, "loss": 0.6512, "step": 6050 }, { "epoch": 0.9024945001677915, "grad_norm": 1.1832300424575806, "learning_rate": 4.936241844731671e-07, "loss": 0.5022, "step": 6051 }, { "epoch": 0.9026436481598866, "grad_norm": 1.218111515045166, "learning_rate": 4.921259911737831e-07, "loss": 0.6583, "step": 6052 }, { "epoch": 0.9027927961519818, "grad_norm": 1.0123419761657715, "learning_rate": 4.906300175485501e-07, "loss": 0.5066, "step": 6053 }, { "epoch": 0.9029419441440769, "grad_norm": 1.221020221710205, "learning_rate": 4.891362639467156e-07, "loss": 0.4881, "step": 6054 }, { "epoch": 0.9030910921361721, "grad_norm": 1.1660512685775757, "learning_rate": 4.87644730717004e-07, "loss": 0.5545, "step": 6055 }, { "epoch": 0.9032402401282673, "grad_norm": 1.1333121061325073, "learning_rate": 4.861554182076222e-07, "loss": 0.5642, "step": 6056 }, { "epoch": 0.9033893881203624, "grad_norm": 1.1172007322311401, "learning_rate": 4.846683267662632e-07, "loss": 0.5781, "step": 6057 }, { "epoch": 0.9035385361124576, "grad_norm": 1.1073509454727173, "learning_rate": 4.83183456740095e-07, "loss": 0.519, "step": 6058 }, { "epoch": 0.9036876841045527, "grad_norm": 1.3216286897659302, "learning_rate": 4.817008084757713e-07, "loss": 0.5473, "step": 6059 }, { "epoch": 0.9038368320966479, "grad_norm": 1.1732556819915771, "learning_rate": 4.802203823194263e-07, "loss": 0.5606, "step": 6060 }, { "epoch": 0.9039859800887431, "grad_norm": 1.227534294128418, "learning_rate": 4.787421786166724e-07, "loss": 0.5672, "step": 6061 }, { "epoch": 0.9041351280808382, "grad_norm": 1.2576243877410889, "learning_rate": 4.77266197712607e-07, "loss": 0.6293, "step": 6062 }, { "epoch": 0.9042842760729334, "grad_norm": 1.1567472219467163, "learning_rate": 4.757924399518099e-07, "loss": 0.5609, "step": 6063 }, { "epoch": 0.9044334240650286, "grad_norm": 1.0861327648162842, "learning_rate": 4.743209056783371e-07, "loss": 0.5604, "step": 6064 }, { "epoch": 0.9045825720571237, "grad_norm": 1.241750717163086, "learning_rate": 4.72851595235726e-07, "loss": 0.5353, "step": 6065 }, { "epoch": 0.9047317200492189, "grad_norm": 1.2701362371444702, "learning_rate": 4.7138450896700105e-07, "loss": 0.6201, "step": 6066 }, { "epoch": 0.904880868041314, "grad_norm": 1.183472990989685, "learning_rate": 4.6991964721465944e-07, "loss": 0.594, "step": 6067 }, { "epoch": 0.9050300160334092, "grad_norm": 1.2620108127593994, "learning_rate": 4.68457010320682e-07, "loss": 0.5661, "step": 6068 }, { "epoch": 0.9051791640255044, "grad_norm": 1.3115098476409912, "learning_rate": 4.6699659862653347e-07, "loss": 0.6176, "step": 6069 }, { "epoch": 0.9053283120175994, "grad_norm": 1.180184245109558, "learning_rate": 4.6553841247315544e-07, "loss": 0.6131, "step": 6070 }, { "epoch": 0.9054774600096946, "grad_norm": 1.328800916671753, "learning_rate": 4.6408245220096795e-07, "loss": 0.6411, "step": 6071 }, { "epoch": 0.9056266080017897, "grad_norm": 1.1518981456756592, "learning_rate": 4.6262871814987895e-07, "loss": 0.612, "step": 6072 }, { "epoch": 0.9057757559938849, "grad_norm": 1.2926898002624512, "learning_rate": 4.6117721065926824e-07, "loss": 0.5441, "step": 6073 }, { "epoch": 0.9059249039859801, "grad_norm": 1.1276910305023193, "learning_rate": 4.597279300680013e-07, "loss": 0.5324, "step": 6074 }, { "epoch": 0.9060740519780752, "grad_norm": 1.3226336240768433, "learning_rate": 4.58280876714422e-07, "loss": 0.567, "step": 6075 }, { "epoch": 0.9062231999701704, "grad_norm": 1.1352018117904663, "learning_rate": 4.568360509363545e-07, "loss": 0.5314, "step": 6076 }, { "epoch": 0.9063723479622655, "grad_norm": 1.274806022644043, "learning_rate": 4.5539345307110125e-07, "loss": 0.5646, "step": 6077 }, { "epoch": 0.9065214959543607, "grad_norm": 0.9190636873245239, "learning_rate": 4.539530834554473e-07, "loss": 0.6562, "step": 6078 }, { "epoch": 0.9066706439464559, "grad_norm": 1.0874669551849365, "learning_rate": 4.5251494242565587e-07, "loss": 0.5376, "step": 6079 }, { "epoch": 0.906819791938551, "grad_norm": 1.1154201030731201, "learning_rate": 4.510790303174672e-07, "loss": 0.6002, "step": 6080 }, { "epoch": 0.9069689399306462, "grad_norm": 1.146837592124939, "learning_rate": 4.496453474661089e-07, "loss": 0.531, "step": 6081 }, { "epoch": 0.9071180879227413, "grad_norm": 1.0863239765167236, "learning_rate": 4.48213894206283e-07, "loss": 0.6126, "step": 6082 }, { "epoch": 0.9072672359148365, "grad_norm": 1.2145079374313354, "learning_rate": 4.4678467087216794e-07, "loss": 0.6323, "step": 6083 }, { "epoch": 0.9074163839069317, "grad_norm": 1.2906497716903687, "learning_rate": 4.453576777974278e-07, "loss": 0.6757, "step": 6084 }, { "epoch": 0.9075655318990268, "grad_norm": 1.2759824991226196, "learning_rate": 4.439329153152028e-07, "loss": 0.6044, "step": 6085 }, { "epoch": 0.907714679891122, "grad_norm": 1.1691174507141113, "learning_rate": 4.425103837581124e-07, "loss": 0.5629, "step": 6086 }, { "epoch": 0.9078638278832171, "grad_norm": 1.3240735530853271, "learning_rate": 4.410900834582543e-07, "loss": 0.5778, "step": 6087 }, { "epoch": 0.9080129758753123, "grad_norm": 0.9926077723503113, "learning_rate": 4.3967201474721e-07, "loss": 0.4815, "step": 6088 }, { "epoch": 0.9081621238674075, "grad_norm": 1.1701503992080688, "learning_rate": 4.382561779560335e-07, "loss": 0.5458, "step": 6089 }, { "epoch": 0.9083112718595026, "grad_norm": 0.9038656949996948, "learning_rate": 4.3684257341526373e-07, "loss": 0.6264, "step": 6090 }, { "epoch": 0.9084604198515978, "grad_norm": 1.213759422302246, "learning_rate": 4.3543120145491555e-07, "loss": 0.5741, "step": 6091 }, { "epoch": 0.908609567843693, "grad_norm": 0.9975036978721619, "learning_rate": 4.3402206240447997e-07, "loss": 0.531, "step": 6092 }, { "epoch": 0.908758715835788, "grad_norm": 1.2175642251968384, "learning_rate": 4.326151565929315e-07, "loss": 0.5723, "step": 6093 }, { "epoch": 0.9089078638278832, "grad_norm": 1.204339623451233, "learning_rate": 4.3121048434872083e-07, "loss": 0.5818, "step": 6094 }, { "epoch": 0.9090570118199783, "grad_norm": 1.1040453910827637, "learning_rate": 4.2980804599978e-07, "loss": 0.4956, "step": 6095 }, { "epoch": 0.9092061598120735, "grad_norm": 1.3298218250274658, "learning_rate": 4.284078418735138e-07, "loss": 0.639, "step": 6096 }, { "epoch": 0.9093553078041687, "grad_norm": 1.141184687614441, "learning_rate": 4.270098722968108e-07, "loss": 0.5593, "step": 6097 }, { "epoch": 0.9095044557962638, "grad_norm": 1.0373541116714478, "learning_rate": 4.256141375960343e-07, "loss": 0.5097, "step": 6098 }, { "epoch": 0.909653603788359, "grad_norm": 1.1847165822982788, "learning_rate": 4.2422063809702927e-07, "loss": 0.592, "step": 6099 }, { "epoch": 0.9098027517804541, "grad_norm": 1.230488657951355, "learning_rate": 4.228293741251166e-07, "loss": 0.6214, "step": 6100 }, { "epoch": 0.9099518997725493, "grad_norm": 1.2042388916015625, "learning_rate": 4.214403460050964e-07, "loss": 0.5256, "step": 6101 }, { "epoch": 0.9101010477646445, "grad_norm": 1.294020175933838, "learning_rate": 4.200535540612449e-07, "loss": 0.5336, "step": 6102 }, { "epoch": 0.9102501957567396, "grad_norm": 1.1645599603652954, "learning_rate": 4.1866899861731867e-07, "loss": 0.663, "step": 6103 }, { "epoch": 0.9103993437488348, "grad_norm": 1.2259117364883423, "learning_rate": 4.1728667999655027e-07, "loss": 0.7002, "step": 6104 }, { "epoch": 0.9105484917409299, "grad_norm": 1.1984375715255737, "learning_rate": 4.159065985216515e-07, "loss": 0.5306, "step": 6105 }, { "epoch": 0.9106976397330251, "grad_norm": 1.2365559339523315, "learning_rate": 4.14528754514808e-07, "loss": 0.5613, "step": 6106 }, { "epoch": 0.9108467877251203, "grad_norm": 1.2294049263000488, "learning_rate": 4.131531482976925e-07, "loss": 0.5308, "step": 6107 }, { "epoch": 0.9109959357172154, "grad_norm": 1.2207919359207153, "learning_rate": 4.117797801914447e-07, "loss": 0.6246, "step": 6108 }, { "epoch": 0.9111450837093106, "grad_norm": 1.1758720874786377, "learning_rate": 4.104086505166871e-07, "loss": 0.5799, "step": 6109 }, { "epoch": 0.9112942317014057, "grad_norm": 1.2077422142028809, "learning_rate": 4.0903975959352026e-07, "loss": 0.5365, "step": 6110 }, { "epoch": 0.9114433796935009, "grad_norm": 1.1727432012557983, "learning_rate": 4.0767310774151746e-07, "loss": 0.5971, "step": 6111 }, { "epoch": 0.9115925276855961, "grad_norm": 1.235001564025879, "learning_rate": 4.063086952797346e-07, "loss": 0.5187, "step": 6112 }, { "epoch": 0.9117416756776912, "grad_norm": 1.1436980962753296, "learning_rate": 4.049465225267013e-07, "loss": 0.5613, "step": 6113 }, { "epoch": 0.9118908236697864, "grad_norm": 1.115543246269226, "learning_rate": 4.0358658980042765e-07, "loss": 0.4992, "step": 6114 }, { "epoch": 0.9120399716618816, "grad_norm": 1.0934972763061523, "learning_rate": 4.022288974183941e-07, "loss": 0.5472, "step": 6115 }, { "epoch": 0.9121891196539766, "grad_norm": 1.2215327024459839, "learning_rate": 4.0087344569756934e-07, "loss": 0.583, "step": 6116 }, { "epoch": 0.9123382676460718, "grad_norm": 1.2692680358886719, "learning_rate": 3.99520234954387e-07, "loss": 0.6278, "step": 6117 }, { "epoch": 0.9124874156381669, "grad_norm": 1.1808875799179077, "learning_rate": 3.981692655047642e-07, "loss": 0.554, "step": 6118 }, { "epoch": 0.9126365636302621, "grad_norm": 0.8323667049407959, "learning_rate": 3.968205376640932e-07, "loss": 0.6084, "step": 6119 }, { "epoch": 0.9127857116223573, "grad_norm": 1.2356152534484863, "learning_rate": 3.954740517472455e-07, "loss": 0.5751, "step": 6120 }, { "epoch": 0.9129348596144524, "grad_norm": 1.2824738025665283, "learning_rate": 3.94129808068564e-07, "loss": 0.6437, "step": 6121 }, { "epoch": 0.9130840076065476, "grad_norm": 1.1879076957702637, "learning_rate": 3.9278780694187114e-07, "loss": 0.6006, "step": 6122 }, { "epoch": 0.9132331555986427, "grad_norm": 1.2061662673950195, "learning_rate": 3.9144804868046724e-07, "loss": 0.5433, "step": 6123 }, { "epoch": 0.9133823035907379, "grad_norm": 1.246052861213684, "learning_rate": 3.901105335971267e-07, "loss": 0.6281, "step": 6124 }, { "epoch": 0.9135314515828331, "grad_norm": 0.860032320022583, "learning_rate": 3.887752620041008e-07, "loss": 0.6455, "step": 6125 }, { "epoch": 0.9136805995749282, "grad_norm": 1.1654173135757446, "learning_rate": 3.8744223421311787e-07, "loss": 0.5341, "step": 6126 }, { "epoch": 0.9138297475670234, "grad_norm": 1.266843557357788, "learning_rate": 3.8611145053538134e-07, "loss": 0.5744, "step": 6127 }, { "epoch": 0.9139788955591185, "grad_norm": 1.310750126838684, "learning_rate": 3.8478291128157155e-07, "loss": 0.6257, "step": 6128 }, { "epoch": 0.9141280435512137, "grad_norm": 1.0781528949737549, "learning_rate": 3.8345661676184475e-07, "loss": 0.4981, "step": 6129 }, { "epoch": 0.9142771915433089, "grad_norm": 1.1558784246444702, "learning_rate": 3.8213256728583115e-07, "loss": 0.5316, "step": 6130 }, { "epoch": 0.914426339535404, "grad_norm": 1.162264108657837, "learning_rate": 3.808107631626401e-07, "loss": 0.6287, "step": 6131 }, { "epoch": 0.9145754875274992, "grad_norm": 1.2738527059555054, "learning_rate": 3.7949120470085586e-07, "loss": 0.6158, "step": 6132 }, { "epoch": 0.9147246355195943, "grad_norm": 1.0864191055297852, "learning_rate": 3.781738922085354e-07, "loss": 0.5267, "step": 6133 }, { "epoch": 0.9148737835116895, "grad_norm": 1.3638098239898682, "learning_rate": 3.76858825993216e-07, "loss": 0.6788, "step": 6134 }, { "epoch": 0.9150229315037847, "grad_norm": 1.162233591079712, "learning_rate": 3.7554600636190876e-07, "loss": 0.4937, "step": 6135 }, { "epoch": 0.9151720794958798, "grad_norm": 1.2473307847976685, "learning_rate": 3.742354336210974e-07, "loss": 0.4966, "step": 6136 }, { "epoch": 0.915321227487975, "grad_norm": 1.2139239311218262, "learning_rate": 3.7292710807674493e-07, "loss": 0.6937, "step": 6137 }, { "epoch": 0.9154703754800702, "grad_norm": 1.214768886566162, "learning_rate": 3.716210300342893e-07, "loss": 0.5745, "step": 6138 }, { "epoch": 0.9156195234721652, "grad_norm": 1.3404887914657593, "learning_rate": 3.70317199798641e-07, "loss": 0.6436, "step": 6139 }, { "epoch": 0.9157686714642604, "grad_norm": 1.129093885421753, "learning_rate": 3.690156176741877e-07, "loss": 0.5349, "step": 6140 }, { "epoch": 0.9159178194563555, "grad_norm": 1.2783218622207642, "learning_rate": 3.6771628396479295e-07, "loss": 0.6517, "step": 6141 }, { "epoch": 0.9160669674484507, "grad_norm": 1.3668071031570435, "learning_rate": 3.664191989737942e-07, "loss": 0.5761, "step": 6142 }, { "epoch": 0.9162161154405459, "grad_norm": 1.1892986297607422, "learning_rate": 3.651243630040047e-07, "loss": 0.5678, "step": 6143 }, { "epoch": 0.916365263432641, "grad_norm": 1.2326252460479736, "learning_rate": 3.638317763577126e-07, "loss": 0.5449, "step": 6144 }, { "epoch": 0.9165144114247362, "grad_norm": 1.2052370309829712, "learning_rate": 3.6254143933667886e-07, "loss": 0.5972, "step": 6145 }, { "epoch": 0.9166635594168313, "grad_norm": 0.8510274887084961, "learning_rate": 3.6125335224214133e-07, "loss": 0.6533, "step": 6146 }, { "epoch": 0.9168127074089265, "grad_norm": 1.2888745069503784, "learning_rate": 3.5996751537481277e-07, "loss": 0.6367, "step": 6147 }, { "epoch": 0.9169618554010217, "grad_norm": 1.2475019693374634, "learning_rate": 3.586839290348809e-07, "loss": 0.6022, "step": 6148 }, { "epoch": 0.9171110033931168, "grad_norm": 1.1853827238082886, "learning_rate": 3.57402593522006e-07, "loss": 0.5834, "step": 6149 }, { "epoch": 0.917260151385212, "grad_norm": 1.1088505983352661, "learning_rate": 3.561235091353243e-07, "loss": 0.5904, "step": 6150 }, { "epoch": 0.9174092993773071, "grad_norm": 1.1419143676757812, "learning_rate": 3.548466761734459e-07, "loss": 0.5698, "step": 6151 }, { "epoch": 0.9175584473694023, "grad_norm": 1.223997712135315, "learning_rate": 3.535720949344557e-07, "loss": 0.5645, "step": 6152 }, { "epoch": 0.9177075953614975, "grad_norm": 0.8887866735458374, "learning_rate": 3.522997657159133e-07, "loss": 0.6375, "step": 6153 }, { "epoch": 0.9178567433535926, "grad_norm": 1.212690830230713, "learning_rate": 3.5102968881485344e-07, "loss": 0.6039, "step": 6154 }, { "epoch": 0.9180058913456878, "grad_norm": 1.1113072633743286, "learning_rate": 3.4976186452778116e-07, "loss": 0.5658, "step": 6155 }, { "epoch": 0.9181550393377829, "grad_norm": 1.1988123655319214, "learning_rate": 3.4849629315067856e-07, "loss": 0.5684, "step": 6156 }, { "epoch": 0.9183041873298781, "grad_norm": 1.1187907457351685, "learning_rate": 3.4723297497900487e-07, "loss": 0.4987, "step": 6157 }, { "epoch": 0.9184533353219733, "grad_norm": 1.222887635231018, "learning_rate": 3.459719103076831e-07, "loss": 0.5846, "step": 6158 }, { "epoch": 0.9186024833140684, "grad_norm": 1.2223325967788696, "learning_rate": 3.447130994311232e-07, "loss": 0.6098, "step": 6159 }, { "epoch": 0.9187516313061636, "grad_norm": 1.0689141750335693, "learning_rate": 3.4345654264320017e-07, "loss": 0.5311, "step": 6160 }, { "epoch": 0.9189007792982588, "grad_norm": 1.1700031757354736, "learning_rate": 3.422022402372649e-07, "loss": 0.6344, "step": 6161 }, { "epoch": 0.9190499272903538, "grad_norm": 1.054559350013733, "learning_rate": 3.4095019250614316e-07, "loss": 0.5581, "step": 6162 }, { "epoch": 0.919199075282449, "grad_norm": 1.203089952468872, "learning_rate": 3.397003997421344e-07, "loss": 0.6374, "step": 6163 }, { "epoch": 0.9193482232745441, "grad_norm": 1.1926097869873047, "learning_rate": 3.3845286223700757e-07, "loss": 0.5676, "step": 6164 }, { "epoch": 0.9194973712666393, "grad_norm": 1.168603777885437, "learning_rate": 3.372075802820107e-07, "loss": 0.5545, "step": 6165 }, { "epoch": 0.9196465192587345, "grad_norm": 1.1543837785720825, "learning_rate": 3.3596455416786245e-07, "loss": 0.5498, "step": 6166 }, { "epoch": 0.9197956672508296, "grad_norm": 1.267538070678711, "learning_rate": 3.34723784184755e-07, "loss": 0.5791, "step": 6167 }, { "epoch": 0.9199448152429248, "grad_norm": 1.233599066734314, "learning_rate": 3.334852706223546e-07, "loss": 0.6023, "step": 6168 }, { "epoch": 0.9200939632350199, "grad_norm": 1.2342503070831299, "learning_rate": 3.322490137697998e-07, "loss": 0.5613, "step": 6169 }, { "epoch": 0.9202431112271151, "grad_norm": 1.2423045635223389, "learning_rate": 3.310150139157031e-07, "loss": 0.5624, "step": 6170 }, { "epoch": 0.9203922592192103, "grad_norm": 1.2830004692077637, "learning_rate": 3.297832713481486e-07, "loss": 0.6624, "step": 6171 }, { "epoch": 0.9205414072113054, "grad_norm": 1.2165502309799194, "learning_rate": 3.2855378635469503e-07, "loss": 0.5175, "step": 6172 }, { "epoch": 0.9206905552034006, "grad_norm": 1.1105908155441284, "learning_rate": 3.273265592223751e-07, "loss": 0.4769, "step": 6173 }, { "epoch": 0.9208397031954957, "grad_norm": 1.1611384153366089, "learning_rate": 3.261015902376896e-07, "loss": 0.6226, "step": 6174 }, { "epoch": 0.9209888511875909, "grad_norm": 1.1764094829559326, "learning_rate": 3.2487887968661866e-07, "loss": 0.7014, "step": 6175 }, { "epoch": 0.9211379991796861, "grad_norm": 1.3023207187652588, "learning_rate": 3.2365842785460954e-07, "loss": 0.6511, "step": 6176 }, { "epoch": 0.9212871471717812, "grad_norm": 1.2180759906768799, "learning_rate": 3.2244023502658537e-07, "loss": 0.5983, "step": 6177 }, { "epoch": 0.9214362951638764, "grad_norm": 1.3415932655334473, "learning_rate": 3.2122430148694203e-07, "loss": 0.5795, "step": 6178 }, { "epoch": 0.9215854431559715, "grad_norm": 1.1985212564468384, "learning_rate": 3.2001062751954583e-07, "loss": 0.529, "step": 6179 }, { "epoch": 0.9217345911480667, "grad_norm": 1.1727584600448608, "learning_rate": 3.1879921340773776e-07, "loss": 0.6085, "step": 6180 }, { "epoch": 0.9218837391401619, "grad_norm": 1.2603377103805542, "learning_rate": 3.175900594343284e-07, "loss": 0.5676, "step": 6181 }, { "epoch": 0.922032887132257, "grad_norm": 1.1065315008163452, "learning_rate": 3.163831658816052e-07, "loss": 0.5704, "step": 6182 }, { "epoch": 0.9221820351243522, "grad_norm": 1.1762003898620605, "learning_rate": 3.151785330313217e-07, "loss": 0.5632, "step": 6183 }, { "epoch": 0.9223311831164472, "grad_norm": 1.1975064277648926, "learning_rate": 3.1397616116470964e-07, "loss": 0.6576, "step": 6184 }, { "epoch": 0.9224803311085424, "grad_norm": 1.122030258178711, "learning_rate": 3.1277605056246994e-07, "loss": 0.5545, "step": 6185 }, { "epoch": 0.9226294791006376, "grad_norm": 1.1992290019989014, "learning_rate": 3.1157820150477634e-07, "loss": 0.5857, "step": 6186 }, { "epoch": 0.9227786270927327, "grad_norm": 1.169674038887024, "learning_rate": 3.10382614271274e-07, "loss": 0.6301, "step": 6187 }, { "epoch": 0.9229277750848279, "grad_norm": 1.2396478652954102, "learning_rate": 3.091892891410808e-07, "loss": 0.581, "step": 6188 }, { "epoch": 0.9230769230769231, "grad_norm": 1.295931100845337, "learning_rate": 3.079982263927861e-07, "loss": 0.6428, "step": 6189 }, { "epoch": 0.9232260710690182, "grad_norm": 1.229773759841919, "learning_rate": 3.0680942630444965e-07, "loss": 0.5698, "step": 6190 }, { "epoch": 0.9233752190611134, "grad_norm": 1.3457584381103516, "learning_rate": 3.0562288915360837e-07, "loss": 0.6394, "step": 6191 }, { "epoch": 0.9235243670532085, "grad_norm": 1.0973554849624634, "learning_rate": 3.0443861521726183e-07, "loss": 0.5792, "step": 6192 }, { "epoch": 0.9236735150453037, "grad_norm": 1.1876027584075928, "learning_rate": 3.0325660477188767e-07, "loss": 0.5377, "step": 6193 }, { "epoch": 0.9238226630373989, "grad_norm": 1.3275203704833984, "learning_rate": 3.020768580934386e-07, "loss": 0.6218, "step": 6194 }, { "epoch": 0.923971811029494, "grad_norm": 1.188092589378357, "learning_rate": 3.008993754573286e-07, "loss": 0.6003, "step": 6195 }, { "epoch": 0.9241209590215892, "grad_norm": 1.1373786926269531, "learning_rate": 2.9972415713845016e-07, "loss": 0.5413, "step": 6196 }, { "epoch": 0.9242701070136843, "grad_norm": 1.401903510093689, "learning_rate": 2.9855120341116706e-07, "loss": 0.599, "step": 6197 }, { "epoch": 0.9244192550057795, "grad_norm": 1.1888853311538696, "learning_rate": 2.973805145493103e-07, "loss": 0.5851, "step": 6198 }, { "epoch": 0.9245684029978747, "grad_norm": 1.0739233493804932, "learning_rate": 2.962120908261856e-07, "loss": 0.5013, "step": 6199 }, { "epoch": 0.9247175509899698, "grad_norm": 1.2500349283218384, "learning_rate": 2.950459325145705e-07, "loss": 0.6121, "step": 6200 }, { "epoch": 0.924866698982065, "grad_norm": 1.242249608039856, "learning_rate": 2.9388203988671037e-07, "loss": 0.5689, "step": 6201 }, { "epoch": 0.9250158469741601, "grad_norm": 1.3098838329315186, "learning_rate": 2.9272041321432353e-07, "loss": 0.5939, "step": 6202 }, { "epoch": 0.9251649949662553, "grad_norm": 1.152140736579895, "learning_rate": 2.915610527685997e-07, "loss": 0.6022, "step": 6203 }, { "epoch": 0.9253141429583505, "grad_norm": 1.2260849475860596, "learning_rate": 2.904039588202001e-07, "loss": 0.601, "step": 6204 }, { "epoch": 0.9254632909504455, "grad_norm": 1.1419981718063354, "learning_rate": 2.892491316392543e-07, "loss": 0.566, "step": 6205 }, { "epoch": 0.9256124389425407, "grad_norm": 1.18759024143219, "learning_rate": 2.880965714953643e-07, "loss": 0.5433, "step": 6206 }, { "epoch": 0.9257615869346358, "grad_norm": 1.1856679916381836, "learning_rate": 2.869462786576027e-07, "loss": 0.5635, "step": 6207 }, { "epoch": 0.925910734926731, "grad_norm": 1.2644321918487549, "learning_rate": 2.857982533945125e-07, "loss": 0.5595, "step": 6208 }, { "epoch": 0.9260598829188262, "grad_norm": 1.2893110513687134, "learning_rate": 2.8465249597410816e-07, "loss": 0.5989, "step": 6209 }, { "epoch": 0.9262090309109213, "grad_norm": 1.2528223991394043, "learning_rate": 2.835090066638746e-07, "loss": 0.5268, "step": 6210 }, { "epoch": 0.9263581789030165, "grad_norm": 1.2086819410324097, "learning_rate": 2.823677857307638e-07, "loss": 0.5636, "step": 6211 }, { "epoch": 0.9265073268951117, "grad_norm": 1.171816349029541, "learning_rate": 2.812288334412039e-07, "loss": 0.5025, "step": 6212 }, { "epoch": 0.9266564748872068, "grad_norm": 1.194261074066162, "learning_rate": 2.80092150061092e-07, "loss": 0.6023, "step": 6213 }, { "epoch": 0.926805622879302, "grad_norm": 1.1816505193710327, "learning_rate": 2.7895773585579047e-07, "loss": 0.6107, "step": 6214 }, { "epoch": 0.9269547708713971, "grad_norm": 1.2406203746795654, "learning_rate": 2.778255910901362e-07, "loss": 0.5774, "step": 6215 }, { "epoch": 0.9271039188634923, "grad_norm": 1.228469729423523, "learning_rate": 2.766957160284389e-07, "loss": 0.5268, "step": 6216 }, { "epoch": 0.9272530668555875, "grad_norm": 1.1742149591445923, "learning_rate": 2.75568110934471e-07, "loss": 0.5072, "step": 6217 }, { "epoch": 0.9274022148476826, "grad_norm": 1.1875320672988892, "learning_rate": 2.744427760714818e-07, "loss": 0.5623, "step": 6218 }, { "epoch": 0.9275513628397778, "grad_norm": 1.2016016244888306, "learning_rate": 2.7331971170218684e-07, "loss": 0.6266, "step": 6219 }, { "epoch": 0.9277005108318729, "grad_norm": 1.3109642267227173, "learning_rate": 2.72198918088773e-07, "loss": 0.6012, "step": 6220 }, { "epoch": 0.9278496588239681, "grad_norm": 1.1793640851974487, "learning_rate": 2.7108039549289754e-07, "loss": 0.6162, "step": 6221 }, { "epoch": 0.9279988068160633, "grad_norm": 1.1693042516708374, "learning_rate": 2.699641441756862e-07, "loss": 0.506, "step": 6222 }, { "epoch": 0.9281479548081584, "grad_norm": 1.215370774269104, "learning_rate": 2.688501643977337e-07, "loss": 0.6202, "step": 6223 }, { "epoch": 0.9282971028002536, "grad_norm": 1.2852951288223267, "learning_rate": 2.6773845641910655e-07, "loss": 0.5553, "step": 6224 }, { "epoch": 0.9284462507923487, "grad_norm": 1.2330996990203857, "learning_rate": 2.6662902049934047e-07, "loss": 0.6483, "step": 6225 }, { "epoch": 0.9285953987844439, "grad_norm": 1.2552320957183838, "learning_rate": 2.655218568974416e-07, "loss": 0.6121, "step": 6226 }, { "epoch": 0.9287445467765391, "grad_norm": 1.1328701972961426, "learning_rate": 2.64416965871882e-07, "loss": 0.566, "step": 6227 }, { "epoch": 0.9288936947686341, "grad_norm": 1.339658498764038, "learning_rate": 2.633143476806066e-07, "loss": 0.6007, "step": 6228 }, { "epoch": 0.9290428427607293, "grad_norm": 1.1662864685058594, "learning_rate": 2.6221400258102826e-07, "loss": 0.5719, "step": 6229 }, { "epoch": 0.9291919907528244, "grad_norm": 1.134431004524231, "learning_rate": 2.611159308300304e-07, "loss": 0.6225, "step": 6230 }, { "epoch": 0.9293411387449196, "grad_norm": 1.3021397590637207, "learning_rate": 2.600201326839646e-07, "loss": 0.5323, "step": 6231 }, { "epoch": 0.9294902867370148, "grad_norm": 1.2532258033752441, "learning_rate": 2.58926608398653e-07, "loss": 0.5947, "step": 6232 }, { "epoch": 0.9296394347291099, "grad_norm": 1.179765224456787, "learning_rate": 2.5783535822938354e-07, "loss": 0.5514, "step": 6233 }, { "epoch": 0.9297885827212051, "grad_norm": 1.112747311592102, "learning_rate": 2.56746382430918e-07, "loss": 0.5716, "step": 6234 }, { "epoch": 0.9299377307133003, "grad_norm": 1.1536394357681274, "learning_rate": 2.55659681257483e-07, "loss": 0.5517, "step": 6235 }, { "epoch": 0.9300868787053954, "grad_norm": 1.117096185684204, "learning_rate": 2.545752549627767e-07, "loss": 0.4916, "step": 6236 }, { "epoch": 0.9302360266974906, "grad_norm": 1.2437914609909058, "learning_rate": 2.534931037999633e-07, "loss": 0.5473, "step": 6237 }, { "epoch": 0.9303851746895857, "grad_norm": 1.3140125274658203, "learning_rate": 2.524132280216818e-07, "loss": 0.6334, "step": 6238 }, { "epoch": 0.9305343226816809, "grad_norm": 1.291017770767212, "learning_rate": 2.5133562788003276e-07, "loss": 0.6238, "step": 6239 }, { "epoch": 0.9306834706737761, "grad_norm": 1.1433494091033936, "learning_rate": 2.5026030362659157e-07, "loss": 0.5729, "step": 6240 }, { "epoch": 0.9308326186658712, "grad_norm": 1.3037326335906982, "learning_rate": 2.491872555123975e-07, "loss": 0.5889, "step": 6241 }, { "epoch": 0.9309817666579664, "grad_norm": 0.9529646635055542, "learning_rate": 2.4811648378796127e-07, "loss": 0.4323, "step": 6242 }, { "epoch": 0.9311309146500615, "grad_norm": 1.32063889503479, "learning_rate": 2.4704798870326174e-07, "loss": 0.6038, "step": 6243 }, { "epoch": 0.9312800626421567, "grad_norm": 1.0226502418518066, "learning_rate": 2.4598177050774495e-07, "loss": 0.5179, "step": 6244 }, { "epoch": 0.9314292106342519, "grad_norm": 1.1597864627838135, "learning_rate": 2.449178294503274e-07, "loss": 0.555, "step": 6245 }, { "epoch": 0.931578358626347, "grad_norm": 1.1940090656280518, "learning_rate": 2.438561657793914e-07, "loss": 0.6458, "step": 6246 }, { "epoch": 0.9317275066184422, "grad_norm": 1.2553269863128662, "learning_rate": 2.4279677974279214e-07, "loss": 0.6283, "step": 6247 }, { "epoch": 0.9318766546105373, "grad_norm": 1.3000143766403198, "learning_rate": 2.417396715878462e-07, "loss": 0.5866, "step": 6248 }, { "epoch": 0.9320258026026325, "grad_norm": 1.3072099685668945, "learning_rate": 2.40684841561345e-07, "loss": 0.5989, "step": 6249 }, { "epoch": 0.9321749505947277, "grad_norm": 1.1360310316085815, "learning_rate": 2.396322899095449e-07, "loss": 0.555, "step": 6250 }, { "epoch": 0.9323240985868227, "grad_norm": 1.259295105934143, "learning_rate": 2.3858201687817164e-07, "loss": 0.5652, "step": 6251 }, { "epoch": 0.932473246578918, "grad_norm": 0.8612923622131348, "learning_rate": 2.3753402271241566e-07, "loss": 0.638, "step": 6252 }, { "epoch": 0.932622394571013, "grad_norm": 1.208009958267212, "learning_rate": 2.3648830765693908e-07, "loss": 0.5565, "step": 6253 }, { "epoch": 0.9327715425631082, "grad_norm": 1.1386109590530396, "learning_rate": 2.3544487195587108e-07, "loss": 0.5436, "step": 6254 }, { "epoch": 0.9329206905552034, "grad_norm": 1.1333316564559937, "learning_rate": 2.3440371585280896e-07, "loss": 0.6251, "step": 6255 }, { "epoch": 0.9330698385472985, "grad_norm": 1.2641340494155884, "learning_rate": 2.3336483959081612e-07, "loss": 0.6023, "step": 6256 }, { "epoch": 0.9332189865393937, "grad_norm": 1.1133028268814087, "learning_rate": 2.3232824341242743e-07, "loss": 0.5095, "step": 6257 }, { "epoch": 0.9333681345314889, "grad_norm": 1.276210904121399, "learning_rate": 2.312939275596393e-07, "loss": 0.6738, "step": 6258 }, { "epoch": 0.933517282523584, "grad_norm": 1.228456974029541, "learning_rate": 2.3026189227392083e-07, "loss": 0.6489, "step": 6259 }, { "epoch": 0.9336664305156792, "grad_norm": 1.2520692348480225, "learning_rate": 2.2923213779620924e-07, "loss": 0.5668, "step": 6260 }, { "epoch": 0.9338155785077743, "grad_norm": 0.8514218926429749, "learning_rate": 2.2820466436690447e-07, "loss": 0.5908, "step": 6261 }, { "epoch": 0.9339647264998695, "grad_norm": 1.3315702676773071, "learning_rate": 2.27179472225878e-07, "loss": 0.6184, "step": 6262 }, { "epoch": 0.9341138744919647, "grad_norm": 1.2418259382247925, "learning_rate": 2.2615656161246613e-07, "loss": 0.5622, "step": 6263 }, { "epoch": 0.9342630224840598, "grad_norm": 1.16958749294281, "learning_rate": 2.2513593276547673e-07, "loss": 0.5271, "step": 6264 }, { "epoch": 0.934412170476155, "grad_norm": 1.302210807800293, "learning_rate": 2.2411758592318033e-07, "loss": 0.5764, "step": 6265 }, { "epoch": 0.9345613184682501, "grad_norm": 1.1842516660690308, "learning_rate": 2.2310152132331676e-07, "loss": 0.5802, "step": 6266 }, { "epoch": 0.9347104664603453, "grad_norm": 1.269554615020752, "learning_rate": 2.220877392030929e-07, "loss": 0.6216, "step": 6267 }, { "epoch": 0.9348596144524405, "grad_norm": 1.1904551982879639, "learning_rate": 2.210762397991828e-07, "loss": 0.5228, "step": 6268 }, { "epoch": 0.9350087624445356, "grad_norm": 1.1259371042251587, "learning_rate": 2.2006702334772755e-07, "loss": 0.5664, "step": 6269 }, { "epoch": 0.9351579104366308, "grad_norm": 1.1859838962554932, "learning_rate": 2.1906009008433427e-07, "loss": 0.5837, "step": 6270 }, { "epoch": 0.9353070584287259, "grad_norm": 1.1725236177444458, "learning_rate": 2.1805544024407933e-07, "loss": 0.6514, "step": 6271 }, { "epoch": 0.9354562064208211, "grad_norm": 1.2857098579406738, "learning_rate": 2.17053074061504e-07, "loss": 0.6482, "step": 6272 }, { "epoch": 0.9356053544129163, "grad_norm": 1.1848130226135254, "learning_rate": 2.1605299177061668e-07, "loss": 0.5837, "step": 6273 }, { "epoch": 0.9357545024050113, "grad_norm": 1.1043384075164795, "learning_rate": 2.150551936048928e-07, "loss": 0.6008, "step": 6274 }, { "epoch": 0.9359036503971065, "grad_norm": 1.159881830215454, "learning_rate": 2.1405967979727715e-07, "loss": 0.5281, "step": 6275 }, { "epoch": 0.9360527983892016, "grad_norm": 1.317151427268982, "learning_rate": 2.1306645058017607e-07, "loss": 0.5901, "step": 6276 }, { "epoch": 0.9362019463812968, "grad_norm": 0.8867807984352112, "learning_rate": 2.1207550618546624e-07, "loss": 0.6646, "step": 6277 }, { "epoch": 0.936351094373392, "grad_norm": 1.3516998291015625, "learning_rate": 2.1108684684448932e-07, "loss": 0.5837, "step": 6278 }, { "epoch": 0.9365002423654871, "grad_norm": 1.3402525186538696, "learning_rate": 2.1010047278805735e-07, "loss": 0.5791, "step": 6279 }, { "epoch": 0.9366493903575823, "grad_norm": 1.2873687744140625, "learning_rate": 2.0911638424644055e-07, "loss": 0.5227, "step": 6280 }, { "epoch": 0.9367985383496774, "grad_norm": 1.208130121231079, "learning_rate": 2.0813458144938514e-07, "loss": 0.5943, "step": 6281 }, { "epoch": 0.9369476863417726, "grad_norm": 0.8459069132804871, "learning_rate": 2.0715506462609557e-07, "loss": 0.6534, "step": 6282 }, { "epoch": 0.9370968343338678, "grad_norm": 1.114473819732666, "learning_rate": 2.0617783400525003e-07, "loss": 0.5622, "step": 6283 }, { "epoch": 0.9372459823259629, "grad_norm": 1.1086344718933105, "learning_rate": 2.0520288981498605e-07, "loss": 0.5863, "step": 6284 }, { "epoch": 0.9373951303180581, "grad_norm": 1.2815028429031372, "learning_rate": 2.0423023228291373e-07, "loss": 0.6071, "step": 6285 }, { "epoch": 0.9375442783101533, "grad_norm": 1.2488031387329102, "learning_rate": 2.0325986163610367e-07, "loss": 0.5694, "step": 6286 }, { "epoch": 0.9376934263022484, "grad_norm": 1.1231399774551392, "learning_rate": 2.022917781010958e-07, "loss": 0.5232, "step": 6287 }, { "epoch": 0.9378425742943436, "grad_norm": 1.073895812034607, "learning_rate": 2.0132598190389596e-07, "loss": 0.5653, "step": 6288 }, { "epoch": 0.9379917222864387, "grad_norm": 1.2319446802139282, "learning_rate": 2.0036247326997383e-07, "loss": 0.525, "step": 6289 }, { "epoch": 0.9381408702785339, "grad_norm": 1.200760841369629, "learning_rate": 1.9940125242426834e-07, "loss": 0.5614, "step": 6290 }, { "epoch": 0.9382900182706291, "grad_norm": 1.0907857418060303, "learning_rate": 1.9844231959118444e-07, "loss": 0.4907, "step": 6291 }, { "epoch": 0.9384391662627242, "grad_norm": 0.8853755593299866, "learning_rate": 1.9748567499458639e-07, "loss": 0.6563, "step": 6292 }, { "epoch": 0.9385883142548194, "grad_norm": 1.2502442598342896, "learning_rate": 1.9653131885781328e-07, "loss": 0.5219, "step": 6293 }, { "epoch": 0.9387374622469145, "grad_norm": 1.360818862915039, "learning_rate": 1.9557925140366363e-07, "loss": 0.6741, "step": 6294 }, { "epoch": 0.9388866102390097, "grad_norm": 1.0705320835113525, "learning_rate": 1.9462947285440405e-07, "loss": 0.4773, "step": 6295 }, { "epoch": 0.9390357582311049, "grad_norm": 1.1551874876022339, "learning_rate": 1.9368198343176604e-07, "loss": 0.5537, "step": 6296 }, { "epoch": 0.9391849062232, "grad_norm": 1.086219072341919, "learning_rate": 1.9273678335694712e-07, "loss": 0.5479, "step": 6297 }, { "epoch": 0.9393340542152951, "grad_norm": 1.1212098598480225, "learning_rate": 1.917938728506108e-07, "loss": 0.5679, "step": 6298 }, { "epoch": 0.9394832022073902, "grad_norm": 1.1261916160583496, "learning_rate": 1.9085325213288542e-07, "loss": 0.5092, "step": 6299 }, { "epoch": 0.9396323501994854, "grad_norm": 1.0936964750289917, "learning_rate": 1.8991492142336644e-07, "loss": 0.5486, "step": 6300 }, { "epoch": 0.9397814981915806, "grad_norm": 1.271406888961792, "learning_rate": 1.8897888094110972e-07, "loss": 0.5911, "step": 6301 }, { "epoch": 0.9399306461836757, "grad_norm": 1.2402865886688232, "learning_rate": 1.880451309046427e-07, "loss": 0.5971, "step": 6302 }, { "epoch": 0.9400797941757709, "grad_norm": 1.281859278678894, "learning_rate": 1.8711367153195436e-07, "loss": 0.5987, "step": 6303 }, { "epoch": 0.940228942167866, "grad_norm": 1.2785037755966187, "learning_rate": 1.8618450304050074e-07, "loss": 0.5729, "step": 6304 }, { "epoch": 0.9403780901599612, "grad_norm": 1.1304466724395752, "learning_rate": 1.852576256472005e-07, "loss": 0.5753, "step": 6305 }, { "epoch": 0.9405272381520564, "grad_norm": 1.1152364015579224, "learning_rate": 1.8433303956843952e-07, "loss": 0.5064, "step": 6306 }, { "epoch": 0.9406763861441515, "grad_norm": 1.0554238557815552, "learning_rate": 1.834107450200695e-07, "loss": 0.5378, "step": 6307 }, { "epoch": 0.9408255341362467, "grad_norm": 1.2106878757476807, "learning_rate": 1.8249074221740494e-07, "loss": 0.5589, "step": 6308 }, { "epoch": 0.9409746821283419, "grad_norm": 1.2133358716964722, "learning_rate": 1.815730313752273e-07, "loss": 0.611, "step": 6309 }, { "epoch": 0.941123830120437, "grad_norm": 1.1644083261489868, "learning_rate": 1.8065761270778303e-07, "loss": 0.578, "step": 6310 }, { "epoch": 0.9412729781125322, "grad_norm": 1.1138161420822144, "learning_rate": 1.7974448642877894e-07, "loss": 0.574, "step": 6311 }, { "epoch": 0.9414221261046273, "grad_norm": 1.275641679763794, "learning_rate": 1.788336527513934e-07, "loss": 0.5764, "step": 6312 }, { "epoch": 0.9415712740967225, "grad_norm": 1.3046414852142334, "learning_rate": 1.7792511188826522e-07, "loss": 0.6429, "step": 6313 }, { "epoch": 0.9417204220888177, "grad_norm": 1.2102504968643188, "learning_rate": 1.7701886405149914e-07, "loss": 0.6162, "step": 6314 }, { "epoch": 0.9418695700809128, "grad_norm": 1.2563083171844482, "learning_rate": 1.7611490945266375e-07, "loss": 0.5842, "step": 6315 }, { "epoch": 0.942018718073008, "grad_norm": 1.2200006246566772, "learning_rate": 1.7521324830279463e-07, "loss": 0.5855, "step": 6316 }, { "epoch": 0.942167866065103, "grad_norm": 1.2225019931793213, "learning_rate": 1.7431388081238898e-07, "loss": 0.5304, "step": 6317 }, { "epoch": 0.9423170140571983, "grad_norm": 1.0563284158706665, "learning_rate": 1.7341680719141106e-07, "loss": 0.5354, "step": 6318 }, { "epoch": 0.9424661620492935, "grad_norm": 1.2075767517089844, "learning_rate": 1.7252202764928893e-07, "loss": 0.5383, "step": 6319 }, { "epoch": 0.9426153100413885, "grad_norm": 1.260694146156311, "learning_rate": 1.7162954239491213e-07, "loss": 0.5272, "step": 6320 }, { "epoch": 0.9427644580334837, "grad_norm": 1.1961941719055176, "learning_rate": 1.7073935163663847e-07, "loss": 0.5628, "step": 6321 }, { "epoch": 0.9429136060255788, "grad_norm": 1.2071489095687866, "learning_rate": 1.6985145558228942e-07, "loss": 0.5277, "step": 6322 }, { "epoch": 0.943062754017674, "grad_norm": 1.1267184019088745, "learning_rate": 1.6896585443914927e-07, "loss": 0.5489, "step": 6323 }, { "epoch": 0.9432119020097692, "grad_norm": 1.1682419776916504, "learning_rate": 1.6808254841396587e-07, "loss": 0.5954, "step": 6324 }, { "epoch": 0.9433610500018643, "grad_norm": 1.385467290878296, "learning_rate": 1.6720153771295656e-07, "loss": 0.6875, "step": 6325 }, { "epoch": 0.9435101979939595, "grad_norm": 0.8388324975967407, "learning_rate": 1.6632282254179456e-07, "loss": 0.5953, "step": 6326 }, { "epoch": 0.9436593459860546, "grad_norm": 1.3586167097091675, "learning_rate": 1.6544640310562466e-07, "loss": 0.5819, "step": 6327 }, { "epoch": 0.9438084939781498, "grad_norm": 1.1838237047195435, "learning_rate": 1.6457227960905097e-07, "loss": 0.6378, "step": 6328 }, { "epoch": 0.943957641970245, "grad_norm": 1.0441852807998657, "learning_rate": 1.6370045225614474e-07, "loss": 0.5398, "step": 6329 }, { "epoch": 0.9441067899623401, "grad_norm": 1.1171107292175293, "learning_rate": 1.6283092125043754e-07, "loss": 0.5619, "step": 6330 }, { "epoch": 0.9442559379544353, "grad_norm": 1.3204594850540161, "learning_rate": 1.6196368679492815e-07, "loss": 0.6024, "step": 6331 }, { "epoch": 0.9444050859465305, "grad_norm": 1.3212389945983887, "learning_rate": 1.6109874909207901e-07, "loss": 0.5938, "step": 6332 }, { "epoch": 0.9445542339386256, "grad_norm": 1.1471326351165771, "learning_rate": 1.6023610834381197e-07, "loss": 0.5877, "step": 6333 }, { "epoch": 0.9447033819307208, "grad_norm": 1.2124203443527222, "learning_rate": 1.593757647515204e-07, "loss": 0.5787, "step": 6334 }, { "epoch": 0.9448525299228159, "grad_norm": 1.3536348342895508, "learning_rate": 1.585177185160547e-07, "loss": 0.5894, "step": 6335 }, { "epoch": 0.9450016779149111, "grad_norm": 1.1490435600280762, "learning_rate": 1.576619698377313e-07, "loss": 0.5963, "step": 6336 }, { "epoch": 0.9451508259070063, "grad_norm": 1.2904834747314453, "learning_rate": 1.5680851891633042e-07, "loss": 0.5737, "step": 6337 }, { "epoch": 0.9452999738991014, "grad_norm": 1.2493797540664673, "learning_rate": 1.55957365951096e-07, "loss": 0.6227, "step": 6338 }, { "epoch": 0.9454491218911966, "grad_norm": 1.1385446786880493, "learning_rate": 1.5510851114073467e-07, "loss": 0.6047, "step": 6339 }, { "epoch": 0.9455982698832917, "grad_norm": 1.2571721076965332, "learning_rate": 1.5426195468341675e-07, "loss": 0.5702, "step": 6340 }, { "epoch": 0.9457474178753869, "grad_norm": 1.1441807746887207, "learning_rate": 1.5341769677677753e-07, "loss": 0.5548, "step": 6341 }, { "epoch": 0.945896565867482, "grad_norm": 1.0289925336837769, "learning_rate": 1.5257573761791265e-07, "loss": 0.5809, "step": 6342 }, { "epoch": 0.9460457138595771, "grad_norm": 1.223900556564331, "learning_rate": 1.5173607740338382e-07, "loss": 0.5453, "step": 6343 }, { "epoch": 0.9461948618516723, "grad_norm": 1.275010585784912, "learning_rate": 1.5089871632921638e-07, "loss": 0.584, "step": 6344 }, { "epoch": 0.9463440098437674, "grad_norm": 1.1913057565689087, "learning_rate": 1.5006365459089622e-07, "loss": 0.4995, "step": 6345 }, { "epoch": 0.9464931578358626, "grad_norm": 1.2505899667739868, "learning_rate": 1.4923089238337296e-07, "loss": 0.5913, "step": 6346 }, { "epoch": 0.9466423058279578, "grad_norm": 1.2340914011001587, "learning_rate": 1.484004299010633e-07, "loss": 0.5572, "step": 6347 }, { "epoch": 0.9467914538200529, "grad_norm": 1.2443296909332275, "learning_rate": 1.4757226733783992e-07, "loss": 0.5677, "step": 6348 }, { "epoch": 0.9469406018121481, "grad_norm": 1.2664616107940674, "learning_rate": 1.4674640488704596e-07, "loss": 0.5871, "step": 6349 }, { "epoch": 0.9470897498042432, "grad_norm": 1.2578424215316772, "learning_rate": 1.4592284274148273e-07, "loss": 0.5915, "step": 6350 }, { "epoch": 0.9472388977963384, "grad_norm": 1.1251015663146973, "learning_rate": 1.4510158109341644e-07, "loss": 0.6539, "step": 6351 }, { "epoch": 0.9473880457884336, "grad_norm": 1.2041406631469727, "learning_rate": 1.4428262013457706e-07, "loss": 0.5728, "step": 6352 }, { "epoch": 0.9475371937805287, "grad_norm": 1.109626293182373, "learning_rate": 1.4346596005615499e-07, "loss": 0.4832, "step": 6353 }, { "epoch": 0.9476863417726239, "grad_norm": 1.2958664894104004, "learning_rate": 1.4265160104880438e-07, "loss": 0.6146, "step": 6354 }, { "epoch": 0.9478354897647191, "grad_norm": 1.2489436864852905, "learning_rate": 1.4183954330264317e-07, "loss": 0.6139, "step": 6355 }, { "epoch": 0.9479846377568142, "grad_norm": 1.1711838245391846, "learning_rate": 1.410297870072508e-07, "loss": 0.6295, "step": 6356 }, { "epoch": 0.9481337857489094, "grad_norm": 1.282676100730896, "learning_rate": 1.402223323516727e-07, "loss": 0.4782, "step": 6357 }, { "epoch": 0.9482829337410045, "grad_norm": 1.2109891176223755, "learning_rate": 1.3941717952441146e-07, "loss": 0.6161, "step": 6358 }, { "epoch": 0.9484320817330997, "grad_norm": 1.3282538652420044, "learning_rate": 1.386143287134356e-07, "loss": 0.6276, "step": 6359 }, { "epoch": 0.9485812297251949, "grad_norm": 1.246658205986023, "learning_rate": 1.378137801061763e-07, "loss": 0.5738, "step": 6360 }, { "epoch": 0.94873037771729, "grad_norm": 1.0979125499725342, "learning_rate": 1.3701553388952627e-07, "loss": 0.5167, "step": 6361 }, { "epoch": 0.9488795257093852, "grad_norm": 1.2463278770446777, "learning_rate": 1.362195902498431e-07, "loss": 0.5903, "step": 6362 }, { "epoch": 0.9490286737014803, "grad_norm": 1.222005009651184, "learning_rate": 1.354259493729426e-07, "loss": 0.6604, "step": 6363 }, { "epoch": 0.9491778216935755, "grad_norm": 1.1871280670166016, "learning_rate": 1.346346114441066e-07, "loss": 0.6415, "step": 6364 }, { "epoch": 0.9493269696856707, "grad_norm": 1.164472222328186, "learning_rate": 1.3384557664807729e-07, "loss": 0.4854, "step": 6365 }, { "epoch": 0.9494761176777657, "grad_norm": 1.2384940385818481, "learning_rate": 1.3305884516906065e-07, "loss": 0.6271, "step": 6366 }, { "epoch": 0.9496252656698609, "grad_norm": 1.2472126483917236, "learning_rate": 1.322744171907242e-07, "loss": 0.6084, "step": 6367 }, { "epoch": 0.949774413661956, "grad_norm": 1.2314177751541138, "learning_rate": 1.3149229289619593e-07, "loss": 0.5313, "step": 6368 }, { "epoch": 0.9499235616540512, "grad_norm": 1.2268199920654297, "learning_rate": 1.3071247246806972e-07, "loss": 0.6071, "step": 6369 }, { "epoch": 0.9500727096461464, "grad_norm": 1.1292263269424438, "learning_rate": 1.299349560883989e-07, "loss": 0.618, "step": 6370 }, { "epoch": 0.9502218576382415, "grad_norm": 1.2461588382720947, "learning_rate": 1.2915974393870046e-07, "loss": 0.6353, "step": 6371 }, { "epoch": 0.9503710056303367, "grad_norm": 1.1556097269058228, "learning_rate": 1.2838683619995185e-07, "loss": 0.6093, "step": 6372 }, { "epoch": 0.9505201536224318, "grad_norm": 1.270248293876648, "learning_rate": 1.276162330525932e-07, "loss": 0.6783, "step": 6373 }, { "epoch": 0.950669301614527, "grad_norm": 1.1830885410308838, "learning_rate": 1.2684793467652722e-07, "loss": 0.5853, "step": 6374 }, { "epoch": 0.9508184496066222, "grad_norm": 1.2296266555786133, "learning_rate": 1.2608194125111716e-07, "loss": 0.548, "step": 6375 }, { "epoch": 0.9509675975987173, "grad_norm": 1.302587866783142, "learning_rate": 1.2531825295519106e-07, "loss": 0.5869, "step": 6376 }, { "epoch": 0.9511167455908125, "grad_norm": 1.14602792263031, "learning_rate": 1.2455686996703409e-07, "loss": 0.5032, "step": 6377 }, { "epoch": 0.9512658935829077, "grad_norm": 1.1087653636932373, "learning_rate": 1.237977924643985e-07, "loss": 0.5118, "step": 6378 }, { "epoch": 0.9514150415750028, "grad_norm": 1.143277645111084, "learning_rate": 1.2304102062449475e-07, "loss": 0.4909, "step": 6379 }, { "epoch": 0.951564189567098, "grad_norm": 1.1577367782592773, "learning_rate": 1.2228655462399598e-07, "loss": 0.6039, "step": 6380 }, { "epoch": 0.9517133375591931, "grad_norm": 1.2017946243286133, "learning_rate": 1.2153439463903793e-07, "loss": 0.5251, "step": 6381 }, { "epoch": 0.9518624855512883, "grad_norm": 1.2363051176071167, "learning_rate": 1.2078454084521575e-07, "loss": 0.5875, "step": 6382 }, { "epoch": 0.9520116335433835, "grad_norm": 1.2137548923492432, "learning_rate": 1.2003699341758934e-07, "loss": 0.5475, "step": 6383 }, { "epoch": 0.9521607815354786, "grad_norm": 1.1203805208206177, "learning_rate": 1.192917525306758e-07, "loss": 0.5479, "step": 6384 }, { "epoch": 0.9523099295275738, "grad_norm": 1.1147329807281494, "learning_rate": 1.1854881835846044e-07, "loss": 0.502, "step": 6385 }, { "epoch": 0.9524590775196689, "grad_norm": 1.137062668800354, "learning_rate": 1.1780819107438112e-07, "loss": 0.5256, "step": 6386 }, { "epoch": 0.952608225511764, "grad_norm": 1.2935354709625244, "learning_rate": 1.1706987085134624e-07, "loss": 0.5095, "step": 6387 }, { "epoch": 0.9527573735038593, "grad_norm": 1.3371758460998535, "learning_rate": 1.1633385786171903e-07, "loss": 0.6579, "step": 6388 }, { "epoch": 0.9529065214959543, "grad_norm": 1.3444806337356567, "learning_rate": 1.1560015227732757e-07, "loss": 0.6726, "step": 6389 }, { "epoch": 0.9530556694880495, "grad_norm": 0.8876031637191772, "learning_rate": 1.1486875426945931e-07, "loss": 0.6499, "step": 6390 }, { "epoch": 0.9532048174801446, "grad_norm": 1.1519720554351807, "learning_rate": 1.1413966400886544e-07, "loss": 0.5547, "step": 6391 }, { "epoch": 0.9533539654722398, "grad_norm": 1.0705149173736572, "learning_rate": 1.1341288166575425e-07, "loss": 0.5254, "step": 6392 }, { "epoch": 0.953503113464335, "grad_norm": 0.8620312213897705, "learning_rate": 1.1268840740979891e-07, "loss": 0.6403, "step": 6393 }, { "epoch": 0.9536522614564301, "grad_norm": 1.2672737836837769, "learning_rate": 1.1196624141013301e-07, "loss": 0.5437, "step": 6394 }, { "epoch": 0.9538014094485253, "grad_norm": 1.1866109371185303, "learning_rate": 1.1124638383534947e-07, "loss": 0.469, "step": 6395 }, { "epoch": 0.9539505574406204, "grad_norm": 1.170609474182129, "learning_rate": 1.1052883485350607e-07, "loss": 0.5539, "step": 6396 }, { "epoch": 0.9540997054327156, "grad_norm": 1.1672465801239014, "learning_rate": 1.0981359463211772e-07, "loss": 0.5534, "step": 6397 }, { "epoch": 0.9542488534248108, "grad_norm": 1.193922519683838, "learning_rate": 1.0910066333816194e-07, "loss": 0.6219, "step": 6398 }, { "epoch": 0.9543980014169059, "grad_norm": 1.337250828742981, "learning_rate": 1.083900411380756e-07, "loss": 0.5368, "step": 6399 }, { "epoch": 0.9545471494090011, "grad_norm": 1.2116894721984863, "learning_rate": 1.0768172819776158e-07, "loss": 0.6413, "step": 6400 }, { "epoch": 0.9546962974010962, "grad_norm": 1.220923900604248, "learning_rate": 1.0697572468257755e-07, "loss": 0.6135, "step": 6401 }, { "epoch": 0.9548454453931914, "grad_norm": 1.0945217609405518, "learning_rate": 1.0627203075734394e-07, "loss": 0.5122, "step": 6402 }, { "epoch": 0.9549945933852866, "grad_norm": 1.121080994606018, "learning_rate": 1.0557064658634486e-07, "loss": 0.5234, "step": 6403 }, { "epoch": 0.9551437413773817, "grad_norm": 1.1211763620376587, "learning_rate": 1.0487157233332046e-07, "loss": 0.4828, "step": 6404 }, { "epoch": 0.9552928893694769, "grad_norm": 1.1475988626480103, "learning_rate": 1.0417480816147574e-07, "loss": 0.6185, "step": 6405 }, { "epoch": 0.9554420373615721, "grad_norm": 1.3037241697311401, "learning_rate": 1.0348035423347613e-07, "loss": 0.6111, "step": 6406 }, { "epoch": 0.9555911853536672, "grad_norm": 1.0155028104782104, "learning_rate": 1.0278821071144306e-07, "loss": 0.4387, "step": 6407 }, { "epoch": 0.9557403333457624, "grad_norm": 1.1168692111968994, "learning_rate": 1.0209837775696396e-07, "loss": 0.5377, "step": 6408 }, { "epoch": 0.9558894813378574, "grad_norm": 1.144568920135498, "learning_rate": 1.0141085553108443e-07, "loss": 0.5902, "step": 6409 }, { "epoch": 0.9560386293299526, "grad_norm": 1.2669768333435059, "learning_rate": 1.0072564419431053e-07, "loss": 0.5427, "step": 6410 }, { "epoch": 0.9561877773220478, "grad_norm": 1.1885126829147339, "learning_rate": 1.0004274390660984e-07, "loss": 0.5946, "step": 6411 }, { "epoch": 0.9563369253141429, "grad_norm": 1.289277195930481, "learning_rate": 9.936215482740819e-08, "loss": 0.5856, "step": 6412 }, { "epoch": 0.9564860733062381, "grad_norm": 1.0745917558670044, "learning_rate": 9.868387711559624e-08, "loss": 0.5328, "step": 6413 }, { "epoch": 0.9566352212983332, "grad_norm": 1.2452219724655151, "learning_rate": 9.800791092951956e-08, "loss": 0.5866, "step": 6414 }, { "epoch": 0.9567843692904284, "grad_norm": 1.0797873735427856, "learning_rate": 9.733425642698857e-08, "loss": 0.5631, "step": 6415 }, { "epoch": 0.9569335172825236, "grad_norm": 1.2857869863510132, "learning_rate": 9.666291376527304e-08, "loss": 0.6057, "step": 6416 }, { "epoch": 0.9570826652746187, "grad_norm": 1.312244176864624, "learning_rate": 9.59938831010998e-08, "loss": 0.6421, "step": 6417 }, { "epoch": 0.9572318132667139, "grad_norm": 1.1746469736099243, "learning_rate": 9.532716459065838e-08, "loss": 0.5992, "step": 6418 }, { "epoch": 0.957380961258809, "grad_norm": 1.1872800588607788, "learning_rate": 9.466275838960093e-08, "loss": 0.5938, "step": 6419 }, { "epoch": 0.9575301092509042, "grad_norm": 1.154873013496399, "learning_rate": 9.400066465303448e-08, "loss": 0.5638, "step": 6420 }, { "epoch": 0.9576792572429994, "grad_norm": 1.0816813707351685, "learning_rate": 9.334088353553206e-08, "loss": 0.5019, "step": 6421 }, { "epoch": 0.9578284052350945, "grad_norm": 1.1647822856903076, "learning_rate": 9.268341519112156e-08, "loss": 0.553, "step": 6422 }, { "epoch": 0.9579775532271897, "grad_norm": 1.1818844079971313, "learning_rate": 9.202825977329355e-08, "loss": 0.6693, "step": 6423 }, { "epoch": 0.9581267012192848, "grad_norm": 1.32778799533844, "learning_rate": 9.1375417434999e-08, "loss": 0.645, "step": 6424 }, { "epoch": 0.95827584921138, "grad_norm": 1.304006576538086, "learning_rate": 9.072488832864823e-08, "loss": 0.5755, "step": 6425 }, { "epoch": 0.9584249972034752, "grad_norm": 1.2456406354904175, "learning_rate": 9.007667260610975e-08, "loss": 0.6551, "step": 6426 }, { "epoch": 0.9585741451955703, "grad_norm": 1.1353358030319214, "learning_rate": 8.943077041871584e-08, "loss": 0.5415, "step": 6427 }, { "epoch": 0.9587232931876655, "grad_norm": 1.2146351337432861, "learning_rate": 8.878718191725478e-08, "loss": 0.6007, "step": 6428 }, { "epoch": 0.9588724411797607, "grad_norm": 1.2282220125198364, "learning_rate": 8.814590725197636e-08, "loss": 0.5691, "step": 6429 }, { "epoch": 0.9590215891718558, "grad_norm": 1.3317784070968628, "learning_rate": 8.750694657259195e-08, "loss": 0.5227, "step": 6430 }, { "epoch": 0.959170737163951, "grad_norm": 1.1792433261871338, "learning_rate": 8.687030002827113e-08, "loss": 0.5934, "step": 6431 }, { "epoch": 0.959319885156046, "grad_norm": 1.1515529155731201, "learning_rate": 8.623596776764165e-08, "loss": 0.5753, "step": 6432 }, { "epoch": 0.9594690331481412, "grad_norm": 1.1903266906738281, "learning_rate": 8.560394993879173e-08, "loss": 0.5311, "step": 6433 }, { "epoch": 0.9596181811402364, "grad_norm": 1.1017433404922485, "learning_rate": 8.497424668927224e-08, "loss": 0.639, "step": 6434 }, { "epoch": 0.9597673291323315, "grad_norm": 1.1737089157104492, "learning_rate": 8.434685816609e-08, "loss": 0.545, "step": 6435 }, { "epoch": 0.9599164771244267, "grad_norm": 1.0707844495773315, "learning_rate": 8.372178451571344e-08, "loss": 0.5593, "step": 6436 }, { "epoch": 0.9600656251165218, "grad_norm": 1.1421536207199097, "learning_rate": 8.309902588407026e-08, "loss": 0.6218, "step": 6437 }, { "epoch": 0.960214773108617, "grad_norm": 1.2269995212554932, "learning_rate": 8.247858241654638e-08, "loss": 0.637, "step": 6438 }, { "epoch": 0.9603639211007122, "grad_norm": 1.292898416519165, "learning_rate": 8.186045425798817e-08, "loss": 0.5696, "step": 6439 }, { "epoch": 0.9605130690928073, "grad_norm": 1.206351399421692, "learning_rate": 8.124464155270351e-08, "loss": 0.6015, "step": 6440 }, { "epoch": 0.9606622170849025, "grad_norm": 1.0886449813842773, "learning_rate": 8.063114444445741e-08, "loss": 0.5519, "step": 6441 }, { "epoch": 0.9608113650769976, "grad_norm": 1.340680718421936, "learning_rate": 8.001996307647197e-08, "loss": 0.5598, "step": 6442 }, { "epoch": 0.9609605130690928, "grad_norm": 1.100494146347046, "learning_rate": 7.941109759143528e-08, "loss": 0.5143, "step": 6443 }, { "epoch": 0.961109661061188, "grad_norm": 1.134639024734497, "learning_rate": 7.880454813148807e-08, "loss": 0.5234, "step": 6444 }, { "epoch": 0.9612588090532831, "grad_norm": 1.1037570238113403, "learning_rate": 7.820031483823487e-08, "loss": 0.5021, "step": 6445 }, { "epoch": 0.9614079570453783, "grad_norm": 1.2725509405136108, "learning_rate": 7.759839785273615e-08, "loss": 0.6446, "step": 6446 }, { "epoch": 0.9615571050374734, "grad_norm": 1.0902701616287231, "learning_rate": 7.699879731551397e-08, "loss": 0.547, "step": 6447 }, { "epoch": 0.9617062530295686, "grad_norm": 1.325758934020996, "learning_rate": 7.640151336654966e-08, "loss": 0.6236, "step": 6448 }, { "epoch": 0.9618554010216638, "grad_norm": 1.1894235610961914, "learning_rate": 7.580654614528282e-08, "loss": 0.5424, "step": 6449 }, { "epoch": 0.9620045490137589, "grad_norm": 1.2949814796447754, "learning_rate": 7.521389579061234e-08, "loss": 0.5708, "step": 6450 }, { "epoch": 0.9621536970058541, "grad_norm": 1.1229921579360962, "learning_rate": 7.462356244089642e-08, "loss": 0.478, "step": 6451 }, { "epoch": 0.9623028449979493, "grad_norm": 1.1046682596206665, "learning_rate": 7.403554623395038e-08, "loss": 0.5041, "step": 6452 }, { "epoch": 0.9624519929900444, "grad_norm": 1.274896502494812, "learning_rate": 7.344984730705218e-08, "loss": 0.647, "step": 6453 }, { "epoch": 0.9626011409821396, "grad_norm": 1.3023377656936646, "learning_rate": 7.286646579693691e-08, "loss": 0.6011, "step": 6454 }, { "epoch": 0.9627502889742346, "grad_norm": 1.1811611652374268, "learning_rate": 7.228540183979782e-08, "loss": 0.5285, "step": 6455 }, { "epoch": 0.9628994369663298, "grad_norm": 1.150793194770813, "learning_rate": 7.170665557128975e-08, "loss": 0.572, "step": 6456 }, { "epoch": 0.963048584958425, "grad_norm": 1.2023189067840576, "learning_rate": 7.11302271265224e-08, "loss": 0.6066, "step": 6457 }, { "epoch": 0.9631977329505201, "grad_norm": 1.2383757829666138, "learning_rate": 7.055611664006701e-08, "loss": 0.5675, "step": 6458 }, { "epoch": 0.9633468809426153, "grad_norm": 1.1538376808166504, "learning_rate": 6.998432424595524e-08, "loss": 0.5658, "step": 6459 }, { "epoch": 0.9634960289347104, "grad_norm": 1.1374019384384155, "learning_rate": 6.941485007767479e-08, "loss": 0.574, "step": 6460 }, { "epoch": 0.9636451769268056, "grad_norm": 1.0388529300689697, "learning_rate": 6.884769426817261e-08, "loss": 0.5441, "step": 6461 }, { "epoch": 0.9637943249189008, "grad_norm": 1.2032604217529297, "learning_rate": 6.828285694985504e-08, "loss": 0.5373, "step": 6462 }, { "epoch": 0.9639434729109959, "grad_norm": 1.1540765762329102, "learning_rate": 6.772033825458769e-08, "loss": 0.5735, "step": 6463 }, { "epoch": 0.9640926209030911, "grad_norm": 0.8362287282943726, "learning_rate": 6.716013831369217e-08, "loss": 0.6434, "step": 6464 }, { "epoch": 0.9642417688951862, "grad_norm": 1.2194432020187378, "learning_rate": 6.660225725795278e-08, "loss": 0.6229, "step": 6465 }, { "epoch": 0.9643909168872814, "grad_norm": 1.2763513326644897, "learning_rate": 6.604669521760975e-08, "loss": 0.5928, "step": 6466 }, { "epoch": 0.9645400648793766, "grad_norm": 1.1008286476135254, "learning_rate": 6.54934523223627e-08, "loss": 0.5529, "step": 6467 }, { "epoch": 0.9646892128714717, "grad_norm": 1.2441729307174683, "learning_rate": 6.494252870136942e-08, "loss": 0.6184, "step": 6468 }, { "epoch": 0.9648383608635669, "grad_norm": 1.2432453632354736, "learning_rate": 6.439392448324699e-08, "loss": 0.6413, "step": 6469 }, { "epoch": 0.964987508855662, "grad_norm": 1.2762104272842407, "learning_rate": 6.384763979607078e-08, "loss": 0.5626, "step": 6470 }, { "epoch": 0.9651366568477572, "grad_norm": 1.2441222667694092, "learning_rate": 6.330367476737321e-08, "loss": 0.5623, "step": 6471 }, { "epoch": 0.9652858048398524, "grad_norm": 1.2497000694274902, "learning_rate": 6.276202952414823e-08, "loss": 0.5397, "step": 6472 }, { "epoch": 0.9654349528319475, "grad_norm": 1.193108081817627, "learning_rate": 6.222270419284359e-08, "loss": 0.5368, "step": 6473 }, { "epoch": 0.9655841008240427, "grad_norm": 1.053763508796692, "learning_rate": 6.168569889937081e-08, "loss": 0.4779, "step": 6474 }, { "epoch": 0.9657332488161379, "grad_norm": 1.1994998455047607, "learning_rate": 6.115101376909738e-08, "loss": 0.6759, "step": 6475 }, { "epoch": 0.965882396808233, "grad_norm": 1.2477494478225708, "learning_rate": 6.061864892684788e-08, "loss": 0.6199, "step": 6476 }, { "epoch": 0.9660315448003282, "grad_norm": 1.2381569147109985, "learning_rate": 6.008860449690512e-08, "loss": 0.5741, "step": 6477 }, { "epoch": 0.9661806927924232, "grad_norm": 1.2354588508605957, "learning_rate": 5.956088060301457e-08, "loss": 0.5946, "step": 6478 }, { "epoch": 0.9663298407845184, "grad_norm": 1.2035088539123535, "learning_rate": 5.903547736837323e-08, "loss": 0.5144, "step": 6479 }, { "epoch": 0.9664789887766136, "grad_norm": 1.184382677078247, "learning_rate": 5.851239491564298e-08, "loss": 0.5162, "step": 6480 }, { "epoch": 0.9666281367687087, "grad_norm": 1.2532315254211426, "learning_rate": 5.799163336693836e-08, "loss": 0.5949, "step": 6481 }, { "epoch": 0.9667772847608039, "grad_norm": 1.1372262239456177, "learning_rate": 5.7473192843835454e-08, "loss": 0.5749, "step": 6482 }, { "epoch": 0.966926432752899, "grad_norm": 1.1841710805892944, "learning_rate": 5.6957073467367454e-08, "loss": 0.542, "step": 6483 }, { "epoch": 0.9670755807449942, "grad_norm": 1.1938998699188232, "learning_rate": 5.644327535802685e-08, "loss": 0.5807, "step": 6484 }, { "epoch": 0.9672247287370894, "grad_norm": 1.2231106758117676, "learning_rate": 5.5931798635761036e-08, "loss": 0.5536, "step": 6485 }, { "epoch": 0.9673738767291845, "grad_norm": 1.286909818649292, "learning_rate": 5.542264341997894e-08, "loss": 0.594, "step": 6486 }, { "epoch": 0.9675230247212797, "grad_norm": 1.2632851600646973, "learning_rate": 5.491580982954547e-08, "loss": 0.5819, "step": 6487 }, { "epoch": 0.9676721727133748, "grad_norm": 1.1602877378463745, "learning_rate": 5.441129798278488e-08, "loss": 0.7109, "step": 6488 }, { "epoch": 0.96782132070547, "grad_norm": 1.3645342588424683, "learning_rate": 5.3909107997477395e-08, "loss": 0.5703, "step": 6489 }, { "epoch": 0.9679704686975652, "grad_norm": 1.1953927278518677, "learning_rate": 5.3409239990863673e-08, "loss": 0.5345, "step": 6490 }, { "epoch": 0.9681196166896603, "grad_norm": 1.2260979413986206, "learning_rate": 5.291169407964147e-08, "loss": 0.5983, "step": 6491 }, { "epoch": 0.9682687646817555, "grad_norm": 1.166195273399353, "learning_rate": 5.2416470379964556e-08, "loss": 0.6038, "step": 6492 }, { "epoch": 0.9684179126738506, "grad_norm": 1.0992991924285889, "learning_rate": 5.192356900744711e-08, "loss": 0.5551, "step": 6493 }, { "epoch": 0.9685670606659458, "grad_norm": 1.271265983581543, "learning_rate": 5.1432990077160405e-08, "loss": 0.5786, "step": 6494 }, { "epoch": 0.968716208658041, "grad_norm": 1.1473543643951416, "learning_rate": 5.0944733703632845e-08, "loss": 0.5622, "step": 6495 }, { "epoch": 0.9688653566501361, "grad_norm": 1.1961232423782349, "learning_rate": 5.04588000008499e-08, "loss": 0.5362, "step": 6496 }, { "epoch": 0.9690145046422313, "grad_norm": 1.2378156185150146, "learning_rate": 4.9975189082258625e-08, "loss": 0.578, "step": 6497 }, { "epoch": 0.9691636526343264, "grad_norm": 1.2409298419952393, "learning_rate": 4.949390106075758e-08, "loss": 0.6239, "step": 6498 }, { "epoch": 0.9693128006264216, "grad_norm": 1.2410122156143188, "learning_rate": 4.901493604870799e-08, "loss": 0.599, "step": 6499 }, { "epoch": 0.9694619486185168, "grad_norm": 1.2264482975006104, "learning_rate": 4.853829415792932e-08, "loss": 0.6155, "step": 6500 }, { "epoch": 0.9696110966106118, "grad_norm": 1.0807052850723267, "learning_rate": 4.8063975499694774e-08, "loss": 0.5632, "step": 6501 }, { "epoch": 0.969760244602707, "grad_norm": 1.1762325763702393, "learning_rate": 4.7591980184736874e-08, "loss": 0.5745, "step": 6502 }, { "epoch": 0.9699093925948022, "grad_norm": 1.2538203001022339, "learning_rate": 4.7122308323246377e-08, "loss": 0.6227, "step": 6503 }, { "epoch": 0.9700585405868973, "grad_norm": 1.2494149208068848, "learning_rate": 4.6654960024871134e-08, "loss": 0.5934, "step": 6504 }, { "epoch": 0.9702076885789925, "grad_norm": 1.1578595638275146, "learning_rate": 4.618993539871719e-08, "loss": 0.575, "step": 6505 }, { "epoch": 0.9703568365710876, "grad_norm": 1.247901201248169, "learning_rate": 4.57272345533466e-08, "loss": 0.6125, "step": 6506 }, { "epoch": 0.9705059845631828, "grad_norm": 1.200121283531189, "learning_rate": 4.526685759678073e-08, "loss": 0.5512, "step": 6507 }, { "epoch": 0.970655132555278, "grad_norm": 1.1533384323120117, "learning_rate": 4.480880463649584e-08, "loss": 0.4997, "step": 6508 }, { "epoch": 0.9708042805473731, "grad_norm": 1.239682674407959, "learning_rate": 4.4353075779429713e-08, "loss": 0.6128, "step": 6509 }, { "epoch": 0.9709534285394683, "grad_norm": 1.1357295513153076, "learning_rate": 4.389967113197391e-08, "loss": 0.5427, "step": 6510 }, { "epoch": 0.9711025765315634, "grad_norm": 1.1479551792144775, "learning_rate": 4.3448590799978205e-08, "loss": 0.5252, "step": 6511 }, { "epoch": 0.9712517245236586, "grad_norm": 1.1855865716934204, "learning_rate": 4.299983488875059e-08, "loss": 0.5836, "step": 6512 }, { "epoch": 0.9714008725157538, "grad_norm": 1.1881803274154663, "learning_rate": 4.255340350305726e-08, "loss": 0.5654, "step": 6513 }, { "epoch": 0.9715500205078489, "grad_norm": 1.4002772569656372, "learning_rate": 4.210929674711817e-08, "loss": 0.6127, "step": 6514 }, { "epoch": 0.9716991684999441, "grad_norm": 0.90034019947052, "learning_rate": 4.166751472461483e-08, "loss": 0.633, "step": 6515 }, { "epoch": 0.9718483164920392, "grad_norm": 1.196420431137085, "learning_rate": 4.1228057538683644e-08, "loss": 0.5685, "step": 6516 }, { "epoch": 0.9719974644841344, "grad_norm": 1.1557353734970093, "learning_rate": 4.0790925291918084e-08, "loss": 0.5936, "step": 6517 }, { "epoch": 0.9721466124762296, "grad_norm": 1.099671483039856, "learning_rate": 4.035611808636986e-08, "loss": 0.5251, "step": 6518 }, { "epoch": 0.9722957604683247, "grad_norm": 1.1304160356521606, "learning_rate": 3.9923636023547765e-08, "loss": 0.6016, "step": 6519 }, { "epoch": 0.9724449084604199, "grad_norm": 1.1932915449142456, "learning_rate": 3.94934792044166e-08, "loss": 0.5465, "step": 6520 }, { "epoch": 0.972594056452515, "grad_norm": 1.148717999458313, "learning_rate": 3.906564772939936e-08, "loss": 0.5399, "step": 6521 }, { "epoch": 0.9727432044446102, "grad_norm": 1.2140463590621948, "learning_rate": 3.8640141698378376e-08, "loss": 0.5817, "step": 6522 }, { "epoch": 0.9728923524367054, "grad_norm": 1.205650806427002, "learning_rate": 3.821696121068752e-08, "loss": 0.5433, "step": 6523 }, { "epoch": 0.9730415004288004, "grad_norm": 1.249142050743103, "learning_rate": 3.779610636512221e-08, "loss": 0.5293, "step": 6524 }, { "epoch": 0.9731906484208956, "grad_norm": 1.1504437923431396, "learning_rate": 3.737757725993496e-08, "loss": 0.6036, "step": 6525 }, { "epoch": 0.9733397964129908, "grad_norm": 1.2869045734405518, "learning_rate": 3.696137399283206e-08, "loss": 0.5217, "step": 6526 }, { "epoch": 0.9734889444050859, "grad_norm": 1.222469449043274, "learning_rate": 3.654749666098023e-08, "loss": 0.6191, "step": 6527 }, { "epoch": 0.9736380923971811, "grad_norm": 1.1093027591705322, "learning_rate": 3.613594536100107e-08, "loss": 0.552, "step": 6528 }, { "epoch": 0.9737872403892762, "grad_norm": 1.1982091665267944, "learning_rate": 3.5726720188974384e-08, "loss": 0.5408, "step": 6529 }, { "epoch": 0.9739363883813714, "grad_norm": 1.141862154006958, "learning_rate": 3.531982124043598e-08, "loss": 0.5793, "step": 6530 }, { "epoch": 0.9740855363734666, "grad_norm": 1.218837857246399, "learning_rate": 3.491524861037876e-08, "loss": 0.5776, "step": 6531 }, { "epoch": 0.9742346843655617, "grad_norm": 1.115351915359497, "learning_rate": 3.451300239325384e-08, "loss": 0.5692, "step": 6532 }, { "epoch": 0.9743838323576569, "grad_norm": 1.2518665790557861, "learning_rate": 3.41130826829672e-08, "loss": 0.6264, "step": 6533 }, { "epoch": 0.974532980349752, "grad_norm": 1.160880446434021, "learning_rate": 3.371548957288418e-08, "loss": 0.64, "step": 6534 }, { "epoch": 0.9746821283418472, "grad_norm": 1.1653993129730225, "learning_rate": 3.332022315582273e-08, "loss": 0.614, "step": 6535 }, { "epoch": 0.9748312763339424, "grad_norm": 1.1523946523666382, "learning_rate": 3.292728352406238e-08, "loss": 0.5062, "step": 6536 }, { "epoch": 0.9749804243260375, "grad_norm": 1.2423733472824097, "learning_rate": 3.253667076933753e-08, "loss": 0.5827, "step": 6537 }, { "epoch": 0.9751295723181327, "grad_norm": 1.239054560661316, "learning_rate": 3.214838498283857e-08, "loss": 0.5974, "step": 6538 }, { "epoch": 0.9752787203102278, "grad_norm": 1.1484235525131226, "learning_rate": 3.176242625521297e-08, "loss": 0.5138, "step": 6539 }, { "epoch": 0.975427868302323, "grad_norm": 1.1885241270065308, "learning_rate": 3.137879467656535e-08, "loss": 0.5728, "step": 6540 }, { "epoch": 0.9755770162944182, "grad_norm": 1.2175688743591309, "learning_rate": 3.099749033645738e-08, "loss": 0.5681, "step": 6541 }, { "epoch": 0.9757261642865133, "grad_norm": 1.2504844665527344, "learning_rate": 3.061851332390786e-08, "loss": 0.644, "step": 6542 }, { "epoch": 0.9758753122786085, "grad_norm": 1.0783989429473877, "learning_rate": 3.024186372738935e-08, "loss": 0.5537, "step": 6543 }, { "epoch": 0.9760244602707036, "grad_norm": 1.3090254068374634, "learning_rate": 2.986754163483485e-08, "loss": 0.6128, "step": 6544 }, { "epoch": 0.9761736082627988, "grad_norm": 1.0840861797332764, "learning_rate": 2.949554713363112e-08, "loss": 0.5472, "step": 6545 }, { "epoch": 0.976322756254894, "grad_norm": 1.1998718976974487, "learning_rate": 2.9125880310623132e-08, "loss": 0.566, "step": 6546 }, { "epoch": 0.976471904246989, "grad_norm": 1.2697629928588867, "learning_rate": 2.875854125211297e-08, "loss": 0.5836, "step": 6547 }, { "epoch": 0.9766210522390842, "grad_norm": 1.2036371231079102, "learning_rate": 2.8393530043856476e-08, "loss": 0.5559, "step": 6548 }, { "epoch": 0.9767702002311794, "grad_norm": 1.130122184753418, "learning_rate": 2.803084677106882e-08, "loss": 0.5237, "step": 6549 }, { "epoch": 0.9769193482232745, "grad_norm": 1.1274508237838745, "learning_rate": 2.767049151842005e-08, "loss": 0.5028, "step": 6550 }, { "epoch": 0.9770684962153697, "grad_norm": 1.1969759464263916, "learning_rate": 2.731246437003843e-08, "loss": 0.5949, "step": 6551 }, { "epoch": 0.9772176442074648, "grad_norm": 1.2015271186828613, "learning_rate": 2.695676540950709e-08, "loss": 0.5385, "step": 6552 }, { "epoch": 0.97736679219956, "grad_norm": 1.1509608030319214, "learning_rate": 2.660339471986739e-08, "loss": 0.5709, "step": 6553 }, { "epoch": 0.9775159401916552, "grad_norm": 1.2129554748535156, "learning_rate": 2.6252352383613346e-08, "loss": 0.6689, "step": 6554 }, { "epoch": 0.9776650881837503, "grad_norm": 1.0754669904708862, "learning_rate": 2.59036384827005e-08, "loss": 0.5766, "step": 6555 }, { "epoch": 0.9778142361758455, "grad_norm": 0.8398563265800476, "learning_rate": 2.555725309853818e-08, "loss": 0.6579, "step": 6556 }, { "epoch": 0.9779633841679406, "grad_norm": 1.2021374702453613, "learning_rate": 2.5213196311990595e-08, "loss": 0.6304, "step": 6557 }, { "epoch": 0.9781125321600358, "grad_norm": 1.212191104888916, "learning_rate": 2.4871468203382376e-08, "loss": 0.4952, "step": 6558 }, { "epoch": 0.978261680152131, "grad_norm": 1.2525053024291992, "learning_rate": 2.4532068852489708e-08, "loss": 0.6266, "step": 6559 }, { "epoch": 0.9784108281442261, "grad_norm": 1.2957078218460083, "learning_rate": 2.4194998338548103e-08, "loss": 0.5987, "step": 6560 }, { "epoch": 0.9785599761363213, "grad_norm": 1.2854996919631958, "learning_rate": 2.3860256740250166e-08, "loss": 0.5671, "step": 6561 }, { "epoch": 0.9787091241284164, "grad_norm": 1.211551547050476, "learning_rate": 2.352784413574227e-08, "loss": 0.5714, "step": 6562 }, { "epoch": 0.9788582721205116, "grad_norm": 1.2305408716201782, "learning_rate": 2.3197760602629015e-08, "loss": 0.6236, "step": 6563 }, { "epoch": 0.9790074201126068, "grad_norm": 1.1838093996047974, "learning_rate": 2.2870006217969864e-08, "loss": 0.5375, "step": 6564 }, { "epoch": 0.9791565681047019, "grad_norm": 1.1716768741607666, "learning_rate": 2.25445810582825e-08, "loss": 0.5483, "step": 6565 }, { "epoch": 0.9793057160967971, "grad_norm": 1.0900545120239258, "learning_rate": 2.2221485199537262e-08, "loss": 0.5017, "step": 6566 }, { "epoch": 0.9794548640888922, "grad_norm": 1.324167251586914, "learning_rate": 2.1900718717164927e-08, "loss": 0.6002, "step": 6567 }, { "epoch": 0.9796040120809874, "grad_norm": 1.1475260257720947, "learning_rate": 2.1582281686048924e-08, "loss": 0.5384, "step": 6568 }, { "epoch": 0.9797531600730826, "grad_norm": 1.2252428531646729, "learning_rate": 2.1266174180532006e-08, "loss": 0.5844, "step": 6569 }, { "epoch": 0.9799023080651776, "grad_norm": 1.248701572418213, "learning_rate": 2.09523962744107e-08, "loss": 0.631, "step": 6570 }, { "epoch": 0.9800514560572728, "grad_norm": 1.1857106685638428, "learning_rate": 2.0640948040937525e-08, "loss": 0.5152, "step": 6571 }, { "epoch": 0.980200604049368, "grad_norm": 1.2551850080490112, "learning_rate": 2.0331829552824313e-08, "loss": 0.587, "step": 6572 }, { "epoch": 0.9803497520414631, "grad_norm": 1.1681004762649536, "learning_rate": 2.0025040882234447e-08, "loss": 0.5885, "step": 6573 }, { "epoch": 0.9804989000335583, "grad_norm": 1.2201226949691772, "learning_rate": 1.9720582100791753e-08, "loss": 0.5912, "step": 6574 }, { "epoch": 0.9806480480256534, "grad_norm": 1.2447447776794434, "learning_rate": 1.94184532795727e-08, "loss": 0.5426, "step": 6575 }, { "epoch": 0.9807971960177486, "grad_norm": 0.8859454393386841, "learning_rate": 1.9118654489110877e-08, "loss": 0.6514, "step": 6576 }, { "epoch": 0.9809463440098438, "grad_norm": 1.0960830450057983, "learning_rate": 1.8821185799398067e-08, "loss": 0.5561, "step": 6577 }, { "epoch": 0.9810954920019389, "grad_norm": 1.2526311874389648, "learning_rate": 1.8526047279878723e-08, "loss": 0.5979, "step": 6578 }, { "epoch": 0.9812446399940341, "grad_norm": 1.159339427947998, "learning_rate": 1.8233238999454394e-08, "loss": 0.542, "step": 6579 }, { "epoch": 0.9813937879861292, "grad_norm": 1.2316699028015137, "learning_rate": 1.7942761026484845e-08, "loss": 0.6076, "step": 6580 }, { "epoch": 0.9815429359782244, "grad_norm": 1.2003765106201172, "learning_rate": 1.7654613428782498e-08, "loss": 0.61, "step": 6581 }, { "epoch": 0.9816920839703196, "grad_norm": 1.3457914590835571, "learning_rate": 1.7368796273617982e-08, "loss": 0.5479, "step": 6582 }, { "epoch": 0.9818412319624147, "grad_norm": 1.2575089931488037, "learning_rate": 1.708530962771793e-08, "loss": 0.6442, "step": 6583 }, { "epoch": 0.9819903799545099, "grad_norm": 1.1287294626235962, "learning_rate": 1.6804153557261615e-08, "loss": 0.5739, "step": 6584 }, { "epoch": 0.982139527946605, "grad_norm": 1.2983248233795166, "learning_rate": 1.652532812788987e-08, "loss": 0.6696, "step": 6585 }, { "epoch": 0.9822886759387002, "grad_norm": 1.256823182106018, "learning_rate": 1.6248833404692856e-08, "loss": 0.6431, "step": 6586 }, { "epoch": 0.9824378239307954, "grad_norm": 1.0925147533416748, "learning_rate": 1.597466945222337e-08, "loss": 0.5191, "step": 6587 }, { "epoch": 0.9825869719228905, "grad_norm": 1.1821168661117554, "learning_rate": 1.570283633448466e-08, "loss": 0.5833, "step": 6588 }, { "epoch": 0.9827361199149857, "grad_norm": 1.1979308128356934, "learning_rate": 1.5433334114938193e-08, "loss": 0.4595, "step": 6589 }, { "epoch": 0.9828852679070808, "grad_norm": 1.1546757221221924, "learning_rate": 1.516616285650141e-08, "loss": 0.5115, "step": 6590 }, { "epoch": 0.983034415899176, "grad_norm": 1.1311110258102417, "learning_rate": 1.4901322621547753e-08, "loss": 0.5087, "step": 6591 }, { "epoch": 0.9831835638912712, "grad_norm": 1.2163954973220825, "learning_rate": 1.4638813471904435e-08, "loss": 0.5749, "step": 6592 }, { "epoch": 0.9833327118833662, "grad_norm": 1.1228522062301636, "learning_rate": 1.4378635468855762e-08, "loss": 0.5338, "step": 6593 }, { "epoch": 0.9834818598754614, "grad_norm": 1.1969630718231201, "learning_rate": 1.412078867314426e-08, "loss": 0.5947, "step": 6594 }, { "epoch": 0.9836310078675565, "grad_norm": 1.2246018648147583, "learning_rate": 1.3865273144963998e-08, "loss": 0.5715, "step": 6595 }, { "epoch": 0.9837801558596517, "grad_norm": 1.2709825038909912, "learning_rate": 1.3612088943967262e-08, "loss": 0.5447, "step": 6596 }, { "epoch": 0.9839293038517469, "grad_norm": 1.1895861625671387, "learning_rate": 1.3361236129261212e-08, "loss": 0.6172, "step": 6597 }, { "epoch": 0.984078451843842, "grad_norm": 1.3606892824172974, "learning_rate": 1.3112714759409006e-08, "loss": 0.5497, "step": 6598 }, { "epoch": 0.9842275998359372, "grad_norm": 1.212048053741455, "learning_rate": 1.2866524892430898e-08, "loss": 0.6248, "step": 6599 }, { "epoch": 0.9843767478280324, "grad_norm": 1.3025577068328857, "learning_rate": 1.2622666585799803e-08, "loss": 0.612, "step": 6600 }, { "epoch": 0.9845258958201275, "grad_norm": 1.151227355003357, "learning_rate": 1.2381139896445737e-08, "loss": 0.5738, "step": 6601 }, { "epoch": 0.9846750438122227, "grad_norm": 1.16837739944458, "learning_rate": 1.2141944880756928e-08, "loss": 0.5571, "step": 6602 }, { "epoch": 0.9848241918043178, "grad_norm": 1.1036170721054077, "learning_rate": 1.1905081594573153e-08, "loss": 0.5069, "step": 6603 }, { "epoch": 0.984973339796413, "grad_norm": 1.2216612100601196, "learning_rate": 1.167055009319129e-08, "loss": 0.5012, "step": 6604 }, { "epoch": 0.9851224877885082, "grad_norm": 0.8763064742088318, "learning_rate": 1.143835043136643e-08, "loss": 0.5966, "step": 6605 }, { "epoch": 0.9852716357806033, "grad_norm": 1.0535417795181274, "learning_rate": 1.120848266330521e-08, "loss": 0.5238, "step": 6606 }, { "epoch": 0.9854207837726985, "grad_norm": 1.2174835205078125, "learning_rate": 1.098094684267137e-08, "loss": 0.5843, "step": 6607 }, { "epoch": 0.9855699317647936, "grad_norm": 1.139424443244934, "learning_rate": 1.0755743022585751e-08, "loss": 0.5717, "step": 6608 }, { "epoch": 0.9857190797568888, "grad_norm": 1.166476845741272, "learning_rate": 1.0532871255624077e-08, "loss": 0.5531, "step": 6609 }, { "epoch": 0.985868227748984, "grad_norm": 1.2817858457565308, "learning_rate": 1.0312331593815839e-08, "loss": 0.6265, "step": 6610 }, { "epoch": 0.9860173757410791, "grad_norm": 1.1556670665740967, "learning_rate": 1.0094124088648739e-08, "loss": 0.5191, "step": 6611 }, { "epoch": 0.9861665237331743, "grad_norm": 1.159109354019165, "learning_rate": 9.878248791063138e-09, "loss": 0.5619, "step": 6612 }, { "epoch": 0.9863156717252693, "grad_norm": 1.3074793815612793, "learning_rate": 9.664705751457615e-09, "loss": 0.6024, "step": 6613 }, { "epoch": 0.9864648197173645, "grad_norm": 1.111274003982544, "learning_rate": 9.45349501968451e-09, "loss": 0.5924, "step": 6614 }, { "epoch": 0.9866139677094597, "grad_norm": 1.276548147201538, "learning_rate": 9.244616645053272e-09, "loss": 0.5656, "step": 6615 }, { "epoch": 0.9867631157015548, "grad_norm": 1.3009188175201416, "learning_rate": 9.038070676328226e-09, "loss": 0.5874, "step": 6616 }, { "epoch": 0.98691226369365, "grad_norm": 1.1175528764724731, "learning_rate": 8.833857161726355e-09, "loss": 0.5265, "step": 6617 }, { "epoch": 0.9870614116857451, "grad_norm": 1.2899138927459717, "learning_rate": 8.631976148925081e-09, "loss": 0.578, "step": 6618 }, { "epoch": 0.9872105596778403, "grad_norm": 0.8846939206123352, "learning_rate": 8.432427685054479e-09, "loss": 0.6663, "step": 6619 }, { "epoch": 0.9873597076699355, "grad_norm": 1.1745070219039917, "learning_rate": 8.235211816699506e-09, "loss": 0.5696, "step": 6620 }, { "epoch": 0.9875088556620306, "grad_norm": 1.2033965587615967, "learning_rate": 8.040328589901114e-09, "loss": 0.6385, "step": 6621 }, { "epoch": 0.9876580036541258, "grad_norm": 1.1253753900527954, "learning_rate": 7.847778050157351e-09, "loss": 0.5275, "step": 6622 }, { "epoch": 0.987807151646221, "grad_norm": 0.8444572687149048, "learning_rate": 7.657560242420037e-09, "loss": 0.6438, "step": 6623 }, { "epoch": 0.9879562996383161, "grad_norm": 0.8965653777122498, "learning_rate": 7.469675211096983e-09, "loss": 0.6713, "step": 6624 }, { "epoch": 0.9881054476304113, "grad_norm": 1.206716537475586, "learning_rate": 7.28412300004977e-09, "loss": 0.6005, "step": 6625 }, { "epoch": 0.9882545956225064, "grad_norm": 1.298237919807434, "learning_rate": 7.10090365259819e-09, "loss": 0.5089, "step": 6626 }, { "epoch": 0.9884037436146016, "grad_norm": 0.8206361532211304, "learning_rate": 6.920017211515806e-09, "loss": 0.6221, "step": 6627 }, { "epoch": 0.9885528916066968, "grad_norm": 1.3229386806488037, "learning_rate": 6.7414637190310605e-09, "loss": 0.6291, "step": 6628 }, { "epoch": 0.9887020395987919, "grad_norm": 1.2413108348846436, "learning_rate": 6.5652432168283875e-09, "loss": 0.5984, "step": 6629 }, { "epoch": 0.9888511875908871, "grad_norm": 1.199458122253418, "learning_rate": 6.391355746048211e-09, "loss": 0.5418, "step": 6630 }, { "epoch": 0.9890003355829822, "grad_norm": 1.2075936794281006, "learning_rate": 6.219801347285837e-09, "loss": 0.5511, "step": 6631 }, { "epoch": 0.9891494835750774, "grad_norm": 1.3065435886383057, "learning_rate": 6.050580060590339e-09, "loss": 0.5926, "step": 6632 }, { "epoch": 0.9892986315671726, "grad_norm": 1.1716042757034302, "learning_rate": 5.883691925469004e-09, "loss": 0.632, "step": 6633 }, { "epoch": 0.9894477795592677, "grad_norm": 1.0528700351715088, "learning_rate": 5.719136980882889e-09, "loss": 0.4862, "step": 6634 }, { "epoch": 0.9895969275513629, "grad_norm": 1.239279866218567, "learning_rate": 5.556915265247931e-09, "loss": 0.5613, "step": 6635 }, { "epoch": 0.989746075543458, "grad_norm": 1.160166621208191, "learning_rate": 5.397026816434947e-09, "loss": 0.5903, "step": 6636 }, { "epoch": 0.9898952235355531, "grad_norm": 1.3308144807815552, "learning_rate": 5.239471671772967e-09, "loss": 0.5665, "step": 6637 }, { "epoch": 0.9900443715276483, "grad_norm": 1.284734845161438, "learning_rate": 5.084249868042568e-09, "loss": 0.5622, "step": 6638 }, { "epoch": 0.9901935195197434, "grad_norm": 1.2847721576690674, "learning_rate": 4.93136144148143e-09, "loss": 0.5966, "step": 6639 }, { "epoch": 0.9903426675118386, "grad_norm": 1.1532225608825684, "learning_rate": 4.780806427783224e-09, "loss": 0.5807, "step": 6640 }, { "epoch": 0.9904918155039337, "grad_norm": 1.1596792936325073, "learning_rate": 4.632584862095391e-09, "loss": 0.5906, "step": 6641 }, { "epoch": 0.9906409634960289, "grad_norm": 1.1359246969223022, "learning_rate": 4.48669677902025e-09, "loss": 0.5274, "step": 6642 }, { "epoch": 0.9907901114881241, "grad_norm": 1.2480393648147583, "learning_rate": 4.3431422126183344e-09, "loss": 0.6325, "step": 6643 }, { "epoch": 0.9909392594802192, "grad_norm": 1.1227396726608276, "learning_rate": 4.201921196402836e-09, "loss": 0.5656, "step": 6644 }, { "epoch": 0.9910884074723144, "grad_norm": 1.159265160560608, "learning_rate": 4.063033763341828e-09, "loss": 0.5655, "step": 6645 }, { "epoch": 0.9912375554644096, "grad_norm": 0.9453413486480713, "learning_rate": 3.9264799458593736e-09, "loss": 0.6841, "step": 6646 }, { "epoch": 0.9913867034565047, "grad_norm": 1.3104884624481201, "learning_rate": 3.7922597758355275e-09, "loss": 0.5902, "step": 6647 }, { "epoch": 0.9915358514485999, "grad_norm": 1.3067090511322021, "learning_rate": 3.660373284605223e-09, "loss": 0.6112, "step": 6648 }, { "epoch": 0.991684999440695, "grad_norm": 1.2742854356765747, "learning_rate": 3.5308205029571663e-09, "loss": 0.6816, "step": 6649 }, { "epoch": 0.9918341474327902, "grad_norm": 1.2596343755722046, "learning_rate": 3.4036014611371624e-09, "loss": 0.5934, "step": 6650 }, { "epoch": 0.9919832954248854, "grad_norm": 1.1130825281143188, "learning_rate": 3.2787161888447883e-09, "loss": 0.5134, "step": 6651 }, { "epoch": 0.9921324434169805, "grad_norm": 1.099948763847351, "learning_rate": 3.15616471523561e-09, "loss": 0.5872, "step": 6652 }, { "epoch": 0.9922815914090757, "grad_norm": 1.2406448125839233, "learning_rate": 3.035947068920075e-09, "loss": 0.5595, "step": 6653 }, { "epoch": 0.9924307394011708, "grad_norm": 1.3228740692138672, "learning_rate": 2.9180632779624017e-09, "loss": 0.5782, "step": 6654 }, { "epoch": 0.992579887393266, "grad_norm": 1.099594235420227, "learning_rate": 2.8025133698861282e-09, "loss": 0.5405, "step": 6655 }, { "epoch": 0.9927290353853612, "grad_norm": 1.203151822090149, "learning_rate": 2.6892973716641235e-09, "loss": 0.5158, "step": 6656 }, { "epoch": 0.9928781833774563, "grad_norm": 1.0953474044799805, "learning_rate": 2.578415309729687e-09, "loss": 0.5391, "step": 6657 }, { "epoch": 0.9930273313695515, "grad_norm": 1.1842750310897827, "learning_rate": 2.469867209967669e-09, "loss": 0.5766, "step": 6658 }, { "epoch": 0.9931764793616465, "grad_norm": 1.2910715341567993, "learning_rate": 2.36365309772002e-09, "loss": 0.5595, "step": 6659 }, { "epoch": 0.9933256273537417, "grad_norm": 1.2997592687606812, "learning_rate": 2.259772997782461e-09, "loss": 0.5567, "step": 6660 }, { "epoch": 0.993474775345837, "grad_norm": 1.1572134494781494, "learning_rate": 2.1582269344067043e-09, "loss": 0.607, "step": 6661 }, { "epoch": 0.993623923337932, "grad_norm": 1.0985233783721924, "learning_rate": 2.0590149312993412e-09, "loss": 0.5481, "step": 6662 }, { "epoch": 0.9937730713300272, "grad_norm": 1.2184321880340576, "learning_rate": 1.9621370116218453e-09, "loss": 0.5258, "step": 6663 }, { "epoch": 0.9939222193221223, "grad_norm": 1.0965322256088257, "learning_rate": 1.8675931979916794e-09, "loss": 0.5175, "step": 6664 }, { "epoch": 0.9940713673142175, "grad_norm": 1.2909703254699707, "learning_rate": 1.7753835124800778e-09, "loss": 0.6305, "step": 6665 }, { "epoch": 0.9942205153063127, "grad_norm": 1.2392572164535522, "learning_rate": 1.6855079766142646e-09, "loss": 0.5645, "step": 6666 }, { "epoch": 0.9943696632984078, "grad_norm": 1.1733546257019043, "learning_rate": 1.5979666113763448e-09, "loss": 0.5869, "step": 6667 }, { "epoch": 0.994518811290503, "grad_norm": 1.1147795915603638, "learning_rate": 1.5127594372033038e-09, "loss": 0.5076, "step": 6668 }, { "epoch": 0.9946679592825982, "grad_norm": 1.2892425060272217, "learning_rate": 1.4298864739870078e-09, "loss": 0.6466, "step": 6669 }, { "epoch": 0.9948171072746933, "grad_norm": 1.3020681142807007, "learning_rate": 1.349347741075313e-09, "loss": 0.5689, "step": 6670 }, { "epoch": 0.9949662552667885, "grad_norm": 1.130197286605835, "learning_rate": 1.2711432572698468e-09, "loss": 0.5715, "step": 6671 }, { "epoch": 0.9951154032588836, "grad_norm": 1.2509516477584839, "learning_rate": 1.1952730408282264e-09, "loss": 0.5976, "step": 6672 }, { "epoch": 0.9952645512509788, "grad_norm": 1.2130733728408813, "learning_rate": 1.12173710946184e-09, "loss": 0.6233, "step": 6673 }, { "epoch": 0.995413699243074, "grad_norm": 1.1466618776321411, "learning_rate": 1.0505354803402867e-09, "loss": 0.6211, "step": 6674 }, { "epoch": 0.9955628472351691, "grad_norm": 1.1393154859542847, "learning_rate": 9.816681700847152e-10, "loss": 0.5638, "step": 6675 }, { "epoch": 0.9957119952272643, "grad_norm": 1.1041783094406128, "learning_rate": 9.151351947722653e-10, "loss": 0.5879, "step": 6676 }, { "epoch": 0.9958611432193594, "grad_norm": 1.2351653575897217, "learning_rate": 8.509365699360672e-10, "loss": 0.5562, "step": 6677 }, { "epoch": 0.9960102912114546, "grad_norm": 1.1901352405548096, "learning_rate": 7.890723105641318e-10, "loss": 0.6491, "step": 6678 }, { "epoch": 0.9961594392035498, "grad_norm": 1.1448229551315308, "learning_rate": 7.295424310982402e-10, "loss": 0.5383, "step": 6679 }, { "epoch": 0.9963085871956449, "grad_norm": 1.3348523378372192, "learning_rate": 6.723469454372744e-10, "loss": 0.5713, "step": 6680 }, { "epoch": 0.9964577351877401, "grad_norm": 1.1552866697311401, "learning_rate": 6.174858669316664e-10, "loss": 0.5558, "step": 6681 }, { "epoch": 0.9966068831798351, "grad_norm": 1.4325863122940063, "learning_rate": 5.649592083911693e-10, "loss": 0.6189, "step": 6682 }, { "epoch": 0.9967560311719303, "grad_norm": 1.2631235122680664, "learning_rate": 5.147669820770861e-10, "loss": 0.6533, "step": 6683 }, { "epoch": 0.9969051791640255, "grad_norm": 1.2138139009475708, "learning_rate": 4.669091997078212e-10, "loss": 0.537, "step": 6684 }, { "epoch": 0.9970543271561206, "grad_norm": 1.155671238899231, "learning_rate": 4.2138587245665883e-10, "loss": 0.5743, "step": 6685 }, { "epoch": 0.9972034751482158, "grad_norm": 1.231107234954834, "learning_rate": 3.7819701094954365e-10, "loss": 0.572, "step": 6686 }, { "epoch": 0.9973526231403109, "grad_norm": 1.0686383247375488, "learning_rate": 3.373426252706313e-10, "loss": 0.5423, "step": 6687 }, { "epoch": 0.9975017711324061, "grad_norm": 1.1893494129180908, "learning_rate": 2.988227249578479e-10, "loss": 0.6117, "step": 6688 }, { "epoch": 0.9976509191245013, "grad_norm": 1.2024356126785278, "learning_rate": 2.626373190028897e-10, "loss": 0.5823, "step": 6689 }, { "epoch": 0.9978000671165964, "grad_norm": 1.0887446403503418, "learning_rate": 2.2878641585455385e-10, "loss": 0.5327, "step": 6690 }, { "epoch": 0.9979492151086916, "grad_norm": 1.1717685461044312, "learning_rate": 1.9727002341429768e-10, "loss": 0.5686, "step": 6691 }, { "epoch": 0.9980983631007867, "grad_norm": 1.1688576936721802, "learning_rate": 1.680881490406794e-10, "loss": 0.5512, "step": 6692 }, { "epoch": 0.9982475110928819, "grad_norm": 1.1835929155349731, "learning_rate": 1.4124079954602743e-10, "loss": 0.5694, "step": 6693 }, { "epoch": 0.9983966590849771, "grad_norm": 1.1423912048339844, "learning_rate": 1.167279811975508e-10, "loss": 0.551, "step": 6694 }, { "epoch": 0.9985458070770722, "grad_norm": 1.2141412496566772, "learning_rate": 9.454969971955941e-11, "loss": 0.5649, "step": 6695 }, { "epoch": 0.9986949550691674, "grad_norm": 1.1514286994934082, "learning_rate": 7.470596028902321e-11, "loss": 0.5778, "step": 6696 }, { "epoch": 0.9988441030612626, "grad_norm": 1.3197903633117676, "learning_rate": 5.719676753668246e-11, "loss": 0.5928, "step": 6697 }, { "epoch": 0.9989932510533577, "grad_norm": 1.2637964487075806, "learning_rate": 4.202212555259877e-11, "loss": 0.5561, "step": 6698 }, { "epoch": 0.9991423990454529, "grad_norm": 1.1150416135787964, "learning_rate": 2.9182037879493805e-11, "loss": 0.6075, "step": 6699 }, { "epoch": 0.999291547037548, "grad_norm": 1.220365285873413, "learning_rate": 1.8676507512749297e-11, "loss": 0.5563, "step": 6700 }, { "epoch": 0.9994406950296432, "grad_norm": 1.2233799695968628, "learning_rate": 1.0505536907068347e-11, "loss": 0.5967, "step": 6701 }, { "epoch": 0.9995898430217384, "grad_norm": 1.179775357246399, "learning_rate": 4.6691279687038905e-12, "loss": 0.5582, "step": 6702 }, { "epoch": 0.9997389910138335, "grad_norm": 1.1228342056274414, "learning_rate": 1.167282059899577e-12, "loss": 0.4981, "step": 6703 }, { "epoch": 0.9998881390059287, "grad_norm": 1.1061762571334839, "learning_rate": 0.0, "loss": 0.502, "step": 6704 }, { "epoch": 0.9998881390059287, "step": 6704, "total_flos": 2.3027706492543304e+19, "train_loss": 0.6291037106748697, "train_runtime": 58318.9299, "train_samples_per_second": 14.716, "train_steps_per_second": 0.115 } ], "logging_steps": 1.0, "max_steps": 6704, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 6000, "total_flos": 2.3027706492543304e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }