{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3564, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016835016835016834, "grad_norm": 11.373913764953613, "learning_rate": 1.1173184357541899e-08, "loss": 1.7052544355392456, "step": 2 }, { "epoch": 0.003367003367003367, "grad_norm": 7.921729564666748, "learning_rate": 3.3519553072625695e-08, "loss": 1.2431142330169678, "step": 4 }, { "epoch": 0.005050505050505051, "grad_norm": 10.424066543579102, "learning_rate": 5.586592178770949e-08, "loss": 1.6871027946472168, "step": 6 }, { "epoch": 0.006734006734006734, "grad_norm": 12.496847152709961, "learning_rate": 7.82122905027933e-08, "loss": 1.605969786643982, "step": 8 }, { "epoch": 0.008417508417508417, "grad_norm": 41.12433624267578, "learning_rate": 1.005586592178771e-07, "loss": 4.507528781890869, "step": 10 }, { "epoch": 0.010101010101010102, "grad_norm": 5.525810718536377, "learning_rate": 1.2290502793296089e-07, "loss": 1.978538990020752, "step": 12 }, { "epoch": 0.011784511784511785, "grad_norm": 5.537006855010986, "learning_rate": 1.452513966480447e-07, "loss": 1.6731345653533936, "step": 14 }, { "epoch": 0.013468013468013467, "grad_norm": 5.548595905303955, "learning_rate": 1.6759776536312846e-07, "loss": 1.6586148738861084, "step": 16 }, { "epoch": 0.015151515151515152, "grad_norm": 30.940366744995117, "learning_rate": 1.8994413407821228e-07, "loss": 2.7178502082824707, "step": 18 }, { "epoch": 0.016835016835016835, "grad_norm": 13.579764366149902, "learning_rate": 2.122905027932961e-07, "loss": 1.946686029434204, "step": 20 }, { "epoch": 0.018518518518518517, "grad_norm": 27.936044692993164, "learning_rate": 2.3463687150837988e-07, "loss": 1.845343828201294, "step": 22 }, { "epoch": 0.020202020202020204, "grad_norm": 15.411078453063965, "learning_rate": 2.5698324022346367e-07, "loss": 3.498995780944824, "step": 24 }, { "epoch": 0.021885521885521887, "grad_norm": 14.533377647399902, "learning_rate": 2.7932960893854745e-07, "loss": 2.7915127277374268, "step": 26 }, { "epoch": 0.02356902356902357, "grad_norm": 14.302261352539062, "learning_rate": 3.016759776536313e-07, "loss": 2.1403520107269287, "step": 28 }, { "epoch": 0.025252525252525252, "grad_norm": 128.7482147216797, "learning_rate": 3.240223463687151e-07, "loss": 3.224329948425293, "step": 30 }, { "epoch": 0.026936026936026935, "grad_norm": 19.047321319580078, "learning_rate": 3.4636871508379887e-07, "loss": 1.7786729335784912, "step": 32 }, { "epoch": 0.02861952861952862, "grad_norm": 34.735931396484375, "learning_rate": 3.6871508379888266e-07, "loss": 3.5843183994293213, "step": 34 }, { "epoch": 0.030303030303030304, "grad_norm": 14.215960502624512, "learning_rate": 3.9106145251396645e-07, "loss": 1.935349464416504, "step": 36 }, { "epoch": 0.03198653198653199, "grad_norm": 19.77899742126465, "learning_rate": 4.134078212290503e-07, "loss": 1.4627070426940918, "step": 38 }, { "epoch": 0.03367003367003367, "grad_norm": 11.175507545471191, "learning_rate": 4.35754189944134e-07, "loss": 2.240666389465332, "step": 40 }, { "epoch": 0.03535353535353535, "grad_norm": 12.57774543762207, "learning_rate": 4.5810055865921786e-07, "loss": 2.6894779205322266, "step": 42 }, { "epoch": 0.037037037037037035, "grad_norm": 10.773889541625977, "learning_rate": 4.804469273743016e-07, "loss": 2.1831870079040527, "step": 44 }, { "epoch": 0.03872053872053872, "grad_norm": 19.548690795898438, "learning_rate": 5.027932960893855e-07, "loss": 1.439545750617981, "step": 46 }, { "epoch": 0.04040404040404041, "grad_norm": 36.42060089111328, "learning_rate": 5.251396648044693e-07, "loss": 2.12111759185791, "step": 48 }, { "epoch": 0.04208754208754209, "grad_norm": 21.412006378173828, "learning_rate": 5.474860335195531e-07, "loss": 1.8557084798812866, "step": 50 }, { "epoch": 0.04377104377104377, "grad_norm": 34.71672821044922, "learning_rate": 5.698324022346367e-07, "loss": 1.9305258989334106, "step": 52 }, { "epoch": 0.045454545454545456, "grad_norm": 9.617766380310059, "learning_rate": 5.921787709497206e-07, "loss": 1.8030924797058105, "step": 54 }, { "epoch": 0.04713804713804714, "grad_norm": 7.223271369934082, "learning_rate": 6.145251396648044e-07, "loss": 1.5451292991638184, "step": 56 }, { "epoch": 0.04882154882154882, "grad_norm": 23.102691650390625, "learning_rate": 6.368715083798882e-07, "loss": 1.7897560596466064, "step": 58 }, { "epoch": 0.050505050505050504, "grad_norm": 5.478814125061035, "learning_rate": 6.59217877094972e-07, "loss": 1.5537652969360352, "step": 60 }, { "epoch": 0.05218855218855219, "grad_norm": 24.925573348999023, "learning_rate": 6.815642458100558e-07, "loss": 1.5619089603424072, "step": 62 }, { "epoch": 0.05387205387205387, "grad_norm": 6.516916751861572, "learning_rate": 7.039106145251397e-07, "loss": 1.3759074211120605, "step": 64 }, { "epoch": 0.05555555555555555, "grad_norm": 15.737017631530762, "learning_rate": 7.262569832402235e-07, "loss": 1.7463831901550293, "step": 66 }, { "epoch": 0.05723905723905724, "grad_norm": 8.381811141967773, "learning_rate": 7.486033519553073e-07, "loss": 1.5691605806350708, "step": 68 }, { "epoch": 0.058922558922558925, "grad_norm": 4.55023193359375, "learning_rate": 7.709497206703909e-07, "loss": 1.477018117904663, "step": 70 }, { "epoch": 0.06060606060606061, "grad_norm": 10.475610733032227, "learning_rate": 7.932960893854748e-07, "loss": 1.7785859107971191, "step": 72 }, { "epoch": 0.06228956228956229, "grad_norm": 3.722770929336548, "learning_rate": 8.156424581005586e-07, "loss": 1.5958442687988281, "step": 74 }, { "epoch": 0.06397306397306397, "grad_norm": 10.343371391296387, "learning_rate": 8.379888268156424e-07, "loss": 1.0664887428283691, "step": 76 }, { "epoch": 0.06565656565656566, "grad_norm": 4.329504489898682, "learning_rate": 8.603351955307262e-07, "loss": 1.624394178390503, "step": 78 }, { "epoch": 0.06734006734006734, "grad_norm": 3.1892311573028564, "learning_rate": 8.8268156424581e-07, "loss": 1.4226157665252686, "step": 80 }, { "epoch": 0.06902356902356903, "grad_norm": 4.654183387756348, "learning_rate": 9.050279329608939e-07, "loss": 1.5613698959350586, "step": 82 }, { "epoch": 0.0707070707070707, "grad_norm": 11.606276512145996, "learning_rate": 9.273743016759777e-07, "loss": 1.3501516580581665, "step": 84 }, { "epoch": 0.0723905723905724, "grad_norm": 16.68027114868164, "learning_rate": 9.497206703910615e-07, "loss": 1.4939382076263428, "step": 86 }, { "epoch": 0.07407407407407407, "grad_norm": 6.979609489440918, "learning_rate": 9.720670391061452e-07, "loss": 1.3073322772979736, "step": 88 }, { "epoch": 0.07575757575757576, "grad_norm": 14.462837219238281, "learning_rate": 9.94413407821229e-07, "loss": 1.4457292556762695, "step": 90 }, { "epoch": 0.07744107744107744, "grad_norm": 28.974502563476562, "learning_rate": 1.0167597765363128e-06, "loss": 1.1620988845825195, "step": 92 }, { "epoch": 0.07912457912457913, "grad_norm": 4.1516923904418945, "learning_rate": 1.0391061452513965e-06, "loss": 1.238929271697998, "step": 94 }, { "epoch": 0.08080808080808081, "grad_norm": 4.485579490661621, "learning_rate": 1.0614525139664804e-06, "loss": 1.2894108295440674, "step": 96 }, { "epoch": 0.08249158249158249, "grad_norm": 14.860766410827637, "learning_rate": 1.0837988826815643e-06, "loss": 1.233134150505066, "step": 98 }, { "epoch": 0.08417508417508418, "grad_norm": 3.1615281105041504, "learning_rate": 1.106145251396648e-06, "loss": 1.21131432056427, "step": 100 }, { "epoch": 0.08585858585858586, "grad_norm": 6.936952114105225, "learning_rate": 1.1284916201117319e-06, "loss": 1.0955811738967896, "step": 102 }, { "epoch": 0.08754208754208755, "grad_norm": 4.402707576751709, "learning_rate": 1.1508379888268155e-06, "loss": 0.8408428430557251, "step": 104 }, { "epoch": 0.08922558922558922, "grad_norm": 26.573022842407227, "learning_rate": 1.1731843575418994e-06, "loss": 0.9416179656982422, "step": 106 }, { "epoch": 0.09090909090909091, "grad_norm": 3.3700878620147705, "learning_rate": 1.1955307262569831e-06, "loss": 1.2525057792663574, "step": 108 }, { "epoch": 0.09259259259259259, "grad_norm": 6.59627103805542, "learning_rate": 1.217877094972067e-06, "loss": 0.8143967986106873, "step": 110 }, { "epoch": 0.09427609427609428, "grad_norm": 6.249094486236572, "learning_rate": 1.2402234636871507e-06, "loss": 1.253350853919983, "step": 112 }, { "epoch": 0.09595959595959595, "grad_norm": 14.433463096618652, "learning_rate": 1.2625698324022344e-06, "loss": 1.0653541088104248, "step": 114 }, { "epoch": 0.09764309764309764, "grad_norm": 3.963433265686035, "learning_rate": 1.2849162011173185e-06, "loss": 0.7410316467285156, "step": 116 }, { "epoch": 0.09932659932659933, "grad_norm": 3.3435471057891846, "learning_rate": 1.3072625698324022e-06, "loss": 1.144932746887207, "step": 118 }, { "epoch": 0.10101010101010101, "grad_norm": 3.1995925903320312, "learning_rate": 1.329608938547486e-06, "loss": 0.9971737861633301, "step": 120 }, { "epoch": 0.1026936026936027, "grad_norm": 4.980305194854736, "learning_rate": 1.3519553072625697e-06, "loss": 1.1758251190185547, "step": 122 }, { "epoch": 0.10437710437710437, "grad_norm": 6.944910049438477, "learning_rate": 1.3743016759776536e-06, "loss": 0.8010753393173218, "step": 124 }, { "epoch": 0.10606060606060606, "grad_norm": 5.405940055847168, "learning_rate": 1.3966480446927373e-06, "loss": 0.708318829536438, "step": 126 }, { "epoch": 0.10774410774410774, "grad_norm": 24.046825408935547, "learning_rate": 1.4189944134078212e-06, "loss": 1.0953171253204346, "step": 128 }, { "epoch": 0.10942760942760943, "grad_norm": 9.63823127746582, "learning_rate": 1.441340782122905e-06, "loss": 0.9741929173469543, "step": 130 }, { "epoch": 0.1111111111111111, "grad_norm": 15.6827974319458, "learning_rate": 1.4636871508379886e-06, "loss": 0.9290119409561157, "step": 132 }, { "epoch": 0.1127946127946128, "grad_norm": 4.126307010650635, "learning_rate": 1.4860335195530727e-06, "loss": 1.1921985149383545, "step": 134 }, { "epoch": 0.11447811447811448, "grad_norm": 26.01188087463379, "learning_rate": 1.5083798882681564e-06, "loss": 1.1901376247406006, "step": 136 }, { "epoch": 0.11616161616161616, "grad_norm": 4.109427452087402, "learning_rate": 1.5307262569832403e-06, "loss": 1.5455617904663086, "step": 138 }, { "epoch": 0.11784511784511785, "grad_norm": 5.346724987030029, "learning_rate": 1.553072625698324e-06, "loss": 1.1195063591003418, "step": 140 }, { "epoch": 0.11952861952861953, "grad_norm": 4.396357536315918, "learning_rate": 1.5754189944134078e-06, "loss": 1.060058832168579, "step": 142 }, { "epoch": 0.12121212121212122, "grad_norm": 102.16704559326172, "learning_rate": 1.5977653631284915e-06, "loss": 0.9710292816162109, "step": 144 }, { "epoch": 0.12289562289562289, "grad_norm": 3.3568778038024902, "learning_rate": 1.6201117318435752e-06, "loss": 1.1372500658035278, "step": 146 }, { "epoch": 0.12457912457912458, "grad_norm": 4.527273178100586, "learning_rate": 1.642458100558659e-06, "loss": 0.991235077381134, "step": 148 }, { "epoch": 0.12626262626262627, "grad_norm": 26.466514587402344, "learning_rate": 1.6648044692737428e-06, "loss": 1.206244945526123, "step": 150 }, { "epoch": 0.12794612794612795, "grad_norm": 14.26403522491455, "learning_rate": 1.6871508379888269e-06, "loss": 0.972631573677063, "step": 152 }, { "epoch": 0.12962962962962962, "grad_norm": 22.82804298400879, "learning_rate": 1.7094972067039106e-06, "loss": 1.2080013751983643, "step": 154 }, { "epoch": 0.13131313131313133, "grad_norm": 15.690414428710938, "learning_rate": 1.7318435754189945e-06, "loss": 1.0757750272750854, "step": 156 }, { "epoch": 0.132996632996633, "grad_norm": 4.205806732177734, "learning_rate": 1.7541899441340781e-06, "loss": 1.054223656654358, "step": 158 }, { "epoch": 0.13468013468013468, "grad_norm": 5.085096836090088, "learning_rate": 1.776536312849162e-06, "loss": 1.1487317085266113, "step": 160 }, { "epoch": 0.13636363636363635, "grad_norm": 2.9503731727600098, "learning_rate": 1.7988826815642457e-06, "loss": 1.0323597192764282, "step": 162 }, { "epoch": 0.13804713804713806, "grad_norm": 15.863119125366211, "learning_rate": 1.8212290502793294e-06, "loss": 0.9738507866859436, "step": 164 }, { "epoch": 0.13973063973063973, "grad_norm": 8.238275527954102, "learning_rate": 1.8435754189944133e-06, "loss": 0.9224099516868591, "step": 166 }, { "epoch": 0.1414141414141414, "grad_norm": 3.9588847160339355, "learning_rate": 1.865921787709497e-06, "loss": 1.1515543460845947, "step": 168 }, { "epoch": 0.14309764309764308, "grad_norm": 8.48491382598877, "learning_rate": 1.8882681564245809e-06, "loss": 1.013866662979126, "step": 170 }, { "epoch": 0.1447811447811448, "grad_norm": 13.511845588684082, "learning_rate": 1.9106145251396648e-06, "loss": 1.0169274806976318, "step": 172 }, { "epoch": 0.14646464646464646, "grad_norm": 3.410078287124634, "learning_rate": 1.9329608938547484e-06, "loss": 0.7148650288581848, "step": 174 }, { "epoch": 0.14814814814814814, "grad_norm": 6.603743553161621, "learning_rate": 1.9553072625698325e-06, "loss": 1.0830409526824951, "step": 176 }, { "epoch": 0.14983164983164984, "grad_norm": 3.631049871444702, "learning_rate": 1.9776536312849162e-06, "loss": 1.2707182168960571, "step": 178 }, { "epoch": 0.15151515151515152, "grad_norm": 4.926392555236816, "learning_rate": 2e-06, "loss": 1.0230215787887573, "step": 180 }, { "epoch": 0.1531986531986532, "grad_norm": 3.419835329055786, "learning_rate": 1.9999984495606584e-06, "loss": 1.4019644260406494, "step": 182 }, { "epoch": 0.15488215488215487, "grad_norm": 91.24452209472656, "learning_rate": 1.999993798247977e-06, "loss": 0.9929481744766235, "step": 184 }, { "epoch": 0.15656565656565657, "grad_norm": 11.83352279663086, "learning_rate": 1.99998604607798e-06, "loss": 1.1366297006607056, "step": 186 }, { "epoch": 0.15824915824915825, "grad_norm": 8.025732040405273, "learning_rate": 1.9999751930773778e-06, "loss": 0.9216547608375549, "step": 188 }, { "epoch": 0.15993265993265993, "grad_norm": 23.363126754760742, "learning_rate": 1.999961239283563e-06, "loss": 0.7902772426605225, "step": 190 }, { "epoch": 0.16161616161616163, "grad_norm": 5.2719621658325195, "learning_rate": 1.999944184744613e-06, "loss": 1.3453216552734375, "step": 192 }, { "epoch": 0.1632996632996633, "grad_norm": 12.32300090789795, "learning_rate": 1.999924029519287e-06, "loss": 1.2495100498199463, "step": 194 }, { "epoch": 0.16498316498316498, "grad_norm": 3.889246940612793, "learning_rate": 1.9999007736770295e-06, "loss": 1.0745317935943604, "step": 196 }, { "epoch": 0.16666666666666666, "grad_norm": 3.305817127227783, "learning_rate": 1.9998744172979654e-06, "loss": 1.173724889755249, "step": 198 }, { "epoch": 0.16835016835016836, "grad_norm": 17.490114212036133, "learning_rate": 1.9998449604729044e-06, "loss": 0.8745306730270386, "step": 200 }, { "epoch": 0.17003367003367004, "grad_norm": 23.266372680664062, "learning_rate": 1.9998124033033366e-06, "loss": 0.8984509706497192, "step": 202 }, { "epoch": 0.1717171717171717, "grad_norm": 3.1825051307678223, "learning_rate": 1.9997767459014363e-06, "loss": 1.029420018196106, "step": 204 }, { "epoch": 0.1734006734006734, "grad_norm": 6.860640048980713, "learning_rate": 1.9997379883900572e-06, "loss": 1.0008872747421265, "step": 206 }, { "epoch": 0.1750841750841751, "grad_norm": 3.333308458328247, "learning_rate": 1.999696130902736e-06, "loss": 1.2087559700012207, "step": 208 }, { "epoch": 0.17676767676767677, "grad_norm": 14.786520957946777, "learning_rate": 1.9996511735836895e-06, "loss": 0.7541635036468506, "step": 210 }, { "epoch": 0.17845117845117844, "grad_norm": 12.577203750610352, "learning_rate": 1.999603116587814e-06, "loss": 0.8843977451324463, "step": 212 }, { "epoch": 0.18013468013468015, "grad_norm": 3.1734931468963623, "learning_rate": 1.9995519600806863e-06, "loss": 1.3309192657470703, "step": 214 }, { "epoch": 0.18181818181818182, "grad_norm": 3.05130672454834, "learning_rate": 1.999497704238562e-06, "loss": 0.837327241897583, "step": 216 }, { "epoch": 0.1835016835016835, "grad_norm": 17.063798904418945, "learning_rate": 1.9994403492483755e-06, "loss": 0.6734769344329834, "step": 218 }, { "epoch": 0.18518518518518517, "grad_norm": 3.6013314723968506, "learning_rate": 1.999379895307739e-06, "loss": 1.3345911502838135, "step": 220 }, { "epoch": 0.18686868686868688, "grad_norm": 12.002358436584473, "learning_rate": 1.999316342624941e-06, "loss": 0.9222342371940613, "step": 222 }, { "epoch": 0.18855218855218855, "grad_norm": 6.592970848083496, "learning_rate": 1.999249691418948e-06, "loss": 1.2030017375946045, "step": 224 }, { "epoch": 0.19023569023569023, "grad_norm": 5.709416389465332, "learning_rate": 1.999179941919401e-06, "loss": 0.9448881149291992, "step": 226 }, { "epoch": 0.1919191919191919, "grad_norm": 15.942882537841797, "learning_rate": 1.999107094366617e-06, "loss": 1.201434850692749, "step": 228 }, { "epoch": 0.1936026936026936, "grad_norm": 7.546412467956543, "learning_rate": 1.9990311490115858e-06, "loss": 1.3325837850570679, "step": 230 }, { "epoch": 0.19528619528619529, "grad_norm": 8.624425888061523, "learning_rate": 1.9989521061159715e-06, "loss": 1.0627577304840088, "step": 232 }, { "epoch": 0.19696969696969696, "grad_norm": 205.3386688232422, "learning_rate": 1.9988699659521098e-06, "loss": 1.1965469121932983, "step": 234 }, { "epoch": 0.19865319865319866, "grad_norm": 3.602259874343872, "learning_rate": 1.9987847288030083e-06, "loss": 0.9878703355789185, "step": 236 }, { "epoch": 0.20033670033670034, "grad_norm": 3.7958240509033203, "learning_rate": 1.998696394962345e-06, "loss": 1.1146423816680908, "step": 238 }, { "epoch": 0.20202020202020202, "grad_norm": 5.15401029586792, "learning_rate": 1.998604964734467e-06, "loss": 0.9246745109558105, "step": 240 }, { "epoch": 0.2037037037037037, "grad_norm": 7.467074394226074, "learning_rate": 1.99851043843439e-06, "loss": 1.1710363626480103, "step": 242 }, { "epoch": 0.2053872053872054, "grad_norm": 5.450447082519531, "learning_rate": 1.9984128163877964e-06, "loss": 0.99492347240448, "step": 244 }, { "epoch": 0.20707070707070707, "grad_norm": 13.562344551086426, "learning_rate": 1.998312098931036e-06, "loss": 0.6625456809997559, "step": 246 }, { "epoch": 0.20875420875420875, "grad_norm": 13.973015785217285, "learning_rate": 1.998208286411122e-06, "loss": 1.2143261432647705, "step": 248 }, { "epoch": 0.21043771043771045, "grad_norm": 4.950899124145508, "learning_rate": 1.9981013791857327e-06, "loss": 1.0001804828643799, "step": 250 }, { "epoch": 0.21212121212121213, "grad_norm": 8.739319801330566, "learning_rate": 1.997991377623209e-06, "loss": 0.9055366516113281, "step": 252 }, { "epoch": 0.2138047138047138, "grad_norm": 20.48418426513672, "learning_rate": 1.9978782821025513e-06, "loss": 1.0460654497146606, "step": 254 }, { "epoch": 0.21548821548821548, "grad_norm": 3.14092755317688, "learning_rate": 1.9977620930134223e-06, "loss": 1.1977128982543945, "step": 256 }, { "epoch": 0.21717171717171718, "grad_norm": 3.1732327938079834, "learning_rate": 1.9976428107561415e-06, "loss": 0.8446206450462341, "step": 258 }, { "epoch": 0.21885521885521886, "grad_norm": 4.083011150360107, "learning_rate": 1.997520435741687e-06, "loss": 1.0371161699295044, "step": 260 }, { "epoch": 0.22053872053872053, "grad_norm": 18.663959503173828, "learning_rate": 1.9973949683916927e-06, "loss": 1.0510563850402832, "step": 262 }, { "epoch": 0.2222222222222222, "grad_norm": 7.912849426269531, "learning_rate": 1.9972664091384454e-06, "loss": 1.1071124076843262, "step": 264 }, { "epoch": 0.2239057239057239, "grad_norm": 8.017518997192383, "learning_rate": 1.997134758424886e-06, "loss": 1.1996357440948486, "step": 266 }, { "epoch": 0.2255892255892256, "grad_norm": 8.789745330810547, "learning_rate": 1.9970000167046075e-06, "loss": 0.6464065313339233, "step": 268 }, { "epoch": 0.22727272727272727, "grad_norm": 2.892493963241577, "learning_rate": 1.996862184441851e-06, "loss": 0.9799895882606506, "step": 270 }, { "epoch": 0.22895622895622897, "grad_norm": 15.957945823669434, "learning_rate": 1.9967212621115065e-06, "loss": 1.310072898864746, "step": 272 }, { "epoch": 0.23063973063973064, "grad_norm": 15.204549789428711, "learning_rate": 1.996577250199111e-06, "loss": 1.1751708984375, "step": 274 }, { "epoch": 0.23232323232323232, "grad_norm": 13.538248062133789, "learning_rate": 1.9964301492008464e-06, "loss": 0.9009586572647095, "step": 276 }, { "epoch": 0.234006734006734, "grad_norm": 3.318108558654785, "learning_rate": 1.996279959623537e-06, "loss": 1.160229206085205, "step": 278 }, { "epoch": 0.2356902356902357, "grad_norm": 19.02191925048828, "learning_rate": 1.9961266819846495e-06, "loss": 0.9633986353874207, "step": 280 }, { "epoch": 0.23737373737373738, "grad_norm": 2.9140703678131104, "learning_rate": 1.9959703168122897e-06, "loss": 0.9368491172790527, "step": 282 }, { "epoch": 0.23905723905723905, "grad_norm": 6.261467933654785, "learning_rate": 1.995810864645202e-06, "loss": 1.2744100093841553, "step": 284 }, { "epoch": 0.24074074074074073, "grad_norm": 10.625744819641113, "learning_rate": 1.995648326032765e-06, "loss": 0.93353271484375, "step": 286 }, { "epoch": 0.24242424242424243, "grad_norm": 4.416279315948486, "learning_rate": 1.9954827015349937e-06, "loss": 0.8594992160797119, "step": 288 }, { "epoch": 0.2441077441077441, "grad_norm": 12.509293556213379, "learning_rate": 1.9953139917225333e-06, "loss": 1.1634539365768433, "step": 290 }, { "epoch": 0.24579124579124578, "grad_norm": 6.1216559410095215, "learning_rate": 1.995142197176661e-06, "loss": 0.6774170398712158, "step": 292 }, { "epoch": 0.2474747474747475, "grad_norm": 11.188633918762207, "learning_rate": 1.9949673184892803e-06, "loss": 1.2763657569885254, "step": 294 }, { "epoch": 0.24915824915824916, "grad_norm": 14.471898078918457, "learning_rate": 1.9947893562629227e-06, "loss": 1.0749788284301758, "step": 296 }, { "epoch": 0.25084175084175087, "grad_norm": 4.776523113250732, "learning_rate": 1.9946083111107425e-06, "loss": 0.6309652328491211, "step": 298 }, { "epoch": 0.25252525252525254, "grad_norm": 13.419336318969727, "learning_rate": 1.9944241836565167e-06, "loss": 0.7867164611816406, "step": 300 }, { "epoch": 0.2542087542087542, "grad_norm": 13.28558349609375, "learning_rate": 1.9942369745346417e-06, "loss": 1.074715256690979, "step": 302 }, { "epoch": 0.2558922558922559, "grad_norm": 13.307300567626953, "learning_rate": 1.9940466843901318e-06, "loss": 0.909276008605957, "step": 304 }, { "epoch": 0.25757575757575757, "grad_norm": 3.531205415725708, "learning_rate": 1.9938533138786163e-06, "loss": 1.3488116264343262, "step": 306 }, { "epoch": 0.25925925925925924, "grad_norm": 18.24886131286621, "learning_rate": 1.9936568636663383e-06, "loss": 1.1045993566513062, "step": 308 }, { "epoch": 0.2609427609427609, "grad_norm": 8.636100769042969, "learning_rate": 1.9934573344301514e-06, "loss": 1.0755970478057861, "step": 310 }, { "epoch": 0.26262626262626265, "grad_norm": 21.606901168823242, "learning_rate": 1.993254726857518e-06, "loss": 1.192507266998291, "step": 312 }, { "epoch": 0.26430976430976433, "grad_norm": 3.9044668674468994, "learning_rate": 1.9930490416465057e-06, "loss": 1.0830907821655273, "step": 314 }, { "epoch": 0.265993265993266, "grad_norm": 4.083668231964111, "learning_rate": 1.992840279505787e-06, "loss": 1.2332277297973633, "step": 316 }, { "epoch": 0.2676767676767677, "grad_norm": 2.9494712352752686, "learning_rate": 1.9926284411546355e-06, "loss": 1.0134263038635254, "step": 318 }, { "epoch": 0.26936026936026936, "grad_norm": 15.981578826904297, "learning_rate": 1.9924135273229235e-06, "loss": 0.7042160034179688, "step": 320 }, { "epoch": 0.27104377104377103, "grad_norm": 3.9529871940612793, "learning_rate": 1.9921955387511195e-06, "loss": 0.9744091033935547, "step": 322 }, { "epoch": 0.2727272727272727, "grad_norm": 4.732446193695068, "learning_rate": 1.991974476190285e-06, "loss": 1.2661027908325195, "step": 324 }, { "epoch": 0.27441077441077444, "grad_norm": 3.1209988594055176, "learning_rate": 1.9917503404020747e-06, "loss": 1.0432727336883545, "step": 326 }, { "epoch": 0.2760942760942761, "grad_norm": 2.297736644744873, "learning_rate": 1.9915231321587305e-06, "loss": 0.7997782230377197, "step": 328 }, { "epoch": 0.2777777777777778, "grad_norm": 3.8393845558166504, "learning_rate": 1.99129285224308e-06, "loss": 0.8995693922042847, "step": 330 }, { "epoch": 0.27946127946127947, "grad_norm": 5.347554683685303, "learning_rate": 1.9910595014485347e-06, "loss": 1.094329595565796, "step": 332 }, { "epoch": 0.28114478114478114, "grad_norm": 9.933974266052246, "learning_rate": 1.990823080579086e-06, "loss": 1.0780812501907349, "step": 334 }, { "epoch": 0.2828282828282828, "grad_norm": 6.838101863861084, "learning_rate": 1.990583590449303e-06, "loss": 1.0089993476867676, "step": 336 }, { "epoch": 0.2845117845117845, "grad_norm": 2.9299662113189697, "learning_rate": 1.990341031884331e-06, "loss": 1.188491702079773, "step": 338 }, { "epoch": 0.28619528619528617, "grad_norm": 11.96368408203125, "learning_rate": 1.9900954057198856e-06, "loss": 0.9743690490722656, "step": 340 }, { "epoch": 0.2878787878787879, "grad_norm": 10.01843547821045, "learning_rate": 1.989846712802252e-06, "loss": 1.091504454612732, "step": 342 }, { "epoch": 0.2895622895622896, "grad_norm": 11.638251304626465, "learning_rate": 1.9895949539882827e-06, "loss": 0.8539205193519592, "step": 344 }, { "epoch": 0.29124579124579125, "grad_norm": 4.232053279876709, "learning_rate": 1.9893401301453926e-06, "loss": 1.1060683727264404, "step": 346 }, { "epoch": 0.29292929292929293, "grad_norm": 4.821753025054932, "learning_rate": 1.989082242151556e-06, "loss": 1.054430365562439, "step": 348 }, { "epoch": 0.2946127946127946, "grad_norm": 13.823630332946777, "learning_rate": 1.988821290895307e-06, "loss": 0.7408787608146667, "step": 350 }, { "epoch": 0.2962962962962963, "grad_norm": 4.8311028480529785, "learning_rate": 1.988557277275732e-06, "loss": 0.5961899757385254, "step": 352 }, { "epoch": 0.29797979797979796, "grad_norm": 6.172464847564697, "learning_rate": 1.9882902022024683e-06, "loss": 1.0046343803405762, "step": 354 }, { "epoch": 0.2996632996632997, "grad_norm": 8.489828109741211, "learning_rate": 1.9880200665957026e-06, "loss": 1.0770823955535889, "step": 356 }, { "epoch": 0.30134680134680136, "grad_norm": 20.501022338867188, "learning_rate": 1.9877468713861656e-06, "loss": 0.9369664192199707, "step": 358 }, { "epoch": 0.30303030303030304, "grad_norm": 3.7023608684539795, "learning_rate": 1.98747061751513e-06, "loss": 0.813412070274353, "step": 360 }, { "epoch": 0.3047138047138047, "grad_norm": 3.5265374183654785, "learning_rate": 1.987191305934406e-06, "loss": 0.9706151485443115, "step": 362 }, { "epoch": 0.3063973063973064, "grad_norm": 2.9974145889282227, "learning_rate": 1.98690893760634e-06, "loss": 1.2250468730926514, "step": 364 }, { "epoch": 0.30808080808080807, "grad_norm": 6.445283889770508, "learning_rate": 1.9866235135038095e-06, "loss": 0.8330235481262207, "step": 366 }, { "epoch": 0.30976430976430974, "grad_norm": 3.45355224609375, "learning_rate": 1.986335034610221e-06, "loss": 0.8574585318565369, "step": 368 }, { "epoch": 0.3114478114478115, "grad_norm": 4.574197769165039, "learning_rate": 1.9860435019195054e-06, "loss": 1.0763049125671387, "step": 370 }, { "epoch": 0.31313131313131315, "grad_norm": 5.326190948486328, "learning_rate": 1.9857489164361147e-06, "loss": 1.2134881019592285, "step": 372 }, { "epoch": 0.3148148148148148, "grad_norm": 18.84362030029297, "learning_rate": 1.9854512791750214e-06, "loss": 0.6605836153030396, "step": 374 }, { "epoch": 0.3164983164983165, "grad_norm": 5.314328193664551, "learning_rate": 1.9851505911617097e-06, "loss": 0.9535898566246033, "step": 376 }, { "epoch": 0.3181818181818182, "grad_norm": 4.1279168128967285, "learning_rate": 1.984846853432177e-06, "loss": 1.2825720310211182, "step": 378 }, { "epoch": 0.31986531986531985, "grad_norm": 8.377116203308105, "learning_rate": 1.9845400670329275e-06, "loss": 0.734359860420227, "step": 380 }, { "epoch": 0.32154882154882153, "grad_norm": 10.643378257751465, "learning_rate": 1.98423023302097e-06, "loss": 1.0042654275894165, "step": 382 }, { "epoch": 0.32323232323232326, "grad_norm": 7.596747875213623, "learning_rate": 1.9839173524638115e-06, "loss": 1.110269546508789, "step": 384 }, { "epoch": 0.32491582491582494, "grad_norm": 6.244058132171631, "learning_rate": 1.9836014264394587e-06, "loss": 0.7185302972793579, "step": 386 }, { "epoch": 0.3265993265993266, "grad_norm": 6.148385524749756, "learning_rate": 1.9832824560364093e-06, "loss": 0.9159483909606934, "step": 388 }, { "epoch": 0.3282828282828283, "grad_norm": 183.32968139648438, "learning_rate": 1.98296044235365e-06, "loss": 1.0852017402648926, "step": 390 }, { "epoch": 0.32996632996632996, "grad_norm": 8.698363304138184, "learning_rate": 1.9826353865006538e-06, "loss": 0.7871326208114624, "step": 392 }, { "epoch": 0.33164983164983164, "grad_norm": 3.863551139831543, "learning_rate": 1.9823072895973748e-06, "loss": 1.3192460536956787, "step": 394 }, { "epoch": 0.3333333333333333, "grad_norm": 13.84194564819336, "learning_rate": 1.981976152774245e-06, "loss": 1.158171534538269, "step": 396 }, { "epoch": 0.335016835016835, "grad_norm": 18.21632194519043, "learning_rate": 1.98164197717217e-06, "loss": 0.7753697037696838, "step": 398 }, { "epoch": 0.3367003367003367, "grad_norm": 5.396472930908203, "learning_rate": 1.9813047639425253e-06, "loss": 0.9357776641845703, "step": 400 }, { "epoch": 0.3383838383838384, "grad_norm": 9.759978294372559, "learning_rate": 1.9809645142471528e-06, "loss": 0.9591242074966431, "step": 402 }, { "epoch": 0.3400673400673401, "grad_norm": 6.960322380065918, "learning_rate": 1.980621229258355e-06, "loss": 0.9946481585502625, "step": 404 }, { "epoch": 0.34175084175084175, "grad_norm": 3.891620635986328, "learning_rate": 1.9802749101588942e-06, "loss": 1.068068265914917, "step": 406 }, { "epoch": 0.3434343434343434, "grad_norm": 8.289826393127441, "learning_rate": 1.9799255581419844e-06, "loss": 0.9243034720420837, "step": 408 }, { "epoch": 0.3451178451178451, "grad_norm": 16.256540298461914, "learning_rate": 1.9795731744112908e-06, "loss": 0.5437488555908203, "step": 410 }, { "epoch": 0.3468013468013468, "grad_norm": 3.7120189666748047, "learning_rate": 1.9792177601809234e-06, "loss": 0.8597297668457031, "step": 412 }, { "epoch": 0.3484848484848485, "grad_norm": 9.203973770141602, "learning_rate": 1.9788593166754343e-06, "loss": 0.914923906326294, "step": 414 }, { "epoch": 0.3501683501683502, "grad_norm": 15.325188636779785, "learning_rate": 1.9784978451298115e-06, "loss": 1.1473793983459473, "step": 416 }, { "epoch": 0.35185185185185186, "grad_norm": 9.513066291809082, "learning_rate": 1.9781333467894773e-06, "loss": 0.7187636494636536, "step": 418 }, { "epoch": 0.35353535353535354, "grad_norm": 11.042696952819824, "learning_rate": 1.9777658229102807e-06, "loss": 0.8753368258476257, "step": 420 }, { "epoch": 0.3552188552188552, "grad_norm": 19.48780059814453, "learning_rate": 1.9773952747584976e-06, "loss": 1.1081957817077637, "step": 422 }, { "epoch": 0.3569023569023569, "grad_norm": 7.343861103057861, "learning_rate": 1.9770217036108212e-06, "loss": 0.5900806188583374, "step": 424 }, { "epoch": 0.35858585858585856, "grad_norm": 13.56103801727295, "learning_rate": 1.9766451107543614e-06, "loss": 1.0243406295776367, "step": 426 }, { "epoch": 0.3602693602693603, "grad_norm": 5.026733875274658, "learning_rate": 1.9762654974866396e-06, "loss": 0.7951416969299316, "step": 428 }, { "epoch": 0.36195286195286197, "grad_norm": 30.160125732421875, "learning_rate": 1.975882865115583e-06, "loss": 1.226210117340088, "step": 430 }, { "epoch": 0.36363636363636365, "grad_norm": 3.663498640060425, "learning_rate": 1.9754972149595204e-06, "loss": 0.954987645149231, "step": 432 }, { "epoch": 0.3653198653198653, "grad_norm": 3.128584146499634, "learning_rate": 1.97510854834718e-06, "loss": 0.8251101970672607, "step": 434 }, { "epoch": 0.367003367003367, "grad_norm": 2.8099863529205322, "learning_rate": 1.9747168666176813e-06, "loss": 0.9983630180358887, "step": 436 }, { "epoch": 0.3686868686868687, "grad_norm": 9.544251441955566, "learning_rate": 1.9743221711205323e-06, "loss": 1.074230432510376, "step": 438 }, { "epoch": 0.37037037037037035, "grad_norm": 3.466240406036377, "learning_rate": 1.9739244632156256e-06, "loss": 0.9089052677154541, "step": 440 }, { "epoch": 0.3720538720538721, "grad_norm": 3.713217258453369, "learning_rate": 1.973523744273232e-06, "loss": 0.9246188402175903, "step": 442 }, { "epoch": 0.37373737373737376, "grad_norm": 4.951727867126465, "learning_rate": 1.973120015673997e-06, "loss": 0.7823818922042847, "step": 444 }, { "epoch": 0.37542087542087543, "grad_norm": 14.46354866027832, "learning_rate": 1.9727132788089354e-06, "loss": 0.6286232471466064, "step": 446 }, { "epoch": 0.3771043771043771, "grad_norm": 3.3994994163513184, "learning_rate": 1.972303535079427e-06, "loss": 1.116566777229309, "step": 448 }, { "epoch": 0.3787878787878788, "grad_norm": 13.856016159057617, "learning_rate": 1.971890785897211e-06, "loss": 1.0384341478347778, "step": 450 }, { "epoch": 0.38047138047138046, "grad_norm": 5.349656581878662, "learning_rate": 1.9714750326843825e-06, "loss": 0.7245984077453613, "step": 452 }, { "epoch": 0.38215488215488214, "grad_norm": 16.106748580932617, "learning_rate": 1.9710562768733857e-06, "loss": 0.9850279688835144, "step": 454 }, { "epoch": 0.3838383838383838, "grad_norm": 2.889192819595337, "learning_rate": 1.9706345199070107e-06, "loss": 0.7854516506195068, "step": 456 }, { "epoch": 0.38552188552188554, "grad_norm": 4.588443756103516, "learning_rate": 1.970209763238388e-06, "loss": 0.9596130847930908, "step": 458 }, { "epoch": 0.3872053872053872, "grad_norm": 2.0430006980895996, "learning_rate": 1.969782008330983e-06, "loss": 1.125518560409546, "step": 460 }, { "epoch": 0.3888888888888889, "grad_norm": 3.873711347579956, "learning_rate": 1.969351256658591e-06, "loss": 0.8866109848022461, "step": 462 }, { "epoch": 0.39057239057239057, "grad_norm": 3.8733267784118652, "learning_rate": 1.968917509705333e-06, "loss": 0.8248393535614014, "step": 464 }, { "epoch": 0.39225589225589225, "grad_norm": 10.369402885437012, "learning_rate": 1.9684807689656497e-06, "loss": 0.8977053165435791, "step": 466 }, { "epoch": 0.3939393939393939, "grad_norm": 1.9317212104797363, "learning_rate": 1.9680410359442972e-06, "loss": 0.9425126314163208, "step": 468 }, { "epoch": 0.3956228956228956, "grad_norm": 3.9970741271972656, "learning_rate": 1.9675983121563397e-06, "loss": 0.9490628242492676, "step": 470 }, { "epoch": 0.39730639730639733, "grad_norm": 4.277144908905029, "learning_rate": 1.9671525991271478e-06, "loss": 0.7922143340110779, "step": 472 }, { "epoch": 0.398989898989899, "grad_norm": 6.69656229019165, "learning_rate": 1.9667038983923902e-06, "loss": 0.9853019714355469, "step": 474 }, { "epoch": 0.4006734006734007, "grad_norm": 10.086434364318848, "learning_rate": 1.9662522114980296e-06, "loss": 0.7648198008537292, "step": 476 }, { "epoch": 0.40235690235690236, "grad_norm": 75.10104370117188, "learning_rate": 1.965797540000318e-06, "loss": 0.9607178568840027, "step": 478 }, { "epoch": 0.40404040404040403, "grad_norm": 13.168256759643555, "learning_rate": 1.9653398854657887e-06, "loss": 1.0317054986953735, "step": 480 }, { "epoch": 0.4057239057239057, "grad_norm": 7.137551784515381, "learning_rate": 1.9648792494712553e-06, "loss": 1.0325589179992676, "step": 482 }, { "epoch": 0.4074074074074074, "grad_norm": 7.48604679107666, "learning_rate": 1.9644156336038024e-06, "loss": 0.838646411895752, "step": 484 }, { "epoch": 0.4090909090909091, "grad_norm": 9.154224395751953, "learning_rate": 1.9639490394607813e-06, "loss": 0.8662800192832947, "step": 486 }, { "epoch": 0.4107744107744108, "grad_norm": 5.478043556213379, "learning_rate": 1.9634794686498055e-06, "loss": 1.0649371147155762, "step": 488 }, { "epoch": 0.41245791245791247, "grad_norm": 4.0281901359558105, "learning_rate": 1.9630069227887444e-06, "loss": 1.1006402969360352, "step": 490 }, { "epoch": 0.41414141414141414, "grad_norm": 11.18668270111084, "learning_rate": 1.9625314035057167e-06, "loss": 1.0519776344299316, "step": 492 }, { "epoch": 0.4158249158249158, "grad_norm": 6.470438003540039, "learning_rate": 1.9620529124390863e-06, "loss": 0.9292422533035278, "step": 494 }, { "epoch": 0.4175084175084175, "grad_norm": 2.4250965118408203, "learning_rate": 1.9615714512374567e-06, "loss": 1.0612026453018188, "step": 496 }, { "epoch": 0.41919191919191917, "grad_norm": 4.183928489685059, "learning_rate": 1.9610870215596643e-06, "loss": 1.081310510635376, "step": 498 }, { "epoch": 0.4208754208754209, "grad_norm": 15.612129211425781, "learning_rate": 1.960599625074773e-06, "loss": 0.8103876709938049, "step": 500 }, { "epoch": 0.4225589225589226, "grad_norm": 7.831202983856201, "learning_rate": 1.9601092634620687e-06, "loss": 0.633713960647583, "step": 502 }, { "epoch": 0.42424242424242425, "grad_norm": 7.164036750793457, "learning_rate": 1.9596159384110535e-06, "loss": 0.8758570551872253, "step": 504 }, { "epoch": 0.42592592592592593, "grad_norm": 10.661258697509766, "learning_rate": 1.95911965162144e-06, "loss": 0.9336118698120117, "step": 506 }, { "epoch": 0.4276094276094276, "grad_norm": 9.44550895690918, "learning_rate": 1.958620404803145e-06, "loss": 1.2653324604034424, "step": 508 }, { "epoch": 0.4292929292929293, "grad_norm": 6.048154354095459, "learning_rate": 1.9581181996762834e-06, "loss": 1.0118142366409302, "step": 510 }, { "epoch": 0.43097643097643096, "grad_norm": 2.742072105407715, "learning_rate": 1.9576130379711634e-06, "loss": 1.170724630355835, "step": 512 }, { "epoch": 0.43265993265993263, "grad_norm": 5.8300089836120605, "learning_rate": 1.95710492142828e-06, "loss": 1.1076912879943848, "step": 514 }, { "epoch": 0.43434343434343436, "grad_norm": 5.84092903137207, "learning_rate": 1.956593851798308e-06, "loss": 1.0066879987716675, "step": 516 }, { "epoch": 0.43602693602693604, "grad_norm": 2.712181568145752, "learning_rate": 1.9560798308420974e-06, "loss": 1.0203490257263184, "step": 518 }, { "epoch": 0.4377104377104377, "grad_norm": 10.564250946044922, "learning_rate": 1.955562860330667e-06, "loss": 0.8946080207824707, "step": 520 }, { "epoch": 0.4393939393939394, "grad_norm": 3.632702589035034, "learning_rate": 1.9550429420451973e-06, "loss": 0.7722439169883728, "step": 522 }, { "epoch": 0.44107744107744107, "grad_norm": 5.241938591003418, "learning_rate": 1.954520077777026e-06, "loss": 1.09357750415802, "step": 524 }, { "epoch": 0.44276094276094274, "grad_norm": 12.312281608581543, "learning_rate": 1.9539942693276405e-06, "loss": 0.7668850421905518, "step": 526 }, { "epoch": 0.4444444444444444, "grad_norm": 3.640024423599243, "learning_rate": 1.9534655185086717e-06, "loss": 1.1953470706939697, "step": 528 }, { "epoch": 0.44612794612794615, "grad_norm": 6.970150470733643, "learning_rate": 1.9529338271418886e-06, "loss": 0.7888380289077759, "step": 530 }, { "epoch": 0.4478114478114478, "grad_norm": 5.498988151550293, "learning_rate": 1.952399197059192e-06, "loss": 0.8151825070381165, "step": 532 }, { "epoch": 0.4494949494949495, "grad_norm": 4.296585559844971, "learning_rate": 1.9518616301026077e-06, "loss": 0.9414000511169434, "step": 534 }, { "epoch": 0.4511784511784512, "grad_norm": 3.7714059352874756, "learning_rate": 1.9513211281242795e-06, "loss": 1.2206546068191528, "step": 536 }, { "epoch": 0.45286195286195285, "grad_norm": 5.960073471069336, "learning_rate": 1.9507776929864643e-06, "loss": 1.045861840248108, "step": 538 }, { "epoch": 0.45454545454545453, "grad_norm": 4.743275165557861, "learning_rate": 1.950231326561525e-06, "loss": 0.8734741806983948, "step": 540 }, { "epoch": 0.4562289562289562, "grad_norm": 4.74852180480957, "learning_rate": 1.9496820307319237e-06, "loss": 1.0024454593658447, "step": 542 }, { "epoch": 0.45791245791245794, "grad_norm": 3.7979114055633545, "learning_rate": 1.9491298073902157e-06, "loss": 1.1115365028381348, "step": 544 }, { "epoch": 0.4595959595959596, "grad_norm": 11.823755264282227, "learning_rate": 1.9485746584390426e-06, "loss": 1.154505729675293, "step": 546 }, { "epoch": 0.4612794612794613, "grad_norm": 4.7486090660095215, "learning_rate": 1.948016585791127e-06, "loss": 1.3286551237106323, "step": 548 }, { "epoch": 0.46296296296296297, "grad_norm": 4.785913467407227, "learning_rate": 1.9474555913692627e-06, "loss": 0.8783373832702637, "step": 550 }, { "epoch": 0.46464646464646464, "grad_norm": 23.855112075805664, "learning_rate": 1.946891677106312e-06, "loss": 0.8687731027603149, "step": 552 }, { "epoch": 0.4663299663299663, "grad_norm": 4.286966323852539, "learning_rate": 1.946324844945197e-06, "loss": 1.031162977218628, "step": 554 }, { "epoch": 0.468013468013468, "grad_norm": 2.7228028774261475, "learning_rate": 1.9457550968388928e-06, "loss": 0.7218068242073059, "step": 556 }, { "epoch": 0.4696969696969697, "grad_norm": 4.717339038848877, "learning_rate": 1.9451824347504213e-06, "loss": 1.17518949508667, "step": 558 }, { "epoch": 0.4713804713804714, "grad_norm": 13.350486755371094, "learning_rate": 1.944606860652845e-06, "loss": 0.4006010890007019, "step": 560 }, { "epoch": 0.4730639730639731, "grad_norm": 3.6367268562316895, "learning_rate": 1.944028376529258e-06, "loss": 0.5863475799560547, "step": 562 }, { "epoch": 0.47474747474747475, "grad_norm": 2.8083655834198, "learning_rate": 1.943446984372782e-06, "loss": 1.2751696109771729, "step": 564 }, { "epoch": 0.4764309764309764, "grad_norm": 6.395586967468262, "learning_rate": 1.942862686186557e-06, "loss": 1.1098227500915527, "step": 566 }, { "epoch": 0.4781144781144781, "grad_norm": 10.825981140136719, "learning_rate": 1.9422754839837366e-06, "loss": 0.4494704604148865, "step": 568 }, { "epoch": 0.4797979797979798, "grad_norm": 32.269683837890625, "learning_rate": 1.9416853797874797e-06, "loss": 1.0807325839996338, "step": 570 }, { "epoch": 0.48148148148148145, "grad_norm": 20.739370346069336, "learning_rate": 1.941092375630943e-06, "loss": 0.6750832796096802, "step": 572 }, { "epoch": 0.4831649831649832, "grad_norm": 11.944796562194824, "learning_rate": 1.9404964735572754e-06, "loss": 0.9658522605895996, "step": 574 }, { "epoch": 0.48484848484848486, "grad_norm": 8.797262191772461, "learning_rate": 1.939897675619611e-06, "loss": 0.8590230941772461, "step": 576 }, { "epoch": 0.48653198653198654, "grad_norm": 9.839401245117188, "learning_rate": 1.9392959838810597e-06, "loss": 1.0677263736724854, "step": 578 }, { "epoch": 0.4882154882154882, "grad_norm": 8.498374938964844, "learning_rate": 1.9386914004147034e-06, "loss": 0.7860367298126221, "step": 580 }, { "epoch": 0.4898989898989899, "grad_norm": 6.413960933685303, "learning_rate": 1.938083927303586e-06, "loss": 1.3258328437805176, "step": 582 }, { "epoch": 0.49158249158249157, "grad_norm": 2.855747699737549, "learning_rate": 1.937473566640708e-06, "loss": 0.9856802821159363, "step": 584 }, { "epoch": 0.49326599326599324, "grad_norm": 5.174104690551758, "learning_rate": 1.9368603205290196e-06, "loss": 0.8397727012634277, "step": 586 }, { "epoch": 0.494949494949495, "grad_norm": 3.8909213542938232, "learning_rate": 1.9362441910814105e-06, "loss": 0.6163880825042725, "step": 588 }, { "epoch": 0.49663299663299665, "grad_norm": 4.697168350219727, "learning_rate": 1.935625180420706e-06, "loss": 1.087604284286499, "step": 590 }, { "epoch": 0.4983164983164983, "grad_norm": 2.995621681213379, "learning_rate": 1.935003290679659e-06, "loss": 1.0904521942138672, "step": 592 }, { "epoch": 0.5, "grad_norm": 13.57467269897461, "learning_rate": 1.934378524000941e-06, "loss": 0.8232730031013489, "step": 594 }, { "epoch": 0.5016835016835017, "grad_norm": 6.692266464233398, "learning_rate": 1.933750882537136e-06, "loss": 0.9355677366256714, "step": 596 }, { "epoch": 0.5033670033670034, "grad_norm": 2.513978958129883, "learning_rate": 1.9331203684507333e-06, "loss": 1.284334421157837, "step": 598 }, { "epoch": 0.5050505050505051, "grad_norm": 23.219905853271484, "learning_rate": 1.9324869839141184e-06, "loss": 0.7689567804336548, "step": 600 }, { "epoch": 0.5067340067340067, "grad_norm": 16.52220344543457, "learning_rate": 1.9318507311095686e-06, "loss": 1.0293747186660767, "step": 602 }, { "epoch": 0.5084175084175084, "grad_norm": 10.209641456604004, "learning_rate": 1.9312116122292414e-06, "loss": 0.9961596727371216, "step": 604 }, { "epoch": 0.51010101010101, "grad_norm": 3.1684632301330566, "learning_rate": 1.9305696294751707e-06, "loss": 1.0693247318267822, "step": 606 }, { "epoch": 0.5117845117845118, "grad_norm": 15.678545951843262, "learning_rate": 1.9299247850592575e-06, "loss": 0.5298241376876831, "step": 608 }, { "epoch": 0.5134680134680135, "grad_norm": 3.837263822555542, "learning_rate": 1.9292770812032626e-06, "loss": 0.9167294502258301, "step": 610 }, { "epoch": 0.5151515151515151, "grad_norm": 8.804614067077637, "learning_rate": 1.9286265201387966e-06, "loss": 0.8463789224624634, "step": 612 }, { "epoch": 0.5168350168350169, "grad_norm": 6.136633396148682, "learning_rate": 1.9279731041073177e-06, "loss": 0.6948338747024536, "step": 614 }, { "epoch": 0.5185185185185185, "grad_norm": 5.291085720062256, "learning_rate": 1.9273168353601185e-06, "loss": 1.080240249633789, "step": 616 }, { "epoch": 0.5202020202020202, "grad_norm": 5.781073093414307, "learning_rate": 1.9266577161583207e-06, "loss": 1.0078164339065552, "step": 618 }, { "epoch": 0.5218855218855218, "grad_norm": 4.246747970581055, "learning_rate": 1.925995748772868e-06, "loss": 0.9573478102684021, "step": 620 }, { "epoch": 0.5235690235690236, "grad_norm": 6.759246349334717, "learning_rate": 1.925330935484516e-06, "loss": 1.0398313999176025, "step": 622 }, { "epoch": 0.5252525252525253, "grad_norm": 2.2948110103607178, "learning_rate": 1.9246632785838263e-06, "loss": 0.7390921711921692, "step": 624 }, { "epoch": 0.5269360269360269, "grad_norm": 9.203880310058594, "learning_rate": 1.9239927803711578e-06, "loss": 0.9215421676635742, "step": 626 }, { "epoch": 0.5286195286195287, "grad_norm": 18.581615447998047, "learning_rate": 1.923319443156659e-06, "loss": 0.8367900252342224, "step": 628 }, { "epoch": 0.5303030303030303, "grad_norm": 16.1141357421875, "learning_rate": 1.92264326926026e-06, "loss": 0.7088955640792847, "step": 630 }, { "epoch": 0.531986531986532, "grad_norm": 4.339905738830566, "learning_rate": 1.9219642610116647e-06, "loss": 1.1045582294464111, "step": 632 }, { "epoch": 0.5336700336700336, "grad_norm": 5.655019760131836, "learning_rate": 1.9212824207503415e-06, "loss": 0.9011019468307495, "step": 634 }, { "epoch": 0.5353535353535354, "grad_norm": 18.707368850708008, "learning_rate": 1.920597750825517e-06, "loss": 0.7971285581588745, "step": 636 }, { "epoch": 0.5370370370370371, "grad_norm": 15.80146312713623, "learning_rate": 1.919910253596168e-06, "loss": 0.9591305255889893, "step": 638 }, { "epoch": 0.5387205387205387, "grad_norm": 4.86085319519043, "learning_rate": 1.919219931431011e-06, "loss": 0.810368537902832, "step": 640 }, { "epoch": 0.5404040404040404, "grad_norm": 6.4632792472839355, "learning_rate": 1.918526786708497e-06, "loss": 0.9356435537338257, "step": 642 }, { "epoch": 0.5420875420875421, "grad_norm": 16.430055618286133, "learning_rate": 1.9178308218168e-06, "loss": 0.8751171231269836, "step": 644 }, { "epoch": 0.5437710437710438, "grad_norm": 8.275667190551758, "learning_rate": 1.9171320391538132e-06, "loss": 0.8758902549743652, "step": 646 }, { "epoch": 0.5454545454545454, "grad_norm": 3.1901955604553223, "learning_rate": 1.9164304411271364e-06, "loss": 0.9705331325531006, "step": 648 }, { "epoch": 0.5471380471380471, "grad_norm": 16.389245986938477, "learning_rate": 1.9157260301540697e-06, "loss": 1.0938405990600586, "step": 650 }, { "epoch": 0.5488215488215489, "grad_norm": 7.667538642883301, "learning_rate": 1.9150188086616055e-06, "loss": 1.0371794700622559, "step": 652 }, { "epoch": 0.5505050505050505, "grad_norm": 19.274045944213867, "learning_rate": 1.91430877908642e-06, "loss": 1.0635974407196045, "step": 654 }, { "epoch": 0.5521885521885522, "grad_norm": 11.602453231811523, "learning_rate": 1.9135959438748626e-06, "loss": 0.8951305747032166, "step": 656 }, { "epoch": 0.5538720538720538, "grad_norm": 5.208285331726074, "learning_rate": 1.9128803054829515e-06, "loss": 0.7661327719688416, "step": 658 }, { "epoch": 0.5555555555555556, "grad_norm": 13.546456336975098, "learning_rate": 1.912161866376362e-06, "loss": 0.7583224177360535, "step": 660 }, { "epoch": 0.5572390572390572, "grad_norm": 3.013401508331299, "learning_rate": 1.9114406290304186e-06, "loss": 1.0516358613967896, "step": 662 }, { "epoch": 0.5589225589225589, "grad_norm": 7.981349468231201, "learning_rate": 1.910716595930088e-06, "loss": 1.0741899013519287, "step": 664 }, { "epoch": 0.5606060606060606, "grad_norm": 5.936778545379639, "learning_rate": 1.9099897695699684e-06, "loss": 0.49627187848091125, "step": 666 }, { "epoch": 0.5622895622895623, "grad_norm": 13.402975082397461, "learning_rate": 1.9092601524542828e-06, "loss": 0.6627441644668579, "step": 668 }, { "epoch": 0.563973063973064, "grad_norm": 44.2243537902832, "learning_rate": 1.9085277470968692e-06, "loss": 1.0360723733901978, "step": 670 }, { "epoch": 0.5656565656565656, "grad_norm": 40.057151794433594, "learning_rate": 1.907792556021171e-06, "loss": 0.6004194021224976, "step": 672 }, { "epoch": 0.5673400673400674, "grad_norm": 10.742400169372559, "learning_rate": 1.9070545817602328e-06, "loss": 0.5512696504592896, "step": 674 }, { "epoch": 0.569023569023569, "grad_norm": 4.548379898071289, "learning_rate": 1.9063138268566851e-06, "loss": 0.6692613959312439, "step": 676 }, { "epoch": 0.5707070707070707, "grad_norm": 5.4406023025512695, "learning_rate": 1.9055702938627407e-06, "loss": 1.1743131875991821, "step": 678 }, { "epoch": 0.5723905723905723, "grad_norm": 11.110426902770996, "learning_rate": 1.9048239853401833e-06, "loss": 0.41852569580078125, "step": 680 }, { "epoch": 0.5740740740740741, "grad_norm": 8.052200317382812, "learning_rate": 1.9040749038603602e-06, "loss": 1.0331244468688965, "step": 682 }, { "epoch": 0.5757575757575758, "grad_norm": 9.13505744934082, "learning_rate": 1.9033230520041719e-06, "loss": 1.1170430183410645, "step": 684 }, { "epoch": 0.5774410774410774, "grad_norm": 5.407991886138916, "learning_rate": 1.9025684323620645e-06, "loss": 1.0954296588897705, "step": 686 }, { "epoch": 0.5791245791245792, "grad_norm": 4.380704879760742, "learning_rate": 1.9018110475340203e-06, "loss": 0.8225352168083191, "step": 688 }, { "epoch": 0.5808080808080808, "grad_norm": 10.951150894165039, "learning_rate": 1.9010509001295485e-06, "loss": 0.7188082337379456, "step": 690 }, { "epoch": 0.5824915824915825, "grad_norm": 3.9585494995117188, "learning_rate": 1.9002879927676767e-06, "loss": 0.8001824617385864, "step": 692 }, { "epoch": 0.5841750841750841, "grad_norm": 3.551115036010742, "learning_rate": 1.8995223280769424e-06, "loss": 0.9616529941558838, "step": 694 }, { "epoch": 0.5858585858585859, "grad_norm": 6.161308765411377, "learning_rate": 1.8987539086953819e-06, "loss": 0.8874322772026062, "step": 696 }, { "epoch": 0.5875420875420876, "grad_norm": 10.766314506530762, "learning_rate": 1.8979827372705233e-06, "loss": 0.8692164421081543, "step": 698 }, { "epoch": 0.5892255892255892, "grad_norm": 7.8653035163879395, "learning_rate": 1.8972088164593771e-06, "loss": 0.8069002032279968, "step": 700 }, { "epoch": 0.5909090909090909, "grad_norm": 7.134982585906982, "learning_rate": 1.896432148928426e-06, "loss": 0.9260559678077698, "step": 702 }, { "epoch": 0.5925925925925926, "grad_norm": 6.079588890075684, "learning_rate": 1.895652737353616e-06, "loss": 1.0575344562530518, "step": 704 }, { "epoch": 0.5942760942760943, "grad_norm": 28.564146041870117, "learning_rate": 1.8948705844203482e-06, "loss": 0.9762513041496277, "step": 706 }, { "epoch": 0.5959595959595959, "grad_norm": 9.878491401672363, "learning_rate": 1.8940856928234689e-06, "loss": 0.7743998765945435, "step": 708 }, { "epoch": 0.5976430976430976, "grad_norm": 6.6208720207214355, "learning_rate": 1.8932980652672597e-06, "loss": 0.8060773015022278, "step": 710 }, { "epoch": 0.5993265993265994, "grad_norm": 6.425124168395996, "learning_rate": 1.8925077044654288e-06, "loss": 1.1068170070648193, "step": 712 }, { "epoch": 0.601010101010101, "grad_norm": 2.99337100982666, "learning_rate": 1.8917146131411015e-06, "loss": 1.0512995719909668, "step": 714 }, { "epoch": 0.6026936026936027, "grad_norm": 3.8051576614379883, "learning_rate": 1.8909187940268115e-06, "loss": 0.7426064610481262, "step": 716 }, { "epoch": 0.6043771043771043, "grad_norm": 6.566201686859131, "learning_rate": 1.89012024986449e-06, "loss": 0.894334614276886, "step": 718 }, { "epoch": 0.6060606060606061, "grad_norm": 8.337869644165039, "learning_rate": 1.8893189834054586e-06, "loss": 0.9385843276977539, "step": 720 }, { "epoch": 0.6077441077441077, "grad_norm": 9.33846664428711, "learning_rate": 1.8885149974104164e-06, "loss": 0.9482979774475098, "step": 722 }, { "epoch": 0.6094276094276094, "grad_norm": 3.8621480464935303, "learning_rate": 1.8877082946494339e-06, "loss": 0.8786056041717529, "step": 724 }, { "epoch": 0.6111111111111112, "grad_norm": 19.156356811523438, "learning_rate": 1.8868988779019414e-06, "loss": 0.9990079402923584, "step": 726 }, { "epoch": 0.6127946127946128, "grad_norm": 27.229507446289062, "learning_rate": 1.8860867499567203e-06, "loss": 0.908332347869873, "step": 728 }, { "epoch": 0.6144781144781145, "grad_norm": 11.623302459716797, "learning_rate": 1.885271913611893e-06, "loss": 1.1277103424072266, "step": 730 }, { "epoch": 0.6161616161616161, "grad_norm": 3.196768283843994, "learning_rate": 1.8844543716749134e-06, "loss": 1.0839519500732422, "step": 732 }, { "epoch": 0.6178451178451179, "grad_norm": 3.471727132797241, "learning_rate": 1.8836341269625578e-06, "loss": 0.7715842723846436, "step": 734 }, { "epoch": 0.6195286195286195, "grad_norm": 8.554580688476562, "learning_rate": 1.882811182300914e-06, "loss": 0.7822331190109253, "step": 736 }, { "epoch": 0.6212121212121212, "grad_norm": 22.184911727905273, "learning_rate": 1.881985540525373e-06, "loss": 0.6754369139671326, "step": 738 }, { "epoch": 0.622895622895623, "grad_norm": 5.2334442138671875, "learning_rate": 1.8811572044806178e-06, "loss": 1.2211134433746338, "step": 740 }, { "epoch": 0.6245791245791246, "grad_norm": 5.914177417755127, "learning_rate": 1.8803261770206149e-06, "loss": 0.9921356439590454, "step": 742 }, { "epoch": 0.6262626262626263, "grad_norm": 6.881519794464111, "learning_rate": 1.8794924610086031e-06, "loss": 1.1868412494659424, "step": 744 }, { "epoch": 0.6279461279461279, "grad_norm": 3.5606613159179688, "learning_rate": 1.8786560593170854e-06, "loss": 0.9340991377830505, "step": 746 }, { "epoch": 0.6296296296296297, "grad_norm": 81.61597442626953, "learning_rate": 1.877816974827817e-06, "loss": 1.1839344501495361, "step": 748 }, { "epoch": 0.6313131313131313, "grad_norm": 2.52506685256958, "learning_rate": 1.8769752104317973e-06, "loss": 1.280696153640747, "step": 750 }, { "epoch": 0.632996632996633, "grad_norm": 6.496135711669922, "learning_rate": 1.8761307690292589e-06, "loss": 0.7088183164596558, "step": 752 }, { "epoch": 0.6346801346801347, "grad_norm": 20.767459869384766, "learning_rate": 1.875283653529658e-06, "loss": 0.9602365493774414, "step": 754 }, { "epoch": 0.6363636363636364, "grad_norm": 3.436274290084839, "learning_rate": 1.874433866851663e-06, "loss": 0.7587154507637024, "step": 756 }, { "epoch": 0.6380471380471381, "grad_norm": 6.604635238647461, "learning_rate": 1.8735814119231475e-06, "loss": 0.8278650641441345, "step": 758 }, { "epoch": 0.6397306397306397, "grad_norm": 17.961626052856445, "learning_rate": 1.872726291681177e-06, "loss": 0.6165801286697388, "step": 760 }, { "epoch": 0.6414141414141414, "grad_norm": 4.451328754425049, "learning_rate": 1.8718685090720004e-06, "loss": 0.4456964433193207, "step": 762 }, { "epoch": 0.6430976430976431, "grad_norm": 4.893067359924316, "learning_rate": 1.8710080670510402e-06, "loss": 0.9912799000740051, "step": 764 }, { "epoch": 0.6447811447811448, "grad_norm": 9.001324653625488, "learning_rate": 1.8701449685828806e-06, "loss": 1.0763907432556152, "step": 766 }, { "epoch": 0.6464646464646465, "grad_norm": 10.884461402893066, "learning_rate": 1.8692792166412595e-06, "loss": 0.761760950088501, "step": 768 }, { "epoch": 0.6481481481481481, "grad_norm": 7.378164768218994, "learning_rate": 1.8684108142090562e-06, "loss": 0.7692549824714661, "step": 770 }, { "epoch": 0.6498316498316499, "grad_norm": 3.8816888332366943, "learning_rate": 1.8675397642782827e-06, "loss": 0.7803175449371338, "step": 772 }, { "epoch": 0.6515151515151515, "grad_norm": 28.640594482421875, "learning_rate": 1.8666660698500726e-06, "loss": 0.7042616009712219, "step": 774 }, { "epoch": 0.6531986531986532, "grad_norm": 3.964298725128174, "learning_rate": 1.8657897339346707e-06, "loss": 0.9174256920814514, "step": 776 }, { "epoch": 0.6548821548821548, "grad_norm": 2.9591541290283203, "learning_rate": 1.8649107595514226e-06, "loss": 1.040077805519104, "step": 778 }, { "epoch": 0.6565656565656566, "grad_norm": 15.031349182128906, "learning_rate": 1.8640291497287654e-06, "loss": 0.9099994897842407, "step": 780 }, { "epoch": 0.6582491582491582, "grad_norm": 15.89492416381836, "learning_rate": 1.8631449075042156e-06, "loss": 1.0717145204544067, "step": 782 }, { "epoch": 0.6599326599326599, "grad_norm": 5.403634071350098, "learning_rate": 1.8622580359243601e-06, "loss": 0.984376847743988, "step": 784 }, { "epoch": 0.6616161616161617, "grad_norm": 12.673766136169434, "learning_rate": 1.8613685380448441e-06, "loss": 1.05198073387146, "step": 786 }, { "epoch": 0.6632996632996633, "grad_norm": 14.643843650817871, "learning_rate": 1.8604764169303626e-06, "loss": 0.8343431949615479, "step": 788 }, { "epoch": 0.664983164983165, "grad_norm": 3.391157627105713, "learning_rate": 1.8595816756546477e-06, "loss": 0.935477614402771, "step": 790 }, { "epoch": 0.6666666666666666, "grad_norm": 10.033073425292969, "learning_rate": 1.8586843173004598e-06, "loss": 0.9675720930099487, "step": 792 }, { "epoch": 0.6683501683501684, "grad_norm": 3.087076187133789, "learning_rate": 1.8577843449595763e-06, "loss": 0.6215054392814636, "step": 794 }, { "epoch": 0.67003367003367, "grad_norm": 2.3780627250671387, "learning_rate": 1.85688176173278e-06, "loss": 0.9712251424789429, "step": 796 }, { "epoch": 0.6717171717171717, "grad_norm": 5.441427230834961, "learning_rate": 1.8559765707298502e-06, "loss": 0.993064820766449, "step": 798 }, { "epoch": 0.6734006734006734, "grad_norm": 3.6938350200653076, "learning_rate": 1.8550687750695509e-06, "loss": 0.6260876655578613, "step": 800 }, { "epoch": 0.6750841750841751, "grad_norm": 2.9936280250549316, "learning_rate": 1.8541583778796196e-06, "loss": 0.9794340133666992, "step": 802 }, { "epoch": 0.6767676767676768, "grad_norm": 11.732361793518066, "learning_rate": 1.8532453822967584e-06, "loss": 0.7467688322067261, "step": 804 }, { "epoch": 0.6784511784511784, "grad_norm": 14.32625675201416, "learning_rate": 1.8523297914666207e-06, "loss": 0.6042066812515259, "step": 806 }, { "epoch": 0.6801346801346801, "grad_norm": 8.490279197692871, "learning_rate": 1.8514116085438027e-06, "loss": 0.9197585582733154, "step": 808 }, { "epoch": 0.6818181818181818, "grad_norm": 8.056469917297363, "learning_rate": 1.8504908366918302e-06, "loss": 0.9674583077430725, "step": 810 }, { "epoch": 0.6835016835016835, "grad_norm": 3.1704888343811035, "learning_rate": 1.84956747908315e-06, "loss": 1.158250331878662, "step": 812 }, { "epoch": 0.6851851851851852, "grad_norm": 21.666156768798828, "learning_rate": 1.8486415388991173e-06, "loss": 0.5964489579200745, "step": 814 }, { "epoch": 0.6868686868686869, "grad_norm": 2.8897705078125, "learning_rate": 1.8477130193299863e-06, "loss": 1.0845026969909668, "step": 816 }, { "epoch": 0.6885521885521886, "grad_norm": 4.6460371017456055, "learning_rate": 1.846781923574897e-06, "loss": 0.7914435863494873, "step": 818 }, { "epoch": 0.6902356902356902, "grad_norm": 15.396445274353027, "learning_rate": 1.8458482548418661e-06, "loss": 0.6972349882125854, "step": 820 }, { "epoch": 0.6919191919191919, "grad_norm": 12.507894515991211, "learning_rate": 1.8449120163477753e-06, "loss": 0.7580819129943848, "step": 822 }, { "epoch": 0.6936026936026936, "grad_norm": 3.151318073272705, "learning_rate": 1.8439732113183607e-06, "loss": 0.8469318151473999, "step": 824 }, { "epoch": 0.6952861952861953, "grad_norm": 7.642462730407715, "learning_rate": 1.8430318429881997e-06, "loss": 0.8898569941520691, "step": 826 }, { "epoch": 0.696969696969697, "grad_norm": 3.9848973751068115, "learning_rate": 1.8420879146007025e-06, "loss": 0.7908803224563599, "step": 828 }, { "epoch": 0.6986531986531986, "grad_norm": 3.608306884765625, "learning_rate": 1.8411414294081003e-06, "loss": 1.208510398864746, "step": 830 }, { "epoch": 0.7003367003367004, "grad_norm": 5.017977237701416, "learning_rate": 1.8401923906714321e-06, "loss": 0.8827351331710815, "step": 832 }, { "epoch": 0.702020202020202, "grad_norm": 6.648691177368164, "learning_rate": 1.8392408016605358e-06, "loss": 0.6782714128494263, "step": 834 }, { "epoch": 0.7037037037037037, "grad_norm": 2.3364169597625732, "learning_rate": 1.8382866656540361e-06, "loss": 0.8600856065750122, "step": 836 }, { "epoch": 0.7053872053872053, "grad_norm": 8.938956260681152, "learning_rate": 1.8373299859393326e-06, "loss": 0.598077654838562, "step": 838 }, { "epoch": 0.7070707070707071, "grad_norm": 2.977544069290161, "learning_rate": 1.8363707658125905e-06, "loss": 1.239319920539856, "step": 840 }, { "epoch": 0.7087542087542088, "grad_norm": 8.318215370178223, "learning_rate": 1.8354090085787252e-06, "loss": 1.1046662330627441, "step": 842 }, { "epoch": 0.7104377104377104, "grad_norm": 11.01289176940918, "learning_rate": 1.8344447175513965e-06, "loss": 1.0052223205566406, "step": 844 }, { "epoch": 0.7121212121212122, "grad_norm": 4.2134199142456055, "learning_rate": 1.8334778960529916e-06, "loss": 0.8582904934883118, "step": 846 }, { "epoch": 0.7138047138047138, "grad_norm": 12.371885299682617, "learning_rate": 1.8325085474146178e-06, "loss": 0.7332583665847778, "step": 848 }, { "epoch": 0.7154882154882155, "grad_norm": 17.55687713623047, "learning_rate": 1.8315366749760892e-06, "loss": 0.8967425227165222, "step": 850 }, { "epoch": 0.7171717171717171, "grad_norm": 8.929709434509277, "learning_rate": 1.8305622820859153e-06, "loss": 0.7431824207305908, "step": 852 }, { "epoch": 0.7188552188552189, "grad_norm": 8.052350044250488, "learning_rate": 1.829585372101289e-06, "loss": 0.74913489818573, "step": 854 }, { "epoch": 0.7205387205387206, "grad_norm": 7.0438432693481445, "learning_rate": 1.828605948388077e-06, "loss": 1.1222918033599854, "step": 856 }, { "epoch": 0.7222222222222222, "grad_norm": 4.850925445556641, "learning_rate": 1.8276240143208054e-06, "loss": 0.7487032413482666, "step": 858 }, { "epoch": 0.7239057239057239, "grad_norm": 4.052372932434082, "learning_rate": 1.8266395732826508e-06, "loss": 0.9676373600959778, "step": 860 }, { "epoch": 0.7255892255892256, "grad_norm": 3.9550697803497314, "learning_rate": 1.8256526286654264e-06, "loss": 1.170372724533081, "step": 862 }, { "epoch": 0.7272727272727273, "grad_norm": 5.656938076019287, "learning_rate": 1.824663183869572e-06, "loss": 0.9866449236869812, "step": 864 }, { "epoch": 0.7289562289562289, "grad_norm": 38.397705078125, "learning_rate": 1.8236712423041408e-06, "loss": 0.9790170192718506, "step": 866 }, { "epoch": 0.7306397306397306, "grad_norm": 5.537583827972412, "learning_rate": 1.822676807386789e-06, "loss": 1.2290745973587036, "step": 868 }, { "epoch": 0.7323232323232324, "grad_norm": 5.6285080909729, "learning_rate": 1.8216798825437635e-06, "loss": 1.1579557657241821, "step": 870 }, { "epoch": 0.734006734006734, "grad_norm": 6.440390586853027, "learning_rate": 1.8206804712098903e-06, "loss": 1.0755215883255005, "step": 872 }, { "epoch": 0.7356902356902357, "grad_norm": 3.566018581390381, "learning_rate": 1.819678576828561e-06, "loss": 1.0724159479141235, "step": 874 }, { "epoch": 0.7373737373737373, "grad_norm": 76.55033111572266, "learning_rate": 1.8186742028517237e-06, "loss": 0.8843256235122681, "step": 876 }, { "epoch": 0.7390572390572391, "grad_norm": 12.517910957336426, "learning_rate": 1.8176673527398694e-06, "loss": 0.6147758960723877, "step": 878 }, { "epoch": 0.7407407407407407, "grad_norm": 16.583293914794922, "learning_rate": 1.8166580299620202e-06, "loss": 0.6138923764228821, "step": 880 }, { "epoch": 0.7424242424242424, "grad_norm": 2.747283935546875, "learning_rate": 1.815646237995718e-06, "loss": 1.1428195238113403, "step": 882 }, { "epoch": 0.7441077441077442, "grad_norm": 92.89835357666016, "learning_rate": 1.814631980327012e-06, "loss": 1.0840024948120117, "step": 884 }, { "epoch": 0.7457912457912458, "grad_norm": 4.928184509277344, "learning_rate": 1.813615260450446e-06, "loss": 0.646350622177124, "step": 886 }, { "epoch": 0.7474747474747475, "grad_norm": 2.7117934226989746, "learning_rate": 1.8125960818690485e-06, "loss": 0.991912841796875, "step": 888 }, { "epoch": 0.7491582491582491, "grad_norm": 17.214120864868164, "learning_rate": 1.811574448094318e-06, "loss": 0.8976044058799744, "step": 890 }, { "epoch": 0.7508417508417509, "grad_norm": 27.415754318237305, "learning_rate": 1.8105503626462129e-06, "loss": 0.9429522752761841, "step": 892 }, { "epoch": 0.7525252525252525, "grad_norm": 7.359311580657959, "learning_rate": 1.8095238290531385e-06, "loss": 0.7071723937988281, "step": 894 }, { "epoch": 0.7542087542087542, "grad_norm": 8.892601013183594, "learning_rate": 1.8084948508519346e-06, "loss": 0.8216047286987305, "step": 896 }, { "epoch": 0.7558922558922558, "grad_norm": 62.057533264160156, "learning_rate": 1.8074634315878644e-06, "loss": 0.6230831146240234, "step": 898 }, { "epoch": 0.7575757575757576, "grad_norm": 2.9089205265045166, "learning_rate": 1.8064295748146014e-06, "loss": 0.8760740160942078, "step": 900 }, { "epoch": 0.7592592592592593, "grad_norm": 14.121993064880371, "learning_rate": 1.8053932840942175e-06, "loss": 0.6401835680007935, "step": 902 }, { "epoch": 0.7609427609427609, "grad_norm": 5.602692604064941, "learning_rate": 1.8043545629971689e-06, "loss": 1.1890406608581543, "step": 904 }, { "epoch": 0.7626262626262627, "grad_norm": 3.653724431991577, "learning_rate": 1.8033134151022881e-06, "loss": 0.8872392177581787, "step": 906 }, { "epoch": 0.7643097643097643, "grad_norm": 5.650278568267822, "learning_rate": 1.8022698439967673e-06, "loss": 0.8761744499206543, "step": 908 }, { "epoch": 0.765993265993266, "grad_norm": 3.7810301780700684, "learning_rate": 1.8012238532761476e-06, "loss": 0.8327740430831909, "step": 910 }, { "epoch": 0.7676767676767676, "grad_norm": 16.808286666870117, "learning_rate": 1.8001754465443078e-06, "loss": 0.9591882228851318, "step": 912 }, { "epoch": 0.7693602693602694, "grad_norm": 8.369492530822754, "learning_rate": 1.79912462741345e-06, "loss": 0.8368163704872131, "step": 914 }, { "epoch": 0.7710437710437711, "grad_norm": 8.313328742980957, "learning_rate": 1.798071399504088e-06, "loss": 0.9555743336677551, "step": 916 }, { "epoch": 0.7727272727272727, "grad_norm": 4.798566818237305, "learning_rate": 1.7970157664450357e-06, "loss": 0.6112362146377563, "step": 918 }, { "epoch": 0.7744107744107744, "grad_norm": 18.712345123291016, "learning_rate": 1.7959577318733925e-06, "loss": 0.5020445585250854, "step": 920 }, { "epoch": 0.7760942760942761, "grad_norm": 2.20595383644104, "learning_rate": 1.7948972994345328e-06, "loss": 0.6102715134620667, "step": 922 }, { "epoch": 0.7777777777777778, "grad_norm": 6.520366191864014, "learning_rate": 1.7938344727820928e-06, "loss": 0.9018456935882568, "step": 924 }, { "epoch": 0.7794612794612794, "grad_norm": 12.06176471710205, "learning_rate": 1.7927692555779577e-06, "loss": 1.130429744720459, "step": 926 }, { "epoch": 0.7811447811447811, "grad_norm": 4.479389190673828, "learning_rate": 1.791701651492248e-06, "loss": 0.46166184544563293, "step": 928 }, { "epoch": 0.7828282828282829, "grad_norm": 9.225821495056152, "learning_rate": 1.7906316642033099e-06, "loss": 1.3147855997085571, "step": 930 }, { "epoch": 0.7845117845117845, "grad_norm": 13.229998588562012, "learning_rate": 1.7895592973976998e-06, "loss": 0.8350358605384827, "step": 932 }, { "epoch": 0.7861952861952862, "grad_norm": 2.252268075942993, "learning_rate": 1.7884845547701721e-06, "loss": 0.991974949836731, "step": 934 }, { "epoch": 0.7878787878787878, "grad_norm": 2.7008936405181885, "learning_rate": 1.7874074400236677e-06, "loss": 0.8550293445587158, "step": 936 }, { "epoch": 0.7895622895622896, "grad_norm": 2.743255376815796, "learning_rate": 1.7863279568692999e-06, "loss": 0.8677815198898315, "step": 938 }, { "epoch": 0.7912457912457912, "grad_norm": 5.70646858215332, "learning_rate": 1.7852461090263422e-06, "loss": 0.9757652282714844, "step": 940 }, { "epoch": 0.7929292929292929, "grad_norm": 4.7707200050354, "learning_rate": 1.7841619002222164e-06, "loss": 0.4027637541294098, "step": 942 }, { "epoch": 0.7946127946127947, "grad_norm": 2.5659232139587402, "learning_rate": 1.7830753341924768e-06, "loss": 0.8958191275596619, "step": 944 }, { "epoch": 0.7962962962962963, "grad_norm": 5.1869049072265625, "learning_rate": 1.781986414680802e-06, "loss": 0.8641246557235718, "step": 946 }, { "epoch": 0.797979797979798, "grad_norm": 2.7495296001434326, "learning_rate": 1.7808951454389761e-06, "loss": 1.0250309705734253, "step": 948 }, { "epoch": 0.7996632996632996, "grad_norm": 8.12884521484375, "learning_rate": 1.7798015302268826e-06, "loss": 0.8447544574737549, "step": 950 }, { "epoch": 0.8013468013468014, "grad_norm": 9.825166702270508, "learning_rate": 1.7787055728124853e-06, "loss": 0.44982272386550903, "step": 952 }, { "epoch": 0.803030303030303, "grad_norm": 2.7511558532714844, "learning_rate": 1.777607276971818e-06, "loss": 0.934439480304718, "step": 954 }, { "epoch": 0.8047138047138047, "grad_norm": 10.230318069458008, "learning_rate": 1.7765066464889729e-06, "loss": 0.9457552433013916, "step": 956 }, { "epoch": 0.8063973063973064, "grad_norm": 11.444622039794922, "learning_rate": 1.775403685156085e-06, "loss": 1.083388090133667, "step": 958 }, { "epoch": 0.8080808080808081, "grad_norm": 6.961023330688477, "learning_rate": 1.77429839677332e-06, "loss": 0.6390881538391113, "step": 960 }, { "epoch": 0.8097643097643098, "grad_norm": 3.448756217956543, "learning_rate": 1.773190785148861e-06, "loss": 0.7549522519111633, "step": 962 }, { "epoch": 0.8114478114478114, "grad_norm": 9.252376556396484, "learning_rate": 1.7720808540988965e-06, "loss": 0.6879374980926514, "step": 964 }, { "epoch": 0.8131313131313131, "grad_norm": 2.4772350788116455, "learning_rate": 1.770968607447606e-06, "loss": 0.9675562977790833, "step": 966 }, { "epoch": 0.8148148148148148, "grad_norm": 4.749292850494385, "learning_rate": 1.7698540490271475e-06, "loss": 1.174008846282959, "step": 968 }, { "epoch": 0.8164983164983165, "grad_norm": 2.8017964363098145, "learning_rate": 1.7687371826776432e-06, "loss": 0.9735618829727173, "step": 970 }, { "epoch": 0.8181818181818182, "grad_norm": 26.424652099609375, "learning_rate": 1.7676180122471677e-06, "loss": 0.9349749088287354, "step": 972 }, { "epoch": 0.8198653198653199, "grad_norm": 3.5407838821411133, "learning_rate": 1.7664965415917342e-06, "loss": 0.7211604714393616, "step": 974 }, { "epoch": 0.8215488215488216, "grad_norm": 4.120766639709473, "learning_rate": 1.765372774575281e-06, "loss": 0.9185746908187866, "step": 976 }, { "epoch": 0.8232323232323232, "grad_norm": 2.634417772293091, "learning_rate": 1.764246715069658e-06, "loss": 1.179499626159668, "step": 978 }, { "epoch": 0.8249158249158249, "grad_norm": 4.83583927154541, "learning_rate": 1.7631183669546146e-06, "loss": 1.140142798423767, "step": 980 }, { "epoch": 0.8265993265993266, "grad_norm": 4.506636142730713, "learning_rate": 1.761987734117784e-06, "loss": 1.0069242715835571, "step": 982 }, { "epoch": 0.8282828282828283, "grad_norm": 4.123355388641357, "learning_rate": 1.7608548204546724e-06, "loss": 1.0207629203796387, "step": 984 }, { "epoch": 0.82996632996633, "grad_norm": 16.72430419921875, "learning_rate": 1.7597196298686446e-06, "loss": 0.9050367474555969, "step": 986 }, { "epoch": 0.8316498316498316, "grad_norm": 19.312665939331055, "learning_rate": 1.7585821662709088e-06, "loss": 0.8223767280578613, "step": 988 }, { "epoch": 0.8333333333333334, "grad_norm": 13.517312049865723, "learning_rate": 1.7574424335805066e-06, "loss": 0.8045912384986877, "step": 990 }, { "epoch": 0.835016835016835, "grad_norm": 10.205414772033691, "learning_rate": 1.7563004357242962e-06, "loss": 0.6719659566879272, "step": 992 }, { "epoch": 0.8367003367003367, "grad_norm": 2.9161360263824463, "learning_rate": 1.755156176636941e-06, "loss": 0.9085012674331665, "step": 994 }, { "epoch": 0.8383838383838383, "grad_norm": 6.5189714431762695, "learning_rate": 1.7540096602608946e-06, "loss": 0.6452804803848267, "step": 996 }, { "epoch": 0.8400673400673401, "grad_norm": 12.399802207946777, "learning_rate": 1.7528608905463881e-06, "loss": 0.8944587707519531, "step": 998 }, { "epoch": 0.8417508417508418, "grad_norm": 2.180464029312134, "learning_rate": 1.7517098714514175e-06, "loss": 1.0595688819885254, "step": 1000 }, { "epoch": 0.8434343434343434, "grad_norm": 2.6750636100769043, "learning_rate": 1.7505566069417272e-06, "loss": 0.7289663553237915, "step": 1002 }, { "epoch": 0.8451178451178452, "grad_norm": 3.7006335258483887, "learning_rate": 1.749401100990799e-06, "loss": 0.6673641204833984, "step": 1004 }, { "epoch": 0.8468013468013468, "grad_norm": 5.355027675628662, "learning_rate": 1.748243357579837e-06, "loss": 1.0645616054534912, "step": 1006 }, { "epoch": 0.8484848484848485, "grad_norm": 3.5920186042785645, "learning_rate": 1.747083380697754e-06, "loss": 0.5822446346282959, "step": 1008 }, { "epoch": 0.8501683501683501, "grad_norm": 11.027798652648926, "learning_rate": 1.7459211743411589e-06, "loss": 0.9186769723892212, "step": 1010 }, { "epoch": 0.8518518518518519, "grad_norm": 12.822773933410645, "learning_rate": 1.7447567425143413e-06, "loss": 0.8671125769615173, "step": 1012 }, { "epoch": 0.8535353535353535, "grad_norm": 6.343443393707275, "learning_rate": 1.7435900892292593e-06, "loss": 0.7743659019470215, "step": 1014 }, { "epoch": 0.8552188552188552, "grad_norm": 19.62537384033203, "learning_rate": 1.7424212185055236e-06, "loss": 0.627282977104187, "step": 1016 }, { "epoch": 0.8569023569023569, "grad_norm": 11.67722225189209, "learning_rate": 1.7412501343703858e-06, "loss": 0.9576413631439209, "step": 1018 }, { "epoch": 0.8585858585858586, "grad_norm": 4.532960414886475, "learning_rate": 1.740076840858724e-06, "loss": 1.2340772151947021, "step": 1020 }, { "epoch": 0.8602693602693603, "grad_norm": 6.776996612548828, "learning_rate": 1.7389013420130278e-06, "loss": 0.8961556553840637, "step": 1022 }, { "epoch": 0.8619528619528619, "grad_norm": 5.019154071807861, "learning_rate": 1.7377236418833855e-06, "loss": 0.9290032386779785, "step": 1024 }, { "epoch": 0.8636363636363636, "grad_norm": 6.809414863586426, "learning_rate": 1.736543744527469e-06, "loss": 0.8829033374786377, "step": 1026 }, { "epoch": 0.8653198653198653, "grad_norm": 7.587875843048096, "learning_rate": 1.7353616540105214e-06, "loss": 0.950920581817627, "step": 1028 }, { "epoch": 0.867003367003367, "grad_norm": 2.2208216190338135, "learning_rate": 1.7341773744053423e-06, "loss": 0.621329128742218, "step": 1030 }, { "epoch": 0.8686868686868687, "grad_norm": 2.647271156311035, "learning_rate": 1.7329909097922726e-06, "loss": 0.8295049667358398, "step": 1032 }, { "epoch": 0.8703703703703703, "grad_norm": 4.676586151123047, "learning_rate": 1.7318022642591826e-06, "loss": 0.9272868633270264, "step": 1034 }, { "epoch": 0.8720538720538721, "grad_norm": 2.2150022983551025, "learning_rate": 1.730611441901456e-06, "loss": 0.9140334129333496, "step": 1036 }, { "epoch": 0.8737373737373737, "grad_norm": 6.269838809967041, "learning_rate": 1.7294184468219768e-06, "loss": 1.0908087491989136, "step": 1038 }, { "epoch": 0.8754208754208754, "grad_norm": 7.025053024291992, "learning_rate": 1.728223283131116e-06, "loss": 1.0729374885559082, "step": 1040 }, { "epoch": 0.877104377104377, "grad_norm": 21.954816818237305, "learning_rate": 1.727025954946714e-06, "loss": 0.9535812139511108, "step": 1042 }, { "epoch": 0.8787878787878788, "grad_norm": 11.953475952148438, "learning_rate": 1.7258264663940706e-06, "loss": 1.0482563972473145, "step": 1044 }, { "epoch": 0.8804713804713805, "grad_norm": 2.643186092376709, "learning_rate": 1.724624821605929e-06, "loss": 1.0523911714553833, "step": 1046 }, { "epoch": 0.8821548821548821, "grad_norm": 21.0428409576416, "learning_rate": 1.7234210247224608e-06, "loss": 0.9557990431785583, "step": 1048 }, { "epoch": 0.8838383838383839, "grad_norm": 13.664984703063965, "learning_rate": 1.7222150798912527e-06, "loss": 0.7585754990577698, "step": 1050 }, { "epoch": 0.8855218855218855, "grad_norm": 7.943265438079834, "learning_rate": 1.7210069912672924e-06, "loss": 0.9970930218696594, "step": 1052 }, { "epoch": 0.8872053872053872, "grad_norm": 13.632953643798828, "learning_rate": 1.7197967630129533e-06, "loss": 0.5471928715705872, "step": 1054 }, { "epoch": 0.8888888888888888, "grad_norm": 2.960538625717163, "learning_rate": 1.7185843992979805e-06, "loss": 0.9481421113014221, "step": 1056 }, { "epoch": 0.8905723905723906, "grad_norm": 2.4345412254333496, "learning_rate": 1.7173699042994778e-06, "loss": 0.9089041948318481, "step": 1058 }, { "epoch": 0.8922558922558923, "grad_norm": 8.801026344299316, "learning_rate": 1.716153282201891e-06, "loss": 0.958892822265625, "step": 1060 }, { "epoch": 0.8939393939393939, "grad_norm": 6.052116870880127, "learning_rate": 1.7149345371969958e-06, "loss": 0.8855940699577332, "step": 1062 }, { "epoch": 0.8956228956228957, "grad_norm": 29.812705993652344, "learning_rate": 1.7137136734838809e-06, "loss": 0.8104236125946045, "step": 1064 }, { "epoch": 0.8973063973063973, "grad_norm": 7.219144344329834, "learning_rate": 1.7124906952689354e-06, "loss": 1.0544826984405518, "step": 1066 }, { "epoch": 0.898989898989899, "grad_norm": 8.607142448425293, "learning_rate": 1.7112656067658345e-06, "loss": 0.7836295366287231, "step": 1068 }, { "epoch": 0.9006734006734006, "grad_norm": 3.9157323837280273, "learning_rate": 1.7100384121955229e-06, "loss": 0.9466323852539062, "step": 1070 }, { "epoch": 0.9023569023569024, "grad_norm": 3.7519919872283936, "learning_rate": 1.7088091157862026e-06, "loss": 1.1859047412872314, "step": 1072 }, { "epoch": 0.9040404040404041, "grad_norm": 22.836341857910156, "learning_rate": 1.7075777217733169e-06, "loss": 0.8282434344291687, "step": 1074 }, { "epoch": 0.9057239057239057, "grad_norm": 7.615798473358154, "learning_rate": 1.7063442343995361e-06, "loss": 0.4293259382247925, "step": 1076 }, { "epoch": 0.9074074074074074, "grad_norm": 14.713326454162598, "learning_rate": 1.7051086579147436e-06, "loss": 1.0748696327209473, "step": 1078 }, { "epoch": 0.9090909090909091, "grad_norm": 6.053645610809326, "learning_rate": 1.7038709965760198e-06, "loss": 0.9073866605758667, "step": 1080 }, { "epoch": 0.9107744107744108, "grad_norm": 3.388359785079956, "learning_rate": 1.7026312546476292e-06, "loss": 0.9109467267990112, "step": 1082 }, { "epoch": 0.9124579124579124, "grad_norm": 9.549911499023438, "learning_rate": 1.701389436401004e-06, "loss": 0.697003960609436, "step": 1084 }, { "epoch": 0.9141414141414141, "grad_norm": 3.0203182697296143, "learning_rate": 1.700145546114731e-06, "loss": 1.1409720182418823, "step": 1086 }, { "epoch": 0.9158249158249159, "grad_norm": 8.64733600616455, "learning_rate": 1.698899588074535e-06, "loss": 0.8965491056442261, "step": 1088 }, { "epoch": 0.9175084175084175, "grad_norm": 2.4151153564453125, "learning_rate": 1.6976515665732663e-06, "loss": 0.9052882790565491, "step": 1090 }, { "epoch": 0.9191919191919192, "grad_norm": 6.9435224533081055, "learning_rate": 1.6964014859108837e-06, "loss": 1.0003384351730347, "step": 1092 }, { "epoch": 0.9208754208754208, "grad_norm": 4.513472557067871, "learning_rate": 1.6951493503944414e-06, "loss": 0.8998319506645203, "step": 1094 }, { "epoch": 0.9225589225589226, "grad_norm": 11.51063060760498, "learning_rate": 1.693895164338073e-06, "loss": 0.7377707958221436, "step": 1096 }, { "epoch": 0.9242424242424242, "grad_norm": 6.038638591766357, "learning_rate": 1.6926389320629768e-06, "loss": 0.3615678548812866, "step": 1098 }, { "epoch": 0.9259259259259259, "grad_norm": 6.45628023147583, "learning_rate": 1.6913806578974016e-06, "loss": 0.9533661603927612, "step": 1100 }, { "epoch": 0.9276094276094277, "grad_norm": 11.960179328918457, "learning_rate": 1.690120346176632e-06, "loss": 0.5207856893539429, "step": 1102 }, { "epoch": 0.9292929292929293, "grad_norm": 5.242334842681885, "learning_rate": 1.6888580012429717e-06, "loss": 1.1098419427871704, "step": 1104 }, { "epoch": 0.930976430976431, "grad_norm": 2.832732677459717, "learning_rate": 1.68759362744573e-06, "loss": 1.0050939321517944, "step": 1106 }, { "epoch": 0.9326599326599326, "grad_norm": 16.379804611206055, "learning_rate": 1.686327229141207e-06, "loss": 0.7864120602607727, "step": 1108 }, { "epoch": 0.9343434343434344, "grad_norm": 6.306436538696289, "learning_rate": 1.6850588106926773e-06, "loss": 1.20371413230896, "step": 1110 }, { "epoch": 0.936026936026936, "grad_norm": 5.394667625427246, "learning_rate": 1.6837883764703765e-06, "loss": 1.1867024898529053, "step": 1112 }, { "epoch": 0.9377104377104377, "grad_norm": 4.2957305908203125, "learning_rate": 1.6825159308514847e-06, "loss": 1.0403316020965576, "step": 1114 }, { "epoch": 0.9393939393939394, "grad_norm": 3.2342448234558105, "learning_rate": 1.6812414782201127e-06, "loss": 1.1196048259735107, "step": 1116 }, { "epoch": 0.9410774410774411, "grad_norm": 4.326461315155029, "learning_rate": 1.6799650229672862e-06, "loss": 0.9937688708305359, "step": 1118 }, { "epoch": 0.9427609427609428, "grad_norm": 8.076350212097168, "learning_rate": 1.6786865694909301e-06, "loss": 1.2609586715698242, "step": 1120 }, { "epoch": 0.9444444444444444, "grad_norm": 2.848473310470581, "learning_rate": 1.6774061221958552e-06, "loss": 0.7693970203399658, "step": 1122 }, { "epoch": 0.9461279461279462, "grad_norm": 21.549283981323242, "learning_rate": 1.6761236854937406e-06, "loss": 0.8295996189117432, "step": 1124 }, { "epoch": 0.9478114478114478, "grad_norm": 3.2013320922851562, "learning_rate": 1.674839263803121e-06, "loss": 0.8039145469665527, "step": 1126 }, { "epoch": 0.9494949494949495, "grad_norm": 4.9571099281311035, "learning_rate": 1.6735528615493686e-06, "loss": 0.9634122848510742, "step": 1128 }, { "epoch": 0.9511784511784511, "grad_norm": 16.527570724487305, "learning_rate": 1.6722644831646815e-06, "loss": 0.79341059923172, "step": 1130 }, { "epoch": 0.9528619528619529, "grad_norm": 2.471346855163574, "learning_rate": 1.6709741330880644e-06, "loss": 0.9218388795852661, "step": 1132 }, { "epoch": 0.9545454545454546, "grad_norm": 30.464435577392578, "learning_rate": 1.6696818157653172e-06, "loss": 0.946638286113739, "step": 1134 }, { "epoch": 0.9562289562289562, "grad_norm": 6.8406453132629395, "learning_rate": 1.6683875356490157e-06, "loss": 0.8108268976211548, "step": 1136 }, { "epoch": 0.9579124579124579, "grad_norm": 5.6103620529174805, "learning_rate": 1.6670912971985002e-06, "loss": 0.6951830387115479, "step": 1138 }, { "epoch": 0.9595959595959596, "grad_norm": 4.111386299133301, "learning_rate": 1.6657931048798576e-06, "loss": 0.5389662384986877, "step": 1140 }, { "epoch": 0.9612794612794613, "grad_norm": 5.01594352722168, "learning_rate": 1.6644929631659061e-06, "loss": 0.8873554468154907, "step": 1142 }, { "epoch": 0.9629629629629629, "grad_norm": 5.6005096435546875, "learning_rate": 1.6631908765361818e-06, "loss": 0.5947662591934204, "step": 1144 }, { "epoch": 0.9646464646464646, "grad_norm": 4.118565082550049, "learning_rate": 1.6618868494769202e-06, "loss": 0.8753615617752075, "step": 1146 }, { "epoch": 0.9663299663299664, "grad_norm": 10.705119132995605, "learning_rate": 1.6605808864810437e-06, "loss": 0.7432312965393066, "step": 1148 }, { "epoch": 0.968013468013468, "grad_norm": 6.360631465911865, "learning_rate": 1.6592729920481443e-06, "loss": 0.9374081492424011, "step": 1150 }, { "epoch": 0.9696969696969697, "grad_norm": 22.604328155517578, "learning_rate": 1.6579631706844683e-06, "loss": 0.5783393383026123, "step": 1152 }, { "epoch": 0.9713804713804713, "grad_norm": 10.371187210083008, "learning_rate": 1.6566514269029015e-06, "loss": 0.8774973750114441, "step": 1154 }, { "epoch": 0.9730639730639731, "grad_norm": 2.2685441970825195, "learning_rate": 1.6553377652229536e-06, "loss": 0.5517897605895996, "step": 1156 }, { "epoch": 0.9747474747474747, "grad_norm": 15.745230674743652, "learning_rate": 1.6540221901707413e-06, "loss": 0.9311755895614624, "step": 1158 }, { "epoch": 0.9764309764309764, "grad_norm": 6.642886161804199, "learning_rate": 1.6527047062789743e-06, "loss": 0.4048464298248291, "step": 1160 }, { "epoch": 0.9781144781144782, "grad_norm": 23.364538192749023, "learning_rate": 1.6513853180869391e-06, "loss": 1.0577645301818848, "step": 1162 }, { "epoch": 0.9797979797979798, "grad_norm": 2.7986645698547363, "learning_rate": 1.6500640301404832e-06, "loss": 0.6768155694007874, "step": 1164 }, { "epoch": 0.9814814814814815, "grad_norm": 4.8387131690979, "learning_rate": 1.6487408469919992e-06, "loss": 0.7736034393310547, "step": 1166 }, { "epoch": 0.9831649831649831, "grad_norm": 4.39155387878418, "learning_rate": 1.6474157732004101e-06, "loss": 0.7835286855697632, "step": 1168 }, { "epoch": 0.9848484848484849, "grad_norm": 4.676360607147217, "learning_rate": 1.6460888133311526e-06, "loss": 0.8302567005157471, "step": 1170 }, { "epoch": 0.9865319865319865, "grad_norm": 3.651604413986206, "learning_rate": 1.6447599719561616e-06, "loss": 0.6171858310699463, "step": 1172 }, { "epoch": 0.9882154882154882, "grad_norm": 3.5588345527648926, "learning_rate": 1.6434292536538547e-06, "loss": 0.8998767137527466, "step": 1174 }, { "epoch": 0.98989898989899, "grad_norm": 11.439290046691895, "learning_rate": 1.6420966630091168e-06, "loss": 0.41087231040000916, "step": 1176 }, { "epoch": 0.9915824915824916, "grad_norm": 11.601485252380371, "learning_rate": 1.6407622046132831e-06, "loss": 1.0380841493606567, "step": 1178 }, { "epoch": 0.9932659932659933, "grad_norm": 7.792235374450684, "learning_rate": 1.6394258830641243e-06, "loss": 0.43105313181877136, "step": 1180 }, { "epoch": 0.9949494949494949, "grad_norm": 21.001230239868164, "learning_rate": 1.6380877029658303e-06, "loss": 0.8770669102668762, "step": 1182 }, { "epoch": 0.9966329966329966, "grad_norm": 13.98222827911377, "learning_rate": 1.6367476689289947e-06, "loss": 0.9919424057006836, "step": 1184 }, { "epoch": 0.9983164983164983, "grad_norm": 36.48440933227539, "learning_rate": 1.6354057855705984e-06, "loss": 0.6105228066444397, "step": 1186 }, { "epoch": 1.0, "grad_norm": 9.162494659423828, "learning_rate": 1.6340620575139947e-06, "loss": 0.7021905183792114, "step": 1188 }, { "epoch": 1.0016835016835017, "grad_norm": 6.610725402832031, "learning_rate": 1.6327164893888913e-06, "loss": 0.3793674111366272, "step": 1190 }, { "epoch": 1.0033670033670035, "grad_norm": 6.908663272857666, "learning_rate": 1.6313690858313374e-06, "loss": 0.39230918884277344, "step": 1192 }, { "epoch": 1.005050505050505, "grad_norm": 2.9396955966949463, "learning_rate": 1.6300198514837045e-06, "loss": 1.0317349433898926, "step": 1194 }, { "epoch": 1.0067340067340067, "grad_norm": 12.543563842773438, "learning_rate": 1.6286687909946732e-06, "loss": 0.8607063293457031, "step": 1196 }, { "epoch": 1.0084175084175084, "grad_norm": 3.976856231689453, "learning_rate": 1.6273159090192152e-06, "loss": 0.9927105903625488, "step": 1198 }, { "epoch": 1.0101010101010102, "grad_norm": 7.6159348487854, "learning_rate": 1.6259612102185778e-06, "loss": 1.056520938873291, "step": 1200 }, { "epoch": 1.0117845117845117, "grad_norm": 13.293722152709961, "learning_rate": 1.6246046992602685e-06, "loss": 0.9043182134628296, "step": 1202 }, { "epoch": 1.0134680134680134, "grad_norm": 7.976161003112793, "learning_rate": 1.6232463808180385e-06, "loss": 0.8953118920326233, "step": 1204 }, { "epoch": 1.0151515151515151, "grad_norm": 14.81564998626709, "learning_rate": 1.6218862595718664e-06, "loss": 1.0292134284973145, "step": 1206 }, { "epoch": 1.0168350168350169, "grad_norm": 8.188558578491211, "learning_rate": 1.620524340207942e-06, "loss": 0.5569553375244141, "step": 1208 }, { "epoch": 1.0185185185185186, "grad_norm": 7.264322757720947, "learning_rate": 1.6191606274186504e-06, "loss": 0.5535443425178528, "step": 1210 }, { "epoch": 1.02020202020202, "grad_norm": 5.589961528778076, "learning_rate": 1.6177951259025562e-06, "loss": 0.5485536456108093, "step": 1212 }, { "epoch": 1.0218855218855218, "grad_norm": 6.598013401031494, "learning_rate": 1.6164278403643867e-06, "loss": 0.7326016426086426, "step": 1214 }, { "epoch": 1.0235690235690236, "grad_norm": 2.7756152153015137, "learning_rate": 1.6150587755150158e-06, "loss": 0.4036520719528198, "step": 1216 }, { "epoch": 1.0252525252525253, "grad_norm": 12.781232833862305, "learning_rate": 1.6136879360714478e-06, "loss": 0.8799995183944702, "step": 1218 }, { "epoch": 1.026936026936027, "grad_norm": 2.7762389183044434, "learning_rate": 1.612315326756802e-06, "loss": 0.7381196022033691, "step": 1220 }, { "epoch": 1.0286195286195285, "grad_norm": 11.140121459960938, "learning_rate": 1.6109409523002942e-06, "loss": 0.8362076282501221, "step": 1222 }, { "epoch": 1.0303030303030303, "grad_norm": 2.88662052154541, "learning_rate": 1.6095648174372231e-06, "loss": 1.0976812839508057, "step": 1224 }, { "epoch": 1.031986531986532, "grad_norm": 3.13314151763916, "learning_rate": 1.6081869269089522e-06, "loss": 0.709804892539978, "step": 1226 }, { "epoch": 1.0336700336700337, "grad_norm": 5.350557327270508, "learning_rate": 1.606807285462894e-06, "loss": 0.8039405941963196, "step": 1228 }, { "epoch": 1.0353535353535352, "grad_norm": 2.7725930213928223, "learning_rate": 1.6054258978524943e-06, "loss": 0.8068400025367737, "step": 1230 }, { "epoch": 1.037037037037037, "grad_norm": 29.508012771606445, "learning_rate": 1.6040427688372143e-06, "loss": 0.47366365790367126, "step": 1232 }, { "epoch": 1.0387205387205387, "grad_norm": 1.3913285732269287, "learning_rate": 1.602657903182515e-06, "loss": 0.7617353796958923, "step": 1234 }, { "epoch": 1.0404040404040404, "grad_norm": 4.6602630615234375, "learning_rate": 1.6012713056598423e-06, "loss": 0.7685100436210632, "step": 1236 }, { "epoch": 1.0420875420875422, "grad_norm": 3.482510805130005, "learning_rate": 1.599882981046607e-06, "loss": 0.512657105922699, "step": 1238 }, { "epoch": 1.0437710437710437, "grad_norm": 3.340650796890259, "learning_rate": 1.5984929341261724e-06, "loss": 1.0025690793991089, "step": 1240 }, { "epoch": 1.0454545454545454, "grad_norm": 5.567379474639893, "learning_rate": 1.5971011696878342e-06, "loss": 0.9806394577026367, "step": 1242 }, { "epoch": 1.0471380471380471, "grad_norm": 7.478330612182617, "learning_rate": 1.5957076925268072e-06, "loss": 0.6606462001800537, "step": 1244 }, { "epoch": 1.0488215488215489, "grad_norm": 5.497067451477051, "learning_rate": 1.5943125074442064e-06, "loss": 0.6403665542602539, "step": 1246 }, { "epoch": 1.0505050505050506, "grad_norm": 5.195033550262451, "learning_rate": 1.5929156192470313e-06, "loss": 0.8676759004592896, "step": 1248 }, { "epoch": 1.0521885521885521, "grad_norm": 3.5050344467163086, "learning_rate": 1.5915170327481491e-06, "loss": 0.7130298614501953, "step": 1250 }, { "epoch": 1.0538720538720538, "grad_norm": 6.229882717132568, "learning_rate": 1.5901167527662796e-06, "loss": 0.6191893815994263, "step": 1252 }, { "epoch": 1.0555555555555556, "grad_norm": 3.6591920852661133, "learning_rate": 1.5887147841259758e-06, "loss": 0.9453639388084412, "step": 1254 }, { "epoch": 1.0572390572390573, "grad_norm": 8.242814064025879, "learning_rate": 1.5873111316576102e-06, "loss": 0.711391270160675, "step": 1256 }, { "epoch": 1.0589225589225588, "grad_norm": 9.809550285339355, "learning_rate": 1.5859058001973555e-06, "loss": 0.5224330425262451, "step": 1258 }, { "epoch": 1.0606060606060606, "grad_norm": 8.671676635742188, "learning_rate": 1.5844987945871701e-06, "loss": 0.736186146736145, "step": 1260 }, { "epoch": 1.0622895622895623, "grad_norm": 8.753976821899414, "learning_rate": 1.5830901196747805e-06, "loss": 0.632482647895813, "step": 1262 }, { "epoch": 1.063973063973064, "grad_norm": 3.3778975009918213, "learning_rate": 1.5816797803136647e-06, "loss": 0.7275056838989258, "step": 1264 }, { "epoch": 1.0656565656565657, "grad_norm": 6.493520736694336, "learning_rate": 1.5802677813630348e-06, "loss": 0.7164782285690308, "step": 1266 }, { "epoch": 1.0673400673400673, "grad_norm": 12.627816200256348, "learning_rate": 1.5788541276878212e-06, "loss": 0.5824927687644958, "step": 1268 }, { "epoch": 1.069023569023569, "grad_norm": 7.747696876525879, "learning_rate": 1.577438824158656e-06, "loss": 0.5714269876480103, "step": 1270 }, { "epoch": 1.0707070707070707, "grad_norm": 12.949309349060059, "learning_rate": 1.5760218756518548e-06, "loss": 0.7176691293716431, "step": 1272 }, { "epoch": 1.0723905723905724, "grad_norm": 6.077565670013428, "learning_rate": 1.5746032870494022e-06, "loss": 0.4697990417480469, "step": 1274 }, { "epoch": 1.074074074074074, "grad_norm": 4.4054155349731445, "learning_rate": 1.5731830632389322e-06, "loss": 0.6759170293807983, "step": 1276 }, { "epoch": 1.0757575757575757, "grad_norm": 62.43513488769531, "learning_rate": 1.5717612091137137e-06, "loss": 0.9693543910980225, "step": 1278 }, { "epoch": 1.0774410774410774, "grad_norm": 27.173269271850586, "learning_rate": 1.570337729572632e-06, "loss": 0.4767664670944214, "step": 1280 }, { "epoch": 1.0791245791245792, "grad_norm": 6.065430164337158, "learning_rate": 1.5689126295201738e-06, "loss": 0.33717769384384155, "step": 1282 }, { "epoch": 1.0808080808080809, "grad_norm": 5.10385799407959, "learning_rate": 1.5674859138664076e-06, "loss": 0.9727071523666382, "step": 1284 }, { "epoch": 1.0824915824915824, "grad_norm": 4.059802055358887, "learning_rate": 1.5660575875269696e-06, "loss": 0.7808531522750854, "step": 1286 }, { "epoch": 1.0841750841750841, "grad_norm": 3.3735897541046143, "learning_rate": 1.5646276554230454e-06, "loss": 0.5864525437355042, "step": 1288 }, { "epoch": 1.0858585858585859, "grad_norm": 3.3175692558288574, "learning_rate": 1.563196122481352e-06, "loss": 0.6308066844940186, "step": 1290 }, { "epoch": 1.0875420875420876, "grad_norm": 8.797651290893555, "learning_rate": 1.5617629936341225e-06, "loss": 1.049008846282959, "step": 1292 }, { "epoch": 1.0892255892255893, "grad_norm": 7.429879188537598, "learning_rate": 1.5603282738190898e-06, "loss": 0.766440749168396, "step": 1294 }, { "epoch": 1.0909090909090908, "grad_norm": 14.650995254516602, "learning_rate": 1.5588919679794668e-06, "loss": 0.5494952201843262, "step": 1296 }, { "epoch": 1.0925925925925926, "grad_norm": 7.310492515563965, "learning_rate": 1.5574540810639312e-06, "loss": 0.5477076768875122, "step": 1298 }, { "epoch": 1.0942760942760943, "grad_norm": 21.442401885986328, "learning_rate": 1.556014618026609e-06, "loss": 0.6048269271850586, "step": 1300 }, { "epoch": 1.095959595959596, "grad_norm": 78.25362396240234, "learning_rate": 1.5545735838270556e-06, "loss": 0.5611992478370667, "step": 1302 }, { "epoch": 1.0976430976430978, "grad_norm": 7.619815826416016, "learning_rate": 1.5531309834302403e-06, "loss": 0.5441624522209167, "step": 1304 }, { "epoch": 1.0993265993265993, "grad_norm": 26.699399948120117, "learning_rate": 1.5516868218065283e-06, "loss": 0.5887436866760254, "step": 1306 }, { "epoch": 1.101010101010101, "grad_norm": 15.65885066986084, "learning_rate": 1.5502411039316642e-06, "loss": 0.5249545574188232, "step": 1308 }, { "epoch": 1.1026936026936027, "grad_norm": 25.263103485107422, "learning_rate": 1.5487938347867542e-06, "loss": 0.36874455213546753, "step": 1310 }, { "epoch": 1.1043771043771045, "grad_norm": 9.12649917602539, "learning_rate": 1.5473450193582498e-06, "loss": 1.1010559797286987, "step": 1312 }, { "epoch": 1.106060606060606, "grad_norm": 23.143815994262695, "learning_rate": 1.5458946626379293e-06, "loss": 0.8757441639900208, "step": 1314 }, { "epoch": 1.1077441077441077, "grad_norm": 11.386807441711426, "learning_rate": 1.5444427696228822e-06, "loss": 0.8766863346099854, "step": 1316 }, { "epoch": 1.1094276094276094, "grad_norm": 5.802887439727783, "learning_rate": 1.5429893453154906e-06, "loss": 0.8725073337554932, "step": 1318 }, { "epoch": 1.1111111111111112, "grad_norm": 5.350346088409424, "learning_rate": 1.5415343947234132e-06, "loss": 0.5795699954032898, "step": 1320 }, { "epoch": 1.112794612794613, "grad_norm": 15.783977508544922, "learning_rate": 1.5400779228595663e-06, "loss": 0.8113459348678589, "step": 1322 }, { "epoch": 1.1144781144781144, "grad_norm": 9.137958526611328, "learning_rate": 1.538619934742109e-06, "loss": 0.46189528703689575, "step": 1324 }, { "epoch": 1.1161616161616161, "grad_norm": 5.9258527755737305, "learning_rate": 1.5371604353944235e-06, "loss": 0.8045957684516907, "step": 1326 }, { "epoch": 1.1178451178451179, "grad_norm": 2.5547056198120117, "learning_rate": 1.5356994298450989e-06, "loss": 0.6314079165458679, "step": 1328 }, { "epoch": 1.1195286195286196, "grad_norm": 6.180763244628906, "learning_rate": 1.5342369231279145e-06, "loss": 0.9923676252365112, "step": 1330 }, { "epoch": 1.121212121212121, "grad_norm": 10.539793968200684, "learning_rate": 1.5327729202818212e-06, "loss": 0.6905699372291565, "step": 1332 }, { "epoch": 1.1228956228956228, "grad_norm": 3.815638780593872, "learning_rate": 1.5313074263509242e-06, "loss": 1.0867717266082764, "step": 1334 }, { "epoch": 1.1245791245791246, "grad_norm": 7.576748847961426, "learning_rate": 1.5298404463844675e-06, "loss": 0.5058388113975525, "step": 1336 }, { "epoch": 1.1262626262626263, "grad_norm": 6.077386856079102, "learning_rate": 1.5283719854368142e-06, "loss": 0.6739003658294678, "step": 1338 }, { "epoch": 1.127946127946128, "grad_norm": 18.228174209594727, "learning_rate": 1.5269020485674299e-06, "loss": 0.5296186208724976, "step": 1340 }, { "epoch": 1.1296296296296295, "grad_norm": 7.708940029144287, "learning_rate": 1.5254306408408657e-06, "loss": 0.8153047561645508, "step": 1342 }, { "epoch": 1.1313131313131313, "grad_norm": 3.31766414642334, "learning_rate": 1.5239577673267401e-06, "loss": 1.0957720279693604, "step": 1344 }, { "epoch": 1.132996632996633, "grad_norm": 5.293587684631348, "learning_rate": 1.5224834330997222e-06, "loss": 0.9039838314056396, "step": 1346 }, { "epoch": 1.1346801346801347, "grad_norm": 3.792046070098877, "learning_rate": 1.5210076432395138e-06, "loss": 0.6438568234443665, "step": 1348 }, { "epoch": 1.1363636363636362, "grad_norm": 7.245974063873291, "learning_rate": 1.5195304028308324e-06, "loss": 0.4882217049598694, "step": 1350 }, { "epoch": 1.138047138047138, "grad_norm": 26.42631721496582, "learning_rate": 1.5180517169633914e-06, "loss": 0.2949609160423279, "step": 1352 }, { "epoch": 1.1397306397306397, "grad_norm": 3.006683111190796, "learning_rate": 1.5165715907318874e-06, "loss": 1.0205047130584717, "step": 1354 }, { "epoch": 1.1414141414141414, "grad_norm": 3.6523959636688232, "learning_rate": 1.5150900292359775e-06, "loss": 1.0392919778823853, "step": 1356 }, { "epoch": 1.1430976430976432, "grad_norm": 4.214179992675781, "learning_rate": 1.513607037580264e-06, "loss": 0.6601721048355103, "step": 1358 }, { "epoch": 1.144781144781145, "grad_norm": 10.945768356323242, "learning_rate": 1.5121226208742771e-06, "loss": 0.6551761627197266, "step": 1360 }, { "epoch": 1.1464646464646464, "grad_norm": 3.450727701187134, "learning_rate": 1.5106367842324578e-06, "loss": 0.8425558805465698, "step": 1362 }, { "epoch": 1.1481481481481481, "grad_norm": 13.319304466247559, "learning_rate": 1.5091495327741375e-06, "loss": 0.8309493064880371, "step": 1364 }, { "epoch": 1.1498316498316499, "grad_norm": 3.239384889602661, "learning_rate": 1.507660871623524e-06, "loss": 0.6987888813018799, "step": 1366 }, { "epoch": 1.1515151515151516, "grad_norm": 9.262398719787598, "learning_rate": 1.5061708059096807e-06, "loss": 0.7337237596511841, "step": 1368 }, { "epoch": 1.1531986531986531, "grad_norm": 3.1679928302764893, "learning_rate": 1.5046793407665114e-06, "loss": 1.047074794769287, "step": 1370 }, { "epoch": 1.1548821548821548, "grad_norm": 5.058619976043701, "learning_rate": 1.503186481332741e-06, "loss": 1.0454055070877075, "step": 1372 }, { "epoch": 1.1565656565656566, "grad_norm": 9.18127155303955, "learning_rate": 1.5016922327518986e-06, "loss": 0.38407066464424133, "step": 1374 }, { "epoch": 1.1582491582491583, "grad_norm": 6.960140228271484, "learning_rate": 1.5001966001722986e-06, "loss": 0.38796305656433105, "step": 1376 }, { "epoch": 1.15993265993266, "grad_norm": 18.356365203857422, "learning_rate": 1.4986995887470248e-06, "loss": 0.8758000135421753, "step": 1378 }, { "epoch": 1.1616161616161615, "grad_norm": 2.5531139373779297, "learning_rate": 1.497201203633912e-06, "loss": 0.6682250499725342, "step": 1380 }, { "epoch": 1.1632996632996633, "grad_norm": 18.195405960083008, "learning_rate": 1.4957014499955265e-06, "loss": 0.5331791639328003, "step": 1382 }, { "epoch": 1.164983164983165, "grad_norm": 4.818270683288574, "learning_rate": 1.4942003329991513e-06, "loss": 0.3785390257835388, "step": 1384 }, { "epoch": 1.1666666666666667, "grad_norm": 66.33992767333984, "learning_rate": 1.492697857816766e-06, "loss": 0.48905232548713684, "step": 1386 }, { "epoch": 1.1683501683501682, "grad_norm": 6.675547122955322, "learning_rate": 1.491194029625029e-06, "loss": 0.5575925707817078, "step": 1388 }, { "epoch": 1.17003367003367, "grad_norm": 2.9333407878875732, "learning_rate": 1.489688853605262e-06, "loss": 0.8529257774353027, "step": 1390 }, { "epoch": 1.1717171717171717, "grad_norm": 14.85582447052002, "learning_rate": 1.4881823349434296e-06, "loss": 0.8529238104820251, "step": 1392 }, { "epoch": 1.1734006734006734, "grad_norm": 4.551332473754883, "learning_rate": 1.4866744788301226e-06, "loss": 0.6012097597122192, "step": 1394 }, { "epoch": 1.1750841750841752, "grad_norm": 5.803267955780029, "learning_rate": 1.485165290460539e-06, "loss": 0.5330957770347595, "step": 1396 }, { "epoch": 1.1767676767676767, "grad_norm": 4.956878185272217, "learning_rate": 1.4836547750344688e-06, "loss": 0.7069591283798218, "step": 1398 }, { "epoch": 1.1784511784511784, "grad_norm": 11.88759708404541, "learning_rate": 1.4821429377562725e-06, "loss": 0.4460894763469696, "step": 1400 }, { "epoch": 1.1801346801346801, "grad_norm": 3.5958197116851807, "learning_rate": 1.4806297838348653e-06, "loss": 0.909576952457428, "step": 1402 }, { "epoch": 1.1818181818181819, "grad_norm": 4.076791286468506, "learning_rate": 1.4791153184837e-06, "loss": 0.6851646900177002, "step": 1404 }, { "epoch": 1.1835016835016834, "grad_norm": 8.969018936157227, "learning_rate": 1.4775995469207467e-06, "loss": 0.7221487760543823, "step": 1406 }, { "epoch": 1.1851851851851851, "grad_norm": 24.653610229492188, "learning_rate": 1.476082474368476e-06, "loss": 1.0442817211151123, "step": 1408 }, { "epoch": 1.1868686868686869, "grad_norm": 6.7254557609558105, "learning_rate": 1.4745641060538407e-06, "loss": 0.6711673140525818, "step": 1410 }, { "epoch": 1.1885521885521886, "grad_norm": 38.141719818115234, "learning_rate": 1.4730444472082597e-06, "loss": 0.6712204217910767, "step": 1412 }, { "epoch": 1.1902356902356903, "grad_norm": 5.311680793762207, "learning_rate": 1.471523503067596e-06, "loss": 0.7601330280303955, "step": 1414 }, { "epoch": 1.1919191919191918, "grad_norm": 6.686192512512207, "learning_rate": 1.4700012788721431e-06, "loss": 0.6655834913253784, "step": 1416 }, { "epoch": 1.1936026936026936, "grad_norm": 12.520559310913086, "learning_rate": 1.4684777798666028e-06, "loss": 1.0070924758911133, "step": 1418 }, { "epoch": 1.1952861952861953, "grad_norm": 39.29856491088867, "learning_rate": 1.4669530113000712e-06, "loss": 0.8293688297271729, "step": 1420 }, { "epoch": 1.196969696969697, "grad_norm": 5.298742294311523, "learning_rate": 1.465426978426017e-06, "loss": 0.7399046421051025, "step": 1422 }, { "epoch": 1.1986531986531987, "grad_norm": 4.998674392700195, "learning_rate": 1.4638996865022658e-06, "loss": 0.5819299221038818, "step": 1424 }, { "epoch": 1.2003367003367003, "grad_norm": 19.531993865966797, "learning_rate": 1.4623711407909802e-06, "loss": 0.8090528845787048, "step": 1426 }, { "epoch": 1.202020202020202, "grad_norm": 5.534289836883545, "learning_rate": 1.4608413465586444e-06, "loss": 0.4998140335083008, "step": 1428 }, { "epoch": 1.2037037037037037, "grad_norm": 4.479226589202881, "learning_rate": 1.4593103090760426e-06, "loss": 0.8749973177909851, "step": 1430 }, { "epoch": 1.2053872053872055, "grad_norm": 6.119904518127441, "learning_rate": 1.4577780336182429e-06, "loss": 0.6631636619567871, "step": 1432 }, { "epoch": 1.2070707070707072, "grad_norm": 6.20470666885376, "learning_rate": 1.4562445254645793e-06, "loss": 1.0941792726516724, "step": 1434 }, { "epoch": 1.2087542087542087, "grad_norm": 13.419809341430664, "learning_rate": 1.4547097898986332e-06, "loss": 0.5603539347648621, "step": 1436 }, { "epoch": 1.2104377104377104, "grad_norm": 10.74496841430664, "learning_rate": 1.453173832208213e-06, "loss": 0.3947031497955322, "step": 1438 }, { "epoch": 1.2121212121212122, "grad_norm": 2.647723436355591, "learning_rate": 1.4516366576853406e-06, "loss": 0.3918086886405945, "step": 1440 }, { "epoch": 1.2138047138047139, "grad_norm": 7.783057689666748, "learning_rate": 1.450098271626228e-06, "loss": 0.6404916048049927, "step": 1442 }, { "epoch": 1.2154882154882154, "grad_norm": 7.518592834472656, "learning_rate": 1.448558679331263e-06, "loss": 0.8621898889541626, "step": 1444 }, { "epoch": 1.2171717171717171, "grad_norm": 15.241488456726074, "learning_rate": 1.4470178861049886e-06, "loss": 0.8157280683517456, "step": 1446 }, { "epoch": 1.2188552188552189, "grad_norm": 5.622246742248535, "learning_rate": 1.4454758972560863e-06, "loss": 0.6764127612113953, "step": 1448 }, { "epoch": 1.2205387205387206, "grad_norm": 3.0841257572174072, "learning_rate": 1.4439327180973556e-06, "loss": 0.8733148574829102, "step": 1450 }, { "epoch": 1.2222222222222223, "grad_norm": 4.749155521392822, "learning_rate": 1.4423883539456987e-06, "loss": 0.828094482421875, "step": 1452 }, { "epoch": 1.2239057239057238, "grad_norm": 14.270376205444336, "learning_rate": 1.4408428101220997e-06, "loss": 0.5771759152412415, "step": 1454 }, { "epoch": 1.2255892255892256, "grad_norm": 4.161510467529297, "learning_rate": 1.439296091951607e-06, "loss": 0.8248889446258545, "step": 1456 }, { "epoch": 1.2272727272727273, "grad_norm": 7.337621212005615, "learning_rate": 1.4377482047633162e-06, "loss": 0.8380516767501831, "step": 1458 }, { "epoch": 1.228956228956229, "grad_norm": 15.451786041259766, "learning_rate": 1.4361991538903495e-06, "loss": 0.9264905452728271, "step": 1460 }, { "epoch": 1.2306397306397305, "grad_norm": 17.90766143798828, "learning_rate": 1.4346489446698388e-06, "loss": 0.616461455821991, "step": 1462 }, { "epoch": 1.2323232323232323, "grad_norm": 4.267929553985596, "learning_rate": 1.4330975824429076e-06, "loss": 0.587724506855011, "step": 1464 }, { "epoch": 1.234006734006734, "grad_norm": 3.7121894359588623, "learning_rate": 1.4315450725546516e-06, "loss": 0.7742079496383667, "step": 1466 }, { "epoch": 1.2356902356902357, "grad_norm": 3.0019185543060303, "learning_rate": 1.42999142035412e-06, "loss": 0.8585535287857056, "step": 1468 }, { "epoch": 1.2373737373737375, "grad_norm": 2.829047441482544, "learning_rate": 1.4284366311942985e-06, "loss": 1.0342047214508057, "step": 1470 }, { "epoch": 1.239057239057239, "grad_norm": 8.36631965637207, "learning_rate": 1.42688071043209e-06, "loss": 0.5781531929969788, "step": 1472 }, { "epoch": 1.2407407407407407, "grad_norm": 11.143059730529785, "learning_rate": 1.4253236634282964e-06, "loss": 0.6396032571792603, "step": 1474 }, { "epoch": 1.2424242424242424, "grad_norm": 26.655942916870117, "learning_rate": 1.4237654955475997e-06, "loss": 0.4640727639198303, "step": 1476 }, { "epoch": 1.2441077441077442, "grad_norm": 6.614319801330566, "learning_rate": 1.4222062121585438e-06, "loss": 0.6802918910980225, "step": 1478 }, { "epoch": 1.2457912457912457, "grad_norm": 3.256394863128662, "learning_rate": 1.4206458186335158e-06, "loss": 0.666190505027771, "step": 1480 }, { "epoch": 1.2474747474747474, "grad_norm": 4.943792819976807, "learning_rate": 1.4190843203487285e-06, "loss": 0.7142783403396606, "step": 1482 }, { "epoch": 1.2491582491582491, "grad_norm": 3.698286771774292, "learning_rate": 1.4175217226842e-06, "loss": 0.3970263600349426, "step": 1484 }, { "epoch": 1.2508417508417509, "grad_norm": 8.15507984161377, "learning_rate": 1.4159580310237368e-06, "loss": 0.5399370193481445, "step": 1486 }, { "epoch": 1.2525252525252526, "grad_norm": 12.810306549072266, "learning_rate": 1.414393250754915e-06, "loss": 0.6834887266159058, "step": 1488 }, { "epoch": 1.2542087542087543, "grad_norm": 5.88965368270874, "learning_rate": 1.4128273872690608e-06, "loss": 0.6449817419052124, "step": 1490 }, { "epoch": 1.2558922558922558, "grad_norm": 3.2324328422546387, "learning_rate": 1.4112604459612326e-06, "loss": 0.7542852759361267, "step": 1492 }, { "epoch": 1.2575757575757576, "grad_norm": 30.748018264770508, "learning_rate": 1.4096924322302025e-06, "loss": 0.7624866962432861, "step": 1494 }, { "epoch": 1.2592592592592593, "grad_norm": 6.311125755310059, "learning_rate": 1.4081233514784377e-06, "loss": 0.6044232845306396, "step": 1496 }, { "epoch": 1.2609427609427608, "grad_norm": 2.3243467807769775, "learning_rate": 1.4065532091120815e-06, "loss": 0.8974160552024841, "step": 1498 }, { "epoch": 1.2626262626262625, "grad_norm": 7.767407417297363, "learning_rate": 1.4049820105409354e-06, "loss": 1.017437219619751, "step": 1500 }, { "epoch": 1.2643097643097643, "grad_norm": 79.28764343261719, "learning_rate": 1.4034097611784388e-06, "loss": 0.5455498695373535, "step": 1502 }, { "epoch": 1.265993265993266, "grad_norm": 7.436858654022217, "learning_rate": 1.4018364664416531e-06, "loss": 0.7246487140655518, "step": 1504 }, { "epoch": 1.2676767676767677, "grad_norm": 3.221330165863037, "learning_rate": 1.4002621317512402e-06, "loss": 1.0642752647399902, "step": 1506 }, { "epoch": 1.2693602693602695, "grad_norm": 2.4483256340026855, "learning_rate": 1.3986867625314453e-06, "loss": 1.104174256324768, "step": 1508 }, { "epoch": 1.271043771043771, "grad_norm": 16.08315086364746, "learning_rate": 1.397110364210079e-06, "loss": 0.5644181966781616, "step": 1510 }, { "epoch": 1.2727272727272727, "grad_norm": 26.60236930847168, "learning_rate": 1.395532942218496e-06, "loss": 0.5067999362945557, "step": 1512 }, { "epoch": 1.2744107744107744, "grad_norm": 4.2767558097839355, "learning_rate": 1.393954501991579e-06, "loss": 0.5825619697570801, "step": 1514 }, { "epoch": 1.2760942760942762, "grad_norm": 5.0948896408081055, "learning_rate": 1.3923750489677192e-06, "loss": 0.7657870054244995, "step": 1516 }, { "epoch": 1.2777777777777777, "grad_norm": 6.115753650665283, "learning_rate": 1.3907945885887963e-06, "loss": 0.6665242910385132, "step": 1518 }, { "epoch": 1.2794612794612794, "grad_norm": 3.168313980102539, "learning_rate": 1.389213126300161e-06, "loss": 0.8947120904922485, "step": 1520 }, { "epoch": 1.2811447811447811, "grad_norm": 6.6659746170043945, "learning_rate": 1.3876306675506176e-06, "loss": 0.5565755367279053, "step": 1522 }, { "epoch": 1.2828282828282829, "grad_norm": 10.685264587402344, "learning_rate": 1.3860472177924008e-06, "loss": 0.5323166847229004, "step": 1524 }, { "epoch": 1.2845117845117846, "grad_norm": 3.2777657508850098, "learning_rate": 1.3844627824811623e-06, "loss": 0.7731577157974243, "step": 1526 }, { "epoch": 1.2861952861952861, "grad_norm": 4.757735729217529, "learning_rate": 1.3828773670759476e-06, "loss": 0.6660727262496948, "step": 1528 }, { "epoch": 1.2878787878787878, "grad_norm": 6.414804458618164, "learning_rate": 1.3812909770391808e-06, "loss": 0.2846236228942871, "step": 1530 }, { "epoch": 1.2895622895622896, "grad_norm": 4.582427978515625, "learning_rate": 1.3797036178366422e-06, "loss": 0.7430540919303894, "step": 1532 }, { "epoch": 1.2912457912457913, "grad_norm": 20.26249122619629, "learning_rate": 1.3781152949374526e-06, "loss": 0.9778026938438416, "step": 1534 }, { "epoch": 1.2929292929292928, "grad_norm": 5.21143913269043, "learning_rate": 1.3765260138140523e-06, "loss": 0.9354510307312012, "step": 1536 }, { "epoch": 1.2946127946127945, "grad_norm": 7.789968013763428, "learning_rate": 1.3749357799421846e-06, "loss": 0.6247372627258301, "step": 1538 }, { "epoch": 1.2962962962962963, "grad_norm": 13.731108665466309, "learning_rate": 1.3733445988008729e-06, "loss": 0.6366062164306641, "step": 1540 }, { "epoch": 1.297979797979798, "grad_norm": 13.730175018310547, "learning_rate": 1.3717524758724065e-06, "loss": 0.6833373308181763, "step": 1542 }, { "epoch": 1.2996632996632997, "grad_norm": 10.050169944763184, "learning_rate": 1.3701594166423182e-06, "loss": 0.8749772310256958, "step": 1544 }, { "epoch": 1.3013468013468015, "grad_norm": 37.633522033691406, "learning_rate": 1.3685654265993682e-06, "loss": 0.7598909139633179, "step": 1546 }, { "epoch": 1.303030303030303, "grad_norm": 5.5588178634643555, "learning_rate": 1.366970511235522e-06, "loss": 0.8211129903793335, "step": 1548 }, { "epoch": 1.3047138047138047, "grad_norm": 6.061704158782959, "learning_rate": 1.3653746760459345e-06, "loss": 0.5478522777557373, "step": 1550 }, { "epoch": 1.3063973063973064, "grad_norm": 5.632637977600098, "learning_rate": 1.3637779265289299e-06, "loss": 0.8678094148635864, "step": 1552 }, { "epoch": 1.308080808080808, "grad_norm": 7.475294589996338, "learning_rate": 1.3621802681859812e-06, "loss": 0.9599659442901611, "step": 1554 }, { "epoch": 1.3097643097643097, "grad_norm": 3.182800769805908, "learning_rate": 1.3605817065216944e-06, "loss": 0.9627713561058044, "step": 1556 }, { "epoch": 1.3114478114478114, "grad_norm": 7.048341274261475, "learning_rate": 1.3589822470437864e-06, "loss": 0.8731982707977295, "step": 1558 }, { "epoch": 1.3131313131313131, "grad_norm": 12.228373527526855, "learning_rate": 1.3573818952630683e-06, "loss": 0.3814980089664459, "step": 1560 }, { "epoch": 1.3148148148148149, "grad_norm": 5.152705192565918, "learning_rate": 1.3557806566934256e-06, "loss": 0.47562462091445923, "step": 1562 }, { "epoch": 1.3164983164983166, "grad_norm": 12.943581581115723, "learning_rate": 1.354178536851799e-06, "loss": 0.5296528935432434, "step": 1564 }, { "epoch": 1.3181818181818181, "grad_norm": 3.833484172821045, "learning_rate": 1.3525755412581645e-06, "loss": 1.0292046070098877, "step": 1566 }, { "epoch": 1.3198653198653199, "grad_norm": 9.532318115234375, "learning_rate": 1.3509716754355174e-06, "loss": 0.4947565197944641, "step": 1568 }, { "epoch": 1.3215488215488216, "grad_norm": 6.8037848472595215, "learning_rate": 1.34936694490985e-06, "loss": 0.897117018699646, "step": 1570 }, { "epoch": 1.3232323232323233, "grad_norm": 4.932839393615723, "learning_rate": 1.3477613552101344e-06, "loss": 0.738558292388916, "step": 1572 }, { "epoch": 1.3249158249158248, "grad_norm": 4.227520942687988, "learning_rate": 1.3461549118683023e-06, "loss": 0.6831085681915283, "step": 1574 }, { "epoch": 1.3265993265993266, "grad_norm": 4.703937530517578, "learning_rate": 1.344547620419227e-06, "loss": 0.931479811668396, "step": 1576 }, { "epoch": 1.3282828282828283, "grad_norm": 8.815512657165527, "learning_rate": 1.3429394864007037e-06, "loss": 0.6243126392364502, "step": 1578 }, { "epoch": 1.32996632996633, "grad_norm": 14.775157928466797, "learning_rate": 1.3413305153534313e-06, "loss": 0.5434067249298096, "step": 1580 }, { "epoch": 1.3316498316498318, "grad_norm": 4.071495056152344, "learning_rate": 1.3397207128209916e-06, "loss": 0.62471604347229, "step": 1582 }, { "epoch": 1.3333333333333333, "grad_norm": 3.9714295864105225, "learning_rate": 1.3381100843498315e-06, "loss": 0.9411803483963013, "step": 1584 }, { "epoch": 1.335016835016835, "grad_norm": 7.909718990325928, "learning_rate": 1.3364986354892442e-06, "loss": 0.7755764722824097, "step": 1586 }, { "epoch": 1.3367003367003367, "grad_norm": 9.560751914978027, "learning_rate": 1.3348863717913485e-06, "loss": 0.4694201350212097, "step": 1588 }, { "epoch": 1.3383838383838385, "grad_norm": 4.796677589416504, "learning_rate": 1.3332732988110717e-06, "loss": 0.6505795121192932, "step": 1590 }, { "epoch": 1.34006734006734, "grad_norm": 13.761187553405762, "learning_rate": 1.3316594221061293e-06, "loss": 0.5099287033081055, "step": 1592 }, { "epoch": 1.3417508417508417, "grad_norm": 3.4837796688079834, "learning_rate": 1.3300447472370047e-06, "loss": 0.9218275547027588, "step": 1594 }, { "epoch": 1.3434343434343434, "grad_norm": 7.638758659362793, "learning_rate": 1.3284292797669325e-06, "loss": 0.343423992395401, "step": 1596 }, { "epoch": 1.3451178451178452, "grad_norm": 3.7108771800994873, "learning_rate": 1.326813025261878e-06, "loss": 0.8066189289093018, "step": 1598 }, { "epoch": 1.3468013468013469, "grad_norm": 5.8035359382629395, "learning_rate": 1.3251959892905183e-06, "loss": 0.7118152976036072, "step": 1600 }, { "epoch": 1.3484848484848486, "grad_norm": 3.8060877323150635, "learning_rate": 1.3235781774242221e-06, "loss": 0.64288330078125, "step": 1602 }, { "epoch": 1.3501683501683501, "grad_norm": 23.318649291992188, "learning_rate": 1.321959595237032e-06, "loss": 0.7593903541564941, "step": 1604 }, { "epoch": 1.3518518518518519, "grad_norm": 6.9713640213012695, "learning_rate": 1.3203402483056457e-06, "loss": 1.0495635271072388, "step": 1606 }, { "epoch": 1.3535353535353536, "grad_norm": 3.930389642715454, "learning_rate": 1.3187201422093937e-06, "loss": 0.8280398845672607, "step": 1608 }, { "epoch": 1.355218855218855, "grad_norm": 5.5319743156433105, "learning_rate": 1.3170992825302231e-06, "loss": 0.37589627504348755, "step": 1610 }, { "epoch": 1.3569023569023568, "grad_norm": 15.358514785766602, "learning_rate": 1.315477674852678e-06, "loss": 0.9352704286575317, "step": 1612 }, { "epoch": 1.3585858585858586, "grad_norm": 10.822661399841309, "learning_rate": 1.3138553247638793e-06, "loss": 0.6205300092697144, "step": 1614 }, { "epoch": 1.3602693602693603, "grad_norm": 8.775157928466797, "learning_rate": 1.3122322378535052e-06, "loss": 0.6584144830703735, "step": 1616 }, { "epoch": 1.361952861952862, "grad_norm": 8.134145736694336, "learning_rate": 1.310608419713773e-06, "loss": 0.8533636927604675, "step": 1618 }, { "epoch": 1.3636363636363638, "grad_norm": 3.848430871963501, "learning_rate": 1.3089838759394198e-06, "loss": 0.6382489204406738, "step": 1620 }, { "epoch": 1.3653198653198653, "grad_norm": 5.8508620262146, "learning_rate": 1.3073586121276824e-06, "loss": 0.947349488735199, "step": 1622 }, { "epoch": 1.367003367003367, "grad_norm": 2.4821629524230957, "learning_rate": 1.3057326338782782e-06, "loss": 0.8861122131347656, "step": 1624 }, { "epoch": 1.3686868686868687, "grad_norm": 14.790640830993652, "learning_rate": 1.3041059467933864e-06, "loss": 0.6823830604553223, "step": 1626 }, { "epoch": 1.3703703703703702, "grad_norm": 3.717794418334961, "learning_rate": 1.3024785564776287e-06, "loss": 0.8171314001083374, "step": 1628 }, { "epoch": 1.372053872053872, "grad_norm": 5.684549331665039, "learning_rate": 1.3008504685380493e-06, "loss": 0.8313175439834595, "step": 1630 }, { "epoch": 1.3737373737373737, "grad_norm": 4.290356159210205, "learning_rate": 1.2992216885840964e-06, "loss": 1.0408048629760742, "step": 1632 }, { "epoch": 1.3754208754208754, "grad_norm": 12.80916690826416, "learning_rate": 1.297592222227602e-06, "loss": 0.613922655582428, "step": 1634 }, { "epoch": 1.3771043771043772, "grad_norm": 5.713363170623779, "learning_rate": 1.2959620750827637e-06, "loss": 0.34947091341018677, "step": 1636 }, { "epoch": 1.378787878787879, "grad_norm": 5.641543388366699, "learning_rate": 1.2943312527661236e-06, "loss": 0.5458937883377075, "step": 1638 }, { "epoch": 1.3804713804713804, "grad_norm": 2.729052782058716, "learning_rate": 1.2926997608965515e-06, "loss": 0.6660902500152588, "step": 1640 }, { "epoch": 1.3821548821548821, "grad_norm": 3.4759159088134766, "learning_rate": 1.2910676050952232e-06, "loss": 0.9125963449478149, "step": 1642 }, { "epoch": 1.3838383838383839, "grad_norm": 3.339698314666748, "learning_rate": 1.2894347909856021e-06, "loss": 1.1126599311828613, "step": 1644 }, { "epoch": 1.3855218855218856, "grad_norm": 9.030343055725098, "learning_rate": 1.2878013241934195e-06, "loss": 0.7313506603240967, "step": 1646 }, { "epoch": 1.387205387205387, "grad_norm": 2.8494951725006104, "learning_rate": 1.2861672103466564e-06, "loss": 0.9350987672805786, "step": 1648 }, { "epoch": 1.3888888888888888, "grad_norm": 4.453482151031494, "learning_rate": 1.284532455075522e-06, "loss": 0.8794913291931152, "step": 1650 }, { "epoch": 1.3905723905723906, "grad_norm": 3.846998691558838, "learning_rate": 1.2828970640124361e-06, "loss": 1.1854183673858643, "step": 1652 }, { "epoch": 1.3922558922558923, "grad_norm": 4.283193588256836, "learning_rate": 1.281261042792009e-06, "loss": 0.9548810720443726, "step": 1654 }, { "epoch": 1.393939393939394, "grad_norm": 44.709163665771484, "learning_rate": 1.2796243970510232e-06, "loss": 0.5343578457832336, "step": 1656 }, { "epoch": 1.3956228956228955, "grad_norm": 6.563719272613525, "learning_rate": 1.2779871324284106e-06, "loss": 0.6447005271911621, "step": 1658 }, { "epoch": 1.3973063973063973, "grad_norm": 31.160367965698242, "learning_rate": 1.2763492545652373e-06, "loss": 0.9052919149398804, "step": 1660 }, { "epoch": 1.398989898989899, "grad_norm": 22.833118438720703, "learning_rate": 1.2747107691046815e-06, "loss": 0.7731602191925049, "step": 1662 }, { "epoch": 1.4006734006734007, "grad_norm": 8.412581443786621, "learning_rate": 1.2730716816920151e-06, "loss": 0.711165189743042, "step": 1664 }, { "epoch": 1.4023569023569022, "grad_norm": 4.195555686950684, "learning_rate": 1.271431997974584e-06, "loss": 0.4324186444282532, "step": 1666 }, { "epoch": 1.404040404040404, "grad_norm": 5.849745273590088, "learning_rate": 1.2697917236017886e-06, "loss": 0.780827522277832, "step": 1668 }, { "epoch": 1.4057239057239057, "grad_norm": 15.396544456481934, "learning_rate": 1.2681508642250637e-06, "loss": 0.7758296728134155, "step": 1670 }, { "epoch": 1.4074074074074074, "grad_norm": 2.463407516479492, "learning_rate": 1.266509425497861e-06, "loss": 0.7455316781997681, "step": 1672 }, { "epoch": 1.4090909090909092, "grad_norm": 4.74429988861084, "learning_rate": 1.2648674130756271e-06, "loss": 1.1411914825439453, "step": 1674 }, { "epoch": 1.410774410774411, "grad_norm": 23.014007568359375, "learning_rate": 1.2632248326157854e-06, "loss": 0.43792814016342163, "step": 1676 }, { "epoch": 1.4124579124579124, "grad_norm": 7.774737358093262, "learning_rate": 1.2615816897777176e-06, "loss": 0.9449222087860107, "step": 1678 }, { "epoch": 1.4141414141414141, "grad_norm": 73.73876190185547, "learning_rate": 1.2599379902227419e-06, "loss": 0.9584387540817261, "step": 1680 }, { "epoch": 1.4158249158249159, "grad_norm": 2.815396785736084, "learning_rate": 1.258293739614094e-06, "loss": 0.6266515254974365, "step": 1682 }, { "epoch": 1.4175084175084174, "grad_norm": 7.27461051940918, "learning_rate": 1.2566489436169101e-06, "loss": 0.4212794899940491, "step": 1684 }, { "epoch": 1.4191919191919191, "grad_norm": 14.794193267822266, "learning_rate": 1.255003607898204e-06, "loss": 0.5568593740463257, "step": 1686 }, { "epoch": 1.4208754208754208, "grad_norm": 3.4901039600372314, "learning_rate": 1.2533577381268495e-06, "loss": 1.1170185804367065, "step": 1688 }, { "epoch": 1.4225589225589226, "grad_norm": 23.822872161865234, "learning_rate": 1.2517113399735608e-06, "loss": 0.5119540691375732, "step": 1690 }, { "epoch": 1.4242424242424243, "grad_norm": 4.004513263702393, "learning_rate": 1.250064419110872e-06, "loss": 0.5368912220001221, "step": 1692 }, { "epoch": 1.425925925925926, "grad_norm": 4.135901927947998, "learning_rate": 1.2484169812131184e-06, "loss": 0.44615352153778076, "step": 1694 }, { "epoch": 1.4276094276094276, "grad_norm": 3.496605396270752, "learning_rate": 1.246769031956417e-06, "loss": 1.1422553062438965, "step": 1696 }, { "epoch": 1.4292929292929293, "grad_norm": 15.598981857299805, "learning_rate": 1.245120577018646e-06, "loss": 1.0423638820648193, "step": 1698 }, { "epoch": 1.430976430976431, "grad_norm": 5.298037052154541, "learning_rate": 1.2434716220794265e-06, "loss": 0.7629603147506714, "step": 1700 }, { "epoch": 1.4326599326599325, "grad_norm": 8.531468391418457, "learning_rate": 1.2418221728201023e-06, "loss": 0.8187654614448547, "step": 1702 }, { "epoch": 1.4343434343434343, "grad_norm": 4.175232410430908, "learning_rate": 1.2401722349237198e-06, "loss": 0.28517311811447144, "step": 1704 }, { "epoch": 1.436026936026936, "grad_norm": 9.86863899230957, "learning_rate": 1.238521814075009e-06, "loss": 0.46237754821777344, "step": 1706 }, { "epoch": 1.4377104377104377, "grad_norm": 13.564935684204102, "learning_rate": 1.236870915960365e-06, "loss": 0.958651065826416, "step": 1708 }, { "epoch": 1.4393939393939394, "grad_norm": 6.472082614898682, "learning_rate": 1.2352195462678257e-06, "loss": 1.0340254306793213, "step": 1710 }, { "epoch": 1.4410774410774412, "grad_norm": 17.20500373840332, "learning_rate": 1.2335677106870546e-06, "loss": 0.9463751316070557, "step": 1712 }, { "epoch": 1.4427609427609427, "grad_norm": 3.069565773010254, "learning_rate": 1.2319154149093202e-06, "loss": 0.8773708939552307, "step": 1714 }, { "epoch": 1.4444444444444444, "grad_norm": 65.60623168945312, "learning_rate": 1.2302626646274773e-06, "loss": 0.9023821353912354, "step": 1716 }, { "epoch": 1.4461279461279462, "grad_norm": 3.0062930583953857, "learning_rate": 1.228609465535946e-06, "loss": 0.7161345481872559, "step": 1718 }, { "epoch": 1.4478114478114479, "grad_norm": 7.399210453033447, "learning_rate": 1.2269558233306918e-06, "loss": 0.711788535118103, "step": 1720 }, { "epoch": 1.4494949494949494, "grad_norm": 4.035950660705566, "learning_rate": 1.2253017437092088e-06, "loss": 0.5917500257492065, "step": 1722 }, { "epoch": 1.4511784511784511, "grad_norm": 3.7948551177978516, "learning_rate": 1.2236472323704971e-06, "loss": 0.7458564043045044, "step": 1724 }, { "epoch": 1.4528619528619529, "grad_norm": 4.743600845336914, "learning_rate": 1.221992295015044e-06, "loss": 0.8199291229248047, "step": 1726 }, { "epoch": 1.4545454545454546, "grad_norm": 7.403223514556885, "learning_rate": 1.2203369373448053e-06, "loss": 0.651489794254303, "step": 1728 }, { "epoch": 1.4562289562289563, "grad_norm": 9.762937545776367, "learning_rate": 1.2186811650631847e-06, "loss": 0.9804219007492065, "step": 1730 }, { "epoch": 1.457912457912458, "grad_norm": 4.465795516967773, "learning_rate": 1.217024983875014e-06, "loss": 1.19962739944458, "step": 1732 }, { "epoch": 1.4595959595959596, "grad_norm": 3.2770626544952393, "learning_rate": 1.2153683994865354e-06, "loss": 0.8254581093788147, "step": 1734 }, { "epoch": 1.4612794612794613, "grad_norm": 7.466728687286377, "learning_rate": 1.213711417605378e-06, "loss": 0.6395374536514282, "step": 1736 }, { "epoch": 1.462962962962963, "grad_norm": 5.3473920822143555, "learning_rate": 1.2120540439405418e-06, "loss": 0.5120725631713867, "step": 1738 }, { "epoch": 1.4646464646464645, "grad_norm": 5.0326924324035645, "learning_rate": 1.2103962842023765e-06, "loss": 1.0951206684112549, "step": 1740 }, { "epoch": 1.4663299663299663, "grad_norm": 3.968535900115967, "learning_rate": 1.2087381441025624e-06, "loss": 0.5963525772094727, "step": 1742 }, { "epoch": 1.468013468013468, "grad_norm": 14.208403587341309, "learning_rate": 1.2070796293540887e-06, "loss": 0.5282841324806213, "step": 1744 }, { "epoch": 1.4696969696969697, "grad_norm": 3.4273574352264404, "learning_rate": 1.2054207456712377e-06, "loss": 0.9493914246559143, "step": 1746 }, { "epoch": 1.4713804713804715, "grad_norm": 6.042728424072266, "learning_rate": 1.2037614987695609e-06, "loss": 0.9857927560806274, "step": 1748 }, { "epoch": 1.4730639730639732, "grad_norm": 13.786114692687988, "learning_rate": 1.2021018943658623e-06, "loss": 0.7202122211456299, "step": 1750 }, { "epoch": 1.4747474747474747, "grad_norm": 7.52271842956543, "learning_rate": 1.2004419381781779e-06, "loss": 0.6241959929466248, "step": 1752 }, { "epoch": 1.4764309764309764, "grad_norm": 7.334821701049805, "learning_rate": 1.1987816359257543e-06, "loss": 0.6670255661010742, "step": 1754 }, { "epoch": 1.4781144781144782, "grad_norm": 8.069925308227539, "learning_rate": 1.1971209933290318e-06, "loss": 0.8243575096130371, "step": 1756 }, { "epoch": 1.4797979797979797, "grad_norm": 18.11806297302246, "learning_rate": 1.1954600161096226e-06, "loss": 0.5894988775253296, "step": 1758 }, { "epoch": 1.4814814814814814, "grad_norm": 5.887277126312256, "learning_rate": 1.1937987099902927e-06, "loss": 0.7406305074691772, "step": 1760 }, { "epoch": 1.4831649831649831, "grad_norm": 4.559276103973389, "learning_rate": 1.19213708069494e-06, "loss": 0.9007562398910522, "step": 1762 }, { "epoch": 1.4848484848484849, "grad_norm": 3.148066759109497, "learning_rate": 1.190475133948577e-06, "loss": 0.8763662576675415, "step": 1764 }, { "epoch": 1.4865319865319866, "grad_norm": 4.318105697631836, "learning_rate": 1.1888128754773092e-06, "loss": 0.586820125579834, "step": 1766 }, { "epoch": 1.4882154882154883, "grad_norm": 8.943533897399902, "learning_rate": 1.1871503110083167e-06, "loss": 0.7152913808822632, "step": 1768 }, { "epoch": 1.4898989898989898, "grad_norm": 4.288205146789551, "learning_rate": 1.1854874462698337e-06, "loss": 0.9644764065742493, "step": 1770 }, { "epoch": 1.4915824915824916, "grad_norm": 8.917333602905273, "learning_rate": 1.1838242869911285e-06, "loss": 0.26478564739227295, "step": 1772 }, { "epoch": 1.4932659932659933, "grad_norm": 2.5780608654022217, "learning_rate": 1.182160838902485e-06, "loss": 0.6720756888389587, "step": 1774 }, { "epoch": 1.494949494949495, "grad_norm": 4.799160480499268, "learning_rate": 1.1804971077351818e-06, "loss": 0.703216016292572, "step": 1776 }, { "epoch": 1.4966329966329965, "grad_norm": 6.038239002227783, "learning_rate": 1.1788330992214724e-06, "loss": 0.8697667121887207, "step": 1778 }, { "epoch": 1.4983164983164983, "grad_norm": 7.712295055389404, "learning_rate": 1.1771688190945664e-06, "loss": 0.8953297138214111, "step": 1780 }, { "epoch": 1.5, "grad_norm": 13.718062400817871, "learning_rate": 1.1755042730886093e-06, "loss": 0.5260931253433228, "step": 1782 }, { "epoch": 1.5016835016835017, "grad_norm": 2.981576681137085, "learning_rate": 1.1738394669386621e-06, "loss": 1.1269118785858154, "step": 1784 }, { "epoch": 1.5033670033670035, "grad_norm": 6.8433427810668945, "learning_rate": 1.172174406380683e-06, "loss": 0.7218701839447021, "step": 1786 }, { "epoch": 1.5050505050505052, "grad_norm": 4.18980073928833, "learning_rate": 1.170509097151506e-06, "loss": 0.9302811622619629, "step": 1788 }, { "epoch": 1.5067340067340067, "grad_norm": 16.984750747680664, "learning_rate": 1.168843544988822e-06, "loss": 0.5803855657577515, "step": 1790 }, { "epoch": 1.5084175084175084, "grad_norm": 7.404435157775879, "learning_rate": 1.1671777556311587e-06, "loss": 0.5785191059112549, "step": 1792 }, { "epoch": 1.51010101010101, "grad_norm": 9.237391471862793, "learning_rate": 1.1655117348178619e-06, "loss": 0.8854154348373413, "step": 1794 }, { "epoch": 1.5117845117845117, "grad_norm": 15.445114135742188, "learning_rate": 1.163845488289074e-06, "loss": 0.8979889154434204, "step": 1796 }, { "epoch": 1.5134680134680134, "grad_norm": 13.821887016296387, "learning_rate": 1.1621790217857153e-06, "loss": 0.9836833477020264, "step": 1798 }, { "epoch": 1.5151515151515151, "grad_norm": 3.357462167739868, "learning_rate": 1.1605123410494643e-06, "loss": 0.6817135810852051, "step": 1800 }, { "epoch": 1.5168350168350169, "grad_norm": 3.545977830886841, "learning_rate": 1.1588454518227375e-06, "loss": 1.0103018283843994, "step": 1802 }, { "epoch": 1.5185185185185186, "grad_norm": 6.9810333251953125, "learning_rate": 1.157178359848669e-06, "loss": 0.8972345590591431, "step": 1804 }, { "epoch": 1.5202020202020203, "grad_norm": 4.857126235961914, "learning_rate": 1.155511070871093e-06, "loss": 0.8954426050186157, "step": 1806 }, { "epoch": 1.5218855218855218, "grad_norm": 8.7957763671875, "learning_rate": 1.1538435906345213e-06, "loss": 0.7016856670379639, "step": 1808 }, { "epoch": 1.5235690235690236, "grad_norm": 6.070329189300537, "learning_rate": 1.1521759248841237e-06, "loss": 0.6799755096435547, "step": 1810 }, { "epoch": 1.5252525252525253, "grad_norm": 1.5235867500305176, "learning_rate": 1.1505080793657124e-06, "loss": 0.2342766374349594, "step": 1812 }, { "epoch": 1.5269360269360268, "grad_norm": 3.382413864135742, "learning_rate": 1.1488400598257157e-06, "loss": 1.052855134010315, "step": 1814 }, { "epoch": 1.5286195286195285, "grad_norm": 27.871402740478516, "learning_rate": 1.1471718720111629e-06, "loss": 0.783697247505188, "step": 1816 }, { "epoch": 1.5303030303030303, "grad_norm": 61.98154067993164, "learning_rate": 1.1455035216696634e-06, "loss": 0.8607441186904907, "step": 1818 }, { "epoch": 1.531986531986532, "grad_norm": 5.8887763023376465, "learning_rate": 1.1438350145493853e-06, "loss": 0.7033579349517822, "step": 1820 }, { "epoch": 1.5336700336700337, "grad_norm": 5.281957149505615, "learning_rate": 1.1421663563990383e-06, "loss": 0.6749075651168823, "step": 1822 }, { "epoch": 1.5353535353535355, "grad_norm": 5.970940589904785, "learning_rate": 1.1404975529678515e-06, "loss": 0.9477555751800537, "step": 1824 }, { "epoch": 1.5370370370370372, "grad_norm": 2.8316867351531982, "learning_rate": 1.1388286100055555e-06, "loss": 0.8103057742118835, "step": 1826 }, { "epoch": 1.5387205387205387, "grad_norm": 3.575162410736084, "learning_rate": 1.1371595332623601e-06, "loss": 0.9152002334594727, "step": 1828 }, { "epoch": 1.5404040404040404, "grad_norm": 8.131978988647461, "learning_rate": 1.1354903284889377e-06, "loss": 0.6978881359100342, "step": 1830 }, { "epoch": 1.542087542087542, "grad_norm": 6.332693099975586, "learning_rate": 1.133821001436401e-06, "loss": 0.6509323120117188, "step": 1832 }, { "epoch": 1.5437710437710437, "grad_norm": 2.458233594894409, "learning_rate": 1.1321515578562835e-06, "loss": 1.04543936252594, "step": 1834 }, { "epoch": 1.5454545454545454, "grad_norm": 270.1246032714844, "learning_rate": 1.1304820035005211e-06, "loss": 0.9024485349655151, "step": 1836 }, { "epoch": 1.5471380471380471, "grad_norm": 4.0058183670043945, "learning_rate": 1.1288123441214315e-06, "loss": 0.4209427535533905, "step": 1838 }, { "epoch": 1.5488215488215489, "grad_norm": 4.2291083335876465, "learning_rate": 1.1271425854716931e-06, "loss": 0.6784233450889587, "step": 1840 }, { "epoch": 1.5505050505050506, "grad_norm": 15.050762176513672, "learning_rate": 1.125472733304327e-06, "loss": 0.5746853351593018, "step": 1842 }, { "epoch": 1.5521885521885523, "grad_norm": 9.32972526550293, "learning_rate": 1.1238027933726776e-06, "loss": 0.41324469447135925, "step": 1844 }, { "epoch": 1.5538720538720538, "grad_norm": 3.370657205581665, "learning_rate": 1.122132771430389e-06, "loss": 0.9156204462051392, "step": 1846 }, { "epoch": 1.5555555555555556, "grad_norm": 4.2874650955200195, "learning_rate": 1.1204626732313907e-06, "loss": 0.9899235367774963, "step": 1848 }, { "epoch": 1.557239057239057, "grad_norm": 3.843651294708252, "learning_rate": 1.1187925045298732e-06, "loss": 0.8029769659042358, "step": 1850 }, { "epoch": 1.5589225589225588, "grad_norm": 3.7915287017822266, "learning_rate": 1.1171222710802704e-06, "loss": 0.9333086013793945, "step": 1852 }, { "epoch": 1.5606060606060606, "grad_norm": 17.623516082763672, "learning_rate": 1.1154519786372392e-06, "loss": 0.5394339561462402, "step": 1854 }, { "epoch": 1.5622895622895623, "grad_norm": 2.851343870162964, "learning_rate": 1.1137816329556403e-06, "loss": 0.617688775062561, "step": 1856 }, { "epoch": 1.563973063973064, "grad_norm": 3.1740288734436035, "learning_rate": 1.112111239790517e-06, "loss": 0.902677059173584, "step": 1858 }, { "epoch": 1.5656565656565657, "grad_norm": 7.28153133392334, "learning_rate": 1.1104408048970765e-06, "loss": 0.3739192485809326, "step": 1860 }, { "epoch": 1.5673400673400675, "grad_norm": 3.828963279724121, "learning_rate": 1.1087703340306707e-06, "loss": 0.9757977724075317, "step": 1862 }, { "epoch": 1.569023569023569, "grad_norm": 4.035392761230469, "learning_rate": 1.1070998329467738e-06, "loss": 0.33518415689468384, "step": 1864 }, { "epoch": 1.5707070707070707, "grad_norm": 7.576591968536377, "learning_rate": 1.1054293074009646e-06, "loss": 0.9643778800964355, "step": 1866 }, { "epoch": 1.5723905723905722, "grad_norm": 10.830273628234863, "learning_rate": 1.1037587631489077e-06, "loss": 0.6072518825531006, "step": 1868 }, { "epoch": 1.574074074074074, "grad_norm": 2.8351891040802, "learning_rate": 1.1020882059463297e-06, "loss": 0.8100966215133667, "step": 1870 }, { "epoch": 1.5757575757575757, "grad_norm": 2.7790122032165527, "learning_rate": 1.1004176415490036e-06, "loss": 0.7995985746383667, "step": 1872 }, { "epoch": 1.5774410774410774, "grad_norm": 3.3753979206085205, "learning_rate": 1.0987470757127267e-06, "loss": 0.8837331533432007, "step": 1874 }, { "epoch": 1.5791245791245792, "grad_norm": 12.539671897888184, "learning_rate": 1.0970765141933012e-06, "loss": 0.5485697388648987, "step": 1876 }, { "epoch": 1.5808080808080809, "grad_norm": 2.4989864826202393, "learning_rate": 1.0954059627465144e-06, "loss": 1.1583393812179565, "step": 1878 }, { "epoch": 1.5824915824915826, "grad_norm": 6.108792304992676, "learning_rate": 1.093735427128119e-06, "loss": 0.7429193258285522, "step": 1880 }, { "epoch": 1.5841750841750841, "grad_norm": 5.460334300994873, "learning_rate": 1.092064913093813e-06, "loss": 0.4204625189304352, "step": 1882 }, { "epoch": 1.5858585858585859, "grad_norm": 19.09606170654297, "learning_rate": 1.09039442639922e-06, "loss": 0.5326663255691528, "step": 1884 }, { "epoch": 1.5875420875420876, "grad_norm": 12.058549880981445, "learning_rate": 1.0887239727998697e-06, "loss": 0.6357114315032959, "step": 1886 }, { "epoch": 1.589225589225589, "grad_norm": 14.583388328552246, "learning_rate": 1.0870535580511778e-06, "loss": 1.0033700466156006, "step": 1888 }, { "epoch": 1.5909090909090908, "grad_norm": 3.857271671295166, "learning_rate": 1.0853831879084254e-06, "loss": 0.19512847065925598, "step": 1890 }, { "epoch": 1.5925925925925926, "grad_norm": 8.160994529724121, "learning_rate": 1.0837128681267409e-06, "loss": 1.0877628326416016, "step": 1892 }, { "epoch": 1.5942760942760943, "grad_norm": 4.857079982757568, "learning_rate": 1.082042604461079e-06, "loss": 0.7703442573547363, "step": 1894 }, { "epoch": 1.595959595959596, "grad_norm": 22.629634857177734, "learning_rate": 1.0803724026662e-06, "loss": 0.9460948705673218, "step": 1896 }, { "epoch": 1.5976430976430978, "grad_norm": 12.860857963562012, "learning_rate": 1.0787022684966524e-06, "loss": 0.8795516490936279, "step": 1898 }, { "epoch": 1.5993265993265995, "grad_norm": 7.2853193283081055, "learning_rate": 1.0770322077067512e-06, "loss": 0.8695672750473022, "step": 1900 }, { "epoch": 1.601010101010101, "grad_norm": 3.358490467071533, "learning_rate": 1.0753622260505582e-06, "loss": 0.8867776393890381, "step": 1902 }, { "epoch": 1.6026936026936027, "grad_norm": 4.54321813583374, "learning_rate": 1.0736923292818631e-06, "loss": 0.730638861656189, "step": 1904 }, { "epoch": 1.6043771043771042, "grad_norm": 2.6725199222564697, "learning_rate": 1.0720225231541629e-06, "loss": 1.1262996196746826, "step": 1906 }, { "epoch": 1.606060606060606, "grad_norm": 4.424936771392822, "learning_rate": 1.0703528134206418e-06, "loss": 0.9013878703117371, "step": 1908 }, { "epoch": 1.6077441077441077, "grad_norm": 4.459665775299072, "learning_rate": 1.0686832058341534e-06, "loss": 0.5786502957344055, "step": 1910 }, { "epoch": 1.6094276094276094, "grad_norm": 7.450462341308594, "learning_rate": 1.0670137061471972e-06, "loss": 0.5591634511947632, "step": 1912 }, { "epoch": 1.6111111111111112, "grad_norm": 6.1925764083862305, "learning_rate": 1.0653443201119026e-06, "loss": 0.7897850275039673, "step": 1914 }, { "epoch": 1.612794612794613, "grad_norm": 14.83733081817627, "learning_rate": 1.063675053480007e-06, "loss": 0.6848697066307068, "step": 1916 }, { "epoch": 1.6144781144781146, "grad_norm": 7.251978397369385, "learning_rate": 1.0620059120028363e-06, "loss": 0.6231127977371216, "step": 1918 }, { "epoch": 1.6161616161616161, "grad_norm": 6.919361591339111, "learning_rate": 1.0603369014312848e-06, "loss": 0.665825605392456, "step": 1920 }, { "epoch": 1.6178451178451179, "grad_norm": 5.165210247039795, "learning_rate": 1.0586680275157966e-06, "loss": 0.9070066809654236, "step": 1922 }, { "epoch": 1.6195286195286194, "grad_norm": 14.22563648223877, "learning_rate": 1.0569992960063445e-06, "loss": 0.6462626457214355, "step": 1924 }, { "epoch": 1.621212121212121, "grad_norm": 10.909793853759766, "learning_rate": 1.0553307126524105e-06, "loss": 0.6075209379196167, "step": 1926 }, { "epoch": 1.6228956228956228, "grad_norm": 2.4525344371795654, "learning_rate": 1.0536622832029663e-06, "loss": 0.7241764068603516, "step": 1928 }, { "epoch": 1.6245791245791246, "grad_norm": 7.981225967407227, "learning_rate": 1.0519940134064535e-06, "loss": 0.7813702821731567, "step": 1930 }, { "epoch": 1.6262626262626263, "grad_norm": 3.924685478210449, "learning_rate": 1.0503259090107635e-06, "loss": 0.6770836114883423, "step": 1932 }, { "epoch": 1.627946127946128, "grad_norm": 6.139669895172119, "learning_rate": 1.0486579757632177e-06, "loss": 0.9623356461524963, "step": 1934 }, { "epoch": 1.6296296296296298, "grad_norm": 12.121989250183105, "learning_rate": 1.046990219410548e-06, "loss": 0.9487285614013672, "step": 1936 }, { "epoch": 1.6313131313131313, "grad_norm": 3.9633893966674805, "learning_rate": 1.0453226456988766e-06, "loss": 1.0289177894592285, "step": 1938 }, { "epoch": 1.632996632996633, "grad_norm": 7.765763759613037, "learning_rate": 1.0436552603736967e-06, "loss": 0.8020685315132141, "step": 1940 }, { "epoch": 1.6346801346801347, "grad_norm": 4.463337421417236, "learning_rate": 1.0419880691798526e-06, "loss": 1.010524868965149, "step": 1942 }, { "epoch": 1.6363636363636362, "grad_norm": 29.483732223510742, "learning_rate": 1.040321077861519e-06, "loss": 0.7623812556266785, "step": 1944 }, { "epoch": 1.638047138047138, "grad_norm": 3.125913619995117, "learning_rate": 1.0386542921621824e-06, "loss": 0.41824889183044434, "step": 1946 }, { "epoch": 1.6397306397306397, "grad_norm": 6.553778648376465, "learning_rate": 1.036987717824621e-06, "loss": 0.9804911613464355, "step": 1948 }, { "epoch": 1.6414141414141414, "grad_norm": 3.5837337970733643, "learning_rate": 1.0353213605908854e-06, "loss": 0.999625563621521, "step": 1950 }, { "epoch": 1.6430976430976432, "grad_norm": 6.643466949462891, "learning_rate": 1.0336552262022756e-06, "loss": 0.49242016673088074, "step": 1952 }, { "epoch": 1.644781144781145, "grad_norm": 5.533985614776611, "learning_rate": 1.0319893203993276e-06, "loss": 0.39796119928359985, "step": 1954 }, { "epoch": 1.6464646464646466, "grad_norm": 15.642714500427246, "learning_rate": 1.0303236489217863e-06, "loss": 0.22867411375045776, "step": 1956 }, { "epoch": 1.6481481481481481, "grad_norm": 6.918363571166992, "learning_rate": 1.0286582175085913e-06, "loss": 0.6615217924118042, "step": 1958 }, { "epoch": 1.6498316498316499, "grad_norm": 9.181211471557617, "learning_rate": 1.0269930318978552e-06, "loss": 0.7599420547485352, "step": 1960 }, { "epoch": 1.6515151515151514, "grad_norm": 7.992411136627197, "learning_rate": 1.0253280978268421e-06, "loss": 0.6117727756500244, "step": 1962 }, { "epoch": 1.6531986531986531, "grad_norm": 6.661476135253906, "learning_rate": 1.0236634210319507e-06, "loss": 0.446529746055603, "step": 1964 }, { "epoch": 1.6548821548821548, "grad_norm": 7.98351526260376, "learning_rate": 1.0219990072486938e-06, "loss": 0.5448979139328003, "step": 1966 }, { "epoch": 1.6565656565656566, "grad_norm": 3.0457189083099365, "learning_rate": 1.020334862211676e-06, "loss": 0.8045427799224854, "step": 1968 }, { "epoch": 1.6582491582491583, "grad_norm": 11.37780475616455, "learning_rate": 1.0186709916545775e-06, "loss": 0.7433644533157349, "step": 1970 }, { "epoch": 1.65993265993266, "grad_norm": 18.77171516418457, "learning_rate": 1.0170074013101329e-06, "loss": 0.9492733478546143, "step": 1972 }, { "epoch": 1.6616161616161618, "grad_norm": 2.508883237838745, "learning_rate": 1.0153440969101103e-06, "loss": 0.7720388174057007, "step": 1974 }, { "epoch": 1.6632996632996633, "grad_norm": 24.94049644470215, "learning_rate": 1.0136810841852937e-06, "loss": 0.6722294688224792, "step": 1976 }, { "epoch": 1.664983164983165, "grad_norm": 24.168481826782227, "learning_rate": 1.0120183688654616e-06, "loss": 0.6629032492637634, "step": 1978 }, { "epoch": 1.6666666666666665, "grad_norm": 6.165626525878906, "learning_rate": 1.0103559566793679e-06, "loss": 0.7866932153701782, "step": 1980 }, { "epoch": 1.6683501683501682, "grad_norm": 10.85080337524414, "learning_rate": 1.0086938533547213e-06, "loss": 0.5067884922027588, "step": 1982 }, { "epoch": 1.67003367003367, "grad_norm": 6.6474199295043945, "learning_rate": 1.0070320646181684e-06, "loss": 0.3992816209793091, "step": 1984 }, { "epoch": 1.6717171717171717, "grad_norm": 3.2397751808166504, "learning_rate": 1.0053705961952697e-06, "loss": 0.9870185256004333, "step": 1986 }, { "epoch": 1.6734006734006734, "grad_norm": 4.640201568603516, "learning_rate": 1.0037094538104832e-06, "loss": 0.916529655456543, "step": 1988 }, { "epoch": 1.6750841750841752, "grad_norm": 2.8144161701202393, "learning_rate": 1.002048643187143e-06, "loss": 0.5871807932853699, "step": 1990 }, { "epoch": 1.676767676767677, "grad_norm": 2.0470471382141113, "learning_rate": 1.0003881700474415e-06, "loss": 1.0817761421203613, "step": 1992 }, { "epoch": 1.6784511784511784, "grad_norm": 13.735745429992676, "learning_rate": 9.987280401124063e-07, "loss": 0.6647434234619141, "step": 1994 }, { "epoch": 1.6801346801346801, "grad_norm": 8.261211395263672, "learning_rate": 9.970682591018842e-07, "loss": 0.6305195689201355, "step": 1996 }, { "epoch": 1.6818181818181817, "grad_norm": 2.91133713722229, "learning_rate": 9.95408832734519e-07, "loss": 0.9166790246963501, "step": 1998 }, { "epoch": 1.6835016835016834, "grad_norm": 12.362771987915039, "learning_rate": 9.937497667277322e-07, "loss": 0.7117506861686707, "step": 2000 }, { "epoch": 1.6851851851851851, "grad_norm": 6.4341349601745605, "learning_rate": 9.92091066797705e-07, "loss": 0.5901815891265869, "step": 2002 }, { "epoch": 1.6868686868686869, "grad_norm": 6.667015075683594, "learning_rate": 9.904327386593563e-07, "loss": 0.6358145475387573, "step": 2004 }, { "epoch": 1.6885521885521886, "grad_norm": 5.651986122131348, "learning_rate": 9.887747880263236e-07, "loss": 0.6100403666496277, "step": 2006 }, { "epoch": 1.6902356902356903, "grad_norm": 2.491840124130249, "learning_rate": 9.871172206109458e-07, "loss": 0.9090219736099243, "step": 2008 }, { "epoch": 1.691919191919192, "grad_norm": 5.323090553283691, "learning_rate": 9.854600421242396e-07, "loss": 0.464111328125, "step": 2010 }, { "epoch": 1.6936026936026936, "grad_norm": 5.4663496017456055, "learning_rate": 9.838032582758814e-07, "loss": 0.7845708727836609, "step": 2012 }, { "epoch": 1.6952861952861953, "grad_norm": 5.896030902862549, "learning_rate": 9.821468747741893e-07, "loss": 0.5561348795890808, "step": 2014 }, { "epoch": 1.696969696969697, "grad_norm": 11.155455589294434, "learning_rate": 9.804908973261012e-07, "loss": 0.7063945531845093, "step": 2016 }, { "epoch": 1.6986531986531985, "grad_norm": 12.239091873168945, "learning_rate": 9.788353316371562e-07, "loss": 0.7154542803764343, "step": 2018 }, { "epoch": 1.7003367003367003, "grad_norm": 7.434993743896484, "learning_rate": 9.771801834114748e-07, "loss": 0.6947083473205566, "step": 2020 }, { "epoch": 1.702020202020202, "grad_norm": 8.770514488220215, "learning_rate": 9.755254583517394e-07, "loss": 0.998549222946167, "step": 2022 }, { "epoch": 1.7037037037037037, "grad_norm": 3.318065643310547, "learning_rate": 9.738711621591733e-07, "loss": 0.7664910554885864, "step": 2024 }, { "epoch": 1.7053872053872055, "grad_norm": 3.665529489517212, "learning_rate": 9.722173005335235e-07, "loss": 0.8967854976654053, "step": 2026 }, { "epoch": 1.7070707070707072, "grad_norm": 4.913332939147949, "learning_rate": 9.705638791730391e-07, "loss": 1.01124906539917, "step": 2028 }, { "epoch": 1.708754208754209, "grad_norm": 13.111969947814941, "learning_rate": 9.689109037744522e-07, "loss": 0.5944876670837402, "step": 2030 }, { "epoch": 1.7104377104377104, "grad_norm": 4.983813762664795, "learning_rate": 9.672583800329585e-07, "loss": 0.3871064782142639, "step": 2032 }, { "epoch": 1.7121212121212122, "grad_norm": 3.5434679985046387, "learning_rate": 9.65606313642198e-07, "loss": 0.8279162645339966, "step": 2034 }, { "epoch": 1.7138047138047137, "grad_norm": 4.728488922119141, "learning_rate": 9.63954710294234e-07, "loss": 0.7765666246414185, "step": 2036 }, { "epoch": 1.7154882154882154, "grad_norm": 9.771429061889648, "learning_rate": 9.623035756795352e-07, "loss": 0.38172125816345215, "step": 2038 }, { "epoch": 1.7171717171717171, "grad_norm": 6.764921188354492, "learning_rate": 9.606529154869556e-07, "loss": 0.4684080481529236, "step": 2040 }, { "epoch": 1.7188552188552189, "grad_norm": 3.6262731552124023, "learning_rate": 9.590027354037134e-07, "loss": 0.8603177070617676, "step": 2042 }, { "epoch": 1.7205387205387206, "grad_norm": 3.9443676471710205, "learning_rate": 9.573530411153732e-07, "loss": 0.8025220632553101, "step": 2044 }, { "epoch": 1.7222222222222223, "grad_norm": 3.0719077587127686, "learning_rate": 9.557038383058265e-07, "loss": 1.0896143913269043, "step": 2046 }, { "epoch": 1.723905723905724, "grad_norm": 2.531261920928955, "learning_rate": 9.540551326572709e-07, "loss": 0.9985212087631226, "step": 2048 }, { "epoch": 1.7255892255892256, "grad_norm": 12.073436737060547, "learning_rate": 9.524069298501902e-07, "loss": 0.3480485677719116, "step": 2050 }, { "epoch": 1.7272727272727273, "grad_norm": 3.418630599975586, "learning_rate": 9.507592355633376e-07, "loss": 1.0522449016571045, "step": 2052 }, { "epoch": 1.7289562289562288, "grad_norm": 8.016219139099121, "learning_rate": 9.491120554737126e-07, "loss": 0.8546870350837708, "step": 2054 }, { "epoch": 1.7306397306397305, "grad_norm": 8.340877532958984, "learning_rate": 9.474653952565439e-07, "loss": 0.7133148908615112, "step": 2056 }, { "epoch": 1.7323232323232323, "grad_norm": 7.340385437011719, "learning_rate": 9.458192605852691e-07, "loss": 1.0671539306640625, "step": 2058 }, { "epoch": 1.734006734006734, "grad_norm": 3.097386121749878, "learning_rate": 9.441736571315142e-07, "loss": 0.6089422702789307, "step": 2060 }, { "epoch": 1.7356902356902357, "grad_norm": 24.186203002929688, "learning_rate": 9.425285905650755e-07, "loss": 1.0324299335479736, "step": 2062 }, { "epoch": 1.7373737373737375, "grad_norm": 6.780990123748779, "learning_rate": 9.408840665538999e-07, "loss": 0.6091172099113464, "step": 2064 }, { "epoch": 1.7390572390572392, "grad_norm": 3.00539231300354, "learning_rate": 9.392400907640645e-07, "loss": 0.6669168472290039, "step": 2066 }, { "epoch": 1.7407407407407407, "grad_norm": 9.17961311340332, "learning_rate": 9.375966688597572e-07, "loss": 0.8047370314598083, "step": 2068 }, { "epoch": 1.7424242424242424, "grad_norm": 3.1698920726776123, "learning_rate": 9.359538065032586e-07, "loss": 0.6602023839950562, "step": 2070 }, { "epoch": 1.7441077441077442, "grad_norm": 30.58012580871582, "learning_rate": 9.343115093549203e-07, "loss": 0.6329094171524048, "step": 2072 }, { "epoch": 1.7457912457912457, "grad_norm": 4.666545867919922, "learning_rate": 9.32669783073147e-07, "loss": 0.5041278600692749, "step": 2074 }, { "epoch": 1.7474747474747474, "grad_norm": 9.805131912231445, "learning_rate": 9.310286333143767e-07, "loss": 1.0198402404785156, "step": 2076 }, { "epoch": 1.7491582491582491, "grad_norm": 11.345261573791504, "learning_rate": 9.293880657330604e-07, "loss": 0.7572150230407715, "step": 2078 }, { "epoch": 1.7508417508417509, "grad_norm": 5.581562519073486, "learning_rate": 9.277480859816444e-07, "loss": 0.9102179408073425, "step": 2080 }, { "epoch": 1.7525252525252526, "grad_norm": 2.126108169555664, "learning_rate": 9.261086997105487e-07, "loss": 0.5880842208862305, "step": 2082 }, { "epoch": 1.7542087542087543, "grad_norm": 2.7663979530334473, "learning_rate": 9.244699125681485e-07, "loss": 1.1094093322753906, "step": 2084 }, { "epoch": 1.7558922558922558, "grad_norm": 11.278059959411621, "learning_rate": 9.228317302007556e-07, "loss": 0.7268582582473755, "step": 2086 }, { "epoch": 1.7575757575757576, "grad_norm": 9.18853759765625, "learning_rate": 9.211941582525968e-07, "loss": 0.44798004627227783, "step": 2088 }, { "epoch": 1.7592592592592593, "grad_norm": 11.152181625366211, "learning_rate": 9.195572023657969e-07, "loss": 0.4857521653175354, "step": 2090 }, { "epoch": 1.7609427609427608, "grad_norm": 6.81666374206543, "learning_rate": 9.179208681803579e-07, "loss": 0.510983943939209, "step": 2092 }, { "epoch": 1.7626262626262625, "grad_norm": 6.450544834136963, "learning_rate": 9.162851613341389e-07, "loss": 0.4013763964176178, "step": 2094 }, { "epoch": 1.7643097643097643, "grad_norm": 4.122218608856201, "learning_rate": 9.146500874628391e-07, "loss": 0.6035534143447876, "step": 2096 }, { "epoch": 1.765993265993266, "grad_norm": 7.275836944580078, "learning_rate": 9.130156521999757e-07, "loss": 0.9859648942947388, "step": 2098 }, { "epoch": 1.7676767676767677, "grad_norm": 3.9273769855499268, "learning_rate": 9.113818611768654e-07, "loss": 0.918908953666687, "step": 2100 }, { "epoch": 1.7693602693602695, "grad_norm": 5.415125846862793, "learning_rate": 9.097487200226059e-07, "loss": 0.8446367979049683, "step": 2102 }, { "epoch": 1.7710437710437712, "grad_norm": 15.805021286010742, "learning_rate": 9.081162343640561e-07, "loss": 0.5240712761878967, "step": 2104 }, { "epoch": 1.7727272727272727, "grad_norm": 5.227410316467285, "learning_rate": 9.064844098258153e-07, "loss": 0.6734915375709534, "step": 2106 }, { "epoch": 1.7744107744107744, "grad_norm": 9.779236793518066, "learning_rate": 9.048532520302061e-07, "loss": 0.8648114204406738, "step": 2108 }, { "epoch": 1.776094276094276, "grad_norm": 9.622480392456055, "learning_rate": 9.032227665972534e-07, "loss": 0.4809529781341553, "step": 2110 }, { "epoch": 1.7777777777777777, "grad_norm": 3.092237949371338, "learning_rate": 9.015929591446651e-07, "loss": 0.775432288646698, "step": 2112 }, { "epoch": 1.7794612794612794, "grad_norm": 6.282991409301758, "learning_rate": 8.999638352878142e-07, "loss": 0.8989666700363159, "step": 2114 }, { "epoch": 1.7811447811447811, "grad_norm": 2.8331105709075928, "learning_rate": 8.983354006397177e-07, "loss": 0.9354023933410645, "step": 2116 }, { "epoch": 1.7828282828282829, "grad_norm": 3.6972124576568604, "learning_rate": 8.96707660811018e-07, "loss": 0.8982851505279541, "step": 2118 }, { "epoch": 1.7845117845117846, "grad_norm": 7.385217189788818, "learning_rate": 8.950806214099638e-07, "loss": 0.6171048879623413, "step": 2120 }, { "epoch": 1.7861952861952863, "grad_norm": 6.615528106689453, "learning_rate": 8.934542880423903e-07, "loss": 0.5291919708251953, "step": 2122 }, { "epoch": 1.7878787878787878, "grad_norm": 4.079862117767334, "learning_rate": 8.918286663117005e-07, "loss": 0.7172562479972839, "step": 2124 }, { "epoch": 1.7895622895622896, "grad_norm": 3.52138352394104, "learning_rate": 8.902037618188449e-07, "loss": 0.6790080666542053, "step": 2126 }, { "epoch": 1.791245791245791, "grad_norm": 3.611370325088501, "learning_rate": 8.885795801623035e-07, "loss": 0.6517022848129272, "step": 2128 }, { "epoch": 1.7929292929292928, "grad_norm": 14.185620307922363, "learning_rate": 8.869561269380652e-07, "loss": 0.6533136367797852, "step": 2130 }, { "epoch": 1.7946127946127945, "grad_norm": 4.437119007110596, "learning_rate": 8.853334077396098e-07, "loss": 0.5168370008468628, "step": 2132 }, { "epoch": 1.7962962962962963, "grad_norm": 2.8739631175994873, "learning_rate": 8.837114281578872e-07, "loss": 0.6581718921661377, "step": 2134 }, { "epoch": 1.797979797979798, "grad_norm": 6.71103572845459, "learning_rate": 8.820901937813003e-07, "loss": 0.3342350125312805, "step": 2136 }, { "epoch": 1.7996632996632997, "grad_norm": 4.6629486083984375, "learning_rate": 8.804697101956828e-07, "loss": 0.9553017616271973, "step": 2138 }, { "epoch": 1.8013468013468015, "grad_norm": 3.458785057067871, "learning_rate": 8.78849982984283e-07, "loss": 0.7399221658706665, "step": 2140 }, { "epoch": 1.803030303030303, "grad_norm": 6.880527973175049, "learning_rate": 8.772310177277427e-07, "loss": 0.7662659287452698, "step": 2142 }, { "epoch": 1.8047138047138047, "grad_norm": 4.792196273803711, "learning_rate": 8.756128200040782e-07, "loss": 0.6991869211196899, "step": 2144 }, { "epoch": 1.8063973063973064, "grad_norm": 20.593137741088867, "learning_rate": 8.739953953886614e-07, "loss": 0.8479831218719482, "step": 2146 }, { "epoch": 1.808080808080808, "grad_norm": 5.395805358886719, "learning_rate": 8.72378749454201e-07, "loss": 0.8385607004165649, "step": 2148 }, { "epoch": 1.8097643097643097, "grad_norm": 2.312955379486084, "learning_rate": 8.707628877707221e-07, "loss": 0.9476625919342041, "step": 2150 }, { "epoch": 1.8114478114478114, "grad_norm": 4.342343807220459, "learning_rate": 8.691478159055483e-07, "loss": 0.9815539121627808, "step": 2152 }, { "epoch": 1.8131313131313131, "grad_norm": 7.069920063018799, "learning_rate": 8.675335394232819e-07, "loss": 0.7816078066825867, "step": 2154 }, { "epoch": 1.8148148148148149, "grad_norm": 3.6072463989257812, "learning_rate": 8.659200638857845e-07, "loss": 0.642024576663971, "step": 2156 }, { "epoch": 1.8164983164983166, "grad_norm": 3.091968297958374, "learning_rate": 8.643073948521576e-07, "loss": 0.4574873447418213, "step": 2158 }, { "epoch": 1.8181818181818183, "grad_norm": 3.328583002090454, "learning_rate": 8.626955378787256e-07, "loss": 0.8696750402450562, "step": 2160 }, { "epoch": 1.8198653198653199, "grad_norm": 10.013894081115723, "learning_rate": 8.610844985190127e-07, "loss": 0.6890912652015686, "step": 2162 }, { "epoch": 1.8215488215488216, "grad_norm": 9.785984992980957, "learning_rate": 8.594742823237287e-07, "loss": 0.8184359073638916, "step": 2164 }, { "epoch": 1.823232323232323, "grad_norm": 6.415750026702881, "learning_rate": 8.578648948407452e-07, "loss": 0.9016733169555664, "step": 2166 }, { "epoch": 1.8249158249158248, "grad_norm": 4.460061073303223, "learning_rate": 8.562563416150794e-07, "loss": 0.8134877681732178, "step": 2168 }, { "epoch": 1.8265993265993266, "grad_norm": 9.919402122497559, "learning_rate": 8.546486281888739e-07, "loss": 0.5249311923980713, "step": 2170 }, { "epoch": 1.8282828282828283, "grad_norm": 4.25754976272583, "learning_rate": 8.53041760101378e-07, "loss": 0.7299934029579163, "step": 2172 }, { "epoch": 1.82996632996633, "grad_norm": 5.09484338760376, "learning_rate": 8.51435742888928e-07, "loss": 0.3953469395637512, "step": 2174 }, { "epoch": 1.8316498316498318, "grad_norm": 3.4278955459594727, "learning_rate": 8.498305820849296e-07, "loss": 0.4628002643585205, "step": 2176 }, { "epoch": 1.8333333333333335, "grad_norm": 9.368400573730469, "learning_rate": 8.482262832198365e-07, "loss": 0.6508548259735107, "step": 2178 }, { "epoch": 1.835016835016835, "grad_norm": 2.587501287460327, "learning_rate": 8.46622851821134e-07, "loss": 0.8062055110931396, "step": 2180 }, { "epoch": 1.8367003367003367, "grad_norm": 19.423526763916016, "learning_rate": 8.450202934133174e-07, "loss": 0.395694375038147, "step": 2182 }, { "epoch": 1.8383838383838382, "grad_norm": 9.420888900756836, "learning_rate": 8.434186135178749e-07, "loss": 0.9303032159805298, "step": 2184 }, { "epoch": 1.84006734006734, "grad_norm": 3.11016845703125, "learning_rate": 8.418178176532674e-07, "loss": 0.9512186050415039, "step": 2186 }, { "epoch": 1.8417508417508417, "grad_norm": 3.171823501586914, "learning_rate": 8.402179113349106e-07, "loss": 0.8358129262924194, "step": 2188 }, { "epoch": 1.8434343434343434, "grad_norm": 20.7672176361084, "learning_rate": 8.386189000751544e-07, "loss": 0.4782221019268036, "step": 2190 }, { "epoch": 1.8451178451178452, "grad_norm": 16.821311950683594, "learning_rate": 8.370207893832661e-07, "loss": 0.7589244246482849, "step": 2192 }, { "epoch": 1.8468013468013469, "grad_norm": 13.138861656188965, "learning_rate": 8.354235847654092e-07, "loss": 0.5737025141716003, "step": 2194 }, { "epoch": 1.8484848484848486, "grad_norm": 7.118038177490234, "learning_rate": 8.338272917246252e-07, "loss": 0.7278249263763428, "step": 2196 }, { "epoch": 1.8501683501683501, "grad_norm": 10.954305648803711, "learning_rate": 8.322319157608158e-07, "loss": 0.4181557893753052, "step": 2198 }, { "epoch": 1.8518518518518519, "grad_norm": 2.7884762287139893, "learning_rate": 8.306374623707222e-07, "loss": 0.9623106718063354, "step": 2200 }, { "epoch": 1.8535353535353534, "grad_norm": 20.612829208374023, "learning_rate": 8.29043937047907e-07, "loss": 0.6952165365219116, "step": 2202 }, { "epoch": 1.855218855218855, "grad_norm": 4.281187534332275, "learning_rate": 8.274513452827361e-07, "loss": 0.5947088003158569, "step": 2204 }, { "epoch": 1.8569023569023568, "grad_norm": 4.014023780822754, "learning_rate": 8.258596925623578e-07, "loss": 0.8658819198608398, "step": 2206 }, { "epoch": 1.8585858585858586, "grad_norm": 5.036103248596191, "learning_rate": 8.242689843706852e-07, "loss": 0.7244065999984741, "step": 2208 }, { "epoch": 1.8602693602693603, "grad_norm": 10.339949607849121, "learning_rate": 8.226792261883777e-07, "loss": 0.28258228302001953, "step": 2210 }, { "epoch": 1.861952861952862, "grad_norm": 3.753382921218872, "learning_rate": 8.210904234928213e-07, "loss": 0.7527827620506287, "step": 2212 }, { "epoch": 1.8636363636363638, "grad_norm": 6.2754082679748535, "learning_rate": 8.195025817581092e-07, "loss": 1.0558643341064453, "step": 2214 }, { "epoch": 1.8653198653198653, "grad_norm": 26.839725494384766, "learning_rate": 8.179157064550246e-07, "loss": 0.30213648080825806, "step": 2216 }, { "epoch": 1.867003367003367, "grad_norm": 9.034235954284668, "learning_rate": 8.163298030510208e-07, "loss": 0.5420745611190796, "step": 2218 }, { "epoch": 1.8686868686868687, "grad_norm": 11.360336303710938, "learning_rate": 8.147448770102019e-07, "loss": 0.4777377247810364, "step": 2220 }, { "epoch": 1.8703703703703702, "grad_norm": 20.048816680908203, "learning_rate": 8.131609337933054e-07, "loss": 0.6607373952865601, "step": 2222 }, { "epoch": 1.872053872053872, "grad_norm": 4.080456733703613, "learning_rate": 8.115779788576818e-07, "loss": 0.9611594676971436, "step": 2224 }, { "epoch": 1.8737373737373737, "grad_norm": 6.5773539543151855, "learning_rate": 8.099960176572768e-07, "loss": 0.5292639136314392, "step": 2226 }, { "epoch": 1.8754208754208754, "grad_norm": 8.111262321472168, "learning_rate": 8.08415055642613e-07, "loss": 0.4228924512863159, "step": 2228 }, { "epoch": 1.8771043771043772, "grad_norm": 7.139439105987549, "learning_rate": 8.068350982607693e-07, "loss": 1.036095380783081, "step": 2230 }, { "epoch": 1.878787878787879, "grad_norm": 5.3863019943237305, "learning_rate": 8.052561509553633e-07, "loss": 0.879308819770813, "step": 2232 }, { "epoch": 1.8804713804713806, "grad_norm": 2.3317646980285645, "learning_rate": 8.03678219166533e-07, "loss": 0.4804232120513916, "step": 2234 }, { "epoch": 1.8821548821548821, "grad_norm": 7.66561222076416, "learning_rate": 8.021013083309181e-07, "loss": 0.6847870349884033, "step": 2236 }, { "epoch": 1.8838383838383839, "grad_norm": 20.43767738342285, "learning_rate": 8.005254238816392e-07, "loss": 0.9153972864151001, "step": 2238 }, { "epoch": 1.8855218855218854, "grad_norm": 4.095572471618652, "learning_rate": 7.989505712482814e-07, "loss": 1.1261423826217651, "step": 2240 }, { "epoch": 1.887205387205387, "grad_norm": 3.0072097778320312, "learning_rate": 7.973767558568749e-07, "loss": 0.9912216663360596, "step": 2242 }, { "epoch": 1.8888888888888888, "grad_norm": 5.47834587097168, "learning_rate": 7.95803983129876e-07, "loss": 0.914950430393219, "step": 2244 }, { "epoch": 1.8905723905723906, "grad_norm": 2.8906898498535156, "learning_rate": 7.942322584861476e-07, "loss": 0.9614880681037903, "step": 2246 }, { "epoch": 1.8922558922558923, "grad_norm": 6.5934977531433105, "learning_rate": 7.926615873409435e-07, "loss": 0.9108870029449463, "step": 2248 }, { "epoch": 1.893939393939394, "grad_norm": 3.955982208251953, "learning_rate": 7.910919751058863e-07, "loss": 0.9415953755378723, "step": 2250 }, { "epoch": 1.8956228956228958, "grad_norm": 3.4299967288970947, "learning_rate": 7.895234271889502e-07, "loss": 1.1199214458465576, "step": 2252 }, { "epoch": 1.8973063973063973, "grad_norm": 2.3956785202026367, "learning_rate": 7.879559489944431e-07, "loss": 0.8545496463775635, "step": 2254 }, { "epoch": 1.898989898989899, "grad_norm": 5.623586654663086, "learning_rate": 7.86389545922987e-07, "loss": 0.8165835738182068, "step": 2256 }, { "epoch": 1.9006734006734005, "grad_norm": 3.9756457805633545, "learning_rate": 7.848242233714992e-07, "loss": 0.9491643905639648, "step": 2258 }, { "epoch": 1.9023569023569022, "grad_norm": 12.658402442932129, "learning_rate": 7.832599867331751e-07, "loss": 0.7046935558319092, "step": 2260 }, { "epoch": 1.904040404040404, "grad_norm": 3.2188074588775635, "learning_rate": 7.816968413974676e-07, "loss": 0.7821506261825562, "step": 2262 }, { "epoch": 1.9057239057239057, "grad_norm": 6.156993865966797, "learning_rate": 7.801347927500701e-07, "loss": 0.4521103501319885, "step": 2264 }, { "epoch": 1.9074074074074074, "grad_norm": 7.991714000701904, "learning_rate": 7.785738461728975e-07, "loss": 0.7530079483985901, "step": 2266 }, { "epoch": 1.9090909090909092, "grad_norm": 5.461221694946289, "learning_rate": 7.770140070440679e-07, "loss": 0.6550673842430115, "step": 2268 }, { "epoch": 1.910774410774411, "grad_norm": 4.9177446365356445, "learning_rate": 7.754552807378827e-07, "loss": 0.8085366487503052, "step": 2270 }, { "epoch": 1.9124579124579124, "grad_norm": 5.982177257537842, "learning_rate": 7.738976726248105e-07, "loss": 0.8757312893867493, "step": 2272 }, { "epoch": 1.9141414141414141, "grad_norm": 5.69901704788208, "learning_rate": 7.723411880714663e-07, "loss": 0.6707878112792969, "step": 2274 }, { "epoch": 1.9158249158249159, "grad_norm": 5.210788249969482, "learning_rate": 7.707858324405945e-07, "loss": 0.8715642690658569, "step": 2276 }, { "epoch": 1.9175084175084174, "grad_norm": 9.772908210754395, "learning_rate": 7.692316110910495e-07, "loss": 0.5358242392539978, "step": 2278 }, { "epoch": 1.9191919191919191, "grad_norm": 10.29883861541748, "learning_rate": 7.676785293777779e-07, "loss": 0.18194249272346497, "step": 2280 }, { "epoch": 1.9208754208754208, "grad_norm": 12.38522720336914, "learning_rate": 7.661265926517997e-07, "loss": 0.9799966812133789, "step": 2282 }, { "epoch": 1.9225589225589226, "grad_norm": 10.77762222290039, "learning_rate": 7.6457580626019e-07, "loss": 0.4065392315387726, "step": 2284 }, { "epoch": 1.9242424242424243, "grad_norm": 4.503013610839844, "learning_rate": 7.630261755460598e-07, "loss": 0.6107114553451538, "step": 2286 }, { "epoch": 1.925925925925926, "grad_norm": 4.1006574630737305, "learning_rate": 7.614777058485398e-07, "loss": 0.9525327086448669, "step": 2288 }, { "epoch": 1.9276094276094278, "grad_norm": 4.675087928771973, "learning_rate": 7.59930402502759e-07, "loss": 0.64920973777771, "step": 2290 }, { "epoch": 1.9292929292929293, "grad_norm": 10.305427551269531, "learning_rate": 7.58384270839829e-07, "loss": 0.4203697741031647, "step": 2292 }, { "epoch": 1.930976430976431, "grad_norm": 10.108484268188477, "learning_rate": 7.568393161868234e-07, "loss": 0.8978174924850464, "step": 2294 }, { "epoch": 1.9326599326599325, "grad_norm": 9.093255996704102, "learning_rate": 7.552955438667612e-07, "loss": 0.7504777908325195, "step": 2296 }, { "epoch": 1.9343434343434343, "grad_norm": 2.7709944248199463, "learning_rate": 7.537529591985879e-07, "loss": 0.7725180983543396, "step": 2298 }, { "epoch": 1.936026936026936, "grad_norm": 18.586732864379883, "learning_rate": 7.522115674971564e-07, "loss": 0.5804815292358398, "step": 2300 }, { "epoch": 1.9377104377104377, "grad_norm": 5.98298978805542, "learning_rate": 7.506713740732098e-07, "loss": 1.1325410604476929, "step": 2302 }, { "epoch": 1.9393939393939394, "grad_norm": 2.994622230529785, "learning_rate": 7.491323842333626e-07, "loss": 0.9246529340744019, "step": 2304 }, { "epoch": 1.9410774410774412, "grad_norm": 10.680237770080566, "learning_rate": 7.47594603280082e-07, "loss": 0.2776586413383484, "step": 2306 }, { "epoch": 1.942760942760943, "grad_norm": 5.663994312286377, "learning_rate": 7.460580365116704e-07, "loss": 0.7812565565109253, "step": 2308 }, { "epoch": 1.9444444444444444, "grad_norm": 5.120817184448242, "learning_rate": 7.445226892222476e-07, "loss": 1.0193424224853516, "step": 2310 }, { "epoch": 1.9461279461279462, "grad_norm": 23.06011962890625, "learning_rate": 7.429885667017301e-07, "loss": 0.9074631929397583, "step": 2312 }, { "epoch": 1.9478114478114477, "grad_norm": 27.372034072875977, "learning_rate": 7.41455674235816e-07, "loss": 0.860990583896637, "step": 2314 }, { "epoch": 1.9494949494949494, "grad_norm": 4.709370136260986, "learning_rate": 7.399240171059649e-07, "loss": 0.6999090313911438, "step": 2316 }, { "epoch": 1.9511784511784511, "grad_norm": 3.648000955581665, "learning_rate": 7.383936005893798e-07, "loss": 0.8313673138618469, "step": 2318 }, { "epoch": 1.9528619528619529, "grad_norm": 6.1949005126953125, "learning_rate": 7.368644299589894e-07, "loss": 0.8585817217826843, "step": 2320 }, { "epoch": 1.9545454545454546, "grad_norm": 6.129204273223877, "learning_rate": 7.353365104834304e-07, "loss": 0.9358435869216919, "step": 2322 }, { "epoch": 1.9562289562289563, "grad_norm": 5.967504501342773, "learning_rate": 7.338098474270277e-07, "loss": 0.6934836506843567, "step": 2324 }, { "epoch": 1.957912457912458, "grad_norm": 8.291871070861816, "learning_rate": 7.322844460497783e-07, "loss": 0.4362953305244446, "step": 2326 }, { "epoch": 1.9595959595959596, "grad_norm": 4.457334995269775, "learning_rate": 7.307603116073317e-07, "loss": 1.026896595954895, "step": 2328 }, { "epoch": 1.9612794612794613, "grad_norm": 34.517372131347656, "learning_rate": 7.292374493509725e-07, "loss": 0.9922385811805725, "step": 2330 }, { "epoch": 1.9629629629629628, "grad_norm": 5.860324859619141, "learning_rate": 7.277158645276014e-07, "loss": 0.9369185566902161, "step": 2332 }, { "epoch": 1.9646464646464645, "grad_norm": 6.046477317810059, "learning_rate": 7.261955623797189e-07, "loss": 0.9571334719657898, "step": 2334 }, { "epoch": 1.9663299663299663, "grad_norm": 7.769930362701416, "learning_rate": 7.246765481454056e-07, "loss": 0.8826982975006104, "step": 2336 }, { "epoch": 1.968013468013468, "grad_norm": 6.431835651397705, "learning_rate": 7.23158827058304e-07, "loss": 0.9630632400512695, "step": 2338 }, { "epoch": 1.9696969696969697, "grad_norm": 22.74308967590332, "learning_rate": 7.216424043476022e-07, "loss": 0.3798217177391052, "step": 2340 }, { "epoch": 1.9713804713804715, "grad_norm": 4.635124683380127, "learning_rate": 7.20127285238015e-07, "loss": 0.6755929589271545, "step": 2342 }, { "epoch": 1.9730639730639732, "grad_norm": 11.939600944519043, "learning_rate": 7.186134749497645e-07, "loss": 0.4677308201789856, "step": 2344 }, { "epoch": 1.9747474747474747, "grad_norm": 7.168182849884033, "learning_rate": 7.171009786985642e-07, "loss": 0.7687026858329773, "step": 2346 }, { "epoch": 1.9764309764309764, "grad_norm": 8.714031219482422, "learning_rate": 7.155898016956008e-07, "loss": 0.6937582492828369, "step": 2348 }, { "epoch": 1.9781144781144782, "grad_norm": 12.572547912597656, "learning_rate": 7.14079949147514e-07, "loss": 0.6481941342353821, "step": 2350 }, { "epoch": 1.9797979797979797, "grad_norm": 11.015668869018555, "learning_rate": 7.125714262563814e-07, "loss": 0.5940038561820984, "step": 2352 }, { "epoch": 1.9814814814814814, "grad_norm": 2.312359571456909, "learning_rate": 7.110642382196996e-07, "loss": 0.3644195795059204, "step": 2354 }, { "epoch": 1.9831649831649831, "grad_norm": 2.8572607040405273, "learning_rate": 7.095583902303648e-07, "loss": 0.9964379668235779, "step": 2356 }, { "epoch": 1.9848484848484849, "grad_norm": 3.200378179550171, "learning_rate": 7.080538874766573e-07, "loss": 0.901992917060852, "step": 2358 }, { "epoch": 1.9865319865319866, "grad_norm": 3.659830093383789, "learning_rate": 7.06550735142222e-07, "loss": 0.8655633926391602, "step": 2360 }, { "epoch": 1.9882154882154883, "grad_norm": 4.574953079223633, "learning_rate": 7.050489384060512e-07, "loss": 0.6048173904418945, "step": 2362 }, { "epoch": 1.98989898989899, "grad_norm": 2.2669944763183594, "learning_rate": 7.035485024424666e-07, "loss": 0.8642423152923584, "step": 2364 }, { "epoch": 1.9915824915824916, "grad_norm": 9.807171821594238, "learning_rate": 7.020494324211017e-07, "loss": 0.8357862234115601, "step": 2366 }, { "epoch": 1.9932659932659933, "grad_norm": 4.8804097175598145, "learning_rate": 7.005517335068827e-07, "loss": 0.9583761692047119, "step": 2368 }, { "epoch": 1.9949494949494948, "grad_norm": 3.2299656867980957, "learning_rate": 6.99055410860013e-07, "loss": 0.3349935710430145, "step": 2370 }, { "epoch": 1.9966329966329965, "grad_norm": 10.934320449829102, "learning_rate": 6.975604696359542e-07, "loss": 0.4770701825618744, "step": 2372 }, { "epoch": 1.9983164983164983, "grad_norm": 4.283078193664551, "learning_rate": 6.960669149854068e-07, "loss": 0.8760964870452881, "step": 2374 }, { "epoch": 2.0, "grad_norm": 2.9110515117645264, "learning_rate": 6.945747520542955e-07, "loss": 0.8032587766647339, "step": 2376 }, { "epoch": 2.0016835016835017, "grad_norm": 13.453629493713379, "learning_rate": 6.930839859837496e-07, "loss": 0.5529247522354126, "step": 2378 }, { "epoch": 2.0033670033670035, "grad_norm": 2.993082284927368, "learning_rate": 6.915946219100852e-07, "loss": 1.0674469470977783, "step": 2380 }, { "epoch": 2.005050505050505, "grad_norm": 9.452933311462402, "learning_rate": 6.901066649647887e-07, "loss": 0.5305376052856445, "step": 2382 }, { "epoch": 2.006734006734007, "grad_norm": 10.649518013000488, "learning_rate": 6.886201202744972e-07, "loss": 0.4740598499774933, "step": 2384 }, { "epoch": 2.008417508417508, "grad_norm": 2.3154711723327637, "learning_rate": 6.871349929609826e-07, "loss": 0.6172109842300415, "step": 2386 }, { "epoch": 2.01010101010101, "grad_norm": 4.7962565422058105, "learning_rate": 6.856512881411343e-07, "loss": 0.751620352268219, "step": 2388 }, { "epoch": 2.0117845117845117, "grad_norm": 3.493546485900879, "learning_rate": 6.841690109269386e-07, "loss": 0.9236295223236084, "step": 2390 }, { "epoch": 2.0134680134680134, "grad_norm": 4.757162094116211, "learning_rate": 6.826881664254646e-07, "loss": 0.5604578852653503, "step": 2392 }, { "epoch": 2.015151515151515, "grad_norm": 15.590490341186523, "learning_rate": 6.812087597388452e-07, "loss": 0.656000018119812, "step": 2394 }, { "epoch": 2.016835016835017, "grad_norm": 3.1684648990631104, "learning_rate": 6.79730795964258e-07, "loss": 0.7373712062835693, "step": 2396 }, { "epoch": 2.0185185185185186, "grad_norm": 4.949743270874023, "learning_rate": 6.782542801939105e-07, "loss": 0.7128652930259705, "step": 2398 }, { "epoch": 2.0202020202020203, "grad_norm": 6.615425109863281, "learning_rate": 6.767792175150211e-07, "loss": 0.5110639333724976, "step": 2400 }, { "epoch": 2.021885521885522, "grad_norm": 29.939123153686523, "learning_rate": 6.753056130098009e-07, "loss": 0.3357080817222595, "step": 2402 }, { "epoch": 2.0235690235690234, "grad_norm": 7.256524562835693, "learning_rate": 6.738334717554373e-07, "loss": 0.803414523601532, "step": 2404 }, { "epoch": 2.025252525252525, "grad_norm": 8.12669563293457, "learning_rate": 6.723627988240772e-07, "loss": 0.6509519815444946, "step": 2406 }, { "epoch": 2.026936026936027, "grad_norm": 2.850172519683838, "learning_rate": 6.708935992828068e-07, "loss": 0.6972189545631409, "step": 2408 }, { "epoch": 2.0286195286195285, "grad_norm": 4.508718967437744, "learning_rate": 6.694258781936369e-07, "loss": 0.607012152671814, "step": 2410 }, { "epoch": 2.0303030303030303, "grad_norm": 6.282621383666992, "learning_rate": 6.679596406134844e-07, "loss": 0.8239716291427612, "step": 2412 }, { "epoch": 2.031986531986532, "grad_norm": 4.128354549407959, "learning_rate": 6.664948915941546e-07, "loss": 0.6955975294113159, "step": 2414 }, { "epoch": 2.0336700336700337, "grad_norm": 2.551084518432617, "learning_rate": 6.65031636182324e-07, "loss": 0.7895976305007935, "step": 2416 }, { "epoch": 2.0353535353535355, "grad_norm": 5.181878089904785, "learning_rate": 6.635698794195237e-07, "loss": 0.5881921052932739, "step": 2418 }, { "epoch": 2.037037037037037, "grad_norm": 15.786396980285645, "learning_rate": 6.621096263421202e-07, "loss": 0.3898243308067322, "step": 2420 }, { "epoch": 2.038720538720539, "grad_norm": 5.842708587646484, "learning_rate": 6.606508819813001e-07, "loss": 0.7089550495147705, "step": 2422 }, { "epoch": 2.04040404040404, "grad_norm": 48.35086441040039, "learning_rate": 6.591936513630514e-07, "loss": 0.17687079310417175, "step": 2424 }, { "epoch": 2.042087542087542, "grad_norm": 7.372962951660156, "learning_rate": 6.577379395081466e-07, "loss": 0.33852899074554443, "step": 2426 }, { "epoch": 2.0437710437710437, "grad_norm": 35.252044677734375, "learning_rate": 6.562837514321258e-07, "loss": 0.5263517498970032, "step": 2428 }, { "epoch": 2.0454545454545454, "grad_norm": 2.461886405944824, "learning_rate": 6.548310921452784e-07, "loss": 0.7057082653045654, "step": 2430 }, { "epoch": 2.047138047138047, "grad_norm": 3.156841516494751, "learning_rate": 6.533799666526275e-07, "loss": 0.6170644760131836, "step": 2432 }, { "epoch": 2.048821548821549, "grad_norm": 3.1701977252960205, "learning_rate": 6.519303799539104e-07, "loss": 0.7602715492248535, "step": 2434 }, { "epoch": 2.0505050505050506, "grad_norm": 3.952972412109375, "learning_rate": 6.504823370435633e-07, "loss": 1.1037501096725464, "step": 2436 }, { "epoch": 2.0521885521885523, "grad_norm": 3.426377773284912, "learning_rate": 6.490358429107038e-07, "loss": 0.6811984181404114, "step": 2438 }, { "epoch": 2.053872053872054, "grad_norm": 11.86534309387207, "learning_rate": 6.47590902539112e-07, "loss": 0.7863556146621704, "step": 2440 }, { "epoch": 2.0555555555555554, "grad_norm": 8.28430461883545, "learning_rate": 6.461475209072161e-07, "loss": 0.6948744654655457, "step": 2442 }, { "epoch": 2.057239057239057, "grad_norm": 2.8476991653442383, "learning_rate": 6.44705702988073e-07, "loss": 0.7017114162445068, "step": 2444 }, { "epoch": 2.058922558922559, "grad_norm": 5.588902950286865, "learning_rate": 6.432654537493518e-07, "loss": 0.8929611444473267, "step": 2446 }, { "epoch": 2.0606060606060606, "grad_norm": 3.2887089252471924, "learning_rate": 6.418267781533173e-07, "loss": 0.79296875, "step": 2448 }, { "epoch": 2.0622895622895623, "grad_norm": 14.845014572143555, "learning_rate": 6.403896811568124e-07, "loss": 0.5820084810256958, "step": 2450 }, { "epoch": 2.063973063973064, "grad_norm": 6.622726917266846, "learning_rate": 6.389541677112407e-07, "loss": 0.8630738258361816, "step": 2452 }, { "epoch": 2.0656565656565657, "grad_norm": 8.550455093383789, "learning_rate": 6.375202427625505e-07, "loss": 0.6654762625694275, "step": 2454 }, { "epoch": 2.0673400673400675, "grad_norm": 2.6550607681274414, "learning_rate": 6.360879112512159e-07, "loss": 0.7484475374221802, "step": 2456 }, { "epoch": 2.069023569023569, "grad_norm": 3.6882874965667725, "learning_rate": 6.346571781122218e-07, "loss": 0.709972620010376, "step": 2458 }, { "epoch": 2.0707070707070705, "grad_norm": 10.880833625793457, "learning_rate": 6.332280482750466e-07, "loss": 0.5114179849624634, "step": 2460 }, { "epoch": 2.0723905723905722, "grad_norm": 7.7855000495910645, "learning_rate": 6.318005266636428e-07, "loss": 0.5731675028800964, "step": 2462 }, { "epoch": 2.074074074074074, "grad_norm": 9.688587188720703, "learning_rate": 6.303746181964234e-07, "loss": 0.5561926364898682, "step": 2464 }, { "epoch": 2.0757575757575757, "grad_norm": 10.193296432495117, "learning_rate": 6.289503277862438e-07, "loss": 0.7371481657028198, "step": 2466 }, { "epoch": 2.0774410774410774, "grad_norm": 5.605756759643555, "learning_rate": 6.275276603403824e-07, "loss": 0.5109883546829224, "step": 2468 }, { "epoch": 2.079124579124579, "grad_norm": 6.081256866455078, "learning_rate": 6.26106620760528e-07, "loss": 0.9331031441688538, "step": 2470 }, { "epoch": 2.080808080808081, "grad_norm": 5.508481979370117, "learning_rate": 6.246872139427602e-07, "loss": 0.9123448133468628, "step": 2472 }, { "epoch": 2.0824915824915826, "grad_norm": 4.696747779846191, "learning_rate": 6.232694447775316e-07, "loss": 0.4582900106906891, "step": 2474 }, { "epoch": 2.0841750841750843, "grad_norm": 8.642160415649414, "learning_rate": 6.218533181496541e-07, "loss": 0.5799881815910339, "step": 2476 }, { "epoch": 2.0858585858585856, "grad_norm": 4.685534954071045, "learning_rate": 6.204388389382804e-07, "loss": 0.7565197944641113, "step": 2478 }, { "epoch": 2.0875420875420874, "grad_norm": 3.0699758529663086, "learning_rate": 6.190260120168855e-07, "loss": 0.6127052307128906, "step": 2480 }, { "epoch": 2.089225589225589, "grad_norm": 2.890374183654785, "learning_rate": 6.17614842253253e-07, "loss": 0.6200038194656372, "step": 2482 }, { "epoch": 2.090909090909091, "grad_norm": 5.803356647491455, "learning_rate": 6.162053345094569e-07, "loss": 1.080254316329956, "step": 2484 }, { "epoch": 2.0925925925925926, "grad_norm": 6.378223419189453, "learning_rate": 6.147974936418436e-07, "loss": 0.5638513565063477, "step": 2486 }, { "epoch": 2.0942760942760943, "grad_norm": 2.6295933723449707, "learning_rate": 6.133913245010181e-07, "loss": 0.5809881687164307, "step": 2488 }, { "epoch": 2.095959595959596, "grad_norm": 9.536388397216797, "learning_rate": 6.119868319318244e-07, "loss": 0.7412412166595459, "step": 2490 }, { "epoch": 2.0976430976430978, "grad_norm": 6.749050140380859, "learning_rate": 6.105840207733302e-07, "loss": 0.8024865388870239, "step": 2492 }, { "epoch": 2.0993265993265995, "grad_norm": 3.2662672996520996, "learning_rate": 6.091828958588101e-07, "loss": 0.49432703852653503, "step": 2494 }, { "epoch": 2.101010101010101, "grad_norm": 4.484532356262207, "learning_rate": 6.077834620157296e-07, "loss": 0.7933484315872192, "step": 2496 }, { "epoch": 2.1026936026936025, "grad_norm": 2.866675853729248, "learning_rate": 6.063857240657264e-07, "loss": 0.4336718022823334, "step": 2498 }, { "epoch": 2.1043771043771042, "grad_norm": 5.029768943786621, "learning_rate": 6.049896868245962e-07, "loss": 0.5639874339103699, "step": 2500 }, { "epoch": 2.106060606060606, "grad_norm": 4.42257833480835, "learning_rate": 6.035953551022748e-07, "loss": 0.9859836101531982, "step": 2502 }, { "epoch": 2.1077441077441077, "grad_norm": 25.789899826049805, "learning_rate": 6.022027337028212e-07, "loss": 0.8477144241333008, "step": 2504 }, { "epoch": 2.1094276094276094, "grad_norm": 27.71114158630371, "learning_rate": 6.008118274244025e-07, "loss": 0.8800366520881653, "step": 2506 }, { "epoch": 2.111111111111111, "grad_norm": 5.043661594390869, "learning_rate": 5.994226410592762e-07, "loss": 0.40974220633506775, "step": 2508 }, { "epoch": 2.112794612794613, "grad_norm": 3.724855661392212, "learning_rate": 5.980351793937734e-07, "loss": 0.5578930377960205, "step": 2510 }, { "epoch": 2.1144781144781146, "grad_norm": 7.778206825256348, "learning_rate": 5.966494472082832e-07, "loss": 0.6988534927368164, "step": 2512 }, { "epoch": 2.1161616161616164, "grad_norm": 9.13245964050293, "learning_rate": 5.952654492772369e-07, "loss": 0.38724464178085327, "step": 2514 }, { "epoch": 2.1178451178451176, "grad_norm": 5.150360584259033, "learning_rate": 5.938831903690887e-07, "loss": 0.8136914968490601, "step": 2516 }, { "epoch": 2.1195286195286194, "grad_norm": 8.077790260314941, "learning_rate": 5.925026752463027e-07, "loss": 0.13099154829978943, "step": 2518 }, { "epoch": 2.121212121212121, "grad_norm": 14.749094009399414, "learning_rate": 5.911239086653345e-07, "loss": 0.33465084433555603, "step": 2520 }, { "epoch": 2.122895622895623, "grad_norm": 3.972292184829712, "learning_rate": 5.89746895376614e-07, "loss": 0.2251596450805664, "step": 2522 }, { "epoch": 2.1245791245791246, "grad_norm": 3.6862993240356445, "learning_rate": 5.883716401245329e-07, "loss": 0.41063302755355835, "step": 2524 }, { "epoch": 2.1262626262626263, "grad_norm": 13.53211498260498, "learning_rate": 5.869981476474235e-07, "loss": 0.32705599069595337, "step": 2526 }, { "epoch": 2.127946127946128, "grad_norm": 11.80972671508789, "learning_rate": 5.856264226775451e-07, "loss": 0.28738293051719666, "step": 2528 }, { "epoch": 2.1296296296296298, "grad_norm": 4.907763481140137, "learning_rate": 5.842564699410676e-07, "loss": 0.5695469379425049, "step": 2530 }, { "epoch": 2.1313131313131315, "grad_norm": 7.322058200836182, "learning_rate": 5.828882941580548e-07, "loss": 0.7862983345985413, "step": 2532 }, { "epoch": 2.1329966329966332, "grad_norm": 3.169811725616455, "learning_rate": 5.815219000424475e-07, "loss": 0.32265302538871765, "step": 2534 }, { "epoch": 2.1346801346801345, "grad_norm": 4.123760223388672, "learning_rate": 5.801572923020486e-07, "loss": 0.6733647584915161, "step": 2536 }, { "epoch": 2.1363636363636362, "grad_norm": 10.175186157226562, "learning_rate": 5.787944756385061e-07, "loss": 0.34301066398620605, "step": 2538 }, { "epoch": 2.138047138047138, "grad_norm": 0.8496463894844055, "learning_rate": 5.774334547472963e-07, "loss": 0.31534287333488464, "step": 2540 }, { "epoch": 2.1397306397306397, "grad_norm": 4.556532382965088, "learning_rate": 5.760742343177091e-07, "loss": 0.6951263546943665, "step": 2542 }, { "epoch": 2.1414141414141414, "grad_norm": 2.392409086227417, "learning_rate": 5.747168190328313e-07, "loss": 0.09168624877929688, "step": 2544 }, { "epoch": 2.143097643097643, "grad_norm": 2.1044692993164062, "learning_rate": 5.73361213569529e-07, "loss": 0.34088313579559326, "step": 2546 }, { "epoch": 2.144781144781145, "grad_norm": 12.998042106628418, "learning_rate": 5.720074225984335e-07, "loss": 0.6928970813751221, "step": 2548 }, { "epoch": 2.1464646464646466, "grad_norm": 3.534303903579712, "learning_rate": 5.706554507839247e-07, "loss": 0.8698376417160034, "step": 2550 }, { "epoch": 2.148148148148148, "grad_norm": 3.9357972145080566, "learning_rate": 5.693053027841139e-07, "loss": 0.5156476497650146, "step": 2552 }, { "epoch": 2.1498316498316496, "grad_norm": 12.438335418701172, "learning_rate": 5.679569832508294e-07, "loss": 0.14811789989471436, "step": 2554 }, { "epoch": 2.1515151515151514, "grad_norm": 8.0103759765625, "learning_rate": 5.666104968295993e-07, "loss": 0.4402310848236084, "step": 2556 }, { "epoch": 2.153198653198653, "grad_norm": 3.672968864440918, "learning_rate": 5.652658481596355e-07, "loss": 0.6228591203689575, "step": 2558 }, { "epoch": 2.154882154882155, "grad_norm": 6.100817680358887, "learning_rate": 5.639230418738186e-07, "loss": 0.3809899091720581, "step": 2560 }, { "epoch": 2.1565656565656566, "grad_norm": 25.523374557495117, "learning_rate": 5.625820825986818e-07, "loss": 0.4754774570465088, "step": 2562 }, { "epoch": 2.1582491582491583, "grad_norm": 4.202336311340332, "learning_rate": 5.61242974954393e-07, "loss": 0.7122776508331299, "step": 2564 }, { "epoch": 2.15993265993266, "grad_norm": 16.867658615112305, "learning_rate": 5.599057235547422e-07, "loss": 0.45209017395973206, "step": 2566 }, { "epoch": 2.1616161616161618, "grad_norm": 5.021929740905762, "learning_rate": 5.585703330071232e-07, "loss": 0.3703120946884155, "step": 2568 }, { "epoch": 2.1632996632996635, "grad_norm": 3.3957135677337646, "learning_rate": 5.572368079125177e-07, "loss": 0.8958742618560791, "step": 2570 }, { "epoch": 2.164983164983165, "grad_norm": 3.049757957458496, "learning_rate": 5.559051528654812e-07, "loss": 1.0562491416931152, "step": 2572 }, { "epoch": 2.1666666666666665, "grad_norm": 6.365866184234619, "learning_rate": 5.545753724541259e-07, "loss": 0.7664850950241089, "step": 2574 }, { "epoch": 2.1683501683501682, "grad_norm": 3.9597971439361572, "learning_rate": 5.532474712601041e-07, "loss": 0.2349638044834137, "step": 2576 }, { "epoch": 2.17003367003367, "grad_norm": 14.629343032836914, "learning_rate": 5.519214538585945e-07, "loss": 0.5862404108047485, "step": 2578 }, { "epoch": 2.1717171717171717, "grad_norm": 13.472465515136719, "learning_rate": 5.505973248182854e-07, "loss": 0.25796785950660706, "step": 2580 }, { "epoch": 2.1734006734006734, "grad_norm": 4.650449275970459, "learning_rate": 5.492750887013576e-07, "loss": 0.40474733710289, "step": 2582 }, { "epoch": 2.175084175084175, "grad_norm": 4.238655090332031, "learning_rate": 5.479547500634716e-07, "loss": 0.25570929050445557, "step": 2584 }, { "epoch": 2.176767676767677, "grad_norm": 9.685871124267578, "learning_rate": 5.466363134537495e-07, "loss": 0.582108736038208, "step": 2586 }, { "epoch": 2.1784511784511786, "grad_norm": 20.41779899597168, "learning_rate": 5.453197834147596e-07, "loss": 0.5546954274177551, "step": 2588 }, { "epoch": 2.18013468013468, "grad_norm": 89.8573226928711, "learning_rate": 5.440051644825024e-07, "loss": 0.6109448671340942, "step": 2590 }, { "epoch": 2.1818181818181817, "grad_norm": 9.055795669555664, "learning_rate": 5.426924611863932e-07, "loss": 0.4381883144378662, "step": 2592 }, { "epoch": 2.1835016835016834, "grad_norm": 7.171759605407715, "learning_rate": 5.413816780492464e-07, "loss": 0.28566718101501465, "step": 2594 }, { "epoch": 2.185185185185185, "grad_norm": 5.162403583526611, "learning_rate": 5.400728195872627e-07, "loss": 0.6839703321456909, "step": 2596 }, { "epoch": 2.186868686868687, "grad_norm": 4.578564643859863, "learning_rate": 5.387658903100093e-07, "loss": 0.7969393134117126, "step": 2598 }, { "epoch": 2.1885521885521886, "grad_norm": 3.3671751022338867, "learning_rate": 5.374608947204078e-07, "loss": 0.5756024122238159, "step": 2600 }, { "epoch": 2.1902356902356903, "grad_norm": 3.339944362640381, "learning_rate": 5.361578373147173e-07, "loss": 0.8270890116691589, "step": 2602 }, { "epoch": 2.191919191919192, "grad_norm": 61.960235595703125, "learning_rate": 5.348567225825182e-07, "loss": 0.7463648319244385, "step": 2604 }, { "epoch": 2.1936026936026938, "grad_norm": 12.145258903503418, "learning_rate": 5.335575550066987e-07, "loss": 0.3755905032157898, "step": 2606 }, { "epoch": 2.1952861952861955, "grad_norm": 4.23495626449585, "learning_rate": 5.322603390634379e-07, "loss": 0.828824520111084, "step": 2608 }, { "epoch": 2.196969696969697, "grad_norm": 5.706808090209961, "learning_rate": 5.3096507922219e-07, "loss": 0.7120569944381714, "step": 2610 }, { "epoch": 2.1986531986531985, "grad_norm": 7.548922538757324, "learning_rate": 5.296717799456703e-07, "loss": 0.2670977711677551, "step": 2612 }, { "epoch": 2.2003367003367003, "grad_norm": 6.819214820861816, "learning_rate": 5.283804456898393e-07, "loss": 0.7222539782524109, "step": 2614 }, { "epoch": 2.202020202020202, "grad_norm": 6.466555595397949, "learning_rate": 5.270910809038866e-07, "loss": 0.5107656717300415, "step": 2616 }, { "epoch": 2.2037037037037037, "grad_norm": 9.062774658203125, "learning_rate": 5.258036900302162e-07, "loss": 0.44302040338516235, "step": 2618 }, { "epoch": 2.2053872053872055, "grad_norm": 3.68121600151062, "learning_rate": 5.245182775044319e-07, "loss": 0.28953254222869873, "step": 2620 }, { "epoch": 2.207070707070707, "grad_norm": 4.225932598114014, "learning_rate": 5.2323484775532e-07, "loss": 0.5604819655418396, "step": 2622 }, { "epoch": 2.208754208754209, "grad_norm": 6.57682466506958, "learning_rate": 5.219534052048364e-07, "loss": 0.4838787317276001, "step": 2624 }, { "epoch": 2.2104377104377106, "grad_norm": 5.847450256347656, "learning_rate": 5.206739542680903e-07, "loss": 0.41042160987854004, "step": 2626 }, { "epoch": 2.212121212121212, "grad_norm": 10.914462089538574, "learning_rate": 5.193964993533275e-07, "loss": 0.5403867959976196, "step": 2628 }, { "epoch": 2.2138047138047137, "grad_norm": 8.292633056640625, "learning_rate": 5.181210448619185e-07, "loss": 0.25527873635292053, "step": 2630 }, { "epoch": 2.2154882154882154, "grad_norm": 18.88636016845703, "learning_rate": 5.168475951883405e-07, "loss": 0.404461145401001, "step": 2632 }, { "epoch": 2.217171717171717, "grad_norm": 3.0683631896972656, "learning_rate": 5.155761547201631e-07, "loss": 0.07407370954751968, "step": 2634 }, { "epoch": 2.218855218855219, "grad_norm": 3.333080291748047, "learning_rate": 5.143067278380339e-07, "loss": 0.7165415287017822, "step": 2636 }, { "epoch": 2.2205387205387206, "grad_norm": 11.401552200317383, "learning_rate": 5.13039318915663e-07, "loss": 1.0603926181793213, "step": 2638 }, { "epoch": 2.2222222222222223, "grad_norm": 7.289011001586914, "learning_rate": 5.117739323198067e-07, "loss": 0.997651219367981, "step": 2640 }, { "epoch": 2.223905723905724, "grad_norm": 4.159246444702148, "learning_rate": 5.105105724102547e-07, "loss": 0.6530795097351074, "step": 2642 }, { "epoch": 2.225589225589226, "grad_norm": 30.293039321899414, "learning_rate": 5.092492435398137e-07, "loss": 0.6192750930786133, "step": 2644 }, { "epoch": 2.227272727272727, "grad_norm": 13.535540580749512, "learning_rate": 5.079899500542917e-07, "loss": 0.5436962246894836, "step": 2646 }, { "epoch": 2.228956228956229, "grad_norm": 5.441864967346191, "learning_rate": 5.067326962924848e-07, "loss": 0.2577816843986511, "step": 2648 }, { "epoch": 2.2306397306397305, "grad_norm": 8.864923477172852, "learning_rate": 5.054774865861617e-07, "loss": 0.9602568745613098, "step": 2650 }, { "epoch": 2.2323232323232323, "grad_norm": 14.644983291625977, "learning_rate": 5.042243252600475e-07, "loss": 0.5225367546081543, "step": 2652 }, { "epoch": 2.234006734006734, "grad_norm": 17.72758674621582, "learning_rate": 5.029732166318106e-07, "loss": 0.47632715106010437, "step": 2654 }, { "epoch": 2.2356902356902357, "grad_norm": 3.0727274417877197, "learning_rate": 5.017241650120462e-07, "loss": 0.5418964624404907, "step": 2656 }, { "epoch": 2.2373737373737375, "grad_norm": 12.295948028564453, "learning_rate": 5.004771747042631e-07, "loss": 0.8024328351020813, "step": 2658 }, { "epoch": 2.239057239057239, "grad_norm": 10.540696144104004, "learning_rate": 4.992322500048673e-07, "loss": 0.5871691703796387, "step": 2660 }, { "epoch": 2.240740740740741, "grad_norm": 3.301222324371338, "learning_rate": 4.979893952031483e-07, "loss": 0.7337244153022766, "step": 2662 }, { "epoch": 2.242424242424242, "grad_norm": 18.132505416870117, "learning_rate": 4.96748614581264e-07, "loss": 0.3517826795578003, "step": 2664 }, { "epoch": 2.244107744107744, "grad_norm": 5.087287902832031, "learning_rate": 4.955099124142251e-07, "loss": 0.7348419427871704, "step": 2666 }, { "epoch": 2.2457912457912457, "grad_norm": 5.434046268463135, "learning_rate": 4.942732929698827e-07, "loss": 0.5416382551193237, "step": 2668 }, { "epoch": 2.2474747474747474, "grad_norm": 5.668000221252441, "learning_rate": 4.930387605089104e-07, "loss": 0.44201749563217163, "step": 2670 }, { "epoch": 2.249158249158249, "grad_norm": 2.4525139331817627, "learning_rate": 4.918063192847921e-07, "loss": 0.34817391633987427, "step": 2672 }, { "epoch": 2.250841750841751, "grad_norm": 10.748351097106934, "learning_rate": 4.905759735438068e-07, "loss": 0.6200217008590698, "step": 2674 }, { "epoch": 2.2525252525252526, "grad_norm": 4.222598075866699, "learning_rate": 4.893477275250127e-07, "loss": 0.7119044065475464, "step": 2676 }, { "epoch": 2.2542087542087543, "grad_norm": 3.8408939838409424, "learning_rate": 4.881215854602342e-07, "loss": 0.4421549141407013, "step": 2678 }, { "epoch": 2.255892255892256, "grad_norm": 2.2825546264648438, "learning_rate": 4.868975515740471e-07, "loss": 0.835530161857605, "step": 2680 }, { "epoch": 2.257575757575758, "grad_norm": 11.838665962219238, "learning_rate": 4.856756300837625e-07, "loss": 0.19798390567302704, "step": 2682 }, { "epoch": 2.259259259259259, "grad_norm": 25.079456329345703, "learning_rate": 4.844558251994146e-07, "loss": 0.1048535406589508, "step": 2684 }, { "epoch": 2.260942760942761, "grad_norm": 4.039831638336182, "learning_rate": 4.832381411237444e-07, "loss": 0.604271650314331, "step": 2686 }, { "epoch": 2.2626262626262625, "grad_norm": 4.378790855407715, "learning_rate": 4.820225820521855e-07, "loss": 0.36290663480758667, "step": 2688 }, { "epoch": 2.2643097643097643, "grad_norm": 4.032955169677734, "learning_rate": 4.808091521728506e-07, "loss": 0.8970327377319336, "step": 2690 }, { "epoch": 2.265993265993266, "grad_norm": 6.259299278259277, "learning_rate": 4.795978556665165e-07, "loss": 0.8129058480262756, "step": 2692 }, { "epoch": 2.2676767676767677, "grad_norm": 4.226785182952881, "learning_rate": 4.783886967066088e-07, "loss": 0.653793454170227, "step": 2694 }, { "epoch": 2.2693602693602695, "grad_norm": 8.080623626708984, "learning_rate": 4.77181679459189e-07, "loss": 0.5345746874809265, "step": 2696 }, { "epoch": 2.271043771043771, "grad_norm": 2.8720853328704834, "learning_rate": 4.759768080829399e-07, "loss": 0.638217568397522, "step": 2698 }, { "epoch": 2.2727272727272725, "grad_norm": 5.371377944946289, "learning_rate": 4.747740867291497e-07, "loss": 0.7549663782119751, "step": 2700 }, { "epoch": 2.274410774410774, "grad_norm": 2.809866428375244, "learning_rate": 4.7357351954169973e-07, "loss": 0.5037040114402771, "step": 2702 }, { "epoch": 2.276094276094276, "grad_norm": 11.470369338989258, "learning_rate": 4.7237511065704933e-07, "loss": 0.8505884408950806, "step": 2704 }, { "epoch": 2.2777777777777777, "grad_norm": 5.015624523162842, "learning_rate": 4.7117886420422094e-07, "loss": 0.9292435050010681, "step": 2706 }, { "epoch": 2.2794612794612794, "grad_norm": 3.195216655731201, "learning_rate": 4.6998478430478714e-07, "loss": 0.4456526041030884, "step": 2708 }, { "epoch": 2.281144781144781, "grad_norm": 7.715219497680664, "learning_rate": 4.6879287507285596e-07, "loss": 0.49354496598243713, "step": 2710 }, { "epoch": 2.282828282828283, "grad_norm": 10.351372718811035, "learning_rate": 4.676031406150555e-07, "loss": 0.517022430896759, "step": 2712 }, { "epoch": 2.2845117845117846, "grad_norm": 4.449305534362793, "learning_rate": 4.66415585030522e-07, "loss": 0.42631667852401733, "step": 2714 }, { "epoch": 2.2861952861952863, "grad_norm": 21.76262855529785, "learning_rate": 4.6523021241088416e-07, "loss": 0.7113944292068481, "step": 2716 }, { "epoch": 2.287878787878788, "grad_norm": 37.7462272644043, "learning_rate": 4.6404702684024905e-07, "loss": 0.5162969827651978, "step": 2718 }, { "epoch": 2.28956228956229, "grad_norm": 4.822917938232422, "learning_rate": 4.628660323951891e-07, "loss": 0.5146564841270447, "step": 2720 }, { "epoch": 2.291245791245791, "grad_norm": 2.2735533714294434, "learning_rate": 4.616872331447272e-07, "loss": 0.6732128262519836, "step": 2722 }, { "epoch": 2.292929292929293, "grad_norm": 3.959578514099121, "learning_rate": 4.605106331503223e-07, "loss": 0.6910574436187744, "step": 2724 }, { "epoch": 2.2946127946127945, "grad_norm": 6.245284080505371, "learning_rate": 4.5933623646585683e-07, "loss": 0.6672347784042358, "step": 2726 }, { "epoch": 2.2962962962962963, "grad_norm": 18.67147445678711, "learning_rate": 4.581640471376215e-07, "loss": 0.509329617023468, "step": 2728 }, { "epoch": 2.297979797979798, "grad_norm": 5.631857395172119, "learning_rate": 4.5699406920430155e-07, "loss": 0.9162227511405945, "step": 2730 }, { "epoch": 2.2996632996632997, "grad_norm": 4.981385707855225, "learning_rate": 4.5582630669696324e-07, "loss": 0.46352601051330566, "step": 2732 }, { "epoch": 2.3013468013468015, "grad_norm": 11.902592658996582, "learning_rate": 4.5466076363904e-07, "loss": 0.44609200954437256, "step": 2734 }, { "epoch": 2.303030303030303, "grad_norm": 7.000277042388916, "learning_rate": 4.5349744404631785e-07, "loss": 0.38603392243385315, "step": 2736 }, { "epoch": 2.3047138047138045, "grad_norm": 19.020755767822266, "learning_rate": 4.5233635192692206e-07, "loss": 0.5370512008666992, "step": 2738 }, { "epoch": 2.3063973063973062, "grad_norm": 9.254744529724121, "learning_rate": 4.511774912813043e-07, "loss": 0.35465237498283386, "step": 2740 }, { "epoch": 2.308080808080808, "grad_norm": 2.5461535453796387, "learning_rate": 4.5002086610222626e-07, "loss": 0.7493946552276611, "step": 2742 }, { "epoch": 2.3097643097643097, "grad_norm": 7.801723003387451, "learning_rate": 4.488664803747487e-07, "loss": 0.7291615009307861, "step": 2744 }, { "epoch": 2.3114478114478114, "grad_norm": 4.71798849105835, "learning_rate": 4.4771433807621644e-07, "loss": 0.8265661001205444, "step": 2746 }, { "epoch": 2.313131313131313, "grad_norm": 11.469908714294434, "learning_rate": 4.4656444317624397e-07, "loss": 0.6443151831626892, "step": 2748 }, { "epoch": 2.314814814814815, "grad_norm": 0.9388121962547302, "learning_rate": 4.454167996367032e-07, "loss": 0.0978798121213913, "step": 2750 }, { "epoch": 2.3164983164983166, "grad_norm": 7.400945663452148, "learning_rate": 4.442714114117092e-07, "loss": 0.2580530345439911, "step": 2752 }, { "epoch": 2.3181818181818183, "grad_norm": 3.6424386501312256, "learning_rate": 4.4312828244760613e-07, "loss": 0.46834707260131836, "step": 2754 }, { "epoch": 2.31986531986532, "grad_norm": 10.415234565734863, "learning_rate": 4.4198741668295425e-07, "loss": 0.900390625, "step": 2756 }, { "epoch": 2.3215488215488214, "grad_norm": 2.8194925785064697, "learning_rate": 4.4084881804851644e-07, "loss": 0.6006342172622681, "step": 2758 }, { "epoch": 2.323232323232323, "grad_norm": 9.550015449523926, "learning_rate": 4.397124904672437e-07, "loss": 0.7037711143493652, "step": 2760 }, { "epoch": 2.324915824915825, "grad_norm": 5.865845203399658, "learning_rate": 4.3857843785426263e-07, "loss": 0.4606119990348816, "step": 2762 }, { "epoch": 2.3265993265993266, "grad_norm": 9.260407447814941, "learning_rate": 4.374466641168622e-07, "loss": 0.9028510451316833, "step": 2764 }, { "epoch": 2.3282828282828283, "grad_norm": 30.487369537353516, "learning_rate": 4.363171731544786e-07, "loss": 0.6837437152862549, "step": 2766 }, { "epoch": 2.32996632996633, "grad_norm": 3.4019174575805664, "learning_rate": 4.351899688586834e-07, "loss": 0.5506434440612793, "step": 2768 }, { "epoch": 2.3316498316498318, "grad_norm": 9.221944808959961, "learning_rate": 4.3406505511317025e-07, "loss": 0.6231704354286194, "step": 2770 }, { "epoch": 2.3333333333333335, "grad_norm": 5.134349346160889, "learning_rate": 4.329424357937397e-07, "loss": 0.5775326490402222, "step": 2772 }, { "epoch": 2.3350168350168348, "grad_norm": 3.2986905574798584, "learning_rate": 4.318221147682879e-07, "loss": 0.6728795766830444, "step": 2774 }, { "epoch": 2.3367003367003365, "grad_norm": 7.071535587310791, "learning_rate": 4.307040958967924e-07, "loss": 0.7195960879325867, "step": 2776 }, { "epoch": 2.3383838383838382, "grad_norm": 6.33209228515625, "learning_rate": 4.2958838303129817e-07, "loss": 0.3605208098888397, "step": 2778 }, { "epoch": 2.34006734006734, "grad_norm": 15.394960403442383, "learning_rate": 4.2847498001590573e-07, "loss": 0.6560809016227722, "step": 2780 }, { "epoch": 2.3417508417508417, "grad_norm": 5.364711761474609, "learning_rate": 4.273638906867573e-07, "loss": 0.5723754167556763, "step": 2782 }, { "epoch": 2.3434343434343434, "grad_norm": 4.554681301116943, "learning_rate": 4.2625511887202225e-07, "loss": 0.786733090877533, "step": 2784 }, { "epoch": 2.345117845117845, "grad_norm": 5.919230937957764, "learning_rate": 4.2514866839188657e-07, "loss": 0.5187538862228394, "step": 2786 }, { "epoch": 2.346801346801347, "grad_norm": 2.8754208087921143, "learning_rate": 4.2404454305853796e-07, "loss": 0.9200822114944458, "step": 2788 }, { "epoch": 2.3484848484848486, "grad_norm": 4.2973833084106445, "learning_rate": 4.229427466761522e-07, "loss": 0.7082578539848328, "step": 2790 }, { "epoch": 2.3501683501683504, "grad_norm": 2.8982136249542236, "learning_rate": 4.2184328304088164e-07, "loss": 0.5452355146408081, "step": 2792 }, { "epoch": 2.351851851851852, "grad_norm": 10.917097091674805, "learning_rate": 4.2074615594084146e-07, "loss": 0.5780555009841919, "step": 2794 }, { "epoch": 2.3535353535353534, "grad_norm": 4.399576187133789, "learning_rate": 4.1965136915609543e-07, "loss": 0.9775782823562622, "step": 2796 }, { "epoch": 2.355218855218855, "grad_norm": 3.9406611919403076, "learning_rate": 4.1855892645864513e-07, "loss": 0.4702543616294861, "step": 2798 }, { "epoch": 2.356902356902357, "grad_norm": 2.8284730911254883, "learning_rate": 4.1746883161241555e-07, "loss": 1.041868805885315, "step": 2800 }, { "epoch": 2.3585858585858586, "grad_norm": 2.9816761016845703, "learning_rate": 4.1638108837324137e-07, "loss": 0.8972384333610535, "step": 2802 }, { "epoch": 2.3602693602693603, "grad_norm": 4.195338249206543, "learning_rate": 4.152957004888563e-07, "loss": 0.8051435947418213, "step": 2804 }, { "epoch": 2.361952861952862, "grad_norm": 7.884792804718018, "learning_rate": 4.142126716988784e-07, "loss": 0.805417001247406, "step": 2806 }, { "epoch": 2.3636363636363638, "grad_norm": 3.174224853515625, "learning_rate": 4.131320057347969e-07, "loss": 0.7631466388702393, "step": 2808 }, { "epoch": 2.3653198653198655, "grad_norm": 2.2100088596343994, "learning_rate": 4.120537063199612e-07, "loss": 0.9656248688697815, "step": 2810 }, { "epoch": 2.3670033670033668, "grad_norm": 21.951086044311523, "learning_rate": 4.109777771695663e-07, "loss": 0.6510505676269531, "step": 2812 }, { "epoch": 2.3686868686868685, "grad_norm": 4.415777683258057, "learning_rate": 4.0990422199064103e-07, "loss": 0.5992385745048523, "step": 2814 }, { "epoch": 2.3703703703703702, "grad_norm": 4.938045024871826, "learning_rate": 4.0883304448203477e-07, "loss": 0.6755191087722778, "step": 2816 }, { "epoch": 2.372053872053872, "grad_norm": 5.014671325683594, "learning_rate": 4.077642483344044e-07, "loss": 0.6416581869125366, "step": 2818 }, { "epoch": 2.3737373737373737, "grad_norm": 3.0677618980407715, "learning_rate": 4.066978372302025e-07, "loss": 0.7114299535751343, "step": 2820 }, { "epoch": 2.3754208754208754, "grad_norm": 5.499224662780762, "learning_rate": 4.056338148436643e-07, "loss": 0.38672173023223877, "step": 2822 }, { "epoch": 2.377104377104377, "grad_norm": 3.9416239261627197, "learning_rate": 4.0457218484079414e-07, "loss": 0.9695321321487427, "step": 2824 }, { "epoch": 2.378787878787879, "grad_norm": 4.72567892074585, "learning_rate": 4.035129508793542e-07, "loss": 0.899653971195221, "step": 2826 }, { "epoch": 2.3804713804713806, "grad_norm": 4.175594806671143, "learning_rate": 4.024561166088516e-07, "loss": 0.4069860577583313, "step": 2828 }, { "epoch": 2.3821548821548824, "grad_norm": 12.212733268737793, "learning_rate": 4.0140168567052447e-07, "loss": 0.90252685546875, "step": 2830 }, { "epoch": 2.3838383838383836, "grad_norm": 10.1971435546875, "learning_rate": 4.003496616973312e-07, "loss": 0.6742314100265503, "step": 2832 }, { "epoch": 2.3855218855218854, "grad_norm": 19.07830238342285, "learning_rate": 3.9930004831393757e-07, "loss": 0.5178687572479248, "step": 2834 }, { "epoch": 2.387205387205387, "grad_norm": 5.426108360290527, "learning_rate": 3.982528491367025e-07, "loss": 0.5686367154121399, "step": 2836 }, { "epoch": 2.388888888888889, "grad_norm": 15.152667045593262, "learning_rate": 3.9720806777366817e-07, "loss": 0.4284480810165405, "step": 2838 }, { "epoch": 2.3905723905723906, "grad_norm": 3.7981669902801514, "learning_rate": 3.961657078245462e-07, "loss": 0.7795579433441162, "step": 2840 }, { "epoch": 2.3922558922558923, "grad_norm": 2.7446529865264893, "learning_rate": 3.9512577288070487e-07, "loss": 0.3763793110847473, "step": 2842 }, { "epoch": 2.393939393939394, "grad_norm": 2.8617823123931885, "learning_rate": 3.940882665251576e-07, "loss": 0.9840795993804932, "step": 2844 }, { "epoch": 2.3956228956228958, "grad_norm": 3.311777114868164, "learning_rate": 3.930531923325506e-07, "loss": 0.7532452344894409, "step": 2846 }, { "epoch": 2.3973063973063975, "grad_norm": 7.39417839050293, "learning_rate": 3.920205538691497e-07, "loss": 0.9117331504821777, "step": 2848 }, { "epoch": 2.398989898989899, "grad_norm": 2.8873496055603027, "learning_rate": 3.9099035469282906e-07, "loss": 0.7445226907730103, "step": 2850 }, { "epoch": 2.4006734006734005, "grad_norm": 5.140913486480713, "learning_rate": 3.8996259835305835e-07, "loss": 0.3813757598400116, "step": 2852 }, { "epoch": 2.4023569023569022, "grad_norm": 18.368505477905273, "learning_rate": 3.8893728839089035e-07, "loss": 0.589090883731842, "step": 2854 }, { "epoch": 2.404040404040404, "grad_norm": 7.0607709884643555, "learning_rate": 3.879144283389495e-07, "loss": 0.5158854126930237, "step": 2856 }, { "epoch": 2.4057239057239057, "grad_norm": 6.402346134185791, "learning_rate": 3.8689402172141915e-07, "loss": 0.6101418733596802, "step": 2858 }, { "epoch": 2.4074074074074074, "grad_norm": 11.600252151489258, "learning_rate": 3.8587607205402916e-07, "loss": 0.3425447642803192, "step": 2860 }, { "epoch": 2.409090909090909, "grad_norm": 3.169504165649414, "learning_rate": 3.848605828440444e-07, "loss": 0.7518799901008606, "step": 2862 }, { "epoch": 2.410774410774411, "grad_norm": 6.338188171386719, "learning_rate": 3.8384755759025313e-07, "loss": 0.4169810712337494, "step": 2864 }, { "epoch": 2.4124579124579126, "grad_norm": 4.593759536743164, "learning_rate": 3.828369997829528e-07, "loss": 0.6622034907341003, "step": 2866 }, { "epoch": 2.4141414141414144, "grad_norm": 10.378397941589355, "learning_rate": 3.818289129039405e-07, "loss": 0.7845497131347656, "step": 2868 }, { "epoch": 2.4158249158249157, "grad_norm": 2.801703453063965, "learning_rate": 3.808233004264997e-07, "loss": 0.5676144361495972, "step": 2870 }, { "epoch": 2.4175084175084174, "grad_norm": 3.49591064453125, "learning_rate": 3.79820165815389e-07, "loss": 0.4738210439682007, "step": 2872 }, { "epoch": 2.419191919191919, "grad_norm": 3.7410953044891357, "learning_rate": 3.788195125268284e-07, "loss": 0.8427296876907349, "step": 2874 }, { "epoch": 2.420875420875421, "grad_norm": 5.019288063049316, "learning_rate": 3.7782134400848995e-07, "loss": 0.7298943996429443, "step": 2876 }, { "epoch": 2.4225589225589226, "grad_norm": 3.775413751602173, "learning_rate": 3.768256636994843e-07, "loss": 0.4356338381767273, "step": 2878 }, { "epoch": 2.4242424242424243, "grad_norm": 2.9583945274353027, "learning_rate": 3.7583247503034864e-07, "loss": 0.7260875701904297, "step": 2880 }, { "epoch": 2.425925925925926, "grad_norm": 3.2975947856903076, "learning_rate": 3.7484178142303625e-07, "loss": 0.5450549721717834, "step": 2882 }, { "epoch": 2.4276094276094278, "grad_norm": 16.18134307861328, "learning_rate": 3.738535862909031e-07, "loss": 0.4824645519256592, "step": 2884 }, { "epoch": 2.429292929292929, "grad_norm": 5.209835529327393, "learning_rate": 3.7286789303869735e-07, "loss": 0.4984836280345917, "step": 2886 }, { "epoch": 2.430976430976431, "grad_norm": 9.006096839904785, "learning_rate": 3.7188470506254744e-07, "loss": 0.6126713156700134, "step": 2888 }, { "epoch": 2.4326599326599325, "grad_norm": 2.905740261077881, "learning_rate": 3.7090402574994885e-07, "loss": 0.5302858352661133, "step": 2890 }, { "epoch": 2.4343434343434343, "grad_norm": 7.235422134399414, "learning_rate": 3.699258584797548e-07, "loss": 0.5883275270462036, "step": 2892 }, { "epoch": 2.436026936026936, "grad_norm": 4.7563157081604, "learning_rate": 3.6895020662216326e-07, "loss": 0.8630578517913818, "step": 2894 }, { "epoch": 2.4377104377104377, "grad_norm": 3.8442506790161133, "learning_rate": 3.679770735387052e-07, "loss": 0.720264732837677, "step": 2896 }, { "epoch": 2.4393939393939394, "grad_norm": 6.493531703948975, "learning_rate": 3.6700646258223343e-07, "loss": 0.6094503998756409, "step": 2898 }, { "epoch": 2.441077441077441, "grad_norm": 24.394699096679688, "learning_rate": 3.6603837709691153e-07, "loss": 0.40544137358665466, "step": 2900 }, { "epoch": 2.442760942760943, "grad_norm": 4.592130661010742, "learning_rate": 3.6507282041820085e-07, "loss": 0.8314005136489868, "step": 2902 }, { "epoch": 2.4444444444444446, "grad_norm": 9.8695707321167, "learning_rate": 3.641097958728506e-07, "loss": 0.49147939682006836, "step": 2904 }, { "epoch": 2.4461279461279464, "grad_norm": 6.742786407470703, "learning_rate": 3.631493067788858e-07, "loss": 0.34731265902519226, "step": 2906 }, { "epoch": 2.4478114478114477, "grad_norm": 7.511764049530029, "learning_rate": 3.6219135644559506e-07, "loss": 0.5173161029815674, "step": 2908 }, { "epoch": 2.4494949494949494, "grad_norm": 3.2894692420959473, "learning_rate": 3.6123594817352046e-07, "loss": 0.6695667505264282, "step": 2910 }, { "epoch": 2.451178451178451, "grad_norm": 5.603763103485107, "learning_rate": 3.602830852544458e-07, "loss": 0.4327901005744934, "step": 2912 }, { "epoch": 2.452861952861953, "grad_norm": 3.399629592895508, "learning_rate": 3.593327709713844e-07, "loss": 0.7913680672645569, "step": 2914 }, { "epoch": 2.4545454545454546, "grad_norm": 3.867079257965088, "learning_rate": 3.5838500859856893e-07, "loss": 0.6534749865531921, "step": 2916 }, { "epoch": 2.4562289562289563, "grad_norm": 1.4564638137817383, "learning_rate": 3.5743980140143975e-07, "loss": 0.19182810187339783, "step": 2918 }, { "epoch": 2.457912457912458, "grad_norm": 4.126720905303955, "learning_rate": 3.5649715263663297e-07, "loss": 0.8050523996353149, "step": 2920 }, { "epoch": 2.45959595959596, "grad_norm": 518.9237670898438, "learning_rate": 3.5555706555197043e-07, "loss": 0.3782300353050232, "step": 2922 }, { "epoch": 2.461279461279461, "grad_norm": 4.448193073272705, "learning_rate": 3.5461954338644795e-07, "loss": 0.316059410572052, "step": 2924 }, { "epoch": 2.462962962962963, "grad_norm": 3.345587730407715, "learning_rate": 3.536845893702234e-07, "loss": 0.5723974704742432, "step": 2926 }, { "epoch": 2.4646464646464645, "grad_norm": 8.732227325439453, "learning_rate": 3.527522067246068e-07, "loss": 0.5091125965118408, "step": 2928 }, { "epoch": 2.4663299663299663, "grad_norm": 3.8187427520751953, "learning_rate": 3.518223986620491e-07, "loss": 0.3073745667934418, "step": 2930 }, { "epoch": 2.468013468013468, "grad_norm": 8.199573516845703, "learning_rate": 3.5089516838612986e-07, "loss": 0.6242831945419312, "step": 2932 }, { "epoch": 2.4696969696969697, "grad_norm": 6.898658752441406, "learning_rate": 3.499705190915476e-07, "loss": 0.627583384513855, "step": 2934 }, { "epoch": 2.4713804713804715, "grad_norm": 7.565421104431152, "learning_rate": 3.4904845396410854e-07, "loss": 0.43692106008529663, "step": 2936 }, { "epoch": 2.473063973063973, "grad_norm": 3.025193691253662, "learning_rate": 3.4812897618071445e-07, "loss": 0.5572280883789062, "step": 2938 }, { "epoch": 2.474747474747475, "grad_norm": 8.582428932189941, "learning_rate": 3.472120889093536e-07, "loss": 0.5607247352600098, "step": 2940 }, { "epoch": 2.4764309764309766, "grad_norm": 5.5012640953063965, "learning_rate": 3.462977953090884e-07, "loss": 0.3747951090335846, "step": 2942 }, { "epoch": 2.478114478114478, "grad_norm": 4.519533634185791, "learning_rate": 3.453860985300446e-07, "loss": 0.43182575702667236, "step": 2944 }, { "epoch": 2.4797979797979797, "grad_norm": 2.845407247543335, "learning_rate": 3.4447700171340164e-07, "loss": 0.9047005772590637, "step": 2946 }, { "epoch": 2.4814814814814814, "grad_norm": 2.432866334915161, "learning_rate": 3.4357050799138053e-07, "loss": 0.938655436038971, "step": 2948 }, { "epoch": 2.483164983164983, "grad_norm": 3.2918946743011475, "learning_rate": 3.4266662048723337e-07, "loss": 1.013432502746582, "step": 2950 }, { "epoch": 2.484848484848485, "grad_norm": 18.99071502685547, "learning_rate": 3.417653423152329e-07, "loss": 0.8985989093780518, "step": 2952 }, { "epoch": 2.4865319865319866, "grad_norm": 6.151244163513184, "learning_rate": 3.4086667658066186e-07, "loss": 0.5609415769577026, "step": 2954 }, { "epoch": 2.4882154882154883, "grad_norm": 8.208552360534668, "learning_rate": 3.3997062637980167e-07, "loss": 0.8369396924972534, "step": 2956 }, { "epoch": 2.48989898989899, "grad_norm": 6.0119853019714355, "learning_rate": 3.390771947999224e-07, "loss": 0.5242006182670593, "step": 2958 }, { "epoch": 2.4915824915824913, "grad_norm": 7.873940467834473, "learning_rate": 3.381863849192718e-07, "loss": 0.8243865370750427, "step": 2960 }, { "epoch": 2.493265993265993, "grad_norm": 7.8693742752075195, "learning_rate": 3.3729819980706444e-07, "loss": 0.5058671832084656, "step": 2962 }, { "epoch": 2.494949494949495, "grad_norm": 2.891031503677368, "learning_rate": 3.364126425234719e-07, "loss": 0.7412878274917603, "step": 2964 }, { "epoch": 2.4966329966329965, "grad_norm": 18.90471649169922, "learning_rate": 3.3552971611961187e-07, "loss": 0.5835074186325073, "step": 2966 }, { "epoch": 2.4983164983164983, "grad_norm": 3.6547908782958984, "learning_rate": 3.34649423637537e-07, "loss": 0.8192091584205627, "step": 2968 }, { "epoch": 2.5, "grad_norm": 11.999411582946777, "learning_rate": 3.337717681102253e-07, "loss": 0.8428059816360474, "step": 2970 }, { "epoch": 2.5016835016835017, "grad_norm": 5.940135955810547, "learning_rate": 3.328967525615697e-07, "loss": 0.39063435792922974, "step": 2972 }, { "epoch": 2.5033670033670035, "grad_norm": 23.767696380615234, "learning_rate": 3.3202438000636634e-07, "loss": 0.47806400060653687, "step": 2974 }, { "epoch": 2.505050505050505, "grad_norm": 6.031237602233887, "learning_rate": 3.311546534503061e-07, "loss": 0.6802424788475037, "step": 2976 }, { "epoch": 2.506734006734007, "grad_norm": 25.405719757080078, "learning_rate": 3.3028757588996303e-07, "loss": 0.38681331276893616, "step": 2978 }, { "epoch": 2.5084175084175087, "grad_norm": 6.533238887786865, "learning_rate": 3.294231503127839e-07, "loss": 0.7302665710449219, "step": 2980 }, { "epoch": 2.51010101010101, "grad_norm": 6.384099006652832, "learning_rate": 3.2856137969707847e-07, "loss": 0.7972818613052368, "step": 2982 }, { "epoch": 2.5117845117845117, "grad_norm": 6.987396240234375, "learning_rate": 3.277022670120095e-07, "loss": 0.39771410822868347, "step": 2984 }, { "epoch": 2.5134680134680134, "grad_norm": 16.85350227355957, "learning_rate": 3.268458152175813e-07, "loss": 0.7731115818023682, "step": 2986 }, { "epoch": 2.515151515151515, "grad_norm": 4.062409400939941, "learning_rate": 3.2599202726463084e-07, "loss": 0.5933781862258911, "step": 2988 }, { "epoch": 2.516835016835017, "grad_norm": 7.16248083114624, "learning_rate": 3.2514090609481683e-07, "loss": 0.09502522647380829, "step": 2990 }, { "epoch": 2.5185185185185186, "grad_norm": 4.739719867706299, "learning_rate": 3.2429245464060965e-07, "loss": 0.8891875147819519, "step": 2992 }, { "epoch": 2.5202020202020203, "grad_norm": 6.538869857788086, "learning_rate": 3.234466758252818e-07, "loss": 0.5735270977020264, "step": 2994 }, { "epoch": 2.5218855218855216, "grad_norm": 5.069677352905273, "learning_rate": 3.2260357256289715e-07, "loss": 0.7090741395950317, "step": 2996 }, { "epoch": 2.5235690235690234, "grad_norm": 4.84168004989624, "learning_rate": 3.217631477583009e-07, "loss": 0.5537684559822083, "step": 2998 }, { "epoch": 2.525252525252525, "grad_norm": 4.053093910217285, "learning_rate": 3.2092540430711044e-07, "loss": 0.5045433044433594, "step": 3000 }, { "epoch": 2.526936026936027, "grad_norm": 6.442458152770996, "learning_rate": 3.200903450957044e-07, "loss": 0.4958549439907074, "step": 3002 }, { "epoch": 2.5286195286195285, "grad_norm": 4.950314521789551, "learning_rate": 3.192579730012129e-07, "loss": 0.9713015556335449, "step": 3004 }, { "epoch": 2.5303030303030303, "grad_norm": 32.4094123840332, "learning_rate": 3.184282908915081e-07, "loss": 0.7774836421012878, "step": 3006 }, { "epoch": 2.531986531986532, "grad_norm": 8.05980396270752, "learning_rate": 3.1760130162519427e-07, "loss": 0.6949951648712158, "step": 3008 }, { "epoch": 2.5336700336700337, "grad_norm": 6.453157901763916, "learning_rate": 3.16777008051597e-07, "loss": 0.2635032832622528, "step": 3010 }, { "epoch": 2.5353535353535355, "grad_norm": 8.72614860534668, "learning_rate": 3.159554130107546e-07, "loss": 0.7169020771980286, "step": 3012 }, { "epoch": 2.537037037037037, "grad_norm": 2.803579807281494, "learning_rate": 3.1513651933340797e-07, "loss": 0.6434400677680969, "step": 3014 }, { "epoch": 2.538720538720539, "grad_norm": 2.3139851093292236, "learning_rate": 3.143203298409899e-07, "loss": 0.522533655166626, "step": 3016 }, { "epoch": 2.5404040404040407, "grad_norm": 7.905545711517334, "learning_rate": 3.1350684734561676e-07, "loss": 0.8724677562713623, "step": 3018 }, { "epoch": 2.542087542087542, "grad_norm": 3.6152162551879883, "learning_rate": 3.126960746500784e-07, "loss": 0.6959270238876343, "step": 3020 }, { "epoch": 2.5437710437710437, "grad_norm": 14.063467025756836, "learning_rate": 3.118880145478274e-07, "loss": 0.7995277643203735, "step": 3022 }, { "epoch": 2.5454545454545454, "grad_norm": 3.315876007080078, "learning_rate": 3.110826698229711e-07, "loss": 0.9624471664428711, "step": 3024 }, { "epoch": 2.547138047138047, "grad_norm": 11.101134300231934, "learning_rate": 3.102800432502607e-07, "loss": 0.22170954942703247, "step": 3026 }, { "epoch": 2.548821548821549, "grad_norm": 6.456979751586914, "learning_rate": 3.0948013759508274e-07, "loss": 0.5246233344078064, "step": 3028 }, { "epoch": 2.5505050505050506, "grad_norm": 3.3993847370147705, "learning_rate": 3.0868295561344874e-07, "loss": 0.4475906491279602, "step": 3030 }, { "epoch": 2.5521885521885523, "grad_norm": 1.6112107038497925, "learning_rate": 3.078885000519858e-07, "loss": 0.4590218961238861, "step": 3032 }, { "epoch": 2.5538720538720536, "grad_norm": 14.948426246643066, "learning_rate": 3.0709677364792767e-07, "loss": 0.8541072607040405, "step": 3034 }, { "epoch": 2.5555555555555554, "grad_norm": 3.668416976928711, "learning_rate": 3.0630777912910533e-07, "loss": 0.9300471544265747, "step": 3036 }, { "epoch": 2.557239057239057, "grad_norm": 4.241018772125244, "learning_rate": 3.0552151921393633e-07, "loss": 0.6171663999557495, "step": 3038 }, { "epoch": 2.558922558922559, "grad_norm": 6.009745121002197, "learning_rate": 3.0473799661141707e-07, "loss": 0.865818977355957, "step": 3040 }, { "epoch": 2.5606060606060606, "grad_norm": 12.198860168457031, "learning_rate": 3.0395721402111286e-07, "loss": 0.6238538026809692, "step": 3042 }, { "epoch": 2.5622895622895623, "grad_norm": 7.544912338256836, "learning_rate": 3.031791741331478e-07, "loss": 0.778638482093811, "step": 3044 }, { "epoch": 2.563973063973064, "grad_norm": 6.367477893829346, "learning_rate": 3.0240387962819695e-07, "loss": 0.6787006855010986, "step": 3046 }, { "epoch": 2.5656565656565657, "grad_norm": 3.4499287605285645, "learning_rate": 3.016313331774762e-07, "loss": 0.8738001585006714, "step": 3048 }, { "epoch": 2.5673400673400675, "grad_norm": 2.4657773971557617, "learning_rate": 3.008615374427329e-07, "loss": 0.3498271703720093, "step": 3050 }, { "epoch": 2.569023569023569, "grad_norm": 5.700794696807861, "learning_rate": 3.000944950762373e-07, "loss": 0.9484968185424805, "step": 3052 }, { "epoch": 2.570707070707071, "grad_norm": 18.000146865844727, "learning_rate": 2.993302087207732e-07, "loss": 0.0691433697938919, "step": 3054 }, { "epoch": 2.5723905723905722, "grad_norm": 16.131559371948242, "learning_rate": 2.985686810096285e-07, "loss": 0.6116932034492493, "step": 3056 }, { "epoch": 2.574074074074074, "grad_norm": 5.901321887969971, "learning_rate": 2.978099145665867e-07, "loss": 0.3154261112213135, "step": 3058 }, { "epoch": 2.5757575757575757, "grad_norm": 4.957643508911133, "learning_rate": 2.970539120059174e-07, "loss": 0.6580586433410645, "step": 3060 }, { "epoch": 2.5774410774410774, "grad_norm": 5.373193264007568, "learning_rate": 2.963006759323676e-07, "loss": 0.6125509142875671, "step": 3062 }, { "epoch": 2.579124579124579, "grad_norm": 5.6912522315979, "learning_rate": 2.955502089411523e-07, "loss": 0.4061823785305023, "step": 3064 }, { "epoch": 2.580808080808081, "grad_norm": 10.119878768920898, "learning_rate": 2.9480251361794656e-07, "loss": 0.5432108044624329, "step": 3066 }, { "epoch": 2.5824915824915826, "grad_norm": 5.718217372894287, "learning_rate": 2.940575925388746e-07, "loss": 0.2773892879486084, "step": 3068 }, { "epoch": 2.584175084175084, "grad_norm": 7.640798091888428, "learning_rate": 2.933154482705035e-07, "loss": 0.08487945795059204, "step": 3070 }, { "epoch": 2.5858585858585856, "grad_norm": 6.283509731292725, "learning_rate": 2.925760833698327e-07, "loss": 0.41717803478240967, "step": 3072 }, { "epoch": 2.5875420875420874, "grad_norm": 3.45359468460083, "learning_rate": 2.9183950038428475e-07, "loss": 0.9503785371780396, "step": 3074 }, { "epoch": 2.589225589225589, "grad_norm": 4.694600582122803, "learning_rate": 2.9110570185169834e-07, "loss": 0.3452813923358917, "step": 3076 }, { "epoch": 2.590909090909091, "grad_norm": 3.6043646335601807, "learning_rate": 2.903746903003184e-07, "loss": 0.8001734614372253, "step": 3078 }, { "epoch": 2.5925925925925926, "grad_norm": 5.150274753570557, "learning_rate": 2.896464682487866e-07, "loss": 0.6741084456443787, "step": 3080 }, { "epoch": 2.5942760942760943, "grad_norm": 6.488956928253174, "learning_rate": 2.8892103820613487e-07, "loss": 0.9191502332687378, "step": 3082 }, { "epoch": 2.595959595959596, "grad_norm": 6.83146333694458, "learning_rate": 2.88198402671775e-07, "loss": 0.5582960844039917, "step": 3084 }, { "epoch": 2.5976430976430978, "grad_norm": 5.457592964172363, "learning_rate": 2.874785641354901e-07, "loss": 0.5779297947883606, "step": 3086 }, { "epoch": 2.5993265993265995, "grad_norm": 3.332746744155884, "learning_rate": 2.867615250774269e-07, "loss": 0.7671989798545837, "step": 3088 }, { "epoch": 2.601010101010101, "grad_norm": 3.9494850635528564, "learning_rate": 2.860472879680869e-07, "loss": 0.8642760515213013, "step": 3090 }, { "epoch": 2.602693602693603, "grad_norm": 4.3518757820129395, "learning_rate": 2.8533585526831726e-07, "loss": 0.6304323673248291, "step": 3092 }, { "epoch": 2.6043771043771042, "grad_norm": 6.350977897644043, "learning_rate": 2.8462722942930286e-07, "loss": 0.4931812286376953, "step": 3094 }, { "epoch": 2.606060606060606, "grad_norm": 3.1723833084106445, "learning_rate": 2.8392141289255806e-07, "loss": 0.6241375207901001, "step": 3096 }, { "epoch": 2.6077441077441077, "grad_norm": 6.107673168182373, "learning_rate": 2.8321840808991775e-07, "loss": 0.5527880191802979, "step": 3098 }, { "epoch": 2.6094276094276094, "grad_norm": 5.577755928039551, "learning_rate": 2.8251821744352933e-07, "loss": 0.6250026226043701, "step": 3100 }, { "epoch": 2.611111111111111, "grad_norm": 7.03651762008667, "learning_rate": 2.8182084336584423e-07, "loss": 0.5582347512245178, "step": 3102 }, { "epoch": 2.612794612794613, "grad_norm": 2.2495877742767334, "learning_rate": 2.8112628825960926e-07, "loss": 0.791733980178833, "step": 3104 }, { "epoch": 2.6144781144781146, "grad_norm": 17.0977725982666, "learning_rate": 2.804345545178594e-07, "loss": 0.7450399398803711, "step": 3106 }, { "epoch": 2.616161616161616, "grad_norm": 4.711960792541504, "learning_rate": 2.7974564452390833e-07, "loss": 0.17849119007587433, "step": 3108 }, { "epoch": 2.6178451178451176, "grad_norm": 10.859472274780273, "learning_rate": 2.790595606513406e-07, "loss": 0.7354204654693604, "step": 3110 }, { "epoch": 2.6195286195286194, "grad_norm": 3.239361047744751, "learning_rate": 2.78376305264004e-07, "loss": 0.41245055198669434, "step": 3112 }, { "epoch": 2.621212121212121, "grad_norm": 2.8065528869628906, "learning_rate": 2.776958807160011e-07, "loss": 0.37273505330085754, "step": 3114 }, { "epoch": 2.622895622895623, "grad_norm": 4.473266124725342, "learning_rate": 2.7701828935168026e-07, "loss": 0.8599231243133545, "step": 3116 }, { "epoch": 2.6245791245791246, "grad_norm": 7.686254501342773, "learning_rate": 2.763435335056291e-07, "loss": 0.9832479953765869, "step": 3118 }, { "epoch": 2.6262626262626263, "grad_norm": 2.1346304416656494, "learning_rate": 2.756716155026656e-07, "loss": 0.5217673778533936, "step": 3120 }, { "epoch": 2.627946127946128, "grad_norm": 3.8724348545074463, "learning_rate": 2.750025376578295e-07, "loss": 0.8622322082519531, "step": 3122 }, { "epoch": 2.6296296296296298, "grad_norm": 2.7656431198120117, "learning_rate": 2.743363022763758e-07, "loss": 0.8336771726608276, "step": 3124 }, { "epoch": 2.6313131313131315, "grad_norm": 2.994492769241333, "learning_rate": 2.7367291165376593e-07, "loss": 0.5954484939575195, "step": 3126 }, { "epoch": 2.6329966329966332, "grad_norm": 6.633072376251221, "learning_rate": 2.7301236807565925e-07, "loss": 0.8022388219833374, "step": 3128 }, { "epoch": 2.634680134680135, "grad_norm": 9.094773292541504, "learning_rate": 2.7235467381790654e-07, "loss": 0.5048923492431641, "step": 3130 }, { "epoch": 2.6363636363636362, "grad_norm": 5.657838821411133, "learning_rate": 2.716998311465415e-07, "loss": 0.2697800397872925, "step": 3132 }, { "epoch": 2.638047138047138, "grad_norm": 4.260385513305664, "learning_rate": 2.710478423177722e-07, "loss": 0.8560886383056641, "step": 3134 }, { "epoch": 2.6397306397306397, "grad_norm": 5.333981513977051, "learning_rate": 2.7039870957797464e-07, "loss": 0.7351222038269043, "step": 3136 }, { "epoch": 2.6414141414141414, "grad_norm": 8.460240364074707, "learning_rate": 2.697524351636844e-07, "loss": 0.41435521841049194, "step": 3138 }, { "epoch": 2.643097643097643, "grad_norm": 4.321287155151367, "learning_rate": 2.691090213015886e-07, "loss": 0.9173501133918762, "step": 3140 }, { "epoch": 2.644781144781145, "grad_norm": 4.3384857177734375, "learning_rate": 2.6846847020851884e-07, "loss": 0.5904110670089722, "step": 3142 }, { "epoch": 2.6464646464646466, "grad_norm": 6.099468231201172, "learning_rate": 2.678307840914431e-07, "loss": 0.8097279071807861, "step": 3144 }, { "epoch": 2.648148148148148, "grad_norm": 3.9057722091674805, "learning_rate": 2.6719596514745826e-07, "loss": 0.8938575983047485, "step": 3146 }, { "epoch": 2.6498316498316496, "grad_norm": 8.309523582458496, "learning_rate": 2.665640155637828e-07, "loss": 0.5425578355789185, "step": 3148 }, { "epoch": 2.6515151515151514, "grad_norm": 3.0026330947875977, "learning_rate": 2.659349375177489e-07, "loss": 0.8360292911529541, "step": 3150 }, { "epoch": 2.653198653198653, "grad_norm": 5.204579830169678, "learning_rate": 2.6530873317679515e-07, "loss": 0.2029864341020584, "step": 3152 }, { "epoch": 2.654882154882155, "grad_norm": 23.3417911529541, "learning_rate": 2.6468540469845895e-07, "loss": 0.9556988477706909, "step": 3154 }, { "epoch": 2.6565656565656566, "grad_norm": 13.595047950744629, "learning_rate": 2.640649542303693e-07, "loss": 0.5114415884017944, "step": 3156 }, { "epoch": 2.6582491582491583, "grad_norm": 6.162187576293945, "learning_rate": 2.634473839102389e-07, "loss": 0.39493846893310547, "step": 3158 }, { "epoch": 2.65993265993266, "grad_norm": 43.08856964111328, "learning_rate": 2.6283269586585737e-07, "loss": 0.5446680784225464, "step": 3160 }, { "epoch": 2.6616161616161618, "grad_norm": 11.108345031738281, "learning_rate": 2.6222089221508404e-07, "loss": 0.6248540282249451, "step": 3162 }, { "epoch": 2.6632996632996635, "grad_norm": 4.680754661560059, "learning_rate": 2.6161197506583944e-07, "loss": 0.8368432521820068, "step": 3164 }, { "epoch": 2.6649831649831652, "grad_norm": 7.473052978515625, "learning_rate": 2.610059465160995e-07, "loss": 0.619489312171936, "step": 3166 }, { "epoch": 2.6666666666666665, "grad_norm": 2.3733127117156982, "learning_rate": 2.6040280865388773e-07, "loss": 0.7894487380981445, "step": 3168 }, { "epoch": 2.6683501683501682, "grad_norm": 1.7357522249221802, "learning_rate": 2.5980256355726744e-07, "loss": 0.5782526135444641, "step": 3170 }, { "epoch": 2.67003367003367, "grad_norm": 7.880289554595947, "learning_rate": 2.5920521329433606e-07, "loss": 1.0222315788269043, "step": 3172 }, { "epoch": 2.6717171717171717, "grad_norm": 3.272036075592041, "learning_rate": 2.586107599232164e-07, "loss": 0.9073632955551147, "step": 3174 }, { "epoch": 2.6734006734006734, "grad_norm": 3.847628355026245, "learning_rate": 2.5801920549205023e-07, "loss": 0.46630191802978516, "step": 3176 }, { "epoch": 2.675084175084175, "grad_norm": 2.5537798404693604, "learning_rate": 2.5743055203899167e-07, "loss": 0.9780217409133911, "step": 3178 }, { "epoch": 2.676767676767677, "grad_norm": 4.765364170074463, "learning_rate": 2.568448015921996e-07, "loss": 0.639081597328186, "step": 3180 }, { "epoch": 2.678451178451178, "grad_norm": 5.098658084869385, "learning_rate": 2.562619561698306e-07, "loss": 0.7984585762023926, "step": 3182 }, { "epoch": 2.68013468013468, "grad_norm": 2.4715800285339355, "learning_rate": 2.556820177800324e-07, "loss": 0.9407286643981934, "step": 3184 }, { "epoch": 2.6818181818181817, "grad_norm": 2.711570978164673, "learning_rate": 2.551049884209371e-07, "loss": 0.8115611672401428, "step": 3186 }, { "epoch": 2.6835016835016834, "grad_norm": 9.145926475524902, "learning_rate": 2.5453087008065307e-07, "loss": 0.7339519262313843, "step": 3188 }, { "epoch": 2.685185185185185, "grad_norm": 1.2086787223815918, "learning_rate": 2.5395966473725994e-07, "loss": 0.49706321954727173, "step": 3190 }, { "epoch": 2.686868686868687, "grad_norm": 14.16477108001709, "learning_rate": 2.5339137435880043e-07, "loss": 0.6397048234939575, "step": 3192 }, { "epoch": 2.6885521885521886, "grad_norm": 3.3142552375793457, "learning_rate": 2.5282600090327383e-07, "loss": 0.7652658820152283, "step": 3194 }, { "epoch": 2.6902356902356903, "grad_norm": 19.05327606201172, "learning_rate": 2.5226354631862966e-07, "loss": 0.6125460863113403, "step": 3196 }, { "epoch": 2.691919191919192, "grad_norm": 4.221333026885986, "learning_rate": 2.517040125427608e-07, "loss": 0.7383702397346497, "step": 3198 }, { "epoch": 2.6936026936026938, "grad_norm": 2.8563621044158936, "learning_rate": 2.511474015034964e-07, "loss": 0.8494305610656738, "step": 3200 }, { "epoch": 2.6952861952861955, "grad_norm": 3.877546548843384, "learning_rate": 2.5059371511859557e-07, "loss": 0.6800326108932495, "step": 3202 }, { "epoch": 2.6969696969696972, "grad_norm": 3.861481189727783, "learning_rate": 2.50042955295741e-07, "loss": 0.6918296813964844, "step": 3204 }, { "epoch": 2.6986531986531985, "grad_norm": 9.997620582580566, "learning_rate": 2.494951239325321e-07, "loss": 0.6519820094108582, "step": 3206 }, { "epoch": 2.7003367003367003, "grad_norm": 4.166572093963623, "learning_rate": 2.489502229164781e-07, "loss": 0.5281827449798584, "step": 3208 }, { "epoch": 2.702020202020202, "grad_norm": 4.448598384857178, "learning_rate": 2.4840825412499274e-07, "loss": 0.8719410300254822, "step": 3210 }, { "epoch": 2.7037037037037037, "grad_norm": 4.639568328857422, "learning_rate": 2.478692194253861e-07, "loss": 0.5532783269882202, "step": 3212 }, { "epoch": 2.7053872053872055, "grad_norm": 8.537738800048828, "learning_rate": 2.473331206748597e-07, "loss": 0.5865626931190491, "step": 3214 }, { "epoch": 2.707070707070707, "grad_norm": 10.096135139465332, "learning_rate": 2.467999597204996e-07, "loss": 0.2805863618850708, "step": 3216 }, { "epoch": 2.708754208754209, "grad_norm": 6.932223320007324, "learning_rate": 2.462697383992691e-07, "loss": 0.7335485219955444, "step": 3218 }, { "epoch": 2.71043771043771, "grad_norm": 12.214366912841797, "learning_rate": 2.457424585380041e-07, "loss": 0.3276599943637848, "step": 3220 }, { "epoch": 2.712121212121212, "grad_norm": 10.359675407409668, "learning_rate": 2.4521812195340544e-07, "loss": 0.672775149345398, "step": 3222 }, { "epoch": 2.7138047138047137, "grad_norm": 27.647464752197266, "learning_rate": 2.4469673045203333e-07, "loss": 0.40836215019226074, "step": 3224 }, { "epoch": 2.7154882154882154, "grad_norm": 15.687188148498535, "learning_rate": 2.441782858303007e-07, "loss": 0.4133344888687134, "step": 3226 }, { "epoch": 2.717171717171717, "grad_norm": 17.905902862548828, "learning_rate": 2.436627898744678e-07, "loss": 0.7267272472381592, "step": 3228 }, { "epoch": 2.718855218855219, "grad_norm": 9.417744636535645, "learning_rate": 2.4315024436063464e-07, "loss": 0.42516928911209106, "step": 3230 }, { "epoch": 2.7205387205387206, "grad_norm": 8.572908401489258, "learning_rate": 2.4264065105473637e-07, "loss": 0.768959641456604, "step": 3232 }, { "epoch": 2.7222222222222223, "grad_norm": 1.9153132438659668, "learning_rate": 2.4213401171253656e-07, "loss": 0.6403470039367676, "step": 3234 }, { "epoch": 2.723905723905724, "grad_norm": 5.261312484741211, "learning_rate": 2.416303280796206e-07, "loss": 0.7732399106025696, "step": 3236 }, { "epoch": 2.725589225589226, "grad_norm": 3.5602827072143555, "learning_rate": 2.411296018913907e-07, "loss": 0.7329007387161255, "step": 3238 }, { "epoch": 2.7272727272727275, "grad_norm": 3.6793055534362793, "learning_rate": 2.406318348730592e-07, "loss": 0.7464162111282349, "step": 3240 }, { "epoch": 2.728956228956229, "grad_norm": 2.7270774841308594, "learning_rate": 2.401370287396428e-07, "loss": 0.7636083364486694, "step": 3242 }, { "epoch": 2.7306397306397305, "grad_norm": 4.971183776855469, "learning_rate": 2.396451851959571e-07, "loss": 0.599960207939148, "step": 3244 }, { "epoch": 2.7323232323232323, "grad_norm": 4.194789886474609, "learning_rate": 2.391563059366099e-07, "loss": 0.7824025750160217, "step": 3246 }, { "epoch": 2.734006734006734, "grad_norm": 5.917283535003662, "learning_rate": 2.3867039264599587e-07, "loss": 0.8408564329147339, "step": 3248 }, { "epoch": 2.7356902356902357, "grad_norm": 3.7883689403533936, "learning_rate": 2.3818744699829105e-07, "loss": 0.6503514051437378, "step": 3250 }, { "epoch": 2.7373737373737375, "grad_norm": 6.666152000427246, "learning_rate": 2.3770747065744594e-07, "loss": 0.3846713900566101, "step": 3252 }, { "epoch": 2.739057239057239, "grad_norm": 4.073997497558594, "learning_rate": 2.3723046527718137e-07, "loss": 0.5147488713264465, "step": 3254 }, { "epoch": 2.7407407407407405, "grad_norm": 6.8026018142700195, "learning_rate": 2.367564325009815e-07, "loss": 0.5139864087104797, "step": 3256 }, { "epoch": 2.742424242424242, "grad_norm": 2.5795681476593018, "learning_rate": 2.362853739620885e-07, "loss": 0.5290718078613281, "step": 3258 }, { "epoch": 2.744107744107744, "grad_norm": 14.904226303100586, "learning_rate": 2.3581729128349745e-07, "loss": 0.3965787887573242, "step": 3260 }, { "epoch": 2.7457912457912457, "grad_norm": 5.50350284576416, "learning_rate": 2.3535218607795013e-07, "loss": 0.6484100222587585, "step": 3262 }, { "epoch": 2.7474747474747474, "grad_norm": 5.252780437469482, "learning_rate": 2.3489005994792948e-07, "loss": 0.8430534601211548, "step": 3264 }, { "epoch": 2.749158249158249, "grad_norm": 7.023755073547363, "learning_rate": 2.3443091448565454e-07, "loss": 0.957166314125061, "step": 3266 }, { "epoch": 2.750841750841751, "grad_norm": 11.244546890258789, "learning_rate": 2.339747512730749e-07, "loss": 0.3728073835372925, "step": 3268 }, { "epoch": 2.7525252525252526, "grad_norm": 3.2135775089263916, "learning_rate": 2.3352157188186424e-07, "loss": 0.9523381590843201, "step": 3270 }, { "epoch": 2.7542087542087543, "grad_norm": 7.215963840484619, "learning_rate": 2.3307137787341667e-07, "loss": 0.4420832395553589, "step": 3272 }, { "epoch": 2.755892255892256, "grad_norm": 2.81378436088562, "learning_rate": 2.3262417079883986e-07, "loss": 0.660933792591095, "step": 3274 }, { "epoch": 2.757575757575758, "grad_norm": 127.56824493408203, "learning_rate": 2.3217995219895016e-07, "loss": 0.3062414228916168, "step": 3276 }, { "epoch": 2.7592592592592595, "grad_norm": 0.698665201663971, "learning_rate": 2.317387236042678e-07, "loss": 0.021941782906651497, "step": 3278 }, { "epoch": 2.760942760942761, "grad_norm": 4.418609619140625, "learning_rate": 2.313004865350109e-07, "loss": 1.040034532546997, "step": 3280 }, { "epoch": 2.7626262626262625, "grad_norm": 3.401939868927002, "learning_rate": 2.3086524250109045e-07, "loss": 1.0358326435089111, "step": 3282 }, { "epoch": 2.7643097643097643, "grad_norm": 18.86932945251465, "learning_rate": 2.3043299300210528e-07, "loss": 0.23045207560062408, "step": 3284 }, { "epoch": 2.765993265993266, "grad_norm": 3.0848443508148193, "learning_rate": 2.30003739527337e-07, "loss": 0.7953276038169861, "step": 3286 }, { "epoch": 2.7676767676767677, "grad_norm": 4.258274078369141, "learning_rate": 2.2957748355574408e-07, "loss": 0.7808912396430969, "step": 3288 }, { "epoch": 2.7693602693602695, "grad_norm": 8.350629806518555, "learning_rate": 2.2915422655595795e-07, "loss": 0.2024976909160614, "step": 3290 }, { "epoch": 2.771043771043771, "grad_norm": 3.212890386581421, "learning_rate": 2.287339699862771e-07, "loss": 0.9757770299911499, "step": 3292 }, { "epoch": 2.7727272727272725, "grad_norm": 4.119185447692871, "learning_rate": 2.2831671529466205e-07, "loss": 0.8145531415939331, "step": 3294 }, { "epoch": 2.774410774410774, "grad_norm": 4.300760269165039, "learning_rate": 2.2790246391873086e-07, "loss": 0.8364596366882324, "step": 3296 }, { "epoch": 2.776094276094276, "grad_norm": 5.6328630447387695, "learning_rate": 2.2749121728575393e-07, "loss": 0.2111830711364746, "step": 3298 }, { "epoch": 2.7777777777777777, "grad_norm": 6.152875900268555, "learning_rate": 2.2708297681264874e-07, "loss": 0.4531656801700592, "step": 3300 }, { "epoch": 2.7794612794612794, "grad_norm": 6.0950164794921875, "learning_rate": 2.2667774390597562e-07, "loss": 0.486369788646698, "step": 3302 }, { "epoch": 2.781144781144781, "grad_norm": 12.233784675598145, "learning_rate": 2.2627551996193247e-07, "loss": 0.4338839054107666, "step": 3304 }, { "epoch": 2.782828282828283, "grad_norm": 11.843306541442871, "learning_rate": 2.2587630636634985e-07, "loss": 0.7146729230880737, "step": 3306 }, { "epoch": 2.7845117845117846, "grad_norm": 26.314231872558594, "learning_rate": 2.2548010449468676e-07, "loss": 0.426150381565094, "step": 3308 }, { "epoch": 2.7861952861952863, "grad_norm": 5.808564186096191, "learning_rate": 2.2508691571202528e-07, "loss": 0.6131501793861389, "step": 3310 }, { "epoch": 2.787878787878788, "grad_norm": 4.843730926513672, "learning_rate": 2.2469674137306627e-07, "loss": 0.4474066197872162, "step": 3312 }, { "epoch": 2.78956228956229, "grad_norm": 5.842626571655273, "learning_rate": 2.2430958282212414e-07, "loss": 0.676105260848999, "step": 3314 }, { "epoch": 2.791245791245791, "grad_norm": 10.79865550994873, "learning_rate": 2.239254413931236e-07, "loss": 0.9383071660995483, "step": 3316 }, { "epoch": 2.792929292929293, "grad_norm": 2.2393341064453125, "learning_rate": 2.2354431840959307e-07, "loss": 0.7455552220344543, "step": 3318 }, { "epoch": 2.7946127946127945, "grad_norm": 5.729065895080566, "learning_rate": 2.2316621518466167e-07, "loss": 0.28741055727005005, "step": 3320 }, { "epoch": 2.7962962962962963, "grad_norm": 9.186633110046387, "learning_rate": 2.227911330210542e-07, "loss": 0.6114668250083923, "step": 3322 }, { "epoch": 2.797979797979798, "grad_norm": 12.35034465789795, "learning_rate": 2.2241907321108638e-07, "loss": 0.6540449857711792, "step": 3324 }, { "epoch": 2.7996632996632997, "grad_norm": 2.6777584552764893, "learning_rate": 2.22050037036661e-07, "loss": 0.30680525302886963, "step": 3326 }, { "epoch": 2.8013468013468015, "grad_norm": 3.350935697555542, "learning_rate": 2.216840257692628e-07, "loss": 0.7153966426849365, "step": 3328 }, { "epoch": 2.8030303030303028, "grad_norm": 2.8656368255615234, "learning_rate": 2.213210406699547e-07, "loss": 0.7619553804397583, "step": 3330 }, { "epoch": 2.8047138047138045, "grad_norm": 7.474374294281006, "learning_rate": 2.209610829893729e-07, "loss": 0.5717604160308838, "step": 3332 }, { "epoch": 2.8063973063973062, "grad_norm": 8.4893798828125, "learning_rate": 2.2060415396772337e-07, "loss": 0.5182145833969116, "step": 3334 }, { "epoch": 2.808080808080808, "grad_norm": 8.64901065826416, "learning_rate": 2.2025025483477654e-07, "loss": 0.5500608682632446, "step": 3336 }, { "epoch": 2.8097643097643097, "grad_norm": 2.9587368965148926, "learning_rate": 2.1989938680986382e-07, "loss": 0.2802525758743286, "step": 3338 }, { "epoch": 2.8114478114478114, "grad_norm": 7.318872928619385, "learning_rate": 2.1955155110187344e-07, "loss": 0.6136119365692139, "step": 3340 }, { "epoch": 2.813131313131313, "grad_norm": 7.030915260314941, "learning_rate": 2.1920674890924545e-07, "loss": 0.7545953989028931, "step": 3342 }, { "epoch": 2.814814814814815, "grad_norm": 2.9126713275909424, "learning_rate": 2.1886498141996858e-07, "loss": 0.33089566230773926, "step": 3344 }, { "epoch": 2.8164983164983166, "grad_norm": 2.292778968811035, "learning_rate": 2.185262498115759e-07, "loss": 0.820242166519165, "step": 3346 }, { "epoch": 2.8181818181818183, "grad_norm": 9.872072219848633, "learning_rate": 2.1819055525113995e-07, "loss": 0.4794435501098633, "step": 3348 }, { "epoch": 2.81986531986532, "grad_norm": 6.807747840881348, "learning_rate": 2.178578988952698e-07, "loss": 0.8766056299209595, "step": 3350 }, { "epoch": 2.821548821548822, "grad_norm": 11.850113868713379, "learning_rate": 2.1752828189010677e-07, "loss": 0.8210408687591553, "step": 3352 }, { "epoch": 2.823232323232323, "grad_norm": 4.237025260925293, "learning_rate": 2.1720170537132003e-07, "loss": 0.7889919281005859, "step": 3354 }, { "epoch": 2.824915824915825, "grad_norm": 6.600332736968994, "learning_rate": 2.16878170464103e-07, "loss": 0.7373786568641663, "step": 3356 }, { "epoch": 2.8265993265993266, "grad_norm": 3.782309055328369, "learning_rate": 2.1655767828316967e-07, "loss": 0.4632776975631714, "step": 3358 }, { "epoch": 2.8282828282828283, "grad_norm": 20.347566604614258, "learning_rate": 2.1624022993275042e-07, "loss": 0.47924166917800903, "step": 3360 }, { "epoch": 2.82996632996633, "grad_norm": 3.760439872741699, "learning_rate": 2.1592582650658838e-07, "loss": 0.5661218166351318, "step": 3362 }, { "epoch": 2.8316498316498318, "grad_norm": 12.392730712890625, "learning_rate": 2.1561446908793575e-07, "loss": 0.5744220018386841, "step": 3364 }, { "epoch": 2.8333333333333335, "grad_norm": 9.636838912963867, "learning_rate": 2.1530615874954978e-07, "loss": 0.4627985954284668, "step": 3366 }, { "epoch": 2.8350168350168348, "grad_norm": 21.72933578491211, "learning_rate": 2.1500089655368913e-07, "loss": 0.4576794505119324, "step": 3368 }, { "epoch": 2.8367003367003365, "grad_norm": 7.211141586303711, "learning_rate": 2.146986835521108e-07, "loss": 0.8104113340377808, "step": 3370 }, { "epoch": 2.8383838383838382, "grad_norm": 3.049208879470825, "learning_rate": 2.143995207860655e-07, "loss": 0.6803615093231201, "step": 3372 }, { "epoch": 2.84006734006734, "grad_norm": 15.541363716125488, "learning_rate": 2.1410340928629483e-07, "loss": 0.2819385230541229, "step": 3374 }, { "epoch": 2.8417508417508417, "grad_norm": 3.854581832885742, "learning_rate": 2.138103500730278e-07, "loss": 0.8866885900497437, "step": 3376 }, { "epoch": 2.8434343434343434, "grad_norm": 2.881070613861084, "learning_rate": 2.1352034415597635e-07, "loss": 0.7249988317489624, "step": 3378 }, { "epoch": 2.845117845117845, "grad_norm": 2.772418260574341, "learning_rate": 2.1323339253433309e-07, "loss": 0.5438086986541748, "step": 3380 }, { "epoch": 2.846801346801347, "grad_norm": 5.94671106338501, "learning_rate": 2.1294949619676717e-07, "loss": 0.5575168132781982, "step": 3382 }, { "epoch": 2.8484848484848486, "grad_norm": 10.924814224243164, "learning_rate": 2.1266865612142064e-07, "loss": 0.5616028308868408, "step": 3384 }, { "epoch": 2.8501683501683504, "grad_norm": 4.334954261779785, "learning_rate": 2.1239087327590582e-07, "loss": 0.7617322206497192, "step": 3386 }, { "epoch": 2.851851851851852, "grad_norm": 1.0559417009353638, "learning_rate": 2.121161486173017e-07, "loss": 0.7200487852096558, "step": 3388 }, { "epoch": 2.8535353535353534, "grad_norm": 8.445873260498047, "learning_rate": 2.1184448309215015e-07, "loss": 0.4146542549133301, "step": 3390 }, { "epoch": 2.855218855218855, "grad_norm": 3.8039331436157227, "learning_rate": 2.1157587763645322e-07, "loss": 0.46166175603866577, "step": 3392 }, { "epoch": 2.856902356902357, "grad_norm": 6.415493488311768, "learning_rate": 2.113103331756698e-07, "loss": 0.930475652217865, "step": 3394 }, { "epoch": 2.8585858585858586, "grad_norm": 3.632256507873535, "learning_rate": 2.110478506247122e-07, "loss": 0.9054207801818848, "step": 3396 }, { "epoch": 2.8602693602693603, "grad_norm": 4.30327844619751, "learning_rate": 2.1078843088794325e-07, "loss": 0.4588157534599304, "step": 3398 }, { "epoch": 2.861952861952862, "grad_norm": 7.749840259552002, "learning_rate": 2.105320748591732e-07, "loss": 0.3445073962211609, "step": 3400 }, { "epoch": 2.8636363636363638, "grad_norm": 12.756885528564453, "learning_rate": 2.1027878342165624e-07, "loss": 0.4542715847492218, "step": 3402 }, { "epoch": 2.865319865319865, "grad_norm": 4.2234296798706055, "learning_rate": 2.1002855744808815e-07, "loss": 0.38249820470809937, "step": 3404 }, { "epoch": 2.8670033670033668, "grad_norm": 11.025925636291504, "learning_rate": 2.0978139780060257e-07, "loss": 0.7736653089523315, "step": 3406 }, { "epoch": 2.8686868686868685, "grad_norm": 6.31485652923584, "learning_rate": 2.0953730533076862e-07, "loss": 0.30026775598526, "step": 3408 }, { "epoch": 2.8703703703703702, "grad_norm": 4.0879034996032715, "learning_rate": 2.0929628087958734e-07, "loss": 0.7915642261505127, "step": 3410 }, { "epoch": 2.872053872053872, "grad_norm": 8.910355567932129, "learning_rate": 2.0905832527748953e-07, "loss": 0.4548564851284027, "step": 3412 }, { "epoch": 2.8737373737373737, "grad_norm": 4.792451858520508, "learning_rate": 2.0882343934433236e-07, "loss": 0.6330816745758057, "step": 3414 }, { "epoch": 2.8754208754208754, "grad_norm": 6.679534912109375, "learning_rate": 2.085916238893966e-07, "loss": 0.17160841822624207, "step": 3416 }, { "epoch": 2.877104377104377, "grad_norm": 4.708609104156494, "learning_rate": 2.0836287971138418e-07, "loss": 0.6133572459220886, "step": 3418 }, { "epoch": 2.878787878787879, "grad_norm": 2.8028249740600586, "learning_rate": 2.0813720759841492e-07, "loss": 0.37677788734436035, "step": 3420 }, { "epoch": 2.8804713804713806, "grad_norm": 17.95976448059082, "learning_rate": 2.0791460832802423e-07, "loss": 0.6834679841995239, "step": 3422 }, { "epoch": 2.8821548821548824, "grad_norm": 1.99964439868927, "learning_rate": 2.0769508266716027e-07, "loss": 0.5820834636688232, "step": 3424 }, { "epoch": 2.883838383838384, "grad_norm": 4.93143367767334, "learning_rate": 2.0747863137218126e-07, "loss": 0.6087404489517212, "step": 3426 }, { "epoch": 2.8855218855218854, "grad_norm": 4.417807102203369, "learning_rate": 2.0726525518885308e-07, "loss": 0.5436590909957886, "step": 3428 }, { "epoch": 2.887205387205387, "grad_norm": 17.931697845458984, "learning_rate": 2.0705495485234653e-07, "loss": 0.28521019220352173, "step": 3430 }, { "epoch": 2.888888888888889, "grad_norm": 4.5258026123046875, "learning_rate": 2.0684773108723455e-07, "loss": 0.5188443660736084, "step": 3432 }, { "epoch": 2.8905723905723906, "grad_norm": 7.992106914520264, "learning_rate": 2.0664358460749018e-07, "loss": 0.2710973620414734, "step": 3434 }, { "epoch": 2.8922558922558923, "grad_norm": 2.4972705841064453, "learning_rate": 2.064425161164842e-07, "loss": 0.9403241872787476, "step": 3436 }, { "epoch": 2.893939393939394, "grad_norm": 7.593927383422852, "learning_rate": 2.0624452630698195e-07, "loss": 0.8685269355773926, "step": 3438 }, { "epoch": 2.8956228956228958, "grad_norm": 5.5332746505737305, "learning_rate": 2.0604961586114163e-07, "loss": 0.7080799341201782, "step": 3440 }, { "epoch": 2.897306397306397, "grad_norm": 4.279024600982666, "learning_rate": 2.0585778545051195e-07, "loss": 0.9225847721099854, "step": 3442 }, { "epoch": 2.898989898989899, "grad_norm": 7.960180282592773, "learning_rate": 2.0566903573602913e-07, "loss": 0.26514777541160583, "step": 3444 }, { "epoch": 2.9006734006734005, "grad_norm": 52.408592224121094, "learning_rate": 2.0548336736801548e-07, "loss": 0.5182454586029053, "step": 3446 }, { "epoch": 2.9023569023569022, "grad_norm": 3.880129098892212, "learning_rate": 2.0530078098617668e-07, "loss": 1.0010104179382324, "step": 3448 }, { "epoch": 2.904040404040404, "grad_norm": 5.750271320343018, "learning_rate": 2.0512127721959954e-07, "loss": 0.23654749989509583, "step": 3450 }, { "epoch": 2.9057239057239057, "grad_norm": 4.4567551612854, "learning_rate": 2.0494485668675003e-07, "loss": 0.6079249382019043, "step": 3452 }, { "epoch": 2.9074074074074074, "grad_norm": 13.503162384033203, "learning_rate": 2.0477151999547137e-07, "loss": 0.5366786122322083, "step": 3454 }, { "epoch": 2.909090909090909, "grad_norm": 3.5950307846069336, "learning_rate": 2.0460126774298115e-07, "loss": 0.9563678503036499, "step": 3456 }, { "epoch": 2.910774410774411, "grad_norm": 2.127427339553833, "learning_rate": 2.044341005158701e-07, "loss": 0.7329115867614746, "step": 3458 }, { "epoch": 2.9124579124579126, "grad_norm": 10.821589469909668, "learning_rate": 2.042700188900996e-07, "loss": 0.9082905054092407, "step": 3460 }, { "epoch": 2.9141414141414144, "grad_norm": 11.092399597167969, "learning_rate": 2.0410902343099998e-07, "loss": 1.0648142099380493, "step": 3462 }, { "epoch": 2.915824915824916, "grad_norm": 8.53269100189209, "learning_rate": 2.039511146932683e-07, "loss": 0.6280519962310791, "step": 3464 }, { "epoch": 2.9175084175084174, "grad_norm": 4.54081916809082, "learning_rate": 2.0379629322096658e-07, "loss": 0.9411839246749878, "step": 3466 }, { "epoch": 2.919191919191919, "grad_norm": 3.7969729900360107, "learning_rate": 2.036445595475199e-07, "loss": 0.5461298823356628, "step": 3468 }, { "epoch": 2.920875420875421, "grad_norm": 2.2279632091522217, "learning_rate": 2.0349591419571473e-07, "loss": 0.0855223536491394, "step": 3470 }, { "epoch": 2.9225589225589226, "grad_norm": 8.34626293182373, "learning_rate": 2.0335035767769674e-07, "loss": 0.6720945835113525, "step": 3472 }, { "epoch": 2.9242424242424243, "grad_norm": 4.789892673492432, "learning_rate": 2.032078904949694e-07, "loss": 0.6181377172470093, "step": 3474 }, { "epoch": 2.925925925925926, "grad_norm": 4.624399662017822, "learning_rate": 2.0306851313839217e-07, "loss": 0.25879359245300293, "step": 3476 }, { "epoch": 2.9276094276094278, "grad_norm": 6.712757587432861, "learning_rate": 2.0293222608817862e-07, "loss": 0.7951024770736694, "step": 3478 }, { "epoch": 2.929292929292929, "grad_norm": 4.2503814697265625, "learning_rate": 2.0279902981389491e-07, "loss": 0.4090489447116852, "step": 3480 }, { "epoch": 2.930976430976431, "grad_norm": 4.199467182159424, "learning_rate": 2.026689247744584e-07, "loss": 0.7058537602424622, "step": 3482 }, { "epoch": 2.9326599326599325, "grad_norm": 2.017397165298462, "learning_rate": 2.0254191141813563e-07, "loss": 0.4949754476547241, "step": 3484 }, { "epoch": 2.9343434343434343, "grad_norm": 2.5312118530273438, "learning_rate": 2.0241799018254102e-07, "loss": 0.6103169322013855, "step": 3486 }, { "epoch": 2.936026936026936, "grad_norm": 7.283255577087402, "learning_rate": 2.0229716149463543e-07, "loss": 0.5724541544914246, "step": 3488 }, { "epoch": 2.9377104377104377, "grad_norm": 15.510021209716797, "learning_rate": 2.0217942577072447e-07, "loss": 0.5570365190505981, "step": 3490 }, { "epoch": 2.9393939393939394, "grad_norm": 15.865419387817383, "learning_rate": 2.0206478341645734e-07, "loss": 0.8093217611312866, "step": 3492 }, { "epoch": 2.941077441077441, "grad_norm": 16.84939956665039, "learning_rate": 2.0195323482682508e-07, "loss": 0.40408650040626526, "step": 3494 }, { "epoch": 2.942760942760943, "grad_norm": 2.694458246231079, "learning_rate": 2.0184478038615948e-07, "loss": 0.6976212859153748, "step": 3496 }, { "epoch": 2.9444444444444446, "grad_norm": 6.089773654937744, "learning_rate": 2.0173942046813191e-07, "loss": 0.30283308029174805, "step": 3498 }, { "epoch": 2.9461279461279464, "grad_norm": 17.606487274169922, "learning_rate": 2.016371554357515e-07, "loss": 0.6129805445671082, "step": 3500 }, { "epoch": 2.9478114478114477, "grad_norm": 48.08317565917969, "learning_rate": 2.015379856413643e-07, "loss": 0.6700767278671265, "step": 3502 }, { "epoch": 2.9494949494949494, "grad_norm": 10.773337364196777, "learning_rate": 2.01441911426652e-07, "loss": 0.32376813888549805, "step": 3504 }, { "epoch": 2.951178451178451, "grad_norm": 1.6822550296783447, "learning_rate": 2.013489331226307e-07, "loss": 0.6684743762016296, "step": 3506 }, { "epoch": 2.952861952861953, "grad_norm": 4.8438568115234375, "learning_rate": 2.0125905104964978e-07, "loss": 0.846743106842041, "step": 3508 }, { "epoch": 2.9545454545454546, "grad_norm": 5.908998012542725, "learning_rate": 2.0117226551739068e-07, "loss": 0.6087542772293091, "step": 3510 }, { "epoch": 2.9562289562289563, "grad_norm": 7.448733329772949, "learning_rate": 2.0108857682486629e-07, "loss": 0.8167439103126526, "step": 3512 }, { "epoch": 2.957912457912458, "grad_norm": 9.953859329223633, "learning_rate": 2.0100798526041927e-07, "loss": 0.304475873708725, "step": 3514 }, { "epoch": 2.9595959595959593, "grad_norm": 5.336069107055664, "learning_rate": 2.009304911017215e-07, "loss": 0.8450760841369629, "step": 3516 }, { "epoch": 2.961279461279461, "grad_norm": 3.322150707244873, "learning_rate": 2.0085609461577295e-07, "loss": 0.8154351711273193, "step": 3518 }, { "epoch": 2.962962962962963, "grad_norm": 7.335842132568359, "learning_rate": 2.0078479605890064e-07, "loss": 0.35378673672676086, "step": 3520 }, { "epoch": 2.9646464646464645, "grad_norm": 4.2547783851623535, "learning_rate": 2.007165956767584e-07, "loss": 0.6887914538383484, "step": 3522 }, { "epoch": 2.9663299663299663, "grad_norm": 3.4846153259277344, "learning_rate": 2.00651493704325e-07, "loss": 0.22204965353012085, "step": 3524 }, { "epoch": 2.968013468013468, "grad_norm": 20.680572509765625, "learning_rate": 2.0058949036590426e-07, "loss": 0.8485254645347595, "step": 3526 }, { "epoch": 2.9696969696969697, "grad_norm": 3.527207851409912, "learning_rate": 2.0053058587512378e-07, "loss": 0.7592622637748718, "step": 3528 }, { "epoch": 2.9713804713804715, "grad_norm": 4.903465270996094, "learning_rate": 2.0047478043493418e-07, "loss": 0.7468944191932678, "step": 3530 }, { "epoch": 2.973063973063973, "grad_norm": 6.085175514221191, "learning_rate": 2.004220742376088e-07, "loss": 0.6274712681770325, "step": 3532 }, { "epoch": 2.974747474747475, "grad_norm": 13.613375663757324, "learning_rate": 2.0037246746474277e-07, "loss": 0.19880472123622894, "step": 3534 }, { "epoch": 2.9764309764309766, "grad_norm": 3.277733325958252, "learning_rate": 2.0032596028725204e-07, "loss": 0.8517122268676758, "step": 3536 }, { "epoch": 2.9781144781144784, "grad_norm": 9.69018268585205, "learning_rate": 2.0028255286537355e-07, "loss": 0.4260925352573395, "step": 3538 }, { "epoch": 2.9797979797979797, "grad_norm": 3.108520269393921, "learning_rate": 2.0024224534866408e-07, "loss": 0.9670834541320801, "step": 3540 }, { "epoch": 2.9814814814814814, "grad_norm": 3.3656985759735107, "learning_rate": 2.0020503787599998e-07, "loss": 0.8684190511703491, "step": 3542 }, { "epoch": 2.983164983164983, "grad_norm": 5.216827392578125, "learning_rate": 2.001709305755767e-07, "loss": 0.4294402599334717, "step": 3544 }, { "epoch": 2.984848484848485, "grad_norm": 3.578760862350464, "learning_rate": 2.0013992356490827e-07, "loss": 0.8262860178947449, "step": 3546 }, { "epoch": 2.9865319865319866, "grad_norm": 6.799862861633301, "learning_rate": 2.0011201695082687e-07, "loss": 0.39053958654403687, "step": 3548 }, { "epoch": 2.9882154882154883, "grad_norm": 8.427506446838379, "learning_rate": 2.0008721082948243e-07, "loss": 0.2766346037387848, "step": 3550 }, { "epoch": 2.98989898989899, "grad_norm": 4.960444927215576, "learning_rate": 2.0006550528634258e-07, "loss": 0.5050246715545654, "step": 3552 }, { "epoch": 2.9915824915824913, "grad_norm": 18.282289505004883, "learning_rate": 2.00046900396192e-07, "loss": 0.8541325926780701, "step": 3554 }, { "epoch": 2.993265993265993, "grad_norm": 3.258129358291626, "learning_rate": 2.0003139622313241e-07, "loss": 0.7546226978302002, "step": 3556 }, { "epoch": 2.994949494949495, "grad_norm": 3.466796398162842, "learning_rate": 2.0001899282058216e-07, "loss": 0.6056807041168213, "step": 3558 }, { "epoch": 2.9966329966329965, "grad_norm": 4.726839542388916, "learning_rate": 2.000096902312762e-07, "loss": 0.3962956964969635, "step": 3560 }, { "epoch": 2.9983164983164983, "grad_norm": 5.164308071136475, "learning_rate": 2.0000348848726586e-07, "loss": 0.5580795407295227, "step": 3562 }, { "epoch": 3.0, "grad_norm": 9.059016227722168, "learning_rate": 2.0000038760991877e-07, "loss": 0.46740537881851196, "step": 3564 }, { "epoch": 3.0, "step": 3564, "total_flos": 4.2988160857187287e+18, "train_loss": 0.7857236749096433, "train_runtime": 6229.2125, "train_samples_per_second": 9.154, "train_steps_per_second": 0.572 } ], "logging_steps": 2, "max_steps": 3564, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 99999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.2988160857187287e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }