{ "best_metric": null, "best_model_checkpoint": null, "epoch": 13.0, "eval_steps": 500, "global_step": 126516, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003082297338949964, "grad_norm": 0.5791122913360596, "learning_rate": 1.2325390304026295e-05, "loss": 11.5149, "step": 3 }, { "epoch": 0.0006164594677899928, "grad_norm": 1.7733268737792969, "learning_rate": 2.465078060805259e-05, "loss": 11.504, "step": 6 }, { "epoch": 0.0009246892016849893, "grad_norm": 3.047468423843384, "learning_rate": 3.697617091207888e-05, "loss": 11.4391, "step": 9 }, { "epoch": 0.0012329189355799856, "grad_norm": 3.346104621887207, "learning_rate": 4.930156121610518e-05, "loss": 11.2635, "step": 12 }, { "epoch": 0.001541148669474982, "grad_norm": 3.2847304344177246, "learning_rate": 6.162695152013147e-05, "loss": 10.9864, "step": 15 }, { "epoch": 0.0018493784033699785, "grad_norm": 3.2979729175567627, "learning_rate": 7.395234182415776e-05, "loss": 10.6487, "step": 18 }, { "epoch": 0.0021576081372649747, "grad_norm": 3.445396661758423, "learning_rate": 8.627773212818406e-05, "loss": 10.2667, "step": 21 }, { "epoch": 0.0024658378711599712, "grad_norm": 3.620011568069458, "learning_rate": 9.860312243221036e-05, "loss": 9.8679, "step": 24 }, { "epoch": 0.002774067605054968, "grad_norm": 3.66719913482666, "learning_rate": 0.00011092851273623665, "loss": 9.4532, "step": 27 }, { "epoch": 0.003082297338949964, "grad_norm": 3.709792375564575, "learning_rate": 0.00012325390304026294, "loss": 9.0393, "step": 30 }, { "epoch": 0.0033905270728449605, "grad_norm": 3.5757813453674316, "learning_rate": 0.00013557929334428925, "loss": 8.6449, "step": 33 }, { "epoch": 0.003698756806739957, "grad_norm": 3.200732707977295, "learning_rate": 0.00014790468364831553, "loss": 8.2932, "step": 36 }, { "epoch": 0.004006986540634953, "grad_norm": 2.7463502883911133, "learning_rate": 0.0001602300739523418, "loss": 8.0151, "step": 39 }, { "epoch": 0.004315216274529949, "grad_norm": 1.961776852607727, "learning_rate": 0.00017255546425636812, "loss": 7.7988, "step": 42 }, { "epoch": 0.004623446008424946, "grad_norm": 1.5326848030090332, "learning_rate": 0.0001848808545603944, "loss": 7.6688, "step": 45 }, { "epoch": 0.0049316757423199424, "grad_norm": 2.266148090362549, "learning_rate": 0.00019720624486442071, "loss": 7.6276, "step": 48 }, { "epoch": 0.005239905476214939, "grad_norm": 2.563206672668457, "learning_rate": 0.000209531635168447, "loss": 7.5954, "step": 51 }, { "epoch": 0.005548135210109936, "grad_norm": 1.9326294660568237, "learning_rate": 0.0002218570254724733, "loss": 7.5136, "step": 54 }, { "epoch": 0.005856364944004932, "grad_norm": 1.0131096839904785, "learning_rate": 0.0002341824157764996, "loss": 7.4167, "step": 57 }, { "epoch": 0.006164594677899928, "grad_norm": 1.1923354864120483, "learning_rate": 0.00024650780608052587, "loss": 7.3273, "step": 60 }, { "epoch": 0.006472824411794925, "grad_norm": 1.2690105438232422, "learning_rate": 0.00025883319638455215, "loss": 7.2615, "step": 63 }, { "epoch": 0.006781054145689921, "grad_norm": 0.8375588059425354, "learning_rate": 0.0002711585866885785, "loss": 7.2088, "step": 66 }, { "epoch": 0.007089283879584917, "grad_norm": 0.719011127948761, "learning_rate": 0.0002834839769926048, "loss": 7.1474, "step": 69 }, { "epoch": 0.007397513613479914, "grad_norm": 0.7095780372619629, "learning_rate": 0.00029580936729663106, "loss": 7.0923, "step": 72 }, { "epoch": 0.00770574334737491, "grad_norm": 0.5094121098518372, "learning_rate": 0.00030813475760065734, "loss": 7.0406, "step": 75 }, { "epoch": 0.008013973081269906, "grad_norm": 0.4085525572299957, "learning_rate": 0.0003204601479046836, "loss": 6.9586, "step": 78 }, { "epoch": 0.008322202815164903, "grad_norm": 0.4122146666049957, "learning_rate": 0.0003327855382087099, "loss": 6.9295, "step": 81 }, { "epoch": 0.008630432549059899, "grad_norm": 0.41669413447380066, "learning_rate": 0.00034511092851273624, "loss": 6.8629, "step": 84 }, { "epoch": 0.008938662282954896, "grad_norm": 0.40379494428634644, "learning_rate": 0.0003574363188167625, "loss": 6.7987, "step": 87 }, { "epoch": 0.009246892016849893, "grad_norm": 0.4202142059803009, "learning_rate": 0.0003697617091207888, "loss": 6.7779, "step": 90 }, { "epoch": 0.009555121750744888, "grad_norm": 0.3880140781402588, "learning_rate": 0.0003820870994248151, "loss": 6.7204, "step": 93 }, { "epoch": 0.009863351484639885, "grad_norm": 0.419883668422699, "learning_rate": 0.00039441248972884143, "loss": 6.6553, "step": 96 }, { "epoch": 0.010171581218534882, "grad_norm": 0.4570387601852417, "learning_rate": 0.0004067378800328677, "loss": 6.5936, "step": 99 }, { "epoch": 0.010479810952429877, "grad_norm": 0.5889524221420288, "learning_rate": 0.000419063270336894, "loss": 6.5271, "step": 102 }, { "epoch": 0.010788040686324874, "grad_norm": 0.5578189492225647, "learning_rate": 0.0004313886606409203, "loss": 6.4701, "step": 105 }, { "epoch": 0.011096270420219871, "grad_norm": 0.4710846245288849, "learning_rate": 0.0004437140509449466, "loss": 6.4254, "step": 108 }, { "epoch": 0.011404500154114866, "grad_norm": 0.30558162927627563, "learning_rate": 0.0004560394412489729, "loss": 6.4217, "step": 111 }, { "epoch": 0.011712729888009863, "grad_norm": 0.3634621500968933, "learning_rate": 0.0004683648315529992, "loss": 6.3422, "step": 114 }, { "epoch": 0.01202095962190486, "grad_norm": 0.369437575340271, "learning_rate": 0.00048069022185702546, "loss": 6.303, "step": 117 }, { "epoch": 0.012329189355799856, "grad_norm": 0.7418856024742126, "learning_rate": 0.0004930156121610517, "loss": 6.2535, "step": 120 }, { "epoch": 0.012637419089694853, "grad_norm": 0.855678915977478, "learning_rate": 0.000505341002465078, "loss": 6.2037, "step": 123 }, { "epoch": 0.01294564882358985, "grad_norm": 0.42414671182632446, "learning_rate": 0.0005176663927691043, "loss": 6.175, "step": 126 }, { "epoch": 0.013253878557484845, "grad_norm": 0.44572392106056213, "learning_rate": 0.0005299917830731307, "loss": 6.1548, "step": 129 }, { "epoch": 0.013562108291379842, "grad_norm": 0.24437515437602997, "learning_rate": 0.000542317173377157, "loss": 6.0967, "step": 132 }, { "epoch": 0.013870338025274839, "grad_norm": 0.4750615954399109, "learning_rate": 0.0005546425636811833, "loss": 6.0776, "step": 135 }, { "epoch": 0.014178567759169834, "grad_norm": 0.9504273533821106, "learning_rate": 0.0005669679539852095, "loss": 6.0471, "step": 138 }, { "epoch": 0.014486797493064831, "grad_norm": 0.5452646613121033, "learning_rate": 0.0005792933442892358, "loss": 5.9987, "step": 141 }, { "epoch": 0.014795027226959828, "grad_norm": 0.4314074218273163, "learning_rate": 0.0005916187345932621, "loss": 5.9874, "step": 144 }, { "epoch": 0.015103256960854823, "grad_norm": 0.48243793845176697, "learning_rate": 0.0006039441248972884, "loss": 5.962, "step": 147 }, { "epoch": 0.01541148669474982, "grad_norm": 0.7570855617523193, "learning_rate": 0.0006162695152013147, "loss": 5.921, "step": 150 }, { "epoch": 0.015719716428644816, "grad_norm": 0.4731276333332062, "learning_rate": 0.000628594905505341, "loss": 5.9032, "step": 153 }, { "epoch": 0.016027946162539813, "grad_norm": 0.3078465759754181, "learning_rate": 0.0006409202958093672, "loss": 5.8932, "step": 156 }, { "epoch": 0.01633617589643481, "grad_norm": 0.3441495895385742, "learning_rate": 0.0006532456861133935, "loss": 5.8695, "step": 159 }, { "epoch": 0.016644405630329807, "grad_norm": 0.43056854605674744, "learning_rate": 0.0006655710764174198, "loss": 5.8642, "step": 162 }, { "epoch": 0.016952635364224804, "grad_norm": 1.3783445358276367, "learning_rate": 0.0006778964667214461, "loss": 5.845, "step": 165 }, { "epoch": 0.017260865098119797, "grad_norm": 0.77984619140625, "learning_rate": 0.0006902218570254725, "loss": 5.8216, "step": 168 }, { "epoch": 0.017569094832014794, "grad_norm": 0.2990098297595978, "learning_rate": 0.0007025472473294988, "loss": 5.7801, "step": 171 }, { "epoch": 0.01787732456590979, "grad_norm": 0.29485219717025757, "learning_rate": 0.000714872637633525, "loss": 5.7757, "step": 174 }, { "epoch": 0.018185554299804788, "grad_norm": 0.4363936483860016, "learning_rate": 0.0007271980279375513, "loss": 5.7598, "step": 177 }, { "epoch": 0.018493784033699785, "grad_norm": 0.8902605175971985, "learning_rate": 0.0007395234182415776, "loss": 5.7719, "step": 180 }, { "epoch": 0.018802013767594782, "grad_norm": 0.7133249044418335, "learning_rate": 0.0007518488085456039, "loss": 5.7568, "step": 183 }, { "epoch": 0.019110243501489776, "grad_norm": 0.9105846881866455, "learning_rate": 0.0007641741988496302, "loss": 5.7191, "step": 186 }, { "epoch": 0.019418473235384773, "grad_norm": 0.9680726528167725, "learning_rate": 0.0007764995891536565, "loss": 5.7077, "step": 189 }, { "epoch": 0.01972670296927977, "grad_norm": 0.535446047782898, "learning_rate": 0.0007888249794576829, "loss": 5.6982, "step": 192 }, { "epoch": 0.020034932703174767, "grad_norm": 0.7894541621208191, "learning_rate": 0.0008011503697617091, "loss": 5.668, "step": 195 }, { "epoch": 0.020343162437069764, "grad_norm": 0.6975138187408447, "learning_rate": 0.0008134757600657354, "loss": 5.6432, "step": 198 }, { "epoch": 0.02065139217096476, "grad_norm": 0.6306262016296387, "learning_rate": 0.0008258011503697617, "loss": 5.6647, "step": 201 }, { "epoch": 0.020959621904859754, "grad_norm": 0.5615081787109375, "learning_rate": 0.000838126540673788, "loss": 5.6208, "step": 204 }, { "epoch": 0.02126785163875475, "grad_norm": 0.6468993425369263, "learning_rate": 0.0008504519309778143, "loss": 5.6272, "step": 207 }, { "epoch": 0.02157608137264975, "grad_norm": 0.8359414339065552, "learning_rate": 0.0008627773212818406, "loss": 5.6114, "step": 210 }, { "epoch": 0.021884311106544745, "grad_norm": 0.8909689784049988, "learning_rate": 0.0008751027115858668, "loss": 5.5957, "step": 213 }, { "epoch": 0.022192540840439742, "grad_norm": 0.39673465490341187, "learning_rate": 0.0008874281018898932, "loss": 5.568, "step": 216 }, { "epoch": 0.022500770574334736, "grad_norm": 0.9037743806838989, "learning_rate": 0.0008997534921939195, "loss": 5.5746, "step": 219 }, { "epoch": 0.022809000308229733, "grad_norm": 0.6929497122764587, "learning_rate": 0.0009120788824979458, "loss": 5.5482, "step": 222 }, { "epoch": 0.02311723004212473, "grad_norm": 0.5773665308952332, "learning_rate": 0.0009244042728019721, "loss": 5.5389, "step": 225 }, { "epoch": 0.023425459776019727, "grad_norm": 0.9532020092010498, "learning_rate": 0.0009367296631059984, "loss": 5.5369, "step": 228 }, { "epoch": 0.023733689509914724, "grad_norm": 1.2347012758255005, "learning_rate": 0.0009490550534100246, "loss": 5.5251, "step": 231 }, { "epoch": 0.02404191924380972, "grad_norm": 1.2062091827392578, "learning_rate": 0.0009613804437140509, "loss": 5.5042, "step": 234 }, { "epoch": 0.024350148977704714, "grad_norm": 1.4920969009399414, "learning_rate": 0.0009737058340180772, "loss": 5.4851, "step": 237 }, { "epoch": 0.02465837871159971, "grad_norm": 0.5619600415229797, "learning_rate": 0.0009860312243221035, "loss": 5.4889, "step": 240 }, { "epoch": 0.02496660844549471, "grad_norm": 0.8607615828514099, "learning_rate": 0.0009983566146261299, "loss": 5.4747, "step": 243 }, { "epoch": 0.025274838179389705, "grad_norm": 0.6228588223457336, "learning_rate": 0.001010682004930156, "loss": 5.4502, "step": 246 }, { "epoch": 0.025583067913284702, "grad_norm": 1.1925005912780762, "learning_rate": 0.0010230073952341824, "loss": 5.4449, "step": 249 }, { "epoch": 0.0258912976471797, "grad_norm": 0.7956414818763733, "learning_rate": 0.0010353327855382086, "loss": 5.4623, "step": 252 }, { "epoch": 0.026199527381074693, "grad_norm": 0.654242992401123, "learning_rate": 0.001047658175842235, "loss": 5.4287, "step": 255 }, { "epoch": 0.02650775711496969, "grad_norm": 0.592880368232727, "learning_rate": 0.0010599835661462614, "loss": 5.3891, "step": 258 }, { "epoch": 0.026815986848864687, "grad_norm": 0.9015865921974182, "learning_rate": 0.0010723089564502876, "loss": 5.4127, "step": 261 }, { "epoch": 0.027124216582759684, "grad_norm": 0.593488335609436, "learning_rate": 0.001084634346754314, "loss": 5.3887, "step": 264 }, { "epoch": 0.02743244631665468, "grad_norm": 0.7008156180381775, "learning_rate": 0.0010969597370583401, "loss": 5.386, "step": 267 }, { "epoch": 0.027740676050549678, "grad_norm": 0.32653194665908813, "learning_rate": 0.0011092851273623665, "loss": 5.3479, "step": 270 }, { "epoch": 0.02804890578444467, "grad_norm": 0.551142692565918, "learning_rate": 0.0011216105176663927, "loss": 5.3613, "step": 273 }, { "epoch": 0.02835713551833967, "grad_norm": 2.3521084785461426, "learning_rate": 0.001133935907970419, "loss": 5.3653, "step": 276 }, { "epoch": 0.028665365252234665, "grad_norm": 1.3452407121658325, "learning_rate": 0.0011462612982744455, "loss": 5.3559, "step": 279 }, { "epoch": 0.028973594986129662, "grad_norm": 1.0670260190963745, "learning_rate": 0.0011585866885784717, "loss": 5.3299, "step": 282 }, { "epoch": 0.02928182472002466, "grad_norm": 0.7768902778625488, "learning_rate": 0.001170912078882498, "loss": 5.3346, "step": 285 }, { "epoch": 0.029590054453919656, "grad_norm": 0.48641496896743774, "learning_rate": 0.0011832374691865242, "loss": 5.3178, "step": 288 }, { "epoch": 0.02989828418781465, "grad_norm": 0.5284126400947571, "learning_rate": 0.0011955628594905506, "loss": 5.3061, "step": 291 }, { "epoch": 0.030206513921709647, "grad_norm": 0.9099608659744263, "learning_rate": 0.0012078882497945768, "loss": 5.2764, "step": 294 }, { "epoch": 0.030514743655604644, "grad_norm": 0.7352691888809204, "learning_rate": 0.0012202136400986032, "loss": 5.2853, "step": 297 }, { "epoch": 0.03082297338949964, "grad_norm": 0.8361043334007263, "learning_rate": 0.0012325390304026294, "loss": 5.2838, "step": 300 }, { "epoch": 0.031131203123394638, "grad_norm": 1.525067925453186, "learning_rate": 0.0012448644207066558, "loss": 5.2612, "step": 303 }, { "epoch": 0.03143943285728963, "grad_norm": 0.6117688417434692, "learning_rate": 0.001257189811010682, "loss": 5.2488, "step": 306 }, { "epoch": 0.03174766259118463, "grad_norm": 0.9976358413696289, "learning_rate": 0.0012695152013147083, "loss": 5.2327, "step": 309 }, { "epoch": 0.032055892325079625, "grad_norm": 0.8152816891670227, "learning_rate": 0.0012818405916187345, "loss": 5.2095, "step": 312 }, { "epoch": 0.03236412205897462, "grad_norm": 0.8640046715736389, "learning_rate": 0.0012941659819227609, "loss": 5.1932, "step": 315 }, { "epoch": 0.03267235179286962, "grad_norm": 0.9461572170257568, "learning_rate": 0.001306491372226787, "loss": 5.1822, "step": 318 }, { "epoch": 0.03298058152676461, "grad_norm": 0.7717807292938232, "learning_rate": 0.0013188167625308134, "loss": 5.183, "step": 321 }, { "epoch": 0.03328881126065961, "grad_norm": 0.9057526588439941, "learning_rate": 0.0013311421528348396, "loss": 5.1686, "step": 324 }, { "epoch": 0.03359704099455461, "grad_norm": 0.5352618098258972, "learning_rate": 0.001343467543138866, "loss": 5.1378, "step": 327 }, { "epoch": 0.03390527072844961, "grad_norm": 1.2399810552597046, "learning_rate": 0.0013557929334428922, "loss": 5.1436, "step": 330 }, { "epoch": 0.0342135004623446, "grad_norm": 0.6678963303565979, "learning_rate": 0.0013681183237469186, "loss": 5.1488, "step": 333 }, { "epoch": 0.034521730196239594, "grad_norm": 0.6166791915893555, "learning_rate": 0.001380443714050945, "loss": 5.1239, "step": 336 }, { "epoch": 0.034829959930134595, "grad_norm": 1.1305850744247437, "learning_rate": 0.0013927691043549711, "loss": 5.1145, "step": 339 }, { "epoch": 0.03513818966402959, "grad_norm": 0.46510085463523865, "learning_rate": 0.0014050944946589975, "loss": 5.1041, "step": 342 }, { "epoch": 0.03544641939792459, "grad_norm": 0.4835362136363983, "learning_rate": 0.0014174198849630237, "loss": 5.0699, "step": 345 }, { "epoch": 0.03575464913181958, "grad_norm": 0.6595330238342285, "learning_rate": 0.00142974527526705, "loss": 5.0744, "step": 348 }, { "epoch": 0.036062878865714576, "grad_norm": 0.7306437492370605, "learning_rate": 0.0014420706655710763, "loss": 5.0703, "step": 351 }, { "epoch": 0.036371108599609576, "grad_norm": 0.5263068079948425, "learning_rate": 0.0014543960558751027, "loss": 5.0666, "step": 354 }, { "epoch": 0.03667933833350457, "grad_norm": 0.5896726250648499, "learning_rate": 0.001466721446179129, "loss": 5.0326, "step": 357 }, { "epoch": 0.03698756806739957, "grad_norm": 0.9357500672340393, "learning_rate": 0.0014790468364831552, "loss": 5.0363, "step": 360 }, { "epoch": 0.037295797801294564, "grad_norm": 0.7629897594451904, "learning_rate": 0.0014913722267871816, "loss": 5.0304, "step": 363 }, { "epoch": 0.037604027535189564, "grad_norm": 0.6347280144691467, "learning_rate": 0.0015036976170912078, "loss": 4.9962, "step": 366 }, { "epoch": 0.03791225726908456, "grad_norm": 0.4810947775840759, "learning_rate": 0.0015160230073952342, "loss": 4.9856, "step": 369 }, { "epoch": 0.03822048700297955, "grad_norm": 0.5907162427902222, "learning_rate": 0.0015283483976992604, "loss": 4.9712, "step": 372 }, { "epoch": 0.03852871673687455, "grad_norm": 0.5781192183494568, "learning_rate": 0.0015406737880032868, "loss": 5.0078, "step": 375 }, { "epoch": 0.038836946470769546, "grad_norm": 0.6017566323280334, "learning_rate": 0.001552999178307313, "loss": 4.9563, "step": 378 }, { "epoch": 0.039145176204664546, "grad_norm": 1.208348035812378, "learning_rate": 0.0015653245686113393, "loss": 4.9696, "step": 381 }, { "epoch": 0.03945340593855954, "grad_norm": 0.6113926768302917, "learning_rate": 0.0015776499589153657, "loss": 4.9461, "step": 384 }, { "epoch": 0.03976163567245453, "grad_norm": 0.6794010996818542, "learning_rate": 0.0015899753492193919, "loss": 4.9668, "step": 387 }, { "epoch": 0.040069865406349534, "grad_norm": 0.4383271038532257, "learning_rate": 0.0016023007395234183, "loss": 4.9283, "step": 390 }, { "epoch": 0.04037809514024453, "grad_norm": 0.9564613699913025, "learning_rate": 0.0016146261298274444, "loss": 4.8814, "step": 393 }, { "epoch": 0.04068632487413953, "grad_norm": 0.6730177402496338, "learning_rate": 0.0016269515201314708, "loss": 4.9158, "step": 396 }, { "epoch": 0.04099455460803452, "grad_norm": 0.5306158661842346, "learning_rate": 0.001639276910435497, "loss": 4.904, "step": 399 }, { "epoch": 0.04130278434192952, "grad_norm": 0.48708540201187134, "learning_rate": 0.0016516023007395234, "loss": 4.9002, "step": 402 }, { "epoch": 0.041611014075824515, "grad_norm": 0.4917944371700287, "learning_rate": 0.0016639276910435496, "loss": 4.8913, "step": 405 }, { "epoch": 0.04191924380971951, "grad_norm": 1.0929678678512573, "learning_rate": 0.001676253081347576, "loss": 4.8986, "step": 408 }, { "epoch": 0.04222747354361451, "grad_norm": 0.5417898297309875, "learning_rate": 0.0016885784716516024, "loss": 4.8702, "step": 411 }, { "epoch": 0.0425357032775095, "grad_norm": 1.1427472829818726, "learning_rate": 0.0017009038619556285, "loss": 4.8396, "step": 414 }, { "epoch": 0.0428439330114045, "grad_norm": 0.8225170969963074, "learning_rate": 0.001713229252259655, "loss": 4.8439, "step": 417 }, { "epoch": 0.0431521627452995, "grad_norm": 0.5638198256492615, "learning_rate": 0.001725554642563681, "loss": 4.8271, "step": 420 }, { "epoch": 0.04346039247919449, "grad_norm": 0.3389821946620941, "learning_rate": 0.0017378800328677075, "loss": 4.8207, "step": 423 }, { "epoch": 0.04376862221308949, "grad_norm": 0.38620057702064514, "learning_rate": 0.0017502054231717337, "loss": 4.8082, "step": 426 }, { "epoch": 0.044076851946984484, "grad_norm": 1.1568442583084106, "learning_rate": 0.00176253081347576, "loss": 4.7946, "step": 429 }, { "epoch": 0.044385081680879485, "grad_norm": 0.650175154209137, "learning_rate": 0.0017748562037797865, "loss": 4.7798, "step": 432 }, { "epoch": 0.04469331141477448, "grad_norm": 0.5364396572113037, "learning_rate": 0.0017871815940838126, "loss": 4.7732, "step": 435 }, { "epoch": 0.04500154114866947, "grad_norm": 0.7013806700706482, "learning_rate": 0.001799506984387839, "loss": 4.7733, "step": 438 }, { "epoch": 0.04530977088256447, "grad_norm": 0.4559784233570099, "learning_rate": 0.0018118323746918652, "loss": 4.7789, "step": 441 }, { "epoch": 0.045618000616459466, "grad_norm": 0.3456243872642517, "learning_rate": 0.0018241577649958916, "loss": 4.7456, "step": 444 }, { "epoch": 0.045926230350354466, "grad_norm": 0.6245532631874084, "learning_rate": 0.0018364831552999178, "loss": 4.7408, "step": 447 }, { "epoch": 0.04623446008424946, "grad_norm": 1.1933598518371582, "learning_rate": 0.0018488085456039441, "loss": 4.7728, "step": 450 }, { "epoch": 0.04654268981814446, "grad_norm": 0.8743248581886292, "learning_rate": 0.0018611339359079703, "loss": 4.7595, "step": 453 }, { "epoch": 0.046850919552039454, "grad_norm": 0.4980567693710327, "learning_rate": 0.0018734593262119967, "loss": 4.7222, "step": 456 }, { "epoch": 0.04715914928593445, "grad_norm": 0.6380690932273865, "learning_rate": 0.001885784716516023, "loss": 4.7175, "step": 459 }, { "epoch": 0.04746737901982945, "grad_norm": 0.3606894612312317, "learning_rate": 0.0018981101068200493, "loss": 4.7075, "step": 462 }, { "epoch": 0.04777560875372444, "grad_norm": 0.5618919730186462, "learning_rate": 0.0019104354971240757, "loss": 4.6939, "step": 465 }, { "epoch": 0.04808383848761944, "grad_norm": 0.639410138130188, "learning_rate": 0.0019227608874281018, "loss": 4.6748, "step": 468 }, { "epoch": 0.048392068221514435, "grad_norm": 0.7849680185317993, "learning_rate": 0.0019350862777321282, "loss": 4.6895, "step": 471 }, { "epoch": 0.04870029795540943, "grad_norm": 0.5419800877571106, "learning_rate": 0.0019474116680361544, "loss": 4.64, "step": 474 }, { "epoch": 0.04900852768930443, "grad_norm": 0.40359726548194885, "learning_rate": 0.001959737058340181, "loss": 4.6564, "step": 477 }, { "epoch": 0.04931675742319942, "grad_norm": 0.742076575756073, "learning_rate": 0.001972062448644207, "loss": 4.6434, "step": 480 }, { "epoch": 0.04962498715709442, "grad_norm": 0.620801568031311, "learning_rate": 0.0019843878389482336, "loss": 4.6509, "step": 483 }, { "epoch": 0.04993321689098942, "grad_norm": 0.5293563008308411, "learning_rate": 0.0019967132292522598, "loss": 4.6459, "step": 486 }, { "epoch": 0.05024144662488442, "grad_norm": 0.7527710795402527, "learning_rate": 0.002009038619556286, "loss": 4.6557, "step": 489 }, { "epoch": 0.05054967635877941, "grad_norm": 0.47365424036979675, "learning_rate": 0.002021364009860312, "loss": 4.6223, "step": 492 }, { "epoch": 0.050857906092674404, "grad_norm": 0.5232967734336853, "learning_rate": 0.0020336894001643387, "loss": 4.6186, "step": 495 }, { "epoch": 0.051166135826569405, "grad_norm": 0.40717506408691406, "learning_rate": 0.002046014790468365, "loss": 4.6125, "step": 498 }, { "epoch": 0.0514743655604644, "grad_norm": 0.5403701066970825, "learning_rate": 0.002058340180772391, "loss": 4.6143, "step": 501 }, { "epoch": 0.0517825952943594, "grad_norm": 0.7209203839302063, "learning_rate": 0.0020706655710764172, "loss": 4.5713, "step": 504 }, { "epoch": 0.05209082502825439, "grad_norm": 0.6991008520126343, "learning_rate": 0.002082990961380444, "loss": 4.6044, "step": 507 }, { "epoch": 0.052399054762149386, "grad_norm": 0.7478086352348328, "learning_rate": 0.00209531635168447, "loss": 4.5685, "step": 510 }, { "epoch": 0.052707284496044386, "grad_norm": 0.5864932537078857, "learning_rate": 0.002107641741988496, "loss": 4.588, "step": 513 }, { "epoch": 0.05301551422993938, "grad_norm": 0.44748950004577637, "learning_rate": 0.002119967132292523, "loss": 4.5823, "step": 516 }, { "epoch": 0.05332374396383438, "grad_norm": 0.32787564396858215, "learning_rate": 0.002132292522596549, "loss": 4.5522, "step": 519 }, { "epoch": 0.053631973697729374, "grad_norm": 0.30747687816619873, "learning_rate": 0.002144617912900575, "loss": 4.5429, "step": 522 }, { "epoch": 0.05394020343162437, "grad_norm": 0.3548784554004669, "learning_rate": 0.0021569433032046013, "loss": 4.5207, "step": 525 }, { "epoch": 0.05424843316551937, "grad_norm": 0.6617491841316223, "learning_rate": 0.002169268693508628, "loss": 4.5373, "step": 528 }, { "epoch": 0.05455666289941436, "grad_norm": 0.9917429089546204, "learning_rate": 0.002181594083812654, "loss": 4.5504, "step": 531 }, { "epoch": 0.05486489263330936, "grad_norm": 0.6506537795066833, "learning_rate": 0.0021939194741166803, "loss": 4.5385, "step": 534 }, { "epoch": 0.055173122367204355, "grad_norm": 0.3738003075122833, "learning_rate": 0.002206244864420707, "loss": 4.5169, "step": 537 }, { "epoch": 0.055481352101099356, "grad_norm": 0.3488200008869171, "learning_rate": 0.002218570254724733, "loss": 4.5119, "step": 540 }, { "epoch": 0.05578958183499435, "grad_norm": 0.31217944622039795, "learning_rate": 0.0022308956450287592, "loss": 4.4796, "step": 543 }, { "epoch": 0.05609781156888934, "grad_norm": 0.26770153641700745, "learning_rate": 0.0022432210353327854, "loss": 4.4699, "step": 546 }, { "epoch": 0.05640604130278434, "grad_norm": 0.3656662702560425, "learning_rate": 0.002255546425636812, "loss": 4.4817, "step": 549 }, { "epoch": 0.05671427103667934, "grad_norm": 0.5845988392829895, "learning_rate": 0.002267871815940838, "loss": 4.4525, "step": 552 }, { "epoch": 0.05702250077057434, "grad_norm": 0.41006627678871155, "learning_rate": 0.0022801972062448644, "loss": 4.4649, "step": 555 }, { "epoch": 0.05733073050446933, "grad_norm": 1.2013694047927856, "learning_rate": 0.002292522596548891, "loss": 4.4666, "step": 558 }, { "epoch": 0.057638960238364324, "grad_norm": 0.6116489171981812, "learning_rate": 0.002304847986852917, "loss": 4.4853, "step": 561 }, { "epoch": 0.057947189972259325, "grad_norm": 0.30115845799446106, "learning_rate": 0.0023171733771569433, "loss": 4.4409, "step": 564 }, { "epoch": 0.05825541970615432, "grad_norm": 0.2863396108150482, "learning_rate": 0.0023294987674609695, "loss": 4.4358, "step": 567 }, { "epoch": 0.05856364944004932, "grad_norm": 0.3191300928592682, "learning_rate": 0.002341824157764996, "loss": 4.454, "step": 570 }, { "epoch": 0.05887187917394431, "grad_norm": 0.4280944764614105, "learning_rate": 0.0023541495480690223, "loss": 4.3943, "step": 573 }, { "epoch": 0.05918010890783931, "grad_norm": 0.49310484528541565, "learning_rate": 0.0023664749383730485, "loss": 4.4097, "step": 576 }, { "epoch": 0.059488338641734306, "grad_norm": 0.4923991858959198, "learning_rate": 0.002378800328677075, "loss": 4.4454, "step": 579 }, { "epoch": 0.0597965683756293, "grad_norm": 0.5043625235557556, "learning_rate": 0.0023911257189811012, "loss": 4.3975, "step": 582 }, { "epoch": 0.0601047981095243, "grad_norm": 0.5404270887374878, "learning_rate": 0.0024034511092851274, "loss": 4.3957, "step": 585 }, { "epoch": 0.060413027843419294, "grad_norm": 0.9954332709312439, "learning_rate": 0.0024157764995891536, "loss": 4.3864, "step": 588 }, { "epoch": 0.060721257577314294, "grad_norm": 0.3632584512233734, "learning_rate": 0.00242810188989318, "loss": 4.38, "step": 591 }, { "epoch": 0.06102948731120929, "grad_norm": 0.2620343267917633, "learning_rate": 0.0024404272801972064, "loss": 4.3538, "step": 594 }, { "epoch": 0.06133771704510428, "grad_norm": 0.25050923228263855, "learning_rate": 0.0024527526705012325, "loss": 4.351, "step": 597 }, { "epoch": 0.06164594677899928, "grad_norm": 0.27279627323150635, "learning_rate": 0.0024650780608052587, "loss": 4.3335, "step": 600 }, { "epoch": 0.061954176512894275, "grad_norm": 0.6038771271705627, "learning_rate": 0.0024774034511092853, "loss": 4.3409, "step": 603 }, { "epoch": 0.062262406246789276, "grad_norm": 0.6948337554931641, "learning_rate": 0.0024897288414133115, "loss": 4.3555, "step": 606 }, { "epoch": 0.06257063598068427, "grad_norm": 0.5086238980293274, "learning_rate": 0.0025020542317173377, "loss": 4.3491, "step": 609 }, { "epoch": 0.06287886571457926, "grad_norm": 0.475999116897583, "learning_rate": 0.002514379622021364, "loss": 4.3412, "step": 612 }, { "epoch": 0.06318709544847426, "grad_norm": 0.3968357741832733, "learning_rate": 0.0025267050123253905, "loss": 4.3139, "step": 615 }, { "epoch": 0.06349532518236926, "grad_norm": 0.6681760549545288, "learning_rate": 0.0025390304026294166, "loss": 4.2999, "step": 618 }, { "epoch": 0.06380355491626426, "grad_norm": 0.3453294634819031, "learning_rate": 0.002551355792933443, "loss": 4.2873, "step": 621 }, { "epoch": 0.06411178465015925, "grad_norm": 0.3346744775772095, "learning_rate": 0.002563681183237469, "loss": 4.2868, "step": 624 }, { "epoch": 0.06442001438405424, "grad_norm": 0.39689645171165466, "learning_rate": 0.0025760065735414956, "loss": 4.2846, "step": 627 }, { "epoch": 0.06472824411794924, "grad_norm": 0.4017212688922882, "learning_rate": 0.0025883319638455218, "loss": 4.2625, "step": 630 }, { "epoch": 0.06503647385184425, "grad_norm": 0.3414025902748108, "learning_rate": 0.0026006573541495484, "loss": 4.2657, "step": 633 }, { "epoch": 0.06534470358573924, "grad_norm": 0.4091610312461853, "learning_rate": 0.002612982744453574, "loss": 4.2414, "step": 636 }, { "epoch": 0.06565293331963423, "grad_norm": 0.3916926085948944, "learning_rate": 0.0026253081347576007, "loss": 4.1801, "step": 639 }, { "epoch": 0.06596116305352923, "grad_norm": 1.0324465036392212, "learning_rate": 0.002637633525061627, "loss": 4.2162, "step": 642 }, { "epoch": 0.06626939278742423, "grad_norm": 0.4595172107219696, "learning_rate": 0.0026499589153656535, "loss": 4.2352, "step": 645 }, { "epoch": 0.06657762252131923, "grad_norm": 0.3215947151184082, "learning_rate": 0.0026622843056696792, "loss": 4.1865, "step": 648 }, { "epoch": 0.06688585225521422, "grad_norm": 0.2739149034023285, "learning_rate": 0.002674609695973706, "loss": 4.1644, "step": 651 }, { "epoch": 0.06719408198910921, "grad_norm": 0.250794917345047, "learning_rate": 0.002686935086277732, "loss": 4.1164, "step": 654 }, { "epoch": 0.06750231172300421, "grad_norm": 0.38465654850006104, "learning_rate": 0.0026992604765817586, "loss": 4.0844, "step": 657 }, { "epoch": 0.06781054145689921, "grad_norm": 0.5341691970825195, "learning_rate": 0.0027115858668857844, "loss": 4.1149, "step": 660 }, { "epoch": 0.06811877119079421, "grad_norm": 0.3479110896587372, "learning_rate": 0.002723911257189811, "loss": 4.1186, "step": 663 }, { "epoch": 0.0684270009246892, "grad_norm": 1.026038646697998, "learning_rate": 0.002736236647493837, "loss": 4.1293, "step": 666 }, { "epoch": 0.0687352306585842, "grad_norm": 0.445689857006073, "learning_rate": 0.0027485620377978638, "loss": 4.1319, "step": 669 }, { "epoch": 0.06904346039247919, "grad_norm": 0.3061058819293976, "learning_rate": 0.00276088742810189, "loss": 4.0338, "step": 672 }, { "epoch": 0.0693516901263742, "grad_norm": 0.26792746782302856, "learning_rate": 0.002773212818405916, "loss": 4.0154, "step": 675 }, { "epoch": 0.06965991986026919, "grad_norm": 0.2843894064426422, "learning_rate": 0.0027855382087099423, "loss": 4.0364, "step": 678 }, { "epoch": 0.06996814959416418, "grad_norm": 0.3073459565639496, "learning_rate": 0.002797863599013969, "loss": 3.9836, "step": 681 }, { "epoch": 0.07027637932805918, "grad_norm": 0.5893545746803284, "learning_rate": 0.002810188989317995, "loss": 4.0062, "step": 684 }, { "epoch": 0.07058460906195417, "grad_norm": 0.5386547446250916, "learning_rate": 0.0028225143796220217, "loss": 4.0066, "step": 687 }, { "epoch": 0.07089283879584918, "grad_norm": 0.7944250106811523, "learning_rate": 0.0028348397699260474, "loss": 3.9816, "step": 690 }, { "epoch": 0.07120106852974417, "grad_norm": 0.32200196385383606, "learning_rate": 0.002847165160230074, "loss": 3.9551, "step": 693 }, { "epoch": 0.07150929826363916, "grad_norm": 0.28814995288848877, "learning_rate": 0.0028594905505341, "loss": 3.912, "step": 696 }, { "epoch": 0.07181752799753416, "grad_norm": 0.2727998197078705, "learning_rate": 0.002871815940838127, "loss": 3.9203, "step": 699 }, { "epoch": 0.07212575773142915, "grad_norm": 0.2785607576370239, "learning_rate": 0.0028841413311421525, "loss": 3.8865, "step": 702 }, { "epoch": 0.07243398746532416, "grad_norm": 0.4318368136882782, "learning_rate": 0.002896466721446179, "loss": 3.8364, "step": 705 }, { "epoch": 0.07274221719921915, "grad_norm": 0.5888954997062683, "learning_rate": 0.0029087921117502053, "loss": 3.8772, "step": 708 }, { "epoch": 0.07305044693311415, "grad_norm": 0.5866847634315491, "learning_rate": 0.002921117502054232, "loss": 3.9039, "step": 711 }, { "epoch": 0.07335867666700914, "grad_norm": 0.40300968289375305, "learning_rate": 0.002933442892358258, "loss": 3.8332, "step": 714 }, { "epoch": 0.07366690640090415, "grad_norm": 0.2894107401371002, "learning_rate": 0.0029457682826622843, "loss": 3.8533, "step": 717 }, { "epoch": 0.07397513613479914, "grad_norm": 0.2637479901313782, "learning_rate": 0.0029580936729663105, "loss": 3.7962, "step": 720 }, { "epoch": 0.07428336586869413, "grad_norm": 0.5004228353500366, "learning_rate": 0.002970419063270337, "loss": 3.7759, "step": 723 }, { "epoch": 0.07459159560258913, "grad_norm": 0.30835986137390137, "learning_rate": 0.0029827444535743632, "loss": 3.7819, "step": 726 }, { "epoch": 0.07489982533648412, "grad_norm": 0.5601911544799805, "learning_rate": 0.00299506984387839, "loss": 3.7716, "step": 729 }, { "epoch": 0.07520805507037913, "grad_norm": 0.48242396116256714, "learning_rate": 0.0030073952341824156, "loss": 3.803, "step": 732 }, { "epoch": 0.07551628480427412, "grad_norm": 0.355916827917099, "learning_rate": 0.003019720624486442, "loss": 3.7532, "step": 735 }, { "epoch": 0.07582451453816912, "grad_norm": 0.4205069839954376, "learning_rate": 0.0030320460147904684, "loss": 3.7657, "step": 738 }, { "epoch": 0.07613274427206411, "grad_norm": 0.35680562257766724, "learning_rate": 0.003044371405094495, "loss": 3.7348, "step": 741 }, { "epoch": 0.0764409740059591, "grad_norm": 0.36372673511505127, "learning_rate": 0.0030566967953985207, "loss": 3.7569, "step": 744 }, { "epoch": 0.07674920373985411, "grad_norm": 0.2887914776802063, "learning_rate": 0.0030690221857025473, "loss": 3.7704, "step": 747 }, { "epoch": 0.0770574334737491, "grad_norm": 0.255290687084198, "learning_rate": 0.0030813475760065735, "loss": 3.7054, "step": 750 }, { "epoch": 0.0773656632076441, "grad_norm": 0.2969897389411926, "learning_rate": 0.0030936729663106, "loss": 3.708, "step": 753 }, { "epoch": 0.07767389294153909, "grad_norm": 0.491763710975647, "learning_rate": 0.003105998356614626, "loss": 3.7279, "step": 756 }, { "epoch": 0.07798212267543408, "grad_norm": 0.6437285542488098, "learning_rate": 0.0031183237469186525, "loss": 3.7129, "step": 759 }, { "epoch": 0.07829035240932909, "grad_norm": 0.3605806827545166, "learning_rate": 0.0031306491372226786, "loss": 3.6754, "step": 762 }, { "epoch": 0.07859858214322409, "grad_norm": 0.26162126660346985, "learning_rate": 0.0031429745275267052, "loss": 3.6869, "step": 765 }, { "epoch": 0.07890681187711908, "grad_norm": 0.3107220530509949, "learning_rate": 0.0031552999178307314, "loss": 3.6278, "step": 768 }, { "epoch": 0.07921504161101407, "grad_norm": 0.30417200922966003, "learning_rate": 0.0031676253081347576, "loss": 3.6046, "step": 771 }, { "epoch": 0.07952327134490907, "grad_norm": 0.5612326860427856, "learning_rate": 0.0031799506984387838, "loss": 3.6578, "step": 774 }, { "epoch": 0.07983150107880407, "grad_norm": 0.6136355996131897, "learning_rate": 0.0031922760887428104, "loss": 3.6897, "step": 777 }, { "epoch": 0.08013973081269907, "grad_norm": 0.4560060501098633, "learning_rate": 0.0032046014790468366, "loss": 3.6976, "step": 780 }, { "epoch": 0.08044796054659406, "grad_norm": 0.23871034383773804, "learning_rate": 0.003216926869350863, "loss": 3.657, "step": 783 }, { "epoch": 0.08075619028048905, "grad_norm": 0.17063000798225403, "learning_rate": 0.003229252259654889, "loss": 3.5905, "step": 786 }, { "epoch": 0.08106442001438405, "grad_norm": 0.35351842641830444, "learning_rate": 0.0032415776499589155, "loss": 3.603, "step": 789 }, { "epoch": 0.08137264974827906, "grad_norm": 0.340762197971344, "learning_rate": 0.0032539030402629417, "loss": 3.5978, "step": 792 }, { "epoch": 0.08168087948217405, "grad_norm": 0.22542034089565277, "learning_rate": 0.0032662284305669683, "loss": 3.5821, "step": 795 }, { "epoch": 0.08198910921606904, "grad_norm": 0.25130555033683777, "learning_rate": 0.003278553820870994, "loss": 3.5491, "step": 798 }, { "epoch": 0.08229733894996404, "grad_norm": 0.5155714750289917, "learning_rate": 0.0032908792111750206, "loss": 3.5605, "step": 801 }, { "epoch": 0.08260556868385904, "grad_norm": 0.3964254856109619, "learning_rate": 0.003303204601479047, "loss": 3.581, "step": 804 }, { "epoch": 0.08291379841775404, "grad_norm": 0.27110666036605835, "learning_rate": 0.0033155299917830734, "loss": 3.5995, "step": 807 }, { "epoch": 0.08322202815164903, "grad_norm": 0.38535767793655396, "learning_rate": 0.003327855382087099, "loss": 3.6029, "step": 810 }, { "epoch": 0.08353025788554402, "grad_norm": 0.6176712512969971, "learning_rate": 0.0033401807723911258, "loss": 3.5694, "step": 813 }, { "epoch": 0.08383848761943902, "grad_norm": 0.33828550577163696, "learning_rate": 0.003352506162695152, "loss": 3.5507, "step": 816 }, { "epoch": 0.08414671735333402, "grad_norm": 0.2286808043718338, "learning_rate": 0.0033648315529991786, "loss": 3.5345, "step": 819 }, { "epoch": 0.08445494708722902, "grad_norm": 0.30232542753219604, "learning_rate": 0.0033771569433032047, "loss": 3.5154, "step": 822 }, { "epoch": 0.08476317682112401, "grad_norm": 0.31767842173576355, "learning_rate": 0.0033894823336072313, "loss": 3.5442, "step": 825 }, { "epoch": 0.085071406555019, "grad_norm": 0.4275444746017456, "learning_rate": 0.003401807723911257, "loss": 3.5715, "step": 828 }, { "epoch": 0.085379636288914, "grad_norm": 0.3426364064216614, "learning_rate": 0.0034141331142152837, "loss": 3.5224, "step": 831 }, { "epoch": 0.085687866022809, "grad_norm": 0.33871403336524963, "learning_rate": 0.00342645850451931, "loss": 3.5119, "step": 834 }, { "epoch": 0.085996095756704, "grad_norm": 0.2641143202781677, "learning_rate": 0.0034387838948233365, "loss": 3.5179, "step": 837 }, { "epoch": 0.086304325490599, "grad_norm": 0.22955679893493652, "learning_rate": 0.003451109285127362, "loss": 3.4807, "step": 840 }, { "epoch": 0.08661255522449399, "grad_norm": 0.3795819878578186, "learning_rate": 0.003463434675431389, "loss": 3.4916, "step": 843 }, { "epoch": 0.08692078495838898, "grad_norm": 0.2942325174808502, "learning_rate": 0.003475760065735415, "loss": 3.4601, "step": 846 }, { "epoch": 0.08722901469228399, "grad_norm": 0.49732574820518494, "learning_rate": 0.0034880854560394416, "loss": 3.5021, "step": 849 }, { "epoch": 0.08753724442617898, "grad_norm": 0.4395911991596222, "learning_rate": 0.0035004108463434673, "loss": 3.4976, "step": 852 }, { "epoch": 0.08784547416007397, "grad_norm": 0.24201816320419312, "learning_rate": 0.003512736236647494, "loss": 3.4648, "step": 855 }, { "epoch": 0.08815370389396897, "grad_norm": 0.32818078994750977, "learning_rate": 0.00352506162695152, "loss": 3.4826, "step": 858 }, { "epoch": 0.08846193362786396, "grad_norm": 0.4433400630950928, "learning_rate": 0.0035373870172555467, "loss": 3.4763, "step": 861 }, { "epoch": 0.08877016336175897, "grad_norm": 0.2911035716533661, "learning_rate": 0.003549712407559573, "loss": 3.5024, "step": 864 }, { "epoch": 0.08907839309565396, "grad_norm": 0.27419009804725647, "learning_rate": 0.003562037797863599, "loss": 3.4382, "step": 867 }, { "epoch": 0.08938662282954896, "grad_norm": 0.2970244586467743, "learning_rate": 0.0035743631881676253, "loss": 3.4362, "step": 870 }, { "epoch": 0.08969485256344395, "grad_norm": 0.34221401810646057, "learning_rate": 0.003586688578471652, "loss": 3.4469, "step": 873 }, { "epoch": 0.09000308229733894, "grad_norm": 0.31807199120521545, "learning_rate": 0.003599013968775678, "loss": 3.3974, "step": 876 }, { "epoch": 0.09031131203123395, "grad_norm": 0.31519362330436707, "learning_rate": 0.0036113393590797046, "loss": 3.4275, "step": 879 }, { "epoch": 0.09061954176512894, "grad_norm": 0.5152423977851868, "learning_rate": 0.0036236647493837304, "loss": 3.4468, "step": 882 }, { "epoch": 0.09092777149902394, "grad_norm": 0.32447418570518494, "learning_rate": 0.003635990139687757, "loss": 3.4505, "step": 885 }, { "epoch": 0.09123600123291893, "grad_norm": 0.19884614646434784, "learning_rate": 0.003648315529991783, "loss": 3.4228, "step": 888 }, { "epoch": 0.09154423096681394, "grad_norm": 0.2726935148239136, "learning_rate": 0.0036606409202958098, "loss": 3.3957, "step": 891 }, { "epoch": 0.09185246070070893, "grad_norm": 0.29470425844192505, "learning_rate": 0.0036729663105998355, "loss": 3.3813, "step": 894 }, { "epoch": 0.09216069043460393, "grad_norm": 0.27806392312049866, "learning_rate": 0.003685291700903862, "loss": 3.3871, "step": 897 }, { "epoch": 0.09246892016849892, "grad_norm": 0.23773950338363647, "learning_rate": 0.0036976170912078883, "loss": 3.3941, "step": 900 }, { "epoch": 0.09277714990239391, "grad_norm": 0.45804303884506226, "learning_rate": 0.003709942481511915, "loss": 3.3752, "step": 903 }, { "epoch": 0.09308537963628892, "grad_norm": 0.45320865511894226, "learning_rate": 0.0037222678718159406, "loss": 3.4068, "step": 906 }, { "epoch": 0.09339360937018391, "grad_norm": 0.277089387178421, "learning_rate": 0.0037345932621199673, "loss": 3.4052, "step": 909 }, { "epoch": 0.09370183910407891, "grad_norm": 0.26548513770103455, "learning_rate": 0.0037469186524239934, "loss": 3.3753, "step": 912 }, { "epoch": 0.0940100688379739, "grad_norm": 0.24219335615634918, "learning_rate": 0.00375924404272802, "loss": 3.3913, "step": 915 }, { "epoch": 0.0943182985718689, "grad_norm": 0.2855617105960846, "learning_rate": 0.003771569433032046, "loss": 3.3636, "step": 918 }, { "epoch": 0.0946265283057639, "grad_norm": 0.35244864225387573, "learning_rate": 0.003783894823336073, "loss": 3.3603, "step": 921 }, { "epoch": 0.0949347580396589, "grad_norm": 0.3226896822452545, "learning_rate": 0.0037962202136400986, "loss": 3.3267, "step": 924 }, { "epoch": 0.09524298777355389, "grad_norm": 0.279863178730011, "learning_rate": 0.003808545603944125, "loss": 3.3192, "step": 927 }, { "epoch": 0.09555121750744888, "grad_norm": 0.35309404134750366, "learning_rate": 0.0038208709942481513, "loss": 3.2978, "step": 930 }, { "epoch": 0.09585944724134388, "grad_norm": 0.2359645515680313, "learning_rate": 0.003833196384552178, "loss": 3.3627, "step": 933 }, { "epoch": 0.09616767697523888, "grad_norm": 0.22583429515361786, "learning_rate": 0.0038455217748562037, "loss": 3.2669, "step": 936 }, { "epoch": 0.09647590670913388, "grad_norm": 0.2914174199104309, "learning_rate": 0.0038578471651602303, "loss": 3.3238, "step": 939 }, { "epoch": 0.09678413644302887, "grad_norm": 0.37748411297798157, "learning_rate": 0.0038701725554642565, "loss": 3.3232, "step": 942 }, { "epoch": 0.09709236617692386, "grad_norm": 0.28686878085136414, "learning_rate": 0.003882497945768283, "loss": 3.3143, "step": 945 }, { "epoch": 0.09740059591081886, "grad_norm": 0.22591544687747955, "learning_rate": 0.003894823336072309, "loss": 3.3285, "step": 948 }, { "epoch": 0.09770882564471386, "grad_norm": 0.24365665018558502, "learning_rate": 0.003907148726376336, "loss": 3.2799, "step": 951 }, { "epoch": 0.09801705537860886, "grad_norm": 0.3929263651371002, "learning_rate": 0.003919474116680362, "loss": 3.29, "step": 954 }, { "epoch": 0.09832528511250385, "grad_norm": 0.20268237590789795, "learning_rate": 0.003931799506984388, "loss": 3.2412, "step": 957 }, { "epoch": 0.09863351484639885, "grad_norm": 0.3333010673522949, "learning_rate": 0.003944124897288414, "loss": 3.2523, "step": 960 }, { "epoch": 0.09894174458029384, "grad_norm": 0.32760193943977356, "learning_rate": 0.0039564502875924406, "loss": 3.2958, "step": 963 }, { "epoch": 0.09924997431418885, "grad_norm": 0.27670565247535706, "learning_rate": 0.003968775677896467, "loss": 3.2683, "step": 966 }, { "epoch": 0.09955820404808384, "grad_norm": 0.32110410928726196, "learning_rate": 0.003981101068200493, "loss": 3.2576, "step": 969 }, { "epoch": 0.09986643378197883, "grad_norm": 0.43541696667671204, "learning_rate": 0.0039934264585045195, "loss": 3.2924, "step": 972 }, { "epoch": 0.10017466351587383, "grad_norm": 0.3483084738254547, "learning_rate": 0.004005751848808546, "loss": 3.2936, "step": 975 }, { "epoch": 0.10048289324976883, "grad_norm": 0.29586124420166016, "learning_rate": 0.004018077239112572, "loss": 3.2511, "step": 978 }, { "epoch": 0.10079112298366383, "grad_norm": 0.21434040367603302, "learning_rate": 0.0040304026294165985, "loss": 3.242, "step": 981 }, { "epoch": 0.10109935271755882, "grad_norm": 0.35204213857650757, "learning_rate": 0.004042728019720624, "loss": 3.2156, "step": 984 }, { "epoch": 0.10140758245145381, "grad_norm": 0.25223758816719055, "learning_rate": 0.004055053410024651, "loss": 3.257, "step": 987 }, { "epoch": 0.10171581218534881, "grad_norm": 0.2969653010368347, "learning_rate": 0.004067378800328677, "loss": 3.2576, "step": 990 }, { "epoch": 0.10202404191924382, "grad_norm": 0.26683250069618225, "learning_rate": 0.004079704190632704, "loss": 3.1998, "step": 993 }, { "epoch": 0.10233227165313881, "grad_norm": 0.26404044032096863, "learning_rate": 0.00409202958093673, "loss": 3.2303, "step": 996 }, { "epoch": 0.1026405013870338, "grad_norm": 0.2442736029624939, "learning_rate": 0.004104354971240756, "loss": 3.2428, "step": 999 }, { "epoch": 0.1029487311209288, "grad_norm": 0.2192964255809784, "learning_rate": 0.004116680361544782, "loss": 3.2661, "step": 1002 }, { "epoch": 0.10325696085482379, "grad_norm": 0.21057608723640442, "learning_rate": 0.004129005751848809, "loss": 3.1995, "step": 1005 }, { "epoch": 0.1035651905887188, "grad_norm": 0.3122745454311371, "learning_rate": 0.0041413311421528345, "loss": 3.2104, "step": 1008 }, { "epoch": 0.10387342032261379, "grad_norm": 0.643337607383728, "learning_rate": 0.004153656532456861, "loss": 3.2196, "step": 1011 }, { "epoch": 0.10418165005650878, "grad_norm": 0.265302449464798, "learning_rate": 0.004165981922760888, "loss": 3.2163, "step": 1014 }, { "epoch": 0.10448987979040378, "grad_norm": 0.27250421047210693, "learning_rate": 0.004178307313064914, "loss": 3.1781, "step": 1017 }, { "epoch": 0.10479810952429877, "grad_norm": 0.3951704800128937, "learning_rate": 0.00419063270336894, "loss": 3.2405, "step": 1020 }, { "epoch": 0.10510633925819378, "grad_norm": 0.20837850868701935, "learning_rate": 0.004202958093672967, "loss": 3.2269, "step": 1023 }, { "epoch": 0.10541456899208877, "grad_norm": 0.3887670338153839, "learning_rate": 0.004215283483976992, "loss": 3.219, "step": 1026 }, { "epoch": 0.10572279872598377, "grad_norm": 0.18901754915714264, "learning_rate": 0.004227608874281019, "loss": 3.1759, "step": 1029 }, { "epoch": 0.10603102845987876, "grad_norm": 0.3570176362991333, "learning_rate": 0.004239934264585046, "loss": 3.1544, "step": 1032 }, { "epoch": 0.10633925819377375, "grad_norm": 0.2346538007259369, "learning_rate": 0.004252259654889072, "loss": 3.1834, "step": 1035 }, { "epoch": 0.10664748792766876, "grad_norm": 0.1956055760383606, "learning_rate": 0.004264585045193098, "loss": 3.1597, "step": 1038 }, { "epoch": 0.10695571766156375, "grad_norm": 0.19475719332695007, "learning_rate": 0.0042769104354971246, "loss": 3.1818, "step": 1041 }, { "epoch": 0.10726394739545875, "grad_norm": 0.20991206169128418, "learning_rate": 0.00428923582580115, "loss": 3.148, "step": 1044 }, { "epoch": 0.10757217712935374, "grad_norm": 0.45754027366638184, "learning_rate": 0.004301561216105177, "loss": 3.1838, "step": 1047 }, { "epoch": 0.10788040686324873, "grad_norm": 0.2500004470348358, "learning_rate": 0.004313886606409203, "loss": 3.158, "step": 1050 }, { "epoch": 0.10818863659714374, "grad_norm": 0.29174116253852844, "learning_rate": 0.004326211996713229, "loss": 3.1619, "step": 1053 }, { "epoch": 0.10849686633103874, "grad_norm": 0.1642913520336151, "learning_rate": 0.004338537387017256, "loss": 3.1313, "step": 1056 }, { "epoch": 0.10880509606493373, "grad_norm": 0.20638629794120789, "learning_rate": 0.004350862777321282, "loss": 3.1553, "step": 1059 }, { "epoch": 0.10911332579882872, "grad_norm": 0.2534577548503876, "learning_rate": 0.004363188167625308, "loss": 3.146, "step": 1062 }, { "epoch": 0.10942155553272373, "grad_norm": 0.3894107937812805, "learning_rate": 0.004375513557929334, "loss": 3.1702, "step": 1065 }, { "epoch": 0.10972978526661872, "grad_norm": 0.18316411972045898, "learning_rate": 0.0043878389482333606, "loss": 3.1306, "step": 1068 }, { "epoch": 0.11003801500051372, "grad_norm": 0.22901946306228638, "learning_rate": 0.004400164338537387, "loss": 3.1012, "step": 1071 }, { "epoch": 0.11034624473440871, "grad_norm": 0.3013692796230316, "learning_rate": 0.004412489728841414, "loss": 3.1266, "step": 1074 }, { "epoch": 0.1106544744683037, "grad_norm": 0.26568275690078735, "learning_rate": 0.0044248151191454395, "loss": 3.1161, "step": 1077 }, { "epoch": 0.11096270420219871, "grad_norm": 0.23559318482875824, "learning_rate": 0.004437140509449466, "loss": 3.125, "step": 1080 }, { "epoch": 0.1112709339360937, "grad_norm": 0.29804936051368713, "learning_rate": 0.004449465899753492, "loss": 3.1212, "step": 1083 }, { "epoch": 0.1115791636699887, "grad_norm": 0.2965604066848755, "learning_rate": 0.0044617912900575185, "loss": 3.1435, "step": 1086 }, { "epoch": 0.11188739340388369, "grad_norm": 0.22977206110954285, "learning_rate": 0.004474116680361544, "loss": 3.1355, "step": 1089 }, { "epoch": 0.11219562313777869, "grad_norm": 0.2511363923549652, "learning_rate": 0.004486442070665571, "loss": 3.1041, "step": 1092 }, { "epoch": 0.11250385287167369, "grad_norm": 0.13533104956150055, "learning_rate": 0.004498767460969597, "loss": 3.1006, "step": 1095 }, { "epoch": 0.11281208260556869, "grad_norm": 0.1323193609714508, "learning_rate": 0.004511092851273624, "loss": 3.0623, "step": 1098 }, { "epoch": 0.11312031233946368, "grad_norm": 0.24355067312717438, "learning_rate": 0.00452341824157765, "loss": 3.109, "step": 1101 }, { "epoch": 0.11342854207335867, "grad_norm": 0.45989617705345154, "learning_rate": 0.004535743631881676, "loss": 3.1102, "step": 1104 }, { "epoch": 0.11373677180725367, "grad_norm": 0.27389761805534363, "learning_rate": 0.004548069022185702, "loss": 3.1058, "step": 1107 }, { "epoch": 0.11404500154114867, "grad_norm": 0.3120715320110321, "learning_rate": 0.004560394412489729, "loss": 3.0936, "step": 1110 }, { "epoch": 0.11435323127504367, "grad_norm": 0.3641244173049927, "learning_rate": 0.004572719802793755, "loss": 3.0895, "step": 1113 }, { "epoch": 0.11466146100893866, "grad_norm": 0.16439078748226166, "learning_rate": 0.004585045193097782, "loss": 3.0697, "step": 1116 }, { "epoch": 0.11496969074283366, "grad_norm": 0.21766935288906097, "learning_rate": 0.004597370583401808, "loss": 3.0952, "step": 1119 }, { "epoch": 0.11527792047672865, "grad_norm": 0.1682632714509964, "learning_rate": 0.004609695973705834, "loss": 3.0644, "step": 1122 }, { "epoch": 0.11558615021062366, "grad_norm": 0.18391060829162598, "learning_rate": 0.00462202136400986, "loss": 3.0565, "step": 1125 }, { "epoch": 0.11589437994451865, "grad_norm": 0.2503467798233032, "learning_rate": 0.004634346754313887, "loss": 3.0798, "step": 1128 }, { "epoch": 0.11620260967841364, "grad_norm": 0.3139159083366394, "learning_rate": 0.004646672144617912, "loss": 3.0784, "step": 1131 }, { "epoch": 0.11651083941230864, "grad_norm": 0.2205217182636261, "learning_rate": 0.004658997534921939, "loss": 3.0696, "step": 1134 }, { "epoch": 0.11681906914620364, "grad_norm": 0.322355180978775, "learning_rate": 0.004671322925225966, "loss": 3.0811, "step": 1137 }, { "epoch": 0.11712729888009864, "grad_norm": 0.27023863792419434, "learning_rate": 0.004683648315529992, "loss": 3.0955, "step": 1140 }, { "epoch": 0.11743552861399363, "grad_norm": 0.2672137916088104, "learning_rate": 0.004695973705834018, "loss": 3.0584, "step": 1143 }, { "epoch": 0.11774375834788862, "grad_norm": 0.271323561668396, "learning_rate": 0.0047082990961380446, "loss": 3.0483, "step": 1146 }, { "epoch": 0.11805198808178362, "grad_norm": 0.1428508758544922, "learning_rate": 0.00472062448644207, "loss": 3.0661, "step": 1149 }, { "epoch": 0.11836021781567863, "grad_norm": 0.29395970702171326, "learning_rate": 0.004732949876746097, "loss": 3.0391, "step": 1152 }, { "epoch": 0.11866844754957362, "grad_norm": 0.22083403170108795, "learning_rate": 0.0047452752670501235, "loss": 3.0579, "step": 1155 }, { "epoch": 0.11897667728346861, "grad_norm": 0.2015424370765686, "learning_rate": 0.00475760065735415, "loss": 3.0356, "step": 1158 }, { "epoch": 0.1192849070173636, "grad_norm": 0.21997034549713135, "learning_rate": 0.004769926047658176, "loss": 3.0301, "step": 1161 }, { "epoch": 0.1195931367512586, "grad_norm": 0.16206422448158264, "learning_rate": 0.0047822514379622025, "loss": 3.0407, "step": 1164 }, { "epoch": 0.11990136648515361, "grad_norm": 0.22591377794742584, "learning_rate": 0.004794576828266228, "loss": 3.0414, "step": 1167 }, { "epoch": 0.1202095962190486, "grad_norm": 0.2582632601261139, "learning_rate": 0.004806902218570255, "loss": 3.0148, "step": 1170 }, { "epoch": 0.1205178259529436, "grad_norm": 0.273416131734848, "learning_rate": 0.004819227608874281, "loss": 3.0023, "step": 1173 }, { "epoch": 0.12082605568683859, "grad_norm": 0.16373753547668457, "learning_rate": 0.004831552999178307, "loss": 3.0127, "step": 1176 }, { "epoch": 0.12113428542073358, "grad_norm": 0.2623594105243683, "learning_rate": 0.004843878389482334, "loss": 3.0635, "step": 1179 }, { "epoch": 0.12144251515462859, "grad_norm": 0.34809616208076477, "learning_rate": 0.00485620377978636, "loss": 3.0222, "step": 1182 }, { "epoch": 0.12175074488852358, "grad_norm": 0.23841938376426697, "learning_rate": 0.004868529170090386, "loss": 3.019, "step": 1185 }, { "epoch": 0.12205897462241858, "grad_norm": 0.2161986231803894, "learning_rate": 0.004880854560394413, "loss": 2.9934, "step": 1188 }, { "epoch": 0.12236720435631357, "grad_norm": 0.2870507836341858, "learning_rate": 0.0048931799506984385, "loss": 3.0438, "step": 1191 }, { "epoch": 0.12267543409020856, "grad_norm": 0.20796675980091095, "learning_rate": 0.004905505341002465, "loss": 2.9947, "step": 1194 }, { "epoch": 0.12298366382410357, "grad_norm": 0.1762983798980713, "learning_rate": 0.004917830731306492, "loss": 2.9729, "step": 1197 }, { "epoch": 0.12329189355799856, "grad_norm": 0.1240881159901619, "learning_rate": 0.0049301561216105174, "loss": 3.0149, "step": 1200 }, { "epoch": 0.12360012329189356, "grad_norm": 0.16968263685703278, "learning_rate": 0.004942481511914544, "loss": 2.9944, "step": 1203 }, { "epoch": 0.12390835302578855, "grad_norm": 0.1743592470884323, "learning_rate": 0.004954806902218571, "loss": 2.9947, "step": 1206 }, { "epoch": 0.12421658275968354, "grad_norm": 0.29677319526672363, "learning_rate": 0.004967132292522596, "loss": 2.9922, "step": 1209 }, { "epoch": 0.12452481249357855, "grad_norm": 0.273882657289505, "learning_rate": 0.004979457682826623, "loss": 2.9698, "step": 1212 }, { "epoch": 0.12483304222747355, "grad_norm": 0.3060019910335541, "learning_rate": 0.004991783073130649, "loss": 2.9925, "step": 1215 }, { "epoch": 0.12514127196136854, "grad_norm": 0.13856515288352966, "learning_rate": 0.005004108463434675, "loss": 3.0212, "step": 1218 }, { "epoch": 0.12544950169526353, "grad_norm": 0.12940354645252228, "learning_rate": 0.005016433853738702, "loss": 2.9472, "step": 1221 }, { "epoch": 0.12575773142915853, "grad_norm": 0.15493866801261902, "learning_rate": 0.005028759244042728, "loss": 2.9859, "step": 1224 }, { "epoch": 0.12606596116305352, "grad_norm": 0.4994816184043884, "learning_rate": 0.005041084634346754, "loss": 2.949, "step": 1227 }, { "epoch": 0.1263741908969485, "grad_norm": 0.37235137820243835, "learning_rate": 0.005053410024650781, "loss": 3.006, "step": 1230 }, { "epoch": 0.12668242063084353, "grad_norm": 0.24599948525428772, "learning_rate": 0.0050657354149548075, "loss": 2.9954, "step": 1233 }, { "epoch": 0.12699065036473853, "grad_norm": 0.1838703751564026, "learning_rate": 0.005078060805258833, "loss": 2.9886, "step": 1236 }, { "epoch": 0.12729888009863352, "grad_norm": 0.19366377592086792, "learning_rate": 0.005090386195562859, "loss": 2.9715, "step": 1239 }, { "epoch": 0.12760710983252851, "grad_norm": 0.11911759525537491, "learning_rate": 0.005102711585866886, "loss": 2.965, "step": 1242 }, { "epoch": 0.1279153395664235, "grad_norm": 0.12456653267145157, "learning_rate": 0.005115036976170912, "loss": 2.9343, "step": 1245 }, { "epoch": 0.1282235693003185, "grad_norm": 0.322380393743515, "learning_rate": 0.005127362366474938, "loss": 2.9604, "step": 1248 }, { "epoch": 0.1285317990342135, "grad_norm": 0.40975773334503174, "learning_rate": 0.005139687756778965, "loss": 2.9386, "step": 1251 }, { "epoch": 0.1288400287681085, "grad_norm": 0.2045045793056488, "learning_rate": 0.005152013147082991, "loss": 2.9459, "step": 1254 }, { "epoch": 0.12914825850200348, "grad_norm": 0.20005717873573303, "learning_rate": 0.005164338537387018, "loss": 2.9631, "step": 1257 }, { "epoch": 0.12945648823589848, "grad_norm": 0.18930204212665558, "learning_rate": 0.0051766639276910435, "loss": 2.9014, "step": 1260 }, { "epoch": 0.1297647179697935, "grad_norm": 0.3180810213088989, "learning_rate": 0.00518898931799507, "loss": 2.9242, "step": 1263 }, { "epoch": 0.1300729477036885, "grad_norm": 0.17843572795391083, "learning_rate": 0.005201314708299097, "loss": 2.9063, "step": 1266 }, { "epoch": 0.13038117743758348, "grad_norm": 0.12591248750686646, "learning_rate": 0.005213640098603123, "loss": 2.9095, "step": 1269 }, { "epoch": 0.13068940717147848, "grad_norm": 0.17976878583431244, "learning_rate": 0.005225965488907148, "loss": 2.928, "step": 1272 }, { "epoch": 0.13099763690537347, "grad_norm": 0.16759532690048218, "learning_rate": 0.005238290879211175, "loss": 2.9202, "step": 1275 }, { "epoch": 0.13130586663926846, "grad_norm": 0.27441859245300293, "learning_rate": 0.0052506162695152014, "loss": 2.9242, "step": 1278 }, { "epoch": 0.13161409637316346, "grad_norm": 0.23654502630233765, "learning_rate": 0.005262941659819228, "loss": 2.9175, "step": 1281 }, { "epoch": 0.13192232610705845, "grad_norm": 0.3399145007133484, "learning_rate": 0.005275267050123254, "loss": 2.9277, "step": 1284 }, { "epoch": 0.13223055584095345, "grad_norm": 0.199320450425148, "learning_rate": 0.00528759244042728, "loss": 2.9184, "step": 1287 }, { "epoch": 0.13253878557484847, "grad_norm": 0.16563403606414795, "learning_rate": 0.005299917830731307, "loss": 2.9166, "step": 1290 }, { "epoch": 0.13284701530874346, "grad_norm": 0.18119758367538452, "learning_rate": 0.005312243221035334, "loss": 2.9239, "step": 1293 }, { "epoch": 0.13315524504263845, "grad_norm": 0.1558375358581543, "learning_rate": 0.0053245686113393585, "loss": 2.9028, "step": 1296 }, { "epoch": 0.13346347477653345, "grad_norm": 0.36665746569633484, "learning_rate": 0.005336894001643385, "loss": 2.9081, "step": 1299 }, { "epoch": 0.13377170451042844, "grad_norm": 0.186012864112854, "learning_rate": 0.005349219391947412, "loss": 2.8836, "step": 1302 }, { "epoch": 0.13407993424432343, "grad_norm": 0.14102259278297424, "learning_rate": 0.005361544782251438, "loss": 2.8906, "step": 1305 }, { "epoch": 0.13438816397821843, "grad_norm": 0.12519022822380066, "learning_rate": 0.005373870172555464, "loss": 2.9148, "step": 1308 }, { "epoch": 0.13469639371211342, "grad_norm": 0.14027029275894165, "learning_rate": 0.005386195562859491, "loss": 2.9108, "step": 1311 }, { "epoch": 0.13500462344600841, "grad_norm": 0.2553085684776306, "learning_rate": 0.005398520953163517, "loss": 2.8837, "step": 1314 }, { "epoch": 0.1353128531799034, "grad_norm": 0.2809675335884094, "learning_rate": 0.005410846343467544, "loss": 2.8795, "step": 1317 }, { "epoch": 0.13562108291379843, "grad_norm": 0.19451378285884857, "learning_rate": 0.005423171733771569, "loss": 2.8648, "step": 1320 }, { "epoch": 0.13592931264769342, "grad_norm": 0.22285006940364838, "learning_rate": 0.005435497124075595, "loss": 2.8994, "step": 1323 }, { "epoch": 0.13623754238158842, "grad_norm": 0.14703693985939026, "learning_rate": 0.005447822514379622, "loss": 2.8984, "step": 1326 }, { "epoch": 0.1365457721154834, "grad_norm": 0.23260341584682465, "learning_rate": 0.005460147904683649, "loss": 2.863, "step": 1329 }, { "epoch": 0.1368540018493784, "grad_norm": 0.16448146104812622, "learning_rate": 0.005472473294987674, "loss": 2.8895, "step": 1332 }, { "epoch": 0.1371622315832734, "grad_norm": 0.1994483470916748, "learning_rate": 0.005484798685291701, "loss": 2.9012, "step": 1335 }, { "epoch": 0.1374704613171684, "grad_norm": 0.2786753177642822, "learning_rate": 0.0054971240755957275, "loss": 2.8753, "step": 1338 }, { "epoch": 0.13777869105106338, "grad_norm": 0.13169367611408234, "learning_rate": 0.005509449465899754, "loss": 2.8567, "step": 1341 }, { "epoch": 0.13808692078495838, "grad_norm": 0.21205192804336548, "learning_rate": 0.00552177485620378, "loss": 2.8523, "step": 1344 }, { "epoch": 0.1383951505188534, "grad_norm": 0.3462331295013428, "learning_rate": 0.0055341002465078065, "loss": 2.881, "step": 1347 }, { "epoch": 0.1387033802527484, "grad_norm": 0.26768332719802856, "learning_rate": 0.005546425636811832, "loss": 2.8803, "step": 1350 }, { "epoch": 0.1390116099866434, "grad_norm": 0.22518084943294525, "learning_rate": 0.005558751027115859, "loss": 2.874, "step": 1353 }, { "epoch": 0.13931983972053838, "grad_norm": 0.1767919361591339, "learning_rate": 0.005571076417419885, "loss": 2.8593, "step": 1356 }, { "epoch": 0.13962806945443337, "grad_norm": 0.14405187964439392, "learning_rate": 0.005583401807723911, "loss": 2.8576, "step": 1359 }, { "epoch": 0.13993629918832837, "grad_norm": 0.15364724397659302, "learning_rate": 0.005595727198027938, "loss": 2.856, "step": 1362 }, { "epoch": 0.14024452892222336, "grad_norm": 0.26737314462661743, "learning_rate": 0.005608052588331964, "loss": 2.8225, "step": 1365 }, { "epoch": 0.14055275865611835, "grad_norm": 0.14594382047653198, "learning_rate": 0.00562037797863599, "loss": 2.8397, "step": 1368 }, { "epoch": 0.14086098839001335, "grad_norm": 0.1974790245294571, "learning_rate": 0.005632703368940017, "loss": 2.8294, "step": 1371 }, { "epoch": 0.14116921812390834, "grad_norm": 0.12267682701349258, "learning_rate": 0.005645028759244043, "loss": 2.8543, "step": 1374 }, { "epoch": 0.14147744785780336, "grad_norm": 0.14111129939556122, "learning_rate": 0.00565735414954807, "loss": 2.8181, "step": 1377 }, { "epoch": 0.14178567759169836, "grad_norm": 0.1846015751361847, "learning_rate": 0.005669679539852095, "loss": 2.8272, "step": 1380 }, { "epoch": 0.14209390732559335, "grad_norm": 0.26931676268577576, "learning_rate": 0.0056820049301561214, "loss": 2.8286, "step": 1383 }, { "epoch": 0.14240213705948834, "grad_norm": 0.17969557642936707, "learning_rate": 0.005694330320460148, "loss": 2.8315, "step": 1386 }, { "epoch": 0.14271036679338334, "grad_norm": 0.2056432068347931, "learning_rate": 0.005706655710764175, "loss": 2.835, "step": 1389 }, { "epoch": 0.14301859652727833, "grad_norm": 0.29306477308273315, "learning_rate": 0.0057189811010682, "loss": 2.8294, "step": 1392 }, { "epoch": 0.14332682626117332, "grad_norm": 0.1792561262845993, "learning_rate": 0.005731306491372227, "loss": 2.8321, "step": 1395 }, { "epoch": 0.14363505599506832, "grad_norm": 0.11323501914739609, "learning_rate": 0.005743631881676254, "loss": 2.83, "step": 1398 }, { "epoch": 0.1439432857289633, "grad_norm": 0.2804841101169586, "learning_rate": 0.00575595727198028, "loss": 2.8271, "step": 1401 }, { "epoch": 0.1442515154628583, "grad_norm": 0.33056163787841797, "learning_rate": 0.005768282662284305, "loss": 2.7976, "step": 1404 }, { "epoch": 0.14455974519675333, "grad_norm": 0.12834665179252625, "learning_rate": 0.005780608052588332, "loss": 2.8169, "step": 1407 }, { "epoch": 0.14486797493064832, "grad_norm": 0.15917035937309265, "learning_rate": 0.005792933442892358, "loss": 2.8124, "step": 1410 }, { "epoch": 0.1451762046645433, "grad_norm": 0.28015008568763733, "learning_rate": 0.005805258833196385, "loss": 2.8019, "step": 1413 }, { "epoch": 0.1454844343984383, "grad_norm": 0.16829009354114532, "learning_rate": 0.005817584223500411, "loss": 2.8357, "step": 1416 }, { "epoch": 0.1457926641323333, "grad_norm": 0.14804339408874512, "learning_rate": 0.005829909613804437, "loss": 2.8102, "step": 1419 }, { "epoch": 0.1461008938662283, "grad_norm": 0.20360830426216125, "learning_rate": 0.005842235004108464, "loss": 2.8211, "step": 1422 }, { "epoch": 0.1464091236001233, "grad_norm": 0.22152036428451538, "learning_rate": 0.0058545603944124905, "loss": 2.8103, "step": 1425 }, { "epoch": 0.14671735333401828, "grad_norm": 0.20746375620365143, "learning_rate": 0.005866885784716516, "loss": 2.7994, "step": 1428 }, { "epoch": 0.14702558306791327, "grad_norm": 0.16845661401748657, "learning_rate": 0.005879211175020542, "loss": 2.8286, "step": 1431 }, { "epoch": 0.1473338128018083, "grad_norm": 0.1094370111823082, "learning_rate": 0.005891536565324569, "loss": 2.7888, "step": 1434 }, { "epoch": 0.1476420425357033, "grad_norm": 0.14844520390033722, "learning_rate": 0.005903861955628595, "loss": 2.8035, "step": 1437 }, { "epoch": 0.14795027226959828, "grad_norm": 0.12289691716432571, "learning_rate": 0.005916187345932621, "loss": 2.7852, "step": 1440 }, { "epoch": 0.14825850200349328, "grad_norm": 0.1203322485089302, "learning_rate": 0.0059285127362366475, "loss": 2.8101, "step": 1443 }, { "epoch": 0.14856673173738827, "grad_norm": 0.1871965080499649, "learning_rate": 0.005940838126540674, "loss": 2.7485, "step": 1446 }, { "epoch": 0.14887496147128326, "grad_norm": 0.1567300707101822, "learning_rate": 0.005953163516844701, "loss": 2.8097, "step": 1449 }, { "epoch": 0.14918319120517826, "grad_norm": 0.18046674132347107, "learning_rate": 0.0059654889071487265, "loss": 2.8118, "step": 1452 }, { "epoch": 0.14949142093907325, "grad_norm": 0.23180244863033295, "learning_rate": 0.005977814297452753, "loss": 2.7836, "step": 1455 }, { "epoch": 0.14979965067296824, "grad_norm": 0.2300175577402115, "learning_rate": 0.00599013968775678, "loss": 2.7675, "step": 1458 }, { "epoch": 0.15010788040686324, "grad_norm": 0.11340396106243134, "learning_rate": 0.006002465078060806, "loss": 2.8012, "step": 1461 }, { "epoch": 0.15041611014075826, "grad_norm": 0.10667074471712112, "learning_rate": 0.006014790468364831, "loss": 2.8154, "step": 1464 }, { "epoch": 0.15072433987465325, "grad_norm": 0.10800652205944061, "learning_rate": 0.006027115858668858, "loss": 2.7646, "step": 1467 }, { "epoch": 0.15103256960854824, "grad_norm": 0.2588643431663513, "learning_rate": 0.006039441248972884, "loss": 2.7912, "step": 1470 }, { "epoch": 0.15134079934244324, "grad_norm": 0.32462435960769653, "learning_rate": 0.006051766639276911, "loss": 2.7666, "step": 1473 }, { "epoch": 0.15164902907633823, "grad_norm": 0.23754975199699402, "learning_rate": 0.006064092029580937, "loss": 2.7694, "step": 1476 }, { "epoch": 0.15195725881023323, "grad_norm": 0.14895015954971313, "learning_rate": 0.006076417419884963, "loss": 2.7678, "step": 1479 }, { "epoch": 0.15226548854412822, "grad_norm": 0.3228299021720886, "learning_rate": 0.00608874281018899, "loss": 2.7786, "step": 1482 }, { "epoch": 0.1525737182780232, "grad_norm": 0.15597562491893768, "learning_rate": 0.006101068200493017, "loss": 2.7967, "step": 1485 }, { "epoch": 0.1528819480119182, "grad_norm": 0.09748488664627075, "learning_rate": 0.0061133935907970414, "loss": 2.7673, "step": 1488 }, { "epoch": 0.1531901777458132, "grad_norm": 0.12523339688777924, "learning_rate": 0.006125718981101068, "loss": 2.7391, "step": 1491 }, { "epoch": 0.15349840747970822, "grad_norm": 0.16529253125190735, "learning_rate": 0.006138044371405095, "loss": 2.7642, "step": 1494 }, { "epoch": 0.15380663721360321, "grad_norm": 0.2083311527967453, "learning_rate": 0.006150369761709121, "loss": 2.764, "step": 1497 }, { "epoch": 0.1541148669474982, "grad_norm": 0.13263079524040222, "learning_rate": 0.006162695152013147, "loss": 2.7828, "step": 1500 }, { "epoch": 0.1544230966813932, "grad_norm": 0.1473417580127716, "learning_rate": 0.006175020542317174, "loss": 2.7574, "step": 1503 }, { "epoch": 0.1547313264152882, "grad_norm": 0.22629734873771667, "learning_rate": 0.0061873459326212, "loss": 2.7792, "step": 1506 }, { "epoch": 0.1550395561491832, "grad_norm": 0.21652548015117645, "learning_rate": 0.006199671322925227, "loss": 2.7785, "step": 1509 }, { "epoch": 0.15534778588307818, "grad_norm": 0.1948641836643219, "learning_rate": 0.006211996713229252, "loss": 2.7969, "step": 1512 }, { "epoch": 0.15565601561697318, "grad_norm": 0.13890105485916138, "learning_rate": 0.006224322103533278, "loss": 2.7856, "step": 1515 }, { "epoch": 0.15596424535086817, "grad_norm": 0.09859870374202728, "learning_rate": 0.006236647493837305, "loss": 2.7523, "step": 1518 }, { "epoch": 0.1562724750847632, "grad_norm": 0.10258977860212326, "learning_rate": 0.0062489728841413315, "loss": 2.7466, "step": 1521 }, { "epoch": 0.15658070481865818, "grad_norm": 0.11476584523916245, "learning_rate": 0.006261298274445357, "loss": 2.7314, "step": 1524 }, { "epoch": 0.15688893455255318, "grad_norm": 0.1920320987701416, "learning_rate": 0.006273623664749384, "loss": 2.7647, "step": 1527 }, { "epoch": 0.15719716428644817, "grad_norm": 0.18576020002365112, "learning_rate": 0.0062859490550534105, "loss": 2.7632, "step": 1530 }, { "epoch": 0.15750539402034316, "grad_norm": 0.128046452999115, "learning_rate": 0.006298274445357437, "loss": 2.7237, "step": 1533 }, { "epoch": 0.15781362375423816, "grad_norm": 0.30617430806159973, "learning_rate": 0.006310599835661463, "loss": 2.7907, "step": 1536 }, { "epoch": 0.15812185348813315, "grad_norm": 0.140928253531456, "learning_rate": 0.0063229252259654894, "loss": 2.7879, "step": 1539 }, { "epoch": 0.15843008322202815, "grad_norm": 0.2537645399570465, "learning_rate": 0.006335250616269515, "loss": 2.7513, "step": 1542 }, { "epoch": 0.15873831295592314, "grad_norm": 0.40944191813468933, "learning_rate": 0.006347576006573542, "loss": 2.7418, "step": 1545 }, { "epoch": 0.15904654268981813, "grad_norm": 0.1284068077802658, "learning_rate": 0.0063599013968775675, "loss": 2.7235, "step": 1548 }, { "epoch": 0.15935477242371315, "grad_norm": 0.08984164893627167, "learning_rate": 0.006372226787181594, "loss": 2.7414, "step": 1551 }, { "epoch": 0.15966300215760815, "grad_norm": 0.13366155326366425, "learning_rate": 0.006384552177485621, "loss": 2.7456, "step": 1554 }, { "epoch": 0.15997123189150314, "grad_norm": 0.1179983913898468, "learning_rate": 0.006396877567789647, "loss": 2.7313, "step": 1557 }, { "epoch": 0.16027946162539813, "grad_norm": 0.15718503296375275, "learning_rate": 0.006409202958093673, "loss": 2.7315, "step": 1560 }, { "epoch": 0.16058769135929313, "grad_norm": 0.14405110478401184, "learning_rate": 0.0064215283483977, "loss": 2.7275, "step": 1563 }, { "epoch": 0.16089592109318812, "grad_norm": 0.13050544261932373, "learning_rate": 0.006433853738701726, "loss": 2.6935, "step": 1566 }, { "epoch": 0.16120415082708311, "grad_norm": 0.2343079298734665, "learning_rate": 0.006446179129005751, "loss": 2.6932, "step": 1569 }, { "epoch": 0.1615123805609781, "grad_norm": 0.2493698000907898, "learning_rate": 0.006458504519309778, "loss": 2.7414, "step": 1572 }, { "epoch": 0.1618206102948731, "grad_norm": 0.17371931672096252, "learning_rate": 0.006470829909613804, "loss": 2.7522, "step": 1575 }, { "epoch": 0.1621288400287681, "grad_norm": 0.16282691061496735, "learning_rate": 0.006483155299917831, "loss": 2.7659, "step": 1578 }, { "epoch": 0.16243706976266312, "grad_norm": 0.12791027128696442, "learning_rate": 0.006495480690221857, "loss": 2.7077, "step": 1581 }, { "epoch": 0.1627452994965581, "grad_norm": 0.09789251536130905, "learning_rate": 0.006507806080525883, "loss": 2.7041, "step": 1584 }, { "epoch": 0.1630535292304531, "grad_norm": 0.10156393051147461, "learning_rate": 0.00652013147082991, "loss": 2.685, "step": 1587 }, { "epoch": 0.1633617589643481, "grad_norm": 0.1974211484193802, "learning_rate": 0.006532456861133937, "loss": 2.7183, "step": 1590 }, { "epoch": 0.1636699886982431, "grad_norm": 0.1420728713274002, "learning_rate": 0.0065447822514379615, "loss": 2.7095, "step": 1593 }, { "epoch": 0.16397821843213808, "grad_norm": 0.3637617528438568, "learning_rate": 0.006557107641741988, "loss": 2.7578, "step": 1596 }, { "epoch": 0.16428644816603308, "grad_norm": 0.09830935299396515, "learning_rate": 0.006569433032046015, "loss": 2.6937, "step": 1599 }, { "epoch": 0.16459467789992807, "grad_norm": 0.15821218490600586, "learning_rate": 0.006581758422350041, "loss": 2.7031, "step": 1602 }, { "epoch": 0.16490290763382306, "grad_norm": 0.17226357758045197, "learning_rate": 0.006594083812654067, "loss": 2.6702, "step": 1605 }, { "epoch": 0.16521113736771809, "grad_norm": 0.21252015233039856, "learning_rate": 0.006606409202958094, "loss": 2.6893, "step": 1608 }, { "epoch": 0.16551936710161308, "grad_norm": 0.11433108150959015, "learning_rate": 0.00661873459326212, "loss": 2.6852, "step": 1611 }, { "epoch": 0.16582759683550807, "grad_norm": 0.15884144604206085, "learning_rate": 0.006631059983566147, "loss": 2.7164, "step": 1614 }, { "epoch": 0.16613582656940307, "grad_norm": 0.1429038643836975, "learning_rate": 0.006643385373870173, "loss": 2.6976, "step": 1617 }, { "epoch": 0.16644405630329806, "grad_norm": 0.09187953919172287, "learning_rate": 0.006655710764174198, "loss": 2.7134, "step": 1620 }, { "epoch": 0.16675228603719305, "grad_norm": 0.13670755922794342, "learning_rate": 0.006668036154478225, "loss": 2.6951, "step": 1623 }, { "epoch": 0.16706051577108805, "grad_norm": 0.17965632677078247, "learning_rate": 0.0066803615447822515, "loss": 2.6911, "step": 1626 }, { "epoch": 0.16736874550498304, "grad_norm": 0.21141032874584198, "learning_rate": 0.006692686935086277, "loss": 2.67, "step": 1629 }, { "epoch": 0.16767697523887803, "grad_norm": 0.30064719915390015, "learning_rate": 0.006705012325390304, "loss": 2.6837, "step": 1632 }, { "epoch": 0.16798520497277303, "grad_norm": 0.11874115467071533, "learning_rate": 0.0067173377156943305, "loss": 2.6968, "step": 1635 }, { "epoch": 0.16829343470666805, "grad_norm": 0.10265806317329407, "learning_rate": 0.006729663105998357, "loss": 2.6632, "step": 1638 }, { "epoch": 0.16860166444056304, "grad_norm": 0.10916320979595184, "learning_rate": 0.006741988496302383, "loss": 2.6749, "step": 1641 }, { "epoch": 0.16890989417445804, "grad_norm": 0.2549231946468353, "learning_rate": 0.0067543138866064095, "loss": 2.636, "step": 1644 }, { "epoch": 0.16921812390835303, "grad_norm": 0.15071339905261993, "learning_rate": 0.006766639276910436, "loss": 2.6933, "step": 1647 }, { "epoch": 0.16952635364224802, "grad_norm": 0.1088666021823883, "learning_rate": 0.006778964667214463, "loss": 2.6477, "step": 1650 }, { "epoch": 0.16983458337614302, "grad_norm": 0.0984036773443222, "learning_rate": 0.0067912900575184875, "loss": 2.6801, "step": 1653 }, { "epoch": 0.170142813110038, "grad_norm": 0.15402089059352875, "learning_rate": 0.006803615447822514, "loss": 2.6877, "step": 1656 }, { "epoch": 0.170451042843933, "grad_norm": 0.1299775093793869, "learning_rate": 0.006815940838126541, "loss": 2.6717, "step": 1659 }, { "epoch": 0.170759272577828, "grad_norm": 0.15615323185920715, "learning_rate": 0.006828266228430567, "loss": 2.6578, "step": 1662 }, { "epoch": 0.171067502311723, "grad_norm": 0.122567318379879, "learning_rate": 0.006840591618734593, "loss": 2.6959, "step": 1665 }, { "epoch": 0.171375732045618, "grad_norm": 0.1386043280363083, "learning_rate": 0.00685291700903862, "loss": 2.6491, "step": 1668 }, { "epoch": 0.171683961779513, "grad_norm": 0.1900375783443451, "learning_rate": 0.006865242399342646, "loss": 2.6643, "step": 1671 }, { "epoch": 0.171992191513408, "grad_norm": 0.1118064671754837, "learning_rate": 0.006877567789646673, "loss": 2.6496, "step": 1674 }, { "epoch": 0.172300421247303, "grad_norm": 0.1593448519706726, "learning_rate": 0.006889893179950698, "loss": 2.6833, "step": 1677 }, { "epoch": 0.172608650981198, "grad_norm": 0.17275281250476837, "learning_rate": 0.006902218570254724, "loss": 2.6909, "step": 1680 }, { "epoch": 0.17291688071509298, "grad_norm": 0.13396479189395905, "learning_rate": 0.006914543960558751, "loss": 2.692, "step": 1683 }, { "epoch": 0.17322511044898797, "grad_norm": 0.09812068939208984, "learning_rate": 0.006926869350862778, "loss": 2.6939, "step": 1686 }, { "epoch": 0.17353334018288297, "grad_norm": 0.08181022852659225, "learning_rate": 0.006939194741166803, "loss": 2.6408, "step": 1689 }, { "epoch": 0.17384156991677796, "grad_norm": 0.15573051571846008, "learning_rate": 0.00695152013147083, "loss": 2.6647, "step": 1692 }, { "epoch": 0.17414979965067298, "grad_norm": 0.2834240198135376, "learning_rate": 0.006963845521774857, "loss": 2.6585, "step": 1695 }, { "epoch": 0.17445802938456798, "grad_norm": 0.23794801533222198, "learning_rate": 0.006976170912078883, "loss": 2.6559, "step": 1698 }, { "epoch": 0.17476625911846297, "grad_norm": 0.1332167536020279, "learning_rate": 0.006988496302382908, "loss": 2.6695, "step": 1701 }, { "epoch": 0.17507448885235796, "grad_norm": 0.09555593878030777, "learning_rate": 0.007000821692686935, "loss": 2.6811, "step": 1704 }, { "epoch": 0.17538271858625296, "grad_norm": 0.10987939685583115, "learning_rate": 0.007013147082990961, "loss": 2.6524, "step": 1707 }, { "epoch": 0.17569094832014795, "grad_norm": 0.11458218097686768, "learning_rate": 0.007025472473294988, "loss": 2.6085, "step": 1710 }, { "epoch": 0.17599917805404294, "grad_norm": 0.12646709382534027, "learning_rate": 0.007037797863599014, "loss": 2.6561, "step": 1713 }, { "epoch": 0.17630740778793794, "grad_norm": 0.15338967740535736, "learning_rate": 0.00705012325390304, "loss": 2.6471, "step": 1716 }, { "epoch": 0.17661563752183293, "grad_norm": 0.14660318195819855, "learning_rate": 0.007062448644207067, "loss": 2.6532, "step": 1719 }, { "epoch": 0.17692386725572792, "grad_norm": 0.2730877995491028, "learning_rate": 0.0070747740345110935, "loss": 2.6565, "step": 1722 }, { "epoch": 0.17723209698962294, "grad_norm": 0.26743727922439575, "learning_rate": 0.007087099424815119, "loss": 2.6707, "step": 1725 }, { "epoch": 0.17754032672351794, "grad_norm": 0.13842618465423584, "learning_rate": 0.007099424815119146, "loss": 2.6652, "step": 1728 }, { "epoch": 0.17784855645741293, "grad_norm": 0.15871621668338776, "learning_rate": 0.0071117502054231715, "loss": 2.6464, "step": 1731 }, { "epoch": 0.17815678619130793, "grad_norm": 0.11526347696781158, "learning_rate": 0.007124075595727198, "loss": 2.662, "step": 1734 }, { "epoch": 0.17846501592520292, "grad_norm": 0.21620534360408783, "learning_rate": 0.007136400986031224, "loss": 2.6603, "step": 1737 }, { "epoch": 0.1787732456590979, "grad_norm": 0.0905444398522377, "learning_rate": 0.0071487263763352505, "loss": 2.6523, "step": 1740 }, { "epoch": 0.1790814753929929, "grad_norm": 0.28233054280281067, "learning_rate": 0.007161051766639277, "loss": 2.6597, "step": 1743 }, { "epoch": 0.1793897051268879, "grad_norm": 0.2363336831331253, "learning_rate": 0.007173377156943304, "loss": 2.6483, "step": 1746 }, { "epoch": 0.1796979348607829, "grad_norm": 0.11012139916419983, "learning_rate": 0.0071857025472473295, "loss": 2.6513, "step": 1749 }, { "epoch": 0.1800061645946779, "grad_norm": 0.09720948338508606, "learning_rate": 0.007198027937551356, "loss": 2.6511, "step": 1752 }, { "epoch": 0.1803143943285729, "grad_norm": 0.13130852580070496, "learning_rate": 0.007210353327855383, "loss": 2.6509, "step": 1755 }, { "epoch": 0.1806226240624679, "grad_norm": 0.14865098893642426, "learning_rate": 0.007222678718159409, "loss": 2.6253, "step": 1758 }, { "epoch": 0.1809308537963629, "grad_norm": 0.20482710003852844, "learning_rate": 0.007235004108463434, "loss": 2.6312, "step": 1761 }, { "epoch": 0.1812390835302579, "grad_norm": 0.12063097953796387, "learning_rate": 0.007247329498767461, "loss": 2.6007, "step": 1764 }, { "epoch": 0.18154731326415288, "grad_norm": 0.23084934055805206, "learning_rate": 0.007259654889071487, "loss": 2.6129, "step": 1767 }, { "epoch": 0.18185554299804788, "grad_norm": 0.10387217253446579, "learning_rate": 0.007271980279375514, "loss": 2.6309, "step": 1770 }, { "epoch": 0.18216377273194287, "grad_norm": 0.14229682087898254, "learning_rate": 0.00728430566967954, "loss": 2.6074, "step": 1773 }, { "epoch": 0.18247200246583786, "grad_norm": 0.12009115517139435, "learning_rate": 0.007296631059983566, "loss": 2.6407, "step": 1776 }, { "epoch": 0.18278023219973286, "grad_norm": 0.15677185356616974, "learning_rate": 0.007308956450287593, "loss": 2.6268, "step": 1779 }, { "epoch": 0.18308846193362788, "grad_norm": 0.13304303586483002, "learning_rate": 0.0073212818405916195, "loss": 2.6463, "step": 1782 }, { "epoch": 0.18339669166752287, "grad_norm": 0.15444768965244293, "learning_rate": 0.007333607230895644, "loss": 2.6218, "step": 1785 }, { "epoch": 0.18370492140141786, "grad_norm": 0.1738140732049942, "learning_rate": 0.007345932621199671, "loss": 2.6525, "step": 1788 }, { "epoch": 0.18401315113531286, "grad_norm": 0.13087227940559387, "learning_rate": 0.007358258011503698, "loss": 2.6266, "step": 1791 }, { "epoch": 0.18432138086920785, "grad_norm": 0.1026511862874031, "learning_rate": 0.007370583401807724, "loss": 2.6017, "step": 1794 }, { "epoch": 0.18462961060310285, "grad_norm": 0.11183813214302063, "learning_rate": 0.00738290879211175, "loss": 2.5966, "step": 1797 }, { "epoch": 0.18493784033699784, "grad_norm": 0.12239934504032135, "learning_rate": 0.007395234182415777, "loss": 2.6205, "step": 1800 }, { "epoch": 0.18524607007089283, "grad_norm": 0.2630854845046997, "learning_rate": 0.007407559572719803, "loss": 2.609, "step": 1803 }, { "epoch": 0.18555429980478783, "grad_norm": 0.24282613396644592, "learning_rate": 0.00741988496302383, "loss": 2.6405, "step": 1806 }, { "epoch": 0.18586252953868282, "grad_norm": 0.2825084328651428, "learning_rate": 0.007432210353327855, "loss": 2.5933, "step": 1809 }, { "epoch": 0.18617075927257784, "grad_norm": 0.26462721824645996, "learning_rate": 0.007444535743631881, "loss": 2.6021, "step": 1812 }, { "epoch": 0.18647898900647283, "grad_norm": 0.11797992140054703, "learning_rate": 0.007456861133935908, "loss": 2.6246, "step": 1815 }, { "epoch": 0.18678721874036783, "grad_norm": 0.14044708013534546, "learning_rate": 0.0074691865242399345, "loss": 2.6028, "step": 1818 }, { "epoch": 0.18709544847426282, "grad_norm": 0.1374548226594925, "learning_rate": 0.00748151191454396, "loss": 2.6092, "step": 1821 }, { "epoch": 0.18740367820815781, "grad_norm": 0.10084279626607895, "learning_rate": 0.007493837304847987, "loss": 2.6162, "step": 1824 }, { "epoch": 0.1877119079420528, "grad_norm": 0.1052001565694809, "learning_rate": 0.0075061626951520135, "loss": 2.5742, "step": 1827 }, { "epoch": 0.1880201376759478, "grad_norm": 0.11738535761833191, "learning_rate": 0.00751848808545604, "loss": 2.5715, "step": 1830 }, { "epoch": 0.1883283674098428, "grad_norm": 0.10453224182128906, "learning_rate": 0.007530813475760066, "loss": 2.5896, "step": 1833 }, { "epoch": 0.1886365971437378, "grad_norm": 0.10509374737739563, "learning_rate": 0.007543138866064092, "loss": 2.6047, "step": 1836 }, { "epoch": 0.18894482687763278, "grad_norm": 0.11291799694299698, "learning_rate": 0.007555464256368119, "loss": 2.6062, "step": 1839 }, { "epoch": 0.1892530566115278, "grad_norm": 0.11998583376407623, "learning_rate": 0.007567789646672146, "loss": 2.629, "step": 1842 }, { "epoch": 0.1895612863454228, "grad_norm": 0.21776226162910461, "learning_rate": 0.0075801150369761705, "loss": 2.5847, "step": 1845 }, { "epoch": 0.1898695160793178, "grad_norm": 0.210985466837883, "learning_rate": 0.007592440427280197, "loss": 2.5901, "step": 1848 }, { "epoch": 0.19017774581321278, "grad_norm": 0.11799308657646179, "learning_rate": 0.007604765817584224, "loss": 2.5893, "step": 1851 }, { "epoch": 0.19048597554710778, "grad_norm": 0.10019934922456741, "learning_rate": 0.00761709120788825, "loss": 2.6327, "step": 1854 }, { "epoch": 0.19079420528100277, "grad_norm": 0.07964596897363663, "learning_rate": 0.007629416598192276, "loss": 2.5921, "step": 1857 }, { "epoch": 0.19110243501489776, "grad_norm": 0.16393065452575684, "learning_rate": 0.007641741988496303, "loss": 2.5912, "step": 1860 }, { "epoch": 0.19141066474879276, "grad_norm": 0.324639230966568, "learning_rate": 0.007654067378800329, "loss": 2.5998, "step": 1863 }, { "epoch": 0.19171889448268775, "grad_norm": 0.14071421325206757, "learning_rate": 0.007666392769104356, "loss": 2.5803, "step": 1866 }, { "epoch": 0.19202712421658277, "grad_norm": 0.20063026249408722, "learning_rate": 0.007678718159408381, "loss": 2.6019, "step": 1869 }, { "epoch": 0.19233535395047777, "grad_norm": 0.11311519891023636, "learning_rate": 0.007691043549712407, "loss": 2.5645, "step": 1872 }, { "epoch": 0.19264358368437276, "grad_norm": 0.08542342483997345, "learning_rate": 0.007703368940016434, "loss": 2.6122, "step": 1875 }, { "epoch": 0.19295181341826775, "grad_norm": 0.08306868374347687, "learning_rate": 0.007715694330320461, "loss": 2.5859, "step": 1878 }, { "epoch": 0.19326004315216275, "grad_norm": 0.11635984480381012, "learning_rate": 0.007728019720624486, "loss": 2.5855, "step": 1881 }, { "epoch": 0.19356827288605774, "grad_norm": 0.08945252746343613, "learning_rate": 0.007740345110928513, "loss": 2.5509, "step": 1884 }, { "epoch": 0.19387650261995273, "grad_norm": 0.19044962525367737, "learning_rate": 0.0077526705012325395, "loss": 2.559, "step": 1887 }, { "epoch": 0.19418473235384773, "grad_norm": 0.1462780088186264, "learning_rate": 0.007764995891536566, "loss": 2.5749, "step": 1890 }, { "epoch": 0.19449296208774272, "grad_norm": 0.15944691002368927, "learning_rate": 0.007777321281840591, "loss": 2.5801, "step": 1893 }, { "epoch": 0.19480119182163771, "grad_norm": 0.10125305503606796, "learning_rate": 0.007789646672144618, "loss": 2.5821, "step": 1896 }, { "epoch": 0.19510942155553274, "grad_norm": 0.17344938218593597, "learning_rate": 0.007801972062448644, "loss": 2.5905, "step": 1899 }, { "epoch": 0.19541765128942773, "grad_norm": 0.16651591658592224, "learning_rate": 0.007814297452752672, "loss": 2.5668, "step": 1902 }, { "epoch": 0.19572588102332272, "grad_norm": 0.17417702078819275, "learning_rate": 0.007826622843056696, "loss": 2.568, "step": 1905 }, { "epoch": 0.19603411075721772, "grad_norm": 0.11182334274053574, "learning_rate": 0.007838948233360723, "loss": 2.5547, "step": 1908 }, { "epoch": 0.1963423404911127, "grad_norm": 0.23256631195545197, "learning_rate": 0.007851273623664749, "loss": 2.5722, "step": 1911 }, { "epoch": 0.1966505702250077, "grad_norm": 0.18180392682552338, "learning_rate": 0.007863599013968776, "loss": 2.558, "step": 1914 }, { "epoch": 0.1969587999589027, "grad_norm": 0.12168890237808228, "learning_rate": 0.007875924404272802, "loss": 2.5977, "step": 1917 }, { "epoch": 0.1972670296927977, "grad_norm": 0.11032187938690186, "learning_rate": 0.007888249794576828, "loss": 2.5846, "step": 1920 }, { "epoch": 0.19757525942669268, "grad_norm": 0.0740116760134697, "learning_rate": 0.007900575184880855, "loss": 2.5824, "step": 1923 }, { "epoch": 0.19788348916058768, "grad_norm": 0.05902474746108055, "learning_rate": 0.007912900575184881, "loss": 2.5497, "step": 1926 }, { "epoch": 0.1981917188944827, "grad_norm": 0.09003309905529022, "learning_rate": 0.007925225965488907, "loss": 2.5523, "step": 1929 }, { "epoch": 0.1984999486283777, "grad_norm": 0.4191035330295563, "learning_rate": 0.007937551355792934, "loss": 2.6223, "step": 1932 }, { "epoch": 0.1988081783622727, "grad_norm": 0.17093214392662048, "learning_rate": 0.00794987674609696, "loss": 2.5647, "step": 1935 }, { "epoch": 0.19911640809616768, "grad_norm": 0.0921127051115036, "learning_rate": 0.007962202136400986, "loss": 2.564, "step": 1938 }, { "epoch": 0.19942463783006267, "grad_norm": 0.14204134047031403, "learning_rate": 0.007974527526705012, "loss": 2.5972, "step": 1941 }, { "epoch": 0.19973286756395767, "grad_norm": 0.07556895911693573, "learning_rate": 0.007986852917009039, "loss": 2.5796, "step": 1944 }, { "epoch": 0.20004109729785266, "grad_norm": 0.07290320843458176, "learning_rate": 0.007999178307313065, "loss": 2.5564, "step": 1947 }, { "epoch": 0.20034932703174765, "grad_norm": 0.1624913364648819, "learning_rate": 0.008011503697617092, "loss": 2.5849, "step": 1950 }, { "epoch": 0.20065755676564265, "grad_norm": 0.11839967221021652, "learning_rate": 0.008023829087921118, "loss": 2.5611, "step": 1953 }, { "epoch": 0.20096578649953767, "grad_norm": 0.14280788600444794, "learning_rate": 0.008036154478225144, "loss": 2.5289, "step": 1956 }, { "epoch": 0.20127401623343266, "grad_norm": 0.11515247821807861, "learning_rate": 0.008048479868529171, "loss": 2.5678, "step": 1959 }, { "epoch": 0.20158224596732766, "grad_norm": 0.1147715225815773, "learning_rate": 0.008060805258833197, "loss": 2.5452, "step": 1962 }, { "epoch": 0.20189047570122265, "grad_norm": 0.09767001122236252, "learning_rate": 0.008073130649137223, "loss": 2.6023, "step": 1965 }, { "epoch": 0.20219870543511764, "grad_norm": 0.0866391509771347, "learning_rate": 0.008085456039441248, "loss": 2.5518, "step": 1968 }, { "epoch": 0.20250693516901264, "grad_norm": 0.1610632985830307, "learning_rate": 0.008097781429745276, "loss": 2.5271, "step": 1971 }, { "epoch": 0.20281516490290763, "grad_norm": 0.20238341391086578, "learning_rate": 0.008110106820049302, "loss": 2.5597, "step": 1974 }, { "epoch": 0.20312339463680262, "grad_norm": 0.11807162314653397, "learning_rate": 0.008122432210353327, "loss": 2.5663, "step": 1977 }, { "epoch": 0.20343162437069762, "grad_norm": 0.14654900133609772, "learning_rate": 0.008134757600657355, "loss": 2.5729, "step": 1980 }, { "epoch": 0.2037398541045926, "grad_norm": 0.17804567515850067, "learning_rate": 0.00814708299096138, "loss": 2.5658, "step": 1983 }, { "epoch": 0.20404808383848763, "grad_norm": 0.12376303225755692, "learning_rate": 0.008159408381265408, "loss": 2.5703, "step": 1986 }, { "epoch": 0.20435631357238263, "grad_norm": 0.1248418316245079, "learning_rate": 0.008171733771569432, "loss": 2.5328, "step": 1989 }, { "epoch": 0.20466454330627762, "grad_norm": 0.08159278333187103, "learning_rate": 0.00818405916187346, "loss": 2.5349, "step": 1992 }, { "epoch": 0.2049727730401726, "grad_norm": 0.11184779554605484, "learning_rate": 0.008196384552177485, "loss": 2.5557, "step": 1995 }, { "epoch": 0.2052810027740676, "grad_norm": 0.09568610787391663, "learning_rate": 0.008208709942481513, "loss": 2.5415, "step": 1998 }, { "epoch": 0.2055892325079626, "grad_norm": 0.08708583563566208, "learning_rate": 0.008221035332785539, "loss": 2.5369, "step": 2001 }, { "epoch": 0.2058974622418576, "grad_norm": 0.11849135160446167, "learning_rate": 0.008233360723089564, "loss": 2.5617, "step": 2004 }, { "epoch": 0.2062056919757526, "grad_norm": 0.1407340168952942, "learning_rate": 0.008245686113393592, "loss": 2.5374, "step": 2007 }, { "epoch": 0.20651392170964758, "grad_norm": 0.13198955357074738, "learning_rate": 0.008258011503697617, "loss": 2.57, "step": 2010 }, { "epoch": 0.20682215144354257, "grad_norm": 0.12408044934272766, "learning_rate": 0.008270336894001643, "loss": 2.5344, "step": 2013 }, { "epoch": 0.2071303811774376, "grad_norm": 0.149169921875, "learning_rate": 0.008282662284305669, "loss": 2.5357, "step": 2016 }, { "epoch": 0.2074386109113326, "grad_norm": 0.10010293871164322, "learning_rate": 0.008294987674609696, "loss": 2.5166, "step": 2019 }, { "epoch": 0.20774684064522758, "grad_norm": 0.17650344967842102, "learning_rate": 0.008307313064913722, "loss": 2.5664, "step": 2022 }, { "epoch": 0.20805507037912258, "grad_norm": 0.09946206212043762, "learning_rate": 0.008319638455217748, "loss": 2.5378, "step": 2025 }, { "epoch": 0.20836330011301757, "grad_norm": 0.07705225795507431, "learning_rate": 0.008331963845521775, "loss": 2.5088, "step": 2028 }, { "epoch": 0.20867152984691256, "grad_norm": 0.18174925446510315, "learning_rate": 0.008344289235825801, "loss": 2.5264, "step": 2031 }, { "epoch": 0.20897975958080756, "grad_norm": 0.14415894448757172, "learning_rate": 0.008356614626129829, "loss": 2.5549, "step": 2034 }, { "epoch": 0.20928798931470255, "grad_norm": 0.17721933126449585, "learning_rate": 0.008368940016433854, "loss": 2.5476, "step": 2037 }, { "epoch": 0.20959621904859754, "grad_norm": 0.1727544367313385, "learning_rate": 0.00838126540673788, "loss": 2.5809, "step": 2040 }, { "epoch": 0.20990444878249256, "grad_norm": 0.20624054968357086, "learning_rate": 0.008393590797041908, "loss": 2.5256, "step": 2043 }, { "epoch": 0.21021267851638756, "grad_norm": 0.08070924133062363, "learning_rate": 0.008405916187345933, "loss": 2.5537, "step": 2046 }, { "epoch": 0.21052090825028255, "grad_norm": 0.07868220657110214, "learning_rate": 0.008418241577649959, "loss": 2.5266, "step": 2049 }, { "epoch": 0.21082913798417754, "grad_norm": 0.19941876828670502, "learning_rate": 0.008430566967953985, "loss": 2.5344, "step": 2052 }, { "epoch": 0.21113736771807254, "grad_norm": 0.08758697658777237, "learning_rate": 0.008442892358258012, "loss": 2.5409, "step": 2055 }, { "epoch": 0.21144559745196753, "grad_norm": 0.11635969579219818, "learning_rate": 0.008455217748562038, "loss": 2.5497, "step": 2058 }, { "epoch": 0.21175382718586253, "grad_norm": 0.16910326480865479, "learning_rate": 0.008467543138866064, "loss": 2.5509, "step": 2061 }, { "epoch": 0.21206205691975752, "grad_norm": 0.14605827629566193, "learning_rate": 0.008479868529170091, "loss": 2.5589, "step": 2064 }, { "epoch": 0.2123702866536525, "grad_norm": 0.18890123069286346, "learning_rate": 0.008492193919474117, "loss": 2.5454, "step": 2067 }, { "epoch": 0.2126785163875475, "grad_norm": 0.09277717024087906, "learning_rate": 0.008504519309778144, "loss": 2.4984, "step": 2070 }, { "epoch": 0.21298674612144253, "grad_norm": 0.07268327474594116, "learning_rate": 0.008516844700082168, "loss": 2.5323, "step": 2073 }, { "epoch": 0.21329497585533752, "grad_norm": 0.0807403028011322, "learning_rate": 0.008529170090386196, "loss": 2.5083, "step": 2076 }, { "epoch": 0.21360320558923251, "grad_norm": 0.12681947648525238, "learning_rate": 0.008541495480690222, "loss": 2.5386, "step": 2079 }, { "epoch": 0.2139114353231275, "grad_norm": 0.25378334522247314, "learning_rate": 0.008553820870994249, "loss": 2.5188, "step": 2082 }, { "epoch": 0.2142196650570225, "grad_norm": 0.15101733803749084, "learning_rate": 0.008566146261298275, "loss": 2.5457, "step": 2085 }, { "epoch": 0.2145278947909175, "grad_norm": 0.17336703836917877, "learning_rate": 0.0085784716516023, "loss": 2.5206, "step": 2088 }, { "epoch": 0.2148361245248125, "grad_norm": 0.07735245674848557, "learning_rate": 0.008590797041906328, "loss": 2.5297, "step": 2091 }, { "epoch": 0.21514435425870748, "grad_norm": 0.15841136872768402, "learning_rate": 0.008603122432210354, "loss": 2.5086, "step": 2094 }, { "epoch": 0.21545258399260248, "grad_norm": 0.15941859781742096, "learning_rate": 0.00861544782251438, "loss": 2.5316, "step": 2097 }, { "epoch": 0.21576081372649747, "grad_norm": 0.13837756216526031, "learning_rate": 0.008627773212818405, "loss": 2.4818, "step": 2100 }, { "epoch": 0.2160690434603925, "grad_norm": 0.14743675291538239, "learning_rate": 0.008640098603122433, "loss": 2.5351, "step": 2103 }, { "epoch": 0.21637727319428748, "grad_norm": 0.15961112082004547, "learning_rate": 0.008652423993426459, "loss": 2.4916, "step": 2106 }, { "epoch": 0.21668550292818248, "grad_norm": 0.16091223061084747, "learning_rate": 0.008664749383730484, "loss": 2.5026, "step": 2109 }, { "epoch": 0.21699373266207747, "grad_norm": 0.1695915311574936, "learning_rate": 0.008677074774034512, "loss": 2.4994, "step": 2112 }, { "epoch": 0.21730196239597246, "grad_norm": 0.1457175761461258, "learning_rate": 0.008689400164338537, "loss": 2.5225, "step": 2115 }, { "epoch": 0.21761019212986746, "grad_norm": 0.0995342880487442, "learning_rate": 0.008701725554642563, "loss": 2.5373, "step": 2118 }, { "epoch": 0.21791842186376245, "grad_norm": 0.11527393013238907, "learning_rate": 0.00871405094494659, "loss": 2.5207, "step": 2121 }, { "epoch": 0.21822665159765745, "grad_norm": 0.07951527088880539, "learning_rate": 0.008726376335250616, "loss": 2.4868, "step": 2124 }, { "epoch": 0.21853488133155244, "grad_norm": 0.11319970339536667, "learning_rate": 0.008738701725554644, "loss": 2.4965, "step": 2127 }, { "epoch": 0.21884311106544746, "grad_norm": 0.14932893216609955, "learning_rate": 0.008751027115858668, "loss": 2.5164, "step": 2130 }, { "epoch": 0.21915134079934245, "grad_norm": 0.1703396886587143, "learning_rate": 0.008763352506162695, "loss": 2.5175, "step": 2133 }, { "epoch": 0.21945957053323745, "grad_norm": 0.2208787351846695, "learning_rate": 0.008775677896466721, "loss": 2.521, "step": 2136 }, { "epoch": 0.21976780026713244, "grad_norm": 0.0884699895977974, "learning_rate": 0.008788003286770749, "loss": 2.5356, "step": 2139 }, { "epoch": 0.22007603000102743, "grad_norm": 0.06739311665296555, "learning_rate": 0.008800328677074774, "loss": 2.5102, "step": 2142 }, { "epoch": 0.22038425973492243, "grad_norm": 0.09653139859437943, "learning_rate": 0.0088126540673788, "loss": 2.5047, "step": 2145 }, { "epoch": 0.22069248946881742, "grad_norm": 0.11972832679748535, "learning_rate": 0.008824979457682828, "loss": 2.5086, "step": 2148 }, { "epoch": 0.22100071920271241, "grad_norm": 0.13725396990776062, "learning_rate": 0.008837304847986853, "loss": 2.5034, "step": 2151 }, { "epoch": 0.2213089489366074, "grad_norm": 0.09293966740369797, "learning_rate": 0.008849630238290879, "loss": 2.5004, "step": 2154 }, { "epoch": 0.2216171786705024, "grad_norm": 0.07625159621238708, "learning_rate": 0.008861955628594905, "loss": 2.508, "step": 2157 }, { "epoch": 0.22192540840439742, "grad_norm": 0.08581928163766861, "learning_rate": 0.008874281018898932, "loss": 2.4973, "step": 2160 }, { "epoch": 0.22223363813829242, "grad_norm": 0.12700457870960236, "learning_rate": 0.008886606409202958, "loss": 2.5174, "step": 2163 }, { "epoch": 0.2225418678721874, "grad_norm": 0.17155064642429352, "learning_rate": 0.008898931799506984, "loss": 2.4969, "step": 2166 }, { "epoch": 0.2228500976060824, "grad_norm": 0.13356278836727142, "learning_rate": 0.008911257189811011, "loss": 2.4876, "step": 2169 }, { "epoch": 0.2231583273399774, "grad_norm": 0.07805536687374115, "learning_rate": 0.008923582580115037, "loss": 2.5151, "step": 2172 }, { "epoch": 0.2234665570738724, "grad_norm": 0.10661714524030685, "learning_rate": 0.008935907970419064, "loss": 2.4607, "step": 2175 }, { "epoch": 0.22377478680776738, "grad_norm": 0.15095242857933044, "learning_rate": 0.008948233360723088, "loss": 2.5358, "step": 2178 }, { "epoch": 0.22408301654166238, "grad_norm": 0.11287077516317368, "learning_rate": 0.008960558751027116, "loss": 2.5289, "step": 2181 }, { "epoch": 0.22439124627555737, "grad_norm": 0.16408318281173706, "learning_rate": 0.008972884141331142, "loss": 2.5256, "step": 2184 }, { "epoch": 0.22469947600945236, "grad_norm": 0.1227622851729393, "learning_rate": 0.008985209531635169, "loss": 2.5091, "step": 2187 }, { "epoch": 0.22500770574334739, "grad_norm": 0.06549924612045288, "learning_rate": 0.008997534921939195, "loss": 2.4908, "step": 2190 }, { "epoch": 0.22531593547724238, "grad_norm": 0.09310626983642578, "learning_rate": 0.00900986031224322, "loss": 2.4903, "step": 2193 }, { "epoch": 0.22562416521113737, "grad_norm": 0.12637357413768768, "learning_rate": 0.009022185702547248, "loss": 2.5089, "step": 2196 }, { "epoch": 0.22593239494503237, "grad_norm": 0.1691301167011261, "learning_rate": 0.009034511092851274, "loss": 2.4984, "step": 2199 }, { "epoch": 0.22624062467892736, "grad_norm": 0.18173068761825562, "learning_rate": 0.0090468364831553, "loss": 2.4552, "step": 2202 }, { "epoch": 0.22654885441282235, "grad_norm": 0.19549600780010223, "learning_rate": 0.009059161873459327, "loss": 2.4642, "step": 2205 }, { "epoch": 0.22685708414671735, "grad_norm": 0.09038446098566055, "learning_rate": 0.009071487263763353, "loss": 2.5017, "step": 2208 }, { "epoch": 0.22716531388061234, "grad_norm": 0.07959726452827454, "learning_rate": 0.009083812654067379, "loss": 2.4934, "step": 2211 }, { "epoch": 0.22747354361450733, "grad_norm": 0.07991699874401093, "learning_rate": 0.009096138044371404, "loss": 2.498, "step": 2214 }, { "epoch": 0.22778177334840236, "grad_norm": 0.09022307395935059, "learning_rate": 0.009108463434675432, "loss": 2.4832, "step": 2217 }, { "epoch": 0.22809000308229735, "grad_norm": 0.11399543285369873, "learning_rate": 0.009120788824979457, "loss": 2.4929, "step": 2220 }, { "epoch": 0.22839823281619234, "grad_norm": 0.10349836200475693, "learning_rate": 0.009133114215283485, "loss": 2.4622, "step": 2223 }, { "epoch": 0.22870646255008734, "grad_norm": 0.17096632719039917, "learning_rate": 0.00914543960558751, "loss": 2.5103, "step": 2226 }, { "epoch": 0.22901469228398233, "grad_norm": 0.13803228735923767, "learning_rate": 0.009157764995891536, "loss": 2.5034, "step": 2229 }, { "epoch": 0.22932292201787732, "grad_norm": 0.16332487761974335, "learning_rate": 0.009170090386195564, "loss": 2.5051, "step": 2232 }, { "epoch": 0.22963115175177232, "grad_norm": 0.12147244811058044, "learning_rate": 0.00918241577649959, "loss": 2.4794, "step": 2235 }, { "epoch": 0.2299393814856673, "grad_norm": 0.08943907916545868, "learning_rate": 0.009194741166803615, "loss": 2.5331, "step": 2238 }, { "epoch": 0.2302476112195623, "grad_norm": 0.08069117367267609, "learning_rate": 0.009207066557107641, "loss": 2.4807, "step": 2241 }, { "epoch": 0.2305558409534573, "grad_norm": 0.11125557869672775, "learning_rate": 0.009219391947411669, "loss": 2.4567, "step": 2244 }, { "epoch": 0.23086407068735232, "grad_norm": 0.2825096547603607, "learning_rate": 0.009231717337715694, "loss": 2.5101, "step": 2247 }, { "epoch": 0.2311723004212473, "grad_norm": 0.10534384101629257, "learning_rate": 0.00924404272801972, "loss": 2.5272, "step": 2250 }, { "epoch": 0.2314805301551423, "grad_norm": 0.07159514725208282, "learning_rate": 0.009256368118323748, "loss": 2.4707, "step": 2253 }, { "epoch": 0.2317887598890373, "grad_norm": 0.06435802578926086, "learning_rate": 0.009268693508627773, "loss": 2.4788, "step": 2256 }, { "epoch": 0.2320969896229323, "grad_norm": 0.09402693063020706, "learning_rate": 0.0092810188989318, "loss": 2.4639, "step": 2259 }, { "epoch": 0.2324052193568273, "grad_norm": 0.18836408853530884, "learning_rate": 0.009293344289235825, "loss": 2.4747, "step": 2262 }, { "epoch": 0.23271344909072228, "grad_norm": 0.09705471992492676, "learning_rate": 0.009305669679539852, "loss": 2.5041, "step": 2265 }, { "epoch": 0.23302167882461727, "grad_norm": 0.09185091406106949, "learning_rate": 0.009317995069843878, "loss": 2.4625, "step": 2268 }, { "epoch": 0.23332990855851227, "grad_norm": 0.0848812386393547, "learning_rate": 0.009330320460147905, "loss": 2.4876, "step": 2271 }, { "epoch": 0.2336381382924073, "grad_norm": 0.07989475131034851, "learning_rate": 0.009342645850451931, "loss": 2.4697, "step": 2274 }, { "epoch": 0.23394636802630228, "grad_norm": 0.09660454094409943, "learning_rate": 0.009354971240755957, "loss": 2.4917, "step": 2277 }, { "epoch": 0.23425459776019728, "grad_norm": 0.09550273418426514, "learning_rate": 0.009367296631059984, "loss": 2.4806, "step": 2280 }, { "epoch": 0.23456282749409227, "grad_norm": 0.16650651395320892, "learning_rate": 0.00937962202136401, "loss": 2.4424, "step": 2283 }, { "epoch": 0.23487105722798726, "grad_norm": 0.1455817073583603, "learning_rate": 0.009391947411668036, "loss": 2.4907, "step": 2286 }, { "epoch": 0.23517928696188226, "grad_norm": 0.075865738093853, "learning_rate": 0.009404272801972062, "loss": 2.5004, "step": 2289 }, { "epoch": 0.23548751669577725, "grad_norm": 0.188491553068161, "learning_rate": 0.009416598192276089, "loss": 2.5111, "step": 2292 }, { "epoch": 0.23579574642967224, "grad_norm": 0.07567702233791351, "learning_rate": 0.009428923582580115, "loss": 2.4966, "step": 2295 }, { "epoch": 0.23610397616356724, "grad_norm": 0.0682358667254448, "learning_rate": 0.00944124897288414, "loss": 2.4781, "step": 2298 }, { "epoch": 0.23641220589746223, "grad_norm": 0.173895925283432, "learning_rate": 0.009453574363188168, "loss": 2.4471, "step": 2301 }, { "epoch": 0.23672043563135725, "grad_norm": 0.15088587999343872, "learning_rate": 0.009465899753492194, "loss": 2.4783, "step": 2304 }, { "epoch": 0.23702866536525224, "grad_norm": 0.09947361797094345, "learning_rate": 0.009478225143796221, "loss": 2.4757, "step": 2307 }, { "epoch": 0.23733689509914724, "grad_norm": 0.0709480568766594, "learning_rate": 0.009490550534100247, "loss": 2.4617, "step": 2310 }, { "epoch": 0.23764512483304223, "grad_norm": 0.11335324496030807, "learning_rate": 0.009502875924404273, "loss": 2.4506, "step": 2313 }, { "epoch": 0.23795335456693723, "grad_norm": 0.10329569876194, "learning_rate": 0.0095152013147083, "loss": 2.4444, "step": 2316 }, { "epoch": 0.23826158430083222, "grad_norm": 0.18935157358646393, "learning_rate": 0.009527526705012326, "loss": 2.4739, "step": 2319 }, { "epoch": 0.2385698140347272, "grad_norm": 0.10977230221033096, "learning_rate": 0.009539852095316352, "loss": 2.4849, "step": 2322 }, { "epoch": 0.2388780437686222, "grad_norm": 0.1623351126909256, "learning_rate": 0.009552177485620377, "loss": 2.4856, "step": 2325 }, { "epoch": 0.2391862735025172, "grad_norm": 0.12067209929227829, "learning_rate": 0.009564502875924405, "loss": 2.427, "step": 2328 }, { "epoch": 0.2394945032364122, "grad_norm": 0.12578649818897247, "learning_rate": 0.00957682826622843, "loss": 2.4719, "step": 2331 }, { "epoch": 0.23980273297030721, "grad_norm": 0.09442924708127975, "learning_rate": 0.009589153656532456, "loss": 2.475, "step": 2334 }, { "epoch": 0.2401109627042022, "grad_norm": 0.06693053990602493, "learning_rate": 0.009601479046836484, "loss": 2.4949, "step": 2337 }, { "epoch": 0.2404191924380972, "grad_norm": 0.09371168911457062, "learning_rate": 0.00961380443714051, "loss": 2.4611, "step": 2340 }, { "epoch": 0.2407274221719922, "grad_norm": 0.11009377986192703, "learning_rate": 0.009626129827444537, "loss": 2.4998, "step": 2343 }, { "epoch": 0.2410356519058872, "grad_norm": 0.08789053559303284, "learning_rate": 0.009638455217748561, "loss": 2.4891, "step": 2346 }, { "epoch": 0.24134388163978218, "grad_norm": 0.2513992488384247, "learning_rate": 0.009650780608052589, "loss": 2.4613, "step": 2349 }, { "epoch": 0.24165211137367718, "grad_norm": 0.09223336726427078, "learning_rate": 0.009663105998356614, "loss": 2.4874, "step": 2352 }, { "epoch": 0.24196034110757217, "grad_norm": 0.08941586315631866, "learning_rate": 0.009675431388660642, "loss": 2.4777, "step": 2355 }, { "epoch": 0.24226857084146716, "grad_norm": 0.09664765000343323, "learning_rate": 0.009687756778964668, "loss": 2.4728, "step": 2358 }, { "epoch": 0.24257680057536218, "grad_norm": 0.08079587668180466, "learning_rate": 0.009700082169268693, "loss": 2.4621, "step": 2361 }, { "epoch": 0.24288503030925718, "grad_norm": 0.07663597911596298, "learning_rate": 0.00971240755957272, "loss": 2.487, "step": 2364 }, { "epoch": 0.24319326004315217, "grad_norm": 0.07564109563827515, "learning_rate": 0.009724732949876747, "loss": 2.4123, "step": 2367 }, { "epoch": 0.24350148977704716, "grad_norm": 0.1025756299495697, "learning_rate": 0.009737058340180772, "loss": 2.4669, "step": 2370 }, { "epoch": 0.24380971951094216, "grad_norm": 0.1370251476764679, "learning_rate": 0.009749383730484798, "loss": 2.4664, "step": 2373 }, { "epoch": 0.24411794924483715, "grad_norm": 0.11926325410604477, "learning_rate": 0.009761709120788825, "loss": 2.4483, "step": 2376 }, { "epoch": 0.24442617897873214, "grad_norm": 0.16847510635852814, "learning_rate": 0.009774034511092851, "loss": 2.4421, "step": 2379 }, { "epoch": 0.24473440871262714, "grad_norm": 0.14343461394309998, "learning_rate": 0.009786359901396877, "loss": 2.452, "step": 2382 }, { "epoch": 0.24504263844652213, "grad_norm": 0.0658588707447052, "learning_rate": 0.009798685291700904, "loss": 2.4717, "step": 2385 }, { "epoch": 0.24535086818041713, "grad_norm": 0.09394209086894989, "learning_rate": 0.00981101068200493, "loss": 2.4467, "step": 2388 }, { "epoch": 0.24565909791431215, "grad_norm": 0.0717134177684784, "learning_rate": 0.009823336072308958, "loss": 2.4505, "step": 2391 }, { "epoch": 0.24596732764820714, "grad_norm": 0.07518400996923447, "learning_rate": 0.009835661462612983, "loss": 2.431, "step": 2394 }, { "epoch": 0.24627555738210213, "grad_norm": 0.10242413729429245, "learning_rate": 0.00984798685291701, "loss": 2.451, "step": 2397 }, { "epoch": 0.24658378711599713, "grad_norm": 0.11668457090854645, "learning_rate": 0.009860312243221035, "loss": 2.4574, "step": 2400 }, { "epoch": 0.24689201684989212, "grad_norm": 0.1074887290596962, "learning_rate": 0.009872637633525062, "loss": 2.4688, "step": 2403 }, { "epoch": 0.24720024658378711, "grad_norm": 0.143118217587471, "learning_rate": 0.009884963023829088, "loss": 2.4614, "step": 2406 }, { "epoch": 0.2475084763176821, "grad_norm": 0.08865509182214737, "learning_rate": 0.009897288414133114, "loss": 2.4768, "step": 2409 }, { "epoch": 0.2478167060515771, "grad_norm": 0.10735021531581879, "learning_rate": 0.009909613804437141, "loss": 2.457, "step": 2412 }, { "epoch": 0.2481249357854721, "grad_norm": 0.11766096949577332, "learning_rate": 0.009921939194741167, "loss": 2.4661, "step": 2415 }, { "epoch": 0.2484331655193671, "grad_norm": 0.11476657539606094, "learning_rate": 0.009934264585045193, "loss": 2.4488, "step": 2418 }, { "epoch": 0.2487413952532621, "grad_norm": 0.05828983336687088, "learning_rate": 0.00994658997534922, "loss": 2.4167, "step": 2421 }, { "epoch": 0.2490496249871571, "grad_norm": 0.05311143398284912, "learning_rate": 0.009958915365653246, "loss": 2.451, "step": 2424 }, { "epoch": 0.2493578547210521, "grad_norm": 0.14447921514511108, "learning_rate": 0.009971240755957273, "loss": 2.4448, "step": 2427 }, { "epoch": 0.2496660844549471, "grad_norm": 0.178679421544075, "learning_rate": 0.009983566146261297, "loss": 2.4577, "step": 2430 }, { "epoch": 0.24997431418884208, "grad_norm": 0.18707922101020813, "learning_rate": 0.009995891536565325, "loss": 2.4544, "step": 2433 }, { "epoch": 0.2502825439227371, "grad_norm": 0.11012792587280273, "learning_rate": 0.01, "loss": 2.4636, "step": 2436 }, { "epoch": 0.25059077365663207, "grad_norm": 0.1133418157696724, "learning_rate": 0.01, "loss": 2.4694, "step": 2439 }, { "epoch": 0.25089900339052706, "grad_norm": 0.09263787418603897, "learning_rate": 0.01, "loss": 2.4174, "step": 2442 }, { "epoch": 0.25120723312442206, "grad_norm": 0.07637537270784378, "learning_rate": 0.01, "loss": 2.4546, "step": 2445 }, { "epoch": 0.25151546285831705, "grad_norm": 0.05083318054676056, "learning_rate": 0.01, "loss": 2.4517, "step": 2448 }, { "epoch": 0.25182369259221205, "grad_norm": 0.11429949849843979, "learning_rate": 0.01, "loss": 2.3998, "step": 2451 }, { "epoch": 0.25213192232610704, "grad_norm": 0.0740060955286026, "learning_rate": 0.01, "loss": 2.4572, "step": 2454 }, { "epoch": 0.25244015206000203, "grad_norm": 0.23151956498622894, "learning_rate": 0.01, "loss": 2.4507, "step": 2457 }, { "epoch": 0.252748381793897, "grad_norm": 0.09557089954614639, "learning_rate": 0.01, "loss": 2.438, "step": 2460 }, { "epoch": 0.2530566115277921, "grad_norm": 0.06453042477369308, "learning_rate": 0.01, "loss": 2.4444, "step": 2463 }, { "epoch": 0.25336484126168707, "grad_norm": 0.06805883347988129, "learning_rate": 0.01, "loss": 2.4333, "step": 2466 }, { "epoch": 0.25367307099558206, "grad_norm": 0.12063002586364746, "learning_rate": 0.01, "loss": 2.4349, "step": 2469 }, { "epoch": 0.25398130072947706, "grad_norm": 0.12683679163455963, "learning_rate": 0.01, "loss": 2.4615, "step": 2472 }, { "epoch": 0.25428953046337205, "grad_norm": 0.19388514757156372, "learning_rate": 0.01, "loss": 2.4251, "step": 2475 }, { "epoch": 0.25459776019726704, "grad_norm": 0.15118692815303802, "learning_rate": 0.01, "loss": 2.4493, "step": 2478 }, { "epoch": 0.25490598993116204, "grad_norm": 0.0716528594493866, "learning_rate": 0.01, "loss": 2.4177, "step": 2481 }, { "epoch": 0.25521421966505703, "grad_norm": 0.06410454958677292, "learning_rate": 0.01, "loss": 2.4472, "step": 2484 }, { "epoch": 0.255522449398952, "grad_norm": 0.0613977424800396, "learning_rate": 0.01, "loss": 2.4374, "step": 2487 }, { "epoch": 0.255830679132847, "grad_norm": 0.18522503972053528, "learning_rate": 0.01, "loss": 2.4237, "step": 2490 }, { "epoch": 0.256138908866742, "grad_norm": 0.10789433866739273, "learning_rate": 0.01, "loss": 2.4035, "step": 2493 }, { "epoch": 0.256447138600637, "grad_norm": 0.17734338343143463, "learning_rate": 0.01, "loss": 2.4809, "step": 2496 }, { "epoch": 0.256755368334532, "grad_norm": 0.09952409565448761, "learning_rate": 0.01, "loss": 2.4586, "step": 2499 }, { "epoch": 0.257063598068427, "grad_norm": 0.15578734874725342, "learning_rate": 0.01, "loss": 2.4451, "step": 2502 }, { "epoch": 0.257371827802322, "grad_norm": 0.11684698611497879, "learning_rate": 0.01, "loss": 2.4422, "step": 2505 }, { "epoch": 0.257680057536217, "grad_norm": 0.06539366394281387, "learning_rate": 0.01, "loss": 2.4369, "step": 2508 }, { "epoch": 0.25798828727011197, "grad_norm": 0.15363268554210663, "learning_rate": 0.01, "loss": 2.4307, "step": 2511 }, { "epoch": 0.25829651700400696, "grad_norm": 0.07657501846551895, "learning_rate": 0.01, "loss": 2.4287, "step": 2514 }, { "epoch": 0.25860474673790196, "grad_norm": 0.11238528788089752, "learning_rate": 0.01, "loss": 2.415, "step": 2517 }, { "epoch": 0.25891297647179695, "grad_norm": 0.08362044394016266, "learning_rate": 0.01, "loss": 2.4603, "step": 2520 }, { "epoch": 0.259221206205692, "grad_norm": 0.07373514771461487, "learning_rate": 0.01, "loss": 2.3937, "step": 2523 }, { "epoch": 0.259529435939587, "grad_norm": 0.062842458486557, "learning_rate": 0.01, "loss": 2.4096, "step": 2526 }, { "epoch": 0.259837665673482, "grad_norm": 0.12551096081733704, "learning_rate": 0.01, "loss": 2.4379, "step": 2529 }, { "epoch": 0.260145895407377, "grad_norm": 0.06409156322479248, "learning_rate": 0.01, "loss": 2.4212, "step": 2532 }, { "epoch": 0.260454125141272, "grad_norm": 0.10057753324508667, "learning_rate": 0.01, "loss": 2.4349, "step": 2535 }, { "epoch": 0.26076235487516697, "grad_norm": 0.1575561910867691, "learning_rate": 0.01, "loss": 2.44, "step": 2538 }, { "epoch": 0.26107058460906196, "grad_norm": 0.25684165954589844, "learning_rate": 0.01, "loss": 2.4308, "step": 2541 }, { "epoch": 0.26137881434295696, "grad_norm": 0.07472192496061325, "learning_rate": 0.01, "loss": 2.4065, "step": 2544 }, { "epoch": 0.26168704407685195, "grad_norm": 0.060896482318639755, "learning_rate": 0.01, "loss": 2.4347, "step": 2547 }, { "epoch": 0.26199527381074694, "grad_norm": 0.12883131206035614, "learning_rate": 0.01, "loss": 2.42, "step": 2550 }, { "epoch": 0.26230350354464194, "grad_norm": 0.10772990435361862, "learning_rate": 0.01, "loss": 2.3982, "step": 2553 }, { "epoch": 0.26261173327853693, "grad_norm": 0.20955395698547363, "learning_rate": 0.01, "loss": 2.4204, "step": 2556 }, { "epoch": 0.2629199630124319, "grad_norm": 0.08120223879814148, "learning_rate": 0.01, "loss": 2.4192, "step": 2559 }, { "epoch": 0.2632281927463269, "grad_norm": 0.059099119156599045, "learning_rate": 0.01, "loss": 2.4252, "step": 2562 }, { "epoch": 0.2635364224802219, "grad_norm": 0.08729352802038193, "learning_rate": 0.01, "loss": 2.4227, "step": 2565 }, { "epoch": 0.2638446522141169, "grad_norm": 0.1920178234577179, "learning_rate": 0.01, "loss": 2.4017, "step": 2568 }, { "epoch": 0.2641528819480119, "grad_norm": 0.15997105836868286, "learning_rate": 0.01, "loss": 2.444, "step": 2571 }, { "epoch": 0.2644611116819069, "grad_norm": 0.12249890714883804, "learning_rate": 0.01, "loss": 2.3957, "step": 2574 }, { "epoch": 0.2647693414158019, "grad_norm": 0.05974414199590683, "learning_rate": 0.01, "loss": 2.421, "step": 2577 }, { "epoch": 0.26507757114969693, "grad_norm": 0.13711535930633545, "learning_rate": 0.01, "loss": 2.4234, "step": 2580 }, { "epoch": 0.2653858008835919, "grad_norm": 0.15437988936901093, "learning_rate": 0.01, "loss": 2.4216, "step": 2583 }, { "epoch": 0.2656940306174869, "grad_norm": 0.10766157507896423, "learning_rate": 0.01, "loss": 2.4086, "step": 2586 }, { "epoch": 0.2660022603513819, "grad_norm": 0.0736764669418335, "learning_rate": 0.01, "loss": 2.4227, "step": 2589 }, { "epoch": 0.2663104900852769, "grad_norm": 0.06279190629720688, "learning_rate": 0.01, "loss": 2.4371, "step": 2592 }, { "epoch": 0.2666187198191719, "grad_norm": 0.11150863766670227, "learning_rate": 0.01, "loss": 2.3941, "step": 2595 }, { "epoch": 0.2669269495530669, "grad_norm": 0.1527506411075592, "learning_rate": 0.01, "loss": 2.4287, "step": 2598 }, { "epoch": 0.2672351792869619, "grad_norm": 0.13321219384670258, "learning_rate": 0.01, "loss": 2.3995, "step": 2601 }, { "epoch": 0.2675434090208569, "grad_norm": 0.1157502606511116, "learning_rate": 0.01, "loss": 2.4284, "step": 2604 }, { "epoch": 0.2678516387547519, "grad_norm": 0.10027257353067398, "learning_rate": 0.01, "loss": 2.3877, "step": 2607 }, { "epoch": 0.26815986848864687, "grad_norm": 0.10909545421600342, "learning_rate": 0.01, "loss": 2.4134, "step": 2610 }, { "epoch": 0.26846809822254186, "grad_norm": 0.09810952842235565, "learning_rate": 0.01, "loss": 2.4231, "step": 2613 }, { "epoch": 0.26877632795643686, "grad_norm": 0.06906435638666153, "learning_rate": 0.01, "loss": 2.3989, "step": 2616 }, { "epoch": 0.26908455769033185, "grad_norm": 0.10627961158752441, "learning_rate": 0.01, "loss": 2.4333, "step": 2619 }, { "epoch": 0.26939278742422684, "grad_norm": 0.10462147742509842, "learning_rate": 0.01, "loss": 2.394, "step": 2622 }, { "epoch": 0.26970101715812184, "grad_norm": 0.10885953158140182, "learning_rate": 0.01, "loss": 2.4172, "step": 2625 }, { "epoch": 0.27000924689201683, "grad_norm": 0.0981958881020546, "learning_rate": 0.01, "loss": 2.4112, "step": 2628 }, { "epoch": 0.2703174766259118, "grad_norm": 0.14177650213241577, "learning_rate": 0.01, "loss": 2.3748, "step": 2631 }, { "epoch": 0.2706257063598068, "grad_norm": 0.06374615430831909, "learning_rate": 0.01, "loss": 2.4054, "step": 2634 }, { "epoch": 0.27093393609370187, "grad_norm": 0.23363849520683289, "learning_rate": 0.01, "loss": 2.4194, "step": 2637 }, { "epoch": 0.27124216582759686, "grad_norm": 0.10294153541326523, "learning_rate": 0.01, "loss": 2.384, "step": 2640 }, { "epoch": 0.27155039556149185, "grad_norm": 0.17200984060764313, "learning_rate": 0.01, "loss": 2.4122, "step": 2643 }, { "epoch": 0.27185862529538685, "grad_norm": 0.06513970345258713, "learning_rate": 0.01, "loss": 2.4152, "step": 2646 }, { "epoch": 0.27216685502928184, "grad_norm": 0.08533628284931183, "learning_rate": 0.01, "loss": 2.4508, "step": 2649 }, { "epoch": 0.27247508476317683, "grad_norm": 0.07299966365098953, "learning_rate": 0.01, "loss": 2.4091, "step": 2652 }, { "epoch": 0.2727833144970718, "grad_norm": 0.06617329269647598, "learning_rate": 0.01, "loss": 2.4, "step": 2655 }, { "epoch": 0.2730915442309668, "grad_norm": 0.07062381505966187, "learning_rate": 0.01, "loss": 2.4002, "step": 2658 }, { "epoch": 0.2733997739648618, "grad_norm": 0.11162712424993515, "learning_rate": 0.01, "loss": 2.386, "step": 2661 }, { "epoch": 0.2737080036987568, "grad_norm": 0.07827174663543701, "learning_rate": 0.01, "loss": 2.4111, "step": 2664 }, { "epoch": 0.2740162334326518, "grad_norm": 0.07248109579086304, "learning_rate": 0.01, "loss": 2.3968, "step": 2667 }, { "epoch": 0.2743244631665468, "grad_norm": 0.1251075118780136, "learning_rate": 0.01, "loss": 2.4216, "step": 2670 }, { "epoch": 0.2746326929004418, "grad_norm": 0.1280512660741806, "learning_rate": 0.01, "loss": 2.4233, "step": 2673 }, { "epoch": 0.2749409226343368, "grad_norm": 0.06290891766548157, "learning_rate": 0.01, "loss": 2.412, "step": 2676 }, { "epoch": 0.2752491523682318, "grad_norm": 0.09324091672897339, "learning_rate": 0.01, "loss": 2.4025, "step": 2679 }, { "epoch": 0.27555738210212677, "grad_norm": 0.06253890693187714, "learning_rate": 0.01, "loss": 2.4197, "step": 2682 }, { "epoch": 0.27586561183602176, "grad_norm": 0.10279545187950134, "learning_rate": 0.01, "loss": 2.4099, "step": 2685 }, { "epoch": 0.27617384156991676, "grad_norm": 0.07942310720682144, "learning_rate": 0.01, "loss": 2.4052, "step": 2688 }, { "epoch": 0.27648207130381175, "grad_norm": 0.10373161733150482, "learning_rate": 0.01, "loss": 2.3899, "step": 2691 }, { "epoch": 0.2767903010377068, "grad_norm": 0.312575101852417, "learning_rate": 0.01, "loss": 2.4243, "step": 2694 }, { "epoch": 0.2770985307716018, "grad_norm": 0.07417728751897812, "learning_rate": 0.01, "loss": 2.3604, "step": 2697 }, { "epoch": 0.2774067605054968, "grad_norm": 0.09007294476032257, "learning_rate": 0.01, "loss": 2.3863, "step": 2700 }, { "epoch": 0.2777149902393918, "grad_norm": 0.10452757775783539, "learning_rate": 0.01, "loss": 2.41, "step": 2703 }, { "epoch": 0.2780232199732868, "grad_norm": 0.09276364743709564, "learning_rate": 0.01, "loss": 2.3878, "step": 2706 }, { "epoch": 0.27833144970718177, "grad_norm": 0.08949960023164749, "learning_rate": 0.01, "loss": 2.3823, "step": 2709 }, { "epoch": 0.27863967944107676, "grad_norm": 0.0589129813015461, "learning_rate": 0.01, "loss": 2.401, "step": 2712 }, { "epoch": 0.27894790917497175, "grad_norm": 0.08298425376415253, "learning_rate": 0.01, "loss": 2.4, "step": 2715 }, { "epoch": 0.27925613890886675, "grad_norm": 0.07719019800424576, "learning_rate": 0.01, "loss": 2.3726, "step": 2718 }, { "epoch": 0.27956436864276174, "grad_norm": 0.09369128197431564, "learning_rate": 0.01, "loss": 2.3893, "step": 2721 }, { "epoch": 0.27987259837665673, "grad_norm": 0.11461931467056274, "learning_rate": 0.01, "loss": 2.4017, "step": 2724 }, { "epoch": 0.2801808281105517, "grad_norm": 0.050078991800546646, "learning_rate": 0.01, "loss": 2.3852, "step": 2727 }, { "epoch": 0.2804890578444467, "grad_norm": 0.08188966661691666, "learning_rate": 0.01, "loss": 2.3469, "step": 2730 }, { "epoch": 0.2807972875783417, "grad_norm": 0.0805756077170372, "learning_rate": 0.01, "loss": 2.3632, "step": 2733 }, { "epoch": 0.2811055173122367, "grad_norm": 0.07377249747514725, "learning_rate": 0.01, "loss": 2.3852, "step": 2736 }, { "epoch": 0.2814137470461317, "grad_norm": 0.17040085792541504, "learning_rate": 0.01, "loss": 2.3904, "step": 2739 }, { "epoch": 0.2817219767800267, "grad_norm": 0.1419583261013031, "learning_rate": 0.01, "loss": 2.3735, "step": 2742 }, { "epoch": 0.2820302065139217, "grad_norm": 0.13182134926319122, "learning_rate": 0.01, "loss": 2.3904, "step": 2745 }, { "epoch": 0.2823384362478167, "grad_norm": 0.1058223620057106, "learning_rate": 0.01, "loss": 2.3922, "step": 2748 }, { "epoch": 0.2826466659817117, "grad_norm": 0.08037062734365463, "learning_rate": 0.01, "loss": 2.3692, "step": 2751 }, { "epoch": 0.2829548957156067, "grad_norm": 0.10247037559747696, "learning_rate": 0.01, "loss": 2.3712, "step": 2754 }, { "epoch": 0.2832631254495017, "grad_norm": 0.09925279021263123, "learning_rate": 0.01, "loss": 2.3632, "step": 2757 }, { "epoch": 0.2835713551833967, "grad_norm": 0.05111562833189964, "learning_rate": 0.01, "loss": 2.3622, "step": 2760 }, { "epoch": 0.2838795849172917, "grad_norm": 0.060480840504169464, "learning_rate": 0.01, "loss": 2.3592, "step": 2763 }, { "epoch": 0.2841878146511867, "grad_norm": 0.13488496840000153, "learning_rate": 0.01, "loss": 2.3822, "step": 2766 }, { "epoch": 0.2844960443850817, "grad_norm": 0.08369171619415283, "learning_rate": 0.01, "loss": 2.3922, "step": 2769 }, { "epoch": 0.2848042741189767, "grad_norm": 0.19474861025810242, "learning_rate": 0.01, "loss": 2.387, "step": 2772 }, { "epoch": 0.2851125038528717, "grad_norm": 0.17801512777805328, "learning_rate": 0.01, "loss": 2.3745, "step": 2775 }, { "epoch": 0.2854207335867667, "grad_norm": 0.0658038854598999, "learning_rate": 0.01, "loss": 2.3857, "step": 2778 }, { "epoch": 0.28572896332066167, "grad_norm": 0.0510118305683136, "learning_rate": 0.01, "loss": 2.3735, "step": 2781 }, { "epoch": 0.28603719305455666, "grad_norm": 0.0649714320898056, "learning_rate": 0.01, "loss": 2.4002, "step": 2784 }, { "epoch": 0.28634542278845165, "grad_norm": 0.11462211608886719, "learning_rate": 0.01, "loss": 2.3642, "step": 2787 }, { "epoch": 0.28665365252234665, "grad_norm": 0.0745900496840477, "learning_rate": 0.01, "loss": 2.4058, "step": 2790 }, { "epoch": 0.28696188225624164, "grad_norm": 0.2475040704011917, "learning_rate": 0.01, "loss": 2.3778, "step": 2793 }, { "epoch": 0.28727011199013663, "grad_norm": 0.08792129158973694, "learning_rate": 0.01, "loss": 2.3932, "step": 2796 }, { "epoch": 0.2875783417240316, "grad_norm": 0.04952983185648918, "learning_rate": 0.01, "loss": 2.3631, "step": 2799 }, { "epoch": 0.2878865714579266, "grad_norm": 0.053665559738874435, "learning_rate": 0.01, "loss": 2.3673, "step": 2802 }, { "epoch": 0.2881948011918216, "grad_norm": 0.0579262301325798, "learning_rate": 0.01, "loss": 2.3234, "step": 2805 }, { "epoch": 0.2885030309257166, "grad_norm": 0.13837358355522156, "learning_rate": 0.01, "loss": 2.3854, "step": 2808 }, { "epoch": 0.28881126065961166, "grad_norm": 0.09924750030040741, "learning_rate": 0.01, "loss": 2.3819, "step": 2811 }, { "epoch": 0.28911949039350665, "grad_norm": 0.14742402732372284, "learning_rate": 0.01, "loss": 2.3853, "step": 2814 }, { "epoch": 0.28942772012740164, "grad_norm": 0.11731177568435669, "learning_rate": 0.01, "loss": 2.4082, "step": 2817 }, { "epoch": 0.28973594986129664, "grad_norm": 0.16812686622142792, "learning_rate": 0.01, "loss": 2.3855, "step": 2820 }, { "epoch": 0.29004417959519163, "grad_norm": 0.06864415854215622, "learning_rate": 0.01, "loss": 2.3911, "step": 2823 }, { "epoch": 0.2903524093290866, "grad_norm": 0.050597239285707474, "learning_rate": 0.01, "loss": 2.3627, "step": 2826 }, { "epoch": 0.2906606390629816, "grad_norm": 0.06927742809057236, "learning_rate": 0.01, "loss": 2.3653, "step": 2829 }, { "epoch": 0.2909688687968766, "grad_norm": 0.049216922372579575, "learning_rate": 0.01, "loss": 2.3527, "step": 2832 }, { "epoch": 0.2912770985307716, "grad_norm": 0.06790090352296829, "learning_rate": 0.01, "loss": 2.4087, "step": 2835 }, { "epoch": 0.2915853282646666, "grad_norm": 0.14112398028373718, "learning_rate": 0.01, "loss": 2.3777, "step": 2838 }, { "epoch": 0.2918935579985616, "grad_norm": 0.07459170371294022, "learning_rate": 0.01, "loss": 2.3776, "step": 2841 }, { "epoch": 0.2922017877324566, "grad_norm": 0.05480146035552025, "learning_rate": 0.01, "loss": 2.3831, "step": 2844 }, { "epoch": 0.2925100174663516, "grad_norm": 0.11372058093547821, "learning_rate": 0.01, "loss": 2.3667, "step": 2847 }, { "epoch": 0.2928182472002466, "grad_norm": 0.05589181184768677, "learning_rate": 0.01, "loss": 2.3744, "step": 2850 }, { "epoch": 0.29312647693414157, "grad_norm": 0.10505107790231705, "learning_rate": 0.01, "loss": 2.3461, "step": 2853 }, { "epoch": 0.29343470666803656, "grad_norm": 0.06241190806031227, "learning_rate": 0.01, "loss": 2.3616, "step": 2856 }, { "epoch": 0.29374293640193155, "grad_norm": 0.2687353193759918, "learning_rate": 0.01, "loss": 2.3728, "step": 2859 }, { "epoch": 0.29405116613582655, "grad_norm": 0.13569511473178864, "learning_rate": 0.01, "loss": 2.3758, "step": 2862 }, { "epoch": 0.29435939586972154, "grad_norm": 0.08852502703666687, "learning_rate": 0.01, "loss": 2.3805, "step": 2865 }, { "epoch": 0.2946676256036166, "grad_norm": 0.0690246969461441, "learning_rate": 0.01, "loss": 2.3694, "step": 2868 }, { "epoch": 0.2949758553375116, "grad_norm": 0.13508114218711853, "learning_rate": 0.01, "loss": 2.3626, "step": 2871 }, { "epoch": 0.2952840850714066, "grad_norm": 0.06574945896863937, "learning_rate": 0.01, "loss": 2.3661, "step": 2874 }, { "epoch": 0.29559231480530157, "grad_norm": 0.08492054790258408, "learning_rate": 0.01, "loss": 2.3737, "step": 2877 }, { "epoch": 0.29590054453919656, "grad_norm": 0.11930177360773087, "learning_rate": 0.01, "loss": 2.3684, "step": 2880 }, { "epoch": 0.29620877427309156, "grad_norm": 0.06913982331752777, "learning_rate": 0.01, "loss": 2.3597, "step": 2883 }, { "epoch": 0.29651700400698655, "grad_norm": 0.1508978009223938, "learning_rate": 0.01, "loss": 2.3807, "step": 2886 }, { "epoch": 0.29682523374088154, "grad_norm": 0.059416841715574265, "learning_rate": 0.01, "loss": 2.3672, "step": 2889 }, { "epoch": 0.29713346347477654, "grad_norm": 0.07864934206008911, "learning_rate": 0.01, "loss": 2.3542, "step": 2892 }, { "epoch": 0.29744169320867153, "grad_norm": 0.15172207355499268, "learning_rate": 0.01, "loss": 2.3816, "step": 2895 }, { "epoch": 0.2977499229425665, "grad_norm": 0.08946362882852554, "learning_rate": 0.01, "loss": 2.3854, "step": 2898 }, { "epoch": 0.2980581526764615, "grad_norm": 0.06231836602091789, "learning_rate": 0.01, "loss": 2.3803, "step": 2901 }, { "epoch": 0.2983663824103565, "grad_norm": 0.06673764437437057, "learning_rate": 0.01, "loss": 2.3506, "step": 2904 }, { "epoch": 0.2986746121442515, "grad_norm": 0.11514609307050705, "learning_rate": 0.01, "loss": 2.3345, "step": 2907 }, { "epoch": 0.2989828418781465, "grad_norm": 0.05702753737568855, "learning_rate": 0.01, "loss": 2.353, "step": 2910 }, { "epoch": 0.2992910716120415, "grad_norm": 0.09202984720468521, "learning_rate": 0.01, "loss": 2.3978, "step": 2913 }, { "epoch": 0.2995993013459365, "grad_norm": 0.09088042378425598, "learning_rate": 0.01, "loss": 2.3508, "step": 2916 }, { "epoch": 0.2999075310798315, "grad_norm": 0.09106214344501495, "learning_rate": 0.01, "loss": 2.3695, "step": 2919 }, { "epoch": 0.3002157608137265, "grad_norm": 0.12793834507465363, "learning_rate": 0.01, "loss": 2.3585, "step": 2922 }, { "epoch": 0.30052399054762147, "grad_norm": 0.16437458992004395, "learning_rate": 0.01, "loss": 2.3708, "step": 2925 }, { "epoch": 0.3008322202815165, "grad_norm": 0.10168170928955078, "learning_rate": 0.01, "loss": 2.3839, "step": 2928 }, { "epoch": 0.3011404500154115, "grad_norm": 0.12716282904148102, "learning_rate": 0.01, "loss": 2.3653, "step": 2931 }, { "epoch": 0.3014486797493065, "grad_norm": 0.05094976723194122, "learning_rate": 0.01, "loss": 2.3315, "step": 2934 }, { "epoch": 0.3017569094832015, "grad_norm": 0.11750750988721848, "learning_rate": 0.01, "loss": 2.3544, "step": 2937 }, { "epoch": 0.3020651392170965, "grad_norm": 0.0688977912068367, "learning_rate": 0.01, "loss": 2.3485, "step": 2940 }, { "epoch": 0.3023733689509915, "grad_norm": 0.09537909924983978, "learning_rate": 0.01, "loss": 2.3357, "step": 2943 }, { "epoch": 0.3026815986848865, "grad_norm": 0.15028056502342224, "learning_rate": 0.01, "loss": 2.3029, "step": 2946 }, { "epoch": 0.30298982841878147, "grad_norm": 0.2069140523672104, "learning_rate": 0.01, "loss": 2.3658, "step": 2949 }, { "epoch": 0.30329805815267646, "grad_norm": 0.04774792492389679, "learning_rate": 0.01, "loss": 2.3488, "step": 2952 }, { "epoch": 0.30360628788657146, "grad_norm": 0.04033259302377701, "learning_rate": 0.01, "loss": 2.3536, "step": 2955 }, { "epoch": 0.30391451762046645, "grad_norm": 0.04587483033537865, "learning_rate": 0.01, "loss": 2.3377, "step": 2958 }, { "epoch": 0.30422274735436144, "grad_norm": 0.08392881602048874, "learning_rate": 0.01, "loss": 2.3323, "step": 2961 }, { "epoch": 0.30453097708825644, "grad_norm": 0.16665025055408478, "learning_rate": 0.01, "loss": 2.3763, "step": 2964 }, { "epoch": 0.30483920682215143, "grad_norm": 0.19268077611923218, "learning_rate": 0.01, "loss": 2.3545, "step": 2967 }, { "epoch": 0.3051474365560464, "grad_norm": 0.14428319036960602, "learning_rate": 0.01, "loss": 2.3481, "step": 2970 }, { "epoch": 0.3054556662899414, "grad_norm": 0.08958342671394348, "learning_rate": 0.01, "loss": 2.3704, "step": 2973 }, { "epoch": 0.3057638960238364, "grad_norm": 0.06964152306318283, "learning_rate": 0.01, "loss": 2.3649, "step": 2976 }, { "epoch": 0.3060721257577314, "grad_norm": 0.1336866170167923, "learning_rate": 0.01, "loss": 2.3426, "step": 2979 }, { "epoch": 0.3063803554916264, "grad_norm": 0.06913724541664124, "learning_rate": 0.01, "loss": 2.363, "step": 2982 }, { "epoch": 0.30668858522552145, "grad_norm": 0.0705854743719101, "learning_rate": 0.01, "loss": 2.327, "step": 2985 }, { "epoch": 0.30699681495941644, "grad_norm": 0.06596222519874573, "learning_rate": 0.01, "loss": 2.3669, "step": 2988 }, { "epoch": 0.30730504469331144, "grad_norm": 0.12716993689537048, "learning_rate": 0.01, "loss": 2.3395, "step": 2991 }, { "epoch": 0.30761327442720643, "grad_norm": 0.09933049976825714, "learning_rate": 0.01, "loss": 2.3532, "step": 2994 }, { "epoch": 0.3079215041611014, "grad_norm": 0.19280697405338287, "learning_rate": 0.01, "loss": 2.3513, "step": 2997 }, { "epoch": 0.3082297338949964, "grad_norm": 0.08448618650436401, "learning_rate": 0.01, "loss": 2.3828, "step": 3000 }, { "epoch": 0.3085379636288914, "grad_norm": 0.14882349967956543, "learning_rate": 0.01, "loss": 2.3398, "step": 3003 }, { "epoch": 0.3088461933627864, "grad_norm": 0.08360068500041962, "learning_rate": 0.01, "loss": 2.3414, "step": 3006 }, { "epoch": 0.3091544230966814, "grad_norm": 0.1378074288368225, "learning_rate": 0.01, "loss": 2.3461, "step": 3009 }, { "epoch": 0.3094626528305764, "grad_norm": 0.13160692155361176, "learning_rate": 0.01, "loss": 2.3517, "step": 3012 }, { "epoch": 0.3097708825644714, "grad_norm": 0.0702040046453476, "learning_rate": 0.01, "loss": 2.3524, "step": 3015 }, { "epoch": 0.3100791122983664, "grad_norm": 0.06959223002195358, "learning_rate": 0.01, "loss": 2.3398, "step": 3018 }, { "epoch": 0.31038734203226137, "grad_norm": 0.10830830782651901, "learning_rate": 0.01, "loss": 2.3437, "step": 3021 }, { "epoch": 0.31069557176615636, "grad_norm": 0.09298605471849442, "learning_rate": 0.01, "loss": 2.3473, "step": 3024 }, { "epoch": 0.31100380150005136, "grad_norm": 0.06620427966117859, "learning_rate": 0.01, "loss": 2.3341, "step": 3027 }, { "epoch": 0.31131203123394635, "grad_norm": 0.21722812950611115, "learning_rate": 0.01, "loss": 2.3281, "step": 3030 }, { "epoch": 0.31162026096784134, "grad_norm": 0.1475544422864914, "learning_rate": 0.01, "loss": 2.3383, "step": 3033 }, { "epoch": 0.31192849070173634, "grad_norm": 0.13449987769126892, "learning_rate": 0.01, "loss": 2.314, "step": 3036 }, { "epoch": 0.31223672043563133, "grad_norm": 0.06219559907913208, "learning_rate": 0.01, "loss": 2.3526, "step": 3039 }, { "epoch": 0.3125449501695264, "grad_norm": 0.05337538942694664, "learning_rate": 0.01, "loss": 2.3386, "step": 3042 }, { "epoch": 0.3128531799034214, "grad_norm": 0.11457488685846329, "learning_rate": 0.01, "loss": 2.3261, "step": 3045 }, { "epoch": 0.31316140963731637, "grad_norm": 0.23809069395065308, "learning_rate": 0.01, "loss": 2.3411, "step": 3048 }, { "epoch": 0.31346963937121136, "grad_norm": 0.11100046336650848, "learning_rate": 0.01, "loss": 2.3269, "step": 3051 }, { "epoch": 0.31377786910510636, "grad_norm": 0.05229029804468155, "learning_rate": 0.01, "loss": 2.3339, "step": 3054 }, { "epoch": 0.31408609883900135, "grad_norm": 0.05956039950251579, "learning_rate": 0.01, "loss": 2.3566, "step": 3057 }, { "epoch": 0.31439432857289634, "grad_norm": 0.13084881007671356, "learning_rate": 0.01, "loss": 2.332, "step": 3060 }, { "epoch": 0.31470255830679134, "grad_norm": 0.13889305293560028, "learning_rate": 0.01, "loss": 2.3118, "step": 3063 }, { "epoch": 0.31501078804068633, "grad_norm": 0.10443049669265747, "learning_rate": 0.01, "loss": 2.3246, "step": 3066 }, { "epoch": 0.3153190177745813, "grad_norm": 0.04321267828345299, "learning_rate": 0.01, "loss": 2.3353, "step": 3069 }, { "epoch": 0.3156272475084763, "grad_norm": 0.046873319894075394, "learning_rate": 0.01, "loss": 2.3144, "step": 3072 }, { "epoch": 0.3159354772423713, "grad_norm": 0.06548158824443817, "learning_rate": 0.01, "loss": 2.3285, "step": 3075 }, { "epoch": 0.3162437069762663, "grad_norm": 0.19105824828147888, "learning_rate": 0.01, "loss": 2.349, "step": 3078 }, { "epoch": 0.3165519367101613, "grad_norm": 0.14477142691612244, "learning_rate": 0.01, "loss": 2.3074, "step": 3081 }, { "epoch": 0.3168601664440563, "grad_norm": 0.08536936342716217, "learning_rate": 0.01, "loss": 2.3462, "step": 3084 }, { "epoch": 0.3171683961779513, "grad_norm": 0.0595535933971405, "learning_rate": 0.01, "loss": 2.3522, "step": 3087 }, { "epoch": 0.3174766259118463, "grad_norm": 0.058548733592033386, "learning_rate": 0.01, "loss": 2.3314, "step": 3090 }, { "epoch": 0.31778485564574127, "grad_norm": 0.04651311784982681, "learning_rate": 0.01, "loss": 2.3058, "step": 3093 }, { "epoch": 0.31809308537963626, "grad_norm": 0.0516805462539196, "learning_rate": 0.01, "loss": 2.3243, "step": 3096 }, { "epoch": 0.31840131511353126, "grad_norm": 0.16851970553398132, "learning_rate": 0.01, "loss": 2.3315, "step": 3099 }, { "epoch": 0.3187095448474263, "grad_norm": 0.08350600302219391, "learning_rate": 0.01, "loss": 2.315, "step": 3102 }, { "epoch": 0.3190177745813213, "grad_norm": 0.08899964392185211, "learning_rate": 0.01, "loss": 2.3218, "step": 3105 }, { "epoch": 0.3193260043152163, "grad_norm": 0.2424800843000412, "learning_rate": 0.01, "loss": 2.3207, "step": 3108 }, { "epoch": 0.3196342340491113, "grad_norm": 0.213782399892807, "learning_rate": 0.01, "loss": 2.3728, "step": 3111 }, { "epoch": 0.3199424637830063, "grad_norm": 0.15629780292510986, "learning_rate": 0.01, "loss": 2.3453, "step": 3114 }, { "epoch": 0.3202506935169013, "grad_norm": 0.06920924782752991, "learning_rate": 0.01, "loss": 2.3111, "step": 3117 }, { "epoch": 0.32055892325079627, "grad_norm": 0.04514181613922119, "learning_rate": 0.01, "loss": 2.33, "step": 3120 }, { "epoch": 0.32086715298469126, "grad_norm": 0.05500979721546173, "learning_rate": 0.01, "loss": 2.3078, "step": 3123 }, { "epoch": 0.32117538271858626, "grad_norm": 0.09148071706295013, "learning_rate": 0.01, "loss": 2.3457, "step": 3126 }, { "epoch": 0.32148361245248125, "grad_norm": 0.10582035779953003, "learning_rate": 0.01, "loss": 2.3114, "step": 3129 }, { "epoch": 0.32179184218637624, "grad_norm": 0.1557345986366272, "learning_rate": 0.01, "loss": 2.3334, "step": 3132 }, { "epoch": 0.32210007192027124, "grad_norm": 0.11304829269647598, "learning_rate": 0.01, "loss": 2.2797, "step": 3135 }, { "epoch": 0.32240830165416623, "grad_norm": 0.08236223459243774, "learning_rate": 0.01, "loss": 2.3357, "step": 3138 }, { "epoch": 0.3227165313880612, "grad_norm": 0.09718946367502213, "learning_rate": 0.01, "loss": 2.3096, "step": 3141 }, { "epoch": 0.3230247611219562, "grad_norm": 0.07455772161483765, "learning_rate": 0.01, "loss": 2.3127, "step": 3144 }, { "epoch": 0.3233329908558512, "grad_norm": 0.0556890033185482, "learning_rate": 0.01, "loss": 2.3088, "step": 3147 }, { "epoch": 0.3236412205897462, "grad_norm": 0.07595494389533997, "learning_rate": 0.01, "loss": 2.3159, "step": 3150 }, { "epoch": 0.3239494503236412, "grad_norm": 0.07064896821975708, "learning_rate": 0.01, "loss": 2.3336, "step": 3153 }, { "epoch": 0.3242576800575362, "grad_norm": 0.06646276265382767, "learning_rate": 0.01, "loss": 2.324, "step": 3156 }, { "epoch": 0.32456590979143124, "grad_norm": 0.08837945014238358, "learning_rate": 0.01, "loss": 2.3191, "step": 3159 }, { "epoch": 0.32487413952532623, "grad_norm": 0.13228796422481537, "learning_rate": 0.01, "loss": 2.3231, "step": 3162 }, { "epoch": 0.3251823692592212, "grad_norm": 0.1080455407500267, "learning_rate": 0.01, "loss": 2.3341, "step": 3165 }, { "epoch": 0.3254905989931162, "grad_norm": 0.1073957234621048, "learning_rate": 0.01, "loss": 2.3237, "step": 3168 }, { "epoch": 0.3257988287270112, "grad_norm": 0.12472347915172577, "learning_rate": 0.01, "loss": 2.3315, "step": 3171 }, { "epoch": 0.3261070584609062, "grad_norm": 0.09123571217060089, "learning_rate": 0.01, "loss": 2.3588, "step": 3174 }, { "epoch": 0.3264152881948012, "grad_norm": 0.07830306142568588, "learning_rate": 0.01, "loss": 2.3273, "step": 3177 }, { "epoch": 0.3267235179286962, "grad_norm": 0.11552650481462479, "learning_rate": 0.01, "loss": 2.3407, "step": 3180 }, { "epoch": 0.3270317476625912, "grad_norm": 0.13251489400863647, "learning_rate": 0.01, "loss": 2.3241, "step": 3183 }, { "epoch": 0.3273399773964862, "grad_norm": 0.12775808572769165, "learning_rate": 0.01, "loss": 2.331, "step": 3186 }, { "epoch": 0.3276482071303812, "grad_norm": 0.12069859355688095, "learning_rate": 0.01, "loss": 2.3486, "step": 3189 }, { "epoch": 0.32795643686427617, "grad_norm": 0.059109434485435486, "learning_rate": 0.01, "loss": 2.2969, "step": 3192 }, { "epoch": 0.32826466659817116, "grad_norm": 0.12731850147247314, "learning_rate": 0.01, "loss": 2.3269, "step": 3195 }, { "epoch": 0.32857289633206616, "grad_norm": 0.15247757732868195, "learning_rate": 0.01, "loss": 2.3312, "step": 3198 }, { "epoch": 0.32888112606596115, "grad_norm": 0.128463476896286, "learning_rate": 0.01, "loss": 2.3275, "step": 3201 }, { "epoch": 0.32918935579985614, "grad_norm": 0.09406638145446777, "learning_rate": 0.01, "loss": 2.3205, "step": 3204 }, { "epoch": 0.32949758553375114, "grad_norm": 0.10524141043424606, "learning_rate": 0.01, "loss": 2.3423, "step": 3207 }, { "epoch": 0.32980581526764613, "grad_norm": 0.11357913911342621, "learning_rate": 0.01, "loss": 2.3071, "step": 3210 }, { "epoch": 0.3301140450015411, "grad_norm": 0.06979521363973618, "learning_rate": 0.01, "loss": 2.3319, "step": 3213 }, { "epoch": 0.33042227473543617, "grad_norm": 0.07000034302473068, "learning_rate": 0.01, "loss": 2.3523, "step": 3216 }, { "epoch": 0.33073050446933117, "grad_norm": 0.07495003193616867, "learning_rate": 0.01, "loss": 2.305, "step": 3219 }, { "epoch": 0.33103873420322616, "grad_norm": 0.07131810486316681, "learning_rate": 0.01, "loss": 2.2896, "step": 3222 }, { "epoch": 0.33134696393712115, "grad_norm": 0.051389019936323166, "learning_rate": 0.01, "loss": 2.2974, "step": 3225 }, { "epoch": 0.33165519367101615, "grad_norm": 0.05159701779484749, "learning_rate": 0.01, "loss": 2.3344, "step": 3228 }, { "epoch": 0.33196342340491114, "grad_norm": 0.07632975280284882, "learning_rate": 0.01, "loss": 2.3091, "step": 3231 }, { "epoch": 0.33227165313880613, "grad_norm": 0.08053800463676453, "learning_rate": 0.01, "loss": 2.298, "step": 3234 }, { "epoch": 0.3325798828727011, "grad_norm": 0.1371622234582901, "learning_rate": 0.01, "loss": 2.3095, "step": 3237 }, { "epoch": 0.3328881126065961, "grad_norm": 0.11367069184780121, "learning_rate": 0.01, "loss": 2.3212, "step": 3240 }, { "epoch": 0.3331963423404911, "grad_norm": 0.13252900540828705, "learning_rate": 0.01, "loss": 2.3238, "step": 3243 }, { "epoch": 0.3335045720743861, "grad_norm": 0.15517258644104004, "learning_rate": 0.01, "loss": 2.3263, "step": 3246 }, { "epoch": 0.3338128018082811, "grad_norm": 0.14029370248317719, "learning_rate": 0.01, "loss": 2.3457, "step": 3249 }, { "epoch": 0.3341210315421761, "grad_norm": 0.105759397149086, "learning_rate": 0.01, "loss": 2.3008, "step": 3252 }, { "epoch": 0.3344292612760711, "grad_norm": 0.04762979596853256, "learning_rate": 0.01, "loss": 2.3306, "step": 3255 }, { "epoch": 0.3347374910099661, "grad_norm": 0.12065446376800537, "learning_rate": 0.01, "loss": 2.2904, "step": 3258 }, { "epoch": 0.3350457207438611, "grad_norm": 0.08886688947677612, "learning_rate": 0.01, "loss": 2.3243, "step": 3261 }, { "epoch": 0.33535395047775607, "grad_norm": 0.08021339774131775, "learning_rate": 0.01, "loss": 2.3313, "step": 3264 }, { "epoch": 0.33566218021165106, "grad_norm": 0.04490290582180023, "learning_rate": 0.01, "loss": 2.2888, "step": 3267 }, { "epoch": 0.33597040994554606, "grad_norm": 0.061480812728405, "learning_rate": 0.01, "loss": 2.2898, "step": 3270 }, { "epoch": 0.33627863967944105, "grad_norm": 0.04230419546365738, "learning_rate": 0.01, "loss": 2.3062, "step": 3273 }, { "epoch": 0.3365868694133361, "grad_norm": 0.12344948202371597, "learning_rate": 0.01, "loss": 2.3105, "step": 3276 }, { "epoch": 0.3368950991472311, "grad_norm": 0.13087160885334015, "learning_rate": 0.01, "loss": 2.3388, "step": 3279 }, { "epoch": 0.3372033288811261, "grad_norm": 0.06671308726072311, "learning_rate": 0.01, "loss": 2.3062, "step": 3282 }, { "epoch": 0.3375115586150211, "grad_norm": 0.055828843265771866, "learning_rate": 0.01, "loss": 2.3227, "step": 3285 }, { "epoch": 0.3378197883489161, "grad_norm": 0.07760481536388397, "learning_rate": 0.01, "loss": 2.307, "step": 3288 }, { "epoch": 0.33812801808281107, "grad_norm": 0.08074722439050674, "learning_rate": 0.01, "loss": 2.3363, "step": 3291 }, { "epoch": 0.33843624781670606, "grad_norm": 0.046514566987752914, "learning_rate": 0.01, "loss": 2.3152, "step": 3294 }, { "epoch": 0.33874447755060105, "grad_norm": 0.15358585119247437, "learning_rate": 0.01, "loss": 2.3114, "step": 3297 }, { "epoch": 0.33905270728449605, "grad_norm": 0.09048300981521606, "learning_rate": 0.01, "loss": 2.3218, "step": 3300 }, { "epoch": 0.33936093701839104, "grad_norm": 0.08199465274810791, "learning_rate": 0.01, "loss": 2.3133, "step": 3303 }, { "epoch": 0.33966916675228603, "grad_norm": 0.13738159835338593, "learning_rate": 0.01, "loss": 2.3108, "step": 3306 }, { "epoch": 0.339977396486181, "grad_norm": 0.11493804305791855, "learning_rate": 0.01, "loss": 2.2996, "step": 3309 }, { "epoch": 0.340285626220076, "grad_norm": 0.06872740387916565, "learning_rate": 0.01, "loss": 2.306, "step": 3312 }, { "epoch": 0.340593855953971, "grad_norm": 0.055139992386102676, "learning_rate": 0.01, "loss": 2.3129, "step": 3315 }, { "epoch": 0.340902085687866, "grad_norm": 0.16477546095848083, "learning_rate": 0.01, "loss": 2.3138, "step": 3318 }, { "epoch": 0.341210315421761, "grad_norm": 0.06387230008840561, "learning_rate": 0.01, "loss": 2.3025, "step": 3321 }, { "epoch": 0.341518545155656, "grad_norm": 0.1657593995332718, "learning_rate": 0.01, "loss": 2.3255, "step": 3324 }, { "epoch": 0.341826774889551, "grad_norm": 0.08980764448642731, "learning_rate": 0.01, "loss": 2.3024, "step": 3327 }, { "epoch": 0.342135004623446, "grad_norm": 0.05479981005191803, "learning_rate": 0.01, "loss": 2.2955, "step": 3330 }, { "epoch": 0.34244323435734103, "grad_norm": 0.05986113101243973, "learning_rate": 0.01, "loss": 2.3078, "step": 3333 }, { "epoch": 0.342751464091236, "grad_norm": 0.1339874267578125, "learning_rate": 0.01, "loss": 2.2974, "step": 3336 }, { "epoch": 0.343059693825131, "grad_norm": 0.11250229179859161, "learning_rate": 0.01, "loss": 2.3162, "step": 3339 }, { "epoch": 0.343367923559026, "grad_norm": 0.12179972976446152, "learning_rate": 0.01, "loss": 2.2746, "step": 3342 }, { "epoch": 0.343676153292921, "grad_norm": 0.10306143760681152, "learning_rate": 0.01, "loss": 2.3008, "step": 3345 }, { "epoch": 0.343984383026816, "grad_norm": 0.08372616767883301, "learning_rate": 0.01, "loss": 2.2962, "step": 3348 }, { "epoch": 0.344292612760711, "grad_norm": 0.05286876857280731, "learning_rate": 0.01, "loss": 2.3067, "step": 3351 }, { "epoch": 0.344600842494606, "grad_norm": 0.06248036026954651, "learning_rate": 0.01, "loss": 2.3432, "step": 3354 }, { "epoch": 0.344909072228501, "grad_norm": 0.1287723332643509, "learning_rate": 0.01, "loss": 2.3064, "step": 3357 }, { "epoch": 0.345217301962396, "grad_norm": 0.08843682706356049, "learning_rate": 0.01, "loss": 2.3059, "step": 3360 }, { "epoch": 0.34552553169629097, "grad_norm": 0.07060680538415909, "learning_rate": 0.01, "loss": 2.2627, "step": 3363 }, { "epoch": 0.34583376143018596, "grad_norm": 0.10443838685750961, "learning_rate": 0.01, "loss": 2.2876, "step": 3366 }, { "epoch": 0.34614199116408095, "grad_norm": 0.06748315691947937, "learning_rate": 0.01, "loss": 2.3182, "step": 3369 }, { "epoch": 0.34645022089797595, "grad_norm": 0.06599223613739014, "learning_rate": 0.01, "loss": 2.2997, "step": 3372 }, { "epoch": 0.34675845063187094, "grad_norm": 0.08530016988515854, "learning_rate": 0.01, "loss": 2.2959, "step": 3375 }, { "epoch": 0.34706668036576593, "grad_norm": 0.10694181174039841, "learning_rate": 0.01, "loss": 2.3248, "step": 3378 }, { "epoch": 0.3473749100996609, "grad_norm": 0.06598237156867981, "learning_rate": 0.01, "loss": 2.2837, "step": 3381 }, { "epoch": 0.3476831398335559, "grad_norm": 0.0782204419374466, "learning_rate": 0.01, "loss": 2.2926, "step": 3384 }, { "epoch": 0.3479913695674509, "grad_norm": 0.09585436433553696, "learning_rate": 0.01, "loss": 2.2984, "step": 3387 }, { "epoch": 0.34829959930134596, "grad_norm": 0.061477720737457275, "learning_rate": 0.01, "loss": 2.2693, "step": 3390 }, { "epoch": 0.34860782903524096, "grad_norm": 0.104725681245327, "learning_rate": 0.01, "loss": 2.2887, "step": 3393 }, { "epoch": 0.34891605876913595, "grad_norm": 0.12205322831869125, "learning_rate": 0.01, "loss": 2.3052, "step": 3396 }, { "epoch": 0.34922428850303094, "grad_norm": 0.16279913485050201, "learning_rate": 0.01, "loss": 2.2771, "step": 3399 }, { "epoch": 0.34953251823692594, "grad_norm": 0.059565551578998566, "learning_rate": 0.01, "loss": 2.3027, "step": 3402 }, { "epoch": 0.34984074797082093, "grad_norm": 0.06318376958370209, "learning_rate": 0.01, "loss": 2.3131, "step": 3405 }, { "epoch": 0.3501489777047159, "grad_norm": 0.05476443096995354, "learning_rate": 0.01, "loss": 2.2953, "step": 3408 }, { "epoch": 0.3504572074386109, "grad_norm": 0.07989142090082169, "learning_rate": 0.01, "loss": 2.31, "step": 3411 }, { "epoch": 0.3507654371725059, "grad_norm": 0.15566086769104004, "learning_rate": 0.01, "loss": 2.2839, "step": 3414 }, { "epoch": 0.3510736669064009, "grad_norm": 0.060441337525844574, "learning_rate": 0.01, "loss": 2.2952, "step": 3417 }, { "epoch": 0.3513818966402959, "grad_norm": 0.06277213245630264, "learning_rate": 0.01, "loss": 2.32, "step": 3420 }, { "epoch": 0.3516901263741909, "grad_norm": 0.04959907755255699, "learning_rate": 0.01, "loss": 2.3116, "step": 3423 }, { "epoch": 0.3519983561080859, "grad_norm": 0.06766139715909958, "learning_rate": 0.01, "loss": 2.3201, "step": 3426 }, { "epoch": 0.3523065858419809, "grad_norm": 0.053323931992053986, "learning_rate": 0.01, "loss": 2.3271, "step": 3429 }, { "epoch": 0.3526148155758759, "grad_norm": 0.06396596878767014, "learning_rate": 0.01, "loss": 2.2929, "step": 3432 }, { "epoch": 0.35292304530977087, "grad_norm": 0.07360636442899704, "learning_rate": 0.01, "loss": 2.2918, "step": 3435 }, { "epoch": 0.35323127504366586, "grad_norm": 0.10262563079595566, "learning_rate": 0.01, "loss": 2.2871, "step": 3438 }, { "epoch": 0.35353950477756085, "grad_norm": 0.09783780574798584, "learning_rate": 0.01, "loss": 2.3229, "step": 3441 }, { "epoch": 0.35384773451145585, "grad_norm": 0.08542583137750626, "learning_rate": 0.01, "loss": 2.2887, "step": 3444 }, { "epoch": 0.35415596424535084, "grad_norm": 0.11864805966615677, "learning_rate": 0.01, "loss": 2.2848, "step": 3447 }, { "epoch": 0.3544641939792459, "grad_norm": 0.10997387021780014, "learning_rate": 0.01, "loss": 2.2897, "step": 3450 }, { "epoch": 0.3547724237131409, "grad_norm": 0.10915081202983856, "learning_rate": 0.01, "loss": 2.3114, "step": 3453 }, { "epoch": 0.3550806534470359, "grad_norm": 0.15109725296497345, "learning_rate": 0.01, "loss": 2.2933, "step": 3456 }, { "epoch": 0.35538888318093087, "grad_norm": 0.04911811649799347, "learning_rate": 0.01, "loss": 2.3035, "step": 3459 }, { "epoch": 0.35569711291482586, "grad_norm": 0.12352598458528519, "learning_rate": 0.01, "loss": 2.2897, "step": 3462 }, { "epoch": 0.35600534264872086, "grad_norm": 0.10834213346242905, "learning_rate": 0.01, "loss": 2.2879, "step": 3465 }, { "epoch": 0.35631357238261585, "grad_norm": 0.10665787756443024, "learning_rate": 0.01, "loss": 2.2614, "step": 3468 }, { "epoch": 0.35662180211651084, "grad_norm": 0.0898185670375824, "learning_rate": 0.01, "loss": 2.2943, "step": 3471 }, { "epoch": 0.35693003185040584, "grad_norm": 0.07015782594680786, "learning_rate": 0.01, "loss": 2.298, "step": 3474 }, { "epoch": 0.35723826158430083, "grad_norm": 0.1292288452386856, "learning_rate": 0.01, "loss": 2.3122, "step": 3477 }, { "epoch": 0.3575464913181958, "grad_norm": 0.09300121665000916, "learning_rate": 0.01, "loss": 2.2769, "step": 3480 }, { "epoch": 0.3578547210520908, "grad_norm": 0.0449809767305851, "learning_rate": 0.01, "loss": 2.2564, "step": 3483 }, { "epoch": 0.3581629507859858, "grad_norm": 0.051362160593271255, "learning_rate": 0.01, "loss": 2.2739, "step": 3486 }, { "epoch": 0.3584711805198808, "grad_norm": 0.12473469972610474, "learning_rate": 0.01, "loss": 2.2844, "step": 3489 }, { "epoch": 0.3587794102537758, "grad_norm": 0.0925057902932167, "learning_rate": 0.01, "loss": 2.2618, "step": 3492 }, { "epoch": 0.3590876399876708, "grad_norm": 0.1026608943939209, "learning_rate": 0.01, "loss": 2.2814, "step": 3495 }, { "epoch": 0.3593958697215658, "grad_norm": 0.0995681881904602, "learning_rate": 0.01, "loss": 2.2861, "step": 3498 }, { "epoch": 0.3597040994554608, "grad_norm": 0.06513385474681854, "learning_rate": 0.01, "loss": 2.2827, "step": 3501 }, { "epoch": 0.3600123291893558, "grad_norm": 0.06724824756383896, "learning_rate": 0.01, "loss": 2.2799, "step": 3504 }, { "epoch": 0.3603205589232508, "grad_norm": 0.06367610394954681, "learning_rate": 0.01, "loss": 2.2846, "step": 3507 }, { "epoch": 0.3606287886571458, "grad_norm": 0.07489916682243347, "learning_rate": 0.01, "loss": 2.2816, "step": 3510 }, { "epoch": 0.3609370183910408, "grad_norm": 0.11221667379140854, "learning_rate": 0.01, "loss": 2.2869, "step": 3513 }, { "epoch": 0.3612452481249358, "grad_norm": 0.09854032099246979, "learning_rate": 0.01, "loss": 2.2646, "step": 3516 }, { "epoch": 0.3615534778588308, "grad_norm": 0.09218656271696091, "learning_rate": 0.01, "loss": 2.2844, "step": 3519 }, { "epoch": 0.3618617075927258, "grad_norm": 0.1531379073858261, "learning_rate": 0.01, "loss": 2.279, "step": 3522 }, { "epoch": 0.3621699373266208, "grad_norm": 0.07070820778608322, "learning_rate": 0.01, "loss": 2.2747, "step": 3525 }, { "epoch": 0.3624781670605158, "grad_norm": 0.1057102233171463, "learning_rate": 0.01, "loss": 2.275, "step": 3528 }, { "epoch": 0.36278639679441077, "grad_norm": 0.049471016973257065, "learning_rate": 0.01, "loss": 2.3013, "step": 3531 }, { "epoch": 0.36309462652830576, "grad_norm": 0.08196526020765305, "learning_rate": 0.01, "loss": 2.2571, "step": 3534 }, { "epoch": 0.36340285626220076, "grad_norm": 0.09507983922958374, "learning_rate": 0.01, "loss": 2.3196, "step": 3537 }, { "epoch": 0.36371108599609575, "grad_norm": 0.089228555560112, "learning_rate": 0.01, "loss": 2.2539, "step": 3540 }, { "epoch": 0.36401931572999074, "grad_norm": 0.0866270586848259, "learning_rate": 0.01, "loss": 2.284, "step": 3543 }, { "epoch": 0.36432754546388574, "grad_norm": 0.13805072009563446, "learning_rate": 0.01, "loss": 2.2723, "step": 3546 }, { "epoch": 0.36463577519778073, "grad_norm": 0.09308724105358124, "learning_rate": 0.01, "loss": 2.2969, "step": 3549 }, { "epoch": 0.3649440049316757, "grad_norm": 0.07004178315401077, "learning_rate": 0.01, "loss": 2.2959, "step": 3552 }, { "epoch": 0.3652522346655707, "grad_norm": 0.09345975518226624, "learning_rate": 0.01, "loss": 2.2656, "step": 3555 }, { "epoch": 0.3655604643994657, "grad_norm": 0.07694482058286667, "learning_rate": 0.01, "loss": 2.2921, "step": 3558 }, { "epoch": 0.3658686941333607, "grad_norm": 0.05591150000691414, "learning_rate": 0.01, "loss": 2.2869, "step": 3561 }, { "epoch": 0.36617692386725575, "grad_norm": 0.06863993406295776, "learning_rate": 0.01, "loss": 2.2897, "step": 3564 }, { "epoch": 0.36648515360115075, "grad_norm": 0.06258527934551239, "learning_rate": 0.01, "loss": 2.2994, "step": 3567 }, { "epoch": 0.36679338333504574, "grad_norm": 0.1049329936504364, "learning_rate": 0.01, "loss": 2.2794, "step": 3570 }, { "epoch": 0.36710161306894074, "grad_norm": 0.1229025200009346, "learning_rate": 0.01, "loss": 2.2949, "step": 3573 }, { "epoch": 0.36740984280283573, "grad_norm": 0.13274389505386353, "learning_rate": 0.01, "loss": 2.2791, "step": 3576 }, { "epoch": 0.3677180725367307, "grad_norm": 0.09388844668865204, "learning_rate": 0.01, "loss": 2.3067, "step": 3579 }, { "epoch": 0.3680263022706257, "grad_norm": 0.05375714227557182, "learning_rate": 0.01, "loss": 2.2946, "step": 3582 }, { "epoch": 0.3683345320045207, "grad_norm": 0.059105634689331055, "learning_rate": 0.01, "loss": 2.2821, "step": 3585 }, { "epoch": 0.3686427617384157, "grad_norm": 0.055578552186489105, "learning_rate": 0.01, "loss": 2.2694, "step": 3588 }, { "epoch": 0.3689509914723107, "grad_norm": 0.08778764307498932, "learning_rate": 0.01, "loss": 2.2712, "step": 3591 }, { "epoch": 0.3692592212062057, "grad_norm": 0.1044803187251091, "learning_rate": 0.01, "loss": 2.2797, "step": 3594 }, { "epoch": 0.3695674509401007, "grad_norm": 0.15398399531841278, "learning_rate": 0.01, "loss": 2.3042, "step": 3597 }, { "epoch": 0.3698756806739957, "grad_norm": 0.11562564969062805, "learning_rate": 0.01, "loss": 2.2609, "step": 3600 }, { "epoch": 0.37018391040789067, "grad_norm": 0.060630831867456436, "learning_rate": 0.01, "loss": 2.2663, "step": 3603 }, { "epoch": 0.37049214014178566, "grad_norm": 0.0576477013528347, "learning_rate": 0.01, "loss": 2.2974, "step": 3606 }, { "epoch": 0.37080036987568066, "grad_norm": 0.059915438294410706, "learning_rate": 0.01, "loss": 2.3031, "step": 3609 }, { "epoch": 0.37110859960957565, "grad_norm": 0.10807155817747116, "learning_rate": 0.01, "loss": 2.2739, "step": 3612 }, { "epoch": 0.37141682934347064, "grad_norm": 0.09196165949106216, "learning_rate": 0.01, "loss": 2.3054, "step": 3615 }, { "epoch": 0.37172505907736564, "grad_norm": 0.07379795610904694, "learning_rate": 0.01, "loss": 2.2805, "step": 3618 }, { "epoch": 0.3720332888112607, "grad_norm": 0.06034912168979645, "learning_rate": 0.01, "loss": 2.2549, "step": 3621 }, { "epoch": 0.3723415185451557, "grad_norm": 0.13983361423015594, "learning_rate": 0.01, "loss": 2.269, "step": 3624 }, { "epoch": 0.3726497482790507, "grad_norm": 0.11592069268226624, "learning_rate": 0.01, "loss": 2.2903, "step": 3627 }, { "epoch": 0.37295797801294567, "grad_norm": 0.15428505837917328, "learning_rate": 0.01, "loss": 2.2918, "step": 3630 }, { "epoch": 0.37326620774684066, "grad_norm": 0.19936774671077728, "learning_rate": 0.01, "loss": 2.2782, "step": 3633 }, { "epoch": 0.37357443748073565, "grad_norm": 0.15364627540111542, "learning_rate": 0.01, "loss": 2.2736, "step": 3636 }, { "epoch": 0.37388266721463065, "grad_norm": 0.047554273158311844, "learning_rate": 0.01, "loss": 2.3172, "step": 3639 }, { "epoch": 0.37419089694852564, "grad_norm": 0.0555570051074028, "learning_rate": 0.01, "loss": 2.2731, "step": 3642 }, { "epoch": 0.37449912668242064, "grad_norm": 0.052204012870788574, "learning_rate": 0.01, "loss": 2.281, "step": 3645 }, { "epoch": 0.37480735641631563, "grad_norm": 0.09206510335206985, "learning_rate": 0.01, "loss": 2.2639, "step": 3648 }, { "epoch": 0.3751155861502106, "grad_norm": 0.1199311912059784, "learning_rate": 0.01, "loss": 2.2873, "step": 3651 }, { "epoch": 0.3754238158841056, "grad_norm": 0.08949270099401474, "learning_rate": 0.01, "loss": 2.2668, "step": 3654 }, { "epoch": 0.3757320456180006, "grad_norm": 0.08521883934736252, "learning_rate": 0.01, "loss": 2.247, "step": 3657 }, { "epoch": 0.3760402753518956, "grad_norm": 0.07689694315195084, "learning_rate": 0.01, "loss": 2.2904, "step": 3660 }, { "epoch": 0.3763485050857906, "grad_norm": 0.08761987835168839, "learning_rate": 0.01, "loss": 2.2761, "step": 3663 }, { "epoch": 0.3766567348196856, "grad_norm": 0.056420013308525085, "learning_rate": 0.01, "loss": 2.259, "step": 3666 }, { "epoch": 0.3769649645535806, "grad_norm": 0.06192856654524803, "learning_rate": 0.01, "loss": 2.2294, "step": 3669 }, { "epoch": 0.3772731942874756, "grad_norm": 0.1021333709359169, "learning_rate": 0.01, "loss": 2.2649, "step": 3672 }, { "epoch": 0.37758142402137057, "grad_norm": 0.10071670264005661, "learning_rate": 0.01, "loss": 2.2584, "step": 3675 }, { "epoch": 0.37788965375526556, "grad_norm": 0.05968625843524933, "learning_rate": 0.01, "loss": 2.2699, "step": 3678 }, { "epoch": 0.3781978834891606, "grad_norm": 0.07489661872386932, "learning_rate": 0.01, "loss": 2.2663, "step": 3681 }, { "epoch": 0.3785061132230556, "grad_norm": 0.07880943268537521, "learning_rate": 0.01, "loss": 2.2709, "step": 3684 }, { "epoch": 0.3788143429569506, "grad_norm": 0.055632054805755615, "learning_rate": 0.01, "loss": 2.272, "step": 3687 }, { "epoch": 0.3791225726908456, "grad_norm": 0.05365302786231041, "learning_rate": 0.01, "loss": 2.2268, "step": 3690 }, { "epoch": 0.3794308024247406, "grad_norm": 0.0802481397986412, "learning_rate": 0.01, "loss": 2.2631, "step": 3693 }, { "epoch": 0.3797390321586356, "grad_norm": 0.1312764585018158, "learning_rate": 0.01, "loss": 2.2985, "step": 3696 }, { "epoch": 0.3800472618925306, "grad_norm": 0.14543971419334412, "learning_rate": 0.01, "loss": 2.25, "step": 3699 }, { "epoch": 0.38035549162642557, "grad_norm": 0.05727002024650574, "learning_rate": 0.01, "loss": 2.2556, "step": 3702 }, { "epoch": 0.38066372136032056, "grad_norm": 0.07309607416391373, "learning_rate": 0.01, "loss": 2.2574, "step": 3705 }, { "epoch": 0.38097195109421556, "grad_norm": 0.03849095106124878, "learning_rate": 0.01, "loss": 2.2501, "step": 3708 }, { "epoch": 0.38128018082811055, "grad_norm": 0.0623021237552166, "learning_rate": 0.01, "loss": 2.2672, "step": 3711 }, { "epoch": 0.38158841056200554, "grad_norm": 0.08916610479354858, "learning_rate": 0.01, "loss": 2.2683, "step": 3714 }, { "epoch": 0.38189664029590054, "grad_norm": 0.08126388490200043, "learning_rate": 0.01, "loss": 2.2574, "step": 3717 }, { "epoch": 0.38220487002979553, "grad_norm": 0.07121114432811737, "learning_rate": 0.01, "loss": 2.2358, "step": 3720 }, { "epoch": 0.3825130997636905, "grad_norm": 0.07406505942344666, "learning_rate": 0.01, "loss": 2.2736, "step": 3723 }, { "epoch": 0.3828213294975855, "grad_norm": 0.13355331122875214, "learning_rate": 0.01, "loss": 2.2685, "step": 3726 }, { "epoch": 0.3831295592314805, "grad_norm": 0.05672430619597435, "learning_rate": 0.01, "loss": 2.2913, "step": 3729 }, { "epoch": 0.3834377889653755, "grad_norm": 0.047647468745708466, "learning_rate": 0.01, "loss": 2.2533, "step": 3732 }, { "epoch": 0.3837460186992705, "grad_norm": 0.059008341282606125, "learning_rate": 0.01, "loss": 2.2867, "step": 3735 }, { "epoch": 0.38405424843316555, "grad_norm": 0.06551840156316757, "learning_rate": 0.01, "loss": 2.2742, "step": 3738 }, { "epoch": 0.38436247816706054, "grad_norm": 0.08781883865594864, "learning_rate": 0.01, "loss": 2.2427, "step": 3741 }, { "epoch": 0.38467070790095553, "grad_norm": 0.06808102875947952, "learning_rate": 0.01, "loss": 2.2493, "step": 3744 }, { "epoch": 0.3849789376348505, "grad_norm": 0.06570697575807571, "learning_rate": 0.01, "loss": 2.2445, "step": 3747 }, { "epoch": 0.3852871673687455, "grad_norm": 0.08742080628871918, "learning_rate": 0.01, "loss": 2.2576, "step": 3750 }, { "epoch": 0.3855953971026405, "grad_norm": 0.1518019735813141, "learning_rate": 0.01, "loss": 2.2819, "step": 3753 }, { "epoch": 0.3859036268365355, "grad_norm": 0.10349754244089127, "learning_rate": 0.01, "loss": 2.2465, "step": 3756 }, { "epoch": 0.3862118565704305, "grad_norm": 0.06008581072092056, "learning_rate": 0.01, "loss": 2.2817, "step": 3759 }, { "epoch": 0.3865200863043255, "grad_norm": 0.0450257770717144, "learning_rate": 0.01, "loss": 2.2585, "step": 3762 }, { "epoch": 0.3868283160382205, "grad_norm": 0.04145176708698273, "learning_rate": 0.01, "loss": 2.2634, "step": 3765 }, { "epoch": 0.3871365457721155, "grad_norm": 0.17084141075611115, "learning_rate": 0.01, "loss": 2.2355, "step": 3768 }, { "epoch": 0.3874447755060105, "grad_norm": 0.06679602712392807, "learning_rate": 0.01, "loss": 2.2737, "step": 3771 }, { "epoch": 0.38775300523990547, "grad_norm": 0.05363382026553154, "learning_rate": 0.01, "loss": 2.244, "step": 3774 }, { "epoch": 0.38806123497380046, "grad_norm": 0.05722133815288544, "learning_rate": 0.01, "loss": 2.2515, "step": 3777 }, { "epoch": 0.38836946470769546, "grad_norm": 0.06288215517997742, "learning_rate": 0.01, "loss": 2.2625, "step": 3780 }, { "epoch": 0.38867769444159045, "grad_norm": 0.05087801814079285, "learning_rate": 0.01, "loss": 2.2883, "step": 3783 }, { "epoch": 0.38898592417548544, "grad_norm": 0.08160998672246933, "learning_rate": 0.01, "loss": 2.2462, "step": 3786 }, { "epoch": 0.38929415390938044, "grad_norm": 0.22291240096092224, "learning_rate": 0.01, "loss": 2.2613, "step": 3789 }, { "epoch": 0.38960238364327543, "grad_norm": 0.11482773721218109, "learning_rate": 0.01, "loss": 2.2633, "step": 3792 }, { "epoch": 0.3899106133771705, "grad_norm": 0.056299589574337006, "learning_rate": 0.01, "loss": 2.2896, "step": 3795 }, { "epoch": 0.39021884311106547, "grad_norm": 0.04524017125368118, "learning_rate": 0.01, "loss": 2.2543, "step": 3798 }, { "epoch": 0.39052707284496047, "grad_norm": 0.0903107225894928, "learning_rate": 0.01, "loss": 2.2801, "step": 3801 }, { "epoch": 0.39083530257885546, "grad_norm": 0.0645504966378212, "learning_rate": 0.01, "loss": 2.2628, "step": 3804 }, { "epoch": 0.39114353231275045, "grad_norm": 0.06752094626426697, "learning_rate": 0.01, "loss": 2.2732, "step": 3807 }, { "epoch": 0.39145176204664545, "grad_norm": 0.04459339380264282, "learning_rate": 0.01, "loss": 2.2601, "step": 3810 }, { "epoch": 0.39175999178054044, "grad_norm": 0.07300913333892822, "learning_rate": 0.01, "loss": 2.2437, "step": 3813 }, { "epoch": 0.39206822151443543, "grad_norm": 0.16804097592830658, "learning_rate": 0.01, "loss": 2.26, "step": 3816 }, { "epoch": 0.3923764512483304, "grad_norm": 0.10682248324155807, "learning_rate": 0.01, "loss": 2.2764, "step": 3819 }, { "epoch": 0.3926846809822254, "grad_norm": 0.046895258128643036, "learning_rate": 0.01, "loss": 2.2654, "step": 3822 }, { "epoch": 0.3929929107161204, "grad_norm": 0.05799179524183273, "learning_rate": 0.01, "loss": 2.2254, "step": 3825 }, { "epoch": 0.3933011404500154, "grad_norm": 0.0474528968334198, "learning_rate": 0.01, "loss": 2.2604, "step": 3828 }, { "epoch": 0.3936093701839104, "grad_norm": 0.1437537968158722, "learning_rate": 0.01, "loss": 2.2532, "step": 3831 }, { "epoch": 0.3939175999178054, "grad_norm": 0.06202014535665512, "learning_rate": 0.01, "loss": 2.2486, "step": 3834 }, { "epoch": 0.3942258296517004, "grad_norm": 0.09379147738218307, "learning_rate": 0.01, "loss": 2.2602, "step": 3837 }, { "epoch": 0.3945340593855954, "grad_norm": 0.07898830622434616, "learning_rate": 0.01, "loss": 2.2605, "step": 3840 }, { "epoch": 0.3948422891194904, "grad_norm": 0.10186600685119629, "learning_rate": 0.01, "loss": 2.2807, "step": 3843 }, { "epoch": 0.39515051885338537, "grad_norm": 0.08611535280942917, "learning_rate": 0.01, "loss": 2.2571, "step": 3846 }, { "epoch": 0.39545874858728036, "grad_norm": 0.10435480624437332, "learning_rate": 0.01, "loss": 2.2721, "step": 3849 }, { "epoch": 0.39576697832117536, "grad_norm": 0.11543019860982895, "learning_rate": 0.01, "loss": 2.2598, "step": 3852 }, { "epoch": 0.3960752080550704, "grad_norm": 0.11996404081583023, "learning_rate": 0.01, "loss": 2.2536, "step": 3855 }, { "epoch": 0.3963834377889654, "grad_norm": 0.05615765228867531, "learning_rate": 0.01, "loss": 2.2637, "step": 3858 }, { "epoch": 0.3966916675228604, "grad_norm": 0.06568838655948639, "learning_rate": 0.01, "loss": 2.2756, "step": 3861 }, { "epoch": 0.3969998972567554, "grad_norm": 0.07747132331132889, "learning_rate": 0.01, "loss": 2.2816, "step": 3864 }, { "epoch": 0.3973081269906504, "grad_norm": 0.057373497635126114, "learning_rate": 0.01, "loss": 2.255, "step": 3867 }, { "epoch": 0.3976163567245454, "grad_norm": 0.11501277983188629, "learning_rate": 0.01, "loss": 2.2494, "step": 3870 }, { "epoch": 0.39792458645844037, "grad_norm": 0.07761958241462708, "learning_rate": 0.01, "loss": 2.2459, "step": 3873 }, { "epoch": 0.39823281619233536, "grad_norm": 0.06263428926467896, "learning_rate": 0.01, "loss": 2.2649, "step": 3876 }, { "epoch": 0.39854104592623035, "grad_norm": 0.04552373290061951, "learning_rate": 0.01, "loss": 2.2578, "step": 3879 }, { "epoch": 0.39884927566012535, "grad_norm": 0.0631655901670456, "learning_rate": 0.01, "loss": 2.2648, "step": 3882 }, { "epoch": 0.39915750539402034, "grad_norm": 0.06519417464733124, "learning_rate": 0.01, "loss": 2.2438, "step": 3885 }, { "epoch": 0.39946573512791533, "grad_norm": 0.10446424037218094, "learning_rate": 0.01, "loss": 2.2815, "step": 3888 }, { "epoch": 0.3997739648618103, "grad_norm": 0.07533372938632965, "learning_rate": 0.01, "loss": 2.272, "step": 3891 }, { "epoch": 0.4000821945957053, "grad_norm": 0.05748215690255165, "learning_rate": 0.01, "loss": 2.2971, "step": 3894 }, { "epoch": 0.4003904243296003, "grad_norm": 0.051343973726034164, "learning_rate": 0.01, "loss": 2.2316, "step": 3897 }, { "epoch": 0.4006986540634953, "grad_norm": 0.04799075797200203, "learning_rate": 0.01, "loss": 2.2333, "step": 3900 }, { "epoch": 0.4010068837973903, "grad_norm": 0.12885436415672302, "learning_rate": 0.01, "loss": 2.247, "step": 3903 }, { "epoch": 0.4013151135312853, "grad_norm": 0.07175249606370926, "learning_rate": 0.01, "loss": 2.2407, "step": 3906 }, { "epoch": 0.4016233432651803, "grad_norm": 0.10784266144037247, "learning_rate": 0.01, "loss": 2.2458, "step": 3909 }, { "epoch": 0.40193157299907534, "grad_norm": 0.08646712452173233, "learning_rate": 0.01, "loss": 2.2571, "step": 3912 }, { "epoch": 0.40223980273297033, "grad_norm": 0.05365338176488876, "learning_rate": 0.01, "loss": 2.2585, "step": 3915 }, { "epoch": 0.4025480324668653, "grad_norm": 0.07037780433893204, "learning_rate": 0.01, "loss": 2.2277, "step": 3918 }, { "epoch": 0.4028562622007603, "grad_norm": 0.040290024131536484, "learning_rate": 0.01, "loss": 2.2508, "step": 3921 }, { "epoch": 0.4031644919346553, "grad_norm": 0.050338853150606155, "learning_rate": 0.01, "loss": 2.2356, "step": 3924 }, { "epoch": 0.4034727216685503, "grad_norm": 0.1420246660709381, "learning_rate": 0.01, "loss": 2.2531, "step": 3927 }, { "epoch": 0.4037809514024453, "grad_norm": 0.07432923465967178, "learning_rate": 0.01, "loss": 2.2766, "step": 3930 }, { "epoch": 0.4040891811363403, "grad_norm": 0.04954257979989052, "learning_rate": 0.01, "loss": 2.2825, "step": 3933 }, { "epoch": 0.4043974108702353, "grad_norm": 0.05988876149058342, "learning_rate": 0.01, "loss": 2.2342, "step": 3936 }, { "epoch": 0.4047056406041303, "grad_norm": 0.09800540655851364, "learning_rate": 0.01, "loss": 2.2268, "step": 3939 }, { "epoch": 0.4050138703380253, "grad_norm": 0.09171874821186066, "learning_rate": 0.01, "loss": 2.2648, "step": 3942 }, { "epoch": 0.40532210007192027, "grad_norm": 0.07430606335401535, "learning_rate": 0.01, "loss": 2.2523, "step": 3945 }, { "epoch": 0.40563032980581526, "grad_norm": 0.043649185448884964, "learning_rate": 0.01, "loss": 2.2303, "step": 3948 }, { "epoch": 0.40593855953971025, "grad_norm": 0.04120480641722679, "learning_rate": 0.01, "loss": 2.2299, "step": 3951 }, { "epoch": 0.40624678927360525, "grad_norm": 0.0692945346236229, "learning_rate": 0.01, "loss": 2.2466, "step": 3954 }, { "epoch": 0.40655501900750024, "grad_norm": 0.08884318917989731, "learning_rate": 0.01, "loss": 2.2802, "step": 3957 }, { "epoch": 0.40686324874139523, "grad_norm": 0.05542384088039398, "learning_rate": 0.01, "loss": 2.2303, "step": 3960 }, { "epoch": 0.4071714784752902, "grad_norm": 0.08013599365949631, "learning_rate": 0.01, "loss": 2.2361, "step": 3963 }, { "epoch": 0.4074797082091852, "grad_norm": 0.15963242948055267, "learning_rate": 0.01, "loss": 2.2608, "step": 3966 }, { "epoch": 0.40778793794308027, "grad_norm": 0.05428241938352585, "learning_rate": 0.01, "loss": 2.2415, "step": 3969 }, { "epoch": 0.40809616767697526, "grad_norm": 0.09297880530357361, "learning_rate": 0.01, "loss": 2.2804, "step": 3972 }, { "epoch": 0.40840439741087026, "grad_norm": 0.11259882897138596, "learning_rate": 0.01, "loss": 2.2562, "step": 3975 }, { "epoch": 0.40871262714476525, "grad_norm": 0.0546397790312767, "learning_rate": 0.01, "loss": 2.2423, "step": 3978 }, { "epoch": 0.40902085687866024, "grad_norm": 0.13870957493782043, "learning_rate": 0.01, "loss": 2.2431, "step": 3981 }, { "epoch": 0.40932908661255524, "grad_norm": 0.05527504161000252, "learning_rate": 0.01, "loss": 2.2649, "step": 3984 }, { "epoch": 0.40963731634645023, "grad_norm": 0.08060980588197708, "learning_rate": 0.01, "loss": 2.2708, "step": 3987 }, { "epoch": 0.4099455460803452, "grad_norm": 0.05611690506339073, "learning_rate": 0.01, "loss": 2.2683, "step": 3990 }, { "epoch": 0.4102537758142402, "grad_norm": 0.08760816603899002, "learning_rate": 0.01, "loss": 2.2392, "step": 3993 }, { "epoch": 0.4105620055481352, "grad_norm": 0.07327746599912643, "learning_rate": 0.01, "loss": 2.2587, "step": 3996 }, { "epoch": 0.4108702352820302, "grad_norm": 0.05924748629331589, "learning_rate": 0.01, "loss": 2.2435, "step": 3999 }, { "epoch": 0.4111784650159252, "grad_norm": 0.08269370347261429, "learning_rate": 0.01, "loss": 2.2365, "step": 4002 }, { "epoch": 0.4114866947498202, "grad_norm": 0.06834371387958527, "learning_rate": 0.01, "loss": 2.2579, "step": 4005 }, { "epoch": 0.4117949244837152, "grad_norm": 0.06737885624170303, "learning_rate": 0.01, "loss": 2.2585, "step": 4008 }, { "epoch": 0.4121031542176102, "grad_norm": 0.0919148176908493, "learning_rate": 0.01, "loss": 2.2524, "step": 4011 }, { "epoch": 0.4124113839515052, "grad_norm": 0.0744348093867302, "learning_rate": 0.01, "loss": 2.2328, "step": 4014 }, { "epoch": 0.41271961368540017, "grad_norm": 0.08952994644641876, "learning_rate": 0.01, "loss": 2.2556, "step": 4017 }, { "epoch": 0.41302784341929516, "grad_norm": 0.054230738431215286, "learning_rate": 0.01, "loss": 2.2559, "step": 4020 }, { "epoch": 0.41333607315319015, "grad_norm": 0.11185753345489502, "learning_rate": 0.01, "loss": 2.2599, "step": 4023 }, { "epoch": 0.41364430288708515, "grad_norm": 0.11211541295051575, "learning_rate": 0.01, "loss": 2.2456, "step": 4026 }, { "epoch": 0.4139525326209802, "grad_norm": 0.08211257308721542, "learning_rate": 0.01, "loss": 2.2636, "step": 4029 }, { "epoch": 0.4142607623548752, "grad_norm": 0.07233046740293503, "learning_rate": 0.01, "loss": 2.2148, "step": 4032 }, { "epoch": 0.4145689920887702, "grad_norm": 0.1062379851937294, "learning_rate": 0.01, "loss": 2.2382, "step": 4035 }, { "epoch": 0.4148772218226652, "grad_norm": 0.07079877704381943, "learning_rate": 0.01, "loss": 2.2462, "step": 4038 }, { "epoch": 0.41518545155656017, "grad_norm": 0.04237307608127594, "learning_rate": 0.01, "loss": 2.2523, "step": 4041 }, { "epoch": 0.41549368129045516, "grad_norm": 0.12513239681720734, "learning_rate": 0.01, "loss": 2.2614, "step": 4044 }, { "epoch": 0.41580191102435016, "grad_norm": 0.07134360820055008, "learning_rate": 0.01, "loss": 2.2533, "step": 4047 }, { "epoch": 0.41611014075824515, "grad_norm": 0.07371515780687332, "learning_rate": 0.01, "loss": 2.2333, "step": 4050 }, { "epoch": 0.41641837049214014, "grad_norm": 0.05744464695453644, "learning_rate": 0.01, "loss": 2.2292, "step": 4053 }, { "epoch": 0.41672660022603514, "grad_norm": 0.0790088102221489, "learning_rate": 0.01, "loss": 2.2217, "step": 4056 }, { "epoch": 0.41703482995993013, "grad_norm": 0.12540112435817719, "learning_rate": 0.01, "loss": 2.2367, "step": 4059 }, { "epoch": 0.4173430596938251, "grad_norm": 0.06895852833986282, "learning_rate": 0.01, "loss": 2.2354, "step": 4062 }, { "epoch": 0.4176512894277201, "grad_norm": 0.09068478643894196, "learning_rate": 0.01, "loss": 2.2605, "step": 4065 }, { "epoch": 0.4179595191616151, "grad_norm": 0.051881443709135056, "learning_rate": 0.01, "loss": 2.2501, "step": 4068 }, { "epoch": 0.4182677488955101, "grad_norm": 0.20433951914310455, "learning_rate": 0.01, "loss": 2.2582, "step": 4071 }, { "epoch": 0.4185759786294051, "grad_norm": 0.08301309496164322, "learning_rate": 0.01, "loss": 2.2424, "step": 4074 }, { "epoch": 0.4188842083633001, "grad_norm": 0.07062964886426926, "learning_rate": 0.01, "loss": 2.2345, "step": 4077 }, { "epoch": 0.4191924380971951, "grad_norm": 0.09770773351192474, "learning_rate": 0.01, "loss": 2.264, "step": 4080 }, { "epoch": 0.4195006678310901, "grad_norm": 0.0847458690404892, "learning_rate": 0.01, "loss": 2.2329, "step": 4083 }, { "epoch": 0.41980889756498513, "grad_norm": 0.06491915881633759, "learning_rate": 0.01, "loss": 2.2174, "step": 4086 }, { "epoch": 0.4201171272988801, "grad_norm": 0.11355047672986984, "learning_rate": 0.01, "loss": 2.2653, "step": 4089 }, { "epoch": 0.4204253570327751, "grad_norm": 0.10509520024061203, "learning_rate": 0.01, "loss": 2.2435, "step": 4092 }, { "epoch": 0.4207335867666701, "grad_norm": 0.07456620037555695, "learning_rate": 0.01, "loss": 2.2348, "step": 4095 }, { "epoch": 0.4210418165005651, "grad_norm": 0.07531027495861053, "learning_rate": 0.01, "loss": 2.2524, "step": 4098 }, { "epoch": 0.4213500462344601, "grad_norm": 0.06129564717411995, "learning_rate": 0.01, "loss": 2.2577, "step": 4101 }, { "epoch": 0.4216582759683551, "grad_norm": 0.03984616696834564, "learning_rate": 0.01, "loss": 2.2354, "step": 4104 }, { "epoch": 0.4219665057022501, "grad_norm": 0.1273418813943863, "learning_rate": 0.01, "loss": 2.2478, "step": 4107 }, { "epoch": 0.4222747354361451, "grad_norm": 0.08859774470329285, "learning_rate": 0.01, "loss": 2.2504, "step": 4110 }, { "epoch": 0.42258296517004007, "grad_norm": 0.10512147098779678, "learning_rate": 0.01, "loss": 2.2435, "step": 4113 }, { "epoch": 0.42289119490393506, "grad_norm": 0.11181578040122986, "learning_rate": 0.01, "loss": 2.2396, "step": 4116 }, { "epoch": 0.42319942463783006, "grad_norm": 0.07474307715892792, "learning_rate": 0.01, "loss": 2.2518, "step": 4119 }, { "epoch": 0.42350765437172505, "grad_norm": 0.07233690470457077, "learning_rate": 0.01, "loss": 2.2283, "step": 4122 }, { "epoch": 0.42381588410562004, "grad_norm": 0.06051602587103844, "learning_rate": 0.01, "loss": 2.2429, "step": 4125 }, { "epoch": 0.42412411383951504, "grad_norm": 0.0492120198905468, "learning_rate": 0.01, "loss": 2.2312, "step": 4128 }, { "epoch": 0.42443234357341003, "grad_norm": 0.07249493151903152, "learning_rate": 0.01, "loss": 2.244, "step": 4131 }, { "epoch": 0.424740573307305, "grad_norm": 0.0993468165397644, "learning_rate": 0.01, "loss": 2.2441, "step": 4134 }, { "epoch": 0.4250488030412, "grad_norm": 0.07051920145750046, "learning_rate": 0.01, "loss": 2.2188, "step": 4137 }, { "epoch": 0.425357032775095, "grad_norm": 0.08267249912023544, "learning_rate": 0.01, "loss": 2.2472, "step": 4140 }, { "epoch": 0.42566526250899006, "grad_norm": 0.1307336390018463, "learning_rate": 0.01, "loss": 2.2359, "step": 4143 }, { "epoch": 0.42597349224288505, "grad_norm": 0.09383214265108109, "learning_rate": 0.01, "loss": 2.2519, "step": 4146 }, { "epoch": 0.42628172197678005, "grad_norm": 0.08928582817316055, "learning_rate": 0.01, "loss": 2.2322, "step": 4149 }, { "epoch": 0.42658995171067504, "grad_norm": 0.10554556548595428, "learning_rate": 0.01, "loss": 2.2219, "step": 4152 }, { "epoch": 0.42689818144457004, "grad_norm": 0.06501816213130951, "learning_rate": 0.01, "loss": 2.2351, "step": 4155 }, { "epoch": 0.42720641117846503, "grad_norm": 0.10736589878797531, "learning_rate": 0.01, "loss": 2.2327, "step": 4158 }, { "epoch": 0.42751464091236, "grad_norm": 0.11834681034088135, "learning_rate": 0.01, "loss": 2.2617, "step": 4161 }, { "epoch": 0.427822870646255, "grad_norm": 0.07011161744594574, "learning_rate": 0.01, "loss": 2.2218, "step": 4164 }, { "epoch": 0.42813110038015, "grad_norm": 0.0653071179986, "learning_rate": 0.01, "loss": 2.2115, "step": 4167 }, { "epoch": 0.428439330114045, "grad_norm": 0.057517893612384796, "learning_rate": 0.01, "loss": 2.2533, "step": 4170 }, { "epoch": 0.42874755984794, "grad_norm": 0.060261376202106476, "learning_rate": 0.01, "loss": 2.2199, "step": 4173 }, { "epoch": 0.429055789581835, "grad_norm": 0.12384762614965439, "learning_rate": 0.01, "loss": 2.2124, "step": 4176 }, { "epoch": 0.42936401931573, "grad_norm": 0.06436473876237869, "learning_rate": 0.01, "loss": 2.2558, "step": 4179 }, { "epoch": 0.429672249049625, "grad_norm": 0.049704987555742264, "learning_rate": 0.01, "loss": 2.2434, "step": 4182 }, { "epoch": 0.42998047878351997, "grad_norm": 0.0809103325009346, "learning_rate": 0.01, "loss": 2.2461, "step": 4185 }, { "epoch": 0.43028870851741496, "grad_norm": 0.04888701066374779, "learning_rate": 0.01, "loss": 2.2342, "step": 4188 }, { "epoch": 0.43059693825130996, "grad_norm": 0.04951067641377449, "learning_rate": 0.01, "loss": 2.2292, "step": 4191 }, { "epoch": 0.43090516798520495, "grad_norm": 0.13740333914756775, "learning_rate": 0.01, "loss": 2.2243, "step": 4194 }, { "epoch": 0.43121339771909994, "grad_norm": 0.09912848472595215, "learning_rate": 0.01, "loss": 2.2065, "step": 4197 }, { "epoch": 0.43152162745299494, "grad_norm": 0.1031954362988472, "learning_rate": 0.01, "loss": 2.2247, "step": 4200 }, { "epoch": 0.43182985718689, "grad_norm": 0.04378229379653931, "learning_rate": 0.01, "loss": 2.2485, "step": 4203 }, { "epoch": 0.432138086920785, "grad_norm": 0.05430865287780762, "learning_rate": 0.01, "loss": 2.2178, "step": 4206 }, { "epoch": 0.43244631665468, "grad_norm": 0.05675321817398071, "learning_rate": 0.01, "loss": 2.2568, "step": 4209 }, { "epoch": 0.43275454638857497, "grad_norm": 0.07637004554271698, "learning_rate": 0.01, "loss": 2.2567, "step": 4212 }, { "epoch": 0.43306277612246996, "grad_norm": 0.06263475120067596, "learning_rate": 0.01, "loss": 2.2597, "step": 4215 }, { "epoch": 0.43337100585636495, "grad_norm": 0.09689760208129883, "learning_rate": 0.01, "loss": 2.2376, "step": 4218 }, { "epoch": 0.43367923559025995, "grad_norm": 0.13923399150371552, "learning_rate": 0.01, "loss": 2.2394, "step": 4221 }, { "epoch": 0.43398746532415494, "grad_norm": 0.0607299767434597, "learning_rate": 0.01, "loss": 2.2366, "step": 4224 }, { "epoch": 0.43429569505804994, "grad_norm": 0.05221550166606903, "learning_rate": 0.01, "loss": 2.2587, "step": 4227 }, { "epoch": 0.43460392479194493, "grad_norm": 0.05556831881403923, "learning_rate": 0.01, "loss": 2.2422, "step": 4230 }, { "epoch": 0.4349121545258399, "grad_norm": 0.0843261182308197, "learning_rate": 0.01, "loss": 2.2399, "step": 4233 }, { "epoch": 0.4352203842597349, "grad_norm": 0.08864692598581314, "learning_rate": 0.01, "loss": 2.2155, "step": 4236 }, { "epoch": 0.4355286139936299, "grad_norm": 0.11530198156833649, "learning_rate": 0.01, "loss": 2.2612, "step": 4239 }, { "epoch": 0.4358368437275249, "grad_norm": 0.11549337208271027, "learning_rate": 0.01, "loss": 2.2233, "step": 4242 }, { "epoch": 0.4361450734614199, "grad_norm": 0.11105350404977798, "learning_rate": 0.01, "loss": 2.2426, "step": 4245 }, { "epoch": 0.4364533031953149, "grad_norm": 0.1190980076789856, "learning_rate": 0.01, "loss": 2.2353, "step": 4248 }, { "epoch": 0.4367615329292099, "grad_norm": 0.08560021221637726, "learning_rate": 0.01, "loss": 2.2542, "step": 4251 }, { "epoch": 0.4370697626631049, "grad_norm": 0.05514337494969368, "learning_rate": 0.01, "loss": 2.2171, "step": 4254 }, { "epoch": 0.43737799239699987, "grad_norm": 0.06764981150627136, "learning_rate": 0.01, "loss": 2.2363, "step": 4257 }, { "epoch": 0.4376862221308949, "grad_norm": 0.04801105335354805, "learning_rate": 0.01, "loss": 2.2352, "step": 4260 }, { "epoch": 0.4379944518647899, "grad_norm": 0.04782482981681824, "learning_rate": 0.01, "loss": 2.2458, "step": 4263 }, { "epoch": 0.4383026815986849, "grad_norm": 0.12880820035934448, "learning_rate": 0.01, "loss": 2.2384, "step": 4266 }, { "epoch": 0.4386109113325799, "grad_norm": 0.06714754551649094, "learning_rate": 0.01, "loss": 2.2214, "step": 4269 }, { "epoch": 0.4389191410664749, "grad_norm": 0.08878037333488464, "learning_rate": 0.01, "loss": 2.2597, "step": 4272 }, { "epoch": 0.4392273708003699, "grad_norm": 0.051335882395505905, "learning_rate": 0.01, "loss": 2.2065, "step": 4275 }, { "epoch": 0.4395356005342649, "grad_norm": 0.058174654841423035, "learning_rate": 0.01, "loss": 2.246, "step": 4278 }, { "epoch": 0.4398438302681599, "grad_norm": 0.053695593029260635, "learning_rate": 0.01, "loss": 2.2406, "step": 4281 }, { "epoch": 0.44015206000205487, "grad_norm": 0.07685926556587219, "learning_rate": 0.01, "loss": 2.2212, "step": 4284 }, { "epoch": 0.44046028973594986, "grad_norm": 0.13495223224163055, "learning_rate": 0.01, "loss": 2.2486, "step": 4287 }, { "epoch": 0.44076851946984485, "grad_norm": 0.0707453116774559, "learning_rate": 0.01, "loss": 2.247, "step": 4290 }, { "epoch": 0.44107674920373985, "grad_norm": 0.04909240081906319, "learning_rate": 0.01, "loss": 2.2528, "step": 4293 }, { "epoch": 0.44138497893763484, "grad_norm": 0.06148238107562065, "learning_rate": 0.01, "loss": 2.2462, "step": 4296 }, { "epoch": 0.44169320867152984, "grad_norm": 0.07306285202503204, "learning_rate": 0.01, "loss": 2.199, "step": 4299 }, { "epoch": 0.44200143840542483, "grad_norm": 0.12965865433216095, "learning_rate": 0.01, "loss": 2.2156, "step": 4302 }, { "epoch": 0.4423096681393198, "grad_norm": 0.059606775641441345, "learning_rate": 0.01, "loss": 2.2209, "step": 4305 }, { "epoch": 0.4426178978732148, "grad_norm": 0.06866457313299179, "learning_rate": 0.01, "loss": 2.2508, "step": 4308 }, { "epoch": 0.4429261276071098, "grad_norm": 0.08940677344799042, "learning_rate": 0.01, "loss": 2.2244, "step": 4311 }, { "epoch": 0.4432343573410048, "grad_norm": 0.10428988933563232, "learning_rate": 0.01, "loss": 2.2106, "step": 4314 }, { "epoch": 0.44354258707489985, "grad_norm": 0.1565064787864685, "learning_rate": 0.01, "loss": 2.2745, "step": 4317 }, { "epoch": 0.44385081680879485, "grad_norm": 0.11433500796556473, "learning_rate": 0.01, "loss": 2.2655, "step": 4320 }, { "epoch": 0.44415904654268984, "grad_norm": 0.07315809279680252, "learning_rate": 0.01, "loss": 2.2523, "step": 4323 }, { "epoch": 0.44446727627658483, "grad_norm": 0.048583708703517914, "learning_rate": 0.01, "loss": 2.2345, "step": 4326 }, { "epoch": 0.4447755060104798, "grad_norm": 0.03422848507761955, "learning_rate": 0.01, "loss": 2.1883, "step": 4329 }, { "epoch": 0.4450837357443748, "grad_norm": 0.05057518929243088, "learning_rate": 0.01, "loss": 2.2288, "step": 4332 }, { "epoch": 0.4453919654782698, "grad_norm": 0.10407044738531113, "learning_rate": 0.01, "loss": 2.1974, "step": 4335 }, { "epoch": 0.4457001952121648, "grad_norm": 0.06545260548591614, "learning_rate": 0.01, "loss": 2.2121, "step": 4338 }, { "epoch": 0.4460084249460598, "grad_norm": 0.09442485123872757, "learning_rate": 0.01, "loss": 2.2145, "step": 4341 }, { "epoch": 0.4463166546799548, "grad_norm": 0.11353209614753723, "learning_rate": 0.01, "loss": 2.227, "step": 4344 }, { "epoch": 0.4466248844138498, "grad_norm": 0.11243279278278351, "learning_rate": 0.01, "loss": 2.242, "step": 4347 }, { "epoch": 0.4469331141477448, "grad_norm": 0.14264856278896332, "learning_rate": 0.01, "loss": 2.2405, "step": 4350 }, { "epoch": 0.4472413438816398, "grad_norm": 0.048186566680669785, "learning_rate": 0.01, "loss": 2.1921, "step": 4353 }, { "epoch": 0.44754957361553477, "grad_norm": 0.0693448930978775, "learning_rate": 0.01, "loss": 2.2404, "step": 4356 }, { "epoch": 0.44785780334942976, "grad_norm": 0.04426461458206177, "learning_rate": 0.01, "loss": 2.2114, "step": 4359 }, { "epoch": 0.44816603308332476, "grad_norm": 0.06392990797758102, "learning_rate": 0.01, "loss": 2.224, "step": 4362 }, { "epoch": 0.44847426281721975, "grad_norm": 0.16224262118339539, "learning_rate": 0.01, "loss": 2.261, "step": 4365 }, { "epoch": 0.44878249255111474, "grad_norm": 0.06382444500923157, "learning_rate": 0.01, "loss": 2.2067, "step": 4368 }, { "epoch": 0.44909072228500974, "grad_norm": 0.09267281740903854, "learning_rate": 0.01, "loss": 2.2403, "step": 4371 }, { "epoch": 0.44939895201890473, "grad_norm": 0.09785914421081543, "learning_rate": 0.01, "loss": 2.2276, "step": 4374 }, { "epoch": 0.4497071817527998, "grad_norm": 0.06673259288072586, "learning_rate": 0.01, "loss": 2.214, "step": 4377 }, { "epoch": 0.45001541148669477, "grad_norm": 0.05463524907827377, "learning_rate": 0.01, "loss": 2.2048, "step": 4380 }, { "epoch": 0.45032364122058977, "grad_norm": 0.05466567724943161, "learning_rate": 0.01, "loss": 2.2062, "step": 4383 }, { "epoch": 0.45063187095448476, "grad_norm": 0.07413290441036224, "learning_rate": 0.01, "loss": 2.2178, "step": 4386 }, { "epoch": 0.45094010068837975, "grad_norm": 0.06564678996801376, "learning_rate": 0.01, "loss": 2.2304, "step": 4389 }, { "epoch": 0.45124833042227475, "grad_norm": 0.12468644231557846, "learning_rate": 0.01, "loss": 2.2301, "step": 4392 }, { "epoch": 0.45155656015616974, "grad_norm": 0.06898069381713867, "learning_rate": 0.01, "loss": 2.2255, "step": 4395 }, { "epoch": 0.45186478989006473, "grad_norm": 0.13579058647155762, "learning_rate": 0.01, "loss": 2.2021, "step": 4398 }, { "epoch": 0.4521730196239597, "grad_norm": 0.07980421930551529, "learning_rate": 0.01, "loss": 2.2598, "step": 4401 }, { "epoch": 0.4524812493578547, "grad_norm": 0.07771994173526764, "learning_rate": 0.01, "loss": 2.2166, "step": 4404 }, { "epoch": 0.4527894790917497, "grad_norm": 0.08967602998018265, "learning_rate": 0.01, "loss": 2.2095, "step": 4407 }, { "epoch": 0.4530977088256447, "grad_norm": 0.10909977555274963, "learning_rate": 0.01, "loss": 2.2064, "step": 4410 }, { "epoch": 0.4534059385595397, "grad_norm": 0.11167363077402115, "learning_rate": 0.01, "loss": 2.2021, "step": 4413 }, { "epoch": 0.4537141682934347, "grad_norm": 0.10310694575309753, "learning_rate": 0.01, "loss": 2.2582, "step": 4416 }, { "epoch": 0.4540223980273297, "grad_norm": 0.06411474943161011, "learning_rate": 0.01, "loss": 2.2203, "step": 4419 }, { "epoch": 0.4543306277612247, "grad_norm": 0.11141805350780487, "learning_rate": 0.01, "loss": 2.2163, "step": 4422 }, { "epoch": 0.4546388574951197, "grad_norm": 0.09054200351238251, "learning_rate": 0.01, "loss": 2.2054, "step": 4425 }, { "epoch": 0.45494708722901467, "grad_norm": 0.06952405720949173, "learning_rate": 0.01, "loss": 2.2488, "step": 4428 }, { "epoch": 0.45525531696290966, "grad_norm": 0.08597440272569656, "learning_rate": 0.01, "loss": 2.2044, "step": 4431 }, { "epoch": 0.4555635466968047, "grad_norm": 0.06718187034130096, "learning_rate": 0.01, "loss": 2.2419, "step": 4434 }, { "epoch": 0.4558717764306997, "grad_norm": 0.0558515265583992, "learning_rate": 0.01, "loss": 2.2102, "step": 4437 }, { "epoch": 0.4561800061645947, "grad_norm": 0.0560682937502861, "learning_rate": 0.01, "loss": 2.2324, "step": 4440 }, { "epoch": 0.4564882358984897, "grad_norm": 0.058881547302007675, "learning_rate": 0.01, "loss": 2.1966, "step": 4443 }, { "epoch": 0.4567964656323847, "grad_norm": 0.07034582644701004, "learning_rate": 0.01, "loss": 2.2021, "step": 4446 }, { "epoch": 0.4571046953662797, "grad_norm": 0.09703799337148666, "learning_rate": 0.01, "loss": 2.21, "step": 4449 }, { "epoch": 0.45741292510017467, "grad_norm": 0.06268820911645889, "learning_rate": 0.01, "loss": 2.2237, "step": 4452 }, { "epoch": 0.45772115483406967, "grad_norm": 0.123359814286232, "learning_rate": 0.01, "loss": 2.2063, "step": 4455 }, { "epoch": 0.45802938456796466, "grad_norm": 0.0536644384264946, "learning_rate": 0.01, "loss": 2.2002, "step": 4458 }, { "epoch": 0.45833761430185965, "grad_norm": 0.0957527682185173, "learning_rate": 0.01, "loss": 2.2484, "step": 4461 }, { "epoch": 0.45864584403575465, "grad_norm": 0.12607458233833313, "learning_rate": 0.01, "loss": 2.2241, "step": 4464 }, { "epoch": 0.45895407376964964, "grad_norm": 0.07415255159139633, "learning_rate": 0.01, "loss": 2.2083, "step": 4467 }, { "epoch": 0.45926230350354463, "grad_norm": 0.10248073190450668, "learning_rate": 0.01, "loss": 2.2253, "step": 4470 }, { "epoch": 0.4595705332374396, "grad_norm": 0.05264243111014366, "learning_rate": 0.01, "loss": 2.2166, "step": 4473 }, { "epoch": 0.4598787629713346, "grad_norm": 0.0557783767580986, "learning_rate": 0.01, "loss": 2.2213, "step": 4476 }, { "epoch": 0.4601869927052296, "grad_norm": 0.06835830211639404, "learning_rate": 0.01, "loss": 2.2255, "step": 4479 }, { "epoch": 0.4604952224391246, "grad_norm": 0.12045460939407349, "learning_rate": 0.01, "loss": 2.2331, "step": 4482 }, { "epoch": 0.4608034521730196, "grad_norm": 0.11495090276002884, "learning_rate": 0.01, "loss": 2.2191, "step": 4485 }, { "epoch": 0.4611116819069146, "grad_norm": 0.07859046757221222, "learning_rate": 0.01, "loss": 2.2282, "step": 4488 }, { "epoch": 0.46141991164080964, "grad_norm": 0.03789819777011871, "learning_rate": 0.01, "loss": 2.2188, "step": 4491 }, { "epoch": 0.46172814137470464, "grad_norm": 0.03617655113339424, "learning_rate": 0.01, "loss": 2.2496, "step": 4494 }, { "epoch": 0.46203637110859963, "grad_norm": 0.06894705444574356, "learning_rate": 0.01, "loss": 2.2007, "step": 4497 }, { "epoch": 0.4623446008424946, "grad_norm": 0.1143706887960434, "learning_rate": 0.01, "loss": 2.2247, "step": 4500 }, { "epoch": 0.4626528305763896, "grad_norm": 0.10069230943918228, "learning_rate": 0.01, "loss": 2.2114, "step": 4503 }, { "epoch": 0.4629610603102846, "grad_norm": 0.10068007558584213, "learning_rate": 0.01, "loss": 2.2438, "step": 4506 }, { "epoch": 0.4632692900441796, "grad_norm": 0.05319290608167648, "learning_rate": 0.01, "loss": 2.2422, "step": 4509 }, { "epoch": 0.4635775197780746, "grad_norm": 0.06933122128248215, "learning_rate": 0.01, "loss": 2.2059, "step": 4512 }, { "epoch": 0.4638857495119696, "grad_norm": 0.11921056360006332, "learning_rate": 0.01, "loss": 2.2137, "step": 4515 }, { "epoch": 0.4641939792458646, "grad_norm": 0.06092121824622154, "learning_rate": 0.01, "loss": 2.1941, "step": 4518 }, { "epoch": 0.4645022089797596, "grad_norm": 0.06017937511205673, "learning_rate": 0.01, "loss": 2.2539, "step": 4521 }, { "epoch": 0.4648104387136546, "grad_norm": 0.05721915140748024, "learning_rate": 0.01, "loss": 2.2348, "step": 4524 }, { "epoch": 0.46511866844754957, "grad_norm": 0.07706714421510696, "learning_rate": 0.01, "loss": 2.2169, "step": 4527 }, { "epoch": 0.46542689818144456, "grad_norm": 0.07279779762029648, "learning_rate": 0.01, "loss": 2.2163, "step": 4530 }, { "epoch": 0.46573512791533955, "grad_norm": 0.06781268864870071, "learning_rate": 0.01, "loss": 2.1682, "step": 4533 }, { "epoch": 0.46604335764923455, "grad_norm": 0.0807657316327095, "learning_rate": 0.01, "loss": 2.2123, "step": 4536 }, { "epoch": 0.46635158738312954, "grad_norm": 0.06467099487781525, "learning_rate": 0.01, "loss": 2.2152, "step": 4539 }, { "epoch": 0.46665981711702453, "grad_norm": 0.10680168867111206, "learning_rate": 0.01, "loss": 2.2062, "step": 4542 }, { "epoch": 0.4669680468509195, "grad_norm": 0.11668167263269424, "learning_rate": 0.01, "loss": 2.206, "step": 4545 }, { "epoch": 0.4672762765848146, "grad_norm": 0.06468226760625839, "learning_rate": 0.01, "loss": 2.2011, "step": 4548 }, { "epoch": 0.46758450631870957, "grad_norm": 0.07668601721525192, "learning_rate": 0.01, "loss": 2.2128, "step": 4551 }, { "epoch": 0.46789273605260456, "grad_norm": 0.05631673336029053, "learning_rate": 0.01, "loss": 2.1812, "step": 4554 }, { "epoch": 0.46820096578649956, "grad_norm": 0.12898530066013336, "learning_rate": 0.01, "loss": 2.2312, "step": 4557 }, { "epoch": 0.46850919552039455, "grad_norm": 0.07105603069067001, "learning_rate": 0.01, "loss": 2.1949, "step": 4560 }, { "epoch": 0.46881742525428954, "grad_norm": 0.07172367721796036, "learning_rate": 0.01, "loss": 2.2509, "step": 4563 }, { "epoch": 0.46912565498818454, "grad_norm": 0.1219574511051178, "learning_rate": 0.01, "loss": 2.2147, "step": 4566 }, { "epoch": 0.46943388472207953, "grad_norm": 0.05777307227253914, "learning_rate": 0.01, "loss": 2.2071, "step": 4569 }, { "epoch": 0.4697421144559745, "grad_norm": 0.12805253267288208, "learning_rate": 0.01, "loss": 2.2166, "step": 4572 }, { "epoch": 0.4700503441898695, "grad_norm": 0.11360877752304077, "learning_rate": 0.01, "loss": 2.1827, "step": 4575 }, { "epoch": 0.4703585739237645, "grad_norm": 0.07203348726034164, "learning_rate": 0.01, "loss": 2.2378, "step": 4578 }, { "epoch": 0.4706668036576595, "grad_norm": 0.05645303055644035, "learning_rate": 0.01, "loss": 2.2044, "step": 4581 }, { "epoch": 0.4709750333915545, "grad_norm": 0.06103040650486946, "learning_rate": 0.01, "loss": 2.2302, "step": 4584 }, { "epoch": 0.4712832631254495, "grad_norm": 0.0621771402657032, "learning_rate": 0.01, "loss": 2.2147, "step": 4587 }, { "epoch": 0.4715914928593445, "grad_norm": 0.08458666503429413, "learning_rate": 0.01, "loss": 2.1781, "step": 4590 }, { "epoch": 0.4718997225932395, "grad_norm": 0.092729851603508, "learning_rate": 0.01, "loss": 2.2326, "step": 4593 }, { "epoch": 0.4722079523271345, "grad_norm": 0.09255766123533249, "learning_rate": 0.01, "loss": 2.2082, "step": 4596 }, { "epoch": 0.47251618206102947, "grad_norm": 0.11929985135793686, "learning_rate": 0.01, "loss": 2.2064, "step": 4599 }, { "epoch": 0.47282441179492446, "grad_norm": 0.12234004586935043, "learning_rate": 0.01, "loss": 2.1513, "step": 4602 }, { "epoch": 0.47313264152881945, "grad_norm": 0.07648742944002151, "learning_rate": 0.01, "loss": 2.2376, "step": 4605 }, { "epoch": 0.4734408712627145, "grad_norm": 0.05717691034078598, "learning_rate": 0.01, "loss": 2.231, "step": 4608 }, { "epoch": 0.4737491009966095, "grad_norm": 0.048224568367004395, "learning_rate": 0.01, "loss": 2.2126, "step": 4611 }, { "epoch": 0.4740573307305045, "grad_norm": 0.07530826330184937, "learning_rate": 0.01, "loss": 2.2155, "step": 4614 }, { "epoch": 0.4743655604643995, "grad_norm": 0.08617862313985825, "learning_rate": 0.01, "loss": 2.2286, "step": 4617 }, { "epoch": 0.4746737901982945, "grad_norm": 0.10041820257902145, "learning_rate": 0.01, "loss": 2.1917, "step": 4620 }, { "epoch": 0.47498201993218947, "grad_norm": 0.04470205307006836, "learning_rate": 0.01, "loss": 2.2188, "step": 4623 }, { "epoch": 0.47529024966608446, "grad_norm": 0.060269374400377274, "learning_rate": 0.01, "loss": 2.2267, "step": 4626 }, { "epoch": 0.47559847939997946, "grad_norm": 0.06320520490407944, "learning_rate": 0.01, "loss": 2.2054, "step": 4629 }, { "epoch": 0.47590670913387445, "grad_norm": 0.05642838776111603, "learning_rate": 0.01, "loss": 2.2062, "step": 4632 }, { "epoch": 0.47621493886776944, "grad_norm": 0.064301997423172, "learning_rate": 0.01, "loss": 2.2296, "step": 4635 }, { "epoch": 0.47652316860166444, "grad_norm": 0.07448214292526245, "learning_rate": 0.01, "loss": 2.197, "step": 4638 }, { "epoch": 0.47683139833555943, "grad_norm": 0.08586326986551285, "learning_rate": 0.01, "loss": 2.1743, "step": 4641 }, { "epoch": 0.4771396280694544, "grad_norm": 0.13179326057434082, "learning_rate": 0.01, "loss": 2.2299, "step": 4644 }, { "epoch": 0.4774478578033494, "grad_norm": 0.1163720041513443, "learning_rate": 0.01, "loss": 2.2089, "step": 4647 }, { "epoch": 0.4777560875372444, "grad_norm": 0.04846031963825226, "learning_rate": 0.01, "loss": 2.1564, "step": 4650 }, { "epoch": 0.4780643172711394, "grad_norm": 0.13724131882190704, "learning_rate": 0.01, "loss": 2.2078, "step": 4653 }, { "epoch": 0.4783725470050344, "grad_norm": 0.062840536236763, "learning_rate": 0.01, "loss": 2.2252, "step": 4656 }, { "epoch": 0.4786807767389294, "grad_norm": 0.06721820682287216, "learning_rate": 0.01, "loss": 2.1781, "step": 4659 }, { "epoch": 0.4789890064728244, "grad_norm": 0.09086044877767563, "learning_rate": 0.01, "loss": 2.2179, "step": 4662 }, { "epoch": 0.47929723620671943, "grad_norm": 0.07732655107975006, "learning_rate": 0.01, "loss": 2.2334, "step": 4665 }, { "epoch": 0.47960546594061443, "grad_norm": 0.04763714596629143, "learning_rate": 0.01, "loss": 2.2262, "step": 4668 }, { "epoch": 0.4799136956745094, "grad_norm": 0.09649144858121872, "learning_rate": 0.01, "loss": 2.2141, "step": 4671 }, { "epoch": 0.4802219254084044, "grad_norm": 0.05458167567849159, "learning_rate": 0.01, "loss": 2.1967, "step": 4674 }, { "epoch": 0.4805301551422994, "grad_norm": 0.08577650040388107, "learning_rate": 0.01, "loss": 2.2183, "step": 4677 }, { "epoch": 0.4808383848761944, "grad_norm": 0.0733698159456253, "learning_rate": 0.01, "loss": 2.2185, "step": 4680 }, { "epoch": 0.4811466146100894, "grad_norm": 0.06648692488670349, "learning_rate": 0.01, "loss": 2.1904, "step": 4683 }, { "epoch": 0.4814548443439844, "grad_norm": 0.08376996219158173, "learning_rate": 0.01, "loss": 2.2097, "step": 4686 }, { "epoch": 0.4817630740778794, "grad_norm": 0.05270134285092354, "learning_rate": 0.01, "loss": 2.2304, "step": 4689 }, { "epoch": 0.4820713038117744, "grad_norm": 0.05531509965658188, "learning_rate": 0.01, "loss": 2.2039, "step": 4692 }, { "epoch": 0.48237953354566937, "grad_norm": 0.05848492309451103, "learning_rate": 0.01, "loss": 2.2113, "step": 4695 }, { "epoch": 0.48268776327956436, "grad_norm": 0.06692120432853699, "learning_rate": 0.01, "loss": 2.1972, "step": 4698 }, { "epoch": 0.48299599301345936, "grad_norm": 0.07243851572275162, "learning_rate": 0.01, "loss": 2.223, "step": 4701 }, { "epoch": 0.48330422274735435, "grad_norm": 0.06565523892641068, "learning_rate": 0.01, "loss": 2.1913, "step": 4704 }, { "epoch": 0.48361245248124934, "grad_norm": 0.04595122113823891, "learning_rate": 0.01, "loss": 2.1782, "step": 4707 }, { "epoch": 0.48392068221514434, "grad_norm": 0.06658844649791718, "learning_rate": 0.01, "loss": 2.224, "step": 4710 }, { "epoch": 0.48422891194903933, "grad_norm": 0.0807071253657341, "learning_rate": 0.01, "loss": 2.217, "step": 4713 }, { "epoch": 0.4845371416829343, "grad_norm": 0.0562782846391201, "learning_rate": 0.01, "loss": 2.2033, "step": 4716 }, { "epoch": 0.4848453714168293, "grad_norm": 0.07851718366146088, "learning_rate": 0.01, "loss": 2.1847, "step": 4719 }, { "epoch": 0.48515360115072437, "grad_norm": 0.07649900764226913, "learning_rate": 0.01, "loss": 2.2222, "step": 4722 }, { "epoch": 0.48546183088461936, "grad_norm": 0.07279150187969208, "learning_rate": 0.01, "loss": 2.1951, "step": 4725 }, { "epoch": 0.48577006061851435, "grad_norm": 0.053628645837306976, "learning_rate": 0.01, "loss": 2.1681, "step": 4728 }, { "epoch": 0.48607829035240935, "grad_norm": 0.09401357173919678, "learning_rate": 0.01, "loss": 2.1943, "step": 4731 }, { "epoch": 0.48638652008630434, "grad_norm": 0.1156088337302208, "learning_rate": 0.01, "loss": 2.2317, "step": 4734 }, { "epoch": 0.48669474982019933, "grad_norm": 0.12672138214111328, "learning_rate": 0.01, "loss": 2.2085, "step": 4737 }, { "epoch": 0.48700297955409433, "grad_norm": 0.06799574196338654, "learning_rate": 0.01, "loss": 2.2161, "step": 4740 }, { "epoch": 0.4873112092879893, "grad_norm": 0.06479325145483017, "learning_rate": 0.01, "loss": 2.1663, "step": 4743 }, { "epoch": 0.4876194390218843, "grad_norm": 0.09143824130296707, "learning_rate": 0.01, "loss": 2.2193, "step": 4746 }, { "epoch": 0.4879276687557793, "grad_norm": 0.09262688457965851, "learning_rate": 0.01, "loss": 2.218, "step": 4749 }, { "epoch": 0.4882358984896743, "grad_norm": 0.11519678682088852, "learning_rate": 0.01, "loss": 2.1937, "step": 4752 }, { "epoch": 0.4885441282235693, "grad_norm": 0.07646415382623672, "learning_rate": 0.01, "loss": 2.2133, "step": 4755 }, { "epoch": 0.4888523579574643, "grad_norm": 0.08090809732675552, "learning_rate": 0.01, "loss": 2.193, "step": 4758 }, { "epoch": 0.4891605876913593, "grad_norm": 0.08812209218740463, "learning_rate": 0.01, "loss": 2.2215, "step": 4761 }, { "epoch": 0.4894688174252543, "grad_norm": 0.14427846670150757, "learning_rate": 0.01, "loss": 2.2115, "step": 4764 }, { "epoch": 0.48977704715914927, "grad_norm": 0.08065719902515411, "learning_rate": 0.01, "loss": 2.1861, "step": 4767 }, { "epoch": 0.49008527689304426, "grad_norm": 0.04888691008090973, "learning_rate": 0.01, "loss": 2.1911, "step": 4770 }, { "epoch": 0.49039350662693926, "grad_norm": 0.04742259159684181, "learning_rate": 0.01, "loss": 2.2152, "step": 4773 }, { "epoch": 0.49070173636083425, "grad_norm": 0.061714138835668564, "learning_rate": 0.01, "loss": 2.2009, "step": 4776 }, { "epoch": 0.49100996609472924, "grad_norm": 0.07582443952560425, "learning_rate": 0.01, "loss": 2.2189, "step": 4779 }, { "epoch": 0.4913181958286243, "grad_norm": 0.1390780359506607, "learning_rate": 0.01, "loss": 2.211, "step": 4782 }, { "epoch": 0.4916264255625193, "grad_norm": 0.03784565255045891, "learning_rate": 0.01, "loss": 2.2011, "step": 4785 }, { "epoch": 0.4919346552964143, "grad_norm": 0.07413594424724579, "learning_rate": 0.01, "loss": 2.2103, "step": 4788 }, { "epoch": 0.4922428850303093, "grad_norm": 0.09402404725551605, "learning_rate": 0.01, "loss": 2.1912, "step": 4791 }, { "epoch": 0.49255111476420427, "grad_norm": 0.0717400312423706, "learning_rate": 0.01, "loss": 2.1868, "step": 4794 }, { "epoch": 0.49285934449809926, "grad_norm": 0.05179424583911896, "learning_rate": 0.01, "loss": 2.2298, "step": 4797 }, { "epoch": 0.49316757423199425, "grad_norm": 0.12123433500528336, "learning_rate": 0.01, "loss": 2.2005, "step": 4800 }, { "epoch": 0.49347580396588925, "grad_norm": 0.04941033944487572, "learning_rate": 0.01, "loss": 2.2113, "step": 4803 }, { "epoch": 0.49378403369978424, "grad_norm": 0.10987304151058197, "learning_rate": 0.01, "loss": 2.209, "step": 4806 }, { "epoch": 0.49409226343367924, "grad_norm": 0.09235193580389023, "learning_rate": 0.01, "loss": 2.1967, "step": 4809 }, { "epoch": 0.49440049316757423, "grad_norm": 0.057354703545570374, "learning_rate": 0.01, "loss": 2.219, "step": 4812 }, { "epoch": 0.4947087229014692, "grad_norm": 0.04692654311656952, "learning_rate": 0.01, "loss": 2.173, "step": 4815 }, { "epoch": 0.4950169526353642, "grad_norm": 0.09447453171014786, "learning_rate": 0.01, "loss": 2.1806, "step": 4818 }, { "epoch": 0.4953251823692592, "grad_norm": 0.09967079013586044, "learning_rate": 0.01, "loss": 2.1809, "step": 4821 }, { "epoch": 0.4956334121031542, "grad_norm": 0.06462189555168152, "learning_rate": 0.01, "loss": 2.1922, "step": 4824 }, { "epoch": 0.4959416418370492, "grad_norm": 0.038030870258808136, "learning_rate": 0.01, "loss": 2.2239, "step": 4827 }, { "epoch": 0.4962498715709442, "grad_norm": 0.06828872114419937, "learning_rate": 0.01, "loss": 2.1881, "step": 4830 }, { "epoch": 0.4965581013048392, "grad_norm": 0.10087070614099503, "learning_rate": 0.01, "loss": 2.22, "step": 4833 }, { "epoch": 0.4968663310387342, "grad_norm": 0.07630455493927002, "learning_rate": 0.01, "loss": 2.188, "step": 4836 }, { "epoch": 0.4971745607726292, "grad_norm": 0.05040668696165085, "learning_rate": 0.01, "loss": 2.2012, "step": 4839 }, { "epoch": 0.4974827905065242, "grad_norm": 0.05160282924771309, "learning_rate": 0.01, "loss": 2.2119, "step": 4842 }, { "epoch": 0.4977910202404192, "grad_norm": 0.04949258640408516, "learning_rate": 0.01, "loss": 2.1959, "step": 4845 }, { "epoch": 0.4980992499743142, "grad_norm": 0.07766029983758926, "learning_rate": 0.01, "loss": 2.1896, "step": 4848 }, { "epoch": 0.4984074797082092, "grad_norm": 0.06274580955505371, "learning_rate": 0.01, "loss": 2.2014, "step": 4851 }, { "epoch": 0.4987157094421042, "grad_norm": 0.1071280762553215, "learning_rate": 0.01, "loss": 2.2045, "step": 4854 }, { "epoch": 0.4990239391759992, "grad_norm": 0.10645020008087158, "learning_rate": 0.01, "loss": 2.1895, "step": 4857 }, { "epoch": 0.4993321689098942, "grad_norm": 0.1151091679930687, "learning_rate": 0.01, "loss": 2.1954, "step": 4860 }, { "epoch": 0.4996403986437892, "grad_norm": 0.09699530899524689, "learning_rate": 0.01, "loss": 2.1833, "step": 4863 }, { "epoch": 0.49994862837768417, "grad_norm": 0.06568959355354309, "learning_rate": 0.01, "loss": 2.1862, "step": 4866 }, { "epoch": 0.5002568581115792, "grad_norm": 0.0421447716653347, "learning_rate": 0.01, "loss": 2.1819, "step": 4869 }, { "epoch": 0.5005650878454742, "grad_norm": 0.04529868811368942, "learning_rate": 0.01, "loss": 2.1852, "step": 4872 }, { "epoch": 0.5008733175793691, "grad_norm": 0.059541650116443634, "learning_rate": 0.01, "loss": 2.1955, "step": 4875 }, { "epoch": 0.5011815473132641, "grad_norm": 0.061823770403862, "learning_rate": 0.01, "loss": 2.2039, "step": 4878 }, { "epoch": 0.5014897770471591, "grad_norm": 0.05892050638794899, "learning_rate": 0.01, "loss": 2.178, "step": 4881 }, { "epoch": 0.5017980067810541, "grad_norm": 0.04842402786016464, "learning_rate": 0.01, "loss": 2.1948, "step": 4884 }, { "epoch": 0.5021062365149491, "grad_norm": 0.05962050333619118, "learning_rate": 0.01, "loss": 2.1932, "step": 4887 }, { "epoch": 0.5024144662488441, "grad_norm": 0.056295089423656464, "learning_rate": 0.01, "loss": 2.1757, "step": 4890 }, { "epoch": 0.5027226959827391, "grad_norm": 0.07448049634695053, "learning_rate": 0.01, "loss": 2.2181, "step": 4893 }, { "epoch": 0.5030309257166341, "grad_norm": 0.07998815923929214, "learning_rate": 0.01, "loss": 2.1714, "step": 4896 }, { "epoch": 0.5033391554505291, "grad_norm": 0.08058517426252365, "learning_rate": 0.01, "loss": 2.2131, "step": 4899 }, { "epoch": 0.5036473851844241, "grad_norm": 0.07899410277605057, "learning_rate": 0.01, "loss": 2.1727, "step": 4902 }, { "epoch": 0.5039556149183191, "grad_norm": 0.05830831080675125, "learning_rate": 0.01, "loss": 2.176, "step": 4905 }, { "epoch": 0.5042638446522141, "grad_norm": 0.05831579118967056, "learning_rate": 0.01, "loss": 2.1754, "step": 4908 }, { "epoch": 0.5045720743861091, "grad_norm": 0.052614904940128326, "learning_rate": 0.01, "loss": 2.1935, "step": 4911 }, { "epoch": 0.5048803041200041, "grad_norm": 0.0830332413315773, "learning_rate": 0.01, "loss": 2.2274, "step": 4914 }, { "epoch": 0.5051885338538991, "grad_norm": 0.1138230562210083, "learning_rate": 0.01, "loss": 2.1976, "step": 4917 }, { "epoch": 0.505496763587794, "grad_norm": 0.07024016976356506, "learning_rate": 0.01, "loss": 2.1969, "step": 4920 }, { "epoch": 0.505804993321689, "grad_norm": 0.07235170155763626, "learning_rate": 0.01, "loss": 2.2163, "step": 4923 }, { "epoch": 0.5061132230555841, "grad_norm": 0.06894835084676743, "learning_rate": 0.01, "loss": 2.2232, "step": 4926 }, { "epoch": 0.5064214527894791, "grad_norm": 0.0825890600681305, "learning_rate": 0.01, "loss": 2.1896, "step": 4929 }, { "epoch": 0.5067296825233741, "grad_norm": 0.05901159718632698, "learning_rate": 0.01, "loss": 2.1988, "step": 4932 }, { "epoch": 0.5070379122572691, "grad_norm": 0.048157334327697754, "learning_rate": 0.01, "loss": 2.1904, "step": 4935 }, { "epoch": 0.5073461419911641, "grad_norm": 0.10036749392747879, "learning_rate": 0.01, "loss": 2.1977, "step": 4938 }, { "epoch": 0.5076543717250591, "grad_norm": 0.10984963923692703, "learning_rate": 0.01, "loss": 2.1957, "step": 4941 }, { "epoch": 0.5079626014589541, "grad_norm": 0.09587367624044418, "learning_rate": 0.01, "loss": 2.2, "step": 4944 }, { "epoch": 0.5082708311928491, "grad_norm": 0.06347552686929703, "learning_rate": 0.01, "loss": 2.1918, "step": 4947 }, { "epoch": 0.5085790609267441, "grad_norm": 0.0658629834651947, "learning_rate": 0.01, "loss": 2.1945, "step": 4950 }, { "epoch": 0.5088872906606391, "grad_norm": 0.045971643179655075, "learning_rate": 0.01, "loss": 2.2114, "step": 4953 }, { "epoch": 0.5091955203945341, "grad_norm": 0.04058291018009186, "learning_rate": 0.01, "loss": 2.2066, "step": 4956 }, { "epoch": 0.5095037501284291, "grad_norm": 0.052851296961307526, "learning_rate": 0.01, "loss": 2.1884, "step": 4959 }, { "epoch": 0.5098119798623241, "grad_norm": 0.033158350735902786, "learning_rate": 0.01, "loss": 2.2078, "step": 4962 }, { "epoch": 0.5101202095962191, "grad_norm": 0.05409036949276924, "learning_rate": 0.01, "loss": 2.181, "step": 4965 }, { "epoch": 0.5104284393301141, "grad_norm": 0.0731736570596695, "learning_rate": 0.01, "loss": 2.1825, "step": 4968 }, { "epoch": 0.510736669064009, "grad_norm": 0.05854470282793045, "learning_rate": 0.01, "loss": 2.2119, "step": 4971 }, { "epoch": 0.511044898797904, "grad_norm": 0.05071520060300827, "learning_rate": 0.01, "loss": 2.1886, "step": 4974 }, { "epoch": 0.511353128531799, "grad_norm": 0.060792725533246994, "learning_rate": 0.01, "loss": 2.2066, "step": 4977 }, { "epoch": 0.511661358265694, "grad_norm": 0.0910191684961319, "learning_rate": 0.01, "loss": 2.209, "step": 4980 }, { "epoch": 0.511969587999589, "grad_norm": 0.12366749346256256, "learning_rate": 0.01, "loss": 2.2263, "step": 4983 }, { "epoch": 0.512277817733484, "grad_norm": 0.11254429817199707, "learning_rate": 0.01, "loss": 2.175, "step": 4986 }, { "epoch": 0.512586047467379, "grad_norm": 0.11091643571853638, "learning_rate": 0.01, "loss": 2.2046, "step": 4989 }, { "epoch": 0.512894277201274, "grad_norm": 0.045611754059791565, "learning_rate": 0.01, "loss": 2.1861, "step": 4992 }, { "epoch": 0.513202506935169, "grad_norm": 0.09836157411336899, "learning_rate": 0.01, "loss": 2.1931, "step": 4995 }, { "epoch": 0.513510736669064, "grad_norm": 0.11932815611362457, "learning_rate": 0.01, "loss": 2.2087, "step": 4998 }, { "epoch": 0.513818966402959, "grad_norm": 0.10955359041690826, "learning_rate": 0.01, "loss": 2.19, "step": 5001 }, { "epoch": 0.514127196136854, "grad_norm": 0.09804633259773254, "learning_rate": 0.01, "loss": 2.1572, "step": 5004 }, { "epoch": 0.514435425870749, "grad_norm": 0.04835839942097664, "learning_rate": 0.01, "loss": 2.2115, "step": 5007 }, { "epoch": 0.514743655604644, "grad_norm": 0.04645110294222832, "learning_rate": 0.01, "loss": 2.2012, "step": 5010 }, { "epoch": 0.515051885338539, "grad_norm": 0.05947386845946312, "learning_rate": 0.01, "loss": 2.2039, "step": 5013 }, { "epoch": 0.515360115072434, "grad_norm": 0.05693971738219261, "learning_rate": 0.01, "loss": 2.1733, "step": 5016 }, { "epoch": 0.515668344806329, "grad_norm": 0.0724320039153099, "learning_rate": 0.01, "loss": 2.1944, "step": 5019 }, { "epoch": 0.5159765745402239, "grad_norm": 0.06627337634563446, "learning_rate": 0.01, "loss": 2.1932, "step": 5022 }, { "epoch": 0.5162848042741189, "grad_norm": 0.10879958420991898, "learning_rate": 0.01, "loss": 2.2024, "step": 5025 }, { "epoch": 0.5165930340080139, "grad_norm": 0.12266898900270462, "learning_rate": 0.01, "loss": 2.1938, "step": 5028 }, { "epoch": 0.5169012637419089, "grad_norm": 0.06240540370345116, "learning_rate": 0.01, "loss": 2.18, "step": 5031 }, { "epoch": 0.5172094934758039, "grad_norm": 0.05043266713619232, "learning_rate": 0.01, "loss": 2.1936, "step": 5034 }, { "epoch": 0.5175177232096989, "grad_norm": 0.052652738988399506, "learning_rate": 0.01, "loss": 2.1631, "step": 5037 }, { "epoch": 0.5178259529435939, "grad_norm": 0.04598904401063919, "learning_rate": 0.01, "loss": 2.2067, "step": 5040 }, { "epoch": 0.518134182677489, "grad_norm": 0.07040087133646011, "learning_rate": 0.01, "loss": 2.1737, "step": 5043 }, { "epoch": 0.518442412411384, "grad_norm": 0.04827702417969704, "learning_rate": 0.01, "loss": 2.2128, "step": 5046 }, { "epoch": 0.518750642145279, "grad_norm": 0.09803622215986252, "learning_rate": 0.01, "loss": 2.1851, "step": 5049 }, { "epoch": 0.519058871879174, "grad_norm": 0.1019926443696022, "learning_rate": 0.01, "loss": 2.181, "step": 5052 }, { "epoch": 0.519367101613069, "grad_norm": 0.08847504109144211, "learning_rate": 0.01, "loss": 2.1874, "step": 5055 }, { "epoch": 0.519675331346964, "grad_norm": 0.06151921674609184, "learning_rate": 0.01, "loss": 2.1785, "step": 5058 }, { "epoch": 0.519983561080859, "grad_norm": 0.04823022335767746, "learning_rate": 0.01, "loss": 2.1662, "step": 5061 }, { "epoch": 0.520291790814754, "grad_norm": 0.12454935908317566, "learning_rate": 0.01, "loss": 2.1864, "step": 5064 }, { "epoch": 0.520600020548649, "grad_norm": 0.0716002956032753, "learning_rate": 0.01, "loss": 2.1866, "step": 5067 }, { "epoch": 0.520908250282544, "grad_norm": 0.055079616606235504, "learning_rate": 0.01, "loss": 2.2137, "step": 5070 }, { "epoch": 0.5212164800164389, "grad_norm": 0.05969909206032753, "learning_rate": 0.01, "loss": 2.1972, "step": 5073 }, { "epoch": 0.5215247097503339, "grad_norm": 0.07373122125864029, "learning_rate": 0.01, "loss": 2.2312, "step": 5076 }, { "epoch": 0.5218329394842289, "grad_norm": 0.1899929642677307, "learning_rate": 0.01, "loss": 2.2141, "step": 5079 }, { "epoch": 0.5221411692181239, "grad_norm": 0.05221979692578316, "learning_rate": 0.01, "loss": 2.1899, "step": 5082 }, { "epoch": 0.5224493989520189, "grad_norm": 0.04537337273359299, "learning_rate": 0.01, "loss": 2.1571, "step": 5085 }, { "epoch": 0.5227576286859139, "grad_norm": 0.05490431934595108, "learning_rate": 0.01, "loss": 2.1705, "step": 5088 }, { "epoch": 0.5230658584198089, "grad_norm": 0.03813198208808899, "learning_rate": 0.01, "loss": 2.1773, "step": 5091 }, { "epoch": 0.5233740881537039, "grad_norm": 0.045411352068185806, "learning_rate": 0.01, "loss": 2.2065, "step": 5094 }, { "epoch": 0.5236823178875989, "grad_norm": 0.05433456227183342, "learning_rate": 0.01, "loss": 2.1901, "step": 5097 }, { "epoch": 0.5239905476214939, "grad_norm": 0.10771681368350983, "learning_rate": 0.01, "loss": 2.171, "step": 5100 }, { "epoch": 0.5242987773553889, "grad_norm": 0.06446761637926102, "learning_rate": 0.01, "loss": 2.2033, "step": 5103 }, { "epoch": 0.5246070070892839, "grad_norm": 0.06428392231464386, "learning_rate": 0.01, "loss": 2.1877, "step": 5106 }, { "epoch": 0.5249152368231789, "grad_norm": 0.0525304451584816, "learning_rate": 0.01, "loss": 2.206, "step": 5109 }, { "epoch": 0.5252234665570739, "grad_norm": 0.07332491129636765, "learning_rate": 0.01, "loss": 2.1992, "step": 5112 }, { "epoch": 0.5255316962909689, "grad_norm": 0.1738174557685852, "learning_rate": 0.01, "loss": 2.176, "step": 5115 }, { "epoch": 0.5258399260248638, "grad_norm": 0.08102334290742874, "learning_rate": 0.01, "loss": 2.2067, "step": 5118 }, { "epoch": 0.5261481557587588, "grad_norm": 0.06945500522851944, "learning_rate": 0.01, "loss": 2.206, "step": 5121 }, { "epoch": 0.5264563854926538, "grad_norm": 0.07017000019550323, "learning_rate": 0.01, "loss": 2.2002, "step": 5124 }, { "epoch": 0.5267646152265488, "grad_norm": 0.03883346915245056, "learning_rate": 0.01, "loss": 2.1608, "step": 5127 }, { "epoch": 0.5270728449604438, "grad_norm": 0.050974566489458084, "learning_rate": 0.01, "loss": 2.1973, "step": 5130 }, { "epoch": 0.5273810746943388, "grad_norm": 0.0665312334895134, "learning_rate": 0.01, "loss": 2.2064, "step": 5133 }, { "epoch": 0.5276893044282338, "grad_norm": 0.03946761414408684, "learning_rate": 0.01, "loss": 2.1794, "step": 5136 }, { "epoch": 0.5279975341621288, "grad_norm": 0.046160902827978134, "learning_rate": 0.01, "loss": 2.1919, "step": 5139 }, { "epoch": 0.5283057638960238, "grad_norm": 0.046186063438653946, "learning_rate": 0.01, "loss": 2.1687, "step": 5142 }, { "epoch": 0.5286139936299188, "grad_norm": 0.07073774188756943, "learning_rate": 0.01, "loss": 2.2154, "step": 5145 }, { "epoch": 0.5289222233638138, "grad_norm": 0.08319617807865143, "learning_rate": 0.01, "loss": 2.2104, "step": 5148 }, { "epoch": 0.5292304530977088, "grad_norm": 0.08551453799009323, "learning_rate": 0.01, "loss": 2.1743, "step": 5151 }, { "epoch": 0.5295386828316038, "grad_norm": 0.06613323837518692, "learning_rate": 0.01, "loss": 2.1774, "step": 5154 }, { "epoch": 0.5298469125654989, "grad_norm": 0.0601351298391819, "learning_rate": 0.01, "loss": 2.1831, "step": 5157 }, { "epoch": 0.5301551422993939, "grad_norm": 0.04963411018252373, "learning_rate": 0.01, "loss": 2.1915, "step": 5160 }, { "epoch": 0.5304633720332889, "grad_norm": 0.06755329668521881, "learning_rate": 0.01, "loss": 2.163, "step": 5163 }, { "epoch": 0.5307716017671839, "grad_norm": 0.04755258187651634, "learning_rate": 0.01, "loss": 2.1518, "step": 5166 }, { "epoch": 0.5310798315010788, "grad_norm": 0.15916316211223602, "learning_rate": 0.01, "loss": 2.1835, "step": 5169 }, { "epoch": 0.5313880612349738, "grad_norm": 0.0807122215628624, "learning_rate": 0.01, "loss": 2.2193, "step": 5172 }, { "epoch": 0.5316962909688688, "grad_norm": 0.05207689106464386, "learning_rate": 0.01, "loss": 2.1754, "step": 5175 }, { "epoch": 0.5320045207027638, "grad_norm": 0.045082803815603256, "learning_rate": 0.01, "loss": 2.1525, "step": 5178 }, { "epoch": 0.5323127504366588, "grad_norm": 0.07747700810432434, "learning_rate": 0.01, "loss": 2.1438, "step": 5181 }, { "epoch": 0.5326209801705538, "grad_norm": 0.13366450369358063, "learning_rate": 0.01, "loss": 2.1904, "step": 5184 }, { "epoch": 0.5329292099044488, "grad_norm": 0.06902889162302017, "learning_rate": 0.01, "loss": 2.1786, "step": 5187 }, { "epoch": 0.5332374396383438, "grad_norm": 0.04604712501168251, "learning_rate": 0.01, "loss": 2.1848, "step": 5190 }, { "epoch": 0.5335456693722388, "grad_norm": 0.08803047984838486, "learning_rate": 0.01, "loss": 2.1798, "step": 5193 }, { "epoch": 0.5338538991061338, "grad_norm": 0.08366485685110092, "learning_rate": 0.01, "loss": 2.2008, "step": 5196 }, { "epoch": 0.5341621288400288, "grad_norm": 0.06176333501935005, "learning_rate": 0.01, "loss": 2.1722, "step": 5199 }, { "epoch": 0.5344703585739238, "grad_norm": 0.0837249681353569, "learning_rate": 0.01, "loss": 2.1783, "step": 5202 }, { "epoch": 0.5347785883078188, "grad_norm": 0.1322035938501358, "learning_rate": 0.01, "loss": 2.1948, "step": 5205 }, { "epoch": 0.5350868180417138, "grad_norm": 0.11094444990158081, "learning_rate": 0.01, "loss": 2.1937, "step": 5208 }, { "epoch": 0.5353950477756088, "grad_norm": 0.05182232707738876, "learning_rate": 0.01, "loss": 2.1945, "step": 5211 }, { "epoch": 0.5357032775095038, "grad_norm": 0.08261944353580475, "learning_rate": 0.01, "loss": 2.1855, "step": 5214 }, { "epoch": 0.5360115072433987, "grad_norm": 0.11097295582294464, "learning_rate": 0.01, "loss": 2.1902, "step": 5217 }, { "epoch": 0.5363197369772937, "grad_norm": 0.05825675278902054, "learning_rate": 0.01, "loss": 2.1984, "step": 5220 }, { "epoch": 0.5366279667111887, "grad_norm": 0.11612821370363235, "learning_rate": 0.01, "loss": 2.1679, "step": 5223 }, { "epoch": 0.5369361964450837, "grad_norm": 0.09120064228773117, "learning_rate": 0.01, "loss": 2.1586, "step": 5226 }, { "epoch": 0.5372444261789787, "grad_norm": 0.055181995034217834, "learning_rate": 0.01, "loss": 2.2169, "step": 5229 }, { "epoch": 0.5375526559128737, "grad_norm": 0.055780068039894104, "learning_rate": 0.01, "loss": 2.1748, "step": 5232 }, { "epoch": 0.5378608856467687, "grad_norm": 0.06303024291992188, "learning_rate": 0.01, "loss": 2.1782, "step": 5235 }, { "epoch": 0.5381691153806637, "grad_norm": 0.10456321388483047, "learning_rate": 0.01, "loss": 2.1975, "step": 5238 }, { "epoch": 0.5384773451145587, "grad_norm": 0.054177962243556976, "learning_rate": 0.01, "loss": 2.1641, "step": 5241 }, { "epoch": 0.5387855748484537, "grad_norm": 0.06265738606452942, "learning_rate": 0.01, "loss": 2.183, "step": 5244 }, { "epoch": 0.5390938045823487, "grad_norm": 0.13720418512821198, "learning_rate": 0.01, "loss": 2.1698, "step": 5247 }, { "epoch": 0.5394020343162437, "grad_norm": 0.04917861148715019, "learning_rate": 0.01, "loss": 2.1692, "step": 5250 }, { "epoch": 0.5397102640501387, "grad_norm": 0.04919945448637009, "learning_rate": 0.01, "loss": 2.1652, "step": 5253 }, { "epoch": 0.5400184937840337, "grad_norm": 0.06462734192609787, "learning_rate": 0.01, "loss": 2.1987, "step": 5256 }, { "epoch": 0.5403267235179287, "grad_norm": 0.05275480076670647, "learning_rate": 0.01, "loss": 2.1955, "step": 5259 }, { "epoch": 0.5406349532518236, "grad_norm": 0.12235717475414276, "learning_rate": 0.01, "loss": 2.1937, "step": 5262 }, { "epoch": 0.5409431829857186, "grad_norm": 0.05300014466047287, "learning_rate": 0.01, "loss": 2.1589, "step": 5265 }, { "epoch": 0.5412514127196136, "grad_norm": 0.0429493710398674, "learning_rate": 0.01, "loss": 2.1618, "step": 5268 }, { "epoch": 0.5415596424535086, "grad_norm": 0.07041259855031967, "learning_rate": 0.01, "loss": 2.1661, "step": 5271 }, { "epoch": 0.5418678721874037, "grad_norm": 0.05304478853940964, "learning_rate": 0.01, "loss": 2.183, "step": 5274 }, { "epoch": 0.5421761019212987, "grad_norm": 0.12009457498788834, "learning_rate": 0.01, "loss": 2.1862, "step": 5277 }, { "epoch": 0.5424843316551937, "grad_norm": 0.11629784107208252, "learning_rate": 0.01, "loss": 2.1897, "step": 5280 }, { "epoch": 0.5427925613890887, "grad_norm": 0.07305426150560379, "learning_rate": 0.01, "loss": 2.1832, "step": 5283 }, { "epoch": 0.5431007911229837, "grad_norm": 0.0855623185634613, "learning_rate": 0.01, "loss": 2.1884, "step": 5286 }, { "epoch": 0.5434090208568787, "grad_norm": 0.04178578779101372, "learning_rate": 0.01, "loss": 2.1872, "step": 5289 }, { "epoch": 0.5437172505907737, "grad_norm": 0.05382310971617699, "learning_rate": 0.01, "loss": 2.1901, "step": 5292 }, { "epoch": 0.5440254803246687, "grad_norm": 0.10682760924100876, "learning_rate": 0.01, "loss": 2.1957, "step": 5295 }, { "epoch": 0.5443337100585637, "grad_norm": 0.15037471055984497, "learning_rate": 0.01, "loss": 2.2085, "step": 5298 }, { "epoch": 0.5446419397924587, "grad_norm": 0.08333491533994675, "learning_rate": 0.01, "loss": 2.1964, "step": 5301 }, { "epoch": 0.5449501695263537, "grad_norm": 0.08964785933494568, "learning_rate": 0.01, "loss": 2.1613, "step": 5304 }, { "epoch": 0.5452583992602487, "grad_norm": 0.06194687634706497, "learning_rate": 0.01, "loss": 2.1711, "step": 5307 }, { "epoch": 0.5455666289941437, "grad_norm": 0.047254305332899094, "learning_rate": 0.01, "loss": 2.1956, "step": 5310 }, { "epoch": 0.5458748587280386, "grad_norm": 0.052926719188690186, "learning_rate": 0.01, "loss": 2.1767, "step": 5313 }, { "epoch": 0.5461830884619336, "grad_norm": 0.08765383809804916, "learning_rate": 0.01, "loss": 2.1782, "step": 5316 }, { "epoch": 0.5464913181958286, "grad_norm": 0.0749160572886467, "learning_rate": 0.01, "loss": 2.1875, "step": 5319 }, { "epoch": 0.5467995479297236, "grad_norm": 0.09781020879745483, "learning_rate": 0.01, "loss": 2.1748, "step": 5322 }, { "epoch": 0.5471077776636186, "grad_norm": 0.04605260491371155, "learning_rate": 0.01, "loss": 2.145, "step": 5325 }, { "epoch": 0.5474160073975136, "grad_norm": 0.13507331907749176, "learning_rate": 0.01, "loss": 2.1769, "step": 5328 }, { "epoch": 0.5477242371314086, "grad_norm": 0.05028709024190903, "learning_rate": 0.01, "loss": 2.1925, "step": 5331 }, { "epoch": 0.5480324668653036, "grad_norm": 0.08754327893257141, "learning_rate": 0.01, "loss": 2.159, "step": 5334 }, { "epoch": 0.5483406965991986, "grad_norm": 0.10449190437793732, "learning_rate": 0.01, "loss": 2.1898, "step": 5337 }, { "epoch": 0.5486489263330936, "grad_norm": 0.10263057053089142, "learning_rate": 0.01, "loss": 2.1776, "step": 5340 }, { "epoch": 0.5489571560669886, "grad_norm": 0.0547097772359848, "learning_rate": 0.01, "loss": 2.1941, "step": 5343 }, { "epoch": 0.5492653858008836, "grad_norm": 0.06682941317558289, "learning_rate": 0.01, "loss": 2.1901, "step": 5346 }, { "epoch": 0.5495736155347786, "grad_norm": 0.06421027332544327, "learning_rate": 0.01, "loss": 2.1981, "step": 5349 }, { "epoch": 0.5498818452686736, "grad_norm": 0.041218411177396774, "learning_rate": 0.01, "loss": 2.1844, "step": 5352 }, { "epoch": 0.5501900750025686, "grad_norm": 0.042902372777462006, "learning_rate": 0.01, "loss": 2.1981, "step": 5355 }, { "epoch": 0.5504983047364636, "grad_norm": 0.05338321253657341, "learning_rate": 0.01, "loss": 2.168, "step": 5358 }, { "epoch": 0.5508065344703585, "grad_norm": 0.06692427396774292, "learning_rate": 0.01, "loss": 2.1891, "step": 5361 }, { "epoch": 0.5511147642042535, "grad_norm": 0.07927200943231583, "learning_rate": 0.01, "loss": 2.1853, "step": 5364 }, { "epoch": 0.5514229939381485, "grad_norm": 0.05655739828944206, "learning_rate": 0.01, "loss": 2.1838, "step": 5367 }, { "epoch": 0.5517312236720435, "grad_norm": 0.04488144442439079, "learning_rate": 0.01, "loss": 2.1754, "step": 5370 }, { "epoch": 0.5520394534059385, "grad_norm": 0.09253795444965363, "learning_rate": 0.01, "loss": 2.1742, "step": 5373 }, { "epoch": 0.5523476831398335, "grad_norm": 0.07396019250154495, "learning_rate": 0.01, "loss": 2.1582, "step": 5376 }, { "epoch": 0.5526559128737285, "grad_norm": 0.053663600236177444, "learning_rate": 0.01, "loss": 2.1508, "step": 5379 }, { "epoch": 0.5529641426076235, "grad_norm": 0.062076181173324585, "learning_rate": 0.01, "loss": 2.1772, "step": 5382 }, { "epoch": 0.5532723723415185, "grad_norm": 0.08481581509113312, "learning_rate": 0.01, "loss": 2.1836, "step": 5385 }, { "epoch": 0.5535806020754136, "grad_norm": 0.08981155604124069, "learning_rate": 0.01, "loss": 2.204, "step": 5388 }, { "epoch": 0.5538888318093086, "grad_norm": 0.10067261755466461, "learning_rate": 0.01, "loss": 2.1527, "step": 5391 }, { "epoch": 0.5541970615432036, "grad_norm": 0.06231047958135605, "learning_rate": 0.01, "loss": 2.194, "step": 5394 }, { "epoch": 0.5545052912770986, "grad_norm": 0.049111973494291306, "learning_rate": 0.01, "loss": 2.1889, "step": 5397 }, { "epoch": 0.5548135210109936, "grad_norm": 0.06446948647499084, "learning_rate": 0.01, "loss": 2.2103, "step": 5400 }, { "epoch": 0.5551217507448886, "grad_norm": 0.051946625113487244, "learning_rate": 0.01, "loss": 2.1977, "step": 5403 }, { "epoch": 0.5554299804787836, "grad_norm": 0.1369265466928482, "learning_rate": 0.01, "loss": 2.1771, "step": 5406 }, { "epoch": 0.5557382102126786, "grad_norm": 0.08489779382944107, "learning_rate": 0.01, "loss": 2.1782, "step": 5409 }, { "epoch": 0.5560464399465735, "grad_norm": 0.10673670470714569, "learning_rate": 0.01, "loss": 2.173, "step": 5412 }, { "epoch": 0.5563546696804685, "grad_norm": 0.055250637233257294, "learning_rate": 0.01, "loss": 2.1539, "step": 5415 }, { "epoch": 0.5566628994143635, "grad_norm": 0.05136672407388687, "learning_rate": 0.01, "loss": 2.2035, "step": 5418 }, { "epoch": 0.5569711291482585, "grad_norm": 0.040590591728687286, "learning_rate": 0.01, "loss": 2.1778, "step": 5421 }, { "epoch": 0.5572793588821535, "grad_norm": 0.048333633691072464, "learning_rate": 0.01, "loss": 2.191, "step": 5424 }, { "epoch": 0.5575875886160485, "grad_norm": 0.0582728311419487, "learning_rate": 0.01, "loss": 2.1734, "step": 5427 }, { "epoch": 0.5578958183499435, "grad_norm": 0.05272262915968895, "learning_rate": 0.01, "loss": 2.1714, "step": 5430 }, { "epoch": 0.5582040480838385, "grad_norm": 0.08472342789173126, "learning_rate": 0.01, "loss": 2.1624, "step": 5433 }, { "epoch": 0.5585122778177335, "grad_norm": 0.10869960486888885, "learning_rate": 0.01, "loss": 2.164, "step": 5436 }, { "epoch": 0.5588205075516285, "grad_norm": 0.0569114163517952, "learning_rate": 0.01, "loss": 2.1933, "step": 5439 }, { "epoch": 0.5591287372855235, "grad_norm": 0.14485467970371246, "learning_rate": 0.01, "loss": 2.1779, "step": 5442 }, { "epoch": 0.5594369670194185, "grad_norm": 0.08184878528118134, "learning_rate": 0.01, "loss": 2.1779, "step": 5445 }, { "epoch": 0.5597451967533135, "grad_norm": 0.06575775146484375, "learning_rate": 0.01, "loss": 2.136, "step": 5448 }, { "epoch": 0.5600534264872085, "grad_norm": 0.08628299832344055, "learning_rate": 0.01, "loss": 2.1696, "step": 5451 }, { "epoch": 0.5603616562211035, "grad_norm": 0.06078352406620979, "learning_rate": 0.01, "loss": 2.1865, "step": 5454 }, { "epoch": 0.5606698859549984, "grad_norm": 0.05207353085279465, "learning_rate": 0.01, "loss": 2.167, "step": 5457 }, { "epoch": 0.5609781156888934, "grad_norm": 0.059535857290029526, "learning_rate": 0.01, "loss": 2.1977, "step": 5460 }, { "epoch": 0.5612863454227884, "grad_norm": 0.05342729389667511, "learning_rate": 0.01, "loss": 2.1823, "step": 5463 }, { "epoch": 0.5615945751566834, "grad_norm": 0.04207632318139076, "learning_rate": 0.01, "loss": 2.1849, "step": 5466 }, { "epoch": 0.5619028048905784, "grad_norm": 0.1334255486726761, "learning_rate": 0.01, "loss": 2.1886, "step": 5469 }, { "epoch": 0.5622110346244734, "grad_norm": 0.06532323360443115, "learning_rate": 0.01, "loss": 2.1979, "step": 5472 }, { "epoch": 0.5625192643583684, "grad_norm": 0.0793483555316925, "learning_rate": 0.01, "loss": 2.188, "step": 5475 }, { "epoch": 0.5628274940922634, "grad_norm": 0.04637480154633522, "learning_rate": 0.01, "loss": 2.1562, "step": 5478 }, { "epoch": 0.5631357238261584, "grad_norm": 0.0482000894844532, "learning_rate": 0.01, "loss": 2.1587, "step": 5481 }, { "epoch": 0.5634439535600534, "grad_norm": 0.06253401190042496, "learning_rate": 0.01, "loss": 2.1978, "step": 5484 }, { "epoch": 0.5637521832939484, "grad_norm": 0.15622715651988983, "learning_rate": 0.01, "loss": 2.171, "step": 5487 }, { "epoch": 0.5640604130278434, "grad_norm": 0.10941077768802643, "learning_rate": 0.01, "loss": 2.1952, "step": 5490 }, { "epoch": 0.5643686427617384, "grad_norm": 0.08030713349580765, "learning_rate": 0.01, "loss": 2.1948, "step": 5493 }, { "epoch": 0.5646768724956334, "grad_norm": 0.13679014146327972, "learning_rate": 0.01, "loss": 2.1767, "step": 5496 }, { "epoch": 0.5649851022295284, "grad_norm": 0.04662426933646202, "learning_rate": 0.01, "loss": 2.1926, "step": 5499 }, { "epoch": 0.5652933319634234, "grad_norm": 0.05347858741879463, "learning_rate": 0.01, "loss": 2.1825, "step": 5502 }, { "epoch": 0.5656015616973185, "grad_norm": 0.06205238029360771, "learning_rate": 0.01, "loss": 2.1537, "step": 5505 }, { "epoch": 0.5659097914312134, "grad_norm": 0.05525955557823181, "learning_rate": 0.01, "loss": 2.1802, "step": 5508 }, { "epoch": 0.5662180211651084, "grad_norm": 0.055693045258522034, "learning_rate": 0.01, "loss": 2.1727, "step": 5511 }, { "epoch": 0.5665262508990034, "grad_norm": 0.051134396344423294, "learning_rate": 0.01, "loss": 2.1695, "step": 5514 }, { "epoch": 0.5668344806328984, "grad_norm": 0.05469521880149841, "learning_rate": 0.01, "loss": 2.1664, "step": 5517 }, { "epoch": 0.5671427103667934, "grad_norm": 0.039416272193193436, "learning_rate": 0.01, "loss": 2.1708, "step": 5520 }, { "epoch": 0.5674509401006884, "grad_norm": 0.10661659389734268, "learning_rate": 0.01, "loss": 2.1753, "step": 5523 }, { "epoch": 0.5677591698345834, "grad_norm": 0.07567829638719559, "learning_rate": 0.01, "loss": 2.1733, "step": 5526 }, { "epoch": 0.5680673995684784, "grad_norm": 0.06030309572815895, "learning_rate": 0.01, "loss": 2.1795, "step": 5529 }, { "epoch": 0.5683756293023734, "grad_norm": 0.07429811358451843, "learning_rate": 0.01, "loss": 2.1936, "step": 5532 }, { "epoch": 0.5686838590362684, "grad_norm": 0.08618849515914917, "learning_rate": 0.01, "loss": 2.2009, "step": 5535 }, { "epoch": 0.5689920887701634, "grad_norm": 0.04969833791255951, "learning_rate": 0.01, "loss": 2.1711, "step": 5538 }, { "epoch": 0.5693003185040584, "grad_norm": 0.11154712736606598, "learning_rate": 0.01, "loss": 2.1802, "step": 5541 }, { "epoch": 0.5696085482379534, "grad_norm": 0.07754155993461609, "learning_rate": 0.01, "loss": 2.164, "step": 5544 }, { "epoch": 0.5699167779718484, "grad_norm": 0.04600273445248604, "learning_rate": 0.01, "loss": 2.1918, "step": 5547 }, { "epoch": 0.5702250077057434, "grad_norm": 0.06788063049316406, "learning_rate": 0.01, "loss": 2.1477, "step": 5550 }, { "epoch": 0.5705332374396384, "grad_norm": 0.11349419504404068, "learning_rate": 0.01, "loss": 2.1603, "step": 5553 }, { "epoch": 0.5708414671735333, "grad_norm": 0.11178430914878845, "learning_rate": 0.01, "loss": 2.1439, "step": 5556 }, { "epoch": 0.5711496969074283, "grad_norm": 0.050257500261068344, "learning_rate": 0.01, "loss": 2.1851, "step": 5559 }, { "epoch": 0.5714579266413233, "grad_norm": 0.08327756822109222, "learning_rate": 0.01, "loss": 2.158, "step": 5562 }, { "epoch": 0.5717661563752183, "grad_norm": 0.06866388767957687, "learning_rate": 0.01, "loss": 2.1584, "step": 5565 }, { "epoch": 0.5720743861091133, "grad_norm": 0.1139674037694931, "learning_rate": 0.01, "loss": 2.1897, "step": 5568 }, { "epoch": 0.5723826158430083, "grad_norm": 0.07029612362384796, "learning_rate": 0.01, "loss": 2.1723, "step": 5571 }, { "epoch": 0.5726908455769033, "grad_norm": 0.10171212255954742, "learning_rate": 0.01, "loss": 2.1788, "step": 5574 }, { "epoch": 0.5729990753107983, "grad_norm": 0.11364202946424484, "learning_rate": 0.01, "loss": 2.1659, "step": 5577 }, { "epoch": 0.5733073050446933, "grad_norm": 0.08066857606172562, "learning_rate": 0.01, "loss": 2.1902, "step": 5580 }, { "epoch": 0.5736155347785883, "grad_norm": 0.09207342565059662, "learning_rate": 0.01, "loss": 2.1519, "step": 5583 }, { "epoch": 0.5739237645124833, "grad_norm": 0.06945987790822983, "learning_rate": 0.01, "loss": 2.1677, "step": 5586 }, { "epoch": 0.5742319942463783, "grad_norm": 0.05137445777654648, "learning_rate": 0.01, "loss": 2.1686, "step": 5589 }, { "epoch": 0.5745402239802733, "grad_norm": 0.10192268341779709, "learning_rate": 0.01, "loss": 2.1758, "step": 5592 }, { "epoch": 0.5748484537141683, "grad_norm": 0.056787896901369095, "learning_rate": 0.01, "loss": 2.1642, "step": 5595 }, { "epoch": 0.5751566834480633, "grad_norm": 0.07727455347776413, "learning_rate": 0.01, "loss": 2.1662, "step": 5598 }, { "epoch": 0.5754649131819582, "grad_norm": 0.1311456710100174, "learning_rate": 0.01, "loss": 2.1713, "step": 5601 }, { "epoch": 0.5757731429158532, "grad_norm": 0.1014258936047554, "learning_rate": 0.01, "loss": 2.1751, "step": 5604 }, { "epoch": 0.5760813726497482, "grad_norm": 0.06325560063123703, "learning_rate": 0.01, "loss": 2.1757, "step": 5607 }, { "epoch": 0.5763896023836432, "grad_norm": 0.07262448221445084, "learning_rate": 0.01, "loss": 2.1575, "step": 5610 }, { "epoch": 0.5766978321175382, "grad_norm": 0.07160039991140366, "learning_rate": 0.01, "loss": 2.1706, "step": 5613 }, { "epoch": 0.5770060618514332, "grad_norm": 0.050024017691612244, "learning_rate": 0.01, "loss": 2.1811, "step": 5616 }, { "epoch": 0.5773142915853282, "grad_norm": 0.09685138612985611, "learning_rate": 0.01, "loss": 2.1549, "step": 5619 }, { "epoch": 0.5776225213192233, "grad_norm": 0.058329988270998, "learning_rate": 0.01, "loss": 2.1813, "step": 5622 }, { "epoch": 0.5779307510531183, "grad_norm": 0.06637705117464066, "learning_rate": 0.01, "loss": 2.1717, "step": 5625 }, { "epoch": 0.5782389807870133, "grad_norm": 0.0906175896525383, "learning_rate": 0.01, "loss": 2.1677, "step": 5628 }, { "epoch": 0.5785472105209083, "grad_norm": 0.06751519441604614, "learning_rate": 0.01, "loss": 2.1584, "step": 5631 }, { "epoch": 0.5788554402548033, "grad_norm": 0.04437318444252014, "learning_rate": 0.01, "loss": 2.2013, "step": 5634 }, { "epoch": 0.5791636699886983, "grad_norm": 0.04365368187427521, "learning_rate": 0.01, "loss": 2.1746, "step": 5637 }, { "epoch": 0.5794718997225933, "grad_norm": 0.04844829812645912, "learning_rate": 0.01, "loss": 2.1818, "step": 5640 }, { "epoch": 0.5797801294564883, "grad_norm": 0.04154437035322189, "learning_rate": 0.01, "loss": 2.1536, "step": 5643 }, { "epoch": 0.5800883591903833, "grad_norm": 0.07691098004579544, "learning_rate": 0.01, "loss": 2.1883, "step": 5646 }, { "epoch": 0.5803965889242783, "grad_norm": 0.07065980136394501, "learning_rate": 0.01, "loss": 2.154, "step": 5649 }, { "epoch": 0.5807048186581732, "grad_norm": 0.1051129475235939, "learning_rate": 0.01, "loss": 2.1447, "step": 5652 }, { "epoch": 0.5810130483920682, "grad_norm": 0.10921964794397354, "learning_rate": 0.01, "loss": 2.1777, "step": 5655 }, { "epoch": 0.5813212781259632, "grad_norm": 0.1120898649096489, "learning_rate": 0.01, "loss": 2.1617, "step": 5658 }, { "epoch": 0.5816295078598582, "grad_norm": 0.09593590348958969, "learning_rate": 0.01, "loss": 2.1457, "step": 5661 }, { "epoch": 0.5819377375937532, "grad_norm": 0.054108936339616776, "learning_rate": 0.01, "loss": 2.1793, "step": 5664 }, { "epoch": 0.5822459673276482, "grad_norm": 0.07890141755342484, "learning_rate": 0.01, "loss": 2.1676, "step": 5667 }, { "epoch": 0.5825541970615432, "grad_norm": 0.07864063233137131, "learning_rate": 0.01, "loss": 2.1766, "step": 5670 }, { "epoch": 0.5828624267954382, "grad_norm": 0.08160068094730377, "learning_rate": 0.01, "loss": 2.166, "step": 5673 }, { "epoch": 0.5831706565293332, "grad_norm": 0.08126121759414673, "learning_rate": 0.01, "loss": 2.1691, "step": 5676 }, { "epoch": 0.5834788862632282, "grad_norm": 0.05922897160053253, "learning_rate": 0.01, "loss": 2.1854, "step": 5679 }, { "epoch": 0.5837871159971232, "grad_norm": 0.1024496778845787, "learning_rate": 0.01, "loss": 2.1818, "step": 5682 }, { "epoch": 0.5840953457310182, "grad_norm": 0.08880037069320679, "learning_rate": 0.01, "loss": 2.2054, "step": 5685 }, { "epoch": 0.5844035754649132, "grad_norm": 0.04404953494668007, "learning_rate": 0.01, "loss": 2.1524, "step": 5688 }, { "epoch": 0.5847118051988082, "grad_norm": 0.05817687511444092, "learning_rate": 0.01, "loss": 2.1813, "step": 5691 }, { "epoch": 0.5850200349327032, "grad_norm": 0.047581762075424194, "learning_rate": 0.01, "loss": 2.1545, "step": 5694 }, { "epoch": 0.5853282646665982, "grad_norm": 0.11034911125898361, "learning_rate": 0.01, "loss": 2.1803, "step": 5697 }, { "epoch": 0.5856364944004931, "grad_norm": 0.05118945613503456, "learning_rate": 0.01, "loss": 2.1314, "step": 5700 }, { "epoch": 0.5859447241343881, "grad_norm": 0.048316795378923416, "learning_rate": 0.01, "loss": 2.1711, "step": 5703 }, { "epoch": 0.5862529538682831, "grad_norm": 0.12578584253787994, "learning_rate": 0.01, "loss": 2.1636, "step": 5706 }, { "epoch": 0.5865611836021781, "grad_norm": 0.06594375520944595, "learning_rate": 0.01, "loss": 2.1977, "step": 5709 }, { "epoch": 0.5868694133360731, "grad_norm": 0.060622964054346085, "learning_rate": 0.01, "loss": 2.1408, "step": 5712 }, { "epoch": 0.5871776430699681, "grad_norm": 0.10055366903543472, "learning_rate": 0.01, "loss": 2.1999, "step": 5715 }, { "epoch": 0.5874858728038631, "grad_norm": 0.10235504060983658, "learning_rate": 0.01, "loss": 2.1337, "step": 5718 }, { "epoch": 0.5877941025377581, "grad_norm": 0.07707791030406952, "learning_rate": 0.01, "loss": 2.1387, "step": 5721 }, { "epoch": 0.5881023322716531, "grad_norm": 0.05508594587445259, "learning_rate": 0.01, "loss": 2.1494, "step": 5724 }, { "epoch": 0.5884105620055481, "grad_norm": 0.06580860912799835, "learning_rate": 0.01, "loss": 2.1598, "step": 5727 }, { "epoch": 0.5887187917394431, "grad_norm": 0.07102775573730469, "learning_rate": 0.01, "loss": 2.1618, "step": 5730 }, { "epoch": 0.5890270214733381, "grad_norm": 0.06750554591417313, "learning_rate": 0.01, "loss": 2.1782, "step": 5733 }, { "epoch": 0.5893352512072332, "grad_norm": 0.07100195437669754, "learning_rate": 0.01, "loss": 2.1456, "step": 5736 }, { "epoch": 0.5896434809411282, "grad_norm": 0.10585575550794601, "learning_rate": 0.01, "loss": 2.1751, "step": 5739 }, { "epoch": 0.5899517106750232, "grad_norm": 0.055082373321056366, "learning_rate": 0.01, "loss": 2.1808, "step": 5742 }, { "epoch": 0.5902599404089182, "grad_norm": 0.06285014003515244, "learning_rate": 0.01, "loss": 2.1588, "step": 5745 }, { "epoch": 0.5905681701428132, "grad_norm": 0.13328112661838531, "learning_rate": 0.01, "loss": 2.177, "step": 5748 }, { "epoch": 0.5908763998767081, "grad_norm": 0.08568006008863449, "learning_rate": 0.01, "loss": 2.1559, "step": 5751 }, { "epoch": 0.5911846296106031, "grad_norm": 0.07850711792707443, "learning_rate": 0.01, "loss": 2.2047, "step": 5754 }, { "epoch": 0.5914928593444981, "grad_norm": 0.07706760615110397, "learning_rate": 0.01, "loss": 2.1602, "step": 5757 }, { "epoch": 0.5918010890783931, "grad_norm": 0.07512292265892029, "learning_rate": 0.01, "loss": 2.1871, "step": 5760 }, { "epoch": 0.5921093188122881, "grad_norm": 0.059620197862386703, "learning_rate": 0.01, "loss": 2.1484, "step": 5763 }, { "epoch": 0.5924175485461831, "grad_norm": 0.04021789878606796, "learning_rate": 0.01, "loss": 2.1651, "step": 5766 }, { "epoch": 0.5927257782800781, "grad_norm": 0.050683967769145966, "learning_rate": 0.01, "loss": 2.1693, "step": 5769 }, { "epoch": 0.5930340080139731, "grad_norm": 0.07091210782527924, "learning_rate": 0.01, "loss": 2.1851, "step": 5772 }, { "epoch": 0.5933422377478681, "grad_norm": 0.09877889603376389, "learning_rate": 0.01, "loss": 2.1642, "step": 5775 }, { "epoch": 0.5936504674817631, "grad_norm": 0.08692251145839691, "learning_rate": 0.01, "loss": 2.1755, "step": 5778 }, { "epoch": 0.5939586972156581, "grad_norm": 0.06255677342414856, "learning_rate": 0.01, "loss": 2.1634, "step": 5781 }, { "epoch": 0.5942669269495531, "grad_norm": 0.05615478754043579, "learning_rate": 0.01, "loss": 2.1909, "step": 5784 }, { "epoch": 0.5945751566834481, "grad_norm": 0.04576956480741501, "learning_rate": 0.01, "loss": 2.1519, "step": 5787 }, { "epoch": 0.5948833864173431, "grad_norm": 0.044911667704582214, "learning_rate": 0.01, "loss": 2.1697, "step": 5790 }, { "epoch": 0.5951916161512381, "grad_norm": 0.07787128537893295, "learning_rate": 0.01, "loss": 2.1611, "step": 5793 }, { "epoch": 0.595499845885133, "grad_norm": 0.06199866533279419, "learning_rate": 0.01, "loss": 2.1576, "step": 5796 }, { "epoch": 0.595808075619028, "grad_norm": 0.07048948854207993, "learning_rate": 0.01, "loss": 2.1721, "step": 5799 }, { "epoch": 0.596116305352923, "grad_norm": 0.1173306256532669, "learning_rate": 0.01, "loss": 2.1573, "step": 5802 }, { "epoch": 0.596424535086818, "grad_norm": 0.06866045296192169, "learning_rate": 0.01, "loss": 2.1606, "step": 5805 }, { "epoch": 0.596732764820713, "grad_norm": 0.06821485608816147, "learning_rate": 0.01, "loss": 2.1842, "step": 5808 }, { "epoch": 0.597040994554608, "grad_norm": 0.09566816687583923, "learning_rate": 0.01, "loss": 2.1569, "step": 5811 }, { "epoch": 0.597349224288503, "grad_norm": 0.1130233108997345, "learning_rate": 0.01, "loss": 2.1649, "step": 5814 }, { "epoch": 0.597657454022398, "grad_norm": 0.07310149073600769, "learning_rate": 0.01, "loss": 2.1798, "step": 5817 }, { "epoch": 0.597965683756293, "grad_norm": 0.04523763060569763, "learning_rate": 0.01, "loss": 2.1515, "step": 5820 }, { "epoch": 0.598273913490188, "grad_norm": 0.05843660235404968, "learning_rate": 0.01, "loss": 2.1403, "step": 5823 }, { "epoch": 0.598582143224083, "grad_norm": 0.03981595113873482, "learning_rate": 0.01, "loss": 2.1598, "step": 5826 }, { "epoch": 0.598890372957978, "grad_norm": 0.057108644396066666, "learning_rate": 0.01, "loss": 2.1619, "step": 5829 }, { "epoch": 0.599198602691873, "grad_norm": 0.12298591434955597, "learning_rate": 0.01, "loss": 2.125, "step": 5832 }, { "epoch": 0.599506832425768, "grad_norm": 0.06120186299085617, "learning_rate": 0.01, "loss": 2.181, "step": 5835 }, { "epoch": 0.599815062159663, "grad_norm": 0.05780164897441864, "learning_rate": 0.01, "loss": 2.1555, "step": 5838 }, { "epoch": 0.600123291893558, "grad_norm": 0.0962534248828888, "learning_rate": 0.01, "loss": 2.1626, "step": 5841 }, { "epoch": 0.600431521627453, "grad_norm": 0.07417720556259155, "learning_rate": 0.01, "loss": 2.1996, "step": 5844 }, { "epoch": 0.6007397513613479, "grad_norm": 0.08221522718667984, "learning_rate": 0.01, "loss": 2.1562, "step": 5847 }, { "epoch": 0.6010479810952429, "grad_norm": 0.11511900275945663, "learning_rate": 0.01, "loss": 2.196, "step": 5850 }, { "epoch": 0.601356210829138, "grad_norm": 0.1331305354833603, "learning_rate": 0.01, "loss": 2.1649, "step": 5853 }, { "epoch": 0.601664440563033, "grad_norm": 0.07239941507577896, "learning_rate": 0.01, "loss": 2.1695, "step": 5856 }, { "epoch": 0.601972670296928, "grad_norm": 0.05865192040801048, "learning_rate": 0.01, "loss": 2.1808, "step": 5859 }, { "epoch": 0.602280900030823, "grad_norm": 0.047268107533454895, "learning_rate": 0.01, "loss": 2.176, "step": 5862 }, { "epoch": 0.602589129764718, "grad_norm": 0.046770863234996796, "learning_rate": 0.01, "loss": 2.1689, "step": 5865 }, { "epoch": 0.602897359498613, "grad_norm": 0.04817832633852959, "learning_rate": 0.01, "loss": 2.1566, "step": 5868 }, { "epoch": 0.603205589232508, "grad_norm": 0.05692889541387558, "learning_rate": 0.01, "loss": 2.1564, "step": 5871 }, { "epoch": 0.603513818966403, "grad_norm": 0.056694116443395615, "learning_rate": 0.01, "loss": 2.1591, "step": 5874 }, { "epoch": 0.603822048700298, "grad_norm": 0.08296339213848114, "learning_rate": 0.01, "loss": 2.1695, "step": 5877 }, { "epoch": 0.604130278434193, "grad_norm": 0.0934629738330841, "learning_rate": 0.01, "loss": 2.1472, "step": 5880 }, { "epoch": 0.604438508168088, "grad_norm": 0.10192359238862991, "learning_rate": 0.01, "loss": 2.1441, "step": 5883 }, { "epoch": 0.604746737901983, "grad_norm": 0.04818946123123169, "learning_rate": 0.01, "loss": 2.1747, "step": 5886 }, { "epoch": 0.605054967635878, "grad_norm": 0.10131523758172989, "learning_rate": 0.01, "loss": 2.1546, "step": 5889 }, { "epoch": 0.605363197369773, "grad_norm": 0.07115977257490158, "learning_rate": 0.01, "loss": 2.1597, "step": 5892 }, { "epoch": 0.605671427103668, "grad_norm": 0.03929082304239273, "learning_rate": 0.01, "loss": 2.171, "step": 5895 }, { "epoch": 0.6059796568375629, "grad_norm": 0.04109720513224602, "learning_rate": 0.01, "loss": 2.134, "step": 5898 }, { "epoch": 0.6062878865714579, "grad_norm": 0.05026080831885338, "learning_rate": 0.01, "loss": 2.1491, "step": 5901 }, { "epoch": 0.6065961163053529, "grad_norm": 0.08281126618385315, "learning_rate": 0.01, "loss": 2.1732, "step": 5904 }, { "epoch": 0.6069043460392479, "grad_norm": 0.04994012042880058, "learning_rate": 0.01, "loss": 2.1664, "step": 5907 }, { "epoch": 0.6072125757731429, "grad_norm": 0.06299131363630295, "learning_rate": 0.01, "loss": 2.1669, "step": 5910 }, { "epoch": 0.6075208055070379, "grad_norm": 0.059428080916404724, "learning_rate": 0.01, "loss": 2.1731, "step": 5913 }, { "epoch": 0.6078290352409329, "grad_norm": 0.07036252319812775, "learning_rate": 0.01, "loss": 2.1787, "step": 5916 }, { "epoch": 0.6081372649748279, "grad_norm": 0.04721888527274132, "learning_rate": 0.01, "loss": 2.1531, "step": 5919 }, { "epoch": 0.6084454947087229, "grad_norm": 0.06953759491443634, "learning_rate": 0.01, "loss": 2.1573, "step": 5922 }, { "epoch": 0.6087537244426179, "grad_norm": 0.11679168790578842, "learning_rate": 0.01, "loss": 2.155, "step": 5925 }, { "epoch": 0.6090619541765129, "grad_norm": 0.09196575731039047, "learning_rate": 0.01, "loss": 2.1574, "step": 5928 }, { "epoch": 0.6093701839104079, "grad_norm": 0.05219469591975212, "learning_rate": 0.01, "loss": 2.1605, "step": 5931 }, { "epoch": 0.6096784136443029, "grad_norm": 0.09352759271860123, "learning_rate": 0.01, "loss": 2.1456, "step": 5934 }, { "epoch": 0.6099866433781979, "grad_norm": 0.07393237948417664, "learning_rate": 0.01, "loss": 2.1611, "step": 5937 }, { "epoch": 0.6102948731120929, "grad_norm": 0.06727741658687592, "learning_rate": 0.01, "loss": 2.1599, "step": 5940 }, { "epoch": 0.6106031028459878, "grad_norm": 0.09024669975042343, "learning_rate": 0.01, "loss": 2.1621, "step": 5943 }, { "epoch": 0.6109113325798828, "grad_norm": 0.04514656960964203, "learning_rate": 0.01, "loss": 2.1809, "step": 5946 }, { "epoch": 0.6112195623137778, "grad_norm": 0.04011565446853638, "learning_rate": 0.01, "loss": 2.1715, "step": 5949 }, { "epoch": 0.6115277920476728, "grad_norm": 0.04640655592083931, "learning_rate": 0.01, "loss": 2.15, "step": 5952 }, { "epoch": 0.6118360217815678, "grad_norm": 0.0471080057322979, "learning_rate": 0.01, "loss": 2.1805, "step": 5955 }, { "epoch": 0.6121442515154628, "grad_norm": 0.17398513853549957, "learning_rate": 0.01, "loss": 2.1497, "step": 5958 }, { "epoch": 0.6124524812493578, "grad_norm": 0.06299551576375961, "learning_rate": 0.01, "loss": 2.1387, "step": 5961 }, { "epoch": 0.6127607109832528, "grad_norm": 0.07517322897911072, "learning_rate": 0.01, "loss": 2.1348, "step": 5964 }, { "epoch": 0.6130689407171478, "grad_norm": 0.050515878945589066, "learning_rate": 0.01, "loss": 2.1725, "step": 5967 }, { "epoch": 0.6133771704510429, "grad_norm": 0.04682675376534462, "learning_rate": 0.01, "loss": 2.1759, "step": 5970 }, { "epoch": 0.6136854001849379, "grad_norm": 0.05297816917300224, "learning_rate": 0.01, "loss": 2.1491, "step": 5973 }, { "epoch": 0.6139936299188329, "grad_norm": 0.07467235624790192, "learning_rate": 0.01, "loss": 2.1556, "step": 5976 }, { "epoch": 0.6143018596527279, "grad_norm": 0.06621374934911728, "learning_rate": 0.01, "loss": 2.1498, "step": 5979 }, { "epoch": 0.6146100893866229, "grad_norm": 0.0538405105471611, "learning_rate": 0.01, "loss": 2.1694, "step": 5982 }, { "epoch": 0.6149183191205179, "grad_norm": 0.09891212731599808, "learning_rate": 0.01, "loss": 2.1598, "step": 5985 }, { "epoch": 0.6152265488544129, "grad_norm": 0.042064208537340164, "learning_rate": 0.01, "loss": 2.1375, "step": 5988 }, { "epoch": 0.6155347785883079, "grad_norm": 0.06750064343214035, "learning_rate": 0.01, "loss": 2.1371, "step": 5991 }, { "epoch": 0.6158430083222028, "grad_norm": 0.0626809298992157, "learning_rate": 0.01, "loss": 2.1455, "step": 5994 }, { "epoch": 0.6161512380560978, "grad_norm": 0.04291335120797157, "learning_rate": 0.01, "loss": 2.1397, "step": 5997 }, { "epoch": 0.6164594677899928, "grad_norm": 0.05945251137018204, "learning_rate": 0.01, "loss": 2.1346, "step": 6000 }, { "epoch": 0.6167676975238878, "grad_norm": 0.15699933469295502, "learning_rate": 0.01, "loss": 2.1384, "step": 6003 }, { "epoch": 0.6170759272577828, "grad_norm": 0.06863987445831299, "learning_rate": 0.01, "loss": 2.1401, "step": 6006 }, { "epoch": 0.6173841569916778, "grad_norm": 0.04850529506802559, "learning_rate": 0.01, "loss": 2.1637, "step": 6009 }, { "epoch": 0.6176923867255728, "grad_norm": 0.05660491809248924, "learning_rate": 0.01, "loss": 2.1721, "step": 6012 }, { "epoch": 0.6180006164594678, "grad_norm": 0.050568364560604095, "learning_rate": 0.01, "loss": 2.1676, "step": 6015 }, { "epoch": 0.6183088461933628, "grad_norm": 0.060765717178583145, "learning_rate": 0.01, "loss": 2.127, "step": 6018 }, { "epoch": 0.6186170759272578, "grad_norm": 0.0731448233127594, "learning_rate": 0.01, "loss": 2.1531, "step": 6021 }, { "epoch": 0.6189253056611528, "grad_norm": 0.055431608110666275, "learning_rate": 0.01, "loss": 2.1662, "step": 6024 }, { "epoch": 0.6192335353950478, "grad_norm": 0.05376220867037773, "learning_rate": 0.01, "loss": 2.1465, "step": 6027 }, { "epoch": 0.6195417651289428, "grad_norm": 0.09729186445474625, "learning_rate": 0.01, "loss": 2.161, "step": 6030 }, { "epoch": 0.6198499948628378, "grad_norm": 0.08046093583106995, "learning_rate": 0.01, "loss": 2.1435, "step": 6033 }, { "epoch": 0.6201582245967328, "grad_norm": 0.09514495730400085, "learning_rate": 0.01, "loss": 2.1511, "step": 6036 }, { "epoch": 0.6204664543306277, "grad_norm": 0.056993287056684494, "learning_rate": 0.01, "loss": 2.1439, "step": 6039 }, { "epoch": 0.6207746840645227, "grad_norm": 0.06429582834243774, "learning_rate": 0.01, "loss": 2.1393, "step": 6042 }, { "epoch": 0.6210829137984177, "grad_norm": 0.1299380660057068, "learning_rate": 0.01, "loss": 2.1831, "step": 6045 }, { "epoch": 0.6213911435323127, "grad_norm": 0.13815906643867493, "learning_rate": 0.01, "loss": 2.1645, "step": 6048 }, { "epoch": 0.6216993732662077, "grad_norm": 0.056314874440431595, "learning_rate": 0.01, "loss": 2.1417, "step": 6051 }, { "epoch": 0.6220076030001027, "grad_norm": 0.06146218627691269, "learning_rate": 0.01, "loss": 2.1418, "step": 6054 }, { "epoch": 0.6223158327339977, "grad_norm": 0.062167149037122726, "learning_rate": 0.01, "loss": 2.1778, "step": 6057 }, { "epoch": 0.6226240624678927, "grad_norm": 0.059581879526376724, "learning_rate": 0.01, "loss": 2.1725, "step": 6060 }, { "epoch": 0.6229322922017877, "grad_norm": 0.044389910995960236, "learning_rate": 0.01, "loss": 2.1553, "step": 6063 }, { "epoch": 0.6232405219356827, "grad_norm": 0.036525238305330276, "learning_rate": 0.01, "loss": 2.1545, "step": 6066 }, { "epoch": 0.6235487516695777, "grad_norm": 0.0995573177933693, "learning_rate": 0.01, "loss": 2.1566, "step": 6069 }, { "epoch": 0.6238569814034727, "grad_norm": 0.10412520170211792, "learning_rate": 0.01, "loss": 2.1525, "step": 6072 }, { "epoch": 0.6241652111373677, "grad_norm": 0.10417335480451584, "learning_rate": 0.01, "loss": 2.1535, "step": 6075 }, { "epoch": 0.6244734408712627, "grad_norm": 0.09024351090192795, "learning_rate": 0.01, "loss": 2.1551, "step": 6078 }, { "epoch": 0.6247816706051577, "grad_norm": 0.04889573156833649, "learning_rate": 0.01, "loss": 2.1549, "step": 6081 }, { "epoch": 0.6250899003390528, "grad_norm": 0.05154373124241829, "learning_rate": 0.01, "loss": 2.1461, "step": 6084 }, { "epoch": 0.6253981300729478, "grad_norm": 0.04337237402796745, "learning_rate": 0.01, "loss": 2.1733, "step": 6087 }, { "epoch": 0.6257063598068427, "grad_norm": 0.06173473224043846, "learning_rate": 0.01, "loss": 2.1657, "step": 6090 }, { "epoch": 0.6260145895407377, "grad_norm": 0.06174352392554283, "learning_rate": 0.01, "loss": 2.1528, "step": 6093 }, { "epoch": 0.6263228192746327, "grad_norm": 0.07301110029220581, "learning_rate": 0.01, "loss": 2.1489, "step": 6096 }, { "epoch": 0.6266310490085277, "grad_norm": 0.04265190288424492, "learning_rate": 0.01, "loss": 2.1624, "step": 6099 }, { "epoch": 0.6269392787424227, "grad_norm": 0.056723251938819885, "learning_rate": 0.01, "loss": 2.1624, "step": 6102 }, { "epoch": 0.6272475084763177, "grad_norm": 0.06809309124946594, "learning_rate": 0.01, "loss": 2.1525, "step": 6105 }, { "epoch": 0.6275557382102127, "grad_norm": 0.06820474565029144, "learning_rate": 0.01, "loss": 2.1472, "step": 6108 }, { "epoch": 0.6278639679441077, "grad_norm": 0.05961904302239418, "learning_rate": 0.01, "loss": 2.1561, "step": 6111 }, { "epoch": 0.6281721976780027, "grad_norm": 0.04617665335536003, "learning_rate": 0.01, "loss": 2.1475, "step": 6114 }, { "epoch": 0.6284804274118977, "grad_norm": 0.040670618414878845, "learning_rate": 0.01, "loss": 2.153, "step": 6117 }, { "epoch": 0.6287886571457927, "grad_norm": 0.09909021109342575, "learning_rate": 0.01, "loss": 2.141, "step": 6120 }, { "epoch": 0.6290968868796877, "grad_norm": 0.04966261237859726, "learning_rate": 0.01, "loss": 2.1264, "step": 6123 }, { "epoch": 0.6294051166135827, "grad_norm": 0.0570046491920948, "learning_rate": 0.01, "loss": 2.1572, "step": 6126 }, { "epoch": 0.6297133463474777, "grad_norm": 0.10374405980110168, "learning_rate": 0.01, "loss": 2.149, "step": 6129 }, { "epoch": 0.6300215760813727, "grad_norm": 0.061325542628765106, "learning_rate": 0.01, "loss": 2.1521, "step": 6132 }, { "epoch": 0.6303298058152677, "grad_norm": 0.16151310503482819, "learning_rate": 0.01, "loss": 2.1825, "step": 6135 }, { "epoch": 0.6306380355491626, "grad_norm": 0.0921199768781662, "learning_rate": 0.01, "loss": 2.1773, "step": 6138 }, { "epoch": 0.6309462652830576, "grad_norm": 0.05603238567709923, "learning_rate": 0.01, "loss": 2.1452, "step": 6141 }, { "epoch": 0.6312544950169526, "grad_norm": 0.12173126637935638, "learning_rate": 0.01, "loss": 2.1713, "step": 6144 }, { "epoch": 0.6315627247508476, "grad_norm": 0.04609265923500061, "learning_rate": 0.01, "loss": 2.1518, "step": 6147 }, { "epoch": 0.6318709544847426, "grad_norm": 0.06445127725601196, "learning_rate": 0.01, "loss": 2.162, "step": 6150 }, { "epoch": 0.6321791842186376, "grad_norm": 0.05396106466650963, "learning_rate": 0.01, "loss": 2.1248, "step": 6153 }, { "epoch": 0.6324874139525326, "grad_norm": 0.06955734640359879, "learning_rate": 0.01, "loss": 2.1497, "step": 6156 }, { "epoch": 0.6327956436864276, "grad_norm": 0.04371445253491402, "learning_rate": 0.01, "loss": 2.1167, "step": 6159 }, { "epoch": 0.6331038734203226, "grad_norm": 0.07146921008825302, "learning_rate": 0.01, "loss": 2.1633, "step": 6162 }, { "epoch": 0.6334121031542176, "grad_norm": 0.08056561648845673, "learning_rate": 0.01, "loss": 2.1506, "step": 6165 }, { "epoch": 0.6337203328881126, "grad_norm": 0.08875605463981628, "learning_rate": 0.01, "loss": 2.1834, "step": 6168 }, { "epoch": 0.6340285626220076, "grad_norm": 0.05090434104204178, "learning_rate": 0.01, "loss": 2.1514, "step": 6171 }, { "epoch": 0.6343367923559026, "grad_norm": 0.11710961163043976, "learning_rate": 0.01, "loss": 2.1589, "step": 6174 }, { "epoch": 0.6346450220897976, "grad_norm": 0.04704523831605911, "learning_rate": 0.01, "loss": 2.1469, "step": 6177 }, { "epoch": 0.6349532518236926, "grad_norm": 0.045143596827983856, "learning_rate": 0.01, "loss": 2.1311, "step": 6180 }, { "epoch": 0.6352614815575875, "grad_norm": 0.04246919974684715, "learning_rate": 0.01, "loss": 2.1481, "step": 6183 }, { "epoch": 0.6355697112914825, "grad_norm": 0.04303867742419243, "learning_rate": 0.01, "loss": 2.1557, "step": 6186 }, { "epoch": 0.6358779410253775, "grad_norm": 0.17376503348350525, "learning_rate": 0.01, "loss": 2.1616, "step": 6189 }, { "epoch": 0.6361861707592725, "grad_norm": 0.11983154714107513, "learning_rate": 0.01, "loss": 2.1569, "step": 6192 }, { "epoch": 0.6364944004931675, "grad_norm": 0.0443497858941555, "learning_rate": 0.01, "loss": 2.1454, "step": 6195 }, { "epoch": 0.6368026302270625, "grad_norm": 0.04241250827908516, "learning_rate": 0.01, "loss": 2.1409, "step": 6198 }, { "epoch": 0.6371108599609576, "grad_norm": 0.07058902829885483, "learning_rate": 0.01, "loss": 2.1246, "step": 6201 }, { "epoch": 0.6374190896948526, "grad_norm": 0.060852985829114914, "learning_rate": 0.01, "loss": 2.1512, "step": 6204 }, { "epoch": 0.6377273194287476, "grad_norm": 0.058703117072582245, "learning_rate": 0.01, "loss": 2.1114, "step": 6207 }, { "epoch": 0.6380355491626426, "grad_norm": 0.08501632511615753, "learning_rate": 0.01, "loss": 2.1818, "step": 6210 }, { "epoch": 0.6383437788965376, "grad_norm": 0.07715412974357605, "learning_rate": 0.01, "loss": 2.1661, "step": 6213 }, { "epoch": 0.6386520086304326, "grad_norm": 0.06822165101766586, "learning_rate": 0.01, "loss": 2.1652, "step": 6216 }, { "epoch": 0.6389602383643276, "grad_norm": 0.048459213227033615, "learning_rate": 0.01, "loss": 2.1311, "step": 6219 }, { "epoch": 0.6392684680982226, "grad_norm": 0.08208850026130676, "learning_rate": 0.01, "loss": 2.1316, "step": 6222 }, { "epoch": 0.6395766978321176, "grad_norm": 0.06399821490049362, "learning_rate": 0.01, "loss": 2.1354, "step": 6225 }, { "epoch": 0.6398849275660126, "grad_norm": 0.12036826461553574, "learning_rate": 0.01, "loss": 2.1509, "step": 6228 }, { "epoch": 0.6401931572999076, "grad_norm": 0.08180755376815796, "learning_rate": 0.01, "loss": 2.1571, "step": 6231 }, { "epoch": 0.6405013870338025, "grad_norm": 0.053771521896123886, "learning_rate": 0.01, "loss": 2.1485, "step": 6234 }, { "epoch": 0.6408096167676975, "grad_norm": 0.042291607707738876, "learning_rate": 0.01, "loss": 2.1606, "step": 6237 }, { "epoch": 0.6411178465015925, "grad_norm": 0.044655315577983856, "learning_rate": 0.01, "loss": 2.1592, "step": 6240 }, { "epoch": 0.6414260762354875, "grad_norm": 0.07763859629631042, "learning_rate": 0.01, "loss": 2.1543, "step": 6243 }, { "epoch": 0.6417343059693825, "grad_norm": 0.055368274450302124, "learning_rate": 0.01, "loss": 2.1643, "step": 6246 }, { "epoch": 0.6420425357032775, "grad_norm": 0.047774944454431534, "learning_rate": 0.01, "loss": 2.1542, "step": 6249 }, { "epoch": 0.6423507654371725, "grad_norm": 0.06478223204612732, "learning_rate": 0.01, "loss": 2.1501, "step": 6252 }, { "epoch": 0.6426589951710675, "grad_norm": 0.03782160207629204, "learning_rate": 0.01, "loss": 2.1455, "step": 6255 }, { "epoch": 0.6429672249049625, "grad_norm": 0.11297930777072906, "learning_rate": 0.01, "loss": 2.1667, "step": 6258 }, { "epoch": 0.6432754546388575, "grad_norm": 0.09408997744321823, "learning_rate": 0.01, "loss": 2.146, "step": 6261 }, { "epoch": 0.6435836843727525, "grad_norm": 0.06677352637052536, "learning_rate": 0.01, "loss": 2.1723, "step": 6264 }, { "epoch": 0.6438919141066475, "grad_norm": 0.08687873184680939, "learning_rate": 0.01, "loss": 2.1517, "step": 6267 }, { "epoch": 0.6442001438405425, "grad_norm": 0.06850516051054001, "learning_rate": 0.01, "loss": 2.148, "step": 6270 }, { "epoch": 0.6445083735744375, "grad_norm": 0.07705084979534149, "learning_rate": 0.01, "loss": 2.1567, "step": 6273 }, { "epoch": 0.6448166033083325, "grad_norm": 0.1622423529624939, "learning_rate": 0.01, "loss": 2.1676, "step": 6276 }, { "epoch": 0.6451248330422275, "grad_norm": 0.11197759211063385, "learning_rate": 0.01, "loss": 2.1376, "step": 6279 }, { "epoch": 0.6454330627761224, "grad_norm": 0.06562814861536026, "learning_rate": 0.01, "loss": 2.1652, "step": 6282 }, { "epoch": 0.6457412925100174, "grad_norm": 0.0867902860045433, "learning_rate": 0.01, "loss": 2.1733, "step": 6285 }, { "epoch": 0.6460495222439124, "grad_norm": 0.08153738081455231, "learning_rate": 0.01, "loss": 2.1442, "step": 6288 }, { "epoch": 0.6463577519778074, "grad_norm": 0.09800709784030914, "learning_rate": 0.01, "loss": 2.1262, "step": 6291 }, { "epoch": 0.6466659817117024, "grad_norm": 0.07728230953216553, "learning_rate": 0.01, "loss": 2.139, "step": 6294 }, { "epoch": 0.6469742114455974, "grad_norm": 0.09658671170473099, "learning_rate": 0.01, "loss": 2.1421, "step": 6297 }, { "epoch": 0.6472824411794924, "grad_norm": 0.0448787659406662, "learning_rate": 0.01, "loss": 2.1415, "step": 6300 }, { "epoch": 0.6475906709133874, "grad_norm": 0.03848707675933838, "learning_rate": 0.01, "loss": 2.1209, "step": 6303 }, { "epoch": 0.6478989006472824, "grad_norm": 0.07465004920959473, "learning_rate": 0.01, "loss": 2.1395, "step": 6306 }, { "epoch": 0.6482071303811774, "grad_norm": 0.060424912720918655, "learning_rate": 0.01, "loss": 2.1806, "step": 6309 }, { "epoch": 0.6485153601150724, "grad_norm": 0.05204974114894867, "learning_rate": 0.01, "loss": 2.1287, "step": 6312 }, { "epoch": 0.6488235898489675, "grad_norm": 0.06045055389404297, "learning_rate": 0.01, "loss": 2.1727, "step": 6315 }, { "epoch": 0.6491318195828625, "grad_norm": 0.04978582262992859, "learning_rate": 0.01, "loss": 2.1264, "step": 6318 }, { "epoch": 0.6494400493167575, "grad_norm": 0.08131048828363419, "learning_rate": 0.01, "loss": 2.137, "step": 6321 }, { "epoch": 0.6497482790506525, "grad_norm": 0.09749994426965714, "learning_rate": 0.01, "loss": 2.1557, "step": 6324 }, { "epoch": 0.6500565087845475, "grad_norm": 0.06079535186290741, "learning_rate": 0.01, "loss": 2.1432, "step": 6327 }, { "epoch": 0.6503647385184425, "grad_norm": 0.08241060376167297, "learning_rate": 0.01, "loss": 2.1551, "step": 6330 }, { "epoch": 0.6506729682523374, "grad_norm": 0.12339378148317337, "learning_rate": 0.01, "loss": 2.1216, "step": 6333 }, { "epoch": 0.6509811979862324, "grad_norm": 0.0660511702299118, "learning_rate": 0.01, "loss": 2.1156, "step": 6336 }, { "epoch": 0.6512894277201274, "grad_norm": 0.06279938668012619, "learning_rate": 0.01, "loss": 2.1778, "step": 6339 }, { "epoch": 0.6515976574540224, "grad_norm": 0.068712018430233, "learning_rate": 0.01, "loss": 2.1348, "step": 6342 }, { "epoch": 0.6519058871879174, "grad_norm": 0.05808734893798828, "learning_rate": 0.01, "loss": 2.135, "step": 6345 }, { "epoch": 0.6522141169218124, "grad_norm": 0.044942643493413925, "learning_rate": 0.01, "loss": 2.1613, "step": 6348 }, { "epoch": 0.6525223466557074, "grad_norm": 0.11666214466094971, "learning_rate": 0.01, "loss": 2.1399, "step": 6351 }, { "epoch": 0.6528305763896024, "grad_norm": 0.06776747852563858, "learning_rate": 0.01, "loss": 2.1369, "step": 6354 }, { "epoch": 0.6531388061234974, "grad_norm": 0.10171874612569809, "learning_rate": 0.01, "loss": 2.1273, "step": 6357 }, { "epoch": 0.6534470358573924, "grad_norm": 0.04611232131719589, "learning_rate": 0.01, "loss": 2.1482, "step": 6360 }, { "epoch": 0.6537552655912874, "grad_norm": 0.042139992117881775, "learning_rate": 0.01, "loss": 2.1235, "step": 6363 }, { "epoch": 0.6540634953251824, "grad_norm": 0.057816632091999054, "learning_rate": 0.01, "loss": 2.1449, "step": 6366 }, { "epoch": 0.6543717250590774, "grad_norm": 0.11400949209928513, "learning_rate": 0.01, "loss": 2.1566, "step": 6369 }, { "epoch": 0.6546799547929724, "grad_norm": 0.07320736348628998, "learning_rate": 0.01, "loss": 2.1682, "step": 6372 }, { "epoch": 0.6549881845268674, "grad_norm": 0.07262291014194489, "learning_rate": 0.01, "loss": 2.1514, "step": 6375 }, { "epoch": 0.6552964142607623, "grad_norm": 0.05559679865837097, "learning_rate": 0.01, "loss": 2.1347, "step": 6378 }, { "epoch": 0.6556046439946573, "grad_norm": 0.049424149096012115, "learning_rate": 0.01, "loss": 2.1423, "step": 6381 }, { "epoch": 0.6559128737285523, "grad_norm": 0.05457301065325737, "learning_rate": 0.01, "loss": 2.1425, "step": 6384 }, { "epoch": 0.6562211034624473, "grad_norm": 0.058564141392707825, "learning_rate": 0.01, "loss": 2.1158, "step": 6387 }, { "epoch": 0.6565293331963423, "grad_norm": 0.10944786667823792, "learning_rate": 0.01, "loss": 2.146, "step": 6390 }, { "epoch": 0.6568375629302373, "grad_norm": 0.07760695368051529, "learning_rate": 0.01, "loss": 2.176, "step": 6393 }, { "epoch": 0.6571457926641323, "grad_norm": 0.07621042430400848, "learning_rate": 0.01, "loss": 2.1779, "step": 6396 }, { "epoch": 0.6574540223980273, "grad_norm": 0.09723789244890213, "learning_rate": 0.01, "loss": 2.1455, "step": 6399 }, { "epoch": 0.6577622521319223, "grad_norm": 0.05648832768201828, "learning_rate": 0.01, "loss": 2.154, "step": 6402 }, { "epoch": 0.6580704818658173, "grad_norm": 0.04370080679655075, "learning_rate": 0.01, "loss": 2.1374, "step": 6405 }, { "epoch": 0.6583787115997123, "grad_norm": 0.03729141131043434, "learning_rate": 0.01, "loss": 2.1275, "step": 6408 }, { "epoch": 0.6586869413336073, "grad_norm": 0.055584125220775604, "learning_rate": 0.01, "loss": 2.1442, "step": 6411 }, { "epoch": 0.6589951710675023, "grad_norm": 0.07981918007135391, "learning_rate": 0.01, "loss": 2.1618, "step": 6414 }, { "epoch": 0.6593034008013973, "grad_norm": 0.09241674095392227, "learning_rate": 0.01, "loss": 2.1519, "step": 6417 }, { "epoch": 0.6596116305352923, "grad_norm": 0.10454630106687546, "learning_rate": 0.01, "loss": 2.1309, "step": 6420 }, { "epoch": 0.6599198602691873, "grad_norm": 0.08674053847789764, "learning_rate": 0.01, "loss": 2.1617, "step": 6423 }, { "epoch": 0.6602280900030822, "grad_norm": 0.06003529578447342, "learning_rate": 0.01, "loss": 2.1475, "step": 6426 }, { "epoch": 0.6605363197369772, "grad_norm": 0.07370956987142563, "learning_rate": 0.01, "loss": 2.1466, "step": 6429 }, { "epoch": 0.6608445494708723, "grad_norm": 0.05090004578232765, "learning_rate": 0.01, "loss": 2.1506, "step": 6432 }, { "epoch": 0.6611527792047673, "grad_norm": 0.06062362715601921, "learning_rate": 0.01, "loss": 2.1601, "step": 6435 }, { "epoch": 0.6614610089386623, "grad_norm": 0.05484107881784439, "learning_rate": 0.01, "loss": 2.1452, "step": 6438 }, { "epoch": 0.6617692386725573, "grad_norm": 0.1367156058549881, "learning_rate": 0.01, "loss": 2.1586, "step": 6441 }, { "epoch": 0.6620774684064523, "grad_norm": 0.05140338093042374, "learning_rate": 0.01, "loss": 2.1463, "step": 6444 }, { "epoch": 0.6623856981403473, "grad_norm": 0.09168683737516403, "learning_rate": 0.01, "loss": 2.1467, "step": 6447 }, { "epoch": 0.6626939278742423, "grad_norm": 0.04098822921514511, "learning_rate": 0.01, "loss": 2.1648, "step": 6450 }, { "epoch": 0.6630021576081373, "grad_norm": 0.049763478338718414, "learning_rate": 0.01, "loss": 2.1289, "step": 6453 }, { "epoch": 0.6633103873420323, "grad_norm": 0.060069404542446136, "learning_rate": 0.01, "loss": 2.1467, "step": 6456 }, { "epoch": 0.6636186170759273, "grad_norm": 0.06611450761556625, "learning_rate": 0.01, "loss": 2.1599, "step": 6459 }, { "epoch": 0.6639268468098223, "grad_norm": 0.04955270141363144, "learning_rate": 0.01, "loss": 2.136, "step": 6462 }, { "epoch": 0.6642350765437173, "grad_norm": 0.04004522040486336, "learning_rate": 0.01, "loss": 2.1457, "step": 6465 }, { "epoch": 0.6645433062776123, "grad_norm": 0.06539756804704666, "learning_rate": 0.01, "loss": 2.1458, "step": 6468 }, { "epoch": 0.6648515360115073, "grad_norm": 0.10684728622436523, "learning_rate": 0.01, "loss": 2.1279, "step": 6471 }, { "epoch": 0.6651597657454023, "grad_norm": 0.09936464577913284, "learning_rate": 0.01, "loss": 2.1767, "step": 6474 }, { "epoch": 0.6654679954792972, "grad_norm": 0.04908827692270279, "learning_rate": 0.01, "loss": 2.1259, "step": 6477 }, { "epoch": 0.6657762252131922, "grad_norm": 0.048053622245788574, "learning_rate": 0.01, "loss": 2.1718, "step": 6480 }, { "epoch": 0.6660844549470872, "grad_norm": 0.05524458363652229, "learning_rate": 0.01, "loss": 2.1673, "step": 6483 }, { "epoch": 0.6663926846809822, "grad_norm": 0.05107030272483826, "learning_rate": 0.01, "loss": 2.13, "step": 6486 }, { "epoch": 0.6667009144148772, "grad_norm": 0.12472579628229141, "learning_rate": 0.01, "loss": 2.149, "step": 6489 }, { "epoch": 0.6670091441487722, "grad_norm": 0.05257454514503479, "learning_rate": 0.01, "loss": 2.1343, "step": 6492 }, { "epoch": 0.6673173738826672, "grad_norm": 0.05986837297677994, "learning_rate": 0.01, "loss": 2.1265, "step": 6495 }, { "epoch": 0.6676256036165622, "grad_norm": 0.08322940021753311, "learning_rate": 0.01, "loss": 2.1317, "step": 6498 }, { "epoch": 0.6679338333504572, "grad_norm": 0.0466473363339901, "learning_rate": 0.01, "loss": 2.1235, "step": 6501 }, { "epoch": 0.6682420630843522, "grad_norm": 0.05092160776257515, "learning_rate": 0.01, "loss": 2.1672, "step": 6504 }, { "epoch": 0.6685502928182472, "grad_norm": 0.08392294496297836, "learning_rate": 0.01, "loss": 2.1473, "step": 6507 }, { "epoch": 0.6688585225521422, "grad_norm": 0.042165517807006836, "learning_rate": 0.01, "loss": 2.1181, "step": 6510 }, { "epoch": 0.6691667522860372, "grad_norm": 0.06214481219649315, "learning_rate": 0.01, "loss": 2.138, "step": 6513 }, { "epoch": 0.6694749820199322, "grad_norm": 0.06087846681475639, "learning_rate": 0.01, "loss": 2.15, "step": 6516 }, { "epoch": 0.6697832117538272, "grad_norm": 0.047256652265787125, "learning_rate": 0.01, "loss": 2.1433, "step": 6519 }, { "epoch": 0.6700914414877222, "grad_norm": 0.10626421123743057, "learning_rate": 0.01, "loss": 2.156, "step": 6522 }, { "epoch": 0.6703996712216171, "grad_norm": 0.09426552802324295, "learning_rate": 0.01, "loss": 2.1472, "step": 6525 }, { "epoch": 0.6707079009555121, "grad_norm": 0.0632442831993103, "learning_rate": 0.01, "loss": 2.1536, "step": 6528 }, { "epoch": 0.6710161306894071, "grad_norm": 0.07149971276521683, "learning_rate": 0.01, "loss": 2.1694, "step": 6531 }, { "epoch": 0.6713243604233021, "grad_norm": 0.04060966521501541, "learning_rate": 0.01, "loss": 2.164, "step": 6534 }, { "epoch": 0.6716325901571971, "grad_norm": 0.20043891668319702, "learning_rate": 0.01, "loss": 2.125, "step": 6537 }, { "epoch": 0.6719408198910921, "grad_norm": 0.06755783408880234, "learning_rate": 0.01, "loss": 2.15, "step": 6540 }, { "epoch": 0.6722490496249871, "grad_norm": 0.0509268082678318, "learning_rate": 0.01, "loss": 2.1405, "step": 6543 }, { "epoch": 0.6725572793588821, "grad_norm": 0.04033916816115379, "learning_rate": 0.01, "loss": 2.136, "step": 6546 }, { "epoch": 0.6728655090927772, "grad_norm": 0.04707946255803108, "learning_rate": 0.01, "loss": 2.1514, "step": 6549 }, { "epoch": 0.6731737388266722, "grad_norm": 0.04360898956656456, "learning_rate": 0.01, "loss": 2.1518, "step": 6552 }, { "epoch": 0.6734819685605672, "grad_norm": 0.11959343403577805, "learning_rate": 0.01, "loss": 2.1377, "step": 6555 }, { "epoch": 0.6737901982944622, "grad_norm": 0.06620760262012482, "learning_rate": 0.01, "loss": 2.1419, "step": 6558 }, { "epoch": 0.6740984280283572, "grad_norm": 0.056747015565633774, "learning_rate": 0.01, "loss": 2.138, "step": 6561 }, { "epoch": 0.6744066577622522, "grad_norm": 0.05230560526251793, "learning_rate": 0.01, "loss": 2.1335, "step": 6564 }, { "epoch": 0.6747148874961472, "grad_norm": 0.0526299811899662, "learning_rate": 0.01, "loss": 2.131, "step": 6567 }, { "epoch": 0.6750231172300422, "grad_norm": 0.15683774650096893, "learning_rate": 0.01, "loss": 2.1167, "step": 6570 }, { "epoch": 0.6753313469639372, "grad_norm": 0.10133557766675949, "learning_rate": 0.01, "loss": 2.1219, "step": 6573 }, { "epoch": 0.6756395766978321, "grad_norm": 0.06826774775981903, "learning_rate": 0.01, "loss": 2.1416, "step": 6576 }, { "epoch": 0.6759478064317271, "grad_norm": 0.046236682683229446, "learning_rate": 0.01, "loss": 2.1704, "step": 6579 }, { "epoch": 0.6762560361656221, "grad_norm": 0.07654762268066406, "learning_rate": 0.01, "loss": 2.1411, "step": 6582 }, { "epoch": 0.6765642658995171, "grad_norm": 0.07760706543922424, "learning_rate": 0.01, "loss": 2.168, "step": 6585 }, { "epoch": 0.6768724956334121, "grad_norm": 0.04213540256023407, "learning_rate": 0.01, "loss": 2.1899, "step": 6588 }, { "epoch": 0.6771807253673071, "grad_norm": 0.0517420619726181, "learning_rate": 0.01, "loss": 2.1561, "step": 6591 }, { "epoch": 0.6774889551012021, "grad_norm": 0.04073292762041092, "learning_rate": 0.01, "loss": 2.1475, "step": 6594 }, { "epoch": 0.6777971848350971, "grad_norm": 0.11223835498094559, "learning_rate": 0.01, "loss": 2.1102, "step": 6597 }, { "epoch": 0.6781054145689921, "grad_norm": 0.08094224333763123, "learning_rate": 0.01, "loss": 2.1537, "step": 6600 }, { "epoch": 0.6784136443028871, "grad_norm": 0.036313675343990326, "learning_rate": 0.01, "loss": 2.1471, "step": 6603 }, { "epoch": 0.6787218740367821, "grad_norm": 0.09553749114274979, "learning_rate": 0.01, "loss": 2.1445, "step": 6606 }, { "epoch": 0.6790301037706771, "grad_norm": 0.07334265112876892, "learning_rate": 0.01, "loss": 2.1594, "step": 6609 }, { "epoch": 0.6793383335045721, "grad_norm": 0.12031051516532898, "learning_rate": 0.01, "loss": 2.1321, "step": 6612 }, { "epoch": 0.6796465632384671, "grad_norm": 0.08834968507289886, "learning_rate": 0.01, "loss": 2.1474, "step": 6615 }, { "epoch": 0.679954792972362, "grad_norm": 0.05016850307583809, "learning_rate": 0.01, "loss": 2.1582, "step": 6618 }, { "epoch": 0.680263022706257, "grad_norm": 0.039213377982378006, "learning_rate": 0.01, "loss": 2.1461, "step": 6621 }, { "epoch": 0.680571252440152, "grad_norm": 0.035611145198345184, "learning_rate": 0.01, "loss": 2.137, "step": 6624 }, { "epoch": 0.680879482174047, "grad_norm": 0.09345167875289917, "learning_rate": 0.01, "loss": 2.1357, "step": 6627 }, { "epoch": 0.681187711907942, "grad_norm": 0.04311450198292732, "learning_rate": 0.01, "loss": 2.1413, "step": 6630 }, { "epoch": 0.681495941641837, "grad_norm": 0.040315765887498856, "learning_rate": 0.01, "loss": 2.1091, "step": 6633 }, { "epoch": 0.681804171375732, "grad_norm": 0.11044291406869888, "learning_rate": 0.01, "loss": 2.1392, "step": 6636 }, { "epoch": 0.682112401109627, "grad_norm": 0.1288553774356842, "learning_rate": 0.01, "loss": 2.1129, "step": 6639 }, { "epoch": 0.682420630843522, "grad_norm": 0.0698169469833374, "learning_rate": 0.01, "loss": 2.137, "step": 6642 }, { "epoch": 0.682728860577417, "grad_norm": 0.037890784442424774, "learning_rate": 0.01, "loss": 2.1195, "step": 6645 }, { "epoch": 0.683037090311312, "grad_norm": 0.07425201684236526, "learning_rate": 0.01, "loss": 2.1194, "step": 6648 }, { "epoch": 0.683345320045207, "grad_norm": 0.058168716728687286, "learning_rate": 0.01, "loss": 2.1371, "step": 6651 }, { "epoch": 0.683653549779102, "grad_norm": 0.05515358969569206, "learning_rate": 0.01, "loss": 2.137, "step": 6654 }, { "epoch": 0.683961779512997, "grad_norm": 0.0501445047557354, "learning_rate": 0.01, "loss": 2.1539, "step": 6657 }, { "epoch": 0.684270009246892, "grad_norm": 0.06167145445942879, "learning_rate": 0.01, "loss": 2.1413, "step": 6660 }, { "epoch": 0.6845782389807871, "grad_norm": 0.0841723158955574, "learning_rate": 0.01, "loss": 2.1194, "step": 6663 }, { "epoch": 0.6848864687146821, "grad_norm": 0.06027607619762421, "learning_rate": 0.01, "loss": 2.158, "step": 6666 }, { "epoch": 0.685194698448577, "grad_norm": 0.1187741607427597, "learning_rate": 0.01, "loss": 2.1651, "step": 6669 }, { "epoch": 0.685502928182472, "grad_norm": 0.10789939761161804, "learning_rate": 0.01, "loss": 2.1465, "step": 6672 }, { "epoch": 0.685811157916367, "grad_norm": 0.06254967302083969, "learning_rate": 0.01, "loss": 2.1639, "step": 6675 }, { "epoch": 0.686119387650262, "grad_norm": 0.04242802783846855, "learning_rate": 0.01, "loss": 2.1563, "step": 6678 }, { "epoch": 0.686427617384157, "grad_norm": 0.03538980334997177, "learning_rate": 0.01, "loss": 2.1373, "step": 6681 }, { "epoch": 0.686735847118052, "grad_norm": 0.04609490931034088, "learning_rate": 0.01, "loss": 2.1345, "step": 6684 }, { "epoch": 0.687044076851947, "grad_norm": 0.1298975795507431, "learning_rate": 0.01, "loss": 2.1446, "step": 6687 }, { "epoch": 0.687352306585842, "grad_norm": 0.10049281269311905, "learning_rate": 0.01, "loss": 2.1432, "step": 6690 }, { "epoch": 0.687660536319737, "grad_norm": 0.05908266827464104, "learning_rate": 0.01, "loss": 2.1288, "step": 6693 }, { "epoch": 0.687968766053632, "grad_norm": 0.0546141043305397, "learning_rate": 0.01, "loss": 2.1086, "step": 6696 }, { "epoch": 0.688276995787527, "grad_norm": 0.04135862737894058, "learning_rate": 0.01, "loss": 2.1187, "step": 6699 }, { "epoch": 0.688585225521422, "grad_norm": 0.03824761137366295, "learning_rate": 0.01, "loss": 2.1162, "step": 6702 }, { "epoch": 0.688893455255317, "grad_norm": 0.041454900056123734, "learning_rate": 0.01, "loss": 2.1304, "step": 6705 }, { "epoch": 0.689201684989212, "grad_norm": 0.08948934823274612, "learning_rate": 0.01, "loss": 2.1538, "step": 6708 }, { "epoch": 0.689509914723107, "grad_norm": 0.07379783689975739, "learning_rate": 0.01, "loss": 2.145, "step": 6711 }, { "epoch": 0.689818144457002, "grad_norm": 0.0833912044763565, "learning_rate": 0.01, "loss": 2.1218, "step": 6714 }, { "epoch": 0.690126374190897, "grad_norm": 0.05899098515510559, "learning_rate": 0.01, "loss": 2.1516, "step": 6717 }, { "epoch": 0.690434603924792, "grad_norm": 0.06462058424949646, "learning_rate": 0.01, "loss": 2.1496, "step": 6720 }, { "epoch": 0.6907428336586869, "grad_norm": 0.04040443152189255, "learning_rate": 0.01, "loss": 2.1311, "step": 6723 }, { "epoch": 0.6910510633925819, "grad_norm": 0.05336814373731613, "learning_rate": 0.01, "loss": 2.1227, "step": 6726 }, { "epoch": 0.6913592931264769, "grad_norm": 0.05057406798005104, "learning_rate": 0.01, "loss": 2.1281, "step": 6729 }, { "epoch": 0.6916675228603719, "grad_norm": 0.08063513040542603, "learning_rate": 0.01, "loss": 2.1318, "step": 6732 }, { "epoch": 0.6919757525942669, "grad_norm": 0.08304840326309204, "learning_rate": 0.01, "loss": 2.1179, "step": 6735 }, { "epoch": 0.6922839823281619, "grad_norm": 0.04266434162855148, "learning_rate": 0.01, "loss": 2.1447, "step": 6738 }, { "epoch": 0.6925922120620569, "grad_norm": 0.07502007484436035, "learning_rate": 0.01, "loss": 2.1173, "step": 6741 }, { "epoch": 0.6929004417959519, "grad_norm": 0.10870220512151718, "learning_rate": 0.01, "loss": 2.1555, "step": 6744 }, { "epoch": 0.6932086715298469, "grad_norm": 0.15824924409389496, "learning_rate": 0.01, "loss": 2.1668, "step": 6747 }, { "epoch": 0.6935169012637419, "grad_norm": 0.06319935619831085, "learning_rate": 0.01, "loss": 2.1788, "step": 6750 }, { "epoch": 0.6938251309976369, "grad_norm": 0.06392507255077362, "learning_rate": 0.01, "loss": 2.1398, "step": 6753 }, { "epoch": 0.6941333607315319, "grad_norm": 0.044481996446847916, "learning_rate": 0.01, "loss": 2.147, "step": 6756 }, { "epoch": 0.6944415904654269, "grad_norm": 0.09093592315912247, "learning_rate": 0.01, "loss": 2.1399, "step": 6759 }, { "epoch": 0.6947498201993219, "grad_norm": 0.09249415248632431, "learning_rate": 0.01, "loss": 2.1274, "step": 6762 }, { "epoch": 0.6950580499332168, "grad_norm": 0.06134162098169327, "learning_rate": 0.01, "loss": 2.142, "step": 6765 }, { "epoch": 0.6953662796671118, "grad_norm": 0.048883359879255295, "learning_rate": 0.01, "loss": 2.1357, "step": 6768 }, { "epoch": 0.6956745094010068, "grad_norm": 0.04553356394171715, "learning_rate": 0.01, "loss": 2.132, "step": 6771 }, { "epoch": 0.6959827391349018, "grad_norm": 0.10365505516529083, "learning_rate": 0.01, "loss": 2.1568, "step": 6774 }, { "epoch": 0.6962909688687968, "grad_norm": 0.07474958896636963, "learning_rate": 0.01, "loss": 2.1209, "step": 6777 }, { "epoch": 0.6965991986026919, "grad_norm": 0.11140461266040802, "learning_rate": 0.01, "loss": 2.1585, "step": 6780 }, { "epoch": 0.6969074283365869, "grad_norm": 0.0529690645635128, "learning_rate": 0.01, "loss": 2.1228, "step": 6783 }, { "epoch": 0.6972156580704819, "grad_norm": 0.06484264135360718, "learning_rate": 0.01, "loss": 2.117, "step": 6786 }, { "epoch": 0.6975238878043769, "grad_norm": 0.0467400886118412, "learning_rate": 0.01, "loss": 2.1367, "step": 6789 }, { "epoch": 0.6978321175382719, "grad_norm": 0.09690822660923004, "learning_rate": 0.01, "loss": 2.1275, "step": 6792 }, { "epoch": 0.6981403472721669, "grad_norm": 0.053299982100725174, "learning_rate": 0.01, "loss": 2.1557, "step": 6795 }, { "epoch": 0.6984485770060619, "grad_norm": 0.08451724797487259, "learning_rate": 0.01, "loss": 2.1235, "step": 6798 }, { "epoch": 0.6987568067399569, "grad_norm": 0.11180119216442108, "learning_rate": 0.01, "loss": 2.1389, "step": 6801 }, { "epoch": 0.6990650364738519, "grad_norm": 0.04366112872958183, "learning_rate": 0.01, "loss": 2.1345, "step": 6804 }, { "epoch": 0.6993732662077469, "grad_norm": 0.057021014392375946, "learning_rate": 0.01, "loss": 2.145, "step": 6807 }, { "epoch": 0.6996814959416419, "grad_norm": 0.050035975873470306, "learning_rate": 0.01, "loss": 2.1245, "step": 6810 }, { "epoch": 0.6999897256755369, "grad_norm": 0.16434957087039948, "learning_rate": 0.01, "loss": 2.099, "step": 6813 }, { "epoch": 0.7002979554094318, "grad_norm": 0.0473979152739048, "learning_rate": 0.01, "loss": 2.124, "step": 6816 }, { "epoch": 0.7006061851433268, "grad_norm": 0.06207640469074249, "learning_rate": 0.01, "loss": 2.1528, "step": 6819 }, { "epoch": 0.7009144148772218, "grad_norm": 0.09829109162092209, "learning_rate": 0.01, "loss": 2.1359, "step": 6822 }, { "epoch": 0.7012226446111168, "grad_norm": 0.0563257597386837, "learning_rate": 0.01, "loss": 2.1639, "step": 6825 }, { "epoch": 0.7015308743450118, "grad_norm": 0.12371699512004852, "learning_rate": 0.01, "loss": 2.1479, "step": 6828 }, { "epoch": 0.7018391040789068, "grad_norm": 0.07342347502708435, "learning_rate": 0.01, "loss": 2.1786, "step": 6831 }, { "epoch": 0.7021473338128018, "grad_norm": 0.05420146882534027, "learning_rate": 0.01, "loss": 2.1261, "step": 6834 }, { "epoch": 0.7024555635466968, "grad_norm": 0.04500873014330864, "learning_rate": 0.01, "loss": 2.1356, "step": 6837 }, { "epoch": 0.7027637932805918, "grad_norm": 0.10648415237665176, "learning_rate": 0.01, "loss": 2.1205, "step": 6840 }, { "epoch": 0.7030720230144868, "grad_norm": 0.05089351162314415, "learning_rate": 0.01, "loss": 2.1403, "step": 6843 }, { "epoch": 0.7033802527483818, "grad_norm": 0.10011807084083557, "learning_rate": 0.01, "loss": 2.1508, "step": 6846 }, { "epoch": 0.7036884824822768, "grad_norm": 0.06787194311618805, "learning_rate": 0.01, "loss": 2.1391, "step": 6849 }, { "epoch": 0.7039967122161718, "grad_norm": 0.08248817175626755, "learning_rate": 0.01, "loss": 2.1782, "step": 6852 }, { "epoch": 0.7043049419500668, "grad_norm": 0.04949905723333359, "learning_rate": 0.01, "loss": 2.1401, "step": 6855 }, { "epoch": 0.7046131716839618, "grad_norm": 0.043910931795835495, "learning_rate": 0.01, "loss": 2.108, "step": 6858 }, { "epoch": 0.7049214014178568, "grad_norm": 0.05133078247308731, "learning_rate": 0.01, "loss": 2.1088, "step": 6861 }, { "epoch": 0.7052296311517517, "grad_norm": 0.11582443863153458, "learning_rate": 0.01, "loss": 2.1301, "step": 6864 }, { "epoch": 0.7055378608856467, "grad_norm": 0.04287354275584221, "learning_rate": 0.01, "loss": 2.124, "step": 6867 }, { "epoch": 0.7058460906195417, "grad_norm": 0.09393726289272308, "learning_rate": 0.01, "loss": 2.1326, "step": 6870 }, { "epoch": 0.7061543203534367, "grad_norm": 0.1286250203847885, "learning_rate": 0.01, "loss": 2.1292, "step": 6873 }, { "epoch": 0.7064625500873317, "grad_norm": 0.14816388487815857, "learning_rate": 0.01, "loss": 2.1439, "step": 6876 }, { "epoch": 0.7067707798212267, "grad_norm": 0.062444012612104416, "learning_rate": 0.01, "loss": 2.1421, "step": 6879 }, { "epoch": 0.7070790095551217, "grad_norm": 0.053750455379486084, "learning_rate": 0.01, "loss": 2.1185, "step": 6882 }, { "epoch": 0.7073872392890167, "grad_norm": 0.051356710493564606, "learning_rate": 0.01, "loss": 2.1298, "step": 6885 }, { "epoch": 0.7076954690229117, "grad_norm": 0.061504025012254715, "learning_rate": 0.01, "loss": 2.1132, "step": 6888 }, { "epoch": 0.7080036987568067, "grad_norm": 0.056496761739254, "learning_rate": 0.01, "loss": 2.1019, "step": 6891 }, { "epoch": 0.7083119284907017, "grad_norm": 0.048710647970438004, "learning_rate": 0.01, "loss": 2.126, "step": 6894 }, { "epoch": 0.7086201582245968, "grad_norm": 0.06260757148265839, "learning_rate": 0.01, "loss": 2.1534, "step": 6897 }, { "epoch": 0.7089283879584918, "grad_norm": 0.06622278690338135, "learning_rate": 0.01, "loss": 2.1247, "step": 6900 }, { "epoch": 0.7092366176923868, "grad_norm": 0.0810452550649643, "learning_rate": 0.01, "loss": 2.1336, "step": 6903 }, { "epoch": 0.7095448474262818, "grad_norm": 0.04692875221371651, "learning_rate": 0.01, "loss": 2.1096, "step": 6906 }, { "epoch": 0.7098530771601768, "grad_norm": 0.04757360368967056, "learning_rate": 0.01, "loss": 2.1181, "step": 6909 }, { "epoch": 0.7101613068940718, "grad_norm": 0.05597659945487976, "learning_rate": 0.01, "loss": 2.1425, "step": 6912 }, { "epoch": 0.7104695366279667, "grad_norm": 0.051605843007564545, "learning_rate": 0.01, "loss": 2.1118, "step": 6915 }, { "epoch": 0.7107777663618617, "grad_norm": 0.06179991737008095, "learning_rate": 0.01, "loss": 2.1362, "step": 6918 }, { "epoch": 0.7110859960957567, "grad_norm": 0.05455191805958748, "learning_rate": 0.01, "loss": 2.1279, "step": 6921 }, { "epoch": 0.7113942258296517, "grad_norm": 0.11560655385255814, "learning_rate": 0.01, "loss": 2.1316, "step": 6924 }, { "epoch": 0.7117024555635467, "grad_norm": 0.12203246355056763, "learning_rate": 0.01, "loss": 2.1173, "step": 6927 }, { "epoch": 0.7120106852974417, "grad_norm": 0.07024069130420685, "learning_rate": 0.01, "loss": 2.1395, "step": 6930 }, { "epoch": 0.7123189150313367, "grad_norm": 0.04773107171058655, "learning_rate": 0.01, "loss": 2.1455, "step": 6933 }, { "epoch": 0.7126271447652317, "grad_norm": 0.06106821820139885, "learning_rate": 0.01, "loss": 2.1352, "step": 6936 }, { "epoch": 0.7129353744991267, "grad_norm": 0.11438222974538803, "learning_rate": 0.01, "loss": 2.149, "step": 6939 }, { "epoch": 0.7132436042330217, "grad_norm": 0.07224932312965393, "learning_rate": 0.01, "loss": 2.1234, "step": 6942 }, { "epoch": 0.7135518339669167, "grad_norm": 0.06790932267904282, "learning_rate": 0.01, "loss": 2.1222, "step": 6945 }, { "epoch": 0.7138600637008117, "grad_norm": 0.12322958558797836, "learning_rate": 0.01, "loss": 2.106, "step": 6948 }, { "epoch": 0.7141682934347067, "grad_norm": 0.07186157256364822, "learning_rate": 0.01, "loss": 2.1365, "step": 6951 }, { "epoch": 0.7144765231686017, "grad_norm": 0.05366130173206329, "learning_rate": 0.01, "loss": 2.1264, "step": 6954 }, { "epoch": 0.7147847529024967, "grad_norm": 0.06682512164115906, "learning_rate": 0.01, "loss": 2.1163, "step": 6957 }, { "epoch": 0.7150929826363916, "grad_norm": 0.04629479721188545, "learning_rate": 0.01, "loss": 2.126, "step": 6960 }, { "epoch": 0.7154012123702866, "grad_norm": 0.053164754062891006, "learning_rate": 0.01, "loss": 2.1262, "step": 6963 }, { "epoch": 0.7157094421041816, "grad_norm": 0.08918699622154236, "learning_rate": 0.01, "loss": 2.157, "step": 6966 }, { "epoch": 0.7160176718380766, "grad_norm": 0.06226164847612381, "learning_rate": 0.01, "loss": 2.1391, "step": 6969 }, { "epoch": 0.7163259015719716, "grad_norm": 0.08120178431272507, "learning_rate": 0.01, "loss": 2.1118, "step": 6972 }, { "epoch": 0.7166341313058666, "grad_norm": 0.06390135735273361, "learning_rate": 0.01, "loss": 2.1302, "step": 6975 }, { "epoch": 0.7169423610397616, "grad_norm": 0.039068643003702164, "learning_rate": 0.01, "loss": 2.1304, "step": 6978 }, { "epoch": 0.7172505907736566, "grad_norm": 0.05006824806332588, "learning_rate": 0.01, "loss": 2.1352, "step": 6981 }, { "epoch": 0.7175588205075516, "grad_norm": 0.03946538642048836, "learning_rate": 0.01, "loss": 2.1513, "step": 6984 }, { "epoch": 0.7178670502414466, "grad_norm": 0.05072702839970589, "learning_rate": 0.01, "loss": 2.1298, "step": 6987 }, { "epoch": 0.7181752799753416, "grad_norm": 0.06457548588514328, "learning_rate": 0.01, "loss": 2.1276, "step": 6990 }, { "epoch": 0.7184835097092366, "grad_norm": 0.05759236589074135, "learning_rate": 0.01, "loss": 2.1198, "step": 6993 }, { "epoch": 0.7187917394431316, "grad_norm": 0.1151571124792099, "learning_rate": 0.01, "loss": 2.1217, "step": 6996 }, { "epoch": 0.7190999691770266, "grad_norm": 0.04867241531610489, "learning_rate": 0.01, "loss": 2.1343, "step": 6999 }, { "epoch": 0.7194081989109216, "grad_norm": 0.074817955493927, "learning_rate": 0.01, "loss": 2.1474, "step": 7002 }, { "epoch": 0.7197164286448166, "grad_norm": 0.04749060794711113, "learning_rate": 0.01, "loss": 2.1403, "step": 7005 }, { "epoch": 0.7200246583787115, "grad_norm": 0.04965493455529213, "learning_rate": 0.01, "loss": 2.142, "step": 7008 }, { "epoch": 0.7203328881126067, "grad_norm": 0.044914234429597855, "learning_rate": 0.01, "loss": 2.1397, "step": 7011 }, { "epoch": 0.7206411178465016, "grad_norm": 0.06727777421474457, "learning_rate": 0.01, "loss": 2.1443, "step": 7014 }, { "epoch": 0.7209493475803966, "grad_norm": 0.10670837014913559, "learning_rate": 0.01, "loss": 2.1316, "step": 7017 }, { "epoch": 0.7212575773142916, "grad_norm": 0.05047740787267685, "learning_rate": 0.01, "loss": 2.1268, "step": 7020 }, { "epoch": 0.7215658070481866, "grad_norm": 0.055116791278123856, "learning_rate": 0.01, "loss": 2.1194, "step": 7023 }, { "epoch": 0.7218740367820816, "grad_norm": 0.04873311519622803, "learning_rate": 0.01, "loss": 2.1122, "step": 7026 }, { "epoch": 0.7221822665159766, "grad_norm": 0.0893159881234169, "learning_rate": 0.01, "loss": 2.1413, "step": 7029 }, { "epoch": 0.7224904962498716, "grad_norm": 0.07278893142938614, "learning_rate": 0.01, "loss": 2.1394, "step": 7032 }, { "epoch": 0.7227987259837666, "grad_norm": 0.09431196749210358, "learning_rate": 0.01, "loss": 2.1489, "step": 7035 }, { "epoch": 0.7231069557176616, "grad_norm": 0.03588537499308586, "learning_rate": 0.01, "loss": 2.1585, "step": 7038 }, { "epoch": 0.7234151854515566, "grad_norm": 0.044003136456012726, "learning_rate": 0.01, "loss": 2.1442, "step": 7041 }, { "epoch": 0.7237234151854516, "grad_norm": 0.10805044323205948, "learning_rate": 0.01, "loss": 2.127, "step": 7044 }, { "epoch": 0.7240316449193466, "grad_norm": 0.06328746676445007, "learning_rate": 0.01, "loss": 2.1166, "step": 7047 }, { "epoch": 0.7243398746532416, "grad_norm": 0.08782347291707993, "learning_rate": 0.01, "loss": 2.1474, "step": 7050 }, { "epoch": 0.7246481043871366, "grad_norm": 0.06585227698087692, "learning_rate": 0.01, "loss": 2.1228, "step": 7053 }, { "epoch": 0.7249563341210316, "grad_norm": 0.06324558705091476, "learning_rate": 0.01, "loss": 2.1313, "step": 7056 }, { "epoch": 0.7252645638549265, "grad_norm": 0.057287219911813736, "learning_rate": 0.01, "loss": 2.1241, "step": 7059 }, { "epoch": 0.7255727935888215, "grad_norm": 0.07684747129678726, "learning_rate": 0.01, "loss": 2.1299, "step": 7062 }, { "epoch": 0.7258810233227165, "grad_norm": 0.10347555577754974, "learning_rate": 0.01, "loss": 2.12, "step": 7065 }, { "epoch": 0.7261892530566115, "grad_norm": 0.06019530072808266, "learning_rate": 0.01, "loss": 2.138, "step": 7068 }, { "epoch": 0.7264974827905065, "grad_norm": 0.04816723242402077, "learning_rate": 0.01, "loss": 2.1161, "step": 7071 }, { "epoch": 0.7268057125244015, "grad_norm": 0.05839864909648895, "learning_rate": 0.01, "loss": 2.136, "step": 7074 }, { "epoch": 0.7271139422582965, "grad_norm": 0.061795271933078766, "learning_rate": 0.01, "loss": 2.1315, "step": 7077 }, { "epoch": 0.7274221719921915, "grad_norm": 0.05736471712589264, "learning_rate": 0.01, "loss": 2.1403, "step": 7080 }, { "epoch": 0.7277304017260865, "grad_norm": 0.059238459914922714, "learning_rate": 0.01, "loss": 2.1101, "step": 7083 }, { "epoch": 0.7280386314599815, "grad_norm": 0.10844148695468903, "learning_rate": 0.01, "loss": 2.1454, "step": 7086 }, { "epoch": 0.7283468611938765, "grad_norm": 0.047568898648023605, "learning_rate": 0.01, "loss": 2.1183, "step": 7089 }, { "epoch": 0.7286550909277715, "grad_norm": 0.05178900063037872, "learning_rate": 0.01, "loss": 2.1346, "step": 7092 }, { "epoch": 0.7289633206616665, "grad_norm": 0.04113532230257988, "learning_rate": 0.01, "loss": 2.0915, "step": 7095 }, { "epoch": 0.7292715503955615, "grad_norm": 0.10488615930080414, "learning_rate": 0.01, "loss": 2.1239, "step": 7098 }, { "epoch": 0.7295797801294565, "grad_norm": 0.13013161718845367, "learning_rate": 0.01, "loss": 2.1251, "step": 7101 }, { "epoch": 0.7298880098633515, "grad_norm": 0.10956915467977524, "learning_rate": 0.01, "loss": 2.1113, "step": 7104 }, { "epoch": 0.7301962395972464, "grad_norm": 0.06996689736843109, "learning_rate": 0.01, "loss": 2.118, "step": 7107 }, { "epoch": 0.7305044693311414, "grad_norm": 0.07773365080356598, "learning_rate": 0.01, "loss": 2.1144, "step": 7110 }, { "epoch": 0.7308126990650364, "grad_norm": 0.06922838091850281, "learning_rate": 0.01, "loss": 2.1148, "step": 7113 }, { "epoch": 0.7311209287989314, "grad_norm": 0.08941454440355301, "learning_rate": 0.01, "loss": 2.1493, "step": 7116 }, { "epoch": 0.7314291585328264, "grad_norm": 0.04264171048998833, "learning_rate": 0.01, "loss": 2.136, "step": 7119 }, { "epoch": 0.7317373882667214, "grad_norm": 0.04473461955785751, "learning_rate": 0.01, "loss": 2.1294, "step": 7122 }, { "epoch": 0.7320456180006164, "grad_norm": 0.0396125465631485, "learning_rate": 0.01, "loss": 2.1439, "step": 7125 }, { "epoch": 0.7323538477345115, "grad_norm": 0.04613679647445679, "learning_rate": 0.01, "loss": 2.1503, "step": 7128 }, { "epoch": 0.7326620774684065, "grad_norm": 0.04897918924689293, "learning_rate": 0.01, "loss": 2.1214, "step": 7131 }, { "epoch": 0.7329703072023015, "grad_norm": 0.05057375133037567, "learning_rate": 0.01, "loss": 2.1112, "step": 7134 }, { "epoch": 0.7332785369361965, "grad_norm": 0.05711055174469948, "learning_rate": 0.01, "loss": 2.102, "step": 7137 }, { "epoch": 0.7335867666700915, "grad_norm": 0.08658434450626373, "learning_rate": 0.01, "loss": 2.1574, "step": 7140 }, { "epoch": 0.7338949964039865, "grad_norm": 0.07044188678264618, "learning_rate": 0.01, "loss": 2.1037, "step": 7143 }, { "epoch": 0.7342032261378815, "grad_norm": 0.03941315785050392, "learning_rate": 0.01, "loss": 2.1369, "step": 7146 }, { "epoch": 0.7345114558717765, "grad_norm": 0.04527783393859863, "learning_rate": 0.01, "loss": 2.1212, "step": 7149 }, { "epoch": 0.7348196856056715, "grad_norm": 0.07909847050905228, "learning_rate": 0.01, "loss": 2.1316, "step": 7152 }, { "epoch": 0.7351279153395665, "grad_norm": 0.12793006002902985, "learning_rate": 0.01, "loss": 2.1254, "step": 7155 }, { "epoch": 0.7354361450734614, "grad_norm": 0.0639350563287735, "learning_rate": 0.01, "loss": 2.1319, "step": 7158 }, { "epoch": 0.7357443748073564, "grad_norm": 0.0342305451631546, "learning_rate": 0.01, "loss": 2.1386, "step": 7161 }, { "epoch": 0.7360526045412514, "grad_norm": 0.049001939594745636, "learning_rate": 0.01, "loss": 2.1485, "step": 7164 }, { "epoch": 0.7363608342751464, "grad_norm": 0.047717638313770294, "learning_rate": 0.01, "loss": 2.1368, "step": 7167 }, { "epoch": 0.7366690640090414, "grad_norm": 0.04402822256088257, "learning_rate": 0.01, "loss": 2.1162, "step": 7170 }, { "epoch": 0.7369772937429364, "grad_norm": 0.06922505795955658, "learning_rate": 0.01, "loss": 2.1279, "step": 7173 }, { "epoch": 0.7372855234768314, "grad_norm": 0.06231709569692612, "learning_rate": 0.01, "loss": 2.0946, "step": 7176 }, { "epoch": 0.7375937532107264, "grad_norm": 0.11480400711297989, "learning_rate": 0.01, "loss": 2.146, "step": 7179 }, { "epoch": 0.7379019829446214, "grad_norm": 0.05144179239869118, "learning_rate": 0.01, "loss": 2.1128, "step": 7182 }, { "epoch": 0.7382102126785164, "grad_norm": 0.05130591616034508, "learning_rate": 0.01, "loss": 2.0964, "step": 7185 }, { "epoch": 0.7385184424124114, "grad_norm": 0.0549122579395771, "learning_rate": 0.01, "loss": 2.165, "step": 7188 }, { "epoch": 0.7388266721463064, "grad_norm": 0.1378844678401947, "learning_rate": 0.01, "loss": 2.1367, "step": 7191 }, { "epoch": 0.7391349018802014, "grad_norm": 0.06231486052274704, "learning_rate": 0.01, "loss": 2.1341, "step": 7194 }, { "epoch": 0.7394431316140964, "grad_norm": 0.10189559310674667, "learning_rate": 0.01, "loss": 2.1161, "step": 7197 }, { "epoch": 0.7397513613479914, "grad_norm": 0.053364284336566925, "learning_rate": 0.01, "loss": 2.1043, "step": 7200 }, { "epoch": 0.7400595910818863, "grad_norm": 0.046057943254709244, "learning_rate": 0.01, "loss": 2.1011, "step": 7203 }, { "epoch": 0.7403678208157813, "grad_norm": 0.04084615036845207, "learning_rate": 0.01, "loss": 2.1253, "step": 7206 }, { "epoch": 0.7406760505496763, "grad_norm": 0.04594961181282997, "learning_rate": 0.01, "loss": 2.1228, "step": 7209 }, { "epoch": 0.7409842802835713, "grad_norm": 0.06608622521162033, "learning_rate": 0.01, "loss": 2.1188, "step": 7212 }, { "epoch": 0.7412925100174663, "grad_norm": 0.125398188829422, "learning_rate": 0.01, "loss": 2.1057, "step": 7215 }, { "epoch": 0.7416007397513613, "grad_norm": 0.08068963885307312, "learning_rate": 0.01, "loss": 2.0947, "step": 7218 }, { "epoch": 0.7419089694852563, "grad_norm": 0.07993921637535095, "learning_rate": 0.01, "loss": 2.1214, "step": 7221 }, { "epoch": 0.7422171992191513, "grad_norm": 0.04969675466418266, "learning_rate": 0.01, "loss": 2.1099, "step": 7224 }, { "epoch": 0.7425254289530463, "grad_norm": 0.054677605628967285, "learning_rate": 0.01, "loss": 2.1229, "step": 7227 }, { "epoch": 0.7428336586869413, "grad_norm": 0.04562999680638313, "learning_rate": 0.01, "loss": 2.1409, "step": 7230 }, { "epoch": 0.7431418884208363, "grad_norm": 0.07618910074234009, "learning_rate": 0.01, "loss": 2.0924, "step": 7233 }, { "epoch": 0.7434501181547313, "grad_norm": 0.14368098974227905, "learning_rate": 0.01, "loss": 2.1348, "step": 7236 }, { "epoch": 0.7437583478886263, "grad_norm": 0.05517590045928955, "learning_rate": 0.01, "loss": 2.116, "step": 7239 }, { "epoch": 0.7440665776225214, "grad_norm": 0.17316390573978424, "learning_rate": 0.01, "loss": 2.1363, "step": 7242 }, { "epoch": 0.7443748073564164, "grad_norm": 0.15268415212631226, "learning_rate": 0.01, "loss": 2.1033, "step": 7245 }, { "epoch": 0.7446830370903114, "grad_norm": 0.06212317943572998, "learning_rate": 0.01, "loss": 2.0971, "step": 7248 }, { "epoch": 0.7449912668242064, "grad_norm": 0.04282272607088089, "learning_rate": 0.01, "loss": 2.1434, "step": 7251 }, { "epoch": 0.7452994965581013, "grad_norm": 0.04305952787399292, "learning_rate": 0.01, "loss": 2.1406, "step": 7254 }, { "epoch": 0.7456077262919963, "grad_norm": 0.048668697476387024, "learning_rate": 0.01, "loss": 2.1303, "step": 7257 }, { "epoch": 0.7459159560258913, "grad_norm": 0.05524542182683945, "learning_rate": 0.01, "loss": 2.1331, "step": 7260 }, { "epoch": 0.7462241857597863, "grad_norm": 0.0438026525080204, "learning_rate": 0.01, "loss": 2.109, "step": 7263 }, { "epoch": 0.7465324154936813, "grad_norm": 0.08154566586017609, "learning_rate": 0.01, "loss": 2.1053, "step": 7266 }, { "epoch": 0.7468406452275763, "grad_norm": 0.11754357814788818, "learning_rate": 0.01, "loss": 2.1298, "step": 7269 }, { "epoch": 0.7471488749614713, "grad_norm": 0.06593465805053711, "learning_rate": 0.01, "loss": 2.1323, "step": 7272 }, { "epoch": 0.7474571046953663, "grad_norm": 0.08065393567085266, "learning_rate": 0.01, "loss": 2.1297, "step": 7275 }, { "epoch": 0.7477653344292613, "grad_norm": 0.10624121129512787, "learning_rate": 0.01, "loss": 2.1175, "step": 7278 }, { "epoch": 0.7480735641631563, "grad_norm": 0.06357972323894501, "learning_rate": 0.01, "loss": 2.164, "step": 7281 }, { "epoch": 0.7483817938970513, "grad_norm": 0.03753754869103432, "learning_rate": 0.01, "loss": 2.1519, "step": 7284 }, { "epoch": 0.7486900236309463, "grad_norm": 0.04756931588053703, "learning_rate": 0.01, "loss": 2.0901, "step": 7287 }, { "epoch": 0.7489982533648413, "grad_norm": 0.0494108609855175, "learning_rate": 0.01, "loss": 2.1474, "step": 7290 }, { "epoch": 0.7493064830987363, "grad_norm": 0.063727006316185, "learning_rate": 0.01, "loss": 2.1425, "step": 7293 }, { "epoch": 0.7496147128326313, "grad_norm": 0.06327082961797714, "learning_rate": 0.01, "loss": 2.1346, "step": 7296 }, { "epoch": 0.7499229425665263, "grad_norm": 0.10383486747741699, "learning_rate": 0.01, "loss": 2.1245, "step": 7299 }, { "epoch": 0.7502311723004212, "grad_norm": 0.10473886877298355, "learning_rate": 0.01, "loss": 2.1302, "step": 7302 }, { "epoch": 0.7505394020343162, "grad_norm": 0.04905236139893532, "learning_rate": 0.01, "loss": 2.119, "step": 7305 }, { "epoch": 0.7508476317682112, "grad_norm": 0.04571664705872536, "learning_rate": 0.01, "loss": 2.1505, "step": 7308 }, { "epoch": 0.7511558615021062, "grad_norm": 0.06305412203073502, "learning_rate": 0.01, "loss": 2.1389, "step": 7311 }, { "epoch": 0.7514640912360012, "grad_norm": 0.05825283005833626, "learning_rate": 0.01, "loss": 2.1361, "step": 7314 }, { "epoch": 0.7517723209698962, "grad_norm": 0.059476301074028015, "learning_rate": 0.01, "loss": 2.1191, "step": 7317 }, { "epoch": 0.7520805507037912, "grad_norm": 0.042396873235702515, "learning_rate": 0.01, "loss": 2.1309, "step": 7320 }, { "epoch": 0.7523887804376862, "grad_norm": 0.04611228406429291, "learning_rate": 0.01, "loss": 2.1438, "step": 7323 }, { "epoch": 0.7526970101715812, "grad_norm": 0.09147686511278152, "learning_rate": 0.01, "loss": 2.1288, "step": 7326 }, { "epoch": 0.7530052399054762, "grad_norm": 0.08085332810878754, "learning_rate": 0.01, "loss": 2.1427, "step": 7329 }, { "epoch": 0.7533134696393712, "grad_norm": 0.03873496130108833, "learning_rate": 0.01, "loss": 2.1257, "step": 7332 }, { "epoch": 0.7536216993732662, "grad_norm": 0.05457824096083641, "learning_rate": 0.01, "loss": 2.1373, "step": 7335 }, { "epoch": 0.7539299291071612, "grad_norm": 0.049249522387981415, "learning_rate": 0.01, "loss": 2.1185, "step": 7338 }, { "epoch": 0.7542381588410562, "grad_norm": 0.07082841545343399, "learning_rate": 0.01, "loss": 2.1157, "step": 7341 }, { "epoch": 0.7545463885749512, "grad_norm": 0.046108178794384, "learning_rate": 0.01, "loss": 2.1238, "step": 7344 }, { "epoch": 0.7548546183088461, "grad_norm": 0.05572620406746864, "learning_rate": 0.01, "loss": 2.1445, "step": 7347 }, { "epoch": 0.7551628480427411, "grad_norm": 0.1091703474521637, "learning_rate": 0.01, "loss": 2.1281, "step": 7350 }, { "epoch": 0.7554710777766361, "grad_norm": 0.09372757375240326, "learning_rate": 0.01, "loss": 2.1231, "step": 7353 }, { "epoch": 0.7557793075105311, "grad_norm": 0.0482059009373188, "learning_rate": 0.01, "loss": 2.1003, "step": 7356 }, { "epoch": 0.7560875372444262, "grad_norm": 0.041941821575164795, "learning_rate": 0.01, "loss": 2.1382, "step": 7359 }, { "epoch": 0.7563957669783212, "grad_norm": 0.07122782617807388, "learning_rate": 0.01, "loss": 2.1419, "step": 7362 }, { "epoch": 0.7567039967122162, "grad_norm": 0.06854265183210373, "learning_rate": 0.01, "loss": 2.1328, "step": 7365 }, { "epoch": 0.7570122264461112, "grad_norm": 0.10073423385620117, "learning_rate": 0.01, "loss": 2.1322, "step": 7368 }, { "epoch": 0.7573204561800062, "grad_norm": 0.038869407027959824, "learning_rate": 0.01, "loss": 2.1273, "step": 7371 }, { "epoch": 0.7576286859139012, "grad_norm": 0.09483812749385834, "learning_rate": 0.01, "loss": 2.1465, "step": 7374 }, { "epoch": 0.7579369156477962, "grad_norm": 0.07226487994194031, "learning_rate": 0.01, "loss": 2.1386, "step": 7377 }, { "epoch": 0.7582451453816912, "grad_norm": 0.05041668191552162, "learning_rate": 0.01, "loss": 2.1249, "step": 7380 }, { "epoch": 0.7585533751155862, "grad_norm": 0.03839525580406189, "learning_rate": 0.01, "loss": 2.1125, "step": 7383 }, { "epoch": 0.7588616048494812, "grad_norm": 0.047746479511260986, "learning_rate": 0.01, "loss": 2.1027, "step": 7386 }, { "epoch": 0.7591698345833762, "grad_norm": 0.05524810031056404, "learning_rate": 0.01, "loss": 2.166, "step": 7389 }, { "epoch": 0.7594780643172712, "grad_norm": 0.050045181065797806, "learning_rate": 0.01, "loss": 2.1411, "step": 7392 }, { "epoch": 0.7597862940511662, "grad_norm": 0.09187906980514526, "learning_rate": 0.01, "loss": 2.13, "step": 7395 }, { "epoch": 0.7600945237850611, "grad_norm": 0.15085643529891968, "learning_rate": 0.01, "loss": 2.1198, "step": 7398 }, { "epoch": 0.7604027535189561, "grad_norm": 0.05295104160904884, "learning_rate": 0.01, "loss": 2.1067, "step": 7401 }, { "epoch": 0.7607109832528511, "grad_norm": 0.03696104511618614, "learning_rate": 0.01, "loss": 2.1159, "step": 7404 }, { "epoch": 0.7610192129867461, "grad_norm": 0.04209265485405922, "learning_rate": 0.01, "loss": 2.1243, "step": 7407 }, { "epoch": 0.7613274427206411, "grad_norm": 0.056943077594041824, "learning_rate": 0.01, "loss": 2.1038, "step": 7410 }, { "epoch": 0.7616356724545361, "grad_norm": 0.12749402225017548, "learning_rate": 0.01, "loss": 2.1087, "step": 7413 }, { "epoch": 0.7619439021884311, "grad_norm": 0.09119253605604172, "learning_rate": 0.01, "loss": 2.1252, "step": 7416 }, { "epoch": 0.7622521319223261, "grad_norm": 0.04251190647482872, "learning_rate": 0.01, "loss": 2.1384, "step": 7419 }, { "epoch": 0.7625603616562211, "grad_norm": 0.04010685533285141, "learning_rate": 0.01, "loss": 2.1449, "step": 7422 }, { "epoch": 0.7628685913901161, "grad_norm": 0.05524475499987602, "learning_rate": 0.01, "loss": 2.0841, "step": 7425 }, { "epoch": 0.7631768211240111, "grad_norm": 0.10250036418437958, "learning_rate": 0.01, "loss": 2.0827, "step": 7428 }, { "epoch": 0.7634850508579061, "grad_norm": 0.0748668685555458, "learning_rate": 0.01, "loss": 2.128, "step": 7431 }, { "epoch": 0.7637932805918011, "grad_norm": 0.08616036176681519, "learning_rate": 0.01, "loss": 2.1087, "step": 7434 }, { "epoch": 0.7641015103256961, "grad_norm": 0.09491308033466339, "learning_rate": 0.01, "loss": 2.1247, "step": 7437 }, { "epoch": 0.7644097400595911, "grad_norm": 0.08575759083032608, "learning_rate": 0.01, "loss": 2.1419, "step": 7440 }, { "epoch": 0.764717969793486, "grad_norm": 0.04314613714814186, "learning_rate": 0.01, "loss": 2.1462, "step": 7443 }, { "epoch": 0.765026199527381, "grad_norm": 0.035719119012355804, "learning_rate": 0.01, "loss": 2.1337, "step": 7446 }, { "epoch": 0.765334429261276, "grad_norm": 0.04597650095820427, "learning_rate": 0.01, "loss": 2.1258, "step": 7449 }, { "epoch": 0.765642658995171, "grad_norm": 0.10039210319519043, "learning_rate": 0.01, "loss": 2.1238, "step": 7452 }, { "epoch": 0.765950888729066, "grad_norm": 0.07157409191131592, "learning_rate": 0.01, "loss": 2.1349, "step": 7455 }, { "epoch": 0.766259118462961, "grad_norm": 0.09058292210102081, "learning_rate": 0.01, "loss": 2.1232, "step": 7458 }, { "epoch": 0.766567348196856, "grad_norm": 0.06009940057992935, "learning_rate": 0.01, "loss": 2.1386, "step": 7461 }, { "epoch": 0.766875577930751, "grad_norm": 0.1165439561009407, "learning_rate": 0.01, "loss": 2.1293, "step": 7464 }, { "epoch": 0.767183807664646, "grad_norm": 0.06138407811522484, "learning_rate": 0.01, "loss": 2.1066, "step": 7467 }, { "epoch": 0.767492037398541, "grad_norm": 0.06058945506811142, "learning_rate": 0.01, "loss": 2.1044, "step": 7470 }, { "epoch": 0.767800267132436, "grad_norm": 0.06741827726364136, "learning_rate": 0.01, "loss": 2.1191, "step": 7473 }, { "epoch": 0.7681084968663311, "grad_norm": 0.047926925122737885, "learning_rate": 0.01, "loss": 2.1333, "step": 7476 }, { "epoch": 0.7684167266002261, "grad_norm": 0.06450969725847244, "learning_rate": 0.01, "loss": 2.1061, "step": 7479 }, { "epoch": 0.7687249563341211, "grad_norm": 0.11133641749620438, "learning_rate": 0.01, "loss": 2.1129, "step": 7482 }, { "epoch": 0.7690331860680161, "grad_norm": 0.049795158207416534, "learning_rate": 0.01, "loss": 2.136, "step": 7485 }, { "epoch": 0.7693414158019111, "grad_norm": 0.06083859130740166, "learning_rate": 0.01, "loss": 2.1459, "step": 7488 }, { "epoch": 0.7696496455358061, "grad_norm": 0.04686833918094635, "learning_rate": 0.01, "loss": 2.1073, "step": 7491 }, { "epoch": 0.769957875269701, "grad_norm": 0.05475611612200737, "learning_rate": 0.01, "loss": 2.1167, "step": 7494 }, { "epoch": 0.770266105003596, "grad_norm": 0.04683786630630493, "learning_rate": 0.01, "loss": 2.1491, "step": 7497 }, { "epoch": 0.770574334737491, "grad_norm": 0.10841275751590729, "learning_rate": 0.01, "loss": 2.0967, "step": 7500 }, { "epoch": 0.770882564471386, "grad_norm": 0.09716581553220749, "learning_rate": 0.01, "loss": 2.1133, "step": 7503 }, { "epoch": 0.771190794205281, "grad_norm": 0.04913085699081421, "learning_rate": 0.01, "loss": 2.1401, "step": 7506 }, { "epoch": 0.771499023939176, "grad_norm": 0.04710682854056358, "learning_rate": 0.01, "loss": 2.109, "step": 7509 }, { "epoch": 0.771807253673071, "grad_norm": 0.054945673793554306, "learning_rate": 0.01, "loss": 2.1169, "step": 7512 }, { "epoch": 0.772115483406966, "grad_norm": 0.04265155643224716, "learning_rate": 0.01, "loss": 2.1156, "step": 7515 }, { "epoch": 0.772423713140861, "grad_norm": 0.03544042259454727, "learning_rate": 0.01, "loss": 2.1172, "step": 7518 }, { "epoch": 0.772731942874756, "grad_norm": 0.05048484355211258, "learning_rate": 0.01, "loss": 2.1015, "step": 7521 }, { "epoch": 0.773040172608651, "grad_norm": 0.14160272479057312, "learning_rate": 0.01, "loss": 2.1475, "step": 7524 }, { "epoch": 0.773348402342546, "grad_norm": 0.08693049848079681, "learning_rate": 0.01, "loss": 2.1266, "step": 7527 }, { "epoch": 0.773656632076441, "grad_norm": 0.06437800824642181, "learning_rate": 0.01, "loss": 2.1273, "step": 7530 }, { "epoch": 0.773964861810336, "grad_norm": 0.04450656846165657, "learning_rate": 0.01, "loss": 2.1192, "step": 7533 }, { "epoch": 0.774273091544231, "grad_norm": 0.05369933694601059, "learning_rate": 0.01, "loss": 2.1264, "step": 7536 }, { "epoch": 0.774581321278126, "grad_norm": 0.04080953076481819, "learning_rate": 0.01, "loss": 2.1319, "step": 7539 }, { "epoch": 0.774889551012021, "grad_norm": 0.03433745354413986, "learning_rate": 0.01, "loss": 2.1024, "step": 7542 }, { "epoch": 0.7751977807459159, "grad_norm": 0.1574896275997162, "learning_rate": 0.01, "loss": 2.1182, "step": 7545 }, { "epoch": 0.7755060104798109, "grad_norm": 0.1207810789346695, "learning_rate": 0.01, "loss": 2.1052, "step": 7548 }, { "epoch": 0.7758142402137059, "grad_norm": 0.07270894944667816, "learning_rate": 0.01, "loss": 2.1331, "step": 7551 }, { "epoch": 0.7761224699476009, "grad_norm": 0.07062831521034241, "learning_rate": 0.01, "loss": 2.099, "step": 7554 }, { "epoch": 0.7764306996814959, "grad_norm": 0.04142964631319046, "learning_rate": 0.01, "loss": 2.1192, "step": 7557 }, { "epoch": 0.7767389294153909, "grad_norm": 0.04645151272416115, "learning_rate": 0.01, "loss": 2.1117, "step": 7560 }, { "epoch": 0.7770471591492859, "grad_norm": 0.046251073479652405, "learning_rate": 0.01, "loss": 2.1399, "step": 7563 }, { "epoch": 0.7773553888831809, "grad_norm": 0.07185769826173782, "learning_rate": 0.01, "loss": 2.1261, "step": 7566 }, { "epoch": 0.7776636186170759, "grad_norm": 0.045216575264930725, "learning_rate": 0.01, "loss": 2.1302, "step": 7569 }, { "epoch": 0.7779718483509709, "grad_norm": 0.04923580586910248, "learning_rate": 0.01, "loss": 2.1482, "step": 7572 }, { "epoch": 0.7782800780848659, "grad_norm": 0.06434139609336853, "learning_rate": 0.01, "loss": 2.1325, "step": 7575 }, { "epoch": 0.7785883078187609, "grad_norm": 0.11186740547418594, "learning_rate": 0.01, "loss": 2.1168, "step": 7578 }, { "epoch": 0.7788965375526559, "grad_norm": 0.06694278120994568, "learning_rate": 0.01, "loss": 2.15, "step": 7581 }, { "epoch": 0.7792047672865509, "grad_norm": 0.05431769788265228, "learning_rate": 0.01, "loss": 2.1156, "step": 7584 }, { "epoch": 0.7795129970204459, "grad_norm": 0.05853963643312454, "learning_rate": 0.01, "loss": 2.1145, "step": 7587 }, { "epoch": 0.779821226754341, "grad_norm": 0.04059399664402008, "learning_rate": 0.01, "loss": 2.1373, "step": 7590 }, { "epoch": 0.780129456488236, "grad_norm": 0.06444236636161804, "learning_rate": 0.01, "loss": 2.1167, "step": 7593 }, { "epoch": 0.7804376862221309, "grad_norm": 0.09885245561599731, "learning_rate": 0.01, "loss": 2.1146, "step": 7596 }, { "epoch": 0.7807459159560259, "grad_norm": 0.08536794036626816, "learning_rate": 0.01, "loss": 2.1282, "step": 7599 }, { "epoch": 0.7810541456899209, "grad_norm": 0.04299011081457138, "learning_rate": 0.01, "loss": 2.103, "step": 7602 }, { "epoch": 0.7813623754238159, "grad_norm": 0.060757700353860855, "learning_rate": 0.01, "loss": 2.0923, "step": 7605 }, { "epoch": 0.7816706051577109, "grad_norm": 0.037401244044303894, "learning_rate": 0.01, "loss": 2.1343, "step": 7608 }, { "epoch": 0.7819788348916059, "grad_norm": 0.12264932692050934, "learning_rate": 0.01, "loss": 2.1193, "step": 7611 }, { "epoch": 0.7822870646255009, "grad_norm": 0.052691470831632614, "learning_rate": 0.01, "loss": 2.1097, "step": 7614 }, { "epoch": 0.7825952943593959, "grad_norm": 0.05509025603532791, "learning_rate": 0.01, "loss": 2.1208, "step": 7617 }, { "epoch": 0.7829035240932909, "grad_norm": 0.10352631658315659, "learning_rate": 0.01, "loss": 2.1277, "step": 7620 }, { "epoch": 0.7832117538271859, "grad_norm": 0.05865751951932907, "learning_rate": 0.01, "loss": 2.138, "step": 7623 }, { "epoch": 0.7835199835610809, "grad_norm": 0.09445837885141373, "learning_rate": 0.01, "loss": 2.1131, "step": 7626 }, { "epoch": 0.7838282132949759, "grad_norm": 0.11066542565822601, "learning_rate": 0.01, "loss": 2.1315, "step": 7629 }, { "epoch": 0.7841364430288709, "grad_norm": 0.05489170923829079, "learning_rate": 0.01, "loss": 2.1264, "step": 7632 }, { "epoch": 0.7844446727627659, "grad_norm": 0.06804061681032181, "learning_rate": 0.01, "loss": 2.1491, "step": 7635 }, { "epoch": 0.7847529024966609, "grad_norm": 0.07411237061023712, "learning_rate": 0.01, "loss": 2.126, "step": 7638 }, { "epoch": 0.7850611322305558, "grad_norm": 0.050356972962617874, "learning_rate": 0.01, "loss": 2.1237, "step": 7641 }, { "epoch": 0.7853693619644508, "grad_norm": 0.06125912442803383, "learning_rate": 0.01, "loss": 2.1328, "step": 7644 }, { "epoch": 0.7856775916983458, "grad_norm": 0.05983618274331093, "learning_rate": 0.01, "loss": 2.1152, "step": 7647 }, { "epoch": 0.7859858214322408, "grad_norm": 0.04065684601664543, "learning_rate": 0.01, "loss": 2.1213, "step": 7650 }, { "epoch": 0.7862940511661358, "grad_norm": 0.05535745993256569, "learning_rate": 0.01, "loss": 2.1106, "step": 7653 }, { "epoch": 0.7866022809000308, "grad_norm": 0.09727519005537033, "learning_rate": 0.01, "loss": 2.1202, "step": 7656 }, { "epoch": 0.7869105106339258, "grad_norm": 0.07764584571123123, "learning_rate": 0.01, "loss": 2.1181, "step": 7659 }, { "epoch": 0.7872187403678208, "grad_norm": 0.04933121055364609, "learning_rate": 0.01, "loss": 2.1217, "step": 7662 }, { "epoch": 0.7875269701017158, "grad_norm": 0.12199501693248749, "learning_rate": 0.01, "loss": 2.1412, "step": 7665 }, { "epoch": 0.7878351998356108, "grad_norm": 0.14431309700012207, "learning_rate": 0.01, "loss": 2.1249, "step": 7668 }, { "epoch": 0.7881434295695058, "grad_norm": 0.07583998888731003, "learning_rate": 0.01, "loss": 2.139, "step": 7671 }, { "epoch": 0.7884516593034008, "grad_norm": 0.10426465421915054, "learning_rate": 0.01, "loss": 2.1053, "step": 7674 }, { "epoch": 0.7887598890372958, "grad_norm": 0.06411170959472656, "learning_rate": 0.01, "loss": 2.1105, "step": 7677 }, { "epoch": 0.7890681187711908, "grad_norm": 0.07436025142669678, "learning_rate": 0.01, "loss": 2.1301, "step": 7680 }, { "epoch": 0.7893763485050858, "grad_norm": 0.10409426689147949, "learning_rate": 0.01, "loss": 2.1319, "step": 7683 }, { "epoch": 0.7896845782389807, "grad_norm": 0.05232664570212364, "learning_rate": 0.01, "loss": 2.1458, "step": 7686 }, { "epoch": 0.7899928079728757, "grad_norm": 0.06705309450626373, "learning_rate": 0.01, "loss": 2.1231, "step": 7689 }, { "epoch": 0.7903010377067707, "grad_norm": 0.04422546550631523, "learning_rate": 0.01, "loss": 2.0836, "step": 7692 }, { "epoch": 0.7906092674406657, "grad_norm": 0.04316714033484459, "learning_rate": 0.01, "loss": 2.1117, "step": 7695 }, { "epoch": 0.7909174971745607, "grad_norm": 0.058282140642404556, "learning_rate": 0.01, "loss": 2.0904, "step": 7698 }, { "epoch": 0.7912257269084557, "grad_norm": 0.07676571607589722, "learning_rate": 0.01, "loss": 2.1402, "step": 7701 }, { "epoch": 0.7915339566423507, "grad_norm": 0.07258665561676025, "learning_rate": 0.01, "loss": 2.1458, "step": 7704 }, { "epoch": 0.7918421863762458, "grad_norm": 0.04850257560610771, "learning_rate": 0.01, "loss": 2.0886, "step": 7707 }, { "epoch": 0.7921504161101408, "grad_norm": 0.05658482015132904, "learning_rate": 0.01, "loss": 2.1174, "step": 7710 }, { "epoch": 0.7924586458440358, "grad_norm": 0.06475166231393814, "learning_rate": 0.01, "loss": 2.0995, "step": 7713 }, { "epoch": 0.7927668755779308, "grad_norm": 0.10428962856531143, "learning_rate": 0.01, "loss": 2.109, "step": 7716 }, { "epoch": 0.7930751053118258, "grad_norm": 0.04227283224463463, "learning_rate": 0.01, "loss": 2.1124, "step": 7719 }, { "epoch": 0.7933833350457208, "grad_norm": 0.0594823881983757, "learning_rate": 0.01, "loss": 2.0944, "step": 7722 }, { "epoch": 0.7936915647796158, "grad_norm": 0.08695527911186218, "learning_rate": 0.01, "loss": 2.1077, "step": 7725 }, { "epoch": 0.7939997945135108, "grad_norm": 0.06003952398896217, "learning_rate": 0.01, "loss": 2.088, "step": 7728 }, { "epoch": 0.7943080242474058, "grad_norm": 0.058509476482868195, "learning_rate": 0.01, "loss": 2.1471, "step": 7731 }, { "epoch": 0.7946162539813008, "grad_norm": 0.048057131469249725, "learning_rate": 0.01, "loss": 2.1252, "step": 7734 }, { "epoch": 0.7949244837151958, "grad_norm": 0.11144626140594482, "learning_rate": 0.01, "loss": 2.1209, "step": 7737 }, { "epoch": 0.7952327134490907, "grad_norm": 0.041008081287145615, "learning_rate": 0.01, "loss": 2.1139, "step": 7740 }, { "epoch": 0.7955409431829857, "grad_norm": 0.04088988155126572, "learning_rate": 0.01, "loss": 2.0927, "step": 7743 }, { "epoch": 0.7958491729168807, "grad_norm": 0.1495555192232132, "learning_rate": 0.01, "loss": 2.0977, "step": 7746 }, { "epoch": 0.7961574026507757, "grad_norm": 0.042645204812288284, "learning_rate": 0.01, "loss": 2.1021, "step": 7749 }, { "epoch": 0.7964656323846707, "grad_norm": 0.04671596363186836, "learning_rate": 0.01, "loss": 2.1015, "step": 7752 }, { "epoch": 0.7967738621185657, "grad_norm": 0.07249152660369873, "learning_rate": 0.01, "loss": 2.1278, "step": 7755 }, { "epoch": 0.7970820918524607, "grad_norm": 0.05848756060004234, "learning_rate": 0.01, "loss": 2.1168, "step": 7758 }, { "epoch": 0.7973903215863557, "grad_norm": 0.05428781732916832, "learning_rate": 0.01, "loss": 2.1228, "step": 7761 }, { "epoch": 0.7976985513202507, "grad_norm": 0.04751111939549446, "learning_rate": 0.01, "loss": 2.1178, "step": 7764 }, { "epoch": 0.7980067810541457, "grad_norm": 0.08653240650892258, "learning_rate": 0.01, "loss": 2.1081, "step": 7767 }, { "epoch": 0.7983150107880407, "grad_norm": 0.04038892313838005, "learning_rate": 0.01, "loss": 2.1028, "step": 7770 }, { "epoch": 0.7986232405219357, "grad_norm": 0.05703849345445633, "learning_rate": 0.01, "loss": 2.1249, "step": 7773 }, { "epoch": 0.7989314702558307, "grad_norm": 0.06425055861473083, "learning_rate": 0.01, "loss": 2.1291, "step": 7776 }, { "epoch": 0.7992396999897257, "grad_norm": 0.05537475273013115, "learning_rate": 0.01, "loss": 2.1122, "step": 7779 }, { "epoch": 0.7995479297236207, "grad_norm": 0.05172963812947273, "learning_rate": 0.01, "loss": 2.1218, "step": 7782 }, { "epoch": 0.7998561594575156, "grad_norm": 0.05907023698091507, "learning_rate": 0.01, "loss": 2.1041, "step": 7785 }, { "epoch": 0.8001643891914106, "grad_norm": 0.10618621110916138, "learning_rate": 0.01, "loss": 2.1266, "step": 7788 }, { "epoch": 0.8004726189253056, "grad_norm": 0.06189849600195885, "learning_rate": 0.01, "loss": 2.1327, "step": 7791 }, { "epoch": 0.8007808486592006, "grad_norm": 0.10624901950359344, "learning_rate": 0.01, "loss": 2.0943, "step": 7794 }, { "epoch": 0.8010890783930956, "grad_norm": 0.04061825945973396, "learning_rate": 0.01, "loss": 2.0859, "step": 7797 }, { "epoch": 0.8013973081269906, "grad_norm": 0.04402461647987366, "learning_rate": 0.01, "loss": 2.1303, "step": 7800 }, { "epoch": 0.8017055378608856, "grad_norm": 0.05029004439711571, "learning_rate": 0.01, "loss": 2.1224, "step": 7803 }, { "epoch": 0.8020137675947806, "grad_norm": 0.055786702781915665, "learning_rate": 0.01, "loss": 2.1296, "step": 7806 }, { "epoch": 0.8023219973286756, "grad_norm": 0.11740477383136749, "learning_rate": 0.01, "loss": 2.1222, "step": 7809 }, { "epoch": 0.8026302270625706, "grad_norm": 0.10261218994855881, "learning_rate": 0.01, "loss": 2.1405, "step": 7812 }, { "epoch": 0.8029384567964656, "grad_norm": 0.05233708769083023, "learning_rate": 0.01, "loss": 2.1118, "step": 7815 }, { "epoch": 0.8032466865303606, "grad_norm": 0.04390858858823776, "learning_rate": 0.01, "loss": 2.1299, "step": 7818 }, { "epoch": 0.8035549162642556, "grad_norm": 0.05893026292324066, "learning_rate": 0.01, "loss": 2.1184, "step": 7821 }, { "epoch": 0.8038631459981507, "grad_norm": 0.06398338079452515, "learning_rate": 0.01, "loss": 2.1057, "step": 7824 }, { "epoch": 0.8041713757320457, "grad_norm": 0.07129772752523422, "learning_rate": 0.01, "loss": 2.1056, "step": 7827 }, { "epoch": 0.8044796054659407, "grad_norm": 0.07481534779071808, "learning_rate": 0.01, "loss": 2.1272, "step": 7830 }, { "epoch": 0.8047878351998357, "grad_norm": 0.049200594425201416, "learning_rate": 0.01, "loss": 2.0942, "step": 7833 }, { "epoch": 0.8050960649337306, "grad_norm": 0.05124384164810181, "learning_rate": 0.01, "loss": 2.0859, "step": 7836 }, { "epoch": 0.8054042946676256, "grad_norm": 0.07997792959213257, "learning_rate": 0.01, "loss": 2.1412, "step": 7839 }, { "epoch": 0.8057125244015206, "grad_norm": 0.12280064076185226, "learning_rate": 0.01, "loss": 2.0826, "step": 7842 }, { "epoch": 0.8060207541354156, "grad_norm": 0.05292202904820442, "learning_rate": 0.01, "loss": 2.0965, "step": 7845 }, { "epoch": 0.8063289838693106, "grad_norm": 0.04903187230229378, "learning_rate": 0.01, "loss": 2.0911, "step": 7848 }, { "epoch": 0.8066372136032056, "grad_norm": 0.06882268935441971, "learning_rate": 0.01, "loss": 2.108, "step": 7851 }, { "epoch": 0.8069454433371006, "grad_norm": 0.06937083601951599, "learning_rate": 0.01, "loss": 2.1234, "step": 7854 }, { "epoch": 0.8072536730709956, "grad_norm": 0.10075647383928299, "learning_rate": 0.01, "loss": 2.0983, "step": 7857 }, { "epoch": 0.8075619028048906, "grad_norm": 0.07185733318328857, "learning_rate": 0.01, "loss": 2.0998, "step": 7860 }, { "epoch": 0.8078701325387856, "grad_norm": 0.07266184687614441, "learning_rate": 0.01, "loss": 2.1056, "step": 7863 }, { "epoch": 0.8081783622726806, "grad_norm": 0.05049808695912361, "learning_rate": 0.01, "loss": 2.1126, "step": 7866 }, { "epoch": 0.8084865920065756, "grad_norm": 0.07260838896036148, "learning_rate": 0.01, "loss": 2.1311, "step": 7869 }, { "epoch": 0.8087948217404706, "grad_norm": 0.0659325122833252, "learning_rate": 0.01, "loss": 2.1317, "step": 7872 }, { "epoch": 0.8091030514743656, "grad_norm": 0.056960709393024445, "learning_rate": 0.01, "loss": 2.0988, "step": 7875 }, { "epoch": 0.8094112812082606, "grad_norm": 0.1266620010137558, "learning_rate": 0.01, "loss": 2.1274, "step": 7878 }, { "epoch": 0.8097195109421556, "grad_norm": 0.05951874330639839, "learning_rate": 0.01, "loss": 2.1342, "step": 7881 }, { "epoch": 0.8100277406760505, "grad_norm": 0.06081915274262428, "learning_rate": 0.01, "loss": 2.1036, "step": 7884 }, { "epoch": 0.8103359704099455, "grad_norm": 0.07136547565460205, "learning_rate": 0.01, "loss": 2.1067, "step": 7887 }, { "epoch": 0.8106442001438405, "grad_norm": 0.08835722506046295, "learning_rate": 0.01, "loss": 2.1123, "step": 7890 }, { "epoch": 0.8109524298777355, "grad_norm": 0.04469553008675575, "learning_rate": 0.01, "loss": 2.1117, "step": 7893 }, { "epoch": 0.8112606596116305, "grad_norm": 0.042171087116003036, "learning_rate": 0.01, "loss": 2.0875, "step": 7896 }, { "epoch": 0.8115688893455255, "grad_norm": 0.0847015529870987, "learning_rate": 0.01, "loss": 2.0998, "step": 7899 }, { "epoch": 0.8118771190794205, "grad_norm": 0.09157509356737137, "learning_rate": 0.01, "loss": 2.121, "step": 7902 }, { "epoch": 0.8121853488133155, "grad_norm": 0.06001126766204834, "learning_rate": 0.01, "loss": 2.1441, "step": 7905 }, { "epoch": 0.8124935785472105, "grad_norm": 0.03552449122071266, "learning_rate": 0.01, "loss": 2.1371, "step": 7908 }, { "epoch": 0.8128018082811055, "grad_norm": 0.034304428845644, "learning_rate": 0.01, "loss": 2.1066, "step": 7911 }, { "epoch": 0.8131100380150005, "grad_norm": 0.04897907376289368, "learning_rate": 0.01, "loss": 2.1054, "step": 7914 }, { "epoch": 0.8134182677488955, "grad_norm": 0.06674344837665558, "learning_rate": 0.01, "loss": 2.1156, "step": 7917 }, { "epoch": 0.8137264974827905, "grad_norm": 0.06437379866838455, "learning_rate": 0.01, "loss": 2.1189, "step": 7920 }, { "epoch": 0.8140347272166855, "grad_norm": 0.06402087956666946, "learning_rate": 0.01, "loss": 2.1111, "step": 7923 }, { "epoch": 0.8143429569505805, "grad_norm": 0.11063557863235474, "learning_rate": 0.01, "loss": 2.1131, "step": 7926 }, { "epoch": 0.8146511866844754, "grad_norm": 0.10625256597995758, "learning_rate": 0.01, "loss": 2.113, "step": 7929 }, { "epoch": 0.8149594164183704, "grad_norm": 0.0682268813252449, "learning_rate": 0.01, "loss": 2.0929, "step": 7932 }, { "epoch": 0.8152676461522654, "grad_norm": 0.08721883594989777, "learning_rate": 0.01, "loss": 2.0878, "step": 7935 }, { "epoch": 0.8155758758861605, "grad_norm": 0.07372716814279556, "learning_rate": 0.01, "loss": 2.1173, "step": 7938 }, { "epoch": 0.8158841056200555, "grad_norm": 0.049299102276563644, "learning_rate": 0.01, "loss": 2.1172, "step": 7941 }, { "epoch": 0.8161923353539505, "grad_norm": 0.06552339345216751, "learning_rate": 0.01, "loss": 2.1035, "step": 7944 }, { "epoch": 0.8165005650878455, "grad_norm": 0.08362871408462524, "learning_rate": 0.01, "loss": 2.0942, "step": 7947 }, { "epoch": 0.8168087948217405, "grad_norm": 0.07610680162906647, "learning_rate": 0.01, "loss": 2.1026, "step": 7950 }, { "epoch": 0.8171170245556355, "grad_norm": 0.058830149471759796, "learning_rate": 0.01, "loss": 2.121, "step": 7953 }, { "epoch": 0.8174252542895305, "grad_norm": 0.10281010717153549, "learning_rate": 0.01, "loss": 2.1084, "step": 7956 }, { "epoch": 0.8177334840234255, "grad_norm": 0.04509102553129196, "learning_rate": 0.01, "loss": 2.0917, "step": 7959 }, { "epoch": 0.8180417137573205, "grad_norm": 0.034059979021549225, "learning_rate": 0.01, "loss": 2.1286, "step": 7962 }, { "epoch": 0.8183499434912155, "grad_norm": 0.09370562434196472, "learning_rate": 0.01, "loss": 2.1298, "step": 7965 }, { "epoch": 0.8186581732251105, "grad_norm": 0.09386254847049713, "learning_rate": 0.01, "loss": 2.1018, "step": 7968 }, { "epoch": 0.8189664029590055, "grad_norm": 0.1801362931728363, "learning_rate": 0.01, "loss": 2.1125, "step": 7971 }, { "epoch": 0.8192746326929005, "grad_norm": 0.12590090930461884, "learning_rate": 0.01, "loss": 2.1145, "step": 7974 }, { "epoch": 0.8195828624267955, "grad_norm": 0.09913074970245361, "learning_rate": 0.01, "loss": 2.1049, "step": 7977 }, { "epoch": 0.8198910921606904, "grad_norm": 0.05249069631099701, "learning_rate": 0.01, "loss": 2.1348, "step": 7980 }, { "epoch": 0.8201993218945854, "grad_norm": 0.05334639549255371, "learning_rate": 0.01, "loss": 2.0952, "step": 7983 }, { "epoch": 0.8205075516284804, "grad_norm": 0.03963373601436615, "learning_rate": 0.01, "loss": 2.1133, "step": 7986 }, { "epoch": 0.8208157813623754, "grad_norm": 0.03334924206137657, "learning_rate": 0.01, "loss": 2.1156, "step": 7989 }, { "epoch": 0.8211240110962704, "grad_norm": 0.0628419816493988, "learning_rate": 0.01, "loss": 2.1298, "step": 7992 }, { "epoch": 0.8214322408301654, "grad_norm": 0.07143758237361908, "learning_rate": 0.01, "loss": 2.0891, "step": 7995 }, { "epoch": 0.8217404705640604, "grad_norm": 0.06662650406360626, "learning_rate": 0.01, "loss": 2.0976, "step": 7998 }, { "epoch": 0.8220487002979554, "grad_norm": 0.10575726628303528, "learning_rate": 0.01, "loss": 2.0946, "step": 8001 }, { "epoch": 0.8223569300318504, "grad_norm": 0.056455157697200775, "learning_rate": 0.01, "loss": 2.0924, "step": 8004 }, { "epoch": 0.8226651597657454, "grad_norm": 0.10326797515153885, "learning_rate": 0.01, "loss": 2.0823, "step": 8007 }, { "epoch": 0.8229733894996404, "grad_norm": 0.08464314043521881, "learning_rate": 0.01, "loss": 2.1274, "step": 8010 }, { "epoch": 0.8232816192335354, "grad_norm": 0.052144117653369904, "learning_rate": 0.01, "loss": 2.0952, "step": 8013 }, { "epoch": 0.8235898489674304, "grad_norm": 0.05464213341474533, "learning_rate": 0.01, "loss": 2.1117, "step": 8016 }, { "epoch": 0.8238980787013254, "grad_norm": 0.06700276583433151, "learning_rate": 0.01, "loss": 2.1289, "step": 8019 }, { "epoch": 0.8242063084352204, "grad_norm": 0.05322539806365967, "learning_rate": 0.01, "loss": 2.1346, "step": 8022 }, { "epoch": 0.8245145381691154, "grad_norm": 0.040953267365694046, "learning_rate": 0.01, "loss": 2.1035, "step": 8025 }, { "epoch": 0.8248227679030103, "grad_norm": 0.043644580990076065, "learning_rate": 0.01, "loss": 2.1238, "step": 8028 }, { "epoch": 0.8251309976369053, "grad_norm": 0.060951683670282364, "learning_rate": 0.01, "loss": 2.1169, "step": 8031 }, { "epoch": 0.8254392273708003, "grad_norm": 0.11269400268793106, "learning_rate": 0.01, "loss": 2.1184, "step": 8034 }, { "epoch": 0.8257474571046953, "grad_norm": 0.05554080754518509, "learning_rate": 0.01, "loss": 2.112, "step": 8037 }, { "epoch": 0.8260556868385903, "grad_norm": 0.08155755698680878, "learning_rate": 0.01, "loss": 2.096, "step": 8040 }, { "epoch": 0.8263639165724853, "grad_norm": 0.0661015510559082, "learning_rate": 0.01, "loss": 2.0943, "step": 8043 }, { "epoch": 0.8266721463063803, "grad_norm": 0.06186169385910034, "learning_rate": 0.01, "loss": 2.1225, "step": 8046 }, { "epoch": 0.8269803760402753, "grad_norm": 0.06658541411161423, "learning_rate": 0.01, "loss": 2.1189, "step": 8049 }, { "epoch": 0.8272886057741703, "grad_norm": 0.09120085090398788, "learning_rate": 0.01, "loss": 2.1181, "step": 8052 }, { "epoch": 0.8275968355080654, "grad_norm": 0.059662993997335434, "learning_rate": 0.01, "loss": 2.1257, "step": 8055 }, { "epoch": 0.8279050652419604, "grad_norm": 0.08305416256189346, "learning_rate": 0.01, "loss": 2.1108, "step": 8058 }, { "epoch": 0.8282132949758554, "grad_norm": 0.047110967338085175, "learning_rate": 0.01, "loss": 2.0786, "step": 8061 }, { "epoch": 0.8285215247097504, "grad_norm": 0.042120445519685745, "learning_rate": 0.01, "loss": 2.1081, "step": 8064 }, { "epoch": 0.8288297544436454, "grad_norm": 0.04596862941980362, "learning_rate": 0.01, "loss": 2.1025, "step": 8067 }, { "epoch": 0.8291379841775404, "grad_norm": 0.055216096341609955, "learning_rate": 0.01, "loss": 2.1416, "step": 8070 }, { "epoch": 0.8294462139114354, "grad_norm": 0.05959683656692505, "learning_rate": 0.01, "loss": 2.0832, "step": 8073 }, { "epoch": 0.8297544436453304, "grad_norm": 0.045481909066438675, "learning_rate": 0.01, "loss": 2.1391, "step": 8076 }, { "epoch": 0.8300626733792253, "grad_norm": 0.049282100051641464, "learning_rate": 0.01, "loss": 2.1227, "step": 8079 }, { "epoch": 0.8303709031131203, "grad_norm": 0.058084890246391296, "learning_rate": 0.01, "loss": 2.1211, "step": 8082 }, { "epoch": 0.8306791328470153, "grad_norm": 0.11113768070936203, "learning_rate": 0.01, "loss": 2.132, "step": 8085 }, { "epoch": 0.8309873625809103, "grad_norm": 0.07015852630138397, "learning_rate": 0.01, "loss": 2.0988, "step": 8088 }, { "epoch": 0.8312955923148053, "grad_norm": 0.09509722143411636, "learning_rate": 0.01, "loss": 2.1064, "step": 8091 }, { "epoch": 0.8316038220487003, "grad_norm": 0.03616593778133392, "learning_rate": 0.01, "loss": 2.1078, "step": 8094 }, { "epoch": 0.8319120517825953, "grad_norm": 0.0486396960914135, "learning_rate": 0.01, "loss": 2.1279, "step": 8097 }, { "epoch": 0.8322202815164903, "grad_norm": 0.050647489726543427, "learning_rate": 0.01, "loss": 2.0808, "step": 8100 }, { "epoch": 0.8325285112503853, "grad_norm": 0.08125802874565125, "learning_rate": 0.01, "loss": 2.1099, "step": 8103 }, { "epoch": 0.8328367409842803, "grad_norm": 0.08078313618898392, "learning_rate": 0.01, "loss": 2.1162, "step": 8106 }, { "epoch": 0.8331449707181753, "grad_norm": 0.06157573312520981, "learning_rate": 0.01, "loss": 2.1288, "step": 8109 }, { "epoch": 0.8334532004520703, "grad_norm": 0.057771824300289154, "learning_rate": 0.01, "loss": 2.1136, "step": 8112 }, { "epoch": 0.8337614301859653, "grad_norm": 0.06634260714054108, "learning_rate": 0.01, "loss": 2.1029, "step": 8115 }, { "epoch": 0.8340696599198603, "grad_norm": 0.12186034023761749, "learning_rate": 0.01, "loss": 2.1236, "step": 8118 }, { "epoch": 0.8343778896537553, "grad_norm": 0.03940106928348541, "learning_rate": 0.01, "loss": 2.1169, "step": 8121 }, { "epoch": 0.8346861193876502, "grad_norm": 0.06003478914499283, "learning_rate": 0.01, "loss": 2.1108, "step": 8124 }, { "epoch": 0.8349943491215452, "grad_norm": 0.04963524639606476, "learning_rate": 0.01, "loss": 2.0893, "step": 8127 }, { "epoch": 0.8353025788554402, "grad_norm": 0.04543556645512581, "learning_rate": 0.01, "loss": 2.1062, "step": 8130 }, { "epoch": 0.8356108085893352, "grad_norm": 0.05210501328110695, "learning_rate": 0.01, "loss": 2.1065, "step": 8133 }, { "epoch": 0.8359190383232302, "grad_norm": 0.10866094380617142, "learning_rate": 0.01, "loss": 2.1154, "step": 8136 }, { "epoch": 0.8362272680571252, "grad_norm": 0.07595928758382797, "learning_rate": 0.01, "loss": 2.1079, "step": 8139 }, { "epoch": 0.8365354977910202, "grad_norm": 0.04948664829134941, "learning_rate": 0.01, "loss": 2.0955, "step": 8142 }, { "epoch": 0.8368437275249152, "grad_norm": 0.11719872057437897, "learning_rate": 0.01, "loss": 2.1106, "step": 8145 }, { "epoch": 0.8371519572588102, "grad_norm": 0.04469067603349686, "learning_rate": 0.01, "loss": 2.1145, "step": 8148 }, { "epoch": 0.8374601869927052, "grad_norm": 0.038385387510061264, "learning_rate": 0.01, "loss": 2.0933, "step": 8151 }, { "epoch": 0.8377684167266002, "grad_norm": 0.04040665924549103, "learning_rate": 0.01, "loss": 2.1119, "step": 8154 }, { "epoch": 0.8380766464604952, "grad_norm": 0.042900413274765015, "learning_rate": 0.01, "loss": 2.1053, "step": 8157 }, { "epoch": 0.8383848761943902, "grad_norm": 0.06709393113851547, "learning_rate": 0.01, "loss": 2.0858, "step": 8160 }, { "epoch": 0.8386931059282852, "grad_norm": 0.08388926833868027, "learning_rate": 0.01, "loss": 2.1287, "step": 8163 }, { "epoch": 0.8390013356621802, "grad_norm": 0.0701015368103981, "learning_rate": 0.01, "loss": 2.0965, "step": 8166 }, { "epoch": 0.8393095653960753, "grad_norm": 0.0841115415096283, "learning_rate": 0.01, "loss": 2.1136, "step": 8169 }, { "epoch": 0.8396177951299703, "grad_norm": 0.08542285114526749, "learning_rate": 0.01, "loss": 2.1166, "step": 8172 }, { "epoch": 0.8399260248638653, "grad_norm": 0.046626705676317215, "learning_rate": 0.01, "loss": 2.1318, "step": 8175 }, { "epoch": 0.8402342545977602, "grad_norm": 0.08752947300672531, "learning_rate": 0.01, "loss": 2.0873, "step": 8178 }, { "epoch": 0.8405424843316552, "grad_norm": 0.04728331416845322, "learning_rate": 0.01, "loss": 2.0951, "step": 8181 }, { "epoch": 0.8408507140655502, "grad_norm": 0.04881293699145317, "learning_rate": 0.01, "loss": 2.1011, "step": 8184 }, { "epoch": 0.8411589437994452, "grad_norm": 0.049758728593587875, "learning_rate": 0.01, "loss": 2.1206, "step": 8187 }, { "epoch": 0.8414671735333402, "grad_norm": 0.037589117884635925, "learning_rate": 0.01, "loss": 2.085, "step": 8190 }, { "epoch": 0.8417754032672352, "grad_norm": 0.11785265803337097, "learning_rate": 0.01, "loss": 2.1195, "step": 8193 }, { "epoch": 0.8420836330011302, "grad_norm": 0.09815037995576859, "learning_rate": 0.01, "loss": 2.065, "step": 8196 }, { "epoch": 0.8423918627350252, "grad_norm": 0.07950727641582489, "learning_rate": 0.01, "loss": 2.1081, "step": 8199 }, { "epoch": 0.8427000924689202, "grad_norm": 0.04057254642248154, "learning_rate": 0.01, "loss": 2.0882, "step": 8202 }, { "epoch": 0.8430083222028152, "grad_norm": 0.07260222733020782, "learning_rate": 0.01, "loss": 2.1018, "step": 8205 }, { "epoch": 0.8433165519367102, "grad_norm": 0.057693734765052795, "learning_rate": 0.01, "loss": 2.112, "step": 8208 }, { "epoch": 0.8436247816706052, "grad_norm": 0.039680637419223785, "learning_rate": 0.01, "loss": 2.0974, "step": 8211 }, { "epoch": 0.8439330114045002, "grad_norm": 0.07584577798843384, "learning_rate": 0.01, "loss": 2.129, "step": 8214 }, { "epoch": 0.8442412411383952, "grad_norm": 0.044016819447278976, "learning_rate": 0.01, "loss": 2.1139, "step": 8217 }, { "epoch": 0.8445494708722902, "grad_norm": 0.04449582099914551, "learning_rate": 0.01, "loss": 2.1085, "step": 8220 }, { "epoch": 0.8448577006061851, "grad_norm": 0.044676005840301514, "learning_rate": 0.01, "loss": 2.1081, "step": 8223 }, { "epoch": 0.8451659303400801, "grad_norm": 0.04926025867462158, "learning_rate": 0.01, "loss": 2.1187, "step": 8226 }, { "epoch": 0.8454741600739751, "grad_norm": 0.10964366793632507, "learning_rate": 0.01, "loss": 2.0898, "step": 8229 }, { "epoch": 0.8457823898078701, "grad_norm": 0.09405852109193802, "learning_rate": 0.01, "loss": 2.1117, "step": 8232 }, { "epoch": 0.8460906195417651, "grad_norm": 0.09241268038749695, "learning_rate": 0.01, "loss": 2.1225, "step": 8235 }, { "epoch": 0.8463988492756601, "grad_norm": 0.05887102335691452, "learning_rate": 0.01, "loss": 2.0944, "step": 8238 }, { "epoch": 0.8467070790095551, "grad_norm": 0.13513131439685822, "learning_rate": 0.01, "loss": 2.1217, "step": 8241 }, { "epoch": 0.8470153087434501, "grad_norm": 0.06370443850755692, "learning_rate": 0.01, "loss": 2.121, "step": 8244 }, { "epoch": 0.8473235384773451, "grad_norm": 0.0426030196249485, "learning_rate": 0.01, "loss": 2.0937, "step": 8247 }, { "epoch": 0.8476317682112401, "grad_norm": 0.049412764608860016, "learning_rate": 0.01, "loss": 2.108, "step": 8250 }, { "epoch": 0.8479399979451351, "grad_norm": 0.061407607048749924, "learning_rate": 0.01, "loss": 2.1009, "step": 8253 }, { "epoch": 0.8482482276790301, "grad_norm": 0.12416908144950867, "learning_rate": 0.01, "loss": 2.0819, "step": 8256 }, { "epoch": 0.8485564574129251, "grad_norm": 0.05728744715452194, "learning_rate": 0.01, "loss": 2.1132, "step": 8259 }, { "epoch": 0.8488646871468201, "grad_norm": 0.06724981963634491, "learning_rate": 0.01, "loss": 2.0932, "step": 8262 }, { "epoch": 0.8491729168807151, "grad_norm": 0.055260930210351944, "learning_rate": 0.01, "loss": 2.0933, "step": 8265 }, { "epoch": 0.84948114661461, "grad_norm": 0.04230106249451637, "learning_rate": 0.01, "loss": 2.1135, "step": 8268 }, { "epoch": 0.849789376348505, "grad_norm": 0.04593104496598244, "learning_rate": 0.01, "loss": 2.1091, "step": 8271 }, { "epoch": 0.8500976060824, "grad_norm": 0.09625285863876343, "learning_rate": 0.01, "loss": 2.0946, "step": 8274 }, { "epoch": 0.850405835816295, "grad_norm": 0.04556501284241676, "learning_rate": 0.01, "loss": 2.0975, "step": 8277 }, { "epoch": 0.85071406555019, "grad_norm": 0.09413543343544006, "learning_rate": 0.01, "loss": 2.0863, "step": 8280 }, { "epoch": 0.851022295284085, "grad_norm": 0.08400101214647293, "learning_rate": 0.01, "loss": 2.1045, "step": 8283 }, { "epoch": 0.8513305250179801, "grad_norm": 0.06278138607740402, "learning_rate": 0.01, "loss": 2.116, "step": 8286 }, { "epoch": 0.8516387547518751, "grad_norm": 0.04442959651350975, "learning_rate": 0.01, "loss": 2.0796, "step": 8289 }, { "epoch": 0.8519469844857701, "grad_norm": 0.045807912945747375, "learning_rate": 0.01, "loss": 2.0823, "step": 8292 }, { "epoch": 0.8522552142196651, "grad_norm": 0.0426551029086113, "learning_rate": 0.01, "loss": 2.1109, "step": 8295 }, { "epoch": 0.8525634439535601, "grad_norm": 0.12200357019901276, "learning_rate": 0.01, "loss": 2.1146, "step": 8298 }, { "epoch": 0.8528716736874551, "grad_norm": 0.04152747616171837, "learning_rate": 0.01, "loss": 2.1204, "step": 8301 }, { "epoch": 0.8531799034213501, "grad_norm": 0.08464021235704422, "learning_rate": 0.01, "loss": 2.085, "step": 8304 }, { "epoch": 0.8534881331552451, "grad_norm": 0.050391390919685364, "learning_rate": 0.01, "loss": 2.0957, "step": 8307 }, { "epoch": 0.8537963628891401, "grad_norm": 0.08581732958555222, "learning_rate": 0.01, "loss": 2.1015, "step": 8310 }, { "epoch": 0.8541045926230351, "grad_norm": 0.10992308706045151, "learning_rate": 0.01, "loss": 2.0939, "step": 8313 }, { "epoch": 0.8544128223569301, "grad_norm": 0.053225912153720856, "learning_rate": 0.01, "loss": 2.1106, "step": 8316 }, { "epoch": 0.854721052090825, "grad_norm": 0.06759096682071686, "learning_rate": 0.01, "loss": 2.1035, "step": 8319 }, { "epoch": 0.85502928182472, "grad_norm": 0.058069922029972076, "learning_rate": 0.01, "loss": 2.0833, "step": 8322 }, { "epoch": 0.855337511558615, "grad_norm": 0.0657680481672287, "learning_rate": 0.01, "loss": 2.0918, "step": 8325 }, { "epoch": 0.85564574129251, "grad_norm": 0.1428556591272354, "learning_rate": 0.01, "loss": 2.0895, "step": 8328 }, { "epoch": 0.855953971026405, "grad_norm": 0.10311869531869888, "learning_rate": 0.01, "loss": 2.1041, "step": 8331 }, { "epoch": 0.8562622007603, "grad_norm": 0.12024179100990295, "learning_rate": 0.01, "loss": 2.1179, "step": 8334 }, { "epoch": 0.856570430494195, "grad_norm": 0.08294446766376495, "learning_rate": 0.01, "loss": 2.1042, "step": 8337 }, { "epoch": 0.85687866022809, "grad_norm": 0.05203935503959656, "learning_rate": 0.01, "loss": 2.1275, "step": 8340 }, { "epoch": 0.857186889961985, "grad_norm": 0.061564356088638306, "learning_rate": 0.01, "loss": 2.0951, "step": 8343 }, { "epoch": 0.85749511969588, "grad_norm": 0.043616339564323425, "learning_rate": 0.01, "loss": 2.1082, "step": 8346 }, { "epoch": 0.857803349429775, "grad_norm": 0.0885004997253418, "learning_rate": 0.01, "loss": 2.1071, "step": 8349 }, { "epoch": 0.85811157916367, "grad_norm": 0.06275481730699539, "learning_rate": 0.01, "loss": 2.137, "step": 8352 }, { "epoch": 0.858419808897565, "grad_norm": 0.054776523262262344, "learning_rate": 0.01, "loss": 2.1117, "step": 8355 }, { "epoch": 0.85872803863146, "grad_norm": 0.07782801240682602, "learning_rate": 0.01, "loss": 2.0822, "step": 8358 }, { "epoch": 0.859036268365355, "grad_norm": 0.12301263958215714, "learning_rate": 0.01, "loss": 2.1126, "step": 8361 }, { "epoch": 0.85934449809925, "grad_norm": 0.07181745767593384, "learning_rate": 0.01, "loss": 2.1359, "step": 8364 }, { "epoch": 0.859652727833145, "grad_norm": 0.07232604175806046, "learning_rate": 0.01, "loss": 2.0849, "step": 8367 }, { "epoch": 0.8599609575670399, "grad_norm": 0.06810937821865082, "learning_rate": 0.01, "loss": 2.1088, "step": 8370 }, { "epoch": 0.8602691873009349, "grad_norm": 0.048163384199142456, "learning_rate": 0.01, "loss": 2.1173, "step": 8373 }, { "epoch": 0.8605774170348299, "grad_norm": 0.05688156560063362, "learning_rate": 0.01, "loss": 2.114, "step": 8376 }, { "epoch": 0.8608856467687249, "grad_norm": 0.065540611743927, "learning_rate": 0.01, "loss": 2.0989, "step": 8379 }, { "epoch": 0.8611938765026199, "grad_norm": 0.09561596065759659, "learning_rate": 0.01, "loss": 2.0894, "step": 8382 }, { "epoch": 0.8615021062365149, "grad_norm": 0.06719313561916351, "learning_rate": 0.01, "loss": 2.0928, "step": 8385 }, { "epoch": 0.8618103359704099, "grad_norm": 0.05895761027932167, "learning_rate": 0.01, "loss": 2.1037, "step": 8388 }, { "epoch": 0.8621185657043049, "grad_norm": 0.09232669323682785, "learning_rate": 0.01, "loss": 2.1272, "step": 8391 }, { "epoch": 0.8624267954381999, "grad_norm": 0.06715840846300125, "learning_rate": 0.01, "loss": 2.072, "step": 8394 }, { "epoch": 0.8627350251720949, "grad_norm": 0.04794420674443245, "learning_rate": 0.01, "loss": 2.1087, "step": 8397 }, { "epoch": 0.8630432549059899, "grad_norm": 0.037383124232292175, "learning_rate": 0.01, "loss": 2.0761, "step": 8400 }, { "epoch": 0.863351484639885, "grad_norm": 0.05601905286312103, "learning_rate": 0.01, "loss": 2.0926, "step": 8403 }, { "epoch": 0.86365971437378, "grad_norm": 0.0839313194155693, "learning_rate": 0.01, "loss": 2.0887, "step": 8406 }, { "epoch": 0.863967944107675, "grad_norm": 0.07600929588079453, "learning_rate": 0.01, "loss": 2.1143, "step": 8409 }, { "epoch": 0.86427617384157, "grad_norm": 0.06851659715175629, "learning_rate": 0.01, "loss": 2.0921, "step": 8412 }, { "epoch": 0.864584403575465, "grad_norm": 0.05021858587861061, "learning_rate": 0.01, "loss": 2.0903, "step": 8415 }, { "epoch": 0.86489263330936, "grad_norm": 0.04881426692008972, "learning_rate": 0.01, "loss": 2.1047, "step": 8418 }, { "epoch": 0.8652008630432549, "grad_norm": 0.04262546822428703, "learning_rate": 0.01, "loss": 2.0852, "step": 8421 }, { "epoch": 0.8655090927771499, "grad_norm": 0.050467535853385925, "learning_rate": 0.01, "loss": 2.1045, "step": 8424 }, { "epoch": 0.8658173225110449, "grad_norm": 0.0725008100271225, "learning_rate": 0.01, "loss": 2.077, "step": 8427 }, { "epoch": 0.8661255522449399, "grad_norm": 0.07234456390142441, "learning_rate": 0.01, "loss": 2.13, "step": 8430 }, { "epoch": 0.8664337819788349, "grad_norm": 0.060751501470804214, "learning_rate": 0.01, "loss": 2.0948, "step": 8433 }, { "epoch": 0.8667420117127299, "grad_norm": 0.058911584317684174, "learning_rate": 0.01, "loss": 2.0908, "step": 8436 }, { "epoch": 0.8670502414466249, "grad_norm": 0.08380532264709473, "learning_rate": 0.01, "loss": 2.1438, "step": 8439 }, { "epoch": 0.8673584711805199, "grad_norm": 0.058240536600351334, "learning_rate": 0.01, "loss": 2.1384, "step": 8442 }, { "epoch": 0.8676667009144149, "grad_norm": 0.0422792062163353, "learning_rate": 0.01, "loss": 2.0926, "step": 8445 }, { "epoch": 0.8679749306483099, "grad_norm": 0.07096652686595917, "learning_rate": 0.01, "loss": 2.1195, "step": 8448 }, { "epoch": 0.8682831603822049, "grad_norm": 0.13370642066001892, "learning_rate": 0.01, "loss": 2.1367, "step": 8451 }, { "epoch": 0.8685913901160999, "grad_norm": 0.0597628615796566, "learning_rate": 0.01, "loss": 2.087, "step": 8454 }, { "epoch": 0.8688996198499949, "grad_norm": 0.039561979472637177, "learning_rate": 0.01, "loss": 2.112, "step": 8457 }, { "epoch": 0.8692078495838899, "grad_norm": 0.04080485925078392, "learning_rate": 0.01, "loss": 2.1024, "step": 8460 }, { "epoch": 0.8695160793177849, "grad_norm": 0.05293022468686104, "learning_rate": 0.01, "loss": 2.0731, "step": 8463 }, { "epoch": 0.8698243090516798, "grad_norm": 0.06960830092430115, "learning_rate": 0.01, "loss": 2.1255, "step": 8466 }, { "epoch": 0.8701325387855748, "grad_norm": 0.09768849611282349, "learning_rate": 0.01, "loss": 2.1217, "step": 8469 }, { "epoch": 0.8704407685194698, "grad_norm": 0.11970885097980499, "learning_rate": 0.01, "loss": 2.0932, "step": 8472 }, { "epoch": 0.8707489982533648, "grad_norm": 0.12014521658420563, "learning_rate": 0.01, "loss": 2.1009, "step": 8475 }, { "epoch": 0.8710572279872598, "grad_norm": 0.04288540408015251, "learning_rate": 0.01, "loss": 2.1111, "step": 8478 }, { "epoch": 0.8713654577211548, "grad_norm": 0.033004507422447205, "learning_rate": 0.01, "loss": 2.1029, "step": 8481 }, { "epoch": 0.8716736874550498, "grad_norm": 0.03685779869556427, "learning_rate": 0.01, "loss": 2.1077, "step": 8484 }, { "epoch": 0.8719819171889448, "grad_norm": 0.06450948119163513, "learning_rate": 0.01, "loss": 2.102, "step": 8487 }, { "epoch": 0.8722901469228398, "grad_norm": 0.04806706681847572, "learning_rate": 0.01, "loss": 2.1056, "step": 8490 }, { "epoch": 0.8725983766567348, "grad_norm": 0.05847964435815811, "learning_rate": 0.01, "loss": 2.095, "step": 8493 }, { "epoch": 0.8729066063906298, "grad_norm": 0.11569567024707794, "learning_rate": 0.01, "loss": 2.1058, "step": 8496 }, { "epoch": 0.8732148361245248, "grad_norm": 0.04440119490027428, "learning_rate": 0.01, "loss": 2.1127, "step": 8499 }, { "epoch": 0.8735230658584198, "grad_norm": 0.13856938481330872, "learning_rate": 0.01, "loss": 2.1072, "step": 8502 }, { "epoch": 0.8738312955923148, "grad_norm": 0.06448937207460403, "learning_rate": 0.01, "loss": 2.0813, "step": 8505 }, { "epoch": 0.8741395253262098, "grad_norm": 0.05872811749577522, "learning_rate": 0.01, "loss": 2.1227, "step": 8508 }, { "epoch": 0.8744477550601047, "grad_norm": 0.06387540698051453, "learning_rate": 0.01, "loss": 2.099, "step": 8511 }, { "epoch": 0.8747559847939997, "grad_norm": 0.044399481266736984, "learning_rate": 0.01, "loss": 2.0989, "step": 8514 }, { "epoch": 0.8750642145278948, "grad_norm": 0.118850938975811, "learning_rate": 0.01, "loss": 2.1261, "step": 8517 }, { "epoch": 0.8753724442617898, "grad_norm": 0.05479248985648155, "learning_rate": 0.01, "loss": 2.0701, "step": 8520 }, { "epoch": 0.8756806739956848, "grad_norm": 0.06442543119192123, "learning_rate": 0.01, "loss": 2.0844, "step": 8523 }, { "epoch": 0.8759889037295798, "grad_norm": 0.054294027388095856, "learning_rate": 0.01, "loss": 2.1051, "step": 8526 }, { "epoch": 0.8762971334634748, "grad_norm": 0.04776893928647041, "learning_rate": 0.01, "loss": 2.1056, "step": 8529 }, { "epoch": 0.8766053631973698, "grad_norm": 0.06740310043096542, "learning_rate": 0.01, "loss": 2.0956, "step": 8532 }, { "epoch": 0.8769135929312648, "grad_norm": 0.048034511506557465, "learning_rate": 0.01, "loss": 2.1223, "step": 8535 }, { "epoch": 0.8772218226651598, "grad_norm": 0.05819391459226608, "learning_rate": 0.01, "loss": 2.1133, "step": 8538 }, { "epoch": 0.8775300523990548, "grad_norm": 0.06093437224626541, "learning_rate": 0.01, "loss": 2.0889, "step": 8541 }, { "epoch": 0.8778382821329498, "grad_norm": 0.04628787562251091, "learning_rate": 0.01, "loss": 2.1202, "step": 8544 }, { "epoch": 0.8781465118668448, "grad_norm": 0.0903085321187973, "learning_rate": 0.01, "loss": 2.0495, "step": 8547 }, { "epoch": 0.8784547416007398, "grad_norm": 0.06924945116043091, "learning_rate": 0.01, "loss": 2.1004, "step": 8550 }, { "epoch": 0.8787629713346348, "grad_norm": 0.04104374721646309, "learning_rate": 0.01, "loss": 2.0954, "step": 8553 }, { "epoch": 0.8790712010685298, "grad_norm": 0.11671441793441772, "learning_rate": 0.01, "loss": 2.1027, "step": 8556 }, { "epoch": 0.8793794308024248, "grad_norm": 0.10247964411973953, "learning_rate": 0.01, "loss": 2.0861, "step": 8559 }, { "epoch": 0.8796876605363197, "grad_norm": 0.03979288041591644, "learning_rate": 0.01, "loss": 2.1307, "step": 8562 }, { "epoch": 0.8799958902702147, "grad_norm": 0.0406351312994957, "learning_rate": 0.01, "loss": 2.0868, "step": 8565 }, { "epoch": 0.8803041200041097, "grad_norm": 0.04127006232738495, "learning_rate": 0.01, "loss": 2.0899, "step": 8568 }, { "epoch": 0.8806123497380047, "grad_norm": 0.04559047520160675, "learning_rate": 0.01, "loss": 2.1071, "step": 8571 }, { "epoch": 0.8809205794718997, "grad_norm": 0.12507610023021698, "learning_rate": 0.01, "loss": 2.0944, "step": 8574 }, { "epoch": 0.8812288092057947, "grad_norm": 0.042683400213718414, "learning_rate": 0.01, "loss": 2.078, "step": 8577 }, { "epoch": 0.8815370389396897, "grad_norm": 0.04022818058729172, "learning_rate": 0.01, "loss": 2.0797, "step": 8580 }, { "epoch": 0.8818452686735847, "grad_norm": 0.0382862351834774, "learning_rate": 0.01, "loss": 2.0859, "step": 8583 }, { "epoch": 0.8821534984074797, "grad_norm": 0.05260771885514259, "learning_rate": 0.01, "loss": 2.0832, "step": 8586 }, { "epoch": 0.8824617281413747, "grad_norm": 0.05381648615002632, "learning_rate": 0.01, "loss": 2.1211, "step": 8589 }, { "epoch": 0.8827699578752697, "grad_norm": 0.055818814784288406, "learning_rate": 0.01, "loss": 2.1108, "step": 8592 }, { "epoch": 0.8830781876091647, "grad_norm": 0.16680215299129486, "learning_rate": 0.01, "loss": 2.0961, "step": 8595 }, { "epoch": 0.8833864173430597, "grad_norm": 0.10034742951393127, "learning_rate": 0.01, "loss": 2.1187, "step": 8598 }, { "epoch": 0.8836946470769547, "grad_norm": 0.0827341303229332, "learning_rate": 0.01, "loss": 2.1112, "step": 8601 }, { "epoch": 0.8840028768108497, "grad_norm": 0.07657956331968307, "learning_rate": 0.01, "loss": 2.0711, "step": 8604 }, { "epoch": 0.8843111065447447, "grad_norm": 0.036220960319042206, "learning_rate": 0.01, "loss": 2.1097, "step": 8607 }, { "epoch": 0.8846193362786396, "grad_norm": 0.04672658443450928, "learning_rate": 0.01, "loss": 2.1099, "step": 8610 }, { "epoch": 0.8849275660125346, "grad_norm": 0.04827800393104553, "learning_rate": 0.01, "loss": 2.1081, "step": 8613 }, { "epoch": 0.8852357957464296, "grad_norm": 0.04962724447250366, "learning_rate": 0.01, "loss": 2.0895, "step": 8616 }, { "epoch": 0.8855440254803246, "grad_norm": 0.03474809601902962, "learning_rate": 0.01, "loss": 2.0942, "step": 8619 }, { "epoch": 0.8858522552142196, "grad_norm": 0.07395246624946594, "learning_rate": 0.01, "loss": 2.1145, "step": 8622 }, { "epoch": 0.8861604849481146, "grad_norm": 0.09853484481573105, "learning_rate": 0.01, "loss": 2.0991, "step": 8625 }, { "epoch": 0.8864687146820096, "grad_norm": 0.11892013251781464, "learning_rate": 0.01, "loss": 2.0968, "step": 8628 }, { "epoch": 0.8867769444159046, "grad_norm": 0.12780621647834778, "learning_rate": 0.01, "loss": 2.1154, "step": 8631 }, { "epoch": 0.8870851741497997, "grad_norm": 0.04470033943653107, "learning_rate": 0.01, "loss": 2.1027, "step": 8634 }, { "epoch": 0.8873934038836947, "grad_norm": 0.054323747754096985, "learning_rate": 0.01, "loss": 2.0952, "step": 8637 }, { "epoch": 0.8877016336175897, "grad_norm": 0.08175788819789886, "learning_rate": 0.01, "loss": 2.0882, "step": 8640 }, { "epoch": 0.8880098633514847, "grad_norm": 0.07456079125404358, "learning_rate": 0.01, "loss": 2.141, "step": 8643 }, { "epoch": 0.8883180930853797, "grad_norm": 0.055910736322402954, "learning_rate": 0.01, "loss": 2.1102, "step": 8646 }, { "epoch": 0.8886263228192747, "grad_norm": 0.05231192335486412, "learning_rate": 0.01, "loss": 2.1026, "step": 8649 }, { "epoch": 0.8889345525531697, "grad_norm": 0.05306578800082207, "learning_rate": 0.01, "loss": 2.1051, "step": 8652 }, { "epoch": 0.8892427822870647, "grad_norm": 0.05569072067737579, "learning_rate": 0.01, "loss": 2.0835, "step": 8655 }, { "epoch": 0.8895510120209597, "grad_norm": 0.050971515476703644, "learning_rate": 0.01, "loss": 2.0718, "step": 8658 }, { "epoch": 0.8898592417548546, "grad_norm": 0.061436936259269714, "learning_rate": 0.01, "loss": 2.1167, "step": 8661 }, { "epoch": 0.8901674714887496, "grad_norm": 0.04307536780834198, "learning_rate": 0.01, "loss": 2.0972, "step": 8664 }, { "epoch": 0.8904757012226446, "grad_norm": 0.1459832638502121, "learning_rate": 0.01, "loss": 2.1306, "step": 8667 }, { "epoch": 0.8907839309565396, "grad_norm": 0.05527958646416664, "learning_rate": 0.01, "loss": 2.0974, "step": 8670 }, { "epoch": 0.8910921606904346, "grad_norm": 0.1319393813610077, "learning_rate": 0.01, "loss": 2.1259, "step": 8673 }, { "epoch": 0.8914003904243296, "grad_norm": 0.06124665215611458, "learning_rate": 0.01, "loss": 2.0997, "step": 8676 }, { "epoch": 0.8917086201582246, "grad_norm": 0.08667455613613129, "learning_rate": 0.01, "loss": 2.0941, "step": 8679 }, { "epoch": 0.8920168498921196, "grad_norm": 0.06631213426589966, "learning_rate": 0.01, "loss": 2.1196, "step": 8682 }, { "epoch": 0.8923250796260146, "grad_norm": 0.060188647359609604, "learning_rate": 0.01, "loss": 2.0971, "step": 8685 }, { "epoch": 0.8926333093599096, "grad_norm": 0.039312943816185, "learning_rate": 0.01, "loss": 2.1119, "step": 8688 }, { "epoch": 0.8929415390938046, "grad_norm": 0.03959662839770317, "learning_rate": 0.01, "loss": 2.0897, "step": 8691 }, { "epoch": 0.8932497688276996, "grad_norm": 0.09711046516895294, "learning_rate": 0.01, "loss": 2.1133, "step": 8694 }, { "epoch": 0.8935579985615946, "grad_norm": 0.07965920865535736, "learning_rate": 0.01, "loss": 2.0635, "step": 8697 }, { "epoch": 0.8938662282954896, "grad_norm": 0.08770687133073807, "learning_rate": 0.01, "loss": 2.0885, "step": 8700 }, { "epoch": 0.8941744580293846, "grad_norm": 0.04591045528650284, "learning_rate": 0.01, "loss": 2.0926, "step": 8703 }, { "epoch": 0.8944826877632795, "grad_norm": 0.09602218866348267, "learning_rate": 0.01, "loss": 2.0856, "step": 8706 }, { "epoch": 0.8947909174971745, "grad_norm": 0.09482742100954056, "learning_rate": 0.01, "loss": 2.0966, "step": 8709 }, { "epoch": 0.8950991472310695, "grad_norm": 0.03937089815735817, "learning_rate": 0.01, "loss": 2.1043, "step": 8712 }, { "epoch": 0.8954073769649645, "grad_norm": 0.056832704693078995, "learning_rate": 0.01, "loss": 2.1165, "step": 8715 }, { "epoch": 0.8957156066988595, "grad_norm": 0.06370353698730469, "learning_rate": 0.01, "loss": 2.1144, "step": 8718 }, { "epoch": 0.8960238364327545, "grad_norm": 0.06752549856901169, "learning_rate": 0.01, "loss": 2.1026, "step": 8721 }, { "epoch": 0.8963320661666495, "grad_norm": 0.13301892578601837, "learning_rate": 0.01, "loss": 2.11, "step": 8724 }, { "epoch": 0.8966402959005445, "grad_norm": 0.05210836976766586, "learning_rate": 0.01, "loss": 2.0925, "step": 8727 }, { "epoch": 0.8969485256344395, "grad_norm": 0.03570270165801048, "learning_rate": 0.01, "loss": 2.0809, "step": 8730 }, { "epoch": 0.8972567553683345, "grad_norm": 0.05898820236325264, "learning_rate": 0.01, "loss": 2.0786, "step": 8733 }, { "epoch": 0.8975649851022295, "grad_norm": 0.05087563395500183, "learning_rate": 0.01, "loss": 2.1071, "step": 8736 }, { "epoch": 0.8978732148361245, "grad_norm": 0.09473355114459991, "learning_rate": 0.01, "loss": 2.103, "step": 8739 }, { "epoch": 0.8981814445700195, "grad_norm": 0.09793075919151306, "learning_rate": 0.01, "loss": 2.0972, "step": 8742 }, { "epoch": 0.8984896743039145, "grad_norm": 0.05115204304456711, "learning_rate": 0.01, "loss": 2.0979, "step": 8745 }, { "epoch": 0.8987979040378095, "grad_norm": 0.057413987815380096, "learning_rate": 0.01, "loss": 2.1156, "step": 8748 }, { "epoch": 0.8991061337717046, "grad_norm": 0.04136224836111069, "learning_rate": 0.01, "loss": 2.1269, "step": 8751 }, { "epoch": 0.8994143635055996, "grad_norm": 0.06866753846406937, "learning_rate": 0.01, "loss": 2.1092, "step": 8754 }, { "epoch": 0.8997225932394946, "grad_norm": 0.0757627934217453, "learning_rate": 0.01, "loss": 2.0933, "step": 8757 }, { "epoch": 0.9000308229733895, "grad_norm": 0.08082983642816544, "learning_rate": 0.01, "loss": 2.1124, "step": 8760 }, { "epoch": 0.9003390527072845, "grad_norm": 0.046828944236040115, "learning_rate": 0.01, "loss": 2.0978, "step": 8763 }, { "epoch": 0.9006472824411795, "grad_norm": 0.11039458215236664, "learning_rate": 0.01, "loss": 2.0989, "step": 8766 }, { "epoch": 0.9009555121750745, "grad_norm": 0.048537638038396835, "learning_rate": 0.01, "loss": 2.0946, "step": 8769 }, { "epoch": 0.9012637419089695, "grad_norm": 0.06700310111045837, "learning_rate": 0.01, "loss": 2.1184, "step": 8772 }, { "epoch": 0.9015719716428645, "grad_norm": 0.044369909912347794, "learning_rate": 0.01, "loss": 2.1026, "step": 8775 }, { "epoch": 0.9018802013767595, "grad_norm": 0.041071876883506775, "learning_rate": 0.01, "loss": 2.0774, "step": 8778 }, { "epoch": 0.9021884311106545, "grad_norm": 0.04735315591096878, "learning_rate": 0.01, "loss": 2.0812, "step": 8781 }, { "epoch": 0.9024966608445495, "grad_norm": 0.11621284484863281, "learning_rate": 0.01, "loss": 2.0766, "step": 8784 }, { "epoch": 0.9028048905784445, "grad_norm": 0.11453153938055038, "learning_rate": 0.01, "loss": 2.0866, "step": 8787 }, { "epoch": 0.9031131203123395, "grad_norm": 0.057418763637542725, "learning_rate": 0.01, "loss": 2.081, "step": 8790 }, { "epoch": 0.9034213500462345, "grad_norm": 0.041579000651836395, "learning_rate": 0.01, "loss": 2.1154, "step": 8793 }, { "epoch": 0.9037295797801295, "grad_norm": 0.045673951506614685, "learning_rate": 0.01, "loss": 2.1241, "step": 8796 }, { "epoch": 0.9040378095140245, "grad_norm": 0.05963718518614769, "learning_rate": 0.01, "loss": 2.0955, "step": 8799 }, { "epoch": 0.9043460392479195, "grad_norm": 0.04776541888713837, "learning_rate": 0.01, "loss": 2.1138, "step": 8802 }, { "epoch": 0.9046542689818144, "grad_norm": 0.09103482216596603, "learning_rate": 0.01, "loss": 2.1192, "step": 8805 }, { "epoch": 0.9049624987157094, "grad_norm": 0.09218809008598328, "learning_rate": 0.01, "loss": 2.0985, "step": 8808 }, { "epoch": 0.9052707284496044, "grad_norm": 0.10253725945949554, "learning_rate": 0.01, "loss": 2.1189, "step": 8811 }, { "epoch": 0.9055789581834994, "grad_norm": 0.09638465940952301, "learning_rate": 0.01, "loss": 2.1008, "step": 8814 }, { "epoch": 0.9058871879173944, "grad_norm": 0.0947449579834938, "learning_rate": 0.01, "loss": 2.1222, "step": 8817 }, { "epoch": 0.9061954176512894, "grad_norm": 0.04588090255856514, "learning_rate": 0.01, "loss": 2.1198, "step": 8820 }, { "epoch": 0.9065036473851844, "grad_norm": 0.05041109770536423, "learning_rate": 0.01, "loss": 2.0843, "step": 8823 }, { "epoch": 0.9068118771190794, "grad_norm": 0.038898076862096786, "learning_rate": 0.01, "loss": 2.125, "step": 8826 }, { "epoch": 0.9071201068529744, "grad_norm": 0.03356321156024933, "learning_rate": 0.01, "loss": 2.0985, "step": 8829 }, { "epoch": 0.9074283365868694, "grad_norm": 0.04668448120355606, "learning_rate": 0.01, "loss": 2.1071, "step": 8832 }, { "epoch": 0.9077365663207644, "grad_norm": 0.051277391612529755, "learning_rate": 0.01, "loss": 2.0702, "step": 8835 }, { "epoch": 0.9080447960546594, "grad_norm": 0.049883171916007996, "learning_rate": 0.01, "loss": 2.1111, "step": 8838 }, { "epoch": 0.9083530257885544, "grad_norm": 0.04149313643574715, "learning_rate": 0.01, "loss": 2.0991, "step": 8841 }, { "epoch": 0.9086612555224494, "grad_norm": 0.09206261485815048, "learning_rate": 0.01, "loss": 2.0961, "step": 8844 }, { "epoch": 0.9089694852563444, "grad_norm": 0.1830751895904541, "learning_rate": 0.01, "loss": 2.1093, "step": 8847 }, { "epoch": 0.9092777149902393, "grad_norm": 0.0757865458726883, "learning_rate": 0.01, "loss": 2.115, "step": 8850 }, { "epoch": 0.9095859447241343, "grad_norm": 0.06030673533678055, "learning_rate": 0.01, "loss": 2.0874, "step": 8853 }, { "epoch": 0.9098941744580293, "grad_norm": 0.03440079465508461, "learning_rate": 0.01, "loss": 2.0997, "step": 8856 }, { "epoch": 0.9102024041919243, "grad_norm": 0.040004558861255646, "learning_rate": 0.01, "loss": 2.0767, "step": 8859 }, { "epoch": 0.9105106339258193, "grad_norm": 0.033261023461818695, "learning_rate": 0.01, "loss": 2.0834, "step": 8862 }, { "epoch": 0.9108188636597144, "grad_norm": 0.04814066365361214, "learning_rate": 0.01, "loss": 2.0868, "step": 8865 }, { "epoch": 0.9111270933936094, "grad_norm": 0.04939806088805199, "learning_rate": 0.01, "loss": 2.0944, "step": 8868 }, { "epoch": 0.9114353231275044, "grad_norm": 0.05242007225751877, "learning_rate": 0.01, "loss": 2.1035, "step": 8871 }, { "epoch": 0.9117435528613994, "grad_norm": 0.04576495289802551, "learning_rate": 0.01, "loss": 2.0881, "step": 8874 }, { "epoch": 0.9120517825952944, "grad_norm": 0.0369776152074337, "learning_rate": 0.01, "loss": 2.1017, "step": 8877 }, { "epoch": 0.9123600123291894, "grad_norm": 0.08296829462051392, "learning_rate": 0.01, "loss": 2.1199, "step": 8880 }, { "epoch": 0.9126682420630844, "grad_norm": 0.07186676561832428, "learning_rate": 0.01, "loss": 2.0906, "step": 8883 }, { "epoch": 0.9129764717969794, "grad_norm": 0.06849399209022522, "learning_rate": 0.01, "loss": 2.0944, "step": 8886 }, { "epoch": 0.9132847015308744, "grad_norm": 0.1285102367401123, "learning_rate": 0.01, "loss": 2.0959, "step": 8889 }, { "epoch": 0.9135929312647694, "grad_norm": 0.045700203627347946, "learning_rate": 0.01, "loss": 2.0924, "step": 8892 }, { "epoch": 0.9139011609986644, "grad_norm": 0.04561945050954819, "learning_rate": 0.01, "loss": 2.1126, "step": 8895 }, { "epoch": 0.9142093907325594, "grad_norm": 0.0417817123234272, "learning_rate": 0.01, "loss": 2.0692, "step": 8898 }, { "epoch": 0.9145176204664544, "grad_norm": 0.07923369109630585, "learning_rate": 0.01, "loss": 2.1059, "step": 8901 }, { "epoch": 0.9148258502003493, "grad_norm": 0.052836060523986816, "learning_rate": 0.01, "loss": 2.1089, "step": 8904 }, { "epoch": 0.9151340799342443, "grad_norm": 0.04591790586709976, "learning_rate": 0.01, "loss": 2.1007, "step": 8907 }, { "epoch": 0.9154423096681393, "grad_norm": 0.09871240705251694, "learning_rate": 0.01, "loss": 2.0718, "step": 8910 }, { "epoch": 0.9157505394020343, "grad_norm": 0.044554613530635834, "learning_rate": 0.01, "loss": 2.0956, "step": 8913 }, { "epoch": 0.9160587691359293, "grad_norm": 0.10009585320949554, "learning_rate": 0.01, "loss": 2.0838, "step": 8916 }, { "epoch": 0.9163669988698243, "grad_norm": 0.07252159714698792, "learning_rate": 0.01, "loss": 2.0973, "step": 8919 }, { "epoch": 0.9166752286037193, "grad_norm": 0.09162852168083191, "learning_rate": 0.01, "loss": 2.0961, "step": 8922 }, { "epoch": 0.9169834583376143, "grad_norm": 0.06149733439087868, "learning_rate": 0.01, "loss": 2.1377, "step": 8925 }, { "epoch": 0.9172916880715093, "grad_norm": 0.09315814077854156, "learning_rate": 0.01, "loss": 2.0901, "step": 8928 }, { "epoch": 0.9175999178054043, "grad_norm": 0.056877728551626205, "learning_rate": 0.01, "loss": 2.0934, "step": 8931 }, { "epoch": 0.9179081475392993, "grad_norm": 0.0976705476641655, "learning_rate": 0.01, "loss": 2.0791, "step": 8934 }, { "epoch": 0.9182163772731943, "grad_norm": 0.0493176206946373, "learning_rate": 0.01, "loss": 2.0937, "step": 8937 }, { "epoch": 0.9185246070070893, "grad_norm": 0.06268187612295151, "learning_rate": 0.01, "loss": 2.1053, "step": 8940 }, { "epoch": 0.9188328367409843, "grad_norm": 0.049251820892095566, "learning_rate": 0.01, "loss": 2.1104, "step": 8943 }, { "epoch": 0.9191410664748793, "grad_norm": 0.05342431366443634, "learning_rate": 0.01, "loss": 2.1005, "step": 8946 }, { "epoch": 0.9194492962087742, "grad_norm": 0.036090634763240814, "learning_rate": 0.01, "loss": 2.0815, "step": 8949 }, { "epoch": 0.9197575259426692, "grad_norm": 0.0320359505712986, "learning_rate": 0.01, "loss": 2.0704, "step": 8952 }, { "epoch": 0.9200657556765642, "grad_norm": 0.03514352813363075, "learning_rate": 0.01, "loss": 2.1046, "step": 8955 }, { "epoch": 0.9203739854104592, "grad_norm": 0.06132291629910469, "learning_rate": 0.01, "loss": 2.0887, "step": 8958 }, { "epoch": 0.9206822151443542, "grad_norm": 0.07312822341918945, "learning_rate": 0.01, "loss": 2.1079, "step": 8961 }, { "epoch": 0.9209904448782492, "grad_norm": 0.09670150279998779, "learning_rate": 0.01, "loss": 2.1195, "step": 8964 }, { "epoch": 0.9212986746121442, "grad_norm": 0.1106385663151741, "learning_rate": 0.01, "loss": 2.0809, "step": 8967 }, { "epoch": 0.9216069043460392, "grad_norm": 0.05964332073926926, "learning_rate": 0.01, "loss": 2.1108, "step": 8970 }, { "epoch": 0.9219151340799342, "grad_norm": 0.05584556236863136, "learning_rate": 0.01, "loss": 2.1274, "step": 8973 }, { "epoch": 0.9222233638138292, "grad_norm": 0.04485652595758438, "learning_rate": 0.01, "loss": 2.0627, "step": 8976 }, { "epoch": 0.9225315935477242, "grad_norm": 0.07286686450242996, "learning_rate": 0.01, "loss": 2.1087, "step": 8979 }, { "epoch": 0.9228398232816193, "grad_norm": 0.10815869271755219, "learning_rate": 0.01, "loss": 2.1057, "step": 8982 }, { "epoch": 0.9231480530155143, "grad_norm": 0.1037832722067833, "learning_rate": 0.01, "loss": 2.0836, "step": 8985 }, { "epoch": 0.9234562827494093, "grad_norm": 0.08297618478536606, "learning_rate": 0.01, "loss": 2.1181, "step": 8988 }, { "epoch": 0.9237645124833043, "grad_norm": 0.04203306511044502, "learning_rate": 0.01, "loss": 2.1112, "step": 8991 }, { "epoch": 0.9240727422171993, "grad_norm": 0.06641580909490585, "learning_rate": 0.01, "loss": 2.1004, "step": 8994 }, { "epoch": 0.9243809719510943, "grad_norm": 0.04921744763851166, "learning_rate": 0.01, "loss": 2.1116, "step": 8997 }, { "epoch": 0.9246892016849892, "grad_norm": 0.03472235053777695, "learning_rate": 0.01, "loss": 2.0777, "step": 9000 }, { "epoch": 0.9249974314188842, "grad_norm": 0.03650922700762749, "learning_rate": 0.01, "loss": 2.0802, "step": 9003 }, { "epoch": 0.9253056611527792, "grad_norm": 0.04657342657446861, "learning_rate": 0.01, "loss": 2.0773, "step": 9006 }, { "epoch": 0.9256138908866742, "grad_norm": 0.05943501368165016, "learning_rate": 0.01, "loss": 2.0753, "step": 9009 }, { "epoch": 0.9259221206205692, "grad_norm": 0.04763554409146309, "learning_rate": 0.01, "loss": 2.0959, "step": 9012 }, { "epoch": 0.9262303503544642, "grad_norm": 0.1267511248588562, "learning_rate": 0.01, "loss": 2.0971, "step": 9015 }, { "epoch": 0.9265385800883592, "grad_norm": 0.055529460310935974, "learning_rate": 0.01, "loss": 2.1327, "step": 9018 }, { "epoch": 0.9268468098222542, "grad_norm": 0.15508927404880524, "learning_rate": 0.01, "loss": 2.0947, "step": 9021 }, { "epoch": 0.9271550395561492, "grad_norm": 0.0593777671456337, "learning_rate": 0.01, "loss": 2.1171, "step": 9024 }, { "epoch": 0.9274632692900442, "grad_norm": 0.08907107263803482, "learning_rate": 0.01, "loss": 2.093, "step": 9027 }, { "epoch": 0.9277714990239392, "grad_norm": 0.07041808217763901, "learning_rate": 0.01, "loss": 2.0676, "step": 9030 }, { "epoch": 0.9280797287578342, "grad_norm": 0.03434208780527115, "learning_rate": 0.01, "loss": 2.0928, "step": 9033 }, { "epoch": 0.9283879584917292, "grad_norm": 0.07591548562049866, "learning_rate": 0.01, "loss": 2.0857, "step": 9036 }, { "epoch": 0.9286961882256242, "grad_norm": 0.08999443799257278, "learning_rate": 0.01, "loss": 2.0984, "step": 9039 }, { "epoch": 0.9290044179595192, "grad_norm": 0.11046464741230011, "learning_rate": 0.01, "loss": 2.1009, "step": 9042 }, { "epoch": 0.9293126476934142, "grad_norm": 0.08271370083093643, "learning_rate": 0.01, "loss": 2.1027, "step": 9045 }, { "epoch": 0.9296208774273091, "grad_norm": 0.046337299048900604, "learning_rate": 0.01, "loss": 2.0826, "step": 9048 }, { "epoch": 0.9299291071612041, "grad_norm": 0.037284769117832184, "learning_rate": 0.01, "loss": 2.1015, "step": 9051 }, { "epoch": 0.9302373368950991, "grad_norm": 0.04956496134400368, "learning_rate": 0.01, "loss": 2.1036, "step": 9054 }, { "epoch": 0.9305455666289941, "grad_norm": 0.12329571694135666, "learning_rate": 0.01, "loss": 2.0917, "step": 9057 }, { "epoch": 0.9308537963628891, "grad_norm": 0.06971380859613419, "learning_rate": 0.01, "loss": 2.114, "step": 9060 }, { "epoch": 0.9311620260967841, "grad_norm": 0.06084508076310158, "learning_rate": 0.01, "loss": 2.1122, "step": 9063 }, { "epoch": 0.9314702558306791, "grad_norm": 0.049602411687374115, "learning_rate": 0.01, "loss": 2.1268, "step": 9066 }, { "epoch": 0.9317784855645741, "grad_norm": 0.05200349539518356, "learning_rate": 0.01, "loss": 2.0979, "step": 9069 }, { "epoch": 0.9320867152984691, "grad_norm": 0.05793909728527069, "learning_rate": 0.01, "loss": 2.096, "step": 9072 }, { "epoch": 0.9323949450323641, "grad_norm": 0.10819883644580841, "learning_rate": 0.01, "loss": 2.1096, "step": 9075 }, { "epoch": 0.9327031747662591, "grad_norm": 0.07809442281723022, "learning_rate": 0.01, "loss": 2.0968, "step": 9078 }, { "epoch": 0.9330114045001541, "grad_norm": 0.09595733880996704, "learning_rate": 0.01, "loss": 2.0769, "step": 9081 }, { "epoch": 0.9333196342340491, "grad_norm": 0.11658616364002228, "learning_rate": 0.01, "loss": 2.0945, "step": 9084 }, { "epoch": 0.9336278639679441, "grad_norm": 0.07642678171396255, "learning_rate": 0.01, "loss": 2.0811, "step": 9087 }, { "epoch": 0.933936093701839, "grad_norm": 0.03174865245819092, "learning_rate": 0.01, "loss": 2.1017, "step": 9090 }, { "epoch": 0.934244323435734, "grad_norm": 0.05137626454234123, "learning_rate": 0.01, "loss": 2.0878, "step": 9093 }, { "epoch": 0.9345525531696292, "grad_norm": 0.05306951329112053, "learning_rate": 0.01, "loss": 2.1163, "step": 9096 }, { "epoch": 0.9348607829035241, "grad_norm": 0.0716642439365387, "learning_rate": 0.01, "loss": 2.0903, "step": 9099 }, { "epoch": 0.9351690126374191, "grad_norm": 0.10328514873981476, "learning_rate": 0.01, "loss": 2.0789, "step": 9102 }, { "epoch": 0.9354772423713141, "grad_norm": 0.04914560168981552, "learning_rate": 0.01, "loss": 2.0963, "step": 9105 }, { "epoch": 0.9357854721052091, "grad_norm": 0.04810576140880585, "learning_rate": 0.01, "loss": 2.1119, "step": 9108 }, { "epoch": 0.9360937018391041, "grad_norm": 0.05689787119626999, "learning_rate": 0.01, "loss": 2.0955, "step": 9111 }, { "epoch": 0.9364019315729991, "grad_norm": 0.06455382704734802, "learning_rate": 0.01, "loss": 2.0894, "step": 9114 }, { "epoch": 0.9367101613068941, "grad_norm": 0.044911760836839676, "learning_rate": 0.01, "loss": 2.0967, "step": 9117 }, { "epoch": 0.9370183910407891, "grad_norm": 0.06244887784123421, "learning_rate": 0.01, "loss": 2.0921, "step": 9120 }, { "epoch": 0.9373266207746841, "grad_norm": 0.052621614187955856, "learning_rate": 0.01, "loss": 2.1296, "step": 9123 }, { "epoch": 0.9376348505085791, "grad_norm": 0.05098232626914978, "learning_rate": 0.01, "loss": 2.0807, "step": 9126 }, { "epoch": 0.9379430802424741, "grad_norm": 0.058582011610269547, "learning_rate": 0.01, "loss": 2.0973, "step": 9129 }, { "epoch": 0.9382513099763691, "grad_norm": 0.10984500497579575, "learning_rate": 0.01, "loss": 2.0789, "step": 9132 }, { "epoch": 0.9385595397102641, "grad_norm": 0.045173123478889465, "learning_rate": 0.01, "loss": 2.0937, "step": 9135 }, { "epoch": 0.9388677694441591, "grad_norm": 0.06749478727579117, "learning_rate": 0.01, "loss": 2.1051, "step": 9138 }, { "epoch": 0.939175999178054, "grad_norm": 0.06236808001995087, "learning_rate": 0.01, "loss": 2.1099, "step": 9141 }, { "epoch": 0.939484228911949, "grad_norm": 0.06205837428569794, "learning_rate": 0.01, "loss": 2.0893, "step": 9144 }, { "epoch": 0.939792458645844, "grad_norm": 0.0742972195148468, "learning_rate": 0.01, "loss": 2.1034, "step": 9147 }, { "epoch": 0.940100688379739, "grad_norm": 0.06998419016599655, "learning_rate": 0.01, "loss": 2.0558, "step": 9150 }, { "epoch": 0.940408918113634, "grad_norm": 0.04214362055063248, "learning_rate": 0.01, "loss": 2.0968, "step": 9153 }, { "epoch": 0.940717147847529, "grad_norm": 0.055913276970386505, "learning_rate": 0.01, "loss": 2.0736, "step": 9156 }, { "epoch": 0.941025377581424, "grad_norm": 0.0941486805677414, "learning_rate": 0.01, "loss": 2.1038, "step": 9159 }, { "epoch": 0.941333607315319, "grad_norm": 0.05609782040119171, "learning_rate": 0.01, "loss": 2.096, "step": 9162 }, { "epoch": 0.941641837049214, "grad_norm": 0.05714662745594978, "learning_rate": 0.01, "loss": 2.0939, "step": 9165 }, { "epoch": 0.941950066783109, "grad_norm": 0.05364496633410454, "learning_rate": 0.01, "loss": 2.0838, "step": 9168 }, { "epoch": 0.942258296517004, "grad_norm": 0.050090350210666656, "learning_rate": 0.01, "loss": 2.087, "step": 9171 }, { "epoch": 0.942566526250899, "grad_norm": 0.07287559658288956, "learning_rate": 0.01, "loss": 2.098, "step": 9174 }, { "epoch": 0.942874755984794, "grad_norm": 0.04061901941895485, "learning_rate": 0.01, "loss": 2.0677, "step": 9177 }, { "epoch": 0.943182985718689, "grad_norm": 0.10750306397676468, "learning_rate": 0.01, "loss": 2.1105, "step": 9180 }, { "epoch": 0.943491215452584, "grad_norm": 0.10353365540504456, "learning_rate": 0.01, "loss": 2.0712, "step": 9183 }, { "epoch": 0.943799445186479, "grad_norm": 0.07502592355012894, "learning_rate": 0.01, "loss": 2.1115, "step": 9186 }, { "epoch": 0.944107674920374, "grad_norm": 0.046962104737758636, "learning_rate": 0.01, "loss": 2.0937, "step": 9189 }, { "epoch": 0.944415904654269, "grad_norm": 0.05084332078695297, "learning_rate": 0.01, "loss": 2.0943, "step": 9192 }, { "epoch": 0.9447241343881639, "grad_norm": 0.0458371527493, "learning_rate": 0.01, "loss": 2.0967, "step": 9195 }, { "epoch": 0.9450323641220589, "grad_norm": 0.040458545088768005, "learning_rate": 0.01, "loss": 2.0949, "step": 9198 }, { "epoch": 0.9453405938559539, "grad_norm": 0.046158358454704285, "learning_rate": 0.01, "loss": 2.0912, "step": 9201 }, { "epoch": 0.9456488235898489, "grad_norm": 0.10080043226480484, "learning_rate": 0.01, "loss": 2.1, "step": 9204 }, { "epoch": 0.9459570533237439, "grad_norm": 0.07679333537817001, "learning_rate": 0.01, "loss": 2.1013, "step": 9207 }, { "epoch": 0.9462652830576389, "grad_norm": 0.07189175486564636, "learning_rate": 0.01, "loss": 2.1008, "step": 9210 }, { "epoch": 0.946573512791534, "grad_norm": 0.07828579097986221, "learning_rate": 0.01, "loss": 2.1063, "step": 9213 }, { "epoch": 0.946881742525429, "grad_norm": 0.07649674266576767, "learning_rate": 0.01, "loss": 2.1146, "step": 9216 }, { "epoch": 0.947189972259324, "grad_norm": 0.06558651477098465, "learning_rate": 0.01, "loss": 2.0705, "step": 9219 }, { "epoch": 0.947498201993219, "grad_norm": 0.03276702016592026, "learning_rate": 0.01, "loss": 2.1065, "step": 9222 }, { "epoch": 0.947806431727114, "grad_norm": 0.03779645636677742, "learning_rate": 0.01, "loss": 2.0924, "step": 9225 }, { "epoch": 0.948114661461009, "grad_norm": 0.048466913402080536, "learning_rate": 0.01, "loss": 2.1037, "step": 9228 }, { "epoch": 0.948422891194904, "grad_norm": 0.04391203075647354, "learning_rate": 0.01, "loss": 2.0722, "step": 9231 }, { "epoch": 0.948731120928799, "grad_norm": 0.11353743076324463, "learning_rate": 0.01, "loss": 2.113, "step": 9234 }, { "epoch": 0.949039350662694, "grad_norm": 0.045930709689855576, "learning_rate": 0.01, "loss": 2.0699, "step": 9237 }, { "epoch": 0.949347580396589, "grad_norm": 0.06440164893865585, "learning_rate": 0.01, "loss": 2.0786, "step": 9240 }, { "epoch": 0.949655810130484, "grad_norm": 0.08666238933801651, "learning_rate": 0.01, "loss": 2.1049, "step": 9243 }, { "epoch": 0.9499640398643789, "grad_norm": 0.11012524366378784, "learning_rate": 0.01, "loss": 2.1018, "step": 9246 }, { "epoch": 0.9502722695982739, "grad_norm": 0.047307875007390976, "learning_rate": 0.01, "loss": 2.0943, "step": 9249 }, { "epoch": 0.9505804993321689, "grad_norm": 0.04565277695655823, "learning_rate": 0.01, "loss": 2.1174, "step": 9252 }, { "epoch": 0.9508887290660639, "grad_norm": 0.03389623388648033, "learning_rate": 0.01, "loss": 2.0896, "step": 9255 }, { "epoch": 0.9511969587999589, "grad_norm": 0.04582008346915245, "learning_rate": 0.01, "loss": 2.0888, "step": 9258 }, { "epoch": 0.9515051885338539, "grad_norm": 0.07722247391939163, "learning_rate": 0.01, "loss": 2.0843, "step": 9261 }, { "epoch": 0.9518134182677489, "grad_norm": 0.03505149856209755, "learning_rate": 0.01, "loss": 2.0903, "step": 9264 }, { "epoch": 0.9521216480016439, "grad_norm": 0.08010539412498474, "learning_rate": 0.01, "loss": 2.1249, "step": 9267 }, { "epoch": 0.9524298777355389, "grad_norm": 0.0723007321357727, "learning_rate": 0.01, "loss": 2.0951, "step": 9270 }, { "epoch": 0.9527381074694339, "grad_norm": 0.05629736930131912, "learning_rate": 0.01, "loss": 2.0948, "step": 9273 }, { "epoch": 0.9530463372033289, "grad_norm": 0.05514506623148918, "learning_rate": 0.01, "loss": 2.1214, "step": 9276 }, { "epoch": 0.9533545669372239, "grad_norm": 0.1107834130525589, "learning_rate": 0.01, "loss": 2.0876, "step": 9279 }, { "epoch": 0.9536627966711189, "grad_norm": 0.046309590339660645, "learning_rate": 0.01, "loss": 2.0669, "step": 9282 }, { "epoch": 0.9539710264050139, "grad_norm": 0.06956466287374496, "learning_rate": 0.01, "loss": 2.0903, "step": 9285 }, { "epoch": 0.9542792561389088, "grad_norm": 0.086011603474617, "learning_rate": 0.01, "loss": 2.0896, "step": 9288 }, { "epoch": 0.9545874858728038, "grad_norm": 0.04768074303865433, "learning_rate": 0.01, "loss": 2.0923, "step": 9291 }, { "epoch": 0.9548957156066988, "grad_norm": 0.0958017110824585, "learning_rate": 0.01, "loss": 2.1134, "step": 9294 }, { "epoch": 0.9552039453405938, "grad_norm": 0.06098558008670807, "learning_rate": 0.01, "loss": 2.0775, "step": 9297 }, { "epoch": 0.9555121750744888, "grad_norm": 0.05258086323738098, "learning_rate": 0.01, "loss": 2.0998, "step": 9300 }, { "epoch": 0.9558204048083838, "grad_norm": 0.06664231419563293, "learning_rate": 0.01, "loss": 2.1215, "step": 9303 }, { "epoch": 0.9561286345422788, "grad_norm": 0.05491488054394722, "learning_rate": 0.01, "loss": 2.0837, "step": 9306 }, { "epoch": 0.9564368642761738, "grad_norm": 0.0436725877225399, "learning_rate": 0.01, "loss": 2.1268, "step": 9309 }, { "epoch": 0.9567450940100688, "grad_norm": 0.08737560361623764, "learning_rate": 0.01, "loss": 2.0901, "step": 9312 }, { "epoch": 0.9570533237439638, "grad_norm": 0.08130110800266266, "learning_rate": 0.01, "loss": 2.0766, "step": 9315 }, { "epoch": 0.9573615534778588, "grad_norm": 0.07826768606901169, "learning_rate": 0.01, "loss": 2.0836, "step": 9318 }, { "epoch": 0.9576697832117538, "grad_norm": 0.09330857545137405, "learning_rate": 0.01, "loss": 2.0794, "step": 9321 }, { "epoch": 0.9579780129456488, "grad_norm": 0.03914652019739151, "learning_rate": 0.01, "loss": 2.102, "step": 9324 }, { "epoch": 0.9582862426795438, "grad_norm": 0.03853154182434082, "learning_rate": 0.01, "loss": 2.0915, "step": 9327 }, { "epoch": 0.9585944724134389, "grad_norm": 0.07349935919046402, "learning_rate": 0.01, "loss": 2.0856, "step": 9330 }, { "epoch": 0.9589027021473339, "grad_norm": 0.1473885178565979, "learning_rate": 0.01, "loss": 2.0904, "step": 9333 }, { "epoch": 0.9592109318812289, "grad_norm": 0.11091527342796326, "learning_rate": 0.01, "loss": 2.0934, "step": 9336 }, { "epoch": 0.9595191616151239, "grad_norm": 0.0400085523724556, "learning_rate": 0.01, "loss": 2.0924, "step": 9339 }, { "epoch": 0.9598273913490188, "grad_norm": 0.05025499314069748, "learning_rate": 0.01, "loss": 2.0845, "step": 9342 }, { "epoch": 0.9601356210829138, "grad_norm": 0.03745681792497635, "learning_rate": 0.01, "loss": 2.1006, "step": 9345 }, { "epoch": 0.9604438508168088, "grad_norm": 0.05147318169474602, "learning_rate": 0.01, "loss": 2.0912, "step": 9348 }, { "epoch": 0.9607520805507038, "grad_norm": 0.06338364630937576, "learning_rate": 0.01, "loss": 2.1169, "step": 9351 }, { "epoch": 0.9610603102845988, "grad_norm": 0.09458258748054504, "learning_rate": 0.01, "loss": 2.1005, "step": 9354 }, { "epoch": 0.9613685400184938, "grad_norm": 0.09883291274309158, "learning_rate": 0.01, "loss": 2.0934, "step": 9357 }, { "epoch": 0.9616767697523888, "grad_norm": 0.048908524215221405, "learning_rate": 0.01, "loss": 2.0863, "step": 9360 }, { "epoch": 0.9619849994862838, "grad_norm": 0.11762084811925888, "learning_rate": 0.01, "loss": 2.1182, "step": 9363 }, { "epoch": 0.9622932292201788, "grad_norm": 0.0835133045911789, "learning_rate": 0.01, "loss": 2.0728, "step": 9366 }, { "epoch": 0.9626014589540738, "grad_norm": 0.0580466203391552, "learning_rate": 0.01, "loss": 2.0756, "step": 9369 }, { "epoch": 0.9629096886879688, "grad_norm": 0.051043394953012466, "learning_rate": 0.01, "loss": 2.0936, "step": 9372 }, { "epoch": 0.9632179184218638, "grad_norm": 0.1081843450665474, "learning_rate": 0.01, "loss": 2.107, "step": 9375 }, { "epoch": 0.9635261481557588, "grad_norm": 0.04656577482819557, "learning_rate": 0.01, "loss": 2.1084, "step": 9378 }, { "epoch": 0.9638343778896538, "grad_norm": 0.03988798335194588, "learning_rate": 0.01, "loss": 2.1015, "step": 9381 }, { "epoch": 0.9641426076235488, "grad_norm": 0.07686126232147217, "learning_rate": 0.01, "loss": 2.1417, "step": 9384 }, { "epoch": 0.9644508373574437, "grad_norm": 0.057407401502132416, "learning_rate": 0.01, "loss": 2.1191, "step": 9387 }, { "epoch": 0.9647590670913387, "grad_norm": 0.0947386845946312, "learning_rate": 0.01, "loss": 2.0796, "step": 9390 }, { "epoch": 0.9650672968252337, "grad_norm": 0.05064699798822403, "learning_rate": 0.01, "loss": 2.1001, "step": 9393 }, { "epoch": 0.9653755265591287, "grad_norm": 0.04948986694216728, "learning_rate": 0.01, "loss": 2.0736, "step": 9396 }, { "epoch": 0.9656837562930237, "grad_norm": 0.10736438632011414, "learning_rate": 0.01, "loss": 2.0939, "step": 9399 }, { "epoch": 0.9659919860269187, "grad_norm": 0.039317477494478226, "learning_rate": 0.01, "loss": 2.1077, "step": 9402 }, { "epoch": 0.9663002157608137, "grad_norm": 0.06933067739009857, "learning_rate": 0.01, "loss": 2.1056, "step": 9405 }, { "epoch": 0.9666084454947087, "grad_norm": 0.03649623692035675, "learning_rate": 0.01, "loss": 2.0838, "step": 9408 }, { "epoch": 0.9669166752286037, "grad_norm": 0.09309684485197067, "learning_rate": 0.01, "loss": 2.0913, "step": 9411 }, { "epoch": 0.9672249049624987, "grad_norm": 0.11532922834157944, "learning_rate": 0.01, "loss": 2.1127, "step": 9414 }, { "epoch": 0.9675331346963937, "grad_norm": 0.053582970052957535, "learning_rate": 0.01, "loss": 2.0812, "step": 9417 }, { "epoch": 0.9678413644302887, "grad_norm": 0.07581201195716858, "learning_rate": 0.01, "loss": 2.1148, "step": 9420 }, { "epoch": 0.9681495941641837, "grad_norm": 0.051002178341150284, "learning_rate": 0.01, "loss": 2.0834, "step": 9423 }, { "epoch": 0.9684578238980787, "grad_norm": 0.06385383754968643, "learning_rate": 0.01, "loss": 2.0826, "step": 9426 }, { "epoch": 0.9687660536319737, "grad_norm": 0.10576994717121124, "learning_rate": 0.01, "loss": 2.0768, "step": 9429 }, { "epoch": 0.9690742833658686, "grad_norm": 0.054983410984277725, "learning_rate": 0.01, "loss": 2.0604, "step": 9432 }, { "epoch": 0.9693825130997636, "grad_norm": 0.09159716218709946, "learning_rate": 0.01, "loss": 2.0613, "step": 9435 }, { "epoch": 0.9696907428336586, "grad_norm": 0.07718406617641449, "learning_rate": 0.01, "loss": 2.1132, "step": 9438 }, { "epoch": 0.9699989725675536, "grad_norm": 0.0788009986281395, "learning_rate": 0.01, "loss": 2.0887, "step": 9441 }, { "epoch": 0.9703072023014487, "grad_norm": 0.040717653930187225, "learning_rate": 0.01, "loss": 2.084, "step": 9444 }, { "epoch": 0.9706154320353437, "grad_norm": 0.09677381813526154, "learning_rate": 0.01, "loss": 2.0903, "step": 9447 }, { "epoch": 0.9709236617692387, "grad_norm": 0.0706525593996048, "learning_rate": 0.01, "loss": 2.0647, "step": 9450 }, { "epoch": 0.9712318915031337, "grad_norm": 0.04624510183930397, "learning_rate": 0.01, "loss": 2.0818, "step": 9453 }, { "epoch": 0.9715401212370287, "grad_norm": 0.04585500434041023, "learning_rate": 0.01, "loss": 2.0927, "step": 9456 }, { "epoch": 0.9718483509709237, "grad_norm": 0.03468145430088043, "learning_rate": 0.01, "loss": 2.0759, "step": 9459 }, { "epoch": 0.9721565807048187, "grad_norm": 0.06956649571657181, "learning_rate": 0.01, "loss": 2.092, "step": 9462 }, { "epoch": 0.9724648104387137, "grad_norm": 0.04509080946445465, "learning_rate": 0.01, "loss": 2.1095, "step": 9465 }, { "epoch": 0.9727730401726087, "grad_norm": 0.09959586709737778, "learning_rate": 0.01, "loss": 2.0921, "step": 9468 }, { "epoch": 0.9730812699065037, "grad_norm": 0.08427727967500687, "learning_rate": 0.01, "loss": 2.1031, "step": 9471 }, { "epoch": 0.9733894996403987, "grad_norm": 0.14798741042613983, "learning_rate": 0.01, "loss": 2.091, "step": 9474 }, { "epoch": 0.9736977293742937, "grad_norm": 0.057735662907361984, "learning_rate": 0.01, "loss": 2.0701, "step": 9477 }, { "epoch": 0.9740059591081887, "grad_norm": 0.04484837129712105, "learning_rate": 0.01, "loss": 2.1015, "step": 9480 }, { "epoch": 0.9743141888420837, "grad_norm": 0.04166285693645477, "learning_rate": 0.01, "loss": 2.1021, "step": 9483 }, { "epoch": 0.9746224185759786, "grad_norm": 0.05640358105301857, "learning_rate": 0.01, "loss": 2.0925, "step": 9486 }, { "epoch": 0.9749306483098736, "grad_norm": 0.040314216166734695, "learning_rate": 0.01, "loss": 2.0797, "step": 9489 }, { "epoch": 0.9752388780437686, "grad_norm": 0.04522860422730446, "learning_rate": 0.01, "loss": 2.0935, "step": 9492 }, { "epoch": 0.9755471077776636, "grad_norm": 0.03492886200547218, "learning_rate": 0.01, "loss": 2.0968, "step": 9495 }, { "epoch": 0.9758553375115586, "grad_norm": 0.03252703323960304, "learning_rate": 0.01, "loss": 2.125, "step": 9498 }, { "epoch": 0.9761635672454536, "grad_norm": 0.04002056270837784, "learning_rate": 0.01, "loss": 2.0651, "step": 9501 }, { "epoch": 0.9764717969793486, "grad_norm": 0.07364718616008759, "learning_rate": 0.01, "loss": 2.0629, "step": 9504 }, { "epoch": 0.9767800267132436, "grad_norm": 0.05577448755502701, "learning_rate": 0.01, "loss": 2.0726, "step": 9507 }, { "epoch": 0.9770882564471386, "grad_norm": 0.13259132206439972, "learning_rate": 0.01, "loss": 2.1075, "step": 9510 }, { "epoch": 0.9773964861810336, "grad_norm": 0.06911557912826538, "learning_rate": 0.01, "loss": 2.0887, "step": 9513 }, { "epoch": 0.9777047159149286, "grad_norm": 0.10592345148324966, "learning_rate": 0.01, "loss": 2.0982, "step": 9516 }, { "epoch": 0.9780129456488236, "grad_norm": 0.05682144686579704, "learning_rate": 0.01, "loss": 2.0961, "step": 9519 }, { "epoch": 0.9783211753827186, "grad_norm": 0.07456633448600769, "learning_rate": 0.01, "loss": 2.0983, "step": 9522 }, { "epoch": 0.9786294051166136, "grad_norm": 0.062031425535678864, "learning_rate": 0.01, "loss": 2.1163, "step": 9525 }, { "epoch": 0.9789376348505086, "grad_norm": 0.0570233091711998, "learning_rate": 0.01, "loss": 2.1046, "step": 9528 }, { "epoch": 0.9792458645844035, "grad_norm": 0.04668619483709335, "learning_rate": 0.01, "loss": 2.081, "step": 9531 }, { "epoch": 0.9795540943182985, "grad_norm": 0.04718153178691864, "learning_rate": 0.01, "loss": 2.0678, "step": 9534 }, { "epoch": 0.9798623240521935, "grad_norm": 0.054066251963377, "learning_rate": 0.01, "loss": 2.0911, "step": 9537 }, { "epoch": 0.9801705537860885, "grad_norm": 0.1274210512638092, "learning_rate": 0.01, "loss": 2.097, "step": 9540 }, { "epoch": 0.9804787835199835, "grad_norm": 0.07543773949146271, "learning_rate": 0.01, "loss": 2.0824, "step": 9543 }, { "epoch": 0.9807870132538785, "grad_norm": 0.07845018804073334, "learning_rate": 0.01, "loss": 2.0749, "step": 9546 }, { "epoch": 0.9810952429877735, "grad_norm": 0.08444254100322723, "learning_rate": 0.01, "loss": 2.1019, "step": 9549 }, { "epoch": 0.9814034727216685, "grad_norm": 0.07719142735004425, "learning_rate": 0.01, "loss": 2.0811, "step": 9552 }, { "epoch": 0.9817117024555635, "grad_norm": 0.05624673515558243, "learning_rate": 0.01, "loss": 2.0752, "step": 9555 }, { "epoch": 0.9820199321894585, "grad_norm": 0.0419309176504612, "learning_rate": 0.01, "loss": 2.0812, "step": 9558 }, { "epoch": 0.9823281619233536, "grad_norm": 0.0343257375061512, "learning_rate": 0.01, "loss": 2.0694, "step": 9561 }, { "epoch": 0.9826363916572486, "grad_norm": 0.059452395886182785, "learning_rate": 0.01, "loss": 2.0521, "step": 9564 }, { "epoch": 0.9829446213911436, "grad_norm": 0.09073518216609955, "learning_rate": 0.01, "loss": 2.0636, "step": 9567 }, { "epoch": 0.9832528511250386, "grad_norm": 0.10660509765148163, "learning_rate": 0.01, "loss": 2.0796, "step": 9570 }, { "epoch": 0.9835610808589336, "grad_norm": 0.04380667209625244, "learning_rate": 0.01, "loss": 2.0992, "step": 9573 }, { "epoch": 0.9838693105928286, "grad_norm": 0.06383811682462692, "learning_rate": 0.01, "loss": 2.0722, "step": 9576 }, { "epoch": 0.9841775403267236, "grad_norm": 0.07926032692193985, "learning_rate": 0.01, "loss": 2.0571, "step": 9579 }, { "epoch": 0.9844857700606185, "grad_norm": 0.05310386046767235, "learning_rate": 0.01, "loss": 2.0739, "step": 9582 }, { "epoch": 0.9847939997945135, "grad_norm": 0.03591843321919441, "learning_rate": 0.01, "loss": 2.0757, "step": 9585 }, { "epoch": 0.9851022295284085, "grad_norm": 0.04773431271314621, "learning_rate": 0.01, "loss": 2.0525, "step": 9588 }, { "epoch": 0.9854104592623035, "grad_norm": 0.04679710045456886, "learning_rate": 0.01, "loss": 2.0771, "step": 9591 }, { "epoch": 0.9857186889961985, "grad_norm": 0.05671774223446846, "learning_rate": 0.01, "loss": 2.1106, "step": 9594 }, { "epoch": 0.9860269187300935, "grad_norm": 0.049488577991724014, "learning_rate": 0.01, "loss": 2.0695, "step": 9597 }, { "epoch": 0.9863351484639885, "grad_norm": 0.04207129031419754, "learning_rate": 0.01, "loss": 2.0903, "step": 9600 }, { "epoch": 0.9866433781978835, "grad_norm": 0.10019747167825699, "learning_rate": 0.01, "loss": 2.073, "step": 9603 }, { "epoch": 0.9869516079317785, "grad_norm": 0.051381729543209076, "learning_rate": 0.01, "loss": 2.0626, "step": 9606 }, { "epoch": 0.9872598376656735, "grad_norm": 0.13477744162082672, "learning_rate": 0.01, "loss": 2.1098, "step": 9609 }, { "epoch": 0.9875680673995685, "grad_norm": 0.09002148360013962, "learning_rate": 0.01, "loss": 2.0927, "step": 9612 }, { "epoch": 0.9878762971334635, "grad_norm": 0.05230112001299858, "learning_rate": 0.01, "loss": 2.0902, "step": 9615 }, { "epoch": 0.9881845268673585, "grad_norm": 0.0639885738492012, "learning_rate": 0.01, "loss": 2.1179, "step": 9618 }, { "epoch": 0.9884927566012535, "grad_norm": 0.0553070530295372, "learning_rate": 0.01, "loss": 2.0923, "step": 9621 }, { "epoch": 0.9888009863351485, "grad_norm": 0.04541468620300293, "learning_rate": 0.01, "loss": 2.0965, "step": 9624 }, { "epoch": 0.9891092160690435, "grad_norm": 0.08656930178403854, "learning_rate": 0.01, "loss": 2.1038, "step": 9627 }, { "epoch": 0.9894174458029384, "grad_norm": 0.04954921826720238, "learning_rate": 0.01, "loss": 2.0759, "step": 9630 }, { "epoch": 0.9897256755368334, "grad_norm": 0.07971720397472382, "learning_rate": 0.01, "loss": 2.0837, "step": 9633 }, { "epoch": 0.9900339052707284, "grad_norm": 0.12388944625854492, "learning_rate": 0.01, "loss": 2.1181, "step": 9636 }, { "epoch": 0.9903421350046234, "grad_norm": 0.040693242102861404, "learning_rate": 0.01, "loss": 2.0806, "step": 9639 }, { "epoch": 0.9906503647385184, "grad_norm": 0.032711997628211975, "learning_rate": 0.01, "loss": 2.0925, "step": 9642 }, { "epoch": 0.9909585944724134, "grad_norm": 0.04089382663369179, "learning_rate": 0.01, "loss": 2.0841, "step": 9645 }, { "epoch": 0.9912668242063084, "grad_norm": 0.05480481684207916, "learning_rate": 0.01, "loss": 2.0769, "step": 9648 }, { "epoch": 0.9915750539402034, "grad_norm": 0.04627472907304764, "learning_rate": 0.01, "loss": 2.094, "step": 9651 }, { "epoch": 0.9918832836740984, "grad_norm": 0.0517272874712944, "learning_rate": 0.01, "loss": 2.1181, "step": 9654 }, { "epoch": 0.9921915134079934, "grad_norm": 0.051012761890888214, "learning_rate": 0.01, "loss": 2.0985, "step": 9657 }, { "epoch": 0.9924997431418884, "grad_norm": 0.08666348457336426, "learning_rate": 0.01, "loss": 2.0875, "step": 9660 }, { "epoch": 0.9928079728757834, "grad_norm": 0.0972173810005188, "learning_rate": 0.01, "loss": 2.0995, "step": 9663 }, { "epoch": 0.9931162026096784, "grad_norm": 0.0765865370631218, "learning_rate": 0.01, "loss": 2.0729, "step": 9666 }, { "epoch": 0.9934244323435734, "grad_norm": 0.04532674700021744, "learning_rate": 0.01, "loss": 2.0656, "step": 9669 }, { "epoch": 0.9937326620774684, "grad_norm": 0.08642619848251343, "learning_rate": 0.01, "loss": 2.1036, "step": 9672 }, { "epoch": 0.9940408918113633, "grad_norm": 0.04758689925074577, "learning_rate": 0.01, "loss": 2.0683, "step": 9675 }, { "epoch": 0.9943491215452585, "grad_norm": 0.07701463252305984, "learning_rate": 0.01, "loss": 2.0898, "step": 9678 }, { "epoch": 0.9946573512791534, "grad_norm": 0.05999990925192833, "learning_rate": 0.01, "loss": 2.0694, "step": 9681 }, { "epoch": 0.9949655810130484, "grad_norm": 0.08793257176876068, "learning_rate": 0.01, "loss": 2.0689, "step": 9684 }, { "epoch": 0.9952738107469434, "grad_norm": 0.06139199063181877, "learning_rate": 0.01, "loss": 2.0801, "step": 9687 }, { "epoch": 0.9955820404808384, "grad_norm": 0.09202239662408829, "learning_rate": 0.01, "loss": 2.0837, "step": 9690 }, { "epoch": 0.9958902702147334, "grad_norm": 0.09284163266420364, "learning_rate": 0.01, "loss": 2.107, "step": 9693 }, { "epoch": 0.9961984999486284, "grad_norm": 0.08113729953765869, "learning_rate": 0.01, "loss": 2.076, "step": 9696 }, { "epoch": 0.9965067296825234, "grad_norm": 0.10663104802370071, "learning_rate": 0.01, "loss": 2.0973, "step": 9699 }, { "epoch": 0.9968149594164184, "grad_norm": 0.11791951954364777, "learning_rate": 0.01, "loss": 2.0885, "step": 9702 }, { "epoch": 0.9971231891503134, "grad_norm": 0.09039194136857986, "learning_rate": 0.01, "loss": 2.0957, "step": 9705 }, { "epoch": 0.9974314188842084, "grad_norm": 0.08142858743667603, "learning_rate": 0.01, "loss": 2.0721, "step": 9708 }, { "epoch": 0.9977396486181034, "grad_norm": 0.07347192615270615, "learning_rate": 0.01, "loss": 2.0985, "step": 9711 }, { "epoch": 0.9980478783519984, "grad_norm": 0.04449746012687683, "learning_rate": 0.01, "loss": 2.0728, "step": 9714 }, { "epoch": 0.9983561080858934, "grad_norm": 0.040178634226322174, "learning_rate": 0.01, "loss": 2.0773, "step": 9717 }, { "epoch": 0.9986643378197884, "grad_norm": 0.0577414333820343, "learning_rate": 0.01, "loss": 2.0854, "step": 9720 }, { "epoch": 0.9989725675536834, "grad_norm": 0.07444582879543304, "learning_rate": 0.01, "loss": 2.0834, "step": 9723 }, { "epoch": 0.9992807972875783, "grad_norm": 0.10387948155403137, "learning_rate": 0.01, "loss": 2.0698, "step": 9726 }, { "epoch": 0.9995890270214733, "grad_norm": 0.11066528409719467, "learning_rate": 0.01, "loss": 2.1035, "step": 9729 }, { "epoch": 0.9998972567553683, "grad_norm": 0.06454616039991379, "learning_rate": 0.01, "loss": 2.0692, "step": 9732 }, { "epoch": 0.9990763546798029, "grad_norm": 0.048325520008802414, "learning_rate": 0.01, "loss": 2.1225, "step": 9735 }, { "epoch": 0.999384236453202, "grad_norm": 0.03542228788137436, "learning_rate": 0.01, "loss": 2.1024, "step": 9738 }, { "epoch": 0.999692118226601, "grad_norm": 0.042020559310913086, "learning_rate": 0.01, "loss": 2.0968, "step": 9741 }, { "epoch": 1.0, "grad_norm": 0.04916913062334061, "learning_rate": 0.01, "loss": 2.1244, "step": 9744 }, { "epoch": 1.000307881773399, "grad_norm": 0.08905553072690964, "learning_rate": 0.01, "loss": 2.0867, "step": 9747 }, { "epoch": 1.000615763546798, "grad_norm": 0.07140953093767166, "learning_rate": 0.01, "loss": 2.0863, "step": 9750 }, { "epoch": 1.000923645320197, "grad_norm": 0.05284767597913742, "learning_rate": 0.01, "loss": 2.1131, "step": 9753 }, { "epoch": 1.001231527093596, "grad_norm": 0.1293289214372635, "learning_rate": 0.01, "loss": 2.1036, "step": 9756 }, { "epoch": 1.001539408866995, "grad_norm": 0.06052086502313614, "learning_rate": 0.01, "loss": 2.1189, "step": 9759 }, { "epoch": 1.0018472906403941, "grad_norm": 0.07361391931772232, "learning_rate": 0.01, "loss": 2.0962, "step": 9762 }, { "epoch": 1.0021551724137931, "grad_norm": 0.06513562798500061, "learning_rate": 0.01, "loss": 2.129, "step": 9765 }, { "epoch": 1.0024630541871922, "grad_norm": 0.036649156361818314, "learning_rate": 0.01, "loss": 2.0964, "step": 9768 }, { "epoch": 1.0027709359605912, "grad_norm": 0.05371764674782753, "learning_rate": 0.01, "loss": 2.0976, "step": 9771 }, { "epoch": 1.0030788177339902, "grad_norm": 0.06316730380058289, "learning_rate": 0.01, "loss": 2.097, "step": 9774 }, { "epoch": 1.0033866995073892, "grad_norm": 0.03097986802458763, "learning_rate": 0.01, "loss": 2.1128, "step": 9777 }, { "epoch": 1.0036945812807883, "grad_norm": 0.046021945774555206, "learning_rate": 0.01, "loss": 2.1296, "step": 9780 }, { "epoch": 1.0040024630541873, "grad_norm": 0.06580191850662231, "learning_rate": 0.01, "loss": 2.1106, "step": 9783 }, { "epoch": 1.0043103448275863, "grad_norm": 0.054073531180620193, "learning_rate": 0.01, "loss": 2.0986, "step": 9786 }, { "epoch": 1.0046182266009853, "grad_norm": 0.10088641196489334, "learning_rate": 0.01, "loss": 2.1301, "step": 9789 }, { "epoch": 1.0049261083743843, "grad_norm": 0.03944807127118111, "learning_rate": 0.01, "loss": 2.1337, "step": 9792 }, { "epoch": 1.0052339901477831, "grad_norm": 0.07183028757572174, "learning_rate": 0.01, "loss": 2.1272, "step": 9795 }, { "epoch": 1.0055418719211822, "grad_norm": 0.13821956515312195, "learning_rate": 0.01, "loss": 2.1016, "step": 9798 }, { "epoch": 1.0058497536945812, "grad_norm": 0.14031893014907837, "learning_rate": 0.01, "loss": 2.0924, "step": 9801 }, { "epoch": 1.0061576354679802, "grad_norm": 0.06494525820016861, "learning_rate": 0.01, "loss": 2.128, "step": 9804 }, { "epoch": 1.0064655172413792, "grad_norm": 0.05946667864918709, "learning_rate": 0.01, "loss": 2.1335, "step": 9807 }, { "epoch": 1.0067733990147782, "grad_norm": 0.05583272874355316, "learning_rate": 0.01, "loss": 2.1186, "step": 9810 }, { "epoch": 1.0070812807881773, "grad_norm": 0.06858284026384354, "learning_rate": 0.01, "loss": 2.1207, "step": 9813 }, { "epoch": 1.0073891625615763, "grad_norm": 0.05864641070365906, "learning_rate": 0.01, "loss": 2.0869, "step": 9816 }, { "epoch": 1.0076970443349753, "grad_norm": 0.043661102652549744, "learning_rate": 0.01, "loss": 2.1067, "step": 9819 }, { "epoch": 1.0080049261083743, "grad_norm": 0.07878375053405762, "learning_rate": 0.01, "loss": 2.1149, "step": 9822 }, { "epoch": 1.0083128078817734, "grad_norm": 0.04246210679411888, "learning_rate": 0.01, "loss": 2.1241, "step": 9825 }, { "epoch": 1.0086206896551724, "grad_norm": 0.06508597731590271, "learning_rate": 0.01, "loss": 2.1232, "step": 9828 }, { "epoch": 1.0089285714285714, "grad_norm": 0.07472758740186691, "learning_rate": 0.01, "loss": 2.0893, "step": 9831 }, { "epoch": 1.0092364532019704, "grad_norm": 0.13144147396087646, "learning_rate": 0.01, "loss": 2.1194, "step": 9834 }, { "epoch": 1.0095443349753694, "grad_norm": 0.08961367607116699, "learning_rate": 0.01, "loss": 2.1215, "step": 9837 }, { "epoch": 1.0098522167487685, "grad_norm": 0.053439076989889145, "learning_rate": 0.01, "loss": 2.1173, "step": 9840 }, { "epoch": 1.0101600985221675, "grad_norm": 0.03234443441033363, "learning_rate": 0.01, "loss": 2.104, "step": 9843 }, { "epoch": 1.0104679802955665, "grad_norm": 0.07516933977603912, "learning_rate": 0.01, "loss": 2.1186, "step": 9846 }, { "epoch": 1.0107758620689655, "grad_norm": 0.12221794575452805, "learning_rate": 0.01, "loss": 2.0934, "step": 9849 }, { "epoch": 1.0110837438423645, "grad_norm": 0.08198120445013046, "learning_rate": 0.01, "loss": 2.1495, "step": 9852 }, { "epoch": 1.0113916256157636, "grad_norm": 0.058380696922540665, "learning_rate": 0.01, "loss": 2.1234, "step": 9855 }, { "epoch": 1.0116995073891626, "grad_norm": 0.04831172525882721, "learning_rate": 0.01, "loss": 2.0977, "step": 9858 }, { "epoch": 1.0120073891625616, "grad_norm": 0.045920804142951965, "learning_rate": 0.01, "loss": 2.0842, "step": 9861 }, { "epoch": 1.0123152709359606, "grad_norm": 0.12969541549682617, "learning_rate": 0.01, "loss": 2.1005, "step": 9864 }, { "epoch": 1.0126231527093597, "grad_norm": 0.09659627079963684, "learning_rate": 0.01, "loss": 2.1126, "step": 9867 }, { "epoch": 1.0129310344827587, "grad_norm": 0.033160608261823654, "learning_rate": 0.01, "loss": 2.1244, "step": 9870 }, { "epoch": 1.0132389162561577, "grad_norm": 0.03523699939250946, "learning_rate": 0.01, "loss": 2.1009, "step": 9873 }, { "epoch": 1.0135467980295567, "grad_norm": 0.04670235142111778, "learning_rate": 0.01, "loss": 2.1107, "step": 9876 }, { "epoch": 1.0138546798029557, "grad_norm": 0.05278048664331436, "learning_rate": 0.01, "loss": 2.133, "step": 9879 }, { "epoch": 1.0141625615763548, "grad_norm": 0.1409105509519577, "learning_rate": 0.01, "loss": 2.105, "step": 9882 }, { "epoch": 1.0144704433497538, "grad_norm": 0.08208174258470535, "learning_rate": 0.01, "loss": 2.1202, "step": 9885 }, { "epoch": 1.0147783251231528, "grad_norm": 0.052980221807956696, "learning_rate": 0.01, "loss": 2.1108, "step": 9888 }, { "epoch": 1.0150862068965518, "grad_norm": 0.03402642160654068, "learning_rate": 0.01, "loss": 2.1058, "step": 9891 }, { "epoch": 1.0153940886699508, "grad_norm": 0.05165582895278931, "learning_rate": 0.01, "loss": 2.0962, "step": 9894 }, { "epoch": 1.0157019704433496, "grad_norm": 0.0488906130194664, "learning_rate": 0.01, "loss": 2.1157, "step": 9897 }, { "epoch": 1.0160098522167487, "grad_norm": 0.06578544527292252, "learning_rate": 0.01, "loss": 2.0783, "step": 9900 }, { "epoch": 1.0163177339901477, "grad_norm": 0.05930023267865181, "learning_rate": 0.01, "loss": 2.115, "step": 9903 }, { "epoch": 1.0166256157635467, "grad_norm": 0.07461842894554138, "learning_rate": 0.01, "loss": 2.0833, "step": 9906 }, { "epoch": 1.0169334975369457, "grad_norm": 0.04523751139640808, "learning_rate": 0.01, "loss": 2.1062, "step": 9909 }, { "epoch": 1.0172413793103448, "grad_norm": 0.05342249572277069, "learning_rate": 0.01, "loss": 2.1127, "step": 9912 }, { "epoch": 1.0175492610837438, "grad_norm": 0.040748368948698044, "learning_rate": 0.01, "loss": 2.0893, "step": 9915 }, { "epoch": 1.0178571428571428, "grad_norm": 0.03435824438929558, "learning_rate": 0.01, "loss": 2.1151, "step": 9918 }, { "epoch": 1.0181650246305418, "grad_norm": 0.04769265651702881, "learning_rate": 0.01, "loss": 2.0984, "step": 9921 }, { "epoch": 1.0184729064039408, "grad_norm": 0.07814217358827591, "learning_rate": 0.01, "loss": 2.1038, "step": 9924 }, { "epoch": 1.0187807881773399, "grad_norm": 0.12953363358974457, "learning_rate": 0.01, "loss": 2.1051, "step": 9927 }, { "epoch": 1.0190886699507389, "grad_norm": 0.11376773566007614, "learning_rate": 0.01, "loss": 2.0989, "step": 9930 }, { "epoch": 1.019396551724138, "grad_norm": 0.05323106423020363, "learning_rate": 0.01, "loss": 2.1135, "step": 9933 }, { "epoch": 1.019704433497537, "grad_norm": 0.07705114781856537, "learning_rate": 0.01, "loss": 2.1046, "step": 9936 }, { "epoch": 1.020012315270936, "grad_norm": 0.05934451147913933, "learning_rate": 0.01, "loss": 2.1207, "step": 9939 }, { "epoch": 1.020320197044335, "grad_norm": 0.10474961996078491, "learning_rate": 0.01, "loss": 2.1134, "step": 9942 }, { "epoch": 1.020628078817734, "grad_norm": 0.05283385515213013, "learning_rate": 0.01, "loss": 2.1085, "step": 9945 }, { "epoch": 1.020935960591133, "grad_norm": 0.043369196355342865, "learning_rate": 0.01, "loss": 2.1265, "step": 9948 }, { "epoch": 1.021243842364532, "grad_norm": 0.0366055853664875, "learning_rate": 0.01, "loss": 2.1214, "step": 9951 }, { "epoch": 1.021551724137931, "grad_norm": 0.06200672313570976, "learning_rate": 0.01, "loss": 2.0943, "step": 9954 }, { "epoch": 1.02185960591133, "grad_norm": 0.06652572005987167, "learning_rate": 0.01, "loss": 2.1139, "step": 9957 }, { "epoch": 1.022167487684729, "grad_norm": 0.04040740057826042, "learning_rate": 0.01, "loss": 2.0894, "step": 9960 }, { "epoch": 1.0224753694581281, "grad_norm": 0.049162358045578, "learning_rate": 0.01, "loss": 2.0955, "step": 9963 }, { "epoch": 1.0227832512315271, "grad_norm": 0.05465700104832649, "learning_rate": 0.01, "loss": 2.1109, "step": 9966 }, { "epoch": 1.0230911330049262, "grad_norm": 0.0575067512691021, "learning_rate": 0.01, "loss": 2.0956, "step": 9969 }, { "epoch": 1.0233990147783252, "grad_norm": 0.14622198045253754, "learning_rate": 0.01, "loss": 2.1031, "step": 9972 }, { "epoch": 1.0237068965517242, "grad_norm": 0.04765618219971657, "learning_rate": 0.01, "loss": 2.0834, "step": 9975 }, { "epoch": 1.0240147783251232, "grad_norm": 0.04039911553263664, "learning_rate": 0.01, "loss": 2.0933, "step": 9978 }, { "epoch": 1.0243226600985222, "grad_norm": 0.06009029969573021, "learning_rate": 0.01, "loss": 2.115, "step": 9981 }, { "epoch": 1.0246305418719213, "grad_norm": 0.06187298893928528, "learning_rate": 0.01, "loss": 2.079, "step": 9984 }, { "epoch": 1.0249384236453203, "grad_norm": 0.05368026718497276, "learning_rate": 0.01, "loss": 2.0875, "step": 9987 }, { "epoch": 1.0252463054187193, "grad_norm": 0.051921263337135315, "learning_rate": 0.01, "loss": 2.1243, "step": 9990 }, { "epoch": 1.0255541871921183, "grad_norm": 0.09820009768009186, "learning_rate": 0.01, "loss": 2.0983, "step": 9993 }, { "epoch": 1.0258620689655173, "grad_norm": 0.10601375997066498, "learning_rate": 0.01, "loss": 2.1288, "step": 9996 }, { "epoch": 1.0261699507389161, "grad_norm": 0.05488260090351105, "learning_rate": 0.01, "loss": 2.1033, "step": 9999 }, { "epoch": 1.0264778325123152, "grad_norm": 0.07482553273439407, "learning_rate": 0.01, "loss": 2.1181, "step": 10002 }, { "epoch": 1.0267857142857142, "grad_norm": 0.044733475893735886, "learning_rate": 0.01, "loss": 2.1237, "step": 10005 }, { "epoch": 1.0270935960591132, "grad_norm": 0.04775967076420784, "learning_rate": 0.01, "loss": 2.1288, "step": 10008 }, { "epoch": 1.0274014778325122, "grad_norm": 0.05972621962428093, "learning_rate": 0.01, "loss": 2.0878, "step": 10011 }, { "epoch": 1.0277093596059113, "grad_norm": 0.12219330668449402, "learning_rate": 0.01, "loss": 2.1034, "step": 10014 }, { "epoch": 1.0280172413793103, "grad_norm": 0.05171920731663704, "learning_rate": 0.01, "loss": 2.0925, "step": 10017 }, { "epoch": 1.0283251231527093, "grad_norm": 0.04166760668158531, "learning_rate": 0.01, "loss": 2.0928, "step": 10020 }, { "epoch": 1.0286330049261083, "grad_norm": 0.05231022089719772, "learning_rate": 0.01, "loss": 2.0945, "step": 10023 }, { "epoch": 1.0289408866995073, "grad_norm": 0.1091604232788086, "learning_rate": 0.01, "loss": 2.0878, "step": 10026 }, { "epoch": 1.0292487684729064, "grad_norm": 0.07104350626468658, "learning_rate": 0.01, "loss": 2.1125, "step": 10029 }, { "epoch": 1.0295566502463054, "grad_norm": 0.0466371588408947, "learning_rate": 0.01, "loss": 2.0973, "step": 10032 }, { "epoch": 1.0298645320197044, "grad_norm": 0.05548730120062828, "learning_rate": 0.01, "loss": 2.0846, "step": 10035 }, { "epoch": 1.0301724137931034, "grad_norm": 0.06483764201402664, "learning_rate": 0.01, "loss": 2.105, "step": 10038 }, { "epoch": 1.0304802955665024, "grad_norm": 0.05243910476565361, "learning_rate": 0.01, "loss": 2.1011, "step": 10041 }, { "epoch": 1.0307881773399015, "grad_norm": 0.09996815025806427, "learning_rate": 0.01, "loss": 2.1389, "step": 10044 }, { "epoch": 1.0310960591133005, "grad_norm": 0.04864559695124626, "learning_rate": 0.01, "loss": 2.1069, "step": 10047 }, { "epoch": 1.0314039408866995, "grad_norm": 0.14447607100009918, "learning_rate": 0.01, "loss": 2.1075, "step": 10050 }, { "epoch": 1.0317118226600985, "grad_norm": 0.050261352211236954, "learning_rate": 0.01, "loss": 2.1147, "step": 10053 }, { "epoch": 1.0320197044334976, "grad_norm": 0.07719244807958603, "learning_rate": 0.01, "loss": 2.1061, "step": 10056 }, { "epoch": 1.0323275862068966, "grad_norm": 0.10620381683111191, "learning_rate": 0.01, "loss": 2.1129, "step": 10059 }, { "epoch": 1.0326354679802956, "grad_norm": 0.05358508229255676, "learning_rate": 0.01, "loss": 2.1156, "step": 10062 }, { "epoch": 1.0329433497536946, "grad_norm": 0.04341145232319832, "learning_rate": 0.01, "loss": 2.1046, "step": 10065 }, { "epoch": 1.0332512315270936, "grad_norm": 0.04785105213522911, "learning_rate": 0.01, "loss": 2.0804, "step": 10068 }, { "epoch": 1.0335591133004927, "grad_norm": 0.04886849224567413, "learning_rate": 0.01, "loss": 2.0691, "step": 10071 }, { "epoch": 1.0338669950738917, "grad_norm": 0.03917735815048218, "learning_rate": 0.01, "loss": 2.0808, "step": 10074 }, { "epoch": 1.0341748768472907, "grad_norm": 0.10696244239807129, "learning_rate": 0.01, "loss": 2.085, "step": 10077 }, { "epoch": 1.0344827586206897, "grad_norm": 0.14525163173675537, "learning_rate": 0.01, "loss": 2.1246, "step": 10080 }, { "epoch": 1.0347906403940887, "grad_norm": 0.06464140862226486, "learning_rate": 0.01, "loss": 2.1088, "step": 10083 }, { "epoch": 1.0350985221674878, "grad_norm": 0.055628299713134766, "learning_rate": 0.01, "loss": 2.1013, "step": 10086 }, { "epoch": 1.0354064039408868, "grad_norm": 0.0457589291036129, "learning_rate": 0.01, "loss": 2.118, "step": 10089 }, { "epoch": 1.0357142857142858, "grad_norm": 0.07108809798955917, "learning_rate": 0.01, "loss": 2.0882, "step": 10092 }, { "epoch": 1.0360221674876848, "grad_norm": 0.07304032146930695, "learning_rate": 0.01, "loss": 2.1632, "step": 10095 }, { "epoch": 1.0363300492610836, "grad_norm": 0.04778844490647316, "learning_rate": 0.01, "loss": 2.1076, "step": 10098 }, { "epoch": 1.0366379310344827, "grad_norm": 0.0444946251809597, "learning_rate": 0.01, "loss": 2.1092, "step": 10101 }, { "epoch": 1.0369458128078817, "grad_norm": 0.03863450884819031, "learning_rate": 0.01, "loss": 2.0973, "step": 10104 }, { "epoch": 1.0372536945812807, "grad_norm": 0.11049003899097443, "learning_rate": 0.01, "loss": 2.1069, "step": 10107 }, { "epoch": 1.0375615763546797, "grad_norm": 0.055413637310266495, "learning_rate": 0.01, "loss": 2.0935, "step": 10110 }, { "epoch": 1.0378694581280787, "grad_norm": 0.1212301105260849, "learning_rate": 0.01, "loss": 2.1033, "step": 10113 }, { "epoch": 1.0381773399014778, "grad_norm": 0.06444283574819565, "learning_rate": 0.01, "loss": 2.0821, "step": 10116 }, { "epoch": 1.0384852216748768, "grad_norm": 0.048522353172302246, "learning_rate": 0.01, "loss": 2.1129, "step": 10119 }, { "epoch": 1.0387931034482758, "grad_norm": 0.03755674138665199, "learning_rate": 0.01, "loss": 2.0773, "step": 10122 }, { "epoch": 1.0391009852216748, "grad_norm": 0.03873259574174881, "learning_rate": 0.01, "loss": 2.0877, "step": 10125 }, { "epoch": 1.0394088669950738, "grad_norm": 0.062387898564338684, "learning_rate": 0.01, "loss": 2.1119, "step": 10128 }, { "epoch": 1.0397167487684729, "grad_norm": 0.037559203803539276, "learning_rate": 0.01, "loss": 2.1165, "step": 10131 }, { "epoch": 1.0400246305418719, "grad_norm": 0.0703917145729065, "learning_rate": 0.01, "loss": 2.0877, "step": 10134 }, { "epoch": 1.040332512315271, "grad_norm": 0.05063795670866966, "learning_rate": 0.01, "loss": 2.1282, "step": 10137 }, { "epoch": 1.04064039408867, "grad_norm": 0.08476493507623672, "learning_rate": 0.01, "loss": 2.1217, "step": 10140 }, { "epoch": 1.040948275862069, "grad_norm": 0.09482383728027344, "learning_rate": 0.01, "loss": 2.1002, "step": 10143 }, { "epoch": 1.041256157635468, "grad_norm": 0.1094396710395813, "learning_rate": 0.01, "loss": 2.1138, "step": 10146 }, { "epoch": 1.041564039408867, "grad_norm": 0.17252720892429352, "learning_rate": 0.01, "loss": 2.1079, "step": 10149 }, { "epoch": 1.041871921182266, "grad_norm": 0.11076754331588745, "learning_rate": 0.01, "loss": 2.1198, "step": 10152 }, { "epoch": 1.042179802955665, "grad_norm": 0.06879215687513351, "learning_rate": 0.01, "loss": 2.0878, "step": 10155 }, { "epoch": 1.042487684729064, "grad_norm": 0.07402212172746658, "learning_rate": 0.01, "loss": 2.0869, "step": 10158 }, { "epoch": 1.042795566502463, "grad_norm": 0.04562051594257355, "learning_rate": 0.01, "loss": 2.1139, "step": 10161 }, { "epoch": 1.043103448275862, "grad_norm": 0.04578396677970886, "learning_rate": 0.01, "loss": 2.0974, "step": 10164 }, { "epoch": 1.0434113300492611, "grad_norm": 0.051678020507097244, "learning_rate": 0.01, "loss": 2.0995, "step": 10167 }, { "epoch": 1.0437192118226601, "grad_norm": 0.03445015102624893, "learning_rate": 0.01, "loss": 2.106, "step": 10170 }, { "epoch": 1.0440270935960592, "grad_norm": 0.03868851810693741, "learning_rate": 0.01, "loss": 2.0732, "step": 10173 }, { "epoch": 1.0443349753694582, "grad_norm": 0.058904558420181274, "learning_rate": 0.01, "loss": 2.085, "step": 10176 }, { "epoch": 1.0446428571428572, "grad_norm": 0.10729484260082245, "learning_rate": 0.01, "loss": 2.0909, "step": 10179 }, { "epoch": 1.0449507389162562, "grad_norm": 0.10037554055452347, "learning_rate": 0.01, "loss": 2.0945, "step": 10182 }, { "epoch": 1.0452586206896552, "grad_norm": 0.07336730509996414, "learning_rate": 0.01, "loss": 2.0885, "step": 10185 }, { "epoch": 1.0455665024630543, "grad_norm": 0.11717227101325989, "learning_rate": 0.01, "loss": 2.1019, "step": 10188 }, { "epoch": 1.0458743842364533, "grad_norm": 0.06263696402311325, "learning_rate": 0.01, "loss": 2.1113, "step": 10191 }, { "epoch": 1.0461822660098523, "grad_norm": 0.07939436286687851, "learning_rate": 0.01, "loss": 2.0803, "step": 10194 }, { "epoch": 1.0464901477832513, "grad_norm": 0.05761004984378815, "learning_rate": 0.01, "loss": 2.1155, "step": 10197 }, { "epoch": 1.0467980295566504, "grad_norm": 0.04293765127658844, "learning_rate": 0.01, "loss": 2.0944, "step": 10200 }, { "epoch": 1.0471059113300492, "grad_norm": 0.04638001322746277, "learning_rate": 0.01, "loss": 2.113, "step": 10203 }, { "epoch": 1.0474137931034482, "grad_norm": 0.047882046550512314, "learning_rate": 0.01, "loss": 2.0733, "step": 10206 }, { "epoch": 1.0477216748768472, "grad_norm": 0.07461071759462357, "learning_rate": 0.01, "loss": 2.107, "step": 10209 }, { "epoch": 1.0480295566502462, "grad_norm": 0.10987289249897003, "learning_rate": 0.01, "loss": 2.105, "step": 10212 }, { "epoch": 1.0483374384236452, "grad_norm": 0.04183235019445419, "learning_rate": 0.01, "loss": 2.0953, "step": 10215 }, { "epoch": 1.0486453201970443, "grad_norm": 0.049700990319252014, "learning_rate": 0.01, "loss": 2.1067, "step": 10218 }, { "epoch": 1.0489532019704433, "grad_norm": 0.08448828011751175, "learning_rate": 0.01, "loss": 2.1113, "step": 10221 }, { "epoch": 1.0492610837438423, "grad_norm": 0.05486508831381798, "learning_rate": 0.01, "loss": 2.1156, "step": 10224 }, { "epoch": 1.0495689655172413, "grad_norm": 0.057925377041101456, "learning_rate": 0.01, "loss": 2.127, "step": 10227 }, { "epoch": 1.0498768472906403, "grad_norm": 0.05322302505373955, "learning_rate": 0.01, "loss": 2.0861, "step": 10230 }, { "epoch": 1.0501847290640394, "grad_norm": 0.046823181211948395, "learning_rate": 0.01, "loss": 2.089, "step": 10233 }, { "epoch": 1.0504926108374384, "grad_norm": 0.05037027224898338, "learning_rate": 0.01, "loss": 2.0841, "step": 10236 }, { "epoch": 1.0508004926108374, "grad_norm": 0.05172303318977356, "learning_rate": 0.01, "loss": 2.105, "step": 10239 }, { "epoch": 1.0511083743842364, "grad_norm": 0.07993052154779434, "learning_rate": 0.01, "loss": 2.1097, "step": 10242 }, { "epoch": 1.0514162561576355, "grad_norm": 0.039322953671216965, "learning_rate": 0.01, "loss": 2.0951, "step": 10245 }, { "epoch": 1.0517241379310345, "grad_norm": 0.05829343572258949, "learning_rate": 0.01, "loss": 2.1257, "step": 10248 }, { "epoch": 1.0520320197044335, "grad_norm": 0.12303601950407028, "learning_rate": 0.01, "loss": 2.1143, "step": 10251 }, { "epoch": 1.0523399014778325, "grad_norm": 0.07176418602466583, "learning_rate": 0.01, "loss": 2.1297, "step": 10254 }, { "epoch": 1.0526477832512315, "grad_norm": 0.05229344964027405, "learning_rate": 0.01, "loss": 2.0934, "step": 10257 }, { "epoch": 1.0529556650246306, "grad_norm": 0.041665658354759216, "learning_rate": 0.01, "loss": 2.116, "step": 10260 }, { "epoch": 1.0532635467980296, "grad_norm": 0.04542261362075806, "learning_rate": 0.01, "loss": 2.1277, "step": 10263 }, { "epoch": 1.0535714285714286, "grad_norm": 0.0501495897769928, "learning_rate": 0.01, "loss": 2.0911, "step": 10266 }, { "epoch": 1.0538793103448276, "grad_norm": 0.06474924832582474, "learning_rate": 0.01, "loss": 2.1254, "step": 10269 }, { "epoch": 1.0541871921182266, "grad_norm": 0.0736108273267746, "learning_rate": 0.01, "loss": 2.0685, "step": 10272 }, { "epoch": 1.0544950738916257, "grad_norm": 0.07487022131681442, "learning_rate": 0.01, "loss": 2.113, "step": 10275 }, { "epoch": 1.0548029556650247, "grad_norm": 0.04876410961151123, "learning_rate": 0.01, "loss": 2.1051, "step": 10278 }, { "epoch": 1.0551108374384237, "grad_norm": 0.056595779955387115, "learning_rate": 0.01, "loss": 2.0864, "step": 10281 }, { "epoch": 1.0554187192118227, "grad_norm": 0.06958241015672684, "learning_rate": 0.01, "loss": 2.1, "step": 10284 }, { "epoch": 1.0557266009852218, "grad_norm": 0.08811846375465393, "learning_rate": 0.01, "loss": 2.1021, "step": 10287 }, { "epoch": 1.0560344827586208, "grad_norm": 0.061557747423648834, "learning_rate": 0.01, "loss": 2.1063, "step": 10290 }, { "epoch": 1.0563423645320198, "grad_norm": 0.07043389976024628, "learning_rate": 0.01, "loss": 2.106, "step": 10293 }, { "epoch": 1.0566502463054188, "grad_norm": 0.0916379988193512, "learning_rate": 0.01, "loss": 2.0851, "step": 10296 }, { "epoch": 1.0569581280788178, "grad_norm": 0.050577979534864426, "learning_rate": 0.01, "loss": 2.0966, "step": 10299 }, { "epoch": 1.0572660098522166, "grad_norm": 0.06576110422611237, "learning_rate": 0.01, "loss": 2.1038, "step": 10302 }, { "epoch": 1.0575738916256157, "grad_norm": 0.09315023571252823, "learning_rate": 0.01, "loss": 2.1341, "step": 10305 }, { "epoch": 1.0578817733990147, "grad_norm": 0.0649820864200592, "learning_rate": 0.01, "loss": 2.1064, "step": 10308 }, { "epoch": 1.0581896551724137, "grad_norm": 0.07930494844913483, "learning_rate": 0.01, "loss": 2.107, "step": 10311 }, { "epoch": 1.0584975369458127, "grad_norm": 0.09142257273197174, "learning_rate": 0.01, "loss": 2.1162, "step": 10314 }, { "epoch": 1.0588054187192117, "grad_norm": 0.05011974647641182, "learning_rate": 0.01, "loss": 2.0686, "step": 10317 }, { "epoch": 1.0591133004926108, "grad_norm": 0.1002635508775711, "learning_rate": 0.01, "loss": 2.138, "step": 10320 }, { "epoch": 1.0594211822660098, "grad_norm": 0.07570278644561768, "learning_rate": 0.01, "loss": 2.0693, "step": 10323 }, { "epoch": 1.0597290640394088, "grad_norm": 0.05086719989776611, "learning_rate": 0.01, "loss": 2.0991, "step": 10326 }, { "epoch": 1.0600369458128078, "grad_norm": 0.03596855327486992, "learning_rate": 0.01, "loss": 2.1038, "step": 10329 }, { "epoch": 1.0603448275862069, "grad_norm": 0.05059434473514557, "learning_rate": 0.01, "loss": 2.1, "step": 10332 }, { "epoch": 1.0606527093596059, "grad_norm": 0.058818116784095764, "learning_rate": 0.01, "loss": 2.0855, "step": 10335 }, { "epoch": 1.060960591133005, "grad_norm": 0.14139403402805328, "learning_rate": 0.01, "loss": 2.0755, "step": 10338 }, { "epoch": 1.061268472906404, "grad_norm": 0.12123113870620728, "learning_rate": 0.01, "loss": 2.0896, "step": 10341 }, { "epoch": 1.061576354679803, "grad_norm": 0.04767270013689995, "learning_rate": 0.01, "loss": 2.11, "step": 10344 }, { "epoch": 1.061884236453202, "grad_norm": 0.03506815433502197, "learning_rate": 0.01, "loss": 2.0953, "step": 10347 }, { "epoch": 1.062192118226601, "grad_norm": 0.08807789534330368, "learning_rate": 0.01, "loss": 2.0903, "step": 10350 }, { "epoch": 1.0625, "grad_norm": 0.1130862608551979, "learning_rate": 0.01, "loss": 2.0888, "step": 10353 }, { "epoch": 1.062807881773399, "grad_norm": 0.05720696598291397, "learning_rate": 0.01, "loss": 2.0904, "step": 10356 }, { "epoch": 1.063115763546798, "grad_norm": 0.057933416217565536, "learning_rate": 0.01, "loss": 2.1138, "step": 10359 }, { "epoch": 1.063423645320197, "grad_norm": 0.056713253259658813, "learning_rate": 0.01, "loss": 2.0965, "step": 10362 }, { "epoch": 1.063731527093596, "grad_norm": 0.05062280595302582, "learning_rate": 0.01, "loss": 2.1058, "step": 10365 }, { "epoch": 1.064039408866995, "grad_norm": 0.03439073637127876, "learning_rate": 0.01, "loss": 2.0945, "step": 10368 }, { "epoch": 1.0643472906403941, "grad_norm": 0.10244173556566238, "learning_rate": 0.01, "loss": 2.0916, "step": 10371 }, { "epoch": 1.0646551724137931, "grad_norm": 0.04706069454550743, "learning_rate": 0.01, "loss": 2.103, "step": 10374 }, { "epoch": 1.0649630541871922, "grad_norm": 0.11580058932304382, "learning_rate": 0.01, "loss": 2.0995, "step": 10377 }, { "epoch": 1.0652709359605912, "grad_norm": 0.044736508280038834, "learning_rate": 0.01, "loss": 2.0906, "step": 10380 }, { "epoch": 1.0655788177339902, "grad_norm": 0.08990567922592163, "learning_rate": 0.01, "loss": 2.1197, "step": 10383 }, { "epoch": 1.0658866995073892, "grad_norm": 0.06923419237136841, "learning_rate": 0.01, "loss": 2.0997, "step": 10386 }, { "epoch": 1.0661945812807883, "grad_norm": 0.059495240449905396, "learning_rate": 0.01, "loss": 2.1106, "step": 10389 }, { "epoch": 1.0665024630541873, "grad_norm": 0.07906550914049149, "learning_rate": 0.01, "loss": 2.1196, "step": 10392 }, { "epoch": 1.0668103448275863, "grad_norm": 0.08792297542095184, "learning_rate": 0.01, "loss": 2.0985, "step": 10395 }, { "epoch": 1.0671182266009853, "grad_norm": 0.06077072396874428, "learning_rate": 0.01, "loss": 2.088, "step": 10398 }, { "epoch": 1.0674261083743843, "grad_norm": 0.03865751996636391, "learning_rate": 0.01, "loss": 2.0894, "step": 10401 }, { "epoch": 1.0677339901477834, "grad_norm": 0.03158612549304962, "learning_rate": 0.01, "loss": 2.0861, "step": 10404 }, { "epoch": 1.0680418719211822, "grad_norm": 0.03455328568816185, "learning_rate": 0.01, "loss": 2.0819, "step": 10407 }, { "epoch": 1.0683497536945812, "grad_norm": 0.062100328505039215, "learning_rate": 0.01, "loss": 2.0967, "step": 10410 }, { "epoch": 1.0686576354679802, "grad_norm": 0.10934283584356308, "learning_rate": 0.01, "loss": 2.1135, "step": 10413 }, { "epoch": 1.0689655172413792, "grad_norm": 0.07184179127216339, "learning_rate": 0.01, "loss": 2.0949, "step": 10416 }, { "epoch": 1.0692733990147782, "grad_norm": 0.06610151380300522, "learning_rate": 0.01, "loss": 2.1001, "step": 10419 }, { "epoch": 1.0695812807881773, "grad_norm": 0.06064629554748535, "learning_rate": 0.01, "loss": 2.0835, "step": 10422 }, { "epoch": 1.0698891625615763, "grad_norm": 0.0531432181596756, "learning_rate": 0.01, "loss": 2.1105, "step": 10425 }, { "epoch": 1.0701970443349753, "grad_norm": 0.056448470801115036, "learning_rate": 0.01, "loss": 2.0724, "step": 10428 }, { "epoch": 1.0705049261083743, "grad_norm": 0.03736816346645355, "learning_rate": 0.01, "loss": 2.1243, "step": 10431 }, { "epoch": 1.0708128078817734, "grad_norm": 0.12693117558956146, "learning_rate": 0.01, "loss": 2.1156, "step": 10434 }, { "epoch": 1.0711206896551724, "grad_norm": 0.0428193174302578, "learning_rate": 0.01, "loss": 2.1025, "step": 10437 }, { "epoch": 1.0714285714285714, "grad_norm": 0.0464596189558506, "learning_rate": 0.01, "loss": 2.1067, "step": 10440 }, { "epoch": 1.0717364532019704, "grad_norm": 0.07535267621278763, "learning_rate": 0.01, "loss": 2.0785, "step": 10443 }, { "epoch": 1.0720443349753694, "grad_norm": 0.0537327378988266, "learning_rate": 0.01, "loss": 2.0775, "step": 10446 }, { "epoch": 1.0723522167487685, "grad_norm": 0.03783145919442177, "learning_rate": 0.01, "loss": 2.0921, "step": 10449 }, { "epoch": 1.0726600985221675, "grad_norm": 0.052689142525196075, "learning_rate": 0.01, "loss": 2.116, "step": 10452 }, { "epoch": 1.0729679802955665, "grad_norm": 0.1437288373708725, "learning_rate": 0.01, "loss": 2.071, "step": 10455 }, { "epoch": 1.0732758620689655, "grad_norm": 0.07633062452077866, "learning_rate": 0.01, "loss": 2.088, "step": 10458 }, { "epoch": 1.0735837438423645, "grad_norm": 0.061189718544483185, "learning_rate": 0.01, "loss": 2.0796, "step": 10461 }, { "epoch": 1.0738916256157636, "grad_norm": 0.06256800144910812, "learning_rate": 0.01, "loss": 2.1056, "step": 10464 }, { "epoch": 1.0741995073891626, "grad_norm": 0.0745188519358635, "learning_rate": 0.01, "loss": 2.0782, "step": 10467 }, { "epoch": 1.0745073891625616, "grad_norm": 0.0663486197590828, "learning_rate": 0.01, "loss": 2.0704, "step": 10470 }, { "epoch": 1.0748152709359606, "grad_norm": 0.05472427234053612, "learning_rate": 0.01, "loss": 2.082, "step": 10473 }, { "epoch": 1.0751231527093597, "grad_norm": 0.10171230137348175, "learning_rate": 0.01, "loss": 2.1135, "step": 10476 }, { "epoch": 1.0754310344827587, "grad_norm": 0.05689026787877083, "learning_rate": 0.01, "loss": 2.0748, "step": 10479 }, { "epoch": 1.0757389162561577, "grad_norm": 0.0593440905213356, "learning_rate": 0.01, "loss": 2.0922, "step": 10482 }, { "epoch": 1.0760467980295567, "grad_norm": 0.07408995181322098, "learning_rate": 0.01, "loss": 2.0781, "step": 10485 }, { "epoch": 1.0763546798029557, "grad_norm": 0.05688070133328438, "learning_rate": 0.01, "loss": 2.1085, "step": 10488 }, { "epoch": 1.0766625615763548, "grad_norm": 0.05378828942775726, "learning_rate": 0.01, "loss": 2.1084, "step": 10491 }, { "epoch": 1.0769704433497538, "grad_norm": 0.057735592126846313, "learning_rate": 0.01, "loss": 2.1023, "step": 10494 }, { "epoch": 1.0772783251231528, "grad_norm": 0.0586666576564312, "learning_rate": 0.01, "loss": 2.1003, "step": 10497 }, { "epoch": 1.0775862068965518, "grad_norm": 0.12087473273277283, "learning_rate": 0.01, "loss": 2.0974, "step": 10500 }, { "epoch": 1.0778940886699506, "grad_norm": 0.07307861000299454, "learning_rate": 0.01, "loss": 2.0913, "step": 10503 }, { "epoch": 1.0782019704433496, "grad_norm": 0.06621012091636658, "learning_rate": 0.01, "loss": 2.1173, "step": 10506 }, { "epoch": 1.0785098522167487, "grad_norm": 0.0647876039147377, "learning_rate": 0.01, "loss": 2.1006, "step": 10509 }, { "epoch": 1.0788177339901477, "grad_norm": 0.06163914501667023, "learning_rate": 0.01, "loss": 2.0892, "step": 10512 }, { "epoch": 1.0791256157635467, "grad_norm": 0.04312353581190109, "learning_rate": 0.01, "loss": 2.0901, "step": 10515 }, { "epoch": 1.0794334975369457, "grad_norm": 0.0760812908411026, "learning_rate": 0.01, "loss": 2.0995, "step": 10518 }, { "epoch": 1.0797413793103448, "grad_norm": 0.0802140161395073, "learning_rate": 0.01, "loss": 2.0905, "step": 10521 }, { "epoch": 1.0800492610837438, "grad_norm": 0.09008529782295227, "learning_rate": 0.01, "loss": 2.08, "step": 10524 }, { "epoch": 1.0803571428571428, "grad_norm": 0.07469696551561356, "learning_rate": 0.01, "loss": 2.0725, "step": 10527 }, { "epoch": 1.0806650246305418, "grad_norm": 0.08821582794189453, "learning_rate": 0.01, "loss": 2.1086, "step": 10530 }, { "epoch": 1.0809729064039408, "grad_norm": 0.04690997302532196, "learning_rate": 0.01, "loss": 2.1095, "step": 10533 }, { "epoch": 1.0812807881773399, "grad_norm": 0.04316158965229988, "learning_rate": 0.01, "loss": 2.0818, "step": 10536 }, { "epoch": 1.0815886699507389, "grad_norm": 0.06996279209852219, "learning_rate": 0.01, "loss": 2.0993, "step": 10539 }, { "epoch": 1.081896551724138, "grad_norm": 0.10073279589414597, "learning_rate": 0.01, "loss": 2.112, "step": 10542 }, { "epoch": 1.082204433497537, "grad_norm": 0.0448322668671608, "learning_rate": 0.01, "loss": 2.0834, "step": 10545 }, { "epoch": 1.082512315270936, "grad_norm": 0.11411638557910919, "learning_rate": 0.01, "loss": 2.1082, "step": 10548 }, { "epoch": 1.082820197044335, "grad_norm": 0.10779088735580444, "learning_rate": 0.01, "loss": 2.0702, "step": 10551 }, { "epoch": 1.083128078817734, "grad_norm": 0.041448626667261124, "learning_rate": 0.01, "loss": 2.1041, "step": 10554 }, { "epoch": 1.083435960591133, "grad_norm": 0.07522560656070709, "learning_rate": 0.01, "loss": 2.0794, "step": 10557 }, { "epoch": 1.083743842364532, "grad_norm": 0.048221901059150696, "learning_rate": 0.01, "loss": 2.0936, "step": 10560 }, { "epoch": 1.084051724137931, "grad_norm": 0.05512038618326187, "learning_rate": 0.01, "loss": 2.0898, "step": 10563 }, { "epoch": 1.08435960591133, "grad_norm": 0.07599300891160965, "learning_rate": 0.01, "loss": 2.1246, "step": 10566 }, { "epoch": 1.084667487684729, "grad_norm": 0.06631644070148468, "learning_rate": 0.01, "loss": 2.0861, "step": 10569 }, { "epoch": 1.0849753694581281, "grad_norm": 0.04972488060593605, "learning_rate": 0.01, "loss": 2.11, "step": 10572 }, { "epoch": 1.0852832512315271, "grad_norm": 0.08250217139720917, "learning_rate": 0.01, "loss": 2.1142, "step": 10575 }, { "epoch": 1.0855911330049262, "grad_norm": 0.09104974567890167, "learning_rate": 0.01, "loss": 2.0822, "step": 10578 }, { "epoch": 1.0858990147783252, "grad_norm": 0.057310063391923904, "learning_rate": 0.01, "loss": 2.0819, "step": 10581 }, { "epoch": 1.0862068965517242, "grad_norm": 0.08102291077375412, "learning_rate": 0.01, "loss": 2.0931, "step": 10584 }, { "epoch": 1.0865147783251232, "grad_norm": 0.045641325414180756, "learning_rate": 0.01, "loss": 2.1096, "step": 10587 }, { "epoch": 1.0868226600985222, "grad_norm": 0.05350523442029953, "learning_rate": 0.01, "loss": 2.1151, "step": 10590 }, { "epoch": 1.0871305418719213, "grad_norm": 0.045734379440546036, "learning_rate": 0.01, "loss": 2.1043, "step": 10593 }, { "epoch": 1.0874384236453203, "grad_norm": 0.044645924121141434, "learning_rate": 0.01, "loss": 2.0882, "step": 10596 }, { "epoch": 1.0877463054187193, "grad_norm": 0.046704743057489395, "learning_rate": 0.01, "loss": 2.0823, "step": 10599 }, { "epoch": 1.0880541871921183, "grad_norm": 0.09600807726383209, "learning_rate": 0.01, "loss": 2.09, "step": 10602 }, { "epoch": 1.0883620689655173, "grad_norm": 0.062323443591594696, "learning_rate": 0.01, "loss": 2.091, "step": 10605 }, { "epoch": 1.0886699507389164, "grad_norm": 0.08459887653589249, "learning_rate": 0.01, "loss": 2.094, "step": 10608 }, { "epoch": 1.0889778325123152, "grad_norm": 0.0621943362057209, "learning_rate": 0.01, "loss": 2.0735, "step": 10611 }, { "epoch": 1.0892857142857142, "grad_norm": 0.10963741689920425, "learning_rate": 0.01, "loss": 2.0769, "step": 10614 }, { "epoch": 1.0895935960591132, "grad_norm": 0.07325689494609833, "learning_rate": 0.01, "loss": 2.0905, "step": 10617 }, { "epoch": 1.0899014778325122, "grad_norm": 0.08307964354753494, "learning_rate": 0.01, "loss": 2.0977, "step": 10620 }, { "epoch": 1.0902093596059113, "grad_norm": 0.18072094023227692, "learning_rate": 0.01, "loss": 2.1096, "step": 10623 }, { "epoch": 1.0905172413793103, "grad_norm": 0.10427471250295639, "learning_rate": 0.01, "loss": 2.0761, "step": 10626 }, { "epoch": 1.0908251231527093, "grad_norm": 0.0732191875576973, "learning_rate": 0.01, "loss": 2.1031, "step": 10629 }, { "epoch": 1.0911330049261083, "grad_norm": 0.03703717514872551, "learning_rate": 0.01, "loss": 2.0734, "step": 10632 }, { "epoch": 1.0914408866995073, "grad_norm": 0.04907006770372391, "learning_rate": 0.01, "loss": 2.096, "step": 10635 }, { "epoch": 1.0917487684729064, "grad_norm": 0.04126304015517235, "learning_rate": 0.01, "loss": 2.0824, "step": 10638 }, { "epoch": 1.0920566502463054, "grad_norm": 0.04017401486635208, "learning_rate": 0.01, "loss": 2.0694, "step": 10641 }, { "epoch": 1.0923645320197044, "grad_norm": 0.036132264882326126, "learning_rate": 0.01, "loss": 2.0792, "step": 10644 }, { "epoch": 1.0926724137931034, "grad_norm": 0.06275150179862976, "learning_rate": 0.01, "loss": 2.1172, "step": 10647 }, { "epoch": 1.0929802955665024, "grad_norm": 0.08319203555583954, "learning_rate": 0.01, "loss": 2.0868, "step": 10650 }, { "epoch": 1.0932881773399015, "grad_norm": 0.08663000166416168, "learning_rate": 0.01, "loss": 2.0834, "step": 10653 }, { "epoch": 1.0935960591133005, "grad_norm": 0.10765951871871948, "learning_rate": 0.01, "loss": 2.0891, "step": 10656 }, { "epoch": 1.0939039408866995, "grad_norm": 0.035412587225437164, "learning_rate": 0.01, "loss": 2.0912, "step": 10659 }, { "epoch": 1.0942118226600985, "grad_norm": 0.051735054701566696, "learning_rate": 0.01, "loss": 2.0986, "step": 10662 }, { "epoch": 1.0945197044334976, "grad_norm": 0.04320614039897919, "learning_rate": 0.01, "loss": 2.0912, "step": 10665 }, { "epoch": 1.0948275862068966, "grad_norm": 0.03285462409257889, "learning_rate": 0.01, "loss": 2.0957, "step": 10668 }, { "epoch": 1.0951354679802956, "grad_norm": 0.05172726511955261, "learning_rate": 0.01, "loss": 2.0706, "step": 10671 }, { "epoch": 1.0954433497536946, "grad_norm": 0.04941645637154579, "learning_rate": 0.01, "loss": 2.1018, "step": 10674 }, { "epoch": 1.0957512315270936, "grad_norm": 0.04746576398611069, "learning_rate": 0.01, "loss": 2.1002, "step": 10677 }, { "epoch": 1.0960591133004927, "grad_norm": 0.10900839418172836, "learning_rate": 0.01, "loss": 2.1188, "step": 10680 }, { "epoch": 1.0963669950738917, "grad_norm": 0.06924229860305786, "learning_rate": 0.01, "loss": 2.097, "step": 10683 }, { "epoch": 1.0966748768472907, "grad_norm": 0.11047599464654922, "learning_rate": 0.01, "loss": 2.0607, "step": 10686 }, { "epoch": 1.0969827586206897, "grad_norm": 0.10662158578634262, "learning_rate": 0.01, "loss": 2.078, "step": 10689 }, { "epoch": 1.0972906403940887, "grad_norm": 0.07408568263053894, "learning_rate": 0.01, "loss": 2.0918, "step": 10692 }, { "epoch": 1.0975985221674878, "grad_norm": 0.0471009686589241, "learning_rate": 0.01, "loss": 2.1248, "step": 10695 }, { "epoch": 1.0979064039408868, "grad_norm": 0.049591194838285446, "learning_rate": 0.01, "loss": 2.082, "step": 10698 }, { "epoch": 1.0982142857142858, "grad_norm": 0.0919683426618576, "learning_rate": 0.01, "loss": 2.1229, "step": 10701 }, { "epoch": 1.0985221674876848, "grad_norm": 0.05292963236570358, "learning_rate": 0.01, "loss": 2.1097, "step": 10704 }, { "epoch": 1.0988300492610836, "grad_norm": 0.053880974650382996, "learning_rate": 0.01, "loss": 2.0787, "step": 10707 }, { "epoch": 1.0991379310344827, "grad_norm": 0.05608196556568146, "learning_rate": 0.01, "loss": 2.0735, "step": 10710 }, { "epoch": 1.0994458128078817, "grad_norm": 0.06456641852855682, "learning_rate": 0.01, "loss": 2.1148, "step": 10713 }, { "epoch": 1.0997536945812807, "grad_norm": 0.08165917545557022, "learning_rate": 0.01, "loss": 2.1199, "step": 10716 }, { "epoch": 1.1000615763546797, "grad_norm": 0.0773044228553772, "learning_rate": 0.01, "loss": 2.0972, "step": 10719 }, { "epoch": 1.1003694581280787, "grad_norm": 0.07669848203659058, "learning_rate": 0.01, "loss": 2.101, "step": 10722 }, { "epoch": 1.1006773399014778, "grad_norm": 0.0773942843079567, "learning_rate": 0.01, "loss": 2.0573, "step": 10725 }, { "epoch": 1.1009852216748768, "grad_norm": 0.06698640435934067, "learning_rate": 0.01, "loss": 2.1189, "step": 10728 }, { "epoch": 1.1012931034482758, "grad_norm": 0.098200224339962, "learning_rate": 0.01, "loss": 2.0739, "step": 10731 }, { "epoch": 1.1016009852216748, "grad_norm": 0.06676481664180756, "learning_rate": 0.01, "loss": 2.097, "step": 10734 }, { "epoch": 1.1019088669950738, "grad_norm": 0.03925321251153946, "learning_rate": 0.01, "loss": 2.0904, "step": 10737 }, { "epoch": 1.1022167487684729, "grad_norm": 0.08387935161590576, "learning_rate": 0.01, "loss": 2.1069, "step": 10740 }, { "epoch": 1.1025246305418719, "grad_norm": 0.06382130831480026, "learning_rate": 0.01, "loss": 2.091, "step": 10743 }, { "epoch": 1.102832512315271, "grad_norm": 0.04457903653383255, "learning_rate": 0.01, "loss": 2.074, "step": 10746 }, { "epoch": 1.10314039408867, "grad_norm": 0.057858239859342575, "learning_rate": 0.01, "loss": 2.1021, "step": 10749 }, { "epoch": 1.103448275862069, "grad_norm": 0.055992983281612396, "learning_rate": 0.01, "loss": 2.0894, "step": 10752 }, { "epoch": 1.103756157635468, "grad_norm": 0.10200835764408112, "learning_rate": 0.01, "loss": 2.0948, "step": 10755 }, { "epoch": 1.104064039408867, "grad_norm": 0.11163626611232758, "learning_rate": 0.01, "loss": 2.0963, "step": 10758 }, { "epoch": 1.104371921182266, "grad_norm": 0.11462046951055527, "learning_rate": 0.01, "loss": 2.0808, "step": 10761 }, { "epoch": 1.104679802955665, "grad_norm": 0.08823121339082718, "learning_rate": 0.01, "loss": 2.1136, "step": 10764 }, { "epoch": 1.104987684729064, "grad_norm": 0.08843538910150528, "learning_rate": 0.01, "loss": 2.0767, "step": 10767 }, { "epoch": 1.105295566502463, "grad_norm": 0.05961614102125168, "learning_rate": 0.01, "loss": 2.1067, "step": 10770 }, { "epoch": 1.105603448275862, "grad_norm": 0.08095360547304153, "learning_rate": 0.01, "loss": 2.066, "step": 10773 }, { "epoch": 1.1059113300492611, "grad_norm": 0.08094312995672226, "learning_rate": 0.01, "loss": 2.0849, "step": 10776 }, { "epoch": 1.1062192118226601, "grad_norm": 0.05718453973531723, "learning_rate": 0.01, "loss": 2.1097, "step": 10779 }, { "epoch": 1.1065270935960592, "grad_norm": 0.0537499338388443, "learning_rate": 0.01, "loss": 2.082, "step": 10782 }, { "epoch": 1.1068349753694582, "grad_norm": 0.06437748670578003, "learning_rate": 0.01, "loss": 2.0982, "step": 10785 }, { "epoch": 1.1071428571428572, "grad_norm": 0.03420199081301689, "learning_rate": 0.01, "loss": 2.0919, "step": 10788 }, { "epoch": 1.1074507389162562, "grad_norm": 0.049510665237903595, "learning_rate": 0.01, "loss": 2.0777, "step": 10791 }, { "epoch": 1.1077586206896552, "grad_norm": 0.044145356863737106, "learning_rate": 0.01, "loss": 2.0994, "step": 10794 }, { "epoch": 1.1080665024630543, "grad_norm": 0.0494622103869915, "learning_rate": 0.01, "loss": 2.103, "step": 10797 }, { "epoch": 1.1083743842364533, "grad_norm": 0.039029188454151154, "learning_rate": 0.01, "loss": 2.1026, "step": 10800 }, { "epoch": 1.1086822660098523, "grad_norm": 0.05786842480301857, "learning_rate": 0.01, "loss": 2.0767, "step": 10803 }, { "epoch": 1.1089901477832513, "grad_norm": 0.07576561719179153, "learning_rate": 0.01, "loss": 2.1078, "step": 10806 }, { "epoch": 1.1092980295566504, "grad_norm": 0.084762342274189, "learning_rate": 0.01, "loss": 2.1027, "step": 10809 }, { "epoch": 1.1096059113300494, "grad_norm": 0.05042179673910141, "learning_rate": 0.01, "loss": 2.0742, "step": 10812 }, { "epoch": 1.1099137931034482, "grad_norm": 0.07194402068853378, "learning_rate": 0.01, "loss": 2.0985, "step": 10815 }, { "epoch": 1.1102216748768472, "grad_norm": 0.13966146111488342, "learning_rate": 0.01, "loss": 2.0924, "step": 10818 }, { "epoch": 1.1105295566502462, "grad_norm": 0.060582250356674194, "learning_rate": 0.01, "loss": 2.1039, "step": 10821 }, { "epoch": 1.1108374384236452, "grad_norm": 0.03663609176874161, "learning_rate": 0.01, "loss": 2.0731, "step": 10824 }, { "epoch": 1.1111453201970443, "grad_norm": 0.09468091279268265, "learning_rate": 0.01, "loss": 2.0961, "step": 10827 }, { "epoch": 1.1114532019704433, "grad_norm": 0.07199615240097046, "learning_rate": 0.01, "loss": 2.0834, "step": 10830 }, { "epoch": 1.1117610837438423, "grad_norm": 0.06624965369701385, "learning_rate": 0.01, "loss": 2.1286, "step": 10833 }, { "epoch": 1.1120689655172413, "grad_norm": 0.0414128340780735, "learning_rate": 0.01, "loss": 2.0922, "step": 10836 }, { "epoch": 1.1123768472906403, "grad_norm": 0.06416642665863037, "learning_rate": 0.01, "loss": 2.0908, "step": 10839 }, { "epoch": 1.1126847290640394, "grad_norm": 0.05309692397713661, "learning_rate": 0.01, "loss": 2.117, "step": 10842 }, { "epoch": 1.1129926108374384, "grad_norm": 0.04576392099261284, "learning_rate": 0.01, "loss": 2.0801, "step": 10845 }, { "epoch": 1.1133004926108374, "grad_norm": 0.0887250304222107, "learning_rate": 0.01, "loss": 2.0815, "step": 10848 }, { "epoch": 1.1136083743842364, "grad_norm": 0.061223480850458145, "learning_rate": 0.01, "loss": 2.0607, "step": 10851 }, { "epoch": 1.1139162561576355, "grad_norm": 0.12983545660972595, "learning_rate": 0.01, "loss": 2.0882, "step": 10854 }, { "epoch": 1.1142241379310345, "grad_norm": 0.09382637590169907, "learning_rate": 0.01, "loss": 2.0838, "step": 10857 }, { "epoch": 1.1145320197044335, "grad_norm": 0.04275491461157799, "learning_rate": 0.01, "loss": 2.0905, "step": 10860 }, { "epoch": 1.1148399014778325, "grad_norm": 0.044315680861473083, "learning_rate": 0.01, "loss": 2.0924, "step": 10863 }, { "epoch": 1.1151477832512315, "grad_norm": 0.05177663639187813, "learning_rate": 0.01, "loss": 2.0985, "step": 10866 }, { "epoch": 1.1154556650246306, "grad_norm": 0.08161107450723648, "learning_rate": 0.01, "loss": 2.105, "step": 10869 }, { "epoch": 1.1157635467980296, "grad_norm": 0.08273576200008392, "learning_rate": 0.01, "loss": 2.0991, "step": 10872 }, { "epoch": 1.1160714285714286, "grad_norm": 0.04973771795630455, "learning_rate": 0.01, "loss": 2.0849, "step": 10875 }, { "epoch": 1.1163793103448276, "grad_norm": 0.036696773022413254, "learning_rate": 0.01, "loss": 2.0651, "step": 10878 }, { "epoch": 1.1166871921182266, "grad_norm": 0.03647401183843613, "learning_rate": 0.01, "loss": 2.0772, "step": 10881 }, { "epoch": 1.1169950738916257, "grad_norm": 0.03360895812511444, "learning_rate": 0.01, "loss": 2.0952, "step": 10884 }, { "epoch": 1.1173029556650247, "grad_norm": 0.037918057292699814, "learning_rate": 0.01, "loss": 2.0776, "step": 10887 }, { "epoch": 1.1176108374384237, "grad_norm": 0.10544890910387039, "learning_rate": 0.01, "loss": 2.1079, "step": 10890 }, { "epoch": 1.1179187192118227, "grad_norm": 0.15091745555400848, "learning_rate": 0.01, "loss": 2.1231, "step": 10893 }, { "epoch": 1.1182266009852218, "grad_norm": 0.07386527210474014, "learning_rate": 0.01, "loss": 2.0922, "step": 10896 }, { "epoch": 1.1185344827586208, "grad_norm": 0.04889804869890213, "learning_rate": 0.01, "loss": 2.1016, "step": 10899 }, { "epoch": 1.1188423645320198, "grad_norm": 0.04805940017104149, "learning_rate": 0.01, "loss": 2.0833, "step": 10902 }, { "epoch": 1.1191502463054188, "grad_norm": 0.040073320269584656, "learning_rate": 0.01, "loss": 2.0943, "step": 10905 }, { "epoch": 1.1194581280788178, "grad_norm": 0.046124961227178574, "learning_rate": 0.01, "loss": 2.0891, "step": 10908 }, { "epoch": 1.1197660098522166, "grad_norm": 0.04982076957821846, "learning_rate": 0.01, "loss": 2.0595, "step": 10911 }, { "epoch": 1.1200738916256157, "grad_norm": 0.036569107323884964, "learning_rate": 0.01, "loss": 2.0602, "step": 10914 }, { "epoch": 1.1203817733990147, "grad_norm": 0.033519893884658813, "learning_rate": 0.01, "loss": 2.1026, "step": 10917 }, { "epoch": 1.1206896551724137, "grad_norm": 0.0513744130730629, "learning_rate": 0.01, "loss": 2.1119, "step": 10920 }, { "epoch": 1.1209975369458127, "grad_norm": 0.08677095174789429, "learning_rate": 0.01, "loss": 2.0791, "step": 10923 }, { "epoch": 1.1213054187192117, "grad_norm": 0.1263512223958969, "learning_rate": 0.01, "loss": 2.0912, "step": 10926 }, { "epoch": 1.1216133004926108, "grad_norm": 0.0737731009721756, "learning_rate": 0.01, "loss": 2.1193, "step": 10929 }, { "epoch": 1.1219211822660098, "grad_norm": 0.045122213661670685, "learning_rate": 0.01, "loss": 2.1029, "step": 10932 }, { "epoch": 1.1222290640394088, "grad_norm": 0.04616571217775345, "learning_rate": 0.01, "loss": 2.062, "step": 10935 }, { "epoch": 1.1225369458128078, "grad_norm": 0.03985420614480972, "learning_rate": 0.01, "loss": 2.0868, "step": 10938 }, { "epoch": 1.1228448275862069, "grad_norm": 0.11042526364326477, "learning_rate": 0.01, "loss": 2.1057, "step": 10941 }, { "epoch": 1.1231527093596059, "grad_norm": 0.08071359992027283, "learning_rate": 0.01, "loss": 2.0796, "step": 10944 }, { "epoch": 1.123460591133005, "grad_norm": 0.049534909427165985, "learning_rate": 0.01, "loss": 2.1055, "step": 10947 }, { "epoch": 1.123768472906404, "grad_norm": 0.08341135829687119, "learning_rate": 0.01, "loss": 2.0948, "step": 10950 }, { "epoch": 1.124076354679803, "grad_norm": 0.03842156007885933, "learning_rate": 0.01, "loss": 2.1051, "step": 10953 }, { "epoch": 1.124384236453202, "grad_norm": 0.04978267103433609, "learning_rate": 0.01, "loss": 2.0927, "step": 10956 }, { "epoch": 1.124692118226601, "grad_norm": 0.04545191302895546, "learning_rate": 0.01, "loss": 2.0847, "step": 10959 }, { "epoch": 1.125, "grad_norm": 0.10103368014097214, "learning_rate": 0.01, "loss": 2.105, "step": 10962 }, { "epoch": 1.125307881773399, "grad_norm": 0.05956938862800598, "learning_rate": 0.01, "loss": 2.1043, "step": 10965 }, { "epoch": 1.125615763546798, "grad_norm": 0.048797741532325745, "learning_rate": 0.01, "loss": 2.1044, "step": 10968 }, { "epoch": 1.125923645320197, "grad_norm": 0.041901495307683945, "learning_rate": 0.01, "loss": 2.0847, "step": 10971 }, { "epoch": 1.126231527093596, "grad_norm": 0.14950989186763763, "learning_rate": 0.01, "loss": 2.0919, "step": 10974 }, { "epoch": 1.126539408866995, "grad_norm": 0.049760669469833374, "learning_rate": 0.01, "loss": 2.0707, "step": 10977 }, { "epoch": 1.1268472906403941, "grad_norm": 0.07016187906265259, "learning_rate": 0.01, "loss": 2.0709, "step": 10980 }, { "epoch": 1.1271551724137931, "grad_norm": 0.057528458535671234, "learning_rate": 0.01, "loss": 2.0759, "step": 10983 }, { "epoch": 1.1274630541871922, "grad_norm": 0.06690733879804611, "learning_rate": 0.01, "loss": 2.102, "step": 10986 }, { "epoch": 1.1277709359605912, "grad_norm": 0.05225450173020363, "learning_rate": 0.01, "loss": 2.0675, "step": 10989 }, { "epoch": 1.1280788177339902, "grad_norm": 0.048363544046878815, "learning_rate": 0.01, "loss": 2.0634, "step": 10992 }, { "epoch": 1.1283866995073892, "grad_norm": 0.05356382206082344, "learning_rate": 0.01, "loss": 2.1003, "step": 10995 }, { "epoch": 1.1286945812807883, "grad_norm": 0.06921149045228958, "learning_rate": 0.01, "loss": 2.0934, "step": 10998 }, { "epoch": 1.1290024630541873, "grad_norm": 0.04210525006055832, "learning_rate": 0.01, "loss": 2.096, "step": 11001 }, { "epoch": 1.1293103448275863, "grad_norm": 0.11790584027767181, "learning_rate": 0.01, "loss": 2.0746, "step": 11004 }, { "epoch": 1.1296182266009853, "grad_norm": 0.08045307546854019, "learning_rate": 0.01, "loss": 2.0929, "step": 11007 }, { "epoch": 1.1299261083743843, "grad_norm": 0.10474243015050888, "learning_rate": 0.01, "loss": 2.1418, "step": 11010 }, { "epoch": 1.1302339901477834, "grad_norm": 0.06073759123682976, "learning_rate": 0.01, "loss": 2.1089, "step": 11013 }, { "epoch": 1.1305418719211824, "grad_norm": 0.057685475796461105, "learning_rate": 0.01, "loss": 2.0959, "step": 11016 }, { "epoch": 1.1308497536945814, "grad_norm": 0.04218476638197899, "learning_rate": 0.01, "loss": 2.0834, "step": 11019 }, { "epoch": 1.1311576354679802, "grad_norm": 0.04814853519201279, "learning_rate": 0.01, "loss": 2.1134, "step": 11022 }, { "epoch": 1.1314655172413792, "grad_norm": 0.1344536989927292, "learning_rate": 0.01, "loss": 2.1121, "step": 11025 }, { "epoch": 1.1317733990147782, "grad_norm": 0.057088855654001236, "learning_rate": 0.01, "loss": 2.0978, "step": 11028 }, { "epoch": 1.1320812807881773, "grad_norm": 0.04567364603281021, "learning_rate": 0.01, "loss": 2.0837, "step": 11031 }, { "epoch": 1.1323891625615763, "grad_norm": 0.07506916671991348, "learning_rate": 0.01, "loss": 2.0926, "step": 11034 }, { "epoch": 1.1326970443349753, "grad_norm": 0.05837171897292137, "learning_rate": 0.01, "loss": 2.0961, "step": 11037 }, { "epoch": 1.1330049261083743, "grad_norm": 0.0457015223801136, "learning_rate": 0.01, "loss": 2.101, "step": 11040 }, { "epoch": 1.1333128078817734, "grad_norm": 0.061310991644859314, "learning_rate": 0.01, "loss": 2.1128, "step": 11043 }, { "epoch": 1.1336206896551724, "grad_norm": 0.05517786741256714, "learning_rate": 0.01, "loss": 2.0844, "step": 11046 }, { "epoch": 1.1339285714285714, "grad_norm": 0.07835637778043747, "learning_rate": 0.01, "loss": 2.0996, "step": 11049 }, { "epoch": 1.1342364532019704, "grad_norm": 0.05821641907095909, "learning_rate": 0.01, "loss": 2.1074, "step": 11052 }, { "epoch": 1.1345443349753694, "grad_norm": 0.04394884407520294, "learning_rate": 0.01, "loss": 2.0799, "step": 11055 }, { "epoch": 1.1348522167487685, "grad_norm": 0.05148720741271973, "learning_rate": 0.01, "loss": 2.0856, "step": 11058 }, { "epoch": 1.1351600985221675, "grad_norm": 0.05766841769218445, "learning_rate": 0.01, "loss": 2.0973, "step": 11061 }, { "epoch": 1.1354679802955665, "grad_norm": 0.09894710779190063, "learning_rate": 0.01, "loss": 2.0831, "step": 11064 }, { "epoch": 1.1357758620689655, "grad_norm": 0.11916875094175339, "learning_rate": 0.01, "loss": 2.1044, "step": 11067 }, { "epoch": 1.1360837438423645, "grad_norm": 0.03926829248666763, "learning_rate": 0.01, "loss": 2.0866, "step": 11070 }, { "epoch": 1.1363916256157636, "grad_norm": 0.05105220153927803, "learning_rate": 0.01, "loss": 2.0911, "step": 11073 }, { "epoch": 1.1366995073891626, "grad_norm": 0.04516123607754707, "learning_rate": 0.01, "loss": 2.0693, "step": 11076 }, { "epoch": 1.1370073891625616, "grad_norm": 0.046173594892024994, "learning_rate": 0.01, "loss": 2.092, "step": 11079 }, { "epoch": 1.1373152709359606, "grad_norm": 0.05173357576131821, "learning_rate": 0.01, "loss": 2.1007, "step": 11082 }, { "epoch": 1.1376231527093597, "grad_norm": 0.06486919522285461, "learning_rate": 0.01, "loss": 2.0775, "step": 11085 }, { "epoch": 1.1379310344827587, "grad_norm": 0.09763675928115845, "learning_rate": 0.01, "loss": 2.0942, "step": 11088 }, { "epoch": 1.1382389162561577, "grad_norm": 0.1281820833683014, "learning_rate": 0.01, "loss": 2.0903, "step": 11091 }, { "epoch": 1.1385467980295567, "grad_norm": 0.05734977498650551, "learning_rate": 0.01, "loss": 2.0891, "step": 11094 }, { "epoch": 1.1388546798029557, "grad_norm": 0.06809762120246887, "learning_rate": 0.01, "loss": 2.0855, "step": 11097 }, { "epoch": 1.1391625615763548, "grad_norm": 0.05105281621217728, "learning_rate": 0.01, "loss": 2.0975, "step": 11100 }, { "epoch": 1.1394704433497538, "grad_norm": 0.07381090521812439, "learning_rate": 0.01, "loss": 2.0762, "step": 11103 }, { "epoch": 1.1397783251231528, "grad_norm": 0.050722070038318634, "learning_rate": 0.01, "loss": 2.0778, "step": 11106 }, { "epoch": 1.1400862068965516, "grad_norm": 0.03850618377327919, "learning_rate": 0.01, "loss": 2.0626, "step": 11109 }, { "epoch": 1.1403940886699506, "grad_norm": 0.08264841884374619, "learning_rate": 0.01, "loss": 2.1258, "step": 11112 }, { "epoch": 1.1407019704433496, "grad_norm": 0.06493505835533142, "learning_rate": 0.01, "loss": 2.0961, "step": 11115 }, { "epoch": 1.1410098522167487, "grad_norm": 0.06895186007022858, "learning_rate": 0.01, "loss": 2.1207, "step": 11118 }, { "epoch": 1.1413177339901477, "grad_norm": 0.042232003062963486, "learning_rate": 0.01, "loss": 2.0724, "step": 11121 }, { "epoch": 1.1416256157635467, "grad_norm": 0.10296539217233658, "learning_rate": 0.01, "loss": 2.1126, "step": 11124 }, { "epoch": 1.1419334975369457, "grad_norm": 0.043095991015434265, "learning_rate": 0.01, "loss": 2.0928, "step": 11127 }, { "epoch": 1.1422413793103448, "grad_norm": 0.046020470559597015, "learning_rate": 0.01, "loss": 2.092, "step": 11130 }, { "epoch": 1.1425492610837438, "grad_norm": 0.04754204675555229, "learning_rate": 0.01, "loss": 2.126, "step": 11133 }, { "epoch": 1.1428571428571428, "grad_norm": 0.03732014447450638, "learning_rate": 0.01, "loss": 2.0637, "step": 11136 }, { "epoch": 1.1431650246305418, "grad_norm": 0.039080169051885605, "learning_rate": 0.01, "loss": 2.0877, "step": 11139 }, { "epoch": 1.1434729064039408, "grad_norm": 0.04575611278414726, "learning_rate": 0.01, "loss": 2.0646, "step": 11142 }, { "epoch": 1.1437807881773399, "grad_norm": 0.11764515191316605, "learning_rate": 0.01, "loss": 2.0964, "step": 11145 }, { "epoch": 1.1440886699507389, "grad_norm": 0.10087098181247711, "learning_rate": 0.01, "loss": 2.069, "step": 11148 }, { "epoch": 1.144396551724138, "grad_norm": 0.05462269112467766, "learning_rate": 0.01, "loss": 2.0902, "step": 11151 }, { "epoch": 1.144704433497537, "grad_norm": 0.06296168267726898, "learning_rate": 0.01, "loss": 2.1257, "step": 11154 }, { "epoch": 1.145012315270936, "grad_norm": 0.041026949882507324, "learning_rate": 0.01, "loss": 2.0672, "step": 11157 }, { "epoch": 1.145320197044335, "grad_norm": 0.05761269852519035, "learning_rate": 0.01, "loss": 2.0833, "step": 11160 }, { "epoch": 1.145628078817734, "grad_norm": 0.12491103261709213, "learning_rate": 0.01, "loss": 2.0681, "step": 11163 }, { "epoch": 1.145935960591133, "grad_norm": 0.09269531071186066, "learning_rate": 0.01, "loss": 2.0618, "step": 11166 }, { "epoch": 1.146243842364532, "grad_norm": 0.05623659864068031, "learning_rate": 0.01, "loss": 2.0861, "step": 11169 }, { "epoch": 1.146551724137931, "grad_norm": 0.04075248911976814, "learning_rate": 0.01, "loss": 2.0513, "step": 11172 }, { "epoch": 1.14685960591133, "grad_norm": 0.04557061940431595, "learning_rate": 0.01, "loss": 2.0876, "step": 11175 }, { "epoch": 1.147167487684729, "grad_norm": 0.05686535686254501, "learning_rate": 0.01, "loss": 2.0746, "step": 11178 }, { "epoch": 1.1474753694581281, "grad_norm": 0.04164785146713257, "learning_rate": 0.01, "loss": 2.0684, "step": 11181 }, { "epoch": 1.1477832512315271, "grad_norm": 0.05453825742006302, "learning_rate": 0.01, "loss": 2.0809, "step": 11184 }, { "epoch": 1.1480911330049262, "grad_norm": 0.15215769410133362, "learning_rate": 0.01, "loss": 2.0806, "step": 11187 }, { "epoch": 1.1483990147783252, "grad_norm": 0.14634385704994202, "learning_rate": 0.01, "loss": 2.11, "step": 11190 }, { "epoch": 1.1487068965517242, "grad_norm": 0.06655893474817276, "learning_rate": 0.01, "loss": 2.0953, "step": 11193 }, { "epoch": 1.1490147783251232, "grad_norm": 0.07074826955795288, "learning_rate": 0.01, "loss": 2.0984, "step": 11196 }, { "epoch": 1.1493226600985222, "grad_norm": 0.044581200927495956, "learning_rate": 0.01, "loss": 2.0816, "step": 11199 }, { "epoch": 1.1496305418719213, "grad_norm": 0.06769565492868423, "learning_rate": 0.01, "loss": 2.0809, "step": 11202 }, { "epoch": 1.1499384236453203, "grad_norm": 0.11437363177537918, "learning_rate": 0.01, "loss": 2.0761, "step": 11205 }, { "epoch": 1.1502463054187193, "grad_norm": 0.040699586272239685, "learning_rate": 0.01, "loss": 2.1028, "step": 11208 }, { "epoch": 1.1505541871921183, "grad_norm": 0.06996385753154755, "learning_rate": 0.01, "loss": 2.0755, "step": 11211 }, { "epoch": 1.1508620689655173, "grad_norm": 0.04621044918894768, "learning_rate": 0.01, "loss": 2.11, "step": 11214 }, { "epoch": 1.1511699507389164, "grad_norm": 0.07714984565973282, "learning_rate": 0.01, "loss": 2.1008, "step": 11217 }, { "epoch": 1.1514778325123154, "grad_norm": 0.11194620281457901, "learning_rate": 0.01, "loss": 2.093, "step": 11220 }, { "epoch": 1.1517857142857142, "grad_norm": 0.08710391074419022, "learning_rate": 0.01, "loss": 2.1017, "step": 11223 }, { "epoch": 1.1520935960591132, "grad_norm": 0.06653989106416702, "learning_rate": 0.01, "loss": 2.1084, "step": 11226 }, { "epoch": 1.1524014778325122, "grad_norm": 0.056591540575027466, "learning_rate": 0.01, "loss": 2.107, "step": 11229 }, { "epoch": 1.1527093596059113, "grad_norm": 0.056475669145584106, "learning_rate": 0.01, "loss": 2.0774, "step": 11232 }, { "epoch": 1.1530172413793103, "grad_norm": 0.08408259600400925, "learning_rate": 0.01, "loss": 2.1193, "step": 11235 }, { "epoch": 1.1533251231527093, "grad_norm": 0.06853178143501282, "learning_rate": 0.01, "loss": 2.0796, "step": 11238 }, { "epoch": 1.1536330049261083, "grad_norm": 0.11699818074703217, "learning_rate": 0.01, "loss": 2.0932, "step": 11241 }, { "epoch": 1.1539408866995073, "grad_norm": 0.06365542113780975, "learning_rate": 0.01, "loss": 2.079, "step": 11244 }, { "epoch": 1.1542487684729064, "grad_norm": 0.040505316108465195, "learning_rate": 0.01, "loss": 2.0688, "step": 11247 }, { "epoch": 1.1545566502463054, "grad_norm": 0.06958888471126556, "learning_rate": 0.01, "loss": 2.0882, "step": 11250 }, { "epoch": 1.1548645320197044, "grad_norm": 0.08968232572078705, "learning_rate": 0.01, "loss": 2.064, "step": 11253 }, { "epoch": 1.1551724137931034, "grad_norm": 0.05143412947654724, "learning_rate": 0.01, "loss": 2.0682, "step": 11256 }, { "epoch": 1.1554802955665024, "grad_norm": 0.1219927966594696, "learning_rate": 0.01, "loss": 2.1038, "step": 11259 }, { "epoch": 1.1557881773399015, "grad_norm": 0.11974184960126877, "learning_rate": 0.01, "loss": 2.1099, "step": 11262 }, { "epoch": 1.1560960591133005, "grad_norm": 0.09557066112756729, "learning_rate": 0.01, "loss": 2.0976, "step": 11265 }, { "epoch": 1.1564039408866995, "grad_norm": 0.10169565677642822, "learning_rate": 0.01, "loss": 2.0904, "step": 11268 }, { "epoch": 1.1567118226600985, "grad_norm": 0.0586596317589283, "learning_rate": 0.01, "loss": 2.0787, "step": 11271 }, { "epoch": 1.1570197044334976, "grad_norm": 0.0423048697412014, "learning_rate": 0.01, "loss": 2.1084, "step": 11274 }, { "epoch": 1.1573275862068966, "grad_norm": 0.07394707947969437, "learning_rate": 0.01, "loss": 2.0802, "step": 11277 }, { "epoch": 1.1576354679802956, "grad_norm": 0.05507487431168556, "learning_rate": 0.01, "loss": 2.0777, "step": 11280 }, { "epoch": 1.1579433497536946, "grad_norm": 0.03251083940267563, "learning_rate": 0.01, "loss": 2.1128, "step": 11283 }, { "epoch": 1.1582512315270936, "grad_norm": 0.04480468109250069, "learning_rate": 0.01, "loss": 2.1162, "step": 11286 }, { "epoch": 1.1585591133004927, "grad_norm": 0.08275749534368515, "learning_rate": 0.01, "loss": 2.0808, "step": 11289 }, { "epoch": 1.1588669950738917, "grad_norm": 0.08199379593133926, "learning_rate": 0.01, "loss": 2.1006, "step": 11292 }, { "epoch": 1.1591748768472907, "grad_norm": 0.1015235111117363, "learning_rate": 0.01, "loss": 2.0926, "step": 11295 }, { "epoch": 1.1594827586206897, "grad_norm": 0.08872919529676437, "learning_rate": 0.01, "loss": 2.0739, "step": 11298 }, { "epoch": 1.1597906403940887, "grad_norm": 0.04640579596161842, "learning_rate": 0.01, "loss": 2.09, "step": 11301 }, { "epoch": 1.1600985221674878, "grad_norm": 0.044142261147499084, "learning_rate": 0.01, "loss": 2.115, "step": 11304 }, { "epoch": 1.1604064039408868, "grad_norm": 0.1030762568116188, "learning_rate": 0.01, "loss": 2.1358, "step": 11307 }, { "epoch": 1.1607142857142858, "grad_norm": 0.06712359189987183, "learning_rate": 0.01, "loss": 2.0504, "step": 11310 }, { "epoch": 1.1610221674876846, "grad_norm": 0.05579240992665291, "learning_rate": 0.01, "loss": 2.0915, "step": 11313 }, { "epoch": 1.1613300492610836, "grad_norm": 0.04237228259444237, "learning_rate": 0.01, "loss": 2.0514, "step": 11316 }, { "epoch": 1.1616379310344827, "grad_norm": 0.08990642428398132, "learning_rate": 0.01, "loss": 2.0753, "step": 11319 }, { "epoch": 1.1619458128078817, "grad_norm": 0.09788185358047485, "learning_rate": 0.01, "loss": 2.0907, "step": 11322 }, { "epoch": 1.1622536945812807, "grad_norm": 0.07207074761390686, "learning_rate": 0.01, "loss": 2.0858, "step": 11325 }, { "epoch": 1.1625615763546797, "grad_norm": 0.07704904675483704, "learning_rate": 0.01, "loss": 2.0938, "step": 11328 }, { "epoch": 1.1628694581280787, "grad_norm": 0.07003269344568253, "learning_rate": 0.01, "loss": 2.1031, "step": 11331 }, { "epoch": 1.1631773399014778, "grad_norm": 0.05584646388888359, "learning_rate": 0.01, "loss": 2.0852, "step": 11334 }, { "epoch": 1.1634852216748768, "grad_norm": 0.10223853588104248, "learning_rate": 0.01, "loss": 2.072, "step": 11337 }, { "epoch": 1.1637931034482758, "grad_norm": 0.13336369395256042, "learning_rate": 0.01, "loss": 2.0902, "step": 11340 }, { "epoch": 1.1641009852216748, "grad_norm": 0.04471458122134209, "learning_rate": 0.01, "loss": 2.0884, "step": 11343 }, { "epoch": 1.1644088669950738, "grad_norm": 0.04342315346002579, "learning_rate": 0.01, "loss": 2.0885, "step": 11346 }, { "epoch": 1.1647167487684729, "grad_norm": 0.05172869563102722, "learning_rate": 0.01, "loss": 2.069, "step": 11349 }, { "epoch": 1.1650246305418719, "grad_norm": 0.04304314777255058, "learning_rate": 0.01, "loss": 2.0915, "step": 11352 }, { "epoch": 1.165332512315271, "grad_norm": 0.14592792093753815, "learning_rate": 0.01, "loss": 2.0841, "step": 11355 }, { "epoch": 1.16564039408867, "grad_norm": 0.05238402634859085, "learning_rate": 0.01, "loss": 2.0772, "step": 11358 }, { "epoch": 1.165948275862069, "grad_norm": 0.04800669103860855, "learning_rate": 0.01, "loss": 2.0739, "step": 11361 }, { "epoch": 1.166256157635468, "grad_norm": 0.06648313999176025, "learning_rate": 0.01, "loss": 2.0956, "step": 11364 }, { "epoch": 1.166564039408867, "grad_norm": 0.03402326628565788, "learning_rate": 0.01, "loss": 2.0847, "step": 11367 }, { "epoch": 1.166871921182266, "grad_norm": 0.05076766759157181, "learning_rate": 0.01, "loss": 2.0833, "step": 11370 }, { "epoch": 1.167179802955665, "grad_norm": 0.07221470773220062, "learning_rate": 0.01, "loss": 2.0778, "step": 11373 }, { "epoch": 1.167487684729064, "grad_norm": 0.04556736722588539, "learning_rate": 0.01, "loss": 2.0957, "step": 11376 }, { "epoch": 1.167795566502463, "grad_norm": 0.03702834993600845, "learning_rate": 0.01, "loss": 2.0843, "step": 11379 }, { "epoch": 1.168103448275862, "grad_norm": 0.046527571976184845, "learning_rate": 0.01, "loss": 2.084, "step": 11382 }, { "epoch": 1.1684113300492611, "grad_norm": 0.09520363062620163, "learning_rate": 0.01, "loss": 2.0924, "step": 11385 }, { "epoch": 1.1687192118226601, "grad_norm": 0.12759263813495636, "learning_rate": 0.01, "loss": 2.1265, "step": 11388 }, { "epoch": 1.1690270935960592, "grad_norm": 0.03981192037463188, "learning_rate": 0.01, "loss": 2.076, "step": 11391 }, { "epoch": 1.1693349753694582, "grad_norm": 0.04739897698163986, "learning_rate": 0.01, "loss": 2.1013, "step": 11394 }, { "epoch": 1.1696428571428572, "grad_norm": 0.04937390610575676, "learning_rate": 0.01, "loss": 2.0832, "step": 11397 }, { "epoch": 1.1699507389162562, "grad_norm": 0.07097122073173523, "learning_rate": 0.01, "loss": 2.0699, "step": 11400 }, { "epoch": 1.1702586206896552, "grad_norm": 0.07773783802986145, "learning_rate": 0.01, "loss": 2.1215, "step": 11403 }, { "epoch": 1.1705665024630543, "grad_norm": 0.04591994732618332, "learning_rate": 0.01, "loss": 2.0657, "step": 11406 }, { "epoch": 1.1708743842364533, "grad_norm": 0.08724237233400345, "learning_rate": 0.01, "loss": 2.0817, "step": 11409 }, { "epoch": 1.1711822660098523, "grad_norm": 0.06528041511774063, "learning_rate": 0.01, "loss": 2.0962, "step": 11412 }, { "epoch": 1.1714901477832513, "grad_norm": 0.0660424679517746, "learning_rate": 0.01, "loss": 2.0904, "step": 11415 }, { "epoch": 1.1717980295566504, "grad_norm": 0.08304266631603241, "learning_rate": 0.01, "loss": 2.0912, "step": 11418 }, { "epoch": 1.1721059113300494, "grad_norm": 0.05073266103863716, "learning_rate": 0.01, "loss": 2.0747, "step": 11421 }, { "epoch": 1.1724137931034484, "grad_norm": 0.07305736094713211, "learning_rate": 0.01, "loss": 2.0852, "step": 11424 }, { "epoch": 1.1727216748768472, "grad_norm": 0.09365279227495193, "learning_rate": 0.01, "loss": 2.0883, "step": 11427 }, { "epoch": 1.1730295566502462, "grad_norm": 0.12279067188501358, "learning_rate": 0.01, "loss": 2.0999, "step": 11430 }, { "epoch": 1.1733374384236452, "grad_norm": 0.0589769072830677, "learning_rate": 0.01, "loss": 2.0948, "step": 11433 }, { "epoch": 1.1736453201970443, "grad_norm": 0.06621567159891129, "learning_rate": 0.01, "loss": 2.0823, "step": 11436 }, { "epoch": 1.1739532019704433, "grad_norm": 0.051341816782951355, "learning_rate": 0.01, "loss": 2.0622, "step": 11439 }, { "epoch": 1.1742610837438423, "grad_norm": 0.06027314066886902, "learning_rate": 0.01, "loss": 2.0798, "step": 11442 }, { "epoch": 1.1745689655172413, "grad_norm": 0.10131573677062988, "learning_rate": 0.01, "loss": 2.0882, "step": 11445 }, { "epoch": 1.1748768472906403, "grad_norm": 0.08082377910614014, "learning_rate": 0.01, "loss": 2.0949, "step": 11448 }, { "epoch": 1.1751847290640394, "grad_norm": 0.07095243781805038, "learning_rate": 0.01, "loss": 2.0655, "step": 11451 }, { "epoch": 1.1754926108374384, "grad_norm": 0.07132910192012787, "learning_rate": 0.01, "loss": 2.0903, "step": 11454 }, { "epoch": 1.1758004926108374, "grad_norm": 0.10488838702440262, "learning_rate": 0.01, "loss": 2.0639, "step": 11457 }, { "epoch": 1.1761083743842364, "grad_norm": 0.12755680084228516, "learning_rate": 0.01, "loss": 2.0989, "step": 11460 }, { "epoch": 1.1764162561576355, "grad_norm": 0.12174911797046661, "learning_rate": 0.01, "loss": 2.1026, "step": 11463 }, { "epoch": 1.1767241379310345, "grad_norm": 0.07873964309692383, "learning_rate": 0.01, "loss": 2.0908, "step": 11466 }, { "epoch": 1.1770320197044335, "grad_norm": 0.04275409132242203, "learning_rate": 0.01, "loss": 2.0899, "step": 11469 }, { "epoch": 1.1773399014778325, "grad_norm": 0.046134103089571, "learning_rate": 0.01, "loss": 2.1064, "step": 11472 }, { "epoch": 1.1776477832512315, "grad_norm": 0.07631804049015045, "learning_rate": 0.01, "loss": 2.0811, "step": 11475 }, { "epoch": 1.1779556650246306, "grad_norm": 0.04843062162399292, "learning_rate": 0.01, "loss": 2.1078, "step": 11478 }, { "epoch": 1.1782635467980296, "grad_norm": 0.04664747416973114, "learning_rate": 0.01, "loss": 2.0807, "step": 11481 }, { "epoch": 1.1785714285714286, "grad_norm": 0.042328983545303345, "learning_rate": 0.01, "loss": 2.0898, "step": 11484 }, { "epoch": 1.1788793103448276, "grad_norm": 0.04443054646253586, "learning_rate": 0.01, "loss": 2.092, "step": 11487 }, { "epoch": 1.1791871921182266, "grad_norm": 0.03439139202237129, "learning_rate": 0.01, "loss": 2.0438, "step": 11490 }, { "epoch": 1.1794950738916257, "grad_norm": 0.1651001274585724, "learning_rate": 0.01, "loss": 2.1068, "step": 11493 }, { "epoch": 1.1798029556650247, "grad_norm": 0.04535198211669922, "learning_rate": 0.01, "loss": 2.0951, "step": 11496 }, { "epoch": 1.1801108374384237, "grad_norm": 0.04346736520528793, "learning_rate": 0.01, "loss": 2.1032, "step": 11499 }, { "epoch": 1.1804187192118227, "grad_norm": 0.08131600171327591, "learning_rate": 0.01, "loss": 2.0709, "step": 11502 }, { "epoch": 1.1807266009852218, "grad_norm": 0.0638989582657814, "learning_rate": 0.01, "loss": 2.0676, "step": 11505 }, { "epoch": 1.1810344827586208, "grad_norm": 0.06305757910013199, "learning_rate": 0.01, "loss": 2.0636, "step": 11508 }, { "epoch": 1.1813423645320198, "grad_norm": 0.048068735748529434, "learning_rate": 0.01, "loss": 2.0803, "step": 11511 }, { "epoch": 1.1816502463054186, "grad_norm": 0.1859566867351532, "learning_rate": 0.01, "loss": 2.1206, "step": 11514 }, { "epoch": 1.1819581280788176, "grad_norm": 0.13449640572071075, "learning_rate": 0.01, "loss": 2.1047, "step": 11517 }, { "epoch": 1.1822660098522166, "grad_norm": 0.09624927490949631, "learning_rate": 0.01, "loss": 2.1338, "step": 11520 }, { "epoch": 1.1825738916256157, "grad_norm": 0.04823729023337364, "learning_rate": 0.01, "loss": 2.1485, "step": 11523 }, { "epoch": 1.1828817733990147, "grad_norm": 0.04005538672208786, "learning_rate": 0.01, "loss": 2.1062, "step": 11526 }, { "epoch": 1.1831896551724137, "grad_norm": 0.035647980868816376, "learning_rate": 0.01, "loss": 2.1002, "step": 11529 }, { "epoch": 1.1834975369458127, "grad_norm": 0.03485687077045441, "learning_rate": 0.01, "loss": 2.085, "step": 11532 }, { "epoch": 1.1838054187192117, "grad_norm": 0.03271855041384697, "learning_rate": 0.01, "loss": 2.1083, "step": 11535 }, { "epoch": 1.1841133004926108, "grad_norm": 0.14434824883937836, "learning_rate": 0.01, "loss": 2.0917, "step": 11538 }, { "epoch": 1.1844211822660098, "grad_norm": 0.16373054683208466, "learning_rate": 0.01, "loss": 2.0889, "step": 11541 }, { "epoch": 1.1847290640394088, "grad_norm": 0.1128426045179367, "learning_rate": 0.01, "loss": 2.0605, "step": 11544 }, { "epoch": 1.1850369458128078, "grad_norm": 0.03807492554187775, "learning_rate": 0.01, "loss": 2.0792, "step": 11547 }, { "epoch": 1.1853448275862069, "grad_norm": 0.0678490698337555, "learning_rate": 0.01, "loss": 2.072, "step": 11550 }, { "epoch": 1.1856527093596059, "grad_norm": 0.04688851907849312, "learning_rate": 0.01, "loss": 2.0648, "step": 11553 }, { "epoch": 1.185960591133005, "grad_norm": 0.03682132810354233, "learning_rate": 0.01, "loss": 2.096, "step": 11556 }, { "epoch": 1.186268472906404, "grad_norm": 0.0797944962978363, "learning_rate": 0.01, "loss": 2.0632, "step": 11559 }, { "epoch": 1.186576354679803, "grad_norm": 0.09593506157398224, "learning_rate": 0.01, "loss": 2.0923, "step": 11562 }, { "epoch": 1.186884236453202, "grad_norm": 0.10455387830734253, "learning_rate": 0.01, "loss": 2.0927, "step": 11565 }, { "epoch": 1.187192118226601, "grad_norm": 0.05642642080783844, "learning_rate": 0.01, "loss": 2.0688, "step": 11568 }, { "epoch": 1.1875, "grad_norm": 0.09467128664255142, "learning_rate": 0.01, "loss": 2.0561, "step": 11571 }, { "epoch": 1.187807881773399, "grad_norm": 0.061598166823387146, "learning_rate": 0.01, "loss": 2.0666, "step": 11574 }, { "epoch": 1.188115763546798, "grad_norm": 0.0875246673822403, "learning_rate": 0.01, "loss": 2.0753, "step": 11577 }, { "epoch": 1.188423645320197, "grad_norm": 0.05889583006501198, "learning_rate": 0.01, "loss": 2.0705, "step": 11580 }, { "epoch": 1.188731527093596, "grad_norm": 0.0796559602022171, "learning_rate": 0.01, "loss": 2.0939, "step": 11583 }, { "epoch": 1.189039408866995, "grad_norm": 0.04127117991447449, "learning_rate": 0.01, "loss": 2.0905, "step": 11586 }, { "epoch": 1.1893472906403941, "grad_norm": 0.06161842495203018, "learning_rate": 0.01, "loss": 2.0954, "step": 11589 }, { "epoch": 1.1896551724137931, "grad_norm": 0.05344879627227783, "learning_rate": 0.01, "loss": 2.0808, "step": 11592 }, { "epoch": 1.1899630541871922, "grad_norm": 0.03660701587796211, "learning_rate": 0.01, "loss": 2.0748, "step": 11595 }, { "epoch": 1.1902709359605912, "grad_norm": 0.04792351648211479, "learning_rate": 0.01, "loss": 2.0833, "step": 11598 }, { "epoch": 1.1905788177339902, "grad_norm": 0.04336618259549141, "learning_rate": 0.01, "loss": 2.1017, "step": 11601 }, { "epoch": 1.1908866995073892, "grad_norm": 0.0654226765036583, "learning_rate": 0.01, "loss": 2.0922, "step": 11604 }, { "epoch": 1.1911945812807883, "grad_norm": 0.08879897743463516, "learning_rate": 0.01, "loss": 2.0842, "step": 11607 }, { "epoch": 1.1915024630541873, "grad_norm": 0.15568268299102783, "learning_rate": 0.01, "loss": 2.0947, "step": 11610 }, { "epoch": 1.1918103448275863, "grad_norm": 0.11712448298931122, "learning_rate": 0.01, "loss": 2.0955, "step": 11613 }, { "epoch": 1.1921182266009853, "grad_norm": 0.04966702312231064, "learning_rate": 0.01, "loss": 2.1093, "step": 11616 }, { "epoch": 1.1924261083743843, "grad_norm": 0.04304838925600052, "learning_rate": 0.01, "loss": 2.0828, "step": 11619 }, { "epoch": 1.1927339901477834, "grad_norm": 0.04981999844312668, "learning_rate": 0.01, "loss": 2.0964, "step": 11622 }, { "epoch": 1.1930418719211824, "grad_norm": 0.045383159071207047, "learning_rate": 0.01, "loss": 2.0967, "step": 11625 }, { "epoch": 1.1933497536945814, "grad_norm": 0.0348484069108963, "learning_rate": 0.01, "loss": 2.0685, "step": 11628 }, { "epoch": 1.1936576354679802, "grad_norm": 0.04802081733942032, "learning_rate": 0.01, "loss": 2.0904, "step": 11631 }, { "epoch": 1.1939655172413792, "grad_norm": 0.0615711510181427, "learning_rate": 0.01, "loss": 2.1153, "step": 11634 }, { "epoch": 1.1942733990147782, "grad_norm": 0.15608100593090057, "learning_rate": 0.01, "loss": 2.0753, "step": 11637 }, { "epoch": 1.1945812807881773, "grad_norm": 0.10449741780757904, "learning_rate": 0.01, "loss": 2.1083, "step": 11640 }, { "epoch": 1.1948891625615763, "grad_norm": 0.062145963311195374, "learning_rate": 0.01, "loss": 2.1104, "step": 11643 }, { "epoch": 1.1951970443349753, "grad_norm": 0.04744469001889229, "learning_rate": 0.01, "loss": 2.0742, "step": 11646 }, { "epoch": 1.1955049261083743, "grad_norm": 0.036814477294683456, "learning_rate": 0.01, "loss": 2.0641, "step": 11649 }, { "epoch": 1.1958128078817734, "grad_norm": 0.037870246917009354, "learning_rate": 0.01, "loss": 2.0906, "step": 11652 }, { "epoch": 1.1961206896551724, "grad_norm": 0.17372412979602814, "learning_rate": 0.01, "loss": 2.1027, "step": 11655 }, { "epoch": 1.1964285714285714, "grad_norm": 0.04681265726685524, "learning_rate": 0.01, "loss": 2.1029, "step": 11658 }, { "epoch": 1.1967364532019704, "grad_norm": 0.058284103870391846, "learning_rate": 0.01, "loss": 2.0829, "step": 11661 }, { "epoch": 1.1970443349753694, "grad_norm": 0.07531574368476868, "learning_rate": 0.01, "loss": 2.0602, "step": 11664 }, { "epoch": 1.1973522167487685, "grad_norm": 0.053437430411577225, "learning_rate": 0.01, "loss": 2.0726, "step": 11667 }, { "epoch": 1.1976600985221675, "grad_norm": 0.047438427805900574, "learning_rate": 0.01, "loss": 2.107, "step": 11670 }, { "epoch": 1.1979679802955665, "grad_norm": 0.04474404826760292, "learning_rate": 0.01, "loss": 2.08, "step": 11673 }, { "epoch": 1.1982758620689655, "grad_norm": 0.15452256798744202, "learning_rate": 0.01, "loss": 2.0788, "step": 11676 }, { "epoch": 1.1985837438423645, "grad_norm": 0.05446213111281395, "learning_rate": 0.01, "loss": 2.0696, "step": 11679 }, { "epoch": 1.1988916256157636, "grad_norm": 0.059178926050662994, "learning_rate": 0.01, "loss": 2.0867, "step": 11682 }, { "epoch": 1.1991995073891626, "grad_norm": 0.05807918682694435, "learning_rate": 0.01, "loss": 2.0675, "step": 11685 }, { "epoch": 1.1995073891625616, "grad_norm": 0.046843890100717545, "learning_rate": 0.01, "loss": 2.0729, "step": 11688 }, { "epoch": 1.1998152709359606, "grad_norm": 0.042494870722293854, "learning_rate": 0.01, "loss": 2.079, "step": 11691 }, { "epoch": 1.2001231527093597, "grad_norm": 0.04506772756576538, "learning_rate": 0.01, "loss": 2.0862, "step": 11694 }, { "epoch": 1.2004310344827587, "grad_norm": 0.04237942770123482, "learning_rate": 0.01, "loss": 2.0712, "step": 11697 }, { "epoch": 1.2007389162561577, "grad_norm": 0.03488307446241379, "learning_rate": 0.01, "loss": 2.1051, "step": 11700 }, { "epoch": 1.2010467980295567, "grad_norm": 0.03693482652306557, "learning_rate": 0.01, "loss": 2.045, "step": 11703 }, { "epoch": 1.2013546798029557, "grad_norm": 0.10410935431718826, "learning_rate": 0.01, "loss": 2.0884, "step": 11706 }, { "epoch": 1.2016625615763548, "grad_norm": 0.11816459894180298, "learning_rate": 0.01, "loss": 2.0687, "step": 11709 }, { "epoch": 1.2019704433497538, "grad_norm": 0.06567800790071487, "learning_rate": 0.01, "loss": 2.063, "step": 11712 }, { "epoch": 1.2022783251231528, "grad_norm": 0.06639432907104492, "learning_rate": 0.01, "loss": 2.078, "step": 11715 }, { "epoch": 1.2025862068965516, "grad_norm": 0.05059380456805229, "learning_rate": 0.01, "loss": 2.1163, "step": 11718 }, { "epoch": 1.2028940886699506, "grad_norm": 0.04076917842030525, "learning_rate": 0.01, "loss": 2.0784, "step": 11721 }, { "epoch": 1.2032019704433496, "grad_norm": 0.05994633212685585, "learning_rate": 0.01, "loss": 2.0819, "step": 11724 }, { "epoch": 1.2035098522167487, "grad_norm": 0.05682201310992241, "learning_rate": 0.01, "loss": 2.0625, "step": 11727 }, { "epoch": 1.2038177339901477, "grad_norm": 0.05393010750412941, "learning_rate": 0.01, "loss": 2.072, "step": 11730 }, { "epoch": 1.2041256157635467, "grad_norm": 0.04697128012776375, "learning_rate": 0.01, "loss": 2.0796, "step": 11733 }, { "epoch": 1.2044334975369457, "grad_norm": 0.04945002868771553, "learning_rate": 0.01, "loss": 2.0666, "step": 11736 }, { "epoch": 1.2047413793103448, "grad_norm": 0.06519649177789688, "learning_rate": 0.01, "loss": 2.0873, "step": 11739 }, { "epoch": 1.2050492610837438, "grad_norm": 0.1188720241189003, "learning_rate": 0.01, "loss": 2.0967, "step": 11742 }, { "epoch": 1.2053571428571428, "grad_norm": 0.1045864149928093, "learning_rate": 0.01, "loss": 2.0834, "step": 11745 }, { "epoch": 1.2056650246305418, "grad_norm": 0.04561993479728699, "learning_rate": 0.01, "loss": 2.0872, "step": 11748 }, { "epoch": 1.2059729064039408, "grad_norm": 0.04972228407859802, "learning_rate": 0.01, "loss": 2.0599, "step": 11751 }, { "epoch": 1.2062807881773399, "grad_norm": 0.05342618376016617, "learning_rate": 0.01, "loss": 2.0606, "step": 11754 }, { "epoch": 1.2065886699507389, "grad_norm": 0.05637587606906891, "learning_rate": 0.01, "loss": 2.0836, "step": 11757 }, { "epoch": 1.206896551724138, "grad_norm": 0.11595457047224045, "learning_rate": 0.01, "loss": 2.0904, "step": 11760 }, { "epoch": 1.207204433497537, "grad_norm": 0.11803465336561203, "learning_rate": 0.01, "loss": 2.0741, "step": 11763 }, { "epoch": 1.207512315270936, "grad_norm": 0.045427508652210236, "learning_rate": 0.01, "loss": 2.0721, "step": 11766 }, { "epoch": 1.207820197044335, "grad_norm": 0.0365883894264698, "learning_rate": 0.01, "loss": 2.0599, "step": 11769 }, { "epoch": 1.208128078817734, "grad_norm": 0.03729262575507164, "learning_rate": 0.01, "loss": 2.087, "step": 11772 }, { "epoch": 1.208435960591133, "grad_norm": 0.05842882767319679, "learning_rate": 0.01, "loss": 2.0762, "step": 11775 }, { "epoch": 1.208743842364532, "grad_norm": 0.07687997072935104, "learning_rate": 0.01, "loss": 2.0691, "step": 11778 }, { "epoch": 1.209051724137931, "grad_norm": 0.06832735985517502, "learning_rate": 0.01, "loss": 2.0974, "step": 11781 }, { "epoch": 1.20935960591133, "grad_norm": 0.10200455039739609, "learning_rate": 0.01, "loss": 2.0866, "step": 11784 }, { "epoch": 1.209667487684729, "grad_norm": 0.10769661515951157, "learning_rate": 0.01, "loss": 2.0761, "step": 11787 }, { "epoch": 1.2099753694581281, "grad_norm": 0.12233126908540726, "learning_rate": 0.01, "loss": 2.0911, "step": 11790 }, { "epoch": 1.2102832512315271, "grad_norm": 0.046646762639284134, "learning_rate": 0.01, "loss": 2.0546, "step": 11793 }, { "epoch": 1.2105911330049262, "grad_norm": 0.030627859756350517, "learning_rate": 0.01, "loss": 2.0805, "step": 11796 }, { "epoch": 1.2108990147783252, "grad_norm": 0.03977693244814873, "learning_rate": 0.01, "loss": 2.0465, "step": 11799 }, { "epoch": 1.2112068965517242, "grad_norm": 0.06213162839412689, "learning_rate": 0.01, "loss": 2.1123, "step": 11802 }, { "epoch": 1.2115147783251232, "grad_norm": 0.04708317294716835, "learning_rate": 0.01, "loss": 2.1315, "step": 11805 }, { "epoch": 1.2118226600985222, "grad_norm": 0.10807790607213974, "learning_rate": 0.01, "loss": 2.0825, "step": 11808 }, { "epoch": 1.2121305418719213, "grad_norm": 0.08579280972480774, "learning_rate": 0.01, "loss": 2.1366, "step": 11811 }, { "epoch": 1.2124384236453203, "grad_norm": 0.05783751606941223, "learning_rate": 0.01, "loss": 2.0682, "step": 11814 }, { "epoch": 1.2127463054187193, "grad_norm": 0.04836808145046234, "learning_rate": 0.01, "loss": 2.0606, "step": 11817 }, { "epoch": 1.2130541871921183, "grad_norm": 0.04026191681623459, "learning_rate": 0.01, "loss": 2.0637, "step": 11820 }, { "epoch": 1.2133620689655173, "grad_norm": 0.03309273347258568, "learning_rate": 0.01, "loss": 2.08, "step": 11823 }, { "epoch": 1.2136699507389164, "grad_norm": 0.10611164569854736, "learning_rate": 0.01, "loss": 2.0787, "step": 11826 }, { "epoch": 1.2139778325123154, "grad_norm": 0.0817422866821289, "learning_rate": 0.01, "loss": 2.069, "step": 11829 }, { "epoch": 1.2142857142857142, "grad_norm": 0.05528125911951065, "learning_rate": 0.01, "loss": 2.0408, "step": 11832 }, { "epoch": 1.2145935960591132, "grad_norm": 0.05016999691724777, "learning_rate": 0.01, "loss": 2.0794, "step": 11835 }, { "epoch": 1.2149014778325122, "grad_norm": 0.06376414746046066, "learning_rate": 0.01, "loss": 2.1097, "step": 11838 }, { "epoch": 1.2152093596059113, "grad_norm": 0.06668904423713684, "learning_rate": 0.01, "loss": 2.0745, "step": 11841 }, { "epoch": 1.2155172413793103, "grad_norm": 0.046260587871074677, "learning_rate": 0.01, "loss": 2.0711, "step": 11844 }, { "epoch": 1.2158251231527093, "grad_norm": 0.039374321699142456, "learning_rate": 0.01, "loss": 2.087, "step": 11847 }, { "epoch": 1.2161330049261083, "grad_norm": 0.10993562638759613, "learning_rate": 0.01, "loss": 2.0615, "step": 11850 }, { "epoch": 1.2164408866995073, "grad_norm": 0.03676668182015419, "learning_rate": 0.01, "loss": 2.0717, "step": 11853 }, { "epoch": 1.2167487684729064, "grad_norm": 0.10777715593576431, "learning_rate": 0.01, "loss": 2.1016, "step": 11856 }, { "epoch": 1.2170566502463054, "grad_norm": 0.07948705554008484, "learning_rate": 0.01, "loss": 2.083, "step": 11859 }, { "epoch": 1.2173645320197044, "grad_norm": 0.11646637320518494, "learning_rate": 0.01, "loss": 2.0552, "step": 11862 }, { "epoch": 1.2176724137931034, "grad_norm": 0.07525186985731125, "learning_rate": 0.01, "loss": 2.0877, "step": 11865 }, { "epoch": 1.2179802955665024, "grad_norm": 0.048124101012945175, "learning_rate": 0.01, "loss": 2.0652, "step": 11868 }, { "epoch": 1.2182881773399015, "grad_norm": 0.04603361710906029, "learning_rate": 0.01, "loss": 2.0922, "step": 11871 }, { "epoch": 1.2185960591133005, "grad_norm": 0.07067687064409256, "learning_rate": 0.01, "loss": 2.0946, "step": 11874 }, { "epoch": 1.2189039408866995, "grad_norm": 0.0959327220916748, "learning_rate": 0.01, "loss": 2.096, "step": 11877 }, { "epoch": 1.2192118226600985, "grad_norm": 0.08565320819616318, "learning_rate": 0.01, "loss": 2.09, "step": 11880 }, { "epoch": 1.2195197044334976, "grad_norm": 0.06728377193212509, "learning_rate": 0.01, "loss": 2.0801, "step": 11883 }, { "epoch": 1.2198275862068966, "grad_norm": 0.03809618949890137, "learning_rate": 0.01, "loss": 2.0668, "step": 11886 }, { "epoch": 1.2201354679802956, "grad_norm": 0.049925826489925385, "learning_rate": 0.01, "loss": 2.0625, "step": 11889 }, { "epoch": 1.2204433497536946, "grad_norm": 0.05949478596448898, "learning_rate": 0.01, "loss": 2.0687, "step": 11892 }, { "epoch": 1.2207512315270936, "grad_norm": 0.08161807060241699, "learning_rate": 0.01, "loss": 2.0789, "step": 11895 }, { "epoch": 1.2210591133004927, "grad_norm": 0.05829952284693718, "learning_rate": 0.01, "loss": 2.0846, "step": 11898 }, { "epoch": 1.2213669950738917, "grad_norm": 0.05801619216799736, "learning_rate": 0.01, "loss": 2.1014, "step": 11901 }, { "epoch": 1.2216748768472907, "grad_norm": 0.04123099148273468, "learning_rate": 0.01, "loss": 2.0867, "step": 11904 }, { "epoch": 1.2219827586206897, "grad_norm": 0.05088057741522789, "learning_rate": 0.01, "loss": 2.0751, "step": 11907 }, { "epoch": 1.2222906403940887, "grad_norm": 0.07357197254896164, "learning_rate": 0.01, "loss": 2.1023, "step": 11910 }, { "epoch": 1.2225985221674878, "grad_norm": 0.060078103095293045, "learning_rate": 0.01, "loss": 2.0784, "step": 11913 }, { "epoch": 1.2229064039408868, "grad_norm": 0.12629617750644684, "learning_rate": 0.01, "loss": 2.0767, "step": 11916 }, { "epoch": 1.2232142857142858, "grad_norm": 0.07202067971229553, "learning_rate": 0.01, "loss": 2.0796, "step": 11919 }, { "epoch": 1.2235221674876846, "grad_norm": 0.06407934427261353, "learning_rate": 0.01, "loss": 2.0926, "step": 11922 }, { "epoch": 1.2238300492610836, "grad_norm": 0.053789231926202774, "learning_rate": 0.01, "loss": 2.092, "step": 11925 }, { "epoch": 1.2241379310344827, "grad_norm": 0.04130502790212631, "learning_rate": 0.01, "loss": 2.051, "step": 11928 }, { "epoch": 1.2244458128078817, "grad_norm": 0.05235166475176811, "learning_rate": 0.01, "loss": 2.0829, "step": 11931 }, { "epoch": 1.2247536945812807, "grad_norm": 0.04508119449019432, "learning_rate": 0.01, "loss": 2.0857, "step": 11934 }, { "epoch": 1.2250615763546797, "grad_norm": 0.03570512309670448, "learning_rate": 0.01, "loss": 2.0734, "step": 11937 }, { "epoch": 1.2253694581280787, "grad_norm": 0.04690218344330788, "learning_rate": 0.01, "loss": 2.0988, "step": 11940 }, { "epoch": 1.2256773399014778, "grad_norm": 0.10231764614582062, "learning_rate": 0.01, "loss": 2.0716, "step": 11943 }, { "epoch": 1.2259852216748768, "grad_norm": 0.05221893638372421, "learning_rate": 0.01, "loss": 2.0693, "step": 11946 }, { "epoch": 1.2262931034482758, "grad_norm": 0.0647406056523323, "learning_rate": 0.01, "loss": 2.0809, "step": 11949 }, { "epoch": 1.2266009852216748, "grad_norm": 0.06388009339570999, "learning_rate": 0.01, "loss": 2.0968, "step": 11952 }, { "epoch": 1.2269088669950738, "grad_norm": 0.06904192268848419, "learning_rate": 0.01, "loss": 2.0917, "step": 11955 }, { "epoch": 1.2272167487684729, "grad_norm": 0.08780385553836823, "learning_rate": 0.01, "loss": 2.0881, "step": 11958 }, { "epoch": 1.2275246305418719, "grad_norm": 0.037958092987537384, "learning_rate": 0.01, "loss": 2.0813, "step": 11961 }, { "epoch": 1.227832512315271, "grad_norm": 0.04035305231809616, "learning_rate": 0.01, "loss": 2.0718, "step": 11964 }, { "epoch": 1.22814039408867, "grad_norm": 0.056451354175806046, "learning_rate": 0.01, "loss": 2.0653, "step": 11967 }, { "epoch": 1.228448275862069, "grad_norm": 0.06248374283313751, "learning_rate": 0.01, "loss": 2.0794, "step": 11970 }, { "epoch": 1.228756157635468, "grad_norm": 0.05662978067994118, "learning_rate": 0.01, "loss": 2.0873, "step": 11973 }, { "epoch": 1.229064039408867, "grad_norm": 0.06416438519954681, "learning_rate": 0.01, "loss": 2.0933, "step": 11976 }, { "epoch": 1.229371921182266, "grad_norm": 0.04529969021677971, "learning_rate": 0.01, "loss": 2.0892, "step": 11979 }, { "epoch": 1.229679802955665, "grad_norm": 0.03636370226740837, "learning_rate": 0.01, "loss": 2.0968, "step": 11982 }, { "epoch": 1.229987684729064, "grad_norm": 0.03992651402950287, "learning_rate": 0.01, "loss": 2.0606, "step": 11985 }, { "epoch": 1.230295566502463, "grad_norm": 0.19436125457286835, "learning_rate": 0.01, "loss": 2.0779, "step": 11988 }, { "epoch": 1.230603448275862, "grad_norm": 0.15459048748016357, "learning_rate": 0.01, "loss": 2.0801, "step": 11991 }, { "epoch": 1.2309113300492611, "grad_norm": 0.11131371557712555, "learning_rate": 0.01, "loss": 2.0629, "step": 11994 }, { "epoch": 1.2312192118226601, "grad_norm": 0.06876586377620697, "learning_rate": 0.01, "loss": 2.1081, "step": 11997 }, { "epoch": 1.2315270935960592, "grad_norm": 0.03379599004983902, "learning_rate": 0.01, "loss": 2.0977, "step": 12000 }, { "epoch": 1.2318349753694582, "grad_norm": 0.06905510276556015, "learning_rate": 0.01, "loss": 2.0545, "step": 12003 }, { "epoch": 1.2321428571428572, "grad_norm": 0.05859539657831192, "learning_rate": 0.01, "loss": 2.0751, "step": 12006 }, { "epoch": 1.2324507389162562, "grad_norm": 0.1200842559337616, "learning_rate": 0.01, "loss": 2.1045, "step": 12009 }, { "epoch": 1.2327586206896552, "grad_norm": 0.09969060868024826, "learning_rate": 0.01, "loss": 2.0641, "step": 12012 }, { "epoch": 1.2330665024630543, "grad_norm": 0.08915867656469345, "learning_rate": 0.01, "loss": 2.0585, "step": 12015 }, { "epoch": 1.2333743842364533, "grad_norm": 0.10951671004295349, "learning_rate": 0.01, "loss": 2.1007, "step": 12018 }, { "epoch": 1.2336822660098523, "grad_norm": 0.15262556076049805, "learning_rate": 0.01, "loss": 2.0643, "step": 12021 }, { "epoch": 1.2339901477832513, "grad_norm": 0.05622226372361183, "learning_rate": 0.01, "loss": 2.0796, "step": 12024 }, { "epoch": 1.2342980295566504, "grad_norm": 0.05918841436505318, "learning_rate": 0.01, "loss": 2.0911, "step": 12027 }, { "epoch": 1.2346059113300494, "grad_norm": 0.04867622256278992, "learning_rate": 0.01, "loss": 2.0872, "step": 12030 }, { "epoch": 1.2349137931034484, "grad_norm": 0.04389597848057747, "learning_rate": 0.01, "loss": 2.0882, "step": 12033 }, { "epoch": 1.2352216748768472, "grad_norm": 0.1209108904004097, "learning_rate": 0.01, "loss": 2.0394, "step": 12036 }, { "epoch": 1.2355295566502462, "grad_norm": 0.08446931838989258, "learning_rate": 0.01, "loss": 2.0744, "step": 12039 }, { "epoch": 1.2358374384236452, "grad_norm": 0.07141686230897903, "learning_rate": 0.01, "loss": 2.1099, "step": 12042 }, { "epoch": 1.2361453201970443, "grad_norm": 0.1216801181435585, "learning_rate": 0.01, "loss": 2.0754, "step": 12045 }, { "epoch": 1.2364532019704433, "grad_norm": 0.10539086163043976, "learning_rate": 0.01, "loss": 2.0887, "step": 12048 }, { "epoch": 1.2367610837438423, "grad_norm": 0.09336747229099274, "learning_rate": 0.01, "loss": 2.0637, "step": 12051 }, { "epoch": 1.2370689655172413, "grad_norm": 0.091059610247612, "learning_rate": 0.01, "loss": 2.057, "step": 12054 }, { "epoch": 1.2373768472906403, "grad_norm": 0.08291159570217133, "learning_rate": 0.01, "loss": 2.1039, "step": 12057 }, { "epoch": 1.2376847290640394, "grad_norm": 0.07626821845769882, "learning_rate": 0.01, "loss": 2.0784, "step": 12060 }, { "epoch": 1.2379926108374384, "grad_norm": 0.05197496339678764, "learning_rate": 0.01, "loss": 2.0968, "step": 12063 }, { "epoch": 1.2383004926108374, "grad_norm": 0.061275266110897064, "learning_rate": 0.01, "loss": 2.095, "step": 12066 }, { "epoch": 1.2386083743842364, "grad_norm": 0.04282483085989952, "learning_rate": 0.01, "loss": 2.083, "step": 12069 }, { "epoch": 1.2389162561576355, "grad_norm": 0.037066422402858734, "learning_rate": 0.01, "loss": 2.0653, "step": 12072 }, { "epoch": 1.2392241379310345, "grad_norm": 0.0467105396091938, "learning_rate": 0.01, "loss": 2.0913, "step": 12075 }, { "epoch": 1.2395320197044335, "grad_norm": 0.053995974361896515, "learning_rate": 0.01, "loss": 2.0754, "step": 12078 }, { "epoch": 1.2398399014778325, "grad_norm": 0.08583737164735794, "learning_rate": 0.01, "loss": 2.089, "step": 12081 }, { "epoch": 1.2401477832512315, "grad_norm": 0.07264076173305511, "learning_rate": 0.01, "loss": 2.07, "step": 12084 }, { "epoch": 1.2404556650246306, "grad_norm": 0.062001802027225494, "learning_rate": 0.01, "loss": 2.0969, "step": 12087 }, { "epoch": 1.2407635467980296, "grad_norm": 0.05311381444334984, "learning_rate": 0.01, "loss": 2.0833, "step": 12090 }, { "epoch": 1.2410714285714286, "grad_norm": 0.04272656887769699, "learning_rate": 0.01, "loss": 2.0883, "step": 12093 }, { "epoch": 1.2413793103448276, "grad_norm": 0.10696172714233398, "learning_rate": 0.01, "loss": 2.0925, "step": 12096 }, { "epoch": 1.2416871921182266, "grad_norm": 0.08625493943691254, "learning_rate": 0.01, "loss": 2.0616, "step": 12099 }, { "epoch": 1.2419950738916257, "grad_norm": 0.06818173080682755, "learning_rate": 0.01, "loss": 2.0943, "step": 12102 }, { "epoch": 1.2423029556650247, "grad_norm": 0.050731681287288666, "learning_rate": 0.01, "loss": 2.0802, "step": 12105 }, { "epoch": 1.2426108374384237, "grad_norm": 0.08426883816719055, "learning_rate": 0.01, "loss": 2.0825, "step": 12108 }, { "epoch": 1.2429187192118227, "grad_norm": 0.09432832151651382, "learning_rate": 0.01, "loss": 2.0834, "step": 12111 }, { "epoch": 1.2432266009852218, "grad_norm": 0.06951441615819931, "learning_rate": 0.01, "loss": 2.0951, "step": 12114 }, { "epoch": 1.2435344827586208, "grad_norm": 0.06427393108606339, "learning_rate": 0.01, "loss": 2.0706, "step": 12117 }, { "epoch": 1.2438423645320198, "grad_norm": 0.03967609629034996, "learning_rate": 0.01, "loss": 2.0897, "step": 12120 }, { "epoch": 1.2441502463054186, "grad_norm": 0.036665160208940506, "learning_rate": 0.01, "loss": 2.0791, "step": 12123 }, { "epoch": 1.2444581280788176, "grad_norm": 0.072290800511837, "learning_rate": 0.01, "loss": 2.0889, "step": 12126 }, { "epoch": 1.2447660098522166, "grad_norm": 0.07136868685483932, "learning_rate": 0.01, "loss": 2.0688, "step": 12129 }, { "epoch": 1.2450738916256157, "grad_norm": 0.15400493144989014, "learning_rate": 0.01, "loss": 2.0821, "step": 12132 }, { "epoch": 1.2453817733990147, "grad_norm": 0.07114578038454056, "learning_rate": 0.01, "loss": 2.0719, "step": 12135 }, { "epoch": 1.2456896551724137, "grad_norm": 0.043961767107248306, "learning_rate": 0.01, "loss": 2.0877, "step": 12138 }, { "epoch": 1.2459975369458127, "grad_norm": 0.056267060339450836, "learning_rate": 0.01, "loss": 2.0688, "step": 12141 }, { "epoch": 1.2463054187192117, "grad_norm": 0.035889722406864166, "learning_rate": 0.01, "loss": 2.0783, "step": 12144 }, { "epoch": 1.2466133004926108, "grad_norm": 0.1781640499830246, "learning_rate": 0.01, "loss": 2.0574, "step": 12147 }, { "epoch": 1.2469211822660098, "grad_norm": 0.0891503393650055, "learning_rate": 0.01, "loss": 2.0957, "step": 12150 }, { "epoch": 1.2472290640394088, "grad_norm": 0.047431472688913345, "learning_rate": 0.01, "loss": 2.0696, "step": 12153 }, { "epoch": 1.2475369458128078, "grad_norm": 0.04693286865949631, "learning_rate": 0.01, "loss": 2.0438, "step": 12156 }, { "epoch": 1.2478448275862069, "grad_norm": 0.0382777564227581, "learning_rate": 0.01, "loss": 2.0851, "step": 12159 }, { "epoch": 1.2481527093596059, "grad_norm": 0.04085429012775421, "learning_rate": 0.01, "loss": 2.0846, "step": 12162 }, { "epoch": 1.248460591133005, "grad_norm": 0.05329781025648117, "learning_rate": 0.01, "loss": 2.0732, "step": 12165 }, { "epoch": 1.248768472906404, "grad_norm": 0.06961992383003235, "learning_rate": 0.01, "loss": 2.0372, "step": 12168 }, { "epoch": 1.249076354679803, "grad_norm": 0.05290938913822174, "learning_rate": 0.01, "loss": 2.0692, "step": 12171 }, { "epoch": 1.249384236453202, "grad_norm": 0.1247674971818924, "learning_rate": 0.01, "loss": 2.0679, "step": 12174 }, { "epoch": 1.249692118226601, "grad_norm": 0.04983863607048988, "learning_rate": 0.01, "loss": 2.0611, "step": 12177 }, { "epoch": 1.25, "grad_norm": 0.08552074432373047, "learning_rate": 0.01, "loss": 2.1084, "step": 12180 }, { "epoch": 1.250307881773399, "grad_norm": 0.1376069337129593, "learning_rate": 0.01, "loss": 2.0997, "step": 12183 }, { "epoch": 1.250615763546798, "grad_norm": 0.07097752392292023, "learning_rate": 0.01, "loss": 2.0761, "step": 12186 }, { "epoch": 1.250923645320197, "grad_norm": 0.03953644260764122, "learning_rate": 0.01, "loss": 2.0675, "step": 12189 }, { "epoch": 1.251231527093596, "grad_norm": 0.04611526057124138, "learning_rate": 0.01, "loss": 2.0717, "step": 12192 }, { "epoch": 1.251539408866995, "grad_norm": 0.07573895156383514, "learning_rate": 0.01, "loss": 2.0785, "step": 12195 }, { "epoch": 1.2518472906403941, "grad_norm": 0.07144660502672195, "learning_rate": 0.01, "loss": 2.0815, "step": 12198 }, { "epoch": 1.2521551724137931, "grad_norm": 0.05297645181417465, "learning_rate": 0.01, "loss": 2.093, "step": 12201 }, { "epoch": 1.2524630541871922, "grad_norm": 0.044887710362672806, "learning_rate": 0.01, "loss": 2.0752, "step": 12204 }, { "epoch": 1.2527709359605912, "grad_norm": 0.04305564984679222, "learning_rate": 0.01, "loss": 2.0596, "step": 12207 }, { "epoch": 1.2530788177339902, "grad_norm": 0.057785287499427795, "learning_rate": 0.01, "loss": 2.094, "step": 12210 }, { "epoch": 1.2533866995073892, "grad_norm": 0.04404570534825325, "learning_rate": 0.01, "loss": 2.1037, "step": 12213 }, { "epoch": 1.2536945812807883, "grad_norm": 0.055468104779720306, "learning_rate": 0.01, "loss": 2.0851, "step": 12216 }, { "epoch": 1.2540024630541873, "grad_norm": 0.17121906578540802, "learning_rate": 0.01, "loss": 2.081, "step": 12219 }, { "epoch": 1.2543103448275863, "grad_norm": 0.09411416202783585, "learning_rate": 0.01, "loss": 2.0974, "step": 12222 }, { "epoch": 1.2546182266009853, "grad_norm": 0.07855021953582764, "learning_rate": 0.01, "loss": 2.0733, "step": 12225 }, { "epoch": 1.2549261083743843, "grad_norm": 0.052616432309150696, "learning_rate": 0.01, "loss": 2.0372, "step": 12228 }, { "epoch": 1.2552339901477834, "grad_norm": 0.047992121428251266, "learning_rate": 0.01, "loss": 2.0918, "step": 12231 }, { "epoch": 1.2555418719211824, "grad_norm": 0.04336715489625931, "learning_rate": 0.01, "loss": 2.0511, "step": 12234 }, { "epoch": 1.2558497536945814, "grad_norm": 0.03128316253423691, "learning_rate": 0.01, "loss": 2.0882, "step": 12237 }, { "epoch": 1.2561576354679804, "grad_norm": 0.06315557658672333, "learning_rate": 0.01, "loss": 2.0918, "step": 12240 }, { "epoch": 1.2564655172413794, "grad_norm": 0.0528687946498394, "learning_rate": 0.01, "loss": 2.0556, "step": 12243 }, { "epoch": 1.2567733990147782, "grad_norm": 0.17166706919670105, "learning_rate": 0.01, "loss": 2.068, "step": 12246 }, { "epoch": 1.2570812807881773, "grad_norm": 0.11394128203392029, "learning_rate": 0.01, "loss": 2.1179, "step": 12249 }, { "epoch": 1.2573891625615763, "grad_norm": 0.08554805815219879, "learning_rate": 0.01, "loss": 2.0857, "step": 12252 }, { "epoch": 1.2576970443349753, "grad_norm": 0.05203767865896225, "learning_rate": 0.01, "loss": 2.0975, "step": 12255 }, { "epoch": 1.2580049261083743, "grad_norm": 0.06072428077459335, "learning_rate": 0.01, "loss": 2.0692, "step": 12258 }, { "epoch": 1.2583128078817734, "grad_norm": 0.044136617332696915, "learning_rate": 0.01, "loss": 2.0458, "step": 12261 }, { "epoch": 1.2586206896551724, "grad_norm": 0.038774993270635605, "learning_rate": 0.01, "loss": 2.0835, "step": 12264 }, { "epoch": 1.2589285714285714, "grad_norm": 0.03669529780745506, "learning_rate": 0.01, "loss": 2.0949, "step": 12267 }, { "epoch": 1.2592364532019704, "grad_norm": 0.050722066313028336, "learning_rate": 0.01, "loss": 2.0772, "step": 12270 }, { "epoch": 1.2595443349753694, "grad_norm": 0.12684300541877747, "learning_rate": 0.01, "loss": 2.0814, "step": 12273 }, { "epoch": 1.2598522167487685, "grad_norm": 0.07431039214134216, "learning_rate": 0.01, "loss": 2.1001, "step": 12276 }, { "epoch": 1.2601600985221675, "grad_norm": 0.07050034403800964, "learning_rate": 0.01, "loss": 2.0507, "step": 12279 }, { "epoch": 1.2604679802955665, "grad_norm": 0.0553620308637619, "learning_rate": 0.01, "loss": 2.0757, "step": 12282 }, { "epoch": 1.2607758620689655, "grad_norm": 0.10946903377771378, "learning_rate": 0.01, "loss": 2.0659, "step": 12285 }, { "epoch": 1.2610837438423645, "grad_norm": 0.06346802413463593, "learning_rate": 0.01, "loss": 2.0871, "step": 12288 }, { "epoch": 1.2613916256157636, "grad_norm": 0.09386668354272842, "learning_rate": 0.01, "loss": 2.0702, "step": 12291 }, { "epoch": 1.2616995073891626, "grad_norm": 0.05672946944832802, "learning_rate": 0.01, "loss": 2.0812, "step": 12294 }, { "epoch": 1.2620073891625616, "grad_norm": 0.1079539805650711, "learning_rate": 0.01, "loss": 2.0851, "step": 12297 }, { "epoch": 1.2623152709359606, "grad_norm": 0.043078985065221786, "learning_rate": 0.01, "loss": 2.0846, "step": 12300 }, { "epoch": 1.2626231527093597, "grad_norm": 0.043098706752061844, "learning_rate": 0.01, "loss": 2.0541, "step": 12303 }, { "epoch": 1.2629310344827587, "grad_norm": 0.04908977448940277, "learning_rate": 0.01, "loss": 2.0801, "step": 12306 }, { "epoch": 1.2632389162561577, "grad_norm": 0.09897246211767197, "learning_rate": 0.01, "loss": 2.0899, "step": 12309 }, { "epoch": 1.2635467980295567, "grad_norm": 0.0861278846859932, "learning_rate": 0.01, "loss": 2.0613, "step": 12312 }, { "epoch": 1.2638546798029557, "grad_norm": 0.06973280757665634, "learning_rate": 0.01, "loss": 2.0598, "step": 12315 }, { "epoch": 1.2641625615763548, "grad_norm": 0.048658501356840134, "learning_rate": 0.01, "loss": 2.0838, "step": 12318 }, { "epoch": 1.2644704433497536, "grad_norm": 0.05053295940160751, "learning_rate": 0.01, "loss": 2.0665, "step": 12321 }, { "epoch": 1.2647783251231526, "grad_norm": 0.0536380410194397, "learning_rate": 0.01, "loss": 2.0696, "step": 12324 }, { "epoch": 1.2650862068965516, "grad_norm": 0.13452892005443573, "learning_rate": 0.01, "loss": 2.0928, "step": 12327 }, { "epoch": 1.2653940886699506, "grad_norm": 0.056635159999132156, "learning_rate": 0.01, "loss": 2.0903, "step": 12330 }, { "epoch": 1.2657019704433496, "grad_norm": 0.09460306912660599, "learning_rate": 0.01, "loss": 2.0616, "step": 12333 }, { "epoch": 1.2660098522167487, "grad_norm": 0.09019794315099716, "learning_rate": 0.01, "loss": 2.0808, "step": 12336 }, { "epoch": 1.2663177339901477, "grad_norm": 0.04020017758011818, "learning_rate": 0.01, "loss": 2.0613, "step": 12339 }, { "epoch": 1.2666256157635467, "grad_norm": 0.05063892900943756, "learning_rate": 0.01, "loss": 2.0846, "step": 12342 }, { "epoch": 1.2669334975369457, "grad_norm": 0.06472054123878479, "learning_rate": 0.01, "loss": 2.0884, "step": 12345 }, { "epoch": 1.2672413793103448, "grad_norm": 0.0523315854370594, "learning_rate": 0.01, "loss": 2.0756, "step": 12348 }, { "epoch": 1.2675492610837438, "grad_norm": 0.040240950882434845, "learning_rate": 0.01, "loss": 2.0646, "step": 12351 }, { "epoch": 1.2678571428571428, "grad_norm": 0.061988551169633865, "learning_rate": 0.01, "loss": 2.0899, "step": 12354 }, { "epoch": 1.2681650246305418, "grad_norm": 0.03831657022237778, "learning_rate": 0.01, "loss": 2.0633, "step": 12357 }, { "epoch": 1.2684729064039408, "grad_norm": 0.105617955327034, "learning_rate": 0.01, "loss": 2.0553, "step": 12360 }, { "epoch": 1.2687807881773399, "grad_norm": 0.09372366219758987, "learning_rate": 0.01, "loss": 2.0522, "step": 12363 }, { "epoch": 1.2690886699507389, "grad_norm": 0.10305638611316681, "learning_rate": 0.01, "loss": 2.0543, "step": 12366 }, { "epoch": 1.269396551724138, "grad_norm": 0.07187418639659882, "learning_rate": 0.01, "loss": 2.0775, "step": 12369 }, { "epoch": 1.269704433497537, "grad_norm": 0.03744306415319443, "learning_rate": 0.01, "loss": 2.0771, "step": 12372 }, { "epoch": 1.270012315270936, "grad_norm": 0.03059488907456398, "learning_rate": 0.01, "loss": 2.067, "step": 12375 }, { "epoch": 1.270320197044335, "grad_norm": 0.09767211973667145, "learning_rate": 0.01, "loss": 2.0889, "step": 12378 }, { "epoch": 1.270628078817734, "grad_norm": 0.05093003436923027, "learning_rate": 0.01, "loss": 2.0852, "step": 12381 }, { "epoch": 1.270935960591133, "grad_norm": 0.04648155719041824, "learning_rate": 0.01, "loss": 2.0775, "step": 12384 }, { "epoch": 1.271243842364532, "grad_norm": 0.04745417460799217, "learning_rate": 0.01, "loss": 2.0888, "step": 12387 }, { "epoch": 1.271551724137931, "grad_norm": 0.05171092227101326, "learning_rate": 0.01, "loss": 2.075, "step": 12390 }, { "epoch": 1.27185960591133, "grad_norm": 0.05051850154995918, "learning_rate": 0.01, "loss": 2.0754, "step": 12393 }, { "epoch": 1.272167487684729, "grad_norm": 0.06048591807484627, "learning_rate": 0.01, "loss": 2.1025, "step": 12396 }, { "epoch": 1.2724753694581281, "grad_norm": 0.09673628211021423, "learning_rate": 0.01, "loss": 2.0866, "step": 12399 }, { "epoch": 1.2727832512315271, "grad_norm": 0.11826295405626297, "learning_rate": 0.01, "loss": 2.0757, "step": 12402 }, { "epoch": 1.2730911330049262, "grad_norm": 0.06271769106388092, "learning_rate": 0.01, "loss": 2.0797, "step": 12405 }, { "epoch": 1.2733990147783252, "grad_norm": 0.0463738851249218, "learning_rate": 0.01, "loss": 2.054, "step": 12408 }, { "epoch": 1.2737068965517242, "grad_norm": 0.035649579018354416, "learning_rate": 0.01, "loss": 2.0747, "step": 12411 }, { "epoch": 1.2740147783251232, "grad_norm": 0.08586122840642929, "learning_rate": 0.01, "loss": 2.0692, "step": 12414 }, { "epoch": 1.2743226600985222, "grad_norm": 0.08538860082626343, "learning_rate": 0.01, "loss": 2.0617, "step": 12417 }, { "epoch": 1.2746305418719213, "grad_norm": 0.05974709242582321, "learning_rate": 0.01, "loss": 2.0527, "step": 12420 }, { "epoch": 1.2749384236453203, "grad_norm": 0.057023100554943085, "learning_rate": 0.01, "loss": 2.0751, "step": 12423 }, { "epoch": 1.2752463054187193, "grad_norm": 0.062105972319841385, "learning_rate": 0.01, "loss": 2.0946, "step": 12426 }, { "epoch": 1.2755541871921183, "grad_norm": 0.048221688717603683, "learning_rate": 0.01, "loss": 2.0662, "step": 12429 }, { "epoch": 1.2758620689655173, "grad_norm": 0.04440610110759735, "learning_rate": 0.01, "loss": 2.0852, "step": 12432 }, { "epoch": 1.2761699507389164, "grad_norm": 0.06074054539203644, "learning_rate": 0.01, "loss": 2.0667, "step": 12435 }, { "epoch": 1.2764778325123154, "grad_norm": 0.09477720409631729, "learning_rate": 0.01, "loss": 2.0815, "step": 12438 }, { "epoch": 1.2767857142857144, "grad_norm": 0.08470610529184341, "learning_rate": 0.01, "loss": 2.1023, "step": 12441 }, { "epoch": 1.2770935960591134, "grad_norm": 0.09079881012439728, "learning_rate": 0.01, "loss": 2.0761, "step": 12444 }, { "epoch": 1.2774014778325122, "grad_norm": 0.06132930517196655, "learning_rate": 0.01, "loss": 2.1027, "step": 12447 }, { "epoch": 1.2777093596059113, "grad_norm": 0.04702606052160263, "learning_rate": 0.01, "loss": 2.1051, "step": 12450 }, { "epoch": 1.2780172413793103, "grad_norm": 0.06621818989515305, "learning_rate": 0.01, "loss": 2.0835, "step": 12453 }, { "epoch": 1.2783251231527093, "grad_norm": 0.047375794500112534, "learning_rate": 0.01, "loss": 2.0982, "step": 12456 }, { "epoch": 1.2786330049261083, "grad_norm": 0.03766035661101341, "learning_rate": 0.01, "loss": 2.0468, "step": 12459 }, { "epoch": 1.2789408866995073, "grad_norm": 0.10769324004650116, "learning_rate": 0.01, "loss": 2.0848, "step": 12462 }, { "epoch": 1.2792487684729064, "grad_norm": 0.06848851591348648, "learning_rate": 0.01, "loss": 2.0664, "step": 12465 }, { "epoch": 1.2795566502463054, "grad_norm": 0.09864836931228638, "learning_rate": 0.01, "loss": 2.0719, "step": 12468 }, { "epoch": 1.2798645320197044, "grad_norm": 0.04042387008666992, "learning_rate": 0.01, "loss": 2.0672, "step": 12471 }, { "epoch": 1.2801724137931034, "grad_norm": 0.09212526679039001, "learning_rate": 0.01, "loss": 2.0789, "step": 12474 }, { "epoch": 1.2804802955665024, "grad_norm": 0.08713985234498978, "learning_rate": 0.01, "loss": 2.0765, "step": 12477 }, { "epoch": 1.2807881773399015, "grad_norm": 0.04133505001664162, "learning_rate": 0.01, "loss": 2.0809, "step": 12480 }, { "epoch": 1.2810960591133005, "grad_norm": 0.07466418296098709, "learning_rate": 0.01, "loss": 2.0499, "step": 12483 }, { "epoch": 1.2814039408866995, "grad_norm": 0.08685484528541565, "learning_rate": 0.01, "loss": 2.0689, "step": 12486 }, { "epoch": 1.2817118226600985, "grad_norm": 0.13300663232803345, "learning_rate": 0.01, "loss": 2.0761, "step": 12489 }, { "epoch": 1.2820197044334976, "grad_norm": 0.1024186760187149, "learning_rate": 0.01, "loss": 2.0763, "step": 12492 }, { "epoch": 1.2823275862068966, "grad_norm": 0.05908042937517166, "learning_rate": 0.01, "loss": 2.0485, "step": 12495 }, { "epoch": 1.2826354679802956, "grad_norm": 0.03715427219867706, "learning_rate": 0.01, "loss": 2.0925, "step": 12498 }, { "epoch": 1.2829433497536946, "grad_norm": 0.05033004283905029, "learning_rate": 0.01, "loss": 2.0875, "step": 12501 }, { "epoch": 1.2832512315270936, "grad_norm": 0.04851456359028816, "learning_rate": 0.01, "loss": 2.0547, "step": 12504 }, { "epoch": 1.2835591133004927, "grad_norm": 0.08153282105922699, "learning_rate": 0.01, "loss": 2.0828, "step": 12507 }, { "epoch": 1.2838669950738917, "grad_norm": 0.07549238950014114, "learning_rate": 0.01, "loss": 2.0681, "step": 12510 }, { "epoch": 1.2841748768472907, "grad_norm": 0.08571973443031311, "learning_rate": 0.01, "loss": 2.0556, "step": 12513 }, { "epoch": 1.2844827586206897, "grad_norm": 0.1036754697561264, "learning_rate": 0.01, "loss": 2.0712, "step": 12516 }, { "epoch": 1.2847906403940887, "grad_norm": 0.04329349100589752, "learning_rate": 0.01, "loss": 2.0711, "step": 12519 }, { "epoch": 1.2850985221674878, "grad_norm": 0.03718428686261177, "learning_rate": 0.01, "loss": 2.0812, "step": 12522 }, { "epoch": 1.2854064039408866, "grad_norm": 0.07977878302335739, "learning_rate": 0.01, "loss": 2.0562, "step": 12525 }, { "epoch": 1.2857142857142856, "grad_norm": 0.11540202796459198, "learning_rate": 0.01, "loss": 2.0666, "step": 12528 }, { "epoch": 1.2860221674876846, "grad_norm": 0.07631656527519226, "learning_rate": 0.01, "loss": 2.0694, "step": 12531 }, { "epoch": 1.2863300492610836, "grad_norm": 0.051738426089286804, "learning_rate": 0.01, "loss": 2.0862, "step": 12534 }, { "epoch": 1.2866379310344827, "grad_norm": 0.05070396512746811, "learning_rate": 0.01, "loss": 2.0538, "step": 12537 }, { "epoch": 1.2869458128078817, "grad_norm": 0.039027947932481766, "learning_rate": 0.01, "loss": 2.0487, "step": 12540 }, { "epoch": 1.2872536945812807, "grad_norm": 0.052646003663539886, "learning_rate": 0.01, "loss": 2.0745, "step": 12543 }, { "epoch": 1.2875615763546797, "grad_norm": 0.06035429984331131, "learning_rate": 0.01, "loss": 2.0775, "step": 12546 }, { "epoch": 1.2878694581280787, "grad_norm": 0.0818098783493042, "learning_rate": 0.01, "loss": 2.0685, "step": 12549 }, { "epoch": 1.2881773399014778, "grad_norm": 0.030897030606865883, "learning_rate": 0.01, "loss": 2.0701, "step": 12552 }, { "epoch": 1.2884852216748768, "grad_norm": 0.042795561254024506, "learning_rate": 0.01, "loss": 2.0673, "step": 12555 }, { "epoch": 1.2887931034482758, "grad_norm": 0.11560031026601791, "learning_rate": 0.01, "loss": 2.0637, "step": 12558 }, { "epoch": 1.2891009852216748, "grad_norm": 0.03919963166117668, "learning_rate": 0.01, "loss": 2.0547, "step": 12561 }, { "epoch": 1.2894088669950738, "grad_norm": 0.03816407918930054, "learning_rate": 0.01, "loss": 2.0576, "step": 12564 }, { "epoch": 1.2897167487684729, "grad_norm": 0.04641805216670036, "learning_rate": 0.01, "loss": 2.0779, "step": 12567 }, { "epoch": 1.2900246305418719, "grad_norm": 0.16832102835178375, "learning_rate": 0.01, "loss": 2.0658, "step": 12570 }, { "epoch": 1.290332512315271, "grad_norm": 0.06651032716035843, "learning_rate": 0.01, "loss": 2.0602, "step": 12573 }, { "epoch": 1.29064039408867, "grad_norm": 0.05333925411105156, "learning_rate": 0.01, "loss": 2.0842, "step": 12576 }, { "epoch": 1.290948275862069, "grad_norm": 0.03173685073852539, "learning_rate": 0.01, "loss": 2.0834, "step": 12579 }, { "epoch": 1.291256157635468, "grad_norm": 0.0836583599448204, "learning_rate": 0.01, "loss": 2.0605, "step": 12582 }, { "epoch": 1.291564039408867, "grad_norm": 0.09913724660873413, "learning_rate": 0.01, "loss": 2.0577, "step": 12585 }, { "epoch": 1.291871921182266, "grad_norm": 0.04268624261021614, "learning_rate": 0.01, "loss": 2.1027, "step": 12588 }, { "epoch": 1.292179802955665, "grad_norm": 0.03744608163833618, "learning_rate": 0.01, "loss": 2.0506, "step": 12591 }, { "epoch": 1.292487684729064, "grad_norm": 0.11833969503641129, "learning_rate": 0.01, "loss": 2.0606, "step": 12594 }, { "epoch": 1.292795566502463, "grad_norm": 0.11814229190349579, "learning_rate": 0.01, "loss": 2.0641, "step": 12597 }, { "epoch": 1.293103448275862, "grad_norm": 0.04583375155925751, "learning_rate": 0.01, "loss": 2.0898, "step": 12600 }, { "epoch": 1.2934113300492611, "grad_norm": 0.11559072881937027, "learning_rate": 0.01, "loss": 2.0657, "step": 12603 }, { "epoch": 1.2937192118226601, "grad_norm": 0.050608474761247635, "learning_rate": 0.01, "loss": 2.0879, "step": 12606 }, { "epoch": 1.2940270935960592, "grad_norm": 0.04284593090415001, "learning_rate": 0.01, "loss": 2.0545, "step": 12609 }, { "epoch": 1.2943349753694582, "grad_norm": 0.042168114334344864, "learning_rate": 0.01, "loss": 2.0645, "step": 12612 }, { "epoch": 1.2946428571428572, "grad_norm": 0.06251826882362366, "learning_rate": 0.01, "loss": 2.0621, "step": 12615 }, { "epoch": 1.2949507389162562, "grad_norm": 0.11159554868936539, "learning_rate": 0.01, "loss": 2.0718, "step": 12618 }, { "epoch": 1.2952586206896552, "grad_norm": 0.0715017095208168, "learning_rate": 0.01, "loss": 2.0844, "step": 12621 }, { "epoch": 1.2955665024630543, "grad_norm": 0.12078917771577835, "learning_rate": 0.01, "loss": 2.0604, "step": 12624 }, { "epoch": 1.2958743842364533, "grad_norm": 0.11613093316555023, "learning_rate": 0.01, "loss": 2.0753, "step": 12627 }, { "epoch": 1.2961822660098523, "grad_norm": 0.053543299436569214, "learning_rate": 0.01, "loss": 2.0736, "step": 12630 }, { "epoch": 1.2964901477832513, "grad_norm": 0.05464399605989456, "learning_rate": 0.01, "loss": 2.0845, "step": 12633 }, { "epoch": 1.2967980295566504, "grad_norm": 0.0487944521009922, "learning_rate": 0.01, "loss": 2.0859, "step": 12636 }, { "epoch": 1.2971059113300494, "grad_norm": 0.05266605690121651, "learning_rate": 0.01, "loss": 2.0532, "step": 12639 }, { "epoch": 1.2974137931034484, "grad_norm": 0.07863074541091919, "learning_rate": 0.01, "loss": 2.0496, "step": 12642 }, { "epoch": 1.2977216748768474, "grad_norm": 0.05371072143316269, "learning_rate": 0.01, "loss": 2.1016, "step": 12645 }, { "epoch": 1.2980295566502464, "grad_norm": 0.05592924728989601, "learning_rate": 0.01, "loss": 2.0832, "step": 12648 }, { "epoch": 1.2983374384236452, "grad_norm": 0.0675397738814354, "learning_rate": 0.01, "loss": 2.0784, "step": 12651 }, { "epoch": 1.2986453201970443, "grad_norm": 0.04399113729596138, "learning_rate": 0.01, "loss": 2.0567, "step": 12654 }, { "epoch": 1.2989532019704433, "grad_norm": 0.04609301686286926, "learning_rate": 0.01, "loss": 2.0683, "step": 12657 }, { "epoch": 1.2992610837438423, "grad_norm": 0.05637912079691887, "learning_rate": 0.01, "loss": 2.0655, "step": 12660 }, { "epoch": 1.2995689655172413, "grad_norm": 0.1115126758813858, "learning_rate": 0.01, "loss": 2.0643, "step": 12663 }, { "epoch": 1.2998768472906403, "grad_norm": 0.07762522250413895, "learning_rate": 0.01, "loss": 2.0853, "step": 12666 }, { "epoch": 1.3001847290640394, "grad_norm": 0.06717406958341599, "learning_rate": 0.01, "loss": 2.0881, "step": 12669 }, { "epoch": 1.3004926108374384, "grad_norm": 0.0727803036570549, "learning_rate": 0.01, "loss": 2.0606, "step": 12672 }, { "epoch": 1.3008004926108374, "grad_norm": 0.06588024646043777, "learning_rate": 0.01, "loss": 2.0508, "step": 12675 }, { "epoch": 1.3011083743842364, "grad_norm": 0.09718842804431915, "learning_rate": 0.01, "loss": 2.0793, "step": 12678 }, { "epoch": 1.3014162561576355, "grad_norm": 0.05804411321878433, "learning_rate": 0.01, "loss": 2.0825, "step": 12681 }, { "epoch": 1.3017241379310345, "grad_norm": 0.07549803704023361, "learning_rate": 0.01, "loss": 2.0546, "step": 12684 }, { "epoch": 1.3020320197044335, "grad_norm": 0.04496621713042259, "learning_rate": 0.01, "loss": 2.0483, "step": 12687 }, { "epoch": 1.3023399014778325, "grad_norm": 0.1283668428659439, "learning_rate": 0.01, "loss": 2.0639, "step": 12690 }, { "epoch": 1.3026477832512315, "grad_norm": 0.1276516169309616, "learning_rate": 0.01, "loss": 2.062, "step": 12693 }, { "epoch": 1.3029556650246306, "grad_norm": 0.12865981459617615, "learning_rate": 0.01, "loss": 2.0697, "step": 12696 }, { "epoch": 1.3032635467980296, "grad_norm": 0.05869213864207268, "learning_rate": 0.01, "loss": 2.0955, "step": 12699 }, { "epoch": 1.3035714285714286, "grad_norm": 0.042082637548446655, "learning_rate": 0.01, "loss": 2.0847, "step": 12702 }, { "epoch": 1.3038793103448276, "grad_norm": 0.11474558711051941, "learning_rate": 0.01, "loss": 2.0767, "step": 12705 }, { "epoch": 1.3041871921182266, "grad_norm": 0.12426330894231796, "learning_rate": 0.01, "loss": 2.0787, "step": 12708 }, { "epoch": 1.3044950738916257, "grad_norm": 0.06731969118118286, "learning_rate": 0.01, "loss": 2.064, "step": 12711 }, { "epoch": 1.3048029556650247, "grad_norm": 0.049505047500133514, "learning_rate": 0.01, "loss": 2.0848, "step": 12714 }, { "epoch": 1.3051108374384237, "grad_norm": 0.04460617154836655, "learning_rate": 0.01, "loss": 2.0958, "step": 12717 }, { "epoch": 1.3054187192118227, "grad_norm": 0.05461740121245384, "learning_rate": 0.01, "loss": 2.0724, "step": 12720 }, { "epoch": 1.3057266009852218, "grad_norm": 0.04074997082352638, "learning_rate": 0.01, "loss": 2.0453, "step": 12723 }, { "epoch": 1.3060344827586206, "grad_norm": 0.04925557225942612, "learning_rate": 0.01, "loss": 2.0707, "step": 12726 }, { "epoch": 1.3063423645320196, "grad_norm": 0.06570678949356079, "learning_rate": 0.01, "loss": 2.0703, "step": 12729 }, { "epoch": 1.3066502463054186, "grad_norm": 0.076143778860569, "learning_rate": 0.01, "loss": 2.0639, "step": 12732 }, { "epoch": 1.3069581280788176, "grad_norm": 0.06691130995750427, "learning_rate": 0.01, "loss": 2.0858, "step": 12735 }, { "epoch": 1.3072660098522166, "grad_norm": 0.06733332574367523, "learning_rate": 0.01, "loss": 2.0622, "step": 12738 }, { "epoch": 1.3075738916256157, "grad_norm": 0.08008868247270584, "learning_rate": 0.01, "loss": 2.085, "step": 12741 }, { "epoch": 1.3078817733990147, "grad_norm": 0.10713468492031097, "learning_rate": 0.01, "loss": 2.0642, "step": 12744 }, { "epoch": 1.3081896551724137, "grad_norm": 0.101436547935009, "learning_rate": 0.01, "loss": 2.0835, "step": 12747 }, { "epoch": 1.3084975369458127, "grad_norm": 0.0552450455725193, "learning_rate": 0.01, "loss": 2.0553, "step": 12750 }, { "epoch": 1.3088054187192117, "grad_norm": 0.08755996823310852, "learning_rate": 0.01, "loss": 2.0564, "step": 12753 }, { "epoch": 1.3091133004926108, "grad_norm": 0.03980748727917671, "learning_rate": 0.01, "loss": 2.084, "step": 12756 }, { "epoch": 1.3094211822660098, "grad_norm": 0.0971774309873581, "learning_rate": 0.01, "loss": 2.0858, "step": 12759 }, { "epoch": 1.3097290640394088, "grad_norm": 0.05845404043793678, "learning_rate": 0.01, "loss": 2.0792, "step": 12762 }, { "epoch": 1.3100369458128078, "grad_norm": 0.08498022705316544, "learning_rate": 0.01, "loss": 2.0932, "step": 12765 }, { "epoch": 1.3103448275862069, "grad_norm": 0.05135398730635643, "learning_rate": 0.01, "loss": 2.0796, "step": 12768 }, { "epoch": 1.3106527093596059, "grad_norm": 0.048872511833906174, "learning_rate": 0.01, "loss": 2.0675, "step": 12771 }, { "epoch": 1.310960591133005, "grad_norm": 0.0529983788728714, "learning_rate": 0.01, "loss": 2.0573, "step": 12774 }, { "epoch": 1.311268472906404, "grad_norm": 0.03580658137798309, "learning_rate": 0.01, "loss": 2.0741, "step": 12777 }, { "epoch": 1.311576354679803, "grad_norm": 0.03904234617948532, "learning_rate": 0.01, "loss": 2.0691, "step": 12780 }, { "epoch": 1.311884236453202, "grad_norm": 0.04073551669716835, "learning_rate": 0.01, "loss": 2.0694, "step": 12783 }, { "epoch": 1.312192118226601, "grad_norm": 0.058973487466573715, "learning_rate": 0.01, "loss": 2.0381, "step": 12786 }, { "epoch": 1.3125, "grad_norm": 0.05673586577177048, "learning_rate": 0.01, "loss": 2.0691, "step": 12789 }, { "epoch": 1.312807881773399, "grad_norm": 0.13680770993232727, "learning_rate": 0.01, "loss": 2.0673, "step": 12792 }, { "epoch": 1.313115763546798, "grad_norm": 0.0454241968691349, "learning_rate": 0.01, "loss": 2.0766, "step": 12795 }, { "epoch": 1.313423645320197, "grad_norm": 0.04074293375015259, "learning_rate": 0.01, "loss": 2.0593, "step": 12798 }, { "epoch": 1.313731527093596, "grad_norm": 0.04174893721938133, "learning_rate": 0.01, "loss": 2.0537, "step": 12801 }, { "epoch": 1.314039408866995, "grad_norm": 0.06716062128543854, "learning_rate": 0.01, "loss": 2.0642, "step": 12804 }, { "epoch": 1.3143472906403941, "grad_norm": 0.09866861253976822, "learning_rate": 0.01, "loss": 2.0889, "step": 12807 }, { "epoch": 1.3146551724137931, "grad_norm": 0.13097235560417175, "learning_rate": 0.01, "loss": 2.0744, "step": 12810 }, { "epoch": 1.3149630541871922, "grad_norm": 0.07859724014997482, "learning_rate": 0.01, "loss": 2.0899, "step": 12813 }, { "epoch": 1.3152709359605912, "grad_norm": 0.06141912192106247, "learning_rate": 0.01, "loss": 2.0666, "step": 12816 }, { "epoch": 1.3155788177339902, "grad_norm": 0.05112985149025917, "learning_rate": 0.01, "loss": 2.0658, "step": 12819 }, { "epoch": 1.3158866995073892, "grad_norm": 0.03482404351234436, "learning_rate": 0.01, "loss": 2.0802, "step": 12822 }, { "epoch": 1.3161945812807883, "grad_norm": 0.05562854930758476, "learning_rate": 0.01, "loss": 2.0659, "step": 12825 }, { "epoch": 1.3165024630541873, "grad_norm": 0.04841645434498787, "learning_rate": 0.01, "loss": 2.058, "step": 12828 }, { "epoch": 1.3168103448275863, "grad_norm": 0.0756571963429451, "learning_rate": 0.01, "loss": 2.0283, "step": 12831 }, { "epoch": 1.3171182266009853, "grad_norm": 0.09575197845697403, "learning_rate": 0.01, "loss": 2.0709, "step": 12834 }, { "epoch": 1.3174261083743843, "grad_norm": 0.07003197073936462, "learning_rate": 0.01, "loss": 2.0409, "step": 12837 }, { "epoch": 1.3177339901477834, "grad_norm": 0.12592460215091705, "learning_rate": 0.01, "loss": 2.051, "step": 12840 }, { "epoch": 1.3180418719211824, "grad_norm": 0.07086621969938278, "learning_rate": 0.01, "loss": 2.0856, "step": 12843 }, { "epoch": 1.3183497536945814, "grad_norm": 0.07367062568664551, "learning_rate": 0.01, "loss": 2.0759, "step": 12846 }, { "epoch": 1.3186576354679804, "grad_norm": 0.06731852889060974, "learning_rate": 0.01, "loss": 2.077, "step": 12849 }, { "epoch": 1.3189655172413794, "grad_norm": 0.07499510049819946, "learning_rate": 0.01, "loss": 2.072, "step": 12852 }, { "epoch": 1.3192733990147782, "grad_norm": 0.07604499906301498, "learning_rate": 0.01, "loss": 2.0717, "step": 12855 }, { "epoch": 1.3195812807881773, "grad_norm": 0.0770401731133461, "learning_rate": 0.01, "loss": 2.0894, "step": 12858 }, { "epoch": 1.3198891625615763, "grad_norm": 0.06753168255090714, "learning_rate": 0.01, "loss": 2.0546, "step": 12861 }, { "epoch": 1.3201970443349753, "grad_norm": 0.14175836741924286, "learning_rate": 0.01, "loss": 2.0811, "step": 12864 }, { "epoch": 1.3205049261083743, "grad_norm": 0.04258207604289055, "learning_rate": 0.01, "loss": 2.0831, "step": 12867 }, { "epoch": 1.3208128078817734, "grad_norm": 0.03372815623879433, "learning_rate": 0.01, "loss": 2.0827, "step": 12870 }, { "epoch": 1.3211206896551724, "grad_norm": 0.040649689733982086, "learning_rate": 0.01, "loss": 2.067, "step": 12873 }, { "epoch": 1.3214285714285714, "grad_norm": 0.060684412717819214, "learning_rate": 0.01, "loss": 2.0701, "step": 12876 }, { "epoch": 1.3217364532019704, "grad_norm": 0.036452196538448334, "learning_rate": 0.01, "loss": 2.0726, "step": 12879 }, { "epoch": 1.3220443349753694, "grad_norm": 0.09457682818174362, "learning_rate": 0.01, "loss": 2.0709, "step": 12882 }, { "epoch": 1.3223522167487685, "grad_norm": 0.049776870757341385, "learning_rate": 0.01, "loss": 2.0354, "step": 12885 }, { "epoch": 1.3226600985221675, "grad_norm": 0.05794042348861694, "learning_rate": 0.01, "loss": 2.0781, "step": 12888 }, { "epoch": 1.3229679802955665, "grad_norm": 0.052100833505392075, "learning_rate": 0.01, "loss": 2.0801, "step": 12891 }, { "epoch": 1.3232758620689655, "grad_norm": 0.06558441370725632, "learning_rate": 0.01, "loss": 2.076, "step": 12894 }, { "epoch": 1.3235837438423645, "grad_norm": 0.1107778549194336, "learning_rate": 0.01, "loss": 2.0859, "step": 12897 }, { "epoch": 1.3238916256157636, "grad_norm": 0.052982959896326065, "learning_rate": 0.01, "loss": 2.0749, "step": 12900 }, { "epoch": 1.3241995073891626, "grad_norm": 0.0976911410689354, "learning_rate": 0.01, "loss": 2.067, "step": 12903 }, { "epoch": 1.3245073891625616, "grad_norm": 0.05620484799146652, "learning_rate": 0.01, "loss": 2.0892, "step": 12906 }, { "epoch": 1.3248152709359606, "grad_norm": 0.12753431499004364, "learning_rate": 0.01, "loss": 2.073, "step": 12909 }, { "epoch": 1.3251231527093597, "grad_norm": 0.14311249554157257, "learning_rate": 0.01, "loss": 2.0641, "step": 12912 }, { "epoch": 1.3254310344827587, "grad_norm": 0.049242276698350906, "learning_rate": 0.01, "loss": 2.0701, "step": 12915 }, { "epoch": 1.3257389162561577, "grad_norm": 0.07875422388315201, "learning_rate": 0.01, "loss": 2.0889, "step": 12918 }, { "epoch": 1.3260467980295567, "grad_norm": 0.036451369524002075, "learning_rate": 0.01, "loss": 2.0634, "step": 12921 }, { "epoch": 1.3263546798029557, "grad_norm": 0.03659353405237198, "learning_rate": 0.01, "loss": 2.0725, "step": 12924 }, { "epoch": 1.3266625615763548, "grad_norm": 0.049371130764484406, "learning_rate": 0.01, "loss": 2.0531, "step": 12927 }, { "epoch": 1.3269704433497536, "grad_norm": 0.05998126044869423, "learning_rate": 0.01, "loss": 2.0894, "step": 12930 }, { "epoch": 1.3272783251231526, "grad_norm": 0.08878383040428162, "learning_rate": 0.01, "loss": 2.0627, "step": 12933 }, { "epoch": 1.3275862068965516, "grad_norm": 0.059183619916439056, "learning_rate": 0.01, "loss": 2.0829, "step": 12936 }, { "epoch": 1.3278940886699506, "grad_norm": 0.05783310905098915, "learning_rate": 0.01, "loss": 2.0972, "step": 12939 }, { "epoch": 1.3282019704433496, "grad_norm": 0.07206647843122482, "learning_rate": 0.01, "loss": 2.0667, "step": 12942 }, { "epoch": 1.3285098522167487, "grad_norm": 0.07303550839424133, "learning_rate": 0.01, "loss": 2.0745, "step": 12945 }, { "epoch": 1.3288177339901477, "grad_norm": 0.04319525510072708, "learning_rate": 0.01, "loss": 2.0673, "step": 12948 }, { "epoch": 1.3291256157635467, "grad_norm": 0.044222913682460785, "learning_rate": 0.01, "loss": 2.0856, "step": 12951 }, { "epoch": 1.3294334975369457, "grad_norm": 0.08791283518075943, "learning_rate": 0.01, "loss": 2.0841, "step": 12954 }, { "epoch": 1.3297413793103448, "grad_norm": 0.05172525718808174, "learning_rate": 0.01, "loss": 2.0828, "step": 12957 }, { "epoch": 1.3300492610837438, "grad_norm": 0.053524646908044815, "learning_rate": 0.01, "loss": 2.0504, "step": 12960 }, { "epoch": 1.3303571428571428, "grad_norm": 0.03858666867017746, "learning_rate": 0.01, "loss": 2.1045, "step": 12963 }, { "epoch": 1.3306650246305418, "grad_norm": 0.037037089467048645, "learning_rate": 0.01, "loss": 2.0742, "step": 12966 }, { "epoch": 1.3309729064039408, "grad_norm": 0.04609520733356476, "learning_rate": 0.01, "loss": 2.0474, "step": 12969 }, { "epoch": 1.3312807881773399, "grad_norm": 0.03773853927850723, "learning_rate": 0.01, "loss": 2.0573, "step": 12972 }, { "epoch": 1.3315886699507389, "grad_norm": 0.040222879499197006, "learning_rate": 0.01, "loss": 2.073, "step": 12975 }, { "epoch": 1.331896551724138, "grad_norm": 0.11522398144006729, "learning_rate": 0.01, "loss": 2.0523, "step": 12978 }, { "epoch": 1.332204433497537, "grad_norm": 0.0496886670589447, "learning_rate": 0.01, "loss": 2.0873, "step": 12981 }, { "epoch": 1.332512315270936, "grad_norm": 0.0955866202712059, "learning_rate": 0.01, "loss": 2.0735, "step": 12984 }, { "epoch": 1.332820197044335, "grad_norm": 0.06260306388139725, "learning_rate": 0.01, "loss": 2.0696, "step": 12987 }, { "epoch": 1.333128078817734, "grad_norm": 0.049784719944000244, "learning_rate": 0.01, "loss": 2.0807, "step": 12990 }, { "epoch": 1.333435960591133, "grad_norm": 0.04571852833032608, "learning_rate": 0.01, "loss": 2.0648, "step": 12993 }, { "epoch": 1.333743842364532, "grad_norm": 0.11032246053218842, "learning_rate": 0.01, "loss": 2.0777, "step": 12996 }, { "epoch": 1.334051724137931, "grad_norm": 0.04030182585120201, "learning_rate": 0.01, "loss": 2.0491, "step": 12999 }, { "epoch": 1.33435960591133, "grad_norm": 0.06681946665048599, "learning_rate": 0.01, "loss": 2.051, "step": 13002 }, { "epoch": 1.334667487684729, "grad_norm": 0.04532696306705475, "learning_rate": 0.01, "loss": 2.0322, "step": 13005 }, { "epoch": 1.3349753694581281, "grad_norm": 0.03890594094991684, "learning_rate": 0.01, "loss": 2.0792, "step": 13008 }, { "epoch": 1.3352832512315271, "grad_norm": 0.08290864527225494, "learning_rate": 0.01, "loss": 2.0903, "step": 13011 }, { "epoch": 1.3355911330049262, "grad_norm": 0.09402919560670853, "learning_rate": 0.01, "loss": 2.0678, "step": 13014 }, { "epoch": 1.3358990147783252, "grad_norm": 0.06945643573999405, "learning_rate": 0.01, "loss": 2.0823, "step": 13017 }, { "epoch": 1.3362068965517242, "grad_norm": 0.0540471225976944, "learning_rate": 0.01, "loss": 2.0705, "step": 13020 }, { "epoch": 1.3365147783251232, "grad_norm": 0.04104168713092804, "learning_rate": 0.01, "loss": 2.0808, "step": 13023 }, { "epoch": 1.3368226600985222, "grad_norm": 0.04578167945146561, "learning_rate": 0.01, "loss": 2.0821, "step": 13026 }, { "epoch": 1.3371305418719213, "grad_norm": 0.07289981096982956, "learning_rate": 0.01, "loss": 2.0686, "step": 13029 }, { "epoch": 1.3374384236453203, "grad_norm": 0.09114310890436172, "learning_rate": 0.01, "loss": 2.0435, "step": 13032 }, { "epoch": 1.3377463054187193, "grad_norm": 0.06305088102817535, "learning_rate": 0.01, "loss": 2.0593, "step": 13035 }, { "epoch": 1.3380541871921183, "grad_norm": 0.06495746225118637, "learning_rate": 0.01, "loss": 2.0728, "step": 13038 }, { "epoch": 1.3383620689655173, "grad_norm": 0.05586539953947067, "learning_rate": 0.01, "loss": 2.0747, "step": 13041 }, { "epoch": 1.3386699507389164, "grad_norm": 0.04566524177789688, "learning_rate": 0.01, "loss": 2.0576, "step": 13044 }, { "epoch": 1.3389778325123154, "grad_norm": 0.060839373618364334, "learning_rate": 0.01, "loss": 2.0717, "step": 13047 }, { "epoch": 1.3392857142857144, "grad_norm": 0.11224903166294098, "learning_rate": 0.01, "loss": 2.0878, "step": 13050 }, { "epoch": 1.3395935960591134, "grad_norm": 0.09223728626966476, "learning_rate": 0.01, "loss": 2.0531, "step": 13053 }, { "epoch": 1.3399014778325122, "grad_norm": 0.0413731187582016, "learning_rate": 0.01, "loss": 2.0495, "step": 13056 }, { "epoch": 1.3402093596059113, "grad_norm": 0.050937358289957047, "learning_rate": 0.01, "loss": 2.0774, "step": 13059 }, { "epoch": 1.3405172413793103, "grad_norm": 0.0407971516251564, "learning_rate": 0.01, "loss": 2.0766, "step": 13062 }, { "epoch": 1.3408251231527093, "grad_norm": 0.0623883455991745, "learning_rate": 0.01, "loss": 2.0691, "step": 13065 }, { "epoch": 1.3411330049261083, "grad_norm": 0.09325427561998367, "learning_rate": 0.01, "loss": 2.0731, "step": 13068 }, { "epoch": 1.3414408866995073, "grad_norm": 0.06965765357017517, "learning_rate": 0.01, "loss": 2.0653, "step": 13071 }, { "epoch": 1.3417487684729064, "grad_norm": 0.12671297788619995, "learning_rate": 0.01, "loss": 2.1028, "step": 13074 }, { "epoch": 1.3420566502463054, "grad_norm": 0.04154878482222557, "learning_rate": 0.01, "loss": 2.0783, "step": 13077 }, { "epoch": 1.3423645320197044, "grad_norm": 0.04698561131954193, "learning_rate": 0.01, "loss": 2.0799, "step": 13080 }, { "epoch": 1.3426724137931034, "grad_norm": 0.031127501279115677, "learning_rate": 0.01, "loss": 2.0756, "step": 13083 }, { "epoch": 1.3429802955665024, "grad_norm": 0.05258537083864212, "learning_rate": 0.01, "loss": 2.0821, "step": 13086 }, { "epoch": 1.3432881773399015, "grad_norm": 0.06848637759685516, "learning_rate": 0.01, "loss": 2.0629, "step": 13089 }, { "epoch": 1.3435960591133005, "grad_norm": 0.07738485932350159, "learning_rate": 0.01, "loss": 2.0674, "step": 13092 }, { "epoch": 1.3439039408866995, "grad_norm": 0.09635680168867111, "learning_rate": 0.01, "loss": 2.0782, "step": 13095 }, { "epoch": 1.3442118226600985, "grad_norm": 0.04388611391186714, "learning_rate": 0.01, "loss": 2.0575, "step": 13098 }, { "epoch": 1.3445197044334976, "grad_norm": 0.0776490792632103, "learning_rate": 0.01, "loss": 2.0753, "step": 13101 }, { "epoch": 1.3448275862068966, "grad_norm": 0.11331035196781158, "learning_rate": 0.01, "loss": 2.053, "step": 13104 }, { "epoch": 1.3451354679802956, "grad_norm": 0.04267279431223869, "learning_rate": 0.01, "loss": 2.0812, "step": 13107 }, { "epoch": 1.3454433497536946, "grad_norm": 0.05454112961888313, "learning_rate": 0.01, "loss": 2.0711, "step": 13110 }, { "epoch": 1.3457512315270936, "grad_norm": 0.07470305263996124, "learning_rate": 0.01, "loss": 2.0878, "step": 13113 }, { "epoch": 1.3460591133004927, "grad_norm": 0.057337477803230286, "learning_rate": 0.01, "loss": 2.0607, "step": 13116 }, { "epoch": 1.3463669950738917, "grad_norm": 0.09155120700597763, "learning_rate": 0.01, "loss": 2.0898, "step": 13119 }, { "epoch": 1.3466748768472907, "grad_norm": 0.09644894301891327, "learning_rate": 0.01, "loss": 2.0905, "step": 13122 }, { "epoch": 1.3469827586206897, "grad_norm": 0.0579628124833107, "learning_rate": 0.01, "loss": 2.0834, "step": 13125 }, { "epoch": 1.3472906403940887, "grad_norm": 0.09968624264001846, "learning_rate": 0.01, "loss": 2.0797, "step": 13128 }, { "epoch": 1.3475985221674878, "grad_norm": 0.04834052175283432, "learning_rate": 0.01, "loss": 2.0552, "step": 13131 }, { "epoch": 1.3479064039408866, "grad_norm": 0.05561887100338936, "learning_rate": 0.01, "loss": 2.0564, "step": 13134 }, { "epoch": 1.3482142857142856, "grad_norm": 0.14990638196468353, "learning_rate": 0.01, "loss": 2.0738, "step": 13137 }, { "epoch": 1.3485221674876846, "grad_norm": 0.07530777156352997, "learning_rate": 0.01, "loss": 2.0765, "step": 13140 }, { "epoch": 1.3488300492610836, "grad_norm": 0.09080106765031815, "learning_rate": 0.01, "loss": 2.083, "step": 13143 }, { "epoch": 1.3491379310344827, "grad_norm": 0.042014699429273605, "learning_rate": 0.01, "loss": 2.0608, "step": 13146 }, { "epoch": 1.3494458128078817, "grad_norm": 0.08905219286680222, "learning_rate": 0.01, "loss": 2.0657, "step": 13149 }, { "epoch": 1.3497536945812807, "grad_norm": 0.1093059629201889, "learning_rate": 0.01, "loss": 2.102, "step": 13152 }, { "epoch": 1.3500615763546797, "grad_norm": 0.09834617376327515, "learning_rate": 0.01, "loss": 2.0601, "step": 13155 }, { "epoch": 1.3503694581280787, "grad_norm": 0.0754542201757431, "learning_rate": 0.01, "loss": 2.103, "step": 13158 }, { "epoch": 1.3506773399014778, "grad_norm": 0.09639342129230499, "learning_rate": 0.01, "loss": 2.0743, "step": 13161 }, { "epoch": 1.3509852216748768, "grad_norm": 0.05084405094385147, "learning_rate": 0.01, "loss": 2.0898, "step": 13164 }, { "epoch": 1.3512931034482758, "grad_norm": 0.04796381667256355, "learning_rate": 0.01, "loss": 2.0788, "step": 13167 }, { "epoch": 1.3516009852216748, "grad_norm": 0.05373486131429672, "learning_rate": 0.01, "loss": 2.0632, "step": 13170 }, { "epoch": 1.3519088669950738, "grad_norm": 0.05145580321550369, "learning_rate": 0.01, "loss": 2.0383, "step": 13173 }, { "epoch": 1.3522167487684729, "grad_norm": 0.03861214593052864, "learning_rate": 0.01, "loss": 2.0678, "step": 13176 }, { "epoch": 1.3525246305418719, "grad_norm": 0.04394346475601196, "learning_rate": 0.01, "loss": 2.0601, "step": 13179 }, { "epoch": 1.352832512315271, "grad_norm": 0.08851804584264755, "learning_rate": 0.01, "loss": 2.0374, "step": 13182 }, { "epoch": 1.35314039408867, "grad_norm": 0.059799451380968094, "learning_rate": 0.01, "loss": 2.0735, "step": 13185 }, { "epoch": 1.353448275862069, "grad_norm": 0.13764169812202454, "learning_rate": 0.01, "loss": 2.1001, "step": 13188 }, { "epoch": 1.353756157635468, "grad_norm": 0.05652278661727905, "learning_rate": 0.01, "loss": 2.0771, "step": 13191 }, { "epoch": 1.354064039408867, "grad_norm": 0.04755775257945061, "learning_rate": 0.01, "loss": 2.0467, "step": 13194 }, { "epoch": 1.354371921182266, "grad_norm": 0.058131635189056396, "learning_rate": 0.01, "loss": 2.0884, "step": 13197 }, { "epoch": 1.354679802955665, "grad_norm": 0.041266053915023804, "learning_rate": 0.01, "loss": 2.0684, "step": 13200 }, { "epoch": 1.354987684729064, "grad_norm": 0.034990034997463226, "learning_rate": 0.01, "loss": 2.0576, "step": 13203 }, { "epoch": 1.355295566502463, "grad_norm": 0.13107064366340637, "learning_rate": 0.01, "loss": 2.0616, "step": 13206 }, { "epoch": 1.355603448275862, "grad_norm": 0.05397200584411621, "learning_rate": 0.01, "loss": 2.0953, "step": 13209 }, { "epoch": 1.3559113300492611, "grad_norm": 0.04137737303972244, "learning_rate": 0.01, "loss": 2.0538, "step": 13212 }, { "epoch": 1.3562192118226601, "grad_norm": 0.05001407861709595, "learning_rate": 0.01, "loss": 2.0809, "step": 13215 }, { "epoch": 1.3565270935960592, "grad_norm": 0.10387953370809555, "learning_rate": 0.01, "loss": 2.0818, "step": 13218 }, { "epoch": 1.3568349753694582, "grad_norm": 0.052998363971710205, "learning_rate": 0.01, "loss": 2.0711, "step": 13221 }, { "epoch": 1.3571428571428572, "grad_norm": 0.06805765628814697, "learning_rate": 0.01, "loss": 2.0604, "step": 13224 }, { "epoch": 1.3574507389162562, "grad_norm": 0.06597940623760223, "learning_rate": 0.01, "loss": 2.0701, "step": 13227 }, { "epoch": 1.3577586206896552, "grad_norm": 0.10083628445863724, "learning_rate": 0.01, "loss": 2.086, "step": 13230 }, { "epoch": 1.3580665024630543, "grad_norm": 0.05467986315488815, "learning_rate": 0.01, "loss": 2.072, "step": 13233 }, { "epoch": 1.3583743842364533, "grad_norm": 0.08951261639595032, "learning_rate": 0.01, "loss": 2.1256, "step": 13236 }, { "epoch": 1.3586822660098523, "grad_norm": 0.052532244473695755, "learning_rate": 0.01, "loss": 2.0555, "step": 13239 }, { "epoch": 1.3589901477832513, "grad_norm": 0.038159146904945374, "learning_rate": 0.01, "loss": 2.062, "step": 13242 }, { "epoch": 1.3592980295566504, "grad_norm": 0.09895820915699005, "learning_rate": 0.01, "loss": 2.0559, "step": 13245 }, { "epoch": 1.3596059113300494, "grad_norm": 0.07522387057542801, "learning_rate": 0.01, "loss": 2.0634, "step": 13248 }, { "epoch": 1.3599137931034484, "grad_norm": 0.04762687534093857, "learning_rate": 0.01, "loss": 2.0848, "step": 13251 }, { "epoch": 1.3602216748768474, "grad_norm": 0.0716032013297081, "learning_rate": 0.01, "loss": 2.0783, "step": 13254 }, { "epoch": 1.3605295566502464, "grad_norm": 0.08518968522548676, "learning_rate": 0.01, "loss": 2.0818, "step": 13257 }, { "epoch": 1.3608374384236452, "grad_norm": 0.06927520781755447, "learning_rate": 0.01, "loss": 2.0728, "step": 13260 }, { "epoch": 1.3611453201970443, "grad_norm": 0.10368376970291138, "learning_rate": 0.01, "loss": 2.0419, "step": 13263 }, { "epoch": 1.3614532019704433, "grad_norm": 0.04249117895960808, "learning_rate": 0.01, "loss": 2.0695, "step": 13266 }, { "epoch": 1.3617610837438423, "grad_norm": 0.06504488736391068, "learning_rate": 0.01, "loss": 2.0658, "step": 13269 }, { "epoch": 1.3620689655172413, "grad_norm": 0.03990466147661209, "learning_rate": 0.01, "loss": 2.0796, "step": 13272 }, { "epoch": 1.3623768472906403, "grad_norm": 0.042559072375297546, "learning_rate": 0.01, "loss": 2.0643, "step": 13275 }, { "epoch": 1.3626847290640394, "grad_norm": 0.046650230884552, "learning_rate": 0.01, "loss": 2.0756, "step": 13278 }, { "epoch": 1.3629926108374384, "grad_norm": 0.08641167730093002, "learning_rate": 0.01, "loss": 2.0638, "step": 13281 }, { "epoch": 1.3633004926108374, "grad_norm": 0.11438708007335663, "learning_rate": 0.01, "loss": 2.0559, "step": 13284 }, { "epoch": 1.3636083743842364, "grad_norm": 0.05360870808362961, "learning_rate": 0.01, "loss": 2.0534, "step": 13287 }, { "epoch": 1.3639162561576355, "grad_norm": 0.07226021587848663, "learning_rate": 0.01, "loss": 2.0569, "step": 13290 }, { "epoch": 1.3642241379310345, "grad_norm": 0.04532739892601967, "learning_rate": 0.01, "loss": 2.0504, "step": 13293 }, { "epoch": 1.3645320197044335, "grad_norm": 0.06119906157255173, "learning_rate": 0.01, "loss": 2.0507, "step": 13296 }, { "epoch": 1.3648399014778325, "grad_norm": 0.05576052516698837, "learning_rate": 0.01, "loss": 2.0454, "step": 13299 }, { "epoch": 1.3651477832512315, "grad_norm": 0.038748834282159805, "learning_rate": 0.01, "loss": 2.0632, "step": 13302 }, { "epoch": 1.3654556650246306, "grad_norm": 0.09733711183071136, "learning_rate": 0.01, "loss": 2.0733, "step": 13305 }, { "epoch": 1.3657635467980296, "grad_norm": 0.043375931680202484, "learning_rate": 0.01, "loss": 2.0629, "step": 13308 }, { "epoch": 1.3660714285714286, "grad_norm": 0.11930018663406372, "learning_rate": 0.01, "loss": 2.0931, "step": 13311 }, { "epoch": 1.3663793103448276, "grad_norm": 0.06754540652036667, "learning_rate": 0.01, "loss": 2.0575, "step": 13314 }, { "epoch": 1.3666871921182266, "grad_norm": 0.07226148992776871, "learning_rate": 0.01, "loss": 2.0786, "step": 13317 }, { "epoch": 1.3669950738916257, "grad_norm": 0.08159705251455307, "learning_rate": 0.01, "loss": 2.0594, "step": 13320 }, { "epoch": 1.3673029556650247, "grad_norm": 0.044994477182626724, "learning_rate": 0.01, "loss": 2.0782, "step": 13323 }, { "epoch": 1.3676108374384237, "grad_norm": 0.05308050662279129, "learning_rate": 0.01, "loss": 2.0645, "step": 13326 }, { "epoch": 1.3679187192118227, "grad_norm": 0.09141236543655396, "learning_rate": 0.01, "loss": 2.0801, "step": 13329 }, { "epoch": 1.3682266009852218, "grad_norm": 0.040702883154153824, "learning_rate": 0.01, "loss": 2.0497, "step": 13332 }, { "epoch": 1.3685344827586206, "grad_norm": 0.06111524999141693, "learning_rate": 0.01, "loss": 2.0633, "step": 13335 }, { "epoch": 1.3688423645320196, "grad_norm": 0.10802275687456131, "learning_rate": 0.01, "loss": 2.0541, "step": 13338 }, { "epoch": 1.3691502463054186, "grad_norm": 0.09049344807863235, "learning_rate": 0.01, "loss": 2.0636, "step": 13341 }, { "epoch": 1.3694581280788176, "grad_norm": 0.055894456803798676, "learning_rate": 0.01, "loss": 2.0675, "step": 13344 }, { "epoch": 1.3697660098522166, "grad_norm": 0.054729919880628586, "learning_rate": 0.01, "loss": 2.0674, "step": 13347 }, { "epoch": 1.3700738916256157, "grad_norm": 0.05745011568069458, "learning_rate": 0.01, "loss": 2.0707, "step": 13350 }, { "epoch": 1.3703817733990147, "grad_norm": 0.06573651731014252, "learning_rate": 0.01, "loss": 2.0493, "step": 13353 }, { "epoch": 1.3706896551724137, "grad_norm": 0.20495210587978363, "learning_rate": 0.01, "loss": 2.0798, "step": 13356 }, { "epoch": 1.3709975369458127, "grad_norm": 0.10678639262914658, "learning_rate": 0.01, "loss": 2.0499, "step": 13359 }, { "epoch": 1.3713054187192117, "grad_norm": 0.10948281735181808, "learning_rate": 0.01, "loss": 2.087, "step": 13362 }, { "epoch": 1.3716133004926108, "grad_norm": 0.07788719981908798, "learning_rate": 0.01, "loss": 2.0745, "step": 13365 }, { "epoch": 1.3719211822660098, "grad_norm": 0.04947768524289131, "learning_rate": 0.01, "loss": 2.0608, "step": 13368 }, { "epoch": 1.3722290640394088, "grad_norm": 0.04789843037724495, "learning_rate": 0.01, "loss": 2.0607, "step": 13371 }, { "epoch": 1.3725369458128078, "grad_norm": 0.05217898637056351, "learning_rate": 0.01, "loss": 2.058, "step": 13374 }, { "epoch": 1.3728448275862069, "grad_norm": 0.04018987715244293, "learning_rate": 0.01, "loss": 2.0782, "step": 13377 }, { "epoch": 1.3731527093596059, "grad_norm": 0.035446904599666595, "learning_rate": 0.01, "loss": 2.0668, "step": 13380 }, { "epoch": 1.373460591133005, "grad_norm": 0.038600854575634, "learning_rate": 0.01, "loss": 2.0865, "step": 13383 }, { "epoch": 1.373768472906404, "grad_norm": 0.055341143161058426, "learning_rate": 0.01, "loss": 2.0591, "step": 13386 }, { "epoch": 1.374076354679803, "grad_norm": 0.11673317849636078, "learning_rate": 0.01, "loss": 2.0897, "step": 13389 }, { "epoch": 1.374384236453202, "grad_norm": 0.06797752529382706, "learning_rate": 0.01, "loss": 2.0676, "step": 13392 }, { "epoch": 1.374692118226601, "grad_norm": 0.041885413229465485, "learning_rate": 0.01, "loss": 2.0245, "step": 13395 }, { "epoch": 1.375, "grad_norm": 0.07391481846570969, "learning_rate": 0.01, "loss": 2.067, "step": 13398 }, { "epoch": 1.375307881773399, "grad_norm": 0.07959283888339996, "learning_rate": 0.01, "loss": 2.0192, "step": 13401 }, { "epoch": 1.375615763546798, "grad_norm": 0.09504754096269608, "learning_rate": 0.01, "loss": 2.0841, "step": 13404 }, { "epoch": 1.375923645320197, "grad_norm": 0.08874071389436722, "learning_rate": 0.01, "loss": 2.0473, "step": 13407 }, { "epoch": 1.376231527093596, "grad_norm": 0.05350710079073906, "learning_rate": 0.01, "loss": 2.0763, "step": 13410 }, { "epoch": 1.376539408866995, "grad_norm": 0.06738609820604324, "learning_rate": 0.01, "loss": 2.0782, "step": 13413 }, { "epoch": 1.3768472906403941, "grad_norm": 0.04335073009133339, "learning_rate": 0.01, "loss": 2.0601, "step": 13416 }, { "epoch": 1.3771551724137931, "grad_norm": 0.056045398116111755, "learning_rate": 0.01, "loss": 2.0588, "step": 13419 }, { "epoch": 1.3774630541871922, "grad_norm": 0.06593155860900879, "learning_rate": 0.01, "loss": 2.0637, "step": 13422 }, { "epoch": 1.3777709359605912, "grad_norm": 0.08942624181509018, "learning_rate": 0.01, "loss": 2.0724, "step": 13425 }, { "epoch": 1.3780788177339902, "grad_norm": 0.10098058730363846, "learning_rate": 0.01, "loss": 2.064, "step": 13428 }, { "epoch": 1.3783866995073892, "grad_norm": 0.04804680123925209, "learning_rate": 0.01, "loss": 2.0684, "step": 13431 }, { "epoch": 1.3786945812807883, "grad_norm": 0.08567364513874054, "learning_rate": 0.01, "loss": 2.0617, "step": 13434 }, { "epoch": 1.3790024630541873, "grad_norm": 0.06027091667056084, "learning_rate": 0.01, "loss": 2.0666, "step": 13437 }, { "epoch": 1.3793103448275863, "grad_norm": 0.08809462189674377, "learning_rate": 0.01, "loss": 2.0862, "step": 13440 }, { "epoch": 1.3796182266009853, "grad_norm": 0.053466469049453735, "learning_rate": 0.01, "loss": 2.0497, "step": 13443 }, { "epoch": 1.3799261083743843, "grad_norm": 0.033619511872529984, "learning_rate": 0.01, "loss": 2.0298, "step": 13446 }, { "epoch": 1.3802339901477834, "grad_norm": 0.04768878221511841, "learning_rate": 0.01, "loss": 2.054, "step": 13449 }, { "epoch": 1.3805418719211824, "grad_norm": 0.07854757457971573, "learning_rate": 0.01, "loss": 2.0538, "step": 13452 }, { "epoch": 1.3808497536945814, "grad_norm": 0.05409559607505798, "learning_rate": 0.01, "loss": 2.082, "step": 13455 }, { "epoch": 1.3811576354679804, "grad_norm": 0.057855118066072464, "learning_rate": 0.01, "loss": 2.0814, "step": 13458 }, { "epoch": 1.3814655172413794, "grad_norm": 0.047502126544713974, "learning_rate": 0.01, "loss": 2.0955, "step": 13461 }, { "epoch": 1.3817733990147782, "grad_norm": 0.040939487516880035, "learning_rate": 0.01, "loss": 2.0431, "step": 13464 }, { "epoch": 1.3820812807881773, "grad_norm": 0.1307850480079651, "learning_rate": 0.01, "loss": 2.0801, "step": 13467 }, { "epoch": 1.3823891625615763, "grad_norm": 0.04386845603585243, "learning_rate": 0.01, "loss": 2.0662, "step": 13470 }, { "epoch": 1.3826970443349753, "grad_norm": 0.08174968510866165, "learning_rate": 0.01, "loss": 2.0544, "step": 13473 }, { "epoch": 1.3830049261083743, "grad_norm": 0.1113237589597702, "learning_rate": 0.01, "loss": 2.0411, "step": 13476 }, { "epoch": 1.3833128078817734, "grad_norm": 0.06756308674812317, "learning_rate": 0.01, "loss": 2.0813, "step": 13479 }, { "epoch": 1.3836206896551724, "grad_norm": 0.05931835621595383, "learning_rate": 0.01, "loss": 2.0544, "step": 13482 }, { "epoch": 1.3839285714285714, "grad_norm": 0.043539129197597504, "learning_rate": 0.01, "loss": 2.0528, "step": 13485 }, { "epoch": 1.3842364532019704, "grad_norm": 0.0510721355676651, "learning_rate": 0.01, "loss": 2.075, "step": 13488 }, { "epoch": 1.3845443349753694, "grad_norm": 0.10811501741409302, "learning_rate": 0.01, "loss": 2.0626, "step": 13491 }, { "epoch": 1.3848522167487685, "grad_norm": 0.08322811871767044, "learning_rate": 0.01, "loss": 2.0882, "step": 13494 }, { "epoch": 1.3851600985221675, "grad_norm": 0.05101979896426201, "learning_rate": 0.01, "loss": 2.0677, "step": 13497 }, { "epoch": 1.3854679802955665, "grad_norm": 0.036535851657390594, "learning_rate": 0.01, "loss": 2.0791, "step": 13500 }, { "epoch": 1.3857758620689655, "grad_norm": 0.05161239951848984, "learning_rate": 0.01, "loss": 2.0513, "step": 13503 }, { "epoch": 1.3860837438423645, "grad_norm": 0.06677153706550598, "learning_rate": 0.01, "loss": 2.0581, "step": 13506 }, { "epoch": 1.3863916256157636, "grad_norm": 0.04239841178059578, "learning_rate": 0.01, "loss": 2.0383, "step": 13509 }, { "epoch": 1.3866995073891626, "grad_norm": 0.04252205416560173, "learning_rate": 0.01, "loss": 2.0875, "step": 13512 }, { "epoch": 1.3870073891625616, "grad_norm": 0.10147176682949066, "learning_rate": 0.01, "loss": 2.0631, "step": 13515 }, { "epoch": 1.3873152709359606, "grad_norm": 0.046371154487133026, "learning_rate": 0.01, "loss": 2.0784, "step": 13518 }, { "epoch": 1.3876231527093597, "grad_norm": 0.0997064933180809, "learning_rate": 0.01, "loss": 2.0578, "step": 13521 }, { "epoch": 1.3879310344827587, "grad_norm": 0.05733582749962807, "learning_rate": 0.01, "loss": 2.0742, "step": 13524 }, { "epoch": 1.3882389162561577, "grad_norm": 0.05061260983347893, "learning_rate": 0.01, "loss": 2.0448, "step": 13527 }, { "epoch": 1.3885467980295567, "grad_norm": 0.04336051642894745, "learning_rate": 0.01, "loss": 2.0846, "step": 13530 }, { "epoch": 1.3888546798029557, "grad_norm": 0.04115337133407593, "learning_rate": 0.01, "loss": 2.0577, "step": 13533 }, { "epoch": 1.3891625615763548, "grad_norm": 0.04914069175720215, "learning_rate": 0.01, "loss": 2.0815, "step": 13536 }, { "epoch": 1.3894704433497536, "grad_norm": 0.0677042305469513, "learning_rate": 0.01, "loss": 2.0754, "step": 13539 }, { "epoch": 1.3897783251231526, "grad_norm": 0.04985208064317703, "learning_rate": 0.01, "loss": 2.0633, "step": 13542 }, { "epoch": 1.3900862068965516, "grad_norm": 0.08042199164628983, "learning_rate": 0.01, "loss": 2.0828, "step": 13545 }, { "epoch": 1.3903940886699506, "grad_norm": 0.03648814186453819, "learning_rate": 0.01, "loss": 2.0669, "step": 13548 }, { "epoch": 1.3907019704433496, "grad_norm": 0.03645399957895279, "learning_rate": 0.01, "loss": 2.0495, "step": 13551 }, { "epoch": 1.3910098522167487, "grad_norm": 0.04866683483123779, "learning_rate": 0.01, "loss": 2.0531, "step": 13554 }, { "epoch": 1.3913177339901477, "grad_norm": 0.07728299498558044, "learning_rate": 0.01, "loss": 2.0671, "step": 13557 }, { "epoch": 1.3916256157635467, "grad_norm": 0.12097810208797455, "learning_rate": 0.01, "loss": 2.0935, "step": 13560 }, { "epoch": 1.3919334975369457, "grad_norm": 0.12485776096582413, "learning_rate": 0.01, "loss": 2.0712, "step": 13563 }, { "epoch": 1.3922413793103448, "grad_norm": 0.053524475544691086, "learning_rate": 0.01, "loss": 2.0595, "step": 13566 }, { "epoch": 1.3925492610837438, "grad_norm": 0.04277713969349861, "learning_rate": 0.01, "loss": 2.0791, "step": 13569 }, { "epoch": 1.3928571428571428, "grad_norm": 0.09847384691238403, "learning_rate": 0.01, "loss": 2.0754, "step": 13572 }, { "epoch": 1.3931650246305418, "grad_norm": 0.03410463035106659, "learning_rate": 0.01, "loss": 2.0556, "step": 13575 }, { "epoch": 1.3934729064039408, "grad_norm": 0.10606678575277328, "learning_rate": 0.01, "loss": 2.0637, "step": 13578 }, { "epoch": 1.3937807881773399, "grad_norm": 0.06554549187421799, "learning_rate": 0.01, "loss": 2.0549, "step": 13581 }, { "epoch": 1.3940886699507389, "grad_norm": 0.07487329095602036, "learning_rate": 0.01, "loss": 2.0645, "step": 13584 }, { "epoch": 1.394396551724138, "grad_norm": 0.07526996731758118, "learning_rate": 0.01, "loss": 2.0733, "step": 13587 }, { "epoch": 1.394704433497537, "grad_norm": 0.0581665076315403, "learning_rate": 0.01, "loss": 2.0491, "step": 13590 }, { "epoch": 1.395012315270936, "grad_norm": 0.057513732463121414, "learning_rate": 0.01, "loss": 2.087, "step": 13593 }, { "epoch": 1.395320197044335, "grad_norm": 0.037192508578300476, "learning_rate": 0.01, "loss": 2.0487, "step": 13596 }, { "epoch": 1.395628078817734, "grad_norm": 0.0965125560760498, "learning_rate": 0.01, "loss": 2.0549, "step": 13599 }, { "epoch": 1.395935960591133, "grad_norm": 0.04594407603144646, "learning_rate": 0.01, "loss": 2.067, "step": 13602 }, { "epoch": 1.396243842364532, "grad_norm": 0.08442319929599762, "learning_rate": 0.01, "loss": 2.0627, "step": 13605 }, { "epoch": 1.396551724137931, "grad_norm": 0.08673713356256485, "learning_rate": 0.01, "loss": 2.054, "step": 13608 }, { "epoch": 1.39685960591133, "grad_norm": 0.07299968600273132, "learning_rate": 0.01, "loss": 2.0672, "step": 13611 }, { "epoch": 1.397167487684729, "grad_norm": 0.052630744874477386, "learning_rate": 0.01, "loss": 2.079, "step": 13614 }, { "epoch": 1.3974753694581281, "grad_norm": 0.0626215934753418, "learning_rate": 0.01, "loss": 2.0732, "step": 13617 }, { "epoch": 1.3977832512315271, "grad_norm": 0.0866907387971878, "learning_rate": 0.01, "loss": 2.0743, "step": 13620 }, { "epoch": 1.3980911330049262, "grad_norm": 0.05650071054697037, "learning_rate": 0.01, "loss": 2.0856, "step": 13623 }, { "epoch": 1.3983990147783252, "grad_norm": 0.07526635378599167, "learning_rate": 0.01, "loss": 2.0707, "step": 13626 }, { "epoch": 1.3987068965517242, "grad_norm": 0.07472112774848938, "learning_rate": 0.01, "loss": 2.0608, "step": 13629 }, { "epoch": 1.3990147783251232, "grad_norm": 0.07251216471195221, "learning_rate": 0.01, "loss": 2.086, "step": 13632 }, { "epoch": 1.3993226600985222, "grad_norm": 0.08701921999454498, "learning_rate": 0.01, "loss": 2.0382, "step": 13635 }, { "epoch": 1.3996305418719213, "grad_norm": 0.033323436975479126, "learning_rate": 0.01, "loss": 2.0716, "step": 13638 }, { "epoch": 1.3999384236453203, "grad_norm": 0.04960713908076286, "learning_rate": 0.01, "loss": 2.0557, "step": 13641 }, { "epoch": 1.4002463054187193, "grad_norm": 0.1418198049068451, "learning_rate": 0.01, "loss": 2.0528, "step": 13644 }, { "epoch": 1.4005541871921183, "grad_norm": 0.06056910380721092, "learning_rate": 0.01, "loss": 2.0825, "step": 13647 }, { "epoch": 1.4008620689655173, "grad_norm": 0.062474992126226425, "learning_rate": 0.01, "loss": 2.0195, "step": 13650 }, { "epoch": 1.4011699507389164, "grad_norm": 0.05380658805370331, "learning_rate": 0.01, "loss": 2.0644, "step": 13653 }, { "epoch": 1.4014778325123154, "grad_norm": 0.046230513602495193, "learning_rate": 0.01, "loss": 2.062, "step": 13656 }, { "epoch": 1.4017857142857144, "grad_norm": 0.05238807573914528, "learning_rate": 0.01, "loss": 2.0976, "step": 13659 }, { "epoch": 1.4020935960591134, "grad_norm": 0.045423392206430435, "learning_rate": 0.01, "loss": 2.0729, "step": 13662 }, { "epoch": 1.4024014778325122, "grad_norm": 0.1077577993273735, "learning_rate": 0.01, "loss": 2.0481, "step": 13665 }, { "epoch": 1.4027093596059113, "grad_norm": 0.04370421916246414, "learning_rate": 0.01, "loss": 2.0634, "step": 13668 }, { "epoch": 1.4030172413793103, "grad_norm": 0.061496302485466, "learning_rate": 0.01, "loss": 2.0449, "step": 13671 }, { "epoch": 1.4033251231527093, "grad_norm": 0.048742834478616714, "learning_rate": 0.01, "loss": 2.0683, "step": 13674 }, { "epoch": 1.4036330049261083, "grad_norm": 0.14942513406276703, "learning_rate": 0.01, "loss": 2.0616, "step": 13677 }, { "epoch": 1.4039408866995073, "grad_norm": 0.04235846549272537, "learning_rate": 0.01, "loss": 2.0611, "step": 13680 }, { "epoch": 1.4042487684729064, "grad_norm": 0.05509978160262108, "learning_rate": 0.01, "loss": 2.0645, "step": 13683 }, { "epoch": 1.4045566502463054, "grad_norm": 0.09692233055830002, "learning_rate": 0.01, "loss": 2.0703, "step": 13686 }, { "epoch": 1.4048645320197044, "grad_norm": 0.11141908913850784, "learning_rate": 0.01, "loss": 2.0724, "step": 13689 }, { "epoch": 1.4051724137931034, "grad_norm": 0.06601562350988388, "learning_rate": 0.01, "loss": 2.0719, "step": 13692 }, { "epoch": 1.4054802955665024, "grad_norm": 0.04997260868549347, "learning_rate": 0.01, "loss": 2.056, "step": 13695 }, { "epoch": 1.4057881773399015, "grad_norm": 0.07198163866996765, "learning_rate": 0.01, "loss": 2.0546, "step": 13698 }, { "epoch": 1.4060960591133005, "grad_norm": 0.03802650794386864, "learning_rate": 0.01, "loss": 2.0696, "step": 13701 }, { "epoch": 1.4064039408866995, "grad_norm": 0.06259030848741531, "learning_rate": 0.01, "loss": 2.0459, "step": 13704 }, { "epoch": 1.4067118226600985, "grad_norm": 0.09554235637187958, "learning_rate": 0.01, "loss": 2.0476, "step": 13707 }, { "epoch": 1.4070197044334976, "grad_norm": 0.056935038417577744, "learning_rate": 0.01, "loss": 2.0522, "step": 13710 }, { "epoch": 1.4073275862068966, "grad_norm": 0.11038411408662796, "learning_rate": 0.01, "loss": 2.0567, "step": 13713 }, { "epoch": 1.4076354679802956, "grad_norm": 0.05257488042116165, "learning_rate": 0.01, "loss": 2.09, "step": 13716 }, { "epoch": 1.4079433497536946, "grad_norm": 0.0573866032063961, "learning_rate": 0.01, "loss": 2.0538, "step": 13719 }, { "epoch": 1.4082512315270936, "grad_norm": 0.04933631047606468, "learning_rate": 0.01, "loss": 2.0435, "step": 13722 }, { "epoch": 1.4085591133004927, "grad_norm": 0.05909980088472366, "learning_rate": 0.01, "loss": 2.0554, "step": 13725 }, { "epoch": 1.4088669950738917, "grad_norm": 0.09598751366138458, "learning_rate": 0.01, "loss": 2.0718, "step": 13728 }, { "epoch": 1.4091748768472907, "grad_norm": 0.05608231574296951, "learning_rate": 0.01, "loss": 2.0621, "step": 13731 }, { "epoch": 1.4094827586206897, "grad_norm": 0.08262834697961807, "learning_rate": 0.01, "loss": 2.0661, "step": 13734 }, { "epoch": 1.4097906403940887, "grad_norm": 0.041144959628582, "learning_rate": 0.01, "loss": 2.0646, "step": 13737 }, { "epoch": 1.4100985221674878, "grad_norm": 0.03748650476336479, "learning_rate": 0.01, "loss": 2.0558, "step": 13740 }, { "epoch": 1.4104064039408866, "grad_norm": 0.04054822400212288, "learning_rate": 0.01, "loss": 2.0564, "step": 13743 }, { "epoch": 1.4107142857142856, "grad_norm": 0.07961263507604599, "learning_rate": 0.01, "loss": 2.0513, "step": 13746 }, { "epoch": 1.4110221674876846, "grad_norm": 0.049971841275691986, "learning_rate": 0.01, "loss": 2.033, "step": 13749 }, { "epoch": 1.4113300492610836, "grad_norm": 0.040059734135866165, "learning_rate": 0.01, "loss": 2.0791, "step": 13752 }, { "epoch": 1.4116379310344827, "grad_norm": 0.0400179885327816, "learning_rate": 0.01, "loss": 2.0496, "step": 13755 }, { "epoch": 1.4119458128078817, "grad_norm": 0.04587862268090248, "learning_rate": 0.01, "loss": 2.0572, "step": 13758 }, { "epoch": 1.4122536945812807, "grad_norm": 0.08982817828655243, "learning_rate": 0.01, "loss": 2.0864, "step": 13761 }, { "epoch": 1.4125615763546797, "grad_norm": 0.05488836392760277, "learning_rate": 0.01, "loss": 2.0529, "step": 13764 }, { "epoch": 1.4128694581280787, "grad_norm": 0.06559593975543976, "learning_rate": 0.01, "loss": 2.0564, "step": 13767 }, { "epoch": 1.4131773399014778, "grad_norm": 0.10647718608379364, "learning_rate": 0.01, "loss": 2.0404, "step": 13770 }, { "epoch": 1.4134852216748768, "grad_norm": 0.05944173410534859, "learning_rate": 0.01, "loss": 2.053, "step": 13773 }, { "epoch": 1.4137931034482758, "grad_norm": 0.05548718199133873, "learning_rate": 0.01, "loss": 2.0534, "step": 13776 }, { "epoch": 1.4141009852216748, "grad_norm": 0.0694265142083168, "learning_rate": 0.01, "loss": 2.0647, "step": 13779 }, { "epoch": 1.4144088669950738, "grad_norm": 0.10526683181524277, "learning_rate": 0.01, "loss": 2.0768, "step": 13782 }, { "epoch": 1.4147167487684729, "grad_norm": 0.08820123970508575, "learning_rate": 0.01, "loss": 2.0693, "step": 13785 }, { "epoch": 1.4150246305418719, "grad_norm": 0.04513731971383095, "learning_rate": 0.01, "loss": 2.0596, "step": 13788 }, { "epoch": 1.415332512315271, "grad_norm": 0.05737076327204704, "learning_rate": 0.01, "loss": 2.0698, "step": 13791 }, { "epoch": 1.41564039408867, "grad_norm": 0.0431799478828907, "learning_rate": 0.01, "loss": 2.0603, "step": 13794 }, { "epoch": 1.415948275862069, "grad_norm": 0.09012471139431, "learning_rate": 0.01, "loss": 2.0634, "step": 13797 }, { "epoch": 1.416256157635468, "grad_norm": 0.05895904824137688, "learning_rate": 0.01, "loss": 2.0516, "step": 13800 }, { "epoch": 1.416564039408867, "grad_norm": 0.1610986888408661, "learning_rate": 0.01, "loss": 2.0743, "step": 13803 }, { "epoch": 1.416871921182266, "grad_norm": 0.07852904498577118, "learning_rate": 0.01, "loss": 2.064, "step": 13806 }, { "epoch": 1.417179802955665, "grad_norm": 0.06620481610298157, "learning_rate": 0.01, "loss": 2.0688, "step": 13809 }, { "epoch": 1.417487684729064, "grad_norm": 0.033222537487745285, "learning_rate": 0.01, "loss": 2.0702, "step": 13812 }, { "epoch": 1.417795566502463, "grad_norm": 0.02942623570561409, "learning_rate": 0.01, "loss": 2.0654, "step": 13815 }, { "epoch": 1.418103448275862, "grad_norm": 0.03543059900403023, "learning_rate": 0.01, "loss": 2.0776, "step": 13818 }, { "epoch": 1.4184113300492611, "grad_norm": 0.13414567708969116, "learning_rate": 0.01, "loss": 2.0621, "step": 13821 }, { "epoch": 1.4187192118226601, "grad_norm": 0.06474481523036957, "learning_rate": 0.01, "loss": 2.0675, "step": 13824 }, { "epoch": 1.4190270935960592, "grad_norm": 0.11285994201898575, "learning_rate": 0.01, "loss": 2.0533, "step": 13827 }, { "epoch": 1.4193349753694582, "grad_norm": 0.05104577913880348, "learning_rate": 0.01, "loss": 2.0835, "step": 13830 }, { "epoch": 1.4196428571428572, "grad_norm": 0.05463656783103943, "learning_rate": 0.01, "loss": 2.0788, "step": 13833 }, { "epoch": 1.4199507389162562, "grad_norm": 0.07886187732219696, "learning_rate": 0.01, "loss": 2.0538, "step": 13836 }, { "epoch": 1.4202586206896552, "grad_norm": 0.06960279494524002, "learning_rate": 0.01, "loss": 2.0927, "step": 13839 }, { "epoch": 1.4205665024630543, "grad_norm": 0.07481426745653152, "learning_rate": 0.01, "loss": 2.0421, "step": 13842 }, { "epoch": 1.4208743842364533, "grad_norm": 0.04317006468772888, "learning_rate": 0.01, "loss": 2.0665, "step": 13845 }, { "epoch": 1.4211822660098523, "grad_norm": 0.10644064098596573, "learning_rate": 0.01, "loss": 2.0434, "step": 13848 }, { "epoch": 1.4214901477832513, "grad_norm": 0.09246213734149933, "learning_rate": 0.01, "loss": 2.0487, "step": 13851 }, { "epoch": 1.4217980295566504, "grad_norm": 0.05824518948793411, "learning_rate": 0.01, "loss": 2.0633, "step": 13854 }, { "epoch": 1.4221059113300494, "grad_norm": 0.06316854059696198, "learning_rate": 0.01, "loss": 2.076, "step": 13857 }, { "epoch": 1.4224137931034484, "grad_norm": 0.058339640498161316, "learning_rate": 0.01, "loss": 2.0702, "step": 13860 }, { "epoch": 1.4227216748768474, "grad_norm": 0.05458427220582962, "learning_rate": 0.01, "loss": 2.0585, "step": 13863 }, { "epoch": 1.4230295566502464, "grad_norm": 0.041047900915145874, "learning_rate": 0.01, "loss": 2.0793, "step": 13866 }, { "epoch": 1.4233374384236452, "grad_norm": 0.04485390707850456, "learning_rate": 0.01, "loss": 2.0723, "step": 13869 }, { "epoch": 1.4236453201970443, "grad_norm": 0.13340137898921967, "learning_rate": 0.01, "loss": 2.0555, "step": 13872 }, { "epoch": 1.4239532019704433, "grad_norm": 0.05519254505634308, "learning_rate": 0.01, "loss": 2.0931, "step": 13875 }, { "epoch": 1.4242610837438423, "grad_norm": 0.07989728450775146, "learning_rate": 0.01, "loss": 2.0435, "step": 13878 }, { "epoch": 1.4245689655172413, "grad_norm": 0.09447802603244781, "learning_rate": 0.01, "loss": 2.061, "step": 13881 }, { "epoch": 1.4248768472906403, "grad_norm": 0.05240226909518242, "learning_rate": 0.01, "loss": 2.0643, "step": 13884 }, { "epoch": 1.4251847290640394, "grad_norm": 0.07171013206243515, "learning_rate": 0.01, "loss": 2.0829, "step": 13887 }, { "epoch": 1.4254926108374384, "grad_norm": 0.05098895728588104, "learning_rate": 0.01, "loss": 2.0716, "step": 13890 }, { "epoch": 1.4258004926108374, "grad_norm": 0.08569507300853729, "learning_rate": 0.01, "loss": 2.0699, "step": 13893 }, { "epoch": 1.4261083743842364, "grad_norm": 0.09055166691541672, "learning_rate": 0.01, "loss": 2.0843, "step": 13896 }, { "epoch": 1.4264162561576355, "grad_norm": 0.03242780640721321, "learning_rate": 0.01, "loss": 2.0433, "step": 13899 }, { "epoch": 1.4267241379310345, "grad_norm": 0.04612202197313309, "learning_rate": 0.01, "loss": 2.0327, "step": 13902 }, { "epoch": 1.4270320197044335, "grad_norm": 0.05800663307309151, "learning_rate": 0.01, "loss": 2.0362, "step": 13905 }, { "epoch": 1.4273399014778325, "grad_norm": 0.04150572046637535, "learning_rate": 0.01, "loss": 2.0504, "step": 13908 }, { "epoch": 1.4276477832512315, "grad_norm": 0.08542584627866745, "learning_rate": 0.01, "loss": 2.0406, "step": 13911 }, { "epoch": 1.4279556650246306, "grad_norm": 0.11966803669929504, "learning_rate": 0.01, "loss": 2.0829, "step": 13914 }, { "epoch": 1.4282635467980296, "grad_norm": 0.12066449970006943, "learning_rate": 0.01, "loss": 2.0657, "step": 13917 }, { "epoch": 1.4285714285714286, "grad_norm": 0.04103751480579376, "learning_rate": 0.01, "loss": 2.0496, "step": 13920 }, { "epoch": 1.4288793103448276, "grad_norm": 0.05432034656405449, "learning_rate": 0.01, "loss": 2.0563, "step": 13923 }, { "epoch": 1.4291871921182266, "grad_norm": 0.03935731574892998, "learning_rate": 0.01, "loss": 2.0463, "step": 13926 }, { "epoch": 1.4294950738916257, "grad_norm": 0.1475706547498703, "learning_rate": 0.01, "loss": 2.0641, "step": 13929 }, { "epoch": 1.4298029556650247, "grad_norm": 0.06562622636556625, "learning_rate": 0.01, "loss": 2.0536, "step": 13932 }, { "epoch": 1.4301108374384237, "grad_norm": 0.051726315170526505, "learning_rate": 0.01, "loss": 2.0506, "step": 13935 }, { "epoch": 1.4304187192118227, "grad_norm": 0.0998329371213913, "learning_rate": 0.01, "loss": 2.0521, "step": 13938 }, { "epoch": 1.4307266009852218, "grad_norm": 0.04965333640575409, "learning_rate": 0.01, "loss": 2.0568, "step": 13941 }, { "epoch": 1.4310344827586206, "grad_norm": 0.04430006071925163, "learning_rate": 0.01, "loss": 2.0735, "step": 13944 }, { "epoch": 1.4313423645320196, "grad_norm": 0.05260150134563446, "learning_rate": 0.01, "loss": 2.0887, "step": 13947 }, { "epoch": 1.4316502463054186, "grad_norm": 0.04135138541460037, "learning_rate": 0.01, "loss": 2.055, "step": 13950 }, { "epoch": 1.4319581280788176, "grad_norm": 0.08347123116254807, "learning_rate": 0.01, "loss": 2.0826, "step": 13953 }, { "epoch": 1.4322660098522166, "grad_norm": 0.12328385561704636, "learning_rate": 0.01, "loss": 2.0787, "step": 13956 }, { "epoch": 1.4325738916256157, "grad_norm": 0.05809056758880615, "learning_rate": 0.01, "loss": 2.0798, "step": 13959 }, { "epoch": 1.4328817733990147, "grad_norm": 0.038590408861637115, "learning_rate": 0.01, "loss": 2.0699, "step": 13962 }, { "epoch": 1.4331896551724137, "grad_norm": 0.11158851534128189, "learning_rate": 0.01, "loss": 2.0707, "step": 13965 }, { "epoch": 1.4334975369458127, "grad_norm": 0.0880589634180069, "learning_rate": 0.01, "loss": 2.0425, "step": 13968 }, { "epoch": 1.4338054187192117, "grad_norm": 0.059966232627630234, "learning_rate": 0.01, "loss": 2.0659, "step": 13971 }, { "epoch": 1.4341133004926108, "grad_norm": 0.04661833122372627, "learning_rate": 0.01, "loss": 2.0736, "step": 13974 }, { "epoch": 1.4344211822660098, "grad_norm": 0.04508896544575691, "learning_rate": 0.01, "loss": 2.0856, "step": 13977 }, { "epoch": 1.4347290640394088, "grad_norm": 0.051987554877996445, "learning_rate": 0.01, "loss": 2.0449, "step": 13980 }, { "epoch": 1.4350369458128078, "grad_norm": 0.04814029112458229, "learning_rate": 0.01, "loss": 2.0587, "step": 13983 }, { "epoch": 1.4353448275862069, "grad_norm": 0.09631717205047607, "learning_rate": 0.01, "loss": 2.0747, "step": 13986 }, { "epoch": 1.4356527093596059, "grad_norm": 0.06581971794366837, "learning_rate": 0.01, "loss": 2.0456, "step": 13989 }, { "epoch": 1.435960591133005, "grad_norm": 0.06483247131109238, "learning_rate": 0.01, "loss": 2.0627, "step": 13992 }, { "epoch": 1.436268472906404, "grad_norm": 0.1000155657529831, "learning_rate": 0.01, "loss": 2.0655, "step": 13995 }, { "epoch": 1.436576354679803, "grad_norm": 0.07297492027282715, "learning_rate": 0.01, "loss": 2.0686, "step": 13998 }, { "epoch": 1.436884236453202, "grad_norm": 0.054907578974962234, "learning_rate": 0.01, "loss": 2.0351, "step": 14001 }, { "epoch": 1.437192118226601, "grad_norm": 0.051127828657627106, "learning_rate": 0.01, "loss": 2.0583, "step": 14004 }, { "epoch": 1.4375, "grad_norm": 0.04157300665974617, "learning_rate": 0.01, "loss": 2.0548, "step": 14007 }, { "epoch": 1.437807881773399, "grad_norm": 0.07996746897697449, "learning_rate": 0.01, "loss": 2.0625, "step": 14010 }, { "epoch": 1.438115763546798, "grad_norm": 0.0764036774635315, "learning_rate": 0.01, "loss": 2.076, "step": 14013 }, { "epoch": 1.438423645320197, "grad_norm": 0.10736891627311707, "learning_rate": 0.01, "loss": 2.0682, "step": 14016 }, { "epoch": 1.438731527093596, "grad_norm": 0.0598980113863945, "learning_rate": 0.01, "loss": 2.0774, "step": 14019 }, { "epoch": 1.439039408866995, "grad_norm": 0.10858605802059174, "learning_rate": 0.01, "loss": 2.0708, "step": 14022 }, { "epoch": 1.4393472906403941, "grad_norm": 0.03999786823987961, "learning_rate": 0.01, "loss": 2.0521, "step": 14025 }, { "epoch": 1.4396551724137931, "grad_norm": 0.053138673305511475, "learning_rate": 0.01, "loss": 2.0547, "step": 14028 }, { "epoch": 1.4399630541871922, "grad_norm": 0.06477091461420059, "learning_rate": 0.01, "loss": 2.0371, "step": 14031 }, { "epoch": 1.4402709359605912, "grad_norm": 0.035987384617328644, "learning_rate": 0.01, "loss": 2.0897, "step": 14034 }, { "epoch": 1.4405788177339902, "grad_norm": 0.06938667595386505, "learning_rate": 0.01, "loss": 2.0895, "step": 14037 }, { "epoch": 1.4408866995073892, "grad_norm": 0.041746124625205994, "learning_rate": 0.01, "loss": 2.0705, "step": 14040 }, { "epoch": 1.4411945812807883, "grad_norm": 0.04503123462200165, "learning_rate": 0.01, "loss": 2.0528, "step": 14043 }, { "epoch": 1.4415024630541873, "grad_norm": 0.04153100401163101, "learning_rate": 0.01, "loss": 2.0596, "step": 14046 }, { "epoch": 1.4418103448275863, "grad_norm": 0.04992615804076195, "learning_rate": 0.01, "loss": 2.0527, "step": 14049 }, { "epoch": 1.4421182266009853, "grad_norm": 0.0718725174665451, "learning_rate": 0.01, "loss": 2.049, "step": 14052 }, { "epoch": 1.4424261083743843, "grad_norm": 0.13080431520938873, "learning_rate": 0.01, "loss": 2.0457, "step": 14055 }, { "epoch": 1.4427339901477834, "grad_norm": 0.04203762486577034, "learning_rate": 0.01, "loss": 2.0713, "step": 14058 }, { "epoch": 1.4430418719211824, "grad_norm": 0.04111120104789734, "learning_rate": 0.01, "loss": 2.0845, "step": 14061 }, { "epoch": 1.4433497536945814, "grad_norm": 0.044398125261068344, "learning_rate": 0.01, "loss": 2.0679, "step": 14064 }, { "epoch": 1.4436576354679804, "grad_norm": 0.031682152301073074, "learning_rate": 0.01, "loss": 2.0855, "step": 14067 }, { "epoch": 1.4439655172413794, "grad_norm": 0.07848865538835526, "learning_rate": 0.01, "loss": 2.0607, "step": 14070 }, { "epoch": 1.4442733990147782, "grad_norm": 0.08814079314470291, "learning_rate": 0.01, "loss": 2.0468, "step": 14073 }, { "epoch": 1.4445812807881773, "grad_norm": 0.05344429612159729, "learning_rate": 0.01, "loss": 2.0499, "step": 14076 }, { "epoch": 1.4448891625615763, "grad_norm": 0.05509471520781517, "learning_rate": 0.01, "loss": 2.0662, "step": 14079 }, { "epoch": 1.4451970443349753, "grad_norm": 0.08177798241376877, "learning_rate": 0.01, "loss": 2.0509, "step": 14082 }, { "epoch": 1.4455049261083743, "grad_norm": 0.07953787595033646, "learning_rate": 0.01, "loss": 2.0501, "step": 14085 }, { "epoch": 1.4458128078817734, "grad_norm": 0.06984551250934601, "learning_rate": 0.01, "loss": 2.0599, "step": 14088 }, { "epoch": 1.4461206896551724, "grad_norm": 0.07923319190740585, "learning_rate": 0.01, "loss": 2.06, "step": 14091 }, { "epoch": 1.4464285714285714, "grad_norm": 0.04370349645614624, "learning_rate": 0.01, "loss": 2.0839, "step": 14094 }, { "epoch": 1.4467364532019704, "grad_norm": 0.045787643641233444, "learning_rate": 0.01, "loss": 2.0512, "step": 14097 }, { "epoch": 1.4470443349753694, "grad_norm": 0.04126288741827011, "learning_rate": 0.01, "loss": 2.0603, "step": 14100 }, { "epoch": 1.4473522167487685, "grad_norm": 0.039805784821510315, "learning_rate": 0.01, "loss": 2.0561, "step": 14103 }, { "epoch": 1.4476600985221675, "grad_norm": 0.038430992513895035, "learning_rate": 0.01, "loss": 2.0697, "step": 14106 }, { "epoch": 1.4479679802955665, "grad_norm": 0.07664498686790466, "learning_rate": 0.01, "loss": 2.0381, "step": 14109 }, { "epoch": 1.4482758620689655, "grad_norm": 0.04592788219451904, "learning_rate": 0.01, "loss": 2.042, "step": 14112 }, { "epoch": 1.4485837438423645, "grad_norm": 0.06161922961473465, "learning_rate": 0.01, "loss": 2.0446, "step": 14115 }, { "epoch": 1.4488916256157636, "grad_norm": 0.07906373590230942, "learning_rate": 0.01, "loss": 2.0758, "step": 14118 }, { "epoch": 1.4491995073891626, "grad_norm": 0.09529503434896469, "learning_rate": 0.01, "loss": 2.0512, "step": 14121 }, { "epoch": 1.4495073891625616, "grad_norm": 0.05416659638285637, "learning_rate": 0.01, "loss": 2.0484, "step": 14124 }, { "epoch": 1.4498152709359606, "grad_norm": 0.07085006684064865, "learning_rate": 0.01, "loss": 2.0293, "step": 14127 }, { "epoch": 1.4501231527093597, "grad_norm": 0.07722880691289902, "learning_rate": 0.01, "loss": 2.0578, "step": 14130 }, { "epoch": 1.4504310344827587, "grad_norm": 0.06599342823028564, "learning_rate": 0.01, "loss": 2.0556, "step": 14133 }, { "epoch": 1.4507389162561577, "grad_norm": 0.11217498779296875, "learning_rate": 0.01, "loss": 2.068, "step": 14136 }, { "epoch": 1.4510467980295567, "grad_norm": 0.13082845509052277, "learning_rate": 0.01, "loss": 2.0677, "step": 14139 }, { "epoch": 1.4513546798029557, "grad_norm": 0.06812801957130432, "learning_rate": 0.01, "loss": 2.0668, "step": 14142 }, { "epoch": 1.4516625615763548, "grad_norm": 0.043554674834012985, "learning_rate": 0.01, "loss": 2.0537, "step": 14145 }, { "epoch": 1.4519704433497536, "grad_norm": 0.04550860822200775, "learning_rate": 0.01, "loss": 2.0415, "step": 14148 }, { "epoch": 1.4522783251231526, "grad_norm": 0.04838492348790169, "learning_rate": 0.01, "loss": 2.0642, "step": 14151 }, { "epoch": 1.4525862068965516, "grad_norm": 0.08433537930250168, "learning_rate": 0.01, "loss": 2.0549, "step": 14154 }, { "epoch": 1.4528940886699506, "grad_norm": 0.06986009329557419, "learning_rate": 0.01, "loss": 2.057, "step": 14157 }, { "epoch": 1.4532019704433496, "grad_norm": 0.06860263645648956, "learning_rate": 0.01, "loss": 2.0581, "step": 14160 }, { "epoch": 1.4535098522167487, "grad_norm": 0.06218327581882477, "learning_rate": 0.01, "loss": 2.0482, "step": 14163 }, { "epoch": 1.4538177339901477, "grad_norm": 0.10177832096815109, "learning_rate": 0.01, "loss": 2.059, "step": 14166 }, { "epoch": 1.4541256157635467, "grad_norm": 0.047695957124233246, "learning_rate": 0.01, "loss": 2.0372, "step": 14169 }, { "epoch": 1.4544334975369457, "grad_norm": 0.09761510044336319, "learning_rate": 0.01, "loss": 2.0671, "step": 14172 }, { "epoch": 1.4547413793103448, "grad_norm": 0.050296783447265625, "learning_rate": 0.01, "loss": 2.0536, "step": 14175 }, { "epoch": 1.4550492610837438, "grad_norm": 0.13070064783096313, "learning_rate": 0.01, "loss": 2.0579, "step": 14178 }, { "epoch": 1.4553571428571428, "grad_norm": 0.1080620214343071, "learning_rate": 0.01, "loss": 2.0455, "step": 14181 }, { "epoch": 1.4556650246305418, "grad_norm": 0.06132792308926582, "learning_rate": 0.01, "loss": 2.062, "step": 14184 }, { "epoch": 1.4559729064039408, "grad_norm": 0.07258635014295578, "learning_rate": 0.01, "loss": 2.0666, "step": 14187 }, { "epoch": 1.4562807881773399, "grad_norm": 0.05423443764448166, "learning_rate": 0.01, "loss": 2.0676, "step": 14190 }, { "epoch": 1.4565886699507389, "grad_norm": 0.06038088724017143, "learning_rate": 0.01, "loss": 2.0539, "step": 14193 }, { "epoch": 1.456896551724138, "grad_norm": 0.043958742171525955, "learning_rate": 0.01, "loss": 2.0739, "step": 14196 }, { "epoch": 1.457204433497537, "grad_norm": 0.03985238075256348, "learning_rate": 0.01, "loss": 2.0323, "step": 14199 }, { "epoch": 1.457512315270936, "grad_norm": 0.05626663193106651, "learning_rate": 0.01, "loss": 2.0518, "step": 14202 }, { "epoch": 1.457820197044335, "grad_norm": 0.06365952640771866, "learning_rate": 0.01, "loss": 2.042, "step": 14205 }, { "epoch": 1.458128078817734, "grad_norm": 0.0698857232928276, "learning_rate": 0.01, "loss": 2.0676, "step": 14208 }, { "epoch": 1.458435960591133, "grad_norm": 0.08149702101945877, "learning_rate": 0.01, "loss": 2.0659, "step": 14211 }, { "epoch": 1.458743842364532, "grad_norm": 0.055818330496549606, "learning_rate": 0.01, "loss": 2.0633, "step": 14214 }, { "epoch": 1.459051724137931, "grad_norm": 0.046251330524683, "learning_rate": 0.01, "loss": 2.0598, "step": 14217 }, { "epoch": 1.45935960591133, "grad_norm": 0.10986322909593582, "learning_rate": 0.01, "loss": 2.0768, "step": 14220 }, { "epoch": 1.459667487684729, "grad_norm": 0.06735626608133316, "learning_rate": 0.01, "loss": 2.0709, "step": 14223 }, { "epoch": 1.4599753694581281, "grad_norm": 0.1051633432507515, "learning_rate": 0.01, "loss": 2.0807, "step": 14226 }, { "epoch": 1.4602832512315271, "grad_norm": 0.0956743136048317, "learning_rate": 0.01, "loss": 2.0377, "step": 14229 }, { "epoch": 1.4605911330049262, "grad_norm": 0.04349840059876442, "learning_rate": 0.01, "loss": 2.0723, "step": 14232 }, { "epoch": 1.4608990147783252, "grad_norm": 0.0388668030500412, "learning_rate": 0.01, "loss": 2.0612, "step": 14235 }, { "epoch": 1.4612068965517242, "grad_norm": 0.04311763867735863, "learning_rate": 0.01, "loss": 2.0553, "step": 14238 }, { "epoch": 1.4615147783251232, "grad_norm": 0.09116464853286743, "learning_rate": 0.01, "loss": 2.0537, "step": 14241 }, { "epoch": 1.4618226600985222, "grad_norm": 0.08582088351249695, "learning_rate": 0.01, "loss": 2.0588, "step": 14244 }, { "epoch": 1.4621305418719213, "grad_norm": 0.0435602031648159, "learning_rate": 0.01, "loss": 2.0303, "step": 14247 }, { "epoch": 1.4624384236453203, "grad_norm": 0.067762091755867, "learning_rate": 0.01, "loss": 2.0453, "step": 14250 }, { "epoch": 1.4627463054187193, "grad_norm": 0.03980677202343941, "learning_rate": 0.01, "loss": 2.0488, "step": 14253 }, { "epoch": 1.4630541871921183, "grad_norm": 0.08521614968776703, "learning_rate": 0.01, "loss": 2.0617, "step": 14256 }, { "epoch": 1.4633620689655173, "grad_norm": 0.06770948320627213, "learning_rate": 0.01, "loss": 2.0535, "step": 14259 }, { "epoch": 1.4636699507389164, "grad_norm": 0.053458839654922485, "learning_rate": 0.01, "loss": 2.0594, "step": 14262 }, { "epoch": 1.4639778325123154, "grad_norm": 0.06733859330415726, "learning_rate": 0.01, "loss": 2.0422, "step": 14265 }, { "epoch": 1.4642857142857144, "grad_norm": 0.08033892512321472, "learning_rate": 0.01, "loss": 2.0685, "step": 14268 }, { "epoch": 1.4645935960591134, "grad_norm": 0.0832366794347763, "learning_rate": 0.01, "loss": 2.0867, "step": 14271 }, { "epoch": 1.4649014778325122, "grad_norm": 0.055291153490543365, "learning_rate": 0.01, "loss": 2.0618, "step": 14274 }, { "epoch": 1.4652093596059113, "grad_norm": 0.07180801033973694, "learning_rate": 0.01, "loss": 2.0569, "step": 14277 }, { "epoch": 1.4655172413793103, "grad_norm": 0.048950713127851486, "learning_rate": 0.01, "loss": 2.0433, "step": 14280 }, { "epoch": 1.4658251231527093, "grad_norm": 0.05428025498986244, "learning_rate": 0.01, "loss": 2.052, "step": 14283 }, { "epoch": 1.4661330049261083, "grad_norm": 0.06631309539079666, "learning_rate": 0.01, "loss": 2.0615, "step": 14286 }, { "epoch": 1.4664408866995073, "grad_norm": 0.06743253022432327, "learning_rate": 0.01, "loss": 2.0524, "step": 14289 }, { "epoch": 1.4667487684729064, "grad_norm": 0.10901882499456406, "learning_rate": 0.01, "loss": 2.076, "step": 14292 }, { "epoch": 1.4670566502463054, "grad_norm": 0.08234187960624695, "learning_rate": 0.01, "loss": 2.0432, "step": 14295 }, { "epoch": 1.4673645320197044, "grad_norm": 0.07249965518712997, "learning_rate": 0.01, "loss": 2.057, "step": 14298 }, { "epoch": 1.4676724137931034, "grad_norm": 0.0705137550830841, "learning_rate": 0.01, "loss": 2.0616, "step": 14301 }, { "epoch": 1.4679802955665024, "grad_norm": 0.10730472952127457, "learning_rate": 0.01, "loss": 2.0575, "step": 14304 }, { "epoch": 1.4682881773399015, "grad_norm": 0.048364557325839996, "learning_rate": 0.01, "loss": 2.0719, "step": 14307 }, { "epoch": 1.4685960591133005, "grad_norm": 0.03604978322982788, "learning_rate": 0.01, "loss": 2.0608, "step": 14310 }, { "epoch": 1.4689039408866995, "grad_norm": 0.09732489287853241, "learning_rate": 0.01, "loss": 2.0627, "step": 14313 }, { "epoch": 1.4692118226600985, "grad_norm": 0.06590714305639267, "learning_rate": 0.01, "loss": 2.0775, "step": 14316 }, { "epoch": 1.4695197044334976, "grad_norm": 0.075086310505867, "learning_rate": 0.01, "loss": 2.0633, "step": 14319 }, { "epoch": 1.4698275862068966, "grad_norm": 0.10288450121879578, "learning_rate": 0.01, "loss": 2.0607, "step": 14322 }, { "epoch": 1.4701354679802956, "grad_norm": 0.0535271093249321, "learning_rate": 0.01, "loss": 2.0702, "step": 14325 }, { "epoch": 1.4704433497536946, "grad_norm": 0.04609391465783119, "learning_rate": 0.01, "loss": 2.049, "step": 14328 }, { "epoch": 1.4707512315270936, "grad_norm": 0.044252909719944, "learning_rate": 0.01, "loss": 2.0586, "step": 14331 }, { "epoch": 1.4710591133004927, "grad_norm": 0.07837995141744614, "learning_rate": 0.01, "loss": 2.0541, "step": 14334 }, { "epoch": 1.4713669950738917, "grad_norm": 0.06548511236906052, "learning_rate": 0.01, "loss": 2.0575, "step": 14337 }, { "epoch": 1.4716748768472907, "grad_norm": 0.09237763285636902, "learning_rate": 0.01, "loss": 2.0607, "step": 14340 }, { "epoch": 1.4719827586206897, "grad_norm": 0.04163452237844467, "learning_rate": 0.01, "loss": 2.0604, "step": 14343 }, { "epoch": 1.4722906403940887, "grad_norm": 0.13814504444599152, "learning_rate": 0.01, "loss": 2.0667, "step": 14346 }, { "epoch": 1.4725985221674878, "grad_norm": 0.054490260779857635, "learning_rate": 0.01, "loss": 2.0754, "step": 14349 }, { "epoch": 1.4729064039408866, "grad_norm": 0.07470995932817459, "learning_rate": 0.01, "loss": 2.0731, "step": 14352 }, { "epoch": 1.4732142857142856, "grad_norm": 0.12089511752128601, "learning_rate": 0.01, "loss": 2.0671, "step": 14355 }, { "epoch": 1.4735221674876846, "grad_norm": 0.05507595092058182, "learning_rate": 0.01, "loss": 2.0278, "step": 14358 }, { "epoch": 1.4738300492610836, "grad_norm": 0.05130109563469887, "learning_rate": 0.01, "loss": 2.0491, "step": 14361 }, { "epoch": 1.4741379310344827, "grad_norm": 0.04399503022432327, "learning_rate": 0.01, "loss": 2.0669, "step": 14364 }, { "epoch": 1.4744458128078817, "grad_norm": 0.06161755695939064, "learning_rate": 0.01, "loss": 2.0458, "step": 14367 }, { "epoch": 1.4747536945812807, "grad_norm": 0.045603156089782715, "learning_rate": 0.01, "loss": 2.0477, "step": 14370 }, { "epoch": 1.4750615763546797, "grad_norm": 0.06444186717271805, "learning_rate": 0.01, "loss": 2.0514, "step": 14373 }, { "epoch": 1.4753694581280787, "grad_norm": 0.07450753450393677, "learning_rate": 0.01, "loss": 2.0327, "step": 14376 }, { "epoch": 1.4756773399014778, "grad_norm": 0.10367168486118317, "learning_rate": 0.01, "loss": 2.0453, "step": 14379 }, { "epoch": 1.4759852216748768, "grad_norm": 0.08999089151620865, "learning_rate": 0.01, "loss": 2.0588, "step": 14382 }, { "epoch": 1.4762931034482758, "grad_norm": 0.09803617745637894, "learning_rate": 0.01, "loss": 2.0972, "step": 14385 }, { "epoch": 1.4766009852216748, "grad_norm": 0.042447153478860855, "learning_rate": 0.01, "loss": 2.0295, "step": 14388 }, { "epoch": 1.4769088669950738, "grad_norm": 0.04479740187525749, "learning_rate": 0.01, "loss": 2.0545, "step": 14391 }, { "epoch": 1.4772167487684729, "grad_norm": 0.04113270714879036, "learning_rate": 0.01, "loss": 2.0522, "step": 14394 }, { "epoch": 1.4775246305418719, "grad_norm": 0.1087644025683403, "learning_rate": 0.01, "loss": 2.0668, "step": 14397 }, { "epoch": 1.477832512315271, "grad_norm": 0.05737099424004555, "learning_rate": 0.01, "loss": 2.0509, "step": 14400 }, { "epoch": 1.47814039408867, "grad_norm": 0.11025606095790863, "learning_rate": 0.01, "loss": 2.067, "step": 14403 }, { "epoch": 1.478448275862069, "grad_norm": 0.06662195175886154, "learning_rate": 0.01, "loss": 2.0554, "step": 14406 }, { "epoch": 1.478756157635468, "grad_norm": 0.05261904001235962, "learning_rate": 0.01, "loss": 2.0562, "step": 14409 }, { "epoch": 1.479064039408867, "grad_norm": 0.048272691667079926, "learning_rate": 0.01, "loss": 2.0858, "step": 14412 }, { "epoch": 1.479371921182266, "grad_norm": 0.048300545662641525, "learning_rate": 0.01, "loss": 2.0736, "step": 14415 }, { "epoch": 1.479679802955665, "grad_norm": 0.08697368204593658, "learning_rate": 0.01, "loss": 2.0425, "step": 14418 }, { "epoch": 1.479987684729064, "grad_norm": 0.07993713021278381, "learning_rate": 0.01, "loss": 2.0511, "step": 14421 }, { "epoch": 1.480295566502463, "grad_norm": 0.10037390887737274, "learning_rate": 0.01, "loss": 2.0584, "step": 14424 }, { "epoch": 1.480603448275862, "grad_norm": 0.06048484891653061, "learning_rate": 0.01, "loss": 2.0589, "step": 14427 }, { "epoch": 1.4809113300492611, "grad_norm": 0.08982612937688828, "learning_rate": 0.01, "loss": 2.0575, "step": 14430 }, { "epoch": 1.4812192118226601, "grad_norm": 0.06678975373506546, "learning_rate": 0.01, "loss": 2.0544, "step": 14433 }, { "epoch": 1.4815270935960592, "grad_norm": 0.07890944927930832, "learning_rate": 0.01, "loss": 2.0352, "step": 14436 }, { "epoch": 1.4818349753694582, "grad_norm": 0.05838685482740402, "learning_rate": 0.01, "loss": 2.0481, "step": 14439 }, { "epoch": 1.4821428571428572, "grad_norm": 0.06483394652605057, "learning_rate": 0.01, "loss": 2.0425, "step": 14442 }, { "epoch": 1.4824507389162562, "grad_norm": 0.07320713996887207, "learning_rate": 0.01, "loss": 2.0524, "step": 14445 }, { "epoch": 1.4827586206896552, "grad_norm": 0.07484092563390732, "learning_rate": 0.01, "loss": 2.0709, "step": 14448 }, { "epoch": 1.4830665024630543, "grad_norm": 0.07702804356813431, "learning_rate": 0.01, "loss": 2.0483, "step": 14451 }, { "epoch": 1.4833743842364533, "grad_norm": 0.05470692366361618, "learning_rate": 0.01, "loss": 2.0767, "step": 14454 }, { "epoch": 1.4836822660098523, "grad_norm": 0.055773910135030746, "learning_rate": 0.01, "loss": 2.0692, "step": 14457 }, { "epoch": 1.4839901477832513, "grad_norm": 0.03712743893265724, "learning_rate": 0.01, "loss": 2.0723, "step": 14460 }, { "epoch": 1.4842980295566504, "grad_norm": 0.035963475704193115, "learning_rate": 0.01, "loss": 2.0526, "step": 14463 }, { "epoch": 1.4846059113300494, "grad_norm": 0.08578921854496002, "learning_rate": 0.01, "loss": 2.0631, "step": 14466 }, { "epoch": 1.4849137931034484, "grad_norm": 0.08239159733057022, "learning_rate": 0.01, "loss": 2.0654, "step": 14469 }, { "epoch": 1.4852216748768474, "grad_norm": 0.05097891017794609, "learning_rate": 0.01, "loss": 2.0837, "step": 14472 }, { "epoch": 1.4855295566502464, "grad_norm": 0.051847904920578, "learning_rate": 0.01, "loss": 2.059, "step": 14475 }, { "epoch": 1.4858374384236452, "grad_norm": 0.04754810780286789, "learning_rate": 0.01, "loss": 2.0658, "step": 14478 }, { "epoch": 1.4861453201970443, "grad_norm": 0.046647075563669205, "learning_rate": 0.01, "loss": 2.0423, "step": 14481 }, { "epoch": 1.4864532019704433, "grad_norm": 0.06013277545571327, "learning_rate": 0.01, "loss": 2.0254, "step": 14484 }, { "epoch": 1.4867610837438423, "grad_norm": 0.13433513045310974, "learning_rate": 0.01, "loss": 2.0775, "step": 14487 }, { "epoch": 1.4870689655172413, "grad_norm": 0.046518564224243164, "learning_rate": 0.01, "loss": 2.0434, "step": 14490 }, { "epoch": 1.4873768472906403, "grad_norm": 0.09483514726161957, "learning_rate": 0.01, "loss": 2.0839, "step": 14493 }, { "epoch": 1.4876847290640394, "grad_norm": 0.07147302478551865, "learning_rate": 0.01, "loss": 2.0741, "step": 14496 }, { "epoch": 1.4879926108374384, "grad_norm": 0.12423846870660782, "learning_rate": 0.01, "loss": 2.045, "step": 14499 }, { "epoch": 1.4883004926108374, "grad_norm": 0.07726770639419556, "learning_rate": 0.01, "loss": 2.0578, "step": 14502 }, { "epoch": 1.4886083743842364, "grad_norm": 0.059802982956171036, "learning_rate": 0.01, "loss": 2.0526, "step": 14505 }, { "epoch": 1.4889162561576355, "grad_norm": 0.050745993852615356, "learning_rate": 0.01, "loss": 2.049, "step": 14508 }, { "epoch": 1.4892241379310345, "grad_norm": 0.052064161747694016, "learning_rate": 0.01, "loss": 2.0575, "step": 14511 }, { "epoch": 1.4895320197044335, "grad_norm": 0.06646674871444702, "learning_rate": 0.01, "loss": 2.0689, "step": 14514 }, { "epoch": 1.4898399014778325, "grad_norm": 0.043484605848789215, "learning_rate": 0.01, "loss": 2.0655, "step": 14517 }, { "epoch": 1.4901477832512315, "grad_norm": 0.14452145993709564, "learning_rate": 0.01, "loss": 2.0599, "step": 14520 }, { "epoch": 1.4904556650246306, "grad_norm": 0.08289093524217606, "learning_rate": 0.01, "loss": 2.0654, "step": 14523 }, { "epoch": 1.4907635467980296, "grad_norm": 0.05047908052802086, "learning_rate": 0.01, "loss": 2.0409, "step": 14526 }, { "epoch": 1.4910714285714286, "grad_norm": 0.04830252006649971, "learning_rate": 0.01, "loss": 2.0529, "step": 14529 }, { "epoch": 1.4913793103448276, "grad_norm": 0.0430610254406929, "learning_rate": 0.01, "loss": 2.0242, "step": 14532 }, { "epoch": 1.4916871921182266, "grad_norm": 0.04282008111476898, "learning_rate": 0.01, "loss": 2.0494, "step": 14535 }, { "epoch": 1.4919950738916257, "grad_norm": 0.037373676896095276, "learning_rate": 0.01, "loss": 2.0528, "step": 14538 }, { "epoch": 1.4923029556650247, "grad_norm": 0.04186755418777466, "learning_rate": 0.01, "loss": 2.058, "step": 14541 }, { "epoch": 1.4926108374384237, "grad_norm": 0.05514196678996086, "learning_rate": 0.01, "loss": 2.0647, "step": 14544 }, { "epoch": 1.4929187192118227, "grad_norm": 0.07391703873872757, "learning_rate": 0.01, "loss": 2.0812, "step": 14547 }, { "epoch": 1.4932266009852218, "grad_norm": 0.1295444518327713, "learning_rate": 0.01, "loss": 2.0571, "step": 14550 }, { "epoch": 1.4935344827586206, "grad_norm": 0.06389490514993668, "learning_rate": 0.01, "loss": 2.0756, "step": 14553 }, { "epoch": 1.4938423645320196, "grad_norm": 0.09335155785083771, "learning_rate": 0.01, "loss": 2.0904, "step": 14556 }, { "epoch": 1.4941502463054186, "grad_norm": 0.059700366109609604, "learning_rate": 0.01, "loss": 2.0598, "step": 14559 }, { "epoch": 1.4944581280788176, "grad_norm": 0.07785683870315552, "learning_rate": 0.01, "loss": 2.0567, "step": 14562 }, { "epoch": 1.4947660098522166, "grad_norm": 0.11935362964868546, "learning_rate": 0.01, "loss": 2.0536, "step": 14565 }, { "epoch": 1.4950738916256157, "grad_norm": 0.06188122183084488, "learning_rate": 0.01, "loss": 2.0436, "step": 14568 }, { "epoch": 1.4953817733990147, "grad_norm": 0.05302932485938072, "learning_rate": 0.01, "loss": 2.0704, "step": 14571 }, { "epoch": 1.4956896551724137, "grad_norm": 0.03871694207191467, "learning_rate": 0.01, "loss": 2.062, "step": 14574 }, { "epoch": 1.4959975369458127, "grad_norm": 0.03942064568400383, "learning_rate": 0.01, "loss": 2.0725, "step": 14577 }, { "epoch": 1.4963054187192117, "grad_norm": 0.05354088917374611, "learning_rate": 0.01, "loss": 2.054, "step": 14580 }, { "epoch": 1.4966133004926108, "grad_norm": 0.07863521575927734, "learning_rate": 0.01, "loss": 2.0577, "step": 14583 }, { "epoch": 1.4969211822660098, "grad_norm": 0.0440685860812664, "learning_rate": 0.01, "loss": 2.0422, "step": 14586 }, { "epoch": 1.4972290640394088, "grad_norm": 0.0724552571773529, "learning_rate": 0.01, "loss": 2.0707, "step": 14589 }, { "epoch": 1.4975369458128078, "grad_norm": 0.06099352613091469, "learning_rate": 0.01, "loss": 2.0567, "step": 14592 }, { "epoch": 1.4978448275862069, "grad_norm": 0.05534674599766731, "learning_rate": 0.01, "loss": 2.0343, "step": 14595 }, { "epoch": 1.4981527093596059, "grad_norm": 0.07876823097467422, "learning_rate": 0.01, "loss": 2.0686, "step": 14598 }, { "epoch": 1.498460591133005, "grad_norm": 0.07860377430915833, "learning_rate": 0.01, "loss": 2.0506, "step": 14601 }, { "epoch": 1.498768472906404, "grad_norm": 0.054005399346351624, "learning_rate": 0.01, "loss": 2.0429, "step": 14604 }, { "epoch": 1.499076354679803, "grad_norm": 0.10550951957702637, "learning_rate": 0.01, "loss": 2.0407, "step": 14607 }, { "epoch": 1.499384236453202, "grad_norm": 0.056426841765642166, "learning_rate": 0.01, "loss": 2.0589, "step": 14610 }, { "epoch": 1.499692118226601, "grad_norm": 0.09640904515981674, "learning_rate": 0.01, "loss": 2.0556, "step": 14613 }, { "epoch": 1.5, "grad_norm": 0.0822538211941719, "learning_rate": 0.01, "loss": 2.0684, "step": 14616 }, { "epoch": 1.500307881773399, "grad_norm": 0.05105495825409889, "learning_rate": 0.01, "loss": 2.0588, "step": 14619 }, { "epoch": 1.500615763546798, "grad_norm": 0.07851336896419525, "learning_rate": 0.01, "loss": 2.0635, "step": 14622 }, { "epoch": 1.500923645320197, "grad_norm": 0.051046207547187805, "learning_rate": 0.01, "loss": 2.0418, "step": 14625 }, { "epoch": 1.501231527093596, "grad_norm": 0.12335740774869919, "learning_rate": 0.01, "loss": 2.0483, "step": 14628 }, { "epoch": 1.501539408866995, "grad_norm": 0.04044636711478233, "learning_rate": 0.01, "loss": 2.0635, "step": 14631 }, { "epoch": 1.5018472906403941, "grad_norm": 0.0532059408724308, "learning_rate": 0.01, "loss": 2.0567, "step": 14634 }, { "epoch": 1.5021551724137931, "grad_norm": 0.0446847639977932, "learning_rate": 0.01, "loss": 2.0531, "step": 14637 }, { "epoch": 1.5024630541871922, "grad_norm": 0.05464153364300728, "learning_rate": 0.01, "loss": 2.0464, "step": 14640 }, { "epoch": 1.5027709359605912, "grad_norm": 0.08923088759183884, "learning_rate": 0.01, "loss": 2.0787, "step": 14643 }, { "epoch": 1.5030788177339902, "grad_norm": 0.06256496161222458, "learning_rate": 0.01, "loss": 2.0417, "step": 14646 }, { "epoch": 1.5033866995073892, "grad_norm": 0.05338229984045029, "learning_rate": 0.01, "loss": 2.053, "step": 14649 }, { "epoch": 1.5036945812807883, "grad_norm": 0.04416535049676895, "learning_rate": 0.01, "loss": 2.0532, "step": 14652 }, { "epoch": 1.5040024630541873, "grad_norm": 0.07076221704483032, "learning_rate": 0.01, "loss": 2.0677, "step": 14655 }, { "epoch": 1.5043103448275863, "grad_norm": 0.08566464483737946, "learning_rate": 0.01, "loss": 2.0499, "step": 14658 }, { "epoch": 1.5046182266009853, "grad_norm": 0.049552109092473984, "learning_rate": 0.01, "loss": 2.0549, "step": 14661 }, { "epoch": 1.5049261083743843, "grad_norm": 0.11802852898836136, "learning_rate": 0.01, "loss": 2.0657, "step": 14664 }, { "epoch": 1.5052339901477834, "grad_norm": 0.05280107632279396, "learning_rate": 0.01, "loss": 2.0722, "step": 14667 }, { "epoch": 1.5055418719211824, "grad_norm": 0.036458853632211685, "learning_rate": 0.01, "loss": 2.0357, "step": 14670 }, { "epoch": 1.5058497536945814, "grad_norm": 0.0465536043047905, "learning_rate": 0.01, "loss": 2.0765, "step": 14673 }, { "epoch": 1.5061576354679804, "grad_norm": 0.09052444994449615, "learning_rate": 0.01, "loss": 2.0677, "step": 14676 }, { "epoch": 1.5064655172413794, "grad_norm": 0.08750707656145096, "learning_rate": 0.01, "loss": 2.0719, "step": 14679 }, { "epoch": 1.5067733990147785, "grad_norm": 0.07876972109079361, "learning_rate": 0.01, "loss": 2.0539, "step": 14682 }, { "epoch": 1.5070812807881775, "grad_norm": 0.045561011880636215, "learning_rate": 0.01, "loss": 2.0656, "step": 14685 }, { "epoch": 1.5073891625615765, "grad_norm": 0.04548237472772598, "learning_rate": 0.01, "loss": 2.0811, "step": 14688 }, { "epoch": 1.5076970443349755, "grad_norm": 0.04897540062665939, "learning_rate": 0.01, "loss": 2.046, "step": 14691 }, { "epoch": 1.5080049261083743, "grad_norm": 0.08820399641990662, "learning_rate": 0.01, "loss": 2.0521, "step": 14694 }, { "epoch": 1.5083128078817734, "grad_norm": 0.0701432004570961, "learning_rate": 0.01, "loss": 2.0644, "step": 14697 }, { "epoch": 1.5086206896551724, "grad_norm": 0.10921904444694519, "learning_rate": 0.01, "loss": 2.0617, "step": 14700 }, { "epoch": 1.5089285714285714, "grad_norm": 0.08308566361665726, "learning_rate": 0.01, "loss": 2.101, "step": 14703 }, { "epoch": 1.5092364532019704, "grad_norm": 0.12545743584632874, "learning_rate": 0.01, "loss": 2.0495, "step": 14706 }, { "epoch": 1.5095443349753694, "grad_norm": 0.11245466768741608, "learning_rate": 0.01, "loss": 2.0264, "step": 14709 }, { "epoch": 1.5098522167487685, "grad_norm": 0.10128718614578247, "learning_rate": 0.01, "loss": 2.0413, "step": 14712 }, { "epoch": 1.5101600985221675, "grad_norm": 0.07226911187171936, "learning_rate": 0.01, "loss": 2.0487, "step": 14715 }, { "epoch": 1.5104679802955665, "grad_norm": 0.056605782359838486, "learning_rate": 0.01, "loss": 2.0686, "step": 14718 }, { "epoch": 1.5107758620689655, "grad_norm": 0.08795683085918427, "learning_rate": 0.01, "loss": 2.053, "step": 14721 }, { "epoch": 1.5110837438423645, "grad_norm": 0.07311341166496277, "learning_rate": 0.01, "loss": 2.0164, "step": 14724 }, { "epoch": 1.5113916256157636, "grad_norm": 0.07164688408374786, "learning_rate": 0.01, "loss": 2.048, "step": 14727 }, { "epoch": 1.5116995073891626, "grad_norm": 0.04577312618494034, "learning_rate": 0.01, "loss": 2.0523, "step": 14730 }, { "epoch": 1.5120073891625616, "grad_norm": 0.043186552822589874, "learning_rate": 0.01, "loss": 2.0694, "step": 14733 }, { "epoch": 1.5123152709359606, "grad_norm": 0.06101042777299881, "learning_rate": 0.01, "loss": 2.072, "step": 14736 }, { "epoch": 1.5126231527093597, "grad_norm": 0.12651608884334564, "learning_rate": 0.01, "loss": 2.0914, "step": 14739 }, { "epoch": 1.5129310344827587, "grad_norm": 0.039495594799518585, "learning_rate": 0.01, "loss": 2.0331, "step": 14742 }, { "epoch": 1.5132389162561575, "grad_norm": 0.10854054987430573, "learning_rate": 0.01, "loss": 2.0585, "step": 14745 }, { "epoch": 1.5135467980295565, "grad_norm": 0.142778679728508, "learning_rate": 0.01, "loss": 2.0433, "step": 14748 }, { "epoch": 1.5138546798029555, "grad_norm": 0.06473375856876373, "learning_rate": 0.01, "loss": 2.075, "step": 14751 }, { "epoch": 1.5141625615763545, "grad_norm": 0.050436701625585556, "learning_rate": 0.01, "loss": 2.0823, "step": 14754 }, { "epoch": 1.5144704433497536, "grad_norm": 0.057088159024715424, "learning_rate": 0.01, "loss": 2.0578, "step": 14757 }, { "epoch": 1.5147783251231526, "grad_norm": 0.051623161882162094, "learning_rate": 0.01, "loss": 2.0816, "step": 14760 }, { "epoch": 1.5150862068965516, "grad_norm": 0.0770149901509285, "learning_rate": 0.01, "loss": 2.0583, "step": 14763 }, { "epoch": 1.5153940886699506, "grad_norm": 0.06827536970376968, "learning_rate": 0.01, "loss": 2.0782, "step": 14766 }, { "epoch": 1.5157019704433496, "grad_norm": 0.06987358629703522, "learning_rate": 0.01, "loss": 2.0618, "step": 14769 }, { "epoch": 1.5160098522167487, "grad_norm": 0.05388219282031059, "learning_rate": 0.01, "loss": 2.084, "step": 14772 }, { "epoch": 1.5163177339901477, "grad_norm": 0.11866139620542526, "learning_rate": 0.01, "loss": 2.0622, "step": 14775 }, { "epoch": 1.5166256157635467, "grad_norm": 0.12754911184310913, "learning_rate": 0.01, "loss": 2.0387, "step": 14778 }, { "epoch": 1.5169334975369457, "grad_norm": 0.03591502830386162, "learning_rate": 0.01, "loss": 2.043, "step": 14781 }, { "epoch": 1.5172413793103448, "grad_norm": 0.09142038226127625, "learning_rate": 0.01, "loss": 2.05, "step": 14784 }, { "epoch": 1.5175492610837438, "grad_norm": 0.13140954077243805, "learning_rate": 0.01, "loss": 2.0302, "step": 14787 }, { "epoch": 1.5178571428571428, "grad_norm": 0.08330459147691727, "learning_rate": 0.01, "loss": 2.0461, "step": 14790 }, { "epoch": 1.5181650246305418, "grad_norm": 0.0779498815536499, "learning_rate": 0.01, "loss": 2.0662, "step": 14793 }, { "epoch": 1.5184729064039408, "grad_norm": 0.05396762117743492, "learning_rate": 0.01, "loss": 2.0586, "step": 14796 }, { "epoch": 1.5187807881773399, "grad_norm": 0.06744614988565445, "learning_rate": 0.01, "loss": 2.0578, "step": 14799 }, { "epoch": 1.5190886699507389, "grad_norm": 0.04777420684695244, "learning_rate": 0.01, "loss": 2.0645, "step": 14802 }, { "epoch": 1.519396551724138, "grad_norm": 0.044643301516771317, "learning_rate": 0.01, "loss": 2.0691, "step": 14805 }, { "epoch": 1.519704433497537, "grad_norm": 0.05263877660036087, "learning_rate": 0.01, "loss": 2.0475, "step": 14808 }, { "epoch": 1.520012315270936, "grad_norm": 0.07794903963804245, "learning_rate": 0.01, "loss": 2.0685, "step": 14811 }, { "epoch": 1.520320197044335, "grad_norm": 0.03846001625061035, "learning_rate": 0.01, "loss": 2.0547, "step": 14814 }, { "epoch": 1.520628078817734, "grad_norm": 0.03806301951408386, "learning_rate": 0.01, "loss": 2.0701, "step": 14817 }, { "epoch": 1.520935960591133, "grad_norm": 0.08289408683776855, "learning_rate": 0.01, "loss": 2.0712, "step": 14820 }, { "epoch": 1.521243842364532, "grad_norm": 0.04307285323739052, "learning_rate": 0.01, "loss": 2.0926, "step": 14823 }, { "epoch": 1.521551724137931, "grad_norm": 0.04523704573512077, "learning_rate": 0.01, "loss": 2.0613, "step": 14826 }, { "epoch": 1.52185960591133, "grad_norm": 0.0813162624835968, "learning_rate": 0.01, "loss": 2.0516, "step": 14829 }, { "epoch": 1.522167487684729, "grad_norm": 0.08958449214696884, "learning_rate": 0.01, "loss": 2.0534, "step": 14832 }, { "epoch": 1.5224753694581281, "grad_norm": 0.1036042720079422, "learning_rate": 0.01, "loss": 2.051, "step": 14835 }, { "epoch": 1.5227832512315271, "grad_norm": 0.06528764218091965, "learning_rate": 0.01, "loss": 2.0389, "step": 14838 }, { "epoch": 1.5230911330049262, "grad_norm": 0.04857415333390236, "learning_rate": 0.01, "loss": 2.0215, "step": 14841 }, { "epoch": 1.5233990147783252, "grad_norm": 0.11137302964925766, "learning_rate": 0.01, "loss": 2.0706, "step": 14844 }, { "epoch": 1.5237068965517242, "grad_norm": 0.05258537456393242, "learning_rate": 0.01, "loss": 2.053, "step": 14847 }, { "epoch": 1.5240147783251232, "grad_norm": 0.05203690007328987, "learning_rate": 0.01, "loss": 2.0719, "step": 14850 }, { "epoch": 1.5243226600985222, "grad_norm": 0.036557264626026154, "learning_rate": 0.01, "loss": 2.0827, "step": 14853 }, { "epoch": 1.5246305418719213, "grad_norm": 0.05553048849105835, "learning_rate": 0.01, "loss": 2.0526, "step": 14856 }, { "epoch": 1.5249384236453203, "grad_norm": 0.07551626116037369, "learning_rate": 0.01, "loss": 2.0696, "step": 14859 }, { "epoch": 1.5252463054187193, "grad_norm": 0.09335839748382568, "learning_rate": 0.01, "loss": 2.069, "step": 14862 }, { "epoch": 1.5255541871921183, "grad_norm": 0.07123745232820511, "learning_rate": 0.01, "loss": 2.048, "step": 14865 }, { "epoch": 1.5258620689655173, "grad_norm": 0.06792188435792923, "learning_rate": 0.01, "loss": 2.059, "step": 14868 }, { "epoch": 1.5261699507389164, "grad_norm": 0.035666827112436295, "learning_rate": 0.01, "loss": 2.0585, "step": 14871 }, { "epoch": 1.5264778325123154, "grad_norm": 0.039600104093551636, "learning_rate": 0.01, "loss": 2.0447, "step": 14874 }, { "epoch": 1.5267857142857144, "grad_norm": 0.03983796760439873, "learning_rate": 0.01, "loss": 2.0607, "step": 14877 }, { "epoch": 1.5270935960591134, "grad_norm": 0.07013492286205292, "learning_rate": 0.01, "loss": 2.0502, "step": 14880 }, { "epoch": 1.5274014778325125, "grad_norm": 0.07064792513847351, "learning_rate": 0.01, "loss": 2.0328, "step": 14883 }, { "epoch": 1.5277093596059115, "grad_norm": 0.10502810031175613, "learning_rate": 0.01, "loss": 2.0494, "step": 14886 }, { "epoch": 1.5280172413793105, "grad_norm": 0.050288375467061996, "learning_rate": 0.01, "loss": 2.0316, "step": 14889 }, { "epoch": 1.5283251231527095, "grad_norm": 0.07382049411535263, "learning_rate": 0.01, "loss": 2.0414, "step": 14892 }, { "epoch": 1.5286330049261085, "grad_norm": 0.08693026751279831, "learning_rate": 0.01, "loss": 2.0428, "step": 14895 }, { "epoch": 1.5289408866995073, "grad_norm": 0.04283773526549339, "learning_rate": 0.01, "loss": 2.068, "step": 14898 }, { "epoch": 1.5292487684729064, "grad_norm": 0.1044667437672615, "learning_rate": 0.01, "loss": 2.0583, "step": 14901 }, { "epoch": 1.5295566502463054, "grad_norm": 0.06316410005092621, "learning_rate": 0.01, "loss": 2.0372, "step": 14904 }, { "epoch": 1.5298645320197044, "grad_norm": 0.04687780514359474, "learning_rate": 0.01, "loss": 2.0575, "step": 14907 }, { "epoch": 1.5301724137931034, "grad_norm": 0.04785927012562752, "learning_rate": 0.01, "loss": 2.0435, "step": 14910 }, { "epoch": 1.5304802955665024, "grad_norm": 0.03788747265934944, "learning_rate": 0.01, "loss": 2.0483, "step": 14913 }, { "epoch": 1.5307881773399015, "grad_norm": 0.05618858337402344, "learning_rate": 0.01, "loss": 2.031, "step": 14916 }, { "epoch": 1.5310960591133005, "grad_norm": 0.10059016942977905, "learning_rate": 0.01, "loss": 2.0608, "step": 14919 }, { "epoch": 1.5314039408866995, "grad_norm": 0.06718064099550247, "learning_rate": 0.01, "loss": 2.0676, "step": 14922 }, { "epoch": 1.5317118226600985, "grad_norm": 0.09006398916244507, "learning_rate": 0.01, "loss": 2.059, "step": 14925 }, { "epoch": 1.5320197044334976, "grad_norm": 0.036577485501766205, "learning_rate": 0.01, "loss": 2.0425, "step": 14928 }, { "epoch": 1.5323275862068966, "grad_norm": 0.07979925721883774, "learning_rate": 0.01, "loss": 2.0387, "step": 14931 }, { "epoch": 1.5326354679802956, "grad_norm": 0.08286473900079727, "learning_rate": 0.01, "loss": 2.0523, "step": 14934 }, { "epoch": 1.5329433497536946, "grad_norm": 0.042596206068992615, "learning_rate": 0.01, "loss": 2.0692, "step": 14937 }, { "epoch": 1.5332512315270936, "grad_norm": 0.043695200234651566, "learning_rate": 0.01, "loss": 2.0429, "step": 14940 }, { "epoch": 1.5335591133004927, "grad_norm": 0.13041776418685913, "learning_rate": 0.01, "loss": 2.0966, "step": 14943 }, { "epoch": 1.5338669950738915, "grad_norm": 0.06076871603727341, "learning_rate": 0.01, "loss": 2.081, "step": 14946 }, { "epoch": 1.5341748768472905, "grad_norm": 0.08744147419929504, "learning_rate": 0.01, "loss": 2.0698, "step": 14949 }, { "epoch": 1.5344827586206895, "grad_norm": 0.041992440819740295, "learning_rate": 0.01, "loss": 2.0735, "step": 14952 }, { "epoch": 1.5347906403940885, "grad_norm": 0.0773782953619957, "learning_rate": 0.01, "loss": 2.0501, "step": 14955 }, { "epoch": 1.5350985221674875, "grad_norm": 0.04371657967567444, "learning_rate": 0.01, "loss": 2.043, "step": 14958 }, { "epoch": 1.5354064039408866, "grad_norm": 0.04753347858786583, "learning_rate": 0.01, "loss": 2.0399, "step": 14961 }, { "epoch": 1.5357142857142856, "grad_norm": 0.10644037276506424, "learning_rate": 0.01, "loss": 2.0838, "step": 14964 }, { "epoch": 1.5360221674876846, "grad_norm": 0.037067610770463943, "learning_rate": 0.01, "loss": 2.0677, "step": 14967 }, { "epoch": 1.5363300492610836, "grad_norm": 0.06745267659425735, "learning_rate": 0.01, "loss": 2.0464, "step": 14970 }, { "epoch": 1.5366379310344827, "grad_norm": 0.10053039342164993, "learning_rate": 0.01, "loss": 2.0696, "step": 14973 }, { "epoch": 1.5369458128078817, "grad_norm": 0.08785562962293625, "learning_rate": 0.01, "loss": 2.059, "step": 14976 }, { "epoch": 1.5372536945812807, "grad_norm": 0.12240536510944366, "learning_rate": 0.01, "loss": 2.0477, "step": 14979 }, { "epoch": 1.5375615763546797, "grad_norm": 0.08541588485240936, "learning_rate": 0.01, "loss": 2.0581, "step": 14982 }, { "epoch": 1.5378694581280787, "grad_norm": 0.0469081737101078, "learning_rate": 0.01, "loss": 2.0581, "step": 14985 }, { "epoch": 1.5381773399014778, "grad_norm": 0.04396476596593857, "learning_rate": 0.01, "loss": 2.0504, "step": 14988 }, { "epoch": 1.5384852216748768, "grad_norm": 0.033920448273420334, "learning_rate": 0.01, "loss": 2.0513, "step": 14991 }, { "epoch": 1.5387931034482758, "grad_norm": 0.035798102617263794, "learning_rate": 0.01, "loss": 2.0583, "step": 14994 }, { "epoch": 1.5391009852216748, "grad_norm": 0.030788132920861244, "learning_rate": 0.01, "loss": 2.0856, "step": 14997 }, { "epoch": 1.5394088669950738, "grad_norm": 0.06127138063311577, "learning_rate": 0.01, "loss": 2.0485, "step": 15000 }, { "epoch": 1.5397167487684729, "grad_norm": 0.07792042940855026, "learning_rate": 0.01, "loss": 2.0473, "step": 15003 }, { "epoch": 1.5400246305418719, "grad_norm": 0.1915716826915741, "learning_rate": 0.01, "loss": 2.06, "step": 15006 }, { "epoch": 1.540332512315271, "grad_norm": 0.13130734860897064, "learning_rate": 0.01, "loss": 2.0458, "step": 15009 }, { "epoch": 1.54064039408867, "grad_norm": 0.08750183880329132, "learning_rate": 0.01, "loss": 2.0569, "step": 15012 }, { "epoch": 1.540948275862069, "grad_norm": 0.0678631141781807, "learning_rate": 0.01, "loss": 2.0256, "step": 15015 }, { "epoch": 1.541256157635468, "grad_norm": 0.04352593049407005, "learning_rate": 0.01, "loss": 2.0541, "step": 15018 }, { "epoch": 1.541564039408867, "grad_norm": 0.059398628771305084, "learning_rate": 0.01, "loss": 2.0542, "step": 15021 }, { "epoch": 1.541871921182266, "grad_norm": 0.09179355949163437, "learning_rate": 0.01, "loss": 2.0623, "step": 15024 }, { "epoch": 1.542179802955665, "grad_norm": 0.08243024349212646, "learning_rate": 0.01, "loss": 2.0616, "step": 15027 }, { "epoch": 1.542487684729064, "grad_norm": 0.05103360861539841, "learning_rate": 0.01, "loss": 2.0498, "step": 15030 }, { "epoch": 1.542795566502463, "grad_norm": 0.04206395894289017, "learning_rate": 0.01, "loss": 2.0676, "step": 15033 }, { "epoch": 1.543103448275862, "grad_norm": 0.03659799322485924, "learning_rate": 0.01, "loss": 2.0376, "step": 15036 }, { "epoch": 1.5434113300492611, "grad_norm": 0.1279965043067932, "learning_rate": 0.01, "loss": 2.043, "step": 15039 }, { "epoch": 1.5437192118226601, "grad_norm": 0.09509512782096863, "learning_rate": 0.01, "loss": 2.0233, "step": 15042 }, { "epoch": 1.5440270935960592, "grad_norm": 0.07963217794895172, "learning_rate": 0.01, "loss": 2.0632, "step": 15045 }, { "epoch": 1.5443349753694582, "grad_norm": 0.06425557285547256, "learning_rate": 0.01, "loss": 2.0454, "step": 15048 }, { "epoch": 1.5446428571428572, "grad_norm": 0.1166144609451294, "learning_rate": 0.01, "loss": 2.0675, "step": 15051 }, { "epoch": 1.5449507389162562, "grad_norm": 0.0558270663022995, "learning_rate": 0.01, "loss": 2.0495, "step": 15054 }, { "epoch": 1.5452586206896552, "grad_norm": 0.05666494369506836, "learning_rate": 0.01, "loss": 2.0417, "step": 15057 }, { "epoch": 1.5455665024630543, "grad_norm": 0.048931702971458435, "learning_rate": 0.01, "loss": 2.0503, "step": 15060 }, { "epoch": 1.5458743842364533, "grad_norm": 0.10072410106658936, "learning_rate": 0.01, "loss": 2.0432, "step": 15063 }, { "epoch": 1.5461822660098523, "grad_norm": 0.06339754164218903, "learning_rate": 0.01, "loss": 2.048, "step": 15066 }, { "epoch": 1.5464901477832513, "grad_norm": 0.04913650080561638, "learning_rate": 0.01, "loss": 2.07, "step": 15069 }, { "epoch": 1.5467980295566504, "grad_norm": 0.1012924313545227, "learning_rate": 0.01, "loss": 2.0423, "step": 15072 }, { "epoch": 1.5471059113300494, "grad_norm": 0.048015668988227844, "learning_rate": 0.01, "loss": 2.0356, "step": 15075 }, { "epoch": 1.5474137931034484, "grad_norm": 0.09666754305362701, "learning_rate": 0.01, "loss": 2.0701, "step": 15078 }, { "epoch": 1.5477216748768474, "grad_norm": 0.07722094655036926, "learning_rate": 0.01, "loss": 2.0223, "step": 15081 }, { "epoch": 1.5480295566502464, "grad_norm": 0.06525082141160965, "learning_rate": 0.01, "loss": 2.056, "step": 15084 }, { "epoch": 1.5483374384236455, "grad_norm": 0.04979628697037697, "learning_rate": 0.01, "loss": 2.0662, "step": 15087 }, { "epoch": 1.5486453201970445, "grad_norm": 0.05903888866305351, "learning_rate": 0.01, "loss": 2.0551, "step": 15090 }, { "epoch": 1.5489532019704435, "grad_norm": 0.09098793566226959, "learning_rate": 0.01, "loss": 2.0758, "step": 15093 }, { "epoch": 1.5492610837438425, "grad_norm": 0.08262350410223007, "learning_rate": 0.01, "loss": 2.0548, "step": 15096 }, { "epoch": 1.5495689655172413, "grad_norm": 0.057414278388023376, "learning_rate": 0.01, "loss": 2.0887, "step": 15099 }, { "epoch": 1.5498768472906403, "grad_norm": 0.06718642264604568, "learning_rate": 0.01, "loss": 2.0731, "step": 15102 }, { "epoch": 1.5501847290640394, "grad_norm": 0.07351098954677582, "learning_rate": 0.01, "loss": 2.0589, "step": 15105 }, { "epoch": 1.5504926108374384, "grad_norm": 0.03318174555897713, "learning_rate": 0.01, "loss": 2.0545, "step": 15108 }, { "epoch": 1.5508004926108374, "grad_norm": 0.11198091506958008, "learning_rate": 0.01, "loss": 2.0306, "step": 15111 }, { "epoch": 1.5511083743842364, "grad_norm": 0.056512147188186646, "learning_rate": 0.01, "loss": 2.0724, "step": 15114 }, { "epoch": 1.5514162561576355, "grad_norm": 0.08460327982902527, "learning_rate": 0.01, "loss": 2.0537, "step": 15117 }, { "epoch": 1.5517241379310345, "grad_norm": 0.08536583930253983, "learning_rate": 0.01, "loss": 2.0696, "step": 15120 }, { "epoch": 1.5520320197044335, "grad_norm": 0.10857357084751129, "learning_rate": 0.01, "loss": 2.0645, "step": 15123 }, { "epoch": 1.5523399014778325, "grad_norm": 0.04923904314637184, "learning_rate": 0.01, "loss": 2.0339, "step": 15126 }, { "epoch": 1.5526477832512315, "grad_norm": 0.05313669145107269, "learning_rate": 0.01, "loss": 2.044, "step": 15129 }, { "epoch": 1.5529556650246306, "grad_norm": 0.058348409831523895, "learning_rate": 0.01, "loss": 2.0264, "step": 15132 }, { "epoch": 1.5532635467980296, "grad_norm": 0.04621830955147743, "learning_rate": 0.01, "loss": 2.0543, "step": 15135 }, { "epoch": 1.5535714285714286, "grad_norm": 0.079473577439785, "learning_rate": 0.01, "loss": 2.0494, "step": 15138 }, { "epoch": 1.5538793103448276, "grad_norm": 0.10176654160022736, "learning_rate": 0.01, "loss": 2.0569, "step": 15141 }, { "epoch": 1.5541871921182266, "grad_norm": 0.048997897654771805, "learning_rate": 0.01, "loss": 2.0832, "step": 15144 }, { "epoch": 1.5544950738916257, "grad_norm": 0.06281887739896774, "learning_rate": 0.01, "loss": 2.0446, "step": 15147 }, { "epoch": 1.5548029556650245, "grad_norm": 0.07801464200019836, "learning_rate": 0.01, "loss": 2.0608, "step": 15150 }, { "epoch": 1.5551108374384235, "grad_norm": 0.08535271883010864, "learning_rate": 0.01, "loss": 2.0597, "step": 15153 }, { "epoch": 1.5554187192118225, "grad_norm": 0.06536438316106796, "learning_rate": 0.01, "loss": 2.0491, "step": 15156 }, { "epoch": 1.5557266009852215, "grad_norm": 0.045388113707304, "learning_rate": 0.01, "loss": 2.0243, "step": 15159 }, { "epoch": 1.5560344827586206, "grad_norm": 0.041893068701028824, "learning_rate": 0.01, "loss": 2.0373, "step": 15162 }, { "epoch": 1.5563423645320196, "grad_norm": 0.04261694848537445, "learning_rate": 0.01, "loss": 2.028, "step": 15165 }, { "epoch": 1.5566502463054186, "grad_norm": 0.03123985230922699, "learning_rate": 0.01, "loss": 2.0653, "step": 15168 }, { "epoch": 1.5569581280788176, "grad_norm": 0.048562515527009964, "learning_rate": 0.01, "loss": 2.0249, "step": 15171 }, { "epoch": 1.5572660098522166, "grad_norm": 0.1343316286802292, "learning_rate": 0.01, "loss": 2.062, "step": 15174 }, { "epoch": 1.5575738916256157, "grad_norm": 0.10992839932441711, "learning_rate": 0.01, "loss": 2.0374, "step": 15177 }, { "epoch": 1.5578817733990147, "grad_norm": 0.09098651260137558, "learning_rate": 0.01, "loss": 2.0394, "step": 15180 }, { "epoch": 1.5581896551724137, "grad_norm": 0.05405926704406738, "learning_rate": 0.01, "loss": 2.0491, "step": 15183 }, { "epoch": 1.5584975369458127, "grad_norm": 0.04776093736290932, "learning_rate": 0.01, "loss": 2.0775, "step": 15186 }, { "epoch": 1.5588054187192117, "grad_norm": 0.04614724963903427, "learning_rate": 0.01, "loss": 2.0339, "step": 15189 }, { "epoch": 1.5591133004926108, "grad_norm": 0.05032865330576897, "learning_rate": 0.01, "loss": 2.0516, "step": 15192 }, { "epoch": 1.5594211822660098, "grad_norm": 0.051392171531915665, "learning_rate": 0.01, "loss": 2.0405, "step": 15195 }, { "epoch": 1.5597290640394088, "grad_norm": 0.10255944728851318, "learning_rate": 0.01, "loss": 2.0615, "step": 15198 }, { "epoch": 1.5600369458128078, "grad_norm": 0.04222560301423073, "learning_rate": 0.01, "loss": 2.0527, "step": 15201 }, { "epoch": 1.5603448275862069, "grad_norm": 0.045385826379060745, "learning_rate": 0.01, "loss": 2.0556, "step": 15204 }, { "epoch": 1.5606527093596059, "grad_norm": 0.04241577908396721, "learning_rate": 0.01, "loss": 2.0727, "step": 15207 }, { "epoch": 1.560960591133005, "grad_norm": 0.12387125194072723, "learning_rate": 0.01, "loss": 2.0337, "step": 15210 }, { "epoch": 1.561268472906404, "grad_norm": 0.14704599976539612, "learning_rate": 0.01, "loss": 2.0602, "step": 15213 }, { "epoch": 1.561576354679803, "grad_norm": 0.049915559589862823, "learning_rate": 0.01, "loss": 2.0203, "step": 15216 }, { "epoch": 1.561884236453202, "grad_norm": 0.057630617171525955, "learning_rate": 0.01, "loss": 2.0503, "step": 15219 }, { "epoch": 1.562192118226601, "grad_norm": 0.04765351861715317, "learning_rate": 0.01, "loss": 2.0763, "step": 15222 }, { "epoch": 1.5625, "grad_norm": 0.06213679164648056, "learning_rate": 0.01, "loss": 2.0831, "step": 15225 }, { "epoch": 1.562807881773399, "grad_norm": 0.07990710437297821, "learning_rate": 0.01, "loss": 2.0458, "step": 15228 }, { "epoch": 1.563115763546798, "grad_norm": 0.0683673620223999, "learning_rate": 0.01, "loss": 2.0294, "step": 15231 }, { "epoch": 1.563423645320197, "grad_norm": 0.12503905594348907, "learning_rate": 0.01, "loss": 2.0543, "step": 15234 }, { "epoch": 1.563731527093596, "grad_norm": 0.03973531350493431, "learning_rate": 0.01, "loss": 2.0474, "step": 15237 }, { "epoch": 1.564039408866995, "grad_norm": 0.07055282592773438, "learning_rate": 0.01, "loss": 2.0607, "step": 15240 }, { "epoch": 1.5643472906403941, "grad_norm": 0.06088467687368393, "learning_rate": 0.01, "loss": 2.0643, "step": 15243 }, { "epoch": 1.5646551724137931, "grad_norm": 0.06393450498580933, "learning_rate": 0.01, "loss": 2.0369, "step": 15246 }, { "epoch": 1.5649630541871922, "grad_norm": 0.08600255101919174, "learning_rate": 0.01, "loss": 2.072, "step": 15249 }, { "epoch": 1.5652709359605912, "grad_norm": 0.07075429707765579, "learning_rate": 0.01, "loss": 2.0509, "step": 15252 }, { "epoch": 1.5655788177339902, "grad_norm": 0.058057527989149094, "learning_rate": 0.01, "loss": 2.0373, "step": 15255 }, { "epoch": 1.5658866995073892, "grad_norm": 0.04670482128858566, "learning_rate": 0.01, "loss": 2.0447, "step": 15258 }, { "epoch": 1.5661945812807883, "grad_norm": 0.08971681445837021, "learning_rate": 0.01, "loss": 2.0599, "step": 15261 }, { "epoch": 1.5665024630541873, "grad_norm": 0.12580984830856323, "learning_rate": 0.01, "loss": 2.0402, "step": 15264 }, { "epoch": 1.5668103448275863, "grad_norm": 0.05133863538503647, "learning_rate": 0.01, "loss": 2.056, "step": 15267 }, { "epoch": 1.5671182266009853, "grad_norm": 0.07821512222290039, "learning_rate": 0.01, "loss": 2.0458, "step": 15270 }, { "epoch": 1.5674261083743843, "grad_norm": 0.07024712860584259, "learning_rate": 0.01, "loss": 2.0496, "step": 15273 }, { "epoch": 1.5677339901477834, "grad_norm": 0.09332927316427231, "learning_rate": 0.01, "loss": 2.0358, "step": 15276 }, { "epoch": 1.5680418719211824, "grad_norm": 0.06875135749578476, "learning_rate": 0.01, "loss": 2.0459, "step": 15279 }, { "epoch": 1.5683497536945814, "grad_norm": 0.08868546783924103, "learning_rate": 0.01, "loss": 2.0641, "step": 15282 }, { "epoch": 1.5686576354679804, "grad_norm": 0.07729046791791916, "learning_rate": 0.01, "loss": 2.0333, "step": 15285 }, { "epoch": 1.5689655172413794, "grad_norm": 0.07686775177717209, "learning_rate": 0.01, "loss": 2.0446, "step": 15288 }, { "epoch": 1.5692733990147785, "grad_norm": 0.0839667096734047, "learning_rate": 0.01, "loss": 2.0428, "step": 15291 }, { "epoch": 1.5695812807881775, "grad_norm": 0.0704370066523552, "learning_rate": 0.01, "loss": 2.0638, "step": 15294 }, { "epoch": 1.5698891625615765, "grad_norm": 0.05312497168779373, "learning_rate": 0.01, "loss": 2.029, "step": 15297 }, { "epoch": 1.5701970443349755, "grad_norm": 0.049166906625032425, "learning_rate": 0.01, "loss": 2.0544, "step": 15300 }, { "epoch": 1.5705049261083743, "grad_norm": 0.041398897767066956, "learning_rate": 0.01, "loss": 2.0652, "step": 15303 }, { "epoch": 1.5708128078817734, "grad_norm": 0.08617027848958969, "learning_rate": 0.01, "loss": 2.0675, "step": 15306 }, { "epoch": 1.5711206896551724, "grad_norm": 0.0348927266895771, "learning_rate": 0.01, "loss": 2.043, "step": 15309 }, { "epoch": 1.5714285714285714, "grad_norm": 0.060787077993154526, "learning_rate": 0.01, "loss": 2.0439, "step": 15312 }, { "epoch": 1.5717364532019704, "grad_norm": 0.050898227840662, "learning_rate": 0.01, "loss": 2.0475, "step": 15315 }, { "epoch": 1.5720443349753694, "grad_norm": 0.04594309255480766, "learning_rate": 0.01, "loss": 2.0352, "step": 15318 }, { "epoch": 1.5723522167487685, "grad_norm": 0.1161418929696083, "learning_rate": 0.01, "loss": 2.0232, "step": 15321 }, { "epoch": 1.5726600985221675, "grad_norm": 0.05419136583805084, "learning_rate": 0.01, "loss": 2.0417, "step": 15324 }, { "epoch": 1.5729679802955665, "grad_norm": 0.07661257684230804, "learning_rate": 0.01, "loss": 2.0548, "step": 15327 }, { "epoch": 1.5732758620689655, "grad_norm": 0.09760436415672302, "learning_rate": 0.01, "loss": 2.0539, "step": 15330 }, { "epoch": 1.5735837438423645, "grad_norm": 0.07211121916770935, "learning_rate": 0.01, "loss": 2.037, "step": 15333 }, { "epoch": 1.5738916256157636, "grad_norm": 0.08360971510410309, "learning_rate": 0.01, "loss": 2.0465, "step": 15336 }, { "epoch": 1.5741995073891626, "grad_norm": 0.05901337414979935, "learning_rate": 0.01, "loss": 2.0581, "step": 15339 }, { "epoch": 1.5745073891625616, "grad_norm": 0.07543767988681793, "learning_rate": 0.01, "loss": 2.0437, "step": 15342 }, { "epoch": 1.5748152709359606, "grad_norm": 0.04725690186023712, "learning_rate": 0.01, "loss": 2.0751, "step": 15345 }, { "epoch": 1.5751231527093597, "grad_norm": 0.051067035645246506, "learning_rate": 0.01, "loss": 2.0434, "step": 15348 }, { "epoch": 1.5754310344827587, "grad_norm": 0.03145357966423035, "learning_rate": 0.01, "loss": 2.0511, "step": 15351 }, { "epoch": 1.5757389162561575, "grad_norm": 0.09291981905698776, "learning_rate": 0.01, "loss": 2.0604, "step": 15354 }, { "epoch": 1.5760467980295565, "grad_norm": 0.03409574180841446, "learning_rate": 0.01, "loss": 2.0052, "step": 15357 }, { "epoch": 1.5763546798029555, "grad_norm": 0.045993223786354065, "learning_rate": 0.01, "loss": 2.0442, "step": 15360 }, { "epoch": 1.5766625615763545, "grad_norm": 0.03174202889204025, "learning_rate": 0.01, "loss": 2.0525, "step": 15363 }, { "epoch": 1.5769704433497536, "grad_norm": 0.057013627141714096, "learning_rate": 0.01, "loss": 2.0699, "step": 15366 }, { "epoch": 1.5772783251231526, "grad_norm": 0.05778640881180763, "learning_rate": 0.01, "loss": 2.0641, "step": 15369 }, { "epoch": 1.5775862068965516, "grad_norm": 0.11259103566408157, "learning_rate": 0.01, "loss": 2.0496, "step": 15372 }, { "epoch": 1.5778940886699506, "grad_norm": 0.05728684365749359, "learning_rate": 0.01, "loss": 2.0646, "step": 15375 }, { "epoch": 1.5782019704433496, "grad_norm": 0.07037964463233948, "learning_rate": 0.01, "loss": 2.072, "step": 15378 }, { "epoch": 1.5785098522167487, "grad_norm": 0.05147834122180939, "learning_rate": 0.01, "loss": 2.0339, "step": 15381 }, { "epoch": 1.5788177339901477, "grad_norm": 0.0663742870092392, "learning_rate": 0.01, "loss": 2.0534, "step": 15384 }, { "epoch": 1.5791256157635467, "grad_norm": 0.04665178433060646, "learning_rate": 0.01, "loss": 2.0698, "step": 15387 }, { "epoch": 1.5794334975369457, "grad_norm": 0.037410903722047806, "learning_rate": 0.01, "loss": 2.0529, "step": 15390 }, { "epoch": 1.5797413793103448, "grad_norm": 0.03849703446030617, "learning_rate": 0.01, "loss": 2.0484, "step": 15393 }, { "epoch": 1.5800492610837438, "grad_norm": 0.10119396448135376, "learning_rate": 0.01, "loss": 2.0752, "step": 15396 }, { "epoch": 1.5803571428571428, "grad_norm": 0.03294401615858078, "learning_rate": 0.01, "loss": 2.0218, "step": 15399 }, { "epoch": 1.5806650246305418, "grad_norm": 0.06310781091451645, "learning_rate": 0.01, "loss": 2.0311, "step": 15402 }, { "epoch": 1.5809729064039408, "grad_norm": 0.050191253423690796, "learning_rate": 0.01, "loss": 2.0408, "step": 15405 }, { "epoch": 1.5812807881773399, "grad_norm": 0.09952792525291443, "learning_rate": 0.01, "loss": 2.0622, "step": 15408 }, { "epoch": 1.5815886699507389, "grad_norm": 0.12618017196655273, "learning_rate": 0.01, "loss": 2.0696, "step": 15411 }, { "epoch": 1.581896551724138, "grad_norm": 0.19296571612358093, "learning_rate": 0.01, "loss": 2.072, "step": 15414 }, { "epoch": 1.582204433497537, "grad_norm": 0.1124732494354248, "learning_rate": 0.01, "loss": 2.0451, "step": 15417 }, { "epoch": 1.582512315270936, "grad_norm": 0.06556060910224915, "learning_rate": 0.01, "loss": 2.0325, "step": 15420 }, { "epoch": 1.582820197044335, "grad_norm": 0.05607735365629196, "learning_rate": 0.01, "loss": 2.0663, "step": 15423 }, { "epoch": 1.583128078817734, "grad_norm": 0.04731295630335808, "learning_rate": 0.01, "loss": 2.0559, "step": 15426 }, { "epoch": 1.583435960591133, "grad_norm": 0.060452669858932495, "learning_rate": 0.01, "loss": 2.0498, "step": 15429 }, { "epoch": 1.583743842364532, "grad_norm": 0.056996360421180725, "learning_rate": 0.01, "loss": 2.0752, "step": 15432 }, { "epoch": 1.584051724137931, "grad_norm": 0.11382313817739487, "learning_rate": 0.01, "loss": 2.0723, "step": 15435 }, { "epoch": 1.58435960591133, "grad_norm": 0.05176008865237236, "learning_rate": 0.01, "loss": 2.0706, "step": 15438 }, { "epoch": 1.584667487684729, "grad_norm": 0.05329972878098488, "learning_rate": 0.01, "loss": 2.0473, "step": 15441 }, { "epoch": 1.5849753694581281, "grad_norm": 0.05416284501552582, "learning_rate": 0.01, "loss": 2.0585, "step": 15444 }, { "epoch": 1.5852832512315271, "grad_norm": 0.052267350256443024, "learning_rate": 0.01, "loss": 2.0314, "step": 15447 }, { "epoch": 1.5855911330049262, "grad_norm": 0.1025579646229744, "learning_rate": 0.01, "loss": 2.0412, "step": 15450 }, { "epoch": 1.5858990147783252, "grad_norm": 0.06416940689086914, "learning_rate": 0.01, "loss": 2.0312, "step": 15453 }, { "epoch": 1.5862068965517242, "grad_norm": 0.05699596926569939, "learning_rate": 0.01, "loss": 2.0678, "step": 15456 }, { "epoch": 1.5865147783251232, "grad_norm": 0.036711812019348145, "learning_rate": 0.01, "loss": 2.0251, "step": 15459 }, { "epoch": 1.5868226600985222, "grad_norm": 0.1025582030415535, "learning_rate": 0.01, "loss": 2.0387, "step": 15462 }, { "epoch": 1.5871305418719213, "grad_norm": 0.03923096880316734, "learning_rate": 0.01, "loss": 2.067, "step": 15465 }, { "epoch": 1.5874384236453203, "grad_norm": 0.08267144113779068, "learning_rate": 0.01, "loss": 2.0583, "step": 15468 }, { "epoch": 1.5877463054187193, "grad_norm": 0.15374930202960968, "learning_rate": 0.01, "loss": 2.0384, "step": 15471 }, { "epoch": 1.5880541871921183, "grad_norm": 0.10246127098798752, "learning_rate": 0.01, "loss": 2.0696, "step": 15474 }, { "epoch": 1.5883620689655173, "grad_norm": 0.09784136712551117, "learning_rate": 0.01, "loss": 2.0546, "step": 15477 }, { "epoch": 1.5886699507389164, "grad_norm": 0.08747786283493042, "learning_rate": 0.01, "loss": 2.0754, "step": 15480 }, { "epoch": 1.5889778325123154, "grad_norm": 0.0755406990647316, "learning_rate": 0.01, "loss": 2.0477, "step": 15483 }, { "epoch": 1.5892857142857144, "grad_norm": 0.05593521520495415, "learning_rate": 0.01, "loss": 2.0485, "step": 15486 }, { "epoch": 1.5895935960591134, "grad_norm": 0.04462866857647896, "learning_rate": 0.01, "loss": 2.056, "step": 15489 }, { "epoch": 1.5899014778325125, "grad_norm": 0.040571678429841995, "learning_rate": 0.01, "loss": 2.0906, "step": 15492 }, { "epoch": 1.5902093596059115, "grad_norm": 0.038100458681583405, "learning_rate": 0.01, "loss": 2.0735, "step": 15495 }, { "epoch": 1.5905172413793105, "grad_norm": 0.04336906597018242, "learning_rate": 0.01, "loss": 2.0716, "step": 15498 }, { "epoch": 1.5908251231527095, "grad_norm": 0.1424836665391922, "learning_rate": 0.01, "loss": 2.0424, "step": 15501 }, { "epoch": 1.5911330049261085, "grad_norm": 0.09235331416130066, "learning_rate": 0.01, "loss": 2.0461, "step": 15504 }, { "epoch": 1.5914408866995073, "grad_norm": 0.07932816445827484, "learning_rate": 0.01, "loss": 2.0595, "step": 15507 }, { "epoch": 1.5917487684729064, "grad_norm": 0.057297565042972565, "learning_rate": 0.01, "loss": 2.0407, "step": 15510 }, { "epoch": 1.5920566502463054, "grad_norm": 0.05323542281985283, "learning_rate": 0.01, "loss": 2.0568, "step": 15513 }, { "epoch": 1.5923645320197044, "grad_norm": 0.039465416222810745, "learning_rate": 0.01, "loss": 2.0455, "step": 15516 }, { "epoch": 1.5926724137931034, "grad_norm": 0.044614970684051514, "learning_rate": 0.01, "loss": 2.0784, "step": 15519 }, { "epoch": 1.5929802955665024, "grad_norm": 0.044074248522520065, "learning_rate": 0.01, "loss": 2.0391, "step": 15522 }, { "epoch": 1.5932881773399015, "grad_norm": 0.04098647087812424, "learning_rate": 0.01, "loss": 2.0512, "step": 15525 }, { "epoch": 1.5935960591133005, "grad_norm": 0.18400810658931732, "learning_rate": 0.01, "loss": 2.0623, "step": 15528 }, { "epoch": 1.5939039408866995, "grad_norm": 0.10264160484075546, "learning_rate": 0.01, "loss": 2.0546, "step": 15531 }, { "epoch": 1.5942118226600985, "grad_norm": 0.10086511820554733, "learning_rate": 0.01, "loss": 2.0671, "step": 15534 }, { "epoch": 1.5945197044334976, "grad_norm": 0.03823179379105568, "learning_rate": 0.01, "loss": 2.0496, "step": 15537 }, { "epoch": 1.5948275862068966, "grad_norm": 0.0449577271938324, "learning_rate": 0.01, "loss": 2.0635, "step": 15540 }, { "epoch": 1.5951354679802956, "grad_norm": 0.04791559278964996, "learning_rate": 0.01, "loss": 2.0596, "step": 15543 }, { "epoch": 1.5954433497536946, "grad_norm": 0.04523475095629692, "learning_rate": 0.01, "loss": 2.0457, "step": 15546 }, { "epoch": 1.5957512315270936, "grad_norm": 0.10654012113809586, "learning_rate": 0.01, "loss": 2.0172, "step": 15549 }, { "epoch": 1.5960591133004927, "grad_norm": 0.06602972745895386, "learning_rate": 0.01, "loss": 2.0565, "step": 15552 }, { "epoch": 1.5963669950738915, "grad_norm": 0.10605626553297043, "learning_rate": 0.01, "loss": 2.0792, "step": 15555 }, { "epoch": 1.5966748768472905, "grad_norm": 0.05995124578475952, "learning_rate": 0.01, "loss": 2.0367, "step": 15558 }, { "epoch": 1.5969827586206895, "grad_norm": 0.05426995828747749, "learning_rate": 0.01, "loss": 2.0458, "step": 15561 }, { "epoch": 1.5972906403940885, "grad_norm": 0.08749561756849289, "learning_rate": 0.01, "loss": 2.0509, "step": 15564 }, { "epoch": 1.5975985221674875, "grad_norm": 0.0735105574131012, "learning_rate": 0.01, "loss": 2.0548, "step": 15567 }, { "epoch": 1.5979064039408866, "grad_norm": 0.05417585372924805, "learning_rate": 0.01, "loss": 2.0304, "step": 15570 }, { "epoch": 1.5982142857142856, "grad_norm": 0.04170646145939827, "learning_rate": 0.01, "loss": 2.0751, "step": 15573 }, { "epoch": 1.5985221674876846, "grad_norm": 0.05886775627732277, "learning_rate": 0.01, "loss": 2.0419, "step": 15576 }, { "epoch": 1.5988300492610836, "grad_norm": 0.04231201857328415, "learning_rate": 0.01, "loss": 2.0531, "step": 15579 }, { "epoch": 1.5991379310344827, "grad_norm": 0.06620585918426514, "learning_rate": 0.01, "loss": 2.043, "step": 15582 }, { "epoch": 1.5994458128078817, "grad_norm": 0.10536913573741913, "learning_rate": 0.01, "loss": 2.0327, "step": 15585 }, { "epoch": 1.5997536945812807, "grad_norm": 0.14467884600162506, "learning_rate": 0.01, "loss": 2.0491, "step": 15588 }, { "epoch": 1.6000615763546797, "grad_norm": 0.11715273559093475, "learning_rate": 0.01, "loss": 2.0362, "step": 15591 }, { "epoch": 1.6003694581280787, "grad_norm": 0.04978121817111969, "learning_rate": 0.01, "loss": 2.0444, "step": 15594 }, { "epoch": 1.6006773399014778, "grad_norm": 0.06248803436756134, "learning_rate": 0.01, "loss": 2.0527, "step": 15597 }, { "epoch": 1.6009852216748768, "grad_norm": 0.05408048257231712, "learning_rate": 0.01, "loss": 2.0519, "step": 15600 }, { "epoch": 1.6012931034482758, "grad_norm": 0.05805948004126549, "learning_rate": 0.01, "loss": 2.0539, "step": 15603 }, { "epoch": 1.6016009852216748, "grad_norm": 0.03809194639325142, "learning_rate": 0.01, "loss": 2.0515, "step": 15606 }, { "epoch": 1.6019088669950738, "grad_norm": 0.07981141656637192, "learning_rate": 0.01, "loss": 2.0238, "step": 15609 }, { "epoch": 1.6022167487684729, "grad_norm": 0.04769575223326683, "learning_rate": 0.01, "loss": 2.0703, "step": 15612 }, { "epoch": 1.6025246305418719, "grad_norm": 0.09913644194602966, "learning_rate": 0.01, "loss": 2.074, "step": 15615 }, { "epoch": 1.602832512315271, "grad_norm": 0.12298569083213806, "learning_rate": 0.01, "loss": 2.0662, "step": 15618 }, { "epoch": 1.60314039408867, "grad_norm": 0.0525309219956398, "learning_rate": 0.01, "loss": 2.0411, "step": 15621 }, { "epoch": 1.603448275862069, "grad_norm": 0.07430320978164673, "learning_rate": 0.01, "loss": 2.0465, "step": 15624 }, { "epoch": 1.603756157635468, "grad_norm": 0.036753058433532715, "learning_rate": 0.01, "loss": 2.0408, "step": 15627 }, { "epoch": 1.604064039408867, "grad_norm": 0.04560523107647896, "learning_rate": 0.01, "loss": 2.074, "step": 15630 }, { "epoch": 1.604371921182266, "grad_norm": 0.07089810073375702, "learning_rate": 0.01, "loss": 2.0599, "step": 15633 }, { "epoch": 1.604679802955665, "grad_norm": 0.10833004862070084, "learning_rate": 0.01, "loss": 2.066, "step": 15636 }, { "epoch": 1.604987684729064, "grad_norm": 0.06033416837453842, "learning_rate": 0.01, "loss": 2.0893, "step": 15639 }, { "epoch": 1.605295566502463, "grad_norm": 0.06819162517786026, "learning_rate": 0.01, "loss": 2.0432, "step": 15642 }, { "epoch": 1.605603448275862, "grad_norm": 0.08949002623558044, "learning_rate": 0.01, "loss": 2.0891, "step": 15645 }, { "epoch": 1.6059113300492611, "grad_norm": 0.04749004542827606, "learning_rate": 0.01, "loss": 2.0434, "step": 15648 }, { "epoch": 1.6062192118226601, "grad_norm": 0.06903103739023209, "learning_rate": 0.01, "loss": 2.0379, "step": 15651 }, { "epoch": 1.6065270935960592, "grad_norm": 0.10074819624423981, "learning_rate": 0.01, "loss": 2.0657, "step": 15654 }, { "epoch": 1.6068349753694582, "grad_norm": 0.0390753298997879, "learning_rate": 0.01, "loss": 2.0186, "step": 15657 }, { "epoch": 1.6071428571428572, "grad_norm": 0.04776669666171074, "learning_rate": 0.01, "loss": 2.0435, "step": 15660 }, { "epoch": 1.6074507389162562, "grad_norm": 0.1191340908408165, "learning_rate": 0.01, "loss": 2.0519, "step": 15663 }, { "epoch": 1.6077586206896552, "grad_norm": 0.08326657861471176, "learning_rate": 0.01, "loss": 2.0443, "step": 15666 }, { "epoch": 1.6080665024630543, "grad_norm": 0.044734589755535126, "learning_rate": 0.01, "loss": 2.0626, "step": 15669 }, { "epoch": 1.6083743842364533, "grad_norm": 0.047262392938137054, "learning_rate": 0.01, "loss": 2.0351, "step": 15672 }, { "epoch": 1.6086822660098523, "grad_norm": 0.0908563882112503, "learning_rate": 0.01, "loss": 2.0655, "step": 15675 }, { "epoch": 1.6089901477832513, "grad_norm": 0.06681264191865921, "learning_rate": 0.01, "loss": 2.0606, "step": 15678 }, { "epoch": 1.6092980295566504, "grad_norm": 0.09569018334150314, "learning_rate": 0.01, "loss": 2.0454, "step": 15681 }, { "epoch": 1.6096059113300494, "grad_norm": 0.04303963482379913, "learning_rate": 0.01, "loss": 2.0296, "step": 15684 }, { "epoch": 1.6099137931034484, "grad_norm": 0.09924867749214172, "learning_rate": 0.01, "loss": 2.0578, "step": 15687 }, { "epoch": 1.6102216748768474, "grad_norm": 0.041328392922878265, "learning_rate": 0.01, "loss": 2.0538, "step": 15690 }, { "epoch": 1.6105295566502464, "grad_norm": 0.056367840617895126, "learning_rate": 0.01, "loss": 2.0746, "step": 15693 }, { "epoch": 1.6108374384236455, "grad_norm": 0.06074264645576477, "learning_rate": 0.01, "loss": 2.0707, "step": 15696 }, { "epoch": 1.6111453201970445, "grad_norm": 0.06541740894317627, "learning_rate": 0.01, "loss": 2.0615, "step": 15699 }, { "epoch": 1.6114532019704435, "grad_norm": 0.06279835850000381, "learning_rate": 0.01, "loss": 2.0484, "step": 15702 }, { "epoch": 1.6117610837438425, "grad_norm": 0.03825109452009201, "learning_rate": 0.01, "loss": 2.0317, "step": 15705 }, { "epoch": 1.6120689655172413, "grad_norm": 0.03792817145586014, "learning_rate": 0.01, "loss": 2.0246, "step": 15708 }, { "epoch": 1.6123768472906403, "grad_norm": 0.05229473114013672, "learning_rate": 0.01, "loss": 2.0765, "step": 15711 }, { "epoch": 1.6126847290640394, "grad_norm": 0.11285384744405746, "learning_rate": 0.01, "loss": 2.0392, "step": 15714 }, { "epoch": 1.6129926108374384, "grad_norm": 0.07333546876907349, "learning_rate": 0.01, "loss": 2.05, "step": 15717 }, { "epoch": 1.6133004926108374, "grad_norm": 0.07698936760425568, "learning_rate": 0.01, "loss": 2.0757, "step": 15720 }, { "epoch": 1.6136083743842364, "grad_norm": 0.06517963856458664, "learning_rate": 0.01, "loss": 2.032, "step": 15723 }, { "epoch": 1.6139162561576355, "grad_norm": 0.07242800295352936, "learning_rate": 0.01, "loss": 2.0398, "step": 15726 }, { "epoch": 1.6142241379310345, "grad_norm": 0.03956649452447891, "learning_rate": 0.01, "loss": 2.0437, "step": 15729 }, { "epoch": 1.6145320197044335, "grad_norm": 0.10249898582696915, "learning_rate": 0.01, "loss": 2.0716, "step": 15732 }, { "epoch": 1.6148399014778325, "grad_norm": 0.09716839343309402, "learning_rate": 0.01, "loss": 2.0288, "step": 15735 }, { "epoch": 1.6151477832512315, "grad_norm": 0.0809134840965271, "learning_rate": 0.01, "loss": 2.0553, "step": 15738 }, { "epoch": 1.6154556650246306, "grad_norm": 0.07891330122947693, "learning_rate": 0.01, "loss": 2.0622, "step": 15741 }, { "epoch": 1.6157635467980296, "grad_norm": 0.06289231032133102, "learning_rate": 0.01, "loss": 2.0382, "step": 15744 }, { "epoch": 1.6160714285714286, "grad_norm": 0.032251689583063126, "learning_rate": 0.01, "loss": 2.049, "step": 15747 }, { "epoch": 1.6163793103448276, "grad_norm": 0.032203931361436844, "learning_rate": 0.01, "loss": 2.0474, "step": 15750 }, { "epoch": 1.6166871921182266, "grad_norm": 0.042572617530822754, "learning_rate": 0.01, "loss": 2.0604, "step": 15753 }, { "epoch": 1.6169950738916257, "grad_norm": 0.06869769096374512, "learning_rate": 0.01, "loss": 2.0643, "step": 15756 }, { "epoch": 1.6173029556650245, "grad_norm": 0.09649953991174698, "learning_rate": 0.01, "loss": 2.0811, "step": 15759 }, { "epoch": 1.6176108374384235, "grad_norm": 0.060255225747823715, "learning_rate": 0.01, "loss": 2.0543, "step": 15762 }, { "epoch": 1.6179187192118225, "grad_norm": 0.0548517182469368, "learning_rate": 0.01, "loss": 2.0427, "step": 15765 }, { "epoch": 1.6182266009852215, "grad_norm": 0.09392546862363815, "learning_rate": 0.01, "loss": 2.0722, "step": 15768 }, { "epoch": 1.6185344827586206, "grad_norm": 0.052100926637649536, "learning_rate": 0.01, "loss": 2.0469, "step": 15771 }, { "epoch": 1.6188423645320196, "grad_norm": 0.05099212005734444, "learning_rate": 0.01, "loss": 2.0589, "step": 15774 }, { "epoch": 1.6191502463054186, "grad_norm": 0.0486266165971756, "learning_rate": 0.01, "loss": 2.0308, "step": 15777 }, { "epoch": 1.6194581280788176, "grad_norm": 0.044072605669498444, "learning_rate": 0.01, "loss": 2.0877, "step": 15780 }, { "epoch": 1.6197660098522166, "grad_norm": 0.09196856617927551, "learning_rate": 0.01, "loss": 2.0224, "step": 15783 }, { "epoch": 1.6200738916256157, "grad_norm": 0.05948984995484352, "learning_rate": 0.01, "loss": 2.0425, "step": 15786 }, { "epoch": 1.6203817733990147, "grad_norm": 0.043075162917375565, "learning_rate": 0.01, "loss": 2.058, "step": 15789 }, { "epoch": 1.6206896551724137, "grad_norm": 0.06739038228988647, "learning_rate": 0.01, "loss": 2.0356, "step": 15792 }, { "epoch": 1.6209975369458127, "grad_norm": 0.05961238220334053, "learning_rate": 0.01, "loss": 2.0492, "step": 15795 }, { "epoch": 1.6213054187192117, "grad_norm": 0.06527238339185715, "learning_rate": 0.01, "loss": 2.0227, "step": 15798 }, { "epoch": 1.6216133004926108, "grad_norm": 0.09234929084777832, "learning_rate": 0.01, "loss": 2.039, "step": 15801 }, { "epoch": 1.6219211822660098, "grad_norm": 0.08050446212291718, "learning_rate": 0.01, "loss": 2.0769, "step": 15804 }, { "epoch": 1.6222290640394088, "grad_norm": 0.06419754028320312, "learning_rate": 0.01, "loss": 2.0613, "step": 15807 }, { "epoch": 1.6225369458128078, "grad_norm": 0.06302323937416077, "learning_rate": 0.01, "loss": 2.0535, "step": 15810 }, { "epoch": 1.6228448275862069, "grad_norm": 0.051602717489004135, "learning_rate": 0.01, "loss": 2.0462, "step": 15813 }, { "epoch": 1.6231527093596059, "grad_norm": 0.12424405664205551, "learning_rate": 0.01, "loss": 2.0562, "step": 15816 }, { "epoch": 1.623460591133005, "grad_norm": 0.10444232821464539, "learning_rate": 0.01, "loss": 2.0527, "step": 15819 }, { "epoch": 1.623768472906404, "grad_norm": 0.06170908361673355, "learning_rate": 0.01, "loss": 2.0456, "step": 15822 }, { "epoch": 1.624076354679803, "grad_norm": 0.05145244672894478, "learning_rate": 0.01, "loss": 2.0368, "step": 15825 }, { "epoch": 1.624384236453202, "grad_norm": 0.0459282286465168, "learning_rate": 0.01, "loss": 2.0547, "step": 15828 }, { "epoch": 1.624692118226601, "grad_norm": 0.05250949412584305, "learning_rate": 0.01, "loss": 2.0475, "step": 15831 }, { "epoch": 1.625, "grad_norm": 0.03222022205591202, "learning_rate": 0.01, "loss": 2.0347, "step": 15834 }, { "epoch": 1.625307881773399, "grad_norm": 0.05849120765924454, "learning_rate": 0.01, "loss": 2.0351, "step": 15837 }, { "epoch": 1.625615763546798, "grad_norm": 0.04638088122010231, "learning_rate": 0.01, "loss": 2.0222, "step": 15840 }, { "epoch": 1.625923645320197, "grad_norm": 0.046597037464380264, "learning_rate": 0.01, "loss": 2.0577, "step": 15843 }, { "epoch": 1.626231527093596, "grad_norm": 0.10477445274591446, "learning_rate": 0.01, "loss": 2.0528, "step": 15846 }, { "epoch": 1.626539408866995, "grad_norm": 0.03439783677458763, "learning_rate": 0.01, "loss": 2.0631, "step": 15849 }, { "epoch": 1.6268472906403941, "grad_norm": 0.05810544639825821, "learning_rate": 0.01, "loss": 2.0531, "step": 15852 }, { "epoch": 1.6271551724137931, "grad_norm": 0.04557522386312485, "learning_rate": 0.01, "loss": 2.0582, "step": 15855 }, { "epoch": 1.6274630541871922, "grad_norm": 0.04236530885100365, "learning_rate": 0.01, "loss": 2.0531, "step": 15858 }, { "epoch": 1.6277709359605912, "grad_norm": 0.04338948428630829, "learning_rate": 0.01, "loss": 2.0584, "step": 15861 }, { "epoch": 1.6280788177339902, "grad_norm": 0.039782583713531494, "learning_rate": 0.01, "loss": 2.0699, "step": 15864 }, { "epoch": 1.6283866995073892, "grad_norm": 0.06858891248703003, "learning_rate": 0.01, "loss": 2.0477, "step": 15867 }, { "epoch": 1.6286945812807883, "grad_norm": 0.0510399155318737, "learning_rate": 0.01, "loss": 2.0692, "step": 15870 }, { "epoch": 1.6290024630541873, "grad_norm": 0.12568604946136475, "learning_rate": 0.01, "loss": 2.0597, "step": 15873 }, { "epoch": 1.6293103448275863, "grad_norm": 0.09245727956295013, "learning_rate": 0.01, "loss": 2.0365, "step": 15876 }, { "epoch": 1.6296182266009853, "grad_norm": 0.05763734132051468, "learning_rate": 0.01, "loss": 2.0787, "step": 15879 }, { "epoch": 1.6299261083743843, "grad_norm": 0.06099852919578552, "learning_rate": 0.01, "loss": 2.0603, "step": 15882 }, { "epoch": 1.6302339901477834, "grad_norm": 0.05738021805882454, "learning_rate": 0.01, "loss": 2.0405, "step": 15885 }, { "epoch": 1.6305418719211824, "grad_norm": 0.04953853040933609, "learning_rate": 0.01, "loss": 2.0622, "step": 15888 }, { "epoch": 1.6308497536945814, "grad_norm": 0.08572196215391159, "learning_rate": 0.01, "loss": 2.0618, "step": 15891 }, { "epoch": 1.6311576354679804, "grad_norm": 0.09245479106903076, "learning_rate": 0.01, "loss": 2.0453, "step": 15894 }, { "epoch": 1.6314655172413794, "grad_norm": 0.057964712381362915, "learning_rate": 0.01, "loss": 2.0186, "step": 15897 }, { "epoch": 1.6317733990147785, "grad_norm": 0.05189305916428566, "learning_rate": 0.01, "loss": 2.0615, "step": 15900 }, { "epoch": 1.6320812807881775, "grad_norm": 0.07327884435653687, "learning_rate": 0.01, "loss": 2.0175, "step": 15903 }, { "epoch": 1.6323891625615765, "grad_norm": 0.07089177519083023, "learning_rate": 0.01, "loss": 2.0475, "step": 15906 }, { "epoch": 1.6326970443349755, "grad_norm": 0.09783073514699936, "learning_rate": 0.01, "loss": 2.051, "step": 15909 }, { "epoch": 1.6330049261083743, "grad_norm": 0.06617991626262665, "learning_rate": 0.01, "loss": 2.0408, "step": 15912 }, { "epoch": 1.6333128078817734, "grad_norm": 0.10033921152353287, "learning_rate": 0.01, "loss": 2.0308, "step": 15915 }, { "epoch": 1.6336206896551724, "grad_norm": 0.054432835429906845, "learning_rate": 0.01, "loss": 2.0404, "step": 15918 }, { "epoch": 1.6339285714285714, "grad_norm": 0.056940387934446335, "learning_rate": 0.01, "loss": 2.0566, "step": 15921 }, { "epoch": 1.6342364532019704, "grad_norm": 0.12047278136014938, "learning_rate": 0.01, "loss": 2.0464, "step": 15924 }, { "epoch": 1.6345443349753694, "grad_norm": 0.04637087881565094, "learning_rate": 0.01, "loss": 2.0514, "step": 15927 }, { "epoch": 1.6348522167487685, "grad_norm": 0.03925006836652756, "learning_rate": 0.01, "loss": 2.0536, "step": 15930 }, { "epoch": 1.6351600985221675, "grad_norm": 0.04180562496185303, "learning_rate": 0.01, "loss": 2.0206, "step": 15933 }, { "epoch": 1.6354679802955665, "grad_norm": 0.08031155914068222, "learning_rate": 0.01, "loss": 2.0509, "step": 15936 }, { "epoch": 1.6357758620689655, "grad_norm": 0.0812869518995285, "learning_rate": 0.01, "loss": 2.021, "step": 15939 }, { "epoch": 1.6360837438423645, "grad_norm": 0.07887094467878342, "learning_rate": 0.01, "loss": 2.0554, "step": 15942 }, { "epoch": 1.6363916256157636, "grad_norm": 0.12604457139968872, "learning_rate": 0.01, "loss": 2.0236, "step": 15945 }, { "epoch": 1.6366995073891626, "grad_norm": 0.1262006163597107, "learning_rate": 0.01, "loss": 2.0806, "step": 15948 }, { "epoch": 1.6370073891625616, "grad_norm": 0.07335629314184189, "learning_rate": 0.01, "loss": 2.0339, "step": 15951 }, { "epoch": 1.6373152709359606, "grad_norm": 0.043172985315322876, "learning_rate": 0.01, "loss": 2.0455, "step": 15954 }, { "epoch": 1.6376231527093597, "grad_norm": 0.07475942373275757, "learning_rate": 0.01, "loss": 2.0842, "step": 15957 }, { "epoch": 1.6379310344827587, "grad_norm": 0.06113087013363838, "learning_rate": 0.01, "loss": 2.0526, "step": 15960 }, { "epoch": 1.6382389162561575, "grad_norm": 0.08709672093391418, "learning_rate": 0.01, "loss": 2.0886, "step": 15963 }, { "epoch": 1.6385467980295565, "grad_norm": 0.05810529738664627, "learning_rate": 0.01, "loss": 2.043, "step": 15966 }, { "epoch": 1.6388546798029555, "grad_norm": 0.0831620916724205, "learning_rate": 0.01, "loss": 2.0599, "step": 15969 }, { "epoch": 1.6391625615763545, "grad_norm": 0.040577951818704605, "learning_rate": 0.01, "loss": 2.0676, "step": 15972 }, { "epoch": 1.6394704433497536, "grad_norm": 0.03428385406732559, "learning_rate": 0.01, "loss": 2.0491, "step": 15975 }, { "epoch": 1.6397783251231526, "grad_norm": 0.04800771176815033, "learning_rate": 0.01, "loss": 2.0371, "step": 15978 }, { "epoch": 1.6400862068965516, "grad_norm": 0.05769934877753258, "learning_rate": 0.01, "loss": 2.0468, "step": 15981 }, { "epoch": 1.6403940886699506, "grad_norm": 0.08842117339372635, "learning_rate": 0.01, "loss": 2.037, "step": 15984 }, { "epoch": 1.6407019704433496, "grad_norm": 0.09740414470434189, "learning_rate": 0.01, "loss": 2.0423, "step": 15987 }, { "epoch": 1.6410098522167487, "grad_norm": 0.11128890514373779, "learning_rate": 0.01, "loss": 2.0423, "step": 15990 }, { "epoch": 1.6413177339901477, "grad_norm": 0.03690354898571968, "learning_rate": 0.01, "loss": 2.0412, "step": 15993 }, { "epoch": 1.6416256157635467, "grad_norm": 0.07311075925827026, "learning_rate": 0.01, "loss": 2.036, "step": 15996 }, { "epoch": 1.6419334975369457, "grad_norm": 0.045825451612472534, "learning_rate": 0.01, "loss": 2.0491, "step": 15999 }, { "epoch": 1.6422413793103448, "grad_norm": 0.09123300760984421, "learning_rate": 0.01, "loss": 2.0574, "step": 16002 }, { "epoch": 1.6425492610837438, "grad_norm": 0.0702185183763504, "learning_rate": 0.01, "loss": 2.0715, "step": 16005 }, { "epoch": 1.6428571428571428, "grad_norm": 0.0355604812502861, "learning_rate": 0.01, "loss": 2.0461, "step": 16008 }, { "epoch": 1.6431650246305418, "grad_norm": 0.03151632100343704, "learning_rate": 0.01, "loss": 2.0205, "step": 16011 }, { "epoch": 1.6434729064039408, "grad_norm": 0.04302441328763962, "learning_rate": 0.01, "loss": 2.0824, "step": 16014 }, { "epoch": 1.6437807881773399, "grad_norm": 0.06012306734919548, "learning_rate": 0.01, "loss": 2.0494, "step": 16017 }, { "epoch": 1.6440886699507389, "grad_norm": 0.04698712378740311, "learning_rate": 0.01, "loss": 2.0364, "step": 16020 }, { "epoch": 1.644396551724138, "grad_norm": 0.03930363059043884, "learning_rate": 0.01, "loss": 2.0612, "step": 16023 }, { "epoch": 1.644704433497537, "grad_norm": 0.0881473496556282, "learning_rate": 0.01, "loss": 2.0724, "step": 16026 }, { "epoch": 1.645012315270936, "grad_norm": 0.04207085818052292, "learning_rate": 0.01, "loss": 2.0524, "step": 16029 }, { "epoch": 1.645320197044335, "grad_norm": 0.04729215428233147, "learning_rate": 0.01, "loss": 2.0538, "step": 16032 }, { "epoch": 1.645628078817734, "grad_norm": 0.050990305840969086, "learning_rate": 0.01, "loss": 2.0473, "step": 16035 }, { "epoch": 1.645935960591133, "grad_norm": 0.05049813538789749, "learning_rate": 0.01, "loss": 2.0609, "step": 16038 }, { "epoch": 1.646243842364532, "grad_norm": 0.07787630707025528, "learning_rate": 0.01, "loss": 2.0505, "step": 16041 }, { "epoch": 1.646551724137931, "grad_norm": 0.06656081229448318, "learning_rate": 0.01, "loss": 2.0365, "step": 16044 }, { "epoch": 1.64685960591133, "grad_norm": 0.08293109387159348, "learning_rate": 0.01, "loss": 2.062, "step": 16047 }, { "epoch": 1.647167487684729, "grad_norm": 0.0775810182094574, "learning_rate": 0.01, "loss": 2.0466, "step": 16050 }, { "epoch": 1.6474753694581281, "grad_norm": 0.07917825132608414, "learning_rate": 0.01, "loss": 2.067, "step": 16053 }, { "epoch": 1.6477832512315271, "grad_norm": 0.07658717036247253, "learning_rate": 0.01, "loss": 2.0323, "step": 16056 }, { "epoch": 1.6480911330049262, "grad_norm": 0.07735300809144974, "learning_rate": 0.01, "loss": 2.0481, "step": 16059 }, { "epoch": 1.6483990147783252, "grad_norm": 0.07964644581079483, "learning_rate": 0.01, "loss": 2.0469, "step": 16062 }, { "epoch": 1.6487068965517242, "grad_norm": 0.0601799339056015, "learning_rate": 0.01, "loss": 2.0453, "step": 16065 }, { "epoch": 1.6490147783251232, "grad_norm": 0.1039920225739479, "learning_rate": 0.01, "loss": 2.0474, "step": 16068 }, { "epoch": 1.6493226600985222, "grad_norm": 0.055755455046892166, "learning_rate": 0.01, "loss": 2.0615, "step": 16071 }, { "epoch": 1.6496305418719213, "grad_norm": 0.0998646542429924, "learning_rate": 0.01, "loss": 2.0675, "step": 16074 }, { "epoch": 1.6499384236453203, "grad_norm": 0.04582648724317551, "learning_rate": 0.01, "loss": 2.0277, "step": 16077 }, { "epoch": 1.6502463054187193, "grad_norm": 0.08638078719377518, "learning_rate": 0.01, "loss": 2.0473, "step": 16080 }, { "epoch": 1.6505541871921183, "grad_norm": 0.053813617676496506, "learning_rate": 0.01, "loss": 2.0488, "step": 16083 }, { "epoch": 1.6508620689655173, "grad_norm": 0.08186789602041245, "learning_rate": 0.01, "loss": 2.07, "step": 16086 }, { "epoch": 1.6511699507389164, "grad_norm": 0.037794895470142365, "learning_rate": 0.01, "loss": 2.0554, "step": 16089 }, { "epoch": 1.6514778325123154, "grad_norm": 0.1052238717675209, "learning_rate": 0.01, "loss": 2.0614, "step": 16092 }, { "epoch": 1.6517857142857144, "grad_norm": 0.07596205919981003, "learning_rate": 0.01, "loss": 2.0358, "step": 16095 }, { "epoch": 1.6520935960591134, "grad_norm": 0.047295670956373215, "learning_rate": 0.01, "loss": 2.0488, "step": 16098 }, { "epoch": 1.6524014778325125, "grad_norm": 0.05572659894824028, "learning_rate": 0.01, "loss": 2.0468, "step": 16101 }, { "epoch": 1.6527093596059115, "grad_norm": 0.0429069958627224, "learning_rate": 0.01, "loss": 2.0681, "step": 16104 }, { "epoch": 1.6530172413793105, "grad_norm": 0.055060967803001404, "learning_rate": 0.01, "loss": 2.0347, "step": 16107 }, { "epoch": 1.6533251231527095, "grad_norm": 0.05243745073676109, "learning_rate": 0.01, "loss": 2.0696, "step": 16110 }, { "epoch": 1.6536330049261085, "grad_norm": 0.052228983491659164, "learning_rate": 0.01, "loss": 2.06, "step": 16113 }, { "epoch": 1.6539408866995073, "grad_norm": 0.065925233066082, "learning_rate": 0.01, "loss": 2.0707, "step": 16116 }, { "epoch": 1.6542487684729064, "grad_norm": 0.05819106101989746, "learning_rate": 0.01, "loss": 2.0137, "step": 16119 }, { "epoch": 1.6545566502463054, "grad_norm": 0.04320794716477394, "learning_rate": 0.01, "loss": 2.0691, "step": 16122 }, { "epoch": 1.6548645320197044, "grad_norm": 0.04202846437692642, "learning_rate": 0.01, "loss": 2.0456, "step": 16125 }, { "epoch": 1.6551724137931034, "grad_norm": 0.12747296690940857, "learning_rate": 0.01, "loss": 2.0419, "step": 16128 }, { "epoch": 1.6554802955665024, "grad_norm": 0.07199030369520187, "learning_rate": 0.01, "loss": 2.0347, "step": 16131 }, { "epoch": 1.6557881773399015, "grad_norm": 0.085335373878479, "learning_rate": 0.01, "loss": 2.0383, "step": 16134 }, { "epoch": 1.6560960591133005, "grad_norm": 0.061818841844797134, "learning_rate": 0.01, "loss": 2.0631, "step": 16137 }, { "epoch": 1.6564039408866995, "grad_norm": 0.06255804747343063, "learning_rate": 0.01, "loss": 2.0492, "step": 16140 }, { "epoch": 1.6567118226600985, "grad_norm": 0.08308485150337219, "learning_rate": 0.01, "loss": 2.0814, "step": 16143 }, { "epoch": 1.6570197044334976, "grad_norm": 0.06358073651790619, "learning_rate": 0.01, "loss": 2.048, "step": 16146 }, { "epoch": 1.6573275862068966, "grad_norm": 0.085427425801754, "learning_rate": 0.01, "loss": 2.0433, "step": 16149 }, { "epoch": 1.6576354679802956, "grad_norm": 0.043243568390607834, "learning_rate": 0.01, "loss": 2.0432, "step": 16152 }, { "epoch": 1.6579433497536946, "grad_norm": 0.06593325734138489, "learning_rate": 0.01, "loss": 2.0469, "step": 16155 }, { "epoch": 1.6582512315270936, "grad_norm": 0.14644569158554077, "learning_rate": 0.01, "loss": 2.0689, "step": 16158 }, { "epoch": 1.6585591133004927, "grad_norm": 0.1211152896285057, "learning_rate": 0.01, "loss": 2.0505, "step": 16161 }, { "epoch": 1.6588669950738915, "grad_norm": 0.11020830273628235, "learning_rate": 0.01, "loss": 2.0572, "step": 16164 }, { "epoch": 1.6591748768472905, "grad_norm": 0.08850467950105667, "learning_rate": 0.01, "loss": 2.0424, "step": 16167 }, { "epoch": 1.6594827586206895, "grad_norm": 0.050562698394060135, "learning_rate": 0.01, "loss": 2.0542, "step": 16170 }, { "epoch": 1.6597906403940885, "grad_norm": 0.048076871782541275, "learning_rate": 0.01, "loss": 2.0433, "step": 16173 }, { "epoch": 1.6600985221674875, "grad_norm": 0.03727034851908684, "learning_rate": 0.01, "loss": 2.0333, "step": 16176 }, { "epoch": 1.6604064039408866, "grad_norm": 0.048614371567964554, "learning_rate": 0.01, "loss": 2.0552, "step": 16179 }, { "epoch": 1.6607142857142856, "grad_norm": 0.05649641901254654, "learning_rate": 0.01, "loss": 2.0536, "step": 16182 }, { "epoch": 1.6610221674876846, "grad_norm": 0.05329003930091858, "learning_rate": 0.01, "loss": 2.0386, "step": 16185 }, { "epoch": 1.6613300492610836, "grad_norm": 0.06444583833217621, "learning_rate": 0.01, "loss": 2.055, "step": 16188 }, { "epoch": 1.6616379310344827, "grad_norm": 0.045777902007102966, "learning_rate": 0.01, "loss": 2.0476, "step": 16191 }, { "epoch": 1.6619458128078817, "grad_norm": 0.04831868037581444, "learning_rate": 0.01, "loss": 2.0582, "step": 16194 }, { "epoch": 1.6622536945812807, "grad_norm": 0.10648196935653687, "learning_rate": 0.01, "loss": 2.0579, "step": 16197 }, { "epoch": 1.6625615763546797, "grad_norm": 0.08369257301092148, "learning_rate": 0.01, "loss": 2.0505, "step": 16200 }, { "epoch": 1.6628694581280787, "grad_norm": 0.13716475665569305, "learning_rate": 0.01, "loss": 2.0383, "step": 16203 }, { "epoch": 1.6631773399014778, "grad_norm": 0.05025027319788933, "learning_rate": 0.01, "loss": 2.0549, "step": 16206 }, { "epoch": 1.6634852216748768, "grad_norm": 0.03850054368376732, "learning_rate": 0.01, "loss": 2.0412, "step": 16209 }, { "epoch": 1.6637931034482758, "grad_norm": 0.046656832098960876, "learning_rate": 0.01, "loss": 2.0595, "step": 16212 }, { "epoch": 1.6641009852216748, "grad_norm": 0.03826647624373436, "learning_rate": 0.01, "loss": 2.0352, "step": 16215 }, { "epoch": 1.6644088669950738, "grad_norm": 0.061087023466825485, "learning_rate": 0.01, "loss": 2.0357, "step": 16218 }, { "epoch": 1.6647167487684729, "grad_norm": 0.03787006065249443, "learning_rate": 0.01, "loss": 2.0226, "step": 16221 }, { "epoch": 1.6650246305418719, "grad_norm": 0.09619265049695969, "learning_rate": 0.01, "loss": 2.0399, "step": 16224 }, { "epoch": 1.665332512315271, "grad_norm": 0.04012330621480942, "learning_rate": 0.01, "loss": 2.044, "step": 16227 }, { "epoch": 1.66564039408867, "grad_norm": 0.062126293778419495, "learning_rate": 0.01, "loss": 2.0726, "step": 16230 }, { "epoch": 1.665948275862069, "grad_norm": 0.050277624279260635, "learning_rate": 0.01, "loss": 2.0219, "step": 16233 }, { "epoch": 1.666256157635468, "grad_norm": 0.03983129933476448, "learning_rate": 0.01, "loss": 2.0554, "step": 16236 }, { "epoch": 1.666564039408867, "grad_norm": 0.13119915127754211, "learning_rate": 0.01, "loss": 2.0682, "step": 16239 }, { "epoch": 1.666871921182266, "grad_norm": 0.0525536946952343, "learning_rate": 0.01, "loss": 2.0524, "step": 16242 }, { "epoch": 1.667179802955665, "grad_norm": 0.056762780994176865, "learning_rate": 0.01, "loss": 2.0293, "step": 16245 }, { "epoch": 1.667487684729064, "grad_norm": 0.08652041852474213, "learning_rate": 0.01, "loss": 2.0574, "step": 16248 }, { "epoch": 1.667795566502463, "grad_norm": 0.14455944299697876, "learning_rate": 0.01, "loss": 2.0406, "step": 16251 }, { "epoch": 1.668103448275862, "grad_norm": 0.03951118513941765, "learning_rate": 0.01, "loss": 2.0368, "step": 16254 }, { "epoch": 1.6684113300492611, "grad_norm": 0.040585123002529144, "learning_rate": 0.01, "loss": 2.017, "step": 16257 }, { "epoch": 1.6687192118226601, "grad_norm": 0.05393810570240021, "learning_rate": 0.01, "loss": 2.0679, "step": 16260 }, { "epoch": 1.6690270935960592, "grad_norm": 0.050093088299036026, "learning_rate": 0.01, "loss": 2.0546, "step": 16263 }, { "epoch": 1.6693349753694582, "grad_norm": 0.04196159914135933, "learning_rate": 0.01, "loss": 2.0488, "step": 16266 }, { "epoch": 1.6696428571428572, "grad_norm": 0.03978092968463898, "learning_rate": 0.01, "loss": 2.0453, "step": 16269 }, { "epoch": 1.6699507389162562, "grad_norm": 0.05054232105612755, "learning_rate": 0.01, "loss": 2.0337, "step": 16272 }, { "epoch": 1.6702586206896552, "grad_norm": 0.0746975764632225, "learning_rate": 0.01, "loss": 2.0636, "step": 16275 }, { "epoch": 1.6705665024630543, "grad_norm": 0.05685516446828842, "learning_rate": 0.01, "loss": 2.0591, "step": 16278 }, { "epoch": 1.6708743842364533, "grad_norm": 0.031971871852874756, "learning_rate": 0.01, "loss": 2.0657, "step": 16281 }, { "epoch": 1.6711822660098523, "grad_norm": 0.03947863727807999, "learning_rate": 0.01, "loss": 2.0333, "step": 16284 }, { "epoch": 1.6714901477832513, "grad_norm": 0.11271070688962936, "learning_rate": 0.01, "loss": 2.0421, "step": 16287 }, { "epoch": 1.6717980295566504, "grad_norm": 0.05308755114674568, "learning_rate": 0.01, "loss": 2.0536, "step": 16290 }, { "epoch": 1.6721059113300494, "grad_norm": 0.042826078832149506, "learning_rate": 0.01, "loss": 2.0694, "step": 16293 }, { "epoch": 1.6724137931034484, "grad_norm": 0.0458630695939064, "learning_rate": 0.01, "loss": 2.0312, "step": 16296 }, { "epoch": 1.6727216748768474, "grad_norm": 0.05401900038123131, "learning_rate": 0.01, "loss": 2.0613, "step": 16299 }, { "epoch": 1.6730295566502464, "grad_norm": 0.05380195751786232, "learning_rate": 0.01, "loss": 2.0336, "step": 16302 }, { "epoch": 1.6733374384236455, "grad_norm": 0.038716450333595276, "learning_rate": 0.01, "loss": 2.0548, "step": 16305 }, { "epoch": 1.6736453201970445, "grad_norm": 0.04034694656729698, "learning_rate": 0.01, "loss": 2.0421, "step": 16308 }, { "epoch": 1.6739532019704435, "grad_norm": 0.06753403693437576, "learning_rate": 0.01, "loss": 2.0324, "step": 16311 }, { "epoch": 1.6742610837438425, "grad_norm": 0.10001173615455627, "learning_rate": 0.01, "loss": 2.0467, "step": 16314 }, { "epoch": 1.6745689655172413, "grad_norm": 0.04366351664066315, "learning_rate": 0.01, "loss": 2.0622, "step": 16317 }, { "epoch": 1.6748768472906403, "grad_norm": 0.07137630879878998, "learning_rate": 0.01, "loss": 2.0333, "step": 16320 }, { "epoch": 1.6751847290640394, "grad_norm": 0.049938492476940155, "learning_rate": 0.01, "loss": 2.0426, "step": 16323 }, { "epoch": 1.6754926108374384, "grad_norm": 0.03337172046303749, "learning_rate": 0.01, "loss": 2.0462, "step": 16326 }, { "epoch": 1.6758004926108374, "grad_norm": 0.07407473772764206, "learning_rate": 0.01, "loss": 2.0705, "step": 16329 }, { "epoch": 1.6761083743842364, "grad_norm": 0.07006946206092834, "learning_rate": 0.01, "loss": 2.0383, "step": 16332 }, { "epoch": 1.6764162561576355, "grad_norm": 0.05342825874686241, "learning_rate": 0.01, "loss": 2.0481, "step": 16335 }, { "epoch": 1.6767241379310345, "grad_norm": 0.052405234426259995, "learning_rate": 0.01, "loss": 2.0138, "step": 16338 }, { "epoch": 1.6770320197044335, "grad_norm": 0.20231324434280396, "learning_rate": 0.01, "loss": 2.0472, "step": 16341 }, { "epoch": 1.6773399014778325, "grad_norm": 0.07893595844507217, "learning_rate": 0.01, "loss": 2.0415, "step": 16344 }, { "epoch": 1.6776477832512315, "grad_norm": 0.06872416287660599, "learning_rate": 0.01, "loss": 2.0376, "step": 16347 }, { "epoch": 1.6779556650246306, "grad_norm": 0.041687123477458954, "learning_rate": 0.01, "loss": 2.0442, "step": 16350 }, { "epoch": 1.6782635467980296, "grad_norm": 0.04184873029589653, "learning_rate": 0.01, "loss": 2.0769, "step": 16353 }, { "epoch": 1.6785714285714286, "grad_norm": 0.036598458886146545, "learning_rate": 0.01, "loss": 2.0255, "step": 16356 }, { "epoch": 1.6788793103448276, "grad_norm": 0.062203384935855865, "learning_rate": 0.01, "loss": 2.0582, "step": 16359 }, { "epoch": 1.6791871921182266, "grad_norm": 0.04513971135020256, "learning_rate": 0.01, "loss": 2.0475, "step": 16362 }, { "epoch": 1.6794950738916257, "grad_norm": 0.043875399976968765, "learning_rate": 0.01, "loss": 2.0455, "step": 16365 }, { "epoch": 1.6798029556650245, "grad_norm": 0.030207300558686256, "learning_rate": 0.01, "loss": 2.0733, "step": 16368 }, { "epoch": 1.6801108374384235, "grad_norm": 0.07749854028224945, "learning_rate": 0.01, "loss": 2.0402, "step": 16371 }, { "epoch": 1.6804187192118225, "grad_norm": 0.10269973427057266, "learning_rate": 0.01, "loss": 2.0342, "step": 16374 }, { "epoch": 1.6807266009852215, "grad_norm": 0.043558500707149506, "learning_rate": 0.01, "loss": 2.0429, "step": 16377 }, { "epoch": 1.6810344827586206, "grad_norm": 0.0490686409175396, "learning_rate": 0.01, "loss": 2.0381, "step": 16380 }, { "epoch": 1.6813423645320196, "grad_norm": 0.062107689678668976, "learning_rate": 0.01, "loss": 2.0592, "step": 16383 }, { "epoch": 1.6816502463054186, "grad_norm": 0.0856776013970375, "learning_rate": 0.01, "loss": 2.0666, "step": 16386 }, { "epoch": 1.6819581280788176, "grad_norm": 0.11694356054067612, "learning_rate": 0.01, "loss": 2.0545, "step": 16389 }, { "epoch": 1.6822660098522166, "grad_norm": 0.07279752194881439, "learning_rate": 0.01, "loss": 2.0348, "step": 16392 }, { "epoch": 1.6825738916256157, "grad_norm": 0.06813056766986847, "learning_rate": 0.01, "loss": 2.0549, "step": 16395 }, { "epoch": 1.6828817733990147, "grad_norm": 0.045916907489299774, "learning_rate": 0.01, "loss": 2.0714, "step": 16398 }, { "epoch": 1.6831896551724137, "grad_norm": 0.04464447498321533, "learning_rate": 0.01, "loss": 2.0655, "step": 16401 }, { "epoch": 1.6834975369458127, "grad_norm": 0.04815223440527916, "learning_rate": 0.01, "loss": 2.0633, "step": 16404 }, { "epoch": 1.6838054187192117, "grad_norm": 0.06025001034140587, "learning_rate": 0.01, "loss": 2.0325, "step": 16407 }, { "epoch": 1.6841133004926108, "grad_norm": 0.05691540613770485, "learning_rate": 0.01, "loss": 2.07, "step": 16410 }, { "epoch": 1.6844211822660098, "grad_norm": 0.04643694683909416, "learning_rate": 0.01, "loss": 2.0478, "step": 16413 }, { "epoch": 1.6847290640394088, "grad_norm": 0.03540325164794922, "learning_rate": 0.01, "loss": 2.0739, "step": 16416 }, { "epoch": 1.6850369458128078, "grad_norm": 0.034472569823265076, "learning_rate": 0.01, "loss": 2.0441, "step": 16419 }, { "epoch": 1.6853448275862069, "grad_norm": 0.04316902533173561, "learning_rate": 0.01, "loss": 2.0422, "step": 16422 }, { "epoch": 1.6856527093596059, "grad_norm": 0.04943558946251869, "learning_rate": 0.01, "loss": 2.0377, "step": 16425 }, { "epoch": 1.685960591133005, "grad_norm": 0.11482315510511398, "learning_rate": 0.01, "loss": 2.0668, "step": 16428 }, { "epoch": 1.686268472906404, "grad_norm": 0.10594377666711807, "learning_rate": 0.01, "loss": 2.0513, "step": 16431 }, { "epoch": 1.686576354679803, "grad_norm": 0.09860610961914062, "learning_rate": 0.01, "loss": 2.0456, "step": 16434 }, { "epoch": 1.686884236453202, "grad_norm": 0.06849053502082825, "learning_rate": 0.01, "loss": 2.0645, "step": 16437 }, { "epoch": 1.687192118226601, "grad_norm": 0.05089464411139488, "learning_rate": 0.01, "loss": 2.0383, "step": 16440 }, { "epoch": 1.6875, "grad_norm": 0.04762034863233566, "learning_rate": 0.01, "loss": 2.0443, "step": 16443 }, { "epoch": 1.687807881773399, "grad_norm": 0.09014497697353363, "learning_rate": 0.01, "loss": 2.0736, "step": 16446 }, { "epoch": 1.688115763546798, "grad_norm": 0.06832917779684067, "learning_rate": 0.01, "loss": 2.0677, "step": 16449 }, { "epoch": 1.688423645320197, "grad_norm": 0.0529920794069767, "learning_rate": 0.01, "loss": 2.0423, "step": 16452 }, { "epoch": 1.688731527093596, "grad_norm": 0.03208652511239052, "learning_rate": 0.01, "loss": 2.0561, "step": 16455 }, { "epoch": 1.689039408866995, "grad_norm": 0.13702784478664398, "learning_rate": 0.01, "loss": 2.0393, "step": 16458 }, { "epoch": 1.6893472906403941, "grad_norm": 0.05972970649600029, "learning_rate": 0.01, "loss": 2.0795, "step": 16461 }, { "epoch": 1.6896551724137931, "grad_norm": 0.043536797165870667, "learning_rate": 0.01, "loss": 2.0622, "step": 16464 }, { "epoch": 1.6899630541871922, "grad_norm": 0.0556536540389061, "learning_rate": 0.01, "loss": 2.0523, "step": 16467 }, { "epoch": 1.6902709359605912, "grad_norm": 0.06583042442798615, "learning_rate": 0.01, "loss": 2.0568, "step": 16470 }, { "epoch": 1.6905788177339902, "grad_norm": 0.0535028837621212, "learning_rate": 0.01, "loss": 2.0427, "step": 16473 }, { "epoch": 1.6908866995073892, "grad_norm": 0.09974632412195206, "learning_rate": 0.01, "loss": 2.0589, "step": 16476 }, { "epoch": 1.6911945812807883, "grad_norm": 0.058350350707769394, "learning_rate": 0.01, "loss": 2.0392, "step": 16479 }, { "epoch": 1.6915024630541873, "grad_norm": 0.10049036890268326, "learning_rate": 0.01, "loss": 2.0643, "step": 16482 }, { "epoch": 1.6918103448275863, "grad_norm": 0.061119675636291504, "learning_rate": 0.01, "loss": 2.0455, "step": 16485 }, { "epoch": 1.6921182266009853, "grad_norm": 0.07189033925533295, "learning_rate": 0.01, "loss": 2.0629, "step": 16488 }, { "epoch": 1.6924261083743843, "grad_norm": 0.08962611109018326, "learning_rate": 0.01, "loss": 2.0586, "step": 16491 }, { "epoch": 1.6927339901477834, "grad_norm": 0.05600450560450554, "learning_rate": 0.01, "loss": 2.0434, "step": 16494 }, { "epoch": 1.6930418719211824, "grad_norm": 0.1281098574399948, "learning_rate": 0.01, "loss": 2.0241, "step": 16497 }, { "epoch": 1.6933497536945814, "grad_norm": 0.036696117371320724, "learning_rate": 0.01, "loss": 2.065, "step": 16500 }, { "epoch": 1.6936576354679804, "grad_norm": 0.12428770959377289, "learning_rate": 0.01, "loss": 2.0479, "step": 16503 }, { "epoch": 1.6939655172413794, "grad_norm": 0.07593953609466553, "learning_rate": 0.01, "loss": 2.0357, "step": 16506 }, { "epoch": 1.6942733990147785, "grad_norm": 0.0686376765370369, "learning_rate": 0.01, "loss": 2.05, "step": 16509 }, { "epoch": 1.6945812807881775, "grad_norm": 0.044805269688367844, "learning_rate": 0.01, "loss": 2.0514, "step": 16512 }, { "epoch": 1.6948891625615765, "grad_norm": 0.04698259010910988, "learning_rate": 0.01, "loss": 2.0505, "step": 16515 }, { "epoch": 1.6951970443349755, "grad_norm": 0.04546966403722763, "learning_rate": 0.01, "loss": 2.0265, "step": 16518 }, { "epoch": 1.6955049261083743, "grad_norm": 0.07239431142807007, "learning_rate": 0.01, "loss": 2.0403, "step": 16521 }, { "epoch": 1.6958128078817734, "grad_norm": 0.08790195733308792, "learning_rate": 0.01, "loss": 2.0721, "step": 16524 }, { "epoch": 1.6961206896551724, "grad_norm": 0.05445432290434837, "learning_rate": 0.01, "loss": 2.039, "step": 16527 }, { "epoch": 1.6964285714285714, "grad_norm": 0.048141270875930786, "learning_rate": 0.01, "loss": 2.0191, "step": 16530 }, { "epoch": 1.6967364532019704, "grad_norm": 0.05230564624071121, "learning_rate": 0.01, "loss": 2.0646, "step": 16533 }, { "epoch": 1.6970443349753694, "grad_norm": 0.1007009968161583, "learning_rate": 0.01, "loss": 2.0751, "step": 16536 }, { "epoch": 1.6973522167487685, "grad_norm": 0.03878286853432655, "learning_rate": 0.01, "loss": 2.0257, "step": 16539 }, { "epoch": 1.6976600985221675, "grad_norm": 0.08503543585538864, "learning_rate": 0.01, "loss": 2.0516, "step": 16542 }, { "epoch": 1.6979679802955665, "grad_norm": 0.06239473819732666, "learning_rate": 0.01, "loss": 2.057, "step": 16545 }, { "epoch": 1.6982758620689655, "grad_norm": 0.06893055140972137, "learning_rate": 0.01, "loss": 2.0435, "step": 16548 }, { "epoch": 1.6985837438423645, "grad_norm": 0.08434829860925674, "learning_rate": 0.01, "loss": 2.0319, "step": 16551 }, { "epoch": 1.6988916256157636, "grad_norm": 0.031773362308740616, "learning_rate": 0.01, "loss": 2.0585, "step": 16554 }, { "epoch": 1.6991995073891626, "grad_norm": 0.11598584800958633, "learning_rate": 0.01, "loss": 2.0423, "step": 16557 }, { "epoch": 1.6995073891625616, "grad_norm": 0.07008111476898193, "learning_rate": 0.01, "loss": 2.0787, "step": 16560 }, { "epoch": 1.6998152709359606, "grad_norm": 0.03940622881054878, "learning_rate": 0.01, "loss": 2.0525, "step": 16563 }, { "epoch": 1.7001231527093597, "grad_norm": 0.05206933617591858, "learning_rate": 0.01, "loss": 2.0671, "step": 16566 }, { "epoch": 1.7004310344827587, "grad_norm": 0.04568307474255562, "learning_rate": 0.01, "loss": 2.0413, "step": 16569 }, { "epoch": 1.7007389162561575, "grad_norm": 0.031628433614969254, "learning_rate": 0.01, "loss": 2.0323, "step": 16572 }, { "epoch": 1.7010467980295565, "grad_norm": 0.05636722221970558, "learning_rate": 0.01, "loss": 2.0403, "step": 16575 }, { "epoch": 1.7013546798029555, "grad_norm": 0.11134552955627441, "learning_rate": 0.01, "loss": 2.034, "step": 16578 }, { "epoch": 1.7016625615763545, "grad_norm": 0.06964823603630066, "learning_rate": 0.01, "loss": 2.0701, "step": 16581 }, { "epoch": 1.7019704433497536, "grad_norm": 0.041148003190755844, "learning_rate": 0.01, "loss": 2.0693, "step": 16584 }, { "epoch": 1.7022783251231526, "grad_norm": 0.03673578426241875, "learning_rate": 0.01, "loss": 2.0232, "step": 16587 }, { "epoch": 1.7025862068965516, "grad_norm": 0.03659043833613396, "learning_rate": 0.01, "loss": 2.0257, "step": 16590 }, { "epoch": 1.7028940886699506, "grad_norm": 0.03824566677212715, "learning_rate": 0.01, "loss": 2.0587, "step": 16593 }, { "epoch": 1.7032019704433496, "grad_norm": 0.07334180176258087, "learning_rate": 0.01, "loss": 2.0626, "step": 16596 }, { "epoch": 1.7035098522167487, "grad_norm": 0.055927857756614685, "learning_rate": 0.01, "loss": 2.0311, "step": 16599 }, { "epoch": 1.7038177339901477, "grad_norm": 0.07610691338777542, "learning_rate": 0.01, "loss": 2.0419, "step": 16602 }, { "epoch": 1.7041256157635467, "grad_norm": 0.06405298411846161, "learning_rate": 0.01, "loss": 2.0693, "step": 16605 }, { "epoch": 1.7044334975369457, "grad_norm": 0.06193486601114273, "learning_rate": 0.01, "loss": 2.0442, "step": 16608 }, { "epoch": 1.7047413793103448, "grad_norm": 0.12181366235017776, "learning_rate": 0.01, "loss": 2.0324, "step": 16611 }, { "epoch": 1.7050492610837438, "grad_norm": 0.049060508608818054, "learning_rate": 0.01, "loss": 2.044, "step": 16614 }, { "epoch": 1.7053571428571428, "grad_norm": 0.05021090805530548, "learning_rate": 0.01, "loss": 2.0501, "step": 16617 }, { "epoch": 1.7056650246305418, "grad_norm": 0.045171257108449936, "learning_rate": 0.01, "loss": 2.0418, "step": 16620 }, { "epoch": 1.7059729064039408, "grad_norm": 0.04944808408617973, "learning_rate": 0.01, "loss": 2.0576, "step": 16623 }, { "epoch": 1.7062807881773399, "grad_norm": 0.03556932508945465, "learning_rate": 0.01, "loss": 2.0405, "step": 16626 }, { "epoch": 1.7065886699507389, "grad_norm": 0.10005172342061996, "learning_rate": 0.01, "loss": 2.0422, "step": 16629 }, { "epoch": 1.706896551724138, "grad_norm": 0.04088572412729263, "learning_rate": 0.01, "loss": 2.0526, "step": 16632 }, { "epoch": 1.707204433497537, "grad_norm": 0.04937949404120445, "learning_rate": 0.01, "loss": 2.0656, "step": 16635 }, { "epoch": 1.707512315270936, "grad_norm": 0.07822302728891373, "learning_rate": 0.01, "loss": 2.0545, "step": 16638 }, { "epoch": 1.707820197044335, "grad_norm": 0.05767158418893814, "learning_rate": 0.01, "loss": 2.0515, "step": 16641 }, { "epoch": 1.708128078817734, "grad_norm": 0.08512212336063385, "learning_rate": 0.01, "loss": 2.0579, "step": 16644 }, { "epoch": 1.708435960591133, "grad_norm": 0.06758993119001389, "learning_rate": 0.01, "loss": 2.0307, "step": 16647 }, { "epoch": 1.708743842364532, "grad_norm": 0.08142005652189255, "learning_rate": 0.01, "loss": 2.0531, "step": 16650 }, { "epoch": 1.709051724137931, "grad_norm": 0.06357218325138092, "learning_rate": 0.01, "loss": 2.0475, "step": 16653 }, { "epoch": 1.70935960591133, "grad_norm": 0.10591546446084976, "learning_rate": 0.01, "loss": 2.0447, "step": 16656 }, { "epoch": 1.709667487684729, "grad_norm": 0.06571496278047562, "learning_rate": 0.01, "loss": 2.0176, "step": 16659 }, { "epoch": 1.7099753694581281, "grad_norm": 0.049450814723968506, "learning_rate": 0.01, "loss": 2.0522, "step": 16662 }, { "epoch": 1.7102832512315271, "grad_norm": 0.11850273609161377, "learning_rate": 0.01, "loss": 2.0575, "step": 16665 }, { "epoch": 1.7105911330049262, "grad_norm": 0.0952281728386879, "learning_rate": 0.01, "loss": 2.0442, "step": 16668 }, { "epoch": 1.7108990147783252, "grad_norm": 0.09431217610836029, "learning_rate": 0.01, "loss": 2.0403, "step": 16671 }, { "epoch": 1.7112068965517242, "grad_norm": 0.07080823183059692, "learning_rate": 0.01, "loss": 2.0111, "step": 16674 }, { "epoch": 1.7115147783251232, "grad_norm": 0.049033813178539276, "learning_rate": 0.01, "loss": 2.0489, "step": 16677 }, { "epoch": 1.7118226600985222, "grad_norm": 0.04356718435883522, "learning_rate": 0.01, "loss": 2.035, "step": 16680 }, { "epoch": 1.7121305418719213, "grad_norm": 0.03276592493057251, "learning_rate": 0.01, "loss": 2.0396, "step": 16683 }, { "epoch": 1.7124384236453203, "grad_norm": 0.04438839852809906, "learning_rate": 0.01, "loss": 2.0441, "step": 16686 }, { "epoch": 1.7127463054187193, "grad_norm": 0.07276454567909241, "learning_rate": 0.01, "loss": 2.0513, "step": 16689 }, { "epoch": 1.7130541871921183, "grad_norm": 0.11324001848697662, "learning_rate": 0.01, "loss": 2.0482, "step": 16692 }, { "epoch": 1.7133620689655173, "grad_norm": 0.14715081453323364, "learning_rate": 0.01, "loss": 2.048, "step": 16695 }, { "epoch": 1.7136699507389164, "grad_norm": 0.07661852240562439, "learning_rate": 0.01, "loss": 2.0396, "step": 16698 }, { "epoch": 1.7139778325123154, "grad_norm": 0.05308947339653969, "learning_rate": 0.01, "loss": 2.0601, "step": 16701 }, { "epoch": 1.7142857142857144, "grad_norm": 0.06816977262496948, "learning_rate": 0.01, "loss": 2.0456, "step": 16704 }, { "epoch": 1.7145935960591134, "grad_norm": 0.05123249441385269, "learning_rate": 0.01, "loss": 2.0532, "step": 16707 }, { "epoch": 1.7149014778325125, "grad_norm": 0.05118009075522423, "learning_rate": 0.01, "loss": 2.0659, "step": 16710 }, { "epoch": 1.7152093596059115, "grad_norm": 0.03276235982775688, "learning_rate": 0.01, "loss": 2.0718, "step": 16713 }, { "epoch": 1.7155172413793105, "grad_norm": 0.049824655055999756, "learning_rate": 0.01, "loss": 2.0388, "step": 16716 }, { "epoch": 1.7158251231527095, "grad_norm": 0.1416471302509308, "learning_rate": 0.01, "loss": 2.0526, "step": 16719 }, { "epoch": 1.7161330049261085, "grad_norm": 0.04109251871705055, "learning_rate": 0.01, "loss": 2.0329, "step": 16722 }, { "epoch": 1.7164408866995073, "grad_norm": 0.08853971213102341, "learning_rate": 0.01, "loss": 2.0506, "step": 16725 }, { "epoch": 1.7167487684729064, "grad_norm": 0.05180136114358902, "learning_rate": 0.01, "loss": 2.0608, "step": 16728 }, { "epoch": 1.7170566502463054, "grad_norm": 0.0667758584022522, "learning_rate": 0.01, "loss": 2.0347, "step": 16731 }, { "epoch": 1.7173645320197044, "grad_norm": 0.039203155785799026, "learning_rate": 0.01, "loss": 2.0331, "step": 16734 }, { "epoch": 1.7176724137931034, "grad_norm": 0.05210564285516739, "learning_rate": 0.01, "loss": 2.0666, "step": 16737 }, { "epoch": 1.7179802955665024, "grad_norm": 0.0668390691280365, "learning_rate": 0.01, "loss": 2.0365, "step": 16740 }, { "epoch": 1.7182881773399015, "grad_norm": 0.05041831359267235, "learning_rate": 0.01, "loss": 2.0261, "step": 16743 }, { "epoch": 1.7185960591133005, "grad_norm": 0.04496284946799278, "learning_rate": 0.01, "loss": 2.0182, "step": 16746 }, { "epoch": 1.7189039408866995, "grad_norm": 0.08660906553268433, "learning_rate": 0.01, "loss": 2.0434, "step": 16749 }, { "epoch": 1.7192118226600985, "grad_norm": 0.054843079298734665, "learning_rate": 0.01, "loss": 2.0522, "step": 16752 }, { "epoch": 1.7195197044334976, "grad_norm": 0.05377354100346565, "learning_rate": 0.01, "loss": 2.0263, "step": 16755 }, { "epoch": 1.7198275862068966, "grad_norm": 0.060547634959220886, "learning_rate": 0.01, "loss": 2.0165, "step": 16758 }, { "epoch": 1.7201354679802956, "grad_norm": 0.06253104656934738, "learning_rate": 0.01, "loss": 2.0479, "step": 16761 }, { "epoch": 1.7204433497536946, "grad_norm": 0.052225805819034576, "learning_rate": 0.01, "loss": 2.03, "step": 16764 }, { "epoch": 1.7207512315270936, "grad_norm": 0.04988449066877365, "learning_rate": 0.01, "loss": 2.0498, "step": 16767 }, { "epoch": 1.7210591133004927, "grad_norm": 0.049726665019989014, "learning_rate": 0.01, "loss": 2.0588, "step": 16770 }, { "epoch": 1.7213669950738915, "grad_norm": 0.05053321272134781, "learning_rate": 0.01, "loss": 2.0518, "step": 16773 }, { "epoch": 1.7216748768472905, "grad_norm": 0.060702208429574966, "learning_rate": 0.01, "loss": 2.0448, "step": 16776 }, { "epoch": 1.7219827586206895, "grad_norm": 0.06695824861526489, "learning_rate": 0.01, "loss": 2.0361, "step": 16779 }, { "epoch": 1.7222906403940885, "grad_norm": 0.07498425990343094, "learning_rate": 0.01, "loss": 2.0652, "step": 16782 }, { "epoch": 1.7225985221674875, "grad_norm": 0.04544251784682274, "learning_rate": 0.01, "loss": 2.0294, "step": 16785 }, { "epoch": 1.7229064039408866, "grad_norm": 0.09211461246013641, "learning_rate": 0.01, "loss": 2.0261, "step": 16788 }, { "epoch": 1.7232142857142856, "grad_norm": 0.19020068645477295, "learning_rate": 0.01, "loss": 2.0459, "step": 16791 }, { "epoch": 1.7235221674876846, "grad_norm": 0.08979224413633347, "learning_rate": 0.01, "loss": 2.052, "step": 16794 }, { "epoch": 1.7238300492610836, "grad_norm": 0.06724744290113449, "learning_rate": 0.01, "loss": 2.0463, "step": 16797 }, { "epoch": 1.7241379310344827, "grad_norm": 0.05009309947490692, "learning_rate": 0.01, "loss": 2.0338, "step": 16800 }, { "epoch": 1.7244458128078817, "grad_norm": 0.04953375831246376, "learning_rate": 0.01, "loss": 2.0554, "step": 16803 }, { "epoch": 1.7247536945812807, "grad_norm": 0.12439639121294022, "learning_rate": 0.01, "loss": 2.0363, "step": 16806 }, { "epoch": 1.7250615763546797, "grad_norm": 0.126046821475029, "learning_rate": 0.01, "loss": 2.0435, "step": 16809 }, { "epoch": 1.7253694581280787, "grad_norm": 0.06693071871995926, "learning_rate": 0.01, "loss": 2.058, "step": 16812 }, { "epoch": 1.7256773399014778, "grad_norm": 0.04883793368935585, "learning_rate": 0.01, "loss": 2.0768, "step": 16815 }, { "epoch": 1.7259852216748768, "grad_norm": 0.06895219534635544, "learning_rate": 0.01, "loss": 2.057, "step": 16818 }, { "epoch": 1.7262931034482758, "grad_norm": 0.06979874521493912, "learning_rate": 0.01, "loss": 2.0641, "step": 16821 }, { "epoch": 1.7266009852216748, "grad_norm": 0.06453370302915573, "learning_rate": 0.01, "loss": 2.025, "step": 16824 }, { "epoch": 1.7269088669950738, "grad_norm": 0.11405997723340988, "learning_rate": 0.01, "loss": 2.0469, "step": 16827 }, { "epoch": 1.7272167487684729, "grad_norm": 0.10612863302230835, "learning_rate": 0.01, "loss": 2.0825, "step": 16830 }, { "epoch": 1.7275246305418719, "grad_norm": 0.09805281460285187, "learning_rate": 0.01, "loss": 2.0256, "step": 16833 }, { "epoch": 1.727832512315271, "grad_norm": 0.08369138836860657, "learning_rate": 0.01, "loss": 2.0307, "step": 16836 }, { "epoch": 1.72814039408867, "grad_norm": 0.08893299847841263, "learning_rate": 0.01, "loss": 2.0362, "step": 16839 }, { "epoch": 1.728448275862069, "grad_norm": 0.06902442872524261, "learning_rate": 0.01, "loss": 2.0562, "step": 16842 }, { "epoch": 1.728756157635468, "grad_norm": 0.058703918009996414, "learning_rate": 0.01, "loss": 2.0406, "step": 16845 }, { "epoch": 1.729064039408867, "grad_norm": 0.03979343920946121, "learning_rate": 0.01, "loss": 2.0636, "step": 16848 }, { "epoch": 1.729371921182266, "grad_norm": 0.08049800246953964, "learning_rate": 0.01, "loss": 2.0483, "step": 16851 }, { "epoch": 1.729679802955665, "grad_norm": 0.07980278134346008, "learning_rate": 0.01, "loss": 2.0471, "step": 16854 }, { "epoch": 1.729987684729064, "grad_norm": 0.051650844514369965, "learning_rate": 0.01, "loss": 2.0322, "step": 16857 }, { "epoch": 1.730295566502463, "grad_norm": 0.06828897446393967, "learning_rate": 0.01, "loss": 2.0456, "step": 16860 }, { "epoch": 1.730603448275862, "grad_norm": 0.10987185686826706, "learning_rate": 0.01, "loss": 2.0334, "step": 16863 }, { "epoch": 1.7309113300492611, "grad_norm": 0.055677276104688644, "learning_rate": 0.01, "loss": 2.018, "step": 16866 }, { "epoch": 1.7312192118226601, "grad_norm": 0.056478437036275864, "learning_rate": 0.01, "loss": 2.0241, "step": 16869 }, { "epoch": 1.7315270935960592, "grad_norm": 0.1247091144323349, "learning_rate": 0.01, "loss": 2.0128, "step": 16872 }, { "epoch": 1.7318349753694582, "grad_norm": 0.06603918969631195, "learning_rate": 0.01, "loss": 2.0456, "step": 16875 }, { "epoch": 1.7321428571428572, "grad_norm": 0.04174170270562172, "learning_rate": 0.01, "loss": 2.0349, "step": 16878 }, { "epoch": 1.7324507389162562, "grad_norm": 0.03841250762343407, "learning_rate": 0.01, "loss": 2.0365, "step": 16881 }, { "epoch": 1.7327586206896552, "grad_norm": 0.03429241105914116, "learning_rate": 0.01, "loss": 2.038, "step": 16884 }, { "epoch": 1.7330665024630543, "grad_norm": 0.05175672471523285, "learning_rate": 0.01, "loss": 2.0583, "step": 16887 }, { "epoch": 1.7333743842364533, "grad_norm": 0.06318958103656769, "learning_rate": 0.01, "loss": 2.0398, "step": 16890 }, { "epoch": 1.7336822660098523, "grad_norm": 0.08888188004493713, "learning_rate": 0.01, "loss": 2.0447, "step": 16893 }, { "epoch": 1.7339901477832513, "grad_norm": 0.04479747265577316, "learning_rate": 0.01, "loss": 2.0671, "step": 16896 }, { "epoch": 1.7342980295566504, "grad_norm": 0.05286455899477005, "learning_rate": 0.01, "loss": 2.0342, "step": 16899 }, { "epoch": 1.7346059113300494, "grad_norm": 0.04719952121376991, "learning_rate": 0.01, "loss": 2.0251, "step": 16902 }, { "epoch": 1.7349137931034484, "grad_norm": 0.06204066798090935, "learning_rate": 0.01, "loss": 2.0412, "step": 16905 }, { "epoch": 1.7352216748768474, "grad_norm": 0.1099078357219696, "learning_rate": 0.01, "loss": 2.008, "step": 16908 }, { "epoch": 1.7355295566502464, "grad_norm": 0.08311746269464493, "learning_rate": 0.01, "loss": 2.0248, "step": 16911 }, { "epoch": 1.7358374384236455, "grad_norm": 0.09649046510457993, "learning_rate": 0.01, "loss": 2.0585, "step": 16914 }, { "epoch": 1.7361453201970445, "grad_norm": 0.06716254353523254, "learning_rate": 0.01, "loss": 2.0128, "step": 16917 }, { "epoch": 1.7364532019704435, "grad_norm": 0.05400918424129486, "learning_rate": 0.01, "loss": 2.035, "step": 16920 }, { "epoch": 1.7367610837438425, "grad_norm": 0.06857965141534805, "learning_rate": 0.01, "loss": 2.0394, "step": 16923 }, { "epoch": 1.7370689655172413, "grad_norm": 0.05842095986008644, "learning_rate": 0.01, "loss": 2.0418, "step": 16926 }, { "epoch": 1.7373768472906403, "grad_norm": 0.06279005855321884, "learning_rate": 0.01, "loss": 2.0528, "step": 16929 }, { "epoch": 1.7376847290640394, "grad_norm": 0.04209805652499199, "learning_rate": 0.01, "loss": 2.0374, "step": 16932 }, { "epoch": 1.7379926108374384, "grad_norm": 0.07557601481676102, "learning_rate": 0.01, "loss": 2.0351, "step": 16935 }, { "epoch": 1.7383004926108374, "grad_norm": 0.05998634174466133, "learning_rate": 0.01, "loss": 2.0283, "step": 16938 }, { "epoch": 1.7386083743842364, "grad_norm": 0.05477887764573097, "learning_rate": 0.01, "loss": 2.0659, "step": 16941 }, { "epoch": 1.7389162561576355, "grad_norm": 0.09489591419696808, "learning_rate": 0.01, "loss": 2.0313, "step": 16944 }, { "epoch": 1.7392241379310345, "grad_norm": 0.13861320912837982, "learning_rate": 0.01, "loss": 2.0778, "step": 16947 }, { "epoch": 1.7395320197044335, "grad_norm": 0.1302078664302826, "learning_rate": 0.01, "loss": 2.024, "step": 16950 }, { "epoch": 1.7398399014778325, "grad_norm": 0.08335380256175995, "learning_rate": 0.01, "loss": 2.0612, "step": 16953 }, { "epoch": 1.7401477832512315, "grad_norm": 0.06901963800191879, "learning_rate": 0.01, "loss": 2.0357, "step": 16956 }, { "epoch": 1.7404556650246306, "grad_norm": 0.06619597226381302, "learning_rate": 0.01, "loss": 2.0207, "step": 16959 }, { "epoch": 1.7407635467980296, "grad_norm": 0.045459166169166565, "learning_rate": 0.01, "loss": 2.071, "step": 16962 }, { "epoch": 1.7410714285714286, "grad_norm": 0.06302090734243393, "learning_rate": 0.01, "loss": 2.0311, "step": 16965 }, { "epoch": 1.7413793103448276, "grad_norm": 0.07184530049562454, "learning_rate": 0.01, "loss": 2.0309, "step": 16968 }, { "epoch": 1.7416871921182266, "grad_norm": 0.07319154590368271, "learning_rate": 0.01, "loss": 2.0316, "step": 16971 }, { "epoch": 1.7419950738916257, "grad_norm": 0.0667872503399849, "learning_rate": 0.01, "loss": 2.0406, "step": 16974 }, { "epoch": 1.7423029556650245, "grad_norm": 0.0988057404756546, "learning_rate": 0.01, "loss": 2.0466, "step": 16977 }, { "epoch": 1.7426108374384235, "grad_norm": 0.09142420440912247, "learning_rate": 0.01, "loss": 2.0503, "step": 16980 }, { "epoch": 1.7429187192118225, "grad_norm": 0.07581423968076706, "learning_rate": 0.01, "loss": 2.0004, "step": 16983 }, { "epoch": 1.7432266009852215, "grad_norm": 0.05793863534927368, "learning_rate": 0.01, "loss": 2.0278, "step": 16986 }, { "epoch": 1.7435344827586206, "grad_norm": 0.0717446580529213, "learning_rate": 0.01, "loss": 2.061, "step": 16989 }, { "epoch": 1.7438423645320196, "grad_norm": 0.03572774678468704, "learning_rate": 0.01, "loss": 2.0663, "step": 16992 }, { "epoch": 1.7441502463054186, "grad_norm": 0.09349701553583145, "learning_rate": 0.01, "loss": 2.0433, "step": 16995 }, { "epoch": 1.7444581280788176, "grad_norm": 0.08374779671430588, "learning_rate": 0.01, "loss": 2.0228, "step": 16998 }, { "epoch": 1.7447660098522166, "grad_norm": 0.045029062777757645, "learning_rate": 0.01, "loss": 2.0425, "step": 17001 }, { "epoch": 1.7450738916256157, "grad_norm": 0.06359710544347763, "learning_rate": 0.01, "loss": 2.073, "step": 17004 }, { "epoch": 1.7453817733990147, "grad_norm": 0.0948299989104271, "learning_rate": 0.01, "loss": 2.0326, "step": 17007 }, { "epoch": 1.7456896551724137, "grad_norm": 0.05561600998044014, "learning_rate": 0.01, "loss": 2.0702, "step": 17010 }, { "epoch": 1.7459975369458127, "grad_norm": 0.05672406032681465, "learning_rate": 0.01, "loss": 2.0591, "step": 17013 }, { "epoch": 1.7463054187192117, "grad_norm": 0.047100841999053955, "learning_rate": 0.01, "loss": 2.0597, "step": 17016 }, { "epoch": 1.7466133004926108, "grad_norm": 0.09477028995752335, "learning_rate": 0.01, "loss": 2.046, "step": 17019 }, { "epoch": 1.7469211822660098, "grad_norm": 0.07235399633646011, "learning_rate": 0.01, "loss": 2.0778, "step": 17022 }, { "epoch": 1.7472290640394088, "grad_norm": 0.08015649020671844, "learning_rate": 0.01, "loss": 2.0219, "step": 17025 }, { "epoch": 1.7475369458128078, "grad_norm": 0.07459922134876251, "learning_rate": 0.01, "loss": 2.0278, "step": 17028 }, { "epoch": 1.7478448275862069, "grad_norm": 0.10745642334222794, "learning_rate": 0.01, "loss": 2.0536, "step": 17031 }, { "epoch": 1.7481527093596059, "grad_norm": 0.049479395151138306, "learning_rate": 0.01, "loss": 2.0477, "step": 17034 }, { "epoch": 1.748460591133005, "grad_norm": 0.03935602307319641, "learning_rate": 0.01, "loss": 2.0495, "step": 17037 }, { "epoch": 1.748768472906404, "grad_norm": 0.05755804106593132, "learning_rate": 0.01, "loss": 2.0186, "step": 17040 }, { "epoch": 1.749076354679803, "grad_norm": 0.07461614906787872, "learning_rate": 0.01, "loss": 2.0577, "step": 17043 }, { "epoch": 1.749384236453202, "grad_norm": 0.07078621536493301, "learning_rate": 0.01, "loss": 2.0159, "step": 17046 }, { "epoch": 1.749692118226601, "grad_norm": 0.12035417556762695, "learning_rate": 0.01, "loss": 2.0549, "step": 17049 }, { "epoch": 1.75, "grad_norm": 0.054343827068805695, "learning_rate": 0.01, "loss": 2.0394, "step": 17052 }, { "epoch": 1.750307881773399, "grad_norm": 0.056529540568590164, "learning_rate": 0.01, "loss": 2.0604, "step": 17055 }, { "epoch": 1.750615763546798, "grad_norm": 0.09392616152763367, "learning_rate": 0.01, "loss": 2.0233, "step": 17058 }, { "epoch": 1.750923645320197, "grad_norm": 0.0874391570687294, "learning_rate": 0.01, "loss": 2.0733, "step": 17061 }, { "epoch": 1.751231527093596, "grad_norm": 0.03889552876353264, "learning_rate": 0.01, "loss": 2.0099, "step": 17064 }, { "epoch": 1.751539408866995, "grad_norm": 0.06299902498722076, "learning_rate": 0.01, "loss": 2.01, "step": 17067 }, { "epoch": 1.7518472906403941, "grad_norm": 0.05655315890908241, "learning_rate": 0.01, "loss": 2.0571, "step": 17070 }, { "epoch": 1.7521551724137931, "grad_norm": 0.04646646976470947, "learning_rate": 0.01, "loss": 2.0401, "step": 17073 }, { "epoch": 1.7524630541871922, "grad_norm": 0.04910219460725784, "learning_rate": 0.01, "loss": 2.0527, "step": 17076 }, { "epoch": 1.7527709359605912, "grad_norm": 0.03616022691130638, "learning_rate": 0.01, "loss": 2.0152, "step": 17079 }, { "epoch": 1.7530788177339902, "grad_norm": 0.05539899319410324, "learning_rate": 0.01, "loss": 2.029, "step": 17082 }, { "epoch": 1.7533866995073892, "grad_norm": 0.09409206360578537, "learning_rate": 0.01, "loss": 2.0621, "step": 17085 }, { "epoch": 1.7536945812807883, "grad_norm": 0.048882581293582916, "learning_rate": 0.01, "loss": 2.057, "step": 17088 }, { "epoch": 1.7540024630541873, "grad_norm": 0.09687826037406921, "learning_rate": 0.01, "loss": 2.0472, "step": 17091 }, { "epoch": 1.7543103448275863, "grad_norm": 0.06646592915058136, "learning_rate": 0.01, "loss": 2.0469, "step": 17094 }, { "epoch": 1.7546182266009853, "grad_norm": 0.07316572964191437, "learning_rate": 0.01, "loss": 2.0427, "step": 17097 }, { "epoch": 1.7549261083743843, "grad_norm": 0.058037593960762024, "learning_rate": 0.01, "loss": 2.0462, "step": 17100 }, { "epoch": 1.7552339901477834, "grad_norm": 0.05486461892724037, "learning_rate": 0.01, "loss": 2.0426, "step": 17103 }, { "epoch": 1.7555418719211824, "grad_norm": 0.0397610180079937, "learning_rate": 0.01, "loss": 2.0397, "step": 17106 }, { "epoch": 1.7558497536945814, "grad_norm": 0.11639061570167542, "learning_rate": 0.01, "loss": 2.0248, "step": 17109 }, { "epoch": 1.7561576354679804, "grad_norm": 0.04207361862063408, "learning_rate": 0.01, "loss": 2.064, "step": 17112 }, { "epoch": 1.7564655172413794, "grad_norm": 0.09336013346910477, "learning_rate": 0.01, "loss": 2.0399, "step": 17115 }, { "epoch": 1.7567733990147785, "grad_norm": 0.059693820774555206, "learning_rate": 0.01, "loss": 2.0347, "step": 17118 }, { "epoch": 1.7570812807881775, "grad_norm": 0.05269778147339821, "learning_rate": 0.01, "loss": 2.0222, "step": 17121 }, { "epoch": 1.7573891625615765, "grad_norm": 0.0548628568649292, "learning_rate": 0.01, "loss": 2.0269, "step": 17124 }, { "epoch": 1.7576970443349755, "grad_norm": 0.049783483147621155, "learning_rate": 0.01, "loss": 2.0469, "step": 17127 }, { "epoch": 1.7580049261083743, "grad_norm": 0.11240525543689728, "learning_rate": 0.01, "loss": 2.0405, "step": 17130 }, { "epoch": 1.7583128078817734, "grad_norm": 0.04133368283510208, "learning_rate": 0.01, "loss": 2.064, "step": 17133 }, { "epoch": 1.7586206896551724, "grad_norm": 0.042926132678985596, "learning_rate": 0.01, "loss": 2.0685, "step": 17136 }, { "epoch": 1.7589285714285714, "grad_norm": 0.053613241761922836, "learning_rate": 0.01, "loss": 2.0241, "step": 17139 }, { "epoch": 1.7592364532019704, "grad_norm": 0.03950737044215202, "learning_rate": 0.01, "loss": 2.0662, "step": 17142 }, { "epoch": 1.7595443349753694, "grad_norm": 0.045378413051366806, "learning_rate": 0.01, "loss": 2.0401, "step": 17145 }, { "epoch": 1.7598522167487685, "grad_norm": 0.036304816603660583, "learning_rate": 0.01, "loss": 2.0789, "step": 17148 }, { "epoch": 1.7601600985221675, "grad_norm": 0.03886290267109871, "learning_rate": 0.01, "loss": 2.0516, "step": 17151 }, { "epoch": 1.7604679802955665, "grad_norm": 0.0885484591126442, "learning_rate": 0.01, "loss": 2.0333, "step": 17154 }, { "epoch": 1.7607758620689655, "grad_norm": 0.06733599305152893, "learning_rate": 0.01, "loss": 2.039, "step": 17157 }, { "epoch": 1.7610837438423645, "grad_norm": 0.10319662094116211, "learning_rate": 0.01, "loss": 2.0629, "step": 17160 }, { "epoch": 1.7613916256157636, "grad_norm": 0.047492869198322296, "learning_rate": 0.01, "loss": 2.0726, "step": 17163 }, { "epoch": 1.7616995073891626, "grad_norm": 0.04345547780394554, "learning_rate": 0.01, "loss": 2.0259, "step": 17166 }, { "epoch": 1.7620073891625616, "grad_norm": 0.0452197827398777, "learning_rate": 0.01, "loss": 2.0393, "step": 17169 }, { "epoch": 1.7623152709359606, "grad_norm": 0.0703844428062439, "learning_rate": 0.01, "loss": 2.0458, "step": 17172 }, { "epoch": 1.7626231527093597, "grad_norm": 0.07864879071712494, "learning_rate": 0.01, "loss": 2.047, "step": 17175 }, { "epoch": 1.7629310344827587, "grad_norm": 0.1282995641231537, "learning_rate": 0.01, "loss": 2.0226, "step": 17178 }, { "epoch": 1.7632389162561575, "grad_norm": 0.0837298184633255, "learning_rate": 0.01, "loss": 2.0483, "step": 17181 }, { "epoch": 1.7635467980295565, "grad_norm": 0.05081562697887421, "learning_rate": 0.01, "loss": 2.0656, "step": 17184 }, { "epoch": 1.7638546798029555, "grad_norm": 0.07952243834733963, "learning_rate": 0.01, "loss": 2.0561, "step": 17187 }, { "epoch": 1.7641625615763545, "grad_norm": 0.06592147797346115, "learning_rate": 0.01, "loss": 2.0311, "step": 17190 }, { "epoch": 1.7644704433497536, "grad_norm": 0.04341195523738861, "learning_rate": 0.01, "loss": 2.0413, "step": 17193 }, { "epoch": 1.7647783251231526, "grad_norm": 0.04649266228079796, "learning_rate": 0.01, "loss": 2.0338, "step": 17196 }, { "epoch": 1.7650862068965516, "grad_norm": 0.04569242149591446, "learning_rate": 0.01, "loss": 2.0428, "step": 17199 }, { "epoch": 1.7653940886699506, "grad_norm": 0.040291350334882736, "learning_rate": 0.01, "loss": 2.0165, "step": 17202 }, { "epoch": 1.7657019704433496, "grad_norm": 0.05328141525387764, "learning_rate": 0.01, "loss": 2.0384, "step": 17205 }, { "epoch": 1.7660098522167487, "grad_norm": 0.04405885562300682, "learning_rate": 0.01, "loss": 2.0566, "step": 17208 }, { "epoch": 1.7663177339901477, "grad_norm": 0.06635614484548569, "learning_rate": 0.01, "loss": 2.0397, "step": 17211 }, { "epoch": 1.7666256157635467, "grad_norm": 0.09231774508953094, "learning_rate": 0.01, "loss": 2.0247, "step": 17214 }, { "epoch": 1.7669334975369457, "grad_norm": 0.056320998817682266, "learning_rate": 0.01, "loss": 2.066, "step": 17217 }, { "epoch": 1.7672413793103448, "grad_norm": 0.049784105271101, "learning_rate": 0.01, "loss": 2.0175, "step": 17220 }, { "epoch": 1.7675492610837438, "grad_norm": 0.03728071227669716, "learning_rate": 0.01, "loss": 2.0403, "step": 17223 }, { "epoch": 1.7678571428571428, "grad_norm": 0.06607525050640106, "learning_rate": 0.01, "loss": 2.0364, "step": 17226 }, { "epoch": 1.7681650246305418, "grad_norm": 0.07367686927318573, "learning_rate": 0.01, "loss": 2.0478, "step": 17229 }, { "epoch": 1.7684729064039408, "grad_norm": 0.039499782025814056, "learning_rate": 0.01, "loss": 2.0508, "step": 17232 }, { "epoch": 1.7687807881773399, "grad_norm": 0.04863186180591583, "learning_rate": 0.01, "loss": 2.0595, "step": 17235 }, { "epoch": 1.7690886699507389, "grad_norm": 0.03877348452806473, "learning_rate": 0.01, "loss": 2.0608, "step": 17238 }, { "epoch": 1.769396551724138, "grad_norm": 0.049965135753154755, "learning_rate": 0.01, "loss": 2.0521, "step": 17241 }, { "epoch": 1.769704433497537, "grad_norm": 0.0697547048330307, "learning_rate": 0.01, "loss": 2.0191, "step": 17244 }, { "epoch": 1.770012315270936, "grad_norm": 0.0562531016767025, "learning_rate": 0.01, "loss": 2.0581, "step": 17247 }, { "epoch": 1.770320197044335, "grad_norm": 0.12931805849075317, "learning_rate": 0.01, "loss": 2.072, "step": 17250 }, { "epoch": 1.770628078817734, "grad_norm": 0.06590058654546738, "learning_rate": 0.01, "loss": 2.0487, "step": 17253 }, { "epoch": 1.770935960591133, "grad_norm": 0.045246463268995285, "learning_rate": 0.01, "loss": 2.0424, "step": 17256 }, { "epoch": 1.771243842364532, "grad_norm": 0.03972258046269417, "learning_rate": 0.01, "loss": 2.043, "step": 17259 }, { "epoch": 1.771551724137931, "grad_norm": 0.030682874843478203, "learning_rate": 0.01, "loss": 2.0665, "step": 17262 }, { "epoch": 1.77185960591133, "grad_norm": 0.08989464491605759, "learning_rate": 0.01, "loss": 2.042, "step": 17265 }, { "epoch": 1.772167487684729, "grad_norm": 0.05595966801047325, "learning_rate": 0.01, "loss": 2.0399, "step": 17268 }, { "epoch": 1.7724753694581281, "grad_norm": 0.16923703253269196, "learning_rate": 0.01, "loss": 2.0161, "step": 17271 }, { "epoch": 1.7727832512315271, "grad_norm": 0.08722022920846939, "learning_rate": 0.01, "loss": 2.0379, "step": 17274 }, { "epoch": 1.7730911330049262, "grad_norm": 0.0741046667098999, "learning_rate": 0.01, "loss": 2.0512, "step": 17277 }, { "epoch": 1.7733990147783252, "grad_norm": 0.06061973422765732, "learning_rate": 0.01, "loss": 2.0318, "step": 17280 }, { "epoch": 1.7737068965517242, "grad_norm": 0.036843594163656235, "learning_rate": 0.01, "loss": 2.056, "step": 17283 }, { "epoch": 1.7740147783251232, "grad_norm": 0.03937767818570137, "learning_rate": 0.01, "loss": 2.019, "step": 17286 }, { "epoch": 1.7743226600985222, "grad_norm": 0.03801162540912628, "learning_rate": 0.01, "loss": 2.04, "step": 17289 }, { "epoch": 1.7746305418719213, "grad_norm": 0.045572392642498016, "learning_rate": 0.01, "loss": 2.0665, "step": 17292 }, { "epoch": 1.7749384236453203, "grad_norm": 0.06430240720510483, "learning_rate": 0.01, "loss": 2.0357, "step": 17295 }, { "epoch": 1.7752463054187193, "grad_norm": 0.09266401827335358, "learning_rate": 0.01, "loss": 2.0474, "step": 17298 }, { "epoch": 1.7755541871921183, "grad_norm": 0.09686179459095001, "learning_rate": 0.01, "loss": 2.0224, "step": 17301 }, { "epoch": 1.7758620689655173, "grad_norm": 0.04640132188796997, "learning_rate": 0.01, "loss": 2.0611, "step": 17304 }, { "epoch": 1.7761699507389164, "grad_norm": 0.03891894221305847, "learning_rate": 0.01, "loss": 2.0529, "step": 17307 }, { "epoch": 1.7764778325123154, "grad_norm": 0.06023077294230461, "learning_rate": 0.01, "loss": 2.0282, "step": 17310 }, { "epoch": 1.7767857142857144, "grad_norm": 0.12215135246515274, "learning_rate": 0.01, "loss": 2.0472, "step": 17313 }, { "epoch": 1.7770935960591134, "grad_norm": 0.04197768121957779, "learning_rate": 0.01, "loss": 2.038, "step": 17316 }, { "epoch": 1.7774014778325125, "grad_norm": 0.0429445244371891, "learning_rate": 0.01, "loss": 2.0291, "step": 17319 }, { "epoch": 1.7777093596059115, "grad_norm": 0.04674970358610153, "learning_rate": 0.01, "loss": 2.0493, "step": 17322 }, { "epoch": 1.7780172413793105, "grad_norm": 0.11712675541639328, "learning_rate": 0.01, "loss": 2.0421, "step": 17325 }, { "epoch": 1.7783251231527095, "grad_norm": 0.04812907800078392, "learning_rate": 0.01, "loss": 2.0395, "step": 17328 }, { "epoch": 1.7786330049261085, "grad_norm": 0.04147825017571449, "learning_rate": 0.01, "loss": 2.0057, "step": 17331 }, { "epoch": 1.7789408866995073, "grad_norm": 0.07262876629829407, "learning_rate": 0.01, "loss": 2.0383, "step": 17334 }, { "epoch": 1.7792487684729064, "grad_norm": 0.08528011292219162, "learning_rate": 0.01, "loss": 2.0151, "step": 17337 }, { "epoch": 1.7795566502463054, "grad_norm": 0.046615902334451675, "learning_rate": 0.01, "loss": 2.0368, "step": 17340 }, { "epoch": 1.7798645320197044, "grad_norm": 0.06018273904919624, "learning_rate": 0.01, "loss": 2.0411, "step": 17343 }, { "epoch": 1.7801724137931034, "grad_norm": 0.07272887974977493, "learning_rate": 0.01, "loss": 2.026, "step": 17346 }, { "epoch": 1.7804802955665024, "grad_norm": 0.07152794301509857, "learning_rate": 0.01, "loss": 2.0631, "step": 17349 }, { "epoch": 1.7807881773399015, "grad_norm": 0.07950329035520554, "learning_rate": 0.01, "loss": 2.0435, "step": 17352 }, { "epoch": 1.7810960591133005, "grad_norm": 0.040778059512376785, "learning_rate": 0.01, "loss": 2.0089, "step": 17355 }, { "epoch": 1.7814039408866995, "grad_norm": 0.06180460751056671, "learning_rate": 0.01, "loss": 2.0183, "step": 17358 }, { "epoch": 1.7817118226600985, "grad_norm": 0.06950334459543228, "learning_rate": 0.01, "loss": 2.0352, "step": 17361 }, { "epoch": 1.7820197044334976, "grad_norm": 0.037724483758211136, "learning_rate": 0.01, "loss": 2.0324, "step": 17364 }, { "epoch": 1.7823275862068966, "grad_norm": 0.05991238355636597, "learning_rate": 0.01, "loss": 2.053, "step": 17367 }, { "epoch": 1.7826354679802956, "grad_norm": 0.047278665006160736, "learning_rate": 0.01, "loss": 2.0427, "step": 17370 }, { "epoch": 1.7829433497536946, "grad_norm": 0.05376293137669563, "learning_rate": 0.01, "loss": 2.0315, "step": 17373 }, { "epoch": 1.7832512315270936, "grad_norm": 0.04049403965473175, "learning_rate": 0.01, "loss": 2.0483, "step": 17376 }, { "epoch": 1.7835591133004927, "grad_norm": 0.04954640567302704, "learning_rate": 0.01, "loss": 2.0393, "step": 17379 }, { "epoch": 1.7838669950738915, "grad_norm": 0.049089133739471436, "learning_rate": 0.01, "loss": 2.0633, "step": 17382 }, { "epoch": 1.7841748768472905, "grad_norm": 0.0531185045838356, "learning_rate": 0.01, "loss": 2.0474, "step": 17385 }, { "epoch": 1.7844827586206895, "grad_norm": 0.060973040759563446, "learning_rate": 0.01, "loss": 2.0219, "step": 17388 }, { "epoch": 1.7847906403940885, "grad_norm": 0.044274650514125824, "learning_rate": 0.01, "loss": 2.0403, "step": 17391 }, { "epoch": 1.7850985221674875, "grad_norm": 0.08154580742120743, "learning_rate": 0.01, "loss": 2.011, "step": 17394 }, { "epoch": 1.7854064039408866, "grad_norm": 0.05253531411290169, "learning_rate": 0.01, "loss": 2.0352, "step": 17397 }, { "epoch": 1.7857142857142856, "grad_norm": 0.056620582938194275, "learning_rate": 0.01, "loss": 2.04, "step": 17400 }, { "epoch": 1.7860221674876846, "grad_norm": 0.069371297955513, "learning_rate": 0.01, "loss": 2.0456, "step": 17403 }, { "epoch": 1.7863300492610836, "grad_norm": 0.04726189747452736, "learning_rate": 0.01, "loss": 2.018, "step": 17406 }, { "epoch": 1.7866379310344827, "grad_norm": 0.11150949448347092, "learning_rate": 0.01, "loss": 2.0503, "step": 17409 }, { "epoch": 1.7869458128078817, "grad_norm": 0.07482532411813736, "learning_rate": 0.01, "loss": 2.0361, "step": 17412 }, { "epoch": 1.7872536945812807, "grad_norm": 0.03803645819425583, "learning_rate": 0.01, "loss": 2.0555, "step": 17415 }, { "epoch": 1.7875615763546797, "grad_norm": 0.08635829389095306, "learning_rate": 0.01, "loss": 2.0551, "step": 17418 }, { "epoch": 1.7878694581280787, "grad_norm": 0.08558929711580276, "learning_rate": 0.01, "loss": 2.0611, "step": 17421 }, { "epoch": 1.7881773399014778, "grad_norm": 0.051051054149866104, "learning_rate": 0.01, "loss": 2.0375, "step": 17424 }, { "epoch": 1.7884852216748768, "grad_norm": 0.0584864616394043, "learning_rate": 0.01, "loss": 2.0131, "step": 17427 }, { "epoch": 1.7887931034482758, "grad_norm": 0.04015490040183067, "learning_rate": 0.01, "loss": 2.0559, "step": 17430 }, { "epoch": 1.7891009852216748, "grad_norm": 0.0499749630689621, "learning_rate": 0.01, "loss": 2.0611, "step": 17433 }, { "epoch": 1.7894088669950738, "grad_norm": 0.08796360343694687, "learning_rate": 0.01, "loss": 2.0538, "step": 17436 }, { "epoch": 1.7897167487684729, "grad_norm": 0.08200754970312119, "learning_rate": 0.01, "loss": 2.0459, "step": 17439 }, { "epoch": 1.7900246305418719, "grad_norm": 0.09300393611192703, "learning_rate": 0.01, "loss": 2.051, "step": 17442 }, { "epoch": 1.790332512315271, "grad_norm": 0.08223576098680496, "learning_rate": 0.01, "loss": 2.0194, "step": 17445 }, { "epoch": 1.79064039408867, "grad_norm": 0.05235210806131363, "learning_rate": 0.01, "loss": 2.0421, "step": 17448 }, { "epoch": 1.790948275862069, "grad_norm": 0.047677502036094666, "learning_rate": 0.01, "loss": 2.043, "step": 17451 }, { "epoch": 1.791256157635468, "grad_norm": 0.044341955333948135, "learning_rate": 0.01, "loss": 2.0381, "step": 17454 }, { "epoch": 1.791564039408867, "grad_norm": 0.09555595368146896, "learning_rate": 0.01, "loss": 2.0224, "step": 17457 }, { "epoch": 1.791871921182266, "grad_norm": 0.05652477219700813, "learning_rate": 0.01, "loss": 2.0318, "step": 17460 }, { "epoch": 1.792179802955665, "grad_norm": 0.0979117676615715, "learning_rate": 0.01, "loss": 2.0747, "step": 17463 }, { "epoch": 1.792487684729064, "grad_norm": 0.0674947127699852, "learning_rate": 0.01, "loss": 2.0723, "step": 17466 }, { "epoch": 1.792795566502463, "grad_norm": 0.05617907643318176, "learning_rate": 0.01, "loss": 2.0444, "step": 17469 }, { "epoch": 1.793103448275862, "grad_norm": 0.10979234427213669, "learning_rate": 0.01, "loss": 2.0638, "step": 17472 }, { "epoch": 1.7934113300492611, "grad_norm": 0.056006476283073425, "learning_rate": 0.01, "loss": 2.0396, "step": 17475 }, { "epoch": 1.7937192118226601, "grad_norm": 0.10030517727136612, "learning_rate": 0.01, "loss": 2.0379, "step": 17478 }, { "epoch": 1.7940270935960592, "grad_norm": 0.042350657284259796, "learning_rate": 0.01, "loss": 2.0319, "step": 17481 }, { "epoch": 1.7943349753694582, "grad_norm": 0.03725098446011543, "learning_rate": 0.01, "loss": 2.0537, "step": 17484 }, { "epoch": 1.7946428571428572, "grad_norm": 0.09215757250785828, "learning_rate": 0.01, "loss": 2.0247, "step": 17487 }, { "epoch": 1.7949507389162562, "grad_norm": 0.08012344688177109, "learning_rate": 0.01, "loss": 2.0428, "step": 17490 }, { "epoch": 1.7952586206896552, "grad_norm": 0.128404900431633, "learning_rate": 0.01, "loss": 2.038, "step": 17493 }, { "epoch": 1.7955665024630543, "grad_norm": 0.08718766272068024, "learning_rate": 0.01, "loss": 2.0557, "step": 17496 }, { "epoch": 1.7958743842364533, "grad_norm": 0.030426733195781708, "learning_rate": 0.01, "loss": 2.0192, "step": 17499 }, { "epoch": 1.7961822660098523, "grad_norm": 0.03950949385762215, "learning_rate": 0.01, "loss": 2.0228, "step": 17502 }, { "epoch": 1.7964901477832513, "grad_norm": 0.049466658383607864, "learning_rate": 0.01, "loss": 2.0514, "step": 17505 }, { "epoch": 1.7967980295566504, "grad_norm": 0.06188172101974487, "learning_rate": 0.01, "loss": 2.0512, "step": 17508 }, { "epoch": 1.7971059113300494, "grad_norm": 0.06420351564884186, "learning_rate": 0.01, "loss": 2.0365, "step": 17511 }, { "epoch": 1.7974137931034484, "grad_norm": 0.04329871013760567, "learning_rate": 0.01, "loss": 2.0511, "step": 17514 }, { "epoch": 1.7977216748768474, "grad_norm": 0.04420280084013939, "learning_rate": 0.01, "loss": 2.0481, "step": 17517 }, { "epoch": 1.7980295566502464, "grad_norm": 0.04043954238295555, "learning_rate": 0.01, "loss": 2.0184, "step": 17520 }, { "epoch": 1.7983374384236455, "grad_norm": 0.049305226653814316, "learning_rate": 0.01, "loss": 2.0353, "step": 17523 }, { "epoch": 1.7986453201970445, "grad_norm": 0.1928088515996933, "learning_rate": 0.01, "loss": 2.0869, "step": 17526 }, { "epoch": 1.7989532019704435, "grad_norm": 0.12283357232809067, "learning_rate": 0.01, "loss": 2.0378, "step": 17529 }, { "epoch": 1.7992610837438425, "grad_norm": 0.07897382229566574, "learning_rate": 0.01, "loss": 2.045, "step": 17532 }, { "epoch": 1.7995689655172413, "grad_norm": 0.0749836266040802, "learning_rate": 0.01, "loss": 2.0388, "step": 17535 }, { "epoch": 1.7998768472906403, "grad_norm": 0.06578727811574936, "learning_rate": 0.01, "loss": 2.0575, "step": 17538 }, { "epoch": 1.8001847290640394, "grad_norm": 0.06609571725130081, "learning_rate": 0.01, "loss": 2.0448, "step": 17541 }, { "epoch": 1.8004926108374384, "grad_norm": 0.047696053981781006, "learning_rate": 0.01, "loss": 2.0574, "step": 17544 }, { "epoch": 1.8008004926108374, "grad_norm": 0.05110754072666168, "learning_rate": 0.01, "loss": 2.0191, "step": 17547 }, { "epoch": 1.8011083743842364, "grad_norm": 0.03783520683646202, "learning_rate": 0.01, "loss": 2.0328, "step": 17550 }, { "epoch": 1.8014162561576355, "grad_norm": 0.03145405650138855, "learning_rate": 0.01, "loss": 2.0373, "step": 17553 }, { "epoch": 1.8017241379310345, "grad_norm": 0.09492892026901245, "learning_rate": 0.01, "loss": 2.0173, "step": 17556 }, { "epoch": 1.8020320197044335, "grad_norm": 0.06920488178730011, "learning_rate": 0.01, "loss": 2.0809, "step": 17559 }, { "epoch": 1.8023399014778325, "grad_norm": 0.0583655945956707, "learning_rate": 0.01, "loss": 2.0259, "step": 17562 }, { "epoch": 1.8026477832512315, "grad_norm": 0.08449242264032364, "learning_rate": 0.01, "loss": 2.0205, "step": 17565 }, { "epoch": 1.8029556650246306, "grad_norm": 0.12186135351657867, "learning_rate": 0.01, "loss": 2.0076, "step": 17568 }, { "epoch": 1.8032635467980296, "grad_norm": 0.09926268458366394, "learning_rate": 0.01, "loss": 2.0444, "step": 17571 }, { "epoch": 1.8035714285714286, "grad_norm": 0.06820474565029144, "learning_rate": 0.01, "loss": 2.0211, "step": 17574 }, { "epoch": 1.8038793103448276, "grad_norm": 0.050847604870796204, "learning_rate": 0.01, "loss": 2.0377, "step": 17577 }, { "epoch": 1.8041871921182266, "grad_norm": 0.053053803741931915, "learning_rate": 0.01, "loss": 2.0462, "step": 17580 }, { "epoch": 1.8044950738916257, "grad_norm": 0.047114890068769455, "learning_rate": 0.01, "loss": 2.0171, "step": 17583 }, { "epoch": 1.8048029556650245, "grad_norm": 0.05182573199272156, "learning_rate": 0.01, "loss": 2.0396, "step": 17586 }, { "epoch": 1.8051108374384235, "grad_norm": 0.12609605491161346, "learning_rate": 0.01, "loss": 2.053, "step": 17589 }, { "epoch": 1.8054187192118225, "grad_norm": 0.0496569462120533, "learning_rate": 0.01, "loss": 2.0418, "step": 17592 }, { "epoch": 1.8057266009852215, "grad_norm": 0.0490572527050972, "learning_rate": 0.01, "loss": 2.0199, "step": 17595 }, { "epoch": 1.8060344827586206, "grad_norm": 0.038300756365060806, "learning_rate": 0.01, "loss": 2.0337, "step": 17598 }, { "epoch": 1.8063423645320196, "grad_norm": 0.03666609153151512, "learning_rate": 0.01, "loss": 2.0392, "step": 17601 }, { "epoch": 1.8066502463054186, "grad_norm": 0.036330632865428925, "learning_rate": 0.01, "loss": 2.0319, "step": 17604 }, { "epoch": 1.8069581280788176, "grad_norm": 0.0605342797935009, "learning_rate": 0.01, "loss": 2.0356, "step": 17607 }, { "epoch": 1.8072660098522166, "grad_norm": 0.04346880316734314, "learning_rate": 0.01, "loss": 2.0188, "step": 17610 }, { "epoch": 1.8075738916256157, "grad_norm": 0.06400660425424576, "learning_rate": 0.01, "loss": 2.0342, "step": 17613 }, { "epoch": 1.8078817733990147, "grad_norm": 0.0812198668718338, "learning_rate": 0.01, "loss": 2.0622, "step": 17616 }, { "epoch": 1.8081896551724137, "grad_norm": 0.06756972521543503, "learning_rate": 0.01, "loss": 2.0396, "step": 17619 }, { "epoch": 1.8084975369458127, "grad_norm": 0.05277147516608238, "learning_rate": 0.01, "loss": 2.0149, "step": 17622 }, { "epoch": 1.8088054187192117, "grad_norm": 0.07904385775327682, "learning_rate": 0.01, "loss": 2.0393, "step": 17625 }, { "epoch": 1.8091133004926108, "grad_norm": 0.06955704092979431, "learning_rate": 0.01, "loss": 2.0433, "step": 17628 }, { "epoch": 1.8094211822660098, "grad_norm": 0.06605497002601624, "learning_rate": 0.01, "loss": 2.0439, "step": 17631 }, { "epoch": 1.8097290640394088, "grad_norm": 0.03861093521118164, "learning_rate": 0.01, "loss": 2.03, "step": 17634 }, { "epoch": 1.8100369458128078, "grad_norm": 0.04323074221611023, "learning_rate": 0.01, "loss": 2.0444, "step": 17637 }, { "epoch": 1.8103448275862069, "grad_norm": 0.03443233296275139, "learning_rate": 0.01, "loss": 2.0466, "step": 17640 }, { "epoch": 1.8106527093596059, "grad_norm": 0.04190131649374962, "learning_rate": 0.01, "loss": 2.0307, "step": 17643 }, { "epoch": 1.810960591133005, "grad_norm": 0.09095717966556549, "learning_rate": 0.01, "loss": 2.0529, "step": 17646 }, { "epoch": 1.811268472906404, "grad_norm": 0.05452005937695503, "learning_rate": 0.01, "loss": 2.0337, "step": 17649 }, { "epoch": 1.811576354679803, "grad_norm": 0.05032350867986679, "learning_rate": 0.01, "loss": 2.0398, "step": 17652 }, { "epoch": 1.811884236453202, "grad_norm": 0.05733015760779381, "learning_rate": 0.01, "loss": 2.0573, "step": 17655 }, { "epoch": 1.812192118226601, "grad_norm": 0.09373817592859268, "learning_rate": 0.01, "loss": 2.0278, "step": 17658 }, { "epoch": 1.8125, "grad_norm": 0.07385890185832977, "learning_rate": 0.01, "loss": 2.032, "step": 17661 }, { "epoch": 1.812807881773399, "grad_norm": 0.08643963187932968, "learning_rate": 0.01, "loss": 2.0351, "step": 17664 }, { "epoch": 1.813115763546798, "grad_norm": 0.09909530729055405, "learning_rate": 0.01, "loss": 2.0562, "step": 17667 }, { "epoch": 1.813423645320197, "grad_norm": 0.04600978642702103, "learning_rate": 0.01, "loss": 2.0219, "step": 17670 }, { "epoch": 1.813731527093596, "grad_norm": 0.033060222864151, "learning_rate": 0.01, "loss": 2.0479, "step": 17673 }, { "epoch": 1.814039408866995, "grad_norm": 0.03789517655968666, "learning_rate": 0.01, "loss": 2.0242, "step": 17676 }, { "epoch": 1.8143472906403941, "grad_norm": 0.0502844899892807, "learning_rate": 0.01, "loss": 2.0519, "step": 17679 }, { "epoch": 1.8146551724137931, "grad_norm": 0.0627695843577385, "learning_rate": 0.01, "loss": 2.0327, "step": 17682 }, { "epoch": 1.8149630541871922, "grad_norm": 0.15737055242061615, "learning_rate": 0.01, "loss": 2.0572, "step": 17685 }, { "epoch": 1.8152709359605912, "grad_norm": 0.09944868832826614, "learning_rate": 0.01, "loss": 2.0462, "step": 17688 }, { "epoch": 1.8155788177339902, "grad_norm": 0.12345952540636063, "learning_rate": 0.01, "loss": 2.0447, "step": 17691 }, { "epoch": 1.8158866995073892, "grad_norm": 0.06330909579992294, "learning_rate": 0.01, "loss": 2.0511, "step": 17694 }, { "epoch": 1.8161945812807883, "grad_norm": 0.0584748238325119, "learning_rate": 0.01, "loss": 2.0164, "step": 17697 }, { "epoch": 1.8165024630541873, "grad_norm": 0.07284627109766006, "learning_rate": 0.01, "loss": 2.0308, "step": 17700 }, { "epoch": 1.8168103448275863, "grad_norm": 0.07302995771169662, "learning_rate": 0.01, "loss": 2.0347, "step": 17703 }, { "epoch": 1.8171182266009853, "grad_norm": 0.06292667984962463, "learning_rate": 0.01, "loss": 2.026, "step": 17706 }, { "epoch": 1.8174261083743843, "grad_norm": 0.04821958392858505, "learning_rate": 0.01, "loss": 2.033, "step": 17709 }, { "epoch": 1.8177339901477834, "grad_norm": 0.03572079911828041, "learning_rate": 0.01, "loss": 2.047, "step": 17712 }, { "epoch": 1.8180418719211824, "grad_norm": 0.12643416225910187, "learning_rate": 0.01, "loss": 2.0621, "step": 17715 }, { "epoch": 1.8183497536945814, "grad_norm": 0.08803770691156387, "learning_rate": 0.01, "loss": 2.0422, "step": 17718 }, { "epoch": 1.8186576354679804, "grad_norm": 0.061583418399095535, "learning_rate": 0.01, "loss": 1.9895, "step": 17721 }, { "epoch": 1.8189655172413794, "grad_norm": 0.04947415366768837, "learning_rate": 0.01, "loss": 2.0249, "step": 17724 }, { "epoch": 1.8192733990147785, "grad_norm": 0.06042906641960144, "learning_rate": 0.01, "loss": 2.0563, "step": 17727 }, { "epoch": 1.8195812807881775, "grad_norm": 0.03236406669020653, "learning_rate": 0.01, "loss": 2.0482, "step": 17730 }, { "epoch": 1.8198891625615765, "grad_norm": 0.05975859984755516, "learning_rate": 0.01, "loss": 2.0353, "step": 17733 }, { "epoch": 1.8201970443349755, "grad_norm": 0.11028258502483368, "learning_rate": 0.01, "loss": 2.0654, "step": 17736 }, { "epoch": 1.8205049261083743, "grad_norm": 0.055842846632003784, "learning_rate": 0.01, "loss": 2.0589, "step": 17739 }, { "epoch": 1.8208128078817734, "grad_norm": 0.09189102053642273, "learning_rate": 0.01, "loss": 2.0444, "step": 17742 }, { "epoch": 1.8211206896551724, "grad_norm": 0.07795927673578262, "learning_rate": 0.01, "loss": 2.0628, "step": 17745 }, { "epoch": 1.8214285714285714, "grad_norm": 0.06452701985836029, "learning_rate": 0.01, "loss": 2.0415, "step": 17748 }, { "epoch": 1.8217364532019704, "grad_norm": 0.056360337883234024, "learning_rate": 0.01, "loss": 2.0159, "step": 17751 }, { "epoch": 1.8220443349753694, "grad_norm": 0.08861987292766571, "learning_rate": 0.01, "loss": 2.0325, "step": 17754 }, { "epoch": 1.8223522167487685, "grad_norm": 0.07276416569948196, "learning_rate": 0.01, "loss": 2.022, "step": 17757 }, { "epoch": 1.8226600985221675, "grad_norm": 0.07501320540904999, "learning_rate": 0.01, "loss": 2.0592, "step": 17760 }, { "epoch": 1.8229679802955665, "grad_norm": 0.08408310264348984, "learning_rate": 0.01, "loss": 2.0353, "step": 17763 }, { "epoch": 1.8232758620689655, "grad_norm": 0.039008378982543945, "learning_rate": 0.01, "loss": 2.0541, "step": 17766 }, { "epoch": 1.8235837438423645, "grad_norm": 0.05153367295861244, "learning_rate": 0.01, "loss": 2.0614, "step": 17769 }, { "epoch": 1.8238916256157636, "grad_norm": 0.05685068294405937, "learning_rate": 0.01, "loss": 2.0603, "step": 17772 }, { "epoch": 1.8241995073891626, "grad_norm": 0.10836745798587799, "learning_rate": 0.01, "loss": 2.0293, "step": 17775 }, { "epoch": 1.8245073891625616, "grad_norm": 0.13855011761188507, "learning_rate": 0.01, "loss": 2.044, "step": 17778 }, { "epoch": 1.8248152709359606, "grad_norm": 0.07912803441286087, "learning_rate": 0.01, "loss": 2.062, "step": 17781 }, { "epoch": 1.8251231527093597, "grad_norm": 0.065729521214962, "learning_rate": 0.01, "loss": 2.0416, "step": 17784 }, { "epoch": 1.8254310344827587, "grad_norm": 0.04546307399868965, "learning_rate": 0.01, "loss": 2.0291, "step": 17787 }, { "epoch": 1.8257389162561575, "grad_norm": 0.03415641188621521, "learning_rate": 0.01, "loss": 2.0391, "step": 17790 }, { "epoch": 1.8260467980295565, "grad_norm": 0.038325123488903046, "learning_rate": 0.01, "loss": 2.0249, "step": 17793 }, { "epoch": 1.8263546798029555, "grad_norm": 0.057417213916778564, "learning_rate": 0.01, "loss": 2.0465, "step": 17796 }, { "epoch": 1.8266625615763545, "grad_norm": 0.07312962412834167, "learning_rate": 0.01, "loss": 2.009, "step": 17799 }, { "epoch": 1.8269704433497536, "grad_norm": 0.06465096771717072, "learning_rate": 0.01, "loss": 2.0582, "step": 17802 }, { "epoch": 1.8272783251231526, "grad_norm": 0.049065001308918, "learning_rate": 0.01, "loss": 2.0466, "step": 17805 }, { "epoch": 1.8275862068965516, "grad_norm": 0.05004505068063736, "learning_rate": 0.01, "loss": 2.0368, "step": 17808 }, { "epoch": 1.8278940886699506, "grad_norm": 0.12177273631095886, "learning_rate": 0.01, "loss": 2.0299, "step": 17811 }, { "epoch": 1.8282019704433496, "grad_norm": 0.09006219357252121, "learning_rate": 0.01, "loss": 2.0442, "step": 17814 }, { "epoch": 1.8285098522167487, "grad_norm": 0.07000398635864258, "learning_rate": 0.01, "loss": 2.0346, "step": 17817 }, { "epoch": 1.8288177339901477, "grad_norm": 0.03561507910490036, "learning_rate": 0.01, "loss": 2.022, "step": 17820 }, { "epoch": 1.8291256157635467, "grad_norm": 0.050965216010808945, "learning_rate": 0.01, "loss": 2.0577, "step": 17823 }, { "epoch": 1.8294334975369457, "grad_norm": 0.04437123239040375, "learning_rate": 0.01, "loss": 2.0204, "step": 17826 }, { "epoch": 1.8297413793103448, "grad_norm": 0.046157170087099075, "learning_rate": 0.01, "loss": 2.0315, "step": 17829 }, { "epoch": 1.8300492610837438, "grad_norm": 0.0641985610127449, "learning_rate": 0.01, "loss": 2.0619, "step": 17832 }, { "epoch": 1.8303571428571428, "grad_norm": 0.10295763611793518, "learning_rate": 0.01, "loss": 2.0142, "step": 17835 }, { "epoch": 1.8306650246305418, "grad_norm": 0.08395816385746002, "learning_rate": 0.01, "loss": 2.0388, "step": 17838 }, { "epoch": 1.8309729064039408, "grad_norm": 0.07087874412536621, "learning_rate": 0.01, "loss": 2.0458, "step": 17841 }, { "epoch": 1.8312807881773399, "grad_norm": 0.04754515364766121, "learning_rate": 0.01, "loss": 2.0305, "step": 17844 }, { "epoch": 1.8315886699507389, "grad_norm": 0.042998362332582474, "learning_rate": 0.01, "loss": 2.0334, "step": 17847 }, { "epoch": 1.831896551724138, "grad_norm": 0.044786881655454636, "learning_rate": 0.01, "loss": 2.0545, "step": 17850 }, { "epoch": 1.832204433497537, "grad_norm": 0.05035366117954254, "learning_rate": 0.01, "loss": 2.0346, "step": 17853 }, { "epoch": 1.832512315270936, "grad_norm": 0.08760454505681992, "learning_rate": 0.01, "loss": 2.0407, "step": 17856 }, { "epoch": 1.832820197044335, "grad_norm": 0.07182349264621735, "learning_rate": 0.01, "loss": 2.0617, "step": 17859 }, { "epoch": 1.833128078817734, "grad_norm": 0.0653420239686966, "learning_rate": 0.01, "loss": 2.02, "step": 17862 }, { "epoch": 1.833435960591133, "grad_norm": 0.07664595544338226, "learning_rate": 0.01, "loss": 2.0453, "step": 17865 }, { "epoch": 1.833743842364532, "grad_norm": 0.052884750068187714, "learning_rate": 0.01, "loss": 2.0433, "step": 17868 }, { "epoch": 1.834051724137931, "grad_norm": 0.049432456493377686, "learning_rate": 0.01, "loss": 2.0392, "step": 17871 }, { "epoch": 1.83435960591133, "grad_norm": 0.10208621621131897, "learning_rate": 0.01, "loss": 2.0425, "step": 17874 }, { "epoch": 1.834667487684729, "grad_norm": 0.0663546770811081, "learning_rate": 0.01, "loss": 2.0276, "step": 17877 }, { "epoch": 1.8349753694581281, "grad_norm": 0.0952199399471283, "learning_rate": 0.01, "loss": 2.0273, "step": 17880 }, { "epoch": 1.8352832512315271, "grad_norm": 0.04969238117337227, "learning_rate": 0.01, "loss": 2.0227, "step": 17883 }, { "epoch": 1.8355911330049262, "grad_norm": 0.05101123824715614, "learning_rate": 0.01, "loss": 2.0642, "step": 17886 }, { "epoch": 1.8358990147783252, "grad_norm": 0.1026005819439888, "learning_rate": 0.01, "loss": 2.0118, "step": 17889 }, { "epoch": 1.8362068965517242, "grad_norm": 0.06481184810400009, "learning_rate": 0.01, "loss": 2.0457, "step": 17892 }, { "epoch": 1.8365147783251232, "grad_norm": 0.0684402734041214, "learning_rate": 0.01, "loss": 2.0364, "step": 17895 }, { "epoch": 1.8368226600985222, "grad_norm": 0.1051085963845253, "learning_rate": 0.01, "loss": 2.0178, "step": 17898 }, { "epoch": 1.8371305418719213, "grad_norm": 0.06582857668399811, "learning_rate": 0.01, "loss": 2.0409, "step": 17901 }, { "epoch": 1.8374384236453203, "grad_norm": 0.05665391683578491, "learning_rate": 0.01, "loss": 2.04, "step": 17904 }, { "epoch": 1.8377463054187193, "grad_norm": 0.06239892914891243, "learning_rate": 0.01, "loss": 2.0199, "step": 17907 }, { "epoch": 1.8380541871921183, "grad_norm": 0.08531507849693298, "learning_rate": 0.01, "loss": 2.0429, "step": 17910 }, { "epoch": 1.8383620689655173, "grad_norm": 0.07379250973463058, "learning_rate": 0.01, "loss": 2.0226, "step": 17913 }, { "epoch": 1.8386699507389164, "grad_norm": 0.052789974957704544, "learning_rate": 0.01, "loss": 2.0198, "step": 17916 }, { "epoch": 1.8389778325123154, "grad_norm": 0.09525316208600998, "learning_rate": 0.01, "loss": 2.0423, "step": 17919 }, { "epoch": 1.8392857142857144, "grad_norm": 0.05700648948550224, "learning_rate": 0.01, "loss": 2.0332, "step": 17922 }, { "epoch": 1.8395935960591134, "grad_norm": 0.061519671231508255, "learning_rate": 0.01, "loss": 2.038, "step": 17925 }, { "epoch": 1.8399014778325125, "grad_norm": 0.05594256520271301, "learning_rate": 0.01, "loss": 2.0247, "step": 17928 }, { "epoch": 1.8402093596059115, "grad_norm": 0.06823567301034927, "learning_rate": 0.01, "loss": 2.0319, "step": 17931 }, { "epoch": 1.8405172413793105, "grad_norm": 0.061398666352033615, "learning_rate": 0.01, "loss": 2.038, "step": 17934 }, { "epoch": 1.8408251231527095, "grad_norm": 0.10590513050556183, "learning_rate": 0.01, "loss": 2.0336, "step": 17937 }, { "epoch": 1.8411330049261085, "grad_norm": 0.0579022578895092, "learning_rate": 0.01, "loss": 2.0249, "step": 17940 }, { "epoch": 1.8414408866995073, "grad_norm": 0.07047640532255173, "learning_rate": 0.01, "loss": 2.0147, "step": 17943 }, { "epoch": 1.8417487684729064, "grad_norm": 0.07486578077077866, "learning_rate": 0.01, "loss": 2.0413, "step": 17946 }, { "epoch": 1.8420566502463054, "grad_norm": 0.057884715497493744, "learning_rate": 0.01, "loss": 2.0357, "step": 17949 }, { "epoch": 1.8423645320197044, "grad_norm": 0.10381656140089035, "learning_rate": 0.01, "loss": 2.0382, "step": 17952 }, { "epoch": 1.8426724137931034, "grad_norm": 0.041863467544317245, "learning_rate": 0.01, "loss": 2.0345, "step": 17955 }, { "epoch": 1.8429802955665024, "grad_norm": 0.10012530535459518, "learning_rate": 0.01, "loss": 2.0648, "step": 17958 }, { "epoch": 1.8432881773399015, "grad_norm": 0.05597177520394325, "learning_rate": 0.01, "loss": 2.0513, "step": 17961 }, { "epoch": 1.8435960591133005, "grad_norm": 0.05338521674275398, "learning_rate": 0.01, "loss": 2.0287, "step": 17964 }, { "epoch": 1.8439039408866995, "grad_norm": 0.049141060560941696, "learning_rate": 0.01, "loss": 2.0486, "step": 17967 }, { "epoch": 1.8442118226600985, "grad_norm": 0.0784049779176712, "learning_rate": 0.01, "loss": 2.0176, "step": 17970 }, { "epoch": 1.8445197044334976, "grad_norm": 0.038596317172050476, "learning_rate": 0.01, "loss": 2.0167, "step": 17973 }, { "epoch": 1.8448275862068966, "grad_norm": 0.08521022647619247, "learning_rate": 0.01, "loss": 2.0364, "step": 17976 }, { "epoch": 1.8451354679802956, "grad_norm": 0.05890432372689247, "learning_rate": 0.01, "loss": 2.062, "step": 17979 }, { "epoch": 1.8454433497536946, "grad_norm": 0.09090931713581085, "learning_rate": 0.01, "loss": 2.0514, "step": 17982 }, { "epoch": 1.8457512315270936, "grad_norm": 0.06019595265388489, "learning_rate": 0.01, "loss": 2.0463, "step": 17985 }, { "epoch": 1.8460591133004927, "grad_norm": 0.07712443917989731, "learning_rate": 0.01, "loss": 2.0466, "step": 17988 }, { "epoch": 1.8463669950738915, "grad_norm": 0.06155428662896156, "learning_rate": 0.01, "loss": 2.0224, "step": 17991 }, { "epoch": 1.8466748768472905, "grad_norm": 0.07221681624650955, "learning_rate": 0.01, "loss": 2.0128, "step": 17994 }, { "epoch": 1.8469827586206895, "grad_norm": 0.056776583194732666, "learning_rate": 0.01, "loss": 2.0156, "step": 17997 }, { "epoch": 1.8472906403940885, "grad_norm": 0.12099254876375198, "learning_rate": 0.01, "loss": 2.0522, "step": 18000 }, { "epoch": 1.8475985221674875, "grad_norm": 0.060344647616147995, "learning_rate": 0.01, "loss": 2.038, "step": 18003 }, { "epoch": 1.8479064039408866, "grad_norm": 0.042333200573921204, "learning_rate": 0.01, "loss": 2.0202, "step": 18006 }, { "epoch": 1.8482142857142856, "grad_norm": 0.046059176325798035, "learning_rate": 0.01, "loss": 2.0505, "step": 18009 }, { "epoch": 1.8485221674876846, "grad_norm": 0.03853166475892067, "learning_rate": 0.01, "loss": 2.0681, "step": 18012 }, { "epoch": 1.8488300492610836, "grad_norm": 0.05197960138320923, "learning_rate": 0.01, "loss": 2.0563, "step": 18015 }, { "epoch": 1.8491379310344827, "grad_norm": 0.03316551446914673, "learning_rate": 0.01, "loss": 2.0279, "step": 18018 }, { "epoch": 1.8494458128078817, "grad_norm": 0.05977516993880272, "learning_rate": 0.01, "loss": 2.0217, "step": 18021 }, { "epoch": 1.8497536945812807, "grad_norm": 0.12331486493349075, "learning_rate": 0.01, "loss": 2.0531, "step": 18024 }, { "epoch": 1.8500615763546797, "grad_norm": 0.08065730333328247, "learning_rate": 0.01, "loss": 2.0508, "step": 18027 }, { "epoch": 1.8503694581280787, "grad_norm": 0.07649014890193939, "learning_rate": 0.01, "loss": 2.0629, "step": 18030 }, { "epoch": 1.8506773399014778, "grad_norm": 0.08431357145309448, "learning_rate": 0.01, "loss": 2.0504, "step": 18033 }, { "epoch": 1.8509852216748768, "grad_norm": 0.041856877505779266, "learning_rate": 0.01, "loss": 2.0376, "step": 18036 }, { "epoch": 1.8512931034482758, "grad_norm": 0.03598650172352791, "learning_rate": 0.01, "loss": 2.0455, "step": 18039 }, { "epoch": 1.8516009852216748, "grad_norm": 0.0360511913895607, "learning_rate": 0.01, "loss": 2.044, "step": 18042 }, { "epoch": 1.8519088669950738, "grad_norm": 0.0409335158765316, "learning_rate": 0.01, "loss": 2.0307, "step": 18045 }, { "epoch": 1.8522167487684729, "grad_norm": 0.04646136611700058, "learning_rate": 0.01, "loss": 2.0306, "step": 18048 }, { "epoch": 1.8525246305418719, "grad_norm": 0.08265028148889542, "learning_rate": 0.01, "loss": 2.0325, "step": 18051 }, { "epoch": 1.852832512315271, "grad_norm": 0.08118387311697006, "learning_rate": 0.01, "loss": 2.0315, "step": 18054 }, { "epoch": 1.85314039408867, "grad_norm": 0.05400428548455238, "learning_rate": 0.01, "loss": 2.0186, "step": 18057 }, { "epoch": 1.853448275862069, "grad_norm": 0.04605553671717644, "learning_rate": 0.01, "loss": 2.0212, "step": 18060 }, { "epoch": 1.853756157635468, "grad_norm": 0.06259449571371078, "learning_rate": 0.01, "loss": 2.0443, "step": 18063 }, { "epoch": 1.854064039408867, "grad_norm": 0.04901091381907463, "learning_rate": 0.01, "loss": 2.023, "step": 18066 }, { "epoch": 1.854371921182266, "grad_norm": 0.13033097982406616, "learning_rate": 0.01, "loss": 2.0696, "step": 18069 }, { "epoch": 1.854679802955665, "grad_norm": 0.04114639014005661, "learning_rate": 0.01, "loss": 2.0314, "step": 18072 }, { "epoch": 1.854987684729064, "grad_norm": 0.05269275978207588, "learning_rate": 0.01, "loss": 2.0576, "step": 18075 }, { "epoch": 1.855295566502463, "grad_norm": 0.052419982850551605, "learning_rate": 0.01, "loss": 2.0309, "step": 18078 }, { "epoch": 1.855603448275862, "grad_norm": 0.11109264940023422, "learning_rate": 0.01, "loss": 2.0234, "step": 18081 }, { "epoch": 1.8559113300492611, "grad_norm": 0.09544682502746582, "learning_rate": 0.01, "loss": 2.0287, "step": 18084 }, { "epoch": 1.8562192118226601, "grad_norm": 0.08282047510147095, "learning_rate": 0.01, "loss": 2.0601, "step": 18087 }, { "epoch": 1.8565270935960592, "grad_norm": 0.04255926236510277, "learning_rate": 0.01, "loss": 2.0094, "step": 18090 }, { "epoch": 1.8568349753694582, "grad_norm": 0.04899003729224205, "learning_rate": 0.01, "loss": 2.0494, "step": 18093 }, { "epoch": 1.8571428571428572, "grad_norm": 0.05081721395254135, "learning_rate": 0.01, "loss": 2.0309, "step": 18096 }, { "epoch": 1.8574507389162562, "grad_norm": 0.06633096188306808, "learning_rate": 0.01, "loss": 2.0526, "step": 18099 }, { "epoch": 1.8577586206896552, "grad_norm": 0.06513489037752151, "learning_rate": 0.01, "loss": 2.0072, "step": 18102 }, { "epoch": 1.8580665024630543, "grad_norm": 0.09429512917995453, "learning_rate": 0.01, "loss": 2.0522, "step": 18105 }, { "epoch": 1.8583743842364533, "grad_norm": 0.06042760610580444, "learning_rate": 0.01, "loss": 2.0613, "step": 18108 }, { "epoch": 1.8586822660098523, "grad_norm": 0.04098260775208473, "learning_rate": 0.01, "loss": 2.0712, "step": 18111 }, { "epoch": 1.8589901477832513, "grad_norm": 0.04882989823818207, "learning_rate": 0.01, "loss": 2.0308, "step": 18114 }, { "epoch": 1.8592980295566504, "grad_norm": 0.06196373701095581, "learning_rate": 0.01, "loss": 2.0535, "step": 18117 }, { "epoch": 1.8596059113300494, "grad_norm": 0.10515942424535751, "learning_rate": 0.01, "loss": 2.0402, "step": 18120 }, { "epoch": 1.8599137931034484, "grad_norm": 0.08962828665971756, "learning_rate": 0.01, "loss": 2.048, "step": 18123 }, { "epoch": 1.8602216748768474, "grad_norm": 0.07672600448131561, "learning_rate": 0.01, "loss": 2.0502, "step": 18126 }, { "epoch": 1.8605295566502464, "grad_norm": 0.04769902676343918, "learning_rate": 0.01, "loss": 2.0444, "step": 18129 }, { "epoch": 1.8608374384236455, "grad_norm": 0.06558270752429962, "learning_rate": 0.01, "loss": 2.0431, "step": 18132 }, { "epoch": 1.8611453201970445, "grad_norm": 0.06909210234880447, "learning_rate": 0.01, "loss": 2.0429, "step": 18135 }, { "epoch": 1.8614532019704435, "grad_norm": 0.07571686059236526, "learning_rate": 0.01, "loss": 2.045, "step": 18138 }, { "epoch": 1.8617610837438425, "grad_norm": 0.11170367896556854, "learning_rate": 0.01, "loss": 2.05, "step": 18141 }, { "epoch": 1.8620689655172413, "grad_norm": 0.12978370487689972, "learning_rate": 0.01, "loss": 2.0621, "step": 18144 }, { "epoch": 1.8623768472906403, "grad_norm": 0.056673552840948105, "learning_rate": 0.01, "loss": 2.0264, "step": 18147 }, { "epoch": 1.8626847290640394, "grad_norm": 0.04110763967037201, "learning_rate": 0.01, "loss": 2.0478, "step": 18150 }, { "epoch": 1.8629926108374384, "grad_norm": 0.06502550840377808, "learning_rate": 0.01, "loss": 2.0404, "step": 18153 }, { "epoch": 1.8633004926108374, "grad_norm": 0.059242501854896545, "learning_rate": 0.01, "loss": 2.0545, "step": 18156 }, { "epoch": 1.8636083743842364, "grad_norm": 0.05173099413514137, "learning_rate": 0.01, "loss": 2.0327, "step": 18159 }, { "epoch": 1.8639162561576355, "grad_norm": 0.0403546541929245, "learning_rate": 0.01, "loss": 2.037, "step": 18162 }, { "epoch": 1.8642241379310345, "grad_norm": 0.10920348763465881, "learning_rate": 0.01, "loss": 2.04, "step": 18165 }, { "epoch": 1.8645320197044335, "grad_norm": 0.05528813973069191, "learning_rate": 0.01, "loss": 2.0207, "step": 18168 }, { "epoch": 1.8648399014778325, "grad_norm": 0.04583175480365753, "learning_rate": 0.01, "loss": 2.0409, "step": 18171 }, { "epoch": 1.8651477832512315, "grad_norm": 0.04503155127167702, "learning_rate": 0.01, "loss": 2.0412, "step": 18174 }, { "epoch": 1.8654556650246306, "grad_norm": 0.038750261068344116, "learning_rate": 0.01, "loss": 2.0412, "step": 18177 }, { "epoch": 1.8657635467980296, "grad_norm": 0.0650535374879837, "learning_rate": 0.01, "loss": 2.0515, "step": 18180 }, { "epoch": 1.8660714285714286, "grad_norm": 0.08589319884777069, "learning_rate": 0.01, "loss": 2.0456, "step": 18183 }, { "epoch": 1.8663793103448276, "grad_norm": 0.06245085969567299, "learning_rate": 0.01, "loss": 2.0257, "step": 18186 }, { "epoch": 1.8666871921182266, "grad_norm": 0.07419238984584808, "learning_rate": 0.01, "loss": 2.0501, "step": 18189 }, { "epoch": 1.8669950738916257, "grad_norm": 0.06336040049791336, "learning_rate": 0.01, "loss": 2.0822, "step": 18192 }, { "epoch": 1.8673029556650245, "grad_norm": 0.09494315087795258, "learning_rate": 0.01, "loss": 2.0523, "step": 18195 }, { "epoch": 1.8676108374384235, "grad_norm": 0.06543273478746414, "learning_rate": 0.01, "loss": 2.0289, "step": 18198 }, { "epoch": 1.8679187192118225, "grad_norm": 0.05602452531456947, "learning_rate": 0.01, "loss": 2.0626, "step": 18201 }, { "epoch": 1.8682266009852215, "grad_norm": 0.041735779494047165, "learning_rate": 0.01, "loss": 2.0137, "step": 18204 }, { "epoch": 1.8685344827586206, "grad_norm": 0.03998032957315445, "learning_rate": 0.01, "loss": 2.0341, "step": 18207 }, { "epoch": 1.8688423645320196, "grad_norm": 0.07010776549577713, "learning_rate": 0.01, "loss": 2.0525, "step": 18210 }, { "epoch": 1.8691502463054186, "grad_norm": 0.08516181260347366, "learning_rate": 0.01, "loss": 2.0065, "step": 18213 }, { "epoch": 1.8694581280788176, "grad_norm": 0.08233955502510071, "learning_rate": 0.01, "loss": 2.0391, "step": 18216 }, { "epoch": 1.8697660098522166, "grad_norm": 0.06725854426622391, "learning_rate": 0.01, "loss": 2.0224, "step": 18219 }, { "epoch": 1.8700738916256157, "grad_norm": 0.03345496207475662, "learning_rate": 0.01, "loss": 2.0327, "step": 18222 }, { "epoch": 1.8703817733990147, "grad_norm": 0.09758662432432175, "learning_rate": 0.01, "loss": 2.0258, "step": 18225 }, { "epoch": 1.8706896551724137, "grad_norm": 0.05376002565026283, "learning_rate": 0.01, "loss": 2.0592, "step": 18228 }, { "epoch": 1.8709975369458127, "grad_norm": 0.04620193690061569, "learning_rate": 0.01, "loss": 2.023, "step": 18231 }, { "epoch": 1.8713054187192117, "grad_norm": 0.0853218212723732, "learning_rate": 0.01, "loss": 2.0352, "step": 18234 }, { "epoch": 1.8716133004926108, "grad_norm": 0.15689584612846375, "learning_rate": 0.01, "loss": 2.0163, "step": 18237 }, { "epoch": 1.8719211822660098, "grad_norm": 0.05037194490432739, "learning_rate": 0.01, "loss": 2.0541, "step": 18240 }, { "epoch": 1.8722290640394088, "grad_norm": 0.031591251492500305, "learning_rate": 0.01, "loss": 2.0502, "step": 18243 }, { "epoch": 1.8725369458128078, "grad_norm": 0.05832947790622711, "learning_rate": 0.01, "loss": 2.0284, "step": 18246 }, { "epoch": 1.8728448275862069, "grad_norm": 0.059619709849357605, "learning_rate": 0.01, "loss": 2.053, "step": 18249 }, { "epoch": 1.8731527093596059, "grad_norm": 0.04232211783528328, "learning_rate": 0.01, "loss": 2.0456, "step": 18252 }, { "epoch": 1.873460591133005, "grad_norm": 0.03756287693977356, "learning_rate": 0.01, "loss": 2.0183, "step": 18255 }, { "epoch": 1.873768472906404, "grad_norm": 0.04177022725343704, "learning_rate": 0.01, "loss": 2.0213, "step": 18258 }, { "epoch": 1.874076354679803, "grad_norm": 0.054342493414878845, "learning_rate": 0.01, "loss": 2.0246, "step": 18261 }, { "epoch": 1.874384236453202, "grad_norm": 0.11872408539056778, "learning_rate": 0.01, "loss": 2.0259, "step": 18264 }, { "epoch": 1.874692118226601, "grad_norm": 0.05143645405769348, "learning_rate": 0.01, "loss": 2.0473, "step": 18267 }, { "epoch": 1.875, "grad_norm": 0.06726546585559845, "learning_rate": 0.01, "loss": 2.0413, "step": 18270 }, { "epoch": 1.875307881773399, "grad_norm": 0.10031245648860931, "learning_rate": 0.01, "loss": 2.0315, "step": 18273 }, { "epoch": 1.875615763546798, "grad_norm": 0.09145006537437439, "learning_rate": 0.01, "loss": 2.0359, "step": 18276 }, { "epoch": 1.875923645320197, "grad_norm": 0.0797610953450203, "learning_rate": 0.01, "loss": 2.0507, "step": 18279 }, { "epoch": 1.876231527093596, "grad_norm": 0.07170062512159348, "learning_rate": 0.01, "loss": 2.0369, "step": 18282 }, { "epoch": 1.876539408866995, "grad_norm": 0.043757934123277664, "learning_rate": 0.01, "loss": 2.0825, "step": 18285 }, { "epoch": 1.8768472906403941, "grad_norm": 0.059610821306705475, "learning_rate": 0.01, "loss": 2.0275, "step": 18288 }, { "epoch": 1.8771551724137931, "grad_norm": 0.06014898791909218, "learning_rate": 0.01, "loss": 2.0498, "step": 18291 }, { "epoch": 1.8774630541871922, "grad_norm": 0.0823112353682518, "learning_rate": 0.01, "loss": 1.9977, "step": 18294 }, { "epoch": 1.8777709359605912, "grad_norm": 0.11342828720808029, "learning_rate": 0.01, "loss": 2.0259, "step": 18297 }, { "epoch": 1.8780788177339902, "grad_norm": 0.1533091962337494, "learning_rate": 0.01, "loss": 2.0593, "step": 18300 }, { "epoch": 1.8783866995073892, "grad_norm": 0.09665971249341965, "learning_rate": 0.01, "loss": 2.0304, "step": 18303 }, { "epoch": 1.8786945812807883, "grad_norm": 0.043992988765239716, "learning_rate": 0.01, "loss": 2.0412, "step": 18306 }, { "epoch": 1.8790024630541873, "grad_norm": 0.03452041372656822, "learning_rate": 0.01, "loss": 2.009, "step": 18309 }, { "epoch": 1.8793103448275863, "grad_norm": 0.05596618726849556, "learning_rate": 0.01, "loss": 2.0498, "step": 18312 }, { "epoch": 1.8796182266009853, "grad_norm": 0.0542016327381134, "learning_rate": 0.01, "loss": 2.033, "step": 18315 }, { "epoch": 1.8799261083743843, "grad_norm": 0.049744654446840286, "learning_rate": 0.01, "loss": 2.0625, "step": 18318 }, { "epoch": 1.8802339901477834, "grad_norm": 0.04219472035765648, "learning_rate": 0.01, "loss": 2.0458, "step": 18321 }, { "epoch": 1.8805418719211824, "grad_norm": 0.11828272044658661, "learning_rate": 0.01, "loss": 2.047, "step": 18324 }, { "epoch": 1.8808497536945814, "grad_norm": 0.042180564254522324, "learning_rate": 0.01, "loss": 2.0477, "step": 18327 }, { "epoch": 1.8811576354679804, "grad_norm": 0.05486786365509033, "learning_rate": 0.01, "loss": 2.034, "step": 18330 }, { "epoch": 1.8814655172413794, "grad_norm": 0.09456659108400345, "learning_rate": 0.01, "loss": 2.059, "step": 18333 }, { "epoch": 1.8817733990147785, "grad_norm": 0.03962776064872742, "learning_rate": 0.01, "loss": 2.0569, "step": 18336 }, { "epoch": 1.8820812807881775, "grad_norm": 0.06588723510503769, "learning_rate": 0.01, "loss": 2.0131, "step": 18339 }, { "epoch": 1.8823891625615765, "grad_norm": 0.0490611270070076, "learning_rate": 0.01, "loss": 2.0558, "step": 18342 }, { "epoch": 1.8826970443349755, "grad_norm": 0.10546906292438507, "learning_rate": 0.01, "loss": 2.0354, "step": 18345 }, { "epoch": 1.8830049261083743, "grad_norm": 0.05751054733991623, "learning_rate": 0.01, "loss": 2.0321, "step": 18348 }, { "epoch": 1.8833128078817734, "grad_norm": 0.102676160633564, "learning_rate": 0.01, "loss": 2.0116, "step": 18351 }, { "epoch": 1.8836206896551724, "grad_norm": 0.0536825954914093, "learning_rate": 0.01, "loss": 2.0677, "step": 18354 }, { "epoch": 1.8839285714285714, "grad_norm": 0.039357978850603104, "learning_rate": 0.01, "loss": 2.0217, "step": 18357 }, { "epoch": 1.8842364532019704, "grad_norm": 0.03515158221125603, "learning_rate": 0.01, "loss": 2.0332, "step": 18360 }, { "epoch": 1.8845443349753694, "grad_norm": 0.04234091565012932, "learning_rate": 0.01, "loss": 2.0329, "step": 18363 }, { "epoch": 1.8848522167487685, "grad_norm": 0.06893119215965271, "learning_rate": 0.01, "loss": 2.0241, "step": 18366 }, { "epoch": 1.8851600985221675, "grad_norm": 0.08353175222873688, "learning_rate": 0.01, "loss": 2.0236, "step": 18369 }, { "epoch": 1.8854679802955665, "grad_norm": 0.0567467026412487, "learning_rate": 0.01, "loss": 2.0672, "step": 18372 }, { "epoch": 1.8857758620689655, "grad_norm": 0.05529101565480232, "learning_rate": 0.01, "loss": 2.0364, "step": 18375 }, { "epoch": 1.8860837438423645, "grad_norm": 0.09581262618303299, "learning_rate": 0.01, "loss": 2.0395, "step": 18378 }, { "epoch": 1.8863916256157636, "grad_norm": 0.052034202963113785, "learning_rate": 0.01, "loss": 2.0415, "step": 18381 }, { "epoch": 1.8866995073891626, "grad_norm": 0.10314558446407318, "learning_rate": 0.01, "loss": 2.0095, "step": 18384 }, { "epoch": 1.8870073891625616, "grad_norm": 0.04631441831588745, "learning_rate": 0.01, "loss": 2.0504, "step": 18387 }, { "epoch": 1.8873152709359606, "grad_norm": 0.08067111670970917, "learning_rate": 0.01, "loss": 2.0504, "step": 18390 }, { "epoch": 1.8876231527093597, "grad_norm": 0.07892802357673645, "learning_rate": 0.01, "loss": 2.0495, "step": 18393 }, { "epoch": 1.8879310344827587, "grad_norm": 0.06167163327336311, "learning_rate": 0.01, "loss": 2.0546, "step": 18396 }, { "epoch": 1.8882389162561575, "grad_norm": 0.06746269762516022, "learning_rate": 0.01, "loss": 2.0187, "step": 18399 }, { "epoch": 1.8885467980295565, "grad_norm": 0.06389199942350388, "learning_rate": 0.01, "loss": 2.0556, "step": 18402 }, { "epoch": 1.8888546798029555, "grad_norm": 0.05448369309306145, "learning_rate": 0.01, "loss": 2.0607, "step": 18405 }, { "epoch": 1.8891625615763545, "grad_norm": 0.05535599961876869, "learning_rate": 0.01, "loss": 2.0347, "step": 18408 }, { "epoch": 1.8894704433497536, "grad_norm": 0.0533415786921978, "learning_rate": 0.01, "loss": 2.0648, "step": 18411 }, { "epoch": 1.8897783251231526, "grad_norm": 0.06423043459653854, "learning_rate": 0.01, "loss": 2.0554, "step": 18414 }, { "epoch": 1.8900862068965516, "grad_norm": 0.04682399705052376, "learning_rate": 0.01, "loss": 2.0402, "step": 18417 }, { "epoch": 1.8903940886699506, "grad_norm": 0.09395507723093033, "learning_rate": 0.01, "loss": 2.0417, "step": 18420 }, { "epoch": 1.8907019704433496, "grad_norm": 0.07777848839759827, "learning_rate": 0.01, "loss": 2.068, "step": 18423 }, { "epoch": 1.8910098522167487, "grad_norm": 0.052006904035806656, "learning_rate": 0.01, "loss": 2.0232, "step": 18426 }, { "epoch": 1.8913177339901477, "grad_norm": 0.05325109511613846, "learning_rate": 0.01, "loss": 2.0292, "step": 18429 }, { "epoch": 1.8916256157635467, "grad_norm": 0.05496850982308388, "learning_rate": 0.01, "loss": 2.0531, "step": 18432 }, { "epoch": 1.8919334975369457, "grad_norm": 0.0395922027528286, "learning_rate": 0.01, "loss": 2.0504, "step": 18435 }, { "epoch": 1.8922413793103448, "grad_norm": 0.0664554312825203, "learning_rate": 0.01, "loss": 2.0555, "step": 18438 }, { "epoch": 1.8925492610837438, "grad_norm": 0.06475098431110382, "learning_rate": 0.01, "loss": 2.0361, "step": 18441 }, { "epoch": 1.8928571428571428, "grad_norm": 0.06291298568248749, "learning_rate": 0.01, "loss": 2.0083, "step": 18444 }, { "epoch": 1.8931650246305418, "grad_norm": 0.07891640067100525, "learning_rate": 0.01, "loss": 2.0338, "step": 18447 }, { "epoch": 1.8934729064039408, "grad_norm": 0.10065143555402756, "learning_rate": 0.01, "loss": 2.0372, "step": 18450 }, { "epoch": 1.8937807881773399, "grad_norm": 0.0962565690279007, "learning_rate": 0.01, "loss": 2.0369, "step": 18453 }, { "epoch": 1.8940886699507389, "grad_norm": 0.06286763399839401, "learning_rate": 0.01, "loss": 2.0416, "step": 18456 }, { "epoch": 1.894396551724138, "grad_norm": 0.05067887529730797, "learning_rate": 0.01, "loss": 2.0582, "step": 18459 }, { "epoch": 1.894704433497537, "grad_norm": 0.03956342115998268, "learning_rate": 0.01, "loss": 2.0633, "step": 18462 }, { "epoch": 1.895012315270936, "grad_norm": 0.06670361012220383, "learning_rate": 0.01, "loss": 2.0569, "step": 18465 }, { "epoch": 1.895320197044335, "grad_norm": 0.07848145067691803, "learning_rate": 0.01, "loss": 2.037, "step": 18468 }, { "epoch": 1.895628078817734, "grad_norm": 0.05415938422083855, "learning_rate": 0.01, "loss": 2.0699, "step": 18471 }, { "epoch": 1.895935960591133, "grad_norm": 0.07549092918634415, "learning_rate": 0.01, "loss": 2.0369, "step": 18474 }, { "epoch": 1.896243842364532, "grad_norm": 0.057871196419000626, "learning_rate": 0.01, "loss": 2.0434, "step": 18477 }, { "epoch": 1.896551724137931, "grad_norm": 0.059748612344264984, "learning_rate": 0.01, "loss": 2.0374, "step": 18480 }, { "epoch": 1.89685960591133, "grad_norm": 0.04501016065478325, "learning_rate": 0.01, "loss": 2.034, "step": 18483 }, { "epoch": 1.897167487684729, "grad_norm": 0.06361118704080582, "learning_rate": 0.01, "loss": 2.0398, "step": 18486 }, { "epoch": 1.8974753694581281, "grad_norm": 0.09649393707513809, "learning_rate": 0.01, "loss": 2.0311, "step": 18489 }, { "epoch": 1.8977832512315271, "grad_norm": 0.08288730680942535, "learning_rate": 0.01, "loss": 2.0585, "step": 18492 }, { "epoch": 1.8980911330049262, "grad_norm": 0.037788692861795425, "learning_rate": 0.01, "loss": 2.028, "step": 18495 }, { "epoch": 1.8983990147783252, "grad_norm": 0.05678097531199455, "learning_rate": 0.01, "loss": 2.029, "step": 18498 }, { "epoch": 1.8987068965517242, "grad_norm": 0.05753886699676514, "learning_rate": 0.01, "loss": 2.0523, "step": 18501 }, { "epoch": 1.8990147783251232, "grad_norm": 0.0542941652238369, "learning_rate": 0.01, "loss": 2.0334, "step": 18504 }, { "epoch": 1.8993226600985222, "grad_norm": 0.06856728345155716, "learning_rate": 0.01, "loss": 2.0179, "step": 18507 }, { "epoch": 1.8996305418719213, "grad_norm": 0.09270088374614716, "learning_rate": 0.01, "loss": 2.0258, "step": 18510 }, { "epoch": 1.8999384236453203, "grad_norm": 0.04473109543323517, "learning_rate": 0.01, "loss": 2.0036, "step": 18513 }, { "epoch": 1.9002463054187193, "grad_norm": 0.06040007993578911, "learning_rate": 0.01, "loss": 2.0344, "step": 18516 }, { "epoch": 1.9005541871921183, "grad_norm": 0.032143257558345795, "learning_rate": 0.01, "loss": 2.0478, "step": 18519 }, { "epoch": 1.9008620689655173, "grad_norm": 0.04205821454524994, "learning_rate": 0.01, "loss": 2.0562, "step": 18522 }, { "epoch": 1.9011699507389164, "grad_norm": 0.03920583799481392, "learning_rate": 0.01, "loss": 2.0561, "step": 18525 }, { "epoch": 1.9014778325123154, "grad_norm": 0.17323125898838043, "learning_rate": 0.01, "loss": 2.0243, "step": 18528 }, { "epoch": 1.9017857142857144, "grad_norm": 0.04595707729458809, "learning_rate": 0.01, "loss": 2.0515, "step": 18531 }, { "epoch": 1.9020935960591134, "grad_norm": 0.03803316131234169, "learning_rate": 0.01, "loss": 2.0268, "step": 18534 }, { "epoch": 1.9024014778325125, "grad_norm": 0.04623658210039139, "learning_rate": 0.01, "loss": 2.0423, "step": 18537 }, { "epoch": 1.9027093596059115, "grad_norm": 0.04388248175382614, "learning_rate": 0.01, "loss": 2.0207, "step": 18540 }, { "epoch": 1.9030172413793105, "grad_norm": 0.03582540154457092, "learning_rate": 0.01, "loss": 2.0307, "step": 18543 }, { "epoch": 1.9033251231527095, "grad_norm": 0.033453166484832764, "learning_rate": 0.01, "loss": 2.0514, "step": 18546 }, { "epoch": 1.9036330049261085, "grad_norm": 0.04929531365633011, "learning_rate": 0.01, "loss": 2.022, "step": 18549 }, { "epoch": 1.9039408866995073, "grad_norm": 0.0575069934129715, "learning_rate": 0.01, "loss": 2.0136, "step": 18552 }, { "epoch": 1.9042487684729064, "grad_norm": 0.044883664697408676, "learning_rate": 0.01, "loss": 2.0267, "step": 18555 }, { "epoch": 1.9045566502463054, "grad_norm": 0.06335309147834778, "learning_rate": 0.01, "loss": 2.013, "step": 18558 }, { "epoch": 1.9048645320197044, "grad_norm": 0.07315582036972046, "learning_rate": 0.01, "loss": 2.0331, "step": 18561 }, { "epoch": 1.9051724137931034, "grad_norm": 0.08378446102142334, "learning_rate": 0.01, "loss": 2.0154, "step": 18564 }, { "epoch": 1.9054802955665024, "grad_norm": 0.09492503106594086, "learning_rate": 0.01, "loss": 2.0239, "step": 18567 }, { "epoch": 1.9057881773399015, "grad_norm": 0.0497819185256958, "learning_rate": 0.01, "loss": 2.0551, "step": 18570 }, { "epoch": 1.9060960591133005, "grad_norm": 0.06625241041183472, "learning_rate": 0.01, "loss": 2.0477, "step": 18573 }, { "epoch": 1.9064039408866995, "grad_norm": 0.13533645868301392, "learning_rate": 0.01, "loss": 2.0288, "step": 18576 }, { "epoch": 1.9067118226600985, "grad_norm": 0.129546657204628, "learning_rate": 0.01, "loss": 1.9943, "step": 18579 }, { "epoch": 1.9070197044334976, "grad_norm": 0.0862266942858696, "learning_rate": 0.01, "loss": 2.0273, "step": 18582 }, { "epoch": 1.9073275862068966, "grad_norm": 0.04262632504105568, "learning_rate": 0.01, "loss": 2.0289, "step": 18585 }, { "epoch": 1.9076354679802956, "grad_norm": 0.06536297500133514, "learning_rate": 0.01, "loss": 2.0453, "step": 18588 }, { "epoch": 1.9079433497536946, "grad_norm": 0.04408801719546318, "learning_rate": 0.01, "loss": 2.045, "step": 18591 }, { "epoch": 1.9082512315270936, "grad_norm": 0.0382089763879776, "learning_rate": 0.01, "loss": 2.057, "step": 18594 }, { "epoch": 1.9085591133004927, "grad_norm": 0.05695042014122009, "learning_rate": 0.01, "loss": 2.0468, "step": 18597 }, { "epoch": 1.9088669950738915, "grad_norm": 0.06890982389450073, "learning_rate": 0.01, "loss": 2.0631, "step": 18600 }, { "epoch": 1.9091748768472905, "grad_norm": 0.06517864018678665, "learning_rate": 0.01, "loss": 2.0288, "step": 18603 }, { "epoch": 1.9094827586206895, "grad_norm": 0.03709007799625397, "learning_rate": 0.01, "loss": 2.0579, "step": 18606 }, { "epoch": 1.9097906403940885, "grad_norm": 0.040355831384658813, "learning_rate": 0.01, "loss": 2.023, "step": 18609 }, { "epoch": 1.9100985221674875, "grad_norm": 0.08973202854394913, "learning_rate": 0.01, "loss": 2.0495, "step": 18612 }, { "epoch": 1.9104064039408866, "grad_norm": 0.08074682205915451, "learning_rate": 0.01, "loss": 2.0352, "step": 18615 }, { "epoch": 1.9107142857142856, "grad_norm": 0.07134959101676941, "learning_rate": 0.01, "loss": 2.0234, "step": 18618 }, { "epoch": 1.9110221674876846, "grad_norm": 0.10389960557222366, "learning_rate": 0.01, "loss": 2.0456, "step": 18621 }, { "epoch": 1.9113300492610836, "grad_norm": 0.052665699273347855, "learning_rate": 0.01, "loss": 2.019, "step": 18624 }, { "epoch": 1.9116379310344827, "grad_norm": 0.06355523318052292, "learning_rate": 0.01, "loss": 2.0328, "step": 18627 }, { "epoch": 1.9119458128078817, "grad_norm": 0.06806465983390808, "learning_rate": 0.01, "loss": 2.0364, "step": 18630 }, { "epoch": 1.9122536945812807, "grad_norm": 0.08892465382814407, "learning_rate": 0.01, "loss": 2.0436, "step": 18633 }, { "epoch": 1.9125615763546797, "grad_norm": 0.09806855767965317, "learning_rate": 0.01, "loss": 2.0494, "step": 18636 }, { "epoch": 1.9128694581280787, "grad_norm": 0.036283593624830246, "learning_rate": 0.01, "loss": 2.03, "step": 18639 }, { "epoch": 1.9131773399014778, "grad_norm": 0.06654248386621475, "learning_rate": 0.01, "loss": 2.0177, "step": 18642 }, { "epoch": 1.9134852216748768, "grad_norm": 0.07729227095842361, "learning_rate": 0.01, "loss": 2.041, "step": 18645 }, { "epoch": 1.9137931034482758, "grad_norm": 0.05296695604920387, "learning_rate": 0.01, "loss": 2.0512, "step": 18648 }, { "epoch": 1.9141009852216748, "grad_norm": 0.05579183250665665, "learning_rate": 0.01, "loss": 2.0171, "step": 18651 }, { "epoch": 1.9144088669950738, "grad_norm": 0.04230615124106407, "learning_rate": 0.01, "loss": 2.0178, "step": 18654 }, { "epoch": 1.9147167487684729, "grad_norm": 0.0412709042429924, "learning_rate": 0.01, "loss": 2.0356, "step": 18657 }, { "epoch": 1.9150246305418719, "grad_norm": 0.056640543043613434, "learning_rate": 0.01, "loss": 2.0506, "step": 18660 }, { "epoch": 1.915332512315271, "grad_norm": 0.04353609308600426, "learning_rate": 0.01, "loss": 2.0703, "step": 18663 }, { "epoch": 1.91564039408867, "grad_norm": 0.04212663322687149, "learning_rate": 0.01, "loss": 2.0333, "step": 18666 }, { "epoch": 1.915948275862069, "grad_norm": 0.07639022916555405, "learning_rate": 0.01, "loss": 2.0267, "step": 18669 }, { "epoch": 1.916256157635468, "grad_norm": 0.09405479580163956, "learning_rate": 0.01, "loss": 2.0532, "step": 18672 }, { "epoch": 1.916564039408867, "grad_norm": 0.07501058280467987, "learning_rate": 0.01, "loss": 2.0281, "step": 18675 }, { "epoch": 1.916871921182266, "grad_norm": 0.06343735009431839, "learning_rate": 0.01, "loss": 2.0387, "step": 18678 }, { "epoch": 1.917179802955665, "grad_norm": 0.07794613391160965, "learning_rate": 0.01, "loss": 2.0189, "step": 18681 }, { "epoch": 1.917487684729064, "grad_norm": 0.1063399538397789, "learning_rate": 0.01, "loss": 2.0522, "step": 18684 }, { "epoch": 1.917795566502463, "grad_norm": 0.06528618931770325, "learning_rate": 0.01, "loss": 2.0411, "step": 18687 }, { "epoch": 1.918103448275862, "grad_norm": 0.063084177672863, "learning_rate": 0.01, "loss": 2.0405, "step": 18690 }, { "epoch": 1.9184113300492611, "grad_norm": 0.06663991510868073, "learning_rate": 0.01, "loss": 2.0547, "step": 18693 }, { "epoch": 1.9187192118226601, "grad_norm": 0.09827464818954468, "learning_rate": 0.01, "loss": 2.0285, "step": 18696 }, { "epoch": 1.9190270935960592, "grad_norm": 0.052305273711681366, "learning_rate": 0.01, "loss": 2.0205, "step": 18699 }, { "epoch": 1.9193349753694582, "grad_norm": 0.07126889377832413, "learning_rate": 0.01, "loss": 2.0331, "step": 18702 }, { "epoch": 1.9196428571428572, "grad_norm": 0.06262009590864182, "learning_rate": 0.01, "loss": 2.0217, "step": 18705 }, { "epoch": 1.9199507389162562, "grad_norm": 0.056034356355667114, "learning_rate": 0.01, "loss": 2.0136, "step": 18708 }, { "epoch": 1.9202586206896552, "grad_norm": 0.07673577219247818, "learning_rate": 0.01, "loss": 2.0617, "step": 18711 }, { "epoch": 1.9205665024630543, "grad_norm": 0.06006854772567749, "learning_rate": 0.01, "loss": 2.0493, "step": 18714 }, { "epoch": 1.9208743842364533, "grad_norm": 0.07149071991443634, "learning_rate": 0.01, "loss": 2.0339, "step": 18717 }, { "epoch": 1.9211822660098523, "grad_norm": 0.04935576766729355, "learning_rate": 0.01, "loss": 2.0228, "step": 18720 }, { "epoch": 1.9214901477832513, "grad_norm": 0.1052050068974495, "learning_rate": 0.01, "loss": 2.0384, "step": 18723 }, { "epoch": 1.9217980295566504, "grad_norm": 0.07589028030633926, "learning_rate": 0.01, "loss": 2.0349, "step": 18726 }, { "epoch": 1.9221059113300494, "grad_norm": 0.0862005278468132, "learning_rate": 0.01, "loss": 2.0357, "step": 18729 }, { "epoch": 1.9224137931034484, "grad_norm": 0.07210662961006165, "learning_rate": 0.01, "loss": 2.0344, "step": 18732 }, { "epoch": 1.9227216748768474, "grad_norm": 0.0924825370311737, "learning_rate": 0.01, "loss": 2.0069, "step": 18735 }, { "epoch": 1.9230295566502464, "grad_norm": 0.05819706991314888, "learning_rate": 0.01, "loss": 2.0266, "step": 18738 }, { "epoch": 1.9233374384236455, "grad_norm": 0.04784362018108368, "learning_rate": 0.01, "loss": 2.0633, "step": 18741 }, { "epoch": 1.9236453201970445, "grad_norm": 0.07216835021972656, "learning_rate": 0.01, "loss": 2.0107, "step": 18744 }, { "epoch": 1.9239532019704435, "grad_norm": 0.05539752170443535, "learning_rate": 0.01, "loss": 2.0222, "step": 18747 }, { "epoch": 1.9242610837438425, "grad_norm": 0.07037390768527985, "learning_rate": 0.01, "loss": 2.0331, "step": 18750 }, { "epoch": 1.9245689655172413, "grad_norm": 0.0941152572631836, "learning_rate": 0.01, "loss": 2.0432, "step": 18753 }, { "epoch": 1.9248768472906403, "grad_norm": 0.05315488949418068, "learning_rate": 0.01, "loss": 2.036, "step": 18756 }, { "epoch": 1.9251847290640394, "grad_norm": 0.04986554756760597, "learning_rate": 0.01, "loss": 2.0249, "step": 18759 }, { "epoch": 1.9254926108374384, "grad_norm": 0.0750490128993988, "learning_rate": 0.01, "loss": 2.0448, "step": 18762 }, { "epoch": 1.9258004926108374, "grad_norm": 0.13903938233852386, "learning_rate": 0.01, "loss": 2.0433, "step": 18765 }, { "epoch": 1.9261083743842364, "grad_norm": 0.08733932673931122, "learning_rate": 0.01, "loss": 2.0223, "step": 18768 }, { "epoch": 1.9264162561576355, "grad_norm": 0.04527903348207474, "learning_rate": 0.01, "loss": 2.0112, "step": 18771 }, { "epoch": 1.9267241379310345, "grad_norm": 0.036207813769578934, "learning_rate": 0.01, "loss": 2.0353, "step": 18774 }, { "epoch": 1.9270320197044335, "grad_norm": 0.04572034999728203, "learning_rate": 0.01, "loss": 2.0388, "step": 18777 }, { "epoch": 1.9273399014778325, "grad_norm": 0.03662864491343498, "learning_rate": 0.01, "loss": 2.0023, "step": 18780 }, { "epoch": 1.9276477832512315, "grad_norm": 0.12049257755279541, "learning_rate": 0.01, "loss": 2.027, "step": 18783 }, { "epoch": 1.9279556650246306, "grad_norm": 0.1005631759762764, "learning_rate": 0.01, "loss": 2.0537, "step": 18786 }, { "epoch": 1.9282635467980296, "grad_norm": 0.13943985104560852, "learning_rate": 0.01, "loss": 2.0129, "step": 18789 }, { "epoch": 1.9285714285714286, "grad_norm": 0.13312341272830963, "learning_rate": 0.01, "loss": 2.0196, "step": 18792 }, { "epoch": 1.9288793103448276, "grad_norm": 0.0517788864672184, "learning_rate": 0.01, "loss": 2.0291, "step": 18795 }, { "epoch": 1.9291871921182266, "grad_norm": 0.05525217577815056, "learning_rate": 0.01, "loss": 2.0707, "step": 18798 }, { "epoch": 1.9294950738916257, "grad_norm": 0.04876135662198067, "learning_rate": 0.01, "loss": 2.0441, "step": 18801 }, { "epoch": 1.9298029556650245, "grad_norm": 0.04944787919521332, "learning_rate": 0.01, "loss": 2.0479, "step": 18804 }, { "epoch": 1.9301108374384235, "grad_norm": 0.06437812745571136, "learning_rate": 0.01, "loss": 2.0316, "step": 18807 }, { "epoch": 1.9304187192118225, "grad_norm": 0.04027709737420082, "learning_rate": 0.01, "loss": 2.0443, "step": 18810 }, { "epoch": 1.9307266009852215, "grad_norm": 0.05178974196314812, "learning_rate": 0.01, "loss": 2.0127, "step": 18813 }, { "epoch": 1.9310344827586206, "grad_norm": 0.05347009375691414, "learning_rate": 0.01, "loss": 2.0426, "step": 18816 }, { "epoch": 1.9313423645320196, "grad_norm": 0.03055960312485695, "learning_rate": 0.01, "loss": 2.0369, "step": 18819 }, { "epoch": 1.9316502463054186, "grad_norm": 0.10861945152282715, "learning_rate": 0.01, "loss": 2.0398, "step": 18822 }, { "epoch": 1.9319581280788176, "grad_norm": 0.05932777374982834, "learning_rate": 0.01, "loss": 2.0272, "step": 18825 }, { "epoch": 1.9322660098522166, "grad_norm": 0.046545740216970444, "learning_rate": 0.01, "loss": 2.0171, "step": 18828 }, { "epoch": 1.9325738916256157, "grad_norm": 0.07582221925258636, "learning_rate": 0.01, "loss": 2.0239, "step": 18831 }, { "epoch": 1.9328817733990147, "grad_norm": 0.07975540310144424, "learning_rate": 0.01, "loss": 2.0073, "step": 18834 }, { "epoch": 1.9331896551724137, "grad_norm": 0.07365059852600098, "learning_rate": 0.01, "loss": 2.0188, "step": 18837 }, { "epoch": 1.9334975369458127, "grad_norm": 0.09160298854112625, "learning_rate": 0.01, "loss": 2.027, "step": 18840 }, { "epoch": 1.9338054187192117, "grad_norm": 0.07767198234796524, "learning_rate": 0.01, "loss": 2.0157, "step": 18843 }, { "epoch": 1.9341133004926108, "grad_norm": 0.07545919716358185, "learning_rate": 0.01, "loss": 2.0334, "step": 18846 }, { "epoch": 1.9344211822660098, "grad_norm": 0.06564575433731079, "learning_rate": 0.01, "loss": 2.0199, "step": 18849 }, { "epoch": 1.9347290640394088, "grad_norm": 0.04205799475312233, "learning_rate": 0.01, "loss": 2.0275, "step": 18852 }, { "epoch": 1.9350369458128078, "grad_norm": 0.07428024709224701, "learning_rate": 0.01, "loss": 2.0258, "step": 18855 }, { "epoch": 1.9353448275862069, "grad_norm": 0.08150817453861237, "learning_rate": 0.01, "loss": 2.0404, "step": 18858 }, { "epoch": 1.9356527093596059, "grad_norm": 0.07489453256130219, "learning_rate": 0.01, "loss": 2.0489, "step": 18861 }, { "epoch": 1.935960591133005, "grad_norm": 0.09983116388320923, "learning_rate": 0.01, "loss": 2.0101, "step": 18864 }, { "epoch": 1.936268472906404, "grad_norm": 0.09074544906616211, "learning_rate": 0.01, "loss": 2.0385, "step": 18867 }, { "epoch": 1.936576354679803, "grad_norm": 0.056056223809719086, "learning_rate": 0.01, "loss": 2.0531, "step": 18870 }, { "epoch": 1.936884236453202, "grad_norm": 0.04818575084209442, "learning_rate": 0.01, "loss": 2.029, "step": 18873 }, { "epoch": 1.937192118226601, "grad_norm": 0.04811173304915428, "learning_rate": 0.01, "loss": 2.0147, "step": 18876 }, { "epoch": 1.9375, "grad_norm": 0.05799747258424759, "learning_rate": 0.01, "loss": 2.0466, "step": 18879 }, { "epoch": 1.937807881773399, "grad_norm": 0.07357611507177353, "learning_rate": 0.01, "loss": 2.0404, "step": 18882 }, { "epoch": 1.938115763546798, "grad_norm": 0.10275068879127502, "learning_rate": 0.01, "loss": 2.0468, "step": 18885 }, { "epoch": 1.938423645320197, "grad_norm": 0.03685866296291351, "learning_rate": 0.01, "loss": 2.0165, "step": 18888 }, { "epoch": 1.938731527093596, "grad_norm": 0.03603344038128853, "learning_rate": 0.01, "loss": 2.0403, "step": 18891 }, { "epoch": 1.939039408866995, "grad_norm": 0.06932532042264938, "learning_rate": 0.01, "loss": 2.0359, "step": 18894 }, { "epoch": 1.9393472906403941, "grad_norm": 0.05983889847993851, "learning_rate": 0.01, "loss": 2.0299, "step": 18897 }, { "epoch": 1.9396551724137931, "grad_norm": 0.1199260875582695, "learning_rate": 0.01, "loss": 2.0467, "step": 18900 }, { "epoch": 1.9399630541871922, "grad_norm": 0.06222264841198921, "learning_rate": 0.01, "loss": 2.0387, "step": 18903 }, { "epoch": 1.9402709359605912, "grad_norm": 0.0764993354678154, "learning_rate": 0.01, "loss": 2.0014, "step": 18906 }, { "epoch": 1.9405788177339902, "grad_norm": 0.04790098965167999, "learning_rate": 0.01, "loss": 2.0399, "step": 18909 }, { "epoch": 1.9408866995073892, "grad_norm": 0.03822425380349159, "learning_rate": 0.01, "loss": 2.0325, "step": 18912 }, { "epoch": 1.9411945812807883, "grad_norm": 0.05336176976561546, "learning_rate": 0.01, "loss": 2.0307, "step": 18915 }, { "epoch": 1.9415024630541873, "grad_norm": 0.08732246607542038, "learning_rate": 0.01, "loss": 2.0373, "step": 18918 }, { "epoch": 1.9418103448275863, "grad_norm": 0.08886411786079407, "learning_rate": 0.01, "loss": 2.0682, "step": 18921 }, { "epoch": 1.9421182266009853, "grad_norm": 0.08069706708192825, "learning_rate": 0.01, "loss": 2.0382, "step": 18924 }, { "epoch": 1.9424261083743843, "grad_norm": 0.08464798331260681, "learning_rate": 0.01, "loss": 2.0207, "step": 18927 }, { "epoch": 1.9427339901477834, "grad_norm": 0.07051963359117508, "learning_rate": 0.01, "loss": 2.0375, "step": 18930 }, { "epoch": 1.9430418719211824, "grad_norm": 0.037250157445669174, "learning_rate": 0.01, "loss": 2.0146, "step": 18933 }, { "epoch": 1.9433497536945814, "grad_norm": 0.07512888312339783, "learning_rate": 0.01, "loss": 2.0279, "step": 18936 }, { "epoch": 1.9436576354679804, "grad_norm": 0.12079732865095139, "learning_rate": 0.01, "loss": 2.0457, "step": 18939 }, { "epoch": 1.9439655172413794, "grad_norm": 0.0511600561439991, "learning_rate": 0.01, "loss": 2.0558, "step": 18942 }, { "epoch": 1.9442733990147785, "grad_norm": 0.06442293524742126, "learning_rate": 0.01, "loss": 2.07, "step": 18945 }, { "epoch": 1.9445812807881775, "grad_norm": 0.04928497597575188, "learning_rate": 0.01, "loss": 2.0088, "step": 18948 }, { "epoch": 1.9448891625615765, "grad_norm": 0.07882185280323029, "learning_rate": 0.01, "loss": 2.0192, "step": 18951 }, { "epoch": 1.9451970443349755, "grad_norm": 0.03649712726473808, "learning_rate": 0.01, "loss": 2.0319, "step": 18954 }, { "epoch": 1.9455049261083743, "grad_norm": 0.10654021054506302, "learning_rate": 0.01, "loss": 2.038, "step": 18957 }, { "epoch": 1.9458128078817734, "grad_norm": 0.09655455499887466, "learning_rate": 0.01, "loss": 2.0284, "step": 18960 }, { "epoch": 1.9461206896551724, "grad_norm": 0.06114486977458, "learning_rate": 0.01, "loss": 2.0578, "step": 18963 }, { "epoch": 1.9464285714285714, "grad_norm": 0.04167640954256058, "learning_rate": 0.01, "loss": 2.0229, "step": 18966 }, { "epoch": 1.9467364532019704, "grad_norm": 0.054138265550136566, "learning_rate": 0.01, "loss": 2.0439, "step": 18969 }, { "epoch": 1.9470443349753694, "grad_norm": 0.04728518798947334, "learning_rate": 0.01, "loss": 2.0485, "step": 18972 }, { "epoch": 1.9473522167487685, "grad_norm": 0.09992729872465134, "learning_rate": 0.01, "loss": 2.0581, "step": 18975 }, { "epoch": 1.9476600985221675, "grad_norm": 0.039344482123851776, "learning_rate": 0.01, "loss": 2.0213, "step": 18978 }, { "epoch": 1.9479679802955665, "grad_norm": 0.10921066254377365, "learning_rate": 0.01, "loss": 2.0538, "step": 18981 }, { "epoch": 1.9482758620689655, "grad_norm": 0.03921816125512123, "learning_rate": 0.01, "loss": 2.011, "step": 18984 }, { "epoch": 1.9485837438423645, "grad_norm": 0.08293361961841583, "learning_rate": 0.01, "loss": 2.0186, "step": 18987 }, { "epoch": 1.9488916256157636, "grad_norm": 0.08310680091381073, "learning_rate": 0.01, "loss": 2.065, "step": 18990 }, { "epoch": 1.9491995073891626, "grad_norm": 0.05105976015329361, "learning_rate": 0.01, "loss": 2.0461, "step": 18993 }, { "epoch": 1.9495073891625616, "grad_norm": 0.0387946255505085, "learning_rate": 0.01, "loss": 2.0227, "step": 18996 }, { "epoch": 1.9498152709359606, "grad_norm": 0.039592646062374115, "learning_rate": 0.01, "loss": 2.0409, "step": 18999 }, { "epoch": 1.9501231527093597, "grad_norm": 0.042499393224716187, "learning_rate": 0.01, "loss": 2.0388, "step": 19002 }, { "epoch": 1.9504310344827587, "grad_norm": 0.1202671155333519, "learning_rate": 0.01, "loss": 2.0185, "step": 19005 }, { "epoch": 1.9507389162561575, "grad_norm": 0.05047677457332611, "learning_rate": 0.01, "loss": 2.0534, "step": 19008 }, { "epoch": 1.9510467980295565, "grad_norm": 0.13210178911685944, "learning_rate": 0.01, "loss": 2.0502, "step": 19011 }, { "epoch": 1.9513546798029555, "grad_norm": 0.07093524187803268, "learning_rate": 0.01, "loss": 2.0426, "step": 19014 }, { "epoch": 1.9516625615763545, "grad_norm": 0.05528571456670761, "learning_rate": 0.01, "loss": 2.0348, "step": 19017 }, { "epoch": 1.9519704433497536, "grad_norm": 0.08988847583532333, "learning_rate": 0.01, "loss": 2.0214, "step": 19020 }, { "epoch": 1.9522783251231526, "grad_norm": 0.05767255648970604, "learning_rate": 0.01, "loss": 2.0335, "step": 19023 }, { "epoch": 1.9525862068965516, "grad_norm": 0.07641880214214325, "learning_rate": 0.01, "loss": 2.0278, "step": 19026 }, { "epoch": 1.9528940886699506, "grad_norm": 0.08751394599676132, "learning_rate": 0.01, "loss": 2.0298, "step": 19029 }, { "epoch": 1.9532019704433496, "grad_norm": 0.06144971400499344, "learning_rate": 0.01, "loss": 2.0433, "step": 19032 }, { "epoch": 1.9535098522167487, "grad_norm": 0.04502955824136734, "learning_rate": 0.01, "loss": 2.0442, "step": 19035 }, { "epoch": 1.9538177339901477, "grad_norm": 0.05031814053654671, "learning_rate": 0.01, "loss": 2.0491, "step": 19038 }, { "epoch": 1.9541256157635467, "grad_norm": 0.08885148912668228, "learning_rate": 0.01, "loss": 2.043, "step": 19041 }, { "epoch": 1.9544334975369457, "grad_norm": 0.05610232427716255, "learning_rate": 0.01, "loss": 2.0515, "step": 19044 }, { "epoch": 1.9547413793103448, "grad_norm": 0.07169227302074432, "learning_rate": 0.01, "loss": 2.0248, "step": 19047 }, { "epoch": 1.9550492610837438, "grad_norm": 0.07136573642492294, "learning_rate": 0.01, "loss": 2.0344, "step": 19050 }, { "epoch": 1.9553571428571428, "grad_norm": 0.07761941850185394, "learning_rate": 0.01, "loss": 2.0358, "step": 19053 }, { "epoch": 1.9556650246305418, "grad_norm": 0.062269363552331924, "learning_rate": 0.01, "loss": 2.0169, "step": 19056 }, { "epoch": 1.9559729064039408, "grad_norm": 0.08797021210193634, "learning_rate": 0.01, "loss": 1.9928, "step": 19059 }, { "epoch": 1.9562807881773399, "grad_norm": 0.04401189833879471, "learning_rate": 0.01, "loss": 2.0615, "step": 19062 }, { "epoch": 1.9565886699507389, "grad_norm": 0.08460985869169235, "learning_rate": 0.01, "loss": 2.0126, "step": 19065 }, { "epoch": 1.956896551724138, "grad_norm": 0.07027498632669449, "learning_rate": 0.01, "loss": 2.0038, "step": 19068 }, { "epoch": 1.957204433497537, "grad_norm": 0.06747457385063171, "learning_rate": 0.01, "loss": 2.0089, "step": 19071 }, { "epoch": 1.957512315270936, "grad_norm": 0.10890615731477737, "learning_rate": 0.01, "loss": 2.0418, "step": 19074 }, { "epoch": 1.957820197044335, "grad_norm": 0.1049666553735733, "learning_rate": 0.01, "loss": 2.0683, "step": 19077 }, { "epoch": 1.958128078817734, "grad_norm": 0.04320647194981575, "learning_rate": 0.01, "loss": 2.0371, "step": 19080 }, { "epoch": 1.958435960591133, "grad_norm": 0.06038364768028259, "learning_rate": 0.01, "loss": 2.0417, "step": 19083 }, { "epoch": 1.958743842364532, "grad_norm": 0.04486788064241409, "learning_rate": 0.01, "loss": 2.0153, "step": 19086 }, { "epoch": 1.959051724137931, "grad_norm": 0.045702993869781494, "learning_rate": 0.01, "loss": 2.0447, "step": 19089 }, { "epoch": 1.95935960591133, "grad_norm": 0.10784240067005157, "learning_rate": 0.01, "loss": 2.0231, "step": 19092 }, { "epoch": 1.959667487684729, "grad_norm": 0.07740773260593414, "learning_rate": 0.01, "loss": 2.079, "step": 19095 }, { "epoch": 1.9599753694581281, "grad_norm": 0.0517832413315773, "learning_rate": 0.01, "loss": 2.061, "step": 19098 }, { "epoch": 1.9602832512315271, "grad_norm": 0.04660172387957573, "learning_rate": 0.01, "loss": 2.026, "step": 19101 }, { "epoch": 1.9605911330049262, "grad_norm": 0.084842748939991, "learning_rate": 0.01, "loss": 2.0418, "step": 19104 }, { "epoch": 1.9608990147783252, "grad_norm": 0.10866342484951019, "learning_rate": 0.01, "loss": 2.0361, "step": 19107 }, { "epoch": 1.9612068965517242, "grad_norm": 0.060094647109508514, "learning_rate": 0.01, "loss": 2.0207, "step": 19110 }, { "epoch": 1.9615147783251232, "grad_norm": 0.04082890599966049, "learning_rate": 0.01, "loss": 1.9837, "step": 19113 }, { "epoch": 1.9618226600985222, "grad_norm": 0.09193670004606247, "learning_rate": 0.01, "loss": 2.045, "step": 19116 }, { "epoch": 1.9621305418719213, "grad_norm": 0.049036990851163864, "learning_rate": 0.01, "loss": 2.0014, "step": 19119 }, { "epoch": 1.9624384236453203, "grad_norm": 0.06658133864402771, "learning_rate": 0.01, "loss": 2.0256, "step": 19122 }, { "epoch": 1.9627463054187193, "grad_norm": 0.07290081679821014, "learning_rate": 0.01, "loss": 2.022, "step": 19125 }, { "epoch": 1.9630541871921183, "grad_norm": 0.05635548382997513, "learning_rate": 0.01, "loss": 2.0439, "step": 19128 }, { "epoch": 1.9633620689655173, "grad_norm": 0.07143761217594147, "learning_rate": 0.01, "loss": 2.009, "step": 19131 }, { "epoch": 1.9636699507389164, "grad_norm": 0.15296097099781036, "learning_rate": 0.01, "loss": 2.0539, "step": 19134 }, { "epoch": 1.9639778325123154, "grad_norm": 0.1795274019241333, "learning_rate": 0.01, "loss": 2.0418, "step": 19137 }, { "epoch": 1.9642857142857144, "grad_norm": 0.04691818729043007, "learning_rate": 0.01, "loss": 2.0114, "step": 19140 }, { "epoch": 1.9645935960591134, "grad_norm": 0.05018999055027962, "learning_rate": 0.01, "loss": 2.0558, "step": 19143 }, { "epoch": 1.9649014778325125, "grad_norm": 0.0349762961268425, "learning_rate": 0.01, "loss": 2.0409, "step": 19146 }, { "epoch": 1.9652093596059115, "grad_norm": 0.04055612534284592, "learning_rate": 0.01, "loss": 2.033, "step": 19149 }, { "epoch": 1.9655172413793105, "grad_norm": 0.04818587005138397, "learning_rate": 0.01, "loss": 2.0522, "step": 19152 }, { "epoch": 1.9658251231527095, "grad_norm": 0.03579457104206085, "learning_rate": 0.01, "loss": 2.0295, "step": 19155 }, { "epoch": 1.9661330049261085, "grad_norm": 0.04382238909602165, "learning_rate": 0.01, "loss": 2.0011, "step": 19158 }, { "epoch": 1.9664408866995073, "grad_norm": 0.03784547746181488, "learning_rate": 0.01, "loss": 2.0332, "step": 19161 }, { "epoch": 1.9667487684729064, "grad_norm": 0.049413155764341354, "learning_rate": 0.01, "loss": 2.0276, "step": 19164 }, { "epoch": 1.9670566502463054, "grad_norm": 0.10560319572687149, "learning_rate": 0.01, "loss": 2.012, "step": 19167 }, { "epoch": 1.9673645320197044, "grad_norm": 0.07912679761648178, "learning_rate": 0.01, "loss": 2.0233, "step": 19170 }, { "epoch": 1.9676724137931034, "grad_norm": 0.051868222653865814, "learning_rate": 0.01, "loss": 2.0399, "step": 19173 }, { "epoch": 1.9679802955665024, "grad_norm": 0.09925144910812378, "learning_rate": 0.01, "loss": 2.0382, "step": 19176 }, { "epoch": 1.9682881773399015, "grad_norm": 0.09824500232934952, "learning_rate": 0.01, "loss": 2.022, "step": 19179 }, { "epoch": 1.9685960591133005, "grad_norm": 0.04710378497838974, "learning_rate": 0.01, "loss": 2.018, "step": 19182 }, { "epoch": 1.9689039408866995, "grad_norm": 0.09339728951454163, "learning_rate": 0.01, "loss": 2.0623, "step": 19185 }, { "epoch": 1.9692118226600985, "grad_norm": 0.04485667496919632, "learning_rate": 0.01, "loss": 2.0361, "step": 19188 }, { "epoch": 1.9695197044334976, "grad_norm": 0.06367155909538269, "learning_rate": 0.01, "loss": 2.0269, "step": 19191 }, { "epoch": 1.9698275862068966, "grad_norm": 0.06692302227020264, "learning_rate": 0.01, "loss": 2.0475, "step": 19194 }, { "epoch": 1.9701354679802956, "grad_norm": 0.06107610464096069, "learning_rate": 0.01, "loss": 2.046, "step": 19197 }, { "epoch": 1.9704433497536946, "grad_norm": 0.06362861394882202, "learning_rate": 0.01, "loss": 2.0515, "step": 19200 }, { "epoch": 1.9707512315270936, "grad_norm": 0.07524324208498001, "learning_rate": 0.01, "loss": 2.044, "step": 19203 }, { "epoch": 1.9710591133004927, "grad_norm": 0.09118182212114334, "learning_rate": 0.01, "loss": 2.0501, "step": 19206 }, { "epoch": 1.9713669950738915, "grad_norm": 0.0823112204670906, "learning_rate": 0.01, "loss": 2.0305, "step": 19209 }, { "epoch": 1.9716748768472905, "grad_norm": 0.061318982392549515, "learning_rate": 0.01, "loss": 2.0499, "step": 19212 }, { "epoch": 1.9719827586206895, "grad_norm": 0.09838750958442688, "learning_rate": 0.01, "loss": 2.0211, "step": 19215 }, { "epoch": 1.9722906403940885, "grad_norm": 0.061727046966552734, "learning_rate": 0.01, "loss": 2.0671, "step": 19218 }, { "epoch": 1.9725985221674875, "grad_norm": 0.044177260249853134, "learning_rate": 0.01, "loss": 2.0429, "step": 19221 }, { "epoch": 1.9729064039408866, "grad_norm": 0.031012659892439842, "learning_rate": 0.01, "loss": 2.0204, "step": 19224 }, { "epoch": 1.9732142857142856, "grad_norm": 0.0593150295317173, "learning_rate": 0.01, "loss": 2.0418, "step": 19227 }, { "epoch": 1.9735221674876846, "grad_norm": 0.09283222258090973, "learning_rate": 0.01, "loss": 2.0363, "step": 19230 }, { "epoch": 1.9738300492610836, "grad_norm": 0.07416541129350662, "learning_rate": 0.01, "loss": 2.0101, "step": 19233 }, { "epoch": 1.9741379310344827, "grad_norm": 0.08513590693473816, "learning_rate": 0.01, "loss": 2.0284, "step": 19236 }, { "epoch": 1.9744458128078817, "grad_norm": 0.08401728421449661, "learning_rate": 0.01, "loss": 2.0356, "step": 19239 }, { "epoch": 1.9747536945812807, "grad_norm": 0.08488047868013382, "learning_rate": 0.01, "loss": 2.0408, "step": 19242 }, { "epoch": 1.9750615763546797, "grad_norm": 0.11438726633787155, "learning_rate": 0.01, "loss": 2.0439, "step": 19245 }, { "epoch": 1.9753694581280787, "grad_norm": 0.0416182205080986, "learning_rate": 0.01, "loss": 2.0034, "step": 19248 }, { "epoch": 1.9756773399014778, "grad_norm": 0.046806883066892624, "learning_rate": 0.01, "loss": 2.0307, "step": 19251 }, { "epoch": 1.9759852216748768, "grad_norm": 0.04319307208061218, "learning_rate": 0.01, "loss": 2.0404, "step": 19254 }, { "epoch": 1.9762931034482758, "grad_norm": 0.11832991987466812, "learning_rate": 0.01, "loss": 2.0338, "step": 19257 }, { "epoch": 1.9766009852216748, "grad_norm": 0.04716213047504425, "learning_rate": 0.01, "loss": 2.005, "step": 19260 }, { "epoch": 1.9769088669950738, "grad_norm": 0.08626002073287964, "learning_rate": 0.01, "loss": 2.0178, "step": 19263 }, { "epoch": 1.9772167487684729, "grad_norm": 0.0981634259223938, "learning_rate": 0.01, "loss": 2.0502, "step": 19266 }, { "epoch": 1.9775246305418719, "grad_norm": 0.0657229796051979, "learning_rate": 0.01, "loss": 2.065, "step": 19269 }, { "epoch": 1.977832512315271, "grad_norm": 0.0652332603931427, "learning_rate": 0.01, "loss": 2.0395, "step": 19272 }, { "epoch": 1.97814039408867, "grad_norm": 0.06810397654771805, "learning_rate": 0.01, "loss": 2.0418, "step": 19275 }, { "epoch": 1.978448275862069, "grad_norm": 0.04740637540817261, "learning_rate": 0.01, "loss": 2.0456, "step": 19278 }, { "epoch": 1.978756157635468, "grad_norm": 0.039233241230249405, "learning_rate": 0.01, "loss": 2.0348, "step": 19281 }, { "epoch": 1.979064039408867, "grad_norm": 0.07533819228410721, "learning_rate": 0.01, "loss": 2.0411, "step": 19284 }, { "epoch": 1.979371921182266, "grad_norm": 0.0820235162973404, "learning_rate": 0.01, "loss": 2.0299, "step": 19287 }, { "epoch": 1.979679802955665, "grad_norm": 0.057419124990701675, "learning_rate": 0.01, "loss": 2.0692, "step": 19290 }, { "epoch": 1.979987684729064, "grad_norm": 0.10119790583848953, "learning_rate": 0.01, "loss": 2.0752, "step": 19293 }, { "epoch": 1.980295566502463, "grad_norm": 0.116152822971344, "learning_rate": 0.01, "loss": 2.0377, "step": 19296 }, { "epoch": 1.980603448275862, "grad_norm": 0.05364501103758812, "learning_rate": 0.01, "loss": 2.0118, "step": 19299 }, { "epoch": 1.9809113300492611, "grad_norm": 0.09089913219213486, "learning_rate": 0.01, "loss": 2.0445, "step": 19302 }, { "epoch": 1.9812192118226601, "grad_norm": 0.06570890545845032, "learning_rate": 0.01, "loss": 2.048, "step": 19305 }, { "epoch": 1.9815270935960592, "grad_norm": 0.10739763081073761, "learning_rate": 0.01, "loss": 2.0527, "step": 19308 }, { "epoch": 1.9818349753694582, "grad_norm": 0.0396854430437088, "learning_rate": 0.01, "loss": 2.0414, "step": 19311 }, { "epoch": 1.9821428571428572, "grad_norm": 0.11273244023323059, "learning_rate": 0.01, "loss": 2.0447, "step": 19314 }, { "epoch": 1.9824507389162562, "grad_norm": 0.10009465366601944, "learning_rate": 0.01, "loss": 2.0512, "step": 19317 }, { "epoch": 1.9827586206896552, "grad_norm": 0.053756825625896454, "learning_rate": 0.01, "loss": 2.0731, "step": 19320 }, { "epoch": 1.9830665024630543, "grad_norm": 0.06603456288576126, "learning_rate": 0.01, "loss": 2.0399, "step": 19323 }, { "epoch": 1.9833743842364533, "grad_norm": 0.038810715079307556, "learning_rate": 0.01, "loss": 2.0272, "step": 19326 }, { "epoch": 1.9836822660098523, "grad_norm": 0.04284658655524254, "learning_rate": 0.01, "loss": 2.035, "step": 19329 }, { "epoch": 1.9839901477832513, "grad_norm": 0.04441271349787712, "learning_rate": 0.01, "loss": 2.0448, "step": 19332 }, { "epoch": 1.9842980295566504, "grad_norm": 0.04501213878393173, "learning_rate": 0.01, "loss": 2.0517, "step": 19335 }, { "epoch": 1.9846059113300494, "grad_norm": 0.05109642818570137, "learning_rate": 0.01, "loss": 2.0168, "step": 19338 }, { "epoch": 1.9849137931034484, "grad_norm": 0.03543083369731903, "learning_rate": 0.01, "loss": 2.0396, "step": 19341 }, { "epoch": 1.9852216748768474, "grad_norm": 0.04665149003267288, "learning_rate": 0.01, "loss": 2.0285, "step": 19344 }, { "epoch": 1.9855295566502464, "grad_norm": 0.035318441689014435, "learning_rate": 0.01, "loss": 2.0321, "step": 19347 }, { "epoch": 1.9858374384236455, "grad_norm": 0.035862043499946594, "learning_rate": 0.01, "loss": 2.0461, "step": 19350 }, { "epoch": 1.9861453201970445, "grad_norm": 0.128739133477211, "learning_rate": 0.01, "loss": 2.0561, "step": 19353 }, { "epoch": 1.9864532019704435, "grad_norm": 0.08115250617265701, "learning_rate": 0.01, "loss": 2.0364, "step": 19356 }, { "epoch": 1.9867610837438425, "grad_norm": 0.04203096404671669, "learning_rate": 0.01, "loss": 2.0298, "step": 19359 }, { "epoch": 1.9870689655172413, "grad_norm": 0.03801970183849335, "learning_rate": 0.01, "loss": 2.034, "step": 19362 }, { "epoch": 1.9873768472906403, "grad_norm": 0.05322232097387314, "learning_rate": 0.01, "loss": 2.0519, "step": 19365 }, { "epoch": 1.9876847290640394, "grad_norm": 0.037100568413734436, "learning_rate": 0.01, "loss": 2.0087, "step": 19368 }, { "epoch": 1.9879926108374384, "grad_norm": 0.03714398667216301, "learning_rate": 0.01, "loss": 2.0187, "step": 19371 }, { "epoch": 1.9883004926108374, "grad_norm": 0.050371263176202774, "learning_rate": 0.01, "loss": 2.0318, "step": 19374 }, { "epoch": 1.9886083743842364, "grad_norm": 0.03875119984149933, "learning_rate": 0.01, "loss": 2.0224, "step": 19377 }, { "epoch": 1.9889162561576355, "grad_norm": 0.06838756054639816, "learning_rate": 0.01, "loss": 2.0458, "step": 19380 }, { "epoch": 1.9892241379310345, "grad_norm": 0.04749476909637451, "learning_rate": 0.01, "loss": 2.022, "step": 19383 }, { "epoch": 1.9895320197044335, "grad_norm": 0.041247084736824036, "learning_rate": 0.01, "loss": 2.0088, "step": 19386 }, { "epoch": 1.9898399014778325, "grad_norm": 0.08582460135221481, "learning_rate": 0.01, "loss": 2.0061, "step": 19389 }, { "epoch": 1.9901477832512315, "grad_norm": 0.042033273726701736, "learning_rate": 0.01, "loss": 2.0516, "step": 19392 }, { "epoch": 1.9904556650246306, "grad_norm": 0.08395756036043167, "learning_rate": 0.01, "loss": 2.0338, "step": 19395 }, { "epoch": 1.9907635467980296, "grad_norm": 0.07154903560876846, "learning_rate": 0.01, "loss": 2.0168, "step": 19398 }, { "epoch": 1.9910714285714286, "grad_norm": 0.06137581169605255, "learning_rate": 0.01, "loss": 2.0046, "step": 19401 }, { "epoch": 1.9913793103448276, "grad_norm": 0.1226835623383522, "learning_rate": 0.01, "loss": 2.0314, "step": 19404 }, { "epoch": 1.9916871921182266, "grad_norm": 0.06524399667978287, "learning_rate": 0.01, "loss": 2.0581, "step": 19407 }, { "epoch": 1.9919950738916257, "grad_norm": 0.060310300439596176, "learning_rate": 0.01, "loss": 2.0205, "step": 19410 }, { "epoch": 1.9923029556650245, "grad_norm": 0.10605314373970032, "learning_rate": 0.01, "loss": 2.0461, "step": 19413 }, { "epoch": 1.9926108374384235, "grad_norm": 0.07056690007448196, "learning_rate": 0.01, "loss": 2.0603, "step": 19416 }, { "epoch": 1.9929187192118225, "grad_norm": 0.04367789626121521, "learning_rate": 0.01, "loss": 2.0358, "step": 19419 }, { "epoch": 1.9932266009852215, "grad_norm": 0.07856806367635727, "learning_rate": 0.01, "loss": 2.061, "step": 19422 }, { "epoch": 1.9935344827586206, "grad_norm": 0.07237541675567627, "learning_rate": 0.01, "loss": 2.0218, "step": 19425 }, { "epoch": 1.9938423645320196, "grad_norm": 0.04983443021774292, "learning_rate": 0.01, "loss": 2.0204, "step": 19428 }, { "epoch": 1.9941502463054186, "grad_norm": 0.049045633524656296, "learning_rate": 0.01, "loss": 2.0089, "step": 19431 }, { "epoch": 1.9944581280788176, "grad_norm": 0.07521536946296692, "learning_rate": 0.01, "loss": 2.021, "step": 19434 }, { "epoch": 1.9947660098522166, "grad_norm": 0.03521602228283882, "learning_rate": 0.01, "loss": 2.0386, "step": 19437 }, { "epoch": 1.9950738916256157, "grad_norm": 0.06075441092252731, "learning_rate": 0.01, "loss": 2.045, "step": 19440 }, { "epoch": 1.9953817733990147, "grad_norm": 0.08800282329320908, "learning_rate": 0.01, "loss": 2.0511, "step": 19443 }, { "epoch": 1.9956896551724137, "grad_norm": 0.04632639139890671, "learning_rate": 0.01, "loss": 2.0434, "step": 19446 }, { "epoch": 1.9959975369458127, "grad_norm": 0.05275778844952583, "learning_rate": 0.01, "loss": 2.0036, "step": 19449 }, { "epoch": 1.9963054187192117, "grad_norm": 0.04615132138133049, "learning_rate": 0.01, "loss": 1.9958, "step": 19452 }, { "epoch": 1.9966133004926108, "grad_norm": 0.12586715817451477, "learning_rate": 0.01, "loss": 2.0311, "step": 19455 }, { "epoch": 1.9969211822660098, "grad_norm": 0.09406362473964691, "learning_rate": 0.01, "loss": 2.028, "step": 19458 }, { "epoch": 1.9972290640394088, "grad_norm": 0.032408129423856735, "learning_rate": 0.01, "loss": 2.0311, "step": 19461 }, { "epoch": 1.9975369458128078, "grad_norm": 0.08810164034366608, "learning_rate": 0.01, "loss": 2.0364, "step": 19464 }, { "epoch": 1.9978448275862069, "grad_norm": 0.11263968795537949, "learning_rate": 0.01, "loss": 2.0263, "step": 19467 }, { "epoch": 1.9981527093596059, "grad_norm": 0.06618282198905945, "learning_rate": 0.01, "loss": 2.0435, "step": 19470 }, { "epoch": 1.998460591133005, "grad_norm": 0.03649067133665085, "learning_rate": 0.01, "loss": 2.0131, "step": 19473 }, { "epoch": 1.998768472906404, "grad_norm": 0.03718538209795952, "learning_rate": 0.01, "loss": 2.0517, "step": 19476 }, { "epoch": 1.999076354679803, "grad_norm": 0.047908563166856766, "learning_rate": 0.01, "loss": 2.0162, "step": 19479 }, { "epoch": 1.999384236453202, "grad_norm": 0.04926212877035141, "learning_rate": 0.01, "loss": 2.0401, "step": 19482 }, { "epoch": 1.999692118226601, "grad_norm": 0.08558470755815506, "learning_rate": 0.01, "loss": 2.0611, "step": 19485 }, { "epoch": 2.0, "grad_norm": 0.07099032402038574, "learning_rate": 0.01, "loss": 2.0408, "step": 19488 }, { "epoch": 2.003391921060746, "grad_norm": 0.08354249596595764, "learning_rate": 0.01, "loss": 2.065, "step": 19491 }, { "epoch": 2.003700277520814, "grad_norm": 0.09245558828115463, "learning_rate": 0.01, "loss": 2.0571, "step": 19494 }, { "epoch": 2.004008633980882, "grad_norm": 0.09228463470935822, "learning_rate": 0.01, "loss": 2.0749, "step": 19497 }, { "epoch": 2.00431699044095, "grad_norm": 0.05558445304632187, "learning_rate": 0.01, "loss": 2.0754, "step": 19500 }, { "epoch": 2.0046253469010176, "grad_norm": 0.04827789589762688, "learning_rate": 0.01, "loss": 2.0431, "step": 19503 }, { "epoch": 2.0049337033610852, "grad_norm": 0.063465915620327, "learning_rate": 0.01, "loss": 2.067, "step": 19506 }, { "epoch": 2.0052420598211533, "grad_norm": 0.047648850828409195, "learning_rate": 0.01, "loss": 2.0614, "step": 19509 }, { "epoch": 2.005550416281221, "grad_norm": 0.038050852715969086, "learning_rate": 0.01, "loss": 2.0699, "step": 19512 }, { "epoch": 2.005858772741289, "grad_norm": 0.036580201238393784, "learning_rate": 0.01, "loss": 2.0832, "step": 19515 }, { "epoch": 2.0061671292013568, "grad_norm": 0.033919982612133026, "learning_rate": 0.01, "loss": 2.0492, "step": 19518 }, { "epoch": 2.0064754856614244, "grad_norm": 0.05007147789001465, "learning_rate": 0.01, "loss": 2.0662, "step": 19521 }, { "epoch": 2.0067838421214925, "grad_norm": 0.042562540620565414, "learning_rate": 0.01, "loss": 2.0508, "step": 19524 }, { "epoch": 2.00709219858156, "grad_norm": 0.0872044712305069, "learning_rate": 0.01, "loss": 2.0628, "step": 19527 }, { "epoch": 2.0074005550416283, "grad_norm": 0.06331472843885422, "learning_rate": 0.01, "loss": 2.0479, "step": 19530 }, { "epoch": 2.007708911501696, "grad_norm": 0.08928905427455902, "learning_rate": 0.01, "loss": 2.051, "step": 19533 }, { "epoch": 2.0080172679617636, "grad_norm": 0.0869508758187294, "learning_rate": 0.01, "loss": 2.0497, "step": 19536 }, { "epoch": 2.0083256244218317, "grad_norm": 0.04267793521285057, "learning_rate": 0.01, "loss": 2.0457, "step": 19539 }, { "epoch": 2.0086339808818994, "grad_norm": 0.046922095119953156, "learning_rate": 0.01, "loss": 2.0488, "step": 19542 }, { "epoch": 2.0089423373419675, "grad_norm": 0.07374055683612823, "learning_rate": 0.01, "loss": 2.0571, "step": 19545 }, { "epoch": 2.009250693802035, "grad_norm": 0.042078323662281036, "learning_rate": 0.01, "loss": 2.0501, "step": 19548 }, { "epoch": 2.009559050262103, "grad_norm": 0.052491504698991776, "learning_rate": 0.01, "loss": 2.0996, "step": 19551 }, { "epoch": 2.009867406722171, "grad_norm": 0.04900294169783592, "learning_rate": 0.01, "loss": 2.0476, "step": 19554 }, { "epoch": 2.0101757631822386, "grad_norm": 0.13067513704299927, "learning_rate": 0.01, "loss": 2.066, "step": 19557 }, { "epoch": 2.0104841196423067, "grad_norm": 0.09229371696710587, "learning_rate": 0.01, "loss": 2.0708, "step": 19560 }, { "epoch": 2.0107924761023743, "grad_norm": 0.05014317110180855, "learning_rate": 0.01, "loss": 2.0474, "step": 19563 }, { "epoch": 2.011100832562442, "grad_norm": 0.06385400146245956, "learning_rate": 0.01, "loss": 2.0427, "step": 19566 }, { "epoch": 2.01140918902251, "grad_norm": 0.04037034139037132, "learning_rate": 0.01, "loss": 2.0508, "step": 19569 }, { "epoch": 2.0117175454825778, "grad_norm": 0.02967817150056362, "learning_rate": 0.01, "loss": 2.0548, "step": 19572 }, { "epoch": 2.012025901942646, "grad_norm": 0.04519663751125336, "learning_rate": 0.01, "loss": 2.0497, "step": 19575 }, { "epoch": 2.0123342584027135, "grad_norm": 0.07825223356485367, "learning_rate": 0.01, "loss": 2.06, "step": 19578 }, { "epoch": 2.012642614862781, "grad_norm": 0.13088425993919373, "learning_rate": 0.01, "loss": 2.0546, "step": 19581 }, { "epoch": 2.0129509713228493, "grad_norm": 0.10113450884819031, "learning_rate": 0.01, "loss": 2.0628, "step": 19584 }, { "epoch": 2.013259327782917, "grad_norm": 0.06662772595882416, "learning_rate": 0.01, "loss": 2.0655, "step": 19587 }, { "epoch": 2.013567684242985, "grad_norm": 0.04824177175760269, "learning_rate": 0.01, "loss": 2.0853, "step": 19590 }, { "epoch": 2.0138760407030527, "grad_norm": 0.07255363464355469, "learning_rate": 0.01, "loss": 2.0614, "step": 19593 }, { "epoch": 2.0141843971631204, "grad_norm": 0.061763517558574677, "learning_rate": 0.01, "loss": 2.0761, "step": 19596 }, { "epoch": 2.0144927536231885, "grad_norm": 0.058266837149858475, "learning_rate": 0.01, "loss": 2.0495, "step": 19599 }, { "epoch": 2.014801110083256, "grad_norm": 0.05644237995147705, "learning_rate": 0.01, "loss": 2.0416, "step": 19602 }, { "epoch": 2.0151094665433242, "grad_norm": 0.102548748254776, "learning_rate": 0.01, "loss": 2.0581, "step": 19605 }, { "epoch": 2.015417823003392, "grad_norm": 0.06959159672260284, "learning_rate": 0.01, "loss": 2.0288, "step": 19608 }, { "epoch": 2.0157261794634596, "grad_norm": 0.09066049009561539, "learning_rate": 0.01, "loss": 2.045, "step": 19611 }, { "epoch": 2.0160345359235277, "grad_norm": 0.061073388904333115, "learning_rate": 0.01, "loss": 2.0523, "step": 19614 }, { "epoch": 2.0163428923835953, "grad_norm": 0.09000861644744873, "learning_rate": 0.01, "loss": 2.0501, "step": 19617 }, { "epoch": 2.0166512488436634, "grad_norm": 0.040078576654195786, "learning_rate": 0.01, "loss": 2.0409, "step": 19620 }, { "epoch": 2.016959605303731, "grad_norm": 0.045984748750925064, "learning_rate": 0.01, "loss": 2.0456, "step": 19623 }, { "epoch": 2.0172679617637987, "grad_norm": 0.045942965894937515, "learning_rate": 0.01, "loss": 2.0319, "step": 19626 }, { "epoch": 2.017576318223867, "grad_norm": 0.059133514761924744, "learning_rate": 0.01, "loss": 2.0504, "step": 19629 }, { "epoch": 2.0178846746839345, "grad_norm": 0.07386631518602371, "learning_rate": 0.01, "loss": 2.0367, "step": 19632 }, { "epoch": 2.0181930311440026, "grad_norm": 0.06906817853450775, "learning_rate": 0.01, "loss": 2.0491, "step": 19635 }, { "epoch": 2.0185013876040703, "grad_norm": 0.06259379535913467, "learning_rate": 0.01, "loss": 2.0812, "step": 19638 }, { "epoch": 2.018809744064138, "grad_norm": 0.07011371850967407, "learning_rate": 0.01, "loss": 2.0716, "step": 19641 }, { "epoch": 2.019118100524206, "grad_norm": 0.05763932690024376, "learning_rate": 0.01, "loss": 2.0618, "step": 19644 }, { "epoch": 2.0194264569842737, "grad_norm": 0.09810350090265274, "learning_rate": 0.01, "loss": 2.046, "step": 19647 }, { "epoch": 2.019734813444342, "grad_norm": 0.11434987187385559, "learning_rate": 0.01, "loss": 2.057, "step": 19650 }, { "epoch": 2.0200431699044095, "grad_norm": 0.10692505538463593, "learning_rate": 0.01, "loss": 2.0669, "step": 19653 }, { "epoch": 2.020351526364477, "grad_norm": 0.06918302178382874, "learning_rate": 0.01, "loss": 2.0636, "step": 19656 }, { "epoch": 2.0206598828245452, "grad_norm": 0.06661045551300049, "learning_rate": 0.01, "loss": 2.0366, "step": 19659 }, { "epoch": 2.020968239284613, "grad_norm": 0.03996479883790016, "learning_rate": 0.01, "loss": 2.0552, "step": 19662 }, { "epoch": 2.021276595744681, "grad_norm": 0.041359271854162216, "learning_rate": 0.01, "loss": 2.0542, "step": 19665 }, { "epoch": 2.0215849522047487, "grad_norm": 0.046693217009305954, "learning_rate": 0.01, "loss": 2.0448, "step": 19668 }, { "epoch": 2.0218933086648163, "grad_norm": 0.031555816531181335, "learning_rate": 0.01, "loss": 2.0146, "step": 19671 }, { "epoch": 2.0222016651248844, "grad_norm": 0.036573588848114014, "learning_rate": 0.01, "loss": 2.038, "step": 19674 }, { "epoch": 2.022510021584952, "grad_norm": 0.09064050763845444, "learning_rate": 0.01, "loss": 2.0609, "step": 19677 }, { "epoch": 2.02281837804502, "grad_norm": 0.11865704506635666, "learning_rate": 0.01, "loss": 2.0412, "step": 19680 }, { "epoch": 2.023126734505088, "grad_norm": 0.08720502257347107, "learning_rate": 0.01, "loss": 2.0403, "step": 19683 }, { "epoch": 2.0234350909651555, "grad_norm": 0.06953457742929459, "learning_rate": 0.01, "loss": 2.0769, "step": 19686 }, { "epoch": 2.0237434474252236, "grad_norm": 0.04386308416724205, "learning_rate": 0.01, "loss": 2.0595, "step": 19689 }, { "epoch": 2.0240518038852913, "grad_norm": 0.047490183264017105, "learning_rate": 0.01, "loss": 2.0478, "step": 19692 }, { "epoch": 2.0243601603453594, "grad_norm": 0.061406608670949936, "learning_rate": 0.01, "loss": 2.0582, "step": 19695 }, { "epoch": 2.024668516805427, "grad_norm": 0.0626315325498581, "learning_rate": 0.01, "loss": 2.0908, "step": 19698 }, { "epoch": 2.0249768732654947, "grad_norm": 0.048075366765260696, "learning_rate": 0.01, "loss": 2.0703, "step": 19701 }, { "epoch": 2.025285229725563, "grad_norm": 0.06243044510483742, "learning_rate": 0.01, "loss": 2.0452, "step": 19704 }, { "epoch": 2.0255935861856305, "grad_norm": 0.06498084217309952, "learning_rate": 0.01, "loss": 2.0812, "step": 19707 }, { "epoch": 2.0259019426456986, "grad_norm": 0.05091014504432678, "learning_rate": 0.01, "loss": 2.0451, "step": 19710 }, { "epoch": 2.026210299105766, "grad_norm": 0.04733705893158913, "learning_rate": 0.01, "loss": 2.059, "step": 19713 }, { "epoch": 2.026518655565834, "grad_norm": 0.10866537690162659, "learning_rate": 0.01, "loss": 2.0505, "step": 19716 }, { "epoch": 2.026827012025902, "grad_norm": 0.07504774630069733, "learning_rate": 0.01, "loss": 2.0318, "step": 19719 }, { "epoch": 2.0271353684859696, "grad_norm": 0.07938455790281296, "learning_rate": 0.01, "loss": 2.0643, "step": 19722 }, { "epoch": 2.0274437249460378, "grad_norm": 0.10090157389640808, "learning_rate": 0.01, "loss": 2.0381, "step": 19725 }, { "epoch": 2.0277520814061054, "grad_norm": 0.04133126139640808, "learning_rate": 0.01, "loss": 2.0126, "step": 19728 }, { "epoch": 2.0280604378661735, "grad_norm": 0.12022694200277328, "learning_rate": 0.01, "loss": 2.0463, "step": 19731 }, { "epoch": 2.028368794326241, "grad_norm": 0.05904841795563698, "learning_rate": 0.01, "loss": 2.0547, "step": 19734 }, { "epoch": 2.028677150786309, "grad_norm": 0.08344896882772446, "learning_rate": 0.01, "loss": 2.0721, "step": 19737 }, { "epoch": 2.028985507246377, "grad_norm": 0.045264534652233124, "learning_rate": 0.01, "loss": 2.0597, "step": 19740 }, { "epoch": 2.0292938637064446, "grad_norm": 0.05907116085290909, "learning_rate": 0.01, "loss": 2.0561, "step": 19743 }, { "epoch": 2.0296022201665127, "grad_norm": 0.04975851625204086, "learning_rate": 0.01, "loss": 2.0642, "step": 19746 }, { "epoch": 2.0299105766265804, "grad_norm": 0.08190937340259552, "learning_rate": 0.01, "loss": 2.0429, "step": 19749 }, { "epoch": 2.030218933086648, "grad_norm": 0.14594541490077972, "learning_rate": 0.01, "loss": 2.0438, "step": 19752 }, { "epoch": 2.030527289546716, "grad_norm": 0.11920581012964249, "learning_rate": 0.01, "loss": 2.0549, "step": 19755 }, { "epoch": 2.030835646006784, "grad_norm": 0.04334663227200508, "learning_rate": 0.01, "loss": 2.0835, "step": 19758 }, { "epoch": 2.031144002466852, "grad_norm": 0.05323721095919609, "learning_rate": 0.01, "loss": 2.0546, "step": 19761 }, { "epoch": 2.0314523589269196, "grad_norm": 0.09565315395593643, "learning_rate": 0.01, "loss": 2.0622, "step": 19764 }, { "epoch": 2.031760715386987, "grad_norm": 0.14498768746852875, "learning_rate": 0.01, "loss": 2.0336, "step": 19767 }, { "epoch": 2.0320690718470553, "grad_norm": 0.16146855056285858, "learning_rate": 0.01, "loss": 2.0444, "step": 19770 }, { "epoch": 2.032377428307123, "grad_norm": 0.09023015946149826, "learning_rate": 0.01, "loss": 2.0341, "step": 19773 }, { "epoch": 2.032685784767191, "grad_norm": 0.05290327966213226, "learning_rate": 0.01, "loss": 2.0442, "step": 19776 }, { "epoch": 2.0329941412272587, "grad_norm": 0.06551158428192139, "learning_rate": 0.01, "loss": 2.062, "step": 19779 }, { "epoch": 2.0333024976873264, "grad_norm": 0.09268030524253845, "learning_rate": 0.01, "loss": 2.0668, "step": 19782 }, { "epoch": 2.0336108541473945, "grad_norm": 0.05402594432234764, "learning_rate": 0.01, "loss": 2.0739, "step": 19785 }, { "epoch": 2.033919210607462, "grad_norm": 0.052478570491075516, "learning_rate": 0.01, "loss": 2.0709, "step": 19788 }, { "epoch": 2.0342275670675303, "grad_norm": 0.03243448957800865, "learning_rate": 0.01, "loss": 2.049, "step": 19791 }, { "epoch": 2.034535923527598, "grad_norm": 0.08627558499574661, "learning_rate": 0.01, "loss": 2.058, "step": 19794 }, { "epoch": 2.0348442799876656, "grad_norm": 0.04757314547896385, "learning_rate": 0.01, "loss": 2.0637, "step": 19797 }, { "epoch": 2.0351526364477337, "grad_norm": 0.11217369884252548, "learning_rate": 0.01, "loss": 2.0581, "step": 19800 }, { "epoch": 2.0354609929078014, "grad_norm": 0.07525690644979477, "learning_rate": 0.01, "loss": 2.0782, "step": 19803 }, { "epoch": 2.0357693493678695, "grad_norm": 0.0945955365896225, "learning_rate": 0.01, "loss": 2.0594, "step": 19806 }, { "epoch": 2.036077705827937, "grad_norm": 0.07789472490549088, "learning_rate": 0.01, "loss": 2.0444, "step": 19809 }, { "epoch": 2.036386062288005, "grad_norm": 0.06672658026218414, "learning_rate": 0.01, "loss": 2.0392, "step": 19812 }, { "epoch": 2.036694418748073, "grad_norm": 0.06361529976129532, "learning_rate": 0.01, "loss": 2.0504, "step": 19815 }, { "epoch": 2.0370027752081405, "grad_norm": 0.03530391305685043, "learning_rate": 0.01, "loss": 2.0453, "step": 19818 }, { "epoch": 2.0373111316682087, "grad_norm": 0.08201812207698822, "learning_rate": 0.01, "loss": 2.023, "step": 19821 }, { "epoch": 2.0376194881282763, "grad_norm": 0.09198293834924698, "learning_rate": 0.01, "loss": 2.0422, "step": 19824 }, { "epoch": 2.037927844588344, "grad_norm": 0.058875374495983124, "learning_rate": 0.01, "loss": 2.0484, "step": 19827 }, { "epoch": 2.038236201048412, "grad_norm": 0.04453382268548012, "learning_rate": 0.01, "loss": 2.0322, "step": 19830 }, { "epoch": 2.0385445575084797, "grad_norm": 0.03713817149400711, "learning_rate": 0.01, "loss": 2.0151, "step": 19833 }, { "epoch": 2.038852913968548, "grad_norm": 0.056827936321496964, "learning_rate": 0.01, "loss": 2.0532, "step": 19836 }, { "epoch": 2.0391612704286155, "grad_norm": 0.08166830986738205, "learning_rate": 0.01, "loss": 2.0602, "step": 19839 }, { "epoch": 2.039469626888683, "grad_norm": 0.06837287545204163, "learning_rate": 0.01, "loss": 2.0567, "step": 19842 }, { "epoch": 2.0397779833487513, "grad_norm": 0.08867949992418289, "learning_rate": 0.01, "loss": 2.0559, "step": 19845 }, { "epoch": 2.040086339808819, "grad_norm": 0.07119370251893997, "learning_rate": 0.01, "loss": 2.0357, "step": 19848 }, { "epoch": 2.040394696268887, "grad_norm": 0.07701986283063889, "learning_rate": 0.01, "loss": 2.0695, "step": 19851 }, { "epoch": 2.0407030527289547, "grad_norm": 0.04700729623436928, "learning_rate": 0.01, "loss": 2.0266, "step": 19854 }, { "epoch": 2.0410114091890224, "grad_norm": 0.05898338556289673, "learning_rate": 0.01, "loss": 2.0519, "step": 19857 }, { "epoch": 2.0413197656490905, "grad_norm": 0.11953815072774887, "learning_rate": 0.01, "loss": 2.0487, "step": 19860 }, { "epoch": 2.041628122109158, "grad_norm": 0.09704854339361191, "learning_rate": 0.01, "loss": 2.0511, "step": 19863 }, { "epoch": 2.041936478569226, "grad_norm": 0.1362537145614624, "learning_rate": 0.01, "loss": 2.0354, "step": 19866 }, { "epoch": 2.042244835029294, "grad_norm": 0.09366025030612946, "learning_rate": 0.01, "loss": 2.0628, "step": 19869 }, { "epoch": 2.0425531914893615, "grad_norm": 0.05397522822022438, "learning_rate": 0.01, "loss": 2.0646, "step": 19872 }, { "epoch": 2.0428615479494296, "grad_norm": 0.07723390311002731, "learning_rate": 0.01, "loss": 2.0523, "step": 19875 }, { "epoch": 2.0431699044094973, "grad_norm": 0.08418615907430649, "learning_rate": 0.01, "loss": 2.0878, "step": 19878 }, { "epoch": 2.0434782608695654, "grad_norm": 0.06149798631668091, "learning_rate": 0.01, "loss": 2.0547, "step": 19881 }, { "epoch": 2.043786617329633, "grad_norm": 0.0474097803235054, "learning_rate": 0.01, "loss": 2.0539, "step": 19884 }, { "epoch": 2.0440949737897007, "grad_norm": 0.04854200407862663, "learning_rate": 0.01, "loss": 2.0637, "step": 19887 }, { "epoch": 2.044403330249769, "grad_norm": 0.04509511590003967, "learning_rate": 0.01, "loss": 2.0679, "step": 19890 }, { "epoch": 2.0447116867098365, "grad_norm": 0.05422825738787651, "learning_rate": 0.01, "loss": 2.0449, "step": 19893 }, { "epoch": 2.0450200431699046, "grad_norm": 0.06556607037782669, "learning_rate": 0.01, "loss": 2.0429, "step": 19896 }, { "epoch": 2.0453283996299723, "grad_norm": 0.03906751424074173, "learning_rate": 0.01, "loss": 2.0694, "step": 19899 }, { "epoch": 2.04563675609004, "grad_norm": 0.05207069590687752, "learning_rate": 0.01, "loss": 2.0363, "step": 19902 }, { "epoch": 2.045945112550108, "grad_norm": 0.04187217727303505, "learning_rate": 0.01, "loss": 2.0413, "step": 19905 }, { "epoch": 2.0462534690101757, "grad_norm": 0.04163263365626335, "learning_rate": 0.01, "loss": 2.0535, "step": 19908 }, { "epoch": 2.046561825470244, "grad_norm": 0.037544943392276764, "learning_rate": 0.01, "loss": 2.033, "step": 19911 }, { "epoch": 2.0468701819303114, "grad_norm": 0.03623516857624054, "learning_rate": 0.01, "loss": 2.0758, "step": 19914 }, { "epoch": 2.047178538390379, "grad_norm": 0.08026546239852905, "learning_rate": 0.01, "loss": 2.0654, "step": 19917 }, { "epoch": 2.047486894850447, "grad_norm": 0.05316372588276863, "learning_rate": 0.01, "loss": 2.0751, "step": 19920 }, { "epoch": 2.047795251310515, "grad_norm": 0.062127552926540375, "learning_rate": 0.01, "loss": 2.0458, "step": 19923 }, { "epoch": 2.048103607770583, "grad_norm": 0.049675267189741135, "learning_rate": 0.01, "loss": 2.0741, "step": 19926 }, { "epoch": 2.0484119642306506, "grad_norm": 0.0425347164273262, "learning_rate": 0.01, "loss": 2.0573, "step": 19929 }, { "epoch": 2.0487203206907183, "grad_norm": 0.03532329574227333, "learning_rate": 0.01, "loss": 2.0367, "step": 19932 }, { "epoch": 2.0490286771507864, "grad_norm": 0.05779660493135452, "learning_rate": 0.01, "loss": 2.0801, "step": 19935 }, { "epoch": 2.049337033610854, "grad_norm": 0.07841507345438004, "learning_rate": 0.01, "loss": 2.0603, "step": 19938 }, { "epoch": 2.049645390070922, "grad_norm": 0.0883709266781807, "learning_rate": 0.01, "loss": 2.0594, "step": 19941 }, { "epoch": 2.04995374653099, "grad_norm": 0.09949532151222229, "learning_rate": 0.01, "loss": 2.0422, "step": 19944 }, { "epoch": 2.0502621029910575, "grad_norm": 0.04350358247756958, "learning_rate": 0.01, "loss": 2.0439, "step": 19947 }, { "epoch": 2.0505704594511256, "grad_norm": 0.042655814439058304, "learning_rate": 0.01, "loss": 2.0821, "step": 19950 }, { "epoch": 2.0508788159111933, "grad_norm": 0.060070816427469254, "learning_rate": 0.01, "loss": 2.0495, "step": 19953 }, { "epoch": 2.0511871723712614, "grad_norm": 0.06479921191930771, "learning_rate": 0.01, "loss": 2.0783, "step": 19956 }, { "epoch": 2.051495528831329, "grad_norm": 0.0982329398393631, "learning_rate": 0.01, "loss": 2.0756, "step": 19959 }, { "epoch": 2.0518038852913967, "grad_norm": 0.10483184456825256, "learning_rate": 0.01, "loss": 2.035, "step": 19962 }, { "epoch": 2.052112241751465, "grad_norm": 0.06383049488067627, "learning_rate": 0.01, "loss": 2.0252, "step": 19965 }, { "epoch": 2.0524205982115324, "grad_norm": 0.13797828555107117, "learning_rate": 0.01, "loss": 2.0604, "step": 19968 }, { "epoch": 2.0527289546716005, "grad_norm": 0.037840090692043304, "learning_rate": 0.01, "loss": 2.0604, "step": 19971 }, { "epoch": 2.053037311131668, "grad_norm": 0.043872520327568054, "learning_rate": 0.01, "loss": 2.0609, "step": 19974 }, { "epoch": 2.053345667591736, "grad_norm": 0.03223152458667755, "learning_rate": 0.01, "loss": 2.0198, "step": 19977 }, { "epoch": 2.053654024051804, "grad_norm": 0.05935351178050041, "learning_rate": 0.01, "loss": 2.066, "step": 19980 }, { "epoch": 2.0539623805118716, "grad_norm": 0.054079607129096985, "learning_rate": 0.01, "loss": 2.0665, "step": 19983 }, { "epoch": 2.0542707369719397, "grad_norm": 0.04307890310883522, "learning_rate": 0.01, "loss": 2.0145, "step": 19986 }, { "epoch": 2.0545790934320074, "grad_norm": 0.06624720245599747, "learning_rate": 0.01, "loss": 2.052, "step": 19989 }, { "epoch": 2.054887449892075, "grad_norm": 0.08096028864383698, "learning_rate": 0.01, "loss": 2.0947, "step": 19992 }, { "epoch": 2.055195806352143, "grad_norm": 0.0872364342212677, "learning_rate": 0.01, "loss": 2.0675, "step": 19995 }, { "epoch": 2.055504162812211, "grad_norm": 0.04538879171013832, "learning_rate": 0.01, "loss": 2.031, "step": 19998 }, { "epoch": 2.055812519272279, "grad_norm": 0.11873256415128708, "learning_rate": 0.01, "loss": 2.0682, "step": 20001 }, { "epoch": 2.0561208757323466, "grad_norm": 0.05929452180862427, "learning_rate": 0.01, "loss": 2.0602, "step": 20004 }, { "epoch": 2.0564292321924142, "grad_norm": 0.05131294205784798, "learning_rate": 0.01, "loss": 2.0569, "step": 20007 }, { "epoch": 2.0567375886524824, "grad_norm": 0.05690256133675575, "learning_rate": 0.01, "loss": 2.0779, "step": 20010 }, { "epoch": 2.05704594511255, "grad_norm": 0.04414551705121994, "learning_rate": 0.01, "loss": 2.0168, "step": 20013 }, { "epoch": 2.057354301572618, "grad_norm": 0.04017036780714989, "learning_rate": 0.01, "loss": 2.0349, "step": 20016 }, { "epoch": 2.0576626580326858, "grad_norm": 0.06785457581281662, "learning_rate": 0.01, "loss": 2.058, "step": 20019 }, { "epoch": 2.0579710144927534, "grad_norm": 0.06258828938007355, "learning_rate": 0.01, "loss": 2.0484, "step": 20022 }, { "epoch": 2.0582793709528215, "grad_norm": 0.11196446418762207, "learning_rate": 0.01, "loss": 2.0624, "step": 20025 }, { "epoch": 2.058587727412889, "grad_norm": 0.08678428828716278, "learning_rate": 0.01, "loss": 2.0667, "step": 20028 }, { "epoch": 2.0588960838729573, "grad_norm": 0.13598018884658813, "learning_rate": 0.01, "loss": 2.0239, "step": 20031 }, { "epoch": 2.059204440333025, "grad_norm": 0.06666143238544464, "learning_rate": 0.01, "loss": 2.037, "step": 20034 }, { "epoch": 2.0595127967930926, "grad_norm": 0.05994727462530136, "learning_rate": 0.01, "loss": 2.0674, "step": 20037 }, { "epoch": 2.0598211532531607, "grad_norm": 0.03867008537054062, "learning_rate": 0.01, "loss": 2.056, "step": 20040 }, { "epoch": 2.0601295097132284, "grad_norm": 0.13077500462532043, "learning_rate": 0.01, "loss": 2.0345, "step": 20043 }, { "epoch": 2.0604378661732965, "grad_norm": 0.057164691388607025, "learning_rate": 0.01, "loss": 2.0436, "step": 20046 }, { "epoch": 2.060746222633364, "grad_norm": 0.07206998765468597, "learning_rate": 0.01, "loss": 2.0472, "step": 20049 }, { "epoch": 2.061054579093432, "grad_norm": 0.08844766765832901, "learning_rate": 0.01, "loss": 2.0535, "step": 20052 }, { "epoch": 2.0613629355535, "grad_norm": 0.07533573359251022, "learning_rate": 0.01, "loss": 2.0698, "step": 20055 }, { "epoch": 2.0616712920135676, "grad_norm": 0.03260966017842293, "learning_rate": 0.01, "loss": 2.0494, "step": 20058 }, { "epoch": 2.0619796484736357, "grad_norm": 0.03491971641778946, "learning_rate": 0.01, "loss": 2.0612, "step": 20061 }, { "epoch": 2.0622880049337033, "grad_norm": 0.04022398218512535, "learning_rate": 0.01, "loss": 2.0343, "step": 20064 }, { "epoch": 2.062596361393771, "grad_norm": 0.06325655430555344, "learning_rate": 0.01, "loss": 2.0409, "step": 20067 }, { "epoch": 2.062904717853839, "grad_norm": 0.06704667955636978, "learning_rate": 0.01, "loss": 2.0641, "step": 20070 }, { "epoch": 2.0632130743139068, "grad_norm": 0.06883389502763748, "learning_rate": 0.01, "loss": 2.0462, "step": 20073 }, { "epoch": 2.063521430773975, "grad_norm": 0.05242495611310005, "learning_rate": 0.01, "loss": 2.0701, "step": 20076 }, { "epoch": 2.0638297872340425, "grad_norm": 0.06587128341197968, "learning_rate": 0.01, "loss": 2.0443, "step": 20079 }, { "epoch": 2.06413814369411, "grad_norm": 0.03571178764104843, "learning_rate": 0.01, "loss": 2.0519, "step": 20082 }, { "epoch": 2.0644465001541783, "grad_norm": 0.061605412513017654, "learning_rate": 0.01, "loss": 2.0357, "step": 20085 }, { "epoch": 2.064754856614246, "grad_norm": 0.05552279204130173, "learning_rate": 0.01, "loss": 2.0258, "step": 20088 }, { "epoch": 2.065063213074314, "grad_norm": 0.047950152307748795, "learning_rate": 0.01, "loss": 2.0354, "step": 20091 }, { "epoch": 2.0653715695343817, "grad_norm": 0.09466604888439178, "learning_rate": 0.01, "loss": 2.0523, "step": 20094 }, { "epoch": 2.0656799259944494, "grad_norm": 0.04828859865665436, "learning_rate": 0.01, "loss": 2.0466, "step": 20097 }, { "epoch": 2.0659882824545175, "grad_norm": 0.03933820128440857, "learning_rate": 0.01, "loss": 2.0458, "step": 20100 }, { "epoch": 2.066296638914585, "grad_norm": 0.05044875666499138, "learning_rate": 0.01, "loss": 2.0396, "step": 20103 }, { "epoch": 2.0666049953746533, "grad_norm": 0.04152398556470871, "learning_rate": 0.01, "loss": 2.0269, "step": 20106 }, { "epoch": 2.066913351834721, "grad_norm": 0.10098916292190552, "learning_rate": 0.01, "loss": 2.0726, "step": 20109 }, { "epoch": 2.0672217082947886, "grad_norm": 0.06381060183048248, "learning_rate": 0.01, "loss": 2.0276, "step": 20112 }, { "epoch": 2.0675300647548567, "grad_norm": 0.13991308212280273, "learning_rate": 0.01, "loss": 2.0508, "step": 20115 }, { "epoch": 2.0678384212149243, "grad_norm": 0.061171598732471466, "learning_rate": 0.01, "loss": 2.0232, "step": 20118 }, { "epoch": 2.0681467776749924, "grad_norm": 0.04276692867279053, "learning_rate": 0.01, "loss": 2.0667, "step": 20121 }, { "epoch": 2.06845513413506, "grad_norm": 0.03582247719168663, "learning_rate": 0.01, "loss": 2.0511, "step": 20124 }, { "epoch": 2.0687634905951278, "grad_norm": 0.037077244371175766, "learning_rate": 0.01, "loss": 2.0275, "step": 20127 }, { "epoch": 2.069071847055196, "grad_norm": 0.11291185766458511, "learning_rate": 0.01, "loss": 2.0207, "step": 20130 }, { "epoch": 2.0693802035152635, "grad_norm": 0.06811921298503876, "learning_rate": 0.01, "loss": 2.0437, "step": 20133 }, { "epoch": 2.0696885599753316, "grad_norm": 0.049292147159576416, "learning_rate": 0.01, "loss": 2.0327, "step": 20136 }, { "epoch": 2.0699969164353993, "grad_norm": 0.08937390893697739, "learning_rate": 0.01, "loss": 2.0548, "step": 20139 }, { "epoch": 2.070305272895467, "grad_norm": 0.04353107511997223, "learning_rate": 0.01, "loss": 2.0522, "step": 20142 }, { "epoch": 2.070613629355535, "grad_norm": 0.03737090900540352, "learning_rate": 0.01, "loss": 2.0525, "step": 20145 }, { "epoch": 2.0709219858156027, "grad_norm": 0.038217127323150635, "learning_rate": 0.01, "loss": 2.0285, "step": 20148 }, { "epoch": 2.071230342275671, "grad_norm": 0.07162989675998688, "learning_rate": 0.01, "loss": 2.0556, "step": 20151 }, { "epoch": 2.0715386987357385, "grad_norm": 0.06507647782564163, "learning_rate": 0.01, "loss": 2.0348, "step": 20154 }, { "epoch": 2.071847055195806, "grad_norm": 0.07880635559558868, "learning_rate": 0.01, "loss": 2.0599, "step": 20157 }, { "epoch": 2.0721554116558742, "grad_norm": 0.11247913539409637, "learning_rate": 0.01, "loss": 2.042, "step": 20160 }, { "epoch": 2.072463768115942, "grad_norm": 0.04084709286689758, "learning_rate": 0.01, "loss": 2.0369, "step": 20163 }, { "epoch": 2.07277212457601, "grad_norm": 0.06349261850118637, "learning_rate": 0.01, "loss": 2.0656, "step": 20166 }, { "epoch": 2.0730804810360777, "grad_norm": 0.03916813060641289, "learning_rate": 0.01, "loss": 2.0142, "step": 20169 }, { "epoch": 2.0733888374961453, "grad_norm": 0.041867464780807495, "learning_rate": 0.01, "loss": 2.0613, "step": 20172 }, { "epoch": 2.0736971939562134, "grad_norm": 0.09670063108205795, "learning_rate": 0.01, "loss": 2.0254, "step": 20175 }, { "epoch": 2.074005550416281, "grad_norm": 0.05259916931390762, "learning_rate": 0.01, "loss": 2.059, "step": 20178 }, { "epoch": 2.074313906876349, "grad_norm": 0.0970730185508728, "learning_rate": 0.01, "loss": 2.0557, "step": 20181 }, { "epoch": 2.074622263336417, "grad_norm": 0.1317344307899475, "learning_rate": 0.01, "loss": 2.0225, "step": 20184 }, { "epoch": 2.0749306197964845, "grad_norm": 0.0787033885717392, "learning_rate": 0.01, "loss": 2.072, "step": 20187 }, { "epoch": 2.0752389762565526, "grad_norm": 0.04037567600607872, "learning_rate": 0.01, "loss": 2.0422, "step": 20190 }, { "epoch": 2.0755473327166203, "grad_norm": 0.03588324785232544, "learning_rate": 0.01, "loss": 2.0396, "step": 20193 }, { "epoch": 2.0758556891766884, "grad_norm": 0.05277855321764946, "learning_rate": 0.01, "loss": 2.0702, "step": 20196 }, { "epoch": 2.076164045636756, "grad_norm": 0.050833381712436676, "learning_rate": 0.01, "loss": 2.0748, "step": 20199 }, { "epoch": 2.076472402096824, "grad_norm": 0.089606374502182, "learning_rate": 0.01, "loss": 2.0482, "step": 20202 }, { "epoch": 2.076780758556892, "grad_norm": 0.05270789936184883, "learning_rate": 0.01, "loss": 2.0555, "step": 20205 }, { "epoch": 2.0770891150169595, "grad_norm": 0.06895376741886139, "learning_rate": 0.01, "loss": 2.0509, "step": 20208 }, { "epoch": 2.0773974714770276, "grad_norm": 0.11967889964580536, "learning_rate": 0.01, "loss": 2.0512, "step": 20211 }, { "epoch": 2.0777058279370952, "grad_norm": 0.11328759789466858, "learning_rate": 0.01, "loss": 2.002, "step": 20214 }, { "epoch": 2.078014184397163, "grad_norm": 0.045189183205366135, "learning_rate": 0.01, "loss": 2.0211, "step": 20217 }, { "epoch": 2.078322540857231, "grad_norm": 0.05716565251350403, "learning_rate": 0.01, "loss": 2.0413, "step": 20220 }, { "epoch": 2.0786308973172987, "grad_norm": 0.07340056449174881, "learning_rate": 0.01, "loss": 2.0457, "step": 20223 }, { "epoch": 2.0789392537773668, "grad_norm": 0.05395069718360901, "learning_rate": 0.01, "loss": 2.0582, "step": 20226 }, { "epoch": 2.0792476102374344, "grad_norm": 0.03723681718111038, "learning_rate": 0.01, "loss": 2.0581, "step": 20229 }, { "epoch": 2.0795559666975025, "grad_norm": 0.10626024752855301, "learning_rate": 0.01, "loss": 2.0351, "step": 20232 }, { "epoch": 2.07986432315757, "grad_norm": 0.09987606853246689, "learning_rate": 0.01, "loss": 2.067, "step": 20235 }, { "epoch": 2.080172679617638, "grad_norm": 0.07282060384750366, "learning_rate": 0.01, "loss": 2.0511, "step": 20238 }, { "epoch": 2.080481036077706, "grad_norm": 0.04192940518260002, "learning_rate": 0.01, "loss": 2.0519, "step": 20241 }, { "epoch": 2.0807893925377736, "grad_norm": 0.06585846096277237, "learning_rate": 0.01, "loss": 2.0282, "step": 20244 }, { "epoch": 2.0810977489978413, "grad_norm": 0.04427814856171608, "learning_rate": 0.01, "loss": 2.017, "step": 20247 }, { "epoch": 2.0814061054579094, "grad_norm": 0.05114896968007088, "learning_rate": 0.01, "loss": 2.059, "step": 20250 }, { "epoch": 2.081714461917977, "grad_norm": 0.0445995107293129, "learning_rate": 0.01, "loss": 2.0544, "step": 20253 }, { "epoch": 2.082022818378045, "grad_norm": 0.04904405400156975, "learning_rate": 0.01, "loss": 2.0428, "step": 20256 }, { "epoch": 2.082331174838113, "grad_norm": 0.03620357811450958, "learning_rate": 0.01, "loss": 2.0493, "step": 20259 }, { "epoch": 2.082639531298181, "grad_norm": 0.10994633287191391, "learning_rate": 0.01, "loss": 2.0719, "step": 20262 }, { "epoch": 2.0829478877582486, "grad_norm": 0.05244474112987518, "learning_rate": 0.01, "loss": 2.0559, "step": 20265 }, { "epoch": 2.0832562442183162, "grad_norm": 0.05937792733311653, "learning_rate": 0.01, "loss": 2.0593, "step": 20268 }, { "epoch": 2.0835646006783843, "grad_norm": 0.08669353276491165, "learning_rate": 0.01, "loss": 2.0539, "step": 20271 }, { "epoch": 2.083872957138452, "grad_norm": 0.054145876318216324, "learning_rate": 0.01, "loss": 2.0281, "step": 20274 }, { "epoch": 2.08418131359852, "grad_norm": 0.040682870894670486, "learning_rate": 0.01, "loss": 2.0657, "step": 20277 }, { "epoch": 2.0844896700585878, "grad_norm": 0.04110307991504669, "learning_rate": 0.01, "loss": 2.053, "step": 20280 }, { "epoch": 2.0847980265186554, "grad_norm": 0.13420680165290833, "learning_rate": 0.01, "loss": 2.0568, "step": 20283 }, { "epoch": 2.0851063829787235, "grad_norm": 0.049191731959581375, "learning_rate": 0.01, "loss": 2.0498, "step": 20286 }, { "epoch": 2.085414739438791, "grad_norm": 0.04682133346796036, "learning_rate": 0.01, "loss": 2.0532, "step": 20289 }, { "epoch": 2.0857230958988593, "grad_norm": 0.043646588921546936, "learning_rate": 0.01, "loss": 2.0582, "step": 20292 }, { "epoch": 2.086031452358927, "grad_norm": 0.05107354745268822, "learning_rate": 0.01, "loss": 2.0574, "step": 20295 }, { "epoch": 2.0863398088189946, "grad_norm": 0.06274458020925522, "learning_rate": 0.01, "loss": 2.0621, "step": 20298 }, { "epoch": 2.0866481652790627, "grad_norm": 0.11294244229793549, "learning_rate": 0.01, "loss": 2.0685, "step": 20301 }, { "epoch": 2.0869565217391304, "grad_norm": 0.04948057234287262, "learning_rate": 0.01, "loss": 2.0469, "step": 20304 }, { "epoch": 2.0872648781991985, "grad_norm": 0.04451402649283409, "learning_rate": 0.01, "loss": 2.0675, "step": 20307 }, { "epoch": 2.087573234659266, "grad_norm": 0.04940638318657875, "learning_rate": 0.01, "loss": 2.0584, "step": 20310 }, { "epoch": 2.087881591119334, "grad_norm": 0.06530692428350449, "learning_rate": 0.01, "loss": 2.0541, "step": 20313 }, { "epoch": 2.088189947579402, "grad_norm": 0.13395404815673828, "learning_rate": 0.01, "loss": 2.028, "step": 20316 }, { "epoch": 2.0884983040394696, "grad_norm": 0.09222474694252014, "learning_rate": 0.01, "loss": 2.045, "step": 20319 }, { "epoch": 2.0888066604995377, "grad_norm": 0.06510595977306366, "learning_rate": 0.01, "loss": 2.0291, "step": 20322 }, { "epoch": 2.0891150169596053, "grad_norm": 0.055552888661623, "learning_rate": 0.01, "loss": 2.0612, "step": 20325 }, { "epoch": 2.089423373419673, "grad_norm": 0.04411375895142555, "learning_rate": 0.01, "loss": 2.0368, "step": 20328 }, { "epoch": 2.089731729879741, "grad_norm": 0.05151544511318207, "learning_rate": 0.01, "loss": 2.0539, "step": 20331 }, { "epoch": 2.0900400863398088, "grad_norm": 0.09836700558662415, "learning_rate": 0.01, "loss": 2.0353, "step": 20334 }, { "epoch": 2.090348442799877, "grad_norm": 0.06430090218782425, "learning_rate": 0.01, "loss": 2.043, "step": 20337 }, { "epoch": 2.0906567992599445, "grad_norm": 0.09683403372764587, "learning_rate": 0.01, "loss": 2.0459, "step": 20340 }, { "epoch": 2.090965155720012, "grad_norm": 0.08345566689968109, "learning_rate": 0.01, "loss": 2.0543, "step": 20343 }, { "epoch": 2.0912735121800803, "grad_norm": 0.045199088752269745, "learning_rate": 0.01, "loss": 2.0509, "step": 20346 }, { "epoch": 2.091581868640148, "grad_norm": 0.0399625338613987, "learning_rate": 0.01, "loss": 2.0256, "step": 20349 }, { "epoch": 2.091890225100216, "grad_norm": 0.03815968707203865, "learning_rate": 0.01, "loss": 2.0196, "step": 20352 }, { "epoch": 2.0921985815602837, "grad_norm": 0.054826896637678146, "learning_rate": 0.01, "loss": 2.035, "step": 20355 }, { "epoch": 2.0925069380203514, "grad_norm": 0.05717878043651581, "learning_rate": 0.01, "loss": 2.0613, "step": 20358 }, { "epoch": 2.0928152944804195, "grad_norm": 0.09222474694252014, "learning_rate": 0.01, "loss": 2.037, "step": 20361 }, { "epoch": 2.093123650940487, "grad_norm": 0.0983637347817421, "learning_rate": 0.01, "loss": 2.0652, "step": 20364 }, { "epoch": 2.0934320074005552, "grad_norm": 0.05338272079825401, "learning_rate": 0.01, "loss": 2.0448, "step": 20367 }, { "epoch": 2.093740363860623, "grad_norm": 0.047821927815675735, "learning_rate": 0.01, "loss": 2.0299, "step": 20370 }, { "epoch": 2.0940487203206906, "grad_norm": 0.10657365620136261, "learning_rate": 0.01, "loss": 2.0533, "step": 20373 }, { "epoch": 2.0943570767807587, "grad_norm": 0.08650174736976624, "learning_rate": 0.01, "loss": 2.0486, "step": 20376 }, { "epoch": 2.0946654332408263, "grad_norm": 0.06975332647562027, "learning_rate": 0.01, "loss": 2.0365, "step": 20379 }, { "epoch": 2.0949737897008944, "grad_norm": 0.08443387597799301, "learning_rate": 0.01, "loss": 2.0529, "step": 20382 }, { "epoch": 2.095282146160962, "grad_norm": 0.04376668483018875, "learning_rate": 0.01, "loss": 2.0238, "step": 20385 }, { "epoch": 2.0955905026210297, "grad_norm": 0.1501801759004593, "learning_rate": 0.01, "loss": 2.0461, "step": 20388 }, { "epoch": 2.095898859081098, "grad_norm": 0.08488426357507706, "learning_rate": 0.01, "loss": 2.0391, "step": 20391 }, { "epoch": 2.0962072155411655, "grad_norm": 0.05569930747151375, "learning_rate": 0.01, "loss": 2.0375, "step": 20394 }, { "epoch": 2.0965155720012336, "grad_norm": 0.059826262295246124, "learning_rate": 0.01, "loss": 2.0303, "step": 20397 }, { "epoch": 2.0968239284613013, "grad_norm": 0.0911981388926506, "learning_rate": 0.01, "loss": 2.0568, "step": 20400 }, { "epoch": 2.097132284921369, "grad_norm": 0.03921716660261154, "learning_rate": 0.01, "loss": 2.0296, "step": 20403 }, { "epoch": 2.097440641381437, "grad_norm": 0.07355164736509323, "learning_rate": 0.01, "loss": 2.0397, "step": 20406 }, { "epoch": 2.0977489978415047, "grad_norm": 0.031198322772979736, "learning_rate": 0.01, "loss": 2.0755, "step": 20409 }, { "epoch": 2.098057354301573, "grad_norm": 0.07405471056699753, "learning_rate": 0.01, "loss": 2.0174, "step": 20412 }, { "epoch": 2.0983657107616405, "grad_norm": 0.08160628378391266, "learning_rate": 0.01, "loss": 2.0509, "step": 20415 }, { "epoch": 2.098674067221708, "grad_norm": 0.11125149577856064, "learning_rate": 0.01, "loss": 2.0612, "step": 20418 }, { "epoch": 2.0989824236817762, "grad_norm": 0.04484894871711731, "learning_rate": 0.01, "loss": 2.061, "step": 20421 }, { "epoch": 2.099290780141844, "grad_norm": 0.07138056308031082, "learning_rate": 0.01, "loss": 2.0469, "step": 20424 }, { "epoch": 2.099599136601912, "grad_norm": 0.0794389471411705, "learning_rate": 0.01, "loss": 2.0423, "step": 20427 }, { "epoch": 2.0999074930619797, "grad_norm": 0.05673963576555252, "learning_rate": 0.01, "loss": 2.0708, "step": 20430 }, { "epoch": 2.1002158495220473, "grad_norm": 0.054527074098587036, "learning_rate": 0.01, "loss": 2.0304, "step": 20433 }, { "epoch": 2.1005242059821154, "grad_norm": 0.04964460805058479, "learning_rate": 0.01, "loss": 2.0302, "step": 20436 }, { "epoch": 2.100832562442183, "grad_norm": 0.07147829979658127, "learning_rate": 0.01, "loss": 2.0365, "step": 20439 }, { "epoch": 2.101140918902251, "grad_norm": 0.07474019378423691, "learning_rate": 0.01, "loss": 2.0559, "step": 20442 }, { "epoch": 2.101449275362319, "grad_norm": 0.06944689154624939, "learning_rate": 0.01, "loss": 2.0477, "step": 20445 }, { "epoch": 2.1017576318223865, "grad_norm": 0.07336383312940598, "learning_rate": 0.01, "loss": 2.0386, "step": 20448 }, { "epoch": 2.1020659882824546, "grad_norm": 0.08889491856098175, "learning_rate": 0.01, "loss": 2.0822, "step": 20451 }, { "epoch": 2.1023743447425223, "grad_norm": 0.07154878228902817, "learning_rate": 0.01, "loss": 2.0672, "step": 20454 }, { "epoch": 2.1026827012025904, "grad_norm": 0.0471792072057724, "learning_rate": 0.01, "loss": 2.026, "step": 20457 }, { "epoch": 2.102991057662658, "grad_norm": 0.10836900025606155, "learning_rate": 0.01, "loss": 2.037, "step": 20460 }, { "epoch": 2.1032994141227257, "grad_norm": 0.05292079225182533, "learning_rate": 0.01, "loss": 2.0145, "step": 20463 }, { "epoch": 2.103607770582794, "grad_norm": 0.13195812702178955, "learning_rate": 0.01, "loss": 2.0426, "step": 20466 }, { "epoch": 2.1039161270428615, "grad_norm": 0.1316298395395279, "learning_rate": 0.01, "loss": 2.0179, "step": 20469 }, { "epoch": 2.1042244835029296, "grad_norm": 0.04061713069677353, "learning_rate": 0.01, "loss": 2.0304, "step": 20472 }, { "epoch": 2.1045328399629972, "grad_norm": 0.09250857681035995, "learning_rate": 0.01, "loss": 2.0596, "step": 20475 }, { "epoch": 2.104841196423065, "grad_norm": 0.058364611119031906, "learning_rate": 0.01, "loss": 2.0442, "step": 20478 }, { "epoch": 2.105149552883133, "grad_norm": 0.046974651515483856, "learning_rate": 0.01, "loss": 2.0439, "step": 20481 }, { "epoch": 2.1054579093432007, "grad_norm": 0.04835136979818344, "learning_rate": 0.01, "loss": 2.044, "step": 20484 }, { "epoch": 2.1057662658032688, "grad_norm": 0.04643654450774193, "learning_rate": 0.01, "loss": 2.0276, "step": 20487 }, { "epoch": 2.1060746222633364, "grad_norm": 0.10667752474546432, "learning_rate": 0.01, "loss": 2.0356, "step": 20490 }, { "epoch": 2.106382978723404, "grad_norm": 0.07521391659975052, "learning_rate": 0.01, "loss": 2.0552, "step": 20493 }, { "epoch": 2.106691335183472, "grad_norm": 0.10269229114055634, "learning_rate": 0.01, "loss": 2.0452, "step": 20496 }, { "epoch": 2.10699969164354, "grad_norm": 0.040783487260341644, "learning_rate": 0.01, "loss": 2.0567, "step": 20499 }, { "epoch": 2.107308048103608, "grad_norm": 0.05012373626232147, "learning_rate": 0.01, "loss": 2.0359, "step": 20502 }, { "epoch": 2.1076164045636756, "grad_norm": 0.042675044387578964, "learning_rate": 0.01, "loss": 2.0367, "step": 20505 }, { "epoch": 2.1079247610237433, "grad_norm": 0.06164225935935974, "learning_rate": 0.01, "loss": 2.0324, "step": 20508 }, { "epoch": 2.1082331174838114, "grad_norm": 0.06368019431829453, "learning_rate": 0.01, "loss": 2.0228, "step": 20511 }, { "epoch": 2.108541473943879, "grad_norm": 0.049319278448820114, "learning_rate": 0.01, "loss": 2.0443, "step": 20514 }, { "epoch": 2.108849830403947, "grad_norm": 0.06070362776517868, "learning_rate": 0.01, "loss": 2.045, "step": 20517 }, { "epoch": 2.109158186864015, "grad_norm": 0.07560203224420547, "learning_rate": 0.01, "loss": 2.0469, "step": 20520 }, { "epoch": 2.1094665433240825, "grad_norm": 0.05563758686184883, "learning_rate": 0.01, "loss": 2.0415, "step": 20523 }, { "epoch": 2.1097748997841506, "grad_norm": 0.04349389672279358, "learning_rate": 0.01, "loss": 2.0396, "step": 20526 }, { "epoch": 2.110083256244218, "grad_norm": 0.05849798396229744, "learning_rate": 0.01, "loss": 2.0235, "step": 20529 }, { "epoch": 2.1103916127042863, "grad_norm": 0.06813669949769974, "learning_rate": 0.01, "loss": 2.0541, "step": 20532 }, { "epoch": 2.110699969164354, "grad_norm": 0.08951954543590546, "learning_rate": 0.01, "loss": 2.0232, "step": 20535 }, { "epoch": 2.1110083256244216, "grad_norm": 0.08673957735300064, "learning_rate": 0.01, "loss": 2.0602, "step": 20538 }, { "epoch": 2.1113166820844897, "grad_norm": 0.06135937571525574, "learning_rate": 0.01, "loss": 2.0416, "step": 20541 }, { "epoch": 2.1116250385445574, "grad_norm": 0.08979734778404236, "learning_rate": 0.01, "loss": 2.0504, "step": 20544 }, { "epoch": 2.1119333950046255, "grad_norm": 0.07878229022026062, "learning_rate": 0.01, "loss": 2.0194, "step": 20547 }, { "epoch": 2.112241751464693, "grad_norm": 0.07120572775602341, "learning_rate": 0.01, "loss": 2.0414, "step": 20550 }, { "epoch": 2.112550107924761, "grad_norm": 0.11559943854808807, "learning_rate": 0.01, "loss": 2.0334, "step": 20553 }, { "epoch": 2.112858464384829, "grad_norm": 0.09005344659090042, "learning_rate": 0.01, "loss": 2.0457, "step": 20556 }, { "epoch": 2.1131668208448966, "grad_norm": 0.05143802613019943, "learning_rate": 0.01, "loss": 2.0364, "step": 20559 }, { "epoch": 2.1134751773049647, "grad_norm": 0.03870050981640816, "learning_rate": 0.01, "loss": 2.0216, "step": 20562 }, { "epoch": 2.1137835337650324, "grad_norm": 0.06471268832683563, "learning_rate": 0.01, "loss": 2.0116, "step": 20565 }, { "epoch": 2.1140918902251, "grad_norm": 0.07047493010759354, "learning_rate": 0.01, "loss": 2.052, "step": 20568 }, { "epoch": 2.114400246685168, "grad_norm": 0.08189850300550461, "learning_rate": 0.01, "loss": 2.012, "step": 20571 }, { "epoch": 2.114708603145236, "grad_norm": 0.04041110724210739, "learning_rate": 0.01, "loss": 2.0604, "step": 20574 }, { "epoch": 2.115016959605304, "grad_norm": 0.06573761254549026, "learning_rate": 0.01, "loss": 2.0466, "step": 20577 }, { "epoch": 2.1153253160653716, "grad_norm": 0.06190131977200508, "learning_rate": 0.01, "loss": 2.0475, "step": 20580 }, { "epoch": 2.115633672525439, "grad_norm": 0.044485028833150864, "learning_rate": 0.01, "loss": 2.0336, "step": 20583 }, { "epoch": 2.1159420289855073, "grad_norm": 0.03833581507205963, "learning_rate": 0.01, "loss": 2.0446, "step": 20586 }, { "epoch": 2.116250385445575, "grad_norm": 0.07705090194940567, "learning_rate": 0.01, "loss": 2.0546, "step": 20589 }, { "epoch": 2.116558741905643, "grad_norm": 0.12635158002376556, "learning_rate": 0.01, "loss": 2.0399, "step": 20592 }, { "epoch": 2.1168670983657107, "grad_norm": 0.06076742708683014, "learning_rate": 0.01, "loss": 2.0409, "step": 20595 }, { "epoch": 2.1171754548257784, "grad_norm": 0.06634259968996048, "learning_rate": 0.01, "loss": 2.0603, "step": 20598 }, { "epoch": 2.1174838112858465, "grad_norm": 0.06167810782790184, "learning_rate": 0.01, "loss": 2.0404, "step": 20601 }, { "epoch": 2.117792167745914, "grad_norm": 0.0474831759929657, "learning_rate": 0.01, "loss": 2.0413, "step": 20604 }, { "epoch": 2.1181005242059823, "grad_norm": 0.04799888655543327, "learning_rate": 0.01, "loss": 2.0628, "step": 20607 }, { "epoch": 2.11840888066605, "grad_norm": 0.055144913494586945, "learning_rate": 0.01, "loss": 2.0126, "step": 20610 }, { "epoch": 2.1187172371261176, "grad_norm": 0.1116517037153244, "learning_rate": 0.01, "loss": 2.0467, "step": 20613 }, { "epoch": 2.1190255935861857, "grad_norm": 0.06136411800980568, "learning_rate": 0.01, "loss": 2.0489, "step": 20616 }, { "epoch": 2.1193339500462534, "grad_norm": 0.09005284309387207, "learning_rate": 0.01, "loss": 2.0185, "step": 20619 }, { "epoch": 2.1196423065063215, "grad_norm": 0.19345355033874512, "learning_rate": 0.01, "loss": 2.0588, "step": 20622 }, { "epoch": 2.119950662966389, "grad_norm": 0.14011520147323608, "learning_rate": 0.01, "loss": 2.0264, "step": 20625 }, { "epoch": 2.120259019426457, "grad_norm": 0.06036897376179695, "learning_rate": 0.01, "loss": 2.0566, "step": 20628 }, { "epoch": 2.120567375886525, "grad_norm": 0.05589490756392479, "learning_rate": 0.01, "loss": 2.0455, "step": 20631 }, { "epoch": 2.1208757323465925, "grad_norm": 0.07571965456008911, "learning_rate": 0.01, "loss": 2.0403, "step": 20634 }, { "epoch": 2.1211840888066607, "grad_norm": 0.05558032542467117, "learning_rate": 0.01, "loss": 2.0373, "step": 20637 }, { "epoch": 2.1214924452667283, "grad_norm": 0.048844993114471436, "learning_rate": 0.01, "loss": 2.0209, "step": 20640 }, { "epoch": 2.121800801726796, "grad_norm": 0.04454483836889267, "learning_rate": 0.01, "loss": 2.0431, "step": 20643 }, { "epoch": 2.122109158186864, "grad_norm": 0.04565678536891937, "learning_rate": 0.01, "loss": 2.0498, "step": 20646 }, { "epoch": 2.1224175146469317, "grad_norm": 0.04082358628511429, "learning_rate": 0.01, "loss": 2.0201, "step": 20649 }, { "epoch": 2.122725871107, "grad_norm": 0.03753536194562912, "learning_rate": 0.01, "loss": 2.0324, "step": 20652 }, { "epoch": 2.1230342275670675, "grad_norm": 0.04898180440068245, "learning_rate": 0.01, "loss": 2.0503, "step": 20655 }, { "epoch": 2.123342584027135, "grad_norm": 0.0748276561498642, "learning_rate": 0.01, "loss": 2.0632, "step": 20658 }, { "epoch": 2.1236509404872033, "grad_norm": 0.19312037527561188, "learning_rate": 0.01, "loss": 2.0409, "step": 20661 }, { "epoch": 2.123959296947271, "grad_norm": 0.1444995105266571, "learning_rate": 0.01, "loss": 2.0713, "step": 20664 }, { "epoch": 2.124267653407339, "grad_norm": 0.03868752345442772, "learning_rate": 0.01, "loss": 2.045, "step": 20667 }, { "epoch": 2.1245760098674067, "grad_norm": 0.04851067438721657, "learning_rate": 0.01, "loss": 2.0347, "step": 20670 }, { "epoch": 2.1248843663274743, "grad_norm": 0.03709336742758751, "learning_rate": 0.01, "loss": 2.0288, "step": 20673 }, { "epoch": 2.1251927227875425, "grad_norm": 0.03498173505067825, "learning_rate": 0.01, "loss": 2.0262, "step": 20676 }, { "epoch": 2.12550107924761, "grad_norm": 0.04838285222649574, "learning_rate": 0.01, "loss": 2.0391, "step": 20679 }, { "epoch": 2.125809435707678, "grad_norm": 0.04562096297740936, "learning_rate": 0.01, "loss": 2.0226, "step": 20682 }, { "epoch": 2.126117792167746, "grad_norm": 0.06128044053912163, "learning_rate": 0.01, "loss": 2.0452, "step": 20685 }, { "epoch": 2.1264261486278135, "grad_norm": 0.04903801530599594, "learning_rate": 0.01, "loss": 2.049, "step": 20688 }, { "epoch": 2.1267345050878816, "grad_norm": 0.038953814655542374, "learning_rate": 0.01, "loss": 2.0559, "step": 20691 }, { "epoch": 2.1270428615479493, "grad_norm": 0.06137779355049133, "learning_rate": 0.01, "loss": 2.0458, "step": 20694 }, { "epoch": 2.1273512180080174, "grad_norm": 0.11357403546571732, "learning_rate": 0.01, "loss": 2.0572, "step": 20697 }, { "epoch": 2.127659574468085, "grad_norm": 0.16223950684070587, "learning_rate": 0.01, "loss": 2.0158, "step": 20700 }, { "epoch": 2.127967930928153, "grad_norm": 0.06391070038080215, "learning_rate": 0.01, "loss": 2.0682, "step": 20703 }, { "epoch": 2.128276287388221, "grad_norm": 0.03951489180326462, "learning_rate": 0.01, "loss": 2.0645, "step": 20706 }, { "epoch": 2.1285846438482885, "grad_norm": 0.03921591490507126, "learning_rate": 0.01, "loss": 2.0538, "step": 20709 }, { "epoch": 2.1288930003083566, "grad_norm": 0.0479004830121994, "learning_rate": 0.01, "loss": 2.0587, "step": 20712 }, { "epoch": 2.1292013567684243, "grad_norm": 0.04370121285319328, "learning_rate": 0.01, "loss": 2.0249, "step": 20715 }, { "epoch": 2.129509713228492, "grad_norm": 0.05750906467437744, "learning_rate": 0.01, "loss": 2.0452, "step": 20718 }, { "epoch": 2.12981806968856, "grad_norm": 0.06549614667892456, "learning_rate": 0.01, "loss": 2.0173, "step": 20721 }, { "epoch": 2.1301264261486277, "grad_norm": 0.05763638764619827, "learning_rate": 0.01, "loss": 2.0315, "step": 20724 }, { "epoch": 2.130434782608696, "grad_norm": 0.16448546946048737, "learning_rate": 0.01, "loss": 2.0352, "step": 20727 }, { "epoch": 2.1307431390687634, "grad_norm": 0.057152118533849716, "learning_rate": 0.01, "loss": 2.0361, "step": 20730 }, { "epoch": 2.1310514955288316, "grad_norm": 0.054196059703826904, "learning_rate": 0.01, "loss": 2.0387, "step": 20733 }, { "epoch": 2.131359851988899, "grad_norm": 0.03887069597840309, "learning_rate": 0.01, "loss": 2.031, "step": 20736 }, { "epoch": 2.131668208448967, "grad_norm": 0.03529683127999306, "learning_rate": 0.01, "loss": 2.0466, "step": 20739 }, { "epoch": 2.131976564909035, "grad_norm": 0.053463347256183624, "learning_rate": 0.01, "loss": 2.011, "step": 20742 }, { "epoch": 2.1322849213691026, "grad_norm": 0.03482777252793312, "learning_rate": 0.01, "loss": 2.0421, "step": 20745 }, { "epoch": 2.1325932778291703, "grad_norm": 0.056043028831481934, "learning_rate": 0.01, "loss": 2.0442, "step": 20748 }, { "epoch": 2.1329016342892384, "grad_norm": 0.04648544266819954, "learning_rate": 0.01, "loss": 2.0502, "step": 20751 }, { "epoch": 2.133209990749306, "grad_norm": 0.0878090187907219, "learning_rate": 0.01, "loss": 2.0092, "step": 20754 }, { "epoch": 2.133518347209374, "grad_norm": 0.059173766523599625, "learning_rate": 0.01, "loss": 2.0469, "step": 20757 }, { "epoch": 2.133826703669442, "grad_norm": 0.09119824320077896, "learning_rate": 0.01, "loss": 2.0448, "step": 20760 }, { "epoch": 2.13413506012951, "grad_norm": 0.04082552343606949, "learning_rate": 0.01, "loss": 2.0501, "step": 20763 }, { "epoch": 2.1344434165895776, "grad_norm": 0.0831313356757164, "learning_rate": 0.01, "loss": 2.0293, "step": 20766 }, { "epoch": 2.1347517730496453, "grad_norm": 0.0635252296924591, "learning_rate": 0.01, "loss": 2.0495, "step": 20769 }, { "epoch": 2.1350601295097134, "grad_norm": 0.09516814351081848, "learning_rate": 0.01, "loss": 2.0284, "step": 20772 }, { "epoch": 2.135368485969781, "grad_norm": 0.05734236165881157, "learning_rate": 0.01, "loss": 2.0341, "step": 20775 }, { "epoch": 2.1356768424298487, "grad_norm": 0.07606332749128342, "learning_rate": 0.01, "loss": 2.0388, "step": 20778 }, { "epoch": 2.135985198889917, "grad_norm": 0.041968777775764465, "learning_rate": 0.01, "loss": 2.0172, "step": 20781 }, { "epoch": 2.1362935553499844, "grad_norm": 0.10549111664295197, "learning_rate": 0.01, "loss": 2.0695, "step": 20784 }, { "epoch": 2.1366019118100525, "grad_norm": 0.0783652663230896, "learning_rate": 0.01, "loss": 2.0416, "step": 20787 }, { "epoch": 2.13691026827012, "grad_norm": 0.08309295773506165, "learning_rate": 0.01, "loss": 2.0455, "step": 20790 }, { "epoch": 2.1372186247301883, "grad_norm": 0.038408368825912476, "learning_rate": 0.01, "loss": 2.0298, "step": 20793 }, { "epoch": 2.137526981190256, "grad_norm": 0.1311808079481125, "learning_rate": 0.01, "loss": 2.0608, "step": 20796 }, { "epoch": 2.1378353376503236, "grad_norm": 0.08983028680086136, "learning_rate": 0.01, "loss": 2.0357, "step": 20799 }, { "epoch": 2.1381436941103917, "grad_norm": 0.12422356754541397, "learning_rate": 0.01, "loss": 2.0443, "step": 20802 }, { "epoch": 2.1384520505704594, "grad_norm": 0.0687076672911644, "learning_rate": 0.01, "loss": 2.0224, "step": 20805 }, { "epoch": 2.138760407030527, "grad_norm": 0.04577179625630379, "learning_rate": 0.01, "loss": 2.0195, "step": 20808 }, { "epoch": 2.139068763490595, "grad_norm": 0.046294886618852615, "learning_rate": 0.01, "loss": 2.028, "step": 20811 }, { "epoch": 2.139377119950663, "grad_norm": 0.048477739095687866, "learning_rate": 0.01, "loss": 2.055, "step": 20814 }, { "epoch": 2.139685476410731, "grad_norm": 0.035206399857997894, "learning_rate": 0.01, "loss": 2.0242, "step": 20817 }, { "epoch": 2.1399938328707986, "grad_norm": 0.0451078936457634, "learning_rate": 0.01, "loss": 2.0316, "step": 20820 }, { "epoch": 2.1403021893308667, "grad_norm": 0.09469619393348694, "learning_rate": 0.01, "loss": 2.0259, "step": 20823 }, { "epoch": 2.1406105457909343, "grad_norm": 0.03966006636619568, "learning_rate": 0.01, "loss": 2.0427, "step": 20826 }, { "epoch": 2.140918902251002, "grad_norm": 0.0793539360165596, "learning_rate": 0.01, "loss": 2.0469, "step": 20829 }, { "epoch": 2.14122725871107, "grad_norm": 0.08547502756118774, "learning_rate": 0.01, "loss": 2.0189, "step": 20832 }, { "epoch": 2.1415356151711378, "grad_norm": 0.055916544049978256, "learning_rate": 0.01, "loss": 2.042, "step": 20835 }, { "epoch": 2.141843971631206, "grad_norm": 0.0369451642036438, "learning_rate": 0.01, "loss": 2.0346, "step": 20838 }, { "epoch": 2.1421523280912735, "grad_norm": 0.05001531541347504, "learning_rate": 0.01, "loss": 2.06, "step": 20841 }, { "epoch": 2.142460684551341, "grad_norm": 0.09491688758134842, "learning_rate": 0.01, "loss": 2.0092, "step": 20844 }, { "epoch": 2.1427690410114093, "grad_norm": 0.05615471303462982, "learning_rate": 0.01, "loss": 2.0575, "step": 20847 }, { "epoch": 2.143077397471477, "grad_norm": 0.08531193435192108, "learning_rate": 0.01, "loss": 2.0562, "step": 20850 }, { "epoch": 2.143385753931545, "grad_norm": 0.08138839155435562, "learning_rate": 0.01, "loss": 2.0482, "step": 20853 }, { "epoch": 2.1436941103916127, "grad_norm": 0.04965364560484886, "learning_rate": 0.01, "loss": 2.0487, "step": 20856 }, { "epoch": 2.1440024668516804, "grad_norm": 0.07006839662790298, "learning_rate": 0.01, "loss": 2.0469, "step": 20859 }, { "epoch": 2.1443108233117485, "grad_norm": 0.07477371394634247, "learning_rate": 0.01, "loss": 2.0452, "step": 20862 }, { "epoch": 2.144619179771816, "grad_norm": 0.07817060500383377, "learning_rate": 0.01, "loss": 2.0239, "step": 20865 }, { "epoch": 2.1449275362318843, "grad_norm": 0.04393570497632027, "learning_rate": 0.01, "loss": 2.0261, "step": 20868 }, { "epoch": 2.145235892691952, "grad_norm": 0.0777692198753357, "learning_rate": 0.01, "loss": 2.0313, "step": 20871 }, { "epoch": 2.1455442491520196, "grad_norm": 0.05267953500151634, "learning_rate": 0.01, "loss": 2.0496, "step": 20874 }, { "epoch": 2.1458526056120877, "grad_norm": 0.053839076310396194, "learning_rate": 0.01, "loss": 2.0593, "step": 20877 }, { "epoch": 2.1461609620721553, "grad_norm": 0.05793678015470505, "learning_rate": 0.01, "loss": 2.0333, "step": 20880 }, { "epoch": 2.1464693185322234, "grad_norm": 0.045539624989032745, "learning_rate": 0.01, "loss": 2.0504, "step": 20883 }, { "epoch": 2.146777674992291, "grad_norm": 0.062209442257881165, "learning_rate": 0.01, "loss": 2.0273, "step": 20886 }, { "epoch": 2.1470860314523588, "grad_norm": 0.11053802073001862, "learning_rate": 0.01, "loss": 2.0764, "step": 20889 }, { "epoch": 2.147394387912427, "grad_norm": 0.037597738206386566, "learning_rate": 0.01, "loss": 2.046, "step": 20892 }, { "epoch": 2.1477027443724945, "grad_norm": 0.050160784274339676, "learning_rate": 0.01, "loss": 2.0797, "step": 20895 }, { "epoch": 2.1480111008325626, "grad_norm": 0.053438689559698105, "learning_rate": 0.01, "loss": 2.0611, "step": 20898 }, { "epoch": 2.1483194572926303, "grad_norm": 0.06175898388028145, "learning_rate": 0.01, "loss": 2.0558, "step": 20901 }, { "epoch": 2.148627813752698, "grad_norm": 0.062430836260318756, "learning_rate": 0.01, "loss": 2.0405, "step": 20904 }, { "epoch": 2.148936170212766, "grad_norm": 0.09347888082265854, "learning_rate": 0.01, "loss": 2.0482, "step": 20907 }, { "epoch": 2.1492445266728337, "grad_norm": 0.13494189083576202, "learning_rate": 0.01, "loss": 2.0483, "step": 20910 }, { "epoch": 2.149552883132902, "grad_norm": 0.06735777854919434, "learning_rate": 0.01, "loss": 2.0799, "step": 20913 }, { "epoch": 2.1498612395929695, "grad_norm": 0.08108525723218918, "learning_rate": 0.01, "loss": 2.036, "step": 20916 }, { "epoch": 2.150169596053037, "grad_norm": 0.043221328407526016, "learning_rate": 0.01, "loss": 2.0401, "step": 20919 }, { "epoch": 2.1504779525131053, "grad_norm": 0.046568017452955246, "learning_rate": 0.01, "loss": 2.0393, "step": 20922 }, { "epoch": 2.150786308973173, "grad_norm": 0.040786582976579666, "learning_rate": 0.01, "loss": 2.0527, "step": 20925 }, { "epoch": 2.151094665433241, "grad_norm": 0.043856412172317505, "learning_rate": 0.01, "loss": 2.0405, "step": 20928 }, { "epoch": 2.1514030218933087, "grad_norm": 0.03569160774350166, "learning_rate": 0.01, "loss": 2.0431, "step": 20931 }, { "epoch": 2.1517113783533763, "grad_norm": 0.059598151594400406, "learning_rate": 0.01, "loss": 2.0316, "step": 20934 }, { "epoch": 2.1520197348134444, "grad_norm": 0.12518006563186646, "learning_rate": 0.01, "loss": 2.0389, "step": 20937 }, { "epoch": 2.152328091273512, "grad_norm": 0.04442603886127472, "learning_rate": 0.01, "loss": 2.0413, "step": 20940 }, { "epoch": 2.15263644773358, "grad_norm": 0.1175926923751831, "learning_rate": 0.01, "loss": 2.0307, "step": 20943 }, { "epoch": 2.152944804193648, "grad_norm": 0.09045865386724472, "learning_rate": 0.01, "loss": 2.0291, "step": 20946 }, { "epoch": 2.1532531606537155, "grad_norm": 0.07519534230232239, "learning_rate": 0.01, "loss": 2.048, "step": 20949 }, { "epoch": 2.1535615171137836, "grad_norm": 0.03551176190376282, "learning_rate": 0.01, "loss": 2.0396, "step": 20952 }, { "epoch": 2.1538698735738513, "grad_norm": 0.11210478097200394, "learning_rate": 0.01, "loss": 2.0453, "step": 20955 }, { "epoch": 2.1541782300339194, "grad_norm": 0.09921340644359589, "learning_rate": 0.01, "loss": 2.0295, "step": 20958 }, { "epoch": 2.154486586493987, "grad_norm": 0.04131443426012993, "learning_rate": 0.01, "loss": 2.0295, "step": 20961 }, { "epoch": 2.1547949429540547, "grad_norm": 0.07579990476369858, "learning_rate": 0.01, "loss": 2.041, "step": 20964 }, { "epoch": 2.155103299414123, "grad_norm": 0.04955494403839111, "learning_rate": 0.01, "loss": 2.0409, "step": 20967 }, { "epoch": 2.1554116558741905, "grad_norm": 0.038999974727630615, "learning_rate": 0.01, "loss": 2.0267, "step": 20970 }, { "epoch": 2.1557200123342586, "grad_norm": 0.04656311497092247, "learning_rate": 0.01, "loss": 2.0319, "step": 20973 }, { "epoch": 2.1560283687943262, "grad_norm": 0.08853477984666824, "learning_rate": 0.01, "loss": 2.0573, "step": 20976 }, { "epoch": 2.156336725254394, "grad_norm": 0.07657559216022491, "learning_rate": 0.01, "loss": 2.0455, "step": 20979 }, { "epoch": 2.156645081714462, "grad_norm": 0.03593476489186287, "learning_rate": 0.01, "loss": 2.0365, "step": 20982 }, { "epoch": 2.1569534381745297, "grad_norm": 0.08656775951385498, "learning_rate": 0.01, "loss": 2.0286, "step": 20985 }, { "epoch": 2.1572617946345978, "grad_norm": 0.07760220021009445, "learning_rate": 0.01, "loss": 2.068, "step": 20988 }, { "epoch": 2.1575701510946654, "grad_norm": 0.09167666733264923, "learning_rate": 0.01, "loss": 2.0165, "step": 20991 }, { "epoch": 2.157878507554733, "grad_norm": 0.052361227571964264, "learning_rate": 0.01, "loss": 2.0424, "step": 20994 }, { "epoch": 2.158186864014801, "grad_norm": 0.05431961268186569, "learning_rate": 0.01, "loss": 2.0475, "step": 20997 }, { "epoch": 2.158495220474869, "grad_norm": 0.03956061974167824, "learning_rate": 0.01, "loss": 2.0299, "step": 21000 }, { "epoch": 2.158803576934937, "grad_norm": 0.03906402364373207, "learning_rate": 0.01, "loss": 2.0442, "step": 21003 }, { "epoch": 2.1591119333950046, "grad_norm": 0.07876679301261902, "learning_rate": 0.01, "loss": 2.0369, "step": 21006 }, { "epoch": 2.1594202898550723, "grad_norm": 0.051768235862255096, "learning_rate": 0.01, "loss": 2.0404, "step": 21009 }, { "epoch": 2.1597286463151404, "grad_norm": 0.05242472141981125, "learning_rate": 0.01, "loss": 2.0514, "step": 21012 }, { "epoch": 2.160037002775208, "grad_norm": 0.07014864683151245, "learning_rate": 0.01, "loss": 2.0285, "step": 21015 }, { "epoch": 2.160345359235276, "grad_norm": 0.1024969145655632, "learning_rate": 0.01, "loss": 2.031, "step": 21018 }, { "epoch": 2.160653715695344, "grad_norm": 0.059565525501966476, "learning_rate": 0.01, "loss": 2.0334, "step": 21021 }, { "epoch": 2.1609620721554115, "grad_norm": 0.04438649117946625, "learning_rate": 0.01, "loss": 2.0736, "step": 21024 }, { "epoch": 2.1612704286154796, "grad_norm": 0.06552638858556747, "learning_rate": 0.01, "loss": 2.0449, "step": 21027 }, { "epoch": 2.1615787850755472, "grad_norm": 0.044327035546302795, "learning_rate": 0.01, "loss": 2.0512, "step": 21030 }, { "epoch": 2.1618871415356153, "grad_norm": 0.04915094003081322, "learning_rate": 0.01, "loss": 2.0634, "step": 21033 }, { "epoch": 2.162195497995683, "grad_norm": 0.052909743040800095, "learning_rate": 0.01, "loss": 2.0408, "step": 21036 }, { "epoch": 2.1625038544557507, "grad_norm": 0.09883973747491837, "learning_rate": 0.01, "loss": 2.0107, "step": 21039 }, { "epoch": 2.1628122109158188, "grad_norm": 0.07719819992780685, "learning_rate": 0.01, "loss": 2.0357, "step": 21042 }, { "epoch": 2.1631205673758864, "grad_norm": 0.09743590652942657, "learning_rate": 0.01, "loss": 2.0263, "step": 21045 }, { "epoch": 2.1634289238359545, "grad_norm": 0.06583153456449509, "learning_rate": 0.01, "loss": 2.0249, "step": 21048 }, { "epoch": 2.163737280296022, "grad_norm": 0.10464660078287125, "learning_rate": 0.01, "loss": 2.0154, "step": 21051 }, { "epoch": 2.16404563675609, "grad_norm": 0.057555560022592545, "learning_rate": 0.01, "loss": 2.0545, "step": 21054 }, { "epoch": 2.164353993216158, "grad_norm": 0.04753732308745384, "learning_rate": 0.01, "loss": 2.054, "step": 21057 }, { "epoch": 2.1646623496762256, "grad_norm": 0.05236852541565895, "learning_rate": 0.01, "loss": 2.0336, "step": 21060 }, { "epoch": 2.1649707061362937, "grad_norm": 0.05231897532939911, "learning_rate": 0.01, "loss": 2.0303, "step": 21063 }, { "epoch": 2.1652790625963614, "grad_norm": 0.058431778103113174, "learning_rate": 0.01, "loss": 2.0387, "step": 21066 }, { "epoch": 2.165587419056429, "grad_norm": 0.04048459604382515, "learning_rate": 0.01, "loss": 2.0317, "step": 21069 }, { "epoch": 2.165895775516497, "grad_norm": 0.0941488966345787, "learning_rate": 0.01, "loss": 2.0113, "step": 21072 }, { "epoch": 2.166204131976565, "grad_norm": 0.04498032480478287, "learning_rate": 0.01, "loss": 2.0178, "step": 21075 }, { "epoch": 2.166512488436633, "grad_norm": 0.03668253496289253, "learning_rate": 0.01, "loss": 2.0354, "step": 21078 }, { "epoch": 2.1668208448967006, "grad_norm": 0.06661085039377213, "learning_rate": 0.01, "loss": 2.0221, "step": 21081 }, { "epoch": 2.1671292013567682, "grad_norm": 0.06425791233778, "learning_rate": 0.01, "loss": 2.0301, "step": 21084 }, { "epoch": 2.1674375578168363, "grad_norm": 0.046152301132678986, "learning_rate": 0.01, "loss": 2.0287, "step": 21087 }, { "epoch": 2.167745914276904, "grad_norm": 0.12584535777568817, "learning_rate": 0.01, "loss": 2.0364, "step": 21090 }, { "epoch": 2.168054270736972, "grad_norm": 0.04133312404155731, "learning_rate": 0.01, "loss": 2.0403, "step": 21093 }, { "epoch": 2.1683626271970398, "grad_norm": 0.04181768745183945, "learning_rate": 0.01, "loss": 2.0241, "step": 21096 }, { "epoch": 2.1686709836571074, "grad_norm": 0.04648204892873764, "learning_rate": 0.01, "loss": 2.0307, "step": 21099 }, { "epoch": 2.1689793401171755, "grad_norm": 0.044040024280548096, "learning_rate": 0.01, "loss": 2.0495, "step": 21102 }, { "epoch": 2.169287696577243, "grad_norm": 0.04033771902322769, "learning_rate": 0.01, "loss": 2.0496, "step": 21105 }, { "epoch": 2.1695960530373113, "grad_norm": 0.04693743214011192, "learning_rate": 0.01, "loss": 2.0492, "step": 21108 }, { "epoch": 2.169904409497379, "grad_norm": 0.07494813948869705, "learning_rate": 0.01, "loss": 2.0462, "step": 21111 }, { "epoch": 2.1702127659574466, "grad_norm": 0.09254445135593414, "learning_rate": 0.01, "loss": 2.0478, "step": 21114 }, { "epoch": 2.1705211224175147, "grad_norm": 0.13350296020507812, "learning_rate": 0.01, "loss": 2.0277, "step": 21117 }, { "epoch": 2.1708294788775824, "grad_norm": 0.051686085760593414, "learning_rate": 0.01, "loss": 2.0507, "step": 21120 }, { "epoch": 2.1711378353376505, "grad_norm": 0.043330930173397064, "learning_rate": 0.01, "loss": 2.0261, "step": 21123 }, { "epoch": 2.171446191797718, "grad_norm": 0.05219477042555809, "learning_rate": 0.01, "loss": 2.0299, "step": 21126 }, { "epoch": 2.171754548257786, "grad_norm": 0.04195651784539223, "learning_rate": 0.01, "loss": 2.0441, "step": 21129 }, { "epoch": 2.172062904717854, "grad_norm": 0.15802520513534546, "learning_rate": 0.01, "loss": 2.0609, "step": 21132 }, { "epoch": 2.1723712611779216, "grad_norm": 0.08619748800992966, "learning_rate": 0.01, "loss": 2.0542, "step": 21135 }, { "epoch": 2.1726796176379897, "grad_norm": 0.08852280676364899, "learning_rate": 0.01, "loss": 2.0287, "step": 21138 }, { "epoch": 2.1729879740980573, "grad_norm": 0.040646422654390335, "learning_rate": 0.01, "loss": 2.0289, "step": 21141 }, { "epoch": 2.173296330558125, "grad_norm": 0.06913924962282181, "learning_rate": 0.01, "loss": 2.0463, "step": 21144 }, { "epoch": 2.173604687018193, "grad_norm": 0.05461576208472252, "learning_rate": 0.01, "loss": 2.0382, "step": 21147 }, { "epoch": 2.1739130434782608, "grad_norm": 0.08613748103380203, "learning_rate": 0.01, "loss": 2.0412, "step": 21150 }, { "epoch": 2.174221399938329, "grad_norm": 0.06459856033325195, "learning_rate": 0.01, "loss": 2.0445, "step": 21153 }, { "epoch": 2.1745297563983965, "grad_norm": 0.08450223505496979, "learning_rate": 0.01, "loss": 2.0237, "step": 21156 }, { "epoch": 2.174838112858464, "grad_norm": 0.10049585998058319, "learning_rate": 0.01, "loss": 2.0458, "step": 21159 }, { "epoch": 2.1751464693185323, "grad_norm": 0.06031005084514618, "learning_rate": 0.01, "loss": 2.0668, "step": 21162 }, { "epoch": 2.1754548257786, "grad_norm": 0.06921012699604034, "learning_rate": 0.01, "loss": 2.053, "step": 21165 }, { "epoch": 2.175763182238668, "grad_norm": 0.044479697942733765, "learning_rate": 0.01, "loss": 2.0275, "step": 21168 }, { "epoch": 2.1760715386987357, "grad_norm": 0.0857187807559967, "learning_rate": 0.01, "loss": 2.0513, "step": 21171 }, { "epoch": 2.176379895158804, "grad_norm": 0.057432882487773895, "learning_rate": 0.01, "loss": 2.0622, "step": 21174 }, { "epoch": 2.1766882516188715, "grad_norm": 0.0905427411198616, "learning_rate": 0.01, "loss": 2.0574, "step": 21177 }, { "epoch": 2.176996608078939, "grad_norm": 0.05289644002914429, "learning_rate": 0.01, "loss": 2.0348, "step": 21180 }, { "epoch": 2.1773049645390072, "grad_norm": 0.06351148337125778, "learning_rate": 0.01, "loss": 2.0223, "step": 21183 }, { "epoch": 2.177613320999075, "grad_norm": 0.1098824068903923, "learning_rate": 0.01, "loss": 2.0534, "step": 21186 }, { "epoch": 2.1779216774591426, "grad_norm": 0.03698734566569328, "learning_rate": 0.01, "loss": 2.0265, "step": 21189 }, { "epoch": 2.1782300339192107, "grad_norm": 0.09595025330781937, "learning_rate": 0.01, "loss": 2.0596, "step": 21192 }, { "epoch": 2.1785383903792783, "grad_norm": 0.05725647136569023, "learning_rate": 0.01, "loss": 2.0568, "step": 21195 }, { "epoch": 2.1788467468393464, "grad_norm": 0.06952492892742157, "learning_rate": 0.01, "loss": 2.0119, "step": 21198 }, { "epoch": 2.179155103299414, "grad_norm": 0.06831461936235428, "learning_rate": 0.01, "loss": 2.0484, "step": 21201 }, { "epoch": 2.179463459759482, "grad_norm": 0.05125569924712181, "learning_rate": 0.01, "loss": 2.0593, "step": 21204 }, { "epoch": 2.17977181621955, "grad_norm": 0.053290076553821564, "learning_rate": 0.01, "loss": 2.0576, "step": 21207 }, { "epoch": 2.1800801726796175, "grad_norm": 0.05718007683753967, "learning_rate": 0.01, "loss": 2.0472, "step": 21210 }, { "epoch": 2.1803885291396856, "grad_norm": 0.0895228236913681, "learning_rate": 0.01, "loss": 2.0673, "step": 21213 }, { "epoch": 2.1806968855997533, "grad_norm": 0.05205732583999634, "learning_rate": 0.01, "loss": 2.0366, "step": 21216 }, { "epoch": 2.181005242059821, "grad_norm": 0.05785641819238663, "learning_rate": 0.01, "loss": 2.0557, "step": 21219 }, { "epoch": 2.181313598519889, "grad_norm": 0.0756557285785675, "learning_rate": 0.01, "loss": 2.0431, "step": 21222 }, { "epoch": 2.1816219549799567, "grad_norm": 0.07016823440790176, "learning_rate": 0.01, "loss": 2.0508, "step": 21225 }, { "epoch": 2.181930311440025, "grad_norm": 0.06859459728002548, "learning_rate": 0.01, "loss": 2.0418, "step": 21228 }, { "epoch": 2.1822386679000925, "grad_norm": 0.14140251278877258, "learning_rate": 0.01, "loss": 2.0473, "step": 21231 }, { "epoch": 2.1825470243601606, "grad_norm": 0.0544712096452713, "learning_rate": 0.01, "loss": 2.04, "step": 21234 }, { "epoch": 2.1828553808202282, "grad_norm": 0.04346593841910362, "learning_rate": 0.01, "loss": 2.0516, "step": 21237 }, { "epoch": 2.183163737280296, "grad_norm": 0.04261700063943863, "learning_rate": 0.01, "loss": 2.0431, "step": 21240 }, { "epoch": 2.183472093740364, "grad_norm": 0.04763154685497284, "learning_rate": 0.01, "loss": 2.0196, "step": 21243 }, { "epoch": 2.1837804502004317, "grad_norm": 0.06876801699399948, "learning_rate": 0.01, "loss": 2.0343, "step": 21246 }, { "epoch": 2.1840888066604993, "grad_norm": 0.07819974422454834, "learning_rate": 0.01, "loss": 2.0506, "step": 21249 }, { "epoch": 2.1843971631205674, "grad_norm": 0.06239667907357216, "learning_rate": 0.01, "loss": 2.0268, "step": 21252 }, { "epoch": 2.184705519580635, "grad_norm": 0.1095786988735199, "learning_rate": 0.01, "loss": 2.0398, "step": 21255 }, { "epoch": 2.185013876040703, "grad_norm": 0.055070340633392334, "learning_rate": 0.01, "loss": 2.0523, "step": 21258 }, { "epoch": 2.185322232500771, "grad_norm": 0.08038482069969177, "learning_rate": 0.01, "loss": 2.0222, "step": 21261 }, { "epoch": 2.185630588960839, "grad_norm": 0.06929390877485275, "learning_rate": 0.01, "loss": 2.0405, "step": 21264 }, { "epoch": 2.1859389454209066, "grad_norm": 0.054179031401872635, "learning_rate": 0.01, "loss": 2.0554, "step": 21267 }, { "epoch": 2.1862473018809743, "grad_norm": 0.06956303864717484, "learning_rate": 0.01, "loss": 2.0586, "step": 21270 }, { "epoch": 2.1865556583410424, "grad_norm": 0.14279653131961823, "learning_rate": 0.01, "loss": 2.0381, "step": 21273 }, { "epoch": 2.18686401480111, "grad_norm": 0.04268357530236244, "learning_rate": 0.01, "loss": 2.0404, "step": 21276 }, { "epoch": 2.1871723712611777, "grad_norm": 0.05189354717731476, "learning_rate": 0.01, "loss": 2.0405, "step": 21279 }, { "epoch": 2.187480727721246, "grad_norm": 0.05047158896923065, "learning_rate": 0.01, "loss": 2.0495, "step": 21282 }, { "epoch": 2.1877890841813135, "grad_norm": 0.041077565401792526, "learning_rate": 0.01, "loss": 2.0558, "step": 21285 }, { "epoch": 2.1880974406413816, "grad_norm": 0.03880000859498978, "learning_rate": 0.01, "loss": 2.0472, "step": 21288 }, { "epoch": 2.1884057971014492, "grad_norm": 0.1096898689866066, "learning_rate": 0.01, "loss": 2.0537, "step": 21291 }, { "epoch": 2.1887141535615173, "grad_norm": 0.04502374678850174, "learning_rate": 0.01, "loss": 2.0397, "step": 21294 }, { "epoch": 2.189022510021585, "grad_norm": 0.037158042192459106, "learning_rate": 0.01, "loss": 2.0117, "step": 21297 }, { "epoch": 2.1893308664816526, "grad_norm": 0.03381425887346268, "learning_rate": 0.01, "loss": 2.0297, "step": 21300 }, { "epoch": 2.1896392229417208, "grad_norm": 0.05572035536170006, "learning_rate": 0.01, "loss": 2.0575, "step": 21303 }, { "epoch": 2.1899475794017884, "grad_norm": 0.06287326663732529, "learning_rate": 0.01, "loss": 2.0341, "step": 21306 }, { "epoch": 2.190255935861856, "grad_norm": 0.07691732794046402, "learning_rate": 0.01, "loss": 2.0317, "step": 21309 }, { "epoch": 2.190564292321924, "grad_norm": 0.058651309460401535, "learning_rate": 0.01, "loss": 2.04, "step": 21312 }, { "epoch": 2.190872648781992, "grad_norm": 0.033279962837696075, "learning_rate": 0.01, "loss": 2.0325, "step": 21315 }, { "epoch": 2.19118100524206, "grad_norm": 0.08742087334394455, "learning_rate": 0.01, "loss": 2.0303, "step": 21318 }, { "epoch": 2.1914893617021276, "grad_norm": 0.0864923968911171, "learning_rate": 0.01, "loss": 2.0492, "step": 21321 }, { "epoch": 2.1917977181621957, "grad_norm": 0.10759606957435608, "learning_rate": 0.01, "loss": 2.0582, "step": 21324 }, { "epoch": 2.1921060746222634, "grad_norm": 0.058335281908512115, "learning_rate": 0.01, "loss": 2.019, "step": 21327 }, { "epoch": 2.192414431082331, "grad_norm": 0.04506481811404228, "learning_rate": 0.01, "loss": 2.027, "step": 21330 }, { "epoch": 2.192722787542399, "grad_norm": 0.0454195998609066, "learning_rate": 0.01, "loss": 2.0297, "step": 21333 }, { "epoch": 2.193031144002467, "grad_norm": 0.051547158509492874, "learning_rate": 0.01, "loss": 1.9814, "step": 21336 }, { "epoch": 2.193339500462535, "grad_norm": 0.09826447069644928, "learning_rate": 0.01, "loss": 2.0518, "step": 21339 }, { "epoch": 2.1936478569226026, "grad_norm": 0.05799272283911705, "learning_rate": 0.01, "loss": 2.0649, "step": 21342 }, { "epoch": 2.19395621338267, "grad_norm": 0.12493482232093811, "learning_rate": 0.01, "loss": 2.0257, "step": 21345 }, { "epoch": 2.1942645698427383, "grad_norm": 0.04930184409022331, "learning_rate": 0.01, "loss": 2.0228, "step": 21348 }, { "epoch": 2.194572926302806, "grad_norm": 0.04257272928953171, "learning_rate": 0.01, "loss": 2.0579, "step": 21351 }, { "epoch": 2.194881282762874, "grad_norm": 0.04625258222222328, "learning_rate": 0.01, "loss": 2.0244, "step": 21354 }, { "epoch": 2.1951896392229417, "grad_norm": 0.04295830428600311, "learning_rate": 0.01, "loss": 2.032, "step": 21357 }, { "epoch": 2.1954979956830094, "grad_norm": 0.21663565933704376, "learning_rate": 0.01, "loss": 2.0489, "step": 21360 }, { "epoch": 2.1958063521430775, "grad_norm": 0.15513652563095093, "learning_rate": 0.01, "loss": 2.0473, "step": 21363 }, { "epoch": 2.196114708603145, "grad_norm": 0.08245841413736343, "learning_rate": 0.01, "loss": 2.0402, "step": 21366 }, { "epoch": 2.1964230650632133, "grad_norm": 0.03768035024404526, "learning_rate": 0.01, "loss": 2.0444, "step": 21369 }, { "epoch": 2.196731421523281, "grad_norm": 0.0586925707757473, "learning_rate": 0.01, "loss": 2.0299, "step": 21372 }, { "epoch": 2.1970397779833486, "grad_norm": 0.045760899782180786, "learning_rate": 0.01, "loss": 2.0484, "step": 21375 }, { "epoch": 2.1973481344434167, "grad_norm": 0.04357283189892769, "learning_rate": 0.01, "loss": 2.0231, "step": 21378 }, { "epoch": 2.1976564909034844, "grad_norm": 0.04477246478199959, "learning_rate": 0.01, "loss": 2.0211, "step": 21381 }, { "epoch": 2.1979648473635525, "grad_norm": 0.06785521656274796, "learning_rate": 0.01, "loss": 2.0532, "step": 21384 }, { "epoch": 2.19827320382362, "grad_norm": 0.04677508771419525, "learning_rate": 0.01, "loss": 2.0222, "step": 21387 }, { "epoch": 2.198581560283688, "grad_norm": 0.049355942755937576, "learning_rate": 0.01, "loss": 2.0488, "step": 21390 }, { "epoch": 2.198889916743756, "grad_norm": 0.11005277186632156, "learning_rate": 0.01, "loss": 2.0198, "step": 21393 }, { "epoch": 2.1991982732038236, "grad_norm": 0.10170245915651321, "learning_rate": 0.01, "loss": 2.0021, "step": 21396 }, { "epoch": 2.1995066296638917, "grad_norm": 0.045158207416534424, "learning_rate": 0.01, "loss": 2.0462, "step": 21399 }, { "epoch": 2.1998149861239593, "grad_norm": 0.0780436098575592, "learning_rate": 0.01, "loss": 2.0292, "step": 21402 }, { "epoch": 2.200123342584027, "grad_norm": 0.06062453240156174, "learning_rate": 0.01, "loss": 2.0437, "step": 21405 }, { "epoch": 2.200431699044095, "grad_norm": 0.09208519756793976, "learning_rate": 0.01, "loss": 2.0378, "step": 21408 }, { "epoch": 2.2007400555041627, "grad_norm": 0.05279922112822533, "learning_rate": 0.01, "loss": 2.0521, "step": 21411 }, { "epoch": 2.201048411964231, "grad_norm": 0.0831415057182312, "learning_rate": 0.01, "loss": 2.0394, "step": 21414 }, { "epoch": 2.2013567684242985, "grad_norm": 0.06481669098138809, "learning_rate": 0.01, "loss": 2.0119, "step": 21417 }, { "epoch": 2.201665124884366, "grad_norm": 0.08297551423311234, "learning_rate": 0.01, "loss": 2.0741, "step": 21420 }, { "epoch": 2.2019734813444343, "grad_norm": 0.06962350755929947, "learning_rate": 0.01, "loss": 2.0434, "step": 21423 }, { "epoch": 2.202281837804502, "grad_norm": 0.08055757731199265, "learning_rate": 0.01, "loss": 2.0334, "step": 21426 }, { "epoch": 2.20259019426457, "grad_norm": 0.09755895286798477, "learning_rate": 0.01, "loss": 2.0443, "step": 21429 }, { "epoch": 2.2028985507246377, "grad_norm": 0.04400679096579552, "learning_rate": 0.01, "loss": 2.0194, "step": 21432 }, { "epoch": 2.2032069071847054, "grad_norm": 0.04204344376921654, "learning_rate": 0.01, "loss": 2.063, "step": 21435 }, { "epoch": 2.2035152636447735, "grad_norm": 0.029974184930324554, "learning_rate": 0.01, "loss": 2.0211, "step": 21438 }, { "epoch": 2.203823620104841, "grad_norm": 0.07232589274644852, "learning_rate": 0.01, "loss": 2.0254, "step": 21441 }, { "epoch": 2.2041319765649092, "grad_norm": 0.06404844671487808, "learning_rate": 0.01, "loss": 2.0596, "step": 21444 }, { "epoch": 2.204440333024977, "grad_norm": 0.08751700818538666, "learning_rate": 0.01, "loss": 2.0576, "step": 21447 }, { "epoch": 2.2047486894850445, "grad_norm": 0.0371503084897995, "learning_rate": 0.01, "loss": 2.0424, "step": 21450 }, { "epoch": 2.2050570459451126, "grad_norm": 0.06034844368696213, "learning_rate": 0.01, "loss": 2.0306, "step": 21453 }, { "epoch": 2.2053654024051803, "grad_norm": 0.04261939972639084, "learning_rate": 0.01, "loss": 2.0323, "step": 21456 }, { "epoch": 2.2056737588652484, "grad_norm": 0.0612785667181015, "learning_rate": 0.01, "loss": 2.0349, "step": 21459 }, { "epoch": 2.205982115325316, "grad_norm": 0.05828654393553734, "learning_rate": 0.01, "loss": 2.0231, "step": 21462 }, { "epoch": 2.2062904717853837, "grad_norm": 0.06474754214286804, "learning_rate": 0.01, "loss": 2.0585, "step": 21465 }, { "epoch": 2.206598828245452, "grad_norm": 0.04646962508559227, "learning_rate": 0.01, "loss": 2.0337, "step": 21468 }, { "epoch": 2.2069071847055195, "grad_norm": 0.07051596790552139, "learning_rate": 0.01, "loss": 2.0218, "step": 21471 }, { "epoch": 2.2072155411655876, "grad_norm": 0.05658755078911781, "learning_rate": 0.01, "loss": 2.0347, "step": 21474 }, { "epoch": 2.2075238976256553, "grad_norm": 0.039348311722278595, "learning_rate": 0.01, "loss": 2.0587, "step": 21477 }, { "epoch": 2.207832254085723, "grad_norm": 0.030550241470336914, "learning_rate": 0.01, "loss": 2.0412, "step": 21480 }, { "epoch": 2.208140610545791, "grad_norm": 0.11341346800327301, "learning_rate": 0.01, "loss": 2.0416, "step": 21483 }, { "epoch": 2.2084489670058587, "grad_norm": 0.07061111927032471, "learning_rate": 0.01, "loss": 2.0117, "step": 21486 }, { "epoch": 2.208757323465927, "grad_norm": 0.10256624966859818, "learning_rate": 0.01, "loss": 2.0482, "step": 21489 }, { "epoch": 2.2090656799259945, "grad_norm": 0.06658724695444107, "learning_rate": 0.01, "loss": 2.0544, "step": 21492 }, { "epoch": 2.209374036386062, "grad_norm": 0.12220150977373123, "learning_rate": 0.01, "loss": 2.0399, "step": 21495 }, { "epoch": 2.20968239284613, "grad_norm": 0.05570116639137268, "learning_rate": 0.01, "loss": 2.0238, "step": 21498 }, { "epoch": 2.209990749306198, "grad_norm": 0.04273837059736252, "learning_rate": 0.01, "loss": 2.032, "step": 21501 }, { "epoch": 2.210299105766266, "grad_norm": 0.04138748720288277, "learning_rate": 0.01, "loss": 2.0497, "step": 21504 }, { "epoch": 2.2106074622263336, "grad_norm": 0.08082164078950882, "learning_rate": 0.01, "loss": 2.025, "step": 21507 }, { "epoch": 2.2109158186864013, "grad_norm": 0.06663049012422562, "learning_rate": 0.01, "loss": 2.0298, "step": 21510 }, { "epoch": 2.2112241751464694, "grad_norm": 0.0444667711853981, "learning_rate": 0.01, "loss": 2.0469, "step": 21513 }, { "epoch": 2.211532531606537, "grad_norm": 0.04407314583659172, "learning_rate": 0.01, "loss": 2.0667, "step": 21516 }, { "epoch": 2.211840888066605, "grad_norm": 0.03877383843064308, "learning_rate": 0.01, "loss": 2.0329, "step": 21519 }, { "epoch": 2.212149244526673, "grad_norm": 0.059297189116477966, "learning_rate": 0.01, "loss": 2.0441, "step": 21522 }, { "epoch": 2.2124576009867405, "grad_norm": 0.06609878689050674, "learning_rate": 0.01, "loss": 2.0263, "step": 21525 }, { "epoch": 2.2127659574468086, "grad_norm": 0.06935823708772659, "learning_rate": 0.01, "loss": 2.0295, "step": 21528 }, { "epoch": 2.2130743139068763, "grad_norm": 0.07610715180635452, "learning_rate": 0.01, "loss": 2.0424, "step": 21531 }, { "epoch": 2.2133826703669444, "grad_norm": 0.10587569326162338, "learning_rate": 0.01, "loss": 2.0636, "step": 21534 }, { "epoch": 2.213691026827012, "grad_norm": 0.05116620659828186, "learning_rate": 0.01, "loss": 2.0232, "step": 21537 }, { "epoch": 2.2139993832870797, "grad_norm": 0.03773776814341545, "learning_rate": 0.01, "loss": 2.0438, "step": 21540 }, { "epoch": 2.214307739747148, "grad_norm": 0.05412130430340767, "learning_rate": 0.01, "loss": 2.0522, "step": 21543 }, { "epoch": 2.2146160962072154, "grad_norm": 0.03664164990186691, "learning_rate": 0.01, "loss": 2.0535, "step": 21546 }, { "epoch": 2.2149244526672835, "grad_norm": 0.04415920004248619, "learning_rate": 0.01, "loss": 2.0113, "step": 21549 }, { "epoch": 2.215232809127351, "grad_norm": 0.05737615004181862, "learning_rate": 0.01, "loss": 2.0397, "step": 21552 }, { "epoch": 2.215541165587419, "grad_norm": 0.032385457307100296, "learning_rate": 0.01, "loss": 2.0325, "step": 21555 }, { "epoch": 2.215849522047487, "grad_norm": 0.0982925221323967, "learning_rate": 0.01, "loss": 2.0653, "step": 21558 }, { "epoch": 2.2161578785075546, "grad_norm": 0.03911735862493515, "learning_rate": 0.01, "loss": 2.0346, "step": 21561 }, { "epoch": 2.2164662349676227, "grad_norm": 0.07814744859933853, "learning_rate": 0.01, "loss": 2.0445, "step": 21564 }, { "epoch": 2.2167745914276904, "grad_norm": 0.05368256941437721, "learning_rate": 0.01, "loss": 2.0689, "step": 21567 }, { "epoch": 2.217082947887758, "grad_norm": 0.046178530901670456, "learning_rate": 0.01, "loss": 2.0402, "step": 21570 }, { "epoch": 2.217391304347826, "grad_norm": 0.047109801322221756, "learning_rate": 0.01, "loss": 2.0721, "step": 21573 }, { "epoch": 2.217699660807894, "grad_norm": 0.05443650484085083, "learning_rate": 0.01, "loss": 2.0416, "step": 21576 }, { "epoch": 2.218008017267962, "grad_norm": 0.13156400620937347, "learning_rate": 0.01, "loss": 2.0581, "step": 21579 }, { "epoch": 2.2183163737280296, "grad_norm": 0.04178638756275177, "learning_rate": 0.01, "loss": 2.0517, "step": 21582 }, { "epoch": 2.2186247301880972, "grad_norm": 0.042627740651369095, "learning_rate": 0.01, "loss": 2.0541, "step": 21585 }, { "epoch": 2.2189330866481654, "grad_norm": 0.05318658426403999, "learning_rate": 0.01, "loss": 2.0361, "step": 21588 }, { "epoch": 2.219241443108233, "grad_norm": 0.061288055032491684, "learning_rate": 0.01, "loss": 2.0425, "step": 21591 }, { "epoch": 2.219549799568301, "grad_norm": 0.06663260608911514, "learning_rate": 0.01, "loss": 2.0549, "step": 21594 }, { "epoch": 2.219858156028369, "grad_norm": 0.04567466303706169, "learning_rate": 0.01, "loss": 2.03, "step": 21597 }, { "epoch": 2.2201665124884364, "grad_norm": 0.12566886842250824, "learning_rate": 0.01, "loss": 2.0527, "step": 21600 }, { "epoch": 2.2204748689485045, "grad_norm": 0.03933155536651611, "learning_rate": 0.01, "loss": 2.022, "step": 21603 }, { "epoch": 2.220783225408572, "grad_norm": 0.04617391526699066, "learning_rate": 0.01, "loss": 2.0109, "step": 21606 }, { "epoch": 2.2210915818686403, "grad_norm": 0.05472411960363388, "learning_rate": 0.01, "loss": 2.0412, "step": 21609 }, { "epoch": 2.221399938328708, "grad_norm": 0.05556654930114746, "learning_rate": 0.01, "loss": 2.0214, "step": 21612 }, { "epoch": 2.2217082947887756, "grad_norm": 0.05096900090575218, "learning_rate": 0.01, "loss": 2.0524, "step": 21615 }, { "epoch": 2.2220166512488437, "grad_norm": 0.039425577968358994, "learning_rate": 0.01, "loss": 2.0368, "step": 21618 }, { "epoch": 2.2223250077089114, "grad_norm": 0.05080854892730713, "learning_rate": 0.01, "loss": 2.0494, "step": 21621 }, { "epoch": 2.2226333641689795, "grad_norm": 0.03824865445494652, "learning_rate": 0.01, "loss": 2.0497, "step": 21624 }, { "epoch": 2.222941720629047, "grad_norm": 0.03814932331442833, "learning_rate": 0.01, "loss": 2.0388, "step": 21627 }, { "epoch": 2.223250077089115, "grad_norm": 0.057797808200120926, "learning_rate": 0.01, "loss": 2.046, "step": 21630 }, { "epoch": 2.223558433549183, "grad_norm": 0.06596177071332932, "learning_rate": 0.01, "loss": 2.0572, "step": 21633 }, { "epoch": 2.2238667900092506, "grad_norm": 0.13438032567501068, "learning_rate": 0.01, "loss": 2.04, "step": 21636 }, { "epoch": 2.2241751464693187, "grad_norm": 0.06200256571173668, "learning_rate": 0.01, "loss": 2.0563, "step": 21639 }, { "epoch": 2.2244835029293863, "grad_norm": 0.06471807509660721, "learning_rate": 0.01, "loss": 2.0264, "step": 21642 }, { "epoch": 2.224791859389454, "grad_norm": 0.06439206004142761, "learning_rate": 0.01, "loss": 2.0425, "step": 21645 }, { "epoch": 2.225100215849522, "grad_norm": 0.0768360123038292, "learning_rate": 0.01, "loss": 2.0494, "step": 21648 }, { "epoch": 2.2254085723095898, "grad_norm": 0.10393831878900528, "learning_rate": 0.01, "loss": 2.0455, "step": 21651 }, { "epoch": 2.225716928769658, "grad_norm": 0.03999519720673561, "learning_rate": 0.01, "loss": 2.0168, "step": 21654 }, { "epoch": 2.2260252852297255, "grad_norm": 0.04620358720421791, "learning_rate": 0.01, "loss": 2.0576, "step": 21657 }, { "epoch": 2.226333641689793, "grad_norm": 0.05364964157342911, "learning_rate": 0.01, "loss": 2.0409, "step": 21660 }, { "epoch": 2.2266419981498613, "grad_norm": 0.049792349338531494, "learning_rate": 0.01, "loss": 2.0315, "step": 21663 }, { "epoch": 2.226950354609929, "grad_norm": 0.0651509091258049, "learning_rate": 0.01, "loss": 2.0176, "step": 21666 }, { "epoch": 2.227258711069997, "grad_norm": 0.035688720643520355, "learning_rate": 0.01, "loss": 2.0509, "step": 21669 }, { "epoch": 2.2275670675300647, "grad_norm": 0.06412792950868607, "learning_rate": 0.01, "loss": 2.0007, "step": 21672 }, { "epoch": 2.2278754239901324, "grad_norm": 0.08088821917772293, "learning_rate": 0.01, "loss": 2.0421, "step": 21675 }, { "epoch": 2.2281837804502005, "grad_norm": 0.060282256454229355, "learning_rate": 0.01, "loss": 2.0361, "step": 21678 }, { "epoch": 2.228492136910268, "grad_norm": 0.09816791117191315, "learning_rate": 0.01, "loss": 2.0317, "step": 21681 }, { "epoch": 2.2288004933703363, "grad_norm": 0.06672241538763046, "learning_rate": 0.01, "loss": 2.0559, "step": 21684 }, { "epoch": 2.229108849830404, "grad_norm": 0.06586040556430817, "learning_rate": 0.01, "loss": 2.0375, "step": 21687 }, { "epoch": 2.2294172062904716, "grad_norm": 0.0655137374997139, "learning_rate": 0.01, "loss": 2.0189, "step": 21690 }, { "epoch": 2.2297255627505397, "grad_norm": 0.08448750525712967, "learning_rate": 0.01, "loss": 2.0292, "step": 21693 }, { "epoch": 2.2300339192106073, "grad_norm": 0.05828822776675224, "learning_rate": 0.01, "loss": 2.043, "step": 21696 }, { "epoch": 2.2303422756706754, "grad_norm": 0.0816628485918045, "learning_rate": 0.01, "loss": 2.0362, "step": 21699 }, { "epoch": 2.230650632130743, "grad_norm": 0.038368817418813705, "learning_rate": 0.01, "loss": 2.029, "step": 21702 }, { "epoch": 2.230958988590811, "grad_norm": 0.08584286272525787, "learning_rate": 0.01, "loss": 2.0183, "step": 21705 }, { "epoch": 2.231267345050879, "grad_norm": 0.08528412878513336, "learning_rate": 0.01, "loss": 2.0185, "step": 21708 }, { "epoch": 2.2315757015109465, "grad_norm": 0.07158780843019485, "learning_rate": 0.01, "loss": 2.0437, "step": 21711 }, { "epoch": 2.2318840579710146, "grad_norm": 0.07810889184474945, "learning_rate": 0.01, "loss": 2.0374, "step": 21714 }, { "epoch": 2.2321924144310823, "grad_norm": 0.07769618928432465, "learning_rate": 0.01, "loss": 2.038, "step": 21717 }, { "epoch": 2.23250077089115, "grad_norm": 0.04515406861901283, "learning_rate": 0.01, "loss": 2.0307, "step": 21720 }, { "epoch": 2.232809127351218, "grad_norm": 0.11805865168571472, "learning_rate": 0.01, "loss": 2.0203, "step": 21723 }, { "epoch": 2.2331174838112857, "grad_norm": 0.11899860948324203, "learning_rate": 0.01, "loss": 2.0184, "step": 21726 }, { "epoch": 2.233425840271354, "grad_norm": 0.03309144452214241, "learning_rate": 0.01, "loss": 2.0307, "step": 21729 }, { "epoch": 2.2337341967314215, "grad_norm": 0.03389447182416916, "learning_rate": 0.01, "loss": 2.0257, "step": 21732 }, { "epoch": 2.2340425531914896, "grad_norm": 0.03472166880965233, "learning_rate": 0.01, "loss": 2.0386, "step": 21735 }, { "epoch": 2.2343509096515572, "grad_norm": 0.11815425753593445, "learning_rate": 0.01, "loss": 2.0302, "step": 21738 }, { "epoch": 2.234659266111625, "grad_norm": 0.08007802814245224, "learning_rate": 0.01, "loss": 2.0047, "step": 21741 }, { "epoch": 2.234967622571693, "grad_norm": 0.05053863301873207, "learning_rate": 0.01, "loss": 2.0119, "step": 21744 }, { "epoch": 2.2352759790317607, "grad_norm": 0.12116878479719162, "learning_rate": 0.01, "loss": 2.0189, "step": 21747 }, { "epoch": 2.2355843354918283, "grad_norm": 0.06733332574367523, "learning_rate": 0.01, "loss": 2.0441, "step": 21750 }, { "epoch": 2.2358926919518964, "grad_norm": 0.05141659080982208, "learning_rate": 0.01, "loss": 2.0315, "step": 21753 }, { "epoch": 2.236201048411964, "grad_norm": 0.03695325180888176, "learning_rate": 0.01, "loss": 2.0249, "step": 21756 }, { "epoch": 2.236509404872032, "grad_norm": 0.033388737589120865, "learning_rate": 0.01, "loss": 2.0238, "step": 21759 }, { "epoch": 2.2368177613321, "grad_norm": 0.03913639858365059, "learning_rate": 0.01, "loss": 2.0286, "step": 21762 }, { "epoch": 2.237126117792168, "grad_norm": 0.048513270914554596, "learning_rate": 0.01, "loss": 2.0277, "step": 21765 }, { "epoch": 2.2374344742522356, "grad_norm": 0.061340250074863434, "learning_rate": 0.01, "loss": 2.0295, "step": 21768 }, { "epoch": 2.2377428307123033, "grad_norm": 0.042784787714481354, "learning_rate": 0.01, "loss": 2.0306, "step": 21771 }, { "epoch": 2.2380511871723714, "grad_norm": 0.04124082252383232, "learning_rate": 0.01, "loss": 2.0478, "step": 21774 }, { "epoch": 2.238359543632439, "grad_norm": 0.03749304264783859, "learning_rate": 0.01, "loss": 2.0067, "step": 21777 }, { "epoch": 2.2386679000925067, "grad_norm": 0.12073452770709991, "learning_rate": 0.01, "loss": 2.0382, "step": 21780 }, { "epoch": 2.238976256552575, "grad_norm": 0.06613750755786896, "learning_rate": 0.01, "loss": 2.066, "step": 21783 }, { "epoch": 2.2392846130126425, "grad_norm": 0.11562148481607437, "learning_rate": 0.01, "loss": 2.0542, "step": 21786 }, { "epoch": 2.2395929694727106, "grad_norm": 0.06355132162570953, "learning_rate": 0.01, "loss": 2.0439, "step": 21789 }, { "epoch": 2.2399013259327782, "grad_norm": 0.09228016436100006, "learning_rate": 0.01, "loss": 2.066, "step": 21792 }, { "epoch": 2.2402096823928463, "grad_norm": 0.07648111879825592, "learning_rate": 0.01, "loss": 2.0334, "step": 21795 }, { "epoch": 2.240518038852914, "grad_norm": 0.0650177150964737, "learning_rate": 0.01, "loss": 2.0415, "step": 21798 }, { "epoch": 2.2408263953129817, "grad_norm": 0.09675853699445724, "learning_rate": 0.01, "loss": 2.0483, "step": 21801 }, { "epoch": 2.2411347517730498, "grad_norm": 0.045449864119291306, "learning_rate": 0.01, "loss": 2.0415, "step": 21804 }, { "epoch": 2.2414431082331174, "grad_norm": 0.0982547327876091, "learning_rate": 0.01, "loss": 2.0369, "step": 21807 }, { "epoch": 2.241751464693185, "grad_norm": 0.060201246291399, "learning_rate": 0.01, "loss": 2.0366, "step": 21810 }, { "epoch": 2.242059821153253, "grad_norm": 0.04503628611564636, "learning_rate": 0.01, "loss": 2.0411, "step": 21813 }, { "epoch": 2.242368177613321, "grad_norm": 0.04854295030236244, "learning_rate": 0.01, "loss": 2.0485, "step": 21816 }, { "epoch": 2.242676534073389, "grad_norm": 0.12228553742170334, "learning_rate": 0.01, "loss": 2.0204, "step": 21819 }, { "epoch": 2.2429848905334566, "grad_norm": 0.10491406172513962, "learning_rate": 0.01, "loss": 2.0546, "step": 21822 }, { "epoch": 2.2432932469935247, "grad_norm": 0.1188177838921547, "learning_rate": 0.01, "loss": 2.0324, "step": 21825 }, { "epoch": 2.2436016034535924, "grad_norm": 0.10877541452646255, "learning_rate": 0.01, "loss": 2.0433, "step": 21828 }, { "epoch": 2.24390995991366, "grad_norm": 0.03375115245580673, "learning_rate": 0.01, "loss": 2.0475, "step": 21831 }, { "epoch": 2.244218316373728, "grad_norm": 0.07588639855384827, "learning_rate": 0.01, "loss": 2.0192, "step": 21834 }, { "epoch": 2.244526672833796, "grad_norm": 0.04841979220509529, "learning_rate": 0.01, "loss": 2.0324, "step": 21837 }, { "epoch": 2.2448350292938635, "grad_norm": 0.041367027908563614, "learning_rate": 0.01, "loss": 2.0436, "step": 21840 }, { "epoch": 2.2451433857539316, "grad_norm": 0.044548399746418, "learning_rate": 0.01, "loss": 2.0349, "step": 21843 }, { "epoch": 2.2454517422139992, "grad_norm": 0.049429114907979965, "learning_rate": 0.01, "loss": 2.007, "step": 21846 }, { "epoch": 2.2457600986740673, "grad_norm": 0.07232322543859482, "learning_rate": 0.01, "loss": 2.028, "step": 21849 }, { "epoch": 2.246068455134135, "grad_norm": 0.05866502597928047, "learning_rate": 0.01, "loss": 2.0255, "step": 21852 }, { "epoch": 2.246376811594203, "grad_norm": 0.08406642079353333, "learning_rate": 0.01, "loss": 2.045, "step": 21855 }, { "epoch": 2.2466851680542708, "grad_norm": 0.08692225068807602, "learning_rate": 0.01, "loss": 2.0074, "step": 21858 }, { "epoch": 2.2469935245143384, "grad_norm": 0.09182562679052353, "learning_rate": 0.01, "loss": 2.0245, "step": 21861 }, { "epoch": 2.2473018809744065, "grad_norm": 0.07580506801605225, "learning_rate": 0.01, "loss": 2.0281, "step": 21864 }, { "epoch": 2.247610237434474, "grad_norm": 0.09604424238204956, "learning_rate": 0.01, "loss": 2.054, "step": 21867 }, { "epoch": 2.2479185938945423, "grad_norm": 0.05383382737636566, "learning_rate": 0.01, "loss": 2.0316, "step": 21870 }, { "epoch": 2.24822695035461, "grad_norm": 0.035557687282562256, "learning_rate": 0.01, "loss": 2.0246, "step": 21873 }, { "epoch": 2.2485353068146776, "grad_norm": 0.0330585315823555, "learning_rate": 0.01, "loss": 2.0335, "step": 21876 }, { "epoch": 2.2488436632747457, "grad_norm": 0.07125352323055267, "learning_rate": 0.01, "loss": 2.0058, "step": 21879 }, { "epoch": 2.2491520197348134, "grad_norm": 0.07722420245409012, "learning_rate": 0.01, "loss": 2.0428, "step": 21882 }, { "epoch": 2.2494603761948815, "grad_norm": 0.05086112394928932, "learning_rate": 0.01, "loss": 2.0444, "step": 21885 }, { "epoch": 2.249768732654949, "grad_norm": 0.055818263441324234, "learning_rate": 0.01, "loss": 2.0381, "step": 21888 }, { "epoch": 2.250077089115017, "grad_norm": 0.06524482369422913, "learning_rate": 0.01, "loss": 2.0543, "step": 21891 }, { "epoch": 2.250385445575085, "grad_norm": 0.051445212215185165, "learning_rate": 0.01, "loss": 2.0594, "step": 21894 }, { "epoch": 2.2506938020351526, "grad_norm": 0.057153645902872086, "learning_rate": 0.01, "loss": 2.0113, "step": 21897 }, { "epoch": 2.2510021584952202, "grad_norm": 0.08242445439100266, "learning_rate": 0.01, "loss": 2.0465, "step": 21900 }, { "epoch": 2.2513105149552883, "grad_norm": 0.06253752112388611, "learning_rate": 0.01, "loss": 2.0405, "step": 21903 }, { "epoch": 2.251618871415356, "grad_norm": 0.05388238653540611, "learning_rate": 0.01, "loss": 2.0548, "step": 21906 }, { "epoch": 2.251927227875424, "grad_norm": 0.05570909380912781, "learning_rate": 0.01, "loss": 2.0474, "step": 21909 }, { "epoch": 2.2522355843354918, "grad_norm": 0.038132745772600174, "learning_rate": 0.01, "loss": 2.0473, "step": 21912 }, { "epoch": 2.25254394079556, "grad_norm": 0.07434429973363876, "learning_rate": 0.01, "loss": 2.0306, "step": 21915 }, { "epoch": 2.2528522972556275, "grad_norm": 0.05396199971437454, "learning_rate": 0.01, "loss": 2.0239, "step": 21918 }, { "epoch": 2.253160653715695, "grad_norm": 0.06541625410318375, "learning_rate": 0.01, "loss": 2.0513, "step": 21921 }, { "epoch": 2.2534690101757633, "grad_norm": 0.07078956067562103, "learning_rate": 0.01, "loss": 2.0333, "step": 21924 }, { "epoch": 2.253777366635831, "grad_norm": 0.06990516930818558, "learning_rate": 0.01, "loss": 2.0078, "step": 21927 }, { "epoch": 2.254085723095899, "grad_norm": 0.049635425209999084, "learning_rate": 0.01, "loss": 2.0283, "step": 21930 }, { "epoch": 2.2543940795559667, "grad_norm": 0.04051049426198006, "learning_rate": 0.01, "loss": 2.0293, "step": 21933 }, { "epoch": 2.2547024360160344, "grad_norm": 0.11082252860069275, "learning_rate": 0.01, "loss": 2.0278, "step": 21936 }, { "epoch": 2.2550107924761025, "grad_norm": 0.0768849179148674, "learning_rate": 0.01, "loss": 2.0089, "step": 21939 }, { "epoch": 2.25531914893617, "grad_norm": 0.0420587994158268, "learning_rate": 0.01, "loss": 2.0188, "step": 21942 }, { "epoch": 2.2556275053962382, "grad_norm": 0.03924650698900223, "learning_rate": 0.01, "loss": 2.035, "step": 21945 }, { "epoch": 2.255935861856306, "grad_norm": 0.058580152690410614, "learning_rate": 0.01, "loss": 2.0358, "step": 21948 }, { "epoch": 2.2562442183163736, "grad_norm": 0.050332482904195786, "learning_rate": 0.01, "loss": 2.0261, "step": 21951 }, { "epoch": 2.2565525747764417, "grad_norm": 0.0472397580742836, "learning_rate": 0.01, "loss": 2.0196, "step": 21954 }, { "epoch": 2.2568609312365093, "grad_norm": 0.0500427670776844, "learning_rate": 0.01, "loss": 2.033, "step": 21957 }, { "epoch": 2.2571692876965774, "grad_norm": 0.08899175375699997, "learning_rate": 0.01, "loss": 2.0503, "step": 21960 }, { "epoch": 2.257477644156645, "grad_norm": 0.06647983938455582, "learning_rate": 0.01, "loss": 2.0087, "step": 21963 }, { "epoch": 2.2577860006167128, "grad_norm": 0.12703396379947662, "learning_rate": 0.01, "loss": 2.0532, "step": 21966 }, { "epoch": 2.258094357076781, "grad_norm": 0.06023648753762245, "learning_rate": 0.01, "loss": 2.0346, "step": 21969 }, { "epoch": 2.2584027135368485, "grad_norm": 0.05025608092546463, "learning_rate": 0.01, "loss": 2.0431, "step": 21972 }, { "epoch": 2.2587110699969166, "grad_norm": 0.043917398899793625, "learning_rate": 0.01, "loss": 2.0184, "step": 21975 }, { "epoch": 2.2590194264569843, "grad_norm": 0.03574421629309654, "learning_rate": 0.01, "loss": 2.021, "step": 21978 }, { "epoch": 2.259327782917052, "grad_norm": 0.03546285256743431, "learning_rate": 0.01, "loss": 2.0116, "step": 21981 }, { "epoch": 2.25963613937712, "grad_norm": 0.039241861552000046, "learning_rate": 0.01, "loss": 2.0314, "step": 21984 }, { "epoch": 2.2599444958371877, "grad_norm": 0.09643664956092834, "learning_rate": 0.01, "loss": 2.0378, "step": 21987 }, { "epoch": 2.260252852297256, "grad_norm": 0.08918357640504837, "learning_rate": 0.01, "loss": 2.0377, "step": 21990 }, { "epoch": 2.2605612087573235, "grad_norm": 0.05018826946616173, "learning_rate": 0.01, "loss": 2.0183, "step": 21993 }, { "epoch": 2.260869565217391, "grad_norm": 0.05288619175553322, "learning_rate": 0.01, "loss": 2.0333, "step": 21996 }, { "epoch": 2.2611779216774592, "grad_norm": 0.053800489753484726, "learning_rate": 0.01, "loss": 2.0359, "step": 21999 }, { "epoch": 2.261486278137527, "grad_norm": 0.11081501096487045, "learning_rate": 0.01, "loss": 2.0517, "step": 22002 }, { "epoch": 2.261794634597595, "grad_norm": 0.045440319925546646, "learning_rate": 0.01, "loss": 2.0105, "step": 22005 }, { "epoch": 2.2621029910576627, "grad_norm": 0.07007669657468796, "learning_rate": 0.01, "loss": 2.0411, "step": 22008 }, { "epoch": 2.2624113475177303, "grad_norm": 0.04412767291069031, "learning_rate": 0.01, "loss": 2.0345, "step": 22011 }, { "epoch": 2.2627197039777984, "grad_norm": 0.034647636115550995, "learning_rate": 0.01, "loss": 2.0236, "step": 22014 }, { "epoch": 2.263028060437866, "grad_norm": 0.04766898602247238, "learning_rate": 0.01, "loss": 2.0214, "step": 22017 }, { "epoch": 2.263336416897934, "grad_norm": 0.0641711950302124, "learning_rate": 0.01, "loss": 2.0195, "step": 22020 }, { "epoch": 2.263644773358002, "grad_norm": 0.05316673591732979, "learning_rate": 0.01, "loss": 2.0137, "step": 22023 }, { "epoch": 2.2639531298180695, "grad_norm": 0.03488560765981674, "learning_rate": 0.01, "loss": 2.0221, "step": 22026 }, { "epoch": 2.2642614862781376, "grad_norm": 0.05415144935250282, "learning_rate": 0.01, "loss": 2.0551, "step": 22029 }, { "epoch": 2.2645698427382053, "grad_norm": 0.08091796189546585, "learning_rate": 0.01, "loss": 2.0339, "step": 22032 }, { "epoch": 2.2648781991982734, "grad_norm": 0.0850050300359726, "learning_rate": 0.01, "loss": 2.0213, "step": 22035 }, { "epoch": 2.265186555658341, "grad_norm": 0.13837046921253204, "learning_rate": 0.01, "loss": 2.0352, "step": 22038 }, { "epoch": 2.2654949121184087, "grad_norm": 0.05676966533064842, "learning_rate": 0.01, "loss": 2.0357, "step": 22041 }, { "epoch": 2.265803268578477, "grad_norm": 0.1171020120382309, "learning_rate": 0.01, "loss": 2.0048, "step": 22044 }, { "epoch": 2.2661116250385445, "grad_norm": 0.04077135771512985, "learning_rate": 0.01, "loss": 2.0386, "step": 22047 }, { "epoch": 2.2664199814986126, "grad_norm": 0.03570380434393883, "learning_rate": 0.01, "loss": 2.0046, "step": 22050 }, { "epoch": 2.2667283379586802, "grad_norm": 0.0496290847659111, "learning_rate": 0.01, "loss": 2.0439, "step": 22053 }, { "epoch": 2.267036694418748, "grad_norm": 0.06205829232931137, "learning_rate": 0.01, "loss": 2.0346, "step": 22056 }, { "epoch": 2.267345050878816, "grad_norm": 0.04645274206995964, "learning_rate": 0.01, "loss": 2.0237, "step": 22059 }, { "epoch": 2.2676534073388837, "grad_norm": 0.04779735952615738, "learning_rate": 0.01, "loss": 2.009, "step": 22062 }, { "epoch": 2.2679617637989518, "grad_norm": 0.05166240781545639, "learning_rate": 0.01, "loss": 2.0391, "step": 22065 }, { "epoch": 2.2682701202590194, "grad_norm": 0.14004342257976532, "learning_rate": 0.01, "loss": 2.0309, "step": 22068 }, { "epoch": 2.268578476719087, "grad_norm": 0.06366194784641266, "learning_rate": 0.01, "loss": 2.0306, "step": 22071 }, { "epoch": 2.268886833179155, "grad_norm": 0.04914015159010887, "learning_rate": 0.01, "loss": 2.026, "step": 22074 }, { "epoch": 2.269195189639223, "grad_norm": 0.07121274620294571, "learning_rate": 0.01, "loss": 2.0302, "step": 22077 }, { "epoch": 2.269503546099291, "grad_norm": 0.048453398048877716, "learning_rate": 0.01, "loss": 2.047, "step": 22080 }, { "epoch": 2.2698119025593586, "grad_norm": 0.03777848929166794, "learning_rate": 0.01, "loss": 2.0643, "step": 22083 }, { "epoch": 2.2701202590194263, "grad_norm": 0.04788368567824364, "learning_rate": 0.01, "loss": 2.0296, "step": 22086 }, { "epoch": 2.2704286154794944, "grad_norm": 0.036985646933317184, "learning_rate": 0.01, "loss": 2.0124, "step": 22089 }, { "epoch": 2.270736971939562, "grad_norm": 0.08654552698135376, "learning_rate": 0.01, "loss": 2.0268, "step": 22092 }, { "epoch": 2.27104532839963, "grad_norm": 0.12740878760814667, "learning_rate": 0.01, "loss": 2.0423, "step": 22095 }, { "epoch": 2.271353684859698, "grad_norm": 0.058829743415117264, "learning_rate": 0.01, "loss": 2.0141, "step": 22098 }, { "epoch": 2.2716620413197655, "grad_norm": 0.04173153638839722, "learning_rate": 0.01, "loss": 2.0288, "step": 22101 }, { "epoch": 2.2719703977798336, "grad_norm": 0.04551135376095772, "learning_rate": 0.01, "loss": 2.0348, "step": 22104 }, { "epoch": 2.272278754239901, "grad_norm": 0.04404577612876892, "learning_rate": 0.01, "loss": 2.0043, "step": 22107 }, { "epoch": 2.2725871106999693, "grad_norm": 0.033306755125522614, "learning_rate": 0.01, "loss": 2.0333, "step": 22110 }, { "epoch": 2.272895467160037, "grad_norm": 0.03450062498450279, "learning_rate": 0.01, "loss": 2.0188, "step": 22113 }, { "epoch": 2.273203823620105, "grad_norm": 0.04665246978402138, "learning_rate": 0.01, "loss": 2.0002, "step": 22116 }, { "epoch": 2.2735121800801728, "grad_norm": 0.04256023094058037, "learning_rate": 0.01, "loss": 1.9977, "step": 22119 }, { "epoch": 2.2738205365402404, "grad_norm": 0.04408838599920273, "learning_rate": 0.01, "loss": 2.047, "step": 22122 }, { "epoch": 2.2741288930003085, "grad_norm": 0.17131304740905762, "learning_rate": 0.01, "loss": 2.0225, "step": 22125 }, { "epoch": 2.274437249460376, "grad_norm": 0.12661625444889069, "learning_rate": 0.01, "loss": 2.0417, "step": 22128 }, { "epoch": 2.274745605920444, "grad_norm": 0.1100076287984848, "learning_rate": 0.01, "loss": 2.0526, "step": 22131 }, { "epoch": 2.275053962380512, "grad_norm": 0.06313984096050262, "learning_rate": 0.01, "loss": 2.0372, "step": 22134 }, { "epoch": 2.2753623188405796, "grad_norm": 0.04563833773136139, "learning_rate": 0.01, "loss": 2.0356, "step": 22137 }, { "epoch": 2.2756706753006477, "grad_norm": 0.034519702196121216, "learning_rate": 0.01, "loss": 2.0394, "step": 22140 }, { "epoch": 2.2759790317607154, "grad_norm": 0.033708199858665466, "learning_rate": 0.01, "loss": 2.0418, "step": 22143 }, { "epoch": 2.2762873882207835, "grad_norm": 0.036958202719688416, "learning_rate": 0.01, "loss": 2.005, "step": 22146 }, { "epoch": 2.276595744680851, "grad_norm": 0.052069418132305145, "learning_rate": 0.01, "loss": 2.0288, "step": 22149 }, { "epoch": 2.276904101140919, "grad_norm": 0.03941415995359421, "learning_rate": 0.01, "loss": 2.0391, "step": 22152 }, { "epoch": 2.277212457600987, "grad_norm": 0.041270628571510315, "learning_rate": 0.01, "loss": 2.0574, "step": 22155 }, { "epoch": 2.2775208140610546, "grad_norm": 0.05398832634091377, "learning_rate": 0.01, "loss": 2.044, "step": 22158 }, { "epoch": 2.277829170521122, "grad_norm": 0.048906486481428146, "learning_rate": 0.01, "loss": 2.046, "step": 22161 }, { "epoch": 2.2781375269811903, "grad_norm": 0.15073837339878082, "learning_rate": 0.01, "loss": 2.0425, "step": 22164 }, { "epoch": 2.278445883441258, "grad_norm": 0.06301066279411316, "learning_rate": 0.01, "loss": 2.0516, "step": 22167 }, { "epoch": 2.278754239901326, "grad_norm": 0.07525215297937393, "learning_rate": 0.01, "loss": 2.0344, "step": 22170 }, { "epoch": 2.2790625963613937, "grad_norm": 0.059356629848480225, "learning_rate": 0.01, "loss": 2.0373, "step": 22173 }, { "epoch": 2.279370952821462, "grad_norm": 0.04340675100684166, "learning_rate": 0.01, "loss": 2.0172, "step": 22176 }, { "epoch": 2.2796793092815295, "grad_norm": 0.052859678864479065, "learning_rate": 0.01, "loss": 2.0215, "step": 22179 }, { "epoch": 2.279987665741597, "grad_norm": 0.05917971953749657, "learning_rate": 0.01, "loss": 2.0286, "step": 22182 }, { "epoch": 2.2802960222016653, "grad_norm": 0.04111889749765396, "learning_rate": 0.01, "loss": 2.0502, "step": 22185 }, { "epoch": 2.280604378661733, "grad_norm": 0.06371071189641953, "learning_rate": 0.01, "loss": 2.0264, "step": 22188 }, { "epoch": 2.2809127351218006, "grad_norm": 0.039862968027591705, "learning_rate": 0.01, "loss": 2.0247, "step": 22191 }, { "epoch": 2.2812210915818687, "grad_norm": 0.0944151058793068, "learning_rate": 0.01, "loss": 2.0257, "step": 22194 }, { "epoch": 2.2815294480419364, "grad_norm": 0.041564084589481354, "learning_rate": 0.01, "loss": 2.0282, "step": 22197 }, { "epoch": 2.2818378045020045, "grad_norm": 0.1109161302447319, "learning_rate": 0.01, "loss": 1.9969, "step": 22200 }, { "epoch": 2.282146160962072, "grad_norm": 0.14463242888450623, "learning_rate": 0.01, "loss": 2.0455, "step": 22203 }, { "epoch": 2.2824545174221402, "grad_norm": 0.06254401057958603, "learning_rate": 0.01, "loss": 2.0269, "step": 22206 }, { "epoch": 2.282762873882208, "grad_norm": 0.03546387329697609, "learning_rate": 0.01, "loss": 2.0116, "step": 22209 }, { "epoch": 2.2830712303422755, "grad_norm": 0.041129205375909805, "learning_rate": 0.01, "loss": 2.0323, "step": 22212 }, { "epoch": 2.2833795868023437, "grad_norm": 0.048473335802555084, "learning_rate": 0.01, "loss": 2.03, "step": 22215 }, { "epoch": 2.2836879432624113, "grad_norm": 0.0633682832121849, "learning_rate": 0.01, "loss": 2.0277, "step": 22218 }, { "epoch": 2.283996299722479, "grad_norm": 0.06431914120912552, "learning_rate": 0.01, "loss": 2.0353, "step": 22221 }, { "epoch": 2.284304656182547, "grad_norm": 0.09050017595291138, "learning_rate": 0.01, "loss": 2.0305, "step": 22224 }, { "epoch": 2.2846130126426147, "grad_norm": 0.07363586872816086, "learning_rate": 0.01, "loss": 2.0421, "step": 22227 }, { "epoch": 2.284921369102683, "grad_norm": 0.07984726130962372, "learning_rate": 0.01, "loss": 2.0216, "step": 22230 }, { "epoch": 2.2852297255627505, "grad_norm": 0.0845586284995079, "learning_rate": 0.01, "loss": 2.0083, "step": 22233 }, { "epoch": 2.2855380820228186, "grad_norm": 0.13392622768878937, "learning_rate": 0.01, "loss": 2.0339, "step": 22236 }, { "epoch": 2.2858464384828863, "grad_norm": 0.06791893392801285, "learning_rate": 0.01, "loss": 2.0457, "step": 22239 }, { "epoch": 2.286154794942954, "grad_norm": 0.044396497309207916, "learning_rate": 0.01, "loss": 2.0408, "step": 22242 }, { "epoch": 2.286463151403022, "grad_norm": 0.04964762553572655, "learning_rate": 0.01, "loss": 2.0259, "step": 22245 }, { "epoch": 2.2867715078630897, "grad_norm": 0.06517814844846725, "learning_rate": 0.01, "loss": 2.0352, "step": 22248 }, { "epoch": 2.2870798643231574, "grad_norm": 0.043722327798604965, "learning_rate": 0.01, "loss": 2.0111, "step": 22251 }, { "epoch": 2.2873882207832255, "grad_norm": 0.06852597743272781, "learning_rate": 0.01, "loss": 2.0377, "step": 22254 }, { "epoch": 2.287696577243293, "grad_norm": 0.13437853753566742, "learning_rate": 0.01, "loss": 2.0376, "step": 22257 }, { "epoch": 2.288004933703361, "grad_norm": 0.03924357146024704, "learning_rate": 0.01, "loss": 2.0283, "step": 22260 }, { "epoch": 2.288313290163429, "grad_norm": 0.039702124893665314, "learning_rate": 0.01, "loss": 2.0293, "step": 22263 }, { "epoch": 2.288621646623497, "grad_norm": 0.05187009647488594, "learning_rate": 0.01, "loss": 1.9957, "step": 22266 }, { "epoch": 2.2889300030835646, "grad_norm": 0.0682847648859024, "learning_rate": 0.01, "loss": 2.0251, "step": 22269 }, { "epoch": 2.2892383595436323, "grad_norm": 0.046652939170598984, "learning_rate": 0.01, "loss": 2.0481, "step": 22272 }, { "epoch": 2.2895467160037004, "grad_norm": 0.04348958283662796, "learning_rate": 0.01, "loss": 2.0492, "step": 22275 }, { "epoch": 2.289855072463768, "grad_norm": 0.05141732096672058, "learning_rate": 0.01, "loss": 2.0372, "step": 22278 }, { "epoch": 2.2901634289238357, "grad_norm": 0.04899610951542854, "learning_rate": 0.01, "loss": 2.0381, "step": 22281 }, { "epoch": 2.290471785383904, "grad_norm": 0.05614173039793968, "learning_rate": 0.01, "loss": 2.0082, "step": 22284 }, { "epoch": 2.2907801418439715, "grad_norm": 0.08255594223737717, "learning_rate": 0.01, "loss": 2.0079, "step": 22287 }, { "epoch": 2.2910884983040396, "grad_norm": 0.12105574458837509, "learning_rate": 0.01, "loss": 2.0341, "step": 22290 }, { "epoch": 2.2913968547641073, "grad_norm": 0.07021155953407288, "learning_rate": 0.01, "loss": 2.0051, "step": 22293 }, { "epoch": 2.2917052112241754, "grad_norm": 0.08364082872867584, "learning_rate": 0.01, "loss": 2.0445, "step": 22296 }, { "epoch": 2.292013567684243, "grad_norm": 0.1123763918876648, "learning_rate": 0.01, "loss": 2.0298, "step": 22299 }, { "epoch": 2.2923219241443107, "grad_norm": 0.06539402157068253, "learning_rate": 0.01, "loss": 2.0245, "step": 22302 }, { "epoch": 2.292630280604379, "grad_norm": 0.08749070018529892, "learning_rate": 0.01, "loss": 2.0336, "step": 22305 }, { "epoch": 2.2929386370644464, "grad_norm": 0.05580052733421326, "learning_rate": 0.01, "loss": 2.0519, "step": 22308 }, { "epoch": 2.293246993524514, "grad_norm": 0.07866258919239044, "learning_rate": 0.01, "loss": 2.0455, "step": 22311 }, { "epoch": 2.293555349984582, "grad_norm": 0.06253038346767426, "learning_rate": 0.01, "loss": 2.0423, "step": 22314 }, { "epoch": 2.29386370644465, "grad_norm": 0.0412861630320549, "learning_rate": 0.01, "loss": 2.0504, "step": 22317 }, { "epoch": 2.294172062904718, "grad_norm": 0.05568886548280716, "learning_rate": 0.01, "loss": 2.0397, "step": 22320 }, { "epoch": 2.2944804193647856, "grad_norm": 0.060274332761764526, "learning_rate": 0.01, "loss": 2.0563, "step": 22323 }, { "epoch": 2.2947887758248537, "grad_norm": 0.09785215556621552, "learning_rate": 0.01, "loss": 2.0378, "step": 22326 }, { "epoch": 2.2950971322849214, "grad_norm": 0.06171462684869766, "learning_rate": 0.01, "loss": 2.0246, "step": 22329 }, { "epoch": 2.295405488744989, "grad_norm": 0.1034122183918953, "learning_rate": 0.01, "loss": 2.0493, "step": 22332 }, { "epoch": 2.295713845205057, "grad_norm": 0.04855991527438164, "learning_rate": 0.01, "loss": 2.0401, "step": 22335 }, { "epoch": 2.296022201665125, "grad_norm": 0.05000189319252968, "learning_rate": 0.01, "loss": 2.0111, "step": 22338 }, { "epoch": 2.2963305581251925, "grad_norm": 0.031126966699957848, "learning_rate": 0.01, "loss": 2.0248, "step": 22341 }, { "epoch": 2.2966389145852606, "grad_norm": 0.08688051998615265, "learning_rate": 0.01, "loss": 2.0227, "step": 22344 }, { "epoch": 2.2969472710453283, "grad_norm": 0.08918049931526184, "learning_rate": 0.01, "loss": 2.0573, "step": 22347 }, { "epoch": 2.2972556275053964, "grad_norm": 0.0592593289911747, "learning_rate": 0.01, "loss": 2.0042, "step": 22350 }, { "epoch": 2.297563983965464, "grad_norm": 0.11501101404428482, "learning_rate": 0.01, "loss": 2.0348, "step": 22353 }, { "epoch": 2.297872340425532, "grad_norm": 0.12966668605804443, "learning_rate": 0.01, "loss": 2.0475, "step": 22356 }, { "epoch": 2.2981806968856, "grad_norm": 0.08889560401439667, "learning_rate": 0.01, "loss": 2.0499, "step": 22359 }, { "epoch": 2.2984890533456674, "grad_norm": 0.07578025758266449, "learning_rate": 0.01, "loss": 2.0479, "step": 22362 }, { "epoch": 2.2987974098057355, "grad_norm": 0.04283175244927406, "learning_rate": 0.01, "loss": 2.0009, "step": 22365 }, { "epoch": 2.299105766265803, "grad_norm": 0.038169119507074356, "learning_rate": 0.01, "loss": 2.0506, "step": 22368 }, { "epoch": 2.299414122725871, "grad_norm": 0.07047520577907562, "learning_rate": 0.01, "loss": 2.0435, "step": 22371 }, { "epoch": 2.299722479185939, "grad_norm": 0.06367038190364838, "learning_rate": 0.01, "loss": 2.0186, "step": 22374 }, { "epoch": 2.3000308356460066, "grad_norm": 0.06601911783218384, "learning_rate": 0.01, "loss": 2.026, "step": 22377 }, { "epoch": 2.3003391921060747, "grad_norm": 0.06333800405263901, "learning_rate": 0.01, "loss": 2.0244, "step": 22380 }, { "epoch": 2.3006475485661424, "grad_norm": 0.0635596215724945, "learning_rate": 0.01, "loss": 2.0102, "step": 22383 }, { "epoch": 2.3009559050262105, "grad_norm": 0.09258962422609329, "learning_rate": 0.01, "loss": 2.0348, "step": 22386 }, { "epoch": 2.301264261486278, "grad_norm": 0.04462733119726181, "learning_rate": 0.01, "loss": 1.9775, "step": 22389 }, { "epoch": 2.301572617946346, "grad_norm": 0.04802479222416878, "learning_rate": 0.01, "loss": 2.0557, "step": 22392 }, { "epoch": 2.301880974406414, "grad_norm": 0.0396190769970417, "learning_rate": 0.01, "loss": 2.0543, "step": 22395 }, { "epoch": 2.3021893308664816, "grad_norm": 0.05610959604382515, "learning_rate": 0.01, "loss": 2.048, "step": 22398 }, { "epoch": 2.3024976873265492, "grad_norm": 0.03454664349555969, "learning_rate": 0.01, "loss": 2.0592, "step": 22401 }, { "epoch": 2.3028060437866174, "grad_norm": 0.049920883029699326, "learning_rate": 0.01, "loss": 2.0533, "step": 22404 }, { "epoch": 2.303114400246685, "grad_norm": 0.04548259451985359, "learning_rate": 0.01, "loss": 2.045, "step": 22407 }, { "epoch": 2.303422756706753, "grad_norm": 0.07624956220388412, "learning_rate": 0.01, "loss": 2.0321, "step": 22410 }, { "epoch": 2.3037311131668208, "grad_norm": 0.1172046959400177, "learning_rate": 0.01, "loss": 2.0285, "step": 22413 }, { "epoch": 2.304039469626889, "grad_norm": 0.07652290165424347, "learning_rate": 0.01, "loss": 2.0324, "step": 22416 }, { "epoch": 2.3043478260869565, "grad_norm": 0.07458405196666718, "learning_rate": 0.01, "loss": 2.0351, "step": 22419 }, { "epoch": 2.304656182547024, "grad_norm": 0.08384053409099579, "learning_rate": 0.01, "loss": 2.0457, "step": 22422 }, { "epoch": 2.3049645390070923, "grad_norm": 0.0492565892636776, "learning_rate": 0.01, "loss": 2.0353, "step": 22425 }, { "epoch": 2.30527289546716, "grad_norm": 0.05125366151332855, "learning_rate": 0.01, "loss": 2.0447, "step": 22428 }, { "epoch": 2.3055812519272276, "grad_norm": 0.03920417279005051, "learning_rate": 0.01, "loss": 2.0471, "step": 22431 }, { "epoch": 2.3058896083872957, "grad_norm": 0.07206401228904724, "learning_rate": 0.01, "loss": 2.0312, "step": 22434 }, { "epoch": 2.3061979648473634, "grad_norm": 0.08888402581214905, "learning_rate": 0.01, "loss": 2.0331, "step": 22437 }, { "epoch": 2.3065063213074315, "grad_norm": 0.1788371056318283, "learning_rate": 0.01, "loss": 2.0461, "step": 22440 }, { "epoch": 2.306814677767499, "grad_norm": 0.11030272394418716, "learning_rate": 0.01, "loss": 2.0314, "step": 22443 }, { "epoch": 2.3071230342275673, "grad_norm": 0.08602694422006607, "learning_rate": 0.01, "loss": 2.0101, "step": 22446 }, { "epoch": 2.307431390687635, "grad_norm": 0.07269123196601868, "learning_rate": 0.01, "loss": 2.0068, "step": 22449 }, { "epoch": 2.3077397471477026, "grad_norm": 0.043196290731430054, "learning_rate": 0.01, "loss": 2.0617, "step": 22452 }, { "epoch": 2.3080481036077707, "grad_norm": 0.03932726010680199, "learning_rate": 0.01, "loss": 2.0314, "step": 22455 }, { "epoch": 2.3083564600678383, "grad_norm": 0.07751597464084625, "learning_rate": 0.01, "loss": 2.0225, "step": 22458 }, { "epoch": 2.3086648165279064, "grad_norm": 0.06549305468797684, "learning_rate": 0.01, "loss": 2.0386, "step": 22461 }, { "epoch": 2.308973172987974, "grad_norm": 0.05819348245859146, "learning_rate": 0.01, "loss": 2.0343, "step": 22464 }, { "epoch": 2.3092815294480418, "grad_norm": 0.04251622408628464, "learning_rate": 0.01, "loss": 2.02, "step": 22467 }, { "epoch": 2.30958988590811, "grad_norm": 0.03997926414012909, "learning_rate": 0.01, "loss": 2.0405, "step": 22470 }, { "epoch": 2.3098982423681775, "grad_norm": 0.060847315937280655, "learning_rate": 0.01, "loss": 2.0301, "step": 22473 }, { "epoch": 2.3102065988282456, "grad_norm": 0.188557431101799, "learning_rate": 0.01, "loss": 2.0388, "step": 22476 }, { "epoch": 2.3105149552883133, "grad_norm": 0.050608254969120026, "learning_rate": 0.01, "loss": 2.0376, "step": 22479 }, { "epoch": 2.310823311748381, "grad_norm": 0.0930873304605484, "learning_rate": 0.01, "loss": 2.0329, "step": 22482 }, { "epoch": 2.311131668208449, "grad_norm": 0.05231544002890587, "learning_rate": 0.01, "loss": 2.0646, "step": 22485 }, { "epoch": 2.3114400246685167, "grad_norm": 0.038996320217847824, "learning_rate": 0.01, "loss": 2.0198, "step": 22488 }, { "epoch": 2.311748381128585, "grad_norm": 0.04804287478327751, "learning_rate": 0.01, "loss": 2.0377, "step": 22491 }, { "epoch": 2.3120567375886525, "grad_norm": 0.05822224169969559, "learning_rate": 0.01, "loss": 2.0581, "step": 22494 }, { "epoch": 2.31236509404872, "grad_norm": 0.04677027836441994, "learning_rate": 0.01, "loss": 2.0552, "step": 22497 }, { "epoch": 2.3126734505087883, "grad_norm": 0.05270523950457573, "learning_rate": 0.01, "loss": 1.9882, "step": 22500 }, { "epoch": 2.312981806968856, "grad_norm": 0.04068690538406372, "learning_rate": 0.01, "loss": 2.0495, "step": 22503 }, { "epoch": 2.313290163428924, "grad_norm": 0.03488868847489357, "learning_rate": 0.01, "loss": 2.0382, "step": 22506 }, { "epoch": 2.3135985198889917, "grad_norm": 0.1007746085524559, "learning_rate": 0.01, "loss": 2.0356, "step": 22509 }, { "epoch": 2.3139068763490593, "grad_norm": 0.04351113364100456, "learning_rate": 0.01, "loss": 2.0513, "step": 22512 }, { "epoch": 2.3142152328091274, "grad_norm": 0.042396917939186096, "learning_rate": 0.01, "loss": 2.0427, "step": 22515 }, { "epoch": 2.314523589269195, "grad_norm": 0.05541342496871948, "learning_rate": 0.01, "loss": 2.0577, "step": 22518 }, { "epoch": 2.314831945729263, "grad_norm": 0.05093314126133919, "learning_rate": 0.01, "loss": 2.0127, "step": 22521 }, { "epoch": 2.315140302189331, "grad_norm": 0.09227500855922699, "learning_rate": 0.01, "loss": 2.0303, "step": 22524 }, { "epoch": 2.3154486586493985, "grad_norm": 0.07242649048566818, "learning_rate": 0.01, "loss": 2.0355, "step": 22527 }, { "epoch": 2.3157570151094666, "grad_norm": 0.07468149811029434, "learning_rate": 0.01, "loss": 2.0443, "step": 22530 }, { "epoch": 2.3160653715695343, "grad_norm": 0.0861506536602974, "learning_rate": 0.01, "loss": 2.0461, "step": 22533 }, { "epoch": 2.3163737280296024, "grad_norm": 0.05024004355072975, "learning_rate": 0.01, "loss": 2.0228, "step": 22536 }, { "epoch": 2.31668208448967, "grad_norm": 0.07255478948354721, "learning_rate": 0.01, "loss": 1.997, "step": 22539 }, { "epoch": 2.3169904409497377, "grad_norm": 0.08199785649776459, "learning_rate": 0.01, "loss": 2.0465, "step": 22542 }, { "epoch": 2.317298797409806, "grad_norm": 0.05194063484668732, "learning_rate": 0.01, "loss": 2.0426, "step": 22545 }, { "epoch": 2.3176071538698735, "grad_norm": 0.05785224214196205, "learning_rate": 0.01, "loss": 2.0311, "step": 22548 }, { "epoch": 2.3179155103299416, "grad_norm": 0.0767458900809288, "learning_rate": 0.01, "loss": 2.0338, "step": 22551 }, { "epoch": 2.3182238667900092, "grad_norm": 0.04905517399311066, "learning_rate": 0.01, "loss": 2.0369, "step": 22554 }, { "epoch": 2.318532223250077, "grad_norm": 0.12444967031478882, "learning_rate": 0.01, "loss": 1.9988, "step": 22557 }, { "epoch": 2.318840579710145, "grad_norm": 0.07965657114982605, "learning_rate": 0.01, "loss": 2.0438, "step": 22560 }, { "epoch": 2.3191489361702127, "grad_norm": 0.12167848646640778, "learning_rate": 0.01, "loss": 2.0359, "step": 22563 }, { "epoch": 2.3194572926302808, "grad_norm": 0.05483706668019295, "learning_rate": 0.01, "loss": 2.034, "step": 22566 }, { "epoch": 2.3197656490903484, "grad_norm": 0.043046776205301285, "learning_rate": 0.01, "loss": 2.0132, "step": 22569 }, { "epoch": 2.320074005550416, "grad_norm": 0.03646457940340042, "learning_rate": 0.01, "loss": 1.9867, "step": 22572 }, { "epoch": 2.320382362010484, "grad_norm": 0.05777902901172638, "learning_rate": 0.01, "loss": 2.0397, "step": 22575 }, { "epoch": 2.320690718470552, "grad_norm": 0.08646814525127411, "learning_rate": 0.01, "loss": 2.0294, "step": 22578 }, { "epoch": 2.32099907493062, "grad_norm": 0.10770904272794724, "learning_rate": 0.01, "loss": 2.0168, "step": 22581 }, { "epoch": 2.3213074313906876, "grad_norm": 0.11546061187982559, "learning_rate": 0.01, "loss": 2.0281, "step": 22584 }, { "epoch": 2.3216157878507553, "grad_norm": 0.08060076832771301, "learning_rate": 0.01, "loss": 2.0378, "step": 22587 }, { "epoch": 2.3219241443108234, "grad_norm": 0.05546190217137337, "learning_rate": 0.01, "loss": 2.0361, "step": 22590 }, { "epoch": 2.322232500770891, "grad_norm": 0.06310277432203293, "learning_rate": 0.01, "loss": 2.0261, "step": 22593 }, { "epoch": 2.322540857230959, "grad_norm": 0.037245072424411774, "learning_rate": 0.01, "loss": 2.038, "step": 22596 }, { "epoch": 2.322849213691027, "grad_norm": 0.05082730948925018, "learning_rate": 0.01, "loss": 2.0327, "step": 22599 }, { "epoch": 2.3231575701510945, "grad_norm": 0.054748065769672394, "learning_rate": 0.01, "loss": 2.0356, "step": 22602 }, { "epoch": 2.3234659266111626, "grad_norm": 0.06093902140855789, "learning_rate": 0.01, "loss": 2.0339, "step": 22605 }, { "epoch": 2.3237742830712302, "grad_norm": 0.10238637775182724, "learning_rate": 0.01, "loss": 2.0559, "step": 22608 }, { "epoch": 2.3240826395312983, "grad_norm": 0.0850609764456749, "learning_rate": 0.01, "loss": 2.0337, "step": 22611 }, { "epoch": 2.324390995991366, "grad_norm": 0.04108014330267906, "learning_rate": 0.01, "loss": 2.045, "step": 22614 }, { "epoch": 2.3246993524514337, "grad_norm": 0.03669281303882599, "learning_rate": 0.01, "loss": 2.0276, "step": 22617 }, { "epoch": 2.3250077089115018, "grad_norm": 0.053524449467659, "learning_rate": 0.01, "loss": 2.0076, "step": 22620 }, { "epoch": 2.3253160653715694, "grad_norm": 0.03545799478888512, "learning_rate": 0.01, "loss": 2.03, "step": 22623 }, { "epoch": 2.3256244218316375, "grad_norm": 0.0569877028465271, "learning_rate": 0.01, "loss": 2.0189, "step": 22626 }, { "epoch": 2.325932778291705, "grad_norm": 0.06943153589963913, "learning_rate": 0.01, "loss": 2.0183, "step": 22629 }, { "epoch": 2.326241134751773, "grad_norm": 0.07389956712722778, "learning_rate": 0.01, "loss": 2.0524, "step": 22632 }, { "epoch": 2.326549491211841, "grad_norm": 0.08397506922483444, "learning_rate": 0.01, "loss": 2.0214, "step": 22635 }, { "epoch": 2.3268578476719086, "grad_norm": 0.13598600029945374, "learning_rate": 0.01, "loss": 2.0276, "step": 22638 }, { "epoch": 2.3271662041319767, "grad_norm": 0.08953214436769485, "learning_rate": 0.01, "loss": 2.0282, "step": 22641 }, { "epoch": 2.3274745605920444, "grad_norm": 0.09812797605991364, "learning_rate": 0.01, "loss": 2.0631, "step": 22644 }, { "epoch": 2.3277829170521125, "grad_norm": 0.06696080416440964, "learning_rate": 0.01, "loss": 2.0314, "step": 22647 }, { "epoch": 2.32809127351218, "grad_norm": 0.07981070876121521, "learning_rate": 0.01, "loss": 2.0209, "step": 22650 }, { "epoch": 2.328399629972248, "grad_norm": 0.06655466556549072, "learning_rate": 0.01, "loss": 2.0426, "step": 22653 }, { "epoch": 2.328707986432316, "grad_norm": 0.061447955667972565, "learning_rate": 0.01, "loss": 2.0349, "step": 22656 }, { "epoch": 2.3290163428923836, "grad_norm": 0.08812141418457031, "learning_rate": 0.01, "loss": 2.0471, "step": 22659 }, { "epoch": 2.3293246993524512, "grad_norm": 0.04082084447145462, "learning_rate": 0.01, "loss": 2.0435, "step": 22662 }, { "epoch": 2.3296330558125193, "grad_norm": 0.04827709123492241, "learning_rate": 0.01, "loss": 2.0344, "step": 22665 }, { "epoch": 2.329941412272587, "grad_norm": 0.04531668871641159, "learning_rate": 0.01, "loss": 2.01, "step": 22668 }, { "epoch": 2.330249768732655, "grad_norm": 0.06739286333322525, "learning_rate": 0.01, "loss": 2.0479, "step": 22671 }, { "epoch": 2.3305581251927228, "grad_norm": 0.07753113657236099, "learning_rate": 0.01, "loss": 2.0454, "step": 22674 }, { "epoch": 2.330866481652791, "grad_norm": 0.1572093814611435, "learning_rate": 0.01, "loss": 2.0089, "step": 22677 }, { "epoch": 2.3311748381128585, "grad_norm": 0.0898297056555748, "learning_rate": 0.01, "loss": 2.0434, "step": 22680 }, { "epoch": 2.331483194572926, "grad_norm": 0.0803251639008522, "learning_rate": 0.01, "loss": 2.0421, "step": 22683 }, { "epoch": 2.3317915510329943, "grad_norm": 0.06155823543667793, "learning_rate": 0.01, "loss": 2.0106, "step": 22686 }, { "epoch": 2.332099907493062, "grad_norm": 0.03681538626551628, "learning_rate": 0.01, "loss": 2.0082, "step": 22689 }, { "epoch": 2.3324082639531296, "grad_norm": 0.03595598414540291, "learning_rate": 0.01, "loss": 2.0563, "step": 22692 }, { "epoch": 2.3327166204131977, "grad_norm": 0.06950604170560837, "learning_rate": 0.01, "loss": 2.0413, "step": 22695 }, { "epoch": 2.3330249768732654, "grad_norm": 0.07386364042758942, "learning_rate": 0.01, "loss": 2.0261, "step": 22698 }, { "epoch": 2.3333333333333335, "grad_norm": 0.06711214780807495, "learning_rate": 0.01, "loss": 2.0418, "step": 22701 }, { "epoch": 2.333641689793401, "grad_norm": 0.07357903569936752, "learning_rate": 0.01, "loss": 2.0271, "step": 22704 }, { "epoch": 2.3339500462534692, "grad_norm": 0.0660550519824028, "learning_rate": 0.01, "loss": 2.0242, "step": 22707 }, { "epoch": 2.334258402713537, "grad_norm": 0.05066053569316864, "learning_rate": 0.01, "loss": 2.0117, "step": 22710 }, { "epoch": 2.3345667591736046, "grad_norm": 0.04992471635341644, "learning_rate": 0.01, "loss": 2.0764, "step": 22713 }, { "epoch": 2.3348751156336727, "grad_norm": 0.03477726876735687, "learning_rate": 0.01, "loss": 2.0239, "step": 22716 }, { "epoch": 2.3351834720937403, "grad_norm": 0.05454208329319954, "learning_rate": 0.01, "loss": 2.0476, "step": 22719 }, { "epoch": 2.335491828553808, "grad_norm": 0.09009736031293869, "learning_rate": 0.01, "loss": 2.0026, "step": 22722 }, { "epoch": 2.335800185013876, "grad_norm": 0.10271045565605164, "learning_rate": 0.01, "loss": 2.0514, "step": 22725 }, { "epoch": 2.3361085414739438, "grad_norm": 0.05939007177948952, "learning_rate": 0.01, "loss": 2.0436, "step": 22728 }, { "epoch": 2.336416897934012, "grad_norm": 0.06454264372587204, "learning_rate": 0.01, "loss": 2.0334, "step": 22731 }, { "epoch": 2.3367252543940795, "grad_norm": 0.04892132803797722, "learning_rate": 0.01, "loss": 2.0305, "step": 22734 }, { "epoch": 2.3370336108541476, "grad_norm": 0.060863394290208817, "learning_rate": 0.01, "loss": 2.0365, "step": 22737 }, { "epoch": 2.3373419673142153, "grad_norm": 0.06073556840419769, "learning_rate": 0.01, "loss": 2.028, "step": 22740 }, { "epoch": 2.337650323774283, "grad_norm": 0.04620682820677757, "learning_rate": 0.01, "loss": 2.034, "step": 22743 }, { "epoch": 2.337958680234351, "grad_norm": 0.04834865778684616, "learning_rate": 0.01, "loss": 2.0407, "step": 22746 }, { "epoch": 2.3382670366944187, "grad_norm": 0.08154502511024475, "learning_rate": 0.01, "loss": 2.0477, "step": 22749 }, { "epoch": 2.3385753931544864, "grad_norm": 0.062378283590078354, "learning_rate": 0.01, "loss": 2.0256, "step": 22752 }, { "epoch": 2.3388837496145545, "grad_norm": 0.11560855805873871, "learning_rate": 0.01, "loss": 2.0059, "step": 22755 }, { "epoch": 2.339192106074622, "grad_norm": 0.14084307849407196, "learning_rate": 0.01, "loss": 2.0179, "step": 22758 }, { "epoch": 2.3395004625346902, "grad_norm": 0.08354218304157257, "learning_rate": 0.01, "loss": 2.0344, "step": 22761 }, { "epoch": 2.339808818994758, "grad_norm": 0.04660021886229515, "learning_rate": 0.01, "loss": 2.0225, "step": 22764 }, { "epoch": 2.340117175454826, "grad_norm": 0.06319580972194672, "learning_rate": 0.01, "loss": 2.0438, "step": 22767 }, { "epoch": 2.3404255319148937, "grad_norm": 0.06721585988998413, "learning_rate": 0.01, "loss": 2.0112, "step": 22770 }, { "epoch": 2.3407338883749613, "grad_norm": 0.08593994379043579, "learning_rate": 0.01, "loss": 2.0149, "step": 22773 }, { "epoch": 2.3410422448350294, "grad_norm": 0.07594014704227448, "learning_rate": 0.01, "loss": 2.045, "step": 22776 }, { "epoch": 2.341350601295097, "grad_norm": 0.05942634493112564, "learning_rate": 0.01, "loss": 2.0557, "step": 22779 }, { "epoch": 2.3416589577551647, "grad_norm": 0.09516235440969467, "learning_rate": 0.01, "loss": 2.0524, "step": 22782 }, { "epoch": 2.341967314215233, "grad_norm": 0.11480774730443954, "learning_rate": 0.01, "loss": 2.0112, "step": 22785 }, { "epoch": 2.3422756706753005, "grad_norm": 0.078018918633461, "learning_rate": 0.01, "loss": 2.0083, "step": 22788 }, { "epoch": 2.3425840271353686, "grad_norm": 0.04268670827150345, "learning_rate": 0.01, "loss": 1.9939, "step": 22791 }, { "epoch": 2.3428923835954363, "grad_norm": 0.04221808537840843, "learning_rate": 0.01, "loss": 2.0198, "step": 22794 }, { "epoch": 2.3432007400555044, "grad_norm": 0.04617379978299141, "learning_rate": 0.01, "loss": 2.036, "step": 22797 }, { "epoch": 2.343509096515572, "grad_norm": 0.05600395053625107, "learning_rate": 0.01, "loss": 2.0289, "step": 22800 }, { "epoch": 2.3438174529756397, "grad_norm": 0.04997321218252182, "learning_rate": 0.01, "loss": 2.0396, "step": 22803 }, { "epoch": 2.344125809435708, "grad_norm": 0.08891329169273376, "learning_rate": 0.01, "loss": 2.0464, "step": 22806 }, { "epoch": 2.3444341658957755, "grad_norm": 0.06883488595485687, "learning_rate": 0.01, "loss": 2.0417, "step": 22809 }, { "epoch": 2.344742522355843, "grad_norm": 0.05532229691743851, "learning_rate": 0.01, "loss": 2.0367, "step": 22812 }, { "epoch": 2.3450508788159112, "grad_norm": 0.04480346292257309, "learning_rate": 0.01, "loss": 2.036, "step": 22815 }, { "epoch": 2.345359235275979, "grad_norm": 0.06533756107091904, "learning_rate": 0.01, "loss": 2.0218, "step": 22818 }, { "epoch": 2.345667591736047, "grad_norm": 0.09851729869842529, "learning_rate": 0.01, "loss": 2.0175, "step": 22821 }, { "epoch": 2.3459759481961147, "grad_norm": 0.06026868894696236, "learning_rate": 0.01, "loss": 2.0359, "step": 22824 }, { "epoch": 2.3462843046561828, "grad_norm": 0.10084753483533859, "learning_rate": 0.01, "loss": 2.0348, "step": 22827 }, { "epoch": 2.3465926611162504, "grad_norm": 0.09302054345607758, "learning_rate": 0.01, "loss": 2.0376, "step": 22830 }, { "epoch": 2.346901017576318, "grad_norm": 0.06304951757192612, "learning_rate": 0.01, "loss": 2.0254, "step": 22833 }, { "epoch": 2.347209374036386, "grad_norm": 0.03625926375389099, "learning_rate": 0.01, "loss": 2.037, "step": 22836 }, { "epoch": 2.347517730496454, "grad_norm": 0.0602838359773159, "learning_rate": 0.01, "loss": 2.0258, "step": 22839 }, { "epoch": 2.3478260869565215, "grad_norm": 0.08285371959209442, "learning_rate": 0.01, "loss": 2.0299, "step": 22842 }, { "epoch": 2.3481344434165896, "grad_norm": 0.05780784785747528, "learning_rate": 0.01, "loss": 2.0288, "step": 22845 }, { "epoch": 2.3484427998766573, "grad_norm": 0.07491873949766159, "learning_rate": 0.01, "loss": 2.0327, "step": 22848 }, { "epoch": 2.3487511563367254, "grad_norm": 0.04745258018374443, "learning_rate": 0.01, "loss": 2.0347, "step": 22851 }, { "epoch": 2.349059512796793, "grad_norm": 0.030006757006049156, "learning_rate": 0.01, "loss": 2.0315, "step": 22854 }, { "epoch": 2.349367869256861, "grad_norm": 0.035286273807287216, "learning_rate": 0.01, "loss": 2.0198, "step": 22857 }, { "epoch": 2.349676225716929, "grad_norm": 0.08970153331756592, "learning_rate": 0.01, "loss": 2.0473, "step": 22860 }, { "epoch": 2.3499845821769965, "grad_norm": 0.06819456070661545, "learning_rate": 0.01, "loss": 2.0246, "step": 22863 }, { "epoch": 2.3502929386370646, "grad_norm": 0.07130144536495209, "learning_rate": 0.01, "loss": 2.019, "step": 22866 }, { "epoch": 2.3506012950971322, "grad_norm": 0.046634428203105927, "learning_rate": 0.01, "loss": 2.0338, "step": 22869 }, { "epoch": 2.3509096515572, "grad_norm": 0.10333798825740814, "learning_rate": 0.01, "loss": 2.0348, "step": 22872 }, { "epoch": 2.351218008017268, "grad_norm": 0.06691887974739075, "learning_rate": 0.01, "loss": 2.0254, "step": 22875 }, { "epoch": 2.3515263644773357, "grad_norm": 0.06780122220516205, "learning_rate": 0.01, "loss": 2.052, "step": 22878 }, { "epoch": 2.3518347209374038, "grad_norm": 0.04448529705405235, "learning_rate": 0.01, "loss": 2.0322, "step": 22881 }, { "epoch": 2.3521430773974714, "grad_norm": 0.09176638722419739, "learning_rate": 0.01, "loss": 2.0168, "step": 22884 }, { "epoch": 2.3524514338575395, "grad_norm": 0.05699750408530235, "learning_rate": 0.01, "loss": 2.0241, "step": 22887 }, { "epoch": 2.352759790317607, "grad_norm": 0.0556836873292923, "learning_rate": 0.01, "loss": 2.0328, "step": 22890 }, { "epoch": 2.353068146777675, "grad_norm": 0.05335136875510216, "learning_rate": 0.01, "loss": 2.0072, "step": 22893 }, { "epoch": 2.353376503237743, "grad_norm": 0.06900188326835632, "learning_rate": 0.01, "loss": 2.0392, "step": 22896 }, { "epoch": 2.3536848596978106, "grad_norm": 0.07300775498151779, "learning_rate": 0.01, "loss": 2.0341, "step": 22899 }, { "epoch": 2.3539932161578783, "grad_norm": 0.0912926122546196, "learning_rate": 0.01, "loss": 2.041, "step": 22902 }, { "epoch": 2.3543015726179464, "grad_norm": 0.0803481861948967, "learning_rate": 0.01, "loss": 2.0225, "step": 22905 }, { "epoch": 2.354609929078014, "grad_norm": 0.05067334324121475, "learning_rate": 0.01, "loss": 2.0187, "step": 22908 }, { "epoch": 2.354918285538082, "grad_norm": 0.10282410681247711, "learning_rate": 0.01, "loss": 2.0349, "step": 22911 }, { "epoch": 2.35522664199815, "grad_norm": 0.04224551469087601, "learning_rate": 0.01, "loss": 2.0166, "step": 22914 }, { "epoch": 2.355534998458218, "grad_norm": 0.06768529862165451, "learning_rate": 0.01, "loss": 2.0304, "step": 22917 }, { "epoch": 2.3558433549182856, "grad_norm": 0.10372413694858551, "learning_rate": 0.01, "loss": 2.0421, "step": 22920 }, { "epoch": 2.356151711378353, "grad_norm": 0.05974859744310379, "learning_rate": 0.01, "loss": 2.0231, "step": 22923 }, { "epoch": 2.3564600678384213, "grad_norm": 0.06408294290304184, "learning_rate": 0.01, "loss": 2.0199, "step": 22926 }, { "epoch": 2.356768424298489, "grad_norm": 0.053037382662296295, "learning_rate": 0.01, "loss": 2.0286, "step": 22929 }, { "epoch": 2.3570767807585566, "grad_norm": 0.038871798664331436, "learning_rate": 0.01, "loss": 2.0588, "step": 22932 }, { "epoch": 2.3573851372186247, "grad_norm": 0.046300627291202545, "learning_rate": 0.01, "loss": 2.033, "step": 22935 }, { "epoch": 2.3576934936786924, "grad_norm": 0.05344367399811745, "learning_rate": 0.01, "loss": 2.0462, "step": 22938 }, { "epoch": 2.3580018501387605, "grad_norm": 0.0878051146864891, "learning_rate": 0.01, "loss": 2.0173, "step": 22941 }, { "epoch": 2.358310206598828, "grad_norm": 0.09469857066869736, "learning_rate": 0.01, "loss": 2.0373, "step": 22944 }, { "epoch": 2.3586185630588963, "grad_norm": 0.04746483266353607, "learning_rate": 0.01, "loss": 2.0155, "step": 22947 }, { "epoch": 2.358926919518964, "grad_norm": 0.051294367760419846, "learning_rate": 0.01, "loss": 2.0452, "step": 22950 }, { "epoch": 2.3592352759790316, "grad_norm": 0.04378626495599747, "learning_rate": 0.01, "loss": 2.0381, "step": 22953 }, { "epoch": 2.3595436324390997, "grad_norm": 0.054627690464258194, "learning_rate": 0.01, "loss": 2.0592, "step": 22956 }, { "epoch": 2.3598519888991674, "grad_norm": 0.04732866585254669, "learning_rate": 0.01, "loss": 2.0284, "step": 22959 }, { "epoch": 2.3601603453592355, "grad_norm": 0.11569249629974365, "learning_rate": 0.01, "loss": 2.007, "step": 22962 }, { "epoch": 2.360468701819303, "grad_norm": 0.10606824606657028, "learning_rate": 0.01, "loss": 2.0357, "step": 22965 }, { "epoch": 2.360777058279371, "grad_norm": 0.05072540044784546, "learning_rate": 0.01, "loss": 2.0581, "step": 22968 }, { "epoch": 2.361085414739439, "grad_norm": 0.0738227441906929, "learning_rate": 0.01, "loss": 2.0305, "step": 22971 }, { "epoch": 2.3613937711995066, "grad_norm": 0.06440632790327072, "learning_rate": 0.01, "loss": 2.0315, "step": 22974 }, { "epoch": 2.3617021276595747, "grad_norm": 0.05426304414868355, "learning_rate": 0.01, "loss": 2.043, "step": 22977 }, { "epoch": 2.3620104841196423, "grad_norm": 0.06414210051298141, "learning_rate": 0.01, "loss": 2.0735, "step": 22980 }, { "epoch": 2.36231884057971, "grad_norm": 0.034726858139038086, "learning_rate": 0.01, "loss": 2.0422, "step": 22983 }, { "epoch": 2.362627197039778, "grad_norm": 0.04326315596699715, "learning_rate": 0.01, "loss": 2.0422, "step": 22986 }, { "epoch": 2.3629355534998457, "grad_norm": 0.06470071524381638, "learning_rate": 0.01, "loss": 2.0192, "step": 22989 }, { "epoch": 2.363243909959914, "grad_norm": 0.09161586314439774, "learning_rate": 0.01, "loss": 2.02, "step": 22992 }, { "epoch": 2.3635522664199815, "grad_norm": 0.035171300172805786, "learning_rate": 0.01, "loss": 2.0413, "step": 22995 }, { "epoch": 2.363860622880049, "grad_norm": 0.059095609933137894, "learning_rate": 0.01, "loss": 2.0237, "step": 22998 }, { "epoch": 2.3641689793401173, "grad_norm": 0.094328872859478, "learning_rate": 0.01, "loss": 2.0362, "step": 23001 }, { "epoch": 2.364477335800185, "grad_norm": 0.059930335730314255, "learning_rate": 0.01, "loss": 2.0274, "step": 23004 }, { "epoch": 2.364785692260253, "grad_norm": 0.08969483524560928, "learning_rate": 0.01, "loss": 2.04, "step": 23007 }, { "epoch": 2.3650940487203207, "grad_norm": 0.054753273725509644, "learning_rate": 0.01, "loss": 2.0195, "step": 23010 }, { "epoch": 2.3654024051803884, "grad_norm": 0.09794525057077408, "learning_rate": 0.01, "loss": 2.0385, "step": 23013 }, { "epoch": 2.3657107616404565, "grad_norm": 0.07700082659721375, "learning_rate": 0.01, "loss": 2.0483, "step": 23016 }, { "epoch": 2.366019118100524, "grad_norm": 0.03871360793709755, "learning_rate": 0.01, "loss": 2.0255, "step": 23019 }, { "epoch": 2.3663274745605922, "grad_norm": 0.08822629600763321, "learning_rate": 0.01, "loss": 2.0296, "step": 23022 }, { "epoch": 2.36663583102066, "grad_norm": 0.08018068969249725, "learning_rate": 0.01, "loss": 2.0405, "step": 23025 }, { "epoch": 2.3669441874807275, "grad_norm": 0.046322405338287354, "learning_rate": 0.01, "loss": 2.0345, "step": 23028 }, { "epoch": 2.3672525439407957, "grad_norm": 0.08056683838367462, "learning_rate": 0.01, "loss": 2.035, "step": 23031 }, { "epoch": 2.3675609004008633, "grad_norm": 0.059787072241306305, "learning_rate": 0.01, "loss": 2.0163, "step": 23034 }, { "epoch": 2.3678692568609314, "grad_norm": 0.07793419063091278, "learning_rate": 0.01, "loss": 2.0039, "step": 23037 }, { "epoch": 2.368177613320999, "grad_norm": 0.1337706595659256, "learning_rate": 0.01, "loss": 2.0138, "step": 23040 }, { "epoch": 2.3684859697810667, "grad_norm": 0.0608229860663414, "learning_rate": 0.01, "loss": 2.0064, "step": 23043 }, { "epoch": 2.368794326241135, "grad_norm": 0.07523812353610992, "learning_rate": 0.01, "loss": 2.0243, "step": 23046 }, { "epoch": 2.3691026827012025, "grad_norm": 0.05788956955075264, "learning_rate": 0.01, "loss": 2.0354, "step": 23049 }, { "epoch": 2.3694110391612706, "grad_norm": 0.05372630059719086, "learning_rate": 0.01, "loss": 2.0188, "step": 23052 }, { "epoch": 2.3697193956213383, "grad_norm": 0.0775051936507225, "learning_rate": 0.01, "loss": 2.0018, "step": 23055 }, { "epoch": 2.370027752081406, "grad_norm": 0.042055148631334305, "learning_rate": 0.01, "loss": 2.0152, "step": 23058 }, { "epoch": 2.370336108541474, "grad_norm": 0.03500501438975334, "learning_rate": 0.01, "loss": 2.0111, "step": 23061 }, { "epoch": 2.3706444650015417, "grad_norm": 0.05405488237738609, "learning_rate": 0.01, "loss": 2.0569, "step": 23064 }, { "epoch": 2.37095282146161, "grad_norm": 0.04199660196900368, "learning_rate": 0.01, "loss": 2.024, "step": 23067 }, { "epoch": 2.3712611779216775, "grad_norm": 0.10155687481164932, "learning_rate": 0.01, "loss": 2.0335, "step": 23070 }, { "epoch": 2.371569534381745, "grad_norm": 0.04267631843686104, "learning_rate": 0.01, "loss": 2.0313, "step": 23073 }, { "epoch": 2.371877890841813, "grad_norm": 0.08961319178342819, "learning_rate": 0.01, "loss": 2.0308, "step": 23076 }, { "epoch": 2.372186247301881, "grad_norm": 0.06389858573675156, "learning_rate": 0.01, "loss": 2.02, "step": 23079 }, { "epoch": 2.372494603761949, "grad_norm": 0.0733552873134613, "learning_rate": 0.01, "loss": 2.0304, "step": 23082 }, { "epoch": 2.3728029602220166, "grad_norm": 0.04577986150979996, "learning_rate": 0.01, "loss": 2.0137, "step": 23085 }, { "epoch": 2.3731113166820843, "grad_norm": 0.07929688692092896, "learning_rate": 0.01, "loss": 2.0357, "step": 23088 }, { "epoch": 2.3734196731421524, "grad_norm": 0.07446785271167755, "learning_rate": 0.01, "loss": 2.0058, "step": 23091 }, { "epoch": 2.37372802960222, "grad_norm": 0.08491658419370651, "learning_rate": 0.01, "loss": 2.0372, "step": 23094 }, { "epoch": 2.374036386062288, "grad_norm": 0.04434317350387573, "learning_rate": 0.01, "loss": 2.022, "step": 23097 }, { "epoch": 2.374344742522356, "grad_norm": 0.031715601682662964, "learning_rate": 0.01, "loss": 2.0436, "step": 23100 }, { "epoch": 2.3746530989824235, "grad_norm": 0.05073494091629982, "learning_rate": 0.01, "loss": 2.0205, "step": 23103 }, { "epoch": 2.3749614554424916, "grad_norm": 0.0484505333006382, "learning_rate": 0.01, "loss": 2.0388, "step": 23106 }, { "epoch": 2.3752698119025593, "grad_norm": 0.036467526108026505, "learning_rate": 0.01, "loss": 2.0136, "step": 23109 }, { "epoch": 2.3755781683626274, "grad_norm": 0.048451874405145645, "learning_rate": 0.01, "loss": 2.014, "step": 23112 }, { "epoch": 2.375886524822695, "grad_norm": 0.048714105039834976, "learning_rate": 0.01, "loss": 2.0546, "step": 23115 }, { "epoch": 2.3761948812827627, "grad_norm": 0.07499600946903229, "learning_rate": 0.01, "loss": 2.024, "step": 23118 }, { "epoch": 2.376503237742831, "grad_norm": 0.052757177501916885, "learning_rate": 0.01, "loss": 2.07, "step": 23121 }, { "epoch": 2.3768115942028984, "grad_norm": 0.062441807240247726, "learning_rate": 0.01, "loss": 2.0156, "step": 23124 }, { "epoch": 2.3771199506629666, "grad_norm": 0.04984834045171738, "learning_rate": 0.01, "loss": 2.0046, "step": 23127 }, { "epoch": 2.377428307123034, "grad_norm": 0.12042737007141113, "learning_rate": 0.01, "loss": 2.0043, "step": 23130 }, { "epoch": 2.377736663583102, "grad_norm": 0.08114577829837799, "learning_rate": 0.01, "loss": 2.0212, "step": 23133 }, { "epoch": 2.37804502004317, "grad_norm": 0.044427551329135895, "learning_rate": 0.01, "loss": 2.0127, "step": 23136 }, { "epoch": 2.3783533765032376, "grad_norm": 0.04814080893993378, "learning_rate": 0.01, "loss": 2.0237, "step": 23139 }, { "epoch": 2.3786617329633057, "grad_norm": 0.03783731907606125, "learning_rate": 0.01, "loss": 2.0293, "step": 23142 }, { "epoch": 2.3789700894233734, "grad_norm": 0.03743833675980568, "learning_rate": 0.01, "loss": 2.0446, "step": 23145 }, { "epoch": 2.3792784458834415, "grad_norm": 0.05341466888785362, "learning_rate": 0.01, "loss": 2.0071, "step": 23148 }, { "epoch": 2.379586802343509, "grad_norm": 0.04965018108487129, "learning_rate": 0.01, "loss": 2.0257, "step": 23151 }, { "epoch": 2.379895158803577, "grad_norm": 0.06330037117004395, "learning_rate": 0.01, "loss": 2.0466, "step": 23154 }, { "epoch": 2.380203515263645, "grad_norm": 0.04018980264663696, "learning_rate": 0.01, "loss": 2.045, "step": 23157 }, { "epoch": 2.3805118717237126, "grad_norm": 0.04469529166817665, "learning_rate": 0.01, "loss": 2.0437, "step": 23160 }, { "epoch": 2.3808202281837803, "grad_norm": 0.07242292910814285, "learning_rate": 0.01, "loss": 2.0199, "step": 23163 }, { "epoch": 2.3811285846438484, "grad_norm": 0.05165792256593704, "learning_rate": 0.01, "loss": 2.0459, "step": 23166 }, { "epoch": 2.381436941103916, "grad_norm": 0.12728899717330933, "learning_rate": 0.01, "loss": 2.0086, "step": 23169 }, { "epoch": 2.381745297563984, "grad_norm": 0.07357775419950485, "learning_rate": 0.01, "loss": 2.0436, "step": 23172 }, { "epoch": 2.382053654024052, "grad_norm": 0.05125182494521141, "learning_rate": 0.01, "loss": 2.0269, "step": 23175 }, { "epoch": 2.38236201048412, "grad_norm": 0.061880383640527725, "learning_rate": 0.01, "loss": 2.0355, "step": 23178 }, { "epoch": 2.3826703669441875, "grad_norm": 0.03774267062544823, "learning_rate": 0.01, "loss": 2.0205, "step": 23181 }, { "epoch": 2.382978723404255, "grad_norm": 0.04750019684433937, "learning_rate": 0.01, "loss": 2.046, "step": 23184 }, { "epoch": 2.3832870798643233, "grad_norm": 0.06727109849452972, "learning_rate": 0.01, "loss": 2.0545, "step": 23187 }, { "epoch": 2.383595436324391, "grad_norm": 0.08848878741264343, "learning_rate": 0.01, "loss": 2.0458, "step": 23190 }, { "epoch": 2.3839037927844586, "grad_norm": 0.07577743381261826, "learning_rate": 0.01, "loss": 2.0502, "step": 23193 }, { "epoch": 2.3842121492445267, "grad_norm": 0.07908739149570465, "learning_rate": 0.01, "loss": 2.0516, "step": 23196 }, { "epoch": 2.3845205057045944, "grad_norm": 0.08100943267345428, "learning_rate": 0.01, "loss": 2.036, "step": 23199 }, { "epoch": 2.3848288621646625, "grad_norm": 0.12388890236616135, "learning_rate": 0.01, "loss": 2.0506, "step": 23202 }, { "epoch": 2.38513721862473, "grad_norm": 0.0538008026778698, "learning_rate": 0.01, "loss": 2.0236, "step": 23205 }, { "epoch": 2.3854455750847983, "grad_norm": 0.04448952525854111, "learning_rate": 0.01, "loss": 2.0354, "step": 23208 }, { "epoch": 2.385753931544866, "grad_norm": 0.038850218057632446, "learning_rate": 0.01, "loss": 2.0368, "step": 23211 }, { "epoch": 2.3860622880049336, "grad_norm": 0.10436736047267914, "learning_rate": 0.01, "loss": 2.0316, "step": 23214 }, { "epoch": 2.3863706444650017, "grad_norm": 0.03787916898727417, "learning_rate": 0.01, "loss": 2.0358, "step": 23217 }, { "epoch": 2.3866790009250693, "grad_norm": 0.0874573364853859, "learning_rate": 0.01, "loss": 2.0306, "step": 23220 }, { "epoch": 2.386987357385137, "grad_norm": 0.08322203904390335, "learning_rate": 0.01, "loss": 2.0495, "step": 23223 }, { "epoch": 2.387295713845205, "grad_norm": 0.034374579787254333, "learning_rate": 0.01, "loss": 2.0047, "step": 23226 }, { "epoch": 2.3876040703052728, "grad_norm": 0.03772469237446785, "learning_rate": 0.01, "loss": 2.0309, "step": 23229 }, { "epoch": 2.387912426765341, "grad_norm": 0.12290430814027786, "learning_rate": 0.01, "loss": 2.0343, "step": 23232 }, { "epoch": 2.3882207832254085, "grad_norm": 0.04306014999747276, "learning_rate": 0.01, "loss": 2.0248, "step": 23235 }, { "epoch": 2.3885291396854766, "grad_norm": 0.10940956324338913, "learning_rate": 0.01, "loss": 2.0322, "step": 23238 }, { "epoch": 2.3888374961455443, "grad_norm": 0.06950829923152924, "learning_rate": 0.01, "loss": 2.0168, "step": 23241 }, { "epoch": 2.389145852605612, "grad_norm": 0.11061616986989975, "learning_rate": 0.01, "loss": 2.017, "step": 23244 }, { "epoch": 2.38945420906568, "grad_norm": 0.051813628524541855, "learning_rate": 0.01, "loss": 2.0657, "step": 23247 }, { "epoch": 2.3897625655257477, "grad_norm": 0.10411377251148224, "learning_rate": 0.01, "loss": 2.0044, "step": 23250 }, { "epoch": 2.3900709219858154, "grad_norm": 0.03951719403266907, "learning_rate": 0.01, "loss": 2.0135, "step": 23253 }, { "epoch": 2.3903792784458835, "grad_norm": 0.08920100331306458, "learning_rate": 0.01, "loss": 2.0523, "step": 23256 }, { "epoch": 2.390687634905951, "grad_norm": 0.03843710198998451, "learning_rate": 0.01, "loss": 2.0387, "step": 23259 }, { "epoch": 2.3909959913660193, "grad_norm": 0.09093856811523438, "learning_rate": 0.01, "loss": 2.0477, "step": 23262 }, { "epoch": 2.391304347826087, "grad_norm": 0.06239760294556618, "learning_rate": 0.01, "loss": 2.0407, "step": 23265 }, { "epoch": 2.391612704286155, "grad_norm": 0.07422836124897003, "learning_rate": 0.01, "loss": 2.0232, "step": 23268 }, { "epoch": 2.3919210607462227, "grad_norm": 0.04418382793664932, "learning_rate": 0.01, "loss": 2.0346, "step": 23271 }, { "epoch": 2.3922294172062903, "grad_norm": 0.08631134033203125, "learning_rate": 0.01, "loss": 2.0675, "step": 23274 }, { "epoch": 2.3925377736663584, "grad_norm": 0.05164894834160805, "learning_rate": 0.01, "loss": 2.0328, "step": 23277 }, { "epoch": 2.392846130126426, "grad_norm": 0.056628335267305374, "learning_rate": 0.01, "loss": 2.0462, "step": 23280 }, { "epoch": 2.3931544865864938, "grad_norm": 0.10568714141845703, "learning_rate": 0.01, "loss": 2.0295, "step": 23283 }, { "epoch": 2.393462843046562, "grad_norm": 0.1235639825463295, "learning_rate": 0.01, "loss": 2.0149, "step": 23286 }, { "epoch": 2.3937711995066295, "grad_norm": 0.04766182228922844, "learning_rate": 0.01, "loss": 2.016, "step": 23289 }, { "epoch": 2.3940795559666976, "grad_norm": 0.040086500346660614, "learning_rate": 0.01, "loss": 2.0011, "step": 23292 }, { "epoch": 2.3943879124267653, "grad_norm": 0.04474460333585739, "learning_rate": 0.01, "loss": 2.003, "step": 23295 }, { "epoch": 2.3946962688868334, "grad_norm": 0.03349655494093895, "learning_rate": 0.01, "loss": 2.0251, "step": 23298 }, { "epoch": 2.395004625346901, "grad_norm": 0.04361939802765846, "learning_rate": 0.01, "loss": 2.0105, "step": 23301 }, { "epoch": 2.3953129818069687, "grad_norm": 0.08834217488765717, "learning_rate": 0.01, "loss": 2.0455, "step": 23304 }, { "epoch": 2.395621338267037, "grad_norm": 0.09799984842538834, "learning_rate": 0.01, "loss": 2.0153, "step": 23307 }, { "epoch": 2.3959296947271045, "grad_norm": 0.06085311621427536, "learning_rate": 0.01, "loss": 2.0326, "step": 23310 }, { "epoch": 2.396238051187172, "grad_norm": 0.04466985911130905, "learning_rate": 0.01, "loss": 2.0122, "step": 23313 }, { "epoch": 2.3965464076472403, "grad_norm": 0.05248266085982323, "learning_rate": 0.01, "loss": 2.0388, "step": 23316 }, { "epoch": 2.396854764107308, "grad_norm": 0.06024044379591942, "learning_rate": 0.01, "loss": 2.0232, "step": 23319 }, { "epoch": 2.397163120567376, "grad_norm": 0.05061257630586624, "learning_rate": 0.01, "loss": 2.0259, "step": 23322 }, { "epoch": 2.3974714770274437, "grad_norm": 0.04136064276099205, "learning_rate": 0.01, "loss": 2.0356, "step": 23325 }, { "epoch": 2.397779833487512, "grad_norm": 0.048327211290597916, "learning_rate": 0.01, "loss": 2.0158, "step": 23328 }, { "epoch": 2.3980881899475794, "grad_norm": 0.038865648210048676, "learning_rate": 0.01, "loss": 2.0312, "step": 23331 }, { "epoch": 2.398396546407647, "grad_norm": 0.04522191360592842, "learning_rate": 0.01, "loss": 2.0125, "step": 23334 }, { "epoch": 2.398704902867715, "grad_norm": 0.033488884568214417, "learning_rate": 0.01, "loss": 2.0496, "step": 23337 }, { "epoch": 2.399013259327783, "grad_norm": 0.05207303538918495, "learning_rate": 0.01, "loss": 2.0254, "step": 23340 }, { "epoch": 2.3993216157878505, "grad_norm": 0.09892462193965912, "learning_rate": 0.01, "loss": 2.0294, "step": 23343 }, { "epoch": 2.3996299722479186, "grad_norm": 0.07379446923732758, "learning_rate": 0.01, "loss": 2.0351, "step": 23346 }, { "epoch": 2.3999383287079863, "grad_norm": 0.07604874670505524, "learning_rate": 0.01, "loss": 2.0348, "step": 23349 }, { "epoch": 2.4002466851680544, "grad_norm": 0.05876392871141434, "learning_rate": 0.01, "loss": 2.0442, "step": 23352 }, { "epoch": 2.400555041628122, "grad_norm": 0.06497185677289963, "learning_rate": 0.01, "loss": 2.0289, "step": 23355 }, { "epoch": 2.40086339808819, "grad_norm": 0.059574466198682785, "learning_rate": 0.01, "loss": 2.0056, "step": 23358 }, { "epoch": 2.401171754548258, "grad_norm": 0.03427527844905853, "learning_rate": 0.01, "loss": 2.0115, "step": 23361 }, { "epoch": 2.4014801110083255, "grad_norm": 0.04248304292559624, "learning_rate": 0.01, "loss": 2.022, "step": 23364 }, { "epoch": 2.4017884674683936, "grad_norm": 0.11010278761386871, "learning_rate": 0.01, "loss": 2.0378, "step": 23367 }, { "epoch": 2.4020968239284612, "grad_norm": 0.03992384672164917, "learning_rate": 0.01, "loss": 2.024, "step": 23370 }, { "epoch": 2.402405180388529, "grad_norm": 0.0774572566151619, "learning_rate": 0.01, "loss": 2.047, "step": 23373 }, { "epoch": 2.402713536848597, "grad_norm": 0.0753302052617073, "learning_rate": 0.01, "loss": 2.0182, "step": 23376 }, { "epoch": 2.4030218933086647, "grad_norm": 0.06872344017028809, "learning_rate": 0.01, "loss": 2.041, "step": 23379 }, { "epoch": 2.4033302497687328, "grad_norm": 0.03949934244155884, "learning_rate": 0.01, "loss": 2.0444, "step": 23382 }, { "epoch": 2.4036386062288004, "grad_norm": 0.06230955943465233, "learning_rate": 0.01, "loss": 2.0168, "step": 23385 }, { "epoch": 2.4039469626888685, "grad_norm": 0.0821446031332016, "learning_rate": 0.01, "loss": 2.0091, "step": 23388 }, { "epoch": 2.404255319148936, "grad_norm": 0.0392254963517189, "learning_rate": 0.01, "loss": 2.0083, "step": 23391 }, { "epoch": 2.404563675609004, "grad_norm": 0.04208219051361084, "learning_rate": 0.01, "loss": 2.008, "step": 23394 }, { "epoch": 2.404872032069072, "grad_norm": 0.053683795034885406, "learning_rate": 0.01, "loss": 2.0175, "step": 23397 }, { "epoch": 2.4051803885291396, "grad_norm": 0.0707748755812645, "learning_rate": 0.01, "loss": 2.0447, "step": 23400 }, { "epoch": 2.4054887449892073, "grad_norm": 0.04077059030532837, "learning_rate": 0.01, "loss": 2.0247, "step": 23403 }, { "epoch": 2.4057971014492754, "grad_norm": 0.041255395859479904, "learning_rate": 0.01, "loss": 2.0542, "step": 23406 }, { "epoch": 2.406105457909343, "grad_norm": 0.04560267925262451, "learning_rate": 0.01, "loss": 2.0134, "step": 23409 }, { "epoch": 2.406413814369411, "grad_norm": 0.1152443066239357, "learning_rate": 0.01, "loss": 2.0367, "step": 23412 }, { "epoch": 2.406722170829479, "grad_norm": 0.06491648405790329, "learning_rate": 0.01, "loss": 2.046, "step": 23415 }, { "epoch": 2.407030527289547, "grad_norm": 0.10551664233207703, "learning_rate": 0.01, "loss": 2.0176, "step": 23418 }, { "epoch": 2.4073388837496146, "grad_norm": 0.05565710365772247, "learning_rate": 0.01, "loss": 2.051, "step": 23421 }, { "epoch": 2.4076472402096822, "grad_norm": 0.07831353694200516, "learning_rate": 0.01, "loss": 2.0311, "step": 23424 }, { "epoch": 2.4079555966697503, "grad_norm": 0.04822281748056412, "learning_rate": 0.01, "loss": 2.0086, "step": 23427 }, { "epoch": 2.408263953129818, "grad_norm": 0.10326611250638962, "learning_rate": 0.01, "loss": 2.0079, "step": 23430 }, { "epoch": 2.4085723095898857, "grad_norm": 0.03122270293533802, "learning_rate": 0.01, "loss": 2.0371, "step": 23433 }, { "epoch": 2.4088806660499538, "grad_norm": 0.04365687072277069, "learning_rate": 0.01, "loss": 2.0323, "step": 23436 }, { "epoch": 2.4091890225100214, "grad_norm": 0.055597368627786636, "learning_rate": 0.01, "loss": 2.0345, "step": 23439 }, { "epoch": 2.4094973789700895, "grad_norm": 0.05635388568043709, "learning_rate": 0.01, "loss": 2.0255, "step": 23442 }, { "epoch": 2.409805735430157, "grad_norm": 0.059105440974235535, "learning_rate": 0.01, "loss": 2.0366, "step": 23445 }, { "epoch": 2.4101140918902253, "grad_norm": 0.03262312710285187, "learning_rate": 0.01, "loss": 2.0371, "step": 23448 }, { "epoch": 2.410422448350293, "grad_norm": 0.12139608711004257, "learning_rate": 0.01, "loss": 2.0423, "step": 23451 }, { "epoch": 2.4107308048103606, "grad_norm": 0.0331861712038517, "learning_rate": 0.01, "loss": 2.0242, "step": 23454 }, { "epoch": 2.4110391612704287, "grad_norm": 0.14580830931663513, "learning_rate": 0.01, "loss": 2.0268, "step": 23457 }, { "epoch": 2.4113475177304964, "grad_norm": 0.08361393213272095, "learning_rate": 0.01, "loss": 2.0357, "step": 23460 }, { "epoch": 2.4116558741905645, "grad_norm": 0.04604450613260269, "learning_rate": 0.01, "loss": 2.0218, "step": 23463 }, { "epoch": 2.411964230650632, "grad_norm": 0.0660676434636116, "learning_rate": 0.01, "loss": 2.0051, "step": 23466 }, { "epoch": 2.4122725871107, "grad_norm": 0.03829890862107277, "learning_rate": 0.01, "loss": 2.0037, "step": 23469 }, { "epoch": 2.412580943570768, "grad_norm": 0.03486289829015732, "learning_rate": 0.01, "loss": 2.0157, "step": 23472 }, { "epoch": 2.4128893000308356, "grad_norm": 0.043325070291757584, "learning_rate": 0.01, "loss": 2.0228, "step": 23475 }, { "epoch": 2.4131976564909037, "grad_norm": 0.062313832342624664, "learning_rate": 0.01, "loss": 2.0278, "step": 23478 }, { "epoch": 2.4135060129509713, "grad_norm": 0.07333476096391678, "learning_rate": 0.01, "loss": 2.031, "step": 23481 }, { "epoch": 2.413814369411039, "grad_norm": 0.05855254456400871, "learning_rate": 0.01, "loss": 2.0019, "step": 23484 }, { "epoch": 2.414122725871107, "grad_norm": 0.07818280905485153, "learning_rate": 0.01, "loss": 2.0305, "step": 23487 }, { "epoch": 2.4144310823311748, "grad_norm": 0.17364929616451263, "learning_rate": 0.01, "loss": 2.0358, "step": 23490 }, { "epoch": 2.414739438791243, "grad_norm": 0.04748675599694252, "learning_rate": 0.01, "loss": 2.0441, "step": 23493 }, { "epoch": 2.4150477952513105, "grad_norm": 0.04472067952156067, "learning_rate": 0.01, "loss": 2.001, "step": 23496 }, { "epoch": 2.415356151711378, "grad_norm": 0.04624779149889946, "learning_rate": 0.01, "loss": 2.0606, "step": 23499 }, { "epoch": 2.4156645081714463, "grad_norm": 0.04642438516020775, "learning_rate": 0.01, "loss": 2.0387, "step": 23502 }, { "epoch": 2.415972864631514, "grad_norm": 0.03872397541999817, "learning_rate": 0.01, "loss": 2.0463, "step": 23505 }, { "epoch": 2.416281221091582, "grad_norm": 0.055659957230091095, "learning_rate": 0.01, "loss": 2.0076, "step": 23508 }, { "epoch": 2.4165895775516497, "grad_norm": 0.05187131464481354, "learning_rate": 0.01, "loss": 2.0252, "step": 23511 }, { "epoch": 2.4168979340117174, "grad_norm": 0.04747389629483223, "learning_rate": 0.01, "loss": 1.9933, "step": 23514 }, { "epoch": 2.4172062904717855, "grad_norm": 0.04594513028860092, "learning_rate": 0.01, "loss": 2.0489, "step": 23517 }, { "epoch": 2.417514646931853, "grad_norm": 0.08976871520280838, "learning_rate": 0.01, "loss": 2.0728, "step": 23520 }, { "epoch": 2.4178230033919212, "grad_norm": 0.09563881158828735, "learning_rate": 0.01, "loss": 2.0399, "step": 23523 }, { "epoch": 2.418131359851989, "grad_norm": 0.05488200485706329, "learning_rate": 0.01, "loss": 2.0314, "step": 23526 }, { "epoch": 2.4184397163120566, "grad_norm": 0.10557366907596588, "learning_rate": 0.01, "loss": 2.0141, "step": 23529 }, { "epoch": 2.4187480727721247, "grad_norm": 0.06399507075548172, "learning_rate": 0.01, "loss": 2.032, "step": 23532 }, { "epoch": 2.4190564292321923, "grad_norm": 0.06832768023014069, "learning_rate": 0.01, "loss": 2.0445, "step": 23535 }, { "epoch": 2.4193647856922604, "grad_norm": 0.04164440929889679, "learning_rate": 0.01, "loss": 2.046, "step": 23538 }, { "epoch": 2.419673142152328, "grad_norm": 0.03020538203418255, "learning_rate": 0.01, "loss": 2.0368, "step": 23541 }, { "epoch": 2.4199814986123958, "grad_norm": 0.05985027551651001, "learning_rate": 0.01, "loss": 2.0166, "step": 23544 }, { "epoch": 2.420289855072464, "grad_norm": 0.08126893639564514, "learning_rate": 0.01, "loss": 2.0334, "step": 23547 }, { "epoch": 2.4205982115325315, "grad_norm": 0.03897944465279579, "learning_rate": 0.01, "loss": 2.0475, "step": 23550 }, { "epoch": 2.4209065679925996, "grad_norm": 0.05052472651004791, "learning_rate": 0.01, "loss": 2.0536, "step": 23553 }, { "epoch": 2.4212149244526673, "grad_norm": 0.03434956073760986, "learning_rate": 0.01, "loss": 2.0379, "step": 23556 }, { "epoch": 2.421523280912735, "grad_norm": 0.0911344438791275, "learning_rate": 0.01, "loss": 2.0412, "step": 23559 }, { "epoch": 2.421831637372803, "grad_norm": 0.11097840219736099, "learning_rate": 0.01, "loss": 1.9977, "step": 23562 }, { "epoch": 2.4221399938328707, "grad_norm": 0.09742028266191483, "learning_rate": 0.01, "loss": 2.0188, "step": 23565 }, { "epoch": 2.422448350292939, "grad_norm": 0.04184804484248161, "learning_rate": 0.01, "loss": 2.0143, "step": 23568 }, { "epoch": 2.4227567067530065, "grad_norm": 0.04188203811645508, "learning_rate": 0.01, "loss": 2.0039, "step": 23571 }, { "epoch": 2.423065063213074, "grad_norm": 0.05326924845576286, "learning_rate": 0.01, "loss": 2.0269, "step": 23574 }, { "epoch": 2.4233734196731422, "grad_norm": 0.04598800092935562, "learning_rate": 0.01, "loss": 2.0272, "step": 23577 }, { "epoch": 2.42368177613321, "grad_norm": 0.1110750213265419, "learning_rate": 0.01, "loss": 2.0178, "step": 23580 }, { "epoch": 2.423990132593278, "grad_norm": 0.0473935566842556, "learning_rate": 0.01, "loss": 2.0289, "step": 23583 }, { "epoch": 2.4242984890533457, "grad_norm": 0.08599594235420227, "learning_rate": 0.01, "loss": 2.0039, "step": 23586 }, { "epoch": 2.4246068455134133, "grad_norm": 0.03763740509748459, "learning_rate": 0.01, "loss": 2.0332, "step": 23589 }, { "epoch": 2.4249152019734814, "grad_norm": 0.056663963943719864, "learning_rate": 0.01, "loss": 2.0284, "step": 23592 }, { "epoch": 2.425223558433549, "grad_norm": 0.09664086252450943, "learning_rate": 0.01, "loss": 2.0424, "step": 23595 }, { "epoch": 2.425531914893617, "grad_norm": 0.047283969819545746, "learning_rate": 0.01, "loss": 2.0179, "step": 23598 }, { "epoch": 2.425840271353685, "grad_norm": 0.03726861625909805, "learning_rate": 0.01, "loss": 2.0154, "step": 23601 }, { "epoch": 2.4261486278137525, "grad_norm": 0.0514906644821167, "learning_rate": 0.01, "loss": 2.0229, "step": 23604 }, { "epoch": 2.4264569842738206, "grad_norm": 0.11972562968730927, "learning_rate": 0.01, "loss": 2.07, "step": 23607 }, { "epoch": 2.4267653407338883, "grad_norm": 0.17023397982120514, "learning_rate": 0.01, "loss": 2.0478, "step": 23610 }, { "epoch": 2.4270736971939564, "grad_norm": 0.1355310082435608, "learning_rate": 0.01, "loss": 2.0356, "step": 23613 }, { "epoch": 2.427382053654024, "grad_norm": 0.039905257523059845, "learning_rate": 0.01, "loss": 2.0305, "step": 23616 }, { "epoch": 2.4276904101140917, "grad_norm": 0.0339365154504776, "learning_rate": 0.01, "loss": 2.0328, "step": 23619 }, { "epoch": 2.42799876657416, "grad_norm": 0.03365493193268776, "learning_rate": 0.01, "loss": 2.0267, "step": 23622 }, { "epoch": 2.4283071230342275, "grad_norm": 0.041596464812755585, "learning_rate": 0.01, "loss": 2.0078, "step": 23625 }, { "epoch": 2.4286154794942956, "grad_norm": 0.0537833645939827, "learning_rate": 0.01, "loss": 2.0444, "step": 23628 }, { "epoch": 2.4289238359543632, "grad_norm": 0.049658019095659256, "learning_rate": 0.01, "loss": 2.0165, "step": 23631 }, { "epoch": 2.429232192414431, "grad_norm": 0.03983505442738533, "learning_rate": 0.01, "loss": 2.0223, "step": 23634 }, { "epoch": 2.429540548874499, "grad_norm": 0.0780608057975769, "learning_rate": 0.01, "loss": 2.0011, "step": 23637 }, { "epoch": 2.4298489053345667, "grad_norm": 0.13898798823356628, "learning_rate": 0.01, "loss": 2.0424, "step": 23640 }, { "epoch": 2.4301572617946348, "grad_norm": 0.07781122624874115, "learning_rate": 0.01, "loss": 2.0499, "step": 23643 }, { "epoch": 2.4304656182547024, "grad_norm": 0.045428015291690826, "learning_rate": 0.01, "loss": 2.0188, "step": 23646 }, { "epoch": 2.43077397471477, "grad_norm": 0.03437395393848419, "learning_rate": 0.01, "loss": 2.038, "step": 23649 }, { "epoch": 2.431082331174838, "grad_norm": 0.06030704453587532, "learning_rate": 0.01, "loss": 2.0563, "step": 23652 }, { "epoch": 2.431390687634906, "grad_norm": 0.046194083988666534, "learning_rate": 0.01, "loss": 2.0324, "step": 23655 }, { "epoch": 2.431699044094974, "grad_norm": 0.10041481256484985, "learning_rate": 0.01, "loss": 2.0184, "step": 23658 }, { "epoch": 2.4320074005550416, "grad_norm": 0.07109228521585464, "learning_rate": 0.01, "loss": 2.002, "step": 23661 }, { "epoch": 2.4323157570151093, "grad_norm": 0.03319769352674484, "learning_rate": 0.01, "loss": 2.0172, "step": 23664 }, { "epoch": 2.4326241134751774, "grad_norm": 0.047379735857248306, "learning_rate": 0.01, "loss": 2.0495, "step": 23667 }, { "epoch": 2.432932469935245, "grad_norm": 0.04836783930659294, "learning_rate": 0.01, "loss": 2.045, "step": 23670 }, { "epoch": 2.433240826395313, "grad_norm": 0.07264747470617294, "learning_rate": 0.01, "loss": 2.0147, "step": 23673 }, { "epoch": 2.433549182855381, "grad_norm": 0.09331446141004562, "learning_rate": 0.01, "loss": 2.0259, "step": 23676 }, { "epoch": 2.433857539315449, "grad_norm": 0.07618245482444763, "learning_rate": 0.01, "loss": 2.0188, "step": 23679 }, { "epoch": 2.4341658957755166, "grad_norm": 0.13270626962184906, "learning_rate": 0.01, "loss": 2.05, "step": 23682 }, { "epoch": 2.4344742522355842, "grad_norm": 0.06691613793373108, "learning_rate": 0.01, "loss": 2.0353, "step": 23685 }, { "epoch": 2.4347826086956523, "grad_norm": 0.04376102611422539, "learning_rate": 0.01, "loss": 2.0213, "step": 23688 }, { "epoch": 2.43509096515572, "grad_norm": 0.04829081892967224, "learning_rate": 0.01, "loss": 2.0337, "step": 23691 }, { "epoch": 2.4353993216157876, "grad_norm": 0.04604188725352287, "learning_rate": 0.01, "loss": 2.0297, "step": 23694 }, { "epoch": 2.4357076780758558, "grad_norm": 0.06296957284212112, "learning_rate": 0.01, "loss": 2.0044, "step": 23697 }, { "epoch": 2.4360160345359234, "grad_norm": 0.07184949517250061, "learning_rate": 0.01, "loss": 2.0204, "step": 23700 }, { "epoch": 2.4363243909959915, "grad_norm": 0.11544491350650787, "learning_rate": 0.01, "loss": 2.0512, "step": 23703 }, { "epoch": 2.436632747456059, "grad_norm": 0.03796609491109848, "learning_rate": 0.01, "loss": 2.0154, "step": 23706 }, { "epoch": 2.4369411039161273, "grad_norm": 0.09068101644515991, "learning_rate": 0.01, "loss": 2.0402, "step": 23709 }, { "epoch": 2.437249460376195, "grad_norm": 0.06220867484807968, "learning_rate": 0.01, "loss": 2.0289, "step": 23712 }, { "epoch": 2.4375578168362626, "grad_norm": 0.040711212903261185, "learning_rate": 0.01, "loss": 2.0365, "step": 23715 }, { "epoch": 2.4378661732963307, "grad_norm": 0.03610675781965256, "learning_rate": 0.01, "loss": 2.023, "step": 23718 }, { "epoch": 2.4381745297563984, "grad_norm": 0.06709878146648407, "learning_rate": 0.01, "loss": 2.0125, "step": 23721 }, { "epoch": 2.438482886216466, "grad_norm": 0.10784180462360382, "learning_rate": 0.01, "loss": 2.0328, "step": 23724 }, { "epoch": 2.438791242676534, "grad_norm": 0.0911094918847084, "learning_rate": 0.01, "loss": 2.0594, "step": 23727 }, { "epoch": 2.439099599136602, "grad_norm": 0.059867773205041885, "learning_rate": 0.01, "loss": 2.0244, "step": 23730 }, { "epoch": 2.43940795559667, "grad_norm": 0.06214331462979317, "learning_rate": 0.01, "loss": 2.0698, "step": 23733 }, { "epoch": 2.4397163120567376, "grad_norm": 0.058527860790491104, "learning_rate": 0.01, "loss": 2.0562, "step": 23736 }, { "epoch": 2.4400246685168057, "grad_norm": 0.09416967630386353, "learning_rate": 0.01, "loss": 2.0385, "step": 23739 }, { "epoch": 2.4403330249768733, "grad_norm": 0.11225190758705139, "learning_rate": 0.01, "loss": 2.0534, "step": 23742 }, { "epoch": 2.440641381436941, "grad_norm": 0.056495241820812225, "learning_rate": 0.01, "loss": 2.038, "step": 23745 }, { "epoch": 2.440949737897009, "grad_norm": 0.05432302877306938, "learning_rate": 0.01, "loss": 2.01, "step": 23748 }, { "epoch": 2.4412580943570767, "grad_norm": 0.04850140959024429, "learning_rate": 0.01, "loss": 2.0303, "step": 23751 }, { "epoch": 2.4415664508171444, "grad_norm": 0.05038965865969658, "learning_rate": 0.01, "loss": 2.0226, "step": 23754 }, { "epoch": 2.4418748072772125, "grad_norm": 0.050226423889398575, "learning_rate": 0.01, "loss": 2.0238, "step": 23757 }, { "epoch": 2.44218316373728, "grad_norm": 0.05979544296860695, "learning_rate": 0.01, "loss": 2.0378, "step": 23760 }, { "epoch": 2.4424915201973483, "grad_norm": 0.04254556819796562, "learning_rate": 0.01, "loss": 2.0207, "step": 23763 }, { "epoch": 2.442799876657416, "grad_norm": 0.1078273206949234, "learning_rate": 0.01, "loss": 2.0342, "step": 23766 }, { "epoch": 2.443108233117484, "grad_norm": 0.0591372586786747, "learning_rate": 0.01, "loss": 2.0186, "step": 23769 }, { "epoch": 2.4434165895775517, "grad_norm": 0.05430880934000015, "learning_rate": 0.01, "loss": 2.0232, "step": 23772 }, { "epoch": 2.4437249460376194, "grad_norm": 0.07246505469083786, "learning_rate": 0.01, "loss": 2.0372, "step": 23775 }, { "epoch": 2.4440333024976875, "grad_norm": 0.04457786679267883, "learning_rate": 0.01, "loss": 2.0157, "step": 23778 }, { "epoch": 2.444341658957755, "grad_norm": 0.03915979340672493, "learning_rate": 0.01, "loss": 2.0173, "step": 23781 }, { "epoch": 2.444650015417823, "grad_norm": 0.08284246176481247, "learning_rate": 0.01, "loss": 2.0433, "step": 23784 }, { "epoch": 2.444958371877891, "grad_norm": 0.08254294097423553, "learning_rate": 0.01, "loss": 2.012, "step": 23787 }, { "epoch": 2.4452667283379586, "grad_norm": 0.12263736873865128, "learning_rate": 0.01, "loss": 2.0491, "step": 23790 }, { "epoch": 2.4455750847980267, "grad_norm": 0.08192913979291916, "learning_rate": 0.01, "loss": 2.047, "step": 23793 }, { "epoch": 2.4458834412580943, "grad_norm": 0.09507100284099579, "learning_rate": 0.01, "loss": 2.0357, "step": 23796 }, { "epoch": 2.4461917977181624, "grad_norm": 0.10554829984903336, "learning_rate": 0.01, "loss": 2.024, "step": 23799 }, { "epoch": 2.44650015417823, "grad_norm": 0.051542408764362335, "learning_rate": 0.01, "loss": 2.0304, "step": 23802 }, { "epoch": 2.4468085106382977, "grad_norm": 0.05467440187931061, "learning_rate": 0.01, "loss": 2.043, "step": 23805 }, { "epoch": 2.447116867098366, "grad_norm": 0.033462487161159515, "learning_rate": 0.01, "loss": 2.0267, "step": 23808 }, { "epoch": 2.4474252235584335, "grad_norm": 0.04688438028097153, "learning_rate": 0.01, "loss": 2.032, "step": 23811 }, { "epoch": 2.447733580018501, "grad_norm": 0.04699881002306938, "learning_rate": 0.01, "loss": 2.0392, "step": 23814 }, { "epoch": 2.4480419364785693, "grad_norm": 0.03932753950357437, "learning_rate": 0.01, "loss": 2.0193, "step": 23817 }, { "epoch": 2.448350292938637, "grad_norm": 0.11096165329217911, "learning_rate": 0.01, "loss": 2.0124, "step": 23820 }, { "epoch": 2.448658649398705, "grad_norm": 0.03774998337030411, "learning_rate": 0.01, "loss": 2.0192, "step": 23823 }, { "epoch": 2.4489670058587727, "grad_norm": 0.03317665681242943, "learning_rate": 0.01, "loss": 2.0218, "step": 23826 }, { "epoch": 2.449275362318841, "grad_norm": 0.06077956408262253, "learning_rate": 0.01, "loss": 2.0397, "step": 23829 }, { "epoch": 2.4495837187789085, "grad_norm": 0.08997214585542679, "learning_rate": 0.01, "loss": 2.0451, "step": 23832 }, { "epoch": 2.449892075238976, "grad_norm": 0.12234194576740265, "learning_rate": 0.01, "loss": 2.0144, "step": 23835 }, { "epoch": 2.4502004316990442, "grad_norm": 0.13833922147750854, "learning_rate": 0.01, "loss": 2.0348, "step": 23838 }, { "epoch": 2.450508788159112, "grad_norm": 0.06707292795181274, "learning_rate": 0.01, "loss": 2.0276, "step": 23841 }, { "epoch": 2.4508171446191795, "grad_norm": 0.04679076373577118, "learning_rate": 0.01, "loss": 2.0624, "step": 23844 }, { "epoch": 2.4511255010792476, "grad_norm": 0.08921289443969727, "learning_rate": 0.01, "loss": 2.0188, "step": 23847 }, { "epoch": 2.4514338575393153, "grad_norm": 0.056680746376514435, "learning_rate": 0.01, "loss": 2.0299, "step": 23850 }, { "epoch": 2.4517422139993834, "grad_norm": 0.07349438965320587, "learning_rate": 0.01, "loss": 2.0249, "step": 23853 }, { "epoch": 2.452050570459451, "grad_norm": 0.057414181530475616, "learning_rate": 0.01, "loss": 2.0054, "step": 23856 }, { "epoch": 2.452358926919519, "grad_norm": 0.09144090861082077, "learning_rate": 0.01, "loss": 2.0264, "step": 23859 }, { "epoch": 2.452667283379587, "grad_norm": 0.05499831214547157, "learning_rate": 0.01, "loss": 2.035, "step": 23862 }, { "epoch": 2.4529756398396545, "grad_norm": 0.03803296014666557, "learning_rate": 0.01, "loss": 1.9935, "step": 23865 }, { "epoch": 2.4532839962997226, "grad_norm": 0.11475666612386703, "learning_rate": 0.01, "loss": 2.0491, "step": 23868 }, { "epoch": 2.4535923527597903, "grad_norm": 0.13063554465770721, "learning_rate": 0.01, "loss": 1.9912, "step": 23871 }, { "epoch": 2.453900709219858, "grad_norm": 0.04982873052358627, "learning_rate": 0.01, "loss": 2.0718, "step": 23874 }, { "epoch": 2.454209065679926, "grad_norm": 0.04090685769915581, "learning_rate": 0.01, "loss": 2.0095, "step": 23877 }, { "epoch": 2.4545174221399937, "grad_norm": 0.044233810156583786, "learning_rate": 0.01, "loss": 2.0235, "step": 23880 }, { "epoch": 2.454825778600062, "grad_norm": 0.045451819896698, "learning_rate": 0.01, "loss": 2.0135, "step": 23883 }, { "epoch": 2.4551341350601295, "grad_norm": 0.04413032531738281, "learning_rate": 0.01, "loss": 2.0371, "step": 23886 }, { "epoch": 2.4554424915201976, "grad_norm": 0.07937499135732651, "learning_rate": 0.01, "loss": 2.0375, "step": 23889 }, { "epoch": 2.455750847980265, "grad_norm": 0.09389673918485641, "learning_rate": 0.01, "loss": 2.0265, "step": 23892 }, { "epoch": 2.456059204440333, "grad_norm": 0.061937954276800156, "learning_rate": 0.01, "loss": 2.0335, "step": 23895 }, { "epoch": 2.456367560900401, "grad_norm": 0.038002993911504745, "learning_rate": 0.01, "loss": 1.9868, "step": 23898 }, { "epoch": 2.4566759173604686, "grad_norm": 0.05679142847657204, "learning_rate": 0.01, "loss": 2.0346, "step": 23901 }, { "epoch": 2.4569842738205363, "grad_norm": 0.07343320548534393, "learning_rate": 0.01, "loss": 2.0105, "step": 23904 }, { "epoch": 2.4572926302806044, "grad_norm": 0.04992273077368736, "learning_rate": 0.01, "loss": 2.0327, "step": 23907 }, { "epoch": 2.457600986740672, "grad_norm": 0.07384973764419556, "learning_rate": 0.01, "loss": 2.0337, "step": 23910 }, { "epoch": 2.45790934320074, "grad_norm": 0.05728161707520485, "learning_rate": 0.01, "loss": 2.0036, "step": 23913 }, { "epoch": 2.458217699660808, "grad_norm": 0.06384929269552231, "learning_rate": 0.01, "loss": 2.0471, "step": 23916 }, { "epoch": 2.458526056120876, "grad_norm": 0.07515307515859604, "learning_rate": 0.01, "loss": 2.0485, "step": 23919 }, { "epoch": 2.4588344125809436, "grad_norm": 0.09443585574626923, "learning_rate": 0.01, "loss": 2.0183, "step": 23922 }, { "epoch": 2.4591427690410113, "grad_norm": 0.059930965304374695, "learning_rate": 0.01, "loss": 1.9955, "step": 23925 }, { "epoch": 2.4594511255010794, "grad_norm": 0.09930091351270676, "learning_rate": 0.01, "loss": 2.0269, "step": 23928 }, { "epoch": 2.459759481961147, "grad_norm": 0.03487955033779144, "learning_rate": 0.01, "loss": 2.003, "step": 23931 }, { "epoch": 2.4600678384212147, "grad_norm": 0.08312834799289703, "learning_rate": 0.01, "loss": 2.0118, "step": 23934 }, { "epoch": 2.460376194881283, "grad_norm": 0.06459874659776688, "learning_rate": 0.01, "loss": 2.0394, "step": 23937 }, { "epoch": 2.4606845513413504, "grad_norm": 0.10331536084413528, "learning_rate": 0.01, "loss": 2.0428, "step": 23940 }, { "epoch": 2.4609929078014185, "grad_norm": 0.06738618016242981, "learning_rate": 0.01, "loss": 2.0409, "step": 23943 }, { "epoch": 2.461301264261486, "grad_norm": 0.05211193859577179, "learning_rate": 0.01, "loss": 2.0296, "step": 23946 }, { "epoch": 2.4616096207215543, "grad_norm": 0.05060914158821106, "learning_rate": 0.01, "loss": 2.0518, "step": 23949 }, { "epoch": 2.461917977181622, "grad_norm": 0.037570733577013016, "learning_rate": 0.01, "loss": 1.9946, "step": 23952 }, { "epoch": 2.4622263336416896, "grad_norm": 0.06046308949589729, "learning_rate": 0.01, "loss": 2.0154, "step": 23955 }, { "epoch": 2.4625346901017577, "grad_norm": 0.08765476942062378, "learning_rate": 0.01, "loss": 2.0433, "step": 23958 }, { "epoch": 2.4628430465618254, "grad_norm": 0.07397017627954483, "learning_rate": 0.01, "loss": 2.0187, "step": 23961 }, { "epoch": 2.463151403021893, "grad_norm": 0.10959914326667786, "learning_rate": 0.01, "loss": 2.0076, "step": 23964 }, { "epoch": 2.463459759481961, "grad_norm": 0.04373926669359207, "learning_rate": 0.01, "loss": 2.0219, "step": 23967 }, { "epoch": 2.463768115942029, "grad_norm": 0.049903422594070435, "learning_rate": 0.01, "loss": 2.0046, "step": 23970 }, { "epoch": 2.464076472402097, "grad_norm": 0.037193864583969116, "learning_rate": 0.01, "loss": 2.012, "step": 23973 }, { "epoch": 2.4643848288621646, "grad_norm": 0.06762266159057617, "learning_rate": 0.01, "loss": 2.0179, "step": 23976 }, { "epoch": 2.4646931853222327, "grad_norm": 0.03918517008423805, "learning_rate": 0.01, "loss": 2.002, "step": 23979 }, { "epoch": 2.4650015417823004, "grad_norm": 0.09819602221250534, "learning_rate": 0.01, "loss": 2.0208, "step": 23982 }, { "epoch": 2.465309898242368, "grad_norm": 0.12719838321208954, "learning_rate": 0.01, "loss": 2.0155, "step": 23985 }, { "epoch": 2.465618254702436, "grad_norm": 0.05890420451760292, "learning_rate": 0.01, "loss": 2.0459, "step": 23988 }, { "epoch": 2.465926611162504, "grad_norm": 0.05101997032761574, "learning_rate": 0.01, "loss": 2.0266, "step": 23991 }, { "epoch": 2.466234967622572, "grad_norm": 0.04597810283303261, "learning_rate": 0.01, "loss": 2.0348, "step": 23994 }, { "epoch": 2.4665433240826395, "grad_norm": 0.0504877045750618, "learning_rate": 0.01, "loss": 2.0267, "step": 23997 }, { "epoch": 2.466851680542707, "grad_norm": 0.07908850163221359, "learning_rate": 0.01, "loss": 2.0215, "step": 24000 }, { "epoch": 2.4671600370027753, "grad_norm": 0.07396575808525085, "learning_rate": 0.01, "loss": 2.0287, "step": 24003 }, { "epoch": 2.467468393462843, "grad_norm": 0.06695767492055893, "learning_rate": 0.01, "loss": 2.0061, "step": 24006 }, { "epoch": 2.467776749922911, "grad_norm": 0.0907633900642395, "learning_rate": 0.01, "loss": 2.0261, "step": 24009 }, { "epoch": 2.4680851063829787, "grad_norm": 0.07585210353136063, "learning_rate": 0.01, "loss": 2.041, "step": 24012 }, { "epoch": 2.4683934628430464, "grad_norm": 0.049894414842128754, "learning_rate": 0.01, "loss": 2.0352, "step": 24015 }, { "epoch": 2.4687018193031145, "grad_norm": 0.038147564977407455, "learning_rate": 0.01, "loss": 2.043, "step": 24018 }, { "epoch": 2.469010175763182, "grad_norm": 0.03651060909032822, "learning_rate": 0.01, "loss": 2.0208, "step": 24021 }, { "epoch": 2.4693185322232503, "grad_norm": 0.09396708011627197, "learning_rate": 0.01, "loss": 2.0419, "step": 24024 }, { "epoch": 2.469626888683318, "grad_norm": 0.03474831208586693, "learning_rate": 0.01, "loss": 2.0217, "step": 24027 }, { "epoch": 2.4699352451433856, "grad_norm": 0.09970984607934952, "learning_rate": 0.01, "loss": 2.0348, "step": 24030 }, { "epoch": 2.4702436016034537, "grad_norm": 0.10633893311023712, "learning_rate": 0.01, "loss": 2.019, "step": 24033 }, { "epoch": 2.4705519580635213, "grad_norm": 0.0787937119603157, "learning_rate": 0.01, "loss": 2.0109, "step": 24036 }, { "epoch": 2.4708603145235895, "grad_norm": 0.05214501917362213, "learning_rate": 0.01, "loss": 2.0023, "step": 24039 }, { "epoch": 2.471168670983657, "grad_norm": 0.02967134490609169, "learning_rate": 0.01, "loss": 2.0043, "step": 24042 }, { "epoch": 2.4714770274437248, "grad_norm": 0.03181852400302887, "learning_rate": 0.01, "loss": 2.0164, "step": 24045 }, { "epoch": 2.471785383903793, "grad_norm": 0.03905104100704193, "learning_rate": 0.01, "loss": 2.0194, "step": 24048 }, { "epoch": 2.4720937403638605, "grad_norm": 0.051532018929719925, "learning_rate": 0.01, "loss": 2.0045, "step": 24051 }, { "epoch": 2.4724020968239286, "grad_norm": 0.13418106734752655, "learning_rate": 0.01, "loss": 2.0486, "step": 24054 }, { "epoch": 2.4727104532839963, "grad_norm": 0.0745147094130516, "learning_rate": 0.01, "loss": 2.0448, "step": 24057 }, { "epoch": 2.473018809744064, "grad_norm": 0.06666639447212219, "learning_rate": 0.01, "loss": 2.0274, "step": 24060 }, { "epoch": 2.473327166204132, "grad_norm": 0.08356054127216339, "learning_rate": 0.01, "loss": 2.028, "step": 24063 }, { "epoch": 2.4736355226641997, "grad_norm": 0.04733874648809433, "learning_rate": 0.01, "loss": 2.0294, "step": 24066 }, { "epoch": 2.473943879124268, "grad_norm": 0.07551899552345276, "learning_rate": 0.01, "loss": 2.0389, "step": 24069 }, { "epoch": 2.4742522355843355, "grad_norm": 0.03114013373851776, "learning_rate": 0.01, "loss": 2.0454, "step": 24072 }, { "epoch": 2.474560592044403, "grad_norm": 0.062112826853990555, "learning_rate": 0.01, "loss": 2.048, "step": 24075 }, { "epoch": 2.4748689485044713, "grad_norm": 0.0542120561003685, "learning_rate": 0.01, "loss": 2.0198, "step": 24078 }, { "epoch": 2.475177304964539, "grad_norm": 0.06153399124741554, "learning_rate": 0.01, "loss": 2.0264, "step": 24081 }, { "epoch": 2.475485661424607, "grad_norm": 0.06306985020637512, "learning_rate": 0.01, "loss": 2.0425, "step": 24084 }, { "epoch": 2.4757940178846747, "grad_norm": 0.03326687961816788, "learning_rate": 0.01, "loss": 2.0211, "step": 24087 }, { "epoch": 2.4761023743447423, "grad_norm": 0.10279777646064758, "learning_rate": 0.01, "loss": 2.0483, "step": 24090 }, { "epoch": 2.4764107308048104, "grad_norm": 0.045618560165166855, "learning_rate": 0.01, "loss": 2.0476, "step": 24093 }, { "epoch": 2.476719087264878, "grad_norm": 0.05065792426466942, "learning_rate": 0.01, "loss": 2.0261, "step": 24096 }, { "epoch": 2.477027443724946, "grad_norm": 0.04114675521850586, "learning_rate": 0.01, "loss": 2.0324, "step": 24099 }, { "epoch": 2.477335800185014, "grad_norm": 0.11044265329837799, "learning_rate": 0.01, "loss": 2.0379, "step": 24102 }, { "epoch": 2.4776441566450815, "grad_norm": 0.053481362760066986, "learning_rate": 0.01, "loss": 2.0408, "step": 24105 }, { "epoch": 2.4779525131051496, "grad_norm": 0.14947016537189484, "learning_rate": 0.01, "loss": 2.0511, "step": 24108 }, { "epoch": 2.4782608695652173, "grad_norm": 0.08009488135576248, "learning_rate": 0.01, "loss": 2.0383, "step": 24111 }, { "epoch": 2.4785692260252854, "grad_norm": 0.06397935748100281, "learning_rate": 0.01, "loss": 2.0445, "step": 24114 }, { "epoch": 2.478877582485353, "grad_norm": 0.0409528985619545, "learning_rate": 0.01, "loss": 2.0386, "step": 24117 }, { "epoch": 2.4791859389454207, "grad_norm": 0.11498477309942245, "learning_rate": 0.01, "loss": 1.9942, "step": 24120 }, { "epoch": 2.479494295405489, "grad_norm": 0.02937469258904457, "learning_rate": 0.01, "loss": 2.0329, "step": 24123 }, { "epoch": 2.4798026518655565, "grad_norm": 0.03659180551767349, "learning_rate": 0.01, "loss": 2.0638, "step": 24126 }, { "epoch": 2.4801110083256246, "grad_norm": 0.10866084694862366, "learning_rate": 0.01, "loss": 2.0319, "step": 24129 }, { "epoch": 2.4804193647856922, "grad_norm": 0.07002250105142593, "learning_rate": 0.01, "loss": 2.03, "step": 24132 }, { "epoch": 2.48072772124576, "grad_norm": 0.07129926234483719, "learning_rate": 0.01, "loss": 2.0407, "step": 24135 }, { "epoch": 2.481036077705828, "grad_norm": 0.061107151210308075, "learning_rate": 0.01, "loss": 2.0288, "step": 24138 }, { "epoch": 2.4813444341658957, "grad_norm": 0.04937992990016937, "learning_rate": 0.01, "loss": 2.0492, "step": 24141 }, { "epoch": 2.481652790625964, "grad_norm": 0.04651058465242386, "learning_rate": 0.01, "loss": 2.0474, "step": 24144 }, { "epoch": 2.4819611470860314, "grad_norm": 0.06658688187599182, "learning_rate": 0.01, "loss": 2.0429, "step": 24147 }, { "epoch": 2.482269503546099, "grad_norm": 0.05241367965936661, "learning_rate": 0.01, "loss": 2.0193, "step": 24150 }, { "epoch": 2.482577860006167, "grad_norm": 0.03564739227294922, "learning_rate": 0.01, "loss": 2.0092, "step": 24153 }, { "epoch": 2.482886216466235, "grad_norm": 0.03984629735350609, "learning_rate": 0.01, "loss": 2.0302, "step": 24156 }, { "epoch": 2.483194572926303, "grad_norm": 0.10245262831449509, "learning_rate": 0.01, "loss": 2.0327, "step": 24159 }, { "epoch": 2.4835029293863706, "grad_norm": 0.0727042481303215, "learning_rate": 0.01, "loss": 2.0295, "step": 24162 }, { "epoch": 2.4838112858464383, "grad_norm": 0.041190728545188904, "learning_rate": 0.01, "loss": 1.9987, "step": 24165 }, { "epoch": 2.4841196423065064, "grad_norm": 0.04986109584569931, "learning_rate": 0.01, "loss": 2.0227, "step": 24168 }, { "epoch": 2.484427998766574, "grad_norm": 0.0689210444688797, "learning_rate": 0.01, "loss": 2.0262, "step": 24171 }, { "epoch": 2.484736355226642, "grad_norm": 0.043997250497341156, "learning_rate": 0.01, "loss": 2.007, "step": 24174 }, { "epoch": 2.48504471168671, "grad_norm": 0.06231982633471489, "learning_rate": 0.01, "loss": 2.0095, "step": 24177 }, { "epoch": 2.485353068146778, "grad_norm": 0.09958053380250931, "learning_rate": 0.01, "loss": 2.022, "step": 24180 }, { "epoch": 2.4856614246068456, "grad_norm": 0.05297970771789551, "learning_rate": 0.01, "loss": 2.0416, "step": 24183 }, { "epoch": 2.4859697810669132, "grad_norm": 0.07420172542333603, "learning_rate": 0.01, "loss": 1.988, "step": 24186 }, { "epoch": 2.4862781375269813, "grad_norm": 0.049017585813999176, "learning_rate": 0.01, "loss": 2.0195, "step": 24189 }, { "epoch": 2.486586493987049, "grad_norm": 0.05398377403616905, "learning_rate": 0.01, "loss": 2.0161, "step": 24192 }, { "epoch": 2.4868948504471167, "grad_norm": 0.03338189795613289, "learning_rate": 0.01, "loss": 2.0073, "step": 24195 }, { "epoch": 2.4872032069071848, "grad_norm": 0.1013825535774231, "learning_rate": 0.01, "loss": 2.0372, "step": 24198 }, { "epoch": 2.4875115633672524, "grad_norm": 0.06101495400071144, "learning_rate": 0.01, "loss": 2.0313, "step": 24201 }, { "epoch": 2.4878199198273205, "grad_norm": 0.06915189325809479, "learning_rate": 0.01, "loss": 1.9753, "step": 24204 }, { "epoch": 2.488128276287388, "grad_norm": 0.09961054474115372, "learning_rate": 0.01, "loss": 2.0183, "step": 24207 }, { "epoch": 2.4884366327474563, "grad_norm": 0.039923045784235, "learning_rate": 0.01, "loss": 2.0256, "step": 24210 }, { "epoch": 2.488744989207524, "grad_norm": 0.07982566952705383, "learning_rate": 0.01, "loss": 2.0225, "step": 24213 }, { "epoch": 2.4890533456675916, "grad_norm": 0.06360599398612976, "learning_rate": 0.01, "loss": 2.0126, "step": 24216 }, { "epoch": 2.4893617021276597, "grad_norm": 0.06489767879247665, "learning_rate": 0.01, "loss": 2.0615, "step": 24219 }, { "epoch": 2.4896700585877274, "grad_norm": 0.08873300999403, "learning_rate": 0.01, "loss": 2.0144, "step": 24222 }, { "epoch": 2.489978415047795, "grad_norm": 0.04309386387467384, "learning_rate": 0.01, "loss": 2.0478, "step": 24225 }, { "epoch": 2.490286771507863, "grad_norm": 0.042991675436496735, "learning_rate": 0.01, "loss": 2.0046, "step": 24228 }, { "epoch": 2.490595127967931, "grad_norm": 0.04507692903280258, "learning_rate": 0.01, "loss": 2.0417, "step": 24231 }, { "epoch": 2.490903484427999, "grad_norm": 0.09288784116506577, "learning_rate": 0.01, "loss": 2.0181, "step": 24234 }, { "epoch": 2.4912118408880666, "grad_norm": 0.040776100009679794, "learning_rate": 0.01, "loss": 2.0524, "step": 24237 }, { "epoch": 2.4915201973481347, "grad_norm": 0.09847230464220047, "learning_rate": 0.01, "loss": 2.0298, "step": 24240 }, { "epoch": 2.4918285538082023, "grad_norm": 0.05252716690301895, "learning_rate": 0.01, "loss": 2.0288, "step": 24243 }, { "epoch": 2.49213691026827, "grad_norm": 0.04028663411736488, "learning_rate": 0.01, "loss": 2.0351, "step": 24246 }, { "epoch": 2.492445266728338, "grad_norm": 0.04721330851316452, "learning_rate": 0.01, "loss": 2.045, "step": 24249 }, { "epoch": 2.4927536231884058, "grad_norm": 0.0453517884016037, "learning_rate": 0.01, "loss": 2.0264, "step": 24252 }, { "epoch": 2.4930619796484734, "grad_norm": 0.09120344370603561, "learning_rate": 0.01, "loss": 2.0189, "step": 24255 }, { "epoch": 2.4933703361085415, "grad_norm": 0.1132507398724556, "learning_rate": 0.01, "loss": 2.0109, "step": 24258 }, { "epoch": 2.493678692568609, "grad_norm": 0.07698217034339905, "learning_rate": 0.01, "loss": 2.0391, "step": 24261 }, { "epoch": 2.4939870490286773, "grad_norm": 0.04104442894458771, "learning_rate": 0.01, "loss": 2.0357, "step": 24264 }, { "epoch": 2.494295405488745, "grad_norm": 0.04681272804737091, "learning_rate": 0.01, "loss": 2.0142, "step": 24267 }, { "epoch": 2.494603761948813, "grad_norm": 0.05137484520673752, "learning_rate": 0.01, "loss": 2.0616, "step": 24270 }, { "epoch": 2.4949121184088807, "grad_norm": 0.05654723569750786, "learning_rate": 0.01, "loss": 2.0371, "step": 24273 }, { "epoch": 2.4952204748689484, "grad_norm": 0.052294518798589706, "learning_rate": 0.01, "loss": 2.0163, "step": 24276 }, { "epoch": 2.4955288313290165, "grad_norm": 0.06906304508447647, "learning_rate": 0.01, "loss": 2.0049, "step": 24279 }, { "epoch": 2.495837187789084, "grad_norm": 0.0664992555975914, "learning_rate": 0.01, "loss": 2.037, "step": 24282 }, { "epoch": 2.496145544249152, "grad_norm": 0.06502712517976761, "learning_rate": 0.01, "loss": 2.0439, "step": 24285 }, { "epoch": 2.49645390070922, "grad_norm": 0.047290291637182236, "learning_rate": 0.01, "loss": 2.0267, "step": 24288 }, { "epoch": 2.4967622571692876, "grad_norm": 0.07271420210599899, "learning_rate": 0.01, "loss": 2.0325, "step": 24291 }, { "epoch": 2.4970706136293557, "grad_norm": 0.042307768017053604, "learning_rate": 0.01, "loss": 2.0223, "step": 24294 }, { "epoch": 2.4973789700894233, "grad_norm": 0.04834264889359474, "learning_rate": 0.01, "loss": 2.0278, "step": 24297 }, { "epoch": 2.4976873265494914, "grad_norm": 0.045435305684804916, "learning_rate": 0.01, "loss": 1.9997, "step": 24300 }, { "epoch": 2.497995683009559, "grad_norm": 0.07511632144451141, "learning_rate": 0.01, "loss": 2.025, "step": 24303 }, { "epoch": 2.4983040394696268, "grad_norm": 0.14621250331401825, "learning_rate": 0.01, "loss": 2.0401, "step": 24306 }, { "epoch": 2.498612395929695, "grad_norm": 0.05969877541065216, "learning_rate": 0.01, "loss": 2.0309, "step": 24309 }, { "epoch": 2.4989207523897625, "grad_norm": 0.040014345198869705, "learning_rate": 0.01, "loss": 2.0301, "step": 24312 }, { "epoch": 2.49922910884983, "grad_norm": 0.05185026675462723, "learning_rate": 0.01, "loss": 2.0149, "step": 24315 }, { "epoch": 2.4995374653098983, "grad_norm": 0.04459863528609276, "learning_rate": 0.01, "loss": 2.03, "step": 24318 }, { "epoch": 2.499845821769966, "grad_norm": 0.06208227947354317, "learning_rate": 0.01, "loss": 2.0253, "step": 24321 }, { "epoch": 2.500154178230034, "grad_norm": 0.0788293108344078, "learning_rate": 0.01, "loss": 2.0154, "step": 24324 }, { "epoch": 2.5004625346901017, "grad_norm": 0.0636831745505333, "learning_rate": 0.01, "loss": 2.0522, "step": 24327 }, { "epoch": 2.50077089115017, "grad_norm": 0.05828903615474701, "learning_rate": 0.01, "loss": 2.0093, "step": 24330 }, { "epoch": 2.5010792476102375, "grad_norm": 0.06897569447755814, "learning_rate": 0.01, "loss": 1.9997, "step": 24333 }, { "epoch": 2.501387604070305, "grad_norm": 0.03793172910809517, "learning_rate": 0.01, "loss": 2.0209, "step": 24336 }, { "epoch": 2.5016959605303732, "grad_norm": 0.05384537950158119, "learning_rate": 0.01, "loss": 2.0355, "step": 24339 }, { "epoch": 2.502004316990441, "grad_norm": 0.07979964464902878, "learning_rate": 0.01, "loss": 2.023, "step": 24342 }, { "epoch": 2.5023126734505086, "grad_norm": 0.054392259567976, "learning_rate": 0.01, "loss": 2.0115, "step": 24345 }, { "epoch": 2.5026210299105767, "grad_norm": 0.06897418200969696, "learning_rate": 0.01, "loss": 2.0147, "step": 24348 }, { "epoch": 2.5029293863706443, "grad_norm": 0.0801873579621315, "learning_rate": 0.01, "loss": 2.0397, "step": 24351 }, { "epoch": 2.5032377428307124, "grad_norm": 0.03708551451563835, "learning_rate": 0.01, "loss": 2.0076, "step": 24354 }, { "epoch": 2.50354609929078, "grad_norm": 0.09656143933534622, "learning_rate": 0.01, "loss": 2.0463, "step": 24357 }, { "epoch": 2.503854455750848, "grad_norm": 0.0770551860332489, "learning_rate": 0.01, "loss": 2.0355, "step": 24360 }, { "epoch": 2.504162812210916, "grad_norm": 0.08103878796100616, "learning_rate": 0.01, "loss": 2.0262, "step": 24363 }, { "epoch": 2.5044711686709835, "grad_norm": 0.10928300768136978, "learning_rate": 0.01, "loss": 2.0389, "step": 24366 }, { "epoch": 2.5047795251310516, "grad_norm": 0.07541976869106293, "learning_rate": 0.01, "loss": 2.0242, "step": 24369 }, { "epoch": 2.5050878815911193, "grad_norm": 0.05937611311674118, "learning_rate": 0.01, "loss": 2.0395, "step": 24372 }, { "epoch": 2.505396238051187, "grad_norm": 0.05396249517798424, "learning_rate": 0.01, "loss": 2.0164, "step": 24375 }, { "epoch": 2.505704594511255, "grad_norm": 0.04004419967532158, "learning_rate": 0.01, "loss": 2.0174, "step": 24378 }, { "epoch": 2.5060129509713227, "grad_norm": 0.05242707580327988, "learning_rate": 0.01, "loss": 2.0119, "step": 24381 }, { "epoch": 2.506321307431391, "grad_norm": 0.038752540946006775, "learning_rate": 0.01, "loss": 2.0181, "step": 24384 }, { "epoch": 2.5066296638914585, "grad_norm": 0.07296596467494965, "learning_rate": 0.01, "loss": 2.0027, "step": 24387 }, { "epoch": 2.5069380203515266, "grad_norm": 0.116209976375103, "learning_rate": 0.01, "loss": 2.0205, "step": 24390 }, { "epoch": 2.5072463768115942, "grad_norm": 0.09165041148662567, "learning_rate": 0.01, "loss": 2.0271, "step": 24393 }, { "epoch": 2.507554733271662, "grad_norm": 0.14264173805713654, "learning_rate": 0.01, "loss": 2.0217, "step": 24396 }, { "epoch": 2.50786308973173, "grad_norm": 0.045917656272649765, "learning_rate": 0.01, "loss": 2.0018, "step": 24399 }, { "epoch": 2.5081714461917977, "grad_norm": 0.057148344814777374, "learning_rate": 0.01, "loss": 2.0295, "step": 24402 }, { "epoch": 2.5084798026518653, "grad_norm": 0.03626836836338043, "learning_rate": 0.01, "loss": 2.0197, "step": 24405 }, { "epoch": 2.5087881591119334, "grad_norm": 0.03621996194124222, "learning_rate": 0.01, "loss": 2.0307, "step": 24408 }, { "epoch": 2.509096515572001, "grad_norm": 0.05835467949509621, "learning_rate": 0.01, "loss": 2.0506, "step": 24411 }, { "epoch": 2.509404872032069, "grad_norm": 0.03973361849784851, "learning_rate": 0.01, "loss": 2.0076, "step": 24414 }, { "epoch": 2.509713228492137, "grad_norm": 0.09463023394346237, "learning_rate": 0.01, "loss": 2.0254, "step": 24417 }, { "epoch": 2.510021584952205, "grad_norm": 0.07552462071180344, "learning_rate": 0.01, "loss": 2.0182, "step": 24420 }, { "epoch": 2.5103299414122726, "grad_norm": 0.1288609802722931, "learning_rate": 0.01, "loss": 2.0337, "step": 24423 }, { "epoch": 2.5106382978723403, "grad_norm": 0.06943691521883011, "learning_rate": 0.01, "loss": 2.0162, "step": 24426 }, { "epoch": 2.5109466543324084, "grad_norm": 0.08581320196390152, "learning_rate": 0.01, "loss": 2.0292, "step": 24429 }, { "epoch": 2.511255010792476, "grad_norm": 0.07379914820194244, "learning_rate": 0.01, "loss": 2.0328, "step": 24432 }, { "epoch": 2.5115633672525437, "grad_norm": 0.09235703945159912, "learning_rate": 0.01, "loss": 2.0194, "step": 24435 }, { "epoch": 2.511871723712612, "grad_norm": 0.05038774758577347, "learning_rate": 0.01, "loss": 2.0249, "step": 24438 }, { "epoch": 2.5121800801726795, "grad_norm": 0.08711019903421402, "learning_rate": 0.01, "loss": 2.0276, "step": 24441 }, { "epoch": 2.5124884366327476, "grad_norm": 0.05432825908064842, "learning_rate": 0.01, "loss": 2.0251, "step": 24444 }, { "epoch": 2.5127967930928152, "grad_norm": 0.10115527361631393, "learning_rate": 0.01, "loss": 2.016, "step": 24447 }, { "epoch": 2.5131051495528833, "grad_norm": 0.10387013852596283, "learning_rate": 0.01, "loss": 2.0198, "step": 24450 }, { "epoch": 2.513413506012951, "grad_norm": 0.0740542933344841, "learning_rate": 0.01, "loss": 2.0179, "step": 24453 }, { "epoch": 2.5137218624730187, "grad_norm": 0.07834311574697495, "learning_rate": 0.01, "loss": 2.0731, "step": 24456 }, { "epoch": 2.5140302189330868, "grad_norm": 0.06743736565113068, "learning_rate": 0.01, "loss": 2.0588, "step": 24459 }, { "epoch": 2.5143385753931544, "grad_norm": 0.051791246980428696, "learning_rate": 0.01, "loss": 2.0063, "step": 24462 }, { "epoch": 2.514646931853222, "grad_norm": 0.0731598362326622, "learning_rate": 0.01, "loss": 2.0271, "step": 24465 }, { "epoch": 2.51495528831329, "grad_norm": 0.04995987191796303, "learning_rate": 0.01, "loss": 2.0224, "step": 24468 }, { "epoch": 2.515263644773358, "grad_norm": 0.04953973367810249, "learning_rate": 0.01, "loss": 2.0176, "step": 24471 }, { "epoch": 2.515572001233426, "grad_norm": 0.05432116240262985, "learning_rate": 0.01, "loss": 2.0349, "step": 24474 }, { "epoch": 2.5158803576934936, "grad_norm": 0.048791225999593735, "learning_rate": 0.01, "loss": 2.0296, "step": 24477 }, { "epoch": 2.5161887141535617, "grad_norm": 0.11742904037237167, "learning_rate": 0.01, "loss": 2.04, "step": 24480 }, { "epoch": 2.5164970706136294, "grad_norm": 0.12617075443267822, "learning_rate": 0.01, "loss": 2.0214, "step": 24483 }, { "epoch": 2.516805427073697, "grad_norm": 0.051573995500802994, "learning_rate": 0.01, "loss": 2.0662, "step": 24486 }, { "epoch": 2.517113783533765, "grad_norm": 0.09131506085395813, "learning_rate": 0.01, "loss": 2.0487, "step": 24489 }, { "epoch": 2.517422139993833, "grad_norm": 0.06593006104230881, "learning_rate": 0.01, "loss": 2.0369, "step": 24492 }, { "epoch": 2.5177304964539005, "grad_norm": 0.038310687988996506, "learning_rate": 0.01, "loss": 1.9954, "step": 24495 }, { "epoch": 2.5180388529139686, "grad_norm": 0.05975675210356712, "learning_rate": 0.01, "loss": 2.028, "step": 24498 }, { "epoch": 2.518347209374036, "grad_norm": 0.04541294649243355, "learning_rate": 0.01, "loss": 2.0285, "step": 24501 }, { "epoch": 2.5186555658341043, "grad_norm": 0.053723473101854324, "learning_rate": 0.01, "loss": 2.0029, "step": 24504 }, { "epoch": 2.518963922294172, "grad_norm": 0.030095964670181274, "learning_rate": 0.01, "loss": 2.0401, "step": 24507 }, { "epoch": 2.51927227875424, "grad_norm": 0.10244923830032349, "learning_rate": 0.01, "loss": 2.0348, "step": 24510 }, { "epoch": 2.5195806352143078, "grad_norm": 0.06249944120645523, "learning_rate": 0.01, "loss": 2.0449, "step": 24513 }, { "epoch": 2.5198889916743754, "grad_norm": 0.08720767498016357, "learning_rate": 0.01, "loss": 2.036, "step": 24516 }, { "epoch": 2.5201973481344435, "grad_norm": 0.07686194777488708, "learning_rate": 0.01, "loss": 2.0151, "step": 24519 }, { "epoch": 2.520505704594511, "grad_norm": 0.08837150782346725, "learning_rate": 0.01, "loss": 2.0414, "step": 24522 }, { "epoch": 2.520814061054579, "grad_norm": 0.0794796347618103, "learning_rate": 0.01, "loss": 2.027, "step": 24525 }, { "epoch": 2.521122417514647, "grad_norm": 0.05655858293175697, "learning_rate": 0.01, "loss": 2.0336, "step": 24528 }, { "epoch": 2.521430773974715, "grad_norm": 0.08295401185750961, "learning_rate": 0.01, "loss": 2.0007, "step": 24531 }, { "epoch": 2.5217391304347827, "grad_norm": 0.03982521593570709, "learning_rate": 0.01, "loss": 2.0288, "step": 24534 }, { "epoch": 2.5220474868948504, "grad_norm": 0.04791923984885216, "learning_rate": 0.01, "loss": 2.038, "step": 24537 }, { "epoch": 2.5223558433549185, "grad_norm": 0.11436691880226135, "learning_rate": 0.01, "loss": 2.046, "step": 24540 }, { "epoch": 2.522664199814986, "grad_norm": 0.1064198762178421, "learning_rate": 0.01, "loss": 2.0144, "step": 24543 }, { "epoch": 2.522972556275054, "grad_norm": 0.08036024123430252, "learning_rate": 0.01, "loss": 2.0152, "step": 24546 }, { "epoch": 2.523280912735122, "grad_norm": 0.061799556016922, "learning_rate": 0.01, "loss": 2.0165, "step": 24549 }, { "epoch": 2.5235892691951896, "grad_norm": 0.04592469707131386, "learning_rate": 0.01, "loss": 2.0061, "step": 24552 }, { "epoch": 2.523897625655257, "grad_norm": 0.036766473203897476, "learning_rate": 0.01, "loss": 1.9913, "step": 24555 }, { "epoch": 2.5242059821153253, "grad_norm": 0.09619138389825821, "learning_rate": 0.01, "loss": 2.0264, "step": 24558 }, { "epoch": 2.5245143385753934, "grad_norm": 0.03915918245911598, "learning_rate": 0.01, "loss": 2.0098, "step": 24561 }, { "epoch": 2.524822695035461, "grad_norm": 0.04883084446191788, "learning_rate": 0.01, "loss": 2.0298, "step": 24564 }, { "epoch": 2.5251310514955287, "grad_norm": 0.05630512908101082, "learning_rate": 0.01, "loss": 2.0117, "step": 24567 }, { "epoch": 2.525439407955597, "grad_norm": 0.04064425081014633, "learning_rate": 0.01, "loss": 2.0434, "step": 24570 }, { "epoch": 2.5257477644156645, "grad_norm": 0.05302917957305908, "learning_rate": 0.01, "loss": 2.044, "step": 24573 }, { "epoch": 2.526056120875732, "grad_norm": 0.07677201181650162, "learning_rate": 0.01, "loss": 2.0294, "step": 24576 }, { "epoch": 2.5263644773358003, "grad_norm": 0.07950242608785629, "learning_rate": 0.01, "loss": 2.0006, "step": 24579 }, { "epoch": 2.526672833795868, "grad_norm": 0.07068518549203873, "learning_rate": 0.01, "loss": 2.0278, "step": 24582 }, { "epoch": 2.5269811902559356, "grad_norm": 0.08623625338077545, "learning_rate": 0.01, "loss": 2.0369, "step": 24585 }, { "epoch": 2.5272895467160037, "grad_norm": 0.05549190193414688, "learning_rate": 0.01, "loss": 2.0382, "step": 24588 }, { "epoch": 2.527597903176072, "grad_norm": 0.05710297450423241, "learning_rate": 0.01, "loss": 2.0209, "step": 24591 }, { "epoch": 2.5279062596361395, "grad_norm": 0.05071646347641945, "learning_rate": 0.01, "loss": 2.0066, "step": 24594 }, { "epoch": 2.528214616096207, "grad_norm": 0.09765972197055817, "learning_rate": 0.01, "loss": 2.0468, "step": 24597 }, { "epoch": 2.5285229725562752, "grad_norm": 0.05874921754002571, "learning_rate": 0.01, "loss": 2.0306, "step": 24600 }, { "epoch": 2.528831329016343, "grad_norm": 0.10598991811275482, "learning_rate": 0.01, "loss": 2.0156, "step": 24603 }, { "epoch": 2.5291396854764105, "grad_norm": 0.07071609050035477, "learning_rate": 0.01, "loss": 2.0284, "step": 24606 }, { "epoch": 2.5294480419364787, "grad_norm": 0.07132923603057861, "learning_rate": 0.01, "loss": 2.0004, "step": 24609 }, { "epoch": 2.5297563983965463, "grad_norm": 0.06741276383399963, "learning_rate": 0.01, "loss": 2.0247, "step": 24612 }, { "epoch": 2.530064754856614, "grad_norm": 0.10399371385574341, "learning_rate": 0.01, "loss": 2.033, "step": 24615 }, { "epoch": 2.530373111316682, "grad_norm": 0.054513610899448395, "learning_rate": 0.01, "loss": 2.0364, "step": 24618 }, { "epoch": 2.53068146777675, "grad_norm": 0.03990021347999573, "learning_rate": 0.01, "loss": 2.019, "step": 24621 }, { "epoch": 2.530989824236818, "grad_norm": 0.0329439677298069, "learning_rate": 0.01, "loss": 2.0653, "step": 24624 }, { "epoch": 2.5312981806968855, "grad_norm": 0.08065532892942429, "learning_rate": 0.01, "loss": 2.0195, "step": 24627 }, { "epoch": 2.5316065371569536, "grad_norm": 0.04455409198999405, "learning_rate": 0.01, "loss": 2.0383, "step": 24630 }, { "epoch": 2.5319148936170213, "grad_norm": 0.09395566582679749, "learning_rate": 0.01, "loss": 2.026, "step": 24633 }, { "epoch": 2.532223250077089, "grad_norm": 0.04042106121778488, "learning_rate": 0.01, "loss": 2.0272, "step": 24636 }, { "epoch": 2.532531606537157, "grad_norm": 0.09208521991968155, "learning_rate": 0.01, "loss": 2.0265, "step": 24639 }, { "epoch": 2.5328399629972247, "grad_norm": 0.06603435426950455, "learning_rate": 0.01, "loss": 2.044, "step": 24642 }, { "epoch": 2.533148319457293, "grad_norm": 0.039963483810424805, "learning_rate": 0.01, "loss": 2.0491, "step": 24645 }, { "epoch": 2.5334566759173605, "grad_norm": 0.14821624755859375, "learning_rate": 0.01, "loss": 2.0013, "step": 24648 }, { "epoch": 2.5337650323774286, "grad_norm": 0.06644035130739212, "learning_rate": 0.01, "loss": 2.0433, "step": 24651 }, { "epoch": 2.534073388837496, "grad_norm": 0.0375928059220314, "learning_rate": 0.01, "loss": 2.0264, "step": 24654 }, { "epoch": 2.534381745297564, "grad_norm": 0.06041393801569939, "learning_rate": 0.01, "loss": 2.0198, "step": 24657 }, { "epoch": 2.534690101757632, "grad_norm": 0.06117352098226547, "learning_rate": 0.01, "loss": 2.0148, "step": 24660 }, { "epoch": 2.5349984582176996, "grad_norm": 0.05386986956000328, "learning_rate": 0.01, "loss": 2.0521, "step": 24663 }, { "epoch": 2.5353068146777673, "grad_norm": 0.03399750217795372, "learning_rate": 0.01, "loss": 2.0223, "step": 24666 }, { "epoch": 2.5356151711378354, "grad_norm": 0.06256785988807678, "learning_rate": 0.01, "loss": 2.0372, "step": 24669 }, { "epoch": 2.535923527597903, "grad_norm": 0.08575739711523056, "learning_rate": 0.01, "loss": 2.0214, "step": 24672 }, { "epoch": 2.536231884057971, "grad_norm": 0.0895959809422493, "learning_rate": 0.01, "loss": 2.0339, "step": 24675 }, { "epoch": 2.536540240518039, "grad_norm": 0.06579075753688812, "learning_rate": 0.01, "loss": 2.0363, "step": 24678 }, { "epoch": 2.536848596978107, "grad_norm": 0.04509506747126579, "learning_rate": 0.01, "loss": 2.0331, "step": 24681 }, { "epoch": 2.5371569534381746, "grad_norm": 0.03535350412130356, "learning_rate": 0.01, "loss": 2.0262, "step": 24684 }, { "epoch": 2.5374653098982423, "grad_norm": 0.03496406227350235, "learning_rate": 0.01, "loss": 2.0183, "step": 24687 }, { "epoch": 2.5377736663583104, "grad_norm": 0.04595872759819031, "learning_rate": 0.01, "loss": 2.0376, "step": 24690 }, { "epoch": 2.538082022818378, "grad_norm": 0.07009676098823547, "learning_rate": 0.01, "loss": 2.0236, "step": 24693 }, { "epoch": 2.5383903792784457, "grad_norm": 0.07328460365533829, "learning_rate": 0.01, "loss": 2.0163, "step": 24696 }, { "epoch": 2.538698735738514, "grad_norm": 0.09521552175283432, "learning_rate": 0.01, "loss": 2.0288, "step": 24699 }, { "epoch": 2.5390070921985815, "grad_norm": 0.09087500721216202, "learning_rate": 0.01, "loss": 2.0524, "step": 24702 }, { "epoch": 2.5393154486586496, "grad_norm": 0.05657880753278732, "learning_rate": 0.01, "loss": 2.0336, "step": 24705 }, { "epoch": 2.539623805118717, "grad_norm": 0.13524407148361206, "learning_rate": 0.01, "loss": 2.0245, "step": 24708 }, { "epoch": 2.5399321615787853, "grad_norm": 0.04498621076345444, "learning_rate": 0.01, "loss": 2.0101, "step": 24711 }, { "epoch": 2.540240518038853, "grad_norm": 0.04117140173912048, "learning_rate": 0.01, "loss": 2.0149, "step": 24714 }, { "epoch": 2.5405488744989206, "grad_norm": 0.03630746528506279, "learning_rate": 0.01, "loss": 2.0323, "step": 24717 }, { "epoch": 2.5408572309589887, "grad_norm": 0.03791969269514084, "learning_rate": 0.01, "loss": 2.0142, "step": 24720 }, { "epoch": 2.5411655874190564, "grad_norm": 0.045213595032691956, "learning_rate": 0.01, "loss": 2.0136, "step": 24723 }, { "epoch": 2.541473943879124, "grad_norm": 0.08232447504997253, "learning_rate": 0.01, "loss": 2.0101, "step": 24726 }, { "epoch": 2.541782300339192, "grad_norm": 0.0790674090385437, "learning_rate": 0.01, "loss": 2.0282, "step": 24729 }, { "epoch": 2.54209065679926, "grad_norm": 0.09643759578466415, "learning_rate": 0.01, "loss": 2.008, "step": 24732 }, { "epoch": 2.542399013259328, "grad_norm": 0.09790430217981339, "learning_rate": 0.01, "loss": 1.9987, "step": 24735 }, { "epoch": 2.5427073697193956, "grad_norm": 0.04904096946120262, "learning_rate": 0.01, "loss": 2.0366, "step": 24738 }, { "epoch": 2.5430157261794637, "grad_norm": 0.042802706360816956, "learning_rate": 0.01, "loss": 2.0548, "step": 24741 }, { "epoch": 2.5433240826395314, "grad_norm": 0.04947663098573685, "learning_rate": 0.01, "loss": 2.03, "step": 24744 }, { "epoch": 2.543632439099599, "grad_norm": 0.040841687470674515, "learning_rate": 0.01, "loss": 2.0246, "step": 24747 }, { "epoch": 2.543940795559667, "grad_norm": 0.051419809460639954, "learning_rate": 0.01, "loss": 2.0352, "step": 24750 }, { "epoch": 2.544249152019735, "grad_norm": 0.07173865288496017, "learning_rate": 0.01, "loss": 2.0396, "step": 24753 }, { "epoch": 2.5445575084798024, "grad_norm": 0.07664339989423752, "learning_rate": 0.01, "loss": 2.0209, "step": 24756 }, { "epoch": 2.5448658649398705, "grad_norm": 0.05180468037724495, "learning_rate": 0.01, "loss": 2.0375, "step": 24759 }, { "epoch": 2.545174221399938, "grad_norm": 0.03839515894651413, "learning_rate": 0.01, "loss": 2.0408, "step": 24762 }, { "epoch": 2.5454825778600063, "grad_norm": 0.08712394535541534, "learning_rate": 0.01, "loss": 2.0303, "step": 24765 }, { "epoch": 2.545790934320074, "grad_norm": 0.06906873732805252, "learning_rate": 0.01, "loss": 2.0103, "step": 24768 }, { "epoch": 2.546099290780142, "grad_norm": 0.04779994115233421, "learning_rate": 0.01, "loss": 2.0007, "step": 24771 }, { "epoch": 2.5464076472402097, "grad_norm": 0.03945513069629669, "learning_rate": 0.01, "loss": 2.0207, "step": 24774 }, { "epoch": 2.5467160037002774, "grad_norm": 0.04089882969856262, "learning_rate": 0.01, "loss": 2.0475, "step": 24777 }, { "epoch": 2.5470243601603455, "grad_norm": 0.04492718353867531, "learning_rate": 0.01, "loss": 2.0333, "step": 24780 }, { "epoch": 2.547332716620413, "grad_norm": 0.0761101022362709, "learning_rate": 0.01, "loss": 2.024, "step": 24783 }, { "epoch": 2.547641073080481, "grad_norm": 0.09586388617753983, "learning_rate": 0.01, "loss": 2.0455, "step": 24786 }, { "epoch": 2.547949429540549, "grad_norm": 0.0410308912396431, "learning_rate": 0.01, "loss": 2.0067, "step": 24789 }, { "epoch": 2.5482577860006166, "grad_norm": 0.0583110935986042, "learning_rate": 0.01, "loss": 2.0433, "step": 24792 }, { "epoch": 2.5485661424606847, "grad_norm": 0.03310194984078407, "learning_rate": 0.01, "loss": 2.0022, "step": 24795 }, { "epoch": 2.5488744989207524, "grad_norm": 0.0849560797214508, "learning_rate": 0.01, "loss": 2.0458, "step": 24798 }, { "epoch": 2.5491828553808205, "grad_norm": 0.052898190915584564, "learning_rate": 0.01, "loss": 2.0099, "step": 24801 }, { "epoch": 2.549491211840888, "grad_norm": 0.09630381315946579, "learning_rate": 0.01, "loss": 2.0312, "step": 24804 }, { "epoch": 2.5497995683009558, "grad_norm": 0.04892333596944809, "learning_rate": 0.01, "loss": 2.0151, "step": 24807 }, { "epoch": 2.550107924761024, "grad_norm": 0.09465577453374863, "learning_rate": 0.01, "loss": 2.0163, "step": 24810 }, { "epoch": 2.5504162812210915, "grad_norm": 0.0832308977842331, "learning_rate": 0.01, "loss": 2.0446, "step": 24813 }, { "epoch": 2.550724637681159, "grad_norm": 0.11276236176490784, "learning_rate": 0.01, "loss": 2.0243, "step": 24816 }, { "epoch": 2.5510329941412273, "grad_norm": 0.08327414095401764, "learning_rate": 0.01, "loss": 2.0607, "step": 24819 }, { "epoch": 2.551341350601295, "grad_norm": 0.05502014979720116, "learning_rate": 0.01, "loss": 2.0281, "step": 24822 }, { "epoch": 2.551649707061363, "grad_norm": 0.03681863471865654, "learning_rate": 0.01, "loss": 2.0101, "step": 24825 }, { "epoch": 2.5519580635214307, "grad_norm": 0.08096860349178314, "learning_rate": 0.01, "loss": 1.9904, "step": 24828 }, { "epoch": 2.552266419981499, "grad_norm": 0.05901675671339035, "learning_rate": 0.01, "loss": 2.046, "step": 24831 }, { "epoch": 2.5525747764415665, "grad_norm": 0.09850065410137177, "learning_rate": 0.01, "loss": 2.0495, "step": 24834 }, { "epoch": 2.552883132901634, "grad_norm": 0.08438712358474731, "learning_rate": 0.01, "loss": 2.0185, "step": 24837 }, { "epoch": 2.5531914893617023, "grad_norm": 0.04949135333299637, "learning_rate": 0.01, "loss": 2.0197, "step": 24840 }, { "epoch": 2.55349984582177, "grad_norm": 0.044099997729063034, "learning_rate": 0.01, "loss": 2.0275, "step": 24843 }, { "epoch": 2.5538082022818376, "grad_norm": 0.08626649528741837, "learning_rate": 0.01, "loss": 2.0246, "step": 24846 }, { "epoch": 2.5541165587419057, "grad_norm": 0.08545881509780884, "learning_rate": 0.01, "loss": 2.0221, "step": 24849 }, { "epoch": 2.5544249152019733, "grad_norm": 0.06181343272328377, "learning_rate": 0.01, "loss": 2.0379, "step": 24852 }, { "epoch": 2.5547332716620414, "grad_norm": 0.0839785784482956, "learning_rate": 0.01, "loss": 2.0309, "step": 24855 }, { "epoch": 2.555041628122109, "grad_norm": 0.055504992604255676, "learning_rate": 0.01, "loss": 2.0333, "step": 24858 }, { "epoch": 2.555349984582177, "grad_norm": 0.04236135631799698, "learning_rate": 0.01, "loss": 1.9981, "step": 24861 }, { "epoch": 2.555658341042245, "grad_norm": 0.035614918917417526, "learning_rate": 0.01, "loss": 2.006, "step": 24864 }, { "epoch": 2.5559666975023125, "grad_norm": 0.04150492325425148, "learning_rate": 0.01, "loss": 2.0414, "step": 24867 }, { "epoch": 2.5562750539623806, "grad_norm": 0.07994359731674194, "learning_rate": 0.01, "loss": 2.0302, "step": 24870 }, { "epoch": 2.5565834104224483, "grad_norm": 0.08954035490751266, "learning_rate": 0.01, "loss": 2.0208, "step": 24873 }, { "epoch": 2.556891766882516, "grad_norm": 0.1362268626689911, "learning_rate": 0.01, "loss": 2.0211, "step": 24876 }, { "epoch": 2.557200123342584, "grad_norm": 0.11425944417715073, "learning_rate": 0.01, "loss": 2.0126, "step": 24879 }, { "epoch": 2.5575084798026517, "grad_norm": 0.07083035260438919, "learning_rate": 0.01, "loss": 2.0312, "step": 24882 }, { "epoch": 2.55781683626272, "grad_norm": 0.06250528246164322, "learning_rate": 0.01, "loss": 2.0317, "step": 24885 }, { "epoch": 2.5581251927227875, "grad_norm": 0.047506481409072876, "learning_rate": 0.01, "loss": 2.0134, "step": 24888 }, { "epoch": 2.5584335491828556, "grad_norm": 0.04237549751996994, "learning_rate": 0.01, "loss": 2.0227, "step": 24891 }, { "epoch": 2.5587419056429233, "grad_norm": 0.04128411412239075, "learning_rate": 0.01, "loss": 2.0122, "step": 24894 }, { "epoch": 2.559050262102991, "grad_norm": 0.03886473551392555, "learning_rate": 0.01, "loss": 2.0385, "step": 24897 }, { "epoch": 2.559358618563059, "grad_norm": 0.1163051575422287, "learning_rate": 0.01, "loss": 2.0456, "step": 24900 }, { "epoch": 2.5596669750231267, "grad_norm": 0.04279797896742821, "learning_rate": 0.01, "loss": 2.0122, "step": 24903 }, { "epoch": 2.5599753314831943, "grad_norm": 0.08159471303224564, "learning_rate": 0.01, "loss": 2.0218, "step": 24906 }, { "epoch": 2.5602836879432624, "grad_norm": 0.06161525472998619, "learning_rate": 0.01, "loss": 2.009, "step": 24909 }, { "epoch": 2.56059204440333, "grad_norm": 0.05011424049735069, "learning_rate": 0.01, "loss": 2.0001, "step": 24912 }, { "epoch": 2.560900400863398, "grad_norm": 0.05973159521818161, "learning_rate": 0.01, "loss": 2.0289, "step": 24915 }, { "epoch": 2.561208757323466, "grad_norm": 0.07461394369602203, "learning_rate": 0.01, "loss": 2.0357, "step": 24918 }, { "epoch": 2.561517113783534, "grad_norm": 0.09631699323654175, "learning_rate": 0.01, "loss": 2.0234, "step": 24921 }, { "epoch": 2.5618254702436016, "grad_norm": 0.05727219581604004, "learning_rate": 0.01, "loss": 2.0404, "step": 24924 }, { "epoch": 2.5621338267036693, "grad_norm": 0.08594338595867157, "learning_rate": 0.01, "loss": 2.0166, "step": 24927 }, { "epoch": 2.5624421831637374, "grad_norm": 0.1109083890914917, "learning_rate": 0.01, "loss": 2.0185, "step": 24930 }, { "epoch": 2.562750539623805, "grad_norm": 0.0593339204788208, "learning_rate": 0.01, "loss": 2.0064, "step": 24933 }, { "epoch": 2.5630588960838727, "grad_norm": 0.0381302647292614, "learning_rate": 0.01, "loss": 2.0236, "step": 24936 }, { "epoch": 2.563367252543941, "grad_norm": 0.056093595921993256, "learning_rate": 0.01, "loss": 2.0269, "step": 24939 }, { "epoch": 2.5636756090040085, "grad_norm": 0.11212731897830963, "learning_rate": 0.01, "loss": 2.0488, "step": 24942 }, { "epoch": 2.5639839654640766, "grad_norm": 0.07110024988651276, "learning_rate": 0.01, "loss": 2.013, "step": 24945 }, { "epoch": 2.5642923219241442, "grad_norm": 0.05951390787959099, "learning_rate": 0.01, "loss": 2.0412, "step": 24948 }, { "epoch": 2.5646006783842124, "grad_norm": 0.07836031913757324, "learning_rate": 0.01, "loss": 2.0143, "step": 24951 }, { "epoch": 2.56490903484428, "grad_norm": 0.06882999837398529, "learning_rate": 0.01, "loss": 2.0065, "step": 24954 }, { "epoch": 2.5652173913043477, "grad_norm": 0.0868605375289917, "learning_rate": 0.01, "loss": 2.0111, "step": 24957 }, { "epoch": 2.5655257477644158, "grad_norm": 0.10812171548604965, "learning_rate": 0.01, "loss": 2.0462, "step": 24960 }, { "epoch": 2.5658341042244834, "grad_norm": 0.07124783843755722, "learning_rate": 0.01, "loss": 2.013, "step": 24963 }, { "epoch": 2.566142460684551, "grad_norm": 0.037611838430166245, "learning_rate": 0.01, "loss": 2.0277, "step": 24966 }, { "epoch": 2.566450817144619, "grad_norm": 0.03723758086562157, "learning_rate": 0.01, "loss": 2.0187, "step": 24969 }, { "epoch": 2.566759173604687, "grad_norm": 0.08805309981107712, "learning_rate": 0.01, "loss": 2.01, "step": 24972 }, { "epoch": 2.567067530064755, "grad_norm": 0.10381683707237244, "learning_rate": 0.01, "loss": 2.0253, "step": 24975 }, { "epoch": 2.5673758865248226, "grad_norm": 0.11186369508504868, "learning_rate": 0.01, "loss": 2.0271, "step": 24978 }, { "epoch": 2.5676842429848907, "grad_norm": 0.07986850291490555, "learning_rate": 0.01, "loss": 2.0252, "step": 24981 }, { "epoch": 2.5679925994449584, "grad_norm": 0.04094192385673523, "learning_rate": 0.01, "loss": 1.9982, "step": 24984 }, { "epoch": 2.568300955905026, "grad_norm": 0.049611032009124756, "learning_rate": 0.01, "loss": 2.0283, "step": 24987 }, { "epoch": 2.568609312365094, "grad_norm": 0.05613689869642258, "learning_rate": 0.01, "loss": 2.0304, "step": 24990 }, { "epoch": 2.568917668825162, "grad_norm": 0.051894500851631165, "learning_rate": 0.01, "loss": 2.0069, "step": 24993 }, { "epoch": 2.5692260252852295, "grad_norm": 0.04092536121606827, "learning_rate": 0.01, "loss": 2.0283, "step": 24996 }, { "epoch": 2.5695343817452976, "grad_norm": 0.03474249318242073, "learning_rate": 0.01, "loss": 2.0083, "step": 24999 }, { "epoch": 2.5698427382053652, "grad_norm": 0.04513520747423172, "learning_rate": 0.01, "loss": 2.0288, "step": 25002 }, { "epoch": 2.5701510946654333, "grad_norm": 0.06130135431885719, "learning_rate": 0.01, "loss": 2.0245, "step": 25005 }, { "epoch": 2.570459451125501, "grad_norm": 0.07398026436567307, "learning_rate": 0.01, "loss": 2.0199, "step": 25008 }, { "epoch": 2.570767807585569, "grad_norm": 0.06060103699564934, "learning_rate": 0.01, "loss": 1.9956, "step": 25011 }, { "epoch": 2.5710761640456368, "grad_norm": 0.051868923008441925, "learning_rate": 0.01, "loss": 2.0355, "step": 25014 }, { "epoch": 2.5713845205057044, "grad_norm": 0.09465671330690384, "learning_rate": 0.01, "loss": 2.014, "step": 25017 }, { "epoch": 2.5716928769657725, "grad_norm": 0.048888836055994034, "learning_rate": 0.01, "loss": 2.0163, "step": 25020 }, { "epoch": 2.57200123342584, "grad_norm": 0.04938677325844765, "learning_rate": 0.01, "loss": 2.0165, "step": 25023 }, { "epoch": 2.572309589885908, "grad_norm": 0.1066848635673523, "learning_rate": 0.01, "loss": 2.0354, "step": 25026 }, { "epoch": 2.572617946345976, "grad_norm": 0.044199470430612564, "learning_rate": 0.01, "loss": 2.0358, "step": 25029 }, { "epoch": 2.572926302806044, "grad_norm": 0.06313291937112808, "learning_rate": 0.01, "loss": 2.0166, "step": 25032 }, { "epoch": 2.5732346592661117, "grad_norm": 0.08843620121479034, "learning_rate": 0.01, "loss": 2.0125, "step": 25035 }, { "epoch": 2.5735430157261794, "grad_norm": 0.028659775853157043, "learning_rate": 0.01, "loss": 2.0324, "step": 25038 }, { "epoch": 2.5738513721862475, "grad_norm": 0.09034299850463867, "learning_rate": 0.01, "loss": 2.0311, "step": 25041 }, { "epoch": 2.574159728646315, "grad_norm": 0.08496701717376709, "learning_rate": 0.01, "loss": 2.0457, "step": 25044 }, { "epoch": 2.574468085106383, "grad_norm": 0.04633186012506485, "learning_rate": 0.01, "loss": 2.0204, "step": 25047 }, { "epoch": 2.574776441566451, "grad_norm": 0.05091328173875809, "learning_rate": 0.01, "loss": 2.0622, "step": 25050 }, { "epoch": 2.5750847980265186, "grad_norm": 0.03941154107451439, "learning_rate": 0.01, "loss": 2.0097, "step": 25053 }, { "epoch": 2.5753931544865862, "grad_norm": 0.07574623823165894, "learning_rate": 0.01, "loss": 2.0119, "step": 25056 }, { "epoch": 2.5757015109466543, "grad_norm": 0.07106275856494904, "learning_rate": 0.01, "loss": 2.0164, "step": 25059 }, { "epoch": 2.5760098674067224, "grad_norm": 0.06767601519823074, "learning_rate": 0.01, "loss": 2.0243, "step": 25062 }, { "epoch": 2.57631822386679, "grad_norm": 0.05537039414048195, "learning_rate": 0.01, "loss": 2.0375, "step": 25065 }, { "epoch": 2.5766265803268578, "grad_norm": 0.06547438353300095, "learning_rate": 0.01, "loss": 2.0014, "step": 25068 }, { "epoch": 2.576934936786926, "grad_norm": 0.0862760990858078, "learning_rate": 0.01, "loss": 2.004, "step": 25071 }, { "epoch": 2.5772432932469935, "grad_norm": 0.041683733463287354, "learning_rate": 0.01, "loss": 2.0484, "step": 25074 }, { "epoch": 2.577551649707061, "grad_norm": 0.049321915954351425, "learning_rate": 0.01, "loss": 2.0158, "step": 25077 }, { "epoch": 2.5778600061671293, "grad_norm": 0.09261754900217056, "learning_rate": 0.01, "loss": 2.0162, "step": 25080 }, { "epoch": 2.578168362627197, "grad_norm": 0.07979609072208405, "learning_rate": 0.01, "loss": 2.0425, "step": 25083 }, { "epoch": 2.5784767190872646, "grad_norm": 0.06629879772663116, "learning_rate": 0.01, "loss": 2.0339, "step": 25086 }, { "epoch": 2.5787850755473327, "grad_norm": 0.07896976172924042, "learning_rate": 0.01, "loss": 2.0144, "step": 25089 }, { "epoch": 2.579093432007401, "grad_norm": 0.06102503091096878, "learning_rate": 0.01, "loss": 2.0012, "step": 25092 }, { "epoch": 2.5794017884674685, "grad_norm": 0.07823985069990158, "learning_rate": 0.01, "loss": 2.0207, "step": 25095 }, { "epoch": 2.579710144927536, "grad_norm": 0.08163253217935562, "learning_rate": 0.01, "loss": 2.0583, "step": 25098 }, { "epoch": 2.5800185013876042, "grad_norm": 0.06111651286482811, "learning_rate": 0.01, "loss": 2.0434, "step": 25101 }, { "epoch": 2.580326857847672, "grad_norm": 0.03768099471926689, "learning_rate": 0.01, "loss": 2.0185, "step": 25104 }, { "epoch": 2.5806352143077396, "grad_norm": 0.0871853157877922, "learning_rate": 0.01, "loss": 2.0293, "step": 25107 }, { "epoch": 2.5809435707678077, "grad_norm": 0.05394020304083824, "learning_rate": 0.01, "loss": 2.0249, "step": 25110 }, { "epoch": 2.5812519272278753, "grad_norm": 0.07910983264446259, "learning_rate": 0.01, "loss": 2.042, "step": 25113 }, { "epoch": 2.581560283687943, "grad_norm": 0.06922271102666855, "learning_rate": 0.01, "loss": 2.0493, "step": 25116 }, { "epoch": 2.581868640148011, "grad_norm": 0.05517781525850296, "learning_rate": 0.01, "loss": 2.0161, "step": 25119 }, { "epoch": 2.582176996608079, "grad_norm": 0.05166595056653023, "learning_rate": 0.01, "loss": 2.0402, "step": 25122 }, { "epoch": 2.582485353068147, "grad_norm": 0.045153357088565826, "learning_rate": 0.01, "loss": 2.0366, "step": 25125 }, { "epoch": 2.5827937095282145, "grad_norm": 0.07232387363910675, "learning_rate": 0.01, "loss": 2.0345, "step": 25128 }, { "epoch": 2.5831020659882826, "grad_norm": 0.035037536174058914, "learning_rate": 0.01, "loss": 2.0195, "step": 25131 }, { "epoch": 2.5834104224483503, "grad_norm": 0.039313822984695435, "learning_rate": 0.01, "loss": 2.0196, "step": 25134 }, { "epoch": 2.583718778908418, "grad_norm": 0.0632469579577446, "learning_rate": 0.01, "loss": 2.0454, "step": 25137 }, { "epoch": 2.584027135368486, "grad_norm": 0.10993051528930664, "learning_rate": 0.01, "loss": 2.0289, "step": 25140 }, { "epoch": 2.5843354918285537, "grad_norm": 0.0852990597486496, "learning_rate": 0.01, "loss": 2.053, "step": 25143 }, { "epoch": 2.5846438482886214, "grad_norm": 0.0442403107881546, "learning_rate": 0.01, "loss": 2.0089, "step": 25146 }, { "epoch": 2.5849522047486895, "grad_norm": 0.03534874692559242, "learning_rate": 0.01, "loss": 2.0297, "step": 25149 }, { "epoch": 2.5852605612087576, "grad_norm": 0.031708016991615295, "learning_rate": 0.01, "loss": 2.0064, "step": 25152 }, { "epoch": 2.5855689176688252, "grad_norm": 0.056695304811000824, "learning_rate": 0.01, "loss": 2.0265, "step": 25155 }, { "epoch": 2.585877274128893, "grad_norm": 0.12697716057300568, "learning_rate": 0.01, "loss": 2.0415, "step": 25158 }, { "epoch": 2.586185630588961, "grad_norm": 0.07686912268400192, "learning_rate": 0.01, "loss": 2.0098, "step": 25161 }, { "epoch": 2.5864939870490287, "grad_norm": 0.10015466809272766, "learning_rate": 0.01, "loss": 2.0229, "step": 25164 }, { "epoch": 2.5868023435090963, "grad_norm": 0.05786514654755592, "learning_rate": 0.01, "loss": 2.0025, "step": 25167 }, { "epoch": 2.5871106999691644, "grad_norm": 0.05359407886862755, "learning_rate": 0.01, "loss": 2.0075, "step": 25170 }, { "epoch": 2.587419056429232, "grad_norm": 0.10763208568096161, "learning_rate": 0.01, "loss": 2.0611, "step": 25173 }, { "epoch": 2.5877274128893, "grad_norm": 0.06255360692739487, "learning_rate": 0.01, "loss": 2.0173, "step": 25176 }, { "epoch": 2.588035769349368, "grad_norm": 0.0519418902695179, "learning_rate": 0.01, "loss": 2.0095, "step": 25179 }, { "epoch": 2.588344125809436, "grad_norm": 0.09810636937618256, "learning_rate": 0.01, "loss": 2.0012, "step": 25182 }, { "epoch": 2.5886524822695036, "grad_norm": 0.05091201886534691, "learning_rate": 0.01, "loss": 2.0221, "step": 25185 }, { "epoch": 2.5889608387295713, "grad_norm": 0.046215660870075226, "learning_rate": 0.01, "loss": 2.0235, "step": 25188 }, { "epoch": 2.5892691951896394, "grad_norm": 0.06873856484889984, "learning_rate": 0.01, "loss": 2.0234, "step": 25191 }, { "epoch": 2.589577551649707, "grad_norm": 0.08075796812772751, "learning_rate": 0.01, "loss": 2.0399, "step": 25194 }, { "epoch": 2.5898859081097747, "grad_norm": 0.10317845642566681, "learning_rate": 0.01, "loss": 2.0087, "step": 25197 }, { "epoch": 2.590194264569843, "grad_norm": 0.07780349254608154, "learning_rate": 0.01, "loss": 2.0052, "step": 25200 }, { "epoch": 2.5905026210299105, "grad_norm": 0.0646161437034607, "learning_rate": 0.01, "loss": 2.0077, "step": 25203 }, { "epoch": 2.5908109774899786, "grad_norm": 0.10224328190088272, "learning_rate": 0.01, "loss": 2.0091, "step": 25206 }, { "epoch": 2.5911193339500462, "grad_norm": 0.0714394599199295, "learning_rate": 0.01, "loss": 2.0246, "step": 25209 }, { "epoch": 2.5914276904101143, "grad_norm": 0.06261731684207916, "learning_rate": 0.01, "loss": 1.9908, "step": 25212 }, { "epoch": 2.591736046870182, "grad_norm": 0.07271763682365417, "learning_rate": 0.01, "loss": 2.0489, "step": 25215 }, { "epoch": 2.5920444033302497, "grad_norm": 0.07044383883476257, "learning_rate": 0.01, "loss": 2.0461, "step": 25218 }, { "epoch": 2.5923527597903178, "grad_norm": 0.10808674246072769, "learning_rate": 0.01, "loss": 2.0229, "step": 25221 }, { "epoch": 2.5926611162503854, "grad_norm": 0.049697790294885635, "learning_rate": 0.01, "loss": 2.0139, "step": 25224 }, { "epoch": 2.592969472710453, "grad_norm": 0.08951854705810547, "learning_rate": 0.01, "loss": 2.0228, "step": 25227 }, { "epoch": 2.593277829170521, "grad_norm": 0.06834417581558228, "learning_rate": 0.01, "loss": 2.0362, "step": 25230 }, { "epoch": 2.593586185630589, "grad_norm": 0.037199974060058594, "learning_rate": 0.01, "loss": 1.9987, "step": 25233 }, { "epoch": 2.593894542090657, "grad_norm": 0.056284189224243164, "learning_rate": 0.01, "loss": 2.0313, "step": 25236 }, { "epoch": 2.5942028985507246, "grad_norm": 0.07686278969049454, "learning_rate": 0.01, "loss": 2.0376, "step": 25239 }, { "epoch": 2.5945112550107927, "grad_norm": 0.043646443635225296, "learning_rate": 0.01, "loss": 2.0145, "step": 25242 }, { "epoch": 2.5948196114708604, "grad_norm": 0.0594371035695076, "learning_rate": 0.01, "loss": 2.0147, "step": 25245 }, { "epoch": 2.595127967930928, "grad_norm": 0.08617819100618362, "learning_rate": 0.01, "loss": 2.0307, "step": 25248 }, { "epoch": 2.595436324390996, "grad_norm": 0.13672196865081787, "learning_rate": 0.01, "loss": 2.0515, "step": 25251 }, { "epoch": 2.595744680851064, "grad_norm": 0.062405068427324295, "learning_rate": 0.01, "loss": 2.0205, "step": 25254 }, { "epoch": 2.5960530373111315, "grad_norm": 0.042263686656951904, "learning_rate": 0.01, "loss": 2.0336, "step": 25257 }, { "epoch": 2.5963613937711996, "grad_norm": 0.04821668192744255, "learning_rate": 0.01, "loss": 2.0231, "step": 25260 }, { "epoch": 2.5966697502312672, "grad_norm": 0.048817023634910583, "learning_rate": 0.01, "loss": 2.0178, "step": 25263 }, { "epoch": 2.5969781066913353, "grad_norm": 0.05794850364327431, "learning_rate": 0.01, "loss": 2.023, "step": 25266 }, { "epoch": 2.597286463151403, "grad_norm": 0.05057196319103241, "learning_rate": 0.01, "loss": 2.0075, "step": 25269 }, { "epoch": 2.597594819611471, "grad_norm": 0.05443112552165985, "learning_rate": 0.01, "loss": 2.0381, "step": 25272 }, { "epoch": 2.5979031760715388, "grad_norm": 0.0533830001950264, "learning_rate": 0.01, "loss": 2.0201, "step": 25275 }, { "epoch": 2.5982115325316064, "grad_norm": 0.040888138115406036, "learning_rate": 0.01, "loss": 1.9956, "step": 25278 }, { "epoch": 2.5985198889916745, "grad_norm": 0.07154986262321472, "learning_rate": 0.01, "loss": 2.0285, "step": 25281 }, { "epoch": 2.598828245451742, "grad_norm": 0.17314322292804718, "learning_rate": 0.01, "loss": 2.0288, "step": 25284 }, { "epoch": 2.59913660191181, "grad_norm": 0.051602523773908615, "learning_rate": 0.01, "loss": 2.0351, "step": 25287 }, { "epoch": 2.599444958371878, "grad_norm": 0.047731827944517136, "learning_rate": 0.01, "loss": 2.0435, "step": 25290 }, { "epoch": 2.5997533148319456, "grad_norm": 0.0334257036447525, "learning_rate": 0.01, "loss": 2.0074, "step": 25293 }, { "epoch": 2.6000616712920137, "grad_norm": 0.03708617389202118, "learning_rate": 0.01, "loss": 1.9933, "step": 25296 }, { "epoch": 2.6003700277520814, "grad_norm": 0.08540193736553192, "learning_rate": 0.01, "loss": 2.0085, "step": 25299 }, { "epoch": 2.6006783842121495, "grad_norm": 0.1036924496293068, "learning_rate": 0.01, "loss": 2.0238, "step": 25302 }, { "epoch": 2.600986740672217, "grad_norm": 0.056603506207466125, "learning_rate": 0.01, "loss": 2.0161, "step": 25305 }, { "epoch": 2.601295097132285, "grad_norm": 0.1030723974108696, "learning_rate": 0.01, "loss": 2.0414, "step": 25308 }, { "epoch": 2.601603453592353, "grad_norm": 0.060525115579366684, "learning_rate": 0.01, "loss": 2.0083, "step": 25311 }, { "epoch": 2.6019118100524206, "grad_norm": 0.061082128435373306, "learning_rate": 0.01, "loss": 1.987, "step": 25314 }, { "epoch": 2.602220166512488, "grad_norm": 0.045477550476789474, "learning_rate": 0.01, "loss": 2.0251, "step": 25317 }, { "epoch": 2.6025285229725563, "grad_norm": 0.03306104615330696, "learning_rate": 0.01, "loss": 2.0359, "step": 25320 }, { "epoch": 2.602836879432624, "grad_norm": 0.052543554455041885, "learning_rate": 0.01, "loss": 2.0454, "step": 25323 }, { "epoch": 2.603145235892692, "grad_norm": 0.04408182203769684, "learning_rate": 0.01, "loss": 2.0375, "step": 25326 }, { "epoch": 2.6034535923527597, "grad_norm": 0.05216488614678383, "learning_rate": 0.01, "loss": 2.0331, "step": 25329 }, { "epoch": 2.603761948812828, "grad_norm": 0.12084914743900299, "learning_rate": 0.01, "loss": 2.0052, "step": 25332 }, { "epoch": 2.6040703052728955, "grad_norm": 0.09642963856458664, "learning_rate": 0.01, "loss": 2.0161, "step": 25335 }, { "epoch": 2.604378661732963, "grad_norm": 0.06409110128879547, "learning_rate": 0.01, "loss": 2.0052, "step": 25338 }, { "epoch": 2.6046870181930313, "grad_norm": 0.07277770340442657, "learning_rate": 0.01, "loss": 2.0241, "step": 25341 }, { "epoch": 2.604995374653099, "grad_norm": 0.049252595752477646, "learning_rate": 0.01, "loss": 2.0172, "step": 25344 }, { "epoch": 2.6053037311131666, "grad_norm": 0.0495469830930233, "learning_rate": 0.01, "loss": 2.0195, "step": 25347 }, { "epoch": 2.6056120875732347, "grad_norm": 0.06318475306034088, "learning_rate": 0.01, "loss": 1.9937, "step": 25350 }, { "epoch": 2.6059204440333024, "grad_norm": 0.07843147218227386, "learning_rate": 0.01, "loss": 2.0333, "step": 25353 }, { "epoch": 2.6062288004933705, "grad_norm": 0.055239688605070114, "learning_rate": 0.01, "loss": 2.0308, "step": 25356 }, { "epoch": 2.606537156953438, "grad_norm": 0.03876148536801338, "learning_rate": 0.01, "loss": 2.0227, "step": 25359 }, { "epoch": 2.6068455134135062, "grad_norm": 0.12309783697128296, "learning_rate": 0.01, "loss": 2.0233, "step": 25362 }, { "epoch": 2.607153869873574, "grad_norm": 0.09926038980484009, "learning_rate": 0.01, "loss": 2.0305, "step": 25365 }, { "epoch": 2.6074622263336416, "grad_norm": 0.07237336784601212, "learning_rate": 0.01, "loss": 1.9929, "step": 25368 }, { "epoch": 2.6077705827937097, "grad_norm": 0.09653117507696152, "learning_rate": 0.01, "loss": 2.0547, "step": 25371 }, { "epoch": 2.6080789392537773, "grad_norm": 0.0454515665769577, "learning_rate": 0.01, "loss": 2.0324, "step": 25374 }, { "epoch": 2.608387295713845, "grad_norm": 0.04541772976517677, "learning_rate": 0.01, "loss": 2.0301, "step": 25377 }, { "epoch": 2.608695652173913, "grad_norm": 0.05721856653690338, "learning_rate": 0.01, "loss": 2.011, "step": 25380 }, { "epoch": 2.6090040086339807, "grad_norm": 0.0526764839887619, "learning_rate": 0.01, "loss": 2.0176, "step": 25383 }, { "epoch": 2.609312365094049, "grad_norm": 0.08415975421667099, "learning_rate": 0.01, "loss": 1.9948, "step": 25386 }, { "epoch": 2.6096207215541165, "grad_norm": 0.07950541377067566, "learning_rate": 0.01, "loss": 2.0285, "step": 25389 }, { "epoch": 2.6099290780141846, "grad_norm": 0.07684530317783356, "learning_rate": 0.01, "loss": 2.0283, "step": 25392 }, { "epoch": 2.6102374344742523, "grad_norm": 0.0458965003490448, "learning_rate": 0.01, "loss": 2.0335, "step": 25395 }, { "epoch": 2.61054579093432, "grad_norm": 0.11776190251111984, "learning_rate": 0.01, "loss": 2.0117, "step": 25398 }, { "epoch": 2.610854147394388, "grad_norm": 0.03954809904098511, "learning_rate": 0.01, "loss": 2.0352, "step": 25401 }, { "epoch": 2.6111625038544557, "grad_norm": 0.08056820929050446, "learning_rate": 0.01, "loss": 2.0338, "step": 25404 }, { "epoch": 2.6114708603145234, "grad_norm": 0.03288201987743378, "learning_rate": 0.01, "loss": 2.0067, "step": 25407 }, { "epoch": 2.6117792167745915, "grad_norm": 0.06156465783715248, "learning_rate": 0.01, "loss": 2.0455, "step": 25410 }, { "epoch": 2.612087573234659, "grad_norm": 0.04141581431031227, "learning_rate": 0.01, "loss": 1.9822, "step": 25413 }, { "epoch": 2.6123959296947272, "grad_norm": 0.06731928139925003, "learning_rate": 0.01, "loss": 2.028, "step": 25416 }, { "epoch": 2.612704286154795, "grad_norm": 0.07682816684246063, "learning_rate": 0.01, "loss": 2.0155, "step": 25419 }, { "epoch": 2.613012642614863, "grad_norm": 0.10766996443271637, "learning_rate": 0.01, "loss": 1.9897, "step": 25422 }, { "epoch": 2.6133209990749307, "grad_norm": 0.11409672349691391, "learning_rate": 0.01, "loss": 1.9823, "step": 25425 }, { "epoch": 2.6136293555349983, "grad_norm": 0.07693130522966385, "learning_rate": 0.01, "loss": 2.02, "step": 25428 }, { "epoch": 2.6139377119950664, "grad_norm": 0.034606434404850006, "learning_rate": 0.01, "loss": 2.043, "step": 25431 }, { "epoch": 2.614246068455134, "grad_norm": 0.0957694724202156, "learning_rate": 0.01, "loss": 2.0527, "step": 25434 }, { "epoch": 2.6145544249152017, "grad_norm": 0.05739649757742882, "learning_rate": 0.01, "loss": 2.0209, "step": 25437 }, { "epoch": 2.61486278137527, "grad_norm": 0.05702357366681099, "learning_rate": 0.01, "loss": 2.0321, "step": 25440 }, { "epoch": 2.6151711378353375, "grad_norm": 0.10596863180398941, "learning_rate": 0.01, "loss": 2.023, "step": 25443 }, { "epoch": 2.6154794942954056, "grad_norm": 0.07135487347841263, "learning_rate": 0.01, "loss": 2.0192, "step": 25446 }, { "epoch": 2.6157878507554733, "grad_norm": 0.04034152254462242, "learning_rate": 0.01, "loss": 2.034, "step": 25449 }, { "epoch": 2.6160962072155414, "grad_norm": 0.05510259047150612, "learning_rate": 0.01, "loss": 2.0201, "step": 25452 }, { "epoch": 2.616404563675609, "grad_norm": 0.03920494019985199, "learning_rate": 0.01, "loss": 1.9831, "step": 25455 }, { "epoch": 2.6167129201356767, "grad_norm": 0.06935703754425049, "learning_rate": 0.01, "loss": 2.0242, "step": 25458 }, { "epoch": 2.617021276595745, "grad_norm": 0.04524112120270729, "learning_rate": 0.01, "loss": 2.0415, "step": 25461 }, { "epoch": 2.6173296330558125, "grad_norm": 0.03639009967446327, "learning_rate": 0.01, "loss": 2.0086, "step": 25464 }, { "epoch": 2.61763798951588, "grad_norm": 0.0551072359085083, "learning_rate": 0.01, "loss": 2.0279, "step": 25467 }, { "epoch": 2.617946345975948, "grad_norm": 0.03943365439772606, "learning_rate": 0.01, "loss": 2.0229, "step": 25470 }, { "epoch": 2.618254702436016, "grad_norm": 0.05363466218113899, "learning_rate": 0.01, "loss": 2.0285, "step": 25473 }, { "epoch": 2.618563058896084, "grad_norm": 0.10065023601055145, "learning_rate": 0.01, "loss": 2.0307, "step": 25476 }, { "epoch": 2.6188714153561516, "grad_norm": 0.06447052210569382, "learning_rate": 0.01, "loss": 2.0375, "step": 25479 }, { "epoch": 2.6191797718162197, "grad_norm": 0.03966406360268593, "learning_rate": 0.01, "loss": 1.9923, "step": 25482 }, { "epoch": 2.6194881282762874, "grad_norm": 0.05280005559325218, "learning_rate": 0.01, "loss": 2.0184, "step": 25485 }, { "epoch": 2.619796484736355, "grad_norm": 0.1111968457698822, "learning_rate": 0.01, "loss": 1.9933, "step": 25488 }, { "epoch": 2.620104841196423, "grad_norm": 0.11483361572027206, "learning_rate": 0.01, "loss": 2.0007, "step": 25491 }, { "epoch": 2.620413197656491, "grad_norm": 0.039019446820020676, "learning_rate": 0.01, "loss": 2.0435, "step": 25494 }, { "epoch": 2.6207215541165585, "grad_norm": 0.05683600530028343, "learning_rate": 0.01, "loss": 2.0323, "step": 25497 }, { "epoch": 2.6210299105766266, "grad_norm": 0.042798321694135666, "learning_rate": 0.01, "loss": 2.0255, "step": 25500 }, { "epoch": 2.6213382670366943, "grad_norm": 0.040838126093149185, "learning_rate": 0.01, "loss": 2.0164, "step": 25503 }, { "epoch": 2.6216466234967624, "grad_norm": 0.07249750196933746, "learning_rate": 0.01, "loss": 2.0395, "step": 25506 }, { "epoch": 2.62195497995683, "grad_norm": 0.08197704702615738, "learning_rate": 0.01, "loss": 1.9932, "step": 25509 }, { "epoch": 2.622263336416898, "grad_norm": 0.11885122954845428, "learning_rate": 0.01, "loss": 2.0203, "step": 25512 }, { "epoch": 2.622571692876966, "grad_norm": 0.07574759423732758, "learning_rate": 0.01, "loss": 2.0443, "step": 25515 }, { "epoch": 2.6228800493370334, "grad_norm": 0.05106687173247337, "learning_rate": 0.01, "loss": 2.0222, "step": 25518 }, { "epoch": 2.6231884057971016, "grad_norm": 0.03381403908133507, "learning_rate": 0.01, "loss": 2.0167, "step": 25521 }, { "epoch": 2.623496762257169, "grad_norm": 0.04259166494011879, "learning_rate": 0.01, "loss": 1.9915, "step": 25524 }, { "epoch": 2.623805118717237, "grad_norm": 0.06276170909404755, "learning_rate": 0.01, "loss": 2.0116, "step": 25527 }, { "epoch": 2.624113475177305, "grad_norm": 0.05478539317846298, "learning_rate": 0.01, "loss": 2.0299, "step": 25530 }, { "epoch": 2.6244218316373726, "grad_norm": 0.05915123224258423, "learning_rate": 0.01, "loss": 2.0211, "step": 25533 }, { "epoch": 2.6247301880974407, "grad_norm": 0.09082819521427155, "learning_rate": 0.01, "loss": 2.038, "step": 25536 }, { "epoch": 2.6250385445575084, "grad_norm": 0.04604744166135788, "learning_rate": 0.01, "loss": 2.0395, "step": 25539 }, { "epoch": 2.6253469010175765, "grad_norm": 0.04548676684498787, "learning_rate": 0.01, "loss": 2.0144, "step": 25542 }, { "epoch": 2.625655257477644, "grad_norm": 0.03629077225923538, "learning_rate": 0.01, "loss": 2.0237, "step": 25545 }, { "epoch": 2.625963613937712, "grad_norm": 0.060490988194942474, "learning_rate": 0.01, "loss": 2.0049, "step": 25548 }, { "epoch": 2.62627197039778, "grad_norm": 0.12501440942287445, "learning_rate": 0.01, "loss": 2.0289, "step": 25551 }, { "epoch": 2.6265803268578476, "grad_norm": 0.052221618592739105, "learning_rate": 0.01, "loss": 2.0092, "step": 25554 }, { "epoch": 2.6268886833179153, "grad_norm": 0.08127149194478989, "learning_rate": 0.01, "loss": 2.0133, "step": 25557 }, { "epoch": 2.6271970397779834, "grad_norm": 0.13186825811862946, "learning_rate": 0.01, "loss": 2.0281, "step": 25560 }, { "epoch": 2.6275053962380515, "grad_norm": 0.10297831892967224, "learning_rate": 0.01, "loss": 2.0137, "step": 25563 }, { "epoch": 2.627813752698119, "grad_norm": 0.04737088829278946, "learning_rate": 0.01, "loss": 2.0334, "step": 25566 }, { "epoch": 2.628122109158187, "grad_norm": 0.055748652666807175, "learning_rate": 0.01, "loss": 2.0085, "step": 25569 }, { "epoch": 2.628430465618255, "grad_norm": 0.08968318998813629, "learning_rate": 0.01, "loss": 2.023, "step": 25572 }, { "epoch": 2.6287388220783225, "grad_norm": 0.04962621256709099, "learning_rate": 0.01, "loss": 2.0111, "step": 25575 }, { "epoch": 2.62904717853839, "grad_norm": 0.033645693212747574, "learning_rate": 0.01, "loss": 2.0332, "step": 25578 }, { "epoch": 2.6293555349984583, "grad_norm": 0.05864016339182854, "learning_rate": 0.01, "loss": 2.0159, "step": 25581 }, { "epoch": 2.629663891458526, "grad_norm": 0.12207403779029846, "learning_rate": 0.01, "loss": 2.0282, "step": 25584 }, { "epoch": 2.6299722479185936, "grad_norm": 0.049948643893003464, "learning_rate": 0.01, "loss": 2.0534, "step": 25587 }, { "epoch": 2.6302806043786617, "grad_norm": 0.08774693310260773, "learning_rate": 0.01, "loss": 2.0214, "step": 25590 }, { "epoch": 2.63058896083873, "grad_norm": 0.053613871335983276, "learning_rate": 0.01, "loss": 2.0171, "step": 25593 }, { "epoch": 2.6308973172987975, "grad_norm": 0.06298764050006866, "learning_rate": 0.01, "loss": 2.0197, "step": 25596 }, { "epoch": 2.631205673758865, "grad_norm": 0.03511015325784683, "learning_rate": 0.01, "loss": 2.0239, "step": 25599 }, { "epoch": 2.6315140302189333, "grad_norm": 0.09345996379852295, "learning_rate": 0.01, "loss": 2.0239, "step": 25602 }, { "epoch": 2.631822386679001, "grad_norm": 0.06202877685427666, "learning_rate": 0.01, "loss": 2.0286, "step": 25605 }, { "epoch": 2.6321307431390686, "grad_norm": 0.10231085866689682, "learning_rate": 0.01, "loss": 2.0324, "step": 25608 }, { "epoch": 2.6324390995991367, "grad_norm": 0.12403954565525055, "learning_rate": 0.01, "loss": 2.0074, "step": 25611 }, { "epoch": 2.6327474560592043, "grad_norm": 0.059275902807712555, "learning_rate": 0.01, "loss": 2.0093, "step": 25614 }, { "epoch": 2.633055812519272, "grad_norm": 0.04833563044667244, "learning_rate": 0.01, "loss": 2.0218, "step": 25617 }, { "epoch": 2.63336416897934, "grad_norm": 0.04218841344118118, "learning_rate": 0.01, "loss": 2.0132, "step": 25620 }, { "epoch": 2.633672525439408, "grad_norm": 0.1189088523387909, "learning_rate": 0.01, "loss": 2.0407, "step": 25623 }, { "epoch": 2.633980881899476, "grad_norm": 0.11460559070110321, "learning_rate": 0.01, "loss": 2.0083, "step": 25626 }, { "epoch": 2.6342892383595435, "grad_norm": 0.13970693945884705, "learning_rate": 0.01, "loss": 2.0195, "step": 25629 }, { "epoch": 2.6345975948196116, "grad_norm": 0.06081441789865494, "learning_rate": 0.01, "loss": 2.0285, "step": 25632 }, { "epoch": 2.6349059512796793, "grad_norm": 0.055472832173109055, "learning_rate": 0.01, "loss": 2.0019, "step": 25635 }, { "epoch": 2.635214307739747, "grad_norm": 0.06767648458480835, "learning_rate": 0.01, "loss": 2.0193, "step": 25638 }, { "epoch": 2.635522664199815, "grad_norm": 0.060980260372161865, "learning_rate": 0.01, "loss": 2.0137, "step": 25641 }, { "epoch": 2.6358310206598827, "grad_norm": 0.04839742183685303, "learning_rate": 0.01, "loss": 1.9982, "step": 25644 }, { "epoch": 2.6361393771199504, "grad_norm": 0.04725825786590576, "learning_rate": 0.01, "loss": 2.0176, "step": 25647 }, { "epoch": 2.6364477335800185, "grad_norm": 0.04045959562063217, "learning_rate": 0.01, "loss": 2.0455, "step": 25650 }, { "epoch": 2.6367560900400866, "grad_norm": 0.07219504565000534, "learning_rate": 0.01, "loss": 2.0391, "step": 25653 }, { "epoch": 2.6370644465001543, "grad_norm": 0.0438094437122345, "learning_rate": 0.01, "loss": 2.029, "step": 25656 }, { "epoch": 2.637372802960222, "grad_norm": 0.06555571407079697, "learning_rate": 0.01, "loss": 2.033, "step": 25659 }, { "epoch": 2.63768115942029, "grad_norm": 0.07731533795595169, "learning_rate": 0.01, "loss": 2.0432, "step": 25662 }, { "epoch": 2.6379895158803577, "grad_norm": 0.045945990830659866, "learning_rate": 0.01, "loss": 2.0308, "step": 25665 }, { "epoch": 2.6382978723404253, "grad_norm": 0.07175582647323608, "learning_rate": 0.01, "loss": 2.019, "step": 25668 }, { "epoch": 2.6386062288004934, "grad_norm": 0.07860400527715683, "learning_rate": 0.01, "loss": 2.0291, "step": 25671 }, { "epoch": 2.638914585260561, "grad_norm": 0.05635571479797363, "learning_rate": 0.01, "loss": 2.0519, "step": 25674 }, { "epoch": 2.639222941720629, "grad_norm": 0.07352261245250702, "learning_rate": 0.01, "loss": 2.0235, "step": 25677 }, { "epoch": 2.639531298180697, "grad_norm": 0.07502644509077072, "learning_rate": 0.01, "loss": 1.9877, "step": 25680 }, { "epoch": 2.639839654640765, "grad_norm": 0.05903060361742973, "learning_rate": 0.01, "loss": 2.012, "step": 25683 }, { "epoch": 2.6401480111008326, "grad_norm": 0.07454460859298706, "learning_rate": 0.01, "loss": 1.9988, "step": 25686 }, { "epoch": 2.6404563675609003, "grad_norm": 0.06615950912237167, "learning_rate": 0.01, "loss": 2.0379, "step": 25689 }, { "epoch": 2.6407647240209684, "grad_norm": 0.07897205650806427, "learning_rate": 0.01, "loss": 2.0366, "step": 25692 }, { "epoch": 2.641073080481036, "grad_norm": 0.12898573279380798, "learning_rate": 0.01, "loss": 2.0252, "step": 25695 }, { "epoch": 2.6413814369411037, "grad_norm": 0.10731782764196396, "learning_rate": 0.01, "loss": 2.0508, "step": 25698 }, { "epoch": 2.641689793401172, "grad_norm": 0.07936359196901321, "learning_rate": 0.01, "loss": 1.9993, "step": 25701 }, { "epoch": 2.6419981498612395, "grad_norm": 0.07443369179964066, "learning_rate": 0.01, "loss": 2.0586, "step": 25704 }, { "epoch": 2.6423065063213076, "grad_norm": 0.07027627527713776, "learning_rate": 0.01, "loss": 2.0375, "step": 25707 }, { "epoch": 2.6426148627813753, "grad_norm": 0.04909298196434975, "learning_rate": 0.01, "loss": 2.0234, "step": 25710 }, { "epoch": 2.6429232192414434, "grad_norm": 0.09595979005098343, "learning_rate": 0.01, "loss": 2.0072, "step": 25713 }, { "epoch": 2.643231575701511, "grad_norm": 0.07217621803283691, "learning_rate": 0.01, "loss": 2.0187, "step": 25716 }, { "epoch": 2.6435399321615787, "grad_norm": 0.10547403246164322, "learning_rate": 0.01, "loss": 2.0351, "step": 25719 }, { "epoch": 2.643848288621647, "grad_norm": 0.048883359879255295, "learning_rate": 0.01, "loss": 1.9996, "step": 25722 }, { "epoch": 2.6441566450817144, "grad_norm": 0.09399348497390747, "learning_rate": 0.01, "loss": 2.0112, "step": 25725 }, { "epoch": 2.644465001541782, "grad_norm": 0.05369775742292404, "learning_rate": 0.01, "loss": 2.0483, "step": 25728 }, { "epoch": 2.64477335800185, "grad_norm": 0.08048120886087418, "learning_rate": 0.01, "loss": 2.0279, "step": 25731 }, { "epoch": 2.645081714461918, "grad_norm": 0.07802169024944305, "learning_rate": 0.01, "loss": 2.0178, "step": 25734 }, { "epoch": 2.645390070921986, "grad_norm": 0.06137097254395485, "learning_rate": 0.01, "loss": 2.0382, "step": 25737 }, { "epoch": 2.6456984273820536, "grad_norm": 0.05498100444674492, "learning_rate": 0.01, "loss": 2.0252, "step": 25740 }, { "epoch": 2.6460067838421217, "grad_norm": 0.057225704193115234, "learning_rate": 0.01, "loss": 2.0243, "step": 25743 }, { "epoch": 2.6463151403021894, "grad_norm": 0.06727226078510284, "learning_rate": 0.01, "loss": 2.0316, "step": 25746 }, { "epoch": 2.646623496762257, "grad_norm": 0.04740385711193085, "learning_rate": 0.01, "loss": 2.0263, "step": 25749 }, { "epoch": 2.646931853222325, "grad_norm": 0.04808951914310455, "learning_rate": 0.01, "loss": 2.0063, "step": 25752 }, { "epoch": 2.647240209682393, "grad_norm": 0.04689471423625946, "learning_rate": 0.01, "loss": 2.0209, "step": 25755 }, { "epoch": 2.6475485661424605, "grad_norm": 0.10928355902433395, "learning_rate": 0.01, "loss": 1.9951, "step": 25758 }, { "epoch": 2.6478569226025286, "grad_norm": 0.054571595042943954, "learning_rate": 0.01, "loss": 2.0216, "step": 25761 }, { "epoch": 2.6481652790625962, "grad_norm": 0.0809570774435997, "learning_rate": 0.01, "loss": 2.0451, "step": 25764 }, { "epoch": 2.6484736355226643, "grad_norm": 0.10407903790473938, "learning_rate": 0.01, "loss": 2.0205, "step": 25767 }, { "epoch": 2.648781991982732, "grad_norm": 0.045180514454841614, "learning_rate": 0.01, "loss": 2.0137, "step": 25770 }, { "epoch": 2.6490903484428, "grad_norm": 0.07642797380685806, "learning_rate": 0.01, "loss": 1.9826, "step": 25773 }, { "epoch": 2.6493987049028678, "grad_norm": 0.06943316757678986, "learning_rate": 0.01, "loss": 2.0186, "step": 25776 }, { "epoch": 2.6497070613629354, "grad_norm": 0.06536037474870682, "learning_rate": 0.01, "loss": 2.008, "step": 25779 }, { "epoch": 2.6500154178230035, "grad_norm": 0.04698020964860916, "learning_rate": 0.01, "loss": 2.0149, "step": 25782 }, { "epoch": 2.650323774283071, "grad_norm": 0.05523454770445824, "learning_rate": 0.01, "loss": 2.0191, "step": 25785 }, { "epoch": 2.650632130743139, "grad_norm": 0.043647054582834244, "learning_rate": 0.01, "loss": 1.9993, "step": 25788 }, { "epoch": 2.650940487203207, "grad_norm": 0.05682402104139328, "learning_rate": 0.01, "loss": 2.0315, "step": 25791 }, { "epoch": 2.6512488436632746, "grad_norm": 0.10919704288244247, "learning_rate": 0.01, "loss": 1.9967, "step": 25794 }, { "epoch": 2.6515572001233427, "grad_norm": 0.0895003080368042, "learning_rate": 0.01, "loss": 2.0361, "step": 25797 }, { "epoch": 2.6518655565834104, "grad_norm": 0.07750852406024933, "learning_rate": 0.01, "loss": 2.0079, "step": 25800 }, { "epoch": 2.6521739130434785, "grad_norm": 0.07465649396181107, "learning_rate": 0.01, "loss": 2.0272, "step": 25803 }, { "epoch": 2.652482269503546, "grad_norm": 0.042654186487197876, "learning_rate": 0.01, "loss": 2.0386, "step": 25806 }, { "epoch": 2.652790625963614, "grad_norm": 0.04910847172141075, "learning_rate": 0.01, "loss": 2.0049, "step": 25809 }, { "epoch": 2.653098982423682, "grad_norm": 0.04584549367427826, "learning_rate": 0.01, "loss": 2.0232, "step": 25812 }, { "epoch": 2.6534073388837496, "grad_norm": 0.03400958329439163, "learning_rate": 0.01, "loss": 1.9994, "step": 25815 }, { "epoch": 2.6537156953438172, "grad_norm": 0.11867182701826096, "learning_rate": 0.01, "loss": 2.0275, "step": 25818 }, { "epoch": 2.6540240518038853, "grad_norm": 0.09303310513496399, "learning_rate": 0.01, "loss": 2.0533, "step": 25821 }, { "epoch": 2.654332408263953, "grad_norm": 0.04951944947242737, "learning_rate": 0.01, "loss": 2.0391, "step": 25824 }, { "epoch": 2.654640764724021, "grad_norm": 0.05893867462873459, "learning_rate": 0.01, "loss": 2.0109, "step": 25827 }, { "epoch": 2.6549491211840888, "grad_norm": 0.034138891845941544, "learning_rate": 0.01, "loss": 2.0101, "step": 25830 }, { "epoch": 2.655257477644157, "grad_norm": 0.039834585040807724, "learning_rate": 0.01, "loss": 2.0008, "step": 25833 }, { "epoch": 2.6555658341042245, "grad_norm": 0.10098033398389816, "learning_rate": 0.01, "loss": 2.0156, "step": 25836 }, { "epoch": 2.655874190564292, "grad_norm": 0.07205129414796829, "learning_rate": 0.01, "loss": 2.0256, "step": 25839 }, { "epoch": 2.6561825470243603, "grad_norm": 0.04370702803134918, "learning_rate": 0.01, "loss": 2.0031, "step": 25842 }, { "epoch": 2.656490903484428, "grad_norm": 0.10662799328565598, "learning_rate": 0.01, "loss": 2.0311, "step": 25845 }, { "epoch": 2.6567992599444956, "grad_norm": 0.1833692342042923, "learning_rate": 0.01, "loss": 2.0585, "step": 25848 }, { "epoch": 2.6571076164045637, "grad_norm": 0.12229418754577637, "learning_rate": 0.01, "loss": 2.0219, "step": 25851 }, { "epoch": 2.6574159728646314, "grad_norm": 0.05533694103360176, "learning_rate": 0.01, "loss": 2.026, "step": 25854 }, { "epoch": 2.6577243293246995, "grad_norm": 0.042578935623168945, "learning_rate": 0.01, "loss": 2.0247, "step": 25857 }, { "epoch": 2.658032685784767, "grad_norm": 0.034579649567604065, "learning_rate": 0.01, "loss": 2.0286, "step": 25860 }, { "epoch": 2.6583410422448353, "grad_norm": 0.052627481520175934, "learning_rate": 0.01, "loss": 1.986, "step": 25863 }, { "epoch": 2.658649398704903, "grad_norm": 0.04877294600009918, "learning_rate": 0.01, "loss": 2.0157, "step": 25866 }, { "epoch": 2.6589577551649706, "grad_norm": 0.05192401632666588, "learning_rate": 0.01, "loss": 2.0321, "step": 25869 }, { "epoch": 2.6592661116250387, "grad_norm": 0.08188097178936005, "learning_rate": 0.01, "loss": 2.0074, "step": 25872 }, { "epoch": 2.6595744680851063, "grad_norm": 0.06215586140751839, "learning_rate": 0.01, "loss": 1.9925, "step": 25875 }, { "epoch": 2.659882824545174, "grad_norm": 0.10561207681894302, "learning_rate": 0.01, "loss": 2.0233, "step": 25878 }, { "epoch": 2.660191181005242, "grad_norm": 0.08467070758342743, "learning_rate": 0.01, "loss": 2.0503, "step": 25881 }, { "epoch": 2.6604995374653098, "grad_norm": 0.05138308182358742, "learning_rate": 0.01, "loss": 2.0492, "step": 25884 }, { "epoch": 2.660807893925378, "grad_norm": 0.07387588173151016, "learning_rate": 0.01, "loss": 2.0134, "step": 25887 }, { "epoch": 2.6611162503854455, "grad_norm": 0.06682645529508591, "learning_rate": 0.01, "loss": 2.0508, "step": 25890 }, { "epoch": 2.6614246068455136, "grad_norm": 0.056953735649585724, "learning_rate": 0.01, "loss": 2.0312, "step": 25893 }, { "epoch": 2.6617329633055813, "grad_norm": 0.03709590435028076, "learning_rate": 0.01, "loss": 2.0205, "step": 25896 }, { "epoch": 2.662041319765649, "grad_norm": 0.05895649641752243, "learning_rate": 0.01, "loss": 2.028, "step": 25899 }, { "epoch": 2.662349676225717, "grad_norm": 0.08740679174661636, "learning_rate": 0.01, "loss": 2.0156, "step": 25902 }, { "epoch": 2.6626580326857847, "grad_norm": 0.04745563119649887, "learning_rate": 0.01, "loss": 2.0338, "step": 25905 }, { "epoch": 2.6629663891458524, "grad_norm": 0.08748458325862885, "learning_rate": 0.01, "loss": 2.0392, "step": 25908 }, { "epoch": 2.6632747456059205, "grad_norm": 0.04833563417196274, "learning_rate": 0.01, "loss": 2.0128, "step": 25911 }, { "epoch": 2.663583102065988, "grad_norm": 0.03873222693800926, "learning_rate": 0.01, "loss": 2.0028, "step": 25914 }, { "epoch": 2.6638914585260562, "grad_norm": 0.07129956781864166, "learning_rate": 0.01, "loss": 2.0092, "step": 25917 }, { "epoch": 2.664199814986124, "grad_norm": 0.08984299004077911, "learning_rate": 0.01, "loss": 2.0206, "step": 25920 }, { "epoch": 2.664508171446192, "grad_norm": 0.03720667213201523, "learning_rate": 0.01, "loss": 2.0018, "step": 25923 }, { "epoch": 2.6648165279062597, "grad_norm": 0.09795575588941574, "learning_rate": 0.01, "loss": 2.0373, "step": 25926 }, { "epoch": 2.6651248843663273, "grad_norm": 0.1043957769870758, "learning_rate": 0.01, "loss": 2.0313, "step": 25929 }, { "epoch": 2.6654332408263954, "grad_norm": 0.04737646505236626, "learning_rate": 0.01, "loss": 2.0313, "step": 25932 }, { "epoch": 2.665741597286463, "grad_norm": 0.04864402487874031, "learning_rate": 0.01, "loss": 2.0068, "step": 25935 }, { "epoch": 2.6660499537465308, "grad_norm": 0.0386229082942009, "learning_rate": 0.01, "loss": 2.0259, "step": 25938 }, { "epoch": 2.666358310206599, "grad_norm": 0.04127798601984978, "learning_rate": 0.01, "loss": 2.0463, "step": 25941 }, { "epoch": 2.6666666666666665, "grad_norm": 0.07537275552749634, "learning_rate": 0.01, "loss": 2.0254, "step": 25944 }, { "epoch": 2.6669750231267346, "grad_norm": 0.041533395648002625, "learning_rate": 0.01, "loss": 2.0228, "step": 25947 }, { "epoch": 2.6672833795868023, "grad_norm": 0.045062318444252014, "learning_rate": 0.01, "loss": 2.0428, "step": 25950 }, { "epoch": 2.6675917360468704, "grad_norm": 0.08328119665384293, "learning_rate": 0.01, "loss": 2.0233, "step": 25953 }, { "epoch": 2.667900092506938, "grad_norm": 0.08402698487043381, "learning_rate": 0.01, "loss": 2.0303, "step": 25956 }, { "epoch": 2.6682084489670057, "grad_norm": 0.10571663826704025, "learning_rate": 0.01, "loss": 2.0292, "step": 25959 }, { "epoch": 2.668516805427074, "grad_norm": 0.05094289034605026, "learning_rate": 0.01, "loss": 2.0278, "step": 25962 }, { "epoch": 2.6688251618871415, "grad_norm": 0.034051552414894104, "learning_rate": 0.01, "loss": 2.0265, "step": 25965 }, { "epoch": 2.669133518347209, "grad_norm": 0.05703623592853546, "learning_rate": 0.01, "loss": 2.0272, "step": 25968 }, { "epoch": 2.6694418748072772, "grad_norm": 0.10606861114501953, "learning_rate": 0.01, "loss": 2.0179, "step": 25971 }, { "epoch": 2.669750231267345, "grad_norm": 0.11793088167905807, "learning_rate": 0.01, "loss": 2.0492, "step": 25974 }, { "epoch": 2.670058587727413, "grad_norm": 0.09058693051338196, "learning_rate": 0.01, "loss": 2.0213, "step": 25977 }, { "epoch": 2.6703669441874807, "grad_norm": 0.0677378848195076, "learning_rate": 0.01, "loss": 2.0036, "step": 25980 }, { "epoch": 2.6706753006475488, "grad_norm": 0.05636313930153847, "learning_rate": 0.01, "loss": 2.0318, "step": 25983 }, { "epoch": 2.6709836571076164, "grad_norm": 0.04547611251473427, "learning_rate": 0.01, "loss": 2.0236, "step": 25986 }, { "epoch": 2.671292013567684, "grad_norm": 0.04852728173136711, "learning_rate": 0.01, "loss": 2.0125, "step": 25989 }, { "epoch": 2.671600370027752, "grad_norm": 0.04465510696172714, "learning_rate": 0.01, "loss": 1.9907, "step": 25992 }, { "epoch": 2.67190872648782, "grad_norm": 0.043789103627204895, "learning_rate": 0.01, "loss": 2.0213, "step": 25995 }, { "epoch": 2.6722170829478875, "grad_norm": 0.12845320999622345, "learning_rate": 0.01, "loss": 2.0429, "step": 25998 }, { "epoch": 2.6725254394079556, "grad_norm": 0.054881785064935684, "learning_rate": 0.01, "loss": 2.0252, "step": 26001 }, { "epoch": 2.6728337958680233, "grad_norm": 0.0845002606511116, "learning_rate": 0.01, "loss": 2.0475, "step": 26004 }, { "epoch": 2.6731421523280914, "grad_norm": 0.04782318323850632, "learning_rate": 0.01, "loss": 2.0411, "step": 26007 }, { "epoch": 2.673450508788159, "grad_norm": 0.04288490489125252, "learning_rate": 0.01, "loss": 2.0376, "step": 26010 }, { "epoch": 2.673758865248227, "grad_norm": 0.10904238373041153, "learning_rate": 0.01, "loss": 2.0316, "step": 26013 }, { "epoch": 2.674067221708295, "grad_norm": 0.08276703208684921, "learning_rate": 0.01, "loss": 2.032, "step": 26016 }, { "epoch": 2.6743755781683625, "grad_norm": 0.06076609715819359, "learning_rate": 0.01, "loss": 2.0507, "step": 26019 }, { "epoch": 2.6746839346284306, "grad_norm": 0.043946780264377594, "learning_rate": 0.01, "loss": 2.0188, "step": 26022 }, { "epoch": 2.6749922910884982, "grad_norm": 0.03716238588094711, "learning_rate": 0.01, "loss": 2.0183, "step": 26025 }, { "epoch": 2.675300647548566, "grad_norm": 0.04982864856719971, "learning_rate": 0.01, "loss": 2.0114, "step": 26028 }, { "epoch": 2.675609004008634, "grad_norm": 0.05211315676569939, "learning_rate": 0.01, "loss": 2.0305, "step": 26031 }, { "epoch": 2.6759173604687017, "grad_norm": 0.10215940326452255, "learning_rate": 0.01, "loss": 2.0365, "step": 26034 }, { "epoch": 2.6762257169287698, "grad_norm": 0.07742898166179657, "learning_rate": 0.01, "loss": 2.0173, "step": 26037 }, { "epoch": 2.6765340733888374, "grad_norm": 0.047653328627347946, "learning_rate": 0.01, "loss": 2.0198, "step": 26040 }, { "epoch": 2.6768424298489055, "grad_norm": 0.07179111242294312, "learning_rate": 0.01, "loss": 2.0283, "step": 26043 }, { "epoch": 2.677150786308973, "grad_norm": 0.044947609305381775, "learning_rate": 0.01, "loss": 2.0044, "step": 26046 }, { "epoch": 2.677459142769041, "grad_norm": 0.03128395974636078, "learning_rate": 0.01, "loss": 2.0097, "step": 26049 }, { "epoch": 2.677767499229109, "grad_norm": 0.037758342921733856, "learning_rate": 0.01, "loss": 2.0073, "step": 26052 }, { "epoch": 2.6780758556891766, "grad_norm": 0.059724219143390656, "learning_rate": 0.01, "loss": 2.0304, "step": 26055 }, { "epoch": 2.6783842121492443, "grad_norm": 0.09603296220302582, "learning_rate": 0.01, "loss": 2.0106, "step": 26058 }, { "epoch": 2.6786925686093124, "grad_norm": 0.059488292783498764, "learning_rate": 0.01, "loss": 2.0114, "step": 26061 }, { "epoch": 2.6790009250693805, "grad_norm": 0.07343467324972153, "learning_rate": 0.01, "loss": 2.0494, "step": 26064 }, { "epoch": 2.679309281529448, "grad_norm": 0.054782915860414505, "learning_rate": 0.01, "loss": 2.023, "step": 26067 }, { "epoch": 2.679617637989516, "grad_norm": 0.042636722326278687, "learning_rate": 0.01, "loss": 2.0376, "step": 26070 }, { "epoch": 2.679925994449584, "grad_norm": 0.06803017109632492, "learning_rate": 0.01, "loss": 1.9864, "step": 26073 }, { "epoch": 2.6802343509096516, "grad_norm": 0.10990459471940994, "learning_rate": 0.01, "loss": 2.0343, "step": 26076 }, { "epoch": 2.6805427073697192, "grad_norm": 0.035727545619010925, "learning_rate": 0.01, "loss": 2.0195, "step": 26079 }, { "epoch": 2.6808510638297873, "grad_norm": 0.11241263896226883, "learning_rate": 0.01, "loss": 2.038, "step": 26082 }, { "epoch": 2.681159420289855, "grad_norm": 0.06156554073095322, "learning_rate": 0.01, "loss": 2.0191, "step": 26085 }, { "epoch": 2.6814677767499226, "grad_norm": 0.0609101839363575, "learning_rate": 0.01, "loss": 1.994, "step": 26088 }, { "epoch": 2.6817761332099908, "grad_norm": 0.046859260648489, "learning_rate": 0.01, "loss": 2.0226, "step": 26091 }, { "epoch": 2.682084489670059, "grad_norm": 0.05803006887435913, "learning_rate": 0.01, "loss": 2.0347, "step": 26094 }, { "epoch": 2.6823928461301265, "grad_norm": 0.09425931423902512, "learning_rate": 0.01, "loss": 2.0073, "step": 26097 }, { "epoch": 2.682701202590194, "grad_norm": 0.07893898338079453, "learning_rate": 0.01, "loss": 2.0248, "step": 26100 }, { "epoch": 2.6830095590502623, "grad_norm": 0.044163595885038376, "learning_rate": 0.01, "loss": 2.0217, "step": 26103 }, { "epoch": 2.68331791551033, "grad_norm": 0.061135292053222656, "learning_rate": 0.01, "loss": 2.0006, "step": 26106 }, { "epoch": 2.6836262719703976, "grad_norm": 0.03806147351861, "learning_rate": 0.01, "loss": 2.0226, "step": 26109 }, { "epoch": 2.6839346284304657, "grad_norm": 0.050713907927274704, "learning_rate": 0.01, "loss": 2.0088, "step": 26112 }, { "epoch": 2.6842429848905334, "grad_norm": 0.05512484535574913, "learning_rate": 0.01, "loss": 2.038, "step": 26115 }, { "epoch": 2.684551341350601, "grad_norm": 0.048581019043922424, "learning_rate": 0.01, "loss": 2.0388, "step": 26118 }, { "epoch": 2.684859697810669, "grad_norm": 0.04121122509241104, "learning_rate": 0.01, "loss": 2.0176, "step": 26121 }, { "epoch": 2.6851680542707372, "grad_norm": 0.036977533251047134, "learning_rate": 0.01, "loss": 2.0341, "step": 26124 }, { "epoch": 2.685476410730805, "grad_norm": 0.10436423867940903, "learning_rate": 0.01, "loss": 2.0122, "step": 26127 }, { "epoch": 2.6857847671908726, "grad_norm": 0.05357207730412483, "learning_rate": 0.01, "loss": 2.0076, "step": 26130 }, { "epoch": 2.6860931236509407, "grad_norm": 0.05386963114142418, "learning_rate": 0.01, "loss": 2.0012, "step": 26133 }, { "epoch": 2.6864014801110083, "grad_norm": 0.03591128811240196, "learning_rate": 0.01, "loss": 2.0291, "step": 26136 }, { "epoch": 2.686709836571076, "grad_norm": 0.036740854382514954, "learning_rate": 0.01, "loss": 2.0142, "step": 26139 }, { "epoch": 2.687018193031144, "grad_norm": 0.05966171249747276, "learning_rate": 0.01, "loss": 2.0145, "step": 26142 }, { "epoch": 2.6873265494912117, "grad_norm": 0.10529398918151855, "learning_rate": 0.01, "loss": 2.0091, "step": 26145 }, { "epoch": 2.6876349059512794, "grad_norm": 0.07834989577531815, "learning_rate": 0.01, "loss": 2.0211, "step": 26148 }, { "epoch": 2.6879432624113475, "grad_norm": 0.10702015459537506, "learning_rate": 0.01, "loss": 2.0368, "step": 26151 }, { "epoch": 2.6882516188714156, "grad_norm": 0.04647925868630409, "learning_rate": 0.01, "loss": 2.032, "step": 26154 }, { "epoch": 2.6885599753314833, "grad_norm": 0.04523247852921486, "learning_rate": 0.01, "loss": 2.0129, "step": 26157 }, { "epoch": 2.688868331791551, "grad_norm": 0.04293215274810791, "learning_rate": 0.01, "loss": 1.9982, "step": 26160 }, { "epoch": 2.689176688251619, "grad_norm": 0.06344863772392273, "learning_rate": 0.01, "loss": 2.028, "step": 26163 }, { "epoch": 2.6894850447116867, "grad_norm": 0.047177109867334366, "learning_rate": 0.01, "loss": 2.0154, "step": 26166 }, { "epoch": 2.6897934011717544, "grad_norm": 0.09941892325878143, "learning_rate": 0.01, "loss": 2.0204, "step": 26169 }, { "epoch": 2.6901017576318225, "grad_norm": 0.04238753393292427, "learning_rate": 0.01, "loss": 2.0164, "step": 26172 }, { "epoch": 2.69041011409189, "grad_norm": 0.0550382174551487, "learning_rate": 0.01, "loss": 2.0283, "step": 26175 }, { "epoch": 2.690718470551958, "grad_norm": 0.09788551181554794, "learning_rate": 0.01, "loss": 2.0071, "step": 26178 }, { "epoch": 2.691026827012026, "grad_norm": 0.044239919632673264, "learning_rate": 0.01, "loss": 2.0163, "step": 26181 }, { "epoch": 2.691335183472094, "grad_norm": 0.10677097737789154, "learning_rate": 0.01, "loss": 2.0041, "step": 26184 }, { "epoch": 2.6916435399321617, "grad_norm": 0.04113283380866051, "learning_rate": 0.01, "loss": 2.0061, "step": 26187 }, { "epoch": 2.6919518963922293, "grad_norm": 0.05748388543725014, "learning_rate": 0.01, "loss": 2.0144, "step": 26190 }, { "epoch": 2.6922602528522974, "grad_norm": 0.04931147024035454, "learning_rate": 0.01, "loss": 1.9967, "step": 26193 }, { "epoch": 2.692568609312365, "grad_norm": 0.051582470536231995, "learning_rate": 0.01, "loss": 2.0358, "step": 26196 }, { "epoch": 2.6928769657724327, "grad_norm": 0.1054171696305275, "learning_rate": 0.01, "loss": 2.0273, "step": 26199 }, { "epoch": 2.693185322232501, "grad_norm": 0.09005532413721085, "learning_rate": 0.01, "loss": 2.0256, "step": 26202 }, { "epoch": 2.6934936786925685, "grad_norm": 0.08124802261590958, "learning_rate": 0.01, "loss": 2.0252, "step": 26205 }, { "epoch": 2.6938020351526366, "grad_norm": 0.06238120049238205, "learning_rate": 0.01, "loss": 1.9943, "step": 26208 }, { "epoch": 2.6941103916127043, "grad_norm": 0.0375673770904541, "learning_rate": 0.01, "loss": 2.0124, "step": 26211 }, { "epoch": 2.6944187480727724, "grad_norm": 0.04710889235138893, "learning_rate": 0.01, "loss": 2.0343, "step": 26214 }, { "epoch": 2.69472710453284, "grad_norm": 0.0509529784321785, "learning_rate": 0.01, "loss": 2.0015, "step": 26217 }, { "epoch": 2.6950354609929077, "grad_norm": 0.043452613055706024, "learning_rate": 0.01, "loss": 2.0079, "step": 26220 }, { "epoch": 2.695343817452976, "grad_norm": 0.07350075244903564, "learning_rate": 0.01, "loss": 2.0141, "step": 26223 }, { "epoch": 2.6956521739130435, "grad_norm": 0.07672197371721268, "learning_rate": 0.01, "loss": 2.0002, "step": 26226 }, { "epoch": 2.695960530373111, "grad_norm": 0.1255597323179245, "learning_rate": 0.01, "loss": 2.0403, "step": 26229 }, { "epoch": 2.6962688868331792, "grad_norm": 0.0636356994509697, "learning_rate": 0.01, "loss": 2.0438, "step": 26232 }, { "epoch": 2.696577243293247, "grad_norm": 0.06334753334522247, "learning_rate": 0.01, "loss": 2.0339, "step": 26235 }, { "epoch": 2.696885599753315, "grad_norm": 0.0592232346534729, "learning_rate": 0.01, "loss": 2.0223, "step": 26238 }, { "epoch": 2.6971939562133826, "grad_norm": 0.06983037292957306, "learning_rate": 0.01, "loss": 2.0418, "step": 26241 }, { "epoch": 2.6975023126734508, "grad_norm": 0.042423997074365616, "learning_rate": 0.01, "loss": 2.0219, "step": 26244 }, { "epoch": 2.6978106691335184, "grad_norm": 0.12152963131666183, "learning_rate": 0.01, "loss": 2.026, "step": 26247 }, { "epoch": 2.698119025593586, "grad_norm": 0.03922561556100845, "learning_rate": 0.01, "loss": 1.9994, "step": 26250 }, { "epoch": 2.698427382053654, "grad_norm": 0.04790705442428589, "learning_rate": 0.01, "loss": 2.0155, "step": 26253 }, { "epoch": 2.698735738513722, "grad_norm": 0.0596231147646904, "learning_rate": 0.01, "loss": 2.0317, "step": 26256 }, { "epoch": 2.6990440949737895, "grad_norm": 0.06809087842702866, "learning_rate": 0.01, "loss": 2.001, "step": 26259 }, { "epoch": 2.6993524514338576, "grad_norm": 0.08235184103250504, "learning_rate": 0.01, "loss": 2.0284, "step": 26262 }, { "epoch": 2.6996608078939253, "grad_norm": 0.07293444871902466, "learning_rate": 0.01, "loss": 2.0309, "step": 26265 }, { "epoch": 2.6999691643539934, "grad_norm": 0.04277535527944565, "learning_rate": 0.01, "loss": 2.0236, "step": 26268 }, { "epoch": 2.700277520814061, "grad_norm": 0.10841301828622818, "learning_rate": 0.01, "loss": 2.0275, "step": 26271 }, { "epoch": 2.700585877274129, "grad_norm": 0.04575572907924652, "learning_rate": 0.01, "loss": 2.0331, "step": 26274 }, { "epoch": 2.700894233734197, "grad_norm": 0.0422062948346138, "learning_rate": 0.01, "loss": 2.0441, "step": 26277 }, { "epoch": 2.7012025901942645, "grad_norm": 0.034286245703697205, "learning_rate": 0.01, "loss": 2.0178, "step": 26280 }, { "epoch": 2.7015109466543326, "grad_norm": 0.040598925203084946, "learning_rate": 0.01, "loss": 2.0192, "step": 26283 }, { "epoch": 2.7018193031144, "grad_norm": 0.20375369489192963, "learning_rate": 0.01, "loss": 1.9977, "step": 26286 }, { "epoch": 2.702127659574468, "grad_norm": 0.1908276230096817, "learning_rate": 0.01, "loss": 2.0169, "step": 26289 }, { "epoch": 2.702436016034536, "grad_norm": 0.06826306879520416, "learning_rate": 0.01, "loss": 2.0231, "step": 26292 }, { "epoch": 2.7027443724946036, "grad_norm": 0.03917758911848068, "learning_rate": 0.01, "loss": 1.9938, "step": 26295 }, { "epoch": 2.7030527289546717, "grad_norm": 0.041581716388463974, "learning_rate": 0.01, "loss": 2.0375, "step": 26298 }, { "epoch": 2.7033610854147394, "grad_norm": 0.03759411349892616, "learning_rate": 0.01, "loss": 2.0156, "step": 26301 }, { "epoch": 2.7036694418748075, "grad_norm": 0.043274398893117905, "learning_rate": 0.01, "loss": 2.025, "step": 26304 }, { "epoch": 2.703977798334875, "grad_norm": 0.03156547620892525, "learning_rate": 0.01, "loss": 2.0151, "step": 26307 }, { "epoch": 2.704286154794943, "grad_norm": 0.042134515941143036, "learning_rate": 0.01, "loss": 2.0363, "step": 26310 }, { "epoch": 2.704594511255011, "grad_norm": 0.15473207831382751, "learning_rate": 0.01, "loss": 2.0455, "step": 26313 }, { "epoch": 2.7049028677150786, "grad_norm": 0.16576655209064484, "learning_rate": 0.01, "loss": 1.9929, "step": 26316 }, { "epoch": 2.7052112241751463, "grad_norm": 0.2106962651014328, "learning_rate": 0.01, "loss": 2.0215, "step": 26319 }, { "epoch": 2.7055195806352144, "grad_norm": 0.06543057411909103, "learning_rate": 0.01, "loss": 2.0368, "step": 26322 }, { "epoch": 2.705827937095282, "grad_norm": 0.05747131630778313, "learning_rate": 0.01, "loss": 2.0092, "step": 26325 }, { "epoch": 2.70613629355535, "grad_norm": 0.03801654651761055, "learning_rate": 0.01, "loss": 1.9988, "step": 26328 }, { "epoch": 2.706444650015418, "grad_norm": 0.047034505754709244, "learning_rate": 0.01, "loss": 2.0524, "step": 26331 }, { "epoch": 2.706753006475486, "grad_norm": 0.03980104252696037, "learning_rate": 0.01, "loss": 1.9928, "step": 26334 }, { "epoch": 2.7070613629355536, "grad_norm": 0.04470902681350708, "learning_rate": 0.01, "loss": 2.0276, "step": 26337 }, { "epoch": 2.707369719395621, "grad_norm": 0.03287111967802048, "learning_rate": 0.01, "loss": 2.0186, "step": 26340 }, { "epoch": 2.7076780758556893, "grad_norm": 0.04395715892314911, "learning_rate": 0.01, "loss": 2.0066, "step": 26343 }, { "epoch": 2.707986432315757, "grad_norm": 0.06358876079320908, "learning_rate": 0.01, "loss": 2.0505, "step": 26346 }, { "epoch": 2.7082947887758246, "grad_norm": 0.03925269469618797, "learning_rate": 0.01, "loss": 2.0149, "step": 26349 }, { "epoch": 2.7086031452358927, "grad_norm": 0.16810861229896545, "learning_rate": 0.01, "loss": 2.0433, "step": 26352 }, { "epoch": 2.7089115016959604, "grad_norm": 0.09612049907445908, "learning_rate": 0.01, "loss": 2.0014, "step": 26355 }, { "epoch": 2.7092198581560285, "grad_norm": 0.048840370029211044, "learning_rate": 0.01, "loss": 2.0062, "step": 26358 }, { "epoch": 2.709528214616096, "grad_norm": 0.03775126487016678, "learning_rate": 0.01, "loss": 2.0149, "step": 26361 }, { "epoch": 2.7098365710761643, "grad_norm": 0.04588973522186279, "learning_rate": 0.01, "loss": 2.031, "step": 26364 }, { "epoch": 2.710144927536232, "grad_norm": 0.061829451471567154, "learning_rate": 0.01, "loss": 2.0076, "step": 26367 }, { "epoch": 2.7104532839962996, "grad_norm": 0.053572434931993484, "learning_rate": 0.01, "loss": 2.0167, "step": 26370 }, { "epoch": 2.7107616404563677, "grad_norm": 0.0460902638733387, "learning_rate": 0.01, "loss": 2.0515, "step": 26373 }, { "epoch": 2.7110699969164354, "grad_norm": 0.07622378319501877, "learning_rate": 0.01, "loss": 2.0229, "step": 26376 }, { "epoch": 2.711378353376503, "grad_norm": 0.04137422889471054, "learning_rate": 0.01, "loss": 1.9985, "step": 26379 }, { "epoch": 2.711686709836571, "grad_norm": 0.052519541233778, "learning_rate": 0.01, "loss": 2.0166, "step": 26382 }, { "epoch": 2.711995066296639, "grad_norm": 0.13548482954502106, "learning_rate": 0.01, "loss": 2.0221, "step": 26385 }, { "epoch": 2.712303422756707, "grad_norm": 0.05124654993414879, "learning_rate": 0.01, "loss": 2.02, "step": 26388 }, { "epoch": 2.7126117792167745, "grad_norm": 0.04233216866850853, "learning_rate": 0.01, "loss": 2.0131, "step": 26391 }, { "epoch": 2.7129201356768426, "grad_norm": 0.04341414198279381, "learning_rate": 0.01, "loss": 2.0088, "step": 26394 }, { "epoch": 2.7132284921369103, "grad_norm": 0.078862264752388, "learning_rate": 0.01, "loss": 2.0098, "step": 26397 }, { "epoch": 2.713536848596978, "grad_norm": 0.08351139724254608, "learning_rate": 0.01, "loss": 2.0078, "step": 26400 }, { "epoch": 2.713845205057046, "grad_norm": 0.10899659991264343, "learning_rate": 0.01, "loss": 2.0354, "step": 26403 }, { "epoch": 2.7141535615171137, "grad_norm": 0.11437533795833588, "learning_rate": 0.01, "loss": 2.0145, "step": 26406 }, { "epoch": 2.7144619179771814, "grad_norm": 0.04197553172707558, "learning_rate": 0.01, "loss": 2.01, "step": 26409 }, { "epoch": 2.7147702744372495, "grad_norm": 0.054637305438518524, "learning_rate": 0.01, "loss": 2.0087, "step": 26412 }, { "epoch": 2.715078630897317, "grad_norm": 0.058579228818416595, "learning_rate": 0.01, "loss": 2.0132, "step": 26415 }, { "epoch": 2.7153869873573853, "grad_norm": 0.03501029685139656, "learning_rate": 0.01, "loss": 2.0404, "step": 26418 }, { "epoch": 2.715695343817453, "grad_norm": 0.04865582287311554, "learning_rate": 0.01, "loss": 2.0221, "step": 26421 }, { "epoch": 2.716003700277521, "grad_norm": 0.08878735452890396, "learning_rate": 0.01, "loss": 2.0376, "step": 26424 }, { "epoch": 2.7163120567375887, "grad_norm": 0.06919421255588531, "learning_rate": 0.01, "loss": 2.0125, "step": 26427 }, { "epoch": 2.7166204131976563, "grad_norm": 0.05429752171039581, "learning_rate": 0.01, "loss": 2.0217, "step": 26430 }, { "epoch": 2.7169287696577245, "grad_norm": 0.10294333100318909, "learning_rate": 0.01, "loss": 2.0036, "step": 26433 }, { "epoch": 2.717237126117792, "grad_norm": 0.0625937432050705, "learning_rate": 0.01, "loss": 2.0137, "step": 26436 }, { "epoch": 2.7175454825778598, "grad_norm": 0.048780061304569244, "learning_rate": 0.01, "loss": 2.0324, "step": 26439 }, { "epoch": 2.717853839037928, "grad_norm": 0.03840133175253868, "learning_rate": 0.01, "loss": 2.0035, "step": 26442 }, { "epoch": 2.7181621954979955, "grad_norm": 0.04021105915307999, "learning_rate": 0.01, "loss": 2.0172, "step": 26445 }, { "epoch": 2.7184705519580636, "grad_norm": 0.04241623356938362, "learning_rate": 0.01, "loss": 2.0192, "step": 26448 }, { "epoch": 2.7187789084181313, "grad_norm": 0.04488721489906311, "learning_rate": 0.01, "loss": 2.0275, "step": 26451 }, { "epoch": 2.7190872648781994, "grad_norm": 0.09730737656354904, "learning_rate": 0.01, "loss": 2.0064, "step": 26454 }, { "epoch": 2.719395621338267, "grad_norm": 0.05743822455406189, "learning_rate": 0.01, "loss": 2.0294, "step": 26457 }, { "epoch": 2.7197039777983347, "grad_norm": 0.042568083852529526, "learning_rate": 0.01, "loss": 2.0172, "step": 26460 }, { "epoch": 2.720012334258403, "grad_norm": 0.050452932715415955, "learning_rate": 0.01, "loss": 2.0354, "step": 26463 }, { "epoch": 2.7203206907184705, "grad_norm": 0.04751294478774071, "learning_rate": 0.01, "loss": 2.0242, "step": 26466 }, { "epoch": 2.720629047178538, "grad_norm": 0.12711678445339203, "learning_rate": 0.01, "loss": 2.0163, "step": 26469 }, { "epoch": 2.7209374036386063, "grad_norm": 0.043656837195158005, "learning_rate": 0.01, "loss": 2.0234, "step": 26472 }, { "epoch": 2.721245760098674, "grad_norm": 0.061641763895750046, "learning_rate": 0.01, "loss": 2.0436, "step": 26475 }, { "epoch": 2.721554116558742, "grad_norm": 0.04843028262257576, "learning_rate": 0.01, "loss": 2.0422, "step": 26478 }, { "epoch": 2.7218624730188097, "grad_norm": 0.05009736493229866, "learning_rate": 0.01, "loss": 2.025, "step": 26481 }, { "epoch": 2.722170829478878, "grad_norm": 0.0330355204641819, "learning_rate": 0.01, "loss": 2.038, "step": 26484 }, { "epoch": 2.7224791859389454, "grad_norm": 0.05958884581923485, "learning_rate": 0.01, "loss": 1.9874, "step": 26487 }, { "epoch": 2.722787542399013, "grad_norm": 0.10207222402095795, "learning_rate": 0.01, "loss": 2.0332, "step": 26490 }, { "epoch": 2.723095898859081, "grad_norm": 0.06909897923469543, "learning_rate": 0.01, "loss": 2.0262, "step": 26493 }, { "epoch": 2.723404255319149, "grad_norm": 0.09485501050949097, "learning_rate": 0.01, "loss": 2.0064, "step": 26496 }, { "epoch": 2.7237126117792165, "grad_norm": 0.0501030869781971, "learning_rate": 0.01, "loss": 2.03, "step": 26499 }, { "epoch": 2.7240209682392846, "grad_norm": 0.07009368389844894, "learning_rate": 0.01, "loss": 2.0152, "step": 26502 }, { "epoch": 2.7243293246993523, "grad_norm": 0.0766390711069107, "learning_rate": 0.01, "loss": 2.0315, "step": 26505 }, { "epoch": 2.7246376811594204, "grad_norm": 0.050900235772132874, "learning_rate": 0.01, "loss": 1.9763, "step": 26508 }, { "epoch": 2.724946037619488, "grad_norm": 0.043463658541440964, "learning_rate": 0.01, "loss": 1.9971, "step": 26511 }, { "epoch": 2.725254394079556, "grad_norm": 0.07331634312868118, "learning_rate": 0.01, "loss": 2.0331, "step": 26514 }, { "epoch": 2.725562750539624, "grad_norm": 0.06863999366760254, "learning_rate": 0.01, "loss": 2.0025, "step": 26517 }, { "epoch": 2.7258711069996915, "grad_norm": 0.12374615669250488, "learning_rate": 0.01, "loss": 2.019, "step": 26520 }, { "epoch": 2.7261794634597596, "grad_norm": 0.05284014344215393, "learning_rate": 0.01, "loss": 2.0116, "step": 26523 }, { "epoch": 2.7264878199198272, "grad_norm": 0.07776612043380737, "learning_rate": 0.01, "loss": 2.0139, "step": 26526 }, { "epoch": 2.726796176379895, "grad_norm": 0.0325651615858078, "learning_rate": 0.01, "loss": 2.0118, "step": 26529 }, { "epoch": 2.727104532839963, "grad_norm": 0.03690047189593315, "learning_rate": 0.01, "loss": 2.029, "step": 26532 }, { "epoch": 2.7274128893000307, "grad_norm": 0.12211088836193085, "learning_rate": 0.01, "loss": 2.0267, "step": 26535 }, { "epoch": 2.727721245760099, "grad_norm": 0.05561113730072975, "learning_rate": 0.01, "loss": 2.0318, "step": 26538 }, { "epoch": 2.7280296022201664, "grad_norm": 0.05541878193616867, "learning_rate": 0.01, "loss": 2.0095, "step": 26541 }, { "epoch": 2.7283379586802345, "grad_norm": 0.037987880408763885, "learning_rate": 0.01, "loss": 2.0295, "step": 26544 }, { "epoch": 2.728646315140302, "grad_norm": 0.0748986229300499, "learning_rate": 0.01, "loss": 2.039, "step": 26547 }, { "epoch": 2.72895467160037, "grad_norm": 0.05377615988254547, "learning_rate": 0.01, "loss": 2.0223, "step": 26550 }, { "epoch": 2.729263028060438, "grad_norm": 0.11756724119186401, "learning_rate": 0.01, "loss": 2.0119, "step": 26553 }, { "epoch": 2.7295713845205056, "grad_norm": 0.046288661658763885, "learning_rate": 0.01, "loss": 2.0479, "step": 26556 }, { "epoch": 2.7298797409805733, "grad_norm": 0.041496362537145615, "learning_rate": 0.01, "loss": 2.0247, "step": 26559 }, { "epoch": 2.7301880974406414, "grad_norm": 0.036013491451740265, "learning_rate": 0.01, "loss": 2.0143, "step": 26562 }, { "epoch": 2.7304964539007095, "grad_norm": 0.044654857367277145, "learning_rate": 0.01, "loss": 2.0032, "step": 26565 }, { "epoch": 2.730804810360777, "grad_norm": 0.10827535390853882, "learning_rate": 0.01, "loss": 2.037, "step": 26568 }, { "epoch": 2.731113166820845, "grad_norm": 0.053766943514347076, "learning_rate": 0.01, "loss": 2.0142, "step": 26571 }, { "epoch": 2.731421523280913, "grad_norm": 0.10854397714138031, "learning_rate": 0.01, "loss": 2.0367, "step": 26574 }, { "epoch": 2.7317298797409806, "grad_norm": 0.10521306842565536, "learning_rate": 0.01, "loss": 2.0242, "step": 26577 }, { "epoch": 2.7320382362010482, "grad_norm": 0.06532178819179535, "learning_rate": 0.01, "loss": 2.0305, "step": 26580 }, { "epoch": 2.7323465926611163, "grad_norm": 0.06266357749700546, "learning_rate": 0.01, "loss": 2.0257, "step": 26583 }, { "epoch": 2.732654949121184, "grad_norm": 0.04027451574802399, "learning_rate": 0.01, "loss": 2.0301, "step": 26586 }, { "epoch": 2.7329633055812517, "grad_norm": 0.06135137006640434, "learning_rate": 0.01, "loss": 2.0116, "step": 26589 }, { "epoch": 2.7332716620413198, "grad_norm": 0.08157611638307571, "learning_rate": 0.01, "loss": 2.041, "step": 26592 }, { "epoch": 2.733580018501388, "grad_norm": 0.060507632791996, "learning_rate": 0.01, "loss": 2.0404, "step": 26595 }, { "epoch": 2.7338883749614555, "grad_norm": 0.07385814189910889, "learning_rate": 0.01, "loss": 2.0496, "step": 26598 }, { "epoch": 2.734196731421523, "grad_norm": 0.12423606961965561, "learning_rate": 0.01, "loss": 2.0281, "step": 26601 }, { "epoch": 2.7345050878815913, "grad_norm": 0.09299054741859436, "learning_rate": 0.01, "loss": 1.9909, "step": 26604 }, { "epoch": 2.734813444341659, "grad_norm": 0.07102067768573761, "learning_rate": 0.01, "loss": 2.0175, "step": 26607 }, { "epoch": 2.7351218008017266, "grad_norm": 0.036449629813432693, "learning_rate": 0.01, "loss": 2.0253, "step": 26610 }, { "epoch": 2.7354301572617947, "grad_norm": 0.11397985368967056, "learning_rate": 0.01, "loss": 2.0372, "step": 26613 }, { "epoch": 2.7357385137218624, "grad_norm": 0.0781063437461853, "learning_rate": 0.01, "loss": 2.0191, "step": 26616 }, { "epoch": 2.73604687018193, "grad_norm": 0.05996118485927582, "learning_rate": 0.01, "loss": 2.0221, "step": 26619 }, { "epoch": 2.736355226641998, "grad_norm": 0.05300947278738022, "learning_rate": 0.01, "loss": 2.032, "step": 26622 }, { "epoch": 2.7366635831020663, "grad_norm": 0.048238106071949005, "learning_rate": 0.01, "loss": 2.0335, "step": 26625 }, { "epoch": 2.736971939562134, "grad_norm": 0.09588511288166046, "learning_rate": 0.01, "loss": 2.0418, "step": 26628 }, { "epoch": 2.7372802960222016, "grad_norm": 0.04981528967618942, "learning_rate": 0.01, "loss": 2.0125, "step": 26631 }, { "epoch": 2.7375886524822697, "grad_norm": 0.06809774041175842, "learning_rate": 0.01, "loss": 2.0033, "step": 26634 }, { "epoch": 2.7378970089423373, "grad_norm": 0.08924854546785355, "learning_rate": 0.01, "loss": 2.0026, "step": 26637 }, { "epoch": 2.738205365402405, "grad_norm": 0.06717575341463089, "learning_rate": 0.01, "loss": 2.039, "step": 26640 }, { "epoch": 2.738513721862473, "grad_norm": 0.08358625322580338, "learning_rate": 0.01, "loss": 2.0165, "step": 26643 }, { "epoch": 2.7388220783225408, "grad_norm": 0.14468573033809662, "learning_rate": 0.01, "loss": 2.014, "step": 26646 }, { "epoch": 2.7391304347826084, "grad_norm": 0.0533704049885273, "learning_rate": 0.01, "loss": 2.0268, "step": 26649 }, { "epoch": 2.7394387912426765, "grad_norm": 0.052842382341623306, "learning_rate": 0.01, "loss": 2.0334, "step": 26652 }, { "epoch": 2.7397471477027446, "grad_norm": 0.04230334609746933, "learning_rate": 0.01, "loss": 2.0077, "step": 26655 }, { "epoch": 2.7400555041628123, "grad_norm": 0.1004776731133461, "learning_rate": 0.01, "loss": 2.0254, "step": 26658 }, { "epoch": 2.74036386062288, "grad_norm": 0.03724560886621475, "learning_rate": 0.01, "loss": 2.0188, "step": 26661 }, { "epoch": 2.740672217082948, "grad_norm": 0.04997468739748001, "learning_rate": 0.01, "loss": 2.0229, "step": 26664 }, { "epoch": 2.7409805735430157, "grad_norm": 0.08138549327850342, "learning_rate": 0.01, "loss": 2.0138, "step": 26667 }, { "epoch": 2.7412889300030834, "grad_norm": 0.06195824220776558, "learning_rate": 0.01, "loss": 2.0282, "step": 26670 }, { "epoch": 2.7415972864631515, "grad_norm": 0.07340802997350693, "learning_rate": 0.01, "loss": 2.0226, "step": 26673 }, { "epoch": 2.741905642923219, "grad_norm": 0.09338308125734329, "learning_rate": 0.01, "loss": 2.0374, "step": 26676 }, { "epoch": 2.742213999383287, "grad_norm": 0.03742242231965065, "learning_rate": 0.01, "loss": 2.0313, "step": 26679 }, { "epoch": 2.742522355843355, "grad_norm": 0.04461616277694702, "learning_rate": 0.01, "loss": 2.0463, "step": 26682 }, { "epoch": 2.742830712303423, "grad_norm": 0.062195319682359695, "learning_rate": 0.01, "loss": 1.9822, "step": 26685 }, { "epoch": 2.7431390687634907, "grad_norm": 0.12009324878454208, "learning_rate": 0.01, "loss": 2.003, "step": 26688 }, { "epoch": 2.7434474252235583, "grad_norm": 0.04727381840348244, "learning_rate": 0.01, "loss": 2.0264, "step": 26691 }, { "epoch": 2.7437557816836264, "grad_norm": 0.05080636590719223, "learning_rate": 0.01, "loss": 2.033, "step": 26694 }, { "epoch": 2.744064138143694, "grad_norm": 0.11046820133924484, "learning_rate": 0.01, "loss": 2.0399, "step": 26697 }, { "epoch": 2.7443724946037618, "grad_norm": 0.06709396839141846, "learning_rate": 0.01, "loss": 2.0171, "step": 26700 }, { "epoch": 2.74468085106383, "grad_norm": 0.05088644102215767, "learning_rate": 0.01, "loss": 2.0221, "step": 26703 }, { "epoch": 2.7449892075238975, "grad_norm": 0.06446198374032974, "learning_rate": 0.01, "loss": 2.0061, "step": 26706 }, { "epoch": 2.7452975639839656, "grad_norm": 0.07209669798612595, "learning_rate": 0.01, "loss": 2.0058, "step": 26709 }, { "epoch": 2.7456059204440333, "grad_norm": 0.0958387553691864, "learning_rate": 0.01, "loss": 2.0106, "step": 26712 }, { "epoch": 2.7459142769041014, "grad_norm": 0.04666278511285782, "learning_rate": 0.01, "loss": 2.0223, "step": 26715 }, { "epoch": 2.746222633364169, "grad_norm": 0.0713481456041336, "learning_rate": 0.01, "loss": 2.0357, "step": 26718 }, { "epoch": 2.7465309898242367, "grad_norm": 0.060584791004657745, "learning_rate": 0.01, "loss": 1.999, "step": 26721 }, { "epoch": 2.746839346284305, "grad_norm": 0.08711333572864532, "learning_rate": 0.01, "loss": 2.0386, "step": 26724 }, { "epoch": 2.7471477027443725, "grad_norm": 0.04297727718949318, "learning_rate": 0.01, "loss": 2.0163, "step": 26727 }, { "epoch": 2.74745605920444, "grad_norm": 0.06777355074882507, "learning_rate": 0.01, "loss": 2.0318, "step": 26730 }, { "epoch": 2.7477644156645082, "grad_norm": 0.05725346878170967, "learning_rate": 0.01, "loss": 2.0395, "step": 26733 }, { "epoch": 2.748072772124576, "grad_norm": 0.0601140521466732, "learning_rate": 0.01, "loss": 2.0176, "step": 26736 }, { "epoch": 2.748381128584644, "grad_norm": 0.04107888415455818, "learning_rate": 0.01, "loss": 1.9972, "step": 26739 }, { "epoch": 2.7486894850447117, "grad_norm": 0.03484128415584564, "learning_rate": 0.01, "loss": 2.0018, "step": 26742 }, { "epoch": 2.7489978415047798, "grad_norm": 0.05098375305533409, "learning_rate": 0.01, "loss": 1.9977, "step": 26745 }, { "epoch": 2.7493061979648474, "grad_norm": 0.1751684993505478, "learning_rate": 0.01, "loss": 2.0183, "step": 26748 }, { "epoch": 2.749614554424915, "grad_norm": 0.09155084937810898, "learning_rate": 0.01, "loss": 2.0239, "step": 26751 }, { "epoch": 2.749922910884983, "grad_norm": 0.05078737810254097, "learning_rate": 0.01, "loss": 2.0396, "step": 26754 }, { "epoch": 2.750231267345051, "grad_norm": 0.03406425565481186, "learning_rate": 0.01, "loss": 2.0378, "step": 26757 }, { "epoch": 2.7505396238051185, "grad_norm": 0.07126470655202866, "learning_rate": 0.01, "loss": 2.0391, "step": 26760 }, { "epoch": 2.7508479802651866, "grad_norm": 0.05165224149823189, "learning_rate": 0.01, "loss": 2.0454, "step": 26763 }, { "epoch": 2.7511563367252543, "grad_norm": 0.04024217650294304, "learning_rate": 0.01, "loss": 1.9988, "step": 26766 }, { "epoch": 2.7514646931853224, "grad_norm": 0.05387435853481293, "learning_rate": 0.01, "loss": 2.0263, "step": 26769 }, { "epoch": 2.75177304964539, "grad_norm": 0.11387746036052704, "learning_rate": 0.01, "loss": 2.0191, "step": 26772 }, { "epoch": 2.752081406105458, "grad_norm": 0.06504768133163452, "learning_rate": 0.01, "loss": 2.0359, "step": 26775 }, { "epoch": 2.752389762565526, "grad_norm": 0.07217580825090408, "learning_rate": 0.01, "loss": 1.9967, "step": 26778 }, { "epoch": 2.7526981190255935, "grad_norm": 0.07751470804214478, "learning_rate": 0.01, "loss": 2.0171, "step": 26781 }, { "epoch": 2.7530064754856616, "grad_norm": 0.06282947957515717, "learning_rate": 0.01, "loss": 2.0022, "step": 26784 }, { "epoch": 2.7533148319457292, "grad_norm": 0.04301406815648079, "learning_rate": 0.01, "loss": 2.0062, "step": 26787 }, { "epoch": 2.753623188405797, "grad_norm": 0.06154756247997284, "learning_rate": 0.01, "loss": 2.0212, "step": 26790 }, { "epoch": 2.753931544865865, "grad_norm": 0.04729504510760307, "learning_rate": 0.01, "loss": 2.0192, "step": 26793 }, { "epoch": 2.7542399013259327, "grad_norm": 0.06597118079662323, "learning_rate": 0.01, "loss": 2.0081, "step": 26796 }, { "epoch": 2.7545482577860008, "grad_norm": 0.050065554678440094, "learning_rate": 0.01, "loss": 2.0004, "step": 26799 }, { "epoch": 2.7548566142460684, "grad_norm": 0.048531875014305115, "learning_rate": 0.01, "loss": 2.0158, "step": 26802 }, { "epoch": 2.7551649707061365, "grad_norm": 0.04954374581575394, "learning_rate": 0.01, "loss": 2.007, "step": 26805 }, { "epoch": 2.755473327166204, "grad_norm": 0.06998781114816666, "learning_rate": 0.01, "loss": 2.0537, "step": 26808 }, { "epoch": 2.755781683626272, "grad_norm": 0.08933461457490921, "learning_rate": 0.01, "loss": 2.0058, "step": 26811 }, { "epoch": 2.75609004008634, "grad_norm": 0.06067082658410072, "learning_rate": 0.01, "loss": 2.0318, "step": 26814 }, { "epoch": 2.7563983965464076, "grad_norm": 0.07710520923137665, "learning_rate": 0.01, "loss": 2.0029, "step": 26817 }, { "epoch": 2.7567067530064753, "grad_norm": 0.05703970044851303, "learning_rate": 0.01, "loss": 2.0505, "step": 26820 }, { "epoch": 2.7570151094665434, "grad_norm": 0.1251015067100525, "learning_rate": 0.01, "loss": 2.0264, "step": 26823 }, { "epoch": 2.757323465926611, "grad_norm": 0.05942771956324577, "learning_rate": 0.01, "loss": 2.0368, "step": 26826 }, { "epoch": 2.757631822386679, "grad_norm": 0.03195277228951454, "learning_rate": 0.01, "loss": 1.9868, "step": 26829 }, { "epoch": 2.757940178846747, "grad_norm": 0.03609882667660713, "learning_rate": 0.01, "loss": 2.0208, "step": 26832 }, { "epoch": 2.758248535306815, "grad_norm": 0.04004659876227379, "learning_rate": 0.01, "loss": 1.9996, "step": 26835 }, { "epoch": 2.7585568917668826, "grad_norm": 0.06475567817687988, "learning_rate": 0.01, "loss": 2.0187, "step": 26838 }, { "epoch": 2.7588652482269502, "grad_norm": 0.04894067347049713, "learning_rate": 0.01, "loss": 2.0342, "step": 26841 }, { "epoch": 2.7591736046870183, "grad_norm": 0.11116486042737961, "learning_rate": 0.01, "loss": 2.0111, "step": 26844 }, { "epoch": 2.759481961147086, "grad_norm": 0.0832393690943718, "learning_rate": 0.01, "loss": 1.9986, "step": 26847 }, { "epoch": 2.7597903176071537, "grad_norm": 0.06433276832103729, "learning_rate": 0.01, "loss": 2.0233, "step": 26850 }, { "epoch": 2.7600986740672218, "grad_norm": 0.06255584210157394, "learning_rate": 0.01, "loss": 2.0097, "step": 26853 }, { "epoch": 2.7604070305272894, "grad_norm": 0.06610293686389923, "learning_rate": 0.01, "loss": 2.0223, "step": 26856 }, { "epoch": 2.7607153869873575, "grad_norm": 0.048567045480012894, "learning_rate": 0.01, "loss": 1.9865, "step": 26859 }, { "epoch": 2.761023743447425, "grad_norm": 0.06945166736841202, "learning_rate": 0.01, "loss": 2.0189, "step": 26862 }, { "epoch": 2.7613320999074933, "grad_norm": 0.07567547261714935, "learning_rate": 0.01, "loss": 2.0356, "step": 26865 }, { "epoch": 2.761640456367561, "grad_norm": 0.09087162464857101, "learning_rate": 0.01, "loss": 2.0395, "step": 26868 }, { "epoch": 2.7619488128276286, "grad_norm": 0.07904476672410965, "learning_rate": 0.01, "loss": 2.0028, "step": 26871 }, { "epoch": 2.7622571692876967, "grad_norm": 0.08835271000862122, "learning_rate": 0.01, "loss": 2.0364, "step": 26874 }, { "epoch": 2.7625655257477644, "grad_norm": 0.042680736631155014, "learning_rate": 0.01, "loss": 2.0206, "step": 26877 }, { "epoch": 2.762873882207832, "grad_norm": 0.034312695264816284, "learning_rate": 0.01, "loss": 2.0124, "step": 26880 }, { "epoch": 2.7631822386679, "grad_norm": 0.04073645919561386, "learning_rate": 0.01, "loss": 2.0207, "step": 26883 }, { "epoch": 2.763490595127968, "grad_norm": 0.11242496222257614, "learning_rate": 0.01, "loss": 2.0234, "step": 26886 }, { "epoch": 2.763798951588036, "grad_norm": 0.048365022987127304, "learning_rate": 0.01, "loss": 2.0223, "step": 26889 }, { "epoch": 2.7641073080481036, "grad_norm": 0.06508596241474152, "learning_rate": 0.01, "loss": 2.0067, "step": 26892 }, { "epoch": 2.7644156645081717, "grad_norm": 0.08372494578361511, "learning_rate": 0.01, "loss": 2.024, "step": 26895 }, { "epoch": 2.7647240209682393, "grad_norm": 0.08499015122652054, "learning_rate": 0.01, "loss": 2.0337, "step": 26898 }, { "epoch": 2.765032377428307, "grad_norm": 0.08214948326349258, "learning_rate": 0.01, "loss": 2.0235, "step": 26901 }, { "epoch": 2.765340733888375, "grad_norm": 0.0902111828327179, "learning_rate": 0.01, "loss": 2.0319, "step": 26904 }, { "epoch": 2.7656490903484428, "grad_norm": 0.0556274875998497, "learning_rate": 0.01, "loss": 2.0096, "step": 26907 }, { "epoch": 2.7659574468085104, "grad_norm": 0.06845957040786743, "learning_rate": 0.01, "loss": 1.9983, "step": 26910 }, { "epoch": 2.7662658032685785, "grad_norm": 0.051283106207847595, "learning_rate": 0.01, "loss": 2.009, "step": 26913 }, { "epoch": 2.766574159728646, "grad_norm": 0.03301118686795235, "learning_rate": 0.01, "loss": 1.9983, "step": 26916 }, { "epoch": 2.7668825161887143, "grad_norm": 0.05253078415989876, "learning_rate": 0.01, "loss": 2.0274, "step": 26919 }, { "epoch": 2.767190872648782, "grad_norm": 0.1089513972401619, "learning_rate": 0.01, "loss": 2.044, "step": 26922 }, { "epoch": 2.76749922910885, "grad_norm": 0.06042607128620148, "learning_rate": 0.01, "loss": 1.9767, "step": 26925 }, { "epoch": 2.7678075855689177, "grad_norm": 0.09722250699996948, "learning_rate": 0.01, "loss": 2.0136, "step": 26928 }, { "epoch": 2.7681159420289854, "grad_norm": 0.04542861133813858, "learning_rate": 0.01, "loss": 2.0, "step": 26931 }, { "epoch": 2.7684242984890535, "grad_norm": 0.14705310761928558, "learning_rate": 0.01, "loss": 2.0074, "step": 26934 }, { "epoch": 2.768732654949121, "grad_norm": 0.03334478661417961, "learning_rate": 0.01, "loss": 2.0104, "step": 26937 }, { "epoch": 2.769041011409189, "grad_norm": 0.054583244025707245, "learning_rate": 0.01, "loss": 2.0111, "step": 26940 }, { "epoch": 2.769349367869257, "grad_norm": 0.03972140699625015, "learning_rate": 0.01, "loss": 2.0327, "step": 26943 }, { "epoch": 2.7696577243293246, "grad_norm": 0.04227004200220108, "learning_rate": 0.01, "loss": 2.052, "step": 26946 }, { "epoch": 2.7699660807893927, "grad_norm": 0.035285089164972305, "learning_rate": 0.01, "loss": 2.0224, "step": 26949 }, { "epoch": 2.7702744372494603, "grad_norm": 0.08037975430488586, "learning_rate": 0.01, "loss": 2.0201, "step": 26952 }, { "epoch": 2.7705827937095284, "grad_norm": 0.06296487152576447, "learning_rate": 0.01, "loss": 2.0222, "step": 26955 }, { "epoch": 2.770891150169596, "grad_norm": 0.08724946528673172, "learning_rate": 0.01, "loss": 2.0515, "step": 26958 }, { "epoch": 2.7711995066296637, "grad_norm": 0.06455433368682861, "learning_rate": 0.01, "loss": 2.0573, "step": 26961 }, { "epoch": 2.771507863089732, "grad_norm": 0.07615622133016586, "learning_rate": 0.01, "loss": 2.0177, "step": 26964 }, { "epoch": 2.7718162195497995, "grad_norm": 0.06695935130119324, "learning_rate": 0.01, "loss": 2.031, "step": 26967 }, { "epoch": 2.772124576009867, "grad_norm": 0.04257410392165184, "learning_rate": 0.01, "loss": 2.0186, "step": 26970 }, { "epoch": 2.7724329324699353, "grad_norm": 0.1329718828201294, "learning_rate": 0.01, "loss": 2.014, "step": 26973 }, { "epoch": 2.772741288930003, "grad_norm": 0.06937066465616226, "learning_rate": 0.01, "loss": 2.0185, "step": 26976 }, { "epoch": 2.773049645390071, "grad_norm": 0.07017137110233307, "learning_rate": 0.01, "loss": 2.0066, "step": 26979 }, { "epoch": 2.7733580018501387, "grad_norm": 0.03589661046862602, "learning_rate": 0.01, "loss": 2.0338, "step": 26982 }, { "epoch": 2.773666358310207, "grad_norm": 0.04110831022262573, "learning_rate": 0.01, "loss": 2.0054, "step": 26985 }, { "epoch": 2.7739747147702745, "grad_norm": 0.06182010471820831, "learning_rate": 0.01, "loss": 2.029, "step": 26988 }, { "epoch": 2.774283071230342, "grad_norm": 0.08161917328834534, "learning_rate": 0.01, "loss": 2.0191, "step": 26991 }, { "epoch": 2.7745914276904102, "grad_norm": 0.05832149460911751, "learning_rate": 0.01, "loss": 2.0279, "step": 26994 }, { "epoch": 2.774899784150478, "grad_norm": 0.08298707008361816, "learning_rate": 0.01, "loss": 2.0351, "step": 26997 }, { "epoch": 2.7752081406105455, "grad_norm": 0.04794152081012726, "learning_rate": 0.01, "loss": 2.0158, "step": 27000 }, { "epoch": 2.7755164970706137, "grad_norm": 0.04620853811502457, "learning_rate": 0.01, "loss": 2.029, "step": 27003 }, { "epoch": 2.7758248535306813, "grad_norm": 0.045553676784038544, "learning_rate": 0.01, "loss": 1.9998, "step": 27006 }, { "epoch": 2.7761332099907494, "grad_norm": 0.058516908437013626, "learning_rate": 0.01, "loss": 2.0324, "step": 27009 }, { "epoch": 2.776441566450817, "grad_norm": 0.05661854147911072, "learning_rate": 0.01, "loss": 2.0331, "step": 27012 }, { "epoch": 2.776749922910885, "grad_norm": 0.05626550316810608, "learning_rate": 0.01, "loss": 2.016, "step": 27015 }, { "epoch": 2.777058279370953, "grad_norm": 0.08505663275718689, "learning_rate": 0.01, "loss": 2.014, "step": 27018 }, { "epoch": 2.7773666358310205, "grad_norm": 0.08449403196573257, "learning_rate": 0.01, "loss": 2.0141, "step": 27021 }, { "epoch": 2.7776749922910886, "grad_norm": 0.08090987801551819, "learning_rate": 0.01, "loss": 2.0313, "step": 27024 }, { "epoch": 2.7779833487511563, "grad_norm": 0.03180038183927536, "learning_rate": 0.01, "loss": 2.0105, "step": 27027 }, { "epoch": 2.778291705211224, "grad_norm": 0.03052234649658203, "learning_rate": 0.01, "loss": 2.0144, "step": 27030 }, { "epoch": 2.778600061671292, "grad_norm": 0.07030376046895981, "learning_rate": 0.01, "loss": 2.0101, "step": 27033 }, { "epoch": 2.7789084181313597, "grad_norm": 0.1097898781299591, "learning_rate": 0.01, "loss": 2.016, "step": 27036 }, { "epoch": 2.779216774591428, "grad_norm": 0.10938852280378342, "learning_rate": 0.01, "loss": 2.0358, "step": 27039 }, { "epoch": 2.7795251310514955, "grad_norm": 0.17036540806293488, "learning_rate": 0.01, "loss": 2.0242, "step": 27042 }, { "epoch": 2.7798334875115636, "grad_norm": 0.07512038946151733, "learning_rate": 0.01, "loss": 2.0142, "step": 27045 }, { "epoch": 2.780141843971631, "grad_norm": 0.03864987567067146, "learning_rate": 0.01, "loss": 1.993, "step": 27048 }, { "epoch": 2.780450200431699, "grad_norm": 0.07508687674999237, "learning_rate": 0.01, "loss": 2.0404, "step": 27051 }, { "epoch": 2.780758556891767, "grad_norm": 0.03866630047559738, "learning_rate": 0.01, "loss": 2.0285, "step": 27054 }, { "epoch": 2.7810669133518346, "grad_norm": 0.044196490198373795, "learning_rate": 0.01, "loss": 2.0015, "step": 27057 }, { "epoch": 2.7813752698119023, "grad_norm": 0.03751998767256737, "learning_rate": 0.01, "loss": 2.0142, "step": 27060 }, { "epoch": 2.7816836262719704, "grad_norm": 0.04259081184864044, "learning_rate": 0.01, "loss": 2.0344, "step": 27063 }, { "epoch": 2.781991982732038, "grad_norm": 0.04910355806350708, "learning_rate": 0.01, "loss": 2.0345, "step": 27066 }, { "epoch": 2.782300339192106, "grad_norm": 0.07969753444194794, "learning_rate": 0.01, "loss": 2.0147, "step": 27069 }, { "epoch": 2.782608695652174, "grad_norm": 0.04511536657810211, "learning_rate": 0.01, "loss": 2.0265, "step": 27072 }, { "epoch": 2.782917052112242, "grad_norm": 0.03906785696744919, "learning_rate": 0.01, "loss": 2.0169, "step": 27075 }, { "epoch": 2.7832254085723096, "grad_norm": 0.045391857624053955, "learning_rate": 0.01, "loss": 2.0256, "step": 27078 }, { "epoch": 2.7835337650323773, "grad_norm": 0.04607332870364189, "learning_rate": 0.01, "loss": 2.0379, "step": 27081 }, { "epoch": 2.7838421214924454, "grad_norm": 0.10543697327375412, "learning_rate": 0.01, "loss": 2.0022, "step": 27084 }, { "epoch": 2.784150477952513, "grad_norm": 0.045777421444654465, "learning_rate": 0.01, "loss": 2.0065, "step": 27087 }, { "epoch": 2.7844588344125807, "grad_norm": 0.11416932195425034, "learning_rate": 0.01, "loss": 2.0156, "step": 27090 }, { "epoch": 2.784767190872649, "grad_norm": 0.04630710557103157, "learning_rate": 0.01, "loss": 2.0109, "step": 27093 }, { "epoch": 2.785075547332717, "grad_norm": 0.1050376445055008, "learning_rate": 0.01, "loss": 2.0239, "step": 27096 }, { "epoch": 2.7853839037927846, "grad_norm": 0.04592348635196686, "learning_rate": 0.01, "loss": 2.0032, "step": 27099 }, { "epoch": 2.785692260252852, "grad_norm": 0.058079514652490616, "learning_rate": 0.01, "loss": 2.0067, "step": 27102 }, { "epoch": 2.7860006167129203, "grad_norm": 0.12133990228176117, "learning_rate": 0.01, "loss": 2.0296, "step": 27105 }, { "epoch": 2.786308973172988, "grad_norm": 0.09523560851812363, "learning_rate": 0.01, "loss": 2.0022, "step": 27108 }, { "epoch": 2.7866173296330556, "grad_norm": 0.05585847795009613, "learning_rate": 0.01, "loss": 2.0458, "step": 27111 }, { "epoch": 2.7869256860931237, "grad_norm": 0.03669681027531624, "learning_rate": 0.01, "loss": 2.0072, "step": 27114 }, { "epoch": 2.7872340425531914, "grad_norm": 0.05370241776108742, "learning_rate": 0.01, "loss": 2.0034, "step": 27117 }, { "epoch": 2.787542399013259, "grad_norm": 0.04395952448248863, "learning_rate": 0.01, "loss": 1.992, "step": 27120 }, { "epoch": 2.787850755473327, "grad_norm": 0.04012378677725792, "learning_rate": 0.01, "loss": 2.0269, "step": 27123 }, { "epoch": 2.7881591119333953, "grad_norm": 0.08143822848796844, "learning_rate": 0.01, "loss": 2.0385, "step": 27126 }, { "epoch": 2.788467468393463, "grad_norm": 0.05250631272792816, "learning_rate": 0.01, "loss": 2.0132, "step": 27129 }, { "epoch": 2.7887758248535306, "grad_norm": 0.09945302456617355, "learning_rate": 0.01, "loss": 2.0135, "step": 27132 }, { "epoch": 2.7890841813135987, "grad_norm": 0.06001584604382515, "learning_rate": 0.01, "loss": 1.9975, "step": 27135 }, { "epoch": 2.7893925377736664, "grad_norm": 0.05244366079568863, "learning_rate": 0.01, "loss": 2.0247, "step": 27138 }, { "epoch": 2.789700894233734, "grad_norm": 0.06054399535059929, "learning_rate": 0.01, "loss": 1.9925, "step": 27141 }, { "epoch": 2.790009250693802, "grad_norm": 0.10409083217382431, "learning_rate": 0.01, "loss": 2.005, "step": 27144 }, { "epoch": 2.79031760715387, "grad_norm": 0.06229739263653755, "learning_rate": 0.01, "loss": 2.0175, "step": 27147 }, { "epoch": 2.7906259636139374, "grad_norm": 0.06340862810611725, "learning_rate": 0.01, "loss": 2.0205, "step": 27150 }, { "epoch": 2.7909343200740055, "grad_norm": 0.035666175186634064, "learning_rate": 0.01, "loss": 2.0039, "step": 27153 }, { "epoch": 2.7912426765340737, "grad_norm": 0.043176453560590744, "learning_rate": 0.01, "loss": 2.0045, "step": 27156 }, { "epoch": 2.7915510329941413, "grad_norm": 0.042664237320423126, "learning_rate": 0.01, "loss": 2.0133, "step": 27159 }, { "epoch": 2.791859389454209, "grad_norm": 0.08263540267944336, "learning_rate": 0.01, "loss": 2.0327, "step": 27162 }, { "epoch": 2.792167745914277, "grad_norm": 0.05592063441872597, "learning_rate": 0.01, "loss": 2.012, "step": 27165 }, { "epoch": 2.7924761023743447, "grad_norm": 0.0740666538476944, "learning_rate": 0.01, "loss": 2.0348, "step": 27168 }, { "epoch": 2.7927844588344124, "grad_norm": 0.07478248327970505, "learning_rate": 0.01, "loss": 2.0143, "step": 27171 }, { "epoch": 2.7930928152944805, "grad_norm": 0.08470360189676285, "learning_rate": 0.01, "loss": 2.0075, "step": 27174 }, { "epoch": 2.793401171754548, "grad_norm": 0.04486660659313202, "learning_rate": 0.01, "loss": 1.9898, "step": 27177 }, { "epoch": 2.793709528214616, "grad_norm": 0.12479998171329498, "learning_rate": 0.01, "loss": 2.005, "step": 27180 }, { "epoch": 2.794017884674684, "grad_norm": 0.05652941018342972, "learning_rate": 0.01, "loss": 2.0248, "step": 27183 }, { "epoch": 2.794326241134752, "grad_norm": 0.054577797651290894, "learning_rate": 0.01, "loss": 2.0249, "step": 27186 }, { "epoch": 2.7946345975948197, "grad_norm": 0.03588878735899925, "learning_rate": 0.01, "loss": 2.0321, "step": 27189 }, { "epoch": 2.7949429540548874, "grad_norm": 0.0890214741230011, "learning_rate": 0.01, "loss": 2.0066, "step": 27192 }, { "epoch": 2.7952513105149555, "grad_norm": 0.0901033952832222, "learning_rate": 0.01, "loss": 2.0203, "step": 27195 }, { "epoch": 2.795559666975023, "grad_norm": 0.09306314587593079, "learning_rate": 0.01, "loss": 2.0337, "step": 27198 }, { "epoch": 2.7958680234350908, "grad_norm": 0.07897783815860748, "learning_rate": 0.01, "loss": 2.004, "step": 27201 }, { "epoch": 2.796176379895159, "grad_norm": 0.08055876195430756, "learning_rate": 0.01, "loss": 2.0122, "step": 27204 }, { "epoch": 2.7964847363552265, "grad_norm": 0.058927252888679504, "learning_rate": 0.01, "loss": 2.0178, "step": 27207 }, { "epoch": 2.7967930928152946, "grad_norm": 0.1303315907716751, "learning_rate": 0.01, "loss": 2.0186, "step": 27210 }, { "epoch": 2.7971014492753623, "grad_norm": 0.08321187645196915, "learning_rate": 0.01, "loss": 2.0038, "step": 27213 }, { "epoch": 2.7974098057354304, "grad_norm": 0.0715765729546547, "learning_rate": 0.01, "loss": 2.02, "step": 27216 }, { "epoch": 2.797718162195498, "grad_norm": 0.047501109540462494, "learning_rate": 0.01, "loss": 2.0185, "step": 27219 }, { "epoch": 2.7980265186555657, "grad_norm": 0.04297725111246109, "learning_rate": 0.01, "loss": 1.9893, "step": 27222 }, { "epoch": 2.798334875115634, "grad_norm": 0.08612347394227982, "learning_rate": 0.01, "loss": 2.0351, "step": 27225 }, { "epoch": 2.7986432315757015, "grad_norm": 0.06801822036504745, "learning_rate": 0.01, "loss": 2.0012, "step": 27228 }, { "epoch": 2.798951588035769, "grad_norm": 0.103379026055336, "learning_rate": 0.01, "loss": 2.0172, "step": 27231 }, { "epoch": 2.7992599444958373, "grad_norm": 0.040429919958114624, "learning_rate": 0.01, "loss": 2.0289, "step": 27234 }, { "epoch": 2.799568300955905, "grad_norm": 0.09817671030759811, "learning_rate": 0.01, "loss": 2.0055, "step": 27237 }, { "epoch": 2.799876657415973, "grad_norm": 0.06191599741578102, "learning_rate": 0.01, "loss": 2.0032, "step": 27240 }, { "epoch": 2.8001850138760407, "grad_norm": 0.07992551475763321, "learning_rate": 0.01, "loss": 2.0203, "step": 27243 }, { "epoch": 2.800493370336109, "grad_norm": 0.055606722831726074, "learning_rate": 0.01, "loss": 2.0185, "step": 27246 }, { "epoch": 2.8008017267961764, "grad_norm": 0.1322477012872696, "learning_rate": 0.01, "loss": 2.0215, "step": 27249 }, { "epoch": 2.801110083256244, "grad_norm": 0.05603817105293274, "learning_rate": 0.01, "loss": 2.039, "step": 27252 }, { "epoch": 2.801418439716312, "grad_norm": 0.06935392320156097, "learning_rate": 0.01, "loss": 2.0119, "step": 27255 }, { "epoch": 2.80172679617638, "grad_norm": 0.04515109956264496, "learning_rate": 0.01, "loss": 2.053, "step": 27258 }, { "epoch": 2.8020351526364475, "grad_norm": 0.050864629447460175, "learning_rate": 0.01, "loss": 2.0042, "step": 27261 }, { "epoch": 2.8023435090965156, "grad_norm": 0.038876548409461975, "learning_rate": 0.01, "loss": 1.9945, "step": 27264 }, { "epoch": 2.8026518655565833, "grad_norm": 0.03906438872218132, "learning_rate": 0.01, "loss": 1.9832, "step": 27267 }, { "epoch": 2.8029602220166514, "grad_norm": 0.08978519588708878, "learning_rate": 0.01, "loss": 2.0292, "step": 27270 }, { "epoch": 2.803268578476719, "grad_norm": 0.07353704422712326, "learning_rate": 0.01, "loss": 2.0117, "step": 27273 }, { "epoch": 2.803576934936787, "grad_norm": 0.0424012616276741, "learning_rate": 0.01, "loss": 1.9931, "step": 27276 }, { "epoch": 2.803885291396855, "grad_norm": 0.05939796194434166, "learning_rate": 0.01, "loss": 2.0089, "step": 27279 }, { "epoch": 2.8041936478569225, "grad_norm": 0.06859377771615982, "learning_rate": 0.01, "loss": 2.005, "step": 27282 }, { "epoch": 2.8045020043169906, "grad_norm": 0.1016155332326889, "learning_rate": 0.01, "loss": 2.0207, "step": 27285 }, { "epoch": 2.8048103607770583, "grad_norm": 0.05608060583472252, "learning_rate": 0.01, "loss": 2.0117, "step": 27288 }, { "epoch": 2.805118717237126, "grad_norm": 0.043284934014081955, "learning_rate": 0.01, "loss": 1.9931, "step": 27291 }, { "epoch": 2.805427073697194, "grad_norm": 0.04967540502548218, "learning_rate": 0.01, "loss": 2.0222, "step": 27294 }, { "epoch": 2.8057354301572617, "grad_norm": 0.040739741176366806, "learning_rate": 0.01, "loss": 2.0148, "step": 27297 }, { "epoch": 2.80604378661733, "grad_norm": 0.039170749485492706, "learning_rate": 0.01, "loss": 2.0277, "step": 27300 }, { "epoch": 2.8063521430773974, "grad_norm": 0.040820952504873276, "learning_rate": 0.01, "loss": 2.0151, "step": 27303 }, { "epoch": 2.8066604995374655, "grad_norm": 0.07061021775007248, "learning_rate": 0.01, "loss": 2.0072, "step": 27306 }, { "epoch": 2.806968855997533, "grad_norm": 0.054698627442121506, "learning_rate": 0.01, "loss": 2.0059, "step": 27309 }, { "epoch": 2.807277212457601, "grad_norm": 0.04737408831715584, "learning_rate": 0.01, "loss": 2.0109, "step": 27312 }, { "epoch": 2.807585568917669, "grad_norm": 0.11695525050163269, "learning_rate": 0.01, "loss": 2.0076, "step": 27315 }, { "epoch": 2.8078939253777366, "grad_norm": 0.0921463891863823, "learning_rate": 0.01, "loss": 2.039, "step": 27318 }, { "epoch": 2.8082022818378043, "grad_norm": 0.03874325007200241, "learning_rate": 0.01, "loss": 2.0212, "step": 27321 }, { "epoch": 2.8085106382978724, "grad_norm": 0.04040201008319855, "learning_rate": 0.01, "loss": 1.9732, "step": 27324 }, { "epoch": 2.80881899475794, "grad_norm": 0.035648833960294724, "learning_rate": 0.01, "loss": 2.0269, "step": 27327 }, { "epoch": 2.809127351218008, "grad_norm": 0.0893108919262886, "learning_rate": 0.01, "loss": 1.9982, "step": 27330 }, { "epoch": 2.809435707678076, "grad_norm": 0.1203017309308052, "learning_rate": 0.01, "loss": 2.0138, "step": 27333 }, { "epoch": 2.809744064138144, "grad_norm": 0.045630257576704025, "learning_rate": 0.01, "loss": 2.0178, "step": 27336 }, { "epoch": 2.8100524205982116, "grad_norm": 0.04097437858581543, "learning_rate": 0.01, "loss": 1.9842, "step": 27339 }, { "epoch": 2.8103607770582792, "grad_norm": 0.05476262420415878, "learning_rate": 0.01, "loss": 2.0238, "step": 27342 }, { "epoch": 2.8106691335183474, "grad_norm": 0.05577860027551651, "learning_rate": 0.01, "loss": 2.0396, "step": 27345 }, { "epoch": 2.810977489978415, "grad_norm": 0.04357774555683136, "learning_rate": 0.01, "loss": 2.0158, "step": 27348 }, { "epoch": 2.8112858464384827, "grad_norm": 0.04594416916370392, "learning_rate": 0.01, "loss": 2.0197, "step": 27351 }, { "epoch": 2.8115942028985508, "grad_norm": 0.041454486548900604, "learning_rate": 0.01, "loss": 2.014, "step": 27354 }, { "epoch": 2.8119025593586184, "grad_norm": 0.05279424414038658, "learning_rate": 0.01, "loss": 2.0261, "step": 27357 }, { "epoch": 2.8122109158186865, "grad_norm": 0.10371188074350357, "learning_rate": 0.01, "loss": 2.0333, "step": 27360 }, { "epoch": 2.812519272278754, "grad_norm": 0.053941383957862854, "learning_rate": 0.01, "loss": 2.0067, "step": 27363 }, { "epoch": 2.8128276287388223, "grad_norm": 0.12082924693822861, "learning_rate": 0.01, "loss": 2.0177, "step": 27366 }, { "epoch": 2.81313598519889, "grad_norm": 0.11066311597824097, "learning_rate": 0.01, "loss": 2.0101, "step": 27369 }, { "epoch": 2.8134443416589576, "grad_norm": 0.07547413557767868, "learning_rate": 0.01, "loss": 2.0079, "step": 27372 }, { "epoch": 2.8137526981190257, "grad_norm": 0.06772000342607498, "learning_rate": 0.01, "loss": 2.0077, "step": 27375 }, { "epoch": 2.8140610545790934, "grad_norm": 0.03906597942113876, "learning_rate": 0.01, "loss": 1.9984, "step": 27378 }, { "epoch": 2.814369411039161, "grad_norm": 0.048235367983579636, "learning_rate": 0.01, "loss": 2.0233, "step": 27381 }, { "epoch": 2.814677767499229, "grad_norm": 0.08060193806886673, "learning_rate": 0.01, "loss": 2.0238, "step": 27384 }, { "epoch": 2.814986123959297, "grad_norm": 0.06527181714773178, "learning_rate": 0.01, "loss": 1.9996, "step": 27387 }, { "epoch": 2.815294480419365, "grad_norm": 0.04199523106217384, "learning_rate": 0.01, "loss": 2.0233, "step": 27390 }, { "epoch": 2.8156028368794326, "grad_norm": 0.07436412572860718, "learning_rate": 0.01, "loss": 1.992, "step": 27393 }, { "epoch": 2.8159111933395007, "grad_norm": 0.05724874138832092, "learning_rate": 0.01, "loss": 1.9834, "step": 27396 }, { "epoch": 2.8162195497995683, "grad_norm": 0.03609538823366165, "learning_rate": 0.01, "loss": 2.0376, "step": 27399 }, { "epoch": 2.816527906259636, "grad_norm": 0.03008181042969227, "learning_rate": 0.01, "loss": 2.0129, "step": 27402 }, { "epoch": 2.816836262719704, "grad_norm": 0.0635017454624176, "learning_rate": 0.01, "loss": 2.0, "step": 27405 }, { "epoch": 2.8171446191797718, "grad_norm": 0.07800552248954773, "learning_rate": 0.01, "loss": 2.0143, "step": 27408 }, { "epoch": 2.8174529756398394, "grad_norm": 0.09289496392011642, "learning_rate": 0.01, "loss": 1.991, "step": 27411 }, { "epoch": 2.8177613320999075, "grad_norm": 0.0791633352637291, "learning_rate": 0.01, "loss": 2.033, "step": 27414 }, { "epoch": 2.818069688559975, "grad_norm": 0.06715335696935654, "learning_rate": 0.01, "loss": 2.0149, "step": 27417 }, { "epoch": 2.8183780450200433, "grad_norm": 0.0814514085650444, "learning_rate": 0.01, "loss": 2.025, "step": 27420 }, { "epoch": 2.818686401480111, "grad_norm": 0.09124473482370377, "learning_rate": 0.01, "loss": 2.0017, "step": 27423 }, { "epoch": 2.818994757940179, "grad_norm": 0.03711434826254845, "learning_rate": 0.01, "loss": 2.0231, "step": 27426 }, { "epoch": 2.8193031144002467, "grad_norm": 0.05914726108312607, "learning_rate": 0.01, "loss": 2.0047, "step": 27429 }, { "epoch": 2.8196114708603144, "grad_norm": 0.04358556494116783, "learning_rate": 0.01, "loss": 2.0211, "step": 27432 }, { "epoch": 2.8199198273203825, "grad_norm": 0.1319088488817215, "learning_rate": 0.01, "loss": 2.0186, "step": 27435 }, { "epoch": 2.82022818378045, "grad_norm": 0.07464331388473511, "learning_rate": 0.01, "loss": 2.0434, "step": 27438 }, { "epoch": 2.820536540240518, "grad_norm": 0.08253163844347, "learning_rate": 0.01, "loss": 2.0172, "step": 27441 }, { "epoch": 2.820844896700586, "grad_norm": 0.042612046003341675, "learning_rate": 0.01, "loss": 2.0227, "step": 27444 }, { "epoch": 2.8211532531606536, "grad_norm": 0.04726095870137215, "learning_rate": 0.01, "loss": 2.0301, "step": 27447 }, { "epoch": 2.8214616096207217, "grad_norm": 0.06982813775539398, "learning_rate": 0.01, "loss": 2.0424, "step": 27450 }, { "epoch": 2.8217699660807893, "grad_norm": 0.051457736641168594, "learning_rate": 0.01, "loss": 2.0424, "step": 27453 }, { "epoch": 2.8220783225408574, "grad_norm": 0.07463426142930984, "learning_rate": 0.01, "loss": 2.012, "step": 27456 }, { "epoch": 2.822386679000925, "grad_norm": 0.048826638609170914, "learning_rate": 0.01, "loss": 2.0303, "step": 27459 }, { "epoch": 2.8226950354609928, "grad_norm": 0.05443079397082329, "learning_rate": 0.01, "loss": 2.0424, "step": 27462 }, { "epoch": 2.823003391921061, "grad_norm": 0.10064379125833511, "learning_rate": 0.01, "loss": 2.0166, "step": 27465 }, { "epoch": 2.8233117483811285, "grad_norm": 0.0755578950047493, "learning_rate": 0.01, "loss": 1.9922, "step": 27468 }, { "epoch": 2.823620104841196, "grad_norm": 0.05240621045231819, "learning_rate": 0.01, "loss": 2.0092, "step": 27471 }, { "epoch": 2.8239284613012643, "grad_norm": 0.08781099319458008, "learning_rate": 0.01, "loss": 1.9954, "step": 27474 }, { "epoch": 2.824236817761332, "grad_norm": 0.04524267092347145, "learning_rate": 0.01, "loss": 2.0327, "step": 27477 }, { "epoch": 2.8245451742214, "grad_norm": 0.05119558051228523, "learning_rate": 0.01, "loss": 2.0062, "step": 27480 }, { "epoch": 2.8248535306814677, "grad_norm": 0.06969384104013443, "learning_rate": 0.01, "loss": 2.0216, "step": 27483 }, { "epoch": 2.825161887141536, "grad_norm": 0.09887497872114182, "learning_rate": 0.01, "loss": 2.0033, "step": 27486 }, { "epoch": 2.8254702436016035, "grad_norm": 0.04231324791908264, "learning_rate": 0.01, "loss": 2.0145, "step": 27489 }, { "epoch": 2.825778600061671, "grad_norm": 0.11022655665874481, "learning_rate": 0.01, "loss": 2.0218, "step": 27492 }, { "epoch": 2.8260869565217392, "grad_norm": 0.16821467876434326, "learning_rate": 0.01, "loss": 2.0137, "step": 27495 }, { "epoch": 2.826395312981807, "grad_norm": 0.13946877419948578, "learning_rate": 0.01, "loss": 2.0046, "step": 27498 }, { "epoch": 2.8267036694418746, "grad_norm": 0.07387804985046387, "learning_rate": 0.01, "loss": 2.0294, "step": 27501 }, { "epoch": 2.8270120259019427, "grad_norm": 0.06437870115041733, "learning_rate": 0.01, "loss": 2.0307, "step": 27504 }, { "epoch": 2.8273203823620103, "grad_norm": 0.04310622811317444, "learning_rate": 0.01, "loss": 2.0155, "step": 27507 }, { "epoch": 2.8276287388220784, "grad_norm": 0.04511455446481705, "learning_rate": 0.01, "loss": 2.0243, "step": 27510 }, { "epoch": 2.827937095282146, "grad_norm": 0.05232998728752136, "learning_rate": 0.01, "loss": 2.0523, "step": 27513 }, { "epoch": 2.828245451742214, "grad_norm": 0.03596142679452896, "learning_rate": 0.01, "loss": 2.0137, "step": 27516 }, { "epoch": 2.828553808202282, "grad_norm": 0.035050373524427414, "learning_rate": 0.01, "loss": 2.0141, "step": 27519 }, { "epoch": 2.8288621646623495, "grad_norm": 0.03525279834866524, "learning_rate": 0.01, "loss": 2.007, "step": 27522 }, { "epoch": 2.8291705211224176, "grad_norm": 0.1629800945520401, "learning_rate": 0.01, "loss": 2.0266, "step": 27525 }, { "epoch": 2.8294788775824853, "grad_norm": 0.06687575578689575, "learning_rate": 0.01, "loss": 2.0099, "step": 27528 }, { "epoch": 2.829787234042553, "grad_norm": 0.05308271199464798, "learning_rate": 0.01, "loss": 2.0116, "step": 27531 }, { "epoch": 2.830095590502621, "grad_norm": 0.05350314825773239, "learning_rate": 0.01, "loss": 1.9883, "step": 27534 }, { "epoch": 2.8304039469626887, "grad_norm": 0.03929050639271736, "learning_rate": 0.01, "loss": 2.024, "step": 27537 }, { "epoch": 2.830712303422757, "grad_norm": 0.037572652101516724, "learning_rate": 0.01, "loss": 2.02, "step": 27540 }, { "epoch": 2.8310206598828245, "grad_norm": 0.05316625535488129, "learning_rate": 0.01, "loss": 2.0101, "step": 27543 }, { "epoch": 2.8313290163428926, "grad_norm": 0.04561341553926468, "learning_rate": 0.01, "loss": 2.0185, "step": 27546 }, { "epoch": 2.8316373728029602, "grad_norm": 0.055242884904146194, "learning_rate": 0.01, "loss": 2.0114, "step": 27549 }, { "epoch": 2.831945729263028, "grad_norm": 0.057326629757881165, "learning_rate": 0.01, "loss": 2.0339, "step": 27552 }, { "epoch": 2.832254085723096, "grad_norm": 0.050966937094926834, "learning_rate": 0.01, "loss": 2.0338, "step": 27555 }, { "epoch": 2.8325624421831637, "grad_norm": 0.07518629729747772, "learning_rate": 0.01, "loss": 2.0217, "step": 27558 }, { "epoch": 2.8328707986432313, "grad_norm": 0.10435313731431961, "learning_rate": 0.01, "loss": 2.0082, "step": 27561 }, { "epoch": 2.8331791551032994, "grad_norm": 0.07228770107030869, "learning_rate": 0.01, "loss": 2.0041, "step": 27564 }, { "epoch": 2.833487511563367, "grad_norm": 0.06778567284345627, "learning_rate": 0.01, "loss": 2.0248, "step": 27567 }, { "epoch": 2.833795868023435, "grad_norm": 0.03698448836803436, "learning_rate": 0.01, "loss": 1.9949, "step": 27570 }, { "epoch": 2.834104224483503, "grad_norm": 0.027820177376270294, "learning_rate": 0.01, "loss": 2.0141, "step": 27573 }, { "epoch": 2.834412580943571, "grad_norm": 0.07996013760566711, "learning_rate": 0.01, "loss": 2.0249, "step": 27576 }, { "epoch": 2.8347209374036386, "grad_norm": 0.04629092290997505, "learning_rate": 0.01, "loss": 1.9933, "step": 27579 }, { "epoch": 2.8350292938637063, "grad_norm": 0.04143141210079193, "learning_rate": 0.01, "loss": 2.039, "step": 27582 }, { "epoch": 2.8353376503237744, "grad_norm": 0.12624233961105347, "learning_rate": 0.01, "loss": 2.0228, "step": 27585 }, { "epoch": 2.835646006783842, "grad_norm": 0.1815887838602066, "learning_rate": 0.01, "loss": 2.0228, "step": 27588 }, { "epoch": 2.8359543632439097, "grad_norm": 0.14360418915748596, "learning_rate": 0.01, "loss": 2.0339, "step": 27591 }, { "epoch": 2.836262719703978, "grad_norm": 0.10748742520809174, "learning_rate": 0.01, "loss": 2.0204, "step": 27594 }, { "epoch": 2.836571076164046, "grad_norm": 0.0488545261323452, "learning_rate": 0.01, "loss": 2.0278, "step": 27597 }, { "epoch": 2.8368794326241136, "grad_norm": 0.03699369728565216, "learning_rate": 0.01, "loss": 2.0013, "step": 27600 }, { "epoch": 2.8371877890841812, "grad_norm": 0.04813402146100998, "learning_rate": 0.01, "loss": 2.0025, "step": 27603 }, { "epoch": 2.8374961455442493, "grad_norm": 0.06493838876485825, "learning_rate": 0.01, "loss": 2.0502, "step": 27606 }, { "epoch": 2.837804502004317, "grad_norm": 0.05451219528913498, "learning_rate": 0.01, "loss": 1.9959, "step": 27609 }, { "epoch": 2.8381128584643847, "grad_norm": 0.04877667874097824, "learning_rate": 0.01, "loss": 2.0236, "step": 27612 }, { "epoch": 2.8384212149244528, "grad_norm": 0.05827799066901207, "learning_rate": 0.01, "loss": 2.0247, "step": 27615 }, { "epoch": 2.8387295713845204, "grad_norm": 0.040546271950006485, "learning_rate": 0.01, "loss": 2.0061, "step": 27618 }, { "epoch": 2.839037927844588, "grad_norm": 0.03764180466532707, "learning_rate": 0.01, "loss": 2.0076, "step": 27621 }, { "epoch": 2.839346284304656, "grad_norm": 0.06349320709705353, "learning_rate": 0.01, "loss": 2.0162, "step": 27624 }, { "epoch": 2.8396546407647243, "grad_norm": 0.07079531252384186, "learning_rate": 0.01, "loss": 2.0038, "step": 27627 }, { "epoch": 2.839962997224792, "grad_norm": 0.05074724927544594, "learning_rate": 0.01, "loss": 2.0329, "step": 27630 }, { "epoch": 2.8402713536848596, "grad_norm": 0.10624159872531891, "learning_rate": 0.01, "loss": 2.0454, "step": 27633 }, { "epoch": 2.8405797101449277, "grad_norm": 0.08726594597101212, "learning_rate": 0.01, "loss": 1.9982, "step": 27636 }, { "epoch": 2.8408880666049954, "grad_norm": 0.04581126943230629, "learning_rate": 0.01, "loss": 2.0092, "step": 27639 }, { "epoch": 2.841196423065063, "grad_norm": 0.042876582592725754, "learning_rate": 0.01, "loss": 1.9851, "step": 27642 }, { "epoch": 2.841504779525131, "grad_norm": 0.03951287642121315, "learning_rate": 0.01, "loss": 2.0118, "step": 27645 }, { "epoch": 2.841813135985199, "grad_norm": 0.04439757391810417, "learning_rate": 0.01, "loss": 2.0018, "step": 27648 }, { "epoch": 2.8421214924452665, "grad_norm": 0.05910231173038483, "learning_rate": 0.01, "loss": 2.025, "step": 27651 }, { "epoch": 2.8424298489053346, "grad_norm": 0.1063590720295906, "learning_rate": 0.01, "loss": 2.0129, "step": 27654 }, { "epoch": 2.8427382053654027, "grad_norm": 0.042655881494283676, "learning_rate": 0.01, "loss": 2.004, "step": 27657 }, { "epoch": 2.8430465618254703, "grad_norm": 0.037402208894491196, "learning_rate": 0.01, "loss": 2.0025, "step": 27660 }, { "epoch": 2.843354918285538, "grad_norm": 0.03912290558218956, "learning_rate": 0.01, "loss": 2.0187, "step": 27663 }, { "epoch": 2.843663274745606, "grad_norm": 0.1316000372171402, "learning_rate": 0.01, "loss": 2.0089, "step": 27666 }, { "epoch": 2.8439716312056738, "grad_norm": 0.10243986546993256, "learning_rate": 0.01, "loss": 2.0419, "step": 27669 }, { "epoch": 2.8442799876657414, "grad_norm": 0.09918151050806046, "learning_rate": 0.01, "loss": 2.0299, "step": 27672 }, { "epoch": 2.8445883441258095, "grad_norm": 0.0572928749024868, "learning_rate": 0.01, "loss": 2.0254, "step": 27675 }, { "epoch": 2.844896700585877, "grad_norm": 0.09781496226787567, "learning_rate": 0.01, "loss": 1.9992, "step": 27678 }, { "epoch": 2.845205057045945, "grad_norm": 0.04969809949398041, "learning_rate": 0.01, "loss": 2.0099, "step": 27681 }, { "epoch": 2.845513413506013, "grad_norm": 0.0894259363412857, "learning_rate": 0.01, "loss": 2.0065, "step": 27684 }, { "epoch": 2.845821769966081, "grad_norm": 0.05788953974843025, "learning_rate": 0.01, "loss": 2.0217, "step": 27687 }, { "epoch": 2.8461301264261487, "grad_norm": 0.07628165185451508, "learning_rate": 0.01, "loss": 2.0191, "step": 27690 }, { "epoch": 2.8464384828862164, "grad_norm": 0.07799988240003586, "learning_rate": 0.01, "loss": 1.9947, "step": 27693 }, { "epoch": 2.8467468393462845, "grad_norm": 0.08373581618070602, "learning_rate": 0.01, "loss": 2.0351, "step": 27696 }, { "epoch": 2.847055195806352, "grad_norm": 0.054138489067554474, "learning_rate": 0.01, "loss": 2.0127, "step": 27699 }, { "epoch": 2.84736355226642, "grad_norm": 0.03457311540842056, "learning_rate": 0.01, "loss": 2.0082, "step": 27702 }, { "epoch": 2.847671908726488, "grad_norm": 0.04888417571783066, "learning_rate": 0.01, "loss": 2.0034, "step": 27705 }, { "epoch": 2.8479802651865556, "grad_norm": 0.09924766421318054, "learning_rate": 0.01, "loss": 2.0514, "step": 27708 }, { "epoch": 2.848288621646623, "grad_norm": 0.0531487911939621, "learning_rate": 0.01, "loss": 2.01, "step": 27711 }, { "epoch": 2.8485969781066913, "grad_norm": 0.07373910397291183, "learning_rate": 0.01, "loss": 2.0177, "step": 27714 }, { "epoch": 2.8489053345667594, "grad_norm": 0.09154459089040756, "learning_rate": 0.01, "loss": 2.0375, "step": 27717 }, { "epoch": 2.849213691026827, "grad_norm": 0.04007700830698013, "learning_rate": 0.01, "loss": 2.0447, "step": 27720 }, { "epoch": 2.8495220474868947, "grad_norm": 0.06406736373901367, "learning_rate": 0.01, "loss": 2.0262, "step": 27723 }, { "epoch": 2.849830403946963, "grad_norm": 0.06983067840337753, "learning_rate": 0.01, "loss": 2.0126, "step": 27726 }, { "epoch": 2.8501387604070305, "grad_norm": 0.04832616075873375, "learning_rate": 0.01, "loss": 2.0454, "step": 27729 }, { "epoch": 2.850447116867098, "grad_norm": 0.09404818713665009, "learning_rate": 0.01, "loss": 2.0239, "step": 27732 }, { "epoch": 2.8507554733271663, "grad_norm": 0.12212470918893814, "learning_rate": 0.01, "loss": 1.9912, "step": 27735 }, { "epoch": 2.851063829787234, "grad_norm": 0.08778437972068787, "learning_rate": 0.01, "loss": 2.0503, "step": 27738 }, { "epoch": 2.851372186247302, "grad_norm": 0.08406031876802444, "learning_rate": 0.01, "loss": 2.0432, "step": 27741 }, { "epoch": 2.8516805427073697, "grad_norm": 0.07672454416751862, "learning_rate": 0.01, "loss": 1.999, "step": 27744 }, { "epoch": 2.851988899167438, "grad_norm": 0.12199164927005768, "learning_rate": 0.01, "loss": 2.011, "step": 27747 }, { "epoch": 2.8522972556275055, "grad_norm": 0.05166463926434517, "learning_rate": 0.01, "loss": 2.0053, "step": 27750 }, { "epoch": 2.852605612087573, "grad_norm": 0.11618038266897202, "learning_rate": 0.01, "loss": 1.9846, "step": 27753 }, { "epoch": 2.8529139685476412, "grad_norm": 0.06255706399679184, "learning_rate": 0.01, "loss": 2.0253, "step": 27756 }, { "epoch": 2.853222325007709, "grad_norm": 0.04340292140841484, "learning_rate": 0.01, "loss": 2.0085, "step": 27759 }, { "epoch": 2.8535306814677766, "grad_norm": 0.06595482677221298, "learning_rate": 0.01, "loss": 1.9913, "step": 27762 }, { "epoch": 2.8538390379278447, "grad_norm": 0.03980451449751854, "learning_rate": 0.01, "loss": 1.9994, "step": 27765 }, { "epoch": 2.8541473943879123, "grad_norm": 0.035016849637031555, "learning_rate": 0.01, "loss": 2.0156, "step": 27768 }, { "epoch": 2.8544557508479804, "grad_norm": 0.07189803570508957, "learning_rate": 0.01, "loss": 2.0156, "step": 27771 }, { "epoch": 2.854764107308048, "grad_norm": 0.0759616568684578, "learning_rate": 0.01, "loss": 2.0058, "step": 27774 }, { "epoch": 2.855072463768116, "grad_norm": 0.05985911190509796, "learning_rate": 0.01, "loss": 2.0115, "step": 27777 }, { "epoch": 2.855380820228184, "grad_norm": 0.0529702752828598, "learning_rate": 0.01, "loss": 2.0315, "step": 27780 }, { "epoch": 2.8556891766882515, "grad_norm": 0.046540237963199615, "learning_rate": 0.01, "loss": 2.0035, "step": 27783 }, { "epoch": 2.8559975331483196, "grad_norm": 0.04505128040909767, "learning_rate": 0.01, "loss": 2.0385, "step": 27786 }, { "epoch": 2.8563058896083873, "grad_norm": 0.041494566947221756, "learning_rate": 0.01, "loss": 2.0239, "step": 27789 }, { "epoch": 2.856614246068455, "grad_norm": 0.038743190467357635, "learning_rate": 0.01, "loss": 2.0207, "step": 27792 }, { "epoch": 2.856922602528523, "grad_norm": 0.062206536531448364, "learning_rate": 0.01, "loss": 2.0133, "step": 27795 }, { "epoch": 2.8572309589885907, "grad_norm": 0.19090187549591064, "learning_rate": 0.01, "loss": 2.0292, "step": 27798 }, { "epoch": 2.857539315448659, "grad_norm": 0.06649639457464218, "learning_rate": 0.01, "loss": 2.016, "step": 27801 }, { "epoch": 2.8578476719087265, "grad_norm": 0.07956647872924805, "learning_rate": 0.01, "loss": 1.9797, "step": 27804 }, { "epoch": 2.8581560283687946, "grad_norm": 0.050361573696136475, "learning_rate": 0.01, "loss": 1.9901, "step": 27807 }, { "epoch": 2.8584643848288622, "grad_norm": 0.036132823675870895, "learning_rate": 0.01, "loss": 2.0122, "step": 27810 }, { "epoch": 2.85877274128893, "grad_norm": 0.04948203265666962, "learning_rate": 0.01, "loss": 2.0104, "step": 27813 }, { "epoch": 2.859081097748998, "grad_norm": 0.07491700351238251, "learning_rate": 0.01, "loss": 2.01, "step": 27816 }, { "epoch": 2.8593894542090657, "grad_norm": 0.03278898447751999, "learning_rate": 0.01, "loss": 2.0224, "step": 27819 }, { "epoch": 2.8596978106691333, "grad_norm": 0.04186544567346573, "learning_rate": 0.01, "loss": 1.9922, "step": 27822 }, { "epoch": 2.8600061671292014, "grad_norm": 0.07123599201440811, "learning_rate": 0.01, "loss": 2.0019, "step": 27825 }, { "epoch": 2.860314523589269, "grad_norm": 0.038315288722515106, "learning_rate": 0.01, "loss": 1.9854, "step": 27828 }, { "epoch": 2.860622880049337, "grad_norm": 0.06678714603185654, "learning_rate": 0.01, "loss": 2.0151, "step": 27831 }, { "epoch": 2.860931236509405, "grad_norm": 0.10680954158306122, "learning_rate": 0.01, "loss": 2.0067, "step": 27834 }, { "epoch": 2.861239592969473, "grad_norm": 0.13149844110012054, "learning_rate": 0.01, "loss": 2.0266, "step": 27837 }, { "epoch": 2.8615479494295406, "grad_norm": 0.04700513929128647, "learning_rate": 0.01, "loss": 2.0548, "step": 27840 }, { "epoch": 2.8618563058896083, "grad_norm": 0.03596799075603485, "learning_rate": 0.01, "loss": 2.0206, "step": 27843 }, { "epoch": 2.8621646623496764, "grad_norm": 0.042376477271318436, "learning_rate": 0.01, "loss": 2.0363, "step": 27846 }, { "epoch": 2.862473018809744, "grad_norm": 0.054233819246292114, "learning_rate": 0.01, "loss": 2.0324, "step": 27849 }, { "epoch": 2.8627813752698117, "grad_norm": 0.04501786455512047, "learning_rate": 0.01, "loss": 2.0211, "step": 27852 }, { "epoch": 2.86308973172988, "grad_norm": 0.06443289667367935, "learning_rate": 0.01, "loss": 1.9847, "step": 27855 }, { "epoch": 2.8633980881899475, "grad_norm": 0.05772462114691734, "learning_rate": 0.01, "loss": 2.0152, "step": 27858 }, { "epoch": 2.8637064446500156, "grad_norm": 0.039729420095682144, "learning_rate": 0.01, "loss": 2.0233, "step": 27861 }, { "epoch": 2.864014801110083, "grad_norm": 0.1330452859401703, "learning_rate": 0.01, "loss": 2.0166, "step": 27864 }, { "epoch": 2.8643231575701513, "grad_norm": 0.03952759504318237, "learning_rate": 0.01, "loss": 2.0151, "step": 27867 }, { "epoch": 2.864631514030219, "grad_norm": 0.05294906720519066, "learning_rate": 0.01, "loss": 2.0078, "step": 27870 }, { "epoch": 2.8649398704902866, "grad_norm": 0.07945666462182999, "learning_rate": 0.01, "loss": 2.0237, "step": 27873 }, { "epoch": 2.8652482269503547, "grad_norm": 0.09529011696577072, "learning_rate": 0.01, "loss": 2.0017, "step": 27876 }, { "epoch": 2.8655565834104224, "grad_norm": 0.038869790732860565, "learning_rate": 0.01, "loss": 2.0051, "step": 27879 }, { "epoch": 2.86586493987049, "grad_norm": 0.0421532541513443, "learning_rate": 0.01, "loss": 2.019, "step": 27882 }, { "epoch": 2.866173296330558, "grad_norm": 0.037143412977457047, "learning_rate": 0.01, "loss": 2.0196, "step": 27885 }, { "epoch": 2.866481652790626, "grad_norm": 0.0359092615544796, "learning_rate": 0.01, "loss": 2.0076, "step": 27888 }, { "epoch": 2.866790009250694, "grad_norm": 0.04631698876619339, "learning_rate": 0.01, "loss": 2.0131, "step": 27891 }, { "epoch": 2.8670983657107616, "grad_norm": 0.06015830859541893, "learning_rate": 0.01, "loss": 2.0341, "step": 27894 }, { "epoch": 2.8674067221708297, "grad_norm": 0.09565176069736481, "learning_rate": 0.01, "loss": 2.0251, "step": 27897 }, { "epoch": 2.8677150786308974, "grad_norm": 0.05290444567799568, "learning_rate": 0.01, "loss": 2.0412, "step": 27900 }, { "epoch": 2.868023435090965, "grad_norm": 0.03383943438529968, "learning_rate": 0.01, "loss": 1.9902, "step": 27903 }, { "epoch": 2.868331791551033, "grad_norm": 0.05390051752328873, "learning_rate": 0.01, "loss": 2.0159, "step": 27906 }, { "epoch": 2.868640148011101, "grad_norm": 0.08597470074892044, "learning_rate": 0.01, "loss": 2.0308, "step": 27909 }, { "epoch": 2.8689485044711684, "grad_norm": 0.059518035501241684, "learning_rate": 0.01, "loss": 2.0169, "step": 27912 }, { "epoch": 2.8692568609312366, "grad_norm": 0.04992047697305679, "learning_rate": 0.01, "loss": 2.0267, "step": 27915 }, { "epoch": 2.869565217391304, "grad_norm": 0.04728172719478607, "learning_rate": 0.01, "loss": 2.0266, "step": 27918 }, { "epoch": 2.8698735738513723, "grad_norm": 0.038830891251564026, "learning_rate": 0.01, "loss": 2.0197, "step": 27921 }, { "epoch": 2.87018193031144, "grad_norm": 0.039699580520391464, "learning_rate": 0.01, "loss": 2.0144, "step": 27924 }, { "epoch": 2.870490286771508, "grad_norm": 0.10109611600637436, "learning_rate": 0.01, "loss": 2.0033, "step": 27927 }, { "epoch": 2.8707986432315757, "grad_norm": 0.07930929958820343, "learning_rate": 0.01, "loss": 2.0463, "step": 27930 }, { "epoch": 2.8711069996916434, "grad_norm": 0.05544453486800194, "learning_rate": 0.01, "loss": 2.0342, "step": 27933 }, { "epoch": 2.8714153561517115, "grad_norm": 0.11004924774169922, "learning_rate": 0.01, "loss": 2.0199, "step": 27936 }, { "epoch": 2.871723712611779, "grad_norm": 0.09423034638166428, "learning_rate": 0.01, "loss": 2.0232, "step": 27939 }, { "epoch": 2.872032069071847, "grad_norm": 0.05001804232597351, "learning_rate": 0.01, "loss": 2.0032, "step": 27942 }, { "epoch": 2.872340425531915, "grad_norm": 0.04487982019782066, "learning_rate": 0.01, "loss": 2.0154, "step": 27945 }, { "epoch": 2.8726487819919826, "grad_norm": 0.033674515783786774, "learning_rate": 0.01, "loss": 2.0138, "step": 27948 }, { "epoch": 2.8729571384520507, "grad_norm": 0.040895912796258926, "learning_rate": 0.01, "loss": 1.9902, "step": 27951 }, { "epoch": 2.8732654949121184, "grad_norm": 0.03655741363763809, "learning_rate": 0.01, "loss": 2.0201, "step": 27954 }, { "epoch": 2.8735738513721865, "grad_norm": 0.06042269244790077, "learning_rate": 0.01, "loss": 2.0354, "step": 27957 }, { "epoch": 2.873882207832254, "grad_norm": 0.0660943016409874, "learning_rate": 0.01, "loss": 2.0312, "step": 27960 }, { "epoch": 2.874190564292322, "grad_norm": 0.0681222453713417, "learning_rate": 0.01, "loss": 2.0059, "step": 27963 }, { "epoch": 2.87449892075239, "grad_norm": 0.05666188523173332, "learning_rate": 0.01, "loss": 2.022, "step": 27966 }, { "epoch": 2.8748072772124575, "grad_norm": 0.09911254048347473, "learning_rate": 0.01, "loss": 2.0056, "step": 27969 }, { "epoch": 2.875115633672525, "grad_norm": 0.0730089545249939, "learning_rate": 0.01, "loss": 1.9927, "step": 27972 }, { "epoch": 2.8754239901325933, "grad_norm": 0.05434833839535713, "learning_rate": 0.01, "loss": 1.9998, "step": 27975 }, { "epoch": 2.875732346592661, "grad_norm": 0.07539419084787369, "learning_rate": 0.01, "loss": 2.0304, "step": 27978 }, { "epoch": 2.876040703052729, "grad_norm": 0.04300360381603241, "learning_rate": 0.01, "loss": 2.0046, "step": 27981 }, { "epoch": 2.8763490595127967, "grad_norm": 0.1077357679605484, "learning_rate": 0.01, "loss": 2.0344, "step": 27984 }, { "epoch": 2.876657415972865, "grad_norm": 0.08229992538690567, "learning_rate": 0.01, "loss": 2.0412, "step": 27987 }, { "epoch": 2.8769657724329325, "grad_norm": 0.12404177337884903, "learning_rate": 0.01, "loss": 2.0047, "step": 27990 }, { "epoch": 2.877274128893, "grad_norm": 0.06843412667512894, "learning_rate": 0.01, "loss": 2.0144, "step": 27993 }, { "epoch": 2.8775824853530683, "grad_norm": 0.09625189751386642, "learning_rate": 0.01, "loss": 2.0435, "step": 27996 }, { "epoch": 2.877890841813136, "grad_norm": 0.04479416087269783, "learning_rate": 0.01, "loss": 2.0355, "step": 27999 }, { "epoch": 2.8781991982732036, "grad_norm": 0.04933414235711098, "learning_rate": 0.01, "loss": 2.0018, "step": 28002 }, { "epoch": 2.8785075547332717, "grad_norm": 0.05558469891548157, "learning_rate": 0.01, "loss": 2.03, "step": 28005 }, { "epoch": 2.8788159111933393, "grad_norm": 0.04813915863633156, "learning_rate": 0.01, "loss": 2.0227, "step": 28008 }, { "epoch": 2.8791242676534075, "grad_norm": 0.06402178108692169, "learning_rate": 0.01, "loss": 2.0023, "step": 28011 }, { "epoch": 2.879432624113475, "grad_norm": 0.03481597453355789, "learning_rate": 0.01, "loss": 2.032, "step": 28014 }, { "epoch": 2.879740980573543, "grad_norm": 0.03682602196931839, "learning_rate": 0.01, "loss": 2.0198, "step": 28017 }, { "epoch": 2.880049337033611, "grad_norm": 0.11671306192874908, "learning_rate": 0.01, "loss": 2.0025, "step": 28020 }, { "epoch": 2.8803576934936785, "grad_norm": 0.051900725811719894, "learning_rate": 0.01, "loss": 2.0128, "step": 28023 }, { "epoch": 2.8806660499537466, "grad_norm": 0.034372419118881226, "learning_rate": 0.01, "loss": 2.0146, "step": 28026 }, { "epoch": 2.8809744064138143, "grad_norm": 0.05165507644414902, "learning_rate": 0.01, "loss": 2.0139, "step": 28029 }, { "epoch": 2.881282762873882, "grad_norm": 0.046683549880981445, "learning_rate": 0.01, "loss": 2.0454, "step": 28032 }, { "epoch": 2.88159111933395, "grad_norm": 0.04153186455368996, "learning_rate": 0.01, "loss": 2.022, "step": 28035 }, { "epoch": 2.8818994757940177, "grad_norm": 0.07401791960000992, "learning_rate": 0.01, "loss": 2.0249, "step": 28038 }, { "epoch": 2.882207832254086, "grad_norm": 0.08125065267086029, "learning_rate": 0.01, "loss": 2.0042, "step": 28041 }, { "epoch": 2.8825161887141535, "grad_norm": 0.076799176633358, "learning_rate": 0.01, "loss": 1.9924, "step": 28044 }, { "epoch": 2.8828245451742216, "grad_norm": 0.09814302623271942, "learning_rate": 0.01, "loss": 2.028, "step": 28047 }, { "epoch": 2.8831329016342893, "grad_norm": 0.06998278200626373, "learning_rate": 0.01, "loss": 2.0036, "step": 28050 }, { "epoch": 2.883441258094357, "grad_norm": 0.057476770132780075, "learning_rate": 0.01, "loss": 2.0134, "step": 28053 }, { "epoch": 2.883749614554425, "grad_norm": 0.037239111959934235, "learning_rate": 0.01, "loss": 1.9987, "step": 28056 }, { "epoch": 2.8840579710144927, "grad_norm": 0.08055179566144943, "learning_rate": 0.01, "loss": 2.0267, "step": 28059 }, { "epoch": 2.8843663274745603, "grad_norm": 0.07050419598817825, "learning_rate": 0.01, "loss": 2.0294, "step": 28062 }, { "epoch": 2.8846746839346284, "grad_norm": 0.05187792330980301, "learning_rate": 0.01, "loss": 2.004, "step": 28065 }, { "epoch": 2.884983040394696, "grad_norm": 0.03746993839740753, "learning_rate": 0.01, "loss": 2.0219, "step": 28068 }, { "epoch": 2.885291396854764, "grad_norm": 0.09767074137926102, "learning_rate": 0.01, "loss": 2.0352, "step": 28071 }, { "epoch": 2.885599753314832, "grad_norm": 0.08471439778804779, "learning_rate": 0.01, "loss": 2.0026, "step": 28074 }, { "epoch": 2.8859081097749, "grad_norm": 0.061311863362789154, "learning_rate": 0.01, "loss": 2.0323, "step": 28077 }, { "epoch": 2.8862164662349676, "grad_norm": 0.06402043253183365, "learning_rate": 0.01, "loss": 2.0375, "step": 28080 }, { "epoch": 2.8865248226950353, "grad_norm": 0.06756860762834549, "learning_rate": 0.01, "loss": 2.0232, "step": 28083 }, { "epoch": 2.8868331791551034, "grad_norm": 0.07851870357990265, "learning_rate": 0.01, "loss": 2.0178, "step": 28086 }, { "epoch": 2.887141535615171, "grad_norm": 0.08102571219205856, "learning_rate": 0.01, "loss": 2.0162, "step": 28089 }, { "epoch": 2.8874498920752387, "grad_norm": 0.04745829850435257, "learning_rate": 0.01, "loss": 2.0267, "step": 28092 }, { "epoch": 2.887758248535307, "grad_norm": 0.04496223106980324, "learning_rate": 0.01, "loss": 2.012, "step": 28095 }, { "epoch": 2.8880666049953745, "grad_norm": 0.09044700860977173, "learning_rate": 0.01, "loss": 2.0354, "step": 28098 }, { "epoch": 2.8883749614554426, "grad_norm": 0.05591853708028793, "learning_rate": 0.01, "loss": 2.0033, "step": 28101 }, { "epoch": 2.8886833179155103, "grad_norm": 0.07982437312602997, "learning_rate": 0.01, "loss": 2.0423, "step": 28104 }, { "epoch": 2.8889916743755784, "grad_norm": 0.039345480501651764, "learning_rate": 0.01, "loss": 1.996, "step": 28107 }, { "epoch": 2.889300030835646, "grad_norm": 0.046677373349666595, "learning_rate": 0.01, "loss": 2.0204, "step": 28110 }, { "epoch": 2.8896083872957137, "grad_norm": 0.060302551835775375, "learning_rate": 0.01, "loss": 2.019, "step": 28113 }, { "epoch": 2.889916743755782, "grad_norm": 0.09422065317630768, "learning_rate": 0.01, "loss": 2.0223, "step": 28116 }, { "epoch": 2.8902251002158494, "grad_norm": 0.03713101148605347, "learning_rate": 0.01, "loss": 2.0008, "step": 28119 }, { "epoch": 2.890533456675917, "grad_norm": 0.062431566417217255, "learning_rate": 0.01, "loss": 2.0213, "step": 28122 }, { "epoch": 2.890841813135985, "grad_norm": 0.052230529487133026, "learning_rate": 0.01, "loss": 2.0233, "step": 28125 }, { "epoch": 2.8911501695960533, "grad_norm": 0.05514863133430481, "learning_rate": 0.01, "loss": 2.0233, "step": 28128 }, { "epoch": 2.891458526056121, "grad_norm": 0.04317644238471985, "learning_rate": 0.01, "loss": 2.0111, "step": 28131 }, { "epoch": 2.8917668825161886, "grad_norm": 0.05251277983188629, "learning_rate": 0.01, "loss": 2.0306, "step": 28134 }, { "epoch": 2.8920752389762567, "grad_norm": 0.10419400781393051, "learning_rate": 0.01, "loss": 2.0253, "step": 28137 }, { "epoch": 2.8923835954363244, "grad_norm": 0.08806058764457703, "learning_rate": 0.01, "loss": 2.0079, "step": 28140 }, { "epoch": 2.892691951896392, "grad_norm": 0.059875234961509705, "learning_rate": 0.01, "loss": 2.0066, "step": 28143 }, { "epoch": 2.89300030835646, "grad_norm": 0.036619991064071655, "learning_rate": 0.01, "loss": 2.0208, "step": 28146 }, { "epoch": 2.893308664816528, "grad_norm": 0.034454282373189926, "learning_rate": 0.01, "loss": 2.0068, "step": 28149 }, { "epoch": 2.8936170212765955, "grad_norm": 0.05209527164697647, "learning_rate": 0.01, "loss": 2.0252, "step": 28152 }, { "epoch": 2.8939253777366636, "grad_norm": 0.11549924314022064, "learning_rate": 0.01, "loss": 1.9974, "step": 28155 }, { "epoch": 2.8942337341967317, "grad_norm": 0.06887582689523697, "learning_rate": 0.01, "loss": 2.0, "step": 28158 }, { "epoch": 2.8945420906567993, "grad_norm": 0.046488743275403976, "learning_rate": 0.01, "loss": 1.9827, "step": 28161 }, { "epoch": 2.894850447116867, "grad_norm": 0.05086890980601311, "learning_rate": 0.01, "loss": 2.0073, "step": 28164 }, { "epoch": 2.895158803576935, "grad_norm": 0.03719864413142204, "learning_rate": 0.01, "loss": 2.0333, "step": 28167 }, { "epoch": 2.8954671600370028, "grad_norm": 0.059325605630874634, "learning_rate": 0.01, "loss": 2.0056, "step": 28170 }, { "epoch": 2.8957755164970704, "grad_norm": 0.088272824883461, "learning_rate": 0.01, "loss": 2.0114, "step": 28173 }, { "epoch": 2.8960838729571385, "grad_norm": 0.0874638557434082, "learning_rate": 0.01, "loss": 2.0158, "step": 28176 }, { "epoch": 2.896392229417206, "grad_norm": 0.059453610330820084, "learning_rate": 0.01, "loss": 1.9919, "step": 28179 }, { "epoch": 2.896700585877274, "grad_norm": 0.05221414193511009, "learning_rate": 0.01, "loss": 2.0056, "step": 28182 }, { "epoch": 2.897008942337342, "grad_norm": 0.03308747336268425, "learning_rate": 0.01, "loss": 2.0072, "step": 28185 }, { "epoch": 2.89731729879741, "grad_norm": 0.07506965100765228, "learning_rate": 0.01, "loss": 2.0022, "step": 28188 }, { "epoch": 2.8976256552574777, "grad_norm": 0.06189137324690819, "learning_rate": 0.01, "loss": 2.0046, "step": 28191 }, { "epoch": 2.8979340117175454, "grad_norm": 0.07855790853500366, "learning_rate": 0.01, "loss": 2.0307, "step": 28194 }, { "epoch": 2.8982423681776135, "grad_norm": 0.08880770951509476, "learning_rate": 0.01, "loss": 2.0242, "step": 28197 }, { "epoch": 2.898550724637681, "grad_norm": 0.05860179290175438, "learning_rate": 0.01, "loss": 2.0394, "step": 28200 }, { "epoch": 2.898859081097749, "grad_norm": 0.06037219241261482, "learning_rate": 0.01, "loss": 2.0131, "step": 28203 }, { "epoch": 2.899167437557817, "grad_norm": 0.16910460591316223, "learning_rate": 0.01, "loss": 2.0184, "step": 28206 }, { "epoch": 2.8994757940178846, "grad_norm": 0.05912697687745094, "learning_rate": 0.01, "loss": 2.0141, "step": 28209 }, { "epoch": 2.8997841504779522, "grad_norm": 0.04245199263095856, "learning_rate": 0.01, "loss": 2.0206, "step": 28212 }, { "epoch": 2.9000925069380203, "grad_norm": 0.03307129442691803, "learning_rate": 0.01, "loss": 2.0258, "step": 28215 }, { "epoch": 2.9004008633980884, "grad_norm": 0.03456464782357216, "learning_rate": 0.01, "loss": 2.0219, "step": 28218 }, { "epoch": 2.900709219858156, "grad_norm": 0.04525744169950485, "learning_rate": 0.01, "loss": 2.0417, "step": 28221 }, { "epoch": 2.9010175763182238, "grad_norm": 0.046930085867643356, "learning_rate": 0.01, "loss": 2.0296, "step": 28224 }, { "epoch": 2.901325932778292, "grad_norm": 0.07545941323041916, "learning_rate": 0.01, "loss": 2.028, "step": 28227 }, { "epoch": 2.9016342892383595, "grad_norm": 0.045901909470558167, "learning_rate": 0.01, "loss": 2.0143, "step": 28230 }, { "epoch": 2.901942645698427, "grad_norm": 0.06182632967829704, "learning_rate": 0.01, "loss": 2.0184, "step": 28233 }, { "epoch": 2.9022510021584953, "grad_norm": 0.1126553937792778, "learning_rate": 0.01, "loss": 2.0134, "step": 28236 }, { "epoch": 2.902559358618563, "grad_norm": 0.05637908726930618, "learning_rate": 0.01, "loss": 2.0023, "step": 28239 }, { "epoch": 2.902867715078631, "grad_norm": 0.03390496224164963, "learning_rate": 0.01, "loss": 2.0097, "step": 28242 }, { "epoch": 2.9031760715386987, "grad_norm": 0.1719658523797989, "learning_rate": 0.01, "loss": 2.0446, "step": 28245 }, { "epoch": 2.903484427998767, "grad_norm": 0.054201435297727585, "learning_rate": 0.01, "loss": 2.0101, "step": 28248 }, { "epoch": 2.9037927844588345, "grad_norm": 0.05339137092232704, "learning_rate": 0.01, "loss": 2.024, "step": 28251 }, { "epoch": 2.904101140918902, "grad_norm": 0.0490683875977993, "learning_rate": 0.01, "loss": 2.0112, "step": 28254 }, { "epoch": 2.9044094973789703, "grad_norm": 0.048101890832185745, "learning_rate": 0.01, "loss": 2.0244, "step": 28257 }, { "epoch": 2.904717853839038, "grad_norm": 0.034970205277204514, "learning_rate": 0.01, "loss": 2.0128, "step": 28260 }, { "epoch": 2.9050262102991056, "grad_norm": 0.05500125139951706, "learning_rate": 0.01, "loss": 1.9963, "step": 28263 }, { "epoch": 2.9053345667591737, "grad_norm": 0.0717696025967598, "learning_rate": 0.01, "loss": 2.0119, "step": 28266 }, { "epoch": 2.9056429232192413, "grad_norm": 0.08134905993938446, "learning_rate": 0.01, "loss": 2.0343, "step": 28269 }, { "epoch": 2.9059512796793094, "grad_norm": 0.13240204751491547, "learning_rate": 0.01, "loss": 2.0268, "step": 28272 }, { "epoch": 2.906259636139377, "grad_norm": 0.04060851037502289, "learning_rate": 0.01, "loss": 2.0163, "step": 28275 }, { "epoch": 2.906567992599445, "grad_norm": 0.04560050368309021, "learning_rate": 0.01, "loss": 2.0037, "step": 28278 }, { "epoch": 2.906876349059513, "grad_norm": 0.051935113966464996, "learning_rate": 0.01, "loss": 2.0025, "step": 28281 }, { "epoch": 2.9071847055195805, "grad_norm": 0.04502344876527786, "learning_rate": 0.01, "loss": 2.0212, "step": 28284 }, { "epoch": 2.9074930619796486, "grad_norm": 0.04561367630958557, "learning_rate": 0.01, "loss": 2.0369, "step": 28287 }, { "epoch": 2.9078014184397163, "grad_norm": 0.05686529353260994, "learning_rate": 0.01, "loss": 2.0217, "step": 28290 }, { "epoch": 2.908109774899784, "grad_norm": 0.06489894539117813, "learning_rate": 0.01, "loss": 2.0126, "step": 28293 }, { "epoch": 2.908418131359852, "grad_norm": 0.08427495509386063, "learning_rate": 0.01, "loss": 2.0, "step": 28296 }, { "epoch": 2.9087264878199197, "grad_norm": 0.08298421651124954, "learning_rate": 0.01, "loss": 2.0273, "step": 28299 }, { "epoch": 2.909034844279988, "grad_norm": 0.035743072628974915, "learning_rate": 0.01, "loss": 2.0167, "step": 28302 }, { "epoch": 2.9093432007400555, "grad_norm": 0.11177317798137665, "learning_rate": 0.01, "loss": 2.0037, "step": 28305 }, { "epoch": 2.9096515572001236, "grad_norm": 0.10962017625570297, "learning_rate": 0.01, "loss": 2.0155, "step": 28308 }, { "epoch": 2.9099599136601912, "grad_norm": 0.033734966069459915, "learning_rate": 0.01, "loss": 2.0131, "step": 28311 }, { "epoch": 2.910268270120259, "grad_norm": 0.050082772970199585, "learning_rate": 0.01, "loss": 2.0117, "step": 28314 }, { "epoch": 2.910576626580327, "grad_norm": 0.04048197343945503, "learning_rate": 0.01, "loss": 2.0085, "step": 28317 }, { "epoch": 2.9108849830403947, "grad_norm": 0.039657291024923325, "learning_rate": 0.01, "loss": 2.0233, "step": 28320 }, { "epoch": 2.9111933395004623, "grad_norm": 0.0639658197760582, "learning_rate": 0.01, "loss": 2.0001, "step": 28323 }, { "epoch": 2.9115016959605304, "grad_norm": 0.09306667000055313, "learning_rate": 0.01, "loss": 2.019, "step": 28326 }, { "epoch": 2.911810052420598, "grad_norm": 0.06027163937687874, "learning_rate": 0.01, "loss": 2.0341, "step": 28329 }, { "epoch": 2.912118408880666, "grad_norm": 0.08570882678031921, "learning_rate": 0.01, "loss": 2.0192, "step": 28332 }, { "epoch": 2.912426765340734, "grad_norm": 0.07936012744903564, "learning_rate": 0.01, "loss": 2.0165, "step": 28335 }, { "epoch": 2.912735121800802, "grad_norm": 0.06479290872812271, "learning_rate": 0.01, "loss": 2.027, "step": 28338 }, { "epoch": 2.9130434782608696, "grad_norm": 0.06973425298929214, "learning_rate": 0.01, "loss": 2.0037, "step": 28341 }, { "epoch": 2.9133518347209373, "grad_norm": 0.05557083338499069, "learning_rate": 0.01, "loss": 2.0323, "step": 28344 }, { "epoch": 2.9136601911810054, "grad_norm": 0.04901493340730667, "learning_rate": 0.01, "loss": 2.0088, "step": 28347 }, { "epoch": 2.913968547641073, "grad_norm": 0.1036488264799118, "learning_rate": 0.01, "loss": 2.0486, "step": 28350 }, { "epoch": 2.9142769041011407, "grad_norm": 0.04318247362971306, "learning_rate": 0.01, "loss": 2.0195, "step": 28353 }, { "epoch": 2.914585260561209, "grad_norm": 0.0404491052031517, "learning_rate": 0.01, "loss": 2.0237, "step": 28356 }, { "epoch": 2.9148936170212765, "grad_norm": 0.03727111592888832, "learning_rate": 0.01, "loss": 2.0153, "step": 28359 }, { "epoch": 2.9152019734813446, "grad_norm": 0.04736052453517914, "learning_rate": 0.01, "loss": 2.0487, "step": 28362 }, { "epoch": 2.9155103299414122, "grad_norm": 0.13712109625339508, "learning_rate": 0.01, "loss": 2.0434, "step": 28365 }, { "epoch": 2.9158186864014803, "grad_norm": 0.046428218483924866, "learning_rate": 0.01, "loss": 2.0177, "step": 28368 }, { "epoch": 2.916127042861548, "grad_norm": 0.036403872072696686, "learning_rate": 0.01, "loss": 2.0144, "step": 28371 }, { "epoch": 2.9164353993216157, "grad_norm": 0.04359270632266998, "learning_rate": 0.01, "loss": 2.0386, "step": 28374 }, { "epoch": 2.9167437557816838, "grad_norm": 0.06155150756239891, "learning_rate": 0.01, "loss": 2.0014, "step": 28377 }, { "epoch": 2.9170521122417514, "grad_norm": 0.035659730434417725, "learning_rate": 0.01, "loss": 2.012, "step": 28380 }, { "epoch": 2.917360468701819, "grad_norm": 0.09453250467777252, "learning_rate": 0.01, "loss": 2.0006, "step": 28383 }, { "epoch": 2.917668825161887, "grad_norm": 0.062337253242731094, "learning_rate": 0.01, "loss": 2.0131, "step": 28386 }, { "epoch": 2.917977181621955, "grad_norm": 0.05488646402955055, "learning_rate": 0.01, "loss": 2.0212, "step": 28389 }, { "epoch": 2.918285538082023, "grad_norm": 0.07092556357383728, "learning_rate": 0.01, "loss": 2.0123, "step": 28392 }, { "epoch": 2.9185938945420906, "grad_norm": 0.10335639864206314, "learning_rate": 0.01, "loss": 2.0336, "step": 28395 }, { "epoch": 2.9189022510021587, "grad_norm": 0.1187339797616005, "learning_rate": 0.01, "loss": 2.0218, "step": 28398 }, { "epoch": 2.9192106074622264, "grad_norm": 0.09254027903079987, "learning_rate": 0.01, "loss": 2.0077, "step": 28401 }, { "epoch": 2.919518963922294, "grad_norm": 0.04120078682899475, "learning_rate": 0.01, "loss": 2.032, "step": 28404 }, { "epoch": 2.919827320382362, "grad_norm": 0.04366682097315788, "learning_rate": 0.01, "loss": 2.0128, "step": 28407 }, { "epoch": 2.92013567684243, "grad_norm": 0.03700088709592819, "learning_rate": 0.01, "loss": 2.0027, "step": 28410 }, { "epoch": 2.9204440333024975, "grad_norm": 0.03409232571721077, "learning_rate": 0.01, "loss": 2.0068, "step": 28413 }, { "epoch": 2.9207523897625656, "grad_norm": 0.04503092169761658, "learning_rate": 0.01, "loss": 2.0042, "step": 28416 }, { "epoch": 2.9210607462226332, "grad_norm": 0.04988139122724533, "learning_rate": 0.01, "loss": 2.0418, "step": 28419 }, { "epoch": 2.9213691026827013, "grad_norm": 0.05121064558625221, "learning_rate": 0.01, "loss": 2.0241, "step": 28422 }, { "epoch": 2.921677459142769, "grad_norm": 0.03916258364915848, "learning_rate": 0.01, "loss": 2.0461, "step": 28425 }, { "epoch": 2.921985815602837, "grad_norm": 0.1349031776189804, "learning_rate": 0.01, "loss": 2.0186, "step": 28428 }, { "epoch": 2.9222941720629048, "grad_norm": 0.12042077630758286, "learning_rate": 0.01, "loss": 2.0378, "step": 28431 }, { "epoch": 2.9226025285229724, "grad_norm": 0.08157286792993546, "learning_rate": 0.01, "loss": 1.9957, "step": 28434 }, { "epoch": 2.9229108849830405, "grad_norm": 0.07750184834003448, "learning_rate": 0.01, "loss": 2.0193, "step": 28437 }, { "epoch": 2.923219241443108, "grad_norm": 0.07270103693008423, "learning_rate": 0.01, "loss": 2.015, "step": 28440 }, { "epoch": 2.923527597903176, "grad_norm": 0.05140276625752449, "learning_rate": 0.01, "loss": 1.9987, "step": 28443 }, { "epoch": 2.923835954363244, "grad_norm": 0.07426592707633972, "learning_rate": 0.01, "loss": 2.031, "step": 28446 }, { "epoch": 2.9241443108233116, "grad_norm": 0.05136018618941307, "learning_rate": 0.01, "loss": 2.0166, "step": 28449 }, { "epoch": 2.9244526672833797, "grad_norm": 0.03354816511273384, "learning_rate": 0.01, "loss": 1.9985, "step": 28452 }, { "epoch": 2.9247610237434474, "grad_norm": 0.03671132028102875, "learning_rate": 0.01, "loss": 1.9954, "step": 28455 }, { "epoch": 2.9250693802035155, "grad_norm": 0.05378049612045288, "learning_rate": 0.01, "loss": 2.0276, "step": 28458 }, { "epoch": 2.925377736663583, "grad_norm": 0.04772641509771347, "learning_rate": 0.01, "loss": 2.0113, "step": 28461 }, { "epoch": 2.925686093123651, "grad_norm": 0.05942932888865471, "learning_rate": 0.01, "loss": 2.0223, "step": 28464 }, { "epoch": 2.925994449583719, "grad_norm": 0.059020113199949265, "learning_rate": 0.01, "loss": 2.0089, "step": 28467 }, { "epoch": 2.9263028060437866, "grad_norm": 0.19283726811408997, "learning_rate": 0.01, "loss": 2.0302, "step": 28470 }, { "epoch": 2.9266111625038542, "grad_norm": 0.09231256693601608, "learning_rate": 0.01, "loss": 1.9915, "step": 28473 }, { "epoch": 2.9269195189639223, "grad_norm": 0.058674681931734085, "learning_rate": 0.01, "loss": 2.0095, "step": 28476 }, { "epoch": 2.92722787542399, "grad_norm": 0.05014891177415848, "learning_rate": 0.01, "loss": 2.0197, "step": 28479 }, { "epoch": 2.927536231884058, "grad_norm": 0.060597196221351624, "learning_rate": 0.01, "loss": 1.9932, "step": 28482 }, { "epoch": 2.9278445883441258, "grad_norm": 0.08379890769720078, "learning_rate": 0.01, "loss": 2.027, "step": 28485 }, { "epoch": 2.928152944804194, "grad_norm": 0.04832527041435242, "learning_rate": 0.01, "loss": 1.9973, "step": 28488 }, { "epoch": 2.9284613012642615, "grad_norm": 0.06709366291761398, "learning_rate": 0.01, "loss": 2.0287, "step": 28491 }, { "epoch": 2.928769657724329, "grad_norm": 0.044588811695575714, "learning_rate": 0.01, "loss": 2.0253, "step": 28494 }, { "epoch": 2.9290780141843973, "grad_norm": 0.05735667049884796, "learning_rate": 0.01, "loss": 1.9972, "step": 28497 }, { "epoch": 2.929386370644465, "grad_norm": 0.041256386786699295, "learning_rate": 0.01, "loss": 2.0109, "step": 28500 }, { "epoch": 2.9296947271045326, "grad_norm": 0.03460060432553291, "learning_rate": 0.01, "loss": 2.0028, "step": 28503 }, { "epoch": 2.9300030835646007, "grad_norm": 0.05080440267920494, "learning_rate": 0.01, "loss": 2.0186, "step": 28506 }, { "epoch": 2.9303114400246684, "grad_norm": 0.1281091421842575, "learning_rate": 0.01, "loss": 2.0219, "step": 28509 }, { "epoch": 2.9306197964847365, "grad_norm": 0.05667036399245262, "learning_rate": 0.01, "loss": 2.0034, "step": 28512 }, { "epoch": 2.930928152944804, "grad_norm": 0.10026687383651733, "learning_rate": 0.01, "loss": 2.0082, "step": 28515 }, { "epoch": 2.9312365094048722, "grad_norm": 0.04924483224749565, "learning_rate": 0.01, "loss": 2.0034, "step": 28518 }, { "epoch": 2.93154486586494, "grad_norm": 0.04905860871076584, "learning_rate": 0.01, "loss": 2.0125, "step": 28521 }, { "epoch": 2.9318532223250076, "grad_norm": 0.0525185652077198, "learning_rate": 0.01, "loss": 2.0193, "step": 28524 }, { "epoch": 2.9321615787850757, "grad_norm": 0.04196888953447342, "learning_rate": 0.01, "loss": 2.0167, "step": 28527 }, { "epoch": 2.9324699352451433, "grad_norm": 0.04660086706280708, "learning_rate": 0.01, "loss": 1.996, "step": 28530 }, { "epoch": 2.932778291705211, "grad_norm": 0.05741078406572342, "learning_rate": 0.01, "loss": 2.0093, "step": 28533 }, { "epoch": 2.933086648165279, "grad_norm": 0.052953120321035385, "learning_rate": 0.01, "loss": 2.0111, "step": 28536 }, { "epoch": 2.9333950046253467, "grad_norm": 0.04325610026717186, "learning_rate": 0.01, "loss": 2.0252, "step": 28539 }, { "epoch": 2.933703361085415, "grad_norm": 0.04387751594185829, "learning_rate": 0.01, "loss": 2.0052, "step": 28542 }, { "epoch": 2.9340117175454825, "grad_norm": 0.03476174548268318, "learning_rate": 0.01, "loss": 2.0143, "step": 28545 }, { "epoch": 2.9343200740055506, "grad_norm": 0.03839149698615074, "learning_rate": 0.01, "loss": 2.0289, "step": 28548 }, { "epoch": 2.9346284304656183, "grad_norm": 0.0841054692864418, "learning_rate": 0.01, "loss": 1.9642, "step": 28551 }, { "epoch": 2.934936786925686, "grad_norm": 0.10850472748279572, "learning_rate": 0.01, "loss": 2.0345, "step": 28554 }, { "epoch": 2.935245143385754, "grad_norm": 0.12255658954381943, "learning_rate": 0.01, "loss": 1.9961, "step": 28557 }, { "epoch": 2.9355534998458217, "grad_norm": 0.05664276331663132, "learning_rate": 0.01, "loss": 2.0202, "step": 28560 }, { "epoch": 2.9358618563058894, "grad_norm": 0.11724577099084854, "learning_rate": 0.01, "loss": 2.027, "step": 28563 }, { "epoch": 2.9361702127659575, "grad_norm": 0.06823346763849258, "learning_rate": 0.01, "loss": 2.0104, "step": 28566 }, { "epoch": 2.936478569226025, "grad_norm": 0.035576723515987396, "learning_rate": 0.01, "loss": 2.0302, "step": 28569 }, { "epoch": 2.9367869256860932, "grad_norm": 0.03186320886015892, "learning_rate": 0.01, "loss": 2.0181, "step": 28572 }, { "epoch": 2.937095282146161, "grad_norm": 0.03750181198120117, "learning_rate": 0.01, "loss": 2.0096, "step": 28575 }, { "epoch": 2.937403638606229, "grad_norm": 0.05272996798157692, "learning_rate": 0.01, "loss": 2.0021, "step": 28578 }, { "epoch": 2.9377119950662967, "grad_norm": 0.033289846032857895, "learning_rate": 0.01, "loss": 2.0055, "step": 28581 }, { "epoch": 2.9380203515263643, "grad_norm": 0.11037155240774155, "learning_rate": 0.01, "loss": 2.0271, "step": 28584 }, { "epoch": 2.9383287079864324, "grad_norm": 0.10620691627264023, "learning_rate": 0.01, "loss": 2.0012, "step": 28587 }, { "epoch": 2.9386370644465, "grad_norm": 0.052820686250925064, "learning_rate": 0.01, "loss": 2.009, "step": 28590 }, { "epoch": 2.9389454209065677, "grad_norm": 0.057419613003730774, "learning_rate": 0.01, "loss": 2.0122, "step": 28593 }, { "epoch": 2.939253777366636, "grad_norm": 0.04066954553127289, "learning_rate": 0.01, "loss": 2.0043, "step": 28596 }, { "epoch": 2.9395621338267035, "grad_norm": 0.04638439416885376, "learning_rate": 0.01, "loss": 2.0244, "step": 28599 }, { "epoch": 2.9398704902867716, "grad_norm": 0.11629427969455719, "learning_rate": 0.01, "loss": 1.9867, "step": 28602 }, { "epoch": 2.9401788467468393, "grad_norm": 0.05921720713376999, "learning_rate": 0.01, "loss": 2.0155, "step": 28605 }, { "epoch": 2.9404872032069074, "grad_norm": 0.05282594636082649, "learning_rate": 0.01, "loss": 2.0138, "step": 28608 }, { "epoch": 2.940795559666975, "grad_norm": 0.04680659994482994, "learning_rate": 0.01, "loss": 2.011, "step": 28611 }, { "epoch": 2.9411039161270427, "grad_norm": 0.11889224499464035, "learning_rate": 0.01, "loss": 2.0242, "step": 28614 }, { "epoch": 2.941412272587111, "grad_norm": 0.03500881418585777, "learning_rate": 0.01, "loss": 1.9972, "step": 28617 }, { "epoch": 2.9417206290471785, "grad_norm": 0.08639416098594666, "learning_rate": 0.01, "loss": 2.0108, "step": 28620 }, { "epoch": 2.942028985507246, "grad_norm": 0.03335639461874962, "learning_rate": 0.01, "loss": 2.0262, "step": 28623 }, { "epoch": 2.9423373419673142, "grad_norm": 0.045041028410196304, "learning_rate": 0.01, "loss": 2.0102, "step": 28626 }, { "epoch": 2.9426456984273823, "grad_norm": 0.047020211815834045, "learning_rate": 0.01, "loss": 2.0278, "step": 28629 }, { "epoch": 2.94295405488745, "grad_norm": 0.059858404099941254, "learning_rate": 0.01, "loss": 2.0038, "step": 28632 }, { "epoch": 2.9432624113475176, "grad_norm": 0.053345970809459686, "learning_rate": 0.01, "loss": 2.0135, "step": 28635 }, { "epoch": 2.9435707678075858, "grad_norm": 0.05528027564287186, "learning_rate": 0.01, "loss": 2.0044, "step": 28638 }, { "epoch": 2.9438791242676534, "grad_norm": 0.09895586222410202, "learning_rate": 0.01, "loss": 2.0117, "step": 28641 }, { "epoch": 2.944187480727721, "grad_norm": 0.11827319115400314, "learning_rate": 0.01, "loss": 1.9899, "step": 28644 }, { "epoch": 2.944495837187789, "grad_norm": 0.05013230815529823, "learning_rate": 0.01, "loss": 2.0146, "step": 28647 }, { "epoch": 2.944804193647857, "grad_norm": 0.0422658771276474, "learning_rate": 0.01, "loss": 2.0064, "step": 28650 }, { "epoch": 2.9451125501079245, "grad_norm": 0.04022778570652008, "learning_rate": 0.01, "loss": 1.9972, "step": 28653 }, { "epoch": 2.9454209065679926, "grad_norm": 0.043504439294338226, "learning_rate": 0.01, "loss": 2.0138, "step": 28656 }, { "epoch": 2.9457292630280607, "grad_norm": 0.06587618589401245, "learning_rate": 0.01, "loss": 2.0084, "step": 28659 }, { "epoch": 2.9460376194881284, "grad_norm": 0.10226722061634064, "learning_rate": 0.01, "loss": 2.0073, "step": 28662 }, { "epoch": 2.946345975948196, "grad_norm": 0.047040197998285294, "learning_rate": 0.01, "loss": 2.0082, "step": 28665 }, { "epoch": 2.946654332408264, "grad_norm": 0.0919933021068573, "learning_rate": 0.01, "loss": 2.0079, "step": 28668 }, { "epoch": 2.946962688868332, "grad_norm": 0.05978929251432419, "learning_rate": 0.01, "loss": 2.0271, "step": 28671 }, { "epoch": 2.9472710453283995, "grad_norm": 0.05814214050769806, "learning_rate": 0.01, "loss": 2.0224, "step": 28674 }, { "epoch": 2.9475794017884676, "grad_norm": 0.09039480984210968, "learning_rate": 0.01, "loss": 2.0026, "step": 28677 }, { "epoch": 2.947887758248535, "grad_norm": 0.1607222557067871, "learning_rate": 0.01, "loss": 2.0201, "step": 28680 }, { "epoch": 2.948196114708603, "grad_norm": 0.04198214039206505, "learning_rate": 0.01, "loss": 2.0157, "step": 28683 }, { "epoch": 2.948504471168671, "grad_norm": 0.06442588567733765, "learning_rate": 0.01, "loss": 1.9874, "step": 28686 }, { "epoch": 2.948812827628739, "grad_norm": 0.11082901805639267, "learning_rate": 0.01, "loss": 2.0165, "step": 28689 }, { "epoch": 2.9491211840888067, "grad_norm": 0.03669416159391403, "learning_rate": 0.01, "loss": 2.0311, "step": 28692 }, { "epoch": 2.9494295405488744, "grad_norm": 0.06837104260921478, "learning_rate": 0.01, "loss": 2.0052, "step": 28695 }, { "epoch": 2.9497378970089425, "grad_norm": 0.0466892383992672, "learning_rate": 0.01, "loss": 1.9896, "step": 28698 }, { "epoch": 2.95004625346901, "grad_norm": 0.06962305307388306, "learning_rate": 0.01, "loss": 2.023, "step": 28701 }, { "epoch": 2.950354609929078, "grad_norm": 0.06136500835418701, "learning_rate": 0.01, "loss": 2.0189, "step": 28704 }, { "epoch": 2.950662966389146, "grad_norm": 0.050067611038684845, "learning_rate": 0.01, "loss": 2.0258, "step": 28707 }, { "epoch": 2.9509713228492136, "grad_norm": 0.0527566559612751, "learning_rate": 0.01, "loss": 2.0157, "step": 28710 }, { "epoch": 2.9512796793092813, "grad_norm": 0.12275776267051697, "learning_rate": 0.01, "loss": 2.0061, "step": 28713 }, { "epoch": 2.9515880357693494, "grad_norm": 0.052756138145923615, "learning_rate": 0.01, "loss": 2.0235, "step": 28716 }, { "epoch": 2.9518963922294175, "grad_norm": 0.038834694772958755, "learning_rate": 0.01, "loss": 1.9956, "step": 28719 }, { "epoch": 2.952204748689485, "grad_norm": 0.07736565917730331, "learning_rate": 0.01, "loss": 2.0185, "step": 28722 }, { "epoch": 2.952513105149553, "grad_norm": 0.10177972167730331, "learning_rate": 0.01, "loss": 2.017, "step": 28725 }, { "epoch": 2.952821461609621, "grad_norm": 0.082985520362854, "learning_rate": 0.01, "loss": 1.9967, "step": 28728 }, { "epoch": 2.9531298180696886, "grad_norm": 0.05385474115610123, "learning_rate": 0.01, "loss": 2.0239, "step": 28731 }, { "epoch": 2.953438174529756, "grad_norm": 0.0803777202963829, "learning_rate": 0.01, "loss": 2.0155, "step": 28734 }, { "epoch": 2.9537465309898243, "grad_norm": 0.06875913590192795, "learning_rate": 0.01, "loss": 2.0237, "step": 28737 }, { "epoch": 2.954054887449892, "grad_norm": 0.09149770438671112, "learning_rate": 0.01, "loss": 2.0153, "step": 28740 }, { "epoch": 2.95436324390996, "grad_norm": 0.1182807981967926, "learning_rate": 0.01, "loss": 2.0117, "step": 28743 }, { "epoch": 2.9546716003700277, "grad_norm": 0.044717706739902496, "learning_rate": 0.01, "loss": 2.0233, "step": 28746 }, { "epoch": 2.954979956830096, "grad_norm": 0.04887622967362404, "learning_rate": 0.01, "loss": 2.0232, "step": 28749 }, { "epoch": 2.9552883132901635, "grad_norm": 0.041407834738492966, "learning_rate": 0.01, "loss": 1.9945, "step": 28752 }, { "epoch": 2.955596669750231, "grad_norm": 0.05831639841198921, "learning_rate": 0.01, "loss": 2.0485, "step": 28755 }, { "epoch": 2.9559050262102993, "grad_norm": 0.08279699832201004, "learning_rate": 0.01, "loss": 2.0275, "step": 28758 }, { "epoch": 2.956213382670367, "grad_norm": 0.08988698571920395, "learning_rate": 0.01, "loss": 1.9966, "step": 28761 }, { "epoch": 2.9565217391304346, "grad_norm": 0.0645131915807724, "learning_rate": 0.01, "loss": 2.0001, "step": 28764 }, { "epoch": 2.9568300955905027, "grad_norm": 0.08262491226196289, "learning_rate": 0.01, "loss": 1.999, "step": 28767 }, { "epoch": 2.9571384520505704, "grad_norm": 0.03773394227027893, "learning_rate": 0.01, "loss": 2.0108, "step": 28770 }, { "epoch": 2.9574468085106385, "grad_norm": 0.08012068271636963, "learning_rate": 0.01, "loss": 2.0032, "step": 28773 }, { "epoch": 2.957755164970706, "grad_norm": 0.04637681320309639, "learning_rate": 0.01, "loss": 2.0324, "step": 28776 }, { "epoch": 2.9580635214307742, "grad_norm": 0.031988177448511124, "learning_rate": 0.01, "loss": 1.9966, "step": 28779 }, { "epoch": 2.958371877890842, "grad_norm": 0.08325552940368652, "learning_rate": 0.01, "loss": 1.9993, "step": 28782 }, { "epoch": 2.9586802343509095, "grad_norm": 0.06131797283887863, "learning_rate": 0.01, "loss": 1.9963, "step": 28785 }, { "epoch": 2.9589885908109776, "grad_norm": 0.04146185144782066, "learning_rate": 0.01, "loss": 2.0353, "step": 28788 }, { "epoch": 2.9592969472710453, "grad_norm": 0.04004296660423279, "learning_rate": 0.01, "loss": 2.0113, "step": 28791 }, { "epoch": 2.959605303731113, "grad_norm": 0.07771418243646622, "learning_rate": 0.01, "loss": 2.025, "step": 28794 }, { "epoch": 2.959913660191181, "grad_norm": 0.048570699989795685, "learning_rate": 0.01, "loss": 2.0013, "step": 28797 }, { "epoch": 2.9602220166512487, "grad_norm": 0.0664374902844429, "learning_rate": 0.01, "loss": 2.0279, "step": 28800 }, { "epoch": 2.960530373111317, "grad_norm": 0.09744110703468323, "learning_rate": 0.01, "loss": 2.0058, "step": 28803 }, { "epoch": 2.9608387295713845, "grad_norm": 0.03430997580289841, "learning_rate": 0.01, "loss": 1.9996, "step": 28806 }, { "epoch": 2.9611470860314526, "grad_norm": 0.036999545991420746, "learning_rate": 0.01, "loss": 2.0146, "step": 28809 }, { "epoch": 2.9614554424915203, "grad_norm": 0.05290836840867996, "learning_rate": 0.01, "loss": 2.011, "step": 28812 }, { "epoch": 2.961763798951588, "grad_norm": 0.061555683612823486, "learning_rate": 0.01, "loss": 2.0145, "step": 28815 }, { "epoch": 2.962072155411656, "grad_norm": 0.06276807934045792, "learning_rate": 0.01, "loss": 2.0218, "step": 28818 }, { "epoch": 2.9623805118717237, "grad_norm": 0.05685890465974808, "learning_rate": 0.01, "loss": 1.9963, "step": 28821 }, { "epoch": 2.9626888683317913, "grad_norm": 0.09975235909223557, "learning_rate": 0.01, "loss": 2.0082, "step": 28824 }, { "epoch": 2.9629972247918595, "grad_norm": 0.04205232113599777, "learning_rate": 0.01, "loss": 2.0383, "step": 28827 }, { "epoch": 2.963305581251927, "grad_norm": 0.07952512800693512, "learning_rate": 0.01, "loss": 2.0059, "step": 28830 }, { "epoch": 2.963613937711995, "grad_norm": 0.04118579626083374, "learning_rate": 0.01, "loss": 1.9727, "step": 28833 }, { "epoch": 2.963922294172063, "grad_norm": 0.10875571519136429, "learning_rate": 0.01, "loss": 2.0153, "step": 28836 }, { "epoch": 2.964230650632131, "grad_norm": 0.16663865745067596, "learning_rate": 0.01, "loss": 2.001, "step": 28839 }, { "epoch": 2.9645390070921986, "grad_norm": 0.08370403200387955, "learning_rate": 0.01, "loss": 2.0158, "step": 28842 }, { "epoch": 2.9648473635522663, "grad_norm": 0.07209211587905884, "learning_rate": 0.01, "loss": 2.0271, "step": 28845 }, { "epoch": 2.9651557200123344, "grad_norm": 0.05848775431513786, "learning_rate": 0.01, "loss": 2.013, "step": 28848 }, { "epoch": 2.965464076472402, "grad_norm": 0.02791527472436428, "learning_rate": 0.01, "loss": 2.0331, "step": 28851 }, { "epoch": 2.9657724329324697, "grad_norm": 0.03520442545413971, "learning_rate": 0.01, "loss": 2.0346, "step": 28854 }, { "epoch": 2.966080789392538, "grad_norm": 0.03414986655116081, "learning_rate": 0.01, "loss": 2.0067, "step": 28857 }, { "epoch": 2.9663891458526055, "grad_norm": 0.0448157899081707, "learning_rate": 0.01, "loss": 2.0137, "step": 28860 }, { "epoch": 2.9666975023126736, "grad_norm": 0.09282497316598892, "learning_rate": 0.01, "loss": 2.0284, "step": 28863 }, { "epoch": 2.9670058587727413, "grad_norm": 0.05999337136745453, "learning_rate": 0.01, "loss": 2.0256, "step": 28866 }, { "epoch": 2.9673142152328094, "grad_norm": 0.038877278566360474, "learning_rate": 0.01, "loss": 1.9988, "step": 28869 }, { "epoch": 2.967622571692877, "grad_norm": 0.04760310798883438, "learning_rate": 0.01, "loss": 2.016, "step": 28872 }, { "epoch": 2.9679309281529447, "grad_norm": 0.049317944794893265, "learning_rate": 0.01, "loss": 1.9993, "step": 28875 }, { "epoch": 2.968239284613013, "grad_norm": 0.038830939680337906, "learning_rate": 0.01, "loss": 1.9969, "step": 28878 }, { "epoch": 2.9685476410730804, "grad_norm": 0.18675923347473145, "learning_rate": 0.01, "loss": 2.0217, "step": 28881 }, { "epoch": 2.968855997533148, "grad_norm": 0.04269779101014137, "learning_rate": 0.01, "loss": 2.0163, "step": 28884 }, { "epoch": 2.969164353993216, "grad_norm": 0.04740308225154877, "learning_rate": 0.01, "loss": 2.0184, "step": 28887 }, { "epoch": 2.969472710453284, "grad_norm": 0.058595623821020126, "learning_rate": 0.01, "loss": 2.0369, "step": 28890 }, { "epoch": 2.969781066913352, "grad_norm": 0.06136814132332802, "learning_rate": 0.01, "loss": 2.0229, "step": 28893 }, { "epoch": 2.9700894233734196, "grad_norm": 0.059679534286260605, "learning_rate": 0.01, "loss": 2.0151, "step": 28896 }, { "epoch": 2.9703977798334877, "grad_norm": 0.044182952493429184, "learning_rate": 0.01, "loss": 1.9977, "step": 28899 }, { "epoch": 2.9707061362935554, "grad_norm": 0.04152587056159973, "learning_rate": 0.01, "loss": 2.01, "step": 28902 }, { "epoch": 2.971014492753623, "grad_norm": 0.0424608550965786, "learning_rate": 0.01, "loss": 2.0228, "step": 28905 }, { "epoch": 2.971322849213691, "grad_norm": 0.040096018463373184, "learning_rate": 0.01, "loss": 1.9976, "step": 28908 }, { "epoch": 2.971631205673759, "grad_norm": 0.03288499265909195, "learning_rate": 0.01, "loss": 2.0416, "step": 28911 }, { "epoch": 2.9719395621338265, "grad_norm": 0.05150043964385986, "learning_rate": 0.01, "loss": 2.0, "step": 28914 }, { "epoch": 2.9722479185938946, "grad_norm": 0.05535360425710678, "learning_rate": 0.01, "loss": 2.0215, "step": 28917 }, { "epoch": 2.9725562750539622, "grad_norm": 0.09103085100650787, "learning_rate": 0.01, "loss": 2.0227, "step": 28920 }, { "epoch": 2.9728646315140304, "grad_norm": 0.060518983751535416, "learning_rate": 0.01, "loss": 1.9942, "step": 28923 }, { "epoch": 2.973172987974098, "grad_norm": 0.15921367704868317, "learning_rate": 0.01, "loss": 2.0203, "step": 28926 }, { "epoch": 2.973481344434166, "grad_norm": 0.04451125115156174, "learning_rate": 0.01, "loss": 2.0057, "step": 28929 }, { "epoch": 2.973789700894234, "grad_norm": 0.06099553778767586, "learning_rate": 0.01, "loss": 2.0317, "step": 28932 }, { "epoch": 2.9740980573543014, "grad_norm": 0.07864733040332794, "learning_rate": 0.01, "loss": 2.0392, "step": 28935 }, { "epoch": 2.9744064138143695, "grad_norm": 0.04279434680938721, "learning_rate": 0.01, "loss": 2.0189, "step": 28938 }, { "epoch": 2.974714770274437, "grad_norm": 0.06299113482236862, "learning_rate": 0.01, "loss": 2.0379, "step": 28941 }, { "epoch": 2.975023126734505, "grad_norm": 0.04803795740008354, "learning_rate": 0.01, "loss": 2.0161, "step": 28944 }, { "epoch": 2.975331483194573, "grad_norm": 0.04646730050444603, "learning_rate": 0.01, "loss": 2.026, "step": 28947 }, { "epoch": 2.9756398396546406, "grad_norm": 0.05352159962058067, "learning_rate": 0.01, "loss": 2.0114, "step": 28950 }, { "epoch": 2.9759481961147087, "grad_norm": 0.07281997799873352, "learning_rate": 0.01, "loss": 2.0328, "step": 28953 }, { "epoch": 2.9762565525747764, "grad_norm": 0.050049133598804474, "learning_rate": 0.01, "loss": 1.9943, "step": 28956 }, { "epoch": 2.9765649090348445, "grad_norm": 0.039795007556676865, "learning_rate": 0.01, "loss": 2.0128, "step": 28959 }, { "epoch": 2.976873265494912, "grad_norm": 0.09085065871477127, "learning_rate": 0.01, "loss": 2.005, "step": 28962 }, { "epoch": 2.97718162195498, "grad_norm": 0.1592961698770523, "learning_rate": 0.01, "loss": 2.0373, "step": 28965 }, { "epoch": 2.977489978415048, "grad_norm": 0.10092988610267639, "learning_rate": 0.01, "loss": 2.0009, "step": 28968 }, { "epoch": 2.9777983348751156, "grad_norm": 0.046051934361457825, "learning_rate": 0.01, "loss": 1.9942, "step": 28971 }, { "epoch": 2.9781066913351832, "grad_norm": 0.04267173632979393, "learning_rate": 0.01, "loss": 1.9997, "step": 28974 }, { "epoch": 2.9784150477952513, "grad_norm": 0.032134927809238434, "learning_rate": 0.01, "loss": 1.9694, "step": 28977 }, { "epoch": 2.978723404255319, "grad_norm": 0.055023569613695145, "learning_rate": 0.01, "loss": 2.0202, "step": 28980 }, { "epoch": 2.979031760715387, "grad_norm": 0.07177849858999252, "learning_rate": 0.01, "loss": 2.0287, "step": 28983 }, { "epoch": 2.9793401171754548, "grad_norm": 0.05890415981411934, "learning_rate": 0.01, "loss": 2.0226, "step": 28986 }, { "epoch": 2.979648473635523, "grad_norm": 0.04443821310997009, "learning_rate": 0.01, "loss": 2.042, "step": 28989 }, { "epoch": 2.9799568300955905, "grad_norm": 0.04153164103627205, "learning_rate": 0.01, "loss": 2.0111, "step": 28992 }, { "epoch": 2.980265186555658, "grad_norm": 0.07844668626785278, "learning_rate": 0.01, "loss": 2.0281, "step": 28995 }, { "epoch": 2.9805735430157263, "grad_norm": 0.09027374535799026, "learning_rate": 0.01, "loss": 2.0325, "step": 28998 }, { "epoch": 2.980881899475794, "grad_norm": 0.04559837654232979, "learning_rate": 0.01, "loss": 1.9926, "step": 29001 }, { "epoch": 2.9811902559358616, "grad_norm": 0.03771069645881653, "learning_rate": 0.01, "loss": 1.9966, "step": 29004 }, { "epoch": 2.9814986123959297, "grad_norm": 0.09008541703224182, "learning_rate": 0.01, "loss": 2.0246, "step": 29007 }, { "epoch": 2.9818069688559974, "grad_norm": 0.060089852660894394, "learning_rate": 0.01, "loss": 2.0127, "step": 29010 }, { "epoch": 2.9821153253160655, "grad_norm": 0.11291385442018509, "learning_rate": 0.01, "loss": 2.0397, "step": 29013 }, { "epoch": 2.982423681776133, "grad_norm": 0.09720003604888916, "learning_rate": 0.01, "loss": 2.0277, "step": 29016 }, { "epoch": 2.9827320382362013, "grad_norm": 0.060459867119789124, "learning_rate": 0.01, "loss": 1.9914, "step": 29019 }, { "epoch": 2.983040394696269, "grad_norm": 0.07619535177946091, "learning_rate": 0.01, "loss": 1.9964, "step": 29022 }, { "epoch": 2.9833487511563366, "grad_norm": 0.05402089282870293, "learning_rate": 0.01, "loss": 1.9867, "step": 29025 }, { "epoch": 2.9836571076164047, "grad_norm": 0.05162883922457695, "learning_rate": 0.01, "loss": 2.0135, "step": 29028 }, { "epoch": 2.9839654640764723, "grad_norm": 0.037369467318058014, "learning_rate": 0.01, "loss": 2.0232, "step": 29031 }, { "epoch": 2.98427382053654, "grad_norm": 0.05483279377222061, "learning_rate": 0.01, "loss": 2.0208, "step": 29034 }, { "epoch": 2.984582176996608, "grad_norm": 0.050847407430410385, "learning_rate": 0.01, "loss": 2.0043, "step": 29037 }, { "epoch": 2.9848905334566758, "grad_norm": 0.07075628638267517, "learning_rate": 0.01, "loss": 2.0408, "step": 29040 }, { "epoch": 2.985198889916744, "grad_norm": 0.05580511689186096, "learning_rate": 0.01, "loss": 2.0142, "step": 29043 }, { "epoch": 2.9855072463768115, "grad_norm": 0.043689336627721786, "learning_rate": 0.01, "loss": 2.0118, "step": 29046 }, { "epoch": 2.9858156028368796, "grad_norm": 0.07577245682477951, "learning_rate": 0.01, "loss": 2.0287, "step": 29049 }, { "epoch": 2.9861239592969473, "grad_norm": 0.07917779684066772, "learning_rate": 0.01, "loss": 2.0148, "step": 29052 }, { "epoch": 2.986432315757015, "grad_norm": 0.07137954980134964, "learning_rate": 0.01, "loss": 2.0007, "step": 29055 }, { "epoch": 2.986740672217083, "grad_norm": 0.09875428676605225, "learning_rate": 0.01, "loss": 2.0242, "step": 29058 }, { "epoch": 2.9870490286771507, "grad_norm": 0.059274058789014816, "learning_rate": 0.01, "loss": 2.0212, "step": 29061 }, { "epoch": 2.9873573851372184, "grad_norm": 0.04235726222395897, "learning_rate": 0.01, "loss": 2.0178, "step": 29064 }, { "epoch": 2.9876657415972865, "grad_norm": 0.1677253097295761, "learning_rate": 0.01, "loss": 1.9921, "step": 29067 }, { "epoch": 2.987974098057354, "grad_norm": 0.05953408405184746, "learning_rate": 0.01, "loss": 2.0138, "step": 29070 }, { "epoch": 2.9882824545174222, "grad_norm": 0.031471315771341324, "learning_rate": 0.01, "loss": 1.9953, "step": 29073 }, { "epoch": 2.98859081097749, "grad_norm": 0.0490463487803936, "learning_rate": 0.01, "loss": 2.0193, "step": 29076 }, { "epoch": 2.988899167437558, "grad_norm": 0.0685456171631813, "learning_rate": 0.01, "loss": 2.0459, "step": 29079 }, { "epoch": 2.9892075238976257, "grad_norm": 0.04472583159804344, "learning_rate": 0.01, "loss": 1.9909, "step": 29082 }, { "epoch": 2.9895158803576933, "grad_norm": 0.03528788685798645, "learning_rate": 0.01, "loss": 2.0024, "step": 29085 }, { "epoch": 2.9898242368177614, "grad_norm": 0.04196497052907944, "learning_rate": 0.01, "loss": 2.0015, "step": 29088 }, { "epoch": 2.990132593277829, "grad_norm": 0.1347953975200653, "learning_rate": 0.01, "loss": 1.9983, "step": 29091 }, { "epoch": 2.9904409497378968, "grad_norm": 0.03483536094427109, "learning_rate": 0.01, "loss": 2.0038, "step": 29094 }, { "epoch": 2.990749306197965, "grad_norm": 0.04203316941857338, "learning_rate": 0.01, "loss": 2.0364, "step": 29097 }, { "epoch": 2.9910576626580325, "grad_norm": 0.039628706872463226, "learning_rate": 0.01, "loss": 2.0212, "step": 29100 }, { "epoch": 2.9913660191181006, "grad_norm": 0.04524881765246391, "learning_rate": 0.01, "loss": 2.0436, "step": 29103 }, { "epoch": 2.9916743755781683, "grad_norm": 0.0704786404967308, "learning_rate": 0.01, "loss": 2.0084, "step": 29106 }, { "epoch": 2.9919827320382364, "grad_norm": 0.06169109791517258, "learning_rate": 0.01, "loss": 1.9879, "step": 29109 }, { "epoch": 2.992291088498304, "grad_norm": 0.05929429456591606, "learning_rate": 0.01, "loss": 2.0341, "step": 29112 }, { "epoch": 2.9925994449583717, "grad_norm": 0.06046414375305176, "learning_rate": 0.01, "loss": 2.0212, "step": 29115 }, { "epoch": 2.99290780141844, "grad_norm": 0.03632686287164688, "learning_rate": 0.01, "loss": 2.0288, "step": 29118 }, { "epoch": 2.9932161578785075, "grad_norm": 0.0416223518550396, "learning_rate": 0.01, "loss": 1.9955, "step": 29121 }, { "epoch": 2.993524514338575, "grad_norm": 0.033993568271398544, "learning_rate": 0.01, "loss": 2.0373, "step": 29124 }, { "epoch": 2.9938328707986432, "grad_norm": 0.11413303017616272, "learning_rate": 0.01, "loss": 2.0219, "step": 29127 }, { "epoch": 2.9941412272587113, "grad_norm": 0.04512523114681244, "learning_rate": 0.01, "loss": 2.0197, "step": 29130 }, { "epoch": 2.994449583718779, "grad_norm": 0.08577805757522583, "learning_rate": 0.01, "loss": 2.0263, "step": 29133 }, { "epoch": 2.9947579401788467, "grad_norm": 0.09573300927877426, "learning_rate": 0.01, "loss": 2.0057, "step": 29136 }, { "epoch": 2.9950662966389148, "grad_norm": 0.04170147702097893, "learning_rate": 0.01, "loss": 2.0287, "step": 29139 }, { "epoch": 2.9953746530989824, "grad_norm": 0.04233024641871452, "learning_rate": 0.01, "loss": 2.0084, "step": 29142 }, { "epoch": 2.99568300955905, "grad_norm": 0.05406009405851364, "learning_rate": 0.01, "loss": 2.0249, "step": 29145 }, { "epoch": 2.995991366019118, "grad_norm": 0.037997808307409286, "learning_rate": 0.01, "loss": 2.0106, "step": 29148 }, { "epoch": 2.996299722479186, "grad_norm": 0.08010167628526688, "learning_rate": 0.01, "loss": 1.9899, "step": 29151 }, { "epoch": 2.9966080789392535, "grad_norm": 0.05372076854109764, "learning_rate": 0.01, "loss": 2.0406, "step": 29154 }, { "epoch": 2.9969164353993216, "grad_norm": 0.04186830669641495, "learning_rate": 0.01, "loss": 2.0047, "step": 29157 }, { "epoch": 2.9972247918593897, "grad_norm": 0.0803917944431305, "learning_rate": 0.01, "loss": 2.0106, "step": 29160 }, { "epoch": 2.9975331483194574, "grad_norm": 0.06086145341396332, "learning_rate": 0.01, "loss": 2.0016, "step": 29163 }, { "epoch": 2.997841504779525, "grad_norm": 0.1197366788983345, "learning_rate": 0.01, "loss": 2.0025, "step": 29166 }, { "epoch": 2.998149861239593, "grad_norm": 0.09424477070569992, "learning_rate": 0.01, "loss": 2.0032, "step": 29169 }, { "epoch": 2.998458217699661, "grad_norm": 0.05025864019989967, "learning_rate": 0.01, "loss": 2.0166, "step": 29172 }, { "epoch": 2.9987665741597285, "grad_norm": 0.0840819776058197, "learning_rate": 0.01, "loss": 1.9983, "step": 29175 }, { "epoch": 2.9990749306197966, "grad_norm": 0.03415597230195999, "learning_rate": 0.01, "loss": 2.0233, "step": 29178 }, { "epoch": 2.9993832870798642, "grad_norm": 0.09066252410411835, "learning_rate": 0.01, "loss": 2.0245, "step": 29181 }, { "epoch": 2.999691643539932, "grad_norm": 0.05019732564687729, "learning_rate": 0.01, "loss": 1.9893, "step": 29184 }, { "epoch": 3.0, "grad_norm": 0.09318925440311432, "learning_rate": 0.01, "loss": 2.0062, "step": 29187 }, { "epoch": 2.999383477188656, "grad_norm": 0.08568185567855835, "learning_rate": 0.01, "loss": 2.0409, "step": 29190 }, { "epoch": 2.9996917385943282, "grad_norm": 0.05479726567864418, "learning_rate": 0.01, "loss": 1.9992, "step": 29193 }, { "epoch": 3.0, "grad_norm": 0.0812709629535675, "learning_rate": 0.01, "loss": 2.0193, "step": 29196 }, { "epoch": 3.000308261405672, "grad_norm": 0.06423875689506531, "learning_rate": 0.01, "loss": 2.0037, "step": 29199 }, { "epoch": 3.000616522811344, "grad_norm": 0.06900475919246674, "learning_rate": 0.01, "loss": 2.0235, "step": 29202 }, { "epoch": 3.000924784217016, "grad_norm": 0.09736461192369461, "learning_rate": 0.01, "loss": 2.0269, "step": 29205 }, { "epoch": 3.001233045622688, "grad_norm": 0.10431299358606339, "learning_rate": 0.01, "loss": 2.0366, "step": 29208 }, { "epoch": 3.00154130702836, "grad_norm": 0.06412489712238312, "learning_rate": 0.01, "loss": 2.0336, "step": 29211 }, { "epoch": 3.0018495684340323, "grad_norm": 0.07305043190717697, "learning_rate": 0.01, "loss": 2.0355, "step": 29214 }, { "epoch": 3.002157829839704, "grad_norm": 0.07680006325244904, "learning_rate": 0.01, "loss": 2.031, "step": 29217 }, { "epoch": 3.0024660912453762, "grad_norm": 0.04416871443390846, "learning_rate": 0.01, "loss": 2.0228, "step": 29220 }, { "epoch": 3.002774352651048, "grad_norm": 0.05895330011844635, "learning_rate": 0.01, "loss": 2.0124, "step": 29223 }, { "epoch": 3.00308261405672, "grad_norm": 0.16763944923877716, "learning_rate": 0.01, "loss": 2.0521, "step": 29226 }, { "epoch": 3.003390875462392, "grad_norm": 0.04152580350637436, "learning_rate": 0.01, "loss": 2.0144, "step": 29229 }, { "epoch": 3.003699136868064, "grad_norm": 0.05650210753083229, "learning_rate": 0.01, "loss": 2.0373, "step": 29232 }, { "epoch": 3.0040073982737363, "grad_norm": 0.10183783620595932, "learning_rate": 0.01, "loss": 2.0374, "step": 29235 }, { "epoch": 3.004315659679408, "grad_norm": 0.11545984447002411, "learning_rate": 0.01, "loss": 2.0359, "step": 29238 }, { "epoch": 3.0046239210850803, "grad_norm": 0.07768990844488144, "learning_rate": 0.01, "loss": 2.0222, "step": 29241 }, { "epoch": 3.004932182490752, "grad_norm": 0.06256531924009323, "learning_rate": 0.01, "loss": 2.0332, "step": 29244 }, { "epoch": 3.005240443896424, "grad_norm": 0.041665416210889816, "learning_rate": 0.01, "loss": 2.0061, "step": 29247 }, { "epoch": 3.0055487053020964, "grad_norm": 0.051996082067489624, "learning_rate": 0.01, "loss": 2.0237, "step": 29250 }, { "epoch": 3.005856966707768, "grad_norm": 0.032815441489219666, "learning_rate": 0.01, "loss": 2.0203, "step": 29253 }, { "epoch": 3.0061652281134403, "grad_norm": 0.060963522642850876, "learning_rate": 0.01, "loss": 2.0293, "step": 29256 }, { "epoch": 3.006473489519112, "grad_norm": 0.11818858236074448, "learning_rate": 0.01, "loss": 2.0246, "step": 29259 }, { "epoch": 3.0067817509247843, "grad_norm": 0.08653881400823593, "learning_rate": 0.01, "loss": 2.0208, "step": 29262 }, { "epoch": 3.007090012330456, "grad_norm": 0.09629751741886139, "learning_rate": 0.01, "loss": 1.9929, "step": 29265 }, { "epoch": 3.0073982737361282, "grad_norm": 0.04238956794142723, "learning_rate": 0.01, "loss": 2.0112, "step": 29268 }, { "epoch": 3.0077065351418004, "grad_norm": 0.040573202073574066, "learning_rate": 0.01, "loss": 2.0159, "step": 29271 }, { "epoch": 3.008014796547472, "grad_norm": 0.08061878383159637, "learning_rate": 0.01, "loss": 2.0562, "step": 29274 }, { "epoch": 3.0083230579531444, "grad_norm": 0.06944199651479721, "learning_rate": 0.01, "loss": 2.0077, "step": 29277 }, { "epoch": 3.008631319358816, "grad_norm": 0.06629909574985504, "learning_rate": 0.01, "loss": 1.9934, "step": 29280 }, { "epoch": 3.0089395807644883, "grad_norm": 0.06654530018568039, "learning_rate": 0.01, "loss": 2.0018, "step": 29283 }, { "epoch": 3.0092478421701605, "grad_norm": 0.08806800842285156, "learning_rate": 0.01, "loss": 2.0033, "step": 29286 }, { "epoch": 3.0095561035758323, "grad_norm": 0.12367472797632217, "learning_rate": 0.01, "loss": 2.0382, "step": 29289 }, { "epoch": 3.0098643649815044, "grad_norm": 0.052121471613645554, "learning_rate": 0.01, "loss": 2.0278, "step": 29292 }, { "epoch": 3.010172626387176, "grad_norm": 0.0409090481698513, "learning_rate": 0.01, "loss": 2.0146, "step": 29295 }, { "epoch": 3.0104808877928484, "grad_norm": 0.09956279397010803, "learning_rate": 0.01, "loss": 2.0339, "step": 29298 }, { "epoch": 3.01078914919852, "grad_norm": 0.04090806096792221, "learning_rate": 0.01, "loss": 2.031, "step": 29301 }, { "epoch": 3.0110974106041923, "grad_norm": 0.06308458000421524, "learning_rate": 0.01, "loss": 2.0347, "step": 29304 }, { "epoch": 3.0114056720098645, "grad_norm": 0.05140318349003792, "learning_rate": 0.01, "loss": 2.0341, "step": 29307 }, { "epoch": 3.0117139334155363, "grad_norm": 0.0383441224694252, "learning_rate": 0.01, "loss": 2.0167, "step": 29310 }, { "epoch": 3.0120221948212085, "grad_norm": 0.06803669035434723, "learning_rate": 0.01, "loss": 2.0442, "step": 29313 }, { "epoch": 3.0123304562268802, "grad_norm": 0.042336028069257736, "learning_rate": 0.01, "loss": 2.018, "step": 29316 }, { "epoch": 3.0126387176325524, "grad_norm": 0.052575305104255676, "learning_rate": 0.01, "loss": 1.9956, "step": 29319 }, { "epoch": 3.0129469790382246, "grad_norm": 0.04428831860423088, "learning_rate": 0.01, "loss": 2.0225, "step": 29322 }, { "epoch": 3.0132552404438964, "grad_norm": 0.03720409423112869, "learning_rate": 0.01, "loss": 2.0135, "step": 29325 }, { "epoch": 3.0135635018495686, "grad_norm": 0.03491399809718132, "learning_rate": 0.01, "loss": 1.9993, "step": 29328 }, { "epoch": 3.0138717632552403, "grad_norm": 0.11868512630462646, "learning_rate": 0.01, "loss": 2.014, "step": 29331 }, { "epoch": 3.0141800246609125, "grad_norm": 0.05709204822778702, "learning_rate": 0.01, "loss": 2.0043, "step": 29334 }, { "epoch": 3.0144882860665843, "grad_norm": 0.09501231461763382, "learning_rate": 0.01, "loss": 2.0056, "step": 29337 }, { "epoch": 3.0147965474722564, "grad_norm": 0.04431547597050667, "learning_rate": 0.01, "loss": 2.0223, "step": 29340 }, { "epoch": 3.0151048088779286, "grad_norm": 0.07580556720495224, "learning_rate": 0.01, "loss": 2.0343, "step": 29343 }, { "epoch": 3.0154130702836004, "grad_norm": 0.05567536503076553, "learning_rate": 0.01, "loss": 2.0327, "step": 29346 }, { "epoch": 3.0157213316892726, "grad_norm": 0.03637940436601639, "learning_rate": 0.01, "loss": 2.0366, "step": 29349 }, { "epoch": 3.0160295930949443, "grad_norm": 0.07379139959812164, "learning_rate": 0.01, "loss": 2.0196, "step": 29352 }, { "epoch": 3.0163378545006165, "grad_norm": 0.08311998844146729, "learning_rate": 0.01, "loss": 2.0013, "step": 29355 }, { "epoch": 3.0166461159062887, "grad_norm": 0.11170487850904465, "learning_rate": 0.01, "loss": 2.0174, "step": 29358 }, { "epoch": 3.0169543773119605, "grad_norm": 0.055827848613262177, "learning_rate": 0.01, "loss": 2.0184, "step": 29361 }, { "epoch": 3.0172626387176327, "grad_norm": 0.06052641570568085, "learning_rate": 0.01, "loss": 2.0238, "step": 29364 }, { "epoch": 3.0175709001233044, "grad_norm": 0.03150554001331329, "learning_rate": 0.01, "loss": 2.024, "step": 29367 }, { "epoch": 3.0178791615289766, "grad_norm": 0.06298622488975525, "learning_rate": 0.01, "loss": 2.0132, "step": 29370 }, { "epoch": 3.0181874229346484, "grad_norm": 0.09742710739374161, "learning_rate": 0.01, "loss": 2.0411, "step": 29373 }, { "epoch": 3.0184956843403206, "grad_norm": 0.07219108939170837, "learning_rate": 0.01, "loss": 2.0113, "step": 29376 }, { "epoch": 3.0188039457459928, "grad_norm": 0.05558139458298683, "learning_rate": 0.01, "loss": 2.0075, "step": 29379 }, { "epoch": 3.0191122071516645, "grad_norm": 0.05763263627886772, "learning_rate": 0.01, "loss": 2.0433, "step": 29382 }, { "epoch": 3.0194204685573367, "grad_norm": 0.04832174628973007, "learning_rate": 0.01, "loss": 2.007, "step": 29385 }, { "epoch": 3.0197287299630085, "grad_norm": 0.03823337331414223, "learning_rate": 0.01, "loss": 2.0307, "step": 29388 }, { "epoch": 3.0200369913686806, "grad_norm": 0.05253903195261955, "learning_rate": 0.01, "loss": 2.0018, "step": 29391 }, { "epoch": 3.020345252774353, "grad_norm": 0.10889255255460739, "learning_rate": 0.01, "loss": 2.0106, "step": 29394 }, { "epoch": 3.0206535141800246, "grad_norm": 0.04247021675109863, "learning_rate": 0.01, "loss": 2.0466, "step": 29397 }, { "epoch": 3.020961775585697, "grad_norm": 0.09347319602966309, "learning_rate": 0.01, "loss": 2.037, "step": 29400 }, { "epoch": 3.0212700369913685, "grad_norm": 0.05651739612221718, "learning_rate": 0.01, "loss": 2.0385, "step": 29403 }, { "epoch": 3.0215782983970407, "grad_norm": 0.0666181743144989, "learning_rate": 0.01, "loss": 2.0274, "step": 29406 }, { "epoch": 3.021886559802713, "grad_norm": 0.053186848759651184, "learning_rate": 0.01, "loss": 2.0127, "step": 29409 }, { "epoch": 3.0221948212083847, "grad_norm": 0.05201537534594536, "learning_rate": 0.01, "loss": 2.0009, "step": 29412 }, { "epoch": 3.022503082614057, "grad_norm": 0.02726483717560768, "learning_rate": 0.01, "loss": 2.0135, "step": 29415 }, { "epoch": 3.0228113440197286, "grad_norm": 0.10047302395105362, "learning_rate": 0.01, "loss": 2.0336, "step": 29418 }, { "epoch": 3.023119605425401, "grad_norm": 0.05461571365594864, "learning_rate": 0.01, "loss": 2.0213, "step": 29421 }, { "epoch": 3.0234278668310726, "grad_norm": 0.08373844623565674, "learning_rate": 0.01, "loss": 2.0246, "step": 29424 }, { "epoch": 3.0237361282367448, "grad_norm": 0.045885663479566574, "learning_rate": 0.01, "loss": 2.044, "step": 29427 }, { "epoch": 3.024044389642417, "grad_norm": 0.054790932685136795, "learning_rate": 0.01, "loss": 2.0387, "step": 29430 }, { "epoch": 3.0243526510480887, "grad_norm": 0.04917832091450691, "learning_rate": 0.01, "loss": 2.025, "step": 29433 }, { "epoch": 3.024660912453761, "grad_norm": 0.09375031292438507, "learning_rate": 0.01, "loss": 2.0482, "step": 29436 }, { "epoch": 3.0249691738594326, "grad_norm": 0.051234230399131775, "learning_rate": 0.01, "loss": 2.0024, "step": 29439 }, { "epoch": 3.025277435265105, "grad_norm": 0.057380858808755875, "learning_rate": 0.01, "loss": 2.0011, "step": 29442 }, { "epoch": 3.025585696670777, "grad_norm": 0.060605574399232864, "learning_rate": 0.01, "loss": 2.0158, "step": 29445 }, { "epoch": 3.025893958076449, "grad_norm": 0.09125442057847977, "learning_rate": 0.01, "loss": 2.0284, "step": 29448 }, { "epoch": 3.026202219482121, "grad_norm": 0.04081615433096886, "learning_rate": 0.01, "loss": 2.0029, "step": 29451 }, { "epoch": 3.0265104808877927, "grad_norm": 0.05347365140914917, "learning_rate": 0.01, "loss": 2.0319, "step": 29454 }, { "epoch": 3.026818742293465, "grad_norm": 0.04458871856331825, "learning_rate": 0.01, "loss": 2.0035, "step": 29457 }, { "epoch": 3.0271270036991367, "grad_norm": 0.04996645078063011, "learning_rate": 0.01, "loss": 2.0046, "step": 29460 }, { "epoch": 3.027435265104809, "grad_norm": 0.051296427845954895, "learning_rate": 0.01, "loss": 2.0099, "step": 29463 }, { "epoch": 3.027743526510481, "grad_norm": 0.05448664352297783, "learning_rate": 0.01, "loss": 2.0082, "step": 29466 }, { "epoch": 3.028051787916153, "grad_norm": 0.0587022639811039, "learning_rate": 0.01, "loss": 2.0083, "step": 29469 }, { "epoch": 3.028360049321825, "grad_norm": 0.0711282268166542, "learning_rate": 0.01, "loss": 1.9997, "step": 29472 }, { "epoch": 3.0286683107274968, "grad_norm": 0.14440664649009705, "learning_rate": 0.01, "loss": 2.0525, "step": 29475 }, { "epoch": 3.028976572133169, "grad_norm": 0.06147081404924393, "learning_rate": 0.01, "loss": 1.9979, "step": 29478 }, { "epoch": 3.029284833538841, "grad_norm": 0.0842541828751564, "learning_rate": 0.01, "loss": 2.0075, "step": 29481 }, { "epoch": 3.029593094944513, "grad_norm": 0.04915475845336914, "learning_rate": 0.01, "loss": 2.029, "step": 29484 }, { "epoch": 3.029901356350185, "grad_norm": 0.04254012182354927, "learning_rate": 0.01, "loss": 2.0406, "step": 29487 }, { "epoch": 3.030209617755857, "grad_norm": 0.03716140240430832, "learning_rate": 0.01, "loss": 2.0477, "step": 29490 }, { "epoch": 3.030517879161529, "grad_norm": 0.09156777709722519, "learning_rate": 0.01, "loss": 2.0178, "step": 29493 }, { "epoch": 3.030826140567201, "grad_norm": 0.05401970446109772, "learning_rate": 0.01, "loss": 2.005, "step": 29496 }, { "epoch": 3.031134401972873, "grad_norm": 0.11564016342163086, "learning_rate": 0.01, "loss": 2.0344, "step": 29499 }, { "epoch": 3.031442663378545, "grad_norm": 0.12813927233219147, "learning_rate": 0.01, "loss": 2.0159, "step": 29502 }, { "epoch": 3.031750924784217, "grad_norm": 0.04971994087100029, "learning_rate": 0.01, "loss": 2.0122, "step": 29505 }, { "epoch": 3.032059186189889, "grad_norm": 0.037013307213783264, "learning_rate": 0.01, "loss": 2.0165, "step": 29508 }, { "epoch": 3.032367447595561, "grad_norm": 0.048204224556684494, "learning_rate": 0.01, "loss": 2.0635, "step": 29511 }, { "epoch": 3.032675709001233, "grad_norm": 0.034393493086099625, "learning_rate": 0.01, "loss": 2.0283, "step": 29514 }, { "epoch": 3.0329839704069053, "grad_norm": 0.041031621396541595, "learning_rate": 0.01, "loss": 2.0062, "step": 29517 }, { "epoch": 3.033292231812577, "grad_norm": 0.049610815942287445, "learning_rate": 0.01, "loss": 2.015, "step": 29520 }, { "epoch": 3.033600493218249, "grad_norm": 0.07062069326639175, "learning_rate": 0.01, "loss": 2.0216, "step": 29523 }, { "epoch": 3.033908754623921, "grad_norm": 0.040892720222473145, "learning_rate": 0.01, "loss": 2.0223, "step": 29526 }, { "epoch": 3.034217016029593, "grad_norm": 0.036804720759391785, "learning_rate": 0.01, "loss": 2.0303, "step": 29529 }, { "epoch": 3.034525277435265, "grad_norm": 0.06749982386827469, "learning_rate": 0.01, "loss": 2.0267, "step": 29532 }, { "epoch": 3.034833538840937, "grad_norm": 0.07996654510498047, "learning_rate": 0.01, "loss": 2.0587, "step": 29535 }, { "epoch": 3.0351418002466093, "grad_norm": 0.05621659383177757, "learning_rate": 0.01, "loss": 2.0119, "step": 29538 }, { "epoch": 3.035450061652281, "grad_norm": 0.04464147612452507, "learning_rate": 0.01, "loss": 2.0308, "step": 29541 }, { "epoch": 3.0357583230579532, "grad_norm": 0.04619302973151207, "learning_rate": 0.01, "loss": 2.0028, "step": 29544 }, { "epoch": 3.036066584463625, "grad_norm": 0.09262579679489136, "learning_rate": 0.01, "loss": 2.0457, "step": 29547 }, { "epoch": 3.036374845869297, "grad_norm": 0.07954932749271393, "learning_rate": 0.01, "loss": 1.9981, "step": 29550 }, { "epoch": 3.0366831072749694, "grad_norm": 0.08746150135993958, "learning_rate": 0.01, "loss": 2.0072, "step": 29553 }, { "epoch": 3.036991368680641, "grad_norm": 0.04415113106369972, "learning_rate": 0.01, "loss": 2.0113, "step": 29556 }, { "epoch": 3.0372996300863133, "grad_norm": 0.08307299762964249, "learning_rate": 0.01, "loss": 2.0329, "step": 29559 }, { "epoch": 3.037607891491985, "grad_norm": 0.05882325395941734, "learning_rate": 0.01, "loss": 2.0094, "step": 29562 }, { "epoch": 3.0379161528976573, "grad_norm": 0.08524999022483826, "learning_rate": 0.01, "loss": 2.0039, "step": 29565 }, { "epoch": 3.038224414303329, "grad_norm": 0.07542447000741959, "learning_rate": 0.01, "loss": 2.0156, "step": 29568 }, { "epoch": 3.038532675709001, "grad_norm": 0.06394769251346588, "learning_rate": 0.01, "loss": 2.0551, "step": 29571 }, { "epoch": 3.0388409371146734, "grad_norm": 0.07811316847801208, "learning_rate": 0.01, "loss": 2.0198, "step": 29574 }, { "epoch": 3.039149198520345, "grad_norm": 0.0849740207195282, "learning_rate": 0.01, "loss": 2.0276, "step": 29577 }, { "epoch": 3.0394574599260173, "grad_norm": 0.05540047585964203, "learning_rate": 0.01, "loss": 2.0383, "step": 29580 }, { "epoch": 3.039765721331689, "grad_norm": 0.044082462787628174, "learning_rate": 0.01, "loss": 1.9947, "step": 29583 }, { "epoch": 3.0400739827373613, "grad_norm": 0.11179853975772858, "learning_rate": 0.01, "loss": 2.011, "step": 29586 }, { "epoch": 3.0403822441430335, "grad_norm": 0.10160455852746964, "learning_rate": 0.01, "loss": 2.0303, "step": 29589 }, { "epoch": 3.0406905055487052, "grad_norm": 0.08072539418935776, "learning_rate": 0.01, "loss": 2.0351, "step": 29592 }, { "epoch": 3.0409987669543774, "grad_norm": 0.039637934416532516, "learning_rate": 0.01, "loss": 2.0304, "step": 29595 }, { "epoch": 3.041307028360049, "grad_norm": 0.05847008153796196, "learning_rate": 0.01, "loss": 2.02, "step": 29598 }, { "epoch": 3.0416152897657214, "grad_norm": 0.04309094697237015, "learning_rate": 0.01, "loss": 2.0445, "step": 29601 }, { "epoch": 3.041923551171393, "grad_norm": 0.04230069741606712, "learning_rate": 0.01, "loss": 2.0014, "step": 29604 }, { "epoch": 3.0422318125770653, "grad_norm": 0.08122226595878601, "learning_rate": 0.01, "loss": 2.0214, "step": 29607 }, { "epoch": 3.0425400739827375, "grad_norm": 0.05134423449635506, "learning_rate": 0.01, "loss": 1.9923, "step": 29610 }, { "epoch": 3.0428483353884093, "grad_norm": 0.06343290954828262, "learning_rate": 0.01, "loss": 2.0146, "step": 29613 }, { "epoch": 3.0431565967940815, "grad_norm": 0.04415202513337135, "learning_rate": 0.01, "loss": 2.0189, "step": 29616 }, { "epoch": 3.043464858199753, "grad_norm": 0.09160872548818588, "learning_rate": 0.01, "loss": 2.0045, "step": 29619 }, { "epoch": 3.0437731196054254, "grad_norm": 0.0800345167517662, "learning_rate": 0.01, "loss": 2.0214, "step": 29622 }, { "epoch": 3.0440813810110976, "grad_norm": 0.11805953085422516, "learning_rate": 0.01, "loss": 2.0165, "step": 29625 }, { "epoch": 3.0443896424167693, "grad_norm": 0.036926135420799255, "learning_rate": 0.01, "loss": 2.0115, "step": 29628 }, { "epoch": 3.0446979038224415, "grad_norm": 0.04666229337453842, "learning_rate": 0.01, "loss": 2.0178, "step": 29631 }, { "epoch": 3.0450061652281133, "grad_norm": 0.152203768491745, "learning_rate": 0.01, "loss": 2.0173, "step": 29634 }, { "epoch": 3.0453144266337855, "grad_norm": 0.07508762180805206, "learning_rate": 0.01, "loss": 2.0276, "step": 29637 }, { "epoch": 3.0456226880394572, "grad_norm": 0.07548423856496811, "learning_rate": 0.01, "loss": 2.006, "step": 29640 }, { "epoch": 3.0459309494451294, "grad_norm": 0.03427097201347351, "learning_rate": 0.01, "loss": 2.0229, "step": 29643 }, { "epoch": 3.0462392108508016, "grad_norm": 0.0399865061044693, "learning_rate": 0.01, "loss": 2.012, "step": 29646 }, { "epoch": 3.0465474722564734, "grad_norm": 0.03335277736186981, "learning_rate": 0.01, "loss": 2.0083, "step": 29649 }, { "epoch": 3.0468557336621456, "grad_norm": 0.0520477369427681, "learning_rate": 0.01, "loss": 2.0389, "step": 29652 }, { "epoch": 3.0471639950678173, "grad_norm": 0.08565925061702728, "learning_rate": 0.01, "loss": 2.0088, "step": 29655 }, { "epoch": 3.0474722564734895, "grad_norm": 0.062498703598976135, "learning_rate": 0.01, "loss": 1.9971, "step": 29658 }, { "epoch": 3.0477805178791617, "grad_norm": 0.08171094208955765, "learning_rate": 0.01, "loss": 1.9849, "step": 29661 }, { "epoch": 3.0480887792848335, "grad_norm": 0.06803334504365921, "learning_rate": 0.01, "loss": 2.0127, "step": 29664 }, { "epoch": 3.0483970406905057, "grad_norm": 0.113568514585495, "learning_rate": 0.01, "loss": 2.0074, "step": 29667 }, { "epoch": 3.0487053020961774, "grad_norm": 0.049806151539087296, "learning_rate": 0.01, "loss": 2.0144, "step": 29670 }, { "epoch": 3.0490135635018496, "grad_norm": 0.05908326804637909, "learning_rate": 0.01, "loss": 2.0229, "step": 29673 }, { "epoch": 3.049321824907522, "grad_norm": 0.0421968549489975, "learning_rate": 0.01, "loss": 2.038, "step": 29676 }, { "epoch": 3.0496300863131935, "grad_norm": 0.04743592441082001, "learning_rate": 0.01, "loss": 2.0126, "step": 29679 }, { "epoch": 3.0499383477188657, "grad_norm": 0.11457180231809616, "learning_rate": 0.01, "loss": 2.0057, "step": 29682 }, { "epoch": 3.0502466091245375, "grad_norm": 0.052697159349918365, "learning_rate": 0.01, "loss": 2.0276, "step": 29685 }, { "epoch": 3.0505548705302097, "grad_norm": 0.140494704246521, "learning_rate": 0.01, "loss": 2.0083, "step": 29688 }, { "epoch": 3.0508631319358814, "grad_norm": 0.03548673912882805, "learning_rate": 0.01, "loss": 1.9985, "step": 29691 }, { "epoch": 3.0511713933415536, "grad_norm": 0.03223096579313278, "learning_rate": 0.01, "loss": 2.0254, "step": 29694 }, { "epoch": 3.051479654747226, "grad_norm": 0.040596138685941696, "learning_rate": 0.01, "loss": 2.0179, "step": 29697 }, { "epoch": 3.0517879161528976, "grad_norm": 0.09559677541255951, "learning_rate": 0.01, "loss": 2.0298, "step": 29700 }, { "epoch": 3.0520961775585698, "grad_norm": 0.10038384050130844, "learning_rate": 0.01, "loss": 1.9998, "step": 29703 }, { "epoch": 3.0524044389642415, "grad_norm": 0.06066306680440903, "learning_rate": 0.01, "loss": 2.0395, "step": 29706 }, { "epoch": 3.0527127003699137, "grad_norm": 0.08278308808803558, "learning_rate": 0.01, "loss": 2.0231, "step": 29709 }, { "epoch": 3.053020961775586, "grad_norm": 0.05971555784344673, "learning_rate": 0.01, "loss": 2.0275, "step": 29712 }, { "epoch": 3.0533292231812577, "grad_norm": 0.036974966526031494, "learning_rate": 0.01, "loss": 2.0315, "step": 29715 }, { "epoch": 3.05363748458693, "grad_norm": 0.0310160294175148, "learning_rate": 0.01, "loss": 2.0049, "step": 29718 }, { "epoch": 3.0539457459926016, "grad_norm": 0.03587731346487999, "learning_rate": 0.01, "loss": 2.019, "step": 29721 }, { "epoch": 3.054254007398274, "grad_norm": 0.05497679486870766, "learning_rate": 0.01, "loss": 2.0321, "step": 29724 }, { "epoch": 3.0545622688039455, "grad_norm": 0.06713774055242538, "learning_rate": 0.01, "loss": 1.9943, "step": 29727 }, { "epoch": 3.0548705302096177, "grad_norm": 0.034498222172260284, "learning_rate": 0.01, "loss": 2.0304, "step": 29730 }, { "epoch": 3.05517879161529, "grad_norm": 0.12036336213350296, "learning_rate": 0.01, "loss": 2.0249, "step": 29733 }, { "epoch": 3.0554870530209617, "grad_norm": 0.08315537869930267, "learning_rate": 0.01, "loss": 2.0003, "step": 29736 }, { "epoch": 3.055795314426634, "grad_norm": 0.041693978011608124, "learning_rate": 0.01, "loss": 2.0117, "step": 29739 }, { "epoch": 3.0561035758323056, "grad_norm": 0.08889281749725342, "learning_rate": 0.01, "loss": 2.0256, "step": 29742 }, { "epoch": 3.056411837237978, "grad_norm": 0.07068068534135818, "learning_rate": 0.01, "loss": 2.0018, "step": 29745 }, { "epoch": 3.05672009864365, "grad_norm": 0.09744048863649368, "learning_rate": 0.01, "loss": 2.001, "step": 29748 }, { "epoch": 3.0570283600493218, "grad_norm": 0.04529158025979996, "learning_rate": 0.01, "loss": 2.0084, "step": 29751 }, { "epoch": 3.057336621454994, "grad_norm": 0.041666992008686066, "learning_rate": 0.01, "loss": 2.031, "step": 29754 }, { "epoch": 3.0576448828606657, "grad_norm": 0.07549773156642914, "learning_rate": 0.01, "loss": 2.0274, "step": 29757 }, { "epoch": 3.057953144266338, "grad_norm": 0.046318188309669495, "learning_rate": 0.01, "loss": 2.0132, "step": 29760 }, { "epoch": 3.0582614056720097, "grad_norm": 0.05954563990235329, "learning_rate": 0.01, "loss": 2.01, "step": 29763 }, { "epoch": 3.058569667077682, "grad_norm": 0.03786987066268921, "learning_rate": 0.01, "loss": 1.9981, "step": 29766 }, { "epoch": 3.058877928483354, "grad_norm": 0.04249919578433037, "learning_rate": 0.01, "loss": 2.0168, "step": 29769 }, { "epoch": 3.059186189889026, "grad_norm": 0.09349595755338669, "learning_rate": 0.01, "loss": 2.0365, "step": 29772 }, { "epoch": 3.059494451294698, "grad_norm": 0.05637031048536301, "learning_rate": 0.01, "loss": 1.9966, "step": 29775 }, { "epoch": 3.0598027127003697, "grad_norm": 0.09952693432569504, "learning_rate": 0.01, "loss": 2.0295, "step": 29778 }, { "epoch": 3.060110974106042, "grad_norm": 0.10634247213602066, "learning_rate": 0.01, "loss": 2.0425, "step": 29781 }, { "epoch": 3.060419235511714, "grad_norm": 0.0774141326546669, "learning_rate": 0.01, "loss": 2.0222, "step": 29784 }, { "epoch": 3.060727496917386, "grad_norm": 0.04012331739068031, "learning_rate": 0.01, "loss": 2.021, "step": 29787 }, { "epoch": 3.061035758323058, "grad_norm": 0.04289887100458145, "learning_rate": 0.01, "loss": 2.0115, "step": 29790 }, { "epoch": 3.06134401972873, "grad_norm": 0.03678658604621887, "learning_rate": 0.01, "loss": 2.0269, "step": 29793 }, { "epoch": 3.061652281134402, "grad_norm": 0.034676194190979004, "learning_rate": 0.01, "loss": 2.0426, "step": 29796 }, { "epoch": 3.0619605425400738, "grad_norm": 0.08951381593942642, "learning_rate": 0.01, "loss": 2.001, "step": 29799 }, { "epoch": 3.062268803945746, "grad_norm": 0.053729988634586334, "learning_rate": 0.01, "loss": 2.0108, "step": 29802 }, { "epoch": 3.062577065351418, "grad_norm": 0.08025490492582321, "learning_rate": 0.01, "loss": 2.0091, "step": 29805 }, { "epoch": 3.06288532675709, "grad_norm": 0.11099457740783691, "learning_rate": 0.01, "loss": 2.0134, "step": 29808 }, { "epoch": 3.063193588162762, "grad_norm": 0.13354651629924774, "learning_rate": 0.01, "loss": 2.0417, "step": 29811 }, { "epoch": 3.063501849568434, "grad_norm": 0.07378706336021423, "learning_rate": 0.01, "loss": 2.0169, "step": 29814 }, { "epoch": 3.063810110974106, "grad_norm": 0.04699387028813362, "learning_rate": 0.01, "loss": 2.0211, "step": 29817 }, { "epoch": 3.0641183723797782, "grad_norm": 0.08287378400564194, "learning_rate": 0.01, "loss": 2.0414, "step": 29820 }, { "epoch": 3.06442663378545, "grad_norm": 0.04004284739494324, "learning_rate": 0.01, "loss": 2.0275, "step": 29823 }, { "epoch": 3.064734895191122, "grad_norm": 0.05801365152001381, "learning_rate": 0.01, "loss": 2.0315, "step": 29826 }, { "epoch": 3.065043156596794, "grad_norm": 0.05196039378643036, "learning_rate": 0.01, "loss": 2.0193, "step": 29829 }, { "epoch": 3.065351418002466, "grad_norm": 0.04543706774711609, "learning_rate": 0.01, "loss": 2.0032, "step": 29832 }, { "epoch": 3.065659679408138, "grad_norm": 0.05413155257701874, "learning_rate": 0.01, "loss": 2.0127, "step": 29835 }, { "epoch": 3.06596794081381, "grad_norm": 0.05158795788884163, "learning_rate": 0.01, "loss": 2.0193, "step": 29838 }, { "epoch": 3.0662762022194823, "grad_norm": 0.05547649413347244, "learning_rate": 0.01, "loss": 2.0217, "step": 29841 }, { "epoch": 3.066584463625154, "grad_norm": 0.04968711733818054, "learning_rate": 0.01, "loss": 2.0332, "step": 29844 }, { "epoch": 3.066892725030826, "grad_norm": 0.05625564232468605, "learning_rate": 0.01, "loss": 2.0115, "step": 29847 }, { "epoch": 3.067200986436498, "grad_norm": 0.05007552728056908, "learning_rate": 0.01, "loss": 2.022, "step": 29850 }, { "epoch": 3.06750924784217, "grad_norm": 0.08748306334018707, "learning_rate": 0.01, "loss": 2.0372, "step": 29853 }, { "epoch": 3.0678175092478424, "grad_norm": 0.08939902484416962, "learning_rate": 0.01, "loss": 2.0279, "step": 29856 }, { "epoch": 3.068125770653514, "grad_norm": 0.04412224516272545, "learning_rate": 0.01, "loss": 2.0039, "step": 29859 }, { "epoch": 3.0684340320591863, "grad_norm": 0.04574199765920639, "learning_rate": 0.01, "loss": 2.0204, "step": 29862 }, { "epoch": 3.068742293464858, "grad_norm": 0.059090469032526016, "learning_rate": 0.01, "loss": 2.0097, "step": 29865 }, { "epoch": 3.0690505548705302, "grad_norm": 0.048365235328674316, "learning_rate": 0.01, "loss": 1.988, "step": 29868 }, { "epoch": 3.0693588162762024, "grad_norm": 0.05044705048203468, "learning_rate": 0.01, "loss": 2.0242, "step": 29871 }, { "epoch": 3.069667077681874, "grad_norm": 0.06108652427792549, "learning_rate": 0.01, "loss": 2.0223, "step": 29874 }, { "epoch": 3.0699753390875464, "grad_norm": 0.04400194063782692, "learning_rate": 0.01, "loss": 2.0188, "step": 29877 }, { "epoch": 3.070283600493218, "grad_norm": 0.047024693340063095, "learning_rate": 0.01, "loss": 2.0298, "step": 29880 }, { "epoch": 3.0705918618988903, "grad_norm": 0.06958126276731491, "learning_rate": 0.01, "loss": 2.0122, "step": 29883 }, { "epoch": 3.070900123304562, "grad_norm": 0.07789502292871475, "learning_rate": 0.01, "loss": 2.0022, "step": 29886 }, { "epoch": 3.0712083847102343, "grad_norm": 0.06438666582107544, "learning_rate": 0.01, "loss": 2.035, "step": 29889 }, { "epoch": 3.0715166461159065, "grad_norm": 0.1446814239025116, "learning_rate": 0.01, "loss": 1.9922, "step": 29892 }, { "epoch": 3.071824907521578, "grad_norm": 0.055988240987062454, "learning_rate": 0.01, "loss": 2.0204, "step": 29895 }, { "epoch": 3.0721331689272504, "grad_norm": 0.04055717960000038, "learning_rate": 0.01, "loss": 2.025, "step": 29898 }, { "epoch": 3.072441430332922, "grad_norm": 0.11918565630912781, "learning_rate": 0.01, "loss": 2.0436, "step": 29901 }, { "epoch": 3.0727496917385944, "grad_norm": 0.04726850986480713, "learning_rate": 0.01, "loss": 2.0313, "step": 29904 }, { "epoch": 3.0730579531442666, "grad_norm": 0.0526423342525959, "learning_rate": 0.01, "loss": 2.0541, "step": 29907 }, { "epoch": 3.0733662145499383, "grad_norm": 0.048728957772254944, "learning_rate": 0.01, "loss": 2.0134, "step": 29910 }, { "epoch": 3.0736744759556105, "grad_norm": 0.036348532885313034, "learning_rate": 0.01, "loss": 1.9963, "step": 29913 }, { "epoch": 3.0739827373612822, "grad_norm": 0.08039457350969315, "learning_rate": 0.01, "loss": 1.9541, "step": 29916 }, { "epoch": 3.0742909987669544, "grad_norm": 0.05370686575770378, "learning_rate": 0.01, "loss": 2.0306, "step": 29919 }, { "epoch": 3.074599260172626, "grad_norm": 0.08430914580821991, "learning_rate": 0.01, "loss": 2.0159, "step": 29922 }, { "epoch": 3.0749075215782984, "grad_norm": 0.04675029218196869, "learning_rate": 0.01, "loss": 1.9867, "step": 29925 }, { "epoch": 3.0752157829839706, "grad_norm": 0.0455501526594162, "learning_rate": 0.01, "loss": 2.0117, "step": 29928 }, { "epoch": 3.0755240443896423, "grad_norm": 0.04330252856016159, "learning_rate": 0.01, "loss": 2.0166, "step": 29931 }, { "epoch": 3.0758323057953145, "grad_norm": 0.04302576184272766, "learning_rate": 0.01, "loss": 2.0252, "step": 29934 }, { "epoch": 3.0761405672009863, "grad_norm": 0.07246873527765274, "learning_rate": 0.01, "loss": 2.0118, "step": 29937 }, { "epoch": 3.0764488286066585, "grad_norm": 0.12140212953090668, "learning_rate": 0.01, "loss": 2.0384, "step": 29940 }, { "epoch": 3.0767570900123307, "grad_norm": 0.050940465182065964, "learning_rate": 0.01, "loss": 2.0315, "step": 29943 }, { "epoch": 3.0770653514180024, "grad_norm": 0.08281772583723068, "learning_rate": 0.01, "loss": 2.0209, "step": 29946 }, { "epoch": 3.0773736128236746, "grad_norm": 0.05485936999320984, "learning_rate": 0.01, "loss": 2.0086, "step": 29949 }, { "epoch": 3.0776818742293464, "grad_norm": 0.0354488380253315, "learning_rate": 0.01, "loss": 2.0127, "step": 29952 }, { "epoch": 3.0779901356350186, "grad_norm": 0.04454338923096657, "learning_rate": 0.01, "loss": 2.0225, "step": 29955 }, { "epoch": 3.0782983970406903, "grad_norm": 0.07980217784643173, "learning_rate": 0.01, "loss": 2.0178, "step": 29958 }, { "epoch": 3.0786066584463625, "grad_norm": 0.07601354271173477, "learning_rate": 0.01, "loss": 2.0062, "step": 29961 }, { "epoch": 3.0789149198520347, "grad_norm": 0.036425743252038956, "learning_rate": 0.01, "loss": 2.0094, "step": 29964 }, { "epoch": 3.0792231812577064, "grad_norm": 0.05909854918718338, "learning_rate": 0.01, "loss": 2.0291, "step": 29967 }, { "epoch": 3.0795314426633786, "grad_norm": 0.038156334310770035, "learning_rate": 0.01, "loss": 2.0146, "step": 29970 }, { "epoch": 3.0798397040690504, "grad_norm": 0.03241376578807831, "learning_rate": 0.01, "loss": 2.0132, "step": 29973 }, { "epoch": 3.0801479654747226, "grad_norm": 0.039934635162353516, "learning_rate": 0.01, "loss": 2.0315, "step": 29976 }, { "epoch": 3.0804562268803948, "grad_norm": 0.05287212133407593, "learning_rate": 0.01, "loss": 2.0126, "step": 29979 }, { "epoch": 3.0807644882860665, "grad_norm": 0.0807357057929039, "learning_rate": 0.01, "loss": 2.0162, "step": 29982 }, { "epoch": 3.0810727496917387, "grad_norm": 0.07264538109302521, "learning_rate": 0.01, "loss": 2.0103, "step": 29985 }, { "epoch": 3.0813810110974105, "grad_norm": 0.06328427046537399, "learning_rate": 0.01, "loss": 2.0377, "step": 29988 }, { "epoch": 3.0816892725030827, "grad_norm": 0.05093936249613762, "learning_rate": 0.01, "loss": 2.0318, "step": 29991 }, { "epoch": 3.0819975339087544, "grad_norm": 0.035449884831905365, "learning_rate": 0.01, "loss": 2.0076, "step": 29994 }, { "epoch": 3.0823057953144266, "grad_norm": 0.13781675696372986, "learning_rate": 0.01, "loss": 2.0333, "step": 29997 }, { "epoch": 3.082614056720099, "grad_norm": 0.06310711055994034, "learning_rate": 0.01, "loss": 1.9998, "step": 30000 }, { "epoch": 3.0829223181257706, "grad_norm": 0.059962496161460876, "learning_rate": 0.01, "loss": 2.0033, "step": 30003 }, { "epoch": 3.0832305795314427, "grad_norm": 0.07414010167121887, "learning_rate": 0.01, "loss": 2.0017, "step": 30006 }, { "epoch": 3.0835388409371145, "grad_norm": 0.05147621035575867, "learning_rate": 0.01, "loss": 2.0253, "step": 30009 }, { "epoch": 3.0838471023427867, "grad_norm": 0.08669129014015198, "learning_rate": 0.01, "loss": 2.029, "step": 30012 }, { "epoch": 3.084155363748459, "grad_norm": 0.03791889548301697, "learning_rate": 0.01, "loss": 1.997, "step": 30015 }, { "epoch": 3.0844636251541306, "grad_norm": 0.08246924728155136, "learning_rate": 0.01, "loss": 1.9742, "step": 30018 }, { "epoch": 3.084771886559803, "grad_norm": 0.11454186588525772, "learning_rate": 0.01, "loss": 2.0275, "step": 30021 }, { "epoch": 3.0850801479654746, "grad_norm": 0.07639762759208679, "learning_rate": 0.01, "loss": 2.033, "step": 30024 }, { "epoch": 3.085388409371147, "grad_norm": 0.05283968895673752, "learning_rate": 0.01, "loss": 2.0121, "step": 30027 }, { "epoch": 3.0856966707768185, "grad_norm": 0.041770000010728836, "learning_rate": 0.01, "loss": 2.0196, "step": 30030 }, { "epoch": 3.0860049321824907, "grad_norm": 0.034824103116989136, "learning_rate": 0.01, "loss": 2.0161, "step": 30033 }, { "epoch": 3.086313193588163, "grad_norm": 0.043972861021757126, "learning_rate": 0.01, "loss": 2.0217, "step": 30036 }, { "epoch": 3.0866214549938347, "grad_norm": 0.03452165424823761, "learning_rate": 0.01, "loss": 2.0153, "step": 30039 }, { "epoch": 3.086929716399507, "grad_norm": 0.07175113260746002, "learning_rate": 0.01, "loss": 2.0091, "step": 30042 }, { "epoch": 3.0872379778051786, "grad_norm": 0.0780046209692955, "learning_rate": 0.01, "loss": 2.0127, "step": 30045 }, { "epoch": 3.087546239210851, "grad_norm": 0.06838949769735336, "learning_rate": 0.01, "loss": 2.0108, "step": 30048 }, { "epoch": 3.087854500616523, "grad_norm": 0.114894799888134, "learning_rate": 0.01, "loss": 2.0199, "step": 30051 }, { "epoch": 3.0881627620221948, "grad_norm": 0.10075607150793076, "learning_rate": 0.01, "loss": 2.025, "step": 30054 }, { "epoch": 3.088471023427867, "grad_norm": 0.04926849529147148, "learning_rate": 0.01, "loss": 2.0171, "step": 30057 }, { "epoch": 3.0887792848335387, "grad_norm": 0.04143848270177841, "learning_rate": 0.01, "loss": 2.0275, "step": 30060 }, { "epoch": 3.089087546239211, "grad_norm": 0.04664148762822151, "learning_rate": 0.01, "loss": 2.0227, "step": 30063 }, { "epoch": 3.089395807644883, "grad_norm": 0.04764292761683464, "learning_rate": 0.01, "loss": 2.0408, "step": 30066 }, { "epoch": 3.089704069050555, "grad_norm": 0.06317167729139328, "learning_rate": 0.01, "loss": 2.0177, "step": 30069 }, { "epoch": 3.090012330456227, "grad_norm": 0.07983100414276123, "learning_rate": 0.01, "loss": 2.0285, "step": 30072 }, { "epoch": 3.090320591861899, "grad_norm": 0.03481268137693405, "learning_rate": 0.01, "loss": 2.0441, "step": 30075 }, { "epoch": 3.090628853267571, "grad_norm": 0.042781632393598557, "learning_rate": 0.01, "loss": 2.0278, "step": 30078 }, { "epoch": 3.0909371146732427, "grad_norm": 0.05203581228852272, "learning_rate": 0.01, "loss": 2.0073, "step": 30081 }, { "epoch": 3.091245376078915, "grad_norm": 0.07452182471752167, "learning_rate": 0.01, "loss": 2.031, "step": 30084 }, { "epoch": 3.091553637484587, "grad_norm": 0.05319884046912193, "learning_rate": 0.01, "loss": 2.02, "step": 30087 }, { "epoch": 3.091861898890259, "grad_norm": 0.11648225039243698, "learning_rate": 0.01, "loss": 1.9881, "step": 30090 }, { "epoch": 3.092170160295931, "grad_norm": 0.07870937138795853, "learning_rate": 0.01, "loss": 2.0367, "step": 30093 }, { "epoch": 3.092478421701603, "grad_norm": 0.04574506729841232, "learning_rate": 0.01, "loss": 2.027, "step": 30096 }, { "epoch": 3.092786683107275, "grad_norm": 0.07913927733898163, "learning_rate": 0.01, "loss": 2.0318, "step": 30099 }, { "epoch": 3.0930949445129468, "grad_norm": 0.08093220740556717, "learning_rate": 0.01, "loss": 2.0226, "step": 30102 }, { "epoch": 3.093403205918619, "grad_norm": 0.07089852541685104, "learning_rate": 0.01, "loss": 2.0292, "step": 30105 }, { "epoch": 3.093711467324291, "grad_norm": 0.06293002516031265, "learning_rate": 0.01, "loss": 1.9972, "step": 30108 }, { "epoch": 3.094019728729963, "grad_norm": 0.057953476905822754, "learning_rate": 0.01, "loss": 2.027, "step": 30111 }, { "epoch": 3.094327990135635, "grad_norm": 0.06989496946334839, "learning_rate": 0.01, "loss": 2.0318, "step": 30114 }, { "epoch": 3.094636251541307, "grad_norm": 0.04050515964627266, "learning_rate": 0.01, "loss": 2.0071, "step": 30117 }, { "epoch": 3.094944512946979, "grad_norm": 0.1112913116812706, "learning_rate": 0.01, "loss": 2.0082, "step": 30120 }, { "epoch": 3.0952527743526512, "grad_norm": 0.05135345458984375, "learning_rate": 0.01, "loss": 2.044, "step": 30123 }, { "epoch": 3.095561035758323, "grad_norm": 0.1411667764186859, "learning_rate": 0.01, "loss": 2.0096, "step": 30126 }, { "epoch": 3.095869297163995, "grad_norm": 0.08077210187911987, "learning_rate": 0.01, "loss": 2.0251, "step": 30129 }, { "epoch": 3.096177558569667, "grad_norm": 0.10934364050626755, "learning_rate": 0.01, "loss": 2.0169, "step": 30132 }, { "epoch": 3.096485819975339, "grad_norm": 0.07186675071716309, "learning_rate": 0.01, "loss": 2.0123, "step": 30135 }, { "epoch": 3.0967940813810113, "grad_norm": 0.11918327957391739, "learning_rate": 0.01, "loss": 2.0391, "step": 30138 }, { "epoch": 3.097102342786683, "grad_norm": 0.052199963480234146, "learning_rate": 0.01, "loss": 2.011, "step": 30141 }, { "epoch": 3.0974106041923553, "grad_norm": 0.06796009093523026, "learning_rate": 0.01, "loss": 2.0399, "step": 30144 }, { "epoch": 3.097718865598027, "grad_norm": 0.055949617177248, "learning_rate": 0.01, "loss": 2.0243, "step": 30147 }, { "epoch": 3.098027127003699, "grad_norm": 0.06256923824548721, "learning_rate": 0.01, "loss": 2.0299, "step": 30150 }, { "epoch": 3.098335388409371, "grad_norm": 0.04565596580505371, "learning_rate": 0.01, "loss": 2.0084, "step": 30153 }, { "epoch": 3.098643649815043, "grad_norm": 0.051202937960624695, "learning_rate": 0.01, "loss": 2.0276, "step": 30156 }, { "epoch": 3.0989519112207153, "grad_norm": 0.09557224065065384, "learning_rate": 0.01, "loss": 2.0133, "step": 30159 }, { "epoch": 3.099260172626387, "grad_norm": 0.07491281628608704, "learning_rate": 0.01, "loss": 2.0222, "step": 30162 }, { "epoch": 3.0995684340320593, "grad_norm": 0.06766588240861893, "learning_rate": 0.01, "loss": 2.0165, "step": 30165 }, { "epoch": 3.099876695437731, "grad_norm": 0.036496859043836594, "learning_rate": 0.01, "loss": 2.0135, "step": 30168 }, { "epoch": 3.1001849568434032, "grad_norm": 0.09757262468338013, "learning_rate": 0.01, "loss": 2.0397, "step": 30171 }, { "epoch": 3.1004932182490754, "grad_norm": 0.046625055372714996, "learning_rate": 0.01, "loss": 2.0149, "step": 30174 }, { "epoch": 3.100801479654747, "grad_norm": 0.07655061781406403, "learning_rate": 0.01, "loss": 2.0054, "step": 30177 }, { "epoch": 3.1011097410604194, "grad_norm": 0.04577295854687691, "learning_rate": 0.01, "loss": 2.0192, "step": 30180 }, { "epoch": 3.101418002466091, "grad_norm": 0.04528047516942024, "learning_rate": 0.01, "loss": 2.0181, "step": 30183 }, { "epoch": 3.1017262638717633, "grad_norm": 0.04630058631300926, "learning_rate": 0.01, "loss": 2.0127, "step": 30186 }, { "epoch": 3.102034525277435, "grad_norm": 0.03936396911740303, "learning_rate": 0.01, "loss": 2.0093, "step": 30189 }, { "epoch": 3.1023427866831073, "grad_norm": 0.05879136547446251, "learning_rate": 0.01, "loss": 2.0169, "step": 30192 }, { "epoch": 3.1026510480887795, "grad_norm": 0.061699915677309036, "learning_rate": 0.01, "loss": 2.0418, "step": 30195 }, { "epoch": 3.102959309494451, "grad_norm": 0.05961352214217186, "learning_rate": 0.01, "loss": 1.9985, "step": 30198 }, { "epoch": 3.1032675709001234, "grad_norm": 0.10209197551012039, "learning_rate": 0.01, "loss": 2.0048, "step": 30201 }, { "epoch": 3.103575832305795, "grad_norm": 0.10693737864494324, "learning_rate": 0.01, "loss": 2.0321, "step": 30204 }, { "epoch": 3.1038840937114673, "grad_norm": 0.11183932423591614, "learning_rate": 0.01, "loss": 2.0269, "step": 30207 }, { "epoch": 3.1041923551171395, "grad_norm": 0.07532890886068344, "learning_rate": 0.01, "loss": 2.0284, "step": 30210 }, { "epoch": 3.1045006165228113, "grad_norm": 0.04888713359832764, "learning_rate": 0.01, "loss": 1.9731, "step": 30213 }, { "epoch": 3.1048088779284835, "grad_norm": 0.0533674955368042, "learning_rate": 0.01, "loss": 2.0158, "step": 30216 }, { "epoch": 3.1051171393341552, "grad_norm": 0.044245727360248566, "learning_rate": 0.01, "loss": 2.0006, "step": 30219 }, { "epoch": 3.1054254007398274, "grad_norm": 0.032582368701696396, "learning_rate": 0.01, "loss": 2.0314, "step": 30222 }, { "epoch": 3.105733662145499, "grad_norm": 0.10102632641792297, "learning_rate": 0.01, "loss": 2.0094, "step": 30225 }, { "epoch": 3.1060419235511714, "grad_norm": 0.03793931379914284, "learning_rate": 0.01, "loss": 2.0356, "step": 30228 }, { "epoch": 3.1063501849568436, "grad_norm": 0.04896242544054985, "learning_rate": 0.01, "loss": 2.0167, "step": 30231 }, { "epoch": 3.1066584463625153, "grad_norm": 0.057475607842206955, "learning_rate": 0.01, "loss": 1.9813, "step": 30234 }, { "epoch": 3.1069667077681875, "grad_norm": 0.037703339010477066, "learning_rate": 0.01, "loss": 2.0002, "step": 30237 }, { "epoch": 3.1072749691738593, "grad_norm": 0.04800937697291374, "learning_rate": 0.01, "loss": 2.043, "step": 30240 }, { "epoch": 3.1075832305795315, "grad_norm": 0.042761534452438354, "learning_rate": 0.01, "loss": 2.0208, "step": 30243 }, { "epoch": 3.1078914919852036, "grad_norm": 0.0864332839846611, "learning_rate": 0.01, "loss": 2.0229, "step": 30246 }, { "epoch": 3.1081997533908754, "grad_norm": 0.10482830554246902, "learning_rate": 0.01, "loss": 2.0601, "step": 30249 }, { "epoch": 3.1085080147965476, "grad_norm": 0.10628984868526459, "learning_rate": 0.01, "loss": 2.0078, "step": 30252 }, { "epoch": 3.1088162762022193, "grad_norm": 0.04853438585996628, "learning_rate": 0.01, "loss": 1.9999, "step": 30255 }, { "epoch": 3.1091245376078915, "grad_norm": 0.05274122208356857, "learning_rate": 0.01, "loss": 1.9926, "step": 30258 }, { "epoch": 3.1094327990135637, "grad_norm": 0.03757349029183388, "learning_rate": 0.01, "loss": 2.003, "step": 30261 }, { "epoch": 3.1097410604192355, "grad_norm": 0.04505928233265877, "learning_rate": 0.01, "loss": 2.0126, "step": 30264 }, { "epoch": 3.1100493218249077, "grad_norm": 0.08804783225059509, "learning_rate": 0.01, "loss": 2.0285, "step": 30267 }, { "epoch": 3.1103575832305794, "grad_norm": 0.11545568704605103, "learning_rate": 0.01, "loss": 2.0105, "step": 30270 }, { "epoch": 3.1106658446362516, "grad_norm": 0.04955466091632843, "learning_rate": 0.01, "loss": 2.048, "step": 30273 }, { "epoch": 3.1109741060419234, "grad_norm": 0.04930401220917702, "learning_rate": 0.01, "loss": 2.0223, "step": 30276 }, { "epoch": 3.1112823674475956, "grad_norm": 0.05701540783047676, "learning_rate": 0.01, "loss": 2.046, "step": 30279 }, { "epoch": 3.1115906288532678, "grad_norm": 0.07442028075456619, "learning_rate": 0.01, "loss": 2.0085, "step": 30282 }, { "epoch": 3.1118988902589395, "grad_norm": 0.06538541615009308, "learning_rate": 0.01, "loss": 2.0194, "step": 30285 }, { "epoch": 3.1122071516646117, "grad_norm": 0.0826522633433342, "learning_rate": 0.01, "loss": 2.0267, "step": 30288 }, { "epoch": 3.1125154130702835, "grad_norm": 0.1353752762079239, "learning_rate": 0.01, "loss": 2.0089, "step": 30291 }, { "epoch": 3.1128236744759556, "grad_norm": 0.06406208872795105, "learning_rate": 0.01, "loss": 2.0174, "step": 30294 }, { "epoch": 3.1131319358816274, "grad_norm": 0.036653418093919754, "learning_rate": 0.01, "loss": 1.9951, "step": 30297 }, { "epoch": 3.1134401972872996, "grad_norm": 0.0839834213256836, "learning_rate": 0.01, "loss": 2.0213, "step": 30300 }, { "epoch": 3.113748458692972, "grad_norm": 0.04629015177488327, "learning_rate": 0.01, "loss": 2.0488, "step": 30303 }, { "epoch": 3.1140567200986435, "grad_norm": 0.08630986511707306, "learning_rate": 0.01, "loss": 2.0271, "step": 30306 }, { "epoch": 3.1143649815043157, "grad_norm": 0.07894504070281982, "learning_rate": 0.01, "loss": 2.0128, "step": 30309 }, { "epoch": 3.1146732429099875, "grad_norm": 0.09203556925058365, "learning_rate": 0.01, "loss": 2.0094, "step": 30312 }, { "epoch": 3.1149815043156597, "grad_norm": 0.061055563390254974, "learning_rate": 0.01, "loss": 2.0199, "step": 30315 }, { "epoch": 3.115289765721332, "grad_norm": 0.0711871087551117, "learning_rate": 0.01, "loss": 2.0091, "step": 30318 }, { "epoch": 3.1155980271270036, "grad_norm": 0.07315775007009506, "learning_rate": 0.01, "loss": 2.0209, "step": 30321 }, { "epoch": 3.115906288532676, "grad_norm": 0.1050182655453682, "learning_rate": 0.01, "loss": 2.0086, "step": 30324 }, { "epoch": 3.1162145499383476, "grad_norm": 0.04959526285529137, "learning_rate": 0.01, "loss": 1.9827, "step": 30327 }, { "epoch": 3.1165228113440198, "grad_norm": 0.061066534370183945, "learning_rate": 0.01, "loss": 2.0258, "step": 30330 }, { "epoch": 3.116831072749692, "grad_norm": 0.057984329760074615, "learning_rate": 0.01, "loss": 2.0119, "step": 30333 }, { "epoch": 3.1171393341553637, "grad_norm": 0.07337040454149246, "learning_rate": 0.01, "loss": 2.0069, "step": 30336 }, { "epoch": 3.117447595561036, "grad_norm": 0.07801029831171036, "learning_rate": 0.01, "loss": 2.0172, "step": 30339 }, { "epoch": 3.1177558569667077, "grad_norm": 0.06625111401081085, "learning_rate": 0.01, "loss": 2.022, "step": 30342 }, { "epoch": 3.11806411837238, "grad_norm": 0.11416434496641159, "learning_rate": 0.01, "loss": 2.0304, "step": 30345 }, { "epoch": 3.1183723797780516, "grad_norm": 0.05046262592077255, "learning_rate": 0.01, "loss": 2.0064, "step": 30348 }, { "epoch": 3.118680641183724, "grad_norm": 0.030621282756328583, "learning_rate": 0.01, "loss": 2.0085, "step": 30351 }, { "epoch": 3.118988902589396, "grad_norm": 0.03274908661842346, "learning_rate": 0.01, "loss": 2.0164, "step": 30354 }, { "epoch": 3.1192971639950677, "grad_norm": 0.03673490136861801, "learning_rate": 0.01, "loss": 2.0383, "step": 30357 }, { "epoch": 3.11960542540074, "grad_norm": 0.07161369919776917, "learning_rate": 0.01, "loss": 2.0122, "step": 30360 }, { "epoch": 3.1199136868064117, "grad_norm": 0.04244636744260788, "learning_rate": 0.01, "loss": 1.9901, "step": 30363 }, { "epoch": 3.120221948212084, "grad_norm": 0.05357150360941887, "learning_rate": 0.01, "loss": 1.9994, "step": 30366 }, { "epoch": 3.120530209617756, "grad_norm": 0.06689538061618805, "learning_rate": 0.01, "loss": 2.008, "step": 30369 }, { "epoch": 3.120838471023428, "grad_norm": 0.08160898089408875, "learning_rate": 0.01, "loss": 2.0189, "step": 30372 }, { "epoch": 3.1211467324291, "grad_norm": 0.04037999361753464, "learning_rate": 0.01, "loss": 1.993, "step": 30375 }, { "epoch": 3.1214549938347718, "grad_norm": 0.09855981171131134, "learning_rate": 0.01, "loss": 2.0157, "step": 30378 }, { "epoch": 3.121763255240444, "grad_norm": 0.04645252600312233, "learning_rate": 0.01, "loss": 2.0211, "step": 30381 }, { "epoch": 3.1220715166461157, "grad_norm": 0.10605467110872269, "learning_rate": 0.01, "loss": 2.015, "step": 30384 }, { "epoch": 3.122379778051788, "grad_norm": 0.11321547627449036, "learning_rate": 0.01, "loss": 2.0193, "step": 30387 }, { "epoch": 3.12268803945746, "grad_norm": 0.07283324748277664, "learning_rate": 0.01, "loss": 2.0327, "step": 30390 }, { "epoch": 3.122996300863132, "grad_norm": 0.07217562943696976, "learning_rate": 0.01, "loss": 2.0079, "step": 30393 }, { "epoch": 3.123304562268804, "grad_norm": 0.036078888922929764, "learning_rate": 0.01, "loss": 2.0211, "step": 30396 }, { "epoch": 3.123612823674476, "grad_norm": 0.03730477765202522, "learning_rate": 0.01, "loss": 2.0077, "step": 30399 }, { "epoch": 3.123921085080148, "grad_norm": 0.04707048460841179, "learning_rate": 0.01, "loss": 2.0204, "step": 30402 }, { "epoch": 3.12422934648582, "grad_norm": 0.0571308508515358, "learning_rate": 0.01, "loss": 2.0054, "step": 30405 }, { "epoch": 3.124537607891492, "grad_norm": 0.05167640373110771, "learning_rate": 0.01, "loss": 2.0134, "step": 30408 }, { "epoch": 3.124845869297164, "grad_norm": 0.06759858876466751, "learning_rate": 0.01, "loss": 2.0286, "step": 30411 }, { "epoch": 3.125154130702836, "grad_norm": 0.031637318432331085, "learning_rate": 0.01, "loss": 1.9848, "step": 30414 }, { "epoch": 3.125462392108508, "grad_norm": 0.0991082713007927, "learning_rate": 0.01, "loss": 2.0277, "step": 30417 }, { "epoch": 3.12577065351418, "grad_norm": 0.06524378806352615, "learning_rate": 0.01, "loss": 1.9995, "step": 30420 }, { "epoch": 3.126078914919852, "grad_norm": 0.05751586705446243, "learning_rate": 0.01, "loss": 2.012, "step": 30423 }, { "epoch": 3.126387176325524, "grad_norm": 0.03776116296648979, "learning_rate": 0.01, "loss": 2.0291, "step": 30426 }, { "epoch": 3.126695437731196, "grad_norm": 0.0619078204035759, "learning_rate": 0.01, "loss": 1.9884, "step": 30429 }, { "epoch": 3.127003699136868, "grad_norm": 0.0511760450899601, "learning_rate": 0.01, "loss": 2.0142, "step": 30432 }, { "epoch": 3.12731196054254, "grad_norm": 0.053964611142873764, "learning_rate": 0.01, "loss": 2.0361, "step": 30435 }, { "epoch": 3.127620221948212, "grad_norm": 0.11773833632469177, "learning_rate": 0.01, "loss": 2.0298, "step": 30438 }, { "epoch": 3.1279284833538843, "grad_norm": 0.04505350813269615, "learning_rate": 0.01, "loss": 2.0162, "step": 30441 }, { "epoch": 3.128236744759556, "grad_norm": 0.10077274590730667, "learning_rate": 0.01, "loss": 2.0101, "step": 30444 }, { "epoch": 3.1285450061652282, "grad_norm": 0.05291616916656494, "learning_rate": 0.01, "loss": 2.0148, "step": 30447 }, { "epoch": 3.1288532675709, "grad_norm": 0.049340371042490005, "learning_rate": 0.01, "loss": 2.0101, "step": 30450 }, { "epoch": 3.129161528976572, "grad_norm": 0.06910324841737747, "learning_rate": 0.01, "loss": 2.008, "step": 30453 }, { "epoch": 3.1294697903822444, "grad_norm": 0.07307056337594986, "learning_rate": 0.01, "loss": 1.9933, "step": 30456 }, { "epoch": 3.129778051787916, "grad_norm": 0.06828980147838593, "learning_rate": 0.01, "loss": 2.0343, "step": 30459 }, { "epoch": 3.1300863131935883, "grad_norm": 0.0480424240231514, "learning_rate": 0.01, "loss": 2.0207, "step": 30462 }, { "epoch": 3.13039457459926, "grad_norm": 0.10072044283151627, "learning_rate": 0.01, "loss": 2.0244, "step": 30465 }, { "epoch": 3.1307028360049323, "grad_norm": 0.04987982660531998, "learning_rate": 0.01, "loss": 2.0056, "step": 30468 }, { "epoch": 3.131011097410604, "grad_norm": 0.10036738961935043, "learning_rate": 0.01, "loss": 2.0069, "step": 30471 }, { "epoch": 3.131319358816276, "grad_norm": 0.06445086747407913, "learning_rate": 0.01, "loss": 2.0196, "step": 30474 }, { "epoch": 3.1316276202219484, "grad_norm": 0.10858535021543503, "learning_rate": 0.01, "loss": 2.0159, "step": 30477 }, { "epoch": 3.13193588162762, "grad_norm": 0.041025932878255844, "learning_rate": 0.01, "loss": 2.0156, "step": 30480 }, { "epoch": 3.1322441430332923, "grad_norm": 0.05287109315395355, "learning_rate": 0.01, "loss": 2.0143, "step": 30483 }, { "epoch": 3.132552404438964, "grad_norm": 0.09659206867218018, "learning_rate": 0.01, "loss": 1.9991, "step": 30486 }, { "epoch": 3.1328606658446363, "grad_norm": 0.07235360145568848, "learning_rate": 0.01, "loss": 2.0521, "step": 30489 }, { "epoch": 3.133168927250308, "grad_norm": 0.042338863015174866, "learning_rate": 0.01, "loss": 2.0403, "step": 30492 }, { "epoch": 3.1334771886559802, "grad_norm": 0.045676395297050476, "learning_rate": 0.01, "loss": 1.9921, "step": 30495 }, { "epoch": 3.1337854500616524, "grad_norm": 0.04957246780395508, "learning_rate": 0.01, "loss": 2.0095, "step": 30498 }, { "epoch": 3.134093711467324, "grad_norm": 0.047321684658527374, "learning_rate": 0.01, "loss": 2.025, "step": 30501 }, { "epoch": 3.1344019728729964, "grad_norm": 0.09219278395175934, "learning_rate": 0.01, "loss": 2.0326, "step": 30504 }, { "epoch": 3.134710234278668, "grad_norm": 0.07666533440351486, "learning_rate": 0.01, "loss": 2.0132, "step": 30507 }, { "epoch": 3.1350184956843403, "grad_norm": 0.06832748651504517, "learning_rate": 0.01, "loss": 2.0074, "step": 30510 }, { "epoch": 3.1353267570900125, "grad_norm": 0.05527606979012489, "learning_rate": 0.01, "loss": 1.9941, "step": 30513 }, { "epoch": 3.1356350184956843, "grad_norm": 0.09272732585668564, "learning_rate": 0.01, "loss": 1.9942, "step": 30516 }, { "epoch": 3.1359432799013565, "grad_norm": 0.02908925898373127, "learning_rate": 0.01, "loss": 2.0173, "step": 30519 }, { "epoch": 3.136251541307028, "grad_norm": 0.04611453413963318, "learning_rate": 0.01, "loss": 2.0332, "step": 30522 }, { "epoch": 3.1365598027127004, "grad_norm": 0.05230382829904556, "learning_rate": 0.01, "loss": 2.0226, "step": 30525 }, { "epoch": 3.1368680641183726, "grad_norm": 0.09377764165401459, "learning_rate": 0.01, "loss": 2.0395, "step": 30528 }, { "epoch": 3.1371763255240444, "grad_norm": 0.049708276987075806, "learning_rate": 0.01, "loss": 1.996, "step": 30531 }, { "epoch": 3.1374845869297165, "grad_norm": 0.05086242035031319, "learning_rate": 0.01, "loss": 1.9855, "step": 30534 }, { "epoch": 3.1377928483353883, "grad_norm": 0.05085150897502899, "learning_rate": 0.01, "loss": 1.9968, "step": 30537 }, { "epoch": 3.1381011097410605, "grad_norm": 0.05230359733104706, "learning_rate": 0.01, "loss": 2.0079, "step": 30540 }, { "epoch": 3.1384093711467322, "grad_norm": 0.07910077273845673, "learning_rate": 0.01, "loss": 2.0075, "step": 30543 }, { "epoch": 3.1387176325524044, "grad_norm": 0.044358085840940475, "learning_rate": 0.01, "loss": 2.0236, "step": 30546 }, { "epoch": 3.1390258939580766, "grad_norm": 0.07269710302352905, "learning_rate": 0.01, "loss": 1.9995, "step": 30549 }, { "epoch": 3.1393341553637484, "grad_norm": 0.09917914122343063, "learning_rate": 0.01, "loss": 2.0233, "step": 30552 }, { "epoch": 3.1396424167694206, "grad_norm": 0.0755099207162857, "learning_rate": 0.01, "loss": 2.0113, "step": 30555 }, { "epoch": 3.1399506781750923, "grad_norm": 0.07911227643489838, "learning_rate": 0.01, "loss": 1.9751, "step": 30558 }, { "epoch": 3.1402589395807645, "grad_norm": 0.04883533716201782, "learning_rate": 0.01, "loss": 2.0247, "step": 30561 }, { "epoch": 3.1405672009864363, "grad_norm": 0.0375591404736042, "learning_rate": 0.01, "loss": 2.004, "step": 30564 }, { "epoch": 3.1408754623921085, "grad_norm": 0.08654747903347015, "learning_rate": 0.01, "loss": 2.0315, "step": 30567 }, { "epoch": 3.1411837237977807, "grad_norm": 0.07025197148323059, "learning_rate": 0.01, "loss": 2.0334, "step": 30570 }, { "epoch": 3.1414919852034524, "grad_norm": 0.11750215291976929, "learning_rate": 0.01, "loss": 1.9902, "step": 30573 }, { "epoch": 3.1418002466091246, "grad_norm": 0.037444472312927246, "learning_rate": 0.01, "loss": 2.0272, "step": 30576 }, { "epoch": 3.1421085080147964, "grad_norm": 0.044617678970098495, "learning_rate": 0.01, "loss": 2.0304, "step": 30579 }, { "epoch": 3.1424167694204685, "grad_norm": 0.06604386866092682, "learning_rate": 0.01, "loss": 2.0153, "step": 30582 }, { "epoch": 3.1427250308261407, "grad_norm": 0.09958125650882721, "learning_rate": 0.01, "loss": 2.0217, "step": 30585 }, { "epoch": 3.1430332922318125, "grad_norm": 0.05710573121905327, "learning_rate": 0.01, "loss": 2.0199, "step": 30588 }, { "epoch": 3.1433415536374847, "grad_norm": 0.05984263867139816, "learning_rate": 0.01, "loss": 2.0091, "step": 30591 }, { "epoch": 3.1436498150431564, "grad_norm": 0.04073350876569748, "learning_rate": 0.01, "loss": 2.032, "step": 30594 }, { "epoch": 3.1439580764488286, "grad_norm": 0.07050776481628418, "learning_rate": 0.01, "loss": 1.9977, "step": 30597 }, { "epoch": 3.144266337854501, "grad_norm": 0.03400518372654915, "learning_rate": 0.01, "loss": 2.0404, "step": 30600 }, { "epoch": 3.1445745992601726, "grad_norm": 0.06751801073551178, "learning_rate": 0.01, "loss": 2.0138, "step": 30603 }, { "epoch": 3.1448828606658448, "grad_norm": 0.06015434488654137, "learning_rate": 0.01, "loss": 1.9956, "step": 30606 }, { "epoch": 3.1451911220715165, "grad_norm": 0.11231853067874908, "learning_rate": 0.01, "loss": 2.0367, "step": 30609 }, { "epoch": 3.1454993834771887, "grad_norm": 0.047932934015989304, "learning_rate": 0.01, "loss": 2.0133, "step": 30612 }, { "epoch": 3.1458076448828605, "grad_norm": 0.04414699971675873, "learning_rate": 0.01, "loss": 2.0155, "step": 30615 }, { "epoch": 3.1461159062885327, "grad_norm": 0.06486550718545914, "learning_rate": 0.01, "loss": 2.0251, "step": 30618 }, { "epoch": 3.146424167694205, "grad_norm": 0.056324832141399384, "learning_rate": 0.01, "loss": 2.0137, "step": 30621 }, { "epoch": 3.1467324290998766, "grad_norm": 0.030867785215377808, "learning_rate": 0.01, "loss": 2.0339, "step": 30624 }, { "epoch": 3.147040690505549, "grad_norm": 0.050489556044340134, "learning_rate": 0.01, "loss": 2.0233, "step": 30627 }, { "epoch": 3.1473489519112205, "grad_norm": 0.04749634861946106, "learning_rate": 0.01, "loss": 2.0061, "step": 30630 }, { "epoch": 3.1476572133168927, "grad_norm": 0.05312662571668625, "learning_rate": 0.01, "loss": 1.9881, "step": 30633 }, { "epoch": 3.147965474722565, "grad_norm": 0.1234770268201828, "learning_rate": 0.01, "loss": 2.0119, "step": 30636 }, { "epoch": 3.1482737361282367, "grad_norm": 0.1423310786485672, "learning_rate": 0.01, "loss": 1.99, "step": 30639 }, { "epoch": 3.148581997533909, "grad_norm": 0.08930431306362152, "learning_rate": 0.01, "loss": 2.023, "step": 30642 }, { "epoch": 3.1488902589395806, "grad_norm": 0.03335024416446686, "learning_rate": 0.01, "loss": 2.0072, "step": 30645 }, { "epoch": 3.149198520345253, "grad_norm": 0.07785200327634811, "learning_rate": 0.01, "loss": 2.0083, "step": 30648 }, { "epoch": 3.1495067817509246, "grad_norm": 0.047731272876262665, "learning_rate": 0.01, "loss": 1.9999, "step": 30651 }, { "epoch": 3.1498150431565968, "grad_norm": 0.10476487874984741, "learning_rate": 0.01, "loss": 2.0374, "step": 30654 }, { "epoch": 3.150123304562269, "grad_norm": 0.05200812220573425, "learning_rate": 0.01, "loss": 2.0204, "step": 30657 }, { "epoch": 3.1504315659679407, "grad_norm": 0.06658156216144562, "learning_rate": 0.01, "loss": 2.0331, "step": 30660 }, { "epoch": 3.150739827373613, "grad_norm": 0.06515184789896011, "learning_rate": 0.01, "loss": 2.0248, "step": 30663 }, { "epoch": 3.1510480887792847, "grad_norm": 0.03604321926832199, "learning_rate": 0.01, "loss": 2.0279, "step": 30666 }, { "epoch": 3.151356350184957, "grad_norm": 0.10496728122234344, "learning_rate": 0.01, "loss": 2.016, "step": 30669 }, { "epoch": 3.151664611590629, "grad_norm": 0.05922207236289978, "learning_rate": 0.01, "loss": 1.9988, "step": 30672 }, { "epoch": 3.151972872996301, "grad_norm": 0.03718853369355202, "learning_rate": 0.01, "loss": 2.0313, "step": 30675 }, { "epoch": 3.152281134401973, "grad_norm": 0.04691898077726364, "learning_rate": 0.01, "loss": 1.9755, "step": 30678 }, { "epoch": 3.1525893958076447, "grad_norm": 0.048930030316114426, "learning_rate": 0.01, "loss": 2.0079, "step": 30681 }, { "epoch": 3.152897657213317, "grad_norm": 0.028419634327292442, "learning_rate": 0.01, "loss": 1.9928, "step": 30684 }, { "epoch": 3.1532059186189887, "grad_norm": 0.05914349481463432, "learning_rate": 0.01, "loss": 2.004, "step": 30687 }, { "epoch": 3.153514180024661, "grad_norm": 0.05946047231554985, "learning_rate": 0.01, "loss": 2.025, "step": 30690 }, { "epoch": 3.153822441430333, "grad_norm": 0.06951475888490677, "learning_rate": 0.01, "loss": 2.0341, "step": 30693 }, { "epoch": 3.154130702836005, "grad_norm": 0.055885329842567444, "learning_rate": 0.01, "loss": 2.0335, "step": 30696 }, { "epoch": 3.154438964241677, "grad_norm": 0.043436676263809204, "learning_rate": 0.01, "loss": 1.9812, "step": 30699 }, { "epoch": 3.1547472256473488, "grad_norm": 0.04978411644697189, "learning_rate": 0.01, "loss": 2.0117, "step": 30702 }, { "epoch": 3.155055487053021, "grad_norm": 0.11427465081214905, "learning_rate": 0.01, "loss": 2.0179, "step": 30705 }, { "epoch": 3.155363748458693, "grad_norm": 0.045963071286678314, "learning_rate": 0.01, "loss": 1.9926, "step": 30708 }, { "epoch": 3.155672009864365, "grad_norm": 0.03811359032988548, "learning_rate": 0.01, "loss": 2.0241, "step": 30711 }, { "epoch": 3.155980271270037, "grad_norm": 0.05763382837176323, "learning_rate": 0.01, "loss": 2.0158, "step": 30714 }, { "epoch": 3.156288532675709, "grad_norm": 0.045626237988471985, "learning_rate": 0.01, "loss": 2.0033, "step": 30717 }, { "epoch": 3.156596794081381, "grad_norm": 0.037544216960668564, "learning_rate": 0.01, "loss": 1.9964, "step": 30720 }, { "epoch": 3.1569050554870532, "grad_norm": 0.14238816499710083, "learning_rate": 0.01, "loss": 2.0342, "step": 30723 }, { "epoch": 3.157213316892725, "grad_norm": 0.048318054527044296, "learning_rate": 0.01, "loss": 2.0181, "step": 30726 }, { "epoch": 3.157521578298397, "grad_norm": 0.041052673012018204, "learning_rate": 0.01, "loss": 1.9813, "step": 30729 }, { "epoch": 3.157829839704069, "grad_norm": 0.0370815135538578, "learning_rate": 0.01, "loss": 2.0042, "step": 30732 }, { "epoch": 3.158138101109741, "grad_norm": 0.07156988233327866, "learning_rate": 0.01, "loss": 2.0374, "step": 30735 }, { "epoch": 3.158446362515413, "grad_norm": 0.0757046490907669, "learning_rate": 0.01, "loss": 2.0147, "step": 30738 }, { "epoch": 3.158754623921085, "grad_norm": 0.0674174353480339, "learning_rate": 0.01, "loss": 1.9951, "step": 30741 }, { "epoch": 3.1590628853267573, "grad_norm": 0.05094176158308983, "learning_rate": 0.01, "loss": 2.0196, "step": 30744 }, { "epoch": 3.159371146732429, "grad_norm": 0.04377339780330658, "learning_rate": 0.01, "loss": 2.0129, "step": 30747 }, { "epoch": 3.159679408138101, "grad_norm": 0.04788428172469139, "learning_rate": 0.01, "loss": 2.032, "step": 30750 }, { "epoch": 3.159987669543773, "grad_norm": 0.08424562215805054, "learning_rate": 0.01, "loss": 1.9851, "step": 30753 }, { "epoch": 3.160295930949445, "grad_norm": 0.0742245689034462, "learning_rate": 0.01, "loss": 1.9942, "step": 30756 }, { "epoch": 3.160604192355117, "grad_norm": 0.127692312002182, "learning_rate": 0.01, "loss": 2.0294, "step": 30759 }, { "epoch": 3.160912453760789, "grad_norm": 0.06112000718712807, "learning_rate": 0.01, "loss": 2.0087, "step": 30762 }, { "epoch": 3.1612207151664613, "grad_norm": 0.04676929488778114, "learning_rate": 0.01, "loss": 2.0117, "step": 30765 }, { "epoch": 3.161528976572133, "grad_norm": 0.07944846153259277, "learning_rate": 0.01, "loss": 2.0239, "step": 30768 }, { "epoch": 3.1618372379778052, "grad_norm": 0.05377965793013573, "learning_rate": 0.01, "loss": 2.0424, "step": 30771 }, { "epoch": 3.162145499383477, "grad_norm": 0.04961777105927467, "learning_rate": 0.01, "loss": 1.9989, "step": 30774 }, { "epoch": 3.162453760789149, "grad_norm": 0.043640993535518646, "learning_rate": 0.01, "loss": 1.9938, "step": 30777 }, { "epoch": 3.1627620221948214, "grad_norm": 0.08145361393690109, "learning_rate": 0.01, "loss": 2.0253, "step": 30780 }, { "epoch": 3.163070283600493, "grad_norm": 0.03488341346383095, "learning_rate": 0.01, "loss": 1.9797, "step": 30783 }, { "epoch": 3.1633785450061653, "grad_norm": 0.054735999554395676, "learning_rate": 0.01, "loss": 2.0142, "step": 30786 }, { "epoch": 3.163686806411837, "grad_norm": 0.05064836144447327, "learning_rate": 0.01, "loss": 2.0255, "step": 30789 }, { "epoch": 3.1639950678175093, "grad_norm": 0.05693964287638664, "learning_rate": 0.01, "loss": 2.0073, "step": 30792 }, { "epoch": 3.1643033292231815, "grad_norm": 0.08762123435735703, "learning_rate": 0.01, "loss": 2.0049, "step": 30795 }, { "epoch": 3.164611590628853, "grad_norm": 0.08652741461992264, "learning_rate": 0.01, "loss": 2.0021, "step": 30798 }, { "epoch": 3.1649198520345254, "grad_norm": 0.15153749287128448, "learning_rate": 0.01, "loss": 2.0326, "step": 30801 }, { "epoch": 3.165228113440197, "grad_norm": 0.09822986274957657, "learning_rate": 0.01, "loss": 2.0232, "step": 30804 }, { "epoch": 3.1655363748458694, "grad_norm": 0.07334254682064056, "learning_rate": 0.01, "loss": 2.0176, "step": 30807 }, { "epoch": 3.165844636251541, "grad_norm": 0.042224787175655365, "learning_rate": 0.01, "loss": 2.0277, "step": 30810 }, { "epoch": 3.1661528976572133, "grad_norm": 0.047276921570301056, "learning_rate": 0.01, "loss": 2.0014, "step": 30813 }, { "epoch": 3.1664611590628855, "grad_norm": 0.112834133207798, "learning_rate": 0.01, "loss": 1.998, "step": 30816 }, { "epoch": 3.1667694204685573, "grad_norm": 0.0851617380976677, "learning_rate": 0.01, "loss": 2.013, "step": 30819 }, { "epoch": 3.1670776818742294, "grad_norm": 0.046194661408662796, "learning_rate": 0.01, "loss": 2.0029, "step": 30822 }, { "epoch": 3.167385943279901, "grad_norm": 0.056417178362607956, "learning_rate": 0.01, "loss": 2.0171, "step": 30825 }, { "epoch": 3.1676942046855734, "grad_norm": 0.06759685277938843, "learning_rate": 0.01, "loss": 2.0281, "step": 30828 }, { "epoch": 3.1680024660912456, "grad_norm": 0.08961043506860733, "learning_rate": 0.01, "loss": 2.0222, "step": 30831 }, { "epoch": 3.1683107274969173, "grad_norm": 0.03684352710843086, "learning_rate": 0.01, "loss": 2.0037, "step": 30834 }, { "epoch": 3.1686189889025895, "grad_norm": 0.1388491988182068, "learning_rate": 0.01, "loss": 2.0304, "step": 30837 }, { "epoch": 3.1689272503082613, "grad_norm": 0.03769170120358467, "learning_rate": 0.01, "loss": 1.9998, "step": 30840 }, { "epoch": 3.1692355117139335, "grad_norm": 0.040609996765851974, "learning_rate": 0.01, "loss": 2.0208, "step": 30843 }, { "epoch": 3.1695437731196052, "grad_norm": 0.04916587471961975, "learning_rate": 0.01, "loss": 2.0249, "step": 30846 }, { "epoch": 3.1698520345252774, "grad_norm": 0.042920999228954315, "learning_rate": 0.01, "loss": 2.0078, "step": 30849 }, { "epoch": 3.1701602959309496, "grad_norm": 0.05777138099074364, "learning_rate": 0.01, "loss": 2.019, "step": 30852 }, { "epoch": 3.1704685573366214, "grad_norm": 0.06128811836242676, "learning_rate": 0.01, "loss": 2.0123, "step": 30855 }, { "epoch": 3.1707768187422936, "grad_norm": 0.08042391389608383, "learning_rate": 0.01, "loss": 2.0123, "step": 30858 }, { "epoch": 3.1710850801479653, "grad_norm": 0.036383189260959625, "learning_rate": 0.01, "loss": 2.0011, "step": 30861 }, { "epoch": 3.1713933415536375, "grad_norm": 0.05068094655871391, "learning_rate": 0.01, "loss": 1.9922, "step": 30864 }, { "epoch": 3.1717016029593097, "grad_norm": 0.09005576372146606, "learning_rate": 0.01, "loss": 2.048, "step": 30867 }, { "epoch": 3.1720098643649814, "grad_norm": 0.0589495450258255, "learning_rate": 0.01, "loss": 2.0227, "step": 30870 }, { "epoch": 3.1723181257706536, "grad_norm": 0.04160517826676369, "learning_rate": 0.01, "loss": 1.9987, "step": 30873 }, { "epoch": 3.1726263871763254, "grad_norm": 0.044718582183122635, "learning_rate": 0.01, "loss": 1.9895, "step": 30876 }, { "epoch": 3.1729346485819976, "grad_norm": 0.09011929482221603, "learning_rate": 0.01, "loss": 2.0076, "step": 30879 }, { "epoch": 3.1732429099876693, "grad_norm": 0.0597953200340271, "learning_rate": 0.01, "loss": 1.9974, "step": 30882 }, { "epoch": 3.1735511713933415, "grad_norm": 0.06141204759478569, "learning_rate": 0.01, "loss": 2.0141, "step": 30885 }, { "epoch": 3.1738594327990137, "grad_norm": 0.09013784676790237, "learning_rate": 0.01, "loss": 2.0159, "step": 30888 }, { "epoch": 3.1741676942046855, "grad_norm": 0.08281320333480835, "learning_rate": 0.01, "loss": 2.0239, "step": 30891 }, { "epoch": 3.1744759556103577, "grad_norm": 0.09314266592264175, "learning_rate": 0.01, "loss": 2.0224, "step": 30894 }, { "epoch": 3.1747842170160294, "grad_norm": 0.04410851374268532, "learning_rate": 0.01, "loss": 2.0152, "step": 30897 }, { "epoch": 3.1750924784217016, "grad_norm": 0.12074366211891174, "learning_rate": 0.01, "loss": 2.0385, "step": 30900 }, { "epoch": 3.175400739827374, "grad_norm": 0.06861037015914917, "learning_rate": 0.01, "loss": 2.0024, "step": 30903 }, { "epoch": 3.1757090012330456, "grad_norm": 0.038134749978780746, "learning_rate": 0.01, "loss": 1.9987, "step": 30906 }, { "epoch": 3.1760172626387178, "grad_norm": 0.11108820140361786, "learning_rate": 0.01, "loss": 2.0149, "step": 30909 }, { "epoch": 3.1763255240443895, "grad_norm": 0.047687213867902756, "learning_rate": 0.01, "loss": 2.0063, "step": 30912 }, { "epoch": 3.1766337854500617, "grad_norm": 0.05983440950512886, "learning_rate": 0.01, "loss": 2.0099, "step": 30915 }, { "epoch": 3.176942046855734, "grad_norm": 0.038930587470531464, "learning_rate": 0.01, "loss": 2.0122, "step": 30918 }, { "epoch": 3.1772503082614056, "grad_norm": 0.08851155638694763, "learning_rate": 0.01, "loss": 2.0077, "step": 30921 }, { "epoch": 3.177558569667078, "grad_norm": 0.08430106192827225, "learning_rate": 0.01, "loss": 2.0082, "step": 30924 }, { "epoch": 3.1778668310727496, "grad_norm": 0.04469950869679451, "learning_rate": 0.01, "loss": 2.0138, "step": 30927 }, { "epoch": 3.178175092478422, "grad_norm": 0.06740089505910873, "learning_rate": 0.01, "loss": 2.0051, "step": 30930 }, { "epoch": 3.1784833538840935, "grad_norm": 0.06175517663359642, "learning_rate": 0.01, "loss": 2.0215, "step": 30933 }, { "epoch": 3.1787916152897657, "grad_norm": 0.047650065273046494, "learning_rate": 0.01, "loss": 2.0246, "step": 30936 }, { "epoch": 3.179099876695438, "grad_norm": 0.07261566072702408, "learning_rate": 0.01, "loss": 2.0107, "step": 30939 }, { "epoch": 3.1794081381011097, "grad_norm": 0.1270940601825714, "learning_rate": 0.01, "loss": 2.0208, "step": 30942 }, { "epoch": 3.179716399506782, "grad_norm": 0.054443880915641785, "learning_rate": 0.01, "loss": 2.0272, "step": 30945 }, { "epoch": 3.1800246609124536, "grad_norm": 0.06243740767240524, "learning_rate": 0.01, "loss": 2.0013, "step": 30948 }, { "epoch": 3.180332922318126, "grad_norm": 0.04665446653962135, "learning_rate": 0.01, "loss": 2.0269, "step": 30951 }, { "epoch": 3.1806411837237976, "grad_norm": 0.0470532663166523, "learning_rate": 0.01, "loss": 2.0194, "step": 30954 }, { "epoch": 3.1809494451294698, "grad_norm": 0.03944860398769379, "learning_rate": 0.01, "loss": 2.0166, "step": 30957 }, { "epoch": 3.181257706535142, "grad_norm": 0.04705018922686577, "learning_rate": 0.01, "loss": 2.0044, "step": 30960 }, { "epoch": 3.1815659679408137, "grad_norm": 0.04603470116853714, "learning_rate": 0.01, "loss": 1.9635, "step": 30963 }, { "epoch": 3.181874229346486, "grad_norm": 0.04761103168129921, "learning_rate": 0.01, "loss": 2.0245, "step": 30966 }, { "epoch": 3.1821824907521576, "grad_norm": 0.09109831601381302, "learning_rate": 0.01, "loss": 1.9919, "step": 30969 }, { "epoch": 3.18249075215783, "grad_norm": 0.07111615687608719, "learning_rate": 0.01, "loss": 2.0123, "step": 30972 }, { "epoch": 3.182799013563502, "grad_norm": 0.07623612880706787, "learning_rate": 0.01, "loss": 1.9949, "step": 30975 }, { "epoch": 3.183107274969174, "grad_norm": 0.0768456682562828, "learning_rate": 0.01, "loss": 2.0323, "step": 30978 }, { "epoch": 3.183415536374846, "grad_norm": 0.07147437334060669, "learning_rate": 0.01, "loss": 2.0126, "step": 30981 }, { "epoch": 3.1837237977805177, "grad_norm": 0.04732421785593033, "learning_rate": 0.01, "loss": 1.9975, "step": 30984 }, { "epoch": 3.18403205918619, "grad_norm": 0.04406769201159477, "learning_rate": 0.01, "loss": 2.0201, "step": 30987 }, { "epoch": 3.184340320591862, "grad_norm": 0.03760458901524544, "learning_rate": 0.01, "loss": 1.9994, "step": 30990 }, { "epoch": 3.184648581997534, "grad_norm": 0.051892757415771484, "learning_rate": 0.01, "loss": 2.0146, "step": 30993 }, { "epoch": 3.184956843403206, "grad_norm": 0.03961913660168648, "learning_rate": 0.01, "loss": 2.0145, "step": 30996 }, { "epoch": 3.185265104808878, "grad_norm": 0.07263363152742386, "learning_rate": 0.01, "loss": 2.0183, "step": 30999 }, { "epoch": 3.18557336621455, "grad_norm": 0.08618062734603882, "learning_rate": 0.01, "loss": 2.0074, "step": 31002 }, { "epoch": 3.1858816276202218, "grad_norm": 0.08712664991617203, "learning_rate": 0.01, "loss": 2.0084, "step": 31005 }, { "epoch": 3.186189889025894, "grad_norm": 0.05723334103822708, "learning_rate": 0.01, "loss": 2.0231, "step": 31008 }, { "epoch": 3.186498150431566, "grad_norm": 0.06170898675918579, "learning_rate": 0.01, "loss": 2.0079, "step": 31011 }, { "epoch": 3.186806411837238, "grad_norm": 0.04730013385415077, "learning_rate": 0.01, "loss": 2.0081, "step": 31014 }, { "epoch": 3.18711467324291, "grad_norm": 0.0381946824491024, "learning_rate": 0.01, "loss": 2.0194, "step": 31017 }, { "epoch": 3.187422934648582, "grad_norm": 0.09591019153594971, "learning_rate": 0.01, "loss": 1.9788, "step": 31020 }, { "epoch": 3.187731196054254, "grad_norm": 0.047843087464571, "learning_rate": 0.01, "loss": 1.9948, "step": 31023 }, { "epoch": 3.188039457459926, "grad_norm": 0.04370959475636482, "learning_rate": 0.01, "loss": 2.0249, "step": 31026 }, { "epoch": 3.188347718865598, "grad_norm": 0.060692187398672104, "learning_rate": 0.01, "loss": 2.0313, "step": 31029 }, { "epoch": 3.18865598027127, "grad_norm": 0.05906793847680092, "learning_rate": 0.01, "loss": 2.0144, "step": 31032 }, { "epoch": 3.188964241676942, "grad_norm": 0.06203675642609596, "learning_rate": 0.01, "loss": 2.0325, "step": 31035 }, { "epoch": 3.189272503082614, "grad_norm": 0.07943592220544815, "learning_rate": 0.01, "loss": 2.0079, "step": 31038 }, { "epoch": 3.189580764488286, "grad_norm": 0.08803451061248779, "learning_rate": 0.01, "loss": 2.0055, "step": 31041 }, { "epoch": 3.189889025893958, "grad_norm": 0.07365550100803375, "learning_rate": 0.01, "loss": 2.0234, "step": 31044 }, { "epoch": 3.1901972872996303, "grad_norm": 0.0795077532529831, "learning_rate": 0.01, "loss": 2.0253, "step": 31047 }, { "epoch": 3.190505548705302, "grad_norm": 0.08341194689273834, "learning_rate": 0.01, "loss": 1.9972, "step": 31050 }, { "epoch": 3.190813810110974, "grad_norm": 0.05842220410704613, "learning_rate": 0.01, "loss": 2.0244, "step": 31053 }, { "epoch": 3.191122071516646, "grad_norm": 0.09980861842632294, "learning_rate": 0.01, "loss": 2.0056, "step": 31056 }, { "epoch": 3.191430332922318, "grad_norm": 0.061474163085222244, "learning_rate": 0.01, "loss": 2.0059, "step": 31059 }, { "epoch": 3.1917385943279903, "grad_norm": 0.06752124428749084, "learning_rate": 0.01, "loss": 2.0243, "step": 31062 }, { "epoch": 3.192046855733662, "grad_norm": 0.06160835176706314, "learning_rate": 0.01, "loss": 1.9988, "step": 31065 }, { "epoch": 3.1923551171393343, "grad_norm": 0.09516782313585281, "learning_rate": 0.01, "loss": 1.9864, "step": 31068 }, { "epoch": 3.192663378545006, "grad_norm": 0.049451183527708054, "learning_rate": 0.01, "loss": 2.0311, "step": 31071 }, { "epoch": 3.1929716399506782, "grad_norm": 0.08874432742595673, "learning_rate": 0.01, "loss": 2.0198, "step": 31074 }, { "epoch": 3.19327990135635, "grad_norm": 0.07393426448106766, "learning_rate": 0.01, "loss": 1.9918, "step": 31077 }, { "epoch": 3.193588162762022, "grad_norm": 0.09064768254756927, "learning_rate": 0.01, "loss": 2.027, "step": 31080 }, { "epoch": 3.1938964241676944, "grad_norm": 0.05667688325047493, "learning_rate": 0.01, "loss": 2.0058, "step": 31083 }, { "epoch": 3.194204685573366, "grad_norm": 0.03858955577015877, "learning_rate": 0.01, "loss": 2.0184, "step": 31086 }, { "epoch": 3.1945129469790383, "grad_norm": 0.03137395530939102, "learning_rate": 0.01, "loss": 1.9912, "step": 31089 }, { "epoch": 3.19482120838471, "grad_norm": 0.09366928040981293, "learning_rate": 0.01, "loss": 2.0279, "step": 31092 }, { "epoch": 3.1951294697903823, "grad_norm": 0.05282336100935936, "learning_rate": 0.01, "loss": 2.0145, "step": 31095 }, { "epoch": 3.1954377311960545, "grad_norm": 0.07649802416563034, "learning_rate": 0.01, "loss": 1.9961, "step": 31098 }, { "epoch": 3.195745992601726, "grad_norm": 0.07249470800161362, "learning_rate": 0.01, "loss": 2.0063, "step": 31101 }, { "epoch": 3.1960542540073984, "grad_norm": 0.12900890409946442, "learning_rate": 0.01, "loss": 1.9952, "step": 31104 }, { "epoch": 3.19636251541307, "grad_norm": 0.08060257881879807, "learning_rate": 0.01, "loss": 2.0247, "step": 31107 }, { "epoch": 3.1966707768187423, "grad_norm": 0.07051622122526169, "learning_rate": 0.01, "loss": 1.9965, "step": 31110 }, { "epoch": 3.1969790382244145, "grad_norm": 0.0567597970366478, "learning_rate": 0.01, "loss": 2.0072, "step": 31113 }, { "epoch": 3.1972872996300863, "grad_norm": 0.053274448961019516, "learning_rate": 0.01, "loss": 1.9992, "step": 31116 }, { "epoch": 3.1975955610357585, "grad_norm": 0.0634993240237236, "learning_rate": 0.01, "loss": 2.0111, "step": 31119 }, { "epoch": 3.1979038224414302, "grad_norm": 0.060209862887859344, "learning_rate": 0.01, "loss": 2.0062, "step": 31122 }, { "epoch": 3.1982120838471024, "grad_norm": 0.05891990661621094, "learning_rate": 0.01, "loss": 2.0394, "step": 31125 }, { "epoch": 3.198520345252774, "grad_norm": 0.03743661195039749, "learning_rate": 0.01, "loss": 1.993, "step": 31128 }, { "epoch": 3.1988286066584464, "grad_norm": 0.047772981226444244, "learning_rate": 0.01, "loss": 1.9922, "step": 31131 }, { "epoch": 3.1991368680641186, "grad_norm": 0.09134045243263245, "learning_rate": 0.01, "loss": 2.0073, "step": 31134 }, { "epoch": 3.1994451294697903, "grad_norm": 0.040920648723840714, "learning_rate": 0.01, "loss": 1.9876, "step": 31137 }, { "epoch": 3.1997533908754625, "grad_norm": 0.09245988726615906, "learning_rate": 0.01, "loss": 1.993, "step": 31140 }, { "epoch": 3.2000616522811343, "grad_norm": 0.06823042035102844, "learning_rate": 0.01, "loss": 1.9983, "step": 31143 }, { "epoch": 3.2003699136868065, "grad_norm": 0.10324928909540176, "learning_rate": 0.01, "loss": 2.0429, "step": 31146 }, { "epoch": 3.200678175092478, "grad_norm": 0.09294021129608154, "learning_rate": 0.01, "loss": 2.0278, "step": 31149 }, { "epoch": 3.2009864364981504, "grad_norm": 0.07790663093328476, "learning_rate": 0.01, "loss": 2.0043, "step": 31152 }, { "epoch": 3.2012946979038226, "grad_norm": 0.041240107268095016, "learning_rate": 0.01, "loss": 2.0223, "step": 31155 }, { "epoch": 3.2016029593094943, "grad_norm": 0.042019765824079514, "learning_rate": 0.01, "loss": 2.0019, "step": 31158 }, { "epoch": 3.2019112207151665, "grad_norm": 0.03546491637825966, "learning_rate": 0.01, "loss": 1.9972, "step": 31161 }, { "epoch": 3.2022194821208383, "grad_norm": 0.08740135282278061, "learning_rate": 0.01, "loss": 2.0364, "step": 31164 }, { "epoch": 3.2025277435265105, "grad_norm": 0.06613611429929733, "learning_rate": 0.01, "loss": 2.0345, "step": 31167 }, { "epoch": 3.2028360049321827, "grad_norm": 0.1089286357164383, "learning_rate": 0.01, "loss": 2.0027, "step": 31170 }, { "epoch": 3.2031442663378544, "grad_norm": 0.037778157740831375, "learning_rate": 0.01, "loss": 2.0168, "step": 31173 }, { "epoch": 3.2034525277435266, "grad_norm": 0.07214018702507019, "learning_rate": 0.01, "loss": 2.0162, "step": 31176 }, { "epoch": 3.2037607891491984, "grad_norm": 0.05182633548974991, "learning_rate": 0.01, "loss": 2.0078, "step": 31179 }, { "epoch": 3.2040690505548706, "grad_norm": 0.043808627873659134, "learning_rate": 0.01, "loss": 2.0266, "step": 31182 }, { "epoch": 3.2043773119605428, "grad_norm": 0.03699186071753502, "learning_rate": 0.01, "loss": 2.0391, "step": 31185 }, { "epoch": 3.2046855733662145, "grad_norm": 0.10249976068735123, "learning_rate": 0.01, "loss": 1.9939, "step": 31188 }, { "epoch": 3.2049938347718867, "grad_norm": 0.05326079949736595, "learning_rate": 0.01, "loss": 2.0082, "step": 31191 }, { "epoch": 3.2053020961775585, "grad_norm": 0.09879110008478165, "learning_rate": 0.01, "loss": 2.0149, "step": 31194 }, { "epoch": 3.2056103575832307, "grad_norm": 0.042158786207437515, "learning_rate": 0.01, "loss": 1.9861, "step": 31197 }, { "epoch": 3.2059186189889024, "grad_norm": 0.13437750935554504, "learning_rate": 0.01, "loss": 2.0179, "step": 31200 }, { "epoch": 3.2062268803945746, "grad_norm": 0.07030022144317627, "learning_rate": 0.01, "loss": 2.0499, "step": 31203 }, { "epoch": 3.206535141800247, "grad_norm": 0.044758979231119156, "learning_rate": 0.01, "loss": 1.9956, "step": 31206 }, { "epoch": 3.2068434032059185, "grad_norm": 0.030563069507479668, "learning_rate": 0.01, "loss": 1.9987, "step": 31209 }, { "epoch": 3.2071516646115907, "grad_norm": 0.047487739473581314, "learning_rate": 0.01, "loss": 1.9763, "step": 31212 }, { "epoch": 3.2074599260172625, "grad_norm": 0.044461995363235474, "learning_rate": 0.01, "loss": 2.015, "step": 31215 }, { "epoch": 3.2077681874229347, "grad_norm": 0.16204911470413208, "learning_rate": 0.01, "loss": 2.0259, "step": 31218 }, { "epoch": 3.2080764488286064, "grad_norm": 0.0470188669860363, "learning_rate": 0.01, "loss": 2.0244, "step": 31221 }, { "epoch": 3.2083847102342786, "grad_norm": 0.04323815181851387, "learning_rate": 0.01, "loss": 2.0158, "step": 31224 }, { "epoch": 3.208692971639951, "grad_norm": 0.04388147220015526, "learning_rate": 0.01, "loss": 1.9996, "step": 31227 }, { "epoch": 3.2090012330456226, "grad_norm": 0.06811438500881195, "learning_rate": 0.01, "loss": 2.0196, "step": 31230 }, { "epoch": 3.2093094944512948, "grad_norm": 0.09423845261335373, "learning_rate": 0.01, "loss": 1.99, "step": 31233 }, { "epoch": 3.2096177558569665, "grad_norm": 0.05107983946800232, "learning_rate": 0.01, "loss": 2.0203, "step": 31236 }, { "epoch": 3.2099260172626387, "grad_norm": 0.06188793480396271, "learning_rate": 0.01, "loss": 2.0283, "step": 31239 }, { "epoch": 3.210234278668311, "grad_norm": 0.04658438265323639, "learning_rate": 0.01, "loss": 1.9986, "step": 31242 }, { "epoch": 3.2105425400739827, "grad_norm": 0.05382300913333893, "learning_rate": 0.01, "loss": 2.0242, "step": 31245 }, { "epoch": 3.210850801479655, "grad_norm": 0.04271601140499115, "learning_rate": 0.01, "loss": 1.9952, "step": 31248 }, { "epoch": 3.2111590628853266, "grad_norm": 0.06256501376628876, "learning_rate": 0.01, "loss": 2.0109, "step": 31251 }, { "epoch": 3.211467324290999, "grad_norm": 0.03523874282836914, "learning_rate": 0.01, "loss": 2.0107, "step": 31254 }, { "epoch": 3.211775585696671, "grad_norm": 0.18781809508800507, "learning_rate": 0.01, "loss": 2.0421, "step": 31257 }, { "epoch": 3.2120838471023427, "grad_norm": 0.11113902926445007, "learning_rate": 0.01, "loss": 2.0158, "step": 31260 }, { "epoch": 3.212392108508015, "grad_norm": 0.11578498035669327, "learning_rate": 0.01, "loss": 1.9993, "step": 31263 }, { "epoch": 3.2127003699136867, "grad_norm": 0.06620439887046814, "learning_rate": 0.01, "loss": 1.9711, "step": 31266 }, { "epoch": 3.213008631319359, "grad_norm": 0.04588307812809944, "learning_rate": 0.01, "loss": 2.0262, "step": 31269 }, { "epoch": 3.2133168927250306, "grad_norm": 0.06933780014514923, "learning_rate": 0.01, "loss": 2.0117, "step": 31272 }, { "epoch": 3.213625154130703, "grad_norm": 0.03479130566120148, "learning_rate": 0.01, "loss": 1.996, "step": 31275 }, { "epoch": 3.213933415536375, "grad_norm": 0.05823906138539314, "learning_rate": 0.01, "loss": 1.9988, "step": 31278 }, { "epoch": 3.2142416769420468, "grad_norm": 0.044943809509277344, "learning_rate": 0.01, "loss": 2.0121, "step": 31281 }, { "epoch": 3.214549938347719, "grad_norm": 0.05428524687886238, "learning_rate": 0.01, "loss": 1.9761, "step": 31284 }, { "epoch": 3.2148581997533907, "grad_norm": 0.07103761285543442, "learning_rate": 0.01, "loss": 2.0401, "step": 31287 }, { "epoch": 3.215166461159063, "grad_norm": 0.1237725242972374, "learning_rate": 0.01, "loss": 2.023, "step": 31290 }, { "epoch": 3.215474722564735, "grad_norm": 0.03638492897152901, "learning_rate": 0.01, "loss": 2.0036, "step": 31293 }, { "epoch": 3.215782983970407, "grad_norm": 0.05026063695549965, "learning_rate": 0.01, "loss": 1.9929, "step": 31296 }, { "epoch": 3.216091245376079, "grad_norm": 0.09602409601211548, "learning_rate": 0.01, "loss": 2.011, "step": 31299 }, { "epoch": 3.216399506781751, "grad_norm": 0.12711849808692932, "learning_rate": 0.01, "loss": 1.9912, "step": 31302 }, { "epoch": 3.216707768187423, "grad_norm": 0.051612287759780884, "learning_rate": 0.01, "loss": 2.0397, "step": 31305 }, { "epoch": 3.2170160295930947, "grad_norm": 0.055910855531692505, "learning_rate": 0.01, "loss": 2.0282, "step": 31308 }, { "epoch": 3.217324290998767, "grad_norm": 0.053995631635189056, "learning_rate": 0.01, "loss": 2.0034, "step": 31311 }, { "epoch": 3.217632552404439, "grad_norm": 0.03721768036484718, "learning_rate": 0.01, "loss": 1.9957, "step": 31314 }, { "epoch": 3.217940813810111, "grad_norm": 0.04018721356987953, "learning_rate": 0.01, "loss": 2.0215, "step": 31317 }, { "epoch": 3.218249075215783, "grad_norm": 0.06476118415594101, "learning_rate": 0.01, "loss": 2.0138, "step": 31320 }, { "epoch": 3.218557336621455, "grad_norm": 0.09766072034835815, "learning_rate": 0.01, "loss": 2.0343, "step": 31323 }, { "epoch": 3.218865598027127, "grad_norm": 0.0436365082859993, "learning_rate": 0.01, "loss": 2.0081, "step": 31326 }, { "epoch": 3.219173859432799, "grad_norm": 0.04617559164762497, "learning_rate": 0.01, "loss": 1.9809, "step": 31329 }, { "epoch": 3.219482120838471, "grad_norm": 0.0487680621445179, "learning_rate": 0.01, "loss": 2.0244, "step": 31332 }, { "epoch": 3.219790382244143, "grad_norm": 0.036623213440179825, "learning_rate": 0.01, "loss": 2.0159, "step": 31335 }, { "epoch": 3.220098643649815, "grad_norm": 0.04713229089975357, "learning_rate": 0.01, "loss": 2.01, "step": 31338 }, { "epoch": 3.220406905055487, "grad_norm": 0.03848060593008995, "learning_rate": 0.01, "loss": 1.9975, "step": 31341 }, { "epoch": 3.220715166461159, "grad_norm": 0.03674410656094551, "learning_rate": 0.01, "loss": 2.031, "step": 31344 }, { "epoch": 3.221023427866831, "grad_norm": 0.09635547548532486, "learning_rate": 0.01, "loss": 2.0069, "step": 31347 }, { "epoch": 3.2213316892725032, "grad_norm": 0.06311628222465515, "learning_rate": 0.01, "loss": 1.9864, "step": 31350 }, { "epoch": 3.221639950678175, "grad_norm": 0.05867060646414757, "learning_rate": 0.01, "loss": 2.0282, "step": 31353 }, { "epoch": 3.221948212083847, "grad_norm": 0.0816822350025177, "learning_rate": 0.01, "loss": 2.0192, "step": 31356 }, { "epoch": 3.222256473489519, "grad_norm": 0.0825878456234932, "learning_rate": 0.01, "loss": 2.0045, "step": 31359 }, { "epoch": 3.222564734895191, "grad_norm": 0.06253322958946228, "learning_rate": 0.01, "loss": 2.0461, "step": 31362 }, { "epoch": 3.2228729963008633, "grad_norm": 0.07637360692024231, "learning_rate": 0.01, "loss": 2.0028, "step": 31365 }, { "epoch": 3.223181257706535, "grad_norm": 0.081746406853199, "learning_rate": 0.01, "loss": 2.0226, "step": 31368 }, { "epoch": 3.2234895191122073, "grad_norm": 0.11683212220668793, "learning_rate": 0.01, "loss": 2.0075, "step": 31371 }, { "epoch": 3.223797780517879, "grad_norm": 0.07254528999328613, "learning_rate": 0.01, "loss": 2.0392, "step": 31374 }, { "epoch": 3.224106041923551, "grad_norm": 0.05394696444272995, "learning_rate": 0.01, "loss": 2.0199, "step": 31377 }, { "epoch": 3.2244143033292234, "grad_norm": 0.044905390590429306, "learning_rate": 0.01, "loss": 2.0156, "step": 31380 }, { "epoch": 3.224722564734895, "grad_norm": 0.07633423805236816, "learning_rate": 0.01, "loss": 2.0096, "step": 31383 }, { "epoch": 3.2250308261405674, "grad_norm": 0.03892616555094719, "learning_rate": 0.01, "loss": 2.014, "step": 31386 }, { "epoch": 3.225339087546239, "grad_norm": 0.03514908254146576, "learning_rate": 0.01, "loss": 2.0173, "step": 31389 }, { "epoch": 3.2256473489519113, "grad_norm": 0.07658538222312927, "learning_rate": 0.01, "loss": 2.0052, "step": 31392 }, { "epoch": 3.225955610357583, "grad_norm": 0.1004643440246582, "learning_rate": 0.01, "loss": 2.01, "step": 31395 }, { "epoch": 3.2262638717632552, "grad_norm": 0.08478312194347382, "learning_rate": 0.01, "loss": 2.0184, "step": 31398 }, { "epoch": 3.2265721331689274, "grad_norm": 0.05344710871577263, "learning_rate": 0.01, "loss": 1.9995, "step": 31401 }, { "epoch": 3.226880394574599, "grad_norm": 0.05862101912498474, "learning_rate": 0.01, "loss": 1.9907, "step": 31404 }, { "epoch": 3.2271886559802714, "grad_norm": 0.03430565074086189, "learning_rate": 0.01, "loss": 1.9976, "step": 31407 }, { "epoch": 3.227496917385943, "grad_norm": 0.03822310268878937, "learning_rate": 0.01, "loss": 2.0081, "step": 31410 }, { "epoch": 3.2278051787916153, "grad_norm": 0.04391561448574066, "learning_rate": 0.01, "loss": 2.0239, "step": 31413 }, { "epoch": 3.228113440197287, "grad_norm": 0.053595174103975296, "learning_rate": 0.01, "loss": 1.994, "step": 31416 }, { "epoch": 3.2284217016029593, "grad_norm": 0.06710030138492584, "learning_rate": 0.01, "loss": 2.0249, "step": 31419 }, { "epoch": 3.2287299630086315, "grad_norm": 0.19077260792255402, "learning_rate": 0.01, "loss": 2.0214, "step": 31422 }, { "epoch": 3.229038224414303, "grad_norm": 0.050228483974933624, "learning_rate": 0.01, "loss": 1.9957, "step": 31425 }, { "epoch": 3.2293464858199754, "grad_norm": 0.047246966511011124, "learning_rate": 0.01, "loss": 1.9984, "step": 31428 }, { "epoch": 3.229654747225647, "grad_norm": 0.03976801037788391, "learning_rate": 0.01, "loss": 2.0247, "step": 31431 }, { "epoch": 3.2299630086313194, "grad_norm": 0.052110929042100906, "learning_rate": 0.01, "loss": 2.0176, "step": 31434 }, { "epoch": 3.2302712700369915, "grad_norm": 0.1090363934636116, "learning_rate": 0.01, "loss": 2.011, "step": 31437 }, { "epoch": 3.2305795314426633, "grad_norm": 0.13336141407489777, "learning_rate": 0.01, "loss": 2.0023, "step": 31440 }, { "epoch": 3.2308877928483355, "grad_norm": 0.11287815868854523, "learning_rate": 0.01, "loss": 2.0384, "step": 31443 }, { "epoch": 3.2311960542540072, "grad_norm": 0.05775724723935127, "learning_rate": 0.01, "loss": 2.0001, "step": 31446 }, { "epoch": 3.2315043156596794, "grad_norm": 0.0489221066236496, "learning_rate": 0.01, "loss": 1.9857, "step": 31449 }, { "epoch": 3.2318125770653516, "grad_norm": 0.10708906501531601, "learning_rate": 0.01, "loss": 2.0214, "step": 31452 }, { "epoch": 3.2321208384710234, "grad_norm": 0.04961223527789116, "learning_rate": 0.01, "loss": 2.0244, "step": 31455 }, { "epoch": 3.2324290998766956, "grad_norm": 0.04106178134679794, "learning_rate": 0.01, "loss": 2.027, "step": 31458 }, { "epoch": 3.2327373612823673, "grad_norm": 0.051446568220853806, "learning_rate": 0.01, "loss": 2.0078, "step": 31461 }, { "epoch": 3.2330456226880395, "grad_norm": 0.046735066920518875, "learning_rate": 0.01, "loss": 2.0196, "step": 31464 }, { "epoch": 3.2333538840937113, "grad_norm": 0.04740822687745094, "learning_rate": 0.01, "loss": 1.9849, "step": 31467 }, { "epoch": 3.2336621454993835, "grad_norm": 0.03593340888619423, "learning_rate": 0.01, "loss": 2.0055, "step": 31470 }, { "epoch": 3.2339704069050557, "grad_norm": 0.058127082884311676, "learning_rate": 0.01, "loss": 2.0318, "step": 31473 }, { "epoch": 3.2342786683107274, "grad_norm": 0.05138585716485977, "learning_rate": 0.01, "loss": 2.0052, "step": 31476 }, { "epoch": 3.2345869297163996, "grad_norm": 0.04029039293527603, "learning_rate": 0.01, "loss": 2.0272, "step": 31479 }, { "epoch": 3.2348951911220714, "grad_norm": 0.05703110247850418, "learning_rate": 0.01, "loss": 2.0115, "step": 31482 }, { "epoch": 3.2352034525277436, "grad_norm": 0.06218143180012703, "learning_rate": 0.01, "loss": 1.9987, "step": 31485 }, { "epoch": 3.2355117139334153, "grad_norm": 0.07277306169271469, "learning_rate": 0.01, "loss": 2.0413, "step": 31488 }, { "epoch": 3.2358199753390875, "grad_norm": 0.08520001918077469, "learning_rate": 0.01, "loss": 2.0058, "step": 31491 }, { "epoch": 3.2361282367447597, "grad_norm": 0.062044426798820496, "learning_rate": 0.01, "loss": 2.0055, "step": 31494 }, { "epoch": 3.2364364981504314, "grad_norm": 0.062475502490997314, "learning_rate": 0.01, "loss": 2.0124, "step": 31497 }, { "epoch": 3.2367447595561036, "grad_norm": 0.05683014541864395, "learning_rate": 0.01, "loss": 2.0253, "step": 31500 }, { "epoch": 3.2370530209617754, "grad_norm": 0.0409809909760952, "learning_rate": 0.01, "loss": 2.0286, "step": 31503 }, { "epoch": 3.2373612823674476, "grad_norm": 0.05398055911064148, "learning_rate": 0.01, "loss": 2.0239, "step": 31506 }, { "epoch": 3.2376695437731198, "grad_norm": 0.09552880376577377, "learning_rate": 0.01, "loss": 2.0232, "step": 31509 }, { "epoch": 3.2379778051787915, "grad_norm": 0.042877551168203354, "learning_rate": 0.01, "loss": 2.007, "step": 31512 }, { "epoch": 3.2382860665844637, "grad_norm": 0.03086467832326889, "learning_rate": 0.01, "loss": 1.9831, "step": 31515 }, { "epoch": 3.2385943279901355, "grad_norm": 0.03575371578335762, "learning_rate": 0.01, "loss": 1.9614, "step": 31518 }, { "epoch": 3.2389025893958077, "grad_norm": 0.04962699115276337, "learning_rate": 0.01, "loss": 2.0084, "step": 31521 }, { "epoch": 3.23921085080148, "grad_norm": 0.07324356585741043, "learning_rate": 0.01, "loss": 2.0213, "step": 31524 }, { "epoch": 3.2395191122071516, "grad_norm": 0.08169112354516983, "learning_rate": 0.01, "loss": 2.0061, "step": 31527 }, { "epoch": 3.239827373612824, "grad_norm": 0.04129362106323242, "learning_rate": 0.01, "loss": 2.0143, "step": 31530 }, { "epoch": 3.2401356350184956, "grad_norm": 0.04921339079737663, "learning_rate": 0.01, "loss": 2.0248, "step": 31533 }, { "epoch": 3.2404438964241677, "grad_norm": 0.048255033791065216, "learning_rate": 0.01, "loss": 2.0486, "step": 31536 }, { "epoch": 3.2407521578298395, "grad_norm": 0.0420132540166378, "learning_rate": 0.01, "loss": 2.0254, "step": 31539 }, { "epoch": 3.2410604192355117, "grad_norm": 0.037761542946100235, "learning_rate": 0.01, "loss": 2.0105, "step": 31542 }, { "epoch": 3.241368680641184, "grad_norm": 0.059902340173721313, "learning_rate": 0.01, "loss": 2.0371, "step": 31545 }, { "epoch": 3.2416769420468556, "grad_norm": 0.07208621501922607, "learning_rate": 0.01, "loss": 2.014, "step": 31548 }, { "epoch": 3.241985203452528, "grad_norm": 0.039220135658979416, "learning_rate": 0.01, "loss": 2.0208, "step": 31551 }, { "epoch": 3.2422934648581996, "grad_norm": 0.09032812714576721, "learning_rate": 0.01, "loss": 1.9956, "step": 31554 }, { "epoch": 3.2426017262638718, "grad_norm": 0.04668070003390312, "learning_rate": 0.01, "loss": 2.0204, "step": 31557 }, { "epoch": 3.242909987669544, "grad_norm": 0.07167590409517288, "learning_rate": 0.01, "loss": 1.9882, "step": 31560 }, { "epoch": 3.2432182490752157, "grad_norm": 0.09092319756746292, "learning_rate": 0.01, "loss": 2.0257, "step": 31563 }, { "epoch": 3.243526510480888, "grad_norm": 0.05797998234629631, "learning_rate": 0.01, "loss": 1.9923, "step": 31566 }, { "epoch": 3.2438347718865597, "grad_norm": 0.05728116258978844, "learning_rate": 0.01, "loss": 2.0134, "step": 31569 }, { "epoch": 3.244143033292232, "grad_norm": 0.10221456736326218, "learning_rate": 0.01, "loss": 2.0452, "step": 31572 }, { "epoch": 3.244451294697904, "grad_norm": 0.0785583034157753, "learning_rate": 0.01, "loss": 1.9956, "step": 31575 }, { "epoch": 3.244759556103576, "grad_norm": 0.06105490028858185, "learning_rate": 0.01, "loss": 2.035, "step": 31578 }, { "epoch": 3.245067817509248, "grad_norm": 0.031959742307662964, "learning_rate": 0.01, "loss": 2.0186, "step": 31581 }, { "epoch": 3.2453760789149197, "grad_norm": 0.07373019307851791, "learning_rate": 0.01, "loss": 1.9778, "step": 31584 }, { "epoch": 3.245684340320592, "grad_norm": 0.05989330634474754, "learning_rate": 0.01, "loss": 2.0059, "step": 31587 }, { "epoch": 3.2459926017262637, "grad_norm": 0.0927957072854042, "learning_rate": 0.01, "loss": 1.9979, "step": 31590 }, { "epoch": 3.246300863131936, "grad_norm": 0.048555102199316025, "learning_rate": 0.01, "loss": 2.0111, "step": 31593 }, { "epoch": 3.246609124537608, "grad_norm": 0.05515364184975624, "learning_rate": 0.01, "loss": 2.0052, "step": 31596 }, { "epoch": 3.24691738594328, "grad_norm": 0.04753278195858002, "learning_rate": 0.01, "loss": 2.0171, "step": 31599 }, { "epoch": 3.247225647348952, "grad_norm": 0.06210014224052429, "learning_rate": 0.01, "loss": 1.9689, "step": 31602 }, { "epoch": 3.2475339087546238, "grad_norm": 0.060913342982530594, "learning_rate": 0.01, "loss": 2.0183, "step": 31605 }, { "epoch": 3.247842170160296, "grad_norm": 0.08301867544651031, "learning_rate": 0.01, "loss": 2.0182, "step": 31608 }, { "epoch": 3.2481504315659677, "grad_norm": 0.06884342432022095, "learning_rate": 0.01, "loss": 2.0332, "step": 31611 }, { "epoch": 3.24845869297164, "grad_norm": 0.046835094690322876, "learning_rate": 0.01, "loss": 2.0099, "step": 31614 }, { "epoch": 3.248766954377312, "grad_norm": 0.11094243824481964, "learning_rate": 0.01, "loss": 2.0319, "step": 31617 }, { "epoch": 3.249075215782984, "grad_norm": 0.0603213869035244, "learning_rate": 0.01, "loss": 2.0181, "step": 31620 }, { "epoch": 3.249383477188656, "grad_norm": 0.07283317297697067, "learning_rate": 0.01, "loss": 2.0205, "step": 31623 }, { "epoch": 3.249691738594328, "grad_norm": 0.05507282167673111, "learning_rate": 0.01, "loss": 2.0082, "step": 31626 }, { "epoch": 3.25, "grad_norm": 0.039191924035549164, "learning_rate": 0.01, "loss": 1.9936, "step": 31629 }, { "epoch": 3.250308261405672, "grad_norm": 0.1467886120080948, "learning_rate": 0.01, "loss": 2.0235, "step": 31632 }, { "epoch": 3.250616522811344, "grad_norm": 0.06955720484256744, "learning_rate": 0.01, "loss": 2.0168, "step": 31635 }, { "epoch": 3.250924784217016, "grad_norm": 0.0634884312748909, "learning_rate": 0.01, "loss": 2.0123, "step": 31638 }, { "epoch": 3.251233045622688, "grad_norm": 0.035188958048820496, "learning_rate": 0.01, "loss": 2.0061, "step": 31641 }, { "epoch": 3.25154130702836, "grad_norm": 0.07601841539144516, "learning_rate": 0.01, "loss": 2.0046, "step": 31644 }, { "epoch": 3.2518495684340323, "grad_norm": 0.06929823011159897, "learning_rate": 0.01, "loss": 2.0213, "step": 31647 }, { "epoch": 3.252157829839704, "grad_norm": 0.06300003081560135, "learning_rate": 0.01, "loss": 2.0111, "step": 31650 }, { "epoch": 3.2524660912453762, "grad_norm": 0.04881738871335983, "learning_rate": 0.01, "loss": 2.0423, "step": 31653 }, { "epoch": 3.252774352651048, "grad_norm": 0.04084230959415436, "learning_rate": 0.01, "loss": 2.0284, "step": 31656 }, { "epoch": 3.25308261405672, "grad_norm": 0.10158465802669525, "learning_rate": 0.01, "loss": 2.0084, "step": 31659 }, { "epoch": 3.253390875462392, "grad_norm": 0.04703471064567566, "learning_rate": 0.01, "loss": 1.9824, "step": 31662 }, { "epoch": 3.253699136868064, "grad_norm": 0.11601495742797852, "learning_rate": 0.01, "loss": 2.0249, "step": 31665 }, { "epoch": 3.2540073982737363, "grad_norm": 0.037081558257341385, "learning_rate": 0.01, "loss": 2.0104, "step": 31668 }, { "epoch": 3.254315659679408, "grad_norm": 0.03952954337000847, "learning_rate": 0.01, "loss": 2.023, "step": 31671 }, { "epoch": 3.2546239210850803, "grad_norm": 0.09437094628810883, "learning_rate": 0.01, "loss": 2.0528, "step": 31674 }, { "epoch": 3.254932182490752, "grad_norm": 0.04648669436573982, "learning_rate": 0.01, "loss": 1.9822, "step": 31677 }, { "epoch": 3.255240443896424, "grad_norm": 0.06830704212188721, "learning_rate": 0.01, "loss": 2.0029, "step": 31680 }, { "epoch": 3.255548705302096, "grad_norm": 0.039575349539518356, "learning_rate": 0.01, "loss": 2.0177, "step": 31683 }, { "epoch": 3.255856966707768, "grad_norm": 0.046931225806474686, "learning_rate": 0.01, "loss": 2.0081, "step": 31686 }, { "epoch": 3.2561652281134403, "grad_norm": 0.039921220391988754, "learning_rate": 0.01, "loss": 1.9982, "step": 31689 }, { "epoch": 3.256473489519112, "grad_norm": 0.1725001186132431, "learning_rate": 0.01, "loss": 1.9933, "step": 31692 }, { "epoch": 3.2567817509247843, "grad_norm": 0.10149878263473511, "learning_rate": 0.01, "loss": 2.0031, "step": 31695 }, { "epoch": 3.2570900123304565, "grad_norm": 0.0641789436340332, "learning_rate": 0.01, "loss": 2.0298, "step": 31698 }, { "epoch": 3.2573982737361282, "grad_norm": 0.09631607681512833, "learning_rate": 0.01, "loss": 2.0369, "step": 31701 }, { "epoch": 3.2577065351418004, "grad_norm": 0.04941624775528908, "learning_rate": 0.01, "loss": 2.0078, "step": 31704 }, { "epoch": 3.258014796547472, "grad_norm": 0.03704220801591873, "learning_rate": 0.01, "loss": 2.0192, "step": 31707 }, { "epoch": 3.2583230579531444, "grad_norm": 0.05716734007000923, "learning_rate": 0.01, "loss": 2.0117, "step": 31710 }, { "epoch": 3.258631319358816, "grad_norm": 0.04187513142824173, "learning_rate": 0.01, "loss": 2.0298, "step": 31713 }, { "epoch": 3.2589395807644883, "grad_norm": 0.04240023344755173, "learning_rate": 0.01, "loss": 2.0227, "step": 31716 }, { "epoch": 3.2592478421701605, "grad_norm": 0.0434168316423893, "learning_rate": 0.01, "loss": 2.0293, "step": 31719 }, { "epoch": 3.2595561035758323, "grad_norm": 0.10827293992042542, "learning_rate": 0.01, "loss": 2.019, "step": 31722 }, { "epoch": 3.2598643649815044, "grad_norm": 0.04294963926076889, "learning_rate": 0.01, "loss": 2.0289, "step": 31725 }, { "epoch": 3.260172626387176, "grad_norm": 0.11746285110712051, "learning_rate": 0.01, "loss": 2.0168, "step": 31728 }, { "epoch": 3.2604808877928484, "grad_norm": 0.13203038275241852, "learning_rate": 0.01, "loss": 2.0092, "step": 31731 }, { "epoch": 3.26078914919852, "grad_norm": 0.12324203550815582, "learning_rate": 0.01, "loss": 2.0158, "step": 31734 }, { "epoch": 3.2610974106041923, "grad_norm": 0.09455161541700363, "learning_rate": 0.01, "loss": 2.0113, "step": 31737 }, { "epoch": 3.2614056720098645, "grad_norm": 0.0551525354385376, "learning_rate": 0.01, "loss": 2.0042, "step": 31740 }, { "epoch": 3.2617139334155363, "grad_norm": 0.04528261721134186, "learning_rate": 0.01, "loss": 2.0083, "step": 31743 }, { "epoch": 3.2620221948212085, "grad_norm": 0.049497511237859726, "learning_rate": 0.01, "loss": 1.9953, "step": 31746 }, { "epoch": 3.2623304562268802, "grad_norm": 0.04797462373971939, "learning_rate": 0.01, "loss": 1.9974, "step": 31749 }, { "epoch": 3.2626387176325524, "grad_norm": 0.047107502818107605, "learning_rate": 0.01, "loss": 1.99, "step": 31752 }, { "epoch": 3.262946979038224, "grad_norm": 0.03374217078089714, "learning_rate": 0.01, "loss": 1.98, "step": 31755 }, { "epoch": 3.2632552404438964, "grad_norm": 0.05429157614707947, "learning_rate": 0.01, "loss": 2.0089, "step": 31758 }, { "epoch": 3.2635635018495686, "grad_norm": 0.12066765129566193, "learning_rate": 0.01, "loss": 2.0365, "step": 31761 }, { "epoch": 3.2638717632552403, "grad_norm": 0.15858452022075653, "learning_rate": 0.01, "loss": 2.021, "step": 31764 }, { "epoch": 3.2641800246609125, "grad_norm": 0.05770573392510414, "learning_rate": 0.01, "loss": 2.0513, "step": 31767 }, { "epoch": 3.2644882860665847, "grad_norm": 0.053706735372543335, "learning_rate": 0.01, "loss": 1.9939, "step": 31770 }, { "epoch": 3.2647965474722564, "grad_norm": 0.03511708602309227, "learning_rate": 0.01, "loss": 2.0108, "step": 31773 }, { "epoch": 3.2651048088779286, "grad_norm": 0.036902979016304016, "learning_rate": 0.01, "loss": 2.0264, "step": 31776 }, { "epoch": 3.2654130702836004, "grad_norm": 0.0450621172785759, "learning_rate": 0.01, "loss": 2.0203, "step": 31779 }, { "epoch": 3.2657213316892726, "grad_norm": 0.07691927254199982, "learning_rate": 0.01, "loss": 2.0167, "step": 31782 }, { "epoch": 3.2660295930949443, "grad_norm": 0.09160054475069046, "learning_rate": 0.01, "loss": 2.0221, "step": 31785 }, { "epoch": 3.2663378545006165, "grad_norm": 0.09052203595638275, "learning_rate": 0.01, "loss": 2.0185, "step": 31788 }, { "epoch": 3.2666461159062887, "grad_norm": 0.08806884288787842, "learning_rate": 0.01, "loss": 2.0103, "step": 31791 }, { "epoch": 3.2669543773119605, "grad_norm": 0.05619393661618233, "learning_rate": 0.01, "loss": 1.9998, "step": 31794 }, { "epoch": 3.2672626387176327, "grad_norm": 0.05563674122095108, "learning_rate": 0.01, "loss": 2.015, "step": 31797 }, { "epoch": 3.2675709001233044, "grad_norm": 0.058199040591716766, "learning_rate": 0.01, "loss": 2.014, "step": 31800 }, { "epoch": 3.2678791615289766, "grad_norm": 0.06011686101555824, "learning_rate": 0.01, "loss": 2.0103, "step": 31803 }, { "epoch": 3.2681874229346484, "grad_norm": 0.05391063541173935, "learning_rate": 0.01, "loss": 2.0047, "step": 31806 }, { "epoch": 3.2684956843403206, "grad_norm": 0.1030382513999939, "learning_rate": 0.01, "loss": 2.0132, "step": 31809 }, { "epoch": 3.2688039457459928, "grad_norm": 0.03978987783193588, "learning_rate": 0.01, "loss": 2.0099, "step": 31812 }, { "epoch": 3.2691122071516645, "grad_norm": 0.08974360674619675, "learning_rate": 0.01, "loss": 1.9975, "step": 31815 }, { "epoch": 3.2694204685573367, "grad_norm": 0.07530324161052704, "learning_rate": 0.01, "loss": 2.0224, "step": 31818 }, { "epoch": 3.2697287299630085, "grad_norm": 0.04715275764465332, "learning_rate": 0.01, "loss": 2.0011, "step": 31821 }, { "epoch": 3.2700369913686806, "grad_norm": 0.050625238567590714, "learning_rate": 0.01, "loss": 1.9988, "step": 31824 }, { "epoch": 3.270345252774353, "grad_norm": 0.05567210912704468, "learning_rate": 0.01, "loss": 2.0554, "step": 31827 }, { "epoch": 3.2706535141800246, "grad_norm": 0.09813915193080902, "learning_rate": 0.01, "loss": 1.9994, "step": 31830 }, { "epoch": 3.270961775585697, "grad_norm": 0.08543343842029572, "learning_rate": 0.01, "loss": 2.0405, "step": 31833 }, { "epoch": 3.2712700369913685, "grad_norm": 0.03340763971209526, "learning_rate": 0.01, "loss": 2.0302, "step": 31836 }, { "epoch": 3.2715782983970407, "grad_norm": 0.0335637666285038, "learning_rate": 0.01, "loss": 2.0286, "step": 31839 }, { "epoch": 3.271886559802713, "grad_norm": 0.06983290612697601, "learning_rate": 0.01, "loss": 2.0155, "step": 31842 }, { "epoch": 3.2721948212083847, "grad_norm": 0.1157732903957367, "learning_rate": 0.01, "loss": 2.0133, "step": 31845 }, { "epoch": 3.272503082614057, "grad_norm": 0.11864369362592697, "learning_rate": 0.01, "loss": 2.0367, "step": 31848 }, { "epoch": 3.2728113440197286, "grad_norm": 0.09083148092031479, "learning_rate": 0.01, "loss": 2.025, "step": 31851 }, { "epoch": 3.273119605425401, "grad_norm": 0.06563573330640793, "learning_rate": 0.01, "loss": 2.0175, "step": 31854 }, { "epoch": 3.2734278668310726, "grad_norm": 0.043879635632038116, "learning_rate": 0.01, "loss": 2.0288, "step": 31857 }, { "epoch": 3.2737361282367448, "grad_norm": 0.03548846393823624, "learning_rate": 0.01, "loss": 2.0246, "step": 31860 }, { "epoch": 3.274044389642417, "grad_norm": 0.036084435880184174, "learning_rate": 0.01, "loss": 2.0007, "step": 31863 }, { "epoch": 3.2743526510480887, "grad_norm": 0.03813619166612625, "learning_rate": 0.01, "loss": 2.0024, "step": 31866 }, { "epoch": 3.274660912453761, "grad_norm": 0.05276734009385109, "learning_rate": 0.01, "loss": 1.9945, "step": 31869 }, { "epoch": 3.2749691738594326, "grad_norm": 0.07557803392410278, "learning_rate": 0.01, "loss": 2.0038, "step": 31872 }, { "epoch": 3.275277435265105, "grad_norm": 0.060760460793972015, "learning_rate": 0.01, "loss": 2.0283, "step": 31875 }, { "epoch": 3.2755856966707766, "grad_norm": 0.11498884111642838, "learning_rate": 0.01, "loss": 2.0256, "step": 31878 }, { "epoch": 3.275893958076449, "grad_norm": 0.12193593382835388, "learning_rate": 0.01, "loss": 1.9791, "step": 31881 }, { "epoch": 3.276202219482121, "grad_norm": 0.10637890547513962, "learning_rate": 0.01, "loss": 2.0055, "step": 31884 }, { "epoch": 3.2765104808877927, "grad_norm": 0.08633279800415039, "learning_rate": 0.01, "loss": 2.0151, "step": 31887 }, { "epoch": 3.276818742293465, "grad_norm": 0.08105628192424774, "learning_rate": 0.01, "loss": 2.0124, "step": 31890 }, { "epoch": 3.2771270036991367, "grad_norm": 0.03692932799458504, "learning_rate": 0.01, "loss": 2.0134, "step": 31893 }, { "epoch": 3.277435265104809, "grad_norm": 0.03064770996570587, "learning_rate": 0.01, "loss": 2.0088, "step": 31896 }, { "epoch": 3.277743526510481, "grad_norm": 0.038218267261981964, "learning_rate": 0.01, "loss": 2.0045, "step": 31899 }, { "epoch": 3.278051787916153, "grad_norm": 0.05573924258351326, "learning_rate": 0.01, "loss": 2.0289, "step": 31902 }, { "epoch": 3.278360049321825, "grad_norm": 0.14293062686920166, "learning_rate": 0.01, "loss": 2.0147, "step": 31905 }, { "epoch": 3.2786683107274968, "grad_norm": 0.082049660384655, "learning_rate": 0.01, "loss": 2.0159, "step": 31908 }, { "epoch": 3.278976572133169, "grad_norm": 0.08122528344392776, "learning_rate": 0.01, "loss": 2.0043, "step": 31911 }, { "epoch": 3.279284833538841, "grad_norm": 0.054671067744493484, "learning_rate": 0.01, "loss": 2.0054, "step": 31914 }, { "epoch": 3.279593094944513, "grad_norm": 0.11112997680902481, "learning_rate": 0.01, "loss": 1.9826, "step": 31917 }, { "epoch": 3.279901356350185, "grad_norm": 0.12811101973056793, "learning_rate": 0.01, "loss": 2.0242, "step": 31920 }, { "epoch": 3.280209617755857, "grad_norm": 0.07637016475200653, "learning_rate": 0.01, "loss": 1.9879, "step": 31923 }, { "epoch": 3.280517879161529, "grad_norm": 0.03716239705681801, "learning_rate": 0.01, "loss": 2.0055, "step": 31926 }, { "epoch": 3.280826140567201, "grad_norm": 0.03950963541865349, "learning_rate": 0.01, "loss": 1.9905, "step": 31929 }, { "epoch": 3.281134401972873, "grad_norm": 0.09012940526008606, "learning_rate": 0.01, "loss": 2.0422, "step": 31932 }, { "epoch": 3.281442663378545, "grad_norm": 0.04000500217080116, "learning_rate": 0.01, "loss": 2.0054, "step": 31935 }, { "epoch": 3.281750924784217, "grad_norm": 0.06817129254341125, "learning_rate": 0.01, "loss": 2.0131, "step": 31938 }, { "epoch": 3.282059186189889, "grad_norm": 0.14285409450531006, "learning_rate": 0.01, "loss": 2.0235, "step": 31941 }, { "epoch": 3.282367447595561, "grad_norm": 0.03885696083307266, "learning_rate": 0.01, "loss": 2.007, "step": 31944 }, { "epoch": 3.282675709001233, "grad_norm": 0.047379713505506516, "learning_rate": 0.01, "loss": 1.9929, "step": 31947 }, { "epoch": 3.282983970406905, "grad_norm": 0.05476104095578194, "learning_rate": 0.01, "loss": 2.0199, "step": 31950 }, { "epoch": 3.283292231812577, "grad_norm": 0.03158089146018028, "learning_rate": 0.01, "loss": 2.0053, "step": 31953 }, { "epoch": 3.283600493218249, "grad_norm": 0.04537857696413994, "learning_rate": 0.01, "loss": 1.991, "step": 31956 }, { "epoch": 3.283908754623921, "grad_norm": 0.1525343656539917, "learning_rate": 0.01, "loss": 1.9972, "step": 31959 }, { "epoch": 3.284217016029593, "grad_norm": 0.11916167289018631, "learning_rate": 0.01, "loss": 2.0089, "step": 31962 }, { "epoch": 3.2845252774352653, "grad_norm": 0.09215250611305237, "learning_rate": 0.01, "loss": 1.9746, "step": 31965 }, { "epoch": 3.284833538840937, "grad_norm": 0.053620483726263046, "learning_rate": 0.01, "loss": 2.0148, "step": 31968 }, { "epoch": 3.2851418002466093, "grad_norm": 0.03575912117958069, "learning_rate": 0.01, "loss": 2.0237, "step": 31971 }, { "epoch": 3.285450061652281, "grad_norm": 0.09148744493722916, "learning_rate": 0.01, "loss": 2.0179, "step": 31974 }, { "epoch": 3.2857583230579532, "grad_norm": 0.06433127075433731, "learning_rate": 0.01, "loss": 2.0172, "step": 31977 }, { "epoch": 3.286066584463625, "grad_norm": 0.04605916887521744, "learning_rate": 0.01, "loss": 1.9956, "step": 31980 }, { "epoch": 3.286374845869297, "grad_norm": 0.07150716334581375, "learning_rate": 0.01, "loss": 2.0251, "step": 31983 }, { "epoch": 3.2866831072749694, "grad_norm": 0.04914524033665657, "learning_rate": 0.01, "loss": 2.0002, "step": 31986 }, { "epoch": 3.286991368680641, "grad_norm": 0.10281821340322495, "learning_rate": 0.01, "loss": 2.0137, "step": 31989 }, { "epoch": 3.2872996300863133, "grad_norm": 0.04685597866773605, "learning_rate": 0.01, "loss": 2.0283, "step": 31992 }, { "epoch": 3.287607891491985, "grad_norm": 0.04735150560736656, "learning_rate": 0.01, "loss": 2.005, "step": 31995 }, { "epoch": 3.2879161528976573, "grad_norm": 0.07897822558879852, "learning_rate": 0.01, "loss": 2.0123, "step": 31998 }, { "epoch": 3.288224414303329, "grad_norm": 0.08091110736131668, "learning_rate": 0.01, "loss": 2.0397, "step": 32001 }, { "epoch": 3.288532675709001, "grad_norm": 0.038828156888484955, "learning_rate": 0.01, "loss": 2.0245, "step": 32004 }, { "epoch": 3.2888409371146734, "grad_norm": 0.11410044133663177, "learning_rate": 0.01, "loss": 2.0081, "step": 32007 }, { "epoch": 3.289149198520345, "grad_norm": 0.11741339415311813, "learning_rate": 0.01, "loss": 2.0055, "step": 32010 }, { "epoch": 3.2894574599260173, "grad_norm": 0.054089032113552094, "learning_rate": 0.01, "loss": 2.0179, "step": 32013 }, { "epoch": 3.289765721331689, "grad_norm": 0.034770797938108444, "learning_rate": 0.01, "loss": 2.0148, "step": 32016 }, { "epoch": 3.2900739827373613, "grad_norm": 0.06313812732696533, "learning_rate": 0.01, "loss": 2.0294, "step": 32019 }, { "epoch": 3.2903822441430335, "grad_norm": 0.05844837799668312, "learning_rate": 0.01, "loss": 2.0227, "step": 32022 }, { "epoch": 3.2906905055487052, "grad_norm": 0.0374666191637516, "learning_rate": 0.01, "loss": 2.0342, "step": 32025 }, { "epoch": 3.2909987669543774, "grad_norm": 0.05456427484750748, "learning_rate": 0.01, "loss": 1.9794, "step": 32028 }, { "epoch": 3.291307028360049, "grad_norm": 0.11318197846412659, "learning_rate": 0.01, "loss": 1.9995, "step": 32031 }, { "epoch": 3.2916152897657214, "grad_norm": 0.04832073673605919, "learning_rate": 0.01, "loss": 2.0108, "step": 32034 }, { "epoch": 3.2919235511713936, "grad_norm": 0.07790713757276535, "learning_rate": 0.01, "loss": 1.9893, "step": 32037 }, { "epoch": 3.2922318125770653, "grad_norm": 0.05794338509440422, "learning_rate": 0.01, "loss": 2.0029, "step": 32040 }, { "epoch": 3.2925400739827375, "grad_norm": 0.06488461047410965, "learning_rate": 0.01, "loss": 1.9965, "step": 32043 }, { "epoch": 3.2928483353884093, "grad_norm": 0.10804028809070587, "learning_rate": 0.01, "loss": 1.9938, "step": 32046 }, { "epoch": 3.2931565967940815, "grad_norm": 0.04927225038409233, "learning_rate": 0.01, "loss": 2.0111, "step": 32049 }, { "epoch": 3.293464858199753, "grad_norm": 0.039539139717817307, "learning_rate": 0.01, "loss": 2.0129, "step": 32052 }, { "epoch": 3.2937731196054254, "grad_norm": 0.046698734164237976, "learning_rate": 0.01, "loss": 2.0149, "step": 32055 }, { "epoch": 3.2940813810110976, "grad_norm": 0.07978003472089767, "learning_rate": 0.01, "loss": 2.0001, "step": 32058 }, { "epoch": 3.2943896424167693, "grad_norm": 0.05909251049160957, "learning_rate": 0.01, "loss": 2.018, "step": 32061 }, { "epoch": 3.2946979038224415, "grad_norm": 0.09814689308404922, "learning_rate": 0.01, "loss": 1.9935, "step": 32064 }, { "epoch": 3.2950061652281133, "grad_norm": 0.07647134363651276, "learning_rate": 0.01, "loss": 2.0316, "step": 32067 }, { "epoch": 3.2953144266337855, "grad_norm": 0.09203072637319565, "learning_rate": 0.01, "loss": 2.0075, "step": 32070 }, { "epoch": 3.2956226880394572, "grad_norm": 0.14764010906219482, "learning_rate": 0.01, "loss": 2.0052, "step": 32073 }, { "epoch": 3.2959309494451294, "grad_norm": 0.07483326643705368, "learning_rate": 0.01, "loss": 2.0065, "step": 32076 }, { "epoch": 3.2962392108508016, "grad_norm": 0.03683464601635933, "learning_rate": 0.01, "loss": 2.0092, "step": 32079 }, { "epoch": 3.2965474722564734, "grad_norm": 0.0658450499176979, "learning_rate": 0.01, "loss": 2.0134, "step": 32082 }, { "epoch": 3.2968557336621456, "grad_norm": 0.05505736172199249, "learning_rate": 0.01, "loss": 1.9978, "step": 32085 }, { "epoch": 3.2971639950678173, "grad_norm": 0.07295443117618561, "learning_rate": 0.01, "loss": 2.0444, "step": 32088 }, { "epoch": 3.2974722564734895, "grad_norm": 0.03865521401166916, "learning_rate": 0.01, "loss": 2.0084, "step": 32091 }, { "epoch": 3.2977805178791617, "grad_norm": 0.12185568362474442, "learning_rate": 0.01, "loss": 2.0011, "step": 32094 }, { "epoch": 3.2980887792848335, "grad_norm": 0.04646170511841774, "learning_rate": 0.01, "loss": 1.9925, "step": 32097 }, { "epoch": 3.2983970406905057, "grad_norm": 0.10177022218704224, "learning_rate": 0.01, "loss": 2.0001, "step": 32100 }, { "epoch": 3.2987053020961774, "grad_norm": 0.04585393890738487, "learning_rate": 0.01, "loss": 2.0032, "step": 32103 }, { "epoch": 3.2990135635018496, "grad_norm": 0.1089714989066124, "learning_rate": 0.01, "loss": 1.9943, "step": 32106 }, { "epoch": 3.299321824907522, "grad_norm": 0.04579438269138336, "learning_rate": 0.01, "loss": 2.0241, "step": 32109 }, { "epoch": 3.2996300863131935, "grad_norm": 0.0699036568403244, "learning_rate": 0.01, "loss": 2.025, "step": 32112 }, { "epoch": 3.2999383477188657, "grad_norm": 0.10070015490055084, "learning_rate": 0.01, "loss": 2.0037, "step": 32115 }, { "epoch": 3.3002466091245375, "grad_norm": 0.08273176848888397, "learning_rate": 0.01, "loss": 2.0368, "step": 32118 }, { "epoch": 3.3005548705302097, "grad_norm": 0.08648907393217087, "learning_rate": 0.01, "loss": 2.0069, "step": 32121 }, { "epoch": 3.3008631319358814, "grad_norm": 0.059946708381175995, "learning_rate": 0.01, "loss": 2.0023, "step": 32124 }, { "epoch": 3.3011713933415536, "grad_norm": 0.034971099346876144, "learning_rate": 0.01, "loss": 1.9999, "step": 32127 }, { "epoch": 3.301479654747226, "grad_norm": 0.12256456911563873, "learning_rate": 0.01, "loss": 1.9763, "step": 32130 }, { "epoch": 3.3017879161528976, "grad_norm": 0.041138794273138046, "learning_rate": 0.01, "loss": 2.0417, "step": 32133 }, { "epoch": 3.3020961775585698, "grad_norm": 0.04145865887403488, "learning_rate": 0.01, "loss": 1.9986, "step": 32136 }, { "epoch": 3.3024044389642415, "grad_norm": 0.07052136212587357, "learning_rate": 0.01, "loss": 1.9986, "step": 32139 }, { "epoch": 3.3027127003699137, "grad_norm": 0.05553466081619263, "learning_rate": 0.01, "loss": 1.9964, "step": 32142 }, { "epoch": 3.3030209617755855, "grad_norm": 0.038209814578294754, "learning_rate": 0.01, "loss": 2.0056, "step": 32145 }, { "epoch": 3.3033292231812577, "grad_norm": 0.04882222041487694, "learning_rate": 0.01, "loss": 2.019, "step": 32148 }, { "epoch": 3.30363748458693, "grad_norm": 0.11157884448766708, "learning_rate": 0.01, "loss": 2.0012, "step": 32151 }, { "epoch": 3.3039457459926016, "grad_norm": 0.09312507510185242, "learning_rate": 0.01, "loss": 2.0221, "step": 32154 }, { "epoch": 3.304254007398274, "grad_norm": 0.05357594043016434, "learning_rate": 0.01, "loss": 1.9986, "step": 32157 }, { "epoch": 3.304562268803946, "grad_norm": 0.07024755328893661, "learning_rate": 0.01, "loss": 2.0032, "step": 32160 }, { "epoch": 3.3048705302096177, "grad_norm": 0.0773010402917862, "learning_rate": 0.01, "loss": 2.0273, "step": 32163 }, { "epoch": 3.30517879161529, "grad_norm": 0.04807543754577637, "learning_rate": 0.01, "loss": 2.0101, "step": 32166 }, { "epoch": 3.3054870530209617, "grad_norm": 0.03652816265821457, "learning_rate": 0.01, "loss": 1.9997, "step": 32169 }, { "epoch": 3.305795314426634, "grad_norm": 0.06250651925802231, "learning_rate": 0.01, "loss": 2.0255, "step": 32172 }, { "epoch": 3.3061035758323056, "grad_norm": 0.08029799908399582, "learning_rate": 0.01, "loss": 2.0066, "step": 32175 }, { "epoch": 3.306411837237978, "grad_norm": 0.10732737928628922, "learning_rate": 0.01, "loss": 1.9864, "step": 32178 }, { "epoch": 3.30672009864365, "grad_norm": 0.07458434998989105, "learning_rate": 0.01, "loss": 2.0043, "step": 32181 }, { "epoch": 3.3070283600493218, "grad_norm": 0.0794452428817749, "learning_rate": 0.01, "loss": 2.0239, "step": 32184 }, { "epoch": 3.307336621454994, "grad_norm": 0.06960796564817429, "learning_rate": 0.01, "loss": 2.0533, "step": 32187 }, { "epoch": 3.3076448828606657, "grad_norm": 0.09482841193675995, "learning_rate": 0.01, "loss": 1.9819, "step": 32190 }, { "epoch": 3.307953144266338, "grad_norm": 0.08358022570610046, "learning_rate": 0.01, "loss": 1.9936, "step": 32193 }, { "epoch": 3.3082614056720097, "grad_norm": 0.03823438659310341, "learning_rate": 0.01, "loss": 1.9934, "step": 32196 }, { "epoch": 3.308569667077682, "grad_norm": 0.07413027435541153, "learning_rate": 0.01, "loss": 2.0097, "step": 32199 }, { "epoch": 3.308877928483354, "grad_norm": 0.03165535256266594, "learning_rate": 0.01, "loss": 2.0157, "step": 32202 }, { "epoch": 3.309186189889026, "grad_norm": 0.1173541322350502, "learning_rate": 0.01, "loss": 2.0229, "step": 32205 }, { "epoch": 3.309494451294698, "grad_norm": 0.07542740553617477, "learning_rate": 0.01, "loss": 1.9974, "step": 32208 }, { "epoch": 3.3098027127003697, "grad_norm": 0.104609914124012, "learning_rate": 0.01, "loss": 2.0312, "step": 32211 }, { "epoch": 3.310110974106042, "grad_norm": 0.0530179999768734, "learning_rate": 0.01, "loss": 1.9999, "step": 32214 }, { "epoch": 3.3104192355117137, "grad_norm": 0.070836141705513, "learning_rate": 0.01, "loss": 1.992, "step": 32217 }, { "epoch": 3.310727496917386, "grad_norm": 0.04056829586625099, "learning_rate": 0.01, "loss": 2.0202, "step": 32220 }, { "epoch": 3.311035758323058, "grad_norm": 0.10000187903642654, "learning_rate": 0.01, "loss": 2.0125, "step": 32223 }, { "epoch": 3.31134401972873, "grad_norm": 0.04653482139110565, "learning_rate": 0.01, "loss": 1.9988, "step": 32226 }, { "epoch": 3.311652281134402, "grad_norm": 0.10693076252937317, "learning_rate": 0.01, "loss": 1.9757, "step": 32229 }, { "epoch": 3.311960542540074, "grad_norm": 0.055378034710884094, "learning_rate": 0.01, "loss": 2.021, "step": 32232 }, { "epoch": 3.312268803945746, "grad_norm": 0.037242140620946884, "learning_rate": 0.01, "loss": 2.0367, "step": 32235 }, { "epoch": 3.312577065351418, "grad_norm": 0.04497090354561806, "learning_rate": 0.01, "loss": 1.997, "step": 32238 }, { "epoch": 3.31288532675709, "grad_norm": 0.048384904861450195, "learning_rate": 0.01, "loss": 1.9945, "step": 32241 }, { "epoch": 3.313193588162762, "grad_norm": 0.05588208884000778, "learning_rate": 0.01, "loss": 2.0375, "step": 32244 }, { "epoch": 3.313501849568434, "grad_norm": 0.05643755942583084, "learning_rate": 0.01, "loss": 2.0384, "step": 32247 }, { "epoch": 3.313810110974106, "grad_norm": 0.03415621444582939, "learning_rate": 0.01, "loss": 2.0217, "step": 32250 }, { "epoch": 3.3141183723797782, "grad_norm": 0.03537356108427048, "learning_rate": 0.01, "loss": 2.0046, "step": 32253 }, { "epoch": 3.31442663378545, "grad_norm": 0.1299961805343628, "learning_rate": 0.01, "loss": 1.9806, "step": 32256 }, { "epoch": 3.314734895191122, "grad_norm": 0.05058746412396431, "learning_rate": 0.01, "loss": 2.0076, "step": 32259 }, { "epoch": 3.315043156596794, "grad_norm": 0.06381676346063614, "learning_rate": 0.01, "loss": 2.0156, "step": 32262 }, { "epoch": 3.315351418002466, "grad_norm": 0.039552103728055954, "learning_rate": 0.01, "loss": 1.9853, "step": 32265 }, { "epoch": 3.315659679408138, "grad_norm": 0.03872091323137283, "learning_rate": 0.01, "loss": 2.0174, "step": 32268 }, { "epoch": 3.31596794081381, "grad_norm": 0.04546678811311722, "learning_rate": 0.01, "loss": 2.0176, "step": 32271 }, { "epoch": 3.3162762022194823, "grad_norm": 0.04541923850774765, "learning_rate": 0.01, "loss": 2.023, "step": 32274 }, { "epoch": 3.316584463625154, "grad_norm": 0.04341261461377144, "learning_rate": 0.01, "loss": 2.0179, "step": 32277 }, { "epoch": 3.316892725030826, "grad_norm": 0.10473504662513733, "learning_rate": 0.01, "loss": 2.0263, "step": 32280 }, { "epoch": 3.317200986436498, "grad_norm": 0.0668938085436821, "learning_rate": 0.01, "loss": 2.0417, "step": 32283 }, { "epoch": 3.31750924784217, "grad_norm": 0.12829263508319855, "learning_rate": 0.01, "loss": 2.022, "step": 32286 }, { "epoch": 3.3178175092478424, "grad_norm": 0.05443095043301582, "learning_rate": 0.01, "loss": 2.0101, "step": 32289 }, { "epoch": 3.318125770653514, "grad_norm": 0.06268401443958282, "learning_rate": 0.01, "loss": 2.0178, "step": 32292 }, { "epoch": 3.3184340320591863, "grad_norm": 0.05700231343507767, "learning_rate": 0.01, "loss": 2.0203, "step": 32295 }, { "epoch": 3.318742293464858, "grad_norm": 0.07467647641897202, "learning_rate": 0.01, "loss": 2.0031, "step": 32298 }, { "epoch": 3.3190505548705302, "grad_norm": 0.0675196647644043, "learning_rate": 0.01, "loss": 2.013, "step": 32301 }, { "epoch": 3.3193588162762024, "grad_norm": 0.044399552047252655, "learning_rate": 0.01, "loss": 1.993, "step": 32304 }, { "epoch": 3.319667077681874, "grad_norm": 0.0451500304043293, "learning_rate": 0.01, "loss": 2.0158, "step": 32307 }, { "epoch": 3.3199753390875464, "grad_norm": 0.041543807834386826, "learning_rate": 0.01, "loss": 2.0157, "step": 32310 }, { "epoch": 3.320283600493218, "grad_norm": 0.08818163722753525, "learning_rate": 0.01, "loss": 1.9928, "step": 32313 }, { "epoch": 3.3205918618988903, "grad_norm": 0.05897468701004982, "learning_rate": 0.01, "loss": 1.999, "step": 32316 }, { "epoch": 3.320900123304562, "grad_norm": 0.10485360026359558, "learning_rate": 0.01, "loss": 2.0178, "step": 32319 }, { "epoch": 3.3212083847102343, "grad_norm": 0.048387110233306885, "learning_rate": 0.01, "loss": 2.021, "step": 32322 }, { "epoch": 3.3215166461159065, "grad_norm": 0.07535526156425476, "learning_rate": 0.01, "loss": 2.0228, "step": 32325 }, { "epoch": 3.321824907521578, "grad_norm": 0.04594804719090462, "learning_rate": 0.01, "loss": 2.0271, "step": 32328 }, { "epoch": 3.3221331689272504, "grad_norm": 0.03757678344845772, "learning_rate": 0.01, "loss": 2.0177, "step": 32331 }, { "epoch": 3.322441430332922, "grad_norm": 0.046382974833250046, "learning_rate": 0.01, "loss": 2.021, "step": 32334 }, { "epoch": 3.3227496917385944, "grad_norm": 0.03983695060014725, "learning_rate": 0.01, "loss": 1.9877, "step": 32337 }, { "epoch": 3.323057953144266, "grad_norm": 0.0615588016808033, "learning_rate": 0.01, "loss": 2.0374, "step": 32340 }, { "epoch": 3.3233662145499383, "grad_norm": 0.10143701732158661, "learning_rate": 0.01, "loss": 2.0262, "step": 32343 }, { "epoch": 3.3236744759556105, "grad_norm": 0.06885727494955063, "learning_rate": 0.01, "loss": 2.0055, "step": 32346 }, { "epoch": 3.3239827373612822, "grad_norm": 0.10029948502779007, "learning_rate": 0.01, "loss": 2.0224, "step": 32349 }, { "epoch": 3.3242909987669544, "grad_norm": 0.08583710342645645, "learning_rate": 0.01, "loss": 1.988, "step": 32352 }, { "epoch": 3.3245992601726266, "grad_norm": 0.08596043288707733, "learning_rate": 0.01, "loss": 2.0065, "step": 32355 }, { "epoch": 3.3249075215782984, "grad_norm": 0.0431043840944767, "learning_rate": 0.01, "loss": 2.0107, "step": 32358 }, { "epoch": 3.3252157829839706, "grad_norm": 0.044695861637592316, "learning_rate": 0.01, "loss": 1.9898, "step": 32361 }, { "epoch": 3.3255240443896423, "grad_norm": 0.03210937976837158, "learning_rate": 0.01, "loss": 2.0058, "step": 32364 }, { "epoch": 3.3258323057953145, "grad_norm": 0.03838266804814339, "learning_rate": 0.01, "loss": 2.0145, "step": 32367 }, { "epoch": 3.3261405672009863, "grad_norm": 0.05611315369606018, "learning_rate": 0.01, "loss": 2.0045, "step": 32370 }, { "epoch": 3.3264488286066585, "grad_norm": 0.10025975853204727, "learning_rate": 0.01, "loss": 2.0047, "step": 32373 }, { "epoch": 3.3267570900123307, "grad_norm": 0.10004039853811264, "learning_rate": 0.01, "loss": 2.0171, "step": 32376 }, { "epoch": 3.3270653514180024, "grad_norm": 0.0655856728553772, "learning_rate": 0.01, "loss": 2.0079, "step": 32379 }, { "epoch": 3.3273736128236746, "grad_norm": 0.09608997404575348, "learning_rate": 0.01, "loss": 2.0069, "step": 32382 }, { "epoch": 3.3276818742293464, "grad_norm": 0.14821460843086243, "learning_rate": 0.01, "loss": 2.0107, "step": 32385 }, { "epoch": 3.3279901356350186, "grad_norm": 0.05276164412498474, "learning_rate": 0.01, "loss": 2.0369, "step": 32388 }, { "epoch": 3.3282983970406903, "grad_norm": 0.049289003014564514, "learning_rate": 0.01, "loss": 1.9977, "step": 32391 }, { "epoch": 3.3286066584463625, "grad_norm": 0.05115703493356705, "learning_rate": 0.01, "loss": 2.004, "step": 32394 }, { "epoch": 3.3289149198520347, "grad_norm": 0.04041285440325737, "learning_rate": 0.01, "loss": 2.0262, "step": 32397 }, { "epoch": 3.3292231812577064, "grad_norm": 0.04861520603299141, "learning_rate": 0.01, "loss": 1.9955, "step": 32400 }, { "epoch": 3.3295314426633786, "grad_norm": 0.1111973226070404, "learning_rate": 0.01, "loss": 2.0101, "step": 32403 }, { "epoch": 3.3298397040690504, "grad_norm": 0.061960045248270035, "learning_rate": 0.01, "loss": 1.9887, "step": 32406 }, { "epoch": 3.3301479654747226, "grad_norm": 0.1108783558011055, "learning_rate": 0.01, "loss": 2.0207, "step": 32409 }, { "epoch": 3.3304562268803943, "grad_norm": 0.06897444278001785, "learning_rate": 0.01, "loss": 2.0321, "step": 32412 }, { "epoch": 3.3307644882860665, "grad_norm": 0.046277862042188644, "learning_rate": 0.01, "loss": 2.0018, "step": 32415 }, { "epoch": 3.3310727496917387, "grad_norm": 0.09898782521486282, "learning_rate": 0.01, "loss": 2.0146, "step": 32418 }, { "epoch": 3.3313810110974105, "grad_norm": 0.059529174119234085, "learning_rate": 0.01, "loss": 1.9783, "step": 32421 }, { "epoch": 3.3316892725030827, "grad_norm": 0.08007462322711945, "learning_rate": 0.01, "loss": 2.0324, "step": 32424 }, { "epoch": 3.331997533908755, "grad_norm": 0.07130561769008636, "learning_rate": 0.01, "loss": 2.0061, "step": 32427 }, { "epoch": 3.3323057953144266, "grad_norm": 0.04967787116765976, "learning_rate": 0.01, "loss": 2.0259, "step": 32430 }, { "epoch": 3.332614056720099, "grad_norm": 0.08194708079099655, "learning_rate": 0.01, "loss": 2.0137, "step": 32433 }, { "epoch": 3.3329223181257706, "grad_norm": 0.056519269943237305, "learning_rate": 0.01, "loss": 1.9917, "step": 32436 }, { "epoch": 3.3332305795314427, "grad_norm": 0.08086001873016357, "learning_rate": 0.01, "loss": 2.021, "step": 32439 }, { "epoch": 3.3335388409371145, "grad_norm": 0.04036881402134895, "learning_rate": 0.01, "loss": 2.0205, "step": 32442 }, { "epoch": 3.3338471023427867, "grad_norm": 0.04373360425233841, "learning_rate": 0.01, "loss": 2.0214, "step": 32445 }, { "epoch": 3.334155363748459, "grad_norm": 0.04562424495816231, "learning_rate": 0.01, "loss": 2.0093, "step": 32448 }, { "epoch": 3.3344636251541306, "grad_norm": 0.2069234549999237, "learning_rate": 0.01, "loss": 1.9904, "step": 32451 }, { "epoch": 3.334771886559803, "grad_norm": 0.08092590421438217, "learning_rate": 0.01, "loss": 2.0097, "step": 32454 }, { "epoch": 3.3350801479654746, "grad_norm": 0.059557970613241196, "learning_rate": 0.01, "loss": 2.0523, "step": 32457 }, { "epoch": 3.335388409371147, "grad_norm": 0.039045874029397964, "learning_rate": 0.01, "loss": 1.9891, "step": 32460 }, { "epoch": 3.3356966707768185, "grad_norm": 0.03718112036585808, "learning_rate": 0.01, "loss": 2.0099, "step": 32463 }, { "epoch": 3.3360049321824907, "grad_norm": 0.05162828043103218, "learning_rate": 0.01, "loss": 2.0151, "step": 32466 }, { "epoch": 3.336313193588163, "grad_norm": 0.05825696140527725, "learning_rate": 0.01, "loss": 2.0252, "step": 32469 }, { "epoch": 3.3366214549938347, "grad_norm": 0.0458202064037323, "learning_rate": 0.01, "loss": 2.0273, "step": 32472 }, { "epoch": 3.336929716399507, "grad_norm": 0.07931084930896759, "learning_rate": 0.01, "loss": 2.0026, "step": 32475 }, { "epoch": 3.3372379778051786, "grad_norm": 0.05673946067690849, "learning_rate": 0.01, "loss": 1.998, "step": 32478 }, { "epoch": 3.337546239210851, "grad_norm": 0.05536726489663124, "learning_rate": 0.01, "loss": 1.9994, "step": 32481 }, { "epoch": 3.337854500616523, "grad_norm": 0.053581688553094864, "learning_rate": 0.01, "loss": 1.9913, "step": 32484 }, { "epoch": 3.3381627620221948, "grad_norm": 0.057449061423540115, "learning_rate": 0.01, "loss": 1.9973, "step": 32487 }, { "epoch": 3.338471023427867, "grad_norm": 0.0768120214343071, "learning_rate": 0.01, "loss": 2.0294, "step": 32490 }, { "epoch": 3.3387792848335387, "grad_norm": 0.1322673261165619, "learning_rate": 0.01, "loss": 1.9827, "step": 32493 }, { "epoch": 3.339087546239211, "grad_norm": 0.10641443729400635, "learning_rate": 0.01, "loss": 1.9858, "step": 32496 }, { "epoch": 3.339395807644883, "grad_norm": 0.13880647718906403, "learning_rate": 0.01, "loss": 1.979, "step": 32499 }, { "epoch": 3.339704069050555, "grad_norm": 0.04653307795524597, "learning_rate": 0.01, "loss": 2.0206, "step": 32502 }, { "epoch": 3.340012330456227, "grad_norm": 0.05031618848443031, "learning_rate": 0.01, "loss": 2.0221, "step": 32505 }, { "epoch": 3.340320591861899, "grad_norm": 0.042637672275304794, "learning_rate": 0.01, "loss": 1.9986, "step": 32508 }, { "epoch": 3.340628853267571, "grad_norm": 0.03557129204273224, "learning_rate": 0.01, "loss": 2.0186, "step": 32511 }, { "epoch": 3.3409371146732427, "grad_norm": 0.04271169379353523, "learning_rate": 0.01, "loss": 1.9907, "step": 32514 }, { "epoch": 3.341245376078915, "grad_norm": 0.06750103831291199, "learning_rate": 0.01, "loss": 2.0013, "step": 32517 }, { "epoch": 3.341553637484587, "grad_norm": 0.05727256461977959, "learning_rate": 0.01, "loss": 2.0192, "step": 32520 }, { "epoch": 3.341861898890259, "grad_norm": 0.05737854540348053, "learning_rate": 0.01, "loss": 2.0194, "step": 32523 }, { "epoch": 3.342170160295931, "grad_norm": 0.04525689780712128, "learning_rate": 0.01, "loss": 1.9733, "step": 32526 }, { "epoch": 3.342478421701603, "grad_norm": 0.038834329694509506, "learning_rate": 0.01, "loss": 2.0287, "step": 32529 }, { "epoch": 3.342786683107275, "grad_norm": 0.14812909066677094, "learning_rate": 0.01, "loss": 1.9926, "step": 32532 }, { "epoch": 3.3430949445129468, "grad_norm": 0.09346423298120499, "learning_rate": 0.01, "loss": 1.9866, "step": 32535 }, { "epoch": 3.343403205918619, "grad_norm": 0.03221321851015091, "learning_rate": 0.01, "loss": 2.0114, "step": 32538 }, { "epoch": 3.343711467324291, "grad_norm": 0.05457564815878868, "learning_rate": 0.01, "loss": 2.0044, "step": 32541 }, { "epoch": 3.344019728729963, "grad_norm": 0.05203322321176529, "learning_rate": 0.01, "loss": 1.9944, "step": 32544 }, { "epoch": 3.344327990135635, "grad_norm": 0.05318872258067131, "learning_rate": 0.01, "loss": 2.0199, "step": 32547 }, { "epoch": 3.344636251541307, "grad_norm": 0.034635335206985474, "learning_rate": 0.01, "loss": 1.9841, "step": 32550 }, { "epoch": 3.344944512946979, "grad_norm": 0.0421120747923851, "learning_rate": 0.01, "loss": 1.9923, "step": 32553 }, { "epoch": 3.3452527743526512, "grad_norm": 0.08523180335760117, "learning_rate": 0.01, "loss": 2.0162, "step": 32556 }, { "epoch": 3.345561035758323, "grad_norm": 0.11694061011075974, "learning_rate": 0.01, "loss": 1.9921, "step": 32559 }, { "epoch": 3.345869297163995, "grad_norm": 0.05000199005007744, "learning_rate": 0.01, "loss": 2.0159, "step": 32562 }, { "epoch": 3.346177558569667, "grad_norm": 0.0399484746158123, "learning_rate": 0.01, "loss": 2.0316, "step": 32565 }, { "epoch": 3.346485819975339, "grad_norm": 0.0491316057741642, "learning_rate": 0.01, "loss": 2.0184, "step": 32568 }, { "epoch": 3.3467940813810113, "grad_norm": 0.042924270033836365, "learning_rate": 0.01, "loss": 1.9983, "step": 32571 }, { "epoch": 3.347102342786683, "grad_norm": 0.03486446663737297, "learning_rate": 0.01, "loss": 2.023, "step": 32574 }, { "epoch": 3.3474106041923553, "grad_norm": 0.031064294278621674, "learning_rate": 0.01, "loss": 2.0249, "step": 32577 }, { "epoch": 3.347718865598027, "grad_norm": 0.05951589718461037, "learning_rate": 0.01, "loss": 1.9989, "step": 32580 }, { "epoch": 3.348027127003699, "grad_norm": 0.04387381300330162, "learning_rate": 0.01, "loss": 2.0158, "step": 32583 }, { "epoch": 3.348335388409371, "grad_norm": 0.05328337103128433, "learning_rate": 0.01, "loss": 1.9941, "step": 32586 }, { "epoch": 3.348643649815043, "grad_norm": 0.04561325162649155, "learning_rate": 0.01, "loss": 2.0039, "step": 32589 }, { "epoch": 3.3489519112207153, "grad_norm": 0.047260623425245285, "learning_rate": 0.01, "loss": 2.0387, "step": 32592 }, { "epoch": 3.349260172626387, "grad_norm": 0.21082252264022827, "learning_rate": 0.01, "loss": 2.024, "step": 32595 }, { "epoch": 3.3495684340320593, "grad_norm": 0.08391027897596359, "learning_rate": 0.01, "loss": 1.9919, "step": 32598 }, { "epoch": 3.349876695437731, "grad_norm": 0.07036472856998444, "learning_rate": 0.01, "loss": 2.0218, "step": 32601 }, { "epoch": 3.3501849568434032, "grad_norm": 0.03812922164797783, "learning_rate": 0.01, "loss": 2.014, "step": 32604 }, { "epoch": 3.350493218249075, "grad_norm": 0.041235774755477905, "learning_rate": 0.01, "loss": 2.0135, "step": 32607 }, { "epoch": 3.350801479654747, "grad_norm": 0.05174950137734413, "learning_rate": 0.01, "loss": 2.026, "step": 32610 }, { "epoch": 3.3511097410604194, "grad_norm": 0.058260124176740646, "learning_rate": 0.01, "loss": 2.0177, "step": 32613 }, { "epoch": 3.351418002466091, "grad_norm": 0.045651067048311234, "learning_rate": 0.01, "loss": 2.0199, "step": 32616 }, { "epoch": 3.3517262638717633, "grad_norm": 0.043610829859972, "learning_rate": 0.01, "loss": 2.0253, "step": 32619 }, { "epoch": 3.3520345252774355, "grad_norm": 0.04924603924155235, "learning_rate": 0.01, "loss": 2.0141, "step": 32622 }, { "epoch": 3.3523427866831073, "grad_norm": 0.04765019193291664, "learning_rate": 0.01, "loss": 2.0398, "step": 32625 }, { "epoch": 3.3526510480887795, "grad_norm": 0.04744412377476692, "learning_rate": 0.01, "loss": 2.0159, "step": 32628 }, { "epoch": 3.352959309494451, "grad_norm": 0.09456950426101685, "learning_rate": 0.01, "loss": 2.0084, "step": 32631 }, { "epoch": 3.3532675709001234, "grad_norm": 0.17356513440608978, "learning_rate": 0.01, "loss": 2.0138, "step": 32634 }, { "epoch": 3.353575832305795, "grad_norm": 0.08420834690332413, "learning_rate": 0.01, "loss": 2.0004, "step": 32637 }, { "epoch": 3.3538840937114673, "grad_norm": 0.09453277289867401, "learning_rate": 0.01, "loss": 2.021, "step": 32640 }, { "epoch": 3.3541923551171395, "grad_norm": 0.05444180220365524, "learning_rate": 0.01, "loss": 2.0335, "step": 32643 }, { "epoch": 3.3545006165228113, "grad_norm": 0.04824339225888252, "learning_rate": 0.01, "loss": 1.9976, "step": 32646 }, { "epoch": 3.3548088779284835, "grad_norm": 0.06650727242231369, "learning_rate": 0.01, "loss": 2.0021, "step": 32649 }, { "epoch": 3.3551171393341552, "grad_norm": 0.05119656026363373, "learning_rate": 0.01, "loss": 2.0415, "step": 32652 }, { "epoch": 3.3554254007398274, "grad_norm": 0.044617343693971634, "learning_rate": 0.01, "loss": 1.9992, "step": 32655 }, { "epoch": 3.355733662145499, "grad_norm": 0.035579435527324677, "learning_rate": 0.01, "loss": 1.9993, "step": 32658 }, { "epoch": 3.3560419235511714, "grad_norm": 0.05802566558122635, "learning_rate": 0.01, "loss": 2.0112, "step": 32661 }, { "epoch": 3.3563501849568436, "grad_norm": 0.050934724509716034, "learning_rate": 0.01, "loss": 2.0039, "step": 32664 }, { "epoch": 3.3566584463625153, "grad_norm": 0.055400047451257706, "learning_rate": 0.01, "loss": 2.0179, "step": 32667 }, { "epoch": 3.3569667077681875, "grad_norm": 0.1315484195947647, "learning_rate": 0.01, "loss": 1.9883, "step": 32670 }, { "epoch": 3.3572749691738593, "grad_norm": 0.13136912882328033, "learning_rate": 0.01, "loss": 2.0123, "step": 32673 }, { "epoch": 3.3575832305795315, "grad_norm": 0.08574076741933823, "learning_rate": 0.01, "loss": 1.9902, "step": 32676 }, { "epoch": 3.357891491985203, "grad_norm": 0.04678389057517052, "learning_rate": 0.01, "loss": 2.0363, "step": 32679 }, { "epoch": 3.3581997533908754, "grad_norm": 0.03356343135237694, "learning_rate": 0.01, "loss": 2.0031, "step": 32682 }, { "epoch": 3.3585080147965476, "grad_norm": 0.046139661222696304, "learning_rate": 0.01, "loss": 2.0255, "step": 32685 }, { "epoch": 3.3588162762022193, "grad_norm": 0.03130761533975601, "learning_rate": 0.01, "loss": 2.0107, "step": 32688 }, { "epoch": 3.3591245376078915, "grad_norm": 0.17764367163181305, "learning_rate": 0.01, "loss": 1.9817, "step": 32691 }, { "epoch": 3.3594327990135637, "grad_norm": 0.04135056957602501, "learning_rate": 0.01, "loss": 2.0032, "step": 32694 }, { "epoch": 3.3597410604192355, "grad_norm": 0.11181548237800598, "learning_rate": 0.01, "loss": 2.048, "step": 32697 }, { "epoch": 3.3600493218249077, "grad_norm": 0.07631994783878326, "learning_rate": 0.01, "loss": 2.0222, "step": 32700 }, { "epoch": 3.3603575832305794, "grad_norm": 0.03839050978422165, "learning_rate": 0.01, "loss": 1.9715, "step": 32703 }, { "epoch": 3.3606658446362516, "grad_norm": 0.03893091529607773, "learning_rate": 0.01, "loss": 2.0204, "step": 32706 }, { "epoch": 3.3609741060419234, "grad_norm": 0.15776588022708893, "learning_rate": 0.01, "loss": 1.9747, "step": 32709 }, { "epoch": 3.3612823674475956, "grad_norm": 0.125548854470253, "learning_rate": 0.01, "loss": 2.0066, "step": 32712 }, { "epoch": 3.3615906288532678, "grad_norm": 0.06952936947345734, "learning_rate": 0.01, "loss": 2.0353, "step": 32715 }, { "epoch": 3.3618988902589395, "grad_norm": 0.03826635703444481, "learning_rate": 0.01, "loss": 2.0186, "step": 32718 }, { "epoch": 3.3622071516646117, "grad_norm": 0.03977655619382858, "learning_rate": 0.01, "loss": 2.0205, "step": 32721 }, { "epoch": 3.3625154130702835, "grad_norm": 0.02882550098001957, "learning_rate": 0.01, "loss": 1.9914, "step": 32724 }, { "epoch": 3.3628236744759556, "grad_norm": 0.03502441197633743, "learning_rate": 0.01, "loss": 1.9925, "step": 32727 }, { "epoch": 3.3631319358816274, "grad_norm": 0.04370797425508499, "learning_rate": 0.01, "loss": 2.0154, "step": 32730 }, { "epoch": 3.3634401972872996, "grad_norm": 0.03528802841901779, "learning_rate": 0.01, "loss": 2.0014, "step": 32733 }, { "epoch": 3.363748458692972, "grad_norm": 0.08671889454126358, "learning_rate": 0.01, "loss": 2.0048, "step": 32736 }, { "epoch": 3.3640567200986435, "grad_norm": 0.1123836413025856, "learning_rate": 0.01, "loss": 2.0119, "step": 32739 }, { "epoch": 3.3643649815043157, "grad_norm": 0.061064526438713074, "learning_rate": 0.01, "loss": 2.0054, "step": 32742 }, { "epoch": 3.3646732429099875, "grad_norm": 0.05037948489189148, "learning_rate": 0.01, "loss": 2.0282, "step": 32745 }, { "epoch": 3.3649815043156597, "grad_norm": 0.052206844091415405, "learning_rate": 0.01, "loss": 2.0226, "step": 32748 }, { "epoch": 3.365289765721332, "grad_norm": 0.05795833095908165, "learning_rate": 0.01, "loss": 2.0038, "step": 32751 }, { "epoch": 3.3655980271270036, "grad_norm": 0.030604414641857147, "learning_rate": 0.01, "loss": 2.0104, "step": 32754 }, { "epoch": 3.365906288532676, "grad_norm": 0.05441366508603096, "learning_rate": 0.01, "loss": 1.9975, "step": 32757 }, { "epoch": 3.3662145499383476, "grad_norm": 0.07509131729602814, "learning_rate": 0.01, "loss": 2.0117, "step": 32760 }, { "epoch": 3.3665228113440198, "grad_norm": 0.046888504177331924, "learning_rate": 0.01, "loss": 2.0158, "step": 32763 }, { "epoch": 3.366831072749692, "grad_norm": 0.05030560493469238, "learning_rate": 0.01, "loss": 2.0139, "step": 32766 }, { "epoch": 3.3671393341553637, "grad_norm": 0.0426168255507946, "learning_rate": 0.01, "loss": 2.0118, "step": 32769 }, { "epoch": 3.367447595561036, "grad_norm": 0.10896468907594681, "learning_rate": 0.01, "loss": 1.9939, "step": 32772 }, { "epoch": 3.3677558569667077, "grad_norm": 0.11696910113096237, "learning_rate": 0.01, "loss": 2.0353, "step": 32775 }, { "epoch": 3.36806411837238, "grad_norm": 0.07340724021196365, "learning_rate": 0.01, "loss": 2.0062, "step": 32778 }, { "epoch": 3.3683723797780516, "grad_norm": 0.037968121469020844, "learning_rate": 0.01, "loss": 2.0191, "step": 32781 }, { "epoch": 3.368680641183724, "grad_norm": 0.044434670358896255, "learning_rate": 0.01, "loss": 1.9993, "step": 32784 }, { "epoch": 3.368988902589396, "grad_norm": 0.03823886066675186, "learning_rate": 0.01, "loss": 2.0402, "step": 32787 }, { "epoch": 3.3692971639950677, "grad_norm": 0.06556801497936249, "learning_rate": 0.01, "loss": 2.0097, "step": 32790 }, { "epoch": 3.36960542540074, "grad_norm": 0.06128913164138794, "learning_rate": 0.01, "loss": 2.032, "step": 32793 }, { "epoch": 3.3699136868064117, "grad_norm": 0.08499012142419815, "learning_rate": 0.01, "loss": 2.0395, "step": 32796 }, { "epoch": 3.370221948212084, "grad_norm": 0.03410051763057709, "learning_rate": 0.01, "loss": 2.0055, "step": 32799 }, { "epoch": 3.3705302096177556, "grad_norm": 0.08818015456199646, "learning_rate": 0.01, "loss": 2.0034, "step": 32802 }, { "epoch": 3.370838471023428, "grad_norm": 0.045091863721609116, "learning_rate": 0.01, "loss": 2.0252, "step": 32805 }, { "epoch": 3.3711467324291, "grad_norm": 0.10982260853052139, "learning_rate": 0.01, "loss": 1.9912, "step": 32808 }, { "epoch": 3.3714549938347718, "grad_norm": 0.04633982852101326, "learning_rate": 0.01, "loss": 2.0256, "step": 32811 }, { "epoch": 3.371763255240444, "grad_norm": 0.04701898992061615, "learning_rate": 0.01, "loss": 2.0098, "step": 32814 }, { "epoch": 3.372071516646116, "grad_norm": 0.03449505567550659, "learning_rate": 0.01, "loss": 2.0046, "step": 32817 }, { "epoch": 3.372379778051788, "grad_norm": 0.03621023893356323, "learning_rate": 0.01, "loss": 1.9677, "step": 32820 }, { "epoch": 3.37268803945746, "grad_norm": 0.04743462800979614, "learning_rate": 0.01, "loss": 2.0308, "step": 32823 }, { "epoch": 3.372996300863132, "grad_norm": 0.04240218549966812, "learning_rate": 0.01, "loss": 2.0152, "step": 32826 }, { "epoch": 3.373304562268804, "grad_norm": 0.09400332719087601, "learning_rate": 0.01, "loss": 2.0098, "step": 32829 }, { "epoch": 3.373612823674476, "grad_norm": 0.07313279062509537, "learning_rate": 0.01, "loss": 2.0366, "step": 32832 }, { "epoch": 3.373921085080148, "grad_norm": 0.07604516297578812, "learning_rate": 0.01, "loss": 2.0013, "step": 32835 }, { "epoch": 3.37422934648582, "grad_norm": 0.044236812740564346, "learning_rate": 0.01, "loss": 2.0005, "step": 32838 }, { "epoch": 3.374537607891492, "grad_norm": 0.051601652055978775, "learning_rate": 0.01, "loss": 1.9981, "step": 32841 }, { "epoch": 3.374845869297164, "grad_norm": 0.10818912088871002, "learning_rate": 0.01, "loss": 2.001, "step": 32844 }, { "epoch": 3.375154130702836, "grad_norm": 0.04563935101032257, "learning_rate": 0.01, "loss": 2.0138, "step": 32847 }, { "epoch": 3.375462392108508, "grad_norm": 0.053665511310100555, "learning_rate": 0.01, "loss": 2.0193, "step": 32850 }, { "epoch": 3.37577065351418, "grad_norm": 0.08934652805328369, "learning_rate": 0.01, "loss": 2.0243, "step": 32853 }, { "epoch": 3.376078914919852, "grad_norm": 0.0752192884683609, "learning_rate": 0.01, "loss": 2.0429, "step": 32856 }, { "epoch": 3.376387176325524, "grad_norm": 0.05763142928481102, "learning_rate": 0.01, "loss": 2.0022, "step": 32859 }, { "epoch": 3.376695437731196, "grad_norm": 0.03926476463675499, "learning_rate": 0.01, "loss": 2.0132, "step": 32862 }, { "epoch": 3.377003699136868, "grad_norm": 0.05735384672880173, "learning_rate": 0.01, "loss": 2.0144, "step": 32865 }, { "epoch": 3.37731196054254, "grad_norm": 0.18665596842765808, "learning_rate": 0.01, "loss": 2.0028, "step": 32868 }, { "epoch": 3.377620221948212, "grad_norm": 0.06702150404453278, "learning_rate": 0.01, "loss": 2.0254, "step": 32871 }, { "epoch": 3.377928483353884, "grad_norm": 0.051258910447359085, "learning_rate": 0.01, "loss": 2.0135, "step": 32874 }, { "epoch": 3.378236744759556, "grad_norm": 0.05804390087723732, "learning_rate": 0.01, "loss": 2.0006, "step": 32877 }, { "epoch": 3.3785450061652282, "grad_norm": 0.04688677936792374, "learning_rate": 0.01, "loss": 2.0244, "step": 32880 }, { "epoch": 3.3788532675709, "grad_norm": 0.057768989354372025, "learning_rate": 0.01, "loss": 2.0082, "step": 32883 }, { "epoch": 3.379161528976572, "grad_norm": 0.05571329593658447, "learning_rate": 0.01, "loss": 2.0364, "step": 32886 }, { "epoch": 3.3794697903822444, "grad_norm": 0.04497957229614258, "learning_rate": 0.01, "loss": 1.9896, "step": 32889 }, { "epoch": 3.379778051787916, "grad_norm": 0.043453045189380646, "learning_rate": 0.01, "loss": 2.0196, "step": 32892 }, { "epoch": 3.3800863131935883, "grad_norm": 0.045709915459156036, "learning_rate": 0.01, "loss": 1.9904, "step": 32895 }, { "epoch": 3.38039457459926, "grad_norm": 0.05974254012107849, "learning_rate": 0.01, "loss": 2.0022, "step": 32898 }, { "epoch": 3.3807028360049323, "grad_norm": 0.16592206060886383, "learning_rate": 0.01, "loss": 2.0035, "step": 32901 }, { "epoch": 3.381011097410604, "grad_norm": 0.04671747609972954, "learning_rate": 0.01, "loss": 1.9948, "step": 32904 }, { "epoch": 3.381319358816276, "grad_norm": 0.07180918008089066, "learning_rate": 0.01, "loss": 2.0097, "step": 32907 }, { "epoch": 3.3816276202219484, "grad_norm": 0.06775587797164917, "learning_rate": 0.01, "loss": 2.0101, "step": 32910 }, { "epoch": 3.38193588162762, "grad_norm": 0.04381205141544342, "learning_rate": 0.01, "loss": 2.0053, "step": 32913 }, { "epoch": 3.3822441430332923, "grad_norm": 0.0345197468996048, "learning_rate": 0.01, "loss": 1.9829, "step": 32916 }, { "epoch": 3.382552404438964, "grad_norm": 0.04965033382177353, "learning_rate": 0.01, "loss": 2.0111, "step": 32919 }, { "epoch": 3.3828606658446363, "grad_norm": 0.20276527106761932, "learning_rate": 0.01, "loss": 2.0342, "step": 32922 }, { "epoch": 3.383168927250308, "grad_norm": 0.0702800378203392, "learning_rate": 0.01, "loss": 1.9971, "step": 32925 }, { "epoch": 3.3834771886559802, "grad_norm": 0.05775219202041626, "learning_rate": 0.01, "loss": 2.0062, "step": 32928 }, { "epoch": 3.3837854500616524, "grad_norm": 0.0662347599864006, "learning_rate": 0.01, "loss": 1.9962, "step": 32931 }, { "epoch": 3.384093711467324, "grad_norm": 0.05067736655473709, "learning_rate": 0.01, "loss": 1.9963, "step": 32934 }, { "epoch": 3.3844019728729964, "grad_norm": 0.057027652859687805, "learning_rate": 0.01, "loss": 2.0261, "step": 32937 }, { "epoch": 3.384710234278668, "grad_norm": 0.0408274307847023, "learning_rate": 0.01, "loss": 2.0139, "step": 32940 }, { "epoch": 3.3850184956843403, "grad_norm": 0.049467723816633224, "learning_rate": 0.01, "loss": 2.0133, "step": 32943 }, { "epoch": 3.3853267570900125, "grad_norm": 0.07573775947093964, "learning_rate": 0.01, "loss": 2.0331, "step": 32946 }, { "epoch": 3.3856350184956843, "grad_norm": 0.04027678817510605, "learning_rate": 0.01, "loss": 2.0118, "step": 32949 }, { "epoch": 3.3859432799013565, "grad_norm": 0.09980335086584091, "learning_rate": 0.01, "loss": 2.0158, "step": 32952 }, { "epoch": 3.386251541307028, "grad_norm": 0.06230602413415909, "learning_rate": 0.01, "loss": 2.0235, "step": 32955 }, { "epoch": 3.3865598027127004, "grad_norm": 0.09655454754829407, "learning_rate": 0.01, "loss": 2.0003, "step": 32958 }, { "epoch": 3.3868680641183726, "grad_norm": 0.053587790578603745, "learning_rate": 0.01, "loss": 2.002, "step": 32961 }, { "epoch": 3.3871763255240444, "grad_norm": 0.08395679295063019, "learning_rate": 0.01, "loss": 1.9928, "step": 32964 }, { "epoch": 3.3874845869297165, "grad_norm": 0.08353892713785172, "learning_rate": 0.01, "loss": 2.0052, "step": 32967 }, { "epoch": 3.3877928483353883, "grad_norm": 0.11298651993274689, "learning_rate": 0.01, "loss": 2.0039, "step": 32970 }, { "epoch": 3.3881011097410605, "grad_norm": 0.08317071199417114, "learning_rate": 0.01, "loss": 2.0095, "step": 32973 }, { "epoch": 3.3884093711467322, "grad_norm": 0.07725278288125992, "learning_rate": 0.01, "loss": 2.0255, "step": 32976 }, { "epoch": 3.3887176325524044, "grad_norm": 0.06264784932136536, "learning_rate": 0.01, "loss": 2.017, "step": 32979 }, { "epoch": 3.3890258939580766, "grad_norm": 0.0588025264441967, "learning_rate": 0.01, "loss": 2.0138, "step": 32982 }, { "epoch": 3.3893341553637484, "grad_norm": 0.033383727073669434, "learning_rate": 0.01, "loss": 2.0105, "step": 32985 }, { "epoch": 3.3896424167694206, "grad_norm": 0.04963357746601105, "learning_rate": 0.01, "loss": 2.0332, "step": 32988 }, { "epoch": 3.3899506781750923, "grad_norm": 0.03166192024946213, "learning_rate": 0.01, "loss": 1.9884, "step": 32991 }, { "epoch": 3.3902589395807645, "grad_norm": 0.0424019880592823, "learning_rate": 0.01, "loss": 1.9727, "step": 32994 }, { "epoch": 3.3905672009864363, "grad_norm": 0.0549466572701931, "learning_rate": 0.01, "loss": 1.9858, "step": 32997 }, { "epoch": 3.3908754623921085, "grad_norm": 0.06859169900417328, "learning_rate": 0.01, "loss": 1.9981, "step": 33000 }, { "epoch": 3.3911837237977807, "grad_norm": 0.05035685375332832, "learning_rate": 0.01, "loss": 2.0422, "step": 33003 }, { "epoch": 3.3914919852034524, "grad_norm": 0.10227832943201065, "learning_rate": 0.01, "loss": 1.9835, "step": 33006 }, { "epoch": 3.3918002466091246, "grad_norm": 0.052029043436050415, "learning_rate": 0.01, "loss": 2.0341, "step": 33009 }, { "epoch": 3.392108508014797, "grad_norm": 0.03569505736231804, "learning_rate": 0.01, "loss": 2.0065, "step": 33012 }, { "epoch": 3.3924167694204685, "grad_norm": 0.05492673069238663, "learning_rate": 0.01, "loss": 1.9839, "step": 33015 }, { "epoch": 3.3927250308261407, "grad_norm": 0.10698610544204712, "learning_rate": 0.01, "loss": 1.98, "step": 33018 }, { "epoch": 3.3930332922318125, "grad_norm": 0.051218993961811066, "learning_rate": 0.01, "loss": 1.9778, "step": 33021 }, { "epoch": 3.3933415536374847, "grad_norm": 0.10021807998418808, "learning_rate": 0.01, "loss": 1.997, "step": 33024 }, { "epoch": 3.3936498150431564, "grad_norm": 0.043556150048971176, "learning_rate": 0.01, "loss": 2.0145, "step": 33027 }, { "epoch": 3.3939580764488286, "grad_norm": 0.052555110305547714, "learning_rate": 0.01, "loss": 2.0, "step": 33030 }, { "epoch": 3.394266337854501, "grad_norm": 0.1656499058008194, "learning_rate": 0.01, "loss": 1.9747, "step": 33033 }, { "epoch": 3.3945745992601726, "grad_norm": 0.08107822388410568, "learning_rate": 0.01, "loss": 1.9894, "step": 33036 }, { "epoch": 3.3948828606658448, "grad_norm": 0.05703389272093773, "learning_rate": 0.01, "loss": 1.9935, "step": 33039 }, { "epoch": 3.3951911220715165, "grad_norm": 0.060602329671382904, "learning_rate": 0.01, "loss": 2.01, "step": 33042 }, { "epoch": 3.3954993834771887, "grad_norm": 0.05680840089917183, "learning_rate": 0.01, "loss": 2.0178, "step": 33045 }, { "epoch": 3.3958076448828605, "grad_norm": 0.053718626499176025, "learning_rate": 0.01, "loss": 2.0021, "step": 33048 }, { "epoch": 3.3961159062885327, "grad_norm": 0.04885102063417435, "learning_rate": 0.01, "loss": 2.0215, "step": 33051 }, { "epoch": 3.396424167694205, "grad_norm": 0.046444397419691086, "learning_rate": 0.01, "loss": 2.0117, "step": 33054 }, { "epoch": 3.3967324290998766, "grad_norm": 0.03031921572983265, "learning_rate": 0.01, "loss": 1.9973, "step": 33057 }, { "epoch": 3.397040690505549, "grad_norm": 0.048908550292253494, "learning_rate": 0.01, "loss": 2.0008, "step": 33060 }, { "epoch": 3.3973489519112205, "grad_norm": 0.06003925949335098, "learning_rate": 0.01, "loss": 1.9769, "step": 33063 }, { "epoch": 3.3976572133168927, "grad_norm": 0.10929730534553528, "learning_rate": 0.01, "loss": 1.998, "step": 33066 }, { "epoch": 3.3979654747225645, "grad_norm": 0.09032581746578217, "learning_rate": 0.01, "loss": 2.0331, "step": 33069 }, { "epoch": 3.3982737361282367, "grad_norm": 0.043940551578998566, "learning_rate": 0.01, "loss": 2.0076, "step": 33072 }, { "epoch": 3.398581997533909, "grad_norm": 0.10148674994707108, "learning_rate": 0.01, "loss": 1.9994, "step": 33075 }, { "epoch": 3.3988902589395806, "grad_norm": 0.04877715930342674, "learning_rate": 0.01, "loss": 2.0257, "step": 33078 }, { "epoch": 3.399198520345253, "grad_norm": 0.10485149919986725, "learning_rate": 0.01, "loss": 2.0297, "step": 33081 }, { "epoch": 3.399506781750925, "grad_norm": 0.12222550064325333, "learning_rate": 0.01, "loss": 2.0118, "step": 33084 }, { "epoch": 3.3998150431565968, "grad_norm": 0.0744134709239006, "learning_rate": 0.01, "loss": 2.0157, "step": 33087 }, { "epoch": 3.400123304562269, "grad_norm": 0.08050314337015152, "learning_rate": 0.01, "loss": 1.9932, "step": 33090 }, { "epoch": 3.4004315659679407, "grad_norm": 0.0806950256228447, "learning_rate": 0.01, "loss": 1.997, "step": 33093 }, { "epoch": 3.400739827373613, "grad_norm": 0.06369089335203171, "learning_rate": 0.01, "loss": 2.0101, "step": 33096 }, { "epoch": 3.4010480887792847, "grad_norm": 0.06041014939546585, "learning_rate": 0.01, "loss": 2.0408, "step": 33099 }, { "epoch": 3.401356350184957, "grad_norm": 0.0476149246096611, "learning_rate": 0.01, "loss": 1.988, "step": 33102 }, { "epoch": 3.401664611590629, "grad_norm": 0.05710010603070259, "learning_rate": 0.01, "loss": 2.0152, "step": 33105 }, { "epoch": 3.401972872996301, "grad_norm": 0.04351675137877464, "learning_rate": 0.01, "loss": 1.9752, "step": 33108 }, { "epoch": 3.402281134401973, "grad_norm": 0.04341613128781319, "learning_rate": 0.01, "loss": 2.0149, "step": 33111 }, { "epoch": 3.4025893958076447, "grad_norm": 0.04619600623846054, "learning_rate": 0.01, "loss": 2.0107, "step": 33114 }, { "epoch": 3.402897657213317, "grad_norm": 0.07674260437488556, "learning_rate": 0.01, "loss": 2.0243, "step": 33117 }, { "epoch": 3.4032059186189887, "grad_norm": 0.03869005665183067, "learning_rate": 0.01, "loss": 1.9774, "step": 33120 }, { "epoch": 3.403514180024661, "grad_norm": 0.08710911124944687, "learning_rate": 0.01, "loss": 1.995, "step": 33123 }, { "epoch": 3.403822441430333, "grad_norm": 0.09343576431274414, "learning_rate": 0.01, "loss": 1.9881, "step": 33126 }, { "epoch": 3.404130702836005, "grad_norm": 0.048723649233579636, "learning_rate": 0.01, "loss": 2.0273, "step": 33129 }, { "epoch": 3.404438964241677, "grad_norm": 0.07655228674411774, "learning_rate": 0.01, "loss": 2.0235, "step": 33132 }, { "epoch": 3.4047472256473488, "grad_norm": 0.08873139321804047, "learning_rate": 0.01, "loss": 1.9975, "step": 33135 }, { "epoch": 3.405055487053021, "grad_norm": 0.06514773517847061, "learning_rate": 0.01, "loss": 2.0167, "step": 33138 }, { "epoch": 3.405363748458693, "grad_norm": 0.06446196138858795, "learning_rate": 0.01, "loss": 2.0051, "step": 33141 }, { "epoch": 3.405672009864365, "grad_norm": 0.12146038562059402, "learning_rate": 0.01, "loss": 2.0171, "step": 33144 }, { "epoch": 3.405980271270037, "grad_norm": 0.06639200448989868, "learning_rate": 0.01, "loss": 2.0049, "step": 33147 }, { "epoch": 3.406288532675709, "grad_norm": 0.05644892901182175, "learning_rate": 0.01, "loss": 2.0172, "step": 33150 }, { "epoch": 3.406596794081381, "grad_norm": 0.037636831402778625, "learning_rate": 0.01, "loss": 2.008, "step": 33153 }, { "epoch": 3.4069050554870532, "grad_norm": 0.031332679092884064, "learning_rate": 0.01, "loss": 1.9821, "step": 33156 }, { "epoch": 3.407213316892725, "grad_norm": 0.051785338670015335, "learning_rate": 0.01, "loss": 2.011, "step": 33159 }, { "epoch": 3.407521578298397, "grad_norm": 0.07761172205209732, "learning_rate": 0.01, "loss": 2.0277, "step": 33162 }, { "epoch": 3.407829839704069, "grad_norm": 0.09536123275756836, "learning_rate": 0.01, "loss": 2.0222, "step": 33165 }, { "epoch": 3.408138101109741, "grad_norm": 0.04122615605592728, "learning_rate": 0.01, "loss": 2.0065, "step": 33168 }, { "epoch": 3.408446362515413, "grad_norm": 0.0360184982419014, "learning_rate": 0.01, "loss": 2.002, "step": 33171 }, { "epoch": 3.408754623921085, "grad_norm": 0.03222360834479332, "learning_rate": 0.01, "loss": 2.0083, "step": 33174 }, { "epoch": 3.4090628853267573, "grad_norm": 0.043042391538619995, "learning_rate": 0.01, "loss": 1.989, "step": 33177 }, { "epoch": 3.409371146732429, "grad_norm": 0.06593729555606842, "learning_rate": 0.01, "loss": 2.0115, "step": 33180 }, { "epoch": 3.409679408138101, "grad_norm": 0.09074060618877411, "learning_rate": 0.01, "loss": 2.0168, "step": 33183 }, { "epoch": 3.409987669543773, "grad_norm": 0.06270799785852432, "learning_rate": 0.01, "loss": 1.9973, "step": 33186 }, { "epoch": 3.410295930949445, "grad_norm": 0.08982829004526138, "learning_rate": 0.01, "loss": 1.9966, "step": 33189 }, { "epoch": 3.410604192355117, "grad_norm": 0.04844099283218384, "learning_rate": 0.01, "loss": 2.0223, "step": 33192 }, { "epoch": 3.410912453760789, "grad_norm": 0.07782240957021713, "learning_rate": 0.01, "loss": 2.025, "step": 33195 }, { "epoch": 3.4112207151664613, "grad_norm": 0.07867666333913803, "learning_rate": 0.01, "loss": 2.0076, "step": 33198 }, { "epoch": 3.411528976572133, "grad_norm": 0.07254987210035324, "learning_rate": 0.01, "loss": 2.0129, "step": 33201 }, { "epoch": 3.4118372379778052, "grad_norm": 0.04480341821908951, "learning_rate": 0.01, "loss": 1.9914, "step": 33204 }, { "epoch": 3.412145499383477, "grad_norm": 0.03986749053001404, "learning_rate": 0.01, "loss": 1.9867, "step": 33207 }, { "epoch": 3.412453760789149, "grad_norm": 0.04472361132502556, "learning_rate": 0.01, "loss": 2.0184, "step": 33210 }, { "epoch": 3.4127620221948214, "grad_norm": 0.07955579459667206, "learning_rate": 0.01, "loss": 2.01, "step": 33213 }, { "epoch": 3.413070283600493, "grad_norm": 0.056707967072725296, "learning_rate": 0.01, "loss": 2.0105, "step": 33216 }, { "epoch": 3.4133785450061653, "grad_norm": 0.04070746898651123, "learning_rate": 0.01, "loss": 2.0226, "step": 33219 }, { "epoch": 3.413686806411837, "grad_norm": 0.03773896023631096, "learning_rate": 0.01, "loss": 2.015, "step": 33222 }, { "epoch": 3.4139950678175093, "grad_norm": 0.10554299503564835, "learning_rate": 0.01, "loss": 2.0331, "step": 33225 }, { "epoch": 3.4143033292231815, "grad_norm": 0.0555710643529892, "learning_rate": 0.01, "loss": 1.996, "step": 33228 }, { "epoch": 3.414611590628853, "grad_norm": 0.1139519140124321, "learning_rate": 0.01, "loss": 1.9821, "step": 33231 }, { "epoch": 3.4149198520345254, "grad_norm": 0.042904384434223175, "learning_rate": 0.01, "loss": 2.0057, "step": 33234 }, { "epoch": 3.415228113440197, "grad_norm": 0.10528502613306046, "learning_rate": 0.01, "loss": 1.9781, "step": 33237 }, { "epoch": 3.4155363748458694, "grad_norm": 0.03914659097790718, "learning_rate": 0.01, "loss": 2.0029, "step": 33240 }, { "epoch": 3.415844636251541, "grad_norm": 0.11122586578130722, "learning_rate": 0.01, "loss": 1.9968, "step": 33243 }, { "epoch": 3.4161528976572133, "grad_norm": 0.06572670489549637, "learning_rate": 0.01, "loss": 2.0077, "step": 33246 }, { "epoch": 3.4164611590628855, "grad_norm": 0.05022534728050232, "learning_rate": 0.01, "loss": 2.0224, "step": 33249 }, { "epoch": 3.4167694204685573, "grad_norm": 0.08149400353431702, "learning_rate": 0.01, "loss": 2.0326, "step": 33252 }, { "epoch": 3.4170776818742294, "grad_norm": 0.04350002855062485, "learning_rate": 0.01, "loss": 1.9947, "step": 33255 }, { "epoch": 3.417385943279901, "grad_norm": 0.0445462241768837, "learning_rate": 0.01, "loss": 2.0372, "step": 33258 }, { "epoch": 3.4176942046855734, "grad_norm": 0.04082934185862541, "learning_rate": 0.01, "loss": 2.0199, "step": 33261 }, { "epoch": 3.418002466091245, "grad_norm": 0.060355301946401596, "learning_rate": 0.01, "loss": 2.0204, "step": 33264 }, { "epoch": 3.4183107274969173, "grad_norm": 0.04439264163374901, "learning_rate": 0.01, "loss": 2.0216, "step": 33267 }, { "epoch": 3.4186189889025895, "grad_norm": 0.11760549992322922, "learning_rate": 0.01, "loss": 2.0284, "step": 33270 }, { "epoch": 3.4189272503082613, "grad_norm": 0.08547448366880417, "learning_rate": 0.01, "loss": 2.0307, "step": 33273 }, { "epoch": 3.4192355117139335, "grad_norm": 0.06072860211133957, "learning_rate": 0.01, "loss": 1.992, "step": 33276 }, { "epoch": 3.4195437731196057, "grad_norm": 0.03637344762682915, "learning_rate": 0.01, "loss": 2.02, "step": 33279 }, { "epoch": 3.4198520345252774, "grad_norm": 0.0440024733543396, "learning_rate": 0.01, "loss": 2.007, "step": 33282 }, { "epoch": 3.4201602959309496, "grad_norm": 0.040933944284915924, "learning_rate": 0.01, "loss": 2.0253, "step": 33285 }, { "epoch": 3.4204685573366214, "grad_norm": 0.10233576595783234, "learning_rate": 0.01, "loss": 2.0433, "step": 33288 }, { "epoch": 3.4207768187422936, "grad_norm": 0.10494884103536606, "learning_rate": 0.01, "loss": 2.0007, "step": 33291 }, { "epoch": 3.4210850801479653, "grad_norm": 0.04247460886836052, "learning_rate": 0.01, "loss": 2.0119, "step": 33294 }, { "epoch": 3.4213933415536375, "grad_norm": 0.07929468154907227, "learning_rate": 0.01, "loss": 2.0067, "step": 33297 }, { "epoch": 3.4217016029593097, "grad_norm": 0.04947086423635483, "learning_rate": 0.01, "loss": 2.0049, "step": 33300 }, { "epoch": 3.4220098643649814, "grad_norm": 0.05473649874329567, "learning_rate": 0.01, "loss": 1.9872, "step": 33303 }, { "epoch": 3.4223181257706536, "grad_norm": 0.06789970397949219, "learning_rate": 0.01, "loss": 2.0173, "step": 33306 }, { "epoch": 3.4226263871763254, "grad_norm": 0.044122181832790375, "learning_rate": 0.01, "loss": 1.9957, "step": 33309 }, { "epoch": 3.4229346485819976, "grad_norm": 0.0713338777422905, "learning_rate": 0.01, "loss": 1.9912, "step": 33312 }, { "epoch": 3.4232429099876693, "grad_norm": 0.09774953871965408, "learning_rate": 0.01, "loss": 1.999, "step": 33315 }, { "epoch": 3.4235511713933415, "grad_norm": 0.049434032291173935, "learning_rate": 0.01, "loss": 2.0194, "step": 33318 }, { "epoch": 3.4238594327990137, "grad_norm": 0.06290262192487717, "learning_rate": 0.01, "loss": 2.0014, "step": 33321 }, { "epoch": 3.4241676942046855, "grad_norm": 0.042732108384370804, "learning_rate": 0.01, "loss": 1.9909, "step": 33324 }, { "epoch": 3.4244759556103577, "grad_norm": 0.03461041674017906, "learning_rate": 0.01, "loss": 1.9881, "step": 33327 }, { "epoch": 3.4247842170160294, "grad_norm": 0.04503572732210159, "learning_rate": 0.01, "loss": 1.9981, "step": 33330 }, { "epoch": 3.4250924784217016, "grad_norm": 0.04774646461009979, "learning_rate": 0.01, "loss": 2.0216, "step": 33333 }, { "epoch": 3.4254007398273734, "grad_norm": 0.08576779067516327, "learning_rate": 0.01, "loss": 2.0124, "step": 33336 }, { "epoch": 3.4257090012330456, "grad_norm": 0.13729599118232727, "learning_rate": 0.01, "loss": 2.0126, "step": 33339 }, { "epoch": 3.4260172626387178, "grad_norm": 0.060716260224580765, "learning_rate": 0.01, "loss": 2.0153, "step": 33342 }, { "epoch": 3.4263255240443895, "grad_norm": 0.037777479737997055, "learning_rate": 0.01, "loss": 2.0181, "step": 33345 }, { "epoch": 3.4266337854500617, "grad_norm": 0.042688485234975815, "learning_rate": 0.01, "loss": 1.9792, "step": 33348 }, { "epoch": 3.426942046855734, "grad_norm": 0.07009312510490417, "learning_rate": 0.01, "loss": 2.0143, "step": 33351 }, { "epoch": 3.4272503082614056, "grad_norm": 0.10260313749313354, "learning_rate": 0.01, "loss": 1.9985, "step": 33354 }, { "epoch": 3.427558569667078, "grad_norm": 0.08466068655252457, "learning_rate": 0.01, "loss": 1.9981, "step": 33357 }, { "epoch": 3.4278668310727496, "grad_norm": 0.061912521719932556, "learning_rate": 0.01, "loss": 2.0144, "step": 33360 }, { "epoch": 3.428175092478422, "grad_norm": 0.0470789298415184, "learning_rate": 0.01, "loss": 1.9863, "step": 33363 }, { "epoch": 3.4284833538840935, "grad_norm": 0.0477573424577713, "learning_rate": 0.01, "loss": 2.0189, "step": 33366 }, { "epoch": 3.4287916152897657, "grad_norm": 0.03324504569172859, "learning_rate": 0.01, "loss": 2.0107, "step": 33369 }, { "epoch": 3.429099876695438, "grad_norm": 0.07741666585206985, "learning_rate": 0.01, "loss": 2.0103, "step": 33372 }, { "epoch": 3.4294081381011097, "grad_norm": 0.05770926922559738, "learning_rate": 0.01, "loss": 2.0398, "step": 33375 }, { "epoch": 3.429716399506782, "grad_norm": 0.08471731096506119, "learning_rate": 0.01, "loss": 2.0451, "step": 33378 }, { "epoch": 3.4300246609124536, "grad_norm": 0.04667286202311516, "learning_rate": 0.01, "loss": 2.0136, "step": 33381 }, { "epoch": 3.430332922318126, "grad_norm": 0.0683809220790863, "learning_rate": 0.01, "loss": 2.0032, "step": 33384 }, { "epoch": 3.4306411837237976, "grad_norm": 0.07834406197071075, "learning_rate": 0.01, "loss": 2.0172, "step": 33387 }, { "epoch": 3.4309494451294698, "grad_norm": 0.04956913739442825, "learning_rate": 0.01, "loss": 2.0226, "step": 33390 }, { "epoch": 3.431257706535142, "grad_norm": 0.05492135509848595, "learning_rate": 0.01, "loss": 2.0153, "step": 33393 }, { "epoch": 3.4315659679408137, "grad_norm": 0.05343586578965187, "learning_rate": 0.01, "loss": 1.9995, "step": 33396 }, { "epoch": 3.431874229346486, "grad_norm": 0.04083942621946335, "learning_rate": 0.01, "loss": 2.005, "step": 33399 }, { "epoch": 3.4321824907521576, "grad_norm": 0.06474661827087402, "learning_rate": 0.01, "loss": 2.0238, "step": 33402 }, { "epoch": 3.43249075215783, "grad_norm": 0.09690015017986298, "learning_rate": 0.01, "loss": 1.9986, "step": 33405 }, { "epoch": 3.432799013563502, "grad_norm": 0.17796829342842102, "learning_rate": 0.01, "loss": 2.0288, "step": 33408 }, { "epoch": 3.433107274969174, "grad_norm": 0.11173928529024124, "learning_rate": 0.01, "loss": 2.0249, "step": 33411 }, { "epoch": 3.433415536374846, "grad_norm": 0.045607730746269226, "learning_rate": 0.01, "loss": 1.9694, "step": 33414 }, { "epoch": 3.4337237977805177, "grad_norm": 0.03982311487197876, "learning_rate": 0.01, "loss": 1.9895, "step": 33417 }, { "epoch": 3.43403205918619, "grad_norm": 0.03420604392886162, "learning_rate": 0.01, "loss": 1.9957, "step": 33420 }, { "epoch": 3.434340320591862, "grad_norm": 0.03757992014288902, "learning_rate": 0.01, "loss": 2.026, "step": 33423 }, { "epoch": 3.434648581997534, "grad_norm": 0.05664653331041336, "learning_rate": 0.01, "loss": 1.9884, "step": 33426 }, { "epoch": 3.434956843403206, "grad_norm": 0.05265260115265846, "learning_rate": 0.01, "loss": 2.0409, "step": 33429 }, { "epoch": 3.435265104808878, "grad_norm": 0.0430876798927784, "learning_rate": 0.01, "loss": 2.0004, "step": 33432 }, { "epoch": 3.43557336621455, "grad_norm": 0.04210485517978668, "learning_rate": 0.01, "loss": 2.0004, "step": 33435 }, { "epoch": 3.4358816276202218, "grad_norm": 0.049002841114997864, "learning_rate": 0.01, "loss": 2.0129, "step": 33438 }, { "epoch": 3.436189889025894, "grad_norm": 0.0786898285150528, "learning_rate": 0.01, "loss": 1.9915, "step": 33441 }, { "epoch": 3.436498150431566, "grad_norm": 0.04218638688325882, "learning_rate": 0.01, "loss": 2.0079, "step": 33444 }, { "epoch": 3.436806411837238, "grad_norm": 0.13452041149139404, "learning_rate": 0.01, "loss": 2.0105, "step": 33447 }, { "epoch": 3.43711467324291, "grad_norm": 0.06728319823741913, "learning_rate": 0.01, "loss": 2.0053, "step": 33450 }, { "epoch": 3.437422934648582, "grad_norm": 0.09041707217693329, "learning_rate": 0.01, "loss": 2.0059, "step": 33453 }, { "epoch": 3.437731196054254, "grad_norm": 0.04817497730255127, "learning_rate": 0.01, "loss": 2.0024, "step": 33456 }, { "epoch": 3.438039457459926, "grad_norm": 0.05033170431852341, "learning_rate": 0.01, "loss": 2.016, "step": 33459 }, { "epoch": 3.438347718865598, "grad_norm": 0.12856252491474152, "learning_rate": 0.01, "loss": 2.0202, "step": 33462 }, { "epoch": 3.43865598027127, "grad_norm": 0.03690528869628906, "learning_rate": 0.01, "loss": 2.0131, "step": 33465 }, { "epoch": 3.438964241676942, "grad_norm": 0.09053459018468857, "learning_rate": 0.01, "loss": 2.0057, "step": 33468 }, { "epoch": 3.439272503082614, "grad_norm": 0.11929309368133545, "learning_rate": 0.01, "loss": 1.9995, "step": 33471 }, { "epoch": 3.4395807644882863, "grad_norm": 0.06987284123897552, "learning_rate": 0.01, "loss": 1.9871, "step": 33474 }, { "epoch": 3.439889025893958, "grad_norm": 0.06181707605719566, "learning_rate": 0.01, "loss": 2.0167, "step": 33477 }, { "epoch": 3.4401972872996303, "grad_norm": 0.045914020389318466, "learning_rate": 0.01, "loss": 2.0067, "step": 33480 }, { "epoch": 3.440505548705302, "grad_norm": 0.04277556762099266, "learning_rate": 0.01, "loss": 2.0084, "step": 33483 }, { "epoch": 3.440813810110974, "grad_norm": 0.045943450182676315, "learning_rate": 0.01, "loss": 2.0117, "step": 33486 }, { "epoch": 3.441122071516646, "grad_norm": 0.04440785571932793, "learning_rate": 0.01, "loss": 1.9753, "step": 33489 }, { "epoch": 3.441430332922318, "grad_norm": 0.12033234536647797, "learning_rate": 0.01, "loss": 2.0216, "step": 33492 }, { "epoch": 3.4417385943279903, "grad_norm": 0.06069677323102951, "learning_rate": 0.01, "loss": 2.0139, "step": 33495 }, { "epoch": 3.442046855733662, "grad_norm": 0.08571046590805054, "learning_rate": 0.01, "loss": 2.0121, "step": 33498 }, { "epoch": 3.4423551171393343, "grad_norm": 0.05251142010092735, "learning_rate": 0.01, "loss": 2.0234, "step": 33501 }, { "epoch": 3.442663378545006, "grad_norm": 0.09658701717853546, "learning_rate": 0.01, "loss": 2.023, "step": 33504 }, { "epoch": 3.4429716399506782, "grad_norm": 0.10625968873500824, "learning_rate": 0.01, "loss": 2.0158, "step": 33507 }, { "epoch": 3.44327990135635, "grad_norm": 0.061645977199077606, "learning_rate": 0.01, "loss": 1.987, "step": 33510 }, { "epoch": 3.443588162762022, "grad_norm": 0.06879527121782303, "learning_rate": 0.01, "loss": 1.9988, "step": 33513 }, { "epoch": 3.4438964241676944, "grad_norm": 0.07986783981323242, "learning_rate": 0.01, "loss": 2.0087, "step": 33516 }, { "epoch": 3.444204685573366, "grad_norm": 0.06323929876089096, "learning_rate": 0.01, "loss": 2.0047, "step": 33519 }, { "epoch": 3.4445129469790383, "grad_norm": 0.08186205476522446, "learning_rate": 0.01, "loss": 2.0312, "step": 33522 }, { "epoch": 3.44482120838471, "grad_norm": 0.1049259677529335, "learning_rate": 0.01, "loss": 1.9993, "step": 33525 }, { "epoch": 3.4451294697903823, "grad_norm": 0.12427592277526855, "learning_rate": 0.01, "loss": 1.9984, "step": 33528 }, { "epoch": 3.445437731196054, "grad_norm": 0.04911283776164055, "learning_rate": 0.01, "loss": 1.9979, "step": 33531 }, { "epoch": 3.445745992601726, "grad_norm": 0.07451221346855164, "learning_rate": 0.01, "loss": 1.9814, "step": 33534 }, { "epoch": 3.4460542540073984, "grad_norm": 0.04838255047798157, "learning_rate": 0.01, "loss": 2.0044, "step": 33537 }, { "epoch": 3.44636251541307, "grad_norm": 0.0435669869184494, "learning_rate": 0.01, "loss": 2.0134, "step": 33540 }, { "epoch": 3.4466707768187423, "grad_norm": 0.12010036408901215, "learning_rate": 0.01, "loss": 1.965, "step": 33543 }, { "epoch": 3.4469790382244145, "grad_norm": 0.04258548840880394, "learning_rate": 0.01, "loss": 1.9871, "step": 33546 }, { "epoch": 3.4472872996300863, "grad_norm": 0.04736476391553879, "learning_rate": 0.01, "loss": 1.983, "step": 33549 }, { "epoch": 3.4475955610357585, "grad_norm": 0.04423545300960541, "learning_rate": 0.01, "loss": 2.001, "step": 33552 }, { "epoch": 3.4479038224414302, "grad_norm": 0.07585839927196503, "learning_rate": 0.01, "loss": 2.0094, "step": 33555 }, { "epoch": 3.4482120838471024, "grad_norm": 0.03559425473213196, "learning_rate": 0.01, "loss": 1.9835, "step": 33558 }, { "epoch": 3.448520345252774, "grad_norm": 0.058567702770233154, "learning_rate": 0.01, "loss": 1.9996, "step": 33561 }, { "epoch": 3.4488286066584464, "grad_norm": 0.05344400927424431, "learning_rate": 0.01, "loss": 2.0128, "step": 33564 }, { "epoch": 3.4491368680641186, "grad_norm": 0.0584418885409832, "learning_rate": 0.01, "loss": 2.0334, "step": 33567 }, { "epoch": 3.4494451294697903, "grad_norm": 0.14322839677333832, "learning_rate": 0.01, "loss": 2.0362, "step": 33570 }, { "epoch": 3.4497533908754625, "grad_norm": 0.039136361330747604, "learning_rate": 0.01, "loss": 1.9792, "step": 33573 }, { "epoch": 3.4500616522811343, "grad_norm": 0.0871317982673645, "learning_rate": 0.01, "loss": 2.03, "step": 33576 }, { "epoch": 3.4503699136868065, "grad_norm": 0.07295375317335129, "learning_rate": 0.01, "loss": 1.9906, "step": 33579 }, { "epoch": 3.450678175092478, "grad_norm": 0.04469291865825653, "learning_rate": 0.01, "loss": 2.0245, "step": 33582 }, { "epoch": 3.4509864364981504, "grad_norm": 0.06063535064458847, "learning_rate": 0.01, "loss": 2.0083, "step": 33585 }, { "epoch": 3.4512946979038226, "grad_norm": 0.07924182713031769, "learning_rate": 0.01, "loss": 2.0115, "step": 33588 }, { "epoch": 3.4516029593094943, "grad_norm": 0.089077427983284, "learning_rate": 0.01, "loss": 2.017, "step": 33591 }, { "epoch": 3.4519112207151665, "grad_norm": 0.08197617530822754, "learning_rate": 0.01, "loss": 1.9987, "step": 33594 }, { "epoch": 3.4522194821208383, "grad_norm": 0.039551399648189545, "learning_rate": 0.01, "loss": 2.0211, "step": 33597 }, { "epoch": 3.4525277435265105, "grad_norm": 0.08920145779848099, "learning_rate": 0.01, "loss": 1.9952, "step": 33600 }, { "epoch": 3.4528360049321827, "grad_norm": 0.04301462695002556, "learning_rate": 0.01, "loss": 1.9898, "step": 33603 }, { "epoch": 3.4531442663378544, "grad_norm": 0.05201060697436333, "learning_rate": 0.01, "loss": 2.0022, "step": 33606 }, { "epoch": 3.4534525277435266, "grad_norm": 0.05899956077337265, "learning_rate": 0.01, "loss": 2.0318, "step": 33609 }, { "epoch": 3.4537607891491984, "grad_norm": 0.09653299301862717, "learning_rate": 0.01, "loss": 1.9955, "step": 33612 }, { "epoch": 3.4540690505548706, "grad_norm": 0.06416913866996765, "learning_rate": 0.01, "loss": 1.9858, "step": 33615 }, { "epoch": 3.4543773119605428, "grad_norm": 0.07932529598474503, "learning_rate": 0.01, "loss": 1.9941, "step": 33618 }, { "epoch": 3.4546855733662145, "grad_norm": 0.06251846253871918, "learning_rate": 0.01, "loss": 1.984, "step": 33621 }, { "epoch": 3.4549938347718867, "grad_norm": 0.07022767513990402, "learning_rate": 0.01, "loss": 2.0032, "step": 33624 }, { "epoch": 3.4553020961775585, "grad_norm": 0.08116226643323898, "learning_rate": 0.01, "loss": 2.0229, "step": 33627 }, { "epoch": 3.4556103575832307, "grad_norm": 0.07052188366651535, "learning_rate": 0.01, "loss": 1.986, "step": 33630 }, { "epoch": 3.4559186189889024, "grad_norm": 0.055427566170692444, "learning_rate": 0.01, "loss": 1.9792, "step": 33633 }, { "epoch": 3.4562268803945746, "grad_norm": 0.049462925642728806, "learning_rate": 0.01, "loss": 1.9937, "step": 33636 }, { "epoch": 3.456535141800247, "grad_norm": 0.05117397755384445, "learning_rate": 0.01, "loss": 2.0058, "step": 33639 }, { "epoch": 3.4568434032059185, "grad_norm": 0.14244894683361053, "learning_rate": 0.01, "loss": 1.9981, "step": 33642 }, { "epoch": 3.4571516646115907, "grad_norm": 0.040531981736421585, "learning_rate": 0.01, "loss": 2.0291, "step": 33645 }, { "epoch": 3.4574599260172625, "grad_norm": 0.0898265540599823, "learning_rate": 0.01, "loss": 2.0103, "step": 33648 }, { "epoch": 3.4577681874229347, "grad_norm": 0.06414288282394409, "learning_rate": 0.01, "loss": 1.9944, "step": 33651 }, { "epoch": 3.4580764488286064, "grad_norm": 0.06362918019294739, "learning_rate": 0.01, "loss": 2.031, "step": 33654 }, { "epoch": 3.4583847102342786, "grad_norm": 0.0881992056965828, "learning_rate": 0.01, "loss": 2.0363, "step": 33657 }, { "epoch": 3.458692971639951, "grad_norm": 0.03909778594970703, "learning_rate": 0.01, "loss": 1.9831, "step": 33660 }, { "epoch": 3.4590012330456226, "grad_norm": 0.06597696989774704, "learning_rate": 0.01, "loss": 2.0153, "step": 33663 }, { "epoch": 3.4593094944512948, "grad_norm": 0.05599299073219299, "learning_rate": 0.01, "loss": 2.0203, "step": 33666 }, { "epoch": 3.4596177558569665, "grad_norm": 0.0734795406460762, "learning_rate": 0.01, "loss": 2.0126, "step": 33669 }, { "epoch": 3.4599260172626387, "grad_norm": 0.07773378491401672, "learning_rate": 0.01, "loss": 1.9858, "step": 33672 }, { "epoch": 3.460234278668311, "grad_norm": 0.04273884743452072, "learning_rate": 0.01, "loss": 2.0054, "step": 33675 }, { "epoch": 3.4605425400739827, "grad_norm": 0.08914119005203247, "learning_rate": 0.01, "loss": 2.0455, "step": 33678 }, { "epoch": 3.460850801479655, "grad_norm": 0.059121765196323395, "learning_rate": 0.01, "loss": 1.9822, "step": 33681 }, { "epoch": 3.4611590628853266, "grad_norm": 0.0828641727566719, "learning_rate": 0.01, "loss": 1.9946, "step": 33684 }, { "epoch": 3.461467324290999, "grad_norm": 0.07057880610227585, "learning_rate": 0.01, "loss": 1.9918, "step": 33687 }, { "epoch": 3.461775585696671, "grad_norm": 0.0789676234126091, "learning_rate": 0.01, "loss": 2.005, "step": 33690 }, { "epoch": 3.4620838471023427, "grad_norm": 0.06654086709022522, "learning_rate": 0.01, "loss": 1.9751, "step": 33693 }, { "epoch": 3.462392108508015, "grad_norm": 0.08804110437631607, "learning_rate": 0.01, "loss": 1.9984, "step": 33696 }, { "epoch": 3.4627003699136867, "grad_norm": 0.05654985085129738, "learning_rate": 0.01, "loss": 1.9957, "step": 33699 }, { "epoch": 3.463008631319359, "grad_norm": 0.03474681079387665, "learning_rate": 0.01, "loss": 1.9757, "step": 33702 }, { "epoch": 3.4633168927250306, "grad_norm": 0.03495550900697708, "learning_rate": 0.01, "loss": 1.985, "step": 33705 }, { "epoch": 3.463625154130703, "grad_norm": 0.07207003980875015, "learning_rate": 0.01, "loss": 2.0038, "step": 33708 }, { "epoch": 3.463933415536375, "grad_norm": 0.10733482986688614, "learning_rate": 0.01, "loss": 1.998, "step": 33711 }, { "epoch": 3.4642416769420468, "grad_norm": 0.17830905318260193, "learning_rate": 0.01, "loss": 2.0263, "step": 33714 }, { "epoch": 3.464549938347719, "grad_norm": 0.10432233661413193, "learning_rate": 0.01, "loss": 2.0258, "step": 33717 }, { "epoch": 3.4648581997533907, "grad_norm": 0.08804329484701157, "learning_rate": 0.01, "loss": 2.0208, "step": 33720 }, { "epoch": 3.465166461159063, "grad_norm": 0.09405447542667389, "learning_rate": 0.01, "loss": 2.0228, "step": 33723 }, { "epoch": 3.4654747225647347, "grad_norm": 0.08153831958770752, "learning_rate": 0.01, "loss": 2.0129, "step": 33726 }, { "epoch": 3.465782983970407, "grad_norm": 0.04865524545311928, "learning_rate": 0.01, "loss": 2.0067, "step": 33729 }, { "epoch": 3.466091245376079, "grad_norm": 0.0705978274345398, "learning_rate": 0.01, "loss": 2.005, "step": 33732 }, { "epoch": 3.466399506781751, "grad_norm": 0.08283443003892899, "learning_rate": 0.01, "loss": 1.9943, "step": 33735 }, { "epoch": 3.466707768187423, "grad_norm": 0.059983205050230026, "learning_rate": 0.01, "loss": 1.9991, "step": 33738 }, { "epoch": 3.467016029593095, "grad_norm": 0.045960474759340286, "learning_rate": 0.01, "loss": 1.9823, "step": 33741 }, { "epoch": 3.467324290998767, "grad_norm": 0.03882883861660957, "learning_rate": 0.01, "loss": 2.0191, "step": 33744 }, { "epoch": 3.467632552404439, "grad_norm": 0.07004483044147491, "learning_rate": 0.01, "loss": 1.9851, "step": 33747 }, { "epoch": 3.467940813810111, "grad_norm": 0.047444459050893784, "learning_rate": 0.01, "loss": 2.0257, "step": 33750 }, { "epoch": 3.468249075215783, "grad_norm": 0.04262397438287735, "learning_rate": 0.01, "loss": 1.9981, "step": 33753 }, { "epoch": 3.468557336621455, "grad_norm": 0.05599913001060486, "learning_rate": 0.01, "loss": 2.005, "step": 33756 }, { "epoch": 3.468865598027127, "grad_norm": 0.03917532414197922, "learning_rate": 0.01, "loss": 2.012, "step": 33759 }, { "epoch": 3.469173859432799, "grad_norm": 0.14925096929073334, "learning_rate": 0.01, "loss": 1.9998, "step": 33762 }, { "epoch": 3.469482120838471, "grad_norm": 0.09570999443531036, "learning_rate": 0.01, "loss": 1.987, "step": 33765 }, { "epoch": 3.469790382244143, "grad_norm": 0.07883327454328537, "learning_rate": 0.01, "loss": 2.0051, "step": 33768 }, { "epoch": 3.470098643649815, "grad_norm": 0.06480975449085236, "learning_rate": 0.01, "loss": 2.0074, "step": 33771 }, { "epoch": 3.470406905055487, "grad_norm": 0.054496169090270996, "learning_rate": 0.01, "loss": 2.0306, "step": 33774 }, { "epoch": 3.470715166461159, "grad_norm": 0.04248659685254097, "learning_rate": 0.01, "loss": 2.0213, "step": 33777 }, { "epoch": 3.471023427866831, "grad_norm": 0.04117753356695175, "learning_rate": 0.01, "loss": 1.9845, "step": 33780 }, { "epoch": 3.4713316892725032, "grad_norm": 0.06876087188720703, "learning_rate": 0.01, "loss": 2.0142, "step": 33783 }, { "epoch": 3.471639950678175, "grad_norm": 0.08680430799722672, "learning_rate": 0.01, "loss": 2.0103, "step": 33786 }, { "epoch": 3.471948212083847, "grad_norm": 0.07227423787117004, "learning_rate": 0.01, "loss": 2.0229, "step": 33789 }, { "epoch": 3.472256473489519, "grad_norm": 0.04946593940258026, "learning_rate": 0.01, "loss": 2.0042, "step": 33792 }, { "epoch": 3.472564734895191, "grad_norm": 0.05215618014335632, "learning_rate": 0.01, "loss": 2.0202, "step": 33795 }, { "epoch": 3.4728729963008633, "grad_norm": 0.044869258999824524, "learning_rate": 0.01, "loss": 2.014, "step": 33798 }, { "epoch": 3.473181257706535, "grad_norm": 0.048952534794807434, "learning_rate": 0.01, "loss": 2.0097, "step": 33801 }, { "epoch": 3.4734895191122073, "grad_norm": 0.031407974660396576, "learning_rate": 0.01, "loss": 1.9993, "step": 33804 }, { "epoch": 3.473797780517879, "grad_norm": 0.033864185214042664, "learning_rate": 0.01, "loss": 2.0037, "step": 33807 }, { "epoch": 3.474106041923551, "grad_norm": 0.08060206472873688, "learning_rate": 0.01, "loss": 2.0053, "step": 33810 }, { "epoch": 3.4744143033292234, "grad_norm": 0.059409331530332565, "learning_rate": 0.01, "loss": 1.9876, "step": 33813 }, { "epoch": 3.474722564734895, "grad_norm": 0.04131161794066429, "learning_rate": 0.01, "loss": 1.9964, "step": 33816 }, { "epoch": 3.4750308261405674, "grad_norm": 0.04256697744131088, "learning_rate": 0.01, "loss": 2.0479, "step": 33819 }, { "epoch": 3.475339087546239, "grad_norm": 0.08916765451431274, "learning_rate": 0.01, "loss": 1.9885, "step": 33822 }, { "epoch": 3.4756473489519113, "grad_norm": 0.08474129438400269, "learning_rate": 0.01, "loss": 2.0012, "step": 33825 }, { "epoch": 3.475955610357583, "grad_norm": 0.05649823695421219, "learning_rate": 0.01, "loss": 1.9993, "step": 33828 }, { "epoch": 3.4762638717632552, "grad_norm": 0.05935325473546982, "learning_rate": 0.01, "loss": 2.0004, "step": 33831 }, { "epoch": 3.4765721331689274, "grad_norm": 0.04731081798672676, "learning_rate": 0.01, "loss": 1.9693, "step": 33834 }, { "epoch": 3.476880394574599, "grad_norm": 0.03790346160531044, "learning_rate": 0.01, "loss": 2.0295, "step": 33837 }, { "epoch": 3.4771886559802714, "grad_norm": 0.03726886212825775, "learning_rate": 0.01, "loss": 1.9737, "step": 33840 }, { "epoch": 3.477496917385943, "grad_norm": 0.07210247963666916, "learning_rate": 0.01, "loss": 1.9936, "step": 33843 }, { "epoch": 3.4778051787916153, "grad_norm": 0.07953071594238281, "learning_rate": 0.01, "loss": 2.0269, "step": 33846 }, { "epoch": 3.478113440197287, "grad_norm": 0.08004479855298996, "learning_rate": 0.01, "loss": 1.9947, "step": 33849 }, { "epoch": 3.4784217016029593, "grad_norm": 0.12617255747318268, "learning_rate": 0.01, "loss": 2.041, "step": 33852 }, { "epoch": 3.4787299630086315, "grad_norm": 0.08742248266935349, "learning_rate": 0.01, "loss": 1.9946, "step": 33855 }, { "epoch": 3.479038224414303, "grad_norm": 0.07119353115558624, "learning_rate": 0.01, "loss": 2.0041, "step": 33858 }, { "epoch": 3.4793464858199754, "grad_norm": 0.06368902325630188, "learning_rate": 0.01, "loss": 2.011, "step": 33861 }, { "epoch": 3.479654747225647, "grad_norm": 0.04121660441160202, "learning_rate": 0.01, "loss": 1.9902, "step": 33864 }, { "epoch": 3.4799630086313194, "grad_norm": 0.1237914115190506, "learning_rate": 0.01, "loss": 2.012, "step": 33867 }, { "epoch": 3.4802712700369915, "grad_norm": 0.02918284945189953, "learning_rate": 0.01, "loss": 2.0104, "step": 33870 }, { "epoch": 3.4805795314426633, "grad_norm": 0.0423772819340229, "learning_rate": 0.01, "loss": 2.0024, "step": 33873 }, { "epoch": 3.4808877928483355, "grad_norm": 0.05689895898103714, "learning_rate": 0.01, "loss": 2.0074, "step": 33876 }, { "epoch": 3.4811960542540072, "grad_norm": 0.057178955525159836, "learning_rate": 0.01, "loss": 1.9796, "step": 33879 }, { "epoch": 3.4815043156596794, "grad_norm": 0.04840772971510887, "learning_rate": 0.01, "loss": 1.9846, "step": 33882 }, { "epoch": 3.4818125770653516, "grad_norm": 0.0467451848089695, "learning_rate": 0.01, "loss": 2.0124, "step": 33885 }, { "epoch": 3.4821208384710234, "grad_norm": 0.08849336951971054, "learning_rate": 0.01, "loss": 2.0056, "step": 33888 }, { "epoch": 3.4824290998766956, "grad_norm": 0.040159258991479874, "learning_rate": 0.01, "loss": 2.0074, "step": 33891 }, { "epoch": 3.4827373612823673, "grad_norm": 0.03237557038664818, "learning_rate": 0.01, "loss": 1.9885, "step": 33894 }, { "epoch": 3.4830456226880395, "grad_norm": 0.11882402747869492, "learning_rate": 0.01, "loss": 1.988, "step": 33897 }, { "epoch": 3.4833538840937113, "grad_norm": 0.04206301271915436, "learning_rate": 0.01, "loss": 2.0138, "step": 33900 }, { "epoch": 3.4836621454993835, "grad_norm": 0.08875782787799835, "learning_rate": 0.01, "loss": 2.0009, "step": 33903 }, { "epoch": 3.4839704069050557, "grad_norm": 0.07771812379360199, "learning_rate": 0.01, "loss": 1.9787, "step": 33906 }, { "epoch": 3.4842786683107274, "grad_norm": 0.0839293822646141, "learning_rate": 0.01, "loss": 2.0191, "step": 33909 }, { "epoch": 3.4845869297163996, "grad_norm": 0.051187288016080856, "learning_rate": 0.01, "loss": 2.0112, "step": 33912 }, { "epoch": 3.4848951911220714, "grad_norm": 0.07497724145650864, "learning_rate": 0.01, "loss": 2.0153, "step": 33915 }, { "epoch": 3.4852034525277436, "grad_norm": 0.09341751039028168, "learning_rate": 0.01, "loss": 1.9815, "step": 33918 }, { "epoch": 3.4855117139334153, "grad_norm": 0.17380410432815552, "learning_rate": 0.01, "loss": 2.0015, "step": 33921 }, { "epoch": 3.4858199753390875, "grad_norm": 0.1407613456249237, "learning_rate": 0.01, "loss": 2.0066, "step": 33924 }, { "epoch": 3.4861282367447597, "grad_norm": 0.04020331799983978, "learning_rate": 0.01, "loss": 1.9915, "step": 33927 }, { "epoch": 3.4864364981504314, "grad_norm": 0.07008705288171768, "learning_rate": 0.01, "loss": 2.0045, "step": 33930 }, { "epoch": 3.4867447595561036, "grad_norm": 0.04310291260480881, "learning_rate": 0.01, "loss": 2.0103, "step": 33933 }, { "epoch": 3.487053020961776, "grad_norm": 0.03993843123316765, "learning_rate": 0.01, "loss": 2.0051, "step": 33936 }, { "epoch": 3.4873612823674476, "grad_norm": 0.04465002939105034, "learning_rate": 0.01, "loss": 2.0089, "step": 33939 }, { "epoch": 3.4876695437731198, "grad_norm": 0.03715788200497627, "learning_rate": 0.01, "loss": 1.9676, "step": 33942 }, { "epoch": 3.4879778051787915, "grad_norm": 0.06855741888284683, "learning_rate": 0.01, "loss": 2.0064, "step": 33945 }, { "epoch": 3.4882860665844637, "grad_norm": 0.05004393681883812, "learning_rate": 0.01, "loss": 2.0027, "step": 33948 }, { "epoch": 3.4885943279901355, "grad_norm": 0.11410415172576904, "learning_rate": 0.01, "loss": 1.9663, "step": 33951 }, { "epoch": 3.4889025893958077, "grad_norm": 0.09844734519720078, "learning_rate": 0.01, "loss": 1.9873, "step": 33954 }, { "epoch": 3.48921085080148, "grad_norm": 0.06708649545907974, "learning_rate": 0.01, "loss": 2.0026, "step": 33957 }, { "epoch": 3.4895191122071516, "grad_norm": 0.10117511451244354, "learning_rate": 0.01, "loss": 2.019, "step": 33960 }, { "epoch": 3.489827373612824, "grad_norm": 0.09758836030960083, "learning_rate": 0.01, "loss": 2.0272, "step": 33963 }, { "epoch": 3.4901356350184956, "grad_norm": 0.05189559981226921, "learning_rate": 0.01, "loss": 2.0107, "step": 33966 }, { "epoch": 3.4904438964241677, "grad_norm": 0.04805564507842064, "learning_rate": 0.01, "loss": 1.9984, "step": 33969 }, { "epoch": 3.4907521578298395, "grad_norm": 0.10716047137975693, "learning_rate": 0.01, "loss": 2.0004, "step": 33972 }, { "epoch": 3.4910604192355117, "grad_norm": 0.0761241689324379, "learning_rate": 0.01, "loss": 1.994, "step": 33975 }, { "epoch": 3.491368680641184, "grad_norm": 0.09611841291189194, "learning_rate": 0.01, "loss": 2.0152, "step": 33978 }, { "epoch": 3.4916769420468556, "grad_norm": 0.05685526877641678, "learning_rate": 0.01, "loss": 2.0061, "step": 33981 }, { "epoch": 3.491985203452528, "grad_norm": 0.044158194214105606, "learning_rate": 0.01, "loss": 1.9759, "step": 33984 }, { "epoch": 3.4922934648581996, "grad_norm": 0.0626639872789383, "learning_rate": 0.01, "loss": 2.0238, "step": 33987 }, { "epoch": 3.4926017262638718, "grad_norm": 0.0559966154396534, "learning_rate": 0.01, "loss": 2.0002, "step": 33990 }, { "epoch": 3.4929099876695435, "grad_norm": 0.05526448041200638, "learning_rate": 0.01, "loss": 2.0161, "step": 33993 }, { "epoch": 3.4932182490752157, "grad_norm": 0.07910171896219254, "learning_rate": 0.01, "loss": 2.0098, "step": 33996 }, { "epoch": 3.493526510480888, "grad_norm": 0.09646596759557724, "learning_rate": 0.01, "loss": 2.0161, "step": 33999 }, { "epoch": 3.4938347718865597, "grad_norm": 0.06635434925556183, "learning_rate": 0.01, "loss": 2.0271, "step": 34002 }, { "epoch": 3.494143033292232, "grad_norm": 0.04328935965895653, "learning_rate": 0.01, "loss": 2.0079, "step": 34005 }, { "epoch": 3.494451294697904, "grad_norm": 0.040060825645923615, "learning_rate": 0.01, "loss": 2.0076, "step": 34008 }, { "epoch": 3.494759556103576, "grad_norm": 0.0396769754588604, "learning_rate": 0.01, "loss": 2.0319, "step": 34011 }, { "epoch": 3.495067817509248, "grad_norm": 0.035870879888534546, "learning_rate": 0.01, "loss": 2.0013, "step": 34014 }, { "epoch": 3.4953760789149197, "grad_norm": 0.07099170982837677, "learning_rate": 0.01, "loss": 2.0024, "step": 34017 }, { "epoch": 3.495684340320592, "grad_norm": 0.06994140148162842, "learning_rate": 0.01, "loss": 2.009, "step": 34020 }, { "epoch": 3.4959926017262637, "grad_norm": 0.03673701733350754, "learning_rate": 0.01, "loss": 2.0091, "step": 34023 }, { "epoch": 3.496300863131936, "grad_norm": 0.036696165800094604, "learning_rate": 0.01, "loss": 2.0196, "step": 34026 }, { "epoch": 3.496609124537608, "grad_norm": 0.1089354008436203, "learning_rate": 0.01, "loss": 2.0083, "step": 34029 }, { "epoch": 3.49691738594328, "grad_norm": 0.04758044332265854, "learning_rate": 0.01, "loss": 1.9975, "step": 34032 }, { "epoch": 3.497225647348952, "grad_norm": 0.05122329294681549, "learning_rate": 0.01, "loss": 1.9944, "step": 34035 }, { "epoch": 3.4975339087546238, "grad_norm": 0.04168769717216492, "learning_rate": 0.01, "loss": 2.013, "step": 34038 }, { "epoch": 3.497842170160296, "grad_norm": 0.03426910564303398, "learning_rate": 0.01, "loss": 2.0217, "step": 34041 }, { "epoch": 3.4981504315659677, "grad_norm": 0.036272600293159485, "learning_rate": 0.01, "loss": 2.0134, "step": 34044 }, { "epoch": 3.49845869297164, "grad_norm": 0.11142997443675995, "learning_rate": 0.01, "loss": 2.0006, "step": 34047 }, { "epoch": 3.498766954377312, "grad_norm": 0.08410037308931351, "learning_rate": 0.01, "loss": 2.0274, "step": 34050 }, { "epoch": 3.499075215782984, "grad_norm": 0.10037896782159805, "learning_rate": 0.01, "loss": 2.0187, "step": 34053 }, { "epoch": 3.499383477188656, "grad_norm": 0.05194736644625664, "learning_rate": 0.01, "loss": 1.9862, "step": 34056 }, { "epoch": 3.499691738594328, "grad_norm": 0.037128567695617676, "learning_rate": 0.01, "loss": 1.9806, "step": 34059 }, { "epoch": 3.5, "grad_norm": 0.059042248874902725, "learning_rate": 0.01, "loss": 2.0134, "step": 34062 }, { "epoch": 3.5003082614056718, "grad_norm": 0.06282515078783035, "learning_rate": 0.01, "loss": 1.9964, "step": 34065 }, { "epoch": 3.500616522811344, "grad_norm": 0.04248201847076416, "learning_rate": 0.01, "loss": 2.0045, "step": 34068 }, { "epoch": 3.500924784217016, "grad_norm": 0.047459498047828674, "learning_rate": 0.01, "loss": 2.0171, "step": 34071 }, { "epoch": 3.501233045622688, "grad_norm": 0.04447497799992561, "learning_rate": 0.01, "loss": 2.0263, "step": 34074 }, { "epoch": 3.50154130702836, "grad_norm": 0.03569423034787178, "learning_rate": 0.01, "loss": 1.9867, "step": 34077 }, { "epoch": 3.5018495684340323, "grad_norm": 0.055864010006189346, "learning_rate": 0.01, "loss": 2.0025, "step": 34080 }, { "epoch": 3.502157829839704, "grad_norm": 0.055652111768722534, "learning_rate": 0.01, "loss": 1.9972, "step": 34083 }, { "epoch": 3.5024660912453762, "grad_norm": 0.1195710226893425, "learning_rate": 0.01, "loss": 1.9942, "step": 34086 }, { "epoch": 3.502774352651048, "grad_norm": 0.04436422884464264, "learning_rate": 0.01, "loss": 2.0008, "step": 34089 }, { "epoch": 3.50308261405672, "grad_norm": 0.047165922820568085, "learning_rate": 0.01, "loss": 2.0211, "step": 34092 }, { "epoch": 3.503390875462392, "grad_norm": 0.07143979519605637, "learning_rate": 0.01, "loss": 2.0108, "step": 34095 }, { "epoch": 3.503699136868064, "grad_norm": 0.04734091833233833, "learning_rate": 0.01, "loss": 2.0183, "step": 34098 }, { "epoch": 3.5040073982737363, "grad_norm": 0.05058762803673744, "learning_rate": 0.01, "loss": 1.9887, "step": 34101 }, { "epoch": 3.504315659679408, "grad_norm": 0.03917768597602844, "learning_rate": 0.01, "loss": 2.0052, "step": 34104 }, { "epoch": 3.5046239210850803, "grad_norm": 0.06384671479463577, "learning_rate": 0.01, "loss": 2.0188, "step": 34107 }, { "epoch": 3.504932182490752, "grad_norm": 0.046532079577445984, "learning_rate": 0.01, "loss": 1.9841, "step": 34110 }, { "epoch": 3.505240443896424, "grad_norm": 0.033960457891225815, "learning_rate": 0.01, "loss": 2.0045, "step": 34113 }, { "epoch": 3.505548705302096, "grad_norm": 0.061024975031614304, "learning_rate": 0.01, "loss": 2.0121, "step": 34116 }, { "epoch": 3.505856966707768, "grad_norm": 0.09465770423412323, "learning_rate": 0.01, "loss": 2.0043, "step": 34119 }, { "epoch": 3.5061652281134403, "grad_norm": 0.08510823547840118, "learning_rate": 0.01, "loss": 2.0116, "step": 34122 }, { "epoch": 3.506473489519112, "grad_norm": 0.12201209366321564, "learning_rate": 0.01, "loss": 2.0137, "step": 34125 }, { "epoch": 3.5067817509247843, "grad_norm": 0.08503463119268417, "learning_rate": 0.01, "loss": 2.0055, "step": 34128 }, { "epoch": 3.5070900123304565, "grad_norm": 0.07231762260198593, "learning_rate": 0.01, "loss": 1.9907, "step": 34131 }, { "epoch": 3.5073982737361282, "grad_norm": 0.042714521288871765, "learning_rate": 0.01, "loss": 2.0132, "step": 34134 }, { "epoch": 3.5077065351418, "grad_norm": 0.06119026616215706, "learning_rate": 0.01, "loss": 2.024, "step": 34137 }, { "epoch": 3.508014796547472, "grad_norm": 0.047117751091718674, "learning_rate": 0.01, "loss": 1.9938, "step": 34140 }, { "epoch": 3.5083230579531444, "grad_norm": 0.06318202614784241, "learning_rate": 0.01, "loss": 1.9826, "step": 34143 }, { "epoch": 3.508631319358816, "grad_norm": 0.05050423741340637, "learning_rate": 0.01, "loss": 1.9882, "step": 34146 }, { "epoch": 3.5089395807644883, "grad_norm": 0.06105552241206169, "learning_rate": 0.01, "loss": 1.9882, "step": 34149 }, { "epoch": 3.5092478421701605, "grad_norm": 0.05940508469939232, "learning_rate": 0.01, "loss": 2.0127, "step": 34152 }, { "epoch": 3.5095561035758323, "grad_norm": 0.05157890543341637, "learning_rate": 0.01, "loss": 1.9999, "step": 34155 }, { "epoch": 3.5098643649815044, "grad_norm": 0.06597436964511871, "learning_rate": 0.01, "loss": 2.0054, "step": 34158 }, { "epoch": 3.510172626387176, "grad_norm": 0.10424167662858963, "learning_rate": 0.01, "loss": 1.9899, "step": 34161 }, { "epoch": 3.5104808877928484, "grad_norm": 0.09572573006153107, "learning_rate": 0.01, "loss": 2.0132, "step": 34164 }, { "epoch": 3.51078914919852, "grad_norm": 0.07485774159431458, "learning_rate": 0.01, "loss": 2.0063, "step": 34167 }, { "epoch": 3.5110974106041923, "grad_norm": 0.12162437289953232, "learning_rate": 0.01, "loss": 2.0342, "step": 34170 }, { "epoch": 3.5114056720098645, "grad_norm": 0.09338829666376114, "learning_rate": 0.01, "loss": 2.0366, "step": 34173 }, { "epoch": 3.5117139334155363, "grad_norm": 0.06095868721604347, "learning_rate": 0.01, "loss": 2.0202, "step": 34176 }, { "epoch": 3.5120221948212085, "grad_norm": 0.057355985045433044, "learning_rate": 0.01, "loss": 2.0216, "step": 34179 }, { "epoch": 3.5123304562268807, "grad_norm": 0.10891444981098175, "learning_rate": 0.01, "loss": 2.0339, "step": 34182 }, { "epoch": 3.5126387176325524, "grad_norm": 0.10567829012870789, "learning_rate": 0.01, "loss": 2.0142, "step": 34185 }, { "epoch": 3.512946979038224, "grad_norm": 0.05244187265634537, "learning_rate": 0.01, "loss": 1.9864, "step": 34188 }, { "epoch": 3.5132552404438964, "grad_norm": 0.046076592057943344, "learning_rate": 0.01, "loss": 2.0013, "step": 34191 }, { "epoch": 3.5135635018495686, "grad_norm": 0.04405316337943077, "learning_rate": 0.01, "loss": 1.9941, "step": 34194 }, { "epoch": 3.5138717632552403, "grad_norm": 0.03997344523668289, "learning_rate": 0.01, "loss": 1.9915, "step": 34197 }, { "epoch": 3.5141800246609125, "grad_norm": 0.03909468650817871, "learning_rate": 0.01, "loss": 1.9767, "step": 34200 }, { "epoch": 3.5144882860665847, "grad_norm": 0.11657961457967758, "learning_rate": 0.01, "loss": 1.9941, "step": 34203 }, { "epoch": 3.5147965474722564, "grad_norm": 0.11744063347578049, "learning_rate": 0.01, "loss": 2.0029, "step": 34206 }, { "epoch": 3.5151048088779286, "grad_norm": 0.039932142943143845, "learning_rate": 0.01, "loss": 2.0168, "step": 34209 }, { "epoch": 3.5154130702836004, "grad_norm": 0.03299909457564354, "learning_rate": 0.01, "loss": 2.022, "step": 34212 }, { "epoch": 3.5157213316892726, "grad_norm": 0.1055513322353363, "learning_rate": 0.01, "loss": 2.0165, "step": 34215 }, { "epoch": 3.5160295930949443, "grad_norm": 0.05073374882340431, "learning_rate": 0.01, "loss": 1.9951, "step": 34218 }, { "epoch": 3.5163378545006165, "grad_norm": 0.05423841252923012, "learning_rate": 0.01, "loss": 2.0115, "step": 34221 }, { "epoch": 3.5166461159062887, "grad_norm": 0.05695211887359619, "learning_rate": 0.01, "loss": 2.011, "step": 34224 }, { "epoch": 3.5169543773119605, "grad_norm": 0.04092913866043091, "learning_rate": 0.01, "loss": 2.0303, "step": 34227 }, { "epoch": 3.5172626387176327, "grad_norm": 0.05083661898970604, "learning_rate": 0.01, "loss": 2.0081, "step": 34230 }, { "epoch": 3.5175709001233044, "grad_norm": 0.04499472677707672, "learning_rate": 0.01, "loss": 2.0036, "step": 34233 }, { "epoch": 3.5178791615289766, "grad_norm": 0.09770061075687408, "learning_rate": 0.01, "loss": 2.0209, "step": 34236 }, { "epoch": 3.5181874229346484, "grad_norm": 0.09951679408550262, "learning_rate": 0.01, "loss": 1.9993, "step": 34239 }, { "epoch": 3.5184956843403206, "grad_norm": 0.11089123040437698, "learning_rate": 0.01, "loss": 2.0088, "step": 34242 }, { "epoch": 3.5188039457459928, "grad_norm": 0.05511726066470146, "learning_rate": 0.01, "loss": 2.0129, "step": 34245 }, { "epoch": 3.5191122071516645, "grad_norm": 0.035139378160238266, "learning_rate": 0.01, "loss": 2.016, "step": 34248 }, { "epoch": 3.5194204685573367, "grad_norm": 0.05882834270596504, "learning_rate": 0.01, "loss": 2.0061, "step": 34251 }, { "epoch": 3.519728729963009, "grad_norm": 0.06780868023633957, "learning_rate": 0.01, "loss": 1.977, "step": 34254 }, { "epoch": 3.5200369913686806, "grad_norm": 0.053879499435424805, "learning_rate": 0.01, "loss": 2.0055, "step": 34257 }, { "epoch": 3.5203452527743524, "grad_norm": 0.045473113656044006, "learning_rate": 0.01, "loss": 2.0108, "step": 34260 }, { "epoch": 3.5206535141800246, "grad_norm": 0.050460174679756165, "learning_rate": 0.01, "loss": 2.0327, "step": 34263 }, { "epoch": 3.520961775585697, "grad_norm": 0.040959011763334274, "learning_rate": 0.01, "loss": 1.9772, "step": 34266 }, { "epoch": 3.5212700369913685, "grad_norm": 0.048418451100587845, "learning_rate": 0.01, "loss": 2.0182, "step": 34269 }, { "epoch": 3.5215782983970407, "grad_norm": 0.06287720799446106, "learning_rate": 0.01, "loss": 2.0014, "step": 34272 }, { "epoch": 3.521886559802713, "grad_norm": 0.04529783874750137, "learning_rate": 0.01, "loss": 2.0011, "step": 34275 }, { "epoch": 3.5221948212083847, "grad_norm": 0.03729906305670738, "learning_rate": 0.01, "loss": 2.0183, "step": 34278 }, { "epoch": 3.522503082614057, "grad_norm": 0.1012713834643364, "learning_rate": 0.01, "loss": 1.9841, "step": 34281 }, { "epoch": 3.5228113440197286, "grad_norm": 0.10641775280237198, "learning_rate": 0.01, "loss": 2.0122, "step": 34284 }, { "epoch": 3.523119605425401, "grad_norm": 0.09289912134408951, "learning_rate": 0.01, "loss": 2.016, "step": 34287 }, { "epoch": 3.5234278668310726, "grad_norm": 0.13354292511940002, "learning_rate": 0.01, "loss": 2.0119, "step": 34290 }, { "epoch": 3.5237361282367448, "grad_norm": 0.08027501404285431, "learning_rate": 0.01, "loss": 2.0046, "step": 34293 }, { "epoch": 3.524044389642417, "grad_norm": 0.047286976128816605, "learning_rate": 0.01, "loss": 1.9796, "step": 34296 }, { "epoch": 3.5243526510480887, "grad_norm": 0.042211420834064484, "learning_rate": 0.01, "loss": 2.0426, "step": 34299 }, { "epoch": 3.524660912453761, "grad_norm": 0.11374162137508392, "learning_rate": 0.01, "loss": 1.9954, "step": 34302 }, { "epoch": 3.5249691738594326, "grad_norm": 0.05604710429906845, "learning_rate": 0.01, "loss": 2.0039, "step": 34305 }, { "epoch": 3.525277435265105, "grad_norm": 0.09744080156087875, "learning_rate": 0.01, "loss": 1.9944, "step": 34308 }, { "epoch": 3.5255856966707766, "grad_norm": 0.08903683722019196, "learning_rate": 0.01, "loss": 1.9931, "step": 34311 }, { "epoch": 3.525893958076449, "grad_norm": 0.048648543655872345, "learning_rate": 0.01, "loss": 2.0028, "step": 34314 }, { "epoch": 3.526202219482121, "grad_norm": 0.0710827112197876, "learning_rate": 0.01, "loss": 2.0168, "step": 34317 }, { "epoch": 3.5265104808877927, "grad_norm": 0.04736728593707085, "learning_rate": 0.01, "loss": 1.9812, "step": 34320 }, { "epoch": 3.526818742293465, "grad_norm": 0.05635381117463112, "learning_rate": 0.01, "loss": 2.0202, "step": 34323 }, { "epoch": 3.527127003699137, "grad_norm": 0.06686391681432724, "learning_rate": 0.01, "loss": 2.0196, "step": 34326 }, { "epoch": 3.527435265104809, "grad_norm": 0.03842944651842117, "learning_rate": 0.01, "loss": 1.9974, "step": 34329 }, { "epoch": 3.5277435265104806, "grad_norm": 0.042534928768873215, "learning_rate": 0.01, "loss": 2.0155, "step": 34332 }, { "epoch": 3.528051787916153, "grad_norm": 0.06037287786602974, "learning_rate": 0.01, "loss": 2.0192, "step": 34335 }, { "epoch": 3.528360049321825, "grad_norm": 0.06533370167016983, "learning_rate": 0.01, "loss": 1.9873, "step": 34338 }, { "epoch": 3.5286683107274968, "grad_norm": 0.0877017006278038, "learning_rate": 0.01, "loss": 2.0081, "step": 34341 }, { "epoch": 3.528976572133169, "grad_norm": 0.09864972531795502, "learning_rate": 0.01, "loss": 1.9866, "step": 34344 }, { "epoch": 3.529284833538841, "grad_norm": 0.049320898950099945, "learning_rate": 0.01, "loss": 1.9994, "step": 34347 }, { "epoch": 3.529593094944513, "grad_norm": 0.12996013462543488, "learning_rate": 0.01, "loss": 2.0136, "step": 34350 }, { "epoch": 3.529901356350185, "grad_norm": 0.06548713147640228, "learning_rate": 0.01, "loss": 2.0221, "step": 34353 }, { "epoch": 3.530209617755857, "grad_norm": 0.045482341200113297, "learning_rate": 0.01, "loss": 2.0062, "step": 34356 }, { "epoch": 3.530517879161529, "grad_norm": 0.04433637857437134, "learning_rate": 0.01, "loss": 2.0062, "step": 34359 }, { "epoch": 3.530826140567201, "grad_norm": 0.04450944438576698, "learning_rate": 0.01, "loss": 1.983, "step": 34362 }, { "epoch": 3.531134401972873, "grad_norm": 0.07188856601715088, "learning_rate": 0.01, "loss": 2.0426, "step": 34365 }, { "epoch": 3.531442663378545, "grad_norm": 0.049853190779685974, "learning_rate": 0.01, "loss": 1.9959, "step": 34368 }, { "epoch": 3.531750924784217, "grad_norm": 0.08662360161542892, "learning_rate": 0.01, "loss": 1.9886, "step": 34371 }, { "epoch": 3.532059186189889, "grad_norm": 0.07737040519714355, "learning_rate": 0.01, "loss": 2.0064, "step": 34374 }, { "epoch": 3.532367447595561, "grad_norm": 0.0868213102221489, "learning_rate": 0.01, "loss": 1.9836, "step": 34377 }, { "epoch": 3.532675709001233, "grad_norm": 0.06288056075572968, "learning_rate": 0.01, "loss": 2.0142, "step": 34380 }, { "epoch": 3.532983970406905, "grad_norm": 0.05537475645542145, "learning_rate": 0.01, "loss": 1.9937, "step": 34383 }, { "epoch": 3.533292231812577, "grad_norm": 0.13551141321659088, "learning_rate": 0.01, "loss": 2.0162, "step": 34386 }, { "epoch": 3.533600493218249, "grad_norm": 0.040236156433820724, "learning_rate": 0.01, "loss": 2.0006, "step": 34389 }, { "epoch": 3.533908754623921, "grad_norm": 0.06904727220535278, "learning_rate": 0.01, "loss": 1.9882, "step": 34392 }, { "epoch": 3.534217016029593, "grad_norm": 0.06675262004137039, "learning_rate": 0.01, "loss": 2.0018, "step": 34395 }, { "epoch": 3.5345252774352653, "grad_norm": 0.05011274665594101, "learning_rate": 0.01, "loss": 2.0267, "step": 34398 }, { "epoch": 3.534833538840937, "grad_norm": 0.04494976997375488, "learning_rate": 0.01, "loss": 2.0243, "step": 34401 }, { "epoch": 3.5351418002466093, "grad_norm": 0.04114719480276108, "learning_rate": 0.01, "loss": 2.0161, "step": 34404 }, { "epoch": 3.535450061652281, "grad_norm": 0.03544189780950546, "learning_rate": 0.01, "loss": 1.9987, "step": 34407 }, { "epoch": 3.5357583230579532, "grad_norm": 0.10223556309938431, "learning_rate": 0.01, "loss": 2.0237, "step": 34410 }, { "epoch": 3.536066584463625, "grad_norm": 0.03573578596115112, "learning_rate": 0.01, "loss": 2.0017, "step": 34413 }, { "epoch": 3.536374845869297, "grad_norm": 0.08199316263198853, "learning_rate": 0.01, "loss": 2.007, "step": 34416 }, { "epoch": 3.5366831072749694, "grad_norm": 0.04587673768401146, "learning_rate": 0.01, "loss": 2.01, "step": 34419 }, { "epoch": 3.536991368680641, "grad_norm": 0.061022888869047165, "learning_rate": 0.01, "loss": 1.9894, "step": 34422 }, { "epoch": 3.5372996300863133, "grad_norm": 0.09616536647081375, "learning_rate": 0.01, "loss": 2.0081, "step": 34425 }, { "epoch": 3.537607891491985, "grad_norm": 0.10521430522203445, "learning_rate": 0.01, "loss": 2.0187, "step": 34428 }, { "epoch": 3.5379161528976573, "grad_norm": 0.0551154688000679, "learning_rate": 0.01, "loss": 2.0034, "step": 34431 }, { "epoch": 3.538224414303329, "grad_norm": 0.0405128113925457, "learning_rate": 0.01, "loss": 2.0152, "step": 34434 }, { "epoch": 3.538532675709001, "grad_norm": 0.035197604447603226, "learning_rate": 0.01, "loss": 2.0158, "step": 34437 }, { "epoch": 3.5388409371146734, "grad_norm": 0.026394899934530258, "learning_rate": 0.01, "loss": 1.9898, "step": 34440 }, { "epoch": 3.539149198520345, "grad_norm": 0.09831299632787704, "learning_rate": 0.01, "loss": 2.0205, "step": 34443 }, { "epoch": 3.5394574599260173, "grad_norm": 0.08986491709947586, "learning_rate": 0.01, "loss": 2.023, "step": 34446 }, { "epoch": 3.5397657213316895, "grad_norm": 0.07974385470151901, "learning_rate": 0.01, "loss": 1.9975, "step": 34449 }, { "epoch": 3.5400739827373613, "grad_norm": 0.11314170062541962, "learning_rate": 0.01, "loss": 2.0045, "step": 34452 }, { "epoch": 3.540382244143033, "grad_norm": 0.05535215139389038, "learning_rate": 0.01, "loss": 2.0142, "step": 34455 }, { "epoch": 3.5406905055487052, "grad_norm": 0.0592242069542408, "learning_rate": 0.01, "loss": 2.0243, "step": 34458 }, { "epoch": 3.5409987669543774, "grad_norm": 0.12061761319637299, "learning_rate": 0.01, "loss": 2.0125, "step": 34461 }, { "epoch": 3.541307028360049, "grad_norm": 0.11443237960338593, "learning_rate": 0.01, "loss": 2.004, "step": 34464 }, { "epoch": 3.5416152897657214, "grad_norm": 0.050069622695446014, "learning_rate": 0.01, "loss": 2.013, "step": 34467 }, { "epoch": 3.5419235511713936, "grad_norm": 0.0419803261756897, "learning_rate": 0.01, "loss": 2.0141, "step": 34470 }, { "epoch": 3.5422318125770653, "grad_norm": 0.03493333235383034, "learning_rate": 0.01, "loss": 2.0106, "step": 34473 }, { "epoch": 3.5425400739827375, "grad_norm": 0.036889005452394485, "learning_rate": 0.01, "loss": 2.0085, "step": 34476 }, { "epoch": 3.5428483353884093, "grad_norm": 0.05217687413096428, "learning_rate": 0.01, "loss": 1.9987, "step": 34479 }, { "epoch": 3.5431565967940815, "grad_norm": 0.050754569470882416, "learning_rate": 0.01, "loss": 2.0333, "step": 34482 }, { "epoch": 3.543464858199753, "grad_norm": 0.04706338793039322, "learning_rate": 0.01, "loss": 1.9973, "step": 34485 }, { "epoch": 3.5437731196054254, "grad_norm": 0.07039839029312134, "learning_rate": 0.01, "loss": 2.0104, "step": 34488 }, { "epoch": 3.5440813810110976, "grad_norm": 0.07069671154022217, "learning_rate": 0.01, "loss": 1.978, "step": 34491 }, { "epoch": 3.5443896424167693, "grad_norm": 0.09058693796396255, "learning_rate": 0.01, "loss": 2.0212, "step": 34494 }, { "epoch": 3.5446979038224415, "grad_norm": 0.037666015326976776, "learning_rate": 0.01, "loss": 2.0312, "step": 34497 }, { "epoch": 3.5450061652281133, "grad_norm": 0.04604499414563179, "learning_rate": 0.01, "loss": 2.041, "step": 34500 }, { "epoch": 3.5453144266337855, "grad_norm": 0.03853873908519745, "learning_rate": 0.01, "loss": 2.0138, "step": 34503 }, { "epoch": 3.5456226880394572, "grad_norm": 0.07123208791017532, "learning_rate": 0.01, "loss": 2.0107, "step": 34506 }, { "epoch": 3.5459309494451294, "grad_norm": 0.0936029776930809, "learning_rate": 0.01, "loss": 2.0001, "step": 34509 }, { "epoch": 3.5462392108508016, "grad_norm": 0.07412207871675491, "learning_rate": 0.01, "loss": 2.0034, "step": 34512 }, { "epoch": 3.5465474722564734, "grad_norm": 0.07681329548358917, "learning_rate": 0.01, "loss": 1.9964, "step": 34515 }, { "epoch": 3.5468557336621456, "grad_norm": 0.10403033345937729, "learning_rate": 0.01, "loss": 2.0077, "step": 34518 }, { "epoch": 3.5471639950678178, "grad_norm": 0.14240513741970062, "learning_rate": 0.01, "loss": 1.9695, "step": 34521 }, { "epoch": 3.5474722564734895, "grad_norm": 0.14203394949436188, "learning_rate": 0.01, "loss": 2.0187, "step": 34524 }, { "epoch": 3.5477805178791613, "grad_norm": 0.09036475419998169, "learning_rate": 0.01, "loss": 2.0054, "step": 34527 }, { "epoch": 3.5480887792848335, "grad_norm": 0.037448760122060776, "learning_rate": 0.01, "loss": 1.9978, "step": 34530 }, { "epoch": 3.5483970406905057, "grad_norm": 0.05539664253592491, "learning_rate": 0.01, "loss": 2.0015, "step": 34533 }, { "epoch": 3.5487053020961774, "grad_norm": 0.05857717618346214, "learning_rate": 0.01, "loss": 1.9974, "step": 34536 }, { "epoch": 3.5490135635018496, "grad_norm": 0.06406868249177933, "learning_rate": 0.01, "loss": 2.009, "step": 34539 }, { "epoch": 3.549321824907522, "grad_norm": 0.04234686121344566, "learning_rate": 0.01, "loss": 2.0273, "step": 34542 }, { "epoch": 3.5496300863131935, "grad_norm": 0.038916390389204025, "learning_rate": 0.01, "loss": 2.0059, "step": 34545 }, { "epoch": 3.5499383477188657, "grad_norm": 0.039077602326869965, "learning_rate": 0.01, "loss": 2.0196, "step": 34548 }, { "epoch": 3.5502466091245375, "grad_norm": 0.03960174322128296, "learning_rate": 0.01, "loss": 2.0167, "step": 34551 }, { "epoch": 3.5505548705302097, "grad_norm": 0.12132997810840607, "learning_rate": 0.01, "loss": 1.9902, "step": 34554 }, { "epoch": 3.5508631319358814, "grad_norm": 0.09045036137104034, "learning_rate": 0.01, "loss": 2.0178, "step": 34557 }, { "epoch": 3.5511713933415536, "grad_norm": 0.07062508165836334, "learning_rate": 0.01, "loss": 1.9798, "step": 34560 }, { "epoch": 3.551479654747226, "grad_norm": 0.08182030916213989, "learning_rate": 0.01, "loss": 1.9715, "step": 34563 }, { "epoch": 3.5517879161528976, "grad_norm": 0.05917488783597946, "learning_rate": 0.01, "loss": 2.0149, "step": 34566 }, { "epoch": 3.5520961775585698, "grad_norm": 0.08519969880580902, "learning_rate": 0.01, "loss": 2.009, "step": 34569 }, { "epoch": 3.5524044389642415, "grad_norm": 0.04442654922604561, "learning_rate": 0.01, "loss": 2.0218, "step": 34572 }, { "epoch": 3.5527127003699137, "grad_norm": 0.08265768736600876, "learning_rate": 0.01, "loss": 2.0006, "step": 34575 }, { "epoch": 3.5530209617755855, "grad_norm": 0.0891944020986557, "learning_rate": 0.01, "loss": 1.991, "step": 34578 }, { "epoch": 3.5533292231812577, "grad_norm": 0.0688168928027153, "learning_rate": 0.01, "loss": 1.995, "step": 34581 }, { "epoch": 3.55363748458693, "grad_norm": 0.0936504453420639, "learning_rate": 0.01, "loss": 1.9835, "step": 34584 }, { "epoch": 3.5539457459926016, "grad_norm": 0.10172853618860245, "learning_rate": 0.01, "loss": 2.0016, "step": 34587 }, { "epoch": 3.554254007398274, "grad_norm": 0.04822350665926933, "learning_rate": 0.01, "loss": 1.9796, "step": 34590 }, { "epoch": 3.554562268803946, "grad_norm": 0.05222393944859505, "learning_rate": 0.01, "loss": 2.0036, "step": 34593 }, { "epoch": 3.5548705302096177, "grad_norm": 0.03592358157038689, "learning_rate": 0.01, "loss": 1.9911, "step": 34596 }, { "epoch": 3.5551787916152895, "grad_norm": 0.03903461620211601, "learning_rate": 0.01, "loss": 2.0062, "step": 34599 }, { "epoch": 3.5554870530209617, "grad_norm": 0.0467611663043499, "learning_rate": 0.01, "loss": 1.995, "step": 34602 }, { "epoch": 3.555795314426634, "grad_norm": 0.12140758335590363, "learning_rate": 0.01, "loss": 2.0168, "step": 34605 }, { "epoch": 3.5561035758323056, "grad_norm": 0.04155382886528969, "learning_rate": 0.01, "loss": 2.0048, "step": 34608 }, { "epoch": 3.556411837237978, "grad_norm": 0.039924267679452896, "learning_rate": 0.01, "loss": 1.9813, "step": 34611 }, { "epoch": 3.55672009864365, "grad_norm": 0.10513463616371155, "learning_rate": 0.01, "loss": 2.0098, "step": 34614 }, { "epoch": 3.5570283600493218, "grad_norm": 0.08956603705883026, "learning_rate": 0.01, "loss": 1.9951, "step": 34617 }, { "epoch": 3.557336621454994, "grad_norm": 0.045444682240486145, "learning_rate": 0.01, "loss": 2.0041, "step": 34620 }, { "epoch": 3.5576448828606657, "grad_norm": 0.05367853119969368, "learning_rate": 0.01, "loss": 2.0154, "step": 34623 }, { "epoch": 3.557953144266338, "grad_norm": 0.0465155765414238, "learning_rate": 0.01, "loss": 2.0094, "step": 34626 }, { "epoch": 3.5582614056720097, "grad_norm": 0.06788338720798492, "learning_rate": 0.01, "loss": 2.0526, "step": 34629 }, { "epoch": 3.558569667077682, "grad_norm": 0.04508093744516373, "learning_rate": 0.01, "loss": 2.0205, "step": 34632 }, { "epoch": 3.558877928483354, "grad_norm": 0.05827740207314491, "learning_rate": 0.01, "loss": 2.0154, "step": 34635 }, { "epoch": 3.559186189889026, "grad_norm": 0.09915097057819366, "learning_rate": 0.01, "loss": 2.0322, "step": 34638 }, { "epoch": 3.559494451294698, "grad_norm": 0.08571092784404755, "learning_rate": 0.01, "loss": 2.0157, "step": 34641 }, { "epoch": 3.55980271270037, "grad_norm": 0.06480662524700165, "learning_rate": 0.01, "loss": 1.9991, "step": 34644 }, { "epoch": 3.560110974106042, "grad_norm": 0.055505797266960144, "learning_rate": 0.01, "loss": 1.9894, "step": 34647 }, { "epoch": 3.5604192355117137, "grad_norm": 0.05562606081366539, "learning_rate": 0.01, "loss": 2.0173, "step": 34650 }, { "epoch": 3.560727496917386, "grad_norm": 0.05720195546746254, "learning_rate": 0.01, "loss": 1.999, "step": 34653 }, { "epoch": 3.561035758323058, "grad_norm": 0.14654351770877838, "learning_rate": 0.01, "loss": 2.0, "step": 34656 }, { "epoch": 3.56134401972873, "grad_norm": 0.04594961181282997, "learning_rate": 0.01, "loss": 1.9979, "step": 34659 }, { "epoch": 3.561652281134402, "grad_norm": 0.052737195044755936, "learning_rate": 0.01, "loss": 2.0013, "step": 34662 }, { "epoch": 3.561960542540074, "grad_norm": 0.0722641721367836, "learning_rate": 0.01, "loss": 2.0165, "step": 34665 }, { "epoch": 3.562268803945746, "grad_norm": 0.03973691165447235, "learning_rate": 0.01, "loss": 2.0317, "step": 34668 }, { "epoch": 3.562577065351418, "grad_norm": 0.06284237653017044, "learning_rate": 0.01, "loss": 2.0112, "step": 34671 }, { "epoch": 3.56288532675709, "grad_norm": 0.0880017802119255, "learning_rate": 0.01, "loss": 2.0034, "step": 34674 }, { "epoch": 3.563193588162762, "grad_norm": 0.06454182416200638, "learning_rate": 0.01, "loss": 2.0068, "step": 34677 }, { "epoch": 3.563501849568434, "grad_norm": 0.07054916024208069, "learning_rate": 0.01, "loss": 1.9997, "step": 34680 }, { "epoch": 3.563810110974106, "grad_norm": 0.08058945089578629, "learning_rate": 0.01, "loss": 1.9834, "step": 34683 }, { "epoch": 3.5641183723797782, "grad_norm": 0.08660910278558731, "learning_rate": 0.01, "loss": 2.0159, "step": 34686 }, { "epoch": 3.56442663378545, "grad_norm": 0.050544124096632004, "learning_rate": 0.01, "loss": 1.9975, "step": 34689 }, { "epoch": 3.564734895191122, "grad_norm": 0.056985314935445786, "learning_rate": 0.01, "loss": 2.0013, "step": 34692 }, { "epoch": 3.565043156596794, "grad_norm": 0.08204400539398193, "learning_rate": 0.01, "loss": 1.993, "step": 34695 }, { "epoch": 3.565351418002466, "grad_norm": 0.09590046107769012, "learning_rate": 0.01, "loss": 1.9995, "step": 34698 }, { "epoch": 3.565659679408138, "grad_norm": 0.03453322499990463, "learning_rate": 0.01, "loss": 2.001, "step": 34701 }, { "epoch": 3.56596794081381, "grad_norm": 0.03801025450229645, "learning_rate": 0.01, "loss": 2.0096, "step": 34704 }, { "epoch": 3.5662762022194823, "grad_norm": 0.05342378839850426, "learning_rate": 0.01, "loss": 1.9924, "step": 34707 }, { "epoch": 3.566584463625154, "grad_norm": 0.06871719658374786, "learning_rate": 0.01, "loss": 2.0254, "step": 34710 }, { "epoch": 3.566892725030826, "grad_norm": 0.06653191894292831, "learning_rate": 0.01, "loss": 1.9956, "step": 34713 }, { "epoch": 3.5672009864364984, "grad_norm": 0.10531385242938995, "learning_rate": 0.01, "loss": 2.0151, "step": 34716 }, { "epoch": 3.56750924784217, "grad_norm": 0.05481969192624092, "learning_rate": 0.01, "loss": 2.003, "step": 34719 }, { "epoch": 3.567817509247842, "grad_norm": 0.04904542118310928, "learning_rate": 0.01, "loss": 1.9992, "step": 34722 }, { "epoch": 3.568125770653514, "grad_norm": 0.034647226333618164, "learning_rate": 0.01, "loss": 2.0232, "step": 34725 }, { "epoch": 3.5684340320591863, "grad_norm": 0.07234811037778854, "learning_rate": 0.01, "loss": 1.9992, "step": 34728 }, { "epoch": 3.568742293464858, "grad_norm": 0.10623595118522644, "learning_rate": 0.01, "loss": 1.9955, "step": 34731 }, { "epoch": 3.5690505548705302, "grad_norm": 0.04955185204744339, "learning_rate": 0.01, "loss": 1.9894, "step": 34734 }, { "epoch": 3.5693588162762024, "grad_norm": 0.08183744549751282, "learning_rate": 0.01, "loss": 2.0236, "step": 34737 }, { "epoch": 3.569667077681874, "grad_norm": 0.04063693434000015, "learning_rate": 0.01, "loss": 1.9739, "step": 34740 }, { "epoch": 3.5699753390875464, "grad_norm": 0.05661273002624512, "learning_rate": 0.01, "loss": 2.0019, "step": 34743 }, { "epoch": 3.570283600493218, "grad_norm": 0.03300248831510544, "learning_rate": 0.01, "loss": 1.9687, "step": 34746 }, { "epoch": 3.5705918618988903, "grad_norm": 0.09492766112089157, "learning_rate": 0.01, "loss": 2.0099, "step": 34749 }, { "epoch": 3.570900123304562, "grad_norm": 0.0505099892616272, "learning_rate": 0.01, "loss": 2.0062, "step": 34752 }, { "epoch": 3.5712083847102343, "grad_norm": 0.094505175948143, "learning_rate": 0.01, "loss": 1.9948, "step": 34755 }, { "epoch": 3.5715166461159065, "grad_norm": 0.046727851033210754, "learning_rate": 0.01, "loss": 2.0081, "step": 34758 }, { "epoch": 3.571824907521578, "grad_norm": 0.06659562885761261, "learning_rate": 0.01, "loss": 2.0026, "step": 34761 }, { "epoch": 3.5721331689272504, "grad_norm": 0.0828694999217987, "learning_rate": 0.01, "loss": 1.9923, "step": 34764 }, { "epoch": 3.572441430332922, "grad_norm": 0.12309973686933517, "learning_rate": 0.01, "loss": 2.0145, "step": 34767 }, { "epoch": 3.5727496917385944, "grad_norm": 0.03831718862056732, "learning_rate": 0.01, "loss": 2.0022, "step": 34770 }, { "epoch": 3.573057953144266, "grad_norm": 0.049565766006708145, "learning_rate": 0.01, "loss": 1.9838, "step": 34773 }, { "epoch": 3.5733662145499383, "grad_norm": 0.055259205400943756, "learning_rate": 0.01, "loss": 2.0103, "step": 34776 }, { "epoch": 3.5736744759556105, "grad_norm": 0.08979593217372894, "learning_rate": 0.01, "loss": 1.9915, "step": 34779 }, { "epoch": 3.5739827373612822, "grad_norm": 0.05166800692677498, "learning_rate": 0.01, "loss": 2.005, "step": 34782 }, { "epoch": 3.5742909987669544, "grad_norm": 0.06904742866754532, "learning_rate": 0.01, "loss": 1.9823, "step": 34785 }, { "epoch": 3.5745992601726266, "grad_norm": 0.0667278841137886, "learning_rate": 0.01, "loss": 1.9862, "step": 34788 }, { "epoch": 3.5749075215782984, "grad_norm": 0.03483438491821289, "learning_rate": 0.01, "loss": 2.0044, "step": 34791 }, { "epoch": 3.57521578298397, "grad_norm": 0.08964372426271439, "learning_rate": 0.01, "loss": 1.9828, "step": 34794 }, { "epoch": 3.5755240443896423, "grad_norm": 0.09426937997341156, "learning_rate": 0.01, "loss": 2.0093, "step": 34797 }, { "epoch": 3.5758323057953145, "grad_norm": 0.0574275478720665, "learning_rate": 0.01, "loss": 2.0269, "step": 34800 }, { "epoch": 3.5761405672009863, "grad_norm": 0.03654215484857559, "learning_rate": 0.01, "loss": 1.9876, "step": 34803 }, { "epoch": 3.5764488286066585, "grad_norm": 0.05129201337695122, "learning_rate": 0.01, "loss": 2.0072, "step": 34806 }, { "epoch": 3.5767570900123307, "grad_norm": 0.07386317104101181, "learning_rate": 0.01, "loss": 2.0074, "step": 34809 }, { "epoch": 3.5770653514180024, "grad_norm": 0.1415167599916458, "learning_rate": 0.01, "loss": 2.0036, "step": 34812 }, { "epoch": 3.5773736128236746, "grad_norm": 0.04465651139616966, "learning_rate": 0.01, "loss": 1.9881, "step": 34815 }, { "epoch": 3.5776818742293464, "grad_norm": 0.05919940024614334, "learning_rate": 0.01, "loss": 2.0257, "step": 34818 }, { "epoch": 3.5779901356350186, "grad_norm": 0.03873635083436966, "learning_rate": 0.01, "loss": 2.0034, "step": 34821 }, { "epoch": 3.5782983970406903, "grad_norm": 0.05425990745425224, "learning_rate": 0.01, "loss": 1.9911, "step": 34824 }, { "epoch": 3.5786066584463625, "grad_norm": 0.11823546886444092, "learning_rate": 0.01, "loss": 1.9999, "step": 34827 }, { "epoch": 3.5789149198520347, "grad_norm": 0.12624318897724152, "learning_rate": 0.01, "loss": 2.0124, "step": 34830 }, { "epoch": 3.5792231812577064, "grad_norm": 0.05450008437037468, "learning_rate": 0.01, "loss": 1.9941, "step": 34833 }, { "epoch": 3.5795314426633786, "grad_norm": 0.04888772964477539, "learning_rate": 0.01, "loss": 2.007, "step": 34836 }, { "epoch": 3.579839704069051, "grad_norm": 0.033485714346170425, "learning_rate": 0.01, "loss": 2.0066, "step": 34839 }, { "epoch": 3.5801479654747226, "grad_norm": 0.08570647239685059, "learning_rate": 0.01, "loss": 2.0157, "step": 34842 }, { "epoch": 3.5804562268803943, "grad_norm": 0.06110506132245064, "learning_rate": 0.01, "loss": 2.0054, "step": 34845 }, { "epoch": 3.5807644882860665, "grad_norm": 0.08815490454435349, "learning_rate": 0.01, "loss": 1.9991, "step": 34848 }, { "epoch": 3.5810727496917387, "grad_norm": 0.09731253236532211, "learning_rate": 0.01, "loss": 1.9763, "step": 34851 }, { "epoch": 3.5813810110974105, "grad_norm": 0.08022457361221313, "learning_rate": 0.01, "loss": 2.0095, "step": 34854 }, { "epoch": 3.5816892725030827, "grad_norm": 0.042772453278303146, "learning_rate": 0.01, "loss": 2.0041, "step": 34857 }, { "epoch": 3.581997533908755, "grad_norm": 0.030573785305023193, "learning_rate": 0.01, "loss": 1.9908, "step": 34860 }, { "epoch": 3.5823057953144266, "grad_norm": 0.05686675012111664, "learning_rate": 0.01, "loss": 2.0131, "step": 34863 }, { "epoch": 3.582614056720099, "grad_norm": 0.04291863366961479, "learning_rate": 0.01, "loss": 1.9925, "step": 34866 }, { "epoch": 3.5829223181257706, "grad_norm": 0.0715685561299324, "learning_rate": 0.01, "loss": 2.0052, "step": 34869 }, { "epoch": 3.5832305795314427, "grad_norm": 0.06964897364377975, "learning_rate": 0.01, "loss": 2.0019, "step": 34872 }, { "epoch": 3.5835388409371145, "grad_norm": 0.043709173798561096, "learning_rate": 0.01, "loss": 1.9957, "step": 34875 }, { "epoch": 3.5838471023427867, "grad_norm": 0.06593792140483856, "learning_rate": 0.01, "loss": 2.0034, "step": 34878 }, { "epoch": 3.584155363748459, "grad_norm": 0.04702428728342056, "learning_rate": 0.01, "loss": 2.0174, "step": 34881 }, { "epoch": 3.5844636251541306, "grad_norm": 0.08917208015918732, "learning_rate": 0.01, "loss": 1.9914, "step": 34884 }, { "epoch": 3.584771886559803, "grad_norm": 0.1167001873254776, "learning_rate": 0.01, "loss": 2.0183, "step": 34887 }, { "epoch": 3.5850801479654746, "grad_norm": 0.07775711268186569, "learning_rate": 0.01, "loss": 2.0254, "step": 34890 }, { "epoch": 3.585388409371147, "grad_norm": 0.08915986865758896, "learning_rate": 0.01, "loss": 1.9832, "step": 34893 }, { "epoch": 3.5856966707768185, "grad_norm": 0.045163143426179886, "learning_rate": 0.01, "loss": 2.0024, "step": 34896 }, { "epoch": 3.5860049321824907, "grad_norm": 0.0571911595761776, "learning_rate": 0.01, "loss": 1.9897, "step": 34899 }, { "epoch": 3.586313193588163, "grad_norm": 0.0707104504108429, "learning_rate": 0.01, "loss": 2.0291, "step": 34902 }, { "epoch": 3.5866214549938347, "grad_norm": 0.04241738095879555, "learning_rate": 0.01, "loss": 1.9858, "step": 34905 }, { "epoch": 3.586929716399507, "grad_norm": 0.045163869857788086, "learning_rate": 0.01, "loss": 2.004, "step": 34908 }, { "epoch": 3.587237977805179, "grad_norm": 0.03191215172410011, "learning_rate": 0.01, "loss": 2.017, "step": 34911 }, { "epoch": 3.587546239210851, "grad_norm": 0.033771395683288574, "learning_rate": 0.01, "loss": 2.0137, "step": 34914 }, { "epoch": 3.5878545006165226, "grad_norm": 0.06919904053211212, "learning_rate": 0.01, "loss": 1.9974, "step": 34917 }, { "epoch": 3.5881627620221948, "grad_norm": 0.05089094117283821, "learning_rate": 0.01, "loss": 2.0174, "step": 34920 }, { "epoch": 3.588471023427867, "grad_norm": 0.10625668615102768, "learning_rate": 0.01, "loss": 2.0118, "step": 34923 }, { "epoch": 3.5887792848335387, "grad_norm": 0.05255312845110893, "learning_rate": 0.01, "loss": 1.9957, "step": 34926 }, { "epoch": 3.589087546239211, "grad_norm": 0.09395511448383331, "learning_rate": 0.01, "loss": 1.986, "step": 34929 }, { "epoch": 3.589395807644883, "grad_norm": 0.04209842532873154, "learning_rate": 0.01, "loss": 1.9993, "step": 34932 }, { "epoch": 3.589704069050555, "grad_norm": 0.0340830534696579, "learning_rate": 0.01, "loss": 2.036, "step": 34935 }, { "epoch": 3.590012330456227, "grad_norm": 0.041663311421871185, "learning_rate": 0.01, "loss": 1.9921, "step": 34938 }, { "epoch": 3.590320591861899, "grad_norm": 0.044666144996881485, "learning_rate": 0.01, "loss": 2.0027, "step": 34941 }, { "epoch": 3.590628853267571, "grad_norm": 0.05369654670357704, "learning_rate": 0.01, "loss": 2.0291, "step": 34944 }, { "epoch": 3.5909371146732427, "grad_norm": 0.054408960044384, "learning_rate": 0.01, "loss": 1.9996, "step": 34947 }, { "epoch": 3.591245376078915, "grad_norm": 0.09313920885324478, "learning_rate": 0.01, "loss": 1.9848, "step": 34950 }, { "epoch": 3.591553637484587, "grad_norm": 0.05207030475139618, "learning_rate": 0.01, "loss": 1.9925, "step": 34953 }, { "epoch": 3.591861898890259, "grad_norm": 0.06822825968265533, "learning_rate": 0.01, "loss": 2.0096, "step": 34956 }, { "epoch": 3.592170160295931, "grad_norm": 0.05392748489975929, "learning_rate": 0.01, "loss": 2.0129, "step": 34959 }, { "epoch": 3.592478421701603, "grad_norm": 0.06300345808267593, "learning_rate": 0.01, "loss": 2.0259, "step": 34962 }, { "epoch": 3.592786683107275, "grad_norm": 0.09526319801807404, "learning_rate": 0.01, "loss": 1.982, "step": 34965 }, { "epoch": 3.5930949445129468, "grad_norm": 0.060867104679346085, "learning_rate": 0.01, "loss": 2.0216, "step": 34968 }, { "epoch": 3.593403205918619, "grad_norm": 0.09763433039188385, "learning_rate": 0.01, "loss": 2.0151, "step": 34971 }, { "epoch": 3.593711467324291, "grad_norm": 0.05499181151390076, "learning_rate": 0.01, "loss": 2.0221, "step": 34974 }, { "epoch": 3.594019728729963, "grad_norm": 0.18828648328781128, "learning_rate": 0.01, "loss": 2.0094, "step": 34977 }, { "epoch": 3.594327990135635, "grad_norm": 0.1367756426334381, "learning_rate": 0.01, "loss": 1.9861, "step": 34980 }, { "epoch": 3.5946362515413073, "grad_norm": 0.0752996876835823, "learning_rate": 0.01, "loss": 1.9973, "step": 34983 }, { "epoch": 3.594944512946979, "grad_norm": 0.07500961422920227, "learning_rate": 0.01, "loss": 2.0196, "step": 34986 }, { "epoch": 3.595252774352651, "grad_norm": 0.08114928752183914, "learning_rate": 0.01, "loss": 1.9768, "step": 34989 }, { "epoch": 3.595561035758323, "grad_norm": 0.0735306516289711, "learning_rate": 0.01, "loss": 2.0046, "step": 34992 }, { "epoch": 3.595869297163995, "grad_norm": 0.04386765882372856, "learning_rate": 0.01, "loss": 2.0095, "step": 34995 }, { "epoch": 3.596177558569667, "grad_norm": 0.052437882870435715, "learning_rate": 0.01, "loss": 2.0008, "step": 34998 }, { "epoch": 3.596485819975339, "grad_norm": 0.044234346598386765, "learning_rate": 0.01, "loss": 2.0145, "step": 35001 }, { "epoch": 3.5967940813810113, "grad_norm": 0.04302847385406494, "learning_rate": 0.01, "loss": 2.0284, "step": 35004 }, { "epoch": 3.597102342786683, "grad_norm": 0.047404494136571884, "learning_rate": 0.01, "loss": 1.9965, "step": 35007 }, { "epoch": 3.5974106041923553, "grad_norm": 0.04339216649532318, "learning_rate": 0.01, "loss": 1.9888, "step": 35010 }, { "epoch": 3.597718865598027, "grad_norm": 0.07121206820011139, "learning_rate": 0.01, "loss": 1.9935, "step": 35013 }, { "epoch": 3.598027127003699, "grad_norm": 0.06314744055271149, "learning_rate": 0.01, "loss": 2.0062, "step": 35016 }, { "epoch": 3.598335388409371, "grad_norm": 0.0557650588452816, "learning_rate": 0.01, "loss": 1.9947, "step": 35019 }, { "epoch": 3.598643649815043, "grad_norm": 0.043066419661045074, "learning_rate": 0.01, "loss": 2.0061, "step": 35022 }, { "epoch": 3.5989519112207153, "grad_norm": 0.0739695131778717, "learning_rate": 0.01, "loss": 2.0139, "step": 35025 }, { "epoch": 3.599260172626387, "grad_norm": 0.13933037221431732, "learning_rate": 0.01, "loss": 1.9982, "step": 35028 }, { "epoch": 3.5995684340320593, "grad_norm": 0.03341522440314293, "learning_rate": 0.01, "loss": 1.9969, "step": 35031 }, { "epoch": 3.599876695437731, "grad_norm": 0.07127437740564346, "learning_rate": 0.01, "loss": 1.9988, "step": 35034 }, { "epoch": 3.6001849568434032, "grad_norm": 0.05078571289777756, "learning_rate": 0.01, "loss": 1.9961, "step": 35037 }, { "epoch": 3.600493218249075, "grad_norm": 0.049321308732032776, "learning_rate": 0.01, "loss": 2.0083, "step": 35040 }, { "epoch": 3.600801479654747, "grad_norm": 0.04840512201189995, "learning_rate": 0.01, "loss": 2.0086, "step": 35043 }, { "epoch": 3.6011097410604194, "grad_norm": 0.05541226640343666, "learning_rate": 0.01, "loss": 1.9975, "step": 35046 }, { "epoch": 3.601418002466091, "grad_norm": 0.03456197306513786, "learning_rate": 0.01, "loss": 2.0024, "step": 35049 }, { "epoch": 3.6017262638717633, "grad_norm": 0.0342303030192852, "learning_rate": 0.01, "loss": 2.0113, "step": 35052 }, { "epoch": 3.6020345252774355, "grad_norm": 0.043709222227334976, "learning_rate": 0.01, "loss": 1.9818, "step": 35055 }, { "epoch": 3.6023427866831073, "grad_norm": 0.05025354400277138, "learning_rate": 0.01, "loss": 1.9958, "step": 35058 }, { "epoch": 3.6026510480887795, "grad_norm": 0.0903453454375267, "learning_rate": 0.01, "loss": 1.9858, "step": 35061 }, { "epoch": 3.602959309494451, "grad_norm": 0.029984181746840477, "learning_rate": 0.01, "loss": 2.0049, "step": 35064 }, { "epoch": 3.6032675709001234, "grad_norm": 0.03956552594900131, "learning_rate": 0.01, "loss": 1.9821, "step": 35067 }, { "epoch": 3.603575832305795, "grad_norm": 0.0535871721804142, "learning_rate": 0.01, "loss": 2.0002, "step": 35070 }, { "epoch": 3.6038840937114673, "grad_norm": 0.07621357589960098, "learning_rate": 0.01, "loss": 1.9978, "step": 35073 }, { "epoch": 3.6041923551171395, "grad_norm": 0.08122174441814423, "learning_rate": 0.01, "loss": 2.0012, "step": 35076 }, { "epoch": 3.6045006165228113, "grad_norm": 0.04478932544589043, "learning_rate": 0.01, "loss": 2.0016, "step": 35079 }, { "epoch": 3.6048088779284835, "grad_norm": 0.07432077080011368, "learning_rate": 0.01, "loss": 1.9881, "step": 35082 }, { "epoch": 3.6051171393341552, "grad_norm": 0.08805304020643234, "learning_rate": 0.01, "loss": 2.0014, "step": 35085 }, { "epoch": 3.6054254007398274, "grad_norm": 0.15478205680847168, "learning_rate": 0.01, "loss": 1.9907, "step": 35088 }, { "epoch": 3.605733662145499, "grad_norm": 0.11871577054262161, "learning_rate": 0.01, "loss": 2.0125, "step": 35091 }, { "epoch": 3.6060419235511714, "grad_norm": 0.06366606056690216, "learning_rate": 0.01, "loss": 2.0011, "step": 35094 }, { "epoch": 3.6063501849568436, "grad_norm": 0.10421419143676758, "learning_rate": 0.01, "loss": 1.9947, "step": 35097 }, { "epoch": 3.6066584463625153, "grad_norm": 0.08190574496984482, "learning_rate": 0.01, "loss": 1.9893, "step": 35100 }, { "epoch": 3.6069667077681875, "grad_norm": 0.03852493688464165, "learning_rate": 0.01, "loss": 2.0112, "step": 35103 }, { "epoch": 3.6072749691738597, "grad_norm": 0.04792521148920059, "learning_rate": 0.01, "loss": 2.0063, "step": 35106 }, { "epoch": 3.6075832305795315, "grad_norm": 0.04788130521774292, "learning_rate": 0.01, "loss": 2.0138, "step": 35109 }, { "epoch": 3.607891491985203, "grad_norm": 0.06820268929004669, "learning_rate": 0.01, "loss": 1.9813, "step": 35112 }, { "epoch": 3.6081997533908754, "grad_norm": 0.05702923610806465, "learning_rate": 0.01, "loss": 2.0261, "step": 35115 }, { "epoch": 3.6085080147965476, "grad_norm": 0.10051339119672775, "learning_rate": 0.01, "loss": 1.9805, "step": 35118 }, { "epoch": 3.6088162762022193, "grad_norm": 0.07067910581827164, "learning_rate": 0.01, "loss": 2.0049, "step": 35121 }, { "epoch": 3.6091245376078915, "grad_norm": 0.03547900170087814, "learning_rate": 0.01, "loss": 1.9887, "step": 35124 }, { "epoch": 3.6094327990135637, "grad_norm": 0.062315478920936584, "learning_rate": 0.01, "loss": 2.0035, "step": 35127 }, { "epoch": 3.6097410604192355, "grad_norm": 0.12956464290618896, "learning_rate": 0.01, "loss": 2.0106, "step": 35130 }, { "epoch": 3.6100493218249077, "grad_norm": 0.1570357233285904, "learning_rate": 0.01, "loss": 2.0408, "step": 35133 }, { "epoch": 3.6103575832305794, "grad_norm": 0.06165264546871185, "learning_rate": 0.01, "loss": 2.0178, "step": 35136 }, { "epoch": 3.6106658446362516, "grad_norm": 0.046166177839040756, "learning_rate": 0.01, "loss": 2.0237, "step": 35139 }, { "epoch": 3.6109741060419234, "grad_norm": 0.052293550223112106, "learning_rate": 0.01, "loss": 1.9686, "step": 35142 }, { "epoch": 3.6112823674475956, "grad_norm": 0.046433527022600174, "learning_rate": 0.01, "loss": 1.9964, "step": 35145 }, { "epoch": 3.6115906288532678, "grad_norm": 0.029988888651132584, "learning_rate": 0.01, "loss": 1.9536, "step": 35148 }, { "epoch": 3.6118988902589395, "grad_norm": 0.05726629123091698, "learning_rate": 0.01, "loss": 2.0312, "step": 35151 }, { "epoch": 3.6122071516646117, "grad_norm": 0.054002657532691956, "learning_rate": 0.01, "loss": 1.9853, "step": 35154 }, { "epoch": 3.6125154130702835, "grad_norm": 0.05368361249566078, "learning_rate": 0.01, "loss": 1.978, "step": 35157 }, { "epoch": 3.6128236744759556, "grad_norm": 0.04732634872198105, "learning_rate": 0.01, "loss": 1.9876, "step": 35160 }, { "epoch": 3.6131319358816274, "grad_norm": 0.05976017564535141, "learning_rate": 0.01, "loss": 1.9695, "step": 35163 }, { "epoch": 3.6134401972872996, "grad_norm": 0.04517058655619621, "learning_rate": 0.01, "loss": 2.0137, "step": 35166 }, { "epoch": 3.613748458692972, "grad_norm": 0.12990356981754303, "learning_rate": 0.01, "loss": 2.0157, "step": 35169 }, { "epoch": 3.6140567200986435, "grad_norm": 0.059477876871824265, "learning_rate": 0.01, "loss": 1.9885, "step": 35172 }, { "epoch": 3.6143649815043157, "grad_norm": 0.05780341103672981, "learning_rate": 0.01, "loss": 2.0084, "step": 35175 }, { "epoch": 3.614673242909988, "grad_norm": 0.03971264883875847, "learning_rate": 0.01, "loss": 2.0061, "step": 35178 }, { "epoch": 3.6149815043156597, "grad_norm": 0.04069376364350319, "learning_rate": 0.01, "loss": 2.0013, "step": 35181 }, { "epoch": 3.6152897657213314, "grad_norm": 0.049401164054870605, "learning_rate": 0.01, "loss": 1.9785, "step": 35184 }, { "epoch": 3.6155980271270036, "grad_norm": 0.058909036219120026, "learning_rate": 0.01, "loss": 1.9979, "step": 35187 }, { "epoch": 3.615906288532676, "grad_norm": 0.1364602893590927, "learning_rate": 0.01, "loss": 2.0126, "step": 35190 }, { "epoch": 3.6162145499383476, "grad_norm": 0.0892588272690773, "learning_rate": 0.01, "loss": 1.9942, "step": 35193 }, { "epoch": 3.6165228113440198, "grad_norm": 0.03974326699972153, "learning_rate": 0.01, "loss": 2.0159, "step": 35196 }, { "epoch": 3.616831072749692, "grad_norm": 0.03898739442229271, "learning_rate": 0.01, "loss": 2.0303, "step": 35199 }, { "epoch": 3.6171393341553637, "grad_norm": 0.044964905828237534, "learning_rate": 0.01, "loss": 2.015, "step": 35202 }, { "epoch": 3.617447595561036, "grad_norm": 0.11181700229644775, "learning_rate": 0.01, "loss": 1.996, "step": 35205 }, { "epoch": 3.6177558569667077, "grad_norm": 0.0697384923696518, "learning_rate": 0.01, "loss": 2.0091, "step": 35208 }, { "epoch": 3.61806411837238, "grad_norm": 0.03923085704445839, "learning_rate": 0.01, "loss": 1.9976, "step": 35211 }, { "epoch": 3.6183723797780516, "grad_norm": 0.03635834902524948, "learning_rate": 0.01, "loss": 2.0082, "step": 35214 }, { "epoch": 3.618680641183724, "grad_norm": 0.05013216286897659, "learning_rate": 0.01, "loss": 2.0005, "step": 35217 }, { "epoch": 3.618988902589396, "grad_norm": 0.03531458228826523, "learning_rate": 0.01, "loss": 2.0256, "step": 35220 }, { "epoch": 3.6192971639950677, "grad_norm": 0.09228625893592834, "learning_rate": 0.01, "loss": 1.9941, "step": 35223 }, { "epoch": 3.61960542540074, "grad_norm": 0.06587129831314087, "learning_rate": 0.01, "loss": 2.0179, "step": 35226 }, { "epoch": 3.6199136868064117, "grad_norm": 0.11610520631074905, "learning_rate": 0.01, "loss": 2.0364, "step": 35229 }, { "epoch": 3.620221948212084, "grad_norm": 0.05594666674733162, "learning_rate": 0.01, "loss": 1.9921, "step": 35232 }, { "epoch": 3.6205302096177556, "grad_norm": 0.04040682688355446, "learning_rate": 0.01, "loss": 2.0213, "step": 35235 }, { "epoch": 3.620838471023428, "grad_norm": 0.038559917360544205, "learning_rate": 0.01, "loss": 1.9874, "step": 35238 }, { "epoch": 3.6211467324291, "grad_norm": 0.05012970045208931, "learning_rate": 0.01, "loss": 1.9959, "step": 35241 }, { "epoch": 3.6214549938347718, "grad_norm": 0.09769418835639954, "learning_rate": 0.01, "loss": 2.0246, "step": 35244 }, { "epoch": 3.621763255240444, "grad_norm": 0.07791107892990112, "learning_rate": 0.01, "loss": 2.0021, "step": 35247 }, { "epoch": 3.622071516646116, "grad_norm": 0.06556175649166107, "learning_rate": 0.01, "loss": 2.0175, "step": 35250 }, { "epoch": 3.622379778051788, "grad_norm": 0.04608479142189026, "learning_rate": 0.01, "loss": 2.0082, "step": 35253 }, { "epoch": 3.6226880394574597, "grad_norm": 0.03681867569684982, "learning_rate": 0.01, "loss": 2.0005, "step": 35256 }, { "epoch": 3.622996300863132, "grad_norm": 0.05940413847565651, "learning_rate": 0.01, "loss": 1.9957, "step": 35259 }, { "epoch": 3.623304562268804, "grad_norm": 0.09911686182022095, "learning_rate": 0.01, "loss": 1.9991, "step": 35262 }, { "epoch": 3.623612823674476, "grad_norm": 0.03739270940423012, "learning_rate": 0.01, "loss": 2.0285, "step": 35265 }, { "epoch": 3.623921085080148, "grad_norm": 0.11673395335674286, "learning_rate": 0.01, "loss": 2.0093, "step": 35268 }, { "epoch": 3.62422934648582, "grad_norm": 0.0795954018831253, "learning_rate": 0.01, "loss": 1.9954, "step": 35271 }, { "epoch": 3.624537607891492, "grad_norm": 0.04180069640278816, "learning_rate": 0.01, "loss": 1.9844, "step": 35274 }, { "epoch": 3.624845869297164, "grad_norm": 0.09516190737485886, "learning_rate": 0.01, "loss": 2.0031, "step": 35277 }, { "epoch": 3.625154130702836, "grad_norm": 0.11448989808559418, "learning_rate": 0.01, "loss": 2.0236, "step": 35280 }, { "epoch": 3.625462392108508, "grad_norm": 0.06945902854204178, "learning_rate": 0.01, "loss": 2.0211, "step": 35283 }, { "epoch": 3.62577065351418, "grad_norm": 0.039453648030757904, "learning_rate": 0.01, "loss": 2.0038, "step": 35286 }, { "epoch": 3.626078914919852, "grad_norm": 0.06266641616821289, "learning_rate": 0.01, "loss": 2.0267, "step": 35289 }, { "epoch": 3.626387176325524, "grad_norm": 0.04908052831888199, "learning_rate": 0.01, "loss": 1.9993, "step": 35292 }, { "epoch": 3.626695437731196, "grad_norm": 0.06186684966087341, "learning_rate": 0.01, "loss": 1.9813, "step": 35295 }, { "epoch": 3.627003699136868, "grad_norm": 0.13145780563354492, "learning_rate": 0.01, "loss": 1.9967, "step": 35298 }, { "epoch": 3.6273119605425403, "grad_norm": 0.05850294232368469, "learning_rate": 0.01, "loss": 1.9963, "step": 35301 }, { "epoch": 3.627620221948212, "grad_norm": 0.04934421926736832, "learning_rate": 0.01, "loss": 1.9952, "step": 35304 }, { "epoch": 3.627928483353884, "grad_norm": 0.07607220858335495, "learning_rate": 0.01, "loss": 2.0091, "step": 35307 }, { "epoch": 3.628236744759556, "grad_norm": 0.046615466475486755, "learning_rate": 0.01, "loss": 2.0037, "step": 35310 }, { "epoch": 3.6285450061652282, "grad_norm": 0.11176659911870956, "learning_rate": 0.01, "loss": 2.0077, "step": 35313 }, { "epoch": 3.6288532675709, "grad_norm": 0.07844175398349762, "learning_rate": 0.01, "loss": 2.0198, "step": 35316 }, { "epoch": 3.629161528976572, "grad_norm": 0.0730755403637886, "learning_rate": 0.01, "loss": 2.0014, "step": 35319 }, { "epoch": 3.6294697903822444, "grad_norm": 0.08503863960504532, "learning_rate": 0.01, "loss": 2.005, "step": 35322 }, { "epoch": 3.629778051787916, "grad_norm": 0.03936958312988281, "learning_rate": 0.01, "loss": 2.0211, "step": 35325 }, { "epoch": 3.6300863131935883, "grad_norm": 0.11586350947618484, "learning_rate": 0.01, "loss": 2.0054, "step": 35328 }, { "epoch": 3.63039457459926, "grad_norm": 0.06128397583961487, "learning_rate": 0.01, "loss": 2.0076, "step": 35331 }, { "epoch": 3.6307028360049323, "grad_norm": 0.04872961342334747, "learning_rate": 0.01, "loss": 2.017, "step": 35334 }, { "epoch": 3.631011097410604, "grad_norm": 0.08416412770748138, "learning_rate": 0.01, "loss": 2.0144, "step": 35337 }, { "epoch": 3.631319358816276, "grad_norm": 0.05844739452004433, "learning_rate": 0.01, "loss": 2.001, "step": 35340 }, { "epoch": 3.6316276202219484, "grad_norm": 0.08564214408397675, "learning_rate": 0.01, "loss": 2.0061, "step": 35343 }, { "epoch": 3.63193588162762, "grad_norm": 0.08769746124744415, "learning_rate": 0.01, "loss": 1.9914, "step": 35346 }, { "epoch": 3.6322441430332923, "grad_norm": 0.03819827735424042, "learning_rate": 0.01, "loss": 2.0103, "step": 35349 }, { "epoch": 3.632552404438964, "grad_norm": 0.12311212718486786, "learning_rate": 0.01, "loss": 1.9921, "step": 35352 }, { "epoch": 3.6328606658446363, "grad_norm": 0.037630997598171234, "learning_rate": 0.01, "loss": 2.007, "step": 35355 }, { "epoch": 3.633168927250308, "grad_norm": 0.08739642798900604, "learning_rate": 0.01, "loss": 2.0113, "step": 35358 }, { "epoch": 3.6334771886559802, "grad_norm": 0.04889992997050285, "learning_rate": 0.01, "loss": 2.0098, "step": 35361 }, { "epoch": 3.6337854500616524, "grad_norm": 0.035435836762189865, "learning_rate": 0.01, "loss": 2.0045, "step": 35364 }, { "epoch": 3.634093711467324, "grad_norm": 0.059248197823762894, "learning_rate": 0.01, "loss": 2.0159, "step": 35367 }, { "epoch": 3.6344019728729964, "grad_norm": 0.08831764757633209, "learning_rate": 0.01, "loss": 1.992, "step": 35370 }, { "epoch": 3.6347102342786686, "grad_norm": 0.14940893650054932, "learning_rate": 0.01, "loss": 2.0051, "step": 35373 }, { "epoch": 3.6350184956843403, "grad_norm": 0.06616901606321335, "learning_rate": 0.01, "loss": 1.9825, "step": 35376 }, { "epoch": 3.635326757090012, "grad_norm": 0.06391241401433945, "learning_rate": 0.01, "loss": 1.9636, "step": 35379 }, { "epoch": 3.6356350184956843, "grad_norm": 0.0580880343914032, "learning_rate": 0.01, "loss": 1.975, "step": 35382 }, { "epoch": 3.6359432799013565, "grad_norm": 0.057876862585544586, "learning_rate": 0.01, "loss": 1.9888, "step": 35385 }, { "epoch": 3.636251541307028, "grad_norm": 0.044347915798425674, "learning_rate": 0.01, "loss": 2.0051, "step": 35388 }, { "epoch": 3.6365598027127004, "grad_norm": 0.0400017574429512, "learning_rate": 0.01, "loss": 1.9722, "step": 35391 }, { "epoch": 3.6368680641183726, "grad_norm": 0.11369085311889648, "learning_rate": 0.01, "loss": 1.9855, "step": 35394 }, { "epoch": 3.6371763255240444, "grad_norm": 0.10995481163263321, "learning_rate": 0.01, "loss": 2.0037, "step": 35397 }, { "epoch": 3.6374845869297165, "grad_norm": 0.05769447609782219, "learning_rate": 0.01, "loss": 2.0109, "step": 35400 }, { "epoch": 3.6377928483353883, "grad_norm": 0.07872482389211655, "learning_rate": 0.01, "loss": 2.0101, "step": 35403 }, { "epoch": 3.6381011097410605, "grad_norm": 0.05196002870798111, "learning_rate": 0.01, "loss": 2.0108, "step": 35406 }, { "epoch": 3.6384093711467322, "grad_norm": 0.0493265725672245, "learning_rate": 0.01, "loss": 2.0283, "step": 35409 }, { "epoch": 3.6387176325524044, "grad_norm": 0.04237900674343109, "learning_rate": 0.01, "loss": 2.0216, "step": 35412 }, { "epoch": 3.6390258939580766, "grad_norm": 0.06882897764444351, "learning_rate": 0.01, "loss": 2.0073, "step": 35415 }, { "epoch": 3.6393341553637484, "grad_norm": 0.058774422854185104, "learning_rate": 0.01, "loss": 2.0114, "step": 35418 }, { "epoch": 3.6396424167694206, "grad_norm": 0.10032794624567032, "learning_rate": 0.01, "loss": 2.0008, "step": 35421 }, { "epoch": 3.6399506781750923, "grad_norm": 0.042988426983356476, "learning_rate": 0.01, "loss": 2.0311, "step": 35424 }, { "epoch": 3.6402589395807645, "grad_norm": 0.055391326546669006, "learning_rate": 0.01, "loss": 2.0019, "step": 35427 }, { "epoch": 3.6405672009864363, "grad_norm": 0.03977194428443909, "learning_rate": 0.01, "loss": 1.9992, "step": 35430 }, { "epoch": 3.6408754623921085, "grad_norm": 0.0903034657239914, "learning_rate": 0.01, "loss": 2.0207, "step": 35433 }, { "epoch": 3.6411837237977807, "grad_norm": 0.04610143229365349, "learning_rate": 0.01, "loss": 2.0015, "step": 35436 }, { "epoch": 3.6414919852034524, "grad_norm": 0.08295582234859467, "learning_rate": 0.01, "loss": 1.9848, "step": 35439 }, { "epoch": 3.6418002466091246, "grad_norm": 0.09561655670404434, "learning_rate": 0.01, "loss": 1.9931, "step": 35442 }, { "epoch": 3.642108508014797, "grad_norm": 0.07968761771917343, "learning_rate": 0.01, "loss": 1.9925, "step": 35445 }, { "epoch": 3.6424167694204685, "grad_norm": 0.04458128660917282, "learning_rate": 0.01, "loss": 2.0112, "step": 35448 }, { "epoch": 3.6427250308261403, "grad_norm": 0.12892895936965942, "learning_rate": 0.01, "loss": 2.0004, "step": 35451 }, { "epoch": 3.6430332922318125, "grad_norm": 0.1063292846083641, "learning_rate": 0.01, "loss": 2.0165, "step": 35454 }, { "epoch": 3.6433415536374847, "grad_norm": 0.04187742993235588, "learning_rate": 0.01, "loss": 2.0146, "step": 35457 }, { "epoch": 3.6436498150431564, "grad_norm": 0.06096494942903519, "learning_rate": 0.01, "loss": 1.9935, "step": 35460 }, { "epoch": 3.6439580764488286, "grad_norm": 0.035430386662483215, "learning_rate": 0.01, "loss": 2.0034, "step": 35463 }, { "epoch": 3.644266337854501, "grad_norm": 0.039337921887636185, "learning_rate": 0.01, "loss": 2.0163, "step": 35466 }, { "epoch": 3.6445745992601726, "grad_norm": 0.04446728155016899, "learning_rate": 0.01, "loss": 1.9804, "step": 35469 }, { "epoch": 3.6448828606658448, "grad_norm": 0.06175538897514343, "learning_rate": 0.01, "loss": 1.9991, "step": 35472 }, { "epoch": 3.6451911220715165, "grad_norm": 0.1305261105298996, "learning_rate": 0.01, "loss": 2.008, "step": 35475 }, { "epoch": 3.6454993834771887, "grad_norm": 0.14111317694187164, "learning_rate": 0.01, "loss": 1.9938, "step": 35478 }, { "epoch": 3.6458076448828605, "grad_norm": 0.13947373628616333, "learning_rate": 0.01, "loss": 2.0083, "step": 35481 }, { "epoch": 3.6461159062885327, "grad_norm": 0.057133182883262634, "learning_rate": 0.01, "loss": 1.9899, "step": 35484 }, { "epoch": 3.646424167694205, "grad_norm": 0.03709038347005844, "learning_rate": 0.01, "loss": 1.9825, "step": 35487 }, { "epoch": 3.6467324290998766, "grad_norm": 0.04786526411771774, "learning_rate": 0.01, "loss": 1.993, "step": 35490 }, { "epoch": 3.647040690505549, "grad_norm": 0.03816988691687584, "learning_rate": 0.01, "loss": 1.9762, "step": 35493 }, { "epoch": 3.6473489519112205, "grad_norm": 0.05153171718120575, "learning_rate": 0.01, "loss": 2.0236, "step": 35496 }, { "epoch": 3.6476572133168927, "grad_norm": 0.09072964638471603, "learning_rate": 0.01, "loss": 2.0153, "step": 35499 }, { "epoch": 3.6479654747225645, "grad_norm": 0.06302040815353394, "learning_rate": 0.01, "loss": 1.9963, "step": 35502 }, { "epoch": 3.6482737361282367, "grad_norm": 0.04940348491072655, "learning_rate": 0.01, "loss": 2.0015, "step": 35505 }, { "epoch": 3.648581997533909, "grad_norm": 0.04952861741185188, "learning_rate": 0.01, "loss": 2.0, "step": 35508 }, { "epoch": 3.6488902589395806, "grad_norm": 0.03227638080716133, "learning_rate": 0.01, "loss": 1.9961, "step": 35511 }, { "epoch": 3.649198520345253, "grad_norm": 0.04237101599574089, "learning_rate": 0.01, "loss": 2.019, "step": 35514 }, { "epoch": 3.649506781750925, "grad_norm": 0.10213712602853775, "learning_rate": 0.01, "loss": 2.0028, "step": 35517 }, { "epoch": 3.6498150431565968, "grad_norm": 0.0747971460223198, "learning_rate": 0.01, "loss": 2.0102, "step": 35520 }, { "epoch": 3.650123304562269, "grad_norm": 0.07060317695140839, "learning_rate": 0.01, "loss": 1.9951, "step": 35523 }, { "epoch": 3.6504315659679407, "grad_norm": 0.13745035231113434, "learning_rate": 0.01, "loss": 2.0171, "step": 35526 }, { "epoch": 3.650739827373613, "grad_norm": 0.11544306576251984, "learning_rate": 0.01, "loss": 2.0374, "step": 35529 }, { "epoch": 3.6510480887792847, "grad_norm": 0.07669739425182343, "learning_rate": 0.01, "loss": 2.0227, "step": 35532 }, { "epoch": 3.651356350184957, "grad_norm": 0.04519101604819298, "learning_rate": 0.01, "loss": 2.0136, "step": 35535 }, { "epoch": 3.651664611590629, "grad_norm": 0.04446371644735336, "learning_rate": 0.01, "loss": 2.0065, "step": 35538 }, { "epoch": 3.651972872996301, "grad_norm": 0.05518916994333267, "learning_rate": 0.01, "loss": 2.0036, "step": 35541 }, { "epoch": 3.652281134401973, "grad_norm": 0.04405786469578743, "learning_rate": 0.01, "loss": 2.0106, "step": 35544 }, { "epoch": 3.6525893958076447, "grad_norm": 0.06154394894838333, "learning_rate": 0.01, "loss": 2.0076, "step": 35547 }, { "epoch": 3.652897657213317, "grad_norm": 0.07919877767562866, "learning_rate": 0.01, "loss": 1.9895, "step": 35550 }, { "epoch": 3.6532059186189887, "grad_norm": 0.0575590617954731, "learning_rate": 0.01, "loss": 2.011, "step": 35553 }, { "epoch": 3.653514180024661, "grad_norm": 0.05680471658706665, "learning_rate": 0.01, "loss": 2.0096, "step": 35556 }, { "epoch": 3.653822441430333, "grad_norm": 0.0352899394929409, "learning_rate": 0.01, "loss": 1.9937, "step": 35559 }, { "epoch": 3.654130702836005, "grad_norm": 0.034432023763656616, "learning_rate": 0.01, "loss": 2.004, "step": 35562 }, { "epoch": 3.654438964241677, "grad_norm": 0.10298652946949005, "learning_rate": 0.01, "loss": 1.9992, "step": 35565 }, { "epoch": 3.654747225647349, "grad_norm": 0.03996056690812111, "learning_rate": 0.01, "loss": 2.007, "step": 35568 }, { "epoch": 3.655055487053021, "grad_norm": 0.07411230355501175, "learning_rate": 0.01, "loss": 1.9981, "step": 35571 }, { "epoch": 3.6553637484586927, "grad_norm": 0.04124278202652931, "learning_rate": 0.01, "loss": 1.9989, "step": 35574 }, { "epoch": 3.655672009864365, "grad_norm": 0.035065338015556335, "learning_rate": 0.01, "loss": 1.9903, "step": 35577 }, { "epoch": 3.655980271270037, "grad_norm": 0.04023493826389313, "learning_rate": 0.01, "loss": 2.0126, "step": 35580 }, { "epoch": 3.656288532675709, "grad_norm": 0.0333552286028862, "learning_rate": 0.01, "loss": 2.0139, "step": 35583 }, { "epoch": 3.656596794081381, "grad_norm": 0.1286098062992096, "learning_rate": 0.01, "loss": 1.9847, "step": 35586 }, { "epoch": 3.6569050554870532, "grad_norm": 0.061940498650074005, "learning_rate": 0.01, "loss": 2.0093, "step": 35589 }, { "epoch": 3.657213316892725, "grad_norm": 0.08766448497772217, "learning_rate": 0.01, "loss": 2.0062, "step": 35592 }, { "epoch": 3.657521578298397, "grad_norm": 0.07218505442142487, "learning_rate": 0.01, "loss": 1.9914, "step": 35595 }, { "epoch": 3.657829839704069, "grad_norm": 0.11700677126646042, "learning_rate": 0.01, "loss": 2.0164, "step": 35598 }, { "epoch": 3.658138101109741, "grad_norm": 0.05746941268444061, "learning_rate": 0.01, "loss": 1.9741, "step": 35601 }, { "epoch": 3.658446362515413, "grad_norm": 0.0517272874712944, "learning_rate": 0.01, "loss": 1.9803, "step": 35604 }, { "epoch": 3.658754623921085, "grad_norm": 0.04524237662553787, "learning_rate": 0.01, "loss": 1.9912, "step": 35607 }, { "epoch": 3.6590628853267573, "grad_norm": 0.053268514573574066, "learning_rate": 0.01, "loss": 2.0099, "step": 35610 }, { "epoch": 3.659371146732429, "grad_norm": 0.06391450762748718, "learning_rate": 0.01, "loss": 1.9958, "step": 35613 }, { "epoch": 3.659679408138101, "grad_norm": 0.06404808908700943, "learning_rate": 0.01, "loss": 2.0093, "step": 35616 }, { "epoch": 3.659987669543773, "grad_norm": 0.06734498590230942, "learning_rate": 0.01, "loss": 2.0135, "step": 35619 }, { "epoch": 3.660295930949445, "grad_norm": 0.056514717638492584, "learning_rate": 0.01, "loss": 2.0195, "step": 35622 }, { "epoch": 3.660604192355117, "grad_norm": 0.04377966374158859, "learning_rate": 0.01, "loss": 1.9945, "step": 35625 }, { "epoch": 3.660912453760789, "grad_norm": 0.0527014285326004, "learning_rate": 0.01, "loss": 2.0011, "step": 35628 }, { "epoch": 3.6612207151664613, "grad_norm": 0.08603314310312271, "learning_rate": 0.01, "loss": 1.9736, "step": 35631 }, { "epoch": 3.661528976572133, "grad_norm": 0.06637904793024063, "learning_rate": 0.01, "loss": 2.0078, "step": 35634 }, { "epoch": 3.6618372379778052, "grad_norm": 0.042211759835481644, "learning_rate": 0.01, "loss": 1.9934, "step": 35637 }, { "epoch": 3.6621454993834774, "grad_norm": 0.08589443564414978, "learning_rate": 0.01, "loss": 2.0076, "step": 35640 }, { "epoch": 3.662453760789149, "grad_norm": 0.12761181592941284, "learning_rate": 0.01, "loss": 1.9956, "step": 35643 }, { "epoch": 3.662762022194821, "grad_norm": 0.08702104538679123, "learning_rate": 0.01, "loss": 2.0147, "step": 35646 }, { "epoch": 3.663070283600493, "grad_norm": 0.1481538712978363, "learning_rate": 0.01, "loss": 2.0028, "step": 35649 }, { "epoch": 3.6633785450061653, "grad_norm": 0.05475825071334839, "learning_rate": 0.01, "loss": 1.991, "step": 35652 }, { "epoch": 3.663686806411837, "grad_norm": 0.0481879860162735, "learning_rate": 0.01, "loss": 2.0244, "step": 35655 }, { "epoch": 3.6639950678175093, "grad_norm": 0.07099757343530655, "learning_rate": 0.01, "loss": 2.027, "step": 35658 }, { "epoch": 3.6643033292231815, "grad_norm": 0.09161633253097534, "learning_rate": 0.01, "loss": 2.0139, "step": 35661 }, { "epoch": 3.664611590628853, "grad_norm": 0.06675172597169876, "learning_rate": 0.01, "loss": 1.9944, "step": 35664 }, { "epoch": 3.6649198520345254, "grad_norm": 0.06166239082813263, "learning_rate": 0.01, "loss": 1.992, "step": 35667 }, { "epoch": 3.665228113440197, "grad_norm": 0.07947093993425369, "learning_rate": 0.01, "loss": 2.0233, "step": 35670 }, { "epoch": 3.6655363748458694, "grad_norm": 0.07639001309871674, "learning_rate": 0.01, "loss": 2.0246, "step": 35673 }, { "epoch": 3.665844636251541, "grad_norm": 0.11109112948179245, "learning_rate": 0.01, "loss": 2.033, "step": 35676 }, { "epoch": 3.6661528976572133, "grad_norm": 0.0517781600356102, "learning_rate": 0.01, "loss": 2.0, "step": 35679 }, { "epoch": 3.6664611590628855, "grad_norm": 0.05615885183215141, "learning_rate": 0.01, "loss": 1.9858, "step": 35682 }, { "epoch": 3.6667694204685573, "grad_norm": 0.054977256804704666, "learning_rate": 0.01, "loss": 1.983, "step": 35685 }, { "epoch": 3.6670776818742294, "grad_norm": 0.08208134025335312, "learning_rate": 0.01, "loss": 2.0038, "step": 35688 }, { "epoch": 3.667385943279901, "grad_norm": 0.053141169250011444, "learning_rate": 0.01, "loss": 2.0, "step": 35691 }, { "epoch": 3.6676942046855734, "grad_norm": 0.0867038443684578, "learning_rate": 0.01, "loss": 1.9992, "step": 35694 }, { "epoch": 3.668002466091245, "grad_norm": 0.04153072461485863, "learning_rate": 0.01, "loss": 2.0156, "step": 35697 }, { "epoch": 3.6683107274969173, "grad_norm": 0.050722386687994, "learning_rate": 0.01, "loss": 2.0029, "step": 35700 }, { "epoch": 3.6686189889025895, "grad_norm": 0.036444198340177536, "learning_rate": 0.01, "loss": 1.98, "step": 35703 }, { "epoch": 3.6689272503082613, "grad_norm": 0.06851452589035034, "learning_rate": 0.01, "loss": 2.0365, "step": 35706 }, { "epoch": 3.6692355117139335, "grad_norm": 0.05442693457007408, "learning_rate": 0.01, "loss": 1.9783, "step": 35709 }, { "epoch": 3.6695437731196057, "grad_norm": 0.04789496958255768, "learning_rate": 0.01, "loss": 1.9919, "step": 35712 }, { "epoch": 3.6698520345252774, "grad_norm": 0.05229181796312332, "learning_rate": 0.01, "loss": 2.0011, "step": 35715 }, { "epoch": 3.670160295930949, "grad_norm": 0.05596926435828209, "learning_rate": 0.01, "loss": 1.9906, "step": 35718 }, { "epoch": 3.6704685573366214, "grad_norm": 0.04481478035449982, "learning_rate": 0.01, "loss": 2.0036, "step": 35721 }, { "epoch": 3.6707768187422936, "grad_norm": 0.053352151066064835, "learning_rate": 0.01, "loss": 1.9952, "step": 35724 }, { "epoch": 3.6710850801479653, "grad_norm": 0.07115049660205841, "learning_rate": 0.01, "loss": 1.9967, "step": 35727 }, { "epoch": 3.6713933415536375, "grad_norm": 0.06887483596801758, "learning_rate": 0.01, "loss": 1.9872, "step": 35730 }, { "epoch": 3.6717016029593097, "grad_norm": 0.14638669788837433, "learning_rate": 0.01, "loss": 2.0211, "step": 35733 }, { "epoch": 3.6720098643649814, "grad_norm": 0.06305021792650223, "learning_rate": 0.01, "loss": 2.0146, "step": 35736 }, { "epoch": 3.6723181257706536, "grad_norm": 0.041157372295856476, "learning_rate": 0.01, "loss": 2.0066, "step": 35739 }, { "epoch": 3.6726263871763254, "grad_norm": 0.10821440070867538, "learning_rate": 0.01, "loss": 1.9949, "step": 35742 }, { "epoch": 3.6729346485819976, "grad_norm": 0.06525052338838577, "learning_rate": 0.01, "loss": 2.0093, "step": 35745 }, { "epoch": 3.6732429099876693, "grad_norm": 0.03519715368747711, "learning_rate": 0.01, "loss": 2.0086, "step": 35748 }, { "epoch": 3.6735511713933415, "grad_norm": 0.0548231340944767, "learning_rate": 0.01, "loss": 1.9969, "step": 35751 }, { "epoch": 3.6738594327990137, "grad_norm": 0.08685827255249023, "learning_rate": 0.01, "loss": 1.9985, "step": 35754 }, { "epoch": 3.6741676942046855, "grad_norm": 0.05920972675085068, "learning_rate": 0.01, "loss": 1.9882, "step": 35757 }, { "epoch": 3.6744759556103577, "grad_norm": 0.060545727610588074, "learning_rate": 0.01, "loss": 2.0057, "step": 35760 }, { "epoch": 3.67478421701603, "grad_norm": 0.05436963587999344, "learning_rate": 0.01, "loss": 2.0023, "step": 35763 }, { "epoch": 3.6750924784217016, "grad_norm": 0.05975078046321869, "learning_rate": 0.01, "loss": 2.0112, "step": 35766 }, { "epoch": 3.6754007398273734, "grad_norm": 0.05793645977973938, "learning_rate": 0.01, "loss": 1.9861, "step": 35769 }, { "epoch": 3.6757090012330456, "grad_norm": 0.07932164520025253, "learning_rate": 0.01, "loss": 1.9808, "step": 35772 }, { "epoch": 3.6760172626387178, "grad_norm": 0.05182117223739624, "learning_rate": 0.01, "loss": 1.9987, "step": 35775 }, { "epoch": 3.6763255240443895, "grad_norm": 0.05408613011240959, "learning_rate": 0.01, "loss": 1.9912, "step": 35778 }, { "epoch": 3.6766337854500617, "grad_norm": 0.0396132618188858, "learning_rate": 0.01, "loss": 1.9922, "step": 35781 }, { "epoch": 3.676942046855734, "grad_norm": 0.04768325388431549, "learning_rate": 0.01, "loss": 2.0055, "step": 35784 }, { "epoch": 3.6772503082614056, "grad_norm": 0.09452294558286667, "learning_rate": 0.01, "loss": 1.9927, "step": 35787 }, { "epoch": 3.677558569667078, "grad_norm": 0.09846623986959457, "learning_rate": 0.01, "loss": 2.0182, "step": 35790 }, { "epoch": 3.6778668310727496, "grad_norm": 0.1176767498254776, "learning_rate": 0.01, "loss": 1.989, "step": 35793 }, { "epoch": 3.678175092478422, "grad_norm": 0.07070811092853546, "learning_rate": 0.01, "loss": 2.0283, "step": 35796 }, { "epoch": 3.6784833538840935, "grad_norm": 0.04359079524874687, "learning_rate": 0.01, "loss": 1.9875, "step": 35799 }, { "epoch": 3.6787916152897657, "grad_norm": 0.05718059092760086, "learning_rate": 0.01, "loss": 2.0186, "step": 35802 }, { "epoch": 3.679099876695438, "grad_norm": 0.03475908935070038, "learning_rate": 0.01, "loss": 1.9996, "step": 35805 }, { "epoch": 3.6794081381011097, "grad_norm": 0.03629058972001076, "learning_rate": 0.01, "loss": 2.0111, "step": 35808 }, { "epoch": 3.679716399506782, "grad_norm": 0.11145780235528946, "learning_rate": 0.01, "loss": 1.989, "step": 35811 }, { "epoch": 3.6800246609124536, "grad_norm": 0.07155486941337585, "learning_rate": 0.01, "loss": 2.025, "step": 35814 }, { "epoch": 3.680332922318126, "grad_norm": 0.05105472728610039, "learning_rate": 0.01, "loss": 2.0109, "step": 35817 }, { "epoch": 3.6806411837237976, "grad_norm": 0.04478003829717636, "learning_rate": 0.01, "loss": 2.0011, "step": 35820 }, { "epoch": 3.6809494451294698, "grad_norm": 0.03695627674460411, "learning_rate": 0.01, "loss": 2.0145, "step": 35823 }, { "epoch": 3.681257706535142, "grad_norm": 0.04222894459962845, "learning_rate": 0.01, "loss": 1.9761, "step": 35826 }, { "epoch": 3.6815659679408137, "grad_norm": 0.04362770542502403, "learning_rate": 0.01, "loss": 2.0082, "step": 35829 }, { "epoch": 3.681874229346486, "grad_norm": 0.07785011827945709, "learning_rate": 0.01, "loss": 1.9881, "step": 35832 }, { "epoch": 3.682182490752158, "grad_norm": 0.08382470905780792, "learning_rate": 0.01, "loss": 1.9778, "step": 35835 }, { "epoch": 3.68249075215783, "grad_norm": 0.09897731244564056, "learning_rate": 0.01, "loss": 1.9845, "step": 35838 }, { "epoch": 3.6827990135635016, "grad_norm": 0.05617908388376236, "learning_rate": 0.01, "loss": 1.977, "step": 35841 }, { "epoch": 3.683107274969174, "grad_norm": 0.07543769478797913, "learning_rate": 0.01, "loss": 1.9984, "step": 35844 }, { "epoch": 3.683415536374846, "grad_norm": 0.04036831855773926, "learning_rate": 0.01, "loss": 1.9944, "step": 35847 }, { "epoch": 3.6837237977805177, "grad_norm": 0.03254294767975807, "learning_rate": 0.01, "loss": 1.9971, "step": 35850 }, { "epoch": 3.68403205918619, "grad_norm": 0.08003180474042892, "learning_rate": 0.01, "loss": 1.9754, "step": 35853 }, { "epoch": 3.684340320591862, "grad_norm": 0.0821150541305542, "learning_rate": 0.01, "loss": 1.9944, "step": 35856 }, { "epoch": 3.684648581997534, "grad_norm": 0.15571752190589905, "learning_rate": 0.01, "loss": 1.997, "step": 35859 }, { "epoch": 3.684956843403206, "grad_norm": 0.07748742401599884, "learning_rate": 0.01, "loss": 2.001, "step": 35862 }, { "epoch": 3.685265104808878, "grad_norm": 0.08278176933526993, "learning_rate": 0.01, "loss": 2.0004, "step": 35865 }, { "epoch": 3.68557336621455, "grad_norm": 0.08014661818742752, "learning_rate": 0.01, "loss": 1.9751, "step": 35868 }, { "epoch": 3.6858816276202218, "grad_norm": 0.03511490300297737, "learning_rate": 0.01, "loss": 2.006, "step": 35871 }, { "epoch": 3.686189889025894, "grad_norm": 0.04790462180972099, "learning_rate": 0.01, "loss": 2.0058, "step": 35874 }, { "epoch": 3.686498150431566, "grad_norm": 0.05917114019393921, "learning_rate": 0.01, "loss": 1.9945, "step": 35877 }, { "epoch": 3.686806411837238, "grad_norm": 0.09026920050382614, "learning_rate": 0.01, "loss": 2.0278, "step": 35880 }, { "epoch": 3.68711467324291, "grad_norm": 0.07264431565999985, "learning_rate": 0.01, "loss": 1.9974, "step": 35883 }, { "epoch": 3.687422934648582, "grad_norm": 0.03798295557498932, "learning_rate": 0.01, "loss": 1.9907, "step": 35886 }, { "epoch": 3.687731196054254, "grad_norm": 0.1147773265838623, "learning_rate": 0.01, "loss": 1.9827, "step": 35889 }, { "epoch": 3.688039457459926, "grad_norm": 0.05281820893287659, "learning_rate": 0.01, "loss": 1.9969, "step": 35892 }, { "epoch": 3.688347718865598, "grad_norm": 0.0617707259953022, "learning_rate": 0.01, "loss": 1.9983, "step": 35895 }, { "epoch": 3.68865598027127, "grad_norm": 0.05084836110472679, "learning_rate": 0.01, "loss": 2.0124, "step": 35898 }, { "epoch": 3.688964241676942, "grad_norm": 0.04952919855713844, "learning_rate": 0.01, "loss": 1.9969, "step": 35901 }, { "epoch": 3.689272503082614, "grad_norm": 0.08321710675954819, "learning_rate": 0.01, "loss": 2.0127, "step": 35904 }, { "epoch": 3.6895807644882863, "grad_norm": 0.04074413329362869, "learning_rate": 0.01, "loss": 2.0056, "step": 35907 }, { "epoch": 3.689889025893958, "grad_norm": 0.11404154449701309, "learning_rate": 0.01, "loss": 2.0102, "step": 35910 }, { "epoch": 3.69019728729963, "grad_norm": 0.07283198833465576, "learning_rate": 0.01, "loss": 2.011, "step": 35913 }, { "epoch": 3.690505548705302, "grad_norm": 0.0645599216222763, "learning_rate": 0.01, "loss": 2.0343, "step": 35916 }, { "epoch": 3.690813810110974, "grad_norm": 0.0537913516163826, "learning_rate": 0.01, "loss": 2.0142, "step": 35919 }, { "epoch": 3.691122071516646, "grad_norm": 0.04984259232878685, "learning_rate": 0.01, "loss": 1.9863, "step": 35922 }, { "epoch": 3.691430332922318, "grad_norm": 0.05349741503596306, "learning_rate": 0.01, "loss": 1.9853, "step": 35925 }, { "epoch": 3.6917385943279903, "grad_norm": 0.05882362276315689, "learning_rate": 0.01, "loss": 2.0053, "step": 35928 }, { "epoch": 3.692046855733662, "grad_norm": 0.0809783861041069, "learning_rate": 0.01, "loss": 1.9967, "step": 35931 }, { "epoch": 3.6923551171393343, "grad_norm": 0.07075916230678558, "learning_rate": 0.01, "loss": 1.9872, "step": 35934 }, { "epoch": 3.692663378545006, "grad_norm": 0.058204181492328644, "learning_rate": 0.01, "loss": 1.9847, "step": 35937 }, { "epoch": 3.6929716399506782, "grad_norm": 0.10149111598730087, "learning_rate": 0.01, "loss": 2.0006, "step": 35940 }, { "epoch": 3.69327990135635, "grad_norm": 0.0722658559679985, "learning_rate": 0.01, "loss": 2.0364, "step": 35943 }, { "epoch": 3.693588162762022, "grad_norm": 0.08981306105852127, "learning_rate": 0.01, "loss": 2.0254, "step": 35946 }, { "epoch": 3.6938964241676944, "grad_norm": 0.04798738285899162, "learning_rate": 0.01, "loss": 2.0099, "step": 35949 }, { "epoch": 3.694204685573366, "grad_norm": 0.0338914729654789, "learning_rate": 0.01, "loss": 1.9843, "step": 35952 }, { "epoch": 3.6945129469790383, "grad_norm": 0.04813714697957039, "learning_rate": 0.01, "loss": 1.9948, "step": 35955 }, { "epoch": 3.6948212083847105, "grad_norm": 0.11086717247962952, "learning_rate": 0.01, "loss": 1.9963, "step": 35958 }, { "epoch": 3.6951294697903823, "grad_norm": 0.09754761308431625, "learning_rate": 0.01, "loss": 1.9965, "step": 35961 }, { "epoch": 3.695437731196054, "grad_norm": 0.07381515204906464, "learning_rate": 0.01, "loss": 1.9921, "step": 35964 }, { "epoch": 3.695745992601726, "grad_norm": 0.0501638762652874, "learning_rate": 0.01, "loss": 2.0231, "step": 35967 }, { "epoch": 3.6960542540073984, "grad_norm": 0.04152151197195053, "learning_rate": 0.01, "loss": 2.0033, "step": 35970 }, { "epoch": 3.69636251541307, "grad_norm": 0.06455028802156448, "learning_rate": 0.01, "loss": 2.0266, "step": 35973 }, { "epoch": 3.6966707768187423, "grad_norm": 0.07925435900688171, "learning_rate": 0.01, "loss": 2.0153, "step": 35976 }, { "epoch": 3.6969790382244145, "grad_norm": 0.17305727303028107, "learning_rate": 0.01, "loss": 1.9923, "step": 35979 }, { "epoch": 3.6972872996300863, "grad_norm": 0.04664710536599159, "learning_rate": 0.01, "loss": 1.9811, "step": 35982 }, { "epoch": 3.6975955610357585, "grad_norm": 0.03971162810921669, "learning_rate": 0.01, "loss": 2.0065, "step": 35985 }, { "epoch": 3.6979038224414302, "grad_norm": 0.04888729378581047, "learning_rate": 0.01, "loss": 1.9901, "step": 35988 }, { "epoch": 3.6982120838471024, "grad_norm": 0.03428930416703224, "learning_rate": 0.01, "loss": 1.9902, "step": 35991 }, { "epoch": 3.698520345252774, "grad_norm": 0.04609334468841553, "learning_rate": 0.01, "loss": 1.9818, "step": 35994 }, { "epoch": 3.6988286066584464, "grad_norm": 0.06719902157783508, "learning_rate": 0.01, "loss": 2.0144, "step": 35997 }, { "epoch": 3.6991368680641186, "grad_norm": 0.0409255288541317, "learning_rate": 0.01, "loss": 1.9893, "step": 36000 }, { "epoch": 3.6994451294697903, "grad_norm": 0.03683490306138992, "learning_rate": 0.01, "loss": 1.9709, "step": 36003 }, { "epoch": 3.6997533908754625, "grad_norm": 0.050057847052812576, "learning_rate": 0.01, "loss": 1.9875, "step": 36006 }, { "epoch": 3.7000616522811343, "grad_norm": 0.06051032990217209, "learning_rate": 0.01, "loss": 2.0034, "step": 36009 }, { "epoch": 3.7003699136868065, "grad_norm": 0.141363725066185, "learning_rate": 0.01, "loss": 1.9931, "step": 36012 }, { "epoch": 3.700678175092478, "grad_norm": 0.04017074033617973, "learning_rate": 0.01, "loss": 1.9919, "step": 36015 }, { "epoch": 3.7009864364981504, "grad_norm": 0.05374070256948471, "learning_rate": 0.01, "loss": 1.9975, "step": 36018 }, { "epoch": 3.7012946979038226, "grad_norm": 0.07037216424942017, "learning_rate": 0.01, "loss": 2.014, "step": 36021 }, { "epoch": 3.7016029593094943, "grad_norm": 0.06400209665298462, "learning_rate": 0.01, "loss": 2.0035, "step": 36024 }, { "epoch": 3.7019112207151665, "grad_norm": 0.04186910763382912, "learning_rate": 0.01, "loss": 1.9881, "step": 36027 }, { "epoch": 3.7022194821208387, "grad_norm": 0.05100405216217041, "learning_rate": 0.01, "loss": 2.0009, "step": 36030 }, { "epoch": 3.7025277435265105, "grad_norm": 0.06786265224218369, "learning_rate": 0.01, "loss": 2.0041, "step": 36033 }, { "epoch": 3.7028360049321822, "grad_norm": 0.058080319315195084, "learning_rate": 0.01, "loss": 1.9748, "step": 36036 }, { "epoch": 3.7031442663378544, "grad_norm": 0.09919019788503647, "learning_rate": 0.01, "loss": 2.0016, "step": 36039 }, { "epoch": 3.7034525277435266, "grad_norm": 0.08252627402544022, "learning_rate": 0.01, "loss": 2.0154, "step": 36042 }, { "epoch": 3.7037607891491984, "grad_norm": 0.06026385724544525, "learning_rate": 0.01, "loss": 1.9877, "step": 36045 }, { "epoch": 3.7040690505548706, "grad_norm": 0.08052903413772583, "learning_rate": 0.01, "loss": 1.9951, "step": 36048 }, { "epoch": 3.7043773119605428, "grad_norm": 0.0790569856762886, "learning_rate": 0.01, "loss": 1.9877, "step": 36051 }, { "epoch": 3.7046855733662145, "grad_norm": 0.04464380070567131, "learning_rate": 0.01, "loss": 2.018, "step": 36054 }, { "epoch": 3.7049938347718867, "grad_norm": 0.06567423790693283, "learning_rate": 0.01, "loss": 1.9886, "step": 36057 }, { "epoch": 3.7053020961775585, "grad_norm": 0.06602831929922104, "learning_rate": 0.01, "loss": 1.9934, "step": 36060 }, { "epoch": 3.7056103575832307, "grad_norm": 0.06838707625865936, "learning_rate": 0.01, "loss": 1.9853, "step": 36063 }, { "epoch": 3.7059186189889024, "grad_norm": 0.09807326644659042, "learning_rate": 0.01, "loss": 1.9919, "step": 36066 }, { "epoch": 3.7062268803945746, "grad_norm": 0.06948495656251907, "learning_rate": 0.01, "loss": 1.992, "step": 36069 }, { "epoch": 3.706535141800247, "grad_norm": 0.07893040776252747, "learning_rate": 0.01, "loss": 1.9804, "step": 36072 }, { "epoch": 3.7068434032059185, "grad_norm": 0.04618433490395546, "learning_rate": 0.01, "loss": 2.0149, "step": 36075 }, { "epoch": 3.7071516646115907, "grad_norm": 0.04784570261836052, "learning_rate": 0.01, "loss": 2.0144, "step": 36078 }, { "epoch": 3.7074599260172625, "grad_norm": 0.03364891558885574, "learning_rate": 0.01, "loss": 1.9914, "step": 36081 }, { "epoch": 3.7077681874229347, "grad_norm": 0.03379710018634796, "learning_rate": 0.01, "loss": 1.994, "step": 36084 }, { "epoch": 3.7080764488286064, "grad_norm": 0.03952077031135559, "learning_rate": 0.01, "loss": 2.0159, "step": 36087 }, { "epoch": 3.7083847102342786, "grad_norm": 0.07781558483839035, "learning_rate": 0.01, "loss": 2.0045, "step": 36090 }, { "epoch": 3.708692971639951, "grad_norm": 0.04583312198519707, "learning_rate": 0.01, "loss": 1.9942, "step": 36093 }, { "epoch": 3.7090012330456226, "grad_norm": 0.08673562854528427, "learning_rate": 0.01, "loss": 1.9915, "step": 36096 }, { "epoch": 3.7093094944512948, "grad_norm": 0.05132952705025673, "learning_rate": 0.01, "loss": 1.9993, "step": 36099 }, { "epoch": 3.709617755856967, "grad_norm": 0.09407106041908264, "learning_rate": 0.01, "loss": 2.0028, "step": 36102 }, { "epoch": 3.7099260172626387, "grad_norm": 0.052429962903261185, "learning_rate": 0.01, "loss": 1.9971, "step": 36105 }, { "epoch": 3.7102342786683105, "grad_norm": 0.10452061146497726, "learning_rate": 0.01, "loss": 1.991, "step": 36108 }, { "epoch": 3.7105425400739827, "grad_norm": 0.03275587409734726, "learning_rate": 0.01, "loss": 1.9941, "step": 36111 }, { "epoch": 3.710850801479655, "grad_norm": 0.10667680203914642, "learning_rate": 0.01, "loss": 1.9775, "step": 36114 }, { "epoch": 3.7111590628853266, "grad_norm": 0.08240865170955658, "learning_rate": 0.01, "loss": 2.0113, "step": 36117 }, { "epoch": 3.711467324290999, "grad_norm": 0.0850924402475357, "learning_rate": 0.01, "loss": 1.9963, "step": 36120 }, { "epoch": 3.711775585696671, "grad_norm": 0.03906584531068802, "learning_rate": 0.01, "loss": 1.9695, "step": 36123 }, { "epoch": 3.7120838471023427, "grad_norm": 0.04797567054629326, "learning_rate": 0.01, "loss": 2.0276, "step": 36126 }, { "epoch": 3.712392108508015, "grad_norm": 0.042494967579841614, "learning_rate": 0.01, "loss": 1.977, "step": 36129 }, { "epoch": 3.7127003699136867, "grad_norm": 0.05491645634174347, "learning_rate": 0.01, "loss": 2.0033, "step": 36132 }, { "epoch": 3.713008631319359, "grad_norm": 0.10729935020208359, "learning_rate": 0.01, "loss": 2.0001, "step": 36135 }, { "epoch": 3.7133168927250306, "grad_norm": 0.043170586228370667, "learning_rate": 0.01, "loss": 2.0007, "step": 36138 }, { "epoch": 3.713625154130703, "grad_norm": 0.08662062138319016, "learning_rate": 0.01, "loss": 2.0094, "step": 36141 }, { "epoch": 3.713933415536375, "grad_norm": 0.11379045993089676, "learning_rate": 0.01, "loss": 1.9953, "step": 36144 }, { "epoch": 3.7142416769420468, "grad_norm": 0.03698570281267166, "learning_rate": 0.01, "loss": 1.9863, "step": 36147 }, { "epoch": 3.714549938347719, "grad_norm": 0.11952532082796097, "learning_rate": 0.01, "loss": 1.9923, "step": 36150 }, { "epoch": 3.7148581997533907, "grad_norm": 0.06563457101583481, "learning_rate": 0.01, "loss": 1.9981, "step": 36153 }, { "epoch": 3.715166461159063, "grad_norm": 0.07150832563638687, "learning_rate": 0.01, "loss": 2.012, "step": 36156 }, { "epoch": 3.7154747225647347, "grad_norm": 0.04657771810889244, "learning_rate": 0.01, "loss": 2.0056, "step": 36159 }, { "epoch": 3.715782983970407, "grad_norm": 0.055245641618967056, "learning_rate": 0.01, "loss": 2.0095, "step": 36162 }, { "epoch": 3.716091245376079, "grad_norm": 0.040487829595804214, "learning_rate": 0.01, "loss": 2.0027, "step": 36165 }, { "epoch": 3.716399506781751, "grad_norm": 0.12351846694946289, "learning_rate": 0.01, "loss": 1.9858, "step": 36168 }, { "epoch": 3.716707768187423, "grad_norm": 0.053484536707401276, "learning_rate": 0.01, "loss": 2.0019, "step": 36171 }, { "epoch": 3.717016029593095, "grad_norm": 0.06141964718699455, "learning_rate": 0.01, "loss": 2.0013, "step": 36174 }, { "epoch": 3.717324290998767, "grad_norm": 0.09509648382663727, "learning_rate": 0.01, "loss": 1.9949, "step": 36177 }, { "epoch": 3.717632552404439, "grad_norm": 0.0969184935092926, "learning_rate": 0.01, "loss": 2.0168, "step": 36180 }, { "epoch": 3.717940813810111, "grad_norm": 0.0884992703795433, "learning_rate": 0.01, "loss": 2.0186, "step": 36183 }, { "epoch": 3.718249075215783, "grad_norm": 0.07382161915302277, "learning_rate": 0.01, "loss": 1.9974, "step": 36186 }, { "epoch": 3.718557336621455, "grad_norm": 0.07776398956775665, "learning_rate": 0.01, "loss": 2.0259, "step": 36189 }, { "epoch": 3.718865598027127, "grad_norm": 0.07049771398305893, "learning_rate": 0.01, "loss": 2.0103, "step": 36192 }, { "epoch": 3.719173859432799, "grad_norm": 0.08669892698526382, "learning_rate": 0.01, "loss": 1.9649, "step": 36195 }, { "epoch": 3.719482120838471, "grad_norm": 0.05756537616252899, "learning_rate": 0.01, "loss": 2.0025, "step": 36198 }, { "epoch": 3.719790382244143, "grad_norm": 0.06954919546842575, "learning_rate": 0.01, "loss": 2.0101, "step": 36201 }, { "epoch": 3.720098643649815, "grad_norm": 0.09297992289066315, "learning_rate": 0.01, "loss": 1.9945, "step": 36204 }, { "epoch": 3.720406905055487, "grad_norm": 0.07941876351833344, "learning_rate": 0.01, "loss": 2.0001, "step": 36207 }, { "epoch": 3.720715166461159, "grad_norm": 0.0698881521821022, "learning_rate": 0.01, "loss": 1.9782, "step": 36210 }, { "epoch": 3.721023427866831, "grad_norm": 0.09939584881067276, "learning_rate": 0.01, "loss": 2.0096, "step": 36213 }, { "epoch": 3.7213316892725032, "grad_norm": 0.059318870306015015, "learning_rate": 0.01, "loss": 1.9965, "step": 36216 }, { "epoch": 3.721639950678175, "grad_norm": 0.06797734647989273, "learning_rate": 0.01, "loss": 1.992, "step": 36219 }, { "epoch": 3.721948212083847, "grad_norm": 0.03932074084877968, "learning_rate": 0.01, "loss": 2.012, "step": 36222 }, { "epoch": 3.7222564734895194, "grad_norm": 0.07458118349313736, "learning_rate": 0.01, "loss": 2.0184, "step": 36225 }, { "epoch": 3.722564734895191, "grad_norm": 0.09383490681648254, "learning_rate": 0.01, "loss": 2.0282, "step": 36228 }, { "epoch": 3.722872996300863, "grad_norm": 0.06527485698461533, "learning_rate": 0.01, "loss": 2.0048, "step": 36231 }, { "epoch": 3.723181257706535, "grad_norm": 0.050437018275260925, "learning_rate": 0.01, "loss": 1.9992, "step": 36234 }, { "epoch": 3.7234895191122073, "grad_norm": 0.04380796477198601, "learning_rate": 0.01, "loss": 2.0181, "step": 36237 }, { "epoch": 3.723797780517879, "grad_norm": 0.04857274517416954, "learning_rate": 0.01, "loss": 2.0172, "step": 36240 }, { "epoch": 3.724106041923551, "grad_norm": 0.032346438616514206, "learning_rate": 0.01, "loss": 1.9889, "step": 36243 }, { "epoch": 3.7244143033292234, "grad_norm": 0.045984454452991486, "learning_rate": 0.01, "loss": 1.9947, "step": 36246 }, { "epoch": 3.724722564734895, "grad_norm": 0.058199312537908554, "learning_rate": 0.01, "loss": 2.0142, "step": 36249 }, { "epoch": 3.7250308261405674, "grad_norm": 0.0391390286386013, "learning_rate": 0.01, "loss": 2.0105, "step": 36252 }, { "epoch": 3.725339087546239, "grad_norm": 0.058478716760873795, "learning_rate": 0.01, "loss": 2.0171, "step": 36255 }, { "epoch": 3.7256473489519113, "grad_norm": 0.10977184772491455, "learning_rate": 0.01, "loss": 1.995, "step": 36258 }, { "epoch": 3.725955610357583, "grad_norm": 0.06433524191379547, "learning_rate": 0.01, "loss": 2.0038, "step": 36261 }, { "epoch": 3.7262638717632552, "grad_norm": 0.07871260493993759, "learning_rate": 0.01, "loss": 1.9805, "step": 36264 }, { "epoch": 3.7265721331689274, "grad_norm": 0.09282384812831879, "learning_rate": 0.01, "loss": 1.991, "step": 36267 }, { "epoch": 3.726880394574599, "grad_norm": 0.03165189549326897, "learning_rate": 0.01, "loss": 1.9864, "step": 36270 }, { "epoch": 3.7271886559802714, "grad_norm": 0.0789475068449974, "learning_rate": 0.01, "loss": 2.0143, "step": 36273 }, { "epoch": 3.727496917385943, "grad_norm": 0.13641710579395294, "learning_rate": 0.01, "loss": 1.996, "step": 36276 }, { "epoch": 3.7278051787916153, "grad_norm": 0.08470667898654938, "learning_rate": 0.01, "loss": 1.9953, "step": 36279 }, { "epoch": 3.728113440197287, "grad_norm": 0.09110618382692337, "learning_rate": 0.01, "loss": 2.005, "step": 36282 }, { "epoch": 3.7284217016029593, "grad_norm": 0.079217828810215, "learning_rate": 0.01, "loss": 1.9694, "step": 36285 }, { "epoch": 3.7287299630086315, "grad_norm": 0.03365809842944145, "learning_rate": 0.01, "loss": 1.998, "step": 36288 }, { "epoch": 3.729038224414303, "grad_norm": 0.0691695362329483, "learning_rate": 0.01, "loss": 1.9971, "step": 36291 }, { "epoch": 3.7293464858199754, "grad_norm": 0.03766563534736633, "learning_rate": 0.01, "loss": 2.0054, "step": 36294 }, { "epoch": 3.7296547472256476, "grad_norm": 0.13410314917564392, "learning_rate": 0.01, "loss": 2.0295, "step": 36297 }, { "epoch": 3.7299630086313194, "grad_norm": 0.08052795380353928, "learning_rate": 0.01, "loss": 1.988, "step": 36300 }, { "epoch": 3.730271270036991, "grad_norm": 0.04205624386668205, "learning_rate": 0.01, "loss": 2.0013, "step": 36303 }, { "epoch": 3.7305795314426633, "grad_norm": 0.055557433515787125, "learning_rate": 0.01, "loss": 2.0097, "step": 36306 }, { "epoch": 3.7308877928483355, "grad_norm": 0.08415018022060394, "learning_rate": 0.01, "loss": 2.0069, "step": 36309 }, { "epoch": 3.7311960542540072, "grad_norm": 0.057384416460990906, "learning_rate": 0.01, "loss": 2.0164, "step": 36312 }, { "epoch": 3.7315043156596794, "grad_norm": 0.046833690255880356, "learning_rate": 0.01, "loss": 1.9747, "step": 36315 }, { "epoch": 3.7318125770653516, "grad_norm": 0.0527094230055809, "learning_rate": 0.01, "loss": 1.9959, "step": 36318 }, { "epoch": 3.7321208384710234, "grad_norm": 0.04863157868385315, "learning_rate": 0.01, "loss": 2.0082, "step": 36321 }, { "epoch": 3.7324290998766956, "grad_norm": 0.05578998848795891, "learning_rate": 0.01, "loss": 1.9724, "step": 36324 }, { "epoch": 3.7327373612823673, "grad_norm": 0.08895915746688843, "learning_rate": 0.01, "loss": 2.0025, "step": 36327 }, { "epoch": 3.7330456226880395, "grad_norm": 0.14340227842330933, "learning_rate": 0.01, "loss": 1.999, "step": 36330 }, { "epoch": 3.7333538840937113, "grad_norm": 0.074753038585186, "learning_rate": 0.01, "loss": 1.9733, "step": 36333 }, { "epoch": 3.7336621454993835, "grad_norm": 0.05069069564342499, "learning_rate": 0.01, "loss": 2.025, "step": 36336 }, { "epoch": 3.7339704069050557, "grad_norm": 0.0417485274374485, "learning_rate": 0.01, "loss": 2.0078, "step": 36339 }, { "epoch": 3.7342786683107274, "grad_norm": 0.04727747291326523, "learning_rate": 0.01, "loss": 2.0165, "step": 36342 }, { "epoch": 3.7345869297163996, "grad_norm": 0.056336645036935806, "learning_rate": 0.01, "loss": 1.9956, "step": 36345 }, { "epoch": 3.7348951911220714, "grad_norm": 0.09889603406190872, "learning_rate": 0.01, "loss": 1.9908, "step": 36348 }, { "epoch": 3.7352034525277436, "grad_norm": 0.06653422117233276, "learning_rate": 0.01, "loss": 1.9675, "step": 36351 }, { "epoch": 3.7355117139334153, "grad_norm": 0.05115421861410141, "learning_rate": 0.01, "loss": 2.0004, "step": 36354 }, { "epoch": 3.7358199753390875, "grad_norm": 0.04250407963991165, "learning_rate": 0.01, "loss": 1.9803, "step": 36357 }, { "epoch": 3.7361282367447597, "grad_norm": 0.037854380905628204, "learning_rate": 0.01, "loss": 2.0014, "step": 36360 }, { "epoch": 3.7364364981504314, "grad_norm": 0.06039261072874069, "learning_rate": 0.01, "loss": 2.0364, "step": 36363 }, { "epoch": 3.7367447595561036, "grad_norm": 0.09946703165769577, "learning_rate": 0.01, "loss": 2.0169, "step": 36366 }, { "epoch": 3.737053020961776, "grad_norm": 0.10038801282644272, "learning_rate": 0.01, "loss": 2.012, "step": 36369 }, { "epoch": 3.7373612823674476, "grad_norm": 0.07845285534858704, "learning_rate": 0.01, "loss": 1.9782, "step": 36372 }, { "epoch": 3.7376695437731193, "grad_norm": 0.08079241961240768, "learning_rate": 0.01, "loss": 1.994, "step": 36375 }, { "epoch": 3.7379778051787915, "grad_norm": 0.04414941743016243, "learning_rate": 0.01, "loss": 2.0103, "step": 36378 }, { "epoch": 3.7382860665844637, "grad_norm": 0.14685457944869995, "learning_rate": 0.01, "loss": 1.9965, "step": 36381 }, { "epoch": 3.7385943279901355, "grad_norm": 0.04633990302681923, "learning_rate": 0.01, "loss": 2.0, "step": 36384 }, { "epoch": 3.7389025893958077, "grad_norm": 0.04103631526231766, "learning_rate": 0.01, "loss": 2.0023, "step": 36387 }, { "epoch": 3.73921085080148, "grad_norm": 0.03546123206615448, "learning_rate": 0.01, "loss": 1.9901, "step": 36390 }, { "epoch": 3.7395191122071516, "grad_norm": 0.08792685717344284, "learning_rate": 0.01, "loss": 1.9955, "step": 36393 }, { "epoch": 3.739827373612824, "grad_norm": 0.09209249913692474, "learning_rate": 0.01, "loss": 2.0211, "step": 36396 }, { "epoch": 3.7401356350184956, "grad_norm": 0.0480416901409626, "learning_rate": 0.01, "loss": 1.9943, "step": 36399 }, { "epoch": 3.7404438964241677, "grad_norm": 0.03383079916238785, "learning_rate": 0.01, "loss": 2.0016, "step": 36402 }, { "epoch": 3.7407521578298395, "grad_norm": 0.06483932584524155, "learning_rate": 0.01, "loss": 2.0077, "step": 36405 }, { "epoch": 3.7410604192355117, "grad_norm": 0.04043160006403923, "learning_rate": 0.01, "loss": 1.994, "step": 36408 }, { "epoch": 3.741368680641184, "grad_norm": 0.047389864921569824, "learning_rate": 0.01, "loss": 1.9923, "step": 36411 }, { "epoch": 3.7416769420468556, "grad_norm": 0.04405448958277702, "learning_rate": 0.01, "loss": 1.9969, "step": 36414 }, { "epoch": 3.741985203452528, "grad_norm": 0.03635013848543167, "learning_rate": 0.01, "loss": 2.0013, "step": 36417 }, { "epoch": 3.7422934648582, "grad_norm": 0.06296937167644501, "learning_rate": 0.01, "loss": 2.0106, "step": 36420 }, { "epoch": 3.7426017262638718, "grad_norm": 0.08138510584831238, "learning_rate": 0.01, "loss": 2.0052, "step": 36423 }, { "epoch": 3.7429099876695435, "grad_norm": 0.05814410001039505, "learning_rate": 0.01, "loss": 2.0081, "step": 36426 }, { "epoch": 3.7432182490752157, "grad_norm": 0.09043169766664505, "learning_rate": 0.01, "loss": 1.9894, "step": 36429 }, { "epoch": 3.743526510480888, "grad_norm": 0.038527343422174454, "learning_rate": 0.01, "loss": 2.0191, "step": 36432 }, { "epoch": 3.7438347718865597, "grad_norm": 0.03342151269316673, "learning_rate": 0.01, "loss": 1.9951, "step": 36435 }, { "epoch": 3.744143033292232, "grad_norm": 0.054572802037000656, "learning_rate": 0.01, "loss": 2.0117, "step": 36438 }, { "epoch": 3.744451294697904, "grad_norm": 0.10896478593349457, "learning_rate": 0.01, "loss": 2.0004, "step": 36441 }, { "epoch": 3.744759556103576, "grad_norm": 0.04996712505817413, "learning_rate": 0.01, "loss": 1.9957, "step": 36444 }, { "epoch": 3.745067817509248, "grad_norm": 0.06720001995563507, "learning_rate": 0.01, "loss": 1.9883, "step": 36447 }, { "epoch": 3.7453760789149197, "grad_norm": 0.07731924951076508, "learning_rate": 0.01, "loss": 2.0132, "step": 36450 }, { "epoch": 3.745684340320592, "grad_norm": 0.07545910775661469, "learning_rate": 0.01, "loss": 2.0, "step": 36453 }, { "epoch": 3.7459926017262637, "grad_norm": 0.07236587256193161, "learning_rate": 0.01, "loss": 1.9954, "step": 36456 }, { "epoch": 3.746300863131936, "grad_norm": 0.05105200409889221, "learning_rate": 0.01, "loss": 1.986, "step": 36459 }, { "epoch": 3.746609124537608, "grad_norm": 0.06345131993293762, "learning_rate": 0.01, "loss": 1.9977, "step": 36462 }, { "epoch": 3.74691738594328, "grad_norm": 0.08864062279462814, "learning_rate": 0.01, "loss": 1.989, "step": 36465 }, { "epoch": 3.747225647348952, "grad_norm": 0.06058958172798157, "learning_rate": 0.01, "loss": 2.0155, "step": 36468 }, { "epoch": 3.7475339087546238, "grad_norm": 0.08054932206869125, "learning_rate": 0.01, "loss": 1.9682, "step": 36471 }, { "epoch": 3.747842170160296, "grad_norm": 0.04538315162062645, "learning_rate": 0.01, "loss": 1.9707, "step": 36474 }, { "epoch": 3.7481504315659677, "grad_norm": 0.11694948375225067, "learning_rate": 0.01, "loss": 1.9962, "step": 36477 }, { "epoch": 3.74845869297164, "grad_norm": 0.037599921226501465, "learning_rate": 0.01, "loss": 1.9998, "step": 36480 }, { "epoch": 3.748766954377312, "grad_norm": 0.062208350747823715, "learning_rate": 0.01, "loss": 2.0154, "step": 36483 }, { "epoch": 3.749075215782984, "grad_norm": 0.04657081514596939, "learning_rate": 0.01, "loss": 1.9954, "step": 36486 }, { "epoch": 3.749383477188656, "grad_norm": 0.11601486057043076, "learning_rate": 0.01, "loss": 2.0216, "step": 36489 }, { "epoch": 3.7496917385943282, "grad_norm": 0.04120590165257454, "learning_rate": 0.01, "loss": 2.0076, "step": 36492 }, { "epoch": 3.75, "grad_norm": 0.07783524692058563, "learning_rate": 0.01, "loss": 1.9892, "step": 36495 }, { "epoch": 3.7503082614056718, "grad_norm": 0.04452613368630409, "learning_rate": 0.01, "loss": 2.0191, "step": 36498 }, { "epoch": 3.750616522811344, "grad_norm": 0.10393857210874557, "learning_rate": 0.01, "loss": 1.9765, "step": 36501 }, { "epoch": 3.750924784217016, "grad_norm": 0.11419197171926498, "learning_rate": 0.01, "loss": 1.9791, "step": 36504 }, { "epoch": 3.751233045622688, "grad_norm": 0.0752970427274704, "learning_rate": 0.01, "loss": 2.0128, "step": 36507 }, { "epoch": 3.75154130702836, "grad_norm": 0.06409469246864319, "learning_rate": 0.01, "loss": 2.0103, "step": 36510 }, { "epoch": 3.7518495684340323, "grad_norm": 0.053437262773513794, "learning_rate": 0.01, "loss": 1.9912, "step": 36513 }, { "epoch": 3.752157829839704, "grad_norm": 0.04102127254009247, "learning_rate": 0.01, "loss": 1.999, "step": 36516 }, { "epoch": 3.7524660912453762, "grad_norm": 0.03595980256795883, "learning_rate": 0.01, "loss": 1.9903, "step": 36519 }, { "epoch": 3.752774352651048, "grad_norm": 0.10266067087650299, "learning_rate": 0.01, "loss": 1.9884, "step": 36522 }, { "epoch": 3.75308261405672, "grad_norm": 0.07302891463041306, "learning_rate": 0.01, "loss": 2.012, "step": 36525 }, { "epoch": 3.753390875462392, "grad_norm": 0.08043140172958374, "learning_rate": 0.01, "loss": 2.0129, "step": 36528 }, { "epoch": 3.753699136868064, "grad_norm": 0.0765608549118042, "learning_rate": 0.01, "loss": 2.0029, "step": 36531 }, { "epoch": 3.7540073982737363, "grad_norm": 0.040770966559648514, "learning_rate": 0.01, "loss": 1.9894, "step": 36534 }, { "epoch": 3.754315659679408, "grad_norm": 0.09125746041536331, "learning_rate": 0.01, "loss": 2.0098, "step": 36537 }, { "epoch": 3.7546239210850803, "grad_norm": 0.10734240710735321, "learning_rate": 0.01, "loss": 1.9839, "step": 36540 }, { "epoch": 3.754932182490752, "grad_norm": 0.05876481533050537, "learning_rate": 0.01, "loss": 1.9759, "step": 36543 }, { "epoch": 3.755240443896424, "grad_norm": 0.04223402962088585, "learning_rate": 0.01, "loss": 1.9709, "step": 36546 }, { "epoch": 3.755548705302096, "grad_norm": 0.03419182077050209, "learning_rate": 0.01, "loss": 1.9934, "step": 36549 }, { "epoch": 3.755856966707768, "grad_norm": 0.08762037754058838, "learning_rate": 0.01, "loss": 2.0068, "step": 36552 }, { "epoch": 3.7561652281134403, "grad_norm": 0.035800088196992874, "learning_rate": 0.01, "loss": 1.9892, "step": 36555 }, { "epoch": 3.756473489519112, "grad_norm": 0.07538183778524399, "learning_rate": 0.01, "loss": 2.0066, "step": 36558 }, { "epoch": 3.7567817509247843, "grad_norm": 0.05067085847258568, "learning_rate": 0.01, "loss": 2.0124, "step": 36561 }, { "epoch": 3.7570900123304565, "grad_norm": 0.07041851431131363, "learning_rate": 0.01, "loss": 1.9941, "step": 36564 }, { "epoch": 3.7573982737361282, "grad_norm": 0.06819632649421692, "learning_rate": 0.01, "loss": 1.9986, "step": 36567 }, { "epoch": 3.7577065351418, "grad_norm": 0.11158425360918045, "learning_rate": 0.01, "loss": 1.9883, "step": 36570 }, { "epoch": 3.758014796547472, "grad_norm": 0.09598013758659363, "learning_rate": 0.01, "loss": 2.0164, "step": 36573 }, { "epoch": 3.7583230579531444, "grad_norm": 0.0501101016998291, "learning_rate": 0.01, "loss": 2.0264, "step": 36576 }, { "epoch": 3.758631319358816, "grad_norm": 0.03867189958691597, "learning_rate": 0.01, "loss": 2.0162, "step": 36579 }, { "epoch": 3.7589395807644883, "grad_norm": 0.03836961090564728, "learning_rate": 0.01, "loss": 2.0095, "step": 36582 }, { "epoch": 3.7592478421701605, "grad_norm": 0.04135824367403984, "learning_rate": 0.01, "loss": 2.0206, "step": 36585 }, { "epoch": 3.7595561035758323, "grad_norm": 0.0922931507229805, "learning_rate": 0.01, "loss": 1.9809, "step": 36588 }, { "epoch": 3.7598643649815044, "grad_norm": 0.048077963292598724, "learning_rate": 0.01, "loss": 1.9716, "step": 36591 }, { "epoch": 3.760172626387176, "grad_norm": 0.04263912886381149, "learning_rate": 0.01, "loss": 2.0082, "step": 36594 }, { "epoch": 3.7604808877928484, "grad_norm": 0.05825957655906677, "learning_rate": 0.01, "loss": 2.0036, "step": 36597 }, { "epoch": 3.76078914919852, "grad_norm": 0.04579320177435875, "learning_rate": 0.01, "loss": 1.9677, "step": 36600 }, { "epoch": 3.7610974106041923, "grad_norm": 0.07970302551984787, "learning_rate": 0.01, "loss": 2.0029, "step": 36603 }, { "epoch": 3.7614056720098645, "grad_norm": 0.1091640293598175, "learning_rate": 0.01, "loss": 1.9974, "step": 36606 }, { "epoch": 3.7617139334155363, "grad_norm": 0.07577066123485565, "learning_rate": 0.01, "loss": 2.0077, "step": 36609 }, { "epoch": 3.7620221948212085, "grad_norm": 0.07901261001825333, "learning_rate": 0.01, "loss": 2.0055, "step": 36612 }, { "epoch": 3.7623304562268807, "grad_norm": 0.040557824075222015, "learning_rate": 0.01, "loss": 2.0129, "step": 36615 }, { "epoch": 3.7626387176325524, "grad_norm": 0.03163420036435127, "learning_rate": 0.01, "loss": 1.9782, "step": 36618 }, { "epoch": 3.762946979038224, "grad_norm": 0.0960499569773674, "learning_rate": 0.01, "loss": 1.9882, "step": 36621 }, { "epoch": 3.7632552404438964, "grad_norm": 0.05283837392926216, "learning_rate": 0.01, "loss": 2.0351, "step": 36624 }, { "epoch": 3.7635635018495686, "grad_norm": 0.03764748573303223, "learning_rate": 0.01, "loss": 1.9943, "step": 36627 }, { "epoch": 3.7638717632552403, "grad_norm": 0.07017308473587036, "learning_rate": 0.01, "loss": 2.0008, "step": 36630 }, { "epoch": 3.7641800246609125, "grad_norm": 0.06506321579217911, "learning_rate": 0.01, "loss": 1.9925, "step": 36633 }, { "epoch": 3.7644882860665847, "grad_norm": 0.040651753544807434, "learning_rate": 0.01, "loss": 1.9799, "step": 36636 }, { "epoch": 3.7647965474722564, "grad_norm": 0.0897100567817688, "learning_rate": 0.01, "loss": 2.014, "step": 36639 }, { "epoch": 3.7651048088779286, "grad_norm": 0.1067187637090683, "learning_rate": 0.01, "loss": 2.0369, "step": 36642 }, { "epoch": 3.7654130702836004, "grad_norm": 0.08220155537128448, "learning_rate": 0.01, "loss": 1.9708, "step": 36645 }, { "epoch": 3.7657213316892726, "grad_norm": 0.0830981582403183, "learning_rate": 0.01, "loss": 1.9937, "step": 36648 }, { "epoch": 3.7660295930949443, "grad_norm": 0.04124632850289345, "learning_rate": 0.01, "loss": 1.9937, "step": 36651 }, { "epoch": 3.7663378545006165, "grad_norm": 0.03570066764950752, "learning_rate": 0.01, "loss": 2.0102, "step": 36654 }, { "epoch": 3.7666461159062887, "grad_norm": 0.04715826362371445, "learning_rate": 0.01, "loss": 1.9917, "step": 36657 }, { "epoch": 3.7669543773119605, "grad_norm": 0.05129298195242882, "learning_rate": 0.01, "loss": 2.0058, "step": 36660 }, { "epoch": 3.7672626387176327, "grad_norm": 0.07690376043319702, "learning_rate": 0.01, "loss": 1.9985, "step": 36663 }, { "epoch": 3.7675709001233044, "grad_norm": 0.06316812336444855, "learning_rate": 0.01, "loss": 1.9911, "step": 36666 }, { "epoch": 3.7678791615289766, "grad_norm": 0.045724622905254364, "learning_rate": 0.01, "loss": 2.007, "step": 36669 }, { "epoch": 3.7681874229346484, "grad_norm": 0.12661319971084595, "learning_rate": 0.01, "loss": 2.0159, "step": 36672 }, { "epoch": 3.7684956843403206, "grad_norm": 0.03898628428578377, "learning_rate": 0.01, "loss": 2.0127, "step": 36675 }, { "epoch": 3.7688039457459928, "grad_norm": 0.04297169670462608, "learning_rate": 0.01, "loss": 2.0072, "step": 36678 }, { "epoch": 3.7691122071516645, "grad_norm": 0.047098271548748016, "learning_rate": 0.01, "loss": 2.0019, "step": 36681 }, { "epoch": 3.7694204685573367, "grad_norm": 0.0880102887749672, "learning_rate": 0.01, "loss": 2.0009, "step": 36684 }, { "epoch": 3.769728729963009, "grad_norm": 0.09598886221647263, "learning_rate": 0.01, "loss": 1.987, "step": 36687 }, { "epoch": 3.7700369913686806, "grad_norm": 0.07920796424150467, "learning_rate": 0.01, "loss": 1.9992, "step": 36690 }, { "epoch": 3.7703452527743524, "grad_norm": 0.09643390029668808, "learning_rate": 0.01, "loss": 2.0219, "step": 36693 }, { "epoch": 3.7706535141800246, "grad_norm": 0.07026758790016174, "learning_rate": 0.01, "loss": 2.0217, "step": 36696 }, { "epoch": 3.770961775585697, "grad_norm": 0.04533863440155983, "learning_rate": 0.01, "loss": 2.0129, "step": 36699 }, { "epoch": 3.7712700369913685, "grad_norm": 0.03511887788772583, "learning_rate": 0.01, "loss": 2.0054, "step": 36702 }, { "epoch": 3.7715782983970407, "grad_norm": 0.03846128657460213, "learning_rate": 0.01, "loss": 2.0006, "step": 36705 }, { "epoch": 3.771886559802713, "grad_norm": 0.07523466646671295, "learning_rate": 0.01, "loss": 2.0232, "step": 36708 }, { "epoch": 3.7721948212083847, "grad_norm": 0.09649614989757538, "learning_rate": 0.01, "loss": 1.9851, "step": 36711 }, { "epoch": 3.772503082614057, "grad_norm": 0.04845314472913742, "learning_rate": 0.01, "loss": 1.9994, "step": 36714 }, { "epoch": 3.7728113440197286, "grad_norm": 0.037477899342775345, "learning_rate": 0.01, "loss": 2.0113, "step": 36717 }, { "epoch": 3.773119605425401, "grad_norm": 0.08308611810207367, "learning_rate": 0.01, "loss": 2.017, "step": 36720 }, { "epoch": 3.7734278668310726, "grad_norm": 0.048357848078012466, "learning_rate": 0.01, "loss": 1.9848, "step": 36723 }, { "epoch": 3.7737361282367448, "grad_norm": 0.04424552246928215, "learning_rate": 0.01, "loss": 2.0176, "step": 36726 }, { "epoch": 3.774044389642417, "grad_norm": 0.04857276752591133, "learning_rate": 0.01, "loss": 2.0304, "step": 36729 }, { "epoch": 3.7743526510480887, "grad_norm": 0.039389774203300476, "learning_rate": 0.01, "loss": 2.0044, "step": 36732 }, { "epoch": 3.774660912453761, "grad_norm": 0.04370071366429329, "learning_rate": 0.01, "loss": 2.0095, "step": 36735 }, { "epoch": 3.7749691738594326, "grad_norm": 0.05456184223294258, "learning_rate": 0.01, "loss": 2.0155, "step": 36738 }, { "epoch": 3.775277435265105, "grad_norm": 0.0888199731707573, "learning_rate": 0.01, "loss": 2.008, "step": 36741 }, { "epoch": 3.7755856966707766, "grad_norm": 0.19867762923240662, "learning_rate": 0.01, "loss": 1.9926, "step": 36744 }, { "epoch": 3.775893958076449, "grad_norm": 0.17659060657024384, "learning_rate": 0.01, "loss": 2.0021, "step": 36747 }, { "epoch": 3.776202219482121, "grad_norm": 0.0629454255104065, "learning_rate": 0.01, "loss": 1.9917, "step": 36750 }, { "epoch": 3.7765104808877927, "grad_norm": 0.05801904946565628, "learning_rate": 0.01, "loss": 1.9972, "step": 36753 }, { "epoch": 3.776818742293465, "grad_norm": 0.04907959699630737, "learning_rate": 0.01, "loss": 1.9899, "step": 36756 }, { "epoch": 3.777127003699137, "grad_norm": 0.08606990426778793, "learning_rate": 0.01, "loss": 1.9829, "step": 36759 }, { "epoch": 3.777435265104809, "grad_norm": 0.03293627128005028, "learning_rate": 0.01, "loss": 2.0177, "step": 36762 }, { "epoch": 3.7777435265104806, "grad_norm": 0.033272456377744675, "learning_rate": 0.01, "loss": 1.982, "step": 36765 }, { "epoch": 3.778051787916153, "grad_norm": 0.10520119965076447, "learning_rate": 0.01, "loss": 1.9592, "step": 36768 }, { "epoch": 3.778360049321825, "grad_norm": 0.03949661925435066, "learning_rate": 0.01, "loss": 2.0025, "step": 36771 }, { "epoch": 3.7786683107274968, "grad_norm": 0.10781146585941315, "learning_rate": 0.01, "loss": 2.0142, "step": 36774 }, { "epoch": 3.778976572133169, "grad_norm": 0.1707312911748886, "learning_rate": 0.01, "loss": 2.0072, "step": 36777 }, { "epoch": 3.779284833538841, "grad_norm": 0.10689792037010193, "learning_rate": 0.01, "loss": 1.9979, "step": 36780 }, { "epoch": 3.779593094944513, "grad_norm": 0.06377099454402924, "learning_rate": 0.01, "loss": 2.0019, "step": 36783 }, { "epoch": 3.779901356350185, "grad_norm": 0.050422243773937225, "learning_rate": 0.01, "loss": 1.9994, "step": 36786 }, { "epoch": 3.780209617755857, "grad_norm": 0.041165612637996674, "learning_rate": 0.01, "loss": 2.0116, "step": 36789 }, { "epoch": 3.780517879161529, "grad_norm": 0.07220548391342163, "learning_rate": 0.01, "loss": 2.0089, "step": 36792 }, { "epoch": 3.780826140567201, "grad_norm": 0.05860710144042969, "learning_rate": 0.01, "loss": 2.0221, "step": 36795 }, { "epoch": 3.781134401972873, "grad_norm": 0.03842824697494507, "learning_rate": 0.01, "loss": 1.9945, "step": 36798 }, { "epoch": 3.781442663378545, "grad_norm": 0.04346943646669388, "learning_rate": 0.01, "loss": 2.0203, "step": 36801 }, { "epoch": 3.781750924784217, "grad_norm": 0.16297590732574463, "learning_rate": 0.01, "loss": 2.0002, "step": 36804 }, { "epoch": 3.782059186189889, "grad_norm": 0.06720095127820969, "learning_rate": 0.01, "loss": 2.0, "step": 36807 }, { "epoch": 3.782367447595561, "grad_norm": 0.04820172116160393, "learning_rate": 0.01, "loss": 1.9773, "step": 36810 }, { "epoch": 3.782675709001233, "grad_norm": 0.0431947223842144, "learning_rate": 0.01, "loss": 2.0213, "step": 36813 }, { "epoch": 3.782983970406905, "grad_norm": 0.0517662838101387, "learning_rate": 0.01, "loss": 2.016, "step": 36816 }, { "epoch": 3.783292231812577, "grad_norm": 0.06249597296118736, "learning_rate": 0.01, "loss": 1.9905, "step": 36819 }, { "epoch": 3.783600493218249, "grad_norm": 0.04001644626259804, "learning_rate": 0.01, "loss": 2.0082, "step": 36822 }, { "epoch": 3.783908754623921, "grad_norm": 0.0713401585817337, "learning_rate": 0.01, "loss": 1.9978, "step": 36825 }, { "epoch": 3.784217016029593, "grad_norm": 0.08583984524011612, "learning_rate": 0.01, "loss": 2.0047, "step": 36828 }, { "epoch": 3.7845252774352653, "grad_norm": 0.05482606962323189, "learning_rate": 0.01, "loss": 2.0038, "step": 36831 }, { "epoch": 3.784833538840937, "grad_norm": 0.04373779147863388, "learning_rate": 0.01, "loss": 2.0003, "step": 36834 }, { "epoch": 3.7851418002466093, "grad_norm": 0.09752769768238068, "learning_rate": 0.01, "loss": 1.9854, "step": 36837 }, { "epoch": 3.785450061652281, "grad_norm": 0.07635717839002609, "learning_rate": 0.01, "loss": 2.0065, "step": 36840 }, { "epoch": 3.7857583230579532, "grad_norm": 0.07021810859441757, "learning_rate": 0.01, "loss": 1.9888, "step": 36843 }, { "epoch": 3.786066584463625, "grad_norm": 0.0541268065571785, "learning_rate": 0.01, "loss": 2.0252, "step": 36846 }, { "epoch": 3.786374845869297, "grad_norm": 0.07205335795879364, "learning_rate": 0.01, "loss": 1.9916, "step": 36849 }, { "epoch": 3.7866831072749694, "grad_norm": 0.057571567595005035, "learning_rate": 0.01, "loss": 1.9944, "step": 36852 }, { "epoch": 3.786991368680641, "grad_norm": 0.11135399341583252, "learning_rate": 0.01, "loss": 2.0053, "step": 36855 }, { "epoch": 3.7872996300863133, "grad_norm": 0.0698394924402237, "learning_rate": 0.01, "loss": 1.9832, "step": 36858 }, { "epoch": 3.787607891491985, "grad_norm": 0.09506375342607498, "learning_rate": 0.01, "loss": 1.9965, "step": 36861 }, { "epoch": 3.7879161528976573, "grad_norm": 0.08534973114728928, "learning_rate": 0.01, "loss": 1.9893, "step": 36864 }, { "epoch": 3.788224414303329, "grad_norm": 0.08360368013381958, "learning_rate": 0.01, "loss": 1.9962, "step": 36867 }, { "epoch": 3.788532675709001, "grad_norm": 0.08635003864765167, "learning_rate": 0.01, "loss": 2.0042, "step": 36870 }, { "epoch": 3.7888409371146734, "grad_norm": 0.033921390771865845, "learning_rate": 0.01, "loss": 2.0134, "step": 36873 }, { "epoch": 3.789149198520345, "grad_norm": 0.04312542825937271, "learning_rate": 0.01, "loss": 2.0188, "step": 36876 }, { "epoch": 3.7894574599260173, "grad_norm": 0.06157702952623367, "learning_rate": 0.01, "loss": 1.9622, "step": 36879 }, { "epoch": 3.7897657213316895, "grad_norm": 0.05039060115814209, "learning_rate": 0.01, "loss": 2.0035, "step": 36882 }, { "epoch": 3.7900739827373613, "grad_norm": 0.05873194709420204, "learning_rate": 0.01, "loss": 2.0335, "step": 36885 }, { "epoch": 3.790382244143033, "grad_norm": 0.07444643974304199, "learning_rate": 0.01, "loss": 1.9708, "step": 36888 }, { "epoch": 3.7906905055487052, "grad_norm": 0.06560081243515015, "learning_rate": 0.01, "loss": 2.0251, "step": 36891 }, { "epoch": 3.7909987669543774, "grad_norm": 0.08456238359212875, "learning_rate": 0.01, "loss": 1.9889, "step": 36894 }, { "epoch": 3.791307028360049, "grad_norm": 0.09048344194889069, "learning_rate": 0.01, "loss": 1.9992, "step": 36897 }, { "epoch": 3.7916152897657214, "grad_norm": 0.07640919834375381, "learning_rate": 0.01, "loss": 2.0104, "step": 36900 }, { "epoch": 3.7919235511713936, "grad_norm": 0.0968911275267601, "learning_rate": 0.01, "loss": 1.9664, "step": 36903 }, { "epoch": 3.7922318125770653, "grad_norm": 0.08743193000555038, "learning_rate": 0.01, "loss": 1.9942, "step": 36906 }, { "epoch": 3.7925400739827375, "grad_norm": 0.056414127349853516, "learning_rate": 0.01, "loss": 1.9847, "step": 36909 }, { "epoch": 3.7928483353884093, "grad_norm": 0.04174700006842613, "learning_rate": 0.01, "loss": 2.0113, "step": 36912 }, { "epoch": 3.7931565967940815, "grad_norm": 0.03753121197223663, "learning_rate": 0.01, "loss": 1.978, "step": 36915 }, { "epoch": 3.793464858199753, "grad_norm": 0.04957683011889458, "learning_rate": 0.01, "loss": 2.0017, "step": 36918 }, { "epoch": 3.7937731196054254, "grad_norm": 0.051355455070734024, "learning_rate": 0.01, "loss": 2.0076, "step": 36921 }, { "epoch": 3.7940813810110976, "grad_norm": 0.0345148891210556, "learning_rate": 0.01, "loss": 1.9869, "step": 36924 }, { "epoch": 3.7943896424167693, "grad_norm": 0.12389633059501648, "learning_rate": 0.01, "loss": 2.0207, "step": 36927 }, { "epoch": 3.7946979038224415, "grad_norm": 0.05504097789525986, "learning_rate": 0.01, "loss": 2.0119, "step": 36930 }, { "epoch": 3.7950061652281133, "grad_norm": 0.044859953224658966, "learning_rate": 0.01, "loss": 2.0009, "step": 36933 }, { "epoch": 3.7953144266337855, "grad_norm": 0.06867794692516327, "learning_rate": 0.01, "loss": 1.9942, "step": 36936 }, { "epoch": 3.7956226880394572, "grad_norm": 0.10308001190423965, "learning_rate": 0.01, "loss": 1.9884, "step": 36939 }, { "epoch": 3.7959309494451294, "grad_norm": 0.07946982234716415, "learning_rate": 0.01, "loss": 1.9817, "step": 36942 }, { "epoch": 3.7962392108508016, "grad_norm": 0.058571770787239075, "learning_rate": 0.01, "loss": 1.9873, "step": 36945 }, { "epoch": 3.7965474722564734, "grad_norm": 0.07778012007474899, "learning_rate": 0.01, "loss": 1.9951, "step": 36948 }, { "epoch": 3.7968557336621456, "grad_norm": 0.06965510547161102, "learning_rate": 0.01, "loss": 1.9944, "step": 36951 }, { "epoch": 3.7971639950678178, "grad_norm": 0.06443614512681961, "learning_rate": 0.01, "loss": 2.012, "step": 36954 }, { "epoch": 3.7974722564734895, "grad_norm": 0.04750034958124161, "learning_rate": 0.01, "loss": 1.9962, "step": 36957 }, { "epoch": 3.7977805178791613, "grad_norm": 0.039454251527786255, "learning_rate": 0.01, "loss": 2.0343, "step": 36960 }, { "epoch": 3.7980887792848335, "grad_norm": 0.07771621644496918, "learning_rate": 0.01, "loss": 2.0194, "step": 36963 }, { "epoch": 3.7983970406905057, "grad_norm": 0.06523919850587845, "learning_rate": 0.01, "loss": 1.9954, "step": 36966 }, { "epoch": 3.7987053020961774, "grad_norm": 0.055610157549381256, "learning_rate": 0.01, "loss": 2.0062, "step": 36969 }, { "epoch": 3.7990135635018496, "grad_norm": 0.08819667249917984, "learning_rate": 0.01, "loss": 1.9859, "step": 36972 }, { "epoch": 3.799321824907522, "grad_norm": 0.05418279394507408, "learning_rate": 0.01, "loss": 1.9951, "step": 36975 }, { "epoch": 3.7996300863131935, "grad_norm": 0.06648479402065277, "learning_rate": 0.01, "loss": 2.0151, "step": 36978 }, { "epoch": 3.7999383477188657, "grad_norm": 0.04629239812493324, "learning_rate": 0.01, "loss": 1.9928, "step": 36981 }, { "epoch": 3.8002466091245375, "grad_norm": 0.042264122515916824, "learning_rate": 0.01, "loss": 1.9832, "step": 36984 }, { "epoch": 3.8005548705302097, "grad_norm": 0.10186997801065445, "learning_rate": 0.01, "loss": 1.9888, "step": 36987 }, { "epoch": 3.8008631319358814, "grad_norm": 0.09278517216444016, "learning_rate": 0.01, "loss": 2.0063, "step": 36990 }, { "epoch": 3.8011713933415536, "grad_norm": 0.07084151357412338, "learning_rate": 0.01, "loss": 1.9972, "step": 36993 }, { "epoch": 3.801479654747226, "grad_norm": 0.07990908622741699, "learning_rate": 0.01, "loss": 2.0019, "step": 36996 }, { "epoch": 3.8017879161528976, "grad_norm": 0.039643771946430206, "learning_rate": 0.01, "loss": 1.9926, "step": 36999 }, { "epoch": 3.8020961775585698, "grad_norm": 0.03678774833679199, "learning_rate": 0.01, "loss": 1.9884, "step": 37002 }, { "epoch": 3.8024044389642415, "grad_norm": 0.040611330419778824, "learning_rate": 0.01, "loss": 2.0054, "step": 37005 }, { "epoch": 3.8027127003699137, "grad_norm": 0.03812083601951599, "learning_rate": 0.01, "loss": 1.9778, "step": 37008 }, { "epoch": 3.8030209617755855, "grad_norm": 0.07582142949104309, "learning_rate": 0.01, "loss": 1.9989, "step": 37011 }, { "epoch": 3.8033292231812577, "grad_norm": 0.14520719647407532, "learning_rate": 0.01, "loss": 1.9948, "step": 37014 }, { "epoch": 3.80363748458693, "grad_norm": 0.04497974365949631, "learning_rate": 0.01, "loss": 2.0239, "step": 37017 }, { "epoch": 3.8039457459926016, "grad_norm": 0.04178651422262192, "learning_rate": 0.01, "loss": 2.0156, "step": 37020 }, { "epoch": 3.804254007398274, "grad_norm": 0.047346848994493484, "learning_rate": 0.01, "loss": 1.9734, "step": 37023 }, { "epoch": 3.804562268803946, "grad_norm": 0.04626936465501785, "learning_rate": 0.01, "loss": 2.0209, "step": 37026 }, { "epoch": 3.8048705302096177, "grad_norm": 0.055330242961645126, "learning_rate": 0.01, "loss": 2.0, "step": 37029 }, { "epoch": 3.8051787916152895, "grad_norm": 0.040033504366874695, "learning_rate": 0.01, "loss": 2.0082, "step": 37032 }, { "epoch": 3.8054870530209617, "grad_norm": 0.03897986188530922, "learning_rate": 0.01, "loss": 1.9863, "step": 37035 }, { "epoch": 3.805795314426634, "grad_norm": 0.07525118440389633, "learning_rate": 0.01, "loss": 2.0316, "step": 37038 }, { "epoch": 3.8061035758323056, "grad_norm": 0.06256567686796188, "learning_rate": 0.01, "loss": 2.0018, "step": 37041 }, { "epoch": 3.806411837237978, "grad_norm": 0.05028248950839043, "learning_rate": 0.01, "loss": 2.0111, "step": 37044 }, { "epoch": 3.80672009864365, "grad_norm": 0.03530338406562805, "learning_rate": 0.01, "loss": 1.9918, "step": 37047 }, { "epoch": 3.8070283600493218, "grad_norm": 0.044229693710803986, "learning_rate": 0.01, "loss": 1.9906, "step": 37050 }, { "epoch": 3.807336621454994, "grad_norm": 0.1117880642414093, "learning_rate": 0.01, "loss": 2.0065, "step": 37053 }, { "epoch": 3.8076448828606657, "grad_norm": 0.04328616335988045, "learning_rate": 0.01, "loss": 1.9818, "step": 37056 }, { "epoch": 3.807953144266338, "grad_norm": 0.05948890000581741, "learning_rate": 0.01, "loss": 2.0013, "step": 37059 }, { "epoch": 3.8082614056720097, "grad_norm": 0.043167680501937866, "learning_rate": 0.01, "loss": 1.9996, "step": 37062 }, { "epoch": 3.808569667077682, "grad_norm": 0.04852902144193649, "learning_rate": 0.01, "loss": 2.0178, "step": 37065 }, { "epoch": 3.808877928483354, "grad_norm": 0.06212899461388588, "learning_rate": 0.01, "loss": 1.9709, "step": 37068 }, { "epoch": 3.809186189889026, "grad_norm": 0.07403160631656647, "learning_rate": 0.01, "loss": 1.9913, "step": 37071 }, { "epoch": 3.809494451294698, "grad_norm": 0.07734464108943939, "learning_rate": 0.01, "loss": 2.008, "step": 37074 }, { "epoch": 3.80980271270037, "grad_norm": 0.05057765170931816, "learning_rate": 0.01, "loss": 1.9949, "step": 37077 }, { "epoch": 3.810110974106042, "grad_norm": 0.11746016144752502, "learning_rate": 0.01, "loss": 1.9787, "step": 37080 }, { "epoch": 3.8104192355117137, "grad_norm": 0.10531330853700638, "learning_rate": 0.01, "loss": 2.0042, "step": 37083 }, { "epoch": 3.810727496917386, "grad_norm": 0.11436725407838821, "learning_rate": 0.01, "loss": 2.0051, "step": 37086 }, { "epoch": 3.811035758323058, "grad_norm": 0.04365989565849304, "learning_rate": 0.01, "loss": 1.9909, "step": 37089 }, { "epoch": 3.81134401972873, "grad_norm": 0.04271963611245155, "learning_rate": 0.01, "loss": 1.998, "step": 37092 }, { "epoch": 3.811652281134402, "grad_norm": 0.03649172931909561, "learning_rate": 0.01, "loss": 1.9963, "step": 37095 }, { "epoch": 3.811960542540074, "grad_norm": 0.054175905883312225, "learning_rate": 0.01, "loss": 2.0175, "step": 37098 }, { "epoch": 3.812268803945746, "grad_norm": 0.041479047387838364, "learning_rate": 0.01, "loss": 1.9962, "step": 37101 }, { "epoch": 3.812577065351418, "grad_norm": 0.03695906326174736, "learning_rate": 0.01, "loss": 1.9984, "step": 37104 }, { "epoch": 3.81288532675709, "grad_norm": 0.05167945846915245, "learning_rate": 0.01, "loss": 2.0219, "step": 37107 }, { "epoch": 3.813193588162762, "grad_norm": 0.0706062912940979, "learning_rate": 0.01, "loss": 1.9862, "step": 37110 }, { "epoch": 3.813501849568434, "grad_norm": 0.04282987117767334, "learning_rate": 0.01, "loss": 1.9829, "step": 37113 }, { "epoch": 3.813810110974106, "grad_norm": 0.048825137317180634, "learning_rate": 0.01, "loss": 1.9764, "step": 37116 }, { "epoch": 3.8141183723797782, "grad_norm": 0.13310399651527405, "learning_rate": 0.01, "loss": 1.9889, "step": 37119 }, { "epoch": 3.81442663378545, "grad_norm": 0.05691874772310257, "learning_rate": 0.01, "loss": 2.0029, "step": 37122 }, { "epoch": 3.814734895191122, "grad_norm": 0.07542920112609863, "learning_rate": 0.01, "loss": 2.0014, "step": 37125 }, { "epoch": 3.815043156596794, "grad_norm": 0.09146812558174133, "learning_rate": 0.01, "loss": 1.9908, "step": 37128 }, { "epoch": 3.815351418002466, "grad_norm": 0.04742105305194855, "learning_rate": 0.01, "loss": 1.9907, "step": 37131 }, { "epoch": 3.815659679408138, "grad_norm": 0.03387041017413139, "learning_rate": 0.01, "loss": 1.9763, "step": 37134 }, { "epoch": 3.81596794081381, "grad_norm": 0.036746036261320114, "learning_rate": 0.01, "loss": 1.9919, "step": 37137 }, { "epoch": 3.8162762022194823, "grad_norm": 0.06593815982341766, "learning_rate": 0.01, "loss": 1.9814, "step": 37140 }, { "epoch": 3.816584463625154, "grad_norm": 0.10538439452648163, "learning_rate": 0.01, "loss": 2.0011, "step": 37143 }, { "epoch": 3.816892725030826, "grad_norm": 0.07849781960248947, "learning_rate": 0.01, "loss": 2.0053, "step": 37146 }, { "epoch": 3.8172009864364984, "grad_norm": 0.10899082571268082, "learning_rate": 0.01, "loss": 2.0186, "step": 37149 }, { "epoch": 3.81750924784217, "grad_norm": 0.05171935632824898, "learning_rate": 0.01, "loss": 1.9911, "step": 37152 }, { "epoch": 3.817817509247842, "grad_norm": 0.06028240546584129, "learning_rate": 0.01, "loss": 1.9911, "step": 37155 }, { "epoch": 3.818125770653514, "grad_norm": 0.0440855398774147, "learning_rate": 0.01, "loss": 2.0063, "step": 37158 }, { "epoch": 3.8184340320591863, "grad_norm": 0.05764901638031006, "learning_rate": 0.01, "loss": 1.9834, "step": 37161 }, { "epoch": 3.818742293464858, "grad_norm": 0.042115770280361176, "learning_rate": 0.01, "loss": 1.9816, "step": 37164 }, { "epoch": 3.8190505548705302, "grad_norm": 0.0819876417517662, "learning_rate": 0.01, "loss": 1.9991, "step": 37167 }, { "epoch": 3.8193588162762024, "grad_norm": 0.09432919323444366, "learning_rate": 0.01, "loss": 1.999, "step": 37170 }, { "epoch": 3.819667077681874, "grad_norm": 0.1223209798336029, "learning_rate": 0.01, "loss": 1.994, "step": 37173 }, { "epoch": 3.8199753390875464, "grad_norm": 0.08615586161613464, "learning_rate": 0.01, "loss": 1.9676, "step": 37176 }, { "epoch": 3.820283600493218, "grad_norm": 0.0847785696387291, "learning_rate": 0.01, "loss": 2.0067, "step": 37179 }, { "epoch": 3.8205918618988903, "grad_norm": 0.059272028505802155, "learning_rate": 0.01, "loss": 1.9938, "step": 37182 }, { "epoch": 3.820900123304562, "grad_norm": 0.042237572371959686, "learning_rate": 0.01, "loss": 2.0011, "step": 37185 }, { "epoch": 3.8212083847102343, "grad_norm": 0.04662346839904785, "learning_rate": 0.01, "loss": 1.989, "step": 37188 }, { "epoch": 3.8215166461159065, "grad_norm": 0.04566279426217079, "learning_rate": 0.01, "loss": 2.0093, "step": 37191 }, { "epoch": 3.821824907521578, "grad_norm": 0.07829894125461578, "learning_rate": 0.01, "loss": 1.9811, "step": 37194 }, { "epoch": 3.8221331689272504, "grad_norm": 0.09386474639177322, "learning_rate": 0.01, "loss": 2.016, "step": 37197 }, { "epoch": 3.822441430332922, "grad_norm": 0.04232482239603996, "learning_rate": 0.01, "loss": 1.9899, "step": 37200 }, { "epoch": 3.8227496917385944, "grad_norm": 0.09978868067264557, "learning_rate": 0.01, "loss": 2.0112, "step": 37203 }, { "epoch": 3.823057953144266, "grad_norm": 0.04246772453188896, "learning_rate": 0.01, "loss": 1.9953, "step": 37206 }, { "epoch": 3.8233662145499383, "grad_norm": 0.13856659829616547, "learning_rate": 0.01, "loss": 2.0061, "step": 37209 }, { "epoch": 3.8236744759556105, "grad_norm": 0.05659123882651329, "learning_rate": 0.01, "loss": 1.9963, "step": 37212 }, { "epoch": 3.8239827373612822, "grad_norm": 0.05944579839706421, "learning_rate": 0.01, "loss": 2.0016, "step": 37215 }, { "epoch": 3.8242909987669544, "grad_norm": 0.040483999997377396, "learning_rate": 0.01, "loss": 1.996, "step": 37218 }, { "epoch": 3.8245992601726266, "grad_norm": 0.05651898682117462, "learning_rate": 0.01, "loss": 1.9863, "step": 37221 }, { "epoch": 3.8249075215782984, "grad_norm": 0.04841217026114464, "learning_rate": 0.01, "loss": 1.9924, "step": 37224 }, { "epoch": 3.82521578298397, "grad_norm": 0.05571025237441063, "learning_rate": 0.01, "loss": 1.9857, "step": 37227 }, { "epoch": 3.8255240443896423, "grad_norm": 0.07893476635217667, "learning_rate": 0.01, "loss": 2.0081, "step": 37230 }, { "epoch": 3.8258323057953145, "grad_norm": 0.07275935262441635, "learning_rate": 0.01, "loss": 2.0264, "step": 37233 }, { "epoch": 3.8261405672009863, "grad_norm": 0.14288173615932465, "learning_rate": 0.01, "loss": 1.9963, "step": 37236 }, { "epoch": 3.8264488286066585, "grad_norm": 0.18501073122024536, "learning_rate": 0.01, "loss": 2.0133, "step": 37239 }, { "epoch": 3.8267570900123307, "grad_norm": 0.12003073841333389, "learning_rate": 0.01, "loss": 1.9811, "step": 37242 }, { "epoch": 3.8270653514180024, "grad_norm": 0.05952821299433708, "learning_rate": 0.01, "loss": 2.0194, "step": 37245 }, { "epoch": 3.8273736128236746, "grad_norm": 0.052555590867996216, "learning_rate": 0.01, "loss": 2.0046, "step": 37248 }, { "epoch": 3.8276818742293464, "grad_norm": 0.05527700483798981, "learning_rate": 0.01, "loss": 1.9929, "step": 37251 }, { "epoch": 3.8279901356350186, "grad_norm": 0.043385427445173264, "learning_rate": 0.01, "loss": 1.9682, "step": 37254 }, { "epoch": 3.8282983970406903, "grad_norm": 0.05360834673047066, "learning_rate": 0.01, "loss": 1.9979, "step": 37257 }, { "epoch": 3.8286066584463625, "grad_norm": 0.04542825371026993, "learning_rate": 0.01, "loss": 1.981, "step": 37260 }, { "epoch": 3.8289149198520347, "grad_norm": 0.04607833921909332, "learning_rate": 0.01, "loss": 1.9995, "step": 37263 }, { "epoch": 3.8292231812577064, "grad_norm": 0.059813372790813446, "learning_rate": 0.01, "loss": 2.0043, "step": 37266 }, { "epoch": 3.8295314426633786, "grad_norm": 0.11444689333438873, "learning_rate": 0.01, "loss": 2.0026, "step": 37269 }, { "epoch": 3.829839704069051, "grad_norm": 0.07510776817798615, "learning_rate": 0.01, "loss": 2.0019, "step": 37272 }, { "epoch": 3.8301479654747226, "grad_norm": 0.051781926304101944, "learning_rate": 0.01, "loss": 2.0194, "step": 37275 }, { "epoch": 3.8304562268803943, "grad_norm": 0.051285672932863235, "learning_rate": 0.01, "loss": 1.9982, "step": 37278 }, { "epoch": 3.8307644882860665, "grad_norm": 0.0464506596326828, "learning_rate": 0.01, "loss": 2.0033, "step": 37281 }, { "epoch": 3.8310727496917387, "grad_norm": 0.03997113183140755, "learning_rate": 0.01, "loss": 1.9985, "step": 37284 }, { "epoch": 3.8313810110974105, "grad_norm": 0.03474081680178642, "learning_rate": 0.01, "loss": 1.9885, "step": 37287 }, { "epoch": 3.8316892725030827, "grad_norm": 0.13113395869731903, "learning_rate": 0.01, "loss": 2.0031, "step": 37290 }, { "epoch": 3.831997533908755, "grad_norm": 0.03894534707069397, "learning_rate": 0.01, "loss": 2.0232, "step": 37293 }, { "epoch": 3.8323057953144266, "grad_norm": 0.030656691640615463, "learning_rate": 0.01, "loss": 2.0052, "step": 37296 }, { "epoch": 3.832614056720099, "grad_norm": 0.08632262796163559, "learning_rate": 0.01, "loss": 2.0037, "step": 37299 }, { "epoch": 3.8329223181257706, "grad_norm": 0.09762530028820038, "learning_rate": 0.01, "loss": 1.997, "step": 37302 }, { "epoch": 3.8332305795314427, "grad_norm": 0.07323916256427765, "learning_rate": 0.01, "loss": 2.0051, "step": 37305 }, { "epoch": 3.8335388409371145, "grad_norm": 0.08117776364088058, "learning_rate": 0.01, "loss": 2.0123, "step": 37308 }, { "epoch": 3.8338471023427867, "grad_norm": 0.07396585494279861, "learning_rate": 0.01, "loss": 2.0043, "step": 37311 }, { "epoch": 3.834155363748459, "grad_norm": 0.07192173600196838, "learning_rate": 0.01, "loss": 2.0249, "step": 37314 }, { "epoch": 3.8344636251541306, "grad_norm": 0.06731805950403214, "learning_rate": 0.01, "loss": 2.0005, "step": 37317 }, { "epoch": 3.834771886559803, "grad_norm": 0.06851818412542343, "learning_rate": 0.01, "loss": 1.9924, "step": 37320 }, { "epoch": 3.8350801479654746, "grad_norm": 0.07433861494064331, "learning_rate": 0.01, "loss": 2.0238, "step": 37323 }, { "epoch": 3.835388409371147, "grad_norm": 0.13405515253543854, "learning_rate": 0.01, "loss": 2.0068, "step": 37326 }, { "epoch": 3.8356966707768185, "grad_norm": 0.0912710577249527, "learning_rate": 0.01, "loss": 1.9802, "step": 37329 }, { "epoch": 3.8360049321824907, "grad_norm": 0.05791820213198662, "learning_rate": 0.01, "loss": 1.9655, "step": 37332 }, { "epoch": 3.836313193588163, "grad_norm": 0.06457755714654922, "learning_rate": 0.01, "loss": 1.9818, "step": 37335 }, { "epoch": 3.8366214549938347, "grad_norm": 0.06486820429563522, "learning_rate": 0.01, "loss": 1.9909, "step": 37338 }, { "epoch": 3.836929716399507, "grad_norm": 0.04546729475259781, "learning_rate": 0.01, "loss": 1.9898, "step": 37341 }, { "epoch": 3.837237977805179, "grad_norm": 0.058978717774152756, "learning_rate": 0.01, "loss": 1.9948, "step": 37344 }, { "epoch": 3.837546239210851, "grad_norm": 0.09566085040569305, "learning_rate": 0.01, "loss": 2.0054, "step": 37347 }, { "epoch": 3.8378545006165226, "grad_norm": 0.038889624178409576, "learning_rate": 0.01, "loss": 2.0, "step": 37350 }, { "epoch": 3.8381627620221948, "grad_norm": 0.09589283168315887, "learning_rate": 0.01, "loss": 2.0305, "step": 37353 }, { "epoch": 3.838471023427867, "grad_norm": 0.04719104990363121, "learning_rate": 0.01, "loss": 2.0008, "step": 37356 }, { "epoch": 3.8387792848335387, "grad_norm": 0.09488753974437714, "learning_rate": 0.01, "loss": 1.9929, "step": 37359 }, { "epoch": 3.839087546239211, "grad_norm": 0.09091513603925705, "learning_rate": 0.01, "loss": 2.0032, "step": 37362 }, { "epoch": 3.839395807644883, "grad_norm": 0.06685637682676315, "learning_rate": 0.01, "loss": 1.9713, "step": 37365 }, { "epoch": 3.839704069050555, "grad_norm": 0.0734042376279831, "learning_rate": 0.01, "loss": 2.0043, "step": 37368 }, { "epoch": 3.840012330456227, "grad_norm": 0.04996969550848007, "learning_rate": 0.01, "loss": 1.9931, "step": 37371 }, { "epoch": 3.840320591861899, "grad_norm": 0.07901585847139359, "learning_rate": 0.01, "loss": 2.0007, "step": 37374 }, { "epoch": 3.840628853267571, "grad_norm": 0.06289222836494446, "learning_rate": 0.01, "loss": 1.978, "step": 37377 }, { "epoch": 3.8409371146732427, "grad_norm": 0.07179414480924606, "learning_rate": 0.01, "loss": 1.988, "step": 37380 }, { "epoch": 3.841245376078915, "grad_norm": 0.05835287645459175, "learning_rate": 0.01, "loss": 1.9777, "step": 37383 }, { "epoch": 3.841553637484587, "grad_norm": 0.07789372652769089, "learning_rate": 0.01, "loss": 1.9832, "step": 37386 }, { "epoch": 3.841861898890259, "grad_norm": 0.10866318643093109, "learning_rate": 0.01, "loss": 1.9973, "step": 37389 }, { "epoch": 3.842170160295931, "grad_norm": 0.12253516167402267, "learning_rate": 0.01, "loss": 1.9902, "step": 37392 }, { "epoch": 3.842478421701603, "grad_norm": 0.1773093193769455, "learning_rate": 0.01, "loss": 2.0107, "step": 37395 }, { "epoch": 3.842786683107275, "grad_norm": 0.12998591363430023, "learning_rate": 0.01, "loss": 1.9986, "step": 37398 }, { "epoch": 3.8430949445129468, "grad_norm": 0.07139603793621063, "learning_rate": 0.01, "loss": 2.0066, "step": 37401 }, { "epoch": 3.843403205918619, "grad_norm": 0.05447724089026451, "learning_rate": 0.01, "loss": 1.9924, "step": 37404 }, { "epoch": 3.843711467324291, "grad_norm": 0.053450725972652435, "learning_rate": 0.01, "loss": 2.0279, "step": 37407 }, { "epoch": 3.844019728729963, "grad_norm": 0.042635634541511536, "learning_rate": 0.01, "loss": 2.0146, "step": 37410 }, { "epoch": 3.844327990135635, "grad_norm": 0.06024963781237602, "learning_rate": 0.01, "loss": 1.9926, "step": 37413 }, { "epoch": 3.8446362515413073, "grad_norm": 0.04149017482995987, "learning_rate": 0.01, "loss": 1.9986, "step": 37416 }, { "epoch": 3.844944512946979, "grad_norm": 0.043965183198451996, "learning_rate": 0.01, "loss": 1.9881, "step": 37419 }, { "epoch": 3.845252774352651, "grad_norm": 0.06676291674375534, "learning_rate": 0.01, "loss": 1.9913, "step": 37422 }, { "epoch": 3.845561035758323, "grad_norm": 0.1391642689704895, "learning_rate": 0.01, "loss": 1.9918, "step": 37425 }, { "epoch": 3.845869297163995, "grad_norm": 0.12824542820453644, "learning_rate": 0.01, "loss": 2.0091, "step": 37428 }, { "epoch": 3.846177558569667, "grad_norm": 0.038818515837192535, "learning_rate": 0.01, "loss": 2.0008, "step": 37431 }, { "epoch": 3.846485819975339, "grad_norm": 0.06105605140328407, "learning_rate": 0.01, "loss": 2.005, "step": 37434 }, { "epoch": 3.8467940813810113, "grad_norm": 0.05692486837506294, "learning_rate": 0.01, "loss": 2.0234, "step": 37437 }, { "epoch": 3.847102342786683, "grad_norm": 0.03328600525856018, "learning_rate": 0.01, "loss": 1.9845, "step": 37440 }, { "epoch": 3.8474106041923553, "grad_norm": 0.05563362315297127, "learning_rate": 0.01, "loss": 1.9972, "step": 37443 }, { "epoch": 3.847718865598027, "grad_norm": 0.04523022845387459, "learning_rate": 0.01, "loss": 1.9607, "step": 37446 }, { "epoch": 3.848027127003699, "grad_norm": 0.12618042528629303, "learning_rate": 0.01, "loss": 1.9578, "step": 37449 }, { "epoch": 3.848335388409371, "grad_norm": 0.10030481964349747, "learning_rate": 0.01, "loss": 2.002, "step": 37452 }, { "epoch": 3.848643649815043, "grad_norm": 0.09524470567703247, "learning_rate": 0.01, "loss": 1.9772, "step": 37455 }, { "epoch": 3.8489519112207153, "grad_norm": 0.06369511038064957, "learning_rate": 0.01, "loss": 1.9853, "step": 37458 }, { "epoch": 3.849260172626387, "grad_norm": 0.052571844309568405, "learning_rate": 0.01, "loss": 1.9925, "step": 37461 }, { "epoch": 3.8495684340320593, "grad_norm": 0.040871210396289825, "learning_rate": 0.01, "loss": 2.0192, "step": 37464 }, { "epoch": 3.849876695437731, "grad_norm": 0.0393734946846962, "learning_rate": 0.01, "loss": 2.0312, "step": 37467 }, { "epoch": 3.8501849568434032, "grad_norm": 0.10289833694696426, "learning_rate": 0.01, "loss": 1.9791, "step": 37470 }, { "epoch": 3.850493218249075, "grad_norm": 0.053732771426439285, "learning_rate": 0.01, "loss": 1.9955, "step": 37473 }, { "epoch": 3.850801479654747, "grad_norm": 0.035672031342983246, "learning_rate": 0.01, "loss": 1.9896, "step": 37476 }, { "epoch": 3.8511097410604194, "grad_norm": 0.07263844460248947, "learning_rate": 0.01, "loss": 1.9779, "step": 37479 }, { "epoch": 3.851418002466091, "grad_norm": 0.06151323765516281, "learning_rate": 0.01, "loss": 1.9965, "step": 37482 }, { "epoch": 3.8517262638717633, "grad_norm": 0.08033137768507004, "learning_rate": 0.01, "loss": 2.0161, "step": 37485 }, { "epoch": 3.8520345252774355, "grad_norm": 0.09900759160518646, "learning_rate": 0.01, "loss": 2.0116, "step": 37488 }, { "epoch": 3.8523427866831073, "grad_norm": 0.061891715973615646, "learning_rate": 0.01, "loss": 1.9932, "step": 37491 }, { "epoch": 3.8526510480887795, "grad_norm": 0.039978478103876114, "learning_rate": 0.01, "loss": 1.998, "step": 37494 }, { "epoch": 3.852959309494451, "grad_norm": 0.04366837814450264, "learning_rate": 0.01, "loss": 1.9992, "step": 37497 }, { "epoch": 3.8532675709001234, "grad_norm": 0.03704690560698509, "learning_rate": 0.01, "loss": 2.0011, "step": 37500 }, { "epoch": 3.853575832305795, "grad_norm": 0.03964201733469963, "learning_rate": 0.01, "loss": 1.9773, "step": 37503 }, { "epoch": 3.8538840937114673, "grad_norm": 0.060866422951221466, "learning_rate": 0.01, "loss": 1.9779, "step": 37506 }, { "epoch": 3.8541923551171395, "grad_norm": 0.1373164802789688, "learning_rate": 0.01, "loss": 1.9751, "step": 37509 }, { "epoch": 3.8545006165228113, "grad_norm": 0.12396371364593506, "learning_rate": 0.01, "loss": 2.0007, "step": 37512 }, { "epoch": 3.8548088779284835, "grad_norm": 0.05732661485671997, "learning_rate": 0.01, "loss": 1.9889, "step": 37515 }, { "epoch": 3.8551171393341552, "grad_norm": 0.072254478931427, "learning_rate": 0.01, "loss": 2.0066, "step": 37518 }, { "epoch": 3.8554254007398274, "grad_norm": 0.05103025957942009, "learning_rate": 0.01, "loss": 1.981, "step": 37521 }, { "epoch": 3.855733662145499, "grad_norm": 0.06759181618690491, "learning_rate": 0.01, "loss": 1.9863, "step": 37524 }, { "epoch": 3.8560419235511714, "grad_norm": 0.08190746605396271, "learning_rate": 0.01, "loss": 2.0103, "step": 37527 }, { "epoch": 3.8563501849568436, "grad_norm": 0.052758537232875824, "learning_rate": 0.01, "loss": 2.0052, "step": 37530 }, { "epoch": 3.8566584463625153, "grad_norm": 0.06971180438995361, "learning_rate": 0.01, "loss": 1.9825, "step": 37533 }, { "epoch": 3.8569667077681875, "grad_norm": 0.051832135766744614, "learning_rate": 0.01, "loss": 2.0283, "step": 37536 }, { "epoch": 3.8572749691738597, "grad_norm": 0.11838405579328537, "learning_rate": 0.01, "loss": 1.9869, "step": 37539 }, { "epoch": 3.8575832305795315, "grad_norm": 0.0657329186797142, "learning_rate": 0.01, "loss": 2.0137, "step": 37542 }, { "epoch": 3.857891491985203, "grad_norm": 0.042532552033662796, "learning_rate": 0.01, "loss": 1.9633, "step": 37545 }, { "epoch": 3.8581997533908754, "grad_norm": 0.029605695977807045, "learning_rate": 0.01, "loss": 1.9747, "step": 37548 }, { "epoch": 3.8585080147965476, "grad_norm": 0.045208025723695755, "learning_rate": 0.01, "loss": 2.0034, "step": 37551 }, { "epoch": 3.8588162762022193, "grad_norm": 0.061939138919115067, "learning_rate": 0.01, "loss": 2.0042, "step": 37554 }, { "epoch": 3.8591245376078915, "grad_norm": 0.08816343545913696, "learning_rate": 0.01, "loss": 1.9995, "step": 37557 }, { "epoch": 3.8594327990135637, "grad_norm": 0.06432296335697174, "learning_rate": 0.01, "loss": 1.9992, "step": 37560 }, { "epoch": 3.8597410604192355, "grad_norm": 0.07691509276628494, "learning_rate": 0.01, "loss": 1.984, "step": 37563 }, { "epoch": 3.8600493218249077, "grad_norm": 0.06328088790178299, "learning_rate": 0.01, "loss": 2.0085, "step": 37566 }, { "epoch": 3.8603575832305794, "grad_norm": 0.10897444188594818, "learning_rate": 0.01, "loss": 2.0061, "step": 37569 }, { "epoch": 3.8606658446362516, "grad_norm": 0.07461336255073547, "learning_rate": 0.01, "loss": 1.9987, "step": 37572 }, { "epoch": 3.8609741060419234, "grad_norm": 0.08543375879526138, "learning_rate": 0.01, "loss": 1.9953, "step": 37575 }, { "epoch": 3.8612823674475956, "grad_norm": 0.09546118974685669, "learning_rate": 0.01, "loss": 1.9984, "step": 37578 }, { "epoch": 3.8615906288532678, "grad_norm": 0.0741833969950676, "learning_rate": 0.01, "loss": 2.0174, "step": 37581 }, { "epoch": 3.8618988902589395, "grad_norm": 0.06818868219852448, "learning_rate": 0.01, "loss": 1.9662, "step": 37584 }, { "epoch": 3.8622071516646117, "grad_norm": 0.05515025556087494, "learning_rate": 0.01, "loss": 2.0154, "step": 37587 }, { "epoch": 3.8625154130702835, "grad_norm": 0.035819582641124725, "learning_rate": 0.01, "loss": 2.0044, "step": 37590 }, { "epoch": 3.8628236744759556, "grad_norm": 0.03145124390721321, "learning_rate": 0.01, "loss": 1.9895, "step": 37593 }, { "epoch": 3.8631319358816274, "grad_norm": 0.036893073469400406, "learning_rate": 0.01, "loss": 1.971, "step": 37596 }, { "epoch": 3.8634401972872996, "grad_norm": 0.03417917340993881, "learning_rate": 0.01, "loss": 2.0172, "step": 37599 }, { "epoch": 3.863748458692972, "grad_norm": 0.06035599485039711, "learning_rate": 0.01, "loss": 1.9913, "step": 37602 }, { "epoch": 3.8640567200986435, "grad_norm": 0.12924471497535706, "learning_rate": 0.01, "loss": 1.9937, "step": 37605 }, { "epoch": 3.8643649815043157, "grad_norm": 0.1035354882478714, "learning_rate": 0.01, "loss": 1.9955, "step": 37608 }, { "epoch": 3.864673242909988, "grad_norm": 0.08066008985042572, "learning_rate": 0.01, "loss": 1.9858, "step": 37611 }, { "epoch": 3.8649815043156597, "grad_norm": 0.09418363124132156, "learning_rate": 0.01, "loss": 1.9869, "step": 37614 }, { "epoch": 3.8652897657213314, "grad_norm": 0.04805073142051697, "learning_rate": 0.01, "loss": 2.0007, "step": 37617 }, { "epoch": 3.8655980271270036, "grad_norm": 0.04753410443663597, "learning_rate": 0.01, "loss": 1.9875, "step": 37620 }, { "epoch": 3.865906288532676, "grad_norm": 0.0458386205136776, "learning_rate": 0.01, "loss": 1.9936, "step": 37623 }, { "epoch": 3.8662145499383476, "grad_norm": 0.1318642795085907, "learning_rate": 0.01, "loss": 1.9759, "step": 37626 }, { "epoch": 3.8665228113440198, "grad_norm": 0.10072299838066101, "learning_rate": 0.01, "loss": 2.0074, "step": 37629 }, { "epoch": 3.866831072749692, "grad_norm": 0.06701991707086563, "learning_rate": 0.01, "loss": 2.0128, "step": 37632 }, { "epoch": 3.8671393341553637, "grad_norm": 0.055372897535562515, "learning_rate": 0.01, "loss": 2.0143, "step": 37635 }, { "epoch": 3.867447595561036, "grad_norm": 0.061906322836875916, "learning_rate": 0.01, "loss": 1.9947, "step": 37638 }, { "epoch": 3.8677558569667077, "grad_norm": 0.05329981446266174, "learning_rate": 0.01, "loss": 2.0001, "step": 37641 }, { "epoch": 3.86806411837238, "grad_norm": 0.05068975314497948, "learning_rate": 0.01, "loss": 1.9685, "step": 37644 }, { "epoch": 3.8683723797780516, "grad_norm": 0.09255755692720413, "learning_rate": 0.01, "loss": 2.0059, "step": 37647 }, { "epoch": 3.868680641183724, "grad_norm": 0.08617328852415085, "learning_rate": 0.01, "loss": 1.9905, "step": 37650 }, { "epoch": 3.868988902589396, "grad_norm": 0.044413745403289795, "learning_rate": 0.01, "loss": 1.978, "step": 37653 }, { "epoch": 3.8692971639950677, "grad_norm": 0.086729995906353, "learning_rate": 0.01, "loss": 1.9997, "step": 37656 }, { "epoch": 3.86960542540074, "grad_norm": 0.07304412871599197, "learning_rate": 0.01, "loss": 1.9925, "step": 37659 }, { "epoch": 3.8699136868064117, "grad_norm": 0.06820013374090195, "learning_rate": 0.01, "loss": 2.0073, "step": 37662 }, { "epoch": 3.870221948212084, "grad_norm": 0.06025752052664757, "learning_rate": 0.01, "loss": 1.9999, "step": 37665 }, { "epoch": 3.8705302096177556, "grad_norm": 0.0649590715765953, "learning_rate": 0.01, "loss": 1.996, "step": 37668 }, { "epoch": 3.870838471023428, "grad_norm": 0.03553159162402153, "learning_rate": 0.01, "loss": 1.9689, "step": 37671 }, { "epoch": 3.8711467324291, "grad_norm": 0.06017104908823967, "learning_rate": 0.01, "loss": 2.0013, "step": 37674 }, { "epoch": 3.8714549938347718, "grad_norm": 0.05896780639886856, "learning_rate": 0.01, "loss": 1.9801, "step": 37677 }, { "epoch": 3.871763255240444, "grad_norm": 0.03493823856115341, "learning_rate": 0.01, "loss": 2.0032, "step": 37680 }, { "epoch": 3.872071516646116, "grad_norm": 0.11454503238201141, "learning_rate": 0.01, "loss": 2.0004, "step": 37683 }, { "epoch": 3.872379778051788, "grad_norm": 0.03603978455066681, "learning_rate": 0.01, "loss": 1.9834, "step": 37686 }, { "epoch": 3.8726880394574597, "grad_norm": 0.03927883505821228, "learning_rate": 0.01, "loss": 2.0015, "step": 37689 }, { "epoch": 3.872996300863132, "grad_norm": 0.06592239439487457, "learning_rate": 0.01, "loss": 2.0025, "step": 37692 }, { "epoch": 3.873304562268804, "grad_norm": 0.04402392357587814, "learning_rate": 0.01, "loss": 1.9845, "step": 37695 }, { "epoch": 3.873612823674476, "grad_norm": 0.04484686255455017, "learning_rate": 0.01, "loss": 1.9756, "step": 37698 }, { "epoch": 3.873921085080148, "grad_norm": 0.03577352687716484, "learning_rate": 0.01, "loss": 1.9957, "step": 37701 }, { "epoch": 3.87422934648582, "grad_norm": 0.03952937200665474, "learning_rate": 0.01, "loss": 1.9727, "step": 37704 }, { "epoch": 3.874537607891492, "grad_norm": 0.048469673842191696, "learning_rate": 0.01, "loss": 1.9783, "step": 37707 }, { "epoch": 3.874845869297164, "grad_norm": 0.04531377553939819, "learning_rate": 0.01, "loss": 1.9878, "step": 37710 }, { "epoch": 3.875154130702836, "grad_norm": 0.11962947994470596, "learning_rate": 0.01, "loss": 2.013, "step": 37713 }, { "epoch": 3.875462392108508, "grad_norm": 0.03379445523023605, "learning_rate": 0.01, "loss": 1.9817, "step": 37716 }, { "epoch": 3.87577065351418, "grad_norm": 0.036722879856824875, "learning_rate": 0.01, "loss": 1.969, "step": 37719 }, { "epoch": 3.876078914919852, "grad_norm": 0.03215174376964569, "learning_rate": 0.01, "loss": 1.9704, "step": 37722 }, { "epoch": 3.876387176325524, "grad_norm": 0.04591568931937218, "learning_rate": 0.01, "loss": 1.9886, "step": 37725 }, { "epoch": 3.876695437731196, "grad_norm": 0.04655212163925171, "learning_rate": 0.01, "loss": 1.9971, "step": 37728 }, { "epoch": 3.877003699136868, "grad_norm": 0.053555794060230255, "learning_rate": 0.01, "loss": 1.9863, "step": 37731 }, { "epoch": 3.8773119605425403, "grad_norm": 0.05492212250828743, "learning_rate": 0.01, "loss": 2.0099, "step": 37734 }, { "epoch": 3.877620221948212, "grad_norm": 0.07766193896532059, "learning_rate": 0.01, "loss": 1.982, "step": 37737 }, { "epoch": 3.877928483353884, "grad_norm": 0.040087949484586716, "learning_rate": 0.01, "loss": 2.018, "step": 37740 }, { "epoch": 3.878236744759556, "grad_norm": 0.07583244144916534, "learning_rate": 0.01, "loss": 1.9694, "step": 37743 }, { "epoch": 3.8785450061652282, "grad_norm": 0.0815076008439064, "learning_rate": 0.01, "loss": 1.9871, "step": 37746 }, { "epoch": 3.8788532675709, "grad_norm": 0.09940320998430252, "learning_rate": 0.01, "loss": 2.0211, "step": 37749 }, { "epoch": 3.879161528976572, "grad_norm": 0.07148890197277069, "learning_rate": 0.01, "loss": 2.0068, "step": 37752 }, { "epoch": 3.8794697903822444, "grad_norm": 0.0479053258895874, "learning_rate": 0.01, "loss": 2.0073, "step": 37755 }, { "epoch": 3.879778051787916, "grad_norm": 0.0394553542137146, "learning_rate": 0.01, "loss": 1.9948, "step": 37758 }, { "epoch": 3.8800863131935883, "grad_norm": 0.04193533584475517, "learning_rate": 0.01, "loss": 1.992, "step": 37761 }, { "epoch": 3.88039457459926, "grad_norm": 0.09164579212665558, "learning_rate": 0.01, "loss": 1.9968, "step": 37764 }, { "epoch": 3.8807028360049323, "grad_norm": 0.06852073222398758, "learning_rate": 0.01, "loss": 2.0127, "step": 37767 }, { "epoch": 3.881011097410604, "grad_norm": 0.049102578312158585, "learning_rate": 0.01, "loss": 2.0228, "step": 37770 }, { "epoch": 3.881319358816276, "grad_norm": 0.12974314391613007, "learning_rate": 0.01, "loss": 2.0158, "step": 37773 }, { "epoch": 3.8816276202219484, "grad_norm": 0.07722889631986618, "learning_rate": 0.01, "loss": 1.9791, "step": 37776 }, { "epoch": 3.88193588162762, "grad_norm": 0.07344157248735428, "learning_rate": 0.01, "loss": 2.0069, "step": 37779 }, { "epoch": 3.8822441430332923, "grad_norm": 0.035780053585767746, "learning_rate": 0.01, "loss": 2.0301, "step": 37782 }, { "epoch": 3.882552404438964, "grad_norm": 0.04180228337645531, "learning_rate": 0.01, "loss": 2.0013, "step": 37785 }, { "epoch": 3.8828606658446363, "grad_norm": 0.06659300625324249, "learning_rate": 0.01, "loss": 2.0016, "step": 37788 }, { "epoch": 3.883168927250308, "grad_norm": 0.08275042474269867, "learning_rate": 0.01, "loss": 1.9989, "step": 37791 }, { "epoch": 3.8834771886559802, "grad_norm": 0.12271406501531601, "learning_rate": 0.01, "loss": 2.0088, "step": 37794 }, { "epoch": 3.8837854500616524, "grad_norm": 0.14413659274578094, "learning_rate": 0.01, "loss": 2.0153, "step": 37797 }, { "epoch": 3.884093711467324, "grad_norm": 0.0712043046951294, "learning_rate": 0.01, "loss": 1.9928, "step": 37800 }, { "epoch": 3.8844019728729964, "grad_norm": 0.06497600674629211, "learning_rate": 0.01, "loss": 2.018, "step": 37803 }, { "epoch": 3.8847102342786686, "grad_norm": 0.053064875304698944, "learning_rate": 0.01, "loss": 1.9804, "step": 37806 }, { "epoch": 3.8850184956843403, "grad_norm": 0.04865000769495964, "learning_rate": 0.01, "loss": 2.0012, "step": 37809 }, { "epoch": 3.885326757090012, "grad_norm": 0.03862098976969719, "learning_rate": 0.01, "loss": 1.9836, "step": 37812 }, { "epoch": 3.8856350184956843, "grad_norm": 0.07821226119995117, "learning_rate": 0.01, "loss": 1.9974, "step": 37815 }, { "epoch": 3.8859432799013565, "grad_norm": 0.04477280378341675, "learning_rate": 0.01, "loss": 1.9914, "step": 37818 }, { "epoch": 3.886251541307028, "grad_norm": 0.11052905023097992, "learning_rate": 0.01, "loss": 1.9593, "step": 37821 }, { "epoch": 3.8865598027127004, "grad_norm": 0.07528085261583328, "learning_rate": 0.01, "loss": 1.9931, "step": 37824 }, { "epoch": 3.8868680641183726, "grad_norm": 0.09329832345247269, "learning_rate": 0.01, "loss": 1.9903, "step": 37827 }, { "epoch": 3.8871763255240444, "grad_norm": 0.03858843818306923, "learning_rate": 0.01, "loss": 2.0163, "step": 37830 }, { "epoch": 3.8874845869297165, "grad_norm": 0.05364726856350899, "learning_rate": 0.01, "loss": 1.9904, "step": 37833 }, { "epoch": 3.8877928483353883, "grad_norm": 0.036494240164756775, "learning_rate": 0.01, "loss": 1.9889, "step": 37836 }, { "epoch": 3.8881011097410605, "grad_norm": 0.033809784799814224, "learning_rate": 0.01, "loss": 1.9991, "step": 37839 }, { "epoch": 3.8884093711467322, "grad_norm": 0.08358091861009598, "learning_rate": 0.01, "loss": 1.9997, "step": 37842 }, { "epoch": 3.8887176325524044, "grad_norm": 0.04314170405268669, "learning_rate": 0.01, "loss": 1.9829, "step": 37845 }, { "epoch": 3.8890258939580766, "grad_norm": 0.06448940187692642, "learning_rate": 0.01, "loss": 2.0054, "step": 37848 }, { "epoch": 3.8893341553637484, "grad_norm": 0.04236144572496414, "learning_rate": 0.01, "loss": 2.0074, "step": 37851 }, { "epoch": 3.8896424167694206, "grad_norm": 0.08682555705308914, "learning_rate": 0.01, "loss": 1.9907, "step": 37854 }, { "epoch": 3.8899506781750923, "grad_norm": 0.1599910706281662, "learning_rate": 0.01, "loss": 2.0193, "step": 37857 }, { "epoch": 3.8902589395807645, "grad_norm": 0.04853571951389313, "learning_rate": 0.01, "loss": 2.013, "step": 37860 }, { "epoch": 3.8905672009864363, "grad_norm": 0.054008036851882935, "learning_rate": 0.01, "loss": 2.0035, "step": 37863 }, { "epoch": 3.8908754623921085, "grad_norm": 0.0459190271794796, "learning_rate": 0.01, "loss": 2.0101, "step": 37866 }, { "epoch": 3.8911837237977807, "grad_norm": 0.047940943390131, "learning_rate": 0.01, "loss": 1.9976, "step": 37869 }, { "epoch": 3.8914919852034524, "grad_norm": 0.041892241686582565, "learning_rate": 0.01, "loss": 1.9977, "step": 37872 }, { "epoch": 3.8918002466091246, "grad_norm": 0.037861257791519165, "learning_rate": 0.01, "loss": 2.0089, "step": 37875 }, { "epoch": 3.892108508014797, "grad_norm": 0.1314014196395874, "learning_rate": 0.01, "loss": 2.0124, "step": 37878 }, { "epoch": 3.8924167694204685, "grad_norm": 0.053526077419519424, "learning_rate": 0.01, "loss": 1.9998, "step": 37881 }, { "epoch": 3.8927250308261403, "grad_norm": 0.044471751898527145, "learning_rate": 0.01, "loss": 1.9975, "step": 37884 }, { "epoch": 3.8930332922318125, "grad_norm": 0.10089153051376343, "learning_rate": 0.01, "loss": 2.005, "step": 37887 }, { "epoch": 3.8933415536374847, "grad_norm": 0.09028996527194977, "learning_rate": 0.01, "loss": 2.0231, "step": 37890 }, { "epoch": 3.8936498150431564, "grad_norm": 0.06257162988185883, "learning_rate": 0.01, "loss": 2.0181, "step": 37893 }, { "epoch": 3.8939580764488286, "grad_norm": 0.056669414043426514, "learning_rate": 0.01, "loss": 2.0005, "step": 37896 }, { "epoch": 3.894266337854501, "grad_norm": 0.09135915338993073, "learning_rate": 0.01, "loss": 2.0008, "step": 37899 }, { "epoch": 3.8945745992601726, "grad_norm": 0.055741071701049805, "learning_rate": 0.01, "loss": 1.9884, "step": 37902 }, { "epoch": 3.8948828606658448, "grad_norm": 0.06624908000230789, "learning_rate": 0.01, "loss": 2.0107, "step": 37905 }, { "epoch": 3.8951911220715165, "grad_norm": 0.030004529282450676, "learning_rate": 0.01, "loss": 2.0076, "step": 37908 }, { "epoch": 3.8954993834771887, "grad_norm": 0.04271325841546059, "learning_rate": 0.01, "loss": 1.9966, "step": 37911 }, { "epoch": 3.8958076448828605, "grad_norm": 0.04701056331396103, "learning_rate": 0.01, "loss": 1.9912, "step": 37914 }, { "epoch": 3.8961159062885327, "grad_norm": 0.04298432916402817, "learning_rate": 0.01, "loss": 1.9828, "step": 37917 }, { "epoch": 3.896424167694205, "grad_norm": 0.04561863839626312, "learning_rate": 0.01, "loss": 1.9627, "step": 37920 }, { "epoch": 3.8967324290998766, "grad_norm": 0.12965039908885956, "learning_rate": 0.01, "loss": 2.0225, "step": 37923 }, { "epoch": 3.897040690505549, "grad_norm": 0.1109326183795929, "learning_rate": 0.01, "loss": 2.0033, "step": 37926 }, { "epoch": 3.8973489519112205, "grad_norm": 0.06224660202860832, "learning_rate": 0.01, "loss": 2.0008, "step": 37929 }, { "epoch": 3.8976572133168927, "grad_norm": 0.06318973749876022, "learning_rate": 0.01, "loss": 2.0092, "step": 37932 }, { "epoch": 3.8979654747225645, "grad_norm": 0.06799352914094925, "learning_rate": 0.01, "loss": 2.0034, "step": 37935 }, { "epoch": 3.8982737361282367, "grad_norm": 0.07071252167224884, "learning_rate": 0.01, "loss": 2.0239, "step": 37938 }, { "epoch": 3.898581997533909, "grad_norm": 0.03879907727241516, "learning_rate": 0.01, "loss": 1.9725, "step": 37941 }, { "epoch": 3.8988902589395806, "grad_norm": 0.048606619238853455, "learning_rate": 0.01, "loss": 2.0106, "step": 37944 }, { "epoch": 3.899198520345253, "grad_norm": 0.09814203530550003, "learning_rate": 0.01, "loss": 1.9919, "step": 37947 }, { "epoch": 3.899506781750925, "grad_norm": 0.04557656869292259, "learning_rate": 0.01, "loss": 1.98, "step": 37950 }, { "epoch": 3.8998150431565968, "grad_norm": 0.04122527688741684, "learning_rate": 0.01, "loss": 2.0131, "step": 37953 }, { "epoch": 3.900123304562269, "grad_norm": 0.060118671506643295, "learning_rate": 0.01, "loss": 2.0066, "step": 37956 }, { "epoch": 3.9004315659679407, "grad_norm": 0.14660358428955078, "learning_rate": 0.01, "loss": 1.9894, "step": 37959 }, { "epoch": 3.900739827373613, "grad_norm": 0.09819690883159637, "learning_rate": 0.01, "loss": 1.9912, "step": 37962 }, { "epoch": 3.9010480887792847, "grad_norm": 0.041503068059682846, "learning_rate": 0.01, "loss": 1.9565, "step": 37965 }, { "epoch": 3.901356350184957, "grad_norm": 0.04021459445357323, "learning_rate": 0.01, "loss": 2.0216, "step": 37968 }, { "epoch": 3.901664611590629, "grad_norm": 0.04789562150835991, "learning_rate": 0.01, "loss": 2.021, "step": 37971 }, { "epoch": 3.901972872996301, "grad_norm": 0.03949809446930885, "learning_rate": 0.01, "loss": 1.9655, "step": 37974 }, { "epoch": 3.902281134401973, "grad_norm": 0.041829485446214676, "learning_rate": 0.01, "loss": 2.0131, "step": 37977 }, { "epoch": 3.9025893958076447, "grad_norm": 0.13577204942703247, "learning_rate": 0.01, "loss": 1.9844, "step": 37980 }, { "epoch": 3.902897657213317, "grad_norm": 0.03821146488189697, "learning_rate": 0.01, "loss": 2.0024, "step": 37983 }, { "epoch": 3.9032059186189887, "grad_norm": 0.07382892072200775, "learning_rate": 0.01, "loss": 2.0065, "step": 37986 }, { "epoch": 3.903514180024661, "grad_norm": 0.04287475720047951, "learning_rate": 0.01, "loss": 1.9884, "step": 37989 }, { "epoch": 3.903822441430333, "grad_norm": 0.08588143438100815, "learning_rate": 0.01, "loss": 2.0178, "step": 37992 }, { "epoch": 3.904130702836005, "grad_norm": 0.060327284038066864, "learning_rate": 0.01, "loss": 1.9785, "step": 37995 }, { "epoch": 3.904438964241677, "grad_norm": 0.050726667046546936, "learning_rate": 0.01, "loss": 1.9798, "step": 37998 }, { "epoch": 3.904747225647349, "grad_norm": 0.0876336544752121, "learning_rate": 0.01, "loss": 1.9977, "step": 38001 }, { "epoch": 3.905055487053021, "grad_norm": 0.05690234154462814, "learning_rate": 0.01, "loss": 1.9835, "step": 38004 }, { "epoch": 3.9053637484586927, "grad_norm": 0.07977151870727539, "learning_rate": 0.01, "loss": 2.0067, "step": 38007 }, { "epoch": 3.905672009864365, "grad_norm": 0.038005661219358444, "learning_rate": 0.01, "loss": 2.0113, "step": 38010 }, { "epoch": 3.905980271270037, "grad_norm": 0.1033119261264801, "learning_rate": 0.01, "loss": 1.9964, "step": 38013 }, { "epoch": 3.906288532675709, "grad_norm": 0.05024554207921028, "learning_rate": 0.01, "loss": 1.9703, "step": 38016 }, { "epoch": 3.906596794081381, "grad_norm": 0.07625278830528259, "learning_rate": 0.01, "loss": 1.9895, "step": 38019 }, { "epoch": 3.9069050554870532, "grad_norm": 0.10337291657924652, "learning_rate": 0.01, "loss": 2.0167, "step": 38022 }, { "epoch": 3.907213316892725, "grad_norm": 0.04415413364768028, "learning_rate": 0.01, "loss": 2.0055, "step": 38025 }, { "epoch": 3.907521578298397, "grad_norm": 0.0347541943192482, "learning_rate": 0.01, "loss": 2.0018, "step": 38028 }, { "epoch": 3.907829839704069, "grad_norm": 0.036209288984537125, "learning_rate": 0.01, "loss": 1.9647, "step": 38031 }, { "epoch": 3.908138101109741, "grad_norm": 0.0959850400686264, "learning_rate": 0.01, "loss": 2.0013, "step": 38034 }, { "epoch": 3.908446362515413, "grad_norm": 0.05654274299740791, "learning_rate": 0.01, "loss": 1.983, "step": 38037 }, { "epoch": 3.908754623921085, "grad_norm": 0.11333990842103958, "learning_rate": 0.01, "loss": 1.9921, "step": 38040 }, { "epoch": 3.9090628853267573, "grad_norm": 0.05329067260026932, "learning_rate": 0.01, "loss": 2.0203, "step": 38043 }, { "epoch": 3.909371146732429, "grad_norm": 0.03489474579691887, "learning_rate": 0.01, "loss": 1.9992, "step": 38046 }, { "epoch": 3.909679408138101, "grad_norm": 0.044765155762434006, "learning_rate": 0.01, "loss": 1.9994, "step": 38049 }, { "epoch": 3.909987669543773, "grad_norm": 0.11689729988574982, "learning_rate": 0.01, "loss": 2.0322, "step": 38052 }, { "epoch": 3.910295930949445, "grad_norm": 0.04785189777612686, "learning_rate": 0.01, "loss": 2.0103, "step": 38055 }, { "epoch": 3.910604192355117, "grad_norm": 0.040157750248909, "learning_rate": 0.01, "loss": 1.9836, "step": 38058 }, { "epoch": 3.910912453760789, "grad_norm": 0.04107039049267769, "learning_rate": 0.01, "loss": 2.0106, "step": 38061 }, { "epoch": 3.9112207151664613, "grad_norm": 0.1298598051071167, "learning_rate": 0.01, "loss": 1.9866, "step": 38064 }, { "epoch": 3.911528976572133, "grad_norm": 0.04832058027386665, "learning_rate": 0.01, "loss": 1.9831, "step": 38067 }, { "epoch": 3.9118372379778052, "grad_norm": 0.035566527396440506, "learning_rate": 0.01, "loss": 2.0097, "step": 38070 }, { "epoch": 3.9121454993834774, "grad_norm": 0.12645745277404785, "learning_rate": 0.01, "loss": 1.9978, "step": 38073 }, { "epoch": 3.912453760789149, "grad_norm": 0.13044588267803192, "learning_rate": 0.01, "loss": 1.9921, "step": 38076 }, { "epoch": 3.912762022194821, "grad_norm": 0.0751858800649643, "learning_rate": 0.01, "loss": 2.024, "step": 38079 }, { "epoch": 3.913070283600493, "grad_norm": 0.06417164206504822, "learning_rate": 0.01, "loss": 2.0241, "step": 38082 }, { "epoch": 3.9133785450061653, "grad_norm": 0.03895511105656624, "learning_rate": 0.01, "loss": 2.0074, "step": 38085 }, { "epoch": 3.913686806411837, "grad_norm": 0.03581606224179268, "learning_rate": 0.01, "loss": 1.9923, "step": 38088 }, { "epoch": 3.9139950678175093, "grad_norm": 0.02936650812625885, "learning_rate": 0.01, "loss": 1.989, "step": 38091 }, { "epoch": 3.9143033292231815, "grad_norm": 0.036661721765995026, "learning_rate": 0.01, "loss": 2.009, "step": 38094 }, { "epoch": 3.914611590628853, "grad_norm": 0.08295129984617233, "learning_rate": 0.01, "loss": 2.0141, "step": 38097 }, { "epoch": 3.9149198520345254, "grad_norm": 0.09887045621871948, "learning_rate": 0.01, "loss": 2.0028, "step": 38100 }, { "epoch": 3.915228113440197, "grad_norm": 0.0821511521935463, "learning_rate": 0.01, "loss": 1.9886, "step": 38103 }, { "epoch": 3.9155363748458694, "grad_norm": 0.0894683301448822, "learning_rate": 0.01, "loss": 2.0059, "step": 38106 }, { "epoch": 3.915844636251541, "grad_norm": 0.043715961277484894, "learning_rate": 0.01, "loss": 2.0197, "step": 38109 }, { "epoch": 3.9161528976572133, "grad_norm": 0.06341227144002914, "learning_rate": 0.01, "loss": 2.004, "step": 38112 }, { "epoch": 3.9164611590628855, "grad_norm": 0.10574996471405029, "learning_rate": 0.01, "loss": 1.9834, "step": 38115 }, { "epoch": 3.9167694204685573, "grad_norm": 0.07099676132202148, "learning_rate": 0.01, "loss": 1.9827, "step": 38118 }, { "epoch": 3.9170776818742294, "grad_norm": 0.051802802830934525, "learning_rate": 0.01, "loss": 2.0185, "step": 38121 }, { "epoch": 3.917385943279901, "grad_norm": 0.059143371880054474, "learning_rate": 0.01, "loss": 2.0009, "step": 38124 }, { "epoch": 3.9176942046855734, "grad_norm": 0.07478468120098114, "learning_rate": 0.01, "loss": 1.99, "step": 38127 }, { "epoch": 3.918002466091245, "grad_norm": 0.05269934609532356, "learning_rate": 0.01, "loss": 2.002, "step": 38130 }, { "epoch": 3.9183107274969173, "grad_norm": 0.03774020075798035, "learning_rate": 0.01, "loss": 1.9863, "step": 38133 }, { "epoch": 3.9186189889025895, "grad_norm": 0.06620101630687714, "learning_rate": 0.01, "loss": 1.9894, "step": 38136 }, { "epoch": 3.9189272503082613, "grad_norm": 0.13302361965179443, "learning_rate": 0.01, "loss": 1.9997, "step": 38139 }, { "epoch": 3.9192355117139335, "grad_norm": 0.05435695871710777, "learning_rate": 0.01, "loss": 1.9911, "step": 38142 }, { "epoch": 3.9195437731196057, "grad_norm": 0.041471030563116074, "learning_rate": 0.01, "loss": 1.9936, "step": 38145 }, { "epoch": 3.9198520345252774, "grad_norm": 0.032130394130945206, "learning_rate": 0.01, "loss": 1.9983, "step": 38148 }, { "epoch": 3.920160295930949, "grad_norm": 0.032780006527900696, "learning_rate": 0.01, "loss": 1.9729, "step": 38151 }, { "epoch": 3.9204685573366214, "grad_norm": 0.03699677437543869, "learning_rate": 0.01, "loss": 2.0168, "step": 38154 }, { "epoch": 3.9207768187422936, "grad_norm": 0.04473430663347244, "learning_rate": 0.01, "loss": 2.0035, "step": 38157 }, { "epoch": 3.9210850801479653, "grad_norm": 0.06131662428379059, "learning_rate": 0.01, "loss": 2.0053, "step": 38160 }, { "epoch": 3.9213933415536375, "grad_norm": 0.10085607320070267, "learning_rate": 0.01, "loss": 1.9963, "step": 38163 }, { "epoch": 3.9217016029593097, "grad_norm": 0.11734067648649216, "learning_rate": 0.01, "loss": 1.9955, "step": 38166 }, { "epoch": 3.9220098643649814, "grad_norm": 0.09287893027067184, "learning_rate": 0.01, "loss": 1.9914, "step": 38169 }, { "epoch": 3.9223181257706536, "grad_norm": 0.058617495000362396, "learning_rate": 0.01, "loss": 1.9977, "step": 38172 }, { "epoch": 3.9226263871763254, "grad_norm": 0.0597362145781517, "learning_rate": 0.01, "loss": 1.9811, "step": 38175 }, { "epoch": 3.9229346485819976, "grad_norm": 0.046750448644161224, "learning_rate": 0.01, "loss": 1.9766, "step": 38178 }, { "epoch": 3.9232429099876693, "grad_norm": 0.03863513097167015, "learning_rate": 0.01, "loss": 2.0081, "step": 38181 }, { "epoch": 3.9235511713933415, "grad_norm": 0.03276417404413223, "learning_rate": 0.01, "loss": 2.0101, "step": 38184 }, { "epoch": 3.9238594327990137, "grad_norm": 0.03270519897341728, "learning_rate": 0.01, "loss": 1.9997, "step": 38187 }, { "epoch": 3.9241676942046855, "grad_norm": 0.05820414423942566, "learning_rate": 0.01, "loss": 2.0085, "step": 38190 }, { "epoch": 3.9244759556103577, "grad_norm": 0.0589318610727787, "learning_rate": 0.01, "loss": 2.015, "step": 38193 }, { "epoch": 3.92478421701603, "grad_norm": 0.05013816058635712, "learning_rate": 0.01, "loss": 1.9994, "step": 38196 }, { "epoch": 3.9250924784217016, "grad_norm": 0.10934565216302872, "learning_rate": 0.01, "loss": 2.0149, "step": 38199 }, { "epoch": 3.9254007398273734, "grad_norm": 0.08114335685968399, "learning_rate": 0.01, "loss": 1.9721, "step": 38202 }, { "epoch": 3.9257090012330456, "grad_norm": 0.056287068873643875, "learning_rate": 0.01, "loss": 1.9753, "step": 38205 }, { "epoch": 3.9260172626387178, "grad_norm": 0.04509197920560837, "learning_rate": 0.01, "loss": 2.0047, "step": 38208 }, { "epoch": 3.9263255240443895, "grad_norm": 0.04409080743789673, "learning_rate": 0.01, "loss": 1.9804, "step": 38211 }, { "epoch": 3.9266337854500617, "grad_norm": 0.044174257665872574, "learning_rate": 0.01, "loss": 1.9897, "step": 38214 }, { "epoch": 3.926942046855734, "grad_norm": 0.05122366175055504, "learning_rate": 0.01, "loss": 1.9924, "step": 38217 }, { "epoch": 3.9272503082614056, "grad_norm": 0.12582433223724365, "learning_rate": 0.01, "loss": 1.9903, "step": 38220 }, { "epoch": 3.927558569667078, "grad_norm": 0.060516782104969025, "learning_rate": 0.01, "loss": 2.0088, "step": 38223 }, { "epoch": 3.9278668310727496, "grad_norm": 0.04210484027862549, "learning_rate": 0.01, "loss": 2.0051, "step": 38226 }, { "epoch": 3.928175092478422, "grad_norm": 0.043509434908628464, "learning_rate": 0.01, "loss": 1.9963, "step": 38229 }, { "epoch": 3.9284833538840935, "grad_norm": 0.03601152077317238, "learning_rate": 0.01, "loss": 1.9945, "step": 38232 }, { "epoch": 3.9287916152897657, "grad_norm": 0.04590906575322151, "learning_rate": 0.01, "loss": 2.01, "step": 38235 }, { "epoch": 3.929099876695438, "grad_norm": 0.06336984038352966, "learning_rate": 0.01, "loss": 1.9757, "step": 38238 }, { "epoch": 3.9294081381011097, "grad_norm": 0.07014600187540054, "learning_rate": 0.01, "loss": 1.9891, "step": 38241 }, { "epoch": 3.929716399506782, "grad_norm": 0.07760234922170639, "learning_rate": 0.01, "loss": 2.0032, "step": 38244 }, { "epoch": 3.9300246609124536, "grad_norm": 0.1088213101029396, "learning_rate": 0.01, "loss": 2.0149, "step": 38247 }, { "epoch": 3.930332922318126, "grad_norm": 0.04306039586663246, "learning_rate": 0.01, "loss": 2.014, "step": 38250 }, { "epoch": 3.9306411837237976, "grad_norm": 0.057269759476184845, "learning_rate": 0.01, "loss": 1.9873, "step": 38253 }, { "epoch": 3.9309494451294698, "grad_norm": 0.04819104075431824, "learning_rate": 0.01, "loss": 2.011, "step": 38256 }, { "epoch": 3.931257706535142, "grad_norm": 0.038784999400377274, "learning_rate": 0.01, "loss": 1.9981, "step": 38259 }, { "epoch": 3.9315659679408137, "grad_norm": 0.08599941432476044, "learning_rate": 0.01, "loss": 1.9838, "step": 38262 }, { "epoch": 3.931874229346486, "grad_norm": 0.10680285841226578, "learning_rate": 0.01, "loss": 1.9891, "step": 38265 }, { "epoch": 3.932182490752158, "grad_norm": 0.09683788567781448, "learning_rate": 0.01, "loss": 2.0191, "step": 38268 }, { "epoch": 3.93249075215783, "grad_norm": 0.040799640119075775, "learning_rate": 0.01, "loss": 1.9741, "step": 38271 }, { "epoch": 3.9327990135635016, "grad_norm": 0.08265798538923264, "learning_rate": 0.01, "loss": 2.0359, "step": 38274 }, { "epoch": 3.933107274969174, "grad_norm": 0.0628572627902031, "learning_rate": 0.01, "loss": 2.018, "step": 38277 }, { "epoch": 3.933415536374846, "grad_norm": 0.06475923955440521, "learning_rate": 0.01, "loss": 2.0023, "step": 38280 }, { "epoch": 3.9337237977805177, "grad_norm": 0.043740466237068176, "learning_rate": 0.01, "loss": 2.0003, "step": 38283 }, { "epoch": 3.93403205918619, "grad_norm": 0.029720323160290718, "learning_rate": 0.01, "loss": 1.9905, "step": 38286 }, { "epoch": 3.934340320591862, "grad_norm": 0.050941772758960724, "learning_rate": 0.01, "loss": 1.9757, "step": 38289 }, { "epoch": 3.934648581997534, "grad_norm": 0.1068761870265007, "learning_rate": 0.01, "loss": 2.0054, "step": 38292 }, { "epoch": 3.934956843403206, "grad_norm": 0.06410616636276245, "learning_rate": 0.01, "loss": 1.9931, "step": 38295 }, { "epoch": 3.935265104808878, "grad_norm": 0.10598262399435043, "learning_rate": 0.01, "loss": 2.0068, "step": 38298 }, { "epoch": 3.93557336621455, "grad_norm": 0.05296116694808006, "learning_rate": 0.01, "loss": 1.9625, "step": 38301 }, { "epoch": 3.9358816276202218, "grad_norm": 0.04025321081280708, "learning_rate": 0.01, "loss": 2.0119, "step": 38304 }, { "epoch": 3.936189889025894, "grad_norm": 0.11896882951259613, "learning_rate": 0.01, "loss": 2.0066, "step": 38307 }, { "epoch": 3.936498150431566, "grad_norm": 0.07203146070241928, "learning_rate": 0.01, "loss": 1.9781, "step": 38310 }, { "epoch": 3.936806411837238, "grad_norm": 0.05813155323266983, "learning_rate": 0.01, "loss": 1.9872, "step": 38313 }, { "epoch": 3.93711467324291, "grad_norm": 0.0439312644302845, "learning_rate": 0.01, "loss": 1.9977, "step": 38316 }, { "epoch": 3.937422934648582, "grad_norm": 0.05411090329289436, "learning_rate": 0.01, "loss": 2.0366, "step": 38319 }, { "epoch": 3.937731196054254, "grad_norm": 0.035287659615278244, "learning_rate": 0.01, "loss": 2.0147, "step": 38322 }, { "epoch": 3.938039457459926, "grad_norm": 0.08188273012638092, "learning_rate": 0.01, "loss": 1.9825, "step": 38325 }, { "epoch": 3.938347718865598, "grad_norm": 0.06879545003175735, "learning_rate": 0.01, "loss": 2.0037, "step": 38328 }, { "epoch": 3.93865598027127, "grad_norm": 0.06965189427137375, "learning_rate": 0.01, "loss": 1.9757, "step": 38331 }, { "epoch": 3.938964241676942, "grad_norm": 0.07698633521795273, "learning_rate": 0.01, "loss": 1.9823, "step": 38334 }, { "epoch": 3.939272503082614, "grad_norm": 0.06989213079214096, "learning_rate": 0.01, "loss": 1.9765, "step": 38337 }, { "epoch": 3.9395807644882863, "grad_norm": 0.09224730730056763, "learning_rate": 0.01, "loss": 1.9898, "step": 38340 }, { "epoch": 3.939889025893958, "grad_norm": 0.15504223108291626, "learning_rate": 0.01, "loss": 2.0109, "step": 38343 }, { "epoch": 3.94019728729963, "grad_norm": 0.06935428082942963, "learning_rate": 0.01, "loss": 2.0157, "step": 38346 }, { "epoch": 3.940505548705302, "grad_norm": 0.051906321197748184, "learning_rate": 0.01, "loss": 1.9971, "step": 38349 }, { "epoch": 3.940813810110974, "grad_norm": 0.059517163783311844, "learning_rate": 0.01, "loss": 1.9764, "step": 38352 }, { "epoch": 3.941122071516646, "grad_norm": 0.031905319541692734, "learning_rate": 0.01, "loss": 1.9958, "step": 38355 }, { "epoch": 3.941430332922318, "grad_norm": 0.06473187357187271, "learning_rate": 0.01, "loss": 1.9852, "step": 38358 }, { "epoch": 3.9417385943279903, "grad_norm": 0.0756862536072731, "learning_rate": 0.01, "loss": 2.0025, "step": 38361 }, { "epoch": 3.942046855733662, "grad_norm": 0.06513907015323639, "learning_rate": 0.01, "loss": 2.0076, "step": 38364 }, { "epoch": 3.9423551171393343, "grad_norm": 0.0908990353345871, "learning_rate": 0.01, "loss": 1.9868, "step": 38367 }, { "epoch": 3.942663378545006, "grad_norm": 0.06427323073148727, "learning_rate": 0.01, "loss": 1.9848, "step": 38370 }, { "epoch": 3.9429716399506782, "grad_norm": 0.09153769910335541, "learning_rate": 0.01, "loss": 1.9816, "step": 38373 }, { "epoch": 3.94327990135635, "grad_norm": 0.0451352633535862, "learning_rate": 0.01, "loss": 1.9973, "step": 38376 }, { "epoch": 3.943588162762022, "grad_norm": 0.06844813376665115, "learning_rate": 0.01, "loss": 1.9987, "step": 38379 }, { "epoch": 3.9438964241676944, "grad_norm": 0.07901204377412796, "learning_rate": 0.01, "loss": 2.0011, "step": 38382 }, { "epoch": 3.944204685573366, "grad_norm": 0.08094186335802078, "learning_rate": 0.01, "loss": 2.0076, "step": 38385 }, { "epoch": 3.9445129469790383, "grad_norm": 0.09053739905357361, "learning_rate": 0.01, "loss": 1.9936, "step": 38388 }, { "epoch": 3.9448212083847105, "grad_norm": 0.04751008749008179, "learning_rate": 0.01, "loss": 1.993, "step": 38391 }, { "epoch": 3.9451294697903823, "grad_norm": 0.042345285415649414, "learning_rate": 0.01, "loss": 1.9857, "step": 38394 }, { "epoch": 3.945437731196054, "grad_norm": 0.1027415469288826, "learning_rate": 0.01, "loss": 1.9761, "step": 38397 }, { "epoch": 3.945745992601726, "grad_norm": 0.12780915200710297, "learning_rate": 0.01, "loss": 2.0122, "step": 38400 }, { "epoch": 3.9460542540073984, "grad_norm": 0.0731390118598938, "learning_rate": 0.01, "loss": 2.0005, "step": 38403 }, { "epoch": 3.94636251541307, "grad_norm": 0.07617928087711334, "learning_rate": 0.01, "loss": 2.0131, "step": 38406 }, { "epoch": 3.9466707768187423, "grad_norm": 0.049755457788705826, "learning_rate": 0.01, "loss": 1.9756, "step": 38409 }, { "epoch": 3.9469790382244145, "grad_norm": 0.043812233954668045, "learning_rate": 0.01, "loss": 1.9935, "step": 38412 }, { "epoch": 3.9472872996300863, "grad_norm": 0.07829032093286514, "learning_rate": 0.01, "loss": 2.0004, "step": 38415 }, { "epoch": 3.9475955610357585, "grad_norm": 0.056485142558813095, "learning_rate": 0.01, "loss": 2.0117, "step": 38418 }, { "epoch": 3.9479038224414302, "grad_norm": 0.09569665789604187, "learning_rate": 0.01, "loss": 1.9906, "step": 38421 }, { "epoch": 3.9482120838471024, "grad_norm": 0.07466506212949753, "learning_rate": 0.01, "loss": 2.0046, "step": 38424 }, { "epoch": 3.948520345252774, "grad_norm": 0.08420269191265106, "learning_rate": 0.01, "loss": 2.0009, "step": 38427 }, { "epoch": 3.9488286066584464, "grad_norm": 0.05188721418380737, "learning_rate": 0.01, "loss": 2.0409, "step": 38430 }, { "epoch": 3.9491368680641186, "grad_norm": 0.042890697717666626, "learning_rate": 0.01, "loss": 1.9798, "step": 38433 }, { "epoch": 3.9494451294697903, "grad_norm": 0.05752531811594963, "learning_rate": 0.01, "loss": 1.975, "step": 38436 }, { "epoch": 3.9497533908754625, "grad_norm": 0.0403323695063591, "learning_rate": 0.01, "loss": 1.9951, "step": 38439 }, { "epoch": 3.9500616522811343, "grad_norm": 0.035122547298669815, "learning_rate": 0.01, "loss": 1.9995, "step": 38442 }, { "epoch": 3.9503699136868065, "grad_norm": 0.04063395410776138, "learning_rate": 0.01, "loss": 2.0264, "step": 38445 }, { "epoch": 3.950678175092478, "grad_norm": 0.05300283804535866, "learning_rate": 0.01, "loss": 2.0159, "step": 38448 }, { "epoch": 3.9509864364981504, "grad_norm": 0.06941550225019455, "learning_rate": 0.01, "loss": 2.0155, "step": 38451 }, { "epoch": 3.9512946979038226, "grad_norm": 0.04268152639269829, "learning_rate": 0.01, "loss": 2.0116, "step": 38454 }, { "epoch": 3.9516029593094943, "grad_norm": 0.058871544897556305, "learning_rate": 0.01, "loss": 1.9888, "step": 38457 }, { "epoch": 3.9519112207151665, "grad_norm": 0.0643804594874382, "learning_rate": 0.01, "loss": 1.9971, "step": 38460 }, { "epoch": 3.9522194821208387, "grad_norm": 0.04509326070547104, "learning_rate": 0.01, "loss": 2.0231, "step": 38463 }, { "epoch": 3.9525277435265105, "grad_norm": 0.04596889764070511, "learning_rate": 0.01, "loss": 1.9926, "step": 38466 }, { "epoch": 3.9528360049321822, "grad_norm": 0.08639726787805557, "learning_rate": 0.01, "loss": 1.973, "step": 38469 }, { "epoch": 3.9531442663378544, "grad_norm": 0.08404930680990219, "learning_rate": 0.01, "loss": 1.9867, "step": 38472 }, { "epoch": 3.9534525277435266, "grad_norm": 0.09206783026456833, "learning_rate": 0.01, "loss": 1.968, "step": 38475 }, { "epoch": 3.9537607891491984, "grad_norm": 0.041316352784633636, "learning_rate": 0.01, "loss": 1.9764, "step": 38478 }, { "epoch": 3.9540690505548706, "grad_norm": 0.07000883668661118, "learning_rate": 0.01, "loss": 2.0071, "step": 38481 }, { "epoch": 3.9543773119605428, "grad_norm": 0.048716410994529724, "learning_rate": 0.01, "loss": 1.9984, "step": 38484 }, { "epoch": 3.9546855733662145, "grad_norm": 0.05251453444361687, "learning_rate": 0.01, "loss": 2.0007, "step": 38487 }, { "epoch": 3.9549938347718867, "grad_norm": 0.04517770931124687, "learning_rate": 0.01, "loss": 1.9998, "step": 38490 }, { "epoch": 3.9553020961775585, "grad_norm": 0.07227790355682373, "learning_rate": 0.01, "loss": 1.9965, "step": 38493 }, { "epoch": 3.9556103575832307, "grad_norm": 0.09953956305980682, "learning_rate": 0.01, "loss": 1.9875, "step": 38496 }, { "epoch": 3.9559186189889024, "grad_norm": 0.06194831430912018, "learning_rate": 0.01, "loss": 2.0008, "step": 38499 }, { "epoch": 3.9562268803945746, "grad_norm": 0.10566883534193039, "learning_rate": 0.01, "loss": 1.9839, "step": 38502 }, { "epoch": 3.956535141800247, "grad_norm": 0.04950516298413277, "learning_rate": 0.01, "loss": 1.9868, "step": 38505 }, { "epoch": 3.9568434032059185, "grad_norm": 0.04657790809869766, "learning_rate": 0.01, "loss": 1.9794, "step": 38508 }, { "epoch": 3.9571516646115907, "grad_norm": 0.05642826855182648, "learning_rate": 0.01, "loss": 1.9739, "step": 38511 }, { "epoch": 3.9574599260172625, "grad_norm": 0.049003373831510544, "learning_rate": 0.01, "loss": 1.9892, "step": 38514 }, { "epoch": 3.9577681874229347, "grad_norm": 0.05019281804561615, "learning_rate": 0.01, "loss": 1.9758, "step": 38517 }, { "epoch": 3.9580764488286064, "grad_norm": 0.05060233548283577, "learning_rate": 0.01, "loss": 2.0093, "step": 38520 }, { "epoch": 3.9583847102342786, "grad_norm": 0.051873739808797836, "learning_rate": 0.01, "loss": 1.998, "step": 38523 }, { "epoch": 3.958692971639951, "grad_norm": 0.11860267072916031, "learning_rate": 0.01, "loss": 1.9988, "step": 38526 }, { "epoch": 3.9590012330456226, "grad_norm": 0.03894282132387161, "learning_rate": 0.01, "loss": 2.0014, "step": 38529 }, { "epoch": 3.9593094944512948, "grad_norm": 0.0839400663971901, "learning_rate": 0.01, "loss": 1.9988, "step": 38532 }, { "epoch": 3.959617755856967, "grad_norm": 0.10270547866821289, "learning_rate": 0.01, "loss": 1.9829, "step": 38535 }, { "epoch": 3.9599260172626387, "grad_norm": 0.07309349626302719, "learning_rate": 0.01, "loss": 2.0124, "step": 38538 }, { "epoch": 3.9602342786683105, "grad_norm": 0.03441464155912399, "learning_rate": 0.01, "loss": 2.0036, "step": 38541 }, { "epoch": 3.9605425400739827, "grad_norm": 0.04434891417622566, "learning_rate": 0.01, "loss": 1.9907, "step": 38544 }, { "epoch": 3.960850801479655, "grad_norm": 0.030151626095175743, "learning_rate": 0.01, "loss": 1.9991, "step": 38547 }, { "epoch": 3.9611590628853266, "grad_norm": 0.03249426558613777, "learning_rate": 0.01, "loss": 1.9883, "step": 38550 }, { "epoch": 3.961467324290999, "grad_norm": 0.06774075329303741, "learning_rate": 0.01, "loss": 1.9894, "step": 38553 }, { "epoch": 3.961775585696671, "grad_norm": 0.1510474681854248, "learning_rate": 0.01, "loss": 1.989, "step": 38556 }, { "epoch": 3.9620838471023427, "grad_norm": 0.080832839012146, "learning_rate": 0.01, "loss": 2.0033, "step": 38559 }, { "epoch": 3.962392108508015, "grad_norm": 0.08914501219987869, "learning_rate": 0.01, "loss": 2.0032, "step": 38562 }, { "epoch": 3.9627003699136867, "grad_norm": 0.10509749501943588, "learning_rate": 0.01, "loss": 1.9611, "step": 38565 }, { "epoch": 3.963008631319359, "grad_norm": 0.0980307012796402, "learning_rate": 0.01, "loss": 2.006, "step": 38568 }, { "epoch": 3.9633168927250306, "grad_norm": 0.04917595908045769, "learning_rate": 0.01, "loss": 2.0061, "step": 38571 }, { "epoch": 3.963625154130703, "grad_norm": 0.04090237617492676, "learning_rate": 0.01, "loss": 2.0112, "step": 38574 }, { "epoch": 3.963933415536375, "grad_norm": 0.06782566010951996, "learning_rate": 0.01, "loss": 2.0158, "step": 38577 }, { "epoch": 3.9642416769420468, "grad_norm": 0.04495246335864067, "learning_rate": 0.01, "loss": 1.9784, "step": 38580 }, { "epoch": 3.964549938347719, "grad_norm": 0.11113790422677994, "learning_rate": 0.01, "loss": 1.9719, "step": 38583 }, { "epoch": 3.9648581997533907, "grad_norm": 0.09196839481592178, "learning_rate": 0.01, "loss": 1.997, "step": 38586 }, { "epoch": 3.965166461159063, "grad_norm": 0.07392636686563492, "learning_rate": 0.01, "loss": 2.0102, "step": 38589 }, { "epoch": 3.9654747225647347, "grad_norm": 0.0521097257733345, "learning_rate": 0.01, "loss": 1.983, "step": 38592 }, { "epoch": 3.965782983970407, "grad_norm": 0.05052189901471138, "learning_rate": 0.01, "loss": 1.9929, "step": 38595 }, { "epoch": 3.966091245376079, "grad_norm": 0.08592119067907333, "learning_rate": 0.01, "loss": 1.9949, "step": 38598 }, { "epoch": 3.966399506781751, "grad_norm": 0.08535821735858917, "learning_rate": 0.01, "loss": 1.9743, "step": 38601 }, { "epoch": 3.966707768187423, "grad_norm": 0.06658685207366943, "learning_rate": 0.01, "loss": 2.0038, "step": 38604 }, { "epoch": 3.967016029593095, "grad_norm": 0.04116528108716011, "learning_rate": 0.01, "loss": 1.969, "step": 38607 }, { "epoch": 3.967324290998767, "grad_norm": 0.04124008119106293, "learning_rate": 0.01, "loss": 1.9889, "step": 38610 }, { "epoch": 3.967632552404439, "grad_norm": 0.047569263726472855, "learning_rate": 0.01, "loss": 1.9848, "step": 38613 }, { "epoch": 3.967940813810111, "grad_norm": 0.04800506308674812, "learning_rate": 0.01, "loss": 2.0008, "step": 38616 }, { "epoch": 3.968249075215783, "grad_norm": 0.06227673590183258, "learning_rate": 0.01, "loss": 2.0099, "step": 38619 }, { "epoch": 3.968557336621455, "grad_norm": 0.036228880286216736, "learning_rate": 0.01, "loss": 1.9945, "step": 38622 }, { "epoch": 3.968865598027127, "grad_norm": 0.03745630383491516, "learning_rate": 0.01, "loss": 1.9916, "step": 38625 }, { "epoch": 3.969173859432799, "grad_norm": 0.08907367289066315, "learning_rate": 0.01, "loss": 2.0278, "step": 38628 }, { "epoch": 3.969482120838471, "grad_norm": 0.054506488144397736, "learning_rate": 0.01, "loss": 2.0001, "step": 38631 }, { "epoch": 3.969790382244143, "grad_norm": 0.04185614362359047, "learning_rate": 0.01, "loss": 1.9847, "step": 38634 }, { "epoch": 3.970098643649815, "grad_norm": 0.04917627200484276, "learning_rate": 0.01, "loss": 2.0155, "step": 38637 }, { "epoch": 3.970406905055487, "grad_norm": 0.040258195251226425, "learning_rate": 0.01, "loss": 2.0119, "step": 38640 }, { "epoch": 3.970715166461159, "grad_norm": 0.08127589523792267, "learning_rate": 0.01, "loss": 1.9753, "step": 38643 }, { "epoch": 3.971023427866831, "grad_norm": 0.07298692315816879, "learning_rate": 0.01, "loss": 1.9958, "step": 38646 }, { "epoch": 3.9713316892725032, "grad_norm": 0.09801699966192245, "learning_rate": 0.01, "loss": 1.9747, "step": 38649 }, { "epoch": 3.971639950678175, "grad_norm": 0.10016069561243057, "learning_rate": 0.01, "loss": 1.9986, "step": 38652 }, { "epoch": 3.971948212083847, "grad_norm": 0.038826216012239456, "learning_rate": 0.01, "loss": 1.9768, "step": 38655 }, { "epoch": 3.9722564734895194, "grad_norm": 0.04124876856803894, "learning_rate": 0.01, "loss": 1.977, "step": 38658 }, { "epoch": 3.972564734895191, "grad_norm": 0.03699329122900963, "learning_rate": 0.01, "loss": 2.0101, "step": 38661 }, { "epoch": 3.972872996300863, "grad_norm": 0.03506563603878021, "learning_rate": 0.01, "loss": 2.0035, "step": 38664 }, { "epoch": 3.973181257706535, "grad_norm": 0.08274342864751816, "learning_rate": 0.01, "loss": 1.9819, "step": 38667 }, { "epoch": 3.9734895191122073, "grad_norm": 0.10225984454154968, "learning_rate": 0.01, "loss": 2.0115, "step": 38670 }, { "epoch": 3.973797780517879, "grad_norm": 0.08367688208818436, "learning_rate": 0.01, "loss": 1.9809, "step": 38673 }, { "epoch": 3.974106041923551, "grad_norm": 0.05682690069079399, "learning_rate": 0.01, "loss": 2.0012, "step": 38676 }, { "epoch": 3.9744143033292234, "grad_norm": 0.03980601951479912, "learning_rate": 0.01, "loss": 1.9668, "step": 38679 }, { "epoch": 3.974722564734895, "grad_norm": 0.03310983628034592, "learning_rate": 0.01, "loss": 1.9976, "step": 38682 }, { "epoch": 3.9750308261405674, "grad_norm": 0.05037367716431618, "learning_rate": 0.01, "loss": 1.9845, "step": 38685 }, { "epoch": 3.975339087546239, "grad_norm": 0.09068721532821655, "learning_rate": 0.01, "loss": 1.9848, "step": 38688 }, { "epoch": 3.9756473489519113, "grad_norm": 0.10251244902610779, "learning_rate": 0.01, "loss": 1.9981, "step": 38691 }, { "epoch": 3.975955610357583, "grad_norm": 0.04860818758606911, "learning_rate": 0.01, "loss": 2.0073, "step": 38694 }, { "epoch": 3.9762638717632552, "grad_norm": 0.0460125096142292, "learning_rate": 0.01, "loss": 1.9925, "step": 38697 }, { "epoch": 3.9765721331689274, "grad_norm": 0.03295229375362396, "learning_rate": 0.01, "loss": 1.9682, "step": 38700 }, { "epoch": 3.976880394574599, "grad_norm": 0.0434846356511116, "learning_rate": 0.01, "loss": 1.9944, "step": 38703 }, { "epoch": 3.9771886559802714, "grad_norm": 0.04077495262026787, "learning_rate": 0.01, "loss": 2.0107, "step": 38706 }, { "epoch": 3.977496917385943, "grad_norm": 0.08263571560382843, "learning_rate": 0.01, "loss": 2.0125, "step": 38709 }, { "epoch": 3.9778051787916153, "grad_norm": 0.1033141016960144, "learning_rate": 0.01, "loss": 2.02, "step": 38712 }, { "epoch": 3.978113440197287, "grad_norm": 0.12253855168819427, "learning_rate": 0.01, "loss": 2.0039, "step": 38715 }, { "epoch": 3.9784217016029593, "grad_norm": 0.08476614207029343, "learning_rate": 0.01, "loss": 1.974, "step": 38718 }, { "epoch": 3.9787299630086315, "grad_norm": 0.08248502016067505, "learning_rate": 0.01, "loss": 1.9934, "step": 38721 }, { "epoch": 3.979038224414303, "grad_norm": 0.06338318437337875, "learning_rate": 0.01, "loss": 1.9632, "step": 38724 }, { "epoch": 3.9793464858199754, "grad_norm": 0.061125461012125015, "learning_rate": 0.01, "loss": 1.9902, "step": 38727 }, { "epoch": 3.9796547472256476, "grad_norm": 0.04191330447793007, "learning_rate": 0.01, "loss": 1.9837, "step": 38730 }, { "epoch": 3.9799630086313194, "grad_norm": 0.04181262478232384, "learning_rate": 0.01, "loss": 1.9952, "step": 38733 }, { "epoch": 3.980271270036991, "grad_norm": 0.054846856743097305, "learning_rate": 0.01, "loss": 2.0109, "step": 38736 }, { "epoch": 3.9805795314426633, "grad_norm": 0.1322845071554184, "learning_rate": 0.01, "loss": 1.9626, "step": 38739 }, { "epoch": 3.9808877928483355, "grad_norm": 0.06795060634613037, "learning_rate": 0.01, "loss": 1.9972, "step": 38742 }, { "epoch": 3.9811960542540072, "grad_norm": 0.04729272425174713, "learning_rate": 0.01, "loss": 2.0027, "step": 38745 }, { "epoch": 3.9815043156596794, "grad_norm": 0.05951160192489624, "learning_rate": 0.01, "loss": 2.0016, "step": 38748 }, { "epoch": 3.9818125770653516, "grad_norm": 0.14003396034240723, "learning_rate": 0.01, "loss": 2.0166, "step": 38751 }, { "epoch": 3.9821208384710234, "grad_norm": 0.04996626079082489, "learning_rate": 0.01, "loss": 2.002, "step": 38754 }, { "epoch": 3.9824290998766956, "grad_norm": 0.08134348690509796, "learning_rate": 0.01, "loss": 2.0159, "step": 38757 }, { "epoch": 3.9827373612823673, "grad_norm": 0.04592986777424812, "learning_rate": 0.01, "loss": 2.0107, "step": 38760 }, { "epoch": 3.9830456226880395, "grad_norm": 0.06769111007452011, "learning_rate": 0.01, "loss": 1.998, "step": 38763 }, { "epoch": 3.9833538840937113, "grad_norm": 0.04501201957464218, "learning_rate": 0.01, "loss": 2.0177, "step": 38766 }, { "epoch": 3.9836621454993835, "grad_norm": 0.07144643366336823, "learning_rate": 0.01, "loss": 1.9948, "step": 38769 }, { "epoch": 3.9839704069050557, "grad_norm": 0.05670906975865364, "learning_rate": 0.01, "loss": 2.0011, "step": 38772 }, { "epoch": 3.9842786683107274, "grad_norm": 0.03870624676346779, "learning_rate": 0.01, "loss": 2.0089, "step": 38775 }, { "epoch": 3.9845869297163996, "grad_norm": 0.08053532242774963, "learning_rate": 0.01, "loss": 1.9955, "step": 38778 }, { "epoch": 3.9848951911220714, "grad_norm": 0.03753774240612984, "learning_rate": 0.01, "loss": 1.9866, "step": 38781 }, { "epoch": 3.9852034525277436, "grad_norm": 0.04568708315491676, "learning_rate": 0.01, "loss": 1.9811, "step": 38784 }, { "epoch": 3.9855117139334153, "grad_norm": 0.03626095503568649, "learning_rate": 0.01, "loss": 1.995, "step": 38787 }, { "epoch": 3.9858199753390875, "grad_norm": 0.04620308801531792, "learning_rate": 0.01, "loss": 2.0172, "step": 38790 }, { "epoch": 3.9861282367447597, "grad_norm": 0.05767418071627617, "learning_rate": 0.01, "loss": 1.9905, "step": 38793 }, { "epoch": 3.9864364981504314, "grad_norm": 0.04969481751322746, "learning_rate": 0.01, "loss": 1.9795, "step": 38796 }, { "epoch": 3.9867447595561036, "grad_norm": 0.0532878078520298, "learning_rate": 0.01, "loss": 1.9875, "step": 38799 }, { "epoch": 3.987053020961776, "grad_norm": 0.07379619777202606, "learning_rate": 0.01, "loss": 2.0124, "step": 38802 }, { "epoch": 3.9873612823674476, "grad_norm": 0.07538430392742157, "learning_rate": 0.01, "loss": 2.0105, "step": 38805 }, { "epoch": 3.9876695437731193, "grad_norm": 0.0686052069067955, "learning_rate": 0.01, "loss": 2.0059, "step": 38808 }, { "epoch": 3.9879778051787915, "grad_norm": 0.08726216852664948, "learning_rate": 0.01, "loss": 1.9803, "step": 38811 }, { "epoch": 3.9882860665844637, "grad_norm": 0.08535965532064438, "learning_rate": 0.01, "loss": 1.9997, "step": 38814 }, { "epoch": 3.9885943279901355, "grad_norm": 0.046852223575115204, "learning_rate": 0.01, "loss": 1.982, "step": 38817 }, { "epoch": 3.9889025893958077, "grad_norm": 0.06200144812464714, "learning_rate": 0.01, "loss": 2.0423, "step": 38820 }, { "epoch": 3.98921085080148, "grad_norm": 0.03675130382180214, "learning_rate": 0.01, "loss": 2.0003, "step": 38823 }, { "epoch": 3.9895191122071516, "grad_norm": 0.054221536964178085, "learning_rate": 0.01, "loss": 2.0149, "step": 38826 }, { "epoch": 3.989827373612824, "grad_norm": 0.0411151684820652, "learning_rate": 0.01, "loss": 2.0047, "step": 38829 }, { "epoch": 3.9901356350184956, "grad_norm": 0.03962259367108345, "learning_rate": 0.01, "loss": 2.01, "step": 38832 }, { "epoch": 3.9904438964241677, "grad_norm": 0.04097359627485275, "learning_rate": 0.01, "loss": 1.9974, "step": 38835 }, { "epoch": 3.9907521578298395, "grad_norm": 0.13092494010925293, "learning_rate": 0.01, "loss": 1.9936, "step": 38838 }, { "epoch": 3.9910604192355117, "grad_norm": 0.03308350592851639, "learning_rate": 0.01, "loss": 1.9941, "step": 38841 }, { "epoch": 3.991368680641184, "grad_norm": 0.06447092443704605, "learning_rate": 0.01, "loss": 2.0022, "step": 38844 }, { "epoch": 3.9916769420468556, "grad_norm": 0.06456363946199417, "learning_rate": 0.01, "loss": 2.0051, "step": 38847 }, { "epoch": 3.991985203452528, "grad_norm": 0.04208895191550255, "learning_rate": 0.01, "loss": 2.01, "step": 38850 }, { "epoch": 3.9922934648582, "grad_norm": 0.03307265415787697, "learning_rate": 0.01, "loss": 1.9962, "step": 38853 }, { "epoch": 3.9926017262638718, "grad_norm": 0.06227843463420868, "learning_rate": 0.01, "loss": 1.9864, "step": 38856 }, { "epoch": 3.9929099876695435, "grad_norm": 0.1568191796541214, "learning_rate": 0.01, "loss": 1.9979, "step": 38859 }, { "epoch": 3.9932182490752157, "grad_norm": 0.05974143370985985, "learning_rate": 0.01, "loss": 2.0018, "step": 38862 }, { "epoch": 3.993526510480888, "grad_norm": 0.07882939279079437, "learning_rate": 0.01, "loss": 2.0102, "step": 38865 }, { "epoch": 3.9938347718865597, "grad_norm": 0.053341448307037354, "learning_rate": 0.01, "loss": 2.0123, "step": 38868 }, { "epoch": 3.994143033292232, "grad_norm": 0.0673101395368576, "learning_rate": 0.01, "loss": 2.0183, "step": 38871 }, { "epoch": 3.994451294697904, "grad_norm": 0.06340447813272476, "learning_rate": 0.01, "loss": 1.9966, "step": 38874 }, { "epoch": 3.994759556103576, "grad_norm": 0.06743529438972473, "learning_rate": 0.01, "loss": 2.0047, "step": 38877 }, { "epoch": 3.995067817509248, "grad_norm": 0.044702883809804916, "learning_rate": 0.01, "loss": 1.9927, "step": 38880 }, { "epoch": 3.9953760789149197, "grad_norm": 0.038102682679891586, "learning_rate": 0.01, "loss": 2.0005, "step": 38883 }, { "epoch": 3.995684340320592, "grad_norm": 0.0471016988158226, "learning_rate": 0.01, "loss": 1.9687, "step": 38886 }, { "epoch": 3.9959926017262637, "grad_norm": 0.10289987921714783, "learning_rate": 0.01, "loss": 2.0056, "step": 38889 }, { "epoch": 3.996300863131936, "grad_norm": 0.08012085407972336, "learning_rate": 0.01, "loss": 2.011, "step": 38892 }, { "epoch": 3.996609124537608, "grad_norm": 0.07357537001371384, "learning_rate": 0.01, "loss": 1.9837, "step": 38895 }, { "epoch": 3.99691738594328, "grad_norm": 0.08909714221954346, "learning_rate": 0.01, "loss": 2.0116, "step": 38898 }, { "epoch": 3.997225647348952, "grad_norm": 0.044727593660354614, "learning_rate": 0.01, "loss": 2.0027, "step": 38901 }, { "epoch": 3.9975339087546238, "grad_norm": 0.039593808352947235, "learning_rate": 0.01, "loss": 2.0071, "step": 38904 }, { "epoch": 3.997842170160296, "grad_norm": 0.03478686884045601, "learning_rate": 0.01, "loss": 2.0185, "step": 38907 }, { "epoch": 3.9981504315659677, "grad_norm": 0.09729648381471634, "learning_rate": 0.01, "loss": 2.0008, "step": 38910 }, { "epoch": 3.99845869297164, "grad_norm": 0.12499833852052689, "learning_rate": 0.01, "loss": 1.9861, "step": 38913 }, { "epoch": 3.998766954377312, "grad_norm": 0.10060276091098785, "learning_rate": 0.01, "loss": 1.9634, "step": 38916 }, { "epoch": 3.999075215782984, "grad_norm": 0.060861632227897644, "learning_rate": 0.01, "loss": 1.9902, "step": 38919 }, { "epoch": 3.999383477188656, "grad_norm": 0.058074526488780975, "learning_rate": 0.01, "loss": 1.9935, "step": 38922 }, { "epoch": 3.9996917385943282, "grad_norm": 0.08413925021886826, "learning_rate": 0.01, "loss": 1.9958, "step": 38925 }, { "epoch": 4.0, "grad_norm": 0.06637567281723022, "learning_rate": 0.01, "loss": 1.978, "step": 38928 }, { "epoch": 3.9937423061140747, "grad_norm": 0.04972624033689499, "learning_rate": 0.01, "loss": 2.0291, "step": 38931 }, { "epoch": 3.9940500615510874, "grad_norm": 0.04883033037185669, "learning_rate": 0.01, "loss": 2.0152, "step": 38934 }, { "epoch": 3.9943578169881, "grad_norm": 0.03551949933171272, "learning_rate": 0.01, "loss": 2.0028, "step": 38937 }, { "epoch": 3.994665572425113, "grad_norm": 0.03538177162408829, "learning_rate": 0.01, "loss": 2.0138, "step": 38940 }, { "epoch": 3.994973327862126, "grad_norm": 0.13709349930286407, "learning_rate": 0.01, "loss": 2.0197, "step": 38943 }, { "epoch": 3.9952810832991386, "grad_norm": 0.05646511912345886, "learning_rate": 0.01, "loss": 2.0143, "step": 38946 }, { "epoch": 3.995588838736151, "grad_norm": 0.05141003429889679, "learning_rate": 0.01, "loss": 2.025, "step": 38949 }, { "epoch": 3.9958965941731637, "grad_norm": 0.1064891591668129, "learning_rate": 0.01, "loss": 2.0142, "step": 38952 }, { "epoch": 3.9962043496101765, "grad_norm": 0.058718711137771606, "learning_rate": 0.01, "loss": 2.0061, "step": 38955 }, { "epoch": 3.9965121050471892, "grad_norm": 0.05866394191980362, "learning_rate": 0.01, "loss": 2.0145, "step": 38958 }, { "epoch": 3.996819860484202, "grad_norm": 0.036083538085222244, "learning_rate": 0.01, "loss": 1.9958, "step": 38961 }, { "epoch": 3.9971276159212143, "grad_norm": 0.05041573569178581, "learning_rate": 0.01, "loss": 2.0197, "step": 38964 }, { "epoch": 3.997435371358227, "grad_norm": 0.05070210620760918, "learning_rate": 0.01, "loss": 2.0093, "step": 38967 }, { "epoch": 3.99774312679524, "grad_norm": 0.06114819273352623, "learning_rate": 0.01, "loss": 2.0137, "step": 38970 }, { "epoch": 3.9980508822322527, "grad_norm": 0.05506499856710434, "learning_rate": 0.01, "loss": 1.9954, "step": 38973 }, { "epoch": 3.9983586376692655, "grad_norm": 0.06224251538515091, "learning_rate": 0.01, "loss": 2.0011, "step": 38976 }, { "epoch": 3.9986663931062782, "grad_norm": 0.05054394155740738, "learning_rate": 0.01, "loss": 1.9941, "step": 38979 }, { "epoch": 3.998974148543291, "grad_norm": 0.03862088546156883, "learning_rate": 0.01, "loss": 2.0132, "step": 38982 }, { "epoch": 3.999281903980304, "grad_norm": 0.04840533435344696, "learning_rate": 0.01, "loss": 2.0206, "step": 38985 }, { "epoch": 3.9995896594173166, "grad_norm": 0.05625883862376213, "learning_rate": 0.01, "loss": 1.993, "step": 38988 }, { "epoch": 3.9998974148543294, "grad_norm": 0.04202372580766678, "learning_rate": 0.01, "loss": 2.0116, "step": 38991 }, { "epoch": 4.000205170291342, "grad_norm": 0.03988848999142647, "learning_rate": 0.01, "loss": 2.005, "step": 38994 }, { "epoch": 4.000512925728355, "grad_norm": 0.08701229840517044, "learning_rate": 0.01, "loss": 1.9903, "step": 38997 }, { "epoch": 4.000820681165367, "grad_norm": 0.13237404823303223, "learning_rate": 0.01, "loss": 2.0183, "step": 39000 }, { "epoch": 4.00112843660238, "grad_norm": 0.0758156031370163, "learning_rate": 0.01, "loss": 1.9999, "step": 39003 }, { "epoch": 4.001436192039392, "grad_norm": 0.07759084552526474, "learning_rate": 0.01, "loss": 1.9862, "step": 39006 }, { "epoch": 4.001743947476405, "grad_norm": 0.08195871859788895, "learning_rate": 0.01, "loss": 2.0274, "step": 39009 }, { "epoch": 4.002051702913418, "grad_norm": 0.048866480588912964, "learning_rate": 0.01, "loss": 2.0046, "step": 39012 }, { "epoch": 4.002359458350431, "grad_norm": 0.04807426407933235, "learning_rate": 0.01, "loss": 1.9824, "step": 39015 }, { "epoch": 4.0026672137874435, "grad_norm": 0.07215370982885361, "learning_rate": 0.01, "loss": 2.0194, "step": 39018 }, { "epoch": 4.002974969224456, "grad_norm": 0.05422010272741318, "learning_rate": 0.01, "loss": 2.001, "step": 39021 }, { "epoch": 4.003282724661469, "grad_norm": 0.06720750778913498, "learning_rate": 0.01, "loss": 2.0191, "step": 39024 }, { "epoch": 4.003590480098482, "grad_norm": 0.04077715426683426, "learning_rate": 0.01, "loss": 2.0178, "step": 39027 }, { "epoch": 4.003898235535495, "grad_norm": 0.05866453796625137, "learning_rate": 0.01, "loss": 2.0035, "step": 39030 }, { "epoch": 4.004205990972507, "grad_norm": 0.03861930966377258, "learning_rate": 0.01, "loss": 2.0153, "step": 39033 }, { "epoch": 4.00451374640952, "grad_norm": 0.03546518459916115, "learning_rate": 0.01, "loss": 2.0189, "step": 39036 }, { "epoch": 4.004821501846533, "grad_norm": 0.034643933176994324, "learning_rate": 0.01, "loss": 2.0126, "step": 39039 }, { "epoch": 4.005129257283546, "grad_norm": 0.0831715315580368, "learning_rate": 0.01, "loss": 2.0098, "step": 39042 }, { "epoch": 4.005437012720558, "grad_norm": 0.06925207376480103, "learning_rate": 0.01, "loss": 2.0021, "step": 39045 }, { "epoch": 4.00574476815757, "grad_norm": 0.08311621099710464, "learning_rate": 0.01, "loss": 2.0402, "step": 39048 }, { "epoch": 4.006052523594583, "grad_norm": 0.08028697222471237, "learning_rate": 0.01, "loss": 2.0159, "step": 39051 }, { "epoch": 4.006360279031596, "grad_norm": 0.1259499341249466, "learning_rate": 0.01, "loss": 2.028, "step": 39054 }, { "epoch": 4.006668034468609, "grad_norm": 0.06442322582006454, "learning_rate": 0.01, "loss": 2.0176, "step": 39057 }, { "epoch": 4.0069757899056215, "grad_norm": 0.08281309902667999, "learning_rate": 0.01, "loss": 2.0191, "step": 39060 }, { "epoch": 4.007283545342634, "grad_norm": 0.04970608651638031, "learning_rate": 0.01, "loss": 2.0307, "step": 39063 }, { "epoch": 4.007591300779647, "grad_norm": 0.04361598566174507, "learning_rate": 0.01, "loss": 2.0094, "step": 39066 }, { "epoch": 4.00789905621666, "grad_norm": 0.04321054369211197, "learning_rate": 0.01, "loss": 2.002, "step": 39069 }, { "epoch": 4.008206811653673, "grad_norm": 0.05904306843876839, "learning_rate": 0.01, "loss": 2.0219, "step": 39072 }, { "epoch": 4.008514567090685, "grad_norm": 0.0728040412068367, "learning_rate": 0.01, "loss": 2.0044, "step": 39075 }, { "epoch": 4.008822322527698, "grad_norm": 0.09545755386352539, "learning_rate": 0.01, "loss": 1.9959, "step": 39078 }, { "epoch": 4.009130077964711, "grad_norm": 0.10636181384325027, "learning_rate": 0.01, "loss": 2.0097, "step": 39081 }, { "epoch": 4.009437833401724, "grad_norm": 0.08768682181835175, "learning_rate": 0.01, "loss": 2.0063, "step": 39084 }, { "epoch": 4.0097455888387366, "grad_norm": 0.04949640482664108, "learning_rate": 0.01, "loss": 2.0049, "step": 39087 }, { "epoch": 4.010053344275748, "grad_norm": 0.0413932166993618, "learning_rate": 0.01, "loss": 1.9751, "step": 39090 }, { "epoch": 4.010361099712761, "grad_norm": 0.02756902389228344, "learning_rate": 0.01, "loss": 1.9841, "step": 39093 }, { "epoch": 4.010668855149774, "grad_norm": 0.08485575020313263, "learning_rate": 0.01, "loss": 1.9895, "step": 39096 }, { "epoch": 4.010976610586787, "grad_norm": 0.0896422490477562, "learning_rate": 0.01, "loss": 1.9864, "step": 39099 }, { "epoch": 4.0112843660238, "grad_norm": 0.09696487337350845, "learning_rate": 0.01, "loss": 2.0172, "step": 39102 }, { "epoch": 4.011592121460812, "grad_norm": 0.059906743466854095, "learning_rate": 0.01, "loss": 1.9832, "step": 39105 }, { "epoch": 4.011899876897825, "grad_norm": 0.12596943974494934, "learning_rate": 0.01, "loss": 2.0141, "step": 39108 }, { "epoch": 4.012207632334838, "grad_norm": 0.08249086886644363, "learning_rate": 0.01, "loss": 2.0007, "step": 39111 }, { "epoch": 4.012515387771851, "grad_norm": 0.1073683500289917, "learning_rate": 0.01, "loss": 2.0216, "step": 39114 }, { "epoch": 4.0128231432088635, "grad_norm": 0.050193801522254944, "learning_rate": 0.01, "loss": 2.0219, "step": 39117 }, { "epoch": 4.013130898645876, "grad_norm": 0.04790155962109566, "learning_rate": 0.01, "loss": 1.9866, "step": 39120 }, { "epoch": 4.013438654082889, "grad_norm": 0.1130373477935791, "learning_rate": 0.01, "loss": 2.0338, "step": 39123 }, { "epoch": 4.013746409519902, "grad_norm": 0.16575901210308075, "learning_rate": 0.01, "loss": 1.9917, "step": 39126 }, { "epoch": 4.014054164956915, "grad_norm": 0.03896254301071167, "learning_rate": 0.01, "loss": 2.0133, "step": 39129 }, { "epoch": 4.014361920393927, "grad_norm": 0.033224210143089294, "learning_rate": 0.01, "loss": 1.9954, "step": 39132 }, { "epoch": 4.014669675830939, "grad_norm": 0.04422049596905708, "learning_rate": 0.01, "loss": 2.0278, "step": 39135 }, { "epoch": 4.014977431267952, "grad_norm": 0.046339549124240875, "learning_rate": 0.01, "loss": 2.0015, "step": 39138 }, { "epoch": 4.015285186704965, "grad_norm": 0.04534962773323059, "learning_rate": 0.01, "loss": 2.0238, "step": 39141 }, { "epoch": 4.015592942141978, "grad_norm": 0.0634172111749649, "learning_rate": 0.01, "loss": 2.0067, "step": 39144 }, { "epoch": 4.01590069757899, "grad_norm": 0.0435175783932209, "learning_rate": 0.01, "loss": 2.0146, "step": 39147 }, { "epoch": 4.016208453016003, "grad_norm": 0.04143211990594864, "learning_rate": 0.01, "loss": 1.9967, "step": 39150 }, { "epoch": 4.016516208453016, "grad_norm": 0.06689900159835815, "learning_rate": 0.01, "loss": 2.0033, "step": 39153 }, { "epoch": 4.016823963890029, "grad_norm": 0.19841893017292023, "learning_rate": 0.01, "loss": 2.0139, "step": 39156 }, { "epoch": 4.0171317193270415, "grad_norm": 0.09940902143716812, "learning_rate": 0.01, "loss": 2.0032, "step": 39159 }, { "epoch": 4.017439474764054, "grad_norm": 0.0738014280796051, "learning_rate": 0.01, "loss": 2.0058, "step": 39162 }, { "epoch": 4.017747230201067, "grad_norm": 0.06441953033208847, "learning_rate": 0.01, "loss": 1.9901, "step": 39165 }, { "epoch": 4.01805498563808, "grad_norm": 0.03884090855717659, "learning_rate": 0.01, "loss": 2.0078, "step": 39168 }, { "epoch": 4.018362741075093, "grad_norm": 0.036479830741882324, "learning_rate": 0.01, "loss": 1.9914, "step": 39171 }, { "epoch": 4.018670496512105, "grad_norm": 0.04188670963048935, "learning_rate": 0.01, "loss": 2.0239, "step": 39174 }, { "epoch": 4.018978251949118, "grad_norm": 0.07043974846601486, "learning_rate": 0.01, "loss": 2.0145, "step": 39177 }, { "epoch": 4.01928600738613, "grad_norm": 0.07807689160108566, "learning_rate": 0.01, "loss": 2.0254, "step": 39180 }, { "epoch": 4.019593762823143, "grad_norm": 0.05508783459663391, "learning_rate": 0.01, "loss": 2.0082, "step": 39183 }, { "epoch": 4.019901518260156, "grad_norm": 0.040311504155397415, "learning_rate": 0.01, "loss": 2.0097, "step": 39186 }, { "epoch": 4.020209273697168, "grad_norm": 0.03907238692045212, "learning_rate": 0.01, "loss": 2.0205, "step": 39189 }, { "epoch": 4.020517029134181, "grad_norm": 0.1371290385723114, "learning_rate": 0.01, "loss": 2.0055, "step": 39192 }, { "epoch": 4.020824784571194, "grad_norm": 0.05499713122844696, "learning_rate": 0.01, "loss": 2.0182, "step": 39195 }, { "epoch": 4.021132540008207, "grad_norm": 0.049838222563266754, "learning_rate": 0.01, "loss": 2.0118, "step": 39198 }, { "epoch": 4.0214402954452195, "grad_norm": 0.04565451666712761, "learning_rate": 0.01, "loss": 2.0178, "step": 39201 }, { "epoch": 4.021748050882232, "grad_norm": 0.06939809769392014, "learning_rate": 0.01, "loss": 1.9955, "step": 39204 }, { "epoch": 4.022055806319245, "grad_norm": 0.036337222903966904, "learning_rate": 0.01, "loss": 2.0002, "step": 39207 }, { "epoch": 4.022363561756258, "grad_norm": 0.11887694150209427, "learning_rate": 0.01, "loss": 2.0154, "step": 39210 }, { "epoch": 4.022671317193271, "grad_norm": 0.05074974149465561, "learning_rate": 0.01, "loss": 1.9788, "step": 39213 }, { "epoch": 4.022979072630283, "grad_norm": 0.04023103788495064, "learning_rate": 0.01, "loss": 2.0112, "step": 39216 }, { "epoch": 4.023286828067296, "grad_norm": 0.09158769249916077, "learning_rate": 0.01, "loss": 2.0184, "step": 39219 }, { "epoch": 4.023594583504309, "grad_norm": 0.06678687036037445, "learning_rate": 0.01, "loss": 2.0338, "step": 39222 }, { "epoch": 4.023902338941321, "grad_norm": 0.055356625467538834, "learning_rate": 0.01, "loss": 2.0084, "step": 39225 }, { "epoch": 4.024210094378334, "grad_norm": 0.051128923892974854, "learning_rate": 0.01, "loss": 2.0291, "step": 39228 }, { "epoch": 4.024517849815346, "grad_norm": 0.08770928531885147, "learning_rate": 0.01, "loss": 2.0006, "step": 39231 }, { "epoch": 4.024825605252359, "grad_norm": 0.06450860947370529, "learning_rate": 0.01, "loss": 1.9926, "step": 39234 }, { "epoch": 4.025133360689372, "grad_norm": 0.03998043015599251, "learning_rate": 0.01, "loss": 1.9993, "step": 39237 }, { "epoch": 4.025441116126385, "grad_norm": 0.036666858941316605, "learning_rate": 0.01, "loss": 2.0451, "step": 39240 }, { "epoch": 4.025748871563398, "grad_norm": 0.03850167244672775, "learning_rate": 0.01, "loss": 1.9825, "step": 39243 }, { "epoch": 4.02605662700041, "grad_norm": 0.03165037930011749, "learning_rate": 0.01, "loss": 1.9817, "step": 39246 }, { "epoch": 4.026364382437423, "grad_norm": 0.03471562638878822, "learning_rate": 0.01, "loss": 1.9923, "step": 39249 }, { "epoch": 4.026672137874436, "grad_norm": 0.10216906666755676, "learning_rate": 0.01, "loss": 1.9971, "step": 39252 }, { "epoch": 4.026979893311449, "grad_norm": 0.13467098772525787, "learning_rate": 0.01, "loss": 1.9973, "step": 39255 }, { "epoch": 4.0272876487484615, "grad_norm": 0.05322883278131485, "learning_rate": 0.01, "loss": 2.0068, "step": 39258 }, { "epoch": 4.027595404185474, "grad_norm": 0.0329531729221344, "learning_rate": 0.01, "loss": 2.0209, "step": 39261 }, { "epoch": 4.027903159622487, "grad_norm": 0.04283340275287628, "learning_rate": 0.01, "loss": 2.0052, "step": 39264 }, { "epoch": 4.0282109150595, "grad_norm": 0.09303436428308487, "learning_rate": 0.01, "loss": 2.003, "step": 39267 }, { "epoch": 4.028518670496512, "grad_norm": 0.04981641098856926, "learning_rate": 0.01, "loss": 1.9977, "step": 39270 }, { "epoch": 4.0288264259335245, "grad_norm": 0.05858089402318001, "learning_rate": 0.01, "loss": 2.0025, "step": 39273 }, { "epoch": 4.029134181370537, "grad_norm": 0.05271727591753006, "learning_rate": 0.01, "loss": 1.9937, "step": 39276 }, { "epoch": 4.02944193680755, "grad_norm": 0.0625569149851799, "learning_rate": 0.01, "loss": 2.0077, "step": 39279 }, { "epoch": 4.029749692244563, "grad_norm": 0.06783869862556458, "learning_rate": 0.01, "loss": 2.0183, "step": 39282 }, { "epoch": 4.030057447681576, "grad_norm": 0.05175579711794853, "learning_rate": 0.01, "loss": 2.0017, "step": 39285 }, { "epoch": 4.030365203118588, "grad_norm": 0.12327279895544052, "learning_rate": 0.01, "loss": 2.0061, "step": 39288 }, { "epoch": 4.030672958555601, "grad_norm": 0.09904535114765167, "learning_rate": 0.01, "loss": 2.0286, "step": 39291 }, { "epoch": 4.030980713992614, "grad_norm": 0.06502281874418259, "learning_rate": 0.01, "loss": 2.0119, "step": 39294 }, { "epoch": 4.031288469429627, "grad_norm": 0.05546945706009865, "learning_rate": 0.01, "loss": 2.0186, "step": 39297 }, { "epoch": 4.0315962248666395, "grad_norm": 0.03637825697660446, "learning_rate": 0.01, "loss": 1.9894, "step": 39300 }, { "epoch": 4.031903980303652, "grad_norm": 0.03696468472480774, "learning_rate": 0.01, "loss": 2.0207, "step": 39303 }, { "epoch": 4.032211735740665, "grad_norm": 0.11666533350944519, "learning_rate": 0.01, "loss": 2.0067, "step": 39306 }, { "epoch": 4.032519491177678, "grad_norm": 0.10067463666200638, "learning_rate": 0.01, "loss": 2.0312, "step": 39309 }, { "epoch": 4.032827246614691, "grad_norm": 0.0598643533885479, "learning_rate": 0.01, "loss": 2.0021, "step": 39312 }, { "epoch": 4.0331350020517025, "grad_norm": 0.057127151638269424, "learning_rate": 0.01, "loss": 1.9996, "step": 39315 }, { "epoch": 4.033442757488715, "grad_norm": 0.05786604434251785, "learning_rate": 0.01, "loss": 2.0131, "step": 39318 }, { "epoch": 4.033750512925728, "grad_norm": 0.040237389504909515, "learning_rate": 0.01, "loss": 2.0022, "step": 39321 }, { "epoch": 4.034058268362741, "grad_norm": 0.09420931339263916, "learning_rate": 0.01, "loss": 1.9952, "step": 39324 }, { "epoch": 4.034366023799754, "grad_norm": 0.11208292096853256, "learning_rate": 0.01, "loss": 2.0142, "step": 39327 }, { "epoch": 4.034673779236766, "grad_norm": 0.07000657171010971, "learning_rate": 0.01, "loss": 2.0025, "step": 39330 }, { "epoch": 4.034981534673779, "grad_norm": 0.05524434149265289, "learning_rate": 0.01, "loss": 2.0297, "step": 39333 }, { "epoch": 4.035289290110792, "grad_norm": 0.03453601896762848, "learning_rate": 0.01, "loss": 2.0086, "step": 39336 }, { "epoch": 4.035597045547805, "grad_norm": 0.046687569469213486, "learning_rate": 0.01, "loss": 1.9913, "step": 39339 }, { "epoch": 4.0359048009848175, "grad_norm": 0.04349486157298088, "learning_rate": 0.01, "loss": 2.0224, "step": 39342 }, { "epoch": 4.03621255642183, "grad_norm": 0.08332299441099167, "learning_rate": 0.01, "loss": 2.011, "step": 39345 }, { "epoch": 4.036520311858843, "grad_norm": 0.07960904389619827, "learning_rate": 0.01, "loss": 2.0046, "step": 39348 }, { "epoch": 4.036828067295856, "grad_norm": 0.06175240874290466, "learning_rate": 0.01, "loss": 2.0159, "step": 39351 }, { "epoch": 4.037135822732869, "grad_norm": 0.03728936240077019, "learning_rate": 0.01, "loss": 2.0074, "step": 39354 }, { "epoch": 4.037443578169881, "grad_norm": 0.046342454850673676, "learning_rate": 0.01, "loss": 2.0055, "step": 39357 }, { "epoch": 4.037751333606893, "grad_norm": 0.059486132115125656, "learning_rate": 0.01, "loss": 2.0171, "step": 39360 }, { "epoch": 4.038059089043906, "grad_norm": 0.09025575965642929, "learning_rate": 0.01, "loss": 2.0306, "step": 39363 }, { "epoch": 4.038366844480919, "grad_norm": 0.0599956177175045, "learning_rate": 0.01, "loss": 1.9936, "step": 39366 }, { "epoch": 4.038674599917932, "grad_norm": 0.06386277079582214, "learning_rate": 0.01, "loss": 2.0194, "step": 39369 }, { "epoch": 4.038982355354944, "grad_norm": 0.10979870706796646, "learning_rate": 0.01, "loss": 1.9965, "step": 39372 }, { "epoch": 4.039290110791957, "grad_norm": 0.13358891010284424, "learning_rate": 0.01, "loss": 1.9985, "step": 39375 }, { "epoch": 4.03959786622897, "grad_norm": 0.07506405562162399, "learning_rate": 0.01, "loss": 2.0069, "step": 39378 }, { "epoch": 4.039905621665983, "grad_norm": 0.03969530388712883, "learning_rate": 0.01, "loss": 2.0134, "step": 39381 }, { "epoch": 4.0402133771029956, "grad_norm": 0.06700876355171204, "learning_rate": 0.01, "loss": 2.005, "step": 39384 }, { "epoch": 4.040521132540008, "grad_norm": 0.06389278918504715, "learning_rate": 0.01, "loss": 2.015, "step": 39387 }, { "epoch": 4.040828887977021, "grad_norm": 0.04753594473004341, "learning_rate": 0.01, "loss": 2.0097, "step": 39390 }, { "epoch": 4.041136643414034, "grad_norm": 0.0517578050494194, "learning_rate": 0.01, "loss": 1.979, "step": 39393 }, { "epoch": 4.041444398851047, "grad_norm": 0.043198052793741226, "learning_rate": 0.01, "loss": 2.0231, "step": 39396 }, { "epoch": 4.0417521542880595, "grad_norm": 0.04130061715841293, "learning_rate": 0.01, "loss": 2.0426, "step": 39399 }, { "epoch": 4.042059909725072, "grad_norm": 0.037592917680740356, "learning_rate": 0.01, "loss": 2.0027, "step": 39402 }, { "epoch": 4.042367665162084, "grad_norm": 0.07257888466119766, "learning_rate": 0.01, "loss": 2.0057, "step": 39405 }, { "epoch": 4.042675420599097, "grad_norm": 0.08611748367547989, "learning_rate": 0.01, "loss": 2.0175, "step": 39408 }, { "epoch": 4.04298317603611, "grad_norm": 0.11179140955209732, "learning_rate": 0.01, "loss": 2.0172, "step": 39411 }, { "epoch": 4.0432909314731225, "grad_norm": 0.09140360355377197, "learning_rate": 0.01, "loss": 2.0149, "step": 39414 }, { "epoch": 4.043598686910135, "grad_norm": 0.040217410773038864, "learning_rate": 0.01, "loss": 2.0294, "step": 39417 }, { "epoch": 4.043906442347148, "grad_norm": 0.04423901066184044, "learning_rate": 0.01, "loss": 1.9824, "step": 39420 }, { "epoch": 4.044214197784161, "grad_norm": 0.047180719673633575, "learning_rate": 0.01, "loss": 1.9865, "step": 39423 }, { "epoch": 4.044521953221174, "grad_norm": 0.03836076334118843, "learning_rate": 0.01, "loss": 2.0081, "step": 39426 }, { "epoch": 4.044829708658186, "grad_norm": 0.08214177936315536, "learning_rate": 0.01, "loss": 2.0078, "step": 39429 }, { "epoch": 4.045137464095199, "grad_norm": 0.04551496356725693, "learning_rate": 0.01, "loss": 2.0025, "step": 39432 }, { "epoch": 4.045445219532212, "grad_norm": 0.08608072996139526, "learning_rate": 0.01, "loss": 1.9935, "step": 39435 }, { "epoch": 4.045752974969225, "grad_norm": 0.03998774662613869, "learning_rate": 0.01, "loss": 2.0074, "step": 39438 }, { "epoch": 4.0460607304062375, "grad_norm": 0.06219779700040817, "learning_rate": 0.01, "loss": 2.001, "step": 39441 }, { "epoch": 4.04636848584325, "grad_norm": 0.12329383194446564, "learning_rate": 0.01, "loss": 1.9988, "step": 39444 }, { "epoch": 4.046676241280263, "grad_norm": 0.10584307461977005, "learning_rate": 0.01, "loss": 1.9929, "step": 39447 }, { "epoch": 4.046983996717275, "grad_norm": 0.06407187879085541, "learning_rate": 0.01, "loss": 2.023, "step": 39450 }, { "epoch": 4.047291752154288, "grad_norm": 0.057736970484256744, "learning_rate": 0.01, "loss": 2.0166, "step": 39453 }, { "epoch": 4.0475995075913005, "grad_norm": 0.062264811247587204, "learning_rate": 0.01, "loss": 1.9873, "step": 39456 }, { "epoch": 4.047907263028313, "grad_norm": 0.0482734851539135, "learning_rate": 0.01, "loss": 1.9886, "step": 39459 }, { "epoch": 4.048215018465326, "grad_norm": 0.03942275047302246, "learning_rate": 0.01, "loss": 2.0209, "step": 39462 }, { "epoch": 4.048522773902339, "grad_norm": 0.08987545222043991, "learning_rate": 0.01, "loss": 2.0103, "step": 39465 }, { "epoch": 4.048830529339352, "grad_norm": 0.08133430033922195, "learning_rate": 0.01, "loss": 2.0039, "step": 39468 }, { "epoch": 4.049138284776364, "grad_norm": 0.03588191419839859, "learning_rate": 0.01, "loss": 2.0078, "step": 39471 }, { "epoch": 4.049446040213377, "grad_norm": 0.058000437915325165, "learning_rate": 0.01, "loss": 1.9893, "step": 39474 }, { "epoch": 4.04975379565039, "grad_norm": 0.08807062357664108, "learning_rate": 0.01, "loss": 2.0289, "step": 39477 }, { "epoch": 4.050061551087403, "grad_norm": 0.05153276026248932, "learning_rate": 0.01, "loss": 2.0242, "step": 39480 }, { "epoch": 4.0503693065244155, "grad_norm": 0.08411730825901031, "learning_rate": 0.01, "loss": 1.9963, "step": 39483 }, { "epoch": 4.050677061961428, "grad_norm": 0.09271445870399475, "learning_rate": 0.01, "loss": 2.0005, "step": 39486 }, { "epoch": 4.050984817398441, "grad_norm": 0.07173626124858856, "learning_rate": 0.01, "loss": 2.0146, "step": 39489 }, { "epoch": 4.051292572835454, "grad_norm": 0.058220986276865005, "learning_rate": 0.01, "loss": 2.0296, "step": 39492 }, { "epoch": 4.051600328272466, "grad_norm": 0.09059619903564453, "learning_rate": 0.01, "loss": 2.0276, "step": 39495 }, { "epoch": 4.0519080837094785, "grad_norm": 0.11159291118383408, "learning_rate": 0.01, "loss": 1.9806, "step": 39498 }, { "epoch": 4.052215839146491, "grad_norm": 0.03904344514012337, "learning_rate": 0.01, "loss": 2.0083, "step": 39501 }, { "epoch": 4.052523594583504, "grad_norm": 0.12682397663593292, "learning_rate": 0.01, "loss": 2.0227, "step": 39504 }, { "epoch": 4.052831350020517, "grad_norm": 0.06277670711278915, "learning_rate": 0.01, "loss": 1.9906, "step": 39507 }, { "epoch": 4.05313910545753, "grad_norm": 0.03622843325138092, "learning_rate": 0.01, "loss": 1.9835, "step": 39510 }, { "epoch": 4.053446860894542, "grad_norm": 0.056741926819086075, "learning_rate": 0.01, "loss": 2.0041, "step": 39513 }, { "epoch": 4.053754616331555, "grad_norm": 0.0654342994093895, "learning_rate": 0.01, "loss": 2.0023, "step": 39516 }, { "epoch": 4.054062371768568, "grad_norm": 0.05300883948802948, "learning_rate": 0.01, "loss": 2.0084, "step": 39519 }, { "epoch": 4.054370127205581, "grad_norm": 0.1103016659617424, "learning_rate": 0.01, "loss": 1.9686, "step": 39522 }, { "epoch": 4.0546778826425935, "grad_norm": 0.10315825045108795, "learning_rate": 0.01, "loss": 2.0048, "step": 39525 }, { "epoch": 4.054985638079606, "grad_norm": 0.11007767170667648, "learning_rate": 0.01, "loss": 1.9977, "step": 39528 }, { "epoch": 4.055293393516619, "grad_norm": 0.053403954952955246, "learning_rate": 0.01, "loss": 1.9443, "step": 39531 }, { "epoch": 4.055601148953632, "grad_norm": 0.05486089363694191, "learning_rate": 0.01, "loss": 2.0179, "step": 39534 }, { "epoch": 4.055908904390645, "grad_norm": 0.05680430680513382, "learning_rate": 0.01, "loss": 1.9991, "step": 39537 }, { "epoch": 4.056216659827657, "grad_norm": 0.07202226668596268, "learning_rate": 0.01, "loss": 2.0184, "step": 39540 }, { "epoch": 4.056524415264669, "grad_norm": 0.050710614770650864, "learning_rate": 0.01, "loss": 2.0005, "step": 39543 }, { "epoch": 4.056832170701682, "grad_norm": 0.05018226429820061, "learning_rate": 0.01, "loss": 2.0119, "step": 39546 }, { "epoch": 4.057139926138695, "grad_norm": 0.04539674147963524, "learning_rate": 0.01, "loss": 2.0258, "step": 39549 }, { "epoch": 4.057447681575708, "grad_norm": 0.0405915230512619, "learning_rate": 0.01, "loss": 2.0315, "step": 39552 }, { "epoch": 4.0577554370127205, "grad_norm": 0.04220306873321533, "learning_rate": 0.01, "loss": 2.0191, "step": 39555 }, { "epoch": 4.058063192449733, "grad_norm": 0.06802038848400116, "learning_rate": 0.01, "loss": 2.0144, "step": 39558 }, { "epoch": 4.058370947886746, "grad_norm": 0.16860038042068481, "learning_rate": 0.01, "loss": 1.9748, "step": 39561 }, { "epoch": 4.058678703323759, "grad_norm": 0.13039590418338776, "learning_rate": 0.01, "loss": 2.0167, "step": 39564 }, { "epoch": 4.058986458760772, "grad_norm": 0.05722019821405411, "learning_rate": 0.01, "loss": 1.9917, "step": 39567 }, { "epoch": 4.059294214197784, "grad_norm": 0.05984622612595558, "learning_rate": 0.01, "loss": 1.9915, "step": 39570 }, { "epoch": 4.059601969634797, "grad_norm": 0.04732242226600647, "learning_rate": 0.01, "loss": 1.9961, "step": 39573 }, { "epoch": 4.05990972507181, "grad_norm": 0.04328930750489235, "learning_rate": 0.01, "loss": 2.0046, "step": 39576 }, { "epoch": 4.060217480508823, "grad_norm": 0.06630559265613556, "learning_rate": 0.01, "loss": 2.0459, "step": 39579 }, { "epoch": 4.0605252359458355, "grad_norm": 0.06271739304065704, "learning_rate": 0.01, "loss": 2.0283, "step": 39582 }, { "epoch": 4.060832991382847, "grad_norm": 0.05598871782422066, "learning_rate": 0.01, "loss": 2.0322, "step": 39585 }, { "epoch": 4.06114074681986, "grad_norm": 0.04648594930768013, "learning_rate": 0.01, "loss": 2.0021, "step": 39588 }, { "epoch": 4.061448502256873, "grad_norm": 0.06579215824604034, "learning_rate": 0.01, "loss": 2.0039, "step": 39591 }, { "epoch": 4.061756257693886, "grad_norm": 0.05206866189837456, "learning_rate": 0.01, "loss": 2.0298, "step": 39594 }, { "epoch": 4.0620640131308985, "grad_norm": 0.06146420165896416, "learning_rate": 0.01, "loss": 1.998, "step": 39597 }, { "epoch": 4.062371768567911, "grad_norm": 0.09550274908542633, "learning_rate": 0.01, "loss": 2.023, "step": 39600 }, { "epoch": 4.062679524004924, "grad_norm": 0.041216351091861725, "learning_rate": 0.01, "loss": 2.0012, "step": 39603 }, { "epoch": 4.062987279441937, "grad_norm": 0.050457730889320374, "learning_rate": 0.01, "loss": 1.9998, "step": 39606 }, { "epoch": 4.06329503487895, "grad_norm": 0.03148980066180229, "learning_rate": 0.01, "loss": 2.0121, "step": 39609 }, { "epoch": 4.063602790315962, "grad_norm": 0.03415573388338089, "learning_rate": 0.01, "loss": 1.9832, "step": 39612 }, { "epoch": 4.063910545752975, "grad_norm": 0.038664404302835464, "learning_rate": 0.01, "loss": 1.9781, "step": 39615 }, { "epoch": 4.064218301189988, "grad_norm": 0.10088005661964417, "learning_rate": 0.01, "loss": 2.0241, "step": 39618 }, { "epoch": 4.064526056627001, "grad_norm": 0.05986418575048447, "learning_rate": 0.01, "loss": 1.9719, "step": 39621 }, { "epoch": 4.0648338120640135, "grad_norm": 0.04586298391222954, "learning_rate": 0.01, "loss": 2.0231, "step": 39624 }, { "epoch": 4.065141567501026, "grad_norm": 0.09720548987388611, "learning_rate": 0.01, "loss": 2.0234, "step": 39627 }, { "epoch": 4.065449322938038, "grad_norm": 0.1310441493988037, "learning_rate": 0.01, "loss": 2.0267, "step": 39630 }, { "epoch": 4.065757078375051, "grad_norm": 0.0445309616625309, "learning_rate": 0.01, "loss": 2.0295, "step": 39633 }, { "epoch": 4.066064833812064, "grad_norm": 0.05492424964904785, "learning_rate": 0.01, "loss": 2.0201, "step": 39636 }, { "epoch": 4.0663725892490765, "grad_norm": 0.09137557446956635, "learning_rate": 0.01, "loss": 1.9959, "step": 39639 }, { "epoch": 4.066680344686089, "grad_norm": 0.07394784688949585, "learning_rate": 0.01, "loss": 1.9933, "step": 39642 }, { "epoch": 4.066988100123102, "grad_norm": 0.0890335887670517, "learning_rate": 0.01, "loss": 2.0079, "step": 39645 }, { "epoch": 4.067295855560115, "grad_norm": 0.06005888432264328, "learning_rate": 0.01, "loss": 1.997, "step": 39648 }, { "epoch": 4.067603610997128, "grad_norm": 0.08164118975400925, "learning_rate": 0.01, "loss": 1.995, "step": 39651 }, { "epoch": 4.06791136643414, "grad_norm": 0.041462332010269165, "learning_rate": 0.01, "loss": 1.999, "step": 39654 }, { "epoch": 4.068219121871153, "grad_norm": 0.14200016856193542, "learning_rate": 0.01, "loss": 2.0083, "step": 39657 }, { "epoch": 4.068526877308166, "grad_norm": 0.04974464327096939, "learning_rate": 0.01, "loss": 2.0038, "step": 39660 }, { "epoch": 4.068834632745179, "grad_norm": 0.05781686678528786, "learning_rate": 0.01, "loss": 2.0022, "step": 39663 }, { "epoch": 4.0691423881821915, "grad_norm": 0.050336726009845734, "learning_rate": 0.01, "loss": 1.9971, "step": 39666 }, { "epoch": 4.069450143619204, "grad_norm": 0.0692603662610054, "learning_rate": 0.01, "loss": 2.024, "step": 39669 }, { "epoch": 4.069757899056217, "grad_norm": 0.052262064069509506, "learning_rate": 0.01, "loss": 2.0063, "step": 39672 }, { "epoch": 4.070065654493229, "grad_norm": 0.0743674710392952, "learning_rate": 0.01, "loss": 2.0361, "step": 39675 }, { "epoch": 4.070373409930242, "grad_norm": 0.03609304502606392, "learning_rate": 0.01, "loss": 1.9913, "step": 39678 }, { "epoch": 4.0706811653672546, "grad_norm": 0.05369256064295769, "learning_rate": 0.01, "loss": 2.027, "step": 39681 }, { "epoch": 4.070988920804267, "grad_norm": 0.04112347960472107, "learning_rate": 0.01, "loss": 2.0202, "step": 39684 }, { "epoch": 4.07129667624128, "grad_norm": 0.05583459138870239, "learning_rate": 0.01, "loss": 2.0233, "step": 39687 }, { "epoch": 4.071604431678293, "grad_norm": 0.06682361662387848, "learning_rate": 0.01, "loss": 2.0344, "step": 39690 }, { "epoch": 4.071912187115306, "grad_norm": 0.08554589748382568, "learning_rate": 0.01, "loss": 2.0021, "step": 39693 }, { "epoch": 4.0722199425523185, "grad_norm": 0.07087216526269913, "learning_rate": 0.01, "loss": 1.9897, "step": 39696 }, { "epoch": 4.072527697989331, "grad_norm": 0.06562238186597824, "learning_rate": 0.01, "loss": 2.0118, "step": 39699 }, { "epoch": 4.072835453426344, "grad_norm": 0.036247964948415756, "learning_rate": 0.01, "loss": 1.9787, "step": 39702 }, { "epoch": 4.073143208863357, "grad_norm": 0.08515971153974533, "learning_rate": 0.01, "loss": 1.9845, "step": 39705 }, { "epoch": 4.07345096430037, "grad_norm": 0.07769300043582916, "learning_rate": 0.01, "loss": 2.0183, "step": 39708 }, { "epoch": 4.073758719737382, "grad_norm": 0.0872216522693634, "learning_rate": 0.01, "loss": 1.9853, "step": 39711 }, { "epoch": 4.074066475174395, "grad_norm": 0.06590529531240463, "learning_rate": 0.01, "loss": 1.9917, "step": 39714 }, { "epoch": 4.074374230611408, "grad_norm": 0.059110917150974274, "learning_rate": 0.01, "loss": 2.0192, "step": 39717 }, { "epoch": 4.07468198604842, "grad_norm": 0.04299427196383476, "learning_rate": 0.01, "loss": 2.0017, "step": 39720 }, { "epoch": 4.074989741485433, "grad_norm": 0.10769655555486679, "learning_rate": 0.01, "loss": 1.9979, "step": 39723 }, { "epoch": 4.075297496922445, "grad_norm": 0.08587364107370377, "learning_rate": 0.01, "loss": 2.004, "step": 39726 }, { "epoch": 4.075605252359458, "grad_norm": 0.08979383856058121, "learning_rate": 0.01, "loss": 2.011, "step": 39729 }, { "epoch": 4.075913007796471, "grad_norm": 0.12115131318569183, "learning_rate": 0.01, "loss": 2.0182, "step": 39732 }, { "epoch": 4.076220763233484, "grad_norm": 0.1939251571893692, "learning_rate": 0.01, "loss": 1.9981, "step": 39735 }, { "epoch": 4.0765285186704965, "grad_norm": 0.05781332403421402, "learning_rate": 0.01, "loss": 2.0088, "step": 39738 }, { "epoch": 4.076836274107509, "grad_norm": 0.03456675633788109, "learning_rate": 0.01, "loss": 1.9861, "step": 39741 }, { "epoch": 4.077144029544522, "grad_norm": 0.040457479655742645, "learning_rate": 0.01, "loss": 1.9973, "step": 39744 }, { "epoch": 4.077451784981535, "grad_norm": 0.06412683427333832, "learning_rate": 0.01, "loss": 2.0057, "step": 39747 }, { "epoch": 4.077759540418548, "grad_norm": 0.05500893294811249, "learning_rate": 0.01, "loss": 2.0056, "step": 39750 }, { "epoch": 4.07806729585556, "grad_norm": 0.05995875224471092, "learning_rate": 0.01, "loss": 1.9968, "step": 39753 }, { "epoch": 4.078375051292573, "grad_norm": 0.1099732369184494, "learning_rate": 0.01, "loss": 2.0105, "step": 39756 }, { "epoch": 4.078682806729586, "grad_norm": 0.11458323895931244, "learning_rate": 0.01, "loss": 2.0063, "step": 39759 }, { "epoch": 4.078990562166599, "grad_norm": 0.07978501915931702, "learning_rate": 0.01, "loss": 2.0074, "step": 39762 }, { "epoch": 4.079298317603611, "grad_norm": 0.04269685968756676, "learning_rate": 0.01, "loss": 2.0154, "step": 39765 }, { "epoch": 4.079606073040623, "grad_norm": 0.051762815564870834, "learning_rate": 0.01, "loss": 2.0067, "step": 39768 }, { "epoch": 4.079913828477636, "grad_norm": 0.030949685722589493, "learning_rate": 0.01, "loss": 2.0025, "step": 39771 }, { "epoch": 4.080221583914649, "grad_norm": 0.1207941323518753, "learning_rate": 0.01, "loss": 2.0009, "step": 39774 }, { "epoch": 4.080529339351662, "grad_norm": 0.12475111335515976, "learning_rate": 0.01, "loss": 1.9991, "step": 39777 }, { "epoch": 4.0808370947886745, "grad_norm": 0.10487036406993866, "learning_rate": 0.01, "loss": 2.0025, "step": 39780 }, { "epoch": 4.081144850225687, "grad_norm": 0.088489830493927, "learning_rate": 0.01, "loss": 2.0399, "step": 39783 }, { "epoch": 4.0814526056627, "grad_norm": 0.06538698077201843, "learning_rate": 0.01, "loss": 1.9969, "step": 39786 }, { "epoch": 4.081760361099713, "grad_norm": 0.05029508098959923, "learning_rate": 0.01, "loss": 2.0022, "step": 39789 }, { "epoch": 4.082068116536726, "grad_norm": 0.04905729368329048, "learning_rate": 0.01, "loss": 2.005, "step": 39792 }, { "epoch": 4.082375871973738, "grad_norm": 0.060302965342998505, "learning_rate": 0.01, "loss": 1.9986, "step": 39795 }, { "epoch": 4.082683627410751, "grad_norm": 0.06065473333001137, "learning_rate": 0.01, "loss": 1.9804, "step": 39798 }, { "epoch": 4.082991382847764, "grad_norm": 0.04160602018237114, "learning_rate": 0.01, "loss": 2.0136, "step": 39801 }, { "epoch": 4.083299138284777, "grad_norm": 0.04311453923583031, "learning_rate": 0.01, "loss": 1.9955, "step": 39804 }, { "epoch": 4.0836068937217895, "grad_norm": 0.03628389909863472, "learning_rate": 0.01, "loss": 1.9935, "step": 39807 }, { "epoch": 4.083914649158801, "grad_norm": 0.09799020737409592, "learning_rate": 0.01, "loss": 1.9933, "step": 39810 }, { "epoch": 4.084222404595814, "grad_norm": 0.07026764005422592, "learning_rate": 0.01, "loss": 1.989, "step": 39813 }, { "epoch": 4.084530160032827, "grad_norm": 0.11306377500295639, "learning_rate": 0.01, "loss": 2.0241, "step": 39816 }, { "epoch": 4.08483791546984, "grad_norm": 0.13824355602264404, "learning_rate": 0.01, "loss": 2.0319, "step": 39819 }, { "epoch": 4.0851456709068525, "grad_norm": 0.07312388718128204, "learning_rate": 0.01, "loss": 2.0323, "step": 39822 }, { "epoch": 4.085453426343865, "grad_norm": 0.06378597021102905, "learning_rate": 0.01, "loss": 1.9952, "step": 39825 }, { "epoch": 4.085761181780878, "grad_norm": 0.04329591616988182, "learning_rate": 0.01, "loss": 2.0342, "step": 39828 }, { "epoch": 4.086068937217891, "grad_norm": 0.05057251825928688, "learning_rate": 0.01, "loss": 1.9695, "step": 39831 }, { "epoch": 4.086376692654904, "grad_norm": 0.047054242342710495, "learning_rate": 0.01, "loss": 2.0329, "step": 39834 }, { "epoch": 4.086684448091916, "grad_norm": 0.06537993997335434, "learning_rate": 0.01, "loss": 1.987, "step": 39837 }, { "epoch": 4.086992203528929, "grad_norm": 0.08527065068483353, "learning_rate": 0.01, "loss": 2.0031, "step": 39840 }, { "epoch": 4.087299958965942, "grad_norm": 0.05401263013482094, "learning_rate": 0.01, "loss": 1.9946, "step": 39843 }, { "epoch": 4.087607714402955, "grad_norm": 0.04258432239294052, "learning_rate": 0.01, "loss": 2.0098, "step": 39846 }, { "epoch": 4.087915469839968, "grad_norm": 0.08178570866584778, "learning_rate": 0.01, "loss": 1.9965, "step": 39849 }, { "epoch": 4.08822322527698, "grad_norm": 0.06067502498626709, "learning_rate": 0.01, "loss": 2.0209, "step": 39852 }, { "epoch": 4.088530980713992, "grad_norm": 0.04289801046252251, "learning_rate": 0.01, "loss": 1.9943, "step": 39855 }, { "epoch": 4.088838736151005, "grad_norm": 0.10091704875230789, "learning_rate": 0.01, "loss": 2.0113, "step": 39858 }, { "epoch": 4.089146491588018, "grad_norm": 0.05211932212114334, "learning_rate": 0.01, "loss": 2.0202, "step": 39861 }, { "epoch": 4.089454247025031, "grad_norm": 0.08183594793081284, "learning_rate": 0.01, "loss": 2.0139, "step": 39864 }, { "epoch": 4.089762002462043, "grad_norm": 0.13209478557109833, "learning_rate": 0.01, "loss": 2.0115, "step": 39867 }, { "epoch": 4.090069757899056, "grad_norm": 0.08384159207344055, "learning_rate": 0.01, "loss": 1.9652, "step": 39870 }, { "epoch": 4.090377513336069, "grad_norm": 0.10875227302312851, "learning_rate": 0.01, "loss": 1.9956, "step": 39873 }, { "epoch": 4.090685268773082, "grad_norm": 0.09350070357322693, "learning_rate": 0.01, "loss": 1.9902, "step": 39876 }, { "epoch": 4.0909930242100945, "grad_norm": 0.0466117262840271, "learning_rate": 0.01, "loss": 2.0039, "step": 39879 }, { "epoch": 4.091300779647107, "grad_norm": 0.09274325519800186, "learning_rate": 0.01, "loss": 1.9977, "step": 39882 }, { "epoch": 4.09160853508412, "grad_norm": 0.05788525938987732, "learning_rate": 0.01, "loss": 2.0027, "step": 39885 }, { "epoch": 4.091916290521133, "grad_norm": 0.07033047825098038, "learning_rate": 0.01, "loss": 2.0231, "step": 39888 }, { "epoch": 4.092224045958146, "grad_norm": 0.07227031141519547, "learning_rate": 0.01, "loss": 2.0085, "step": 39891 }, { "epoch": 4.092531801395158, "grad_norm": 0.08519124984741211, "learning_rate": 0.01, "loss": 2.0037, "step": 39894 }, { "epoch": 4.09283955683217, "grad_norm": 0.038923412561416626, "learning_rate": 0.01, "loss": 2.0254, "step": 39897 }, { "epoch": 4.093147312269183, "grad_norm": 0.03778720274567604, "learning_rate": 0.01, "loss": 1.9985, "step": 39900 }, { "epoch": 4.093455067706196, "grad_norm": 0.046422503888607025, "learning_rate": 0.01, "loss": 1.9981, "step": 39903 }, { "epoch": 4.093762823143209, "grad_norm": 0.13946807384490967, "learning_rate": 0.01, "loss": 2.0215, "step": 39906 }, { "epoch": 4.094070578580221, "grad_norm": 0.07924194633960724, "learning_rate": 0.01, "loss": 2.0005, "step": 39909 }, { "epoch": 4.094378334017234, "grad_norm": 0.04913933575153351, "learning_rate": 0.01, "loss": 1.9827, "step": 39912 }, { "epoch": 4.094686089454247, "grad_norm": 0.03534455969929695, "learning_rate": 0.01, "loss": 2.0171, "step": 39915 }, { "epoch": 4.09499384489126, "grad_norm": 0.03871719539165497, "learning_rate": 0.01, "loss": 1.9864, "step": 39918 }, { "epoch": 4.0953016003282725, "grad_norm": 0.05986125394701958, "learning_rate": 0.01, "loss": 2.01, "step": 39921 }, { "epoch": 4.095609355765285, "grad_norm": 0.06470568478107452, "learning_rate": 0.01, "loss": 2.0015, "step": 39924 }, { "epoch": 4.095917111202298, "grad_norm": 0.05725434049963951, "learning_rate": 0.01, "loss": 2.0056, "step": 39927 }, { "epoch": 4.096224866639311, "grad_norm": 0.05102056264877319, "learning_rate": 0.01, "loss": 2.0173, "step": 39930 }, { "epoch": 4.096532622076324, "grad_norm": 0.04540586844086647, "learning_rate": 0.01, "loss": 2.0176, "step": 39933 }, { "epoch": 4.096840377513336, "grad_norm": 0.08318594843149185, "learning_rate": 0.01, "loss": 2.0327, "step": 39936 }, { "epoch": 4.097148132950349, "grad_norm": 0.0833197757601738, "learning_rate": 0.01, "loss": 1.9842, "step": 39939 }, { "epoch": 4.097455888387361, "grad_norm": 0.06058152765035629, "learning_rate": 0.01, "loss": 2.0175, "step": 39942 }, { "epoch": 4.097763643824374, "grad_norm": 0.09669670462608337, "learning_rate": 0.01, "loss": 2.0037, "step": 39945 }, { "epoch": 4.098071399261387, "grad_norm": 0.06633847951889038, "learning_rate": 0.01, "loss": 2.0179, "step": 39948 }, { "epoch": 4.098379154698399, "grad_norm": 0.1021406352519989, "learning_rate": 0.01, "loss": 2.021, "step": 39951 }, { "epoch": 4.098686910135412, "grad_norm": 0.047858573496341705, "learning_rate": 0.01, "loss": 1.9945, "step": 39954 }, { "epoch": 4.098994665572425, "grad_norm": 0.04331503435969353, "learning_rate": 0.01, "loss": 2.0304, "step": 39957 }, { "epoch": 4.099302421009438, "grad_norm": 0.0399680957198143, "learning_rate": 0.01, "loss": 1.9915, "step": 39960 }, { "epoch": 4.0996101764464505, "grad_norm": 0.09427163749933243, "learning_rate": 0.01, "loss": 1.997, "step": 39963 }, { "epoch": 4.099917931883463, "grad_norm": 0.06199577823281288, "learning_rate": 0.01, "loss": 2.0176, "step": 39966 }, { "epoch": 4.100225687320476, "grad_norm": 0.04445228725671768, "learning_rate": 0.01, "loss": 1.98, "step": 39969 }, { "epoch": 4.100533442757489, "grad_norm": 0.04458901658654213, "learning_rate": 0.01, "loss": 1.9895, "step": 39972 }, { "epoch": 4.100841198194502, "grad_norm": 0.05720875784754753, "learning_rate": 0.01, "loss": 2.0225, "step": 39975 }, { "epoch": 4.101148953631514, "grad_norm": 0.0806422233581543, "learning_rate": 0.01, "loss": 2.0104, "step": 39978 }, { "epoch": 4.101456709068527, "grad_norm": 0.07191216200590134, "learning_rate": 0.01, "loss": 1.9938, "step": 39981 }, { "epoch": 4.10176446450554, "grad_norm": 0.04921800270676613, "learning_rate": 0.01, "loss": 2.0137, "step": 39984 }, { "epoch": 4.102072219942552, "grad_norm": 0.09171293675899506, "learning_rate": 0.01, "loss": 2.0173, "step": 39987 }, { "epoch": 4.102379975379565, "grad_norm": 0.08604893833398819, "learning_rate": 0.01, "loss": 1.998, "step": 39990 }, { "epoch": 4.1026877308165774, "grad_norm": 0.0489855632185936, "learning_rate": 0.01, "loss": 2.0014, "step": 39993 }, { "epoch": 4.10299548625359, "grad_norm": 0.09026241302490234, "learning_rate": 0.01, "loss": 2.0229, "step": 39996 }, { "epoch": 4.103303241690603, "grad_norm": 0.03529585152864456, "learning_rate": 0.01, "loss": 1.989, "step": 39999 }, { "epoch": 4.103610997127616, "grad_norm": 0.10228559374809265, "learning_rate": 0.01, "loss": 2.0266, "step": 40002 }, { "epoch": 4.103918752564629, "grad_norm": 0.04634955897927284, "learning_rate": 0.01, "loss": 1.9951, "step": 40005 }, { "epoch": 4.104226508001641, "grad_norm": 0.059635523706674576, "learning_rate": 0.01, "loss": 1.9998, "step": 40008 }, { "epoch": 4.104534263438654, "grad_norm": 0.07889647036790848, "learning_rate": 0.01, "loss": 2.0126, "step": 40011 }, { "epoch": 4.104842018875667, "grad_norm": 0.056453004479408264, "learning_rate": 0.01, "loss": 1.9949, "step": 40014 }, { "epoch": 4.10514977431268, "grad_norm": 0.038360368460416794, "learning_rate": 0.01, "loss": 2.0026, "step": 40017 }, { "epoch": 4.1054575297496925, "grad_norm": 0.08849389106035233, "learning_rate": 0.01, "loss": 2.0166, "step": 40020 }, { "epoch": 4.105765285186705, "grad_norm": 0.06063401326537132, "learning_rate": 0.01, "loss": 2.011, "step": 40023 }, { "epoch": 4.106073040623718, "grad_norm": 0.08102670311927795, "learning_rate": 0.01, "loss": 2.0157, "step": 40026 }, { "epoch": 4.106380796060731, "grad_norm": 0.12359301745891571, "learning_rate": 0.01, "loss": 2.0092, "step": 40029 }, { "epoch": 4.106688551497743, "grad_norm": 0.04933517053723335, "learning_rate": 0.01, "loss": 2.0297, "step": 40032 }, { "epoch": 4.1069963069347555, "grad_norm": 0.06319262832403183, "learning_rate": 0.01, "loss": 1.9731, "step": 40035 }, { "epoch": 4.107304062371768, "grad_norm": 0.1100512370467186, "learning_rate": 0.01, "loss": 1.9963, "step": 40038 }, { "epoch": 4.107611817808781, "grad_norm": 0.04997371882200241, "learning_rate": 0.01, "loss": 2.0149, "step": 40041 }, { "epoch": 4.107919573245794, "grad_norm": 0.034478992223739624, "learning_rate": 0.01, "loss": 2.0092, "step": 40044 }, { "epoch": 4.108227328682807, "grad_norm": 0.07332266122102737, "learning_rate": 0.01, "loss": 1.987, "step": 40047 }, { "epoch": 4.108535084119819, "grad_norm": 0.07530300319194794, "learning_rate": 0.01, "loss": 2.0238, "step": 40050 }, { "epoch": 4.108842839556832, "grad_norm": 0.05256710946559906, "learning_rate": 0.01, "loss": 2.0044, "step": 40053 }, { "epoch": 4.109150594993845, "grad_norm": 0.0342395044863224, "learning_rate": 0.01, "loss": 2.0046, "step": 40056 }, { "epoch": 4.109458350430858, "grad_norm": 0.05822393298149109, "learning_rate": 0.01, "loss": 1.9793, "step": 40059 }, { "epoch": 4.1097661058678705, "grad_norm": 0.11184973269701004, "learning_rate": 0.01, "loss": 2.0095, "step": 40062 }, { "epoch": 4.110073861304883, "grad_norm": 0.10424233227968216, "learning_rate": 0.01, "loss": 2.0096, "step": 40065 }, { "epoch": 4.110381616741896, "grad_norm": 0.07766477763652802, "learning_rate": 0.01, "loss": 2.0173, "step": 40068 }, { "epoch": 4.110689372178909, "grad_norm": 0.05194033682346344, "learning_rate": 0.01, "loss": 1.9849, "step": 40071 }, { "epoch": 4.110997127615922, "grad_norm": 0.059180594980716705, "learning_rate": 0.01, "loss": 2.0041, "step": 40074 }, { "epoch": 4.1113048830529335, "grad_norm": 0.056441277265548706, "learning_rate": 0.01, "loss": 2.0308, "step": 40077 }, { "epoch": 4.111612638489946, "grad_norm": 0.10722373425960541, "learning_rate": 0.01, "loss": 1.9856, "step": 40080 }, { "epoch": 4.111920393926959, "grad_norm": 0.05328166112303734, "learning_rate": 0.01, "loss": 2.0163, "step": 40083 }, { "epoch": 4.112228149363972, "grad_norm": 0.0920194610953331, "learning_rate": 0.01, "loss": 2.0242, "step": 40086 }, { "epoch": 4.112535904800985, "grad_norm": 0.09387510269880295, "learning_rate": 0.01, "loss": 1.9928, "step": 40089 }, { "epoch": 4.112843660237997, "grad_norm": 0.05598600208759308, "learning_rate": 0.01, "loss": 2.0063, "step": 40092 }, { "epoch": 4.11315141567501, "grad_norm": 0.07474517822265625, "learning_rate": 0.01, "loss": 2.026, "step": 40095 }, { "epoch": 4.113459171112023, "grad_norm": 0.05041923001408577, "learning_rate": 0.01, "loss": 2.0211, "step": 40098 }, { "epoch": 4.113766926549036, "grad_norm": 0.09958838671445847, "learning_rate": 0.01, "loss": 2.007, "step": 40101 }, { "epoch": 4.1140746819860485, "grad_norm": 0.07080104202032089, "learning_rate": 0.01, "loss": 2.0061, "step": 40104 }, { "epoch": 4.114382437423061, "grad_norm": 0.04018507897853851, "learning_rate": 0.01, "loss": 2.0279, "step": 40107 }, { "epoch": 4.114690192860074, "grad_norm": 0.034507866948843, "learning_rate": 0.01, "loss": 1.995, "step": 40110 }, { "epoch": 4.114997948297087, "grad_norm": 0.09373819082975388, "learning_rate": 0.01, "loss": 1.9992, "step": 40113 }, { "epoch": 4.1153057037341, "grad_norm": 0.09473507106304169, "learning_rate": 0.01, "loss": 2.0374, "step": 40116 }, { "epoch": 4.115613459171112, "grad_norm": 0.06728008389472961, "learning_rate": 0.01, "loss": 2.0208, "step": 40119 }, { "epoch": 4.115921214608124, "grad_norm": 0.03551316261291504, "learning_rate": 0.01, "loss": 2.0162, "step": 40122 }, { "epoch": 4.116228970045137, "grad_norm": 0.048464663326740265, "learning_rate": 0.01, "loss": 1.9957, "step": 40125 }, { "epoch": 4.11653672548215, "grad_norm": 0.06069257855415344, "learning_rate": 0.01, "loss": 2.0113, "step": 40128 }, { "epoch": 4.116844480919163, "grad_norm": 0.04196292161941528, "learning_rate": 0.01, "loss": 2.0046, "step": 40131 }, { "epoch": 4.117152236356175, "grad_norm": 0.056623879820108414, "learning_rate": 0.01, "loss": 1.9986, "step": 40134 }, { "epoch": 4.117459991793188, "grad_norm": 0.04838422313332558, "learning_rate": 0.01, "loss": 2.0206, "step": 40137 }, { "epoch": 4.117767747230201, "grad_norm": 0.05446647107601166, "learning_rate": 0.01, "loss": 2.0209, "step": 40140 }, { "epoch": 4.118075502667214, "grad_norm": 0.04145834967494011, "learning_rate": 0.01, "loss": 2.0121, "step": 40143 }, { "epoch": 4.118383258104227, "grad_norm": 0.07565242797136307, "learning_rate": 0.01, "loss": 2.0105, "step": 40146 }, { "epoch": 4.118691013541239, "grad_norm": 0.16024605929851532, "learning_rate": 0.01, "loss": 1.9793, "step": 40149 }, { "epoch": 4.118998768978252, "grad_norm": 0.10673293471336365, "learning_rate": 0.01, "loss": 2.0035, "step": 40152 }, { "epoch": 4.119306524415265, "grad_norm": 0.04822041094303131, "learning_rate": 0.01, "loss": 2.0003, "step": 40155 }, { "epoch": 4.119614279852278, "grad_norm": 0.05074264481663704, "learning_rate": 0.01, "loss": 2.0096, "step": 40158 }, { "epoch": 4.1199220352892905, "grad_norm": 0.0467289499938488, "learning_rate": 0.01, "loss": 1.9832, "step": 40161 }, { "epoch": 4.120229790726303, "grad_norm": 0.06745675206184387, "learning_rate": 0.01, "loss": 2.0095, "step": 40164 }, { "epoch": 4.120537546163315, "grad_norm": 0.05463431030511856, "learning_rate": 0.01, "loss": 2.0028, "step": 40167 }, { "epoch": 4.120845301600328, "grad_norm": 0.057621899992227554, "learning_rate": 0.01, "loss": 2.0032, "step": 40170 }, { "epoch": 4.121153057037341, "grad_norm": 0.03955928236246109, "learning_rate": 0.01, "loss": 1.9945, "step": 40173 }, { "epoch": 4.1214608124743535, "grad_norm": 0.09852177649736404, "learning_rate": 0.01, "loss": 2.0124, "step": 40176 }, { "epoch": 4.121768567911366, "grad_norm": 0.12283693999052048, "learning_rate": 0.01, "loss": 1.9923, "step": 40179 }, { "epoch": 4.122076323348379, "grad_norm": 0.046991802752017975, "learning_rate": 0.01, "loss": 2.0032, "step": 40182 }, { "epoch": 4.122384078785392, "grad_norm": 0.032669685781002045, "learning_rate": 0.01, "loss": 2.005, "step": 40185 }, { "epoch": 4.122691834222405, "grad_norm": 0.029325902462005615, "learning_rate": 0.01, "loss": 2.0086, "step": 40188 }, { "epoch": 4.122999589659417, "grad_norm": 0.05086469277739525, "learning_rate": 0.01, "loss": 1.986, "step": 40191 }, { "epoch": 4.12330734509643, "grad_norm": 0.05128968507051468, "learning_rate": 0.01, "loss": 1.983, "step": 40194 }, { "epoch": 4.123615100533443, "grad_norm": 0.06649978458881378, "learning_rate": 0.01, "loss": 2.0385, "step": 40197 }, { "epoch": 4.123922855970456, "grad_norm": 0.07910530269145966, "learning_rate": 0.01, "loss": 2.0116, "step": 40200 }, { "epoch": 4.1242306114074685, "grad_norm": 0.07179386913776398, "learning_rate": 0.01, "loss": 1.9867, "step": 40203 }, { "epoch": 4.124538366844481, "grad_norm": 0.04028356075286865, "learning_rate": 0.01, "loss": 1.9831, "step": 40206 }, { "epoch": 4.124846122281494, "grad_norm": 0.14858920872211456, "learning_rate": 0.01, "loss": 1.9975, "step": 40209 }, { "epoch": 4.125153877718506, "grad_norm": 0.0865345299243927, "learning_rate": 0.01, "loss": 1.9879, "step": 40212 }, { "epoch": 4.125461633155519, "grad_norm": 0.04412895813584328, "learning_rate": 0.01, "loss": 1.9943, "step": 40215 }, { "epoch": 4.1257693885925315, "grad_norm": 0.0482075996696949, "learning_rate": 0.01, "loss": 2.013, "step": 40218 }, { "epoch": 4.126077144029544, "grad_norm": 0.0381084606051445, "learning_rate": 0.01, "loss": 1.9939, "step": 40221 }, { "epoch": 4.126384899466557, "grad_norm": 0.03551500290632248, "learning_rate": 0.01, "loss": 2.0002, "step": 40224 }, { "epoch": 4.12669265490357, "grad_norm": 0.029879622161388397, "learning_rate": 0.01, "loss": 1.9924, "step": 40227 }, { "epoch": 4.127000410340583, "grad_norm": 0.04461509361863136, "learning_rate": 0.01, "loss": 2.006, "step": 40230 }, { "epoch": 4.127308165777595, "grad_norm": 0.08034146577119827, "learning_rate": 0.01, "loss": 1.9929, "step": 40233 }, { "epoch": 4.127615921214608, "grad_norm": 0.03752557560801506, "learning_rate": 0.01, "loss": 1.9943, "step": 40236 }, { "epoch": 4.127923676651621, "grad_norm": 0.09391690045595169, "learning_rate": 0.01, "loss": 2.0141, "step": 40239 }, { "epoch": 4.128231432088634, "grad_norm": 0.10475057363510132, "learning_rate": 0.01, "loss": 2.0169, "step": 40242 }, { "epoch": 4.1285391875256465, "grad_norm": 0.05833762139081955, "learning_rate": 0.01, "loss": 2.0309, "step": 40245 }, { "epoch": 4.128846942962659, "grad_norm": 0.07527395337820053, "learning_rate": 0.01, "loss": 2.0051, "step": 40248 }, { "epoch": 4.129154698399672, "grad_norm": 0.037238337099552155, "learning_rate": 0.01, "loss": 2.0569, "step": 40251 }, { "epoch": 4.129462453836685, "grad_norm": 0.08547326922416687, "learning_rate": 0.01, "loss": 1.9589, "step": 40254 }, { "epoch": 4.129770209273697, "grad_norm": 0.04352400079369545, "learning_rate": 0.01, "loss": 2.0079, "step": 40257 }, { "epoch": 4.1300779647107095, "grad_norm": 0.03715146332979202, "learning_rate": 0.01, "loss": 2.005, "step": 40260 }, { "epoch": 4.130385720147722, "grad_norm": 0.03312180936336517, "learning_rate": 0.01, "loss": 1.9865, "step": 40263 }, { "epoch": 4.130693475584735, "grad_norm": 0.14868606626987457, "learning_rate": 0.01, "loss": 2.016, "step": 40266 }, { "epoch": 4.131001231021748, "grad_norm": 0.0412895530462265, "learning_rate": 0.01, "loss": 2.0146, "step": 40269 }, { "epoch": 4.131308986458761, "grad_norm": 0.059335898607969284, "learning_rate": 0.01, "loss": 2.0028, "step": 40272 }, { "epoch": 4.131616741895773, "grad_norm": 0.08401087671518326, "learning_rate": 0.01, "loss": 2.0027, "step": 40275 }, { "epoch": 4.131924497332786, "grad_norm": 0.09615478664636612, "learning_rate": 0.01, "loss": 2.0188, "step": 40278 }, { "epoch": 4.132232252769799, "grad_norm": 0.06387398391962051, "learning_rate": 0.01, "loss": 2.0117, "step": 40281 }, { "epoch": 4.132540008206812, "grad_norm": 0.07762457430362701, "learning_rate": 0.01, "loss": 2.0155, "step": 40284 }, { "epoch": 4.132847763643825, "grad_norm": 0.08931776881217957, "learning_rate": 0.01, "loss": 2.0027, "step": 40287 }, { "epoch": 4.133155519080837, "grad_norm": 0.03763844072818756, "learning_rate": 0.01, "loss": 1.9951, "step": 40290 }, { "epoch": 4.13346327451785, "grad_norm": 0.06352074444293976, "learning_rate": 0.01, "loss": 2.0431, "step": 40293 }, { "epoch": 4.133771029954863, "grad_norm": 0.04778370261192322, "learning_rate": 0.01, "loss": 1.9819, "step": 40296 }, { "epoch": 4.134078785391876, "grad_norm": 0.06776099652051926, "learning_rate": 0.01, "loss": 1.9962, "step": 40299 }, { "epoch": 4.134386540828888, "grad_norm": 0.072423554956913, "learning_rate": 0.01, "loss": 2.0301, "step": 40302 }, { "epoch": 4.1346942962659, "grad_norm": 0.10224346071481705, "learning_rate": 0.01, "loss": 2.0101, "step": 40305 }, { "epoch": 4.135002051702913, "grad_norm": 0.045054350048303604, "learning_rate": 0.01, "loss": 1.9851, "step": 40308 }, { "epoch": 4.135309807139926, "grad_norm": 0.10343952476978302, "learning_rate": 0.01, "loss": 2.0008, "step": 40311 }, { "epoch": 4.135617562576939, "grad_norm": 0.12315492331981659, "learning_rate": 0.01, "loss": 2.0195, "step": 40314 }, { "epoch": 4.1359253180139515, "grad_norm": 0.06620458513498306, "learning_rate": 0.01, "loss": 2.0019, "step": 40317 }, { "epoch": 4.136233073450964, "grad_norm": 0.06378698348999023, "learning_rate": 0.01, "loss": 2.0046, "step": 40320 }, { "epoch": 4.136540828887977, "grad_norm": 0.06669528782367706, "learning_rate": 0.01, "loss": 2.022, "step": 40323 }, { "epoch": 4.13684858432499, "grad_norm": 0.07347302883863449, "learning_rate": 0.01, "loss": 2.0038, "step": 40326 }, { "epoch": 4.137156339762003, "grad_norm": 0.06786450743675232, "learning_rate": 0.01, "loss": 2.0327, "step": 40329 }, { "epoch": 4.137464095199015, "grad_norm": 0.10124550014734268, "learning_rate": 0.01, "loss": 2.0072, "step": 40332 }, { "epoch": 4.137771850636028, "grad_norm": 0.07563018798828125, "learning_rate": 0.01, "loss": 1.9818, "step": 40335 }, { "epoch": 4.138079606073041, "grad_norm": 0.08920261263847351, "learning_rate": 0.01, "loss": 2.032, "step": 40338 }, { "epoch": 4.138387361510054, "grad_norm": 0.07678209245204926, "learning_rate": 0.01, "loss": 2.0099, "step": 40341 }, { "epoch": 4.1386951169470665, "grad_norm": 0.03569749370217323, "learning_rate": 0.01, "loss": 2.0312, "step": 40344 }, { "epoch": 4.139002872384078, "grad_norm": 0.046265408396720886, "learning_rate": 0.01, "loss": 2.0071, "step": 40347 }, { "epoch": 4.139310627821091, "grad_norm": 0.04766137897968292, "learning_rate": 0.01, "loss": 2.0042, "step": 40350 }, { "epoch": 4.139618383258104, "grad_norm": 0.09092257916927338, "learning_rate": 0.01, "loss": 2.0095, "step": 40353 }, { "epoch": 4.139926138695117, "grad_norm": 0.05511738359928131, "learning_rate": 0.01, "loss": 2.019, "step": 40356 }, { "epoch": 4.1402338941321295, "grad_norm": 0.04944039136171341, "learning_rate": 0.01, "loss": 1.999, "step": 40359 }, { "epoch": 4.140541649569142, "grad_norm": 0.04905549809336662, "learning_rate": 0.01, "loss": 2.025, "step": 40362 }, { "epoch": 4.140849405006155, "grad_norm": 0.07323266565799713, "learning_rate": 0.01, "loss": 2.0044, "step": 40365 }, { "epoch": 4.141157160443168, "grad_norm": 0.07263099402189255, "learning_rate": 0.01, "loss": 2.016, "step": 40368 }, { "epoch": 4.141464915880181, "grad_norm": 0.05902627855539322, "learning_rate": 0.01, "loss": 1.9856, "step": 40371 }, { "epoch": 4.141772671317193, "grad_norm": 0.0597088448703289, "learning_rate": 0.01, "loss": 2.0105, "step": 40374 }, { "epoch": 4.142080426754206, "grad_norm": 0.04829247295856476, "learning_rate": 0.01, "loss": 2.0114, "step": 40377 }, { "epoch": 4.142388182191219, "grad_norm": 0.10093509405851364, "learning_rate": 0.01, "loss": 2.0092, "step": 40380 }, { "epoch": 4.142695937628232, "grad_norm": 0.06805883347988129, "learning_rate": 0.01, "loss": 1.9984, "step": 40383 }, { "epoch": 4.1430036930652445, "grad_norm": 0.05700362101197243, "learning_rate": 0.01, "loss": 2.0166, "step": 40386 }, { "epoch": 4.143311448502257, "grad_norm": 0.10143036395311356, "learning_rate": 0.01, "loss": 2.0133, "step": 40389 }, { "epoch": 4.143619203939269, "grad_norm": 0.0752461776137352, "learning_rate": 0.01, "loss": 2.0011, "step": 40392 }, { "epoch": 4.143926959376282, "grad_norm": 0.10534628480672836, "learning_rate": 0.01, "loss": 1.9837, "step": 40395 }, { "epoch": 4.144234714813295, "grad_norm": 0.042690735310316086, "learning_rate": 0.01, "loss": 1.9985, "step": 40398 }, { "epoch": 4.1445424702503075, "grad_norm": 0.04265071824193001, "learning_rate": 0.01, "loss": 1.9943, "step": 40401 }, { "epoch": 4.14485022568732, "grad_norm": 0.04449966922402382, "learning_rate": 0.01, "loss": 1.9964, "step": 40404 }, { "epoch": 4.145157981124333, "grad_norm": 0.06278623640537262, "learning_rate": 0.01, "loss": 1.9997, "step": 40407 }, { "epoch": 4.145465736561346, "grad_norm": 0.09633596986532211, "learning_rate": 0.01, "loss": 2.0184, "step": 40410 }, { "epoch": 4.145773491998359, "grad_norm": 0.06465546041727066, "learning_rate": 0.01, "loss": 2.0004, "step": 40413 }, { "epoch": 4.146081247435371, "grad_norm": 0.07929039746522903, "learning_rate": 0.01, "loss": 2.0275, "step": 40416 }, { "epoch": 4.146389002872384, "grad_norm": 0.04692156985402107, "learning_rate": 0.01, "loss": 1.9896, "step": 40419 }, { "epoch": 4.146696758309397, "grad_norm": 0.09933728724718094, "learning_rate": 0.01, "loss": 2.0155, "step": 40422 }, { "epoch": 4.14700451374641, "grad_norm": 0.07595301419496536, "learning_rate": 0.01, "loss": 2.0262, "step": 40425 }, { "epoch": 4.1473122691834226, "grad_norm": 0.06487558037042618, "learning_rate": 0.01, "loss": 2.0018, "step": 40428 }, { "epoch": 4.147620024620435, "grad_norm": 0.06612923741340637, "learning_rate": 0.01, "loss": 1.9956, "step": 40431 }, { "epoch": 4.147927780057448, "grad_norm": 0.08177967369556427, "learning_rate": 0.01, "loss": 2.0085, "step": 40434 }, { "epoch": 4.14823553549446, "grad_norm": 0.12479262799024582, "learning_rate": 0.01, "loss": 2.0037, "step": 40437 }, { "epoch": 4.148543290931473, "grad_norm": 0.053598951548337936, "learning_rate": 0.01, "loss": 2.0029, "step": 40440 }, { "epoch": 4.148851046368486, "grad_norm": 0.05604923143982887, "learning_rate": 0.01, "loss": 1.9985, "step": 40443 }, { "epoch": 4.149158801805498, "grad_norm": 0.044350240379571915, "learning_rate": 0.01, "loss": 2.0065, "step": 40446 }, { "epoch": 4.149466557242511, "grad_norm": 0.03988419473171234, "learning_rate": 0.01, "loss": 1.9988, "step": 40449 }, { "epoch": 4.149774312679524, "grad_norm": 0.03739091381430626, "learning_rate": 0.01, "loss": 2.0002, "step": 40452 }, { "epoch": 4.150082068116537, "grad_norm": 0.0570409819483757, "learning_rate": 0.01, "loss": 2.0044, "step": 40455 }, { "epoch": 4.1503898235535495, "grad_norm": 0.11358576267957687, "learning_rate": 0.01, "loss": 2.0115, "step": 40458 }, { "epoch": 4.150697578990562, "grad_norm": 0.08568841218948364, "learning_rate": 0.01, "loss": 2.0213, "step": 40461 }, { "epoch": 4.151005334427575, "grad_norm": 0.10045678168535233, "learning_rate": 0.01, "loss": 2.0108, "step": 40464 }, { "epoch": 4.151313089864588, "grad_norm": 0.0720805749297142, "learning_rate": 0.01, "loss": 2.0028, "step": 40467 }, { "epoch": 4.151620845301601, "grad_norm": 0.136969655752182, "learning_rate": 0.01, "loss": 2.0117, "step": 40470 }, { "epoch": 4.151928600738613, "grad_norm": 0.1128631979227066, "learning_rate": 0.01, "loss": 2.0143, "step": 40473 }, { "epoch": 4.152236356175626, "grad_norm": 0.06798527389764786, "learning_rate": 0.01, "loss": 1.9935, "step": 40476 }, { "epoch": 4.152544111612638, "grad_norm": 0.04990516975522041, "learning_rate": 0.01, "loss": 2.0104, "step": 40479 }, { "epoch": 4.152851867049651, "grad_norm": 0.0540931336581707, "learning_rate": 0.01, "loss": 2.0031, "step": 40482 }, { "epoch": 4.153159622486664, "grad_norm": 0.06866136193275452, "learning_rate": 0.01, "loss": 1.9815, "step": 40485 }, { "epoch": 4.153467377923676, "grad_norm": 0.05682525411248207, "learning_rate": 0.01, "loss": 2.009, "step": 40488 }, { "epoch": 4.153775133360689, "grad_norm": 0.07948753237724304, "learning_rate": 0.01, "loss": 1.9954, "step": 40491 }, { "epoch": 4.154082888797702, "grad_norm": 0.10209072381258011, "learning_rate": 0.01, "loss": 2.0219, "step": 40494 }, { "epoch": 4.154390644234715, "grad_norm": 0.04499402642250061, "learning_rate": 0.01, "loss": 2.0006, "step": 40497 }, { "epoch": 4.1546983996717275, "grad_norm": 0.03882760554552078, "learning_rate": 0.01, "loss": 1.9914, "step": 40500 }, { "epoch": 4.15500615510874, "grad_norm": 0.1203235313296318, "learning_rate": 0.01, "loss": 2.0149, "step": 40503 }, { "epoch": 4.155313910545753, "grad_norm": 0.10118594765663147, "learning_rate": 0.01, "loss": 2.0209, "step": 40506 }, { "epoch": 4.155621665982766, "grad_norm": 0.057894591242074966, "learning_rate": 0.01, "loss": 1.9982, "step": 40509 }, { "epoch": 4.155929421419779, "grad_norm": 0.09510418772697449, "learning_rate": 0.01, "loss": 2.0122, "step": 40512 }, { "epoch": 4.156237176856791, "grad_norm": 0.06964008510112762, "learning_rate": 0.01, "loss": 1.9887, "step": 40515 }, { "epoch": 4.156544932293804, "grad_norm": 0.04545210301876068, "learning_rate": 0.01, "loss": 2.0019, "step": 40518 }, { "epoch": 4.156852687730817, "grad_norm": 0.03616971895098686, "learning_rate": 0.01, "loss": 1.9781, "step": 40521 }, { "epoch": 4.157160443167829, "grad_norm": 0.038192588835954666, "learning_rate": 0.01, "loss": 1.9969, "step": 40524 }, { "epoch": 4.157468198604842, "grad_norm": 0.046076469123363495, "learning_rate": 0.01, "loss": 2.0271, "step": 40527 }, { "epoch": 4.157775954041854, "grad_norm": 0.08816591650247574, "learning_rate": 0.01, "loss": 2.0141, "step": 40530 }, { "epoch": 4.158083709478867, "grad_norm": 0.10513858497142792, "learning_rate": 0.01, "loss": 1.9838, "step": 40533 }, { "epoch": 4.15839146491588, "grad_norm": 0.05273423343896866, "learning_rate": 0.01, "loss": 1.9984, "step": 40536 }, { "epoch": 4.158699220352893, "grad_norm": 0.11179885268211365, "learning_rate": 0.01, "loss": 2.0221, "step": 40539 }, { "epoch": 4.1590069757899055, "grad_norm": 0.049401313066482544, "learning_rate": 0.01, "loss": 2.0208, "step": 40542 }, { "epoch": 4.159314731226918, "grad_norm": 0.04798784479498863, "learning_rate": 0.01, "loss": 1.9949, "step": 40545 }, { "epoch": 4.159622486663931, "grad_norm": 0.04317957162857056, "learning_rate": 0.01, "loss": 2.018, "step": 40548 }, { "epoch": 4.159930242100944, "grad_norm": 0.031536102294921875, "learning_rate": 0.01, "loss": 2.0026, "step": 40551 }, { "epoch": 4.160237997537957, "grad_norm": 0.09016025066375732, "learning_rate": 0.01, "loss": 2.0033, "step": 40554 }, { "epoch": 4.160545752974969, "grad_norm": 0.048575472086668015, "learning_rate": 0.01, "loss": 1.9979, "step": 40557 }, { "epoch": 4.160853508411982, "grad_norm": 0.1236988827586174, "learning_rate": 0.01, "loss": 2.0054, "step": 40560 }, { "epoch": 4.161161263848995, "grad_norm": 0.13077518343925476, "learning_rate": 0.01, "loss": 2.0178, "step": 40563 }, { "epoch": 4.161469019286008, "grad_norm": 0.10104335844516754, "learning_rate": 0.01, "loss": 1.998, "step": 40566 }, { "epoch": 4.16177677472302, "grad_norm": 0.08807270973920822, "learning_rate": 0.01, "loss": 2.0259, "step": 40569 }, { "epoch": 4.162084530160032, "grad_norm": 0.05569664016366005, "learning_rate": 0.01, "loss": 2.0218, "step": 40572 }, { "epoch": 4.162392285597045, "grad_norm": 0.04396286606788635, "learning_rate": 0.01, "loss": 2.0121, "step": 40575 }, { "epoch": 4.162700041034058, "grad_norm": 0.041279278695583344, "learning_rate": 0.01, "loss": 2.0012, "step": 40578 }, { "epoch": 4.163007796471071, "grad_norm": 0.05588890612125397, "learning_rate": 0.01, "loss": 1.9789, "step": 40581 }, { "epoch": 4.163315551908084, "grad_norm": 0.03629462048411369, "learning_rate": 0.01, "loss": 1.9929, "step": 40584 }, { "epoch": 4.163623307345096, "grad_norm": 0.05996616184711456, "learning_rate": 0.01, "loss": 1.9962, "step": 40587 }, { "epoch": 4.163931062782109, "grad_norm": 0.0385168232023716, "learning_rate": 0.01, "loss": 2.019, "step": 40590 }, { "epoch": 4.164238818219122, "grad_norm": 0.04890631511807442, "learning_rate": 0.01, "loss": 1.9967, "step": 40593 }, { "epoch": 4.164546573656135, "grad_norm": 0.06312946230173111, "learning_rate": 0.01, "loss": 2.0192, "step": 40596 }, { "epoch": 4.1648543290931475, "grad_norm": 0.05250309780240059, "learning_rate": 0.01, "loss": 2.0032, "step": 40599 }, { "epoch": 4.16516208453016, "grad_norm": 0.05581989511847496, "learning_rate": 0.01, "loss": 2.0196, "step": 40602 }, { "epoch": 4.165469839967173, "grad_norm": 0.033916786313056946, "learning_rate": 0.01, "loss": 1.9937, "step": 40605 }, { "epoch": 4.165777595404186, "grad_norm": 0.11886032670736313, "learning_rate": 0.01, "loss": 2.0084, "step": 40608 }, { "epoch": 4.166085350841199, "grad_norm": 0.06368937343358994, "learning_rate": 0.01, "loss": 2.014, "step": 40611 }, { "epoch": 4.1663931062782105, "grad_norm": 0.04808172211050987, "learning_rate": 0.01, "loss": 2.0084, "step": 40614 }, { "epoch": 4.166700861715223, "grad_norm": 0.03308833763003349, "learning_rate": 0.01, "loss": 1.9924, "step": 40617 }, { "epoch": 4.167008617152236, "grad_norm": 0.041934408247470856, "learning_rate": 0.01, "loss": 1.9891, "step": 40620 }, { "epoch": 4.167316372589249, "grad_norm": 0.0798778086900711, "learning_rate": 0.01, "loss": 2.0101, "step": 40623 }, { "epoch": 4.167624128026262, "grad_norm": 0.055231209844350815, "learning_rate": 0.01, "loss": 1.9722, "step": 40626 }, { "epoch": 4.167931883463274, "grad_norm": 0.04620687663555145, "learning_rate": 0.01, "loss": 1.9981, "step": 40629 }, { "epoch": 4.168239638900287, "grad_norm": 0.09074298292398453, "learning_rate": 0.01, "loss": 2.0001, "step": 40632 }, { "epoch": 4.1685473943373, "grad_norm": 0.09173654764890671, "learning_rate": 0.01, "loss": 2.017, "step": 40635 }, { "epoch": 4.168855149774313, "grad_norm": 0.052037082612514496, "learning_rate": 0.01, "loss": 2.0113, "step": 40638 }, { "epoch": 4.1691629052113255, "grad_norm": 0.07793330401182175, "learning_rate": 0.01, "loss": 2.0135, "step": 40641 }, { "epoch": 4.169470660648338, "grad_norm": 0.12099308520555496, "learning_rate": 0.01, "loss": 1.9946, "step": 40644 }, { "epoch": 4.169778416085351, "grad_norm": 0.08884571492671967, "learning_rate": 0.01, "loss": 2.015, "step": 40647 }, { "epoch": 4.170086171522364, "grad_norm": 0.07165110856294632, "learning_rate": 0.01, "loss": 1.9748, "step": 40650 }, { "epoch": 4.170393926959377, "grad_norm": 0.10293866693973541, "learning_rate": 0.01, "loss": 2.004, "step": 40653 }, { "epoch": 4.170701682396389, "grad_norm": 0.09834318608045578, "learning_rate": 0.01, "loss": 2.0313, "step": 40656 }, { "epoch": 4.171009437833401, "grad_norm": 0.047723252326250076, "learning_rate": 0.01, "loss": 1.9854, "step": 40659 }, { "epoch": 4.171317193270414, "grad_norm": 0.05522621423006058, "learning_rate": 0.01, "loss": 2.0193, "step": 40662 }, { "epoch": 4.171624948707427, "grad_norm": 0.03939025476574898, "learning_rate": 0.01, "loss": 1.9989, "step": 40665 }, { "epoch": 4.17193270414444, "grad_norm": 0.03266485407948494, "learning_rate": 0.01, "loss": 2.0121, "step": 40668 }, { "epoch": 4.172240459581452, "grad_norm": 0.033200979232788086, "learning_rate": 0.01, "loss": 1.9716, "step": 40671 }, { "epoch": 4.172548215018465, "grad_norm": 0.07104014605283737, "learning_rate": 0.01, "loss": 1.9833, "step": 40674 }, { "epoch": 4.172855970455478, "grad_norm": 0.0823940560221672, "learning_rate": 0.01, "loss": 1.9914, "step": 40677 }, { "epoch": 4.173163725892491, "grad_norm": 0.062255583703517914, "learning_rate": 0.01, "loss": 2.0182, "step": 40680 }, { "epoch": 4.1734714813295035, "grad_norm": 0.13120077550411224, "learning_rate": 0.01, "loss": 1.9878, "step": 40683 }, { "epoch": 4.173779236766516, "grad_norm": 0.07655888795852661, "learning_rate": 0.01, "loss": 2.0142, "step": 40686 }, { "epoch": 4.174086992203529, "grad_norm": 0.07436554878950119, "learning_rate": 0.01, "loss": 1.9829, "step": 40689 }, { "epoch": 4.174394747640542, "grad_norm": 0.052044034004211426, "learning_rate": 0.01, "loss": 2.0017, "step": 40692 }, { "epoch": 4.174702503077555, "grad_norm": 0.0385587215423584, "learning_rate": 0.01, "loss": 1.9918, "step": 40695 }, { "epoch": 4.175010258514567, "grad_norm": 0.04062971845269203, "learning_rate": 0.01, "loss": 2.0106, "step": 40698 }, { "epoch": 4.17531801395158, "grad_norm": 0.05078468099236488, "learning_rate": 0.01, "loss": 2.0044, "step": 40701 }, { "epoch": 4.175625769388592, "grad_norm": 0.05820399895310402, "learning_rate": 0.01, "loss": 1.9979, "step": 40704 }, { "epoch": 4.175933524825605, "grad_norm": 0.05630197748541832, "learning_rate": 0.01, "loss": 2.0046, "step": 40707 }, { "epoch": 4.176241280262618, "grad_norm": 0.047048747539520264, "learning_rate": 0.01, "loss": 2.0068, "step": 40710 }, { "epoch": 4.17654903569963, "grad_norm": 0.09604250639677048, "learning_rate": 0.01, "loss": 2.0011, "step": 40713 }, { "epoch": 4.176856791136643, "grad_norm": 0.06629175692796707, "learning_rate": 0.01, "loss": 1.9967, "step": 40716 }, { "epoch": 4.177164546573656, "grad_norm": 0.1041264608502388, "learning_rate": 0.01, "loss": 2.0302, "step": 40719 }, { "epoch": 4.177472302010669, "grad_norm": 0.10279016941785812, "learning_rate": 0.01, "loss": 1.9811, "step": 40722 }, { "epoch": 4.1777800574476815, "grad_norm": 0.047386229038238525, "learning_rate": 0.01, "loss": 1.9901, "step": 40725 }, { "epoch": 4.178087812884694, "grad_norm": 0.03940087929368019, "learning_rate": 0.01, "loss": 2.0454, "step": 40728 }, { "epoch": 4.178395568321707, "grad_norm": 0.1065608337521553, "learning_rate": 0.01, "loss": 2.0117, "step": 40731 }, { "epoch": 4.17870332375872, "grad_norm": 0.03307456895709038, "learning_rate": 0.01, "loss": 1.9962, "step": 40734 }, { "epoch": 4.179011079195733, "grad_norm": 0.044952236115932465, "learning_rate": 0.01, "loss": 1.9888, "step": 40737 }, { "epoch": 4.1793188346327454, "grad_norm": 0.04699942097067833, "learning_rate": 0.01, "loss": 1.9779, "step": 40740 }, { "epoch": 4.179626590069758, "grad_norm": 0.04086962714791298, "learning_rate": 0.01, "loss": 2.0037, "step": 40743 }, { "epoch": 4.179934345506771, "grad_norm": 0.04364610090851784, "learning_rate": 0.01, "loss": 2.0134, "step": 40746 }, { "epoch": 4.180242100943783, "grad_norm": 0.13256701827049255, "learning_rate": 0.01, "loss": 2.0185, "step": 40749 }, { "epoch": 4.180549856380796, "grad_norm": 0.1418839991092682, "learning_rate": 0.01, "loss": 1.9959, "step": 40752 }, { "epoch": 4.1808576118178085, "grad_norm": 0.10980424284934998, "learning_rate": 0.01, "loss": 2.0026, "step": 40755 }, { "epoch": 4.181165367254821, "grad_norm": 0.11712696403265, "learning_rate": 0.01, "loss": 2.0136, "step": 40758 }, { "epoch": 4.181473122691834, "grad_norm": 0.04676150158047676, "learning_rate": 0.01, "loss": 2.0204, "step": 40761 }, { "epoch": 4.181780878128847, "grad_norm": 0.06010957807302475, "learning_rate": 0.01, "loss": 1.9897, "step": 40764 }, { "epoch": 4.18208863356586, "grad_norm": 0.04058792442083359, "learning_rate": 0.01, "loss": 1.9985, "step": 40767 }, { "epoch": 4.182396389002872, "grad_norm": 0.05905113369226456, "learning_rate": 0.01, "loss": 1.9825, "step": 40770 }, { "epoch": 4.182704144439885, "grad_norm": 0.031559672206640244, "learning_rate": 0.01, "loss": 1.9935, "step": 40773 }, { "epoch": 4.183011899876898, "grad_norm": 0.03369845449924469, "learning_rate": 0.01, "loss": 2.0148, "step": 40776 }, { "epoch": 4.183319655313911, "grad_norm": 0.03323826193809509, "learning_rate": 0.01, "loss": 2.0067, "step": 40779 }, { "epoch": 4.1836274107509235, "grad_norm": 0.09272177517414093, "learning_rate": 0.01, "loss": 1.9888, "step": 40782 }, { "epoch": 4.183935166187936, "grad_norm": 0.12024440616369247, "learning_rate": 0.01, "loss": 1.9638, "step": 40785 }, { "epoch": 4.184242921624949, "grad_norm": 0.06225709244608879, "learning_rate": 0.01, "loss": 1.9923, "step": 40788 }, { "epoch": 4.184550677061962, "grad_norm": 0.07221471518278122, "learning_rate": 0.01, "loss": 2.015, "step": 40791 }, { "epoch": 4.184858432498974, "grad_norm": 0.05865592509508133, "learning_rate": 0.01, "loss": 2.026, "step": 40794 }, { "epoch": 4.1851661879359865, "grad_norm": 0.05323261767625809, "learning_rate": 0.01, "loss": 1.9944, "step": 40797 }, { "epoch": 4.185473943372999, "grad_norm": 0.0738186463713646, "learning_rate": 0.01, "loss": 1.9985, "step": 40800 }, { "epoch": 4.185781698810012, "grad_norm": 0.05065621808171272, "learning_rate": 0.01, "loss": 2.0006, "step": 40803 }, { "epoch": 4.186089454247025, "grad_norm": 0.07014346867799759, "learning_rate": 0.01, "loss": 1.9919, "step": 40806 }, { "epoch": 4.186397209684038, "grad_norm": 0.09202650934457779, "learning_rate": 0.01, "loss": 2.0173, "step": 40809 }, { "epoch": 4.18670496512105, "grad_norm": 0.07297877967357635, "learning_rate": 0.01, "loss": 2.0011, "step": 40812 }, { "epoch": 4.187012720558063, "grad_norm": 0.11443314701318741, "learning_rate": 0.01, "loss": 2.0166, "step": 40815 }, { "epoch": 4.187320475995076, "grad_norm": 0.10481325536966324, "learning_rate": 0.01, "loss": 1.989, "step": 40818 }, { "epoch": 4.187628231432089, "grad_norm": 0.03423392400145531, "learning_rate": 0.01, "loss": 1.9982, "step": 40821 }, { "epoch": 4.1879359868691015, "grad_norm": 0.03760581091046333, "learning_rate": 0.01, "loss": 2.0011, "step": 40824 }, { "epoch": 4.188243742306114, "grad_norm": 0.04355360567569733, "learning_rate": 0.01, "loss": 1.9884, "step": 40827 }, { "epoch": 4.188551497743127, "grad_norm": 0.07982810586690903, "learning_rate": 0.01, "loss": 2.0474, "step": 40830 }, { "epoch": 4.18885925318014, "grad_norm": 0.17527154088020325, "learning_rate": 0.01, "loss": 2.0216, "step": 40833 }, { "epoch": 4.189167008617153, "grad_norm": 0.09842000156641006, "learning_rate": 0.01, "loss": 2.0119, "step": 40836 }, { "epoch": 4.1894747640541645, "grad_norm": 0.04522009938955307, "learning_rate": 0.01, "loss": 2.0005, "step": 40839 }, { "epoch": 4.189782519491177, "grad_norm": 0.03711741417646408, "learning_rate": 0.01, "loss": 2.014, "step": 40842 }, { "epoch": 4.19009027492819, "grad_norm": 0.06014026701450348, "learning_rate": 0.01, "loss": 2.007, "step": 40845 }, { "epoch": 4.190398030365203, "grad_norm": 0.06271639466285706, "learning_rate": 0.01, "loss": 1.9824, "step": 40848 }, { "epoch": 4.190705785802216, "grad_norm": 0.09300049394369125, "learning_rate": 0.01, "loss": 1.9865, "step": 40851 }, { "epoch": 4.191013541239228, "grad_norm": 0.05301731079816818, "learning_rate": 0.01, "loss": 1.9758, "step": 40854 }, { "epoch": 4.191321296676241, "grad_norm": 0.031089186668395996, "learning_rate": 0.01, "loss": 1.9994, "step": 40857 }, { "epoch": 4.191629052113254, "grad_norm": 0.04351799562573433, "learning_rate": 0.01, "loss": 2.0165, "step": 40860 }, { "epoch": 4.191936807550267, "grad_norm": 0.03952482342720032, "learning_rate": 0.01, "loss": 1.9941, "step": 40863 }, { "epoch": 4.1922445629872795, "grad_norm": 0.09664411842823029, "learning_rate": 0.01, "loss": 2.0385, "step": 40866 }, { "epoch": 4.192552318424292, "grad_norm": 0.11290398985147476, "learning_rate": 0.01, "loss": 1.9979, "step": 40869 }, { "epoch": 4.192860073861305, "grad_norm": 0.03346327319741249, "learning_rate": 0.01, "loss": 1.9883, "step": 40872 }, { "epoch": 4.193167829298318, "grad_norm": 0.07427312433719635, "learning_rate": 0.01, "loss": 1.981, "step": 40875 }, { "epoch": 4.193475584735331, "grad_norm": 0.053705740720033646, "learning_rate": 0.01, "loss": 1.9965, "step": 40878 }, { "epoch": 4.193783340172343, "grad_norm": 0.0523017942905426, "learning_rate": 0.01, "loss": 1.9998, "step": 40881 }, { "epoch": 4.194091095609355, "grad_norm": 0.056644510477781296, "learning_rate": 0.01, "loss": 2.0161, "step": 40884 }, { "epoch": 4.194398851046368, "grad_norm": 0.03145689144730568, "learning_rate": 0.01, "loss": 1.9953, "step": 40887 }, { "epoch": 4.194706606483381, "grad_norm": 0.05200685188174248, "learning_rate": 0.01, "loss": 2.0054, "step": 40890 }, { "epoch": 4.195014361920394, "grad_norm": 0.06475922465324402, "learning_rate": 0.01, "loss": 2.0003, "step": 40893 }, { "epoch": 4.1953221173574065, "grad_norm": 0.0805899053812027, "learning_rate": 0.01, "loss": 1.9886, "step": 40896 }, { "epoch": 4.195629872794419, "grad_norm": 0.09969832003116608, "learning_rate": 0.01, "loss": 2.0236, "step": 40899 }, { "epoch": 4.195937628231432, "grad_norm": 0.0634358674287796, "learning_rate": 0.01, "loss": 2.0091, "step": 40902 }, { "epoch": 4.196245383668445, "grad_norm": 0.09012358635663986, "learning_rate": 0.01, "loss": 2.0019, "step": 40905 }, { "epoch": 4.196553139105458, "grad_norm": 0.06891899555921555, "learning_rate": 0.01, "loss": 2.0084, "step": 40908 }, { "epoch": 4.19686089454247, "grad_norm": 0.07446330040693283, "learning_rate": 0.01, "loss": 2.0082, "step": 40911 }, { "epoch": 4.197168649979483, "grad_norm": 0.03457944840192795, "learning_rate": 0.01, "loss": 1.9777, "step": 40914 }, { "epoch": 4.197476405416496, "grad_norm": 0.03754154592752457, "learning_rate": 0.01, "loss": 1.9845, "step": 40917 }, { "epoch": 4.197784160853509, "grad_norm": 0.0626758560538292, "learning_rate": 0.01, "loss": 2.0333, "step": 40920 }, { "epoch": 4.1980919162905215, "grad_norm": 0.0431663803756237, "learning_rate": 0.01, "loss": 1.9997, "step": 40923 }, { "epoch": 4.198399671727534, "grad_norm": 0.059978071600198746, "learning_rate": 0.01, "loss": 2.0219, "step": 40926 }, { "epoch": 4.198707427164546, "grad_norm": 0.045393504202365875, "learning_rate": 0.01, "loss": 2.0355, "step": 40929 }, { "epoch": 4.199015182601559, "grad_norm": 0.1481604129076004, "learning_rate": 0.01, "loss": 1.9887, "step": 40932 }, { "epoch": 4.199322938038572, "grad_norm": 0.08962929248809814, "learning_rate": 0.01, "loss": 1.9766, "step": 40935 }, { "epoch": 4.1996306934755845, "grad_norm": 0.10029556602239609, "learning_rate": 0.01, "loss": 2.0261, "step": 40938 }, { "epoch": 4.199938448912597, "grad_norm": 0.060972318053245544, "learning_rate": 0.01, "loss": 1.9932, "step": 40941 }, { "epoch": 4.20024620434961, "grad_norm": 0.08175275474786758, "learning_rate": 0.01, "loss": 2.0086, "step": 40944 }, { "epoch": 4.200553959786623, "grad_norm": 0.07742544263601303, "learning_rate": 0.01, "loss": 1.9752, "step": 40947 }, { "epoch": 4.200861715223636, "grad_norm": 0.06145945191383362, "learning_rate": 0.01, "loss": 2.0089, "step": 40950 }, { "epoch": 4.201169470660648, "grad_norm": 0.03785976395010948, "learning_rate": 0.01, "loss": 1.9858, "step": 40953 }, { "epoch": 4.201477226097661, "grad_norm": 0.049219708889722824, "learning_rate": 0.01, "loss": 2.0119, "step": 40956 }, { "epoch": 4.201784981534674, "grad_norm": 0.05865636467933655, "learning_rate": 0.01, "loss": 1.9859, "step": 40959 }, { "epoch": 4.202092736971687, "grad_norm": 0.04115782678127289, "learning_rate": 0.01, "loss": 2.0084, "step": 40962 }, { "epoch": 4.2024004924086995, "grad_norm": 0.07156416773796082, "learning_rate": 0.01, "loss": 2.0183, "step": 40965 }, { "epoch": 4.202708247845712, "grad_norm": 0.11241970211267471, "learning_rate": 0.01, "loss": 2.0136, "step": 40968 }, { "epoch": 4.203016003282725, "grad_norm": 0.052014194428920746, "learning_rate": 0.01, "loss": 2.0316, "step": 40971 }, { "epoch": 4.203323758719737, "grad_norm": 0.03571850433945656, "learning_rate": 0.01, "loss": 2.006, "step": 40974 }, { "epoch": 4.20363151415675, "grad_norm": 0.03782523795962334, "learning_rate": 0.01, "loss": 2.0127, "step": 40977 }, { "epoch": 4.2039392695937625, "grad_norm": 0.05531090870499611, "learning_rate": 0.01, "loss": 1.991, "step": 40980 }, { "epoch": 4.204247025030775, "grad_norm": 0.04620610550045967, "learning_rate": 0.01, "loss": 1.9929, "step": 40983 }, { "epoch": 4.204554780467788, "grad_norm": 0.034513216465711594, "learning_rate": 0.01, "loss": 2.004, "step": 40986 }, { "epoch": 4.204862535904801, "grad_norm": 0.12451375275850296, "learning_rate": 0.01, "loss": 1.9979, "step": 40989 }, { "epoch": 4.205170291341814, "grad_norm": 0.038147494196891785, "learning_rate": 0.01, "loss": 2.0012, "step": 40992 }, { "epoch": 4.205478046778826, "grad_norm": 0.0428970605134964, "learning_rate": 0.01, "loss": 1.9953, "step": 40995 }, { "epoch": 4.205785802215839, "grad_norm": 0.04861719533801079, "learning_rate": 0.01, "loss": 1.9732, "step": 40998 }, { "epoch": 4.206093557652852, "grad_norm": 0.05677000805735588, "learning_rate": 0.01, "loss": 1.9984, "step": 41001 }, { "epoch": 4.206401313089865, "grad_norm": 0.040025245398283005, "learning_rate": 0.01, "loss": 2.0046, "step": 41004 }, { "epoch": 4.2067090685268775, "grad_norm": 0.09284701943397522, "learning_rate": 0.01, "loss": 1.9852, "step": 41007 }, { "epoch": 4.20701682396389, "grad_norm": 0.06293929368257523, "learning_rate": 0.01, "loss": 1.9807, "step": 41010 }, { "epoch": 4.207324579400903, "grad_norm": 0.10422204434871674, "learning_rate": 0.01, "loss": 2.0103, "step": 41013 }, { "epoch": 4.207632334837916, "grad_norm": 0.05504370108246803, "learning_rate": 0.01, "loss": 2.0027, "step": 41016 }, { "epoch": 4.207940090274928, "grad_norm": 0.03916056081652641, "learning_rate": 0.01, "loss": 1.9898, "step": 41019 }, { "epoch": 4.2082478457119405, "grad_norm": 0.05535780265927315, "learning_rate": 0.01, "loss": 1.9761, "step": 41022 }, { "epoch": 4.208555601148953, "grad_norm": 0.11614353209733963, "learning_rate": 0.01, "loss": 2.0394, "step": 41025 }, { "epoch": 4.208863356585966, "grad_norm": 0.054821841418743134, "learning_rate": 0.01, "loss": 2.0171, "step": 41028 }, { "epoch": 4.209171112022979, "grad_norm": 0.0761130154132843, "learning_rate": 0.01, "loss": 2.0194, "step": 41031 }, { "epoch": 4.209478867459992, "grad_norm": 0.05529718101024628, "learning_rate": 0.01, "loss": 2.0132, "step": 41034 }, { "epoch": 4.2097866228970044, "grad_norm": 0.05739063769578934, "learning_rate": 0.01, "loss": 2.0408, "step": 41037 }, { "epoch": 4.210094378334017, "grad_norm": 0.03629623353481293, "learning_rate": 0.01, "loss": 2.0132, "step": 41040 }, { "epoch": 4.21040213377103, "grad_norm": 0.09579025954008102, "learning_rate": 0.01, "loss": 2.0051, "step": 41043 }, { "epoch": 4.210709889208043, "grad_norm": 0.09709680080413818, "learning_rate": 0.01, "loss": 2.0082, "step": 41046 }, { "epoch": 4.211017644645056, "grad_norm": 0.06710288673639297, "learning_rate": 0.01, "loss": 2.0092, "step": 41049 }, { "epoch": 4.211325400082068, "grad_norm": 0.17285798490047455, "learning_rate": 0.01, "loss": 2.0525, "step": 41052 }, { "epoch": 4.211633155519081, "grad_norm": 0.13886108994483948, "learning_rate": 0.01, "loss": 2.0024, "step": 41055 }, { "epoch": 4.211940910956094, "grad_norm": 0.08146315068006516, "learning_rate": 0.01, "loss": 2.0194, "step": 41058 }, { "epoch": 4.212248666393107, "grad_norm": 0.041313815861940384, "learning_rate": 0.01, "loss": 1.9926, "step": 41061 }, { "epoch": 4.212556421830119, "grad_norm": 0.07856039702892303, "learning_rate": 0.01, "loss": 1.9945, "step": 41064 }, { "epoch": 4.212864177267131, "grad_norm": 0.03942018374800682, "learning_rate": 0.01, "loss": 1.9853, "step": 41067 }, { "epoch": 4.213171932704144, "grad_norm": 0.06405628472566605, "learning_rate": 0.01, "loss": 2.0234, "step": 41070 }, { "epoch": 4.213479688141157, "grad_norm": 0.11233191937208176, "learning_rate": 0.01, "loss": 2.0014, "step": 41073 }, { "epoch": 4.21378744357817, "grad_norm": 0.061680734157562256, "learning_rate": 0.01, "loss": 1.9946, "step": 41076 }, { "epoch": 4.2140951990151825, "grad_norm": 0.049544453620910645, "learning_rate": 0.01, "loss": 2.0133, "step": 41079 }, { "epoch": 4.214402954452195, "grad_norm": 0.03959604352712631, "learning_rate": 0.01, "loss": 1.9868, "step": 41082 }, { "epoch": 4.214710709889208, "grad_norm": 0.03029470518231392, "learning_rate": 0.01, "loss": 1.9844, "step": 41085 }, { "epoch": 4.215018465326221, "grad_norm": 0.04742530733346939, "learning_rate": 0.01, "loss": 2.0083, "step": 41088 }, { "epoch": 4.215326220763234, "grad_norm": 0.11508273333311081, "learning_rate": 0.01, "loss": 2.0078, "step": 41091 }, { "epoch": 4.215633976200246, "grad_norm": 0.16011863946914673, "learning_rate": 0.01, "loss": 2.0077, "step": 41094 }, { "epoch": 4.215941731637259, "grad_norm": 0.0831206887960434, "learning_rate": 0.01, "loss": 2.0086, "step": 41097 }, { "epoch": 4.216249487074272, "grad_norm": 0.05117999389767647, "learning_rate": 0.01, "loss": 2.0104, "step": 41100 }, { "epoch": 4.216557242511285, "grad_norm": 0.08137553930282593, "learning_rate": 0.01, "loss": 2.0067, "step": 41103 }, { "epoch": 4.2168649979482975, "grad_norm": 0.04084109142422676, "learning_rate": 0.01, "loss": 1.9989, "step": 41106 }, { "epoch": 4.217172753385309, "grad_norm": 0.11708495765924454, "learning_rate": 0.01, "loss": 2.0223, "step": 41109 }, { "epoch": 4.217480508822322, "grad_norm": 0.036096177995204926, "learning_rate": 0.01, "loss": 2.0058, "step": 41112 }, { "epoch": 4.217788264259335, "grad_norm": 0.03205428272485733, "learning_rate": 0.01, "loss": 2.0139, "step": 41115 }, { "epoch": 4.218096019696348, "grad_norm": 0.05120716243982315, "learning_rate": 0.01, "loss": 1.9981, "step": 41118 }, { "epoch": 4.2184037751333605, "grad_norm": 0.08021282404661179, "learning_rate": 0.01, "loss": 1.9965, "step": 41121 }, { "epoch": 4.218711530570373, "grad_norm": 0.05003154277801514, "learning_rate": 0.01, "loss": 2.0134, "step": 41124 }, { "epoch": 4.219019286007386, "grad_norm": 0.0867641344666481, "learning_rate": 0.01, "loss": 2.01, "step": 41127 }, { "epoch": 4.219327041444399, "grad_norm": 0.0697675347328186, "learning_rate": 0.01, "loss": 1.9933, "step": 41130 }, { "epoch": 4.219634796881412, "grad_norm": 0.06696760654449463, "learning_rate": 0.01, "loss": 1.9863, "step": 41133 }, { "epoch": 4.219942552318424, "grad_norm": 0.09599725902080536, "learning_rate": 0.01, "loss": 1.9817, "step": 41136 }, { "epoch": 4.220250307755437, "grad_norm": 0.04233787953853607, "learning_rate": 0.01, "loss": 1.9715, "step": 41139 }, { "epoch": 4.22055806319245, "grad_norm": 0.08935974538326263, "learning_rate": 0.01, "loss": 2.0081, "step": 41142 }, { "epoch": 4.220865818629463, "grad_norm": 0.049136627465486526, "learning_rate": 0.01, "loss": 1.993, "step": 41145 }, { "epoch": 4.2211735740664755, "grad_norm": 0.0316915288567543, "learning_rate": 0.01, "loss": 2.006, "step": 41148 }, { "epoch": 4.221481329503488, "grad_norm": 0.04271787405014038, "learning_rate": 0.01, "loss": 2.002, "step": 41151 }, { "epoch": 4.2217890849405, "grad_norm": 0.03403954207897186, "learning_rate": 0.01, "loss": 2.0159, "step": 41154 }, { "epoch": 4.222096840377513, "grad_norm": 0.07457596808671951, "learning_rate": 0.01, "loss": 1.9975, "step": 41157 }, { "epoch": 4.222404595814526, "grad_norm": 0.06875099986791611, "learning_rate": 0.01, "loss": 1.9668, "step": 41160 }, { "epoch": 4.2227123512515385, "grad_norm": 0.05186415836215019, "learning_rate": 0.01, "loss": 1.9955, "step": 41163 }, { "epoch": 4.223020106688551, "grad_norm": 0.051673002541065216, "learning_rate": 0.01, "loss": 1.9831, "step": 41166 }, { "epoch": 4.223327862125564, "grad_norm": 0.03935972973704338, "learning_rate": 0.01, "loss": 1.9826, "step": 41169 }, { "epoch": 4.223635617562577, "grad_norm": 0.07746358215808868, "learning_rate": 0.01, "loss": 1.9943, "step": 41172 }, { "epoch": 4.22394337299959, "grad_norm": 0.14719370007514954, "learning_rate": 0.01, "loss": 2.0213, "step": 41175 }, { "epoch": 4.224251128436602, "grad_norm": 0.08811825513839722, "learning_rate": 0.01, "loss": 2.0064, "step": 41178 }, { "epoch": 4.224558883873615, "grad_norm": 0.04378504678606987, "learning_rate": 0.01, "loss": 2.0103, "step": 41181 }, { "epoch": 4.224866639310628, "grad_norm": 0.03978777304291725, "learning_rate": 0.01, "loss": 2.006, "step": 41184 }, { "epoch": 4.225174394747641, "grad_norm": 0.03860542178153992, "learning_rate": 0.01, "loss": 2.009, "step": 41187 }, { "epoch": 4.225482150184654, "grad_norm": 0.06839700043201447, "learning_rate": 0.01, "loss": 2.0195, "step": 41190 }, { "epoch": 4.225789905621666, "grad_norm": 0.09107422828674316, "learning_rate": 0.01, "loss": 2.0095, "step": 41193 }, { "epoch": 4.226097661058679, "grad_norm": 0.08443713933229446, "learning_rate": 0.01, "loss": 1.9715, "step": 41196 }, { "epoch": 4.226405416495691, "grad_norm": 0.07428136467933655, "learning_rate": 0.01, "loss": 1.9926, "step": 41199 }, { "epoch": 4.226713171932704, "grad_norm": 0.07102616876363754, "learning_rate": 0.01, "loss": 2.0161, "step": 41202 }, { "epoch": 4.227020927369717, "grad_norm": 0.06131945922970772, "learning_rate": 0.01, "loss": 1.9965, "step": 41205 }, { "epoch": 4.227328682806729, "grad_norm": 0.07921816408634186, "learning_rate": 0.01, "loss": 2.021, "step": 41208 }, { "epoch": 4.227636438243742, "grad_norm": 0.05274220556020737, "learning_rate": 0.01, "loss": 1.9706, "step": 41211 }, { "epoch": 4.227944193680755, "grad_norm": 0.12561160326004028, "learning_rate": 0.01, "loss": 2.0009, "step": 41214 }, { "epoch": 4.228251949117768, "grad_norm": 0.053055427968502045, "learning_rate": 0.01, "loss": 2.0263, "step": 41217 }, { "epoch": 4.2285597045547805, "grad_norm": 0.04999428242444992, "learning_rate": 0.01, "loss": 2.0028, "step": 41220 }, { "epoch": 4.228867459991793, "grad_norm": 0.06127345934510231, "learning_rate": 0.01, "loss": 2.0279, "step": 41223 }, { "epoch": 4.229175215428806, "grad_norm": 0.05634259432554245, "learning_rate": 0.01, "loss": 2.0321, "step": 41226 }, { "epoch": 4.229482970865819, "grad_norm": 0.030200546607375145, "learning_rate": 0.01, "loss": 1.9793, "step": 41229 }, { "epoch": 4.229790726302832, "grad_norm": 0.12201087921857834, "learning_rate": 0.01, "loss": 1.9938, "step": 41232 }, { "epoch": 4.230098481739844, "grad_norm": 0.07482577860355377, "learning_rate": 0.01, "loss": 2.0076, "step": 41235 }, { "epoch": 4.230406237176857, "grad_norm": 0.10311067849397659, "learning_rate": 0.01, "loss": 2.0183, "step": 41238 }, { "epoch": 4.23071399261387, "grad_norm": 0.09040312469005585, "learning_rate": 0.01, "loss": 1.9846, "step": 41241 }, { "epoch": 4.231021748050882, "grad_norm": 0.06239837408065796, "learning_rate": 0.01, "loss": 2.0053, "step": 41244 }, { "epoch": 4.231329503487895, "grad_norm": 0.033743564039468765, "learning_rate": 0.01, "loss": 2.0273, "step": 41247 }, { "epoch": 4.231637258924907, "grad_norm": 0.046916600316762924, "learning_rate": 0.01, "loss": 1.9828, "step": 41250 }, { "epoch": 4.23194501436192, "grad_norm": 0.06271935999393463, "learning_rate": 0.01, "loss": 2.0086, "step": 41253 }, { "epoch": 4.232252769798933, "grad_norm": 0.05794825777411461, "learning_rate": 0.01, "loss": 1.9867, "step": 41256 }, { "epoch": 4.232560525235946, "grad_norm": 0.0886342003941536, "learning_rate": 0.01, "loss": 2.011, "step": 41259 }, { "epoch": 4.2328682806729585, "grad_norm": 0.06492005288600922, "learning_rate": 0.01, "loss": 1.994, "step": 41262 }, { "epoch": 4.233176036109971, "grad_norm": 0.1275957226753235, "learning_rate": 0.01, "loss": 2.0202, "step": 41265 }, { "epoch": 4.233483791546984, "grad_norm": 0.08362898975610733, "learning_rate": 0.01, "loss": 2.0203, "step": 41268 }, { "epoch": 4.233791546983997, "grad_norm": 0.0909474641084671, "learning_rate": 0.01, "loss": 2.0047, "step": 41271 }, { "epoch": 4.23409930242101, "grad_norm": 0.04275381565093994, "learning_rate": 0.01, "loss": 2.0163, "step": 41274 }, { "epoch": 4.234407057858022, "grad_norm": 0.04790336266160011, "learning_rate": 0.01, "loss": 2.0001, "step": 41277 }, { "epoch": 4.234714813295035, "grad_norm": 0.03371073678135872, "learning_rate": 0.01, "loss": 1.9926, "step": 41280 }, { "epoch": 4.235022568732048, "grad_norm": 0.05681711062788963, "learning_rate": 0.01, "loss": 2.0195, "step": 41283 }, { "epoch": 4.235330324169061, "grad_norm": 0.040656253695487976, "learning_rate": 0.01, "loss": 2.0215, "step": 41286 }, { "epoch": 4.235638079606073, "grad_norm": 0.04097260907292366, "learning_rate": 0.01, "loss": 2.0042, "step": 41289 }, { "epoch": 4.235945835043085, "grad_norm": 0.042979929596185684, "learning_rate": 0.01, "loss": 1.9833, "step": 41292 }, { "epoch": 4.236253590480098, "grad_norm": 0.03832445293664932, "learning_rate": 0.01, "loss": 1.9915, "step": 41295 }, { "epoch": 4.236561345917111, "grad_norm": 0.04340042173862457, "learning_rate": 0.01, "loss": 2.0301, "step": 41298 }, { "epoch": 4.236869101354124, "grad_norm": 0.10501760244369507, "learning_rate": 0.01, "loss": 2.0058, "step": 41301 }, { "epoch": 4.2371768567911365, "grad_norm": 0.09777186065912247, "learning_rate": 0.01, "loss": 2.0283, "step": 41304 }, { "epoch": 4.237484612228149, "grad_norm": 0.06624646484851837, "learning_rate": 0.01, "loss": 2.0187, "step": 41307 }, { "epoch": 4.237792367665162, "grad_norm": 0.055278148502111435, "learning_rate": 0.01, "loss": 1.9958, "step": 41310 }, { "epoch": 4.238100123102175, "grad_norm": 0.11712317168712616, "learning_rate": 0.01, "loss": 1.9903, "step": 41313 }, { "epoch": 4.238407878539188, "grad_norm": 0.06384153664112091, "learning_rate": 0.01, "loss": 2.015, "step": 41316 }, { "epoch": 4.2387156339762, "grad_norm": 0.0486673079431057, "learning_rate": 0.01, "loss": 1.9864, "step": 41319 }, { "epoch": 4.239023389413213, "grad_norm": 0.04492638260126114, "learning_rate": 0.01, "loss": 2.0086, "step": 41322 }, { "epoch": 4.239331144850226, "grad_norm": 0.034185852855443954, "learning_rate": 0.01, "loss": 2.0209, "step": 41325 }, { "epoch": 4.239638900287239, "grad_norm": 0.04562927410006523, "learning_rate": 0.01, "loss": 2.0047, "step": 41328 }, { "epoch": 4.239946655724252, "grad_norm": 0.1411084234714508, "learning_rate": 0.01, "loss": 1.9964, "step": 41331 }, { "epoch": 4.2402544111612634, "grad_norm": 0.17035670578479767, "learning_rate": 0.01, "loss": 1.9915, "step": 41334 }, { "epoch": 4.240562166598276, "grad_norm": 0.06952272355556488, "learning_rate": 0.01, "loss": 2.0187, "step": 41337 }, { "epoch": 4.240869922035289, "grad_norm": 0.08368392288684845, "learning_rate": 0.01, "loss": 1.9897, "step": 41340 }, { "epoch": 4.241177677472302, "grad_norm": 0.04599005728960037, "learning_rate": 0.01, "loss": 2.0073, "step": 41343 }, { "epoch": 4.241485432909315, "grad_norm": 0.045579466968774796, "learning_rate": 0.01, "loss": 1.9885, "step": 41346 }, { "epoch": 4.241793188346327, "grad_norm": 0.049636196345090866, "learning_rate": 0.01, "loss": 1.9875, "step": 41349 }, { "epoch": 4.24210094378334, "grad_norm": 0.047992490231990814, "learning_rate": 0.01, "loss": 1.9881, "step": 41352 }, { "epoch": 4.242408699220353, "grad_norm": 0.05155181884765625, "learning_rate": 0.01, "loss": 1.9802, "step": 41355 }, { "epoch": 4.242716454657366, "grad_norm": 0.04805142432451248, "learning_rate": 0.01, "loss": 2.0093, "step": 41358 }, { "epoch": 4.2430242100943785, "grad_norm": 0.09783121943473816, "learning_rate": 0.01, "loss": 1.9703, "step": 41361 }, { "epoch": 4.243331965531391, "grad_norm": 0.05536942929029465, "learning_rate": 0.01, "loss": 1.9836, "step": 41364 }, { "epoch": 4.243639720968404, "grad_norm": 0.1335456669330597, "learning_rate": 0.01, "loss": 1.9989, "step": 41367 }, { "epoch": 4.243947476405417, "grad_norm": 0.05100298300385475, "learning_rate": 0.01, "loss": 2.0223, "step": 41370 }, { "epoch": 4.24425523184243, "grad_norm": 0.04158430173993111, "learning_rate": 0.01, "loss": 2.0127, "step": 41373 }, { "epoch": 4.244562987279442, "grad_norm": 0.03490443900227547, "learning_rate": 0.01, "loss": 2.0035, "step": 41376 }, { "epoch": 4.244870742716454, "grad_norm": 0.03529435768723488, "learning_rate": 0.01, "loss": 2.0055, "step": 41379 }, { "epoch": 4.245178498153467, "grad_norm": 0.03825982287526131, "learning_rate": 0.01, "loss": 2.0138, "step": 41382 }, { "epoch": 4.24548625359048, "grad_norm": 0.11641440540552139, "learning_rate": 0.01, "loss": 2.0487, "step": 41385 }, { "epoch": 4.245794009027493, "grad_norm": 0.043827321380376816, "learning_rate": 0.01, "loss": 2.009, "step": 41388 }, { "epoch": 4.246101764464505, "grad_norm": 0.060956139117479324, "learning_rate": 0.01, "loss": 1.9972, "step": 41391 }, { "epoch": 4.246409519901518, "grad_norm": 0.11849243938922882, "learning_rate": 0.01, "loss": 1.9968, "step": 41394 }, { "epoch": 4.246717275338531, "grad_norm": 0.08758600056171417, "learning_rate": 0.01, "loss": 1.9596, "step": 41397 }, { "epoch": 4.247025030775544, "grad_norm": 0.0848364606499672, "learning_rate": 0.01, "loss": 2.0016, "step": 41400 }, { "epoch": 4.2473327862125565, "grad_norm": 0.08009137958288193, "learning_rate": 0.01, "loss": 1.9934, "step": 41403 }, { "epoch": 4.247640541649569, "grad_norm": 0.06123640015721321, "learning_rate": 0.01, "loss": 1.9974, "step": 41406 }, { "epoch": 4.247948297086582, "grad_norm": 0.042272213846445084, "learning_rate": 0.01, "loss": 2.0196, "step": 41409 }, { "epoch": 4.248256052523595, "grad_norm": 0.051934532821178436, "learning_rate": 0.01, "loss": 2.0142, "step": 41412 }, { "epoch": 4.248563807960608, "grad_norm": 0.04881110414862633, "learning_rate": 0.01, "loss": 1.9795, "step": 41415 }, { "epoch": 4.24887156339762, "grad_norm": 0.03183252364397049, "learning_rate": 0.01, "loss": 1.9986, "step": 41418 }, { "epoch": 4.249179318834633, "grad_norm": 0.1036367192864418, "learning_rate": 0.01, "loss": 1.976, "step": 41421 }, { "epoch": 4.249487074271645, "grad_norm": 0.08779038488864899, "learning_rate": 0.01, "loss": 2.0271, "step": 41424 }, { "epoch": 4.249794829708658, "grad_norm": 0.07684400677680969, "learning_rate": 0.01, "loss": 1.9796, "step": 41427 }, { "epoch": 4.250102585145671, "grad_norm": 0.08273176103830338, "learning_rate": 0.01, "loss": 1.975, "step": 41430 }, { "epoch": 4.250410340582683, "grad_norm": 0.04036150500178337, "learning_rate": 0.01, "loss": 1.9984, "step": 41433 }, { "epoch": 4.250718096019696, "grad_norm": 0.04532729461789131, "learning_rate": 0.01, "loss": 1.9905, "step": 41436 }, { "epoch": 4.251025851456709, "grad_norm": 0.045257970690727234, "learning_rate": 0.01, "loss": 2.0002, "step": 41439 }, { "epoch": 4.251333606893722, "grad_norm": 0.11603814363479614, "learning_rate": 0.01, "loss": 1.9832, "step": 41442 }, { "epoch": 4.2516413623307345, "grad_norm": 0.07269345223903656, "learning_rate": 0.01, "loss": 1.9901, "step": 41445 }, { "epoch": 4.251949117767747, "grad_norm": 0.06193891167640686, "learning_rate": 0.01, "loss": 2.0064, "step": 41448 }, { "epoch": 4.25225687320476, "grad_norm": 0.09230732172727585, "learning_rate": 0.01, "loss": 2.0053, "step": 41451 }, { "epoch": 4.252564628641773, "grad_norm": 0.04278898611664772, "learning_rate": 0.01, "loss": 1.9902, "step": 41454 }, { "epoch": 4.252872384078786, "grad_norm": 0.04253445938229561, "learning_rate": 0.01, "loss": 2.0249, "step": 41457 }, { "epoch": 4.253180139515798, "grad_norm": 0.07245481014251709, "learning_rate": 0.01, "loss": 2.0126, "step": 41460 }, { "epoch": 4.253487894952811, "grad_norm": 0.10126817971467972, "learning_rate": 0.01, "loss": 1.9852, "step": 41463 }, { "epoch": 4.253795650389824, "grad_norm": 0.04528803005814552, "learning_rate": 0.01, "loss": 2.0018, "step": 41466 }, { "epoch": 4.254103405826836, "grad_norm": 0.04987543821334839, "learning_rate": 0.01, "loss": 1.9757, "step": 41469 }, { "epoch": 4.254411161263849, "grad_norm": 0.05521645396947861, "learning_rate": 0.01, "loss": 2.0067, "step": 41472 }, { "epoch": 4.254718916700861, "grad_norm": 0.06709643453359604, "learning_rate": 0.01, "loss": 1.9811, "step": 41475 }, { "epoch": 4.255026672137874, "grad_norm": 0.07900220155715942, "learning_rate": 0.01, "loss": 2.0332, "step": 41478 }, { "epoch": 4.255334427574887, "grad_norm": 0.06128143519163132, "learning_rate": 0.01, "loss": 2.0061, "step": 41481 }, { "epoch": 4.2556421830119, "grad_norm": 0.044023312628269196, "learning_rate": 0.01, "loss": 2.0242, "step": 41484 }, { "epoch": 4.255949938448913, "grad_norm": 0.034501735121011734, "learning_rate": 0.01, "loss": 1.9939, "step": 41487 }, { "epoch": 4.256257693885925, "grad_norm": 0.08301688730716705, "learning_rate": 0.01, "loss": 1.9939, "step": 41490 }, { "epoch": 4.256565449322938, "grad_norm": 0.05008229240775108, "learning_rate": 0.01, "loss": 1.9912, "step": 41493 }, { "epoch": 4.256873204759951, "grad_norm": 0.08899793028831482, "learning_rate": 0.01, "loss": 2.0225, "step": 41496 }, { "epoch": 4.257180960196964, "grad_norm": 0.055265627801418304, "learning_rate": 0.01, "loss": 2.0065, "step": 41499 }, { "epoch": 4.2574887156339765, "grad_norm": 0.04912308603525162, "learning_rate": 0.01, "loss": 1.9914, "step": 41502 }, { "epoch": 4.257796471070989, "grad_norm": 0.057400181889534, "learning_rate": 0.01, "loss": 1.9995, "step": 41505 }, { "epoch": 4.258104226508002, "grad_norm": 0.13875073194503784, "learning_rate": 0.01, "loss": 2.0468, "step": 41508 }, { "epoch": 4.258411981945015, "grad_norm": 0.10987017303705215, "learning_rate": 0.01, "loss": 1.9992, "step": 41511 }, { "epoch": 4.258719737382027, "grad_norm": 0.06881477683782578, "learning_rate": 0.01, "loss": 2.0203, "step": 41514 }, { "epoch": 4.2590274928190395, "grad_norm": 0.06515093892812729, "learning_rate": 0.01, "loss": 2.0139, "step": 41517 }, { "epoch": 4.259335248256052, "grad_norm": 0.05030984431505203, "learning_rate": 0.01, "loss": 1.9992, "step": 41520 }, { "epoch": 4.259643003693065, "grad_norm": 0.061900872737169266, "learning_rate": 0.01, "loss": 2.0136, "step": 41523 }, { "epoch": 4.259950759130078, "grad_norm": 0.052526991814374924, "learning_rate": 0.01, "loss": 2.0008, "step": 41526 }, { "epoch": 4.260258514567091, "grad_norm": 0.07355368137359619, "learning_rate": 0.01, "loss": 2.0165, "step": 41529 }, { "epoch": 4.260566270004103, "grad_norm": 0.09191373735666275, "learning_rate": 0.01, "loss": 2.0091, "step": 41532 }, { "epoch": 4.260874025441116, "grad_norm": 0.04627171531319618, "learning_rate": 0.01, "loss": 1.9867, "step": 41535 }, { "epoch": 4.261181780878129, "grad_norm": 0.04639091342687607, "learning_rate": 0.01, "loss": 2.002, "step": 41538 }, { "epoch": 4.261489536315142, "grad_norm": 0.0914679765701294, "learning_rate": 0.01, "loss": 1.9957, "step": 41541 }, { "epoch": 4.2617972917521545, "grad_norm": 0.08489304035902023, "learning_rate": 0.01, "loss": 1.9837, "step": 41544 }, { "epoch": 4.262105047189167, "grad_norm": 0.03301496058702469, "learning_rate": 0.01, "loss": 1.9908, "step": 41547 }, { "epoch": 4.26241280262618, "grad_norm": 0.08343908190727234, "learning_rate": 0.01, "loss": 2.0061, "step": 41550 }, { "epoch": 4.262720558063193, "grad_norm": 0.05923202261328697, "learning_rate": 0.01, "loss": 1.9929, "step": 41553 }, { "epoch": 4.263028313500206, "grad_norm": 0.044505976140499115, "learning_rate": 0.01, "loss": 2.0007, "step": 41556 }, { "epoch": 4.2633360689372175, "grad_norm": 0.05136754736304283, "learning_rate": 0.01, "loss": 1.9989, "step": 41559 }, { "epoch": 4.26364382437423, "grad_norm": 0.0830690786242485, "learning_rate": 0.01, "loss": 2.011, "step": 41562 }, { "epoch": 4.263951579811243, "grad_norm": 0.11335346102714539, "learning_rate": 0.01, "loss": 1.9812, "step": 41565 }, { "epoch": 4.264259335248256, "grad_norm": 0.0629742443561554, "learning_rate": 0.01, "loss": 2.0121, "step": 41568 }, { "epoch": 4.264567090685269, "grad_norm": 0.09297124296426773, "learning_rate": 0.01, "loss": 2.0126, "step": 41571 }, { "epoch": 4.264874846122281, "grad_norm": 0.05171617120504379, "learning_rate": 0.01, "loss": 1.9528, "step": 41574 }, { "epoch": 4.265182601559294, "grad_norm": 0.09433134645223618, "learning_rate": 0.01, "loss": 2.005, "step": 41577 }, { "epoch": 4.265490356996307, "grad_norm": 0.058073755353689194, "learning_rate": 0.01, "loss": 2.0063, "step": 41580 }, { "epoch": 4.26579811243332, "grad_norm": 0.037740156054496765, "learning_rate": 0.01, "loss": 2.0153, "step": 41583 }, { "epoch": 4.2661058678703325, "grad_norm": 0.12522292137145996, "learning_rate": 0.01, "loss": 1.9934, "step": 41586 }, { "epoch": 4.266413623307345, "grad_norm": 0.07263893634080887, "learning_rate": 0.01, "loss": 2.0016, "step": 41589 }, { "epoch": 4.266721378744358, "grad_norm": 0.08800274133682251, "learning_rate": 0.01, "loss": 1.9993, "step": 41592 }, { "epoch": 4.267029134181371, "grad_norm": 0.04018472880125046, "learning_rate": 0.01, "loss": 1.9909, "step": 41595 }, { "epoch": 4.267336889618384, "grad_norm": 0.04245147481560707, "learning_rate": 0.01, "loss": 1.9882, "step": 41598 }, { "epoch": 4.267644645055396, "grad_norm": 0.03626253455877304, "learning_rate": 0.01, "loss": 2.0083, "step": 41601 }, { "epoch": 4.267952400492408, "grad_norm": 0.1148434430360794, "learning_rate": 0.01, "loss": 1.9856, "step": 41604 }, { "epoch": 4.268260155929421, "grad_norm": 0.03992197662591934, "learning_rate": 0.01, "loss": 1.9999, "step": 41607 }, { "epoch": 4.268567911366434, "grad_norm": 0.07781147211790085, "learning_rate": 0.01, "loss": 2.0197, "step": 41610 }, { "epoch": 4.268875666803447, "grad_norm": 0.08838597685098648, "learning_rate": 0.01, "loss": 1.9812, "step": 41613 }, { "epoch": 4.269183422240459, "grad_norm": 0.07930974662303925, "learning_rate": 0.01, "loss": 1.9941, "step": 41616 }, { "epoch": 4.269491177677472, "grad_norm": 0.08094513416290283, "learning_rate": 0.01, "loss": 1.9861, "step": 41619 }, { "epoch": 4.269798933114485, "grad_norm": 0.06766554713249207, "learning_rate": 0.01, "loss": 1.9917, "step": 41622 }, { "epoch": 4.270106688551498, "grad_norm": 0.07384651899337769, "learning_rate": 0.01, "loss": 1.9754, "step": 41625 }, { "epoch": 4.2704144439885106, "grad_norm": 0.043780550360679626, "learning_rate": 0.01, "loss": 2.0226, "step": 41628 }, { "epoch": 4.270722199425523, "grad_norm": 0.06705281883478165, "learning_rate": 0.01, "loss": 2.008, "step": 41631 }, { "epoch": 4.271029954862536, "grad_norm": 0.051532238721847534, "learning_rate": 0.01, "loss": 1.9987, "step": 41634 }, { "epoch": 4.271337710299549, "grad_norm": 0.09668219834566116, "learning_rate": 0.01, "loss": 2.0226, "step": 41637 }, { "epoch": 4.271645465736562, "grad_norm": 0.0774909183382988, "learning_rate": 0.01, "loss": 2.0307, "step": 41640 }, { "epoch": 4.2719532211735745, "grad_norm": 0.11848165839910507, "learning_rate": 0.01, "loss": 2.0057, "step": 41643 }, { "epoch": 4.272260976610587, "grad_norm": 0.09055308997631073, "learning_rate": 0.01, "loss": 1.9871, "step": 41646 }, { "epoch": 4.272568732047599, "grad_norm": 0.06557660549879074, "learning_rate": 0.01, "loss": 1.9897, "step": 41649 }, { "epoch": 4.272876487484612, "grad_norm": 0.055074457079172134, "learning_rate": 0.01, "loss": 1.9692, "step": 41652 }, { "epoch": 4.273184242921625, "grad_norm": 0.07604243606328964, "learning_rate": 0.01, "loss": 1.9998, "step": 41655 }, { "epoch": 4.2734919983586375, "grad_norm": 0.07098755240440369, "learning_rate": 0.01, "loss": 2.0312, "step": 41658 }, { "epoch": 4.27379975379565, "grad_norm": 0.11538244038820267, "learning_rate": 0.01, "loss": 1.9713, "step": 41661 }, { "epoch": 4.274107509232663, "grad_norm": 0.03773383051156998, "learning_rate": 0.01, "loss": 1.999, "step": 41664 }, { "epoch": 4.274415264669676, "grad_norm": 0.03163829445838928, "learning_rate": 0.01, "loss": 1.9699, "step": 41667 }, { "epoch": 4.274723020106689, "grad_norm": 0.0678187757730484, "learning_rate": 0.01, "loss": 1.9944, "step": 41670 }, { "epoch": 4.275030775543701, "grad_norm": 0.0774695947766304, "learning_rate": 0.01, "loss": 1.9878, "step": 41673 }, { "epoch": 4.275338530980714, "grad_norm": 0.05830325931310654, "learning_rate": 0.01, "loss": 2.0059, "step": 41676 }, { "epoch": 4.275646286417727, "grad_norm": 0.11642345786094666, "learning_rate": 0.01, "loss": 1.993, "step": 41679 }, { "epoch": 4.27595404185474, "grad_norm": 0.12688972055912018, "learning_rate": 0.01, "loss": 1.9887, "step": 41682 }, { "epoch": 4.2762617972917525, "grad_norm": 0.11521381884813309, "learning_rate": 0.01, "loss": 2.0166, "step": 41685 }, { "epoch": 4.276569552728765, "grad_norm": 0.1203092560172081, "learning_rate": 0.01, "loss": 2.0148, "step": 41688 }, { "epoch": 4.276877308165778, "grad_norm": 0.09710691124200821, "learning_rate": 0.01, "loss": 2.0192, "step": 41691 }, { "epoch": 4.27718506360279, "grad_norm": 0.07945617288351059, "learning_rate": 0.01, "loss": 2.0106, "step": 41694 }, { "epoch": 4.277492819039803, "grad_norm": 0.07407965511083603, "learning_rate": 0.01, "loss": 2.0017, "step": 41697 }, { "epoch": 4.2778005744768155, "grad_norm": 0.06321244686841965, "learning_rate": 0.01, "loss": 1.9992, "step": 41700 }, { "epoch": 4.278108329913828, "grad_norm": 0.056950200349092484, "learning_rate": 0.01, "loss": 2.0218, "step": 41703 }, { "epoch": 4.278416085350841, "grad_norm": 0.039905134588479996, "learning_rate": 0.01, "loss": 1.9876, "step": 41706 }, { "epoch": 4.278723840787854, "grad_norm": 0.05050415173172951, "learning_rate": 0.01, "loss": 2.0038, "step": 41709 }, { "epoch": 4.279031596224867, "grad_norm": 0.05510304123163223, "learning_rate": 0.01, "loss": 2.0015, "step": 41712 }, { "epoch": 4.279339351661879, "grad_norm": 0.05586516857147217, "learning_rate": 0.01, "loss": 2.0227, "step": 41715 }, { "epoch": 4.279647107098892, "grad_norm": 0.05404721572995186, "learning_rate": 0.01, "loss": 1.985, "step": 41718 }, { "epoch": 4.279954862535905, "grad_norm": 0.04340701550245285, "learning_rate": 0.01, "loss": 2.0015, "step": 41721 }, { "epoch": 4.280262617972918, "grad_norm": 0.053679756820201874, "learning_rate": 0.01, "loss": 1.9983, "step": 41724 }, { "epoch": 4.2805703734099305, "grad_norm": 0.1286962479352951, "learning_rate": 0.01, "loss": 2.0077, "step": 41727 }, { "epoch": 4.280878128846943, "grad_norm": 0.07245181500911713, "learning_rate": 0.01, "loss": 1.9974, "step": 41730 }, { "epoch": 4.281185884283956, "grad_norm": 0.08518802374601364, "learning_rate": 0.01, "loss": 2.0305, "step": 41733 }, { "epoch": 4.281493639720969, "grad_norm": 0.07493653893470764, "learning_rate": 0.01, "loss": 1.9955, "step": 41736 }, { "epoch": 4.281801395157981, "grad_norm": 0.05031900852918625, "learning_rate": 0.01, "loss": 1.981, "step": 41739 }, { "epoch": 4.2821091505949935, "grad_norm": 0.07814056426286697, "learning_rate": 0.01, "loss": 1.9944, "step": 41742 }, { "epoch": 4.282416906032006, "grad_norm": 0.047507043927907944, "learning_rate": 0.01, "loss": 2.0129, "step": 41745 }, { "epoch": 4.282724661469019, "grad_norm": 0.06547687947750092, "learning_rate": 0.01, "loss": 2.0094, "step": 41748 }, { "epoch": 4.283032416906032, "grad_norm": 0.059194277971982956, "learning_rate": 0.01, "loss": 2.003, "step": 41751 }, { "epoch": 4.283340172343045, "grad_norm": 0.04526950791478157, "learning_rate": 0.01, "loss": 1.9877, "step": 41754 }, { "epoch": 4.283647927780057, "grad_norm": 0.031040268018841743, "learning_rate": 0.01, "loss": 1.9665, "step": 41757 }, { "epoch": 4.28395568321707, "grad_norm": 0.09988913685083389, "learning_rate": 0.01, "loss": 2.0104, "step": 41760 }, { "epoch": 4.284263438654083, "grad_norm": 0.04122784361243248, "learning_rate": 0.01, "loss": 1.9875, "step": 41763 }, { "epoch": 4.284571194091096, "grad_norm": 0.055508848279714584, "learning_rate": 0.01, "loss": 2.0457, "step": 41766 }, { "epoch": 4.2848789495281085, "grad_norm": 0.10151500999927521, "learning_rate": 0.01, "loss": 1.9868, "step": 41769 }, { "epoch": 4.285186704965121, "grad_norm": 0.041592396795749664, "learning_rate": 0.01, "loss": 1.9835, "step": 41772 }, { "epoch": 4.285494460402134, "grad_norm": 0.06108968332409859, "learning_rate": 0.01, "loss": 2.0195, "step": 41775 }, { "epoch": 4.285802215839147, "grad_norm": 0.06336821615695953, "learning_rate": 0.01, "loss": 1.9743, "step": 41778 }, { "epoch": 4.28610997127616, "grad_norm": 0.03348274528980255, "learning_rate": 0.01, "loss": 1.9822, "step": 41781 }, { "epoch": 4.286417726713172, "grad_norm": 0.04698178544640541, "learning_rate": 0.01, "loss": 2.0001, "step": 41784 }, { "epoch": 4.286725482150184, "grad_norm": 0.04394696652889252, "learning_rate": 0.01, "loss": 2.0148, "step": 41787 }, { "epoch": 4.287033237587197, "grad_norm": 0.15938106179237366, "learning_rate": 0.01, "loss": 2.012, "step": 41790 }, { "epoch": 4.28734099302421, "grad_norm": 0.08870629966259003, "learning_rate": 0.01, "loss": 1.9863, "step": 41793 }, { "epoch": 4.287648748461223, "grad_norm": 0.051104262471199036, "learning_rate": 0.01, "loss": 1.9954, "step": 41796 }, { "epoch": 4.2879565038982355, "grad_norm": 0.03226442262530327, "learning_rate": 0.01, "loss": 2.0087, "step": 41799 }, { "epoch": 4.288264259335248, "grad_norm": 0.033240124583244324, "learning_rate": 0.01, "loss": 2.0012, "step": 41802 }, { "epoch": 4.288572014772261, "grad_norm": 0.04799087345600128, "learning_rate": 0.01, "loss": 1.9862, "step": 41805 }, { "epoch": 4.288879770209274, "grad_norm": 0.04805191978812218, "learning_rate": 0.01, "loss": 1.998, "step": 41808 }, { "epoch": 4.289187525646287, "grad_norm": 0.10758639872074127, "learning_rate": 0.01, "loss": 2.0237, "step": 41811 }, { "epoch": 4.289495281083299, "grad_norm": 0.06141864135861397, "learning_rate": 0.01, "loss": 1.9953, "step": 41814 }, { "epoch": 4.289803036520312, "grad_norm": 0.057809922844171524, "learning_rate": 0.01, "loss": 1.998, "step": 41817 }, { "epoch": 4.290110791957325, "grad_norm": 0.06831464916467667, "learning_rate": 0.01, "loss": 2.0054, "step": 41820 }, { "epoch": 4.290418547394338, "grad_norm": 0.08271254599094391, "learning_rate": 0.01, "loss": 1.9823, "step": 41823 }, { "epoch": 4.2907263028313505, "grad_norm": 0.04244139790534973, "learning_rate": 0.01, "loss": 1.9897, "step": 41826 }, { "epoch": 4.291034058268362, "grad_norm": 0.04482168331742287, "learning_rate": 0.01, "loss": 2.0228, "step": 41829 }, { "epoch": 4.291341813705375, "grad_norm": 0.04742707312107086, "learning_rate": 0.01, "loss": 2.0172, "step": 41832 }, { "epoch": 4.291649569142388, "grad_norm": 0.12863384187221527, "learning_rate": 0.01, "loss": 1.996, "step": 41835 }, { "epoch": 4.291957324579401, "grad_norm": 0.12212485820055008, "learning_rate": 0.01, "loss": 1.9976, "step": 41838 }, { "epoch": 4.2922650800164135, "grad_norm": 0.0465460866689682, "learning_rate": 0.01, "loss": 2.0078, "step": 41841 }, { "epoch": 4.292572835453426, "grad_norm": 0.052605509757995605, "learning_rate": 0.01, "loss": 2.0019, "step": 41844 }, { "epoch": 4.292880590890439, "grad_norm": 0.046292662620544434, "learning_rate": 0.01, "loss": 2.0077, "step": 41847 }, { "epoch": 4.293188346327452, "grad_norm": 0.04217211529612541, "learning_rate": 0.01, "loss": 2.0, "step": 41850 }, { "epoch": 4.293496101764465, "grad_norm": 0.08655425906181335, "learning_rate": 0.01, "loss": 1.9707, "step": 41853 }, { "epoch": 4.293803857201477, "grad_norm": 0.0645185261964798, "learning_rate": 0.01, "loss": 2.0036, "step": 41856 }, { "epoch": 4.29411161263849, "grad_norm": 0.06806469708681107, "learning_rate": 0.01, "loss": 2.0034, "step": 41859 }, { "epoch": 4.294419368075503, "grad_norm": 0.05178931728005409, "learning_rate": 0.01, "loss": 1.9923, "step": 41862 }, { "epoch": 4.294727123512516, "grad_norm": 0.0440845787525177, "learning_rate": 0.01, "loss": 2.0183, "step": 41865 }, { "epoch": 4.2950348789495285, "grad_norm": 0.04800264537334442, "learning_rate": 0.01, "loss": 1.9908, "step": 41868 }, { "epoch": 4.295342634386541, "grad_norm": 0.03780617192387581, "learning_rate": 0.01, "loss": 2.0051, "step": 41871 }, { "epoch": 4.295650389823553, "grad_norm": 0.0536942295730114, "learning_rate": 0.01, "loss": 1.999, "step": 41874 }, { "epoch": 4.295958145260566, "grad_norm": 0.09988999366760254, "learning_rate": 0.01, "loss": 1.9926, "step": 41877 }, { "epoch": 4.296265900697579, "grad_norm": 0.06863691657781601, "learning_rate": 0.01, "loss": 2.0076, "step": 41880 }, { "epoch": 4.2965736561345915, "grad_norm": 0.04894992709159851, "learning_rate": 0.01, "loss": 1.9836, "step": 41883 }, { "epoch": 4.296881411571604, "grad_norm": 0.041689950972795486, "learning_rate": 0.01, "loss": 2.0101, "step": 41886 }, { "epoch": 4.297189167008617, "grad_norm": 0.10208067297935486, "learning_rate": 0.01, "loss": 1.9957, "step": 41889 }, { "epoch": 4.29749692244563, "grad_norm": 0.030840028077363968, "learning_rate": 0.01, "loss": 1.991, "step": 41892 }, { "epoch": 4.297804677882643, "grad_norm": 0.04242958873510361, "learning_rate": 0.01, "loss": 1.9992, "step": 41895 }, { "epoch": 4.298112433319655, "grad_norm": 0.11578787118196487, "learning_rate": 0.01, "loss": 2.0139, "step": 41898 }, { "epoch": 4.298420188756668, "grad_norm": 0.059238482266664505, "learning_rate": 0.01, "loss": 2.0087, "step": 41901 }, { "epoch": 4.298727944193681, "grad_norm": 0.03906838968396187, "learning_rate": 0.01, "loss": 2.0162, "step": 41904 }, { "epoch": 4.299035699630694, "grad_norm": 0.06461112201213837, "learning_rate": 0.01, "loss": 1.9819, "step": 41907 }, { "epoch": 4.2993434550677065, "grad_norm": 0.1491340696811676, "learning_rate": 0.01, "loss": 2.0193, "step": 41910 }, { "epoch": 4.299651210504719, "grad_norm": 0.06621528416872025, "learning_rate": 0.01, "loss": 1.9906, "step": 41913 }, { "epoch": 4.299958965941732, "grad_norm": 0.06322132796049118, "learning_rate": 0.01, "loss": 1.9737, "step": 41916 }, { "epoch": 4.300266721378744, "grad_norm": 0.04899013042449951, "learning_rate": 0.01, "loss": 1.996, "step": 41919 }, { "epoch": 4.300574476815757, "grad_norm": 0.04941810667514801, "learning_rate": 0.01, "loss": 1.9674, "step": 41922 }, { "epoch": 4.3008822322527696, "grad_norm": 0.04341074824333191, "learning_rate": 0.01, "loss": 1.9842, "step": 41925 }, { "epoch": 4.301189987689782, "grad_norm": 0.08827680349349976, "learning_rate": 0.01, "loss": 2.0119, "step": 41928 }, { "epoch": 4.301497743126795, "grad_norm": 0.05221998319029808, "learning_rate": 0.01, "loss": 2.0007, "step": 41931 }, { "epoch": 4.301805498563808, "grad_norm": 0.10630286484956741, "learning_rate": 0.01, "loss": 2.0174, "step": 41934 }, { "epoch": 4.302113254000821, "grad_norm": 0.043958306312561035, "learning_rate": 0.01, "loss": 1.9803, "step": 41937 }, { "epoch": 4.3024210094378335, "grad_norm": 0.057621411979198456, "learning_rate": 0.01, "loss": 2.01, "step": 41940 }, { "epoch": 4.302728764874846, "grad_norm": 0.09467203170061111, "learning_rate": 0.01, "loss": 2.0104, "step": 41943 }, { "epoch": 4.303036520311859, "grad_norm": 0.06720858812332153, "learning_rate": 0.01, "loss": 1.9772, "step": 41946 }, { "epoch": 4.303344275748872, "grad_norm": 0.06861304491758347, "learning_rate": 0.01, "loss": 1.9955, "step": 41949 }, { "epoch": 4.303652031185885, "grad_norm": 0.038296621292829514, "learning_rate": 0.01, "loss": 1.991, "step": 41952 }, { "epoch": 4.303959786622897, "grad_norm": 0.05808268114924431, "learning_rate": 0.01, "loss": 2.0097, "step": 41955 }, { "epoch": 4.30426754205991, "grad_norm": 0.05521778017282486, "learning_rate": 0.01, "loss": 1.995, "step": 41958 }, { "epoch": 4.304575297496923, "grad_norm": 0.05509946867823601, "learning_rate": 0.01, "loss": 2.0079, "step": 41961 }, { "epoch": 4.304883052933935, "grad_norm": 0.04729253426194191, "learning_rate": 0.01, "loss": 1.9928, "step": 41964 }, { "epoch": 4.305190808370948, "grad_norm": 0.06389732658863068, "learning_rate": 0.01, "loss": 1.9875, "step": 41967 }, { "epoch": 4.30549856380796, "grad_norm": 0.04609325900673866, "learning_rate": 0.01, "loss": 2.0058, "step": 41970 }, { "epoch": 4.305806319244973, "grad_norm": 0.06793837994337082, "learning_rate": 0.01, "loss": 1.9955, "step": 41973 }, { "epoch": 4.306114074681986, "grad_norm": 0.0791841521859169, "learning_rate": 0.01, "loss": 1.9659, "step": 41976 }, { "epoch": 4.306421830118999, "grad_norm": 0.07721048593521118, "learning_rate": 0.01, "loss": 1.9936, "step": 41979 }, { "epoch": 4.3067295855560115, "grad_norm": 0.04971477389335632, "learning_rate": 0.01, "loss": 1.971, "step": 41982 }, { "epoch": 4.307037340993024, "grad_norm": 0.11230115592479706, "learning_rate": 0.01, "loss": 2.013, "step": 41985 }, { "epoch": 4.307345096430037, "grad_norm": 0.08187992870807648, "learning_rate": 0.01, "loss": 2.0102, "step": 41988 }, { "epoch": 4.30765285186705, "grad_norm": 0.07407639175653458, "learning_rate": 0.01, "loss": 1.9617, "step": 41991 }, { "epoch": 4.307960607304063, "grad_norm": 0.051022280007600784, "learning_rate": 0.01, "loss": 1.9964, "step": 41994 }, { "epoch": 4.308268362741075, "grad_norm": 0.041618864983320236, "learning_rate": 0.01, "loss": 1.9961, "step": 41997 }, { "epoch": 4.308576118178088, "grad_norm": 0.0507926307618618, "learning_rate": 0.01, "loss": 1.9752, "step": 42000 }, { "epoch": 4.308883873615101, "grad_norm": 0.07467113435268402, "learning_rate": 0.01, "loss": 2.0045, "step": 42003 }, { "epoch": 4.309191629052114, "grad_norm": 0.06563274562358856, "learning_rate": 0.01, "loss": 2.0079, "step": 42006 }, { "epoch": 4.309499384489126, "grad_norm": 0.05920867621898651, "learning_rate": 0.01, "loss": 1.9677, "step": 42009 }, { "epoch": 4.309807139926138, "grad_norm": 0.12882298231124878, "learning_rate": 0.01, "loss": 2.0038, "step": 42012 }, { "epoch": 4.310114895363151, "grad_norm": 0.07217466831207275, "learning_rate": 0.01, "loss": 2.018, "step": 42015 }, { "epoch": 4.310422650800164, "grad_norm": 0.06030949205160141, "learning_rate": 0.01, "loss": 2.0043, "step": 42018 }, { "epoch": 4.310730406237177, "grad_norm": 0.04477029666304588, "learning_rate": 0.01, "loss": 1.9672, "step": 42021 }, { "epoch": 4.3110381616741895, "grad_norm": 0.042469825595617294, "learning_rate": 0.01, "loss": 2.0094, "step": 42024 }, { "epoch": 4.311345917111202, "grad_norm": 0.061194293200969696, "learning_rate": 0.01, "loss": 1.9986, "step": 42027 }, { "epoch": 4.311653672548215, "grad_norm": 0.07249957323074341, "learning_rate": 0.01, "loss": 1.9978, "step": 42030 }, { "epoch": 4.311961427985228, "grad_norm": 0.1172809898853302, "learning_rate": 0.01, "loss": 1.9815, "step": 42033 }, { "epoch": 4.312269183422241, "grad_norm": 0.05327514931559563, "learning_rate": 0.01, "loss": 2.0115, "step": 42036 }, { "epoch": 4.312576938859253, "grad_norm": 0.06902623176574707, "learning_rate": 0.01, "loss": 1.9916, "step": 42039 }, { "epoch": 4.312884694296266, "grad_norm": 0.039753060787916183, "learning_rate": 0.01, "loss": 1.9833, "step": 42042 }, { "epoch": 4.313192449733279, "grad_norm": 0.047289080917835236, "learning_rate": 0.01, "loss": 2.0063, "step": 42045 }, { "epoch": 4.313500205170292, "grad_norm": 0.11222794651985168, "learning_rate": 0.01, "loss": 2.008, "step": 42048 }, { "epoch": 4.313807960607304, "grad_norm": 0.09836508333683014, "learning_rate": 0.01, "loss": 2.0148, "step": 42051 }, { "epoch": 4.314115716044316, "grad_norm": 0.06988558173179626, "learning_rate": 0.01, "loss": 2.007, "step": 42054 }, { "epoch": 4.314423471481329, "grad_norm": 0.05915610492229462, "learning_rate": 0.01, "loss": 2.0016, "step": 42057 }, { "epoch": 4.314731226918342, "grad_norm": 0.14300785958766937, "learning_rate": 0.01, "loss": 1.9995, "step": 42060 }, { "epoch": 4.315038982355355, "grad_norm": 0.055672645568847656, "learning_rate": 0.01, "loss": 1.9658, "step": 42063 }, { "epoch": 4.3153467377923675, "grad_norm": 0.07753617316484451, "learning_rate": 0.01, "loss": 1.9656, "step": 42066 }, { "epoch": 4.31565449322938, "grad_norm": 0.04788988083600998, "learning_rate": 0.01, "loss": 2.0013, "step": 42069 }, { "epoch": 4.315962248666393, "grad_norm": 0.04761343449354172, "learning_rate": 0.01, "loss": 2.0033, "step": 42072 }, { "epoch": 4.316270004103406, "grad_norm": 0.051400624215602875, "learning_rate": 0.01, "loss": 2.0074, "step": 42075 }, { "epoch": 4.316577759540419, "grad_norm": 0.050774481147527695, "learning_rate": 0.01, "loss": 1.9822, "step": 42078 }, { "epoch": 4.3168855149774314, "grad_norm": 0.08724766224622726, "learning_rate": 0.01, "loss": 1.9984, "step": 42081 }, { "epoch": 4.317193270414444, "grad_norm": 0.07128842920064926, "learning_rate": 0.01, "loss": 1.9701, "step": 42084 }, { "epoch": 4.317501025851457, "grad_norm": 0.16834615170955658, "learning_rate": 0.01, "loss": 1.9971, "step": 42087 }, { "epoch": 4.31780878128847, "grad_norm": 0.07254631817340851, "learning_rate": 0.01, "loss": 1.962, "step": 42090 }, { "epoch": 4.318116536725482, "grad_norm": 0.0355360321700573, "learning_rate": 0.01, "loss": 2.0011, "step": 42093 }, { "epoch": 4.3184242921624945, "grad_norm": 0.03480659797787666, "learning_rate": 0.01, "loss": 2.0197, "step": 42096 }, { "epoch": 4.318732047599507, "grad_norm": 0.04848271980881691, "learning_rate": 0.01, "loss": 2.003, "step": 42099 }, { "epoch": 4.31903980303652, "grad_norm": 0.047225676476955414, "learning_rate": 0.01, "loss": 2.0117, "step": 42102 }, { "epoch": 4.319347558473533, "grad_norm": 0.07466583698987961, "learning_rate": 0.01, "loss": 1.9881, "step": 42105 }, { "epoch": 4.319655313910546, "grad_norm": 0.03580273687839508, "learning_rate": 0.01, "loss": 2.0099, "step": 42108 }, { "epoch": 4.319963069347558, "grad_norm": 0.0513874776661396, "learning_rate": 0.01, "loss": 1.9978, "step": 42111 }, { "epoch": 4.320270824784571, "grad_norm": 0.08838716894388199, "learning_rate": 0.01, "loss": 2.0022, "step": 42114 }, { "epoch": 4.320578580221584, "grad_norm": 0.048829689621925354, "learning_rate": 0.01, "loss": 2.0228, "step": 42117 }, { "epoch": 4.320886335658597, "grad_norm": 0.0572056770324707, "learning_rate": 0.01, "loss": 2.0366, "step": 42120 }, { "epoch": 4.3211940910956095, "grad_norm": 0.0953284278512001, "learning_rate": 0.01, "loss": 1.99, "step": 42123 }, { "epoch": 4.321501846532622, "grad_norm": 0.08970024436712265, "learning_rate": 0.01, "loss": 2.0048, "step": 42126 }, { "epoch": 4.321809601969635, "grad_norm": 0.03790782392024994, "learning_rate": 0.01, "loss": 2.017, "step": 42129 }, { "epoch": 4.322117357406648, "grad_norm": 0.11770451813936234, "learning_rate": 0.01, "loss": 1.9909, "step": 42132 }, { "epoch": 4.322425112843661, "grad_norm": 0.08634433895349503, "learning_rate": 0.01, "loss": 2.0067, "step": 42135 }, { "epoch": 4.3227328682806725, "grad_norm": 0.08693015575408936, "learning_rate": 0.01, "loss": 1.994, "step": 42138 }, { "epoch": 4.323040623717685, "grad_norm": 0.07767558842897415, "learning_rate": 0.01, "loss": 2.0127, "step": 42141 }, { "epoch": 4.323348379154698, "grad_norm": 0.03783591464161873, "learning_rate": 0.01, "loss": 1.9699, "step": 42144 }, { "epoch": 4.323656134591711, "grad_norm": 0.036055248230695724, "learning_rate": 0.01, "loss": 2.0056, "step": 42147 }, { "epoch": 4.323963890028724, "grad_norm": 0.056252893060445786, "learning_rate": 0.01, "loss": 2.0124, "step": 42150 }, { "epoch": 4.324271645465736, "grad_norm": 0.07847966253757477, "learning_rate": 0.01, "loss": 2.007, "step": 42153 }, { "epoch": 4.324579400902749, "grad_norm": 0.07340630888938904, "learning_rate": 0.01, "loss": 2.0161, "step": 42156 }, { "epoch": 4.324887156339762, "grad_norm": 0.0631684884428978, "learning_rate": 0.01, "loss": 1.9991, "step": 42159 }, { "epoch": 4.325194911776775, "grad_norm": 0.03661903738975525, "learning_rate": 0.01, "loss": 2.0048, "step": 42162 }, { "epoch": 4.3255026672137875, "grad_norm": 0.06492697447538376, "learning_rate": 0.01, "loss": 1.9726, "step": 42165 }, { "epoch": 4.3258104226508, "grad_norm": 0.07288751751184464, "learning_rate": 0.01, "loss": 1.9985, "step": 42168 }, { "epoch": 4.326118178087813, "grad_norm": 0.04615149274468422, "learning_rate": 0.01, "loss": 1.9835, "step": 42171 }, { "epoch": 4.326425933524826, "grad_norm": 0.06735312938690186, "learning_rate": 0.01, "loss": 1.9959, "step": 42174 }, { "epoch": 4.326733688961839, "grad_norm": 0.05109792947769165, "learning_rate": 0.01, "loss": 2.0274, "step": 42177 }, { "epoch": 4.327041444398851, "grad_norm": 0.047006141394376755, "learning_rate": 0.01, "loss": 2.0182, "step": 42180 }, { "epoch": 4.327349199835863, "grad_norm": 0.11803962290287018, "learning_rate": 0.01, "loss": 2.0149, "step": 42183 }, { "epoch": 4.327656955272876, "grad_norm": 0.06282762438058853, "learning_rate": 0.01, "loss": 1.9997, "step": 42186 }, { "epoch": 4.327964710709889, "grad_norm": 0.07451837509870529, "learning_rate": 0.01, "loss": 1.9834, "step": 42189 }, { "epoch": 4.328272466146902, "grad_norm": 0.05453307554125786, "learning_rate": 0.01, "loss": 2.0096, "step": 42192 }, { "epoch": 4.328580221583914, "grad_norm": 0.03320490941405296, "learning_rate": 0.01, "loss": 1.97, "step": 42195 }, { "epoch": 4.328887977020927, "grad_norm": 0.03890087082982063, "learning_rate": 0.01, "loss": 2.0277, "step": 42198 }, { "epoch": 4.32919573245794, "grad_norm": 0.04567558318376541, "learning_rate": 0.01, "loss": 2.0203, "step": 42201 }, { "epoch": 4.329503487894953, "grad_norm": 0.11574912816286087, "learning_rate": 0.01, "loss": 2.0025, "step": 42204 }, { "epoch": 4.3298112433319655, "grad_norm": 0.1619519144296646, "learning_rate": 0.01, "loss": 2.0264, "step": 42207 }, { "epoch": 4.330118998768978, "grad_norm": 0.07246614992618561, "learning_rate": 0.01, "loss": 1.9717, "step": 42210 }, { "epoch": 4.330426754205991, "grad_norm": 0.06830492615699768, "learning_rate": 0.01, "loss": 2.0022, "step": 42213 }, { "epoch": 4.330734509643004, "grad_norm": 0.03900605067610741, "learning_rate": 0.01, "loss": 2.005, "step": 42216 }, { "epoch": 4.331042265080017, "grad_norm": 0.04035233333706856, "learning_rate": 0.01, "loss": 1.9802, "step": 42219 }, { "epoch": 4.331350020517029, "grad_norm": 0.08681115508079529, "learning_rate": 0.01, "loss": 1.9944, "step": 42222 }, { "epoch": 4.331657775954042, "grad_norm": 0.10521326214075089, "learning_rate": 0.01, "loss": 1.9983, "step": 42225 }, { "epoch": 4.331965531391054, "grad_norm": 0.05561526492238045, "learning_rate": 0.01, "loss": 1.9738, "step": 42228 }, { "epoch": 4.332273286828067, "grad_norm": 0.10138184577226639, "learning_rate": 0.01, "loss": 2.0263, "step": 42231 }, { "epoch": 4.33258104226508, "grad_norm": 0.05151226371526718, "learning_rate": 0.01, "loss": 1.997, "step": 42234 }, { "epoch": 4.3328887977020925, "grad_norm": 0.04308367893099785, "learning_rate": 0.01, "loss": 1.989, "step": 42237 }, { "epoch": 4.333196553139105, "grad_norm": 0.09749255329370499, "learning_rate": 0.01, "loss": 2.0007, "step": 42240 }, { "epoch": 4.333504308576118, "grad_norm": 0.078557088971138, "learning_rate": 0.01, "loss": 1.9958, "step": 42243 }, { "epoch": 4.333812064013131, "grad_norm": 0.05035284161567688, "learning_rate": 0.01, "loss": 1.9977, "step": 42246 }, { "epoch": 4.334119819450144, "grad_norm": 0.08703385293483734, "learning_rate": 0.01, "loss": 1.9876, "step": 42249 }, { "epoch": 4.334427574887156, "grad_norm": 0.06751354783773422, "learning_rate": 0.01, "loss": 1.9668, "step": 42252 }, { "epoch": 4.334735330324169, "grad_norm": 0.06264498084783554, "learning_rate": 0.01, "loss": 2.0163, "step": 42255 }, { "epoch": 4.335043085761182, "grad_norm": 0.08927709609270096, "learning_rate": 0.01, "loss": 1.9938, "step": 42258 }, { "epoch": 4.335350841198195, "grad_norm": 0.0882975161075592, "learning_rate": 0.01, "loss": 1.982, "step": 42261 }, { "epoch": 4.3356585966352075, "grad_norm": 0.08568523824214935, "learning_rate": 0.01, "loss": 1.9992, "step": 42264 }, { "epoch": 4.33596635207222, "grad_norm": 0.10143941640853882, "learning_rate": 0.01, "loss": 2.0098, "step": 42267 }, { "epoch": 4.336274107509233, "grad_norm": 0.04442453384399414, "learning_rate": 0.01, "loss": 1.9998, "step": 42270 }, { "epoch": 4.336581862946245, "grad_norm": 0.04202549159526825, "learning_rate": 0.01, "loss": 2.0177, "step": 42273 }, { "epoch": 4.336889618383258, "grad_norm": 0.07024986296892166, "learning_rate": 0.01, "loss": 1.9997, "step": 42276 }, { "epoch": 4.3371973738202705, "grad_norm": 0.0714881494641304, "learning_rate": 0.01, "loss": 1.9988, "step": 42279 }, { "epoch": 4.337505129257283, "grad_norm": 0.05677810311317444, "learning_rate": 0.01, "loss": 2.0163, "step": 42282 }, { "epoch": 4.337812884694296, "grad_norm": 0.08475086092948914, "learning_rate": 0.01, "loss": 1.9804, "step": 42285 }, { "epoch": 4.338120640131309, "grad_norm": 0.05561790242791176, "learning_rate": 0.01, "loss": 1.9952, "step": 42288 }, { "epoch": 4.338428395568322, "grad_norm": 0.0845925584435463, "learning_rate": 0.01, "loss": 1.9964, "step": 42291 }, { "epoch": 4.338736151005334, "grad_norm": 0.07267442345619202, "learning_rate": 0.01, "loss": 1.9967, "step": 42294 }, { "epoch": 4.339043906442347, "grad_norm": 0.06486202031373978, "learning_rate": 0.01, "loss": 1.9915, "step": 42297 }, { "epoch": 4.33935166187936, "grad_norm": 0.08498860895633698, "learning_rate": 0.01, "loss": 1.9849, "step": 42300 }, { "epoch": 4.339659417316373, "grad_norm": 0.05737947300076485, "learning_rate": 0.01, "loss": 2.0168, "step": 42303 }, { "epoch": 4.3399671727533855, "grad_norm": 0.1146705150604248, "learning_rate": 0.01, "loss": 1.9874, "step": 42306 }, { "epoch": 4.340274928190398, "grad_norm": 0.06821808218955994, "learning_rate": 0.01, "loss": 2.003, "step": 42309 }, { "epoch": 4.340582683627411, "grad_norm": 0.06554093956947327, "learning_rate": 0.01, "loss": 2.0051, "step": 42312 }, { "epoch": 4.340890439064424, "grad_norm": 0.055730611085891724, "learning_rate": 0.01, "loss": 1.9784, "step": 42315 }, { "epoch": 4.341198194501436, "grad_norm": 0.12713190913200378, "learning_rate": 0.01, "loss": 1.9957, "step": 42318 }, { "epoch": 4.3415059499384485, "grad_norm": 0.04995013028383255, "learning_rate": 0.01, "loss": 1.9892, "step": 42321 }, { "epoch": 4.341813705375461, "grad_norm": 0.05785641819238663, "learning_rate": 0.01, "loss": 2.001, "step": 42324 }, { "epoch": 4.342121460812474, "grad_norm": 0.06933962553739548, "learning_rate": 0.01, "loss": 1.9708, "step": 42327 }, { "epoch": 4.342429216249487, "grad_norm": 0.08787462115287781, "learning_rate": 0.01, "loss": 1.9808, "step": 42330 }, { "epoch": 4.3427369716865, "grad_norm": 0.03759448602795601, "learning_rate": 0.01, "loss": 2.0184, "step": 42333 }, { "epoch": 4.343044727123512, "grad_norm": 0.08787883818149567, "learning_rate": 0.01, "loss": 2.0153, "step": 42336 }, { "epoch": 4.343352482560525, "grad_norm": 0.07424912601709366, "learning_rate": 0.01, "loss": 2.0099, "step": 42339 }, { "epoch": 4.343660237997538, "grad_norm": 0.08268291503190994, "learning_rate": 0.01, "loss": 2.0116, "step": 42342 }, { "epoch": 4.343967993434551, "grad_norm": 0.09832719713449478, "learning_rate": 0.01, "loss": 2.0129, "step": 42345 }, { "epoch": 4.3442757488715635, "grad_norm": 0.08465267717838287, "learning_rate": 0.01, "loss": 1.9878, "step": 42348 }, { "epoch": 4.344583504308576, "grad_norm": 0.08722463995218277, "learning_rate": 0.01, "loss": 2.0249, "step": 42351 }, { "epoch": 4.344891259745589, "grad_norm": 0.03291288763284683, "learning_rate": 0.01, "loss": 1.9878, "step": 42354 }, { "epoch": 4.345199015182602, "grad_norm": 0.05389249697327614, "learning_rate": 0.01, "loss": 1.9989, "step": 42357 }, { "epoch": 4.345506770619615, "grad_norm": 0.057934414595365524, "learning_rate": 0.01, "loss": 2.0047, "step": 42360 }, { "epoch": 4.3458145260566265, "grad_norm": 0.09527049958705902, "learning_rate": 0.01, "loss": 1.996, "step": 42363 }, { "epoch": 4.346122281493639, "grad_norm": 0.09653705358505249, "learning_rate": 0.01, "loss": 2.0058, "step": 42366 }, { "epoch": 4.346430036930652, "grad_norm": 0.050867971032857895, "learning_rate": 0.01, "loss": 2.0043, "step": 42369 }, { "epoch": 4.346737792367665, "grad_norm": 0.17523349821567535, "learning_rate": 0.01, "loss": 1.9933, "step": 42372 }, { "epoch": 4.347045547804678, "grad_norm": 0.04011973738670349, "learning_rate": 0.01, "loss": 2.0147, "step": 42375 }, { "epoch": 4.3473533032416904, "grad_norm": 0.0476989708840847, "learning_rate": 0.01, "loss": 2.0088, "step": 42378 }, { "epoch": 4.347661058678703, "grad_norm": 0.062353190034627914, "learning_rate": 0.01, "loss": 2.0106, "step": 42381 }, { "epoch": 4.347968814115716, "grad_norm": 0.03900708258152008, "learning_rate": 0.01, "loss": 2.0074, "step": 42384 }, { "epoch": 4.348276569552729, "grad_norm": 0.05116381496191025, "learning_rate": 0.01, "loss": 2.0263, "step": 42387 }, { "epoch": 4.348584324989742, "grad_norm": 0.0740213617682457, "learning_rate": 0.01, "loss": 1.9911, "step": 42390 }, { "epoch": 4.348892080426754, "grad_norm": 0.04419323056936264, "learning_rate": 0.01, "loss": 1.9859, "step": 42393 }, { "epoch": 4.349199835863767, "grad_norm": 0.09228520840406418, "learning_rate": 0.01, "loss": 1.9902, "step": 42396 }, { "epoch": 4.34950759130078, "grad_norm": 0.06969837844371796, "learning_rate": 0.01, "loss": 2.0195, "step": 42399 }, { "epoch": 4.349815346737793, "grad_norm": 0.06493277847766876, "learning_rate": 0.01, "loss": 1.9871, "step": 42402 }, { "epoch": 4.3501231021748055, "grad_norm": 0.10470300912857056, "learning_rate": 0.01, "loss": 1.9829, "step": 42405 }, { "epoch": 4.350430857611817, "grad_norm": 0.055797114968299866, "learning_rate": 0.01, "loss": 1.9753, "step": 42408 }, { "epoch": 4.35073861304883, "grad_norm": 0.08295798301696777, "learning_rate": 0.01, "loss": 2.0024, "step": 42411 }, { "epoch": 4.351046368485843, "grad_norm": 0.03319832682609558, "learning_rate": 0.01, "loss": 1.9977, "step": 42414 }, { "epoch": 4.351354123922856, "grad_norm": 0.08333670347929001, "learning_rate": 0.01, "loss": 1.9832, "step": 42417 }, { "epoch": 4.3516618793598685, "grad_norm": 0.15636205673217773, "learning_rate": 0.01, "loss": 2.0047, "step": 42420 }, { "epoch": 4.351969634796881, "grad_norm": 0.04145883768796921, "learning_rate": 0.01, "loss": 1.9993, "step": 42423 }, { "epoch": 4.352277390233894, "grad_norm": 0.06319641321897507, "learning_rate": 0.01, "loss": 2.0072, "step": 42426 }, { "epoch": 4.352585145670907, "grad_norm": 0.10287382453680038, "learning_rate": 0.01, "loss": 2.0008, "step": 42429 }, { "epoch": 4.35289290110792, "grad_norm": 0.05073247849941254, "learning_rate": 0.01, "loss": 2.0024, "step": 42432 }, { "epoch": 4.353200656544932, "grad_norm": 0.05621569603681564, "learning_rate": 0.01, "loss": 2.006, "step": 42435 }, { "epoch": 4.353508411981945, "grad_norm": 0.044255949556827545, "learning_rate": 0.01, "loss": 2.0127, "step": 42438 }, { "epoch": 4.353816167418958, "grad_norm": 0.09396500885486603, "learning_rate": 0.01, "loss": 2.011, "step": 42441 }, { "epoch": 4.354123922855971, "grad_norm": 0.04211452975869179, "learning_rate": 0.01, "loss": 2.0056, "step": 42444 }, { "epoch": 4.3544316782929835, "grad_norm": 0.09612414985895157, "learning_rate": 0.01, "loss": 1.9703, "step": 42447 }, { "epoch": 4.354739433729996, "grad_norm": 0.07747933268547058, "learning_rate": 0.01, "loss": 2.005, "step": 42450 }, { "epoch": 4.355047189167008, "grad_norm": 0.06796756386756897, "learning_rate": 0.01, "loss": 2.006, "step": 42453 }, { "epoch": 4.355354944604021, "grad_norm": 0.052594736218452454, "learning_rate": 0.01, "loss": 2.0158, "step": 42456 }, { "epoch": 4.355662700041034, "grad_norm": 0.09094807505607605, "learning_rate": 0.01, "loss": 1.9884, "step": 42459 }, { "epoch": 4.3559704554780465, "grad_norm": 0.06846702843904495, "learning_rate": 0.01, "loss": 1.9743, "step": 42462 }, { "epoch": 4.356278210915059, "grad_norm": 0.059900566935539246, "learning_rate": 0.01, "loss": 1.9951, "step": 42465 }, { "epoch": 4.356585966352072, "grad_norm": 0.07737037539482117, "learning_rate": 0.01, "loss": 2.005, "step": 42468 }, { "epoch": 4.356893721789085, "grad_norm": 0.10885298252105713, "learning_rate": 0.01, "loss": 1.9983, "step": 42471 }, { "epoch": 4.357201477226098, "grad_norm": 0.12740680575370789, "learning_rate": 0.01, "loss": 2.007, "step": 42474 }, { "epoch": 4.35750923266311, "grad_norm": 0.05705080181360245, "learning_rate": 0.01, "loss": 2.0148, "step": 42477 }, { "epoch": 4.357816988100123, "grad_norm": 0.047689832746982574, "learning_rate": 0.01, "loss": 1.9935, "step": 42480 }, { "epoch": 4.358124743537136, "grad_norm": 0.06346622854471207, "learning_rate": 0.01, "loss": 2.0007, "step": 42483 }, { "epoch": 4.358432498974149, "grad_norm": 0.08776500821113586, "learning_rate": 0.01, "loss": 2.0056, "step": 42486 }, { "epoch": 4.3587402544111615, "grad_norm": 0.08766961097717285, "learning_rate": 0.01, "loss": 1.9841, "step": 42489 }, { "epoch": 4.359048009848174, "grad_norm": 0.045849695801734924, "learning_rate": 0.01, "loss": 1.9967, "step": 42492 }, { "epoch": 4.359355765285187, "grad_norm": 0.10003980994224548, "learning_rate": 0.01, "loss": 2.003, "step": 42495 }, { "epoch": 4.359663520722199, "grad_norm": 0.05734704062342644, "learning_rate": 0.01, "loss": 2.0167, "step": 42498 }, { "epoch": 4.359971276159212, "grad_norm": 0.05776417255401611, "learning_rate": 0.01, "loss": 2.0221, "step": 42501 }, { "epoch": 4.3602790315962245, "grad_norm": 0.03467059135437012, "learning_rate": 0.01, "loss": 1.9826, "step": 42504 }, { "epoch": 4.360586787033237, "grad_norm": 0.03380267322063446, "learning_rate": 0.01, "loss": 2.0056, "step": 42507 }, { "epoch": 4.36089454247025, "grad_norm": 0.06621702015399933, "learning_rate": 0.01, "loss": 2.0018, "step": 42510 }, { "epoch": 4.361202297907263, "grad_norm": 0.05189868062734604, "learning_rate": 0.01, "loss": 1.9983, "step": 42513 }, { "epoch": 4.361510053344276, "grad_norm": 0.05479085072875023, "learning_rate": 0.01, "loss": 1.9818, "step": 42516 }, { "epoch": 4.361817808781288, "grad_norm": 0.07013875991106033, "learning_rate": 0.01, "loss": 2.0069, "step": 42519 }, { "epoch": 4.362125564218301, "grad_norm": 0.07816500216722488, "learning_rate": 0.01, "loss": 1.9825, "step": 42522 }, { "epoch": 4.362433319655314, "grad_norm": 0.05446416139602661, "learning_rate": 0.01, "loss": 1.9871, "step": 42525 }, { "epoch": 4.362741075092327, "grad_norm": 0.05260547250509262, "learning_rate": 0.01, "loss": 1.9789, "step": 42528 }, { "epoch": 4.36304883052934, "grad_norm": 0.042641572654247284, "learning_rate": 0.01, "loss": 1.9998, "step": 42531 }, { "epoch": 4.363356585966352, "grad_norm": 0.05067918077111244, "learning_rate": 0.01, "loss": 1.977, "step": 42534 }, { "epoch": 4.363664341403365, "grad_norm": 0.05734257400035858, "learning_rate": 0.01, "loss": 1.9884, "step": 42537 }, { "epoch": 4.363972096840378, "grad_norm": 0.12982626259326935, "learning_rate": 0.01, "loss": 1.9998, "step": 42540 }, { "epoch": 4.36427985227739, "grad_norm": 0.06049492582678795, "learning_rate": 0.01, "loss": 1.9958, "step": 42543 }, { "epoch": 4.364587607714403, "grad_norm": 0.03683345392346382, "learning_rate": 0.01, "loss": 1.9857, "step": 42546 }, { "epoch": 4.364895363151415, "grad_norm": 0.055864546447992325, "learning_rate": 0.01, "loss": 2.0082, "step": 42549 }, { "epoch": 4.365203118588428, "grad_norm": 0.046534840017557144, "learning_rate": 0.01, "loss": 2.0027, "step": 42552 }, { "epoch": 4.365510874025441, "grad_norm": 0.07829025387763977, "learning_rate": 0.01, "loss": 1.9982, "step": 42555 }, { "epoch": 4.365818629462454, "grad_norm": 0.03704720363020897, "learning_rate": 0.01, "loss": 1.9873, "step": 42558 }, { "epoch": 4.3661263848994665, "grad_norm": 0.03933021053671837, "learning_rate": 0.01, "loss": 1.9813, "step": 42561 }, { "epoch": 4.366434140336479, "grad_norm": 0.05160917341709137, "learning_rate": 0.01, "loss": 1.9743, "step": 42564 }, { "epoch": 4.366741895773492, "grad_norm": 0.16336973011493683, "learning_rate": 0.01, "loss": 2.0207, "step": 42567 }, { "epoch": 4.367049651210505, "grad_norm": 0.15664827823638916, "learning_rate": 0.01, "loss": 1.9948, "step": 42570 }, { "epoch": 4.367357406647518, "grad_norm": 0.06535400450229645, "learning_rate": 0.01, "loss": 1.9707, "step": 42573 }, { "epoch": 4.36766516208453, "grad_norm": 0.10573652386665344, "learning_rate": 0.01, "loss": 2.0185, "step": 42576 }, { "epoch": 4.367972917521543, "grad_norm": 0.06016778200864792, "learning_rate": 0.01, "loss": 1.9967, "step": 42579 }, { "epoch": 4.368280672958556, "grad_norm": 0.07716617733240128, "learning_rate": 0.01, "loss": 2.012, "step": 42582 }, { "epoch": 4.368588428395569, "grad_norm": 0.0795513242483139, "learning_rate": 0.01, "loss": 1.9885, "step": 42585 }, { "epoch": 4.368896183832581, "grad_norm": 0.042184434831142426, "learning_rate": 0.01, "loss": 1.9473, "step": 42588 }, { "epoch": 4.369203939269593, "grad_norm": 0.10784424096345901, "learning_rate": 0.01, "loss": 2.0099, "step": 42591 }, { "epoch": 4.369511694706606, "grad_norm": 0.0906398668885231, "learning_rate": 0.01, "loss": 1.9985, "step": 42594 }, { "epoch": 4.369819450143619, "grad_norm": 0.07714588940143585, "learning_rate": 0.01, "loss": 2.0083, "step": 42597 }, { "epoch": 4.370127205580632, "grad_norm": 0.07262132316827774, "learning_rate": 0.01, "loss": 2.0107, "step": 42600 }, { "epoch": 4.3704349610176445, "grad_norm": 0.04687703400850296, "learning_rate": 0.01, "loss": 1.9916, "step": 42603 }, { "epoch": 4.370742716454657, "grad_norm": 0.03540663421154022, "learning_rate": 0.01, "loss": 1.9784, "step": 42606 }, { "epoch": 4.37105047189167, "grad_norm": 0.0655878484249115, "learning_rate": 0.01, "loss": 2.0104, "step": 42609 }, { "epoch": 4.371358227328683, "grad_norm": 0.14327290654182434, "learning_rate": 0.01, "loss": 1.9783, "step": 42612 }, { "epoch": 4.371665982765696, "grad_norm": 0.051352307200431824, "learning_rate": 0.01, "loss": 1.9952, "step": 42615 }, { "epoch": 4.371973738202708, "grad_norm": 0.03427935391664505, "learning_rate": 0.01, "loss": 1.995, "step": 42618 }, { "epoch": 4.372281493639721, "grad_norm": 0.06677401810884476, "learning_rate": 0.01, "loss": 2.0074, "step": 42621 }, { "epoch": 4.372589249076734, "grad_norm": 0.0740409791469574, "learning_rate": 0.01, "loss": 2.0128, "step": 42624 }, { "epoch": 4.372897004513747, "grad_norm": 0.07356753200292587, "learning_rate": 0.01, "loss": 1.9861, "step": 42627 }, { "epoch": 4.3732047599507595, "grad_norm": 0.0342116579413414, "learning_rate": 0.01, "loss": 2.0028, "step": 42630 }, { "epoch": 4.373512515387771, "grad_norm": 0.04127725213766098, "learning_rate": 0.01, "loss": 2.0058, "step": 42633 }, { "epoch": 4.373820270824784, "grad_norm": 0.05259817838668823, "learning_rate": 0.01, "loss": 1.9704, "step": 42636 }, { "epoch": 4.374128026261797, "grad_norm": 0.041953880339860916, "learning_rate": 0.01, "loss": 1.9922, "step": 42639 }, { "epoch": 4.37443578169881, "grad_norm": 0.042254798114299774, "learning_rate": 0.01, "loss": 1.9852, "step": 42642 }, { "epoch": 4.3747435371358225, "grad_norm": 0.0722968801856041, "learning_rate": 0.01, "loss": 1.9903, "step": 42645 }, { "epoch": 4.375051292572835, "grad_norm": 0.12318527698516846, "learning_rate": 0.01, "loss": 2.0048, "step": 42648 }, { "epoch": 4.375359048009848, "grad_norm": 0.048684168606996536, "learning_rate": 0.01, "loss": 1.9864, "step": 42651 }, { "epoch": 4.375666803446861, "grad_norm": 0.09488745033740997, "learning_rate": 0.01, "loss": 1.9645, "step": 42654 }, { "epoch": 4.375974558883874, "grad_norm": 0.06261547654867172, "learning_rate": 0.01, "loss": 2.0027, "step": 42657 }, { "epoch": 4.376282314320886, "grad_norm": 0.03739078342914581, "learning_rate": 0.01, "loss": 2.0081, "step": 42660 }, { "epoch": 4.376590069757899, "grad_norm": 0.03686247393488884, "learning_rate": 0.01, "loss": 1.9983, "step": 42663 }, { "epoch": 4.376897825194912, "grad_norm": 0.038870710879564285, "learning_rate": 0.01, "loss": 2.012, "step": 42666 }, { "epoch": 4.377205580631925, "grad_norm": 0.04153933376073837, "learning_rate": 0.01, "loss": 1.9836, "step": 42669 }, { "epoch": 4.3775133360689376, "grad_norm": 0.07403891533613205, "learning_rate": 0.01, "loss": 1.9581, "step": 42672 }, { "epoch": 4.37782109150595, "grad_norm": 0.12554992735385895, "learning_rate": 0.01, "loss": 2.0276, "step": 42675 }, { "epoch": 4.378128846942962, "grad_norm": 0.07833071053028107, "learning_rate": 0.01, "loss": 1.9745, "step": 42678 }, { "epoch": 4.378436602379975, "grad_norm": 0.05616781488060951, "learning_rate": 0.01, "loss": 2.0098, "step": 42681 }, { "epoch": 4.378744357816988, "grad_norm": 0.034405291080474854, "learning_rate": 0.01, "loss": 1.9853, "step": 42684 }, { "epoch": 4.379052113254001, "grad_norm": 0.03216303512454033, "learning_rate": 0.01, "loss": 2.0074, "step": 42687 }, { "epoch": 4.379359868691013, "grad_norm": 0.03294415399432182, "learning_rate": 0.01, "loss": 1.9821, "step": 42690 }, { "epoch": 4.379667624128026, "grad_norm": 0.07378646731376648, "learning_rate": 0.01, "loss": 1.9866, "step": 42693 }, { "epoch": 4.379975379565039, "grad_norm": 0.0922391265630722, "learning_rate": 0.01, "loss": 1.9799, "step": 42696 }, { "epoch": 4.380283135002052, "grad_norm": 0.06231202930212021, "learning_rate": 0.01, "loss": 1.9904, "step": 42699 }, { "epoch": 4.3805908904390645, "grad_norm": 0.07775771617889404, "learning_rate": 0.01, "loss": 2.0098, "step": 42702 }, { "epoch": 4.380898645876077, "grad_norm": 0.041460320353507996, "learning_rate": 0.01, "loss": 2.0052, "step": 42705 }, { "epoch": 4.38120640131309, "grad_norm": 0.10348492860794067, "learning_rate": 0.01, "loss": 1.9761, "step": 42708 }, { "epoch": 4.381514156750103, "grad_norm": 0.06981760263442993, "learning_rate": 0.01, "loss": 2.0003, "step": 42711 }, { "epoch": 4.381821912187116, "grad_norm": 0.05024630203843117, "learning_rate": 0.01, "loss": 1.9952, "step": 42714 }, { "epoch": 4.382129667624128, "grad_norm": 0.09696882963180542, "learning_rate": 0.01, "loss": 1.9854, "step": 42717 }, { "epoch": 4.382437423061141, "grad_norm": 0.08955802023410797, "learning_rate": 0.01, "loss": 2.0075, "step": 42720 }, { "epoch": 4.382745178498153, "grad_norm": 0.053882379084825516, "learning_rate": 0.01, "loss": 1.9908, "step": 42723 }, { "epoch": 4.383052933935166, "grad_norm": 0.09641711413860321, "learning_rate": 0.01, "loss": 1.9923, "step": 42726 }, { "epoch": 4.383360689372179, "grad_norm": 0.03148825094103813, "learning_rate": 0.01, "loss": 2.0041, "step": 42729 }, { "epoch": 4.383668444809191, "grad_norm": 0.13528583943843842, "learning_rate": 0.01, "loss": 1.973, "step": 42732 }, { "epoch": 4.383976200246204, "grad_norm": 0.08340415358543396, "learning_rate": 0.01, "loss": 2.0033, "step": 42735 }, { "epoch": 4.384283955683217, "grad_norm": 0.04938989877700806, "learning_rate": 0.01, "loss": 2.0224, "step": 42738 }, { "epoch": 4.38459171112023, "grad_norm": 0.03983978554606438, "learning_rate": 0.01, "loss": 1.9745, "step": 42741 }, { "epoch": 4.3848994665572425, "grad_norm": 0.04310479387640953, "learning_rate": 0.01, "loss": 2.0133, "step": 42744 }, { "epoch": 4.385207221994255, "grad_norm": 0.047569889575242996, "learning_rate": 0.01, "loss": 2.0138, "step": 42747 }, { "epoch": 4.385514977431268, "grad_norm": 0.04927491769194603, "learning_rate": 0.01, "loss": 2.0133, "step": 42750 }, { "epoch": 4.385822732868281, "grad_norm": 0.17855384945869446, "learning_rate": 0.01, "loss": 2.0052, "step": 42753 }, { "epoch": 4.386130488305294, "grad_norm": 0.06133484095335007, "learning_rate": 0.01, "loss": 2.0078, "step": 42756 }, { "epoch": 4.386438243742306, "grad_norm": 0.0671703889966011, "learning_rate": 0.01, "loss": 1.9997, "step": 42759 }, { "epoch": 4.386745999179319, "grad_norm": 0.06793912500143051, "learning_rate": 0.01, "loss": 2.0044, "step": 42762 }, { "epoch": 4.387053754616332, "grad_norm": 0.042816221714019775, "learning_rate": 0.01, "loss": 1.9831, "step": 42765 }, { "epoch": 4.387361510053344, "grad_norm": 0.035412371158599854, "learning_rate": 0.01, "loss": 1.9925, "step": 42768 }, { "epoch": 4.387669265490357, "grad_norm": 0.05142438784241676, "learning_rate": 0.01, "loss": 2.026, "step": 42771 }, { "epoch": 4.387977020927369, "grad_norm": 0.04674524813890457, "learning_rate": 0.01, "loss": 1.9963, "step": 42774 }, { "epoch": 4.388284776364382, "grad_norm": 0.2127211093902588, "learning_rate": 0.01, "loss": 1.9995, "step": 42777 }, { "epoch": 4.388592531801395, "grad_norm": 0.09352391213178635, "learning_rate": 0.01, "loss": 1.9798, "step": 42780 }, { "epoch": 4.388900287238408, "grad_norm": 0.06070404127240181, "learning_rate": 0.01, "loss": 1.9913, "step": 42783 }, { "epoch": 4.3892080426754205, "grad_norm": 0.0433930978178978, "learning_rate": 0.01, "loss": 1.9898, "step": 42786 }, { "epoch": 4.389515798112433, "grad_norm": 0.03960421308875084, "learning_rate": 0.01, "loss": 2.0209, "step": 42789 }, { "epoch": 4.389823553549446, "grad_norm": 0.043349895626306534, "learning_rate": 0.01, "loss": 2.0067, "step": 42792 }, { "epoch": 4.390131308986459, "grad_norm": 0.04139290004968643, "learning_rate": 0.01, "loss": 1.9976, "step": 42795 }, { "epoch": 4.390439064423472, "grad_norm": 0.08798687905073166, "learning_rate": 0.01, "loss": 1.9892, "step": 42798 }, { "epoch": 4.390746819860484, "grad_norm": 0.061553046107292175, "learning_rate": 0.01, "loss": 1.9815, "step": 42801 }, { "epoch": 4.391054575297497, "grad_norm": 0.04880871623754501, "learning_rate": 0.01, "loss": 1.9976, "step": 42804 }, { "epoch": 4.39136233073451, "grad_norm": 0.06029786914587021, "learning_rate": 0.01, "loss": 1.9921, "step": 42807 }, { "epoch": 4.391670086171523, "grad_norm": 0.047968216240406036, "learning_rate": 0.01, "loss": 2.0255, "step": 42810 }, { "epoch": 4.391977841608535, "grad_norm": 0.11133129149675369, "learning_rate": 0.01, "loss": 1.9947, "step": 42813 }, { "epoch": 4.392285597045547, "grad_norm": 0.09831759333610535, "learning_rate": 0.01, "loss": 1.9738, "step": 42816 }, { "epoch": 4.39259335248256, "grad_norm": 0.0829746425151825, "learning_rate": 0.01, "loss": 1.9932, "step": 42819 }, { "epoch": 4.392901107919573, "grad_norm": 0.04381052777171135, "learning_rate": 0.01, "loss": 1.9988, "step": 42822 }, { "epoch": 4.393208863356586, "grad_norm": 0.06539449840784073, "learning_rate": 0.01, "loss": 1.9888, "step": 42825 }, { "epoch": 4.393516618793599, "grad_norm": 0.03784729540348053, "learning_rate": 0.01, "loss": 1.9718, "step": 42828 }, { "epoch": 4.393824374230611, "grad_norm": 0.10152629017829895, "learning_rate": 0.01, "loss": 2.0002, "step": 42831 }, { "epoch": 4.394132129667624, "grad_norm": 0.043561842292547226, "learning_rate": 0.01, "loss": 2.0051, "step": 42834 }, { "epoch": 4.394439885104637, "grad_norm": 0.06225054711103439, "learning_rate": 0.01, "loss": 1.9822, "step": 42837 }, { "epoch": 4.39474764054165, "grad_norm": 0.08047157526016235, "learning_rate": 0.01, "loss": 1.9877, "step": 42840 }, { "epoch": 4.3950553959786625, "grad_norm": 0.09267039597034454, "learning_rate": 0.01, "loss": 2.0022, "step": 42843 }, { "epoch": 4.395363151415675, "grad_norm": 0.10524188727140427, "learning_rate": 0.01, "loss": 1.9832, "step": 42846 }, { "epoch": 4.395670906852688, "grad_norm": 0.0726306214928627, "learning_rate": 0.01, "loss": 2.0324, "step": 42849 }, { "epoch": 4.395978662289701, "grad_norm": 0.036908458918333054, "learning_rate": 0.01, "loss": 1.9911, "step": 42852 }, { "epoch": 4.396286417726714, "grad_norm": 0.052610307931900024, "learning_rate": 0.01, "loss": 1.9566, "step": 42855 }, { "epoch": 4.3965941731637255, "grad_norm": 0.04229322075843811, "learning_rate": 0.01, "loss": 2.0177, "step": 42858 }, { "epoch": 4.396901928600738, "grad_norm": 0.1478923261165619, "learning_rate": 0.01, "loss": 1.9907, "step": 42861 }, { "epoch": 4.397209684037751, "grad_norm": 0.08942049741744995, "learning_rate": 0.01, "loss": 2.001, "step": 42864 }, { "epoch": 4.397517439474764, "grad_norm": 0.051714204251766205, "learning_rate": 0.01, "loss": 2.0194, "step": 42867 }, { "epoch": 4.397825194911777, "grad_norm": 0.03621753305196762, "learning_rate": 0.01, "loss": 1.9926, "step": 42870 }, { "epoch": 4.398132950348789, "grad_norm": 0.03929129242897034, "learning_rate": 0.01, "loss": 1.9912, "step": 42873 }, { "epoch": 4.398440705785802, "grad_norm": 0.04855189844965935, "learning_rate": 0.01, "loss": 1.9873, "step": 42876 }, { "epoch": 4.398748461222815, "grad_norm": 0.060284826904535294, "learning_rate": 0.01, "loss": 1.975, "step": 42879 }, { "epoch": 4.399056216659828, "grad_norm": 0.07179313898086548, "learning_rate": 0.01, "loss": 1.9957, "step": 42882 }, { "epoch": 4.3993639720968405, "grad_norm": 0.10332684218883514, "learning_rate": 0.01, "loss": 2.0003, "step": 42885 }, { "epoch": 4.399671727533853, "grad_norm": 0.13461638987064362, "learning_rate": 0.01, "loss": 1.9792, "step": 42888 }, { "epoch": 4.399979482970866, "grad_norm": 0.08879181742668152, "learning_rate": 0.01, "loss": 2.0071, "step": 42891 }, { "epoch": 4.400287238407879, "grad_norm": 0.064743272960186, "learning_rate": 0.01, "loss": 1.992, "step": 42894 }, { "epoch": 4.400594993844892, "grad_norm": 0.04650283232331276, "learning_rate": 0.01, "loss": 1.9802, "step": 42897 }, { "epoch": 4.400902749281904, "grad_norm": 0.05072256550192833, "learning_rate": 0.01, "loss": 1.9972, "step": 42900 }, { "epoch": 4.401210504718916, "grad_norm": 0.05984727293252945, "learning_rate": 0.01, "loss": 2.0123, "step": 42903 }, { "epoch": 4.401518260155929, "grad_norm": 0.0773845687508583, "learning_rate": 0.01, "loss": 1.9812, "step": 42906 }, { "epoch": 4.401826015592942, "grad_norm": 0.06231288984417915, "learning_rate": 0.01, "loss": 1.9875, "step": 42909 }, { "epoch": 4.402133771029955, "grad_norm": 0.07276583462953568, "learning_rate": 0.01, "loss": 2.0029, "step": 42912 }, { "epoch": 4.402441526466967, "grad_norm": 0.043372802436351776, "learning_rate": 0.01, "loss": 1.9978, "step": 42915 }, { "epoch": 4.40274928190398, "grad_norm": 0.05217559635639191, "learning_rate": 0.01, "loss": 2.021, "step": 42918 }, { "epoch": 4.403057037340993, "grad_norm": 0.058887772262096405, "learning_rate": 0.01, "loss": 2.0104, "step": 42921 }, { "epoch": 4.403364792778006, "grad_norm": 0.04377232864499092, "learning_rate": 0.01, "loss": 1.9981, "step": 42924 }, { "epoch": 4.4036725482150185, "grad_norm": 0.032925624400377274, "learning_rate": 0.01, "loss": 1.9881, "step": 42927 }, { "epoch": 4.403980303652031, "grad_norm": 0.1263015866279602, "learning_rate": 0.01, "loss": 1.9824, "step": 42930 }, { "epoch": 4.404288059089044, "grad_norm": 0.046885982155799866, "learning_rate": 0.01, "loss": 2.002, "step": 42933 }, { "epoch": 4.404595814526057, "grad_norm": 0.10321955382823944, "learning_rate": 0.01, "loss": 2.0221, "step": 42936 }, { "epoch": 4.40490356996307, "grad_norm": 0.06488531827926636, "learning_rate": 0.01, "loss": 2.0114, "step": 42939 }, { "epoch": 4.405211325400082, "grad_norm": 0.09423347562551498, "learning_rate": 0.01, "loss": 1.9653, "step": 42942 }, { "epoch": 4.405519080837095, "grad_norm": 0.06422553211450577, "learning_rate": 0.01, "loss": 2.014, "step": 42945 }, { "epoch": 4.405826836274107, "grad_norm": 0.05872100591659546, "learning_rate": 0.01, "loss": 1.9904, "step": 42948 }, { "epoch": 4.40613459171112, "grad_norm": 0.043210845440626144, "learning_rate": 0.01, "loss": 1.9934, "step": 42951 }, { "epoch": 4.406442347148133, "grad_norm": 0.041857898235321045, "learning_rate": 0.01, "loss": 1.9919, "step": 42954 }, { "epoch": 4.406750102585145, "grad_norm": 0.03347545117139816, "learning_rate": 0.01, "loss": 1.9947, "step": 42957 }, { "epoch": 4.407057858022158, "grad_norm": 0.0377437062561512, "learning_rate": 0.01, "loss": 1.9979, "step": 42960 }, { "epoch": 4.407365613459171, "grad_norm": 0.07432477176189423, "learning_rate": 0.01, "loss": 1.9823, "step": 42963 }, { "epoch": 4.407673368896184, "grad_norm": 0.038358092308044434, "learning_rate": 0.01, "loss": 1.9913, "step": 42966 }, { "epoch": 4.4079811243331966, "grad_norm": 0.09125792980194092, "learning_rate": 0.01, "loss": 1.9757, "step": 42969 }, { "epoch": 4.408288879770209, "grad_norm": 0.11572171747684479, "learning_rate": 0.01, "loss": 1.9858, "step": 42972 }, { "epoch": 4.408596635207222, "grad_norm": 0.05868324264883995, "learning_rate": 0.01, "loss": 1.9868, "step": 42975 }, { "epoch": 4.408904390644235, "grad_norm": 0.0679447203874588, "learning_rate": 0.01, "loss": 2.0116, "step": 42978 }, { "epoch": 4.409212146081248, "grad_norm": 0.050446171313524246, "learning_rate": 0.01, "loss": 1.9923, "step": 42981 }, { "epoch": 4.4095199015182605, "grad_norm": 0.05003255605697632, "learning_rate": 0.01, "loss": 2.0134, "step": 42984 }, { "epoch": 4.409827656955273, "grad_norm": 0.05460989847779274, "learning_rate": 0.01, "loss": 1.9911, "step": 42987 }, { "epoch": 4.410135412392286, "grad_norm": 0.04039241746068001, "learning_rate": 0.01, "loss": 1.981, "step": 42990 }, { "epoch": 4.410443167829298, "grad_norm": 0.04114096984267235, "learning_rate": 0.01, "loss": 1.9696, "step": 42993 }, { "epoch": 4.410750923266311, "grad_norm": 0.1269209384918213, "learning_rate": 0.01, "loss": 1.995, "step": 42996 }, { "epoch": 4.4110586787033235, "grad_norm": 0.059638604521751404, "learning_rate": 0.01, "loss": 2.0176, "step": 42999 }, { "epoch": 4.411366434140336, "grad_norm": 0.09658347070217133, "learning_rate": 0.01, "loss": 1.984, "step": 43002 }, { "epoch": 4.411674189577349, "grad_norm": 0.05749305710196495, "learning_rate": 0.01, "loss": 1.982, "step": 43005 }, { "epoch": 4.411981945014362, "grad_norm": 0.04522128403186798, "learning_rate": 0.01, "loss": 1.987, "step": 43008 }, { "epoch": 4.412289700451375, "grad_norm": 0.034170012921094894, "learning_rate": 0.01, "loss": 1.9874, "step": 43011 }, { "epoch": 4.412597455888387, "grad_norm": 0.04310225695371628, "learning_rate": 0.01, "loss": 1.9923, "step": 43014 }, { "epoch": 4.4129052113254, "grad_norm": 0.15212641656398773, "learning_rate": 0.01, "loss": 1.9796, "step": 43017 }, { "epoch": 4.413212966762413, "grad_norm": 0.12978756427764893, "learning_rate": 0.01, "loss": 1.99, "step": 43020 }, { "epoch": 4.413520722199426, "grad_norm": 0.07762716710567474, "learning_rate": 0.01, "loss": 1.9977, "step": 43023 }, { "epoch": 4.4138284776364385, "grad_norm": 0.049494557082653046, "learning_rate": 0.01, "loss": 1.9936, "step": 43026 }, { "epoch": 4.414136233073451, "grad_norm": 0.060781434178352356, "learning_rate": 0.01, "loss": 1.9916, "step": 43029 }, { "epoch": 4.414443988510464, "grad_norm": 0.03154407814145088, "learning_rate": 0.01, "loss": 1.9888, "step": 43032 }, { "epoch": 4.414751743947477, "grad_norm": 0.048733990639448166, "learning_rate": 0.01, "loss": 1.9783, "step": 43035 }, { "epoch": 4.415059499384489, "grad_norm": 0.04283260181546211, "learning_rate": 0.01, "loss": 1.9817, "step": 43038 }, { "epoch": 4.4153672548215015, "grad_norm": 0.06798703223466873, "learning_rate": 0.01, "loss": 1.9858, "step": 43041 }, { "epoch": 4.415675010258514, "grad_norm": 0.10504204034805298, "learning_rate": 0.01, "loss": 1.9933, "step": 43044 }, { "epoch": 4.415982765695527, "grad_norm": 0.1270654797554016, "learning_rate": 0.01, "loss": 2.0083, "step": 43047 }, { "epoch": 4.41629052113254, "grad_norm": 0.06907479465007782, "learning_rate": 0.01, "loss": 1.9641, "step": 43050 }, { "epoch": 4.416598276569553, "grad_norm": 0.034706976264715195, "learning_rate": 0.01, "loss": 2.0081, "step": 43053 }, { "epoch": 4.416906032006565, "grad_norm": 0.033550575375556946, "learning_rate": 0.01, "loss": 2.0029, "step": 43056 }, { "epoch": 4.417213787443578, "grad_norm": 0.03790535405278206, "learning_rate": 0.01, "loss": 2.009, "step": 43059 }, { "epoch": 4.417521542880591, "grad_norm": 0.04516111686825752, "learning_rate": 0.01, "loss": 2.0134, "step": 43062 }, { "epoch": 4.417829298317604, "grad_norm": 0.07906030863523483, "learning_rate": 0.01, "loss": 1.9944, "step": 43065 }, { "epoch": 4.4181370537546165, "grad_norm": 0.11393613368272781, "learning_rate": 0.01, "loss": 2.0164, "step": 43068 }, { "epoch": 4.418444809191629, "grad_norm": 0.04494154825806618, "learning_rate": 0.01, "loss": 1.9909, "step": 43071 }, { "epoch": 4.418752564628642, "grad_norm": 0.04806877672672272, "learning_rate": 0.01, "loss": 2.0064, "step": 43074 }, { "epoch": 4.419060320065655, "grad_norm": 0.09511521458625793, "learning_rate": 0.01, "loss": 1.9999, "step": 43077 }, { "epoch": 4.419368075502668, "grad_norm": 0.036587249487638474, "learning_rate": 0.01, "loss": 2.0116, "step": 43080 }, { "epoch": 4.4196758309396795, "grad_norm": 0.03894515708088875, "learning_rate": 0.01, "loss": 1.9858, "step": 43083 }, { "epoch": 4.419983586376692, "grad_norm": 0.06960659474134445, "learning_rate": 0.01, "loss": 2.0064, "step": 43086 }, { "epoch": 4.420291341813705, "grad_norm": 0.04635101556777954, "learning_rate": 0.01, "loss": 1.9889, "step": 43089 }, { "epoch": 4.420599097250718, "grad_norm": 0.09964662045240402, "learning_rate": 0.01, "loss": 2.0055, "step": 43092 }, { "epoch": 4.420906852687731, "grad_norm": 0.10087699443101883, "learning_rate": 0.01, "loss": 2.0002, "step": 43095 }, { "epoch": 4.421214608124743, "grad_norm": 0.09748293459415436, "learning_rate": 0.01, "loss": 1.9937, "step": 43098 }, { "epoch": 4.421522363561756, "grad_norm": 0.04654933884739876, "learning_rate": 0.01, "loss": 1.9927, "step": 43101 }, { "epoch": 4.421830118998769, "grad_norm": 0.05607482045888901, "learning_rate": 0.01, "loss": 2.0018, "step": 43104 }, { "epoch": 4.422137874435782, "grad_norm": 0.0513363815844059, "learning_rate": 0.01, "loss": 1.9793, "step": 43107 }, { "epoch": 4.4224456298727945, "grad_norm": 0.048932064324617386, "learning_rate": 0.01, "loss": 1.9764, "step": 43110 }, { "epoch": 4.422753385309807, "grad_norm": 0.05465288087725639, "learning_rate": 0.01, "loss": 1.9965, "step": 43113 }, { "epoch": 4.42306114074682, "grad_norm": 0.04400217905640602, "learning_rate": 0.01, "loss": 2.0257, "step": 43116 }, { "epoch": 4.423368896183833, "grad_norm": 0.03483066335320473, "learning_rate": 0.01, "loss": 1.9716, "step": 43119 }, { "epoch": 4.423676651620846, "grad_norm": 0.04909312352538109, "learning_rate": 0.01, "loss": 2.0137, "step": 43122 }, { "epoch": 4.4239844070578584, "grad_norm": 0.09361930191516876, "learning_rate": 0.01, "loss": 2.0031, "step": 43125 }, { "epoch": 4.42429216249487, "grad_norm": 0.06980263441801071, "learning_rate": 0.01, "loss": 1.9741, "step": 43128 }, { "epoch": 4.424599917931883, "grad_norm": 0.06628268212080002, "learning_rate": 0.01, "loss": 2.0002, "step": 43131 }, { "epoch": 4.424907673368896, "grad_norm": 0.10180728137493134, "learning_rate": 0.01, "loss": 2.0153, "step": 43134 }, { "epoch": 4.425215428805909, "grad_norm": 0.10624035447835922, "learning_rate": 0.01, "loss": 2.0225, "step": 43137 }, { "epoch": 4.4255231842429215, "grad_norm": 0.08775760233402252, "learning_rate": 0.01, "loss": 2.0051, "step": 43140 }, { "epoch": 4.425830939679934, "grad_norm": 0.06270244717597961, "learning_rate": 0.01, "loss": 1.9939, "step": 43143 }, { "epoch": 4.426138695116947, "grad_norm": 0.06912422925233841, "learning_rate": 0.01, "loss": 1.9912, "step": 43146 }, { "epoch": 4.42644645055396, "grad_norm": 0.05379674583673477, "learning_rate": 0.01, "loss": 1.9942, "step": 43149 }, { "epoch": 4.426754205990973, "grad_norm": 0.041072819381952286, "learning_rate": 0.01, "loss": 1.9982, "step": 43152 }, { "epoch": 4.427061961427985, "grad_norm": 0.05191728100180626, "learning_rate": 0.01, "loss": 1.9799, "step": 43155 }, { "epoch": 4.427369716864998, "grad_norm": 0.03773737698793411, "learning_rate": 0.01, "loss": 2.001, "step": 43158 }, { "epoch": 4.427677472302011, "grad_norm": 0.10148506611585617, "learning_rate": 0.01, "loss": 1.9877, "step": 43161 }, { "epoch": 4.427985227739024, "grad_norm": 0.071526899933815, "learning_rate": 0.01, "loss": 1.9992, "step": 43164 }, { "epoch": 4.4282929831760365, "grad_norm": 0.07906235009431839, "learning_rate": 0.01, "loss": 1.9976, "step": 43167 }, { "epoch": 4.428600738613049, "grad_norm": 0.18193936347961426, "learning_rate": 0.01, "loss": 2.0095, "step": 43170 }, { "epoch": 4.428908494050061, "grad_norm": 0.1734820157289505, "learning_rate": 0.01, "loss": 1.9997, "step": 43173 }, { "epoch": 4.429216249487074, "grad_norm": 0.09781987965106964, "learning_rate": 0.01, "loss": 2.0113, "step": 43176 }, { "epoch": 4.429524004924087, "grad_norm": 0.06472992151975632, "learning_rate": 0.01, "loss": 1.9971, "step": 43179 }, { "epoch": 4.4298317603610995, "grad_norm": 0.04425714537501335, "learning_rate": 0.01, "loss": 1.9961, "step": 43182 }, { "epoch": 4.430139515798112, "grad_norm": 0.04678405821323395, "learning_rate": 0.01, "loss": 1.9983, "step": 43185 }, { "epoch": 4.430447271235125, "grad_norm": 0.04311143234372139, "learning_rate": 0.01, "loss": 2.0079, "step": 43188 }, { "epoch": 4.430755026672138, "grad_norm": 0.060729045420885086, "learning_rate": 0.01, "loss": 2.0093, "step": 43191 }, { "epoch": 4.431062782109151, "grad_norm": 0.07931876182556152, "learning_rate": 0.01, "loss": 1.9979, "step": 43194 }, { "epoch": 4.431370537546163, "grad_norm": 0.048263415694236755, "learning_rate": 0.01, "loss": 2.012, "step": 43197 }, { "epoch": 4.431678292983176, "grad_norm": 0.05372344329953194, "learning_rate": 0.01, "loss": 1.9636, "step": 43200 }, { "epoch": 4.431986048420189, "grad_norm": 0.06506495922803879, "learning_rate": 0.01, "loss": 1.9934, "step": 43203 }, { "epoch": 4.432293803857202, "grad_norm": 0.03484785556793213, "learning_rate": 0.01, "loss": 2.0067, "step": 43206 }, { "epoch": 4.4326015592942145, "grad_norm": 0.18487481772899628, "learning_rate": 0.01, "loss": 1.9852, "step": 43209 }, { "epoch": 4.432909314731227, "grad_norm": 0.11683176457881927, "learning_rate": 0.01, "loss": 2.0134, "step": 43212 }, { "epoch": 4.43321707016824, "grad_norm": 0.08598039299249649, "learning_rate": 0.01, "loss": 2.0051, "step": 43215 }, { "epoch": 4.433524825605252, "grad_norm": 0.0889374166727066, "learning_rate": 0.01, "loss": 1.982, "step": 43218 }, { "epoch": 4.433832581042265, "grad_norm": 0.061477046459913254, "learning_rate": 0.01, "loss": 1.9836, "step": 43221 }, { "epoch": 4.4341403364792775, "grad_norm": 0.046285588294267654, "learning_rate": 0.01, "loss": 1.9982, "step": 43224 }, { "epoch": 4.43444809191629, "grad_norm": 0.06310747563838959, "learning_rate": 0.01, "loss": 2.0178, "step": 43227 }, { "epoch": 4.434755847353303, "grad_norm": 0.04792320355772972, "learning_rate": 0.01, "loss": 2.0126, "step": 43230 }, { "epoch": 4.435063602790316, "grad_norm": 0.03359830752015114, "learning_rate": 0.01, "loss": 1.9831, "step": 43233 }, { "epoch": 4.435371358227329, "grad_norm": 0.04137792810797691, "learning_rate": 0.01, "loss": 2.0146, "step": 43236 }, { "epoch": 4.435679113664341, "grad_norm": 0.04756288602948189, "learning_rate": 0.01, "loss": 2.0042, "step": 43239 }, { "epoch": 4.435986869101354, "grad_norm": 0.054949600249528885, "learning_rate": 0.01, "loss": 2.0041, "step": 43242 }, { "epoch": 4.436294624538367, "grad_norm": 0.14858773350715637, "learning_rate": 0.01, "loss": 2.0114, "step": 43245 }, { "epoch": 4.43660237997538, "grad_norm": 0.08089875429868698, "learning_rate": 0.01, "loss": 2.0079, "step": 43248 }, { "epoch": 4.4369101354123925, "grad_norm": 0.0340421199798584, "learning_rate": 0.01, "loss": 1.9996, "step": 43251 }, { "epoch": 4.437217890849405, "grad_norm": 0.04666365310549736, "learning_rate": 0.01, "loss": 1.9918, "step": 43254 }, { "epoch": 4.437525646286418, "grad_norm": 0.04860776662826538, "learning_rate": 0.01, "loss": 2.0152, "step": 43257 }, { "epoch": 4.437833401723431, "grad_norm": 0.04814871773123741, "learning_rate": 0.01, "loss": 2.0135, "step": 43260 }, { "epoch": 4.438141157160443, "grad_norm": 0.07159804552793503, "learning_rate": 0.01, "loss": 1.9873, "step": 43263 }, { "epoch": 4.4384489125974556, "grad_norm": 0.08109670132398605, "learning_rate": 0.01, "loss": 1.9799, "step": 43266 }, { "epoch": 4.438756668034468, "grad_norm": 0.10254354774951935, "learning_rate": 0.01, "loss": 1.991, "step": 43269 }, { "epoch": 4.439064423471481, "grad_norm": 0.10418622195720673, "learning_rate": 0.01, "loss": 1.9699, "step": 43272 }, { "epoch": 4.439372178908494, "grad_norm": 0.04432861879467964, "learning_rate": 0.01, "loss": 2.0032, "step": 43275 }, { "epoch": 4.439679934345507, "grad_norm": 0.05000505968928337, "learning_rate": 0.01, "loss": 2.016, "step": 43278 }, { "epoch": 4.4399876897825195, "grad_norm": 0.0718022808432579, "learning_rate": 0.01, "loss": 2.0209, "step": 43281 }, { "epoch": 4.440295445219532, "grad_norm": 0.038088515400886536, "learning_rate": 0.01, "loss": 1.9808, "step": 43284 }, { "epoch": 4.440603200656545, "grad_norm": 0.04395698755979538, "learning_rate": 0.01, "loss": 1.9987, "step": 43287 }, { "epoch": 4.440910956093558, "grad_norm": 0.03977638855576515, "learning_rate": 0.01, "loss": 1.9909, "step": 43290 }, { "epoch": 4.441218711530571, "grad_norm": 0.11403058469295502, "learning_rate": 0.01, "loss": 1.9967, "step": 43293 }, { "epoch": 4.441526466967583, "grad_norm": 0.03809293359518051, "learning_rate": 0.01, "loss": 2.0028, "step": 43296 }, { "epoch": 4.441834222404596, "grad_norm": 0.04494985193014145, "learning_rate": 0.01, "loss": 1.9854, "step": 43299 }, { "epoch": 4.442141977841609, "grad_norm": 0.11755307763814926, "learning_rate": 0.01, "loss": 2.0114, "step": 43302 }, { "epoch": 4.442449733278622, "grad_norm": 0.046333327889442444, "learning_rate": 0.01, "loss": 1.9655, "step": 43305 }, { "epoch": 4.442757488715634, "grad_norm": 0.05719512701034546, "learning_rate": 0.01, "loss": 1.9976, "step": 43308 }, { "epoch": 4.443065244152646, "grad_norm": 0.0707264393568039, "learning_rate": 0.01, "loss": 1.9761, "step": 43311 }, { "epoch": 4.443372999589659, "grad_norm": 0.043551988899707794, "learning_rate": 0.01, "loss": 2.0078, "step": 43314 }, { "epoch": 4.443680755026672, "grad_norm": 0.11364515870809555, "learning_rate": 0.01, "loss": 1.9897, "step": 43317 }, { "epoch": 4.443988510463685, "grad_norm": 0.15514688193798065, "learning_rate": 0.01, "loss": 1.9957, "step": 43320 }, { "epoch": 4.4442962659006975, "grad_norm": 0.10261968523263931, "learning_rate": 0.01, "loss": 1.9876, "step": 43323 }, { "epoch": 4.44460402133771, "grad_norm": 0.09683386236429214, "learning_rate": 0.01, "loss": 1.9961, "step": 43326 }, { "epoch": 4.444911776774723, "grad_norm": 0.055726949125528336, "learning_rate": 0.01, "loss": 1.9981, "step": 43329 }, { "epoch": 4.445219532211736, "grad_norm": 0.053106311708688736, "learning_rate": 0.01, "loss": 2.0087, "step": 43332 }, { "epoch": 4.445527287648749, "grad_norm": 0.043625880032777786, "learning_rate": 0.01, "loss": 2.023, "step": 43335 }, { "epoch": 4.445835043085761, "grad_norm": 0.05958040803670883, "learning_rate": 0.01, "loss": 1.987, "step": 43338 }, { "epoch": 4.446142798522774, "grad_norm": 0.043522778898477554, "learning_rate": 0.01, "loss": 2.0004, "step": 43341 }, { "epoch": 4.446450553959787, "grad_norm": 0.038740675896406174, "learning_rate": 0.01, "loss": 1.991, "step": 43344 }, { "epoch": 4.4467583093968, "grad_norm": 0.043132517486810684, "learning_rate": 0.01, "loss": 2.006, "step": 43347 }, { "epoch": 4.4470660648338125, "grad_norm": 0.07173844426870346, "learning_rate": 0.01, "loss": 2.0257, "step": 43350 }, { "epoch": 4.447373820270824, "grad_norm": 0.06787187606096268, "learning_rate": 0.01, "loss": 1.9997, "step": 43353 }, { "epoch": 4.447681575707837, "grad_norm": 0.1125207170844078, "learning_rate": 0.01, "loss": 1.9932, "step": 43356 }, { "epoch": 4.44798933114485, "grad_norm": 0.037585411220788956, "learning_rate": 0.01, "loss": 2.0055, "step": 43359 }, { "epoch": 4.448297086581863, "grad_norm": 0.09652412682771683, "learning_rate": 0.01, "loss": 1.9937, "step": 43362 }, { "epoch": 4.4486048420188755, "grad_norm": 0.0548916831612587, "learning_rate": 0.01, "loss": 2.0251, "step": 43365 }, { "epoch": 4.448912597455888, "grad_norm": 0.07648541778326035, "learning_rate": 0.01, "loss": 2.0098, "step": 43368 }, { "epoch": 4.449220352892901, "grad_norm": 0.07968877255916595, "learning_rate": 0.01, "loss": 1.9836, "step": 43371 }, { "epoch": 4.449528108329914, "grad_norm": 0.046214036643505096, "learning_rate": 0.01, "loss": 1.9994, "step": 43374 }, { "epoch": 4.449835863766927, "grad_norm": 0.03589135408401489, "learning_rate": 0.01, "loss": 2.0122, "step": 43377 }, { "epoch": 4.450143619203939, "grad_norm": 0.1217019185423851, "learning_rate": 0.01, "loss": 1.9895, "step": 43380 }, { "epoch": 4.450451374640952, "grad_norm": 0.14711309969425201, "learning_rate": 0.01, "loss": 1.9824, "step": 43383 }, { "epoch": 4.450759130077965, "grad_norm": 0.15004196763038635, "learning_rate": 0.01, "loss": 1.9774, "step": 43386 }, { "epoch": 4.451066885514978, "grad_norm": 0.143090158700943, "learning_rate": 0.01, "loss": 2.0206, "step": 43389 }, { "epoch": 4.4513746409519905, "grad_norm": 0.06957674771547318, "learning_rate": 0.01, "loss": 1.9934, "step": 43392 }, { "epoch": 4.451682396389003, "grad_norm": 0.10823699831962585, "learning_rate": 0.01, "loss": 2.0141, "step": 43395 }, { "epoch": 4.451990151826015, "grad_norm": 0.0608796700835228, "learning_rate": 0.01, "loss": 1.9998, "step": 43398 }, { "epoch": 4.452297907263028, "grad_norm": 0.038806330412626266, "learning_rate": 0.01, "loss": 1.9815, "step": 43401 }, { "epoch": 4.452605662700041, "grad_norm": 0.053961463272571564, "learning_rate": 0.01, "loss": 2.0196, "step": 43404 }, { "epoch": 4.4529134181370535, "grad_norm": 0.03717347979545593, "learning_rate": 0.01, "loss": 1.9702, "step": 43407 }, { "epoch": 4.453221173574066, "grad_norm": 0.04289384186267853, "learning_rate": 0.01, "loss": 1.9965, "step": 43410 }, { "epoch": 4.453528929011079, "grad_norm": 0.08649279922246933, "learning_rate": 0.01, "loss": 2.0135, "step": 43413 }, { "epoch": 4.453836684448092, "grad_norm": 0.1384621560573578, "learning_rate": 0.01, "loss": 1.9779, "step": 43416 }, { "epoch": 4.454144439885105, "grad_norm": 0.09169614315032959, "learning_rate": 0.01, "loss": 1.968, "step": 43419 }, { "epoch": 4.4544521953221174, "grad_norm": 0.07004016637802124, "learning_rate": 0.01, "loss": 1.9986, "step": 43422 }, { "epoch": 4.45475995075913, "grad_norm": 0.0574488528072834, "learning_rate": 0.01, "loss": 1.9666, "step": 43425 }, { "epoch": 4.455067706196143, "grad_norm": 0.048521172255277634, "learning_rate": 0.01, "loss": 1.9769, "step": 43428 }, { "epoch": 4.455375461633156, "grad_norm": 0.11481918394565582, "learning_rate": 0.01, "loss": 1.9951, "step": 43431 }, { "epoch": 4.455683217070169, "grad_norm": 0.09158429503440857, "learning_rate": 0.01, "loss": 1.9884, "step": 43434 }, { "epoch": 4.455990972507181, "grad_norm": 0.039962366223335266, "learning_rate": 0.01, "loss": 1.9947, "step": 43437 }, { "epoch": 4.456298727944194, "grad_norm": 0.04112045839428902, "learning_rate": 0.01, "loss": 1.9806, "step": 43440 }, { "epoch": 4.456606483381206, "grad_norm": 0.057623397558927536, "learning_rate": 0.01, "loss": 2.0006, "step": 43443 }, { "epoch": 4.456914238818219, "grad_norm": 0.0683390349149704, "learning_rate": 0.01, "loss": 2.0118, "step": 43446 }, { "epoch": 4.457221994255232, "grad_norm": 0.09766959398984909, "learning_rate": 0.01, "loss": 2.0217, "step": 43449 }, { "epoch": 4.457529749692244, "grad_norm": 0.041423216462135315, "learning_rate": 0.01, "loss": 1.9732, "step": 43452 }, { "epoch": 4.457837505129257, "grad_norm": 0.08813819289207458, "learning_rate": 0.01, "loss": 1.9777, "step": 43455 }, { "epoch": 4.45814526056627, "grad_norm": 0.06834255903959274, "learning_rate": 0.01, "loss": 2.0154, "step": 43458 }, { "epoch": 4.458453016003283, "grad_norm": 0.08782140165567398, "learning_rate": 0.01, "loss": 2.0052, "step": 43461 }, { "epoch": 4.4587607714402955, "grad_norm": 0.08695351332426071, "learning_rate": 0.01, "loss": 1.9848, "step": 43464 }, { "epoch": 4.459068526877308, "grad_norm": 0.09302899986505508, "learning_rate": 0.01, "loss": 1.9906, "step": 43467 }, { "epoch": 4.459376282314321, "grad_norm": 0.06920462846755981, "learning_rate": 0.01, "loss": 1.9963, "step": 43470 }, { "epoch": 4.459684037751334, "grad_norm": 0.04809924215078354, "learning_rate": 0.01, "loss": 2.0079, "step": 43473 }, { "epoch": 4.459991793188347, "grad_norm": 0.06642390787601471, "learning_rate": 0.01, "loss": 1.9854, "step": 43476 }, { "epoch": 4.460299548625359, "grad_norm": 0.04986566677689552, "learning_rate": 0.01, "loss": 1.9835, "step": 43479 }, { "epoch": 4.460607304062372, "grad_norm": 0.061649248003959656, "learning_rate": 0.01, "loss": 2.0061, "step": 43482 }, { "epoch": 4.460915059499385, "grad_norm": 0.049117859452962875, "learning_rate": 0.01, "loss": 2.0257, "step": 43485 }, { "epoch": 4.461222814936397, "grad_norm": 0.049163367599248886, "learning_rate": 0.01, "loss": 1.9871, "step": 43488 }, { "epoch": 4.46153057037341, "grad_norm": 0.036282382905483246, "learning_rate": 0.01, "loss": 2.0089, "step": 43491 }, { "epoch": 4.461838325810422, "grad_norm": 0.10942814499139786, "learning_rate": 0.01, "loss": 2.0058, "step": 43494 }, { "epoch": 4.462146081247435, "grad_norm": 0.06589425355195999, "learning_rate": 0.01, "loss": 1.9831, "step": 43497 }, { "epoch": 4.462453836684448, "grad_norm": 0.0967121571302414, "learning_rate": 0.01, "loss": 2.0149, "step": 43500 }, { "epoch": 4.462761592121461, "grad_norm": 0.0673070177435875, "learning_rate": 0.01, "loss": 1.9889, "step": 43503 }, { "epoch": 4.4630693475584735, "grad_norm": 0.09439177066087723, "learning_rate": 0.01, "loss": 1.9928, "step": 43506 }, { "epoch": 4.463377102995486, "grad_norm": 0.06875397264957428, "learning_rate": 0.01, "loss": 1.9826, "step": 43509 }, { "epoch": 4.463684858432499, "grad_norm": 0.0774875059723854, "learning_rate": 0.01, "loss": 1.9729, "step": 43512 }, { "epoch": 4.463992613869512, "grad_norm": 0.07138268649578094, "learning_rate": 0.01, "loss": 2.0024, "step": 43515 }, { "epoch": 4.464300369306525, "grad_norm": 0.03174857050180435, "learning_rate": 0.01, "loss": 1.9842, "step": 43518 }, { "epoch": 4.464608124743537, "grad_norm": 0.11445671319961548, "learning_rate": 0.01, "loss": 1.967, "step": 43521 }, { "epoch": 4.46491588018055, "grad_norm": 0.041139326989650726, "learning_rate": 0.01, "loss": 1.9953, "step": 43524 }, { "epoch": 4.465223635617563, "grad_norm": 0.043187763541936874, "learning_rate": 0.01, "loss": 2.024, "step": 43527 }, { "epoch": 4.465531391054576, "grad_norm": 0.09367438405752182, "learning_rate": 0.01, "loss": 2.0089, "step": 43530 }, { "epoch": 4.465839146491588, "grad_norm": 0.07866624742746353, "learning_rate": 0.01, "loss": 1.9997, "step": 43533 }, { "epoch": 4.4661469019286, "grad_norm": 0.08221796900033951, "learning_rate": 0.01, "loss": 1.9908, "step": 43536 }, { "epoch": 4.466454657365613, "grad_norm": 0.07687985897064209, "learning_rate": 0.01, "loss": 1.9927, "step": 43539 }, { "epoch": 4.466762412802626, "grad_norm": 0.042440030723810196, "learning_rate": 0.01, "loss": 2.0002, "step": 43542 }, { "epoch": 4.467070168239639, "grad_norm": 0.03422633558511734, "learning_rate": 0.01, "loss": 1.9868, "step": 43545 }, { "epoch": 4.4673779236766515, "grad_norm": 0.05047105997800827, "learning_rate": 0.01, "loss": 1.9935, "step": 43548 }, { "epoch": 4.467685679113664, "grad_norm": 0.0817263275384903, "learning_rate": 0.01, "loss": 2.0029, "step": 43551 }, { "epoch": 4.467993434550677, "grad_norm": 0.07055595517158508, "learning_rate": 0.01, "loss": 2.008, "step": 43554 }, { "epoch": 4.46830118998769, "grad_norm": 0.12304790318012238, "learning_rate": 0.01, "loss": 2.0027, "step": 43557 }, { "epoch": 4.468608945424703, "grad_norm": 0.057413533329963684, "learning_rate": 0.01, "loss": 2.0168, "step": 43560 }, { "epoch": 4.468916700861715, "grad_norm": 0.1333860456943512, "learning_rate": 0.01, "loss": 2.0154, "step": 43563 }, { "epoch": 4.469224456298728, "grad_norm": 0.04758629575371742, "learning_rate": 0.01, "loss": 1.9882, "step": 43566 }, { "epoch": 4.469532211735741, "grad_norm": 0.0403926707804203, "learning_rate": 0.01, "loss": 2.0074, "step": 43569 }, { "epoch": 4.469839967172754, "grad_norm": 0.05051876977086067, "learning_rate": 0.01, "loss": 1.9798, "step": 43572 }, { "epoch": 4.470147722609767, "grad_norm": 0.12117879837751389, "learning_rate": 0.01, "loss": 1.9886, "step": 43575 }, { "epoch": 4.4704554780467785, "grad_norm": 0.0969235822558403, "learning_rate": 0.01, "loss": 1.9754, "step": 43578 }, { "epoch": 4.470763233483791, "grad_norm": 0.10259240120649338, "learning_rate": 0.01, "loss": 2.0028, "step": 43581 }, { "epoch": 4.471070988920804, "grad_norm": 0.056327935308218, "learning_rate": 0.01, "loss": 2.0223, "step": 43584 }, { "epoch": 4.471378744357817, "grad_norm": 0.09567862004041672, "learning_rate": 0.01, "loss": 2.026, "step": 43587 }, { "epoch": 4.47168649979483, "grad_norm": 0.0687853991985321, "learning_rate": 0.01, "loss": 2.011, "step": 43590 }, { "epoch": 4.471994255231842, "grad_norm": 0.03400299698114395, "learning_rate": 0.01, "loss": 2.0126, "step": 43593 }, { "epoch": 4.472302010668855, "grad_norm": 0.05291087552905083, "learning_rate": 0.01, "loss": 2.0251, "step": 43596 }, { "epoch": 4.472609766105868, "grad_norm": 0.09208089858293533, "learning_rate": 0.01, "loss": 1.9933, "step": 43599 }, { "epoch": 4.472917521542881, "grad_norm": 0.08073403686285019, "learning_rate": 0.01, "loss": 1.9934, "step": 43602 }, { "epoch": 4.4732252769798935, "grad_norm": 0.04130372405052185, "learning_rate": 0.01, "loss": 1.9883, "step": 43605 }, { "epoch": 4.473533032416906, "grad_norm": 0.06413860619068146, "learning_rate": 0.01, "loss": 1.9817, "step": 43608 }, { "epoch": 4.473840787853919, "grad_norm": 0.10064514726400375, "learning_rate": 0.01, "loss": 1.9879, "step": 43611 }, { "epoch": 4.474148543290932, "grad_norm": 0.04522353783249855, "learning_rate": 0.01, "loss": 1.9923, "step": 43614 }, { "epoch": 4.474456298727945, "grad_norm": 0.03851405903697014, "learning_rate": 0.01, "loss": 1.9828, "step": 43617 }, { "epoch": 4.474764054164957, "grad_norm": 0.03857644274830818, "learning_rate": 0.01, "loss": 1.9754, "step": 43620 }, { "epoch": 4.475071809601969, "grad_norm": 0.07934223115444183, "learning_rate": 0.01, "loss": 1.9901, "step": 43623 }, { "epoch": 4.475379565038982, "grad_norm": 0.034822918474674225, "learning_rate": 0.01, "loss": 1.9949, "step": 43626 }, { "epoch": 4.475687320475995, "grad_norm": 0.07185374945402145, "learning_rate": 0.01, "loss": 1.9969, "step": 43629 }, { "epoch": 4.475995075913008, "grad_norm": 0.03542499616742134, "learning_rate": 0.01, "loss": 2.0171, "step": 43632 }, { "epoch": 4.47630283135002, "grad_norm": 0.034001853317022324, "learning_rate": 0.01, "loss": 2.0091, "step": 43635 }, { "epoch": 4.476610586787033, "grad_norm": 0.04940172657370567, "learning_rate": 0.01, "loss": 2.0006, "step": 43638 }, { "epoch": 4.476918342224046, "grad_norm": 0.04434162750840187, "learning_rate": 0.01, "loss": 1.983, "step": 43641 }, { "epoch": 4.477226097661059, "grad_norm": 0.08915964514017105, "learning_rate": 0.01, "loss": 1.9974, "step": 43644 }, { "epoch": 4.4775338530980715, "grad_norm": 0.05530424416065216, "learning_rate": 0.01, "loss": 1.98, "step": 43647 }, { "epoch": 4.477841608535084, "grad_norm": 0.06746593117713928, "learning_rate": 0.01, "loss": 1.9997, "step": 43650 }, { "epoch": 4.478149363972097, "grad_norm": 0.05016857758164406, "learning_rate": 0.01, "loss": 1.9764, "step": 43653 }, { "epoch": 4.47845711940911, "grad_norm": 0.04478998854756355, "learning_rate": 0.01, "loss": 1.9982, "step": 43656 }, { "epoch": 4.478764874846123, "grad_norm": 0.04121081531047821, "learning_rate": 0.01, "loss": 1.9814, "step": 43659 }, { "epoch": 4.479072630283135, "grad_norm": 0.0476783849298954, "learning_rate": 0.01, "loss": 2.0231, "step": 43662 }, { "epoch": 4.479380385720148, "grad_norm": 0.15095461905002594, "learning_rate": 0.01, "loss": 1.9695, "step": 43665 }, { "epoch": 4.47968814115716, "grad_norm": 0.06961385160684586, "learning_rate": 0.01, "loss": 2.0014, "step": 43668 }, { "epoch": 4.479995896594173, "grad_norm": 0.040564242750406265, "learning_rate": 0.01, "loss": 1.9983, "step": 43671 }, { "epoch": 4.480303652031186, "grad_norm": 0.07302725315093994, "learning_rate": 0.01, "loss": 2.0106, "step": 43674 }, { "epoch": 4.480611407468198, "grad_norm": 0.12494261562824249, "learning_rate": 0.01, "loss": 2.0155, "step": 43677 }, { "epoch": 4.480919162905211, "grad_norm": 0.06394487619400024, "learning_rate": 0.01, "loss": 2.0087, "step": 43680 }, { "epoch": 4.481226918342224, "grad_norm": 0.0740828663110733, "learning_rate": 0.01, "loss": 1.9907, "step": 43683 }, { "epoch": 4.481534673779237, "grad_norm": 0.06996148079633713, "learning_rate": 0.01, "loss": 2.0093, "step": 43686 }, { "epoch": 4.4818424292162495, "grad_norm": 0.041247058659791946, "learning_rate": 0.01, "loss": 1.9849, "step": 43689 }, { "epoch": 4.482150184653262, "grad_norm": 0.04092543199658394, "learning_rate": 0.01, "loss": 1.9997, "step": 43692 }, { "epoch": 4.482457940090275, "grad_norm": 0.05542224645614624, "learning_rate": 0.01, "loss": 2.0067, "step": 43695 }, { "epoch": 4.482765695527288, "grad_norm": 0.0780789703130722, "learning_rate": 0.01, "loss": 1.9686, "step": 43698 }, { "epoch": 4.483073450964301, "grad_norm": 0.06363578885793686, "learning_rate": 0.01, "loss": 1.9807, "step": 43701 }, { "epoch": 4.483381206401313, "grad_norm": 0.12707744538784027, "learning_rate": 0.01, "loss": 1.9427, "step": 43704 }, { "epoch": 4.483688961838326, "grad_norm": 0.14165590703487396, "learning_rate": 0.01, "loss": 1.9958, "step": 43707 }, { "epoch": 4.483996717275339, "grad_norm": 0.08245092630386353, "learning_rate": 0.01, "loss": 2.0143, "step": 43710 }, { "epoch": 4.484304472712351, "grad_norm": 0.048774946480989456, "learning_rate": 0.01, "loss": 1.9703, "step": 43713 }, { "epoch": 4.484612228149364, "grad_norm": 0.05654727295041084, "learning_rate": 0.01, "loss": 1.999, "step": 43716 }, { "epoch": 4.4849199835863764, "grad_norm": 0.046862781047821045, "learning_rate": 0.01, "loss": 1.9948, "step": 43719 }, { "epoch": 4.485227739023389, "grad_norm": 0.06660104542970657, "learning_rate": 0.01, "loss": 1.9756, "step": 43722 }, { "epoch": 4.485535494460402, "grad_norm": 0.05910905450582504, "learning_rate": 0.01, "loss": 1.992, "step": 43725 }, { "epoch": 4.485843249897415, "grad_norm": 0.05047675967216492, "learning_rate": 0.01, "loss": 2.0072, "step": 43728 }, { "epoch": 4.486151005334428, "grad_norm": 0.03190279006958008, "learning_rate": 0.01, "loss": 2.0058, "step": 43731 }, { "epoch": 4.48645876077144, "grad_norm": 0.04960602894425392, "learning_rate": 0.01, "loss": 1.9991, "step": 43734 }, { "epoch": 4.486766516208453, "grad_norm": 0.10073108971118927, "learning_rate": 0.01, "loss": 1.9863, "step": 43737 }, { "epoch": 4.487074271645466, "grad_norm": 0.051219914108514786, "learning_rate": 0.01, "loss": 1.9923, "step": 43740 }, { "epoch": 4.487382027082479, "grad_norm": 0.051645878702402115, "learning_rate": 0.01, "loss": 1.9838, "step": 43743 }, { "epoch": 4.4876897825194915, "grad_norm": 0.08252865076065063, "learning_rate": 0.01, "loss": 2.0034, "step": 43746 }, { "epoch": 4.487997537956504, "grad_norm": 0.06515108048915863, "learning_rate": 0.01, "loss": 1.9843, "step": 43749 }, { "epoch": 4.488305293393517, "grad_norm": 0.038027096539735794, "learning_rate": 0.01, "loss": 1.9856, "step": 43752 }, { "epoch": 4.48861304883053, "grad_norm": 0.06444455683231354, "learning_rate": 0.01, "loss": 1.9978, "step": 43755 }, { "epoch": 4.488920804267542, "grad_norm": 0.03724157065153122, "learning_rate": 0.01, "loss": 1.9947, "step": 43758 }, { "epoch": 4.4892285597045545, "grad_norm": 0.04831653833389282, "learning_rate": 0.01, "loss": 2.02, "step": 43761 }, { "epoch": 4.489536315141567, "grad_norm": 0.062199629843235016, "learning_rate": 0.01, "loss": 1.9885, "step": 43764 }, { "epoch": 4.48984407057858, "grad_norm": 0.047005314379930496, "learning_rate": 0.01, "loss": 2.0018, "step": 43767 }, { "epoch": 4.490151826015593, "grad_norm": 0.1316227912902832, "learning_rate": 0.01, "loss": 1.9741, "step": 43770 }, { "epoch": 4.490459581452606, "grad_norm": 0.05741405114531517, "learning_rate": 0.01, "loss": 2.0053, "step": 43773 }, { "epoch": 4.490767336889618, "grad_norm": 0.10407622158527374, "learning_rate": 0.01, "loss": 1.9765, "step": 43776 }, { "epoch": 4.491075092326631, "grad_norm": 0.05979277566075325, "learning_rate": 0.01, "loss": 1.9882, "step": 43779 }, { "epoch": 4.491382847763644, "grad_norm": 0.033753860741853714, "learning_rate": 0.01, "loss": 1.9822, "step": 43782 }, { "epoch": 4.491690603200657, "grad_norm": 0.03481614217162132, "learning_rate": 0.01, "loss": 2.0178, "step": 43785 }, { "epoch": 4.4919983586376695, "grad_norm": 0.05884877219796181, "learning_rate": 0.01, "loss": 1.98, "step": 43788 }, { "epoch": 4.492306114074682, "grad_norm": 0.10501875728368759, "learning_rate": 0.01, "loss": 2.0148, "step": 43791 }, { "epoch": 4.492613869511695, "grad_norm": 0.07319078594446182, "learning_rate": 0.01, "loss": 1.9872, "step": 43794 }, { "epoch": 4.492921624948708, "grad_norm": 0.04055263102054596, "learning_rate": 0.01, "loss": 2.0002, "step": 43797 }, { "epoch": 4.493229380385721, "grad_norm": 0.1268715113401413, "learning_rate": 0.01, "loss": 2.016, "step": 43800 }, { "epoch": 4.4935371358227325, "grad_norm": 0.0518607497215271, "learning_rate": 0.01, "loss": 1.9931, "step": 43803 }, { "epoch": 4.493844891259745, "grad_norm": 0.08158674836158752, "learning_rate": 0.01, "loss": 1.9974, "step": 43806 }, { "epoch": 4.494152646696758, "grad_norm": 0.07762011885643005, "learning_rate": 0.01, "loss": 2.0026, "step": 43809 }, { "epoch": 4.494460402133771, "grad_norm": 0.09235764294862747, "learning_rate": 0.01, "loss": 1.9937, "step": 43812 }, { "epoch": 4.494768157570784, "grad_norm": 0.050333961844444275, "learning_rate": 0.01, "loss": 2.0071, "step": 43815 }, { "epoch": 4.495075913007796, "grad_norm": 0.0546598955988884, "learning_rate": 0.01, "loss": 2.011, "step": 43818 }, { "epoch": 4.495383668444809, "grad_norm": 0.07909304648637772, "learning_rate": 0.01, "loss": 2.002, "step": 43821 }, { "epoch": 4.495691423881822, "grad_norm": 0.10644279420375824, "learning_rate": 0.01, "loss": 2.0013, "step": 43824 }, { "epoch": 4.495999179318835, "grad_norm": 0.06348330527544022, "learning_rate": 0.01, "loss": 1.9957, "step": 43827 }, { "epoch": 4.4963069347558475, "grad_norm": 0.060767900198698044, "learning_rate": 0.01, "loss": 1.9772, "step": 43830 }, { "epoch": 4.49661469019286, "grad_norm": 0.050787363201379776, "learning_rate": 0.01, "loss": 1.9858, "step": 43833 }, { "epoch": 4.496922445629873, "grad_norm": 0.0362878292798996, "learning_rate": 0.01, "loss": 2.0033, "step": 43836 }, { "epoch": 4.497230201066886, "grad_norm": 0.07702784240245819, "learning_rate": 0.01, "loss": 2.0046, "step": 43839 }, { "epoch": 4.497537956503899, "grad_norm": 0.05007346719503403, "learning_rate": 0.01, "loss": 2.0105, "step": 43842 }, { "epoch": 4.4978457119409105, "grad_norm": 0.04794207960367203, "learning_rate": 0.01, "loss": 2.0027, "step": 43845 }, { "epoch": 4.498153467377923, "grad_norm": 0.04176037013530731, "learning_rate": 0.01, "loss": 1.9922, "step": 43848 }, { "epoch": 4.498461222814936, "grad_norm": 0.06070488691329956, "learning_rate": 0.01, "loss": 1.9889, "step": 43851 }, { "epoch": 4.498768978251949, "grad_norm": 0.0750289261341095, "learning_rate": 0.01, "loss": 1.9803, "step": 43854 }, { "epoch": 4.499076733688962, "grad_norm": 0.1204972118139267, "learning_rate": 0.01, "loss": 1.9801, "step": 43857 }, { "epoch": 4.499384489125974, "grad_norm": 0.03909352049231529, "learning_rate": 0.01, "loss": 1.9919, "step": 43860 }, { "epoch": 4.499692244562987, "grad_norm": 0.060046713799238205, "learning_rate": 0.01, "loss": 2.0008, "step": 43863 }, { "epoch": 4.5, "grad_norm": 0.03170366585254669, "learning_rate": 0.01, "loss": 2.0012, "step": 43866 }, { "epoch": 4.500307755437013, "grad_norm": 0.07817841321229935, "learning_rate": 0.01, "loss": 1.9851, "step": 43869 }, { "epoch": 4.500615510874026, "grad_norm": 0.06809035688638687, "learning_rate": 0.01, "loss": 2.0199, "step": 43872 }, { "epoch": 4.500923266311038, "grad_norm": 0.0353827029466629, "learning_rate": 0.01, "loss": 2.0217, "step": 43875 }, { "epoch": 4.501231021748051, "grad_norm": 0.05174241214990616, "learning_rate": 0.01, "loss": 1.9882, "step": 43878 }, { "epoch": 4.501538777185064, "grad_norm": 0.05049923434853554, "learning_rate": 0.01, "loss": 2.0102, "step": 43881 }, { "epoch": 4.501846532622077, "grad_norm": 0.04014773294329643, "learning_rate": 0.01, "loss": 2.0177, "step": 43884 }, { "epoch": 4.502154288059089, "grad_norm": 0.08103401213884354, "learning_rate": 0.01, "loss": 1.9977, "step": 43887 }, { "epoch": 4.502462043496102, "grad_norm": 0.04270040616393089, "learning_rate": 0.01, "loss": 2.0035, "step": 43890 }, { "epoch": 4.502769798933114, "grad_norm": 0.05039724335074425, "learning_rate": 0.01, "loss": 1.9908, "step": 43893 }, { "epoch": 4.503077554370127, "grad_norm": 0.10098660737276077, "learning_rate": 0.01, "loss": 1.9876, "step": 43896 }, { "epoch": 4.50338530980714, "grad_norm": 0.08645161241292953, "learning_rate": 0.01, "loss": 1.9762, "step": 43899 }, { "epoch": 4.5036930652441525, "grad_norm": 0.06836593151092529, "learning_rate": 0.01, "loss": 2.0081, "step": 43902 }, { "epoch": 4.504000820681165, "grad_norm": 0.09137406945228577, "learning_rate": 0.01, "loss": 1.9875, "step": 43905 }, { "epoch": 4.504308576118178, "grad_norm": 0.04432569816708565, "learning_rate": 0.01, "loss": 1.985, "step": 43908 }, { "epoch": 4.504616331555191, "grad_norm": 0.03743622079491615, "learning_rate": 0.01, "loss": 2.0019, "step": 43911 }, { "epoch": 4.504924086992204, "grad_norm": 0.09280388057231903, "learning_rate": 0.01, "loss": 1.9984, "step": 43914 }, { "epoch": 4.505231842429216, "grad_norm": 0.06827981770038605, "learning_rate": 0.01, "loss": 2.017, "step": 43917 }, { "epoch": 4.505539597866229, "grad_norm": 0.13798165321350098, "learning_rate": 0.01, "loss": 1.9852, "step": 43920 }, { "epoch": 4.505847353303242, "grad_norm": 0.046148303896188736, "learning_rate": 0.01, "loss": 1.9979, "step": 43923 }, { "epoch": 4.506155108740255, "grad_norm": 0.06119319424033165, "learning_rate": 0.01, "loss": 1.9993, "step": 43926 }, { "epoch": 4.5064628641772675, "grad_norm": 0.04381044954061508, "learning_rate": 0.01, "loss": 1.9884, "step": 43929 }, { "epoch": 4.506770619614279, "grad_norm": 0.043389271944761276, "learning_rate": 0.01, "loss": 2.0023, "step": 43932 }, { "epoch": 4.507078375051293, "grad_norm": 0.0379410944879055, "learning_rate": 0.01, "loss": 2.002, "step": 43935 }, { "epoch": 4.507386130488305, "grad_norm": 0.043701719492673874, "learning_rate": 0.01, "loss": 1.9863, "step": 43938 }, { "epoch": 4.507693885925318, "grad_norm": 0.11455623805522919, "learning_rate": 0.01, "loss": 1.9864, "step": 43941 }, { "epoch": 4.5080016413623305, "grad_norm": 0.06367077678442001, "learning_rate": 0.01, "loss": 2.0068, "step": 43944 }, { "epoch": 4.508309396799343, "grad_norm": 0.12104804813861847, "learning_rate": 0.01, "loss": 2.0108, "step": 43947 }, { "epoch": 4.508617152236356, "grad_norm": 0.08374746888875961, "learning_rate": 0.01, "loss": 1.9676, "step": 43950 }, { "epoch": 4.508924907673369, "grad_norm": 0.12107446789741516, "learning_rate": 0.01, "loss": 2.0093, "step": 43953 }, { "epoch": 4.509232663110382, "grad_norm": 0.043210193514823914, "learning_rate": 0.01, "loss": 1.972, "step": 43956 }, { "epoch": 4.509540418547394, "grad_norm": 0.05984468385577202, "learning_rate": 0.01, "loss": 2.0054, "step": 43959 }, { "epoch": 4.509848173984407, "grad_norm": 0.04827868938446045, "learning_rate": 0.01, "loss": 2.0216, "step": 43962 }, { "epoch": 4.51015592942142, "grad_norm": 0.04763716831803322, "learning_rate": 0.01, "loss": 1.9968, "step": 43965 }, { "epoch": 4.510463684858433, "grad_norm": 0.045888885855674744, "learning_rate": 0.01, "loss": 1.9974, "step": 43968 }, { "epoch": 4.5107714402954455, "grad_norm": 0.06434401869773865, "learning_rate": 0.01, "loss": 2.0009, "step": 43971 }, { "epoch": 4.511079195732458, "grad_norm": 0.09480862319469452, "learning_rate": 0.01, "loss": 1.9885, "step": 43974 }, { "epoch": 4.51138695116947, "grad_norm": 0.10739507526159286, "learning_rate": 0.01, "loss": 1.9996, "step": 43977 }, { "epoch": 4.511694706606484, "grad_norm": 0.06446696072816849, "learning_rate": 0.01, "loss": 1.9798, "step": 43980 }, { "epoch": 4.512002462043496, "grad_norm": 0.0567287839949131, "learning_rate": 0.01, "loss": 1.9813, "step": 43983 }, { "epoch": 4.5123102174805085, "grad_norm": 0.0359378419816494, "learning_rate": 0.01, "loss": 1.9888, "step": 43986 }, { "epoch": 4.512617972917521, "grad_norm": 0.052227430045604706, "learning_rate": 0.01, "loss": 1.9993, "step": 43989 }, { "epoch": 4.512925728354534, "grad_norm": 0.058354392647743225, "learning_rate": 0.01, "loss": 2.0029, "step": 43992 }, { "epoch": 4.513233483791547, "grad_norm": 0.06100015714764595, "learning_rate": 0.01, "loss": 1.996, "step": 43995 }, { "epoch": 4.51354123922856, "grad_norm": 0.0857095718383789, "learning_rate": 0.01, "loss": 1.9899, "step": 43998 }, { "epoch": 4.513848994665572, "grad_norm": 0.11230607330799103, "learning_rate": 0.01, "loss": 1.9907, "step": 44001 }, { "epoch": 4.514156750102585, "grad_norm": 0.082217276096344, "learning_rate": 0.01, "loss": 1.9945, "step": 44004 }, { "epoch": 4.514464505539598, "grad_norm": 0.07966562360525131, "learning_rate": 0.01, "loss": 2.0134, "step": 44007 }, { "epoch": 4.514772260976611, "grad_norm": 0.0608987994492054, "learning_rate": 0.01, "loss": 1.9969, "step": 44010 }, { "epoch": 4.5150800164136236, "grad_norm": 0.029933054000139236, "learning_rate": 0.01, "loss": 1.9937, "step": 44013 }, { "epoch": 4.515387771850636, "grad_norm": 0.04696916788816452, "learning_rate": 0.01, "loss": 1.9903, "step": 44016 }, { "epoch": 4.515695527287649, "grad_norm": 0.0494072362780571, "learning_rate": 0.01, "loss": 2.003, "step": 44019 }, { "epoch": 4.516003282724661, "grad_norm": 0.06069015711545944, "learning_rate": 0.01, "loss": 1.9764, "step": 44022 }, { "epoch": 4.516311038161675, "grad_norm": 0.05014580115675926, "learning_rate": 0.01, "loss": 1.9922, "step": 44025 }, { "epoch": 4.516618793598687, "grad_norm": 0.04753846675157547, "learning_rate": 0.01, "loss": 1.9968, "step": 44028 }, { "epoch": 4.516926549035699, "grad_norm": 0.053366005420684814, "learning_rate": 0.01, "loss": 2.0075, "step": 44031 }, { "epoch": 4.517234304472712, "grad_norm": 0.09559368342161179, "learning_rate": 0.01, "loss": 1.9848, "step": 44034 }, { "epoch": 4.517542059909725, "grad_norm": 0.12470517307519913, "learning_rate": 0.01, "loss": 2.0018, "step": 44037 }, { "epoch": 4.517849815346738, "grad_norm": 0.0809641033411026, "learning_rate": 0.01, "loss": 2.0019, "step": 44040 }, { "epoch": 4.5181575707837505, "grad_norm": 0.04899699613451958, "learning_rate": 0.01, "loss": 1.9809, "step": 44043 }, { "epoch": 4.518465326220763, "grad_norm": 0.06368394196033478, "learning_rate": 0.01, "loss": 1.9935, "step": 44046 }, { "epoch": 4.518773081657776, "grad_norm": 0.06713195890188217, "learning_rate": 0.01, "loss": 1.9813, "step": 44049 }, { "epoch": 4.519080837094789, "grad_norm": 0.12240014225244522, "learning_rate": 0.01, "loss": 1.983, "step": 44052 }, { "epoch": 4.519388592531802, "grad_norm": 0.159604012966156, "learning_rate": 0.01, "loss": 2.0038, "step": 44055 }, { "epoch": 4.519696347968814, "grad_norm": 0.09968844056129456, "learning_rate": 0.01, "loss": 1.9851, "step": 44058 }, { "epoch": 4.520004103405827, "grad_norm": 0.1151421070098877, "learning_rate": 0.01, "loss": 1.999, "step": 44061 }, { "epoch": 4.52031185884284, "grad_norm": 0.117409847676754, "learning_rate": 0.01, "loss": 1.977, "step": 44064 }, { "epoch": 4.520619614279852, "grad_norm": 0.05031849071383476, "learning_rate": 0.01, "loss": 1.9929, "step": 44067 }, { "epoch": 4.5209273697168655, "grad_norm": 0.04156704619526863, "learning_rate": 0.01, "loss": 2.0103, "step": 44070 }, { "epoch": 4.521235125153877, "grad_norm": 0.04768043011426926, "learning_rate": 0.01, "loss": 2.0129, "step": 44073 }, { "epoch": 4.52154288059089, "grad_norm": 0.04492770880460739, "learning_rate": 0.01, "loss": 1.9951, "step": 44076 }, { "epoch": 4.521850636027903, "grad_norm": 0.0356624610722065, "learning_rate": 0.01, "loss": 1.9866, "step": 44079 }, { "epoch": 4.522158391464916, "grad_norm": 0.06383637338876724, "learning_rate": 0.01, "loss": 2.0088, "step": 44082 }, { "epoch": 4.5224661469019285, "grad_norm": 0.09323472529649734, "learning_rate": 0.01, "loss": 1.989, "step": 44085 }, { "epoch": 4.522773902338941, "grad_norm": 0.08748291432857513, "learning_rate": 0.01, "loss": 1.9946, "step": 44088 }, { "epoch": 4.523081657775954, "grad_norm": 0.12068930268287659, "learning_rate": 0.01, "loss": 1.9963, "step": 44091 }, { "epoch": 4.523389413212967, "grad_norm": 0.12926380336284637, "learning_rate": 0.01, "loss": 2.0079, "step": 44094 }, { "epoch": 4.52369716864998, "grad_norm": 0.053785938769578934, "learning_rate": 0.01, "loss": 1.9889, "step": 44097 }, { "epoch": 4.524004924086992, "grad_norm": 0.056706249713897705, "learning_rate": 0.01, "loss": 1.9529, "step": 44100 }, { "epoch": 4.524312679524005, "grad_norm": 0.040739428251981735, "learning_rate": 0.01, "loss": 1.9819, "step": 44103 }, { "epoch": 4.524620434961018, "grad_norm": 0.07002007216215134, "learning_rate": 0.01, "loss": 1.988, "step": 44106 }, { "epoch": 4.524928190398031, "grad_norm": 0.08624965697526932, "learning_rate": 0.01, "loss": 1.9939, "step": 44109 }, { "epoch": 4.525235945835043, "grad_norm": 0.05654805898666382, "learning_rate": 0.01, "loss": 1.9924, "step": 44112 }, { "epoch": 4.525543701272056, "grad_norm": 0.0629354789853096, "learning_rate": 0.01, "loss": 1.9798, "step": 44115 }, { "epoch": 4.525851456709068, "grad_norm": 0.10447383671998978, "learning_rate": 0.01, "loss": 1.9912, "step": 44118 }, { "epoch": 4.526159212146081, "grad_norm": 0.08281921595335007, "learning_rate": 0.01, "loss": 1.9917, "step": 44121 }, { "epoch": 4.526466967583094, "grad_norm": 0.10570526868104935, "learning_rate": 0.01, "loss": 2.0012, "step": 44124 }, { "epoch": 4.5267747230201065, "grad_norm": 0.040542326867580414, "learning_rate": 0.01, "loss": 1.9644, "step": 44127 }, { "epoch": 4.527082478457119, "grad_norm": 0.060009557753801346, "learning_rate": 0.01, "loss": 1.9859, "step": 44130 }, { "epoch": 4.527390233894132, "grad_norm": 0.09265922009944916, "learning_rate": 0.01, "loss": 1.9737, "step": 44133 }, { "epoch": 4.527697989331145, "grad_norm": 0.0457131452858448, "learning_rate": 0.01, "loss": 1.9718, "step": 44136 }, { "epoch": 4.528005744768158, "grad_norm": 0.038852475583553314, "learning_rate": 0.01, "loss": 1.9957, "step": 44139 }, { "epoch": 4.52831350020517, "grad_norm": 0.03757132962346077, "learning_rate": 0.01, "loss": 1.9932, "step": 44142 }, { "epoch": 4.528621255642183, "grad_norm": 0.04073426127433777, "learning_rate": 0.01, "loss": 1.988, "step": 44145 }, { "epoch": 4.528929011079196, "grad_norm": 0.05840800702571869, "learning_rate": 0.01, "loss": 1.9882, "step": 44148 }, { "epoch": 4.529236766516209, "grad_norm": 0.06951642036437988, "learning_rate": 0.01, "loss": 1.9842, "step": 44151 }, { "epoch": 4.5295445219532215, "grad_norm": 0.062115710228681564, "learning_rate": 0.01, "loss": 2.0055, "step": 44154 }, { "epoch": 4.529852277390233, "grad_norm": 0.0439622737467289, "learning_rate": 0.01, "loss": 1.969, "step": 44157 }, { "epoch": 4.530160032827247, "grad_norm": 0.06429772078990936, "learning_rate": 0.01, "loss": 2.0265, "step": 44160 }, { "epoch": 4.530467788264259, "grad_norm": 0.1072159856557846, "learning_rate": 0.01, "loss": 1.9889, "step": 44163 }, { "epoch": 4.530775543701272, "grad_norm": 0.08709017932415009, "learning_rate": 0.01, "loss": 1.9846, "step": 44166 }, { "epoch": 4.531083299138285, "grad_norm": 0.07494400441646576, "learning_rate": 0.01, "loss": 1.9814, "step": 44169 }, { "epoch": 4.531391054575297, "grad_norm": 0.09518608450889587, "learning_rate": 0.01, "loss": 1.9748, "step": 44172 }, { "epoch": 4.53169881001231, "grad_norm": 0.04578256234526634, "learning_rate": 0.01, "loss": 1.9758, "step": 44175 }, { "epoch": 4.532006565449323, "grad_norm": 0.03707175329327583, "learning_rate": 0.01, "loss": 1.9988, "step": 44178 }, { "epoch": 4.532314320886336, "grad_norm": 0.042071472853422165, "learning_rate": 0.01, "loss": 1.9992, "step": 44181 }, { "epoch": 4.5326220763233485, "grad_norm": 0.05591168627142906, "learning_rate": 0.01, "loss": 2.0053, "step": 44184 }, { "epoch": 4.532929831760361, "grad_norm": 0.04541923105716705, "learning_rate": 0.01, "loss": 1.9837, "step": 44187 }, { "epoch": 4.533237587197374, "grad_norm": 0.07019765675067902, "learning_rate": 0.01, "loss": 2.009, "step": 44190 }, { "epoch": 4.533545342634387, "grad_norm": 0.04877987504005432, "learning_rate": 0.01, "loss": 2.0081, "step": 44193 }, { "epoch": 4.5338530980714, "grad_norm": 0.04220633581280708, "learning_rate": 0.01, "loss": 2.0013, "step": 44196 }, { "epoch": 4.534160853508412, "grad_norm": 0.17436882853507996, "learning_rate": 0.01, "loss": 1.9924, "step": 44199 }, { "epoch": 4.534468608945424, "grad_norm": 0.03670203685760498, "learning_rate": 0.01, "loss": 1.9786, "step": 44202 }, { "epoch": 4.534776364382438, "grad_norm": 0.04544707387685776, "learning_rate": 0.01, "loss": 2.0046, "step": 44205 }, { "epoch": 4.53508411981945, "grad_norm": 0.03833407908678055, "learning_rate": 0.01, "loss": 2.0196, "step": 44208 }, { "epoch": 4.535391875256463, "grad_norm": 0.03395809605717659, "learning_rate": 0.01, "loss": 1.9846, "step": 44211 }, { "epoch": 4.535699630693475, "grad_norm": 0.04812290519475937, "learning_rate": 0.01, "loss": 2.0027, "step": 44214 }, { "epoch": 4.536007386130488, "grad_norm": 0.04580771178007126, "learning_rate": 0.01, "loss": 2.0093, "step": 44217 }, { "epoch": 4.536315141567501, "grad_norm": 0.04926560819149017, "learning_rate": 0.01, "loss": 2.0011, "step": 44220 }, { "epoch": 4.536622897004514, "grad_norm": 0.08950170129537582, "learning_rate": 0.01, "loss": 2.0266, "step": 44223 }, { "epoch": 4.5369306524415265, "grad_norm": 0.04416525736451149, "learning_rate": 0.01, "loss": 2.0037, "step": 44226 }, { "epoch": 4.537238407878539, "grad_norm": 0.04912101849913597, "learning_rate": 0.01, "loss": 2.0068, "step": 44229 }, { "epoch": 4.537546163315552, "grad_norm": 0.039088208228349686, "learning_rate": 0.01, "loss": 1.9946, "step": 44232 }, { "epoch": 4.537853918752565, "grad_norm": 0.03769220411777496, "learning_rate": 0.01, "loss": 1.9635, "step": 44235 }, { "epoch": 4.538161674189578, "grad_norm": 0.14437700808048248, "learning_rate": 0.01, "loss": 1.9917, "step": 44238 }, { "epoch": 4.53846942962659, "grad_norm": 0.09406454116106033, "learning_rate": 0.01, "loss": 1.997, "step": 44241 }, { "epoch": 4.538777185063603, "grad_norm": 0.0553220696747303, "learning_rate": 0.01, "loss": 1.999, "step": 44244 }, { "epoch": 4.539084940500615, "grad_norm": 0.04257743060588837, "learning_rate": 0.01, "loss": 2.0079, "step": 44247 }, { "epoch": 4.539392695937629, "grad_norm": 0.05212993547320366, "learning_rate": 0.01, "loss": 1.9864, "step": 44250 }, { "epoch": 4.539700451374641, "grad_norm": 0.050963662564754486, "learning_rate": 0.01, "loss": 1.9904, "step": 44253 }, { "epoch": 4.540008206811653, "grad_norm": 0.04554356262087822, "learning_rate": 0.01, "loss": 2.0002, "step": 44256 }, { "epoch": 4.540315962248666, "grad_norm": 0.11272426694631577, "learning_rate": 0.01, "loss": 1.9897, "step": 44259 }, { "epoch": 4.540623717685679, "grad_norm": 0.06912017613649368, "learning_rate": 0.01, "loss": 1.9837, "step": 44262 }, { "epoch": 4.540931473122692, "grad_norm": 0.057526472955942154, "learning_rate": 0.01, "loss": 1.9848, "step": 44265 }, { "epoch": 4.5412392285597045, "grad_norm": 0.035935260355472565, "learning_rate": 0.01, "loss": 1.9816, "step": 44268 }, { "epoch": 4.541546983996717, "grad_norm": 0.06651175767183304, "learning_rate": 0.01, "loss": 1.9753, "step": 44271 }, { "epoch": 4.54185473943373, "grad_norm": 0.11073578149080276, "learning_rate": 0.01, "loss": 1.9857, "step": 44274 }, { "epoch": 4.542162494870743, "grad_norm": 0.08459115028381348, "learning_rate": 0.01, "loss": 1.9724, "step": 44277 }, { "epoch": 4.542470250307756, "grad_norm": 0.04882695525884628, "learning_rate": 0.01, "loss": 2.0182, "step": 44280 }, { "epoch": 4.542778005744768, "grad_norm": 0.0500728003680706, "learning_rate": 0.01, "loss": 1.9624, "step": 44283 }, { "epoch": 4.543085761181781, "grad_norm": 0.03797118738293648, "learning_rate": 0.01, "loss": 1.9984, "step": 44286 }, { "epoch": 4.543393516618794, "grad_norm": 0.046217989176511765, "learning_rate": 0.01, "loss": 2.0108, "step": 44289 }, { "epoch": 4.543701272055806, "grad_norm": 0.057793501764535904, "learning_rate": 0.01, "loss": 1.9717, "step": 44292 }, { "epoch": 4.544009027492819, "grad_norm": 0.03469870239496231, "learning_rate": 0.01, "loss": 1.9874, "step": 44295 }, { "epoch": 4.544316782929831, "grad_norm": 0.11007340252399445, "learning_rate": 0.01, "loss": 2.0101, "step": 44298 }, { "epoch": 4.544624538366844, "grad_norm": 0.07559779286384583, "learning_rate": 0.01, "loss": 1.9773, "step": 44301 }, { "epoch": 4.544932293803857, "grad_norm": 0.10915376991033554, "learning_rate": 0.01, "loss": 2.0, "step": 44304 }, { "epoch": 4.54524004924087, "grad_norm": 0.07973102480173111, "learning_rate": 0.01, "loss": 1.9778, "step": 44307 }, { "epoch": 4.5455478046778826, "grad_norm": 0.06999395787715912, "learning_rate": 0.01, "loss": 2.0185, "step": 44310 }, { "epoch": 4.545855560114895, "grad_norm": 0.05275473743677139, "learning_rate": 0.01, "loss": 2.0153, "step": 44313 }, { "epoch": 4.546163315551908, "grad_norm": 0.054669059813022614, "learning_rate": 0.01, "loss": 1.9909, "step": 44316 }, { "epoch": 4.546471070988921, "grad_norm": 0.046913836151361465, "learning_rate": 0.01, "loss": 2.041, "step": 44319 }, { "epoch": 4.546778826425934, "grad_norm": 0.043503716588020325, "learning_rate": 0.01, "loss": 1.9777, "step": 44322 }, { "epoch": 4.5470865818629465, "grad_norm": 0.058872975409030914, "learning_rate": 0.01, "loss": 1.9936, "step": 44325 }, { "epoch": 4.547394337299959, "grad_norm": 0.08830869197845459, "learning_rate": 0.01, "loss": 2.022, "step": 44328 }, { "epoch": 4.547702092736972, "grad_norm": 0.0850677490234375, "learning_rate": 0.01, "loss": 1.9808, "step": 44331 }, { "epoch": 4.548009848173985, "grad_norm": 0.058975327759981155, "learning_rate": 0.01, "loss": 1.9956, "step": 44334 }, { "epoch": 4.548317603610997, "grad_norm": 0.04586830735206604, "learning_rate": 0.01, "loss": 1.9823, "step": 44337 }, { "epoch": 4.5486253590480095, "grad_norm": 0.046169981360435486, "learning_rate": 0.01, "loss": 2.0032, "step": 44340 }, { "epoch": 4.548933114485022, "grad_norm": 0.06343907862901688, "learning_rate": 0.01, "loss": 1.9647, "step": 44343 }, { "epoch": 4.549240869922035, "grad_norm": 0.03225269168615341, "learning_rate": 0.01, "loss": 1.9993, "step": 44346 }, { "epoch": 4.549548625359048, "grad_norm": 0.0852704718708992, "learning_rate": 0.01, "loss": 1.9803, "step": 44349 }, { "epoch": 4.549856380796061, "grad_norm": 0.058122653514146805, "learning_rate": 0.01, "loss": 1.9746, "step": 44352 }, { "epoch": 4.550164136233073, "grad_norm": 0.08065648376941681, "learning_rate": 0.01, "loss": 2.0035, "step": 44355 }, { "epoch": 4.550471891670086, "grad_norm": 0.04599373787641525, "learning_rate": 0.01, "loss": 2.0209, "step": 44358 }, { "epoch": 4.550779647107099, "grad_norm": 0.03361526504158974, "learning_rate": 0.01, "loss": 2.0019, "step": 44361 }, { "epoch": 4.551087402544112, "grad_norm": 0.04159310460090637, "learning_rate": 0.01, "loss": 2.006, "step": 44364 }, { "epoch": 4.5513951579811245, "grad_norm": 0.06349869817495346, "learning_rate": 0.01, "loss": 2.0035, "step": 44367 }, { "epoch": 4.551702913418137, "grad_norm": 0.08402734994888306, "learning_rate": 0.01, "loss": 1.9991, "step": 44370 }, { "epoch": 4.55201066885515, "grad_norm": 0.0695672482252121, "learning_rate": 0.01, "loss": 2.0002, "step": 44373 }, { "epoch": 4.552318424292163, "grad_norm": 0.038909196853637695, "learning_rate": 0.01, "loss": 1.9777, "step": 44376 }, { "epoch": 4.552626179729176, "grad_norm": 0.032846856862306595, "learning_rate": 0.01, "loss": 1.9896, "step": 44379 }, { "epoch": 4.5529339351661875, "grad_norm": 0.03906947746872902, "learning_rate": 0.01, "loss": 1.9829, "step": 44382 }, { "epoch": 4.5532416906032, "grad_norm": 0.06816279888153076, "learning_rate": 0.01, "loss": 1.9935, "step": 44385 }, { "epoch": 4.553549446040213, "grad_norm": 0.13728150725364685, "learning_rate": 0.01, "loss": 1.9883, "step": 44388 }, { "epoch": 4.553857201477226, "grad_norm": 0.11365284025669098, "learning_rate": 0.01, "loss": 2.0001, "step": 44391 }, { "epoch": 4.554164956914239, "grad_norm": 0.058936670422554016, "learning_rate": 0.01, "loss": 1.978, "step": 44394 }, { "epoch": 4.554472712351251, "grad_norm": 0.044918566942214966, "learning_rate": 0.01, "loss": 1.9966, "step": 44397 }, { "epoch": 4.554780467788264, "grad_norm": 0.04904935136437416, "learning_rate": 0.01, "loss": 2.0085, "step": 44400 }, { "epoch": 4.555088223225277, "grad_norm": 0.04912208393216133, "learning_rate": 0.01, "loss": 2.0077, "step": 44403 }, { "epoch": 4.55539597866229, "grad_norm": 0.0992012619972229, "learning_rate": 0.01, "loss": 1.9942, "step": 44406 }, { "epoch": 4.5557037340993025, "grad_norm": 0.05080341920256615, "learning_rate": 0.01, "loss": 2.004, "step": 44409 }, { "epoch": 4.556011489536315, "grad_norm": 0.10849186778068542, "learning_rate": 0.01, "loss": 2.0349, "step": 44412 }, { "epoch": 4.556319244973328, "grad_norm": 0.04914950951933861, "learning_rate": 0.01, "loss": 2.0075, "step": 44415 }, { "epoch": 4.556627000410341, "grad_norm": 0.040149789303541183, "learning_rate": 0.01, "loss": 2.0072, "step": 44418 }, { "epoch": 4.556934755847354, "grad_norm": 0.04807689040899277, "learning_rate": 0.01, "loss": 2.0102, "step": 44421 }, { "epoch": 4.557242511284366, "grad_norm": 0.03420715779066086, "learning_rate": 0.01, "loss": 2.0242, "step": 44424 }, { "epoch": 4.557550266721378, "grad_norm": 0.033837419003248215, "learning_rate": 0.01, "loss": 1.9849, "step": 44427 }, { "epoch": 4.557858022158391, "grad_norm": 0.05432615429162979, "learning_rate": 0.01, "loss": 1.9907, "step": 44430 }, { "epoch": 4.558165777595404, "grad_norm": 0.09718727320432663, "learning_rate": 0.01, "loss": 2.0001, "step": 44433 }, { "epoch": 4.558473533032417, "grad_norm": 0.09546732157468796, "learning_rate": 0.01, "loss": 2.0192, "step": 44436 }, { "epoch": 4.558781288469429, "grad_norm": 0.04132693633437157, "learning_rate": 0.01, "loss": 1.9634, "step": 44439 }, { "epoch": 4.559089043906442, "grad_norm": 0.07769662886857986, "learning_rate": 0.01, "loss": 1.9836, "step": 44442 }, { "epoch": 4.559396799343455, "grad_norm": 0.08077646046876907, "learning_rate": 0.01, "loss": 2.0083, "step": 44445 }, { "epoch": 4.559704554780468, "grad_norm": 0.0781002938747406, "learning_rate": 0.01, "loss": 1.976, "step": 44448 }, { "epoch": 4.5600123102174805, "grad_norm": 0.057897116988897324, "learning_rate": 0.01, "loss": 1.9946, "step": 44451 }, { "epoch": 4.560320065654493, "grad_norm": 0.06867967545986176, "learning_rate": 0.01, "loss": 1.9638, "step": 44454 }, { "epoch": 4.560627821091506, "grad_norm": 0.06109149381518364, "learning_rate": 0.01, "loss": 1.9623, "step": 44457 }, { "epoch": 4.560935576528519, "grad_norm": 0.038964226841926575, "learning_rate": 0.01, "loss": 2.0091, "step": 44460 }, { "epoch": 4.561243331965532, "grad_norm": 0.0609222911298275, "learning_rate": 0.01, "loss": 1.9984, "step": 44463 }, { "epoch": 4.5615510874025444, "grad_norm": 0.09428851306438446, "learning_rate": 0.01, "loss": 2.0005, "step": 44466 }, { "epoch": 4.561858842839557, "grad_norm": 0.09745607525110245, "learning_rate": 0.01, "loss": 1.9877, "step": 44469 }, { "epoch": 4.562166598276569, "grad_norm": 0.09413721412420273, "learning_rate": 0.01, "loss": 2.0004, "step": 44472 }, { "epoch": 4.562474353713582, "grad_norm": 0.0537935309112072, "learning_rate": 0.01, "loss": 2.0012, "step": 44475 }, { "epoch": 4.562782109150595, "grad_norm": 0.03104214183986187, "learning_rate": 0.01, "loss": 1.995, "step": 44478 }, { "epoch": 4.5630898645876075, "grad_norm": 0.028320681303739548, "learning_rate": 0.01, "loss": 1.9681, "step": 44481 }, { "epoch": 4.56339762002462, "grad_norm": 0.09736626595258713, "learning_rate": 0.01, "loss": 2.0077, "step": 44484 }, { "epoch": 4.563705375461633, "grad_norm": 0.03548412024974823, "learning_rate": 0.01, "loss": 2.008, "step": 44487 }, { "epoch": 4.564013130898646, "grad_norm": 0.11299438029527664, "learning_rate": 0.01, "loss": 1.9814, "step": 44490 }, { "epoch": 4.564320886335659, "grad_norm": 0.07769119739532471, "learning_rate": 0.01, "loss": 2.0133, "step": 44493 }, { "epoch": 4.564628641772671, "grad_norm": 0.04660061001777649, "learning_rate": 0.01, "loss": 1.9923, "step": 44496 }, { "epoch": 4.564936397209684, "grad_norm": 0.045153431594371796, "learning_rate": 0.01, "loss": 2.012, "step": 44499 }, { "epoch": 4.565244152646697, "grad_norm": 0.040483929216861725, "learning_rate": 0.01, "loss": 2.0136, "step": 44502 }, { "epoch": 4.56555190808371, "grad_norm": 0.04947364702820778, "learning_rate": 0.01, "loss": 1.9962, "step": 44505 }, { "epoch": 4.5658596635207225, "grad_norm": 0.0688960924744606, "learning_rate": 0.01, "loss": 1.9869, "step": 44508 }, { "epoch": 4.566167418957735, "grad_norm": 0.11255665123462677, "learning_rate": 0.01, "loss": 1.9831, "step": 44511 }, { "epoch": 4.566475174394748, "grad_norm": 0.07215286791324615, "learning_rate": 0.01, "loss": 2.0106, "step": 44514 }, { "epoch": 4.56678292983176, "grad_norm": 0.07593537867069244, "learning_rate": 0.01, "loss": 1.9867, "step": 44517 }, { "epoch": 4.567090685268773, "grad_norm": 0.053777799010276794, "learning_rate": 0.01, "loss": 1.9999, "step": 44520 }, { "epoch": 4.5673984407057855, "grad_norm": 0.04630263149738312, "learning_rate": 0.01, "loss": 1.9829, "step": 44523 }, { "epoch": 4.567706196142798, "grad_norm": 0.1870114654302597, "learning_rate": 0.01, "loss": 2.0189, "step": 44526 }, { "epoch": 4.568013951579811, "grad_norm": 0.04411546140909195, "learning_rate": 0.01, "loss": 1.9801, "step": 44529 }, { "epoch": 4.568321707016824, "grad_norm": 0.04254088178277016, "learning_rate": 0.01, "loss": 1.9802, "step": 44532 }, { "epoch": 4.568629462453837, "grad_norm": 0.048210930079221725, "learning_rate": 0.01, "loss": 1.9613, "step": 44535 }, { "epoch": 4.568937217890849, "grad_norm": 0.046062175184488297, "learning_rate": 0.01, "loss": 2.009, "step": 44538 }, { "epoch": 4.569244973327862, "grad_norm": 0.04656999930739403, "learning_rate": 0.01, "loss": 2.0012, "step": 44541 }, { "epoch": 4.569552728764875, "grad_norm": 0.04990769177675247, "learning_rate": 0.01, "loss": 1.9798, "step": 44544 }, { "epoch": 4.569860484201888, "grad_norm": 0.06637271493673325, "learning_rate": 0.01, "loss": 1.9974, "step": 44547 }, { "epoch": 4.5701682396389005, "grad_norm": 0.0446067713201046, "learning_rate": 0.01, "loss": 2.0068, "step": 44550 }, { "epoch": 4.570475995075913, "grad_norm": 0.044182851910591125, "learning_rate": 0.01, "loss": 1.9897, "step": 44553 }, { "epoch": 4.570783750512926, "grad_norm": 0.03427280858159065, "learning_rate": 0.01, "loss": 2.0013, "step": 44556 }, { "epoch": 4.571091505949939, "grad_norm": 0.0435144379734993, "learning_rate": 0.01, "loss": 1.9985, "step": 44559 }, { "epoch": 4.571399261386951, "grad_norm": 0.046480659395456314, "learning_rate": 0.01, "loss": 1.9871, "step": 44562 }, { "epoch": 4.5717070168239635, "grad_norm": 0.047410015016794205, "learning_rate": 0.01, "loss": 1.9871, "step": 44565 }, { "epoch": 4.572014772260976, "grad_norm": 0.0613541379570961, "learning_rate": 0.01, "loss": 1.967, "step": 44568 }, { "epoch": 4.572322527697989, "grad_norm": 0.04605888947844505, "learning_rate": 0.01, "loss": 1.9939, "step": 44571 }, { "epoch": 4.572630283135002, "grad_norm": 0.08377258479595184, "learning_rate": 0.01, "loss": 2.0123, "step": 44574 }, { "epoch": 4.572938038572015, "grad_norm": 0.09191520512104034, "learning_rate": 0.01, "loss": 1.984, "step": 44577 }, { "epoch": 4.573245794009027, "grad_norm": 0.11382446438074112, "learning_rate": 0.01, "loss": 1.9947, "step": 44580 }, { "epoch": 4.57355354944604, "grad_norm": 0.05597241222858429, "learning_rate": 0.01, "loss": 1.9919, "step": 44583 }, { "epoch": 4.573861304883053, "grad_norm": 0.044153302907943726, "learning_rate": 0.01, "loss": 1.9974, "step": 44586 }, { "epoch": 4.574169060320066, "grad_norm": 0.0449075847864151, "learning_rate": 0.01, "loss": 1.9689, "step": 44589 }, { "epoch": 4.5744768157570785, "grad_norm": 0.06252507120370865, "learning_rate": 0.01, "loss": 2.0106, "step": 44592 }, { "epoch": 4.574784571194091, "grad_norm": 0.047335173934698105, "learning_rate": 0.01, "loss": 1.9913, "step": 44595 }, { "epoch": 4.575092326631104, "grad_norm": 0.046538159251213074, "learning_rate": 0.01, "loss": 1.9753, "step": 44598 }, { "epoch": 4.575400082068117, "grad_norm": 0.043892424553632736, "learning_rate": 0.01, "loss": 1.9841, "step": 44601 }, { "epoch": 4.57570783750513, "grad_norm": 0.14133551716804504, "learning_rate": 0.01, "loss": 1.992, "step": 44604 }, { "epoch": 4.5760155929421416, "grad_norm": 0.06392577290534973, "learning_rate": 0.01, "loss": 1.9972, "step": 44607 }, { "epoch": 4.576323348379154, "grad_norm": 0.07599660754203796, "learning_rate": 0.01, "loss": 2.0064, "step": 44610 }, { "epoch": 4.576631103816167, "grad_norm": 0.0688132718205452, "learning_rate": 0.01, "loss": 1.9937, "step": 44613 }, { "epoch": 4.57693885925318, "grad_norm": 0.05262705683708191, "learning_rate": 0.01, "loss": 2.0085, "step": 44616 }, { "epoch": 4.577246614690193, "grad_norm": 0.04527880623936653, "learning_rate": 0.01, "loss": 1.9785, "step": 44619 }, { "epoch": 4.5775543701272055, "grad_norm": 0.05383728817105293, "learning_rate": 0.01, "loss": 2.0036, "step": 44622 }, { "epoch": 4.577862125564218, "grad_norm": 0.07166964560747147, "learning_rate": 0.01, "loss": 1.9983, "step": 44625 }, { "epoch": 4.578169881001231, "grad_norm": 0.04192749038338661, "learning_rate": 0.01, "loss": 1.9627, "step": 44628 }, { "epoch": 4.578477636438244, "grad_norm": 0.06395778805017471, "learning_rate": 0.01, "loss": 2.0003, "step": 44631 }, { "epoch": 4.578785391875257, "grad_norm": 0.05522435903549194, "learning_rate": 0.01, "loss": 2.0137, "step": 44634 }, { "epoch": 4.579093147312269, "grad_norm": 0.0383891724050045, "learning_rate": 0.01, "loss": 1.9754, "step": 44637 }, { "epoch": 4.579400902749282, "grad_norm": 0.11526113003492355, "learning_rate": 0.01, "loss": 2.0159, "step": 44640 }, { "epoch": 4.579708658186295, "grad_norm": 0.04987471178174019, "learning_rate": 0.01, "loss": 1.9878, "step": 44643 }, { "epoch": 4.580016413623308, "grad_norm": 0.055467016994953156, "learning_rate": 0.01, "loss": 1.991, "step": 44646 }, { "epoch": 4.5803241690603205, "grad_norm": 0.08823923766613007, "learning_rate": 0.01, "loss": 1.9787, "step": 44649 }, { "epoch": 4.580631924497332, "grad_norm": 0.06108856573700905, "learning_rate": 0.01, "loss": 1.9844, "step": 44652 }, { "epoch": 4.580939679934345, "grad_norm": 0.046790335327386856, "learning_rate": 0.01, "loss": 2.021, "step": 44655 }, { "epoch": 4.581247435371358, "grad_norm": 0.04152047634124756, "learning_rate": 0.01, "loss": 1.9674, "step": 44658 }, { "epoch": 4.581555190808371, "grad_norm": 0.06719448417425156, "learning_rate": 0.01, "loss": 1.9794, "step": 44661 }, { "epoch": 4.5818629462453835, "grad_norm": 0.14338631927967072, "learning_rate": 0.01, "loss": 2.0153, "step": 44664 }, { "epoch": 4.582170701682396, "grad_norm": 0.06660101562738419, "learning_rate": 0.01, "loss": 2.0095, "step": 44667 }, { "epoch": 4.582478457119409, "grad_norm": 0.0552043579518795, "learning_rate": 0.01, "loss": 2.0049, "step": 44670 }, { "epoch": 4.582786212556422, "grad_norm": 0.08016394078731537, "learning_rate": 0.01, "loss": 1.9783, "step": 44673 }, { "epoch": 4.583093967993435, "grad_norm": 0.05147711932659149, "learning_rate": 0.01, "loss": 1.9912, "step": 44676 }, { "epoch": 4.583401723430447, "grad_norm": 0.044893477112054825, "learning_rate": 0.01, "loss": 2.0132, "step": 44679 }, { "epoch": 4.58370947886746, "grad_norm": 0.1312035769224167, "learning_rate": 0.01, "loss": 1.994, "step": 44682 }, { "epoch": 4.584017234304473, "grad_norm": 0.0376642569899559, "learning_rate": 0.01, "loss": 1.951, "step": 44685 }, { "epoch": 4.584324989741486, "grad_norm": 0.042604606598615646, "learning_rate": 0.01, "loss": 1.9933, "step": 44688 }, { "epoch": 4.5846327451784985, "grad_norm": 0.04277000203728676, "learning_rate": 0.01, "loss": 2.0092, "step": 44691 }, { "epoch": 4.584940500615511, "grad_norm": 0.07913113385438919, "learning_rate": 0.01, "loss": 1.9953, "step": 44694 }, { "epoch": 4.585248256052523, "grad_norm": 0.08000600337982178, "learning_rate": 0.01, "loss": 1.9983, "step": 44697 }, { "epoch": 4.585556011489536, "grad_norm": 0.05239587649703026, "learning_rate": 0.01, "loss": 1.9665, "step": 44700 }, { "epoch": 4.585863766926549, "grad_norm": 0.04813678562641144, "learning_rate": 0.01, "loss": 1.9891, "step": 44703 }, { "epoch": 4.5861715223635615, "grad_norm": 0.11590283364057541, "learning_rate": 0.01, "loss": 2.0043, "step": 44706 }, { "epoch": 4.586479277800574, "grad_norm": 0.19828790426254272, "learning_rate": 0.01, "loss": 1.9754, "step": 44709 }, { "epoch": 4.586787033237587, "grad_norm": 0.14478129148483276, "learning_rate": 0.01, "loss": 1.9801, "step": 44712 }, { "epoch": 4.5870947886746, "grad_norm": 0.042516618967056274, "learning_rate": 0.01, "loss": 1.9996, "step": 44715 }, { "epoch": 4.587402544111613, "grad_norm": 0.04690965265035629, "learning_rate": 0.01, "loss": 2.0056, "step": 44718 }, { "epoch": 4.587710299548625, "grad_norm": 0.035743579268455505, "learning_rate": 0.01, "loss": 1.9923, "step": 44721 }, { "epoch": 4.588018054985638, "grad_norm": 0.041939638555049896, "learning_rate": 0.01, "loss": 2.0091, "step": 44724 }, { "epoch": 4.588325810422651, "grad_norm": 0.039647046476602554, "learning_rate": 0.01, "loss": 1.9768, "step": 44727 }, { "epoch": 4.588633565859664, "grad_norm": 0.04442642256617546, "learning_rate": 0.01, "loss": 1.9663, "step": 44730 }, { "epoch": 4.5889413212966765, "grad_norm": 0.08688386529684067, "learning_rate": 0.01, "loss": 1.9929, "step": 44733 }, { "epoch": 4.589249076733689, "grad_norm": 0.03309828042984009, "learning_rate": 0.01, "loss": 1.9997, "step": 44736 }, { "epoch": 4.589556832170702, "grad_norm": 0.1094844713807106, "learning_rate": 0.01, "loss": 1.9716, "step": 44739 }, { "epoch": 4.589864587607714, "grad_norm": 0.04613770171999931, "learning_rate": 0.01, "loss": 1.9908, "step": 44742 }, { "epoch": 4.590172343044727, "grad_norm": 0.10529785603284836, "learning_rate": 0.01, "loss": 2.0468, "step": 44745 }, { "epoch": 4.5904800984817395, "grad_norm": 0.09901240468025208, "learning_rate": 0.01, "loss": 1.9821, "step": 44748 }, { "epoch": 4.590787853918752, "grad_norm": 0.08486896753311157, "learning_rate": 0.01, "loss": 1.9962, "step": 44751 }, { "epoch": 4.591095609355765, "grad_norm": 0.05477188900113106, "learning_rate": 0.01, "loss": 1.9898, "step": 44754 }, { "epoch": 4.591403364792778, "grad_norm": 0.03555532172322273, "learning_rate": 0.01, "loss": 1.9894, "step": 44757 }, { "epoch": 4.591711120229791, "grad_norm": 0.0637480840086937, "learning_rate": 0.01, "loss": 2.0144, "step": 44760 }, { "epoch": 4.5920188756668034, "grad_norm": 0.09844323992729187, "learning_rate": 0.01, "loss": 1.9993, "step": 44763 }, { "epoch": 4.592326631103816, "grad_norm": 0.06749750673770905, "learning_rate": 0.01, "loss": 1.9775, "step": 44766 }, { "epoch": 4.592634386540829, "grad_norm": 0.0942719504237175, "learning_rate": 0.01, "loss": 1.9971, "step": 44769 }, { "epoch": 4.592942141977842, "grad_norm": 0.043663688004016876, "learning_rate": 0.01, "loss": 1.9633, "step": 44772 }, { "epoch": 4.593249897414855, "grad_norm": 0.1031889095902443, "learning_rate": 0.01, "loss": 1.9833, "step": 44775 }, { "epoch": 4.593557652851867, "grad_norm": 0.060684684664011, "learning_rate": 0.01, "loss": 2.0057, "step": 44778 }, { "epoch": 4.59386540828888, "grad_norm": 0.08744547516107559, "learning_rate": 0.01, "loss": 2.0255, "step": 44781 }, { "epoch": 4.594173163725893, "grad_norm": 0.05547311529517174, "learning_rate": 0.01, "loss": 1.9884, "step": 44784 }, { "epoch": 4.594480919162905, "grad_norm": 0.05069347843527794, "learning_rate": 0.01, "loss": 2.0019, "step": 44787 }, { "epoch": 4.594788674599918, "grad_norm": 0.11832018941640854, "learning_rate": 0.01, "loss": 2.0006, "step": 44790 }, { "epoch": 4.59509643003693, "grad_norm": 0.09115731716156006, "learning_rate": 0.01, "loss": 2.0098, "step": 44793 }, { "epoch": 4.595404185473943, "grad_norm": 0.03262989968061447, "learning_rate": 0.01, "loss": 2.0148, "step": 44796 }, { "epoch": 4.595711940910956, "grad_norm": 0.08880288898944855, "learning_rate": 0.01, "loss": 2.0165, "step": 44799 }, { "epoch": 4.596019696347969, "grad_norm": 0.059965986758470535, "learning_rate": 0.01, "loss": 2.0037, "step": 44802 }, { "epoch": 4.5963274517849815, "grad_norm": 0.040046051144599915, "learning_rate": 0.01, "loss": 2.0233, "step": 44805 }, { "epoch": 4.596635207221994, "grad_norm": 0.05327828601002693, "learning_rate": 0.01, "loss": 1.9821, "step": 44808 }, { "epoch": 4.596942962659007, "grad_norm": 0.0532514713704586, "learning_rate": 0.01, "loss": 2.0157, "step": 44811 }, { "epoch": 4.59725071809602, "grad_norm": 0.0502878837287426, "learning_rate": 0.01, "loss": 1.9821, "step": 44814 }, { "epoch": 4.597558473533033, "grad_norm": 0.05360707640647888, "learning_rate": 0.01, "loss": 2.0084, "step": 44817 }, { "epoch": 4.597866228970045, "grad_norm": 0.08648170530796051, "learning_rate": 0.01, "loss": 2.0006, "step": 44820 }, { "epoch": 4.598173984407058, "grad_norm": 0.056780677288770676, "learning_rate": 0.01, "loss": 2.0159, "step": 44823 }, { "epoch": 4.598481739844071, "grad_norm": 0.032956380397081375, "learning_rate": 0.01, "loss": 2.0065, "step": 44826 }, { "epoch": 4.598789495281084, "grad_norm": 0.04322326183319092, "learning_rate": 0.01, "loss": 1.9687, "step": 44829 }, { "epoch": 4.599097250718096, "grad_norm": 0.10878471285104752, "learning_rate": 0.01, "loss": 2.0089, "step": 44832 }, { "epoch": 4.599405006155108, "grad_norm": 0.041027724742889404, "learning_rate": 0.01, "loss": 1.9751, "step": 44835 }, { "epoch": 4.599712761592121, "grad_norm": 0.09541984647512436, "learning_rate": 0.01, "loss": 1.9968, "step": 44838 }, { "epoch": 4.600020517029134, "grad_norm": 0.0648130476474762, "learning_rate": 0.01, "loss": 2.0191, "step": 44841 }, { "epoch": 4.600328272466147, "grad_norm": 0.10848550498485565, "learning_rate": 0.01, "loss": 2.0216, "step": 44844 }, { "epoch": 4.6006360279031595, "grad_norm": 0.062398817390203476, "learning_rate": 0.01, "loss": 1.9728, "step": 44847 }, { "epoch": 4.600943783340172, "grad_norm": 0.05789678171277046, "learning_rate": 0.01, "loss": 1.999, "step": 44850 }, { "epoch": 4.601251538777185, "grad_norm": 0.03198247775435448, "learning_rate": 0.01, "loss": 1.9765, "step": 44853 }, { "epoch": 4.601559294214198, "grad_norm": 0.05636049434542656, "learning_rate": 0.01, "loss": 1.9965, "step": 44856 }, { "epoch": 4.601867049651211, "grad_norm": 0.07471849024295807, "learning_rate": 0.01, "loss": 1.997, "step": 44859 }, { "epoch": 4.602174805088223, "grad_norm": 0.09839174151420593, "learning_rate": 0.01, "loss": 1.9864, "step": 44862 }, { "epoch": 4.602482560525236, "grad_norm": 0.08253321796655655, "learning_rate": 0.01, "loss": 2.003, "step": 44865 }, { "epoch": 4.602790315962249, "grad_norm": 0.06502492725849152, "learning_rate": 0.01, "loss": 1.9946, "step": 44868 }, { "epoch": 4.603098071399262, "grad_norm": 0.04798365756869316, "learning_rate": 0.01, "loss": 1.9914, "step": 44871 }, { "epoch": 4.6034058268362745, "grad_norm": 0.04243507608771324, "learning_rate": 0.01, "loss": 1.9924, "step": 44874 }, { "epoch": 4.603713582273286, "grad_norm": 0.03410327062010765, "learning_rate": 0.01, "loss": 1.9984, "step": 44877 }, { "epoch": 4.604021337710299, "grad_norm": 0.09597765654325485, "learning_rate": 0.01, "loss": 1.9802, "step": 44880 }, { "epoch": 4.604329093147312, "grad_norm": 0.06745591014623642, "learning_rate": 0.01, "loss": 2.0062, "step": 44883 }, { "epoch": 4.604636848584325, "grad_norm": 0.10422816127538681, "learning_rate": 0.01, "loss": 2.0042, "step": 44886 }, { "epoch": 4.6049446040213375, "grad_norm": 0.05743027105927467, "learning_rate": 0.01, "loss": 2.0116, "step": 44889 }, { "epoch": 4.60525235945835, "grad_norm": 0.04410535469651222, "learning_rate": 0.01, "loss": 1.9713, "step": 44892 }, { "epoch": 4.605560114895363, "grad_norm": 0.07853147387504578, "learning_rate": 0.01, "loss": 1.9984, "step": 44895 }, { "epoch": 4.605867870332376, "grad_norm": 0.056344617158174515, "learning_rate": 0.01, "loss": 2.0026, "step": 44898 }, { "epoch": 4.606175625769389, "grad_norm": 0.045788802206516266, "learning_rate": 0.01, "loss": 2.0154, "step": 44901 }, { "epoch": 4.606483381206401, "grad_norm": 0.07213722914457321, "learning_rate": 0.01, "loss": 1.9746, "step": 44904 }, { "epoch": 4.606791136643414, "grad_norm": 0.08182168006896973, "learning_rate": 0.01, "loss": 1.9972, "step": 44907 }, { "epoch": 4.607098892080427, "grad_norm": 0.06299345195293427, "learning_rate": 0.01, "loss": 2.0124, "step": 44910 }, { "epoch": 4.60740664751744, "grad_norm": 0.046955596655607224, "learning_rate": 0.01, "loss": 1.9883, "step": 44913 }, { "epoch": 4.607714402954453, "grad_norm": 0.05844559147953987, "learning_rate": 0.01, "loss": 1.988, "step": 44916 }, { "epoch": 4.608022158391465, "grad_norm": 0.04892998933792114, "learning_rate": 0.01, "loss": 2.0062, "step": 44919 }, { "epoch": 4.608329913828477, "grad_norm": 0.040492888540029526, "learning_rate": 0.01, "loss": 1.9933, "step": 44922 }, { "epoch": 4.60863766926549, "grad_norm": 0.043649546802043915, "learning_rate": 0.01, "loss": 2.0103, "step": 44925 }, { "epoch": 4.608945424702503, "grad_norm": 0.19346511363983154, "learning_rate": 0.01, "loss": 2.0006, "step": 44928 }, { "epoch": 4.609253180139516, "grad_norm": 0.0724545568227768, "learning_rate": 0.01, "loss": 1.9877, "step": 44931 }, { "epoch": 4.609560935576528, "grad_norm": 0.048261623829603195, "learning_rate": 0.01, "loss": 1.9988, "step": 44934 }, { "epoch": 4.609868691013541, "grad_norm": 0.06115753576159477, "learning_rate": 0.01, "loss": 1.9829, "step": 44937 }, { "epoch": 4.610176446450554, "grad_norm": 0.05957823619246483, "learning_rate": 0.01, "loss": 2.0031, "step": 44940 }, { "epoch": 4.610484201887567, "grad_norm": 0.08261608332395554, "learning_rate": 0.01, "loss": 1.9724, "step": 44943 }, { "epoch": 4.6107919573245795, "grad_norm": 0.04762962833046913, "learning_rate": 0.01, "loss": 1.9538, "step": 44946 }, { "epoch": 4.611099712761592, "grad_norm": 0.06040159985423088, "learning_rate": 0.01, "loss": 1.9893, "step": 44949 }, { "epoch": 4.611407468198605, "grad_norm": 0.06975025683641434, "learning_rate": 0.01, "loss": 2.0012, "step": 44952 }, { "epoch": 4.611715223635618, "grad_norm": 0.06275621056556702, "learning_rate": 0.01, "loss": 1.9817, "step": 44955 }, { "epoch": 4.612022979072631, "grad_norm": 0.04177596792578697, "learning_rate": 0.01, "loss": 1.9904, "step": 44958 }, { "epoch": 4.612330734509643, "grad_norm": 0.05913246050477028, "learning_rate": 0.01, "loss": 1.9898, "step": 44961 }, { "epoch": 4.612638489946656, "grad_norm": 0.05337538942694664, "learning_rate": 0.01, "loss": 2.0009, "step": 44964 }, { "epoch": 4.612946245383668, "grad_norm": 0.04543198645114899, "learning_rate": 0.01, "loss": 1.9813, "step": 44967 }, { "epoch": 4.613254000820681, "grad_norm": 0.05056144669651985, "learning_rate": 0.01, "loss": 1.9628, "step": 44970 }, { "epoch": 4.613561756257694, "grad_norm": 0.2109542340040207, "learning_rate": 0.01, "loss": 1.9783, "step": 44973 }, { "epoch": 4.613869511694706, "grad_norm": 0.11741621792316437, "learning_rate": 0.01, "loss": 1.9883, "step": 44976 }, { "epoch": 4.614177267131719, "grad_norm": 0.06218573451042175, "learning_rate": 0.01, "loss": 1.9846, "step": 44979 }, { "epoch": 4.614485022568732, "grad_norm": 0.04318585991859436, "learning_rate": 0.01, "loss": 1.9903, "step": 44982 }, { "epoch": 4.614792778005745, "grad_norm": 0.05122276023030281, "learning_rate": 0.01, "loss": 2.0001, "step": 44985 }, { "epoch": 4.6151005334427575, "grad_norm": 0.06790675967931747, "learning_rate": 0.01, "loss": 1.9929, "step": 44988 }, { "epoch": 4.61540828887977, "grad_norm": 0.06424128264188766, "learning_rate": 0.01, "loss": 1.9874, "step": 44991 }, { "epoch": 4.615716044316783, "grad_norm": 0.055609673261642456, "learning_rate": 0.01, "loss": 1.9943, "step": 44994 }, { "epoch": 4.616023799753796, "grad_norm": 0.04479989409446716, "learning_rate": 0.01, "loss": 1.9931, "step": 44997 }, { "epoch": 4.616331555190809, "grad_norm": 0.046773526817560196, "learning_rate": 0.01, "loss": 1.9935, "step": 45000 }, { "epoch": 4.616639310627821, "grad_norm": 0.044277340173721313, "learning_rate": 0.01, "loss": 2.0009, "step": 45003 }, { "epoch": 4.616947066064834, "grad_norm": 0.06604879349470139, "learning_rate": 0.01, "loss": 2.0009, "step": 45006 }, { "epoch": 4.617254821501847, "grad_norm": 0.07454725354909897, "learning_rate": 0.01, "loss": 2.0105, "step": 45009 }, { "epoch": 4.617562576938859, "grad_norm": 0.09837469458580017, "learning_rate": 0.01, "loss": 1.998, "step": 45012 }, { "epoch": 4.617870332375872, "grad_norm": 0.12640568614006042, "learning_rate": 0.01, "loss": 1.9884, "step": 45015 }, { "epoch": 4.618178087812884, "grad_norm": 0.06357913464307785, "learning_rate": 0.01, "loss": 1.9416, "step": 45018 }, { "epoch": 4.618485843249897, "grad_norm": 0.05319363251328468, "learning_rate": 0.01, "loss": 2.001, "step": 45021 }, { "epoch": 4.61879359868691, "grad_norm": 0.05755499005317688, "learning_rate": 0.01, "loss": 1.9872, "step": 45024 }, { "epoch": 4.619101354123923, "grad_norm": 0.03921099379658699, "learning_rate": 0.01, "loss": 2.007, "step": 45027 }, { "epoch": 4.6194091095609355, "grad_norm": 0.062433432787656784, "learning_rate": 0.01, "loss": 1.9991, "step": 45030 }, { "epoch": 4.619716864997948, "grad_norm": 0.048542365431785583, "learning_rate": 0.01, "loss": 1.934, "step": 45033 }, { "epoch": 4.620024620434961, "grad_norm": 0.12152877449989319, "learning_rate": 0.01, "loss": 2.0085, "step": 45036 }, { "epoch": 4.620332375871974, "grad_norm": 0.052054520696401596, "learning_rate": 0.01, "loss": 1.9778, "step": 45039 }, { "epoch": 4.620640131308987, "grad_norm": 0.11336644738912582, "learning_rate": 0.01, "loss": 2.0224, "step": 45042 }, { "epoch": 4.620947886745999, "grad_norm": 0.05044134706258774, "learning_rate": 0.01, "loss": 1.9957, "step": 45045 }, { "epoch": 4.621255642183012, "grad_norm": 0.06728419661521912, "learning_rate": 0.01, "loss": 1.9987, "step": 45048 }, { "epoch": 4.621563397620025, "grad_norm": 0.0826968103647232, "learning_rate": 0.01, "loss": 1.989, "step": 45051 }, { "epoch": 4.621871153057038, "grad_norm": 0.07135532796382904, "learning_rate": 0.01, "loss": 1.9807, "step": 45054 }, { "epoch": 4.62217890849405, "grad_norm": 0.05112279951572418, "learning_rate": 0.01, "loss": 1.9866, "step": 45057 }, { "epoch": 4.6224866639310624, "grad_norm": 0.11058257520198822, "learning_rate": 0.01, "loss": 2.0042, "step": 45060 }, { "epoch": 4.622794419368075, "grad_norm": 0.12000041455030441, "learning_rate": 0.01, "loss": 1.9503, "step": 45063 }, { "epoch": 4.623102174805088, "grad_norm": 0.06073766201734543, "learning_rate": 0.01, "loss": 1.9722, "step": 45066 }, { "epoch": 4.623409930242101, "grad_norm": 0.055321842432022095, "learning_rate": 0.01, "loss": 1.9971, "step": 45069 }, { "epoch": 4.623717685679114, "grad_norm": 0.04220004379749298, "learning_rate": 0.01, "loss": 1.9731, "step": 45072 }, { "epoch": 4.624025441116126, "grad_norm": 0.03700163960456848, "learning_rate": 0.01, "loss": 1.9781, "step": 45075 }, { "epoch": 4.624333196553139, "grad_norm": 0.108041912317276, "learning_rate": 0.01, "loss": 1.9753, "step": 45078 }, { "epoch": 4.624640951990152, "grad_norm": 0.04682699963450432, "learning_rate": 0.01, "loss": 1.9974, "step": 45081 }, { "epoch": 4.624948707427165, "grad_norm": 0.07901785522699356, "learning_rate": 0.01, "loss": 1.9806, "step": 45084 }, { "epoch": 4.6252564628641775, "grad_norm": 0.07119090855121613, "learning_rate": 0.01, "loss": 1.9928, "step": 45087 }, { "epoch": 4.62556421830119, "grad_norm": 0.08904542028903961, "learning_rate": 0.01, "loss": 2.0217, "step": 45090 }, { "epoch": 4.625871973738203, "grad_norm": 0.06656675785779953, "learning_rate": 0.01, "loss": 1.9986, "step": 45093 }, { "epoch": 4.626179729175216, "grad_norm": 0.06560304015874863, "learning_rate": 0.01, "loss": 1.9716, "step": 45096 }, { "epoch": 4.626487484612229, "grad_norm": 0.05664901062846184, "learning_rate": 0.01, "loss": 1.9716, "step": 45099 }, { "epoch": 4.6267952400492405, "grad_norm": 0.0491751953959465, "learning_rate": 0.01, "loss": 1.986, "step": 45102 }, { "epoch": 4.627102995486253, "grad_norm": 0.11787789314985275, "learning_rate": 0.01, "loss": 1.9853, "step": 45105 }, { "epoch": 4.627410750923266, "grad_norm": 0.04068392515182495, "learning_rate": 0.01, "loss": 1.9926, "step": 45108 }, { "epoch": 4.627718506360279, "grad_norm": 0.03958519548177719, "learning_rate": 0.01, "loss": 1.9939, "step": 45111 }, { "epoch": 4.628026261797292, "grad_norm": 0.05667821317911148, "learning_rate": 0.01, "loss": 2.0201, "step": 45114 }, { "epoch": 4.628334017234304, "grad_norm": 0.04660872370004654, "learning_rate": 0.01, "loss": 1.9892, "step": 45117 }, { "epoch": 4.628641772671317, "grad_norm": 0.05565042048692703, "learning_rate": 0.01, "loss": 1.9992, "step": 45120 }, { "epoch": 4.62894952810833, "grad_norm": 0.05158378556370735, "learning_rate": 0.01, "loss": 1.9672, "step": 45123 }, { "epoch": 4.629257283545343, "grad_norm": 0.04500264301896095, "learning_rate": 0.01, "loss": 1.9597, "step": 45126 }, { "epoch": 4.6295650389823555, "grad_norm": 0.03781789168715477, "learning_rate": 0.01, "loss": 1.9804, "step": 45129 }, { "epoch": 4.629872794419368, "grad_norm": 0.09032827615737915, "learning_rate": 0.01, "loss": 1.9739, "step": 45132 }, { "epoch": 4.630180549856381, "grad_norm": 0.09161376953125, "learning_rate": 0.01, "loss": 1.9864, "step": 45135 }, { "epoch": 4.630488305293394, "grad_norm": 0.07650554925203323, "learning_rate": 0.01, "loss": 1.9775, "step": 45138 }, { "epoch": 4.630796060730407, "grad_norm": 0.07098524272441864, "learning_rate": 0.01, "loss": 2.0106, "step": 45141 }, { "epoch": 4.631103816167419, "grad_norm": 0.053486138582229614, "learning_rate": 0.01, "loss": 1.9687, "step": 45144 }, { "epoch": 4.631411571604431, "grad_norm": 0.06696584820747375, "learning_rate": 0.01, "loss": 1.9744, "step": 45147 }, { "epoch": 4.631719327041444, "grad_norm": 0.08330658078193665, "learning_rate": 0.01, "loss": 2.0043, "step": 45150 }, { "epoch": 4.632027082478457, "grad_norm": 0.05537624657154083, "learning_rate": 0.01, "loss": 1.99, "step": 45153 }, { "epoch": 4.63233483791547, "grad_norm": 0.07297627627849579, "learning_rate": 0.01, "loss": 1.9885, "step": 45156 }, { "epoch": 4.632642593352482, "grad_norm": 0.07492797076702118, "learning_rate": 0.01, "loss": 1.976, "step": 45159 }, { "epoch": 4.632950348789495, "grad_norm": 0.06871726363897324, "learning_rate": 0.01, "loss": 1.9877, "step": 45162 }, { "epoch": 4.633258104226508, "grad_norm": 0.04638645797967911, "learning_rate": 0.01, "loss": 1.9722, "step": 45165 }, { "epoch": 4.633565859663521, "grad_norm": 0.10735375434160233, "learning_rate": 0.01, "loss": 2.0048, "step": 45168 }, { "epoch": 4.6338736151005335, "grad_norm": 0.1550443023443222, "learning_rate": 0.01, "loss": 1.9663, "step": 45171 }, { "epoch": 4.634181370537546, "grad_norm": 0.14652806520462036, "learning_rate": 0.01, "loss": 2.0133, "step": 45174 }, { "epoch": 4.634489125974559, "grad_norm": 0.06344394385814667, "learning_rate": 0.01, "loss": 2.0081, "step": 45177 }, { "epoch": 4.634796881411572, "grad_norm": 0.03594981133937836, "learning_rate": 0.01, "loss": 2.0124, "step": 45180 }, { "epoch": 4.635104636848585, "grad_norm": 0.034205902367830276, "learning_rate": 0.01, "loss": 1.9808, "step": 45183 }, { "epoch": 4.635412392285597, "grad_norm": 0.041966572403907776, "learning_rate": 0.01, "loss": 1.9958, "step": 45186 }, { "epoch": 4.63572014772261, "grad_norm": 0.05192418769001961, "learning_rate": 0.01, "loss": 2.0068, "step": 45189 }, { "epoch": 4.636027903159622, "grad_norm": 0.04771101847290993, "learning_rate": 0.01, "loss": 1.9812, "step": 45192 }, { "epoch": 4.636335658596635, "grad_norm": 0.05367758497595787, "learning_rate": 0.01, "loss": 1.9885, "step": 45195 }, { "epoch": 4.636643414033648, "grad_norm": 0.08973217755556107, "learning_rate": 0.01, "loss": 2.0129, "step": 45198 }, { "epoch": 4.63695116947066, "grad_norm": 0.06793297082185745, "learning_rate": 0.01, "loss": 1.9893, "step": 45201 }, { "epoch": 4.637258924907673, "grad_norm": 0.06299318373203278, "learning_rate": 0.01, "loss": 2.0075, "step": 45204 }, { "epoch": 4.637566680344686, "grad_norm": 0.106409452855587, "learning_rate": 0.01, "loss": 1.9757, "step": 45207 }, { "epoch": 4.637874435781699, "grad_norm": 0.044573575258255005, "learning_rate": 0.01, "loss": 1.9644, "step": 45210 }, { "epoch": 4.638182191218712, "grad_norm": 0.04855922982096672, "learning_rate": 0.01, "loss": 1.9863, "step": 45213 }, { "epoch": 4.638489946655724, "grad_norm": 0.05310840159654617, "learning_rate": 0.01, "loss": 2.0067, "step": 45216 }, { "epoch": 4.638797702092737, "grad_norm": 0.1408100426197052, "learning_rate": 0.01, "loss": 2.016, "step": 45219 }, { "epoch": 4.63910545752975, "grad_norm": 0.07140195369720459, "learning_rate": 0.01, "loss": 1.9978, "step": 45222 }, { "epoch": 4.639413212966763, "grad_norm": 0.0864189863204956, "learning_rate": 0.01, "loss": 2.0122, "step": 45225 }, { "epoch": 4.6397209684037755, "grad_norm": 0.0877571702003479, "learning_rate": 0.01, "loss": 1.9844, "step": 45228 }, { "epoch": 4.640028723840788, "grad_norm": 0.06890790909528732, "learning_rate": 0.01, "loss": 1.9915, "step": 45231 }, { "epoch": 4.640336479277801, "grad_norm": 0.04007607325911522, "learning_rate": 0.01, "loss": 1.9682, "step": 45234 }, { "epoch": 4.640644234714813, "grad_norm": 0.09311459213495255, "learning_rate": 0.01, "loss": 1.9888, "step": 45237 }, { "epoch": 4.640951990151826, "grad_norm": 0.0721447765827179, "learning_rate": 0.01, "loss": 1.954, "step": 45240 }, { "epoch": 4.6412597455888385, "grad_norm": 0.07306293398141861, "learning_rate": 0.01, "loss": 2.02, "step": 45243 }, { "epoch": 4.641567501025851, "grad_norm": 0.044194191694259644, "learning_rate": 0.01, "loss": 1.9821, "step": 45246 }, { "epoch": 4.641875256462864, "grad_norm": 0.07482686638832092, "learning_rate": 0.01, "loss": 1.9969, "step": 45249 }, { "epoch": 4.642183011899877, "grad_norm": 0.05982416495680809, "learning_rate": 0.01, "loss": 1.9808, "step": 45252 }, { "epoch": 4.64249076733689, "grad_norm": 0.06082857400178909, "learning_rate": 0.01, "loss": 1.9854, "step": 45255 }, { "epoch": 4.642798522773902, "grad_norm": 0.12249299138784409, "learning_rate": 0.01, "loss": 1.9866, "step": 45258 }, { "epoch": 4.643106278210915, "grad_norm": 0.04660319164395332, "learning_rate": 0.01, "loss": 1.9974, "step": 45261 }, { "epoch": 4.643414033647928, "grad_norm": 0.13198639452457428, "learning_rate": 0.01, "loss": 2.0052, "step": 45264 }, { "epoch": 4.643721789084941, "grad_norm": 0.11743400990962982, "learning_rate": 0.01, "loss": 1.9944, "step": 45267 }, { "epoch": 4.6440295445219535, "grad_norm": 0.08904967457056046, "learning_rate": 0.01, "loss": 1.9585, "step": 45270 }, { "epoch": 4.644337299958966, "grad_norm": 0.051379457116127014, "learning_rate": 0.01, "loss": 1.9859, "step": 45273 }, { "epoch": 4.644645055395979, "grad_norm": 0.053696874529123306, "learning_rate": 0.01, "loss": 1.9972, "step": 45276 }, { "epoch": 4.644952810832992, "grad_norm": 0.03924264386296272, "learning_rate": 0.01, "loss": 1.9757, "step": 45279 }, { "epoch": 4.645260566270004, "grad_norm": 0.04208714887499809, "learning_rate": 0.01, "loss": 1.9968, "step": 45282 }, { "epoch": 4.6455683217070165, "grad_norm": 0.11654502153396606, "learning_rate": 0.01, "loss": 2.0116, "step": 45285 }, { "epoch": 4.645876077144029, "grad_norm": 0.06177401542663574, "learning_rate": 0.01, "loss": 1.9877, "step": 45288 }, { "epoch": 4.646183832581042, "grad_norm": 0.04343295469880104, "learning_rate": 0.01, "loss": 2.0025, "step": 45291 }, { "epoch": 4.646491588018055, "grad_norm": 0.036727067083120346, "learning_rate": 0.01, "loss": 1.9948, "step": 45294 }, { "epoch": 4.646799343455068, "grad_norm": 0.06670525670051575, "learning_rate": 0.01, "loss": 2.0094, "step": 45297 }, { "epoch": 4.64710709889208, "grad_norm": 0.08610829710960388, "learning_rate": 0.01, "loss": 1.9921, "step": 45300 }, { "epoch": 4.647414854329093, "grad_norm": 0.04402744024991989, "learning_rate": 0.01, "loss": 2.0245, "step": 45303 }, { "epoch": 4.647722609766106, "grad_norm": 0.09110675007104874, "learning_rate": 0.01, "loss": 1.9827, "step": 45306 }, { "epoch": 4.648030365203119, "grad_norm": 0.12931224703788757, "learning_rate": 0.01, "loss": 2.0143, "step": 45309 }, { "epoch": 4.6483381206401315, "grad_norm": 0.03825344517827034, "learning_rate": 0.01, "loss": 2.0175, "step": 45312 }, { "epoch": 4.648645876077144, "grad_norm": 0.04855644330382347, "learning_rate": 0.01, "loss": 1.992, "step": 45315 }, { "epoch": 4.648953631514157, "grad_norm": 0.08731474727392197, "learning_rate": 0.01, "loss": 2.0059, "step": 45318 }, { "epoch": 4.649261386951169, "grad_norm": 0.06591195613145828, "learning_rate": 0.01, "loss": 2.0107, "step": 45321 }, { "epoch": 4.649569142388183, "grad_norm": 0.051368821412324905, "learning_rate": 0.01, "loss": 2.0055, "step": 45324 }, { "epoch": 4.6498768978251945, "grad_norm": 0.09959837794303894, "learning_rate": 0.01, "loss": 2.0111, "step": 45327 }, { "epoch": 4.650184653262207, "grad_norm": 0.08669820427894592, "learning_rate": 0.01, "loss": 2.0138, "step": 45330 }, { "epoch": 4.65049240869922, "grad_norm": 0.03989667445421219, "learning_rate": 0.01, "loss": 1.996, "step": 45333 }, { "epoch": 4.650800164136233, "grad_norm": 0.033241745084524155, "learning_rate": 0.01, "loss": 2.0052, "step": 45336 }, { "epoch": 4.651107919573246, "grad_norm": 0.03395982086658478, "learning_rate": 0.01, "loss": 1.9674, "step": 45339 }, { "epoch": 4.651415675010258, "grad_norm": 0.038975682109594345, "learning_rate": 0.01, "loss": 1.9894, "step": 45342 }, { "epoch": 4.651723430447271, "grad_norm": 0.04904354363679886, "learning_rate": 0.01, "loss": 1.9875, "step": 45345 }, { "epoch": 4.652031185884284, "grad_norm": 0.11245663464069366, "learning_rate": 0.01, "loss": 1.9929, "step": 45348 }, { "epoch": 4.652338941321297, "grad_norm": 0.11784044653177261, "learning_rate": 0.01, "loss": 1.9836, "step": 45351 }, { "epoch": 4.6526466967583096, "grad_norm": 0.054707154631614685, "learning_rate": 0.01, "loss": 2.0161, "step": 45354 }, { "epoch": 4.652954452195322, "grad_norm": 0.07143942266702652, "learning_rate": 0.01, "loss": 1.9921, "step": 45357 }, { "epoch": 4.653262207632335, "grad_norm": 0.05629519745707512, "learning_rate": 0.01, "loss": 1.9809, "step": 45360 }, { "epoch": 4.653569963069348, "grad_norm": 0.03609561547636986, "learning_rate": 0.01, "loss": 1.9773, "step": 45363 }, { "epoch": 4.65387771850636, "grad_norm": 0.049254827201366425, "learning_rate": 0.01, "loss": 1.9866, "step": 45366 }, { "epoch": 4.6541854739433735, "grad_norm": 0.049836140125989914, "learning_rate": 0.01, "loss": 2.0302, "step": 45369 }, { "epoch": 4.654493229380385, "grad_norm": 0.043429117649793625, "learning_rate": 0.01, "loss": 2.0064, "step": 45372 }, { "epoch": 4.654800984817398, "grad_norm": 0.09842117130756378, "learning_rate": 0.01, "loss": 1.9911, "step": 45375 }, { "epoch": 4.655108740254411, "grad_norm": 0.06700047105550766, "learning_rate": 0.01, "loss": 1.9877, "step": 45378 }, { "epoch": 4.655416495691424, "grad_norm": 0.0717921331524849, "learning_rate": 0.01, "loss": 1.9809, "step": 45381 }, { "epoch": 4.6557242511284365, "grad_norm": 0.11065293103456497, "learning_rate": 0.01, "loss": 1.9845, "step": 45384 }, { "epoch": 4.656032006565449, "grad_norm": 0.07875839620828629, "learning_rate": 0.01, "loss": 1.9864, "step": 45387 }, { "epoch": 4.656339762002462, "grad_norm": 0.07618141919374466, "learning_rate": 0.01, "loss": 1.9818, "step": 45390 }, { "epoch": 4.656647517439475, "grad_norm": 0.05607747659087181, "learning_rate": 0.01, "loss": 2.007, "step": 45393 }, { "epoch": 4.656955272876488, "grad_norm": 0.08270974457263947, "learning_rate": 0.01, "loss": 2.0041, "step": 45396 }, { "epoch": 4.6572630283135, "grad_norm": 0.10751419514417648, "learning_rate": 0.01, "loss": 1.9982, "step": 45399 }, { "epoch": 4.657570783750513, "grad_norm": 0.07406135648488998, "learning_rate": 0.01, "loss": 1.9857, "step": 45402 }, { "epoch": 4.657878539187526, "grad_norm": 0.06641979515552521, "learning_rate": 0.01, "loss": 2.0221, "step": 45405 }, { "epoch": 4.658186294624539, "grad_norm": 0.04510427638888359, "learning_rate": 0.01, "loss": 1.9863, "step": 45408 }, { "epoch": 4.658494050061551, "grad_norm": 0.0381624698638916, "learning_rate": 0.01, "loss": 2.013, "step": 45411 }, { "epoch": 4.658801805498564, "grad_norm": 0.08912669867277145, "learning_rate": 0.01, "loss": 1.9966, "step": 45414 }, { "epoch": 4.659109560935576, "grad_norm": 0.04501614347100258, "learning_rate": 0.01, "loss": 1.9865, "step": 45417 }, { "epoch": 4.659417316372589, "grad_norm": 0.10741306841373444, "learning_rate": 0.01, "loss": 1.9793, "step": 45420 }, { "epoch": 4.659725071809602, "grad_norm": 0.05976792797446251, "learning_rate": 0.01, "loss": 2.0269, "step": 45423 }, { "epoch": 4.6600328272466145, "grad_norm": 0.09859906136989594, "learning_rate": 0.01, "loss": 2.0081, "step": 45426 }, { "epoch": 4.660340582683627, "grad_norm": 0.050599753856658936, "learning_rate": 0.01, "loss": 1.9862, "step": 45429 }, { "epoch": 4.66064833812064, "grad_norm": 0.04848974943161011, "learning_rate": 0.01, "loss": 2.0066, "step": 45432 }, { "epoch": 4.660956093557653, "grad_norm": 0.07970554381608963, "learning_rate": 0.01, "loss": 2.0102, "step": 45435 }, { "epoch": 4.661263848994666, "grad_norm": 0.06040119752287865, "learning_rate": 0.01, "loss": 2.001, "step": 45438 }, { "epoch": 4.661571604431678, "grad_norm": 0.05686302110552788, "learning_rate": 0.01, "loss": 1.9845, "step": 45441 }, { "epoch": 4.661879359868691, "grad_norm": 0.0757538229227066, "learning_rate": 0.01, "loss": 2.0019, "step": 45444 }, { "epoch": 4.662187115305704, "grad_norm": 0.07825610786676407, "learning_rate": 0.01, "loss": 2.0211, "step": 45447 }, { "epoch": 4.662494870742717, "grad_norm": 0.048520684242248535, "learning_rate": 0.01, "loss": 1.9991, "step": 45450 }, { "epoch": 4.6628026261797295, "grad_norm": 0.046711694449186325, "learning_rate": 0.01, "loss": 2.0003, "step": 45453 }, { "epoch": 4.663110381616741, "grad_norm": 0.058854326605796814, "learning_rate": 0.01, "loss": 1.9781, "step": 45456 }, { "epoch": 4.663418137053755, "grad_norm": 0.04749085754156113, "learning_rate": 0.01, "loss": 1.9882, "step": 45459 }, { "epoch": 4.663725892490767, "grad_norm": 0.05710771679878235, "learning_rate": 0.01, "loss": 1.9909, "step": 45462 }, { "epoch": 4.66403364792778, "grad_norm": 0.10407797247171402, "learning_rate": 0.01, "loss": 1.9993, "step": 45465 }, { "epoch": 4.6643414033647925, "grad_norm": 0.07726982980966568, "learning_rate": 0.01, "loss": 1.9727, "step": 45468 }, { "epoch": 4.664649158801805, "grad_norm": 0.11478970944881439, "learning_rate": 0.01, "loss": 2.0055, "step": 45471 }, { "epoch": 4.664956914238818, "grad_norm": 0.1078820750117302, "learning_rate": 0.01, "loss": 1.9855, "step": 45474 }, { "epoch": 4.665264669675831, "grad_norm": 0.06113171949982643, "learning_rate": 0.01, "loss": 1.9661, "step": 45477 }, { "epoch": 4.665572425112844, "grad_norm": 0.04639894515275955, "learning_rate": 0.01, "loss": 1.9616, "step": 45480 }, { "epoch": 4.665880180549856, "grad_norm": 0.04308944195508957, "learning_rate": 0.01, "loss": 1.9689, "step": 45483 }, { "epoch": 4.666187935986869, "grad_norm": 0.036519356071949005, "learning_rate": 0.01, "loss": 2.0034, "step": 45486 }, { "epoch": 4.666495691423882, "grad_norm": 0.04407155141234398, "learning_rate": 0.01, "loss": 1.9773, "step": 45489 }, { "epoch": 4.666803446860895, "grad_norm": 0.03955180197954178, "learning_rate": 0.01, "loss": 1.9971, "step": 45492 }, { "epoch": 4.6671112022979075, "grad_norm": 0.06779368221759796, "learning_rate": 0.01, "loss": 2.0009, "step": 45495 }, { "epoch": 4.66741895773492, "grad_norm": 0.11751384288072586, "learning_rate": 0.01, "loss": 2.0014, "step": 45498 }, { "epoch": 4.667726713171932, "grad_norm": 0.05809053033590317, "learning_rate": 0.01, "loss": 1.9988, "step": 45501 }, { "epoch": 4.668034468608946, "grad_norm": 0.07685140520334244, "learning_rate": 0.01, "loss": 1.9825, "step": 45504 }, { "epoch": 4.668342224045958, "grad_norm": 0.055087387561798096, "learning_rate": 0.01, "loss": 2.0133, "step": 45507 }, { "epoch": 4.668649979482971, "grad_norm": 0.05234852805733681, "learning_rate": 0.01, "loss": 1.997, "step": 45510 }, { "epoch": 4.668957734919983, "grad_norm": 0.048037078231573105, "learning_rate": 0.01, "loss": 1.9717, "step": 45513 }, { "epoch": 4.669265490356996, "grad_norm": 0.03744732588529587, "learning_rate": 0.01, "loss": 2.0247, "step": 45516 }, { "epoch": 4.669573245794009, "grad_norm": 0.053839731961488724, "learning_rate": 0.01, "loss": 1.9802, "step": 45519 }, { "epoch": 4.669881001231022, "grad_norm": 0.06782069802284241, "learning_rate": 0.01, "loss": 2.003, "step": 45522 }, { "epoch": 4.6701887566680345, "grad_norm": 0.05053957551717758, "learning_rate": 0.01, "loss": 2.004, "step": 45525 }, { "epoch": 4.670496512105047, "grad_norm": 0.07486964762210846, "learning_rate": 0.01, "loss": 2.0026, "step": 45528 }, { "epoch": 4.67080426754206, "grad_norm": 0.07367604970932007, "learning_rate": 0.01, "loss": 1.9788, "step": 45531 }, { "epoch": 4.671112022979073, "grad_norm": 0.06390465050935745, "learning_rate": 0.01, "loss": 1.981, "step": 45534 }, { "epoch": 4.671419778416086, "grad_norm": 0.1271253079175949, "learning_rate": 0.01, "loss": 1.9969, "step": 45537 }, { "epoch": 4.671727533853098, "grad_norm": 0.08515705168247223, "learning_rate": 0.01, "loss": 2.0074, "step": 45540 }, { "epoch": 4.672035289290111, "grad_norm": 0.03478894755244255, "learning_rate": 0.01, "loss": 1.9899, "step": 45543 }, { "epoch": 4.672343044727123, "grad_norm": 0.051871661096811295, "learning_rate": 0.01, "loss": 2.0114, "step": 45546 }, { "epoch": 4.672650800164137, "grad_norm": 0.0924333781003952, "learning_rate": 0.01, "loss": 1.9917, "step": 45549 }, { "epoch": 4.672958555601149, "grad_norm": 0.13955476880073547, "learning_rate": 0.01, "loss": 1.9923, "step": 45552 }, { "epoch": 4.673266311038161, "grad_norm": 0.07893053442239761, "learning_rate": 0.01, "loss": 1.9826, "step": 45555 }, { "epoch": 4.673574066475174, "grad_norm": 0.048040106892585754, "learning_rate": 0.01, "loss": 2.0011, "step": 45558 }, { "epoch": 4.673881821912187, "grad_norm": 0.05462590605020523, "learning_rate": 0.01, "loss": 1.9969, "step": 45561 }, { "epoch": 4.6741895773492, "grad_norm": 0.07224661111831665, "learning_rate": 0.01, "loss": 1.9815, "step": 45564 }, { "epoch": 4.6744973327862125, "grad_norm": 0.09311232715845108, "learning_rate": 0.01, "loss": 1.9797, "step": 45567 }, { "epoch": 4.674805088223225, "grad_norm": 0.051444876939058304, "learning_rate": 0.01, "loss": 1.9804, "step": 45570 }, { "epoch": 4.675112843660238, "grad_norm": 0.04749058559536934, "learning_rate": 0.01, "loss": 2.0038, "step": 45573 }, { "epoch": 4.675420599097251, "grad_norm": 0.04723438620567322, "learning_rate": 0.01, "loss": 2.0038, "step": 45576 }, { "epoch": 4.675728354534264, "grad_norm": 0.10198043286800385, "learning_rate": 0.01, "loss": 1.9845, "step": 45579 }, { "epoch": 4.676036109971276, "grad_norm": 0.07877392321825027, "learning_rate": 0.01, "loss": 1.9752, "step": 45582 }, { "epoch": 4.676343865408289, "grad_norm": 0.10339850187301636, "learning_rate": 0.01, "loss": 1.9716, "step": 45585 }, { "epoch": 4.676651620845302, "grad_norm": 0.07544226199388504, "learning_rate": 0.01, "loss": 2.0023, "step": 45588 }, { "epoch": 4.676959376282314, "grad_norm": 0.06043079495429993, "learning_rate": 0.01, "loss": 2.0105, "step": 45591 }, { "epoch": 4.6772671317193275, "grad_norm": 0.06057659909129143, "learning_rate": 0.01, "loss": 1.9932, "step": 45594 }, { "epoch": 4.677574887156339, "grad_norm": 0.08405612409114838, "learning_rate": 0.01, "loss": 1.9765, "step": 45597 }, { "epoch": 4.677882642593352, "grad_norm": 0.048926811665296555, "learning_rate": 0.01, "loss": 1.9958, "step": 45600 }, { "epoch": 4.678190398030365, "grad_norm": 0.05887090042233467, "learning_rate": 0.01, "loss": 1.9904, "step": 45603 }, { "epoch": 4.678498153467378, "grad_norm": 0.04116439446806908, "learning_rate": 0.01, "loss": 2.0309, "step": 45606 }, { "epoch": 4.6788059089043905, "grad_norm": 0.04675902798771858, "learning_rate": 0.01, "loss": 1.9901, "step": 45609 }, { "epoch": 4.679113664341403, "grad_norm": 0.036510296165943146, "learning_rate": 0.01, "loss": 1.986, "step": 45612 }, { "epoch": 4.679421419778416, "grad_norm": 0.06395171582698822, "learning_rate": 0.01, "loss": 1.9632, "step": 45615 }, { "epoch": 4.679729175215429, "grad_norm": 0.09183214604854584, "learning_rate": 0.01, "loss": 1.9844, "step": 45618 }, { "epoch": 4.680036930652442, "grad_norm": 0.03406717628240585, "learning_rate": 0.01, "loss": 1.9838, "step": 45621 }, { "epoch": 4.680344686089454, "grad_norm": 0.11709492653608322, "learning_rate": 0.01, "loss": 1.9687, "step": 45624 }, { "epoch": 4.680652441526467, "grad_norm": 0.054468922317028046, "learning_rate": 0.01, "loss": 1.9965, "step": 45627 }, { "epoch": 4.68096019696348, "grad_norm": 0.07991788536310196, "learning_rate": 0.01, "loss": 2.0017, "step": 45630 }, { "epoch": 4.681267952400493, "grad_norm": 0.046350929886102676, "learning_rate": 0.01, "loss": 1.9873, "step": 45633 }, { "epoch": 4.681575707837505, "grad_norm": 0.09740296006202698, "learning_rate": 0.01, "loss": 1.9912, "step": 45636 }, { "epoch": 4.681883463274518, "grad_norm": 0.0619819350540638, "learning_rate": 0.01, "loss": 2.0134, "step": 45639 }, { "epoch": 4.68219121871153, "grad_norm": 0.04457273334264755, "learning_rate": 0.01, "loss": 1.9918, "step": 45642 }, { "epoch": 4.682498974148543, "grad_norm": 0.07806507498025894, "learning_rate": 0.01, "loss": 1.9996, "step": 45645 }, { "epoch": 4.682806729585556, "grad_norm": 0.07045695930719376, "learning_rate": 0.01, "loss": 1.9979, "step": 45648 }, { "epoch": 4.6831144850225686, "grad_norm": 0.0896361917257309, "learning_rate": 0.01, "loss": 1.988, "step": 45651 }, { "epoch": 4.683422240459581, "grad_norm": 0.04496191442012787, "learning_rate": 0.01, "loss": 2.0188, "step": 45654 }, { "epoch": 4.683729995896594, "grad_norm": 0.041538260877132416, "learning_rate": 0.01, "loss": 2.0357, "step": 45657 }, { "epoch": 4.684037751333607, "grad_norm": 0.07717595249414444, "learning_rate": 0.01, "loss": 1.977, "step": 45660 }, { "epoch": 4.68434550677062, "grad_norm": 0.046603985130786896, "learning_rate": 0.01, "loss": 1.9853, "step": 45663 }, { "epoch": 4.6846532622076325, "grad_norm": 0.0826171487569809, "learning_rate": 0.01, "loss": 1.9716, "step": 45666 }, { "epoch": 4.684961017644645, "grad_norm": 0.09356506168842316, "learning_rate": 0.01, "loss": 1.9932, "step": 45669 }, { "epoch": 4.685268773081658, "grad_norm": 0.04961970075964928, "learning_rate": 0.01, "loss": 1.9813, "step": 45672 }, { "epoch": 4.685576528518671, "grad_norm": 0.08724430203437805, "learning_rate": 0.01, "loss": 2.0075, "step": 45675 }, { "epoch": 4.685884283955684, "grad_norm": 0.093470498919487, "learning_rate": 0.01, "loss": 1.995, "step": 45678 }, { "epoch": 4.6861920393926955, "grad_norm": 0.04148003086447716, "learning_rate": 0.01, "loss": 2.0063, "step": 45681 }, { "epoch": 4.686499794829709, "grad_norm": 0.04073407128453255, "learning_rate": 0.01, "loss": 1.9729, "step": 45684 }, { "epoch": 4.686807550266721, "grad_norm": 0.06185337156057358, "learning_rate": 0.01, "loss": 2.0155, "step": 45687 }, { "epoch": 4.687115305703734, "grad_norm": 0.06323441118001938, "learning_rate": 0.01, "loss": 2.0087, "step": 45690 }, { "epoch": 4.687423061140747, "grad_norm": 0.06550868600606918, "learning_rate": 0.01, "loss": 1.9642, "step": 45693 }, { "epoch": 4.687730816577759, "grad_norm": 0.10702592879533768, "learning_rate": 0.01, "loss": 1.9947, "step": 45696 }, { "epoch": 4.688038572014772, "grad_norm": 0.07939399778842926, "learning_rate": 0.01, "loss": 2.0218, "step": 45699 }, { "epoch": 4.688346327451785, "grad_norm": 0.09626048058271408, "learning_rate": 0.01, "loss": 1.9873, "step": 45702 }, { "epoch": 4.688654082888798, "grad_norm": 0.10478439927101135, "learning_rate": 0.01, "loss": 1.9968, "step": 45705 }, { "epoch": 4.6889618383258105, "grad_norm": 0.06496943533420563, "learning_rate": 0.01, "loss": 2.0107, "step": 45708 }, { "epoch": 4.689269593762823, "grad_norm": 0.060445625334978104, "learning_rate": 0.01, "loss": 1.9732, "step": 45711 }, { "epoch": 4.689577349199836, "grad_norm": 0.050236016511917114, "learning_rate": 0.01, "loss": 2.0052, "step": 45714 }, { "epoch": 4.689885104636849, "grad_norm": 0.07051600515842438, "learning_rate": 0.01, "loss": 1.9856, "step": 45717 }, { "epoch": 4.690192860073862, "grad_norm": 0.14171123504638672, "learning_rate": 0.01, "loss": 1.9885, "step": 45720 }, { "epoch": 4.690500615510874, "grad_norm": 0.04662410169839859, "learning_rate": 0.01, "loss": 1.9872, "step": 45723 }, { "epoch": 4.690808370947886, "grad_norm": 0.04155685007572174, "learning_rate": 0.01, "loss": 1.986, "step": 45726 }, { "epoch": 4.6911161263849, "grad_norm": 0.03519884869456291, "learning_rate": 0.01, "loss": 2.0192, "step": 45729 }, { "epoch": 4.691423881821912, "grad_norm": 0.04860999807715416, "learning_rate": 0.01, "loss": 1.9879, "step": 45732 }, { "epoch": 4.691731637258925, "grad_norm": 0.05452680215239525, "learning_rate": 0.01, "loss": 1.987, "step": 45735 }, { "epoch": 4.692039392695937, "grad_norm": 0.08102243393659592, "learning_rate": 0.01, "loss": 2.0163, "step": 45738 }, { "epoch": 4.69234714813295, "grad_norm": 0.04494756832718849, "learning_rate": 0.01, "loss": 1.9904, "step": 45741 }, { "epoch": 4.692654903569963, "grad_norm": 0.05493905395269394, "learning_rate": 0.01, "loss": 1.9823, "step": 45744 }, { "epoch": 4.692962659006976, "grad_norm": 0.037139080464839935, "learning_rate": 0.01, "loss": 1.9843, "step": 45747 }, { "epoch": 4.6932704144439885, "grad_norm": 0.04770129173994064, "learning_rate": 0.01, "loss": 1.9545, "step": 45750 }, { "epoch": 4.693578169881001, "grad_norm": 0.1386919915676117, "learning_rate": 0.01, "loss": 1.9886, "step": 45753 }, { "epoch": 4.693885925318014, "grad_norm": 0.038857076317071915, "learning_rate": 0.01, "loss": 1.9791, "step": 45756 }, { "epoch": 4.694193680755027, "grad_norm": 0.08089780062437057, "learning_rate": 0.01, "loss": 1.9789, "step": 45759 }, { "epoch": 4.69450143619204, "grad_norm": 0.05583566427230835, "learning_rate": 0.01, "loss": 1.9852, "step": 45762 }, { "epoch": 4.694809191629052, "grad_norm": 0.06422879546880722, "learning_rate": 0.01, "loss": 2.001, "step": 45765 }, { "epoch": 4.695116947066065, "grad_norm": 0.037838250398635864, "learning_rate": 0.01, "loss": 1.9796, "step": 45768 }, { "epoch": 4.695424702503077, "grad_norm": 0.04485329985618591, "learning_rate": 0.01, "loss": 2.0003, "step": 45771 }, { "epoch": 4.695732457940091, "grad_norm": 0.13175508379936218, "learning_rate": 0.01, "loss": 1.9796, "step": 45774 }, { "epoch": 4.696040213377103, "grad_norm": 0.04778193309903145, "learning_rate": 0.01, "loss": 1.9786, "step": 45777 }, { "epoch": 4.696347968814115, "grad_norm": 0.07214421778917313, "learning_rate": 0.01, "loss": 1.9976, "step": 45780 }, { "epoch": 4.696655724251128, "grad_norm": 0.04276994615793228, "learning_rate": 0.01, "loss": 2.0162, "step": 45783 }, { "epoch": 4.696963479688141, "grad_norm": 0.049977049231529236, "learning_rate": 0.01, "loss": 2.0031, "step": 45786 }, { "epoch": 4.697271235125154, "grad_norm": 0.031859464943408966, "learning_rate": 0.01, "loss": 1.9764, "step": 45789 }, { "epoch": 4.6975789905621665, "grad_norm": 0.05537641420960426, "learning_rate": 0.01, "loss": 1.9991, "step": 45792 }, { "epoch": 4.697886745999179, "grad_norm": 0.07403066009283066, "learning_rate": 0.01, "loss": 1.9886, "step": 45795 }, { "epoch": 4.698194501436192, "grad_norm": 0.037336938083171844, "learning_rate": 0.01, "loss": 1.9824, "step": 45798 }, { "epoch": 4.698502256873205, "grad_norm": 0.04439583793282509, "learning_rate": 0.01, "loss": 2.0029, "step": 45801 }, { "epoch": 4.698810012310218, "grad_norm": 0.05648301541805267, "learning_rate": 0.01, "loss": 2.0049, "step": 45804 }, { "epoch": 4.6991177677472304, "grad_norm": 0.07558471709489822, "learning_rate": 0.01, "loss": 1.9963, "step": 45807 }, { "epoch": 4.699425523184243, "grad_norm": 0.04831695184111595, "learning_rate": 0.01, "loss": 1.981, "step": 45810 }, { "epoch": 4.699733278621256, "grad_norm": 0.05198276415467262, "learning_rate": 0.01, "loss": 1.9696, "step": 45813 }, { "epoch": 4.700041034058268, "grad_norm": 0.05408303439617157, "learning_rate": 0.01, "loss": 1.9825, "step": 45816 }, { "epoch": 4.700348789495282, "grad_norm": 0.107550710439682, "learning_rate": 0.01, "loss": 1.9777, "step": 45819 }, { "epoch": 4.7006565449322935, "grad_norm": 0.05953781679272652, "learning_rate": 0.01, "loss": 1.9972, "step": 45822 }, { "epoch": 4.700964300369306, "grad_norm": 0.09626014530658722, "learning_rate": 0.01, "loss": 1.989, "step": 45825 }, { "epoch": 4.701272055806319, "grad_norm": 0.08343878388404846, "learning_rate": 0.01, "loss": 2.0162, "step": 45828 }, { "epoch": 4.701579811243332, "grad_norm": 0.037152498960494995, "learning_rate": 0.01, "loss": 1.9595, "step": 45831 }, { "epoch": 4.701887566680345, "grad_norm": 0.10978374630212784, "learning_rate": 0.01, "loss": 1.9975, "step": 45834 }, { "epoch": 4.702195322117357, "grad_norm": 0.04754413291811943, "learning_rate": 0.01, "loss": 1.997, "step": 45837 }, { "epoch": 4.70250307755437, "grad_norm": 0.04514869302511215, "learning_rate": 0.01, "loss": 1.9843, "step": 45840 }, { "epoch": 4.702810832991383, "grad_norm": 0.08105579018592834, "learning_rate": 0.01, "loss": 1.9678, "step": 45843 }, { "epoch": 4.703118588428396, "grad_norm": 0.032461777329444885, "learning_rate": 0.01, "loss": 1.9961, "step": 45846 }, { "epoch": 4.7034263438654085, "grad_norm": 0.13264963030815125, "learning_rate": 0.01, "loss": 1.9787, "step": 45849 }, { "epoch": 4.703734099302421, "grad_norm": 0.06816978752613068, "learning_rate": 0.01, "loss": 1.9947, "step": 45852 }, { "epoch": 4.704041854739434, "grad_norm": 0.09646057337522507, "learning_rate": 0.01, "loss": 1.9838, "step": 45855 }, { "epoch": 4.704349610176447, "grad_norm": 0.06418583542108536, "learning_rate": 0.01, "loss": 2.0025, "step": 45858 }, { "epoch": 4.704657365613459, "grad_norm": 0.03923073783516884, "learning_rate": 0.01, "loss": 2.008, "step": 45861 }, { "epoch": 4.704965121050472, "grad_norm": 0.03935366868972778, "learning_rate": 0.01, "loss": 1.9926, "step": 45864 }, { "epoch": 4.705272876487484, "grad_norm": 0.0409211590886116, "learning_rate": 0.01, "loss": 2.0004, "step": 45867 }, { "epoch": 4.705580631924497, "grad_norm": 0.02996712550520897, "learning_rate": 0.01, "loss": 1.9865, "step": 45870 }, { "epoch": 4.70588838736151, "grad_norm": 0.03149070590734482, "learning_rate": 0.01, "loss": 1.9687, "step": 45873 }, { "epoch": 4.706196142798523, "grad_norm": 0.11619815230369568, "learning_rate": 0.01, "loss": 2.0, "step": 45876 }, { "epoch": 4.706503898235535, "grad_norm": 0.08959172666072845, "learning_rate": 0.01, "loss": 1.9773, "step": 45879 }, { "epoch": 4.706811653672548, "grad_norm": 0.03898904472589493, "learning_rate": 0.01, "loss": 2.006, "step": 45882 }, { "epoch": 4.707119409109561, "grad_norm": 0.05690839886665344, "learning_rate": 0.01, "loss": 1.995, "step": 45885 }, { "epoch": 4.707427164546574, "grad_norm": 0.08014564961194992, "learning_rate": 0.01, "loss": 1.9871, "step": 45888 }, { "epoch": 4.7077349199835865, "grad_norm": 0.07317539304494858, "learning_rate": 0.01, "loss": 1.9881, "step": 45891 }, { "epoch": 4.708042675420599, "grad_norm": 0.040355369448661804, "learning_rate": 0.01, "loss": 1.9926, "step": 45894 }, { "epoch": 4.708350430857612, "grad_norm": 0.04205753654241562, "learning_rate": 0.01, "loss": 2.0031, "step": 45897 }, { "epoch": 4.708658186294625, "grad_norm": 0.07082203030586243, "learning_rate": 0.01, "loss": 1.9845, "step": 45900 }, { "epoch": 4.708965941731638, "grad_norm": 0.037901826202869415, "learning_rate": 0.01, "loss": 1.9742, "step": 45903 }, { "epoch": 4.7092736971686495, "grad_norm": 0.04728815332055092, "learning_rate": 0.01, "loss": 2.0117, "step": 45906 }, { "epoch": 4.709581452605663, "grad_norm": 0.048279549926519394, "learning_rate": 0.01, "loss": 1.982, "step": 45909 }, { "epoch": 4.709889208042675, "grad_norm": 0.06365431100130081, "learning_rate": 0.01, "loss": 2.0031, "step": 45912 }, { "epoch": 4.710196963479688, "grad_norm": 0.07750722020864487, "learning_rate": 0.01, "loss": 2.0182, "step": 45915 }, { "epoch": 4.710504718916701, "grad_norm": 0.07337852567434311, "learning_rate": 0.01, "loss": 1.9747, "step": 45918 }, { "epoch": 4.710812474353713, "grad_norm": 0.055369194597005844, "learning_rate": 0.01, "loss": 1.9822, "step": 45921 }, { "epoch": 4.711120229790726, "grad_norm": 0.07653316110372543, "learning_rate": 0.01, "loss": 1.9758, "step": 45924 }, { "epoch": 4.711427985227739, "grad_norm": 0.13082928955554962, "learning_rate": 0.01, "loss": 2.0137, "step": 45927 }, { "epoch": 4.711735740664752, "grad_norm": 0.07909176498651505, "learning_rate": 0.01, "loss": 1.9699, "step": 45930 }, { "epoch": 4.7120434961017645, "grad_norm": 0.055445730686187744, "learning_rate": 0.01, "loss": 2.0064, "step": 45933 }, { "epoch": 4.712351251538777, "grad_norm": 0.08689797669649124, "learning_rate": 0.01, "loss": 1.9905, "step": 45936 }, { "epoch": 4.71265900697579, "grad_norm": 0.051975540816783905, "learning_rate": 0.01, "loss": 2.0116, "step": 45939 }, { "epoch": 4.712966762412803, "grad_norm": 0.1104854941368103, "learning_rate": 0.01, "loss": 1.9894, "step": 45942 }, { "epoch": 4.713274517849816, "grad_norm": 0.05927729606628418, "learning_rate": 0.01, "loss": 1.9928, "step": 45945 }, { "epoch": 4.713582273286828, "grad_norm": 0.051379501819610596, "learning_rate": 0.01, "loss": 1.9805, "step": 45948 }, { "epoch": 4.71389002872384, "grad_norm": 0.03545750305056572, "learning_rate": 0.01, "loss": 1.9946, "step": 45951 }, { "epoch": 4.714197784160854, "grad_norm": 0.044772762805223465, "learning_rate": 0.01, "loss": 1.9931, "step": 45954 }, { "epoch": 4.714505539597866, "grad_norm": 0.04172508046030998, "learning_rate": 0.01, "loss": 1.9998, "step": 45957 }, { "epoch": 4.714813295034879, "grad_norm": 0.04818372428417206, "learning_rate": 0.01, "loss": 1.9686, "step": 45960 }, { "epoch": 4.7151210504718915, "grad_norm": 0.10572110861539841, "learning_rate": 0.01, "loss": 1.9645, "step": 45963 }, { "epoch": 4.715428805908904, "grad_norm": 0.04968751594424248, "learning_rate": 0.01, "loss": 1.9699, "step": 45966 }, { "epoch": 4.715736561345917, "grad_norm": 0.08244633674621582, "learning_rate": 0.01, "loss": 1.9836, "step": 45969 }, { "epoch": 4.71604431678293, "grad_norm": 0.0786314308643341, "learning_rate": 0.01, "loss": 2.0048, "step": 45972 }, { "epoch": 4.716352072219943, "grad_norm": 0.05768943205475807, "learning_rate": 0.01, "loss": 1.9754, "step": 45975 }, { "epoch": 4.716659827656955, "grad_norm": 0.04455199092626572, "learning_rate": 0.01, "loss": 1.993, "step": 45978 }, { "epoch": 4.716967583093968, "grad_norm": 0.05535956844687462, "learning_rate": 0.01, "loss": 1.9775, "step": 45981 }, { "epoch": 4.717275338530981, "grad_norm": 0.03555374965071678, "learning_rate": 0.01, "loss": 1.9867, "step": 45984 }, { "epoch": 4.717583093967994, "grad_norm": 0.06458409130573273, "learning_rate": 0.01, "loss": 2.0006, "step": 45987 }, { "epoch": 4.7178908494050065, "grad_norm": 0.05734160915017128, "learning_rate": 0.01, "loss": 1.9711, "step": 45990 }, { "epoch": 4.718198604842019, "grad_norm": 0.06240745261311531, "learning_rate": 0.01, "loss": 2.0156, "step": 45993 }, { "epoch": 4.718506360279031, "grad_norm": 0.08987503498792648, "learning_rate": 0.01, "loss": 2.0033, "step": 45996 }, { "epoch": 4.718814115716045, "grad_norm": 0.06078198924660683, "learning_rate": 0.01, "loss": 1.9953, "step": 45999 }, { "epoch": 4.719121871153057, "grad_norm": 0.12024971842765808, "learning_rate": 0.01, "loss": 2.006, "step": 46002 }, { "epoch": 4.7194296265900695, "grad_norm": 0.06620301306247711, "learning_rate": 0.01, "loss": 1.9997, "step": 46005 }, { "epoch": 4.719737382027082, "grad_norm": 0.0450078584253788, "learning_rate": 0.01, "loss": 1.9906, "step": 46008 }, { "epoch": 4.720045137464095, "grad_norm": 0.04509684816002846, "learning_rate": 0.01, "loss": 2.0145, "step": 46011 }, { "epoch": 4.720352892901108, "grad_norm": 0.06029663607478142, "learning_rate": 0.01, "loss": 1.9678, "step": 46014 }, { "epoch": 4.720660648338121, "grad_norm": 0.06620631366968155, "learning_rate": 0.01, "loss": 2.0016, "step": 46017 }, { "epoch": 4.720968403775133, "grad_norm": 0.09920935332775116, "learning_rate": 0.01, "loss": 1.9842, "step": 46020 }, { "epoch": 4.721276159212146, "grad_norm": 0.04000372067093849, "learning_rate": 0.01, "loss": 1.9904, "step": 46023 }, { "epoch": 4.721583914649159, "grad_norm": 0.10262927412986755, "learning_rate": 0.01, "loss": 1.9864, "step": 46026 }, { "epoch": 4.721891670086172, "grad_norm": 0.07029026746749878, "learning_rate": 0.01, "loss": 2.0492, "step": 46029 }, { "epoch": 4.7221994255231845, "grad_norm": 0.03733726218342781, "learning_rate": 0.01, "loss": 1.9715, "step": 46032 }, { "epoch": 4.722507180960197, "grad_norm": 0.16273632645606995, "learning_rate": 0.01, "loss": 2.0146, "step": 46035 }, { "epoch": 4.72281493639721, "grad_norm": 0.052774641662836075, "learning_rate": 0.01, "loss": 1.9727, "step": 46038 }, { "epoch": 4.723122691834222, "grad_norm": 0.04785846173763275, "learning_rate": 0.01, "loss": 1.9824, "step": 46041 }, { "epoch": 4.723430447271236, "grad_norm": 0.046762675046920776, "learning_rate": 0.01, "loss": 2.01, "step": 46044 }, { "epoch": 4.7237382027082475, "grad_norm": 0.055082205682992935, "learning_rate": 0.01, "loss": 1.9893, "step": 46047 }, { "epoch": 4.72404595814526, "grad_norm": 0.12330979853868484, "learning_rate": 0.01, "loss": 2.006, "step": 46050 }, { "epoch": 4.724353713582273, "grad_norm": 0.0426286906003952, "learning_rate": 0.01, "loss": 2.0191, "step": 46053 }, { "epoch": 4.724661469019286, "grad_norm": 0.09888054430484772, "learning_rate": 0.01, "loss": 2.0043, "step": 46056 }, { "epoch": 4.724969224456299, "grad_norm": 0.09335997700691223, "learning_rate": 0.01, "loss": 1.9991, "step": 46059 }, { "epoch": 4.725276979893311, "grad_norm": 0.06959028542041779, "learning_rate": 0.01, "loss": 1.9583, "step": 46062 }, { "epoch": 4.725584735330324, "grad_norm": 0.06538756936788559, "learning_rate": 0.01, "loss": 1.9642, "step": 46065 }, { "epoch": 4.725892490767337, "grad_norm": 0.0652519017457962, "learning_rate": 0.01, "loss": 1.9783, "step": 46068 }, { "epoch": 4.72620024620435, "grad_norm": 0.049920666962862015, "learning_rate": 0.01, "loss": 1.9916, "step": 46071 }, { "epoch": 4.7265080016413625, "grad_norm": 0.0796656534075737, "learning_rate": 0.01, "loss": 1.9637, "step": 46074 }, { "epoch": 4.726815757078375, "grad_norm": 0.08785120397806168, "learning_rate": 0.01, "loss": 1.9645, "step": 46077 }, { "epoch": 4.727123512515388, "grad_norm": 0.05050429701805115, "learning_rate": 0.01, "loss": 2.0051, "step": 46080 }, { "epoch": 4.727431267952401, "grad_norm": 0.039991263300180435, "learning_rate": 0.01, "loss": 2.0019, "step": 46083 }, { "epoch": 4.727739023389413, "grad_norm": 0.04895581305027008, "learning_rate": 0.01, "loss": 1.9998, "step": 46086 }, { "epoch": 4.728046778826426, "grad_norm": 0.13111530244350433, "learning_rate": 0.01, "loss": 2.0091, "step": 46089 }, { "epoch": 4.728354534263438, "grad_norm": 0.07493706792593002, "learning_rate": 0.01, "loss": 1.9753, "step": 46092 }, { "epoch": 4.728662289700451, "grad_norm": 0.06003058701753616, "learning_rate": 0.01, "loss": 1.9846, "step": 46095 }, { "epoch": 4.728970045137464, "grad_norm": 0.05581682547926903, "learning_rate": 0.01, "loss": 1.986, "step": 46098 }, { "epoch": 4.729277800574477, "grad_norm": 0.0741606056690216, "learning_rate": 0.01, "loss": 1.9859, "step": 46101 }, { "epoch": 4.7295855560114894, "grad_norm": 0.05753305181860924, "learning_rate": 0.01, "loss": 1.9926, "step": 46104 }, { "epoch": 4.729893311448502, "grad_norm": 0.06790949404239655, "learning_rate": 0.01, "loss": 1.9728, "step": 46107 }, { "epoch": 4.730201066885515, "grad_norm": 0.05599404126405716, "learning_rate": 0.01, "loss": 1.9877, "step": 46110 }, { "epoch": 4.730508822322528, "grad_norm": 0.06556524336338043, "learning_rate": 0.01, "loss": 1.9785, "step": 46113 }, { "epoch": 4.730816577759541, "grad_norm": 0.043739136308431625, "learning_rate": 0.01, "loss": 1.9924, "step": 46116 }, { "epoch": 4.731124333196553, "grad_norm": 0.05598102882504463, "learning_rate": 0.01, "loss": 1.9934, "step": 46119 }, { "epoch": 4.731432088633566, "grad_norm": 0.0398273728787899, "learning_rate": 0.01, "loss": 1.9827, "step": 46122 }, { "epoch": 4.731739844070579, "grad_norm": 0.06281284987926483, "learning_rate": 0.01, "loss": 1.9762, "step": 46125 }, { "epoch": 4.732047599507592, "grad_norm": 0.0575578436255455, "learning_rate": 0.01, "loss": 1.9939, "step": 46128 }, { "epoch": 4.732355354944604, "grad_norm": 0.044283825904130936, "learning_rate": 0.01, "loss": 2.0001, "step": 46131 }, { "epoch": 4.732663110381616, "grad_norm": 0.11150834709405899, "learning_rate": 0.01, "loss": 1.9773, "step": 46134 }, { "epoch": 4.732970865818629, "grad_norm": 0.11505670100450516, "learning_rate": 0.01, "loss": 1.9894, "step": 46137 }, { "epoch": 4.733278621255642, "grad_norm": 0.10406883805990219, "learning_rate": 0.01, "loss": 1.9752, "step": 46140 }, { "epoch": 4.733586376692655, "grad_norm": 0.07568677514791489, "learning_rate": 0.01, "loss": 1.9658, "step": 46143 }, { "epoch": 4.7338941321296675, "grad_norm": 0.05774052441120148, "learning_rate": 0.01, "loss": 1.9888, "step": 46146 }, { "epoch": 4.73420188756668, "grad_norm": 0.05138164013624191, "learning_rate": 0.01, "loss": 1.9697, "step": 46149 }, { "epoch": 4.734509643003693, "grad_norm": 0.09138276427984238, "learning_rate": 0.01, "loss": 1.9881, "step": 46152 }, { "epoch": 4.734817398440706, "grad_norm": 0.04819070175290108, "learning_rate": 0.01, "loss": 1.9934, "step": 46155 }, { "epoch": 4.735125153877719, "grad_norm": 0.06231243535876274, "learning_rate": 0.01, "loss": 2.0009, "step": 46158 }, { "epoch": 4.735432909314731, "grad_norm": 0.05212223157286644, "learning_rate": 0.01, "loss": 1.9759, "step": 46161 }, { "epoch": 4.735740664751744, "grad_norm": 0.05681807920336723, "learning_rate": 0.01, "loss": 1.9808, "step": 46164 }, { "epoch": 4.736048420188757, "grad_norm": 0.039965178817510605, "learning_rate": 0.01, "loss": 1.9881, "step": 46167 }, { "epoch": 4.73635617562577, "grad_norm": 0.0815226286649704, "learning_rate": 0.01, "loss": 1.9794, "step": 46170 }, { "epoch": 4.7366639310627825, "grad_norm": 0.121616430580616, "learning_rate": 0.01, "loss": 1.9979, "step": 46173 }, { "epoch": 4.736971686499794, "grad_norm": 0.06908087432384491, "learning_rate": 0.01, "loss": 2.0142, "step": 46176 }, { "epoch": 4.737279441936807, "grad_norm": 0.043230995535850525, "learning_rate": 0.01, "loss": 1.995, "step": 46179 }, { "epoch": 4.73758719737382, "grad_norm": 0.10230391472578049, "learning_rate": 0.01, "loss": 1.967, "step": 46182 }, { "epoch": 4.737894952810833, "grad_norm": 0.06521748751401901, "learning_rate": 0.01, "loss": 1.9531, "step": 46185 }, { "epoch": 4.7382027082478455, "grad_norm": 0.052706390619277954, "learning_rate": 0.01, "loss": 1.9886, "step": 46188 }, { "epoch": 4.738510463684858, "grad_norm": 0.0332234725356102, "learning_rate": 0.01, "loss": 2.0046, "step": 46191 }, { "epoch": 4.738818219121871, "grad_norm": 0.061153993010520935, "learning_rate": 0.01, "loss": 1.9983, "step": 46194 }, { "epoch": 4.739125974558884, "grad_norm": 0.12053243815898895, "learning_rate": 0.01, "loss": 1.997, "step": 46197 }, { "epoch": 4.739433729995897, "grad_norm": 0.09798255562782288, "learning_rate": 0.01, "loss": 2.01, "step": 46200 }, { "epoch": 4.739741485432909, "grad_norm": 0.05450673773884773, "learning_rate": 0.01, "loss": 1.9899, "step": 46203 }, { "epoch": 4.740049240869922, "grad_norm": 0.03913121297955513, "learning_rate": 0.01, "loss": 1.9764, "step": 46206 }, { "epoch": 4.740356996306935, "grad_norm": 0.036051902920007706, "learning_rate": 0.01, "loss": 1.9876, "step": 46209 }, { "epoch": 4.740664751743948, "grad_norm": 0.04099172353744507, "learning_rate": 0.01, "loss": 1.9706, "step": 46212 }, { "epoch": 4.7409725071809605, "grad_norm": 0.03864535689353943, "learning_rate": 0.01, "loss": 1.981, "step": 46215 }, { "epoch": 4.741280262617973, "grad_norm": 0.055634405463933945, "learning_rate": 0.01, "loss": 2.0069, "step": 46218 }, { "epoch": 4.741588018054985, "grad_norm": 0.06536100059747696, "learning_rate": 0.01, "loss": 2.0133, "step": 46221 }, { "epoch": 4.741895773491998, "grad_norm": 0.05309274420142174, "learning_rate": 0.01, "loss": 1.9951, "step": 46224 }, { "epoch": 4.742203528929011, "grad_norm": 0.09953348338603973, "learning_rate": 0.01, "loss": 2.0127, "step": 46227 }, { "epoch": 4.7425112843660235, "grad_norm": 0.04084203764796257, "learning_rate": 0.01, "loss": 1.982, "step": 46230 }, { "epoch": 4.742819039803036, "grad_norm": 0.12028990685939789, "learning_rate": 0.01, "loss": 2.0051, "step": 46233 }, { "epoch": 4.743126795240049, "grad_norm": 0.04335862770676613, "learning_rate": 0.01, "loss": 1.9552, "step": 46236 }, { "epoch": 4.743434550677062, "grad_norm": 0.06697961688041687, "learning_rate": 0.01, "loss": 1.9944, "step": 46239 }, { "epoch": 4.743742306114075, "grad_norm": 0.047941289842128754, "learning_rate": 0.01, "loss": 1.9999, "step": 46242 }, { "epoch": 4.744050061551087, "grad_norm": 0.03778136894106865, "learning_rate": 0.01, "loss": 2.0118, "step": 46245 }, { "epoch": 4.7443578169881, "grad_norm": 0.04735583811998367, "learning_rate": 0.01, "loss": 1.9793, "step": 46248 }, { "epoch": 4.744665572425113, "grad_norm": 0.04471844062209129, "learning_rate": 0.01, "loss": 1.9721, "step": 46251 }, { "epoch": 4.744973327862126, "grad_norm": 0.03621865063905716, "learning_rate": 0.01, "loss": 1.9704, "step": 46254 }, { "epoch": 4.745281083299139, "grad_norm": 0.12407182902097702, "learning_rate": 0.01, "loss": 1.9829, "step": 46257 }, { "epoch": 4.745588838736151, "grad_norm": 0.05654411017894745, "learning_rate": 0.01, "loss": 2.0199, "step": 46260 }, { "epoch": 4.745896594173164, "grad_norm": 0.08955933898687363, "learning_rate": 0.01, "loss": 2.0169, "step": 46263 }, { "epoch": 4.746204349610176, "grad_norm": 0.08366284519433975, "learning_rate": 0.01, "loss": 1.9695, "step": 46266 }, { "epoch": 4.746512105047189, "grad_norm": 0.056272462010383606, "learning_rate": 0.01, "loss": 1.9831, "step": 46269 }, { "epoch": 4.746819860484202, "grad_norm": 0.05544830113649368, "learning_rate": 0.01, "loss": 1.9909, "step": 46272 }, { "epoch": 4.747127615921214, "grad_norm": 0.04018571227788925, "learning_rate": 0.01, "loss": 1.9737, "step": 46275 }, { "epoch": 4.747435371358227, "grad_norm": 0.07131030410528183, "learning_rate": 0.01, "loss": 2.0068, "step": 46278 }, { "epoch": 4.74774312679524, "grad_norm": 0.03746544569730759, "learning_rate": 0.01, "loss": 1.9749, "step": 46281 }, { "epoch": 4.748050882232253, "grad_norm": 0.09419632703065872, "learning_rate": 0.01, "loss": 1.9981, "step": 46284 }, { "epoch": 4.7483586376692655, "grad_norm": 0.05771811679005623, "learning_rate": 0.01, "loss": 2.0154, "step": 46287 }, { "epoch": 4.748666393106278, "grad_norm": 0.0985054075717926, "learning_rate": 0.01, "loss": 2.0356, "step": 46290 }, { "epoch": 4.748974148543291, "grad_norm": 0.06566286832094193, "learning_rate": 0.01, "loss": 2.0037, "step": 46293 }, { "epoch": 4.749281903980304, "grad_norm": 0.043330125510692596, "learning_rate": 0.01, "loss": 1.9883, "step": 46296 }, { "epoch": 4.749589659417317, "grad_norm": 0.05344987288117409, "learning_rate": 0.01, "loss": 1.9871, "step": 46299 }, { "epoch": 4.749897414854329, "grad_norm": 0.032919514924287796, "learning_rate": 0.01, "loss": 1.9709, "step": 46302 }, { "epoch": 4.750205170291342, "grad_norm": 0.08051367104053497, "learning_rate": 0.01, "loss": 1.9671, "step": 46305 }, { "epoch": 4.750512925728355, "grad_norm": 0.09481201320886612, "learning_rate": 0.01, "loss": 1.978, "step": 46308 }, { "epoch": 4.750820681165367, "grad_norm": 0.1356784701347351, "learning_rate": 0.01, "loss": 1.9896, "step": 46311 }, { "epoch": 4.75112843660238, "grad_norm": 0.12291105091571808, "learning_rate": 0.01, "loss": 2.0064, "step": 46314 }, { "epoch": 4.751436192039392, "grad_norm": 0.050581369549036026, "learning_rate": 0.01, "loss": 1.9901, "step": 46317 }, { "epoch": 4.751743947476405, "grad_norm": 0.08423294126987457, "learning_rate": 0.01, "loss": 1.9972, "step": 46320 }, { "epoch": 4.752051702913418, "grad_norm": 0.0642426609992981, "learning_rate": 0.01, "loss": 2.0018, "step": 46323 }, { "epoch": 4.752359458350431, "grad_norm": 0.042584337294101715, "learning_rate": 0.01, "loss": 2.0238, "step": 46326 }, { "epoch": 4.7526672137874435, "grad_norm": 0.04110237583518028, "learning_rate": 0.01, "loss": 2.0071, "step": 46329 }, { "epoch": 4.752974969224456, "grad_norm": 0.034602370113134384, "learning_rate": 0.01, "loss": 2.0079, "step": 46332 }, { "epoch": 4.753282724661469, "grad_norm": 0.05274072289466858, "learning_rate": 0.01, "loss": 1.9606, "step": 46335 }, { "epoch": 4.753590480098482, "grad_norm": 0.09157788008451462, "learning_rate": 0.01, "loss": 1.9881, "step": 46338 }, { "epoch": 4.753898235535495, "grad_norm": 0.04491696506738663, "learning_rate": 0.01, "loss": 1.9808, "step": 46341 }, { "epoch": 4.754205990972507, "grad_norm": 0.044836509972810745, "learning_rate": 0.01, "loss": 2.0027, "step": 46344 }, { "epoch": 4.75451374640952, "grad_norm": 0.08086413145065308, "learning_rate": 0.01, "loss": 2.0052, "step": 46347 }, { "epoch": 4.754821501846533, "grad_norm": 0.04024931788444519, "learning_rate": 0.01, "loss": 1.9634, "step": 46350 }, { "epoch": 4.755129257283546, "grad_norm": 0.053472552448511124, "learning_rate": 0.01, "loss": 1.9677, "step": 46353 }, { "epoch": 4.755437012720558, "grad_norm": 0.06579295545816422, "learning_rate": 0.01, "loss": 1.9847, "step": 46356 }, { "epoch": 4.75574476815757, "grad_norm": 0.07256436347961426, "learning_rate": 0.01, "loss": 1.9771, "step": 46359 }, { "epoch": 4.756052523594583, "grad_norm": 0.1356937736272812, "learning_rate": 0.01, "loss": 1.9961, "step": 46362 }, { "epoch": 4.756360279031596, "grad_norm": 0.06238779425621033, "learning_rate": 0.01, "loss": 1.995, "step": 46365 }, { "epoch": 4.756668034468609, "grad_norm": 0.03617888316512108, "learning_rate": 0.01, "loss": 2.0118, "step": 46368 }, { "epoch": 4.7569757899056215, "grad_norm": 0.039169710129499435, "learning_rate": 0.01, "loss": 2.0018, "step": 46371 }, { "epoch": 4.757283545342634, "grad_norm": 0.03998740762472153, "learning_rate": 0.01, "loss": 1.989, "step": 46374 }, { "epoch": 4.757591300779647, "grad_norm": 0.09922688454389572, "learning_rate": 0.01, "loss": 2.0139, "step": 46377 }, { "epoch": 4.75789905621666, "grad_norm": 0.07870779931545258, "learning_rate": 0.01, "loss": 1.9868, "step": 46380 }, { "epoch": 4.758206811653673, "grad_norm": 0.09917730838060379, "learning_rate": 0.01, "loss": 2.0119, "step": 46383 }, { "epoch": 4.758514567090685, "grad_norm": 0.14981189370155334, "learning_rate": 0.01, "loss": 1.9983, "step": 46386 }, { "epoch": 4.758822322527698, "grad_norm": 0.08356380462646484, "learning_rate": 0.01, "loss": 1.984, "step": 46389 }, { "epoch": 4.759130077964711, "grad_norm": 0.05980211868882179, "learning_rate": 0.01, "loss": 1.975, "step": 46392 }, { "epoch": 4.759437833401724, "grad_norm": 0.05891801416873932, "learning_rate": 0.01, "loss": 1.9677, "step": 46395 }, { "epoch": 4.7597455888387366, "grad_norm": 0.08687376976013184, "learning_rate": 0.01, "loss": 2.0062, "step": 46398 }, { "epoch": 4.760053344275748, "grad_norm": 0.03482063114643097, "learning_rate": 0.01, "loss": 1.9766, "step": 46401 }, { "epoch": 4.760361099712761, "grad_norm": 0.04206204041838646, "learning_rate": 0.01, "loss": 1.9951, "step": 46404 }, { "epoch": 4.760668855149774, "grad_norm": 0.051311641931533813, "learning_rate": 0.01, "loss": 1.9887, "step": 46407 }, { "epoch": 4.760976610586787, "grad_norm": 0.06586962938308716, "learning_rate": 0.01, "loss": 2.0033, "step": 46410 }, { "epoch": 4.7612843660238, "grad_norm": 0.058405712246894836, "learning_rate": 0.01, "loss": 1.9814, "step": 46413 }, { "epoch": 4.761592121460812, "grad_norm": 0.060229748487472534, "learning_rate": 0.01, "loss": 2.0056, "step": 46416 }, { "epoch": 4.761899876897825, "grad_norm": 0.08985790610313416, "learning_rate": 0.01, "loss": 2.0112, "step": 46419 }, { "epoch": 4.762207632334838, "grad_norm": 0.13349880278110504, "learning_rate": 0.01, "loss": 1.9881, "step": 46422 }, { "epoch": 4.762515387771851, "grad_norm": 0.05167052149772644, "learning_rate": 0.01, "loss": 1.9791, "step": 46425 }, { "epoch": 4.7628231432088635, "grad_norm": 0.03675553575158119, "learning_rate": 0.01, "loss": 2.0275, "step": 46428 }, { "epoch": 4.763130898645876, "grad_norm": 0.06192685663700104, "learning_rate": 0.01, "loss": 2.0186, "step": 46431 }, { "epoch": 4.763438654082889, "grad_norm": 0.06537027657032013, "learning_rate": 0.01, "loss": 1.9669, "step": 46434 }, { "epoch": 4.763746409519902, "grad_norm": 0.07077626138925552, "learning_rate": 0.01, "loss": 1.9927, "step": 46437 }, { "epoch": 4.764054164956915, "grad_norm": 0.0640728622674942, "learning_rate": 0.01, "loss": 1.9934, "step": 46440 }, { "epoch": 4.764361920393927, "grad_norm": 0.04746294394135475, "learning_rate": 0.01, "loss": 1.9545, "step": 46443 }, { "epoch": 4.764669675830939, "grad_norm": 0.04115734621882439, "learning_rate": 0.01, "loss": 1.9552, "step": 46446 }, { "epoch": 4.764977431267952, "grad_norm": 0.11647975444793701, "learning_rate": 0.01, "loss": 1.9621, "step": 46449 }, { "epoch": 4.765285186704965, "grad_norm": 0.0611908994615078, "learning_rate": 0.01, "loss": 1.9977, "step": 46452 }, { "epoch": 4.765592942141978, "grad_norm": 0.11524607241153717, "learning_rate": 0.01, "loss": 2.0042, "step": 46455 }, { "epoch": 4.76590069757899, "grad_norm": 0.10703855752944946, "learning_rate": 0.01, "loss": 1.9887, "step": 46458 }, { "epoch": 4.766208453016003, "grad_norm": 0.052681829780340195, "learning_rate": 0.01, "loss": 1.9965, "step": 46461 }, { "epoch": 4.766516208453016, "grad_norm": 0.04313669726252556, "learning_rate": 0.01, "loss": 1.9921, "step": 46464 }, { "epoch": 4.766823963890029, "grad_norm": 0.06806657463312149, "learning_rate": 0.01, "loss": 1.988, "step": 46467 }, { "epoch": 4.7671317193270415, "grad_norm": 0.07249857485294342, "learning_rate": 0.01, "loss": 1.9802, "step": 46470 }, { "epoch": 4.767439474764054, "grad_norm": 0.07816947996616364, "learning_rate": 0.01, "loss": 1.9951, "step": 46473 }, { "epoch": 4.767747230201067, "grad_norm": 0.05120289698243141, "learning_rate": 0.01, "loss": 1.9786, "step": 46476 }, { "epoch": 4.76805498563808, "grad_norm": 0.09938623756170273, "learning_rate": 0.01, "loss": 2.0115, "step": 46479 }, { "epoch": 4.768362741075093, "grad_norm": 0.060353558510541916, "learning_rate": 0.01, "loss": 1.989, "step": 46482 }, { "epoch": 4.768670496512105, "grad_norm": 0.10951244086027145, "learning_rate": 0.01, "loss": 2.0108, "step": 46485 }, { "epoch": 4.768978251949118, "grad_norm": 0.04746172949671745, "learning_rate": 0.01, "loss": 2.0104, "step": 46488 }, { "epoch": 4.76928600738613, "grad_norm": 0.07137439399957657, "learning_rate": 0.01, "loss": 1.9842, "step": 46491 }, { "epoch": 4.769593762823143, "grad_norm": 0.13574503362178802, "learning_rate": 0.01, "loss": 1.9792, "step": 46494 }, { "epoch": 4.769901518260156, "grad_norm": 0.047539595514535904, "learning_rate": 0.01, "loss": 1.97, "step": 46497 }, { "epoch": 4.770209273697168, "grad_norm": 0.0612449012696743, "learning_rate": 0.01, "loss": 1.9959, "step": 46500 }, { "epoch": 4.770517029134181, "grad_norm": 0.05257963016629219, "learning_rate": 0.01, "loss": 1.9983, "step": 46503 }, { "epoch": 4.770824784571194, "grad_norm": 0.05487014353275299, "learning_rate": 0.01, "loss": 1.9881, "step": 46506 }, { "epoch": 4.771132540008207, "grad_norm": 0.03313500061631203, "learning_rate": 0.01, "loss": 1.9812, "step": 46509 }, { "epoch": 4.7714402954452195, "grad_norm": 0.04244723170995712, "learning_rate": 0.01, "loss": 1.9694, "step": 46512 }, { "epoch": 4.771748050882232, "grad_norm": 0.06064971908926964, "learning_rate": 0.01, "loss": 1.999, "step": 46515 }, { "epoch": 4.772055806319245, "grad_norm": 0.04784930869936943, "learning_rate": 0.01, "loss": 1.9868, "step": 46518 }, { "epoch": 4.772363561756258, "grad_norm": 0.04266538843512535, "learning_rate": 0.01, "loss": 1.9772, "step": 46521 }, { "epoch": 4.772671317193271, "grad_norm": 0.09486623853445053, "learning_rate": 0.01, "loss": 1.9836, "step": 46524 }, { "epoch": 4.772979072630283, "grad_norm": 0.05113883316516876, "learning_rate": 0.01, "loss": 1.9841, "step": 46527 }, { "epoch": 4.773286828067296, "grad_norm": 0.11440654844045639, "learning_rate": 0.01, "loss": 2.0118, "step": 46530 }, { "epoch": 4.773594583504309, "grad_norm": 0.07778608053922653, "learning_rate": 0.01, "loss": 1.9965, "step": 46533 }, { "epoch": 4.773902338941321, "grad_norm": 0.042305488139390945, "learning_rate": 0.01, "loss": 1.9751, "step": 46536 }, { "epoch": 4.774210094378334, "grad_norm": 0.06056047976016998, "learning_rate": 0.01, "loss": 1.9808, "step": 46539 }, { "epoch": 4.774517849815346, "grad_norm": 0.03576405718922615, "learning_rate": 0.01, "loss": 1.9825, "step": 46542 }, { "epoch": 4.774825605252359, "grad_norm": 0.04558248445391655, "learning_rate": 0.01, "loss": 1.9967, "step": 46545 }, { "epoch": 4.775133360689372, "grad_norm": 0.04870672523975372, "learning_rate": 0.01, "loss": 1.9775, "step": 46548 }, { "epoch": 4.775441116126385, "grad_norm": 0.04591087996959686, "learning_rate": 0.01, "loss": 2.0104, "step": 46551 }, { "epoch": 4.775748871563398, "grad_norm": 0.08307138830423355, "learning_rate": 0.01, "loss": 1.9643, "step": 46554 }, { "epoch": 4.77605662700041, "grad_norm": 0.0701267346739769, "learning_rate": 0.01, "loss": 2.0051, "step": 46557 }, { "epoch": 4.776364382437423, "grad_norm": 0.13456737995147705, "learning_rate": 0.01, "loss": 1.9958, "step": 46560 }, { "epoch": 4.776672137874436, "grad_norm": 0.06605277210474014, "learning_rate": 0.01, "loss": 1.984, "step": 46563 }, { "epoch": 4.776979893311449, "grad_norm": 0.05360471457242966, "learning_rate": 0.01, "loss": 2.0167, "step": 46566 }, { "epoch": 4.7772876487484615, "grad_norm": 0.04942501336336136, "learning_rate": 0.01, "loss": 1.9641, "step": 46569 }, { "epoch": 4.777595404185474, "grad_norm": 0.09876788407564163, "learning_rate": 0.01, "loss": 2.0284, "step": 46572 }, { "epoch": 4.777903159622487, "grad_norm": 0.07075868546962738, "learning_rate": 0.01, "loss": 1.9783, "step": 46575 }, { "epoch": 4.7782109150595, "grad_norm": 0.06624365597963333, "learning_rate": 0.01, "loss": 1.976, "step": 46578 }, { "epoch": 4.778518670496512, "grad_norm": 0.04903232306241989, "learning_rate": 0.01, "loss": 1.9583, "step": 46581 }, { "epoch": 4.7788264259335245, "grad_norm": 0.06817879527807236, "learning_rate": 0.01, "loss": 1.9827, "step": 46584 }, { "epoch": 4.779134181370537, "grad_norm": 0.10077866911888123, "learning_rate": 0.01, "loss": 2.0064, "step": 46587 }, { "epoch": 4.77944193680755, "grad_norm": 0.05322563648223877, "learning_rate": 0.01, "loss": 1.9933, "step": 46590 }, { "epoch": 4.779749692244563, "grad_norm": 0.09559939801692963, "learning_rate": 0.01, "loss": 1.9541, "step": 46593 }, { "epoch": 4.780057447681576, "grad_norm": 0.06942281872034073, "learning_rate": 0.01, "loss": 2.0074, "step": 46596 }, { "epoch": 4.780365203118588, "grad_norm": 0.1250401884317398, "learning_rate": 0.01, "loss": 1.9913, "step": 46599 }, { "epoch": 4.780672958555601, "grad_norm": 0.09563729166984558, "learning_rate": 0.01, "loss": 1.9964, "step": 46602 }, { "epoch": 4.780980713992614, "grad_norm": 0.08675848692655563, "learning_rate": 0.01, "loss": 2.0147, "step": 46605 }, { "epoch": 4.781288469429627, "grad_norm": 0.0791327953338623, "learning_rate": 0.01, "loss": 1.9661, "step": 46608 }, { "epoch": 4.7815962248666395, "grad_norm": 0.10748487710952759, "learning_rate": 0.01, "loss": 2.0146, "step": 46611 }, { "epoch": 4.781903980303652, "grad_norm": 0.056079965084791183, "learning_rate": 0.01, "loss": 1.9769, "step": 46614 }, { "epoch": 4.782211735740665, "grad_norm": 0.04614640399813652, "learning_rate": 0.01, "loss": 2.0216, "step": 46617 }, { "epoch": 4.782519491177678, "grad_norm": 0.07398012280464172, "learning_rate": 0.01, "loss": 2.0094, "step": 46620 }, { "epoch": 4.782827246614691, "grad_norm": 0.07945587486028671, "learning_rate": 0.01, "loss": 1.9818, "step": 46623 }, { "epoch": 4.7831350020517025, "grad_norm": 0.040440626442432404, "learning_rate": 0.01, "loss": 1.9681, "step": 46626 }, { "epoch": 4.783442757488715, "grad_norm": 0.1381184458732605, "learning_rate": 0.01, "loss": 1.9851, "step": 46629 }, { "epoch": 4.783750512925728, "grad_norm": 0.06996659189462662, "learning_rate": 0.01, "loss": 2.0096, "step": 46632 }, { "epoch": 4.784058268362741, "grad_norm": 0.03792927786707878, "learning_rate": 0.01, "loss": 2.0041, "step": 46635 }, { "epoch": 4.784366023799754, "grad_norm": 0.03911750763654709, "learning_rate": 0.01, "loss": 2.0102, "step": 46638 }, { "epoch": 4.784673779236766, "grad_norm": 0.04588170349597931, "learning_rate": 0.01, "loss": 1.9972, "step": 46641 }, { "epoch": 4.784981534673779, "grad_norm": 0.10618551820516586, "learning_rate": 0.01, "loss": 1.9863, "step": 46644 }, { "epoch": 4.785289290110792, "grad_norm": 0.07402194291353226, "learning_rate": 0.01, "loss": 1.9799, "step": 46647 }, { "epoch": 4.785597045547805, "grad_norm": 0.05611973628401756, "learning_rate": 0.01, "loss": 1.9618, "step": 46650 }, { "epoch": 4.7859048009848175, "grad_norm": 0.062334805727005005, "learning_rate": 0.01, "loss": 1.9858, "step": 46653 }, { "epoch": 4.78621255642183, "grad_norm": 0.05972002074122429, "learning_rate": 0.01, "loss": 1.9957, "step": 46656 }, { "epoch": 4.786520311858843, "grad_norm": 0.10354367643594742, "learning_rate": 0.01, "loss": 1.9983, "step": 46659 }, { "epoch": 4.786828067295856, "grad_norm": 0.07653992623090744, "learning_rate": 0.01, "loss": 1.982, "step": 46662 }, { "epoch": 4.787135822732869, "grad_norm": 0.0790034830570221, "learning_rate": 0.01, "loss": 2.0122, "step": 46665 }, { "epoch": 4.787443578169881, "grad_norm": 0.11524873226881027, "learning_rate": 0.01, "loss": 1.9924, "step": 46668 }, { "epoch": 4.787751333606893, "grad_norm": 0.11828222125768661, "learning_rate": 0.01, "loss": 1.987, "step": 46671 }, { "epoch": 4.788059089043906, "grad_norm": 0.06129033491015434, "learning_rate": 0.01, "loss": 1.9941, "step": 46674 }, { "epoch": 4.788366844480919, "grad_norm": 0.060850802809000015, "learning_rate": 0.01, "loss": 1.9997, "step": 46677 }, { "epoch": 4.788674599917932, "grad_norm": 0.0488838255405426, "learning_rate": 0.01, "loss": 1.9862, "step": 46680 }, { "epoch": 4.788982355354944, "grad_norm": 0.03288913145661354, "learning_rate": 0.01, "loss": 2.0044, "step": 46683 }, { "epoch": 4.789290110791957, "grad_norm": 0.038961250334978104, "learning_rate": 0.01, "loss": 2.0041, "step": 46686 }, { "epoch": 4.78959786622897, "grad_norm": 0.10202857851982117, "learning_rate": 0.01, "loss": 2.019, "step": 46689 }, { "epoch": 4.789905621665983, "grad_norm": 0.05087895691394806, "learning_rate": 0.01, "loss": 1.9973, "step": 46692 }, { "epoch": 4.7902133771029956, "grad_norm": 0.07954477518796921, "learning_rate": 0.01, "loss": 1.9857, "step": 46695 }, { "epoch": 4.790521132540008, "grad_norm": 0.11283519119024277, "learning_rate": 0.01, "loss": 2.0066, "step": 46698 }, { "epoch": 4.790828887977021, "grad_norm": 0.04189791902899742, "learning_rate": 0.01, "loss": 1.9999, "step": 46701 }, { "epoch": 4.791136643414034, "grad_norm": 0.09559596329927444, "learning_rate": 0.01, "loss": 1.9667, "step": 46704 }, { "epoch": 4.791444398851047, "grad_norm": 0.07935875654220581, "learning_rate": 0.01, "loss": 1.9709, "step": 46707 }, { "epoch": 4.7917521542880595, "grad_norm": 0.09595377743244171, "learning_rate": 0.01, "loss": 2.0147, "step": 46710 }, { "epoch": 4.792059909725072, "grad_norm": 0.06680227816104889, "learning_rate": 0.01, "loss": 1.9969, "step": 46713 }, { "epoch": 4.792367665162084, "grad_norm": 0.07122410833835602, "learning_rate": 0.01, "loss": 2.0114, "step": 46716 }, { "epoch": 4.792675420599097, "grad_norm": 0.0937454104423523, "learning_rate": 0.01, "loss": 1.9891, "step": 46719 }, { "epoch": 4.79298317603611, "grad_norm": 0.08886270225048065, "learning_rate": 0.01, "loss": 1.9803, "step": 46722 }, { "epoch": 4.7932909314731225, "grad_norm": 0.04537273570895195, "learning_rate": 0.01, "loss": 2.0151, "step": 46725 }, { "epoch": 4.793598686910135, "grad_norm": 0.10327073931694031, "learning_rate": 0.01, "loss": 2.0013, "step": 46728 }, { "epoch": 4.793906442347148, "grad_norm": 0.08701428025960922, "learning_rate": 0.01, "loss": 1.9633, "step": 46731 }, { "epoch": 4.794214197784161, "grad_norm": 0.06731964647769928, "learning_rate": 0.01, "loss": 1.9849, "step": 46734 }, { "epoch": 4.794521953221174, "grad_norm": 0.15847548842430115, "learning_rate": 0.01, "loss": 2.0006, "step": 46737 }, { "epoch": 4.794829708658186, "grad_norm": 0.04891221970319748, "learning_rate": 0.01, "loss": 1.9916, "step": 46740 }, { "epoch": 4.795137464095199, "grad_norm": 0.040448226034641266, "learning_rate": 0.01, "loss": 1.9777, "step": 46743 }, { "epoch": 4.795445219532212, "grad_norm": 0.04388444870710373, "learning_rate": 0.01, "loss": 2.0007, "step": 46746 }, { "epoch": 4.795752974969225, "grad_norm": 0.05044842138886452, "learning_rate": 0.01, "loss": 1.9842, "step": 46749 }, { "epoch": 4.7960607304062375, "grad_norm": 0.17745061218738556, "learning_rate": 0.01, "loss": 1.9986, "step": 46752 }, { "epoch": 4.79636848584325, "grad_norm": 0.07918280363082886, "learning_rate": 0.01, "loss": 1.9807, "step": 46755 }, { "epoch": 4.796676241280263, "grad_norm": 0.0841284915804863, "learning_rate": 0.01, "loss": 2.0123, "step": 46758 }, { "epoch": 4.796983996717275, "grad_norm": 0.07425282150506973, "learning_rate": 0.01, "loss": 1.9821, "step": 46761 }, { "epoch": 4.797291752154288, "grad_norm": 0.11900149285793304, "learning_rate": 0.01, "loss": 2.0072, "step": 46764 }, { "epoch": 4.7975995075913005, "grad_norm": 0.054541297256946564, "learning_rate": 0.01, "loss": 1.9947, "step": 46767 }, { "epoch": 4.797907263028313, "grad_norm": 0.05434846505522728, "learning_rate": 0.01, "loss": 1.9964, "step": 46770 }, { "epoch": 4.798215018465326, "grad_norm": 0.045795127749443054, "learning_rate": 0.01, "loss": 2.0005, "step": 46773 }, { "epoch": 4.798522773902339, "grad_norm": 0.044646721333265305, "learning_rate": 0.01, "loss": 1.9882, "step": 46776 }, { "epoch": 4.798830529339352, "grad_norm": 0.06140962243080139, "learning_rate": 0.01, "loss": 1.9988, "step": 46779 }, { "epoch": 4.799138284776364, "grad_norm": 0.1806284338235855, "learning_rate": 0.01, "loss": 1.9941, "step": 46782 }, { "epoch": 4.799446040213377, "grad_norm": 0.1726996749639511, "learning_rate": 0.01, "loss": 1.9823, "step": 46785 }, { "epoch": 4.79975379565039, "grad_norm": 0.093462273478508, "learning_rate": 0.01, "loss": 2.0224, "step": 46788 }, { "epoch": 4.800061551087403, "grad_norm": 0.09162432700395584, "learning_rate": 0.01, "loss": 1.9807, "step": 46791 }, { "epoch": 4.8003693065244155, "grad_norm": 0.05243920534849167, "learning_rate": 0.01, "loss": 2.0048, "step": 46794 }, { "epoch": 4.800677061961428, "grad_norm": 0.05156363546848297, "learning_rate": 0.01, "loss": 2.0012, "step": 46797 }, { "epoch": 4.800984817398441, "grad_norm": 0.03927348554134369, "learning_rate": 0.01, "loss": 1.9735, "step": 46800 }, { "epoch": 4.801292572835454, "grad_norm": 0.04184743016958237, "learning_rate": 0.01, "loss": 1.967, "step": 46803 }, { "epoch": 4.801600328272466, "grad_norm": 0.10456445813179016, "learning_rate": 0.01, "loss": 1.9963, "step": 46806 }, { "epoch": 4.8019080837094785, "grad_norm": 0.14495427906513214, "learning_rate": 0.01, "loss": 1.9995, "step": 46809 }, { "epoch": 4.802215839146491, "grad_norm": 0.06935403496026993, "learning_rate": 0.01, "loss": 1.9953, "step": 46812 }, { "epoch": 4.802523594583504, "grad_norm": 0.07718656957149506, "learning_rate": 0.01, "loss": 1.9789, "step": 46815 }, { "epoch": 4.802831350020517, "grad_norm": 0.04619847983121872, "learning_rate": 0.01, "loss": 2.0083, "step": 46818 }, { "epoch": 4.80313910545753, "grad_norm": 0.04528527334332466, "learning_rate": 0.01, "loss": 1.9811, "step": 46821 }, { "epoch": 4.803446860894542, "grad_norm": 0.0626635029911995, "learning_rate": 0.01, "loss": 2.0067, "step": 46824 }, { "epoch": 4.803754616331555, "grad_norm": 0.055482156574726105, "learning_rate": 0.01, "loss": 2.0015, "step": 46827 }, { "epoch": 4.804062371768568, "grad_norm": 0.04317907243967056, "learning_rate": 0.01, "loss": 1.9835, "step": 46830 }, { "epoch": 4.804370127205581, "grad_norm": 0.12392894178628922, "learning_rate": 0.01, "loss": 1.9905, "step": 46833 }, { "epoch": 4.8046778826425935, "grad_norm": 0.07023516297340393, "learning_rate": 0.01, "loss": 2.0094, "step": 46836 }, { "epoch": 4.804985638079606, "grad_norm": 0.10643202811479568, "learning_rate": 0.01, "loss": 2.0031, "step": 46839 }, { "epoch": 4.805293393516619, "grad_norm": 0.07375111430883408, "learning_rate": 0.01, "loss": 1.9657, "step": 46842 }, { "epoch": 4.805601148953632, "grad_norm": 0.039533890783786774, "learning_rate": 0.01, "loss": 1.9951, "step": 46845 }, { "epoch": 4.805908904390645, "grad_norm": 0.03812634199857712, "learning_rate": 0.01, "loss": 1.9887, "step": 46848 }, { "epoch": 4.806216659827657, "grad_norm": 0.049066439270973206, "learning_rate": 0.01, "loss": 1.9826, "step": 46851 }, { "epoch": 4.806524415264669, "grad_norm": 0.17093585431575775, "learning_rate": 0.01, "loss": 1.996, "step": 46854 }, { "epoch": 4.806832170701682, "grad_norm": 0.06310732662677765, "learning_rate": 0.01, "loss": 1.9727, "step": 46857 }, { "epoch": 4.807139926138695, "grad_norm": 0.04781056195497513, "learning_rate": 0.01, "loss": 1.987, "step": 46860 }, { "epoch": 4.807447681575708, "grad_norm": 0.04082764312624931, "learning_rate": 0.01, "loss": 2.0017, "step": 46863 }, { "epoch": 4.8077554370127205, "grad_norm": 0.06627890467643738, "learning_rate": 0.01, "loss": 2.0018, "step": 46866 }, { "epoch": 4.808063192449733, "grad_norm": 0.07753533869981766, "learning_rate": 0.01, "loss": 2.0041, "step": 46869 }, { "epoch": 4.808370947886746, "grad_norm": 0.05382109060883522, "learning_rate": 0.01, "loss": 2.0029, "step": 46872 }, { "epoch": 4.808678703323759, "grad_norm": 0.06153309345245361, "learning_rate": 0.01, "loss": 1.9806, "step": 46875 }, { "epoch": 4.808986458760772, "grad_norm": 0.0524064376950264, "learning_rate": 0.01, "loss": 1.975, "step": 46878 }, { "epoch": 4.809294214197784, "grad_norm": 0.04740193486213684, "learning_rate": 0.01, "loss": 1.9846, "step": 46881 }, { "epoch": 4.809601969634797, "grad_norm": 0.04750111699104309, "learning_rate": 0.01, "loss": 2.0125, "step": 46884 }, { "epoch": 4.80990972507181, "grad_norm": 0.1953573226928711, "learning_rate": 0.01, "loss": 1.9876, "step": 46887 }, { "epoch": 4.810217480508823, "grad_norm": 0.05784987658262253, "learning_rate": 0.01, "loss": 1.9961, "step": 46890 }, { "epoch": 4.8105252359458355, "grad_norm": 0.06495459377765656, "learning_rate": 0.01, "loss": 1.9802, "step": 46893 }, { "epoch": 4.810832991382847, "grad_norm": 0.04319414868950844, "learning_rate": 0.01, "loss": 1.9865, "step": 46896 }, { "epoch": 4.81114074681986, "grad_norm": 0.05057989060878754, "learning_rate": 0.01, "loss": 2.0131, "step": 46899 }, { "epoch": 4.811448502256873, "grad_norm": 0.04836462065577507, "learning_rate": 0.01, "loss": 1.9928, "step": 46902 }, { "epoch": 4.811756257693886, "grad_norm": 0.06928513944149017, "learning_rate": 0.01, "loss": 1.9722, "step": 46905 }, { "epoch": 4.8120640131308985, "grad_norm": 0.04418247938156128, "learning_rate": 0.01, "loss": 1.9731, "step": 46908 }, { "epoch": 4.812371768567911, "grad_norm": 0.05600956827402115, "learning_rate": 0.01, "loss": 1.9597, "step": 46911 }, { "epoch": 4.812679524004924, "grad_norm": 0.18245935440063477, "learning_rate": 0.01, "loss": 1.9895, "step": 46914 }, { "epoch": 4.812987279441937, "grad_norm": 0.05147803574800491, "learning_rate": 0.01, "loss": 1.9836, "step": 46917 }, { "epoch": 4.81329503487895, "grad_norm": 0.049182165414094925, "learning_rate": 0.01, "loss": 1.9799, "step": 46920 }, { "epoch": 4.813602790315962, "grad_norm": 0.03926403820514679, "learning_rate": 0.01, "loss": 1.9839, "step": 46923 }, { "epoch": 4.813910545752975, "grad_norm": 0.04399009421467781, "learning_rate": 0.01, "loss": 1.9911, "step": 46926 }, { "epoch": 4.814218301189988, "grad_norm": 0.05575468763709068, "learning_rate": 0.01, "loss": 2.0025, "step": 46929 }, { "epoch": 4.814526056627001, "grad_norm": 0.056770700961351395, "learning_rate": 0.01, "loss": 2.0191, "step": 46932 }, { "epoch": 4.8148338120640135, "grad_norm": 0.0790528804063797, "learning_rate": 0.01, "loss": 1.9792, "step": 46935 }, { "epoch": 4.815141567501026, "grad_norm": 0.07976264506578445, "learning_rate": 0.01, "loss": 2.0267, "step": 46938 }, { "epoch": 4.815449322938038, "grad_norm": 0.058160681277513504, "learning_rate": 0.01, "loss": 1.9802, "step": 46941 }, { "epoch": 4.815757078375051, "grad_norm": 0.05894159525632858, "learning_rate": 0.01, "loss": 2.0006, "step": 46944 }, { "epoch": 4.816064833812064, "grad_norm": 0.061383627355098724, "learning_rate": 0.01, "loss": 1.9937, "step": 46947 }, { "epoch": 4.8163725892490765, "grad_norm": 0.06993051618337631, "learning_rate": 0.01, "loss": 1.9951, "step": 46950 }, { "epoch": 4.816680344686089, "grad_norm": 0.1546536386013031, "learning_rate": 0.01, "loss": 1.9949, "step": 46953 }, { "epoch": 4.816988100123102, "grad_norm": 0.11143519729375839, "learning_rate": 0.01, "loss": 1.9965, "step": 46956 }, { "epoch": 4.817295855560115, "grad_norm": 0.054721418768167496, "learning_rate": 0.01, "loss": 2.0038, "step": 46959 }, { "epoch": 4.817603610997128, "grad_norm": 0.04508695378899574, "learning_rate": 0.01, "loss": 1.9824, "step": 46962 }, { "epoch": 4.81791136643414, "grad_norm": 0.06738116592168808, "learning_rate": 0.01, "loss": 1.9702, "step": 46965 }, { "epoch": 4.818219121871153, "grad_norm": 0.05306596681475639, "learning_rate": 0.01, "loss": 1.9741, "step": 46968 }, { "epoch": 4.818526877308166, "grad_norm": 0.05283980816602707, "learning_rate": 0.01, "loss": 1.9738, "step": 46971 }, { "epoch": 4.818834632745179, "grad_norm": 0.04085366427898407, "learning_rate": 0.01, "loss": 1.9924, "step": 46974 }, { "epoch": 4.8191423881821915, "grad_norm": 0.0549020953476429, "learning_rate": 0.01, "loss": 1.9762, "step": 46977 }, { "epoch": 4.819450143619204, "grad_norm": 0.08194736391305923, "learning_rate": 0.01, "loss": 1.9834, "step": 46980 }, { "epoch": 4.819757899056217, "grad_norm": 0.07541119307279587, "learning_rate": 0.01, "loss": 2.0103, "step": 46983 }, { "epoch": 4.820065654493229, "grad_norm": 0.05164722725749016, "learning_rate": 0.01, "loss": 1.988, "step": 46986 }, { "epoch": 4.820373409930242, "grad_norm": 0.03965797275304794, "learning_rate": 0.01, "loss": 1.988, "step": 46989 }, { "epoch": 4.8206811653672546, "grad_norm": 0.05798366665840149, "learning_rate": 0.01, "loss": 2.0045, "step": 46992 }, { "epoch": 4.820988920804267, "grad_norm": 0.09747203439474106, "learning_rate": 0.01, "loss": 1.9585, "step": 46995 }, { "epoch": 4.82129667624128, "grad_norm": 0.037640564143657684, "learning_rate": 0.01, "loss": 1.9987, "step": 46998 }, { "epoch": 4.821604431678293, "grad_norm": 0.06714722514152527, "learning_rate": 0.01, "loss": 1.9763, "step": 47001 }, { "epoch": 4.821912187115306, "grad_norm": 0.06069006398320198, "learning_rate": 0.01, "loss": 1.9959, "step": 47004 }, { "epoch": 4.8222199425523185, "grad_norm": 0.04241282492876053, "learning_rate": 0.01, "loss": 1.9665, "step": 47007 }, { "epoch": 4.822527697989331, "grad_norm": 0.07319297641515732, "learning_rate": 0.01, "loss": 1.961, "step": 47010 }, { "epoch": 4.822835453426344, "grad_norm": 0.11949057132005692, "learning_rate": 0.01, "loss": 1.9639, "step": 47013 }, { "epoch": 4.823143208863357, "grad_norm": 0.10720200836658478, "learning_rate": 0.01, "loss": 1.9813, "step": 47016 }, { "epoch": 4.82345096430037, "grad_norm": 0.05312662571668625, "learning_rate": 0.01, "loss": 2.0028, "step": 47019 }, { "epoch": 4.823758719737382, "grad_norm": 0.05288184434175491, "learning_rate": 0.01, "loss": 2.0004, "step": 47022 }, { "epoch": 4.824066475174395, "grad_norm": 0.04024795815348625, "learning_rate": 0.01, "loss": 1.9603, "step": 47025 }, { "epoch": 4.824374230611408, "grad_norm": 0.03714558109641075, "learning_rate": 0.01, "loss": 1.9997, "step": 47028 }, { "epoch": 4.82468198604842, "grad_norm": 0.043919868767261505, "learning_rate": 0.01, "loss": 2.0067, "step": 47031 }, { "epoch": 4.824989741485433, "grad_norm": 0.09199999272823334, "learning_rate": 0.01, "loss": 2.0002, "step": 47034 }, { "epoch": 4.825297496922445, "grad_norm": 0.09651891142129898, "learning_rate": 0.01, "loss": 1.9986, "step": 47037 }, { "epoch": 4.825605252359458, "grad_norm": 0.11211739480495453, "learning_rate": 0.01, "loss": 1.9967, "step": 47040 }, { "epoch": 4.825913007796471, "grad_norm": 0.11191114038228989, "learning_rate": 0.01, "loss": 1.9963, "step": 47043 }, { "epoch": 4.826220763233484, "grad_norm": 0.06726837903261185, "learning_rate": 0.01, "loss": 2.001, "step": 47046 }, { "epoch": 4.8265285186704965, "grad_norm": 0.04758633300662041, "learning_rate": 0.01, "loss": 2.0015, "step": 47049 }, { "epoch": 4.826836274107509, "grad_norm": 0.07138072699308395, "learning_rate": 0.01, "loss": 1.9816, "step": 47052 }, { "epoch": 4.827144029544522, "grad_norm": 0.08691728860139847, "learning_rate": 0.01, "loss": 1.9933, "step": 47055 }, { "epoch": 4.827451784981535, "grad_norm": 0.060696471482515335, "learning_rate": 0.01, "loss": 1.9884, "step": 47058 }, { "epoch": 4.827759540418548, "grad_norm": 0.04225600138306618, "learning_rate": 0.01, "loss": 1.9765, "step": 47061 }, { "epoch": 4.82806729585556, "grad_norm": 0.03796116262674332, "learning_rate": 0.01, "loss": 1.9722, "step": 47064 }, { "epoch": 4.828375051292573, "grad_norm": 0.056367259472608566, "learning_rate": 0.01, "loss": 2.0058, "step": 47067 }, { "epoch": 4.828682806729586, "grad_norm": 0.10544770210981369, "learning_rate": 0.01, "loss": 2.0124, "step": 47070 }, { "epoch": 4.828990562166599, "grad_norm": 0.06839180737733841, "learning_rate": 0.01, "loss": 2.0131, "step": 47073 }, { "epoch": 4.829298317603611, "grad_norm": 0.08350997418165207, "learning_rate": 0.01, "loss": 2.0025, "step": 47076 }, { "epoch": 4.829606073040623, "grad_norm": 0.05540642887353897, "learning_rate": 0.01, "loss": 1.97, "step": 47079 }, { "epoch": 4.829913828477636, "grad_norm": 0.08671069890260696, "learning_rate": 0.01, "loss": 1.967, "step": 47082 }, { "epoch": 4.830221583914649, "grad_norm": 0.1410573422908783, "learning_rate": 0.01, "loss": 1.9948, "step": 47085 }, { "epoch": 4.830529339351662, "grad_norm": 0.06324710696935654, "learning_rate": 0.01, "loss": 1.9722, "step": 47088 }, { "epoch": 4.8308370947886745, "grad_norm": 0.04952717572450638, "learning_rate": 0.01, "loss": 1.972, "step": 47091 }, { "epoch": 4.831144850225687, "grad_norm": 0.058474645018577576, "learning_rate": 0.01, "loss": 1.9962, "step": 47094 }, { "epoch": 4.8314526056627, "grad_norm": 0.08904005587100983, "learning_rate": 0.01, "loss": 1.9938, "step": 47097 }, { "epoch": 4.831760361099713, "grad_norm": 0.03303196281194687, "learning_rate": 0.01, "loss": 1.9925, "step": 47100 }, { "epoch": 4.832068116536726, "grad_norm": 0.0696646049618721, "learning_rate": 0.01, "loss": 2.0134, "step": 47103 }, { "epoch": 4.832375871973738, "grad_norm": 0.09613518416881561, "learning_rate": 0.01, "loss": 2.0197, "step": 47106 }, { "epoch": 4.832683627410751, "grad_norm": 0.08955409377813339, "learning_rate": 0.01, "loss": 1.9708, "step": 47109 }, { "epoch": 4.832991382847764, "grad_norm": 0.07022068649530411, "learning_rate": 0.01, "loss": 2.0066, "step": 47112 }, { "epoch": 4.833299138284777, "grad_norm": 0.05557604506611824, "learning_rate": 0.01, "loss": 1.9883, "step": 47115 }, { "epoch": 4.8336068937217895, "grad_norm": 0.04961394891142845, "learning_rate": 0.01, "loss": 1.9734, "step": 47118 }, { "epoch": 4.833914649158801, "grad_norm": 0.07381154596805573, "learning_rate": 0.01, "loss": 1.9893, "step": 47121 }, { "epoch": 4.834222404595814, "grad_norm": 0.08874998986721039, "learning_rate": 0.01, "loss": 1.9819, "step": 47124 }, { "epoch": 4.834530160032827, "grad_norm": 0.12152114510536194, "learning_rate": 0.01, "loss": 1.9904, "step": 47127 }, { "epoch": 4.83483791546984, "grad_norm": 0.18640320003032684, "learning_rate": 0.01, "loss": 2.0007, "step": 47130 }, { "epoch": 4.8351456709068525, "grad_norm": 0.13211971521377563, "learning_rate": 0.01, "loss": 1.9598, "step": 47133 }, { "epoch": 4.835453426343865, "grad_norm": 0.0825662836432457, "learning_rate": 0.01, "loss": 2.0019, "step": 47136 }, { "epoch": 4.835761181780878, "grad_norm": 0.05962991341948509, "learning_rate": 0.01, "loss": 1.991, "step": 47139 }, { "epoch": 4.836068937217891, "grad_norm": 0.045787274837493896, "learning_rate": 0.01, "loss": 1.9837, "step": 47142 }, { "epoch": 4.836376692654904, "grad_norm": 0.05166277661919594, "learning_rate": 0.01, "loss": 1.9703, "step": 47145 }, { "epoch": 4.836684448091916, "grad_norm": 0.07144337892532349, "learning_rate": 0.01, "loss": 2.0034, "step": 47148 }, { "epoch": 4.836992203528929, "grad_norm": 0.03732317313551903, "learning_rate": 0.01, "loss": 2.0051, "step": 47151 }, { "epoch": 4.837299958965942, "grad_norm": 0.06613879650831223, "learning_rate": 0.01, "loss": 1.9917, "step": 47154 }, { "epoch": 4.837607714402955, "grad_norm": 0.05564868822693825, "learning_rate": 0.01, "loss": 2.0116, "step": 47157 }, { "epoch": 4.837915469839967, "grad_norm": 0.03259768709540367, "learning_rate": 0.01, "loss": 2.0112, "step": 47160 }, { "epoch": 4.83822322527698, "grad_norm": 0.11291679739952087, "learning_rate": 0.01, "loss": 1.9619, "step": 47163 }, { "epoch": 4.838530980713992, "grad_norm": 0.06787018477916718, "learning_rate": 0.01, "loss": 2.0003, "step": 47166 }, { "epoch": 4.838838736151005, "grad_norm": 0.10512536764144897, "learning_rate": 0.01, "loss": 1.986, "step": 47169 }, { "epoch": 4.839146491588018, "grad_norm": 0.10330035537481308, "learning_rate": 0.01, "loss": 1.9875, "step": 47172 }, { "epoch": 4.839454247025031, "grad_norm": 0.06286308169364929, "learning_rate": 0.01, "loss": 1.994, "step": 47175 }, { "epoch": 4.839762002462043, "grad_norm": 0.04793789982795715, "learning_rate": 0.01, "loss": 1.9849, "step": 47178 }, { "epoch": 4.840069757899056, "grad_norm": 0.043382007628679276, "learning_rate": 0.01, "loss": 1.9958, "step": 47181 }, { "epoch": 4.840377513336069, "grad_norm": 0.08022630959749222, "learning_rate": 0.01, "loss": 1.9936, "step": 47184 }, { "epoch": 4.840685268773082, "grad_norm": 0.09300918877124786, "learning_rate": 0.01, "loss": 1.959, "step": 47187 }, { "epoch": 4.8409930242100945, "grad_norm": 0.05550304800271988, "learning_rate": 0.01, "loss": 1.9739, "step": 47190 }, { "epoch": 4.841300779647107, "grad_norm": 0.0407077856361866, "learning_rate": 0.01, "loss": 1.9859, "step": 47193 }, { "epoch": 4.84160853508412, "grad_norm": 0.04916580393910408, "learning_rate": 0.01, "loss": 1.9728, "step": 47196 }, { "epoch": 4.841916290521133, "grad_norm": 0.090577132999897, "learning_rate": 0.01, "loss": 1.9765, "step": 47199 }, { "epoch": 4.842224045958146, "grad_norm": 0.11230761557817459, "learning_rate": 0.01, "loss": 1.975, "step": 47202 }, { "epoch": 4.8425318013951575, "grad_norm": 0.11779794096946716, "learning_rate": 0.01, "loss": 1.9952, "step": 47205 }, { "epoch": 4.842839556832171, "grad_norm": 0.04648299142718315, "learning_rate": 0.01, "loss": 2.0093, "step": 47208 }, { "epoch": 4.843147312269183, "grad_norm": 0.04901035502552986, "learning_rate": 0.01, "loss": 1.9869, "step": 47211 }, { "epoch": 4.843455067706196, "grad_norm": 0.05582377314567566, "learning_rate": 0.01, "loss": 1.9938, "step": 47214 }, { "epoch": 4.843762823143209, "grad_norm": 0.08325490355491638, "learning_rate": 0.01, "loss": 1.9974, "step": 47217 }, { "epoch": 4.844070578580221, "grad_norm": 0.09663175791501999, "learning_rate": 0.01, "loss": 1.983, "step": 47220 }, { "epoch": 4.844378334017234, "grad_norm": 0.067304328083992, "learning_rate": 0.01, "loss": 1.9912, "step": 47223 }, { "epoch": 4.844686089454247, "grad_norm": 0.09804333001375198, "learning_rate": 0.01, "loss": 2.0023, "step": 47226 }, { "epoch": 4.84499384489126, "grad_norm": 0.07931843400001526, "learning_rate": 0.01, "loss": 1.9744, "step": 47229 }, { "epoch": 4.8453016003282725, "grad_norm": 0.07244410365819931, "learning_rate": 0.01, "loss": 2.0131, "step": 47232 }, { "epoch": 4.845609355765285, "grad_norm": 0.08068135380744934, "learning_rate": 0.01, "loss": 1.9623, "step": 47235 }, { "epoch": 4.845917111202298, "grad_norm": 0.062296342104673386, "learning_rate": 0.01, "loss": 1.993, "step": 47238 }, { "epoch": 4.846224866639311, "grad_norm": 0.09988021850585938, "learning_rate": 0.01, "loss": 2.0143, "step": 47241 }, { "epoch": 4.846532622076324, "grad_norm": 0.12119688838720322, "learning_rate": 0.01, "loss": 1.986, "step": 47244 }, { "epoch": 4.846840377513336, "grad_norm": 0.06407175213098526, "learning_rate": 0.01, "loss": 2.0009, "step": 47247 }, { "epoch": 4.847148132950348, "grad_norm": 0.05600906163454056, "learning_rate": 0.01, "loss": 2.0139, "step": 47250 }, { "epoch": 4.847455888387362, "grad_norm": 0.04448190703988075, "learning_rate": 0.01, "loss": 1.9816, "step": 47253 }, { "epoch": 4.847763643824374, "grad_norm": 0.046519339084625244, "learning_rate": 0.01, "loss": 1.9892, "step": 47256 }, { "epoch": 4.848071399261387, "grad_norm": 0.07584928721189499, "learning_rate": 0.01, "loss": 1.9972, "step": 47259 }, { "epoch": 4.848379154698399, "grad_norm": 0.06511086225509644, "learning_rate": 0.01, "loss": 1.9875, "step": 47262 }, { "epoch": 4.848686910135412, "grad_norm": 0.10978051275014877, "learning_rate": 0.01, "loss": 1.9995, "step": 47265 }, { "epoch": 4.848994665572425, "grad_norm": 0.10694348812103271, "learning_rate": 0.01, "loss": 2.0037, "step": 47268 }, { "epoch": 4.849302421009438, "grad_norm": 0.11489532142877579, "learning_rate": 0.01, "loss": 1.9941, "step": 47271 }, { "epoch": 4.8496101764464505, "grad_norm": 0.061532165855169296, "learning_rate": 0.01, "loss": 2.0089, "step": 47274 }, { "epoch": 4.849917931883463, "grad_norm": 0.034115977585315704, "learning_rate": 0.01, "loss": 1.9695, "step": 47277 }, { "epoch": 4.850225687320476, "grad_norm": 0.07114948332309723, "learning_rate": 0.01, "loss": 2.0014, "step": 47280 }, { "epoch": 4.850533442757489, "grad_norm": 0.03593161702156067, "learning_rate": 0.01, "loss": 1.9972, "step": 47283 }, { "epoch": 4.850841198194502, "grad_norm": 0.09017783403396606, "learning_rate": 0.01, "loss": 1.9703, "step": 47286 }, { "epoch": 4.851148953631514, "grad_norm": 0.045291002839803696, "learning_rate": 0.01, "loss": 1.9802, "step": 47289 }, { "epoch": 4.851456709068527, "grad_norm": 0.05328730493783951, "learning_rate": 0.01, "loss": 1.9684, "step": 47292 }, { "epoch": 4.851764464505539, "grad_norm": 0.08284175395965576, "learning_rate": 0.01, "loss": 1.9943, "step": 47295 }, { "epoch": 4.852072219942553, "grad_norm": 0.04549727216362953, "learning_rate": 0.01, "loss": 1.9808, "step": 47298 }, { "epoch": 4.852379975379565, "grad_norm": 0.09103915095329285, "learning_rate": 0.01, "loss": 1.9745, "step": 47301 }, { "epoch": 4.8526877308165774, "grad_norm": 0.09698940068483353, "learning_rate": 0.01, "loss": 2.0048, "step": 47304 }, { "epoch": 4.85299548625359, "grad_norm": 0.12437165528535843, "learning_rate": 0.01, "loss": 1.9746, "step": 47307 }, { "epoch": 4.853303241690603, "grad_norm": 0.0723615437746048, "learning_rate": 0.01, "loss": 1.9948, "step": 47310 }, { "epoch": 4.853610997127616, "grad_norm": 0.06390126794576645, "learning_rate": 0.01, "loss": 1.9994, "step": 47313 }, { "epoch": 4.853918752564629, "grad_norm": 0.04488823935389519, "learning_rate": 0.01, "loss": 2.0044, "step": 47316 }, { "epoch": 4.854226508001641, "grad_norm": 0.036022037267684937, "learning_rate": 0.01, "loss": 1.9725, "step": 47319 }, { "epoch": 4.854534263438654, "grad_norm": 0.06746529787778854, "learning_rate": 0.01, "loss": 1.9735, "step": 47322 }, { "epoch": 4.854842018875667, "grad_norm": 0.06267426908016205, "learning_rate": 0.01, "loss": 1.97, "step": 47325 }, { "epoch": 4.85514977431268, "grad_norm": 0.08596473187208176, "learning_rate": 0.01, "loss": 1.9942, "step": 47328 }, { "epoch": 4.8554575297496925, "grad_norm": 0.04593675211071968, "learning_rate": 0.01, "loss": 1.9986, "step": 47331 }, { "epoch": 4.855765285186705, "grad_norm": 0.08707760274410248, "learning_rate": 0.01, "loss": 1.9881, "step": 47334 }, { "epoch": 4.856073040623718, "grad_norm": 0.05224407836794853, "learning_rate": 0.01, "loss": 1.9835, "step": 47337 }, { "epoch": 4.85638079606073, "grad_norm": 0.05910166725516319, "learning_rate": 0.01, "loss": 1.9864, "step": 47340 }, { "epoch": 4.856688551497744, "grad_norm": 0.14354678988456726, "learning_rate": 0.01, "loss": 1.986, "step": 47343 }, { "epoch": 4.8569963069347555, "grad_norm": 0.11897499859333038, "learning_rate": 0.01, "loss": 2.0184, "step": 47346 }, { "epoch": 4.857304062371768, "grad_norm": 0.06904298812150955, "learning_rate": 0.01, "loss": 1.9881, "step": 47349 }, { "epoch": 4.857611817808781, "grad_norm": 0.04855037108063698, "learning_rate": 0.01, "loss": 1.9921, "step": 47352 }, { "epoch": 4.857919573245794, "grad_norm": 0.042485300451517105, "learning_rate": 0.01, "loss": 1.9864, "step": 47355 }, { "epoch": 4.858227328682807, "grad_norm": 0.052240677177906036, "learning_rate": 0.01, "loss": 1.9711, "step": 47358 }, { "epoch": 4.858535084119819, "grad_norm": 0.09882494062185287, "learning_rate": 0.01, "loss": 1.9753, "step": 47361 }, { "epoch": 4.858842839556832, "grad_norm": 0.059417568147182465, "learning_rate": 0.01, "loss": 1.9699, "step": 47364 }, { "epoch": 4.859150594993845, "grad_norm": 0.042187973856925964, "learning_rate": 0.01, "loss": 1.9445, "step": 47367 }, { "epoch": 4.859458350430858, "grad_norm": 0.11045199632644653, "learning_rate": 0.01, "loss": 2.0084, "step": 47370 }, { "epoch": 4.8597661058678705, "grad_norm": 0.06902570277452469, "learning_rate": 0.01, "loss": 1.9933, "step": 47373 }, { "epoch": 4.860073861304883, "grad_norm": 0.1256779134273529, "learning_rate": 0.01, "loss": 2.0201, "step": 47376 }, { "epoch": 4.860381616741896, "grad_norm": 0.0714133083820343, "learning_rate": 0.01, "loss": 1.9732, "step": 47379 }, { "epoch": 4.860689372178909, "grad_norm": 0.050711020827293396, "learning_rate": 0.01, "loss": 1.9994, "step": 47382 }, { "epoch": 4.860997127615921, "grad_norm": 0.04845008999109268, "learning_rate": 0.01, "loss": 1.9875, "step": 47385 }, { "epoch": 4.861304883052934, "grad_norm": 0.04254496470093727, "learning_rate": 0.01, "loss": 2.0029, "step": 47388 }, { "epoch": 4.861612638489946, "grad_norm": 0.03248690441250801, "learning_rate": 0.01, "loss": 1.9794, "step": 47391 }, { "epoch": 4.861920393926959, "grad_norm": 0.05999647080898285, "learning_rate": 0.01, "loss": 1.9929, "step": 47394 }, { "epoch": 4.862228149363972, "grad_norm": 0.09282340854406357, "learning_rate": 0.01, "loss": 1.9877, "step": 47397 }, { "epoch": 4.862535904800985, "grad_norm": 0.04029770568013191, "learning_rate": 0.01, "loss": 1.9877, "step": 47400 }, { "epoch": 4.862843660237997, "grad_norm": 0.030764177441596985, "learning_rate": 0.01, "loss": 1.9783, "step": 47403 }, { "epoch": 4.86315141567501, "grad_norm": 0.04076581448316574, "learning_rate": 0.01, "loss": 1.9974, "step": 47406 }, { "epoch": 4.863459171112023, "grad_norm": 0.0759870857000351, "learning_rate": 0.01, "loss": 1.9605, "step": 47409 }, { "epoch": 4.863766926549036, "grad_norm": 0.041376739740371704, "learning_rate": 0.01, "loss": 2.0275, "step": 47412 }, { "epoch": 4.8640746819860485, "grad_norm": 0.13923729956150055, "learning_rate": 0.01, "loss": 1.9829, "step": 47415 }, { "epoch": 4.864382437423061, "grad_norm": 0.06702505052089691, "learning_rate": 0.01, "loss": 2.002, "step": 47418 }, { "epoch": 4.864690192860074, "grad_norm": 0.04206367954611778, "learning_rate": 0.01, "loss": 2.003, "step": 47421 }, { "epoch": 4.864997948297087, "grad_norm": 0.043106552213430405, "learning_rate": 0.01, "loss": 1.9679, "step": 47424 }, { "epoch": 4.8653057037341, "grad_norm": 0.04751205816864967, "learning_rate": 0.01, "loss": 1.9687, "step": 47427 }, { "epoch": 4.8656134591711115, "grad_norm": 0.11286492645740509, "learning_rate": 0.01, "loss": 1.9888, "step": 47430 }, { "epoch": 4.865921214608125, "grad_norm": 0.04195297136902809, "learning_rate": 0.01, "loss": 2.0008, "step": 47433 }, { "epoch": 4.866228970045137, "grad_norm": 0.03481072559952736, "learning_rate": 0.01, "loss": 1.982, "step": 47436 }, { "epoch": 4.86653672548215, "grad_norm": 0.0647101029753685, "learning_rate": 0.01, "loss": 1.9958, "step": 47439 }, { "epoch": 4.866844480919163, "grad_norm": 0.09517384320497513, "learning_rate": 0.01, "loss": 1.9902, "step": 47442 }, { "epoch": 4.867152236356175, "grad_norm": 0.06879560649394989, "learning_rate": 0.01, "loss": 1.9897, "step": 47445 }, { "epoch": 4.867459991793188, "grad_norm": 0.08635231107473373, "learning_rate": 0.01, "loss": 1.9884, "step": 47448 }, { "epoch": 4.867767747230201, "grad_norm": 0.04220673069357872, "learning_rate": 0.01, "loss": 1.9774, "step": 47451 }, { "epoch": 4.868075502667214, "grad_norm": 0.07045330852270126, "learning_rate": 0.01, "loss": 1.9817, "step": 47454 }, { "epoch": 4.868383258104227, "grad_norm": 0.04260876774787903, "learning_rate": 0.01, "loss": 1.9859, "step": 47457 }, { "epoch": 4.868691013541239, "grad_norm": 0.040992897003889084, "learning_rate": 0.01, "loss": 1.9635, "step": 47460 }, { "epoch": 4.868998768978252, "grad_norm": 0.04472249746322632, "learning_rate": 0.01, "loss": 2.0002, "step": 47463 }, { "epoch": 4.869306524415265, "grad_norm": 0.03990897163748741, "learning_rate": 0.01, "loss": 1.9875, "step": 47466 }, { "epoch": 4.869614279852278, "grad_norm": 0.10024430602788925, "learning_rate": 0.01, "loss": 1.9922, "step": 47469 }, { "epoch": 4.8699220352892905, "grad_norm": 0.08980470895767212, "learning_rate": 0.01, "loss": 1.9894, "step": 47472 }, { "epoch": 4.870229790726302, "grad_norm": 0.05399052053689957, "learning_rate": 0.01, "loss": 1.9665, "step": 47475 }, { "epoch": 4.870537546163316, "grad_norm": 0.03817398473620415, "learning_rate": 0.01, "loss": 1.9943, "step": 47478 }, { "epoch": 4.870845301600328, "grad_norm": 0.0670650526881218, "learning_rate": 0.01, "loss": 1.9897, "step": 47481 }, { "epoch": 4.871153057037341, "grad_norm": 0.0896904319524765, "learning_rate": 0.01, "loss": 1.9982, "step": 47484 }, { "epoch": 4.8714608124743535, "grad_norm": 0.10440392047166824, "learning_rate": 0.01, "loss": 1.9863, "step": 47487 }, { "epoch": 4.871768567911366, "grad_norm": 0.0894385576248169, "learning_rate": 0.01, "loss": 1.9936, "step": 47490 }, { "epoch": 4.872076323348379, "grad_norm": 0.05117709934711456, "learning_rate": 0.01, "loss": 2.0049, "step": 47493 }, { "epoch": 4.872384078785392, "grad_norm": 0.03292097896337509, "learning_rate": 0.01, "loss": 1.9781, "step": 47496 }, { "epoch": 4.872691834222405, "grad_norm": 0.032028477638959885, "learning_rate": 0.01, "loss": 1.9925, "step": 47499 }, { "epoch": 4.872999589659417, "grad_norm": 0.046977583318948746, "learning_rate": 0.01, "loss": 2.0151, "step": 47502 }, { "epoch": 4.87330734509643, "grad_norm": 0.11463116854429245, "learning_rate": 0.01, "loss": 1.9807, "step": 47505 }, { "epoch": 4.873615100533443, "grad_norm": 0.031687479466199875, "learning_rate": 0.01, "loss": 1.9652, "step": 47508 }, { "epoch": 4.873922855970456, "grad_norm": 0.04342150315642357, "learning_rate": 0.01, "loss": 2.0073, "step": 47511 }, { "epoch": 4.8742306114074685, "grad_norm": 0.03970487043261528, "learning_rate": 0.01, "loss": 1.9922, "step": 47514 }, { "epoch": 4.874538366844481, "grad_norm": 0.05668526887893677, "learning_rate": 0.01, "loss": 1.9907, "step": 47517 }, { "epoch": 4.874846122281493, "grad_norm": 0.05608886480331421, "learning_rate": 0.01, "loss": 1.9869, "step": 47520 }, { "epoch": 4.875153877718507, "grad_norm": 0.13083675503730774, "learning_rate": 0.01, "loss": 1.9794, "step": 47523 }, { "epoch": 4.875461633155519, "grad_norm": 0.040449466556310654, "learning_rate": 0.01, "loss": 1.9718, "step": 47526 }, { "epoch": 4.8757693885925315, "grad_norm": 0.08093597739934921, "learning_rate": 0.01, "loss": 1.9938, "step": 47529 }, { "epoch": 4.876077144029544, "grad_norm": 0.08471735566854477, "learning_rate": 0.01, "loss": 1.9785, "step": 47532 }, { "epoch": 4.876384899466557, "grad_norm": 0.05568789690732956, "learning_rate": 0.01, "loss": 1.9748, "step": 47535 }, { "epoch": 4.87669265490357, "grad_norm": 0.07529760152101517, "learning_rate": 0.01, "loss": 1.9734, "step": 47538 }, { "epoch": 4.877000410340583, "grad_norm": 0.06487952917814255, "learning_rate": 0.01, "loss": 1.9598, "step": 47541 }, { "epoch": 4.877308165777595, "grad_norm": 0.049183983355760574, "learning_rate": 0.01, "loss": 1.9761, "step": 47544 }, { "epoch": 4.877615921214608, "grad_norm": 0.08050165325403214, "learning_rate": 0.01, "loss": 1.9767, "step": 47547 }, { "epoch": 4.877923676651621, "grad_norm": 0.061584021896123886, "learning_rate": 0.01, "loss": 1.9966, "step": 47550 }, { "epoch": 4.878231432088634, "grad_norm": 0.052319157868623734, "learning_rate": 0.01, "loss": 2.01, "step": 47553 }, { "epoch": 4.8785391875256465, "grad_norm": 0.059711821377277374, "learning_rate": 0.01, "loss": 1.9743, "step": 47556 }, { "epoch": 4.878846942962659, "grad_norm": 0.10813643783330917, "learning_rate": 0.01, "loss": 2.0095, "step": 47559 }, { "epoch": 4.879154698399672, "grad_norm": 0.0562468096613884, "learning_rate": 0.01, "loss": 1.9917, "step": 47562 }, { "epoch": 4.879462453836684, "grad_norm": 0.14491213858127594, "learning_rate": 0.01, "loss": 1.9776, "step": 47565 }, { "epoch": 4.879770209273698, "grad_norm": 0.06594674289226532, "learning_rate": 0.01, "loss": 1.9815, "step": 47568 }, { "epoch": 4.8800779647107095, "grad_norm": 0.05883457511663437, "learning_rate": 0.01, "loss": 2.01, "step": 47571 }, { "epoch": 4.880385720147722, "grad_norm": 0.049259252846241, "learning_rate": 0.01, "loss": 1.987, "step": 47574 }, { "epoch": 4.880693475584735, "grad_norm": 0.0817684531211853, "learning_rate": 0.01, "loss": 1.9907, "step": 47577 }, { "epoch": 4.881001231021748, "grad_norm": 0.07413051277399063, "learning_rate": 0.01, "loss": 2.0177, "step": 47580 }, { "epoch": 4.881308986458761, "grad_norm": 0.03806111216545105, "learning_rate": 0.01, "loss": 1.9924, "step": 47583 }, { "epoch": 4.881616741895773, "grad_norm": 0.11986687034368515, "learning_rate": 0.01, "loss": 1.9963, "step": 47586 }, { "epoch": 4.881924497332786, "grad_norm": 0.09377092123031616, "learning_rate": 0.01, "loss": 1.9818, "step": 47589 }, { "epoch": 4.882232252769799, "grad_norm": 0.05205828696489334, "learning_rate": 0.01, "loss": 1.9777, "step": 47592 }, { "epoch": 4.882540008206812, "grad_norm": 0.07014745473861694, "learning_rate": 0.01, "loss": 1.9842, "step": 47595 }, { "epoch": 4.882847763643825, "grad_norm": 0.04630004242062569, "learning_rate": 0.01, "loss": 1.9782, "step": 47598 }, { "epoch": 4.883155519080837, "grad_norm": 0.06072268262505531, "learning_rate": 0.01, "loss": 1.9826, "step": 47601 }, { "epoch": 4.88346327451785, "grad_norm": 0.04583202302455902, "learning_rate": 0.01, "loss": 1.9966, "step": 47604 }, { "epoch": 4.883771029954863, "grad_norm": 0.10760218650102615, "learning_rate": 0.01, "loss": 1.9954, "step": 47607 }, { "epoch": 4.884078785391875, "grad_norm": 0.09803519397974014, "learning_rate": 0.01, "loss": 2.0101, "step": 47610 }, { "epoch": 4.8843865408288885, "grad_norm": 0.05660640448331833, "learning_rate": 0.01, "loss": 1.9988, "step": 47613 }, { "epoch": 4.8846942962659, "grad_norm": 0.07300207018852234, "learning_rate": 0.01, "loss": 1.9966, "step": 47616 }, { "epoch": 4.885002051702913, "grad_norm": 0.05942653492093086, "learning_rate": 0.01, "loss": 1.9922, "step": 47619 }, { "epoch": 4.885309807139926, "grad_norm": 0.0775621309876442, "learning_rate": 0.01, "loss": 2.0027, "step": 47622 }, { "epoch": 4.885617562576939, "grad_norm": 0.08601850271224976, "learning_rate": 0.01, "loss": 1.9876, "step": 47625 }, { "epoch": 4.8859253180139515, "grad_norm": 0.07847361266613007, "learning_rate": 0.01, "loss": 1.9886, "step": 47628 }, { "epoch": 4.886233073450964, "grad_norm": 0.06876290589570999, "learning_rate": 0.01, "loss": 1.9969, "step": 47631 }, { "epoch": 4.886540828887977, "grad_norm": 0.06279505789279938, "learning_rate": 0.01, "loss": 1.9865, "step": 47634 }, { "epoch": 4.88684858432499, "grad_norm": 0.0461944080889225, "learning_rate": 0.01, "loss": 1.9942, "step": 47637 }, { "epoch": 4.887156339762003, "grad_norm": 0.08709289878606796, "learning_rate": 0.01, "loss": 1.9779, "step": 47640 }, { "epoch": 4.887464095199015, "grad_norm": 0.07311128079891205, "learning_rate": 0.01, "loss": 1.9975, "step": 47643 }, { "epoch": 4.887771850636028, "grad_norm": 0.11558615416288376, "learning_rate": 0.01, "loss": 1.9976, "step": 47646 }, { "epoch": 4.888079606073041, "grad_norm": 0.044442903250455856, "learning_rate": 0.01, "loss": 1.9826, "step": 47649 }, { "epoch": 4.888387361510054, "grad_norm": 0.03592165187001228, "learning_rate": 0.01, "loss": 1.997, "step": 47652 }, { "epoch": 4.888695116947066, "grad_norm": 0.053503166884183884, "learning_rate": 0.01, "loss": 2.0183, "step": 47655 }, { "epoch": 4.889002872384079, "grad_norm": 0.05185321345925331, "learning_rate": 0.01, "loss": 1.9816, "step": 47658 }, { "epoch": 4.889310627821091, "grad_norm": 0.10305842012166977, "learning_rate": 0.01, "loss": 1.9621, "step": 47661 }, { "epoch": 4.889618383258104, "grad_norm": 0.04605614393949509, "learning_rate": 0.01, "loss": 2.0359, "step": 47664 }, { "epoch": 4.889926138695117, "grad_norm": 0.08601140230894089, "learning_rate": 0.01, "loss": 1.9911, "step": 47667 }, { "epoch": 4.8902338941321295, "grad_norm": 0.039195116609334946, "learning_rate": 0.01, "loss": 1.978, "step": 47670 }, { "epoch": 4.890541649569142, "grad_norm": 0.04058361053466797, "learning_rate": 0.01, "loss": 1.9952, "step": 47673 }, { "epoch": 4.890849405006155, "grad_norm": 0.049590058624744415, "learning_rate": 0.01, "loss": 1.9788, "step": 47676 }, { "epoch": 4.891157160443168, "grad_norm": 0.05253410339355469, "learning_rate": 0.01, "loss": 2.0, "step": 47679 }, { "epoch": 4.891464915880181, "grad_norm": 0.05858082324266434, "learning_rate": 0.01, "loss": 1.9868, "step": 47682 }, { "epoch": 4.891772671317193, "grad_norm": 0.05094842612743378, "learning_rate": 0.01, "loss": 1.9743, "step": 47685 }, { "epoch": 4.892080426754206, "grad_norm": 0.0539892241358757, "learning_rate": 0.01, "loss": 1.9765, "step": 47688 }, { "epoch": 4.892388182191219, "grad_norm": 0.15945689380168915, "learning_rate": 0.01, "loss": 1.9983, "step": 47691 }, { "epoch": 4.892695937628232, "grad_norm": 0.03433748334646225, "learning_rate": 0.01, "loss": 1.9898, "step": 47694 }, { "epoch": 4.8930036930652445, "grad_norm": 0.047357238829135895, "learning_rate": 0.01, "loss": 1.9801, "step": 47697 }, { "epoch": 4.893311448502256, "grad_norm": 0.05974509194493294, "learning_rate": 0.01, "loss": 2.0253, "step": 47700 }, { "epoch": 4.89361920393927, "grad_norm": 0.05918588861823082, "learning_rate": 0.01, "loss": 1.9797, "step": 47703 }, { "epoch": 4.893926959376282, "grad_norm": 0.04418300464749336, "learning_rate": 0.01, "loss": 2.0014, "step": 47706 }, { "epoch": 4.894234714813295, "grad_norm": 0.034721486270427704, "learning_rate": 0.01, "loss": 2.0021, "step": 47709 }, { "epoch": 4.8945424702503075, "grad_norm": 0.08422865718603134, "learning_rate": 0.01, "loss": 1.976, "step": 47712 }, { "epoch": 4.89485022568732, "grad_norm": 0.12269464135169983, "learning_rate": 0.01, "loss": 1.9866, "step": 47715 }, { "epoch": 4.895157981124333, "grad_norm": 0.06587913632392883, "learning_rate": 0.01, "loss": 1.999, "step": 47718 }, { "epoch": 4.895465736561346, "grad_norm": 0.12484870105981827, "learning_rate": 0.01, "loss": 1.9895, "step": 47721 }, { "epoch": 4.895773491998359, "grad_norm": 0.07100055366754532, "learning_rate": 0.01, "loss": 1.985, "step": 47724 }, { "epoch": 4.896081247435371, "grad_norm": 0.03272338956594467, "learning_rate": 0.01, "loss": 1.9816, "step": 47727 }, { "epoch": 4.896389002872384, "grad_norm": 0.03650757670402527, "learning_rate": 0.01, "loss": 2.0009, "step": 47730 }, { "epoch": 4.896696758309397, "grad_norm": 0.04600701853632927, "learning_rate": 0.01, "loss": 2.0026, "step": 47733 }, { "epoch": 4.89700451374641, "grad_norm": 0.06520526856184006, "learning_rate": 0.01, "loss": 1.9877, "step": 47736 }, { "epoch": 4.8973122691834226, "grad_norm": 0.04096482694149017, "learning_rate": 0.01, "loss": 1.9706, "step": 47739 }, { "epoch": 4.897620024620435, "grad_norm": 0.03720017522573471, "learning_rate": 0.01, "loss": 1.9745, "step": 47742 }, { "epoch": 4.897927780057447, "grad_norm": 0.04412877559661865, "learning_rate": 0.01, "loss": 1.9969, "step": 47745 }, { "epoch": 4.898235535494461, "grad_norm": 0.03968506306409836, "learning_rate": 0.01, "loss": 1.9944, "step": 47748 }, { "epoch": 4.898543290931473, "grad_norm": 0.10775657743215561, "learning_rate": 0.01, "loss": 1.9709, "step": 47751 }, { "epoch": 4.898851046368486, "grad_norm": 0.05303318426012993, "learning_rate": 0.01, "loss": 1.9536, "step": 47754 }, { "epoch": 4.899158801805498, "grad_norm": 0.0775865837931633, "learning_rate": 0.01, "loss": 1.9916, "step": 47757 }, { "epoch": 4.899466557242511, "grad_norm": 0.10708294063806534, "learning_rate": 0.01, "loss": 1.972, "step": 47760 }, { "epoch": 4.899774312679524, "grad_norm": 0.05531914532184601, "learning_rate": 0.01, "loss": 2.0002, "step": 47763 }, { "epoch": 4.900082068116537, "grad_norm": 0.057492613792419434, "learning_rate": 0.01, "loss": 2.0103, "step": 47766 }, { "epoch": 4.9003898235535495, "grad_norm": 0.04654363915324211, "learning_rate": 0.01, "loss": 1.966, "step": 47769 }, { "epoch": 4.900697578990562, "grad_norm": 0.060792889446020126, "learning_rate": 0.01, "loss": 1.9836, "step": 47772 }, { "epoch": 4.901005334427575, "grad_norm": 0.053037889301776886, "learning_rate": 0.01, "loss": 2.0002, "step": 47775 }, { "epoch": 4.901313089864588, "grad_norm": 0.07746932655572891, "learning_rate": 0.01, "loss": 1.9738, "step": 47778 }, { "epoch": 4.901620845301601, "grad_norm": 0.07753589004278183, "learning_rate": 0.01, "loss": 2.0094, "step": 47781 }, { "epoch": 4.901928600738613, "grad_norm": 0.07140833884477615, "learning_rate": 0.01, "loss": 1.9908, "step": 47784 }, { "epoch": 4.902236356175626, "grad_norm": 0.12805962562561035, "learning_rate": 0.01, "loss": 1.984, "step": 47787 }, { "epoch": 4.902544111612638, "grad_norm": 0.10444001108407974, "learning_rate": 0.01, "loss": 1.9788, "step": 47790 }, { "epoch": 4.902851867049652, "grad_norm": 0.13313624262809753, "learning_rate": 0.01, "loss": 1.9913, "step": 47793 }, { "epoch": 4.903159622486664, "grad_norm": 0.0864720493555069, "learning_rate": 0.01, "loss": 2.0043, "step": 47796 }, { "epoch": 4.903467377923676, "grad_norm": 0.05329997465014458, "learning_rate": 0.01, "loss": 2.0022, "step": 47799 }, { "epoch": 4.903775133360689, "grad_norm": 0.053774427622556686, "learning_rate": 0.01, "loss": 1.9824, "step": 47802 }, { "epoch": 4.904082888797702, "grad_norm": 0.06760123372077942, "learning_rate": 0.01, "loss": 1.9593, "step": 47805 }, { "epoch": 4.904390644234715, "grad_norm": 0.06961317360401154, "learning_rate": 0.01, "loss": 1.9785, "step": 47808 }, { "epoch": 4.9046983996717275, "grad_norm": 0.05297344923019409, "learning_rate": 0.01, "loss": 1.9786, "step": 47811 }, { "epoch": 4.90500615510874, "grad_norm": 0.06930317729711533, "learning_rate": 0.01, "loss": 2.0076, "step": 47814 }, { "epoch": 4.905313910545753, "grad_norm": 0.039554011076688766, "learning_rate": 0.01, "loss": 1.9833, "step": 47817 }, { "epoch": 4.905621665982766, "grad_norm": 0.03862786293029785, "learning_rate": 0.01, "loss": 1.9911, "step": 47820 }, { "epoch": 4.905929421419779, "grad_norm": 0.03559514880180359, "learning_rate": 0.01, "loss": 1.9733, "step": 47823 }, { "epoch": 4.906237176856791, "grad_norm": 0.11802364140748978, "learning_rate": 0.01, "loss": 2.0073, "step": 47826 }, { "epoch": 4.906544932293804, "grad_norm": 0.049168869853019714, "learning_rate": 0.01, "loss": 1.9787, "step": 47829 }, { "epoch": 4.906852687730817, "grad_norm": 0.07636560499668121, "learning_rate": 0.01, "loss": 1.9821, "step": 47832 }, { "epoch": 4.907160443167829, "grad_norm": 0.08972848206758499, "learning_rate": 0.01, "loss": 1.9712, "step": 47835 }, { "epoch": 4.9074681986048425, "grad_norm": 0.06916697323322296, "learning_rate": 0.01, "loss": 1.9965, "step": 47838 }, { "epoch": 4.907775954041854, "grad_norm": 0.058684833347797394, "learning_rate": 0.01, "loss": 1.9866, "step": 47841 }, { "epoch": 4.908083709478867, "grad_norm": 0.045184120535850525, "learning_rate": 0.01, "loss": 1.9579, "step": 47844 }, { "epoch": 4.90839146491588, "grad_norm": 0.11755534261465073, "learning_rate": 0.01, "loss": 1.9775, "step": 47847 }, { "epoch": 4.908699220352893, "grad_norm": 0.07570980489253998, "learning_rate": 0.01, "loss": 1.9835, "step": 47850 }, { "epoch": 4.9090069757899055, "grad_norm": 0.050079140812158585, "learning_rate": 0.01, "loss": 1.9986, "step": 47853 }, { "epoch": 4.909314731226918, "grad_norm": 0.12019573897123337, "learning_rate": 0.01, "loss": 1.9825, "step": 47856 }, { "epoch": 4.909622486663931, "grad_norm": 0.10437455028295517, "learning_rate": 0.01, "loss": 1.9902, "step": 47859 }, { "epoch": 4.909930242100944, "grad_norm": 0.05818986892700195, "learning_rate": 0.01, "loss": 1.9871, "step": 47862 }, { "epoch": 4.910237997537957, "grad_norm": 0.04473674297332764, "learning_rate": 0.01, "loss": 1.9964, "step": 47865 }, { "epoch": 4.910545752974969, "grad_norm": 0.07120556384325027, "learning_rate": 0.01, "loss": 1.9877, "step": 47868 }, { "epoch": 4.910853508411982, "grad_norm": 0.0661742091178894, "learning_rate": 0.01, "loss": 1.9813, "step": 47871 }, { "epoch": 4.911161263848995, "grad_norm": 0.07108204811811447, "learning_rate": 0.01, "loss": 1.9847, "step": 47874 }, { "epoch": 4.911469019286008, "grad_norm": 0.09054633975028992, "learning_rate": 0.01, "loss": 2.0145, "step": 47877 }, { "epoch": 4.91177677472302, "grad_norm": 0.13030128180980682, "learning_rate": 0.01, "loss": 1.9726, "step": 47880 }, { "epoch": 4.912084530160033, "grad_norm": 0.15652155876159668, "learning_rate": 0.01, "loss": 2.0049, "step": 47883 }, { "epoch": 4.912392285597045, "grad_norm": 0.10536278784275055, "learning_rate": 0.01, "loss": 1.9871, "step": 47886 }, { "epoch": 4.912700041034058, "grad_norm": 0.0899362787604332, "learning_rate": 0.01, "loss": 1.9676, "step": 47889 }, { "epoch": 4.913007796471071, "grad_norm": 0.057359661906957626, "learning_rate": 0.01, "loss": 1.9633, "step": 47892 }, { "epoch": 4.913315551908084, "grad_norm": 0.04800909757614136, "learning_rate": 0.01, "loss": 1.994, "step": 47895 }, { "epoch": 4.913623307345096, "grad_norm": 0.04786820337176323, "learning_rate": 0.01, "loss": 2.002, "step": 47898 }, { "epoch": 4.913931062782109, "grad_norm": 0.04169714078307152, "learning_rate": 0.01, "loss": 1.9764, "step": 47901 }, { "epoch": 4.914238818219122, "grad_norm": 0.04943560063838959, "learning_rate": 0.01, "loss": 1.9579, "step": 47904 }, { "epoch": 4.914546573656135, "grad_norm": 0.06505610793828964, "learning_rate": 0.01, "loss": 2.0132, "step": 47907 }, { "epoch": 4.9148543290931475, "grad_norm": 0.057665206491947174, "learning_rate": 0.01, "loss": 1.9838, "step": 47910 }, { "epoch": 4.91516208453016, "grad_norm": 0.060052540153265, "learning_rate": 0.01, "loss": 1.989, "step": 47913 }, { "epoch": 4.915469839967173, "grad_norm": 0.062361933290958405, "learning_rate": 0.01, "loss": 1.9894, "step": 47916 }, { "epoch": 4.915777595404186, "grad_norm": 0.15210957825183868, "learning_rate": 0.01, "loss": 1.9865, "step": 47919 }, { "epoch": 4.916085350841199, "grad_norm": 0.05437038466334343, "learning_rate": 0.01, "loss": 1.9967, "step": 47922 }, { "epoch": 4.9163931062782105, "grad_norm": 0.07012643665075302, "learning_rate": 0.01, "loss": 1.9975, "step": 47925 }, { "epoch": 4.916700861715223, "grad_norm": 0.056515853852033615, "learning_rate": 0.01, "loss": 1.9873, "step": 47928 }, { "epoch": 4.917008617152236, "grad_norm": 0.13166914880275726, "learning_rate": 0.01, "loss": 1.9749, "step": 47931 }, { "epoch": 4.917316372589249, "grad_norm": 0.05462309718132019, "learning_rate": 0.01, "loss": 1.9831, "step": 47934 }, { "epoch": 4.917624128026262, "grad_norm": 0.0725654810667038, "learning_rate": 0.01, "loss": 1.9991, "step": 47937 }, { "epoch": 4.917931883463274, "grad_norm": 0.09769755601882935, "learning_rate": 0.01, "loss": 1.9826, "step": 47940 }, { "epoch": 4.918239638900287, "grad_norm": 0.10543838888406754, "learning_rate": 0.01, "loss": 2.015, "step": 47943 }, { "epoch": 4.9185473943373, "grad_norm": 0.08627963066101074, "learning_rate": 0.01, "loss": 1.9588, "step": 47946 }, { "epoch": 4.918855149774313, "grad_norm": 0.05162891000509262, "learning_rate": 0.01, "loss": 2.0049, "step": 47949 }, { "epoch": 4.9191629052113255, "grad_norm": 0.040748897939920425, "learning_rate": 0.01, "loss": 2.0034, "step": 47952 }, { "epoch": 4.919470660648338, "grad_norm": 0.042143408209085464, "learning_rate": 0.01, "loss": 1.9958, "step": 47955 }, { "epoch": 4.919778416085351, "grad_norm": 0.035993028432130814, "learning_rate": 0.01, "loss": 1.9651, "step": 47958 }, { "epoch": 4.920086171522364, "grad_norm": 0.13255073130130768, "learning_rate": 0.01, "loss": 1.9861, "step": 47961 }, { "epoch": 4.920393926959377, "grad_norm": 0.06527701765298843, "learning_rate": 0.01, "loss": 2.0059, "step": 47964 }, { "epoch": 4.920701682396389, "grad_norm": 0.08764603734016418, "learning_rate": 0.01, "loss": 1.9612, "step": 47967 }, { "epoch": 4.921009437833401, "grad_norm": 0.11476396769285202, "learning_rate": 0.01, "loss": 1.9796, "step": 47970 }, { "epoch": 4.921317193270414, "grad_norm": 0.11724267154932022, "learning_rate": 0.01, "loss": 1.9849, "step": 47973 }, { "epoch": 4.921624948707427, "grad_norm": 0.06371249258518219, "learning_rate": 0.01, "loss": 1.9996, "step": 47976 }, { "epoch": 4.92193270414444, "grad_norm": 0.05650569498538971, "learning_rate": 0.01, "loss": 1.9873, "step": 47979 }, { "epoch": 4.922240459581452, "grad_norm": 0.05101975426077843, "learning_rate": 0.01, "loss": 1.9909, "step": 47982 }, { "epoch": 4.922548215018465, "grad_norm": 0.06948293745517731, "learning_rate": 0.01, "loss": 1.9667, "step": 47985 }, { "epoch": 4.922855970455478, "grad_norm": 0.05776602029800415, "learning_rate": 0.01, "loss": 1.9918, "step": 47988 }, { "epoch": 4.923163725892491, "grad_norm": 0.03927835077047348, "learning_rate": 0.01, "loss": 1.9804, "step": 47991 }, { "epoch": 4.9234714813295035, "grad_norm": 0.07943009585142136, "learning_rate": 0.01, "loss": 1.9817, "step": 47994 }, { "epoch": 4.923779236766516, "grad_norm": 0.07352134585380554, "learning_rate": 0.01, "loss": 2.0015, "step": 47997 }, { "epoch": 4.924086992203529, "grad_norm": 0.07295040786266327, "learning_rate": 0.01, "loss": 1.9583, "step": 48000 }, { "epoch": 4.924394747640542, "grad_norm": 0.08415599912405014, "learning_rate": 0.01, "loss": 1.9878, "step": 48003 }, { "epoch": 4.924702503077555, "grad_norm": 0.0890984758734703, "learning_rate": 0.01, "loss": 1.9735, "step": 48006 }, { "epoch": 4.925010258514567, "grad_norm": 0.12055069953203201, "learning_rate": 0.01, "loss": 1.9916, "step": 48009 }, { "epoch": 4.92531801395158, "grad_norm": 0.0676504373550415, "learning_rate": 0.01, "loss": 1.9898, "step": 48012 }, { "epoch": 4.925625769388592, "grad_norm": 0.047612790018320084, "learning_rate": 0.01, "loss": 1.9818, "step": 48015 }, { "epoch": 4.925933524825605, "grad_norm": 0.06470039486885071, "learning_rate": 0.01, "loss": 1.9904, "step": 48018 }, { "epoch": 4.926241280262618, "grad_norm": 0.07510625571012497, "learning_rate": 0.01, "loss": 1.9749, "step": 48021 }, { "epoch": 4.92654903569963, "grad_norm": 0.07793190330266953, "learning_rate": 0.01, "loss": 1.9749, "step": 48024 }, { "epoch": 4.926856791136643, "grad_norm": 0.08655183017253876, "learning_rate": 0.01, "loss": 1.9609, "step": 48027 }, { "epoch": 4.927164546573656, "grad_norm": 0.07441940903663635, "learning_rate": 0.01, "loss": 1.9847, "step": 48030 }, { "epoch": 4.927472302010669, "grad_norm": 0.04783869534730911, "learning_rate": 0.01, "loss": 1.9981, "step": 48033 }, { "epoch": 4.9277800574476815, "grad_norm": 0.13127107918262482, "learning_rate": 0.01, "loss": 2.0117, "step": 48036 }, { "epoch": 4.928087812884694, "grad_norm": 0.05267849192023277, "learning_rate": 0.01, "loss": 1.9726, "step": 48039 }, { "epoch": 4.928395568321707, "grad_norm": 0.04276135936379433, "learning_rate": 0.01, "loss": 1.9768, "step": 48042 }, { "epoch": 4.92870332375872, "grad_norm": 0.04237223416566849, "learning_rate": 0.01, "loss": 1.989, "step": 48045 }, { "epoch": 4.929011079195733, "grad_norm": 0.052018001675605774, "learning_rate": 0.01, "loss": 1.9783, "step": 48048 }, { "epoch": 4.9293188346327454, "grad_norm": 0.05408487468957901, "learning_rate": 0.01, "loss": 1.9997, "step": 48051 }, { "epoch": 4.929626590069758, "grad_norm": 0.07300473004579544, "learning_rate": 0.01, "loss": 1.9986, "step": 48054 }, { "epoch": 4.929934345506771, "grad_norm": 0.08607280254364014, "learning_rate": 0.01, "loss": 1.9952, "step": 48057 }, { "epoch": 4.930242100943783, "grad_norm": 0.061927784234285355, "learning_rate": 0.01, "loss": 1.9687, "step": 48060 }, { "epoch": 4.930549856380796, "grad_norm": 0.08371556550264359, "learning_rate": 0.01, "loss": 1.9692, "step": 48063 }, { "epoch": 4.9308576118178085, "grad_norm": 0.03664041683077812, "learning_rate": 0.01, "loss": 1.997, "step": 48066 }, { "epoch": 4.931165367254821, "grad_norm": 0.06964648514986038, "learning_rate": 0.01, "loss": 1.9893, "step": 48069 }, { "epoch": 4.931473122691834, "grad_norm": 0.07257051765918732, "learning_rate": 0.01, "loss": 1.9869, "step": 48072 }, { "epoch": 4.931780878128847, "grad_norm": 0.06781255453824997, "learning_rate": 0.01, "loss": 1.9678, "step": 48075 }, { "epoch": 4.93208863356586, "grad_norm": 0.04258018732070923, "learning_rate": 0.01, "loss": 1.992, "step": 48078 }, { "epoch": 4.932396389002872, "grad_norm": 0.05434480682015419, "learning_rate": 0.01, "loss": 1.9782, "step": 48081 }, { "epoch": 4.932704144439885, "grad_norm": 0.03577110916376114, "learning_rate": 0.01, "loss": 1.9817, "step": 48084 }, { "epoch": 4.933011899876898, "grad_norm": 0.11260990798473358, "learning_rate": 0.01, "loss": 2.0087, "step": 48087 }, { "epoch": 4.933319655313911, "grad_norm": 0.09070918709039688, "learning_rate": 0.01, "loss": 1.9969, "step": 48090 }, { "epoch": 4.9336274107509235, "grad_norm": 0.06896468997001648, "learning_rate": 0.01, "loss": 1.9965, "step": 48093 }, { "epoch": 4.933935166187936, "grad_norm": 0.050933387130498886, "learning_rate": 0.01, "loss": 1.9885, "step": 48096 }, { "epoch": 4.934242921624949, "grad_norm": 0.04397554323077202, "learning_rate": 0.01, "loss": 1.9988, "step": 48099 }, { "epoch": 4.934550677061962, "grad_norm": 0.03253142163157463, "learning_rate": 0.01, "loss": 1.9974, "step": 48102 }, { "epoch": 4.934858432498974, "grad_norm": 0.051716264337301254, "learning_rate": 0.01, "loss": 1.9985, "step": 48105 }, { "epoch": 4.9351661879359865, "grad_norm": 0.04921911656856537, "learning_rate": 0.01, "loss": 1.988, "step": 48108 }, { "epoch": 4.935473943372999, "grad_norm": 0.05059951916337013, "learning_rate": 0.01, "loss": 1.989, "step": 48111 }, { "epoch": 4.935781698810012, "grad_norm": 0.11451517045497894, "learning_rate": 0.01, "loss": 1.989, "step": 48114 }, { "epoch": 4.936089454247025, "grad_norm": 0.06287554651498795, "learning_rate": 0.01, "loss": 1.9658, "step": 48117 }, { "epoch": 4.936397209684038, "grad_norm": 0.11319955438375473, "learning_rate": 0.01, "loss": 1.9907, "step": 48120 }, { "epoch": 4.93670496512105, "grad_norm": 0.04149606078863144, "learning_rate": 0.01, "loss": 1.9655, "step": 48123 }, { "epoch": 4.937012720558063, "grad_norm": 0.044562436640262604, "learning_rate": 0.01, "loss": 1.9738, "step": 48126 }, { "epoch": 4.937320475995076, "grad_norm": 0.04073583707213402, "learning_rate": 0.01, "loss": 1.9959, "step": 48129 }, { "epoch": 4.937628231432089, "grad_norm": 0.05489436164498329, "learning_rate": 0.01, "loss": 1.9708, "step": 48132 }, { "epoch": 4.9379359868691015, "grad_norm": 0.07751783728599548, "learning_rate": 0.01, "loss": 1.9892, "step": 48135 }, { "epoch": 4.938243742306114, "grad_norm": 0.06630382686853409, "learning_rate": 0.01, "loss": 1.9795, "step": 48138 }, { "epoch": 4.938551497743127, "grad_norm": 0.07446899265050888, "learning_rate": 0.01, "loss": 1.9807, "step": 48141 }, { "epoch": 4.93885925318014, "grad_norm": 0.0776534453034401, "learning_rate": 0.01, "loss": 1.9972, "step": 48144 }, { "epoch": 4.939167008617153, "grad_norm": 0.07835227996110916, "learning_rate": 0.01, "loss": 2.0016, "step": 48147 }, { "epoch": 4.9394747640541645, "grad_norm": 0.0798632800579071, "learning_rate": 0.01, "loss": 1.9653, "step": 48150 }, { "epoch": 4.939782519491177, "grad_norm": 0.044298071414232254, "learning_rate": 0.01, "loss": 2.0051, "step": 48153 }, { "epoch": 4.94009027492819, "grad_norm": 0.10383112728595734, "learning_rate": 0.01, "loss": 1.9804, "step": 48156 }, { "epoch": 4.940398030365203, "grad_norm": 0.12793728709220886, "learning_rate": 0.01, "loss": 1.9772, "step": 48159 }, { "epoch": 4.940705785802216, "grad_norm": 0.038207195699214935, "learning_rate": 0.01, "loss": 1.9651, "step": 48162 }, { "epoch": 4.941013541239228, "grad_norm": 0.0840001255273819, "learning_rate": 0.01, "loss": 1.977, "step": 48165 }, { "epoch": 4.941321296676241, "grad_norm": 0.11788827925920486, "learning_rate": 0.01, "loss": 1.9909, "step": 48168 }, { "epoch": 4.941629052113254, "grad_norm": 0.05462497100234032, "learning_rate": 0.01, "loss": 1.9857, "step": 48171 }, { "epoch": 4.941936807550267, "grad_norm": 0.0785188302397728, "learning_rate": 0.01, "loss": 1.9887, "step": 48174 }, { "epoch": 4.9422445629872795, "grad_norm": 0.09669160097837448, "learning_rate": 0.01, "loss": 1.9811, "step": 48177 }, { "epoch": 4.942552318424292, "grad_norm": 0.0880751982331276, "learning_rate": 0.01, "loss": 1.9953, "step": 48180 }, { "epoch": 4.942860073861305, "grad_norm": 0.04356632009148598, "learning_rate": 0.01, "loss": 1.9849, "step": 48183 }, { "epoch": 4.943167829298318, "grad_norm": 0.055611494928598404, "learning_rate": 0.01, "loss": 1.9936, "step": 48186 }, { "epoch": 4.943475584735331, "grad_norm": 0.05191848427057266, "learning_rate": 0.01, "loss": 1.9677, "step": 48189 }, { "epoch": 4.943783340172343, "grad_norm": 0.06415262818336487, "learning_rate": 0.01, "loss": 1.9893, "step": 48192 }, { "epoch": 4.944091095609355, "grad_norm": 0.04284198582172394, "learning_rate": 0.01, "loss": 1.999, "step": 48195 }, { "epoch": 4.944398851046368, "grad_norm": 0.03310967609286308, "learning_rate": 0.01, "loss": 1.9865, "step": 48198 }, { "epoch": 4.944706606483381, "grad_norm": 0.11449988186359406, "learning_rate": 0.01, "loss": 1.9771, "step": 48201 }, { "epoch": 4.945014361920394, "grad_norm": 0.0641254335641861, "learning_rate": 0.01, "loss": 1.9854, "step": 48204 }, { "epoch": 4.9453221173574065, "grad_norm": 0.08665329962968826, "learning_rate": 0.01, "loss": 1.9855, "step": 48207 }, { "epoch": 4.945629872794419, "grad_norm": 0.03855408355593681, "learning_rate": 0.01, "loss": 1.9997, "step": 48210 }, { "epoch": 4.945937628231432, "grad_norm": 0.03156152740120888, "learning_rate": 0.01, "loss": 1.9975, "step": 48213 }, { "epoch": 4.946245383668445, "grad_norm": 0.052971091121435165, "learning_rate": 0.01, "loss": 1.9712, "step": 48216 }, { "epoch": 4.946553139105458, "grad_norm": 0.04167775437235832, "learning_rate": 0.01, "loss": 1.9749, "step": 48219 }, { "epoch": 4.94686089454247, "grad_norm": 0.051760535687208176, "learning_rate": 0.01, "loss": 1.9695, "step": 48222 }, { "epoch": 4.947168649979483, "grad_norm": 0.0969996377825737, "learning_rate": 0.01, "loss": 1.9915, "step": 48225 }, { "epoch": 4.947476405416496, "grad_norm": 0.05064645782113075, "learning_rate": 0.01, "loss": 1.967, "step": 48228 }, { "epoch": 4.947784160853509, "grad_norm": 0.04742693901062012, "learning_rate": 0.01, "loss": 1.9663, "step": 48231 }, { "epoch": 4.9480919162905215, "grad_norm": 0.03507422283291817, "learning_rate": 0.01, "loss": 1.9858, "step": 48234 }, { "epoch": 4.948399671727534, "grad_norm": 0.03717927634716034, "learning_rate": 0.01, "loss": 1.9756, "step": 48237 }, { "epoch": 4.948707427164546, "grad_norm": 0.11404886096715927, "learning_rate": 0.01, "loss": 1.9924, "step": 48240 }, { "epoch": 4.949015182601559, "grad_norm": 0.05842389166355133, "learning_rate": 0.01, "loss": 1.9805, "step": 48243 }, { "epoch": 4.949322938038572, "grad_norm": 0.0430951751768589, "learning_rate": 0.01, "loss": 1.9993, "step": 48246 }, { "epoch": 4.9496306934755845, "grad_norm": 0.053793564438819885, "learning_rate": 0.01, "loss": 1.9853, "step": 48249 }, { "epoch": 4.949938448912597, "grad_norm": 0.04643286019563675, "learning_rate": 0.01, "loss": 2.0084, "step": 48252 }, { "epoch": 4.95024620434961, "grad_norm": 0.0575956366956234, "learning_rate": 0.01, "loss": 1.9927, "step": 48255 }, { "epoch": 4.950553959786623, "grad_norm": 0.05439090356230736, "learning_rate": 0.01, "loss": 1.978, "step": 48258 }, { "epoch": 4.950861715223636, "grad_norm": 0.05502607673406601, "learning_rate": 0.01, "loss": 1.9743, "step": 48261 }, { "epoch": 4.951169470660648, "grad_norm": 0.07984888553619385, "learning_rate": 0.01, "loss": 1.979, "step": 48264 }, { "epoch": 4.951477226097661, "grad_norm": 0.041320640593767166, "learning_rate": 0.01, "loss": 1.9822, "step": 48267 }, { "epoch": 4.951784981534674, "grad_norm": 0.12548021972179413, "learning_rate": 0.01, "loss": 1.9845, "step": 48270 }, { "epoch": 4.952092736971687, "grad_norm": 0.035638876259326935, "learning_rate": 0.01, "loss": 1.989, "step": 48273 }, { "epoch": 4.9524004924086995, "grad_norm": 0.13796448707580566, "learning_rate": 0.01, "loss": 2.0005, "step": 48276 }, { "epoch": 4.952708247845712, "grad_norm": 0.08978889882564545, "learning_rate": 0.01, "loss": 1.9814, "step": 48279 }, { "epoch": 4.953016003282725, "grad_norm": 0.0677703395485878, "learning_rate": 0.01, "loss": 1.9835, "step": 48282 }, { "epoch": 4.953323758719737, "grad_norm": 0.04120895639061928, "learning_rate": 0.01, "loss": 2.0018, "step": 48285 }, { "epoch": 4.95363151415675, "grad_norm": 0.043558813631534576, "learning_rate": 0.01, "loss": 1.9684, "step": 48288 }, { "epoch": 4.9539392695937625, "grad_norm": 0.03488008677959442, "learning_rate": 0.01, "loss": 1.9922, "step": 48291 }, { "epoch": 4.954247025030775, "grad_norm": 0.06290942430496216, "learning_rate": 0.01, "loss": 1.9924, "step": 48294 }, { "epoch": 4.954554780467788, "grad_norm": 0.05662504583597183, "learning_rate": 0.01, "loss": 1.9819, "step": 48297 }, { "epoch": 4.954862535904801, "grad_norm": 0.04810362681746483, "learning_rate": 0.01, "loss": 1.9855, "step": 48300 }, { "epoch": 4.955170291341814, "grad_norm": 0.06158094108104706, "learning_rate": 0.01, "loss": 1.984, "step": 48303 }, { "epoch": 4.955478046778826, "grad_norm": 0.1016242504119873, "learning_rate": 0.01, "loss": 1.9976, "step": 48306 }, { "epoch": 4.955785802215839, "grad_norm": 0.05129433050751686, "learning_rate": 0.01, "loss": 1.9674, "step": 48309 }, { "epoch": 4.956093557652852, "grad_norm": 0.09010904282331467, "learning_rate": 0.01, "loss": 1.9844, "step": 48312 }, { "epoch": 4.956401313089865, "grad_norm": 0.09995526820421219, "learning_rate": 0.01, "loss": 1.9706, "step": 48315 }, { "epoch": 4.9567090685268775, "grad_norm": 0.0642913207411766, "learning_rate": 0.01, "loss": 1.9573, "step": 48318 }, { "epoch": 4.95701682396389, "grad_norm": 0.03862082585692406, "learning_rate": 0.01, "loss": 2.0143, "step": 48321 }, { "epoch": 4.957324579400903, "grad_norm": 0.11760932207107544, "learning_rate": 0.01, "loss": 2.003, "step": 48324 }, { "epoch": 4.957632334837916, "grad_norm": 0.09624608606100082, "learning_rate": 0.01, "loss": 1.9876, "step": 48327 }, { "epoch": 4.957940090274928, "grad_norm": 0.06814394891262054, "learning_rate": 0.01, "loss": 1.9711, "step": 48330 }, { "epoch": 4.9582478457119405, "grad_norm": 0.09379391372203827, "learning_rate": 0.01, "loss": 2.0017, "step": 48333 }, { "epoch": 4.958555601148953, "grad_norm": 0.04310177266597748, "learning_rate": 0.01, "loss": 1.9869, "step": 48336 }, { "epoch": 4.958863356585966, "grad_norm": 0.048953697085380554, "learning_rate": 0.01, "loss": 1.9959, "step": 48339 }, { "epoch": 4.959171112022979, "grad_norm": 0.03562283515930176, "learning_rate": 0.01, "loss": 1.9993, "step": 48342 }, { "epoch": 4.959478867459992, "grad_norm": 0.040618475526571274, "learning_rate": 0.01, "loss": 1.9973, "step": 48345 }, { "epoch": 4.9597866228970044, "grad_norm": 0.11639663577079773, "learning_rate": 0.01, "loss": 1.979, "step": 48348 }, { "epoch": 4.960094378334017, "grad_norm": 0.07946206629276276, "learning_rate": 0.01, "loss": 2.0178, "step": 48351 }, { "epoch": 4.96040213377103, "grad_norm": 0.05358874425292015, "learning_rate": 0.01, "loss": 1.9862, "step": 48354 }, { "epoch": 4.960709889208043, "grad_norm": 0.07157052308320999, "learning_rate": 0.01, "loss": 1.9989, "step": 48357 }, { "epoch": 4.961017644645056, "grad_norm": 0.038184091448783875, "learning_rate": 0.01, "loss": 1.9805, "step": 48360 }, { "epoch": 4.961325400082068, "grad_norm": 0.04140111058950424, "learning_rate": 0.01, "loss": 1.9832, "step": 48363 }, { "epoch": 4.961633155519081, "grad_norm": 0.08905436098575592, "learning_rate": 0.01, "loss": 2.0117, "step": 48366 }, { "epoch": 4.961940910956094, "grad_norm": 0.07200294733047485, "learning_rate": 0.01, "loss": 1.9858, "step": 48369 }, { "epoch": 4.962248666393107, "grad_norm": 0.0786944180727005, "learning_rate": 0.01, "loss": 2.0012, "step": 48372 }, { "epoch": 4.962556421830119, "grad_norm": 0.08718118071556091, "learning_rate": 0.01, "loss": 2.0058, "step": 48375 }, { "epoch": 4.962864177267131, "grad_norm": 0.10729999840259552, "learning_rate": 0.01, "loss": 1.9669, "step": 48378 }, { "epoch": 4.963171932704144, "grad_norm": 0.05224734544754028, "learning_rate": 0.01, "loss": 2.0007, "step": 48381 }, { "epoch": 4.963479688141157, "grad_norm": 0.10347335785627365, "learning_rate": 0.01, "loss": 1.9887, "step": 48384 }, { "epoch": 4.96378744357817, "grad_norm": 0.09103485941886902, "learning_rate": 0.01, "loss": 1.9813, "step": 48387 }, { "epoch": 4.9640951990151825, "grad_norm": 0.05988696217536926, "learning_rate": 0.01, "loss": 1.9608, "step": 48390 }, { "epoch": 4.964402954452195, "grad_norm": 0.05333305522799492, "learning_rate": 0.01, "loss": 1.9836, "step": 48393 }, { "epoch": 4.964710709889208, "grad_norm": 0.039692364633083344, "learning_rate": 0.01, "loss": 1.9938, "step": 48396 }, { "epoch": 4.965018465326221, "grad_norm": 0.048610180616378784, "learning_rate": 0.01, "loss": 1.9839, "step": 48399 }, { "epoch": 4.965326220763234, "grad_norm": 0.06438957899808884, "learning_rate": 0.01, "loss": 1.9844, "step": 48402 }, { "epoch": 4.965633976200246, "grad_norm": 0.06306184828281403, "learning_rate": 0.01, "loss": 2.0163, "step": 48405 }, { "epoch": 4.965941731637259, "grad_norm": 0.0924021527171135, "learning_rate": 0.01, "loss": 1.9897, "step": 48408 }, { "epoch": 4.966249487074272, "grad_norm": 0.06868547946214676, "learning_rate": 0.01, "loss": 2.0068, "step": 48411 }, { "epoch": 4.966557242511285, "grad_norm": 0.04776471480727196, "learning_rate": 0.01, "loss": 1.9846, "step": 48414 }, { "epoch": 4.9668649979482975, "grad_norm": 0.0417228639125824, "learning_rate": 0.01, "loss": 1.9844, "step": 48417 }, { "epoch": 4.967172753385309, "grad_norm": 0.03331043943762779, "learning_rate": 0.01, "loss": 1.9844, "step": 48420 }, { "epoch": 4.967480508822322, "grad_norm": 0.07828322052955627, "learning_rate": 0.01, "loss": 2.0043, "step": 48423 }, { "epoch": 4.967788264259335, "grad_norm": 0.11830344796180725, "learning_rate": 0.01, "loss": 1.9765, "step": 48426 }, { "epoch": 4.968096019696348, "grad_norm": 0.13753825426101685, "learning_rate": 0.01, "loss": 1.9831, "step": 48429 }, { "epoch": 4.9684037751333605, "grad_norm": 0.10867384821176529, "learning_rate": 0.01, "loss": 1.9948, "step": 48432 }, { "epoch": 4.968711530570373, "grad_norm": 0.14065410196781158, "learning_rate": 0.01, "loss": 1.9892, "step": 48435 }, { "epoch": 4.969019286007386, "grad_norm": 0.1064314916729927, "learning_rate": 0.01, "loss": 1.963, "step": 48438 }, { "epoch": 4.969327041444399, "grad_norm": 0.08995570242404938, "learning_rate": 0.01, "loss": 1.9843, "step": 48441 }, { "epoch": 4.969634796881412, "grad_norm": 0.05942520126700401, "learning_rate": 0.01, "loss": 1.961, "step": 48444 }, { "epoch": 4.969942552318424, "grad_norm": 0.03557314723730087, "learning_rate": 0.01, "loss": 1.9917, "step": 48447 }, { "epoch": 4.970250307755437, "grad_norm": 0.058727700263261795, "learning_rate": 0.01, "loss": 1.9581, "step": 48450 }, { "epoch": 4.97055806319245, "grad_norm": 0.07610124349594116, "learning_rate": 0.01, "loss": 2.0181, "step": 48453 }, { "epoch": 4.970865818629463, "grad_norm": 0.062314316630363464, "learning_rate": 0.01, "loss": 2.0007, "step": 48456 }, { "epoch": 4.9711735740664755, "grad_norm": 0.060933034867048264, "learning_rate": 0.01, "loss": 1.959, "step": 48459 }, { "epoch": 4.971481329503488, "grad_norm": 0.11259069293737411, "learning_rate": 0.01, "loss": 1.9831, "step": 48462 }, { "epoch": 4.9717890849405, "grad_norm": 0.039150118827819824, "learning_rate": 0.01, "loss": 1.9762, "step": 48465 }, { "epoch": 4.972096840377513, "grad_norm": 0.06994688510894775, "learning_rate": 0.01, "loss": 2.0186, "step": 48468 }, { "epoch": 4.972404595814526, "grad_norm": 0.08845566213130951, "learning_rate": 0.01, "loss": 1.9827, "step": 48471 }, { "epoch": 4.9727123512515385, "grad_norm": 0.0835181474685669, "learning_rate": 0.01, "loss": 1.9734, "step": 48474 }, { "epoch": 4.973020106688551, "grad_norm": 0.059876278042793274, "learning_rate": 0.01, "loss": 1.9928, "step": 48477 }, { "epoch": 4.973327862125564, "grad_norm": 0.08387959748506546, "learning_rate": 0.01, "loss": 1.9581, "step": 48480 }, { "epoch": 4.973635617562577, "grad_norm": 0.04472680762410164, "learning_rate": 0.01, "loss": 2.0113, "step": 48483 }, { "epoch": 4.97394337299959, "grad_norm": 0.04980779439210892, "learning_rate": 0.01, "loss": 2.0031, "step": 48486 }, { "epoch": 4.974251128436602, "grad_norm": 0.05865331366658211, "learning_rate": 0.01, "loss": 1.9721, "step": 48489 }, { "epoch": 4.974558883873615, "grad_norm": 0.07731412351131439, "learning_rate": 0.01, "loss": 1.9838, "step": 48492 }, { "epoch": 4.974866639310628, "grad_norm": 0.0759957805275917, "learning_rate": 0.01, "loss": 1.9765, "step": 48495 }, { "epoch": 4.975174394747641, "grad_norm": 0.06392304599285126, "learning_rate": 0.01, "loss": 2.0035, "step": 48498 }, { "epoch": 4.975482150184654, "grad_norm": 0.04608240723609924, "learning_rate": 0.01, "loss": 1.9812, "step": 48501 }, { "epoch": 4.975789905621666, "grad_norm": 0.04515860974788666, "learning_rate": 0.01, "loss": 1.965, "step": 48504 }, { "epoch": 4.976097661058679, "grad_norm": 0.049364540725946426, "learning_rate": 0.01, "loss": 2.0013, "step": 48507 }, { "epoch": 4.976405416495691, "grad_norm": 0.05034893751144409, "learning_rate": 0.01, "loss": 2.0042, "step": 48510 }, { "epoch": 4.976713171932704, "grad_norm": 0.05965251848101616, "learning_rate": 0.01, "loss": 1.9829, "step": 48513 }, { "epoch": 4.977020927369717, "grad_norm": 0.03976500406861305, "learning_rate": 0.01, "loss": 1.9755, "step": 48516 }, { "epoch": 4.977328682806729, "grad_norm": 0.0873221829533577, "learning_rate": 0.01, "loss": 1.9835, "step": 48519 }, { "epoch": 4.977636438243742, "grad_norm": 0.10230670869350433, "learning_rate": 0.01, "loss": 1.991, "step": 48522 }, { "epoch": 4.977944193680755, "grad_norm": 0.1398303061723709, "learning_rate": 0.01, "loss": 2.0051, "step": 48525 }, { "epoch": 4.978251949117768, "grad_norm": 0.047815751284360886, "learning_rate": 0.01, "loss": 1.9831, "step": 48528 }, { "epoch": 4.9785597045547805, "grad_norm": 0.05280955508351326, "learning_rate": 0.01, "loss": 2.0061, "step": 48531 }, { "epoch": 4.978867459991793, "grad_norm": 0.05164310708642006, "learning_rate": 0.01, "loss": 1.9943, "step": 48534 }, { "epoch": 4.979175215428806, "grad_norm": 0.04212404415011406, "learning_rate": 0.01, "loss": 1.962, "step": 48537 }, { "epoch": 4.979482970865819, "grad_norm": 0.03191149979829788, "learning_rate": 0.01, "loss": 1.9907, "step": 48540 }, { "epoch": 4.979790726302832, "grad_norm": 0.09330854564905167, "learning_rate": 0.01, "loss": 1.9564, "step": 48543 }, { "epoch": 4.980098481739844, "grad_norm": 0.03602724149823189, "learning_rate": 0.01, "loss": 2.0041, "step": 48546 }, { "epoch": 4.980406237176857, "grad_norm": 0.1307518631219864, "learning_rate": 0.01, "loss": 1.987, "step": 48549 }, { "epoch": 4.98071399261387, "grad_norm": 0.17725148797035217, "learning_rate": 0.01, "loss": 1.9892, "step": 48552 }, { "epoch": 4.981021748050882, "grad_norm": 0.13484884798526764, "learning_rate": 0.01, "loss": 1.9553, "step": 48555 }, { "epoch": 4.981329503487895, "grad_norm": 0.15105341374874115, "learning_rate": 0.01, "loss": 1.9821, "step": 48558 }, { "epoch": 4.981637258924907, "grad_norm": 0.07108210027217865, "learning_rate": 0.01, "loss": 1.9704, "step": 48561 }, { "epoch": 4.98194501436192, "grad_norm": 0.04044497013092041, "learning_rate": 0.01, "loss": 2.0126, "step": 48564 }, { "epoch": 4.982252769798933, "grad_norm": 0.03189781680703163, "learning_rate": 0.01, "loss": 1.9908, "step": 48567 }, { "epoch": 4.982560525235946, "grad_norm": 0.12914758920669556, "learning_rate": 0.01, "loss": 2.0168, "step": 48570 }, { "epoch": 4.9828682806729585, "grad_norm": 0.09454645961523056, "learning_rate": 0.01, "loss": 1.966, "step": 48573 }, { "epoch": 4.983176036109971, "grad_norm": 0.09286819398403168, "learning_rate": 0.01, "loss": 2.0191, "step": 48576 }, { "epoch": 4.983483791546984, "grad_norm": 0.05915534123778343, "learning_rate": 0.01, "loss": 1.9644, "step": 48579 }, { "epoch": 4.983791546983997, "grad_norm": 0.09016136080026627, "learning_rate": 0.01, "loss": 1.9874, "step": 48582 }, { "epoch": 4.98409930242101, "grad_norm": 0.06749111413955688, "learning_rate": 0.01, "loss": 1.9842, "step": 48585 }, { "epoch": 4.984407057858022, "grad_norm": 0.05699833482503891, "learning_rate": 0.01, "loss": 1.9644, "step": 48588 }, { "epoch": 4.984714813295035, "grad_norm": 0.06643623113632202, "learning_rate": 0.01, "loss": 1.9956, "step": 48591 }, { "epoch": 4.985022568732048, "grad_norm": 0.05193551629781723, "learning_rate": 0.01, "loss": 1.9854, "step": 48594 }, { "epoch": 4.985330324169061, "grad_norm": 0.03996053710579872, "learning_rate": 0.01, "loss": 2.013, "step": 48597 }, { "epoch": 4.985638079606073, "grad_norm": 0.09908989816904068, "learning_rate": 0.01, "loss": 1.9584, "step": 48600 }, { "epoch": 4.985945835043085, "grad_norm": 0.09618446230888367, "learning_rate": 0.01, "loss": 1.9927, "step": 48603 }, { "epoch": 4.986253590480098, "grad_norm": 0.04452410712838173, "learning_rate": 0.01, "loss": 1.9993, "step": 48606 }, { "epoch": 4.986561345917111, "grad_norm": 0.08481396734714508, "learning_rate": 0.01, "loss": 1.9919, "step": 48609 }, { "epoch": 4.986869101354124, "grad_norm": 0.07361941784620285, "learning_rate": 0.01, "loss": 2.0121, "step": 48612 }, { "epoch": 4.9871768567911365, "grad_norm": 0.04092632979154587, "learning_rate": 0.01, "loss": 1.9846, "step": 48615 }, { "epoch": 4.987484612228149, "grad_norm": 0.05964810773730278, "learning_rate": 0.01, "loss": 1.9779, "step": 48618 }, { "epoch": 4.987792367665162, "grad_norm": 0.04937126114964485, "learning_rate": 0.01, "loss": 1.9797, "step": 48621 }, { "epoch": 4.988100123102175, "grad_norm": 0.061311025172472, "learning_rate": 0.01, "loss": 1.9829, "step": 48624 }, { "epoch": 4.988407878539188, "grad_norm": 0.11813945323228836, "learning_rate": 0.01, "loss": 1.9857, "step": 48627 }, { "epoch": 4.9887156339762, "grad_norm": 0.13528530299663544, "learning_rate": 0.01, "loss": 1.9662, "step": 48630 }, { "epoch": 4.989023389413213, "grad_norm": 0.13620121777057648, "learning_rate": 0.01, "loss": 1.9745, "step": 48633 }, { "epoch": 4.989331144850226, "grad_norm": 0.050104204565286636, "learning_rate": 0.01, "loss": 1.9976, "step": 48636 }, { "epoch": 4.989638900287239, "grad_norm": 0.04701809585094452, "learning_rate": 0.01, "loss": 2.0059, "step": 48639 }, { "epoch": 4.989946655724252, "grad_norm": 0.05600928142666817, "learning_rate": 0.01, "loss": 1.9777, "step": 48642 }, { "epoch": 4.9902544111612634, "grad_norm": 0.03713737428188324, "learning_rate": 0.01, "loss": 1.9688, "step": 48645 }, { "epoch": 4.990562166598276, "grad_norm": 0.10586056858301163, "learning_rate": 0.01, "loss": 2.023, "step": 48648 }, { "epoch": 4.990869922035289, "grad_norm": 0.07154802978038788, "learning_rate": 0.01, "loss": 1.9858, "step": 48651 }, { "epoch": 4.991177677472302, "grad_norm": 0.046645209193229675, "learning_rate": 0.01, "loss": 2.0023, "step": 48654 }, { "epoch": 4.991485432909315, "grad_norm": 0.10580138862133026, "learning_rate": 0.01, "loss": 1.9892, "step": 48657 }, { "epoch": 4.991793188346327, "grad_norm": 0.05583944171667099, "learning_rate": 0.01, "loss": 1.9666, "step": 48660 }, { "epoch": 4.99210094378334, "grad_norm": 0.04817153140902519, "learning_rate": 0.01, "loss": 2.0042, "step": 48663 }, { "epoch": 4.992408699220353, "grad_norm": 0.1129242405295372, "learning_rate": 0.01, "loss": 1.9761, "step": 48666 }, { "epoch": 4.992716454657366, "grad_norm": 0.09947514533996582, "learning_rate": 0.01, "loss": 1.9986, "step": 48669 }, { "epoch": 4.9930242100943785, "grad_norm": 0.07797490060329437, "learning_rate": 0.01, "loss": 1.9965, "step": 48672 }, { "epoch": 4.993331965531391, "grad_norm": 0.04402044415473938, "learning_rate": 0.01, "loss": 1.9888, "step": 48675 }, { "epoch": 4.993639720968404, "grad_norm": 0.05189018324017525, "learning_rate": 0.01, "loss": 1.9704, "step": 48678 }, { "epoch": 4.993947476405417, "grad_norm": 0.04376624524593353, "learning_rate": 0.01, "loss": 1.9883, "step": 48681 }, { "epoch": 4.99425523184243, "grad_norm": 0.06388211250305176, "learning_rate": 0.01, "loss": 1.9648, "step": 48684 }, { "epoch": 4.994562987279442, "grad_norm": 0.05990754812955856, "learning_rate": 0.01, "loss": 1.9796, "step": 48687 }, { "epoch": 4.994870742716454, "grad_norm": 0.06319117546081543, "learning_rate": 0.01, "loss": 1.9895, "step": 48690 }, { "epoch": 4.995178498153467, "grad_norm": 0.06176729500293732, "learning_rate": 0.01, "loss": 2.0171, "step": 48693 }, { "epoch": 4.99548625359048, "grad_norm": 0.06712348759174347, "learning_rate": 0.01, "loss": 1.9993, "step": 48696 }, { "epoch": 4.995794009027493, "grad_norm": 0.046728748828172684, "learning_rate": 0.01, "loss": 2.0015, "step": 48699 }, { "epoch": 4.996101764464505, "grad_norm": 0.04008499160408974, "learning_rate": 0.01, "loss": 2.0052, "step": 48702 }, { "epoch": 4.996409519901518, "grad_norm": 0.04931486025452614, "learning_rate": 0.01, "loss": 1.9815, "step": 48705 }, { "epoch": 4.996717275338531, "grad_norm": 0.09341800212860107, "learning_rate": 0.01, "loss": 1.9616, "step": 48708 }, { "epoch": 4.997025030775544, "grad_norm": 0.17475301027297974, "learning_rate": 0.01, "loss": 2.0156, "step": 48711 }, { "epoch": 4.9973327862125565, "grad_norm": 0.10119811445474625, "learning_rate": 0.01, "loss": 1.9786, "step": 48714 }, { "epoch": 4.997640541649569, "grad_norm": 0.04519075155258179, "learning_rate": 0.01, "loss": 2.0038, "step": 48717 }, { "epoch": 4.997948297086582, "grad_norm": 0.04394150897860527, "learning_rate": 0.01, "loss": 1.9691, "step": 48720 }, { "epoch": 4.998256052523595, "grad_norm": 0.03471103683114052, "learning_rate": 0.01, "loss": 1.9795, "step": 48723 }, { "epoch": 4.998563807960608, "grad_norm": 0.06422135978937149, "learning_rate": 0.01, "loss": 1.9652, "step": 48726 }, { "epoch": 4.99887156339762, "grad_norm": 0.10134468972682953, "learning_rate": 0.01, "loss": 2.0216, "step": 48729 }, { "epoch": 4.999179318834633, "grad_norm": 0.07451833784580231, "learning_rate": 0.01, "loss": 1.9897, "step": 48732 }, { "epoch": 4.999487074271645, "grad_norm": 0.12009834498167038, "learning_rate": 0.01, "loss": 1.9786, "step": 48735 }, { "epoch": 4.999794829708658, "grad_norm": 0.18911142647266388, "learning_rate": 0.01, "loss": 1.9925, "step": 48738 }, { "epoch": 5.005751258087707, "grad_norm": 0.11327698826789856, "learning_rate": 0.01, "loss": 2.0213, "step": 48741 }, { "epoch": 5.006059361199548, "grad_norm": 0.05276428908109665, "learning_rate": 0.01, "loss": 2.0193, "step": 48744 }, { "epoch": 5.006367464311389, "grad_norm": 0.04349682852625847, "learning_rate": 0.01, "loss": 2.0173, "step": 48747 }, { "epoch": 5.006675567423231, "grad_norm": 0.043818019330501556, "learning_rate": 0.01, "loss": 2.0334, "step": 48750 }, { "epoch": 5.006983670535073, "grad_norm": 0.04884595423936844, "learning_rate": 0.01, "loss": 2.0128, "step": 48753 }, { "epoch": 5.007291773646914, "grad_norm": 0.04434294253587723, "learning_rate": 0.01, "loss": 1.9983, "step": 48756 }, { "epoch": 5.007599876758755, "grad_norm": 0.07619164884090424, "learning_rate": 0.01, "loss": 1.9972, "step": 48759 }, { "epoch": 5.0079079798705965, "grad_norm": 0.04731893911957741, "learning_rate": 0.01, "loss": 2.006, "step": 48762 }, { "epoch": 5.008216082982438, "grad_norm": 0.03700774163007736, "learning_rate": 0.01, "loss": 2.0118, "step": 48765 }, { "epoch": 5.00852418609428, "grad_norm": 0.03112631104886532, "learning_rate": 0.01, "loss": 2.0249, "step": 48768 }, { "epoch": 5.008832289206121, "grad_norm": 0.12492241710424423, "learning_rate": 0.01, "loss": 2.0255, "step": 48771 }, { "epoch": 5.0091403923179625, "grad_norm": 0.03778549283742905, "learning_rate": 0.01, "loss": 1.968, "step": 48774 }, { "epoch": 5.009448495429804, "grad_norm": 0.06593509018421173, "learning_rate": 0.01, "loss": 2.0112, "step": 48777 }, { "epoch": 5.009756598541645, "grad_norm": 0.04298333451151848, "learning_rate": 0.01, "loss": 2.03, "step": 48780 }, { "epoch": 5.010064701653486, "grad_norm": 0.04566636681556702, "learning_rate": 0.01, "loss": 2.0217, "step": 48783 }, { "epoch": 5.010372804765328, "grad_norm": 0.045355070382356644, "learning_rate": 0.01, "loss": 2.0253, "step": 48786 }, { "epoch": 5.01068090787717, "grad_norm": 0.10494118183851242, "learning_rate": 0.01, "loss": 1.9849, "step": 48789 }, { "epoch": 5.010989010989011, "grad_norm": 0.07352183014154434, "learning_rate": 0.01, "loss": 2.0157, "step": 48792 }, { "epoch": 5.011297114100852, "grad_norm": 0.07270955294370651, "learning_rate": 0.01, "loss": 2.0133, "step": 48795 }, { "epoch": 5.0116052172126935, "grad_norm": 0.04416406527161598, "learning_rate": 0.01, "loss": 2.0346, "step": 48798 }, { "epoch": 5.011913320324536, "grad_norm": 0.08084416389465332, "learning_rate": 0.01, "loss": 2.0187, "step": 48801 }, { "epoch": 5.012221423436377, "grad_norm": 0.04440615326166153, "learning_rate": 0.01, "loss": 2.0034, "step": 48804 }, { "epoch": 5.012529526548218, "grad_norm": 0.0649610310792923, "learning_rate": 0.01, "loss": 2.0398, "step": 48807 }, { "epoch": 5.012837629660059, "grad_norm": 0.08533710986375809, "learning_rate": 0.01, "loss": 1.9951, "step": 48810 }, { "epoch": 5.013145732771901, "grad_norm": 0.06310707330703735, "learning_rate": 0.01, "loss": 2.0566, "step": 48813 }, { "epoch": 5.013453835883743, "grad_norm": 0.07096098363399506, "learning_rate": 0.01, "loss": 2.0022, "step": 48816 }, { "epoch": 5.013761938995584, "grad_norm": 0.08358955383300781, "learning_rate": 0.01, "loss": 2.011, "step": 48819 }, { "epoch": 5.014070042107425, "grad_norm": 0.07774472236633301, "learning_rate": 0.01, "loss": 2.0217, "step": 48822 }, { "epoch": 5.014378145219267, "grad_norm": 0.090924933552742, "learning_rate": 0.01, "loss": 2.0236, "step": 48825 }, { "epoch": 5.014686248331108, "grad_norm": 0.08830771595239639, "learning_rate": 0.01, "loss": 2.0131, "step": 48828 }, { "epoch": 5.014994351442949, "grad_norm": 0.09647586941719055, "learning_rate": 0.01, "loss": 2.0095, "step": 48831 }, { "epoch": 5.015302454554791, "grad_norm": 0.09413495659828186, "learning_rate": 0.01, "loss": 1.9973, "step": 48834 }, { "epoch": 5.0156105576666326, "grad_norm": 0.046874430030584335, "learning_rate": 0.01, "loss": 1.998, "step": 48837 }, { "epoch": 5.015918660778474, "grad_norm": 0.050309523940086365, "learning_rate": 0.01, "loss": 1.998, "step": 48840 }, { "epoch": 5.016226763890315, "grad_norm": 0.045271504670381546, "learning_rate": 0.01, "loss": 1.9903, "step": 48843 }, { "epoch": 5.016534867002156, "grad_norm": 0.03297847509384155, "learning_rate": 0.01, "loss": 2.0161, "step": 48846 }, { "epoch": 5.0168429701139985, "grad_norm": 0.08260242640972137, "learning_rate": 0.01, "loss": 2.02, "step": 48849 }, { "epoch": 5.01715107322584, "grad_norm": 0.04784693941473961, "learning_rate": 0.01, "loss": 1.9874, "step": 48852 }, { "epoch": 5.017459176337681, "grad_norm": 0.07972519099712372, "learning_rate": 0.01, "loss": 1.9839, "step": 48855 }, { "epoch": 5.017767279449522, "grad_norm": 0.09882769733667374, "learning_rate": 0.01, "loss": 2.0151, "step": 48858 }, { "epoch": 5.0180753825613635, "grad_norm": 0.059475596994161606, "learning_rate": 0.01, "loss": 1.9868, "step": 48861 }, { "epoch": 5.018383485673206, "grad_norm": 0.03795355185866356, "learning_rate": 0.01, "loss": 2.0319, "step": 48864 }, { "epoch": 5.018691588785047, "grad_norm": 0.04603972285985947, "learning_rate": 0.01, "loss": 1.9968, "step": 48867 }, { "epoch": 5.018999691896888, "grad_norm": 0.0678548589348793, "learning_rate": 0.01, "loss": 1.9866, "step": 48870 }, { "epoch": 5.0193077950087295, "grad_norm": 0.0728834941983223, "learning_rate": 0.01, "loss": 2.0082, "step": 48873 }, { "epoch": 5.019615898120571, "grad_norm": 0.08255651593208313, "learning_rate": 0.01, "loss": 1.9846, "step": 48876 }, { "epoch": 5.019924001232413, "grad_norm": 0.08427122980356216, "learning_rate": 0.01, "loss": 2.0082, "step": 48879 }, { "epoch": 5.020232104344254, "grad_norm": 0.06279109418392181, "learning_rate": 0.01, "loss": 1.997, "step": 48882 }, { "epoch": 5.020540207456095, "grad_norm": 0.12473344802856445, "learning_rate": 0.01, "loss": 2.0206, "step": 48885 }, { "epoch": 5.020848310567937, "grad_norm": 0.05066891387104988, "learning_rate": 0.01, "loss": 2.024, "step": 48888 }, { "epoch": 5.021156413679778, "grad_norm": 0.038826942443847656, "learning_rate": 0.01, "loss": 1.9918, "step": 48891 }, { "epoch": 5.021464516791619, "grad_norm": 0.058017902076244354, "learning_rate": 0.01, "loss": 2.0084, "step": 48894 }, { "epoch": 5.021772619903461, "grad_norm": 0.09492091834545135, "learning_rate": 0.01, "loss": 1.994, "step": 48897 }, { "epoch": 5.022080723015303, "grad_norm": 0.037246495485305786, "learning_rate": 0.01, "loss": 1.9979, "step": 48900 }, { "epoch": 5.022388826127144, "grad_norm": 0.06708169728517532, "learning_rate": 0.01, "loss": 2.0032, "step": 48903 }, { "epoch": 5.022696929238985, "grad_norm": 0.07195274531841278, "learning_rate": 0.01, "loss": 2.0184, "step": 48906 }, { "epoch": 5.023005032350826, "grad_norm": 0.04685629904270172, "learning_rate": 0.01, "loss": 2.0146, "step": 48909 }, { "epoch": 5.0233131354626686, "grad_norm": 0.08163397759199142, "learning_rate": 0.01, "loss": 2.0, "step": 48912 }, { "epoch": 5.02362123857451, "grad_norm": 0.05403751879930496, "learning_rate": 0.01, "loss": 2.0059, "step": 48915 }, { "epoch": 5.023929341686351, "grad_norm": 0.038569968193769455, "learning_rate": 0.01, "loss": 2.0312, "step": 48918 }, { "epoch": 5.024237444798192, "grad_norm": 0.055535174906253815, "learning_rate": 0.01, "loss": 2.003, "step": 48921 }, { "epoch": 5.024545547910034, "grad_norm": 0.05628294125199318, "learning_rate": 0.01, "loss": 2.0147, "step": 48924 }, { "epoch": 5.024853651021876, "grad_norm": 0.06412038207054138, "learning_rate": 0.01, "loss": 1.995, "step": 48927 }, { "epoch": 5.025161754133717, "grad_norm": 0.09966779500246048, "learning_rate": 0.01, "loss": 2.0417, "step": 48930 }, { "epoch": 5.025469857245558, "grad_norm": 0.059476420283317566, "learning_rate": 0.01, "loss": 2.0212, "step": 48933 }, { "epoch": 5.0257779603573995, "grad_norm": 0.04914550483226776, "learning_rate": 0.01, "loss": 2.017, "step": 48936 }, { "epoch": 5.026086063469241, "grad_norm": 0.03358187898993492, "learning_rate": 0.01, "loss": 1.9981, "step": 48939 }, { "epoch": 5.026394166581082, "grad_norm": 0.0671076700091362, "learning_rate": 0.01, "loss": 1.9936, "step": 48942 }, { "epoch": 5.026702269692924, "grad_norm": 0.14911647140979767, "learning_rate": 0.01, "loss": 2.027, "step": 48945 }, { "epoch": 5.0270103728047655, "grad_norm": 0.05344945192337036, "learning_rate": 0.01, "loss": 1.9975, "step": 48948 }, { "epoch": 5.027318475916607, "grad_norm": 0.05113920569419861, "learning_rate": 0.01, "loss": 1.9966, "step": 48951 }, { "epoch": 5.027626579028448, "grad_norm": 0.052601687610149384, "learning_rate": 0.01, "loss": 1.9968, "step": 48954 }, { "epoch": 5.027934682140289, "grad_norm": 0.04715648666024208, "learning_rate": 0.01, "loss": 2.0033, "step": 48957 }, { "epoch": 5.028242785252131, "grad_norm": 0.0414542555809021, "learning_rate": 0.01, "loss": 2.0176, "step": 48960 }, { "epoch": 5.028550888363973, "grad_norm": 0.057792216539382935, "learning_rate": 0.01, "loss": 1.9973, "step": 48963 }, { "epoch": 5.028858991475814, "grad_norm": 0.12371774017810822, "learning_rate": 0.01, "loss": 2.0134, "step": 48966 }, { "epoch": 5.029167094587655, "grad_norm": 0.053585126996040344, "learning_rate": 0.01, "loss": 2.003, "step": 48969 }, { "epoch": 5.0294751976994965, "grad_norm": 0.051625724881887436, "learning_rate": 0.01, "loss": 1.9944, "step": 48972 }, { "epoch": 5.029783300811339, "grad_norm": 0.03650549426674843, "learning_rate": 0.01, "loss": 1.9873, "step": 48975 }, { "epoch": 5.03009140392318, "grad_norm": 0.039793796837329865, "learning_rate": 0.01, "loss": 2.0092, "step": 48978 }, { "epoch": 5.030399507035021, "grad_norm": 0.03223228082060814, "learning_rate": 0.01, "loss": 2.0254, "step": 48981 }, { "epoch": 5.030707610146862, "grad_norm": 0.06604496389627457, "learning_rate": 0.01, "loss": 1.9915, "step": 48984 }, { "epoch": 5.031015713258704, "grad_norm": 0.13230498135089874, "learning_rate": 0.01, "loss": 2.014, "step": 48987 }, { "epoch": 5.031323816370545, "grad_norm": 0.07130693644285202, "learning_rate": 0.01, "loss": 2.0058, "step": 48990 }, { "epoch": 5.031631919482387, "grad_norm": 0.13743533194065094, "learning_rate": 0.01, "loss": 1.9949, "step": 48993 }, { "epoch": 5.031940022594228, "grad_norm": 0.08371279388666153, "learning_rate": 0.01, "loss": 1.9991, "step": 48996 }, { "epoch": 5.03224812570607, "grad_norm": 0.06169065833091736, "learning_rate": 0.01, "loss": 2.011, "step": 48999 }, { "epoch": 5.032556228817911, "grad_norm": 0.05723833665251732, "learning_rate": 0.01, "loss": 1.994, "step": 49002 }, { "epoch": 5.032864331929752, "grad_norm": 0.043473754078149796, "learning_rate": 0.01, "loss": 2.0156, "step": 49005 }, { "epoch": 5.033172435041594, "grad_norm": 0.08324360102415085, "learning_rate": 0.01, "loss": 2.002, "step": 49008 }, { "epoch": 5.0334805381534355, "grad_norm": 0.04197626933455467, "learning_rate": 0.01, "loss": 2.0118, "step": 49011 }, { "epoch": 5.033788641265277, "grad_norm": 0.07882298529148102, "learning_rate": 0.01, "loss": 2.0226, "step": 49014 }, { "epoch": 5.034096744377118, "grad_norm": 0.08073476701974869, "learning_rate": 0.01, "loss": 2.0272, "step": 49017 }, { "epoch": 5.034404847488959, "grad_norm": 0.0678163543343544, "learning_rate": 0.01, "loss": 2.0203, "step": 49020 }, { "epoch": 5.0347129506008015, "grad_norm": 0.04524783417582512, "learning_rate": 0.01, "loss": 2.0038, "step": 49023 }, { "epoch": 5.035021053712643, "grad_norm": 0.06805024296045303, "learning_rate": 0.01, "loss": 2.0321, "step": 49026 }, { "epoch": 5.035329156824484, "grad_norm": 0.12917374074459076, "learning_rate": 0.01, "loss": 1.9719, "step": 49029 }, { "epoch": 5.035637259936325, "grad_norm": 0.04843614995479584, "learning_rate": 0.01, "loss": 2.0189, "step": 49032 }, { "epoch": 5.0359453630481665, "grad_norm": 0.09850560128688812, "learning_rate": 0.01, "loss": 2.0141, "step": 49035 }, { "epoch": 5.036253466160008, "grad_norm": 0.04039955511689186, "learning_rate": 0.01, "loss": 2.0142, "step": 49038 }, { "epoch": 5.03656156927185, "grad_norm": 0.06758705526590347, "learning_rate": 0.01, "loss": 2.0286, "step": 49041 }, { "epoch": 5.036869672383691, "grad_norm": 0.03571697697043419, "learning_rate": 0.01, "loss": 1.9944, "step": 49044 }, { "epoch": 5.0371777754955325, "grad_norm": 0.034528639167547226, "learning_rate": 0.01, "loss": 1.9608, "step": 49047 }, { "epoch": 5.037485878607374, "grad_norm": 0.05735669657588005, "learning_rate": 0.01, "loss": 1.9942, "step": 49050 }, { "epoch": 5.037793981719215, "grad_norm": 0.11180947721004486, "learning_rate": 0.01, "loss": 2.0172, "step": 49053 }, { "epoch": 5.038102084831057, "grad_norm": 0.08823433518409729, "learning_rate": 0.01, "loss": 2.0125, "step": 49056 }, { "epoch": 5.038410187942898, "grad_norm": 0.06959319114685059, "learning_rate": 0.01, "loss": 1.9918, "step": 49059 }, { "epoch": 5.03871829105474, "grad_norm": 0.03767579421401024, "learning_rate": 0.01, "loss": 2.0122, "step": 49062 }, { "epoch": 5.039026394166581, "grad_norm": 0.032121919095516205, "learning_rate": 0.01, "loss": 2.0137, "step": 49065 }, { "epoch": 5.039334497278422, "grad_norm": 0.09328833967447281, "learning_rate": 0.01, "loss": 2.0125, "step": 49068 }, { "epoch": 5.039642600390264, "grad_norm": 0.05128837004303932, "learning_rate": 0.01, "loss": 2.0073, "step": 49071 }, { "epoch": 5.039950703502106, "grad_norm": 0.04546624422073364, "learning_rate": 0.01, "loss": 2.0395, "step": 49074 }, { "epoch": 5.040258806613947, "grad_norm": 0.03980513662099838, "learning_rate": 0.01, "loss": 2.0147, "step": 49077 }, { "epoch": 5.040566909725788, "grad_norm": 0.04204264655709267, "learning_rate": 0.01, "loss": 2.0044, "step": 49080 }, { "epoch": 5.040875012837629, "grad_norm": 0.04164162278175354, "learning_rate": 0.01, "loss": 2.0249, "step": 49083 }, { "epoch": 5.041183115949471, "grad_norm": 0.0698886588215828, "learning_rate": 0.01, "loss": 1.9975, "step": 49086 }, { "epoch": 5.041491219061313, "grad_norm": 0.11362603306770325, "learning_rate": 0.01, "loss": 1.9944, "step": 49089 }, { "epoch": 5.041799322173154, "grad_norm": 0.08820103108882904, "learning_rate": 0.01, "loss": 2.0056, "step": 49092 }, { "epoch": 5.042107425284995, "grad_norm": 0.06023460626602173, "learning_rate": 0.01, "loss": 2.0121, "step": 49095 }, { "epoch": 5.042415528396837, "grad_norm": 0.03523271903395653, "learning_rate": 0.01, "loss": 2.0173, "step": 49098 }, { "epoch": 5.042723631508678, "grad_norm": 0.062407344579696655, "learning_rate": 0.01, "loss": 1.9907, "step": 49101 }, { "epoch": 5.04303173462052, "grad_norm": 0.04747510328888893, "learning_rate": 0.01, "loss": 1.996, "step": 49104 }, { "epoch": 5.043339837732361, "grad_norm": 0.04824723303318024, "learning_rate": 0.01, "loss": 1.9955, "step": 49107 }, { "epoch": 5.0436479408442025, "grad_norm": 0.1672361046075821, "learning_rate": 0.01, "loss": 2.0082, "step": 49110 }, { "epoch": 5.043956043956044, "grad_norm": 0.09675043821334839, "learning_rate": 0.01, "loss": 2.0048, "step": 49113 }, { "epoch": 5.044264147067885, "grad_norm": 0.06804441660642624, "learning_rate": 0.01, "loss": 1.9977, "step": 49116 }, { "epoch": 5.044572250179727, "grad_norm": 0.05074039101600647, "learning_rate": 0.01, "loss": 2.0089, "step": 49119 }, { "epoch": 5.0448803532915685, "grad_norm": 0.049977827817201614, "learning_rate": 0.01, "loss": 2.0269, "step": 49122 }, { "epoch": 5.04518845640341, "grad_norm": 0.05549508333206177, "learning_rate": 0.01, "loss": 1.9883, "step": 49125 }, { "epoch": 5.045496559515251, "grad_norm": 0.03386814519762993, "learning_rate": 0.01, "loss": 2.0178, "step": 49128 }, { "epoch": 5.045804662627092, "grad_norm": 0.04851256310939789, "learning_rate": 0.01, "loss": 2.0093, "step": 49131 }, { "epoch": 5.0461127657389335, "grad_norm": 0.0848245769739151, "learning_rate": 0.01, "loss": 1.9886, "step": 49134 }, { "epoch": 5.046420868850776, "grad_norm": 0.03692524507641792, "learning_rate": 0.01, "loss": 1.9997, "step": 49137 }, { "epoch": 5.046728971962617, "grad_norm": 0.0406675823032856, "learning_rate": 0.01, "loss": 2.0188, "step": 49140 }, { "epoch": 5.047037075074458, "grad_norm": 0.07141982764005661, "learning_rate": 0.01, "loss": 2.0155, "step": 49143 }, { "epoch": 5.0473451781862995, "grad_norm": 0.0453253835439682, "learning_rate": 0.01, "loss": 1.9995, "step": 49146 }, { "epoch": 5.047653281298141, "grad_norm": 0.04967833310365677, "learning_rate": 0.01, "loss": 2.0021, "step": 49149 }, { "epoch": 5.047961384409983, "grad_norm": 0.09721168875694275, "learning_rate": 0.01, "loss": 1.9925, "step": 49152 }, { "epoch": 5.048269487521824, "grad_norm": 0.03806902468204498, "learning_rate": 0.01, "loss": 2.0228, "step": 49155 }, { "epoch": 5.048577590633665, "grad_norm": 0.048705510795116425, "learning_rate": 0.01, "loss": 2.0227, "step": 49158 }, { "epoch": 5.048885693745507, "grad_norm": 0.04676016420125961, "learning_rate": 0.01, "loss": 2.007, "step": 49161 }, { "epoch": 5.049193796857348, "grad_norm": 0.1444026082754135, "learning_rate": 0.01, "loss": 1.9997, "step": 49164 }, { "epoch": 5.04950189996919, "grad_norm": 0.07510826736688614, "learning_rate": 0.01, "loss": 2.0152, "step": 49167 }, { "epoch": 5.049810003081031, "grad_norm": 0.05828002095222473, "learning_rate": 0.01, "loss": 2.0027, "step": 49170 }, { "epoch": 5.050118106192873, "grad_norm": 0.04933730885386467, "learning_rate": 0.01, "loss": 1.9781, "step": 49173 }, { "epoch": 5.050426209304714, "grad_norm": 0.07392150163650513, "learning_rate": 0.01, "loss": 1.9918, "step": 49176 }, { "epoch": 5.050734312416555, "grad_norm": 0.03829526528716087, "learning_rate": 0.01, "loss": 2.0377, "step": 49179 }, { "epoch": 5.051042415528396, "grad_norm": 0.06644531339406967, "learning_rate": 0.01, "loss": 2.0086, "step": 49182 }, { "epoch": 5.0513505186402385, "grad_norm": 0.045286018401384354, "learning_rate": 0.01, "loss": 2.019, "step": 49185 }, { "epoch": 5.05165862175208, "grad_norm": 0.1067107766866684, "learning_rate": 0.01, "loss": 1.9967, "step": 49188 }, { "epoch": 5.051966724863921, "grad_norm": 0.09251461923122406, "learning_rate": 0.01, "loss": 1.9928, "step": 49191 }, { "epoch": 5.052274827975762, "grad_norm": 0.051101312041282654, "learning_rate": 0.01, "loss": 1.9809, "step": 49194 }, { "epoch": 5.052582931087604, "grad_norm": 0.060298655182123184, "learning_rate": 0.01, "loss": 2.0393, "step": 49197 }, { "epoch": 5.052891034199446, "grad_norm": 0.11112777143716812, "learning_rate": 0.01, "loss": 2.0006, "step": 49200 }, { "epoch": 5.053199137311287, "grad_norm": 0.05832698941230774, "learning_rate": 0.01, "loss": 2.0047, "step": 49203 }, { "epoch": 5.053507240423128, "grad_norm": 0.04670676961541176, "learning_rate": 0.01, "loss": 2.0106, "step": 49206 }, { "epoch": 5.0538153435349695, "grad_norm": 0.03849385678768158, "learning_rate": 0.01, "loss": 2.0102, "step": 49209 }, { "epoch": 5.054123446646811, "grad_norm": 0.043539416044950485, "learning_rate": 0.01, "loss": 1.9973, "step": 49212 }, { "epoch": 5.054431549758653, "grad_norm": 0.04201329126954079, "learning_rate": 0.01, "loss": 1.9961, "step": 49215 }, { "epoch": 5.054739652870494, "grad_norm": 0.046697720885276794, "learning_rate": 0.01, "loss": 2.0044, "step": 49218 }, { "epoch": 5.0550477559823355, "grad_norm": 0.13588139414787292, "learning_rate": 0.01, "loss": 2.0044, "step": 49221 }, { "epoch": 5.055355859094177, "grad_norm": 0.06000100448727608, "learning_rate": 0.01, "loss": 1.9979, "step": 49224 }, { "epoch": 5.055663962206018, "grad_norm": 0.05746195465326309, "learning_rate": 0.01, "loss": 2.017, "step": 49227 }, { "epoch": 5.05597206531786, "grad_norm": 0.05890364944934845, "learning_rate": 0.01, "loss": 2.0246, "step": 49230 }, { "epoch": 5.056280168429701, "grad_norm": 0.05617443472146988, "learning_rate": 0.01, "loss": 2.009, "step": 49233 }, { "epoch": 5.056588271541543, "grad_norm": 0.035529427230358124, "learning_rate": 0.01, "loss": 1.9948, "step": 49236 }, { "epoch": 5.056896374653384, "grad_norm": 0.08901556581258774, "learning_rate": 0.01, "loss": 2.0114, "step": 49239 }, { "epoch": 5.057204477765225, "grad_norm": 0.07534974068403244, "learning_rate": 0.01, "loss": 1.9976, "step": 49242 }, { "epoch": 5.0575125808770665, "grad_norm": 0.09517022967338562, "learning_rate": 0.01, "loss": 1.9911, "step": 49245 }, { "epoch": 5.057820683988909, "grad_norm": 0.0671476274728775, "learning_rate": 0.01, "loss": 2.0098, "step": 49248 }, { "epoch": 5.05812878710075, "grad_norm": 0.08913878351449966, "learning_rate": 0.01, "loss": 1.9914, "step": 49251 }, { "epoch": 5.058436890212591, "grad_norm": 0.06179509684443474, "learning_rate": 0.01, "loss": 2.015, "step": 49254 }, { "epoch": 5.058744993324432, "grad_norm": 0.055043041706085205, "learning_rate": 0.01, "loss": 1.9962, "step": 49257 }, { "epoch": 5.059053096436274, "grad_norm": 0.05528896301984787, "learning_rate": 0.01, "loss": 2.0115, "step": 49260 }, { "epoch": 5.059361199548116, "grad_norm": 0.1153421550989151, "learning_rate": 0.01, "loss": 2.0251, "step": 49263 }, { "epoch": 5.059669302659957, "grad_norm": 0.11637650430202484, "learning_rate": 0.01, "loss": 1.9815, "step": 49266 }, { "epoch": 5.059977405771798, "grad_norm": 0.04912794381380081, "learning_rate": 0.01, "loss": 1.9949, "step": 49269 }, { "epoch": 5.06028550888364, "grad_norm": 0.030755288898944855, "learning_rate": 0.01, "loss": 1.9968, "step": 49272 }, { "epoch": 5.060593611995481, "grad_norm": 0.03253510594367981, "learning_rate": 0.01, "loss": 1.999, "step": 49275 }, { "epoch": 5.060901715107323, "grad_norm": 0.0353284515440464, "learning_rate": 0.01, "loss": 2.008, "step": 49278 }, { "epoch": 5.061209818219164, "grad_norm": 0.04621535912156105, "learning_rate": 0.01, "loss": 2.0276, "step": 49281 }, { "epoch": 5.0615179213310055, "grad_norm": 0.08098949491977692, "learning_rate": 0.01, "loss": 1.98, "step": 49284 }, { "epoch": 5.061826024442847, "grad_norm": 0.07662132382392883, "learning_rate": 0.01, "loss": 2.0115, "step": 49287 }, { "epoch": 5.062134127554688, "grad_norm": 0.06088561937212944, "learning_rate": 0.01, "loss": 2.0079, "step": 49290 }, { "epoch": 5.062442230666529, "grad_norm": 0.09895024448633194, "learning_rate": 0.01, "loss": 1.9972, "step": 49293 }, { "epoch": 5.0627503337783715, "grad_norm": 0.05712589621543884, "learning_rate": 0.01, "loss": 1.9998, "step": 49296 }, { "epoch": 5.063058436890213, "grad_norm": 0.03906116634607315, "learning_rate": 0.01, "loss": 1.998, "step": 49299 }, { "epoch": 5.063366540002054, "grad_norm": 0.04972919449210167, "learning_rate": 0.01, "loss": 2.0043, "step": 49302 }, { "epoch": 5.063674643113895, "grad_norm": 0.048737164586782455, "learning_rate": 0.01, "loss": 1.9862, "step": 49305 }, { "epoch": 5.0639827462257365, "grad_norm": 0.06541066616773605, "learning_rate": 0.01, "loss": 2.0251, "step": 49308 }, { "epoch": 5.064290849337579, "grad_norm": 0.048164594918489456, "learning_rate": 0.01, "loss": 2.0352, "step": 49311 }, { "epoch": 5.06459895244942, "grad_norm": 0.09063941240310669, "learning_rate": 0.01, "loss": 2.0265, "step": 49314 }, { "epoch": 5.064907055561261, "grad_norm": 0.04573111608624458, "learning_rate": 0.01, "loss": 2.0126, "step": 49317 }, { "epoch": 5.0652151586731025, "grad_norm": 0.033861320465803146, "learning_rate": 0.01, "loss": 2.0023, "step": 49320 }, { "epoch": 5.065523261784944, "grad_norm": 0.03708826005458832, "learning_rate": 0.01, "loss": 2.0182, "step": 49323 }, { "epoch": 5.065831364896786, "grad_norm": 0.10216791927814484, "learning_rate": 0.01, "loss": 2.0111, "step": 49326 }, { "epoch": 5.066139468008627, "grad_norm": 0.06112508475780487, "learning_rate": 0.01, "loss": 1.9837, "step": 49329 }, { "epoch": 5.066447571120468, "grad_norm": 0.06171619892120361, "learning_rate": 0.01, "loss": 2.007, "step": 49332 }, { "epoch": 5.06675567423231, "grad_norm": 0.03746636584401131, "learning_rate": 0.01, "loss": 1.9994, "step": 49335 }, { "epoch": 5.067063777344151, "grad_norm": 0.06251370906829834, "learning_rate": 0.01, "loss": 1.9989, "step": 49338 }, { "epoch": 5.067371880455992, "grad_norm": 0.03557705506682396, "learning_rate": 0.01, "loss": 2.0141, "step": 49341 }, { "epoch": 5.067679983567834, "grad_norm": 0.036395229399204254, "learning_rate": 0.01, "loss": 2.0069, "step": 49344 }, { "epoch": 5.067988086679676, "grad_norm": 0.12709848582744598, "learning_rate": 0.01, "loss": 2.0077, "step": 49347 }, { "epoch": 5.068296189791517, "grad_norm": 0.05967814847826958, "learning_rate": 0.01, "loss": 2.0009, "step": 49350 }, { "epoch": 5.068604292903358, "grad_norm": 0.09794972836971283, "learning_rate": 0.01, "loss": 2.0088, "step": 49353 }, { "epoch": 5.068912396015199, "grad_norm": 0.06263583153486252, "learning_rate": 0.01, "loss": 2.0012, "step": 49356 }, { "epoch": 5.0692204991270415, "grad_norm": 0.08471337705850601, "learning_rate": 0.01, "loss": 1.9976, "step": 49359 }, { "epoch": 5.069528602238883, "grad_norm": 0.06739859282970428, "learning_rate": 0.01, "loss": 2.0002, "step": 49362 }, { "epoch": 5.069836705350724, "grad_norm": 0.09328118711709976, "learning_rate": 0.01, "loss": 2.0053, "step": 49365 }, { "epoch": 5.070144808462565, "grad_norm": 0.09012078493833542, "learning_rate": 0.01, "loss": 1.9857, "step": 49368 }, { "epoch": 5.070452911574407, "grad_norm": 0.03964925929903984, "learning_rate": 0.01, "loss": 2.0001, "step": 49371 }, { "epoch": 5.070761014686249, "grad_norm": 0.12138742208480835, "learning_rate": 0.01, "loss": 2.0361, "step": 49374 }, { "epoch": 5.07106911779809, "grad_norm": 0.04422127828001976, "learning_rate": 0.01, "loss": 2.0163, "step": 49377 }, { "epoch": 5.071377220909931, "grad_norm": 0.038298092782497406, "learning_rate": 0.01, "loss": 1.9843, "step": 49380 }, { "epoch": 5.0716853240217725, "grad_norm": 0.04425305128097534, "learning_rate": 0.01, "loss": 1.9841, "step": 49383 }, { "epoch": 5.071993427133614, "grad_norm": 0.08276382088661194, "learning_rate": 0.01, "loss": 1.9874, "step": 49386 }, { "epoch": 5.072301530245455, "grad_norm": 0.06378073245286942, "learning_rate": 0.01, "loss": 2.0165, "step": 49389 }, { "epoch": 5.072609633357297, "grad_norm": 0.12052054703235626, "learning_rate": 0.01, "loss": 1.9964, "step": 49392 }, { "epoch": 5.0729177364691385, "grad_norm": 0.04634268954396248, "learning_rate": 0.01, "loss": 2.0047, "step": 49395 }, { "epoch": 5.07322583958098, "grad_norm": 0.055755455046892166, "learning_rate": 0.01, "loss": 2.0214, "step": 49398 }, { "epoch": 5.073533942692821, "grad_norm": 0.04300512373447418, "learning_rate": 0.01, "loss": 2.0191, "step": 49401 }, { "epoch": 5.073842045804662, "grad_norm": 0.08284097909927368, "learning_rate": 0.01, "loss": 2.0158, "step": 49404 }, { "epoch": 5.074150148916504, "grad_norm": 0.046609655022621155, "learning_rate": 0.01, "loss": 2.0046, "step": 49407 }, { "epoch": 5.074458252028346, "grad_norm": 0.06387645751237869, "learning_rate": 0.01, "loss": 2.0109, "step": 49410 }, { "epoch": 5.074766355140187, "grad_norm": 0.0506816990673542, "learning_rate": 0.01, "loss": 1.9857, "step": 49413 }, { "epoch": 5.075074458252028, "grad_norm": 0.03865443170070648, "learning_rate": 0.01, "loss": 2.0096, "step": 49416 }, { "epoch": 5.0753825613638694, "grad_norm": 0.07702656835317612, "learning_rate": 0.01, "loss": 2.0087, "step": 49419 }, { "epoch": 5.075690664475712, "grad_norm": 0.12181714177131653, "learning_rate": 0.01, "loss": 2.0012, "step": 49422 }, { "epoch": 5.075998767587553, "grad_norm": 0.06351039558649063, "learning_rate": 0.01, "loss": 2.0137, "step": 49425 }, { "epoch": 5.076306870699394, "grad_norm": 0.09312722831964493, "learning_rate": 0.01, "loss": 1.9969, "step": 49428 }, { "epoch": 5.076614973811235, "grad_norm": 0.06879006326198578, "learning_rate": 0.01, "loss": 1.982, "step": 49431 }, { "epoch": 5.076923076923077, "grad_norm": 0.06043083220720291, "learning_rate": 0.01, "loss": 2.0201, "step": 49434 }, { "epoch": 5.077231180034918, "grad_norm": 0.049164239317178726, "learning_rate": 0.01, "loss": 2.0171, "step": 49437 }, { "epoch": 5.07753928314676, "grad_norm": 0.04470280185341835, "learning_rate": 0.01, "loss": 1.9967, "step": 49440 }, { "epoch": 5.077847386258601, "grad_norm": 0.04513731226325035, "learning_rate": 0.01, "loss": 2.0123, "step": 49443 }, { "epoch": 5.078155489370443, "grad_norm": 0.07849404215812683, "learning_rate": 0.01, "loss": 2.0244, "step": 49446 }, { "epoch": 5.078463592482284, "grad_norm": 0.04768620431423187, "learning_rate": 0.01, "loss": 2.0, "step": 49449 }, { "epoch": 5.078771695594125, "grad_norm": 0.09680715203285217, "learning_rate": 0.01, "loss": 2.0234, "step": 49452 }, { "epoch": 5.079079798705967, "grad_norm": 0.07119087129831314, "learning_rate": 0.01, "loss": 2.01, "step": 49455 }, { "epoch": 5.0793879018178085, "grad_norm": 0.11560992896556854, "learning_rate": 0.01, "loss": 1.9967, "step": 49458 }, { "epoch": 5.07969600492965, "grad_norm": 0.039443932473659515, "learning_rate": 0.01, "loss": 2.0117, "step": 49461 }, { "epoch": 5.080004108041491, "grad_norm": 0.040348734706640244, "learning_rate": 0.01, "loss": 1.9996, "step": 49464 }, { "epoch": 5.080312211153332, "grad_norm": 0.10496348142623901, "learning_rate": 0.01, "loss": 2.0161, "step": 49467 }, { "epoch": 5.0806203142651745, "grad_norm": 0.10493961721658707, "learning_rate": 0.01, "loss": 2.0033, "step": 49470 }, { "epoch": 5.080928417377016, "grad_norm": 0.08104293048381805, "learning_rate": 0.01, "loss": 2.008, "step": 49473 }, { "epoch": 5.081236520488857, "grad_norm": 0.07609682530164719, "learning_rate": 0.01, "loss": 2.0156, "step": 49476 }, { "epoch": 5.081544623600698, "grad_norm": 0.03504324331879616, "learning_rate": 0.01, "loss": 1.9998, "step": 49479 }, { "epoch": 5.0818527267125395, "grad_norm": 0.03438768908381462, "learning_rate": 0.01, "loss": 1.9962, "step": 49482 }, { "epoch": 5.082160829824382, "grad_norm": 0.0372585728764534, "learning_rate": 0.01, "loss": 2.0175, "step": 49485 }, { "epoch": 5.082468932936223, "grad_norm": 0.06997973471879959, "learning_rate": 0.01, "loss": 2.0001, "step": 49488 }, { "epoch": 5.082777036048064, "grad_norm": 0.07300589978694916, "learning_rate": 0.01, "loss": 1.9983, "step": 49491 }, { "epoch": 5.0830851391599055, "grad_norm": 0.053376756608486176, "learning_rate": 0.01, "loss": 2.0148, "step": 49494 }, { "epoch": 5.083393242271747, "grad_norm": 0.060256477445364, "learning_rate": 0.01, "loss": 2.0119, "step": 49497 }, { "epoch": 5.083701345383588, "grad_norm": 0.0293456818908453, "learning_rate": 0.01, "loss": 1.9951, "step": 49500 }, { "epoch": 5.08400944849543, "grad_norm": 0.08368721604347229, "learning_rate": 0.01, "loss": 2.0083, "step": 49503 }, { "epoch": 5.084317551607271, "grad_norm": 0.15514978766441345, "learning_rate": 0.01, "loss": 1.9993, "step": 49506 }, { "epoch": 5.084625654719113, "grad_norm": 0.07931728661060333, "learning_rate": 0.01, "loss": 2.0019, "step": 49509 }, { "epoch": 5.084933757830954, "grad_norm": 0.038475409150123596, "learning_rate": 0.01, "loss": 2.0137, "step": 49512 }, { "epoch": 5.085241860942795, "grad_norm": 0.03167513385415077, "learning_rate": 0.01, "loss": 2.0129, "step": 49515 }, { "epoch": 5.085549964054637, "grad_norm": 0.053547751158475876, "learning_rate": 0.01, "loss": 2.0138, "step": 49518 }, { "epoch": 5.085858067166479, "grad_norm": 0.06962644308805466, "learning_rate": 0.01, "loss": 2.0007, "step": 49521 }, { "epoch": 5.08616617027832, "grad_norm": 0.06724515557289124, "learning_rate": 0.01, "loss": 1.9911, "step": 49524 }, { "epoch": 5.086474273390161, "grad_norm": 0.06945919245481491, "learning_rate": 0.01, "loss": 2.0069, "step": 49527 }, { "epoch": 5.086782376502002, "grad_norm": 0.03792745992541313, "learning_rate": 0.01, "loss": 2.0023, "step": 49530 }, { "epoch": 5.0870904796138445, "grad_norm": 0.05056298151612282, "learning_rate": 0.01, "loss": 2.0414, "step": 49533 }, { "epoch": 5.087398582725686, "grad_norm": 0.07531926780939102, "learning_rate": 0.01, "loss": 1.9934, "step": 49536 }, { "epoch": 5.087706685837527, "grad_norm": 0.0834566205739975, "learning_rate": 0.01, "loss": 1.9984, "step": 49539 }, { "epoch": 5.088014788949368, "grad_norm": 0.038478609174489975, "learning_rate": 0.01, "loss": 2.0142, "step": 49542 }, { "epoch": 5.08832289206121, "grad_norm": 0.06002604216337204, "learning_rate": 0.01, "loss": 1.9947, "step": 49545 }, { "epoch": 5.088630995173051, "grad_norm": 0.046000853180885315, "learning_rate": 0.01, "loss": 2.0068, "step": 49548 }, { "epoch": 5.088939098284893, "grad_norm": 0.06248699501156807, "learning_rate": 0.01, "loss": 2.0116, "step": 49551 }, { "epoch": 5.089247201396734, "grad_norm": 0.10761575400829315, "learning_rate": 0.01, "loss": 1.983, "step": 49554 }, { "epoch": 5.0895553045085755, "grad_norm": 0.09329602122306824, "learning_rate": 0.01, "loss": 1.9953, "step": 49557 }, { "epoch": 5.089863407620417, "grad_norm": 0.08233852684497833, "learning_rate": 0.01, "loss": 1.993, "step": 49560 }, { "epoch": 5.090171510732258, "grad_norm": 0.06591898202896118, "learning_rate": 0.01, "loss": 1.9769, "step": 49563 }, { "epoch": 5.0904796138441, "grad_norm": 0.11147011071443558, "learning_rate": 0.01, "loss": 2.0415, "step": 49566 }, { "epoch": 5.0907877169559415, "grad_norm": 0.10501214861869812, "learning_rate": 0.01, "loss": 2.0139, "step": 49569 }, { "epoch": 5.091095820067783, "grad_norm": 0.04929099977016449, "learning_rate": 0.01, "loss": 2.0095, "step": 49572 }, { "epoch": 5.091403923179624, "grad_norm": 0.09390784054994583, "learning_rate": 0.01, "loss": 2.0121, "step": 49575 }, { "epoch": 5.091712026291465, "grad_norm": 0.08599641174077988, "learning_rate": 0.01, "loss": 2.0126, "step": 49578 }, { "epoch": 5.092020129403307, "grad_norm": 0.04285844415426254, "learning_rate": 0.01, "loss": 2.005, "step": 49581 }, { "epoch": 5.092328232515149, "grad_norm": 0.034273531287908554, "learning_rate": 0.01, "loss": 2.0119, "step": 49584 }, { "epoch": 5.09263633562699, "grad_norm": 0.04617328196763992, "learning_rate": 0.01, "loss": 1.986, "step": 49587 }, { "epoch": 5.092944438738831, "grad_norm": 0.08739766478538513, "learning_rate": 0.01, "loss": 1.9793, "step": 49590 }, { "epoch": 5.093252541850672, "grad_norm": 0.06263922899961472, "learning_rate": 0.01, "loss": 2.0396, "step": 49593 }, { "epoch": 5.093560644962514, "grad_norm": 0.06044527515769005, "learning_rate": 0.01, "loss": 2.0066, "step": 49596 }, { "epoch": 5.093868748074356, "grad_norm": 0.046116676181554794, "learning_rate": 0.01, "loss": 1.9828, "step": 49599 }, { "epoch": 5.094176851186197, "grad_norm": 0.1035202220082283, "learning_rate": 0.01, "loss": 2.0087, "step": 49602 }, { "epoch": 5.094484954298038, "grad_norm": 0.03816954419016838, "learning_rate": 0.01, "loss": 1.9871, "step": 49605 }, { "epoch": 5.09479305740988, "grad_norm": 0.04604346677660942, "learning_rate": 0.01, "loss": 1.997, "step": 49608 }, { "epoch": 5.095101160521721, "grad_norm": 0.05297078937292099, "learning_rate": 0.01, "loss": 1.9893, "step": 49611 }, { "epoch": 5.095409263633563, "grad_norm": 0.03836045414209366, "learning_rate": 0.01, "loss": 2.0122, "step": 49614 }, { "epoch": 5.095717366745404, "grad_norm": 0.047086507081985474, "learning_rate": 0.01, "loss": 2.0136, "step": 49617 }, { "epoch": 5.096025469857246, "grad_norm": 0.04657857492566109, "learning_rate": 0.01, "loss": 1.9912, "step": 49620 }, { "epoch": 5.096333572969087, "grad_norm": 0.08411470055580139, "learning_rate": 0.01, "loss": 1.9885, "step": 49623 }, { "epoch": 5.096641676080928, "grad_norm": 0.16819079220294952, "learning_rate": 0.01, "loss": 2.013, "step": 49626 }, { "epoch": 5.09694977919277, "grad_norm": 0.04950143024325371, "learning_rate": 0.01, "loss": 1.9834, "step": 49629 }, { "epoch": 5.0972578823046115, "grad_norm": 0.039262961596250534, "learning_rate": 0.01, "loss": 1.9927, "step": 49632 }, { "epoch": 5.097565985416453, "grad_norm": 0.03746853023767471, "learning_rate": 0.01, "loss": 2.0156, "step": 49635 }, { "epoch": 5.097874088528294, "grad_norm": 0.052741505205631256, "learning_rate": 0.01, "loss": 2.0088, "step": 49638 }, { "epoch": 5.098182191640135, "grad_norm": 0.1192871630191803, "learning_rate": 0.01, "loss": 2.0065, "step": 49641 }, { "epoch": 5.098490294751977, "grad_norm": 0.054899461567401886, "learning_rate": 0.01, "loss": 1.998, "step": 49644 }, { "epoch": 5.098798397863819, "grad_norm": 0.05850033089518547, "learning_rate": 0.01, "loss": 1.9867, "step": 49647 }, { "epoch": 5.09910650097566, "grad_norm": 0.08758574724197388, "learning_rate": 0.01, "loss": 2.0139, "step": 49650 }, { "epoch": 5.099414604087501, "grad_norm": 0.09481152147054672, "learning_rate": 0.01, "loss": 1.9929, "step": 49653 }, { "epoch": 5.0997227071993425, "grad_norm": 0.12264327704906464, "learning_rate": 0.01, "loss": 2.0019, "step": 49656 }, { "epoch": 5.100030810311184, "grad_norm": 0.06704699993133545, "learning_rate": 0.01, "loss": 2.0161, "step": 49659 }, { "epoch": 5.100338913423026, "grad_norm": 0.10752804577350616, "learning_rate": 0.01, "loss": 2.0098, "step": 49662 }, { "epoch": 5.100647016534867, "grad_norm": 0.09217878431081772, "learning_rate": 0.01, "loss": 2.019, "step": 49665 }, { "epoch": 5.1009551196467084, "grad_norm": 0.04479275271296501, "learning_rate": 0.01, "loss": 2.0088, "step": 49668 }, { "epoch": 5.10126322275855, "grad_norm": 0.06177330017089844, "learning_rate": 0.01, "loss": 2.0016, "step": 49671 }, { "epoch": 5.101571325870391, "grad_norm": 0.09911325573921204, "learning_rate": 0.01, "loss": 2.0058, "step": 49674 }, { "epoch": 5.101879428982233, "grad_norm": 0.06496170163154602, "learning_rate": 0.01, "loss": 1.9966, "step": 49677 }, { "epoch": 5.102187532094074, "grad_norm": 0.04296518489718437, "learning_rate": 0.01, "loss": 2.0, "step": 49680 }, { "epoch": 5.102495635205916, "grad_norm": 0.05492605268955231, "learning_rate": 0.01, "loss": 2.0067, "step": 49683 }, { "epoch": 5.102803738317757, "grad_norm": 0.033720582723617554, "learning_rate": 0.01, "loss": 1.9872, "step": 49686 }, { "epoch": 5.103111841429598, "grad_norm": 0.10172949731349945, "learning_rate": 0.01, "loss": 1.9937, "step": 49689 }, { "epoch": 5.103419944541439, "grad_norm": 0.05326984077692032, "learning_rate": 0.01, "loss": 1.9909, "step": 49692 }, { "epoch": 5.103728047653282, "grad_norm": 0.10597484558820724, "learning_rate": 0.01, "loss": 2.0102, "step": 49695 }, { "epoch": 5.104036150765123, "grad_norm": 0.03910377249121666, "learning_rate": 0.01, "loss": 2.0016, "step": 49698 }, { "epoch": 5.104344253876964, "grad_norm": 0.04232342913746834, "learning_rate": 0.01, "loss": 2.0125, "step": 49701 }, { "epoch": 5.104652356988805, "grad_norm": 0.11343611031770706, "learning_rate": 0.01, "loss": 2.0245, "step": 49704 }, { "epoch": 5.104960460100647, "grad_norm": 0.06593618541955948, "learning_rate": 0.01, "loss": 2.0134, "step": 49707 }, { "epoch": 5.105268563212489, "grad_norm": 0.07576625049114227, "learning_rate": 0.01, "loss": 2.0023, "step": 49710 }, { "epoch": 5.10557666632433, "grad_norm": 0.10935617238283157, "learning_rate": 0.01, "loss": 1.9982, "step": 49713 }, { "epoch": 5.105884769436171, "grad_norm": 0.04176800325512886, "learning_rate": 0.01, "loss": 2.0134, "step": 49716 }, { "epoch": 5.106192872548013, "grad_norm": 0.05533302202820778, "learning_rate": 0.01, "loss": 2.007, "step": 49719 }, { "epoch": 5.106500975659854, "grad_norm": 0.05446555092930794, "learning_rate": 0.01, "loss": 2.0219, "step": 49722 }, { "epoch": 5.106809078771696, "grad_norm": 0.05760158970952034, "learning_rate": 0.01, "loss": 2.0075, "step": 49725 }, { "epoch": 5.107117181883537, "grad_norm": 0.11345206201076508, "learning_rate": 0.01, "loss": 1.9991, "step": 49728 }, { "epoch": 5.1074252849953785, "grad_norm": 0.09869284927845001, "learning_rate": 0.01, "loss": 1.9922, "step": 49731 }, { "epoch": 5.10773338810722, "grad_norm": 0.050909820944070816, "learning_rate": 0.01, "loss": 1.9975, "step": 49734 }, { "epoch": 5.108041491219061, "grad_norm": 0.03889784961938858, "learning_rate": 0.01, "loss": 2.0031, "step": 49737 }, { "epoch": 5.108349594330903, "grad_norm": 0.04064812883734703, "learning_rate": 0.01, "loss": 2.0074, "step": 49740 }, { "epoch": 5.1086576974427444, "grad_norm": 0.13409452140331268, "learning_rate": 0.01, "loss": 2.0245, "step": 49743 }, { "epoch": 5.108965800554586, "grad_norm": 0.0955972746014595, "learning_rate": 0.01, "loss": 1.9714, "step": 49746 }, { "epoch": 5.109273903666427, "grad_norm": 0.11944996565580368, "learning_rate": 0.01, "loss": 1.9819, "step": 49749 }, { "epoch": 5.109582006778268, "grad_norm": 0.06599771976470947, "learning_rate": 0.01, "loss": 1.9904, "step": 49752 }, { "epoch": 5.1098901098901095, "grad_norm": 0.05203530192375183, "learning_rate": 0.01, "loss": 2.0158, "step": 49755 }, { "epoch": 5.110198213001952, "grad_norm": 0.052099455147981644, "learning_rate": 0.01, "loss": 1.9899, "step": 49758 }, { "epoch": 5.110506316113793, "grad_norm": 0.06005854532122612, "learning_rate": 0.01, "loss": 2.0099, "step": 49761 }, { "epoch": 5.110814419225634, "grad_norm": 0.0429551899433136, "learning_rate": 0.01, "loss": 1.9929, "step": 49764 }, { "epoch": 5.111122522337475, "grad_norm": 0.07173555344343185, "learning_rate": 0.01, "loss": 2.0108, "step": 49767 }, { "epoch": 5.111430625449317, "grad_norm": 0.05251854285597801, "learning_rate": 0.01, "loss": 2.0304, "step": 49770 }, { "epoch": 5.111738728561159, "grad_norm": 0.06687440723180771, "learning_rate": 0.01, "loss": 1.9996, "step": 49773 }, { "epoch": 5.112046831673, "grad_norm": 0.0841129869222641, "learning_rate": 0.01, "loss": 2.0104, "step": 49776 }, { "epoch": 5.112354934784841, "grad_norm": 0.086825892329216, "learning_rate": 0.01, "loss": 2.009, "step": 49779 }, { "epoch": 5.112663037896683, "grad_norm": 0.059246089309453964, "learning_rate": 0.01, "loss": 1.9956, "step": 49782 }, { "epoch": 5.112971141008524, "grad_norm": 0.061836544424295425, "learning_rate": 0.01, "loss": 2.0194, "step": 49785 }, { "epoch": 5.113279244120366, "grad_norm": 0.0878891870379448, "learning_rate": 0.01, "loss": 2.0169, "step": 49788 }, { "epoch": 5.113587347232207, "grad_norm": 0.061963435262441635, "learning_rate": 0.01, "loss": 1.9868, "step": 49791 }, { "epoch": 5.113895450344049, "grad_norm": 0.07002797722816467, "learning_rate": 0.01, "loss": 2.0097, "step": 49794 }, { "epoch": 5.11420355345589, "grad_norm": 0.07307753711938858, "learning_rate": 0.01, "loss": 2.0016, "step": 49797 }, { "epoch": 5.114511656567731, "grad_norm": 0.05582696199417114, "learning_rate": 0.01, "loss": 2.0227, "step": 49800 }, { "epoch": 5.114819759679572, "grad_norm": 0.11906569451093674, "learning_rate": 0.01, "loss": 2.0094, "step": 49803 }, { "epoch": 5.1151278627914145, "grad_norm": 0.041671089828014374, "learning_rate": 0.01, "loss": 1.9973, "step": 49806 }, { "epoch": 5.115435965903256, "grad_norm": 0.040786996483802795, "learning_rate": 0.01, "loss": 2.0033, "step": 49809 }, { "epoch": 5.115744069015097, "grad_norm": 0.06962720304727554, "learning_rate": 0.01, "loss": 2.0245, "step": 49812 }, { "epoch": 5.116052172126938, "grad_norm": 0.05079250782728195, "learning_rate": 0.01, "loss": 2.0132, "step": 49815 }, { "epoch": 5.11636027523878, "grad_norm": 0.03422404080629349, "learning_rate": 0.01, "loss": 1.9976, "step": 49818 }, { "epoch": 5.116668378350622, "grad_norm": 0.13034658133983612, "learning_rate": 0.01, "loss": 2.0303, "step": 49821 }, { "epoch": 5.116976481462463, "grad_norm": 0.05432206392288208, "learning_rate": 0.01, "loss": 1.9922, "step": 49824 }, { "epoch": 5.117284584574304, "grad_norm": 0.11153416335582733, "learning_rate": 0.01, "loss": 1.9886, "step": 49827 }, { "epoch": 5.1175926876861455, "grad_norm": 0.04324459657073021, "learning_rate": 0.01, "loss": 2.0056, "step": 49830 }, { "epoch": 5.117900790797987, "grad_norm": 0.035336676985025406, "learning_rate": 0.01, "loss": 2.0035, "step": 49833 }, { "epoch": 5.118208893909829, "grad_norm": 0.07241083681583405, "learning_rate": 0.01, "loss": 2.0045, "step": 49836 }, { "epoch": 5.11851699702167, "grad_norm": 0.05331496521830559, "learning_rate": 0.01, "loss": 1.9724, "step": 49839 }, { "epoch": 5.118825100133511, "grad_norm": 0.04856008291244507, "learning_rate": 0.01, "loss": 2.009, "step": 49842 }, { "epoch": 5.119133203245353, "grad_norm": 0.07900402694940567, "learning_rate": 0.01, "loss": 1.9957, "step": 49845 }, { "epoch": 5.119441306357194, "grad_norm": 0.04300111532211304, "learning_rate": 0.01, "loss": 2.013, "step": 49848 }, { "epoch": 5.119749409469035, "grad_norm": 0.09242533892393112, "learning_rate": 0.01, "loss": 1.996, "step": 49851 }, { "epoch": 5.120057512580877, "grad_norm": 0.047889694571495056, "learning_rate": 0.01, "loss": 1.9968, "step": 49854 }, { "epoch": 5.120365615692719, "grad_norm": 0.04471924528479576, "learning_rate": 0.01, "loss": 2.0219, "step": 49857 }, { "epoch": 5.12067371880456, "grad_norm": 0.11666672676801682, "learning_rate": 0.01, "loss": 1.9998, "step": 49860 }, { "epoch": 5.120981821916401, "grad_norm": 0.0470040999352932, "learning_rate": 0.01, "loss": 1.9728, "step": 49863 }, { "epoch": 5.121289925028242, "grad_norm": 0.10009215027093887, "learning_rate": 0.01, "loss": 2.0075, "step": 49866 }, { "epoch": 5.121598028140085, "grad_norm": 0.06280893087387085, "learning_rate": 0.01, "loss": 2.0109, "step": 49869 }, { "epoch": 5.121906131251926, "grad_norm": 0.046029094606637955, "learning_rate": 0.01, "loss": 1.9907, "step": 49872 }, { "epoch": 5.122214234363767, "grad_norm": 0.11030067503452301, "learning_rate": 0.01, "loss": 2.006, "step": 49875 }, { "epoch": 5.122522337475608, "grad_norm": 0.033092547208070755, "learning_rate": 0.01, "loss": 2.0064, "step": 49878 }, { "epoch": 5.12283044058745, "grad_norm": 0.051649268716573715, "learning_rate": 0.01, "loss": 1.9767, "step": 49881 }, { "epoch": 5.123138543699292, "grad_norm": 0.06344709545373917, "learning_rate": 0.01, "loss": 2.0163, "step": 49884 }, { "epoch": 5.123446646811133, "grad_norm": 0.08509237319231033, "learning_rate": 0.01, "loss": 2.0251, "step": 49887 }, { "epoch": 5.123754749922974, "grad_norm": 0.04539303854107857, "learning_rate": 0.01, "loss": 2.0089, "step": 49890 }, { "epoch": 5.124062853034816, "grad_norm": 0.03646918386220932, "learning_rate": 0.01, "loss": 1.9879, "step": 49893 }, { "epoch": 5.124370956146657, "grad_norm": 0.12570123374462128, "learning_rate": 0.01, "loss": 1.9997, "step": 49896 }, { "epoch": 5.124679059258498, "grad_norm": 0.12240875512361526, "learning_rate": 0.01, "loss": 1.9984, "step": 49899 }, { "epoch": 5.12498716237034, "grad_norm": 0.10540018230676651, "learning_rate": 0.01, "loss": 2.0088, "step": 49902 }, { "epoch": 5.1252952654821815, "grad_norm": 0.08514107763767242, "learning_rate": 0.01, "loss": 1.9776, "step": 49905 }, { "epoch": 5.125603368594023, "grad_norm": 0.04462766274809837, "learning_rate": 0.01, "loss": 2.0086, "step": 49908 }, { "epoch": 5.125911471705864, "grad_norm": 0.03537189960479736, "learning_rate": 0.01, "loss": 1.9994, "step": 49911 }, { "epoch": 5.126219574817705, "grad_norm": 0.053130924701690674, "learning_rate": 0.01, "loss": 1.9961, "step": 49914 }, { "epoch": 5.126527677929547, "grad_norm": 0.08888891339302063, "learning_rate": 0.01, "loss": 2.0169, "step": 49917 }, { "epoch": 5.126835781041389, "grad_norm": 0.051719069480895996, "learning_rate": 0.01, "loss": 2.0018, "step": 49920 }, { "epoch": 5.12714388415323, "grad_norm": 0.123109832406044, "learning_rate": 0.01, "loss": 1.9926, "step": 49923 }, { "epoch": 5.127451987265071, "grad_norm": 0.08993472158908844, "learning_rate": 0.01, "loss": 1.9913, "step": 49926 }, { "epoch": 5.1277600903769125, "grad_norm": 0.10910573601722717, "learning_rate": 0.01, "loss": 1.9856, "step": 49929 }, { "epoch": 5.128068193488755, "grad_norm": 0.13778774440288544, "learning_rate": 0.01, "loss": 1.9985, "step": 49932 }, { "epoch": 5.128376296600596, "grad_norm": 0.08411096781492233, "learning_rate": 0.01, "loss": 2.0119, "step": 49935 }, { "epoch": 5.128684399712437, "grad_norm": 0.05669613182544708, "learning_rate": 0.01, "loss": 2.0166, "step": 49938 }, { "epoch": 5.128992502824278, "grad_norm": 0.037695713341236115, "learning_rate": 0.01, "loss": 1.9769, "step": 49941 }, { "epoch": 5.12930060593612, "grad_norm": 0.07977671176195145, "learning_rate": 0.01, "loss": 2.0116, "step": 49944 }, { "epoch": 5.129608709047961, "grad_norm": 0.10012632608413696, "learning_rate": 0.01, "loss": 1.9685, "step": 49947 }, { "epoch": 5.129916812159803, "grad_norm": 0.06524799764156342, "learning_rate": 0.01, "loss": 1.9901, "step": 49950 }, { "epoch": 5.130224915271644, "grad_norm": 0.1130509302020073, "learning_rate": 0.01, "loss": 1.9893, "step": 49953 }, { "epoch": 5.130533018383486, "grad_norm": 0.06062379479408264, "learning_rate": 0.01, "loss": 2.0056, "step": 49956 }, { "epoch": 5.130841121495327, "grad_norm": 0.06229391694068909, "learning_rate": 0.01, "loss": 2.0003, "step": 49959 }, { "epoch": 5.131149224607168, "grad_norm": 0.04881103336811066, "learning_rate": 0.01, "loss": 2.0095, "step": 49962 }, { "epoch": 5.13145732771901, "grad_norm": 0.0779888853430748, "learning_rate": 0.01, "loss": 2.0085, "step": 49965 }, { "epoch": 5.131765430830852, "grad_norm": 0.03707825019955635, "learning_rate": 0.01, "loss": 2.016, "step": 49968 }, { "epoch": 5.132073533942693, "grad_norm": 0.12145381420850754, "learning_rate": 0.01, "loss": 2.007, "step": 49971 }, { "epoch": 5.132381637054534, "grad_norm": 0.08990494906902313, "learning_rate": 0.01, "loss": 1.9846, "step": 49974 }, { "epoch": 5.132689740166375, "grad_norm": 0.09638572484254837, "learning_rate": 0.01, "loss": 2.0001, "step": 49977 }, { "epoch": 5.1329978432782175, "grad_norm": 0.037770826369524, "learning_rate": 0.01, "loss": 2.0012, "step": 49980 }, { "epoch": 5.133305946390059, "grad_norm": 0.038396887481212616, "learning_rate": 0.01, "loss": 1.9952, "step": 49983 }, { "epoch": 5.1336140495019, "grad_norm": 0.052246298640966415, "learning_rate": 0.01, "loss": 1.9946, "step": 49986 }, { "epoch": 5.133922152613741, "grad_norm": 0.043142352253198624, "learning_rate": 0.01, "loss": 2.0161, "step": 49989 }, { "epoch": 5.134230255725583, "grad_norm": 0.04713413119316101, "learning_rate": 0.01, "loss": 1.972, "step": 49992 }, { "epoch": 5.134538358837425, "grad_norm": 0.06687591224908829, "learning_rate": 0.01, "loss": 2.0093, "step": 49995 }, { "epoch": 5.134846461949266, "grad_norm": 0.057987745851278305, "learning_rate": 0.01, "loss": 1.9958, "step": 49998 }, { "epoch": 5.135154565061107, "grad_norm": 0.048778288066387177, "learning_rate": 0.01, "loss": 1.9865, "step": 50001 }, { "epoch": 5.1354626681729485, "grad_norm": 0.04489746689796448, "learning_rate": 0.01, "loss": 2.0132, "step": 50004 }, { "epoch": 5.13577077128479, "grad_norm": 0.08324793726205826, "learning_rate": 0.01, "loss": 1.9947, "step": 50007 }, { "epoch": 5.136078874396631, "grad_norm": 0.04505028948187828, "learning_rate": 0.01, "loss": 2.0203, "step": 50010 }, { "epoch": 5.136386977508473, "grad_norm": 0.07827684283256531, "learning_rate": 0.01, "loss": 1.9831, "step": 50013 }, { "epoch": 5.136695080620314, "grad_norm": 0.07187260687351227, "learning_rate": 0.01, "loss": 2.0129, "step": 50016 }, { "epoch": 5.137003183732156, "grad_norm": 0.0374215804040432, "learning_rate": 0.01, "loss": 1.9748, "step": 50019 }, { "epoch": 5.137311286843997, "grad_norm": 0.06169632822275162, "learning_rate": 0.01, "loss": 1.9924, "step": 50022 }, { "epoch": 5.137619389955838, "grad_norm": 0.08089695870876312, "learning_rate": 0.01, "loss": 1.9784, "step": 50025 }, { "epoch": 5.13792749306768, "grad_norm": 0.06311798840761185, "learning_rate": 0.01, "loss": 2.0028, "step": 50028 }, { "epoch": 5.138235596179522, "grad_norm": 0.0869307816028595, "learning_rate": 0.01, "loss": 1.9934, "step": 50031 }, { "epoch": 5.138543699291363, "grad_norm": 0.07645547389984131, "learning_rate": 0.01, "loss": 1.9852, "step": 50034 }, { "epoch": 5.138851802403204, "grad_norm": 0.09578394144773483, "learning_rate": 0.01, "loss": 2.0077, "step": 50037 }, { "epoch": 5.139159905515045, "grad_norm": 0.09977228194475174, "learning_rate": 0.01, "loss": 1.9937, "step": 50040 }, { "epoch": 5.139468008626887, "grad_norm": 0.05820827558636665, "learning_rate": 0.01, "loss": 2.0138, "step": 50043 }, { "epoch": 5.139776111738729, "grad_norm": 0.03890974819660187, "learning_rate": 0.01, "loss": 2.0006, "step": 50046 }, { "epoch": 5.14008421485057, "grad_norm": 0.10406100749969482, "learning_rate": 0.01, "loss": 1.9962, "step": 50049 }, { "epoch": 5.140392317962411, "grad_norm": 0.057227544486522675, "learning_rate": 0.01, "loss": 2.0076, "step": 50052 }, { "epoch": 5.140700421074253, "grad_norm": 0.07142063230276108, "learning_rate": 0.01, "loss": 2.0143, "step": 50055 }, { "epoch": 5.141008524186094, "grad_norm": 0.047450605779886246, "learning_rate": 0.01, "loss": 2.0203, "step": 50058 }, { "epoch": 5.141316627297936, "grad_norm": 0.07935485243797302, "learning_rate": 0.01, "loss": 1.9998, "step": 50061 }, { "epoch": 5.141624730409777, "grad_norm": 0.14235766232013702, "learning_rate": 0.01, "loss": 2.0007, "step": 50064 }, { "epoch": 5.141932833521619, "grad_norm": 0.054345373064279556, "learning_rate": 0.01, "loss": 2.0062, "step": 50067 }, { "epoch": 5.14224093663346, "grad_norm": 0.03830769658088684, "learning_rate": 0.01, "loss": 2.0103, "step": 50070 }, { "epoch": 5.142549039745301, "grad_norm": 0.034983497112989426, "learning_rate": 0.01, "loss": 2.005, "step": 50073 }, { "epoch": 5.142857142857143, "grad_norm": 0.05580296739935875, "learning_rate": 0.01, "loss": 2.0175, "step": 50076 }, { "epoch": 5.1431652459689845, "grad_norm": 0.11556407809257507, "learning_rate": 0.01, "loss": 2.0065, "step": 50079 }, { "epoch": 5.143473349080826, "grad_norm": 0.05813178792595863, "learning_rate": 0.01, "loss": 2.0039, "step": 50082 }, { "epoch": 5.143781452192667, "grad_norm": 0.08391161262989044, "learning_rate": 0.01, "loss": 2.008, "step": 50085 }, { "epoch": 5.144089555304508, "grad_norm": 0.1391163021326065, "learning_rate": 0.01, "loss": 1.9844, "step": 50088 }, { "epoch": 5.14439765841635, "grad_norm": 0.038378529250621796, "learning_rate": 0.01, "loss": 2.0127, "step": 50091 }, { "epoch": 5.144705761528192, "grad_norm": 0.03499170020222664, "learning_rate": 0.01, "loss": 2.0067, "step": 50094 }, { "epoch": 5.145013864640033, "grad_norm": 0.038385018706321716, "learning_rate": 0.01, "loss": 2.0021, "step": 50097 }, { "epoch": 5.145321967751874, "grad_norm": 0.04664738476276398, "learning_rate": 0.01, "loss": 1.9992, "step": 50100 }, { "epoch": 5.1456300708637155, "grad_norm": 0.05988583341240883, "learning_rate": 0.01, "loss": 2.0116, "step": 50103 }, { "epoch": 5.145938173975557, "grad_norm": 0.05822984501719475, "learning_rate": 0.01, "loss": 1.9832, "step": 50106 }, { "epoch": 5.146246277087399, "grad_norm": 0.1176968440413475, "learning_rate": 0.01, "loss": 2.0017, "step": 50109 }, { "epoch": 5.14655438019924, "grad_norm": 0.04409262165427208, "learning_rate": 0.01, "loss": 1.9751, "step": 50112 }, { "epoch": 5.146862483311081, "grad_norm": 0.18404054641723633, "learning_rate": 0.01, "loss": 2.0388, "step": 50115 }, { "epoch": 5.147170586422923, "grad_norm": 0.07370147854089737, "learning_rate": 0.01, "loss": 2.0207, "step": 50118 }, { "epoch": 5.147478689534764, "grad_norm": 0.05898935720324516, "learning_rate": 0.01, "loss": 1.9861, "step": 50121 }, { "epoch": 5.147786792646606, "grad_norm": 0.04993924871087074, "learning_rate": 0.01, "loss": 1.9986, "step": 50124 }, { "epoch": 5.148094895758447, "grad_norm": 0.07191821932792664, "learning_rate": 0.01, "loss": 1.9945, "step": 50127 }, { "epoch": 5.148402998870289, "grad_norm": 0.06233205646276474, "learning_rate": 0.01, "loss": 1.9957, "step": 50130 }, { "epoch": 5.14871110198213, "grad_norm": 0.10599711537361145, "learning_rate": 0.01, "loss": 1.9935, "step": 50133 }, { "epoch": 5.149019205093971, "grad_norm": 0.05540407449007034, "learning_rate": 0.01, "loss": 2.0058, "step": 50136 }, { "epoch": 5.149327308205813, "grad_norm": 0.0512741319835186, "learning_rate": 0.01, "loss": 1.9917, "step": 50139 }, { "epoch": 5.149635411317655, "grad_norm": 0.060490820556879044, "learning_rate": 0.01, "loss": 1.9808, "step": 50142 }, { "epoch": 5.149943514429496, "grad_norm": 0.03850322216749191, "learning_rate": 0.01, "loss": 1.9957, "step": 50145 }, { "epoch": 5.150251617541337, "grad_norm": 0.12944872677326202, "learning_rate": 0.01, "loss": 2.0228, "step": 50148 }, { "epoch": 5.150559720653178, "grad_norm": 0.1282481700181961, "learning_rate": 0.01, "loss": 2.0104, "step": 50151 }, { "epoch": 5.15086782376502, "grad_norm": 0.09446661174297333, "learning_rate": 0.01, "loss": 2.011, "step": 50154 }, { "epoch": 5.151175926876862, "grad_norm": 0.04041890799999237, "learning_rate": 0.01, "loss": 2.0124, "step": 50157 }, { "epoch": 5.151484029988703, "grad_norm": 0.056447647511959076, "learning_rate": 0.01, "loss": 2.008, "step": 50160 }, { "epoch": 5.151792133100544, "grad_norm": 0.0449320413172245, "learning_rate": 0.01, "loss": 2.0123, "step": 50163 }, { "epoch": 5.1521002362123856, "grad_norm": 0.07914337515830994, "learning_rate": 0.01, "loss": 1.9795, "step": 50166 }, { "epoch": 5.152408339324227, "grad_norm": 0.06510215252637863, "learning_rate": 0.01, "loss": 2.0122, "step": 50169 }, { "epoch": 5.152716442436069, "grad_norm": 0.04413073882460594, "learning_rate": 0.01, "loss": 1.9958, "step": 50172 }, { "epoch": 5.15302454554791, "grad_norm": 0.045415036380290985, "learning_rate": 0.01, "loss": 1.9823, "step": 50175 }, { "epoch": 5.1533326486597515, "grad_norm": 0.04759620502591133, "learning_rate": 0.01, "loss": 2.0026, "step": 50178 }, { "epoch": 5.153640751771593, "grad_norm": 0.08537039160728455, "learning_rate": 0.01, "loss": 1.9982, "step": 50181 }, { "epoch": 5.153948854883434, "grad_norm": 0.10971151292324066, "learning_rate": 0.01, "loss": 2.0096, "step": 50184 }, { "epoch": 5.154256957995276, "grad_norm": 0.04773986339569092, "learning_rate": 0.01, "loss": 2.0192, "step": 50187 }, { "epoch": 5.154565061107117, "grad_norm": 0.13717158138751984, "learning_rate": 0.01, "loss": 2.0082, "step": 50190 }, { "epoch": 5.154873164218959, "grad_norm": 0.05396975204348564, "learning_rate": 0.01, "loss": 1.9911, "step": 50193 }, { "epoch": 5.1551812673308, "grad_norm": 0.03611796349287033, "learning_rate": 0.01, "loss": 1.9988, "step": 50196 }, { "epoch": 5.155489370442641, "grad_norm": 0.0544486828148365, "learning_rate": 0.01, "loss": 1.9916, "step": 50199 }, { "epoch": 5.1557974735544825, "grad_norm": 0.1198575496673584, "learning_rate": 0.01, "loss": 2.0187, "step": 50202 }, { "epoch": 5.156105576666325, "grad_norm": 0.04922753944993019, "learning_rate": 0.01, "loss": 2.0136, "step": 50205 }, { "epoch": 5.156413679778166, "grad_norm": 0.07132868468761444, "learning_rate": 0.01, "loss": 1.9788, "step": 50208 }, { "epoch": 5.156721782890007, "grad_norm": 0.06232021376490593, "learning_rate": 0.01, "loss": 1.9926, "step": 50211 }, { "epoch": 5.157029886001848, "grad_norm": 0.0492694191634655, "learning_rate": 0.01, "loss": 2.017, "step": 50214 }, { "epoch": 5.15733798911369, "grad_norm": 0.040781404823064804, "learning_rate": 0.01, "loss": 1.9982, "step": 50217 }, { "epoch": 5.157646092225532, "grad_norm": 0.04086026921868324, "learning_rate": 0.01, "loss": 2.0217, "step": 50220 }, { "epoch": 5.157954195337373, "grad_norm": 0.04154505580663681, "learning_rate": 0.01, "loss": 2.0069, "step": 50223 }, { "epoch": 5.158262298449214, "grad_norm": 0.043042492121458054, "learning_rate": 0.01, "loss": 1.9652, "step": 50226 }, { "epoch": 5.158570401561056, "grad_norm": 0.10211813449859619, "learning_rate": 0.01, "loss": 2.014, "step": 50229 }, { "epoch": 5.158878504672897, "grad_norm": 0.0644875019788742, "learning_rate": 0.01, "loss": 1.9997, "step": 50232 }, { "epoch": 5.159186607784739, "grad_norm": 0.09346074610948563, "learning_rate": 0.01, "loss": 2.0304, "step": 50235 }, { "epoch": 5.15949471089658, "grad_norm": 0.04961559548974037, "learning_rate": 0.01, "loss": 2.0267, "step": 50238 }, { "epoch": 5.159802814008422, "grad_norm": 0.07135666906833649, "learning_rate": 0.01, "loss": 2.0063, "step": 50241 }, { "epoch": 5.160110917120263, "grad_norm": 0.0363953560590744, "learning_rate": 0.01, "loss": 2.0157, "step": 50244 }, { "epoch": 5.160419020232104, "grad_norm": 0.05364762246608734, "learning_rate": 0.01, "loss": 1.9895, "step": 50247 }, { "epoch": 5.160727123343945, "grad_norm": 0.07405871897935867, "learning_rate": 0.01, "loss": 2.0068, "step": 50250 }, { "epoch": 5.1610352264557875, "grad_norm": 0.06045752391219139, "learning_rate": 0.01, "loss": 2.0126, "step": 50253 }, { "epoch": 5.161343329567629, "grad_norm": 0.05497819930315018, "learning_rate": 0.01, "loss": 1.9921, "step": 50256 }, { "epoch": 5.16165143267947, "grad_norm": 0.12490664422512054, "learning_rate": 0.01, "loss": 2.0215, "step": 50259 }, { "epoch": 5.161959535791311, "grad_norm": 0.04628274589776993, "learning_rate": 0.01, "loss": 2.0116, "step": 50262 }, { "epoch": 5.1622676389031525, "grad_norm": 0.06884883344173431, "learning_rate": 0.01, "loss": 2.021, "step": 50265 }, { "epoch": 5.162575742014995, "grad_norm": 0.057228926569223404, "learning_rate": 0.01, "loss": 1.9707, "step": 50268 }, { "epoch": 5.162883845126836, "grad_norm": 0.05467236414551735, "learning_rate": 0.01, "loss": 1.9975, "step": 50271 }, { "epoch": 5.163191948238677, "grad_norm": 0.10743111371994019, "learning_rate": 0.01, "loss": 2.0041, "step": 50274 }, { "epoch": 5.1635000513505185, "grad_norm": 0.06426141411066055, "learning_rate": 0.01, "loss": 2.0137, "step": 50277 }, { "epoch": 5.16380815446236, "grad_norm": 0.09047043323516846, "learning_rate": 0.01, "loss": 1.9975, "step": 50280 }, { "epoch": 5.164116257574202, "grad_norm": 0.032664790749549866, "learning_rate": 0.01, "loss": 2.0101, "step": 50283 }, { "epoch": 5.164424360686043, "grad_norm": 0.05022372305393219, "learning_rate": 0.01, "loss": 1.9948, "step": 50286 }, { "epoch": 5.164732463797884, "grad_norm": 0.06359392404556274, "learning_rate": 0.01, "loss": 1.9953, "step": 50289 }, { "epoch": 5.165040566909726, "grad_norm": 0.0480986088514328, "learning_rate": 0.01, "loss": 1.9781, "step": 50292 }, { "epoch": 5.165348670021567, "grad_norm": 0.04164107143878937, "learning_rate": 0.01, "loss": 1.994, "step": 50295 }, { "epoch": 5.165656773133408, "grad_norm": 0.06567779183387756, "learning_rate": 0.01, "loss": 2.0243, "step": 50298 }, { "epoch": 5.16596487624525, "grad_norm": 0.11087372153997421, "learning_rate": 0.01, "loss": 1.9872, "step": 50301 }, { "epoch": 5.166272979357092, "grad_norm": 0.056734826415777206, "learning_rate": 0.01, "loss": 2.0047, "step": 50304 }, { "epoch": 5.166581082468933, "grad_norm": 0.06245841085910797, "learning_rate": 0.01, "loss": 2.0065, "step": 50307 }, { "epoch": 5.166889185580774, "grad_norm": 0.07361923158168793, "learning_rate": 0.01, "loss": 1.9788, "step": 50310 }, { "epoch": 5.167197288692615, "grad_norm": 0.08085532486438751, "learning_rate": 0.01, "loss": 1.9912, "step": 50313 }, { "epoch": 5.167505391804458, "grad_norm": 0.08500105887651443, "learning_rate": 0.01, "loss": 2.0234, "step": 50316 }, { "epoch": 5.167813494916299, "grad_norm": 0.07736580818891525, "learning_rate": 0.01, "loss": 1.9908, "step": 50319 }, { "epoch": 5.16812159802814, "grad_norm": 0.08762416243553162, "learning_rate": 0.01, "loss": 1.9993, "step": 50322 }, { "epoch": 5.168429701139981, "grad_norm": 0.05399163439869881, "learning_rate": 0.01, "loss": 1.9932, "step": 50325 }, { "epoch": 5.168737804251823, "grad_norm": 0.05431235954165459, "learning_rate": 0.01, "loss": 1.9898, "step": 50328 }, { "epoch": 5.169045907363665, "grad_norm": 0.04496511071920395, "learning_rate": 0.01, "loss": 2.0105, "step": 50331 }, { "epoch": 5.169354010475506, "grad_norm": 0.04711058363318443, "learning_rate": 0.01, "loss": 1.9891, "step": 50334 }, { "epoch": 5.169662113587347, "grad_norm": 0.08989371359348297, "learning_rate": 0.01, "loss": 1.9997, "step": 50337 }, { "epoch": 5.1699702166991885, "grad_norm": 0.048945628106594086, "learning_rate": 0.01, "loss": 1.9945, "step": 50340 }, { "epoch": 5.17027831981103, "grad_norm": 0.09270982444286346, "learning_rate": 0.01, "loss": 1.9826, "step": 50343 }, { "epoch": 5.170586422922872, "grad_norm": 0.05704176053404808, "learning_rate": 0.01, "loss": 1.9958, "step": 50346 }, { "epoch": 5.170894526034713, "grad_norm": 0.03455890342593193, "learning_rate": 0.01, "loss": 2.0412, "step": 50349 }, { "epoch": 5.1712026291465545, "grad_norm": 0.041682351380586624, "learning_rate": 0.01, "loss": 2.0007, "step": 50352 }, { "epoch": 5.171510732258396, "grad_norm": 0.04951677471399307, "learning_rate": 0.01, "loss": 1.9945, "step": 50355 }, { "epoch": 5.171818835370237, "grad_norm": 0.056010644882917404, "learning_rate": 0.01, "loss": 1.9864, "step": 50358 }, { "epoch": 5.172126938482078, "grad_norm": 0.1273152083158493, "learning_rate": 0.01, "loss": 1.9878, "step": 50361 }, { "epoch": 5.17243504159392, "grad_norm": 0.08742735534906387, "learning_rate": 0.01, "loss": 1.9902, "step": 50364 }, { "epoch": 5.172743144705762, "grad_norm": 0.03796623274683952, "learning_rate": 0.01, "loss": 1.9849, "step": 50367 }, { "epoch": 5.173051247817603, "grad_norm": 0.035885389894247055, "learning_rate": 0.01, "loss": 2.0107, "step": 50370 }, { "epoch": 5.173359350929444, "grad_norm": 0.040873028337955475, "learning_rate": 0.01, "loss": 2.0045, "step": 50373 }, { "epoch": 5.1736674540412855, "grad_norm": 0.03645748645067215, "learning_rate": 0.01, "loss": 2.0087, "step": 50376 }, { "epoch": 5.173975557153128, "grad_norm": 0.08969543129205704, "learning_rate": 0.01, "loss": 2.0174, "step": 50379 }, { "epoch": 5.174283660264969, "grad_norm": 0.05043506622314453, "learning_rate": 0.01, "loss": 1.9939, "step": 50382 }, { "epoch": 5.17459176337681, "grad_norm": 0.053546857088804245, "learning_rate": 0.01, "loss": 1.9953, "step": 50385 }, { "epoch": 5.174899866488651, "grad_norm": 0.03222036734223366, "learning_rate": 0.01, "loss": 2.0002, "step": 50388 }, { "epoch": 5.175207969600493, "grad_norm": 0.07949094474315643, "learning_rate": 0.01, "loss": 1.9913, "step": 50391 }, { "epoch": 5.175516072712334, "grad_norm": 0.0858977660536766, "learning_rate": 0.01, "loss": 2.0061, "step": 50394 }, { "epoch": 5.175824175824176, "grad_norm": 0.10471560060977936, "learning_rate": 0.01, "loss": 2.0044, "step": 50397 }, { "epoch": 5.176132278936017, "grad_norm": 0.10742645710706711, "learning_rate": 0.01, "loss": 1.9701, "step": 50400 }, { "epoch": 5.176440382047859, "grad_norm": 0.035415954887866974, "learning_rate": 0.01, "loss": 2.0005, "step": 50403 }, { "epoch": 5.1767484851597, "grad_norm": 0.043476980179548264, "learning_rate": 0.01, "loss": 1.9984, "step": 50406 }, { "epoch": 5.177056588271541, "grad_norm": 0.05659857392311096, "learning_rate": 0.01, "loss": 2.006, "step": 50409 }, { "epoch": 5.177364691383383, "grad_norm": 0.09809272736310959, "learning_rate": 0.01, "loss": 1.9947, "step": 50412 }, { "epoch": 5.1776727944952246, "grad_norm": 0.0645933598279953, "learning_rate": 0.01, "loss": 1.9848, "step": 50415 }, { "epoch": 5.177980897607066, "grad_norm": 0.11137600988149643, "learning_rate": 0.01, "loss": 1.9903, "step": 50418 }, { "epoch": 5.178289000718907, "grad_norm": 0.03614633530378342, "learning_rate": 0.01, "loss": 2.0087, "step": 50421 }, { "epoch": 5.178597103830748, "grad_norm": 0.09580224752426147, "learning_rate": 0.01, "loss": 1.9932, "step": 50424 }, { "epoch": 5.1789052069425905, "grad_norm": 0.0874679684638977, "learning_rate": 0.01, "loss": 1.9941, "step": 50427 }, { "epoch": 5.179213310054432, "grad_norm": 0.06637660413980484, "learning_rate": 0.01, "loss": 2.0219, "step": 50430 }, { "epoch": 5.179521413166273, "grad_norm": 0.049818407744169235, "learning_rate": 0.01, "loss": 1.9953, "step": 50433 }, { "epoch": 5.179829516278114, "grad_norm": 0.035875897854566574, "learning_rate": 0.01, "loss": 1.9689, "step": 50436 }, { "epoch": 5.1801376193899555, "grad_norm": 0.12956075370311737, "learning_rate": 0.01, "loss": 1.9805, "step": 50439 }, { "epoch": 5.180445722501798, "grad_norm": 0.0568457767367363, "learning_rate": 0.01, "loss": 1.9922, "step": 50442 }, { "epoch": 5.180753825613639, "grad_norm": 0.06275864690542221, "learning_rate": 0.01, "loss": 1.9874, "step": 50445 }, { "epoch": 5.18106192872548, "grad_norm": 0.07609914243221283, "learning_rate": 0.01, "loss": 1.9822, "step": 50448 }, { "epoch": 5.1813700318373215, "grad_norm": 0.0721612498164177, "learning_rate": 0.01, "loss": 2.0329, "step": 50451 }, { "epoch": 5.181678134949163, "grad_norm": 0.08733054995536804, "learning_rate": 0.01, "loss": 1.9605, "step": 50454 }, { "epoch": 5.181986238061004, "grad_norm": 0.10601712018251419, "learning_rate": 0.01, "loss": 1.9896, "step": 50457 }, { "epoch": 5.182294341172846, "grad_norm": 0.17671173810958862, "learning_rate": 0.01, "loss": 1.9903, "step": 50460 }, { "epoch": 5.182602444284687, "grad_norm": 0.08600035309791565, "learning_rate": 0.01, "loss": 1.9823, "step": 50463 }, { "epoch": 5.182910547396529, "grad_norm": 0.05488591641187668, "learning_rate": 0.01, "loss": 2.0033, "step": 50466 }, { "epoch": 5.18321865050837, "grad_norm": 0.08030954003334045, "learning_rate": 0.01, "loss": 1.9899, "step": 50469 }, { "epoch": 5.183526753620211, "grad_norm": 0.05973450466990471, "learning_rate": 0.01, "loss": 2.0063, "step": 50472 }, { "epoch": 5.183834856732053, "grad_norm": 0.07898557931184769, "learning_rate": 0.01, "loss": 2.0174, "step": 50475 }, { "epoch": 5.184142959843895, "grad_norm": 0.10037455707788467, "learning_rate": 0.01, "loss": 2.0057, "step": 50478 }, { "epoch": 5.184451062955736, "grad_norm": 0.08369861543178558, "learning_rate": 0.01, "loss": 1.9906, "step": 50481 }, { "epoch": 5.184759166067577, "grad_norm": 0.06350437551736832, "learning_rate": 0.01, "loss": 1.9976, "step": 50484 }, { "epoch": 5.185067269179418, "grad_norm": 0.05332079157233238, "learning_rate": 0.01, "loss": 1.9818, "step": 50487 }, { "epoch": 5.1853753722912606, "grad_norm": 0.09419017285108566, "learning_rate": 0.01, "loss": 1.9884, "step": 50490 }, { "epoch": 5.185683475403102, "grad_norm": 0.05123918130993843, "learning_rate": 0.01, "loss": 1.9896, "step": 50493 }, { "epoch": 5.185991578514943, "grad_norm": 0.08905530720949173, "learning_rate": 0.01, "loss": 2.0142, "step": 50496 }, { "epoch": 5.186299681626784, "grad_norm": 0.04132084175944328, "learning_rate": 0.01, "loss": 1.9858, "step": 50499 }, { "epoch": 5.186607784738626, "grad_norm": 0.04010608792304993, "learning_rate": 0.01, "loss": 1.9863, "step": 50502 }, { "epoch": 5.186915887850467, "grad_norm": 0.035268135368824005, "learning_rate": 0.01, "loss": 2.0181, "step": 50505 }, { "epoch": 5.187223990962309, "grad_norm": 0.07214079797267914, "learning_rate": 0.01, "loss": 1.9995, "step": 50508 }, { "epoch": 5.18753209407415, "grad_norm": 0.0641101822257042, "learning_rate": 0.01, "loss": 2.0056, "step": 50511 }, { "epoch": 5.1878401971859915, "grad_norm": 0.05329526588320732, "learning_rate": 0.01, "loss": 1.9798, "step": 50514 }, { "epoch": 5.188148300297833, "grad_norm": 0.10318466275930405, "learning_rate": 0.01, "loss": 1.9931, "step": 50517 }, { "epoch": 5.188456403409674, "grad_norm": 0.04416116699576378, "learning_rate": 0.01, "loss": 2.0333, "step": 50520 }, { "epoch": 5.188764506521516, "grad_norm": 0.03857633098959923, "learning_rate": 0.01, "loss": 1.9833, "step": 50523 }, { "epoch": 5.1890726096333575, "grad_norm": 0.054197683930397034, "learning_rate": 0.01, "loss": 2.0029, "step": 50526 }, { "epoch": 5.189380712745199, "grad_norm": 0.045404430478811264, "learning_rate": 0.01, "loss": 2.0048, "step": 50529 }, { "epoch": 5.18968881585704, "grad_norm": 0.10853458195924759, "learning_rate": 0.01, "loss": 1.9961, "step": 50532 }, { "epoch": 5.189996918968881, "grad_norm": 0.043089404702186584, "learning_rate": 0.01, "loss": 1.9954, "step": 50535 }, { "epoch": 5.190305022080723, "grad_norm": 0.05289897695183754, "learning_rate": 0.01, "loss": 1.9959, "step": 50538 }, { "epoch": 5.190613125192565, "grad_norm": 0.06284762918949127, "learning_rate": 0.01, "loss": 1.9905, "step": 50541 }, { "epoch": 5.190921228304406, "grad_norm": 0.0615723617374897, "learning_rate": 0.01, "loss": 2.0115, "step": 50544 }, { "epoch": 5.191229331416247, "grad_norm": 0.10425807535648346, "learning_rate": 0.01, "loss": 2.003, "step": 50547 }, { "epoch": 5.1915374345280885, "grad_norm": 0.09661048650741577, "learning_rate": 0.01, "loss": 2.004, "step": 50550 }, { "epoch": 5.19184553763993, "grad_norm": 0.059544507414102554, "learning_rate": 0.01, "loss": 2.0324, "step": 50553 }, { "epoch": 5.192153640751772, "grad_norm": 0.08903210610151291, "learning_rate": 0.01, "loss": 1.9994, "step": 50556 }, { "epoch": 5.192461743863613, "grad_norm": 0.04461904987692833, "learning_rate": 0.01, "loss": 2.0073, "step": 50559 }, { "epoch": 5.192769846975454, "grad_norm": 0.07844073325395584, "learning_rate": 0.01, "loss": 2.0141, "step": 50562 }, { "epoch": 5.193077950087296, "grad_norm": 0.08520335704088211, "learning_rate": 0.01, "loss": 1.9663, "step": 50565 }, { "epoch": 5.193386053199137, "grad_norm": 0.13090673089027405, "learning_rate": 0.01, "loss": 1.9887, "step": 50568 }, { "epoch": 5.193694156310979, "grad_norm": 0.08202611654996872, "learning_rate": 0.01, "loss": 2.0021, "step": 50571 }, { "epoch": 5.19400225942282, "grad_norm": 0.05638153478503227, "learning_rate": 0.01, "loss": 1.9983, "step": 50574 }, { "epoch": 5.194310362534662, "grad_norm": 0.03813661262392998, "learning_rate": 0.01, "loss": 2.0002, "step": 50577 }, { "epoch": 5.194618465646503, "grad_norm": 0.13044418394565582, "learning_rate": 0.01, "loss": 2.0132, "step": 50580 }, { "epoch": 5.194926568758344, "grad_norm": 0.04875580221414566, "learning_rate": 0.01, "loss": 2.0023, "step": 50583 }, { "epoch": 5.195234671870186, "grad_norm": 0.03675463795661926, "learning_rate": 0.01, "loss": 1.9981, "step": 50586 }, { "epoch": 5.1955427749820275, "grad_norm": 0.050565268844366074, "learning_rate": 0.01, "loss": 1.9943, "step": 50589 }, { "epoch": 5.195850878093869, "grad_norm": 0.08294785022735596, "learning_rate": 0.01, "loss": 2.0057, "step": 50592 }, { "epoch": 5.19615898120571, "grad_norm": 0.04059421271085739, "learning_rate": 0.01, "loss": 1.9631, "step": 50595 }, { "epoch": 5.196467084317551, "grad_norm": 0.044956889003515244, "learning_rate": 0.01, "loss": 1.9804, "step": 50598 }, { "epoch": 5.1967751874293935, "grad_norm": 0.11942701786756516, "learning_rate": 0.01, "loss": 2.0148, "step": 50601 }, { "epoch": 5.197083290541235, "grad_norm": 0.04948386549949646, "learning_rate": 0.01, "loss": 2.0043, "step": 50604 }, { "epoch": 5.197391393653076, "grad_norm": 0.08146567642688751, "learning_rate": 0.01, "loss": 2.0315, "step": 50607 }, { "epoch": 5.197699496764917, "grad_norm": 0.0451958030462265, "learning_rate": 0.01, "loss": 2.0138, "step": 50610 }, { "epoch": 5.1980075998767585, "grad_norm": 0.036100223660469055, "learning_rate": 0.01, "loss": 1.9876, "step": 50613 }, { "epoch": 5.1983157029886, "grad_norm": 0.035324469208717346, "learning_rate": 0.01, "loss": 1.9929, "step": 50616 }, { "epoch": 5.198623806100442, "grad_norm": 0.05005744844675064, "learning_rate": 0.01, "loss": 2.0062, "step": 50619 }, { "epoch": 5.198931909212283, "grad_norm": 0.08080115169286728, "learning_rate": 0.01, "loss": 1.9893, "step": 50622 }, { "epoch": 5.1992400123241245, "grad_norm": 0.07723166048526764, "learning_rate": 0.01, "loss": 1.9991, "step": 50625 }, { "epoch": 5.199548115435966, "grad_norm": 0.11351467669010162, "learning_rate": 0.01, "loss": 1.9948, "step": 50628 }, { "epoch": 5.199856218547807, "grad_norm": 0.03940621018409729, "learning_rate": 0.01, "loss": 1.9724, "step": 50631 }, { "epoch": 5.200164321659649, "grad_norm": 0.06582506000995636, "learning_rate": 0.01, "loss": 1.9937, "step": 50634 }, { "epoch": 5.20047242477149, "grad_norm": 0.030670402571558952, "learning_rate": 0.01, "loss": 1.979, "step": 50637 }, { "epoch": 5.200780527883332, "grad_norm": 0.06636448204517365, "learning_rate": 0.01, "loss": 1.9824, "step": 50640 }, { "epoch": 5.201088630995173, "grad_norm": 0.06079031527042389, "learning_rate": 0.01, "loss": 2.0074, "step": 50643 }, { "epoch": 5.201396734107014, "grad_norm": 0.1049150750041008, "learning_rate": 0.01, "loss": 2.0114, "step": 50646 }, { "epoch": 5.2017048372188555, "grad_norm": 0.08547158539295197, "learning_rate": 0.01, "loss": 1.9965, "step": 50649 }, { "epoch": 5.202012940330698, "grad_norm": 0.06914935261011124, "learning_rate": 0.01, "loss": 2.0037, "step": 50652 }, { "epoch": 5.202321043442539, "grad_norm": 0.05997157096862793, "learning_rate": 0.01, "loss": 1.9891, "step": 50655 }, { "epoch": 5.20262914655438, "grad_norm": 0.052482035011053085, "learning_rate": 0.01, "loss": 1.9916, "step": 50658 }, { "epoch": 5.202937249666221, "grad_norm": 0.12078742682933807, "learning_rate": 0.01, "loss": 2.0135, "step": 50661 }, { "epoch": 5.203245352778063, "grad_norm": 0.08411680161952972, "learning_rate": 0.01, "loss": 2.0155, "step": 50664 }, { "epoch": 5.203553455889905, "grad_norm": 0.06186600774526596, "learning_rate": 0.01, "loss": 2.022, "step": 50667 }, { "epoch": 5.203861559001746, "grad_norm": 0.09473706781864166, "learning_rate": 0.01, "loss": 1.9982, "step": 50670 }, { "epoch": 5.204169662113587, "grad_norm": 0.05376753211021423, "learning_rate": 0.01, "loss": 1.9948, "step": 50673 }, { "epoch": 5.204477765225429, "grad_norm": 0.08271234482526779, "learning_rate": 0.01, "loss": 2.0173, "step": 50676 }, { "epoch": 5.20478586833727, "grad_norm": 0.07704611122608185, "learning_rate": 0.01, "loss": 1.9843, "step": 50679 }, { "epoch": 5.205093971449112, "grad_norm": 0.0716482475399971, "learning_rate": 0.01, "loss": 1.9891, "step": 50682 }, { "epoch": 5.205402074560953, "grad_norm": 0.07560107111930847, "learning_rate": 0.01, "loss": 2.0152, "step": 50685 }, { "epoch": 5.2057101776727945, "grad_norm": 0.07398194819688797, "learning_rate": 0.01, "loss": 2.0095, "step": 50688 }, { "epoch": 5.206018280784636, "grad_norm": 0.09211590141057968, "learning_rate": 0.01, "loss": 1.9672, "step": 50691 }, { "epoch": 5.206326383896477, "grad_norm": 0.0511045902967453, "learning_rate": 0.01, "loss": 1.9978, "step": 50694 }, { "epoch": 5.206634487008319, "grad_norm": 0.046343546360731125, "learning_rate": 0.01, "loss": 1.989, "step": 50697 }, { "epoch": 5.2069425901201605, "grad_norm": 0.0472230389714241, "learning_rate": 0.01, "loss": 1.9947, "step": 50700 }, { "epoch": 5.207250693232002, "grad_norm": 0.09529436379671097, "learning_rate": 0.01, "loss": 2.0117, "step": 50703 }, { "epoch": 5.207558796343843, "grad_norm": 0.15323568880558014, "learning_rate": 0.01, "loss": 2.0082, "step": 50706 }, { "epoch": 5.207866899455684, "grad_norm": 0.1495465189218521, "learning_rate": 0.01, "loss": 2.0021, "step": 50709 }, { "epoch": 5.2081750025675255, "grad_norm": 0.09416273981332779, "learning_rate": 0.01, "loss": 1.9982, "step": 50712 }, { "epoch": 5.208483105679368, "grad_norm": 0.05051533132791519, "learning_rate": 0.01, "loss": 1.9977, "step": 50715 }, { "epoch": 5.208791208791209, "grad_norm": 0.03743917867541313, "learning_rate": 0.01, "loss": 1.9872, "step": 50718 }, { "epoch": 5.20909931190305, "grad_norm": 0.04133860394358635, "learning_rate": 0.01, "loss": 1.9981, "step": 50721 }, { "epoch": 5.2094074150148915, "grad_norm": 0.05624905601143837, "learning_rate": 0.01, "loss": 2.0213, "step": 50724 }, { "epoch": 5.209715518126733, "grad_norm": 0.04526040330529213, "learning_rate": 0.01, "loss": 2.0013, "step": 50727 }, { "epoch": 5.210023621238575, "grad_norm": 0.06159577891230583, "learning_rate": 0.01, "loss": 1.9786, "step": 50730 }, { "epoch": 5.210331724350416, "grad_norm": 0.10257083177566528, "learning_rate": 0.01, "loss": 1.9977, "step": 50733 }, { "epoch": 5.210639827462257, "grad_norm": 0.0896536186337471, "learning_rate": 0.01, "loss": 1.9878, "step": 50736 }, { "epoch": 5.210947930574099, "grad_norm": 0.0499761626124382, "learning_rate": 0.01, "loss": 1.9854, "step": 50739 }, { "epoch": 5.21125603368594, "grad_norm": 0.044219888746738434, "learning_rate": 0.01, "loss": 2.0012, "step": 50742 }, { "epoch": 5.211564136797782, "grad_norm": 0.056992996484041214, "learning_rate": 0.01, "loss": 2.0009, "step": 50745 }, { "epoch": 5.211872239909623, "grad_norm": 0.10208120197057724, "learning_rate": 0.01, "loss": 2.014, "step": 50748 }, { "epoch": 5.212180343021465, "grad_norm": 0.07243528962135315, "learning_rate": 0.01, "loss": 2.0177, "step": 50751 }, { "epoch": 5.212488446133306, "grad_norm": 0.04413043335080147, "learning_rate": 0.01, "loss": 1.996, "step": 50754 }, { "epoch": 5.212796549245147, "grad_norm": 0.09615755081176758, "learning_rate": 0.01, "loss": 1.9998, "step": 50757 }, { "epoch": 5.213104652356988, "grad_norm": 0.04845009744167328, "learning_rate": 0.01, "loss": 1.9976, "step": 50760 }, { "epoch": 5.2134127554688305, "grad_norm": 0.12175197154283524, "learning_rate": 0.01, "loss": 2.0204, "step": 50763 }, { "epoch": 5.213720858580672, "grad_norm": 0.045076884329319, "learning_rate": 0.01, "loss": 1.9739, "step": 50766 }, { "epoch": 5.214028961692513, "grad_norm": 0.07600415498018265, "learning_rate": 0.01, "loss": 2.0119, "step": 50769 }, { "epoch": 5.214337064804354, "grad_norm": 0.06227222830057144, "learning_rate": 0.01, "loss": 1.9922, "step": 50772 }, { "epoch": 5.214645167916196, "grad_norm": 0.10495235025882721, "learning_rate": 0.01, "loss": 2.0132, "step": 50775 }, { "epoch": 5.214953271028038, "grad_norm": 0.08388978242874146, "learning_rate": 0.01, "loss": 2.018, "step": 50778 }, { "epoch": 5.215261374139879, "grad_norm": 0.07591544091701508, "learning_rate": 0.01, "loss": 2.0234, "step": 50781 }, { "epoch": 5.21556947725172, "grad_norm": 0.07411694526672363, "learning_rate": 0.01, "loss": 2.0082, "step": 50784 }, { "epoch": 5.2158775803635615, "grad_norm": 0.07496411353349686, "learning_rate": 0.01, "loss": 2.0198, "step": 50787 }, { "epoch": 5.216185683475403, "grad_norm": 0.07005130499601364, "learning_rate": 0.01, "loss": 1.9985, "step": 50790 }, { "epoch": 5.216493786587245, "grad_norm": 0.05165226384997368, "learning_rate": 0.01, "loss": 2.0114, "step": 50793 }, { "epoch": 5.216801889699086, "grad_norm": 0.09063982218503952, "learning_rate": 0.01, "loss": 1.9989, "step": 50796 }, { "epoch": 5.2171099928109275, "grad_norm": 0.03189365193247795, "learning_rate": 0.01, "loss": 1.9828, "step": 50799 }, { "epoch": 5.217418095922769, "grad_norm": 0.07051954418420792, "learning_rate": 0.01, "loss": 2.0027, "step": 50802 }, { "epoch": 5.21772619903461, "grad_norm": 0.045339349657297134, "learning_rate": 0.01, "loss": 1.9932, "step": 50805 }, { "epoch": 5.218034302146451, "grad_norm": 0.041392091661691666, "learning_rate": 0.01, "loss": 2.0054, "step": 50808 }, { "epoch": 5.218342405258293, "grad_norm": 0.05115804076194763, "learning_rate": 0.01, "loss": 2.0046, "step": 50811 }, { "epoch": 5.218650508370135, "grad_norm": 0.0681786835193634, "learning_rate": 0.01, "loss": 2.0167, "step": 50814 }, { "epoch": 5.218958611481976, "grad_norm": 0.057782597839832306, "learning_rate": 0.01, "loss": 1.9909, "step": 50817 }, { "epoch": 5.219266714593817, "grad_norm": 0.04151969403028488, "learning_rate": 0.01, "loss": 1.9884, "step": 50820 }, { "epoch": 5.2195748177056585, "grad_norm": 0.06815563142299652, "learning_rate": 0.01, "loss": 2.0215, "step": 50823 }, { "epoch": 5.219882920817501, "grad_norm": 0.1696784943342209, "learning_rate": 0.01, "loss": 2.0014, "step": 50826 }, { "epoch": 5.220191023929342, "grad_norm": 0.04126200079917908, "learning_rate": 0.01, "loss": 1.9838, "step": 50829 }, { "epoch": 5.220499127041183, "grad_norm": 0.03278988599777222, "learning_rate": 0.01, "loss": 2.0102, "step": 50832 }, { "epoch": 5.220807230153024, "grad_norm": 0.0632409155368805, "learning_rate": 0.01, "loss": 2.0014, "step": 50835 }, { "epoch": 5.221115333264866, "grad_norm": 0.06449749320745468, "learning_rate": 0.01, "loss": 2.0045, "step": 50838 }, { "epoch": 5.221423436376708, "grad_norm": 0.04530004784464836, "learning_rate": 0.01, "loss": 1.9978, "step": 50841 }, { "epoch": 5.221731539488549, "grad_norm": 0.06646733731031418, "learning_rate": 0.01, "loss": 1.9972, "step": 50844 }, { "epoch": 5.22203964260039, "grad_norm": 0.040766507387161255, "learning_rate": 0.01, "loss": 1.9718, "step": 50847 }, { "epoch": 5.222347745712232, "grad_norm": 0.04910712689161301, "learning_rate": 0.01, "loss": 1.9944, "step": 50850 }, { "epoch": 5.222655848824073, "grad_norm": 0.03790167346596718, "learning_rate": 0.01, "loss": 1.9806, "step": 50853 }, { "epoch": 5.222963951935915, "grad_norm": 0.04654322564601898, "learning_rate": 0.01, "loss": 1.9981, "step": 50856 }, { "epoch": 5.223272055047756, "grad_norm": 0.05322917550802231, "learning_rate": 0.01, "loss": 1.9919, "step": 50859 }, { "epoch": 5.2235801581595975, "grad_norm": 0.10331053286790848, "learning_rate": 0.01, "loss": 2.0289, "step": 50862 }, { "epoch": 5.223888261271439, "grad_norm": 0.14793960750102997, "learning_rate": 0.01, "loss": 1.9954, "step": 50865 }, { "epoch": 5.22419636438328, "grad_norm": 0.06507808715105057, "learning_rate": 0.01, "loss": 2.0158, "step": 50868 }, { "epoch": 5.224504467495121, "grad_norm": 0.06138547509908676, "learning_rate": 0.01, "loss": 2.0176, "step": 50871 }, { "epoch": 5.2248125706069635, "grad_norm": 0.04522119089961052, "learning_rate": 0.01, "loss": 1.9838, "step": 50874 }, { "epoch": 5.225120673718805, "grad_norm": 0.044236768037080765, "learning_rate": 0.01, "loss": 2.0052, "step": 50877 }, { "epoch": 5.225428776830646, "grad_norm": 0.05313803628087044, "learning_rate": 0.01, "loss": 2.0163, "step": 50880 }, { "epoch": 5.225736879942487, "grad_norm": 0.06283990293741226, "learning_rate": 0.01, "loss": 2.0061, "step": 50883 }, { "epoch": 5.2260449830543285, "grad_norm": 0.05976197123527527, "learning_rate": 0.01, "loss": 1.9953, "step": 50886 }, { "epoch": 5.226353086166171, "grad_norm": 0.10755223780870438, "learning_rate": 0.01, "loss": 1.9934, "step": 50889 }, { "epoch": 5.226661189278012, "grad_norm": 0.040908023715019226, "learning_rate": 0.01, "loss": 1.9898, "step": 50892 }, { "epoch": 5.226969292389853, "grad_norm": 0.08019308000802994, "learning_rate": 0.01, "loss": 1.9709, "step": 50895 }, { "epoch": 5.2272773955016945, "grad_norm": 0.04088309779763222, "learning_rate": 0.01, "loss": 2.0215, "step": 50898 }, { "epoch": 5.227585498613536, "grad_norm": 0.06042852997779846, "learning_rate": 0.01, "loss": 1.9801, "step": 50901 }, { "epoch": 5.227893601725377, "grad_norm": 0.06619790196418762, "learning_rate": 0.01, "loss": 2.0253, "step": 50904 }, { "epoch": 5.228201704837219, "grad_norm": 0.0903916209936142, "learning_rate": 0.01, "loss": 2.0068, "step": 50907 }, { "epoch": 5.22850980794906, "grad_norm": 0.06611988693475723, "learning_rate": 0.01, "loss": 1.9967, "step": 50910 }, { "epoch": 5.228817911060902, "grad_norm": 0.07255574315786362, "learning_rate": 0.01, "loss": 1.9713, "step": 50913 }, { "epoch": 5.229126014172743, "grad_norm": 0.07492070645093918, "learning_rate": 0.01, "loss": 1.9643, "step": 50916 }, { "epoch": 5.229434117284584, "grad_norm": 0.07850334793329239, "learning_rate": 0.01, "loss": 2.0012, "step": 50919 }, { "epoch": 5.229742220396426, "grad_norm": 0.0485299751162529, "learning_rate": 0.01, "loss": 1.999, "step": 50922 }, { "epoch": 5.230050323508268, "grad_norm": 0.09580907970666885, "learning_rate": 0.01, "loss": 2.0075, "step": 50925 }, { "epoch": 5.230358426620109, "grad_norm": 0.10689478367567062, "learning_rate": 0.01, "loss": 2.0018, "step": 50928 }, { "epoch": 5.23066652973195, "grad_norm": 0.08192954957485199, "learning_rate": 0.01, "loss": 2.0266, "step": 50931 }, { "epoch": 5.230974632843791, "grad_norm": 0.06330616772174835, "learning_rate": 0.01, "loss": 1.9731, "step": 50934 }, { "epoch": 5.2312827359556335, "grad_norm": 0.041825070977211, "learning_rate": 0.01, "loss": 2.0054, "step": 50937 }, { "epoch": 5.231590839067475, "grad_norm": 0.04370388388633728, "learning_rate": 0.01, "loss": 1.9912, "step": 50940 }, { "epoch": 5.231898942179316, "grad_norm": 0.039774972945451736, "learning_rate": 0.01, "loss": 1.9827, "step": 50943 }, { "epoch": 5.232207045291157, "grad_norm": 0.06065038964152336, "learning_rate": 0.01, "loss": 1.9772, "step": 50946 }, { "epoch": 5.232515148402999, "grad_norm": 0.1340397149324417, "learning_rate": 0.01, "loss": 1.9971, "step": 50949 }, { "epoch": 5.232823251514841, "grad_norm": 0.05149194970726967, "learning_rate": 0.01, "loss": 2.0259, "step": 50952 }, { "epoch": 5.233131354626682, "grad_norm": 0.043973732739686966, "learning_rate": 0.01, "loss": 1.9863, "step": 50955 }, { "epoch": 5.233439457738523, "grad_norm": 0.034024860709905624, "learning_rate": 0.01, "loss": 1.9867, "step": 50958 }, { "epoch": 5.2337475608503645, "grad_norm": 0.03558748587965965, "learning_rate": 0.01, "loss": 2.0271, "step": 50961 }, { "epoch": 5.234055663962206, "grad_norm": 0.03661567345261574, "learning_rate": 0.01, "loss": 2.0093, "step": 50964 }, { "epoch": 5.234363767074047, "grad_norm": 0.11346925795078278, "learning_rate": 0.01, "loss": 1.9861, "step": 50967 }, { "epoch": 5.234671870185889, "grad_norm": 0.05934759974479675, "learning_rate": 0.01, "loss": 2.0058, "step": 50970 }, { "epoch": 5.2349799732977305, "grad_norm": 0.08264946192502975, "learning_rate": 0.01, "loss": 1.9998, "step": 50973 }, { "epoch": 5.235288076409572, "grad_norm": 0.0470314584672451, "learning_rate": 0.01, "loss": 2.001, "step": 50976 }, { "epoch": 5.235596179521413, "grad_norm": 0.04239676892757416, "learning_rate": 0.01, "loss": 2.0114, "step": 50979 }, { "epoch": 5.235904282633254, "grad_norm": 0.09247265011072159, "learning_rate": 0.01, "loss": 2.0097, "step": 50982 }, { "epoch": 5.236212385745096, "grad_norm": 0.05270032957196236, "learning_rate": 0.01, "loss": 1.9795, "step": 50985 }, { "epoch": 5.236520488856938, "grad_norm": 0.0351402647793293, "learning_rate": 0.01, "loss": 2.0236, "step": 50988 }, { "epoch": 5.236828591968779, "grad_norm": 0.04401613771915436, "learning_rate": 0.01, "loss": 1.9836, "step": 50991 }, { "epoch": 5.23713669508062, "grad_norm": 0.0353761650621891, "learning_rate": 0.01, "loss": 2.0021, "step": 50994 }, { "epoch": 5.2374447981924614, "grad_norm": 0.13582314550876617, "learning_rate": 0.01, "loss": 2.0199, "step": 50997 }, { "epoch": 5.237752901304304, "grad_norm": 0.06481800973415375, "learning_rate": 0.01, "loss": 2.0023, "step": 51000 }, { "epoch": 5.238061004416145, "grad_norm": 0.06499173492193222, "learning_rate": 0.01, "loss": 2.0251, "step": 51003 }, { "epoch": 5.238369107527986, "grad_norm": 0.059027299284935, "learning_rate": 0.01, "loss": 1.9977, "step": 51006 }, { "epoch": 5.238677210639827, "grad_norm": 0.11211716383695602, "learning_rate": 0.01, "loss": 2.0132, "step": 51009 }, { "epoch": 5.238985313751669, "grad_norm": 0.041850294917821884, "learning_rate": 0.01, "loss": 1.9843, "step": 51012 }, { "epoch": 5.23929341686351, "grad_norm": 0.11037513613700867, "learning_rate": 0.01, "loss": 2.0258, "step": 51015 }, { "epoch": 5.239601519975352, "grad_norm": 0.06720243394374847, "learning_rate": 0.01, "loss": 1.9741, "step": 51018 }, { "epoch": 5.239909623087193, "grad_norm": 0.05992870405316353, "learning_rate": 0.01, "loss": 2.0079, "step": 51021 }, { "epoch": 5.240217726199035, "grad_norm": 0.046941906213760376, "learning_rate": 0.01, "loss": 1.9906, "step": 51024 }, { "epoch": 5.240525829310876, "grad_norm": 0.0415794812142849, "learning_rate": 0.01, "loss": 1.9986, "step": 51027 }, { "epoch": 5.240833932422717, "grad_norm": 0.046740952879190445, "learning_rate": 0.01, "loss": 1.9767, "step": 51030 }, { "epoch": 5.241142035534559, "grad_norm": 0.04328366369009018, "learning_rate": 0.01, "loss": 2.0068, "step": 51033 }, { "epoch": 5.2414501386464005, "grad_norm": 0.10686604678630829, "learning_rate": 0.01, "loss": 2.0091, "step": 51036 }, { "epoch": 5.241758241758242, "grad_norm": 0.0445748046040535, "learning_rate": 0.01, "loss": 1.9851, "step": 51039 }, { "epoch": 5.242066344870083, "grad_norm": 0.04299461841583252, "learning_rate": 0.01, "loss": 1.9822, "step": 51042 }, { "epoch": 5.242374447981924, "grad_norm": 0.06391250342130661, "learning_rate": 0.01, "loss": 2.0105, "step": 51045 }, { "epoch": 5.2426825510937665, "grad_norm": 0.03528610244393349, "learning_rate": 0.01, "loss": 2.0047, "step": 51048 }, { "epoch": 5.242990654205608, "grad_norm": 0.030670564621686935, "learning_rate": 0.01, "loss": 1.9809, "step": 51051 }, { "epoch": 5.243298757317449, "grad_norm": 0.11291905492544174, "learning_rate": 0.01, "loss": 2.0204, "step": 51054 }, { "epoch": 5.24360686042929, "grad_norm": 0.08508855104446411, "learning_rate": 0.01, "loss": 1.9977, "step": 51057 }, { "epoch": 5.2439149635411315, "grad_norm": 0.1319185048341751, "learning_rate": 0.01, "loss": 2.0088, "step": 51060 }, { "epoch": 5.244223066652973, "grad_norm": 0.08218291401863098, "learning_rate": 0.01, "loss": 2.0002, "step": 51063 }, { "epoch": 5.244531169764815, "grad_norm": 0.06311830878257751, "learning_rate": 0.01, "loss": 2.0104, "step": 51066 }, { "epoch": 5.244839272876656, "grad_norm": 0.03945956006646156, "learning_rate": 0.01, "loss": 1.9672, "step": 51069 }, { "epoch": 5.2451473759884975, "grad_norm": 0.045635320246219635, "learning_rate": 0.01, "loss": 2.0018, "step": 51072 }, { "epoch": 5.245455479100339, "grad_norm": 0.047721873968839645, "learning_rate": 0.01, "loss": 1.9753, "step": 51075 }, { "epoch": 5.24576358221218, "grad_norm": 0.04936512932181358, "learning_rate": 0.01, "loss": 1.9911, "step": 51078 }, { "epoch": 5.246071685324022, "grad_norm": 0.05856813117861748, "learning_rate": 0.01, "loss": 2.0055, "step": 51081 }, { "epoch": 5.246379788435863, "grad_norm": 0.05068356543779373, "learning_rate": 0.01, "loss": 1.9823, "step": 51084 }, { "epoch": 5.246687891547705, "grad_norm": 0.05900732800364494, "learning_rate": 0.01, "loss": 1.992, "step": 51087 }, { "epoch": 5.246995994659546, "grad_norm": 0.12454086542129517, "learning_rate": 0.01, "loss": 2.02, "step": 51090 }, { "epoch": 5.247304097771387, "grad_norm": 0.037892140448093414, "learning_rate": 0.01, "loss": 2.0078, "step": 51093 }, { "epoch": 5.247612200883229, "grad_norm": 0.07839351892471313, "learning_rate": 0.01, "loss": 1.9993, "step": 51096 }, { "epoch": 5.247920303995071, "grad_norm": 0.04203120246529579, "learning_rate": 0.01, "loss": 1.9974, "step": 51099 }, { "epoch": 5.248228407106912, "grad_norm": 0.07562438398599625, "learning_rate": 0.01, "loss": 1.9896, "step": 51102 }, { "epoch": 5.248536510218753, "grad_norm": 0.044604718685150146, "learning_rate": 0.01, "loss": 1.9879, "step": 51105 }, { "epoch": 5.248844613330594, "grad_norm": 0.09438357502222061, "learning_rate": 0.01, "loss": 2.0028, "step": 51108 }, { "epoch": 5.249152716442436, "grad_norm": 0.04773309826850891, "learning_rate": 0.01, "loss": 1.9631, "step": 51111 }, { "epoch": 5.249460819554278, "grad_norm": 0.033143848180770874, "learning_rate": 0.01, "loss": 1.9766, "step": 51114 }, { "epoch": 5.249768922666119, "grad_norm": 0.0772877037525177, "learning_rate": 0.01, "loss": 1.9921, "step": 51117 }, { "epoch": 5.25007702577796, "grad_norm": 0.054028406739234924, "learning_rate": 0.01, "loss": 2.0047, "step": 51120 }, { "epoch": 5.250385128889802, "grad_norm": 0.07667548954486847, "learning_rate": 0.01, "loss": 2.0012, "step": 51123 }, { "epoch": 5.250693232001643, "grad_norm": 0.06274887174367905, "learning_rate": 0.01, "loss": 1.9972, "step": 51126 }, { "epoch": 5.251001335113485, "grad_norm": 0.09405583888292313, "learning_rate": 0.01, "loss": 1.9902, "step": 51129 }, { "epoch": 5.251309438225326, "grad_norm": 0.06297732889652252, "learning_rate": 0.01, "loss": 1.9995, "step": 51132 }, { "epoch": 5.2516175413371675, "grad_norm": 0.08258040994405746, "learning_rate": 0.01, "loss": 1.9871, "step": 51135 }, { "epoch": 5.251925644449009, "grad_norm": 0.11019886285066605, "learning_rate": 0.01, "loss": 1.9902, "step": 51138 }, { "epoch": 5.25223374756085, "grad_norm": 0.06206972151994705, "learning_rate": 0.01, "loss": 2.0086, "step": 51141 }, { "epoch": 5.252541850672692, "grad_norm": 0.045681048184633255, "learning_rate": 0.01, "loss": 1.9874, "step": 51144 }, { "epoch": 5.2528499537845335, "grad_norm": 0.041622232645750046, "learning_rate": 0.01, "loss": 2.0125, "step": 51147 }, { "epoch": 5.253158056896375, "grad_norm": 0.05589982122182846, "learning_rate": 0.01, "loss": 2.0137, "step": 51150 }, { "epoch": 5.253466160008216, "grad_norm": 0.06394509226083755, "learning_rate": 0.01, "loss": 1.9974, "step": 51153 }, { "epoch": 5.253774263120057, "grad_norm": 0.05354580655694008, "learning_rate": 0.01, "loss": 1.9937, "step": 51156 }, { "epoch": 5.2540823662318985, "grad_norm": 0.06941652297973633, "learning_rate": 0.01, "loss": 2.0012, "step": 51159 }, { "epoch": 5.254390469343741, "grad_norm": 0.0771641731262207, "learning_rate": 0.01, "loss": 1.9945, "step": 51162 }, { "epoch": 5.254698572455582, "grad_norm": 0.08898761868476868, "learning_rate": 0.01, "loss": 2.0024, "step": 51165 }, { "epoch": 5.255006675567423, "grad_norm": 0.03489289805293083, "learning_rate": 0.01, "loss": 1.992, "step": 51168 }, { "epoch": 5.255314778679264, "grad_norm": 0.0783892497420311, "learning_rate": 0.01, "loss": 1.9831, "step": 51171 }, { "epoch": 5.255622881791106, "grad_norm": 0.08177445828914642, "learning_rate": 0.01, "loss": 2.01, "step": 51174 }, { "epoch": 5.255930984902948, "grad_norm": 0.04709051921963692, "learning_rate": 0.01, "loss": 1.9794, "step": 51177 }, { "epoch": 5.256239088014789, "grad_norm": 0.13084626197814941, "learning_rate": 0.01, "loss": 2.0024, "step": 51180 }, { "epoch": 5.25654719112663, "grad_norm": 0.04432738944888115, "learning_rate": 0.01, "loss": 1.9756, "step": 51183 }, { "epoch": 5.256855294238472, "grad_norm": 0.053161200135946274, "learning_rate": 0.01, "loss": 1.9859, "step": 51186 }, { "epoch": 5.257163397350313, "grad_norm": 0.039357706904411316, "learning_rate": 0.01, "loss": 1.9972, "step": 51189 }, { "epoch": 5.257471500462155, "grad_norm": 0.052901457995176315, "learning_rate": 0.01, "loss": 1.9953, "step": 51192 }, { "epoch": 5.257779603573996, "grad_norm": 0.11097205430269241, "learning_rate": 0.01, "loss": 2.0029, "step": 51195 }, { "epoch": 5.258087706685838, "grad_norm": 0.07301553338766098, "learning_rate": 0.01, "loss": 2.0066, "step": 51198 }, { "epoch": 5.258395809797679, "grad_norm": 0.17759805917739868, "learning_rate": 0.01, "loss": 2.0266, "step": 51201 }, { "epoch": 5.25870391290952, "grad_norm": 0.07599959522485733, "learning_rate": 0.01, "loss": 1.982, "step": 51204 }, { "epoch": 5.259012016021362, "grad_norm": 0.13010208308696747, "learning_rate": 0.01, "loss": 2.0013, "step": 51207 }, { "epoch": 5.2593201191332035, "grad_norm": 0.07105369120836258, "learning_rate": 0.01, "loss": 2.0059, "step": 51210 }, { "epoch": 5.259628222245045, "grad_norm": 0.16369540989398956, "learning_rate": 0.01, "loss": 2.0047, "step": 51213 }, { "epoch": 5.259936325356886, "grad_norm": 0.11588908731937408, "learning_rate": 0.01, "loss": 1.9934, "step": 51216 }, { "epoch": 5.260244428468727, "grad_norm": 0.045902661979198456, "learning_rate": 0.01, "loss": 1.9742, "step": 51219 }, { "epoch": 5.260552531580569, "grad_norm": 0.04714805632829666, "learning_rate": 0.01, "loss": 2.0054, "step": 51222 }, { "epoch": 5.260860634692411, "grad_norm": 0.05099105089902878, "learning_rate": 0.01, "loss": 1.9777, "step": 51225 }, { "epoch": 5.261168737804252, "grad_norm": 0.050430044531822205, "learning_rate": 0.01, "loss": 1.9919, "step": 51228 }, { "epoch": 5.261476840916093, "grad_norm": 0.03998453542590141, "learning_rate": 0.01, "loss": 2.0175, "step": 51231 }, { "epoch": 5.2617849440279345, "grad_norm": 0.05709048733115196, "learning_rate": 0.01, "loss": 1.9877, "step": 51234 }, { "epoch": 5.262093047139776, "grad_norm": 0.04667646437883377, "learning_rate": 0.01, "loss": 1.9854, "step": 51237 }, { "epoch": 5.262401150251618, "grad_norm": 0.0413067601621151, "learning_rate": 0.01, "loss": 2.0118, "step": 51240 }, { "epoch": 5.262709253363459, "grad_norm": 0.037300482392311096, "learning_rate": 0.01, "loss": 2.0181, "step": 51243 }, { "epoch": 5.2630173564753004, "grad_norm": 0.10188855230808258, "learning_rate": 0.01, "loss": 1.9705, "step": 51246 }, { "epoch": 5.263325459587142, "grad_norm": 0.060019396245479584, "learning_rate": 0.01, "loss": 2.0153, "step": 51249 }, { "epoch": 5.263633562698983, "grad_norm": 0.07587499171495438, "learning_rate": 0.01, "loss": 2.0006, "step": 51252 }, { "epoch": 5.263941665810824, "grad_norm": 0.17544515430927277, "learning_rate": 0.01, "loss": 1.9913, "step": 51255 }, { "epoch": 5.264249768922666, "grad_norm": 0.046111736446619034, "learning_rate": 0.01, "loss": 2.0078, "step": 51258 }, { "epoch": 5.264557872034508, "grad_norm": 0.03697605058550835, "learning_rate": 0.01, "loss": 2.0069, "step": 51261 }, { "epoch": 5.264865975146349, "grad_norm": 0.04376983270049095, "learning_rate": 0.01, "loss": 1.9866, "step": 51264 }, { "epoch": 5.26517407825819, "grad_norm": 0.06879153102636337, "learning_rate": 0.01, "loss": 1.9983, "step": 51267 }, { "epoch": 5.265482181370031, "grad_norm": 0.05067580193281174, "learning_rate": 0.01, "loss": 1.987, "step": 51270 }, { "epoch": 5.265790284481874, "grad_norm": 0.04876921325922012, "learning_rate": 0.01, "loss": 2.0042, "step": 51273 }, { "epoch": 5.266098387593715, "grad_norm": 0.08174460381269455, "learning_rate": 0.01, "loss": 1.9959, "step": 51276 }, { "epoch": 5.266406490705556, "grad_norm": 0.08350025117397308, "learning_rate": 0.01, "loss": 1.9784, "step": 51279 }, { "epoch": 5.266714593817397, "grad_norm": 0.10765060782432556, "learning_rate": 0.01, "loss": 1.9853, "step": 51282 }, { "epoch": 5.267022696929239, "grad_norm": 0.09596231579780579, "learning_rate": 0.01, "loss": 1.9896, "step": 51285 }, { "epoch": 5.267330800041081, "grad_norm": 0.03738878667354584, "learning_rate": 0.01, "loss": 1.9804, "step": 51288 }, { "epoch": 5.267638903152922, "grad_norm": 0.03415136784315109, "learning_rate": 0.01, "loss": 1.9909, "step": 51291 }, { "epoch": 5.267947006264763, "grad_norm": 0.04650856927037239, "learning_rate": 0.01, "loss": 2.0169, "step": 51294 }, { "epoch": 5.268255109376605, "grad_norm": 0.06548796594142914, "learning_rate": 0.01, "loss": 1.9862, "step": 51297 }, { "epoch": 5.268563212488446, "grad_norm": 0.05420374125242233, "learning_rate": 0.01, "loss": 1.9837, "step": 51300 }, { "epoch": 5.268871315600288, "grad_norm": 0.10747869312763214, "learning_rate": 0.01, "loss": 2.0013, "step": 51303 }, { "epoch": 5.269179418712129, "grad_norm": 0.03331341966986656, "learning_rate": 0.01, "loss": 1.9997, "step": 51306 }, { "epoch": 5.2694875218239705, "grad_norm": 0.10144448280334473, "learning_rate": 0.01, "loss": 2.0065, "step": 51309 }, { "epoch": 5.269795624935812, "grad_norm": 0.08695587515830994, "learning_rate": 0.01, "loss": 2.0069, "step": 51312 }, { "epoch": 5.270103728047653, "grad_norm": 0.053817540407180786, "learning_rate": 0.01, "loss": 2.026, "step": 51315 }, { "epoch": 5.270411831159494, "grad_norm": 0.04269054904580116, "learning_rate": 0.01, "loss": 2.0195, "step": 51318 }, { "epoch": 5.2707199342713364, "grad_norm": 0.048786550760269165, "learning_rate": 0.01, "loss": 1.9877, "step": 51321 }, { "epoch": 5.271028037383178, "grad_norm": 0.0634552389383316, "learning_rate": 0.01, "loss": 2.0173, "step": 51324 }, { "epoch": 5.271336140495019, "grad_norm": 0.14295388758182526, "learning_rate": 0.01, "loss": 2.0062, "step": 51327 }, { "epoch": 5.27164424360686, "grad_norm": 0.05080576241016388, "learning_rate": 0.01, "loss": 1.9895, "step": 51330 }, { "epoch": 5.2719523467187015, "grad_norm": 0.06417543441057205, "learning_rate": 0.01, "loss": 1.9843, "step": 51333 }, { "epoch": 5.272260449830544, "grad_norm": 0.06565088778734207, "learning_rate": 0.01, "loss": 1.9925, "step": 51336 }, { "epoch": 5.272568552942385, "grad_norm": 0.028631992638111115, "learning_rate": 0.01, "loss": 1.9949, "step": 51339 }, { "epoch": 5.272876656054226, "grad_norm": 0.10242610424757004, "learning_rate": 0.01, "loss": 1.9824, "step": 51342 }, { "epoch": 5.273184759166067, "grad_norm": 0.11979396641254425, "learning_rate": 0.01, "loss": 2.0258, "step": 51345 }, { "epoch": 5.273492862277909, "grad_norm": 0.07476961612701416, "learning_rate": 0.01, "loss": 2.0282, "step": 51348 }, { "epoch": 5.273800965389751, "grad_norm": 0.05603867769241333, "learning_rate": 0.01, "loss": 1.991, "step": 51351 }, { "epoch": 5.274109068501592, "grad_norm": 0.04897085204720497, "learning_rate": 0.01, "loss": 2.0187, "step": 51354 }, { "epoch": 5.274417171613433, "grad_norm": 0.03439117968082428, "learning_rate": 0.01, "loss": 1.9989, "step": 51357 }, { "epoch": 5.274725274725275, "grad_norm": 0.08314791321754456, "learning_rate": 0.01, "loss": 2.0117, "step": 51360 }, { "epoch": 5.275033377837116, "grad_norm": 0.0685926079750061, "learning_rate": 0.01, "loss": 1.9787, "step": 51363 }, { "epoch": 5.275341480948958, "grad_norm": 0.088614821434021, "learning_rate": 0.01, "loss": 1.9949, "step": 51366 }, { "epoch": 5.275649584060799, "grad_norm": 0.03873438388109207, "learning_rate": 0.01, "loss": 2.001, "step": 51369 }, { "epoch": 5.275957687172641, "grad_norm": 0.11569857597351074, "learning_rate": 0.01, "loss": 2.0017, "step": 51372 }, { "epoch": 5.276265790284482, "grad_norm": 0.11536852270364761, "learning_rate": 0.01, "loss": 2.024, "step": 51375 }, { "epoch": 5.276573893396323, "grad_norm": 0.040931250900030136, "learning_rate": 0.01, "loss": 2.0085, "step": 51378 }, { "epoch": 5.276881996508164, "grad_norm": 0.13397638499736786, "learning_rate": 0.01, "loss": 2.0248, "step": 51381 }, { "epoch": 5.2771900996200065, "grad_norm": 0.045118771493434906, "learning_rate": 0.01, "loss": 1.9991, "step": 51384 }, { "epoch": 5.277498202731848, "grad_norm": 0.04626978933811188, "learning_rate": 0.01, "loss": 2.0161, "step": 51387 }, { "epoch": 5.277806305843689, "grad_norm": 0.03898259624838829, "learning_rate": 0.01, "loss": 1.9965, "step": 51390 }, { "epoch": 5.27811440895553, "grad_norm": 0.14309214055538177, "learning_rate": 0.01, "loss": 2.0201, "step": 51393 }, { "epoch": 5.278422512067372, "grad_norm": 0.11283177882432938, "learning_rate": 0.01, "loss": 1.986, "step": 51396 }, { "epoch": 5.278730615179214, "grad_norm": 0.07250397652387619, "learning_rate": 0.01, "loss": 2.0112, "step": 51399 }, { "epoch": 5.279038718291055, "grad_norm": 0.04467407613992691, "learning_rate": 0.01, "loss": 1.9964, "step": 51402 }, { "epoch": 5.279346821402896, "grad_norm": 0.03758449852466583, "learning_rate": 0.01, "loss": 2.0086, "step": 51405 }, { "epoch": 5.2796549245147375, "grad_norm": 0.04290134087204933, "learning_rate": 0.01, "loss": 1.9711, "step": 51408 }, { "epoch": 5.279963027626579, "grad_norm": 0.21099735796451569, "learning_rate": 0.01, "loss": 2.0002, "step": 51411 }, { "epoch": 5.28027113073842, "grad_norm": 0.10676965117454529, "learning_rate": 0.01, "loss": 2.006, "step": 51414 }, { "epoch": 5.280579233850262, "grad_norm": 0.11726178228855133, "learning_rate": 0.01, "loss": 2.0066, "step": 51417 }, { "epoch": 5.280887336962103, "grad_norm": 0.06197341904044151, "learning_rate": 0.01, "loss": 2.0134, "step": 51420 }, { "epoch": 5.281195440073945, "grad_norm": 0.04718432575464249, "learning_rate": 0.01, "loss": 2.0004, "step": 51423 }, { "epoch": 5.281503543185786, "grad_norm": 0.049280427396297455, "learning_rate": 0.01, "loss": 1.9822, "step": 51426 }, { "epoch": 5.281811646297627, "grad_norm": 0.036890413612127304, "learning_rate": 0.01, "loss": 1.9858, "step": 51429 }, { "epoch": 5.282119749409469, "grad_norm": 0.03962325677275658, "learning_rate": 0.01, "loss": 1.9882, "step": 51432 }, { "epoch": 5.282427852521311, "grad_norm": 0.03786443546414375, "learning_rate": 0.01, "loss": 1.9874, "step": 51435 }, { "epoch": 5.282735955633152, "grad_norm": 0.06800255179405212, "learning_rate": 0.01, "loss": 1.965, "step": 51438 }, { "epoch": 5.283044058744993, "grad_norm": 0.1254369616508484, "learning_rate": 0.01, "loss": 1.9755, "step": 51441 }, { "epoch": 5.283352161856834, "grad_norm": 0.10241379588842392, "learning_rate": 0.01, "loss": 1.9908, "step": 51444 }, { "epoch": 5.283660264968677, "grad_norm": 0.07036352902650833, "learning_rate": 0.01, "loss": 1.986, "step": 51447 }, { "epoch": 5.283968368080518, "grad_norm": 0.04033416137099266, "learning_rate": 0.01, "loss": 1.9962, "step": 51450 }, { "epoch": 5.284276471192359, "grad_norm": 0.03396594151854515, "learning_rate": 0.01, "loss": 1.9944, "step": 51453 }, { "epoch": 5.2845845743042, "grad_norm": 0.038950271904468536, "learning_rate": 0.01, "loss": 1.99, "step": 51456 }, { "epoch": 5.284892677416042, "grad_norm": 0.12410634011030197, "learning_rate": 0.01, "loss": 1.9932, "step": 51459 }, { "epoch": 5.285200780527884, "grad_norm": 0.08871324360370636, "learning_rate": 0.01, "loss": 2.0303, "step": 51462 }, { "epoch": 5.285508883639725, "grad_norm": 0.06051253527402878, "learning_rate": 0.01, "loss": 2.0056, "step": 51465 }, { "epoch": 5.285816986751566, "grad_norm": 0.08062247186899185, "learning_rate": 0.01, "loss": 2.0108, "step": 51468 }, { "epoch": 5.286125089863408, "grad_norm": 0.09288395196199417, "learning_rate": 0.01, "loss": 2.0187, "step": 51471 }, { "epoch": 5.286433192975249, "grad_norm": 0.11983957886695862, "learning_rate": 0.01, "loss": 1.9799, "step": 51474 }, { "epoch": 5.28674129608709, "grad_norm": 0.12828132510185242, "learning_rate": 0.01, "loss": 2.0113, "step": 51477 }, { "epoch": 5.287049399198932, "grad_norm": 0.09051026403903961, "learning_rate": 0.01, "loss": 2.0068, "step": 51480 }, { "epoch": 5.2873575023107735, "grad_norm": 0.08622095733880997, "learning_rate": 0.01, "loss": 1.9865, "step": 51483 }, { "epoch": 5.287665605422615, "grad_norm": 0.0727437287569046, "learning_rate": 0.01, "loss": 1.9938, "step": 51486 }, { "epoch": 5.287973708534456, "grad_norm": 0.052949920296669006, "learning_rate": 0.01, "loss": 1.9779, "step": 51489 }, { "epoch": 5.288281811646297, "grad_norm": 0.04807107150554657, "learning_rate": 0.01, "loss": 2.0071, "step": 51492 }, { "epoch": 5.288589914758139, "grad_norm": 0.12251218408346176, "learning_rate": 0.01, "loss": 1.9911, "step": 51495 }, { "epoch": 5.288898017869981, "grad_norm": 0.07985778152942657, "learning_rate": 0.01, "loss": 2.0055, "step": 51498 }, { "epoch": 5.289206120981822, "grad_norm": 0.0659778043627739, "learning_rate": 0.01, "loss": 1.9511, "step": 51501 }, { "epoch": 5.289514224093663, "grad_norm": 0.0646664947271347, "learning_rate": 0.01, "loss": 1.9754, "step": 51504 }, { "epoch": 5.2898223272055045, "grad_norm": 0.05297090485692024, "learning_rate": 0.01, "loss": 2.0091, "step": 51507 }, { "epoch": 5.290130430317346, "grad_norm": 0.039955638349056244, "learning_rate": 0.01, "loss": 2.0142, "step": 51510 }, { "epoch": 5.290438533429188, "grad_norm": 0.03601599857211113, "learning_rate": 0.01, "loss": 2.0147, "step": 51513 }, { "epoch": 5.290746636541029, "grad_norm": 0.03554227575659752, "learning_rate": 0.01, "loss": 1.9824, "step": 51516 }, { "epoch": 5.29105473965287, "grad_norm": 0.0429302453994751, "learning_rate": 0.01, "loss": 1.996, "step": 51519 }, { "epoch": 5.291362842764712, "grad_norm": 0.07092410326004028, "learning_rate": 0.01, "loss": 1.9969, "step": 51522 }, { "epoch": 5.291670945876553, "grad_norm": 0.09126897156238556, "learning_rate": 0.01, "loss": 2.01, "step": 51525 }, { "epoch": 5.291979048988395, "grad_norm": 0.08033829182386398, "learning_rate": 0.01, "loss": 2.0035, "step": 51528 }, { "epoch": 5.292287152100236, "grad_norm": 0.0636729821562767, "learning_rate": 0.01, "loss": 1.985, "step": 51531 }, { "epoch": 5.292595255212078, "grad_norm": 0.05228522792458534, "learning_rate": 0.01, "loss": 1.9959, "step": 51534 }, { "epoch": 5.292903358323919, "grad_norm": 0.04770840331912041, "learning_rate": 0.01, "loss": 1.9626, "step": 51537 }, { "epoch": 5.29321146143576, "grad_norm": 0.06592493504285812, "learning_rate": 0.01, "loss": 2.0068, "step": 51540 }, { "epoch": 5.293519564547602, "grad_norm": 0.043730318546295166, "learning_rate": 0.01, "loss": 2.0128, "step": 51543 }, { "epoch": 5.293827667659444, "grad_norm": 0.11756035685539246, "learning_rate": 0.01, "loss": 2.015, "step": 51546 }, { "epoch": 5.294135770771285, "grad_norm": 0.05374009907245636, "learning_rate": 0.01, "loss": 1.978, "step": 51549 }, { "epoch": 5.294443873883126, "grad_norm": 0.03395754098892212, "learning_rate": 0.01, "loss": 1.992, "step": 51552 }, { "epoch": 5.294751976994967, "grad_norm": 0.04565683752298355, "learning_rate": 0.01, "loss": 1.9933, "step": 51555 }, { "epoch": 5.2950600801068095, "grad_norm": 0.06022532284259796, "learning_rate": 0.01, "loss": 2.0165, "step": 51558 }, { "epoch": 5.295368183218651, "grad_norm": 0.03321307152509689, "learning_rate": 0.01, "loss": 1.9962, "step": 51561 }, { "epoch": 5.295676286330492, "grad_norm": 0.09450728446245193, "learning_rate": 0.01, "loss": 2.0153, "step": 51564 }, { "epoch": 5.295984389442333, "grad_norm": 0.07322457432746887, "learning_rate": 0.01, "loss": 2.0003, "step": 51567 }, { "epoch": 5.296292492554175, "grad_norm": 0.07152074575424194, "learning_rate": 0.01, "loss": 1.9916, "step": 51570 }, { "epoch": 5.296600595666016, "grad_norm": 0.09634838998317719, "learning_rate": 0.01, "loss": 1.9888, "step": 51573 }, { "epoch": 5.296908698777858, "grad_norm": 0.055339280515909195, "learning_rate": 0.01, "loss": 1.9801, "step": 51576 }, { "epoch": 5.297216801889699, "grad_norm": 0.03655093163251877, "learning_rate": 0.01, "loss": 1.9844, "step": 51579 }, { "epoch": 5.2975249050015405, "grad_norm": 0.04170520231127739, "learning_rate": 0.01, "loss": 1.989, "step": 51582 }, { "epoch": 5.297833008113382, "grad_norm": 0.04089641198515892, "learning_rate": 0.01, "loss": 1.9996, "step": 51585 }, { "epoch": 5.298141111225223, "grad_norm": 0.03209071606397629, "learning_rate": 0.01, "loss": 1.9937, "step": 51588 }, { "epoch": 5.298449214337065, "grad_norm": 0.040264058858156204, "learning_rate": 0.01, "loss": 1.9813, "step": 51591 }, { "epoch": 5.298757317448906, "grad_norm": 0.040750179439783096, "learning_rate": 0.01, "loss": 1.9884, "step": 51594 }, { "epoch": 5.299065420560748, "grad_norm": 0.09608472883701324, "learning_rate": 0.01, "loss": 2.025, "step": 51597 }, { "epoch": 5.299373523672589, "grad_norm": 0.0965270921587944, "learning_rate": 0.01, "loss": 2.0181, "step": 51600 }, { "epoch": 5.29968162678443, "grad_norm": 0.07110472768545151, "learning_rate": 0.01, "loss": 1.9925, "step": 51603 }, { "epoch": 5.2999897298962715, "grad_norm": 0.08404947817325592, "learning_rate": 0.01, "loss": 2.0145, "step": 51606 }, { "epoch": 5.300297833008114, "grad_norm": 0.04731042683124542, "learning_rate": 0.01, "loss": 1.9832, "step": 51609 }, { "epoch": 5.300605936119955, "grad_norm": 0.045290857553482056, "learning_rate": 0.01, "loss": 2.0133, "step": 51612 }, { "epoch": 5.300914039231796, "grad_norm": 0.04302288219332695, "learning_rate": 0.01, "loss": 2.016, "step": 51615 }, { "epoch": 5.301222142343637, "grad_norm": 0.04258449375629425, "learning_rate": 0.01, "loss": 2.0119, "step": 51618 }, { "epoch": 5.301530245455479, "grad_norm": 0.04786079004406929, "learning_rate": 0.01, "loss": 2.0067, "step": 51621 }, { "epoch": 5.301838348567321, "grad_norm": 0.14586381614208221, "learning_rate": 0.01, "loss": 1.994, "step": 51624 }, { "epoch": 5.302146451679162, "grad_norm": 0.06271131336688995, "learning_rate": 0.01, "loss": 1.9983, "step": 51627 }, { "epoch": 5.302454554791003, "grad_norm": 0.05278899893164635, "learning_rate": 0.01, "loss": 1.9956, "step": 51630 }, { "epoch": 5.302762657902845, "grad_norm": 0.04671400785446167, "learning_rate": 0.01, "loss": 1.9911, "step": 51633 }, { "epoch": 5.303070761014686, "grad_norm": 0.07106892019510269, "learning_rate": 0.01, "loss": 2.0101, "step": 51636 }, { "epoch": 5.303378864126528, "grad_norm": 0.031581420451402664, "learning_rate": 0.01, "loss": 2.0088, "step": 51639 }, { "epoch": 5.303686967238369, "grad_norm": 0.04924008622765541, "learning_rate": 0.01, "loss": 1.9775, "step": 51642 }, { "epoch": 5.303995070350211, "grad_norm": 0.11483835428953171, "learning_rate": 0.01, "loss": 1.9781, "step": 51645 }, { "epoch": 5.304303173462052, "grad_norm": 0.07446176558732986, "learning_rate": 0.01, "loss": 2.0171, "step": 51648 }, { "epoch": 5.304611276573893, "grad_norm": 0.051111143082380295, "learning_rate": 0.01, "loss": 2.0008, "step": 51651 }, { "epoch": 5.304919379685735, "grad_norm": 0.040741320699453354, "learning_rate": 0.01, "loss": 1.9935, "step": 51654 }, { "epoch": 5.3052274827975765, "grad_norm": 0.06887371838092804, "learning_rate": 0.01, "loss": 2.0131, "step": 51657 }, { "epoch": 5.305535585909418, "grad_norm": 0.09351345151662827, "learning_rate": 0.01, "loss": 2.0056, "step": 51660 }, { "epoch": 5.305843689021259, "grad_norm": 0.0568513348698616, "learning_rate": 0.01, "loss": 1.9897, "step": 51663 }, { "epoch": 5.3061517921331, "grad_norm": 0.05117219313979149, "learning_rate": 0.01, "loss": 2.0016, "step": 51666 }, { "epoch": 5.3064598952449415, "grad_norm": 0.04374157264828682, "learning_rate": 0.01, "loss": 1.9708, "step": 51669 }, { "epoch": 5.306767998356784, "grad_norm": 0.04442832991480827, "learning_rate": 0.01, "loss": 2.0164, "step": 51672 }, { "epoch": 5.307076101468625, "grad_norm": 0.0560653991997242, "learning_rate": 0.01, "loss": 2.0034, "step": 51675 }, { "epoch": 5.307384204580466, "grad_norm": 0.11248762905597687, "learning_rate": 0.01, "loss": 2.0026, "step": 51678 }, { "epoch": 5.3076923076923075, "grad_norm": 0.05383045971393585, "learning_rate": 0.01, "loss": 2.0145, "step": 51681 }, { "epoch": 5.308000410804149, "grad_norm": 0.11476562172174454, "learning_rate": 0.01, "loss": 1.9891, "step": 51684 }, { "epoch": 5.308308513915991, "grad_norm": 0.04139787703752518, "learning_rate": 0.01, "loss": 2.0037, "step": 51687 }, { "epoch": 5.308616617027832, "grad_norm": 0.03532332554459572, "learning_rate": 0.01, "loss": 1.9938, "step": 51690 }, { "epoch": 5.308924720139673, "grad_norm": 0.04809233918786049, "learning_rate": 0.01, "loss": 1.9894, "step": 51693 }, { "epoch": 5.309232823251515, "grad_norm": 0.0827130451798439, "learning_rate": 0.01, "loss": 2.0009, "step": 51696 }, { "epoch": 5.309540926363356, "grad_norm": 0.05010407045483589, "learning_rate": 0.01, "loss": 2.0109, "step": 51699 }, { "epoch": 5.309849029475198, "grad_norm": 0.04733329638838768, "learning_rate": 0.01, "loss": 2.0, "step": 51702 }, { "epoch": 5.310157132587039, "grad_norm": 0.03515147417783737, "learning_rate": 0.01, "loss": 2.0103, "step": 51705 }, { "epoch": 5.310465235698881, "grad_norm": 0.12705236673355103, "learning_rate": 0.01, "loss": 2.0031, "step": 51708 }, { "epoch": 5.310773338810722, "grad_norm": 0.04811834171414375, "learning_rate": 0.01, "loss": 1.9934, "step": 51711 }, { "epoch": 5.311081441922563, "grad_norm": 0.04282277077436447, "learning_rate": 0.01, "loss": 2.012, "step": 51714 }, { "epoch": 5.311389545034405, "grad_norm": 0.039072856307029724, "learning_rate": 0.01, "loss": 1.9754, "step": 51717 }, { "epoch": 5.311697648146247, "grad_norm": 0.06571515649557114, "learning_rate": 0.01, "loss": 2.0007, "step": 51720 }, { "epoch": 5.312005751258088, "grad_norm": 0.10408081859350204, "learning_rate": 0.01, "loss": 2.0091, "step": 51723 }, { "epoch": 5.312313854369929, "grad_norm": 0.12248563021421432, "learning_rate": 0.01, "loss": 1.9859, "step": 51726 }, { "epoch": 5.31262195748177, "grad_norm": 0.1178579330444336, "learning_rate": 0.01, "loss": 2.0148, "step": 51729 }, { "epoch": 5.312930060593612, "grad_norm": 0.042379625141620636, "learning_rate": 0.01, "loss": 1.9786, "step": 51732 }, { "epoch": 5.313238163705454, "grad_norm": 0.05351175367832184, "learning_rate": 0.01, "loss": 1.9822, "step": 51735 }, { "epoch": 5.313546266817295, "grad_norm": 0.03492636978626251, "learning_rate": 0.01, "loss": 1.9868, "step": 51738 }, { "epoch": 5.313854369929136, "grad_norm": 0.037354279309511185, "learning_rate": 0.01, "loss": 2.0128, "step": 51741 }, { "epoch": 5.3141624730409776, "grad_norm": 0.0453021377325058, "learning_rate": 0.01, "loss": 1.9709, "step": 51744 }, { "epoch": 5.314470576152819, "grad_norm": 0.040230412036180496, "learning_rate": 0.01, "loss": 2.007, "step": 51747 }, { "epoch": 5.314778679264661, "grad_norm": 0.1004524752497673, "learning_rate": 0.01, "loss": 2.0159, "step": 51750 }, { "epoch": 5.315086782376502, "grad_norm": 0.08435871452093124, "learning_rate": 0.01, "loss": 1.9936, "step": 51753 }, { "epoch": 5.3153948854883435, "grad_norm": 0.04728049412369728, "learning_rate": 0.01, "loss": 1.9695, "step": 51756 }, { "epoch": 5.315702988600185, "grad_norm": 0.043835487216711044, "learning_rate": 0.01, "loss": 1.9875, "step": 51759 }, { "epoch": 5.316011091712026, "grad_norm": 0.03761078417301178, "learning_rate": 0.01, "loss": 1.9879, "step": 51762 }, { "epoch": 5.316319194823867, "grad_norm": 0.11318635195493698, "learning_rate": 0.01, "loss": 2.0072, "step": 51765 }, { "epoch": 5.316627297935709, "grad_norm": 0.06980354338884354, "learning_rate": 0.01, "loss": 1.9965, "step": 51768 }, { "epoch": 5.316935401047551, "grad_norm": 0.09948752075433731, "learning_rate": 0.01, "loss": 1.9912, "step": 51771 }, { "epoch": 5.317243504159392, "grad_norm": 0.07391460239887238, "learning_rate": 0.01, "loss": 2.0158, "step": 51774 }, { "epoch": 5.317551607271233, "grad_norm": 0.06508208811283112, "learning_rate": 0.01, "loss": 2.0109, "step": 51777 }, { "epoch": 5.3178597103830745, "grad_norm": 0.0692305937409401, "learning_rate": 0.01, "loss": 1.9963, "step": 51780 }, { "epoch": 5.318167813494917, "grad_norm": 0.07129498571157455, "learning_rate": 0.01, "loss": 2.0188, "step": 51783 }, { "epoch": 5.318475916606758, "grad_norm": 0.05130002647638321, "learning_rate": 0.01, "loss": 2.0042, "step": 51786 }, { "epoch": 5.318784019718599, "grad_norm": 0.07163992524147034, "learning_rate": 0.01, "loss": 1.9742, "step": 51789 }, { "epoch": 5.31909212283044, "grad_norm": 0.09687676280736923, "learning_rate": 0.01, "loss": 2.0073, "step": 51792 }, { "epoch": 5.319400225942282, "grad_norm": 0.08195064216852188, "learning_rate": 0.01, "loss": 1.9981, "step": 51795 }, { "epoch": 5.319708329054124, "grad_norm": 0.05641806125640869, "learning_rate": 0.01, "loss": 2.0018, "step": 51798 }, { "epoch": 5.320016432165965, "grad_norm": 0.06832972913980484, "learning_rate": 0.01, "loss": 1.9688, "step": 51801 }, { "epoch": 5.320324535277806, "grad_norm": 0.06402765214443207, "learning_rate": 0.01, "loss": 1.9935, "step": 51804 }, { "epoch": 5.320632638389648, "grad_norm": 0.045856647193431854, "learning_rate": 0.01, "loss": 1.9857, "step": 51807 }, { "epoch": 5.320940741501489, "grad_norm": 0.10627341270446777, "learning_rate": 0.01, "loss": 2.0, "step": 51810 }, { "epoch": 5.321248844613331, "grad_norm": 0.07530563324689865, "learning_rate": 0.01, "loss": 1.9766, "step": 51813 }, { "epoch": 5.321556947725172, "grad_norm": 0.06049705296754837, "learning_rate": 0.01, "loss": 2.0227, "step": 51816 }, { "epoch": 5.3218650508370136, "grad_norm": 0.05264494568109512, "learning_rate": 0.01, "loss": 1.9785, "step": 51819 }, { "epoch": 5.322173153948855, "grad_norm": 0.03524477034807205, "learning_rate": 0.01, "loss": 2.0069, "step": 51822 }, { "epoch": 5.322481257060696, "grad_norm": 0.03644990921020508, "learning_rate": 0.01, "loss": 2.0254, "step": 51825 }, { "epoch": 5.322789360172537, "grad_norm": 0.10562828928232193, "learning_rate": 0.01, "loss": 1.9894, "step": 51828 }, { "epoch": 5.3230974632843795, "grad_norm": 0.06203228980302811, "learning_rate": 0.01, "loss": 1.9915, "step": 51831 }, { "epoch": 5.323405566396221, "grad_norm": 0.06565918028354645, "learning_rate": 0.01, "loss": 1.9969, "step": 51834 }, { "epoch": 5.323713669508062, "grad_norm": 0.03533428907394409, "learning_rate": 0.01, "loss": 1.9765, "step": 51837 }, { "epoch": 5.324021772619903, "grad_norm": 0.054743893444538116, "learning_rate": 0.01, "loss": 1.9843, "step": 51840 }, { "epoch": 5.3243298757317445, "grad_norm": 0.04253426194190979, "learning_rate": 0.01, "loss": 2.0179, "step": 51843 }, { "epoch": 5.324637978843587, "grad_norm": 0.05230151116847992, "learning_rate": 0.01, "loss": 2.0335, "step": 51846 }, { "epoch": 5.324946081955428, "grad_norm": 0.08154330402612686, "learning_rate": 0.01, "loss": 1.972, "step": 51849 }, { "epoch": 5.325254185067269, "grad_norm": 0.0635039433836937, "learning_rate": 0.01, "loss": 2.0136, "step": 51852 }, { "epoch": 5.3255622881791105, "grad_norm": 0.07905185222625732, "learning_rate": 0.01, "loss": 1.9801, "step": 51855 }, { "epoch": 5.325870391290952, "grad_norm": 0.04034522920846939, "learning_rate": 0.01, "loss": 1.9908, "step": 51858 }, { "epoch": 5.326178494402793, "grad_norm": 0.045876212418079376, "learning_rate": 0.01, "loss": 2.0189, "step": 51861 }, { "epoch": 5.326486597514635, "grad_norm": 0.03529779985547066, "learning_rate": 0.01, "loss": 2.0107, "step": 51864 }, { "epoch": 5.326794700626476, "grad_norm": 0.04534045606851578, "learning_rate": 0.01, "loss": 1.9734, "step": 51867 }, { "epoch": 5.327102803738318, "grad_norm": 0.126200869679451, "learning_rate": 0.01, "loss": 1.9621, "step": 51870 }, { "epoch": 5.327410906850159, "grad_norm": 0.052106015384197235, "learning_rate": 0.01, "loss": 1.9906, "step": 51873 }, { "epoch": 5.327719009962, "grad_norm": 0.048572082072496414, "learning_rate": 0.01, "loss": 2.0044, "step": 51876 }, { "epoch": 5.328027113073842, "grad_norm": 0.057247593998909, "learning_rate": 0.01, "loss": 1.9794, "step": 51879 }, { "epoch": 5.328335216185684, "grad_norm": 0.05806770548224449, "learning_rate": 0.01, "loss": 1.989, "step": 51882 }, { "epoch": 5.328643319297525, "grad_norm": 0.13235436379909515, "learning_rate": 0.01, "loss": 2.0009, "step": 51885 }, { "epoch": 5.328951422409366, "grad_norm": 0.05209680646657944, "learning_rate": 0.01, "loss": 2.0027, "step": 51888 }, { "epoch": 5.329259525521207, "grad_norm": 0.04446623846888542, "learning_rate": 0.01, "loss": 2.0015, "step": 51891 }, { "epoch": 5.32956762863305, "grad_norm": 0.0674331933259964, "learning_rate": 0.01, "loss": 1.99, "step": 51894 }, { "epoch": 5.329875731744891, "grad_norm": 0.11401736736297607, "learning_rate": 0.01, "loss": 1.9766, "step": 51897 }, { "epoch": 5.330183834856732, "grad_norm": 0.08108768612146378, "learning_rate": 0.01, "loss": 1.9956, "step": 51900 }, { "epoch": 5.330491937968573, "grad_norm": 0.06601007282733917, "learning_rate": 0.01, "loss": 1.9955, "step": 51903 }, { "epoch": 5.330800041080415, "grad_norm": 0.05304531753063202, "learning_rate": 0.01, "loss": 1.9967, "step": 51906 }, { "epoch": 5.331108144192257, "grad_norm": 0.04129822179675102, "learning_rate": 0.01, "loss": 2.0316, "step": 51909 }, { "epoch": 5.331416247304098, "grad_norm": 0.06261187046766281, "learning_rate": 0.01, "loss": 1.9957, "step": 51912 }, { "epoch": 5.331724350415939, "grad_norm": 0.06908193230628967, "learning_rate": 0.01, "loss": 1.9841, "step": 51915 }, { "epoch": 5.3320324535277805, "grad_norm": 0.10063374042510986, "learning_rate": 0.01, "loss": 2.0091, "step": 51918 }, { "epoch": 5.332340556639622, "grad_norm": 0.11441531032323837, "learning_rate": 0.01, "loss": 1.9971, "step": 51921 }, { "epoch": 5.332648659751463, "grad_norm": 0.058185406029224396, "learning_rate": 0.01, "loss": 2.0023, "step": 51924 }, { "epoch": 5.332956762863305, "grad_norm": 0.041803937405347824, "learning_rate": 0.01, "loss": 2.0083, "step": 51927 }, { "epoch": 5.3332648659751465, "grad_norm": 0.12652553617954254, "learning_rate": 0.01, "loss": 2.0096, "step": 51930 }, { "epoch": 5.333572969086988, "grad_norm": 0.04237103462219238, "learning_rate": 0.01, "loss": 1.9789, "step": 51933 }, { "epoch": 5.333881072198829, "grad_norm": 0.052531465888023376, "learning_rate": 0.01, "loss": 2.0056, "step": 51936 }, { "epoch": 5.33418917531067, "grad_norm": 0.0361291877925396, "learning_rate": 0.01, "loss": 2.0085, "step": 51939 }, { "epoch": 5.334497278422512, "grad_norm": 0.17653311789035797, "learning_rate": 0.01, "loss": 2.0144, "step": 51942 }, { "epoch": 5.334805381534354, "grad_norm": 0.04208230972290039, "learning_rate": 0.01, "loss": 1.9929, "step": 51945 }, { "epoch": 5.335113484646195, "grad_norm": 0.05036168918013573, "learning_rate": 0.01, "loss": 1.9819, "step": 51948 }, { "epoch": 5.335421587758036, "grad_norm": 0.032406702637672424, "learning_rate": 0.01, "loss": 1.9909, "step": 51951 }, { "epoch": 5.3357296908698775, "grad_norm": 0.03707049414515495, "learning_rate": 0.01, "loss": 1.9886, "step": 51954 }, { "epoch": 5.33603779398172, "grad_norm": 0.10224796086549759, "learning_rate": 0.01, "loss": 2.0212, "step": 51957 }, { "epoch": 5.336345897093561, "grad_norm": 0.07522287964820862, "learning_rate": 0.01, "loss": 2.0236, "step": 51960 }, { "epoch": 5.336654000205402, "grad_norm": 0.08081628382205963, "learning_rate": 0.01, "loss": 2.0055, "step": 51963 }, { "epoch": 5.336962103317243, "grad_norm": 0.03779719024896622, "learning_rate": 0.01, "loss": 2.0006, "step": 51966 }, { "epoch": 5.337270206429085, "grad_norm": 0.05938563495874405, "learning_rate": 0.01, "loss": 1.9933, "step": 51969 }, { "epoch": 5.337578309540927, "grad_norm": 0.058824703097343445, "learning_rate": 0.01, "loss": 1.9977, "step": 51972 }, { "epoch": 5.337886412652768, "grad_norm": 0.09814310073852539, "learning_rate": 0.01, "loss": 2.004, "step": 51975 }, { "epoch": 5.338194515764609, "grad_norm": 0.038881540298461914, "learning_rate": 0.01, "loss": 1.9776, "step": 51978 }, { "epoch": 5.338502618876451, "grad_norm": 0.06858659535646439, "learning_rate": 0.01, "loss": 1.9769, "step": 51981 }, { "epoch": 5.338810721988292, "grad_norm": 0.05999214947223663, "learning_rate": 0.01, "loss": 2.0051, "step": 51984 }, { "epoch": 5.339118825100133, "grad_norm": 0.13660067319869995, "learning_rate": 0.01, "loss": 2.0003, "step": 51987 }, { "epoch": 5.339426928211975, "grad_norm": 0.039648644626140594, "learning_rate": 0.01, "loss": 2.0287, "step": 51990 }, { "epoch": 5.3397350313238165, "grad_norm": 0.04098828509449959, "learning_rate": 0.01, "loss": 2.0028, "step": 51993 }, { "epoch": 5.340043134435658, "grad_norm": 0.07291056960821152, "learning_rate": 0.01, "loss": 1.9797, "step": 51996 }, { "epoch": 5.340351237547499, "grad_norm": 0.07260385155677795, "learning_rate": 0.01, "loss": 2.0081, "step": 51999 }, { "epoch": 5.34065934065934, "grad_norm": 0.08830993622541428, "learning_rate": 0.01, "loss": 1.9863, "step": 52002 }, { "epoch": 5.3409674437711825, "grad_norm": 0.053973227739334106, "learning_rate": 0.01, "loss": 1.9924, "step": 52005 }, { "epoch": 5.341275546883024, "grad_norm": 0.06421860307455063, "learning_rate": 0.01, "loss": 1.9937, "step": 52008 }, { "epoch": 5.341583649994865, "grad_norm": 0.06905265897512436, "learning_rate": 0.01, "loss": 2.0258, "step": 52011 }, { "epoch": 5.341891753106706, "grad_norm": 0.1201087012887001, "learning_rate": 0.01, "loss": 2.0152, "step": 52014 }, { "epoch": 5.3421998562185475, "grad_norm": 0.0491718128323555, "learning_rate": 0.01, "loss": 1.9998, "step": 52017 }, { "epoch": 5.342507959330389, "grad_norm": 0.04123944044113159, "learning_rate": 0.01, "loss": 1.9924, "step": 52020 }, { "epoch": 5.342816062442231, "grad_norm": 0.059747952967882156, "learning_rate": 0.01, "loss": 1.9803, "step": 52023 }, { "epoch": 5.343124165554072, "grad_norm": 0.07639419287443161, "learning_rate": 0.01, "loss": 2.0135, "step": 52026 }, { "epoch": 5.3434322686659135, "grad_norm": 0.04553646221756935, "learning_rate": 0.01, "loss": 1.9915, "step": 52029 }, { "epoch": 5.343740371777755, "grad_norm": 0.05324438959360123, "learning_rate": 0.01, "loss": 2.011, "step": 52032 }, { "epoch": 5.344048474889596, "grad_norm": 0.07225726544857025, "learning_rate": 0.01, "loss": 1.988, "step": 52035 }, { "epoch": 5.344356578001438, "grad_norm": 0.10456754267215729, "learning_rate": 0.01, "loss": 2.0171, "step": 52038 }, { "epoch": 5.344664681113279, "grad_norm": 0.06349442899227142, "learning_rate": 0.01, "loss": 2.0006, "step": 52041 }, { "epoch": 5.344972784225121, "grad_norm": 0.08539248257875443, "learning_rate": 0.01, "loss": 1.9894, "step": 52044 }, { "epoch": 5.345280887336962, "grad_norm": 0.04877861216664314, "learning_rate": 0.01, "loss": 1.9805, "step": 52047 }, { "epoch": 5.345588990448803, "grad_norm": 0.0842355415225029, "learning_rate": 0.01, "loss": 2.0135, "step": 52050 }, { "epoch": 5.345897093560645, "grad_norm": 0.03320346772670746, "learning_rate": 0.01, "loss": 2.0038, "step": 52053 }, { "epoch": 5.346205196672487, "grad_norm": 0.09604234993457794, "learning_rate": 0.01, "loss": 2.0152, "step": 52056 }, { "epoch": 5.346513299784328, "grad_norm": 0.04435954615473747, "learning_rate": 0.01, "loss": 1.9884, "step": 52059 }, { "epoch": 5.346821402896169, "grad_norm": 0.07951738685369492, "learning_rate": 0.01, "loss": 1.9918, "step": 52062 }, { "epoch": 5.34712950600801, "grad_norm": 0.05883636698126793, "learning_rate": 0.01, "loss": 2.0099, "step": 52065 }, { "epoch": 5.3474376091198526, "grad_norm": 0.08984318375587463, "learning_rate": 0.01, "loss": 1.98, "step": 52068 }, { "epoch": 5.347745712231694, "grad_norm": 0.11542551219463348, "learning_rate": 0.01, "loss": 1.9835, "step": 52071 }, { "epoch": 5.348053815343535, "grad_norm": 0.07631759345531464, "learning_rate": 0.01, "loss": 2.0099, "step": 52074 }, { "epoch": 5.348361918455376, "grad_norm": 0.05458938702940941, "learning_rate": 0.01, "loss": 1.9866, "step": 52077 }, { "epoch": 5.348670021567218, "grad_norm": 0.03696439042687416, "learning_rate": 0.01, "loss": 2.0131, "step": 52080 }, { "epoch": 5.348978124679059, "grad_norm": 0.05871397256851196, "learning_rate": 0.01, "loss": 1.974, "step": 52083 }, { "epoch": 5.349286227790901, "grad_norm": 0.046202853322029114, "learning_rate": 0.01, "loss": 2.0081, "step": 52086 }, { "epoch": 5.349594330902742, "grad_norm": 0.035825759172439575, "learning_rate": 0.01, "loss": 1.9752, "step": 52089 }, { "epoch": 5.3499024340145835, "grad_norm": 0.12834282219409943, "learning_rate": 0.01, "loss": 1.9967, "step": 52092 }, { "epoch": 5.350210537126425, "grad_norm": 0.05964813381433487, "learning_rate": 0.01, "loss": 2.0028, "step": 52095 }, { "epoch": 5.350518640238266, "grad_norm": 0.0718245729804039, "learning_rate": 0.01, "loss": 2.0141, "step": 52098 }, { "epoch": 5.350826743350108, "grad_norm": 0.043250247836112976, "learning_rate": 0.01, "loss": 1.9815, "step": 52101 }, { "epoch": 5.3511348464619495, "grad_norm": 0.0491093210875988, "learning_rate": 0.01, "loss": 2.0078, "step": 52104 }, { "epoch": 5.351442949573791, "grad_norm": 0.06597597897052765, "learning_rate": 0.01, "loss": 1.9824, "step": 52107 }, { "epoch": 5.351751052685632, "grad_norm": 0.1260969340801239, "learning_rate": 0.01, "loss": 1.9889, "step": 52110 }, { "epoch": 5.352059155797473, "grad_norm": 0.04004066437482834, "learning_rate": 0.01, "loss": 2.0006, "step": 52113 }, { "epoch": 5.3523672589093145, "grad_norm": 0.03897303342819214, "learning_rate": 0.01, "loss": 1.9727, "step": 52116 }, { "epoch": 5.352675362021157, "grad_norm": 0.05720753222703934, "learning_rate": 0.01, "loss": 1.9941, "step": 52119 }, { "epoch": 5.352983465132998, "grad_norm": 0.09778839349746704, "learning_rate": 0.01, "loss": 1.9913, "step": 52122 }, { "epoch": 5.353291568244839, "grad_norm": 0.10825862735509872, "learning_rate": 0.01, "loss": 1.986, "step": 52125 }, { "epoch": 5.3535996713566805, "grad_norm": 0.06417248398065567, "learning_rate": 0.01, "loss": 1.9936, "step": 52128 }, { "epoch": 5.353907774468522, "grad_norm": 0.03844156488776207, "learning_rate": 0.01, "loss": 1.9876, "step": 52131 }, { "epoch": 5.354215877580364, "grad_norm": 0.05346846207976341, "learning_rate": 0.01, "loss": 2.0165, "step": 52134 }, { "epoch": 5.354523980692205, "grad_norm": 0.07128198444843292, "learning_rate": 0.01, "loss": 2.0159, "step": 52137 }, { "epoch": 5.354832083804046, "grad_norm": 0.03174986317753792, "learning_rate": 0.01, "loss": 1.9946, "step": 52140 }, { "epoch": 5.355140186915888, "grad_norm": 0.044908635318279266, "learning_rate": 0.01, "loss": 1.9926, "step": 52143 }, { "epoch": 5.355448290027729, "grad_norm": 0.14737063646316528, "learning_rate": 0.01, "loss": 2.0202, "step": 52146 }, { "epoch": 5.355756393139571, "grad_norm": 0.08280967175960541, "learning_rate": 0.01, "loss": 2.0071, "step": 52149 }, { "epoch": 5.356064496251412, "grad_norm": 0.037819184362888336, "learning_rate": 0.01, "loss": 1.9795, "step": 52152 }, { "epoch": 5.356372599363254, "grad_norm": 0.03838292881846428, "learning_rate": 0.01, "loss": 1.9886, "step": 52155 }, { "epoch": 5.356680702475095, "grad_norm": 0.04593195393681526, "learning_rate": 0.01, "loss": 1.9731, "step": 52158 }, { "epoch": 5.356988805586936, "grad_norm": 0.12287592142820358, "learning_rate": 0.01, "loss": 1.9913, "step": 52161 }, { "epoch": 5.357296908698778, "grad_norm": 0.05081895366311073, "learning_rate": 0.01, "loss": 1.9746, "step": 52164 }, { "epoch": 5.3576050118106195, "grad_norm": 0.15700340270996094, "learning_rate": 0.01, "loss": 2.0194, "step": 52167 }, { "epoch": 5.357913114922461, "grad_norm": 0.049683380872011185, "learning_rate": 0.01, "loss": 2.003, "step": 52170 }, { "epoch": 5.358221218034302, "grad_norm": 0.03398337587714195, "learning_rate": 0.01, "loss": 2.0134, "step": 52173 }, { "epoch": 5.358529321146143, "grad_norm": 0.04002157971262932, "learning_rate": 0.01, "loss": 1.9572, "step": 52176 }, { "epoch": 5.358837424257985, "grad_norm": 0.037759970873594284, "learning_rate": 0.01, "loss": 2.0035, "step": 52179 }, { "epoch": 5.359145527369827, "grad_norm": 0.04797700420022011, "learning_rate": 0.01, "loss": 1.9837, "step": 52182 }, { "epoch": 5.359453630481668, "grad_norm": 0.06852797418832779, "learning_rate": 0.01, "loss": 2.0288, "step": 52185 }, { "epoch": 5.359761733593509, "grad_norm": 0.04743816703557968, "learning_rate": 0.01, "loss": 1.9938, "step": 52188 }, { "epoch": 5.3600698367053505, "grad_norm": 0.05316634103655815, "learning_rate": 0.01, "loss": 2.0035, "step": 52191 }, { "epoch": 5.360377939817192, "grad_norm": 0.07262757420539856, "learning_rate": 0.01, "loss": 1.992, "step": 52194 }, { "epoch": 5.360686042929034, "grad_norm": 0.13863062858581543, "learning_rate": 0.01, "loss": 2.0122, "step": 52197 }, { "epoch": 5.360994146040875, "grad_norm": 0.0368843711912632, "learning_rate": 0.01, "loss": 1.9797, "step": 52200 }, { "epoch": 5.3613022491527165, "grad_norm": 0.05429408326745033, "learning_rate": 0.01, "loss": 1.9711, "step": 52203 }, { "epoch": 5.361610352264558, "grad_norm": 0.04265204817056656, "learning_rate": 0.01, "loss": 2.0074, "step": 52206 }, { "epoch": 5.361918455376399, "grad_norm": 0.04122883826494217, "learning_rate": 0.01, "loss": 1.9974, "step": 52209 }, { "epoch": 5.362226558488241, "grad_norm": 0.04695272073149681, "learning_rate": 0.01, "loss": 1.9856, "step": 52212 }, { "epoch": 5.362534661600082, "grad_norm": 0.04153517633676529, "learning_rate": 0.01, "loss": 2.0172, "step": 52215 }, { "epoch": 5.362842764711924, "grad_norm": 0.09016060084104538, "learning_rate": 0.01, "loss": 1.9984, "step": 52218 }, { "epoch": 5.363150867823765, "grad_norm": 0.06228369474411011, "learning_rate": 0.01, "loss": 1.9792, "step": 52221 }, { "epoch": 5.363458970935606, "grad_norm": 0.09304657578468323, "learning_rate": 0.01, "loss": 2.0058, "step": 52224 }, { "epoch": 5.363767074047448, "grad_norm": 0.07483693957328796, "learning_rate": 0.01, "loss": 1.9941, "step": 52227 }, { "epoch": 5.36407517715929, "grad_norm": 0.08632933348417282, "learning_rate": 0.01, "loss": 2.0113, "step": 52230 }, { "epoch": 5.364383280271131, "grad_norm": 0.08443045616149902, "learning_rate": 0.01, "loss": 2.0161, "step": 52233 }, { "epoch": 5.364691383382972, "grad_norm": 0.05415327101945877, "learning_rate": 0.01, "loss": 2.0251, "step": 52236 }, { "epoch": 5.364999486494813, "grad_norm": 0.048056915402412415, "learning_rate": 0.01, "loss": 1.9952, "step": 52239 }, { "epoch": 5.365307589606655, "grad_norm": 0.03430894389748573, "learning_rate": 0.01, "loss": 1.9957, "step": 52242 }, { "epoch": 5.365615692718497, "grad_norm": 0.10147974640130997, "learning_rate": 0.01, "loss": 2.0247, "step": 52245 }, { "epoch": 5.365923795830338, "grad_norm": 0.09672293066978455, "learning_rate": 0.01, "loss": 1.9989, "step": 52248 }, { "epoch": 5.366231898942179, "grad_norm": 0.08904405683279037, "learning_rate": 0.01, "loss": 2.0167, "step": 52251 }, { "epoch": 5.366540002054021, "grad_norm": 0.05163189023733139, "learning_rate": 0.01, "loss": 1.9928, "step": 52254 }, { "epoch": 5.366848105165862, "grad_norm": 0.03694668784737587, "learning_rate": 0.01, "loss": 1.9909, "step": 52257 }, { "epoch": 5.367156208277704, "grad_norm": 0.036716192960739136, "learning_rate": 0.01, "loss": 2.004, "step": 52260 }, { "epoch": 5.367464311389545, "grad_norm": 0.046369098126888275, "learning_rate": 0.01, "loss": 2.0128, "step": 52263 }, { "epoch": 5.3677724145013865, "grad_norm": 0.06647578626871109, "learning_rate": 0.01, "loss": 1.9564, "step": 52266 }, { "epoch": 5.368080517613228, "grad_norm": 0.11546402424573898, "learning_rate": 0.01, "loss": 2.0125, "step": 52269 }, { "epoch": 5.368388620725069, "grad_norm": 0.08381645381450653, "learning_rate": 0.01, "loss": 1.9736, "step": 52272 }, { "epoch": 5.36869672383691, "grad_norm": 0.09374178946018219, "learning_rate": 0.01, "loss": 2.0108, "step": 52275 }, { "epoch": 5.3690048269487525, "grad_norm": 0.13528801500797272, "learning_rate": 0.01, "loss": 1.9889, "step": 52278 }, { "epoch": 5.369312930060594, "grad_norm": 0.09537555277347565, "learning_rate": 0.01, "loss": 1.9928, "step": 52281 }, { "epoch": 5.369621033172435, "grad_norm": 0.06615101546049118, "learning_rate": 0.01, "loss": 2.0193, "step": 52284 }, { "epoch": 5.369929136284276, "grad_norm": 0.03951498121023178, "learning_rate": 0.01, "loss": 1.9919, "step": 52287 }, { "epoch": 5.3702372393961175, "grad_norm": 0.04514235630631447, "learning_rate": 0.01, "loss": 2.0002, "step": 52290 }, { "epoch": 5.37054534250796, "grad_norm": 0.05101289227604866, "learning_rate": 0.01, "loss": 1.9996, "step": 52293 }, { "epoch": 5.370853445619801, "grad_norm": 0.10146338492631912, "learning_rate": 0.01, "loss": 1.9955, "step": 52296 }, { "epoch": 5.371161548731642, "grad_norm": 0.06167877838015556, "learning_rate": 0.01, "loss": 1.9842, "step": 52299 }, { "epoch": 5.3714696518434835, "grad_norm": 0.10302039980888367, "learning_rate": 0.01, "loss": 1.9917, "step": 52302 }, { "epoch": 5.371777754955325, "grad_norm": 0.07716668397188187, "learning_rate": 0.01, "loss": 2.0046, "step": 52305 }, { "epoch": 5.372085858067167, "grad_norm": 0.08410248160362244, "learning_rate": 0.01, "loss": 1.9643, "step": 52308 }, { "epoch": 5.372393961179008, "grad_norm": 0.05326802283525467, "learning_rate": 0.01, "loss": 2.0048, "step": 52311 }, { "epoch": 5.372702064290849, "grad_norm": 0.08413115888834, "learning_rate": 0.01, "loss": 1.9889, "step": 52314 }, { "epoch": 5.373010167402691, "grad_norm": 0.053719133138656616, "learning_rate": 0.01, "loss": 1.9995, "step": 52317 }, { "epoch": 5.373318270514532, "grad_norm": 0.07752490788698196, "learning_rate": 0.01, "loss": 2.0065, "step": 52320 }, { "epoch": 5.373626373626374, "grad_norm": 0.0719638466835022, "learning_rate": 0.01, "loss": 2.0005, "step": 52323 }, { "epoch": 5.373934476738215, "grad_norm": 0.0800856202840805, "learning_rate": 0.01, "loss": 1.9823, "step": 52326 }, { "epoch": 5.374242579850057, "grad_norm": 0.0889526829123497, "learning_rate": 0.01, "loss": 1.9985, "step": 52329 }, { "epoch": 5.374550682961898, "grad_norm": 0.08525931090116501, "learning_rate": 0.01, "loss": 1.9964, "step": 52332 }, { "epoch": 5.374858786073739, "grad_norm": 0.038573917001485825, "learning_rate": 0.01, "loss": 2.0186, "step": 52335 }, { "epoch": 5.37516688918558, "grad_norm": 0.03686724230647087, "learning_rate": 0.01, "loss": 1.9865, "step": 52338 }, { "epoch": 5.3754749922974225, "grad_norm": 0.042029861360788345, "learning_rate": 0.01, "loss": 1.9885, "step": 52341 }, { "epoch": 5.375783095409264, "grad_norm": 0.08990278095006943, "learning_rate": 0.01, "loss": 2.0018, "step": 52344 }, { "epoch": 5.376091198521105, "grad_norm": 0.09406303614377975, "learning_rate": 0.01, "loss": 1.9758, "step": 52347 }, { "epoch": 5.376399301632946, "grad_norm": 0.1285853236913681, "learning_rate": 0.01, "loss": 1.9883, "step": 52350 }, { "epoch": 5.376707404744788, "grad_norm": 0.06205267086625099, "learning_rate": 0.01, "loss": 2.01, "step": 52353 }, { "epoch": 5.37701550785663, "grad_norm": 0.09469316154718399, "learning_rate": 0.01, "loss": 1.998, "step": 52356 }, { "epoch": 5.377323610968471, "grad_norm": 0.08087198436260223, "learning_rate": 0.01, "loss": 1.9878, "step": 52359 }, { "epoch": 5.377631714080312, "grad_norm": 0.10511235147714615, "learning_rate": 0.01, "loss": 2.0175, "step": 52362 }, { "epoch": 5.3779398171921535, "grad_norm": 0.07102752476930618, "learning_rate": 0.01, "loss": 2.0019, "step": 52365 }, { "epoch": 5.378247920303995, "grad_norm": 0.04023940861225128, "learning_rate": 0.01, "loss": 1.9922, "step": 52368 }, { "epoch": 5.378556023415836, "grad_norm": 0.06366323679685593, "learning_rate": 0.01, "loss": 1.9837, "step": 52371 }, { "epoch": 5.378864126527678, "grad_norm": 0.045444704592227936, "learning_rate": 0.01, "loss": 1.9989, "step": 52374 }, { "epoch": 5.3791722296395195, "grad_norm": 0.07034427672624588, "learning_rate": 0.01, "loss": 1.9917, "step": 52377 }, { "epoch": 5.379480332751361, "grad_norm": 0.04844088852405548, "learning_rate": 0.01, "loss": 1.9746, "step": 52380 }, { "epoch": 5.379788435863202, "grad_norm": 0.05628515034914017, "learning_rate": 0.01, "loss": 1.9995, "step": 52383 }, { "epoch": 5.380096538975043, "grad_norm": 0.10569090396165848, "learning_rate": 0.01, "loss": 1.9886, "step": 52386 }, { "epoch": 5.380404642086885, "grad_norm": 0.14752137660980225, "learning_rate": 0.01, "loss": 1.9925, "step": 52389 }, { "epoch": 5.380712745198727, "grad_norm": 0.09316124767065048, "learning_rate": 0.01, "loss": 1.978, "step": 52392 }, { "epoch": 5.381020848310568, "grad_norm": 0.0789172351360321, "learning_rate": 0.01, "loss": 2.0041, "step": 52395 }, { "epoch": 5.381328951422409, "grad_norm": 0.04636065661907196, "learning_rate": 0.01, "loss": 1.9603, "step": 52398 }, { "epoch": 5.3816370545342505, "grad_norm": 0.051946815103292465, "learning_rate": 0.01, "loss": 1.9709, "step": 52401 }, { "epoch": 5.381945157646093, "grad_norm": 0.0423317514359951, "learning_rate": 0.01, "loss": 2.0133, "step": 52404 }, { "epoch": 5.382253260757934, "grad_norm": 0.03624933212995529, "learning_rate": 0.01, "loss": 1.9734, "step": 52407 }, { "epoch": 5.382561363869775, "grad_norm": 0.0415869876742363, "learning_rate": 0.01, "loss": 1.9978, "step": 52410 }, { "epoch": 5.382869466981616, "grad_norm": 0.1304771602153778, "learning_rate": 0.01, "loss": 2.0065, "step": 52413 }, { "epoch": 5.383177570093458, "grad_norm": 0.05917195603251457, "learning_rate": 0.01, "loss": 2.0216, "step": 52416 }, { "epoch": 5.3834856732053, "grad_norm": 0.04846251383423805, "learning_rate": 0.01, "loss": 2.0213, "step": 52419 }, { "epoch": 5.383793776317141, "grad_norm": 0.03447849303483963, "learning_rate": 0.01, "loss": 1.9879, "step": 52422 }, { "epoch": 5.384101879428982, "grad_norm": 0.042299896478652954, "learning_rate": 0.01, "loss": 1.9572, "step": 52425 }, { "epoch": 5.384409982540824, "grad_norm": 0.053316473960876465, "learning_rate": 0.01, "loss": 2.0188, "step": 52428 }, { "epoch": 5.384718085652665, "grad_norm": 0.05238528177142143, "learning_rate": 0.01, "loss": 2.0125, "step": 52431 }, { "epoch": 5.385026188764506, "grad_norm": 0.05088873207569122, "learning_rate": 0.01, "loss": 2.0015, "step": 52434 }, { "epoch": 5.385334291876348, "grad_norm": 0.03723147138953209, "learning_rate": 0.01, "loss": 2.0062, "step": 52437 }, { "epoch": 5.3856423949881895, "grad_norm": 0.04240123927593231, "learning_rate": 0.01, "loss": 1.9994, "step": 52440 }, { "epoch": 5.385950498100031, "grad_norm": 0.04458059370517731, "learning_rate": 0.01, "loss": 1.9832, "step": 52443 }, { "epoch": 5.386258601211872, "grad_norm": 0.20143818855285645, "learning_rate": 0.01, "loss": 1.9968, "step": 52446 }, { "epoch": 5.386566704323713, "grad_norm": 0.05507873743772507, "learning_rate": 0.01, "loss": 1.9985, "step": 52449 }, { "epoch": 5.3868748074355555, "grad_norm": 0.040351588279008865, "learning_rate": 0.01, "loss": 1.9684, "step": 52452 }, { "epoch": 5.387182910547397, "grad_norm": 0.044757384806871414, "learning_rate": 0.01, "loss": 1.9724, "step": 52455 }, { "epoch": 5.387491013659238, "grad_norm": 0.05201313644647598, "learning_rate": 0.01, "loss": 2.0204, "step": 52458 }, { "epoch": 5.387799116771079, "grad_norm": 0.053258076310157776, "learning_rate": 0.01, "loss": 2.0027, "step": 52461 }, { "epoch": 5.3881072198829205, "grad_norm": 0.08738038688898087, "learning_rate": 0.01, "loss": 2.0029, "step": 52464 }, { "epoch": 5.388415322994762, "grad_norm": 0.06154690682888031, "learning_rate": 0.01, "loss": 1.9737, "step": 52467 }, { "epoch": 5.388723426106604, "grad_norm": 0.045472290366888046, "learning_rate": 0.01, "loss": 1.9912, "step": 52470 }, { "epoch": 5.389031529218445, "grad_norm": 0.05027802661061287, "learning_rate": 0.01, "loss": 1.99, "step": 52473 }, { "epoch": 5.3893396323302865, "grad_norm": 0.06531531363725662, "learning_rate": 0.01, "loss": 2.0118, "step": 52476 }, { "epoch": 5.389647735442128, "grad_norm": 0.0642043873667717, "learning_rate": 0.01, "loss": 1.9922, "step": 52479 }, { "epoch": 5.389955838553969, "grad_norm": 0.03432301804423332, "learning_rate": 0.01, "loss": 1.995, "step": 52482 }, { "epoch": 5.390263941665811, "grad_norm": 0.14385442435741425, "learning_rate": 0.01, "loss": 1.9903, "step": 52485 }, { "epoch": 5.390572044777652, "grad_norm": 0.05068526789546013, "learning_rate": 0.01, "loss": 2.0122, "step": 52488 }, { "epoch": 5.390880147889494, "grad_norm": 0.0718868151307106, "learning_rate": 0.01, "loss": 2.0141, "step": 52491 }, { "epoch": 5.391188251001335, "grad_norm": 0.06522323191165924, "learning_rate": 0.01, "loss": 2.0136, "step": 52494 }, { "epoch": 5.391496354113176, "grad_norm": 0.03200114890933037, "learning_rate": 0.01, "loss": 1.9779, "step": 52497 }, { "epoch": 5.391804457225018, "grad_norm": 0.11630173027515411, "learning_rate": 0.01, "loss": 2.0095, "step": 52500 }, { "epoch": 5.39211256033686, "grad_norm": 0.07612688839435577, "learning_rate": 0.01, "loss": 2.0146, "step": 52503 }, { "epoch": 5.392420663448701, "grad_norm": 0.09313171356916428, "learning_rate": 0.01, "loss": 2.0002, "step": 52506 }, { "epoch": 5.392728766560542, "grad_norm": 0.07034334540367126, "learning_rate": 0.01, "loss": 1.9957, "step": 52509 }, { "epoch": 5.393036869672383, "grad_norm": 0.07270947843790054, "learning_rate": 0.01, "loss": 2.0186, "step": 52512 }, { "epoch": 5.3933449727842255, "grad_norm": 0.051841702312231064, "learning_rate": 0.01, "loss": 2.0039, "step": 52515 }, { "epoch": 5.393653075896067, "grad_norm": 0.04291679337620735, "learning_rate": 0.01, "loss": 1.9774, "step": 52518 }, { "epoch": 5.393961179007908, "grad_norm": 0.03652627766132355, "learning_rate": 0.01, "loss": 1.9567, "step": 52521 }, { "epoch": 5.394269282119749, "grad_norm": 0.03804773837327957, "learning_rate": 0.01, "loss": 1.9994, "step": 52524 }, { "epoch": 5.394577385231591, "grad_norm": 0.05173708498477936, "learning_rate": 0.01, "loss": 2.0167, "step": 52527 }, { "epoch": 5.394885488343432, "grad_norm": 0.07177083939313889, "learning_rate": 0.01, "loss": 1.994, "step": 52530 }, { "epoch": 5.395193591455274, "grad_norm": 0.08862923830747604, "learning_rate": 0.01, "loss": 2.008, "step": 52533 }, { "epoch": 5.395501694567115, "grad_norm": 0.09389739483594894, "learning_rate": 0.01, "loss": 1.9873, "step": 52536 }, { "epoch": 5.3958097976789565, "grad_norm": 0.03884744644165039, "learning_rate": 0.01, "loss": 1.997, "step": 52539 }, { "epoch": 5.396117900790798, "grad_norm": 0.04176723584532738, "learning_rate": 0.01, "loss": 1.9845, "step": 52542 }, { "epoch": 5.396426003902639, "grad_norm": 0.04131205379962921, "learning_rate": 0.01, "loss": 1.9914, "step": 52545 }, { "epoch": 5.396734107014481, "grad_norm": 0.03634188696742058, "learning_rate": 0.01, "loss": 1.999, "step": 52548 }, { "epoch": 5.3970422101263225, "grad_norm": 0.09134528040885925, "learning_rate": 0.01, "loss": 2.0024, "step": 52551 }, { "epoch": 5.397350313238164, "grad_norm": 0.07570212334394455, "learning_rate": 0.01, "loss": 2.003, "step": 52554 }, { "epoch": 5.397658416350005, "grad_norm": 0.11560734361410141, "learning_rate": 0.01, "loss": 2.0023, "step": 52557 }, { "epoch": 5.397966519461846, "grad_norm": 0.03760908916592598, "learning_rate": 0.01, "loss": 1.9684, "step": 52560 }, { "epoch": 5.398274622573688, "grad_norm": 0.05205194652080536, "learning_rate": 0.01, "loss": 2.0133, "step": 52563 }, { "epoch": 5.39858272568553, "grad_norm": 0.07257603853940964, "learning_rate": 0.01, "loss": 1.9981, "step": 52566 }, { "epoch": 5.398890828797371, "grad_norm": 0.07266250997781754, "learning_rate": 0.01, "loss": 2.003, "step": 52569 }, { "epoch": 5.399198931909212, "grad_norm": 0.0367962047457695, "learning_rate": 0.01, "loss": 1.9909, "step": 52572 }, { "epoch": 5.3995070350210534, "grad_norm": 0.05613946169614792, "learning_rate": 0.01, "loss": 2.0038, "step": 52575 }, { "epoch": 5.399815138132896, "grad_norm": 0.10363534092903137, "learning_rate": 0.01, "loss": 1.99, "step": 52578 }, { "epoch": 5.400123241244737, "grad_norm": 0.09217333793640137, "learning_rate": 0.01, "loss": 1.9849, "step": 52581 }, { "epoch": 5.400431344356578, "grad_norm": 0.0713183805346489, "learning_rate": 0.01, "loss": 2.0056, "step": 52584 }, { "epoch": 5.400739447468419, "grad_norm": 0.0670512244105339, "learning_rate": 0.01, "loss": 1.9996, "step": 52587 }, { "epoch": 5.401047550580261, "grad_norm": 0.10012122988700867, "learning_rate": 0.01, "loss": 1.9857, "step": 52590 }, { "epoch": 5.401355653692102, "grad_norm": 0.047828782349824905, "learning_rate": 0.01, "loss": 1.99, "step": 52593 }, { "epoch": 5.401663756803944, "grad_norm": 0.09758254140615463, "learning_rate": 0.01, "loss": 2.0082, "step": 52596 }, { "epoch": 5.401971859915785, "grad_norm": 0.044294700026512146, "learning_rate": 0.01, "loss": 1.9965, "step": 52599 }, { "epoch": 5.402279963027627, "grad_norm": 0.04579133540391922, "learning_rate": 0.01, "loss": 2.0004, "step": 52602 }, { "epoch": 5.402588066139468, "grad_norm": 0.04279174283146858, "learning_rate": 0.01, "loss": 1.9895, "step": 52605 }, { "epoch": 5.402896169251309, "grad_norm": 0.08676068484783173, "learning_rate": 0.01, "loss": 1.9876, "step": 52608 }, { "epoch": 5.403204272363151, "grad_norm": 0.09505254775285721, "learning_rate": 0.01, "loss": 2.0037, "step": 52611 }, { "epoch": 5.4035123754749925, "grad_norm": 0.06927873194217682, "learning_rate": 0.01, "loss": 2.0129, "step": 52614 }, { "epoch": 5.403820478586834, "grad_norm": 0.09732890874147415, "learning_rate": 0.01, "loss": 1.9787, "step": 52617 }, { "epoch": 5.404128581698675, "grad_norm": 0.03702055662870407, "learning_rate": 0.01, "loss": 1.9943, "step": 52620 }, { "epoch": 5.404436684810516, "grad_norm": 0.08636049926280975, "learning_rate": 0.01, "loss": 2.0194, "step": 52623 }, { "epoch": 5.404744787922358, "grad_norm": 0.07209763675928116, "learning_rate": 0.01, "loss": 1.9983, "step": 52626 }, { "epoch": 5.4050528910342, "grad_norm": 0.05202876031398773, "learning_rate": 0.01, "loss": 1.9788, "step": 52629 }, { "epoch": 5.405360994146041, "grad_norm": 0.0709710493683815, "learning_rate": 0.01, "loss": 2.0176, "step": 52632 }, { "epoch": 5.405669097257882, "grad_norm": 0.03506077453494072, "learning_rate": 0.01, "loss": 2.0064, "step": 52635 }, { "epoch": 5.4059772003697235, "grad_norm": 0.04027107357978821, "learning_rate": 0.01, "loss": 2.0006, "step": 52638 }, { "epoch": 5.406285303481565, "grad_norm": 0.07768195122480392, "learning_rate": 0.01, "loss": 1.9824, "step": 52641 }, { "epoch": 5.406593406593407, "grad_norm": 0.13171711564064026, "learning_rate": 0.01, "loss": 1.9865, "step": 52644 }, { "epoch": 5.406901509705248, "grad_norm": 0.13514290750026703, "learning_rate": 0.01, "loss": 1.9987, "step": 52647 }, { "epoch": 5.4072096128170895, "grad_norm": 0.05603281781077385, "learning_rate": 0.01, "loss": 1.9865, "step": 52650 }, { "epoch": 5.407517715928931, "grad_norm": 0.07667485624551773, "learning_rate": 0.01, "loss": 2.0062, "step": 52653 }, { "epoch": 5.407825819040772, "grad_norm": 0.06463679671287537, "learning_rate": 0.01, "loss": 1.9842, "step": 52656 }, { "epoch": 5.408133922152614, "grad_norm": 0.06655814498662949, "learning_rate": 0.01, "loss": 1.9808, "step": 52659 }, { "epoch": 5.408442025264455, "grad_norm": 0.1041889637708664, "learning_rate": 0.01, "loss": 2.0085, "step": 52662 }, { "epoch": 5.408750128376297, "grad_norm": 0.06591400504112244, "learning_rate": 0.01, "loss": 1.9947, "step": 52665 }, { "epoch": 5.409058231488138, "grad_norm": 0.033431414514780045, "learning_rate": 0.01, "loss": 1.9908, "step": 52668 }, { "epoch": 5.409366334599979, "grad_norm": 0.04565748572349548, "learning_rate": 0.01, "loss": 1.9918, "step": 52671 }, { "epoch": 5.409674437711821, "grad_norm": 0.04200183227658272, "learning_rate": 0.01, "loss": 1.9882, "step": 52674 }, { "epoch": 5.409982540823663, "grad_norm": 0.03807186335325241, "learning_rate": 0.01, "loss": 2.0075, "step": 52677 }, { "epoch": 5.410290643935504, "grad_norm": 0.09112047404050827, "learning_rate": 0.01, "loss": 1.9751, "step": 52680 }, { "epoch": 5.410598747047345, "grad_norm": 0.10211756825447083, "learning_rate": 0.01, "loss": 1.9996, "step": 52683 }, { "epoch": 5.410906850159186, "grad_norm": 0.1345423012971878, "learning_rate": 0.01, "loss": 1.9803, "step": 52686 }, { "epoch": 5.411214953271028, "grad_norm": 0.07363910228013992, "learning_rate": 0.01, "loss": 2.0172, "step": 52689 }, { "epoch": 5.41152305638287, "grad_norm": 0.08829092979431152, "learning_rate": 0.01, "loss": 1.9759, "step": 52692 }, { "epoch": 5.411831159494711, "grad_norm": 0.06006823107600212, "learning_rate": 0.01, "loss": 1.9683, "step": 52695 }, { "epoch": 5.412139262606552, "grad_norm": 0.06111016124486923, "learning_rate": 0.01, "loss": 1.9713, "step": 52698 }, { "epoch": 5.412447365718394, "grad_norm": 0.057088688015937805, "learning_rate": 0.01, "loss": 1.9623, "step": 52701 }, { "epoch": 5.412755468830235, "grad_norm": 0.04059537127614021, "learning_rate": 0.01, "loss": 1.989, "step": 52704 }, { "epoch": 5.413063571942077, "grad_norm": 0.2159087210893631, "learning_rate": 0.01, "loss": 1.9993, "step": 52707 }, { "epoch": 5.413371675053918, "grad_norm": 0.07151810079813004, "learning_rate": 0.01, "loss": 1.9845, "step": 52710 }, { "epoch": 5.4136797781657595, "grad_norm": 0.09309381246566772, "learning_rate": 0.01, "loss": 2.0023, "step": 52713 }, { "epoch": 5.413987881277601, "grad_norm": 0.039323315024375916, "learning_rate": 0.01, "loss": 1.972, "step": 52716 }, { "epoch": 5.414295984389442, "grad_norm": 0.09738846868276596, "learning_rate": 0.01, "loss": 2.0016, "step": 52719 }, { "epoch": 5.414604087501283, "grad_norm": 0.07482850551605225, "learning_rate": 0.01, "loss": 2.0146, "step": 52722 }, { "epoch": 5.4149121906131255, "grad_norm": 0.05960662290453911, "learning_rate": 0.01, "loss": 1.9899, "step": 52725 }, { "epoch": 5.415220293724967, "grad_norm": 0.061568450182676315, "learning_rate": 0.01, "loss": 2.0127, "step": 52728 }, { "epoch": 5.415528396836808, "grad_norm": 0.05249075964093208, "learning_rate": 0.01, "loss": 2.0298, "step": 52731 }, { "epoch": 5.415836499948649, "grad_norm": 0.04260426387190819, "learning_rate": 0.01, "loss": 2.0029, "step": 52734 }, { "epoch": 5.4161446030604905, "grad_norm": 0.035422783344984055, "learning_rate": 0.01, "loss": 1.9778, "step": 52737 }, { "epoch": 5.416452706172333, "grad_norm": 0.03902021422982216, "learning_rate": 0.01, "loss": 1.9983, "step": 52740 }, { "epoch": 5.416760809284174, "grad_norm": 0.04080606997013092, "learning_rate": 0.01, "loss": 2.0131, "step": 52743 }, { "epoch": 5.417068912396015, "grad_norm": 0.04044007509946823, "learning_rate": 0.01, "loss": 1.9852, "step": 52746 }, { "epoch": 5.417377015507856, "grad_norm": 0.04506811872124672, "learning_rate": 0.01, "loss": 2.0027, "step": 52749 }, { "epoch": 5.417685118619698, "grad_norm": 0.053388748317956924, "learning_rate": 0.01, "loss": 1.9848, "step": 52752 }, { "epoch": 5.41799322173154, "grad_norm": 0.09019593149423599, "learning_rate": 0.01, "loss": 1.9914, "step": 52755 }, { "epoch": 5.418301324843381, "grad_norm": 0.12063855677843094, "learning_rate": 0.01, "loss": 2.0137, "step": 52758 }, { "epoch": 5.418609427955222, "grad_norm": 0.07498898357152939, "learning_rate": 0.01, "loss": 1.9965, "step": 52761 }, { "epoch": 5.418917531067064, "grad_norm": 0.07565269619226456, "learning_rate": 0.01, "loss": 2.0146, "step": 52764 }, { "epoch": 5.419225634178905, "grad_norm": 0.037328194826841354, "learning_rate": 0.01, "loss": 1.9905, "step": 52767 }, { "epoch": 5.419533737290747, "grad_norm": 0.059995539486408234, "learning_rate": 0.01, "loss": 2.0149, "step": 52770 }, { "epoch": 5.419841840402588, "grad_norm": 0.18053491413593292, "learning_rate": 0.01, "loss": 2.0116, "step": 52773 }, { "epoch": 5.42014994351443, "grad_norm": 0.1632491648197174, "learning_rate": 0.01, "loss": 1.9898, "step": 52776 }, { "epoch": 5.420458046626271, "grad_norm": 0.06863465160131454, "learning_rate": 0.01, "loss": 1.9998, "step": 52779 }, { "epoch": 5.420766149738112, "grad_norm": 0.05202522501349449, "learning_rate": 0.01, "loss": 1.9974, "step": 52782 }, { "epoch": 5.421074252849953, "grad_norm": 0.039888009428977966, "learning_rate": 0.01, "loss": 1.9838, "step": 52785 }, { "epoch": 5.4213823559617955, "grad_norm": 0.04122290015220642, "learning_rate": 0.01, "loss": 2.004, "step": 52788 }, { "epoch": 5.421690459073637, "grad_norm": 0.03863810375332832, "learning_rate": 0.01, "loss": 1.9912, "step": 52791 }, { "epoch": 5.421998562185478, "grad_norm": 0.04215069115161896, "learning_rate": 0.01, "loss": 1.9936, "step": 52794 }, { "epoch": 5.422306665297319, "grad_norm": 0.09779973328113556, "learning_rate": 0.01, "loss": 2.0071, "step": 52797 }, { "epoch": 5.422614768409161, "grad_norm": 0.0620935894548893, "learning_rate": 0.01, "loss": 1.9928, "step": 52800 }, { "epoch": 5.422922871521003, "grad_norm": 0.05332612618803978, "learning_rate": 0.01, "loss": 1.9945, "step": 52803 }, { "epoch": 5.423230974632844, "grad_norm": 0.04671257734298706, "learning_rate": 0.01, "loss": 1.9965, "step": 52806 }, { "epoch": 5.423539077744685, "grad_norm": 0.03246668353676796, "learning_rate": 0.01, "loss": 2.0076, "step": 52809 }, { "epoch": 5.4238471808565265, "grad_norm": 0.04667939245700836, "learning_rate": 0.01, "loss": 2.0136, "step": 52812 }, { "epoch": 5.424155283968368, "grad_norm": 0.08633271604776382, "learning_rate": 0.01, "loss": 2.0249, "step": 52815 }, { "epoch": 5.42446338708021, "grad_norm": 0.07084902375936508, "learning_rate": 0.01, "loss": 2.0185, "step": 52818 }, { "epoch": 5.424771490192051, "grad_norm": 0.05388319492340088, "learning_rate": 0.01, "loss": 1.9986, "step": 52821 }, { "epoch": 5.4250795933038924, "grad_norm": 0.05652158334851265, "learning_rate": 0.01, "loss": 1.9961, "step": 52824 }, { "epoch": 5.425387696415734, "grad_norm": 0.09929019957780838, "learning_rate": 0.01, "loss": 1.9703, "step": 52827 }, { "epoch": 5.425695799527575, "grad_norm": 0.1725517362356186, "learning_rate": 0.01, "loss": 2.0257, "step": 52830 }, { "epoch": 5.426003902639417, "grad_norm": 0.06725597381591797, "learning_rate": 0.01, "loss": 1.9997, "step": 52833 }, { "epoch": 5.426312005751258, "grad_norm": 0.03839759901165962, "learning_rate": 0.01, "loss": 1.9665, "step": 52836 }, { "epoch": 5.4266201088631, "grad_norm": 0.03678586333990097, "learning_rate": 0.01, "loss": 1.9787, "step": 52839 }, { "epoch": 5.426928211974941, "grad_norm": 0.04450935125350952, "learning_rate": 0.01, "loss": 2.0098, "step": 52842 }, { "epoch": 5.427236315086782, "grad_norm": 0.05059666931629181, "learning_rate": 0.01, "loss": 2.0007, "step": 52845 }, { "epoch": 5.427544418198623, "grad_norm": 0.05548638850450516, "learning_rate": 0.01, "loss": 1.9928, "step": 52848 }, { "epoch": 5.427852521310466, "grad_norm": 0.03727211058139801, "learning_rate": 0.01, "loss": 2.0055, "step": 52851 }, { "epoch": 5.428160624422307, "grad_norm": 0.1027202233672142, "learning_rate": 0.01, "loss": 2.0112, "step": 52854 }, { "epoch": 5.428468727534148, "grad_norm": 0.07482005655765533, "learning_rate": 0.01, "loss": 1.9833, "step": 52857 }, { "epoch": 5.428776830645989, "grad_norm": 0.06325170397758484, "learning_rate": 0.01, "loss": 2.0173, "step": 52860 }, { "epoch": 5.429084933757831, "grad_norm": 0.11125578731298447, "learning_rate": 0.01, "loss": 1.9847, "step": 52863 }, { "epoch": 5.429393036869673, "grad_norm": 0.07175584882497787, "learning_rate": 0.01, "loss": 2.0186, "step": 52866 }, { "epoch": 5.429701139981514, "grad_norm": 0.03667625039815903, "learning_rate": 0.01, "loss": 1.9743, "step": 52869 }, { "epoch": 5.430009243093355, "grad_norm": 0.0515107661485672, "learning_rate": 0.01, "loss": 1.9745, "step": 52872 }, { "epoch": 5.430317346205197, "grad_norm": 0.09832940995693207, "learning_rate": 0.01, "loss": 1.991, "step": 52875 }, { "epoch": 5.430625449317038, "grad_norm": 0.06182995066046715, "learning_rate": 0.01, "loss": 1.9875, "step": 52878 }, { "epoch": 5.430933552428879, "grad_norm": 0.1432095468044281, "learning_rate": 0.01, "loss": 1.9853, "step": 52881 }, { "epoch": 5.431241655540721, "grad_norm": 0.11832471191883087, "learning_rate": 0.01, "loss": 1.9924, "step": 52884 }, { "epoch": 5.4315497586525625, "grad_norm": 0.07653117924928665, "learning_rate": 0.01, "loss": 1.9937, "step": 52887 }, { "epoch": 5.431857861764404, "grad_norm": 0.06316263228654861, "learning_rate": 0.01, "loss": 1.9854, "step": 52890 }, { "epoch": 5.432165964876245, "grad_norm": 0.058272287249565125, "learning_rate": 0.01, "loss": 1.9918, "step": 52893 }, { "epoch": 5.432474067988086, "grad_norm": 0.06886950880289078, "learning_rate": 0.01, "loss": 1.987, "step": 52896 }, { "epoch": 5.4327821710999284, "grad_norm": 0.053393036127090454, "learning_rate": 0.01, "loss": 2.0044, "step": 52899 }, { "epoch": 5.43309027421177, "grad_norm": 0.06651714444160461, "learning_rate": 0.01, "loss": 2.0033, "step": 52902 }, { "epoch": 5.433398377323611, "grad_norm": 0.04366718977689743, "learning_rate": 0.01, "loss": 1.9734, "step": 52905 }, { "epoch": 5.433706480435452, "grad_norm": 0.040242016315460205, "learning_rate": 0.01, "loss": 2.0101, "step": 52908 }, { "epoch": 5.4340145835472935, "grad_norm": 0.0598304383456707, "learning_rate": 0.01, "loss": 1.9785, "step": 52911 }, { "epoch": 5.434322686659136, "grad_norm": 0.17316143214702606, "learning_rate": 0.01, "loss": 1.9767, "step": 52914 }, { "epoch": 5.434630789770977, "grad_norm": 0.13133604824543, "learning_rate": 0.01, "loss": 1.9819, "step": 52917 }, { "epoch": 5.434938892882818, "grad_norm": 0.0751945972442627, "learning_rate": 0.01, "loss": 1.9745, "step": 52920 }, { "epoch": 5.435246995994659, "grad_norm": 0.06649640202522278, "learning_rate": 0.01, "loss": 1.9884, "step": 52923 }, { "epoch": 5.435555099106501, "grad_norm": 0.13051864504814148, "learning_rate": 0.01, "loss": 1.9994, "step": 52926 }, { "epoch": 5.435863202218343, "grad_norm": 0.06402359157800674, "learning_rate": 0.01, "loss": 1.9606, "step": 52929 }, { "epoch": 5.436171305330184, "grad_norm": 0.06441336125135422, "learning_rate": 0.01, "loss": 1.9815, "step": 52932 }, { "epoch": 5.436479408442025, "grad_norm": 0.09019657224416733, "learning_rate": 0.01, "loss": 1.9954, "step": 52935 }, { "epoch": 5.436787511553867, "grad_norm": 0.04703563451766968, "learning_rate": 0.01, "loss": 1.9938, "step": 52938 }, { "epoch": 5.437095614665708, "grad_norm": 0.03909967467188835, "learning_rate": 0.01, "loss": 1.9879, "step": 52941 }, { "epoch": 5.437403717777549, "grad_norm": 0.05746940150856972, "learning_rate": 0.01, "loss": 1.9935, "step": 52944 }, { "epoch": 5.437711820889391, "grad_norm": 0.11890576034784317, "learning_rate": 0.01, "loss": 2.0062, "step": 52947 }, { "epoch": 5.438019924001233, "grad_norm": 0.07725408673286438, "learning_rate": 0.01, "loss": 1.9977, "step": 52950 }, { "epoch": 5.438328027113074, "grad_norm": 0.05112382769584656, "learning_rate": 0.01, "loss": 1.9891, "step": 52953 }, { "epoch": 5.438636130224915, "grad_norm": 0.08786381781101227, "learning_rate": 0.01, "loss": 1.9671, "step": 52956 }, { "epoch": 5.438944233336756, "grad_norm": 0.07547960430383682, "learning_rate": 0.01, "loss": 1.9953, "step": 52959 }, { "epoch": 5.4392523364485985, "grad_norm": 0.06332848221063614, "learning_rate": 0.01, "loss": 1.9917, "step": 52962 }, { "epoch": 5.43956043956044, "grad_norm": 0.09106019139289856, "learning_rate": 0.01, "loss": 2.0042, "step": 52965 }, { "epoch": 5.439868542672281, "grad_norm": 0.1387149691581726, "learning_rate": 0.01, "loss": 2.005, "step": 52968 }, { "epoch": 5.440176645784122, "grad_norm": 0.07359499484300613, "learning_rate": 0.01, "loss": 1.986, "step": 52971 }, { "epoch": 5.440484748895964, "grad_norm": 0.06554149091243744, "learning_rate": 0.01, "loss": 2.0092, "step": 52974 }, { "epoch": 5.440792852007805, "grad_norm": 0.08161085098981857, "learning_rate": 0.01, "loss": 1.9857, "step": 52977 }, { "epoch": 5.441100955119647, "grad_norm": 0.07675690948963165, "learning_rate": 0.01, "loss": 2.0121, "step": 52980 }, { "epoch": 5.441409058231488, "grad_norm": 0.04832153394818306, "learning_rate": 0.01, "loss": 1.9818, "step": 52983 }, { "epoch": 5.4417171613433295, "grad_norm": 0.03355338051915169, "learning_rate": 0.01, "loss": 1.9913, "step": 52986 }, { "epoch": 5.442025264455171, "grad_norm": 0.03371288999915123, "learning_rate": 0.01, "loss": 1.9981, "step": 52989 }, { "epoch": 5.442333367567012, "grad_norm": 0.11185171455144882, "learning_rate": 0.01, "loss": 1.9831, "step": 52992 }, { "epoch": 5.442641470678854, "grad_norm": 0.059647444635629654, "learning_rate": 0.01, "loss": 1.9779, "step": 52995 }, { "epoch": 5.442949573790695, "grad_norm": 0.03346579894423485, "learning_rate": 0.01, "loss": 1.9663, "step": 52998 }, { "epoch": 5.443257676902537, "grad_norm": 0.0569952167570591, "learning_rate": 0.01, "loss": 1.9747, "step": 53001 }, { "epoch": 5.443565780014378, "grad_norm": 0.07441236078739166, "learning_rate": 0.01, "loss": 1.991, "step": 53004 }, { "epoch": 5.443873883126219, "grad_norm": 0.11744049191474915, "learning_rate": 0.01, "loss": 1.9986, "step": 53007 }, { "epoch": 5.444181986238061, "grad_norm": 0.04918253421783447, "learning_rate": 0.01, "loss": 2.0148, "step": 53010 }, { "epoch": 5.444490089349903, "grad_norm": 0.03746315464377403, "learning_rate": 0.01, "loss": 2.0005, "step": 53013 }, { "epoch": 5.444798192461744, "grad_norm": 0.055012766271829605, "learning_rate": 0.01, "loss": 1.998, "step": 53016 }, { "epoch": 5.445106295573585, "grad_norm": 0.06424736231565475, "learning_rate": 0.01, "loss": 1.998, "step": 53019 }, { "epoch": 5.445414398685426, "grad_norm": 0.09382134675979614, "learning_rate": 0.01, "loss": 2.0071, "step": 53022 }, { "epoch": 5.445722501797269, "grad_norm": 0.048184242099523544, "learning_rate": 0.01, "loss": 1.985, "step": 53025 }, { "epoch": 5.44603060490911, "grad_norm": 0.05549463629722595, "learning_rate": 0.01, "loss": 2.0039, "step": 53028 }, { "epoch": 5.446338708020951, "grad_norm": 0.10756690055131912, "learning_rate": 0.01, "loss": 2.0116, "step": 53031 }, { "epoch": 5.446646811132792, "grad_norm": 0.03808826580643654, "learning_rate": 0.01, "loss": 1.9782, "step": 53034 }, { "epoch": 5.446954914244634, "grad_norm": 0.050678886473178864, "learning_rate": 0.01, "loss": 1.9925, "step": 53037 }, { "epoch": 5.447263017356475, "grad_norm": 0.03573575243353844, "learning_rate": 0.01, "loss": 2.0233, "step": 53040 }, { "epoch": 5.447571120468317, "grad_norm": 0.05302232876420021, "learning_rate": 0.01, "loss": 1.9904, "step": 53043 }, { "epoch": 5.447879223580158, "grad_norm": 0.03617763891816139, "learning_rate": 0.01, "loss": 1.9862, "step": 53046 }, { "epoch": 5.448187326692, "grad_norm": 0.039550527930259705, "learning_rate": 0.01, "loss": 2.0038, "step": 53049 }, { "epoch": 5.448495429803841, "grad_norm": 0.041170112788677216, "learning_rate": 0.01, "loss": 1.9428, "step": 53052 }, { "epoch": 5.448803532915682, "grad_norm": 0.04932108521461487, "learning_rate": 0.01, "loss": 2.0008, "step": 53055 }, { "epoch": 5.449111636027524, "grad_norm": 0.03305281326174736, "learning_rate": 0.01, "loss": 2.0141, "step": 53058 }, { "epoch": 5.4494197391393655, "grad_norm": 0.04628564044833183, "learning_rate": 0.01, "loss": 2.009, "step": 53061 }, { "epoch": 5.449727842251207, "grad_norm": 0.04561088606715202, "learning_rate": 0.01, "loss": 1.9911, "step": 53064 }, { "epoch": 5.450035945363048, "grad_norm": 0.11011772602796555, "learning_rate": 0.01, "loss": 1.9988, "step": 53067 }, { "epoch": 5.450344048474889, "grad_norm": 0.06649364531040192, "learning_rate": 0.01, "loss": 2.0095, "step": 53070 }, { "epoch": 5.450652151586731, "grad_norm": 0.1190907210111618, "learning_rate": 0.01, "loss": 1.9776, "step": 53073 }, { "epoch": 5.450960254698573, "grad_norm": 0.03680254891514778, "learning_rate": 0.01, "loss": 2.0242, "step": 53076 }, { "epoch": 5.451268357810414, "grad_norm": 0.04634016007184982, "learning_rate": 0.01, "loss": 2.0011, "step": 53079 }, { "epoch": 5.451576460922255, "grad_norm": 0.04958662390708923, "learning_rate": 0.01, "loss": 2.0121, "step": 53082 }, { "epoch": 5.4518845640340965, "grad_norm": 0.04827871546149254, "learning_rate": 0.01, "loss": 2.0159, "step": 53085 }, { "epoch": 5.452192667145939, "grad_norm": 0.10182766616344452, "learning_rate": 0.01, "loss": 2.0107, "step": 53088 }, { "epoch": 5.45250077025778, "grad_norm": 0.08067356050014496, "learning_rate": 0.01, "loss": 1.9764, "step": 53091 }, { "epoch": 5.452808873369621, "grad_norm": 0.06457395106554031, "learning_rate": 0.01, "loss": 2.0003, "step": 53094 }, { "epoch": 5.453116976481462, "grad_norm": 0.09059132635593414, "learning_rate": 0.01, "loss": 1.9763, "step": 53097 }, { "epoch": 5.453425079593304, "grad_norm": 0.05704977363348007, "learning_rate": 0.01, "loss": 1.9937, "step": 53100 }, { "epoch": 5.453733182705145, "grad_norm": 0.09887251257896423, "learning_rate": 0.01, "loss": 1.9857, "step": 53103 }, { "epoch": 5.454041285816987, "grad_norm": 0.03840193152427673, "learning_rate": 0.01, "loss": 1.9847, "step": 53106 }, { "epoch": 5.454349388928828, "grad_norm": 0.08743079751729965, "learning_rate": 0.01, "loss": 1.9904, "step": 53109 }, { "epoch": 5.45465749204067, "grad_norm": 0.054274316877126694, "learning_rate": 0.01, "loss": 2.0035, "step": 53112 }, { "epoch": 5.454965595152511, "grad_norm": 0.033519454300403595, "learning_rate": 0.01, "loss": 1.9915, "step": 53115 }, { "epoch": 5.455273698264352, "grad_norm": 0.1241462305188179, "learning_rate": 0.01, "loss": 2.003, "step": 53118 }, { "epoch": 5.455581801376194, "grad_norm": 0.11901737749576569, "learning_rate": 0.01, "loss": 2.0022, "step": 53121 }, { "epoch": 5.455889904488036, "grad_norm": 0.06983425468206406, "learning_rate": 0.01, "loss": 1.9779, "step": 53124 }, { "epoch": 5.456198007599877, "grad_norm": 0.07202869653701782, "learning_rate": 0.01, "loss": 1.9962, "step": 53127 }, { "epoch": 5.456506110711718, "grad_norm": 0.049013327807188034, "learning_rate": 0.01, "loss": 1.9848, "step": 53130 }, { "epoch": 5.456814213823559, "grad_norm": 0.04855305328965187, "learning_rate": 0.01, "loss": 2.0079, "step": 53133 }, { "epoch": 5.457122316935401, "grad_norm": 0.05479707196354866, "learning_rate": 0.01, "loss": 1.9877, "step": 53136 }, { "epoch": 5.457430420047243, "grad_norm": 0.04120011255145073, "learning_rate": 0.01, "loss": 2.0043, "step": 53139 }, { "epoch": 5.457738523159084, "grad_norm": 0.11232541501522064, "learning_rate": 0.01, "loss": 1.9851, "step": 53142 }, { "epoch": 5.458046626270925, "grad_norm": 0.0826067104935646, "learning_rate": 0.01, "loss": 2.0187, "step": 53145 }, { "epoch": 5.458354729382767, "grad_norm": 0.0358428992331028, "learning_rate": 0.01, "loss": 1.9936, "step": 53148 }, { "epoch": 5.458662832494608, "grad_norm": 0.06724508851766586, "learning_rate": 0.01, "loss": 2.0097, "step": 53151 }, { "epoch": 5.45897093560645, "grad_norm": 0.05402735620737076, "learning_rate": 0.01, "loss": 1.9626, "step": 53154 }, { "epoch": 5.459279038718291, "grad_norm": 0.10397180914878845, "learning_rate": 0.01, "loss": 2.0058, "step": 53157 }, { "epoch": 5.4595871418301325, "grad_norm": 0.16697724163532257, "learning_rate": 0.01, "loss": 2.0045, "step": 53160 }, { "epoch": 5.459895244941974, "grad_norm": 0.12541674077510834, "learning_rate": 0.01, "loss": 1.9965, "step": 53163 }, { "epoch": 5.460203348053815, "grad_norm": 0.11177428066730499, "learning_rate": 0.01, "loss": 1.9943, "step": 53166 }, { "epoch": 5.460511451165657, "grad_norm": 0.12203194200992584, "learning_rate": 0.01, "loss": 2.0014, "step": 53169 }, { "epoch": 5.460819554277498, "grad_norm": 0.03482053428888321, "learning_rate": 0.01, "loss": 1.9957, "step": 53172 }, { "epoch": 5.46112765738934, "grad_norm": 0.03225795179605484, "learning_rate": 0.01, "loss": 1.9601, "step": 53175 }, { "epoch": 5.461435760501181, "grad_norm": 0.03760567680001259, "learning_rate": 0.01, "loss": 2.0061, "step": 53178 }, { "epoch": 5.461743863613022, "grad_norm": 0.12543334066867828, "learning_rate": 0.01, "loss": 1.9754, "step": 53181 }, { "epoch": 5.462051966724864, "grad_norm": 0.04533065855503082, "learning_rate": 0.01, "loss": 2.0104, "step": 53184 }, { "epoch": 5.462360069836706, "grad_norm": 0.07519727945327759, "learning_rate": 0.01, "loss": 1.994, "step": 53187 }, { "epoch": 5.462668172948547, "grad_norm": 0.11682818084955215, "learning_rate": 0.01, "loss": 1.9819, "step": 53190 }, { "epoch": 5.462976276060388, "grad_norm": 0.043735865503549576, "learning_rate": 0.01, "loss": 2.0068, "step": 53193 }, { "epoch": 5.463284379172229, "grad_norm": 0.09902381896972656, "learning_rate": 0.01, "loss": 2.0089, "step": 53196 }, { "epoch": 5.463592482284071, "grad_norm": 0.05974416807293892, "learning_rate": 0.01, "loss": 1.9999, "step": 53199 }, { "epoch": 5.463900585395913, "grad_norm": 0.07005894929170609, "learning_rate": 0.01, "loss": 1.9903, "step": 53202 }, { "epoch": 5.464208688507754, "grad_norm": 0.055365189909935, "learning_rate": 0.01, "loss": 1.9798, "step": 53205 }, { "epoch": 5.464516791619595, "grad_norm": 0.07333406060934067, "learning_rate": 0.01, "loss": 1.9972, "step": 53208 }, { "epoch": 5.464824894731437, "grad_norm": 0.03612162545323372, "learning_rate": 0.01, "loss": 1.9946, "step": 53211 }, { "epoch": 5.465132997843278, "grad_norm": 0.036687567830085754, "learning_rate": 0.01, "loss": 1.9966, "step": 53214 }, { "epoch": 5.46544110095512, "grad_norm": 0.0842447429895401, "learning_rate": 0.01, "loss": 2.0063, "step": 53217 }, { "epoch": 5.465749204066961, "grad_norm": 0.06415588408708572, "learning_rate": 0.01, "loss": 1.9863, "step": 53220 }, { "epoch": 5.466057307178803, "grad_norm": 0.0578574612736702, "learning_rate": 0.01, "loss": 2.0036, "step": 53223 }, { "epoch": 5.466365410290644, "grad_norm": 0.04436985403299332, "learning_rate": 0.01, "loss": 1.9813, "step": 53226 }, { "epoch": 5.466673513402485, "grad_norm": 0.1035226359963417, "learning_rate": 0.01, "loss": 1.9639, "step": 53229 }, { "epoch": 5.466981616514326, "grad_norm": 0.04867973551154137, "learning_rate": 0.01, "loss": 2.0105, "step": 53232 }, { "epoch": 5.4672897196261685, "grad_norm": 0.0710483267903328, "learning_rate": 0.01, "loss": 1.9955, "step": 53235 }, { "epoch": 5.46759782273801, "grad_norm": 0.052401233464479446, "learning_rate": 0.01, "loss": 2.0043, "step": 53238 }, { "epoch": 5.467905925849851, "grad_norm": 0.036160893738269806, "learning_rate": 0.01, "loss": 1.9953, "step": 53241 }, { "epoch": 5.468214028961692, "grad_norm": 0.04797692224383354, "learning_rate": 0.01, "loss": 1.9814, "step": 53244 }, { "epoch": 5.4685221320735335, "grad_norm": 0.037899672985076904, "learning_rate": 0.01, "loss": 1.9793, "step": 53247 }, { "epoch": 5.468830235185376, "grad_norm": 0.054781220853328705, "learning_rate": 0.01, "loss": 1.9936, "step": 53250 }, { "epoch": 5.469138338297217, "grad_norm": 0.09595558792352676, "learning_rate": 0.01, "loss": 1.9983, "step": 53253 }, { "epoch": 5.469446441409058, "grad_norm": 0.0511094331741333, "learning_rate": 0.01, "loss": 2.0078, "step": 53256 }, { "epoch": 5.4697545445208995, "grad_norm": 0.05115760117769241, "learning_rate": 0.01, "loss": 1.9845, "step": 53259 }, { "epoch": 5.470062647632741, "grad_norm": 0.05593256279826164, "learning_rate": 0.01, "loss": 2.019, "step": 53262 }, { "epoch": 5.470370750744583, "grad_norm": 0.048911042511463165, "learning_rate": 0.01, "loss": 1.9953, "step": 53265 }, { "epoch": 5.470678853856424, "grad_norm": 0.05227271467447281, "learning_rate": 0.01, "loss": 2.0013, "step": 53268 }, { "epoch": 5.470986956968265, "grad_norm": 0.11259990185499191, "learning_rate": 0.01, "loss": 1.987, "step": 53271 }, { "epoch": 5.471295060080107, "grad_norm": 0.04125455394387245, "learning_rate": 0.01, "loss": 2.0069, "step": 53274 }, { "epoch": 5.471603163191948, "grad_norm": 0.11485335975885391, "learning_rate": 0.01, "loss": 1.9832, "step": 53277 }, { "epoch": 5.47191126630379, "grad_norm": 0.07149787247180939, "learning_rate": 0.01, "loss": 1.9752, "step": 53280 }, { "epoch": 5.472219369415631, "grad_norm": 0.05775618925690651, "learning_rate": 0.01, "loss": 2.0204, "step": 53283 }, { "epoch": 5.472527472527473, "grad_norm": 0.04515690356492996, "learning_rate": 0.01, "loss": 1.99, "step": 53286 }, { "epoch": 5.472835575639314, "grad_norm": 0.0433974415063858, "learning_rate": 0.01, "loss": 1.9746, "step": 53289 }, { "epoch": 5.473143678751155, "grad_norm": 0.03472794219851494, "learning_rate": 0.01, "loss": 2.0019, "step": 53292 }, { "epoch": 5.473451781862996, "grad_norm": 0.04648016393184662, "learning_rate": 0.01, "loss": 1.9567, "step": 53295 }, { "epoch": 5.473759884974839, "grad_norm": 0.048154812306165695, "learning_rate": 0.01, "loss": 1.988, "step": 53298 }, { "epoch": 5.47406798808668, "grad_norm": 0.040329910814762115, "learning_rate": 0.01, "loss": 1.9847, "step": 53301 }, { "epoch": 5.474376091198521, "grad_norm": 0.10805533081293106, "learning_rate": 0.01, "loss": 1.9859, "step": 53304 }, { "epoch": 5.474684194310362, "grad_norm": 0.04139288142323494, "learning_rate": 0.01, "loss": 1.9787, "step": 53307 }, { "epoch": 5.474992297422204, "grad_norm": 0.08323890715837479, "learning_rate": 0.01, "loss": 1.9751, "step": 53310 }, { "epoch": 5.475300400534046, "grad_norm": 0.07098772376775742, "learning_rate": 0.01, "loss": 2.001, "step": 53313 }, { "epoch": 5.475608503645887, "grad_norm": 0.09589933604001999, "learning_rate": 0.01, "loss": 2.0163, "step": 53316 }, { "epoch": 5.475916606757728, "grad_norm": 0.09487750381231308, "learning_rate": 0.01, "loss": 1.9941, "step": 53319 }, { "epoch": 5.4762247098695696, "grad_norm": 0.04062903672456741, "learning_rate": 0.01, "loss": 1.9845, "step": 53322 }, { "epoch": 5.476532812981411, "grad_norm": 0.036831121891736984, "learning_rate": 0.01, "loss": 1.9833, "step": 53325 }, { "epoch": 5.476840916093252, "grad_norm": 0.05887876823544502, "learning_rate": 0.01, "loss": 1.9974, "step": 53328 }, { "epoch": 5.477149019205094, "grad_norm": 0.07680708169937134, "learning_rate": 0.01, "loss": 1.9925, "step": 53331 }, { "epoch": 5.4774571223169355, "grad_norm": 0.06624645739793777, "learning_rate": 0.01, "loss": 2.0242, "step": 53334 }, { "epoch": 5.477765225428777, "grad_norm": 0.051530107855796814, "learning_rate": 0.01, "loss": 1.9942, "step": 53337 }, { "epoch": 5.478073328540618, "grad_norm": 0.046162448823451996, "learning_rate": 0.01, "loss": 1.9859, "step": 53340 }, { "epoch": 5.478381431652459, "grad_norm": 0.038336243480443954, "learning_rate": 0.01, "loss": 2.0043, "step": 53343 }, { "epoch": 5.478689534764301, "grad_norm": 0.03402607515454292, "learning_rate": 0.01, "loss": 1.9678, "step": 53346 }, { "epoch": 5.478997637876143, "grad_norm": 0.10580262541770935, "learning_rate": 0.01, "loss": 1.9766, "step": 53349 }, { "epoch": 5.479305740987984, "grad_norm": 0.13053545355796814, "learning_rate": 0.01, "loss": 1.9978, "step": 53352 }, { "epoch": 5.479613844099825, "grad_norm": 0.050016142427921295, "learning_rate": 0.01, "loss": 1.9924, "step": 53355 }, { "epoch": 5.4799219472116665, "grad_norm": 0.03820059821009636, "learning_rate": 0.01, "loss": 1.9905, "step": 53358 }, { "epoch": 5.480230050323509, "grad_norm": 0.04353965446352959, "learning_rate": 0.01, "loss": 2.0286, "step": 53361 }, { "epoch": 5.48053815343535, "grad_norm": 0.04177290201187134, "learning_rate": 0.01, "loss": 2.0074, "step": 53364 }, { "epoch": 5.480846256547191, "grad_norm": 0.03511528670787811, "learning_rate": 0.01, "loss": 1.9861, "step": 53367 }, { "epoch": 5.481154359659032, "grad_norm": 0.10579612106084824, "learning_rate": 0.01, "loss": 1.9883, "step": 53370 }, { "epoch": 5.481462462770874, "grad_norm": 0.08428878337144852, "learning_rate": 0.01, "loss": 2.0179, "step": 53373 }, { "epoch": 5.481770565882716, "grad_norm": 0.045665886253118515, "learning_rate": 0.01, "loss": 1.9834, "step": 53376 }, { "epoch": 5.482078668994557, "grad_norm": 0.10956547409296036, "learning_rate": 0.01, "loss": 1.9911, "step": 53379 }, { "epoch": 5.482386772106398, "grad_norm": 0.07283762842416763, "learning_rate": 0.01, "loss": 2.0061, "step": 53382 }, { "epoch": 5.48269487521824, "grad_norm": 0.11426263302564621, "learning_rate": 0.01, "loss": 2.0013, "step": 53385 }, { "epoch": 5.483002978330081, "grad_norm": 0.06644517928361893, "learning_rate": 0.01, "loss": 1.9973, "step": 53388 }, { "epoch": 5.483311081441922, "grad_norm": 0.06233721226453781, "learning_rate": 0.01, "loss": 2.0111, "step": 53391 }, { "epoch": 5.483619184553764, "grad_norm": 0.06707890331745148, "learning_rate": 0.01, "loss": 2.0027, "step": 53394 }, { "epoch": 5.4839272876656056, "grad_norm": 0.06242217868566513, "learning_rate": 0.01, "loss": 1.9823, "step": 53397 }, { "epoch": 5.484235390777447, "grad_norm": 0.03913586214184761, "learning_rate": 0.01, "loss": 1.9915, "step": 53400 }, { "epoch": 5.484543493889288, "grad_norm": 0.10017285495996475, "learning_rate": 0.01, "loss": 1.9768, "step": 53403 }, { "epoch": 5.484851597001129, "grad_norm": 0.07849404960870743, "learning_rate": 0.01, "loss": 2.017, "step": 53406 }, { "epoch": 5.4851597001129715, "grad_norm": 0.06911784410476685, "learning_rate": 0.01, "loss": 2.0207, "step": 53409 }, { "epoch": 5.485467803224813, "grad_norm": 0.07752948254346848, "learning_rate": 0.01, "loss": 2.0029, "step": 53412 }, { "epoch": 5.485775906336654, "grad_norm": 0.0855301171541214, "learning_rate": 0.01, "loss": 1.9779, "step": 53415 }, { "epoch": 5.486084009448495, "grad_norm": 0.039153728634119034, "learning_rate": 0.01, "loss": 1.9984, "step": 53418 }, { "epoch": 5.4863921125603365, "grad_norm": 0.09871362149715424, "learning_rate": 0.01, "loss": 1.9849, "step": 53421 }, { "epoch": 5.486700215672179, "grad_norm": 0.05920962989330292, "learning_rate": 0.01, "loss": 1.9729, "step": 53424 }, { "epoch": 5.48700831878402, "grad_norm": 0.04103608801960945, "learning_rate": 0.01, "loss": 2.0363, "step": 53427 }, { "epoch": 5.487316421895861, "grad_norm": 0.08447948098182678, "learning_rate": 0.01, "loss": 1.9856, "step": 53430 }, { "epoch": 5.4876245250077025, "grad_norm": 0.055456507951021194, "learning_rate": 0.01, "loss": 1.9746, "step": 53433 }, { "epoch": 5.487932628119544, "grad_norm": 0.05828576907515526, "learning_rate": 0.01, "loss": 1.983, "step": 53436 }, { "epoch": 5.488240731231386, "grad_norm": 0.09117809683084488, "learning_rate": 0.01, "loss": 2.0269, "step": 53439 }, { "epoch": 5.488548834343227, "grad_norm": 0.08137334883213043, "learning_rate": 0.01, "loss": 1.9983, "step": 53442 }, { "epoch": 5.488856937455068, "grad_norm": 0.08830280601978302, "learning_rate": 0.01, "loss": 2.0016, "step": 53445 }, { "epoch": 5.48916504056691, "grad_norm": 0.04386366158723831, "learning_rate": 0.01, "loss": 1.9987, "step": 53448 }, { "epoch": 5.489473143678751, "grad_norm": 0.0500885471701622, "learning_rate": 0.01, "loss": 1.997, "step": 53451 }, { "epoch": 5.489781246790592, "grad_norm": 0.044326718896627426, "learning_rate": 0.01, "loss": 1.9894, "step": 53454 }, { "epoch": 5.490089349902434, "grad_norm": 0.1106194257736206, "learning_rate": 0.01, "loss": 1.9871, "step": 53457 }, { "epoch": 5.490397453014276, "grad_norm": 0.05222434550523758, "learning_rate": 0.01, "loss": 1.9999, "step": 53460 }, { "epoch": 5.490705556126117, "grad_norm": 0.08016736060380936, "learning_rate": 0.01, "loss": 1.9846, "step": 53463 }, { "epoch": 5.491013659237958, "grad_norm": 0.06921573728322983, "learning_rate": 0.01, "loss": 2.0048, "step": 53466 }, { "epoch": 5.491321762349799, "grad_norm": 0.10140072554349899, "learning_rate": 0.01, "loss": 1.9885, "step": 53469 }, { "epoch": 5.491629865461642, "grad_norm": 0.05930350720882416, "learning_rate": 0.01, "loss": 2.0103, "step": 53472 }, { "epoch": 5.491937968573483, "grad_norm": 0.054523758590221405, "learning_rate": 0.01, "loss": 1.9926, "step": 53475 }, { "epoch": 5.492246071685324, "grad_norm": 0.06033066287636757, "learning_rate": 0.01, "loss": 2.0146, "step": 53478 }, { "epoch": 5.492554174797165, "grad_norm": 0.11593367904424667, "learning_rate": 0.01, "loss": 1.9988, "step": 53481 }, { "epoch": 5.492862277909007, "grad_norm": 0.054470766335725784, "learning_rate": 0.01, "loss": 2.0093, "step": 53484 }, { "epoch": 5.493170381020848, "grad_norm": 0.05710428208112717, "learning_rate": 0.01, "loss": 1.9988, "step": 53487 }, { "epoch": 5.49347848413269, "grad_norm": 0.08135402947664261, "learning_rate": 0.01, "loss": 1.9895, "step": 53490 }, { "epoch": 5.493786587244531, "grad_norm": 0.13751734793186188, "learning_rate": 0.01, "loss": 2.0027, "step": 53493 }, { "epoch": 5.4940946903563725, "grad_norm": 0.07379119843244553, "learning_rate": 0.01, "loss": 1.9757, "step": 53496 }, { "epoch": 5.494402793468214, "grad_norm": 0.0595991387963295, "learning_rate": 0.01, "loss": 1.9961, "step": 53499 }, { "epoch": 5.494710896580055, "grad_norm": 0.05081510916352272, "learning_rate": 0.01, "loss": 1.9804, "step": 53502 }, { "epoch": 5.495018999691897, "grad_norm": 0.040366947650909424, "learning_rate": 0.01, "loss": 1.9703, "step": 53505 }, { "epoch": 5.4953271028037385, "grad_norm": 0.10154601186513901, "learning_rate": 0.01, "loss": 1.9979, "step": 53508 }, { "epoch": 5.49563520591558, "grad_norm": 0.0562141053378582, "learning_rate": 0.01, "loss": 2.0076, "step": 53511 }, { "epoch": 5.495943309027421, "grad_norm": 0.07368075847625732, "learning_rate": 0.01, "loss": 2.0106, "step": 53514 }, { "epoch": 5.496251412139262, "grad_norm": 0.04702366143465042, "learning_rate": 0.01, "loss": 1.9734, "step": 53517 }, { "epoch": 5.496559515251104, "grad_norm": 0.05341365188360214, "learning_rate": 0.01, "loss": 1.9943, "step": 53520 }, { "epoch": 5.496867618362946, "grad_norm": 0.10820145905017853, "learning_rate": 0.01, "loss": 2.0101, "step": 53523 }, { "epoch": 5.497175721474787, "grad_norm": 0.053429488092660904, "learning_rate": 0.01, "loss": 1.9723, "step": 53526 }, { "epoch": 5.497483824586628, "grad_norm": 0.1272108554840088, "learning_rate": 0.01, "loss": 1.9985, "step": 53529 }, { "epoch": 5.4977919276984695, "grad_norm": 0.043057285249233246, "learning_rate": 0.01, "loss": 1.9961, "step": 53532 }, { "epoch": 5.498100030810312, "grad_norm": 0.055451132357120514, "learning_rate": 0.01, "loss": 1.9723, "step": 53535 }, { "epoch": 5.498408133922153, "grad_norm": 0.11000920832157135, "learning_rate": 0.01, "loss": 2.0072, "step": 53538 }, { "epoch": 5.498716237033994, "grad_norm": 0.043610602617263794, "learning_rate": 0.01, "loss": 1.9832, "step": 53541 }, { "epoch": 5.499024340145835, "grad_norm": 0.12951324880123138, "learning_rate": 0.01, "loss": 1.9833, "step": 53544 }, { "epoch": 5.499332443257677, "grad_norm": 0.1155344694852829, "learning_rate": 0.01, "loss": 2.0095, "step": 53547 }, { "epoch": 5.499640546369518, "grad_norm": 0.04202111065387726, "learning_rate": 0.01, "loss": 1.9721, "step": 53550 }, { "epoch": 5.49994864948136, "grad_norm": 0.05797869712114334, "learning_rate": 0.01, "loss": 1.9742, "step": 53553 }, { "epoch": 5.500256752593201, "grad_norm": 0.055514171719551086, "learning_rate": 0.01, "loss": 2.002, "step": 53556 }, { "epoch": 5.500564855705043, "grad_norm": 0.032696306705474854, "learning_rate": 0.01, "loss": 1.9675, "step": 53559 }, { "epoch": 5.500872958816884, "grad_norm": 0.07791826128959656, "learning_rate": 0.01, "loss": 2.0007, "step": 53562 }, { "epoch": 5.501181061928725, "grad_norm": 0.12042553722858429, "learning_rate": 0.01, "loss": 1.9967, "step": 53565 }, { "epoch": 5.501489165040567, "grad_norm": 0.08734118938446045, "learning_rate": 0.01, "loss": 1.9866, "step": 53568 }, { "epoch": 5.5017972681524085, "grad_norm": 0.1427556276321411, "learning_rate": 0.01, "loss": 1.9747, "step": 53571 }, { "epoch": 5.50210537126425, "grad_norm": 0.05691046267747879, "learning_rate": 0.01, "loss": 1.9907, "step": 53574 }, { "epoch": 5.502413474376091, "grad_norm": 0.05428704619407654, "learning_rate": 0.01, "loss": 1.9803, "step": 53577 }, { "epoch": 5.502721577487932, "grad_norm": 0.07390681654214859, "learning_rate": 0.01, "loss": 1.9959, "step": 53580 }, { "epoch": 5.503029680599774, "grad_norm": 0.04650009423494339, "learning_rate": 0.01, "loss": 2.0132, "step": 53583 }, { "epoch": 5.503337783711616, "grad_norm": 0.06623617559671402, "learning_rate": 0.01, "loss": 2.0145, "step": 53586 }, { "epoch": 5.503645886823457, "grad_norm": 0.08254354447126389, "learning_rate": 0.01, "loss": 2.0073, "step": 53589 }, { "epoch": 5.503953989935298, "grad_norm": 0.07407466322183609, "learning_rate": 0.01, "loss": 2.0043, "step": 53592 }, { "epoch": 5.5042620930471395, "grad_norm": 0.05553009733557701, "learning_rate": 0.01, "loss": 1.9923, "step": 53595 }, { "epoch": 5.504570196158982, "grad_norm": 0.068050317466259, "learning_rate": 0.01, "loss": 1.9975, "step": 53598 }, { "epoch": 5.504878299270823, "grad_norm": 0.036538973450660706, "learning_rate": 0.01, "loss": 2.0111, "step": 53601 }, { "epoch": 5.505186402382664, "grad_norm": 0.04083050787448883, "learning_rate": 0.01, "loss": 1.9942, "step": 53604 }, { "epoch": 5.5054945054945055, "grad_norm": 0.03994308039546013, "learning_rate": 0.01, "loss": 1.9752, "step": 53607 }, { "epoch": 5.505802608606347, "grad_norm": 0.039935458451509476, "learning_rate": 0.01, "loss": 2.0083, "step": 53610 }, { "epoch": 5.506110711718188, "grad_norm": 0.09762649983167648, "learning_rate": 0.01, "loss": 1.9997, "step": 53613 }, { "epoch": 5.50641881483003, "grad_norm": 0.058254171162843704, "learning_rate": 0.01, "loss": 1.9831, "step": 53616 }, { "epoch": 5.506726917941871, "grad_norm": 0.11991842836141586, "learning_rate": 0.01, "loss": 2.0198, "step": 53619 }, { "epoch": 5.507035021053713, "grad_norm": 0.04026370868086815, "learning_rate": 0.01, "loss": 1.9698, "step": 53622 }, { "epoch": 5.507343124165554, "grad_norm": 0.03855542093515396, "learning_rate": 0.01, "loss": 2.0004, "step": 53625 }, { "epoch": 5.507651227277395, "grad_norm": 0.04439062252640724, "learning_rate": 0.01, "loss": 1.9776, "step": 53628 }, { "epoch": 5.507959330389237, "grad_norm": 0.09283201396465302, "learning_rate": 0.01, "loss": 1.9897, "step": 53631 }, { "epoch": 5.508267433501079, "grad_norm": 0.05495656281709671, "learning_rate": 0.01, "loss": 1.9517, "step": 53634 }, { "epoch": 5.50857553661292, "grad_norm": 0.08464661240577698, "learning_rate": 0.01, "loss": 1.9946, "step": 53637 }, { "epoch": 5.508883639724761, "grad_norm": 0.06856382638216019, "learning_rate": 0.01, "loss": 1.9549, "step": 53640 }, { "epoch": 5.509191742836602, "grad_norm": 0.10367941111326218, "learning_rate": 0.01, "loss": 2.0157, "step": 53643 }, { "epoch": 5.509499845948444, "grad_norm": 0.07614094018936157, "learning_rate": 0.01, "loss": 1.9837, "step": 53646 }, { "epoch": 5.509807949060286, "grad_norm": 0.07301559299230576, "learning_rate": 0.01, "loss": 1.9778, "step": 53649 }, { "epoch": 5.510116052172127, "grad_norm": 0.03928094357252121, "learning_rate": 0.01, "loss": 1.9903, "step": 53652 }, { "epoch": 5.510424155283968, "grad_norm": 0.04542843624949455, "learning_rate": 0.01, "loss": 2.0183, "step": 53655 }, { "epoch": 5.51073225839581, "grad_norm": 0.08662543445825577, "learning_rate": 0.01, "loss": 1.9868, "step": 53658 }, { "epoch": 5.511040361507651, "grad_norm": 0.045004189014434814, "learning_rate": 0.01, "loss": 2.0143, "step": 53661 }, { "epoch": 5.511348464619493, "grad_norm": 0.03661379590630531, "learning_rate": 0.01, "loss": 1.9887, "step": 53664 }, { "epoch": 5.511656567731334, "grad_norm": 0.05117429420351982, "learning_rate": 0.01, "loss": 1.9681, "step": 53667 }, { "epoch": 5.5119646708431755, "grad_norm": 0.06222992762923241, "learning_rate": 0.01, "loss": 1.9904, "step": 53670 }, { "epoch": 5.512272773955017, "grad_norm": 0.04274081811308861, "learning_rate": 0.01, "loss": 1.977, "step": 53673 }, { "epoch": 5.512580877066858, "grad_norm": 0.09774081408977509, "learning_rate": 0.01, "loss": 1.9789, "step": 53676 }, { "epoch": 5.512888980178699, "grad_norm": 0.15694968402385712, "learning_rate": 0.01, "loss": 1.9814, "step": 53679 }, { "epoch": 5.5131970832905415, "grad_norm": 0.06906448304653168, "learning_rate": 0.01, "loss": 1.9997, "step": 53682 }, { "epoch": 5.513505186402383, "grad_norm": 0.04059962183237076, "learning_rate": 0.01, "loss": 1.9894, "step": 53685 }, { "epoch": 5.513813289514224, "grad_norm": 0.03401316702365875, "learning_rate": 0.01, "loss": 1.9862, "step": 53688 }, { "epoch": 5.514121392626065, "grad_norm": 0.03932545334100723, "learning_rate": 0.01, "loss": 1.9991, "step": 53691 }, { "epoch": 5.514429495737907, "grad_norm": 0.04314441978931427, "learning_rate": 0.01, "loss": 1.9895, "step": 53694 }, { "epoch": 5.514737598849749, "grad_norm": 0.04282282292842865, "learning_rate": 0.01, "loss": 2.0311, "step": 53697 }, { "epoch": 5.51504570196159, "grad_norm": 0.07264575362205505, "learning_rate": 0.01, "loss": 1.9968, "step": 53700 }, { "epoch": 5.515353805073431, "grad_norm": 0.031450774520635605, "learning_rate": 0.01, "loss": 2.0088, "step": 53703 }, { "epoch": 5.5156619081852725, "grad_norm": 0.04431367665529251, "learning_rate": 0.01, "loss": 2.0218, "step": 53706 }, { "epoch": 5.515970011297114, "grad_norm": 0.059152550995349884, "learning_rate": 0.01, "loss": 1.9776, "step": 53709 }, { "epoch": 5.516278114408956, "grad_norm": 0.05627863109111786, "learning_rate": 0.01, "loss": 2.0023, "step": 53712 }, { "epoch": 5.516586217520797, "grad_norm": 0.12007083743810654, "learning_rate": 0.01, "loss": 2.003, "step": 53715 }, { "epoch": 5.516894320632638, "grad_norm": 0.04498304799199104, "learning_rate": 0.01, "loss": 2.0171, "step": 53718 }, { "epoch": 5.51720242374448, "grad_norm": 0.05469832196831703, "learning_rate": 0.01, "loss": 2.0195, "step": 53721 }, { "epoch": 5.517510526856321, "grad_norm": 0.12083622813224792, "learning_rate": 0.01, "loss": 1.9909, "step": 53724 }, { "epoch": 5.517818629968163, "grad_norm": 0.05532558634877205, "learning_rate": 0.01, "loss": 2.0263, "step": 53727 }, { "epoch": 5.518126733080004, "grad_norm": 0.0655357614159584, "learning_rate": 0.01, "loss": 2.0141, "step": 53730 }, { "epoch": 5.518434836191846, "grad_norm": 0.03844601288437843, "learning_rate": 0.01, "loss": 2.0126, "step": 53733 }, { "epoch": 5.518742939303687, "grad_norm": 0.08736705034971237, "learning_rate": 0.01, "loss": 1.9982, "step": 53736 }, { "epoch": 5.519051042415528, "grad_norm": 0.03798610344529152, "learning_rate": 0.01, "loss": 2.0017, "step": 53739 }, { "epoch": 5.519359145527369, "grad_norm": 0.08266885578632355, "learning_rate": 0.01, "loss": 1.9744, "step": 53742 }, { "epoch": 5.5196672486392115, "grad_norm": 0.12414561212062836, "learning_rate": 0.01, "loss": 1.9877, "step": 53745 }, { "epoch": 5.519975351751053, "grad_norm": 0.07735760509967804, "learning_rate": 0.01, "loss": 1.9847, "step": 53748 }, { "epoch": 5.520283454862894, "grad_norm": 0.07708865404129028, "learning_rate": 0.01, "loss": 1.9994, "step": 53751 }, { "epoch": 5.520591557974735, "grad_norm": 0.08443256467580795, "learning_rate": 0.01, "loss": 1.9776, "step": 53754 }, { "epoch": 5.520899661086577, "grad_norm": 0.061041250824928284, "learning_rate": 0.01, "loss": 2.0072, "step": 53757 }, { "epoch": 5.521207764198419, "grad_norm": 0.06206076219677925, "learning_rate": 0.01, "loss": 1.9791, "step": 53760 }, { "epoch": 5.52151586731026, "grad_norm": 0.07099471986293793, "learning_rate": 0.01, "loss": 1.9812, "step": 53763 }, { "epoch": 5.521823970422101, "grad_norm": 0.08134863525629044, "learning_rate": 0.01, "loss": 1.9825, "step": 53766 }, { "epoch": 5.5221320735339425, "grad_norm": 0.08037443459033966, "learning_rate": 0.01, "loss": 2.0215, "step": 53769 }, { "epoch": 5.522440176645784, "grad_norm": 0.06751944124698639, "learning_rate": 0.01, "loss": 2.0003, "step": 53772 }, { "epoch": 5.522748279757625, "grad_norm": 0.04069173336029053, "learning_rate": 0.01, "loss": 1.9942, "step": 53775 }, { "epoch": 5.523056382869467, "grad_norm": 0.04320038482546806, "learning_rate": 0.01, "loss": 2.016, "step": 53778 }, { "epoch": 5.5233644859813085, "grad_norm": 0.11609040200710297, "learning_rate": 0.01, "loss": 1.9983, "step": 53781 }, { "epoch": 5.52367258909315, "grad_norm": 0.050336502492427826, "learning_rate": 0.01, "loss": 1.9971, "step": 53784 }, { "epoch": 5.523980692204991, "grad_norm": 0.039348434656858444, "learning_rate": 0.01, "loss": 1.9847, "step": 53787 }, { "epoch": 5.524288795316833, "grad_norm": 0.08109190315008163, "learning_rate": 0.01, "loss": 1.961, "step": 53790 }, { "epoch": 5.524596898428674, "grad_norm": 0.07181521505117416, "learning_rate": 0.01, "loss": 1.9684, "step": 53793 }, { "epoch": 5.524905001540516, "grad_norm": 0.04678622633218765, "learning_rate": 0.01, "loss": 1.967, "step": 53796 }, { "epoch": 5.525213104652357, "grad_norm": 0.079826720058918, "learning_rate": 0.01, "loss": 2.0056, "step": 53799 }, { "epoch": 5.525521207764198, "grad_norm": 0.08414063602685928, "learning_rate": 0.01, "loss": 1.9946, "step": 53802 }, { "epoch": 5.5258293108760395, "grad_norm": 0.04829956591129303, "learning_rate": 0.01, "loss": 1.9831, "step": 53805 }, { "epoch": 5.526137413987882, "grad_norm": 0.0862230584025383, "learning_rate": 0.01, "loss": 1.9738, "step": 53808 }, { "epoch": 5.526445517099723, "grad_norm": 0.06035618856549263, "learning_rate": 0.01, "loss": 2.0033, "step": 53811 }, { "epoch": 5.526753620211564, "grad_norm": 0.04363265261054039, "learning_rate": 0.01, "loss": 1.9914, "step": 53814 }, { "epoch": 5.527061723323405, "grad_norm": 0.06226656585931778, "learning_rate": 0.01, "loss": 1.9977, "step": 53817 }, { "epoch": 5.527369826435247, "grad_norm": 0.10821603238582611, "learning_rate": 0.01, "loss": 2.0045, "step": 53820 }, { "epoch": 5.527677929547089, "grad_norm": 0.06734979152679443, "learning_rate": 0.01, "loss": 2.0031, "step": 53823 }, { "epoch": 5.52798603265893, "grad_norm": 0.07929663360118866, "learning_rate": 0.01, "loss": 1.9829, "step": 53826 }, { "epoch": 5.528294135770771, "grad_norm": 0.11255302280187607, "learning_rate": 0.01, "loss": 1.99, "step": 53829 }, { "epoch": 5.528602238882613, "grad_norm": 0.06797608733177185, "learning_rate": 0.01, "loss": 1.9903, "step": 53832 }, { "epoch": 5.528910341994454, "grad_norm": 0.07606469094753265, "learning_rate": 0.01, "loss": 1.9926, "step": 53835 }, { "epoch": 5.529218445106295, "grad_norm": 0.04698120057582855, "learning_rate": 0.01, "loss": 1.9661, "step": 53838 }, { "epoch": 5.529526548218137, "grad_norm": 0.07087966799736023, "learning_rate": 0.01, "loss": 2.0004, "step": 53841 }, { "epoch": 5.5298346513299785, "grad_norm": 0.048692237585783005, "learning_rate": 0.01, "loss": 1.9858, "step": 53844 }, { "epoch": 5.53014275444182, "grad_norm": 0.04359065368771553, "learning_rate": 0.01, "loss": 1.9956, "step": 53847 }, { "epoch": 5.530450857553661, "grad_norm": 0.05424369126558304, "learning_rate": 0.01, "loss": 2.0108, "step": 53850 }, { "epoch": 5.530758960665503, "grad_norm": 0.13464900851249695, "learning_rate": 0.01, "loss": 1.9907, "step": 53853 }, { "epoch": 5.5310670637773445, "grad_norm": 0.038927361369132996, "learning_rate": 0.01, "loss": 1.9933, "step": 53856 }, { "epoch": 5.531375166889186, "grad_norm": 0.040312107652425766, "learning_rate": 0.01, "loss": 1.9988, "step": 53859 }, { "epoch": 5.531683270001027, "grad_norm": 0.05727381631731987, "learning_rate": 0.01, "loss": 1.9879, "step": 53862 }, { "epoch": 5.531991373112868, "grad_norm": 0.06750021874904633, "learning_rate": 0.01, "loss": 1.9842, "step": 53865 }, { "epoch": 5.5322994762247095, "grad_norm": 0.08220100402832031, "learning_rate": 0.01, "loss": 1.9826, "step": 53868 }, { "epoch": 5.532607579336552, "grad_norm": 0.05215999484062195, "learning_rate": 0.01, "loss": 2.026, "step": 53871 }, { "epoch": 5.532915682448393, "grad_norm": 0.06510480493307114, "learning_rate": 0.01, "loss": 2.0101, "step": 53874 }, { "epoch": 5.533223785560234, "grad_norm": 0.04641730710864067, "learning_rate": 0.01, "loss": 2.0139, "step": 53877 }, { "epoch": 5.5335318886720755, "grad_norm": 0.04414188116788864, "learning_rate": 0.01, "loss": 2.0314, "step": 53880 }, { "epoch": 5.533839991783917, "grad_norm": 0.041879378259181976, "learning_rate": 0.01, "loss": 1.9848, "step": 53883 }, { "epoch": 5.534148094895759, "grad_norm": 0.039413318037986755, "learning_rate": 0.01, "loss": 1.9972, "step": 53886 }, { "epoch": 5.5344561980076, "grad_norm": 0.0784526839852333, "learning_rate": 0.01, "loss": 2.0144, "step": 53889 }, { "epoch": 5.534764301119441, "grad_norm": 0.1049879789352417, "learning_rate": 0.01, "loss": 2.0088, "step": 53892 }, { "epoch": 5.535072404231283, "grad_norm": 0.15476803481578827, "learning_rate": 0.01, "loss": 2.017, "step": 53895 }, { "epoch": 5.535380507343124, "grad_norm": 0.1224508136510849, "learning_rate": 0.01, "loss": 1.9856, "step": 53898 }, { "epoch": 5.535688610454965, "grad_norm": 0.060283955186605453, "learning_rate": 0.01, "loss": 1.9801, "step": 53901 }, { "epoch": 5.535996713566807, "grad_norm": 0.05532943457365036, "learning_rate": 0.01, "loss": 1.9924, "step": 53904 }, { "epoch": 5.536304816678649, "grad_norm": 0.04745487496256828, "learning_rate": 0.01, "loss": 2.0121, "step": 53907 }, { "epoch": 5.53661291979049, "grad_norm": 0.05770549178123474, "learning_rate": 0.01, "loss": 1.9754, "step": 53910 }, { "epoch": 5.536921022902331, "grad_norm": 0.05675048008561134, "learning_rate": 0.01, "loss": 2.004, "step": 53913 }, { "epoch": 5.537229126014172, "grad_norm": 0.05538811534643173, "learning_rate": 0.01, "loss": 1.9991, "step": 53916 }, { "epoch": 5.5375372291260145, "grad_norm": 0.05474154278635979, "learning_rate": 0.01, "loss": 2.0032, "step": 53919 }, { "epoch": 5.537845332237856, "grad_norm": 0.1373576670885086, "learning_rate": 0.01, "loss": 1.9917, "step": 53922 }, { "epoch": 5.538153435349697, "grad_norm": 0.04938695207238197, "learning_rate": 0.01, "loss": 2.0097, "step": 53925 }, { "epoch": 5.538461538461538, "grad_norm": 0.08616656810045242, "learning_rate": 0.01, "loss": 1.9829, "step": 53928 }, { "epoch": 5.53876964157338, "grad_norm": 0.0794534832239151, "learning_rate": 0.01, "loss": 2.0125, "step": 53931 }, { "epoch": 5.539077744685221, "grad_norm": 0.07034434378147125, "learning_rate": 0.01, "loss": 2.0093, "step": 53934 }, { "epoch": 5.539385847797063, "grad_norm": 0.09252175688743591, "learning_rate": 0.01, "loss": 1.9782, "step": 53937 }, { "epoch": 5.539693950908904, "grad_norm": 0.05770866200327873, "learning_rate": 0.01, "loss": 2.0054, "step": 53940 }, { "epoch": 5.5400020540207455, "grad_norm": 0.06952711939811707, "learning_rate": 0.01, "loss": 2.0007, "step": 53943 }, { "epoch": 5.540310157132587, "grad_norm": 0.06332490593194962, "learning_rate": 0.01, "loss": 1.972, "step": 53946 }, { "epoch": 5.540618260244429, "grad_norm": 0.03297451511025429, "learning_rate": 0.01, "loss": 1.9877, "step": 53949 }, { "epoch": 5.54092636335627, "grad_norm": 0.03710363432765007, "learning_rate": 0.01, "loss": 1.9966, "step": 53952 }, { "epoch": 5.5412344664681115, "grad_norm": 0.05471167713403702, "learning_rate": 0.01, "loss": 1.9889, "step": 53955 }, { "epoch": 5.541542569579953, "grad_norm": 0.0861392468214035, "learning_rate": 0.01, "loss": 1.9909, "step": 53958 }, { "epoch": 5.541850672691794, "grad_norm": 0.08588635921478271, "learning_rate": 0.01, "loss": 2.003, "step": 53961 }, { "epoch": 5.542158775803635, "grad_norm": 0.09388336539268494, "learning_rate": 0.01, "loss": 1.9866, "step": 53964 }, { "epoch": 5.542466878915477, "grad_norm": 0.1397831290960312, "learning_rate": 0.01, "loss": 1.9989, "step": 53967 }, { "epoch": 5.542774982027319, "grad_norm": 0.10498721152544022, "learning_rate": 0.01, "loss": 2.0093, "step": 53970 }, { "epoch": 5.54308308513916, "grad_norm": 0.0574629008769989, "learning_rate": 0.01, "loss": 2.02, "step": 53973 }, { "epoch": 5.543391188251001, "grad_norm": 0.03409456089138985, "learning_rate": 0.01, "loss": 1.9942, "step": 53976 }, { "epoch": 5.5436992913628425, "grad_norm": 0.03830345720052719, "learning_rate": 0.01, "loss": 1.9648, "step": 53979 }, { "epoch": 5.544007394474685, "grad_norm": 0.0545862540602684, "learning_rate": 0.01, "loss": 1.9916, "step": 53982 }, { "epoch": 5.544315497586526, "grad_norm": 0.047009505331516266, "learning_rate": 0.01, "loss": 1.9757, "step": 53985 }, { "epoch": 5.544623600698367, "grad_norm": 0.0546221099793911, "learning_rate": 0.01, "loss": 1.9836, "step": 53988 }, { "epoch": 5.544931703810208, "grad_norm": 0.05153699591755867, "learning_rate": 0.01, "loss": 1.9832, "step": 53991 }, { "epoch": 5.54523980692205, "grad_norm": 0.1104668453335762, "learning_rate": 0.01, "loss": 1.9944, "step": 53994 }, { "epoch": 5.545547910033891, "grad_norm": 0.09211790561676025, "learning_rate": 0.01, "loss": 1.978, "step": 53997 }, { "epoch": 5.545856013145733, "grad_norm": 0.03746125102043152, "learning_rate": 0.01, "loss": 1.9822, "step": 54000 }, { "epoch": 5.546164116257574, "grad_norm": 0.05226179584860802, "learning_rate": 0.01, "loss": 1.981, "step": 54003 }, { "epoch": 5.546472219369416, "grad_norm": 0.08278732746839523, "learning_rate": 0.01, "loss": 1.9914, "step": 54006 }, { "epoch": 5.546780322481257, "grad_norm": 0.08003208786249161, "learning_rate": 0.01, "loss": 2.0169, "step": 54009 }, { "epoch": 5.547088425593098, "grad_norm": 0.09529578685760498, "learning_rate": 0.01, "loss": 1.98, "step": 54012 }, { "epoch": 5.54739652870494, "grad_norm": 0.08121462911367416, "learning_rate": 0.01, "loss": 2.0033, "step": 54015 }, { "epoch": 5.5477046318167815, "grad_norm": 0.07625698298215866, "learning_rate": 0.01, "loss": 1.9979, "step": 54018 }, { "epoch": 5.548012734928623, "grad_norm": 0.11102813482284546, "learning_rate": 0.01, "loss": 2.0028, "step": 54021 }, { "epoch": 5.548320838040464, "grad_norm": 0.05425316095352173, "learning_rate": 0.01, "loss": 1.9911, "step": 54024 }, { "epoch": 5.548628941152305, "grad_norm": 0.050913844257593155, "learning_rate": 0.01, "loss": 1.9919, "step": 54027 }, { "epoch": 5.548937044264147, "grad_norm": 0.049932949244976044, "learning_rate": 0.01, "loss": 1.985, "step": 54030 }, { "epoch": 5.549245147375989, "grad_norm": 0.058598097413778305, "learning_rate": 0.01, "loss": 1.9949, "step": 54033 }, { "epoch": 5.54955325048783, "grad_norm": 0.06767188012599945, "learning_rate": 0.01, "loss": 2.0074, "step": 54036 }, { "epoch": 5.549861353599671, "grad_norm": 0.0709863007068634, "learning_rate": 0.01, "loss": 1.97, "step": 54039 }, { "epoch": 5.5501694567115125, "grad_norm": 0.09273546189069748, "learning_rate": 0.01, "loss": 1.9545, "step": 54042 }, { "epoch": 5.550477559823355, "grad_norm": 0.04681755229830742, "learning_rate": 0.01, "loss": 1.982, "step": 54045 }, { "epoch": 5.550785662935196, "grad_norm": 0.08744305372238159, "learning_rate": 0.01, "loss": 1.9904, "step": 54048 }, { "epoch": 5.551093766047037, "grad_norm": 0.03497626259922981, "learning_rate": 0.01, "loss": 1.9794, "step": 54051 }, { "epoch": 5.5514018691588785, "grad_norm": 0.05988140404224396, "learning_rate": 0.01, "loss": 1.9883, "step": 54054 }, { "epoch": 5.55170997227072, "grad_norm": 0.0464041493833065, "learning_rate": 0.01, "loss": 1.9822, "step": 54057 }, { "epoch": 5.552018075382561, "grad_norm": 0.051982734352350235, "learning_rate": 0.01, "loss": 1.9965, "step": 54060 }, { "epoch": 5.552326178494403, "grad_norm": 0.045320626348257065, "learning_rate": 0.01, "loss": 1.9806, "step": 54063 }, { "epoch": 5.552634281606244, "grad_norm": 0.04230513423681259, "learning_rate": 0.01, "loss": 2.0064, "step": 54066 }, { "epoch": 5.552942384718086, "grad_norm": 0.0393347293138504, "learning_rate": 0.01, "loss": 2.0179, "step": 54069 }, { "epoch": 5.553250487829927, "grad_norm": 0.1013195738196373, "learning_rate": 0.01, "loss": 1.984, "step": 54072 }, { "epoch": 5.553558590941768, "grad_norm": 0.08008227497339249, "learning_rate": 0.01, "loss": 2.0113, "step": 54075 }, { "epoch": 5.55386669405361, "grad_norm": 0.0729672759771347, "learning_rate": 0.01, "loss": 1.9885, "step": 54078 }, { "epoch": 5.554174797165452, "grad_norm": 0.0624358095228672, "learning_rate": 0.01, "loss": 2.02, "step": 54081 }, { "epoch": 5.554482900277293, "grad_norm": 0.03352997824549675, "learning_rate": 0.01, "loss": 1.988, "step": 54084 }, { "epoch": 5.554791003389134, "grad_norm": 0.03241400793194771, "learning_rate": 0.01, "loss": 2.0163, "step": 54087 }, { "epoch": 5.555099106500975, "grad_norm": 0.033829499036073685, "learning_rate": 0.01, "loss": 2.0074, "step": 54090 }, { "epoch": 5.555407209612817, "grad_norm": 0.03398764133453369, "learning_rate": 0.01, "loss": 1.9916, "step": 54093 }, { "epoch": 5.555715312724659, "grad_norm": 0.04630080237984657, "learning_rate": 0.01, "loss": 1.9941, "step": 54096 }, { "epoch": 5.5560234158365, "grad_norm": 0.08986920863389969, "learning_rate": 0.01, "loss": 1.9908, "step": 54099 }, { "epoch": 5.556331518948341, "grad_norm": 0.07276762276887894, "learning_rate": 0.01, "loss": 1.985, "step": 54102 }, { "epoch": 5.556639622060183, "grad_norm": 0.07180680334568024, "learning_rate": 0.01, "loss": 1.9875, "step": 54105 }, { "epoch": 5.556947725172025, "grad_norm": 0.0470985509455204, "learning_rate": 0.01, "loss": 1.9998, "step": 54108 }, { "epoch": 5.557255828283866, "grad_norm": 0.07641714811325073, "learning_rate": 0.01, "loss": 1.9903, "step": 54111 }, { "epoch": 5.557563931395707, "grad_norm": 0.11065167188644409, "learning_rate": 0.01, "loss": 1.9954, "step": 54114 }, { "epoch": 5.5578720345075485, "grad_norm": 0.10439474880695343, "learning_rate": 0.01, "loss": 1.9979, "step": 54117 }, { "epoch": 5.55818013761939, "grad_norm": 0.044907502830028534, "learning_rate": 0.01, "loss": 2.0008, "step": 54120 }, { "epoch": 5.558488240731231, "grad_norm": 0.052572451531887054, "learning_rate": 0.01, "loss": 1.9945, "step": 54123 }, { "epoch": 5.558796343843073, "grad_norm": 0.03218472748994827, "learning_rate": 0.01, "loss": 1.995, "step": 54126 }, { "epoch": 5.5591044469549145, "grad_norm": 0.04737766459584236, "learning_rate": 0.01, "loss": 1.9572, "step": 54129 }, { "epoch": 5.559412550066756, "grad_norm": 0.045241083949804306, "learning_rate": 0.01, "loss": 1.9938, "step": 54132 }, { "epoch": 5.559720653178597, "grad_norm": 0.08022741228342056, "learning_rate": 0.01, "loss": 1.9641, "step": 54135 }, { "epoch": 5.560028756290438, "grad_norm": 0.1389225870370865, "learning_rate": 0.01, "loss": 1.9806, "step": 54138 }, { "epoch": 5.56033685940228, "grad_norm": 0.1731427162885666, "learning_rate": 0.01, "loss": 1.9961, "step": 54141 }, { "epoch": 5.560644962514122, "grad_norm": 0.13785654306411743, "learning_rate": 0.01, "loss": 1.9695, "step": 54144 }, { "epoch": 5.560953065625963, "grad_norm": 0.08374970406293869, "learning_rate": 0.01, "loss": 1.9702, "step": 54147 }, { "epoch": 5.561261168737804, "grad_norm": 0.038511909544467926, "learning_rate": 0.01, "loss": 1.9911, "step": 54150 }, { "epoch": 5.5615692718496454, "grad_norm": 0.04264768585562706, "learning_rate": 0.01, "loss": 1.9935, "step": 54153 }, { "epoch": 5.561877374961487, "grad_norm": 0.04369993880391121, "learning_rate": 0.01, "loss": 1.9771, "step": 54156 }, { "epoch": 5.562185478073329, "grad_norm": 0.04195026680827141, "learning_rate": 0.01, "loss": 2.0044, "step": 54159 }, { "epoch": 5.56249358118517, "grad_norm": 0.04474294185638428, "learning_rate": 0.01, "loss": 1.9984, "step": 54162 }, { "epoch": 5.562801684297011, "grad_norm": 0.049389854073524475, "learning_rate": 0.01, "loss": 2.027, "step": 54165 }, { "epoch": 5.563109787408853, "grad_norm": 0.06065073981881142, "learning_rate": 0.01, "loss": 2.0184, "step": 54168 }, { "epoch": 5.563417890520694, "grad_norm": 0.1028941348195076, "learning_rate": 0.01, "loss": 2.0015, "step": 54171 }, { "epoch": 5.563725993632536, "grad_norm": 0.05602087453007698, "learning_rate": 0.01, "loss": 2.0044, "step": 54174 }, { "epoch": 5.564034096744377, "grad_norm": 0.09422741830348969, "learning_rate": 0.01, "loss": 1.9656, "step": 54177 }, { "epoch": 5.564342199856219, "grad_norm": 0.08502856642007828, "learning_rate": 0.01, "loss": 2.0054, "step": 54180 }, { "epoch": 5.56465030296806, "grad_norm": 0.1169920489192009, "learning_rate": 0.01, "loss": 1.9849, "step": 54183 }, { "epoch": 5.564958406079901, "grad_norm": 0.08334633708000183, "learning_rate": 0.01, "loss": 2.0013, "step": 54186 }, { "epoch": 5.565266509191742, "grad_norm": 0.04524173587560654, "learning_rate": 0.01, "loss": 1.9991, "step": 54189 }, { "epoch": 5.5655746123035845, "grad_norm": 0.04702974855899811, "learning_rate": 0.01, "loss": 1.9957, "step": 54192 }, { "epoch": 5.565882715415426, "grad_norm": 0.052152637392282486, "learning_rate": 0.01, "loss": 1.9934, "step": 54195 }, { "epoch": 5.566190818527267, "grad_norm": 0.07976489514112473, "learning_rate": 0.01, "loss": 1.9818, "step": 54198 }, { "epoch": 5.566498921639108, "grad_norm": 0.10382959991693497, "learning_rate": 0.01, "loss": 2.0228, "step": 54201 }, { "epoch": 5.5668070247509505, "grad_norm": 0.062233816832304, "learning_rate": 0.01, "loss": 2.0144, "step": 54204 }, { "epoch": 5.567115127862792, "grad_norm": 0.0779416561126709, "learning_rate": 0.01, "loss": 2.0182, "step": 54207 }, { "epoch": 5.567423230974633, "grad_norm": 0.05779222771525383, "learning_rate": 0.01, "loss": 1.9977, "step": 54210 }, { "epoch": 5.567731334086474, "grad_norm": 0.035859446972608566, "learning_rate": 0.01, "loss": 1.983, "step": 54213 }, { "epoch": 5.5680394371983155, "grad_norm": 0.03528286889195442, "learning_rate": 0.01, "loss": 1.9969, "step": 54216 }, { "epoch": 5.568347540310157, "grad_norm": 0.03673080727458, "learning_rate": 0.01, "loss": 1.975, "step": 54219 }, { "epoch": 5.568655643421999, "grad_norm": 0.10049016028642654, "learning_rate": 0.01, "loss": 1.999, "step": 54222 }, { "epoch": 5.56896374653384, "grad_norm": 0.09414393454790115, "learning_rate": 0.01, "loss": 2.0009, "step": 54225 }, { "epoch": 5.5692718496456814, "grad_norm": 0.09574013203382492, "learning_rate": 0.01, "loss": 2.0118, "step": 54228 }, { "epoch": 5.569579952757523, "grad_norm": 0.06100035086274147, "learning_rate": 0.01, "loss": 2.0029, "step": 54231 }, { "epoch": 5.569888055869364, "grad_norm": 0.03801640495657921, "learning_rate": 0.01, "loss": 1.9899, "step": 54234 }, { "epoch": 5.570196158981206, "grad_norm": 0.04721992835402489, "learning_rate": 0.01, "loss": 1.9916, "step": 54237 }, { "epoch": 5.570504262093047, "grad_norm": 0.14547844231128693, "learning_rate": 0.01, "loss": 1.9996, "step": 54240 }, { "epoch": 5.570812365204889, "grad_norm": 0.0706636980175972, "learning_rate": 0.01, "loss": 1.9924, "step": 54243 }, { "epoch": 5.57112046831673, "grad_norm": 0.05795181170105934, "learning_rate": 0.01, "loss": 1.9894, "step": 54246 }, { "epoch": 5.571428571428571, "grad_norm": 0.04111074283719063, "learning_rate": 0.01, "loss": 1.9824, "step": 54249 }, { "epoch": 5.571736674540412, "grad_norm": 0.07275404036045074, "learning_rate": 0.01, "loss": 2.0116, "step": 54252 }, { "epoch": 5.572044777652255, "grad_norm": 0.06297045946121216, "learning_rate": 0.01, "loss": 1.9876, "step": 54255 }, { "epoch": 5.572352880764096, "grad_norm": 0.03748214617371559, "learning_rate": 0.01, "loss": 1.9963, "step": 54258 }, { "epoch": 5.572660983875937, "grad_norm": 0.0806899219751358, "learning_rate": 0.01, "loss": 1.9804, "step": 54261 }, { "epoch": 5.572969086987778, "grad_norm": 0.07261806726455688, "learning_rate": 0.01, "loss": 1.9871, "step": 54264 }, { "epoch": 5.57327719009962, "grad_norm": 0.09828619658946991, "learning_rate": 0.01, "loss": 2.0007, "step": 54267 }, { "epoch": 5.573585293211462, "grad_norm": 0.057755280286073685, "learning_rate": 0.01, "loss": 2.0058, "step": 54270 }, { "epoch": 5.573893396323303, "grad_norm": 0.053999438881874084, "learning_rate": 0.01, "loss": 1.992, "step": 54273 }, { "epoch": 5.574201499435144, "grad_norm": 0.058348219841718674, "learning_rate": 0.01, "loss": 1.9964, "step": 54276 }, { "epoch": 5.574509602546986, "grad_norm": 0.0748370811343193, "learning_rate": 0.01, "loss": 1.9881, "step": 54279 }, { "epoch": 5.574817705658827, "grad_norm": 0.12268301099538803, "learning_rate": 0.01, "loss": 1.9805, "step": 54282 }, { "epoch": 5.575125808770668, "grad_norm": 0.04664819315075874, "learning_rate": 0.01, "loss": 2.0232, "step": 54285 }, { "epoch": 5.57543391188251, "grad_norm": 0.061234939843416214, "learning_rate": 0.01, "loss": 1.9799, "step": 54288 }, { "epoch": 5.5757420149943515, "grad_norm": 0.034011028707027435, "learning_rate": 0.01, "loss": 2.0084, "step": 54291 }, { "epoch": 5.576050118106193, "grad_norm": 0.03644438087940216, "learning_rate": 0.01, "loss": 1.9863, "step": 54294 }, { "epoch": 5.576358221218034, "grad_norm": 0.03703255578875542, "learning_rate": 0.01, "loss": 2.01, "step": 54297 }, { "epoch": 5.576666324329876, "grad_norm": 0.06192943826317787, "learning_rate": 0.01, "loss": 1.9909, "step": 54300 }, { "epoch": 5.5769744274417175, "grad_norm": 0.07393354922533035, "learning_rate": 0.01, "loss": 2.0098, "step": 54303 }, { "epoch": 5.577282530553559, "grad_norm": 0.10186754167079926, "learning_rate": 0.01, "loss": 2.0114, "step": 54306 }, { "epoch": 5.5775906336654, "grad_norm": 0.05970088392496109, "learning_rate": 0.01, "loss": 1.9897, "step": 54309 }, { "epoch": 5.577898736777241, "grad_norm": 0.07863925397396088, "learning_rate": 0.01, "loss": 2.0041, "step": 54312 }, { "epoch": 5.5782068398890825, "grad_norm": 0.08788790553808212, "learning_rate": 0.01, "loss": 1.9686, "step": 54315 }, { "epoch": 5.578514943000925, "grad_norm": 0.04963497817516327, "learning_rate": 0.01, "loss": 2.0, "step": 54318 }, { "epoch": 5.578823046112766, "grad_norm": 0.06898415833711624, "learning_rate": 0.01, "loss": 1.9785, "step": 54321 }, { "epoch": 5.579131149224607, "grad_norm": 0.07146736234426498, "learning_rate": 0.01, "loss": 1.9857, "step": 54324 }, { "epoch": 5.579439252336448, "grad_norm": 0.08930405974388123, "learning_rate": 0.01, "loss": 1.9994, "step": 54327 }, { "epoch": 5.57974735544829, "grad_norm": 0.11615147441625595, "learning_rate": 0.01, "loss": 1.9866, "step": 54330 }, { "epoch": 5.580055458560132, "grad_norm": 0.12027255445718765, "learning_rate": 0.01, "loss": 1.9856, "step": 54333 }, { "epoch": 5.580363561671973, "grad_norm": 0.09193901717662811, "learning_rate": 0.01, "loss": 1.9878, "step": 54336 }, { "epoch": 5.580671664783814, "grad_norm": 0.0674210712313652, "learning_rate": 0.01, "loss": 2.0032, "step": 54339 }, { "epoch": 5.580979767895656, "grad_norm": 0.05408472940325737, "learning_rate": 0.01, "loss": 1.9864, "step": 54342 }, { "epoch": 5.581287871007497, "grad_norm": 0.04289040341973305, "learning_rate": 0.01, "loss": 1.9922, "step": 54345 }, { "epoch": 5.581595974119338, "grad_norm": 0.04341943934559822, "learning_rate": 0.01, "loss": 1.9803, "step": 54348 }, { "epoch": 5.58190407723118, "grad_norm": 0.03630705177783966, "learning_rate": 0.01, "loss": 2.0038, "step": 54351 }, { "epoch": 5.582212180343022, "grad_norm": 0.10383928567171097, "learning_rate": 0.01, "loss": 1.9807, "step": 54354 }, { "epoch": 5.582520283454863, "grad_norm": 0.042623065412044525, "learning_rate": 0.01, "loss": 2.0048, "step": 54357 }, { "epoch": 5.582828386566704, "grad_norm": 0.07011984288692474, "learning_rate": 0.01, "loss": 2.0105, "step": 54360 }, { "epoch": 5.583136489678545, "grad_norm": 0.09444596618413925, "learning_rate": 0.01, "loss": 1.989, "step": 54363 }, { "epoch": 5.5834445927903875, "grad_norm": 0.0737951472401619, "learning_rate": 0.01, "loss": 1.9701, "step": 54366 }, { "epoch": 5.583752695902229, "grad_norm": 0.06040029227733612, "learning_rate": 0.01, "loss": 1.9901, "step": 54369 }, { "epoch": 5.58406079901407, "grad_norm": 0.059084903448820114, "learning_rate": 0.01, "loss": 1.9991, "step": 54372 }, { "epoch": 5.584368902125911, "grad_norm": 0.043141599744558334, "learning_rate": 0.01, "loss": 1.9886, "step": 54375 }, { "epoch": 5.584677005237753, "grad_norm": 0.07285004109144211, "learning_rate": 0.01, "loss": 1.9833, "step": 54378 }, { "epoch": 5.584985108349595, "grad_norm": 0.05245014652609825, "learning_rate": 0.01, "loss": 1.9755, "step": 54381 }, { "epoch": 5.585293211461436, "grad_norm": 0.06724348664283752, "learning_rate": 0.01, "loss": 1.984, "step": 54384 }, { "epoch": 5.585601314573277, "grad_norm": 0.0695512667298317, "learning_rate": 0.01, "loss": 1.9735, "step": 54387 }, { "epoch": 5.5859094176851185, "grad_norm": 0.08221160620450974, "learning_rate": 0.01, "loss": 2.0008, "step": 54390 }, { "epoch": 5.58621752079696, "grad_norm": 0.04641614854335785, "learning_rate": 0.01, "loss": 1.9788, "step": 54393 }, { "epoch": 5.586525623908802, "grad_norm": 0.04592394083738327, "learning_rate": 0.01, "loss": 2.0142, "step": 54396 }, { "epoch": 5.586833727020643, "grad_norm": 0.04568556696176529, "learning_rate": 0.01, "loss": 2.0158, "step": 54399 }, { "epoch": 5.587141830132484, "grad_norm": 0.05011072754859924, "learning_rate": 0.01, "loss": 1.9939, "step": 54402 }, { "epoch": 5.587449933244326, "grad_norm": 0.05919931083917618, "learning_rate": 0.01, "loss": 1.9815, "step": 54405 }, { "epoch": 5.587758036356167, "grad_norm": 0.1045934408903122, "learning_rate": 0.01, "loss": 1.9926, "step": 54408 }, { "epoch": 5.588066139468008, "grad_norm": 0.06998443603515625, "learning_rate": 0.01, "loss": 1.9896, "step": 54411 }, { "epoch": 5.58837424257985, "grad_norm": 0.07034524530172348, "learning_rate": 0.01, "loss": 1.9891, "step": 54414 }, { "epoch": 5.588682345691692, "grad_norm": 0.10572990775108337, "learning_rate": 0.01, "loss": 1.9869, "step": 54417 }, { "epoch": 5.588990448803533, "grad_norm": 0.04444350302219391, "learning_rate": 0.01, "loss": 1.9848, "step": 54420 }, { "epoch": 5.589298551915374, "grad_norm": 0.06670062988996506, "learning_rate": 0.01, "loss": 1.998, "step": 54423 }, { "epoch": 5.589606655027215, "grad_norm": 0.08078444004058838, "learning_rate": 0.01, "loss": 1.991, "step": 54426 }, { "epoch": 5.589914758139058, "grad_norm": 0.06824928522109985, "learning_rate": 0.01, "loss": 1.9973, "step": 54429 }, { "epoch": 5.590222861250899, "grad_norm": 0.04053177684545517, "learning_rate": 0.01, "loss": 2.0025, "step": 54432 }, { "epoch": 5.59053096436274, "grad_norm": 0.07821489870548248, "learning_rate": 0.01, "loss": 1.9914, "step": 54435 }, { "epoch": 5.590839067474581, "grad_norm": 0.09564534574747086, "learning_rate": 0.01, "loss": 2.0017, "step": 54438 }, { "epoch": 5.591147170586423, "grad_norm": 0.06303589791059494, "learning_rate": 0.01, "loss": 1.984, "step": 54441 }, { "epoch": 5.591455273698264, "grad_norm": 0.08767011016607285, "learning_rate": 0.01, "loss": 1.986, "step": 54444 }, { "epoch": 5.591763376810106, "grad_norm": 0.06719005852937698, "learning_rate": 0.01, "loss": 1.9885, "step": 54447 }, { "epoch": 5.592071479921947, "grad_norm": 0.0828474760055542, "learning_rate": 0.01, "loss": 2.0041, "step": 54450 }, { "epoch": 5.592379583033789, "grad_norm": 0.07154206931591034, "learning_rate": 0.01, "loss": 1.9997, "step": 54453 }, { "epoch": 5.59268768614563, "grad_norm": 0.0818270891904831, "learning_rate": 0.01, "loss": 1.9933, "step": 54456 }, { "epoch": 5.592995789257472, "grad_norm": 0.046949781477451324, "learning_rate": 0.01, "loss": 2.0029, "step": 54459 }, { "epoch": 5.593303892369313, "grad_norm": 0.17068155109882355, "learning_rate": 0.01, "loss": 1.9964, "step": 54462 }, { "epoch": 5.5936119954811545, "grad_norm": 0.14092612266540527, "learning_rate": 0.01, "loss": 2.0142, "step": 54465 }, { "epoch": 5.593920098592996, "grad_norm": 0.12458490580320358, "learning_rate": 0.01, "loss": 1.9845, "step": 54468 }, { "epoch": 5.594228201704837, "grad_norm": 0.07861830294132233, "learning_rate": 0.01, "loss": 1.9777, "step": 54471 }, { "epoch": 5.594536304816678, "grad_norm": 0.0819336324930191, "learning_rate": 0.01, "loss": 1.9998, "step": 54474 }, { "epoch": 5.5948444079285204, "grad_norm": 0.04734665900468826, "learning_rate": 0.01, "loss": 1.999, "step": 54477 }, { "epoch": 5.595152511040362, "grad_norm": 0.04812568798661232, "learning_rate": 0.01, "loss": 2.0041, "step": 54480 }, { "epoch": 5.595460614152203, "grad_norm": 0.03981874883174896, "learning_rate": 0.01, "loss": 1.9971, "step": 54483 }, { "epoch": 5.595768717264044, "grad_norm": 0.034884098917245865, "learning_rate": 0.01, "loss": 1.9842, "step": 54486 }, { "epoch": 5.5960768203758855, "grad_norm": 0.04170473292469978, "learning_rate": 0.01, "loss": 1.98, "step": 54489 }, { "epoch": 5.596384923487728, "grad_norm": 0.06037883087992668, "learning_rate": 0.01, "loss": 1.9795, "step": 54492 }, { "epoch": 5.596693026599569, "grad_norm": 0.06271529197692871, "learning_rate": 0.01, "loss": 1.976, "step": 54495 }, { "epoch": 5.59700112971141, "grad_norm": 0.0646272599697113, "learning_rate": 0.01, "loss": 1.9708, "step": 54498 }, { "epoch": 5.597309232823251, "grad_norm": 0.05499435216188431, "learning_rate": 0.01, "loss": 2.0049, "step": 54501 }, { "epoch": 5.597617335935093, "grad_norm": 0.06327464431524277, "learning_rate": 0.01, "loss": 2.0053, "step": 54504 }, { "epoch": 5.597925439046934, "grad_norm": 0.07843179255723953, "learning_rate": 0.01, "loss": 2.0019, "step": 54507 }, { "epoch": 5.598233542158776, "grad_norm": 0.13546060025691986, "learning_rate": 0.01, "loss": 1.9699, "step": 54510 }, { "epoch": 5.598541645270617, "grad_norm": 0.05903356522321701, "learning_rate": 0.01, "loss": 1.9957, "step": 54513 }, { "epoch": 5.598849748382459, "grad_norm": 0.10746420919895172, "learning_rate": 0.01, "loss": 1.9966, "step": 54516 }, { "epoch": 5.5991578514943, "grad_norm": 0.07332249730825424, "learning_rate": 0.01, "loss": 2.004, "step": 54519 }, { "epoch": 5.599465954606141, "grad_norm": 0.05419987067580223, "learning_rate": 0.01, "loss": 1.9958, "step": 54522 }, { "epoch": 5.599774057717983, "grad_norm": 0.06261706352233887, "learning_rate": 0.01, "loss": 1.9917, "step": 54525 }, { "epoch": 5.600082160829825, "grad_norm": 0.06889716535806656, "learning_rate": 0.01, "loss": 1.9771, "step": 54528 }, { "epoch": 5.600390263941666, "grad_norm": 0.07229785621166229, "learning_rate": 0.01, "loss": 1.9874, "step": 54531 }, { "epoch": 5.600698367053507, "grad_norm": 0.07027054578065872, "learning_rate": 0.01, "loss": 1.983, "step": 54534 }, { "epoch": 5.601006470165348, "grad_norm": 0.0769757479429245, "learning_rate": 0.01, "loss": 1.9774, "step": 54537 }, { "epoch": 5.60131457327719, "grad_norm": 0.07835566997528076, "learning_rate": 0.01, "loss": 2.0014, "step": 54540 }, { "epoch": 5.601622676389032, "grad_norm": 0.059996653348207474, "learning_rate": 0.01, "loss": 2.0125, "step": 54543 }, { "epoch": 5.601930779500873, "grad_norm": 0.0982513576745987, "learning_rate": 0.01, "loss": 1.9933, "step": 54546 }, { "epoch": 5.602238882612714, "grad_norm": 0.08362521976232529, "learning_rate": 0.01, "loss": 1.9973, "step": 54549 }, { "epoch": 5.602546985724556, "grad_norm": 0.0593801811337471, "learning_rate": 0.01, "loss": 2.0037, "step": 54552 }, { "epoch": 5.602855088836398, "grad_norm": 0.09806974232196808, "learning_rate": 0.01, "loss": 1.9792, "step": 54555 }, { "epoch": 5.603163191948239, "grad_norm": 0.041621893644332886, "learning_rate": 0.01, "loss": 2.0033, "step": 54558 }, { "epoch": 5.60347129506008, "grad_norm": 0.04981600120663643, "learning_rate": 0.01, "loss": 1.9978, "step": 54561 }, { "epoch": 5.6037793981719215, "grad_norm": 0.08302900940179825, "learning_rate": 0.01, "loss": 2.0004, "step": 54564 }, { "epoch": 5.604087501283763, "grad_norm": 0.03515220060944557, "learning_rate": 0.01, "loss": 1.991, "step": 54567 }, { "epoch": 5.604395604395604, "grad_norm": 0.0783744752407074, "learning_rate": 0.01, "loss": 2.0215, "step": 54570 }, { "epoch": 5.604703707507446, "grad_norm": 0.03960327431559563, "learning_rate": 0.01, "loss": 2.0049, "step": 54573 }, { "epoch": 5.605011810619287, "grad_norm": 0.04865705221891403, "learning_rate": 0.01, "loss": 1.9862, "step": 54576 }, { "epoch": 5.605319913731129, "grad_norm": 0.03959539160132408, "learning_rate": 0.01, "loss": 1.9959, "step": 54579 }, { "epoch": 5.60562801684297, "grad_norm": 0.08320534974336624, "learning_rate": 0.01, "loss": 1.9979, "step": 54582 }, { "epoch": 5.605936119954811, "grad_norm": 0.038179684430360794, "learning_rate": 0.01, "loss": 1.9987, "step": 54585 }, { "epoch": 5.606244223066653, "grad_norm": 0.04269682243466377, "learning_rate": 0.01, "loss": 1.9976, "step": 54588 }, { "epoch": 5.606552326178495, "grad_norm": 0.08585328608751297, "learning_rate": 0.01, "loss": 1.9669, "step": 54591 }, { "epoch": 5.606860429290336, "grad_norm": 0.04968829080462456, "learning_rate": 0.01, "loss": 1.9976, "step": 54594 }, { "epoch": 5.607168532402177, "grad_norm": 0.04485391452908516, "learning_rate": 0.01, "loss": 1.9963, "step": 54597 }, { "epoch": 5.607476635514018, "grad_norm": 0.03415564075112343, "learning_rate": 0.01, "loss": 1.9878, "step": 54600 }, { "epoch": 5.60778473862586, "grad_norm": 0.10459756851196289, "learning_rate": 0.01, "loss": 1.9868, "step": 54603 }, { "epoch": 5.608092841737702, "grad_norm": 0.04644683003425598, "learning_rate": 0.01, "loss": 1.995, "step": 54606 }, { "epoch": 5.608400944849543, "grad_norm": 0.09354628622531891, "learning_rate": 0.01, "loss": 1.9941, "step": 54609 }, { "epoch": 5.608709047961384, "grad_norm": 0.07625284790992737, "learning_rate": 0.01, "loss": 2.017, "step": 54612 }, { "epoch": 5.609017151073226, "grad_norm": 0.07845180481672287, "learning_rate": 0.01, "loss": 1.99, "step": 54615 }, { "epoch": 5.609325254185067, "grad_norm": 0.060051124542951584, "learning_rate": 0.01, "loss": 2.001, "step": 54618 }, { "epoch": 5.609633357296909, "grad_norm": 0.07250028848648071, "learning_rate": 0.01, "loss": 1.9713, "step": 54621 }, { "epoch": 5.60994146040875, "grad_norm": 0.10221341997385025, "learning_rate": 0.01, "loss": 1.9864, "step": 54624 }, { "epoch": 5.610249563520592, "grad_norm": 0.03648064285516739, "learning_rate": 0.01, "loss": 1.9845, "step": 54627 }, { "epoch": 5.610557666632433, "grad_norm": 0.04965173453092575, "learning_rate": 0.01, "loss": 1.9911, "step": 54630 }, { "epoch": 5.610865769744274, "grad_norm": 0.04213662073016167, "learning_rate": 0.01, "loss": 1.9916, "step": 54633 }, { "epoch": 5.611173872856115, "grad_norm": 0.11422538757324219, "learning_rate": 0.01, "loss": 1.9985, "step": 54636 }, { "epoch": 5.6114819759679575, "grad_norm": 0.054801687598228455, "learning_rate": 0.01, "loss": 1.9578, "step": 54639 }, { "epoch": 5.611790079079799, "grad_norm": 0.06683303415775299, "learning_rate": 0.01, "loss": 1.9818, "step": 54642 }, { "epoch": 5.61209818219164, "grad_norm": 0.04719147831201553, "learning_rate": 0.01, "loss": 2.0048, "step": 54645 }, { "epoch": 5.612406285303481, "grad_norm": 0.040852706879377365, "learning_rate": 0.01, "loss": 1.9856, "step": 54648 }, { "epoch": 5.612714388415323, "grad_norm": 0.038343146443367004, "learning_rate": 0.01, "loss": 1.9995, "step": 54651 }, { "epoch": 5.613022491527165, "grad_norm": 0.03426671028137207, "learning_rate": 0.01, "loss": 2.0135, "step": 54654 }, { "epoch": 5.613330594639006, "grad_norm": 0.08030149340629578, "learning_rate": 0.01, "loss": 1.988, "step": 54657 }, { "epoch": 5.613638697750847, "grad_norm": 0.04974358528852463, "learning_rate": 0.01, "loss": 1.9767, "step": 54660 }, { "epoch": 5.6139468008626885, "grad_norm": 0.04733881726861, "learning_rate": 0.01, "loss": 1.9959, "step": 54663 }, { "epoch": 5.61425490397453, "grad_norm": 0.04850970208644867, "learning_rate": 0.01, "loss": 2.0017, "step": 54666 }, { "epoch": 5.614563007086372, "grad_norm": 0.08880652487277985, "learning_rate": 0.01, "loss": 1.984, "step": 54669 }, { "epoch": 5.614871110198213, "grad_norm": 0.07070966809988022, "learning_rate": 0.01, "loss": 2.0034, "step": 54672 }, { "epoch": 5.615179213310054, "grad_norm": 0.043020039796829224, "learning_rate": 0.01, "loss": 2.0113, "step": 54675 }, { "epoch": 5.615487316421896, "grad_norm": 0.06512738764286041, "learning_rate": 0.01, "loss": 2.0029, "step": 54678 }, { "epoch": 5.615795419533737, "grad_norm": 0.09249335527420044, "learning_rate": 0.01, "loss": 1.988, "step": 54681 }, { "epoch": 5.616103522645579, "grad_norm": 0.12002009153366089, "learning_rate": 0.01, "loss": 2.0126, "step": 54684 }, { "epoch": 5.61641162575742, "grad_norm": 0.046711284667253494, "learning_rate": 0.01, "loss": 1.9996, "step": 54687 }, { "epoch": 5.616719728869262, "grad_norm": 0.0453648641705513, "learning_rate": 0.01, "loss": 1.9813, "step": 54690 }, { "epoch": 5.617027831981103, "grad_norm": 0.06286431849002838, "learning_rate": 0.01, "loss": 1.9911, "step": 54693 }, { "epoch": 5.617335935092944, "grad_norm": 0.10770335048437119, "learning_rate": 0.01, "loss": 1.9801, "step": 54696 }, { "epoch": 5.617644038204785, "grad_norm": 0.038174375891685486, "learning_rate": 0.01, "loss": 1.9972, "step": 54699 }, { "epoch": 5.617952141316628, "grad_norm": 0.0749521404504776, "learning_rate": 0.01, "loss": 1.9962, "step": 54702 }, { "epoch": 5.618260244428469, "grad_norm": 0.08033082634210587, "learning_rate": 0.01, "loss": 2.0188, "step": 54705 }, { "epoch": 5.61856834754031, "grad_norm": 0.06879189610481262, "learning_rate": 0.01, "loss": 2.0035, "step": 54708 }, { "epoch": 5.618876450652151, "grad_norm": 0.06201532483100891, "learning_rate": 0.01, "loss": 1.9886, "step": 54711 }, { "epoch": 5.6191845537639935, "grad_norm": 0.0482785739004612, "learning_rate": 0.01, "loss": 1.9988, "step": 54714 }, { "epoch": 5.619492656875835, "grad_norm": 0.08434705436229706, "learning_rate": 0.01, "loss": 1.9682, "step": 54717 }, { "epoch": 5.619800759987676, "grad_norm": 0.06971675902605057, "learning_rate": 0.01, "loss": 1.9853, "step": 54720 }, { "epoch": 5.620108863099517, "grad_norm": 0.05243263766169548, "learning_rate": 0.01, "loss": 2.0031, "step": 54723 }, { "epoch": 5.620416966211359, "grad_norm": 0.05071935057640076, "learning_rate": 0.01, "loss": 1.9857, "step": 54726 }, { "epoch": 5.6207250693232, "grad_norm": 0.07799027860164642, "learning_rate": 0.01, "loss": 2.0045, "step": 54729 }, { "epoch": 5.621033172435042, "grad_norm": 0.05585414171218872, "learning_rate": 0.01, "loss": 1.9517, "step": 54732 }, { "epoch": 5.621341275546883, "grad_norm": 0.04890783876180649, "learning_rate": 0.01, "loss": 2.0014, "step": 54735 }, { "epoch": 5.6216493786587245, "grad_norm": 0.035397402942180634, "learning_rate": 0.01, "loss": 1.9882, "step": 54738 }, { "epoch": 5.621957481770566, "grad_norm": 0.04959969222545624, "learning_rate": 0.01, "loss": 1.9935, "step": 54741 }, { "epoch": 5.622265584882407, "grad_norm": 0.1236480176448822, "learning_rate": 0.01, "loss": 1.9914, "step": 54744 }, { "epoch": 5.622573687994249, "grad_norm": 0.05167923495173454, "learning_rate": 0.01, "loss": 1.9898, "step": 54747 }, { "epoch": 5.62288179110609, "grad_norm": 0.08985866606235504, "learning_rate": 0.01, "loss": 1.9832, "step": 54750 }, { "epoch": 5.623189894217932, "grad_norm": 0.051048051565885544, "learning_rate": 0.01, "loss": 1.9796, "step": 54753 }, { "epoch": 5.623497997329773, "grad_norm": 0.034315209835767746, "learning_rate": 0.01, "loss": 2.0143, "step": 54756 }, { "epoch": 5.623806100441614, "grad_norm": 0.034064918756484985, "learning_rate": 0.01, "loss": 1.9891, "step": 54759 }, { "epoch": 5.6241142035534555, "grad_norm": 0.04847462847828865, "learning_rate": 0.01, "loss": 1.9785, "step": 54762 }, { "epoch": 5.624422306665298, "grad_norm": 0.036340802907943726, "learning_rate": 0.01, "loss": 1.9776, "step": 54765 }, { "epoch": 5.624730409777139, "grad_norm": 0.07472583651542664, "learning_rate": 0.01, "loss": 2.0025, "step": 54768 }, { "epoch": 5.62503851288898, "grad_norm": 0.05728251487016678, "learning_rate": 0.01, "loss": 1.9907, "step": 54771 }, { "epoch": 5.625346616000821, "grad_norm": 0.08100686967372894, "learning_rate": 0.01, "loss": 1.9901, "step": 54774 }, { "epoch": 5.625654719112663, "grad_norm": 0.03416838496923447, "learning_rate": 0.01, "loss": 1.9714, "step": 54777 }, { "epoch": 5.625962822224505, "grad_norm": 0.08783119171857834, "learning_rate": 0.01, "loss": 2.0181, "step": 54780 }, { "epoch": 5.626270925336346, "grad_norm": 0.06878135353326797, "learning_rate": 0.01, "loss": 1.9804, "step": 54783 }, { "epoch": 5.626579028448187, "grad_norm": 0.08717949688434601, "learning_rate": 0.01, "loss": 1.9825, "step": 54786 }, { "epoch": 5.626887131560029, "grad_norm": 0.06441211700439453, "learning_rate": 0.01, "loss": 1.9827, "step": 54789 }, { "epoch": 5.62719523467187, "grad_norm": 0.0610957108438015, "learning_rate": 0.01, "loss": 2.0121, "step": 54792 }, { "epoch": 5.627503337783711, "grad_norm": 0.04509379714727402, "learning_rate": 0.01, "loss": 1.9932, "step": 54795 }, { "epoch": 5.627811440895553, "grad_norm": 0.05279795825481415, "learning_rate": 0.01, "loss": 1.998, "step": 54798 }, { "epoch": 5.628119544007395, "grad_norm": 0.03723832964897156, "learning_rate": 0.01, "loss": 1.9857, "step": 54801 }, { "epoch": 5.628427647119236, "grad_norm": 0.10692798346281052, "learning_rate": 0.01, "loss": 1.9708, "step": 54804 }, { "epoch": 5.628735750231077, "grad_norm": 0.09552669525146484, "learning_rate": 0.01, "loss": 1.9862, "step": 54807 }, { "epoch": 5.629043853342919, "grad_norm": 0.11247537285089493, "learning_rate": 0.01, "loss": 1.9629, "step": 54810 }, { "epoch": 5.6293519564547605, "grad_norm": 0.08334960043430328, "learning_rate": 0.01, "loss": 1.9591, "step": 54813 }, { "epoch": 5.629660059566602, "grad_norm": 0.03950768709182739, "learning_rate": 0.01, "loss": 1.993, "step": 54816 }, { "epoch": 5.629968162678443, "grad_norm": 0.032677773386240005, "learning_rate": 0.01, "loss": 1.9838, "step": 54819 }, { "epoch": 5.630276265790284, "grad_norm": 0.0373060442507267, "learning_rate": 0.01, "loss": 1.9888, "step": 54822 }, { "epoch": 5.6305843689021255, "grad_norm": 0.07568337768316269, "learning_rate": 0.01, "loss": 1.9853, "step": 54825 }, { "epoch": 5.630892472013968, "grad_norm": 0.08441541343927383, "learning_rate": 0.01, "loss": 2.017, "step": 54828 }, { "epoch": 5.631200575125809, "grad_norm": 0.06486140936613083, "learning_rate": 0.01, "loss": 1.9929, "step": 54831 }, { "epoch": 5.63150867823765, "grad_norm": 0.06372927129268646, "learning_rate": 0.01, "loss": 1.9953, "step": 54834 }, { "epoch": 5.6318167813494915, "grad_norm": 0.06529858708381653, "learning_rate": 0.01, "loss": 2.0085, "step": 54837 }, { "epoch": 5.632124884461333, "grad_norm": 0.08682098984718323, "learning_rate": 0.01, "loss": 1.9814, "step": 54840 }, { "epoch": 5.632432987573175, "grad_norm": 0.05896419286727905, "learning_rate": 0.01, "loss": 2.0086, "step": 54843 }, { "epoch": 5.632741090685016, "grad_norm": 0.06567796319723129, "learning_rate": 0.01, "loss": 1.9803, "step": 54846 }, { "epoch": 5.633049193796857, "grad_norm": 0.06519652158021927, "learning_rate": 0.01, "loss": 2.0124, "step": 54849 }, { "epoch": 5.633357296908699, "grad_norm": 0.09215736389160156, "learning_rate": 0.01, "loss": 1.9772, "step": 54852 }, { "epoch": 5.63366540002054, "grad_norm": 0.08821207284927368, "learning_rate": 0.01, "loss": 1.9821, "step": 54855 }, { "epoch": 5.633973503132381, "grad_norm": 0.06266074627637863, "learning_rate": 0.01, "loss": 2.0139, "step": 54858 }, { "epoch": 5.634281606244223, "grad_norm": 0.11765746027231216, "learning_rate": 0.01, "loss": 2.0164, "step": 54861 }, { "epoch": 5.634589709356065, "grad_norm": 0.07203897088766098, "learning_rate": 0.01, "loss": 1.9625, "step": 54864 }, { "epoch": 5.634897812467906, "grad_norm": 0.05088105797767639, "learning_rate": 0.01, "loss": 1.9678, "step": 54867 }, { "epoch": 5.635205915579747, "grad_norm": 0.04583646357059479, "learning_rate": 0.01, "loss": 1.9923, "step": 54870 }, { "epoch": 5.635514018691588, "grad_norm": 0.03866413235664368, "learning_rate": 0.01, "loss": 1.989, "step": 54873 }, { "epoch": 5.635822121803431, "grad_norm": 0.02919906936585903, "learning_rate": 0.01, "loss": 1.9957, "step": 54876 }, { "epoch": 5.636130224915272, "grad_norm": 0.04621945321559906, "learning_rate": 0.01, "loss": 1.9813, "step": 54879 }, { "epoch": 5.636438328027113, "grad_norm": 0.131577268242836, "learning_rate": 0.01, "loss": 2.0007, "step": 54882 }, { "epoch": 5.636746431138954, "grad_norm": 0.13305455446243286, "learning_rate": 0.01, "loss": 1.9824, "step": 54885 }, { "epoch": 5.637054534250796, "grad_norm": 0.046413157135248184, "learning_rate": 0.01, "loss": 1.9809, "step": 54888 }, { "epoch": 5.637362637362637, "grad_norm": 0.0828058049082756, "learning_rate": 0.01, "loss": 1.9907, "step": 54891 }, { "epoch": 5.637670740474479, "grad_norm": 0.09685652703046799, "learning_rate": 0.01, "loss": 2.0036, "step": 54894 }, { "epoch": 5.63797884358632, "grad_norm": 0.055401384830474854, "learning_rate": 0.01, "loss": 1.9648, "step": 54897 }, { "epoch": 5.6382869466981616, "grad_norm": 0.04287857934832573, "learning_rate": 0.01, "loss": 1.9692, "step": 54900 }, { "epoch": 5.638595049810003, "grad_norm": 0.09044301509857178, "learning_rate": 0.01, "loss": 1.9839, "step": 54903 }, { "epoch": 5.638903152921845, "grad_norm": 0.07684798538684845, "learning_rate": 0.01, "loss": 1.973, "step": 54906 }, { "epoch": 5.639211256033686, "grad_norm": 0.05879480764269829, "learning_rate": 0.01, "loss": 1.9706, "step": 54909 }, { "epoch": 5.6395193591455275, "grad_norm": 0.04824329912662506, "learning_rate": 0.01, "loss": 1.9882, "step": 54912 }, { "epoch": 5.639827462257369, "grad_norm": 0.0385722778737545, "learning_rate": 0.01, "loss": 2.0095, "step": 54915 }, { "epoch": 5.64013556536921, "grad_norm": 0.10394332557916641, "learning_rate": 0.01, "loss": 2.0104, "step": 54918 }, { "epoch": 5.640443668481051, "grad_norm": 0.08453499525785446, "learning_rate": 0.01, "loss": 2.0022, "step": 54921 }, { "epoch": 5.640751771592893, "grad_norm": 0.037400417029857635, "learning_rate": 0.01, "loss": 1.9885, "step": 54924 }, { "epoch": 5.641059874704735, "grad_norm": 0.04908459633588791, "learning_rate": 0.01, "loss": 2.0018, "step": 54927 }, { "epoch": 5.641367977816576, "grad_norm": 0.06965642422437668, "learning_rate": 0.01, "loss": 1.9925, "step": 54930 }, { "epoch": 5.641676080928417, "grad_norm": 0.11287185549736023, "learning_rate": 0.01, "loss": 2.0021, "step": 54933 }, { "epoch": 5.6419841840402585, "grad_norm": 0.039775166660547256, "learning_rate": 0.01, "loss": 2.0144, "step": 54936 }, { "epoch": 5.642292287152101, "grad_norm": 0.05167205631732941, "learning_rate": 0.01, "loss": 2.0006, "step": 54939 }, { "epoch": 5.642600390263942, "grad_norm": 0.09172987937927246, "learning_rate": 0.01, "loss": 1.9867, "step": 54942 }, { "epoch": 5.642908493375783, "grad_norm": 0.11361383646726608, "learning_rate": 0.01, "loss": 2.0029, "step": 54945 }, { "epoch": 5.643216596487624, "grad_norm": 0.07098819315433502, "learning_rate": 0.01, "loss": 1.9842, "step": 54948 }, { "epoch": 5.643524699599466, "grad_norm": 0.06430887430906296, "learning_rate": 0.01, "loss": 1.9575, "step": 54951 }, { "epoch": 5.643832802711307, "grad_norm": 0.04281394183635712, "learning_rate": 0.01, "loss": 1.98, "step": 54954 }, { "epoch": 5.644140905823149, "grad_norm": 0.04618782550096512, "learning_rate": 0.01, "loss": 1.9854, "step": 54957 }, { "epoch": 5.64444900893499, "grad_norm": 0.049203407019376755, "learning_rate": 0.01, "loss": 1.9667, "step": 54960 }, { "epoch": 5.644757112046832, "grad_norm": 0.07782324403524399, "learning_rate": 0.01, "loss": 1.9784, "step": 54963 }, { "epoch": 5.645065215158673, "grad_norm": 0.11196355521678925, "learning_rate": 0.01, "loss": 2.0154, "step": 54966 }, { "epoch": 5.645373318270515, "grad_norm": 0.08459745347499847, "learning_rate": 0.01, "loss": 1.9958, "step": 54969 }, { "epoch": 5.645681421382356, "grad_norm": 0.05610903725028038, "learning_rate": 0.01, "loss": 2.0065, "step": 54972 }, { "epoch": 5.6459895244941976, "grad_norm": 0.07638850063085556, "learning_rate": 0.01, "loss": 1.9952, "step": 54975 }, { "epoch": 5.646297627606039, "grad_norm": 0.0996907651424408, "learning_rate": 0.01, "loss": 1.9908, "step": 54978 }, { "epoch": 5.64660573071788, "grad_norm": 0.05218489095568657, "learning_rate": 0.01, "loss": 2.0118, "step": 54981 }, { "epoch": 5.646913833829721, "grad_norm": 0.05683809146285057, "learning_rate": 0.01, "loss": 1.9972, "step": 54984 }, { "epoch": 5.6472219369415635, "grad_norm": 0.08494491875171661, "learning_rate": 0.01, "loss": 2.0059, "step": 54987 }, { "epoch": 5.647530040053405, "grad_norm": 0.09330440312623978, "learning_rate": 0.01, "loss": 1.9925, "step": 54990 }, { "epoch": 5.647838143165246, "grad_norm": 0.1456758677959442, "learning_rate": 0.01, "loss": 1.9906, "step": 54993 }, { "epoch": 5.648146246277087, "grad_norm": 0.13807424902915955, "learning_rate": 0.01, "loss": 1.9826, "step": 54996 }, { "epoch": 5.6484543493889285, "grad_norm": 0.0649813711643219, "learning_rate": 0.01, "loss": 2.017, "step": 54999 }, { "epoch": 5.648762452500771, "grad_norm": 0.0742340162396431, "learning_rate": 0.01, "loss": 1.9713, "step": 55002 }, { "epoch": 5.649070555612612, "grad_norm": 0.09716660529375076, "learning_rate": 0.01, "loss": 1.9821, "step": 55005 }, { "epoch": 5.649378658724453, "grad_norm": 0.056228429079055786, "learning_rate": 0.01, "loss": 1.9979, "step": 55008 }, { "epoch": 5.6496867618362945, "grad_norm": 0.045122385025024414, "learning_rate": 0.01, "loss": 1.9805, "step": 55011 }, { "epoch": 5.649994864948136, "grad_norm": 0.0388353131711483, "learning_rate": 0.01, "loss": 2.0094, "step": 55014 }, { "epoch": 5.650302968059977, "grad_norm": 0.03766791522502899, "learning_rate": 0.01, "loss": 2.0001, "step": 55017 }, { "epoch": 5.650611071171819, "grad_norm": 0.037102095782756805, "learning_rate": 0.01, "loss": 1.9865, "step": 55020 }, { "epoch": 5.65091917428366, "grad_norm": 0.03547612577676773, "learning_rate": 0.01, "loss": 2.001, "step": 55023 }, { "epoch": 5.651227277395502, "grad_norm": 0.14113888144493103, "learning_rate": 0.01, "loss": 2.0096, "step": 55026 }, { "epoch": 5.651535380507343, "grad_norm": 0.13051262497901917, "learning_rate": 0.01, "loss": 2.002, "step": 55029 }, { "epoch": 5.651843483619184, "grad_norm": 0.050375718623399734, "learning_rate": 0.01, "loss": 1.9995, "step": 55032 }, { "epoch": 5.652151586731026, "grad_norm": 0.05951124429702759, "learning_rate": 0.01, "loss": 2.0184, "step": 55035 }, { "epoch": 5.652459689842868, "grad_norm": 0.0422302782535553, "learning_rate": 0.01, "loss": 1.9875, "step": 55038 }, { "epoch": 5.652767792954709, "grad_norm": 0.04383467882871628, "learning_rate": 0.01, "loss": 1.9966, "step": 55041 }, { "epoch": 5.65307589606655, "grad_norm": 0.06659442186355591, "learning_rate": 0.01, "loss": 1.9942, "step": 55044 }, { "epoch": 5.653383999178391, "grad_norm": 0.03979937732219696, "learning_rate": 0.01, "loss": 2.009, "step": 55047 }, { "epoch": 5.653692102290233, "grad_norm": 0.07855933159589767, "learning_rate": 0.01, "loss": 2.0032, "step": 55050 }, { "epoch": 5.654000205402075, "grad_norm": 0.06732465326786041, "learning_rate": 0.01, "loss": 1.9929, "step": 55053 }, { "epoch": 5.654308308513916, "grad_norm": 0.057772446423769, "learning_rate": 0.01, "loss": 1.9747, "step": 55056 }, { "epoch": 5.654616411625757, "grad_norm": 0.08292173594236374, "learning_rate": 0.01, "loss": 1.9825, "step": 55059 }, { "epoch": 5.654924514737599, "grad_norm": 0.0935162603855133, "learning_rate": 0.01, "loss": 2.0047, "step": 55062 }, { "epoch": 5.655232617849441, "grad_norm": 0.06377442181110382, "learning_rate": 0.01, "loss": 1.9789, "step": 55065 }, { "epoch": 5.655540720961282, "grad_norm": 0.08845983445644379, "learning_rate": 0.01, "loss": 2.0187, "step": 55068 }, { "epoch": 5.655848824073123, "grad_norm": 0.06194892153143883, "learning_rate": 0.01, "loss": 1.9933, "step": 55071 }, { "epoch": 5.6561569271849645, "grad_norm": 0.06892132014036179, "learning_rate": 0.01, "loss": 2.0008, "step": 55074 }, { "epoch": 5.656465030296806, "grad_norm": 0.04041128605604172, "learning_rate": 0.01, "loss": 1.9889, "step": 55077 }, { "epoch": 5.656773133408647, "grad_norm": 0.09989805519580841, "learning_rate": 0.01, "loss": 1.9763, "step": 55080 }, { "epoch": 5.657081236520489, "grad_norm": 0.0866503044962883, "learning_rate": 0.01, "loss": 1.9806, "step": 55083 }, { "epoch": 5.6573893396323305, "grad_norm": 0.043805480003356934, "learning_rate": 0.01, "loss": 1.9971, "step": 55086 }, { "epoch": 5.657697442744172, "grad_norm": 0.038269441574811935, "learning_rate": 0.01, "loss": 1.9519, "step": 55089 }, { "epoch": 5.658005545856013, "grad_norm": 0.04652794450521469, "learning_rate": 0.01, "loss": 1.9728, "step": 55092 }, { "epoch": 5.658313648967854, "grad_norm": 0.09699436277151108, "learning_rate": 0.01, "loss": 2.0021, "step": 55095 }, { "epoch": 5.658621752079696, "grad_norm": 0.07722003012895584, "learning_rate": 0.01, "loss": 2.0101, "step": 55098 }, { "epoch": 5.658929855191538, "grad_norm": 0.08195403218269348, "learning_rate": 0.01, "loss": 2.0104, "step": 55101 }, { "epoch": 5.659237958303379, "grad_norm": 0.04434705525636673, "learning_rate": 0.01, "loss": 1.981, "step": 55104 }, { "epoch": 5.65954606141522, "grad_norm": 0.03978164121508598, "learning_rate": 0.01, "loss": 2.0033, "step": 55107 }, { "epoch": 5.6598541645270615, "grad_norm": 0.1201346144080162, "learning_rate": 0.01, "loss": 1.9801, "step": 55110 }, { "epoch": 5.660162267638903, "grad_norm": 0.17597338557243347, "learning_rate": 0.01, "loss": 2.0094, "step": 55113 }, { "epoch": 5.660470370750745, "grad_norm": 0.13311070203781128, "learning_rate": 0.01, "loss": 2.0209, "step": 55116 }, { "epoch": 5.660778473862586, "grad_norm": 0.11461780220270157, "learning_rate": 0.01, "loss": 1.9889, "step": 55119 }, { "epoch": 5.661086576974427, "grad_norm": 0.04778537154197693, "learning_rate": 0.01, "loss": 1.9912, "step": 55122 }, { "epoch": 5.661394680086269, "grad_norm": 0.03900016471743584, "learning_rate": 0.01, "loss": 1.9935, "step": 55125 }, { "epoch": 5.66170278319811, "grad_norm": 0.03934657946228981, "learning_rate": 0.01, "loss": 2.0063, "step": 55128 }, { "epoch": 5.662010886309952, "grad_norm": 0.07235165685415268, "learning_rate": 0.01, "loss": 2.0177, "step": 55131 }, { "epoch": 5.662318989421793, "grad_norm": 0.09168880432844162, "learning_rate": 0.01, "loss": 2.0312, "step": 55134 }, { "epoch": 5.662627092533635, "grad_norm": 0.058992091566324234, "learning_rate": 0.01, "loss": 1.984, "step": 55137 }, { "epoch": 5.662935195645476, "grad_norm": 0.044471897184848785, "learning_rate": 0.01, "loss": 2.0053, "step": 55140 }, { "epoch": 5.663243298757317, "grad_norm": 0.04644366726279259, "learning_rate": 0.01, "loss": 2.018, "step": 55143 }, { "epoch": 5.663551401869158, "grad_norm": 0.04064124450087547, "learning_rate": 0.01, "loss": 1.9876, "step": 55146 }, { "epoch": 5.6638595049810005, "grad_norm": 0.0546630322933197, "learning_rate": 0.01, "loss": 1.9961, "step": 55149 }, { "epoch": 5.664167608092842, "grad_norm": 0.07874899357557297, "learning_rate": 0.01, "loss": 2.0005, "step": 55152 }, { "epoch": 5.664475711204683, "grad_norm": 0.10178565979003906, "learning_rate": 0.01, "loss": 1.9983, "step": 55155 }, { "epoch": 5.664783814316524, "grad_norm": 0.10570824146270752, "learning_rate": 0.01, "loss": 1.9842, "step": 55158 }, { "epoch": 5.6650919174283665, "grad_norm": 0.046325813978910446, "learning_rate": 0.01, "loss": 2.0006, "step": 55161 }, { "epoch": 5.665400020540208, "grad_norm": 0.05771588161587715, "learning_rate": 0.01, "loss": 2.0082, "step": 55164 }, { "epoch": 5.665708123652049, "grad_norm": 0.0589786060154438, "learning_rate": 0.01, "loss": 1.9781, "step": 55167 }, { "epoch": 5.66601622676389, "grad_norm": 0.043214842677116394, "learning_rate": 0.01, "loss": 1.9791, "step": 55170 }, { "epoch": 5.6663243298757315, "grad_norm": 0.09702472388744354, "learning_rate": 0.01, "loss": 1.9972, "step": 55173 }, { "epoch": 5.666632432987573, "grad_norm": 0.04949367418885231, "learning_rate": 0.01, "loss": 2.001, "step": 55176 }, { "epoch": 5.666940536099415, "grad_norm": 0.10156387835741043, "learning_rate": 0.01, "loss": 1.9972, "step": 55179 }, { "epoch": 5.667248639211256, "grad_norm": 0.04134589806199074, "learning_rate": 0.01, "loss": 1.992, "step": 55182 }, { "epoch": 5.6675567423230975, "grad_norm": 0.09493035078048706, "learning_rate": 0.01, "loss": 1.9788, "step": 55185 }, { "epoch": 5.667864845434939, "grad_norm": 0.06814124435186386, "learning_rate": 0.01, "loss": 1.9685, "step": 55188 }, { "epoch": 5.66817294854678, "grad_norm": 0.05078943446278572, "learning_rate": 0.01, "loss": 1.9709, "step": 55191 }, { "epoch": 5.668481051658622, "grad_norm": 0.04670390486717224, "learning_rate": 0.01, "loss": 1.9826, "step": 55194 }, { "epoch": 5.668789154770463, "grad_norm": 0.04085162654519081, "learning_rate": 0.01, "loss": 1.9787, "step": 55197 }, { "epoch": 5.669097257882305, "grad_norm": 0.05365972965955734, "learning_rate": 0.01, "loss": 2.0002, "step": 55200 }, { "epoch": 5.669405360994146, "grad_norm": 0.06176181137561798, "learning_rate": 0.01, "loss": 1.9751, "step": 55203 }, { "epoch": 5.669713464105987, "grad_norm": 0.08370962738990784, "learning_rate": 0.01, "loss": 1.9991, "step": 55206 }, { "epoch": 5.6700215672178285, "grad_norm": 0.06180374324321747, "learning_rate": 0.01, "loss": 1.9872, "step": 55209 }, { "epoch": 5.670329670329671, "grad_norm": 0.04040297865867615, "learning_rate": 0.01, "loss": 1.9788, "step": 55212 }, { "epoch": 5.670637773441512, "grad_norm": 0.041469234973192215, "learning_rate": 0.01, "loss": 1.9993, "step": 55215 }, { "epoch": 5.670945876553353, "grad_norm": 0.04767376556992531, "learning_rate": 0.01, "loss": 1.9957, "step": 55218 }, { "epoch": 5.671253979665194, "grad_norm": 0.127590611577034, "learning_rate": 0.01, "loss": 1.9806, "step": 55221 }, { "epoch": 5.671562082777036, "grad_norm": 0.09273551404476166, "learning_rate": 0.01, "loss": 1.9828, "step": 55224 }, { "epoch": 5.671870185888878, "grad_norm": 0.06734216213226318, "learning_rate": 0.01, "loss": 1.9951, "step": 55227 }, { "epoch": 5.672178289000719, "grad_norm": 0.04960544407367706, "learning_rate": 0.01, "loss": 1.9583, "step": 55230 }, { "epoch": 5.67248639211256, "grad_norm": 0.08085974305868149, "learning_rate": 0.01, "loss": 2.0236, "step": 55233 }, { "epoch": 5.672794495224402, "grad_norm": 0.09541311115026474, "learning_rate": 0.01, "loss": 2.0077, "step": 55236 }, { "epoch": 5.673102598336243, "grad_norm": 0.04482199624180794, "learning_rate": 0.01, "loss": 2.0372, "step": 55239 }, { "epoch": 5.673410701448085, "grad_norm": 0.0415610708296299, "learning_rate": 0.01, "loss": 2.0211, "step": 55242 }, { "epoch": 5.673718804559926, "grad_norm": 0.08066660910844803, "learning_rate": 0.01, "loss": 2.0093, "step": 55245 }, { "epoch": 5.6740269076717675, "grad_norm": 0.06819882988929749, "learning_rate": 0.01, "loss": 1.9637, "step": 55248 }, { "epoch": 5.674335010783609, "grad_norm": 0.10055188089609146, "learning_rate": 0.01, "loss": 1.9581, "step": 55251 }, { "epoch": 5.67464311389545, "grad_norm": 0.05193830654025078, "learning_rate": 0.01, "loss": 2.0189, "step": 55254 }, { "epoch": 5.674951217007292, "grad_norm": 0.09766846150159836, "learning_rate": 0.01, "loss": 1.9897, "step": 55257 }, { "epoch": 5.6752593201191335, "grad_norm": 0.041506506502628326, "learning_rate": 0.01, "loss": 1.9807, "step": 55260 }, { "epoch": 5.675567423230975, "grad_norm": 0.04589756205677986, "learning_rate": 0.01, "loss": 1.9822, "step": 55263 }, { "epoch": 5.675875526342816, "grad_norm": 0.05841269716620445, "learning_rate": 0.01, "loss": 1.9997, "step": 55266 }, { "epoch": 5.676183629454657, "grad_norm": 0.08855876326560974, "learning_rate": 0.01, "loss": 2.0031, "step": 55269 }, { "epoch": 5.6764917325664985, "grad_norm": 0.055619269609451294, "learning_rate": 0.01, "loss": 2.0033, "step": 55272 }, { "epoch": 5.676799835678341, "grad_norm": 0.04398469999432564, "learning_rate": 0.01, "loss": 2.0109, "step": 55275 }, { "epoch": 5.677107938790182, "grad_norm": 0.073618583381176, "learning_rate": 0.01, "loss": 1.9971, "step": 55278 }, { "epoch": 5.677416041902023, "grad_norm": 0.03379726782441139, "learning_rate": 0.01, "loss": 2.0152, "step": 55281 }, { "epoch": 5.6777241450138645, "grad_norm": 0.040897171944379807, "learning_rate": 0.01, "loss": 1.9854, "step": 55284 }, { "epoch": 5.678032248125706, "grad_norm": 0.09364822506904602, "learning_rate": 0.01, "loss": 2.02, "step": 55287 }, { "epoch": 5.678340351237548, "grad_norm": 0.05096183717250824, "learning_rate": 0.01, "loss": 2.0101, "step": 55290 }, { "epoch": 5.678648454349389, "grad_norm": 0.03920522332191467, "learning_rate": 0.01, "loss": 1.9825, "step": 55293 }, { "epoch": 5.67895655746123, "grad_norm": 0.12465393543243408, "learning_rate": 0.01, "loss": 2.0117, "step": 55296 }, { "epoch": 5.679264660573072, "grad_norm": 0.0732463076710701, "learning_rate": 0.01, "loss": 1.9749, "step": 55299 }, { "epoch": 5.679572763684913, "grad_norm": 0.048661403357982635, "learning_rate": 0.01, "loss": 2.0204, "step": 55302 }, { "epoch": 5.679880866796754, "grad_norm": 0.072269506752491, "learning_rate": 0.01, "loss": 1.9664, "step": 55305 }, { "epoch": 5.680188969908596, "grad_norm": 0.09489680081605911, "learning_rate": 0.01, "loss": 1.9677, "step": 55308 }, { "epoch": 5.680497073020438, "grad_norm": 0.05124731361865997, "learning_rate": 0.01, "loss": 1.9919, "step": 55311 }, { "epoch": 5.680805176132279, "grad_norm": 0.05603638291358948, "learning_rate": 0.01, "loss": 2.021, "step": 55314 }, { "epoch": 5.68111327924412, "grad_norm": 0.045195356011390686, "learning_rate": 0.01, "loss": 1.9934, "step": 55317 }, { "epoch": 5.681421382355962, "grad_norm": 0.08664342761039734, "learning_rate": 0.01, "loss": 1.985, "step": 55320 }, { "epoch": 5.6817294854678035, "grad_norm": 0.06681100279092789, "learning_rate": 0.01, "loss": 1.9705, "step": 55323 }, { "epoch": 5.682037588579645, "grad_norm": 0.040817707777023315, "learning_rate": 0.01, "loss": 1.9747, "step": 55326 }, { "epoch": 5.682345691691486, "grad_norm": 0.04150600731372833, "learning_rate": 0.01, "loss": 1.986, "step": 55329 }, { "epoch": 5.682653794803327, "grad_norm": 0.05804332345724106, "learning_rate": 0.01, "loss": 2.0082, "step": 55332 }, { "epoch": 5.682961897915169, "grad_norm": 0.06679341942071915, "learning_rate": 0.01, "loss": 2.0132, "step": 55335 }, { "epoch": 5.683270001027011, "grad_norm": 0.1082872524857521, "learning_rate": 0.01, "loss": 1.9618, "step": 55338 }, { "epoch": 5.683578104138852, "grad_norm": 0.039868131279945374, "learning_rate": 0.01, "loss": 1.9962, "step": 55341 }, { "epoch": 5.683886207250693, "grad_norm": 0.056761614978313446, "learning_rate": 0.01, "loss": 1.9671, "step": 55344 }, { "epoch": 5.6841943103625345, "grad_norm": 0.05743727087974548, "learning_rate": 0.01, "loss": 2.0146, "step": 55347 }, { "epoch": 5.684502413474376, "grad_norm": 0.07240551710128784, "learning_rate": 0.01, "loss": 1.9733, "step": 55350 }, { "epoch": 5.684810516586218, "grad_norm": 0.04794316738843918, "learning_rate": 0.01, "loss": 1.9986, "step": 55353 }, { "epoch": 5.685118619698059, "grad_norm": 0.03990946710109711, "learning_rate": 0.01, "loss": 1.9905, "step": 55356 }, { "epoch": 5.6854267228099005, "grad_norm": 0.030655885115265846, "learning_rate": 0.01, "loss": 1.9831, "step": 55359 }, { "epoch": 5.685734825921742, "grad_norm": 0.05635961890220642, "learning_rate": 0.01, "loss": 1.9743, "step": 55362 }, { "epoch": 5.686042929033583, "grad_norm": 0.058308038860559464, "learning_rate": 0.01, "loss": 2.0184, "step": 55365 }, { "epoch": 5.686351032145424, "grad_norm": 0.08183693885803223, "learning_rate": 0.01, "loss": 1.9729, "step": 55368 }, { "epoch": 5.686659135257266, "grad_norm": 0.14002621173858643, "learning_rate": 0.01, "loss": 2.0092, "step": 55371 }, { "epoch": 5.686967238369108, "grad_norm": 0.06427164375782013, "learning_rate": 0.01, "loss": 2.0034, "step": 55374 }, { "epoch": 5.687275341480949, "grad_norm": 0.03860678896307945, "learning_rate": 0.01, "loss": 1.9951, "step": 55377 }, { "epoch": 5.68758344459279, "grad_norm": 0.035588864237070084, "learning_rate": 0.01, "loss": 1.9627, "step": 55380 }, { "epoch": 5.6878915477046315, "grad_norm": 0.035896506160497665, "learning_rate": 0.01, "loss": 1.9818, "step": 55383 }, { "epoch": 5.688199650816474, "grad_norm": 0.042555298656225204, "learning_rate": 0.01, "loss": 2.0008, "step": 55386 }, { "epoch": 5.688507753928315, "grad_norm": 0.061510197818279266, "learning_rate": 0.01, "loss": 2.0048, "step": 55389 }, { "epoch": 5.688815857040156, "grad_norm": 0.058596979826688766, "learning_rate": 0.01, "loss": 1.989, "step": 55392 }, { "epoch": 5.689123960151997, "grad_norm": 0.14717184007167816, "learning_rate": 0.01, "loss": 1.9873, "step": 55395 }, { "epoch": 5.689432063263839, "grad_norm": 0.1496891975402832, "learning_rate": 0.01, "loss": 2.013, "step": 55398 }, { "epoch": 5.68974016637568, "grad_norm": 0.09148060530424118, "learning_rate": 0.01, "loss": 1.9623, "step": 55401 }, { "epoch": 5.690048269487522, "grad_norm": 0.06617487967014313, "learning_rate": 0.01, "loss": 1.9755, "step": 55404 }, { "epoch": 5.690356372599363, "grad_norm": 0.0447409562766552, "learning_rate": 0.01, "loss": 1.9838, "step": 55407 }, { "epoch": 5.690664475711205, "grad_norm": 0.07360169291496277, "learning_rate": 0.01, "loss": 1.9893, "step": 55410 }, { "epoch": 5.690972578823046, "grad_norm": 0.03748020529747009, "learning_rate": 0.01, "loss": 1.9817, "step": 55413 }, { "epoch": 5.691280681934888, "grad_norm": 0.0812792256474495, "learning_rate": 0.01, "loss": 1.9723, "step": 55416 }, { "epoch": 5.691588785046729, "grad_norm": 0.06878095865249634, "learning_rate": 0.01, "loss": 2.0051, "step": 55419 }, { "epoch": 5.6918968881585705, "grad_norm": 0.10537760704755783, "learning_rate": 0.01, "loss": 2.0093, "step": 55422 }, { "epoch": 5.692204991270412, "grad_norm": 0.04603094980120659, "learning_rate": 0.01, "loss": 1.9772, "step": 55425 }, { "epoch": 5.692513094382253, "grad_norm": 0.09780580550432205, "learning_rate": 0.01, "loss": 1.9796, "step": 55428 }, { "epoch": 5.692821197494094, "grad_norm": 0.07139319181442261, "learning_rate": 0.01, "loss": 2.0059, "step": 55431 }, { "epoch": 5.6931293006059365, "grad_norm": 0.10430671274662018, "learning_rate": 0.01, "loss": 1.9902, "step": 55434 }, { "epoch": 5.693437403717778, "grad_norm": 0.07455434650182724, "learning_rate": 0.01, "loss": 1.9818, "step": 55437 }, { "epoch": 5.693745506829619, "grad_norm": 0.06788843870162964, "learning_rate": 0.01, "loss": 1.9998, "step": 55440 }, { "epoch": 5.69405360994146, "grad_norm": 0.07325445860624313, "learning_rate": 0.01, "loss": 1.9999, "step": 55443 }, { "epoch": 5.6943617130533015, "grad_norm": 0.06702747941017151, "learning_rate": 0.01, "loss": 1.9889, "step": 55446 }, { "epoch": 5.694669816165144, "grad_norm": 0.05118415504693985, "learning_rate": 0.01, "loss": 1.9879, "step": 55449 }, { "epoch": 5.694977919276985, "grad_norm": 0.08692149817943573, "learning_rate": 0.01, "loss": 1.9744, "step": 55452 }, { "epoch": 5.695286022388826, "grad_norm": 0.11190303415060043, "learning_rate": 0.01, "loss": 2.0081, "step": 55455 }, { "epoch": 5.6955941255006675, "grad_norm": 0.048586875200271606, "learning_rate": 0.01, "loss": 1.9659, "step": 55458 }, { "epoch": 5.695902228612509, "grad_norm": 0.04205205664038658, "learning_rate": 0.01, "loss": 1.9794, "step": 55461 }, { "epoch": 5.69621033172435, "grad_norm": 0.06149724870920181, "learning_rate": 0.01, "loss": 1.9959, "step": 55464 }, { "epoch": 5.696518434836192, "grad_norm": 0.0992293581366539, "learning_rate": 0.01, "loss": 1.9993, "step": 55467 }, { "epoch": 5.696826537948033, "grad_norm": 0.13116061687469482, "learning_rate": 0.01, "loss": 1.9888, "step": 55470 }, { "epoch": 5.697134641059875, "grad_norm": 0.06159098818898201, "learning_rate": 0.01, "loss": 1.976, "step": 55473 }, { "epoch": 5.697442744171716, "grad_norm": 0.03453617915511131, "learning_rate": 0.01, "loss": 1.9777, "step": 55476 }, { "epoch": 5.697750847283557, "grad_norm": 0.09355533123016357, "learning_rate": 0.01, "loss": 1.9967, "step": 55479 }, { "epoch": 5.698058950395399, "grad_norm": 0.059095028787851334, "learning_rate": 0.01, "loss": 1.9949, "step": 55482 }, { "epoch": 5.698367053507241, "grad_norm": 0.0825907438993454, "learning_rate": 0.01, "loss": 1.9903, "step": 55485 }, { "epoch": 5.698675156619082, "grad_norm": 0.07268752157688141, "learning_rate": 0.01, "loss": 1.9881, "step": 55488 }, { "epoch": 5.698983259730923, "grad_norm": 0.07047848403453827, "learning_rate": 0.01, "loss": 1.9774, "step": 55491 }, { "epoch": 5.699291362842764, "grad_norm": 0.09214555472135544, "learning_rate": 0.01, "loss": 1.9972, "step": 55494 }, { "epoch": 5.699599465954606, "grad_norm": 0.11323852092027664, "learning_rate": 0.01, "loss": 2.0205, "step": 55497 }, { "epoch": 5.699907569066448, "grad_norm": 0.06520719081163406, "learning_rate": 0.01, "loss": 1.9786, "step": 55500 }, { "epoch": 5.700215672178289, "grad_norm": 0.04669101536273956, "learning_rate": 0.01, "loss": 2.0015, "step": 55503 }, { "epoch": 5.70052377529013, "grad_norm": 0.04730800911784172, "learning_rate": 0.01, "loss": 1.9764, "step": 55506 }, { "epoch": 5.700831878401972, "grad_norm": 0.043729908764362335, "learning_rate": 0.01, "loss": 1.9933, "step": 55509 }, { "epoch": 5.701139981513814, "grad_norm": 0.03472939133644104, "learning_rate": 0.01, "loss": 1.9783, "step": 55512 }, { "epoch": 5.701448084625655, "grad_norm": 0.050825025886297226, "learning_rate": 0.01, "loss": 1.975, "step": 55515 }, { "epoch": 5.701756187737496, "grad_norm": 0.06947685033082962, "learning_rate": 0.01, "loss": 1.9835, "step": 55518 }, { "epoch": 5.7020642908493375, "grad_norm": 0.036018531769514084, "learning_rate": 0.01, "loss": 1.9978, "step": 55521 }, { "epoch": 5.702372393961179, "grad_norm": 0.10103366523981094, "learning_rate": 0.01, "loss": 1.9807, "step": 55524 }, { "epoch": 5.70268049707302, "grad_norm": 0.07310350984334946, "learning_rate": 0.01, "loss": 1.9987, "step": 55527 }, { "epoch": 5.702988600184862, "grad_norm": 0.13540640473365784, "learning_rate": 0.01, "loss": 2.0221, "step": 55530 }, { "epoch": 5.7032967032967035, "grad_norm": 0.0949084460735321, "learning_rate": 0.01, "loss": 2.01, "step": 55533 }, { "epoch": 5.703604806408545, "grad_norm": 0.059144023805856705, "learning_rate": 0.01, "loss": 1.9724, "step": 55536 }, { "epoch": 5.703912909520386, "grad_norm": 0.0527474619448185, "learning_rate": 0.01, "loss": 1.9823, "step": 55539 }, { "epoch": 5.704221012632227, "grad_norm": 0.06749939173460007, "learning_rate": 0.01, "loss": 1.9977, "step": 55542 }, { "epoch": 5.704529115744069, "grad_norm": 0.03750371187925339, "learning_rate": 0.01, "loss": 1.9799, "step": 55545 }, { "epoch": 5.704837218855911, "grad_norm": 0.04007263481616974, "learning_rate": 0.01, "loss": 2.0147, "step": 55548 }, { "epoch": 5.705145321967752, "grad_norm": 0.1166180819272995, "learning_rate": 0.01, "loss": 1.9786, "step": 55551 }, { "epoch": 5.705453425079593, "grad_norm": 0.09550142288208008, "learning_rate": 0.01, "loss": 1.9833, "step": 55554 }, { "epoch": 5.7057615281914345, "grad_norm": 0.03766748309135437, "learning_rate": 0.01, "loss": 1.9798, "step": 55557 }, { "epoch": 5.706069631303276, "grad_norm": 0.05305793881416321, "learning_rate": 0.01, "loss": 2.0144, "step": 55560 }, { "epoch": 5.706377734415118, "grad_norm": 0.042423125356435776, "learning_rate": 0.01, "loss": 1.9934, "step": 55563 }, { "epoch": 5.706685837526959, "grad_norm": 0.08636484295129776, "learning_rate": 0.01, "loss": 1.9986, "step": 55566 }, { "epoch": 5.7069939406388, "grad_norm": 0.05162766948342323, "learning_rate": 0.01, "loss": 2.0144, "step": 55569 }, { "epoch": 5.707302043750642, "grad_norm": 0.08721368759870529, "learning_rate": 0.01, "loss": 1.9887, "step": 55572 }, { "epoch": 5.707610146862484, "grad_norm": 0.043255776166915894, "learning_rate": 0.01, "loss": 2.0111, "step": 55575 }, { "epoch": 5.707918249974325, "grad_norm": 0.0371641106903553, "learning_rate": 0.01, "loss": 2.0051, "step": 55578 }, { "epoch": 5.708226353086166, "grad_norm": 0.03695603460073471, "learning_rate": 0.01, "loss": 1.9999, "step": 55581 }, { "epoch": 5.708534456198008, "grad_norm": 0.07745962589979172, "learning_rate": 0.01, "loss": 2.0055, "step": 55584 }, { "epoch": 5.708842559309849, "grad_norm": 0.10786707699298859, "learning_rate": 0.01, "loss": 1.974, "step": 55587 }, { "epoch": 5.70915066242169, "grad_norm": 0.11932874470949173, "learning_rate": 0.01, "loss": 1.9749, "step": 55590 }, { "epoch": 5.709458765533532, "grad_norm": 0.09362657368183136, "learning_rate": 0.01, "loss": 2.0116, "step": 55593 }, { "epoch": 5.7097668686453735, "grad_norm": 0.049765367060899734, "learning_rate": 0.01, "loss": 1.9835, "step": 55596 }, { "epoch": 5.710074971757215, "grad_norm": 0.0707634910941124, "learning_rate": 0.01, "loss": 1.9923, "step": 55599 }, { "epoch": 5.710383074869056, "grad_norm": 0.05576471611857414, "learning_rate": 0.01, "loss": 2.0247, "step": 55602 }, { "epoch": 5.710691177980897, "grad_norm": 0.036324393004179, "learning_rate": 0.01, "loss": 1.9941, "step": 55605 }, { "epoch": 5.7109992810927395, "grad_norm": 0.043876856565475464, "learning_rate": 0.01, "loss": 1.9866, "step": 55608 }, { "epoch": 5.711307384204581, "grad_norm": 0.04931717738509178, "learning_rate": 0.01, "loss": 1.9934, "step": 55611 }, { "epoch": 5.711615487316422, "grad_norm": 0.10611497610807419, "learning_rate": 0.01, "loss": 1.9828, "step": 55614 }, { "epoch": 5.711923590428263, "grad_norm": 0.04808138310909271, "learning_rate": 0.01, "loss": 1.9795, "step": 55617 }, { "epoch": 5.7122316935401045, "grad_norm": 0.05550553277134895, "learning_rate": 0.01, "loss": 2.0022, "step": 55620 }, { "epoch": 5.712539796651946, "grad_norm": 0.07001514732837677, "learning_rate": 0.01, "loss": 2.0225, "step": 55623 }, { "epoch": 5.712847899763788, "grad_norm": 0.04201444238424301, "learning_rate": 0.01, "loss": 1.9674, "step": 55626 }, { "epoch": 5.713156002875629, "grad_norm": 0.04204641655087471, "learning_rate": 0.01, "loss": 2.0051, "step": 55629 }, { "epoch": 5.7134641059874705, "grad_norm": 0.07260296493768692, "learning_rate": 0.01, "loss": 1.9954, "step": 55632 }, { "epoch": 5.713772209099312, "grad_norm": 0.04765797406435013, "learning_rate": 0.01, "loss": 2.0221, "step": 55635 }, { "epoch": 5.714080312211153, "grad_norm": 0.10189679265022278, "learning_rate": 0.01, "loss": 1.9826, "step": 55638 }, { "epoch": 5.714388415322995, "grad_norm": 0.08928123861551285, "learning_rate": 0.01, "loss": 1.9825, "step": 55641 }, { "epoch": 5.714696518434836, "grad_norm": 0.041944585740566254, "learning_rate": 0.01, "loss": 1.9979, "step": 55644 }, { "epoch": 5.715004621546678, "grad_norm": 0.062304090708494186, "learning_rate": 0.01, "loss": 2.0229, "step": 55647 }, { "epoch": 5.715312724658519, "grad_norm": 0.03752179071307182, "learning_rate": 0.01, "loss": 1.9861, "step": 55650 }, { "epoch": 5.71562082777036, "grad_norm": 0.05441533774137497, "learning_rate": 0.01, "loss": 2.0013, "step": 55653 }, { "epoch": 5.715928930882201, "grad_norm": 0.0389556922018528, "learning_rate": 0.01, "loss": 1.9849, "step": 55656 }, { "epoch": 5.716237033994044, "grad_norm": 0.046292494982481, "learning_rate": 0.01, "loss": 2.0006, "step": 55659 }, { "epoch": 5.716545137105885, "grad_norm": 0.0998992770910263, "learning_rate": 0.01, "loss": 1.9953, "step": 55662 }, { "epoch": 5.716853240217726, "grad_norm": 0.07048474252223969, "learning_rate": 0.01, "loss": 1.9914, "step": 55665 }, { "epoch": 5.717161343329567, "grad_norm": 0.03293071687221527, "learning_rate": 0.01, "loss": 1.9971, "step": 55668 }, { "epoch": 5.7174694464414095, "grad_norm": 0.040962520986795425, "learning_rate": 0.01, "loss": 2.0123, "step": 55671 }, { "epoch": 5.717777549553251, "grad_norm": 0.08064155280590057, "learning_rate": 0.01, "loss": 2.0082, "step": 55674 }, { "epoch": 5.718085652665092, "grad_norm": 0.10333026945590973, "learning_rate": 0.01, "loss": 2.0194, "step": 55677 }, { "epoch": 5.718393755776933, "grad_norm": 0.06801532953977585, "learning_rate": 0.01, "loss": 1.9974, "step": 55680 }, { "epoch": 5.718701858888775, "grad_norm": 0.051722411066293716, "learning_rate": 0.01, "loss": 1.9785, "step": 55683 }, { "epoch": 5.719009962000616, "grad_norm": 0.08710015565156937, "learning_rate": 0.01, "loss": 1.982, "step": 55686 }, { "epoch": 5.719318065112458, "grad_norm": 0.06947915256023407, "learning_rate": 0.01, "loss": 2.0078, "step": 55689 }, { "epoch": 5.719626168224299, "grad_norm": 0.08588126301765442, "learning_rate": 0.01, "loss": 2.0111, "step": 55692 }, { "epoch": 5.7199342713361405, "grad_norm": 0.1062588170170784, "learning_rate": 0.01, "loss": 1.9812, "step": 55695 }, { "epoch": 5.720242374447982, "grad_norm": 0.044331252574920654, "learning_rate": 0.01, "loss": 2.0213, "step": 55698 }, { "epoch": 5.720550477559823, "grad_norm": 0.06031282618641853, "learning_rate": 0.01, "loss": 1.9923, "step": 55701 }, { "epoch": 5.720858580671665, "grad_norm": 0.0506250225007534, "learning_rate": 0.01, "loss": 1.9847, "step": 55704 }, { "epoch": 5.7211666837835065, "grad_norm": 0.04892846941947937, "learning_rate": 0.01, "loss": 1.9845, "step": 55707 }, { "epoch": 5.721474786895348, "grad_norm": 0.0958351120352745, "learning_rate": 0.01, "loss": 1.9916, "step": 55710 }, { "epoch": 5.721782890007189, "grad_norm": 0.03931442275643349, "learning_rate": 0.01, "loss": 1.9758, "step": 55713 }, { "epoch": 5.72209099311903, "grad_norm": 0.10494615882635117, "learning_rate": 0.01, "loss": 2.0026, "step": 55716 }, { "epoch": 5.7223990962308715, "grad_norm": 0.06329034268856049, "learning_rate": 0.01, "loss": 1.9781, "step": 55719 }, { "epoch": 5.722707199342714, "grad_norm": 0.08237000554800034, "learning_rate": 0.01, "loss": 2.0026, "step": 55722 }, { "epoch": 5.723015302454555, "grad_norm": 0.063106007874012, "learning_rate": 0.01, "loss": 1.9761, "step": 55725 }, { "epoch": 5.723323405566396, "grad_norm": 0.04435192793607712, "learning_rate": 0.01, "loss": 2.0046, "step": 55728 }, { "epoch": 5.7236315086782374, "grad_norm": 0.09624417126178741, "learning_rate": 0.01, "loss": 1.9986, "step": 55731 }, { "epoch": 5.723939611790079, "grad_norm": 0.04826802760362625, "learning_rate": 0.01, "loss": 1.9803, "step": 55734 }, { "epoch": 5.724247714901921, "grad_norm": 0.13376633822917938, "learning_rate": 0.01, "loss": 2.0249, "step": 55737 }, { "epoch": 5.724555818013762, "grad_norm": 0.10685109347105026, "learning_rate": 0.01, "loss": 1.9724, "step": 55740 }, { "epoch": 5.724863921125603, "grad_norm": 0.03832371160387993, "learning_rate": 0.01, "loss": 1.9977, "step": 55743 }, { "epoch": 5.725172024237445, "grad_norm": 0.06955388933420181, "learning_rate": 0.01, "loss": 1.983, "step": 55746 }, { "epoch": 5.725480127349286, "grad_norm": 0.10767482221126556, "learning_rate": 0.01, "loss": 2.0095, "step": 55749 }, { "epoch": 5.725788230461127, "grad_norm": 0.09542632102966309, "learning_rate": 0.01, "loss": 1.9976, "step": 55752 }, { "epoch": 5.726096333572969, "grad_norm": 0.07101954519748688, "learning_rate": 0.01, "loss": 1.9897, "step": 55755 }, { "epoch": 5.726404436684811, "grad_norm": 0.04699917882680893, "learning_rate": 0.01, "loss": 2.0034, "step": 55758 }, { "epoch": 5.726712539796652, "grad_norm": 0.03901418298482895, "learning_rate": 0.01, "loss": 1.9782, "step": 55761 }, { "epoch": 5.727020642908493, "grad_norm": 0.03572557494044304, "learning_rate": 0.01, "loss": 1.9926, "step": 55764 }, { "epoch": 5.727328746020335, "grad_norm": 0.048515480011701584, "learning_rate": 0.01, "loss": 1.9798, "step": 55767 }, { "epoch": 5.7276368491321765, "grad_norm": 0.09653262794017792, "learning_rate": 0.01, "loss": 1.9913, "step": 55770 }, { "epoch": 5.727944952244018, "grad_norm": 0.05700475722551346, "learning_rate": 0.01, "loss": 2.0076, "step": 55773 }, { "epoch": 5.728253055355859, "grad_norm": 0.1171969622373581, "learning_rate": 0.01, "loss": 1.9628, "step": 55776 }, { "epoch": 5.7285611584677, "grad_norm": 0.043651919811964035, "learning_rate": 0.01, "loss": 2.0058, "step": 55779 }, { "epoch": 5.728869261579542, "grad_norm": 0.034597914665937424, "learning_rate": 0.01, "loss": 1.9807, "step": 55782 }, { "epoch": 5.729177364691384, "grad_norm": 0.036805570125579834, "learning_rate": 0.01, "loss": 1.9867, "step": 55785 }, { "epoch": 5.729485467803225, "grad_norm": 0.0409085676074028, "learning_rate": 0.01, "loss": 2.0135, "step": 55788 }, { "epoch": 5.729793570915066, "grad_norm": 0.041818439960479736, "learning_rate": 0.01, "loss": 2.0131, "step": 55791 }, { "epoch": 5.7301016740269075, "grad_norm": 0.04998002573847771, "learning_rate": 0.01, "loss": 2.0026, "step": 55794 }, { "epoch": 5.730409777138749, "grad_norm": 0.13136835396289825, "learning_rate": 0.01, "loss": 1.9779, "step": 55797 }, { "epoch": 5.730717880250591, "grad_norm": 0.09088631719350815, "learning_rate": 0.01, "loss": 1.9816, "step": 55800 }, { "epoch": 5.731025983362432, "grad_norm": 0.04242292046546936, "learning_rate": 0.01, "loss": 1.995, "step": 55803 }, { "epoch": 5.7313340864742734, "grad_norm": 0.06529027223587036, "learning_rate": 0.01, "loss": 1.992, "step": 55806 }, { "epoch": 5.731642189586115, "grad_norm": 0.07886287569999695, "learning_rate": 0.01, "loss": 1.9938, "step": 55809 }, { "epoch": 5.731950292697956, "grad_norm": 0.05292735993862152, "learning_rate": 0.01, "loss": 2.0094, "step": 55812 }, { "epoch": 5.732258395809797, "grad_norm": 0.08382521569728851, "learning_rate": 0.01, "loss": 1.9827, "step": 55815 }, { "epoch": 5.732566498921639, "grad_norm": 0.16346225142478943, "learning_rate": 0.01, "loss": 1.9933, "step": 55818 }, { "epoch": 5.732874602033481, "grad_norm": 0.1087159663438797, "learning_rate": 0.01, "loss": 1.9706, "step": 55821 }, { "epoch": 5.733182705145322, "grad_norm": 0.08878868818283081, "learning_rate": 0.01, "loss": 1.9981, "step": 55824 }, { "epoch": 5.733490808257163, "grad_norm": 0.05677541717886925, "learning_rate": 0.01, "loss": 2.0083, "step": 55827 }, { "epoch": 5.733798911369005, "grad_norm": 0.047212038189172745, "learning_rate": 0.01, "loss": 1.9733, "step": 55830 }, { "epoch": 5.734107014480847, "grad_norm": 0.04530366137623787, "learning_rate": 0.01, "loss": 1.9799, "step": 55833 }, { "epoch": 5.734415117592688, "grad_norm": 0.03656543791294098, "learning_rate": 0.01, "loss": 1.9945, "step": 55836 }, { "epoch": 5.734723220704529, "grad_norm": 0.05110878124833107, "learning_rate": 0.01, "loss": 2.0001, "step": 55839 }, { "epoch": 5.73503132381637, "grad_norm": 0.046113792806863785, "learning_rate": 0.01, "loss": 2.0205, "step": 55842 }, { "epoch": 5.735339426928212, "grad_norm": 0.05249856784939766, "learning_rate": 0.01, "loss": 2.0061, "step": 55845 }, { "epoch": 5.735647530040054, "grad_norm": 0.10378940403461456, "learning_rate": 0.01, "loss": 1.99, "step": 55848 }, { "epoch": 5.735955633151895, "grad_norm": 0.04738283157348633, "learning_rate": 0.01, "loss": 1.9874, "step": 55851 }, { "epoch": 5.736263736263736, "grad_norm": 0.042795851826667786, "learning_rate": 0.01, "loss": 1.9761, "step": 55854 }, { "epoch": 5.736571839375578, "grad_norm": 0.060027096420526505, "learning_rate": 0.01, "loss": 2.0001, "step": 55857 }, { "epoch": 5.736879942487419, "grad_norm": 0.060275256633758545, "learning_rate": 0.01, "loss": 2.0119, "step": 55860 }, { "epoch": 5.737188045599261, "grad_norm": 0.038434866815805435, "learning_rate": 0.01, "loss": 1.9843, "step": 55863 }, { "epoch": 5.737496148711102, "grad_norm": 0.06112179905176163, "learning_rate": 0.01, "loss": 1.9656, "step": 55866 }, { "epoch": 5.7378042518229435, "grad_norm": 0.057865045964717865, "learning_rate": 0.01, "loss": 2.0079, "step": 55869 }, { "epoch": 5.738112354934785, "grad_norm": 0.10420432686805725, "learning_rate": 0.01, "loss": 2.0062, "step": 55872 }, { "epoch": 5.738420458046626, "grad_norm": 0.10990259796380997, "learning_rate": 0.01, "loss": 1.9878, "step": 55875 }, { "epoch": 5.738728561158467, "grad_norm": 0.047898877412080765, "learning_rate": 0.01, "loss": 2.0026, "step": 55878 }, { "epoch": 5.7390366642703095, "grad_norm": 0.09271835535764694, "learning_rate": 0.01, "loss": 2.0032, "step": 55881 }, { "epoch": 5.739344767382151, "grad_norm": 0.046148382127285004, "learning_rate": 0.01, "loss": 2.0049, "step": 55884 }, { "epoch": 5.739652870493992, "grad_norm": 0.09337476640939713, "learning_rate": 0.01, "loss": 2.0101, "step": 55887 }, { "epoch": 5.739960973605833, "grad_norm": 0.0676882416009903, "learning_rate": 0.01, "loss": 1.9989, "step": 55890 }, { "epoch": 5.7402690767176745, "grad_norm": 0.07760494947433472, "learning_rate": 0.01, "loss": 1.9789, "step": 55893 }, { "epoch": 5.740577179829517, "grad_norm": 0.0753534585237503, "learning_rate": 0.01, "loss": 2.0221, "step": 55896 }, { "epoch": 5.740885282941358, "grad_norm": 0.07207735627889633, "learning_rate": 0.01, "loss": 1.9996, "step": 55899 }, { "epoch": 5.741193386053199, "grad_norm": 0.10786162316799164, "learning_rate": 0.01, "loss": 1.994, "step": 55902 }, { "epoch": 5.74150148916504, "grad_norm": 0.05810957029461861, "learning_rate": 0.01, "loss": 1.9892, "step": 55905 }, { "epoch": 5.741809592276882, "grad_norm": 0.043440163135528564, "learning_rate": 0.01, "loss": 2.025, "step": 55908 }, { "epoch": 5.742117695388723, "grad_norm": 0.10091213136911392, "learning_rate": 0.01, "loss": 1.9735, "step": 55911 }, { "epoch": 5.742425798500565, "grad_norm": 0.05655033513903618, "learning_rate": 0.01, "loss": 1.9865, "step": 55914 }, { "epoch": 5.742733901612406, "grad_norm": 0.04754583537578583, "learning_rate": 0.01, "loss": 1.9866, "step": 55917 }, { "epoch": 5.743042004724248, "grad_norm": 0.04805220663547516, "learning_rate": 0.01, "loss": 2.0079, "step": 55920 }, { "epoch": 5.743350107836089, "grad_norm": 0.03961215168237686, "learning_rate": 0.01, "loss": 2.0027, "step": 55923 }, { "epoch": 5.743658210947931, "grad_norm": 0.0900992751121521, "learning_rate": 0.01, "loss": 2.004, "step": 55926 }, { "epoch": 5.743966314059772, "grad_norm": 0.06700705736875534, "learning_rate": 0.01, "loss": 2.0104, "step": 55929 }, { "epoch": 5.744274417171614, "grad_norm": 0.06921757012605667, "learning_rate": 0.01, "loss": 2.0068, "step": 55932 }, { "epoch": 5.744582520283455, "grad_norm": 0.08982661366462708, "learning_rate": 0.01, "loss": 1.9646, "step": 55935 }, { "epoch": 5.744890623395296, "grad_norm": 0.059035494923591614, "learning_rate": 0.01, "loss": 1.9974, "step": 55938 }, { "epoch": 5.745198726507137, "grad_norm": 0.11368583887815475, "learning_rate": 0.01, "loss": 1.9738, "step": 55941 }, { "epoch": 5.7455068296189795, "grad_norm": 0.08658309280872345, "learning_rate": 0.01, "loss": 1.9933, "step": 55944 }, { "epoch": 5.745814932730821, "grad_norm": 0.06159745901823044, "learning_rate": 0.01, "loss": 2.0021, "step": 55947 }, { "epoch": 5.746123035842662, "grad_norm": 0.09689722955226898, "learning_rate": 0.01, "loss": 1.9833, "step": 55950 }, { "epoch": 5.746431138954503, "grad_norm": 0.04213634133338928, "learning_rate": 0.01, "loss": 1.9909, "step": 55953 }, { "epoch": 5.746739242066345, "grad_norm": 0.09022536873817444, "learning_rate": 0.01, "loss": 1.9798, "step": 55956 }, { "epoch": 5.747047345178187, "grad_norm": 0.047155823558568954, "learning_rate": 0.01, "loss": 1.9914, "step": 55959 }, { "epoch": 5.747355448290028, "grad_norm": 0.12048459053039551, "learning_rate": 0.01, "loss": 1.9988, "step": 55962 }, { "epoch": 5.747663551401869, "grad_norm": 0.14343701303005219, "learning_rate": 0.01, "loss": 1.9728, "step": 55965 }, { "epoch": 5.7479716545137105, "grad_norm": 0.10892770439386368, "learning_rate": 0.01, "loss": 1.9709, "step": 55968 }, { "epoch": 5.748279757625552, "grad_norm": 0.08472274988889694, "learning_rate": 0.01, "loss": 1.9681, "step": 55971 }, { "epoch": 5.748587860737393, "grad_norm": 0.06257399171590805, "learning_rate": 0.01, "loss": 2.0121, "step": 55974 }, { "epoch": 5.748895963849235, "grad_norm": 0.050180837512016296, "learning_rate": 0.01, "loss": 1.9785, "step": 55977 }, { "epoch": 5.749204066961076, "grad_norm": 0.04516899958252907, "learning_rate": 0.01, "loss": 2.0091, "step": 55980 }, { "epoch": 5.749512170072918, "grad_norm": 0.06078992411494255, "learning_rate": 0.01, "loss": 2.0152, "step": 55983 }, { "epoch": 5.749820273184759, "grad_norm": 0.060303375124931335, "learning_rate": 0.01, "loss": 1.983, "step": 55986 }, { "epoch": 5.7501283762966, "grad_norm": 0.046474192291498184, "learning_rate": 0.01, "loss": 1.9981, "step": 55989 }, { "epoch": 5.750436479408442, "grad_norm": 0.042475223541259766, "learning_rate": 0.01, "loss": 2.0081, "step": 55992 }, { "epoch": 5.750744582520284, "grad_norm": 0.10338003933429718, "learning_rate": 0.01, "loss": 2.0098, "step": 55995 }, { "epoch": 5.751052685632125, "grad_norm": 0.07736215740442276, "learning_rate": 0.01, "loss": 1.9692, "step": 55998 }, { "epoch": 5.751360788743966, "grad_norm": 0.12122607231140137, "learning_rate": 0.01, "loss": 1.9965, "step": 56001 }, { "epoch": 5.751668891855807, "grad_norm": 0.12788768112659454, "learning_rate": 0.01, "loss": 2.0051, "step": 56004 }, { "epoch": 5.751976994967649, "grad_norm": 0.1270526796579361, "learning_rate": 0.01, "loss": 1.9902, "step": 56007 }, { "epoch": 5.752285098079491, "grad_norm": 0.07083453238010406, "learning_rate": 0.01, "loss": 2.0271, "step": 56010 }, { "epoch": 5.752593201191332, "grad_norm": 0.0539688877761364, "learning_rate": 0.01, "loss": 2.0156, "step": 56013 }, { "epoch": 5.752901304303173, "grad_norm": 0.05941673740744591, "learning_rate": 0.01, "loss": 2.0111, "step": 56016 }, { "epoch": 5.753209407415015, "grad_norm": 0.058209896087646484, "learning_rate": 0.01, "loss": 2.0078, "step": 56019 }, { "epoch": 5.753517510526857, "grad_norm": 0.046941760927438736, "learning_rate": 0.01, "loss": 1.9934, "step": 56022 }, { "epoch": 5.753825613638698, "grad_norm": 0.05372985079884529, "learning_rate": 0.01, "loss": 1.9865, "step": 56025 }, { "epoch": 5.754133716750539, "grad_norm": 0.04523862898349762, "learning_rate": 0.01, "loss": 2.0294, "step": 56028 }, { "epoch": 5.754441819862381, "grad_norm": 0.03816952556371689, "learning_rate": 0.01, "loss": 1.9829, "step": 56031 }, { "epoch": 5.754749922974222, "grad_norm": 0.06410220265388489, "learning_rate": 0.01, "loss": 2.0222, "step": 56034 }, { "epoch": 5.755058026086063, "grad_norm": 0.03941243886947632, "learning_rate": 0.01, "loss": 1.9731, "step": 56037 }, { "epoch": 5.755366129197905, "grad_norm": 0.054606303572654724, "learning_rate": 0.01, "loss": 1.9634, "step": 56040 }, { "epoch": 5.7556742323097465, "grad_norm": 0.12037497013807297, "learning_rate": 0.01, "loss": 2.013, "step": 56043 }, { "epoch": 5.755982335421588, "grad_norm": 0.09681439399719238, "learning_rate": 0.01, "loss": 2.0124, "step": 56046 }, { "epoch": 5.756290438533429, "grad_norm": 0.09768577665090561, "learning_rate": 0.01, "loss": 2.0103, "step": 56049 }, { "epoch": 5.75659854164527, "grad_norm": 0.08952365070581436, "learning_rate": 0.01, "loss": 2.0002, "step": 56052 }, { "epoch": 5.7569066447571124, "grad_norm": 0.09528356790542603, "learning_rate": 0.01, "loss": 1.9914, "step": 56055 }, { "epoch": 5.757214747868954, "grad_norm": 0.05678083375096321, "learning_rate": 0.01, "loss": 1.9775, "step": 56058 }, { "epoch": 5.757522850980795, "grad_norm": 0.040836770087480545, "learning_rate": 0.01, "loss": 1.9526, "step": 56061 }, { "epoch": 5.757830954092636, "grad_norm": 0.053255595266819, "learning_rate": 0.01, "loss": 1.977, "step": 56064 }, { "epoch": 5.7581390572044775, "grad_norm": 0.053145814687013626, "learning_rate": 0.01, "loss": 1.9792, "step": 56067 }, { "epoch": 5.758447160316319, "grad_norm": 0.04844813421368599, "learning_rate": 0.01, "loss": 1.9795, "step": 56070 }, { "epoch": 5.758755263428161, "grad_norm": 0.08748742938041687, "learning_rate": 0.01, "loss": 1.9728, "step": 56073 }, { "epoch": 5.759063366540002, "grad_norm": 0.03808058798313141, "learning_rate": 0.01, "loss": 1.9878, "step": 56076 }, { "epoch": 5.759371469651843, "grad_norm": 0.0475618913769722, "learning_rate": 0.01, "loss": 1.9702, "step": 56079 }, { "epoch": 5.759679572763685, "grad_norm": 0.04134085774421692, "learning_rate": 0.01, "loss": 1.9759, "step": 56082 }, { "epoch": 5.759987675875526, "grad_norm": 0.035581640899181366, "learning_rate": 0.01, "loss": 1.9846, "step": 56085 }, { "epoch": 5.760295778987368, "grad_norm": 0.1027594655752182, "learning_rate": 0.01, "loss": 1.9873, "step": 56088 }, { "epoch": 5.760603882099209, "grad_norm": 0.06380899995565414, "learning_rate": 0.01, "loss": 1.9913, "step": 56091 }, { "epoch": 5.760911985211051, "grad_norm": 0.04780165106058121, "learning_rate": 0.01, "loss": 1.9984, "step": 56094 }, { "epoch": 5.761220088322892, "grad_norm": 0.0501483753323555, "learning_rate": 0.01, "loss": 1.9949, "step": 56097 }, { "epoch": 5.761528191434733, "grad_norm": 0.06381183862686157, "learning_rate": 0.01, "loss": 1.9734, "step": 56100 }, { "epoch": 5.761836294546574, "grad_norm": 0.08505979180335999, "learning_rate": 0.01, "loss": 1.9734, "step": 56103 }, { "epoch": 5.762144397658417, "grad_norm": 0.09470894932746887, "learning_rate": 0.01, "loss": 1.9798, "step": 56106 }, { "epoch": 5.762452500770258, "grad_norm": 0.08021756261587143, "learning_rate": 0.01, "loss": 1.9843, "step": 56109 }, { "epoch": 5.762760603882099, "grad_norm": 0.05404616892337799, "learning_rate": 0.01, "loss": 1.9731, "step": 56112 }, { "epoch": 5.76306870699394, "grad_norm": 0.05812733992934227, "learning_rate": 0.01, "loss": 1.9987, "step": 56115 }, { "epoch": 5.7633768101057825, "grad_norm": 0.045452944934368134, "learning_rate": 0.01, "loss": 1.9802, "step": 56118 }, { "epoch": 5.763684913217624, "grad_norm": 0.06332234293222427, "learning_rate": 0.01, "loss": 1.9977, "step": 56121 }, { "epoch": 5.763993016329465, "grad_norm": 0.0571395680308342, "learning_rate": 0.01, "loss": 1.9647, "step": 56124 }, { "epoch": 5.764301119441306, "grad_norm": 0.057966724038124084, "learning_rate": 0.01, "loss": 1.983, "step": 56127 }, { "epoch": 5.764609222553148, "grad_norm": 0.03700026124715805, "learning_rate": 0.01, "loss": 1.9724, "step": 56130 }, { "epoch": 5.764917325664989, "grad_norm": 0.06366675347089767, "learning_rate": 0.01, "loss": 2.0134, "step": 56133 }, { "epoch": 5.765225428776831, "grad_norm": 0.12095164507627487, "learning_rate": 0.01, "loss": 1.9885, "step": 56136 }, { "epoch": 5.765533531888672, "grad_norm": 0.05511580780148506, "learning_rate": 0.01, "loss": 2.0004, "step": 56139 }, { "epoch": 5.7658416350005135, "grad_norm": 0.06662758439779282, "learning_rate": 0.01, "loss": 2.0039, "step": 56142 }, { "epoch": 5.766149738112355, "grad_norm": 0.05345858260989189, "learning_rate": 0.01, "loss": 2.0104, "step": 56145 }, { "epoch": 5.766457841224196, "grad_norm": 0.05073786526918411, "learning_rate": 0.01, "loss": 1.9908, "step": 56148 }, { "epoch": 5.766765944336038, "grad_norm": 0.041545137763023376, "learning_rate": 0.01, "loss": 1.9773, "step": 56151 }, { "epoch": 5.767074047447879, "grad_norm": 0.058929163962602615, "learning_rate": 0.01, "loss": 1.9695, "step": 56154 }, { "epoch": 5.767382150559721, "grad_norm": 0.0575728714466095, "learning_rate": 0.01, "loss": 1.993, "step": 56157 }, { "epoch": 5.767690253671562, "grad_norm": 0.05252945050597191, "learning_rate": 0.01, "loss": 1.9974, "step": 56160 }, { "epoch": 5.767998356783403, "grad_norm": 0.09407012909650803, "learning_rate": 0.01, "loss": 1.975, "step": 56163 }, { "epoch": 5.7683064598952445, "grad_norm": 0.07041514664888382, "learning_rate": 0.01, "loss": 1.9804, "step": 56166 }, { "epoch": 5.768614563007087, "grad_norm": 0.0976720005273819, "learning_rate": 0.01, "loss": 2.0004, "step": 56169 }, { "epoch": 5.768922666118928, "grad_norm": 0.06705368310213089, "learning_rate": 0.01, "loss": 2.0022, "step": 56172 }, { "epoch": 5.769230769230769, "grad_norm": 0.044996488839387894, "learning_rate": 0.01, "loss": 1.9837, "step": 56175 }, { "epoch": 5.76953887234261, "grad_norm": 0.05325739085674286, "learning_rate": 0.01, "loss": 2.0039, "step": 56178 }, { "epoch": 5.769846975454453, "grad_norm": 0.06506424397230148, "learning_rate": 0.01, "loss": 1.9852, "step": 56181 }, { "epoch": 5.770155078566294, "grad_norm": 0.05719895660877228, "learning_rate": 0.01, "loss": 1.9644, "step": 56184 }, { "epoch": 5.770463181678135, "grad_norm": 0.057186469435691833, "learning_rate": 0.01, "loss": 1.9881, "step": 56187 }, { "epoch": 5.770771284789976, "grad_norm": 0.06783724576234818, "learning_rate": 0.01, "loss": 2.0129, "step": 56190 }, { "epoch": 5.771079387901818, "grad_norm": 0.10789873450994492, "learning_rate": 0.01, "loss": 2.0168, "step": 56193 }, { "epoch": 5.771387491013659, "grad_norm": 0.07688331604003906, "learning_rate": 0.01, "loss": 1.9821, "step": 56196 }, { "epoch": 5.771695594125501, "grad_norm": 0.056692346930503845, "learning_rate": 0.01, "loss": 2.0001, "step": 56199 }, { "epoch": 5.772003697237342, "grad_norm": 0.04593949392437935, "learning_rate": 0.01, "loss": 2.0237, "step": 56202 }, { "epoch": 5.772311800349184, "grad_norm": 0.050233110785484314, "learning_rate": 0.01, "loss": 1.9874, "step": 56205 }, { "epoch": 5.772619903461025, "grad_norm": 0.049515530467033386, "learning_rate": 0.01, "loss": 2.0098, "step": 56208 }, { "epoch": 5.772928006572866, "grad_norm": 0.10692589730024338, "learning_rate": 0.01, "loss": 1.9865, "step": 56211 }, { "epoch": 5.773236109684708, "grad_norm": 0.054260801523923874, "learning_rate": 0.01, "loss": 2.0194, "step": 56214 }, { "epoch": 5.7735442127965495, "grad_norm": 0.042156677693128586, "learning_rate": 0.01, "loss": 1.983, "step": 56217 }, { "epoch": 5.773852315908391, "grad_norm": 0.06891094893217087, "learning_rate": 0.01, "loss": 1.981, "step": 56220 }, { "epoch": 5.774160419020232, "grad_norm": 0.09000008553266525, "learning_rate": 0.01, "loss": 1.9868, "step": 56223 }, { "epoch": 5.774468522132073, "grad_norm": 0.051672134548425674, "learning_rate": 0.01, "loss": 1.9953, "step": 56226 }, { "epoch": 5.7747766252439146, "grad_norm": 0.04013100638985634, "learning_rate": 0.01, "loss": 2.0094, "step": 56229 }, { "epoch": 5.775084728355757, "grad_norm": 0.051951587200164795, "learning_rate": 0.01, "loss": 1.9719, "step": 56232 }, { "epoch": 5.775392831467598, "grad_norm": 0.038350678980350494, "learning_rate": 0.01, "loss": 1.9587, "step": 56235 }, { "epoch": 5.775700934579439, "grad_norm": 0.10026026517152786, "learning_rate": 0.01, "loss": 1.9906, "step": 56238 }, { "epoch": 5.7760090376912805, "grad_norm": 0.07415781170129776, "learning_rate": 0.01, "loss": 1.9795, "step": 56241 }, { "epoch": 5.776317140803122, "grad_norm": 0.04173508659005165, "learning_rate": 0.01, "loss": 2.0075, "step": 56244 }, { "epoch": 5.776625243914964, "grad_norm": 0.1886957734823227, "learning_rate": 0.01, "loss": 1.9861, "step": 56247 }, { "epoch": 5.776933347026805, "grad_norm": 0.13357476890087128, "learning_rate": 0.01, "loss": 1.9562, "step": 56250 }, { "epoch": 5.777241450138646, "grad_norm": 0.08402860164642334, "learning_rate": 0.01, "loss": 1.9802, "step": 56253 }, { "epoch": 5.777549553250488, "grad_norm": 0.08017169684171677, "learning_rate": 0.01, "loss": 1.9688, "step": 56256 }, { "epoch": 5.777857656362329, "grad_norm": 0.08612015098333359, "learning_rate": 0.01, "loss": 2.0009, "step": 56259 }, { "epoch": 5.77816575947417, "grad_norm": 0.06540670245885849, "learning_rate": 0.01, "loss": 1.9948, "step": 56262 }, { "epoch": 5.778473862586012, "grad_norm": 0.03119773231446743, "learning_rate": 0.01, "loss": 1.9967, "step": 56265 }, { "epoch": 5.778781965697854, "grad_norm": 0.12924209237098694, "learning_rate": 0.01, "loss": 2.0022, "step": 56268 }, { "epoch": 5.779090068809695, "grad_norm": 0.07823926955461502, "learning_rate": 0.01, "loss": 1.9822, "step": 56271 }, { "epoch": 5.779398171921536, "grad_norm": 0.05518370866775513, "learning_rate": 0.01, "loss": 1.9894, "step": 56274 }, { "epoch": 5.779706275033378, "grad_norm": 0.041604407131671906, "learning_rate": 0.01, "loss": 1.9931, "step": 56277 }, { "epoch": 5.78001437814522, "grad_norm": 0.05646267905831337, "learning_rate": 0.01, "loss": 1.9798, "step": 56280 }, { "epoch": 5.780322481257061, "grad_norm": 0.051092468202114105, "learning_rate": 0.01, "loss": 2.0014, "step": 56283 }, { "epoch": 5.780630584368902, "grad_norm": 0.05034413933753967, "learning_rate": 0.01, "loss": 1.9559, "step": 56286 }, { "epoch": 5.780938687480743, "grad_norm": 0.03945804387331009, "learning_rate": 0.01, "loss": 1.9894, "step": 56289 }, { "epoch": 5.781246790592585, "grad_norm": 0.046318311244249344, "learning_rate": 0.01, "loss": 1.9839, "step": 56292 }, { "epoch": 5.781554893704427, "grad_norm": 0.0614444762468338, "learning_rate": 0.01, "loss": 2.0039, "step": 56295 }, { "epoch": 5.781862996816268, "grad_norm": 0.06555134057998657, "learning_rate": 0.01, "loss": 1.9804, "step": 56298 }, { "epoch": 5.782171099928109, "grad_norm": 0.08905301988124847, "learning_rate": 0.01, "loss": 1.9749, "step": 56301 }, { "epoch": 5.7824792030399506, "grad_norm": 0.06253067404031754, "learning_rate": 0.01, "loss": 1.9804, "step": 56304 }, { "epoch": 5.782787306151792, "grad_norm": 0.05287610739469528, "learning_rate": 0.01, "loss": 2.0049, "step": 56307 }, { "epoch": 5.783095409263634, "grad_norm": 0.11111927032470703, "learning_rate": 0.01, "loss": 2.0164, "step": 56310 }, { "epoch": 5.783403512375475, "grad_norm": 0.05433667451143265, "learning_rate": 0.01, "loss": 2.0039, "step": 56313 }, { "epoch": 5.7837116154873165, "grad_norm": 0.03661932423710823, "learning_rate": 0.01, "loss": 2.0006, "step": 56316 }, { "epoch": 5.784019718599158, "grad_norm": 0.06176665052771568, "learning_rate": 0.01, "loss": 1.9975, "step": 56319 }, { "epoch": 5.784327821710999, "grad_norm": 0.05721355602145195, "learning_rate": 0.01, "loss": 1.9852, "step": 56322 }, { "epoch": 5.78463592482284, "grad_norm": 0.043899018317461014, "learning_rate": 0.01, "loss": 1.9896, "step": 56325 }, { "epoch": 5.784944027934682, "grad_norm": 0.17083556950092316, "learning_rate": 0.01, "loss": 1.9842, "step": 56328 }, { "epoch": 5.785252131046524, "grad_norm": 0.12060192972421646, "learning_rate": 0.01, "loss": 1.9777, "step": 56331 }, { "epoch": 5.785560234158365, "grad_norm": 0.1456676870584488, "learning_rate": 0.01, "loss": 2.0144, "step": 56334 }, { "epoch": 5.785868337270206, "grad_norm": 0.08020723611116409, "learning_rate": 0.01, "loss": 1.9992, "step": 56337 }, { "epoch": 5.7861764403820475, "grad_norm": 0.058028947561979294, "learning_rate": 0.01, "loss": 1.9873, "step": 56340 }, { "epoch": 5.78648454349389, "grad_norm": 0.031094344332814217, "learning_rate": 0.01, "loss": 1.9892, "step": 56343 }, { "epoch": 5.786792646605731, "grad_norm": 0.03784453868865967, "learning_rate": 0.01, "loss": 2.0126, "step": 56346 }, { "epoch": 5.787100749717572, "grad_norm": 0.03983352333307266, "learning_rate": 0.01, "loss": 1.9971, "step": 56349 }, { "epoch": 5.787408852829413, "grad_norm": 0.048995792865753174, "learning_rate": 0.01, "loss": 2.0012, "step": 56352 }, { "epoch": 5.787716955941255, "grad_norm": 0.040493566542863846, "learning_rate": 0.01, "loss": 1.9774, "step": 56355 }, { "epoch": 5.788025059053096, "grad_norm": 0.04488976299762726, "learning_rate": 0.01, "loss": 1.9907, "step": 56358 }, { "epoch": 5.788333162164938, "grad_norm": 0.1611676812171936, "learning_rate": 0.01, "loss": 2.0129, "step": 56361 }, { "epoch": 5.788641265276779, "grad_norm": 0.079713374376297, "learning_rate": 0.01, "loss": 1.9736, "step": 56364 }, { "epoch": 5.788949368388621, "grad_norm": 0.06308376044034958, "learning_rate": 0.01, "loss": 1.9918, "step": 56367 }, { "epoch": 5.789257471500462, "grad_norm": 0.049073606729507446, "learning_rate": 0.01, "loss": 1.9981, "step": 56370 }, { "epoch": 5.789565574612304, "grad_norm": 0.06402777135372162, "learning_rate": 0.01, "loss": 1.9927, "step": 56373 }, { "epoch": 5.789873677724145, "grad_norm": 0.05663265287876129, "learning_rate": 0.01, "loss": 1.9938, "step": 56376 }, { "epoch": 5.790181780835987, "grad_norm": 0.05829964950680733, "learning_rate": 0.01, "loss": 2.0055, "step": 56379 }, { "epoch": 5.790489883947828, "grad_norm": 0.05013945326209068, "learning_rate": 0.01, "loss": 1.9722, "step": 56382 }, { "epoch": 5.790797987059669, "grad_norm": 0.04025455191731453, "learning_rate": 0.01, "loss": 1.9842, "step": 56385 }, { "epoch": 5.79110609017151, "grad_norm": 0.05371120572090149, "learning_rate": 0.01, "loss": 1.9788, "step": 56388 }, { "epoch": 5.7914141932833525, "grad_norm": 0.03760494664311409, "learning_rate": 0.01, "loss": 1.9658, "step": 56391 }, { "epoch": 5.791722296395194, "grad_norm": 0.05783433839678764, "learning_rate": 0.01, "loss": 2.0043, "step": 56394 }, { "epoch": 5.792030399507035, "grad_norm": 0.04827668145298958, "learning_rate": 0.01, "loss": 1.9705, "step": 56397 }, { "epoch": 5.792338502618876, "grad_norm": 0.055127400904893875, "learning_rate": 0.01, "loss": 1.9976, "step": 56400 }, { "epoch": 5.7926466057307175, "grad_norm": 0.07991458475589752, "learning_rate": 0.01, "loss": 1.9551, "step": 56403 }, { "epoch": 5.79295470884256, "grad_norm": 0.06287828832864761, "learning_rate": 0.01, "loss": 1.9848, "step": 56406 }, { "epoch": 5.793262811954401, "grad_norm": 0.07629155367612839, "learning_rate": 0.01, "loss": 2.0004, "step": 56409 }, { "epoch": 5.793570915066242, "grad_norm": 0.10665415227413177, "learning_rate": 0.01, "loss": 1.9869, "step": 56412 }, { "epoch": 5.7938790181780835, "grad_norm": 0.048113856464624405, "learning_rate": 0.01, "loss": 2.0162, "step": 56415 }, { "epoch": 5.794187121289925, "grad_norm": 0.12309219688177109, "learning_rate": 0.01, "loss": 1.9723, "step": 56418 }, { "epoch": 5.794495224401766, "grad_norm": 0.08479398488998413, "learning_rate": 0.01, "loss": 1.9979, "step": 56421 }, { "epoch": 5.794803327513608, "grad_norm": 0.06182289868593216, "learning_rate": 0.01, "loss": 2.0007, "step": 56424 }, { "epoch": 5.795111430625449, "grad_norm": 0.042767733335494995, "learning_rate": 0.01, "loss": 2.0061, "step": 56427 }, { "epoch": 5.795419533737291, "grad_norm": 0.07279690355062485, "learning_rate": 0.01, "loss": 2.0027, "step": 56430 }, { "epoch": 5.795727636849132, "grad_norm": 0.05660317465662956, "learning_rate": 0.01, "loss": 1.9924, "step": 56433 }, { "epoch": 5.796035739960974, "grad_norm": 0.048105914145708084, "learning_rate": 0.01, "loss": 1.9722, "step": 56436 }, { "epoch": 5.796343843072815, "grad_norm": 0.09529261291027069, "learning_rate": 0.01, "loss": 1.9771, "step": 56439 }, { "epoch": 5.796651946184657, "grad_norm": 0.09696771949529648, "learning_rate": 0.01, "loss": 2.0022, "step": 56442 }, { "epoch": 5.796960049296498, "grad_norm": 0.10225138068199158, "learning_rate": 0.01, "loss": 2.009, "step": 56445 }, { "epoch": 5.797268152408339, "grad_norm": 0.09067768603563309, "learning_rate": 0.01, "loss": 2.0002, "step": 56448 }, { "epoch": 5.79757625552018, "grad_norm": 0.03427765890955925, "learning_rate": 0.01, "loss": 2.0128, "step": 56451 }, { "epoch": 5.797884358632023, "grad_norm": 0.03189976140856743, "learning_rate": 0.01, "loss": 1.9747, "step": 56454 }, { "epoch": 5.798192461743864, "grad_norm": 0.04521431401371956, "learning_rate": 0.01, "loss": 1.9947, "step": 56457 }, { "epoch": 5.798500564855705, "grad_norm": 0.05165833979845047, "learning_rate": 0.01, "loss": 2.0026, "step": 56460 }, { "epoch": 5.798808667967546, "grad_norm": 0.051866378635168076, "learning_rate": 0.01, "loss": 2.0151, "step": 56463 }, { "epoch": 5.799116771079388, "grad_norm": 0.03956654295325279, "learning_rate": 0.01, "loss": 1.9971, "step": 56466 }, { "epoch": 5.79942487419123, "grad_norm": 0.04093864560127258, "learning_rate": 0.01, "loss": 1.9768, "step": 56469 }, { "epoch": 5.799732977303071, "grad_norm": 0.05415187031030655, "learning_rate": 0.01, "loss": 1.9907, "step": 56472 }, { "epoch": 5.800041080414912, "grad_norm": 0.057444531470537186, "learning_rate": 0.01, "loss": 2.0053, "step": 56475 }, { "epoch": 5.8003491835267535, "grad_norm": 0.08681999891996384, "learning_rate": 0.01, "loss": 1.9861, "step": 56478 }, { "epoch": 5.800657286638595, "grad_norm": 0.08110462129116058, "learning_rate": 0.01, "loss": 1.9969, "step": 56481 }, { "epoch": 5.800965389750436, "grad_norm": 0.04393799602985382, "learning_rate": 0.01, "loss": 1.9871, "step": 56484 }, { "epoch": 5.801273492862278, "grad_norm": 0.05482124909758568, "learning_rate": 0.01, "loss": 1.9971, "step": 56487 }, { "epoch": 5.8015815959741195, "grad_norm": 0.11982918530702591, "learning_rate": 0.01, "loss": 2.0002, "step": 56490 }, { "epoch": 5.801889699085961, "grad_norm": 0.10209926217794418, "learning_rate": 0.01, "loss": 1.9897, "step": 56493 }, { "epoch": 5.802197802197802, "grad_norm": 0.0397653728723526, "learning_rate": 0.01, "loss": 1.9938, "step": 56496 }, { "epoch": 5.802505905309643, "grad_norm": 0.09022632986307144, "learning_rate": 0.01, "loss": 1.9985, "step": 56499 }, { "epoch": 5.802814008421485, "grad_norm": 0.07979937642812729, "learning_rate": 0.01, "loss": 1.9855, "step": 56502 }, { "epoch": 5.803122111533327, "grad_norm": 0.05430705472826958, "learning_rate": 0.01, "loss": 1.9831, "step": 56505 }, { "epoch": 5.803430214645168, "grad_norm": 0.05586136132478714, "learning_rate": 0.01, "loss": 2.0002, "step": 56508 }, { "epoch": 5.803738317757009, "grad_norm": 0.05488377809524536, "learning_rate": 0.01, "loss": 1.9996, "step": 56511 }, { "epoch": 5.8040464208688505, "grad_norm": 0.04581581428647041, "learning_rate": 0.01, "loss": 1.9911, "step": 56514 }, { "epoch": 5.804354523980692, "grad_norm": 0.11193082481622696, "learning_rate": 0.01, "loss": 2.0275, "step": 56517 }, { "epoch": 5.804662627092534, "grad_norm": 0.0932486429810524, "learning_rate": 0.01, "loss": 2.0051, "step": 56520 }, { "epoch": 5.804970730204375, "grad_norm": 0.079062819480896, "learning_rate": 0.01, "loss": 2.0094, "step": 56523 }, { "epoch": 5.805278833316216, "grad_norm": 0.04556051269173622, "learning_rate": 0.01, "loss": 1.9906, "step": 56526 }, { "epoch": 5.805586936428058, "grad_norm": 0.04513680562376976, "learning_rate": 0.01, "loss": 1.9765, "step": 56529 }, { "epoch": 5.8058950395399, "grad_norm": 0.06065935268998146, "learning_rate": 0.01, "loss": 1.985, "step": 56532 }, { "epoch": 5.806203142651741, "grad_norm": 0.03341379761695862, "learning_rate": 0.01, "loss": 1.9817, "step": 56535 }, { "epoch": 5.806511245763582, "grad_norm": 0.13032658398151398, "learning_rate": 0.01, "loss": 1.9805, "step": 56538 }, { "epoch": 5.806819348875424, "grad_norm": 0.049995262175798416, "learning_rate": 0.01, "loss": 1.998, "step": 56541 }, { "epoch": 5.807127451987265, "grad_norm": 0.10843599587678909, "learning_rate": 0.01, "loss": 1.9833, "step": 56544 }, { "epoch": 5.807435555099106, "grad_norm": 0.12207616865634918, "learning_rate": 0.01, "loss": 2.0046, "step": 56547 }, { "epoch": 5.807743658210948, "grad_norm": 0.18218643963336945, "learning_rate": 0.01, "loss": 1.9768, "step": 56550 }, { "epoch": 5.8080517613227896, "grad_norm": 0.1526477336883545, "learning_rate": 0.01, "loss": 2.0081, "step": 56553 }, { "epoch": 5.808359864434631, "grad_norm": 0.08327028155326843, "learning_rate": 0.01, "loss": 1.971, "step": 56556 }, { "epoch": 5.808667967546472, "grad_norm": 0.05534267798066139, "learning_rate": 0.01, "loss": 1.9766, "step": 56559 }, { "epoch": 5.808976070658313, "grad_norm": 0.04754161089658737, "learning_rate": 0.01, "loss": 1.964, "step": 56562 }, { "epoch": 5.8092841737701555, "grad_norm": 0.039216890931129456, "learning_rate": 0.01, "loss": 2.0034, "step": 56565 }, { "epoch": 5.809592276881997, "grad_norm": 0.05179159343242645, "learning_rate": 0.01, "loss": 1.9925, "step": 56568 }, { "epoch": 5.809900379993838, "grad_norm": 0.03787977248430252, "learning_rate": 0.01, "loss": 1.9959, "step": 56571 }, { "epoch": 5.810208483105679, "grad_norm": 0.06589755415916443, "learning_rate": 0.01, "loss": 1.9944, "step": 56574 }, { "epoch": 5.8105165862175205, "grad_norm": 0.04956568777561188, "learning_rate": 0.01, "loss": 1.9836, "step": 56577 }, { "epoch": 5.810824689329362, "grad_norm": 0.039915554225444794, "learning_rate": 0.01, "loss": 2.0168, "step": 56580 }, { "epoch": 5.811132792441204, "grad_norm": 0.10185667872428894, "learning_rate": 0.01, "loss": 1.991, "step": 56583 }, { "epoch": 5.811440895553045, "grad_norm": 0.048244234174489975, "learning_rate": 0.01, "loss": 2.007, "step": 56586 }, { "epoch": 5.8117489986648865, "grad_norm": 0.07163000851869583, "learning_rate": 0.01, "loss": 2.0091, "step": 56589 }, { "epoch": 5.812057101776728, "grad_norm": 0.04289057105779648, "learning_rate": 0.01, "loss": 2.0326, "step": 56592 }, { "epoch": 5.812365204888569, "grad_norm": 0.06520239263772964, "learning_rate": 0.01, "loss": 2.0223, "step": 56595 }, { "epoch": 5.812673308000411, "grad_norm": 0.038391079753637314, "learning_rate": 0.01, "loss": 2.0037, "step": 56598 }, { "epoch": 5.812981411112252, "grad_norm": 0.039008256047964096, "learning_rate": 0.01, "loss": 1.9904, "step": 56601 }, { "epoch": 5.813289514224094, "grad_norm": 0.1321951448917389, "learning_rate": 0.01, "loss": 1.9769, "step": 56604 }, { "epoch": 5.813597617335935, "grad_norm": 0.058711450546979904, "learning_rate": 0.01, "loss": 1.9986, "step": 56607 }, { "epoch": 5.813905720447776, "grad_norm": 0.10861786454916, "learning_rate": 0.01, "loss": 1.9912, "step": 56610 }, { "epoch": 5.8142138235596175, "grad_norm": 0.09143928438425064, "learning_rate": 0.01, "loss": 1.9946, "step": 56613 }, { "epoch": 5.81452192667146, "grad_norm": 0.051334600895643234, "learning_rate": 0.01, "loss": 2.007, "step": 56616 }, { "epoch": 5.814830029783301, "grad_norm": 0.0675191655755043, "learning_rate": 0.01, "loss": 1.9805, "step": 56619 }, { "epoch": 5.815138132895142, "grad_norm": 0.038013167679309845, "learning_rate": 0.01, "loss": 1.9922, "step": 56622 }, { "epoch": 5.815446236006983, "grad_norm": 0.043695393949747086, "learning_rate": 0.01, "loss": 1.9982, "step": 56625 }, { "epoch": 5.8157543391188256, "grad_norm": 0.034250471740961075, "learning_rate": 0.01, "loss": 1.9587, "step": 56628 }, { "epoch": 5.816062442230667, "grad_norm": 0.11236503720283508, "learning_rate": 0.01, "loss": 1.9828, "step": 56631 }, { "epoch": 5.816370545342508, "grad_norm": 0.053564272820949554, "learning_rate": 0.01, "loss": 1.9897, "step": 56634 }, { "epoch": 5.816678648454349, "grad_norm": 0.08416426926851273, "learning_rate": 0.01, "loss": 1.9997, "step": 56637 }, { "epoch": 5.816986751566191, "grad_norm": 0.047465983778238297, "learning_rate": 0.01, "loss": 1.9813, "step": 56640 }, { "epoch": 5.817294854678032, "grad_norm": 0.12391756474971771, "learning_rate": 0.01, "loss": 1.997, "step": 56643 }, { "epoch": 5.817602957789874, "grad_norm": 0.12705251574516296, "learning_rate": 0.01, "loss": 2.0021, "step": 56646 }, { "epoch": 5.817911060901715, "grad_norm": 0.09919688105583191, "learning_rate": 0.01, "loss": 1.9895, "step": 56649 }, { "epoch": 5.8182191640135565, "grad_norm": 0.08405104279518127, "learning_rate": 0.01, "loss": 2.016, "step": 56652 }, { "epoch": 5.818527267125398, "grad_norm": 0.04591039568185806, "learning_rate": 0.01, "loss": 1.9716, "step": 56655 }, { "epoch": 5.818835370237239, "grad_norm": 0.04161922633647919, "learning_rate": 0.01, "loss": 1.9985, "step": 56658 }, { "epoch": 5.819143473349081, "grad_norm": 0.05382911115884781, "learning_rate": 0.01, "loss": 1.9799, "step": 56661 }, { "epoch": 5.8194515764609225, "grad_norm": 0.05096543952822685, "learning_rate": 0.01, "loss": 1.993, "step": 56664 }, { "epoch": 5.819759679572764, "grad_norm": 0.03849412500858307, "learning_rate": 0.01, "loss": 1.9875, "step": 56667 }, { "epoch": 5.820067782684605, "grad_norm": 0.04661410301923752, "learning_rate": 0.01, "loss": 1.9914, "step": 56670 }, { "epoch": 5.820375885796446, "grad_norm": 0.11310985684394836, "learning_rate": 0.01, "loss": 1.9887, "step": 56673 }, { "epoch": 5.8206839889082875, "grad_norm": 0.04283592104911804, "learning_rate": 0.01, "loss": 1.9839, "step": 56676 }, { "epoch": 5.82099209202013, "grad_norm": 0.09948618710041046, "learning_rate": 0.01, "loss": 2.0009, "step": 56679 }, { "epoch": 5.821300195131971, "grad_norm": 0.07223562151193619, "learning_rate": 0.01, "loss": 1.9975, "step": 56682 }, { "epoch": 5.821608298243812, "grad_norm": 0.067604660987854, "learning_rate": 0.01, "loss": 2.0109, "step": 56685 }, { "epoch": 5.8219164013556535, "grad_norm": 0.03564516827464104, "learning_rate": 0.01, "loss": 1.9924, "step": 56688 }, { "epoch": 5.822224504467496, "grad_norm": 0.046253006905317307, "learning_rate": 0.01, "loss": 1.9848, "step": 56691 }, { "epoch": 5.822532607579337, "grad_norm": 0.05620354413986206, "learning_rate": 0.01, "loss": 1.98, "step": 56694 }, { "epoch": 5.822840710691178, "grad_norm": 0.110667385160923, "learning_rate": 0.01, "loss": 1.9915, "step": 56697 }, { "epoch": 5.823148813803019, "grad_norm": 0.05062146484851837, "learning_rate": 0.01, "loss": 1.9848, "step": 56700 }, { "epoch": 5.823456916914861, "grad_norm": 0.10704591870307922, "learning_rate": 0.01, "loss": 1.9962, "step": 56703 }, { "epoch": 5.823765020026702, "grad_norm": 0.05372390151023865, "learning_rate": 0.01, "loss": 1.9775, "step": 56706 }, { "epoch": 5.824073123138544, "grad_norm": 0.05827971547842026, "learning_rate": 0.01, "loss": 1.9922, "step": 56709 }, { "epoch": 5.824381226250385, "grad_norm": 0.04202426224946976, "learning_rate": 0.01, "loss": 1.9506, "step": 56712 }, { "epoch": 5.824689329362227, "grad_norm": 0.08493483811616898, "learning_rate": 0.01, "loss": 2.0143, "step": 56715 }, { "epoch": 5.824997432474068, "grad_norm": 0.043885089457035065, "learning_rate": 0.01, "loss": 1.978, "step": 56718 }, { "epoch": 5.825305535585909, "grad_norm": 0.04681296646595001, "learning_rate": 0.01, "loss": 1.9664, "step": 56721 }, { "epoch": 5.825613638697751, "grad_norm": 0.04231657460331917, "learning_rate": 0.01, "loss": 1.982, "step": 56724 }, { "epoch": 5.8259217418095925, "grad_norm": 0.04888215288519859, "learning_rate": 0.01, "loss": 1.9802, "step": 56727 }, { "epoch": 5.826229844921434, "grad_norm": 0.0635887086391449, "learning_rate": 0.01, "loss": 2.0027, "step": 56730 }, { "epoch": 5.826537948033275, "grad_norm": 0.056875888258218765, "learning_rate": 0.01, "loss": 2.0209, "step": 56733 }, { "epoch": 5.826846051145116, "grad_norm": 0.05551552772521973, "learning_rate": 0.01, "loss": 1.9837, "step": 56736 }, { "epoch": 5.827154154256958, "grad_norm": 0.10465902090072632, "learning_rate": 0.01, "loss": 1.9638, "step": 56739 }, { "epoch": 5.8274622573688, "grad_norm": 0.08767648041248322, "learning_rate": 0.01, "loss": 1.9868, "step": 56742 }, { "epoch": 5.827770360480641, "grad_norm": 0.04951823502779007, "learning_rate": 0.01, "loss": 2.0045, "step": 56745 }, { "epoch": 5.828078463592482, "grad_norm": 0.04926539584994316, "learning_rate": 0.01, "loss": 1.984, "step": 56748 }, { "epoch": 5.8283865667043235, "grad_norm": 0.05217811465263367, "learning_rate": 0.01, "loss": 1.9942, "step": 56751 }, { "epoch": 5.828694669816165, "grad_norm": 0.0460977777838707, "learning_rate": 0.01, "loss": 2.0027, "step": 56754 }, { "epoch": 5.829002772928007, "grad_norm": 0.046701837331056595, "learning_rate": 0.01, "loss": 2.0079, "step": 56757 }, { "epoch": 5.829310876039848, "grad_norm": 0.07417173683643341, "learning_rate": 0.01, "loss": 1.9739, "step": 56760 }, { "epoch": 5.8296189791516895, "grad_norm": 0.09357844293117523, "learning_rate": 0.01, "loss": 1.9957, "step": 56763 }, { "epoch": 5.829927082263531, "grad_norm": 0.05171707645058632, "learning_rate": 0.01, "loss": 1.9856, "step": 56766 }, { "epoch": 5.830235185375372, "grad_norm": 0.08236930519342422, "learning_rate": 0.01, "loss": 1.9891, "step": 56769 }, { "epoch": 5.830543288487213, "grad_norm": 0.041667647659778595, "learning_rate": 0.01, "loss": 1.9927, "step": 56772 }, { "epoch": 5.830851391599055, "grad_norm": 0.055335305631160736, "learning_rate": 0.01, "loss": 1.9976, "step": 56775 }, { "epoch": 5.831159494710897, "grad_norm": 0.03667658939957619, "learning_rate": 0.01, "loss": 1.9859, "step": 56778 }, { "epoch": 5.831467597822738, "grad_norm": 0.05994969233870506, "learning_rate": 0.01, "loss": 2.0108, "step": 56781 }, { "epoch": 5.831775700934579, "grad_norm": 0.1395941525697708, "learning_rate": 0.01, "loss": 2.0184, "step": 56784 }, { "epoch": 5.832083804046421, "grad_norm": 0.0712098479270935, "learning_rate": 0.01, "loss": 1.9823, "step": 56787 }, { "epoch": 5.832391907158263, "grad_norm": 0.08360549062490463, "learning_rate": 0.01, "loss": 1.985, "step": 56790 }, { "epoch": 5.832700010270104, "grad_norm": 0.07638189941644669, "learning_rate": 0.01, "loss": 1.9848, "step": 56793 }, { "epoch": 5.833008113381945, "grad_norm": 0.07885719835758209, "learning_rate": 0.01, "loss": 1.9919, "step": 56796 }, { "epoch": 5.833316216493786, "grad_norm": 0.055790308862924576, "learning_rate": 0.01, "loss": 1.9866, "step": 56799 }, { "epoch": 5.833624319605628, "grad_norm": 0.03926543518900871, "learning_rate": 0.01, "loss": 1.9928, "step": 56802 }, { "epoch": 5.83393242271747, "grad_norm": 0.10085275024175644, "learning_rate": 0.01, "loss": 1.9989, "step": 56805 }, { "epoch": 5.834240525829311, "grad_norm": 0.07995092123746872, "learning_rate": 0.01, "loss": 1.9767, "step": 56808 }, { "epoch": 5.834548628941152, "grad_norm": 0.09439895302057266, "learning_rate": 0.01, "loss": 1.9854, "step": 56811 }, { "epoch": 5.834856732052994, "grad_norm": 0.04961158335208893, "learning_rate": 0.01, "loss": 1.9933, "step": 56814 }, { "epoch": 5.835164835164835, "grad_norm": 0.052938688546419144, "learning_rate": 0.01, "loss": 1.9686, "step": 56817 }, { "epoch": 5.835472938276677, "grad_norm": 0.04960361868143082, "learning_rate": 0.01, "loss": 1.9919, "step": 56820 }, { "epoch": 5.835781041388518, "grad_norm": 0.08586288243532181, "learning_rate": 0.01, "loss": 2.0012, "step": 56823 }, { "epoch": 5.8360891445003595, "grad_norm": 0.07194332033395767, "learning_rate": 0.01, "loss": 2.01, "step": 56826 }, { "epoch": 5.836397247612201, "grad_norm": 0.062126416712999344, "learning_rate": 0.01, "loss": 1.9826, "step": 56829 }, { "epoch": 5.836705350724042, "grad_norm": 0.06656119972467422, "learning_rate": 0.01, "loss": 2.0029, "step": 56832 }, { "epoch": 5.837013453835883, "grad_norm": 0.0884518176317215, "learning_rate": 0.01, "loss": 1.9983, "step": 56835 }, { "epoch": 5.8373215569477255, "grad_norm": 0.04115528613328934, "learning_rate": 0.01, "loss": 1.9905, "step": 56838 }, { "epoch": 5.837629660059567, "grad_norm": 0.05185645818710327, "learning_rate": 0.01, "loss": 2.0076, "step": 56841 }, { "epoch": 5.837937763171408, "grad_norm": 0.11587214469909668, "learning_rate": 0.01, "loss": 1.9973, "step": 56844 }, { "epoch": 5.838245866283249, "grad_norm": 0.13516643643379211, "learning_rate": 0.01, "loss": 2.0027, "step": 56847 }, { "epoch": 5.8385539693950905, "grad_norm": 0.0748513862490654, "learning_rate": 0.01, "loss": 1.9723, "step": 56850 }, { "epoch": 5.838862072506933, "grad_norm": 0.12254352867603302, "learning_rate": 0.01, "loss": 1.9822, "step": 56853 }, { "epoch": 5.839170175618774, "grad_norm": 0.07876627147197723, "learning_rate": 0.01, "loss": 2.0085, "step": 56856 }, { "epoch": 5.839478278730615, "grad_norm": 0.07364428788423538, "learning_rate": 0.01, "loss": 1.9794, "step": 56859 }, { "epoch": 5.8397863818424565, "grad_norm": 0.0350789949297905, "learning_rate": 0.01, "loss": 1.9687, "step": 56862 }, { "epoch": 5.840094484954298, "grad_norm": 0.046593520790338516, "learning_rate": 0.01, "loss": 2.0026, "step": 56865 }, { "epoch": 5.840402588066139, "grad_norm": 0.05111907422542572, "learning_rate": 0.01, "loss": 1.9659, "step": 56868 }, { "epoch": 5.840710691177981, "grad_norm": 0.046573907136917114, "learning_rate": 0.01, "loss": 2.0105, "step": 56871 }, { "epoch": 5.841018794289822, "grad_norm": 0.05139172077178955, "learning_rate": 0.01, "loss": 1.9674, "step": 56874 }, { "epoch": 5.841326897401664, "grad_norm": 0.06465848535299301, "learning_rate": 0.01, "loss": 1.994, "step": 56877 }, { "epoch": 5.841635000513505, "grad_norm": 0.046135857701301575, "learning_rate": 0.01, "loss": 1.9875, "step": 56880 }, { "epoch": 5.841943103625347, "grad_norm": 0.039065003395080566, "learning_rate": 0.01, "loss": 1.9676, "step": 56883 }, { "epoch": 5.842251206737188, "grad_norm": 0.05646162107586861, "learning_rate": 0.01, "loss": 1.9904, "step": 56886 }, { "epoch": 5.84255930984903, "grad_norm": 0.1448718160390854, "learning_rate": 0.01, "loss": 2.0192, "step": 56889 }, { "epoch": 5.842867412960871, "grad_norm": 0.12302438169717789, "learning_rate": 0.01, "loss": 2.0024, "step": 56892 }, { "epoch": 5.843175516072712, "grad_norm": 0.0701448991894722, "learning_rate": 0.01, "loss": 2.0049, "step": 56895 }, { "epoch": 5.843483619184553, "grad_norm": 0.08622895181179047, "learning_rate": 0.01, "loss": 1.9951, "step": 56898 }, { "epoch": 5.8437917222963955, "grad_norm": 0.04280500486493111, "learning_rate": 0.01, "loss": 1.954, "step": 56901 }, { "epoch": 5.844099825408237, "grad_norm": 0.04177187755703926, "learning_rate": 0.01, "loss": 1.9822, "step": 56904 }, { "epoch": 5.844407928520078, "grad_norm": 0.04321052134037018, "learning_rate": 0.01, "loss": 1.9818, "step": 56907 }, { "epoch": 5.844716031631919, "grad_norm": 0.10391934216022491, "learning_rate": 0.01, "loss": 1.9831, "step": 56910 }, { "epoch": 5.845024134743761, "grad_norm": 0.07634170353412628, "learning_rate": 0.01, "loss": 1.9638, "step": 56913 }, { "epoch": 5.845332237855603, "grad_norm": 0.05854891613125801, "learning_rate": 0.01, "loss": 1.9675, "step": 56916 }, { "epoch": 5.845640340967444, "grad_norm": 0.08058078587055206, "learning_rate": 0.01, "loss": 2.0034, "step": 56919 }, { "epoch": 5.845948444079285, "grad_norm": 0.10716883093118668, "learning_rate": 0.01, "loss": 1.9883, "step": 56922 }, { "epoch": 5.8462565471911265, "grad_norm": 0.1634785681962967, "learning_rate": 0.01, "loss": 1.9998, "step": 56925 }, { "epoch": 5.846564650302968, "grad_norm": 0.08859071135520935, "learning_rate": 0.01, "loss": 1.9666, "step": 56928 }, { "epoch": 5.846872753414809, "grad_norm": 0.05146167427301407, "learning_rate": 0.01, "loss": 1.997, "step": 56931 }, { "epoch": 5.847180856526651, "grad_norm": 0.057162970304489136, "learning_rate": 0.01, "loss": 2.0044, "step": 56934 }, { "epoch": 5.8474889596384925, "grad_norm": 0.042328424751758575, "learning_rate": 0.01, "loss": 2.005, "step": 56937 }, { "epoch": 5.847797062750334, "grad_norm": 0.05657609552145004, "learning_rate": 0.01, "loss": 2.0205, "step": 56940 }, { "epoch": 5.848105165862175, "grad_norm": 0.11977431923151016, "learning_rate": 0.01, "loss": 2.007, "step": 56943 }, { "epoch": 5.848413268974016, "grad_norm": 0.13686580955982208, "learning_rate": 0.01, "loss": 2.0215, "step": 56946 }, { "epoch": 5.848721372085858, "grad_norm": 0.06063876673579216, "learning_rate": 0.01, "loss": 2.0057, "step": 56949 }, { "epoch": 5.8490294751977, "grad_norm": 0.05211206525564194, "learning_rate": 0.01, "loss": 1.9885, "step": 56952 }, { "epoch": 5.849337578309541, "grad_norm": 0.03639880567789078, "learning_rate": 0.01, "loss": 1.9987, "step": 56955 }, { "epoch": 5.849645681421382, "grad_norm": 0.07671289145946503, "learning_rate": 0.01, "loss": 2.0046, "step": 56958 }, { "epoch": 5.8499537845332235, "grad_norm": 0.03963657468557358, "learning_rate": 0.01, "loss": 1.9815, "step": 56961 }, { "epoch": 5.850261887645065, "grad_norm": 0.040136873722076416, "learning_rate": 0.01, "loss": 1.9858, "step": 56964 }, { "epoch": 5.850569990756907, "grad_norm": 0.06622539460659027, "learning_rate": 0.01, "loss": 1.992, "step": 56967 }, { "epoch": 5.850878093868748, "grad_norm": 0.04892749339342117, "learning_rate": 0.01, "loss": 1.9727, "step": 56970 }, { "epoch": 5.851186196980589, "grad_norm": 0.10630463808774948, "learning_rate": 0.01, "loss": 1.9775, "step": 56973 }, { "epoch": 5.851494300092431, "grad_norm": 0.08189810067415237, "learning_rate": 0.01, "loss": 1.9867, "step": 56976 }, { "epoch": 5.851802403204273, "grad_norm": 0.04350016266107559, "learning_rate": 0.01, "loss": 1.984, "step": 56979 }, { "epoch": 5.852110506316114, "grad_norm": 0.04086649417877197, "learning_rate": 0.01, "loss": 1.9975, "step": 56982 }, { "epoch": 5.852418609427955, "grad_norm": 0.0460704006254673, "learning_rate": 0.01, "loss": 2.0029, "step": 56985 }, { "epoch": 5.852726712539797, "grad_norm": 0.04429091140627861, "learning_rate": 0.01, "loss": 1.9772, "step": 56988 }, { "epoch": 5.853034815651638, "grad_norm": 0.08944038301706314, "learning_rate": 0.01, "loss": 1.9889, "step": 56991 }, { "epoch": 5.853342918763479, "grad_norm": 0.039852675050497055, "learning_rate": 0.01, "loss": 1.9906, "step": 56994 }, { "epoch": 5.853651021875321, "grad_norm": 0.051225848495960236, "learning_rate": 0.01, "loss": 1.995, "step": 56997 }, { "epoch": 5.8539591249871625, "grad_norm": 0.07715211063623428, "learning_rate": 0.01, "loss": 1.9747, "step": 57000 }, { "epoch": 5.854267228099004, "grad_norm": 0.05754586309194565, "learning_rate": 0.01, "loss": 1.9835, "step": 57003 }, { "epoch": 5.854575331210845, "grad_norm": 0.0871339812874794, "learning_rate": 0.01, "loss": 1.9903, "step": 57006 }, { "epoch": 5.854883434322686, "grad_norm": 0.03794780746102333, "learning_rate": 0.01, "loss": 1.9883, "step": 57009 }, { "epoch": 5.8551915374345285, "grad_norm": 0.039838340133428574, "learning_rate": 0.01, "loss": 1.9881, "step": 57012 }, { "epoch": 5.85549964054637, "grad_norm": 0.03378939628601074, "learning_rate": 0.01, "loss": 1.9892, "step": 57015 }, { "epoch": 5.855807743658211, "grad_norm": 0.033859383314847946, "learning_rate": 0.01, "loss": 1.9775, "step": 57018 }, { "epoch": 5.856115846770052, "grad_norm": 0.08148138970136642, "learning_rate": 0.01, "loss": 2.0163, "step": 57021 }, { "epoch": 5.8564239498818935, "grad_norm": 0.11949529498815536, "learning_rate": 0.01, "loss": 1.9921, "step": 57024 }, { "epoch": 5.856732052993735, "grad_norm": 0.06349018216133118, "learning_rate": 0.01, "loss": 1.9789, "step": 57027 }, { "epoch": 5.857040156105577, "grad_norm": 0.03922737017273903, "learning_rate": 0.01, "loss": 1.9835, "step": 57030 }, { "epoch": 5.857348259217418, "grad_norm": 0.04704468697309494, "learning_rate": 0.01, "loss": 2.0088, "step": 57033 }, { "epoch": 5.8576563623292595, "grad_norm": 0.06949195265769958, "learning_rate": 0.01, "loss": 2.005, "step": 57036 }, { "epoch": 5.857964465441101, "grad_norm": 0.08585426956415176, "learning_rate": 0.01, "loss": 1.9679, "step": 57039 }, { "epoch": 5.858272568552943, "grad_norm": 0.09531115740537643, "learning_rate": 0.01, "loss": 1.9904, "step": 57042 }, { "epoch": 5.858580671664784, "grad_norm": 0.07545875012874603, "learning_rate": 0.01, "loss": 1.9858, "step": 57045 }, { "epoch": 5.858888774776625, "grad_norm": 0.11838539689779282, "learning_rate": 0.01, "loss": 1.9612, "step": 57048 }, { "epoch": 5.859196877888467, "grad_norm": 0.1177034080028534, "learning_rate": 0.01, "loss": 2.0015, "step": 57051 }, { "epoch": 5.859504981000308, "grad_norm": 0.08327648043632507, "learning_rate": 0.01, "loss": 1.9731, "step": 57054 }, { "epoch": 5.859813084112149, "grad_norm": 0.05028563365340233, "learning_rate": 0.01, "loss": 1.977, "step": 57057 }, { "epoch": 5.860121187223991, "grad_norm": 0.03819550573825836, "learning_rate": 0.01, "loss": 1.9791, "step": 57060 }, { "epoch": 5.860429290335833, "grad_norm": 0.06301325559616089, "learning_rate": 0.01, "loss": 1.9996, "step": 57063 }, { "epoch": 5.860737393447674, "grad_norm": 0.09482091665267944, "learning_rate": 0.01, "loss": 1.9907, "step": 57066 }, { "epoch": 5.861045496559515, "grad_norm": 0.07003244757652283, "learning_rate": 0.01, "loss": 1.9692, "step": 57069 }, { "epoch": 5.861353599671356, "grad_norm": 0.04322872310876846, "learning_rate": 0.01, "loss": 1.96, "step": 57072 }, { "epoch": 5.8616617027831985, "grad_norm": 0.04223179072141647, "learning_rate": 0.01, "loss": 1.9618, "step": 57075 }, { "epoch": 5.86196980589504, "grad_norm": 0.09732475876808167, "learning_rate": 0.01, "loss": 1.9774, "step": 57078 }, { "epoch": 5.862277909006881, "grad_norm": 0.08102111518383026, "learning_rate": 0.01, "loss": 2.0031, "step": 57081 }, { "epoch": 5.862586012118722, "grad_norm": 0.06564348191022873, "learning_rate": 0.01, "loss": 1.989, "step": 57084 }, { "epoch": 5.862894115230564, "grad_norm": 0.11364365369081497, "learning_rate": 0.01, "loss": 1.9736, "step": 57087 }, { "epoch": 5.863202218342405, "grad_norm": 0.05005546286702156, "learning_rate": 0.01, "loss": 1.9899, "step": 57090 }, { "epoch": 5.863510321454247, "grad_norm": 0.10924969613552094, "learning_rate": 0.01, "loss": 2.0024, "step": 57093 }, { "epoch": 5.863818424566088, "grad_norm": 0.06546615809202194, "learning_rate": 0.01, "loss": 2.0178, "step": 57096 }, { "epoch": 5.8641265276779295, "grad_norm": 0.04817098379135132, "learning_rate": 0.01, "loss": 2.0082, "step": 57099 }, { "epoch": 5.864434630789771, "grad_norm": 0.038462646305561066, "learning_rate": 0.01, "loss": 1.9894, "step": 57102 }, { "epoch": 5.864742733901612, "grad_norm": 0.03544708341360092, "learning_rate": 0.01, "loss": 1.9882, "step": 57105 }, { "epoch": 5.865050837013454, "grad_norm": 0.032861895859241486, "learning_rate": 0.01, "loss": 1.9565, "step": 57108 }, { "epoch": 5.8653589401252955, "grad_norm": 0.13162890076637268, "learning_rate": 0.01, "loss": 1.9916, "step": 57111 }, { "epoch": 5.865667043237137, "grad_norm": 0.13489802181720734, "learning_rate": 0.01, "loss": 1.9733, "step": 57114 }, { "epoch": 5.865975146348978, "grad_norm": 0.08409550786018372, "learning_rate": 0.01, "loss": 2.0175, "step": 57117 }, { "epoch": 5.866283249460819, "grad_norm": 0.04150281846523285, "learning_rate": 0.01, "loss": 2.0027, "step": 57120 }, { "epoch": 5.8665913525726605, "grad_norm": 0.04543888941407204, "learning_rate": 0.01, "loss": 1.9891, "step": 57123 }, { "epoch": 5.866899455684503, "grad_norm": 0.0386471152305603, "learning_rate": 0.01, "loss": 1.9853, "step": 57126 }, { "epoch": 5.867207558796344, "grad_norm": 0.029136119410395622, "learning_rate": 0.01, "loss": 1.9899, "step": 57129 }, { "epoch": 5.867515661908185, "grad_norm": 0.031866688281297684, "learning_rate": 0.01, "loss": 2.0055, "step": 57132 }, { "epoch": 5.8678237650200264, "grad_norm": 0.05178746208548546, "learning_rate": 0.01, "loss": 1.9831, "step": 57135 }, { "epoch": 5.868131868131869, "grad_norm": 0.10231690853834152, "learning_rate": 0.01, "loss": 1.9925, "step": 57138 }, { "epoch": 5.86843997124371, "grad_norm": 0.06103064864873886, "learning_rate": 0.01, "loss": 2.0041, "step": 57141 }, { "epoch": 5.868748074355551, "grad_norm": 0.05619410425424576, "learning_rate": 0.01, "loss": 1.9982, "step": 57144 }, { "epoch": 5.869056177467392, "grad_norm": 0.07958339899778366, "learning_rate": 0.01, "loss": 1.963, "step": 57147 }, { "epoch": 5.869364280579234, "grad_norm": 0.12638811767101288, "learning_rate": 0.01, "loss": 1.9885, "step": 57150 }, { "epoch": 5.869672383691075, "grad_norm": 0.05043957009911537, "learning_rate": 0.01, "loss": 1.9775, "step": 57153 }, { "epoch": 5.869980486802917, "grad_norm": 0.049426887184381485, "learning_rate": 0.01, "loss": 1.9766, "step": 57156 }, { "epoch": 5.870288589914758, "grad_norm": 0.058896906673908234, "learning_rate": 0.01, "loss": 1.9848, "step": 57159 }, { "epoch": 5.8705966930266, "grad_norm": 0.06517619639635086, "learning_rate": 0.01, "loss": 2.0153, "step": 57162 }, { "epoch": 5.870904796138441, "grad_norm": 0.04372745007276535, "learning_rate": 0.01, "loss": 1.9784, "step": 57165 }, { "epoch": 5.871212899250282, "grad_norm": 0.11192513257265091, "learning_rate": 0.01, "loss": 1.9899, "step": 57168 }, { "epoch": 5.871521002362124, "grad_norm": 0.04966152831912041, "learning_rate": 0.01, "loss": 2.0124, "step": 57171 }, { "epoch": 5.8718291054739655, "grad_norm": 0.04251588135957718, "learning_rate": 0.01, "loss": 2.0008, "step": 57174 }, { "epoch": 5.872137208585807, "grad_norm": 0.03627980872988701, "learning_rate": 0.01, "loss": 1.9823, "step": 57177 }, { "epoch": 5.872445311697648, "grad_norm": 0.035204388201236725, "learning_rate": 0.01, "loss": 2.0024, "step": 57180 }, { "epoch": 5.872753414809489, "grad_norm": 0.097038634121418, "learning_rate": 0.01, "loss": 2.0121, "step": 57183 }, { "epoch": 5.873061517921331, "grad_norm": 0.0762251615524292, "learning_rate": 0.01, "loss": 1.9699, "step": 57186 }, { "epoch": 5.873369621033173, "grad_norm": 0.12268881499767303, "learning_rate": 0.01, "loss": 1.9672, "step": 57189 }, { "epoch": 5.873677724145014, "grad_norm": 0.04830612614750862, "learning_rate": 0.01, "loss": 1.999, "step": 57192 }, { "epoch": 5.873985827256855, "grad_norm": 0.03913014009594917, "learning_rate": 0.01, "loss": 1.9709, "step": 57195 }, { "epoch": 5.8742939303686965, "grad_norm": 0.043585240840911865, "learning_rate": 0.01, "loss": 1.9949, "step": 57198 }, { "epoch": 5.874602033480538, "grad_norm": 0.07954023778438568, "learning_rate": 0.01, "loss": 2.0041, "step": 57201 }, { "epoch": 5.87491013659238, "grad_norm": 0.08643771708011627, "learning_rate": 0.01, "loss": 1.9933, "step": 57204 }, { "epoch": 5.875218239704221, "grad_norm": 0.0801578015089035, "learning_rate": 0.01, "loss": 2.0187, "step": 57207 }, { "epoch": 5.8755263428160625, "grad_norm": 0.07038013637065887, "learning_rate": 0.01, "loss": 1.9959, "step": 57210 }, { "epoch": 5.875834445927904, "grad_norm": 0.09337194263935089, "learning_rate": 0.01, "loss": 2.0194, "step": 57213 }, { "epoch": 5.876142549039745, "grad_norm": 0.07053009420633316, "learning_rate": 0.01, "loss": 1.9872, "step": 57216 }, { "epoch": 5.876450652151586, "grad_norm": 0.0576632060110569, "learning_rate": 0.01, "loss": 1.9923, "step": 57219 }, { "epoch": 5.876758755263428, "grad_norm": 0.0459139384329319, "learning_rate": 0.01, "loss": 1.9598, "step": 57222 }, { "epoch": 5.87706685837527, "grad_norm": 0.0351143553853035, "learning_rate": 0.01, "loss": 1.993, "step": 57225 }, { "epoch": 5.877374961487111, "grad_norm": 0.07681302726268768, "learning_rate": 0.01, "loss": 2.0058, "step": 57228 }, { "epoch": 5.877683064598952, "grad_norm": 0.08413039892911911, "learning_rate": 0.01, "loss": 2.0022, "step": 57231 }, { "epoch": 5.877991167710794, "grad_norm": 0.07289810478687286, "learning_rate": 0.01, "loss": 1.9994, "step": 57234 }, { "epoch": 5.878299270822636, "grad_norm": 0.10013864189386368, "learning_rate": 0.01, "loss": 1.9881, "step": 57237 }, { "epoch": 5.878607373934477, "grad_norm": 0.042369335889816284, "learning_rate": 0.01, "loss": 1.9895, "step": 57240 }, { "epoch": 5.878915477046318, "grad_norm": 0.06880860030651093, "learning_rate": 0.01, "loss": 1.9864, "step": 57243 }, { "epoch": 5.879223580158159, "grad_norm": 0.07166635245084763, "learning_rate": 0.01, "loss": 2.0128, "step": 57246 }, { "epoch": 5.879531683270001, "grad_norm": 0.07346632331609726, "learning_rate": 0.01, "loss": 2.003, "step": 57249 }, { "epoch": 5.879839786381843, "grad_norm": 0.039783868938684464, "learning_rate": 0.01, "loss": 1.9899, "step": 57252 }, { "epoch": 5.880147889493684, "grad_norm": 0.03261881321668625, "learning_rate": 0.01, "loss": 1.9996, "step": 57255 }, { "epoch": 5.880455992605525, "grad_norm": 0.05967556685209274, "learning_rate": 0.01, "loss": 1.9614, "step": 57258 }, { "epoch": 5.880764095717367, "grad_norm": 0.09463750571012497, "learning_rate": 0.01, "loss": 1.9969, "step": 57261 }, { "epoch": 5.881072198829208, "grad_norm": 0.11261694133281708, "learning_rate": 0.01, "loss": 1.9916, "step": 57264 }, { "epoch": 5.88138030194105, "grad_norm": 0.04502738267183304, "learning_rate": 0.01, "loss": 2.0151, "step": 57267 }, { "epoch": 5.881688405052891, "grad_norm": 0.048553161323070526, "learning_rate": 0.01, "loss": 1.9656, "step": 57270 }, { "epoch": 5.8819965081647325, "grad_norm": 0.04814717173576355, "learning_rate": 0.01, "loss": 1.9772, "step": 57273 }, { "epoch": 5.882304611276574, "grad_norm": 0.11577591300010681, "learning_rate": 0.01, "loss": 1.9696, "step": 57276 }, { "epoch": 5.882612714388415, "grad_norm": 0.07953160256147385, "learning_rate": 0.01, "loss": 1.9866, "step": 57279 }, { "epoch": 5.882920817500256, "grad_norm": 0.0455656498670578, "learning_rate": 0.01, "loss": 1.9971, "step": 57282 }, { "epoch": 5.8832289206120985, "grad_norm": 0.035418231040239334, "learning_rate": 0.01, "loss": 1.9733, "step": 57285 }, { "epoch": 5.88353702372394, "grad_norm": 0.03444494679570198, "learning_rate": 0.01, "loss": 1.9998, "step": 57288 }, { "epoch": 5.883845126835781, "grad_norm": 0.03543773293495178, "learning_rate": 0.01, "loss": 1.9938, "step": 57291 }, { "epoch": 5.884153229947622, "grad_norm": 0.032163105905056, "learning_rate": 0.01, "loss": 1.9931, "step": 57294 }, { "epoch": 5.884461333059464, "grad_norm": 0.051954787224531174, "learning_rate": 0.01, "loss": 2.0193, "step": 57297 }, { "epoch": 5.884769436171306, "grad_norm": 0.046241723001003265, "learning_rate": 0.01, "loss": 1.9825, "step": 57300 }, { "epoch": 5.885077539283147, "grad_norm": 0.08583594113588333, "learning_rate": 0.01, "loss": 1.9741, "step": 57303 }, { "epoch": 5.885385642394988, "grad_norm": 0.10830538719892502, "learning_rate": 0.01, "loss": 1.993, "step": 57306 }, { "epoch": 5.885693745506829, "grad_norm": 0.04670783504843712, "learning_rate": 0.01, "loss": 1.9688, "step": 57309 }, { "epoch": 5.886001848618671, "grad_norm": 0.08331948518753052, "learning_rate": 0.01, "loss": 1.9914, "step": 57312 }, { "epoch": 5.886309951730513, "grad_norm": 0.12991750240325928, "learning_rate": 0.01, "loss": 1.9861, "step": 57315 }, { "epoch": 5.886618054842354, "grad_norm": 0.03781532123684883, "learning_rate": 0.01, "loss": 1.9742, "step": 57318 }, { "epoch": 5.886926157954195, "grad_norm": 0.03549522906541824, "learning_rate": 0.01, "loss": 2.0197, "step": 57321 }, { "epoch": 5.887234261066037, "grad_norm": 0.03147748485207558, "learning_rate": 0.01, "loss": 1.9779, "step": 57324 }, { "epoch": 5.887542364177878, "grad_norm": 0.06375817954540253, "learning_rate": 0.01, "loss": 1.9794, "step": 57327 }, { "epoch": 5.88785046728972, "grad_norm": 0.06203543394804001, "learning_rate": 0.01, "loss": 1.9854, "step": 57330 }, { "epoch": 5.888158570401561, "grad_norm": 0.07817850261926651, "learning_rate": 0.01, "loss": 1.9844, "step": 57333 }, { "epoch": 5.888466673513403, "grad_norm": 0.06806226819753647, "learning_rate": 0.01, "loss": 1.9795, "step": 57336 }, { "epoch": 5.888774776625244, "grad_norm": 0.06197969987988472, "learning_rate": 0.01, "loss": 1.973, "step": 57339 }, { "epoch": 5.889082879737085, "grad_norm": 0.10083005577325821, "learning_rate": 0.01, "loss": 1.9745, "step": 57342 }, { "epoch": 5.889390982848926, "grad_norm": 0.04016329348087311, "learning_rate": 0.01, "loss": 1.9907, "step": 57345 }, { "epoch": 5.8896990859607685, "grad_norm": 0.047332972288131714, "learning_rate": 0.01, "loss": 1.9967, "step": 57348 }, { "epoch": 5.89000718907261, "grad_norm": 0.042285602539777756, "learning_rate": 0.01, "loss": 1.9842, "step": 57351 }, { "epoch": 5.890315292184451, "grad_norm": 0.0776951014995575, "learning_rate": 0.01, "loss": 1.9833, "step": 57354 }, { "epoch": 5.890623395296292, "grad_norm": 0.1287294626235962, "learning_rate": 0.01, "loss": 2.0342, "step": 57357 }, { "epoch": 5.890931498408134, "grad_norm": 0.08230342715978622, "learning_rate": 0.01, "loss": 1.9969, "step": 57360 }, { "epoch": 5.891239601519976, "grad_norm": 0.03847470134496689, "learning_rate": 0.01, "loss": 1.965, "step": 57363 }, { "epoch": 5.891547704631817, "grad_norm": 0.07836028188467026, "learning_rate": 0.01, "loss": 1.9971, "step": 57366 }, { "epoch": 5.891855807743658, "grad_norm": 0.03260621055960655, "learning_rate": 0.01, "loss": 2.0024, "step": 57369 }, { "epoch": 5.8921639108554995, "grad_norm": 0.036644842475652695, "learning_rate": 0.01, "loss": 1.9969, "step": 57372 }, { "epoch": 5.892472013967341, "grad_norm": 0.03415127098560333, "learning_rate": 0.01, "loss": 1.9984, "step": 57375 }, { "epoch": 5.892780117079182, "grad_norm": 0.05682433769106865, "learning_rate": 0.01, "loss": 1.9843, "step": 57378 }, { "epoch": 5.893088220191024, "grad_norm": 0.09808420389890671, "learning_rate": 0.01, "loss": 1.9828, "step": 57381 }, { "epoch": 5.8933963233028654, "grad_norm": 0.05321237072348595, "learning_rate": 0.01, "loss": 2.0029, "step": 57384 }, { "epoch": 5.893704426414707, "grad_norm": 0.03606530278921127, "learning_rate": 0.01, "loss": 1.9851, "step": 57387 }, { "epoch": 5.894012529526548, "grad_norm": 0.04051011800765991, "learning_rate": 0.01, "loss": 1.9815, "step": 57390 }, { "epoch": 5.89432063263839, "grad_norm": 0.039596255868673325, "learning_rate": 0.01, "loss": 1.9747, "step": 57393 }, { "epoch": 5.894628735750231, "grad_norm": 0.0865618884563446, "learning_rate": 0.01, "loss": 1.9794, "step": 57396 }, { "epoch": 5.894936838862073, "grad_norm": 0.05438371002674103, "learning_rate": 0.01, "loss": 1.9912, "step": 57399 }, { "epoch": 5.895244941973914, "grad_norm": 0.14930184185504913, "learning_rate": 0.01, "loss": 1.9767, "step": 57402 }, { "epoch": 5.895553045085755, "grad_norm": 0.05614696815609932, "learning_rate": 0.01, "loss": 1.9998, "step": 57405 }, { "epoch": 5.895861148197596, "grad_norm": 0.06839166581630707, "learning_rate": 0.01, "loss": 1.985, "step": 57408 }, { "epoch": 5.896169251309439, "grad_norm": 0.0660998523235321, "learning_rate": 0.01, "loss": 1.9721, "step": 57411 }, { "epoch": 5.89647735442128, "grad_norm": 0.07487353682518005, "learning_rate": 0.01, "loss": 1.996, "step": 57414 }, { "epoch": 5.896785457533121, "grad_norm": 0.05647150054574013, "learning_rate": 0.01, "loss": 2.0115, "step": 57417 }, { "epoch": 5.897093560644962, "grad_norm": 0.04570586606860161, "learning_rate": 0.01, "loss": 1.9749, "step": 57420 }, { "epoch": 5.897401663756804, "grad_norm": 0.10942738503217697, "learning_rate": 0.01, "loss": 1.9793, "step": 57423 }, { "epoch": 5.897709766868646, "grad_norm": 0.08452852070331573, "learning_rate": 0.01, "loss": 2.0137, "step": 57426 }, { "epoch": 5.898017869980487, "grad_norm": 0.07079064846038818, "learning_rate": 0.01, "loss": 2.0166, "step": 57429 }, { "epoch": 5.898325973092328, "grad_norm": 0.09797564148902893, "learning_rate": 0.01, "loss": 1.949, "step": 57432 }, { "epoch": 5.89863407620417, "grad_norm": 0.08445065468549728, "learning_rate": 0.01, "loss": 2.0102, "step": 57435 }, { "epoch": 5.898942179316011, "grad_norm": 0.08496986329555511, "learning_rate": 0.01, "loss": 1.9857, "step": 57438 }, { "epoch": 5.899250282427852, "grad_norm": 0.04799206554889679, "learning_rate": 0.01, "loss": 1.9913, "step": 57441 }, { "epoch": 5.899558385539694, "grad_norm": 0.047865018248558044, "learning_rate": 0.01, "loss": 2.0047, "step": 57444 }, { "epoch": 5.8998664886515355, "grad_norm": 0.040415599942207336, "learning_rate": 0.01, "loss": 2.0027, "step": 57447 }, { "epoch": 5.900174591763377, "grad_norm": 0.05303875729441643, "learning_rate": 0.01, "loss": 1.986, "step": 57450 }, { "epoch": 5.900482694875218, "grad_norm": 0.13320137560367584, "learning_rate": 0.01, "loss": 2.0125, "step": 57453 }, { "epoch": 5.900790797987059, "grad_norm": 0.05218903720378876, "learning_rate": 0.01, "loss": 2.0022, "step": 57456 }, { "epoch": 5.9010989010989015, "grad_norm": 0.06745684146881104, "learning_rate": 0.01, "loss": 1.9737, "step": 57459 }, { "epoch": 5.901407004210743, "grad_norm": 0.04268602281808853, "learning_rate": 0.01, "loss": 1.9863, "step": 57462 }, { "epoch": 5.901715107322584, "grad_norm": 0.034592967480421066, "learning_rate": 0.01, "loss": 1.9977, "step": 57465 }, { "epoch": 5.902023210434425, "grad_norm": 0.11330728232860565, "learning_rate": 0.01, "loss": 1.9778, "step": 57468 }, { "epoch": 5.9023313135462665, "grad_norm": 0.035573434084653854, "learning_rate": 0.01, "loss": 2.0189, "step": 57471 }, { "epoch": 5.902639416658108, "grad_norm": 0.09724743664264679, "learning_rate": 0.01, "loss": 2.0004, "step": 57474 }, { "epoch": 5.90294751976995, "grad_norm": 0.06769842654466629, "learning_rate": 0.01, "loss": 1.9953, "step": 57477 }, { "epoch": 5.903255622881791, "grad_norm": 0.07262661308050156, "learning_rate": 0.01, "loss": 1.972, "step": 57480 }, { "epoch": 5.903563725993632, "grad_norm": 0.08442901074886322, "learning_rate": 0.01, "loss": 1.9921, "step": 57483 }, { "epoch": 5.903871829105474, "grad_norm": 0.07244334369897842, "learning_rate": 0.01, "loss": 1.9969, "step": 57486 }, { "epoch": 5.904179932217316, "grad_norm": 0.0898541584610939, "learning_rate": 0.01, "loss": 1.9972, "step": 57489 }, { "epoch": 5.904488035329157, "grad_norm": 0.043548524379730225, "learning_rate": 0.01, "loss": 2.0112, "step": 57492 }, { "epoch": 5.904796138440998, "grad_norm": 0.035105109214782715, "learning_rate": 0.01, "loss": 1.968, "step": 57495 }, { "epoch": 5.90510424155284, "grad_norm": 0.06654632091522217, "learning_rate": 0.01, "loss": 1.9919, "step": 57498 }, { "epoch": 5.905412344664681, "grad_norm": 0.09614937752485275, "learning_rate": 0.01, "loss": 1.9801, "step": 57501 }, { "epoch": 5.905720447776522, "grad_norm": 0.07232387363910675, "learning_rate": 0.01, "loss": 2.0032, "step": 57504 }, { "epoch": 5.906028550888364, "grad_norm": 0.06795191764831543, "learning_rate": 0.01, "loss": 1.9686, "step": 57507 }, { "epoch": 5.906336654000206, "grad_norm": 0.1627775877714157, "learning_rate": 0.01, "loss": 2.0031, "step": 57510 }, { "epoch": 5.906644757112047, "grad_norm": 0.043800655752420425, "learning_rate": 0.01, "loss": 1.9972, "step": 57513 }, { "epoch": 5.906952860223888, "grad_norm": 0.09019804745912552, "learning_rate": 0.01, "loss": 2.0035, "step": 57516 }, { "epoch": 5.907260963335729, "grad_norm": 0.03738325461745262, "learning_rate": 0.01, "loss": 1.9949, "step": 57519 }, { "epoch": 5.9075690664475715, "grad_norm": 0.05333522707223892, "learning_rate": 0.01, "loss": 1.9712, "step": 57522 }, { "epoch": 5.907877169559413, "grad_norm": 0.03560793399810791, "learning_rate": 0.01, "loss": 1.9871, "step": 57525 }, { "epoch": 5.908185272671254, "grad_norm": 0.045784592628479004, "learning_rate": 0.01, "loss": 1.9699, "step": 57528 }, { "epoch": 5.908493375783095, "grad_norm": 0.05486133694648743, "learning_rate": 0.01, "loss": 1.9918, "step": 57531 }, { "epoch": 5.908801478894937, "grad_norm": 0.06346391886472702, "learning_rate": 0.01, "loss": 1.9864, "step": 57534 }, { "epoch": 5.909109582006778, "grad_norm": 0.06552404165267944, "learning_rate": 0.01, "loss": 2.0023, "step": 57537 }, { "epoch": 5.90941768511862, "grad_norm": 0.07176794856786728, "learning_rate": 0.01, "loss": 1.9855, "step": 57540 }, { "epoch": 5.909725788230461, "grad_norm": 0.05690211430191994, "learning_rate": 0.01, "loss": 1.9842, "step": 57543 }, { "epoch": 5.9100338913423025, "grad_norm": 0.07518581300973892, "learning_rate": 0.01, "loss": 2.0117, "step": 57546 }, { "epoch": 5.910341994454144, "grad_norm": 0.09749139845371246, "learning_rate": 0.01, "loss": 2.0104, "step": 57549 }, { "epoch": 5.910650097565985, "grad_norm": 0.06265246123075485, "learning_rate": 0.01, "loss": 1.9775, "step": 57552 }, { "epoch": 5.910958200677827, "grad_norm": 0.09425773471593857, "learning_rate": 0.01, "loss": 2.0063, "step": 57555 }, { "epoch": 5.911266303789668, "grad_norm": 0.07862033694982529, "learning_rate": 0.01, "loss": 1.9784, "step": 57558 }, { "epoch": 5.91157440690151, "grad_norm": 0.052102018147706985, "learning_rate": 0.01, "loss": 1.9938, "step": 57561 }, { "epoch": 5.911882510013351, "grad_norm": 0.04333946481347084, "learning_rate": 0.01, "loss": 2.0005, "step": 57564 }, { "epoch": 5.912190613125192, "grad_norm": 0.03312124311923981, "learning_rate": 0.01, "loss": 2.0017, "step": 57567 }, { "epoch": 5.912498716237034, "grad_norm": 0.11217939108610153, "learning_rate": 0.01, "loss": 1.9932, "step": 57570 }, { "epoch": 5.912806819348876, "grad_norm": 0.050783589482307434, "learning_rate": 0.01, "loss": 1.9766, "step": 57573 }, { "epoch": 5.913114922460717, "grad_norm": 0.05247243866324425, "learning_rate": 0.01, "loss": 2.0166, "step": 57576 }, { "epoch": 5.913423025572558, "grad_norm": 0.03641197457909584, "learning_rate": 0.01, "loss": 1.9992, "step": 57579 }, { "epoch": 5.913731128684399, "grad_norm": 0.03802553936839104, "learning_rate": 0.01, "loss": 2.0152, "step": 57582 }, { "epoch": 5.914039231796242, "grad_norm": 0.04474913701415062, "learning_rate": 0.01, "loss": 1.9781, "step": 57585 }, { "epoch": 5.914347334908083, "grad_norm": 0.050698988139629364, "learning_rate": 0.01, "loss": 2.0057, "step": 57588 }, { "epoch": 5.914655438019924, "grad_norm": 0.12217648327350616, "learning_rate": 0.01, "loss": 1.9973, "step": 57591 }, { "epoch": 5.914963541131765, "grad_norm": 0.10421942919492722, "learning_rate": 0.01, "loss": 1.9972, "step": 57594 }, { "epoch": 5.915271644243607, "grad_norm": 0.09191686660051346, "learning_rate": 0.01, "loss": 1.9944, "step": 57597 }, { "epoch": 5.915579747355448, "grad_norm": 0.07071512192487717, "learning_rate": 0.01, "loss": 1.9977, "step": 57600 }, { "epoch": 5.91588785046729, "grad_norm": 0.0706767663359642, "learning_rate": 0.01, "loss": 1.9909, "step": 57603 }, { "epoch": 5.916195953579131, "grad_norm": 0.04327024519443512, "learning_rate": 0.01, "loss": 1.9859, "step": 57606 }, { "epoch": 5.916504056690973, "grad_norm": 0.04743794724345207, "learning_rate": 0.01, "loss": 1.9948, "step": 57609 }, { "epoch": 5.916812159802814, "grad_norm": 0.04772097244858742, "learning_rate": 0.01, "loss": 1.9746, "step": 57612 }, { "epoch": 5.917120262914655, "grad_norm": 0.0430108904838562, "learning_rate": 0.01, "loss": 1.9859, "step": 57615 }, { "epoch": 5.917428366026497, "grad_norm": 0.07793119549751282, "learning_rate": 0.01, "loss": 1.9789, "step": 57618 }, { "epoch": 5.9177364691383385, "grad_norm": 0.10654459148645401, "learning_rate": 0.01, "loss": 1.9975, "step": 57621 }, { "epoch": 5.91804457225018, "grad_norm": 0.03443089500069618, "learning_rate": 0.01, "loss": 1.9731, "step": 57624 }, { "epoch": 5.918352675362021, "grad_norm": 0.07121492922306061, "learning_rate": 0.01, "loss": 1.9763, "step": 57627 }, { "epoch": 5.918660778473862, "grad_norm": 0.04436872899532318, "learning_rate": 0.01, "loss": 2.0261, "step": 57630 }, { "epoch": 5.918968881585704, "grad_norm": 0.08435078710317612, "learning_rate": 0.01, "loss": 1.9783, "step": 57633 }, { "epoch": 5.919276984697546, "grad_norm": 0.1054021418094635, "learning_rate": 0.01, "loss": 2.007, "step": 57636 }, { "epoch": 5.919585087809387, "grad_norm": 0.05516025796532631, "learning_rate": 0.01, "loss": 1.9433, "step": 57639 }, { "epoch": 5.919893190921228, "grad_norm": 0.067764513194561, "learning_rate": 0.01, "loss": 2.0029, "step": 57642 }, { "epoch": 5.9202012940330695, "grad_norm": 0.0520036555826664, "learning_rate": 0.01, "loss": 1.9952, "step": 57645 }, { "epoch": 5.920509397144912, "grad_norm": 0.071280837059021, "learning_rate": 0.01, "loss": 1.9881, "step": 57648 }, { "epoch": 5.920817500256753, "grad_norm": 0.04528915137052536, "learning_rate": 0.01, "loss": 1.9862, "step": 57651 }, { "epoch": 5.921125603368594, "grad_norm": 0.0340840145945549, "learning_rate": 0.01, "loss": 1.9829, "step": 57654 }, { "epoch": 5.921433706480435, "grad_norm": 0.033173661679029465, "learning_rate": 0.01, "loss": 1.9705, "step": 57657 }, { "epoch": 5.921741809592277, "grad_norm": 0.13926701247692108, "learning_rate": 0.01, "loss": 1.9926, "step": 57660 }, { "epoch": 5.922049912704118, "grad_norm": 0.07138355076313019, "learning_rate": 0.01, "loss": 1.966, "step": 57663 }, { "epoch": 5.92235801581596, "grad_norm": 0.0728163793683052, "learning_rate": 0.01, "loss": 1.9971, "step": 57666 }, { "epoch": 5.922666118927801, "grad_norm": 0.08973317593336105, "learning_rate": 0.01, "loss": 1.9996, "step": 57669 }, { "epoch": 5.922974222039643, "grad_norm": 0.0479208379983902, "learning_rate": 0.01, "loss": 1.9665, "step": 57672 }, { "epoch": 5.923282325151484, "grad_norm": 0.05372065678238869, "learning_rate": 0.01, "loss": 2.0279, "step": 57675 }, { "epoch": 5.923590428263325, "grad_norm": 0.048417530953884125, "learning_rate": 0.01, "loss": 1.9718, "step": 57678 }, { "epoch": 5.923898531375167, "grad_norm": 0.03113439865410328, "learning_rate": 0.01, "loss": 1.9936, "step": 57681 }, { "epoch": 5.924206634487009, "grad_norm": 0.0359586738049984, "learning_rate": 0.01, "loss": 1.9672, "step": 57684 }, { "epoch": 5.92451473759885, "grad_norm": 0.12019523233175278, "learning_rate": 0.01, "loss": 1.9879, "step": 57687 }, { "epoch": 5.924822840710691, "grad_norm": 0.0717984288930893, "learning_rate": 0.01, "loss": 1.9863, "step": 57690 }, { "epoch": 5.925130943822532, "grad_norm": 0.06465215235948563, "learning_rate": 0.01, "loss": 1.9864, "step": 57693 }, { "epoch": 5.925439046934374, "grad_norm": 0.1053556352853775, "learning_rate": 0.01, "loss": 1.9825, "step": 57696 }, { "epoch": 5.925747150046216, "grad_norm": 0.04565432667732239, "learning_rate": 0.01, "loss": 1.9987, "step": 57699 }, { "epoch": 5.926055253158057, "grad_norm": 0.05233050882816315, "learning_rate": 0.01, "loss": 1.9858, "step": 57702 }, { "epoch": 5.926363356269898, "grad_norm": 0.05413827672600746, "learning_rate": 0.01, "loss": 1.9791, "step": 57705 }, { "epoch": 5.92667145938174, "grad_norm": 0.0694475993514061, "learning_rate": 0.01, "loss": 2.0002, "step": 57708 }, { "epoch": 5.926979562493581, "grad_norm": 0.03973362222313881, "learning_rate": 0.01, "loss": 1.9963, "step": 57711 }, { "epoch": 5.927287665605423, "grad_norm": 0.048481449484825134, "learning_rate": 0.01, "loss": 1.964, "step": 57714 }, { "epoch": 5.927595768717264, "grad_norm": 0.04115751013159752, "learning_rate": 0.01, "loss": 1.9826, "step": 57717 }, { "epoch": 5.9279038718291055, "grad_norm": 0.056814514100551605, "learning_rate": 0.01, "loss": 2.0123, "step": 57720 }, { "epoch": 5.928211974940947, "grad_norm": 0.07510741055011749, "learning_rate": 0.01, "loss": 1.9933, "step": 57723 }, { "epoch": 5.928520078052788, "grad_norm": 0.10000273585319519, "learning_rate": 0.01, "loss": 1.979, "step": 57726 }, { "epoch": 5.928828181164629, "grad_norm": 0.10124478489160538, "learning_rate": 0.01, "loss": 1.9944, "step": 57729 }, { "epoch": 5.929136284276471, "grad_norm": 0.1368652582168579, "learning_rate": 0.01, "loss": 1.9818, "step": 57732 }, { "epoch": 5.929444387388313, "grad_norm": 0.06806531548500061, "learning_rate": 0.01, "loss": 1.9891, "step": 57735 }, { "epoch": 5.929752490500154, "grad_norm": 0.03443896025419235, "learning_rate": 0.01, "loss": 1.9785, "step": 57738 }, { "epoch": 5.930060593611995, "grad_norm": 0.03277003765106201, "learning_rate": 0.01, "loss": 1.9706, "step": 57741 }, { "epoch": 5.930368696723837, "grad_norm": 0.04082879796624184, "learning_rate": 0.01, "loss": 1.9843, "step": 57744 }, { "epoch": 5.930676799835679, "grad_norm": 0.11657889932394028, "learning_rate": 0.01, "loss": 2.0116, "step": 57747 }, { "epoch": 5.93098490294752, "grad_norm": 0.08914856612682343, "learning_rate": 0.01, "loss": 1.9995, "step": 57750 }, { "epoch": 5.931293006059361, "grad_norm": 0.06309764087200165, "learning_rate": 0.01, "loss": 1.9943, "step": 57753 }, { "epoch": 5.931601109171202, "grad_norm": 0.051464032381772995, "learning_rate": 0.01, "loss": 1.9949, "step": 57756 }, { "epoch": 5.931909212283044, "grad_norm": 0.037711381912231445, "learning_rate": 0.01, "loss": 2.0257, "step": 57759 }, { "epoch": 5.932217315394886, "grad_norm": 0.06184235215187073, "learning_rate": 0.01, "loss": 1.9991, "step": 57762 }, { "epoch": 5.932525418506727, "grad_norm": 0.07253403961658478, "learning_rate": 0.01, "loss": 1.9763, "step": 57765 }, { "epoch": 5.932833521618568, "grad_norm": 0.0968317911028862, "learning_rate": 0.01, "loss": 2.0105, "step": 57768 }, { "epoch": 5.93314162473041, "grad_norm": 0.0691918134689331, "learning_rate": 0.01, "loss": 1.9819, "step": 57771 }, { "epoch": 5.933449727842251, "grad_norm": 0.0434565395116806, "learning_rate": 0.01, "loss": 2.0185, "step": 57774 }, { "epoch": 5.933757830954093, "grad_norm": 0.04627044126391411, "learning_rate": 0.01, "loss": 2.0037, "step": 57777 }, { "epoch": 5.934065934065934, "grad_norm": 0.04456416517496109, "learning_rate": 0.01, "loss": 2.0219, "step": 57780 }, { "epoch": 5.934374037177776, "grad_norm": 0.038884878158569336, "learning_rate": 0.01, "loss": 1.9942, "step": 57783 }, { "epoch": 5.934682140289617, "grad_norm": 0.03564108535647392, "learning_rate": 0.01, "loss": 1.9891, "step": 57786 }, { "epoch": 5.934990243401458, "grad_norm": 0.06104537844657898, "learning_rate": 0.01, "loss": 1.9844, "step": 57789 }, { "epoch": 5.935298346513299, "grad_norm": 0.06845796853303909, "learning_rate": 0.01, "loss": 1.9933, "step": 57792 }, { "epoch": 5.9356064496251415, "grad_norm": 0.08483695238828659, "learning_rate": 0.01, "loss": 1.9665, "step": 57795 }, { "epoch": 5.935914552736983, "grad_norm": 0.05989287048578262, "learning_rate": 0.01, "loss": 2.0038, "step": 57798 }, { "epoch": 5.936222655848824, "grad_norm": 0.04331030324101448, "learning_rate": 0.01, "loss": 2.0049, "step": 57801 }, { "epoch": 5.936530758960665, "grad_norm": 0.05447021499276161, "learning_rate": 0.01, "loss": 1.9922, "step": 57804 }, { "epoch": 5.9368388620725066, "grad_norm": 0.10276515781879425, "learning_rate": 0.01, "loss": 1.9818, "step": 57807 }, { "epoch": 5.937146965184349, "grad_norm": 0.041528135538101196, "learning_rate": 0.01, "loss": 1.978, "step": 57810 }, { "epoch": 5.93745506829619, "grad_norm": 0.09760654717683792, "learning_rate": 0.01, "loss": 1.9844, "step": 57813 }, { "epoch": 5.937763171408031, "grad_norm": 0.07332238554954529, "learning_rate": 0.01, "loss": 2.0096, "step": 57816 }, { "epoch": 5.9380712745198725, "grad_norm": 0.07201950252056122, "learning_rate": 0.01, "loss": 1.9674, "step": 57819 }, { "epoch": 5.938379377631714, "grad_norm": 0.09542597830295563, "learning_rate": 0.01, "loss": 1.9897, "step": 57822 }, { "epoch": 5.938687480743555, "grad_norm": 0.08886115252971649, "learning_rate": 0.01, "loss": 2.016, "step": 57825 }, { "epoch": 5.938995583855397, "grad_norm": 0.0604422464966774, "learning_rate": 0.01, "loss": 2.0074, "step": 57828 }, { "epoch": 5.939303686967238, "grad_norm": 0.04549078643321991, "learning_rate": 0.01, "loss": 2.0073, "step": 57831 }, { "epoch": 5.93961179007908, "grad_norm": 0.09901918470859528, "learning_rate": 0.01, "loss": 2.0006, "step": 57834 }, { "epoch": 5.939919893190921, "grad_norm": 0.10208103060722351, "learning_rate": 0.01, "loss": 1.9797, "step": 57837 }, { "epoch": 5.940227996302763, "grad_norm": 0.08842794597148895, "learning_rate": 0.01, "loss": 1.977, "step": 57840 }, { "epoch": 5.940536099414604, "grad_norm": 0.06819964945316315, "learning_rate": 0.01, "loss": 1.9854, "step": 57843 }, { "epoch": 5.940844202526446, "grad_norm": 0.07434326410293579, "learning_rate": 0.01, "loss": 1.9875, "step": 57846 }, { "epoch": 5.941152305638287, "grad_norm": 0.06985678523778915, "learning_rate": 0.01, "loss": 1.9805, "step": 57849 }, { "epoch": 5.941460408750128, "grad_norm": 0.04155593365430832, "learning_rate": 0.01, "loss": 1.9645, "step": 57852 }, { "epoch": 5.941768511861969, "grad_norm": 0.07389968633651733, "learning_rate": 0.01, "loss": 2.0201, "step": 57855 }, { "epoch": 5.942076614973812, "grad_norm": 0.11060785502195358, "learning_rate": 0.01, "loss": 1.9677, "step": 57858 }, { "epoch": 5.942384718085653, "grad_norm": 0.035507991909980774, "learning_rate": 0.01, "loss": 1.9752, "step": 57861 }, { "epoch": 5.942692821197494, "grad_norm": 0.07544751465320587, "learning_rate": 0.01, "loss": 2.0023, "step": 57864 }, { "epoch": 5.943000924309335, "grad_norm": 0.03793545812368393, "learning_rate": 0.01, "loss": 1.998, "step": 57867 }, { "epoch": 5.943309027421177, "grad_norm": 0.07497958838939667, "learning_rate": 0.01, "loss": 1.9943, "step": 57870 }, { "epoch": 5.943617130533019, "grad_norm": 0.0712079107761383, "learning_rate": 0.01, "loss": 2.0135, "step": 57873 }, { "epoch": 5.94392523364486, "grad_norm": 0.03715150058269501, "learning_rate": 0.01, "loss": 1.9586, "step": 57876 }, { "epoch": 5.944233336756701, "grad_norm": 0.0650571882724762, "learning_rate": 0.01, "loss": 1.9873, "step": 57879 }, { "epoch": 5.9445414398685426, "grad_norm": 0.0977357029914856, "learning_rate": 0.01, "loss": 1.994, "step": 57882 }, { "epoch": 5.944849542980384, "grad_norm": 0.06483075022697449, "learning_rate": 0.01, "loss": 1.9779, "step": 57885 }, { "epoch": 5.945157646092225, "grad_norm": 0.09345372766256332, "learning_rate": 0.01, "loss": 1.9878, "step": 57888 }, { "epoch": 5.945465749204067, "grad_norm": 0.06418720632791519, "learning_rate": 0.01, "loss": 1.9899, "step": 57891 }, { "epoch": 5.9457738523159085, "grad_norm": 0.05608039349317551, "learning_rate": 0.01, "loss": 1.9835, "step": 57894 }, { "epoch": 5.94608195542775, "grad_norm": 0.04540601745247841, "learning_rate": 0.01, "loss": 1.9884, "step": 57897 }, { "epoch": 5.946390058539591, "grad_norm": 0.07881978154182434, "learning_rate": 0.01, "loss": 1.9901, "step": 57900 }, { "epoch": 5.946698161651433, "grad_norm": 0.05478779971599579, "learning_rate": 0.01, "loss": 1.9838, "step": 57903 }, { "epoch": 5.947006264763274, "grad_norm": 0.06574473530054092, "learning_rate": 0.01, "loss": 1.9842, "step": 57906 }, { "epoch": 5.947314367875116, "grad_norm": 0.06080929934978485, "learning_rate": 0.01, "loss": 1.9615, "step": 57909 }, { "epoch": 5.947622470986957, "grad_norm": 0.06462790071964264, "learning_rate": 0.01, "loss": 1.9889, "step": 57912 }, { "epoch": 5.947930574098798, "grad_norm": 0.06746607273817062, "learning_rate": 0.01, "loss": 1.9895, "step": 57915 }, { "epoch": 5.9482386772106395, "grad_norm": 0.048096004873514175, "learning_rate": 0.01, "loss": 1.9948, "step": 57918 }, { "epoch": 5.948546780322482, "grad_norm": 0.038138337433338165, "learning_rate": 0.01, "loss": 1.9728, "step": 57921 }, { "epoch": 5.948854883434323, "grad_norm": 0.05787679925560951, "learning_rate": 0.01, "loss": 1.9743, "step": 57924 }, { "epoch": 5.949162986546164, "grad_norm": 0.21429073810577393, "learning_rate": 0.01, "loss": 1.9997, "step": 57927 }, { "epoch": 5.949471089658005, "grad_norm": 0.103017657995224, "learning_rate": 0.01, "loss": 2.0201, "step": 57930 }, { "epoch": 5.949779192769847, "grad_norm": 0.08076374232769012, "learning_rate": 0.01, "loss": 2.0054, "step": 57933 }, { "epoch": 5.950087295881689, "grad_norm": 0.06803920865058899, "learning_rate": 0.01, "loss": 1.9788, "step": 57936 }, { "epoch": 5.95039539899353, "grad_norm": 0.0801796242594719, "learning_rate": 0.01, "loss": 1.994, "step": 57939 }, { "epoch": 5.950703502105371, "grad_norm": 0.0715690553188324, "learning_rate": 0.01, "loss": 1.9848, "step": 57942 }, { "epoch": 5.951011605217213, "grad_norm": 0.051856063306331635, "learning_rate": 0.01, "loss": 1.9924, "step": 57945 }, { "epoch": 5.951319708329054, "grad_norm": 0.03726186603307724, "learning_rate": 0.01, "loss": 2.006, "step": 57948 }, { "epoch": 5.951627811440895, "grad_norm": 0.04276345297694206, "learning_rate": 0.01, "loss": 1.994, "step": 57951 }, { "epoch": 5.951935914552737, "grad_norm": 0.06446687877178192, "learning_rate": 0.01, "loss": 1.9837, "step": 57954 }, { "epoch": 5.952244017664579, "grad_norm": 0.05513986572623253, "learning_rate": 0.01, "loss": 1.9882, "step": 57957 }, { "epoch": 5.95255212077642, "grad_norm": 0.06750188022851944, "learning_rate": 0.01, "loss": 2.0185, "step": 57960 }, { "epoch": 5.952860223888261, "grad_norm": 0.048399876803159714, "learning_rate": 0.01, "loss": 1.9937, "step": 57963 }, { "epoch": 5.953168327000102, "grad_norm": 0.04391476884484291, "learning_rate": 0.01, "loss": 1.9905, "step": 57966 }, { "epoch": 5.9534764301119445, "grad_norm": 0.05262988805770874, "learning_rate": 0.01, "loss": 1.98, "step": 57969 }, { "epoch": 5.953784533223786, "grad_norm": 0.1819307655096054, "learning_rate": 0.01, "loss": 1.9768, "step": 57972 }, { "epoch": 5.954092636335627, "grad_norm": 0.04456859081983566, "learning_rate": 0.01, "loss": 1.9679, "step": 57975 }, { "epoch": 5.954400739447468, "grad_norm": 0.11903820186853409, "learning_rate": 0.01, "loss": 1.9895, "step": 57978 }, { "epoch": 5.9547088425593095, "grad_norm": 0.08669394254684448, "learning_rate": 0.01, "loss": 2.0061, "step": 57981 }, { "epoch": 5.955016945671151, "grad_norm": 0.05538726970553398, "learning_rate": 0.01, "loss": 1.9652, "step": 57984 }, { "epoch": 5.955325048782993, "grad_norm": 0.06540273129940033, "learning_rate": 0.01, "loss": 1.954, "step": 57987 }, { "epoch": 5.955633151894834, "grad_norm": 0.05096886307001114, "learning_rate": 0.01, "loss": 2.0121, "step": 57990 }, { "epoch": 5.9559412550066755, "grad_norm": 0.0463346429169178, "learning_rate": 0.01, "loss": 1.985, "step": 57993 }, { "epoch": 5.956249358118517, "grad_norm": 0.12739206850528717, "learning_rate": 0.01, "loss": 1.9886, "step": 57996 }, { "epoch": 5.956557461230359, "grad_norm": 0.03725777193903923, "learning_rate": 0.01, "loss": 1.9851, "step": 57999 }, { "epoch": 5.9568655643422, "grad_norm": 0.041961897164583206, "learning_rate": 0.01, "loss": 1.9872, "step": 58002 }, { "epoch": 5.957173667454041, "grad_norm": 0.039089135825634, "learning_rate": 0.01, "loss": 1.9865, "step": 58005 }, { "epoch": 5.957481770565883, "grad_norm": 0.03455056622624397, "learning_rate": 0.01, "loss": 1.9644, "step": 58008 }, { "epoch": 5.957789873677724, "grad_norm": 0.10427875816822052, "learning_rate": 0.01, "loss": 1.9879, "step": 58011 }, { "epoch": 5.958097976789565, "grad_norm": 0.11531616002321243, "learning_rate": 0.01, "loss": 1.9947, "step": 58014 }, { "epoch": 5.958406079901407, "grad_norm": 0.05732328072190285, "learning_rate": 0.01, "loss": 1.9666, "step": 58017 }, { "epoch": 5.958714183013249, "grad_norm": 0.033681951463222504, "learning_rate": 0.01, "loss": 1.9839, "step": 58020 }, { "epoch": 5.95902228612509, "grad_norm": 0.03731759265065193, "learning_rate": 0.01, "loss": 2.0003, "step": 58023 }, { "epoch": 5.959330389236931, "grad_norm": 0.03403918072581291, "learning_rate": 0.01, "loss": 2.0195, "step": 58026 }, { "epoch": 5.959638492348772, "grad_norm": 0.10777036845684052, "learning_rate": 0.01, "loss": 1.9861, "step": 58029 }, { "epoch": 5.959946595460615, "grad_norm": 0.03863784670829773, "learning_rate": 0.01, "loss": 1.9806, "step": 58032 }, { "epoch": 5.960254698572456, "grad_norm": 0.11702859401702881, "learning_rate": 0.01, "loss": 2.0041, "step": 58035 }, { "epoch": 5.960562801684297, "grad_norm": 0.04561970755457878, "learning_rate": 0.01, "loss": 1.9808, "step": 58038 }, { "epoch": 5.960870904796138, "grad_norm": 0.07471676170825958, "learning_rate": 0.01, "loss": 1.9927, "step": 58041 }, { "epoch": 5.96117900790798, "grad_norm": 0.06594023108482361, "learning_rate": 0.01, "loss": 1.9952, "step": 58044 }, { "epoch": 5.961487111019821, "grad_norm": 0.10957533866167068, "learning_rate": 0.01, "loss": 1.9701, "step": 58047 }, { "epoch": 5.961795214131663, "grad_norm": 0.06384073197841644, "learning_rate": 0.01, "loss": 1.9951, "step": 58050 }, { "epoch": 5.962103317243504, "grad_norm": 0.04451090097427368, "learning_rate": 0.01, "loss": 1.9979, "step": 58053 }, { "epoch": 5.9624114203553455, "grad_norm": 0.036886006593704224, "learning_rate": 0.01, "loss": 1.982, "step": 58056 }, { "epoch": 5.962719523467187, "grad_norm": 0.06101905182003975, "learning_rate": 0.01, "loss": 1.9839, "step": 58059 }, { "epoch": 5.963027626579028, "grad_norm": 0.037433281540870667, "learning_rate": 0.01, "loss": 1.951, "step": 58062 }, { "epoch": 5.96333572969087, "grad_norm": 0.04497874528169632, "learning_rate": 0.01, "loss": 1.9854, "step": 58065 }, { "epoch": 5.9636438328027115, "grad_norm": 0.10465840995311737, "learning_rate": 0.01, "loss": 1.9674, "step": 58068 }, { "epoch": 5.963951935914553, "grad_norm": 0.08285420387983322, "learning_rate": 0.01, "loss": 1.9748, "step": 58071 }, { "epoch": 5.964260039026394, "grad_norm": 0.0607406347990036, "learning_rate": 0.01, "loss": 1.9738, "step": 58074 }, { "epoch": 5.964568142138235, "grad_norm": 0.07265600562095642, "learning_rate": 0.01, "loss": 1.978, "step": 58077 }, { "epoch": 5.9648762452500765, "grad_norm": 0.06835401058197021, "learning_rate": 0.01, "loss": 2.008, "step": 58080 }, { "epoch": 5.965184348361919, "grad_norm": 0.08795753121376038, "learning_rate": 0.01, "loss": 1.983, "step": 58083 }, { "epoch": 5.96549245147376, "grad_norm": 0.06189946457743645, "learning_rate": 0.01, "loss": 1.9896, "step": 58086 }, { "epoch": 5.965800554585601, "grad_norm": 0.05601467564702034, "learning_rate": 0.01, "loss": 1.9973, "step": 58089 }, { "epoch": 5.9661086576974425, "grad_norm": 0.07447000592947006, "learning_rate": 0.01, "loss": 2.0048, "step": 58092 }, { "epoch": 5.966416760809285, "grad_norm": 0.08373255282640457, "learning_rate": 0.01, "loss": 1.969, "step": 58095 }, { "epoch": 5.966724863921126, "grad_norm": 0.09241122007369995, "learning_rate": 0.01, "loss": 1.9638, "step": 58098 }, { "epoch": 5.967032967032967, "grad_norm": 0.0638846680521965, "learning_rate": 0.01, "loss": 1.9902, "step": 58101 }, { "epoch": 5.967341070144808, "grad_norm": 0.0532197505235672, "learning_rate": 0.01, "loss": 1.9652, "step": 58104 }, { "epoch": 5.96764917325665, "grad_norm": 0.10008466243743896, "learning_rate": 0.01, "loss": 1.9972, "step": 58107 }, { "epoch": 5.967957276368491, "grad_norm": 0.047970160841941833, "learning_rate": 0.01, "loss": 1.9937, "step": 58110 }, { "epoch": 5.968265379480333, "grad_norm": 0.046584948897361755, "learning_rate": 0.01, "loss": 2.0125, "step": 58113 }, { "epoch": 5.968573482592174, "grad_norm": 0.04055574908852577, "learning_rate": 0.01, "loss": 1.966, "step": 58116 }, { "epoch": 5.968881585704016, "grad_norm": 0.11462211608886719, "learning_rate": 0.01, "loss": 1.9865, "step": 58119 }, { "epoch": 5.969189688815857, "grad_norm": 0.07639250159263611, "learning_rate": 0.01, "loss": 1.9919, "step": 58122 }, { "epoch": 5.969497791927698, "grad_norm": 0.08450743556022644, "learning_rate": 0.01, "loss": 2.0176, "step": 58125 }, { "epoch": 5.96980589503954, "grad_norm": 0.06165710464119911, "learning_rate": 0.01, "loss": 1.9772, "step": 58128 }, { "epoch": 5.9701139981513816, "grad_norm": 0.042419277131557465, "learning_rate": 0.01, "loss": 1.9939, "step": 58131 }, { "epoch": 5.970422101263223, "grad_norm": 0.04378311708569527, "learning_rate": 0.01, "loss": 2.0167, "step": 58134 }, { "epoch": 5.970730204375064, "grad_norm": 0.05443147197365761, "learning_rate": 0.01, "loss": 2.0006, "step": 58137 }, { "epoch": 5.971038307486905, "grad_norm": 0.05718831345438957, "learning_rate": 0.01, "loss": 1.991, "step": 58140 }, { "epoch": 5.971346410598747, "grad_norm": 0.0620989128947258, "learning_rate": 0.01, "loss": 1.9865, "step": 58143 }, { "epoch": 5.971654513710589, "grad_norm": 0.11351976543664932, "learning_rate": 0.01, "loss": 1.9966, "step": 58146 }, { "epoch": 5.97196261682243, "grad_norm": 0.05350454896688461, "learning_rate": 0.01, "loss": 1.9868, "step": 58149 }, { "epoch": 5.972270719934271, "grad_norm": 0.08652366697788239, "learning_rate": 0.01, "loss": 2.0034, "step": 58152 }, { "epoch": 5.9725788230461125, "grad_norm": 0.054177410900592804, "learning_rate": 0.01, "loss": 1.9981, "step": 58155 }, { "epoch": 5.972886926157955, "grad_norm": 0.047728173434734344, "learning_rate": 0.01, "loss": 1.9865, "step": 58158 }, { "epoch": 5.973195029269796, "grad_norm": 0.048977095633745193, "learning_rate": 0.01, "loss": 1.9905, "step": 58161 }, { "epoch": 5.973503132381637, "grad_norm": 0.042776644229888916, "learning_rate": 0.01, "loss": 1.9993, "step": 58164 }, { "epoch": 5.9738112354934785, "grad_norm": 0.09739521145820618, "learning_rate": 0.01, "loss": 1.9979, "step": 58167 }, { "epoch": 5.97411933860532, "grad_norm": 0.0636570006608963, "learning_rate": 0.01, "loss": 2.0037, "step": 58170 }, { "epoch": 5.974427441717161, "grad_norm": 0.09212375432252884, "learning_rate": 0.01, "loss": 1.9995, "step": 58173 }, { "epoch": 5.974735544829003, "grad_norm": 0.06589896231889725, "learning_rate": 0.01, "loss": 2.0172, "step": 58176 }, { "epoch": 5.975043647940844, "grad_norm": 0.04317329451441765, "learning_rate": 0.01, "loss": 1.9987, "step": 58179 }, { "epoch": 5.975351751052686, "grad_norm": 0.04963945969939232, "learning_rate": 0.01, "loss": 1.9737, "step": 58182 }, { "epoch": 5.975659854164527, "grad_norm": 0.054626185446977615, "learning_rate": 0.01, "loss": 1.9961, "step": 58185 }, { "epoch": 5.975967957276368, "grad_norm": 0.0750926285982132, "learning_rate": 0.01, "loss": 1.9764, "step": 58188 }, { "epoch": 5.97627606038821, "grad_norm": 0.09490969032049179, "learning_rate": 0.01, "loss": 1.9988, "step": 58191 }, { "epoch": 5.976584163500052, "grad_norm": 0.19652721285820007, "learning_rate": 0.01, "loss": 2.0049, "step": 58194 }, { "epoch": 5.976892266611893, "grad_norm": 0.13692377507686615, "learning_rate": 0.01, "loss": 2.0059, "step": 58197 }, { "epoch": 5.977200369723734, "grad_norm": 0.10185639560222626, "learning_rate": 0.01, "loss": 1.9875, "step": 58200 }, { "epoch": 5.977508472835575, "grad_norm": 0.08201511949300766, "learning_rate": 0.01, "loss": 1.9697, "step": 58203 }, { "epoch": 5.977816575947417, "grad_norm": 0.06355354934930801, "learning_rate": 0.01, "loss": 1.9582, "step": 58206 }, { "epoch": 5.978124679059259, "grad_norm": 0.07959393411874771, "learning_rate": 0.01, "loss": 1.9891, "step": 58209 }, { "epoch": 5.9784327821711, "grad_norm": 0.08127464354038239, "learning_rate": 0.01, "loss": 1.9812, "step": 58212 }, { "epoch": 5.978740885282941, "grad_norm": 0.052127279341220856, "learning_rate": 0.01, "loss": 1.982, "step": 58215 }, { "epoch": 5.979048988394783, "grad_norm": 0.05060075968503952, "learning_rate": 0.01, "loss": 1.9739, "step": 58218 }, { "epoch": 5.979357091506624, "grad_norm": 0.0653509721159935, "learning_rate": 0.01, "loss": 1.9847, "step": 58221 }, { "epoch": 5.979665194618466, "grad_norm": 0.04588339477777481, "learning_rate": 0.01, "loss": 1.9795, "step": 58224 }, { "epoch": 5.979973297730307, "grad_norm": 0.027341017499566078, "learning_rate": 0.01, "loss": 1.9736, "step": 58227 }, { "epoch": 5.9802814008421485, "grad_norm": 0.039760638028383255, "learning_rate": 0.01, "loss": 1.9857, "step": 58230 }, { "epoch": 5.98058950395399, "grad_norm": 0.07517565786838531, "learning_rate": 0.01, "loss": 1.9947, "step": 58233 }, { "epoch": 5.980897607065831, "grad_norm": 0.07364608347415924, "learning_rate": 0.01, "loss": 1.9973, "step": 58236 }, { "epoch": 5.981205710177672, "grad_norm": 0.057517606765031815, "learning_rate": 0.01, "loss": 1.9934, "step": 58239 }, { "epoch": 5.9815138132895145, "grad_norm": 0.032561108469963074, "learning_rate": 0.01, "loss": 2.0029, "step": 58242 }, { "epoch": 5.981821916401356, "grad_norm": 0.08549618721008301, "learning_rate": 0.01, "loss": 1.9769, "step": 58245 }, { "epoch": 5.982130019513197, "grad_norm": 0.17904026806354523, "learning_rate": 0.01, "loss": 2.0021, "step": 58248 }, { "epoch": 5.982438122625038, "grad_norm": 0.10775711387395859, "learning_rate": 0.01, "loss": 1.9909, "step": 58251 }, { "epoch": 5.98274622573688, "grad_norm": 0.07369556277990341, "learning_rate": 0.01, "loss": 1.9942, "step": 58254 }, { "epoch": 5.983054328848722, "grad_norm": 0.059407759457826614, "learning_rate": 0.01, "loss": 1.9739, "step": 58257 }, { "epoch": 5.983362431960563, "grad_norm": 0.05741313472390175, "learning_rate": 0.01, "loss": 2.0013, "step": 58260 }, { "epoch": 5.983670535072404, "grad_norm": 0.07792560756206512, "learning_rate": 0.01, "loss": 2.0021, "step": 58263 }, { "epoch": 5.9839786381842455, "grad_norm": 0.058633413165807724, "learning_rate": 0.01, "loss": 1.9919, "step": 58266 }, { "epoch": 5.984286741296087, "grad_norm": 0.04207305610179901, "learning_rate": 0.01, "loss": 1.9993, "step": 58269 }, { "epoch": 5.984594844407929, "grad_norm": 0.039717648178339005, "learning_rate": 0.01, "loss": 2.0046, "step": 58272 }, { "epoch": 5.98490294751977, "grad_norm": 0.03505009412765503, "learning_rate": 0.01, "loss": 1.9855, "step": 58275 }, { "epoch": 5.985211050631611, "grad_norm": 0.03421800956130028, "learning_rate": 0.01, "loss": 2.0031, "step": 58278 }, { "epoch": 5.985519153743453, "grad_norm": 0.11582440882921219, "learning_rate": 0.01, "loss": 1.9791, "step": 58281 }, { "epoch": 5.985827256855294, "grad_norm": 0.058088093996047974, "learning_rate": 0.01, "loss": 2.0229, "step": 58284 }, { "epoch": 5.986135359967136, "grad_norm": 0.08388987928628922, "learning_rate": 0.01, "loss": 2.0025, "step": 58287 }, { "epoch": 5.986443463078977, "grad_norm": 0.07332730293273926, "learning_rate": 0.01, "loss": 1.9936, "step": 58290 }, { "epoch": 5.986751566190819, "grad_norm": 0.03843742609024048, "learning_rate": 0.01, "loss": 1.9989, "step": 58293 }, { "epoch": 5.98705966930266, "grad_norm": 0.07863465696573257, "learning_rate": 0.01, "loss": 1.9932, "step": 58296 }, { "epoch": 5.987367772414501, "grad_norm": 0.08023706078529358, "learning_rate": 0.01, "loss": 1.9803, "step": 58299 }, { "epoch": 5.987675875526342, "grad_norm": 0.037095215171575546, "learning_rate": 0.01, "loss": 1.9701, "step": 58302 }, { "epoch": 5.9879839786381845, "grad_norm": 0.04752547666430473, "learning_rate": 0.01, "loss": 1.9858, "step": 58305 }, { "epoch": 5.988292081750026, "grad_norm": 0.053975239396095276, "learning_rate": 0.01, "loss": 1.9971, "step": 58308 }, { "epoch": 5.988600184861867, "grad_norm": 0.08200733363628387, "learning_rate": 0.01, "loss": 1.991, "step": 58311 }, { "epoch": 5.988908287973708, "grad_norm": 0.056134093552827835, "learning_rate": 0.01, "loss": 2.0093, "step": 58314 }, { "epoch": 5.98921639108555, "grad_norm": 0.04401000216603279, "learning_rate": 0.01, "loss": 1.9762, "step": 58317 }, { "epoch": 5.989524494197392, "grad_norm": 0.06809469312429428, "learning_rate": 0.01, "loss": 2.0004, "step": 58320 }, { "epoch": 5.989832597309233, "grad_norm": 0.06399431079626083, "learning_rate": 0.01, "loss": 2.0206, "step": 58323 }, { "epoch": 5.990140700421074, "grad_norm": 0.1221606507897377, "learning_rate": 0.01, "loss": 1.9819, "step": 58326 }, { "epoch": 5.9904488035329155, "grad_norm": 0.054819442331790924, "learning_rate": 0.01, "loss": 1.9657, "step": 58329 }, { "epoch": 5.990756906644757, "grad_norm": 0.053588200360536575, "learning_rate": 0.01, "loss": 1.9871, "step": 58332 }, { "epoch": 5.991065009756598, "grad_norm": 0.060072384774684906, "learning_rate": 0.01, "loss": 1.9893, "step": 58335 }, { "epoch": 5.99137311286844, "grad_norm": 0.09491874277591705, "learning_rate": 0.01, "loss": 1.9721, "step": 58338 }, { "epoch": 5.9916812159802815, "grad_norm": 0.054036714136600494, "learning_rate": 0.01, "loss": 1.9843, "step": 58341 }, { "epoch": 5.991989319092123, "grad_norm": 0.04162592813372612, "learning_rate": 0.01, "loss": 1.9927, "step": 58344 }, { "epoch": 5.992297422203964, "grad_norm": 0.05602291598916054, "learning_rate": 0.01, "loss": 1.9965, "step": 58347 }, { "epoch": 5.992605525315806, "grad_norm": 0.038415491580963135, "learning_rate": 0.01, "loss": 1.9956, "step": 58350 }, { "epoch": 5.992913628427647, "grad_norm": 0.07123242318630219, "learning_rate": 0.01, "loss": 1.9662, "step": 58353 }, { "epoch": 5.993221731539489, "grad_norm": 0.06751185655593872, "learning_rate": 0.01, "loss": 1.9769, "step": 58356 }, { "epoch": 5.99352983465133, "grad_norm": 0.07169979065656662, "learning_rate": 0.01, "loss": 1.9675, "step": 58359 }, { "epoch": 5.993837937763171, "grad_norm": 0.13775870203971863, "learning_rate": 0.01, "loss": 2.0067, "step": 58362 }, { "epoch": 5.9941460408750125, "grad_norm": 0.08033827692270279, "learning_rate": 0.01, "loss": 2.0378, "step": 58365 }, { "epoch": 5.994454143986855, "grad_norm": 0.07793736457824707, "learning_rate": 0.01, "loss": 1.9651, "step": 58368 }, { "epoch": 5.994762247098696, "grad_norm": 0.06328029185533524, "learning_rate": 0.01, "loss": 1.9878, "step": 58371 }, { "epoch": 5.995070350210537, "grad_norm": 0.05584852769970894, "learning_rate": 0.01, "loss": 1.9806, "step": 58374 }, { "epoch": 5.995378453322378, "grad_norm": 0.04354814067482948, "learning_rate": 0.01, "loss": 1.9829, "step": 58377 }, { "epoch": 5.99568655643422, "grad_norm": 0.05028558522462845, "learning_rate": 0.01, "loss": 1.9582, "step": 58380 }, { "epoch": 5.995994659546062, "grad_norm": 0.053422022610902786, "learning_rate": 0.01, "loss": 1.997, "step": 58383 }, { "epoch": 5.996302762657903, "grad_norm": 0.03941678628325462, "learning_rate": 0.01, "loss": 1.9952, "step": 58386 }, { "epoch": 5.996610865769744, "grad_norm": 0.03764568641781807, "learning_rate": 0.01, "loss": 1.9905, "step": 58389 }, { "epoch": 5.996918968881586, "grad_norm": 0.044926904141902924, "learning_rate": 0.01, "loss": 1.9786, "step": 58392 }, { "epoch": 5.997227071993427, "grad_norm": 0.046521514654159546, "learning_rate": 0.01, "loss": 1.9825, "step": 58395 }, { "epoch": 5.997535175105268, "grad_norm": 0.12329057604074478, "learning_rate": 0.01, "loss": 1.9898, "step": 58398 }, { "epoch": 5.99784327821711, "grad_norm": 0.045972906053066254, "learning_rate": 0.01, "loss": 1.9735, "step": 58401 }, { "epoch": 5.9981513813289515, "grad_norm": 0.1293979287147522, "learning_rate": 0.01, "loss": 1.9704, "step": 58404 }, { "epoch": 5.998459484440793, "grad_norm": 0.06918042153120041, "learning_rate": 0.01, "loss": 2.0118, "step": 58407 }, { "epoch": 5.998767587552634, "grad_norm": 0.04279787465929985, "learning_rate": 0.01, "loss": 2.0002, "step": 58410 }, { "epoch": 5.999075690664475, "grad_norm": 0.09146596491336823, "learning_rate": 0.01, "loss": 1.9658, "step": 58413 }, { "epoch": 5.9993837937763175, "grad_norm": 0.06269899010658264, "learning_rate": 0.01, "loss": 1.9947, "step": 58416 }, { "epoch": 5.999691896888159, "grad_norm": 0.04407677426934242, "learning_rate": 0.01, "loss": 1.9891, "step": 58419 }, { "epoch": 6.0, "grad_norm": 0.04100476950407028, "learning_rate": 0.01, "loss": 2.015, "step": 58422 }, { "epoch": 5.999691928527419, "grad_norm": 0.05454897880554199, "learning_rate": 0.01, "loss": 1.9814, "step": 58425 }, { "epoch": 6.0, "grad_norm": 0.12488140910863876, "learning_rate": 0.01, "loss": 2.0163, "step": 58428 }, { "epoch": 6.000308071472582, "grad_norm": 0.08063201606273651, "learning_rate": 0.01, "loss": 1.9852, "step": 58431 }, { "epoch": 6.000616142945163, "grad_norm": 0.08693194389343262, "learning_rate": 0.01, "loss": 1.9877, "step": 58434 }, { "epoch": 6.000924214417745, "grad_norm": 0.048637568950653076, "learning_rate": 0.01, "loss": 2.0074, "step": 58437 }, { "epoch": 6.001232285890326, "grad_norm": 0.047587309032678604, "learning_rate": 0.01, "loss": 1.998, "step": 58440 }, { "epoch": 6.0015403573629085, "grad_norm": 0.04123762995004654, "learning_rate": 0.01, "loss": 1.9947, "step": 58443 }, { "epoch": 6.00184842883549, "grad_norm": 0.038054220378398895, "learning_rate": 0.01, "loss": 1.9784, "step": 58446 }, { "epoch": 6.002156500308072, "grad_norm": 0.07207085937261581, "learning_rate": 0.01, "loss": 1.9842, "step": 58449 }, { "epoch": 6.002464571780653, "grad_norm": 0.09471093118190765, "learning_rate": 0.01, "loss": 1.9824, "step": 58452 }, { "epoch": 6.002772643253235, "grad_norm": 0.050923608243465424, "learning_rate": 0.01, "loss": 1.9679, "step": 58455 }, { "epoch": 6.003080714725816, "grad_norm": 0.09074869751930237, "learning_rate": 0.01, "loss": 1.9862, "step": 58458 }, { "epoch": 6.003388786198398, "grad_norm": 0.12536507844924927, "learning_rate": 0.01, "loss": 1.9766, "step": 58461 }, { "epoch": 6.003696857670979, "grad_norm": 0.04264422506093979, "learning_rate": 0.01, "loss": 1.9975, "step": 58464 }, { "epoch": 6.004004929143561, "grad_norm": 0.03600556403398514, "learning_rate": 0.01, "loss": 1.9703, "step": 58467 }, { "epoch": 6.004313000616143, "grad_norm": 0.030723579227924347, "learning_rate": 0.01, "loss": 1.975, "step": 58470 }, { "epoch": 6.0046210720887245, "grad_norm": 0.042884331196546555, "learning_rate": 0.01, "loss": 1.994, "step": 58473 }, { "epoch": 6.004929143561307, "grad_norm": 0.07880793511867523, "learning_rate": 0.01, "loss": 1.9798, "step": 58476 }, { "epoch": 6.005237215033888, "grad_norm": 0.08945189416408539, "learning_rate": 0.01, "loss": 1.9724, "step": 58479 }, { "epoch": 6.00554528650647, "grad_norm": 0.07887895405292511, "learning_rate": 0.01, "loss": 1.9982, "step": 58482 }, { "epoch": 6.005853357979051, "grad_norm": 0.13316544890403748, "learning_rate": 0.01, "loss": 2.0046, "step": 58485 }, { "epoch": 6.006161429451633, "grad_norm": 0.06402456760406494, "learning_rate": 0.01, "loss": 1.9868, "step": 58488 }, { "epoch": 6.006469500924214, "grad_norm": 0.03985295444726944, "learning_rate": 0.01, "loss": 2.0051, "step": 58491 }, { "epoch": 6.006777572396796, "grad_norm": 0.03584379702806473, "learning_rate": 0.01, "loss": 1.9806, "step": 58494 }, { "epoch": 6.007085643869377, "grad_norm": 0.03731578588485718, "learning_rate": 0.01, "loss": 1.9965, "step": 58497 }, { "epoch": 6.007393715341959, "grad_norm": 0.04280545935034752, "learning_rate": 0.01, "loss": 2.0052, "step": 58500 }, { "epoch": 6.007701786814541, "grad_norm": 0.037182942032814026, "learning_rate": 0.01, "loss": 1.9659, "step": 58503 }, { "epoch": 6.008009858287123, "grad_norm": 0.04180056229233742, "learning_rate": 0.01, "loss": 1.9746, "step": 58506 }, { "epoch": 6.008317929759705, "grad_norm": 0.05659356340765953, "learning_rate": 0.01, "loss": 2.0131, "step": 58509 }, { "epoch": 6.008626001232286, "grad_norm": 0.04323246702551842, "learning_rate": 0.01, "loss": 1.9737, "step": 58512 }, { "epoch": 6.008934072704868, "grad_norm": 0.15815724432468414, "learning_rate": 0.01, "loss": 1.9927, "step": 58515 }, { "epoch": 6.009242144177449, "grad_norm": 0.041637711226940155, "learning_rate": 0.01, "loss": 1.9983, "step": 58518 }, { "epoch": 6.009550215650031, "grad_norm": 0.09015413373708725, "learning_rate": 0.01, "loss": 1.9835, "step": 58521 }, { "epoch": 6.009858287122612, "grad_norm": 0.10606445372104645, "learning_rate": 0.01, "loss": 1.9907, "step": 58524 }, { "epoch": 6.010166358595194, "grad_norm": 0.06839009374380112, "learning_rate": 0.01, "loss": 2.0005, "step": 58527 }, { "epoch": 6.0104744300677755, "grad_norm": 0.051956940442323685, "learning_rate": 0.01, "loss": 1.967, "step": 58530 }, { "epoch": 6.0107825015403575, "grad_norm": 0.034211624413728714, "learning_rate": 0.01, "loss": 1.9935, "step": 58533 }, { "epoch": 6.011090573012939, "grad_norm": 0.04351538419723511, "learning_rate": 0.01, "loss": 1.9849, "step": 58536 }, { "epoch": 6.011398644485521, "grad_norm": 0.07221609354019165, "learning_rate": 0.01, "loss": 1.9845, "step": 58539 }, { "epoch": 6.011706715958102, "grad_norm": 0.11168903857469559, "learning_rate": 0.01, "loss": 1.991, "step": 58542 }, { "epoch": 6.012014787430684, "grad_norm": 0.06332577764987946, "learning_rate": 0.01, "loss": 1.9906, "step": 58545 }, { "epoch": 6.012322858903265, "grad_norm": 0.0567488819360733, "learning_rate": 0.01, "loss": 1.9833, "step": 58548 }, { "epoch": 6.012630930375847, "grad_norm": 0.04636390134692192, "learning_rate": 0.01, "loss": 1.9829, "step": 58551 }, { "epoch": 6.012939001848429, "grad_norm": 0.040273915976285934, "learning_rate": 0.01, "loss": 1.9852, "step": 58554 }, { "epoch": 6.01324707332101, "grad_norm": 0.09343009442090988, "learning_rate": 0.01, "loss": 1.9805, "step": 58557 }, { "epoch": 6.013555144793592, "grad_norm": 0.07643181830644608, "learning_rate": 0.01, "loss": 1.9507, "step": 58560 }, { "epoch": 6.013863216266174, "grad_norm": 0.1327466368675232, "learning_rate": 0.01, "loss": 2.0052, "step": 58563 }, { "epoch": 6.014171287738756, "grad_norm": 0.1656189262866974, "learning_rate": 0.01, "loss": 1.9879, "step": 58566 }, { "epoch": 6.014479359211337, "grad_norm": 0.07180788367986679, "learning_rate": 0.01, "loss": 1.9798, "step": 58569 }, { "epoch": 6.014787430683919, "grad_norm": 0.04216361045837402, "learning_rate": 0.01, "loss": 1.9812, "step": 58572 }, { "epoch": 6.0150955021565, "grad_norm": 0.04374970495700836, "learning_rate": 0.01, "loss": 1.9683, "step": 58575 }, { "epoch": 6.015403573629082, "grad_norm": 0.038325972855091095, "learning_rate": 0.01, "loss": 1.9749, "step": 58578 }, { "epoch": 6.015711645101663, "grad_norm": 0.03523325175046921, "learning_rate": 0.01, "loss": 1.9839, "step": 58581 }, { "epoch": 6.016019716574245, "grad_norm": 0.03328186646103859, "learning_rate": 0.01, "loss": 1.9689, "step": 58584 }, { "epoch": 6.016327788046826, "grad_norm": 0.09052954614162445, "learning_rate": 0.01, "loss": 1.9815, "step": 58587 }, { "epoch": 6.0166358595194085, "grad_norm": 0.08605097979307175, "learning_rate": 0.01, "loss": 2.0121, "step": 58590 }, { "epoch": 6.0169439309919905, "grad_norm": 0.05200384184718132, "learning_rate": 0.01, "loss": 1.9738, "step": 58593 }, { "epoch": 6.017252002464572, "grad_norm": 0.07615986466407776, "learning_rate": 0.01, "loss": 1.9905, "step": 58596 }, { "epoch": 6.017560073937154, "grad_norm": 0.043676577508449554, "learning_rate": 0.01, "loss": 1.9933, "step": 58599 }, { "epoch": 6.017868145409735, "grad_norm": 0.058520250022411346, "learning_rate": 0.01, "loss": 1.9768, "step": 58602 }, { "epoch": 6.018176216882317, "grad_norm": 0.07133082300424576, "learning_rate": 0.01, "loss": 1.9723, "step": 58605 }, { "epoch": 6.018484288354898, "grad_norm": 0.08711257576942444, "learning_rate": 0.01, "loss": 1.9774, "step": 58608 }, { "epoch": 6.01879235982748, "grad_norm": 0.0483718067407608, "learning_rate": 0.01, "loss": 1.932, "step": 58611 }, { "epoch": 6.019100431300061, "grad_norm": 0.05014586076140404, "learning_rate": 0.01, "loss": 1.9768, "step": 58614 }, { "epoch": 6.019408502772643, "grad_norm": 0.10905557870864868, "learning_rate": 0.01, "loss": 1.9667, "step": 58617 }, { "epoch": 6.0197165742452245, "grad_norm": 0.03817577287554741, "learning_rate": 0.01, "loss": 1.9943, "step": 58620 }, { "epoch": 6.020024645717807, "grad_norm": 0.0783611536026001, "learning_rate": 0.01, "loss": 1.9599, "step": 58623 }, { "epoch": 6.020332717190388, "grad_norm": 0.0832376778125763, "learning_rate": 0.01, "loss": 1.9884, "step": 58626 }, { "epoch": 6.02064078866297, "grad_norm": 0.05190072953701019, "learning_rate": 0.01, "loss": 1.9639, "step": 58629 }, { "epoch": 6.020948860135552, "grad_norm": 0.09829907864332199, "learning_rate": 0.01, "loss": 2.0106, "step": 58632 }, { "epoch": 6.021256931608133, "grad_norm": 0.08857165277004242, "learning_rate": 0.01, "loss": 2.0049, "step": 58635 }, { "epoch": 6.021565003080715, "grad_norm": 0.07858851552009583, "learning_rate": 0.01, "loss": 1.9861, "step": 58638 }, { "epoch": 6.021873074553296, "grad_norm": 0.04164360463619232, "learning_rate": 0.01, "loss": 2.0072, "step": 58641 }, { "epoch": 6.022181146025878, "grad_norm": 0.037627752870321274, "learning_rate": 0.01, "loss": 1.9653, "step": 58644 }, { "epoch": 6.0224892174984594, "grad_norm": 0.03442845493555069, "learning_rate": 0.01, "loss": 1.9934, "step": 58647 }, { "epoch": 6.0227972889710415, "grad_norm": 0.04693920910358429, "learning_rate": 0.01, "loss": 2.0012, "step": 58650 }, { "epoch": 6.023105360443623, "grad_norm": 0.07563184201717377, "learning_rate": 0.01, "loss": 1.9913, "step": 58653 }, { "epoch": 6.023413431916205, "grad_norm": 0.08100397139787674, "learning_rate": 0.01, "loss": 1.9822, "step": 58656 }, { "epoch": 6.023721503388786, "grad_norm": 0.12132440507411957, "learning_rate": 0.01, "loss": 1.969, "step": 58659 }, { "epoch": 6.024029574861368, "grad_norm": 0.04983309283852577, "learning_rate": 0.01, "loss": 1.9661, "step": 58662 }, { "epoch": 6.024337646333949, "grad_norm": 0.06472596526145935, "learning_rate": 0.01, "loss": 2.0084, "step": 58665 }, { "epoch": 6.024645717806531, "grad_norm": 0.034389570355415344, "learning_rate": 0.01, "loss": 1.9762, "step": 58668 }, { "epoch": 6.024953789279113, "grad_norm": 0.10626552999019623, "learning_rate": 0.01, "loss": 2.0083, "step": 58671 }, { "epoch": 6.025261860751694, "grad_norm": 0.0675535798072815, "learning_rate": 0.01, "loss": 1.9948, "step": 58674 }, { "epoch": 6.025569932224276, "grad_norm": 0.04010651633143425, "learning_rate": 0.01, "loss": 1.969, "step": 58677 }, { "epoch": 6.0258780036968576, "grad_norm": 0.034451622515916824, "learning_rate": 0.01, "loss": 1.9716, "step": 58680 }, { "epoch": 6.02618607516944, "grad_norm": 0.06580517441034317, "learning_rate": 0.01, "loss": 1.9973, "step": 58683 }, { "epoch": 6.026494146642021, "grad_norm": 0.05853043496608734, "learning_rate": 0.01, "loss": 1.9855, "step": 58686 }, { "epoch": 6.026802218114603, "grad_norm": 0.04377220198512077, "learning_rate": 0.01, "loss": 1.9655, "step": 58689 }, { "epoch": 6.027110289587184, "grad_norm": 0.03288479149341583, "learning_rate": 0.01, "loss": 1.978, "step": 58692 }, { "epoch": 6.027418361059766, "grad_norm": 0.04060712829232216, "learning_rate": 0.01, "loss": 1.9982, "step": 58695 }, { "epoch": 6.027726432532347, "grad_norm": 0.11332166939973831, "learning_rate": 0.01, "loss": 1.9685, "step": 58698 }, { "epoch": 6.028034504004929, "grad_norm": 0.13282176852226257, "learning_rate": 0.01, "loss": 1.9528, "step": 58701 }, { "epoch": 6.02834257547751, "grad_norm": 0.08038832992315292, "learning_rate": 0.01, "loss": 2.0108, "step": 58704 }, { "epoch": 6.0286506469500925, "grad_norm": 0.05667036771774292, "learning_rate": 0.01, "loss": 1.9945, "step": 58707 }, { "epoch": 6.0289587184226745, "grad_norm": 0.06444176286458969, "learning_rate": 0.01, "loss": 1.972, "step": 58710 }, { "epoch": 6.029266789895256, "grad_norm": 0.08207248896360397, "learning_rate": 0.01, "loss": 1.9731, "step": 58713 }, { "epoch": 6.029574861367838, "grad_norm": 0.057919155806303024, "learning_rate": 0.01, "loss": 1.9799, "step": 58716 }, { "epoch": 6.029882932840419, "grad_norm": 0.0630873367190361, "learning_rate": 0.01, "loss": 2.004, "step": 58719 }, { "epoch": 6.030191004313001, "grad_norm": 0.14096465706825256, "learning_rate": 0.01, "loss": 2.0029, "step": 58722 }, { "epoch": 6.030499075785582, "grad_norm": 0.06392169743776321, "learning_rate": 0.01, "loss": 1.9605, "step": 58725 }, { "epoch": 6.030807147258164, "grad_norm": 0.051594968885183334, "learning_rate": 0.01, "loss": 1.9709, "step": 58728 }, { "epoch": 6.031115218730745, "grad_norm": 0.05645729973912239, "learning_rate": 0.01, "loss": 1.977, "step": 58731 }, { "epoch": 6.031423290203327, "grad_norm": 0.056138813495635986, "learning_rate": 0.01, "loss": 2.016, "step": 58734 }, { "epoch": 6.0317313616759085, "grad_norm": 0.08171719312667847, "learning_rate": 0.01, "loss": 1.9866, "step": 58737 }, { "epoch": 6.032039433148491, "grad_norm": 0.0954081118106842, "learning_rate": 0.01, "loss": 1.9761, "step": 58740 }, { "epoch": 6.032347504621072, "grad_norm": 0.09134764224290848, "learning_rate": 0.01, "loss": 1.9718, "step": 58743 }, { "epoch": 6.032655576093654, "grad_norm": 0.05880044028162956, "learning_rate": 0.01, "loss": 1.9819, "step": 58746 }, { "epoch": 6.032963647566235, "grad_norm": 0.06801950186491013, "learning_rate": 0.01, "loss": 2.0078, "step": 58749 }, { "epoch": 6.033271719038817, "grad_norm": 0.038541264832019806, "learning_rate": 0.01, "loss": 1.9595, "step": 58752 }, { "epoch": 6.033579790511399, "grad_norm": 0.03735314682126045, "learning_rate": 0.01, "loss": 1.9851, "step": 58755 }, { "epoch": 6.03388786198398, "grad_norm": 0.10068950057029724, "learning_rate": 0.01, "loss": 2.0018, "step": 58758 }, { "epoch": 6.034195933456562, "grad_norm": 0.04904381185770035, "learning_rate": 0.01, "loss": 1.9996, "step": 58761 }, { "epoch": 6.034504004929143, "grad_norm": 0.05660933256149292, "learning_rate": 0.01, "loss": 2.0051, "step": 58764 }, { "epoch": 6.0348120764017255, "grad_norm": 0.0632736012339592, "learning_rate": 0.01, "loss": 2.0073, "step": 58767 }, { "epoch": 6.035120147874307, "grad_norm": 0.06228591129183769, "learning_rate": 0.01, "loss": 1.9989, "step": 58770 }, { "epoch": 6.035428219346889, "grad_norm": 0.03460165485739708, "learning_rate": 0.01, "loss": 1.9976, "step": 58773 }, { "epoch": 6.03573629081947, "grad_norm": 0.04163900017738342, "learning_rate": 0.01, "loss": 1.983, "step": 58776 }, { "epoch": 6.036044362292052, "grad_norm": 0.07276707142591476, "learning_rate": 0.01, "loss": 1.9823, "step": 58779 }, { "epoch": 6.036352433764633, "grad_norm": 0.07738452404737473, "learning_rate": 0.01, "loss": 1.996, "step": 58782 }, { "epoch": 6.036660505237215, "grad_norm": 0.1065252274274826, "learning_rate": 0.01, "loss": 2.0061, "step": 58785 }, { "epoch": 6.036968576709796, "grad_norm": 0.05357460305094719, "learning_rate": 0.01, "loss": 1.9918, "step": 58788 }, { "epoch": 6.037276648182378, "grad_norm": 0.03857807815074921, "learning_rate": 0.01, "loss": 1.9788, "step": 58791 }, { "epoch": 6.03758471965496, "grad_norm": 0.029968131333589554, "learning_rate": 0.01, "loss": 1.9904, "step": 58794 }, { "epoch": 6.0378927911275415, "grad_norm": 0.0720914974808693, "learning_rate": 0.01, "loss": 1.995, "step": 58797 }, { "epoch": 6.038200862600124, "grad_norm": 0.12587358057498932, "learning_rate": 0.01, "loss": 1.9925, "step": 58800 }, { "epoch": 6.038508934072705, "grad_norm": 0.07049199193716049, "learning_rate": 0.01, "loss": 1.9816, "step": 58803 }, { "epoch": 6.038817005545287, "grad_norm": 0.05485573783516884, "learning_rate": 0.01, "loss": 2.0002, "step": 58806 }, { "epoch": 6.039125077017868, "grad_norm": 0.05481892451643944, "learning_rate": 0.01, "loss": 1.9901, "step": 58809 }, { "epoch": 6.03943314849045, "grad_norm": 0.07906284928321838, "learning_rate": 0.01, "loss": 1.9584, "step": 58812 }, { "epoch": 6.039741219963031, "grad_norm": 0.10519934445619583, "learning_rate": 0.01, "loss": 1.9589, "step": 58815 }, { "epoch": 6.040049291435613, "grad_norm": 0.06294261664152145, "learning_rate": 0.01, "loss": 2.0014, "step": 58818 }, { "epoch": 6.040357362908194, "grad_norm": 0.0904410257935524, "learning_rate": 0.01, "loss": 1.9626, "step": 58821 }, { "epoch": 6.040665434380776, "grad_norm": 0.053544074296951294, "learning_rate": 0.01, "loss": 1.9882, "step": 58824 }, { "epoch": 6.040973505853358, "grad_norm": 0.07926991581916809, "learning_rate": 0.01, "loss": 1.968, "step": 58827 }, { "epoch": 6.04128157732594, "grad_norm": 0.09197907149791718, "learning_rate": 0.01, "loss": 1.9756, "step": 58830 }, { "epoch": 6.041589648798522, "grad_norm": 0.05957088991999626, "learning_rate": 0.01, "loss": 1.9689, "step": 58833 }, { "epoch": 6.041897720271103, "grad_norm": 0.1431286334991455, "learning_rate": 0.01, "loss": 1.9854, "step": 58836 }, { "epoch": 6.042205791743685, "grad_norm": 0.06428467482328415, "learning_rate": 0.01, "loss": 1.9761, "step": 58839 }, { "epoch": 6.042513863216266, "grad_norm": 0.07960224896669388, "learning_rate": 0.01, "loss": 1.9728, "step": 58842 }, { "epoch": 6.042821934688848, "grad_norm": 0.040846168994903564, "learning_rate": 0.01, "loss": 1.9875, "step": 58845 }, { "epoch": 6.043130006161429, "grad_norm": 0.04430864006280899, "learning_rate": 0.01, "loss": 1.9945, "step": 58848 }, { "epoch": 6.043438077634011, "grad_norm": 0.04534313082695007, "learning_rate": 0.01, "loss": 1.9728, "step": 58851 }, { "epoch": 6.0437461491065925, "grad_norm": 0.04214495047926903, "learning_rate": 0.01, "loss": 1.9798, "step": 58854 }, { "epoch": 6.0440542205791745, "grad_norm": 0.036912716925144196, "learning_rate": 0.01, "loss": 2.0197, "step": 58857 }, { "epoch": 6.044362292051756, "grad_norm": 0.05208496004343033, "learning_rate": 0.01, "loss": 1.9674, "step": 58860 }, { "epoch": 6.044670363524338, "grad_norm": 0.10304973274469376, "learning_rate": 0.01, "loss": 1.9975, "step": 58863 }, { "epoch": 6.044978434996919, "grad_norm": 0.14296580851078033, "learning_rate": 0.01, "loss": 1.9819, "step": 58866 }, { "epoch": 6.045286506469501, "grad_norm": 0.037938348948955536, "learning_rate": 0.01, "loss": 1.9863, "step": 58869 }, { "epoch": 6.045594577942083, "grad_norm": 0.054850075393915176, "learning_rate": 0.01, "loss": 1.9933, "step": 58872 }, { "epoch": 6.045902649414664, "grad_norm": 0.0785508081316948, "learning_rate": 0.01, "loss": 1.974, "step": 58875 }, { "epoch": 6.046210720887246, "grad_norm": 0.07267174124717712, "learning_rate": 0.01, "loss": 1.9835, "step": 58878 }, { "epoch": 6.046518792359827, "grad_norm": 0.04367669299244881, "learning_rate": 0.01, "loss": 1.9942, "step": 58881 }, { "epoch": 6.046826863832409, "grad_norm": 0.046264924108982086, "learning_rate": 0.01, "loss": 2.0044, "step": 58884 }, { "epoch": 6.047134935304991, "grad_norm": 0.08904755860567093, "learning_rate": 0.01, "loss": 2.0053, "step": 58887 }, { "epoch": 6.047443006777573, "grad_norm": 0.06151941046118736, "learning_rate": 0.01, "loss": 1.9807, "step": 58890 }, { "epoch": 6.047751078250154, "grad_norm": 0.09928801655769348, "learning_rate": 0.01, "loss": 1.9916, "step": 58893 }, { "epoch": 6.048059149722736, "grad_norm": 0.04402837157249451, "learning_rate": 0.01, "loss": 1.9925, "step": 58896 }, { "epoch": 6.048367221195317, "grad_norm": 0.08484535664319992, "learning_rate": 0.01, "loss": 1.9957, "step": 58899 }, { "epoch": 6.048675292667899, "grad_norm": 0.07044728100299835, "learning_rate": 0.01, "loss": 1.9752, "step": 58902 }, { "epoch": 6.04898336414048, "grad_norm": 0.05778975039720535, "learning_rate": 0.01, "loss": 1.999, "step": 58905 }, { "epoch": 6.049291435613062, "grad_norm": 0.08870556205511093, "learning_rate": 0.01, "loss": 1.9794, "step": 58908 }, { "epoch": 6.049599507085643, "grad_norm": 0.06904757022857666, "learning_rate": 0.01, "loss": 1.9988, "step": 58911 }, { "epoch": 6.0499075785582255, "grad_norm": 0.07772015035152435, "learning_rate": 0.01, "loss": 1.9439, "step": 58914 }, { "epoch": 6.0502156500308075, "grad_norm": 0.05567057430744171, "learning_rate": 0.01, "loss": 1.9855, "step": 58917 }, { "epoch": 6.050523721503389, "grad_norm": 0.06966744363307953, "learning_rate": 0.01, "loss": 1.9801, "step": 58920 }, { "epoch": 6.050831792975971, "grad_norm": 0.05111026018857956, "learning_rate": 0.01, "loss": 1.9818, "step": 58923 }, { "epoch": 6.051139864448552, "grad_norm": 0.07328280061483383, "learning_rate": 0.01, "loss": 2.0044, "step": 58926 }, { "epoch": 6.051447935921134, "grad_norm": 0.06918063014745712, "learning_rate": 0.01, "loss": 1.9504, "step": 58929 }, { "epoch": 6.051756007393715, "grad_norm": 0.09920765459537506, "learning_rate": 0.01, "loss": 2.0058, "step": 58932 }, { "epoch": 6.052064078866297, "grad_norm": 0.057603057473897934, "learning_rate": 0.01, "loss": 1.9888, "step": 58935 }, { "epoch": 6.052372150338878, "grad_norm": 0.0861181914806366, "learning_rate": 0.01, "loss": 2.0107, "step": 58938 }, { "epoch": 6.05268022181146, "grad_norm": 0.1477275937795639, "learning_rate": 0.01, "loss": 1.9598, "step": 58941 }, { "epoch": 6.0529882932840415, "grad_norm": 0.04588627070188522, "learning_rate": 0.01, "loss": 1.9911, "step": 58944 }, { "epoch": 6.053296364756624, "grad_norm": 0.09523765742778778, "learning_rate": 0.01, "loss": 1.9673, "step": 58947 }, { "epoch": 6.053604436229205, "grad_norm": 0.05901789665222168, "learning_rate": 0.01, "loss": 2.0197, "step": 58950 }, { "epoch": 6.053912507701787, "grad_norm": 0.0464630164206028, "learning_rate": 0.01, "loss": 1.964, "step": 58953 }, { "epoch": 6.054220579174369, "grad_norm": 0.06684694439172745, "learning_rate": 0.01, "loss": 1.9787, "step": 58956 }, { "epoch": 6.05452865064695, "grad_norm": 0.062367189675569534, "learning_rate": 0.01, "loss": 1.9693, "step": 58959 }, { "epoch": 6.054836722119532, "grad_norm": 0.050568412989377975, "learning_rate": 0.01, "loss": 1.9858, "step": 58962 }, { "epoch": 6.055144793592113, "grad_norm": 0.05513317883014679, "learning_rate": 0.01, "loss": 1.9839, "step": 58965 }, { "epoch": 6.055452865064695, "grad_norm": 0.05048702657222748, "learning_rate": 0.01, "loss": 1.9873, "step": 58968 }, { "epoch": 6.055760936537276, "grad_norm": 0.04989680275321007, "learning_rate": 0.01, "loss": 1.971, "step": 58971 }, { "epoch": 6.0560690080098585, "grad_norm": 0.03208424523472786, "learning_rate": 0.01, "loss": 2.002, "step": 58974 }, { "epoch": 6.05637707948244, "grad_norm": 0.11022038012742996, "learning_rate": 0.01, "loss": 1.9974, "step": 58977 }, { "epoch": 6.056685150955022, "grad_norm": 0.10186657309532166, "learning_rate": 0.01, "loss": 1.9808, "step": 58980 }, { "epoch": 6.056993222427603, "grad_norm": 0.04729219898581505, "learning_rate": 0.01, "loss": 1.9879, "step": 58983 }, { "epoch": 6.057301293900185, "grad_norm": 0.08434822410345078, "learning_rate": 0.01, "loss": 2.0063, "step": 58986 }, { "epoch": 6.057609365372766, "grad_norm": 0.05614599958062172, "learning_rate": 0.01, "loss": 2.009, "step": 58989 }, { "epoch": 6.057917436845348, "grad_norm": 0.06520257890224457, "learning_rate": 0.01, "loss": 1.9684, "step": 58992 }, { "epoch": 6.05822550831793, "grad_norm": 0.035195183008909225, "learning_rate": 0.01, "loss": 1.9842, "step": 58995 }, { "epoch": 6.058533579790511, "grad_norm": 0.08378977328538895, "learning_rate": 0.01, "loss": 1.9709, "step": 58998 }, { "epoch": 6.058841651263093, "grad_norm": 0.05082874745130539, "learning_rate": 0.01, "loss": 2.0167, "step": 59001 }, { "epoch": 6.0591497227356745, "grad_norm": 0.10246657580137253, "learning_rate": 0.01, "loss": 1.9932, "step": 59004 }, { "epoch": 6.059457794208257, "grad_norm": 0.0681087076663971, "learning_rate": 0.01, "loss": 1.9932, "step": 59007 }, { "epoch": 6.059765865680838, "grad_norm": 0.10320194065570831, "learning_rate": 0.01, "loss": 1.9569, "step": 59010 }, { "epoch": 6.06007393715342, "grad_norm": 0.04554738476872444, "learning_rate": 0.01, "loss": 2.0199, "step": 59013 }, { "epoch": 6.060382008626001, "grad_norm": 0.11251513659954071, "learning_rate": 0.01, "loss": 1.9776, "step": 59016 }, { "epoch": 6.060690080098583, "grad_norm": 0.042101431638002396, "learning_rate": 0.01, "loss": 1.9971, "step": 59019 }, { "epoch": 6.060998151571164, "grad_norm": 0.05242328718304634, "learning_rate": 0.01, "loss": 1.977, "step": 59022 }, { "epoch": 6.061306223043746, "grad_norm": 0.07696748524904251, "learning_rate": 0.01, "loss": 1.9847, "step": 59025 }, { "epoch": 6.061614294516327, "grad_norm": 0.07448645681142807, "learning_rate": 0.01, "loss": 1.9781, "step": 59028 }, { "epoch": 6.061922365988909, "grad_norm": 0.05332387611269951, "learning_rate": 0.01, "loss": 1.9886, "step": 59031 }, { "epoch": 6.0622304374614915, "grad_norm": 0.046122957020998, "learning_rate": 0.01, "loss": 2.0017, "step": 59034 }, { "epoch": 6.062538508934073, "grad_norm": 0.06084294244647026, "learning_rate": 0.01, "loss": 1.9973, "step": 59037 }, { "epoch": 6.062846580406655, "grad_norm": 0.09956135600805283, "learning_rate": 0.01, "loss": 1.9777, "step": 59040 }, { "epoch": 6.063154651879236, "grad_norm": 0.05223178490996361, "learning_rate": 0.01, "loss": 1.9828, "step": 59043 }, { "epoch": 6.063462723351818, "grad_norm": 0.08285564929246902, "learning_rate": 0.01, "loss": 1.9842, "step": 59046 }, { "epoch": 6.063770794824399, "grad_norm": 0.05346845090389252, "learning_rate": 0.01, "loss": 1.9875, "step": 59049 }, { "epoch": 6.064078866296981, "grad_norm": 0.08269158750772476, "learning_rate": 0.01, "loss": 1.9905, "step": 59052 }, { "epoch": 6.064386937769562, "grad_norm": 0.07285866141319275, "learning_rate": 0.01, "loss": 1.9683, "step": 59055 }, { "epoch": 6.064695009242144, "grad_norm": 0.07736973464488983, "learning_rate": 0.01, "loss": 1.9928, "step": 59058 }, { "epoch": 6.0650030807147255, "grad_norm": 0.0628119483590126, "learning_rate": 0.01, "loss": 2.0, "step": 59061 }, { "epoch": 6.0653111521873075, "grad_norm": 0.044494885951280594, "learning_rate": 0.01, "loss": 2.0048, "step": 59064 }, { "epoch": 6.065619223659889, "grad_norm": 0.1270086020231247, "learning_rate": 0.01, "loss": 1.9853, "step": 59067 }, { "epoch": 6.065927295132471, "grad_norm": 0.045628707855939865, "learning_rate": 0.01, "loss": 1.9862, "step": 59070 }, { "epoch": 6.066235366605053, "grad_norm": 0.07165969908237457, "learning_rate": 0.01, "loss": 1.9738, "step": 59073 }, { "epoch": 6.066543438077634, "grad_norm": 0.047646794468164444, "learning_rate": 0.01, "loss": 1.9962, "step": 59076 }, { "epoch": 6.066851509550216, "grad_norm": 0.05064648762345314, "learning_rate": 0.01, "loss": 1.9851, "step": 59079 }, { "epoch": 6.067159581022797, "grad_norm": 0.07075773924589157, "learning_rate": 0.01, "loss": 1.9746, "step": 59082 }, { "epoch": 6.067467652495379, "grad_norm": 0.0734126940369606, "learning_rate": 0.01, "loss": 2.0036, "step": 59085 }, { "epoch": 6.06777572396796, "grad_norm": 0.07513909041881561, "learning_rate": 0.01, "loss": 2.0011, "step": 59088 }, { "epoch": 6.068083795440542, "grad_norm": 0.04252862557768822, "learning_rate": 0.01, "loss": 1.9909, "step": 59091 }, { "epoch": 6.068391866913124, "grad_norm": 0.06020990386605263, "learning_rate": 0.01, "loss": 1.9744, "step": 59094 }, { "epoch": 6.068699938385706, "grad_norm": 0.048400718718767166, "learning_rate": 0.01, "loss": 1.9986, "step": 59097 }, { "epoch": 6.069008009858287, "grad_norm": 0.10299364477396011, "learning_rate": 0.01, "loss": 1.9918, "step": 59100 }, { "epoch": 6.069316081330869, "grad_norm": 0.07474274933338165, "learning_rate": 0.01, "loss": 2.0238, "step": 59103 }, { "epoch": 6.06962415280345, "grad_norm": 0.069256491959095, "learning_rate": 0.01, "loss": 1.9933, "step": 59106 }, { "epoch": 6.069932224276032, "grad_norm": 0.07016000151634216, "learning_rate": 0.01, "loss": 1.9659, "step": 59109 }, { "epoch": 6.070240295748613, "grad_norm": 0.10577663034200668, "learning_rate": 0.01, "loss": 1.9724, "step": 59112 }, { "epoch": 6.070548367221195, "grad_norm": 0.10170361399650574, "learning_rate": 0.01, "loss": 1.9839, "step": 59115 }, { "epoch": 6.070856438693777, "grad_norm": 0.03669926896691322, "learning_rate": 0.01, "loss": 1.9588, "step": 59118 }, { "epoch": 6.0711645101663585, "grad_norm": 0.04145694151520729, "learning_rate": 0.01, "loss": 1.9565, "step": 59121 }, { "epoch": 6.0714725816389405, "grad_norm": 0.052021101117134094, "learning_rate": 0.01, "loss": 1.9919, "step": 59124 }, { "epoch": 6.071780653111522, "grad_norm": 0.10580527782440186, "learning_rate": 0.01, "loss": 1.9736, "step": 59127 }, { "epoch": 6.072088724584104, "grad_norm": 0.05877790227532387, "learning_rate": 0.01, "loss": 2.0073, "step": 59130 }, { "epoch": 6.072396796056685, "grad_norm": 0.047968316823244095, "learning_rate": 0.01, "loss": 1.9982, "step": 59133 }, { "epoch": 6.072704867529267, "grad_norm": 0.05399814620614052, "learning_rate": 0.01, "loss": 1.9785, "step": 59136 }, { "epoch": 6.073012939001848, "grad_norm": 0.0343160405755043, "learning_rate": 0.01, "loss": 1.9685, "step": 59139 }, { "epoch": 6.07332101047443, "grad_norm": 0.032716646790504456, "learning_rate": 0.01, "loss": 1.989, "step": 59142 }, { "epoch": 6.073629081947011, "grad_norm": 0.09958682209253311, "learning_rate": 0.01, "loss": 1.9854, "step": 59145 }, { "epoch": 6.073937153419593, "grad_norm": 0.14549368619918823, "learning_rate": 0.01, "loss": 1.9769, "step": 59148 }, { "epoch": 6.0742452248921746, "grad_norm": 0.12808917462825775, "learning_rate": 0.01, "loss": 2.0228, "step": 59151 }, { "epoch": 6.074553296364757, "grad_norm": 0.06274955719709396, "learning_rate": 0.01, "loss": 1.9703, "step": 59154 }, { "epoch": 6.074861367837339, "grad_norm": 0.06658618897199631, "learning_rate": 0.01, "loss": 1.9678, "step": 59157 }, { "epoch": 6.07516943930992, "grad_norm": 0.07997333258390427, "learning_rate": 0.01, "loss": 2.0145, "step": 59160 }, { "epoch": 6.075477510782502, "grad_norm": 0.040492795407772064, "learning_rate": 0.01, "loss": 1.9726, "step": 59163 }, { "epoch": 6.075785582255083, "grad_norm": 0.03949406370520592, "learning_rate": 0.01, "loss": 1.9907, "step": 59166 }, { "epoch": 6.076093653727665, "grad_norm": 0.0330752469599247, "learning_rate": 0.01, "loss": 1.9876, "step": 59169 }, { "epoch": 6.076401725200246, "grad_norm": 0.03734998777508736, "learning_rate": 0.01, "loss": 1.9643, "step": 59172 }, { "epoch": 6.076709796672828, "grad_norm": 0.04145140200853348, "learning_rate": 0.01, "loss": 1.9748, "step": 59175 }, { "epoch": 6.0770178681454095, "grad_norm": 0.04934060946106911, "learning_rate": 0.01, "loss": 1.9791, "step": 59178 }, { "epoch": 6.0773259396179915, "grad_norm": 0.09385284036397934, "learning_rate": 0.01, "loss": 2.0134, "step": 59181 }, { "epoch": 6.077634011090573, "grad_norm": 0.14405569434165955, "learning_rate": 0.01, "loss": 1.9802, "step": 59184 }, { "epoch": 6.077942082563155, "grad_norm": 0.12107555568218231, "learning_rate": 0.01, "loss": 1.9945, "step": 59187 }, { "epoch": 6.078250154035736, "grad_norm": 0.05842713639140129, "learning_rate": 0.01, "loss": 1.9649, "step": 59190 }, { "epoch": 6.078558225508318, "grad_norm": 0.049650490283966064, "learning_rate": 0.01, "loss": 1.9795, "step": 59193 }, { "epoch": 6.0788662969809, "grad_norm": 0.06532774120569229, "learning_rate": 0.01, "loss": 2.0231, "step": 59196 }, { "epoch": 6.079174368453481, "grad_norm": 0.03809603303670883, "learning_rate": 0.01, "loss": 1.9844, "step": 59199 }, { "epoch": 6.079482439926063, "grad_norm": 0.07845079898834229, "learning_rate": 0.01, "loss": 1.9974, "step": 59202 }, { "epoch": 6.079790511398644, "grad_norm": 0.15672674775123596, "learning_rate": 0.01, "loss": 1.997, "step": 59205 }, { "epoch": 6.080098582871226, "grad_norm": 0.11329050362110138, "learning_rate": 0.01, "loss": 2.0003, "step": 59208 }, { "epoch": 6.080406654343808, "grad_norm": 0.08986864238977432, "learning_rate": 0.01, "loss": 1.9638, "step": 59211 }, { "epoch": 6.08071472581639, "grad_norm": 0.040678489953279495, "learning_rate": 0.01, "loss": 1.9791, "step": 59214 }, { "epoch": 6.081022797288971, "grad_norm": 0.09467331320047379, "learning_rate": 0.01, "loss": 2.0112, "step": 59217 }, { "epoch": 6.081330868761553, "grad_norm": 0.0797513797879219, "learning_rate": 0.01, "loss": 1.9946, "step": 59220 }, { "epoch": 6.081638940234134, "grad_norm": 0.04497670754790306, "learning_rate": 0.01, "loss": 1.9816, "step": 59223 }, { "epoch": 6.081947011706716, "grad_norm": 0.08867593109607697, "learning_rate": 0.01, "loss": 1.9627, "step": 59226 }, { "epoch": 6.082255083179297, "grad_norm": 0.1013980433344841, "learning_rate": 0.01, "loss": 1.9837, "step": 59229 }, { "epoch": 6.082563154651879, "grad_norm": 0.05733179301023483, "learning_rate": 0.01, "loss": 1.9862, "step": 59232 }, { "epoch": 6.082871226124461, "grad_norm": 0.09696310758590698, "learning_rate": 0.01, "loss": 1.9978, "step": 59235 }, { "epoch": 6.0831792975970425, "grad_norm": 0.07615906745195389, "learning_rate": 0.01, "loss": 2.0072, "step": 59238 }, { "epoch": 6.0834873690696245, "grad_norm": 0.08034536242485046, "learning_rate": 0.01, "loss": 1.9871, "step": 59241 }, { "epoch": 6.083795440542206, "grad_norm": 0.04975961148738861, "learning_rate": 0.01, "loss": 1.9946, "step": 59244 }, { "epoch": 6.084103512014788, "grad_norm": 0.09680989384651184, "learning_rate": 0.01, "loss": 2.0012, "step": 59247 }, { "epoch": 6.084411583487369, "grad_norm": 0.06167134270071983, "learning_rate": 0.01, "loss": 1.9781, "step": 59250 }, { "epoch": 6.084719654959951, "grad_norm": 0.08627012372016907, "learning_rate": 0.01, "loss": 2.0036, "step": 59253 }, { "epoch": 6.085027726432532, "grad_norm": 0.053778212517499924, "learning_rate": 0.01, "loss": 2.0024, "step": 59256 }, { "epoch": 6.085335797905114, "grad_norm": 0.10851852595806122, "learning_rate": 0.01, "loss": 2.0025, "step": 59259 }, { "epoch": 6.085643869377695, "grad_norm": 0.08398763090372086, "learning_rate": 0.01, "loss": 1.9835, "step": 59262 }, { "epoch": 6.085951940850277, "grad_norm": 0.09030786901712418, "learning_rate": 0.01, "loss": 2.009, "step": 59265 }, { "epoch": 6.0862600123228585, "grad_norm": 0.08578209578990936, "learning_rate": 0.01, "loss": 1.9648, "step": 59268 }, { "epoch": 6.086568083795441, "grad_norm": 0.06893924623727798, "learning_rate": 0.01, "loss": 1.9847, "step": 59271 }, { "epoch": 6.086876155268023, "grad_norm": 0.044449809938669205, "learning_rate": 0.01, "loss": 1.9844, "step": 59274 }, { "epoch": 6.087184226740604, "grad_norm": 0.03283555805683136, "learning_rate": 0.01, "loss": 1.9807, "step": 59277 }, { "epoch": 6.087492298213186, "grad_norm": 0.05007379502058029, "learning_rate": 0.01, "loss": 1.994, "step": 59280 }, { "epoch": 6.087800369685767, "grad_norm": 0.05644026771187782, "learning_rate": 0.01, "loss": 1.9968, "step": 59283 }, { "epoch": 6.088108441158349, "grad_norm": 0.06911731511354446, "learning_rate": 0.01, "loss": 1.9543, "step": 59286 }, { "epoch": 6.08841651263093, "grad_norm": 0.06676710397005081, "learning_rate": 0.01, "loss": 1.9876, "step": 59289 }, { "epoch": 6.088724584103512, "grad_norm": 0.03128058835864067, "learning_rate": 0.01, "loss": 2.0105, "step": 59292 }, { "epoch": 6.089032655576093, "grad_norm": 0.05113793537020683, "learning_rate": 0.01, "loss": 1.966, "step": 59295 }, { "epoch": 6.0893407270486755, "grad_norm": 0.10867103934288025, "learning_rate": 0.01, "loss": 2.0168, "step": 59298 }, { "epoch": 6.089648798521257, "grad_norm": 0.10439618676900864, "learning_rate": 0.01, "loss": 1.9903, "step": 59301 }, { "epoch": 6.089956869993839, "grad_norm": 0.06279850751161575, "learning_rate": 0.01, "loss": 1.9788, "step": 59304 }, { "epoch": 6.09026494146642, "grad_norm": 0.07557038217782974, "learning_rate": 0.01, "loss": 1.9879, "step": 59307 }, { "epoch": 6.090573012939002, "grad_norm": 0.06895217299461365, "learning_rate": 0.01, "loss": 1.9832, "step": 59310 }, { "epoch": 6.090881084411583, "grad_norm": 0.058836501091718674, "learning_rate": 0.01, "loss": 1.976, "step": 59313 }, { "epoch": 6.091189155884165, "grad_norm": 0.10100754350423813, "learning_rate": 0.01, "loss": 1.9963, "step": 59316 }, { "epoch": 6.091497227356747, "grad_norm": 0.03705455735325813, "learning_rate": 0.01, "loss": 1.9814, "step": 59319 }, { "epoch": 6.091805298829328, "grad_norm": 0.09770546853542328, "learning_rate": 0.01, "loss": 1.9778, "step": 59322 }, { "epoch": 6.09211337030191, "grad_norm": 0.07368703931570053, "learning_rate": 0.01, "loss": 1.9476, "step": 59325 }, { "epoch": 6.0924214417744915, "grad_norm": 0.07088816910982132, "learning_rate": 0.01, "loss": 2.0124, "step": 59328 }, { "epoch": 6.092729513247074, "grad_norm": 0.05165349692106247, "learning_rate": 0.01, "loss": 1.9781, "step": 59331 }, { "epoch": 6.093037584719655, "grad_norm": 0.08270641416311264, "learning_rate": 0.01, "loss": 1.991, "step": 59334 }, { "epoch": 6.093345656192237, "grad_norm": 0.07178719341754913, "learning_rate": 0.01, "loss": 2.0069, "step": 59337 }, { "epoch": 6.093653727664818, "grad_norm": 0.07591982930898666, "learning_rate": 0.01, "loss": 1.9647, "step": 59340 }, { "epoch": 6.0939617991374, "grad_norm": 0.08362317830324173, "learning_rate": 0.01, "loss": 1.9717, "step": 59343 }, { "epoch": 6.094269870609981, "grad_norm": 0.04838939011096954, "learning_rate": 0.01, "loss": 1.9897, "step": 59346 }, { "epoch": 6.094577942082563, "grad_norm": 0.049368396401405334, "learning_rate": 0.01, "loss": 1.9877, "step": 59349 }, { "epoch": 6.094886013555144, "grad_norm": 0.05581490695476532, "learning_rate": 0.01, "loss": 1.9736, "step": 59352 }, { "epoch": 6.095194085027726, "grad_norm": 0.04965098947286606, "learning_rate": 0.01, "loss": 1.9675, "step": 59355 }, { "epoch": 6.0955021565003085, "grad_norm": 0.03643220290541649, "learning_rate": 0.01, "loss": 2.0234, "step": 59358 }, { "epoch": 6.09581022797289, "grad_norm": 0.08060404658317566, "learning_rate": 0.01, "loss": 1.987, "step": 59361 }, { "epoch": 6.096118299445472, "grad_norm": 0.08821488916873932, "learning_rate": 0.01, "loss": 1.9602, "step": 59364 }, { "epoch": 6.096426370918053, "grad_norm": 0.04556925222277641, "learning_rate": 0.01, "loss": 2.0006, "step": 59367 }, { "epoch": 6.096734442390635, "grad_norm": 0.07568595558404922, "learning_rate": 0.01, "loss": 1.989, "step": 59370 }, { "epoch": 6.097042513863216, "grad_norm": 0.06441906094551086, "learning_rate": 0.01, "loss": 1.9778, "step": 59373 }, { "epoch": 6.097350585335798, "grad_norm": 0.04911012202501297, "learning_rate": 0.01, "loss": 2.0094, "step": 59376 }, { "epoch": 6.097658656808379, "grad_norm": 0.10038016736507416, "learning_rate": 0.01, "loss": 1.97, "step": 59379 }, { "epoch": 6.097966728280961, "grad_norm": 0.08682964742183685, "learning_rate": 0.01, "loss": 1.9936, "step": 59382 }, { "epoch": 6.0982747997535425, "grad_norm": 0.07904212176799774, "learning_rate": 0.01, "loss": 1.9821, "step": 59385 }, { "epoch": 6.0985828712261245, "grad_norm": 0.07314693182706833, "learning_rate": 0.01, "loss": 1.9913, "step": 59388 }, { "epoch": 6.098890942698706, "grad_norm": 0.04866940528154373, "learning_rate": 0.01, "loss": 1.998, "step": 59391 }, { "epoch": 6.099199014171288, "grad_norm": 0.040898289531469345, "learning_rate": 0.01, "loss": 1.9953, "step": 59394 }, { "epoch": 6.09950708564387, "grad_norm": 0.04597887024283409, "learning_rate": 0.01, "loss": 2.0, "step": 59397 }, { "epoch": 6.099815157116451, "grad_norm": 0.04504725709557533, "learning_rate": 0.01, "loss": 2.0008, "step": 59400 }, { "epoch": 6.100123228589033, "grad_norm": 0.032154761254787445, "learning_rate": 0.01, "loss": 1.9804, "step": 59403 }, { "epoch": 6.100431300061614, "grad_norm": 0.06787566095590591, "learning_rate": 0.01, "loss": 1.999, "step": 59406 }, { "epoch": 6.100739371534196, "grad_norm": 0.07191510498523712, "learning_rate": 0.01, "loss": 1.9984, "step": 59409 }, { "epoch": 6.101047443006777, "grad_norm": 0.08066849410533905, "learning_rate": 0.01, "loss": 1.9812, "step": 59412 }, { "epoch": 6.101355514479359, "grad_norm": 0.05546758696436882, "learning_rate": 0.01, "loss": 1.9777, "step": 59415 }, { "epoch": 6.101663585951941, "grad_norm": 0.059205375611782074, "learning_rate": 0.01, "loss": 1.9817, "step": 59418 }, { "epoch": 6.101971657424523, "grad_norm": 0.03468778356909752, "learning_rate": 0.01, "loss": 1.9937, "step": 59421 }, { "epoch": 6.102279728897104, "grad_norm": 0.06100203096866608, "learning_rate": 0.01, "loss": 2.0237, "step": 59424 }, { "epoch": 6.102587800369686, "grad_norm": 0.04229642450809479, "learning_rate": 0.01, "loss": 1.969, "step": 59427 }, { "epoch": 6.102895871842267, "grad_norm": 0.09364788234233856, "learning_rate": 0.01, "loss": 1.9774, "step": 59430 }, { "epoch": 6.103203943314849, "grad_norm": 0.0706750899553299, "learning_rate": 0.01, "loss": 1.9873, "step": 59433 }, { "epoch": 6.103512014787431, "grad_norm": 0.1148315817117691, "learning_rate": 0.01, "loss": 1.9675, "step": 59436 }, { "epoch": 6.103820086260012, "grad_norm": 0.057128921151161194, "learning_rate": 0.01, "loss": 1.9544, "step": 59439 }, { "epoch": 6.104128157732594, "grad_norm": 0.054126545786857605, "learning_rate": 0.01, "loss": 2.0055, "step": 59442 }, { "epoch": 6.1044362292051755, "grad_norm": 0.050927091389894485, "learning_rate": 0.01, "loss": 1.9675, "step": 59445 }, { "epoch": 6.1047443006777575, "grad_norm": 0.05484678968787193, "learning_rate": 0.01, "loss": 1.9928, "step": 59448 }, { "epoch": 6.105052372150339, "grad_norm": 0.06659942865371704, "learning_rate": 0.01, "loss": 1.9842, "step": 59451 }, { "epoch": 6.105360443622921, "grad_norm": 0.06512758880853653, "learning_rate": 0.01, "loss": 1.9836, "step": 59454 }, { "epoch": 6.105668515095502, "grad_norm": 0.04978008568286896, "learning_rate": 0.01, "loss": 1.9708, "step": 59457 }, { "epoch": 6.105976586568084, "grad_norm": 0.03648846223950386, "learning_rate": 0.01, "loss": 1.9725, "step": 59460 }, { "epoch": 6.106284658040665, "grad_norm": 0.049807704985141754, "learning_rate": 0.01, "loss": 1.9768, "step": 59463 }, { "epoch": 6.106592729513247, "grad_norm": 0.04505782201886177, "learning_rate": 0.01, "loss": 1.9688, "step": 59466 }, { "epoch": 6.106900800985828, "grad_norm": 0.05675986409187317, "learning_rate": 0.01, "loss": 1.9758, "step": 59469 }, { "epoch": 6.10720887245841, "grad_norm": 0.06363193690776825, "learning_rate": 0.01, "loss": 1.9841, "step": 59472 }, { "epoch": 6.107516943930992, "grad_norm": 0.038001857697963715, "learning_rate": 0.01, "loss": 1.9912, "step": 59475 }, { "epoch": 6.107825015403574, "grad_norm": 0.034627631306648254, "learning_rate": 0.01, "loss": 1.976, "step": 59478 }, { "epoch": 6.108133086876156, "grad_norm": 0.0814884752035141, "learning_rate": 0.01, "loss": 1.9742, "step": 59481 }, { "epoch": 6.108441158348737, "grad_norm": 0.0862545445561409, "learning_rate": 0.01, "loss": 1.9793, "step": 59484 }, { "epoch": 6.108749229821319, "grad_norm": 0.05416722968220711, "learning_rate": 0.01, "loss": 1.9794, "step": 59487 }, { "epoch": 6.1090573012939, "grad_norm": 0.04082367196679115, "learning_rate": 0.01, "loss": 1.9933, "step": 59490 }, { "epoch": 6.109365372766482, "grad_norm": 0.0981779396533966, "learning_rate": 0.01, "loss": 1.9745, "step": 59493 }, { "epoch": 6.109673444239063, "grad_norm": 0.03428742289543152, "learning_rate": 0.01, "loss": 1.9762, "step": 59496 }, { "epoch": 6.109981515711645, "grad_norm": 0.07878245413303375, "learning_rate": 0.01, "loss": 1.9798, "step": 59499 }, { "epoch": 6.110289587184226, "grad_norm": 0.1018940880894661, "learning_rate": 0.01, "loss": 1.9923, "step": 59502 }, { "epoch": 6.1105976586568085, "grad_norm": 0.03978511691093445, "learning_rate": 0.01, "loss": 1.9941, "step": 59505 }, { "epoch": 6.11090573012939, "grad_norm": 0.05915730074048042, "learning_rate": 0.01, "loss": 1.9899, "step": 59508 }, { "epoch": 6.111213801601972, "grad_norm": 0.0455721952021122, "learning_rate": 0.01, "loss": 1.9844, "step": 59511 }, { "epoch": 6.111521873074553, "grad_norm": 0.050054021179676056, "learning_rate": 0.01, "loss": 1.9907, "step": 59514 }, { "epoch": 6.111829944547135, "grad_norm": 0.0369170643389225, "learning_rate": 0.01, "loss": 1.9817, "step": 59517 }, { "epoch": 6.112138016019717, "grad_norm": 0.044435255229473114, "learning_rate": 0.01, "loss": 2.0025, "step": 59520 }, { "epoch": 6.112446087492298, "grad_norm": 0.04968203604221344, "learning_rate": 0.01, "loss": 1.9816, "step": 59523 }, { "epoch": 6.11275415896488, "grad_norm": 0.04474148899316788, "learning_rate": 0.01, "loss": 1.9734, "step": 59526 }, { "epoch": 6.113062230437461, "grad_norm": 0.03932742401957512, "learning_rate": 0.01, "loss": 1.9926, "step": 59529 }, { "epoch": 6.113370301910043, "grad_norm": 0.10069919377565384, "learning_rate": 0.01, "loss": 2.0154, "step": 59532 }, { "epoch": 6.1136783733826245, "grad_norm": 0.08742933720350266, "learning_rate": 0.01, "loss": 1.9891, "step": 59535 }, { "epoch": 6.113986444855207, "grad_norm": 0.15486708283424377, "learning_rate": 0.01, "loss": 1.9888, "step": 59538 }, { "epoch": 6.114294516327788, "grad_norm": 0.05859851837158203, "learning_rate": 0.01, "loss": 2.01, "step": 59541 }, { "epoch": 6.11460258780037, "grad_norm": 0.04623505100607872, "learning_rate": 0.01, "loss": 2.0049, "step": 59544 }, { "epoch": 6.114910659272951, "grad_norm": 0.037663958966732025, "learning_rate": 0.01, "loss": 1.9699, "step": 59547 }, { "epoch": 6.115218730745533, "grad_norm": 0.03428487107157707, "learning_rate": 0.01, "loss": 2.0076, "step": 59550 }, { "epoch": 6.115526802218114, "grad_norm": 0.03549535945057869, "learning_rate": 0.01, "loss": 1.984, "step": 59553 }, { "epoch": 6.115834873690696, "grad_norm": 0.0357876755297184, "learning_rate": 0.01, "loss": 1.9636, "step": 59556 }, { "epoch": 6.116142945163278, "grad_norm": 0.05952336639165878, "learning_rate": 0.01, "loss": 1.9522, "step": 59559 }, { "epoch": 6.116451016635859, "grad_norm": 0.06661629676818848, "learning_rate": 0.01, "loss": 1.9885, "step": 59562 }, { "epoch": 6.1167590881084415, "grad_norm": 0.037326935678720474, "learning_rate": 0.01, "loss": 1.959, "step": 59565 }, { "epoch": 6.117067159581023, "grad_norm": 0.08297394216060638, "learning_rate": 0.01, "loss": 1.9881, "step": 59568 }, { "epoch": 6.117375231053605, "grad_norm": 0.08954669535160065, "learning_rate": 0.01, "loss": 1.9726, "step": 59571 }, { "epoch": 6.117683302526186, "grad_norm": 0.06428781896829605, "learning_rate": 0.01, "loss": 1.9688, "step": 59574 }, { "epoch": 6.117991373998768, "grad_norm": 0.0963866114616394, "learning_rate": 0.01, "loss": 2.0015, "step": 59577 }, { "epoch": 6.118299445471349, "grad_norm": 0.07301589101552963, "learning_rate": 0.01, "loss": 2.0128, "step": 59580 }, { "epoch": 6.118607516943931, "grad_norm": 0.05073026567697525, "learning_rate": 0.01, "loss": 1.9731, "step": 59583 }, { "epoch": 6.118915588416512, "grad_norm": 0.04244513809680939, "learning_rate": 0.01, "loss": 1.9694, "step": 59586 }, { "epoch": 6.119223659889094, "grad_norm": 0.0322258435189724, "learning_rate": 0.01, "loss": 1.9584, "step": 59589 }, { "epoch": 6.1195317313616755, "grad_norm": 0.040786877274513245, "learning_rate": 0.01, "loss": 1.9797, "step": 59592 }, { "epoch": 6.1198398028342575, "grad_norm": 0.1062447801232338, "learning_rate": 0.01, "loss": 1.9888, "step": 59595 }, { "epoch": 6.12014787430684, "grad_norm": 0.07283103466033936, "learning_rate": 0.01, "loss": 1.9989, "step": 59598 }, { "epoch": 6.120455945779421, "grad_norm": 0.04273154214024544, "learning_rate": 0.01, "loss": 1.9896, "step": 59601 }, { "epoch": 6.120764017252003, "grad_norm": 0.0657401755452156, "learning_rate": 0.01, "loss": 1.9798, "step": 59604 }, { "epoch": 6.121072088724584, "grad_norm": 0.03795840963721275, "learning_rate": 0.01, "loss": 1.9599, "step": 59607 }, { "epoch": 6.121380160197166, "grad_norm": 0.03355659544467926, "learning_rate": 0.01, "loss": 2.0068, "step": 59610 }, { "epoch": 6.121688231669747, "grad_norm": 0.13272467255592346, "learning_rate": 0.01, "loss": 1.9945, "step": 59613 }, { "epoch": 6.121996303142329, "grad_norm": 0.07595691084861755, "learning_rate": 0.01, "loss": 1.979, "step": 59616 }, { "epoch": 6.12230437461491, "grad_norm": 0.0829743966460228, "learning_rate": 0.01, "loss": 1.988, "step": 59619 }, { "epoch": 6.122612446087492, "grad_norm": 0.05950823426246643, "learning_rate": 0.01, "loss": 1.986, "step": 59622 }, { "epoch": 6.122920517560074, "grad_norm": 0.06740408390760422, "learning_rate": 0.01, "loss": 1.9864, "step": 59625 }, { "epoch": 6.123228589032656, "grad_norm": 0.03415486961603165, "learning_rate": 0.01, "loss": 1.9805, "step": 59628 }, { "epoch": 6.123536660505237, "grad_norm": 0.06124688684940338, "learning_rate": 0.01, "loss": 1.9978, "step": 59631 }, { "epoch": 6.123844731977819, "grad_norm": 0.06151755899190903, "learning_rate": 0.01, "loss": 1.9895, "step": 59634 }, { "epoch": 6.124152803450401, "grad_norm": 0.12702985107898712, "learning_rate": 0.01, "loss": 2.0098, "step": 59637 }, { "epoch": 6.124460874922982, "grad_norm": 0.08845409005880356, "learning_rate": 0.01, "loss": 1.9834, "step": 59640 }, { "epoch": 6.124768946395564, "grad_norm": 0.061156027019023895, "learning_rate": 0.01, "loss": 1.9725, "step": 59643 }, { "epoch": 6.125077017868145, "grad_norm": 0.050899405032396317, "learning_rate": 0.01, "loss": 1.9905, "step": 59646 }, { "epoch": 6.125385089340727, "grad_norm": 0.066854327917099, "learning_rate": 0.01, "loss": 1.9638, "step": 59649 }, { "epoch": 6.1256931608133085, "grad_norm": 0.06697458028793335, "learning_rate": 0.01, "loss": 1.9864, "step": 59652 }, { "epoch": 6.1260012322858906, "grad_norm": 0.05130474641919136, "learning_rate": 0.01, "loss": 1.996, "step": 59655 }, { "epoch": 6.126309303758472, "grad_norm": 0.04654531553387642, "learning_rate": 0.01, "loss": 1.9818, "step": 59658 }, { "epoch": 6.126617375231054, "grad_norm": 0.036213308572769165, "learning_rate": 0.01, "loss": 1.9758, "step": 59661 }, { "epoch": 6.126925446703635, "grad_norm": 0.052255984395742416, "learning_rate": 0.01, "loss": 1.9706, "step": 59664 }, { "epoch": 6.127233518176217, "grad_norm": 0.07511945068836212, "learning_rate": 0.01, "loss": 1.9924, "step": 59667 }, { "epoch": 6.127541589648798, "grad_norm": 0.09632497280836105, "learning_rate": 0.01, "loss": 1.978, "step": 59670 }, { "epoch": 6.12784966112138, "grad_norm": 0.059645794332027435, "learning_rate": 0.01, "loss": 1.9908, "step": 59673 }, { "epoch": 6.128157732593962, "grad_norm": 0.08466697484254837, "learning_rate": 0.01, "loss": 1.9728, "step": 59676 }, { "epoch": 6.128465804066543, "grad_norm": 0.06881053000688553, "learning_rate": 0.01, "loss": 1.9875, "step": 59679 }, { "epoch": 6.1287738755391254, "grad_norm": 0.04392458125948906, "learning_rate": 0.01, "loss": 1.9733, "step": 59682 }, { "epoch": 6.129081947011707, "grad_norm": 0.046918466687202454, "learning_rate": 0.01, "loss": 2.006, "step": 59685 }, { "epoch": 6.129390018484289, "grad_norm": 0.13943582773208618, "learning_rate": 0.01, "loss": 1.944, "step": 59688 }, { "epoch": 6.12969808995687, "grad_norm": 0.032962556928396225, "learning_rate": 0.01, "loss": 2.0044, "step": 59691 }, { "epoch": 6.130006161429452, "grad_norm": 0.04881718382239342, "learning_rate": 0.01, "loss": 1.9738, "step": 59694 }, { "epoch": 6.130314232902033, "grad_norm": 0.049873992800712585, "learning_rate": 0.01, "loss": 1.9694, "step": 59697 }, { "epoch": 6.130622304374615, "grad_norm": 0.11788579076528549, "learning_rate": 0.01, "loss": 1.9892, "step": 59700 }, { "epoch": 6.130930375847196, "grad_norm": 0.06456585973501205, "learning_rate": 0.01, "loss": 1.9675, "step": 59703 }, { "epoch": 6.131238447319778, "grad_norm": 0.04507961496710777, "learning_rate": 0.01, "loss": 1.9788, "step": 59706 }, { "epoch": 6.1315465187923595, "grad_norm": 0.04093556106090546, "learning_rate": 0.01, "loss": 1.992, "step": 59709 }, { "epoch": 6.1318545902649415, "grad_norm": 0.034374650567770004, "learning_rate": 0.01, "loss": 1.9818, "step": 59712 }, { "epoch": 6.132162661737523, "grad_norm": 0.09779324382543564, "learning_rate": 0.01, "loss": 1.9681, "step": 59715 }, { "epoch": 6.132470733210105, "grad_norm": 0.03912382200360298, "learning_rate": 0.01, "loss": 1.9946, "step": 59718 }, { "epoch": 6.132778804682687, "grad_norm": 0.046537820249795914, "learning_rate": 0.01, "loss": 1.9881, "step": 59721 }, { "epoch": 6.133086876155268, "grad_norm": 0.04848983883857727, "learning_rate": 0.01, "loss": 1.9756, "step": 59724 }, { "epoch": 6.13339494762785, "grad_norm": 0.03337480127811432, "learning_rate": 0.01, "loss": 1.9633, "step": 59727 }, { "epoch": 6.133703019100431, "grad_norm": 0.06988492608070374, "learning_rate": 0.01, "loss": 1.9742, "step": 59730 }, { "epoch": 6.134011090573013, "grad_norm": 0.1184145137667656, "learning_rate": 0.01, "loss": 1.9723, "step": 59733 }, { "epoch": 6.134319162045594, "grad_norm": 0.1065261960029602, "learning_rate": 0.01, "loss": 1.9516, "step": 59736 }, { "epoch": 6.134627233518176, "grad_norm": 0.03989608585834503, "learning_rate": 0.01, "loss": 1.9807, "step": 59739 }, { "epoch": 6.134935304990758, "grad_norm": 0.05950331315398216, "learning_rate": 0.01, "loss": 1.9715, "step": 59742 }, { "epoch": 6.13524337646334, "grad_norm": 0.044078536331653595, "learning_rate": 0.01, "loss": 1.9683, "step": 59745 }, { "epoch": 6.135551447935921, "grad_norm": 0.06196853518486023, "learning_rate": 0.01, "loss": 1.9875, "step": 59748 }, { "epoch": 6.135859519408503, "grad_norm": 0.05450844019651413, "learning_rate": 0.01, "loss": 1.9894, "step": 59751 }, { "epoch": 6.136167590881084, "grad_norm": 0.05260879918932915, "learning_rate": 0.01, "loss": 1.9602, "step": 59754 }, { "epoch": 6.136475662353666, "grad_norm": 0.04335511103272438, "learning_rate": 0.01, "loss": 1.9801, "step": 59757 }, { "epoch": 6.136783733826248, "grad_norm": 0.05219843611121178, "learning_rate": 0.01, "loss": 2.0052, "step": 59760 }, { "epoch": 6.137091805298829, "grad_norm": 0.06641986966133118, "learning_rate": 0.01, "loss": 2.021, "step": 59763 }, { "epoch": 6.137399876771411, "grad_norm": 0.10496216267347336, "learning_rate": 0.01, "loss": 2.001, "step": 59766 }, { "epoch": 6.1377079482439925, "grad_norm": 0.1305655688047409, "learning_rate": 0.01, "loss": 1.972, "step": 59769 }, { "epoch": 6.1380160197165745, "grad_norm": 0.06683500856161118, "learning_rate": 0.01, "loss": 1.9883, "step": 59772 }, { "epoch": 6.138324091189156, "grad_norm": 0.06884671747684479, "learning_rate": 0.01, "loss": 1.975, "step": 59775 }, { "epoch": 6.138632162661738, "grad_norm": 0.04899144545197487, "learning_rate": 0.01, "loss": 2.0069, "step": 59778 }, { "epoch": 6.138940234134319, "grad_norm": 0.061058346182107925, "learning_rate": 0.01, "loss": 1.9815, "step": 59781 }, { "epoch": 6.139248305606901, "grad_norm": 0.03729567304253578, "learning_rate": 0.01, "loss": 1.9593, "step": 59784 }, { "epoch": 6.139556377079482, "grad_norm": 0.1232062354683876, "learning_rate": 0.01, "loss": 2.0011, "step": 59787 }, { "epoch": 6.139864448552064, "grad_norm": 0.07398378103971481, "learning_rate": 0.01, "loss": 1.9824, "step": 59790 }, { "epoch": 6.140172520024645, "grad_norm": 0.06523609161376953, "learning_rate": 0.01, "loss": 1.9623, "step": 59793 }, { "epoch": 6.140480591497227, "grad_norm": 0.04748447984457016, "learning_rate": 0.01, "loss": 1.9731, "step": 59796 }, { "epoch": 6.140788662969809, "grad_norm": 0.10762014240026474, "learning_rate": 0.01, "loss": 2.0012, "step": 59799 }, { "epoch": 6.141096734442391, "grad_norm": 0.05152611434459686, "learning_rate": 0.01, "loss": 1.9775, "step": 59802 }, { "epoch": 6.141404805914973, "grad_norm": 0.03151378408074379, "learning_rate": 0.01, "loss": 1.9896, "step": 59805 }, { "epoch": 6.141712877387554, "grad_norm": 0.045448195189237595, "learning_rate": 0.01, "loss": 1.9635, "step": 59808 }, { "epoch": 6.142020948860136, "grad_norm": 0.03704848885536194, "learning_rate": 0.01, "loss": 1.9596, "step": 59811 }, { "epoch": 6.142329020332717, "grad_norm": 0.0592067614197731, "learning_rate": 0.01, "loss": 1.9417, "step": 59814 }, { "epoch": 6.142637091805299, "grad_norm": 0.11523483693599701, "learning_rate": 0.01, "loss": 1.9801, "step": 59817 }, { "epoch": 6.14294516327788, "grad_norm": 0.14963838458061218, "learning_rate": 0.01, "loss": 1.9822, "step": 59820 }, { "epoch": 6.143253234750462, "grad_norm": 0.08962684869766235, "learning_rate": 0.01, "loss": 1.9894, "step": 59823 }, { "epoch": 6.143561306223043, "grad_norm": 0.06388752162456512, "learning_rate": 0.01, "loss": 1.9935, "step": 59826 }, { "epoch": 6.1438693776956255, "grad_norm": 0.06632399559020996, "learning_rate": 0.01, "loss": 1.9724, "step": 59829 }, { "epoch": 6.144177449168207, "grad_norm": 0.08517513424158096, "learning_rate": 0.01, "loss": 1.988, "step": 59832 }, { "epoch": 6.144485520640789, "grad_norm": 0.05397310480475426, "learning_rate": 0.01, "loss": 1.9894, "step": 59835 }, { "epoch": 6.144793592113371, "grad_norm": 0.04470159485936165, "learning_rate": 0.01, "loss": 1.962, "step": 59838 }, { "epoch": 6.145101663585952, "grad_norm": 0.048731885850429535, "learning_rate": 0.01, "loss": 2.0055, "step": 59841 }, { "epoch": 6.145409735058534, "grad_norm": 0.13363894820213318, "learning_rate": 0.01, "loss": 1.9862, "step": 59844 }, { "epoch": 6.145717806531115, "grad_norm": 0.04807397723197937, "learning_rate": 0.01, "loss": 1.9891, "step": 59847 }, { "epoch": 6.146025878003697, "grad_norm": 0.05982908979058266, "learning_rate": 0.01, "loss": 1.993, "step": 59850 }, { "epoch": 6.146333949476278, "grad_norm": 0.0730501338839531, "learning_rate": 0.01, "loss": 1.9865, "step": 59853 }, { "epoch": 6.14664202094886, "grad_norm": 0.04355951398611069, "learning_rate": 0.01, "loss": 1.9638, "step": 59856 }, { "epoch": 6.1469500924214415, "grad_norm": 0.10257246345281601, "learning_rate": 0.01, "loss": 1.9685, "step": 59859 }, { "epoch": 6.147258163894024, "grad_norm": 0.0933048278093338, "learning_rate": 0.01, "loss": 1.9677, "step": 59862 }, { "epoch": 6.147566235366605, "grad_norm": 0.03633146733045578, "learning_rate": 0.01, "loss": 1.9681, "step": 59865 }, { "epoch": 6.147874306839187, "grad_norm": 0.07809139788150787, "learning_rate": 0.01, "loss": 1.9746, "step": 59868 }, { "epoch": 6.148182378311768, "grad_norm": 0.06432943791151047, "learning_rate": 0.01, "loss": 1.9611, "step": 59871 }, { "epoch": 6.14849044978435, "grad_norm": 0.04523482546210289, "learning_rate": 0.01, "loss": 1.9928, "step": 59874 }, { "epoch": 6.148798521256932, "grad_norm": 0.04328586533665657, "learning_rate": 0.01, "loss": 1.978, "step": 59877 }, { "epoch": 6.149106592729513, "grad_norm": 0.04336148872971535, "learning_rate": 0.01, "loss": 1.9849, "step": 59880 }, { "epoch": 6.149414664202095, "grad_norm": 0.11612840741872787, "learning_rate": 0.01, "loss": 1.9663, "step": 59883 }, { "epoch": 6.149722735674676, "grad_norm": 0.040377210825681686, "learning_rate": 0.01, "loss": 1.9995, "step": 59886 }, { "epoch": 6.1500308071472585, "grad_norm": 0.05374842882156372, "learning_rate": 0.01, "loss": 1.9995, "step": 59889 }, { "epoch": 6.15033887861984, "grad_norm": 0.042278483510017395, "learning_rate": 0.01, "loss": 1.9693, "step": 59892 }, { "epoch": 6.150646950092422, "grad_norm": 0.06057736650109291, "learning_rate": 0.01, "loss": 1.9718, "step": 59895 }, { "epoch": 6.150955021565003, "grad_norm": 0.07426716387271881, "learning_rate": 0.01, "loss": 1.9868, "step": 59898 }, { "epoch": 6.151263093037585, "grad_norm": 0.06555135548114777, "learning_rate": 0.01, "loss": 2.0028, "step": 59901 }, { "epoch": 6.151571164510166, "grad_norm": 0.031795963644981384, "learning_rate": 0.01, "loss": 1.9818, "step": 59904 }, { "epoch": 6.151879235982748, "grad_norm": 0.0928960070014, "learning_rate": 0.01, "loss": 1.9872, "step": 59907 }, { "epoch": 6.152187307455329, "grad_norm": 0.05229020118713379, "learning_rate": 0.01, "loss": 1.9801, "step": 59910 }, { "epoch": 6.152495378927911, "grad_norm": 0.05363921821117401, "learning_rate": 0.01, "loss": 1.9655, "step": 59913 }, { "epoch": 6.1528034504004925, "grad_norm": 0.05340690538287163, "learning_rate": 0.01, "loss": 1.9615, "step": 59916 }, { "epoch": 6.1531115218730745, "grad_norm": 0.04054640233516693, "learning_rate": 0.01, "loss": 1.9815, "step": 59919 }, { "epoch": 6.153419593345657, "grad_norm": 0.05909667909145355, "learning_rate": 0.01, "loss": 1.9926, "step": 59922 }, { "epoch": 6.153727664818238, "grad_norm": 0.04531288146972656, "learning_rate": 0.01, "loss": 1.9673, "step": 59925 }, { "epoch": 6.15403573629082, "grad_norm": 0.041250940412282944, "learning_rate": 0.01, "loss": 1.9777, "step": 59928 }, { "epoch": 6.154343807763401, "grad_norm": 0.042447373270988464, "learning_rate": 0.01, "loss": 1.9769, "step": 59931 }, { "epoch": 6.154651879235983, "grad_norm": 0.06417303532361984, "learning_rate": 0.01, "loss": 1.973, "step": 59934 }, { "epoch": 6.154959950708564, "grad_norm": 0.12572647631168365, "learning_rate": 0.01, "loss": 1.971, "step": 59937 }, { "epoch": 6.155268022181146, "grad_norm": 0.06711404025554657, "learning_rate": 0.01, "loss": 1.9693, "step": 59940 }, { "epoch": 6.155576093653727, "grad_norm": 0.09349125623703003, "learning_rate": 0.01, "loss": 1.9971, "step": 59943 }, { "epoch": 6.155884165126309, "grad_norm": 0.046295300126075745, "learning_rate": 0.01, "loss": 1.984, "step": 59946 }, { "epoch": 6.156192236598891, "grad_norm": 0.08443499356508255, "learning_rate": 0.01, "loss": 1.9803, "step": 59949 }, { "epoch": 6.156500308071473, "grad_norm": 0.048439864069223404, "learning_rate": 0.01, "loss": 2.0002, "step": 59952 }, { "epoch": 6.156808379544054, "grad_norm": 0.10180726647377014, "learning_rate": 0.01, "loss": 1.9799, "step": 59955 }, { "epoch": 6.157116451016636, "grad_norm": 0.03144579380750656, "learning_rate": 0.01, "loss": 2.0125, "step": 59958 }, { "epoch": 6.157424522489218, "grad_norm": 0.0712040439248085, "learning_rate": 0.01, "loss": 1.982, "step": 59961 }, { "epoch": 6.157732593961799, "grad_norm": 0.054967526346445084, "learning_rate": 0.01, "loss": 2.0012, "step": 59964 }, { "epoch": 6.158040665434381, "grad_norm": 0.04168044403195381, "learning_rate": 0.01, "loss": 1.9635, "step": 59967 }, { "epoch": 6.158348736906962, "grad_norm": 0.05962495878338814, "learning_rate": 0.01, "loss": 1.9924, "step": 59970 }, { "epoch": 6.158656808379544, "grad_norm": 0.11579214036464691, "learning_rate": 0.01, "loss": 2.0038, "step": 59973 }, { "epoch": 6.1589648798521255, "grad_norm": 0.03569880872964859, "learning_rate": 0.01, "loss": 1.9566, "step": 59976 }, { "epoch": 6.1592729513247075, "grad_norm": 0.09522563964128494, "learning_rate": 0.01, "loss": 2.0097, "step": 59979 }, { "epoch": 6.159581022797289, "grad_norm": 0.07732608169317245, "learning_rate": 0.01, "loss": 1.9811, "step": 59982 }, { "epoch": 6.159889094269871, "grad_norm": 0.057550933212041855, "learning_rate": 0.01, "loss": 1.9875, "step": 59985 }, { "epoch": 6.160197165742452, "grad_norm": 0.0942176878452301, "learning_rate": 0.01, "loss": 1.9938, "step": 59988 }, { "epoch": 6.160505237215034, "grad_norm": 0.0959196537733078, "learning_rate": 0.01, "loss": 1.9759, "step": 59991 }, { "epoch": 6.160813308687615, "grad_norm": 0.07278607040643692, "learning_rate": 0.01, "loss": 2.0074, "step": 59994 }, { "epoch": 6.161121380160197, "grad_norm": 0.05867347866296768, "learning_rate": 0.01, "loss": 1.9827, "step": 59997 }, { "epoch": 6.161429451632779, "grad_norm": 0.03255612030625343, "learning_rate": 0.01, "loss": 1.9843, "step": 60000 }, { "epoch": 6.16173752310536, "grad_norm": 0.056017834693193436, "learning_rate": 0.01, "loss": 1.9827, "step": 60003 }, { "epoch": 6.162045594577942, "grad_norm": 0.0943060889840126, "learning_rate": 0.01, "loss": 1.9873, "step": 60006 }, { "epoch": 6.162353666050524, "grad_norm": 0.08472935855388641, "learning_rate": 0.01, "loss": 2.004, "step": 60009 }, { "epoch": 6.162661737523106, "grad_norm": 0.053891006857156754, "learning_rate": 0.01, "loss": 1.9926, "step": 60012 }, { "epoch": 6.162969808995687, "grad_norm": 0.042883194983005524, "learning_rate": 0.01, "loss": 1.9827, "step": 60015 }, { "epoch": 6.163277880468269, "grad_norm": 0.05750950425863266, "learning_rate": 0.01, "loss": 1.9844, "step": 60018 }, { "epoch": 6.16358595194085, "grad_norm": 0.07917001843452454, "learning_rate": 0.01, "loss": 1.9833, "step": 60021 }, { "epoch": 6.163894023413432, "grad_norm": 0.06416572630405426, "learning_rate": 0.01, "loss": 1.9474, "step": 60024 }, { "epoch": 6.164202094886013, "grad_norm": 0.06587263941764832, "learning_rate": 0.01, "loss": 1.9872, "step": 60027 }, { "epoch": 6.164510166358595, "grad_norm": 0.09511373192071915, "learning_rate": 0.01, "loss": 2.0006, "step": 60030 }, { "epoch": 6.164818237831176, "grad_norm": 0.11690583825111389, "learning_rate": 0.01, "loss": 1.9966, "step": 60033 }, { "epoch": 6.1651263093037585, "grad_norm": 0.05916045978665352, "learning_rate": 0.01, "loss": 1.9648, "step": 60036 }, { "epoch": 6.1654343807763405, "grad_norm": 0.07679677754640579, "learning_rate": 0.01, "loss": 1.9894, "step": 60039 }, { "epoch": 6.165742452248922, "grad_norm": 0.05868346244096756, "learning_rate": 0.01, "loss": 2.0044, "step": 60042 }, { "epoch": 6.166050523721504, "grad_norm": 0.04336436092853546, "learning_rate": 0.01, "loss": 1.9956, "step": 60045 }, { "epoch": 6.166358595194085, "grad_norm": 0.053842004388570786, "learning_rate": 0.01, "loss": 1.9794, "step": 60048 }, { "epoch": 6.166666666666667, "grad_norm": 0.05783376097679138, "learning_rate": 0.01, "loss": 1.972, "step": 60051 }, { "epoch": 6.166974738139248, "grad_norm": 0.12786927819252014, "learning_rate": 0.01, "loss": 1.9734, "step": 60054 }, { "epoch": 6.16728280961183, "grad_norm": 0.10869529098272324, "learning_rate": 0.01, "loss": 2.0226, "step": 60057 }, { "epoch": 6.167590881084411, "grad_norm": 0.09978947043418884, "learning_rate": 0.01, "loss": 1.9633, "step": 60060 }, { "epoch": 6.167898952556993, "grad_norm": 0.06388422101736069, "learning_rate": 0.01, "loss": 1.9816, "step": 60063 }, { "epoch": 6.1682070240295745, "grad_norm": 0.05348409339785576, "learning_rate": 0.01, "loss": 1.9873, "step": 60066 }, { "epoch": 6.168515095502157, "grad_norm": 0.047684285789728165, "learning_rate": 0.01, "loss": 1.9827, "step": 60069 }, { "epoch": 6.168823166974738, "grad_norm": 0.031933482736349106, "learning_rate": 0.01, "loss": 1.9905, "step": 60072 }, { "epoch": 6.16913123844732, "grad_norm": 0.06409978866577148, "learning_rate": 0.01, "loss": 1.9719, "step": 60075 }, { "epoch": 6.169439309919902, "grad_norm": 0.07085554301738739, "learning_rate": 0.01, "loss": 1.9824, "step": 60078 }, { "epoch": 6.169747381392483, "grad_norm": 0.04958326742053032, "learning_rate": 0.01, "loss": 1.9736, "step": 60081 }, { "epoch": 6.170055452865065, "grad_norm": 0.037209220230579376, "learning_rate": 0.01, "loss": 1.9834, "step": 60084 }, { "epoch": 6.170363524337646, "grad_norm": 0.044104501605033875, "learning_rate": 0.01, "loss": 2.0076, "step": 60087 }, { "epoch": 6.170671595810228, "grad_norm": 0.06651319563388824, "learning_rate": 0.01, "loss": 1.9807, "step": 60090 }, { "epoch": 6.1709796672828094, "grad_norm": 0.15120342373847961, "learning_rate": 0.01, "loss": 1.9816, "step": 60093 }, { "epoch": 6.1712877387553915, "grad_norm": 0.17715202271938324, "learning_rate": 0.01, "loss": 2.015, "step": 60096 }, { "epoch": 6.171595810227973, "grad_norm": 0.07036132365465164, "learning_rate": 0.01, "loss": 2.0109, "step": 60099 }, { "epoch": 6.171903881700555, "grad_norm": 0.05515659227967262, "learning_rate": 0.01, "loss": 1.9692, "step": 60102 }, { "epoch": 6.172211953173136, "grad_norm": 0.054766733199357986, "learning_rate": 0.01, "loss": 1.9973, "step": 60105 }, { "epoch": 6.172520024645718, "grad_norm": 0.06652894616127014, "learning_rate": 0.01, "loss": 1.9999, "step": 60108 }, { "epoch": 6.172828096118299, "grad_norm": 0.05079234391450882, "learning_rate": 0.01, "loss": 1.9829, "step": 60111 }, { "epoch": 6.173136167590881, "grad_norm": 0.04592439532279968, "learning_rate": 0.01, "loss": 1.9934, "step": 60114 }, { "epoch": 6.173444239063462, "grad_norm": 0.03100336715579033, "learning_rate": 0.01, "loss": 1.9765, "step": 60117 }, { "epoch": 6.173752310536044, "grad_norm": 0.03455538675189018, "learning_rate": 0.01, "loss": 1.9661, "step": 60120 }, { "epoch": 6.174060382008626, "grad_norm": 0.04403119906783104, "learning_rate": 0.01, "loss": 1.9687, "step": 60123 }, { "epoch": 6.1743684534812076, "grad_norm": 0.1155620738863945, "learning_rate": 0.01, "loss": 2.0095, "step": 60126 }, { "epoch": 6.17467652495379, "grad_norm": 0.06684102863073349, "learning_rate": 0.01, "loss": 1.9644, "step": 60129 }, { "epoch": 6.174984596426371, "grad_norm": 0.04837580397725105, "learning_rate": 0.01, "loss": 1.9848, "step": 60132 }, { "epoch": 6.175292667898953, "grad_norm": 0.11638902872800827, "learning_rate": 0.01, "loss": 2.0013, "step": 60135 }, { "epoch": 6.175600739371534, "grad_norm": 0.06774953007698059, "learning_rate": 0.01, "loss": 1.9734, "step": 60138 }, { "epoch": 6.175908810844116, "grad_norm": 0.05518386512994766, "learning_rate": 0.01, "loss": 1.9865, "step": 60141 }, { "epoch": 6.176216882316697, "grad_norm": 0.03791549429297447, "learning_rate": 0.01, "loss": 1.9825, "step": 60144 }, { "epoch": 6.176524953789279, "grad_norm": 0.0599411316215992, "learning_rate": 0.01, "loss": 1.9722, "step": 60147 }, { "epoch": 6.17683302526186, "grad_norm": 0.08578135818243027, "learning_rate": 0.01, "loss": 1.9894, "step": 60150 }, { "epoch": 6.1771410967344424, "grad_norm": 0.04651505872607231, "learning_rate": 0.01, "loss": 1.9996, "step": 60153 }, { "epoch": 6.177449168207024, "grad_norm": 0.09119927883148193, "learning_rate": 0.01, "loss": 2.0062, "step": 60156 }, { "epoch": 6.177757239679606, "grad_norm": 0.12429966032505035, "learning_rate": 0.01, "loss": 1.9935, "step": 60159 }, { "epoch": 6.178065311152188, "grad_norm": 0.0350540392100811, "learning_rate": 0.01, "loss": 1.9885, "step": 60162 }, { "epoch": 6.178373382624769, "grad_norm": 0.06904268264770508, "learning_rate": 0.01, "loss": 1.9439, "step": 60165 }, { "epoch": 6.178681454097351, "grad_norm": 0.06134537607431412, "learning_rate": 0.01, "loss": 1.9955, "step": 60168 }, { "epoch": 6.178989525569932, "grad_norm": 0.07607719302177429, "learning_rate": 0.01, "loss": 2.0021, "step": 60171 }, { "epoch": 6.179297597042514, "grad_norm": 0.1023942083120346, "learning_rate": 0.01, "loss": 1.9637, "step": 60174 }, { "epoch": 6.179605668515095, "grad_norm": 0.04248664528131485, "learning_rate": 0.01, "loss": 1.9515, "step": 60177 }, { "epoch": 6.179913739987677, "grad_norm": 0.036357518285512924, "learning_rate": 0.01, "loss": 1.9509, "step": 60180 }, { "epoch": 6.1802218114602585, "grad_norm": 0.04085879400372505, "learning_rate": 0.01, "loss": 2.0125, "step": 60183 }, { "epoch": 6.180529882932841, "grad_norm": 0.05432041734457016, "learning_rate": 0.01, "loss": 1.9783, "step": 60186 }, { "epoch": 6.180837954405422, "grad_norm": 0.0373106487095356, "learning_rate": 0.01, "loss": 1.9559, "step": 60189 }, { "epoch": 6.181146025878004, "grad_norm": 0.13600775599479675, "learning_rate": 0.01, "loss": 2.0022, "step": 60192 }, { "epoch": 6.181454097350585, "grad_norm": 0.10217521339654922, "learning_rate": 0.01, "loss": 1.9617, "step": 60195 }, { "epoch": 6.181762168823167, "grad_norm": 0.11078688502311707, "learning_rate": 0.01, "loss": 1.9419, "step": 60198 }, { "epoch": 6.182070240295749, "grad_norm": 0.11216065287590027, "learning_rate": 0.01, "loss": 1.9614, "step": 60201 }, { "epoch": 6.18237831176833, "grad_norm": 0.07498262822628021, "learning_rate": 0.01, "loss": 2.004, "step": 60204 }, { "epoch": 6.182686383240912, "grad_norm": 0.0481102392077446, "learning_rate": 0.01, "loss": 1.9932, "step": 60207 }, { "epoch": 6.182994454713493, "grad_norm": 0.06563237309455872, "learning_rate": 0.01, "loss": 1.9979, "step": 60210 }, { "epoch": 6.1833025261860755, "grad_norm": 0.065886951982975, "learning_rate": 0.01, "loss": 1.9777, "step": 60213 }, { "epoch": 6.183610597658657, "grad_norm": 0.06081174686551094, "learning_rate": 0.01, "loss": 2.0077, "step": 60216 }, { "epoch": 6.183918669131239, "grad_norm": 0.040813907980918884, "learning_rate": 0.01, "loss": 1.9749, "step": 60219 }, { "epoch": 6.18422674060382, "grad_norm": 0.08460288494825363, "learning_rate": 0.01, "loss": 1.9912, "step": 60222 }, { "epoch": 6.184534812076402, "grad_norm": 0.1325504183769226, "learning_rate": 0.01, "loss": 1.9751, "step": 60225 }, { "epoch": 6.184842883548983, "grad_norm": 0.047543518245220184, "learning_rate": 0.01, "loss": 1.9975, "step": 60228 }, { "epoch": 6.185150955021565, "grad_norm": 0.09578222036361694, "learning_rate": 0.01, "loss": 1.9674, "step": 60231 }, { "epoch": 6.185459026494146, "grad_norm": 0.08167849481105804, "learning_rate": 0.01, "loss": 1.9771, "step": 60234 }, { "epoch": 6.185767097966728, "grad_norm": 0.04946918785572052, "learning_rate": 0.01, "loss": 1.9927, "step": 60237 }, { "epoch": 6.1860751694393095, "grad_norm": 0.08602156490087509, "learning_rate": 0.01, "loss": 1.9754, "step": 60240 }, { "epoch": 6.1863832409118915, "grad_norm": 0.07393812388181686, "learning_rate": 0.01, "loss": 1.974, "step": 60243 }, { "epoch": 6.186691312384474, "grad_norm": 0.04238250479102135, "learning_rate": 0.01, "loss": 1.9755, "step": 60246 }, { "epoch": 6.186999383857055, "grad_norm": 0.05332363396883011, "learning_rate": 0.01, "loss": 1.9832, "step": 60249 }, { "epoch": 6.187307455329637, "grad_norm": 0.048810552805662155, "learning_rate": 0.01, "loss": 1.9795, "step": 60252 }, { "epoch": 6.187615526802218, "grad_norm": 0.04000997915863991, "learning_rate": 0.01, "loss": 1.9742, "step": 60255 }, { "epoch": 6.1879235982748, "grad_norm": 0.07165291905403137, "learning_rate": 0.01, "loss": 2.0003, "step": 60258 }, { "epoch": 6.188231669747381, "grad_norm": 0.04843559488654137, "learning_rate": 0.01, "loss": 1.9686, "step": 60261 }, { "epoch": 6.188539741219963, "grad_norm": 0.048593342304229736, "learning_rate": 0.01, "loss": 1.9987, "step": 60264 }, { "epoch": 6.188847812692544, "grad_norm": 0.05250190943479538, "learning_rate": 0.01, "loss": 1.9647, "step": 60267 }, { "epoch": 6.189155884165126, "grad_norm": 0.09123876690864563, "learning_rate": 0.01, "loss": 1.9791, "step": 60270 }, { "epoch": 6.189463955637708, "grad_norm": 0.11671872437000275, "learning_rate": 0.01, "loss": 1.9752, "step": 60273 }, { "epoch": 6.18977202711029, "grad_norm": 0.048420678824186325, "learning_rate": 0.01, "loss": 1.9576, "step": 60276 }, { "epoch": 6.190080098582872, "grad_norm": 0.11455632746219635, "learning_rate": 0.01, "loss": 1.9589, "step": 60279 }, { "epoch": 6.190388170055453, "grad_norm": 0.08793000131845474, "learning_rate": 0.01, "loss": 1.9864, "step": 60282 }, { "epoch": 6.190696241528035, "grad_norm": 0.0460558645427227, "learning_rate": 0.01, "loss": 1.9712, "step": 60285 }, { "epoch": 6.191004313000616, "grad_norm": 0.0427497997879982, "learning_rate": 0.01, "loss": 1.9849, "step": 60288 }, { "epoch": 6.191312384473198, "grad_norm": 0.04136178269982338, "learning_rate": 0.01, "loss": 2.0002, "step": 60291 }, { "epoch": 6.191620455945779, "grad_norm": 0.06737589836120605, "learning_rate": 0.01, "loss": 1.9883, "step": 60294 }, { "epoch": 6.191928527418361, "grad_norm": 0.05001933127641678, "learning_rate": 0.01, "loss": 1.9759, "step": 60297 }, { "epoch": 6.1922365988909425, "grad_norm": 0.13523389399051666, "learning_rate": 0.01, "loss": 1.9871, "step": 60300 }, { "epoch": 6.1925446703635245, "grad_norm": 0.09118752926588058, "learning_rate": 0.01, "loss": 1.9865, "step": 60303 }, { "epoch": 6.192852741836106, "grad_norm": 0.061937130987644196, "learning_rate": 0.01, "loss": 1.9935, "step": 60306 }, { "epoch": 6.193160813308688, "grad_norm": 0.09417203068733215, "learning_rate": 0.01, "loss": 1.9688, "step": 60309 }, { "epoch": 6.193468884781269, "grad_norm": 0.1443474441766739, "learning_rate": 0.01, "loss": 1.9744, "step": 60312 }, { "epoch": 6.193776956253851, "grad_norm": 0.12889619171619415, "learning_rate": 0.01, "loss": 1.9847, "step": 60315 }, { "epoch": 6.194085027726432, "grad_norm": 0.08010325580835342, "learning_rate": 0.01, "loss": 1.9624, "step": 60318 }, { "epoch": 6.194393099199014, "grad_norm": 0.0863533690571785, "learning_rate": 0.01, "loss": 2.0098, "step": 60321 }, { "epoch": 6.194701170671596, "grad_norm": 0.0709909126162529, "learning_rate": 0.01, "loss": 1.9902, "step": 60324 }, { "epoch": 6.195009242144177, "grad_norm": 0.04236047342419624, "learning_rate": 0.01, "loss": 2.0178, "step": 60327 }, { "epoch": 6.195317313616759, "grad_norm": 0.06927076727151871, "learning_rate": 0.01, "loss": 2.0021, "step": 60330 }, { "epoch": 6.195625385089341, "grad_norm": 0.07250656932592392, "learning_rate": 0.01, "loss": 1.9854, "step": 60333 }, { "epoch": 6.195933456561923, "grad_norm": 0.06588046252727509, "learning_rate": 0.01, "loss": 1.9782, "step": 60336 }, { "epoch": 6.196241528034504, "grad_norm": 0.07093591243028641, "learning_rate": 0.01, "loss": 1.9851, "step": 60339 }, { "epoch": 6.196549599507086, "grad_norm": 0.10864640772342682, "learning_rate": 0.01, "loss": 1.979, "step": 60342 }, { "epoch": 6.196857670979667, "grad_norm": 0.12769301235675812, "learning_rate": 0.01, "loss": 1.9879, "step": 60345 }, { "epoch": 6.197165742452249, "grad_norm": 0.03568786382675171, "learning_rate": 0.01, "loss": 1.983, "step": 60348 }, { "epoch": 6.19747381392483, "grad_norm": 0.08632007986307144, "learning_rate": 0.01, "loss": 1.9598, "step": 60351 }, { "epoch": 6.197781885397412, "grad_norm": 0.047804027795791626, "learning_rate": 0.01, "loss": 1.995, "step": 60354 }, { "epoch": 6.198089956869993, "grad_norm": 0.08226168155670166, "learning_rate": 0.01, "loss": 1.979, "step": 60357 }, { "epoch": 6.1983980283425755, "grad_norm": 0.06796471774578094, "learning_rate": 0.01, "loss": 1.9944, "step": 60360 }, { "epoch": 6.1987060998151575, "grad_norm": 0.05324438586831093, "learning_rate": 0.01, "loss": 1.9974, "step": 60363 }, { "epoch": 6.199014171287739, "grad_norm": 0.05389709025621414, "learning_rate": 0.01, "loss": 1.9995, "step": 60366 }, { "epoch": 6.199322242760321, "grad_norm": 0.04287771135568619, "learning_rate": 0.01, "loss": 1.9836, "step": 60369 }, { "epoch": 6.199630314232902, "grad_norm": 0.06289256364107132, "learning_rate": 0.01, "loss": 1.9918, "step": 60372 }, { "epoch": 6.199938385705484, "grad_norm": 0.04109280928969383, "learning_rate": 0.01, "loss": 1.9742, "step": 60375 }, { "epoch": 6.200246457178065, "grad_norm": 0.13262084126472473, "learning_rate": 0.01, "loss": 1.9935, "step": 60378 }, { "epoch": 6.200554528650647, "grad_norm": 0.05487149953842163, "learning_rate": 0.01, "loss": 1.9792, "step": 60381 }, { "epoch": 6.200862600123228, "grad_norm": 0.08410999178886414, "learning_rate": 0.01, "loss": 1.9633, "step": 60384 }, { "epoch": 6.20117067159581, "grad_norm": 0.09868727624416351, "learning_rate": 0.01, "loss": 1.9959, "step": 60387 }, { "epoch": 6.2014787430683915, "grad_norm": 0.09402374923229218, "learning_rate": 0.01, "loss": 1.9723, "step": 60390 }, { "epoch": 6.201786814540974, "grad_norm": 0.09398900717496872, "learning_rate": 0.01, "loss": 1.968, "step": 60393 }, { "epoch": 6.202094886013555, "grad_norm": 0.07040904462337494, "learning_rate": 0.01, "loss": 1.9725, "step": 60396 }, { "epoch": 6.202402957486137, "grad_norm": 0.09048958867788315, "learning_rate": 0.01, "loss": 1.9861, "step": 60399 }, { "epoch": 6.202711028958719, "grad_norm": 0.08249911665916443, "learning_rate": 0.01, "loss": 1.9723, "step": 60402 }, { "epoch": 6.2030191004313, "grad_norm": 0.1375819742679596, "learning_rate": 0.01, "loss": 1.9723, "step": 60405 }, { "epoch": 6.203327171903882, "grad_norm": 0.12005623430013657, "learning_rate": 0.01, "loss": 1.9528, "step": 60408 }, { "epoch": 6.203635243376463, "grad_norm": 0.08359742164611816, "learning_rate": 0.01, "loss": 1.9895, "step": 60411 }, { "epoch": 6.203943314849045, "grad_norm": 0.08574346452951431, "learning_rate": 0.01, "loss": 1.9863, "step": 60414 }, { "epoch": 6.204251386321626, "grad_norm": 0.04601282626390457, "learning_rate": 0.01, "loss": 1.9718, "step": 60417 }, { "epoch": 6.2045594577942085, "grad_norm": 0.04463723674416542, "learning_rate": 0.01, "loss": 1.965, "step": 60420 }, { "epoch": 6.20486752926679, "grad_norm": 0.05424230918288231, "learning_rate": 0.01, "loss": 2.0251, "step": 60423 }, { "epoch": 6.205175600739372, "grad_norm": 0.0748244971036911, "learning_rate": 0.01, "loss": 1.9804, "step": 60426 }, { "epoch": 6.205483672211953, "grad_norm": 0.05594291165471077, "learning_rate": 0.01, "loss": 2.0024, "step": 60429 }, { "epoch": 6.205791743684535, "grad_norm": 0.03947063907980919, "learning_rate": 0.01, "loss": 1.9796, "step": 60432 }, { "epoch": 6.206099815157116, "grad_norm": 0.037693090736866, "learning_rate": 0.01, "loss": 1.9803, "step": 60435 }, { "epoch": 6.206407886629698, "grad_norm": 0.09147128462791443, "learning_rate": 0.01, "loss": 1.955, "step": 60438 }, { "epoch": 6.206715958102279, "grad_norm": 0.03525812551379204, "learning_rate": 0.01, "loss": 1.9847, "step": 60441 }, { "epoch": 6.207024029574861, "grad_norm": 0.0658460482954979, "learning_rate": 0.01, "loss": 1.9751, "step": 60444 }, { "epoch": 6.207332101047443, "grad_norm": 0.10629269480705261, "learning_rate": 0.01, "loss": 1.9983, "step": 60447 }, { "epoch": 6.2076401725200245, "grad_norm": 0.07760986685752869, "learning_rate": 0.01, "loss": 1.9858, "step": 60450 }, { "epoch": 6.207948243992607, "grad_norm": 0.0387595109641552, "learning_rate": 0.01, "loss": 1.9586, "step": 60453 }, { "epoch": 6.208256315465188, "grad_norm": 0.04037671163678169, "learning_rate": 0.01, "loss": 2.0052, "step": 60456 }, { "epoch": 6.20856438693777, "grad_norm": 0.049021679908037186, "learning_rate": 0.01, "loss": 1.9937, "step": 60459 }, { "epoch": 6.208872458410351, "grad_norm": 0.04115486890077591, "learning_rate": 0.01, "loss": 2.0025, "step": 60462 }, { "epoch": 6.209180529882933, "grad_norm": 0.06775692850351334, "learning_rate": 0.01, "loss": 1.9843, "step": 60465 }, { "epoch": 6.209488601355514, "grad_norm": 0.04244436323642731, "learning_rate": 0.01, "loss": 1.9982, "step": 60468 }, { "epoch": 6.209796672828096, "grad_norm": 0.046518050134181976, "learning_rate": 0.01, "loss": 1.9756, "step": 60471 }, { "epoch": 6.210104744300677, "grad_norm": 0.058811433613300323, "learning_rate": 0.01, "loss": 1.9948, "step": 60474 }, { "epoch": 6.210412815773259, "grad_norm": 0.08617477864027023, "learning_rate": 0.01, "loss": 1.9902, "step": 60477 }, { "epoch": 6.2107208872458415, "grad_norm": 0.1398414820432663, "learning_rate": 0.01, "loss": 1.9772, "step": 60480 }, { "epoch": 6.211028958718423, "grad_norm": 0.04411006346344948, "learning_rate": 0.01, "loss": 2.0152, "step": 60483 }, { "epoch": 6.211337030191005, "grad_norm": 0.05274950712919235, "learning_rate": 0.01, "loss": 1.9776, "step": 60486 }, { "epoch": 6.211645101663586, "grad_norm": 0.03863611817359924, "learning_rate": 0.01, "loss": 1.9826, "step": 60489 }, { "epoch": 6.211953173136168, "grad_norm": 0.04664864018559456, "learning_rate": 0.01, "loss": 1.9876, "step": 60492 }, { "epoch": 6.212261244608749, "grad_norm": 0.03843964263796806, "learning_rate": 0.01, "loss": 1.9772, "step": 60495 }, { "epoch": 6.212569316081331, "grad_norm": 0.02867240644991398, "learning_rate": 0.01, "loss": 1.9705, "step": 60498 }, { "epoch": 6.212877387553912, "grad_norm": 0.03859257698059082, "learning_rate": 0.01, "loss": 1.9635, "step": 60501 }, { "epoch": 6.213185459026494, "grad_norm": 0.04639716073870659, "learning_rate": 0.01, "loss": 2.0111, "step": 60504 }, { "epoch": 6.2134935304990755, "grad_norm": 0.05346795171499252, "learning_rate": 0.01, "loss": 1.9781, "step": 60507 }, { "epoch": 6.2138016019716575, "grad_norm": 0.09336802363395691, "learning_rate": 0.01, "loss": 2.0141, "step": 60510 }, { "epoch": 6.214109673444239, "grad_norm": 0.10028377920389175, "learning_rate": 0.01, "loss": 1.9978, "step": 60513 }, { "epoch": 6.214417744916821, "grad_norm": 0.04280233010649681, "learning_rate": 0.01, "loss": 2.0057, "step": 60516 }, { "epoch": 6.214725816389402, "grad_norm": 0.07395056635141373, "learning_rate": 0.01, "loss": 2.0094, "step": 60519 }, { "epoch": 6.215033887861984, "grad_norm": 0.08924415707588196, "learning_rate": 0.01, "loss": 1.9956, "step": 60522 }, { "epoch": 6.215341959334566, "grad_norm": 0.06368307024240494, "learning_rate": 0.01, "loss": 1.9676, "step": 60525 }, { "epoch": 6.215650030807147, "grad_norm": 0.07443444430828094, "learning_rate": 0.01, "loss": 2.0003, "step": 60528 }, { "epoch": 6.215958102279729, "grad_norm": 0.06193174049258232, "learning_rate": 0.01, "loss": 1.9948, "step": 60531 }, { "epoch": 6.21626617375231, "grad_norm": 0.05765648931264877, "learning_rate": 0.01, "loss": 1.9736, "step": 60534 }, { "epoch": 6.216574245224892, "grad_norm": 0.07838603854179382, "learning_rate": 0.01, "loss": 1.9695, "step": 60537 }, { "epoch": 6.216882316697474, "grad_norm": 0.0865786075592041, "learning_rate": 0.01, "loss": 1.9764, "step": 60540 }, { "epoch": 6.217190388170056, "grad_norm": 0.06937874108552933, "learning_rate": 0.01, "loss": 1.9574, "step": 60543 }, { "epoch": 6.217498459642637, "grad_norm": 0.04611770063638687, "learning_rate": 0.01, "loss": 1.9854, "step": 60546 }, { "epoch": 6.217806531115219, "grad_norm": 0.10261213779449463, "learning_rate": 0.01, "loss": 1.9841, "step": 60549 }, { "epoch": 6.2181146025878, "grad_norm": 0.06451880186796188, "learning_rate": 0.01, "loss": 1.9798, "step": 60552 }, { "epoch": 6.218422674060382, "grad_norm": 0.06459330767393112, "learning_rate": 0.01, "loss": 1.9723, "step": 60555 }, { "epoch": 6.218730745532963, "grad_norm": 0.06935889273881912, "learning_rate": 0.01, "loss": 1.9817, "step": 60558 }, { "epoch": 6.219038817005545, "grad_norm": 0.07407599687576294, "learning_rate": 0.01, "loss": 2.0052, "step": 60561 }, { "epoch": 6.219346888478127, "grad_norm": 0.09501077234745026, "learning_rate": 0.01, "loss": 1.9886, "step": 60564 }, { "epoch": 6.2196549599507085, "grad_norm": 0.0854761153459549, "learning_rate": 0.01, "loss": 1.9674, "step": 60567 }, { "epoch": 6.2199630314232905, "grad_norm": 0.052260614931583405, "learning_rate": 0.01, "loss": 1.9851, "step": 60570 }, { "epoch": 6.220271102895872, "grad_norm": 0.03612583130598068, "learning_rate": 0.01, "loss": 1.9695, "step": 60573 }, { "epoch": 6.220579174368454, "grad_norm": 0.04795749485492706, "learning_rate": 0.01, "loss": 1.9709, "step": 60576 }, { "epoch": 6.220887245841035, "grad_norm": 0.055898912250995636, "learning_rate": 0.01, "loss": 1.9725, "step": 60579 }, { "epoch": 6.221195317313617, "grad_norm": 0.056687548756599426, "learning_rate": 0.01, "loss": 1.9871, "step": 60582 }, { "epoch": 6.221503388786198, "grad_norm": 0.052614741027355194, "learning_rate": 0.01, "loss": 1.9953, "step": 60585 }, { "epoch": 6.22181146025878, "grad_norm": 0.08860262483358383, "learning_rate": 0.01, "loss": 1.9751, "step": 60588 }, { "epoch": 6.222119531731361, "grad_norm": 0.10606038570404053, "learning_rate": 0.01, "loss": 1.9732, "step": 60591 }, { "epoch": 6.222427603203943, "grad_norm": 0.05913405120372772, "learning_rate": 0.01, "loss": 1.9739, "step": 60594 }, { "epoch": 6.2227356746765246, "grad_norm": 0.05625823140144348, "learning_rate": 0.01, "loss": 1.9921, "step": 60597 }, { "epoch": 6.223043746149107, "grad_norm": 0.10503005236387253, "learning_rate": 0.01, "loss": 2.0125, "step": 60600 }, { "epoch": 6.223351817621689, "grad_norm": 0.06931479275226593, "learning_rate": 0.01, "loss": 1.979, "step": 60603 }, { "epoch": 6.22365988909427, "grad_norm": 0.07577981054782867, "learning_rate": 0.01, "loss": 1.9817, "step": 60606 }, { "epoch": 6.223967960566852, "grad_norm": 0.04593481123447418, "learning_rate": 0.01, "loss": 1.9587, "step": 60609 }, { "epoch": 6.224276032039433, "grad_norm": 0.040800996124744415, "learning_rate": 0.01, "loss": 1.976, "step": 60612 }, { "epoch": 6.224584103512015, "grad_norm": 0.035023946315050125, "learning_rate": 0.01, "loss": 1.9943, "step": 60615 }, { "epoch": 6.224892174984596, "grad_norm": 0.10616744309663773, "learning_rate": 0.01, "loss": 1.9685, "step": 60618 }, { "epoch": 6.225200246457178, "grad_norm": 0.06860567629337311, "learning_rate": 0.01, "loss": 1.9822, "step": 60621 }, { "epoch": 6.2255083179297594, "grad_norm": 0.06851069629192352, "learning_rate": 0.01, "loss": 1.9878, "step": 60624 }, { "epoch": 6.2258163894023415, "grad_norm": 0.06767469644546509, "learning_rate": 0.01, "loss": 1.9739, "step": 60627 }, { "epoch": 6.226124460874923, "grad_norm": 0.06542889773845673, "learning_rate": 0.01, "loss": 1.9793, "step": 60630 }, { "epoch": 6.226432532347505, "grad_norm": 0.0505717433989048, "learning_rate": 0.01, "loss": 1.9912, "step": 60633 }, { "epoch": 6.226740603820086, "grad_norm": 0.04307612031698227, "learning_rate": 0.01, "loss": 1.9811, "step": 60636 }, { "epoch": 6.227048675292668, "grad_norm": 0.12549397349357605, "learning_rate": 0.01, "loss": 1.9679, "step": 60639 }, { "epoch": 6.227356746765249, "grad_norm": 0.10073670744895935, "learning_rate": 0.01, "loss": 1.9931, "step": 60642 }, { "epoch": 6.227664818237831, "grad_norm": 0.042758628726005554, "learning_rate": 0.01, "loss": 1.9888, "step": 60645 }, { "epoch": 6.227972889710413, "grad_norm": 0.0379779078066349, "learning_rate": 0.01, "loss": 1.9946, "step": 60648 }, { "epoch": 6.228280961182994, "grad_norm": 0.05374911054968834, "learning_rate": 0.01, "loss": 1.9951, "step": 60651 }, { "epoch": 6.228589032655576, "grad_norm": 0.0438760407269001, "learning_rate": 0.01, "loss": 1.9998, "step": 60654 }, { "epoch": 6.228897104128158, "grad_norm": 0.05077781528234482, "learning_rate": 0.01, "loss": 2.0091, "step": 60657 }, { "epoch": 6.22920517560074, "grad_norm": 0.05097432807087898, "learning_rate": 0.01, "loss": 1.9708, "step": 60660 }, { "epoch": 6.229513247073321, "grad_norm": 0.061216987669467926, "learning_rate": 0.01, "loss": 1.9734, "step": 60663 }, { "epoch": 6.229821318545903, "grad_norm": 0.056796032935380936, "learning_rate": 0.01, "loss": 1.9859, "step": 60666 }, { "epoch": 6.230129390018484, "grad_norm": 0.04003208130598068, "learning_rate": 0.01, "loss": 1.9785, "step": 60669 }, { "epoch": 6.230437461491066, "grad_norm": 0.07271415740251541, "learning_rate": 0.01, "loss": 1.9655, "step": 60672 }, { "epoch": 6.230745532963647, "grad_norm": 0.07599439471960068, "learning_rate": 0.01, "loss": 1.9656, "step": 60675 }, { "epoch": 6.231053604436229, "grad_norm": 0.40395355224609375, "learning_rate": 0.01, "loss": 1.9841, "step": 60678 }, { "epoch": 6.231361675908811, "grad_norm": 0.10945183783769608, "learning_rate": 0.01, "loss": 2.0014, "step": 60681 }, { "epoch": 6.2316697473813925, "grad_norm": 0.10694218426942825, "learning_rate": 0.01, "loss": 1.9744, "step": 60684 }, { "epoch": 6.2319778188539745, "grad_norm": 0.05942070484161377, "learning_rate": 0.01, "loss": 2.0023, "step": 60687 }, { "epoch": 6.232285890326556, "grad_norm": 0.039438262581825256, "learning_rate": 0.01, "loss": 1.9747, "step": 60690 }, { "epoch": 6.232593961799138, "grad_norm": 0.042470790445804596, "learning_rate": 0.01, "loss": 1.9772, "step": 60693 }, { "epoch": 6.232902033271719, "grad_norm": 0.05736652761697769, "learning_rate": 0.01, "loss": 1.9556, "step": 60696 }, { "epoch": 6.233210104744301, "grad_norm": 0.03797580674290657, "learning_rate": 0.01, "loss": 1.9456, "step": 60699 }, { "epoch": 6.233518176216882, "grad_norm": 0.042780667543411255, "learning_rate": 0.01, "loss": 2.0001, "step": 60702 }, { "epoch": 6.233826247689464, "grad_norm": 0.04742565006017685, "learning_rate": 0.01, "loss": 1.9853, "step": 60705 }, { "epoch": 6.234134319162045, "grad_norm": 0.10806404799222946, "learning_rate": 0.01, "loss": 1.9911, "step": 60708 }, { "epoch": 6.234442390634627, "grad_norm": 0.09392410516738892, "learning_rate": 0.01, "loss": 1.9927, "step": 60711 }, { "epoch": 6.2347504621072085, "grad_norm": 0.06125201657414436, "learning_rate": 0.01, "loss": 1.996, "step": 60714 }, { "epoch": 6.235058533579791, "grad_norm": 0.08396472036838531, "learning_rate": 0.01, "loss": 1.9971, "step": 60717 }, { "epoch": 6.235366605052372, "grad_norm": 0.05900922417640686, "learning_rate": 0.01, "loss": 1.9767, "step": 60720 }, { "epoch": 6.235674676524954, "grad_norm": 0.10509319603443146, "learning_rate": 0.01, "loss": 2.003, "step": 60723 }, { "epoch": 6.235982747997536, "grad_norm": 0.13138900697231293, "learning_rate": 0.01, "loss": 1.9611, "step": 60726 }, { "epoch": 6.236290819470117, "grad_norm": 0.05676103010773659, "learning_rate": 0.01, "loss": 1.984, "step": 60729 }, { "epoch": 6.236598890942699, "grad_norm": 0.04155607149004936, "learning_rate": 0.01, "loss": 1.9766, "step": 60732 }, { "epoch": 6.23690696241528, "grad_norm": 0.03284106031060219, "learning_rate": 0.01, "loss": 1.9932, "step": 60735 }, { "epoch": 6.237215033887862, "grad_norm": 0.03673785179853439, "learning_rate": 0.01, "loss": 1.9973, "step": 60738 }, { "epoch": 6.237523105360443, "grad_norm": 0.04972091317176819, "learning_rate": 0.01, "loss": 1.9757, "step": 60741 }, { "epoch": 6.2378311768330255, "grad_norm": 0.06821681559085846, "learning_rate": 0.01, "loss": 1.987, "step": 60744 }, { "epoch": 6.238139248305607, "grad_norm": 0.14047065377235413, "learning_rate": 0.01, "loss": 1.9633, "step": 60747 }, { "epoch": 6.238447319778189, "grad_norm": 0.09423762559890747, "learning_rate": 0.01, "loss": 1.9788, "step": 60750 }, { "epoch": 6.23875539125077, "grad_norm": 0.08889120817184448, "learning_rate": 0.01, "loss": 1.9857, "step": 60753 }, { "epoch": 6.239063462723352, "grad_norm": 0.07696396857500076, "learning_rate": 0.01, "loss": 1.9761, "step": 60756 }, { "epoch": 6.239371534195933, "grad_norm": 0.13203272223472595, "learning_rate": 0.01, "loss": 1.9983, "step": 60759 }, { "epoch": 6.239679605668515, "grad_norm": 0.07637298852205276, "learning_rate": 0.01, "loss": 1.9643, "step": 60762 }, { "epoch": 6.239987677141097, "grad_norm": 0.035956770181655884, "learning_rate": 0.01, "loss": 1.9915, "step": 60765 }, { "epoch": 6.240295748613678, "grad_norm": 0.051339924335479736, "learning_rate": 0.01, "loss": 2.0075, "step": 60768 }, { "epoch": 6.24060382008626, "grad_norm": 0.05336092412471771, "learning_rate": 0.01, "loss": 1.9703, "step": 60771 }, { "epoch": 6.2409118915588415, "grad_norm": 0.061884574592113495, "learning_rate": 0.01, "loss": 1.9873, "step": 60774 }, { "epoch": 6.241219963031424, "grad_norm": 0.0763407051563263, "learning_rate": 0.01, "loss": 1.9792, "step": 60777 }, { "epoch": 6.241528034504005, "grad_norm": 0.06376196444034576, "learning_rate": 0.01, "loss": 1.9804, "step": 60780 }, { "epoch": 6.241836105976587, "grad_norm": 0.04951758310198784, "learning_rate": 0.01, "loss": 1.9824, "step": 60783 }, { "epoch": 6.242144177449168, "grad_norm": 0.03801729902625084, "learning_rate": 0.01, "loss": 1.9851, "step": 60786 }, { "epoch": 6.24245224892175, "grad_norm": 0.05235125496983528, "learning_rate": 0.01, "loss": 1.9879, "step": 60789 }, { "epoch": 6.242760320394331, "grad_norm": 0.09875135123729706, "learning_rate": 0.01, "loss": 1.9864, "step": 60792 }, { "epoch": 6.243068391866913, "grad_norm": 0.07917656749486923, "learning_rate": 0.01, "loss": 1.9791, "step": 60795 }, { "epoch": 6.243376463339494, "grad_norm": 0.09031876921653748, "learning_rate": 0.01, "loss": 1.9829, "step": 60798 }, { "epoch": 6.243684534812076, "grad_norm": 0.03974262252449989, "learning_rate": 0.01, "loss": 2.0007, "step": 60801 }, { "epoch": 6.2439926062846585, "grad_norm": 0.10321113467216492, "learning_rate": 0.01, "loss": 1.997, "step": 60804 }, { "epoch": 6.24430067775724, "grad_norm": 0.041257478296756744, "learning_rate": 0.01, "loss": 1.978, "step": 60807 }, { "epoch": 6.244608749229822, "grad_norm": 0.054699040949344635, "learning_rate": 0.01, "loss": 1.9478, "step": 60810 }, { "epoch": 6.244916820702403, "grad_norm": 0.06867776066064835, "learning_rate": 0.01, "loss": 1.9855, "step": 60813 }, { "epoch": 6.245224892174985, "grad_norm": 0.04628169536590576, "learning_rate": 0.01, "loss": 1.9868, "step": 60816 }, { "epoch": 6.245532963647566, "grad_norm": 0.04721110686659813, "learning_rate": 0.01, "loss": 1.981, "step": 60819 }, { "epoch": 6.245841035120148, "grad_norm": 0.1265910118818283, "learning_rate": 0.01, "loss": 1.9804, "step": 60822 }, { "epoch": 6.246149106592729, "grad_norm": 0.09225190430879593, "learning_rate": 0.01, "loss": 1.9928, "step": 60825 }, { "epoch": 6.246457178065311, "grad_norm": 0.06333420425653458, "learning_rate": 0.01, "loss": 1.9758, "step": 60828 }, { "epoch": 6.2467652495378925, "grad_norm": 0.06048038601875305, "learning_rate": 0.01, "loss": 1.9889, "step": 60831 }, { "epoch": 6.2470733210104745, "grad_norm": 0.032611507922410965, "learning_rate": 0.01, "loss": 1.9895, "step": 60834 }, { "epoch": 6.247381392483056, "grad_norm": 0.038066837936639786, "learning_rate": 0.01, "loss": 1.9914, "step": 60837 }, { "epoch": 6.247689463955638, "grad_norm": 0.0952901542186737, "learning_rate": 0.01, "loss": 1.979, "step": 60840 }, { "epoch": 6.247997535428219, "grad_norm": 0.051439084112644196, "learning_rate": 0.01, "loss": 1.9403, "step": 60843 }, { "epoch": 6.248305606900801, "grad_norm": 0.12137739360332489, "learning_rate": 0.01, "loss": 1.9708, "step": 60846 }, { "epoch": 6.248613678373383, "grad_norm": 0.04107406362891197, "learning_rate": 0.01, "loss": 1.9545, "step": 60849 }, { "epoch": 6.248921749845964, "grad_norm": 0.053470056504011154, "learning_rate": 0.01, "loss": 1.9892, "step": 60852 }, { "epoch": 6.249229821318546, "grad_norm": 0.10296319425106049, "learning_rate": 0.01, "loss": 2.0136, "step": 60855 }, { "epoch": 6.249537892791127, "grad_norm": 0.05375886335968971, "learning_rate": 0.01, "loss": 1.9957, "step": 60858 }, { "epoch": 6.249845964263709, "grad_norm": 0.04653402417898178, "learning_rate": 0.01, "loss": 1.9702, "step": 60861 }, { "epoch": 6.250154035736291, "grad_norm": 0.04048166796565056, "learning_rate": 0.01, "loss": 1.9928, "step": 60864 }, { "epoch": 6.250462107208873, "grad_norm": 0.04350602254271507, "learning_rate": 0.01, "loss": 1.9798, "step": 60867 }, { "epoch": 6.250770178681454, "grad_norm": 0.06153450533747673, "learning_rate": 0.01, "loss": 1.9806, "step": 60870 }, { "epoch": 6.251078250154036, "grad_norm": 0.05672883987426758, "learning_rate": 0.01, "loss": 1.9943, "step": 60873 }, { "epoch": 6.251386321626617, "grad_norm": 0.04813450202345848, "learning_rate": 0.01, "loss": 1.9887, "step": 60876 }, { "epoch": 6.251694393099199, "grad_norm": 0.03838599845767021, "learning_rate": 0.01, "loss": 2.0072, "step": 60879 }, { "epoch": 6.252002464571781, "grad_norm": 0.04607195779681206, "learning_rate": 0.01, "loss": 1.97, "step": 60882 }, { "epoch": 6.252310536044362, "grad_norm": 0.06564761698246002, "learning_rate": 0.01, "loss": 1.9766, "step": 60885 }, { "epoch": 6.252618607516944, "grad_norm": 0.0656748041510582, "learning_rate": 0.01, "loss": 1.9651, "step": 60888 }, { "epoch": 6.2529266789895255, "grad_norm": 0.09336625039577484, "learning_rate": 0.01, "loss": 1.9875, "step": 60891 }, { "epoch": 6.2532347504621075, "grad_norm": 0.08100081235170364, "learning_rate": 0.01, "loss": 1.9658, "step": 60894 }, { "epoch": 6.253542821934689, "grad_norm": 0.06392189860343933, "learning_rate": 0.01, "loss": 1.975, "step": 60897 }, { "epoch": 6.253850893407271, "grad_norm": 0.11884801834821701, "learning_rate": 0.01, "loss": 1.9931, "step": 60900 }, { "epoch": 6.254158964879852, "grad_norm": 0.08046597987413406, "learning_rate": 0.01, "loss": 1.9856, "step": 60903 }, { "epoch": 6.254467036352434, "grad_norm": 0.03278736025094986, "learning_rate": 0.01, "loss": 1.966, "step": 60906 }, { "epoch": 6.254775107825015, "grad_norm": 0.043066561222076416, "learning_rate": 0.01, "loss": 1.9956, "step": 60909 }, { "epoch": 6.255083179297597, "grad_norm": 0.08042460680007935, "learning_rate": 0.01, "loss": 1.9978, "step": 60912 }, { "epoch": 6.255391250770178, "grad_norm": 0.05132247880101204, "learning_rate": 0.01, "loss": 1.9898, "step": 60915 }, { "epoch": 6.25569932224276, "grad_norm": 0.04719965532422066, "learning_rate": 0.01, "loss": 1.9818, "step": 60918 }, { "epoch": 6.2560073937153415, "grad_norm": 0.08247886598110199, "learning_rate": 0.01, "loss": 1.9532, "step": 60921 }, { "epoch": 6.256315465187924, "grad_norm": 0.0856601819396019, "learning_rate": 0.01, "loss": 2.0051, "step": 60924 }, { "epoch": 6.256623536660506, "grad_norm": 0.06547616422176361, "learning_rate": 0.01, "loss": 1.9799, "step": 60927 }, { "epoch": 6.256931608133087, "grad_norm": 0.04583069309592247, "learning_rate": 0.01, "loss": 1.9743, "step": 60930 }, { "epoch": 6.257239679605669, "grad_norm": 0.1286034882068634, "learning_rate": 0.01, "loss": 2.0131, "step": 60933 }, { "epoch": 6.25754775107825, "grad_norm": 0.07549592107534409, "learning_rate": 0.01, "loss": 1.9873, "step": 60936 }, { "epoch": 6.257855822550832, "grad_norm": 0.06334639340639114, "learning_rate": 0.01, "loss": 1.9611, "step": 60939 }, { "epoch": 6.258163894023413, "grad_norm": 0.050806958228349686, "learning_rate": 0.01, "loss": 1.9642, "step": 60942 }, { "epoch": 6.258471965495995, "grad_norm": 0.03487928584218025, "learning_rate": 0.01, "loss": 1.9887, "step": 60945 }, { "epoch": 6.258780036968576, "grad_norm": 0.03992629423737526, "learning_rate": 0.01, "loss": 1.9632, "step": 60948 }, { "epoch": 6.2590881084411585, "grad_norm": 0.06908503919839859, "learning_rate": 0.01, "loss": 1.9727, "step": 60951 }, { "epoch": 6.25939617991374, "grad_norm": 0.10641307383775711, "learning_rate": 0.01, "loss": 1.9681, "step": 60954 }, { "epoch": 6.259704251386322, "grad_norm": 0.05809535086154938, "learning_rate": 0.01, "loss": 1.966, "step": 60957 }, { "epoch": 6.260012322858903, "grad_norm": 0.1139754205942154, "learning_rate": 0.01, "loss": 1.9958, "step": 60960 }, { "epoch": 6.260320394331485, "grad_norm": 0.1501069813966751, "learning_rate": 0.01, "loss": 1.9962, "step": 60963 }, { "epoch": 6.260628465804067, "grad_norm": 0.10279611498117447, "learning_rate": 0.01, "loss": 1.9849, "step": 60966 }, { "epoch": 6.260936537276648, "grad_norm": 0.04817177355289459, "learning_rate": 0.01, "loss": 1.9908, "step": 60969 }, { "epoch": 6.26124460874923, "grad_norm": 0.036743566393852234, "learning_rate": 0.01, "loss": 1.9561, "step": 60972 }, { "epoch": 6.261552680221811, "grad_norm": 0.041490793228149414, "learning_rate": 0.01, "loss": 1.9838, "step": 60975 }, { "epoch": 6.261860751694393, "grad_norm": 0.034102845937013626, "learning_rate": 0.01, "loss": 1.9866, "step": 60978 }, { "epoch": 6.2621688231669745, "grad_norm": 0.04929140582680702, "learning_rate": 0.01, "loss": 1.9571, "step": 60981 }, { "epoch": 6.262476894639557, "grad_norm": 0.1550266444683075, "learning_rate": 0.01, "loss": 1.9877, "step": 60984 }, { "epoch": 6.262784966112138, "grad_norm": 0.1096281111240387, "learning_rate": 0.01, "loss": 1.9832, "step": 60987 }, { "epoch": 6.26309303758472, "grad_norm": 0.057930875569581985, "learning_rate": 0.01, "loss": 1.9795, "step": 60990 }, { "epoch": 6.263401109057301, "grad_norm": 0.04393153637647629, "learning_rate": 0.01, "loss": 1.9468, "step": 60993 }, { "epoch": 6.263709180529883, "grad_norm": 0.039023157209157944, "learning_rate": 0.01, "loss": 1.9739, "step": 60996 }, { "epoch": 6.264017252002464, "grad_norm": 0.05385271832346916, "learning_rate": 0.01, "loss": 1.9809, "step": 60999 }, { "epoch": 6.264325323475046, "grad_norm": 0.11487401276826859, "learning_rate": 0.01, "loss": 1.9636, "step": 61002 }, { "epoch": 6.264633394947628, "grad_norm": 0.12395527213811874, "learning_rate": 0.01, "loss": 1.9657, "step": 61005 }, { "epoch": 6.264941466420209, "grad_norm": 0.05898820608854294, "learning_rate": 0.01, "loss": 1.9912, "step": 61008 }, { "epoch": 6.2652495378927915, "grad_norm": 0.06743215769529343, "learning_rate": 0.01, "loss": 1.9511, "step": 61011 }, { "epoch": 6.265557609365373, "grad_norm": 0.04019298404455185, "learning_rate": 0.01, "loss": 1.9813, "step": 61014 }, { "epoch": 6.265865680837955, "grad_norm": 0.053353726863861084, "learning_rate": 0.01, "loss": 1.9869, "step": 61017 }, { "epoch": 6.266173752310536, "grad_norm": 0.05660318583250046, "learning_rate": 0.01, "loss": 1.9907, "step": 61020 }, { "epoch": 6.266481823783118, "grad_norm": 0.08507215976715088, "learning_rate": 0.01, "loss": 2.0229, "step": 61023 }, { "epoch": 6.266789895255699, "grad_norm": 0.10339481383562088, "learning_rate": 0.01, "loss": 1.9907, "step": 61026 }, { "epoch": 6.267097966728281, "grad_norm": 0.06297086924314499, "learning_rate": 0.01, "loss": 1.9701, "step": 61029 }, { "epoch": 6.267406038200862, "grad_norm": 0.03891312703490257, "learning_rate": 0.01, "loss": 1.9829, "step": 61032 }, { "epoch": 6.267714109673444, "grad_norm": 0.05895547196269035, "learning_rate": 0.01, "loss": 1.984, "step": 61035 }, { "epoch": 6.2680221811460255, "grad_norm": 0.058469437062740326, "learning_rate": 0.01, "loss": 1.9852, "step": 61038 }, { "epoch": 6.2683302526186075, "grad_norm": 0.04402352124452591, "learning_rate": 0.01, "loss": 1.9456, "step": 61041 }, { "epoch": 6.268638324091189, "grad_norm": 0.07434606552124023, "learning_rate": 0.01, "loss": 1.9757, "step": 61044 }, { "epoch": 6.268946395563771, "grad_norm": 0.07005775719881058, "learning_rate": 0.01, "loss": 1.9783, "step": 61047 }, { "epoch": 6.269254467036353, "grad_norm": 0.08956246078014374, "learning_rate": 0.01, "loss": 1.9753, "step": 61050 }, { "epoch": 6.269562538508934, "grad_norm": 0.04520926997065544, "learning_rate": 0.01, "loss": 1.9769, "step": 61053 }, { "epoch": 6.269870609981516, "grad_norm": 0.09920626133680344, "learning_rate": 0.01, "loss": 1.9738, "step": 61056 }, { "epoch": 6.270178681454097, "grad_norm": 0.10885010659694672, "learning_rate": 0.01, "loss": 1.9978, "step": 61059 }, { "epoch": 6.270486752926679, "grad_norm": 0.0662059485912323, "learning_rate": 0.01, "loss": 1.9751, "step": 61062 }, { "epoch": 6.27079482439926, "grad_norm": 0.09873577207326889, "learning_rate": 0.01, "loss": 1.9955, "step": 61065 }, { "epoch": 6.271102895871842, "grad_norm": 0.0979217141866684, "learning_rate": 0.01, "loss": 1.9631, "step": 61068 }, { "epoch": 6.271410967344424, "grad_norm": 0.06409544497728348, "learning_rate": 0.01, "loss": 1.9676, "step": 61071 }, { "epoch": 6.271719038817006, "grad_norm": 0.05898788571357727, "learning_rate": 0.01, "loss": 1.9755, "step": 61074 }, { "epoch": 6.272027110289587, "grad_norm": 0.048941727727651596, "learning_rate": 0.01, "loss": 2.0042, "step": 61077 }, { "epoch": 6.272335181762169, "grad_norm": 0.04335403069853783, "learning_rate": 0.01, "loss": 1.9931, "step": 61080 }, { "epoch": 6.272643253234751, "grad_norm": 0.08291465789079666, "learning_rate": 0.01, "loss": 1.9936, "step": 61083 }, { "epoch": 6.272951324707332, "grad_norm": 0.09848134219646454, "learning_rate": 0.01, "loss": 1.9843, "step": 61086 }, { "epoch": 6.273259396179914, "grad_norm": 0.10949241369962692, "learning_rate": 0.01, "loss": 1.9686, "step": 61089 }, { "epoch": 6.273567467652495, "grad_norm": 0.07039642333984375, "learning_rate": 0.01, "loss": 1.9971, "step": 61092 }, { "epoch": 6.273875539125077, "grad_norm": 0.0475379079580307, "learning_rate": 0.01, "loss": 2.0068, "step": 61095 }, { "epoch": 6.2741836105976585, "grad_norm": 0.07531355321407318, "learning_rate": 0.01, "loss": 1.99, "step": 61098 }, { "epoch": 6.2744916820702406, "grad_norm": 0.04126337915658951, "learning_rate": 0.01, "loss": 1.9788, "step": 61101 }, { "epoch": 6.274799753542822, "grad_norm": 0.03969768062233925, "learning_rate": 0.01, "loss": 1.9955, "step": 61104 }, { "epoch": 6.275107825015404, "grad_norm": 0.06768359243869781, "learning_rate": 0.01, "loss": 1.9693, "step": 61107 }, { "epoch": 6.275415896487985, "grad_norm": 0.10613830387592316, "learning_rate": 0.01, "loss": 1.956, "step": 61110 }, { "epoch": 6.275723967960567, "grad_norm": 0.09580209851264954, "learning_rate": 0.01, "loss": 1.9867, "step": 61113 }, { "epoch": 6.276032039433148, "grad_norm": 0.06177694723010063, "learning_rate": 0.01, "loss": 1.9846, "step": 61116 }, { "epoch": 6.27634011090573, "grad_norm": 0.10451437532901764, "learning_rate": 0.01, "loss": 1.9762, "step": 61119 }, { "epoch": 6.276648182378311, "grad_norm": 0.1352027803659439, "learning_rate": 0.01, "loss": 1.993, "step": 61122 }, { "epoch": 6.276956253850893, "grad_norm": 0.0892416313290596, "learning_rate": 0.01, "loss": 1.9876, "step": 61125 }, { "epoch": 6.2772643253234754, "grad_norm": 0.04253193363547325, "learning_rate": 0.01, "loss": 1.9932, "step": 61128 }, { "epoch": 6.277572396796057, "grad_norm": 0.03616737946867943, "learning_rate": 0.01, "loss": 1.9522, "step": 61131 }, { "epoch": 6.277880468268639, "grad_norm": 0.09631824493408203, "learning_rate": 0.01, "loss": 1.9932, "step": 61134 }, { "epoch": 6.27818853974122, "grad_norm": 0.08627616614103317, "learning_rate": 0.01, "loss": 1.9745, "step": 61137 }, { "epoch": 6.278496611213802, "grad_norm": 0.07595238834619522, "learning_rate": 0.01, "loss": 1.9912, "step": 61140 }, { "epoch": 6.278804682686383, "grad_norm": 0.05115518346428871, "learning_rate": 0.01, "loss": 1.9808, "step": 61143 }, { "epoch": 6.279112754158965, "grad_norm": 0.07910732924938202, "learning_rate": 0.01, "loss": 1.9566, "step": 61146 }, { "epoch": 6.279420825631546, "grad_norm": 0.07935528457164764, "learning_rate": 0.01, "loss": 1.9893, "step": 61149 }, { "epoch": 6.279728897104128, "grad_norm": 0.09080187976360321, "learning_rate": 0.01, "loss": 1.9684, "step": 61152 }, { "epoch": 6.2800369685767095, "grad_norm": 0.07743295282125473, "learning_rate": 0.01, "loss": 1.9616, "step": 61155 }, { "epoch": 6.2803450400492915, "grad_norm": 0.07669807970523834, "learning_rate": 0.01, "loss": 1.9813, "step": 61158 }, { "epoch": 6.280653111521873, "grad_norm": 0.09190505743026733, "learning_rate": 0.01, "loss": 1.954, "step": 61161 }, { "epoch": 6.280961182994455, "grad_norm": 0.052665840834379196, "learning_rate": 0.01, "loss": 1.998, "step": 61164 }, { "epoch": 6.281269254467037, "grad_norm": 0.05133817344903946, "learning_rate": 0.01, "loss": 1.9841, "step": 61167 }, { "epoch": 6.281577325939618, "grad_norm": 0.038431648164987564, "learning_rate": 0.01, "loss": 1.9788, "step": 61170 }, { "epoch": 6.2818853974122, "grad_norm": 0.057378388941287994, "learning_rate": 0.01, "loss": 1.9623, "step": 61173 }, { "epoch": 6.282193468884781, "grad_norm": 0.08355475962162018, "learning_rate": 0.01, "loss": 1.9952, "step": 61176 }, { "epoch": 6.282501540357363, "grad_norm": 0.07666066288948059, "learning_rate": 0.01, "loss": 2.0007, "step": 61179 }, { "epoch": 6.282809611829944, "grad_norm": 0.12239743769168854, "learning_rate": 0.01, "loss": 1.9792, "step": 61182 }, { "epoch": 6.283117683302526, "grad_norm": 0.10445159673690796, "learning_rate": 0.01, "loss": 1.9845, "step": 61185 }, { "epoch": 6.283425754775108, "grad_norm": 0.04401460289955139, "learning_rate": 0.01, "loss": 1.9758, "step": 61188 }, { "epoch": 6.28373382624769, "grad_norm": 0.08247647434473038, "learning_rate": 0.01, "loss": 1.9774, "step": 61191 }, { "epoch": 6.284041897720271, "grad_norm": 0.06200701370835304, "learning_rate": 0.01, "loss": 1.9557, "step": 61194 }, { "epoch": 6.284349969192853, "grad_norm": 0.04233565926551819, "learning_rate": 0.01, "loss": 1.9883, "step": 61197 }, { "epoch": 6.284658040665434, "grad_norm": 0.09187982976436615, "learning_rate": 0.01, "loss": 1.9991, "step": 61200 }, { "epoch": 6.284966112138016, "grad_norm": 0.03373527526855469, "learning_rate": 0.01, "loss": 1.9599, "step": 61203 }, { "epoch": 6.285274183610598, "grad_norm": 0.09450080990791321, "learning_rate": 0.01, "loss": 1.9756, "step": 61206 }, { "epoch": 6.285582255083179, "grad_norm": 0.048632752150297165, "learning_rate": 0.01, "loss": 1.9823, "step": 61209 }, { "epoch": 6.285890326555761, "grad_norm": 0.11564646661281586, "learning_rate": 0.01, "loss": 1.9859, "step": 61212 }, { "epoch": 6.2861983980283425, "grad_norm": 0.04203884303569794, "learning_rate": 0.01, "loss": 1.9946, "step": 61215 }, { "epoch": 6.2865064695009245, "grad_norm": 0.08716481178998947, "learning_rate": 0.01, "loss": 1.9774, "step": 61218 }, { "epoch": 6.286814540973506, "grad_norm": 0.07940181344747543, "learning_rate": 0.01, "loss": 1.9667, "step": 61221 }, { "epoch": 6.287122612446088, "grad_norm": 0.06720132380723953, "learning_rate": 0.01, "loss": 1.9655, "step": 61224 }, { "epoch": 6.287430683918669, "grad_norm": 0.05566677451133728, "learning_rate": 0.01, "loss": 1.9849, "step": 61227 }, { "epoch": 6.287738755391251, "grad_norm": 0.09876339137554169, "learning_rate": 0.01, "loss": 1.9709, "step": 61230 }, { "epoch": 6.288046826863832, "grad_norm": 0.054778262972831726, "learning_rate": 0.01, "loss": 1.9652, "step": 61233 }, { "epoch": 6.288354898336414, "grad_norm": 0.06283058226108551, "learning_rate": 0.01, "loss": 1.9678, "step": 61236 }, { "epoch": 6.288662969808995, "grad_norm": 0.054917361587285995, "learning_rate": 0.01, "loss": 2.0248, "step": 61239 }, { "epoch": 6.288971041281577, "grad_norm": 0.10483105480670929, "learning_rate": 0.01, "loss": 1.9816, "step": 61242 }, { "epoch": 6.2892791127541585, "grad_norm": 0.07183388620615005, "learning_rate": 0.01, "loss": 1.9869, "step": 61245 }, { "epoch": 6.289587184226741, "grad_norm": 0.12913282215595245, "learning_rate": 0.01, "loss": 1.9802, "step": 61248 }, { "epoch": 6.289895255699323, "grad_norm": 0.1015811562538147, "learning_rate": 0.01, "loss": 1.992, "step": 61251 }, { "epoch": 6.290203327171904, "grad_norm": 0.09693120419979095, "learning_rate": 0.01, "loss": 1.9711, "step": 61254 }, { "epoch": 6.290511398644486, "grad_norm": 0.05746886506676674, "learning_rate": 0.01, "loss": 1.9914, "step": 61257 }, { "epoch": 6.290819470117067, "grad_norm": 0.07819090038537979, "learning_rate": 0.01, "loss": 1.9485, "step": 61260 }, { "epoch": 6.291127541589649, "grad_norm": 0.039794620126485825, "learning_rate": 0.01, "loss": 1.9623, "step": 61263 }, { "epoch": 6.29143561306223, "grad_norm": 0.06474246084690094, "learning_rate": 0.01, "loss": 1.9886, "step": 61266 }, { "epoch": 6.291743684534812, "grad_norm": 0.05018305405974388, "learning_rate": 0.01, "loss": 1.9792, "step": 61269 }, { "epoch": 6.292051756007393, "grad_norm": 0.10312799364328384, "learning_rate": 0.01, "loss": 1.9678, "step": 61272 }, { "epoch": 6.2923598274799755, "grad_norm": 0.0430091917514801, "learning_rate": 0.01, "loss": 1.9882, "step": 61275 }, { "epoch": 6.292667898952557, "grad_norm": 0.08931942284107208, "learning_rate": 0.01, "loss": 1.9937, "step": 61278 }, { "epoch": 6.292975970425139, "grad_norm": 0.09583033621311188, "learning_rate": 0.01, "loss": 1.98, "step": 61281 }, { "epoch": 6.293284041897721, "grad_norm": 0.048897985368967056, "learning_rate": 0.01, "loss": 1.9996, "step": 61284 }, { "epoch": 6.293592113370302, "grad_norm": 0.07638606429100037, "learning_rate": 0.01, "loss": 1.968, "step": 61287 }, { "epoch": 6.293900184842884, "grad_norm": 0.07323414832353592, "learning_rate": 0.01, "loss": 1.9631, "step": 61290 }, { "epoch": 6.294208256315465, "grad_norm": 0.08963891863822937, "learning_rate": 0.01, "loss": 2.002, "step": 61293 }, { "epoch": 6.294516327788047, "grad_norm": 0.047504641115665436, "learning_rate": 0.01, "loss": 1.962, "step": 61296 }, { "epoch": 6.294824399260628, "grad_norm": 0.04849178344011307, "learning_rate": 0.01, "loss": 1.983, "step": 61299 }, { "epoch": 6.29513247073321, "grad_norm": 0.06779339909553528, "learning_rate": 0.01, "loss": 1.9797, "step": 61302 }, { "epoch": 6.2954405422057915, "grad_norm": 0.03595037758350372, "learning_rate": 0.01, "loss": 1.9868, "step": 61305 }, { "epoch": 6.295748613678374, "grad_norm": 0.04232453927397728, "learning_rate": 0.01, "loss": 1.9948, "step": 61308 }, { "epoch": 6.296056685150955, "grad_norm": 0.03607799485325813, "learning_rate": 0.01, "loss": 1.9924, "step": 61311 }, { "epoch": 6.296364756623537, "grad_norm": 0.07026591151952744, "learning_rate": 0.01, "loss": 1.9812, "step": 61314 }, { "epoch": 6.296672828096118, "grad_norm": 0.09154605120420456, "learning_rate": 0.01, "loss": 1.9743, "step": 61317 }, { "epoch": 6.2969808995687, "grad_norm": 0.0732119083404541, "learning_rate": 0.01, "loss": 2.0005, "step": 61320 }, { "epoch": 6.297288971041281, "grad_norm": 0.12241560965776443, "learning_rate": 0.01, "loss": 1.9862, "step": 61323 }, { "epoch": 6.297597042513863, "grad_norm": 0.04552261903882027, "learning_rate": 0.01, "loss": 1.9819, "step": 61326 }, { "epoch": 6.297905113986445, "grad_norm": 0.10219983011484146, "learning_rate": 0.01, "loss": 1.9771, "step": 61329 }, { "epoch": 6.298213185459026, "grad_norm": 0.09614024311304092, "learning_rate": 0.01, "loss": 1.9891, "step": 61332 }, { "epoch": 6.2985212569316085, "grad_norm": 0.049760278314352036, "learning_rate": 0.01, "loss": 1.9792, "step": 61335 }, { "epoch": 6.29882932840419, "grad_norm": 0.11046284437179565, "learning_rate": 0.01, "loss": 1.9936, "step": 61338 }, { "epoch": 6.299137399876772, "grad_norm": 0.045198407024145126, "learning_rate": 0.01, "loss": 1.9717, "step": 61341 }, { "epoch": 6.299445471349353, "grad_norm": 0.0499146468937397, "learning_rate": 0.01, "loss": 1.9844, "step": 61344 }, { "epoch": 6.299753542821935, "grad_norm": 0.0700070783495903, "learning_rate": 0.01, "loss": 1.9667, "step": 61347 }, { "epoch": 6.300061614294516, "grad_norm": 0.042327769100666046, "learning_rate": 0.01, "loss": 1.9827, "step": 61350 }, { "epoch": 6.300369685767098, "grad_norm": 0.042714498937129974, "learning_rate": 0.01, "loss": 1.98, "step": 61353 }, { "epoch": 6.300677757239679, "grad_norm": 0.06567323952913284, "learning_rate": 0.01, "loss": 1.9802, "step": 61356 }, { "epoch": 6.300985828712261, "grad_norm": 0.07993308454751968, "learning_rate": 0.01, "loss": 1.9802, "step": 61359 }, { "epoch": 6.3012939001848425, "grad_norm": 0.08032864332199097, "learning_rate": 0.01, "loss": 2.0013, "step": 61362 }, { "epoch": 6.3016019716574245, "grad_norm": 0.05398751050233841, "learning_rate": 0.01, "loss": 1.9286, "step": 61365 }, { "epoch": 6.301910043130006, "grad_norm": 0.04342971369624138, "learning_rate": 0.01, "loss": 1.9741, "step": 61368 }, { "epoch": 6.302218114602588, "grad_norm": 0.0811370387673378, "learning_rate": 0.01, "loss": 1.9712, "step": 61371 }, { "epoch": 6.30252618607517, "grad_norm": 0.09355277568101883, "learning_rate": 0.01, "loss": 2.0003, "step": 61374 }, { "epoch": 6.302834257547751, "grad_norm": 0.06557541340589523, "learning_rate": 0.01, "loss": 1.9891, "step": 61377 }, { "epoch": 6.303142329020333, "grad_norm": 0.038711193948984146, "learning_rate": 0.01, "loss": 1.9859, "step": 61380 }, { "epoch": 6.303450400492914, "grad_norm": 0.04032447189092636, "learning_rate": 0.01, "loss": 1.9781, "step": 61383 }, { "epoch": 6.303758471965496, "grad_norm": 0.03779391944408417, "learning_rate": 0.01, "loss": 1.9768, "step": 61386 }, { "epoch": 6.304066543438077, "grad_norm": 0.054389867931604385, "learning_rate": 0.01, "loss": 1.994, "step": 61389 }, { "epoch": 6.304374614910659, "grad_norm": 0.0877804309129715, "learning_rate": 0.01, "loss": 1.9693, "step": 61392 }, { "epoch": 6.304682686383241, "grad_norm": 0.06545720249414444, "learning_rate": 0.01, "loss": 1.9409, "step": 61395 }, { "epoch": 6.304990757855823, "grad_norm": 0.10159642994403839, "learning_rate": 0.01, "loss": 1.9981, "step": 61398 }, { "epoch": 6.305298829328404, "grad_norm": 0.045314956456422806, "learning_rate": 0.01, "loss": 1.9609, "step": 61401 }, { "epoch": 6.305606900800986, "grad_norm": 0.1006140187382698, "learning_rate": 0.01, "loss": 1.962, "step": 61404 }, { "epoch": 6.305914972273568, "grad_norm": 0.09133684635162354, "learning_rate": 0.01, "loss": 1.9624, "step": 61407 }, { "epoch": 6.306223043746149, "grad_norm": 0.11910711973905563, "learning_rate": 0.01, "loss": 1.9799, "step": 61410 }, { "epoch": 6.306531115218731, "grad_norm": 0.11194338649511337, "learning_rate": 0.01, "loss": 1.9856, "step": 61413 }, { "epoch": 6.306839186691312, "grad_norm": 0.050022371113300323, "learning_rate": 0.01, "loss": 1.957, "step": 61416 }, { "epoch": 6.307147258163894, "grad_norm": 0.039385128766298294, "learning_rate": 0.01, "loss": 1.9669, "step": 61419 }, { "epoch": 6.3074553296364755, "grad_norm": 0.048881761729717255, "learning_rate": 0.01, "loss": 2.0099, "step": 61422 }, { "epoch": 6.3077634011090575, "grad_norm": 0.062211450189352036, "learning_rate": 0.01, "loss": 1.9975, "step": 61425 }, { "epoch": 6.308071472581639, "grad_norm": 0.047393426299095154, "learning_rate": 0.01, "loss": 1.9926, "step": 61428 }, { "epoch": 6.308379544054221, "grad_norm": 0.03558918833732605, "learning_rate": 0.01, "loss": 1.9794, "step": 61431 }, { "epoch": 6.308687615526802, "grad_norm": 0.05444757267832756, "learning_rate": 0.01, "loss": 1.9766, "step": 61434 }, { "epoch": 6.308995686999384, "grad_norm": 0.0579729862511158, "learning_rate": 0.01, "loss": 1.9952, "step": 61437 }, { "epoch": 6.309303758471965, "grad_norm": 0.10818339139223099, "learning_rate": 0.01, "loss": 1.9707, "step": 61440 }, { "epoch": 6.309611829944547, "grad_norm": 0.053433943539857864, "learning_rate": 0.01, "loss": 1.9886, "step": 61443 }, { "epoch": 6.309919901417128, "grad_norm": 0.09320402890443802, "learning_rate": 0.01, "loss": 1.9363, "step": 61446 }, { "epoch": 6.31022797288971, "grad_norm": 0.10474387556314468, "learning_rate": 0.01, "loss": 2.0013, "step": 61449 }, { "epoch": 6.310536044362292, "grad_norm": 0.059648603200912476, "learning_rate": 0.01, "loss": 1.9668, "step": 61452 }, { "epoch": 6.310844115834874, "grad_norm": 0.05086411163210869, "learning_rate": 0.01, "loss": 1.9748, "step": 61455 }, { "epoch": 6.311152187307456, "grad_norm": 0.05620935931801796, "learning_rate": 0.01, "loss": 1.9704, "step": 61458 }, { "epoch": 6.311460258780037, "grad_norm": 0.07844830304384232, "learning_rate": 0.01, "loss": 1.9962, "step": 61461 }, { "epoch": 6.311768330252619, "grad_norm": 0.1557673066854477, "learning_rate": 0.01, "loss": 1.9848, "step": 61464 }, { "epoch": 6.3120764017252, "grad_norm": 0.09628250449895859, "learning_rate": 0.01, "loss": 1.9956, "step": 61467 }, { "epoch": 6.312384473197782, "grad_norm": 0.045699939131736755, "learning_rate": 0.01, "loss": 2.0116, "step": 61470 }, { "epoch": 6.312692544670363, "grad_norm": 0.033145416527986526, "learning_rate": 0.01, "loss": 1.9749, "step": 61473 }, { "epoch": 6.313000616142945, "grad_norm": 0.03600894287228584, "learning_rate": 0.01, "loss": 1.9406, "step": 61476 }, { "epoch": 6.313308687615526, "grad_norm": 0.09642411768436432, "learning_rate": 0.01, "loss": 1.993, "step": 61479 }, { "epoch": 6.3136167590881085, "grad_norm": 0.047719743102788925, "learning_rate": 0.01, "loss": 1.9714, "step": 61482 }, { "epoch": 6.3139248305606905, "grad_norm": 0.0641685277223587, "learning_rate": 0.01, "loss": 1.9828, "step": 61485 }, { "epoch": 6.314232902033272, "grad_norm": 0.06864326447248459, "learning_rate": 0.01, "loss": 1.9792, "step": 61488 }, { "epoch": 6.314540973505854, "grad_norm": 0.062220584601163864, "learning_rate": 0.01, "loss": 1.988, "step": 61491 }, { "epoch": 6.314849044978435, "grad_norm": 0.0497710257768631, "learning_rate": 0.01, "loss": 1.9837, "step": 61494 }, { "epoch": 6.315157116451017, "grad_norm": 0.06263673305511475, "learning_rate": 0.01, "loss": 1.9553, "step": 61497 }, { "epoch": 6.315465187923598, "grad_norm": 0.1437608301639557, "learning_rate": 0.01, "loss": 1.9877, "step": 61500 }, { "epoch": 6.31577325939618, "grad_norm": 0.1427847445011139, "learning_rate": 0.01, "loss": 1.9823, "step": 61503 }, { "epoch": 6.316081330868761, "grad_norm": 0.06643825769424438, "learning_rate": 0.01, "loss": 1.9792, "step": 61506 }, { "epoch": 6.316389402341343, "grad_norm": 0.05207568779587746, "learning_rate": 0.01, "loss": 1.966, "step": 61509 }, { "epoch": 6.3166974738139245, "grad_norm": 0.05009368434548378, "learning_rate": 0.01, "loss": 2.0048, "step": 61512 }, { "epoch": 6.317005545286507, "grad_norm": 0.060909830033779144, "learning_rate": 0.01, "loss": 2.0055, "step": 61515 }, { "epoch": 6.317313616759088, "grad_norm": 0.050222769379615784, "learning_rate": 0.01, "loss": 2.0128, "step": 61518 }, { "epoch": 6.31762168823167, "grad_norm": 0.03374829515814781, "learning_rate": 0.01, "loss": 1.9725, "step": 61521 }, { "epoch": 6.317929759704251, "grad_norm": 0.07113096117973328, "learning_rate": 0.01, "loss": 1.9825, "step": 61524 }, { "epoch": 6.318237831176833, "grad_norm": 0.061376191675662994, "learning_rate": 0.01, "loss": 1.9869, "step": 61527 }, { "epoch": 6.318545902649415, "grad_norm": 0.07323316484689713, "learning_rate": 0.01, "loss": 1.9764, "step": 61530 }, { "epoch": 6.318853974121996, "grad_norm": 0.07050851732492447, "learning_rate": 0.01, "loss": 1.9988, "step": 61533 }, { "epoch": 6.319162045594578, "grad_norm": 0.04889465495944023, "learning_rate": 0.01, "loss": 1.9957, "step": 61536 }, { "epoch": 6.319470117067159, "grad_norm": 0.10975257307291031, "learning_rate": 0.01, "loss": 1.9758, "step": 61539 }, { "epoch": 6.3197781885397415, "grad_norm": 0.06909262388944626, "learning_rate": 0.01, "loss": 1.9868, "step": 61542 }, { "epoch": 6.320086260012323, "grad_norm": 0.0708390325307846, "learning_rate": 0.01, "loss": 1.9606, "step": 61545 }, { "epoch": 6.320394331484905, "grad_norm": 0.04191325604915619, "learning_rate": 0.01, "loss": 1.993, "step": 61548 }, { "epoch": 6.320702402957486, "grad_norm": 0.04027121514081955, "learning_rate": 0.01, "loss": 1.9663, "step": 61551 }, { "epoch": 6.321010474430068, "grad_norm": 0.03705594688653946, "learning_rate": 0.01, "loss": 1.9917, "step": 61554 }, { "epoch": 6.321318545902649, "grad_norm": 0.04737750440835953, "learning_rate": 0.01, "loss": 1.989, "step": 61557 }, { "epoch": 6.321626617375231, "grad_norm": 0.1080993041396141, "learning_rate": 0.01, "loss": 1.9938, "step": 61560 }, { "epoch": 6.321934688847812, "grad_norm": 0.08178503066301346, "learning_rate": 0.01, "loss": 1.9862, "step": 61563 }, { "epoch": 6.322242760320394, "grad_norm": 0.09055802971124649, "learning_rate": 0.01, "loss": 1.9869, "step": 61566 }, { "epoch": 6.3225508317929755, "grad_norm": 0.03342537209391594, "learning_rate": 0.01, "loss": 1.9723, "step": 61569 }, { "epoch": 6.3228589032655576, "grad_norm": 0.12082437425851822, "learning_rate": 0.01, "loss": 1.9915, "step": 61572 }, { "epoch": 6.32316697473814, "grad_norm": 0.05053761973977089, "learning_rate": 0.01, "loss": 1.9779, "step": 61575 }, { "epoch": 6.323475046210721, "grad_norm": 0.0372505746781826, "learning_rate": 0.01, "loss": 1.9748, "step": 61578 }, { "epoch": 6.323783117683303, "grad_norm": 0.0640554130077362, "learning_rate": 0.01, "loss": 1.9961, "step": 61581 }, { "epoch": 6.324091189155884, "grad_norm": 0.04472392797470093, "learning_rate": 0.01, "loss": 1.9859, "step": 61584 }, { "epoch": 6.324399260628466, "grad_norm": 0.09325999766588211, "learning_rate": 0.01, "loss": 1.9669, "step": 61587 }, { "epoch": 6.324707332101047, "grad_norm": 0.07012490928173065, "learning_rate": 0.01, "loss": 1.9937, "step": 61590 }, { "epoch": 6.325015403573629, "grad_norm": 0.13324911892414093, "learning_rate": 0.01, "loss": 1.9792, "step": 61593 }, { "epoch": 6.32532347504621, "grad_norm": 0.074961818754673, "learning_rate": 0.01, "loss": 1.9883, "step": 61596 }, { "epoch": 6.3256315465187924, "grad_norm": 0.06711915135383606, "learning_rate": 0.01, "loss": 1.9935, "step": 61599 }, { "epoch": 6.325939617991374, "grad_norm": 0.12803415954113007, "learning_rate": 0.01, "loss": 2.0037, "step": 61602 }, { "epoch": 6.326247689463956, "grad_norm": 0.07260297238826752, "learning_rate": 0.01, "loss": 1.9913, "step": 61605 }, { "epoch": 6.326555760936538, "grad_norm": 0.16781297326087952, "learning_rate": 0.01, "loss": 1.9804, "step": 61608 }, { "epoch": 6.326863832409119, "grad_norm": 0.05119356885552406, "learning_rate": 0.01, "loss": 2.0217, "step": 61611 }, { "epoch": 6.327171903881701, "grad_norm": 0.05765308439731598, "learning_rate": 0.01, "loss": 1.9813, "step": 61614 }, { "epoch": 6.327479975354282, "grad_norm": 0.039318110793828964, "learning_rate": 0.01, "loss": 1.9755, "step": 61617 }, { "epoch": 6.327788046826864, "grad_norm": 0.04655304551124573, "learning_rate": 0.01, "loss": 1.9894, "step": 61620 }, { "epoch": 6.328096118299445, "grad_norm": 0.08642113953828812, "learning_rate": 0.01, "loss": 1.9762, "step": 61623 }, { "epoch": 6.328404189772027, "grad_norm": 0.04844063147902489, "learning_rate": 0.01, "loss": 1.9705, "step": 61626 }, { "epoch": 6.3287122612446085, "grad_norm": 0.058742884546518326, "learning_rate": 0.01, "loss": 2.0026, "step": 61629 }, { "epoch": 6.3290203327171906, "grad_norm": 0.036662664264440536, "learning_rate": 0.01, "loss": 1.9805, "step": 61632 }, { "epoch": 6.329328404189772, "grad_norm": 0.08772391080856323, "learning_rate": 0.01, "loss": 2.0158, "step": 61635 }, { "epoch": 6.329636475662354, "grad_norm": 0.06502344459295273, "learning_rate": 0.01, "loss": 1.9812, "step": 61638 }, { "epoch": 6.329944547134935, "grad_norm": 0.06459985673427582, "learning_rate": 0.01, "loss": 1.9836, "step": 61641 }, { "epoch": 6.330252618607517, "grad_norm": 0.03769855946302414, "learning_rate": 0.01, "loss": 1.9956, "step": 61644 }, { "epoch": 6.330560690080098, "grad_norm": 0.046935372054576874, "learning_rate": 0.01, "loss": 2.0003, "step": 61647 }, { "epoch": 6.33086876155268, "grad_norm": 0.04765419661998749, "learning_rate": 0.01, "loss": 1.9738, "step": 61650 }, { "epoch": 6.331176833025262, "grad_norm": 0.07804597169160843, "learning_rate": 0.01, "loss": 1.961, "step": 61653 }, { "epoch": 6.331484904497843, "grad_norm": 0.04642481356859207, "learning_rate": 0.01, "loss": 1.9794, "step": 61656 }, { "epoch": 6.3317929759704255, "grad_norm": 0.11623187363147736, "learning_rate": 0.01, "loss": 2.0001, "step": 61659 }, { "epoch": 6.332101047443007, "grad_norm": 0.08578827977180481, "learning_rate": 0.01, "loss": 1.9888, "step": 61662 }, { "epoch": 6.332409118915589, "grad_norm": 0.05141110345721245, "learning_rate": 0.01, "loss": 1.9732, "step": 61665 }, { "epoch": 6.33271719038817, "grad_norm": 0.03601311147212982, "learning_rate": 0.01, "loss": 1.9787, "step": 61668 }, { "epoch": 6.333025261860752, "grad_norm": 0.06763188540935516, "learning_rate": 0.01, "loss": 1.9865, "step": 61671 }, { "epoch": 6.333333333333333, "grad_norm": 0.08627673238515854, "learning_rate": 0.01, "loss": 2.0061, "step": 61674 }, { "epoch": 6.333641404805915, "grad_norm": 0.06787147372961044, "learning_rate": 0.01, "loss": 1.9816, "step": 61677 }, { "epoch": 6.333949476278496, "grad_norm": 0.08557649701833725, "learning_rate": 0.01, "loss": 1.9927, "step": 61680 }, { "epoch": 6.334257547751078, "grad_norm": 0.08303692191839218, "learning_rate": 0.01, "loss": 1.9503, "step": 61683 }, { "epoch": 6.33456561922366, "grad_norm": 0.06721941381692886, "learning_rate": 0.01, "loss": 1.9774, "step": 61686 }, { "epoch": 6.3348736906962415, "grad_norm": 0.11784909665584564, "learning_rate": 0.01, "loss": 1.969, "step": 61689 }, { "epoch": 6.335181762168824, "grad_norm": 0.04680579900741577, "learning_rate": 0.01, "loss": 1.9926, "step": 61692 }, { "epoch": 6.335489833641405, "grad_norm": 0.10561893880367279, "learning_rate": 0.01, "loss": 1.9924, "step": 61695 }, { "epoch": 6.335797905113987, "grad_norm": 0.10209949314594269, "learning_rate": 0.01, "loss": 1.9784, "step": 61698 }, { "epoch": 6.336105976586568, "grad_norm": 0.0697985514998436, "learning_rate": 0.01, "loss": 1.9715, "step": 61701 }, { "epoch": 6.33641404805915, "grad_norm": 0.04770383983850479, "learning_rate": 0.01, "loss": 1.9882, "step": 61704 }, { "epoch": 6.336722119531731, "grad_norm": 0.03596745431423187, "learning_rate": 0.01, "loss": 1.9572, "step": 61707 }, { "epoch": 6.337030191004313, "grad_norm": 0.05980326980352402, "learning_rate": 0.01, "loss": 1.9913, "step": 61710 }, { "epoch": 6.337338262476894, "grad_norm": 0.04213361069560051, "learning_rate": 0.01, "loss": 1.987, "step": 61713 }, { "epoch": 6.337646333949476, "grad_norm": 0.12276975810527802, "learning_rate": 0.01, "loss": 1.9571, "step": 61716 }, { "epoch": 6.337954405422058, "grad_norm": 0.04725951328873634, "learning_rate": 0.01, "loss": 1.9701, "step": 61719 }, { "epoch": 6.33826247689464, "grad_norm": 0.08252322673797607, "learning_rate": 0.01, "loss": 1.9917, "step": 61722 }, { "epoch": 6.338570548367221, "grad_norm": 0.12866702675819397, "learning_rate": 0.01, "loss": 1.9933, "step": 61725 }, { "epoch": 6.338878619839803, "grad_norm": 0.11567038297653198, "learning_rate": 0.01, "loss": 1.9661, "step": 61728 }, { "epoch": 6.339186691312385, "grad_norm": 0.048908136785030365, "learning_rate": 0.01, "loss": 1.9765, "step": 61731 }, { "epoch": 6.339494762784966, "grad_norm": 0.04868915677070618, "learning_rate": 0.01, "loss": 1.9635, "step": 61734 }, { "epoch": 6.339802834257548, "grad_norm": 0.03802793473005295, "learning_rate": 0.01, "loss": 1.9711, "step": 61737 }, { "epoch": 6.340110905730129, "grad_norm": 0.040892910212278366, "learning_rate": 0.01, "loss": 1.9691, "step": 61740 }, { "epoch": 6.340418977202711, "grad_norm": 0.057897284626960754, "learning_rate": 0.01, "loss": 1.9802, "step": 61743 }, { "epoch": 6.3407270486752925, "grad_norm": 0.034404922276735306, "learning_rate": 0.01, "loss": 1.9694, "step": 61746 }, { "epoch": 6.3410351201478745, "grad_norm": 0.05493491142988205, "learning_rate": 0.01, "loss": 1.9567, "step": 61749 }, { "epoch": 6.341343191620456, "grad_norm": 0.09743682295084, "learning_rate": 0.01, "loss": 1.9753, "step": 61752 }, { "epoch": 6.341651263093038, "grad_norm": 0.06471186131238937, "learning_rate": 0.01, "loss": 1.9743, "step": 61755 }, { "epoch": 6.341959334565619, "grad_norm": 0.07594270259141922, "learning_rate": 0.01, "loss": 2.0016, "step": 61758 }, { "epoch": 6.342267406038201, "grad_norm": 0.05741607025265694, "learning_rate": 0.01, "loss": 1.9866, "step": 61761 }, { "epoch": 6.342575477510782, "grad_norm": 0.07117846608161926, "learning_rate": 0.01, "loss": 1.9908, "step": 61764 }, { "epoch": 6.342883548983364, "grad_norm": 0.08182870596647263, "learning_rate": 0.01, "loss": 1.9964, "step": 61767 }, { "epoch": 6.343191620455945, "grad_norm": 0.06772911548614502, "learning_rate": 0.01, "loss": 2.0039, "step": 61770 }, { "epoch": 6.343499691928527, "grad_norm": 0.04902360588312149, "learning_rate": 0.01, "loss": 1.9784, "step": 61773 }, { "epoch": 6.343807763401109, "grad_norm": 0.12090668827295303, "learning_rate": 0.01, "loss": 1.9589, "step": 61776 }, { "epoch": 6.344115834873691, "grad_norm": 0.04338901489973068, "learning_rate": 0.01, "loss": 1.9793, "step": 61779 }, { "epoch": 6.344423906346273, "grad_norm": 0.07592718303203583, "learning_rate": 0.01, "loss": 1.9873, "step": 61782 }, { "epoch": 6.344731977818854, "grad_norm": 0.0540313795208931, "learning_rate": 0.01, "loss": 1.9724, "step": 61785 }, { "epoch": 6.345040049291436, "grad_norm": 0.10975626856088638, "learning_rate": 0.01, "loss": 1.9752, "step": 61788 }, { "epoch": 6.345348120764017, "grad_norm": 0.1409357637166977, "learning_rate": 0.01, "loss": 1.9704, "step": 61791 }, { "epoch": 6.345656192236599, "grad_norm": 0.10727324336767197, "learning_rate": 0.01, "loss": 1.9466, "step": 61794 }, { "epoch": 6.34596426370918, "grad_norm": 0.08473125845193863, "learning_rate": 0.01, "loss": 1.9707, "step": 61797 }, { "epoch": 6.346272335181762, "grad_norm": 0.06947775185108185, "learning_rate": 0.01, "loss": 1.979, "step": 61800 }, { "epoch": 6.346580406654343, "grad_norm": 0.06887286901473999, "learning_rate": 0.01, "loss": 1.979, "step": 61803 }, { "epoch": 6.3468884781269255, "grad_norm": 0.08204744011163712, "learning_rate": 0.01, "loss": 1.9594, "step": 61806 }, { "epoch": 6.3471965495995075, "grad_norm": 0.06321869790554047, "learning_rate": 0.01, "loss": 1.9524, "step": 61809 }, { "epoch": 6.347504621072089, "grad_norm": 0.06997507065534592, "learning_rate": 0.01, "loss": 1.9743, "step": 61812 }, { "epoch": 6.347812692544671, "grad_norm": 0.04468904808163643, "learning_rate": 0.01, "loss": 1.9758, "step": 61815 }, { "epoch": 6.348120764017252, "grad_norm": 0.07359863072633743, "learning_rate": 0.01, "loss": 1.9894, "step": 61818 }, { "epoch": 6.348428835489834, "grad_norm": 0.08013535290956497, "learning_rate": 0.01, "loss": 1.9686, "step": 61821 }, { "epoch": 6.348736906962415, "grad_norm": 0.08246944099664688, "learning_rate": 0.01, "loss": 1.9988, "step": 61824 }, { "epoch": 6.349044978434997, "grad_norm": 0.05869217962026596, "learning_rate": 0.01, "loss": 1.9918, "step": 61827 }, { "epoch": 6.349353049907578, "grad_norm": 0.04672044888138771, "learning_rate": 0.01, "loss": 1.9645, "step": 61830 }, { "epoch": 6.34966112138016, "grad_norm": 0.03837782144546509, "learning_rate": 0.01, "loss": 1.9654, "step": 61833 }, { "epoch": 6.3499691928527415, "grad_norm": 0.04574459418654442, "learning_rate": 0.01, "loss": 1.9593, "step": 61836 }, { "epoch": 6.350277264325324, "grad_norm": 0.03845152631402016, "learning_rate": 0.01, "loss": 1.9607, "step": 61839 }, { "epoch": 6.350585335797905, "grad_norm": 0.03869105130434036, "learning_rate": 0.01, "loss": 1.9727, "step": 61842 }, { "epoch": 6.350893407270487, "grad_norm": 0.08753649145364761, "learning_rate": 0.01, "loss": 2.0112, "step": 61845 }, { "epoch": 6.351201478743068, "grad_norm": 0.05175444483757019, "learning_rate": 0.01, "loss": 1.9668, "step": 61848 }, { "epoch": 6.35150955021565, "grad_norm": 0.08007702976465225, "learning_rate": 0.01, "loss": 1.9545, "step": 61851 }, { "epoch": 6.351817621688232, "grad_norm": 0.08175135403871536, "learning_rate": 0.01, "loss": 1.9735, "step": 61854 }, { "epoch": 6.352125693160813, "grad_norm": 0.042124420404434204, "learning_rate": 0.01, "loss": 1.9739, "step": 61857 }, { "epoch": 6.352433764633395, "grad_norm": 0.030145036056637764, "learning_rate": 0.01, "loss": 1.9872, "step": 61860 }, { "epoch": 6.352741836105976, "grad_norm": 0.05578388646245003, "learning_rate": 0.01, "loss": 1.9838, "step": 61863 }, { "epoch": 6.3530499075785585, "grad_norm": 0.06224137172102928, "learning_rate": 0.01, "loss": 1.9986, "step": 61866 }, { "epoch": 6.35335797905114, "grad_norm": 0.03723108768463135, "learning_rate": 0.01, "loss": 1.9953, "step": 61869 }, { "epoch": 6.353666050523722, "grad_norm": 0.05681099742650986, "learning_rate": 0.01, "loss": 1.9688, "step": 61872 }, { "epoch": 6.353974121996303, "grad_norm": 0.06745577603578568, "learning_rate": 0.01, "loss": 1.9823, "step": 61875 }, { "epoch": 6.354282193468885, "grad_norm": 0.17442718148231506, "learning_rate": 0.01, "loss": 1.9928, "step": 61878 }, { "epoch": 6.354590264941466, "grad_norm": 0.07263945788145065, "learning_rate": 0.01, "loss": 1.9649, "step": 61881 }, { "epoch": 6.354898336414048, "grad_norm": 0.047694019973278046, "learning_rate": 0.01, "loss": 1.9744, "step": 61884 }, { "epoch": 6.35520640788663, "grad_norm": 0.03988689184188843, "learning_rate": 0.01, "loss": 1.9882, "step": 61887 }, { "epoch": 6.355514479359211, "grad_norm": 0.06448500603437424, "learning_rate": 0.01, "loss": 1.9847, "step": 61890 }, { "epoch": 6.355822550831793, "grad_norm": 0.09949947893619537, "learning_rate": 0.01, "loss": 1.9953, "step": 61893 }, { "epoch": 6.3561306223043745, "grad_norm": 0.04593118652701378, "learning_rate": 0.01, "loss": 1.9963, "step": 61896 }, { "epoch": 6.356438693776957, "grad_norm": 0.05074120685458183, "learning_rate": 0.01, "loss": 1.9895, "step": 61899 }, { "epoch": 6.356746765249538, "grad_norm": 0.044762320816516876, "learning_rate": 0.01, "loss": 1.9759, "step": 61902 }, { "epoch": 6.35705483672212, "grad_norm": 0.04705975204706192, "learning_rate": 0.01, "loss": 1.9911, "step": 61905 }, { "epoch": 6.357362908194701, "grad_norm": 0.03957728296518326, "learning_rate": 0.01, "loss": 1.9721, "step": 61908 }, { "epoch": 6.357670979667283, "grad_norm": 0.050351545214653015, "learning_rate": 0.01, "loss": 1.9772, "step": 61911 }, { "epoch": 6.357979051139864, "grad_norm": 0.05305488035082817, "learning_rate": 0.01, "loss": 1.9602, "step": 61914 }, { "epoch": 6.358287122612446, "grad_norm": 0.03815659508109093, "learning_rate": 0.01, "loss": 1.9816, "step": 61917 }, { "epoch": 6.358595194085027, "grad_norm": 0.09699538350105286, "learning_rate": 0.01, "loss": 1.9716, "step": 61920 }, { "epoch": 6.358903265557609, "grad_norm": 0.13865162432193756, "learning_rate": 0.01, "loss": 2.0001, "step": 61923 }, { "epoch": 6.359211337030191, "grad_norm": 0.0713433250784874, "learning_rate": 0.01, "loss": 1.9895, "step": 61926 }, { "epoch": 6.359519408502773, "grad_norm": 0.06301635503768921, "learning_rate": 0.01, "loss": 1.9614, "step": 61929 }, { "epoch": 6.359827479975355, "grad_norm": 0.06763561815023422, "learning_rate": 0.01, "loss": 2.0041, "step": 61932 }, { "epoch": 6.360135551447936, "grad_norm": 0.051800407469272614, "learning_rate": 0.01, "loss": 1.9735, "step": 61935 }, { "epoch": 6.360443622920518, "grad_norm": 0.07054926455020905, "learning_rate": 0.01, "loss": 1.9721, "step": 61938 }, { "epoch": 6.360751694393099, "grad_norm": 0.09424702078104019, "learning_rate": 0.01, "loss": 1.9971, "step": 61941 }, { "epoch": 6.361059765865681, "grad_norm": 0.06715767085552216, "learning_rate": 0.01, "loss": 1.9801, "step": 61944 }, { "epoch": 6.361367837338262, "grad_norm": 0.052323173731565475, "learning_rate": 0.01, "loss": 1.9941, "step": 61947 }, { "epoch": 6.361675908810844, "grad_norm": 0.04951905459165573, "learning_rate": 0.01, "loss": 1.9737, "step": 61950 }, { "epoch": 6.3619839802834255, "grad_norm": 0.056527990847826004, "learning_rate": 0.01, "loss": 1.9546, "step": 61953 }, { "epoch": 6.3622920517560075, "grad_norm": 0.04317193478345871, "learning_rate": 0.01, "loss": 1.9747, "step": 61956 }, { "epoch": 6.362600123228589, "grad_norm": 0.07142667472362518, "learning_rate": 0.01, "loss": 1.9552, "step": 61959 }, { "epoch": 6.362908194701171, "grad_norm": 0.087591253221035, "learning_rate": 0.01, "loss": 1.9825, "step": 61962 }, { "epoch": 6.363216266173752, "grad_norm": 0.044100482016801834, "learning_rate": 0.01, "loss": 1.9708, "step": 61965 }, { "epoch": 6.363524337646334, "grad_norm": 0.05616932362318039, "learning_rate": 0.01, "loss": 1.9685, "step": 61968 }, { "epoch": 6.363832409118915, "grad_norm": 0.05864831432700157, "learning_rate": 0.01, "loss": 1.9846, "step": 61971 }, { "epoch": 6.364140480591497, "grad_norm": 0.04249054566025734, "learning_rate": 0.01, "loss": 1.992, "step": 61974 }, { "epoch": 6.364448552064079, "grad_norm": 0.047203633934259415, "learning_rate": 0.01, "loss": 1.9739, "step": 61977 }, { "epoch": 6.36475662353666, "grad_norm": 0.0450938381254673, "learning_rate": 0.01, "loss": 1.9829, "step": 61980 }, { "epoch": 6.365064695009242, "grad_norm": 0.13708913326263428, "learning_rate": 0.01, "loss": 1.97, "step": 61983 }, { "epoch": 6.365372766481824, "grad_norm": 0.0903947725892067, "learning_rate": 0.01, "loss": 1.9938, "step": 61986 }, { "epoch": 6.365680837954406, "grad_norm": 0.039598993957042694, "learning_rate": 0.01, "loss": 1.9699, "step": 61989 }, { "epoch": 6.365988909426987, "grad_norm": 0.06196282058954239, "learning_rate": 0.01, "loss": 1.9819, "step": 61992 }, { "epoch": 6.366296980899569, "grad_norm": 0.058830760419368744, "learning_rate": 0.01, "loss": 1.9776, "step": 61995 }, { "epoch": 6.36660505237215, "grad_norm": 0.03394271805882454, "learning_rate": 0.01, "loss": 1.9957, "step": 61998 }, { "epoch": 6.366913123844732, "grad_norm": 0.11069590598344803, "learning_rate": 0.01, "loss": 2.0019, "step": 62001 }, { "epoch": 6.367221195317313, "grad_norm": 0.03749784827232361, "learning_rate": 0.01, "loss": 1.9631, "step": 62004 }, { "epoch": 6.367529266789895, "grad_norm": 0.06352918595075607, "learning_rate": 0.01, "loss": 1.9842, "step": 62007 }, { "epoch": 6.367837338262477, "grad_norm": 0.037357673048973083, "learning_rate": 0.01, "loss": 1.9839, "step": 62010 }, { "epoch": 6.3681454097350585, "grad_norm": 0.09099667519330978, "learning_rate": 0.01, "loss": 1.9692, "step": 62013 }, { "epoch": 6.3684534812076405, "grad_norm": 0.054853178560733795, "learning_rate": 0.01, "loss": 2.0031, "step": 62016 }, { "epoch": 6.368761552680222, "grad_norm": 0.04815756157040596, "learning_rate": 0.01, "loss": 1.9458, "step": 62019 }, { "epoch": 6.369069624152804, "grad_norm": 0.12704138457775116, "learning_rate": 0.01, "loss": 1.9922, "step": 62022 }, { "epoch": 6.369377695625385, "grad_norm": 0.1324865221977234, "learning_rate": 0.01, "loss": 1.9736, "step": 62025 }, { "epoch": 6.369685767097967, "grad_norm": 0.10938073694705963, "learning_rate": 0.01, "loss": 1.971, "step": 62028 }, { "epoch": 6.369993838570548, "grad_norm": 0.055503327399492264, "learning_rate": 0.01, "loss": 1.9997, "step": 62031 }, { "epoch": 6.37030191004313, "grad_norm": 0.049480196088552475, "learning_rate": 0.01, "loss": 1.9935, "step": 62034 }, { "epoch": 6.370609981515711, "grad_norm": 0.04294247552752495, "learning_rate": 0.01, "loss": 1.9979, "step": 62037 }, { "epoch": 6.370918052988293, "grad_norm": 0.03694196790456772, "learning_rate": 0.01, "loss": 1.9902, "step": 62040 }, { "epoch": 6.3712261244608746, "grad_norm": 0.08281165361404419, "learning_rate": 0.01, "loss": 2.0044, "step": 62043 }, { "epoch": 6.371534195933457, "grad_norm": 0.05012732744216919, "learning_rate": 0.01, "loss": 1.9971, "step": 62046 }, { "epoch": 6.371842267406038, "grad_norm": 0.06646348536014557, "learning_rate": 0.01, "loss": 1.9746, "step": 62049 }, { "epoch": 6.37215033887862, "grad_norm": 0.046320248395204544, "learning_rate": 0.01, "loss": 1.9818, "step": 62052 }, { "epoch": 6.372458410351202, "grad_norm": 0.13739866018295288, "learning_rate": 0.01, "loss": 1.9592, "step": 62055 }, { "epoch": 6.372766481823783, "grad_norm": 0.10860442370176315, "learning_rate": 0.01, "loss": 1.9933, "step": 62058 }, { "epoch": 6.373074553296365, "grad_norm": 0.1861836165189743, "learning_rate": 0.01, "loss": 1.9853, "step": 62061 }, { "epoch": 6.373382624768946, "grad_norm": 0.07088860869407654, "learning_rate": 0.01, "loss": 1.9816, "step": 62064 }, { "epoch": 6.373690696241528, "grad_norm": 0.055799081921577454, "learning_rate": 0.01, "loss": 1.9964, "step": 62067 }, { "epoch": 6.3739987677141094, "grad_norm": 0.08463507890701294, "learning_rate": 0.01, "loss": 1.988, "step": 62070 }, { "epoch": 6.3743068391866915, "grad_norm": 0.07785413414239883, "learning_rate": 0.01, "loss": 1.9594, "step": 62073 }, { "epoch": 6.374614910659273, "grad_norm": 0.042249925434589386, "learning_rate": 0.01, "loss": 1.9833, "step": 62076 }, { "epoch": 6.374922982131855, "grad_norm": 0.08121603727340698, "learning_rate": 0.01, "loss": 1.9567, "step": 62079 }, { "epoch": 6.375231053604436, "grad_norm": 0.08757340908050537, "learning_rate": 0.01, "loss": 2.0097, "step": 62082 }, { "epoch": 6.375539125077018, "grad_norm": 0.11189188808202744, "learning_rate": 0.01, "loss": 1.9862, "step": 62085 }, { "epoch": 6.3758471965496, "grad_norm": 0.04559013247489929, "learning_rate": 0.01, "loss": 1.9686, "step": 62088 }, { "epoch": 6.376155268022181, "grad_norm": 0.044948089867830276, "learning_rate": 0.01, "loss": 2.0105, "step": 62091 }, { "epoch": 6.376463339494763, "grad_norm": 0.030356265604496002, "learning_rate": 0.01, "loss": 1.9504, "step": 62094 }, { "epoch": 6.376771410967344, "grad_norm": 0.12627916038036346, "learning_rate": 0.01, "loss": 1.9999, "step": 62097 }, { "epoch": 6.377079482439926, "grad_norm": 0.046097807586193085, "learning_rate": 0.01, "loss": 1.968, "step": 62100 }, { "epoch": 6.377387553912508, "grad_norm": 0.057985857129096985, "learning_rate": 0.01, "loss": 2.0091, "step": 62103 }, { "epoch": 6.37769562538509, "grad_norm": 0.07194691151380539, "learning_rate": 0.01, "loss": 1.9823, "step": 62106 }, { "epoch": 6.378003696857671, "grad_norm": 0.07052842527627945, "learning_rate": 0.01, "loss": 1.9763, "step": 62109 }, { "epoch": 6.378311768330253, "grad_norm": 0.0420130155980587, "learning_rate": 0.01, "loss": 1.9822, "step": 62112 }, { "epoch": 6.378619839802834, "grad_norm": 0.08693109452724457, "learning_rate": 0.01, "loss": 1.9823, "step": 62115 }, { "epoch": 6.378927911275416, "grad_norm": 0.05671975761651993, "learning_rate": 0.01, "loss": 1.9806, "step": 62118 }, { "epoch": 6.379235982747997, "grad_norm": 0.04355829954147339, "learning_rate": 0.01, "loss": 1.9805, "step": 62121 }, { "epoch": 6.379544054220579, "grad_norm": 0.045782145112752914, "learning_rate": 0.01, "loss": 1.9765, "step": 62124 }, { "epoch": 6.37985212569316, "grad_norm": 0.07497083395719528, "learning_rate": 0.01, "loss": 1.994, "step": 62127 }, { "epoch": 6.3801601971657425, "grad_norm": 0.067107655107975, "learning_rate": 0.01, "loss": 1.9572, "step": 62130 }, { "epoch": 6.3804682686383245, "grad_norm": 0.10962393879890442, "learning_rate": 0.01, "loss": 1.9764, "step": 62133 }, { "epoch": 6.380776340110906, "grad_norm": 0.128241628408432, "learning_rate": 0.01, "loss": 1.96, "step": 62136 }, { "epoch": 6.381084411583488, "grad_norm": 0.03978749364614487, "learning_rate": 0.01, "loss": 1.9807, "step": 62139 }, { "epoch": 6.381392483056069, "grad_norm": 0.0986032783985138, "learning_rate": 0.01, "loss": 1.9917, "step": 62142 }, { "epoch": 6.381700554528651, "grad_norm": 0.05384555831551552, "learning_rate": 0.01, "loss": 1.9609, "step": 62145 }, { "epoch": 6.382008626001232, "grad_norm": 0.08981155604124069, "learning_rate": 0.01, "loss": 1.9911, "step": 62148 }, { "epoch": 6.382316697473814, "grad_norm": 0.14535114169120789, "learning_rate": 0.01, "loss": 1.9794, "step": 62151 }, { "epoch": 6.382624768946395, "grad_norm": 0.11009112000465393, "learning_rate": 0.01, "loss": 1.9863, "step": 62154 }, { "epoch": 6.382932840418977, "grad_norm": 0.07238960266113281, "learning_rate": 0.01, "loss": 1.9858, "step": 62157 }, { "epoch": 6.3832409118915585, "grad_norm": 0.0916430875658989, "learning_rate": 0.01, "loss": 1.9857, "step": 62160 }, { "epoch": 6.383548983364141, "grad_norm": 0.0457562655210495, "learning_rate": 0.01, "loss": 1.979, "step": 62163 }, { "epoch": 6.383857054836722, "grad_norm": 0.09100457280874252, "learning_rate": 0.01, "loss": 1.9816, "step": 62166 }, { "epoch": 6.384165126309304, "grad_norm": 0.06494874507188797, "learning_rate": 0.01, "loss": 1.9843, "step": 62169 }, { "epoch": 6.384473197781885, "grad_norm": 0.06534988433122635, "learning_rate": 0.01, "loss": 1.9821, "step": 62172 }, { "epoch": 6.384781269254467, "grad_norm": 0.04221831634640694, "learning_rate": 0.01, "loss": 1.9555, "step": 62175 }, { "epoch": 6.385089340727049, "grad_norm": 0.1350170373916626, "learning_rate": 0.01, "loss": 1.9788, "step": 62178 }, { "epoch": 6.38539741219963, "grad_norm": 0.10296313464641571, "learning_rate": 0.01, "loss": 1.9786, "step": 62181 }, { "epoch": 6.385705483672212, "grad_norm": 0.052958227694034576, "learning_rate": 0.01, "loss": 1.9942, "step": 62184 }, { "epoch": 6.386013555144793, "grad_norm": 0.046673450618982315, "learning_rate": 0.01, "loss": 1.9918, "step": 62187 }, { "epoch": 6.3863216266173755, "grad_norm": 0.052400704473257065, "learning_rate": 0.01, "loss": 1.9459, "step": 62190 }, { "epoch": 6.386629698089957, "grad_norm": 0.05483454838395119, "learning_rate": 0.01, "loss": 1.9565, "step": 62193 }, { "epoch": 6.386937769562539, "grad_norm": 0.04824971407651901, "learning_rate": 0.01, "loss": 1.9547, "step": 62196 }, { "epoch": 6.38724584103512, "grad_norm": 0.04102877154946327, "learning_rate": 0.01, "loss": 1.9761, "step": 62199 }, { "epoch": 6.387553912507702, "grad_norm": 0.052934035658836365, "learning_rate": 0.01, "loss": 1.9748, "step": 62202 }, { "epoch": 6.387861983980283, "grad_norm": 0.13166899979114532, "learning_rate": 0.01, "loss": 2.0194, "step": 62205 }, { "epoch": 6.388170055452865, "grad_norm": 0.06457889080047607, "learning_rate": 0.01, "loss": 1.9734, "step": 62208 }, { "epoch": 6.388478126925447, "grad_norm": 0.061608802527189255, "learning_rate": 0.01, "loss": 1.9968, "step": 62211 }, { "epoch": 6.388786198398028, "grad_norm": 0.03722333163022995, "learning_rate": 0.01, "loss": 1.9731, "step": 62214 }, { "epoch": 6.38909426987061, "grad_norm": 0.03712205961346626, "learning_rate": 0.01, "loss": 1.9694, "step": 62217 }, { "epoch": 6.3894023413431915, "grad_norm": 0.10517171770334244, "learning_rate": 0.01, "loss": 1.9878, "step": 62220 }, { "epoch": 6.389710412815774, "grad_norm": 0.07701227068901062, "learning_rate": 0.01, "loss": 1.9924, "step": 62223 }, { "epoch": 6.390018484288355, "grad_norm": 0.13547784090042114, "learning_rate": 0.01, "loss": 1.9829, "step": 62226 }, { "epoch": 6.390326555760937, "grad_norm": 0.11633336544036865, "learning_rate": 0.01, "loss": 2.0137, "step": 62229 }, { "epoch": 6.390634627233518, "grad_norm": 0.09636779129505157, "learning_rate": 0.01, "loss": 1.9763, "step": 62232 }, { "epoch": 6.3909426987061, "grad_norm": 0.08367501944303513, "learning_rate": 0.01, "loss": 1.9637, "step": 62235 }, { "epoch": 6.391250770178681, "grad_norm": 0.06181328743696213, "learning_rate": 0.01, "loss": 1.9545, "step": 62238 }, { "epoch": 6.391558841651263, "grad_norm": 0.08050753176212311, "learning_rate": 0.01, "loss": 2.0016, "step": 62241 }, { "epoch": 6.391866913123844, "grad_norm": 0.04931900277733803, "learning_rate": 0.01, "loss": 1.9901, "step": 62244 }, { "epoch": 6.392174984596426, "grad_norm": 0.050407491624355316, "learning_rate": 0.01, "loss": 2.001, "step": 62247 }, { "epoch": 6.392483056069008, "grad_norm": 0.03501970320940018, "learning_rate": 0.01, "loss": 1.9427, "step": 62250 }, { "epoch": 6.39279112754159, "grad_norm": 0.04515289515256882, "learning_rate": 0.01, "loss": 1.9769, "step": 62253 }, { "epoch": 6.393099199014172, "grad_norm": 0.07897801697254181, "learning_rate": 0.01, "loss": 1.9967, "step": 62256 }, { "epoch": 6.393407270486753, "grad_norm": 0.09098079800605774, "learning_rate": 0.01, "loss": 1.9675, "step": 62259 }, { "epoch": 6.393715341959335, "grad_norm": 0.046846143901348114, "learning_rate": 0.01, "loss": 1.9782, "step": 62262 }, { "epoch": 6.394023413431916, "grad_norm": 0.08435478061437607, "learning_rate": 0.01, "loss": 2.0053, "step": 62265 }, { "epoch": 6.394331484904498, "grad_norm": 0.04902444779872894, "learning_rate": 0.01, "loss": 2.0083, "step": 62268 }, { "epoch": 6.394639556377079, "grad_norm": 0.053175996989011765, "learning_rate": 0.01, "loss": 1.9669, "step": 62271 }, { "epoch": 6.394947627849661, "grad_norm": 0.04319942370057106, "learning_rate": 0.01, "loss": 1.957, "step": 62274 }, { "epoch": 6.3952556993222425, "grad_norm": 0.056976351886987686, "learning_rate": 0.01, "loss": 1.979, "step": 62277 }, { "epoch": 6.3955637707948245, "grad_norm": 0.12992046773433685, "learning_rate": 0.01, "loss": 1.9898, "step": 62280 }, { "epoch": 6.395871842267406, "grad_norm": 0.0788559541106224, "learning_rate": 0.01, "loss": 1.9768, "step": 62283 }, { "epoch": 6.396179913739988, "grad_norm": 0.039425671100616455, "learning_rate": 0.01, "loss": 1.9658, "step": 62286 }, { "epoch": 6.39648798521257, "grad_norm": 0.0928255170583725, "learning_rate": 0.01, "loss": 1.976, "step": 62289 }, { "epoch": 6.396796056685151, "grad_norm": 0.06437569856643677, "learning_rate": 0.01, "loss": 2.011, "step": 62292 }, { "epoch": 6.397104128157733, "grad_norm": 0.05675747990608215, "learning_rate": 0.01, "loss": 1.9789, "step": 62295 }, { "epoch": 6.397412199630314, "grad_norm": 0.06161409988999367, "learning_rate": 0.01, "loss": 1.9933, "step": 62298 }, { "epoch": 6.397720271102896, "grad_norm": 0.035643287003040314, "learning_rate": 0.01, "loss": 1.9875, "step": 62301 }, { "epoch": 6.398028342575477, "grad_norm": 0.06932775676250458, "learning_rate": 0.01, "loss": 2.0093, "step": 62304 }, { "epoch": 6.398336414048059, "grad_norm": 0.1191667765378952, "learning_rate": 0.01, "loss": 1.9782, "step": 62307 }, { "epoch": 6.398644485520641, "grad_norm": 0.06702014803886414, "learning_rate": 0.01, "loss": 1.9694, "step": 62310 }, { "epoch": 6.398952556993223, "grad_norm": 0.045102011412382126, "learning_rate": 0.01, "loss": 1.9784, "step": 62313 }, { "epoch": 6.399260628465804, "grad_norm": 0.042483970522880554, "learning_rate": 0.01, "loss": 1.9879, "step": 62316 }, { "epoch": 6.399568699938386, "grad_norm": 0.035098928958177567, "learning_rate": 0.01, "loss": 1.9512, "step": 62319 }, { "epoch": 6.399876771410967, "grad_norm": 0.05023285374045372, "learning_rate": 0.01, "loss": 1.9571, "step": 62322 }, { "epoch": 6.400184842883549, "grad_norm": 0.14267301559448242, "learning_rate": 0.01, "loss": 1.9759, "step": 62325 }, { "epoch": 6.40049291435613, "grad_norm": 0.12871447205543518, "learning_rate": 0.01, "loss": 1.9815, "step": 62328 }, { "epoch": 6.400800985828712, "grad_norm": 0.18785615265369415, "learning_rate": 0.01, "loss": 1.9662, "step": 62331 }, { "epoch": 6.401109057301294, "grad_norm": 0.14859537780284882, "learning_rate": 0.01, "loss": 1.9893, "step": 62334 }, { "epoch": 6.4014171287738755, "grad_norm": 0.10787920653820038, "learning_rate": 0.01, "loss": 1.979, "step": 62337 }, { "epoch": 6.4017252002464575, "grad_norm": 0.0459236241877079, "learning_rate": 0.01, "loss": 1.9536, "step": 62340 }, { "epoch": 6.402033271719039, "grad_norm": 0.04476577043533325, "learning_rate": 0.01, "loss": 1.9731, "step": 62343 }, { "epoch": 6.402341343191621, "grad_norm": 0.04856594651937485, "learning_rate": 0.01, "loss": 1.9599, "step": 62346 }, { "epoch": 6.402649414664202, "grad_norm": 0.047260161489248276, "learning_rate": 0.01, "loss": 1.9652, "step": 62349 }, { "epoch": 6.402957486136784, "grad_norm": 0.05097677931189537, "learning_rate": 0.01, "loss": 1.9836, "step": 62352 }, { "epoch": 6.403265557609365, "grad_norm": 0.10697360336780548, "learning_rate": 0.01, "loss": 1.9619, "step": 62355 }, { "epoch": 6.403573629081947, "grad_norm": 0.11435877531766891, "learning_rate": 0.01, "loss": 1.9935, "step": 62358 }, { "epoch": 6.403881700554528, "grad_norm": 0.036153070628643036, "learning_rate": 0.01, "loss": 1.9515, "step": 62361 }, { "epoch": 6.40418977202711, "grad_norm": 0.07002195715904236, "learning_rate": 0.01, "loss": 1.9677, "step": 62364 }, { "epoch": 6.4044978434996915, "grad_norm": 0.05492027476429939, "learning_rate": 0.01, "loss": 1.9763, "step": 62367 }, { "epoch": 6.404805914972274, "grad_norm": 0.06763515621423721, "learning_rate": 0.01, "loss": 1.97, "step": 62370 }, { "epoch": 6.405113986444855, "grad_norm": 0.03954880312085152, "learning_rate": 0.01, "loss": 1.9756, "step": 62373 }, { "epoch": 6.405422057917437, "grad_norm": 0.06005239859223366, "learning_rate": 0.01, "loss": 1.9666, "step": 62376 }, { "epoch": 6.405730129390019, "grad_norm": 0.06014340743422508, "learning_rate": 0.01, "loss": 1.9814, "step": 62379 }, { "epoch": 6.4060382008626, "grad_norm": 0.05861683562397957, "learning_rate": 0.01, "loss": 1.9733, "step": 62382 }, { "epoch": 6.406346272335182, "grad_norm": 0.07609166949987411, "learning_rate": 0.01, "loss": 2.0015, "step": 62385 }, { "epoch": 6.406654343807763, "grad_norm": 0.08934800326824188, "learning_rate": 0.01, "loss": 1.9715, "step": 62388 }, { "epoch": 6.406962415280345, "grad_norm": 0.14556893706321716, "learning_rate": 0.01, "loss": 1.9868, "step": 62391 }, { "epoch": 6.407270486752926, "grad_norm": 0.07740515470504761, "learning_rate": 0.01, "loss": 1.9631, "step": 62394 }, { "epoch": 6.4075785582255085, "grad_norm": 0.04366164281964302, "learning_rate": 0.01, "loss": 1.9745, "step": 62397 }, { "epoch": 6.40788662969809, "grad_norm": 0.04770512506365776, "learning_rate": 0.01, "loss": 1.9704, "step": 62400 }, { "epoch": 6.408194701170672, "grad_norm": 0.04657142981886864, "learning_rate": 0.01, "loss": 1.9712, "step": 62403 }, { "epoch": 6.408502772643253, "grad_norm": 0.04207182675600052, "learning_rate": 0.01, "loss": 1.9766, "step": 62406 }, { "epoch": 6.408810844115835, "grad_norm": 0.05520906671881676, "learning_rate": 0.01, "loss": 1.9899, "step": 62409 }, { "epoch": 6.409118915588417, "grad_norm": 0.07524003833532333, "learning_rate": 0.01, "loss": 2.0114, "step": 62412 }, { "epoch": 6.409426987060998, "grad_norm": 0.04558379203081131, "learning_rate": 0.01, "loss": 1.99, "step": 62415 }, { "epoch": 6.40973505853358, "grad_norm": 0.09892844408750534, "learning_rate": 0.01, "loss": 1.9771, "step": 62418 }, { "epoch": 6.410043130006161, "grad_norm": 0.07591421902179718, "learning_rate": 0.01, "loss": 1.9601, "step": 62421 }, { "epoch": 6.410351201478743, "grad_norm": 0.09021826833486557, "learning_rate": 0.01, "loss": 2.0068, "step": 62424 }, { "epoch": 6.4106592729513245, "grad_norm": 0.043195899575948715, "learning_rate": 0.01, "loss": 1.9557, "step": 62427 }, { "epoch": 6.410967344423907, "grad_norm": 0.10521116852760315, "learning_rate": 0.01, "loss": 1.9832, "step": 62430 }, { "epoch": 6.411275415896488, "grad_norm": 0.061183419078588486, "learning_rate": 0.01, "loss": 2.0113, "step": 62433 }, { "epoch": 6.41158348736907, "grad_norm": 0.0777936577796936, "learning_rate": 0.01, "loss": 2.0047, "step": 62436 }, { "epoch": 6.411891558841651, "grad_norm": 0.07783685624599457, "learning_rate": 0.01, "loss": 1.9624, "step": 62439 }, { "epoch": 6.412199630314233, "grad_norm": 0.10137835890054703, "learning_rate": 0.01, "loss": 1.9895, "step": 62442 }, { "epoch": 6.412507701786814, "grad_norm": 0.07583697885274887, "learning_rate": 0.01, "loss": 1.9631, "step": 62445 }, { "epoch": 6.412815773259396, "grad_norm": 0.05925040692090988, "learning_rate": 0.01, "loss": 1.9751, "step": 62448 }, { "epoch": 6.413123844731977, "grad_norm": 0.07420961558818817, "learning_rate": 0.01, "loss": 1.9927, "step": 62451 }, { "epoch": 6.413431916204559, "grad_norm": 0.07555720955133438, "learning_rate": 0.01, "loss": 1.9884, "step": 62454 }, { "epoch": 6.4137399876771415, "grad_norm": 0.05991441011428833, "learning_rate": 0.01, "loss": 1.9469, "step": 62457 }, { "epoch": 6.414048059149723, "grad_norm": 0.04959188401699066, "learning_rate": 0.01, "loss": 1.9636, "step": 62460 }, { "epoch": 6.414356130622305, "grad_norm": 0.09466046094894409, "learning_rate": 0.01, "loss": 1.9829, "step": 62463 }, { "epoch": 6.414664202094886, "grad_norm": 0.08828117698431015, "learning_rate": 0.01, "loss": 1.9817, "step": 62466 }, { "epoch": 6.414972273567468, "grad_norm": 0.0796947106719017, "learning_rate": 0.01, "loss": 1.9724, "step": 62469 }, { "epoch": 6.415280345040049, "grad_norm": 0.09187500178813934, "learning_rate": 0.01, "loss": 1.9676, "step": 62472 }, { "epoch": 6.415588416512631, "grad_norm": 0.032242316752672195, "learning_rate": 0.01, "loss": 1.9851, "step": 62475 }, { "epoch": 6.415896487985212, "grad_norm": 0.08650948852300644, "learning_rate": 0.01, "loss": 1.9873, "step": 62478 }, { "epoch": 6.416204559457794, "grad_norm": 0.07760648429393768, "learning_rate": 0.01, "loss": 1.985, "step": 62481 }, { "epoch": 6.4165126309303755, "grad_norm": 0.10039196908473969, "learning_rate": 0.01, "loss": 1.9643, "step": 62484 }, { "epoch": 6.4168207024029575, "grad_norm": 0.06620651483535767, "learning_rate": 0.01, "loss": 1.984, "step": 62487 }, { "epoch": 6.41712877387554, "grad_norm": 0.07534004002809525, "learning_rate": 0.01, "loss": 1.975, "step": 62490 }, { "epoch": 6.417436845348121, "grad_norm": 0.04290451481938362, "learning_rate": 0.01, "loss": 1.9609, "step": 62493 }, { "epoch": 6.417744916820703, "grad_norm": 0.040604498237371445, "learning_rate": 0.01, "loss": 1.9901, "step": 62496 }, { "epoch": 6.418052988293284, "grad_norm": 0.04257834702730179, "learning_rate": 0.01, "loss": 1.9863, "step": 62499 }, { "epoch": 6.418361059765866, "grad_norm": 0.06738926470279694, "learning_rate": 0.01, "loss": 1.9999, "step": 62502 }, { "epoch": 6.418669131238447, "grad_norm": 0.05635688826441765, "learning_rate": 0.01, "loss": 2.0168, "step": 62505 }, { "epoch": 6.418977202711029, "grad_norm": 0.10578261315822601, "learning_rate": 0.01, "loss": 1.9704, "step": 62508 }, { "epoch": 6.41928527418361, "grad_norm": 0.08642464131116867, "learning_rate": 0.01, "loss": 1.9973, "step": 62511 }, { "epoch": 6.419593345656192, "grad_norm": 0.036675188690423965, "learning_rate": 0.01, "loss": 1.9664, "step": 62514 }, { "epoch": 6.419901417128774, "grad_norm": 0.14191675186157227, "learning_rate": 0.01, "loss": 1.9978, "step": 62517 }, { "epoch": 6.420209488601356, "grad_norm": 0.13353928923606873, "learning_rate": 0.01, "loss": 1.9822, "step": 62520 }, { "epoch": 6.420517560073937, "grad_norm": 0.06307082623243332, "learning_rate": 0.01, "loss": 1.9746, "step": 62523 }, { "epoch": 6.420825631546519, "grad_norm": 0.04877189174294472, "learning_rate": 0.01, "loss": 1.9895, "step": 62526 }, { "epoch": 6.4211337030191, "grad_norm": 0.07062195241451263, "learning_rate": 0.01, "loss": 1.9535, "step": 62529 }, { "epoch": 6.421441774491682, "grad_norm": 0.03799920529127121, "learning_rate": 0.01, "loss": 1.9847, "step": 62532 }, { "epoch": 6.421749845964264, "grad_norm": 0.0635172426700592, "learning_rate": 0.01, "loss": 1.9664, "step": 62535 }, { "epoch": 6.422057917436845, "grad_norm": 0.04074868932366371, "learning_rate": 0.01, "loss": 1.956, "step": 62538 }, { "epoch": 6.422365988909427, "grad_norm": 0.1227545514702797, "learning_rate": 0.01, "loss": 2.0093, "step": 62541 }, { "epoch": 6.4226740603820085, "grad_norm": 0.10859929770231247, "learning_rate": 0.01, "loss": 1.9823, "step": 62544 }, { "epoch": 6.4229821318545905, "grad_norm": 0.10589033365249634, "learning_rate": 0.01, "loss": 1.9719, "step": 62547 }, { "epoch": 6.423290203327172, "grad_norm": 0.1324671059846878, "learning_rate": 0.01, "loss": 1.9673, "step": 62550 }, { "epoch": 6.423598274799754, "grad_norm": 0.0601799450814724, "learning_rate": 0.01, "loss": 1.9681, "step": 62553 }, { "epoch": 6.423906346272335, "grad_norm": 0.05349354445934296, "learning_rate": 0.01, "loss": 1.9659, "step": 62556 }, { "epoch": 6.424214417744917, "grad_norm": 0.03839118406176567, "learning_rate": 0.01, "loss": 1.9703, "step": 62559 }, { "epoch": 6.424522489217498, "grad_norm": 0.035764630883932114, "learning_rate": 0.01, "loss": 1.9882, "step": 62562 }, { "epoch": 6.42483056069008, "grad_norm": 0.05545911565423012, "learning_rate": 0.01, "loss": 1.9916, "step": 62565 }, { "epoch": 6.425138632162661, "grad_norm": 0.044913556426763535, "learning_rate": 0.01, "loss": 1.9601, "step": 62568 }, { "epoch": 6.425446703635243, "grad_norm": 0.040859974920749664, "learning_rate": 0.01, "loss": 2.0019, "step": 62571 }, { "epoch": 6.425754775107825, "grad_norm": 0.04757828265428543, "learning_rate": 0.01, "loss": 1.9723, "step": 62574 }, { "epoch": 6.426062846580407, "grad_norm": 0.0439058393239975, "learning_rate": 0.01, "loss": 1.9681, "step": 62577 }, { "epoch": 6.426370918052989, "grad_norm": 0.059357352554798126, "learning_rate": 0.01, "loss": 1.9749, "step": 62580 }, { "epoch": 6.42667898952557, "grad_norm": 0.1129942387342453, "learning_rate": 0.01, "loss": 1.957, "step": 62583 }, { "epoch": 6.426987060998152, "grad_norm": 0.09056062996387482, "learning_rate": 0.01, "loss": 1.9782, "step": 62586 }, { "epoch": 6.427295132470733, "grad_norm": 0.05224711075425148, "learning_rate": 0.01, "loss": 1.9771, "step": 62589 }, { "epoch": 6.427603203943315, "grad_norm": 0.03806770220398903, "learning_rate": 0.01, "loss": 1.9736, "step": 62592 }, { "epoch": 6.427911275415896, "grad_norm": 0.045875824987888336, "learning_rate": 0.01, "loss": 1.9619, "step": 62595 }, { "epoch": 6.428219346888478, "grad_norm": 0.054107557982206345, "learning_rate": 0.01, "loss": 1.9426, "step": 62598 }, { "epoch": 6.4285274183610595, "grad_norm": 0.049330100417137146, "learning_rate": 0.01, "loss": 1.9399, "step": 62601 }, { "epoch": 6.4288354898336415, "grad_norm": 0.09636044502258301, "learning_rate": 0.01, "loss": 1.9681, "step": 62604 }, { "epoch": 6.429143561306223, "grad_norm": 0.06452757120132446, "learning_rate": 0.01, "loss": 1.9612, "step": 62607 }, { "epoch": 6.429451632778805, "grad_norm": 0.04598628729581833, "learning_rate": 0.01, "loss": 1.9745, "step": 62610 }, { "epoch": 6.429759704251387, "grad_norm": 0.06763578951358795, "learning_rate": 0.01, "loss": 1.9924, "step": 62613 }, { "epoch": 6.430067775723968, "grad_norm": 0.12232638895511627, "learning_rate": 0.01, "loss": 1.9908, "step": 62616 }, { "epoch": 6.43037584719655, "grad_norm": 0.09073688089847565, "learning_rate": 0.01, "loss": 1.9598, "step": 62619 }, { "epoch": 6.430683918669131, "grad_norm": 0.08724263310432434, "learning_rate": 0.01, "loss": 1.9629, "step": 62622 }, { "epoch": 6.430991990141713, "grad_norm": 0.11307045817375183, "learning_rate": 0.01, "loss": 1.9681, "step": 62625 }, { "epoch": 6.431300061614294, "grad_norm": 0.037230703979730606, "learning_rate": 0.01, "loss": 1.9507, "step": 62628 }, { "epoch": 6.431608133086876, "grad_norm": 0.06643498688936234, "learning_rate": 0.01, "loss": 1.9711, "step": 62631 }, { "epoch": 6.431916204559458, "grad_norm": 0.04486044496297836, "learning_rate": 0.01, "loss": 1.9715, "step": 62634 }, { "epoch": 6.43222427603204, "grad_norm": 0.03463894501328468, "learning_rate": 0.01, "loss": 2.0016, "step": 62637 }, { "epoch": 6.432532347504621, "grad_norm": 0.04060268774628639, "learning_rate": 0.01, "loss": 1.975, "step": 62640 }, { "epoch": 6.432840418977203, "grad_norm": 0.11157824844121933, "learning_rate": 0.01, "loss": 1.9548, "step": 62643 }, { "epoch": 6.433148490449784, "grad_norm": 0.05599267780780792, "learning_rate": 0.01, "loss": 1.9936, "step": 62646 }, { "epoch": 6.433456561922366, "grad_norm": 0.05528303235769272, "learning_rate": 0.01, "loss": 2.0101, "step": 62649 }, { "epoch": 6.433764633394947, "grad_norm": 0.053959596902132034, "learning_rate": 0.01, "loss": 2.0109, "step": 62652 }, { "epoch": 6.434072704867529, "grad_norm": 0.06588397920131683, "learning_rate": 0.01, "loss": 1.989, "step": 62655 }, { "epoch": 6.434380776340111, "grad_norm": 0.058823537081480026, "learning_rate": 0.01, "loss": 1.9699, "step": 62658 }, { "epoch": 6.4346888478126925, "grad_norm": 0.1689031720161438, "learning_rate": 0.01, "loss": 1.9662, "step": 62661 }, { "epoch": 6.4349969192852745, "grad_norm": 0.09005407243967056, "learning_rate": 0.01, "loss": 2.0074, "step": 62664 }, { "epoch": 6.435304990757856, "grad_norm": 0.04335148632526398, "learning_rate": 0.01, "loss": 1.976, "step": 62667 }, { "epoch": 6.435613062230438, "grad_norm": 0.04634424299001694, "learning_rate": 0.01, "loss": 1.9452, "step": 62670 }, { "epoch": 6.435921133703019, "grad_norm": 0.03891894966363907, "learning_rate": 0.01, "loss": 1.9724, "step": 62673 }, { "epoch": 6.436229205175601, "grad_norm": 0.05635913461446762, "learning_rate": 0.01, "loss": 1.9774, "step": 62676 }, { "epoch": 6.436537276648182, "grad_norm": 0.05616259574890137, "learning_rate": 0.01, "loss": 1.9613, "step": 62679 }, { "epoch": 6.436845348120764, "grad_norm": 0.1745036244392395, "learning_rate": 0.01, "loss": 1.97, "step": 62682 }, { "epoch": 6.437153419593345, "grad_norm": 0.09336113184690475, "learning_rate": 0.01, "loss": 1.9742, "step": 62685 }, { "epoch": 6.437461491065927, "grad_norm": 0.06508792191743851, "learning_rate": 0.01, "loss": 1.9913, "step": 62688 }, { "epoch": 6.437769562538509, "grad_norm": 0.08041112869977951, "learning_rate": 0.01, "loss": 1.9879, "step": 62691 }, { "epoch": 6.438077634011091, "grad_norm": 0.050097182393074036, "learning_rate": 0.01, "loss": 1.9817, "step": 62694 }, { "epoch": 6.438385705483673, "grad_norm": 0.09112852811813354, "learning_rate": 0.01, "loss": 2.0008, "step": 62697 }, { "epoch": 6.438693776956254, "grad_norm": 0.06812035292387009, "learning_rate": 0.01, "loss": 2.0099, "step": 62700 }, { "epoch": 6.439001848428836, "grad_norm": 0.04868394508957863, "learning_rate": 0.01, "loss": 1.9786, "step": 62703 }, { "epoch": 6.439309919901417, "grad_norm": 0.03731069713830948, "learning_rate": 0.01, "loss": 1.9865, "step": 62706 }, { "epoch": 6.439617991373999, "grad_norm": 0.03705144673585892, "learning_rate": 0.01, "loss": 1.9439, "step": 62709 }, { "epoch": 6.43992606284658, "grad_norm": 0.06617160141468048, "learning_rate": 0.01, "loss": 1.9641, "step": 62712 }, { "epoch": 6.440234134319162, "grad_norm": 0.09520690143108368, "learning_rate": 0.01, "loss": 1.9827, "step": 62715 }, { "epoch": 6.440542205791743, "grad_norm": 0.10066824406385422, "learning_rate": 0.01, "loss": 1.9998, "step": 62718 }, { "epoch": 6.4408502772643255, "grad_norm": 0.04232907295227051, "learning_rate": 0.01, "loss": 1.9729, "step": 62721 }, { "epoch": 6.441158348736907, "grad_norm": 0.08188414573669434, "learning_rate": 0.01, "loss": 1.9495, "step": 62724 }, { "epoch": 6.441466420209489, "grad_norm": 0.11537371575832367, "learning_rate": 0.01, "loss": 2.0005, "step": 62727 }, { "epoch": 6.44177449168207, "grad_norm": 0.03267863765358925, "learning_rate": 0.01, "loss": 1.9782, "step": 62730 }, { "epoch": 6.442082563154652, "grad_norm": 0.0837605893611908, "learning_rate": 0.01, "loss": 1.9695, "step": 62733 }, { "epoch": 6.442390634627234, "grad_norm": 0.05545349791646004, "learning_rate": 0.01, "loss": 1.9948, "step": 62736 }, { "epoch": 6.442698706099815, "grad_norm": 0.06512763351202011, "learning_rate": 0.01, "loss": 1.9974, "step": 62739 }, { "epoch": 6.443006777572397, "grad_norm": 0.051384586840867996, "learning_rate": 0.01, "loss": 1.9842, "step": 62742 }, { "epoch": 6.443314849044978, "grad_norm": 0.035269416868686676, "learning_rate": 0.01, "loss": 1.9936, "step": 62745 }, { "epoch": 6.44362292051756, "grad_norm": 0.11074446141719818, "learning_rate": 0.01, "loss": 2.0109, "step": 62748 }, { "epoch": 6.4439309919901415, "grad_norm": 0.05051399767398834, "learning_rate": 0.01, "loss": 1.9779, "step": 62751 }, { "epoch": 6.444239063462724, "grad_norm": 0.07275965064764023, "learning_rate": 0.01, "loss": 1.9984, "step": 62754 }, { "epoch": 6.444547134935305, "grad_norm": 0.07027794420719147, "learning_rate": 0.01, "loss": 1.999, "step": 62757 }, { "epoch": 6.444855206407887, "grad_norm": 0.11397121101617813, "learning_rate": 0.01, "loss": 1.9713, "step": 62760 }, { "epoch": 6.445163277880468, "grad_norm": 0.08914832770824432, "learning_rate": 0.01, "loss": 1.9851, "step": 62763 }, { "epoch": 6.44547134935305, "grad_norm": 0.1111316904425621, "learning_rate": 0.01, "loss": 1.9661, "step": 62766 }, { "epoch": 6.445779420825631, "grad_norm": 0.11033376306295395, "learning_rate": 0.01, "loss": 2.001, "step": 62769 }, { "epoch": 6.446087492298213, "grad_norm": 0.07109701633453369, "learning_rate": 0.01, "loss": 1.9886, "step": 62772 }, { "epoch": 6.446395563770794, "grad_norm": 0.0448211170732975, "learning_rate": 0.01, "loss": 1.9995, "step": 62775 }, { "epoch": 6.446703635243376, "grad_norm": 0.04059508815407753, "learning_rate": 0.01, "loss": 1.9767, "step": 62778 }, { "epoch": 6.4470117067159585, "grad_norm": 0.04148499295115471, "learning_rate": 0.01, "loss": 1.9891, "step": 62781 }, { "epoch": 6.44731977818854, "grad_norm": 0.04664915055036545, "learning_rate": 0.01, "loss": 1.969, "step": 62784 }, { "epoch": 6.447627849661122, "grad_norm": 0.03414386510848999, "learning_rate": 0.01, "loss": 1.9736, "step": 62787 }, { "epoch": 6.447935921133703, "grad_norm": 0.047360535711050034, "learning_rate": 0.01, "loss": 1.9931, "step": 62790 }, { "epoch": 6.448243992606285, "grad_norm": 0.11773015558719635, "learning_rate": 0.01, "loss": 1.9424, "step": 62793 }, { "epoch": 6.448552064078866, "grad_norm": 0.08103271573781967, "learning_rate": 0.01, "loss": 2.0067, "step": 62796 }, { "epoch": 6.448860135551448, "grad_norm": 0.07534775882959366, "learning_rate": 0.01, "loss": 1.9818, "step": 62799 }, { "epoch": 6.449168207024029, "grad_norm": 0.0421251580119133, "learning_rate": 0.01, "loss": 1.9899, "step": 62802 }, { "epoch": 6.449476278496611, "grad_norm": 0.06340447813272476, "learning_rate": 0.01, "loss": 1.9611, "step": 62805 }, { "epoch": 6.4497843499691925, "grad_norm": 0.06287892907857895, "learning_rate": 0.01, "loss": 1.9908, "step": 62808 }, { "epoch": 6.4500924214417745, "grad_norm": 0.11696802824735641, "learning_rate": 0.01, "loss": 1.9796, "step": 62811 }, { "epoch": 6.450400492914357, "grad_norm": 0.061545319855213165, "learning_rate": 0.01, "loss": 1.9963, "step": 62814 }, { "epoch": 6.450708564386938, "grad_norm": 0.04134809225797653, "learning_rate": 0.01, "loss": 1.9419, "step": 62817 }, { "epoch": 6.45101663585952, "grad_norm": 0.11149853467941284, "learning_rate": 0.01, "loss": 1.9855, "step": 62820 }, { "epoch": 6.451324707332101, "grad_norm": 0.06323488801717758, "learning_rate": 0.01, "loss": 1.9718, "step": 62823 }, { "epoch": 6.451632778804683, "grad_norm": 0.0836939662694931, "learning_rate": 0.01, "loss": 2.006, "step": 62826 }, { "epoch": 6.451940850277264, "grad_norm": 0.04251860827207565, "learning_rate": 0.01, "loss": 1.9761, "step": 62829 }, { "epoch": 6.452248921749846, "grad_norm": 0.08365978300571442, "learning_rate": 0.01, "loss": 1.977, "step": 62832 }, { "epoch": 6.452556993222427, "grad_norm": 0.07985960692167282, "learning_rate": 0.01, "loss": 2.0022, "step": 62835 }, { "epoch": 6.452865064695009, "grad_norm": 0.0690624862909317, "learning_rate": 0.01, "loss": 1.9691, "step": 62838 }, { "epoch": 6.453173136167591, "grad_norm": 0.07743382453918457, "learning_rate": 0.01, "loss": 1.9828, "step": 62841 }, { "epoch": 6.453481207640173, "grad_norm": 0.06049206852912903, "learning_rate": 0.01, "loss": 2.0078, "step": 62844 }, { "epoch": 6.453789279112754, "grad_norm": 0.049416352063417435, "learning_rate": 0.01, "loss": 1.9806, "step": 62847 }, { "epoch": 6.454097350585336, "grad_norm": 0.03889904171228409, "learning_rate": 0.01, "loss": 1.9522, "step": 62850 }, { "epoch": 6.454405422057917, "grad_norm": 0.03183984011411667, "learning_rate": 0.01, "loss": 1.9544, "step": 62853 }, { "epoch": 6.454713493530499, "grad_norm": 0.03685016930103302, "learning_rate": 0.01, "loss": 1.9953, "step": 62856 }, { "epoch": 6.455021565003081, "grad_norm": 0.05258958786725998, "learning_rate": 0.01, "loss": 1.9811, "step": 62859 }, { "epoch": 6.455329636475662, "grad_norm": 0.12214828282594681, "learning_rate": 0.01, "loss": 1.9922, "step": 62862 }, { "epoch": 6.455637707948244, "grad_norm": 0.04400373622775078, "learning_rate": 0.01, "loss": 1.9588, "step": 62865 }, { "epoch": 6.4559457794208255, "grad_norm": 0.09445548057556152, "learning_rate": 0.01, "loss": 1.9745, "step": 62868 }, { "epoch": 6.4562538508934075, "grad_norm": 0.05584699660539627, "learning_rate": 0.01, "loss": 1.9717, "step": 62871 }, { "epoch": 6.456561922365989, "grad_norm": 0.041519295424222946, "learning_rate": 0.01, "loss": 1.9585, "step": 62874 }, { "epoch": 6.456869993838571, "grad_norm": 0.08750978112220764, "learning_rate": 0.01, "loss": 1.9901, "step": 62877 }, { "epoch": 6.457178065311152, "grad_norm": 0.0746956318616867, "learning_rate": 0.01, "loss": 1.99, "step": 62880 }, { "epoch": 6.457486136783734, "grad_norm": 0.05346643924713135, "learning_rate": 0.01, "loss": 1.9846, "step": 62883 }, { "epoch": 6.457794208256315, "grad_norm": 0.033672984689474106, "learning_rate": 0.01, "loss": 1.9696, "step": 62886 }, { "epoch": 6.458102279728897, "grad_norm": 0.07131464779376984, "learning_rate": 0.01, "loss": 1.9933, "step": 62889 }, { "epoch": 6.458410351201478, "grad_norm": 0.08488958328962326, "learning_rate": 0.01, "loss": 1.9658, "step": 62892 }, { "epoch": 6.45871842267406, "grad_norm": 0.041019588708877563, "learning_rate": 0.01, "loss": 1.9843, "step": 62895 }, { "epoch": 6.459026494146642, "grad_norm": 0.07616984844207764, "learning_rate": 0.01, "loss": 1.9969, "step": 62898 }, { "epoch": 6.459334565619224, "grad_norm": 0.071281798183918, "learning_rate": 0.01, "loss": 1.9888, "step": 62901 }, { "epoch": 6.459642637091806, "grad_norm": 0.04009273648262024, "learning_rate": 0.01, "loss": 1.9522, "step": 62904 }, { "epoch": 6.459950708564387, "grad_norm": 0.059646125882864, "learning_rate": 0.01, "loss": 2.0167, "step": 62907 }, { "epoch": 6.460258780036969, "grad_norm": 0.08733568340539932, "learning_rate": 0.01, "loss": 1.9812, "step": 62910 }, { "epoch": 6.46056685150955, "grad_norm": 0.06603758037090302, "learning_rate": 0.01, "loss": 1.9814, "step": 62913 }, { "epoch": 6.460874922982132, "grad_norm": 0.036482322961091995, "learning_rate": 0.01, "loss": 1.9888, "step": 62916 }, { "epoch": 6.461182994454713, "grad_norm": 0.08736536651849747, "learning_rate": 0.01, "loss": 1.9714, "step": 62919 }, { "epoch": 6.461491065927295, "grad_norm": 0.10915714502334595, "learning_rate": 0.01, "loss": 1.9726, "step": 62922 }, { "epoch": 6.461799137399876, "grad_norm": 0.07954758405685425, "learning_rate": 0.01, "loss": 1.9964, "step": 62925 }, { "epoch": 6.4621072088724585, "grad_norm": 0.05602665990591049, "learning_rate": 0.01, "loss": 1.9987, "step": 62928 }, { "epoch": 6.46241528034504, "grad_norm": 0.09531257301568985, "learning_rate": 0.01, "loss": 1.9556, "step": 62931 }, { "epoch": 6.462723351817622, "grad_norm": 0.044863101094961166, "learning_rate": 0.01, "loss": 1.9761, "step": 62934 }, { "epoch": 6.463031423290204, "grad_norm": 0.0783008560538292, "learning_rate": 0.01, "loss": 1.9402, "step": 62937 }, { "epoch": 6.463339494762785, "grad_norm": 0.05667195841670036, "learning_rate": 0.01, "loss": 1.9911, "step": 62940 }, { "epoch": 6.463647566235367, "grad_norm": 0.13006943464279175, "learning_rate": 0.01, "loss": 2.0073, "step": 62943 }, { "epoch": 6.463955637707948, "grad_norm": 0.08932863920927048, "learning_rate": 0.01, "loss": 1.9763, "step": 62946 }, { "epoch": 6.46426370918053, "grad_norm": 0.04788200557231903, "learning_rate": 0.01, "loss": 1.9902, "step": 62949 }, { "epoch": 6.464571780653111, "grad_norm": 0.03904096782207489, "learning_rate": 0.01, "loss": 1.956, "step": 62952 }, { "epoch": 6.464879852125693, "grad_norm": 0.06443644315004349, "learning_rate": 0.01, "loss": 1.9765, "step": 62955 }, { "epoch": 6.4651879235982745, "grad_norm": 0.08434759825468063, "learning_rate": 0.01, "loss": 1.9849, "step": 62958 }, { "epoch": 6.465495995070857, "grad_norm": 0.03871188312768936, "learning_rate": 0.01, "loss": 1.9737, "step": 62961 }, { "epoch": 6.465804066543438, "grad_norm": 0.11748526990413666, "learning_rate": 0.01, "loss": 1.9857, "step": 62964 }, { "epoch": 6.46611213801602, "grad_norm": 0.05763693153858185, "learning_rate": 0.01, "loss": 1.9485, "step": 62967 }, { "epoch": 6.466420209488601, "grad_norm": 0.0419640988111496, "learning_rate": 0.01, "loss": 1.983, "step": 62970 }, { "epoch": 6.466728280961183, "grad_norm": 0.061243150383234024, "learning_rate": 0.01, "loss": 1.9936, "step": 62973 }, { "epoch": 6.467036352433764, "grad_norm": 0.10403522849082947, "learning_rate": 0.01, "loss": 1.9732, "step": 62976 }, { "epoch": 6.467344423906346, "grad_norm": 0.14000679552555084, "learning_rate": 0.01, "loss": 1.9806, "step": 62979 }, { "epoch": 6.467652495378928, "grad_norm": 0.08773007243871689, "learning_rate": 0.01, "loss": 1.989, "step": 62982 }, { "epoch": 6.467960566851509, "grad_norm": 0.04810422658920288, "learning_rate": 0.01, "loss": 1.9772, "step": 62985 }, { "epoch": 6.4682686383240915, "grad_norm": 0.05809152126312256, "learning_rate": 0.01, "loss": 1.9849, "step": 62988 }, { "epoch": 6.468576709796673, "grad_norm": 0.05598244071006775, "learning_rate": 0.01, "loss": 1.9859, "step": 62991 }, { "epoch": 6.468884781269255, "grad_norm": 0.07979332655668259, "learning_rate": 0.01, "loss": 1.9527, "step": 62994 }, { "epoch": 6.469192852741836, "grad_norm": 0.044398579746484756, "learning_rate": 0.01, "loss": 1.9995, "step": 62997 }, { "epoch": 6.469500924214418, "grad_norm": 0.04663238674402237, "learning_rate": 0.01, "loss": 1.9816, "step": 63000 }, { "epoch": 6.469808995686999, "grad_norm": 0.04528966173529625, "learning_rate": 0.01, "loss": 1.9898, "step": 63003 }, { "epoch": 6.470117067159581, "grad_norm": 0.06028036028146744, "learning_rate": 0.01, "loss": 1.9504, "step": 63006 }, { "epoch": 6.470425138632162, "grad_norm": 0.094419464468956, "learning_rate": 0.01, "loss": 1.9973, "step": 63009 }, { "epoch": 6.470733210104744, "grad_norm": 0.18547490239143372, "learning_rate": 0.01, "loss": 2.003, "step": 63012 }, { "epoch": 6.471041281577326, "grad_norm": 0.09084778279066086, "learning_rate": 0.01, "loss": 1.9759, "step": 63015 }, { "epoch": 6.4713493530499075, "grad_norm": 0.07517223805189133, "learning_rate": 0.01, "loss": 1.9685, "step": 63018 }, { "epoch": 6.47165742452249, "grad_norm": 0.041310783475637436, "learning_rate": 0.01, "loss": 1.9962, "step": 63021 }, { "epoch": 6.471965495995071, "grad_norm": 0.04006296768784523, "learning_rate": 0.01, "loss": 1.9688, "step": 63024 }, { "epoch": 6.472273567467653, "grad_norm": 0.03506629914045334, "learning_rate": 0.01, "loss": 1.9701, "step": 63027 }, { "epoch": 6.472581638940234, "grad_norm": 0.04409859701991081, "learning_rate": 0.01, "loss": 1.9993, "step": 63030 }, { "epoch": 6.472889710412816, "grad_norm": 0.054022327065467834, "learning_rate": 0.01, "loss": 1.985, "step": 63033 }, { "epoch": 6.473197781885397, "grad_norm": 0.06177505478262901, "learning_rate": 0.01, "loss": 1.9894, "step": 63036 }, { "epoch": 6.473505853357979, "grad_norm": 0.10999149829149246, "learning_rate": 0.01, "loss": 1.9949, "step": 63039 }, { "epoch": 6.47381392483056, "grad_norm": 0.130665123462677, "learning_rate": 0.01, "loss": 1.9813, "step": 63042 }, { "epoch": 6.4741219963031424, "grad_norm": 0.15091292560100555, "learning_rate": 0.01, "loss": 2.008, "step": 63045 }, { "epoch": 6.474430067775724, "grad_norm": 0.1041717529296875, "learning_rate": 0.01, "loss": 1.9666, "step": 63048 }, { "epoch": 6.474738139248306, "grad_norm": 0.05246599763631821, "learning_rate": 0.01, "loss": 1.9521, "step": 63051 }, { "epoch": 6.475046210720887, "grad_norm": 0.04646655172109604, "learning_rate": 0.01, "loss": 1.998, "step": 63054 }, { "epoch": 6.475354282193469, "grad_norm": 0.047972384840250015, "learning_rate": 0.01, "loss": 1.9716, "step": 63057 }, { "epoch": 6.475662353666051, "grad_norm": 0.057053469121456146, "learning_rate": 0.01, "loss": 1.983, "step": 63060 }, { "epoch": 6.475970425138632, "grad_norm": 0.044478513300418854, "learning_rate": 0.01, "loss": 1.9598, "step": 63063 }, { "epoch": 6.476278496611214, "grad_norm": 0.13227659463882446, "learning_rate": 0.01, "loss": 1.9661, "step": 63066 }, { "epoch": 6.476586568083795, "grad_norm": 0.09201903641223907, "learning_rate": 0.01, "loss": 1.9711, "step": 63069 }, { "epoch": 6.476894639556377, "grad_norm": 0.07452027499675751, "learning_rate": 0.01, "loss": 1.9585, "step": 63072 }, { "epoch": 6.4772027110289585, "grad_norm": 0.04616081714630127, "learning_rate": 0.01, "loss": 1.9762, "step": 63075 }, { "epoch": 6.4775107825015406, "grad_norm": 0.047700148075819016, "learning_rate": 0.01, "loss": 1.961, "step": 63078 }, { "epoch": 6.477818853974122, "grad_norm": 0.0366378016769886, "learning_rate": 0.01, "loss": 2.0038, "step": 63081 }, { "epoch": 6.478126925446704, "grad_norm": 0.037750110030174255, "learning_rate": 0.01, "loss": 1.98, "step": 63084 }, { "epoch": 6.478434996919285, "grad_norm": 0.05896897614002228, "learning_rate": 0.01, "loss": 1.9679, "step": 63087 }, { "epoch": 6.478743068391867, "grad_norm": 0.13248635828495026, "learning_rate": 0.01, "loss": 1.9915, "step": 63090 }, { "epoch": 6.479051139864448, "grad_norm": 0.14373722672462463, "learning_rate": 0.01, "loss": 1.9855, "step": 63093 }, { "epoch": 6.47935921133703, "grad_norm": 0.07670046389102936, "learning_rate": 0.01, "loss": 1.9747, "step": 63096 }, { "epoch": 6.479667282809612, "grad_norm": 0.05786823108792305, "learning_rate": 0.01, "loss": 1.99, "step": 63099 }, { "epoch": 6.479975354282193, "grad_norm": 0.10580223053693771, "learning_rate": 0.01, "loss": 1.988, "step": 63102 }, { "epoch": 6.4802834257547755, "grad_norm": 0.09750565141439438, "learning_rate": 0.01, "loss": 1.9609, "step": 63105 }, { "epoch": 6.480591497227357, "grad_norm": 0.07120301574468613, "learning_rate": 0.01, "loss": 1.9575, "step": 63108 }, { "epoch": 6.480899568699939, "grad_norm": 0.046966131776571274, "learning_rate": 0.01, "loss": 1.9867, "step": 63111 }, { "epoch": 6.48120764017252, "grad_norm": 0.04642505198717117, "learning_rate": 0.01, "loss": 1.9916, "step": 63114 }, { "epoch": 6.481515711645102, "grad_norm": 0.0937868282198906, "learning_rate": 0.01, "loss": 1.9597, "step": 63117 }, { "epoch": 6.481823783117683, "grad_norm": 0.11612875759601593, "learning_rate": 0.01, "loss": 1.9849, "step": 63120 }, { "epoch": 6.482131854590265, "grad_norm": 0.042997948825359344, "learning_rate": 0.01, "loss": 2.0063, "step": 63123 }, { "epoch": 6.482439926062846, "grad_norm": 0.10609213262796402, "learning_rate": 0.01, "loss": 1.9562, "step": 63126 }, { "epoch": 6.482747997535428, "grad_norm": 0.09225320816040039, "learning_rate": 0.01, "loss": 1.9682, "step": 63129 }, { "epoch": 6.4830560690080095, "grad_norm": 0.053099583834409714, "learning_rate": 0.01, "loss": 1.993, "step": 63132 }, { "epoch": 6.4833641404805915, "grad_norm": 0.04756970703601837, "learning_rate": 0.01, "loss": 1.9854, "step": 63135 }, { "epoch": 6.483672211953174, "grad_norm": 0.04079375043511391, "learning_rate": 0.01, "loss": 1.9496, "step": 63138 }, { "epoch": 6.483980283425755, "grad_norm": 0.08000772446393967, "learning_rate": 0.01, "loss": 2.0007, "step": 63141 }, { "epoch": 6.484288354898337, "grad_norm": 0.05502250790596008, "learning_rate": 0.01, "loss": 1.9906, "step": 63144 }, { "epoch": 6.484596426370918, "grad_norm": 0.07607629895210266, "learning_rate": 0.01, "loss": 1.965, "step": 63147 }, { "epoch": 6.4849044978435, "grad_norm": 0.0457836389541626, "learning_rate": 0.01, "loss": 1.9916, "step": 63150 }, { "epoch": 6.485212569316081, "grad_norm": 0.04571022093296051, "learning_rate": 0.01, "loss": 1.9824, "step": 63153 }, { "epoch": 6.485520640788663, "grad_norm": 0.054583076387643814, "learning_rate": 0.01, "loss": 1.9734, "step": 63156 }, { "epoch": 6.485828712261244, "grad_norm": 0.08559385687112808, "learning_rate": 0.01, "loss": 1.9781, "step": 63159 }, { "epoch": 6.486136783733826, "grad_norm": 0.06536659598350525, "learning_rate": 0.01, "loss": 1.9888, "step": 63162 }, { "epoch": 6.486444855206408, "grad_norm": 0.031179916113615036, "learning_rate": 0.01, "loss": 1.9609, "step": 63165 }, { "epoch": 6.48675292667899, "grad_norm": 0.038442451506853104, "learning_rate": 0.01, "loss": 1.9676, "step": 63168 }, { "epoch": 6.487060998151571, "grad_norm": 0.05742299556732178, "learning_rate": 0.01, "loss": 1.9717, "step": 63171 }, { "epoch": 6.487369069624153, "grad_norm": 0.07068540900945663, "learning_rate": 0.01, "loss": 1.9797, "step": 63174 }, { "epoch": 6.487677141096734, "grad_norm": 0.041148263961076736, "learning_rate": 0.01, "loss": 1.9949, "step": 63177 }, { "epoch": 6.487985212569316, "grad_norm": 0.08058618754148483, "learning_rate": 0.01, "loss": 1.9969, "step": 63180 }, { "epoch": 6.488293284041898, "grad_norm": 0.10617338865995407, "learning_rate": 0.01, "loss": 1.9701, "step": 63183 }, { "epoch": 6.488601355514479, "grad_norm": 0.13301149010658264, "learning_rate": 0.01, "loss": 1.9829, "step": 63186 }, { "epoch": 6.488909426987061, "grad_norm": 0.12188655138015747, "learning_rate": 0.01, "loss": 1.9722, "step": 63189 }, { "epoch": 6.4892174984596425, "grad_norm": 0.07612743228673935, "learning_rate": 0.01, "loss": 2.0, "step": 63192 }, { "epoch": 6.4895255699322245, "grad_norm": 0.08802825957536697, "learning_rate": 0.01, "loss": 1.9677, "step": 63195 }, { "epoch": 6.489833641404806, "grad_norm": 0.05692920833826065, "learning_rate": 0.01, "loss": 1.985, "step": 63198 }, { "epoch": 6.490141712877388, "grad_norm": 0.0412018783390522, "learning_rate": 0.01, "loss": 2.0093, "step": 63201 }, { "epoch": 6.490449784349969, "grad_norm": 0.0954689234495163, "learning_rate": 0.01, "loss": 1.9927, "step": 63204 }, { "epoch": 6.490757855822551, "grad_norm": 0.11650601774454117, "learning_rate": 0.01, "loss": 1.9773, "step": 63207 }, { "epoch": 6.491065927295132, "grad_norm": 0.06705356389284134, "learning_rate": 0.01, "loss": 1.9719, "step": 63210 }, { "epoch": 6.491373998767714, "grad_norm": 0.04871569201350212, "learning_rate": 0.01, "loss": 1.9835, "step": 63213 }, { "epoch": 6.491682070240296, "grad_norm": 0.041996728628873825, "learning_rate": 0.01, "loss": 1.9822, "step": 63216 }, { "epoch": 6.491990141712877, "grad_norm": 0.03821207210421562, "learning_rate": 0.01, "loss": 1.9666, "step": 63219 }, { "epoch": 6.492298213185459, "grad_norm": 0.03589041158556938, "learning_rate": 0.01, "loss": 1.981, "step": 63222 }, { "epoch": 6.492606284658041, "grad_norm": 0.05628860369324684, "learning_rate": 0.01, "loss": 1.9676, "step": 63225 }, { "epoch": 6.492914356130623, "grad_norm": 0.10156149417161942, "learning_rate": 0.01, "loss": 1.9782, "step": 63228 }, { "epoch": 6.493222427603204, "grad_norm": 0.1743369847536087, "learning_rate": 0.01, "loss": 1.9564, "step": 63231 }, { "epoch": 6.493530499075786, "grad_norm": 0.13694548606872559, "learning_rate": 0.01, "loss": 1.9924, "step": 63234 }, { "epoch": 6.493838570548367, "grad_norm": 0.12643152475357056, "learning_rate": 0.01, "loss": 1.979, "step": 63237 }, { "epoch": 6.494146642020949, "grad_norm": 0.14127200841903687, "learning_rate": 0.01, "loss": 1.9621, "step": 63240 }, { "epoch": 6.49445471349353, "grad_norm": 0.05180880054831505, "learning_rate": 0.01, "loss": 1.9607, "step": 63243 }, { "epoch": 6.494762784966112, "grad_norm": 0.050564274191856384, "learning_rate": 0.01, "loss": 1.9772, "step": 63246 }, { "epoch": 6.495070856438693, "grad_norm": 0.09520803391933441, "learning_rate": 0.01, "loss": 1.9697, "step": 63249 }, { "epoch": 6.4953789279112755, "grad_norm": 0.09522721916437149, "learning_rate": 0.01, "loss": 1.957, "step": 63252 }, { "epoch": 6.495686999383857, "grad_norm": 0.1086997389793396, "learning_rate": 0.01, "loss": 1.9849, "step": 63255 }, { "epoch": 6.495995070856439, "grad_norm": 0.08121590316295624, "learning_rate": 0.01, "loss": 1.966, "step": 63258 }, { "epoch": 6.496303142329021, "grad_norm": 0.06703486293554306, "learning_rate": 0.01, "loss": 1.9985, "step": 63261 }, { "epoch": 6.496611213801602, "grad_norm": 0.04283798858523369, "learning_rate": 0.01, "loss": 1.9757, "step": 63264 }, { "epoch": 6.496919285274184, "grad_norm": 0.04943424090743065, "learning_rate": 0.01, "loss": 1.9672, "step": 63267 }, { "epoch": 6.497227356746765, "grad_norm": 0.0578305721282959, "learning_rate": 0.01, "loss": 1.9733, "step": 63270 }, { "epoch": 6.497535428219347, "grad_norm": 0.0359860323369503, "learning_rate": 0.01, "loss": 1.9931, "step": 63273 }, { "epoch": 6.497843499691928, "grad_norm": 0.04893532767891884, "learning_rate": 0.01, "loss": 1.9842, "step": 63276 }, { "epoch": 6.49815157116451, "grad_norm": 0.04525068402290344, "learning_rate": 0.01, "loss": 1.9695, "step": 63279 }, { "epoch": 6.4984596426370915, "grad_norm": 0.04810551553964615, "learning_rate": 0.01, "loss": 1.9921, "step": 63282 }, { "epoch": 6.498767714109674, "grad_norm": 0.05285782366991043, "learning_rate": 0.01, "loss": 1.9744, "step": 63285 }, { "epoch": 6.499075785582255, "grad_norm": 0.032118529081344604, "learning_rate": 0.01, "loss": 1.9732, "step": 63288 }, { "epoch": 6.499383857054837, "grad_norm": 0.049875080585479736, "learning_rate": 0.01, "loss": 1.9758, "step": 63291 }, { "epoch": 6.499691928527418, "grad_norm": 0.04658648371696472, "learning_rate": 0.01, "loss": 2.0009, "step": 63294 }, { "epoch": 6.5, "grad_norm": 0.05129887908697128, "learning_rate": 0.01, "loss": 1.9754, "step": 63297 }, { "epoch": 6.500308071472581, "grad_norm": 0.07968533784151077, "learning_rate": 0.01, "loss": 1.9645, "step": 63300 }, { "epoch": 6.500616142945163, "grad_norm": 0.08152198046445847, "learning_rate": 0.01, "loss": 1.9582, "step": 63303 }, { "epoch": 6.500924214417745, "grad_norm": 0.07405789196491241, "learning_rate": 0.01, "loss": 1.9891, "step": 63306 }, { "epoch": 6.501232285890326, "grad_norm": 0.07860677689313889, "learning_rate": 0.01, "loss": 1.9587, "step": 63309 }, { "epoch": 6.5015403573629085, "grad_norm": 0.051370490342378616, "learning_rate": 0.01, "loss": 1.9809, "step": 63312 }, { "epoch": 6.50184842883549, "grad_norm": 0.04912220686674118, "learning_rate": 0.01, "loss": 1.9814, "step": 63315 }, { "epoch": 6.502156500308072, "grad_norm": 0.042900361120700836, "learning_rate": 0.01, "loss": 1.981, "step": 63318 }, { "epoch": 6.502464571780653, "grad_norm": 0.16261926293373108, "learning_rate": 0.01, "loss": 1.9983, "step": 63321 }, { "epoch": 6.502772643253235, "grad_norm": 0.11455561220645905, "learning_rate": 0.01, "loss": 1.9578, "step": 63324 }, { "epoch": 6.503080714725816, "grad_norm": 0.04585309699177742, "learning_rate": 0.01, "loss": 1.9609, "step": 63327 }, { "epoch": 6.503388786198398, "grad_norm": 0.03483370691537857, "learning_rate": 0.01, "loss": 1.9664, "step": 63330 }, { "epoch": 6.503696857670979, "grad_norm": 0.08230151981115341, "learning_rate": 0.01, "loss": 1.9607, "step": 63333 }, { "epoch": 6.504004929143561, "grad_norm": 0.06978236138820648, "learning_rate": 0.01, "loss": 1.9593, "step": 63336 }, { "epoch": 6.504313000616143, "grad_norm": 0.03723878785967827, "learning_rate": 0.01, "loss": 1.9832, "step": 63339 }, { "epoch": 6.5046210720887245, "grad_norm": 0.0846422016620636, "learning_rate": 0.01, "loss": 1.9824, "step": 63342 }, { "epoch": 6.504929143561307, "grad_norm": 0.06443771719932556, "learning_rate": 0.01, "loss": 1.9698, "step": 63345 }, { "epoch": 6.505237215033888, "grad_norm": 0.07399289309978485, "learning_rate": 0.01, "loss": 2.0061, "step": 63348 }, { "epoch": 6.50554528650647, "grad_norm": 0.08382997661828995, "learning_rate": 0.01, "loss": 1.9713, "step": 63351 }, { "epoch": 6.505853357979051, "grad_norm": 0.0767715722322464, "learning_rate": 0.01, "loss": 1.9787, "step": 63354 }, { "epoch": 6.506161429451633, "grad_norm": 0.07842829078435898, "learning_rate": 0.01, "loss": 1.9532, "step": 63357 }, { "epoch": 6.506469500924214, "grad_norm": 0.04764385148882866, "learning_rate": 0.01, "loss": 1.9685, "step": 63360 }, { "epoch": 6.506777572396796, "grad_norm": 0.08968043327331543, "learning_rate": 0.01, "loss": 1.9704, "step": 63363 }, { "epoch": 6.507085643869377, "grad_norm": 0.07326337695121765, "learning_rate": 0.01, "loss": 1.9575, "step": 63366 }, { "epoch": 6.507393715341959, "grad_norm": 0.04753720387816429, "learning_rate": 0.01, "loss": 1.9866, "step": 63369 }, { "epoch": 6.507701786814541, "grad_norm": 0.05614113807678223, "learning_rate": 0.01, "loss": 1.9876, "step": 63372 }, { "epoch": 6.508009858287123, "grad_norm": 0.04377448558807373, "learning_rate": 0.01, "loss": 1.971, "step": 63375 }, { "epoch": 6.508317929759704, "grad_norm": 0.03820153325796127, "learning_rate": 0.01, "loss": 2.0105, "step": 63378 }, { "epoch": 6.508626001232286, "grad_norm": 0.03955162316560745, "learning_rate": 0.01, "loss": 1.9584, "step": 63381 }, { "epoch": 6.508934072704868, "grad_norm": 0.03919532150030136, "learning_rate": 0.01, "loss": 1.9572, "step": 63384 }, { "epoch": 6.509242144177449, "grad_norm": 0.13084079325199127, "learning_rate": 0.01, "loss": 1.9801, "step": 63387 }, { "epoch": 6.509550215650031, "grad_norm": 0.05490676313638687, "learning_rate": 0.01, "loss": 1.9969, "step": 63390 }, { "epoch": 6.509858287122612, "grad_norm": 0.048263441771268845, "learning_rate": 0.01, "loss": 1.9959, "step": 63393 }, { "epoch": 6.510166358595194, "grad_norm": 0.07833252102136612, "learning_rate": 0.01, "loss": 1.9664, "step": 63396 }, { "epoch": 6.5104744300677755, "grad_norm": 0.06575249880552292, "learning_rate": 0.01, "loss": 1.9734, "step": 63399 }, { "epoch": 6.5107825015403575, "grad_norm": 0.1300118863582611, "learning_rate": 0.01, "loss": 1.9681, "step": 63402 }, { "epoch": 6.511090573012939, "grad_norm": 0.05606196075677872, "learning_rate": 0.01, "loss": 1.9852, "step": 63405 }, { "epoch": 6.511398644485521, "grad_norm": 0.04275618493556976, "learning_rate": 0.01, "loss": 2.0126, "step": 63408 }, { "epoch": 6.511706715958102, "grad_norm": 0.11378994584083557, "learning_rate": 0.01, "loss": 1.9589, "step": 63411 }, { "epoch": 6.512014787430684, "grad_norm": 0.04268041253089905, "learning_rate": 0.01, "loss": 1.9571, "step": 63414 }, { "epoch": 6.512322858903266, "grad_norm": 0.18747419118881226, "learning_rate": 0.01, "loss": 1.9748, "step": 63417 }, { "epoch": 6.512630930375847, "grad_norm": 0.10926712304353714, "learning_rate": 0.01, "loss": 1.9855, "step": 63420 }, { "epoch": 6.512939001848429, "grad_norm": 0.12267261743545532, "learning_rate": 0.01, "loss": 1.9852, "step": 63423 }, { "epoch": 6.51324707332101, "grad_norm": 0.06755910813808441, "learning_rate": 0.01, "loss": 1.9691, "step": 63426 }, { "epoch": 6.513555144793592, "grad_norm": 0.06156865134835243, "learning_rate": 0.01, "loss": 2.0032, "step": 63429 }, { "epoch": 6.513863216266174, "grad_norm": 0.0420864075422287, "learning_rate": 0.01, "loss": 1.9596, "step": 63432 }, { "epoch": 6.514171287738756, "grad_norm": 0.037585970014333725, "learning_rate": 0.01, "loss": 1.9785, "step": 63435 }, { "epoch": 6.514479359211337, "grad_norm": 0.042937878519296646, "learning_rate": 0.01, "loss": 1.9753, "step": 63438 }, { "epoch": 6.514787430683919, "grad_norm": 0.04278058186173439, "learning_rate": 0.01, "loss": 1.9666, "step": 63441 }, { "epoch": 6.5150955021565, "grad_norm": 0.053777556866407394, "learning_rate": 0.01, "loss": 1.9675, "step": 63444 }, { "epoch": 6.515403573629082, "grad_norm": 0.11828344315290451, "learning_rate": 0.01, "loss": 1.9974, "step": 63447 }, { "epoch": 6.515711645101663, "grad_norm": 0.07692524790763855, "learning_rate": 0.01, "loss": 1.9837, "step": 63450 }, { "epoch": 6.516019716574245, "grad_norm": 0.036424048244953156, "learning_rate": 0.01, "loss": 1.9669, "step": 63453 }, { "epoch": 6.516327788046826, "grad_norm": 0.051881253719329834, "learning_rate": 0.01, "loss": 1.9613, "step": 63456 }, { "epoch": 6.5166358595194085, "grad_norm": 0.09392795711755753, "learning_rate": 0.01, "loss": 1.9757, "step": 63459 }, { "epoch": 6.5169439309919905, "grad_norm": 0.05411149188876152, "learning_rate": 0.01, "loss": 1.9763, "step": 63462 }, { "epoch": 6.517252002464572, "grad_norm": 0.07104165852069855, "learning_rate": 0.01, "loss": 1.9676, "step": 63465 }, { "epoch": 6.517560073937154, "grad_norm": 0.0653427243232727, "learning_rate": 0.01, "loss": 1.9812, "step": 63468 }, { "epoch": 6.517868145409735, "grad_norm": 0.07316904515028, "learning_rate": 0.01, "loss": 1.9809, "step": 63471 }, { "epoch": 6.518176216882317, "grad_norm": 0.08225788921117783, "learning_rate": 0.01, "loss": 1.968, "step": 63474 }, { "epoch": 6.518484288354898, "grad_norm": 0.07449998706579208, "learning_rate": 0.01, "loss": 1.9587, "step": 63477 }, { "epoch": 6.51879235982748, "grad_norm": 0.07791251689195633, "learning_rate": 0.01, "loss": 1.9827, "step": 63480 }, { "epoch": 6.519100431300061, "grad_norm": 0.1233309879899025, "learning_rate": 0.01, "loss": 1.968, "step": 63483 }, { "epoch": 6.519408502772643, "grad_norm": 0.041390251368284225, "learning_rate": 0.01, "loss": 1.9804, "step": 63486 }, { "epoch": 6.5197165742452245, "grad_norm": 0.039635319262742996, "learning_rate": 0.01, "loss": 1.9641, "step": 63489 }, { "epoch": 6.520024645717807, "grad_norm": 0.03680063784122467, "learning_rate": 0.01, "loss": 1.9501, "step": 63492 }, { "epoch": 6.520332717190389, "grad_norm": 0.08327323198318481, "learning_rate": 0.01, "loss": 1.9728, "step": 63495 }, { "epoch": 6.52064078866297, "grad_norm": 0.10368441045284271, "learning_rate": 0.01, "loss": 1.9563, "step": 63498 }, { "epoch": 6.520948860135551, "grad_norm": 0.05991170555353165, "learning_rate": 0.01, "loss": 1.9843, "step": 63501 }, { "epoch": 6.521256931608133, "grad_norm": 0.05868203192949295, "learning_rate": 0.01, "loss": 2.0054, "step": 63504 }, { "epoch": 6.521565003080715, "grad_norm": 0.04409966990351677, "learning_rate": 0.01, "loss": 1.9698, "step": 63507 }, { "epoch": 6.521873074553296, "grad_norm": 0.04939546808600426, "learning_rate": 0.01, "loss": 2.0037, "step": 63510 }, { "epoch": 6.522181146025878, "grad_norm": 0.041482388973236084, "learning_rate": 0.01, "loss": 1.9707, "step": 63513 }, { "epoch": 6.5224892174984594, "grad_norm": 0.03503831848502159, "learning_rate": 0.01, "loss": 1.9745, "step": 63516 }, { "epoch": 6.5227972889710415, "grad_norm": 0.07293230295181274, "learning_rate": 0.01, "loss": 1.9793, "step": 63519 }, { "epoch": 6.523105360443623, "grad_norm": 0.13121885061264038, "learning_rate": 0.01, "loss": 1.9747, "step": 63522 }, { "epoch": 6.523413431916205, "grad_norm": 0.05674733221530914, "learning_rate": 0.01, "loss": 1.9724, "step": 63525 }, { "epoch": 6.523721503388786, "grad_norm": 0.13403059542179108, "learning_rate": 0.01, "loss": 1.9897, "step": 63528 }, { "epoch": 6.524029574861368, "grad_norm": 0.05892037972807884, "learning_rate": 0.01, "loss": 1.9806, "step": 63531 }, { "epoch": 6.524337646333949, "grad_norm": 0.05163764953613281, "learning_rate": 0.01, "loss": 1.9752, "step": 63534 }, { "epoch": 6.524645717806531, "grad_norm": 0.10264305770397186, "learning_rate": 0.01, "loss": 1.9814, "step": 63537 }, { "epoch": 6.524953789279113, "grad_norm": 0.060135189443826675, "learning_rate": 0.01, "loss": 1.9778, "step": 63540 }, { "epoch": 6.525261860751694, "grad_norm": 0.033466801047325134, "learning_rate": 0.01, "loss": 1.9649, "step": 63543 }, { "epoch": 6.525569932224276, "grad_norm": 0.05220318213105202, "learning_rate": 0.01, "loss": 1.9861, "step": 63546 }, { "epoch": 6.5258780036968576, "grad_norm": 0.04468940570950508, "learning_rate": 0.01, "loss": 1.9974, "step": 63549 }, { "epoch": 6.52618607516944, "grad_norm": 0.07253078371286392, "learning_rate": 0.01, "loss": 1.9834, "step": 63552 }, { "epoch": 6.526494146642021, "grad_norm": 0.039232298731803894, "learning_rate": 0.01, "loss": 1.9901, "step": 63555 }, { "epoch": 6.526802218114603, "grad_norm": 0.08601675182580948, "learning_rate": 0.01, "loss": 1.9845, "step": 63558 }, { "epoch": 6.527110289587184, "grad_norm": 0.11496642231941223, "learning_rate": 0.01, "loss": 1.9742, "step": 63561 }, { "epoch": 6.527418361059766, "grad_norm": 0.21819603443145752, "learning_rate": 0.01, "loss": 2.0011, "step": 63564 }, { "epoch": 6.527726432532347, "grad_norm": 0.15641893446445465, "learning_rate": 0.01, "loss": 1.9666, "step": 63567 }, { "epoch": 6.528034504004929, "grad_norm": 0.07390442490577698, "learning_rate": 0.01, "loss": 1.9679, "step": 63570 }, { "epoch": 6.52834257547751, "grad_norm": 0.03637871518731117, "learning_rate": 0.01, "loss": 1.9839, "step": 63573 }, { "epoch": 6.5286506469500925, "grad_norm": 0.05536114424467087, "learning_rate": 0.01, "loss": 1.9713, "step": 63576 }, { "epoch": 6.528958718422674, "grad_norm": 0.08773189783096313, "learning_rate": 0.01, "loss": 1.9722, "step": 63579 }, { "epoch": 6.529266789895256, "grad_norm": 0.05206519737839699, "learning_rate": 0.01, "loss": 2.0209, "step": 63582 }, { "epoch": 6.529574861367838, "grad_norm": 0.12326787412166595, "learning_rate": 0.01, "loss": 2.0021, "step": 63585 }, { "epoch": 6.529882932840419, "grad_norm": 0.10255524516105652, "learning_rate": 0.01, "loss": 1.9552, "step": 63588 }, { "epoch": 6.530191004313001, "grad_norm": 0.08496958762407303, "learning_rate": 0.01, "loss": 1.9621, "step": 63591 }, { "epoch": 6.530499075785582, "grad_norm": 0.040596868842840195, "learning_rate": 0.01, "loss": 1.9465, "step": 63594 }, { "epoch": 6.530807147258164, "grad_norm": 0.033690448850393295, "learning_rate": 0.01, "loss": 1.967, "step": 63597 }, { "epoch": 6.531115218730745, "grad_norm": 0.0766686350107193, "learning_rate": 0.01, "loss": 1.9839, "step": 63600 }, { "epoch": 6.531423290203327, "grad_norm": 0.12199605256319046, "learning_rate": 0.01, "loss": 1.982, "step": 63603 }, { "epoch": 6.5317313616759085, "grad_norm": 0.03897617757320404, "learning_rate": 0.01, "loss": 1.9975, "step": 63606 }, { "epoch": 6.532039433148491, "grad_norm": 0.05026644468307495, "learning_rate": 0.01, "loss": 2.0126, "step": 63609 }, { "epoch": 6.532347504621072, "grad_norm": 0.03886674344539642, "learning_rate": 0.01, "loss": 1.9843, "step": 63612 }, { "epoch": 6.532655576093654, "grad_norm": 0.09314893931150436, "learning_rate": 0.01, "loss": 1.9715, "step": 63615 }, { "epoch": 6.532963647566236, "grad_norm": 0.048525359481573105, "learning_rate": 0.01, "loss": 1.9957, "step": 63618 }, { "epoch": 6.533271719038817, "grad_norm": 0.027339305728673935, "learning_rate": 0.01, "loss": 1.9555, "step": 63621 }, { "epoch": 6.533579790511399, "grad_norm": 0.0443040207028389, "learning_rate": 0.01, "loss": 1.9848, "step": 63624 }, { "epoch": 6.53388786198398, "grad_norm": 0.06850685924291611, "learning_rate": 0.01, "loss": 1.9784, "step": 63627 }, { "epoch": 6.534195933456562, "grad_norm": 0.06264297664165497, "learning_rate": 0.01, "loss": 1.9552, "step": 63630 }, { "epoch": 6.534504004929143, "grad_norm": 0.06047286093235016, "learning_rate": 0.01, "loss": 1.9898, "step": 63633 }, { "epoch": 6.5348120764017255, "grad_norm": 0.08621931076049805, "learning_rate": 0.01, "loss": 1.9852, "step": 63636 }, { "epoch": 6.535120147874307, "grad_norm": 0.06598355621099472, "learning_rate": 0.01, "loss": 1.9792, "step": 63639 }, { "epoch": 6.535428219346889, "grad_norm": 0.04652687907218933, "learning_rate": 0.01, "loss": 1.9644, "step": 63642 }, { "epoch": 6.53573629081947, "grad_norm": 0.04016298055648804, "learning_rate": 0.01, "loss": 1.9694, "step": 63645 }, { "epoch": 6.536044362292052, "grad_norm": 0.039485424757003784, "learning_rate": 0.01, "loss": 1.928, "step": 63648 }, { "epoch": 6.536352433764633, "grad_norm": 0.03557172790169716, "learning_rate": 0.01, "loss": 1.96, "step": 63651 }, { "epoch": 6.536660505237215, "grad_norm": 0.0416797399520874, "learning_rate": 0.01, "loss": 1.9884, "step": 63654 }, { "epoch": 6.536968576709796, "grad_norm": 0.09189657121896744, "learning_rate": 0.01, "loss": 1.98, "step": 63657 }, { "epoch": 6.537276648182378, "grad_norm": 0.04484305903315544, "learning_rate": 0.01, "loss": 2.0131, "step": 63660 }, { "epoch": 6.53758471965496, "grad_norm": 0.059794455766677856, "learning_rate": 0.01, "loss": 2.0018, "step": 63663 }, { "epoch": 6.5378927911275415, "grad_norm": 0.05551350116729736, "learning_rate": 0.01, "loss": 1.9546, "step": 63666 }, { "epoch": 6.538200862600124, "grad_norm": 0.19956345856189728, "learning_rate": 0.01, "loss": 1.961, "step": 63669 }, { "epoch": 6.538508934072705, "grad_norm": 0.0885189101099968, "learning_rate": 0.01, "loss": 1.9769, "step": 63672 }, { "epoch": 6.538817005545287, "grad_norm": 0.05539345368742943, "learning_rate": 0.01, "loss": 1.9832, "step": 63675 }, { "epoch": 6.539125077017868, "grad_norm": 0.037228211760520935, "learning_rate": 0.01, "loss": 1.9787, "step": 63678 }, { "epoch": 6.53943314849045, "grad_norm": 0.03209219127893448, "learning_rate": 0.01, "loss": 1.9876, "step": 63681 }, { "epoch": 6.539741219963031, "grad_norm": 0.029509764164686203, "learning_rate": 0.01, "loss": 1.9849, "step": 63684 }, { "epoch": 6.540049291435613, "grad_norm": 0.052676524966955185, "learning_rate": 0.01, "loss": 1.997, "step": 63687 }, { "epoch": 6.540357362908194, "grad_norm": 0.0744934156537056, "learning_rate": 0.01, "loss": 1.9809, "step": 63690 }, { "epoch": 6.540665434380776, "grad_norm": 0.05411761999130249, "learning_rate": 0.01, "loss": 1.9748, "step": 63693 }, { "epoch": 6.5409735058533585, "grad_norm": 0.04553327336907387, "learning_rate": 0.01, "loss": 1.9747, "step": 63696 }, { "epoch": 6.54128157732594, "grad_norm": 0.16345827281475067, "learning_rate": 0.01, "loss": 1.9884, "step": 63699 }, { "epoch": 6.541589648798521, "grad_norm": 0.05779232829809189, "learning_rate": 0.01, "loss": 1.97, "step": 63702 }, { "epoch": 6.541897720271103, "grad_norm": 0.032575078308582306, "learning_rate": 0.01, "loss": 1.9711, "step": 63705 }, { "epoch": 6.542205791743685, "grad_norm": 0.043234266340732574, "learning_rate": 0.01, "loss": 1.987, "step": 63708 }, { "epoch": 6.542513863216266, "grad_norm": 0.050926946103572845, "learning_rate": 0.01, "loss": 1.999, "step": 63711 }, { "epoch": 6.542821934688848, "grad_norm": 0.040199290961027145, "learning_rate": 0.01, "loss": 1.9849, "step": 63714 }, { "epoch": 6.543130006161429, "grad_norm": 0.0373854786157608, "learning_rate": 0.01, "loss": 1.9813, "step": 63717 }, { "epoch": 6.543438077634011, "grad_norm": 0.04420515522360802, "learning_rate": 0.01, "loss": 1.9614, "step": 63720 }, { "epoch": 6.5437461491065925, "grad_norm": 0.10335779935121536, "learning_rate": 0.01, "loss": 2.0047, "step": 63723 }, { "epoch": 6.5440542205791745, "grad_norm": 0.08750100433826447, "learning_rate": 0.01, "loss": 2.0028, "step": 63726 }, { "epoch": 6.544362292051756, "grad_norm": 0.08968067169189453, "learning_rate": 0.01, "loss": 2.003, "step": 63729 }, { "epoch": 6.544670363524338, "grad_norm": 0.05931568518280983, "learning_rate": 0.01, "loss": 1.9806, "step": 63732 }, { "epoch": 6.544978434996919, "grad_norm": 0.04726015031337738, "learning_rate": 0.01, "loss": 1.9871, "step": 63735 }, { "epoch": 6.545286506469501, "grad_norm": 0.048892538994550705, "learning_rate": 0.01, "loss": 1.9894, "step": 63738 }, { "epoch": 6.545594577942083, "grad_norm": 0.042674802243709564, "learning_rate": 0.01, "loss": 1.9829, "step": 63741 }, { "epoch": 6.545902649414664, "grad_norm": 0.11424809694290161, "learning_rate": 0.01, "loss": 1.9681, "step": 63744 }, { "epoch": 6.546210720887246, "grad_norm": 0.037352122366428375, "learning_rate": 0.01, "loss": 1.9509, "step": 63747 }, { "epoch": 6.546518792359827, "grad_norm": 0.04247456043958664, "learning_rate": 0.01, "loss": 1.9976, "step": 63750 }, { "epoch": 6.546826863832409, "grad_norm": 0.05194523185491562, "learning_rate": 0.01, "loss": 1.9767, "step": 63753 }, { "epoch": 6.547134935304991, "grad_norm": 0.055400025099515915, "learning_rate": 0.01, "loss": 1.9541, "step": 63756 }, { "epoch": 6.547443006777573, "grad_norm": 0.06295850872993469, "learning_rate": 0.01, "loss": 1.9756, "step": 63759 }, { "epoch": 6.547751078250154, "grad_norm": 0.05281165987253189, "learning_rate": 0.01, "loss": 1.9953, "step": 63762 }, { "epoch": 6.548059149722736, "grad_norm": 0.04838143289089203, "learning_rate": 0.01, "loss": 1.9818, "step": 63765 }, { "epoch": 6.548367221195317, "grad_norm": 0.03333796560764313, "learning_rate": 0.01, "loss": 1.9677, "step": 63768 }, { "epoch": 6.548675292667899, "grad_norm": 0.029952581971883774, "learning_rate": 0.01, "loss": 1.9974, "step": 63771 }, { "epoch": 6.54898336414048, "grad_norm": 0.03036186844110489, "learning_rate": 0.01, "loss": 1.9715, "step": 63774 }, { "epoch": 6.549291435613062, "grad_norm": 0.06050482019782066, "learning_rate": 0.01, "loss": 1.9668, "step": 63777 }, { "epoch": 6.549599507085643, "grad_norm": 0.08855880796909332, "learning_rate": 0.01, "loss": 1.9847, "step": 63780 }, { "epoch": 6.5499075785582255, "grad_norm": 0.08679243922233582, "learning_rate": 0.01, "loss": 1.9938, "step": 63783 }, { "epoch": 6.5502156500308075, "grad_norm": 0.04234171286225319, "learning_rate": 0.01, "loss": 1.9808, "step": 63786 }, { "epoch": 6.550523721503389, "grad_norm": 0.029922012239694595, "learning_rate": 0.01, "loss": 1.9517, "step": 63789 }, { "epoch": 6.550831792975971, "grad_norm": 0.11024188250303268, "learning_rate": 0.01, "loss": 1.9659, "step": 63792 }, { "epoch": 6.551139864448552, "grad_norm": 0.03447732329368591, "learning_rate": 0.01, "loss": 1.9677, "step": 63795 }, { "epoch": 6.551447935921134, "grad_norm": 0.04188188910484314, "learning_rate": 0.01, "loss": 1.98, "step": 63798 }, { "epoch": 6.551756007393715, "grad_norm": 0.040945570915937424, "learning_rate": 0.01, "loss": 1.9852, "step": 63801 }, { "epoch": 6.552064078866297, "grad_norm": 0.07144046574831009, "learning_rate": 0.01, "loss": 1.98, "step": 63804 }, { "epoch": 6.552372150338878, "grad_norm": 0.043573856353759766, "learning_rate": 0.01, "loss": 1.969, "step": 63807 }, { "epoch": 6.55268022181146, "grad_norm": 0.04076509550213814, "learning_rate": 0.01, "loss": 1.9931, "step": 63810 }, { "epoch": 6.5529882932840415, "grad_norm": 0.042045023292303085, "learning_rate": 0.01, "loss": 1.9812, "step": 63813 }, { "epoch": 6.553296364756624, "grad_norm": 0.047036029398441315, "learning_rate": 0.01, "loss": 1.9671, "step": 63816 }, { "epoch": 6.553604436229206, "grad_norm": 0.08206555247306824, "learning_rate": 0.01, "loss": 1.997, "step": 63819 }, { "epoch": 6.553912507701787, "grad_norm": 0.04344995692372322, "learning_rate": 0.01, "loss": 1.9864, "step": 63822 }, { "epoch": 6.554220579174368, "grad_norm": 0.05620454624295235, "learning_rate": 0.01, "loss": 1.9974, "step": 63825 }, { "epoch": 6.55452865064695, "grad_norm": 0.042846135795116425, "learning_rate": 0.01, "loss": 1.967, "step": 63828 }, { "epoch": 6.554836722119532, "grad_norm": 0.04379443824291229, "learning_rate": 0.01, "loss": 1.9887, "step": 63831 }, { "epoch": 6.555144793592113, "grad_norm": 0.050486281514167786, "learning_rate": 0.01, "loss": 1.9708, "step": 63834 }, { "epoch": 6.555452865064695, "grad_norm": 0.10757488012313843, "learning_rate": 0.01, "loss": 1.96, "step": 63837 }, { "epoch": 6.555760936537276, "grad_norm": 0.09059060364961624, "learning_rate": 0.01, "loss": 1.9658, "step": 63840 }, { "epoch": 6.5560690080098585, "grad_norm": 0.055471502244472504, "learning_rate": 0.01, "loss": 1.9772, "step": 63843 }, { "epoch": 6.55637707948244, "grad_norm": 0.10934463888406754, "learning_rate": 0.01, "loss": 1.9681, "step": 63846 }, { "epoch": 6.556685150955022, "grad_norm": 0.08888571709394455, "learning_rate": 0.01, "loss": 1.9686, "step": 63849 }, { "epoch": 6.556993222427603, "grad_norm": 0.11083342134952545, "learning_rate": 0.01, "loss": 1.9915, "step": 63852 }, { "epoch": 6.557301293900185, "grad_norm": 0.07405796647071838, "learning_rate": 0.01, "loss": 1.9707, "step": 63855 }, { "epoch": 6.557609365372766, "grad_norm": 0.08451070636510849, "learning_rate": 0.01, "loss": 2.0033, "step": 63858 }, { "epoch": 6.557917436845348, "grad_norm": 0.05954553931951523, "learning_rate": 0.01, "loss": 1.9894, "step": 63861 }, { "epoch": 6.55822550831793, "grad_norm": 0.06008210405707359, "learning_rate": 0.01, "loss": 1.9529, "step": 63864 }, { "epoch": 6.558533579790511, "grad_norm": 0.0861048474907875, "learning_rate": 0.01, "loss": 1.9767, "step": 63867 }, { "epoch": 6.558841651263093, "grad_norm": 0.06240614131093025, "learning_rate": 0.01, "loss": 1.9825, "step": 63870 }, { "epoch": 6.5591497227356745, "grad_norm": 0.05393391102552414, "learning_rate": 0.01, "loss": 1.9604, "step": 63873 }, { "epoch": 6.559457794208257, "grad_norm": 0.08556719869375229, "learning_rate": 0.01, "loss": 1.9926, "step": 63876 }, { "epoch": 6.559765865680838, "grad_norm": 0.045306041836738586, "learning_rate": 0.01, "loss": 1.9863, "step": 63879 }, { "epoch": 6.56007393715342, "grad_norm": 0.11380963772535324, "learning_rate": 0.01, "loss": 2.0034, "step": 63882 }, { "epoch": 6.560382008626001, "grad_norm": 0.04248273745179176, "learning_rate": 0.01, "loss": 1.9724, "step": 63885 }, { "epoch": 6.560690080098583, "grad_norm": 0.040910713374614716, "learning_rate": 0.01, "loss": 1.9818, "step": 63888 }, { "epoch": 6.560998151571164, "grad_norm": 0.046450424939394, "learning_rate": 0.01, "loss": 1.9842, "step": 63891 }, { "epoch": 6.561306223043746, "grad_norm": 0.07835663855075836, "learning_rate": 0.01, "loss": 1.973, "step": 63894 }, { "epoch": 6.561614294516328, "grad_norm": 0.04756823554635048, "learning_rate": 0.01, "loss": 1.9691, "step": 63897 }, { "epoch": 6.561922365988909, "grad_norm": 0.10655516386032104, "learning_rate": 0.01, "loss": 1.9948, "step": 63900 }, { "epoch": 6.562230437461491, "grad_norm": 0.08743315935134888, "learning_rate": 0.01, "loss": 1.9793, "step": 63903 }, { "epoch": 6.562538508934073, "grad_norm": 0.05087851360440254, "learning_rate": 0.01, "loss": 1.9687, "step": 63906 }, { "epoch": 6.562846580406655, "grad_norm": 0.03635932877659798, "learning_rate": 0.01, "loss": 1.9521, "step": 63909 }, { "epoch": 6.563154651879236, "grad_norm": 0.09845136106014252, "learning_rate": 0.01, "loss": 1.979, "step": 63912 }, { "epoch": 6.563462723351818, "grad_norm": 0.03554631769657135, "learning_rate": 0.01, "loss": 1.9448, "step": 63915 }, { "epoch": 6.563770794824399, "grad_norm": 0.053068485110998154, "learning_rate": 0.01, "loss": 1.9789, "step": 63918 }, { "epoch": 6.564078866296981, "grad_norm": 0.09752979129552841, "learning_rate": 0.01, "loss": 1.9749, "step": 63921 }, { "epoch": 6.564386937769562, "grad_norm": 0.10045387595891953, "learning_rate": 0.01, "loss": 1.9748, "step": 63924 }, { "epoch": 6.564695009242144, "grad_norm": 0.1082577034831047, "learning_rate": 0.01, "loss": 1.9848, "step": 63927 }, { "epoch": 6.5650030807147255, "grad_norm": 0.09065879136323929, "learning_rate": 0.01, "loss": 1.9871, "step": 63930 }, { "epoch": 6.5653111521873075, "grad_norm": 0.06496050953865051, "learning_rate": 0.01, "loss": 1.9963, "step": 63933 }, { "epoch": 6.565619223659889, "grad_norm": 0.07092882692813873, "learning_rate": 0.01, "loss": 1.9729, "step": 63936 }, { "epoch": 6.565927295132471, "grad_norm": 0.048295971006155014, "learning_rate": 0.01, "loss": 1.9828, "step": 63939 }, { "epoch": 6.566235366605053, "grad_norm": 0.045929424464702606, "learning_rate": 0.01, "loss": 1.9922, "step": 63942 }, { "epoch": 6.566543438077634, "grad_norm": 0.03663928061723709, "learning_rate": 0.01, "loss": 1.9835, "step": 63945 }, { "epoch": 6.566851509550216, "grad_norm": 0.03723525255918503, "learning_rate": 0.01, "loss": 1.9486, "step": 63948 }, { "epoch": 6.567159581022797, "grad_norm": 0.05147719010710716, "learning_rate": 0.01, "loss": 1.9737, "step": 63951 }, { "epoch": 6.567467652495379, "grad_norm": 0.04248051717877388, "learning_rate": 0.01, "loss": 1.9759, "step": 63954 }, { "epoch": 6.56777572396796, "grad_norm": 0.054685961455106735, "learning_rate": 0.01, "loss": 1.9616, "step": 63957 }, { "epoch": 6.568083795440542, "grad_norm": 0.07126769423484802, "learning_rate": 0.01, "loss": 1.9693, "step": 63960 }, { "epoch": 6.568391866913124, "grad_norm": 0.0660753920674324, "learning_rate": 0.01, "loss": 1.9703, "step": 63963 }, { "epoch": 6.568699938385706, "grad_norm": 0.04418491572141647, "learning_rate": 0.01, "loss": 1.9591, "step": 63966 }, { "epoch": 6.569008009858287, "grad_norm": 0.03849758952856064, "learning_rate": 0.01, "loss": 1.9853, "step": 63969 }, { "epoch": 6.569316081330869, "grad_norm": 0.0360284261405468, "learning_rate": 0.01, "loss": 1.9819, "step": 63972 }, { "epoch": 6.56962415280345, "grad_norm": 0.04783300310373306, "learning_rate": 0.01, "loss": 1.9934, "step": 63975 }, { "epoch": 6.569932224276032, "grad_norm": 0.104099340736866, "learning_rate": 0.01, "loss": 1.9797, "step": 63978 }, { "epoch": 6.570240295748613, "grad_norm": 0.06214939430356026, "learning_rate": 0.01, "loss": 1.9825, "step": 63981 }, { "epoch": 6.570548367221195, "grad_norm": 0.03583189472556114, "learning_rate": 0.01, "loss": 1.962, "step": 63984 }, { "epoch": 6.570856438693777, "grad_norm": 0.03806734457612038, "learning_rate": 0.01, "loss": 1.9445, "step": 63987 }, { "epoch": 6.5711645101663585, "grad_norm": 0.037077397108078, "learning_rate": 0.01, "loss": 1.9754, "step": 63990 }, { "epoch": 6.5714725816389405, "grad_norm": 0.1366073340177536, "learning_rate": 0.01, "loss": 1.9836, "step": 63993 }, { "epoch": 6.571780653111522, "grad_norm": 0.06739377975463867, "learning_rate": 0.01, "loss": 1.946, "step": 63996 }, { "epoch": 6.572088724584104, "grad_norm": 0.041534364223480225, "learning_rate": 0.01, "loss": 1.9853, "step": 63999 }, { "epoch": 6.572396796056685, "grad_norm": 0.04053931310772896, "learning_rate": 0.01, "loss": 1.9929, "step": 64002 }, { "epoch": 6.572704867529267, "grad_norm": 0.04500468447804451, "learning_rate": 0.01, "loss": 1.9702, "step": 64005 }, { "epoch": 6.573012939001848, "grad_norm": 0.07406989485025406, "learning_rate": 0.01, "loss": 1.9843, "step": 64008 }, { "epoch": 6.57332101047443, "grad_norm": 0.05247531831264496, "learning_rate": 0.01, "loss": 1.9658, "step": 64011 }, { "epoch": 6.573629081947011, "grad_norm": 0.117593914270401, "learning_rate": 0.01, "loss": 1.9978, "step": 64014 }, { "epoch": 6.573937153419593, "grad_norm": 0.07242528349161148, "learning_rate": 0.01, "loss": 1.9675, "step": 64017 }, { "epoch": 6.574245224892175, "grad_norm": 0.05359083414077759, "learning_rate": 0.01, "loss": 1.9632, "step": 64020 }, { "epoch": 6.574553296364757, "grad_norm": 0.036466315388679504, "learning_rate": 0.01, "loss": 1.9753, "step": 64023 }, { "epoch": 6.574861367837338, "grad_norm": 0.03925064578652382, "learning_rate": 0.01, "loss": 1.9683, "step": 64026 }, { "epoch": 6.57516943930992, "grad_norm": 0.05542049929499626, "learning_rate": 0.01, "loss": 1.9873, "step": 64029 }, { "epoch": 6.575477510782502, "grad_norm": 0.06237075477838516, "learning_rate": 0.01, "loss": 1.9851, "step": 64032 }, { "epoch": 6.575785582255083, "grad_norm": 0.04179569333791733, "learning_rate": 0.01, "loss": 1.9761, "step": 64035 }, { "epoch": 6.576093653727665, "grad_norm": 0.11507870256900787, "learning_rate": 0.01, "loss": 1.9753, "step": 64038 }, { "epoch": 6.576401725200246, "grad_norm": 0.06069584935903549, "learning_rate": 0.01, "loss": 1.9697, "step": 64041 }, { "epoch": 6.576709796672828, "grad_norm": 0.041366275399923325, "learning_rate": 0.01, "loss": 1.9638, "step": 64044 }, { "epoch": 6.5770178681454095, "grad_norm": 0.06424157321453094, "learning_rate": 0.01, "loss": 1.9826, "step": 64047 }, { "epoch": 6.5773259396179915, "grad_norm": 0.06879021972417831, "learning_rate": 0.01, "loss": 2.0027, "step": 64050 }, { "epoch": 6.577634011090573, "grad_norm": 0.043172743171453476, "learning_rate": 0.01, "loss": 1.9724, "step": 64053 }, { "epoch": 6.577942082563155, "grad_norm": 0.05824809893965721, "learning_rate": 0.01, "loss": 1.9463, "step": 64056 }, { "epoch": 6.578250154035736, "grad_norm": 0.045327652245759964, "learning_rate": 0.01, "loss": 1.9779, "step": 64059 }, { "epoch": 6.578558225508318, "grad_norm": 0.11322353035211563, "learning_rate": 0.01, "loss": 1.981, "step": 64062 }, { "epoch": 6.5788662969809, "grad_norm": 0.06283524632453918, "learning_rate": 0.01, "loss": 1.9633, "step": 64065 }, { "epoch": 6.579174368453481, "grad_norm": 0.06951749324798584, "learning_rate": 0.01, "loss": 1.9673, "step": 64068 }, { "epoch": 6.579482439926063, "grad_norm": 0.1091168001294136, "learning_rate": 0.01, "loss": 1.9652, "step": 64071 }, { "epoch": 6.579790511398644, "grad_norm": 0.0779736191034317, "learning_rate": 0.01, "loss": 1.9903, "step": 64074 }, { "epoch": 6.580098582871226, "grad_norm": 0.06814195215702057, "learning_rate": 0.01, "loss": 1.9717, "step": 64077 }, { "epoch": 6.580406654343808, "grad_norm": 0.06565315276384354, "learning_rate": 0.01, "loss": 1.9895, "step": 64080 }, { "epoch": 6.58071472581639, "grad_norm": 0.08950956165790558, "learning_rate": 0.01, "loss": 1.9727, "step": 64083 }, { "epoch": 6.581022797288971, "grad_norm": 0.06202126666903496, "learning_rate": 0.01, "loss": 1.992, "step": 64086 }, { "epoch": 6.581330868761553, "grad_norm": 0.10947906970977783, "learning_rate": 0.01, "loss": 1.988, "step": 64089 }, { "epoch": 6.581638940234134, "grad_norm": 0.051477983593940735, "learning_rate": 0.01, "loss": 1.9994, "step": 64092 }, { "epoch": 6.581947011706716, "grad_norm": 0.05183662101626396, "learning_rate": 0.01, "loss": 1.9831, "step": 64095 }, { "epoch": 6.582255083179298, "grad_norm": 0.04651165008544922, "learning_rate": 0.01, "loss": 1.9713, "step": 64098 }, { "epoch": 6.582563154651879, "grad_norm": 0.035913266241550446, "learning_rate": 0.01, "loss": 1.96, "step": 64101 }, { "epoch": 6.58287122612446, "grad_norm": 0.0991610661149025, "learning_rate": 0.01, "loss": 2.0085, "step": 64104 }, { "epoch": 6.5831792975970425, "grad_norm": 0.04391265660524368, "learning_rate": 0.01, "loss": 1.9765, "step": 64107 }, { "epoch": 6.5834873690696245, "grad_norm": 0.14331793785095215, "learning_rate": 0.01, "loss": 2.026, "step": 64110 }, { "epoch": 6.583795440542206, "grad_norm": 0.10682855546474457, "learning_rate": 0.01, "loss": 1.965, "step": 64113 }, { "epoch": 6.584103512014788, "grad_norm": 0.07142464816570282, "learning_rate": 0.01, "loss": 1.9844, "step": 64116 }, { "epoch": 6.584411583487369, "grad_norm": 0.05298778787255287, "learning_rate": 0.01, "loss": 1.9831, "step": 64119 }, { "epoch": 6.584719654959951, "grad_norm": 0.10031582415103912, "learning_rate": 0.01, "loss": 1.9498, "step": 64122 }, { "epoch": 6.585027726432532, "grad_norm": 0.03978649899363518, "learning_rate": 0.01, "loss": 1.9734, "step": 64125 }, { "epoch": 6.585335797905114, "grad_norm": 0.05913195759057999, "learning_rate": 0.01, "loss": 1.9689, "step": 64128 }, { "epoch": 6.585643869377695, "grad_norm": 0.03559190407395363, "learning_rate": 0.01, "loss": 1.9879, "step": 64131 }, { "epoch": 6.585951940850277, "grad_norm": 0.09079554677009583, "learning_rate": 0.01, "loss": 1.9875, "step": 64134 }, { "epoch": 6.5862600123228585, "grad_norm": 0.04525060951709747, "learning_rate": 0.01, "loss": 1.9758, "step": 64137 }, { "epoch": 6.586568083795441, "grad_norm": 0.03372851014137268, "learning_rate": 0.01, "loss": 1.983, "step": 64140 }, { "epoch": 6.586876155268023, "grad_norm": 0.08057595044374466, "learning_rate": 0.01, "loss": 1.9687, "step": 64143 }, { "epoch": 6.587184226740604, "grad_norm": 0.09445854276418686, "learning_rate": 0.01, "loss": 1.9685, "step": 64146 }, { "epoch": 6.587492298213186, "grad_norm": 0.07656941562891006, "learning_rate": 0.01, "loss": 1.987, "step": 64149 }, { "epoch": 6.587800369685767, "grad_norm": 0.08286568522453308, "learning_rate": 0.01, "loss": 1.9887, "step": 64152 }, { "epoch": 6.588108441158349, "grad_norm": 0.11229046434164047, "learning_rate": 0.01, "loss": 1.9904, "step": 64155 }, { "epoch": 6.58841651263093, "grad_norm": 0.06434221565723419, "learning_rate": 0.01, "loss": 1.9891, "step": 64158 }, { "epoch": 6.588724584103512, "grad_norm": 0.08923232555389404, "learning_rate": 0.01, "loss": 1.9935, "step": 64161 }, { "epoch": 6.589032655576093, "grad_norm": 0.07555590569972992, "learning_rate": 0.01, "loss": 1.9707, "step": 64164 }, { "epoch": 6.5893407270486755, "grad_norm": 0.05860813334584236, "learning_rate": 0.01, "loss": 1.982, "step": 64167 }, { "epoch": 6.589648798521257, "grad_norm": 0.03441310673952103, "learning_rate": 0.01, "loss": 1.9682, "step": 64170 }, { "epoch": 6.589956869993839, "grad_norm": 0.0361168198287487, "learning_rate": 0.01, "loss": 1.9671, "step": 64173 }, { "epoch": 6.59026494146642, "grad_norm": 0.04222318157553673, "learning_rate": 0.01, "loss": 1.9749, "step": 64176 }, { "epoch": 6.590573012939002, "grad_norm": 0.12981876730918884, "learning_rate": 0.01, "loss": 1.9699, "step": 64179 }, { "epoch": 6.590881084411583, "grad_norm": 0.05409705266356468, "learning_rate": 0.01, "loss": 1.9807, "step": 64182 }, { "epoch": 6.591189155884165, "grad_norm": 0.053002189844846725, "learning_rate": 0.01, "loss": 1.9874, "step": 64185 }, { "epoch": 6.591497227356747, "grad_norm": 0.061368245631456375, "learning_rate": 0.01, "loss": 1.9612, "step": 64188 }, { "epoch": 6.591805298829328, "grad_norm": 0.10517262667417526, "learning_rate": 0.01, "loss": 1.973, "step": 64191 }, { "epoch": 6.59211337030191, "grad_norm": 0.06446947157382965, "learning_rate": 0.01, "loss": 1.9956, "step": 64194 }, { "epoch": 6.5924214417744915, "grad_norm": 0.06423608213663101, "learning_rate": 0.01, "loss": 1.9959, "step": 64197 }, { "epoch": 6.592729513247074, "grad_norm": 0.034221209585666656, "learning_rate": 0.01, "loss": 1.962, "step": 64200 }, { "epoch": 6.593037584719655, "grad_norm": 0.12078302353620529, "learning_rate": 0.01, "loss": 1.9731, "step": 64203 }, { "epoch": 6.593345656192237, "grad_norm": 0.08189702033996582, "learning_rate": 0.01, "loss": 1.9648, "step": 64206 }, { "epoch": 6.593653727664818, "grad_norm": 0.0505688339471817, "learning_rate": 0.01, "loss": 1.9889, "step": 64209 }, { "epoch": 6.5939617991374, "grad_norm": 0.05289214104413986, "learning_rate": 0.01, "loss": 1.9856, "step": 64212 }, { "epoch": 6.594269870609981, "grad_norm": 0.0513903982937336, "learning_rate": 0.01, "loss": 2.0154, "step": 64215 }, { "epoch": 6.594577942082563, "grad_norm": 0.06634233146905899, "learning_rate": 0.01, "loss": 1.964, "step": 64218 }, { "epoch": 6.594886013555145, "grad_norm": 0.0847700983285904, "learning_rate": 0.01, "loss": 1.9755, "step": 64221 }, { "epoch": 6.595194085027726, "grad_norm": 0.08042684942483902, "learning_rate": 0.01, "loss": 1.991, "step": 64224 }, { "epoch": 6.595502156500308, "grad_norm": 0.046448756009340286, "learning_rate": 0.01, "loss": 1.9801, "step": 64227 }, { "epoch": 6.59581022797289, "grad_norm": 0.09152430295944214, "learning_rate": 0.01, "loss": 1.997, "step": 64230 }, { "epoch": 6.596118299445472, "grad_norm": 0.05537572130560875, "learning_rate": 0.01, "loss": 2.0014, "step": 64233 }, { "epoch": 6.596426370918053, "grad_norm": 0.040232930332422256, "learning_rate": 0.01, "loss": 1.9843, "step": 64236 }, { "epoch": 6.596734442390635, "grad_norm": 0.040378376841545105, "learning_rate": 0.01, "loss": 1.9749, "step": 64239 }, { "epoch": 6.597042513863216, "grad_norm": 0.036219045519828796, "learning_rate": 0.01, "loss": 1.9633, "step": 64242 }, { "epoch": 6.597350585335798, "grad_norm": 0.0671059712767601, "learning_rate": 0.01, "loss": 1.9738, "step": 64245 }, { "epoch": 6.597658656808379, "grad_norm": 0.05195783078670502, "learning_rate": 0.01, "loss": 1.9798, "step": 64248 }, { "epoch": 6.597966728280961, "grad_norm": 0.05707523226737976, "learning_rate": 0.01, "loss": 1.9654, "step": 64251 }, { "epoch": 6.5982747997535425, "grad_norm": 0.10035475343465805, "learning_rate": 0.01, "loss": 2.0062, "step": 64254 }, { "epoch": 6.5985828712261245, "grad_norm": 0.08237627893686295, "learning_rate": 0.01, "loss": 1.9667, "step": 64257 }, { "epoch": 6.598890942698706, "grad_norm": 0.06940212100744247, "learning_rate": 0.01, "loss": 1.9815, "step": 64260 }, { "epoch": 6.599199014171288, "grad_norm": 0.08880186080932617, "learning_rate": 0.01, "loss": 1.9756, "step": 64263 }, { "epoch": 6.59950708564387, "grad_norm": 0.07859902083873749, "learning_rate": 0.01, "loss": 1.9807, "step": 64266 }, { "epoch": 6.599815157116451, "grad_norm": 0.10869091004133224, "learning_rate": 0.01, "loss": 1.9805, "step": 64269 }, { "epoch": 6.600123228589033, "grad_norm": 0.09375559538602829, "learning_rate": 0.01, "loss": 1.9978, "step": 64272 }, { "epoch": 6.600431300061614, "grad_norm": 0.047256410121917725, "learning_rate": 0.01, "loss": 2.0044, "step": 64275 }, { "epoch": 6.600739371534196, "grad_norm": 0.04523398354649544, "learning_rate": 0.01, "loss": 1.9741, "step": 64278 }, { "epoch": 6.601047443006777, "grad_norm": 0.03981922194361687, "learning_rate": 0.01, "loss": 1.9791, "step": 64281 }, { "epoch": 6.601355514479359, "grad_norm": 0.05818277969956398, "learning_rate": 0.01, "loss": 2.002, "step": 64284 }, { "epoch": 6.601663585951941, "grad_norm": 0.04212537035346031, "learning_rate": 0.01, "loss": 1.9815, "step": 64287 }, { "epoch": 6.601971657424523, "grad_norm": 0.033235009759664536, "learning_rate": 0.01, "loss": 1.9896, "step": 64290 }, { "epoch": 6.602279728897104, "grad_norm": 0.0338917002081871, "learning_rate": 0.01, "loss": 1.9502, "step": 64293 }, { "epoch": 6.602587800369686, "grad_norm": 0.07970410585403442, "learning_rate": 0.01, "loss": 1.9692, "step": 64296 }, { "epoch": 6.602895871842268, "grad_norm": 0.05640435963869095, "learning_rate": 0.01, "loss": 2.0047, "step": 64299 }, { "epoch": 6.603203943314849, "grad_norm": 0.03606804460287094, "learning_rate": 0.01, "loss": 1.98, "step": 64302 }, { "epoch": 6.60351201478743, "grad_norm": 0.04679587855935097, "learning_rate": 0.01, "loss": 2.0004, "step": 64305 }, { "epoch": 6.603820086260012, "grad_norm": 0.10526128858327866, "learning_rate": 0.01, "loss": 1.9714, "step": 64308 }, { "epoch": 6.604128157732594, "grad_norm": 0.1200772076845169, "learning_rate": 0.01, "loss": 1.974, "step": 64311 }, { "epoch": 6.6044362292051755, "grad_norm": 0.1391524225473404, "learning_rate": 0.01, "loss": 1.965, "step": 64314 }, { "epoch": 6.6047443006777575, "grad_norm": 0.08838554471731186, "learning_rate": 0.01, "loss": 1.9816, "step": 64317 }, { "epoch": 6.605052372150339, "grad_norm": 0.07253210991621017, "learning_rate": 0.01, "loss": 2.0028, "step": 64320 }, { "epoch": 6.605360443622921, "grad_norm": 0.04417060315608978, "learning_rate": 0.01, "loss": 1.9559, "step": 64323 }, { "epoch": 6.605668515095502, "grad_norm": 0.04443688318133354, "learning_rate": 0.01, "loss": 1.9699, "step": 64326 }, { "epoch": 6.605976586568084, "grad_norm": 0.05711906775832176, "learning_rate": 0.01, "loss": 1.9709, "step": 64329 }, { "epoch": 6.606284658040665, "grad_norm": 0.05172707885503769, "learning_rate": 0.01, "loss": 1.9804, "step": 64332 }, { "epoch": 6.606592729513247, "grad_norm": 0.08967714011669159, "learning_rate": 0.01, "loss": 1.9843, "step": 64335 }, { "epoch": 6.606900800985828, "grad_norm": 0.03300314024090767, "learning_rate": 0.01, "loss": 1.9868, "step": 64338 }, { "epoch": 6.60720887245841, "grad_norm": 0.052630916237831116, "learning_rate": 0.01, "loss": 1.9787, "step": 64341 }, { "epoch": 6.607516943930992, "grad_norm": 0.041368499398231506, "learning_rate": 0.01, "loss": 1.9676, "step": 64344 }, { "epoch": 6.607825015403574, "grad_norm": 0.04549229145050049, "learning_rate": 0.01, "loss": 1.972, "step": 64347 }, { "epoch": 6.608133086876156, "grad_norm": 0.057475391775369644, "learning_rate": 0.01, "loss": 1.977, "step": 64350 }, { "epoch": 6.608441158348737, "grad_norm": 0.056770164519548416, "learning_rate": 0.01, "loss": 1.9915, "step": 64353 }, { "epoch": 6.608749229821319, "grad_norm": 0.12769392132759094, "learning_rate": 0.01, "loss": 1.9643, "step": 64356 }, { "epoch": 6.6090573012939, "grad_norm": 0.05759736895561218, "learning_rate": 0.01, "loss": 1.9962, "step": 64359 }, { "epoch": 6.609365372766482, "grad_norm": 0.10004235059022903, "learning_rate": 0.01, "loss": 1.9524, "step": 64362 }, { "epoch": 6.609673444239063, "grad_norm": 0.0365755669772625, "learning_rate": 0.01, "loss": 1.9676, "step": 64365 }, { "epoch": 6.609981515711645, "grad_norm": 0.07999947667121887, "learning_rate": 0.01, "loss": 1.9623, "step": 64368 }, { "epoch": 6.610289587184226, "grad_norm": 0.055819109082221985, "learning_rate": 0.01, "loss": 1.9727, "step": 64371 }, { "epoch": 6.6105976586568085, "grad_norm": 0.046389635652303696, "learning_rate": 0.01, "loss": 1.9857, "step": 64374 }, { "epoch": 6.61090573012939, "grad_norm": 0.04378519952297211, "learning_rate": 0.01, "loss": 1.9706, "step": 64377 }, { "epoch": 6.611213801601972, "grad_norm": 0.04519166797399521, "learning_rate": 0.01, "loss": 1.9777, "step": 64380 }, { "epoch": 6.611521873074553, "grad_norm": 0.037725985050201416, "learning_rate": 0.01, "loss": 1.9827, "step": 64383 }, { "epoch": 6.611829944547135, "grad_norm": 0.03793521970510483, "learning_rate": 0.01, "loss": 1.9545, "step": 64386 }, { "epoch": 6.612138016019717, "grad_norm": 0.04742049798369408, "learning_rate": 0.01, "loss": 1.9686, "step": 64389 }, { "epoch": 6.612446087492298, "grad_norm": 0.059588026255369186, "learning_rate": 0.01, "loss": 1.976, "step": 64392 }, { "epoch": 6.61275415896488, "grad_norm": 0.06134435534477234, "learning_rate": 0.01, "loss": 1.9881, "step": 64395 }, { "epoch": 6.613062230437461, "grad_norm": 0.11348873376846313, "learning_rate": 0.01, "loss": 1.9899, "step": 64398 }, { "epoch": 6.613370301910043, "grad_norm": 0.03893901780247688, "learning_rate": 0.01, "loss": 1.9708, "step": 64401 }, { "epoch": 6.6136783733826245, "grad_norm": 0.11874943226575851, "learning_rate": 0.01, "loss": 1.9656, "step": 64404 }, { "epoch": 6.613986444855207, "grad_norm": 0.08608514815568924, "learning_rate": 0.01, "loss": 1.9592, "step": 64407 }, { "epoch": 6.614294516327788, "grad_norm": 0.04932364821434021, "learning_rate": 0.01, "loss": 1.9503, "step": 64410 }, { "epoch": 6.61460258780037, "grad_norm": 0.04308156296610832, "learning_rate": 0.01, "loss": 1.9613, "step": 64413 }, { "epoch": 6.614910659272951, "grad_norm": 0.050084516406059265, "learning_rate": 0.01, "loss": 1.9996, "step": 64416 }, { "epoch": 6.615218730745533, "grad_norm": 0.054838646203279495, "learning_rate": 0.01, "loss": 1.9968, "step": 64419 }, { "epoch": 6.615526802218115, "grad_norm": 0.08248993754386902, "learning_rate": 0.01, "loss": 1.964, "step": 64422 }, { "epoch": 6.615834873690696, "grad_norm": 0.09142714738845825, "learning_rate": 0.01, "loss": 1.9613, "step": 64425 }, { "epoch": 6.616142945163277, "grad_norm": 0.07664018124341965, "learning_rate": 0.01, "loss": 1.9591, "step": 64428 }, { "epoch": 6.616451016635859, "grad_norm": 0.10052567720413208, "learning_rate": 0.01, "loss": 1.9717, "step": 64431 }, { "epoch": 6.6167590881084415, "grad_norm": 0.04676009342074394, "learning_rate": 0.01, "loss": 1.9909, "step": 64434 }, { "epoch": 6.617067159581023, "grad_norm": 0.05421235412359238, "learning_rate": 0.01, "loss": 1.9797, "step": 64437 }, { "epoch": 6.617375231053605, "grad_norm": 0.05721297487616539, "learning_rate": 0.01, "loss": 1.9847, "step": 64440 }, { "epoch": 6.617683302526186, "grad_norm": 0.05209130793809891, "learning_rate": 0.01, "loss": 2.0027, "step": 64443 }, { "epoch": 6.617991373998768, "grad_norm": 0.032787423580884933, "learning_rate": 0.01, "loss": 1.979, "step": 64446 }, { "epoch": 6.618299445471349, "grad_norm": 0.11715228855609894, "learning_rate": 0.01, "loss": 1.9863, "step": 64449 }, { "epoch": 6.618607516943931, "grad_norm": 0.044540733098983765, "learning_rate": 0.01, "loss": 1.9501, "step": 64452 }, { "epoch": 6.618915588416512, "grad_norm": 0.06670498847961426, "learning_rate": 0.01, "loss": 1.9556, "step": 64455 }, { "epoch": 6.619223659889094, "grad_norm": 0.09549476206302643, "learning_rate": 0.01, "loss": 1.9744, "step": 64458 }, { "epoch": 6.6195317313616755, "grad_norm": 0.06052359193563461, "learning_rate": 0.01, "loss": 1.9866, "step": 64461 }, { "epoch": 6.6198398028342575, "grad_norm": 0.04301074892282486, "learning_rate": 0.01, "loss": 1.9816, "step": 64464 }, { "epoch": 6.62014787430684, "grad_norm": 0.06434419006109238, "learning_rate": 0.01, "loss": 1.9689, "step": 64467 }, { "epoch": 6.620455945779421, "grad_norm": 0.051222655922174454, "learning_rate": 0.01, "loss": 1.9613, "step": 64470 }, { "epoch": 6.620764017252003, "grad_norm": 0.06115943565964699, "learning_rate": 0.01, "loss": 1.9729, "step": 64473 }, { "epoch": 6.621072088724584, "grad_norm": 0.07994543761014938, "learning_rate": 0.01, "loss": 1.9754, "step": 64476 }, { "epoch": 6.621380160197166, "grad_norm": 0.06953758746385574, "learning_rate": 0.01, "loss": 1.9682, "step": 64479 }, { "epoch": 6.621688231669747, "grad_norm": 0.12298289686441422, "learning_rate": 0.01, "loss": 1.9851, "step": 64482 }, { "epoch": 6.621996303142329, "grad_norm": 0.05092281848192215, "learning_rate": 0.01, "loss": 1.9941, "step": 64485 }, { "epoch": 6.62230437461491, "grad_norm": 0.043478354811668396, "learning_rate": 0.01, "loss": 1.9795, "step": 64488 }, { "epoch": 6.622612446087492, "grad_norm": 0.043610502034425735, "learning_rate": 0.01, "loss": 1.9741, "step": 64491 }, { "epoch": 6.622920517560074, "grad_norm": 0.052348487079143524, "learning_rate": 0.01, "loss": 2.0082, "step": 64494 }, { "epoch": 6.623228589032656, "grad_norm": 0.05436374992132187, "learning_rate": 0.01, "loss": 1.9554, "step": 64497 }, { "epoch": 6.623536660505238, "grad_norm": 0.057916752994060516, "learning_rate": 0.01, "loss": 1.9778, "step": 64500 }, { "epoch": 6.623844731977819, "grad_norm": 0.047713443636894226, "learning_rate": 0.01, "loss": 1.9707, "step": 64503 }, { "epoch": 6.6241528034504, "grad_norm": 0.03382120653986931, "learning_rate": 0.01, "loss": 1.9799, "step": 64506 }, { "epoch": 6.624460874922982, "grad_norm": 0.0990116074681282, "learning_rate": 0.01, "loss": 1.9705, "step": 64509 }, { "epoch": 6.624768946395564, "grad_norm": 0.0787537470459938, "learning_rate": 0.01, "loss": 1.988, "step": 64512 }, { "epoch": 6.625077017868145, "grad_norm": 0.07667285948991776, "learning_rate": 0.01, "loss": 1.9846, "step": 64515 }, { "epoch": 6.625385089340727, "grad_norm": 0.04837285354733467, "learning_rate": 0.01, "loss": 1.9926, "step": 64518 }, { "epoch": 6.6256931608133085, "grad_norm": 0.04143200442194939, "learning_rate": 0.01, "loss": 2.0021, "step": 64521 }, { "epoch": 6.6260012322858906, "grad_norm": 0.033114057034254074, "learning_rate": 0.01, "loss": 1.9956, "step": 64524 }, { "epoch": 6.626309303758472, "grad_norm": 0.0412205308675766, "learning_rate": 0.01, "loss": 1.9918, "step": 64527 }, { "epoch": 6.626617375231054, "grad_norm": 0.10429148375988007, "learning_rate": 0.01, "loss": 1.9965, "step": 64530 }, { "epoch": 6.626925446703635, "grad_norm": 0.05905142053961754, "learning_rate": 0.01, "loss": 1.9826, "step": 64533 }, { "epoch": 6.627233518176217, "grad_norm": 0.05937071144580841, "learning_rate": 0.01, "loss": 1.9721, "step": 64536 }, { "epoch": 6.627541589648798, "grad_norm": 0.10505139082670212, "learning_rate": 0.01, "loss": 1.9879, "step": 64539 }, { "epoch": 6.62784966112138, "grad_norm": 0.08412894606590271, "learning_rate": 0.01, "loss": 1.9716, "step": 64542 }, { "epoch": 6.628157732593962, "grad_norm": 0.07351752370595932, "learning_rate": 0.01, "loss": 1.9741, "step": 64545 }, { "epoch": 6.628465804066543, "grad_norm": 0.07745961844921112, "learning_rate": 0.01, "loss": 1.9912, "step": 64548 }, { "epoch": 6.6287738755391254, "grad_norm": 0.07289955765008926, "learning_rate": 0.01, "loss": 1.9546, "step": 64551 }, { "epoch": 6.629081947011707, "grad_norm": 0.04405929520726204, "learning_rate": 0.01, "loss": 1.9712, "step": 64554 }, { "epoch": 6.629390018484289, "grad_norm": 0.0912909060716629, "learning_rate": 0.01, "loss": 1.9768, "step": 64557 }, { "epoch": 6.62969808995687, "grad_norm": 0.042689189314842224, "learning_rate": 0.01, "loss": 1.9859, "step": 64560 }, { "epoch": 6.630006161429452, "grad_norm": 0.14025220274925232, "learning_rate": 0.01, "loss": 1.9644, "step": 64563 }, { "epoch": 6.630314232902033, "grad_norm": 0.1245136708021164, "learning_rate": 0.01, "loss": 1.9891, "step": 64566 }, { "epoch": 6.630622304374615, "grad_norm": 0.09421566873788834, "learning_rate": 0.01, "loss": 1.9583, "step": 64569 }, { "epoch": 6.630930375847196, "grad_norm": 0.0803411453962326, "learning_rate": 0.01, "loss": 1.9709, "step": 64572 }, { "epoch": 6.631238447319778, "grad_norm": 0.06703267991542816, "learning_rate": 0.01, "loss": 1.9602, "step": 64575 }, { "epoch": 6.6315465187923595, "grad_norm": 0.1008155420422554, "learning_rate": 0.01, "loss": 1.9158, "step": 64578 }, { "epoch": 6.6318545902649415, "grad_norm": 0.08870470523834229, "learning_rate": 0.01, "loss": 1.9768, "step": 64581 }, { "epoch": 6.632162661737523, "grad_norm": 0.06180182099342346, "learning_rate": 0.01, "loss": 1.992, "step": 64584 }, { "epoch": 6.632470733210105, "grad_norm": 0.06106216087937355, "learning_rate": 0.01, "loss": 1.9888, "step": 64587 }, { "epoch": 6.632778804682687, "grad_norm": 0.05340215936303139, "learning_rate": 0.01, "loss": 1.9667, "step": 64590 }, { "epoch": 6.633086876155268, "grad_norm": 0.04080619663000107, "learning_rate": 0.01, "loss": 1.97, "step": 64593 }, { "epoch": 6.63339494762785, "grad_norm": 0.031813837587833405, "learning_rate": 0.01, "loss": 1.9728, "step": 64596 }, { "epoch": 6.633703019100431, "grad_norm": 0.0589885450899601, "learning_rate": 0.01, "loss": 1.9692, "step": 64599 }, { "epoch": 6.634011090573013, "grad_norm": 0.14397123456001282, "learning_rate": 0.01, "loss": 1.9954, "step": 64602 }, { "epoch": 6.634319162045594, "grad_norm": 0.058227211236953735, "learning_rate": 0.01, "loss": 1.9922, "step": 64605 }, { "epoch": 6.634627233518176, "grad_norm": 0.05160403624176979, "learning_rate": 0.01, "loss": 1.953, "step": 64608 }, { "epoch": 6.634935304990758, "grad_norm": 0.052490971982479095, "learning_rate": 0.01, "loss": 1.9786, "step": 64611 }, { "epoch": 6.63524337646334, "grad_norm": 0.06203007325530052, "learning_rate": 0.01, "loss": 1.991, "step": 64614 }, { "epoch": 6.635551447935921, "grad_norm": 0.06790245324373245, "learning_rate": 0.01, "loss": 1.9649, "step": 64617 }, { "epoch": 6.635859519408503, "grad_norm": 0.0575333833694458, "learning_rate": 0.01, "loss": 1.9963, "step": 64620 }, { "epoch": 6.636167590881085, "grad_norm": 0.05253339931368828, "learning_rate": 0.01, "loss": 1.9631, "step": 64623 }, { "epoch": 6.636475662353666, "grad_norm": 0.0605839341878891, "learning_rate": 0.01, "loss": 1.9648, "step": 64626 }, { "epoch": 6.636783733826247, "grad_norm": 0.046164620667696, "learning_rate": 0.01, "loss": 1.9797, "step": 64629 }, { "epoch": 6.637091805298829, "grad_norm": 0.04549845680594444, "learning_rate": 0.01, "loss": 1.9647, "step": 64632 }, { "epoch": 6.637399876771411, "grad_norm": 0.04419662430882454, "learning_rate": 0.01, "loss": 1.9575, "step": 64635 }, { "epoch": 6.6377079482439925, "grad_norm": 0.04874144867062569, "learning_rate": 0.01, "loss": 1.9874, "step": 64638 }, { "epoch": 6.6380160197165745, "grad_norm": 0.06364091485738754, "learning_rate": 0.01, "loss": 1.9775, "step": 64641 }, { "epoch": 6.638324091189156, "grad_norm": 0.11188903450965881, "learning_rate": 0.01, "loss": 1.9876, "step": 64644 }, { "epoch": 6.638632162661738, "grad_norm": 0.03762582689523697, "learning_rate": 0.01, "loss": 1.97, "step": 64647 }, { "epoch": 6.638940234134319, "grad_norm": 0.03571620211005211, "learning_rate": 0.01, "loss": 1.975, "step": 64650 }, { "epoch": 6.639248305606901, "grad_norm": 0.05100245773792267, "learning_rate": 0.01, "loss": 1.9701, "step": 64653 }, { "epoch": 6.639556377079482, "grad_norm": 0.0773586705327034, "learning_rate": 0.01, "loss": 1.9905, "step": 64656 }, { "epoch": 6.639864448552064, "grad_norm": 0.056971583515405655, "learning_rate": 0.01, "loss": 1.9654, "step": 64659 }, { "epoch": 6.640172520024645, "grad_norm": 0.04947191849350929, "learning_rate": 0.01, "loss": 1.9586, "step": 64662 }, { "epoch": 6.640480591497227, "grad_norm": 0.039240311831235886, "learning_rate": 0.01, "loss": 2.0069, "step": 64665 }, { "epoch": 6.640788662969809, "grad_norm": 0.03866692632436752, "learning_rate": 0.01, "loss": 1.9856, "step": 64668 }, { "epoch": 6.641096734442391, "grad_norm": 0.05008673667907715, "learning_rate": 0.01, "loss": 1.9802, "step": 64671 }, { "epoch": 6.641404805914973, "grad_norm": 0.12828943133354187, "learning_rate": 0.01, "loss": 1.9751, "step": 64674 }, { "epoch": 6.641712877387554, "grad_norm": 0.04267043247818947, "learning_rate": 0.01, "loss": 1.9932, "step": 64677 }, { "epoch": 6.642020948860136, "grad_norm": 0.09645269066095352, "learning_rate": 0.01, "loss": 1.9822, "step": 64680 }, { "epoch": 6.642329020332717, "grad_norm": 0.053970299661159515, "learning_rate": 0.01, "loss": 1.9629, "step": 64683 }, { "epoch": 6.642637091805299, "grad_norm": 0.15396493673324585, "learning_rate": 0.01, "loss": 1.9827, "step": 64686 }, { "epoch": 6.64294516327788, "grad_norm": 0.06772658973932266, "learning_rate": 0.01, "loss": 1.9895, "step": 64689 }, { "epoch": 6.643253234750462, "grad_norm": 0.05688326433300972, "learning_rate": 0.01, "loss": 1.9633, "step": 64692 }, { "epoch": 6.643561306223043, "grad_norm": 0.05814405530691147, "learning_rate": 0.01, "loss": 1.9805, "step": 64695 }, { "epoch": 6.6438693776956255, "grad_norm": 0.033603183925151825, "learning_rate": 0.01, "loss": 1.9562, "step": 64698 }, { "epoch": 6.6441774491682075, "grad_norm": 0.06284506618976593, "learning_rate": 0.01, "loss": 2.0001, "step": 64701 }, { "epoch": 6.644485520640789, "grad_norm": 0.03463011607527733, "learning_rate": 0.01, "loss": 1.9749, "step": 64704 }, { "epoch": 6.64479359211337, "grad_norm": 0.039407879114151, "learning_rate": 0.01, "loss": 1.9792, "step": 64707 }, { "epoch": 6.645101663585952, "grad_norm": 0.034094471484422684, "learning_rate": 0.01, "loss": 1.9753, "step": 64710 }, { "epoch": 6.645409735058534, "grad_norm": 0.15402834117412567, "learning_rate": 0.01, "loss": 1.9939, "step": 64713 }, { "epoch": 6.645717806531115, "grad_norm": 0.055694807320833206, "learning_rate": 0.01, "loss": 1.9946, "step": 64716 }, { "epoch": 6.646025878003697, "grad_norm": 0.07708931714296341, "learning_rate": 0.01, "loss": 1.9773, "step": 64719 }, { "epoch": 6.646333949476278, "grad_norm": 0.07107258588075638, "learning_rate": 0.01, "loss": 1.9692, "step": 64722 }, { "epoch": 6.64664202094886, "grad_norm": 0.06758569926023483, "learning_rate": 0.01, "loss": 1.9544, "step": 64725 }, { "epoch": 6.6469500924214415, "grad_norm": 0.07093843817710876, "learning_rate": 0.01, "loss": 1.9627, "step": 64728 }, { "epoch": 6.647258163894024, "grad_norm": 0.06615183502435684, "learning_rate": 0.01, "loss": 1.9833, "step": 64731 }, { "epoch": 6.647566235366605, "grad_norm": 0.06096789985895157, "learning_rate": 0.01, "loss": 1.9968, "step": 64734 }, { "epoch": 6.647874306839187, "grad_norm": 0.03493250906467438, "learning_rate": 0.01, "loss": 1.9776, "step": 64737 }, { "epoch": 6.648182378311768, "grad_norm": 0.058653466403484344, "learning_rate": 0.01, "loss": 1.9707, "step": 64740 }, { "epoch": 6.64849044978435, "grad_norm": 0.1340709775686264, "learning_rate": 0.01, "loss": 1.9796, "step": 64743 }, { "epoch": 6.648798521256932, "grad_norm": 0.0948764905333519, "learning_rate": 0.01, "loss": 1.9954, "step": 64746 }, { "epoch": 6.649106592729513, "grad_norm": 0.03758051246404648, "learning_rate": 0.01, "loss": 1.9835, "step": 64749 }, { "epoch": 6.649414664202095, "grad_norm": 0.05022870749235153, "learning_rate": 0.01, "loss": 2.0035, "step": 64752 }, { "epoch": 6.649722735674676, "grad_norm": 0.04376206919550896, "learning_rate": 0.01, "loss": 1.9686, "step": 64755 }, { "epoch": 6.6500308071472585, "grad_norm": 0.05255085602402687, "learning_rate": 0.01, "loss": 1.9832, "step": 64758 }, { "epoch": 6.65033887861984, "grad_norm": 0.04554371163249016, "learning_rate": 0.01, "loss": 1.9882, "step": 64761 }, { "epoch": 6.650646950092422, "grad_norm": 0.046166516840457916, "learning_rate": 0.01, "loss": 1.9969, "step": 64764 }, { "epoch": 6.650955021565003, "grad_norm": 0.06942164152860641, "learning_rate": 0.01, "loss": 1.977, "step": 64767 }, { "epoch": 6.651263093037585, "grad_norm": 0.07894229888916016, "learning_rate": 0.01, "loss": 1.9941, "step": 64770 }, { "epoch": 6.651571164510166, "grad_norm": 0.10424064844846725, "learning_rate": 0.01, "loss": 1.9659, "step": 64773 }, { "epoch": 6.651879235982748, "grad_norm": 0.1305040717124939, "learning_rate": 0.01, "loss": 1.9598, "step": 64776 }, { "epoch": 6.652187307455329, "grad_norm": 0.0960015282034874, "learning_rate": 0.01, "loss": 1.9668, "step": 64779 }, { "epoch": 6.652495378927911, "grad_norm": 0.051948267966508865, "learning_rate": 0.01, "loss": 1.9784, "step": 64782 }, { "epoch": 6.6528034504004925, "grad_norm": 0.049625445157289505, "learning_rate": 0.01, "loss": 1.974, "step": 64785 }, { "epoch": 6.6531115218730745, "grad_norm": 0.03437644988298416, "learning_rate": 0.01, "loss": 1.9774, "step": 64788 }, { "epoch": 6.653419593345657, "grad_norm": 0.04177222028374672, "learning_rate": 0.01, "loss": 2.0032, "step": 64791 }, { "epoch": 6.653727664818238, "grad_norm": 0.12556210160255432, "learning_rate": 0.01, "loss": 1.9663, "step": 64794 }, { "epoch": 6.65403573629082, "grad_norm": 0.06504391878843307, "learning_rate": 0.01, "loss": 1.9859, "step": 64797 }, { "epoch": 6.654343807763401, "grad_norm": 0.0797589123249054, "learning_rate": 0.01, "loss": 2.0007, "step": 64800 }, { "epoch": 6.654651879235983, "grad_norm": 0.05979294329881668, "learning_rate": 0.01, "loss": 1.9808, "step": 64803 }, { "epoch": 6.654959950708564, "grad_norm": 0.07763978838920593, "learning_rate": 0.01, "loss": 1.9639, "step": 64806 }, { "epoch": 6.655268022181146, "grad_norm": 0.06913294643163681, "learning_rate": 0.01, "loss": 1.9651, "step": 64809 }, { "epoch": 6.655576093653727, "grad_norm": 0.0825156420469284, "learning_rate": 0.01, "loss": 1.9491, "step": 64812 }, { "epoch": 6.655884165126309, "grad_norm": 0.04779151454567909, "learning_rate": 0.01, "loss": 1.9637, "step": 64815 }, { "epoch": 6.656192236598891, "grad_norm": 0.09039946645498276, "learning_rate": 0.01, "loss": 1.979, "step": 64818 }, { "epoch": 6.656500308071473, "grad_norm": 0.03368096798658371, "learning_rate": 0.01, "loss": 1.9844, "step": 64821 }, { "epoch": 6.656808379544055, "grad_norm": 0.11677176505327225, "learning_rate": 0.01, "loss": 1.9897, "step": 64824 }, { "epoch": 6.657116451016636, "grad_norm": 0.09988972544670105, "learning_rate": 0.01, "loss": 1.9647, "step": 64827 }, { "epoch": 6.657424522489217, "grad_norm": 0.1377715915441513, "learning_rate": 0.01, "loss": 1.9832, "step": 64830 }, { "epoch": 6.657732593961799, "grad_norm": 0.04986416921019554, "learning_rate": 0.01, "loss": 1.9764, "step": 64833 }, { "epoch": 6.658040665434381, "grad_norm": 0.06681905686855316, "learning_rate": 0.01, "loss": 1.9867, "step": 64836 }, { "epoch": 6.658348736906962, "grad_norm": 0.05175319314002991, "learning_rate": 0.01, "loss": 1.9556, "step": 64839 }, { "epoch": 6.658656808379544, "grad_norm": 0.048531901091337204, "learning_rate": 0.01, "loss": 2.0054, "step": 64842 }, { "epoch": 6.6589648798521255, "grad_norm": 0.06149359792470932, "learning_rate": 0.01, "loss": 1.9804, "step": 64845 }, { "epoch": 6.6592729513247075, "grad_norm": 0.0352664515376091, "learning_rate": 0.01, "loss": 1.9821, "step": 64848 }, { "epoch": 6.659581022797289, "grad_norm": 0.0510052926838398, "learning_rate": 0.01, "loss": 1.9894, "step": 64851 }, { "epoch": 6.659889094269871, "grad_norm": 0.06455834954977036, "learning_rate": 0.01, "loss": 1.9664, "step": 64854 }, { "epoch": 6.660197165742452, "grad_norm": 0.13268320262432098, "learning_rate": 0.01, "loss": 1.9866, "step": 64857 }, { "epoch": 6.660505237215034, "grad_norm": 0.0314185805618763, "learning_rate": 0.01, "loss": 1.9935, "step": 64860 }, { "epoch": 6.660813308687615, "grad_norm": 0.06962891668081284, "learning_rate": 0.01, "loss": 1.9849, "step": 64863 }, { "epoch": 6.661121380160197, "grad_norm": 0.11769289523363113, "learning_rate": 0.01, "loss": 2.0032, "step": 64866 }, { "epoch": 6.661429451632779, "grad_norm": 0.03778669983148575, "learning_rate": 0.01, "loss": 1.9735, "step": 64869 }, { "epoch": 6.66173752310536, "grad_norm": 0.09534257650375366, "learning_rate": 0.01, "loss": 1.9734, "step": 64872 }, { "epoch": 6.662045594577942, "grad_norm": 0.03727418929338455, "learning_rate": 0.01, "loss": 1.9789, "step": 64875 }, { "epoch": 6.662353666050524, "grad_norm": 0.04617612436413765, "learning_rate": 0.01, "loss": 1.978, "step": 64878 }, { "epoch": 6.662661737523106, "grad_norm": 0.043121397495269775, "learning_rate": 0.01, "loss": 1.977, "step": 64881 }, { "epoch": 6.662969808995687, "grad_norm": 0.03990964964032173, "learning_rate": 0.01, "loss": 1.973, "step": 64884 }, { "epoch": 6.663277880468269, "grad_norm": 0.03709634020924568, "learning_rate": 0.01, "loss": 1.9594, "step": 64887 }, { "epoch": 6.66358595194085, "grad_norm": 0.042194269597530365, "learning_rate": 0.01, "loss": 1.9616, "step": 64890 }, { "epoch": 6.663894023413432, "grad_norm": 0.08765631169080734, "learning_rate": 0.01, "loss": 1.9964, "step": 64893 }, { "epoch": 6.664202094886013, "grad_norm": 0.10358686000108719, "learning_rate": 0.01, "loss": 1.9844, "step": 64896 }, { "epoch": 6.664510166358595, "grad_norm": 0.05577368661761284, "learning_rate": 0.01, "loss": 1.9848, "step": 64899 }, { "epoch": 6.664818237831177, "grad_norm": 0.07221733778715134, "learning_rate": 0.01, "loss": 1.9895, "step": 64902 }, { "epoch": 6.6651263093037585, "grad_norm": 0.08421463519334793, "learning_rate": 0.01, "loss": 1.9786, "step": 64905 }, { "epoch": 6.66543438077634, "grad_norm": 0.04022625833749771, "learning_rate": 0.01, "loss": 1.9654, "step": 64908 }, { "epoch": 6.665742452248922, "grad_norm": 0.10746564716100693, "learning_rate": 0.01, "loss": 1.9708, "step": 64911 }, { "epoch": 6.666050523721504, "grad_norm": 0.10090082883834839, "learning_rate": 0.01, "loss": 1.9674, "step": 64914 }, { "epoch": 6.666358595194085, "grad_norm": 0.03953251615166664, "learning_rate": 0.01, "loss": 1.9555, "step": 64917 }, { "epoch": 6.666666666666667, "grad_norm": 0.09518130123615265, "learning_rate": 0.01, "loss": 1.9642, "step": 64920 }, { "epoch": 6.666974738139248, "grad_norm": 0.06792232394218445, "learning_rate": 0.01, "loss": 1.9861, "step": 64923 }, { "epoch": 6.66728280961183, "grad_norm": 0.08679928630590439, "learning_rate": 0.01, "loss": 1.9761, "step": 64926 }, { "epoch": 6.667590881084411, "grad_norm": 0.07284796237945557, "learning_rate": 0.01, "loss": 1.9691, "step": 64929 }, { "epoch": 6.667898952556993, "grad_norm": 0.07962015271186829, "learning_rate": 0.01, "loss": 1.9834, "step": 64932 }, { "epoch": 6.6682070240295745, "grad_norm": 0.04905517399311066, "learning_rate": 0.01, "loss": 1.9889, "step": 64935 }, { "epoch": 6.668515095502157, "grad_norm": 0.1008431613445282, "learning_rate": 0.01, "loss": 1.9707, "step": 64938 }, { "epoch": 6.668823166974738, "grad_norm": 0.07995069026947021, "learning_rate": 0.01, "loss": 1.9803, "step": 64941 }, { "epoch": 6.66913123844732, "grad_norm": 0.08693604916334152, "learning_rate": 0.01, "loss": 1.9604, "step": 64944 }, { "epoch": 6.669439309919902, "grad_norm": 0.07200144976377487, "learning_rate": 0.01, "loss": 1.9782, "step": 64947 }, { "epoch": 6.669747381392483, "grad_norm": 0.10411632061004639, "learning_rate": 0.01, "loss": 1.9813, "step": 64950 }, { "epoch": 6.670055452865065, "grad_norm": 0.03982757031917572, "learning_rate": 0.01, "loss": 2.0043, "step": 64953 }, { "epoch": 6.670363524337646, "grad_norm": 0.03512009605765343, "learning_rate": 0.01, "loss": 1.9712, "step": 64956 }, { "epoch": 6.670671595810228, "grad_norm": 0.056927431374788284, "learning_rate": 0.01, "loss": 1.9744, "step": 64959 }, { "epoch": 6.6709796672828094, "grad_norm": 0.04020826891064644, "learning_rate": 0.01, "loss": 1.9408, "step": 64962 }, { "epoch": 6.6712877387553915, "grad_norm": 0.051895178854465485, "learning_rate": 0.01, "loss": 1.9598, "step": 64965 }, { "epoch": 6.671595810227973, "grad_norm": 0.1494154930114746, "learning_rate": 0.01, "loss": 1.9836, "step": 64968 }, { "epoch": 6.671903881700555, "grad_norm": 0.06913480907678604, "learning_rate": 0.01, "loss": 1.9997, "step": 64971 }, { "epoch": 6.672211953173136, "grad_norm": 0.0441024973988533, "learning_rate": 0.01, "loss": 1.9884, "step": 64974 }, { "epoch": 6.672520024645718, "grad_norm": 0.053763311356306076, "learning_rate": 0.01, "loss": 1.9948, "step": 64977 }, { "epoch": 6.672828096118299, "grad_norm": 0.08884178847074509, "learning_rate": 0.01, "loss": 1.9652, "step": 64980 }, { "epoch": 6.673136167590881, "grad_norm": 0.06425255537033081, "learning_rate": 0.01, "loss": 1.9631, "step": 64983 }, { "epoch": 6.673444239063462, "grad_norm": 0.07111632078886032, "learning_rate": 0.01, "loss": 1.9967, "step": 64986 }, { "epoch": 6.673752310536044, "grad_norm": 0.06380520761013031, "learning_rate": 0.01, "loss": 1.9768, "step": 64989 }, { "epoch": 6.674060382008626, "grad_norm": 0.06785110384225845, "learning_rate": 0.01, "loss": 1.9913, "step": 64992 }, { "epoch": 6.6743684534812076, "grad_norm": 0.13397693634033203, "learning_rate": 0.01, "loss": 1.9836, "step": 64995 }, { "epoch": 6.67467652495379, "grad_norm": 0.06838545948266983, "learning_rate": 0.01, "loss": 2.0009, "step": 64998 }, { "epoch": 6.674984596426371, "grad_norm": 0.05458493158221245, "learning_rate": 0.01, "loss": 2.0025, "step": 65001 }, { "epoch": 6.675292667898953, "grad_norm": 0.04406043142080307, "learning_rate": 0.01, "loss": 1.9596, "step": 65004 }, { "epoch": 6.675600739371534, "grad_norm": 0.03164122626185417, "learning_rate": 0.01, "loss": 1.9683, "step": 65007 }, { "epoch": 6.675908810844116, "grad_norm": 0.03512512892484665, "learning_rate": 0.01, "loss": 1.9773, "step": 65010 }, { "epoch": 6.676216882316697, "grad_norm": 0.03886500746011734, "learning_rate": 0.01, "loss": 1.9656, "step": 65013 }, { "epoch": 6.676524953789279, "grad_norm": 0.04235444217920303, "learning_rate": 0.01, "loss": 1.9672, "step": 65016 }, { "epoch": 6.67683302526186, "grad_norm": 0.11992576718330383, "learning_rate": 0.01, "loss": 1.9785, "step": 65019 }, { "epoch": 6.6771410967344424, "grad_norm": 0.07870931923389435, "learning_rate": 0.01, "loss": 1.9679, "step": 65022 }, { "epoch": 6.6774491682070245, "grad_norm": 0.11495670676231384, "learning_rate": 0.01, "loss": 1.9681, "step": 65025 }, { "epoch": 6.677757239679606, "grad_norm": 0.10441329330205917, "learning_rate": 0.01, "loss": 2.0027, "step": 65028 }, { "epoch": 6.678065311152187, "grad_norm": 0.08615846931934357, "learning_rate": 0.01, "loss": 1.9905, "step": 65031 }, { "epoch": 6.678373382624769, "grad_norm": 0.053645677864551544, "learning_rate": 0.01, "loss": 2.0039, "step": 65034 }, { "epoch": 6.678681454097351, "grad_norm": 0.0375412255525589, "learning_rate": 0.01, "loss": 1.9721, "step": 65037 }, { "epoch": 6.678989525569932, "grad_norm": 0.03644879162311554, "learning_rate": 0.01, "loss": 1.9553, "step": 65040 }, { "epoch": 6.679297597042514, "grad_norm": 0.09399533271789551, "learning_rate": 0.01, "loss": 1.9674, "step": 65043 }, { "epoch": 6.679605668515095, "grad_norm": 0.06882072985172272, "learning_rate": 0.01, "loss": 1.9863, "step": 65046 }, { "epoch": 6.679913739987677, "grad_norm": 0.10664650052785873, "learning_rate": 0.01, "loss": 1.9733, "step": 65049 }, { "epoch": 6.6802218114602585, "grad_norm": 0.044063057750463486, "learning_rate": 0.01, "loss": 1.9976, "step": 65052 }, { "epoch": 6.680529882932841, "grad_norm": 0.037052053958177567, "learning_rate": 0.01, "loss": 1.963, "step": 65055 }, { "epoch": 6.680837954405422, "grad_norm": 0.038574136793613434, "learning_rate": 0.01, "loss": 1.9605, "step": 65058 }, { "epoch": 6.681146025878004, "grad_norm": 0.06184682250022888, "learning_rate": 0.01, "loss": 1.9944, "step": 65061 }, { "epoch": 6.681454097350585, "grad_norm": 0.09441886097192764, "learning_rate": 0.01, "loss": 1.9866, "step": 65064 }, { "epoch": 6.681762168823167, "grad_norm": 0.07284927368164062, "learning_rate": 0.01, "loss": 1.9733, "step": 65067 }, { "epoch": 6.682070240295749, "grad_norm": 0.07610774785280228, "learning_rate": 0.01, "loss": 1.9756, "step": 65070 }, { "epoch": 6.68237831176833, "grad_norm": 0.04308845102787018, "learning_rate": 0.01, "loss": 1.9727, "step": 65073 }, { "epoch": 6.682686383240912, "grad_norm": 0.08398102223873138, "learning_rate": 0.01, "loss": 1.9889, "step": 65076 }, { "epoch": 6.682994454713493, "grad_norm": 0.11537694185972214, "learning_rate": 0.01, "loss": 1.9694, "step": 65079 }, { "epoch": 6.6833025261860755, "grad_norm": 0.03176436200737953, "learning_rate": 0.01, "loss": 1.9525, "step": 65082 }, { "epoch": 6.683610597658657, "grad_norm": 0.09571046382188797, "learning_rate": 0.01, "loss": 1.9763, "step": 65085 }, { "epoch": 6.683918669131239, "grad_norm": 0.04681390896439552, "learning_rate": 0.01, "loss": 1.9871, "step": 65088 }, { "epoch": 6.68422674060382, "grad_norm": 0.26577845215797424, "learning_rate": 0.01, "loss": 1.9925, "step": 65091 }, { "epoch": 6.684534812076402, "grad_norm": 0.12212441116571426, "learning_rate": 0.01, "loss": 1.9681, "step": 65094 }, { "epoch": 6.684842883548983, "grad_norm": 0.4320748448371887, "learning_rate": 0.01, "loss": 1.9581, "step": 65097 }, { "epoch": 6.685150955021565, "grad_norm": 0.04880547523498535, "learning_rate": 0.01, "loss": 1.9766, "step": 65100 }, { "epoch": 6.685459026494147, "grad_norm": 0.08876339346170425, "learning_rate": 0.01, "loss": 1.974, "step": 65103 }, { "epoch": 6.685767097966728, "grad_norm": 0.06511343270540237, "learning_rate": 0.01, "loss": 1.9877, "step": 65106 }, { "epoch": 6.6860751694393095, "grad_norm": 0.050562430173158646, "learning_rate": 0.01, "loss": 1.9807, "step": 65109 }, { "epoch": 6.6863832409118915, "grad_norm": 0.04717005044221878, "learning_rate": 0.01, "loss": 1.9602, "step": 65112 }, { "epoch": 6.686691312384474, "grad_norm": 0.08370763063430786, "learning_rate": 0.01, "loss": 1.9779, "step": 65115 }, { "epoch": 6.686999383857055, "grad_norm": 0.0477861724793911, "learning_rate": 0.01, "loss": 1.9572, "step": 65118 }, { "epoch": 6.687307455329637, "grad_norm": 0.02870395965874195, "learning_rate": 0.01, "loss": 2.0043, "step": 65121 }, { "epoch": 6.687615526802218, "grad_norm": 0.04413042590022087, "learning_rate": 0.01, "loss": 1.9689, "step": 65124 }, { "epoch": 6.6879235982748, "grad_norm": 0.041404690593481064, "learning_rate": 0.01, "loss": 1.9668, "step": 65127 }, { "epoch": 6.688231669747381, "grad_norm": 0.04159929230809212, "learning_rate": 0.01, "loss": 1.9783, "step": 65130 }, { "epoch": 6.688539741219963, "grad_norm": 0.06357663124799728, "learning_rate": 0.01, "loss": 1.9677, "step": 65133 }, { "epoch": 6.688847812692544, "grad_norm": 0.03610898181796074, "learning_rate": 0.01, "loss": 2.0024, "step": 65136 }, { "epoch": 6.689155884165126, "grad_norm": 0.038069941103458405, "learning_rate": 0.01, "loss": 1.9789, "step": 65139 }, { "epoch": 6.689463955637708, "grad_norm": 0.05543750151991844, "learning_rate": 0.01, "loss": 1.9695, "step": 65142 }, { "epoch": 6.68977202711029, "grad_norm": 0.10848159343004227, "learning_rate": 0.01, "loss": 1.9616, "step": 65145 }, { "epoch": 6.690080098582872, "grad_norm": 0.1446017026901245, "learning_rate": 0.01, "loss": 1.9915, "step": 65148 }, { "epoch": 6.690388170055453, "grad_norm": 0.10384636372327805, "learning_rate": 0.01, "loss": 1.9581, "step": 65151 }, { "epoch": 6.690696241528035, "grad_norm": 0.07247021794319153, "learning_rate": 0.01, "loss": 1.9817, "step": 65154 }, { "epoch": 6.691004313000616, "grad_norm": 0.04569610580801964, "learning_rate": 0.01, "loss": 1.9818, "step": 65157 }, { "epoch": 6.691312384473198, "grad_norm": 0.039695657789707184, "learning_rate": 0.01, "loss": 1.9897, "step": 65160 }, { "epoch": 6.691620455945779, "grad_norm": 0.053198885172605515, "learning_rate": 0.01, "loss": 1.998, "step": 65163 }, { "epoch": 6.691928527418361, "grad_norm": 0.042604029178619385, "learning_rate": 0.01, "loss": 1.9965, "step": 65166 }, { "epoch": 6.6922365988909425, "grad_norm": 0.031200909987092018, "learning_rate": 0.01, "loss": 1.9647, "step": 65169 }, { "epoch": 6.6925446703635245, "grad_norm": 0.10486329346895218, "learning_rate": 0.01, "loss": 1.9919, "step": 65172 }, { "epoch": 6.692852741836106, "grad_norm": 0.046825818717479706, "learning_rate": 0.01, "loss": 1.9795, "step": 65175 }, { "epoch": 6.693160813308688, "grad_norm": 0.1281237006187439, "learning_rate": 0.01, "loss": 2.0059, "step": 65178 }, { "epoch": 6.693468884781269, "grad_norm": 0.09412495791912079, "learning_rate": 0.01, "loss": 1.9904, "step": 65181 }, { "epoch": 6.693776956253851, "grad_norm": 0.08914454281330109, "learning_rate": 0.01, "loss": 1.9938, "step": 65184 }, { "epoch": 6.694085027726432, "grad_norm": 0.07769876718521118, "learning_rate": 0.01, "loss": 1.9673, "step": 65187 }, { "epoch": 6.694393099199014, "grad_norm": 0.10803245007991791, "learning_rate": 0.01, "loss": 1.9865, "step": 65190 }, { "epoch": 6.694701170671596, "grad_norm": 0.07634082436561584, "learning_rate": 0.01, "loss": 1.9459, "step": 65193 }, { "epoch": 6.695009242144177, "grad_norm": 0.033576395362615585, "learning_rate": 0.01, "loss": 1.9826, "step": 65196 }, { "epoch": 6.695317313616759, "grad_norm": 0.04042442888021469, "learning_rate": 0.01, "loss": 1.992, "step": 65199 }, { "epoch": 6.695625385089341, "grad_norm": 0.04007275775074959, "learning_rate": 0.01, "loss": 2.0092, "step": 65202 }, { "epoch": 6.695933456561923, "grad_norm": 0.036564283072948456, "learning_rate": 0.01, "loss": 1.9913, "step": 65205 }, { "epoch": 6.696241528034504, "grad_norm": 0.03179538995027542, "learning_rate": 0.01, "loss": 1.957, "step": 65208 }, { "epoch": 6.696549599507086, "grad_norm": 0.050880614668130875, "learning_rate": 0.01, "loss": 1.985, "step": 65211 }, { "epoch": 6.696857670979667, "grad_norm": 0.07803148031234741, "learning_rate": 0.01, "loss": 1.9987, "step": 65214 }, { "epoch": 6.697165742452249, "grad_norm": 0.11088874191045761, "learning_rate": 0.01, "loss": 1.9561, "step": 65217 }, { "epoch": 6.69747381392483, "grad_norm": 0.05180731415748596, "learning_rate": 0.01, "loss": 2.0025, "step": 65220 }, { "epoch": 6.697781885397412, "grad_norm": 0.09849876910448074, "learning_rate": 0.01, "loss": 1.9838, "step": 65223 }, { "epoch": 6.698089956869994, "grad_norm": 0.07928299903869629, "learning_rate": 0.01, "loss": 1.974, "step": 65226 }, { "epoch": 6.6983980283425755, "grad_norm": 0.058122437447309494, "learning_rate": 0.01, "loss": 1.9843, "step": 65229 }, { "epoch": 6.698706099815157, "grad_norm": 0.059113189578056335, "learning_rate": 0.01, "loss": 1.987, "step": 65232 }, { "epoch": 6.699014171287739, "grad_norm": 0.03366167098283768, "learning_rate": 0.01, "loss": 1.9818, "step": 65235 }, { "epoch": 6.699322242760321, "grad_norm": 0.036344975233078, "learning_rate": 0.01, "loss": 1.9468, "step": 65238 }, { "epoch": 6.699630314232902, "grad_norm": 0.10664808005094528, "learning_rate": 0.01, "loss": 1.9469, "step": 65241 }, { "epoch": 6.699938385705484, "grad_norm": 0.09889326244592667, "learning_rate": 0.01, "loss": 1.9851, "step": 65244 }, { "epoch": 6.700246457178065, "grad_norm": 0.05460183694958687, "learning_rate": 0.01, "loss": 2.0019, "step": 65247 }, { "epoch": 6.700554528650647, "grad_norm": 0.08168340474367142, "learning_rate": 0.01, "loss": 1.9788, "step": 65250 }, { "epoch": 6.700862600123228, "grad_norm": 0.0726526603102684, "learning_rate": 0.01, "loss": 1.9659, "step": 65253 }, { "epoch": 6.70117067159581, "grad_norm": 0.0469973124563694, "learning_rate": 0.01, "loss": 1.9772, "step": 65256 }, { "epoch": 6.7014787430683915, "grad_norm": 0.05402790755033493, "learning_rate": 0.01, "loss": 1.9723, "step": 65259 }, { "epoch": 6.701786814540974, "grad_norm": 0.03723495826125145, "learning_rate": 0.01, "loss": 1.9975, "step": 65262 }, { "epoch": 6.702094886013555, "grad_norm": 0.1069960966706276, "learning_rate": 0.01, "loss": 1.9526, "step": 65265 }, { "epoch": 6.702402957486137, "grad_norm": 0.0806579738855362, "learning_rate": 0.01, "loss": 1.98, "step": 65268 }, { "epoch": 6.702711028958719, "grad_norm": 0.05724218115210533, "learning_rate": 0.01, "loss": 2.001, "step": 65271 }, { "epoch": 6.7030191004313, "grad_norm": 0.10128419101238251, "learning_rate": 0.01, "loss": 1.9824, "step": 65274 }, { "epoch": 6.703327171903882, "grad_norm": 0.1476512998342514, "learning_rate": 0.01, "loss": 2.0008, "step": 65277 }, { "epoch": 6.703635243376463, "grad_norm": 0.10202914476394653, "learning_rate": 0.01, "loss": 1.9957, "step": 65280 }, { "epoch": 6.703943314849045, "grad_norm": 0.042887743562459946, "learning_rate": 0.01, "loss": 1.9819, "step": 65283 }, { "epoch": 6.704251386321626, "grad_norm": 0.0640069991350174, "learning_rate": 0.01, "loss": 1.9784, "step": 65286 }, { "epoch": 6.7045594577942085, "grad_norm": 0.042582917958498, "learning_rate": 0.01, "loss": 1.9819, "step": 65289 }, { "epoch": 6.70486752926679, "grad_norm": 0.0463312529027462, "learning_rate": 0.01, "loss": 1.9802, "step": 65292 }, { "epoch": 6.705175600739372, "grad_norm": 0.05490421503782272, "learning_rate": 0.01, "loss": 1.9697, "step": 65295 }, { "epoch": 6.705483672211953, "grad_norm": 0.07986756414175034, "learning_rate": 0.01, "loss": 1.9504, "step": 65298 }, { "epoch": 6.705791743684535, "grad_norm": 0.06604810804128647, "learning_rate": 0.01, "loss": 1.9782, "step": 65301 }, { "epoch": 6.706099815157117, "grad_norm": 0.036626990884542465, "learning_rate": 0.01, "loss": 1.9685, "step": 65304 }, { "epoch": 6.706407886629698, "grad_norm": 0.03825300931930542, "learning_rate": 0.01, "loss": 1.9751, "step": 65307 }, { "epoch": 6.706715958102279, "grad_norm": 0.07136386632919312, "learning_rate": 0.01, "loss": 1.9652, "step": 65310 }, { "epoch": 6.707024029574861, "grad_norm": 0.059436917304992676, "learning_rate": 0.01, "loss": 2.0015, "step": 65313 }, { "epoch": 6.707332101047443, "grad_norm": 0.06798838824033737, "learning_rate": 0.01, "loss": 1.9803, "step": 65316 }, { "epoch": 6.7076401725200245, "grad_norm": 0.05605984479188919, "learning_rate": 0.01, "loss": 2.0175, "step": 65319 }, { "epoch": 6.707948243992607, "grad_norm": 0.039418842643499374, "learning_rate": 0.01, "loss": 1.9744, "step": 65322 }, { "epoch": 6.708256315465188, "grad_norm": 0.037476614117622375, "learning_rate": 0.01, "loss": 1.985, "step": 65325 }, { "epoch": 6.70856438693777, "grad_norm": 0.03878027945756912, "learning_rate": 0.01, "loss": 1.9927, "step": 65328 }, { "epoch": 6.708872458410351, "grad_norm": 0.14439933001995087, "learning_rate": 0.01, "loss": 1.9748, "step": 65331 }, { "epoch": 6.709180529882933, "grad_norm": 0.1026519313454628, "learning_rate": 0.01, "loss": 1.9874, "step": 65334 }, { "epoch": 6.709488601355514, "grad_norm": 0.059400398284196854, "learning_rate": 0.01, "loss": 1.9556, "step": 65337 }, { "epoch": 6.709796672828096, "grad_norm": 0.03667070344090462, "learning_rate": 0.01, "loss": 1.9706, "step": 65340 }, { "epoch": 6.710104744300677, "grad_norm": 0.03821050748229027, "learning_rate": 0.01, "loss": 1.9647, "step": 65343 }, { "epoch": 6.710412815773259, "grad_norm": 0.034569595009088516, "learning_rate": 0.01, "loss": 1.9783, "step": 65346 }, { "epoch": 6.7107208872458415, "grad_norm": 0.0894671380519867, "learning_rate": 0.01, "loss": 1.979, "step": 65349 }, { "epoch": 6.711028958718423, "grad_norm": 0.1035616546869278, "learning_rate": 0.01, "loss": 1.9808, "step": 65352 }, { "epoch": 6.711337030191005, "grad_norm": 0.07528524845838547, "learning_rate": 0.01, "loss": 1.9708, "step": 65355 }, { "epoch": 6.711645101663586, "grad_norm": 0.073489710688591, "learning_rate": 0.01, "loss": 1.9892, "step": 65358 }, { "epoch": 6.711953173136168, "grad_norm": 0.047937504947185516, "learning_rate": 0.01, "loss": 1.9775, "step": 65361 }, { "epoch": 6.712261244608749, "grad_norm": 0.049860548228025436, "learning_rate": 0.01, "loss": 1.99, "step": 65364 }, { "epoch": 6.712569316081331, "grad_norm": 0.04179536923766136, "learning_rate": 0.01, "loss": 1.9644, "step": 65367 }, { "epoch": 6.712877387553912, "grad_norm": 0.03599520027637482, "learning_rate": 0.01, "loss": 1.9819, "step": 65370 }, { "epoch": 6.713185459026494, "grad_norm": 0.03743865713477135, "learning_rate": 0.01, "loss": 1.9791, "step": 65373 }, { "epoch": 6.7134935304990755, "grad_norm": 0.04530680924654007, "learning_rate": 0.01, "loss": 1.9728, "step": 65376 }, { "epoch": 6.7138016019716575, "grad_norm": 0.05956900492310524, "learning_rate": 0.01, "loss": 2.0093, "step": 65379 }, { "epoch": 6.714109673444239, "grad_norm": 0.03879319131374359, "learning_rate": 0.01, "loss": 1.9913, "step": 65382 }, { "epoch": 6.714417744916821, "grad_norm": 0.08729801326990128, "learning_rate": 0.01, "loss": 1.979, "step": 65385 }, { "epoch": 6.714725816389402, "grad_norm": 0.06801248341798782, "learning_rate": 0.01, "loss": 1.9922, "step": 65388 }, { "epoch": 6.715033887861984, "grad_norm": 0.10621631890535355, "learning_rate": 0.01, "loss": 1.9631, "step": 65391 }, { "epoch": 6.715341959334566, "grad_norm": 0.06591764837503433, "learning_rate": 0.01, "loss": 1.9941, "step": 65394 }, { "epoch": 6.715650030807147, "grad_norm": 0.08169190585613251, "learning_rate": 0.01, "loss": 1.9712, "step": 65397 }, { "epoch": 6.715958102279729, "grad_norm": 0.06591983139514923, "learning_rate": 0.01, "loss": 1.987, "step": 65400 }, { "epoch": 6.71626617375231, "grad_norm": 0.102086141705513, "learning_rate": 0.01, "loss": 1.962, "step": 65403 }, { "epoch": 6.716574245224892, "grad_norm": 0.13108035922050476, "learning_rate": 0.01, "loss": 1.9915, "step": 65406 }, { "epoch": 6.716882316697474, "grad_norm": 0.09894704073667526, "learning_rate": 0.01, "loss": 2.0025, "step": 65409 }, { "epoch": 6.717190388170056, "grad_norm": 0.05907077714800835, "learning_rate": 0.01, "loss": 1.9726, "step": 65412 }, { "epoch": 6.717498459642637, "grad_norm": 0.044414445757865906, "learning_rate": 0.01, "loss": 1.9734, "step": 65415 }, { "epoch": 6.717806531115219, "grad_norm": 0.04454610496759415, "learning_rate": 0.01, "loss": 1.947, "step": 65418 }, { "epoch": 6.7181146025878, "grad_norm": 0.044069502502679825, "learning_rate": 0.01, "loss": 1.9755, "step": 65421 }, { "epoch": 6.718422674060382, "grad_norm": 0.05330649018287659, "learning_rate": 0.01, "loss": 2.0213, "step": 65424 }, { "epoch": 6.718730745532964, "grad_norm": 0.13220176100730896, "learning_rate": 0.01, "loss": 1.9773, "step": 65427 }, { "epoch": 6.719038817005545, "grad_norm": 0.05000099912285805, "learning_rate": 0.01, "loss": 1.9834, "step": 65430 }, { "epoch": 6.7193468884781264, "grad_norm": 0.053126510232686996, "learning_rate": 0.01, "loss": 1.9683, "step": 65433 }, { "epoch": 6.7196549599507085, "grad_norm": 0.050792306661605835, "learning_rate": 0.01, "loss": 1.9877, "step": 65436 }, { "epoch": 6.7199630314232905, "grad_norm": 0.05356533080339432, "learning_rate": 0.01, "loss": 1.9913, "step": 65439 }, { "epoch": 6.720271102895872, "grad_norm": 0.03830750659108162, "learning_rate": 0.01, "loss": 1.974, "step": 65442 }, { "epoch": 6.720579174368454, "grad_norm": 0.0419539138674736, "learning_rate": 0.01, "loss": 1.9891, "step": 65445 }, { "epoch": 6.720887245841035, "grad_norm": 0.04633840546011925, "learning_rate": 0.01, "loss": 1.9725, "step": 65448 }, { "epoch": 6.721195317313617, "grad_norm": 0.048049312084913254, "learning_rate": 0.01, "loss": 1.978, "step": 65451 }, { "epoch": 6.721503388786198, "grad_norm": 0.031329698860645294, "learning_rate": 0.01, "loss": 1.9681, "step": 65454 }, { "epoch": 6.72181146025878, "grad_norm": 0.03230907768011093, "learning_rate": 0.01, "loss": 1.9974, "step": 65457 }, { "epoch": 6.722119531731361, "grad_norm": 0.09823235124349594, "learning_rate": 0.01, "loss": 1.9824, "step": 65460 }, { "epoch": 6.722427603203943, "grad_norm": 0.11478835344314575, "learning_rate": 0.01, "loss": 1.9782, "step": 65463 }, { "epoch": 6.7227356746765246, "grad_norm": 0.040770623832941055, "learning_rate": 0.01, "loss": 1.9768, "step": 65466 }, { "epoch": 6.723043746149107, "grad_norm": 0.11709921061992645, "learning_rate": 0.01, "loss": 1.9885, "step": 65469 }, { "epoch": 6.723351817621689, "grad_norm": 0.041359901428222656, "learning_rate": 0.01, "loss": 1.9778, "step": 65472 }, { "epoch": 6.72365988909427, "grad_norm": 0.03594958409667015, "learning_rate": 0.01, "loss": 1.9651, "step": 65475 }, { "epoch": 6.723967960566852, "grad_norm": 0.03344079852104187, "learning_rate": 0.01, "loss": 1.9872, "step": 65478 }, { "epoch": 6.724276032039433, "grad_norm": 0.03400930017232895, "learning_rate": 0.01, "loss": 1.9618, "step": 65481 }, { "epoch": 6.724584103512015, "grad_norm": 0.11725469678640366, "learning_rate": 0.01, "loss": 1.9798, "step": 65484 }, { "epoch": 6.724892174984596, "grad_norm": 0.08199802041053772, "learning_rate": 0.01, "loss": 1.9765, "step": 65487 }, { "epoch": 6.725200246457178, "grad_norm": 0.04940160736441612, "learning_rate": 0.01, "loss": 1.9546, "step": 65490 }, { "epoch": 6.7255083179297594, "grad_norm": 0.08786390721797943, "learning_rate": 0.01, "loss": 1.9616, "step": 65493 }, { "epoch": 6.7258163894023415, "grad_norm": 0.04642069712281227, "learning_rate": 0.01, "loss": 1.9623, "step": 65496 }, { "epoch": 6.726124460874923, "grad_norm": 0.0684671550989151, "learning_rate": 0.01, "loss": 1.9626, "step": 65499 }, { "epoch": 6.726432532347505, "grad_norm": 0.05767513066530228, "learning_rate": 0.01, "loss": 2.001, "step": 65502 }, { "epoch": 6.726740603820086, "grad_norm": 0.04825710505247116, "learning_rate": 0.01, "loss": 1.9883, "step": 65505 }, { "epoch": 6.727048675292668, "grad_norm": 0.08460171520709991, "learning_rate": 0.01, "loss": 1.9801, "step": 65508 }, { "epoch": 6.727356746765249, "grad_norm": 0.08406694233417511, "learning_rate": 0.01, "loss": 1.9708, "step": 65511 }, { "epoch": 6.727664818237831, "grad_norm": 0.09590575098991394, "learning_rate": 0.01, "loss": 1.9723, "step": 65514 }, { "epoch": 6.727972889710413, "grad_norm": 0.05631608888506889, "learning_rate": 0.01, "loss": 1.9611, "step": 65517 }, { "epoch": 6.728280961182994, "grad_norm": 0.04405519738793373, "learning_rate": 0.01, "loss": 1.973, "step": 65520 }, { "epoch": 6.728589032655576, "grad_norm": 0.04463804140686989, "learning_rate": 0.01, "loss": 1.9702, "step": 65523 }, { "epoch": 6.728897104128158, "grad_norm": 0.048253465443849564, "learning_rate": 0.01, "loss": 1.9753, "step": 65526 }, { "epoch": 6.72920517560074, "grad_norm": 0.058578286319971085, "learning_rate": 0.01, "loss": 1.9872, "step": 65529 }, { "epoch": 6.729513247073321, "grad_norm": 0.0791950449347496, "learning_rate": 0.01, "loss": 1.962, "step": 65532 }, { "epoch": 6.729821318545903, "grad_norm": 0.112459696829319, "learning_rate": 0.01, "loss": 1.9797, "step": 65535 }, { "epoch": 6.730129390018484, "grad_norm": 0.03688877448439598, "learning_rate": 0.01, "loss": 1.98, "step": 65538 }, { "epoch": 6.730437461491066, "grad_norm": 0.041237879544496536, "learning_rate": 0.01, "loss": 1.9844, "step": 65541 }, { "epoch": 6.730745532963647, "grad_norm": 0.09671095013618469, "learning_rate": 0.01, "loss": 1.9889, "step": 65544 }, { "epoch": 6.731053604436229, "grad_norm": 0.05881296843290329, "learning_rate": 0.01, "loss": 2.0005, "step": 65547 }, { "epoch": 6.731361675908811, "grad_norm": 0.07665944844484329, "learning_rate": 0.01, "loss": 1.9668, "step": 65550 }, { "epoch": 6.7316697473813925, "grad_norm": 0.12369941920042038, "learning_rate": 0.01, "loss": 1.9712, "step": 65553 }, { "epoch": 6.7319778188539745, "grad_norm": 0.03289101645350456, "learning_rate": 0.01, "loss": 1.972, "step": 65556 }, { "epoch": 6.732285890326556, "grad_norm": 0.07767914235591888, "learning_rate": 0.01, "loss": 1.9751, "step": 65559 }, { "epoch": 6.732593961799138, "grad_norm": 0.16727425158023834, "learning_rate": 0.01, "loss": 1.9728, "step": 65562 }, { "epoch": 6.732902033271719, "grad_norm": 0.13580100238323212, "learning_rate": 0.01, "loss": 1.9805, "step": 65565 }, { "epoch": 6.733210104744301, "grad_norm": 0.06935961544513702, "learning_rate": 0.01, "loss": 1.985, "step": 65568 }, { "epoch": 6.733518176216882, "grad_norm": 0.07026053220033646, "learning_rate": 0.01, "loss": 1.9791, "step": 65571 }, { "epoch": 6.733826247689464, "grad_norm": 0.08974307775497437, "learning_rate": 0.01, "loss": 1.9715, "step": 65574 }, { "epoch": 6.734134319162045, "grad_norm": 0.04675333574414253, "learning_rate": 0.01, "loss": 1.9894, "step": 65577 }, { "epoch": 6.734442390634627, "grad_norm": 0.04853864386677742, "learning_rate": 0.01, "loss": 1.9679, "step": 65580 }, { "epoch": 6.7347504621072085, "grad_norm": 0.03581572324037552, "learning_rate": 0.01, "loss": 1.947, "step": 65583 }, { "epoch": 6.735058533579791, "grad_norm": 0.03860647231340408, "learning_rate": 0.01, "loss": 2.0082, "step": 65586 }, { "epoch": 6.735366605052372, "grad_norm": 0.0573217049241066, "learning_rate": 0.01, "loss": 1.9692, "step": 65589 }, { "epoch": 6.735674676524954, "grad_norm": 0.09368444234132767, "learning_rate": 0.01, "loss": 1.9789, "step": 65592 }, { "epoch": 6.735982747997536, "grad_norm": 0.21956096589565277, "learning_rate": 0.01, "loss": 1.9777, "step": 65595 }, { "epoch": 6.736290819470117, "grad_norm": 0.12725447118282318, "learning_rate": 0.01, "loss": 1.9808, "step": 65598 }, { "epoch": 6.736598890942699, "grad_norm": 0.06952683627605438, "learning_rate": 0.01, "loss": 1.9499, "step": 65601 }, { "epoch": 6.73690696241528, "grad_norm": 0.03885588422417641, "learning_rate": 0.01, "loss": 1.9634, "step": 65604 }, { "epoch": 6.737215033887862, "grad_norm": 0.06260085850954056, "learning_rate": 0.01, "loss": 1.9751, "step": 65607 }, { "epoch": 6.737523105360443, "grad_norm": 0.045172885060310364, "learning_rate": 0.01, "loss": 1.9778, "step": 65610 }, { "epoch": 6.7378311768330255, "grad_norm": 0.05928365886211395, "learning_rate": 0.01, "loss": 1.9915, "step": 65613 }, { "epoch": 6.738139248305607, "grad_norm": 0.039838556200265884, "learning_rate": 0.01, "loss": 1.9747, "step": 65616 }, { "epoch": 6.738447319778189, "grad_norm": 0.035764627158641815, "learning_rate": 0.01, "loss": 1.9774, "step": 65619 }, { "epoch": 6.73875539125077, "grad_norm": 0.05831705033779144, "learning_rate": 0.01, "loss": 1.9647, "step": 65622 }, { "epoch": 6.739063462723352, "grad_norm": 0.07349859923124313, "learning_rate": 0.01, "loss": 1.9753, "step": 65625 }, { "epoch": 6.739371534195934, "grad_norm": 0.05156675726175308, "learning_rate": 0.01, "loss": 1.972, "step": 65628 }, { "epoch": 6.739679605668515, "grad_norm": 0.09530221670866013, "learning_rate": 0.01, "loss": 1.9801, "step": 65631 }, { "epoch": 6.739987677141096, "grad_norm": 0.04756055027246475, "learning_rate": 0.01, "loss": 1.9879, "step": 65634 }, { "epoch": 6.740295748613678, "grad_norm": 0.038086969405412674, "learning_rate": 0.01, "loss": 1.972, "step": 65637 }, { "epoch": 6.74060382008626, "grad_norm": 0.06435366719961166, "learning_rate": 0.01, "loss": 1.9785, "step": 65640 }, { "epoch": 6.7409118915588415, "grad_norm": 0.12931565940380096, "learning_rate": 0.01, "loss": 1.9875, "step": 65643 }, { "epoch": 6.741219963031424, "grad_norm": 0.05568262189626694, "learning_rate": 0.01, "loss": 1.9713, "step": 65646 }, { "epoch": 6.741528034504005, "grad_norm": 0.06151677295565605, "learning_rate": 0.01, "loss": 1.9735, "step": 65649 }, { "epoch": 6.741836105976587, "grad_norm": 0.05449217930436134, "learning_rate": 0.01, "loss": 2.0004, "step": 65652 }, { "epoch": 6.742144177449168, "grad_norm": 0.05131798982620239, "learning_rate": 0.01, "loss": 1.9909, "step": 65655 }, { "epoch": 6.74245224892175, "grad_norm": 0.04320504143834114, "learning_rate": 0.01, "loss": 1.9984, "step": 65658 }, { "epoch": 6.742760320394331, "grad_norm": 0.06800030171871185, "learning_rate": 0.01, "loss": 1.9785, "step": 65661 }, { "epoch": 6.743068391866913, "grad_norm": 0.04916686192154884, "learning_rate": 0.01, "loss": 1.9932, "step": 65664 }, { "epoch": 6.743376463339494, "grad_norm": 0.10272326320409775, "learning_rate": 0.01, "loss": 1.9765, "step": 65667 }, { "epoch": 6.743684534812076, "grad_norm": 0.03326984494924545, "learning_rate": 0.01, "loss": 1.9685, "step": 65670 }, { "epoch": 6.7439926062846585, "grad_norm": 0.03591003641486168, "learning_rate": 0.01, "loss": 1.9751, "step": 65673 }, { "epoch": 6.74430067775724, "grad_norm": 0.0768413096666336, "learning_rate": 0.01, "loss": 1.951, "step": 65676 }, { "epoch": 6.744608749229822, "grad_norm": 0.04805091395974159, "learning_rate": 0.01, "loss": 1.9654, "step": 65679 }, { "epoch": 6.744916820702403, "grad_norm": 0.060281701385974884, "learning_rate": 0.01, "loss": 1.9986, "step": 65682 }, { "epoch": 6.745224892174985, "grad_norm": 0.08178987354040146, "learning_rate": 0.01, "loss": 1.9693, "step": 65685 }, { "epoch": 6.745532963647566, "grad_norm": 0.047426290810108185, "learning_rate": 0.01, "loss": 1.9985, "step": 65688 }, { "epoch": 6.745841035120148, "grad_norm": 0.06165017560124397, "learning_rate": 0.01, "loss": 1.9793, "step": 65691 }, { "epoch": 6.746149106592729, "grad_norm": 0.0794510692358017, "learning_rate": 0.01, "loss": 1.9537, "step": 65694 }, { "epoch": 6.746457178065311, "grad_norm": 0.1278935670852661, "learning_rate": 0.01, "loss": 1.9794, "step": 65697 }, { "epoch": 6.7467652495378925, "grad_norm": 0.09933564066886902, "learning_rate": 0.01, "loss": 1.9877, "step": 65700 }, { "epoch": 6.7470733210104745, "grad_norm": 0.06581506133079529, "learning_rate": 0.01, "loss": 1.9882, "step": 65703 }, { "epoch": 6.747381392483056, "grad_norm": 0.07437628507614136, "learning_rate": 0.01, "loss": 1.9723, "step": 65706 }, { "epoch": 6.747689463955638, "grad_norm": 0.06629917025566101, "learning_rate": 0.01, "loss": 1.9722, "step": 65709 }, { "epoch": 6.747997535428219, "grad_norm": 0.04644669219851494, "learning_rate": 0.01, "loss": 1.9623, "step": 65712 }, { "epoch": 6.748305606900801, "grad_norm": 0.042347557842731476, "learning_rate": 0.01, "loss": 1.9564, "step": 65715 }, { "epoch": 6.748613678373383, "grad_norm": 0.05482964962720871, "learning_rate": 0.01, "loss": 1.957, "step": 65718 }, { "epoch": 6.748921749845964, "grad_norm": 0.07373082637786865, "learning_rate": 0.01, "loss": 1.9562, "step": 65721 }, { "epoch": 6.749229821318546, "grad_norm": 0.08168603479862213, "learning_rate": 0.01, "loss": 1.9857, "step": 65724 }, { "epoch": 6.749537892791127, "grad_norm": 0.09342950582504272, "learning_rate": 0.01, "loss": 1.9536, "step": 65727 }, { "epoch": 6.749845964263709, "grad_norm": 0.05923538655042648, "learning_rate": 0.01, "loss": 1.9711, "step": 65730 }, { "epoch": 6.750154035736291, "grad_norm": 0.09214897453784943, "learning_rate": 0.01, "loss": 1.9879, "step": 65733 }, { "epoch": 6.750462107208873, "grad_norm": 0.034419383853673935, "learning_rate": 0.01, "loss": 1.9925, "step": 65736 }, { "epoch": 6.750770178681454, "grad_norm": 0.05979537591338158, "learning_rate": 0.01, "loss": 1.9595, "step": 65739 }, { "epoch": 6.751078250154036, "grad_norm": 0.11106430739164352, "learning_rate": 0.01, "loss": 1.9711, "step": 65742 }, { "epoch": 6.751386321626617, "grad_norm": 0.04116882011294365, "learning_rate": 0.01, "loss": 1.9743, "step": 65745 }, { "epoch": 6.751694393099199, "grad_norm": 0.0603414922952652, "learning_rate": 0.01, "loss": 2.0005, "step": 65748 }, { "epoch": 6.752002464571781, "grad_norm": 0.08985871076583862, "learning_rate": 0.01, "loss": 1.989, "step": 65751 }, { "epoch": 6.752310536044362, "grad_norm": 0.058961328119039536, "learning_rate": 0.01, "loss": 2.0145, "step": 65754 }, { "epoch": 6.752618607516944, "grad_norm": 0.0800008699297905, "learning_rate": 0.01, "loss": 1.9734, "step": 65757 }, { "epoch": 6.7529266789895255, "grad_norm": 0.06763100624084473, "learning_rate": 0.01, "loss": 1.9666, "step": 65760 }, { "epoch": 6.7532347504621075, "grad_norm": 0.08112933486700058, "learning_rate": 0.01, "loss": 1.9635, "step": 65763 }, { "epoch": 6.753542821934689, "grad_norm": 0.03964497894048691, "learning_rate": 0.01, "loss": 1.9907, "step": 65766 }, { "epoch": 6.753850893407271, "grad_norm": 0.1308111995458603, "learning_rate": 0.01, "loss": 1.9561, "step": 65769 }, { "epoch": 6.754158964879852, "grad_norm": 0.09832756221294403, "learning_rate": 0.01, "loss": 1.9777, "step": 65772 }, { "epoch": 6.754467036352434, "grad_norm": 0.08136483281850815, "learning_rate": 0.01, "loss": 1.9727, "step": 65775 }, { "epoch": 6.754775107825015, "grad_norm": 0.0976012796163559, "learning_rate": 0.01, "loss": 1.9719, "step": 65778 }, { "epoch": 6.755083179297597, "grad_norm": 0.14576563239097595, "learning_rate": 0.01, "loss": 1.9803, "step": 65781 }, { "epoch": 6.755391250770178, "grad_norm": 0.09006581455469131, "learning_rate": 0.01, "loss": 1.9583, "step": 65784 }, { "epoch": 6.75569932224276, "grad_norm": 0.07345472276210785, "learning_rate": 0.01, "loss": 2.003, "step": 65787 }, { "epoch": 6.7560073937153415, "grad_norm": 0.06007661297917366, "learning_rate": 0.01, "loss": 1.9676, "step": 65790 }, { "epoch": 6.756315465187924, "grad_norm": 0.07360505312681198, "learning_rate": 0.01, "loss": 1.991, "step": 65793 }, { "epoch": 6.756623536660506, "grad_norm": 0.07598039507865906, "learning_rate": 0.01, "loss": 1.9945, "step": 65796 }, { "epoch": 6.756931608133087, "grad_norm": 0.09206894040107727, "learning_rate": 0.01, "loss": 1.986, "step": 65799 }, { "epoch": 6.757239679605669, "grad_norm": 0.0724322646856308, "learning_rate": 0.01, "loss": 2.0015, "step": 65802 }, { "epoch": 6.75754775107825, "grad_norm": 0.06573556363582611, "learning_rate": 0.01, "loss": 1.9694, "step": 65805 }, { "epoch": 6.757855822550832, "grad_norm": 0.04363197833299637, "learning_rate": 0.01, "loss": 1.9615, "step": 65808 }, { "epoch": 6.758163894023413, "grad_norm": 0.03666981682181358, "learning_rate": 0.01, "loss": 1.9787, "step": 65811 }, { "epoch": 6.758471965495995, "grad_norm": 0.05076993256807327, "learning_rate": 0.01, "loss": 1.9997, "step": 65814 }, { "epoch": 6.758780036968576, "grad_norm": 0.07672358304262161, "learning_rate": 0.01, "loss": 1.9793, "step": 65817 }, { "epoch": 6.7590881084411585, "grad_norm": 0.1728837937116623, "learning_rate": 0.01, "loss": 1.9722, "step": 65820 }, { "epoch": 6.75939617991374, "grad_norm": 0.14630280435085297, "learning_rate": 0.01, "loss": 1.9864, "step": 65823 }, { "epoch": 6.759704251386322, "grad_norm": 0.08959884941577911, "learning_rate": 0.01, "loss": 1.9648, "step": 65826 }, { "epoch": 6.760012322858904, "grad_norm": 0.05487535893917084, "learning_rate": 0.01, "loss": 2.0093, "step": 65829 }, { "epoch": 6.760320394331485, "grad_norm": 0.05097530409693718, "learning_rate": 0.01, "loss": 1.9804, "step": 65832 }, { "epoch": 6.760628465804066, "grad_norm": 0.04289190098643303, "learning_rate": 0.01, "loss": 1.9502, "step": 65835 }, { "epoch": 6.760936537276648, "grad_norm": 0.054402586072683334, "learning_rate": 0.01, "loss": 1.9824, "step": 65838 }, { "epoch": 6.76124460874923, "grad_norm": 0.046257637441158295, "learning_rate": 0.01, "loss": 1.9838, "step": 65841 }, { "epoch": 6.761552680221811, "grad_norm": 0.033798422664403915, "learning_rate": 0.01, "loss": 1.9841, "step": 65844 }, { "epoch": 6.761860751694393, "grad_norm": 0.03898288682103157, "learning_rate": 0.01, "loss": 1.964, "step": 65847 }, { "epoch": 6.7621688231669745, "grad_norm": 0.061388492584228516, "learning_rate": 0.01, "loss": 1.953, "step": 65850 }, { "epoch": 6.762476894639557, "grad_norm": 0.03913845494389534, "learning_rate": 0.01, "loss": 1.9564, "step": 65853 }, { "epoch": 6.762784966112138, "grad_norm": 0.06032055988907814, "learning_rate": 0.01, "loss": 1.9701, "step": 65856 }, { "epoch": 6.76309303758472, "grad_norm": 0.11745115369558334, "learning_rate": 0.01, "loss": 1.982, "step": 65859 }, { "epoch": 6.763401109057301, "grad_norm": 0.10399042069911957, "learning_rate": 0.01, "loss": 1.9666, "step": 65862 }, { "epoch": 6.763709180529883, "grad_norm": 0.06745419651269913, "learning_rate": 0.01, "loss": 1.9733, "step": 65865 }, { "epoch": 6.764017252002464, "grad_norm": 0.036637041717767715, "learning_rate": 0.01, "loss": 1.9632, "step": 65868 }, { "epoch": 6.764325323475046, "grad_norm": 0.03688213601708412, "learning_rate": 0.01, "loss": 1.9647, "step": 65871 }, { "epoch": 6.764633394947628, "grad_norm": 0.041792191565036774, "learning_rate": 0.01, "loss": 1.9368, "step": 65874 }, { "epoch": 6.764941466420209, "grad_norm": 0.03558392450213432, "learning_rate": 0.01, "loss": 1.9553, "step": 65877 }, { "epoch": 6.7652495378927915, "grad_norm": 0.037666697055101395, "learning_rate": 0.01, "loss": 1.9519, "step": 65880 }, { "epoch": 6.765557609365373, "grad_norm": 0.03376712277531624, "learning_rate": 0.01, "loss": 1.9706, "step": 65883 }, { "epoch": 6.765865680837955, "grad_norm": 0.09008733928203583, "learning_rate": 0.01, "loss": 1.9788, "step": 65886 }, { "epoch": 6.766173752310536, "grad_norm": 0.10737526416778564, "learning_rate": 0.01, "loss": 1.9893, "step": 65889 }, { "epoch": 6.766481823783118, "grad_norm": 0.12039290368556976, "learning_rate": 0.01, "loss": 1.9967, "step": 65892 }, { "epoch": 6.766789895255699, "grad_norm": 0.10525926202535629, "learning_rate": 0.01, "loss": 1.9466, "step": 65895 }, { "epoch": 6.767097966728281, "grad_norm": 0.11887960135936737, "learning_rate": 0.01, "loss": 1.9659, "step": 65898 }, { "epoch": 6.767406038200862, "grad_norm": 0.06034785136580467, "learning_rate": 0.01, "loss": 1.9726, "step": 65901 }, { "epoch": 6.767714109673444, "grad_norm": 0.04543771967291832, "learning_rate": 0.01, "loss": 1.9848, "step": 65904 }, { "epoch": 6.7680221811460255, "grad_norm": 0.04256156459450722, "learning_rate": 0.01, "loss": 1.9842, "step": 65907 }, { "epoch": 6.7683302526186075, "grad_norm": 0.04168063402175903, "learning_rate": 0.01, "loss": 1.9641, "step": 65910 }, { "epoch": 6.768638324091189, "grad_norm": 0.05196443200111389, "learning_rate": 0.01, "loss": 1.9854, "step": 65913 }, { "epoch": 6.768946395563771, "grad_norm": 0.03945057466626167, "learning_rate": 0.01, "loss": 1.9682, "step": 65916 }, { "epoch": 6.769254467036353, "grad_norm": 0.09867379069328308, "learning_rate": 0.01, "loss": 1.9873, "step": 65919 }, { "epoch": 6.769562538508934, "grad_norm": 0.03700730949640274, "learning_rate": 0.01, "loss": 1.9814, "step": 65922 }, { "epoch": 6.769870609981516, "grad_norm": 0.05173708498477936, "learning_rate": 0.01, "loss": 1.9516, "step": 65925 }, { "epoch": 6.770178681454097, "grad_norm": 0.04833926633000374, "learning_rate": 0.01, "loss": 1.9616, "step": 65928 }, { "epoch": 6.770486752926679, "grad_norm": 0.05311394855380058, "learning_rate": 0.01, "loss": 1.9876, "step": 65931 }, { "epoch": 6.77079482439926, "grad_norm": 0.13476112484931946, "learning_rate": 0.01, "loss": 1.9678, "step": 65934 }, { "epoch": 6.771102895871842, "grad_norm": 0.05324669927358627, "learning_rate": 0.01, "loss": 1.9839, "step": 65937 }, { "epoch": 6.771410967344424, "grad_norm": 0.050452303141355515, "learning_rate": 0.01, "loss": 1.9709, "step": 65940 }, { "epoch": 6.771719038817006, "grad_norm": 0.03241961449384689, "learning_rate": 0.01, "loss": 1.997, "step": 65943 }, { "epoch": 6.772027110289587, "grad_norm": 0.1082826629281044, "learning_rate": 0.01, "loss": 1.9622, "step": 65946 }, { "epoch": 6.772335181762169, "grad_norm": 0.08889704197645187, "learning_rate": 0.01, "loss": 1.9578, "step": 65949 }, { "epoch": 6.772643253234751, "grad_norm": 0.08153937757015228, "learning_rate": 0.01, "loss": 1.9395, "step": 65952 }, { "epoch": 6.772951324707332, "grad_norm": 0.07284180819988251, "learning_rate": 0.01, "loss": 1.9858, "step": 65955 }, { "epoch": 6.773259396179914, "grad_norm": 0.049150798469781876, "learning_rate": 0.01, "loss": 1.9699, "step": 65958 }, { "epoch": 6.773567467652495, "grad_norm": 0.10383135825395584, "learning_rate": 0.01, "loss": 1.9728, "step": 65961 }, { "epoch": 6.773875539125077, "grad_norm": 0.05065099522471428, "learning_rate": 0.01, "loss": 1.9868, "step": 65964 }, { "epoch": 6.7741836105976585, "grad_norm": 0.05559004843235016, "learning_rate": 0.01, "loss": 2.0035, "step": 65967 }, { "epoch": 6.7744916820702406, "grad_norm": 0.050398439168930054, "learning_rate": 0.01, "loss": 1.979, "step": 65970 }, { "epoch": 6.774799753542822, "grad_norm": 0.05198364332318306, "learning_rate": 0.01, "loss": 1.9815, "step": 65973 }, { "epoch": 6.775107825015404, "grad_norm": 0.06122811511158943, "learning_rate": 0.01, "loss": 1.9957, "step": 65976 }, { "epoch": 6.775415896487985, "grad_norm": 0.09441248327493668, "learning_rate": 0.01, "loss": 2.0144, "step": 65979 }, { "epoch": 6.775723967960567, "grad_norm": 0.06502705812454224, "learning_rate": 0.01, "loss": 1.9775, "step": 65982 }, { "epoch": 6.776032039433148, "grad_norm": 0.07678141444921494, "learning_rate": 0.01, "loss": 1.958, "step": 65985 }, { "epoch": 6.77634011090573, "grad_norm": 0.05069658160209656, "learning_rate": 0.01, "loss": 1.9911, "step": 65988 }, { "epoch": 6.776648182378311, "grad_norm": 0.11829594522714615, "learning_rate": 0.01, "loss": 1.9692, "step": 65991 }, { "epoch": 6.776956253850893, "grad_norm": 0.036905381828546524, "learning_rate": 0.01, "loss": 1.9611, "step": 65994 }, { "epoch": 6.7772643253234754, "grad_norm": 0.04046548530459404, "learning_rate": 0.01, "loss": 1.9668, "step": 65997 }, { "epoch": 6.777572396796057, "grad_norm": 0.04027654603123665, "learning_rate": 0.01, "loss": 1.9871, "step": 66000 }, { "epoch": 6.777880468268639, "grad_norm": 0.044706303626298904, "learning_rate": 0.01, "loss": 1.9772, "step": 66003 }, { "epoch": 6.77818853974122, "grad_norm": 0.0753062292933464, "learning_rate": 0.01, "loss": 2.001, "step": 66006 }, { "epoch": 6.778496611213802, "grad_norm": 0.10951106995344162, "learning_rate": 0.01, "loss": 1.9561, "step": 66009 }, { "epoch": 6.778804682686383, "grad_norm": 0.053536590188741684, "learning_rate": 0.01, "loss": 1.9702, "step": 66012 }, { "epoch": 6.779112754158965, "grad_norm": 0.060393862426280975, "learning_rate": 0.01, "loss": 1.9826, "step": 66015 }, { "epoch": 6.779420825631546, "grad_norm": 0.049091193825006485, "learning_rate": 0.01, "loss": 1.9661, "step": 66018 }, { "epoch": 6.779728897104128, "grad_norm": 0.0649518296122551, "learning_rate": 0.01, "loss": 1.9752, "step": 66021 }, { "epoch": 6.7800369685767095, "grad_norm": 0.039447683840990067, "learning_rate": 0.01, "loss": 1.9757, "step": 66024 }, { "epoch": 6.7803450400492915, "grad_norm": 0.04264757037162781, "learning_rate": 0.01, "loss": 1.9537, "step": 66027 }, { "epoch": 6.7806531115218736, "grad_norm": 0.06174059584736824, "learning_rate": 0.01, "loss": 1.981, "step": 66030 }, { "epoch": 6.780961182994455, "grad_norm": 0.05155632272362709, "learning_rate": 0.01, "loss": 1.9752, "step": 66033 }, { "epoch": 6.781269254467036, "grad_norm": 0.08351369947195053, "learning_rate": 0.01, "loss": 1.9641, "step": 66036 }, { "epoch": 6.781577325939618, "grad_norm": 0.09187694638967514, "learning_rate": 0.01, "loss": 1.9735, "step": 66039 }, { "epoch": 6.7818853974122, "grad_norm": 0.21938036382198334, "learning_rate": 0.01, "loss": 1.985, "step": 66042 }, { "epoch": 6.782193468884781, "grad_norm": 0.051359206438064575, "learning_rate": 0.01, "loss": 1.9821, "step": 66045 }, { "epoch": 6.782501540357363, "grad_norm": 0.08765391260385513, "learning_rate": 0.01, "loss": 1.9927, "step": 66048 }, { "epoch": 6.782809611829944, "grad_norm": 0.03909270092844963, "learning_rate": 0.01, "loss": 1.9739, "step": 66051 }, { "epoch": 6.783117683302526, "grad_norm": 0.06215377524495125, "learning_rate": 0.01, "loss": 1.9658, "step": 66054 }, { "epoch": 6.783425754775108, "grad_norm": 0.05099578574299812, "learning_rate": 0.01, "loss": 1.9605, "step": 66057 }, { "epoch": 6.78373382624769, "grad_norm": 0.037330977618694305, "learning_rate": 0.01, "loss": 1.9744, "step": 66060 }, { "epoch": 6.784041897720271, "grad_norm": 0.034515704959630966, "learning_rate": 0.01, "loss": 1.9836, "step": 66063 }, { "epoch": 6.784349969192853, "grad_norm": 0.07824211567640305, "learning_rate": 0.01, "loss": 1.9861, "step": 66066 }, { "epoch": 6.784658040665434, "grad_norm": 0.13664306700229645, "learning_rate": 0.01, "loss": 1.9825, "step": 66069 }, { "epoch": 6.784966112138016, "grad_norm": 0.11008740961551666, "learning_rate": 0.01, "loss": 1.9748, "step": 66072 }, { "epoch": 6.785274183610598, "grad_norm": 0.07172096520662308, "learning_rate": 0.01, "loss": 2.0085, "step": 66075 }, { "epoch": 6.785582255083179, "grad_norm": 0.08354049175977707, "learning_rate": 0.01, "loss": 1.9607, "step": 66078 }, { "epoch": 6.785890326555761, "grad_norm": 0.04367983713746071, "learning_rate": 0.01, "loss": 1.9804, "step": 66081 }, { "epoch": 6.7861983980283425, "grad_norm": 0.03163396567106247, "learning_rate": 0.01, "loss": 1.9793, "step": 66084 }, { "epoch": 6.7865064695009245, "grad_norm": 0.0645727813243866, "learning_rate": 0.01, "loss": 1.991, "step": 66087 }, { "epoch": 6.786814540973506, "grad_norm": 0.04862186312675476, "learning_rate": 0.01, "loss": 1.9681, "step": 66090 }, { "epoch": 6.787122612446088, "grad_norm": 0.035945162177085876, "learning_rate": 0.01, "loss": 1.9657, "step": 66093 }, { "epoch": 6.787430683918669, "grad_norm": 0.10152018815279007, "learning_rate": 0.01, "loss": 1.9673, "step": 66096 }, { "epoch": 6.787738755391251, "grad_norm": 0.047067925333976746, "learning_rate": 0.01, "loss": 1.965, "step": 66099 }, { "epoch": 6.788046826863832, "grad_norm": 0.0677446648478508, "learning_rate": 0.01, "loss": 1.9553, "step": 66102 }, { "epoch": 6.788354898336414, "grad_norm": 0.03530653938651085, "learning_rate": 0.01, "loss": 1.9848, "step": 66105 }, { "epoch": 6.788662969808995, "grad_norm": 0.04466121271252632, "learning_rate": 0.01, "loss": 1.9825, "step": 66108 }, { "epoch": 6.788971041281577, "grad_norm": 0.036109503358602524, "learning_rate": 0.01, "loss": 1.9883, "step": 66111 }, { "epoch": 6.7892791127541585, "grad_norm": 0.03432103618979454, "learning_rate": 0.01, "loss": 1.9818, "step": 66114 }, { "epoch": 6.789587184226741, "grad_norm": 0.06858955323696136, "learning_rate": 0.01, "loss": 1.9798, "step": 66117 }, { "epoch": 6.789895255699323, "grad_norm": 0.053574852645397186, "learning_rate": 0.01, "loss": 1.9778, "step": 66120 }, { "epoch": 6.790203327171904, "grad_norm": 0.0781317800283432, "learning_rate": 0.01, "loss": 1.9848, "step": 66123 }, { "epoch": 6.790511398644486, "grad_norm": 0.0939275249838829, "learning_rate": 0.01, "loss": 1.979, "step": 66126 }, { "epoch": 6.790819470117067, "grad_norm": 0.1132221594452858, "learning_rate": 0.01, "loss": 1.9867, "step": 66129 }, { "epoch": 6.791127541589649, "grad_norm": 0.07932759076356888, "learning_rate": 0.01, "loss": 1.9893, "step": 66132 }, { "epoch": 6.79143561306223, "grad_norm": 0.04189478978514671, "learning_rate": 0.01, "loss": 1.961, "step": 66135 }, { "epoch": 6.791743684534812, "grad_norm": 0.06015237420797348, "learning_rate": 0.01, "loss": 1.9555, "step": 66138 }, { "epoch": 6.792051756007393, "grad_norm": 0.05187036469578743, "learning_rate": 0.01, "loss": 2.0005, "step": 66141 }, { "epoch": 6.7923598274799755, "grad_norm": 0.07512509822845459, "learning_rate": 0.01, "loss": 1.9791, "step": 66144 }, { "epoch": 6.792667898952557, "grad_norm": 0.10434945672750473, "learning_rate": 0.01, "loss": 1.9922, "step": 66147 }, { "epoch": 6.792975970425139, "grad_norm": 0.07873903959989548, "learning_rate": 0.01, "loss": 1.981, "step": 66150 }, { "epoch": 6.793284041897721, "grad_norm": 0.04768504202365875, "learning_rate": 0.01, "loss": 1.9568, "step": 66153 }, { "epoch": 6.793592113370302, "grad_norm": 0.09988034516572952, "learning_rate": 0.01, "loss": 1.9806, "step": 66156 }, { "epoch": 6.793900184842883, "grad_norm": 0.07220810651779175, "learning_rate": 0.01, "loss": 1.9664, "step": 66159 }, { "epoch": 6.794208256315465, "grad_norm": 0.036455634981393814, "learning_rate": 0.01, "loss": 2.0003, "step": 66162 }, { "epoch": 6.794516327788047, "grad_norm": 0.04097495973110199, "learning_rate": 0.01, "loss": 1.9836, "step": 66165 }, { "epoch": 6.794824399260628, "grad_norm": 0.03743165358901024, "learning_rate": 0.01, "loss": 1.9423, "step": 66168 }, { "epoch": 6.79513247073321, "grad_norm": 0.14310762286186218, "learning_rate": 0.01, "loss": 2.0118, "step": 66171 }, { "epoch": 6.7954405422057915, "grad_norm": 0.067170649766922, "learning_rate": 0.01, "loss": 1.9556, "step": 66174 }, { "epoch": 6.795748613678374, "grad_norm": 0.04540235176682472, "learning_rate": 0.01, "loss": 1.9537, "step": 66177 }, { "epoch": 6.796056685150955, "grad_norm": 0.035935305058956146, "learning_rate": 0.01, "loss": 1.9703, "step": 66180 }, { "epoch": 6.796364756623537, "grad_norm": 0.10116109251976013, "learning_rate": 0.01, "loss": 1.994, "step": 66183 }, { "epoch": 6.796672828096118, "grad_norm": 0.09814930707216263, "learning_rate": 0.01, "loss": 1.96, "step": 66186 }, { "epoch": 6.7969808995687, "grad_norm": 0.04029448702931404, "learning_rate": 0.01, "loss": 1.9701, "step": 66189 }, { "epoch": 6.797288971041281, "grad_norm": 0.07453613728284836, "learning_rate": 0.01, "loss": 1.9623, "step": 66192 }, { "epoch": 6.797597042513863, "grad_norm": 0.07255508750677109, "learning_rate": 0.01, "loss": 1.9744, "step": 66195 }, { "epoch": 6.797905113986445, "grad_norm": 0.07982856780290604, "learning_rate": 0.01, "loss": 1.9942, "step": 66198 }, { "epoch": 6.798213185459026, "grad_norm": 0.09988022595643997, "learning_rate": 0.01, "loss": 1.972, "step": 66201 }, { "epoch": 6.7985212569316085, "grad_norm": 0.09353185445070267, "learning_rate": 0.01, "loss": 1.9625, "step": 66204 }, { "epoch": 6.79882932840419, "grad_norm": 0.04059029743075371, "learning_rate": 0.01, "loss": 1.9812, "step": 66207 }, { "epoch": 6.799137399876772, "grad_norm": 0.04064788296818733, "learning_rate": 0.01, "loss": 1.9616, "step": 66210 }, { "epoch": 6.799445471349353, "grad_norm": 0.052321575582027435, "learning_rate": 0.01, "loss": 1.9848, "step": 66213 }, { "epoch": 6.799753542821935, "grad_norm": 0.13621580600738525, "learning_rate": 0.01, "loss": 1.9807, "step": 66216 }, { "epoch": 6.800061614294516, "grad_norm": 0.04013410210609436, "learning_rate": 0.01, "loss": 1.9933, "step": 66219 }, { "epoch": 6.800369685767098, "grad_norm": 0.04535072669386864, "learning_rate": 0.01, "loss": 1.9999, "step": 66222 }, { "epoch": 6.800677757239679, "grad_norm": 0.05595749244093895, "learning_rate": 0.01, "loss": 1.9841, "step": 66225 }, { "epoch": 6.800985828712261, "grad_norm": 0.03648608550429344, "learning_rate": 0.01, "loss": 2.0068, "step": 66228 }, { "epoch": 6.801293900184843, "grad_norm": 0.039576612412929535, "learning_rate": 0.01, "loss": 1.9796, "step": 66231 }, { "epoch": 6.8016019716574245, "grad_norm": 0.0474928542971611, "learning_rate": 0.01, "loss": 1.9933, "step": 66234 }, { "epoch": 6.801910043130006, "grad_norm": 0.04102933779358864, "learning_rate": 0.01, "loss": 1.9754, "step": 66237 }, { "epoch": 6.802218114602588, "grad_norm": 0.03818250820040703, "learning_rate": 0.01, "loss": 1.9846, "step": 66240 }, { "epoch": 6.80252618607517, "grad_norm": 0.038981273770332336, "learning_rate": 0.01, "loss": 1.9789, "step": 66243 }, { "epoch": 6.802834257547751, "grad_norm": 0.12116497755050659, "learning_rate": 0.01, "loss": 1.9555, "step": 66246 }, { "epoch": 6.803142329020333, "grad_norm": 0.05318591371178627, "learning_rate": 0.01, "loss": 1.993, "step": 66249 }, { "epoch": 6.803450400492914, "grad_norm": 0.05970082804560661, "learning_rate": 0.01, "loss": 1.9809, "step": 66252 }, { "epoch": 6.803758471965496, "grad_norm": 0.06618655472993851, "learning_rate": 0.01, "loss": 1.9836, "step": 66255 }, { "epoch": 6.804066543438077, "grad_norm": 0.05062811076641083, "learning_rate": 0.01, "loss": 1.9729, "step": 66258 }, { "epoch": 6.804374614910659, "grad_norm": 0.03643089160323143, "learning_rate": 0.01, "loss": 1.9736, "step": 66261 }, { "epoch": 6.804682686383241, "grad_norm": 0.051748260855674744, "learning_rate": 0.01, "loss": 1.9959, "step": 66264 }, { "epoch": 6.804990757855823, "grad_norm": 0.029979856684803963, "learning_rate": 0.01, "loss": 1.9828, "step": 66267 }, { "epoch": 6.805298829328404, "grad_norm": 0.03129233047366142, "learning_rate": 0.01, "loss": 1.9617, "step": 66270 }, { "epoch": 6.805606900800986, "grad_norm": 0.06324151903390884, "learning_rate": 0.01, "loss": 1.9882, "step": 66273 }, { "epoch": 6.805914972273568, "grad_norm": 0.13628403842449188, "learning_rate": 0.01, "loss": 2.0176, "step": 66276 }, { "epoch": 6.806223043746149, "grad_norm": 0.05685223639011383, "learning_rate": 0.01, "loss": 1.9704, "step": 66279 }, { "epoch": 6.806531115218731, "grad_norm": 0.0537252239882946, "learning_rate": 0.01, "loss": 1.9616, "step": 66282 }, { "epoch": 6.806839186691312, "grad_norm": 0.11015389859676361, "learning_rate": 0.01, "loss": 1.9703, "step": 66285 }, { "epoch": 6.807147258163894, "grad_norm": 0.04667497053742409, "learning_rate": 0.01, "loss": 1.9714, "step": 66288 }, { "epoch": 6.8074553296364755, "grad_norm": 0.03570316731929779, "learning_rate": 0.01, "loss": 1.9779, "step": 66291 }, { "epoch": 6.8077634011090575, "grad_norm": 0.0808587595820427, "learning_rate": 0.01, "loss": 1.9526, "step": 66294 }, { "epoch": 6.808071472581639, "grad_norm": 0.10974206030368805, "learning_rate": 0.01, "loss": 1.9517, "step": 66297 }, { "epoch": 6.808379544054221, "grad_norm": 0.10623728483915329, "learning_rate": 0.01, "loss": 1.9976, "step": 66300 }, { "epoch": 6.808687615526802, "grad_norm": 0.07686350494623184, "learning_rate": 0.01, "loss": 1.9786, "step": 66303 }, { "epoch": 6.808995686999384, "grad_norm": 0.0518915168941021, "learning_rate": 0.01, "loss": 1.9741, "step": 66306 }, { "epoch": 6.809303758471965, "grad_norm": 0.05631876736879349, "learning_rate": 0.01, "loss": 1.9879, "step": 66309 }, { "epoch": 6.809611829944547, "grad_norm": 0.07675952464342117, "learning_rate": 0.01, "loss": 1.9706, "step": 66312 }, { "epoch": 6.809919901417128, "grad_norm": 0.05211762338876724, "learning_rate": 0.01, "loss": 1.9705, "step": 66315 }, { "epoch": 6.81022797288971, "grad_norm": 0.07859452068805695, "learning_rate": 0.01, "loss": 1.9845, "step": 66318 }, { "epoch": 6.810536044362292, "grad_norm": 0.05693339928984642, "learning_rate": 0.01, "loss": 1.9648, "step": 66321 }, { "epoch": 6.810844115834874, "grad_norm": 0.03533701226115227, "learning_rate": 0.01, "loss": 1.9809, "step": 66324 }, { "epoch": 6.811152187307456, "grad_norm": 0.10251706093549728, "learning_rate": 0.01, "loss": 1.9536, "step": 66327 }, { "epoch": 6.811460258780037, "grad_norm": 0.06078473851084709, "learning_rate": 0.01, "loss": 1.9835, "step": 66330 }, { "epoch": 6.811768330252619, "grad_norm": 0.0667276531457901, "learning_rate": 0.01, "loss": 1.9462, "step": 66333 }, { "epoch": 6.8120764017252, "grad_norm": 0.08299112319946289, "learning_rate": 0.01, "loss": 2.012, "step": 66336 }, { "epoch": 6.812384473197782, "grad_norm": 0.10200154036283493, "learning_rate": 0.01, "loss": 1.9656, "step": 66339 }, { "epoch": 6.812692544670363, "grad_norm": 0.06807447969913483, "learning_rate": 0.01, "loss": 1.9781, "step": 66342 }, { "epoch": 6.813000616142945, "grad_norm": 0.12666809558868408, "learning_rate": 0.01, "loss": 1.9803, "step": 66345 }, { "epoch": 6.813308687615526, "grad_norm": 0.07474800199270248, "learning_rate": 0.01, "loss": 1.9846, "step": 66348 }, { "epoch": 6.8136167590881085, "grad_norm": 0.045581359416246414, "learning_rate": 0.01, "loss": 1.9718, "step": 66351 }, { "epoch": 6.8139248305606905, "grad_norm": 0.03591908887028694, "learning_rate": 0.01, "loss": 1.9882, "step": 66354 }, { "epoch": 6.814232902033272, "grad_norm": 0.0317840613424778, "learning_rate": 0.01, "loss": 1.9799, "step": 66357 }, { "epoch": 6.814540973505853, "grad_norm": 0.12169979512691498, "learning_rate": 0.01, "loss": 1.987, "step": 66360 }, { "epoch": 6.814849044978435, "grad_norm": 0.159201517701149, "learning_rate": 0.01, "loss": 1.9776, "step": 66363 }, { "epoch": 6.815157116451017, "grad_norm": 0.0912163183093071, "learning_rate": 0.01, "loss": 1.9631, "step": 66366 }, { "epoch": 6.815465187923598, "grad_norm": 0.05623574182391167, "learning_rate": 0.01, "loss": 1.9889, "step": 66369 }, { "epoch": 6.81577325939618, "grad_norm": 0.0320771262049675, "learning_rate": 0.01, "loss": 1.9731, "step": 66372 }, { "epoch": 6.816081330868761, "grad_norm": 0.033067572861909866, "learning_rate": 0.01, "loss": 2.0206, "step": 66375 }, { "epoch": 6.816389402341343, "grad_norm": 0.043655119836330414, "learning_rate": 0.01, "loss": 1.9751, "step": 66378 }, { "epoch": 6.8166974738139245, "grad_norm": 0.03852735459804535, "learning_rate": 0.01, "loss": 1.9829, "step": 66381 }, { "epoch": 6.817005545286507, "grad_norm": 0.06447228044271469, "learning_rate": 0.01, "loss": 1.9728, "step": 66384 }, { "epoch": 6.817313616759088, "grad_norm": 0.1129051074385643, "learning_rate": 0.01, "loss": 2.0049, "step": 66387 }, { "epoch": 6.81762168823167, "grad_norm": 0.09710655361413956, "learning_rate": 0.01, "loss": 1.9459, "step": 66390 }, { "epoch": 6.817929759704251, "grad_norm": 0.13679730892181396, "learning_rate": 0.01, "loss": 1.9943, "step": 66393 }, { "epoch": 6.818237831176833, "grad_norm": 0.07610119134187698, "learning_rate": 0.01, "loss": 1.9796, "step": 66396 }, { "epoch": 6.818545902649415, "grad_norm": 0.06364092975854874, "learning_rate": 0.01, "loss": 1.9905, "step": 66399 }, { "epoch": 6.818853974121996, "grad_norm": 0.06607314199209213, "learning_rate": 0.01, "loss": 1.9489, "step": 66402 }, { "epoch": 6.819162045594578, "grad_norm": 0.06625553220510483, "learning_rate": 0.01, "loss": 1.9667, "step": 66405 }, { "epoch": 6.819470117067159, "grad_norm": 0.052945904433727264, "learning_rate": 0.01, "loss": 1.9705, "step": 66408 }, { "epoch": 6.8197781885397415, "grad_norm": 0.0977964773774147, "learning_rate": 0.01, "loss": 1.9751, "step": 66411 }, { "epoch": 6.820086260012323, "grad_norm": 0.09723694622516632, "learning_rate": 0.01, "loss": 1.9607, "step": 66414 }, { "epoch": 6.820394331484905, "grad_norm": 0.05858434736728668, "learning_rate": 0.01, "loss": 1.9873, "step": 66417 }, { "epoch": 6.820702402957486, "grad_norm": 0.09421772509813309, "learning_rate": 0.01, "loss": 1.9741, "step": 66420 }, { "epoch": 6.821010474430068, "grad_norm": 0.040733322501182556, "learning_rate": 0.01, "loss": 1.9752, "step": 66423 }, { "epoch": 6.821318545902649, "grad_norm": 0.0649617463350296, "learning_rate": 0.01, "loss": 1.9717, "step": 66426 }, { "epoch": 6.821626617375231, "grad_norm": 0.10224436968564987, "learning_rate": 0.01, "loss": 2.0167, "step": 66429 }, { "epoch": 6.821934688847813, "grad_norm": 0.07691013067960739, "learning_rate": 0.01, "loss": 1.977, "step": 66432 }, { "epoch": 6.822242760320394, "grad_norm": 0.1012914851307869, "learning_rate": 0.01, "loss": 1.985, "step": 66435 }, { "epoch": 6.8225508317929755, "grad_norm": 0.04432602971792221, "learning_rate": 0.01, "loss": 1.9678, "step": 66438 }, { "epoch": 6.8228589032655576, "grad_norm": 0.0863911360502243, "learning_rate": 0.01, "loss": 1.9949, "step": 66441 }, { "epoch": 6.82316697473814, "grad_norm": 0.0747685432434082, "learning_rate": 0.01, "loss": 1.9806, "step": 66444 }, { "epoch": 6.823475046210721, "grad_norm": 0.04725578799843788, "learning_rate": 0.01, "loss": 1.9782, "step": 66447 }, { "epoch": 6.823783117683303, "grad_norm": 0.1259828805923462, "learning_rate": 0.01, "loss": 1.984, "step": 66450 }, { "epoch": 6.824091189155884, "grad_norm": 0.05945250764489174, "learning_rate": 0.01, "loss": 1.9617, "step": 66453 }, { "epoch": 6.824399260628466, "grad_norm": 0.03467526659369469, "learning_rate": 0.01, "loss": 1.9694, "step": 66456 }, { "epoch": 6.824707332101047, "grad_norm": 0.04494983330368996, "learning_rate": 0.01, "loss": 1.9891, "step": 66459 }, { "epoch": 6.825015403573629, "grad_norm": 0.042065590620040894, "learning_rate": 0.01, "loss": 2.0022, "step": 66462 }, { "epoch": 6.82532347504621, "grad_norm": 0.1278277486562729, "learning_rate": 0.01, "loss": 1.9866, "step": 66465 }, { "epoch": 6.8256315465187924, "grad_norm": 0.04210862144827843, "learning_rate": 0.01, "loss": 1.9512, "step": 66468 }, { "epoch": 6.825939617991374, "grad_norm": 0.10816308856010437, "learning_rate": 0.01, "loss": 1.9699, "step": 66471 }, { "epoch": 6.826247689463956, "grad_norm": 0.04535592719912529, "learning_rate": 0.01, "loss": 2.0036, "step": 66474 }, { "epoch": 6.826555760936538, "grad_norm": 0.06771806627511978, "learning_rate": 0.01, "loss": 1.9786, "step": 66477 }, { "epoch": 6.826863832409119, "grad_norm": 0.13082286715507507, "learning_rate": 0.01, "loss": 1.9638, "step": 66480 }, { "epoch": 6.827171903881701, "grad_norm": 0.06389307230710983, "learning_rate": 0.01, "loss": 1.9555, "step": 66483 }, { "epoch": 6.827479975354282, "grad_norm": 0.04055839404463768, "learning_rate": 0.01, "loss": 1.9751, "step": 66486 }, { "epoch": 6.827788046826864, "grad_norm": 0.03493233397603035, "learning_rate": 0.01, "loss": 1.9821, "step": 66489 }, { "epoch": 6.828096118299445, "grad_norm": 0.03460715711116791, "learning_rate": 0.01, "loss": 1.9932, "step": 66492 }, { "epoch": 6.828404189772027, "grad_norm": 0.04448126628994942, "learning_rate": 0.01, "loss": 1.9681, "step": 66495 }, { "epoch": 6.8287122612446085, "grad_norm": 0.14231470227241516, "learning_rate": 0.01, "loss": 1.9878, "step": 66498 }, { "epoch": 6.8290203327171906, "grad_norm": 0.13960252702236176, "learning_rate": 0.01, "loss": 2.0018, "step": 66501 }, { "epoch": 6.829328404189772, "grad_norm": 0.053078074008226395, "learning_rate": 0.01, "loss": 1.9801, "step": 66504 }, { "epoch": 6.829636475662354, "grad_norm": 0.048882726579904556, "learning_rate": 0.01, "loss": 1.9818, "step": 66507 }, { "epoch": 6.829944547134935, "grad_norm": 0.052697841078042984, "learning_rate": 0.01, "loss": 1.9751, "step": 66510 }, { "epoch": 6.830252618607517, "grad_norm": 0.07299689203500748, "learning_rate": 0.01, "loss": 1.9554, "step": 66513 }, { "epoch": 6.830560690080098, "grad_norm": 0.06163341924548149, "learning_rate": 0.01, "loss": 1.9859, "step": 66516 }, { "epoch": 6.83086876155268, "grad_norm": 0.11083027720451355, "learning_rate": 0.01, "loss": 2.0029, "step": 66519 }, { "epoch": 6.831176833025262, "grad_norm": 0.07942959666252136, "learning_rate": 0.01, "loss": 1.9785, "step": 66522 }, { "epoch": 6.831484904497843, "grad_norm": 0.048363711684942245, "learning_rate": 0.01, "loss": 1.9661, "step": 66525 }, { "epoch": 6.8317929759704255, "grad_norm": 0.03769001364707947, "learning_rate": 0.01, "loss": 1.9449, "step": 66528 }, { "epoch": 6.832101047443007, "grad_norm": 0.03747798129916191, "learning_rate": 0.01, "loss": 1.9727, "step": 66531 }, { "epoch": 6.832409118915589, "grad_norm": 0.05700261518359184, "learning_rate": 0.01, "loss": 1.9723, "step": 66534 }, { "epoch": 6.83271719038817, "grad_norm": 0.08550623804330826, "learning_rate": 0.01, "loss": 2.0129, "step": 66537 }, { "epoch": 6.833025261860752, "grad_norm": 0.05213450267910957, "learning_rate": 0.01, "loss": 1.9825, "step": 66540 }, { "epoch": 6.833333333333333, "grad_norm": 0.12226033955812454, "learning_rate": 0.01, "loss": 1.9628, "step": 66543 }, { "epoch": 6.833641404805915, "grad_norm": 0.06343870609998703, "learning_rate": 0.01, "loss": 1.9572, "step": 66546 }, { "epoch": 6.833949476278496, "grad_norm": 0.06879778206348419, "learning_rate": 0.01, "loss": 1.9975, "step": 66549 }, { "epoch": 6.834257547751078, "grad_norm": 0.04970945045351982, "learning_rate": 0.01, "loss": 1.979, "step": 66552 }, { "epoch": 6.83456561922366, "grad_norm": 0.04559726640582085, "learning_rate": 0.01, "loss": 1.9718, "step": 66555 }, { "epoch": 6.8348736906962415, "grad_norm": 0.08180249482393265, "learning_rate": 0.01, "loss": 1.9704, "step": 66558 }, { "epoch": 6.835181762168823, "grad_norm": 0.044129811227321625, "learning_rate": 0.01, "loss": 1.9591, "step": 66561 }, { "epoch": 6.835489833641405, "grad_norm": 0.038416411727666855, "learning_rate": 0.01, "loss": 1.9519, "step": 66564 }, { "epoch": 6.835797905113987, "grad_norm": 0.06428057700395584, "learning_rate": 0.01, "loss": 1.9855, "step": 66567 }, { "epoch": 6.836105976586568, "grad_norm": 0.07858021557331085, "learning_rate": 0.01, "loss": 1.9836, "step": 66570 }, { "epoch": 6.83641404805915, "grad_norm": 0.07772648334503174, "learning_rate": 0.01, "loss": 1.9543, "step": 66573 }, { "epoch": 6.836722119531731, "grad_norm": 0.03566915541887283, "learning_rate": 0.01, "loss": 1.9563, "step": 66576 }, { "epoch": 6.837030191004313, "grad_norm": 0.04193243384361267, "learning_rate": 0.01, "loss": 1.9472, "step": 66579 }, { "epoch": 6.837338262476894, "grad_norm": 0.058167651295661926, "learning_rate": 0.01, "loss": 1.9685, "step": 66582 }, { "epoch": 6.837646333949476, "grad_norm": 0.12235833704471588, "learning_rate": 0.01, "loss": 1.951, "step": 66585 }, { "epoch": 6.837954405422058, "grad_norm": 0.052113085985183716, "learning_rate": 0.01, "loss": 1.9835, "step": 66588 }, { "epoch": 6.83826247689464, "grad_norm": 0.0758160799741745, "learning_rate": 0.01, "loss": 1.9813, "step": 66591 }, { "epoch": 6.838570548367221, "grad_norm": 0.05963435024023056, "learning_rate": 0.01, "loss": 1.9687, "step": 66594 }, { "epoch": 6.838878619839803, "grad_norm": 0.040900785475969315, "learning_rate": 0.01, "loss": 1.9922, "step": 66597 }, { "epoch": 6.839186691312385, "grad_norm": 0.042031388729810715, "learning_rate": 0.01, "loss": 1.9826, "step": 66600 }, { "epoch": 6.839494762784966, "grad_norm": 0.056323789060115814, "learning_rate": 0.01, "loss": 1.9528, "step": 66603 }, { "epoch": 6.839802834257548, "grad_norm": 0.07060620188713074, "learning_rate": 0.01, "loss": 1.9808, "step": 66606 }, { "epoch": 6.840110905730129, "grad_norm": 0.05406404659152031, "learning_rate": 0.01, "loss": 1.9635, "step": 66609 }, { "epoch": 6.840418977202711, "grad_norm": 0.06324061751365662, "learning_rate": 0.01, "loss": 1.9947, "step": 66612 }, { "epoch": 6.8407270486752925, "grad_norm": 0.04893625155091286, "learning_rate": 0.01, "loss": 2.0007, "step": 66615 }, { "epoch": 6.8410351201478745, "grad_norm": 0.062072839587926865, "learning_rate": 0.01, "loss": 1.9802, "step": 66618 }, { "epoch": 6.841343191620456, "grad_norm": 0.12954573333263397, "learning_rate": 0.01, "loss": 1.9708, "step": 66621 }, { "epoch": 6.841651263093038, "grad_norm": 0.06360910087823868, "learning_rate": 0.01, "loss": 1.9877, "step": 66624 }, { "epoch": 6.841959334565619, "grad_norm": 0.0654262900352478, "learning_rate": 0.01, "loss": 1.9869, "step": 66627 }, { "epoch": 6.842267406038201, "grad_norm": 0.12571407854557037, "learning_rate": 0.01, "loss": 1.9901, "step": 66630 }, { "epoch": 6.842575477510783, "grad_norm": 0.09581385552883148, "learning_rate": 0.01, "loss": 1.9594, "step": 66633 }, { "epoch": 6.842883548983364, "grad_norm": 0.08289510756731033, "learning_rate": 0.01, "loss": 1.9766, "step": 66636 }, { "epoch": 6.843191620455945, "grad_norm": 0.043533485382795334, "learning_rate": 0.01, "loss": 1.9793, "step": 66639 }, { "epoch": 6.843499691928527, "grad_norm": 0.04672224447131157, "learning_rate": 0.01, "loss": 1.9816, "step": 66642 }, { "epoch": 6.843807763401109, "grad_norm": 0.03264494985342026, "learning_rate": 0.01, "loss": 1.9976, "step": 66645 }, { "epoch": 6.844115834873691, "grad_norm": 0.10711422562599182, "learning_rate": 0.01, "loss": 1.9952, "step": 66648 }, { "epoch": 6.844423906346273, "grad_norm": 0.08725102990865707, "learning_rate": 0.01, "loss": 1.9728, "step": 66651 }, { "epoch": 6.844731977818854, "grad_norm": 0.03802330791950226, "learning_rate": 0.01, "loss": 1.9667, "step": 66654 }, { "epoch": 6.845040049291436, "grad_norm": 0.0540679395198822, "learning_rate": 0.01, "loss": 1.9887, "step": 66657 }, { "epoch": 6.845348120764017, "grad_norm": 0.10413230210542679, "learning_rate": 0.01, "loss": 1.9451, "step": 66660 }, { "epoch": 6.845656192236599, "grad_norm": 0.07961270213127136, "learning_rate": 0.01, "loss": 1.9462, "step": 66663 }, { "epoch": 6.84596426370918, "grad_norm": 0.06600063294172287, "learning_rate": 0.01, "loss": 1.9298, "step": 66666 }, { "epoch": 6.846272335181762, "grad_norm": 0.10893017053604126, "learning_rate": 0.01, "loss": 2.0068, "step": 66669 }, { "epoch": 6.846580406654343, "grad_norm": 0.07856502383947372, "learning_rate": 0.01, "loss": 1.9899, "step": 66672 }, { "epoch": 6.8468884781269255, "grad_norm": 0.06285273283720016, "learning_rate": 0.01, "loss": 1.9413, "step": 66675 }, { "epoch": 6.8471965495995075, "grad_norm": 0.07361593842506409, "learning_rate": 0.01, "loss": 1.9881, "step": 66678 }, { "epoch": 6.847504621072089, "grad_norm": 0.07996716350317001, "learning_rate": 0.01, "loss": 1.985, "step": 66681 }, { "epoch": 6.847812692544671, "grad_norm": 0.05737863853573799, "learning_rate": 0.01, "loss": 1.9702, "step": 66684 }, { "epoch": 6.848120764017252, "grad_norm": 0.05021853744983673, "learning_rate": 0.01, "loss": 1.9744, "step": 66687 }, { "epoch": 6.848428835489834, "grad_norm": 0.033845316618680954, "learning_rate": 0.01, "loss": 1.9695, "step": 66690 }, { "epoch": 6.848736906962415, "grad_norm": 0.10472722351551056, "learning_rate": 0.01, "loss": 1.973, "step": 66693 }, { "epoch": 6.849044978434997, "grad_norm": 0.07523515820503235, "learning_rate": 0.01, "loss": 1.9599, "step": 66696 }, { "epoch": 6.849353049907578, "grad_norm": 0.08648163080215454, "learning_rate": 0.01, "loss": 1.9795, "step": 66699 }, { "epoch": 6.84966112138016, "grad_norm": 0.05042044818401337, "learning_rate": 0.01, "loss": 1.9494, "step": 66702 }, { "epoch": 6.8499691928527415, "grad_norm": 0.04480702057480812, "learning_rate": 0.01, "loss": 1.9699, "step": 66705 }, { "epoch": 6.850277264325324, "grad_norm": 0.07494408637285233, "learning_rate": 0.01, "loss": 1.9765, "step": 66708 }, { "epoch": 6.850585335797905, "grad_norm": 0.061382777988910675, "learning_rate": 0.01, "loss": 1.9695, "step": 66711 }, { "epoch": 6.850893407270487, "grad_norm": 0.04542558267712593, "learning_rate": 0.01, "loss": 1.9928, "step": 66714 }, { "epoch": 6.851201478743068, "grad_norm": 0.039626192301511765, "learning_rate": 0.01, "loss": 1.9384, "step": 66717 }, { "epoch": 6.85150955021565, "grad_norm": 0.04274001717567444, "learning_rate": 0.01, "loss": 1.9762, "step": 66720 }, { "epoch": 6.851817621688232, "grad_norm": 0.16487199068069458, "learning_rate": 0.01, "loss": 1.9808, "step": 66723 }, { "epoch": 6.852125693160813, "grad_norm": 0.04666552320122719, "learning_rate": 0.01, "loss": 1.9914, "step": 66726 }, { "epoch": 6.852433764633395, "grad_norm": 0.04287717863917351, "learning_rate": 0.01, "loss": 1.9981, "step": 66729 }, { "epoch": 6.852741836105976, "grad_norm": 0.04304524138569832, "learning_rate": 0.01, "loss": 1.961, "step": 66732 }, { "epoch": 6.8530499075785585, "grad_norm": 0.06935624033212662, "learning_rate": 0.01, "loss": 1.9709, "step": 66735 }, { "epoch": 6.85335797905114, "grad_norm": 0.06261073797941208, "learning_rate": 0.01, "loss": 1.9558, "step": 66738 }, { "epoch": 6.853666050523722, "grad_norm": 0.07173455506563187, "learning_rate": 0.01, "loss": 1.9756, "step": 66741 }, { "epoch": 6.853974121996303, "grad_norm": 0.08861742913722992, "learning_rate": 0.01, "loss": 1.991, "step": 66744 }, { "epoch": 6.854282193468885, "grad_norm": 0.05243033543229103, "learning_rate": 0.01, "loss": 1.9649, "step": 66747 }, { "epoch": 6.854590264941466, "grad_norm": 0.03281958028674126, "learning_rate": 0.01, "loss": 1.9917, "step": 66750 }, { "epoch": 6.854898336414048, "grad_norm": 0.10798471421003342, "learning_rate": 0.01, "loss": 2.0004, "step": 66753 }, { "epoch": 6.85520640788663, "grad_norm": 0.10174170881509781, "learning_rate": 0.01, "loss": 1.9616, "step": 66756 }, { "epoch": 6.855514479359211, "grad_norm": 0.03573278710246086, "learning_rate": 0.01, "loss": 1.9906, "step": 66759 }, { "epoch": 6.8558225508317925, "grad_norm": 0.12209411710500717, "learning_rate": 0.01, "loss": 1.9913, "step": 66762 }, { "epoch": 6.8561306223043745, "grad_norm": 0.06323892623186111, "learning_rate": 0.01, "loss": 1.9882, "step": 66765 }, { "epoch": 6.856438693776957, "grad_norm": 0.03758067637681961, "learning_rate": 0.01, "loss": 1.9725, "step": 66768 }, { "epoch": 6.856746765249538, "grad_norm": 0.0766826942563057, "learning_rate": 0.01, "loss": 1.974, "step": 66771 }, { "epoch": 6.85705483672212, "grad_norm": 0.08893147110939026, "learning_rate": 0.01, "loss": 1.9959, "step": 66774 }, { "epoch": 6.857362908194701, "grad_norm": 0.06650066375732422, "learning_rate": 0.01, "loss": 1.9714, "step": 66777 }, { "epoch": 6.857670979667283, "grad_norm": 0.04757289960980415, "learning_rate": 0.01, "loss": 1.9913, "step": 66780 }, { "epoch": 6.857979051139864, "grad_norm": 0.0641031265258789, "learning_rate": 0.01, "loss": 1.9593, "step": 66783 }, { "epoch": 6.858287122612446, "grad_norm": 0.04178297147154808, "learning_rate": 0.01, "loss": 1.9613, "step": 66786 }, { "epoch": 6.858595194085027, "grad_norm": 0.042551252990961075, "learning_rate": 0.01, "loss": 2.0181, "step": 66789 }, { "epoch": 6.858903265557609, "grad_norm": 0.05980202183127403, "learning_rate": 0.01, "loss": 1.9742, "step": 66792 }, { "epoch": 6.859211337030191, "grad_norm": 0.11070612818002701, "learning_rate": 0.01, "loss": 1.953, "step": 66795 }, { "epoch": 6.859519408502773, "grad_norm": 0.03422870859503746, "learning_rate": 0.01, "loss": 1.9798, "step": 66798 }, { "epoch": 6.859827479975355, "grad_norm": 0.04661295935511589, "learning_rate": 0.01, "loss": 1.9874, "step": 66801 }, { "epoch": 6.860135551447936, "grad_norm": 0.04450797662138939, "learning_rate": 0.01, "loss": 2.0017, "step": 66804 }, { "epoch": 6.860443622920518, "grad_norm": 0.03987280651926994, "learning_rate": 0.01, "loss": 1.9817, "step": 66807 }, { "epoch": 6.860751694393099, "grad_norm": 0.0804753229022026, "learning_rate": 0.01, "loss": 1.9825, "step": 66810 }, { "epoch": 6.861059765865681, "grad_norm": 0.09321844577789307, "learning_rate": 0.01, "loss": 1.9841, "step": 66813 }, { "epoch": 6.861367837338262, "grad_norm": 0.128792867064476, "learning_rate": 0.01, "loss": 1.9654, "step": 66816 }, { "epoch": 6.861675908810844, "grad_norm": 0.0815480500459671, "learning_rate": 0.01, "loss": 1.9951, "step": 66819 }, { "epoch": 6.8619839802834255, "grad_norm": 0.047995518893003464, "learning_rate": 0.01, "loss": 1.9672, "step": 66822 }, { "epoch": 6.8622920517560075, "grad_norm": 0.03637409210205078, "learning_rate": 0.01, "loss": 1.9617, "step": 66825 }, { "epoch": 6.862600123228589, "grad_norm": 0.03809746354818344, "learning_rate": 0.01, "loss": 1.978, "step": 66828 }, { "epoch": 6.862908194701171, "grad_norm": 0.03449690714478493, "learning_rate": 0.01, "loss": 1.9781, "step": 66831 }, { "epoch": 6.863216266173753, "grad_norm": 0.09826438128948212, "learning_rate": 0.01, "loss": 1.9755, "step": 66834 }, { "epoch": 6.863524337646334, "grad_norm": 0.09669643640518188, "learning_rate": 0.01, "loss": 1.9931, "step": 66837 }, { "epoch": 6.863832409118915, "grad_norm": 0.03975251317024231, "learning_rate": 0.01, "loss": 1.9816, "step": 66840 }, { "epoch": 6.864140480591497, "grad_norm": 0.045158423483371735, "learning_rate": 0.01, "loss": 1.9833, "step": 66843 }, { "epoch": 6.864448552064079, "grad_norm": 0.08310145139694214, "learning_rate": 0.01, "loss": 1.9594, "step": 66846 }, { "epoch": 6.86475662353666, "grad_norm": 0.068028524518013, "learning_rate": 0.01, "loss": 1.9548, "step": 66849 }, { "epoch": 6.865064695009242, "grad_norm": 0.02985006757080555, "learning_rate": 0.01, "loss": 1.9847, "step": 66852 }, { "epoch": 6.865372766481824, "grad_norm": 0.03493596985936165, "learning_rate": 0.01, "loss": 1.9608, "step": 66855 }, { "epoch": 6.865680837954406, "grad_norm": 0.04760652035474777, "learning_rate": 0.01, "loss": 1.9665, "step": 66858 }, { "epoch": 6.865988909426987, "grad_norm": 0.08393906056880951, "learning_rate": 0.01, "loss": 1.9609, "step": 66861 }, { "epoch": 6.866296980899569, "grad_norm": 0.05745869129896164, "learning_rate": 0.01, "loss": 1.9851, "step": 66864 }, { "epoch": 6.86660505237215, "grad_norm": 0.04294672608375549, "learning_rate": 0.01, "loss": 1.9912, "step": 66867 }, { "epoch": 6.866913123844732, "grad_norm": 0.052304986864328384, "learning_rate": 0.01, "loss": 1.9808, "step": 66870 }, { "epoch": 6.867221195317313, "grad_norm": 0.04665624350309372, "learning_rate": 0.01, "loss": 1.9593, "step": 66873 }, { "epoch": 6.867529266789895, "grad_norm": 0.037743669003248215, "learning_rate": 0.01, "loss": 1.9874, "step": 66876 }, { "epoch": 6.867837338262477, "grad_norm": 0.19228719174861908, "learning_rate": 0.01, "loss": 1.9825, "step": 66879 }, { "epoch": 6.8681454097350585, "grad_norm": 0.044259946793317795, "learning_rate": 0.01, "loss": 2.0154, "step": 66882 }, { "epoch": 6.8684534812076405, "grad_norm": 0.03942949324846268, "learning_rate": 0.01, "loss": 1.981, "step": 66885 }, { "epoch": 6.868761552680222, "grad_norm": 0.03812559321522713, "learning_rate": 0.01, "loss": 1.9677, "step": 66888 }, { "epoch": 6.869069624152804, "grad_norm": 0.03519313409924507, "learning_rate": 0.01, "loss": 1.9776, "step": 66891 }, { "epoch": 6.869377695625385, "grad_norm": 0.04644708335399628, "learning_rate": 0.01, "loss": 1.9763, "step": 66894 }, { "epoch": 6.869685767097967, "grad_norm": 0.059009701013565063, "learning_rate": 0.01, "loss": 1.9847, "step": 66897 }, { "epoch": 6.869993838570548, "grad_norm": 0.08703511953353882, "learning_rate": 0.01, "loss": 1.9553, "step": 66900 }, { "epoch": 6.87030191004313, "grad_norm": 0.06761835515499115, "learning_rate": 0.01, "loss": 1.9753, "step": 66903 }, { "epoch": 6.870609981515711, "grad_norm": 0.12400759011507034, "learning_rate": 0.01, "loss": 1.9843, "step": 66906 }, { "epoch": 6.870918052988293, "grad_norm": 0.13963590562343597, "learning_rate": 0.01, "loss": 1.9987, "step": 66909 }, { "epoch": 6.8712261244608746, "grad_norm": 0.1004619374871254, "learning_rate": 0.01, "loss": 1.9759, "step": 66912 }, { "epoch": 6.871534195933457, "grad_norm": 0.05865470692515373, "learning_rate": 0.01, "loss": 2.0019, "step": 66915 }, { "epoch": 6.871842267406038, "grad_norm": 0.06006789952516556, "learning_rate": 0.01, "loss": 1.9681, "step": 66918 }, { "epoch": 6.87215033887862, "grad_norm": 0.03859030827879906, "learning_rate": 0.01, "loss": 1.9733, "step": 66921 }, { "epoch": 6.872458410351202, "grad_norm": 0.03140386939048767, "learning_rate": 0.01, "loss": 1.9827, "step": 66924 }, { "epoch": 6.872766481823783, "grad_norm": 0.030816826969385147, "learning_rate": 0.01, "loss": 1.9746, "step": 66927 }, { "epoch": 6.873074553296365, "grad_norm": 0.031165439635515213, "learning_rate": 0.01, "loss": 1.9777, "step": 66930 }, { "epoch": 6.873382624768946, "grad_norm": 0.1848057508468628, "learning_rate": 0.01, "loss": 1.9849, "step": 66933 }, { "epoch": 6.873690696241528, "grad_norm": 0.11120212823152542, "learning_rate": 0.01, "loss": 1.9846, "step": 66936 }, { "epoch": 6.8739987677141094, "grad_norm": 0.051300667226314545, "learning_rate": 0.01, "loss": 1.9601, "step": 66939 }, { "epoch": 6.8743068391866915, "grad_norm": 0.03566049784421921, "learning_rate": 0.01, "loss": 1.9589, "step": 66942 }, { "epoch": 6.874614910659273, "grad_norm": 0.0652051493525505, "learning_rate": 0.01, "loss": 1.9625, "step": 66945 }, { "epoch": 6.874922982131855, "grad_norm": 0.06834497302770615, "learning_rate": 0.01, "loss": 1.967, "step": 66948 }, { "epoch": 6.875231053604436, "grad_norm": 0.058571867644786835, "learning_rate": 0.01, "loss": 1.9778, "step": 66951 }, { "epoch": 6.875539125077018, "grad_norm": 0.05274929851293564, "learning_rate": 0.01, "loss": 1.999, "step": 66954 }, { "epoch": 6.8758471965496, "grad_norm": 0.044645778834819794, "learning_rate": 0.01, "loss": 1.9787, "step": 66957 }, { "epoch": 6.876155268022181, "grad_norm": 0.05121855065226555, "learning_rate": 0.01, "loss": 1.9773, "step": 66960 }, { "epoch": 6.876463339494762, "grad_norm": 0.0632731169462204, "learning_rate": 0.01, "loss": 1.991, "step": 66963 }, { "epoch": 6.876771410967344, "grad_norm": 0.0810575783252716, "learning_rate": 0.01, "loss": 1.9805, "step": 66966 }, { "epoch": 6.877079482439926, "grad_norm": 0.06958875060081482, "learning_rate": 0.01, "loss": 1.9784, "step": 66969 }, { "epoch": 6.877387553912508, "grad_norm": 0.06824204325675964, "learning_rate": 0.01, "loss": 2.0009, "step": 66972 }, { "epoch": 6.87769562538509, "grad_norm": 0.05225484445691109, "learning_rate": 0.01, "loss": 1.9467, "step": 66975 }, { "epoch": 6.878003696857671, "grad_norm": 0.05065394937992096, "learning_rate": 0.01, "loss": 1.9945, "step": 66978 }, { "epoch": 6.878311768330253, "grad_norm": 0.13061989843845367, "learning_rate": 0.01, "loss": 1.9895, "step": 66981 }, { "epoch": 6.878619839802834, "grad_norm": 0.12182870507240295, "learning_rate": 0.01, "loss": 1.9567, "step": 66984 }, { "epoch": 6.878927911275416, "grad_norm": 0.048450104892253876, "learning_rate": 0.01, "loss": 1.978, "step": 66987 }, { "epoch": 6.879235982747997, "grad_norm": 0.055096838623285294, "learning_rate": 0.01, "loss": 1.9648, "step": 66990 }, { "epoch": 6.879544054220579, "grad_norm": 0.05567490682005882, "learning_rate": 0.01, "loss": 1.9797, "step": 66993 }, { "epoch": 6.87985212569316, "grad_norm": 0.05405956879258156, "learning_rate": 0.01, "loss": 1.9811, "step": 66996 }, { "epoch": 6.8801601971657425, "grad_norm": 0.08444008976221085, "learning_rate": 0.01, "loss": 1.9848, "step": 66999 }, { "epoch": 6.8804682686383245, "grad_norm": 0.08067743480205536, "learning_rate": 0.01, "loss": 1.9636, "step": 67002 }, { "epoch": 6.880776340110906, "grad_norm": 0.042782243341207504, "learning_rate": 0.01, "loss": 1.9727, "step": 67005 }, { "epoch": 6.881084411583488, "grad_norm": 0.036419086158275604, "learning_rate": 0.01, "loss": 1.9638, "step": 67008 }, { "epoch": 6.881392483056069, "grad_norm": 0.05873832479119301, "learning_rate": 0.01, "loss": 1.9845, "step": 67011 }, { "epoch": 6.881700554528651, "grad_norm": 0.06187519058585167, "learning_rate": 0.01, "loss": 1.9953, "step": 67014 }, { "epoch": 6.882008626001232, "grad_norm": 0.049339089542627335, "learning_rate": 0.01, "loss": 1.966, "step": 67017 }, { "epoch": 6.882316697473814, "grad_norm": 0.037161026149988174, "learning_rate": 0.01, "loss": 1.9465, "step": 67020 }, { "epoch": 6.882624768946395, "grad_norm": 0.03601270541548729, "learning_rate": 0.01, "loss": 1.98, "step": 67023 }, { "epoch": 6.882932840418977, "grad_norm": 0.07066098600625992, "learning_rate": 0.01, "loss": 1.9797, "step": 67026 }, { "epoch": 6.8832409118915585, "grad_norm": 0.1363425999879837, "learning_rate": 0.01, "loss": 1.9648, "step": 67029 }, { "epoch": 6.883548983364141, "grad_norm": 0.060069505125284195, "learning_rate": 0.01, "loss": 1.9898, "step": 67032 }, { "epoch": 6.883857054836723, "grad_norm": 0.08887060731649399, "learning_rate": 0.01, "loss": 1.9947, "step": 67035 }, { "epoch": 6.884165126309304, "grad_norm": 0.03954588621854782, "learning_rate": 0.01, "loss": 1.9815, "step": 67038 }, { "epoch": 6.884473197781885, "grad_norm": 0.04933086037635803, "learning_rate": 0.01, "loss": 1.9766, "step": 67041 }, { "epoch": 6.884781269254467, "grad_norm": 0.05626452714204788, "learning_rate": 0.01, "loss": 1.9614, "step": 67044 }, { "epoch": 6.885089340727049, "grad_norm": 0.04893886297941208, "learning_rate": 0.01, "loss": 1.9646, "step": 67047 }, { "epoch": 6.88539741219963, "grad_norm": 0.10105928778648376, "learning_rate": 0.01, "loss": 1.971, "step": 67050 }, { "epoch": 6.885705483672212, "grad_norm": 0.08832778036594391, "learning_rate": 0.01, "loss": 1.979, "step": 67053 }, { "epoch": 6.886013555144793, "grad_norm": 0.059674110263586044, "learning_rate": 0.01, "loss": 1.9806, "step": 67056 }, { "epoch": 6.8863216266173755, "grad_norm": 0.06233122944831848, "learning_rate": 0.01, "loss": 1.9851, "step": 67059 }, { "epoch": 6.886629698089957, "grad_norm": 0.04024443402886391, "learning_rate": 0.01, "loss": 1.9768, "step": 67062 }, { "epoch": 6.886937769562539, "grad_norm": 0.03950473666191101, "learning_rate": 0.01, "loss": 1.9756, "step": 67065 }, { "epoch": 6.88724584103512, "grad_norm": 0.11595027148723602, "learning_rate": 0.01, "loss": 1.9688, "step": 67068 }, { "epoch": 6.887553912507702, "grad_norm": 0.10208969563245773, "learning_rate": 0.01, "loss": 1.9763, "step": 67071 }, { "epoch": 6.887861983980283, "grad_norm": 0.1006581112742424, "learning_rate": 0.01, "loss": 1.9783, "step": 67074 }, { "epoch": 6.888170055452865, "grad_norm": 0.06794067472219467, "learning_rate": 0.01, "loss": 1.9676, "step": 67077 }, { "epoch": 6.888478126925447, "grad_norm": 0.06897623836994171, "learning_rate": 0.01, "loss": 1.9688, "step": 67080 }, { "epoch": 6.888786198398028, "grad_norm": 0.07438699901103973, "learning_rate": 0.01, "loss": 1.9884, "step": 67083 }, { "epoch": 6.88909426987061, "grad_norm": 0.057675886899232864, "learning_rate": 0.01, "loss": 1.978, "step": 67086 }, { "epoch": 6.8894023413431915, "grad_norm": 0.0770813375711441, "learning_rate": 0.01, "loss": 1.9841, "step": 67089 }, { "epoch": 6.889710412815774, "grad_norm": 0.07523828744888306, "learning_rate": 0.01, "loss": 1.9515, "step": 67092 }, { "epoch": 6.890018484288355, "grad_norm": 0.11752041429281235, "learning_rate": 0.01, "loss": 1.9666, "step": 67095 }, { "epoch": 6.890326555760937, "grad_norm": 0.07361220568418503, "learning_rate": 0.01, "loss": 1.9475, "step": 67098 }, { "epoch": 6.890634627233518, "grad_norm": 0.1421179324388504, "learning_rate": 0.01, "loss": 1.96, "step": 67101 }, { "epoch": 6.8909426987061, "grad_norm": 0.10113172978162766, "learning_rate": 0.01, "loss": 1.9745, "step": 67104 }, { "epoch": 6.891250770178681, "grad_norm": 0.07775873690843582, "learning_rate": 0.01, "loss": 2.0027, "step": 67107 }, { "epoch": 6.891558841651263, "grad_norm": 0.06705240905284882, "learning_rate": 0.01, "loss": 1.9841, "step": 67110 }, { "epoch": 6.891866913123844, "grad_norm": 0.09598524123430252, "learning_rate": 0.01, "loss": 1.9638, "step": 67113 }, { "epoch": 6.892174984596426, "grad_norm": 0.04524253308773041, "learning_rate": 0.01, "loss": 1.9562, "step": 67116 }, { "epoch": 6.892483056069008, "grad_norm": 0.041274599730968475, "learning_rate": 0.01, "loss": 1.9733, "step": 67119 }, { "epoch": 6.89279112754159, "grad_norm": 0.09016481041908264, "learning_rate": 0.01, "loss": 1.9749, "step": 67122 }, { "epoch": 6.893099199014172, "grad_norm": 0.06743042171001434, "learning_rate": 0.01, "loss": 1.9766, "step": 67125 }, { "epoch": 6.893407270486753, "grad_norm": 0.08920291066169739, "learning_rate": 0.01, "loss": 2.0093, "step": 67128 }, { "epoch": 6.893715341959335, "grad_norm": 0.08669997751712799, "learning_rate": 0.01, "loss": 1.9628, "step": 67131 }, { "epoch": 6.894023413431916, "grad_norm": 0.03727005049586296, "learning_rate": 0.01, "loss": 1.9641, "step": 67134 }, { "epoch": 6.894331484904498, "grad_norm": 0.047707851976156235, "learning_rate": 0.01, "loss": 1.9626, "step": 67137 }, { "epoch": 6.894639556377079, "grad_norm": 0.03828176483511925, "learning_rate": 0.01, "loss": 1.9597, "step": 67140 }, { "epoch": 6.894947627849661, "grad_norm": 0.04043647646903992, "learning_rate": 0.01, "loss": 1.9949, "step": 67143 }, { "epoch": 6.8952556993222425, "grad_norm": 0.03885728865861893, "learning_rate": 0.01, "loss": 1.9679, "step": 67146 }, { "epoch": 6.8955637707948245, "grad_norm": 0.041276995092630386, "learning_rate": 0.01, "loss": 1.9662, "step": 67149 }, { "epoch": 6.895871842267406, "grad_norm": 0.04943333566188812, "learning_rate": 0.01, "loss": 1.9549, "step": 67152 }, { "epoch": 6.896179913739988, "grad_norm": 0.04476524516940117, "learning_rate": 0.01, "loss": 1.9485, "step": 67155 }, { "epoch": 6.89648798521257, "grad_norm": 0.10550431907176971, "learning_rate": 0.01, "loss": 1.9662, "step": 67158 }, { "epoch": 6.896796056685151, "grad_norm": 0.09191994369029999, "learning_rate": 0.01, "loss": 1.9677, "step": 67161 }, { "epoch": 6.897104128157732, "grad_norm": 0.05506186559796333, "learning_rate": 0.01, "loss": 1.9712, "step": 67164 }, { "epoch": 6.897412199630314, "grad_norm": 0.06780190020799637, "learning_rate": 0.01, "loss": 1.9831, "step": 67167 }, { "epoch": 6.897720271102896, "grad_norm": 0.04398869350552559, "learning_rate": 0.01, "loss": 1.9709, "step": 67170 }, { "epoch": 6.898028342575477, "grad_norm": 0.05016426742076874, "learning_rate": 0.01, "loss": 1.9821, "step": 67173 }, { "epoch": 6.898336414048059, "grad_norm": 0.03760296478867531, "learning_rate": 0.01, "loss": 1.9833, "step": 67176 }, { "epoch": 6.898644485520641, "grad_norm": 0.06771603971719742, "learning_rate": 0.01, "loss": 1.9924, "step": 67179 }, { "epoch": 6.898952556993223, "grad_norm": 0.07435780018568039, "learning_rate": 0.01, "loss": 1.9733, "step": 67182 }, { "epoch": 6.899260628465804, "grad_norm": 0.05153966695070267, "learning_rate": 0.01, "loss": 1.9968, "step": 67185 }, { "epoch": 6.899568699938386, "grad_norm": 0.06906291097402573, "learning_rate": 0.01, "loss": 1.9666, "step": 67188 }, { "epoch": 6.899876771410967, "grad_norm": 0.08245383203029633, "learning_rate": 0.01, "loss": 1.9731, "step": 67191 }, { "epoch": 6.900184842883549, "grad_norm": 0.06429754942655563, "learning_rate": 0.01, "loss": 1.9942, "step": 67194 }, { "epoch": 6.90049291435613, "grad_norm": 0.03510384261608124, "learning_rate": 0.01, "loss": 1.9801, "step": 67197 }, { "epoch": 6.900800985828712, "grad_norm": 0.063795305788517, "learning_rate": 0.01, "loss": 1.976, "step": 67200 }, { "epoch": 6.901109057301294, "grad_norm": 0.07294676452875137, "learning_rate": 0.01, "loss": 1.9611, "step": 67203 }, { "epoch": 6.9014171287738755, "grad_norm": 0.04569892957806587, "learning_rate": 0.01, "loss": 1.9775, "step": 67206 }, { "epoch": 6.9017252002464575, "grad_norm": 0.0556999109685421, "learning_rate": 0.01, "loss": 1.9949, "step": 67209 }, { "epoch": 6.902033271719039, "grad_norm": 0.038515105843544006, "learning_rate": 0.01, "loss": 2.0006, "step": 67212 }, { "epoch": 6.902341343191621, "grad_norm": 0.08272943645715714, "learning_rate": 0.01, "loss": 1.9549, "step": 67215 }, { "epoch": 6.902649414664202, "grad_norm": 0.08796203136444092, "learning_rate": 0.01, "loss": 1.9787, "step": 67218 }, { "epoch": 6.902957486136784, "grad_norm": 0.0925317257642746, "learning_rate": 0.01, "loss": 1.9775, "step": 67221 }, { "epoch": 6.903265557609365, "grad_norm": 0.04561033099889755, "learning_rate": 0.01, "loss": 1.9481, "step": 67224 }, { "epoch": 6.903573629081947, "grad_norm": 0.10068716108798981, "learning_rate": 0.01, "loss": 1.9978, "step": 67227 }, { "epoch": 6.903881700554528, "grad_norm": 0.06739984452724457, "learning_rate": 0.01, "loss": 1.9716, "step": 67230 }, { "epoch": 6.90418977202711, "grad_norm": 0.05066872388124466, "learning_rate": 0.01, "loss": 1.9969, "step": 67233 }, { "epoch": 6.904497843499692, "grad_norm": 0.047137267887592316, "learning_rate": 0.01, "loss": 1.9838, "step": 67236 }, { "epoch": 6.904805914972274, "grad_norm": 0.10280490666627884, "learning_rate": 0.01, "loss": 1.9662, "step": 67239 }, { "epoch": 6.905113986444855, "grad_norm": 0.08560338616371155, "learning_rate": 0.01, "loss": 1.9644, "step": 67242 }, { "epoch": 6.905422057917437, "grad_norm": 0.13022232055664062, "learning_rate": 0.01, "loss": 1.9786, "step": 67245 }, { "epoch": 6.905730129390019, "grad_norm": 0.04727041721343994, "learning_rate": 0.01, "loss": 1.9905, "step": 67248 }, { "epoch": 6.9060382008626, "grad_norm": 0.04549260064959526, "learning_rate": 0.01, "loss": 1.9749, "step": 67251 }, { "epoch": 6.906346272335182, "grad_norm": 0.14286646246910095, "learning_rate": 0.01, "loss": 1.9913, "step": 67254 }, { "epoch": 6.906654343807763, "grad_norm": 0.04126777872443199, "learning_rate": 0.01, "loss": 1.9516, "step": 67257 }, { "epoch": 6.906962415280345, "grad_norm": 0.04667791351675987, "learning_rate": 0.01, "loss": 1.9566, "step": 67260 }, { "epoch": 6.907270486752926, "grad_norm": 0.11388275027275085, "learning_rate": 0.01, "loss": 1.9958, "step": 67263 }, { "epoch": 6.9075785582255085, "grad_norm": 0.06364165991544724, "learning_rate": 0.01, "loss": 1.9731, "step": 67266 }, { "epoch": 6.90788662969809, "grad_norm": 0.09224580973386765, "learning_rate": 0.01, "loss": 1.9657, "step": 67269 }, { "epoch": 6.908194701170672, "grad_norm": 0.07214430719614029, "learning_rate": 0.01, "loss": 1.9633, "step": 67272 }, { "epoch": 6.908502772643253, "grad_norm": 0.04083709791302681, "learning_rate": 0.01, "loss": 1.9735, "step": 67275 }, { "epoch": 6.908810844115835, "grad_norm": 0.034071508795022964, "learning_rate": 0.01, "loss": 1.9854, "step": 67278 }, { "epoch": 6.909118915588417, "grad_norm": 0.06297708302736282, "learning_rate": 0.01, "loss": 1.9562, "step": 67281 }, { "epoch": 6.909426987060998, "grad_norm": 0.0424063615500927, "learning_rate": 0.01, "loss": 1.9717, "step": 67284 }, { "epoch": 6.90973505853358, "grad_norm": 0.04424364119768143, "learning_rate": 0.01, "loss": 1.9686, "step": 67287 }, { "epoch": 6.910043130006161, "grad_norm": 0.03503073751926422, "learning_rate": 0.01, "loss": 1.9708, "step": 67290 }, { "epoch": 6.910351201478743, "grad_norm": 0.04922670125961304, "learning_rate": 0.01, "loss": 1.9692, "step": 67293 }, { "epoch": 6.9106592729513245, "grad_norm": 0.0473727248609066, "learning_rate": 0.01, "loss": 1.9652, "step": 67296 }, { "epoch": 6.910967344423907, "grad_norm": 0.09901537001132965, "learning_rate": 0.01, "loss": 1.9813, "step": 67299 }, { "epoch": 6.911275415896488, "grad_norm": 0.08178123086690903, "learning_rate": 0.01, "loss": 1.9955, "step": 67302 }, { "epoch": 6.91158348736907, "grad_norm": 0.06637603044509888, "learning_rate": 0.01, "loss": 1.978, "step": 67305 }, { "epoch": 6.911891558841651, "grad_norm": 0.043472740799188614, "learning_rate": 0.01, "loss": 1.9641, "step": 67308 }, { "epoch": 6.912199630314233, "grad_norm": 0.03270704671740532, "learning_rate": 0.01, "loss": 1.9642, "step": 67311 }, { "epoch": 6.912507701786814, "grad_norm": 0.08558196574449539, "learning_rate": 0.01, "loss": 1.9978, "step": 67314 }, { "epoch": 6.912815773259396, "grad_norm": 0.09473107755184174, "learning_rate": 0.01, "loss": 1.9886, "step": 67317 }, { "epoch": 6.913123844731977, "grad_norm": 0.041321370750665665, "learning_rate": 0.01, "loss": 1.9751, "step": 67320 }, { "epoch": 6.913431916204559, "grad_norm": 0.10107487440109253, "learning_rate": 0.01, "loss": 1.977, "step": 67323 }, { "epoch": 6.9137399876771415, "grad_norm": 0.13234496116638184, "learning_rate": 0.01, "loss": 1.9821, "step": 67326 }, { "epoch": 6.914048059149723, "grad_norm": 0.05052580311894417, "learning_rate": 0.01, "loss": 1.9625, "step": 67329 }, { "epoch": 6.914356130622305, "grad_norm": 0.03741452470421791, "learning_rate": 0.01, "loss": 1.9709, "step": 67332 }, { "epoch": 6.914664202094886, "grad_norm": 0.03609738498926163, "learning_rate": 0.01, "loss": 1.9614, "step": 67335 }, { "epoch": 6.914972273567468, "grad_norm": 0.045461494475603104, "learning_rate": 0.01, "loss": 1.9906, "step": 67338 }, { "epoch": 6.915280345040049, "grad_norm": 0.07647386193275452, "learning_rate": 0.01, "loss": 1.9496, "step": 67341 }, { "epoch": 6.915588416512631, "grad_norm": 0.0546477772295475, "learning_rate": 0.01, "loss": 1.9828, "step": 67344 }, { "epoch": 6.915896487985212, "grad_norm": 0.05500864237546921, "learning_rate": 0.01, "loss": 1.9853, "step": 67347 }, { "epoch": 6.916204559457794, "grad_norm": 0.06956373900175095, "learning_rate": 0.01, "loss": 1.9697, "step": 67350 }, { "epoch": 6.9165126309303755, "grad_norm": 0.13182616233825684, "learning_rate": 0.01, "loss": 1.9938, "step": 67353 }, { "epoch": 6.9168207024029575, "grad_norm": 0.06322868913412094, "learning_rate": 0.01, "loss": 1.9599, "step": 67356 }, { "epoch": 6.91712877387554, "grad_norm": 0.053960807621479034, "learning_rate": 0.01, "loss": 1.9484, "step": 67359 }, { "epoch": 6.917436845348121, "grad_norm": 0.03774489462375641, "learning_rate": 0.01, "loss": 1.9874, "step": 67362 }, { "epoch": 6.917744916820702, "grad_norm": 0.04947468638420105, "learning_rate": 0.01, "loss": 1.9725, "step": 67365 }, { "epoch": 6.918052988293284, "grad_norm": 0.09014926850795746, "learning_rate": 0.01, "loss": 1.9956, "step": 67368 }, { "epoch": 6.918361059765866, "grad_norm": 0.047003794461488724, "learning_rate": 0.01, "loss": 2.0002, "step": 67371 }, { "epoch": 6.918669131238447, "grad_norm": 0.04293885454535484, "learning_rate": 0.01, "loss": 2.0253, "step": 67374 }, { "epoch": 6.918977202711029, "grad_norm": 0.08349283039569855, "learning_rate": 0.01, "loss": 1.9856, "step": 67377 }, { "epoch": 6.91928527418361, "grad_norm": 0.10643094033002853, "learning_rate": 0.01, "loss": 2.0004, "step": 67380 }, { "epoch": 6.919593345656192, "grad_norm": 0.05152636021375656, "learning_rate": 0.01, "loss": 1.9818, "step": 67383 }, { "epoch": 6.919901417128774, "grad_norm": 0.045483458787202835, "learning_rate": 0.01, "loss": 1.9808, "step": 67386 }, { "epoch": 6.920209488601356, "grad_norm": 0.10970417410135269, "learning_rate": 0.01, "loss": 1.9907, "step": 67389 }, { "epoch": 6.920517560073937, "grad_norm": 0.0620252899825573, "learning_rate": 0.01, "loss": 1.9623, "step": 67392 }, { "epoch": 6.920825631546519, "grad_norm": 0.042169239372015, "learning_rate": 0.01, "loss": 1.9682, "step": 67395 }, { "epoch": 6.9211337030191, "grad_norm": 0.07429879903793335, "learning_rate": 0.01, "loss": 1.9528, "step": 67398 }, { "epoch": 6.921441774491682, "grad_norm": 0.16096588969230652, "learning_rate": 0.01, "loss": 1.9932, "step": 67401 }, { "epoch": 6.921749845964264, "grad_norm": 0.06262468546628952, "learning_rate": 0.01, "loss": 1.9846, "step": 67404 }, { "epoch": 6.922057917436845, "grad_norm": 0.08196594566106796, "learning_rate": 0.01, "loss": 1.9497, "step": 67407 }, { "epoch": 6.922365988909427, "grad_norm": 0.07014669477939606, "learning_rate": 0.01, "loss": 1.9577, "step": 67410 }, { "epoch": 6.9226740603820085, "grad_norm": 0.048560310155153275, "learning_rate": 0.01, "loss": 1.9628, "step": 67413 }, { "epoch": 6.9229821318545905, "grad_norm": 0.053055111318826675, "learning_rate": 0.01, "loss": 1.9825, "step": 67416 }, { "epoch": 6.923290203327172, "grad_norm": 0.0406053327023983, "learning_rate": 0.01, "loss": 1.9764, "step": 67419 }, { "epoch": 6.923598274799754, "grad_norm": 0.05388790741562843, "learning_rate": 0.01, "loss": 1.9868, "step": 67422 }, { "epoch": 6.923906346272335, "grad_norm": 0.03949485719203949, "learning_rate": 0.01, "loss": 1.9719, "step": 67425 }, { "epoch": 6.924214417744917, "grad_norm": 0.04466044530272484, "learning_rate": 0.01, "loss": 1.9614, "step": 67428 }, { "epoch": 6.924522489217498, "grad_norm": 0.08420266211032867, "learning_rate": 0.01, "loss": 1.9817, "step": 67431 }, { "epoch": 6.92483056069008, "grad_norm": 0.06232386827468872, "learning_rate": 0.01, "loss": 1.9781, "step": 67434 }, { "epoch": 6.925138632162662, "grad_norm": 0.10322209447622299, "learning_rate": 0.01, "loss": 1.9775, "step": 67437 }, { "epoch": 6.925446703635243, "grad_norm": 0.04155166074633598, "learning_rate": 0.01, "loss": 1.9779, "step": 67440 }, { "epoch": 6.925754775107825, "grad_norm": 0.08774439245462418, "learning_rate": 0.01, "loss": 1.9807, "step": 67443 }, { "epoch": 6.926062846580407, "grad_norm": 0.08442433178424835, "learning_rate": 0.01, "loss": 1.9903, "step": 67446 }, { "epoch": 6.926370918052989, "grad_norm": 0.0743742361664772, "learning_rate": 0.01, "loss": 1.9641, "step": 67449 }, { "epoch": 6.92667898952557, "grad_norm": 0.09326713532209396, "learning_rate": 0.01, "loss": 1.9493, "step": 67452 }, { "epoch": 6.926987060998152, "grad_norm": 0.05275671184062958, "learning_rate": 0.01, "loss": 1.9797, "step": 67455 }, { "epoch": 6.927295132470733, "grad_norm": 0.0862322598695755, "learning_rate": 0.01, "loss": 1.9653, "step": 67458 }, { "epoch": 6.927603203943315, "grad_norm": 0.06256649643182755, "learning_rate": 0.01, "loss": 1.9526, "step": 67461 }, { "epoch": 6.927911275415896, "grad_norm": 0.08660640567541122, "learning_rate": 0.01, "loss": 1.9502, "step": 67464 }, { "epoch": 6.928219346888478, "grad_norm": 0.05134911462664604, "learning_rate": 0.01, "loss": 1.9894, "step": 67467 }, { "epoch": 6.9285274183610595, "grad_norm": 0.09808807820081711, "learning_rate": 0.01, "loss": 1.9675, "step": 67470 }, { "epoch": 6.9288354898336415, "grad_norm": 0.04677712544798851, "learning_rate": 0.01, "loss": 1.98, "step": 67473 }, { "epoch": 6.929143561306223, "grad_norm": 0.040743302553892136, "learning_rate": 0.01, "loss": 1.9689, "step": 67476 }, { "epoch": 6.929451632778805, "grad_norm": 0.04844764620065689, "learning_rate": 0.01, "loss": 2.0035, "step": 67479 }, { "epoch": 6.929759704251387, "grad_norm": 0.042815107852220535, "learning_rate": 0.01, "loss": 1.9861, "step": 67482 }, { "epoch": 6.930067775723968, "grad_norm": 0.040781863033771515, "learning_rate": 0.01, "loss": 1.9828, "step": 67485 }, { "epoch": 6.93037584719655, "grad_norm": 0.07171075791120529, "learning_rate": 0.01, "loss": 1.9699, "step": 67488 }, { "epoch": 6.930683918669131, "grad_norm": 0.03687571734189987, "learning_rate": 0.01, "loss": 1.9648, "step": 67491 }, { "epoch": 6.930991990141713, "grad_norm": 0.04326467588543892, "learning_rate": 0.01, "loss": 1.9819, "step": 67494 }, { "epoch": 6.931300061614294, "grad_norm": 0.24775917828083038, "learning_rate": 0.01, "loss": 1.9658, "step": 67497 }, { "epoch": 6.931608133086876, "grad_norm": 0.09830349683761597, "learning_rate": 0.01, "loss": 1.9743, "step": 67500 }, { "epoch": 6.931916204559458, "grad_norm": 0.05593564361333847, "learning_rate": 0.01, "loss": 1.9755, "step": 67503 }, { "epoch": 6.93222427603204, "grad_norm": 0.05278097838163376, "learning_rate": 0.01, "loss": 1.9906, "step": 67506 }, { "epoch": 6.932532347504621, "grad_norm": 0.06709492206573486, "learning_rate": 0.01, "loss": 1.9707, "step": 67509 }, { "epoch": 6.932840418977203, "grad_norm": 0.08978651463985443, "learning_rate": 0.01, "loss": 1.983, "step": 67512 }, { "epoch": 6.933148490449784, "grad_norm": 0.10322022438049316, "learning_rate": 0.01, "loss": 1.9676, "step": 67515 }, { "epoch": 6.933456561922366, "grad_norm": 0.08842363208532333, "learning_rate": 0.01, "loss": 1.9886, "step": 67518 }, { "epoch": 6.933764633394947, "grad_norm": 0.1034180223941803, "learning_rate": 0.01, "loss": 1.9953, "step": 67521 }, { "epoch": 6.934072704867529, "grad_norm": 0.05395280942320824, "learning_rate": 0.01, "loss": 1.9868, "step": 67524 }, { "epoch": 6.934380776340111, "grad_norm": 0.04033922404050827, "learning_rate": 0.01, "loss": 1.9848, "step": 67527 }, { "epoch": 6.9346888478126925, "grad_norm": 0.10645033419132233, "learning_rate": 0.01, "loss": 1.9801, "step": 67530 }, { "epoch": 6.9349969192852745, "grad_norm": 0.03000570461153984, "learning_rate": 0.01, "loss": 1.9612, "step": 67533 }, { "epoch": 6.935304990757856, "grad_norm": 0.06463263183832169, "learning_rate": 0.01, "loss": 1.977, "step": 67536 }, { "epoch": 6.935613062230438, "grad_norm": 0.0651232898235321, "learning_rate": 0.01, "loss": 1.9687, "step": 67539 }, { "epoch": 6.935921133703019, "grad_norm": 0.048303090035915375, "learning_rate": 0.01, "loss": 1.9926, "step": 67542 }, { "epoch": 6.936229205175601, "grad_norm": 0.09571239352226257, "learning_rate": 0.01, "loss": 1.9753, "step": 67545 }, { "epoch": 6.936537276648182, "grad_norm": 0.04778117686510086, "learning_rate": 0.01, "loss": 1.9806, "step": 67548 }, { "epoch": 6.936845348120764, "grad_norm": 0.16268859803676605, "learning_rate": 0.01, "loss": 1.967, "step": 67551 }, { "epoch": 6.937153419593345, "grad_norm": 0.08549009263515472, "learning_rate": 0.01, "loss": 1.9649, "step": 67554 }, { "epoch": 6.937461491065927, "grad_norm": 0.0720728412270546, "learning_rate": 0.01, "loss": 1.9589, "step": 67557 }, { "epoch": 6.937769562538509, "grad_norm": 0.04524467885494232, "learning_rate": 0.01, "loss": 1.9752, "step": 67560 }, { "epoch": 6.938077634011091, "grad_norm": 0.0630386620759964, "learning_rate": 0.01, "loss": 1.9725, "step": 67563 }, { "epoch": 6.938385705483672, "grad_norm": 0.07228325307369232, "learning_rate": 0.01, "loss": 2.0101, "step": 67566 }, { "epoch": 6.938693776956254, "grad_norm": 0.0827326700091362, "learning_rate": 0.01, "loss": 1.9615, "step": 67569 }, { "epoch": 6.939001848428836, "grad_norm": 0.0640362948179245, "learning_rate": 0.01, "loss": 1.9779, "step": 67572 }, { "epoch": 6.939309919901417, "grad_norm": 0.0734630599617958, "learning_rate": 0.01, "loss": 1.9541, "step": 67575 }, { "epoch": 6.939617991373999, "grad_norm": 0.09056416898965836, "learning_rate": 0.01, "loss": 1.9831, "step": 67578 }, { "epoch": 6.93992606284658, "grad_norm": 0.04308030381798744, "learning_rate": 0.01, "loss": 1.9791, "step": 67581 }, { "epoch": 6.940234134319162, "grad_norm": 0.038212817162275314, "learning_rate": 0.01, "loss": 1.9769, "step": 67584 }, { "epoch": 6.940542205791743, "grad_norm": 0.06669415533542633, "learning_rate": 0.01, "loss": 1.9813, "step": 67587 }, { "epoch": 6.9408502772643255, "grad_norm": 0.10573708266019821, "learning_rate": 0.01, "loss": 1.9772, "step": 67590 }, { "epoch": 6.941158348736907, "grad_norm": 0.13396821916103363, "learning_rate": 0.01, "loss": 1.9422, "step": 67593 }, { "epoch": 6.941466420209489, "grad_norm": 0.0890778973698616, "learning_rate": 0.01, "loss": 1.9934, "step": 67596 }, { "epoch": 6.94177449168207, "grad_norm": 0.06729544699192047, "learning_rate": 0.01, "loss": 1.9731, "step": 67599 }, { "epoch": 6.942082563154652, "grad_norm": 0.04166124016046524, "learning_rate": 0.01, "loss": 1.9587, "step": 67602 }, { "epoch": 6.942390634627234, "grad_norm": 0.06778619438409805, "learning_rate": 0.01, "loss": 1.9923, "step": 67605 }, { "epoch": 6.942698706099815, "grad_norm": 0.052519191056489944, "learning_rate": 0.01, "loss": 1.983, "step": 67608 }, { "epoch": 6.943006777572397, "grad_norm": 0.051862701773643494, "learning_rate": 0.01, "loss": 1.9657, "step": 67611 }, { "epoch": 6.943314849044978, "grad_norm": 0.04436833783984184, "learning_rate": 0.01, "loss": 1.975, "step": 67614 }, { "epoch": 6.94362292051756, "grad_norm": 0.08768297731876373, "learning_rate": 0.01, "loss": 1.995, "step": 67617 }, { "epoch": 6.9439309919901415, "grad_norm": 0.10748183727264404, "learning_rate": 0.01, "loss": 1.9836, "step": 67620 }, { "epoch": 6.944239063462724, "grad_norm": 0.06303614377975464, "learning_rate": 0.01, "loss": 2.0053, "step": 67623 }, { "epoch": 6.944547134935305, "grad_norm": 0.0495496429502964, "learning_rate": 0.01, "loss": 2.0075, "step": 67626 }, { "epoch": 6.944855206407887, "grad_norm": 0.07309143245220184, "learning_rate": 0.01, "loss": 1.9995, "step": 67629 }, { "epoch": 6.945163277880468, "grad_norm": 0.09533344954252243, "learning_rate": 0.01, "loss": 1.9682, "step": 67632 }, { "epoch": 6.94547134935305, "grad_norm": 0.07221339643001556, "learning_rate": 0.01, "loss": 1.9699, "step": 67635 }, { "epoch": 6.945779420825632, "grad_norm": 0.0772852674126625, "learning_rate": 0.01, "loss": 1.942, "step": 67638 }, { "epoch": 6.946087492298213, "grad_norm": 0.03566101938486099, "learning_rate": 0.01, "loss": 1.9413, "step": 67641 }, { "epoch": 6.946395563770794, "grad_norm": 0.0641225203871727, "learning_rate": 0.01, "loss": 2.0036, "step": 67644 }, { "epoch": 6.946703635243376, "grad_norm": 0.11845903843641281, "learning_rate": 0.01, "loss": 1.9814, "step": 67647 }, { "epoch": 6.9470117067159585, "grad_norm": 0.1297493278980255, "learning_rate": 0.01, "loss": 1.9722, "step": 67650 }, { "epoch": 6.94731977818854, "grad_norm": 0.0657927542924881, "learning_rate": 0.01, "loss": 1.9527, "step": 67653 }, { "epoch": 6.947627849661122, "grad_norm": 0.0618324875831604, "learning_rate": 0.01, "loss": 1.9936, "step": 67656 }, { "epoch": 6.947935921133703, "grad_norm": 0.04335208609700203, "learning_rate": 0.01, "loss": 1.9779, "step": 67659 }, { "epoch": 6.948243992606285, "grad_norm": 0.03921616077423096, "learning_rate": 0.01, "loss": 1.9908, "step": 67662 }, { "epoch": 6.948552064078866, "grad_norm": 0.05289353057742119, "learning_rate": 0.01, "loss": 1.9806, "step": 67665 }, { "epoch": 6.948860135551448, "grad_norm": 0.037309400737285614, "learning_rate": 0.01, "loss": 1.97, "step": 67668 }, { "epoch": 6.949168207024029, "grad_norm": 0.07012271881103516, "learning_rate": 0.01, "loss": 1.9718, "step": 67671 }, { "epoch": 6.949476278496611, "grad_norm": 0.08038291335105896, "learning_rate": 0.01, "loss": 1.9542, "step": 67674 }, { "epoch": 6.9497843499691925, "grad_norm": 0.08565990626811981, "learning_rate": 0.01, "loss": 1.9578, "step": 67677 }, { "epoch": 6.9500924214417745, "grad_norm": 0.13325315713882446, "learning_rate": 0.01, "loss": 1.9594, "step": 67680 }, { "epoch": 6.950400492914357, "grad_norm": 0.06681843101978302, "learning_rate": 0.01, "loss": 1.9903, "step": 67683 }, { "epoch": 6.950708564386938, "grad_norm": 0.051343731582164764, "learning_rate": 0.01, "loss": 2.0182, "step": 67686 }, { "epoch": 6.95101663585952, "grad_norm": 0.031108930706977844, "learning_rate": 0.01, "loss": 1.9567, "step": 67689 }, { "epoch": 6.951324707332101, "grad_norm": 0.06201615929603577, "learning_rate": 0.01, "loss": 1.9724, "step": 67692 }, { "epoch": 6.951632778804683, "grad_norm": 0.059839654713869095, "learning_rate": 0.01, "loss": 1.9728, "step": 67695 }, { "epoch": 6.951940850277264, "grad_norm": 0.04882393404841423, "learning_rate": 0.01, "loss": 1.9678, "step": 67698 }, { "epoch": 6.952248921749846, "grad_norm": 0.031460147351026535, "learning_rate": 0.01, "loss": 1.9395, "step": 67701 }, { "epoch": 6.952556993222427, "grad_norm": 0.09777851402759552, "learning_rate": 0.01, "loss": 2.0074, "step": 67704 }, { "epoch": 6.952865064695009, "grad_norm": 0.0367160402238369, "learning_rate": 0.01, "loss": 1.9671, "step": 67707 }, { "epoch": 6.953173136167591, "grad_norm": 0.032731834799051285, "learning_rate": 0.01, "loss": 1.9556, "step": 67710 }, { "epoch": 6.953481207640173, "grad_norm": 0.07224103063344955, "learning_rate": 0.01, "loss": 1.978, "step": 67713 }, { "epoch": 6.953789279112754, "grad_norm": 0.062362559139728546, "learning_rate": 0.01, "loss": 1.9805, "step": 67716 }, { "epoch": 6.954097350585336, "grad_norm": 0.05006725341081619, "learning_rate": 0.01, "loss": 1.9899, "step": 67719 }, { "epoch": 6.954405422057917, "grad_norm": 0.03857605531811714, "learning_rate": 0.01, "loss": 1.9778, "step": 67722 }, { "epoch": 6.954713493530499, "grad_norm": 0.10484008491039276, "learning_rate": 0.01, "loss": 1.9787, "step": 67725 }, { "epoch": 6.955021565003081, "grad_norm": 0.09993112832307816, "learning_rate": 0.01, "loss": 1.9794, "step": 67728 }, { "epoch": 6.955329636475662, "grad_norm": 0.08906539529561996, "learning_rate": 0.01, "loss": 1.966, "step": 67731 }, { "epoch": 6.955637707948244, "grad_norm": 0.04950569570064545, "learning_rate": 0.01, "loss": 1.9736, "step": 67734 }, { "epoch": 6.9559457794208255, "grad_norm": 0.04009339585900307, "learning_rate": 0.01, "loss": 1.9712, "step": 67737 }, { "epoch": 6.9562538508934075, "grad_norm": 0.03402652591466904, "learning_rate": 0.01, "loss": 1.9692, "step": 67740 }, { "epoch": 6.956561922365989, "grad_norm": 0.036231786012649536, "learning_rate": 0.01, "loss": 1.975, "step": 67743 }, { "epoch": 6.956869993838571, "grad_norm": 0.06914620101451874, "learning_rate": 0.01, "loss": 1.9675, "step": 67746 }, { "epoch": 6.957178065311152, "grad_norm": 0.12120147049427032, "learning_rate": 0.01, "loss": 1.9943, "step": 67749 }, { "epoch": 6.957486136783734, "grad_norm": 0.04155363887548447, "learning_rate": 0.01, "loss": 1.968, "step": 67752 }, { "epoch": 6.957794208256315, "grad_norm": 0.03857012465596199, "learning_rate": 0.01, "loss": 1.9986, "step": 67755 }, { "epoch": 6.958102279728897, "grad_norm": 0.05268177390098572, "learning_rate": 0.01, "loss": 1.9617, "step": 67758 }, { "epoch": 6.958410351201479, "grad_norm": 0.14224505424499512, "learning_rate": 0.01, "loss": 1.9735, "step": 67761 }, { "epoch": 6.95871842267406, "grad_norm": 0.10331138968467712, "learning_rate": 0.01, "loss": 1.9663, "step": 67764 }, { "epoch": 6.9590264941466415, "grad_norm": 0.13701403141021729, "learning_rate": 0.01, "loss": 1.9779, "step": 67767 }, { "epoch": 6.959334565619224, "grad_norm": 0.09614556282758713, "learning_rate": 0.01, "loss": 1.9599, "step": 67770 }, { "epoch": 6.959642637091806, "grad_norm": 0.07385848462581635, "learning_rate": 0.01, "loss": 1.9619, "step": 67773 }, { "epoch": 6.959950708564387, "grad_norm": 0.05891263857483864, "learning_rate": 0.01, "loss": 1.9567, "step": 67776 }, { "epoch": 6.960258780036969, "grad_norm": 0.06532695889472961, "learning_rate": 0.01, "loss": 1.9592, "step": 67779 }, { "epoch": 6.96056685150955, "grad_norm": 0.048335302621126175, "learning_rate": 0.01, "loss": 1.9872, "step": 67782 }, { "epoch": 6.960874922982132, "grad_norm": 0.0490678995847702, "learning_rate": 0.01, "loss": 1.9964, "step": 67785 }, { "epoch": 6.961182994454713, "grad_norm": 0.057322729378938675, "learning_rate": 0.01, "loss": 1.9868, "step": 67788 }, { "epoch": 6.961491065927295, "grad_norm": 0.09200582653284073, "learning_rate": 0.01, "loss": 1.9913, "step": 67791 }, { "epoch": 6.961799137399876, "grad_norm": 0.08140811324119568, "learning_rate": 0.01, "loss": 1.9687, "step": 67794 }, { "epoch": 6.9621072088724585, "grad_norm": 0.08386159688234329, "learning_rate": 0.01, "loss": 1.9775, "step": 67797 }, { "epoch": 6.96241528034504, "grad_norm": 0.09306125342845917, "learning_rate": 0.01, "loss": 1.9717, "step": 67800 }, { "epoch": 6.962723351817622, "grad_norm": 0.0702492967247963, "learning_rate": 0.01, "loss": 1.9634, "step": 67803 }, { "epoch": 6.963031423290204, "grad_norm": 0.07399389147758484, "learning_rate": 0.01, "loss": 1.9792, "step": 67806 }, { "epoch": 6.963339494762785, "grad_norm": 0.05269570276141167, "learning_rate": 0.01, "loss": 1.9827, "step": 67809 }, { "epoch": 6.963647566235367, "grad_norm": 0.0840226262807846, "learning_rate": 0.01, "loss": 1.9654, "step": 67812 }, { "epoch": 6.963955637707948, "grad_norm": 0.06911315023899078, "learning_rate": 0.01, "loss": 1.9819, "step": 67815 }, { "epoch": 6.96426370918053, "grad_norm": 0.04625274986028671, "learning_rate": 0.01, "loss": 1.9673, "step": 67818 }, { "epoch": 6.964571780653111, "grad_norm": 0.033663198351860046, "learning_rate": 0.01, "loss": 1.9811, "step": 67821 }, { "epoch": 6.964879852125693, "grad_norm": 0.035202909260988235, "learning_rate": 0.01, "loss": 1.9427, "step": 67824 }, { "epoch": 6.9651879235982745, "grad_norm": 0.04282594099640846, "learning_rate": 0.01, "loss": 1.9854, "step": 67827 }, { "epoch": 6.965495995070857, "grad_norm": 0.07154384255409241, "learning_rate": 0.01, "loss": 2.0019, "step": 67830 }, { "epoch": 6.965804066543438, "grad_norm": 0.09028030931949615, "learning_rate": 0.01, "loss": 1.9761, "step": 67833 }, { "epoch": 6.96611213801602, "grad_norm": 0.046586085110902786, "learning_rate": 0.01, "loss": 1.9904, "step": 67836 }, { "epoch": 6.966420209488601, "grad_norm": 0.03928957134485245, "learning_rate": 0.01, "loss": 1.9578, "step": 67839 }, { "epoch": 6.966728280961183, "grad_norm": 0.04739971458911896, "learning_rate": 0.01, "loss": 1.9665, "step": 67842 }, { "epoch": 6.967036352433764, "grad_norm": 0.06463371962308884, "learning_rate": 0.01, "loss": 1.978, "step": 67845 }, { "epoch": 6.967344423906346, "grad_norm": 0.03283145651221275, "learning_rate": 0.01, "loss": 1.9722, "step": 67848 }, { "epoch": 6.967652495378928, "grad_norm": 0.15586607158184052, "learning_rate": 0.01, "loss": 1.9614, "step": 67851 }, { "epoch": 6.967960566851509, "grad_norm": 0.08082199841737747, "learning_rate": 0.01, "loss": 1.9538, "step": 67854 }, { "epoch": 6.9682686383240915, "grad_norm": 0.06927336007356644, "learning_rate": 0.01, "loss": 1.9578, "step": 67857 }, { "epoch": 6.968576709796673, "grad_norm": 0.03947534039616585, "learning_rate": 0.01, "loss": 1.9882, "step": 67860 }, { "epoch": 6.968884781269255, "grad_norm": 0.059715401381254196, "learning_rate": 0.01, "loss": 1.9768, "step": 67863 }, { "epoch": 6.969192852741836, "grad_norm": 0.056624263525009155, "learning_rate": 0.01, "loss": 1.9577, "step": 67866 }, { "epoch": 6.969500924214418, "grad_norm": 0.0887855812907219, "learning_rate": 0.01, "loss": 1.9986, "step": 67869 }, { "epoch": 6.969808995686999, "grad_norm": 0.08432299643754959, "learning_rate": 0.01, "loss": 1.9656, "step": 67872 }, { "epoch": 6.970117067159581, "grad_norm": 0.0985739529132843, "learning_rate": 0.01, "loss": 1.984, "step": 67875 }, { "epoch": 6.970425138632162, "grad_norm": 0.0615275539457798, "learning_rate": 0.01, "loss": 2.0046, "step": 67878 }, { "epoch": 6.970733210104744, "grad_norm": 0.04872935637831688, "learning_rate": 0.01, "loss": 1.9751, "step": 67881 }, { "epoch": 6.971041281577326, "grad_norm": 0.04057471454143524, "learning_rate": 0.01, "loss": 1.9835, "step": 67884 }, { "epoch": 6.9713493530499075, "grad_norm": 0.09079626202583313, "learning_rate": 0.01, "loss": 1.9851, "step": 67887 }, { "epoch": 6.97165742452249, "grad_norm": 0.12390229851007462, "learning_rate": 0.01, "loss": 2.0097, "step": 67890 }, { "epoch": 6.971965495995071, "grad_norm": 0.08013787120580673, "learning_rate": 0.01, "loss": 1.9916, "step": 67893 }, { "epoch": 6.972273567467653, "grad_norm": 0.10543009638786316, "learning_rate": 0.01, "loss": 1.9595, "step": 67896 }, { "epoch": 6.972581638940234, "grad_norm": 0.06962577998638153, "learning_rate": 0.01, "loss": 1.9368, "step": 67899 }, { "epoch": 6.972889710412816, "grad_norm": 0.05179775133728981, "learning_rate": 0.01, "loss": 1.9784, "step": 67902 }, { "epoch": 6.973197781885397, "grad_norm": 0.04999608173966408, "learning_rate": 0.01, "loss": 1.9589, "step": 67905 }, { "epoch": 6.973505853357979, "grad_norm": 0.037668853998184204, "learning_rate": 0.01, "loss": 1.974, "step": 67908 }, { "epoch": 6.97381392483056, "grad_norm": 0.04385687783360481, "learning_rate": 0.01, "loss": 1.9713, "step": 67911 }, { "epoch": 6.9741219963031424, "grad_norm": 0.03812088444828987, "learning_rate": 0.01, "loss": 1.9791, "step": 67914 }, { "epoch": 6.974430067775724, "grad_norm": 0.08592873066663742, "learning_rate": 0.01, "loss": 1.9476, "step": 67917 }, { "epoch": 6.974738139248306, "grad_norm": 0.07441151887178421, "learning_rate": 0.01, "loss": 1.9734, "step": 67920 }, { "epoch": 6.975046210720887, "grad_norm": 0.055950410664081573, "learning_rate": 0.01, "loss": 1.965, "step": 67923 }, { "epoch": 6.975354282193469, "grad_norm": 0.07224567234516144, "learning_rate": 0.01, "loss": 1.9649, "step": 67926 }, { "epoch": 6.975662353666051, "grad_norm": 0.17487280070781708, "learning_rate": 0.01, "loss": 1.9706, "step": 67929 }, { "epoch": 6.975970425138632, "grad_norm": 0.040765196084976196, "learning_rate": 0.01, "loss": 1.9787, "step": 67932 }, { "epoch": 6.976278496611214, "grad_norm": 0.05561240762472153, "learning_rate": 0.01, "loss": 1.9699, "step": 67935 }, { "epoch": 6.976586568083795, "grad_norm": 0.07443460822105408, "learning_rate": 0.01, "loss": 1.9553, "step": 67938 }, { "epoch": 6.976894639556377, "grad_norm": 0.0669572651386261, "learning_rate": 0.01, "loss": 1.9887, "step": 67941 }, { "epoch": 6.9772027110289585, "grad_norm": 0.05161336436867714, "learning_rate": 0.01, "loss": 1.9854, "step": 67944 }, { "epoch": 6.9775107825015406, "grad_norm": 0.0417034886777401, "learning_rate": 0.01, "loss": 1.9739, "step": 67947 }, { "epoch": 6.977818853974122, "grad_norm": 0.06474210321903229, "learning_rate": 0.01, "loss": 1.9796, "step": 67950 }, { "epoch": 6.978126925446704, "grad_norm": 0.07746025174856186, "learning_rate": 0.01, "loss": 1.9753, "step": 67953 }, { "epoch": 6.978434996919285, "grad_norm": 0.045474573969841, "learning_rate": 0.01, "loss": 1.9518, "step": 67956 }, { "epoch": 6.978743068391867, "grad_norm": 0.03205155208706856, "learning_rate": 0.01, "loss": 1.9783, "step": 67959 }, { "epoch": 6.979051139864449, "grad_norm": 0.03780858963727951, "learning_rate": 0.01, "loss": 1.9759, "step": 67962 }, { "epoch": 6.97935921133703, "grad_norm": 0.04689191281795502, "learning_rate": 0.01, "loss": 1.9845, "step": 67965 }, { "epoch": 6.979667282809611, "grad_norm": 0.05307697504758835, "learning_rate": 0.01, "loss": 1.9462, "step": 67968 }, { "epoch": 6.979975354282193, "grad_norm": 0.19176854193210602, "learning_rate": 0.01, "loss": 1.9786, "step": 67971 }, { "epoch": 6.9802834257547755, "grad_norm": 0.13577581942081451, "learning_rate": 0.01, "loss": 1.9874, "step": 67974 }, { "epoch": 6.980591497227357, "grad_norm": 0.10115823894739151, "learning_rate": 0.01, "loss": 2.0068, "step": 67977 }, { "epoch": 6.980899568699939, "grad_norm": 0.08148372173309326, "learning_rate": 0.01, "loss": 1.9795, "step": 67980 }, { "epoch": 6.98120764017252, "grad_norm": 0.05191081017255783, "learning_rate": 0.01, "loss": 1.9597, "step": 67983 }, { "epoch": 6.981515711645102, "grad_norm": 0.06080775707960129, "learning_rate": 0.01, "loss": 1.978, "step": 67986 }, { "epoch": 6.981823783117683, "grad_norm": 0.07841724157333374, "learning_rate": 0.01, "loss": 2.0001, "step": 67989 }, { "epoch": 6.982131854590265, "grad_norm": 0.04652271047234535, "learning_rate": 0.01, "loss": 1.989, "step": 67992 }, { "epoch": 6.982439926062846, "grad_norm": 0.10853028297424316, "learning_rate": 0.01, "loss": 1.9663, "step": 67995 }, { "epoch": 6.982747997535428, "grad_norm": 0.03634324297308922, "learning_rate": 0.01, "loss": 1.9614, "step": 67998 }, { "epoch": 6.9830560690080095, "grad_norm": 0.13538099825382233, "learning_rate": 0.01, "loss": 1.9805, "step": 68001 }, { "epoch": 6.9833641404805915, "grad_norm": 0.06273335963487625, "learning_rate": 0.01, "loss": 1.9564, "step": 68004 }, { "epoch": 6.983672211953174, "grad_norm": 0.08126869797706604, "learning_rate": 0.01, "loss": 2.0036, "step": 68007 }, { "epoch": 6.983980283425755, "grad_norm": 0.05156498774886131, "learning_rate": 0.01, "loss": 1.9956, "step": 68010 }, { "epoch": 6.984288354898337, "grad_norm": 0.05055593326687813, "learning_rate": 0.01, "loss": 1.9717, "step": 68013 }, { "epoch": 6.984596426370918, "grad_norm": 0.04435109347105026, "learning_rate": 0.01, "loss": 1.9627, "step": 68016 }, { "epoch": 6.9849044978435, "grad_norm": 0.13580620288848877, "learning_rate": 0.01, "loss": 1.9931, "step": 68019 }, { "epoch": 6.985212569316081, "grad_norm": 0.03387708216905594, "learning_rate": 0.01, "loss": 1.9527, "step": 68022 }, { "epoch": 6.985520640788663, "grad_norm": 0.09359924495220184, "learning_rate": 0.01, "loss": 1.9677, "step": 68025 }, { "epoch": 6.985828712261244, "grad_norm": 0.09586784243583679, "learning_rate": 0.01, "loss": 1.958, "step": 68028 }, { "epoch": 6.986136783733826, "grad_norm": 0.08071450144052505, "learning_rate": 0.01, "loss": 1.9878, "step": 68031 }, { "epoch": 6.986444855206408, "grad_norm": 0.04609205946326256, "learning_rate": 0.01, "loss": 1.9778, "step": 68034 }, { "epoch": 6.98675292667899, "grad_norm": 0.05660049244761467, "learning_rate": 0.01, "loss": 1.9692, "step": 68037 }, { "epoch": 6.987060998151571, "grad_norm": 0.04224297031760216, "learning_rate": 0.01, "loss": 1.9708, "step": 68040 }, { "epoch": 6.987369069624153, "grad_norm": 0.056567490100860596, "learning_rate": 0.01, "loss": 1.9471, "step": 68043 }, { "epoch": 6.987677141096734, "grad_norm": 0.10390833020210266, "learning_rate": 0.01, "loss": 1.9526, "step": 68046 }, { "epoch": 6.987985212569316, "grad_norm": 0.04906808212399483, "learning_rate": 0.01, "loss": 1.9883, "step": 68049 }, { "epoch": 6.988293284041898, "grad_norm": 0.0763496458530426, "learning_rate": 0.01, "loss": 1.9625, "step": 68052 }, { "epoch": 6.988601355514479, "grad_norm": 0.042168740183115005, "learning_rate": 0.01, "loss": 1.971, "step": 68055 }, { "epoch": 6.988909426987061, "grad_norm": 0.044075168669223785, "learning_rate": 0.01, "loss": 1.9538, "step": 68058 }, { "epoch": 6.9892174984596425, "grad_norm": 0.06443364918231964, "learning_rate": 0.01, "loss": 1.9974, "step": 68061 }, { "epoch": 6.9895255699322245, "grad_norm": 0.06181558594107628, "learning_rate": 0.01, "loss": 1.9731, "step": 68064 }, { "epoch": 6.989833641404806, "grad_norm": 0.061378926038742065, "learning_rate": 0.01, "loss": 1.9591, "step": 68067 }, { "epoch": 6.990141712877388, "grad_norm": 0.11718502640724182, "learning_rate": 0.01, "loss": 1.9726, "step": 68070 }, { "epoch": 6.990449784349969, "grad_norm": 0.10844454169273376, "learning_rate": 0.01, "loss": 1.9943, "step": 68073 }, { "epoch": 6.990757855822551, "grad_norm": 0.04362885281443596, "learning_rate": 0.01, "loss": 1.9694, "step": 68076 }, { "epoch": 6.991065927295132, "grad_norm": 0.04131380096077919, "learning_rate": 0.01, "loss": 1.969, "step": 68079 }, { "epoch": 6.991373998767714, "grad_norm": 0.03312958776950836, "learning_rate": 0.01, "loss": 1.9731, "step": 68082 }, { "epoch": 6.991682070240296, "grad_norm": 0.05648859217762947, "learning_rate": 0.01, "loss": 1.9942, "step": 68085 }, { "epoch": 6.991990141712877, "grad_norm": 0.07927577197551727, "learning_rate": 0.01, "loss": 1.9604, "step": 68088 }, { "epoch": 6.992298213185459, "grad_norm": 0.081300288438797, "learning_rate": 0.01, "loss": 1.9668, "step": 68091 }, { "epoch": 6.992606284658041, "grad_norm": 0.08239664137363434, "learning_rate": 0.01, "loss": 1.991, "step": 68094 }, { "epoch": 6.992914356130623, "grad_norm": 0.14173032343387604, "learning_rate": 0.01, "loss": 1.9795, "step": 68097 }, { "epoch": 6.993222427603204, "grad_norm": 0.039897531270980835, "learning_rate": 0.01, "loss": 1.9652, "step": 68100 }, { "epoch": 6.993530499075786, "grad_norm": 0.08170229941606522, "learning_rate": 0.01, "loss": 1.9603, "step": 68103 }, { "epoch": 6.993838570548367, "grad_norm": 0.09108921140432358, "learning_rate": 0.01, "loss": 1.9484, "step": 68106 }, { "epoch": 6.994146642020949, "grad_norm": 0.048272643238306046, "learning_rate": 0.01, "loss": 1.9744, "step": 68109 }, { "epoch": 6.99445471349353, "grad_norm": 0.06137422099709511, "learning_rate": 0.01, "loss": 1.9776, "step": 68112 }, { "epoch": 6.994762784966112, "grad_norm": 0.060723815113306046, "learning_rate": 0.01, "loss": 1.9579, "step": 68115 }, { "epoch": 6.995070856438693, "grad_norm": 0.11094491183757782, "learning_rate": 0.01, "loss": 1.9974, "step": 68118 }, { "epoch": 6.9953789279112755, "grad_norm": 0.041866280138492584, "learning_rate": 0.01, "loss": 1.9696, "step": 68121 }, { "epoch": 6.995686999383857, "grad_norm": 0.032954033464193344, "learning_rate": 0.01, "loss": 1.9706, "step": 68124 }, { "epoch": 6.995995070856439, "grad_norm": 0.034947719424963, "learning_rate": 0.01, "loss": 1.975, "step": 68127 }, { "epoch": 6.996303142329021, "grad_norm": 0.03938392922282219, "learning_rate": 0.01, "loss": 1.994, "step": 68130 }, { "epoch": 6.996611213801602, "grad_norm": 0.1045796275138855, "learning_rate": 0.01, "loss": 1.9709, "step": 68133 }, { "epoch": 6.996919285274184, "grad_norm": 0.10855524986982346, "learning_rate": 0.01, "loss": 1.9603, "step": 68136 }, { "epoch": 6.997227356746765, "grad_norm": 0.08628056198358536, "learning_rate": 0.01, "loss": 1.9827, "step": 68139 }, { "epoch": 6.997535428219347, "grad_norm": 0.0505918487906456, "learning_rate": 0.01, "loss": 1.9752, "step": 68142 }, { "epoch": 6.997843499691928, "grad_norm": 0.04251931607723236, "learning_rate": 0.01, "loss": 1.9555, "step": 68145 }, { "epoch": 6.99815157116451, "grad_norm": 0.05464445427060127, "learning_rate": 0.01, "loss": 1.97, "step": 68148 }, { "epoch": 6.9984596426370915, "grad_norm": 0.04259735345840454, "learning_rate": 0.01, "loss": 1.9922, "step": 68151 }, { "epoch": 6.998767714109674, "grad_norm": 0.06472709774971008, "learning_rate": 0.01, "loss": 1.9549, "step": 68154 }, { "epoch": 6.999075785582255, "grad_norm": 0.0695829764008522, "learning_rate": 0.01, "loss": 1.9776, "step": 68157 }, { "epoch": 6.999383857054837, "grad_norm": 0.09822755306959152, "learning_rate": 0.01, "loss": 1.9376, "step": 68160 }, { "epoch": 6.999691928527419, "grad_norm": 0.10002230852842331, "learning_rate": 0.01, "loss": 1.9992, "step": 68163 }, { "epoch": 7.0, "grad_norm": 0.05069267004728317, "learning_rate": 0.01, "loss": 1.9642, "step": 68166 }, { "epoch": 7.00606372045221, "grad_norm": 0.054282162338495255, "learning_rate": 0.01, "loss": 1.9643, "step": 68169 }, { "epoch": 7.006372045220966, "grad_norm": 0.06988398730754852, "learning_rate": 0.01, "loss": 1.9899, "step": 68172 }, { "epoch": 7.006680369989723, "grad_norm": 0.06562650203704834, "learning_rate": 0.01, "loss": 2.0001, "step": 68175 }, { "epoch": 7.006988694758479, "grad_norm": 0.04521455615758896, "learning_rate": 0.01, "loss": 1.9925, "step": 68178 }, { "epoch": 7.007297019527235, "grad_norm": 0.05735592916607857, "learning_rate": 0.01, "loss": 1.9914, "step": 68181 }, { "epoch": 7.007605344295992, "grad_norm": 0.08022990822792053, "learning_rate": 0.01, "loss": 1.9981, "step": 68184 }, { "epoch": 7.007913669064748, "grad_norm": 0.052689116448163986, "learning_rate": 0.01, "loss": 1.9738, "step": 68187 }, { "epoch": 7.008221993833504, "grad_norm": 0.03922291100025177, "learning_rate": 0.01, "loss": 1.9792, "step": 68190 }, { "epoch": 7.008530318602261, "grad_norm": 0.11853530257940292, "learning_rate": 0.01, "loss": 1.9736, "step": 68193 }, { "epoch": 7.008838643371018, "grad_norm": 0.07033178955316544, "learning_rate": 0.01, "loss": 1.9988, "step": 68196 }, { "epoch": 7.009146968139774, "grad_norm": 0.049129668623209, "learning_rate": 0.01, "loss": 1.9779, "step": 68199 }, { "epoch": 7.00945529290853, "grad_norm": 0.044174324721097946, "learning_rate": 0.01, "loss": 1.9641, "step": 68202 }, { "epoch": 7.009763617677287, "grad_norm": 0.05447569862008095, "learning_rate": 0.01, "loss": 1.9951, "step": 68205 }, { "epoch": 7.010071942446043, "grad_norm": 0.06834445148706436, "learning_rate": 0.01, "loss": 1.9751, "step": 68208 }, { "epoch": 7.010380267214799, "grad_norm": 0.056646499782800674, "learning_rate": 0.01, "loss": 1.9802, "step": 68211 }, { "epoch": 7.010688591983556, "grad_norm": 0.08008157461881638, "learning_rate": 0.01, "loss": 1.9763, "step": 68214 }, { "epoch": 7.010996916752313, "grad_norm": 0.07106824219226837, "learning_rate": 0.01, "loss": 1.9977, "step": 68217 }, { "epoch": 7.011305241521069, "grad_norm": 0.05258387327194214, "learning_rate": 0.01, "loss": 1.9791, "step": 68220 }, { "epoch": 7.011613566289825, "grad_norm": 0.08909857273101807, "learning_rate": 0.01, "loss": 1.9838, "step": 68223 }, { "epoch": 7.011921891058582, "grad_norm": 0.05345101282000542, "learning_rate": 0.01, "loss": 1.9745, "step": 68226 }, { "epoch": 7.012230215827338, "grad_norm": 0.04160739853978157, "learning_rate": 0.01, "loss": 1.9682, "step": 68229 }, { "epoch": 7.012538540596094, "grad_norm": 0.03168926015496254, "learning_rate": 0.01, "loss": 1.9663, "step": 68232 }, { "epoch": 7.012846865364851, "grad_norm": 0.035804539918899536, "learning_rate": 0.01, "loss": 1.9821, "step": 68235 }, { "epoch": 7.013155190133608, "grad_norm": 0.04234134405851364, "learning_rate": 0.01, "loss": 2.0013, "step": 68238 }, { "epoch": 7.013463514902364, "grad_norm": 0.06941762566566467, "learning_rate": 0.01, "loss": 1.9982, "step": 68241 }, { "epoch": 7.01377183967112, "grad_norm": 0.07563450187444687, "learning_rate": 0.01, "loss": 1.9841, "step": 68244 }, { "epoch": 7.014080164439877, "grad_norm": 0.07446388155221939, "learning_rate": 0.01, "loss": 2.0143, "step": 68247 }, { "epoch": 7.014388489208633, "grad_norm": 0.040372107177972794, "learning_rate": 0.01, "loss": 1.9928, "step": 68250 }, { "epoch": 7.014696813977389, "grad_norm": 0.12671950459480286, "learning_rate": 0.01, "loss": 1.9914, "step": 68253 }, { "epoch": 7.015005138746146, "grad_norm": 0.06948719173669815, "learning_rate": 0.01, "loss": 1.9713, "step": 68256 }, { "epoch": 7.015313463514902, "grad_norm": 0.0657772570848465, "learning_rate": 0.01, "loss": 1.9835, "step": 68259 }, { "epoch": 7.015621788283659, "grad_norm": 0.07090414315462112, "learning_rate": 0.01, "loss": 1.9862, "step": 68262 }, { "epoch": 7.015930113052415, "grad_norm": 0.0721154510974884, "learning_rate": 0.01, "loss": 1.9925, "step": 68265 }, { "epoch": 7.016238437821172, "grad_norm": 0.05126158893108368, "learning_rate": 0.01, "loss": 1.9703, "step": 68268 }, { "epoch": 7.016546762589928, "grad_norm": 0.07218746840953827, "learning_rate": 0.01, "loss": 1.9704, "step": 68271 }, { "epoch": 7.016855087358684, "grad_norm": 0.08373812586069107, "learning_rate": 0.01, "loss": 1.9869, "step": 68274 }, { "epoch": 7.017163412127441, "grad_norm": 0.07843849062919617, "learning_rate": 0.01, "loss": 1.9482, "step": 68277 }, { "epoch": 7.017471736896197, "grad_norm": 0.042954906821250916, "learning_rate": 0.01, "loss": 1.981, "step": 68280 }, { "epoch": 7.017780061664954, "grad_norm": 0.04174837842583656, "learning_rate": 0.01, "loss": 1.9607, "step": 68283 }, { "epoch": 7.0180883864337105, "grad_norm": 0.04753096401691437, "learning_rate": 0.01, "loss": 2.0111, "step": 68286 }, { "epoch": 7.018396711202467, "grad_norm": 0.05489262193441391, "learning_rate": 0.01, "loss": 1.9765, "step": 68289 }, { "epoch": 7.018705035971223, "grad_norm": 0.09776978939771652, "learning_rate": 0.01, "loss": 1.9831, "step": 68292 }, { "epoch": 7.019013360739979, "grad_norm": 0.05086861178278923, "learning_rate": 0.01, "loss": 1.977, "step": 68295 }, { "epoch": 7.019321685508736, "grad_norm": 0.07187198102474213, "learning_rate": 0.01, "loss": 1.9738, "step": 68298 }, { "epoch": 7.019630010277492, "grad_norm": 0.10095050185918808, "learning_rate": 0.01, "loss": 2.0015, "step": 68301 }, { "epoch": 7.019938335046248, "grad_norm": 0.06792483478784561, "learning_rate": 0.01, "loss": 1.9925, "step": 68304 }, { "epoch": 7.0202466598150055, "grad_norm": 0.050041310489177704, "learning_rate": 0.01, "loss": 1.9845, "step": 68307 }, { "epoch": 7.020554984583762, "grad_norm": 0.04710831120610237, "learning_rate": 0.01, "loss": 1.9735, "step": 68310 }, { "epoch": 7.020863309352518, "grad_norm": 0.056352101266384125, "learning_rate": 0.01, "loss": 1.9734, "step": 68313 }, { "epoch": 7.021171634121274, "grad_norm": 0.08362201601266861, "learning_rate": 0.01, "loss": 1.976, "step": 68316 }, { "epoch": 7.021479958890031, "grad_norm": 0.03935181722044945, "learning_rate": 0.01, "loss": 1.9911, "step": 68319 }, { "epoch": 7.021788283658787, "grad_norm": 0.141304612159729, "learning_rate": 0.01, "loss": 2.0116, "step": 68322 }, { "epoch": 7.022096608427543, "grad_norm": 0.041724372655153275, "learning_rate": 0.01, "loss": 1.9873, "step": 68325 }, { "epoch": 7.0224049331963005, "grad_norm": 0.04254898428916931, "learning_rate": 0.01, "loss": 2.0029, "step": 68328 }, { "epoch": 7.022713257965057, "grad_norm": 0.06883671879768372, "learning_rate": 0.01, "loss": 1.9628, "step": 68331 }, { "epoch": 7.023021582733813, "grad_norm": 0.05049630254507065, "learning_rate": 0.01, "loss": 1.9658, "step": 68334 }, { "epoch": 7.0233299075025695, "grad_norm": 0.047389786690473557, "learning_rate": 0.01, "loss": 1.9701, "step": 68337 }, { "epoch": 7.023638232271326, "grad_norm": 0.14553938806056976, "learning_rate": 0.01, "loss": 1.984, "step": 68340 }, { "epoch": 7.023946557040082, "grad_norm": 0.0530347004532814, "learning_rate": 0.01, "loss": 1.9789, "step": 68343 }, { "epoch": 7.024254881808838, "grad_norm": 0.07391063868999481, "learning_rate": 0.01, "loss": 1.976, "step": 68346 }, { "epoch": 7.024563206577595, "grad_norm": 0.082502581179142, "learning_rate": 0.01, "loss": 1.9925, "step": 68349 }, { "epoch": 7.024871531346352, "grad_norm": 0.09995315968990326, "learning_rate": 0.01, "loss": 1.9881, "step": 68352 }, { "epoch": 7.025179856115108, "grad_norm": 0.0911843478679657, "learning_rate": 0.01, "loss": 1.9939, "step": 68355 }, { "epoch": 7.0254881808838645, "grad_norm": 0.04006153717637062, "learning_rate": 0.01, "loss": 2.0012, "step": 68358 }, { "epoch": 7.025796505652621, "grad_norm": 0.09338364750146866, "learning_rate": 0.01, "loss": 1.9732, "step": 68361 }, { "epoch": 7.026104830421377, "grad_norm": 0.03738764673471451, "learning_rate": 0.01, "loss": 1.9475, "step": 68364 }, { "epoch": 7.026413155190133, "grad_norm": 0.048774126917123795, "learning_rate": 0.01, "loss": 1.9693, "step": 68367 }, { "epoch": 7.02672147995889, "grad_norm": 0.08035308867692947, "learning_rate": 0.01, "loss": 1.9469, "step": 68370 }, { "epoch": 7.027029804727646, "grad_norm": 0.054234910756349564, "learning_rate": 0.01, "loss": 2.0002, "step": 68373 }, { "epoch": 7.027338129496403, "grad_norm": 0.08153880387544632, "learning_rate": 0.01, "loss": 1.9654, "step": 68376 }, { "epoch": 7.0276464542651595, "grad_norm": 0.047218646854162216, "learning_rate": 0.01, "loss": 1.9725, "step": 68379 }, { "epoch": 7.027954779033916, "grad_norm": 0.061120592057704926, "learning_rate": 0.01, "loss": 1.9594, "step": 68382 }, { "epoch": 7.028263103802672, "grad_norm": 0.08370841294527054, "learning_rate": 0.01, "loss": 1.9951, "step": 68385 }, { "epoch": 7.0285714285714285, "grad_norm": 0.0927995815873146, "learning_rate": 0.01, "loss": 2.0014, "step": 68388 }, { "epoch": 7.028879753340185, "grad_norm": 0.040699124336242676, "learning_rate": 0.01, "loss": 2.0016, "step": 68391 }, { "epoch": 7.029188078108941, "grad_norm": 0.12133225053548813, "learning_rate": 0.01, "loss": 1.9882, "step": 68394 }, { "epoch": 7.029496402877698, "grad_norm": 0.0616045743227005, "learning_rate": 0.01, "loss": 1.9605, "step": 68397 }, { "epoch": 7.029804727646455, "grad_norm": 0.0576595813035965, "learning_rate": 0.01, "loss": 1.9817, "step": 68400 }, { "epoch": 7.030113052415211, "grad_norm": 0.11460481584072113, "learning_rate": 0.01, "loss": 2.0056, "step": 68403 }, { "epoch": 7.030421377183967, "grad_norm": 0.046943407505750656, "learning_rate": 0.01, "loss": 2.0127, "step": 68406 }, { "epoch": 7.0307297019527235, "grad_norm": 0.034080930054187775, "learning_rate": 0.01, "loss": 1.9803, "step": 68409 }, { "epoch": 7.03103802672148, "grad_norm": 0.09172356873750687, "learning_rate": 0.01, "loss": 1.9931, "step": 68412 }, { "epoch": 7.031346351490236, "grad_norm": 0.06134629622101784, "learning_rate": 0.01, "loss": 2.0077, "step": 68415 }, { "epoch": 7.031654676258992, "grad_norm": 0.038659993559122086, "learning_rate": 0.01, "loss": 1.9846, "step": 68418 }, { "epoch": 7.03196300102775, "grad_norm": 0.03268113732337952, "learning_rate": 0.01, "loss": 1.976, "step": 68421 }, { "epoch": 7.032271325796506, "grad_norm": 0.05008908361196518, "learning_rate": 0.01, "loss": 1.9864, "step": 68424 }, { "epoch": 7.032579650565262, "grad_norm": 0.05112696439027786, "learning_rate": 0.01, "loss": 1.9892, "step": 68427 }, { "epoch": 7.0328879753340185, "grad_norm": 0.03850901499390602, "learning_rate": 0.01, "loss": 1.9772, "step": 68430 }, { "epoch": 7.033196300102775, "grad_norm": 0.05724974349141121, "learning_rate": 0.01, "loss": 1.9833, "step": 68433 }, { "epoch": 7.033504624871531, "grad_norm": 0.056213781237602234, "learning_rate": 0.01, "loss": 1.9596, "step": 68436 }, { "epoch": 7.0338129496402875, "grad_norm": 0.03625565767288208, "learning_rate": 0.01, "loss": 1.9982, "step": 68439 }, { "epoch": 7.034121274409044, "grad_norm": 0.040962837636470795, "learning_rate": 0.01, "loss": 1.9827, "step": 68442 }, { "epoch": 7.034429599177801, "grad_norm": 0.0518268421292305, "learning_rate": 0.01, "loss": 1.9668, "step": 68445 }, { "epoch": 7.034737923946557, "grad_norm": 0.07236559689044952, "learning_rate": 0.01, "loss": 2.0084, "step": 68448 }, { "epoch": 7.035046248715314, "grad_norm": 0.05604173243045807, "learning_rate": 0.01, "loss": 1.9832, "step": 68451 }, { "epoch": 7.03535457348407, "grad_norm": 0.0653105154633522, "learning_rate": 0.01, "loss": 1.961, "step": 68454 }, { "epoch": 7.035662898252826, "grad_norm": 0.1655261069536209, "learning_rate": 0.01, "loss": 1.966, "step": 68457 }, { "epoch": 7.0359712230215825, "grad_norm": 0.04352293908596039, "learning_rate": 0.01, "loss": 1.9935, "step": 68460 }, { "epoch": 7.036279547790339, "grad_norm": 0.04556136578321457, "learning_rate": 0.01, "loss": 1.997, "step": 68463 }, { "epoch": 7.036587872559096, "grad_norm": 0.10971470177173615, "learning_rate": 0.01, "loss": 2.008, "step": 68466 }, { "epoch": 7.036896197327852, "grad_norm": 0.05627235397696495, "learning_rate": 0.01, "loss": 1.9764, "step": 68469 }, { "epoch": 7.037204522096609, "grad_norm": 0.03856800124049187, "learning_rate": 0.01, "loss": 1.9835, "step": 68472 }, { "epoch": 7.037512846865365, "grad_norm": 0.032653167843818665, "learning_rate": 0.01, "loss": 1.9574, "step": 68475 }, { "epoch": 7.037821171634121, "grad_norm": 0.03872492164373398, "learning_rate": 0.01, "loss": 1.9949, "step": 68478 }, { "epoch": 7.0381294964028775, "grad_norm": 0.08332209289073944, "learning_rate": 0.01, "loss": 1.988, "step": 68481 }, { "epoch": 7.038437821171634, "grad_norm": 0.11315266042947769, "learning_rate": 0.01, "loss": 1.9852, "step": 68484 }, { "epoch": 7.03874614594039, "grad_norm": 0.058779433369636536, "learning_rate": 0.01, "loss": 1.9595, "step": 68487 }, { "epoch": 7.039054470709147, "grad_norm": 0.05338497459888458, "learning_rate": 0.01, "loss": 1.954, "step": 68490 }, { "epoch": 7.039362795477904, "grad_norm": 0.05961502715945244, "learning_rate": 0.01, "loss": 1.9546, "step": 68493 }, { "epoch": 7.03967112024666, "grad_norm": 0.10511081665754318, "learning_rate": 0.01, "loss": 1.9775, "step": 68496 }, { "epoch": 7.039979445015416, "grad_norm": 0.060618769377470016, "learning_rate": 0.01, "loss": 1.9754, "step": 68499 }, { "epoch": 7.040287769784173, "grad_norm": 0.05174575001001358, "learning_rate": 0.01, "loss": 1.9858, "step": 68502 }, { "epoch": 7.040596094552929, "grad_norm": 0.038627609610557556, "learning_rate": 0.01, "loss": 1.9625, "step": 68505 }, { "epoch": 7.040904419321685, "grad_norm": 0.04214971512556076, "learning_rate": 0.01, "loss": 1.9856, "step": 68508 }, { "epoch": 7.0412127440904415, "grad_norm": 0.08534090965986252, "learning_rate": 0.01, "loss": 1.9718, "step": 68511 }, { "epoch": 7.041521068859199, "grad_norm": 0.047119926661252975, "learning_rate": 0.01, "loss": 1.9885, "step": 68514 }, { "epoch": 7.041829393627955, "grad_norm": 0.05264575034379959, "learning_rate": 0.01, "loss": 2.003, "step": 68517 }, { "epoch": 7.042137718396711, "grad_norm": 0.06278250366449356, "learning_rate": 0.01, "loss": 1.984, "step": 68520 }, { "epoch": 7.042446043165468, "grad_norm": 0.09359404444694519, "learning_rate": 0.01, "loss": 1.9942, "step": 68523 }, { "epoch": 7.042754367934224, "grad_norm": 0.043020278215408325, "learning_rate": 0.01, "loss": 1.9981, "step": 68526 }, { "epoch": 7.04306269270298, "grad_norm": 0.04135395586490631, "learning_rate": 0.01, "loss": 1.9514, "step": 68529 }, { "epoch": 7.0433710174717366, "grad_norm": 0.09166526049375534, "learning_rate": 0.01, "loss": 1.9639, "step": 68532 }, { "epoch": 7.043679342240494, "grad_norm": 0.0364113412797451, "learning_rate": 0.01, "loss": 1.9824, "step": 68535 }, { "epoch": 7.04398766700925, "grad_norm": 0.04818427935242653, "learning_rate": 0.01, "loss": 1.9765, "step": 68538 }, { "epoch": 7.044295991778006, "grad_norm": 0.09592068195343018, "learning_rate": 0.01, "loss": 1.9758, "step": 68541 }, { "epoch": 7.044604316546763, "grad_norm": 0.07666312903165817, "learning_rate": 0.01, "loss": 1.9662, "step": 68544 }, { "epoch": 7.044912641315519, "grad_norm": 0.053152818232774734, "learning_rate": 0.01, "loss": 1.9713, "step": 68547 }, { "epoch": 7.045220966084275, "grad_norm": 0.04496999457478523, "learning_rate": 0.01, "loss": 1.977, "step": 68550 }, { "epoch": 7.045529290853032, "grad_norm": 0.07192201167345047, "learning_rate": 0.01, "loss": 1.9696, "step": 68553 }, { "epoch": 7.045837615621788, "grad_norm": 0.07535843551158905, "learning_rate": 0.01, "loss": 1.9875, "step": 68556 }, { "epoch": 7.046145940390545, "grad_norm": 0.04308157041668892, "learning_rate": 0.01, "loss": 1.9404, "step": 68559 }, { "epoch": 7.046454265159301, "grad_norm": 0.15922586619853973, "learning_rate": 0.01, "loss": 2.0141, "step": 68562 }, { "epoch": 7.046762589928058, "grad_norm": 0.06239364296197891, "learning_rate": 0.01, "loss": 1.9914, "step": 68565 }, { "epoch": 7.047070914696814, "grad_norm": 0.1485653668642044, "learning_rate": 0.01, "loss": 1.9785, "step": 68568 }, { "epoch": 7.04737923946557, "grad_norm": 0.07881268113851547, "learning_rate": 0.01, "loss": 2.0164, "step": 68571 }, { "epoch": 7.047687564234327, "grad_norm": 0.037833794951438904, "learning_rate": 0.01, "loss": 1.9801, "step": 68574 }, { "epoch": 7.047995889003083, "grad_norm": 0.06409921497106552, "learning_rate": 0.01, "loss": 1.9668, "step": 68577 }, { "epoch": 7.048304213771839, "grad_norm": 0.041993625462055206, "learning_rate": 0.01, "loss": 2.0, "step": 68580 }, { "epoch": 7.048612538540596, "grad_norm": 0.05115489289164543, "learning_rate": 0.01, "loss": 1.9916, "step": 68583 }, { "epoch": 7.048920863309353, "grad_norm": 0.037257734686136246, "learning_rate": 0.01, "loss": 1.9736, "step": 68586 }, { "epoch": 7.049229188078109, "grad_norm": 0.035398274660110474, "learning_rate": 0.01, "loss": 1.9883, "step": 68589 }, { "epoch": 7.049537512846865, "grad_norm": 0.05111207067966461, "learning_rate": 0.01, "loss": 1.9778, "step": 68592 }, { "epoch": 7.049845837615622, "grad_norm": 0.07091052085161209, "learning_rate": 0.01, "loss": 1.9973, "step": 68595 }, { "epoch": 7.050154162384378, "grad_norm": 0.06888426840305328, "learning_rate": 0.01, "loss": 1.9448, "step": 68598 }, { "epoch": 7.050462487153134, "grad_norm": 0.10825680941343307, "learning_rate": 0.01, "loss": 1.9711, "step": 68601 }, { "epoch": 7.0507708119218915, "grad_norm": 0.13681887090206146, "learning_rate": 0.01, "loss": 1.9963, "step": 68604 }, { "epoch": 7.051079136690648, "grad_norm": 0.09142705798149109, "learning_rate": 0.01, "loss": 1.9916, "step": 68607 }, { "epoch": 7.051387461459404, "grad_norm": 0.06074422225356102, "learning_rate": 0.01, "loss": 1.974, "step": 68610 }, { "epoch": 7.05169578622816, "grad_norm": 0.05737810209393501, "learning_rate": 0.01, "loss": 1.973, "step": 68613 }, { "epoch": 7.052004110996917, "grad_norm": 0.04696836322546005, "learning_rate": 0.01, "loss": 2.0003, "step": 68616 }, { "epoch": 7.052312435765673, "grad_norm": 0.06998541951179504, "learning_rate": 0.01, "loss": 1.9718, "step": 68619 }, { "epoch": 7.052620760534429, "grad_norm": 0.042207296937704086, "learning_rate": 0.01, "loss": 1.9833, "step": 68622 }, { "epoch": 7.052929085303186, "grad_norm": 0.044253282248973846, "learning_rate": 0.01, "loss": 1.9703, "step": 68625 }, { "epoch": 7.053237410071943, "grad_norm": 0.03673321008682251, "learning_rate": 0.01, "loss": 1.9609, "step": 68628 }, { "epoch": 7.053545734840699, "grad_norm": 0.06506076455116272, "learning_rate": 0.01, "loss": 1.9644, "step": 68631 }, { "epoch": 7.053854059609455, "grad_norm": 0.08981776982545853, "learning_rate": 0.01, "loss": 1.9934, "step": 68634 }, { "epoch": 7.054162384378212, "grad_norm": 0.04000147059559822, "learning_rate": 0.01, "loss": 1.9695, "step": 68637 }, { "epoch": 7.054470709146968, "grad_norm": 0.1273726224899292, "learning_rate": 0.01, "loss": 1.9819, "step": 68640 }, { "epoch": 7.054779033915724, "grad_norm": 0.16758222877979279, "learning_rate": 0.01, "loss": 1.9901, "step": 68643 }, { "epoch": 7.055087358684481, "grad_norm": 0.14335897564888, "learning_rate": 0.01, "loss": 1.9979, "step": 68646 }, { "epoch": 7.055395683453237, "grad_norm": 0.08894363045692444, "learning_rate": 0.01, "loss": 1.974, "step": 68649 }, { "epoch": 7.055704008221994, "grad_norm": 0.06796260178089142, "learning_rate": 0.01, "loss": 1.9917, "step": 68652 }, { "epoch": 7.0560123329907505, "grad_norm": 0.09469815343618393, "learning_rate": 0.01, "loss": 2.0015, "step": 68655 }, { "epoch": 7.056320657759507, "grad_norm": 0.07564645260572433, "learning_rate": 0.01, "loss": 1.9671, "step": 68658 }, { "epoch": 7.056628982528263, "grad_norm": 0.03675295040011406, "learning_rate": 0.01, "loss": 1.9526, "step": 68661 }, { "epoch": 7.056937307297019, "grad_norm": 0.03465329483151436, "learning_rate": 0.01, "loss": 1.9661, "step": 68664 }, { "epoch": 7.057245632065776, "grad_norm": 0.05159129574894905, "learning_rate": 0.01, "loss": 1.9681, "step": 68667 }, { "epoch": 7.057553956834532, "grad_norm": 0.10650437325239182, "learning_rate": 0.01, "loss": 1.9968, "step": 68670 }, { "epoch": 7.057862281603289, "grad_norm": 0.07661871612071991, "learning_rate": 0.01, "loss": 1.9898, "step": 68673 }, { "epoch": 7.0581706063720455, "grad_norm": 0.03891372308135033, "learning_rate": 0.01, "loss": 1.9695, "step": 68676 }, { "epoch": 7.058478931140802, "grad_norm": 0.07626038044691086, "learning_rate": 0.01, "loss": 1.9635, "step": 68679 }, { "epoch": 7.058787255909558, "grad_norm": 0.05628202483057976, "learning_rate": 0.01, "loss": 1.9857, "step": 68682 }, { "epoch": 7.059095580678314, "grad_norm": 0.05379130318760872, "learning_rate": 0.01, "loss": 1.9844, "step": 68685 }, { "epoch": 7.059403905447071, "grad_norm": 0.04126124083995819, "learning_rate": 0.01, "loss": 1.9882, "step": 68688 }, { "epoch": 7.059712230215827, "grad_norm": 0.04331153631210327, "learning_rate": 0.01, "loss": 1.9892, "step": 68691 }, { "epoch": 7.060020554984583, "grad_norm": 0.0378563366830349, "learning_rate": 0.01, "loss": 2.0034, "step": 68694 }, { "epoch": 7.060328879753341, "grad_norm": 0.04773402959108353, "learning_rate": 0.01, "loss": 1.9954, "step": 68697 }, { "epoch": 7.060637204522097, "grad_norm": 0.0653529167175293, "learning_rate": 0.01, "loss": 1.983, "step": 68700 }, { "epoch": 7.060945529290853, "grad_norm": 0.08799976110458374, "learning_rate": 0.01, "loss": 1.9795, "step": 68703 }, { "epoch": 7.0612538540596095, "grad_norm": 0.04789255931973457, "learning_rate": 0.01, "loss": 1.9976, "step": 68706 }, { "epoch": 7.061562178828366, "grad_norm": 0.06388677656650543, "learning_rate": 0.01, "loss": 1.9852, "step": 68709 }, { "epoch": 7.061870503597122, "grad_norm": 0.03625022992491722, "learning_rate": 0.01, "loss": 1.9799, "step": 68712 }, { "epoch": 7.062178828365878, "grad_norm": 0.0345403291285038, "learning_rate": 0.01, "loss": 1.9824, "step": 68715 }, { "epoch": 7.062487153134635, "grad_norm": 0.03319702669978142, "learning_rate": 0.01, "loss": 1.9805, "step": 68718 }, { "epoch": 7.062795477903392, "grad_norm": 0.05995936691761017, "learning_rate": 0.01, "loss": 1.9666, "step": 68721 }, { "epoch": 7.063103802672148, "grad_norm": 0.04327778145670891, "learning_rate": 0.01, "loss": 1.9582, "step": 68724 }, { "epoch": 7.0634121274409045, "grad_norm": 0.15124131739139557, "learning_rate": 0.01, "loss": 1.9743, "step": 68727 }, { "epoch": 7.063720452209661, "grad_norm": 0.08264999836683273, "learning_rate": 0.01, "loss": 1.9864, "step": 68730 }, { "epoch": 7.064028776978417, "grad_norm": 0.0442788302898407, "learning_rate": 0.01, "loss": 1.966, "step": 68733 }, { "epoch": 7.064337101747173, "grad_norm": 0.04176313802599907, "learning_rate": 0.01, "loss": 1.9719, "step": 68736 }, { "epoch": 7.06464542651593, "grad_norm": 0.04693405702710152, "learning_rate": 0.01, "loss": 1.9844, "step": 68739 }, { "epoch": 7.064953751284687, "grad_norm": 0.0443805493414402, "learning_rate": 0.01, "loss": 1.9785, "step": 68742 }, { "epoch": 7.065262076053443, "grad_norm": 0.086354561150074, "learning_rate": 0.01, "loss": 1.9793, "step": 68745 }, { "epoch": 7.0655704008222, "grad_norm": 0.05768636614084244, "learning_rate": 0.01, "loss": 1.9786, "step": 68748 }, { "epoch": 7.065878725590956, "grad_norm": 0.08465427905321121, "learning_rate": 0.01, "loss": 1.9761, "step": 68751 }, { "epoch": 7.066187050359712, "grad_norm": 0.08750718086957932, "learning_rate": 0.01, "loss": 1.9596, "step": 68754 }, { "epoch": 7.0664953751284685, "grad_norm": 0.06704618036746979, "learning_rate": 0.01, "loss": 1.9779, "step": 68757 }, { "epoch": 7.066803699897225, "grad_norm": 0.12646348774433136, "learning_rate": 0.01, "loss": 2.0056, "step": 68760 }, { "epoch": 7.067112024665981, "grad_norm": 0.07180266082286835, "learning_rate": 0.01, "loss": 1.9769, "step": 68763 }, { "epoch": 7.067420349434738, "grad_norm": 0.04616543650627136, "learning_rate": 0.01, "loss": 1.9871, "step": 68766 }, { "epoch": 7.067728674203495, "grad_norm": 0.044225241988897324, "learning_rate": 0.01, "loss": 1.9634, "step": 68769 }, { "epoch": 7.068036998972251, "grad_norm": 0.03617358207702637, "learning_rate": 0.01, "loss": 1.9598, "step": 68772 }, { "epoch": 7.068345323741007, "grad_norm": 0.09863235056400299, "learning_rate": 0.01, "loss": 1.9619, "step": 68775 }, { "epoch": 7.0686536485097635, "grad_norm": 0.06617226451635361, "learning_rate": 0.01, "loss": 1.9909, "step": 68778 }, { "epoch": 7.06896197327852, "grad_norm": 0.12394807487726212, "learning_rate": 0.01, "loss": 1.999, "step": 68781 }, { "epoch": 7.069270298047276, "grad_norm": 0.060029856860637665, "learning_rate": 0.01, "loss": 1.9811, "step": 68784 }, { "epoch": 7.069578622816033, "grad_norm": 0.045732829719781876, "learning_rate": 0.01, "loss": 1.9833, "step": 68787 }, { "epoch": 7.06988694758479, "grad_norm": 0.06220393627882004, "learning_rate": 0.01, "loss": 1.982, "step": 68790 }, { "epoch": 7.070195272353546, "grad_norm": 0.04120052605867386, "learning_rate": 0.01, "loss": 1.9707, "step": 68793 }, { "epoch": 7.070503597122302, "grad_norm": 0.049835655838251114, "learning_rate": 0.01, "loss": 1.979, "step": 68796 }, { "epoch": 7.070811921891059, "grad_norm": 0.03683771193027496, "learning_rate": 0.01, "loss": 1.9584, "step": 68799 }, { "epoch": 7.071120246659815, "grad_norm": 0.03647764027118683, "learning_rate": 0.01, "loss": 1.9751, "step": 68802 }, { "epoch": 7.071428571428571, "grad_norm": 0.16000273823738098, "learning_rate": 0.01, "loss": 1.9959, "step": 68805 }, { "epoch": 7.0717368961973275, "grad_norm": 0.03855353966355324, "learning_rate": 0.01, "loss": 1.9611, "step": 68808 }, { "epoch": 7.072045220966085, "grad_norm": 0.040406130254268646, "learning_rate": 0.01, "loss": 1.9596, "step": 68811 }, { "epoch": 7.072353545734841, "grad_norm": 0.03346218913793564, "learning_rate": 0.01, "loss": 1.9683, "step": 68814 }, { "epoch": 7.072661870503597, "grad_norm": 0.11391355842351913, "learning_rate": 0.01, "loss": 1.9859, "step": 68817 }, { "epoch": 7.072970195272354, "grad_norm": 0.060539521276950836, "learning_rate": 0.01, "loss": 1.9865, "step": 68820 }, { "epoch": 7.07327852004111, "grad_norm": 0.1784716248512268, "learning_rate": 0.01, "loss": 1.9659, "step": 68823 }, { "epoch": 7.073586844809866, "grad_norm": 0.04299479350447655, "learning_rate": 0.01, "loss": 2.0007, "step": 68826 }, { "epoch": 7.0738951695786225, "grad_norm": 0.05543602257966995, "learning_rate": 0.01, "loss": 1.9804, "step": 68829 }, { "epoch": 7.074203494347379, "grad_norm": 0.039931852370500565, "learning_rate": 0.01, "loss": 1.9684, "step": 68832 }, { "epoch": 7.074511819116136, "grad_norm": 0.044154778122901917, "learning_rate": 0.01, "loss": 1.9739, "step": 68835 }, { "epoch": 7.074820143884892, "grad_norm": 0.037736304104328156, "learning_rate": 0.01, "loss": 1.9656, "step": 68838 }, { "epoch": 7.075128468653649, "grad_norm": 0.06381729245185852, "learning_rate": 0.01, "loss": 1.9896, "step": 68841 }, { "epoch": 7.075436793422405, "grad_norm": 0.03960365802049637, "learning_rate": 0.01, "loss": 1.9792, "step": 68844 }, { "epoch": 7.075745118191161, "grad_norm": 0.11026263236999512, "learning_rate": 0.01, "loss": 1.968, "step": 68847 }, { "epoch": 7.076053442959918, "grad_norm": 0.09295713901519775, "learning_rate": 0.01, "loss": 1.9986, "step": 68850 }, { "epoch": 7.076361767728674, "grad_norm": 0.07637894153594971, "learning_rate": 0.01, "loss": 1.9915, "step": 68853 }, { "epoch": 7.076670092497431, "grad_norm": 0.062266815453767776, "learning_rate": 0.01, "loss": 1.9789, "step": 68856 }, { "epoch": 7.076978417266187, "grad_norm": 0.09450382739305496, "learning_rate": 0.01, "loss": 1.9909, "step": 68859 }, { "epoch": 7.077286742034944, "grad_norm": 0.07935396581888199, "learning_rate": 0.01, "loss": 1.9868, "step": 68862 }, { "epoch": 7.0775950668037, "grad_norm": 0.08313512057065964, "learning_rate": 0.01, "loss": 1.9769, "step": 68865 }, { "epoch": 7.077903391572456, "grad_norm": 0.044498976320028305, "learning_rate": 0.01, "loss": 1.9484, "step": 68868 }, { "epoch": 7.078211716341213, "grad_norm": 0.054316215217113495, "learning_rate": 0.01, "loss": 1.9839, "step": 68871 }, { "epoch": 7.078520041109969, "grad_norm": 0.07995834946632385, "learning_rate": 0.01, "loss": 1.9799, "step": 68874 }, { "epoch": 7.078828365878725, "grad_norm": 0.04581683129072189, "learning_rate": 0.01, "loss": 1.9578, "step": 68877 }, { "epoch": 7.079136690647482, "grad_norm": 0.03488059714436531, "learning_rate": 0.01, "loss": 1.9651, "step": 68880 }, { "epoch": 7.079445015416239, "grad_norm": 0.0278830174356699, "learning_rate": 0.01, "loss": 2.007, "step": 68883 }, { "epoch": 7.079753340184995, "grad_norm": 0.03560703247785568, "learning_rate": 0.01, "loss": 1.9721, "step": 68886 }, { "epoch": 7.080061664953751, "grad_norm": 0.1039506047964096, "learning_rate": 0.01, "loss": 1.9944, "step": 68889 }, { "epoch": 7.080369989722508, "grad_norm": 0.09519243985414505, "learning_rate": 0.01, "loss": 1.9722, "step": 68892 }, { "epoch": 7.080678314491264, "grad_norm": 0.039108019322156906, "learning_rate": 0.01, "loss": 1.9634, "step": 68895 }, { "epoch": 7.08098663926002, "grad_norm": 0.08773428946733475, "learning_rate": 0.01, "loss": 1.9521, "step": 68898 }, { "epoch": 7.081294964028777, "grad_norm": 0.03855564817786217, "learning_rate": 0.01, "loss": 1.9801, "step": 68901 }, { "epoch": 7.081603288797534, "grad_norm": 0.06529016047716141, "learning_rate": 0.01, "loss": 1.9862, "step": 68904 }, { "epoch": 7.08191161356629, "grad_norm": 0.07360291481018066, "learning_rate": 0.01, "loss": 1.9579, "step": 68907 }, { "epoch": 7.082219938335046, "grad_norm": 0.10342194885015488, "learning_rate": 0.01, "loss": 1.9821, "step": 68910 }, { "epoch": 7.082528263103803, "grad_norm": 0.04935605451464653, "learning_rate": 0.01, "loss": 1.9632, "step": 68913 }, { "epoch": 7.082836587872559, "grad_norm": 0.08817791938781738, "learning_rate": 0.01, "loss": 1.9774, "step": 68916 }, { "epoch": 7.083144912641315, "grad_norm": 0.052056267857551575, "learning_rate": 0.01, "loss": 1.9702, "step": 68919 }, { "epoch": 7.083453237410072, "grad_norm": 0.04656489938497543, "learning_rate": 0.01, "loss": 1.9754, "step": 68922 }, { "epoch": 7.083761562178829, "grad_norm": 0.06415385752916336, "learning_rate": 0.01, "loss": 1.9805, "step": 68925 }, { "epoch": 7.084069886947585, "grad_norm": 0.07505769282579422, "learning_rate": 0.01, "loss": 1.9669, "step": 68928 }, { "epoch": 7.084378211716341, "grad_norm": 0.12667891383171082, "learning_rate": 0.01, "loss": 1.9881, "step": 68931 }, { "epoch": 7.084686536485098, "grad_norm": 0.1055910661816597, "learning_rate": 0.01, "loss": 2.0074, "step": 68934 }, { "epoch": 7.084994861253854, "grad_norm": 0.06105726957321167, "learning_rate": 0.01, "loss": 1.9979, "step": 68937 }, { "epoch": 7.08530318602261, "grad_norm": 0.04314691945910454, "learning_rate": 0.01, "loss": 1.9801, "step": 68940 }, { "epoch": 7.085611510791367, "grad_norm": 0.039434608072042465, "learning_rate": 0.01, "loss": 1.9607, "step": 68943 }, { "epoch": 7.085919835560123, "grad_norm": 0.04623565822839737, "learning_rate": 0.01, "loss": 1.9882, "step": 68946 }, { "epoch": 7.08622816032888, "grad_norm": 0.03403662517666817, "learning_rate": 0.01, "loss": 1.9779, "step": 68949 }, { "epoch": 7.0865364850976365, "grad_norm": 0.044097188860177994, "learning_rate": 0.01, "loss": 1.9733, "step": 68952 }, { "epoch": 7.086844809866393, "grad_norm": 0.1629336178302765, "learning_rate": 0.01, "loss": 1.9813, "step": 68955 }, { "epoch": 7.087153134635149, "grad_norm": 0.061776354908943176, "learning_rate": 0.01, "loss": 2.0016, "step": 68958 }, { "epoch": 7.087461459403905, "grad_norm": 0.053984384983778, "learning_rate": 0.01, "loss": 1.9944, "step": 68961 }, { "epoch": 7.087769784172662, "grad_norm": 0.05198853835463524, "learning_rate": 0.01, "loss": 1.9866, "step": 68964 }, { "epoch": 7.088078108941418, "grad_norm": 0.030461978167295456, "learning_rate": 0.01, "loss": 1.9943, "step": 68967 }, { "epoch": 7.088386433710174, "grad_norm": 0.12187229096889496, "learning_rate": 0.01, "loss": 1.9561, "step": 68970 }, { "epoch": 7.0886947584789315, "grad_norm": 0.046721212565898895, "learning_rate": 0.01, "loss": 1.9762, "step": 68973 }, { "epoch": 7.089003083247688, "grad_norm": 0.113240085542202, "learning_rate": 0.01, "loss": 1.9803, "step": 68976 }, { "epoch": 7.089311408016444, "grad_norm": 0.17302365601062775, "learning_rate": 0.01, "loss": 1.9686, "step": 68979 }, { "epoch": 7.0896197327852, "grad_norm": 0.1341373175382614, "learning_rate": 0.01, "loss": 1.9778, "step": 68982 }, { "epoch": 7.089928057553957, "grad_norm": 0.04806041717529297, "learning_rate": 0.01, "loss": 1.9767, "step": 68985 }, { "epoch": 7.090236382322713, "grad_norm": 0.053426358848810196, "learning_rate": 0.01, "loss": 1.9596, "step": 68988 }, { "epoch": 7.090544707091469, "grad_norm": 0.05874931067228317, "learning_rate": 0.01, "loss": 1.9815, "step": 68991 }, { "epoch": 7.0908530318602265, "grad_norm": 0.04069295898079872, "learning_rate": 0.01, "loss": 1.9676, "step": 68994 }, { "epoch": 7.091161356628983, "grad_norm": 0.04333820566534996, "learning_rate": 0.01, "loss": 1.9745, "step": 68997 }, { "epoch": 7.091469681397739, "grad_norm": 0.08710654824972153, "learning_rate": 0.01, "loss": 1.9945, "step": 69000 }, { "epoch": 7.0917780061664955, "grad_norm": 0.05234698951244354, "learning_rate": 0.01, "loss": 1.9813, "step": 69003 }, { "epoch": 7.092086330935252, "grad_norm": 0.10434626042842865, "learning_rate": 0.01, "loss": 1.9747, "step": 69006 }, { "epoch": 7.092394655704008, "grad_norm": 0.10928107053041458, "learning_rate": 0.01, "loss": 1.9544, "step": 69009 }, { "epoch": 7.092702980472764, "grad_norm": 0.04639718681573868, "learning_rate": 0.01, "loss": 1.975, "step": 69012 }, { "epoch": 7.093011305241521, "grad_norm": 0.04324621334671974, "learning_rate": 0.01, "loss": 1.9815, "step": 69015 }, { "epoch": 7.093319630010278, "grad_norm": 0.04631352052092552, "learning_rate": 0.01, "loss": 2.0098, "step": 69018 }, { "epoch": 7.093627954779034, "grad_norm": 0.09556543827056885, "learning_rate": 0.01, "loss": 1.988, "step": 69021 }, { "epoch": 7.0939362795477905, "grad_norm": 0.15257610380649567, "learning_rate": 0.01, "loss": 1.9704, "step": 69024 }, { "epoch": 7.094244604316547, "grad_norm": 0.060181524604558945, "learning_rate": 0.01, "loss": 1.9758, "step": 69027 }, { "epoch": 7.094552929085303, "grad_norm": 0.04848707467317581, "learning_rate": 0.01, "loss": 1.9597, "step": 69030 }, { "epoch": 7.094861253854059, "grad_norm": 0.05945300683379173, "learning_rate": 0.01, "loss": 1.9627, "step": 69033 }, { "epoch": 7.095169578622816, "grad_norm": 0.08706066012382507, "learning_rate": 0.01, "loss": 1.9766, "step": 69036 }, { "epoch": 7.095477903391572, "grad_norm": 0.12813836336135864, "learning_rate": 0.01, "loss": 1.9801, "step": 69039 }, { "epoch": 7.095786228160329, "grad_norm": 0.08248843252658844, "learning_rate": 0.01, "loss": 1.9904, "step": 69042 }, { "epoch": 7.0960945529290855, "grad_norm": 0.05092019960284233, "learning_rate": 0.01, "loss": 1.9734, "step": 69045 }, { "epoch": 7.096402877697842, "grad_norm": 0.04098663106560707, "learning_rate": 0.01, "loss": 1.9948, "step": 69048 }, { "epoch": 7.096711202466598, "grad_norm": 0.04048779979348183, "learning_rate": 0.01, "loss": 1.9764, "step": 69051 }, { "epoch": 7.0970195272353545, "grad_norm": 0.06100444123148918, "learning_rate": 0.01, "loss": 1.9829, "step": 69054 }, { "epoch": 7.097327852004111, "grad_norm": 0.047660503536462784, "learning_rate": 0.01, "loss": 1.9847, "step": 69057 }, { "epoch": 7.097636176772867, "grad_norm": 0.04160270839929581, "learning_rate": 0.01, "loss": 1.9721, "step": 69060 }, { "epoch": 7.097944501541624, "grad_norm": 0.03557828441262245, "learning_rate": 0.01, "loss": 1.9643, "step": 69063 }, { "epoch": 7.098252826310381, "grad_norm": 0.06803721934556961, "learning_rate": 0.01, "loss": 1.9591, "step": 69066 }, { "epoch": 7.098561151079137, "grad_norm": 0.10636363923549652, "learning_rate": 0.01, "loss": 1.9808, "step": 69069 }, { "epoch": 7.098869475847893, "grad_norm": 0.0738653913140297, "learning_rate": 0.01, "loss": 1.967, "step": 69072 }, { "epoch": 7.0991778006166495, "grad_norm": 0.0479535274207592, "learning_rate": 0.01, "loss": 1.9901, "step": 69075 }, { "epoch": 7.099486125385406, "grad_norm": 0.06804942339658737, "learning_rate": 0.01, "loss": 1.979, "step": 69078 }, { "epoch": 7.099794450154162, "grad_norm": 0.06756333261728287, "learning_rate": 0.01, "loss": 1.9724, "step": 69081 }, { "epoch": 7.100102774922918, "grad_norm": 0.047195300459861755, "learning_rate": 0.01, "loss": 1.9543, "step": 69084 }, { "epoch": 7.100411099691676, "grad_norm": 0.03911405801773071, "learning_rate": 0.01, "loss": 1.9765, "step": 69087 }, { "epoch": 7.100719424460432, "grad_norm": 0.08936751633882523, "learning_rate": 0.01, "loss": 1.9755, "step": 69090 }, { "epoch": 7.101027749229188, "grad_norm": 0.040167100727558136, "learning_rate": 0.01, "loss": 1.9631, "step": 69093 }, { "epoch": 7.1013360739979445, "grad_norm": 0.03756394237279892, "learning_rate": 0.01, "loss": 1.9389, "step": 69096 }, { "epoch": 7.101644398766701, "grad_norm": 0.03320080041885376, "learning_rate": 0.01, "loss": 1.9994, "step": 69099 }, { "epoch": 7.101952723535457, "grad_norm": 0.07805169373750687, "learning_rate": 0.01, "loss": 1.9773, "step": 69102 }, { "epoch": 7.1022610483042135, "grad_norm": 0.06461729109287262, "learning_rate": 0.01, "loss": 1.9929, "step": 69105 }, { "epoch": 7.10256937307297, "grad_norm": 0.03744588792324066, "learning_rate": 0.01, "loss": 1.9464, "step": 69108 }, { "epoch": 7.102877697841727, "grad_norm": 0.03426771238446236, "learning_rate": 0.01, "loss": 1.9547, "step": 69111 }, { "epoch": 7.103186022610483, "grad_norm": 0.055179059505462646, "learning_rate": 0.01, "loss": 1.9764, "step": 69114 }, { "epoch": 7.10349434737924, "grad_norm": 0.0941012054681778, "learning_rate": 0.01, "loss": 1.9946, "step": 69117 }, { "epoch": 7.103802672147996, "grad_norm": 0.04403389245271683, "learning_rate": 0.01, "loss": 1.9514, "step": 69120 }, { "epoch": 7.104110996916752, "grad_norm": 0.06421434134244919, "learning_rate": 0.01, "loss": 1.9889, "step": 69123 }, { "epoch": 7.1044193216855085, "grad_norm": 0.06467723846435547, "learning_rate": 0.01, "loss": 1.9694, "step": 69126 }, { "epoch": 7.104727646454265, "grad_norm": 0.05050961673259735, "learning_rate": 0.01, "loss": 1.9788, "step": 69129 }, { "epoch": 7.105035971223022, "grad_norm": 0.05761819705367088, "learning_rate": 0.01, "loss": 1.9626, "step": 69132 }, { "epoch": 7.105344295991778, "grad_norm": 0.04577011242508888, "learning_rate": 0.01, "loss": 1.9693, "step": 69135 }, { "epoch": 7.105652620760535, "grad_norm": 0.06268461793661118, "learning_rate": 0.01, "loss": 1.9851, "step": 69138 }, { "epoch": 7.105960945529291, "grad_norm": 0.039437875151634216, "learning_rate": 0.01, "loss": 1.9611, "step": 69141 }, { "epoch": 7.106269270298047, "grad_norm": 0.09671974182128906, "learning_rate": 0.01, "loss": 1.9624, "step": 69144 }, { "epoch": 7.1065775950668035, "grad_norm": 0.08435185253620148, "learning_rate": 0.01, "loss": 1.9883, "step": 69147 }, { "epoch": 7.10688591983556, "grad_norm": 0.11098583042621613, "learning_rate": 0.01, "loss": 1.9784, "step": 69150 }, { "epoch": 7.107194244604316, "grad_norm": 0.04098542779684067, "learning_rate": 0.01, "loss": 1.9888, "step": 69153 }, { "epoch": 7.107502569373073, "grad_norm": 0.0462697297334671, "learning_rate": 0.01, "loss": 1.9783, "step": 69156 }, { "epoch": 7.10781089414183, "grad_norm": 0.03672792762517929, "learning_rate": 0.01, "loss": 1.9662, "step": 69159 }, { "epoch": 7.108119218910586, "grad_norm": 0.06978480517864227, "learning_rate": 0.01, "loss": 1.977, "step": 69162 }, { "epoch": 7.108427543679342, "grad_norm": 0.04515951871871948, "learning_rate": 0.01, "loss": 1.9847, "step": 69165 }, { "epoch": 7.108735868448099, "grad_norm": 0.03993092477321625, "learning_rate": 0.01, "loss": 1.958, "step": 69168 }, { "epoch": 7.109044193216855, "grad_norm": 0.0673045888543129, "learning_rate": 0.01, "loss": 1.955, "step": 69171 }, { "epoch": 7.109352517985611, "grad_norm": 0.05873231217265129, "learning_rate": 0.01, "loss": 2.0119, "step": 69174 }, { "epoch": 7.1096608427543675, "grad_norm": 0.053032733500003815, "learning_rate": 0.01, "loss": 1.9752, "step": 69177 }, { "epoch": 7.109969167523125, "grad_norm": 0.0582476407289505, "learning_rate": 0.01, "loss": 1.9783, "step": 69180 }, { "epoch": 7.110277492291881, "grad_norm": 0.08659794926643372, "learning_rate": 0.01, "loss": 1.9882, "step": 69183 }, { "epoch": 7.110585817060637, "grad_norm": 0.0926288440823555, "learning_rate": 0.01, "loss": 1.9885, "step": 69186 }, { "epoch": 7.110894141829394, "grad_norm": 0.07831533998250961, "learning_rate": 0.01, "loss": 1.9737, "step": 69189 }, { "epoch": 7.11120246659815, "grad_norm": 0.04761062562465668, "learning_rate": 0.01, "loss": 1.979, "step": 69192 }, { "epoch": 7.111510791366906, "grad_norm": 0.055868249386548996, "learning_rate": 0.01, "loss": 1.9807, "step": 69195 }, { "epoch": 7.1118191161356625, "grad_norm": 0.04533650353550911, "learning_rate": 0.01, "loss": 1.9766, "step": 69198 }, { "epoch": 7.11212744090442, "grad_norm": 0.10463204234838486, "learning_rate": 0.01, "loss": 1.9717, "step": 69201 }, { "epoch": 7.112435765673176, "grad_norm": 0.07214853912591934, "learning_rate": 0.01, "loss": 1.9586, "step": 69204 }, { "epoch": 7.112744090441932, "grad_norm": 0.059007078409194946, "learning_rate": 0.01, "loss": 1.9753, "step": 69207 }, { "epoch": 7.113052415210689, "grad_norm": 0.054935675114393234, "learning_rate": 0.01, "loss": 1.989, "step": 69210 }, { "epoch": 7.113360739979445, "grad_norm": 0.05916939303278923, "learning_rate": 0.01, "loss": 1.9505, "step": 69213 }, { "epoch": 7.113669064748201, "grad_norm": 0.08450639992952347, "learning_rate": 0.01, "loss": 2.0054, "step": 69216 }, { "epoch": 7.113977389516958, "grad_norm": 0.06881245970726013, "learning_rate": 0.01, "loss": 1.9586, "step": 69219 }, { "epoch": 7.114285714285714, "grad_norm": 0.07292987406253815, "learning_rate": 0.01, "loss": 1.9757, "step": 69222 }, { "epoch": 7.114594039054471, "grad_norm": 0.0532345175743103, "learning_rate": 0.01, "loss": 1.9755, "step": 69225 }, { "epoch": 7.114902363823227, "grad_norm": 0.051116786897182465, "learning_rate": 0.01, "loss": 1.9656, "step": 69228 }, { "epoch": 7.115210688591984, "grad_norm": 0.0622846744954586, "learning_rate": 0.01, "loss": 1.9804, "step": 69231 }, { "epoch": 7.11551901336074, "grad_norm": 0.052440278232097626, "learning_rate": 0.01, "loss": 1.9828, "step": 69234 }, { "epoch": 7.115827338129496, "grad_norm": 0.03851577267050743, "learning_rate": 0.01, "loss": 1.9929, "step": 69237 }, { "epoch": 7.116135662898253, "grad_norm": 0.10518687963485718, "learning_rate": 0.01, "loss": 1.966, "step": 69240 }, { "epoch": 7.116443987667009, "grad_norm": 0.08948950469493866, "learning_rate": 0.01, "loss": 1.9671, "step": 69243 }, { "epoch": 7.116752312435766, "grad_norm": 0.13956782221794128, "learning_rate": 0.01, "loss": 1.9851, "step": 69246 }, { "epoch": 7.117060637204522, "grad_norm": 0.06303557753562927, "learning_rate": 0.01, "loss": 1.9687, "step": 69249 }, { "epoch": 7.117368961973279, "grad_norm": 0.056266095489263535, "learning_rate": 0.01, "loss": 1.9457, "step": 69252 }, { "epoch": 7.117677286742035, "grad_norm": 0.07958390563726425, "learning_rate": 0.01, "loss": 1.994, "step": 69255 }, { "epoch": 7.117985611510791, "grad_norm": 0.0753171443939209, "learning_rate": 0.01, "loss": 1.9566, "step": 69258 }, { "epoch": 7.118293936279548, "grad_norm": 0.05444815382361412, "learning_rate": 0.01, "loss": 2.0003, "step": 69261 }, { "epoch": 7.118602261048304, "grad_norm": 0.061939787119627, "learning_rate": 0.01, "loss": 1.9891, "step": 69264 }, { "epoch": 7.11891058581706, "grad_norm": 0.118798166513443, "learning_rate": 0.01, "loss": 1.9883, "step": 69267 }, { "epoch": 7.1192189105858175, "grad_norm": 0.048886317759752274, "learning_rate": 0.01, "loss": 1.9912, "step": 69270 }, { "epoch": 7.119527235354574, "grad_norm": 0.05422835797071457, "learning_rate": 0.01, "loss": 1.9694, "step": 69273 }, { "epoch": 7.11983556012333, "grad_norm": 0.05022811144590378, "learning_rate": 0.01, "loss": 1.9571, "step": 69276 }, { "epoch": 7.120143884892086, "grad_norm": 0.07943117618560791, "learning_rate": 0.01, "loss": 1.9647, "step": 69279 }, { "epoch": 7.120452209660843, "grad_norm": 0.03822775557637215, "learning_rate": 0.01, "loss": 1.9949, "step": 69282 }, { "epoch": 7.120760534429599, "grad_norm": 0.06427093595266342, "learning_rate": 0.01, "loss": 1.9825, "step": 69285 }, { "epoch": 7.121068859198355, "grad_norm": 0.046098969876766205, "learning_rate": 0.01, "loss": 1.958, "step": 69288 }, { "epoch": 7.121377183967112, "grad_norm": 0.08187385648488998, "learning_rate": 0.01, "loss": 1.9806, "step": 69291 }, { "epoch": 7.121685508735869, "grad_norm": 0.0525503009557724, "learning_rate": 0.01, "loss": 2.0037, "step": 69294 }, { "epoch": 7.121993833504625, "grad_norm": 0.03687702864408493, "learning_rate": 0.01, "loss": 2.0015, "step": 69297 }, { "epoch": 7.122302158273381, "grad_norm": 0.06423792988061905, "learning_rate": 0.01, "loss": 1.9743, "step": 69300 }, { "epoch": 7.122610483042138, "grad_norm": 0.12554574012756348, "learning_rate": 0.01, "loss": 1.9514, "step": 69303 }, { "epoch": 7.122918807810894, "grad_norm": 0.06923548877239227, "learning_rate": 0.01, "loss": 2.0027, "step": 69306 }, { "epoch": 7.12322713257965, "grad_norm": 0.0461004301905632, "learning_rate": 0.01, "loss": 2.0095, "step": 69309 }, { "epoch": 7.123535457348407, "grad_norm": 0.04713965207338333, "learning_rate": 0.01, "loss": 2.0065, "step": 69312 }, { "epoch": 7.123843782117164, "grad_norm": 0.043988946825265884, "learning_rate": 0.01, "loss": 2.0037, "step": 69315 }, { "epoch": 7.12415210688592, "grad_norm": 0.03573235124349594, "learning_rate": 0.01, "loss": 1.9759, "step": 69318 }, { "epoch": 7.1244604316546765, "grad_norm": 0.03654714301228523, "learning_rate": 0.01, "loss": 1.9578, "step": 69321 }, { "epoch": 7.124768756423433, "grad_norm": 0.11765187233686447, "learning_rate": 0.01, "loss": 1.9724, "step": 69324 }, { "epoch": 7.125077081192189, "grad_norm": 0.06880181282758713, "learning_rate": 0.01, "loss": 1.9814, "step": 69327 }, { "epoch": 7.125385405960945, "grad_norm": 0.056520819664001465, "learning_rate": 0.01, "loss": 1.9927, "step": 69330 }, { "epoch": 7.125693730729702, "grad_norm": 0.04125680774450302, "learning_rate": 0.01, "loss": 1.9701, "step": 69333 }, { "epoch": 7.126002055498458, "grad_norm": 0.04342076554894447, "learning_rate": 0.01, "loss": 1.9927, "step": 69336 }, { "epoch": 7.126310380267215, "grad_norm": 0.03851790353655815, "learning_rate": 0.01, "loss": 1.9606, "step": 69339 }, { "epoch": 7.1266187050359715, "grad_norm": 0.03577910736203194, "learning_rate": 0.01, "loss": 1.9649, "step": 69342 }, { "epoch": 7.126927029804728, "grad_norm": 0.03813933581113815, "learning_rate": 0.01, "loss": 1.9886, "step": 69345 }, { "epoch": 7.127235354573484, "grad_norm": 0.05139464884996414, "learning_rate": 0.01, "loss": 1.9736, "step": 69348 }, { "epoch": 7.12754367934224, "grad_norm": 0.1345084309577942, "learning_rate": 0.01, "loss": 1.9685, "step": 69351 }, { "epoch": 7.127852004110997, "grad_norm": 0.04550566524267197, "learning_rate": 0.01, "loss": 1.9777, "step": 69354 }, { "epoch": 7.128160328879753, "grad_norm": 0.056097351014614105, "learning_rate": 0.01, "loss": 1.9631, "step": 69357 }, { "epoch": 7.128468653648509, "grad_norm": 0.03391636162996292, "learning_rate": 0.01, "loss": 1.9847, "step": 69360 }, { "epoch": 7.1287769784172665, "grad_norm": 0.04839693009853363, "learning_rate": 0.01, "loss": 1.9795, "step": 69363 }, { "epoch": 7.129085303186023, "grad_norm": 0.053586047142744064, "learning_rate": 0.01, "loss": 1.9974, "step": 69366 }, { "epoch": 7.129393627954779, "grad_norm": 0.07650946080684662, "learning_rate": 0.01, "loss": 1.9524, "step": 69369 }, { "epoch": 7.1297019527235355, "grad_norm": 0.12655211985111237, "learning_rate": 0.01, "loss": 1.9993, "step": 69372 }, { "epoch": 7.130010277492292, "grad_norm": 0.04835767298936844, "learning_rate": 0.01, "loss": 1.9921, "step": 69375 }, { "epoch": 7.130318602261048, "grad_norm": 0.10156697034835815, "learning_rate": 0.01, "loss": 1.9829, "step": 69378 }, { "epoch": 7.130626927029804, "grad_norm": 0.04780452698469162, "learning_rate": 0.01, "loss": 1.9807, "step": 69381 }, { "epoch": 7.130935251798562, "grad_norm": 0.06014325097203255, "learning_rate": 0.01, "loss": 1.9798, "step": 69384 }, { "epoch": 7.131243576567318, "grad_norm": 0.0386577844619751, "learning_rate": 0.01, "loss": 1.9658, "step": 69387 }, { "epoch": 7.131551901336074, "grad_norm": 0.06388622522354126, "learning_rate": 0.01, "loss": 1.9952, "step": 69390 }, { "epoch": 7.1318602261048305, "grad_norm": 0.08713452517986298, "learning_rate": 0.01, "loss": 1.9767, "step": 69393 }, { "epoch": 7.132168550873587, "grad_norm": 0.12167992442846298, "learning_rate": 0.01, "loss": 1.9923, "step": 69396 }, { "epoch": 7.132476875642343, "grad_norm": 0.12622815370559692, "learning_rate": 0.01, "loss": 1.9918, "step": 69399 }, { "epoch": 7.132785200411099, "grad_norm": 0.052541423588991165, "learning_rate": 0.01, "loss": 1.9789, "step": 69402 }, { "epoch": 7.133093525179856, "grad_norm": 0.035066355019807816, "learning_rate": 0.01, "loss": 1.958, "step": 69405 }, { "epoch": 7.133401849948613, "grad_norm": 0.047106850892305374, "learning_rate": 0.01, "loss": 1.9816, "step": 69408 }, { "epoch": 7.133710174717369, "grad_norm": 0.12262306362390518, "learning_rate": 0.01, "loss": 1.9878, "step": 69411 }, { "epoch": 7.1340184994861255, "grad_norm": 0.0389021597802639, "learning_rate": 0.01, "loss": 1.9763, "step": 69414 }, { "epoch": 7.134326824254882, "grad_norm": 0.03964044153690338, "learning_rate": 0.01, "loss": 2.0012, "step": 69417 }, { "epoch": 7.134635149023638, "grad_norm": 0.03052535466849804, "learning_rate": 0.01, "loss": 1.9718, "step": 69420 }, { "epoch": 7.1349434737923945, "grad_norm": 0.036022480577230453, "learning_rate": 0.01, "loss": 1.9841, "step": 69423 }, { "epoch": 7.135251798561151, "grad_norm": 0.055815573781728745, "learning_rate": 0.01, "loss": 1.9675, "step": 69426 }, { "epoch": 7.135560123329907, "grad_norm": 0.05197738856077194, "learning_rate": 0.01, "loss": 1.9608, "step": 69429 }, { "epoch": 7.135868448098664, "grad_norm": 0.1406523585319519, "learning_rate": 0.01, "loss": 1.984, "step": 69432 }, { "epoch": 7.136176772867421, "grad_norm": 0.056158799678087234, "learning_rate": 0.01, "loss": 1.9983, "step": 69435 }, { "epoch": 7.136485097636177, "grad_norm": 0.10209375619888306, "learning_rate": 0.01, "loss": 1.9548, "step": 69438 }, { "epoch": 7.136793422404933, "grad_norm": 0.12295509874820709, "learning_rate": 0.01, "loss": 1.9824, "step": 69441 }, { "epoch": 7.1371017471736895, "grad_norm": 0.09482801705598831, "learning_rate": 0.01, "loss": 1.9682, "step": 69444 }, { "epoch": 7.137410071942446, "grad_norm": 0.043474819511175156, "learning_rate": 0.01, "loss": 1.9464, "step": 69447 }, { "epoch": 7.137718396711202, "grad_norm": 0.03609083592891693, "learning_rate": 0.01, "loss": 1.9708, "step": 69450 }, { "epoch": 7.138026721479959, "grad_norm": 0.0358879528939724, "learning_rate": 0.01, "loss": 1.9952, "step": 69453 }, { "epoch": 7.138335046248716, "grad_norm": 0.04629027098417282, "learning_rate": 0.01, "loss": 1.9916, "step": 69456 }, { "epoch": 7.138643371017472, "grad_norm": 0.04923400655388832, "learning_rate": 0.01, "loss": 1.9754, "step": 69459 }, { "epoch": 7.138951695786228, "grad_norm": 0.05828343704342842, "learning_rate": 0.01, "loss": 1.9671, "step": 69462 }, { "epoch": 7.1392600205549845, "grad_norm": 0.059361234307289124, "learning_rate": 0.01, "loss": 1.9648, "step": 69465 }, { "epoch": 7.139568345323741, "grad_norm": 0.04384483024477959, "learning_rate": 0.01, "loss": 1.9761, "step": 69468 }, { "epoch": 7.139876670092497, "grad_norm": 0.03761192038655281, "learning_rate": 0.01, "loss": 1.9673, "step": 69471 }, { "epoch": 7.1401849948612535, "grad_norm": 0.14516139030456543, "learning_rate": 0.01, "loss": 1.9712, "step": 69474 }, { "epoch": 7.140493319630011, "grad_norm": 0.12505464255809784, "learning_rate": 0.01, "loss": 1.9616, "step": 69477 }, { "epoch": 7.140801644398767, "grad_norm": 0.06923521310091019, "learning_rate": 0.01, "loss": 1.9746, "step": 69480 }, { "epoch": 7.141109969167523, "grad_norm": 0.03950193524360657, "learning_rate": 0.01, "loss": 1.9872, "step": 69483 }, { "epoch": 7.14141829393628, "grad_norm": 0.05078789219260216, "learning_rate": 0.01, "loss": 1.9877, "step": 69486 }, { "epoch": 7.141726618705036, "grad_norm": 0.03560269996523857, "learning_rate": 0.01, "loss": 1.9856, "step": 69489 }, { "epoch": 7.142034943473792, "grad_norm": 0.050856418907642365, "learning_rate": 0.01, "loss": 1.9545, "step": 69492 }, { "epoch": 7.1423432682425485, "grad_norm": 0.06913231313228607, "learning_rate": 0.01, "loss": 1.975, "step": 69495 }, { "epoch": 7.142651593011305, "grad_norm": 0.11764489114284515, "learning_rate": 0.01, "loss": 2.0103, "step": 69498 }, { "epoch": 7.142959917780062, "grad_norm": 0.062085945159196854, "learning_rate": 0.01, "loss": 1.9847, "step": 69501 }, { "epoch": 7.143268242548818, "grad_norm": 0.029841940850019455, "learning_rate": 0.01, "loss": 1.9718, "step": 69504 }, { "epoch": 7.143576567317575, "grad_norm": 0.05607277899980545, "learning_rate": 0.01, "loss": 1.9823, "step": 69507 }, { "epoch": 7.143884892086331, "grad_norm": 0.03662040829658508, "learning_rate": 0.01, "loss": 1.9702, "step": 69510 }, { "epoch": 7.144193216855087, "grad_norm": 0.03297117352485657, "learning_rate": 0.01, "loss": 1.9742, "step": 69513 }, { "epoch": 7.1445015416238435, "grad_norm": 0.044707778841257095, "learning_rate": 0.01, "loss": 1.9912, "step": 69516 }, { "epoch": 7.1448098663926, "grad_norm": 0.10864292085170746, "learning_rate": 0.01, "loss": 1.9665, "step": 69519 }, { "epoch": 7.145118191161357, "grad_norm": 0.07812972366809845, "learning_rate": 0.01, "loss": 1.9746, "step": 69522 }, { "epoch": 7.145426515930113, "grad_norm": 0.03777284547686577, "learning_rate": 0.01, "loss": 2.0109, "step": 69525 }, { "epoch": 7.14573484069887, "grad_norm": 0.04257459193468094, "learning_rate": 0.01, "loss": 1.9318, "step": 69528 }, { "epoch": 7.146043165467626, "grad_norm": 0.04293552413582802, "learning_rate": 0.01, "loss": 1.9917, "step": 69531 }, { "epoch": 7.146351490236382, "grad_norm": 0.06645122170448303, "learning_rate": 0.01, "loss": 1.99, "step": 69534 }, { "epoch": 7.146659815005139, "grad_norm": 0.04492383822798729, "learning_rate": 0.01, "loss": 1.9484, "step": 69537 }, { "epoch": 7.146968139773895, "grad_norm": 0.03826465830206871, "learning_rate": 0.01, "loss": 1.9623, "step": 69540 }, { "epoch": 7.147276464542651, "grad_norm": 0.09625104069709778, "learning_rate": 0.01, "loss": 1.9861, "step": 69543 }, { "epoch": 7.147584789311408, "grad_norm": 0.08541668951511383, "learning_rate": 0.01, "loss": 1.9542, "step": 69546 }, { "epoch": 7.147893114080165, "grad_norm": 0.05660329386591911, "learning_rate": 0.01, "loss": 1.9683, "step": 69549 }, { "epoch": 7.148201438848921, "grad_norm": 0.04568130522966385, "learning_rate": 0.01, "loss": 1.9546, "step": 69552 }, { "epoch": 7.148509763617677, "grad_norm": 0.08119482547044754, "learning_rate": 0.01, "loss": 1.9708, "step": 69555 }, { "epoch": 7.148818088386434, "grad_norm": 0.05496470630168915, "learning_rate": 0.01, "loss": 1.9446, "step": 69558 }, { "epoch": 7.14912641315519, "grad_norm": 0.09700389951467514, "learning_rate": 0.01, "loss": 1.9821, "step": 69561 }, { "epoch": 7.149434737923946, "grad_norm": 0.0743723064661026, "learning_rate": 0.01, "loss": 1.9722, "step": 69564 }, { "epoch": 7.149743062692703, "grad_norm": 0.08218356221914291, "learning_rate": 0.01, "loss": 1.9624, "step": 69567 }, { "epoch": 7.15005138746146, "grad_norm": 0.04347396269440651, "learning_rate": 0.01, "loss": 2.0071, "step": 69570 }, { "epoch": 7.150359712230216, "grad_norm": 0.03787241131067276, "learning_rate": 0.01, "loss": 1.9661, "step": 69573 }, { "epoch": 7.150668036998972, "grad_norm": 0.05633244663476944, "learning_rate": 0.01, "loss": 1.9738, "step": 69576 }, { "epoch": 7.150976361767729, "grad_norm": 0.09029576182365417, "learning_rate": 0.01, "loss": 1.9817, "step": 69579 }, { "epoch": 7.151284686536485, "grad_norm": 0.12735773622989655, "learning_rate": 0.01, "loss": 1.9838, "step": 69582 }, { "epoch": 7.151593011305241, "grad_norm": 0.06273090839385986, "learning_rate": 0.01, "loss": 1.9606, "step": 69585 }, { "epoch": 7.151901336073998, "grad_norm": 0.052082959562540054, "learning_rate": 0.01, "loss": 1.9786, "step": 69588 }, { "epoch": 7.152209660842755, "grad_norm": 0.08231306821107864, "learning_rate": 0.01, "loss": 1.9523, "step": 69591 }, { "epoch": 7.152517985611511, "grad_norm": 0.07055219262838364, "learning_rate": 0.01, "loss": 1.9863, "step": 69594 }, { "epoch": 7.152826310380267, "grad_norm": 0.07429744303226471, "learning_rate": 0.01, "loss": 1.961, "step": 69597 }, { "epoch": 7.153134635149024, "grad_norm": 0.11131955683231354, "learning_rate": 0.01, "loss": 1.9406, "step": 69600 }, { "epoch": 7.15344295991778, "grad_norm": 0.1321670562028885, "learning_rate": 0.01, "loss": 1.9647, "step": 69603 }, { "epoch": 7.153751284686536, "grad_norm": 0.07883578538894653, "learning_rate": 0.01, "loss": 1.9847, "step": 69606 }, { "epoch": 7.154059609455293, "grad_norm": 0.06068054959177971, "learning_rate": 0.01, "loss": 1.9774, "step": 69609 }, { "epoch": 7.154367934224049, "grad_norm": 0.04163357615470886, "learning_rate": 0.01, "loss": 1.9895, "step": 69612 }, { "epoch": 7.154676258992806, "grad_norm": 0.04159896448254585, "learning_rate": 0.01, "loss": 1.9618, "step": 69615 }, { "epoch": 7.154984583761562, "grad_norm": 0.053873077034950256, "learning_rate": 0.01, "loss": 1.9872, "step": 69618 }, { "epoch": 7.155292908530319, "grad_norm": 0.11469054222106934, "learning_rate": 0.01, "loss": 1.9929, "step": 69621 }, { "epoch": 7.155601233299075, "grad_norm": 0.06450911611318588, "learning_rate": 0.01, "loss": 1.9703, "step": 69624 }, { "epoch": 7.155909558067831, "grad_norm": 0.07107888162136078, "learning_rate": 0.01, "loss": 1.9622, "step": 69627 }, { "epoch": 7.156217882836588, "grad_norm": 0.05037635937333107, "learning_rate": 0.01, "loss": 1.9885, "step": 69630 }, { "epoch": 7.156526207605344, "grad_norm": 0.04990808293223381, "learning_rate": 0.01, "loss": 1.9811, "step": 69633 }, { "epoch": 7.1568345323741, "grad_norm": 0.10353230684995651, "learning_rate": 0.01, "loss": 1.9704, "step": 69636 }, { "epoch": 7.1571428571428575, "grad_norm": 0.05986601114273071, "learning_rate": 0.01, "loss": 2.0041, "step": 69639 }, { "epoch": 7.157451181911614, "grad_norm": 0.03945085406303406, "learning_rate": 0.01, "loss": 1.9937, "step": 69642 }, { "epoch": 7.15775950668037, "grad_norm": 0.08378833532333374, "learning_rate": 0.01, "loss": 2.0033, "step": 69645 }, { "epoch": 7.158067831449126, "grad_norm": 0.07518744468688965, "learning_rate": 0.01, "loss": 1.9874, "step": 69648 }, { "epoch": 7.158376156217883, "grad_norm": 0.06334148347377777, "learning_rate": 0.01, "loss": 1.9818, "step": 69651 }, { "epoch": 7.158684480986639, "grad_norm": 0.08733922243118286, "learning_rate": 0.01, "loss": 1.9501, "step": 69654 }, { "epoch": 7.158992805755395, "grad_norm": 0.051821425557136536, "learning_rate": 0.01, "loss": 1.9445, "step": 69657 }, { "epoch": 7.1593011305241525, "grad_norm": 0.05659188702702522, "learning_rate": 0.01, "loss": 1.9872, "step": 69660 }, { "epoch": 7.159609455292909, "grad_norm": 0.07240952551364899, "learning_rate": 0.01, "loss": 1.9653, "step": 69663 }, { "epoch": 7.159917780061665, "grad_norm": 0.045318614691495895, "learning_rate": 0.01, "loss": 1.9836, "step": 69666 }, { "epoch": 7.160226104830421, "grad_norm": 0.08784624189138412, "learning_rate": 0.01, "loss": 1.9606, "step": 69669 }, { "epoch": 7.160534429599178, "grad_norm": 0.11292146146297455, "learning_rate": 0.01, "loss": 1.9714, "step": 69672 }, { "epoch": 7.160842754367934, "grad_norm": 0.22784489393234253, "learning_rate": 0.01, "loss": 2.0009, "step": 69675 }, { "epoch": 7.16115107913669, "grad_norm": 0.07886439561843872, "learning_rate": 0.01, "loss": 1.9758, "step": 69678 }, { "epoch": 7.161459403905447, "grad_norm": 0.15344084799289703, "learning_rate": 0.01, "loss": 1.972, "step": 69681 }, { "epoch": 7.161767728674204, "grad_norm": 0.07547879219055176, "learning_rate": 0.01, "loss": 1.9778, "step": 69684 }, { "epoch": 7.16207605344296, "grad_norm": 0.051672521978616714, "learning_rate": 0.01, "loss": 1.9542, "step": 69687 }, { "epoch": 7.1623843782117165, "grad_norm": 0.03164055570960045, "learning_rate": 0.01, "loss": 1.9569, "step": 69690 }, { "epoch": 7.162692702980473, "grad_norm": 0.03657243773341179, "learning_rate": 0.01, "loss": 1.98, "step": 69693 }, { "epoch": 7.163001027749229, "grad_norm": 0.0893481895327568, "learning_rate": 0.01, "loss": 1.9725, "step": 69696 }, { "epoch": 7.163309352517985, "grad_norm": 0.043061092495918274, "learning_rate": 0.01, "loss": 1.9895, "step": 69699 }, { "epoch": 7.163617677286742, "grad_norm": 0.04253191873431206, "learning_rate": 0.01, "loss": 1.9728, "step": 69702 }, { "epoch": 7.163926002055499, "grad_norm": 0.035961344838142395, "learning_rate": 0.01, "loss": 1.9686, "step": 69705 }, { "epoch": 7.164234326824255, "grad_norm": 0.03632194176316261, "learning_rate": 0.01, "loss": 1.9617, "step": 69708 }, { "epoch": 7.1645426515930115, "grad_norm": 0.038655634969472885, "learning_rate": 0.01, "loss": 1.9831, "step": 69711 }, { "epoch": 7.164850976361768, "grad_norm": 0.06551500409841537, "learning_rate": 0.01, "loss": 1.9837, "step": 69714 }, { "epoch": 7.165159301130524, "grad_norm": 0.06754742562770844, "learning_rate": 0.01, "loss": 2.0004, "step": 69717 }, { "epoch": 7.16546762589928, "grad_norm": 0.04923485219478607, "learning_rate": 0.01, "loss": 1.9503, "step": 69720 }, { "epoch": 7.165775950668037, "grad_norm": 0.13406577706336975, "learning_rate": 0.01, "loss": 1.9774, "step": 69723 }, { "epoch": 7.166084275436793, "grad_norm": 0.07451289147138596, "learning_rate": 0.01, "loss": 1.9783, "step": 69726 }, { "epoch": 7.16639260020555, "grad_norm": 0.03220764547586441, "learning_rate": 0.01, "loss": 1.9517, "step": 69729 }, { "epoch": 7.1667009249743066, "grad_norm": 0.041585735976696014, "learning_rate": 0.01, "loss": 1.9877, "step": 69732 }, { "epoch": 7.167009249743063, "grad_norm": 0.030779125168919563, "learning_rate": 0.01, "loss": 1.9807, "step": 69735 }, { "epoch": 7.167317574511819, "grad_norm": 0.05135290324687958, "learning_rate": 0.01, "loss": 1.965, "step": 69738 }, { "epoch": 7.1676258992805755, "grad_norm": 0.12039119750261307, "learning_rate": 0.01, "loss": 1.9639, "step": 69741 }, { "epoch": 7.167934224049332, "grad_norm": 0.06299499422311783, "learning_rate": 0.01, "loss": 1.9778, "step": 69744 }, { "epoch": 7.168242548818088, "grad_norm": 0.09064209461212158, "learning_rate": 0.01, "loss": 1.9736, "step": 69747 }, { "epoch": 7.168550873586844, "grad_norm": 0.08681541681289673, "learning_rate": 0.01, "loss": 1.9765, "step": 69750 }, { "epoch": 7.168859198355602, "grad_norm": 0.04912255331873894, "learning_rate": 0.01, "loss": 1.963, "step": 69753 }, { "epoch": 7.169167523124358, "grad_norm": 0.07424995303153992, "learning_rate": 0.01, "loss": 1.9692, "step": 69756 }, { "epoch": 7.169475847893114, "grad_norm": 0.0543096549808979, "learning_rate": 0.01, "loss": 1.9538, "step": 69759 }, { "epoch": 7.1697841726618705, "grad_norm": 0.05928770452737808, "learning_rate": 0.01, "loss": 1.9768, "step": 69762 }, { "epoch": 7.170092497430627, "grad_norm": 0.07718564569950104, "learning_rate": 0.01, "loss": 2.0063, "step": 69765 }, { "epoch": 7.170400822199383, "grad_norm": 0.040126901119947433, "learning_rate": 0.01, "loss": 1.979, "step": 69768 }, { "epoch": 7.170709146968139, "grad_norm": 0.057264335453510284, "learning_rate": 0.01, "loss": 1.9701, "step": 69771 }, { "epoch": 7.171017471736896, "grad_norm": 0.05051598697900772, "learning_rate": 0.01, "loss": 1.9898, "step": 69774 }, { "epoch": 7.171325796505653, "grad_norm": 0.06816336512565613, "learning_rate": 0.01, "loss": 1.9539, "step": 69777 }, { "epoch": 7.171634121274409, "grad_norm": 0.19789281487464905, "learning_rate": 0.01, "loss": 1.9878, "step": 69780 }, { "epoch": 7.1719424460431656, "grad_norm": 0.11976084858179092, "learning_rate": 0.01, "loss": 1.9573, "step": 69783 }, { "epoch": 7.172250770811922, "grad_norm": 0.15244784951210022, "learning_rate": 0.01, "loss": 1.9644, "step": 69786 }, { "epoch": 7.172559095580678, "grad_norm": 0.11733053624629974, "learning_rate": 0.01, "loss": 1.9922, "step": 69789 }, { "epoch": 7.1728674203494345, "grad_norm": 0.04050086438655853, "learning_rate": 0.01, "loss": 1.9928, "step": 69792 }, { "epoch": 7.173175745118191, "grad_norm": 0.045436665415763855, "learning_rate": 0.01, "loss": 1.9739, "step": 69795 }, { "epoch": 7.173484069886948, "grad_norm": 0.08359482884407043, "learning_rate": 0.01, "loss": 1.9994, "step": 69798 }, { "epoch": 7.173792394655704, "grad_norm": 0.04188248887658119, "learning_rate": 0.01, "loss": 1.9897, "step": 69801 }, { "epoch": 7.174100719424461, "grad_norm": 0.07575372606515884, "learning_rate": 0.01, "loss": 1.9775, "step": 69804 }, { "epoch": 7.174409044193217, "grad_norm": 0.09911274164915085, "learning_rate": 0.01, "loss": 1.9802, "step": 69807 }, { "epoch": 7.174717368961973, "grad_norm": 0.040851060301065445, "learning_rate": 0.01, "loss": 1.9578, "step": 69810 }, { "epoch": 7.1750256937307295, "grad_norm": 0.042334407567977905, "learning_rate": 0.01, "loss": 1.972, "step": 69813 }, { "epoch": 7.175334018499486, "grad_norm": 0.03406887874007225, "learning_rate": 0.01, "loss": 1.9722, "step": 69816 }, { "epoch": 7.175642343268242, "grad_norm": 0.06487525999546051, "learning_rate": 0.01, "loss": 1.9775, "step": 69819 }, { "epoch": 7.175950668036999, "grad_norm": 0.09128468483686447, "learning_rate": 0.01, "loss": 1.9722, "step": 69822 }, { "epoch": 7.176258992805756, "grad_norm": 0.06886399537324905, "learning_rate": 0.01, "loss": 1.9485, "step": 69825 }, { "epoch": 7.176567317574512, "grad_norm": 0.09585999697446823, "learning_rate": 0.01, "loss": 1.9676, "step": 69828 }, { "epoch": 7.176875642343268, "grad_norm": 0.10458575189113617, "learning_rate": 0.01, "loss": 1.9816, "step": 69831 }, { "epoch": 7.1771839671120246, "grad_norm": 0.08773664385080338, "learning_rate": 0.01, "loss": 1.9856, "step": 69834 }, { "epoch": 7.177492291880781, "grad_norm": 0.0375094898045063, "learning_rate": 0.01, "loss": 1.9858, "step": 69837 }, { "epoch": 7.177800616649537, "grad_norm": 0.04263398051261902, "learning_rate": 0.01, "loss": 1.9574, "step": 69840 }, { "epoch": 7.178108941418294, "grad_norm": 0.04100068286061287, "learning_rate": 0.01, "loss": 1.9542, "step": 69843 }, { "epoch": 7.178417266187051, "grad_norm": 0.11466946452856064, "learning_rate": 0.01, "loss": 1.9694, "step": 69846 }, { "epoch": 7.178725590955807, "grad_norm": 0.0899171456694603, "learning_rate": 0.01, "loss": 1.9723, "step": 69849 }, { "epoch": 7.179033915724563, "grad_norm": 0.061083417385816574, "learning_rate": 0.01, "loss": 1.9631, "step": 69852 }, { "epoch": 7.17934224049332, "grad_norm": 0.1451992243528366, "learning_rate": 0.01, "loss": 1.9633, "step": 69855 }, { "epoch": 7.179650565262076, "grad_norm": 0.051237862557172775, "learning_rate": 0.01, "loss": 1.9633, "step": 69858 }, { "epoch": 7.179958890030832, "grad_norm": 0.07895320653915405, "learning_rate": 0.01, "loss": 1.9675, "step": 69861 }, { "epoch": 7.1802672147995885, "grad_norm": 0.05484063923358917, "learning_rate": 0.01, "loss": 1.9756, "step": 69864 }, { "epoch": 7.180575539568346, "grad_norm": 0.06715114414691925, "learning_rate": 0.01, "loss": 1.9546, "step": 69867 }, { "epoch": 7.180883864337102, "grad_norm": 0.070882648229599, "learning_rate": 0.01, "loss": 1.9572, "step": 69870 }, { "epoch": 7.181192189105858, "grad_norm": 0.06570753455162048, "learning_rate": 0.01, "loss": 1.9677, "step": 69873 }, { "epoch": 7.181500513874615, "grad_norm": 0.10089487582445145, "learning_rate": 0.01, "loss": 1.9665, "step": 69876 }, { "epoch": 7.181808838643371, "grad_norm": 0.062209602445364, "learning_rate": 0.01, "loss": 1.981, "step": 69879 }, { "epoch": 7.182117163412127, "grad_norm": 0.08328171074390411, "learning_rate": 0.01, "loss": 1.9565, "step": 69882 }, { "epoch": 7.1824254881808836, "grad_norm": 0.0742257609963417, "learning_rate": 0.01, "loss": 1.9552, "step": 69885 }, { "epoch": 7.18273381294964, "grad_norm": 0.05598544701933861, "learning_rate": 0.01, "loss": 2.0187, "step": 69888 }, { "epoch": 7.183042137718397, "grad_norm": 0.07855668663978577, "learning_rate": 0.01, "loss": 1.9871, "step": 69891 }, { "epoch": 7.183350462487153, "grad_norm": 0.09616530686616898, "learning_rate": 0.01, "loss": 1.9816, "step": 69894 }, { "epoch": 7.18365878725591, "grad_norm": 0.044885601848363876, "learning_rate": 0.01, "loss": 1.9882, "step": 69897 }, { "epoch": 7.183967112024666, "grad_norm": 0.08826860785484314, "learning_rate": 0.01, "loss": 1.9561, "step": 69900 }, { "epoch": 7.184275436793422, "grad_norm": 0.07852128893136978, "learning_rate": 0.01, "loss": 1.9675, "step": 69903 }, { "epoch": 7.184583761562179, "grad_norm": 0.094597227871418, "learning_rate": 0.01, "loss": 2.0029, "step": 69906 }, { "epoch": 7.184892086330935, "grad_norm": 0.04735616222023964, "learning_rate": 0.01, "loss": 1.9816, "step": 69909 }, { "epoch": 7.185200411099692, "grad_norm": 0.05992793291807175, "learning_rate": 0.01, "loss": 1.9917, "step": 69912 }, { "epoch": 7.185508735868448, "grad_norm": 0.08416339755058289, "learning_rate": 0.01, "loss": 1.999, "step": 69915 }, { "epoch": 7.185817060637205, "grad_norm": 0.059381093829870224, "learning_rate": 0.01, "loss": 1.991, "step": 69918 }, { "epoch": 7.186125385405961, "grad_norm": 0.037824418395757675, "learning_rate": 0.01, "loss": 1.9678, "step": 69921 }, { "epoch": 7.186433710174717, "grad_norm": 0.04952680319547653, "learning_rate": 0.01, "loss": 1.9647, "step": 69924 }, { "epoch": 7.186742034943474, "grad_norm": 0.07231353968381882, "learning_rate": 0.01, "loss": 1.9631, "step": 69927 }, { "epoch": 7.18705035971223, "grad_norm": 0.11462277173995972, "learning_rate": 0.01, "loss": 1.9904, "step": 69930 }, { "epoch": 7.187358684480986, "grad_norm": 0.07132146507501602, "learning_rate": 0.01, "loss": 2.0002, "step": 69933 }, { "epoch": 7.187667009249743, "grad_norm": 0.0858379453420639, "learning_rate": 0.01, "loss": 1.9945, "step": 69936 }, { "epoch": 7.1879753340185, "grad_norm": 0.045474838465452194, "learning_rate": 0.01, "loss": 1.9563, "step": 69939 }, { "epoch": 7.188283658787256, "grad_norm": 0.12445644289255142, "learning_rate": 0.01, "loss": 1.9668, "step": 69942 }, { "epoch": 7.188591983556012, "grad_norm": 0.044513870030641556, "learning_rate": 0.01, "loss": 1.988, "step": 69945 }, { "epoch": 7.188900308324769, "grad_norm": 0.03778283670544624, "learning_rate": 0.01, "loss": 1.98, "step": 69948 }, { "epoch": 7.189208633093525, "grad_norm": 0.044365961104631424, "learning_rate": 0.01, "loss": 1.954, "step": 69951 }, { "epoch": 7.189516957862281, "grad_norm": 0.03456363081932068, "learning_rate": 0.01, "loss": 1.9774, "step": 69954 }, { "epoch": 7.189825282631038, "grad_norm": 0.0453643873333931, "learning_rate": 0.01, "loss": 1.9745, "step": 69957 }, { "epoch": 7.190133607399795, "grad_norm": 0.07771015167236328, "learning_rate": 0.01, "loss": 1.9603, "step": 69960 }, { "epoch": 7.190441932168551, "grad_norm": 0.0756489485502243, "learning_rate": 0.01, "loss": 1.9867, "step": 69963 }, { "epoch": 7.190750256937307, "grad_norm": 0.12961985170841217, "learning_rate": 0.01, "loss": 1.9765, "step": 69966 }, { "epoch": 7.191058581706064, "grad_norm": 0.07470633834600449, "learning_rate": 0.01, "loss": 1.9826, "step": 69969 }, { "epoch": 7.19136690647482, "grad_norm": 0.039709094911813736, "learning_rate": 0.01, "loss": 1.9664, "step": 69972 }, { "epoch": 7.191675231243576, "grad_norm": 0.0402153842151165, "learning_rate": 0.01, "loss": 1.9689, "step": 69975 }, { "epoch": 7.191983556012333, "grad_norm": 0.04119057580828667, "learning_rate": 0.01, "loss": 1.9707, "step": 69978 }, { "epoch": 7.19229188078109, "grad_norm": 0.036542341113090515, "learning_rate": 0.01, "loss": 1.9714, "step": 69981 }, { "epoch": 7.192600205549846, "grad_norm": 0.03711630031466484, "learning_rate": 0.01, "loss": 1.994, "step": 69984 }, { "epoch": 7.1929085303186024, "grad_norm": 0.050209950655698776, "learning_rate": 0.01, "loss": 1.9546, "step": 69987 }, { "epoch": 7.193216855087359, "grad_norm": 0.06897444278001785, "learning_rate": 0.01, "loss": 1.9632, "step": 69990 }, { "epoch": 7.193525179856115, "grad_norm": 0.10195379704236984, "learning_rate": 0.01, "loss": 1.952, "step": 69993 }, { "epoch": 7.193833504624871, "grad_norm": 0.08220812678337097, "learning_rate": 0.01, "loss": 2.0007, "step": 69996 }, { "epoch": 7.194141829393628, "grad_norm": 0.08391463756561279, "learning_rate": 0.01, "loss": 1.9623, "step": 69999 }, { "epoch": 7.194450154162384, "grad_norm": 0.03341416269540787, "learning_rate": 0.01, "loss": 1.9988, "step": 70002 }, { "epoch": 7.194758478931141, "grad_norm": 0.07957329601049423, "learning_rate": 0.01, "loss": 1.9785, "step": 70005 }, { "epoch": 7.1950668036998975, "grad_norm": 0.040082935243844986, "learning_rate": 0.01, "loss": 1.999, "step": 70008 }, { "epoch": 7.195375128468654, "grad_norm": 0.07456467300653458, "learning_rate": 0.01, "loss": 1.9593, "step": 70011 }, { "epoch": 7.19568345323741, "grad_norm": 0.046207722276449203, "learning_rate": 0.01, "loss": 1.9682, "step": 70014 }, { "epoch": 7.195991778006166, "grad_norm": 0.06496291607618332, "learning_rate": 0.01, "loss": 1.9833, "step": 70017 }, { "epoch": 7.196300102774923, "grad_norm": 0.047517258673906326, "learning_rate": 0.01, "loss": 1.9723, "step": 70020 }, { "epoch": 7.196608427543679, "grad_norm": 0.033581919968128204, "learning_rate": 0.01, "loss": 1.9737, "step": 70023 }, { "epoch": 7.196916752312436, "grad_norm": 0.08832699060440063, "learning_rate": 0.01, "loss": 1.9586, "step": 70026 }, { "epoch": 7.1972250770811925, "grad_norm": 0.12826929986476898, "learning_rate": 0.01, "loss": 1.977, "step": 70029 }, { "epoch": 7.197533401849949, "grad_norm": 0.11776229739189148, "learning_rate": 0.01, "loss": 1.9849, "step": 70032 }, { "epoch": 7.197841726618705, "grad_norm": 0.08085273951292038, "learning_rate": 0.01, "loss": 1.9391, "step": 70035 }, { "epoch": 7.1981500513874614, "grad_norm": 0.06540251523256302, "learning_rate": 0.01, "loss": 1.9803, "step": 70038 }, { "epoch": 7.198458376156218, "grad_norm": 0.05728829279541969, "learning_rate": 0.01, "loss": 1.9706, "step": 70041 }, { "epoch": 7.198766700924974, "grad_norm": 0.036182671785354614, "learning_rate": 0.01, "loss": 1.9761, "step": 70044 }, { "epoch": 7.19907502569373, "grad_norm": 0.03832792863249779, "learning_rate": 0.01, "loss": 1.9848, "step": 70047 }, { "epoch": 7.199383350462488, "grad_norm": 0.06086043268442154, "learning_rate": 0.01, "loss": 1.9589, "step": 70050 }, { "epoch": 7.199691675231244, "grad_norm": 0.02882802113890648, "learning_rate": 0.01, "loss": 1.9732, "step": 70053 }, { "epoch": 7.2, "grad_norm": 0.07022470235824585, "learning_rate": 0.01, "loss": 1.982, "step": 70056 }, { "epoch": 7.2003083247687565, "grad_norm": 0.061370596289634705, "learning_rate": 0.01, "loss": 1.9935, "step": 70059 }, { "epoch": 7.200616649537513, "grad_norm": 0.1273171752691269, "learning_rate": 0.01, "loss": 1.9713, "step": 70062 }, { "epoch": 7.200924974306269, "grad_norm": 0.11996814608573914, "learning_rate": 0.01, "loss": 1.9781, "step": 70065 }, { "epoch": 7.201233299075025, "grad_norm": 0.0758858323097229, "learning_rate": 0.01, "loss": 1.9666, "step": 70068 }, { "epoch": 7.201541623843782, "grad_norm": 0.04552829638123512, "learning_rate": 0.01, "loss": 1.9759, "step": 70071 }, { "epoch": 7.201849948612539, "grad_norm": 0.04942410811781883, "learning_rate": 0.01, "loss": 1.9904, "step": 70074 }, { "epoch": 7.202158273381295, "grad_norm": 0.10216709226369858, "learning_rate": 0.01, "loss": 1.9759, "step": 70077 }, { "epoch": 7.2024665981500515, "grad_norm": 0.05046083778142929, "learning_rate": 0.01, "loss": 1.9526, "step": 70080 }, { "epoch": 7.202774922918808, "grad_norm": 0.09301755577325821, "learning_rate": 0.01, "loss": 1.9842, "step": 70083 }, { "epoch": 7.203083247687564, "grad_norm": 0.06181971728801727, "learning_rate": 0.01, "loss": 1.976, "step": 70086 }, { "epoch": 7.2033915724563204, "grad_norm": 0.07836408168077469, "learning_rate": 0.01, "loss": 1.9718, "step": 70089 }, { "epoch": 7.203699897225077, "grad_norm": 0.04453438147902489, "learning_rate": 0.01, "loss": 1.9998, "step": 70092 }, { "epoch": 7.204008221993833, "grad_norm": 0.11325328052043915, "learning_rate": 0.01, "loss": 1.9754, "step": 70095 }, { "epoch": 7.20431654676259, "grad_norm": 0.17005731165409088, "learning_rate": 0.01, "loss": 1.9435, "step": 70098 }, { "epoch": 7.204624871531347, "grad_norm": 0.11921730637550354, "learning_rate": 0.01, "loss": 2.0139, "step": 70101 }, { "epoch": 7.204933196300103, "grad_norm": 0.07009397447109222, "learning_rate": 0.01, "loss": 1.9597, "step": 70104 }, { "epoch": 7.205241521068859, "grad_norm": 0.037264637649059296, "learning_rate": 0.01, "loss": 1.9772, "step": 70107 }, { "epoch": 7.2055498458376155, "grad_norm": 0.11778846383094788, "learning_rate": 0.01, "loss": 1.9679, "step": 70110 }, { "epoch": 7.205858170606372, "grad_norm": 0.13234436511993408, "learning_rate": 0.01, "loss": 1.9654, "step": 70113 }, { "epoch": 7.206166495375128, "grad_norm": 0.08017081022262573, "learning_rate": 0.01, "loss": 1.9616, "step": 70116 }, { "epoch": 7.206474820143885, "grad_norm": 0.10627610236406326, "learning_rate": 0.01, "loss": 1.9856, "step": 70119 }, { "epoch": 7.206783144912642, "grad_norm": 0.111640565097332, "learning_rate": 0.01, "loss": 1.9613, "step": 70122 }, { "epoch": 7.207091469681398, "grad_norm": 0.049196891486644745, "learning_rate": 0.01, "loss": 1.9752, "step": 70125 }, { "epoch": 7.207399794450154, "grad_norm": 0.041943904012441635, "learning_rate": 0.01, "loss": 1.9382, "step": 70128 }, { "epoch": 7.2077081192189105, "grad_norm": 0.06802201271057129, "learning_rate": 0.01, "loss": 1.9616, "step": 70131 }, { "epoch": 7.208016443987667, "grad_norm": 0.043165501207113266, "learning_rate": 0.01, "loss": 1.957, "step": 70134 }, { "epoch": 7.208324768756423, "grad_norm": 0.06273946911096573, "learning_rate": 0.01, "loss": 1.9803, "step": 70137 }, { "epoch": 7.2086330935251794, "grad_norm": 0.07074591517448425, "learning_rate": 0.01, "loss": 1.9771, "step": 70140 }, { "epoch": 7.208941418293937, "grad_norm": 0.0768829733133316, "learning_rate": 0.01, "loss": 1.9582, "step": 70143 }, { "epoch": 7.209249743062693, "grad_norm": 0.039911992847919464, "learning_rate": 0.01, "loss": 1.9668, "step": 70146 }, { "epoch": 7.209558067831449, "grad_norm": 0.06030252203345299, "learning_rate": 0.01, "loss": 1.9672, "step": 70149 }, { "epoch": 7.209866392600206, "grad_norm": 0.03984086588025093, "learning_rate": 0.01, "loss": 1.994, "step": 70152 }, { "epoch": 7.210174717368962, "grad_norm": 0.032670505344867706, "learning_rate": 0.01, "loss": 1.9619, "step": 70155 }, { "epoch": 7.210483042137718, "grad_norm": 0.10686333477497101, "learning_rate": 0.01, "loss": 2.0011, "step": 70158 }, { "epoch": 7.2107913669064745, "grad_norm": 0.04879242554306984, "learning_rate": 0.01, "loss": 1.9856, "step": 70161 }, { "epoch": 7.211099691675232, "grad_norm": 0.07059837132692337, "learning_rate": 0.01, "loss": 1.9836, "step": 70164 }, { "epoch": 7.211408016443988, "grad_norm": 0.09764829277992249, "learning_rate": 0.01, "loss": 1.9865, "step": 70167 }, { "epoch": 7.211716341212744, "grad_norm": 0.05652858689427376, "learning_rate": 0.01, "loss": 1.9418, "step": 70170 }, { "epoch": 7.212024665981501, "grad_norm": 0.051845990121364594, "learning_rate": 0.01, "loss": 1.9932, "step": 70173 }, { "epoch": 7.212332990750257, "grad_norm": 0.06887342035770416, "learning_rate": 0.01, "loss": 1.9771, "step": 70176 }, { "epoch": 7.212641315519013, "grad_norm": 0.05305744335055351, "learning_rate": 0.01, "loss": 1.9976, "step": 70179 }, { "epoch": 7.2129496402877695, "grad_norm": 0.05928622931241989, "learning_rate": 0.01, "loss": 1.9625, "step": 70182 }, { "epoch": 7.213257965056526, "grad_norm": 0.05189640820026398, "learning_rate": 0.01, "loss": 1.9904, "step": 70185 }, { "epoch": 7.213566289825283, "grad_norm": 0.050485290586948395, "learning_rate": 0.01, "loss": 1.946, "step": 70188 }, { "epoch": 7.213874614594039, "grad_norm": 0.05905221402645111, "learning_rate": 0.01, "loss": 1.9872, "step": 70191 }, { "epoch": 7.214182939362796, "grad_norm": 0.06555161625146866, "learning_rate": 0.01, "loss": 1.9589, "step": 70194 }, { "epoch": 7.214491264131552, "grad_norm": 0.03727792948484421, "learning_rate": 0.01, "loss": 1.9824, "step": 70197 }, { "epoch": 7.214799588900308, "grad_norm": 0.032580602914094925, "learning_rate": 0.01, "loss": 1.9586, "step": 70200 }, { "epoch": 7.215107913669065, "grad_norm": 0.042042069137096405, "learning_rate": 0.01, "loss": 1.9774, "step": 70203 }, { "epoch": 7.215416238437821, "grad_norm": 0.1405077874660492, "learning_rate": 0.01, "loss": 1.989, "step": 70206 }, { "epoch": 7.215724563206577, "grad_norm": 0.08418957144021988, "learning_rate": 0.01, "loss": 1.973, "step": 70209 }, { "epoch": 7.216032887975334, "grad_norm": 0.09414199739694595, "learning_rate": 0.01, "loss": 1.9752, "step": 70212 }, { "epoch": 7.216341212744091, "grad_norm": 0.053635187447071075, "learning_rate": 0.01, "loss": 1.9537, "step": 70215 }, { "epoch": 7.216649537512847, "grad_norm": 0.04404306784272194, "learning_rate": 0.01, "loss": 1.9582, "step": 70218 }, { "epoch": 7.216957862281603, "grad_norm": 0.0472695529460907, "learning_rate": 0.01, "loss": 1.983, "step": 70221 }, { "epoch": 7.21726618705036, "grad_norm": 0.04150666296482086, "learning_rate": 0.01, "loss": 1.9745, "step": 70224 }, { "epoch": 7.217574511819116, "grad_norm": 0.06759631633758545, "learning_rate": 0.01, "loss": 1.9824, "step": 70227 }, { "epoch": 7.217882836587872, "grad_norm": 0.12022452801465988, "learning_rate": 0.01, "loss": 1.9913, "step": 70230 }, { "epoch": 7.2181911613566285, "grad_norm": 0.10294222086668015, "learning_rate": 0.01, "loss": 2.0086, "step": 70233 }, { "epoch": 7.218499486125386, "grad_norm": 0.036887966096401215, "learning_rate": 0.01, "loss": 1.9857, "step": 70236 }, { "epoch": 7.218807810894142, "grad_norm": 0.03965602442622185, "learning_rate": 0.01, "loss": 1.9766, "step": 70239 }, { "epoch": 7.219116135662898, "grad_norm": 0.033555611968040466, "learning_rate": 0.01, "loss": 1.9787, "step": 70242 }, { "epoch": 7.219424460431655, "grad_norm": 0.03445450961589813, "learning_rate": 0.01, "loss": 1.9909, "step": 70245 }, { "epoch": 7.219732785200411, "grad_norm": 0.04350513964891434, "learning_rate": 0.01, "loss": 1.9738, "step": 70248 }, { "epoch": 7.220041109969167, "grad_norm": 0.06522413343191147, "learning_rate": 0.01, "loss": 1.9629, "step": 70251 }, { "epoch": 7.220349434737924, "grad_norm": 0.06339959055185318, "learning_rate": 0.01, "loss": 1.976, "step": 70254 }, { "epoch": 7.220657759506681, "grad_norm": 0.04636206105351448, "learning_rate": 0.01, "loss": 1.9776, "step": 70257 }, { "epoch": 7.220966084275437, "grad_norm": 0.055801764130592346, "learning_rate": 0.01, "loss": 1.9735, "step": 70260 }, { "epoch": 7.221274409044193, "grad_norm": 0.03851722180843353, "learning_rate": 0.01, "loss": 1.9809, "step": 70263 }, { "epoch": 7.22158273381295, "grad_norm": 0.06473366171121597, "learning_rate": 0.01, "loss": 1.97, "step": 70266 }, { "epoch": 7.221891058581706, "grad_norm": 0.08311999589204788, "learning_rate": 0.01, "loss": 1.9917, "step": 70269 }, { "epoch": 7.222199383350462, "grad_norm": 0.07522797584533691, "learning_rate": 0.01, "loss": 1.9859, "step": 70272 }, { "epoch": 7.222507708119219, "grad_norm": 0.09197179228067398, "learning_rate": 0.01, "loss": 1.9736, "step": 70275 }, { "epoch": 7.222816032887975, "grad_norm": 0.08302085846662521, "learning_rate": 0.01, "loss": 1.9737, "step": 70278 }, { "epoch": 7.223124357656732, "grad_norm": 0.07616004347801208, "learning_rate": 0.01, "loss": 1.9748, "step": 70281 }, { "epoch": 7.223432682425488, "grad_norm": 0.07368069142103195, "learning_rate": 0.01, "loss": 1.9914, "step": 70284 }, { "epoch": 7.223741007194245, "grad_norm": 0.07486431300640106, "learning_rate": 0.01, "loss": 1.9683, "step": 70287 }, { "epoch": 7.224049331963001, "grad_norm": 0.045419856905937195, "learning_rate": 0.01, "loss": 1.9535, "step": 70290 }, { "epoch": 7.224357656731757, "grad_norm": 0.05169800668954849, "learning_rate": 0.01, "loss": 1.9729, "step": 70293 }, { "epoch": 7.224665981500514, "grad_norm": 0.06700783222913742, "learning_rate": 0.01, "loss": 1.9483, "step": 70296 }, { "epoch": 7.22497430626927, "grad_norm": 0.0943138375878334, "learning_rate": 0.01, "loss": 1.9645, "step": 70299 }, { "epoch": 7.225282631038027, "grad_norm": 0.07699436694383621, "learning_rate": 0.01, "loss": 1.9686, "step": 70302 }, { "epoch": 7.2255909558067835, "grad_norm": 0.058129556477069855, "learning_rate": 0.01, "loss": 1.977, "step": 70305 }, { "epoch": 7.22589928057554, "grad_norm": 0.11801168322563171, "learning_rate": 0.01, "loss": 1.9628, "step": 70308 }, { "epoch": 7.226207605344296, "grad_norm": 0.046479515731334686, "learning_rate": 0.01, "loss": 1.965, "step": 70311 }, { "epoch": 7.226515930113052, "grad_norm": 0.0361471064388752, "learning_rate": 0.01, "loss": 1.97, "step": 70314 }, { "epoch": 7.226824254881809, "grad_norm": 0.12745732069015503, "learning_rate": 0.01, "loss": 1.9883, "step": 70317 }, { "epoch": 7.227132579650565, "grad_norm": 0.11508181691169739, "learning_rate": 0.01, "loss": 1.9874, "step": 70320 }, { "epoch": 7.227440904419321, "grad_norm": 0.09497086703777313, "learning_rate": 0.01, "loss": 1.9887, "step": 70323 }, { "epoch": 7.2277492291880785, "grad_norm": 0.045153483748435974, "learning_rate": 0.01, "loss": 1.9945, "step": 70326 }, { "epoch": 7.228057553956835, "grad_norm": 0.03023081086575985, "learning_rate": 0.01, "loss": 2.0191, "step": 70329 }, { "epoch": 7.228365878725591, "grad_norm": 0.036539237946271896, "learning_rate": 0.01, "loss": 1.9669, "step": 70332 }, { "epoch": 7.228674203494347, "grad_norm": 0.05517994239926338, "learning_rate": 0.01, "loss": 1.9856, "step": 70335 }, { "epoch": 7.228982528263104, "grad_norm": 0.08023694902658463, "learning_rate": 0.01, "loss": 1.9662, "step": 70338 }, { "epoch": 7.22929085303186, "grad_norm": 0.04393969848752022, "learning_rate": 0.01, "loss": 1.9624, "step": 70341 }, { "epoch": 7.229599177800616, "grad_norm": 0.036692652851343155, "learning_rate": 0.01, "loss": 1.9825, "step": 70344 }, { "epoch": 7.229907502569373, "grad_norm": 0.03756039962172508, "learning_rate": 0.01, "loss": 1.9767, "step": 70347 }, { "epoch": 7.23021582733813, "grad_norm": 0.046152982860803604, "learning_rate": 0.01, "loss": 1.9665, "step": 70350 }, { "epoch": 7.230524152106886, "grad_norm": 0.24146318435668945, "learning_rate": 0.01, "loss": 1.9864, "step": 70353 }, { "epoch": 7.2308324768756425, "grad_norm": 0.14046429097652435, "learning_rate": 0.01, "loss": 2.0109, "step": 70356 }, { "epoch": 7.231140801644399, "grad_norm": 0.10172725468873978, "learning_rate": 0.01, "loss": 1.9819, "step": 70359 }, { "epoch": 7.231449126413155, "grad_norm": 0.08112365752458572, "learning_rate": 0.01, "loss": 1.9745, "step": 70362 }, { "epoch": 7.231757451181911, "grad_norm": 0.036505427211523056, "learning_rate": 0.01, "loss": 1.9597, "step": 70365 }, { "epoch": 7.232065775950668, "grad_norm": 0.06573676317930222, "learning_rate": 0.01, "loss": 1.9647, "step": 70368 }, { "epoch": 7.232374100719425, "grad_norm": 0.04601527750492096, "learning_rate": 0.01, "loss": 1.9737, "step": 70371 }, { "epoch": 7.232682425488181, "grad_norm": 0.04128945618867874, "learning_rate": 0.01, "loss": 1.976, "step": 70374 }, { "epoch": 7.2329907502569375, "grad_norm": 0.03756236284971237, "learning_rate": 0.01, "loss": 1.9972, "step": 70377 }, { "epoch": 7.233299075025694, "grad_norm": 0.045076120644807816, "learning_rate": 0.01, "loss": 1.9929, "step": 70380 }, { "epoch": 7.23360739979445, "grad_norm": 0.07399538904428482, "learning_rate": 0.01, "loss": 1.9756, "step": 70383 }, { "epoch": 7.233915724563206, "grad_norm": 0.10196070373058319, "learning_rate": 0.01, "loss": 1.9502, "step": 70386 }, { "epoch": 7.234224049331963, "grad_norm": 0.06493962556123734, "learning_rate": 0.01, "loss": 1.9704, "step": 70389 }, { "epoch": 7.234532374100719, "grad_norm": 0.0905206948518753, "learning_rate": 0.01, "loss": 1.9675, "step": 70392 }, { "epoch": 7.234840698869476, "grad_norm": 0.051535073667764664, "learning_rate": 0.01, "loss": 1.9635, "step": 70395 }, { "epoch": 7.2351490236382325, "grad_norm": 0.03794541209936142, "learning_rate": 0.01, "loss": 1.9807, "step": 70398 }, { "epoch": 7.235457348406989, "grad_norm": 0.039028409868478775, "learning_rate": 0.01, "loss": 1.9922, "step": 70401 }, { "epoch": 7.235765673175745, "grad_norm": 0.03177224472165108, "learning_rate": 0.01, "loss": 1.9767, "step": 70404 }, { "epoch": 7.2360739979445015, "grad_norm": 0.03279658779501915, "learning_rate": 0.01, "loss": 1.9844, "step": 70407 }, { "epoch": 7.236382322713258, "grad_norm": 0.11790499091148376, "learning_rate": 0.01, "loss": 1.9678, "step": 70410 }, { "epoch": 7.236690647482014, "grad_norm": 0.08712024986743927, "learning_rate": 0.01, "loss": 1.9848, "step": 70413 }, { "epoch": 7.23699897225077, "grad_norm": 0.043823301792144775, "learning_rate": 0.01, "loss": 1.9612, "step": 70416 }, { "epoch": 7.237307297019528, "grad_norm": 0.03984406962990761, "learning_rate": 0.01, "loss": 1.9784, "step": 70419 }, { "epoch": 7.237615621788284, "grad_norm": 0.03345489874482155, "learning_rate": 0.01, "loss": 1.9782, "step": 70422 }, { "epoch": 7.23792394655704, "grad_norm": 0.030647002160549164, "learning_rate": 0.01, "loss": 1.9797, "step": 70425 }, { "epoch": 7.2382322713257965, "grad_norm": 0.061715736985206604, "learning_rate": 0.01, "loss": 1.9735, "step": 70428 }, { "epoch": 7.238540596094553, "grad_norm": 0.0880758985877037, "learning_rate": 0.01, "loss": 1.9787, "step": 70431 }, { "epoch": 7.238848920863309, "grad_norm": 0.051462333649396896, "learning_rate": 0.01, "loss": 1.9566, "step": 70434 }, { "epoch": 7.239157245632065, "grad_norm": 0.055362194776535034, "learning_rate": 0.01, "loss": 1.9753, "step": 70437 }, { "epoch": 7.239465570400823, "grad_norm": 0.04368078336119652, "learning_rate": 0.01, "loss": 1.9803, "step": 70440 }, { "epoch": 7.239773895169579, "grad_norm": 0.09169706702232361, "learning_rate": 0.01, "loss": 1.9541, "step": 70443 }, { "epoch": 7.240082219938335, "grad_norm": 0.03903011977672577, "learning_rate": 0.01, "loss": 1.9481, "step": 70446 }, { "epoch": 7.2403905447070915, "grad_norm": 0.19656112790107727, "learning_rate": 0.01, "loss": 1.9765, "step": 70449 }, { "epoch": 7.240698869475848, "grad_norm": 0.07515250146389008, "learning_rate": 0.01, "loss": 1.984, "step": 70452 }, { "epoch": 7.241007194244604, "grad_norm": 0.0853821262717247, "learning_rate": 0.01, "loss": 1.9959, "step": 70455 }, { "epoch": 7.2413155190133605, "grad_norm": 0.06227338686585426, "learning_rate": 0.01, "loss": 1.9844, "step": 70458 }, { "epoch": 7.241623843782117, "grad_norm": 0.047402843832969666, "learning_rate": 0.01, "loss": 1.9549, "step": 70461 }, { "epoch": 7.241932168550874, "grad_norm": 0.03917578607797623, "learning_rate": 0.01, "loss": 1.9794, "step": 70464 }, { "epoch": 7.24224049331963, "grad_norm": 0.055436741560697556, "learning_rate": 0.01, "loss": 1.9792, "step": 70467 }, { "epoch": 7.242548818088387, "grad_norm": 0.05304566025733948, "learning_rate": 0.01, "loss": 1.9745, "step": 70470 }, { "epoch": 7.242857142857143, "grad_norm": 0.04701549932360649, "learning_rate": 0.01, "loss": 1.9856, "step": 70473 }, { "epoch": 7.243165467625899, "grad_norm": 0.045375335961580276, "learning_rate": 0.01, "loss": 1.9751, "step": 70476 }, { "epoch": 7.2434737923946555, "grad_norm": 0.09745540469884872, "learning_rate": 0.01, "loss": 1.9757, "step": 70479 }, { "epoch": 7.243782117163412, "grad_norm": 0.10383448749780655, "learning_rate": 0.01, "loss": 1.9659, "step": 70482 }, { "epoch": 7.244090441932169, "grad_norm": 0.08810384571552277, "learning_rate": 0.01, "loss": 1.9665, "step": 70485 }, { "epoch": 7.244398766700925, "grad_norm": 0.05690396949648857, "learning_rate": 0.01, "loss": 1.9758, "step": 70488 }, { "epoch": 7.244707091469682, "grad_norm": 0.03899385407567024, "learning_rate": 0.01, "loss": 1.9865, "step": 70491 }, { "epoch": 7.245015416238438, "grad_norm": 0.050195615738630295, "learning_rate": 0.01, "loss": 1.9495, "step": 70494 }, { "epoch": 7.245323741007194, "grad_norm": 0.046150173991918564, "learning_rate": 0.01, "loss": 1.9794, "step": 70497 }, { "epoch": 7.2456320657759505, "grad_norm": 0.052966147661209106, "learning_rate": 0.01, "loss": 1.961, "step": 70500 }, { "epoch": 7.245940390544707, "grad_norm": 0.10062668472528458, "learning_rate": 0.01, "loss": 1.9618, "step": 70503 }, { "epoch": 7.246248715313463, "grad_norm": 0.07878834009170532, "learning_rate": 0.01, "loss": 1.9802, "step": 70506 }, { "epoch": 7.24655704008222, "grad_norm": 0.05327494442462921, "learning_rate": 0.01, "loss": 1.9571, "step": 70509 }, { "epoch": 7.246865364850977, "grad_norm": 0.03351713344454765, "learning_rate": 0.01, "loss": 1.9782, "step": 70512 }, { "epoch": 7.247173689619733, "grad_norm": 0.037406474351882935, "learning_rate": 0.01, "loss": 1.9649, "step": 70515 }, { "epoch": 7.247482014388489, "grad_norm": 0.11417467892169952, "learning_rate": 0.01, "loss": 1.9892, "step": 70518 }, { "epoch": 7.247790339157246, "grad_norm": 0.10274399816989899, "learning_rate": 0.01, "loss": 1.9608, "step": 70521 }, { "epoch": 7.248098663926002, "grad_norm": 0.03881362825632095, "learning_rate": 0.01, "loss": 1.971, "step": 70524 }, { "epoch": 7.248406988694758, "grad_norm": 0.051997456699609756, "learning_rate": 0.01, "loss": 1.9755, "step": 70527 }, { "epoch": 7.2487153134635145, "grad_norm": 0.06575778871774673, "learning_rate": 0.01, "loss": 1.9947, "step": 70530 }, { "epoch": 7.249023638232272, "grad_norm": 0.04608520120382309, "learning_rate": 0.01, "loss": 1.9507, "step": 70533 }, { "epoch": 7.249331963001028, "grad_norm": 0.04164005443453789, "learning_rate": 0.01, "loss": 1.9889, "step": 70536 }, { "epoch": 7.249640287769784, "grad_norm": 0.05026416853070259, "learning_rate": 0.01, "loss": 2.0011, "step": 70539 }, { "epoch": 7.249948612538541, "grad_norm": 0.12604720890522003, "learning_rate": 0.01, "loss": 1.9832, "step": 70542 }, { "epoch": 7.250256937307297, "grad_norm": 0.04925797879695892, "learning_rate": 0.01, "loss": 1.9609, "step": 70545 }, { "epoch": 7.250565262076053, "grad_norm": 0.031429026275873184, "learning_rate": 0.01, "loss": 2.0033, "step": 70548 }, { "epoch": 7.2508735868448095, "grad_norm": 0.10795203596353531, "learning_rate": 0.01, "loss": 1.9695, "step": 70551 }, { "epoch": 7.251181911613566, "grad_norm": 0.06839077174663544, "learning_rate": 0.01, "loss": 1.9978, "step": 70554 }, { "epoch": 7.251490236382323, "grad_norm": 0.09800900518894196, "learning_rate": 0.01, "loss": 1.9891, "step": 70557 }, { "epoch": 7.251798561151079, "grad_norm": 0.06234534829854965, "learning_rate": 0.01, "loss": 1.984, "step": 70560 }, { "epoch": 7.252106885919836, "grad_norm": 0.03908616676926613, "learning_rate": 0.01, "loss": 1.9536, "step": 70563 }, { "epoch": 7.252415210688592, "grad_norm": 0.06500250101089478, "learning_rate": 0.01, "loss": 1.99, "step": 70566 }, { "epoch": 7.252723535457348, "grad_norm": 0.050453029572963715, "learning_rate": 0.01, "loss": 1.9655, "step": 70569 }, { "epoch": 7.253031860226105, "grad_norm": 0.10915414243936539, "learning_rate": 0.01, "loss": 1.9789, "step": 70572 }, { "epoch": 7.253340184994861, "grad_norm": 0.04887484759092331, "learning_rate": 0.01, "loss": 1.9366, "step": 70575 }, { "epoch": 7.253648509763618, "grad_norm": 0.07314929366111755, "learning_rate": 0.01, "loss": 1.9751, "step": 70578 }, { "epoch": 7.253956834532374, "grad_norm": 0.05896662175655365, "learning_rate": 0.01, "loss": 1.9753, "step": 70581 }, { "epoch": 7.254265159301131, "grad_norm": 0.04080936685204506, "learning_rate": 0.01, "loss": 1.9805, "step": 70584 }, { "epoch": 7.254573484069887, "grad_norm": 0.058053355664014816, "learning_rate": 0.01, "loss": 1.9881, "step": 70587 }, { "epoch": 7.254881808838643, "grad_norm": 0.04267947003245354, "learning_rate": 0.01, "loss": 1.9828, "step": 70590 }, { "epoch": 7.2551901336074, "grad_norm": 0.04386641085147858, "learning_rate": 0.01, "loss": 1.9396, "step": 70593 }, { "epoch": 7.255498458376156, "grad_norm": 0.06609844416379929, "learning_rate": 0.01, "loss": 2.0021, "step": 70596 }, { "epoch": 7.255806783144912, "grad_norm": 0.0940227136015892, "learning_rate": 0.01, "loss": 1.984, "step": 70599 }, { "epoch": 7.256115107913669, "grad_norm": 0.03769245743751526, "learning_rate": 0.01, "loss": 1.9593, "step": 70602 }, { "epoch": 7.256423432682426, "grad_norm": 0.0913299098610878, "learning_rate": 0.01, "loss": 1.9693, "step": 70605 }, { "epoch": 7.256731757451182, "grad_norm": 0.10391309857368469, "learning_rate": 0.01, "loss": 1.9698, "step": 70608 }, { "epoch": 7.257040082219938, "grad_norm": 0.10451003164052963, "learning_rate": 0.01, "loss": 1.9702, "step": 70611 }, { "epoch": 7.257348406988695, "grad_norm": 0.059426721185445786, "learning_rate": 0.01, "loss": 1.985, "step": 70614 }, { "epoch": 7.257656731757451, "grad_norm": 0.1078886017203331, "learning_rate": 0.01, "loss": 1.9746, "step": 70617 }, { "epoch": 7.257965056526207, "grad_norm": 0.13962066173553467, "learning_rate": 0.01, "loss": 1.9669, "step": 70620 }, { "epoch": 7.2582733812949645, "grad_norm": 0.058368947356939316, "learning_rate": 0.01, "loss": 1.9624, "step": 70623 }, { "epoch": 7.258581706063721, "grad_norm": 0.06157883629202843, "learning_rate": 0.01, "loss": 1.9746, "step": 70626 }, { "epoch": 7.258890030832477, "grad_norm": 0.06946786493062973, "learning_rate": 0.01, "loss": 2.0012, "step": 70629 }, { "epoch": 7.259198355601233, "grad_norm": 0.049864597618579865, "learning_rate": 0.01, "loss": 1.9794, "step": 70632 }, { "epoch": 7.25950668036999, "grad_norm": 0.046192020177841187, "learning_rate": 0.01, "loss": 1.967, "step": 70635 }, { "epoch": 7.259815005138746, "grad_norm": 0.03985748812556267, "learning_rate": 0.01, "loss": 1.9685, "step": 70638 }, { "epoch": 7.260123329907502, "grad_norm": 0.05892154574394226, "learning_rate": 0.01, "loss": 1.9492, "step": 70641 }, { "epoch": 7.260431654676259, "grad_norm": 0.04130382463335991, "learning_rate": 0.01, "loss": 1.9579, "step": 70644 }, { "epoch": 7.260739979445016, "grad_norm": 0.0541166253387928, "learning_rate": 0.01, "loss": 1.9844, "step": 70647 }, { "epoch": 7.261048304213772, "grad_norm": 0.039703115820884705, "learning_rate": 0.01, "loss": 1.9716, "step": 70650 }, { "epoch": 7.261356628982528, "grad_norm": 0.10397301614284515, "learning_rate": 0.01, "loss": 1.9666, "step": 70653 }, { "epoch": 7.261664953751285, "grad_norm": 0.09001152962446213, "learning_rate": 0.01, "loss": 1.984, "step": 70656 }, { "epoch": 7.261973278520041, "grad_norm": 0.05959209427237511, "learning_rate": 0.01, "loss": 1.9763, "step": 70659 }, { "epoch": 7.262281603288797, "grad_norm": 0.06122048199176788, "learning_rate": 0.01, "loss": 1.9645, "step": 70662 }, { "epoch": 7.262589928057554, "grad_norm": 0.03712654113769531, "learning_rate": 0.01, "loss": 1.9899, "step": 70665 }, { "epoch": 7.262898252826311, "grad_norm": 0.06078263372182846, "learning_rate": 0.01, "loss": 1.9642, "step": 70668 }, { "epoch": 7.263206577595067, "grad_norm": 0.07234025001525879, "learning_rate": 0.01, "loss": 1.9675, "step": 70671 }, { "epoch": 7.2635149023638235, "grad_norm": 0.10953123867511749, "learning_rate": 0.01, "loss": 1.9882, "step": 70674 }, { "epoch": 7.26382322713258, "grad_norm": 0.10636293888092041, "learning_rate": 0.01, "loss": 1.9769, "step": 70677 }, { "epoch": 7.264131551901336, "grad_norm": 0.054197005927562714, "learning_rate": 0.01, "loss": 1.993, "step": 70680 }, { "epoch": 7.264439876670092, "grad_norm": 0.0320538766682148, "learning_rate": 0.01, "loss": 1.9539, "step": 70683 }, { "epoch": 7.264748201438849, "grad_norm": 0.03399870917201042, "learning_rate": 0.01, "loss": 1.9871, "step": 70686 }, { "epoch": 7.265056526207605, "grad_norm": 0.115841805934906, "learning_rate": 0.01, "loss": 1.9747, "step": 70689 }, { "epoch": 7.265364850976361, "grad_norm": 0.04310959577560425, "learning_rate": 0.01, "loss": 1.9676, "step": 70692 }, { "epoch": 7.2656731757451185, "grad_norm": 0.16276821494102478, "learning_rate": 0.01, "loss": 1.9722, "step": 70695 }, { "epoch": 7.265981500513875, "grad_norm": 0.08462589234113693, "learning_rate": 0.01, "loss": 1.9689, "step": 70698 }, { "epoch": 7.266289825282631, "grad_norm": 0.05889437720179558, "learning_rate": 0.01, "loss": 1.9698, "step": 70701 }, { "epoch": 7.266598150051387, "grad_norm": 0.0893205776810646, "learning_rate": 0.01, "loss": 1.9788, "step": 70704 }, { "epoch": 7.266906474820144, "grad_norm": 0.03911794722080231, "learning_rate": 0.01, "loss": 1.9949, "step": 70707 }, { "epoch": 7.2672147995889, "grad_norm": 0.043647050857543945, "learning_rate": 0.01, "loss": 1.9657, "step": 70710 }, { "epoch": 7.267523124357656, "grad_norm": 0.03468010574579239, "learning_rate": 0.01, "loss": 1.9792, "step": 70713 }, { "epoch": 7.2678314491264135, "grad_norm": 0.0826035663485527, "learning_rate": 0.01, "loss": 1.9736, "step": 70716 }, { "epoch": 7.26813977389517, "grad_norm": 0.1150084063410759, "learning_rate": 0.01, "loss": 1.9896, "step": 70719 }, { "epoch": 7.268448098663926, "grad_norm": 0.06612154096364975, "learning_rate": 0.01, "loss": 1.972, "step": 70722 }, { "epoch": 7.2687564234326825, "grad_norm": 0.09974373131990433, "learning_rate": 0.01, "loss": 1.9831, "step": 70725 }, { "epoch": 7.269064748201439, "grad_norm": 0.03690455108880997, "learning_rate": 0.01, "loss": 1.9759, "step": 70728 }, { "epoch": 7.269373072970195, "grad_norm": 0.0326729491353035, "learning_rate": 0.01, "loss": 1.9951, "step": 70731 }, { "epoch": 7.269681397738951, "grad_norm": 0.11430519074201584, "learning_rate": 0.01, "loss": 1.9719, "step": 70734 }, { "epoch": 7.269989722507708, "grad_norm": 0.043645069003105164, "learning_rate": 0.01, "loss": 1.9922, "step": 70737 }, { "epoch": 7.270298047276465, "grad_norm": 0.08154550194740295, "learning_rate": 0.01, "loss": 1.9599, "step": 70740 }, { "epoch": 7.270606372045221, "grad_norm": 0.07893747091293335, "learning_rate": 0.01, "loss": 1.9634, "step": 70743 }, { "epoch": 7.2709146968139775, "grad_norm": 0.05688686668872833, "learning_rate": 0.01, "loss": 1.9773, "step": 70746 }, { "epoch": 7.271223021582734, "grad_norm": 0.05141865462064743, "learning_rate": 0.01, "loss": 1.9904, "step": 70749 }, { "epoch": 7.27153134635149, "grad_norm": 0.041718002408742905, "learning_rate": 0.01, "loss": 1.9612, "step": 70752 }, { "epoch": 7.271839671120246, "grad_norm": 0.045142821967601776, "learning_rate": 0.01, "loss": 1.9722, "step": 70755 }, { "epoch": 7.272147995889003, "grad_norm": 0.06518824398517609, "learning_rate": 0.01, "loss": 1.9545, "step": 70758 }, { "epoch": 7.27245632065776, "grad_norm": 0.0417245589196682, "learning_rate": 0.01, "loss": 1.9576, "step": 70761 }, { "epoch": 7.272764645426516, "grad_norm": 0.06344173848628998, "learning_rate": 0.01, "loss": 2.0002, "step": 70764 }, { "epoch": 7.2730729701952725, "grad_norm": 0.1255732625722885, "learning_rate": 0.01, "loss": 1.9906, "step": 70767 }, { "epoch": 7.273381294964029, "grad_norm": 0.08490406721830368, "learning_rate": 0.01, "loss": 1.9812, "step": 70770 }, { "epoch": 7.273689619732785, "grad_norm": 0.0501803420484066, "learning_rate": 0.01, "loss": 1.9995, "step": 70773 }, { "epoch": 7.2739979445015415, "grad_norm": 0.04829498007893562, "learning_rate": 0.01, "loss": 1.9752, "step": 70776 }, { "epoch": 7.274306269270298, "grad_norm": 0.036724966019392014, "learning_rate": 0.01, "loss": 1.9631, "step": 70779 }, { "epoch": 7.274614594039054, "grad_norm": 0.03728116676211357, "learning_rate": 0.01, "loss": 1.987, "step": 70782 }, { "epoch": 7.274922918807811, "grad_norm": 0.03553638234734535, "learning_rate": 0.01, "loss": 1.993, "step": 70785 }, { "epoch": 7.275231243576568, "grad_norm": 0.04956892877817154, "learning_rate": 0.01, "loss": 1.9584, "step": 70788 }, { "epoch": 7.275539568345324, "grad_norm": 0.06011252477765083, "learning_rate": 0.01, "loss": 1.9745, "step": 70791 }, { "epoch": 7.27584789311408, "grad_norm": 0.05820993334054947, "learning_rate": 0.01, "loss": 1.9701, "step": 70794 }, { "epoch": 7.2761562178828365, "grad_norm": 0.11845695972442627, "learning_rate": 0.01, "loss": 1.9939, "step": 70797 }, { "epoch": 7.276464542651593, "grad_norm": 0.05176176875829697, "learning_rate": 0.01, "loss": 1.9594, "step": 70800 }, { "epoch": 7.276772867420349, "grad_norm": 0.043042611330747604, "learning_rate": 0.01, "loss": 1.973, "step": 70803 }, { "epoch": 7.277081192189106, "grad_norm": 0.07894362509250641, "learning_rate": 0.01, "loss": 1.9683, "step": 70806 }, { "epoch": 7.277389516957863, "grad_norm": 0.04929111897945404, "learning_rate": 0.01, "loss": 1.9697, "step": 70809 }, { "epoch": 7.277697841726619, "grad_norm": 0.09846871346235275, "learning_rate": 0.01, "loss": 1.9883, "step": 70812 }, { "epoch": 7.278006166495375, "grad_norm": 0.07354690134525299, "learning_rate": 0.01, "loss": 1.9545, "step": 70815 }, { "epoch": 7.2783144912641315, "grad_norm": 0.1623416393995285, "learning_rate": 0.01, "loss": 1.9678, "step": 70818 }, { "epoch": 7.278622816032888, "grad_norm": 0.11214367300271988, "learning_rate": 0.01, "loss": 1.9592, "step": 70821 }, { "epoch": 7.278931140801644, "grad_norm": 0.06578688323497772, "learning_rate": 0.01, "loss": 1.9622, "step": 70824 }, { "epoch": 7.2792394655704005, "grad_norm": 0.05367565527558327, "learning_rate": 0.01, "loss": 1.9709, "step": 70827 }, { "epoch": 7.279547790339157, "grad_norm": 0.039414871484041214, "learning_rate": 0.01, "loss": 1.9587, "step": 70830 }, { "epoch": 7.279856115107914, "grad_norm": 0.08670930564403534, "learning_rate": 0.01, "loss": 1.9566, "step": 70833 }, { "epoch": 7.28016443987667, "grad_norm": 0.04982636123895645, "learning_rate": 0.01, "loss": 1.9564, "step": 70836 }, { "epoch": 7.280472764645427, "grad_norm": 0.04024531692266464, "learning_rate": 0.01, "loss": 1.995, "step": 70839 }, { "epoch": 7.280781089414183, "grad_norm": 0.05240694060921669, "learning_rate": 0.01, "loss": 1.9623, "step": 70842 }, { "epoch": 7.281089414182939, "grad_norm": 0.047303877770900726, "learning_rate": 0.01, "loss": 1.982, "step": 70845 }, { "epoch": 7.2813977389516955, "grad_norm": 0.14105463027954102, "learning_rate": 0.01, "loss": 1.9583, "step": 70848 }, { "epoch": 7.281706063720452, "grad_norm": 0.05714312568306923, "learning_rate": 0.01, "loss": 1.9712, "step": 70851 }, { "epoch": 7.282014388489209, "grad_norm": 0.10342295467853546, "learning_rate": 0.01, "loss": 1.9757, "step": 70854 }, { "epoch": 7.282322713257965, "grad_norm": 0.12087305635213852, "learning_rate": 0.01, "loss": 1.9753, "step": 70857 }, { "epoch": 7.282631038026722, "grad_norm": 0.05759386718273163, "learning_rate": 0.01, "loss": 1.9868, "step": 70860 }, { "epoch": 7.282939362795478, "grad_norm": 0.04616248235106468, "learning_rate": 0.01, "loss": 1.9835, "step": 70863 }, { "epoch": 7.283247687564234, "grad_norm": 0.04186839610338211, "learning_rate": 0.01, "loss": 1.9878, "step": 70866 }, { "epoch": 7.2835560123329905, "grad_norm": 0.04863681644201279, "learning_rate": 0.01, "loss": 2.0009, "step": 70869 }, { "epoch": 7.283864337101747, "grad_norm": 0.050969015806913376, "learning_rate": 0.01, "loss": 1.9748, "step": 70872 }, { "epoch": 7.284172661870503, "grad_norm": 0.053215015679597855, "learning_rate": 0.01, "loss": 1.9913, "step": 70875 }, { "epoch": 7.28448098663926, "grad_norm": 0.10984474420547485, "learning_rate": 0.01, "loss": 1.9612, "step": 70878 }, { "epoch": 7.284789311408017, "grad_norm": 0.09052685648202896, "learning_rate": 0.01, "loss": 1.9648, "step": 70881 }, { "epoch": 7.285097636176773, "grad_norm": 0.09505495429039001, "learning_rate": 0.01, "loss": 1.9937, "step": 70884 }, { "epoch": 7.285405960945529, "grad_norm": 0.03760739415884018, "learning_rate": 0.01, "loss": 1.9599, "step": 70887 }, { "epoch": 7.285714285714286, "grad_norm": 0.03946032375097275, "learning_rate": 0.01, "loss": 1.9765, "step": 70890 }, { "epoch": 7.286022610483042, "grad_norm": 0.12385135889053345, "learning_rate": 0.01, "loss": 1.9757, "step": 70893 }, { "epoch": 7.286330935251798, "grad_norm": 0.11148396134376526, "learning_rate": 0.01, "loss": 1.9798, "step": 70896 }, { "epoch": 7.286639260020555, "grad_norm": 0.07253854721784592, "learning_rate": 0.01, "loss": 1.9552, "step": 70899 }, { "epoch": 7.286947584789312, "grad_norm": 0.048602595925331116, "learning_rate": 0.01, "loss": 1.965, "step": 70902 }, { "epoch": 7.287255909558068, "grad_norm": 0.05885033681988716, "learning_rate": 0.01, "loss": 1.9788, "step": 70905 }, { "epoch": 7.287564234326824, "grad_norm": 0.057307545095682144, "learning_rate": 0.01, "loss": 1.9777, "step": 70908 }, { "epoch": 7.287872559095581, "grad_norm": 0.08410783857107162, "learning_rate": 0.01, "loss": 1.9587, "step": 70911 }, { "epoch": 7.288180883864337, "grad_norm": 0.07850154489278793, "learning_rate": 0.01, "loss": 1.9745, "step": 70914 }, { "epoch": 7.288489208633093, "grad_norm": 0.07075075805187225, "learning_rate": 0.01, "loss": 1.971, "step": 70917 }, { "epoch": 7.2887975334018495, "grad_norm": 0.07684222608804703, "learning_rate": 0.01, "loss": 1.9801, "step": 70920 }, { "epoch": 7.289105858170607, "grad_norm": 0.10370174795389175, "learning_rate": 0.01, "loss": 1.9632, "step": 70923 }, { "epoch": 7.289414182939363, "grad_norm": 0.15722966194152832, "learning_rate": 0.01, "loss": 1.974, "step": 70926 }, { "epoch": 7.289722507708119, "grad_norm": 0.08678031712770462, "learning_rate": 0.01, "loss": 1.9737, "step": 70929 }, { "epoch": 7.290030832476876, "grad_norm": 0.046776097267866135, "learning_rate": 0.01, "loss": 2.0017, "step": 70932 }, { "epoch": 7.290339157245632, "grad_norm": 0.09871851652860641, "learning_rate": 0.01, "loss": 1.9658, "step": 70935 }, { "epoch": 7.290647482014388, "grad_norm": 0.03779609501361847, "learning_rate": 0.01, "loss": 1.9565, "step": 70938 }, { "epoch": 7.290955806783145, "grad_norm": 0.10300016403198242, "learning_rate": 0.01, "loss": 1.9701, "step": 70941 }, { "epoch": 7.291264131551902, "grad_norm": 0.051939114928245544, "learning_rate": 0.01, "loss": 1.9736, "step": 70944 }, { "epoch": 7.291572456320658, "grad_norm": 0.08400366455316544, "learning_rate": 0.01, "loss": 1.9955, "step": 70947 }, { "epoch": 7.291880781089414, "grad_norm": 0.04677712917327881, "learning_rate": 0.01, "loss": 1.9761, "step": 70950 }, { "epoch": 7.292189105858171, "grad_norm": 0.07676515728235245, "learning_rate": 0.01, "loss": 1.9632, "step": 70953 }, { "epoch": 7.292497430626927, "grad_norm": 0.0831826701760292, "learning_rate": 0.01, "loss": 1.9665, "step": 70956 }, { "epoch": 7.292805755395683, "grad_norm": 0.10186062008142471, "learning_rate": 0.01, "loss": 1.9636, "step": 70959 }, { "epoch": 7.29311408016444, "grad_norm": 0.04252918064594269, "learning_rate": 0.01, "loss": 1.9807, "step": 70962 }, { "epoch": 7.293422404933196, "grad_norm": 0.09480611979961395, "learning_rate": 0.01, "loss": 1.9599, "step": 70965 }, { "epoch": 7.293730729701953, "grad_norm": 0.0868213027715683, "learning_rate": 0.01, "loss": 1.9604, "step": 70968 }, { "epoch": 7.294039054470709, "grad_norm": 0.0904143899679184, "learning_rate": 0.01, "loss": 1.9905, "step": 70971 }, { "epoch": 7.294347379239466, "grad_norm": 0.05634519085288048, "learning_rate": 0.01, "loss": 1.955, "step": 70974 }, { "epoch": 7.294655704008222, "grad_norm": 0.09237989783287048, "learning_rate": 0.01, "loss": 1.945, "step": 70977 }, { "epoch": 7.294964028776978, "grad_norm": 0.07044427841901779, "learning_rate": 0.01, "loss": 1.9583, "step": 70980 }, { "epoch": 7.295272353545735, "grad_norm": 0.04916350916028023, "learning_rate": 0.01, "loss": 1.9545, "step": 70983 }, { "epoch": 7.295580678314491, "grad_norm": 0.037802912294864655, "learning_rate": 0.01, "loss": 1.9877, "step": 70986 }, { "epoch": 7.295889003083247, "grad_norm": 0.04316788166761398, "learning_rate": 0.01, "loss": 1.9766, "step": 70989 }, { "epoch": 7.2961973278520045, "grad_norm": 0.05099441111087799, "learning_rate": 0.01, "loss": 1.9871, "step": 70992 }, { "epoch": 7.296505652620761, "grad_norm": 0.09012050926685333, "learning_rate": 0.01, "loss": 2.0042, "step": 70995 }, { "epoch": 7.296813977389517, "grad_norm": 0.07649296522140503, "learning_rate": 0.01, "loss": 1.9859, "step": 70998 }, { "epoch": 7.297122302158273, "grad_norm": 0.06650659441947937, "learning_rate": 0.01, "loss": 2.0077, "step": 71001 }, { "epoch": 7.29743062692703, "grad_norm": 0.06413618475198746, "learning_rate": 0.01, "loss": 1.9878, "step": 71004 }, { "epoch": 7.297738951695786, "grad_norm": 0.039578627794981, "learning_rate": 0.01, "loss": 1.9475, "step": 71007 }, { "epoch": 7.298047276464542, "grad_norm": 0.07007674872875214, "learning_rate": 0.01, "loss": 1.9933, "step": 71010 }, { "epoch": 7.298355601233299, "grad_norm": 0.05441688373684883, "learning_rate": 0.01, "loss": 1.9642, "step": 71013 }, { "epoch": 7.298663926002056, "grad_norm": 0.04143676534295082, "learning_rate": 0.01, "loss": 1.9376, "step": 71016 }, { "epoch": 7.298972250770812, "grad_norm": 0.12231336534023285, "learning_rate": 0.01, "loss": 1.9736, "step": 71019 }, { "epoch": 7.299280575539568, "grad_norm": 0.03967202827334404, "learning_rate": 0.01, "loss": 1.9925, "step": 71022 }, { "epoch": 7.299588900308325, "grad_norm": 0.08297855406999588, "learning_rate": 0.01, "loss": 1.9773, "step": 71025 }, { "epoch": 7.299897225077081, "grad_norm": 0.08017514646053314, "learning_rate": 0.01, "loss": 1.9508, "step": 71028 }, { "epoch": 7.300205549845837, "grad_norm": 0.06581740826368332, "learning_rate": 0.01, "loss": 1.9817, "step": 71031 }, { "epoch": 7.300513874614594, "grad_norm": 0.10993166267871857, "learning_rate": 0.01, "loss": 1.9904, "step": 71034 }, { "epoch": 7.300822199383351, "grad_norm": 0.04322252422571182, "learning_rate": 0.01, "loss": 1.9762, "step": 71037 }, { "epoch": 7.301130524152107, "grad_norm": 0.04396703466773033, "learning_rate": 0.01, "loss": 1.9789, "step": 71040 }, { "epoch": 7.3014388489208635, "grad_norm": 0.042626939713954926, "learning_rate": 0.01, "loss": 1.9913, "step": 71043 }, { "epoch": 7.30174717368962, "grad_norm": 0.03424423187971115, "learning_rate": 0.01, "loss": 1.9588, "step": 71046 }, { "epoch": 7.302055498458376, "grad_norm": 0.11951079219579697, "learning_rate": 0.01, "loss": 1.9811, "step": 71049 }, { "epoch": 7.302363823227132, "grad_norm": 0.07407492399215698, "learning_rate": 0.01, "loss": 1.9699, "step": 71052 }, { "epoch": 7.302672147995889, "grad_norm": 0.0694723054766655, "learning_rate": 0.01, "loss": 1.9802, "step": 71055 }, { "epoch": 7.302980472764645, "grad_norm": 0.042367156594991684, "learning_rate": 0.01, "loss": 1.999, "step": 71058 }, { "epoch": 7.303288797533402, "grad_norm": 0.052406225353479385, "learning_rate": 0.01, "loss": 1.9671, "step": 71061 }, { "epoch": 7.3035971223021585, "grad_norm": 0.060722094029188156, "learning_rate": 0.01, "loss": 1.9819, "step": 71064 }, { "epoch": 7.303905447070915, "grad_norm": 0.0947566032409668, "learning_rate": 0.01, "loss": 1.9784, "step": 71067 }, { "epoch": 7.304213771839671, "grad_norm": 0.14418601989746094, "learning_rate": 0.01, "loss": 1.9688, "step": 71070 }, { "epoch": 7.304522096608427, "grad_norm": 0.0899231880903244, "learning_rate": 0.01, "loss": 1.9399, "step": 71073 }, { "epoch": 7.304830421377184, "grad_norm": 0.057492516934871674, "learning_rate": 0.01, "loss": 1.9743, "step": 71076 }, { "epoch": 7.30513874614594, "grad_norm": 0.042988330125808716, "learning_rate": 0.01, "loss": 1.9491, "step": 71079 }, { "epoch": 7.305447070914697, "grad_norm": 0.06894852221012115, "learning_rate": 0.01, "loss": 1.982, "step": 71082 }, { "epoch": 7.305755395683454, "grad_norm": 0.047955472022295, "learning_rate": 0.01, "loss": 1.9565, "step": 71085 }, { "epoch": 7.30606372045221, "grad_norm": 0.0502055399119854, "learning_rate": 0.01, "loss": 1.9518, "step": 71088 }, { "epoch": 7.306372045220966, "grad_norm": 0.042237646877765656, "learning_rate": 0.01, "loss": 1.9566, "step": 71091 }, { "epoch": 7.3066803699897225, "grad_norm": 0.07256586849689484, "learning_rate": 0.01, "loss": 1.9769, "step": 71094 }, { "epoch": 7.306988694758479, "grad_norm": 0.12676836550235748, "learning_rate": 0.01, "loss": 1.9804, "step": 71097 }, { "epoch": 7.307297019527235, "grad_norm": 0.10870865732431412, "learning_rate": 0.01, "loss": 1.9904, "step": 71100 }, { "epoch": 7.307605344295991, "grad_norm": 0.10361792892217636, "learning_rate": 0.01, "loss": 1.9598, "step": 71103 }, { "epoch": 7.307913669064749, "grad_norm": 0.05035272613167763, "learning_rate": 0.01, "loss": 1.9791, "step": 71106 }, { "epoch": 7.308221993833505, "grad_norm": 0.09772878140211105, "learning_rate": 0.01, "loss": 1.958, "step": 71109 }, { "epoch": 7.308530318602261, "grad_norm": 0.05700725317001343, "learning_rate": 0.01, "loss": 1.9465, "step": 71112 }, { "epoch": 7.3088386433710175, "grad_norm": 0.04437270760536194, "learning_rate": 0.01, "loss": 1.9662, "step": 71115 }, { "epoch": 7.309146968139774, "grad_norm": 0.0368645042181015, "learning_rate": 0.01, "loss": 1.9758, "step": 71118 }, { "epoch": 7.30945529290853, "grad_norm": 0.0390106737613678, "learning_rate": 0.01, "loss": 1.9644, "step": 71121 }, { "epoch": 7.309763617677286, "grad_norm": 0.18610909581184387, "learning_rate": 0.01, "loss": 1.9825, "step": 71124 }, { "epoch": 7.310071942446044, "grad_norm": 0.16700612008571625, "learning_rate": 0.01, "loss": 1.9692, "step": 71127 }, { "epoch": 7.3103802672148, "grad_norm": 0.15059486031532288, "learning_rate": 0.01, "loss": 2.0078, "step": 71130 }, { "epoch": 7.310688591983556, "grad_norm": 0.08873619139194489, "learning_rate": 0.01, "loss": 1.9713, "step": 71133 }, { "epoch": 7.310996916752313, "grad_norm": 0.05713324993848801, "learning_rate": 0.01, "loss": 1.9604, "step": 71136 }, { "epoch": 7.311305241521069, "grad_norm": 0.04134386405348778, "learning_rate": 0.01, "loss": 1.9697, "step": 71139 }, { "epoch": 7.311613566289825, "grad_norm": 0.03700621798634529, "learning_rate": 0.01, "loss": 1.9759, "step": 71142 }, { "epoch": 7.3119218910585815, "grad_norm": 0.04931655153632164, "learning_rate": 0.01, "loss": 1.9912, "step": 71145 }, { "epoch": 7.312230215827338, "grad_norm": 0.03286673501133919, "learning_rate": 0.01, "loss": 1.9717, "step": 71148 }, { "epoch": 7.312538540596094, "grad_norm": 0.04770418256521225, "learning_rate": 0.01, "loss": 1.9702, "step": 71151 }, { "epoch": 7.312846865364851, "grad_norm": 0.04040040448307991, "learning_rate": 0.01, "loss": 1.9651, "step": 71154 }, { "epoch": 7.313155190133608, "grad_norm": 0.041751354932785034, "learning_rate": 0.01, "loss": 1.9944, "step": 71157 }, { "epoch": 7.313463514902364, "grad_norm": 0.05756094679236412, "learning_rate": 0.01, "loss": 2.0109, "step": 71160 }, { "epoch": 7.31377183967112, "grad_norm": 0.0506497286260128, "learning_rate": 0.01, "loss": 1.9813, "step": 71163 }, { "epoch": 7.3140801644398765, "grad_norm": 0.11589113622903824, "learning_rate": 0.01, "loss": 1.9893, "step": 71166 }, { "epoch": 7.314388489208633, "grad_norm": 0.042656585574150085, "learning_rate": 0.01, "loss": 1.9667, "step": 71169 }, { "epoch": 7.314696813977389, "grad_norm": 0.09102977067232132, "learning_rate": 0.01, "loss": 1.9528, "step": 71172 }, { "epoch": 7.315005138746146, "grad_norm": 0.07668044418096542, "learning_rate": 0.01, "loss": 1.9644, "step": 71175 }, { "epoch": 7.315313463514903, "grad_norm": 0.1044536828994751, "learning_rate": 0.01, "loss": 1.9974, "step": 71178 }, { "epoch": 7.315621788283659, "grad_norm": 0.03361400589346886, "learning_rate": 0.01, "loss": 1.9731, "step": 71181 }, { "epoch": 7.315930113052415, "grad_norm": 0.036341775208711624, "learning_rate": 0.01, "loss": 1.9709, "step": 71184 }, { "epoch": 7.316238437821172, "grad_norm": 0.06732170283794403, "learning_rate": 0.01, "loss": 1.9882, "step": 71187 }, { "epoch": 7.316546762589928, "grad_norm": 0.056870389729738235, "learning_rate": 0.01, "loss": 1.9713, "step": 71190 }, { "epoch": 7.316855087358684, "grad_norm": 0.05577719956636429, "learning_rate": 0.01, "loss": 1.9871, "step": 71193 }, { "epoch": 7.3171634121274405, "grad_norm": 0.045024048537015915, "learning_rate": 0.01, "loss": 1.9817, "step": 71196 }, { "epoch": 7.317471736896198, "grad_norm": 0.05428456515073776, "learning_rate": 0.01, "loss": 1.953, "step": 71199 }, { "epoch": 7.317780061664954, "grad_norm": 0.0677822083234787, "learning_rate": 0.01, "loss": 1.9547, "step": 71202 }, { "epoch": 7.31808838643371, "grad_norm": 0.10919659584760666, "learning_rate": 0.01, "loss": 1.9506, "step": 71205 }, { "epoch": 7.318396711202467, "grad_norm": 0.0439334474503994, "learning_rate": 0.01, "loss": 1.9684, "step": 71208 }, { "epoch": 7.318705035971223, "grad_norm": 0.04475986957550049, "learning_rate": 0.01, "loss": 1.9696, "step": 71211 }, { "epoch": 7.319013360739979, "grad_norm": 0.04004666581749916, "learning_rate": 0.01, "loss": 1.9715, "step": 71214 }, { "epoch": 7.3193216855087355, "grad_norm": 0.03669333830475807, "learning_rate": 0.01, "loss": 1.9646, "step": 71217 }, { "epoch": 7.319630010277493, "grad_norm": 0.06024492159485817, "learning_rate": 0.01, "loss": 1.9629, "step": 71220 }, { "epoch": 7.319938335046249, "grad_norm": 0.052151869982481, "learning_rate": 0.01, "loss": 1.9739, "step": 71223 }, { "epoch": 7.320246659815005, "grad_norm": 0.04867462441325188, "learning_rate": 0.01, "loss": 1.9677, "step": 71226 }, { "epoch": 7.320554984583762, "grad_norm": 0.13954968750476837, "learning_rate": 0.01, "loss": 2.0095, "step": 71229 }, { "epoch": 7.320863309352518, "grad_norm": 0.07975666970014572, "learning_rate": 0.01, "loss": 2.0028, "step": 71232 }, { "epoch": 7.321171634121274, "grad_norm": 0.06927415728569031, "learning_rate": 0.01, "loss": 1.9721, "step": 71235 }, { "epoch": 7.321479958890031, "grad_norm": 0.05637050047516823, "learning_rate": 0.01, "loss": 1.9822, "step": 71238 }, { "epoch": 7.321788283658787, "grad_norm": 0.037037819623947144, "learning_rate": 0.01, "loss": 1.9774, "step": 71241 }, { "epoch": 7.322096608427544, "grad_norm": 0.03341764956712723, "learning_rate": 0.01, "loss": 1.9741, "step": 71244 }, { "epoch": 7.3224049331963, "grad_norm": 0.11337383091449738, "learning_rate": 0.01, "loss": 1.9677, "step": 71247 }, { "epoch": 7.322713257965057, "grad_norm": 0.05702521279454231, "learning_rate": 0.01, "loss": 1.9655, "step": 71250 }, { "epoch": 7.323021582733813, "grad_norm": 0.06875944882631302, "learning_rate": 0.01, "loss": 1.9905, "step": 71253 }, { "epoch": 7.323329907502569, "grad_norm": 0.09147006273269653, "learning_rate": 0.01, "loss": 1.9826, "step": 71256 }, { "epoch": 7.323638232271326, "grad_norm": 0.07072997838258743, "learning_rate": 0.01, "loss": 1.9762, "step": 71259 }, { "epoch": 7.323946557040082, "grad_norm": 0.08873667567968369, "learning_rate": 0.01, "loss": 1.967, "step": 71262 }, { "epoch": 7.324254881808839, "grad_norm": 0.06994672864675522, "learning_rate": 0.01, "loss": 1.9583, "step": 71265 }, { "epoch": 7.324563206577595, "grad_norm": 0.06470712274312973, "learning_rate": 0.01, "loss": 1.9661, "step": 71268 }, { "epoch": 7.324871531346352, "grad_norm": 0.07046987116336823, "learning_rate": 0.01, "loss": 1.9615, "step": 71271 }, { "epoch": 7.325179856115108, "grad_norm": 0.07113350182771683, "learning_rate": 0.01, "loss": 1.9791, "step": 71274 }, { "epoch": 7.325488180883864, "grad_norm": 0.07272438704967499, "learning_rate": 0.01, "loss": 1.9981, "step": 71277 }, { "epoch": 7.325796505652621, "grad_norm": 0.14653034508228302, "learning_rate": 0.01, "loss": 1.9831, "step": 71280 }, { "epoch": 7.326104830421377, "grad_norm": 0.0946449339389801, "learning_rate": 0.01, "loss": 1.9766, "step": 71283 }, { "epoch": 7.326413155190133, "grad_norm": 0.041181258857250214, "learning_rate": 0.01, "loss": 1.9871, "step": 71286 }, { "epoch": 7.32672147995889, "grad_norm": 0.049023959785699844, "learning_rate": 0.01, "loss": 1.9592, "step": 71289 }, { "epoch": 7.327029804727647, "grad_norm": 0.03693239763379097, "learning_rate": 0.01, "loss": 1.9956, "step": 71292 }, { "epoch": 7.327338129496403, "grad_norm": 0.03846334293484688, "learning_rate": 0.01, "loss": 1.989, "step": 71295 }, { "epoch": 7.327646454265159, "grad_norm": 0.061339206993579865, "learning_rate": 0.01, "loss": 1.9696, "step": 71298 }, { "epoch": 7.327954779033916, "grad_norm": 0.056017693132162094, "learning_rate": 0.01, "loss": 1.9807, "step": 71301 }, { "epoch": 7.328263103802672, "grad_norm": 0.046218231320381165, "learning_rate": 0.01, "loss": 1.9777, "step": 71304 }, { "epoch": 7.328571428571428, "grad_norm": 0.04835920408368111, "learning_rate": 0.01, "loss": 1.9642, "step": 71307 }, { "epoch": 7.328879753340185, "grad_norm": 0.035025544464588165, "learning_rate": 0.01, "loss": 1.9478, "step": 71310 }, { "epoch": 7.329188078108942, "grad_norm": 0.048764798790216446, "learning_rate": 0.01, "loss": 1.991, "step": 71313 }, { "epoch": 7.329496402877698, "grad_norm": 0.05903776362538338, "learning_rate": 0.01, "loss": 1.9718, "step": 71316 }, { "epoch": 7.329804727646454, "grad_norm": 0.05478388071060181, "learning_rate": 0.01, "loss": 1.9863, "step": 71319 }, { "epoch": 7.330113052415211, "grad_norm": 0.1303192377090454, "learning_rate": 0.01, "loss": 1.9541, "step": 71322 }, { "epoch": 7.330421377183967, "grad_norm": 0.10789795964956284, "learning_rate": 0.01, "loss": 1.9923, "step": 71325 }, { "epoch": 7.330729701952723, "grad_norm": 0.07266957312822342, "learning_rate": 0.01, "loss": 1.9828, "step": 71328 }, { "epoch": 7.33103802672148, "grad_norm": 0.043115101754665375, "learning_rate": 0.01, "loss": 1.9765, "step": 71331 }, { "epoch": 7.331346351490236, "grad_norm": 0.04297550767660141, "learning_rate": 0.01, "loss": 1.9643, "step": 71334 }, { "epoch": 7.331654676258993, "grad_norm": 0.1441577821969986, "learning_rate": 0.01, "loss": 1.9488, "step": 71337 }, { "epoch": 7.3319630010277494, "grad_norm": 0.04849548637866974, "learning_rate": 0.01, "loss": 1.9896, "step": 71340 }, { "epoch": 7.332271325796506, "grad_norm": 0.03402062505483627, "learning_rate": 0.01, "loss": 1.998, "step": 71343 }, { "epoch": 7.332579650565262, "grad_norm": 0.03585872799158096, "learning_rate": 0.01, "loss": 1.9636, "step": 71346 }, { "epoch": 7.332887975334018, "grad_norm": 0.07403067499399185, "learning_rate": 0.01, "loss": 1.99, "step": 71349 }, { "epoch": 7.333196300102775, "grad_norm": 0.05098595842719078, "learning_rate": 0.01, "loss": 1.9666, "step": 71352 }, { "epoch": 7.333504624871531, "grad_norm": 0.06518220156431198, "learning_rate": 0.01, "loss": 1.9746, "step": 71355 }, { "epoch": 7.333812949640288, "grad_norm": 0.04573628678917885, "learning_rate": 0.01, "loss": 1.9544, "step": 71358 }, { "epoch": 7.3341212744090445, "grad_norm": 0.07890237867832184, "learning_rate": 0.01, "loss": 1.9779, "step": 71361 }, { "epoch": 7.334429599177801, "grad_norm": 0.09591496735811234, "learning_rate": 0.01, "loss": 1.9793, "step": 71364 }, { "epoch": 7.334737923946557, "grad_norm": 0.13394559919834137, "learning_rate": 0.01, "loss": 1.9665, "step": 71367 }, { "epoch": 7.335046248715313, "grad_norm": 0.10109490901231766, "learning_rate": 0.01, "loss": 1.9822, "step": 71370 }, { "epoch": 7.33535457348407, "grad_norm": 0.09080103784799576, "learning_rate": 0.01, "loss": 1.9618, "step": 71373 }, { "epoch": 7.335662898252826, "grad_norm": 0.09327443689107895, "learning_rate": 0.01, "loss": 1.967, "step": 71376 }, { "epoch": 7.335971223021582, "grad_norm": 0.05303800478577614, "learning_rate": 0.01, "loss": 1.9694, "step": 71379 }, { "epoch": 7.3362795477903395, "grad_norm": 0.04416405409574509, "learning_rate": 0.01, "loss": 1.9758, "step": 71382 }, { "epoch": 7.336587872559096, "grad_norm": 0.031716201454401016, "learning_rate": 0.01, "loss": 1.9407, "step": 71385 }, { "epoch": 7.336896197327852, "grad_norm": 0.045393042266368866, "learning_rate": 0.01, "loss": 1.9502, "step": 71388 }, { "epoch": 7.3372045220966084, "grad_norm": 0.09052672982215881, "learning_rate": 0.01, "loss": 1.9949, "step": 71391 }, { "epoch": 7.337512846865365, "grad_norm": 0.046372365206480026, "learning_rate": 0.01, "loss": 1.9569, "step": 71394 }, { "epoch": 7.337821171634121, "grad_norm": 0.037272438406944275, "learning_rate": 0.01, "loss": 1.9836, "step": 71397 }, { "epoch": 7.338129496402877, "grad_norm": 0.10809680074453354, "learning_rate": 0.01, "loss": 1.9743, "step": 71400 }, { "epoch": 7.338437821171635, "grad_norm": 0.15810643136501312, "learning_rate": 0.01, "loss": 1.9703, "step": 71403 }, { "epoch": 7.338746145940391, "grad_norm": 0.13274040818214417, "learning_rate": 0.01, "loss": 1.9783, "step": 71406 }, { "epoch": 7.339054470709147, "grad_norm": 0.09903276711702347, "learning_rate": 0.01, "loss": 1.9656, "step": 71409 }, { "epoch": 7.3393627954779035, "grad_norm": 0.06785092502832413, "learning_rate": 0.01, "loss": 1.9448, "step": 71412 }, { "epoch": 7.33967112024666, "grad_norm": 0.044940706342458725, "learning_rate": 0.01, "loss": 1.9551, "step": 71415 }, { "epoch": 7.339979445015416, "grad_norm": 0.04073977470397949, "learning_rate": 0.01, "loss": 1.9565, "step": 71418 }, { "epoch": 7.340287769784172, "grad_norm": 0.03847421333193779, "learning_rate": 0.01, "loss": 1.9783, "step": 71421 }, { "epoch": 7.340596094552929, "grad_norm": 0.04725421220064163, "learning_rate": 0.01, "loss": 1.9617, "step": 71424 }, { "epoch": 7.340904419321686, "grad_norm": 0.05868101492524147, "learning_rate": 0.01, "loss": 1.9856, "step": 71427 }, { "epoch": 7.341212744090442, "grad_norm": 0.06789273023605347, "learning_rate": 0.01, "loss": 1.9981, "step": 71430 }, { "epoch": 7.3415210688591985, "grad_norm": 0.09006938338279724, "learning_rate": 0.01, "loss": 1.9455, "step": 71433 }, { "epoch": 7.341829393627955, "grad_norm": 0.05754098296165466, "learning_rate": 0.01, "loss": 1.9613, "step": 71436 }, { "epoch": 7.342137718396711, "grad_norm": 0.03706538304686546, "learning_rate": 0.01, "loss": 1.9569, "step": 71439 }, { "epoch": 7.3424460431654675, "grad_norm": 0.04427531361579895, "learning_rate": 0.01, "loss": 1.9654, "step": 71442 }, { "epoch": 7.342754367934224, "grad_norm": 0.0534258633852005, "learning_rate": 0.01, "loss": 1.9582, "step": 71445 }, { "epoch": 7.34306269270298, "grad_norm": 0.0816885381937027, "learning_rate": 0.01, "loss": 1.9931, "step": 71448 }, { "epoch": 7.343371017471737, "grad_norm": 0.09698469191789627, "learning_rate": 0.01, "loss": 1.9762, "step": 71451 }, { "epoch": 7.343679342240494, "grad_norm": 0.043008897453546524, "learning_rate": 0.01, "loss": 1.9827, "step": 71454 }, { "epoch": 7.34398766700925, "grad_norm": 0.1080782413482666, "learning_rate": 0.01, "loss": 1.9828, "step": 71457 }, { "epoch": 7.344295991778006, "grad_norm": 0.06114106625318527, "learning_rate": 0.01, "loss": 1.9422, "step": 71460 }, { "epoch": 7.3446043165467625, "grad_norm": 0.08944262564182281, "learning_rate": 0.01, "loss": 1.9367, "step": 71463 }, { "epoch": 7.344912641315519, "grad_norm": 0.08879029750823975, "learning_rate": 0.01, "loss": 1.9766, "step": 71466 }, { "epoch": 7.345220966084275, "grad_norm": 0.09932418167591095, "learning_rate": 0.01, "loss": 1.9699, "step": 71469 }, { "epoch": 7.345529290853031, "grad_norm": 0.050661344081163406, "learning_rate": 0.01, "loss": 1.9641, "step": 71472 }, { "epoch": 7.345837615621789, "grad_norm": 0.04675713926553726, "learning_rate": 0.01, "loss": 1.9806, "step": 71475 }, { "epoch": 7.346145940390545, "grad_norm": 0.04077441617846489, "learning_rate": 0.01, "loss": 1.975, "step": 71478 }, { "epoch": 7.346454265159301, "grad_norm": 0.04659559577703476, "learning_rate": 0.01, "loss": 1.9641, "step": 71481 }, { "epoch": 7.3467625899280575, "grad_norm": 0.09812557697296143, "learning_rate": 0.01, "loss": 1.9776, "step": 71484 }, { "epoch": 7.347070914696814, "grad_norm": 0.03201153874397278, "learning_rate": 0.01, "loss": 1.9826, "step": 71487 }, { "epoch": 7.34737923946557, "grad_norm": 0.12122839689254761, "learning_rate": 0.01, "loss": 1.9794, "step": 71490 }, { "epoch": 7.3476875642343265, "grad_norm": 0.11808911710977554, "learning_rate": 0.01, "loss": 1.9652, "step": 71493 }, { "epoch": 7.347995889003084, "grad_norm": 0.06771431118249893, "learning_rate": 0.01, "loss": 1.9831, "step": 71496 }, { "epoch": 7.34830421377184, "grad_norm": 0.07350712269544601, "learning_rate": 0.01, "loss": 1.9684, "step": 71499 }, { "epoch": 7.348612538540596, "grad_norm": 0.043973151594400406, "learning_rate": 0.01, "loss": 1.9729, "step": 71502 }, { "epoch": 7.348920863309353, "grad_norm": 0.03686361759901047, "learning_rate": 0.01, "loss": 1.9859, "step": 71505 }, { "epoch": 7.349229188078109, "grad_norm": 0.07905039191246033, "learning_rate": 0.01, "loss": 1.9651, "step": 71508 }, { "epoch": 7.349537512846865, "grad_norm": 0.05678734928369522, "learning_rate": 0.01, "loss": 1.9825, "step": 71511 }, { "epoch": 7.3498458376156215, "grad_norm": 0.10984398424625397, "learning_rate": 0.01, "loss": 1.9733, "step": 71514 }, { "epoch": 7.350154162384378, "grad_norm": 0.11405175179243088, "learning_rate": 0.01, "loss": 1.9807, "step": 71517 }, { "epoch": 7.350462487153135, "grad_norm": 0.04801741987466812, "learning_rate": 0.01, "loss": 1.9724, "step": 71520 }, { "epoch": 7.350770811921891, "grad_norm": 0.04515586793422699, "learning_rate": 0.01, "loss": 1.9765, "step": 71523 }, { "epoch": 7.351079136690648, "grad_norm": 0.06455957144498825, "learning_rate": 0.01, "loss": 1.9742, "step": 71526 }, { "epoch": 7.351387461459404, "grad_norm": 0.032415423542261124, "learning_rate": 0.01, "loss": 1.977, "step": 71529 }, { "epoch": 7.35169578622816, "grad_norm": 0.08034674823284149, "learning_rate": 0.01, "loss": 1.9948, "step": 71532 }, { "epoch": 7.3520041109969165, "grad_norm": 0.08528207242488861, "learning_rate": 0.01, "loss": 1.9709, "step": 71535 }, { "epoch": 7.352312435765673, "grad_norm": 0.04610508680343628, "learning_rate": 0.01, "loss": 1.9937, "step": 71538 }, { "epoch": 7.35262076053443, "grad_norm": 0.09876418858766556, "learning_rate": 0.01, "loss": 1.9726, "step": 71541 }, { "epoch": 7.352929085303186, "grad_norm": 0.04421771690249443, "learning_rate": 0.01, "loss": 1.9937, "step": 71544 }, { "epoch": 7.353237410071943, "grad_norm": 0.09357340633869171, "learning_rate": 0.01, "loss": 1.9725, "step": 71547 }, { "epoch": 7.353545734840699, "grad_norm": 0.07575297355651855, "learning_rate": 0.01, "loss": 1.9975, "step": 71550 }, { "epoch": 7.353854059609455, "grad_norm": 0.07043871283531189, "learning_rate": 0.01, "loss": 1.9864, "step": 71553 }, { "epoch": 7.354162384378212, "grad_norm": 0.08759008347988129, "learning_rate": 0.01, "loss": 1.9678, "step": 71556 }, { "epoch": 7.354470709146968, "grad_norm": 0.07198178768157959, "learning_rate": 0.01, "loss": 1.9822, "step": 71559 }, { "epoch": 7.354779033915724, "grad_norm": 0.039580024778842926, "learning_rate": 0.01, "loss": 1.9764, "step": 71562 }, { "epoch": 7.355087358684481, "grad_norm": 0.13564977049827576, "learning_rate": 0.01, "loss": 1.9681, "step": 71565 }, { "epoch": 7.355395683453238, "grad_norm": 0.07854889333248138, "learning_rate": 0.01, "loss": 1.941, "step": 71568 }, { "epoch": 7.355704008221994, "grad_norm": 0.057042330503463745, "learning_rate": 0.01, "loss": 1.9599, "step": 71571 }, { "epoch": 7.35601233299075, "grad_norm": 0.15226583182811737, "learning_rate": 0.01, "loss": 1.9563, "step": 71574 }, { "epoch": 7.356320657759507, "grad_norm": 0.07464998960494995, "learning_rate": 0.01, "loss": 1.9806, "step": 71577 }, { "epoch": 7.356628982528263, "grad_norm": 0.04508376121520996, "learning_rate": 0.01, "loss": 1.9572, "step": 71580 }, { "epoch": 7.356937307297019, "grad_norm": 0.03513191640377045, "learning_rate": 0.01, "loss": 1.9536, "step": 71583 }, { "epoch": 7.357245632065776, "grad_norm": 0.048798494040966034, "learning_rate": 0.01, "loss": 1.9597, "step": 71586 }, { "epoch": 7.357553956834533, "grad_norm": 0.053385645151138306, "learning_rate": 0.01, "loss": 1.9597, "step": 71589 }, { "epoch": 7.357862281603289, "grad_norm": 0.0522787980735302, "learning_rate": 0.01, "loss": 1.9684, "step": 71592 }, { "epoch": 7.358170606372045, "grad_norm": 0.07104373723268509, "learning_rate": 0.01, "loss": 1.9803, "step": 71595 }, { "epoch": 7.358478931140802, "grad_norm": 0.08457489311695099, "learning_rate": 0.01, "loss": 1.983, "step": 71598 }, { "epoch": 7.358787255909558, "grad_norm": 0.07561776041984558, "learning_rate": 0.01, "loss": 1.9959, "step": 71601 }, { "epoch": 7.359095580678314, "grad_norm": 0.07273251563310623, "learning_rate": 0.01, "loss": 1.9514, "step": 71604 }, { "epoch": 7.359403905447071, "grad_norm": 0.14364686608314514, "learning_rate": 0.01, "loss": 1.9789, "step": 71607 }, { "epoch": 7.359712230215827, "grad_norm": 0.06388013809919357, "learning_rate": 0.01, "loss": 1.9834, "step": 71610 }, { "epoch": 7.360020554984584, "grad_norm": 0.03216857835650444, "learning_rate": 0.01, "loss": 1.9629, "step": 71613 }, { "epoch": 7.36032887975334, "grad_norm": 0.05028613656759262, "learning_rate": 0.01, "loss": 1.989, "step": 71616 }, { "epoch": 7.360637204522097, "grad_norm": 0.06463950127363205, "learning_rate": 0.01, "loss": 2.0118, "step": 71619 }, { "epoch": 7.360945529290853, "grad_norm": 0.05385703966021538, "learning_rate": 0.01, "loss": 1.9619, "step": 71622 }, { "epoch": 7.361253854059609, "grad_norm": 0.05802121013402939, "learning_rate": 0.01, "loss": 1.9888, "step": 71625 }, { "epoch": 7.361562178828366, "grad_norm": 0.045689478516578674, "learning_rate": 0.01, "loss": 2.001, "step": 71628 }, { "epoch": 7.361870503597122, "grad_norm": 0.039995331317186356, "learning_rate": 0.01, "loss": 1.9995, "step": 71631 }, { "epoch": 7.362178828365879, "grad_norm": 0.11462247371673584, "learning_rate": 0.01, "loss": 1.957, "step": 71634 }, { "epoch": 7.362487153134635, "grad_norm": 0.0777439996600151, "learning_rate": 0.01, "loss": 1.9821, "step": 71637 }, { "epoch": 7.362795477903392, "grad_norm": 0.045238759368658066, "learning_rate": 0.01, "loss": 1.9686, "step": 71640 }, { "epoch": 7.363103802672148, "grad_norm": 0.02978505939245224, "learning_rate": 0.01, "loss": 2.0006, "step": 71643 }, { "epoch": 7.363412127440904, "grad_norm": 0.058491241186857224, "learning_rate": 0.01, "loss": 1.9703, "step": 71646 }, { "epoch": 7.363720452209661, "grad_norm": 0.037735093384981155, "learning_rate": 0.01, "loss": 1.95, "step": 71649 }, { "epoch": 7.364028776978417, "grad_norm": 0.030737249180674553, "learning_rate": 0.01, "loss": 1.9954, "step": 71652 }, { "epoch": 7.364337101747173, "grad_norm": 0.03551234304904938, "learning_rate": 0.01, "loss": 1.9767, "step": 71655 }, { "epoch": 7.3646454265159305, "grad_norm": 0.03984438255429268, "learning_rate": 0.01, "loss": 1.9837, "step": 71658 }, { "epoch": 7.364953751284687, "grad_norm": 0.16038914024829865, "learning_rate": 0.01, "loss": 1.9578, "step": 71661 }, { "epoch": 7.365262076053443, "grad_norm": 0.04318477585911751, "learning_rate": 0.01, "loss": 1.963, "step": 71664 }, { "epoch": 7.365570400822199, "grad_norm": 0.04927264153957367, "learning_rate": 0.01, "loss": 1.9618, "step": 71667 }, { "epoch": 7.365878725590956, "grad_norm": 0.052578482776880264, "learning_rate": 0.01, "loss": 1.9688, "step": 71670 }, { "epoch": 7.366187050359712, "grad_norm": 0.04304051399230957, "learning_rate": 0.01, "loss": 1.9801, "step": 71673 }, { "epoch": 7.366495375128468, "grad_norm": 0.11516401916742325, "learning_rate": 0.01, "loss": 1.9807, "step": 71676 }, { "epoch": 7.3668036998972255, "grad_norm": 0.07391729950904846, "learning_rate": 0.01, "loss": 1.9724, "step": 71679 }, { "epoch": 7.367112024665982, "grad_norm": 0.04147100821137428, "learning_rate": 0.01, "loss": 1.9786, "step": 71682 }, { "epoch": 7.367420349434738, "grad_norm": 0.0550803616642952, "learning_rate": 0.01, "loss": 1.9597, "step": 71685 }, { "epoch": 7.367728674203494, "grad_norm": 0.06658782064914703, "learning_rate": 0.01, "loss": 1.9748, "step": 71688 }, { "epoch": 7.368036998972251, "grad_norm": 0.07493577152490616, "learning_rate": 0.01, "loss": 1.9681, "step": 71691 }, { "epoch": 7.368345323741007, "grad_norm": 0.06494047492742538, "learning_rate": 0.01, "loss": 1.9648, "step": 71694 }, { "epoch": 7.368653648509763, "grad_norm": 0.044212259352207184, "learning_rate": 0.01, "loss": 1.977, "step": 71697 }, { "epoch": 7.36896197327852, "grad_norm": 0.04893089085817337, "learning_rate": 0.01, "loss": 1.9582, "step": 71700 }, { "epoch": 7.369270298047277, "grad_norm": 0.06412500143051147, "learning_rate": 0.01, "loss": 1.9721, "step": 71703 }, { "epoch": 7.369578622816033, "grad_norm": 0.11510922014713287, "learning_rate": 0.01, "loss": 1.9696, "step": 71706 }, { "epoch": 7.3698869475847895, "grad_norm": 0.06492109596729279, "learning_rate": 0.01, "loss": 1.9627, "step": 71709 }, { "epoch": 7.370195272353546, "grad_norm": 0.07777494192123413, "learning_rate": 0.01, "loss": 1.9891, "step": 71712 }, { "epoch": 7.370503597122302, "grad_norm": 0.06416970491409302, "learning_rate": 0.01, "loss": 1.9724, "step": 71715 }, { "epoch": 7.370811921891058, "grad_norm": 0.11529435962438583, "learning_rate": 0.01, "loss": 1.9571, "step": 71718 }, { "epoch": 7.371120246659815, "grad_norm": 0.033961158245801926, "learning_rate": 0.01, "loss": 1.9622, "step": 71721 }, { "epoch": 7.371428571428572, "grad_norm": 0.09541164338588715, "learning_rate": 0.01, "loss": 1.9745, "step": 71724 }, { "epoch": 7.371736896197328, "grad_norm": 0.05110830441117287, "learning_rate": 0.01, "loss": 1.9636, "step": 71727 }, { "epoch": 7.3720452209660845, "grad_norm": 0.08816475421190262, "learning_rate": 0.01, "loss": 1.955, "step": 71730 }, { "epoch": 7.372353545734841, "grad_norm": 0.11616508662700653, "learning_rate": 0.01, "loss": 1.9638, "step": 71733 }, { "epoch": 7.372661870503597, "grad_norm": 0.06593319773674011, "learning_rate": 0.01, "loss": 1.9871, "step": 71736 }, { "epoch": 7.372970195272353, "grad_norm": 0.08161871880292892, "learning_rate": 0.01, "loss": 1.9637, "step": 71739 }, { "epoch": 7.37327852004111, "grad_norm": 0.04027361795306206, "learning_rate": 0.01, "loss": 1.972, "step": 71742 }, { "epoch": 7.373586844809866, "grad_norm": 0.04333282262086868, "learning_rate": 0.01, "loss": 1.9982, "step": 71745 }, { "epoch": 7.373895169578623, "grad_norm": 0.04270986467599869, "learning_rate": 0.01, "loss": 1.9517, "step": 71748 }, { "epoch": 7.3742034943473795, "grad_norm": 0.09752412140369415, "learning_rate": 0.01, "loss": 1.9792, "step": 71751 }, { "epoch": 7.374511819116136, "grad_norm": 0.06835508346557617, "learning_rate": 0.01, "loss": 1.9866, "step": 71754 }, { "epoch": 7.374820143884892, "grad_norm": 0.07079548388719559, "learning_rate": 0.01, "loss": 1.9515, "step": 71757 }, { "epoch": 7.3751284686536485, "grad_norm": 0.06948207318782806, "learning_rate": 0.01, "loss": 1.9753, "step": 71760 }, { "epoch": 7.375436793422405, "grad_norm": 0.03838725760579109, "learning_rate": 0.01, "loss": 1.9758, "step": 71763 }, { "epoch": 7.375745118191161, "grad_norm": 0.07975951582193375, "learning_rate": 0.01, "loss": 1.9581, "step": 71766 }, { "epoch": 7.376053442959917, "grad_norm": 0.06829675287008286, "learning_rate": 0.01, "loss": 2.0081, "step": 71769 }, { "epoch": 7.376361767728675, "grad_norm": 0.0858384519815445, "learning_rate": 0.01, "loss": 1.9879, "step": 71772 }, { "epoch": 7.376670092497431, "grad_norm": 0.037788160145282745, "learning_rate": 0.01, "loss": 1.9734, "step": 71775 }, { "epoch": 7.376978417266187, "grad_norm": 0.040428534150123596, "learning_rate": 0.01, "loss": 1.9702, "step": 71778 }, { "epoch": 7.3772867420349435, "grad_norm": 0.041471801698207855, "learning_rate": 0.01, "loss": 1.9968, "step": 71781 }, { "epoch": 7.3775950668037, "grad_norm": 0.06048744544386864, "learning_rate": 0.01, "loss": 1.9926, "step": 71784 }, { "epoch": 7.377903391572456, "grad_norm": 0.0954107865691185, "learning_rate": 0.01, "loss": 1.9658, "step": 71787 }, { "epoch": 7.378211716341212, "grad_norm": 0.11208479851484299, "learning_rate": 0.01, "loss": 1.9868, "step": 71790 }, { "epoch": 7.378520041109969, "grad_norm": 0.037479761987924576, "learning_rate": 0.01, "loss": 1.9715, "step": 71793 }, { "epoch": 7.378828365878726, "grad_norm": 0.047237321734428406, "learning_rate": 0.01, "loss": 1.966, "step": 71796 }, { "epoch": 7.379136690647482, "grad_norm": 0.05881095305085182, "learning_rate": 0.01, "loss": 1.9857, "step": 71799 }, { "epoch": 7.3794450154162385, "grad_norm": 0.054790887981653214, "learning_rate": 0.01, "loss": 2.01, "step": 71802 }, { "epoch": 7.379753340184995, "grad_norm": 0.0610164999961853, "learning_rate": 0.01, "loss": 1.9849, "step": 71805 }, { "epoch": 7.380061664953751, "grad_norm": 0.04166460409760475, "learning_rate": 0.01, "loss": 1.9654, "step": 71808 }, { "epoch": 7.3803699897225075, "grad_norm": 0.058832067996263504, "learning_rate": 0.01, "loss": 1.9943, "step": 71811 }, { "epoch": 7.380678314491264, "grad_norm": 0.04992865025997162, "learning_rate": 0.01, "loss": 1.9833, "step": 71814 }, { "epoch": 7.380986639260021, "grad_norm": 0.11985593289136887, "learning_rate": 0.01, "loss": 1.9596, "step": 71817 }, { "epoch": 7.381294964028777, "grad_norm": 0.048121824860572815, "learning_rate": 0.01, "loss": 1.9733, "step": 71820 }, { "epoch": 7.381603288797534, "grad_norm": 0.0449887290596962, "learning_rate": 0.01, "loss": 1.9771, "step": 71823 }, { "epoch": 7.38191161356629, "grad_norm": 0.08304189890623093, "learning_rate": 0.01, "loss": 1.9677, "step": 71826 }, { "epoch": 7.382219938335046, "grad_norm": 0.06549306958913803, "learning_rate": 0.01, "loss": 1.9673, "step": 71829 }, { "epoch": 7.3825282631038025, "grad_norm": 0.05262019485235214, "learning_rate": 0.01, "loss": 1.9693, "step": 71832 }, { "epoch": 7.382836587872559, "grad_norm": 0.04085802286863327, "learning_rate": 0.01, "loss": 1.997, "step": 71835 }, { "epoch": 7.383144912641315, "grad_norm": 0.03672410920262337, "learning_rate": 0.01, "loss": 1.9734, "step": 71838 }, { "epoch": 7.383453237410072, "grad_norm": 0.04606030136346817, "learning_rate": 0.01, "loss": 1.9685, "step": 71841 }, { "epoch": 7.383761562178829, "grad_norm": 0.11922565847635269, "learning_rate": 0.01, "loss": 1.9698, "step": 71844 }, { "epoch": 7.384069886947585, "grad_norm": 0.060678258538246155, "learning_rate": 0.01, "loss": 1.9741, "step": 71847 }, { "epoch": 7.384378211716341, "grad_norm": 0.11298535019159317, "learning_rate": 0.01, "loss": 1.9886, "step": 71850 }, { "epoch": 7.3846865364850975, "grad_norm": 0.03260503709316254, "learning_rate": 0.01, "loss": 1.9787, "step": 71853 }, { "epoch": 7.384994861253854, "grad_norm": 0.044581152498722076, "learning_rate": 0.01, "loss": 1.9794, "step": 71856 }, { "epoch": 7.38530318602261, "grad_norm": 0.044959817081689835, "learning_rate": 0.01, "loss": 1.9444, "step": 71859 }, { "epoch": 7.385611510791367, "grad_norm": 0.04784368351101875, "learning_rate": 0.01, "loss": 1.9784, "step": 71862 }, { "epoch": 7.385919835560124, "grad_norm": 0.08526644110679626, "learning_rate": 0.01, "loss": 1.9821, "step": 71865 }, { "epoch": 7.38622816032888, "grad_norm": 0.057515472173690796, "learning_rate": 0.01, "loss": 1.9605, "step": 71868 }, { "epoch": 7.386536485097636, "grad_norm": 0.04409664124250412, "learning_rate": 0.01, "loss": 1.9771, "step": 71871 }, { "epoch": 7.386844809866393, "grad_norm": 0.04344470053911209, "learning_rate": 0.01, "loss": 1.9728, "step": 71874 }, { "epoch": 7.387153134635149, "grad_norm": 0.03933620825409889, "learning_rate": 0.01, "loss": 1.9926, "step": 71877 }, { "epoch": 7.387461459403905, "grad_norm": 0.1177898719906807, "learning_rate": 0.01, "loss": 2.0005, "step": 71880 }, { "epoch": 7.3877697841726615, "grad_norm": 0.045402709394693375, "learning_rate": 0.01, "loss": 1.9596, "step": 71883 }, { "epoch": 7.388078108941419, "grad_norm": 0.05361394211649895, "learning_rate": 0.01, "loss": 1.9905, "step": 71886 }, { "epoch": 7.388386433710175, "grad_norm": 0.05513688176870346, "learning_rate": 0.01, "loss": 1.9832, "step": 71889 }, { "epoch": 7.388694758478931, "grad_norm": 0.03191964328289032, "learning_rate": 0.01, "loss": 1.9598, "step": 71892 }, { "epoch": 7.389003083247688, "grad_norm": 0.05432026833295822, "learning_rate": 0.01, "loss": 1.962, "step": 71895 }, { "epoch": 7.389311408016444, "grad_norm": 0.06853076070547104, "learning_rate": 0.01, "loss": 1.9895, "step": 71898 }, { "epoch": 7.3896197327852, "grad_norm": 0.07504719495773315, "learning_rate": 0.01, "loss": 1.9929, "step": 71901 }, { "epoch": 7.3899280575539565, "grad_norm": 0.08959703892469406, "learning_rate": 0.01, "loss": 1.9792, "step": 71904 }, { "epoch": 7.390236382322713, "grad_norm": 0.04118792340159416, "learning_rate": 0.01, "loss": 1.9692, "step": 71907 }, { "epoch": 7.39054470709147, "grad_norm": 0.06344828754663467, "learning_rate": 0.01, "loss": 1.9844, "step": 71910 }, { "epoch": 7.390853031860226, "grad_norm": 0.03989424929022789, "learning_rate": 0.01, "loss": 1.9669, "step": 71913 }, { "epoch": 7.391161356628983, "grad_norm": 0.07987693697214127, "learning_rate": 0.01, "loss": 1.9749, "step": 71916 }, { "epoch": 7.391469681397739, "grad_norm": 0.09878303110599518, "learning_rate": 0.01, "loss": 1.965, "step": 71919 }, { "epoch": 7.391778006166495, "grad_norm": 0.15211869776248932, "learning_rate": 0.01, "loss": 1.9902, "step": 71922 }, { "epoch": 7.392086330935252, "grad_norm": 0.09302935749292374, "learning_rate": 0.01, "loss": 1.9531, "step": 71925 }, { "epoch": 7.392394655704008, "grad_norm": 0.08965945243835449, "learning_rate": 0.01, "loss": 1.9664, "step": 71928 }, { "epoch": 7.392702980472764, "grad_norm": 0.059964247047901154, "learning_rate": 0.01, "loss": 1.9591, "step": 71931 }, { "epoch": 7.393011305241521, "grad_norm": 0.03914433345198631, "learning_rate": 0.01, "loss": 1.9721, "step": 71934 }, { "epoch": 7.393319630010278, "grad_norm": 0.033087339252233505, "learning_rate": 0.01, "loss": 1.9747, "step": 71937 }, { "epoch": 7.393627954779034, "grad_norm": 0.03930596262216568, "learning_rate": 0.01, "loss": 1.9392, "step": 71940 }, { "epoch": 7.39393627954779, "grad_norm": 0.1557454764842987, "learning_rate": 0.01, "loss": 1.9986, "step": 71943 }, { "epoch": 7.394244604316547, "grad_norm": 0.12098176777362823, "learning_rate": 0.01, "loss": 1.9786, "step": 71946 }, { "epoch": 7.394552929085303, "grad_norm": 0.062015559524297714, "learning_rate": 0.01, "loss": 1.9976, "step": 71949 }, { "epoch": 7.394861253854059, "grad_norm": 0.051067329943180084, "learning_rate": 0.01, "loss": 2.0197, "step": 71952 }, { "epoch": 7.395169578622816, "grad_norm": 0.05703721195459366, "learning_rate": 0.01, "loss": 1.9605, "step": 71955 }, { "epoch": 7.395477903391573, "grad_norm": 0.05166958272457123, "learning_rate": 0.01, "loss": 1.9927, "step": 71958 }, { "epoch": 7.395786228160329, "grad_norm": 0.06948833912611008, "learning_rate": 0.01, "loss": 1.986, "step": 71961 }, { "epoch": 7.396094552929085, "grad_norm": 0.09943950176239014, "learning_rate": 0.01, "loss": 2.0107, "step": 71964 }, { "epoch": 7.396402877697842, "grad_norm": 0.04081091284751892, "learning_rate": 0.01, "loss": 1.9601, "step": 71967 }, { "epoch": 7.396711202466598, "grad_norm": 0.08712594956159592, "learning_rate": 0.01, "loss": 1.9742, "step": 71970 }, { "epoch": 7.397019527235354, "grad_norm": 0.10174566507339478, "learning_rate": 0.01, "loss": 1.9871, "step": 71973 }, { "epoch": 7.397327852004111, "grad_norm": 0.08403330296278, "learning_rate": 0.01, "loss": 1.9691, "step": 71976 }, { "epoch": 7.397636176772868, "grad_norm": 0.07419370114803314, "learning_rate": 0.01, "loss": 1.9603, "step": 71979 }, { "epoch": 7.397944501541624, "grad_norm": 0.08721747249364853, "learning_rate": 0.01, "loss": 1.9698, "step": 71982 }, { "epoch": 7.39825282631038, "grad_norm": 0.07544389367103577, "learning_rate": 0.01, "loss": 1.9422, "step": 71985 }, { "epoch": 7.398561151079137, "grad_norm": 0.03575440123677254, "learning_rate": 0.01, "loss": 1.9988, "step": 71988 }, { "epoch": 7.398869475847893, "grad_norm": 0.04670301452279091, "learning_rate": 0.01, "loss": 1.9623, "step": 71991 }, { "epoch": 7.399177800616649, "grad_norm": 0.04096400365233421, "learning_rate": 0.01, "loss": 1.9679, "step": 71994 }, { "epoch": 7.399486125385406, "grad_norm": 0.04938502982258797, "learning_rate": 0.01, "loss": 1.9781, "step": 71997 }, { "epoch": 7.399794450154163, "grad_norm": 0.13924731314182281, "learning_rate": 0.01, "loss": 1.9767, "step": 72000 }, { "epoch": 7.400102774922919, "grad_norm": 0.10529632121324539, "learning_rate": 0.01, "loss": 1.975, "step": 72003 }, { "epoch": 7.400411099691675, "grad_norm": 0.06417892873287201, "learning_rate": 0.01, "loss": 1.9757, "step": 72006 }, { "epoch": 7.400719424460432, "grad_norm": 0.04323699697852135, "learning_rate": 0.01, "loss": 1.9605, "step": 72009 }, { "epoch": 7.401027749229188, "grad_norm": 0.04034925252199173, "learning_rate": 0.01, "loss": 1.9898, "step": 72012 }, { "epoch": 7.401336073997944, "grad_norm": 0.12776517868041992, "learning_rate": 0.01, "loss": 1.9787, "step": 72015 }, { "epoch": 7.401644398766701, "grad_norm": 0.12993237376213074, "learning_rate": 0.01, "loss": 1.9608, "step": 72018 }, { "epoch": 7.401952723535457, "grad_norm": 0.09225061535835266, "learning_rate": 0.01, "loss": 1.9678, "step": 72021 }, { "epoch": 7.402261048304214, "grad_norm": 0.0604284442961216, "learning_rate": 0.01, "loss": 1.956, "step": 72024 }, { "epoch": 7.4025693730729705, "grad_norm": 0.042524028569459915, "learning_rate": 0.01, "loss": 1.9507, "step": 72027 }, { "epoch": 7.402877697841727, "grad_norm": 0.0651375949382782, "learning_rate": 0.01, "loss": 1.9575, "step": 72030 }, { "epoch": 7.403186022610483, "grad_norm": 0.04578625038266182, "learning_rate": 0.01, "loss": 1.9472, "step": 72033 }, { "epoch": 7.403494347379239, "grad_norm": 0.05088494345545769, "learning_rate": 0.01, "loss": 1.9506, "step": 72036 }, { "epoch": 7.403802672147996, "grad_norm": 0.04542406648397446, "learning_rate": 0.01, "loss": 1.9772, "step": 72039 }, { "epoch": 7.404110996916752, "grad_norm": 0.035000525414943695, "learning_rate": 0.01, "loss": 1.9838, "step": 72042 }, { "epoch": 7.404419321685509, "grad_norm": 0.10675634443759918, "learning_rate": 0.01, "loss": 1.9573, "step": 72045 }, { "epoch": 7.4047276464542655, "grad_norm": 0.071378692984581, "learning_rate": 0.01, "loss": 1.9602, "step": 72048 }, { "epoch": 7.405035971223022, "grad_norm": 0.047410156577825546, "learning_rate": 0.01, "loss": 1.9812, "step": 72051 }, { "epoch": 7.405344295991778, "grad_norm": 0.04333693906664848, "learning_rate": 0.01, "loss": 1.9579, "step": 72054 }, { "epoch": 7.405652620760534, "grad_norm": 0.038545072078704834, "learning_rate": 0.01, "loss": 1.9626, "step": 72057 }, { "epoch": 7.405960945529291, "grad_norm": 0.16361141204833984, "learning_rate": 0.01, "loss": 1.9613, "step": 72060 }, { "epoch": 7.406269270298047, "grad_norm": 0.11350101232528687, "learning_rate": 0.01, "loss": 1.9658, "step": 72063 }, { "epoch": 7.406577595066803, "grad_norm": 0.06721406430006027, "learning_rate": 0.01, "loss": 1.9535, "step": 72066 }, { "epoch": 7.40688591983556, "grad_norm": 0.061075255274772644, "learning_rate": 0.01, "loss": 1.9543, "step": 72069 }, { "epoch": 7.407194244604317, "grad_norm": 0.048585545271635056, "learning_rate": 0.01, "loss": 1.9818, "step": 72072 }, { "epoch": 7.407502569373073, "grad_norm": 0.04021257162094116, "learning_rate": 0.01, "loss": 1.9685, "step": 72075 }, { "epoch": 7.4078108941418295, "grad_norm": 0.054019588977098465, "learning_rate": 0.01, "loss": 1.9743, "step": 72078 }, { "epoch": 7.408119218910586, "grad_norm": 0.05107961595058441, "learning_rate": 0.01, "loss": 1.9301, "step": 72081 }, { "epoch": 7.408427543679342, "grad_norm": 0.10263427346944809, "learning_rate": 0.01, "loss": 1.9763, "step": 72084 }, { "epoch": 7.408735868448098, "grad_norm": 0.12283501029014587, "learning_rate": 0.01, "loss": 1.9854, "step": 72087 }, { "epoch": 7.409044193216855, "grad_norm": 0.058549631386995316, "learning_rate": 0.01, "loss": 1.961, "step": 72090 }, { "epoch": 7.409352517985612, "grad_norm": 0.05259247124195099, "learning_rate": 0.01, "loss": 1.9587, "step": 72093 }, { "epoch": 7.409660842754368, "grad_norm": 0.04460529983043671, "learning_rate": 0.01, "loss": 1.9722, "step": 72096 }, { "epoch": 7.4099691675231245, "grad_norm": 0.0474291630089283, "learning_rate": 0.01, "loss": 1.9974, "step": 72099 }, { "epoch": 7.410277492291881, "grad_norm": 0.049115218222141266, "learning_rate": 0.01, "loss": 1.9956, "step": 72102 }, { "epoch": 7.410585817060637, "grad_norm": 0.06523936986923218, "learning_rate": 0.01, "loss": 1.9685, "step": 72105 }, { "epoch": 7.410894141829393, "grad_norm": 0.059646543115377426, "learning_rate": 0.01, "loss": 1.9799, "step": 72108 }, { "epoch": 7.41120246659815, "grad_norm": 0.04670571908354759, "learning_rate": 0.01, "loss": 1.9732, "step": 72111 }, { "epoch": 7.411510791366906, "grad_norm": 0.1110573336482048, "learning_rate": 0.01, "loss": 1.9958, "step": 72114 }, { "epoch": 7.411819116135663, "grad_norm": 0.04579848423600197, "learning_rate": 0.01, "loss": 1.9778, "step": 72117 }, { "epoch": 7.4121274409044196, "grad_norm": 0.12220747023820877, "learning_rate": 0.01, "loss": 2.0113, "step": 72120 }, { "epoch": 7.412435765673176, "grad_norm": 0.08545442670583725, "learning_rate": 0.01, "loss": 1.9718, "step": 72123 }, { "epoch": 7.412744090441932, "grad_norm": 0.05775977298617363, "learning_rate": 0.01, "loss": 1.986, "step": 72126 }, { "epoch": 7.4130524152106885, "grad_norm": 0.03229491040110588, "learning_rate": 0.01, "loss": 1.9849, "step": 72129 }, { "epoch": 7.413360739979445, "grad_norm": 0.03648471459746361, "learning_rate": 0.01, "loss": 1.9673, "step": 72132 }, { "epoch": 7.413669064748201, "grad_norm": 0.06855278462171555, "learning_rate": 0.01, "loss": 1.9695, "step": 72135 }, { "epoch": 7.413977389516958, "grad_norm": 0.12804359197616577, "learning_rate": 0.01, "loss": 1.9735, "step": 72138 }, { "epoch": 7.414285714285715, "grad_norm": 0.06988925486803055, "learning_rate": 0.01, "loss": 1.9879, "step": 72141 }, { "epoch": 7.414594039054471, "grad_norm": 0.08092012256383896, "learning_rate": 0.01, "loss": 1.9727, "step": 72144 }, { "epoch": 7.414902363823227, "grad_norm": 0.062308866530656815, "learning_rate": 0.01, "loss": 1.95, "step": 72147 }, { "epoch": 7.4152106885919835, "grad_norm": 0.1187475249171257, "learning_rate": 0.01, "loss": 1.9741, "step": 72150 }, { "epoch": 7.41551901336074, "grad_norm": 0.038580019026994705, "learning_rate": 0.01, "loss": 1.9674, "step": 72153 }, { "epoch": 7.415827338129496, "grad_norm": 0.06444518268108368, "learning_rate": 0.01, "loss": 1.975, "step": 72156 }, { "epoch": 7.416135662898252, "grad_norm": 0.09006829559803009, "learning_rate": 0.01, "loss": 1.9775, "step": 72159 }, { "epoch": 7.41644398766701, "grad_norm": 0.1387302577495575, "learning_rate": 0.01, "loss": 1.9916, "step": 72162 }, { "epoch": 7.416752312435766, "grad_norm": 0.12567099928855896, "learning_rate": 0.01, "loss": 1.9581, "step": 72165 }, { "epoch": 7.417060637204522, "grad_norm": 0.046082064509391785, "learning_rate": 0.01, "loss": 1.956, "step": 72168 }, { "epoch": 7.4173689619732786, "grad_norm": 0.044009629637002945, "learning_rate": 0.01, "loss": 1.968, "step": 72171 }, { "epoch": 7.417677286742035, "grad_norm": 0.07774317264556885, "learning_rate": 0.01, "loss": 1.9506, "step": 72174 }, { "epoch": 7.417985611510791, "grad_norm": 0.11599218100309372, "learning_rate": 0.01, "loss": 1.9964, "step": 72177 }, { "epoch": 7.4182939362795475, "grad_norm": 0.03975779935717583, "learning_rate": 0.01, "loss": 1.9898, "step": 72180 }, { "epoch": 7.418602261048305, "grad_norm": 0.06963899731636047, "learning_rate": 0.01, "loss": 1.9975, "step": 72183 }, { "epoch": 7.418910585817061, "grad_norm": 0.0915340930223465, "learning_rate": 0.01, "loss": 1.9867, "step": 72186 }, { "epoch": 7.419218910585817, "grad_norm": 0.057447649538517, "learning_rate": 0.01, "loss": 1.9799, "step": 72189 }, { "epoch": 7.419527235354574, "grad_norm": 0.037418052554130554, "learning_rate": 0.01, "loss": 1.9578, "step": 72192 }, { "epoch": 7.41983556012333, "grad_norm": 0.03583716228604317, "learning_rate": 0.01, "loss": 1.9694, "step": 72195 }, { "epoch": 7.420143884892086, "grad_norm": 0.04073040559887886, "learning_rate": 0.01, "loss": 1.9709, "step": 72198 }, { "epoch": 7.4204522096608425, "grad_norm": 0.1194196417927742, "learning_rate": 0.01, "loss": 1.9577, "step": 72201 }, { "epoch": 7.420760534429599, "grad_norm": 0.07829037308692932, "learning_rate": 0.01, "loss": 1.9499, "step": 72204 }, { "epoch": 7.421068859198356, "grad_norm": 0.05158103257417679, "learning_rate": 0.01, "loss": 1.9692, "step": 72207 }, { "epoch": 7.421377183967112, "grad_norm": 0.13578635454177856, "learning_rate": 0.01, "loss": 1.9617, "step": 72210 }, { "epoch": 7.421685508735869, "grad_norm": 0.07069911062717438, "learning_rate": 0.01, "loss": 1.9781, "step": 72213 }, { "epoch": 7.421993833504625, "grad_norm": 0.04424047842621803, "learning_rate": 0.01, "loss": 1.9526, "step": 72216 }, { "epoch": 7.422302158273381, "grad_norm": 0.03345247730612755, "learning_rate": 0.01, "loss": 1.9701, "step": 72219 }, { "epoch": 7.4226104830421376, "grad_norm": 0.04876269772648811, "learning_rate": 0.01, "loss": 1.948, "step": 72222 }, { "epoch": 7.422918807810894, "grad_norm": 0.0414804071187973, "learning_rate": 0.01, "loss": 1.9704, "step": 72225 }, { "epoch": 7.42322713257965, "grad_norm": 0.10436029732227325, "learning_rate": 0.01, "loss": 1.9705, "step": 72228 }, { "epoch": 7.423535457348407, "grad_norm": 0.08055151253938675, "learning_rate": 0.01, "loss": 1.9617, "step": 72231 }, { "epoch": 7.423843782117164, "grad_norm": 0.11014366894960403, "learning_rate": 0.01, "loss": 1.9517, "step": 72234 }, { "epoch": 7.42415210688592, "grad_norm": 0.06515113264322281, "learning_rate": 0.01, "loss": 1.9962, "step": 72237 }, { "epoch": 7.424460431654676, "grad_norm": 0.04275600612163544, "learning_rate": 0.01, "loss": 1.9662, "step": 72240 }, { "epoch": 7.424768756423433, "grad_norm": 0.1309288591146469, "learning_rate": 0.01, "loss": 1.981, "step": 72243 }, { "epoch": 7.425077081192189, "grad_norm": 0.04589364305138588, "learning_rate": 0.01, "loss": 1.9797, "step": 72246 }, { "epoch": 7.425385405960945, "grad_norm": 0.06936043500900269, "learning_rate": 0.01, "loss": 1.9786, "step": 72249 }, { "epoch": 7.4256937307297015, "grad_norm": 0.06822799891233444, "learning_rate": 0.01, "loss": 1.9657, "step": 72252 }, { "epoch": 7.426002055498459, "grad_norm": 0.04731842502951622, "learning_rate": 0.01, "loss": 1.9615, "step": 72255 }, { "epoch": 7.426310380267215, "grad_norm": 0.14833036065101624, "learning_rate": 0.01, "loss": 1.9505, "step": 72258 }, { "epoch": 7.426618705035971, "grad_norm": 0.05417739227414131, "learning_rate": 0.01, "loss": 1.9976, "step": 72261 }, { "epoch": 7.426927029804728, "grad_norm": 0.06367053091526031, "learning_rate": 0.01, "loss": 1.9634, "step": 72264 }, { "epoch": 7.427235354573484, "grad_norm": 0.03428191319108009, "learning_rate": 0.01, "loss": 1.9779, "step": 72267 }, { "epoch": 7.42754367934224, "grad_norm": 0.044105369597673416, "learning_rate": 0.01, "loss": 1.9722, "step": 72270 }, { "epoch": 7.4278520041109966, "grad_norm": 0.08640552312135696, "learning_rate": 0.01, "loss": 1.9815, "step": 72273 }, { "epoch": 7.428160328879754, "grad_norm": 0.13555385172367096, "learning_rate": 0.01, "loss": 1.9952, "step": 72276 }, { "epoch": 7.42846865364851, "grad_norm": 0.06566069275140762, "learning_rate": 0.01, "loss": 1.9638, "step": 72279 }, { "epoch": 7.428776978417266, "grad_norm": 0.1009216234087944, "learning_rate": 0.01, "loss": 1.9702, "step": 72282 }, { "epoch": 7.429085303186023, "grad_norm": 0.06078832969069481, "learning_rate": 0.01, "loss": 1.9566, "step": 72285 }, { "epoch": 7.429393627954779, "grad_norm": 0.04854779317975044, "learning_rate": 0.01, "loss": 1.9588, "step": 72288 }, { "epoch": 7.429701952723535, "grad_norm": 0.06042282283306122, "learning_rate": 0.01, "loss": 1.9627, "step": 72291 }, { "epoch": 7.430010277492292, "grad_norm": 0.07528941333293915, "learning_rate": 0.01, "loss": 1.9883, "step": 72294 }, { "epoch": 7.430318602261048, "grad_norm": 0.048977117985486984, "learning_rate": 0.01, "loss": 1.9895, "step": 72297 }, { "epoch": 7.430626927029805, "grad_norm": 0.03058001957833767, "learning_rate": 0.01, "loss": 1.9634, "step": 72300 }, { "epoch": 7.430935251798561, "grad_norm": 0.031676169484853745, "learning_rate": 0.01, "loss": 1.9279, "step": 72303 }, { "epoch": 7.431243576567318, "grad_norm": 0.04474344104528427, "learning_rate": 0.01, "loss": 1.95, "step": 72306 }, { "epoch": 7.431551901336074, "grad_norm": 0.04538916051387787, "learning_rate": 0.01, "loss": 1.9819, "step": 72309 }, { "epoch": 7.43186022610483, "grad_norm": 0.0947703942656517, "learning_rate": 0.01, "loss": 1.9685, "step": 72312 }, { "epoch": 7.432168550873587, "grad_norm": 0.05523570254445076, "learning_rate": 0.01, "loss": 1.9813, "step": 72315 }, { "epoch": 7.432476875642343, "grad_norm": 0.09945077449083328, "learning_rate": 0.01, "loss": 1.9853, "step": 72318 }, { "epoch": 7.4327852004111, "grad_norm": 0.11858788877725601, "learning_rate": 0.01, "loss": 1.9703, "step": 72321 }, { "epoch": 7.433093525179856, "grad_norm": 0.056699126958847046, "learning_rate": 0.01, "loss": 1.9787, "step": 72324 }, { "epoch": 7.433401849948613, "grad_norm": 0.03996944800019264, "learning_rate": 0.01, "loss": 1.9835, "step": 72327 }, { "epoch": 7.433710174717369, "grad_norm": 0.04307657852768898, "learning_rate": 0.01, "loss": 1.9949, "step": 72330 }, { "epoch": 7.434018499486125, "grad_norm": 0.03870538994669914, "learning_rate": 0.01, "loss": 1.9615, "step": 72333 }, { "epoch": 7.434326824254882, "grad_norm": 0.04004659503698349, "learning_rate": 0.01, "loss": 1.9513, "step": 72336 }, { "epoch": 7.434635149023638, "grad_norm": 0.10981098562479019, "learning_rate": 0.01, "loss": 1.961, "step": 72339 }, { "epoch": 7.434943473792394, "grad_norm": 0.06930761784315109, "learning_rate": 0.01, "loss": 1.9602, "step": 72342 }, { "epoch": 7.4352517985611515, "grad_norm": 0.09295953810214996, "learning_rate": 0.01, "loss": 1.9725, "step": 72345 }, { "epoch": 7.435560123329908, "grad_norm": 0.07768440246582031, "learning_rate": 0.01, "loss": 1.9763, "step": 72348 }, { "epoch": 7.435868448098664, "grad_norm": 0.059423208236694336, "learning_rate": 0.01, "loss": 1.9692, "step": 72351 }, { "epoch": 7.43617677286742, "grad_norm": 0.06254580616950989, "learning_rate": 0.01, "loss": 1.9701, "step": 72354 }, { "epoch": 7.436485097636177, "grad_norm": 0.05663672834634781, "learning_rate": 0.01, "loss": 1.9767, "step": 72357 }, { "epoch": 7.436793422404933, "grad_norm": 0.09405243396759033, "learning_rate": 0.01, "loss": 1.9635, "step": 72360 }, { "epoch": 7.437101747173689, "grad_norm": 0.04204673692584038, "learning_rate": 0.01, "loss": 1.9515, "step": 72363 }, { "epoch": 7.437410071942446, "grad_norm": 0.04828297346830368, "learning_rate": 0.01, "loss": 1.9875, "step": 72366 }, { "epoch": 7.437718396711203, "grad_norm": 0.12901097536087036, "learning_rate": 0.01, "loss": 1.951, "step": 72369 }, { "epoch": 7.438026721479959, "grad_norm": 0.04471548646688461, "learning_rate": 0.01, "loss": 1.9554, "step": 72372 }, { "epoch": 7.438335046248715, "grad_norm": 0.10098598897457123, "learning_rate": 0.01, "loss": 1.9788, "step": 72375 }, { "epoch": 7.438643371017472, "grad_norm": 0.06057806685566902, "learning_rate": 0.01, "loss": 1.9911, "step": 72378 }, { "epoch": 7.438951695786228, "grad_norm": 0.07276615500450134, "learning_rate": 0.01, "loss": 1.9713, "step": 72381 }, { "epoch": 7.439260020554984, "grad_norm": 0.06687156856060028, "learning_rate": 0.01, "loss": 1.9677, "step": 72384 }, { "epoch": 7.439568345323741, "grad_norm": 0.10728184133768082, "learning_rate": 0.01, "loss": 1.9741, "step": 72387 }, { "epoch": 7.439876670092497, "grad_norm": 0.10064877569675446, "learning_rate": 0.01, "loss": 1.9839, "step": 72390 }, { "epoch": 7.440184994861254, "grad_norm": 0.07631184905767441, "learning_rate": 0.01, "loss": 1.9682, "step": 72393 }, { "epoch": 7.4404933196300105, "grad_norm": 0.07702022790908813, "learning_rate": 0.01, "loss": 1.9846, "step": 72396 }, { "epoch": 7.440801644398767, "grad_norm": 0.0484774112701416, "learning_rate": 0.01, "loss": 1.9894, "step": 72399 }, { "epoch": 7.441109969167523, "grad_norm": 0.03002420999109745, "learning_rate": 0.01, "loss": 1.9594, "step": 72402 }, { "epoch": 7.441418293936279, "grad_norm": 0.03068717196583748, "learning_rate": 0.01, "loss": 1.9847, "step": 72405 }, { "epoch": 7.441726618705036, "grad_norm": 0.03828945755958557, "learning_rate": 0.01, "loss": 1.977, "step": 72408 }, { "epoch": 7.442034943473792, "grad_norm": 0.11569713056087494, "learning_rate": 0.01, "loss": 1.9793, "step": 72411 }, { "epoch": 7.442343268242549, "grad_norm": 0.06883276998996735, "learning_rate": 0.01, "loss": 1.9733, "step": 72414 }, { "epoch": 7.4426515930113055, "grad_norm": 0.12915529310703278, "learning_rate": 0.01, "loss": 1.9672, "step": 72417 }, { "epoch": 7.442959917780062, "grad_norm": 0.05600148066878319, "learning_rate": 0.01, "loss": 1.9741, "step": 72420 }, { "epoch": 7.443268242548818, "grad_norm": 0.061895426362752914, "learning_rate": 0.01, "loss": 1.9696, "step": 72423 }, { "epoch": 7.443576567317574, "grad_norm": 0.04677951708436012, "learning_rate": 0.01, "loss": 1.9668, "step": 72426 }, { "epoch": 7.443884892086331, "grad_norm": 0.09104373306035995, "learning_rate": 0.01, "loss": 1.9773, "step": 72429 }, { "epoch": 7.444193216855087, "grad_norm": 0.04269783943891525, "learning_rate": 0.01, "loss": 1.9585, "step": 72432 }, { "epoch": 7.444501541623843, "grad_norm": 0.04557257890701294, "learning_rate": 0.01, "loss": 1.963, "step": 72435 }, { "epoch": 7.444809866392601, "grad_norm": 0.045879703015089035, "learning_rate": 0.01, "loss": 1.9697, "step": 72438 }, { "epoch": 7.445118191161357, "grad_norm": 0.09514186531305313, "learning_rate": 0.01, "loss": 1.9815, "step": 72441 }, { "epoch": 7.445426515930113, "grad_norm": 0.07008050382137299, "learning_rate": 0.01, "loss": 1.9813, "step": 72444 }, { "epoch": 7.4457348406988695, "grad_norm": 0.08302075415849686, "learning_rate": 0.01, "loss": 1.9619, "step": 72447 }, { "epoch": 7.446043165467626, "grad_norm": 0.052289433777332306, "learning_rate": 0.01, "loss": 1.9886, "step": 72450 }, { "epoch": 7.446351490236382, "grad_norm": 0.04208613932132721, "learning_rate": 0.01, "loss": 1.9707, "step": 72453 }, { "epoch": 7.446659815005138, "grad_norm": 0.0372624509036541, "learning_rate": 0.01, "loss": 1.9866, "step": 72456 }, { "epoch": 7.446968139773896, "grad_norm": 0.035864707082509995, "learning_rate": 0.01, "loss": 1.9725, "step": 72459 }, { "epoch": 7.447276464542652, "grad_norm": 0.03874954208731651, "learning_rate": 0.01, "loss": 1.9807, "step": 72462 }, { "epoch": 7.447584789311408, "grad_norm": 0.09163645654916763, "learning_rate": 0.01, "loss": 1.9797, "step": 72465 }, { "epoch": 7.4478931140801645, "grad_norm": 0.067960225045681, "learning_rate": 0.01, "loss": 1.9699, "step": 72468 }, { "epoch": 7.448201438848921, "grad_norm": 0.17044830322265625, "learning_rate": 0.01, "loss": 1.972, "step": 72471 }, { "epoch": 7.448509763617677, "grad_norm": 0.0597783625125885, "learning_rate": 0.01, "loss": 1.9901, "step": 72474 }, { "epoch": 7.4488180883864334, "grad_norm": 0.08422503620386124, "learning_rate": 0.01, "loss": 1.9749, "step": 72477 }, { "epoch": 7.44912641315519, "grad_norm": 0.09853143244981766, "learning_rate": 0.01, "loss": 1.9791, "step": 72480 }, { "epoch": 7.449434737923947, "grad_norm": 0.09409277886152267, "learning_rate": 0.01, "loss": 1.9661, "step": 72483 }, { "epoch": 7.449743062692703, "grad_norm": 0.04450713470578194, "learning_rate": 0.01, "loss": 1.9848, "step": 72486 }, { "epoch": 7.45005138746146, "grad_norm": 0.11108853667974472, "learning_rate": 0.01, "loss": 1.9763, "step": 72489 }, { "epoch": 7.450359712230216, "grad_norm": 0.09322323650121689, "learning_rate": 0.01, "loss": 1.9468, "step": 72492 }, { "epoch": 7.450668036998972, "grad_norm": 0.11880034953355789, "learning_rate": 0.01, "loss": 1.9824, "step": 72495 }, { "epoch": 7.4509763617677285, "grad_norm": 0.06539753824472427, "learning_rate": 0.01, "loss": 1.9759, "step": 72498 }, { "epoch": 7.451284686536485, "grad_norm": 0.04002269357442856, "learning_rate": 0.01, "loss": 1.9819, "step": 72501 }, { "epoch": 7.451593011305242, "grad_norm": 0.0480516254901886, "learning_rate": 0.01, "loss": 1.988, "step": 72504 }, { "epoch": 7.451901336073998, "grad_norm": 0.09114290028810501, "learning_rate": 0.01, "loss": 1.9679, "step": 72507 }, { "epoch": 7.452209660842755, "grad_norm": 0.057204026728868484, "learning_rate": 0.01, "loss": 1.9526, "step": 72510 }, { "epoch": 7.452517985611511, "grad_norm": 0.05074182525277138, "learning_rate": 0.01, "loss": 1.9822, "step": 72513 }, { "epoch": 7.452826310380267, "grad_norm": 0.09631665050983429, "learning_rate": 0.01, "loss": 1.9732, "step": 72516 }, { "epoch": 7.4531346351490235, "grad_norm": 0.04635360836982727, "learning_rate": 0.01, "loss": 1.9411, "step": 72519 }, { "epoch": 7.45344295991778, "grad_norm": 0.09677247703075409, "learning_rate": 0.01, "loss": 1.9853, "step": 72522 }, { "epoch": 7.453751284686536, "grad_norm": 0.05190379172563553, "learning_rate": 0.01, "loss": 1.981, "step": 72525 }, { "epoch": 7.4540596094552924, "grad_norm": 0.08189938962459564, "learning_rate": 0.01, "loss": 1.9769, "step": 72528 }, { "epoch": 7.45436793422405, "grad_norm": 0.03724074363708496, "learning_rate": 0.01, "loss": 1.9632, "step": 72531 }, { "epoch": 7.454676258992806, "grad_norm": 0.07144693285226822, "learning_rate": 0.01, "loss": 1.966, "step": 72534 }, { "epoch": 7.454984583761562, "grad_norm": 0.12171963602304459, "learning_rate": 0.01, "loss": 2.0007, "step": 72537 }, { "epoch": 7.455292908530319, "grad_norm": 0.09564460813999176, "learning_rate": 0.01, "loss": 1.9767, "step": 72540 }, { "epoch": 7.455601233299075, "grad_norm": 0.04494168609380722, "learning_rate": 0.01, "loss": 1.9617, "step": 72543 }, { "epoch": 7.455909558067831, "grad_norm": 0.049825269728899, "learning_rate": 0.01, "loss": 1.9891, "step": 72546 }, { "epoch": 7.4562178828365875, "grad_norm": 0.09712143987417221, "learning_rate": 0.01, "loss": 1.9591, "step": 72549 }, { "epoch": 7.456526207605345, "grad_norm": 0.10748656839132309, "learning_rate": 0.01, "loss": 2.0011, "step": 72552 }, { "epoch": 7.456834532374101, "grad_norm": 0.06327345967292786, "learning_rate": 0.01, "loss": 1.9788, "step": 72555 }, { "epoch": 7.457142857142857, "grad_norm": 0.11034796386957169, "learning_rate": 0.01, "loss": 1.9751, "step": 72558 }, { "epoch": 7.457451181911614, "grad_norm": 0.11681102961301804, "learning_rate": 0.01, "loss": 1.9923, "step": 72561 }, { "epoch": 7.45775950668037, "grad_norm": 0.11021427065134048, "learning_rate": 0.01, "loss": 1.9812, "step": 72564 }, { "epoch": 7.458067831449126, "grad_norm": 0.0419340580701828, "learning_rate": 0.01, "loss": 1.9866, "step": 72567 }, { "epoch": 7.4583761562178825, "grad_norm": 0.04036851227283478, "learning_rate": 0.01, "loss": 1.9924, "step": 72570 }, { "epoch": 7.458684480986639, "grad_norm": 0.049127209931612015, "learning_rate": 0.01, "loss": 1.9886, "step": 72573 }, { "epoch": 7.458992805755396, "grad_norm": 0.04860240966081619, "learning_rate": 0.01, "loss": 1.9697, "step": 72576 }, { "epoch": 7.459301130524152, "grad_norm": 0.08606428653001785, "learning_rate": 0.01, "loss": 1.9461, "step": 72579 }, { "epoch": 7.459609455292909, "grad_norm": 0.06198833882808685, "learning_rate": 0.01, "loss": 1.9505, "step": 72582 }, { "epoch": 7.459917780061665, "grad_norm": 0.049000997096300125, "learning_rate": 0.01, "loss": 1.9734, "step": 72585 }, { "epoch": 7.460226104830421, "grad_norm": 0.061215490102767944, "learning_rate": 0.01, "loss": 1.9768, "step": 72588 }, { "epoch": 7.460534429599178, "grad_norm": 0.048497434705495834, "learning_rate": 0.01, "loss": 1.9635, "step": 72591 }, { "epoch": 7.460842754367934, "grad_norm": 0.04738443344831467, "learning_rate": 0.01, "loss": 1.9718, "step": 72594 }, { "epoch": 7.461151079136691, "grad_norm": 0.10871072858572006, "learning_rate": 0.01, "loss": 1.9707, "step": 72597 }, { "epoch": 7.461459403905447, "grad_norm": 0.08442126959562302, "learning_rate": 0.01, "loss": 1.9643, "step": 72600 }, { "epoch": 7.461767728674204, "grad_norm": 0.11305920034646988, "learning_rate": 0.01, "loss": 1.9837, "step": 72603 }, { "epoch": 7.46207605344296, "grad_norm": 0.14034248888492584, "learning_rate": 0.01, "loss": 1.9753, "step": 72606 }, { "epoch": 7.462384378211716, "grad_norm": 0.05142485722899437, "learning_rate": 0.01, "loss": 1.9909, "step": 72609 }, { "epoch": 7.462692702980473, "grad_norm": 0.03771292045712471, "learning_rate": 0.01, "loss": 1.9916, "step": 72612 }, { "epoch": 7.463001027749229, "grad_norm": 0.040064070373773575, "learning_rate": 0.01, "loss": 1.9664, "step": 72615 }, { "epoch": 7.463309352517985, "grad_norm": 0.041471678763628006, "learning_rate": 0.01, "loss": 1.9639, "step": 72618 }, { "epoch": 7.463617677286742, "grad_norm": 0.047812748700380325, "learning_rate": 0.01, "loss": 1.9506, "step": 72621 }, { "epoch": 7.463926002055499, "grad_norm": 0.0737353190779686, "learning_rate": 0.01, "loss": 1.992, "step": 72624 }, { "epoch": 7.464234326824255, "grad_norm": 0.05118251591920853, "learning_rate": 0.01, "loss": 1.9388, "step": 72627 }, { "epoch": 7.464542651593011, "grad_norm": 0.15912941098213196, "learning_rate": 0.01, "loss": 1.9691, "step": 72630 }, { "epoch": 7.464850976361768, "grad_norm": 0.05329582840204239, "learning_rate": 0.01, "loss": 1.9681, "step": 72633 }, { "epoch": 7.465159301130524, "grad_norm": 0.08995845913887024, "learning_rate": 0.01, "loss": 1.9615, "step": 72636 }, { "epoch": 7.46546762589928, "grad_norm": 0.0392749048769474, "learning_rate": 0.01, "loss": 1.96, "step": 72639 }, { "epoch": 7.4657759506680375, "grad_norm": 0.031700219959020615, "learning_rate": 0.01, "loss": 1.9908, "step": 72642 }, { "epoch": 7.466084275436794, "grad_norm": 0.11098748445510864, "learning_rate": 0.01, "loss": 2.0008, "step": 72645 }, { "epoch": 7.46639260020555, "grad_norm": 0.06115696206688881, "learning_rate": 0.01, "loss": 1.9812, "step": 72648 }, { "epoch": 7.466700924974306, "grad_norm": 0.0435514971613884, "learning_rate": 0.01, "loss": 1.9592, "step": 72651 }, { "epoch": 7.467009249743063, "grad_norm": 0.04764613136649132, "learning_rate": 0.01, "loss": 1.9499, "step": 72654 }, { "epoch": 7.467317574511819, "grad_norm": 0.04776312783360481, "learning_rate": 0.01, "loss": 1.9863, "step": 72657 }, { "epoch": 7.467625899280575, "grad_norm": 0.06918791681528091, "learning_rate": 0.01, "loss": 1.9848, "step": 72660 }, { "epoch": 7.467934224049332, "grad_norm": 0.05166517570614815, "learning_rate": 0.01, "loss": 1.9808, "step": 72663 }, { "epoch": 7.468242548818089, "grad_norm": 0.04008159786462784, "learning_rate": 0.01, "loss": 1.9631, "step": 72666 }, { "epoch": 7.468550873586845, "grad_norm": 0.046165283769369125, "learning_rate": 0.01, "loss": 2.0057, "step": 72669 }, { "epoch": 7.468859198355601, "grad_norm": 0.051842544227838516, "learning_rate": 0.01, "loss": 1.972, "step": 72672 }, { "epoch": 7.469167523124358, "grad_norm": 0.04817887768149376, "learning_rate": 0.01, "loss": 1.9708, "step": 72675 }, { "epoch": 7.469475847893114, "grad_norm": 0.09956707060337067, "learning_rate": 0.01, "loss": 1.997, "step": 72678 }, { "epoch": 7.46978417266187, "grad_norm": 0.07913441956043243, "learning_rate": 0.01, "loss": 1.98, "step": 72681 }, { "epoch": 7.470092497430627, "grad_norm": 0.07743464410305023, "learning_rate": 0.01, "loss": 1.9883, "step": 72684 }, { "epoch": 7.470400822199383, "grad_norm": 0.06038682535290718, "learning_rate": 0.01, "loss": 1.9389, "step": 72687 }, { "epoch": 7.47070914696814, "grad_norm": 0.056036096066236496, "learning_rate": 0.01, "loss": 1.9465, "step": 72690 }, { "epoch": 7.4710174717368965, "grad_norm": 0.0786886215209961, "learning_rate": 0.01, "loss": 1.9667, "step": 72693 }, { "epoch": 7.471325796505653, "grad_norm": 0.054459333419799805, "learning_rate": 0.01, "loss": 1.9708, "step": 72696 }, { "epoch": 7.471634121274409, "grad_norm": 0.05316760390996933, "learning_rate": 0.01, "loss": 1.9662, "step": 72699 }, { "epoch": 7.471942446043165, "grad_norm": 0.04993414506316185, "learning_rate": 0.01, "loss": 1.9766, "step": 72702 }, { "epoch": 7.472250770811922, "grad_norm": 0.08382388204336166, "learning_rate": 0.01, "loss": 1.9565, "step": 72705 }, { "epoch": 7.472559095580678, "grad_norm": 0.05314520001411438, "learning_rate": 0.01, "loss": 1.9575, "step": 72708 }, { "epoch": 7.472867420349434, "grad_norm": 0.061621155589818954, "learning_rate": 0.01, "loss": 1.9621, "step": 72711 }, { "epoch": 7.4731757451181915, "grad_norm": 0.12471956759691238, "learning_rate": 0.01, "loss": 1.9813, "step": 72714 }, { "epoch": 7.473484069886948, "grad_norm": 0.05979030579328537, "learning_rate": 0.01, "loss": 1.9777, "step": 72717 }, { "epoch": 7.473792394655704, "grad_norm": 0.08850116282701492, "learning_rate": 0.01, "loss": 1.9536, "step": 72720 }, { "epoch": 7.47410071942446, "grad_norm": 0.12835745513439178, "learning_rate": 0.01, "loss": 1.954, "step": 72723 }, { "epoch": 7.474409044193217, "grad_norm": 0.04156319797039032, "learning_rate": 0.01, "loss": 1.9569, "step": 72726 }, { "epoch": 7.474717368961973, "grad_norm": 0.05407378822565079, "learning_rate": 0.01, "loss": 1.934, "step": 72729 }, { "epoch": 7.475025693730729, "grad_norm": 0.0464872308075428, "learning_rate": 0.01, "loss": 1.9836, "step": 72732 }, { "epoch": 7.4753340184994865, "grad_norm": 0.05535002052783966, "learning_rate": 0.01, "loss": 1.987, "step": 72735 }, { "epoch": 7.475642343268243, "grad_norm": 0.04521692544221878, "learning_rate": 0.01, "loss": 1.933, "step": 72738 }, { "epoch": 7.475950668036999, "grad_norm": 0.11313310265541077, "learning_rate": 0.01, "loss": 2.0125, "step": 72741 }, { "epoch": 7.4762589928057555, "grad_norm": 0.04605510085821152, "learning_rate": 0.01, "loss": 1.9729, "step": 72744 }, { "epoch": 7.476567317574512, "grad_norm": 0.0692354366183281, "learning_rate": 0.01, "loss": 1.9639, "step": 72747 }, { "epoch": 7.476875642343268, "grad_norm": 0.0635964423418045, "learning_rate": 0.01, "loss": 1.9775, "step": 72750 }, { "epoch": 7.477183967112024, "grad_norm": 0.10048150271177292, "learning_rate": 0.01, "loss": 2.0048, "step": 72753 }, { "epoch": 7.477492291880781, "grad_norm": 0.056747522205114365, "learning_rate": 0.01, "loss": 1.9964, "step": 72756 }, { "epoch": 7.477800616649538, "grad_norm": 0.0338391549885273, "learning_rate": 0.01, "loss": 1.9545, "step": 72759 }, { "epoch": 7.478108941418294, "grad_norm": 0.09825774282217026, "learning_rate": 0.01, "loss": 1.9826, "step": 72762 }, { "epoch": 7.4784172661870505, "grad_norm": 0.07101613283157349, "learning_rate": 0.01, "loss": 1.9545, "step": 72765 }, { "epoch": 7.478725590955807, "grad_norm": 0.06692194938659668, "learning_rate": 0.01, "loss": 1.9677, "step": 72768 }, { "epoch": 7.479033915724563, "grad_norm": 0.07290485501289368, "learning_rate": 0.01, "loss": 1.9683, "step": 72771 }, { "epoch": 7.479342240493319, "grad_norm": 0.042110659182071686, "learning_rate": 0.01, "loss": 1.9707, "step": 72774 }, { "epoch": 7.479650565262076, "grad_norm": 0.08670128881931305, "learning_rate": 0.01, "loss": 1.9922, "step": 72777 }, { "epoch": 7.479958890030833, "grad_norm": 0.050455573946237564, "learning_rate": 0.01, "loss": 1.9786, "step": 72780 }, { "epoch": 7.480267214799589, "grad_norm": 0.13073371350765228, "learning_rate": 0.01, "loss": 1.989, "step": 72783 }, { "epoch": 7.4805755395683455, "grad_norm": 0.11058662831783295, "learning_rate": 0.01, "loss": 1.9596, "step": 72786 }, { "epoch": 7.480883864337102, "grad_norm": 0.05915442109107971, "learning_rate": 0.01, "loss": 1.9921, "step": 72789 }, { "epoch": 7.481192189105858, "grad_norm": 0.05119236186146736, "learning_rate": 0.01, "loss": 1.973, "step": 72792 }, { "epoch": 7.4815005138746145, "grad_norm": 0.07420118898153305, "learning_rate": 0.01, "loss": 1.9785, "step": 72795 }, { "epoch": 7.481808838643371, "grad_norm": 0.061627451330423355, "learning_rate": 0.01, "loss": 1.9583, "step": 72798 }, { "epoch": 7.482117163412127, "grad_norm": 0.04703347012400627, "learning_rate": 0.01, "loss": 1.9798, "step": 72801 }, { "epoch": 7.482425488180884, "grad_norm": 0.038575369864702225, "learning_rate": 0.01, "loss": 1.9539, "step": 72804 }, { "epoch": 7.482733812949641, "grad_norm": 0.07879947870969772, "learning_rate": 0.01, "loss": 1.9795, "step": 72807 }, { "epoch": 7.483042137718397, "grad_norm": 0.0844467431306839, "learning_rate": 0.01, "loss": 1.9832, "step": 72810 }, { "epoch": 7.483350462487153, "grad_norm": 0.06974819302558899, "learning_rate": 0.01, "loss": 1.9813, "step": 72813 }, { "epoch": 7.4836587872559095, "grad_norm": 0.060370590537786484, "learning_rate": 0.01, "loss": 1.9792, "step": 72816 }, { "epoch": 7.483967112024666, "grad_norm": 0.02921084314584732, "learning_rate": 0.01, "loss": 1.9688, "step": 72819 }, { "epoch": 7.484275436793422, "grad_norm": 0.04770229756832123, "learning_rate": 0.01, "loss": 1.9698, "step": 72822 }, { "epoch": 7.484583761562178, "grad_norm": 0.08941125124692917, "learning_rate": 0.01, "loss": 1.9627, "step": 72825 }, { "epoch": 7.484892086330936, "grad_norm": 0.03461920842528343, "learning_rate": 0.01, "loss": 1.9762, "step": 72828 }, { "epoch": 7.485200411099692, "grad_norm": 0.10770180821418762, "learning_rate": 0.01, "loss": 1.9642, "step": 72831 }, { "epoch": 7.485508735868448, "grad_norm": 0.08093126863241196, "learning_rate": 0.01, "loss": 1.9581, "step": 72834 }, { "epoch": 7.4858170606372045, "grad_norm": 0.08929312229156494, "learning_rate": 0.01, "loss": 1.969, "step": 72837 }, { "epoch": 7.486125385405961, "grad_norm": 0.11654918640851974, "learning_rate": 0.01, "loss": 1.9955, "step": 72840 }, { "epoch": 7.486433710174717, "grad_norm": 0.04198765754699707, "learning_rate": 0.01, "loss": 1.9502, "step": 72843 }, { "epoch": 7.4867420349434735, "grad_norm": 0.041191764175891876, "learning_rate": 0.01, "loss": 1.9644, "step": 72846 }, { "epoch": 7.48705035971223, "grad_norm": 0.042416587471961975, "learning_rate": 0.01, "loss": 1.9712, "step": 72849 }, { "epoch": 7.487358684480987, "grad_norm": 0.033558644354343414, "learning_rate": 0.01, "loss": 1.9424, "step": 72852 }, { "epoch": 7.487667009249743, "grad_norm": 0.04876953363418579, "learning_rate": 0.01, "loss": 1.9652, "step": 72855 }, { "epoch": 7.4879753340185, "grad_norm": 0.11887954920530319, "learning_rate": 0.01, "loss": 1.9885, "step": 72858 }, { "epoch": 7.488283658787256, "grad_norm": 0.09340287744998932, "learning_rate": 0.01, "loss": 1.9788, "step": 72861 }, { "epoch": 7.488591983556012, "grad_norm": 0.0659438893198967, "learning_rate": 0.01, "loss": 1.965, "step": 72864 }, { "epoch": 7.4889003083247685, "grad_norm": 0.05727607384324074, "learning_rate": 0.01, "loss": 1.985, "step": 72867 }, { "epoch": 7.489208633093525, "grad_norm": 0.051362354308366776, "learning_rate": 0.01, "loss": 1.9697, "step": 72870 }, { "epoch": 7.489516957862282, "grad_norm": 0.07566554844379425, "learning_rate": 0.01, "loss": 1.949, "step": 72873 }, { "epoch": 7.489825282631038, "grad_norm": 0.052347857505083084, "learning_rate": 0.01, "loss": 1.9754, "step": 72876 }, { "epoch": 7.490133607399795, "grad_norm": 0.03814227133989334, "learning_rate": 0.01, "loss": 1.9628, "step": 72879 }, { "epoch": 7.490441932168551, "grad_norm": 0.03464813157916069, "learning_rate": 0.01, "loss": 1.9638, "step": 72882 }, { "epoch": 7.490750256937307, "grad_norm": 0.12053199857473373, "learning_rate": 0.01, "loss": 1.9694, "step": 72885 }, { "epoch": 7.4910585817060635, "grad_norm": 0.04944891110062599, "learning_rate": 0.01, "loss": 1.9826, "step": 72888 }, { "epoch": 7.49136690647482, "grad_norm": 0.12440962344408035, "learning_rate": 0.01, "loss": 1.9831, "step": 72891 }, { "epoch": 7.491675231243576, "grad_norm": 0.07274243980646133, "learning_rate": 0.01, "loss": 1.9887, "step": 72894 }, { "epoch": 7.491983556012333, "grad_norm": 0.041211891919374466, "learning_rate": 0.01, "loss": 1.9543, "step": 72897 }, { "epoch": 7.49229188078109, "grad_norm": 0.07639609277248383, "learning_rate": 0.01, "loss": 1.9498, "step": 72900 }, { "epoch": 7.492600205549846, "grad_norm": 0.05690251663327217, "learning_rate": 0.01, "loss": 1.9623, "step": 72903 }, { "epoch": 7.492908530318602, "grad_norm": 0.04439922422170639, "learning_rate": 0.01, "loss": 1.9661, "step": 72906 }, { "epoch": 7.493216855087359, "grad_norm": 0.03292795643210411, "learning_rate": 0.01, "loss": 1.9818, "step": 72909 }, { "epoch": 7.493525179856115, "grad_norm": 0.03560163453221321, "learning_rate": 0.01, "loss": 1.9894, "step": 72912 }, { "epoch": 7.493833504624871, "grad_norm": 0.09666794538497925, "learning_rate": 0.01, "loss": 1.9626, "step": 72915 }, { "epoch": 7.494141829393628, "grad_norm": 0.0834769606590271, "learning_rate": 0.01, "loss": 1.9853, "step": 72918 }, { "epoch": 7.494450154162385, "grad_norm": 0.04032580181956291, "learning_rate": 0.01, "loss": 1.9748, "step": 72921 }, { "epoch": 7.494758478931141, "grad_norm": 0.1358065903186798, "learning_rate": 0.01, "loss": 1.9707, "step": 72924 }, { "epoch": 7.495066803699897, "grad_norm": 0.07956533133983612, "learning_rate": 0.01, "loss": 1.9497, "step": 72927 }, { "epoch": 7.495375128468654, "grad_norm": 0.09689958393573761, "learning_rate": 0.01, "loss": 1.9639, "step": 72930 }, { "epoch": 7.49568345323741, "grad_norm": 0.09367521107196808, "learning_rate": 0.01, "loss": 1.9812, "step": 72933 }, { "epoch": 7.495991778006166, "grad_norm": 0.054803263396024704, "learning_rate": 0.01, "loss": 1.9808, "step": 72936 }, { "epoch": 7.4963001027749225, "grad_norm": 0.04972374439239502, "learning_rate": 0.01, "loss": 1.976, "step": 72939 }, { "epoch": 7.49660842754368, "grad_norm": 0.044981930404901505, "learning_rate": 0.01, "loss": 1.9535, "step": 72942 }, { "epoch": 7.496916752312436, "grad_norm": 0.03693649172782898, "learning_rate": 0.01, "loss": 1.9828, "step": 72945 }, { "epoch": 7.497225077081192, "grad_norm": 0.12305953353643417, "learning_rate": 0.01, "loss": 1.9923, "step": 72948 }, { "epoch": 7.497533401849949, "grad_norm": 0.10410351306200027, "learning_rate": 0.01, "loss": 1.9759, "step": 72951 }, { "epoch": 7.497841726618705, "grad_norm": 0.04615464434027672, "learning_rate": 0.01, "loss": 1.9798, "step": 72954 }, { "epoch": 7.498150051387461, "grad_norm": 0.06190275400876999, "learning_rate": 0.01, "loss": 1.9854, "step": 72957 }, { "epoch": 7.498458376156218, "grad_norm": 0.03822600841522217, "learning_rate": 0.01, "loss": 1.9758, "step": 72960 }, { "epoch": 7.498766700924975, "grad_norm": 0.06215745583176613, "learning_rate": 0.01, "loss": 1.9826, "step": 72963 }, { "epoch": 7.499075025693731, "grad_norm": 0.03885975107550621, "learning_rate": 0.01, "loss": 1.9608, "step": 72966 }, { "epoch": 7.499383350462487, "grad_norm": 0.04294813051819801, "learning_rate": 0.01, "loss": 1.99, "step": 72969 }, { "epoch": 7.499691675231244, "grad_norm": 0.04972092807292938, "learning_rate": 0.01, "loss": 1.9672, "step": 72972 }, { "epoch": 7.5, "grad_norm": 0.11483730375766754, "learning_rate": 0.01, "loss": 1.9492, "step": 72975 }, { "epoch": 7.500308324768756, "grad_norm": 0.09318023920059204, "learning_rate": 0.01, "loss": 1.967, "step": 72978 }, { "epoch": 7.500616649537513, "grad_norm": 0.08225825428962708, "learning_rate": 0.01, "loss": 1.9985, "step": 72981 }, { "epoch": 7.500924974306269, "grad_norm": 0.06477591395378113, "learning_rate": 0.01, "loss": 2.0082, "step": 72984 }, { "epoch": 7.501233299075025, "grad_norm": 0.03535725921392441, "learning_rate": 0.01, "loss": 1.9725, "step": 72987 }, { "epoch": 7.501541623843782, "grad_norm": 0.05080735310912132, "learning_rate": 0.01, "loss": 1.9835, "step": 72990 }, { "epoch": 7.501849948612539, "grad_norm": 0.0634625032544136, "learning_rate": 0.01, "loss": 1.9406, "step": 72993 }, { "epoch": 7.502158273381295, "grad_norm": 0.04936002939939499, "learning_rate": 0.01, "loss": 1.9436, "step": 72996 }, { "epoch": 7.502466598150051, "grad_norm": 0.055662546306848526, "learning_rate": 0.01, "loss": 1.9459, "step": 72999 }, { "epoch": 7.502774922918808, "grad_norm": 0.08707310259342194, "learning_rate": 0.01, "loss": 1.9947, "step": 73002 }, { "epoch": 7.503083247687564, "grad_norm": 0.0861874595284462, "learning_rate": 0.01, "loss": 1.9819, "step": 73005 }, { "epoch": 7.503391572456321, "grad_norm": 0.04461105167865753, "learning_rate": 0.01, "loss": 1.9803, "step": 73008 }, { "epoch": 7.5036998972250775, "grad_norm": 0.03987470641732216, "learning_rate": 0.01, "loss": 1.9855, "step": 73011 }, { "epoch": 7.504008221993834, "grad_norm": 0.03856007382273674, "learning_rate": 0.01, "loss": 1.9603, "step": 73014 }, { "epoch": 7.50431654676259, "grad_norm": 0.12175683677196503, "learning_rate": 0.01, "loss": 1.9612, "step": 73017 }, { "epoch": 7.504624871531346, "grad_norm": 0.04544226825237274, "learning_rate": 0.01, "loss": 1.9787, "step": 73020 }, { "epoch": 7.504933196300103, "grad_norm": 0.10112664103507996, "learning_rate": 0.01, "loss": 1.9999, "step": 73023 }, { "epoch": 7.505241521068859, "grad_norm": 0.0891900509595871, "learning_rate": 0.01, "loss": 1.9913, "step": 73026 }, { "epoch": 7.505549845837615, "grad_norm": 0.03948020190000534, "learning_rate": 0.01, "loss": 1.9645, "step": 73029 }, { "epoch": 7.505858170606372, "grad_norm": 0.041939374059438705, "learning_rate": 0.01, "loss": 1.9495, "step": 73032 }, { "epoch": 7.506166495375129, "grad_norm": 0.037538815289735794, "learning_rate": 0.01, "loss": 1.9467, "step": 73035 }, { "epoch": 7.506474820143885, "grad_norm": 0.03573056682944298, "learning_rate": 0.01, "loss": 1.9699, "step": 73038 }, { "epoch": 7.506783144912641, "grad_norm": 0.0640433058142662, "learning_rate": 0.01, "loss": 1.9808, "step": 73041 }, { "epoch": 7.507091469681398, "grad_norm": 0.1231018453836441, "learning_rate": 0.01, "loss": 1.9671, "step": 73044 }, { "epoch": 7.507399794450154, "grad_norm": 0.13998620212078094, "learning_rate": 0.01, "loss": 1.984, "step": 73047 }, { "epoch": 7.50770811921891, "grad_norm": 0.05272144079208374, "learning_rate": 0.01, "loss": 1.9795, "step": 73050 }, { "epoch": 7.508016443987667, "grad_norm": 0.06140724569559097, "learning_rate": 0.01, "loss": 1.9657, "step": 73053 }, { "epoch": 7.508324768756424, "grad_norm": 0.0687750056385994, "learning_rate": 0.01, "loss": 1.9726, "step": 73056 }, { "epoch": 7.50863309352518, "grad_norm": 0.06672102957963943, "learning_rate": 0.01, "loss": 1.992, "step": 73059 }, { "epoch": 7.5089414182939365, "grad_norm": 0.11446451395750046, "learning_rate": 0.01, "loss": 1.9866, "step": 73062 }, { "epoch": 7.509249743062693, "grad_norm": 0.05556824430823326, "learning_rate": 0.01, "loss": 1.9784, "step": 73065 }, { "epoch": 7.509558067831449, "grad_norm": 0.035088442265987396, "learning_rate": 0.01, "loss": 1.9691, "step": 73068 }, { "epoch": 7.509866392600205, "grad_norm": 0.06371704488992691, "learning_rate": 0.01, "loss": 1.9613, "step": 73071 }, { "epoch": 7.510174717368962, "grad_norm": 0.052248984575271606, "learning_rate": 0.01, "loss": 1.9643, "step": 73074 }, { "epoch": 7.510483042137718, "grad_norm": 0.04099287465214729, "learning_rate": 0.01, "loss": 1.9708, "step": 73077 }, { "epoch": 7.510791366906475, "grad_norm": 0.09975527226924896, "learning_rate": 0.01, "loss": 1.9906, "step": 73080 }, { "epoch": 7.5110996916752315, "grad_norm": 0.07624788582324982, "learning_rate": 0.01, "loss": 1.9525, "step": 73083 }, { "epoch": 7.511408016443988, "grad_norm": 0.07405281066894531, "learning_rate": 0.01, "loss": 1.9683, "step": 73086 }, { "epoch": 7.511716341212744, "grad_norm": 0.041836485266685486, "learning_rate": 0.01, "loss": 1.9779, "step": 73089 }, { "epoch": 7.5120246659815, "grad_norm": 0.043866775929927826, "learning_rate": 0.01, "loss": 1.948, "step": 73092 }, { "epoch": 7.512332990750257, "grad_norm": 0.032251179218292236, "learning_rate": 0.01, "loss": 1.9576, "step": 73095 }, { "epoch": 7.512641315519013, "grad_norm": 0.11554509401321411, "learning_rate": 0.01, "loss": 1.9473, "step": 73098 }, { "epoch": 7.51294964028777, "grad_norm": 0.06754744052886963, "learning_rate": 0.01, "loss": 1.9841, "step": 73101 }, { "epoch": 7.5132579650565265, "grad_norm": 0.09699345380067825, "learning_rate": 0.01, "loss": 2.0038, "step": 73104 }, { "epoch": 7.513566289825283, "grad_norm": 0.08807633817195892, "learning_rate": 0.01, "loss": 1.9804, "step": 73107 }, { "epoch": 7.513874614594039, "grad_norm": 0.037080902606248856, "learning_rate": 0.01, "loss": 1.9735, "step": 73110 }, { "epoch": 7.5141829393627955, "grad_norm": 0.06916693598031998, "learning_rate": 0.01, "loss": 1.9727, "step": 73113 }, { "epoch": 7.514491264131552, "grad_norm": 0.07650911808013916, "learning_rate": 0.01, "loss": 1.961, "step": 73116 }, { "epoch": 7.514799588900308, "grad_norm": 0.09734641015529633, "learning_rate": 0.01, "loss": 1.9825, "step": 73119 }, { "epoch": 7.515107913669064, "grad_norm": 0.11928999423980713, "learning_rate": 0.01, "loss": 1.9793, "step": 73122 }, { "epoch": 7.515416238437821, "grad_norm": 0.07260621339082718, "learning_rate": 0.01, "loss": 1.9715, "step": 73125 }, { "epoch": 7.515724563206578, "grad_norm": 0.046437349170446396, "learning_rate": 0.01, "loss": 1.9826, "step": 73128 }, { "epoch": 7.516032887975334, "grad_norm": 0.06214399263262749, "learning_rate": 0.01, "loss": 1.9769, "step": 73131 }, { "epoch": 7.5163412127440905, "grad_norm": 0.051302261650562286, "learning_rate": 0.01, "loss": 1.9769, "step": 73134 }, { "epoch": 7.516649537512847, "grad_norm": 0.041365306824445724, "learning_rate": 0.01, "loss": 1.9425, "step": 73137 }, { "epoch": 7.516957862281603, "grad_norm": 0.044153954833745956, "learning_rate": 0.01, "loss": 1.9508, "step": 73140 }, { "epoch": 7.517266187050359, "grad_norm": 0.10011675208806992, "learning_rate": 0.01, "loss": 1.9704, "step": 73143 }, { "epoch": 7.517574511819117, "grad_norm": 0.05302093178033829, "learning_rate": 0.01, "loss": 1.9906, "step": 73146 }, { "epoch": 7.517882836587873, "grad_norm": 0.0799306333065033, "learning_rate": 0.01, "loss": 1.9653, "step": 73149 }, { "epoch": 7.518191161356629, "grad_norm": 0.0636538714170456, "learning_rate": 0.01, "loss": 1.9621, "step": 73152 }, { "epoch": 7.5184994861253855, "grad_norm": 0.05640498548746109, "learning_rate": 0.01, "loss": 1.9918, "step": 73155 }, { "epoch": 7.518807810894142, "grad_norm": 0.09565453976392746, "learning_rate": 0.01, "loss": 1.9424, "step": 73158 }, { "epoch": 7.519116135662898, "grad_norm": 0.04038292542099953, "learning_rate": 0.01, "loss": 1.9496, "step": 73161 }, { "epoch": 7.5194244604316545, "grad_norm": 0.06211680546402931, "learning_rate": 0.01, "loss": 1.974, "step": 73164 }, { "epoch": 7.519732785200411, "grad_norm": 0.13773837685585022, "learning_rate": 0.01, "loss": 1.9706, "step": 73167 }, { "epoch": 7.520041109969167, "grad_norm": 0.04517769068479538, "learning_rate": 0.01, "loss": 1.9873, "step": 73170 }, { "epoch": 7.520349434737924, "grad_norm": 0.06648048013448715, "learning_rate": 0.01, "loss": 1.9665, "step": 73173 }, { "epoch": 7.520657759506681, "grad_norm": 0.05798725783824921, "learning_rate": 0.01, "loss": 1.9712, "step": 73176 }, { "epoch": 7.520966084275437, "grad_norm": 0.07782495766878128, "learning_rate": 0.01, "loss": 1.979, "step": 73179 }, { "epoch": 7.521274409044193, "grad_norm": 0.06339671462774277, "learning_rate": 0.01, "loss": 1.9505, "step": 73182 }, { "epoch": 7.5215827338129495, "grad_norm": 0.04342902451753616, "learning_rate": 0.01, "loss": 2.006, "step": 73185 }, { "epoch": 7.521891058581706, "grad_norm": 0.054694198071956635, "learning_rate": 0.01, "loss": 1.946, "step": 73188 }, { "epoch": 7.522199383350462, "grad_norm": 0.058501482009887695, "learning_rate": 0.01, "loss": 1.9744, "step": 73191 }, { "epoch": 7.522507708119219, "grad_norm": 0.06266117095947266, "learning_rate": 0.01, "loss": 1.9615, "step": 73194 }, { "epoch": 7.522816032887976, "grad_norm": 0.07241412997245789, "learning_rate": 0.01, "loss": 1.9572, "step": 73197 }, { "epoch": 7.523124357656732, "grad_norm": 0.06054093688726425, "learning_rate": 0.01, "loss": 1.9449, "step": 73200 }, { "epoch": 7.523432682425488, "grad_norm": 0.03412376344203949, "learning_rate": 0.01, "loss": 1.9492, "step": 73203 }, { "epoch": 7.5237410071942445, "grad_norm": 0.05688127875328064, "learning_rate": 0.01, "loss": 1.9952, "step": 73206 }, { "epoch": 7.524049331963001, "grad_norm": 0.048515647649765015, "learning_rate": 0.01, "loss": 1.9771, "step": 73209 }, { "epoch": 7.524357656731757, "grad_norm": 0.06496395915746689, "learning_rate": 0.01, "loss": 1.9967, "step": 73212 }, { "epoch": 7.5246659815005135, "grad_norm": 0.10364048182964325, "learning_rate": 0.01, "loss": 1.9803, "step": 73215 }, { "epoch": 7.524974306269271, "grad_norm": 0.15099607408046722, "learning_rate": 0.01, "loss": 1.9557, "step": 73218 }, { "epoch": 7.525282631038027, "grad_norm": 0.059038564562797546, "learning_rate": 0.01, "loss": 1.9834, "step": 73221 }, { "epoch": 7.525590955806783, "grad_norm": 0.056402787566185, "learning_rate": 0.01, "loss": 1.9756, "step": 73224 }, { "epoch": 7.52589928057554, "grad_norm": 0.07999878376722336, "learning_rate": 0.01, "loss": 1.9741, "step": 73227 }, { "epoch": 7.526207605344296, "grad_norm": 0.1127920150756836, "learning_rate": 0.01, "loss": 1.9853, "step": 73230 }, { "epoch": 7.526515930113052, "grad_norm": 0.058007560670375824, "learning_rate": 0.01, "loss": 1.9782, "step": 73233 }, { "epoch": 7.5268242548818085, "grad_norm": 0.039497777819633484, "learning_rate": 0.01, "loss": 1.9621, "step": 73236 }, { "epoch": 7.527132579650566, "grad_norm": 0.033727843314409256, "learning_rate": 0.01, "loss": 1.9974, "step": 73239 }, { "epoch": 7.527440904419322, "grad_norm": 0.04165032133460045, "learning_rate": 0.01, "loss": 1.9831, "step": 73242 }, { "epoch": 7.527749229188078, "grad_norm": 0.06078481674194336, "learning_rate": 0.01, "loss": 1.9629, "step": 73245 }, { "epoch": 7.528057553956835, "grad_norm": 0.09847116470336914, "learning_rate": 0.01, "loss": 1.9629, "step": 73248 }, { "epoch": 7.528365878725591, "grad_norm": 0.1251547634601593, "learning_rate": 0.01, "loss": 1.9722, "step": 73251 }, { "epoch": 7.528674203494347, "grad_norm": 0.09058269113302231, "learning_rate": 0.01, "loss": 1.9614, "step": 73254 }, { "epoch": 7.5289825282631035, "grad_norm": 0.10906793177127838, "learning_rate": 0.01, "loss": 1.9651, "step": 73257 }, { "epoch": 7.52929085303186, "grad_norm": 0.03927643597126007, "learning_rate": 0.01, "loss": 1.997, "step": 73260 }, { "epoch": 7.529599177800616, "grad_norm": 0.041548848152160645, "learning_rate": 0.01, "loss": 1.9708, "step": 73263 }, { "epoch": 7.529907502569373, "grad_norm": 0.03454883396625519, "learning_rate": 0.01, "loss": 1.9471, "step": 73266 }, { "epoch": 7.53021582733813, "grad_norm": 0.03971594572067261, "learning_rate": 0.01, "loss": 2.001, "step": 73269 }, { "epoch": 7.530524152106886, "grad_norm": 0.15191693603992462, "learning_rate": 0.01, "loss": 1.9626, "step": 73272 }, { "epoch": 7.530832476875642, "grad_norm": 0.051092084497213364, "learning_rate": 0.01, "loss": 1.9725, "step": 73275 }, { "epoch": 7.531140801644399, "grad_norm": 0.06430649757385254, "learning_rate": 0.01, "loss": 1.99, "step": 73278 }, { "epoch": 7.531449126413155, "grad_norm": 0.06463275849819183, "learning_rate": 0.01, "loss": 1.9731, "step": 73281 }, { "epoch": 7.531757451181912, "grad_norm": 0.037970978766679764, "learning_rate": 0.01, "loss": 1.9578, "step": 73284 }, { "epoch": 7.532065775950668, "grad_norm": 0.052109669893980026, "learning_rate": 0.01, "loss": 1.9831, "step": 73287 }, { "epoch": 7.532374100719425, "grad_norm": 0.04990661144256592, "learning_rate": 0.01, "loss": 1.9917, "step": 73290 }, { "epoch": 7.532682425488181, "grad_norm": 0.08009975403547287, "learning_rate": 0.01, "loss": 1.9576, "step": 73293 }, { "epoch": 7.532990750256937, "grad_norm": 0.05050569027662277, "learning_rate": 0.01, "loss": 1.9646, "step": 73296 }, { "epoch": 7.533299075025694, "grad_norm": 0.07872025668621063, "learning_rate": 0.01, "loss": 1.9649, "step": 73299 }, { "epoch": 7.53360739979445, "grad_norm": 0.04549863189458847, "learning_rate": 0.01, "loss": 1.9707, "step": 73302 }, { "epoch": 7.533915724563206, "grad_norm": 0.10252397507429123, "learning_rate": 0.01, "loss": 1.9803, "step": 73305 }, { "epoch": 7.5342240493319625, "grad_norm": 0.06999849528074265, "learning_rate": 0.01, "loss": 1.9737, "step": 73308 }, { "epoch": 7.53453237410072, "grad_norm": 0.062004949897527695, "learning_rate": 0.01, "loss": 1.9735, "step": 73311 }, { "epoch": 7.534840698869476, "grad_norm": 0.10107864439487457, "learning_rate": 0.01, "loss": 1.9569, "step": 73314 }, { "epoch": 7.535149023638232, "grad_norm": 0.042918670922517776, "learning_rate": 0.01, "loss": 1.9623, "step": 73317 }, { "epoch": 7.535457348406989, "grad_norm": 0.11060534417629242, "learning_rate": 0.01, "loss": 1.9698, "step": 73320 }, { "epoch": 7.535765673175745, "grad_norm": 0.033137235790491104, "learning_rate": 0.01, "loss": 1.9751, "step": 73323 }, { "epoch": 7.536073997944501, "grad_norm": 0.07771358639001846, "learning_rate": 0.01, "loss": 1.9653, "step": 73326 }, { "epoch": 7.536382322713258, "grad_norm": 0.05748758465051651, "learning_rate": 0.01, "loss": 1.965, "step": 73329 }, { "epoch": 7.536690647482015, "grad_norm": 0.06228995695710182, "learning_rate": 0.01, "loss": 1.9647, "step": 73332 }, { "epoch": 7.536998972250771, "grad_norm": 0.06400372087955475, "learning_rate": 0.01, "loss": 1.9905, "step": 73335 }, { "epoch": 7.537307297019527, "grad_norm": 0.11260011047124863, "learning_rate": 0.01, "loss": 1.9794, "step": 73338 }, { "epoch": 7.537615621788284, "grad_norm": 0.052524223923683167, "learning_rate": 0.01, "loss": 1.9624, "step": 73341 }, { "epoch": 7.53792394655704, "grad_norm": 0.07686839252710342, "learning_rate": 0.01, "loss": 1.9593, "step": 73344 }, { "epoch": 7.538232271325796, "grad_norm": 0.11364465206861496, "learning_rate": 0.01, "loss": 1.9736, "step": 73347 }, { "epoch": 7.538540596094553, "grad_norm": 0.06682565808296204, "learning_rate": 0.01, "loss": 1.9539, "step": 73350 }, { "epoch": 7.538848920863309, "grad_norm": 0.04542539268732071, "learning_rate": 0.01, "loss": 1.9923, "step": 73353 }, { "epoch": 7.539157245632066, "grad_norm": 0.0473509319126606, "learning_rate": 0.01, "loss": 1.9594, "step": 73356 }, { "epoch": 7.539465570400822, "grad_norm": 0.04453996196389198, "learning_rate": 0.01, "loss": 1.9388, "step": 73359 }, { "epoch": 7.539773895169579, "grad_norm": 0.03908911719918251, "learning_rate": 0.01, "loss": 1.9751, "step": 73362 }, { "epoch": 7.540082219938335, "grad_norm": 0.051413651555776596, "learning_rate": 0.01, "loss": 1.9819, "step": 73365 }, { "epoch": 7.540390544707091, "grad_norm": 0.053165409713983536, "learning_rate": 0.01, "loss": 1.9597, "step": 73368 }, { "epoch": 7.540698869475848, "grad_norm": 0.049723848700523376, "learning_rate": 0.01, "loss": 1.9738, "step": 73371 }, { "epoch": 7.541007194244604, "grad_norm": 0.06980393826961517, "learning_rate": 0.01, "loss": 1.9603, "step": 73374 }, { "epoch": 7.541315519013361, "grad_norm": 0.09864058345556259, "learning_rate": 0.01, "loss": 1.9864, "step": 73377 }, { "epoch": 7.5416238437821175, "grad_norm": 0.0965089350938797, "learning_rate": 0.01, "loss": 1.9686, "step": 73380 }, { "epoch": 7.541932168550874, "grad_norm": 0.09559361636638641, "learning_rate": 0.01, "loss": 1.9617, "step": 73383 }, { "epoch": 7.54224049331963, "grad_norm": 0.03276180103421211, "learning_rate": 0.01, "loss": 1.9362, "step": 73386 }, { "epoch": 7.542548818088386, "grad_norm": 0.07006944715976715, "learning_rate": 0.01, "loss": 1.9783, "step": 73389 }, { "epoch": 7.542857142857143, "grad_norm": 0.09948286414146423, "learning_rate": 0.01, "loss": 1.9674, "step": 73392 }, { "epoch": 7.543165467625899, "grad_norm": 0.09028782695531845, "learning_rate": 0.01, "loss": 1.9815, "step": 73395 }, { "epoch": 7.543473792394655, "grad_norm": 0.06747119873762131, "learning_rate": 0.01, "loss": 1.9929, "step": 73398 }, { "epoch": 7.5437821171634125, "grad_norm": 0.09048298001289368, "learning_rate": 0.01, "loss": 1.9508, "step": 73401 }, { "epoch": 7.544090441932169, "grad_norm": 0.03283631429076195, "learning_rate": 0.01, "loss": 1.9762, "step": 73404 }, { "epoch": 7.544398766700925, "grad_norm": 0.04949327930808067, "learning_rate": 0.01, "loss": 1.9652, "step": 73407 }, { "epoch": 7.544707091469681, "grad_norm": 0.07692143321037292, "learning_rate": 0.01, "loss": 1.9514, "step": 73410 }, { "epoch": 7.545015416238438, "grad_norm": 0.039769385010004044, "learning_rate": 0.01, "loss": 2.0088, "step": 73413 }, { "epoch": 7.545323741007194, "grad_norm": 0.05932982638478279, "learning_rate": 0.01, "loss": 1.9692, "step": 73416 }, { "epoch": 7.54563206577595, "grad_norm": 0.06630944460630417, "learning_rate": 0.01, "loss": 1.9734, "step": 73419 }, { "epoch": 7.5459403905447076, "grad_norm": 0.05466363951563835, "learning_rate": 0.01, "loss": 1.9676, "step": 73422 }, { "epoch": 7.546248715313464, "grad_norm": 0.04922345280647278, "learning_rate": 0.01, "loss": 1.9676, "step": 73425 }, { "epoch": 7.54655704008222, "grad_norm": 0.056301772594451904, "learning_rate": 0.01, "loss": 1.951, "step": 73428 }, { "epoch": 7.5468653648509765, "grad_norm": 0.05948075279593468, "learning_rate": 0.01, "loss": 1.9541, "step": 73431 }, { "epoch": 7.547173689619733, "grad_norm": 0.07484395802021027, "learning_rate": 0.01, "loss": 1.9645, "step": 73434 }, { "epoch": 7.547482014388489, "grad_norm": 0.055150918662548065, "learning_rate": 0.01, "loss": 1.9608, "step": 73437 }, { "epoch": 7.547790339157245, "grad_norm": 0.0486358217895031, "learning_rate": 0.01, "loss": 1.9858, "step": 73440 }, { "epoch": 7.548098663926002, "grad_norm": 0.058294083923101425, "learning_rate": 0.01, "loss": 1.9549, "step": 73443 }, { "epoch": 7.548406988694758, "grad_norm": 0.034051090478897095, "learning_rate": 0.01, "loss": 1.9681, "step": 73446 }, { "epoch": 7.548715313463515, "grad_norm": 0.04121851176023483, "learning_rate": 0.01, "loss": 1.9947, "step": 73449 }, { "epoch": 7.5490236382322715, "grad_norm": 0.05448736622929573, "learning_rate": 0.01, "loss": 1.9567, "step": 73452 }, { "epoch": 7.549331963001028, "grad_norm": 0.12663137912750244, "learning_rate": 0.01, "loss": 1.9877, "step": 73455 }, { "epoch": 7.549640287769784, "grad_norm": 0.07261858880519867, "learning_rate": 0.01, "loss": 1.9765, "step": 73458 }, { "epoch": 7.54994861253854, "grad_norm": 0.04712305963039398, "learning_rate": 0.01, "loss": 1.9818, "step": 73461 }, { "epoch": 7.550256937307297, "grad_norm": 0.1148359552025795, "learning_rate": 0.01, "loss": 1.9743, "step": 73464 }, { "epoch": 7.550565262076054, "grad_norm": 0.03637329116463661, "learning_rate": 0.01, "loss": 1.9816, "step": 73467 }, { "epoch": 7.55087358684481, "grad_norm": 0.03120165877044201, "learning_rate": 0.01, "loss": 1.9856, "step": 73470 }, { "epoch": 7.5511819116135666, "grad_norm": 0.03387007117271423, "learning_rate": 0.01, "loss": 1.9721, "step": 73473 }, { "epoch": 7.551490236382323, "grad_norm": 0.10267225652933121, "learning_rate": 0.01, "loss": 1.9922, "step": 73476 }, { "epoch": 7.551798561151079, "grad_norm": 0.0743681788444519, "learning_rate": 0.01, "loss": 1.9559, "step": 73479 }, { "epoch": 7.5521068859198355, "grad_norm": 0.041468244045972824, "learning_rate": 0.01, "loss": 1.9778, "step": 73482 }, { "epoch": 7.552415210688592, "grad_norm": 0.03812147676944733, "learning_rate": 0.01, "loss": 1.9556, "step": 73485 }, { "epoch": 7.552723535457348, "grad_norm": 0.046897489577531815, "learning_rate": 0.01, "loss": 1.9654, "step": 73488 }, { "epoch": 7.553031860226104, "grad_norm": 0.043389540165662766, "learning_rate": 0.01, "loss": 1.9916, "step": 73491 }, { "epoch": 7.553340184994862, "grad_norm": 0.04722335562109947, "learning_rate": 0.01, "loss": 1.9354, "step": 73494 }, { "epoch": 7.553648509763618, "grad_norm": 0.03955325484275818, "learning_rate": 0.01, "loss": 1.9831, "step": 73497 }, { "epoch": 7.553956834532374, "grad_norm": 0.11962294578552246, "learning_rate": 0.01, "loss": 1.965, "step": 73500 }, { "epoch": 7.5542651593011305, "grad_norm": 0.03522321209311485, "learning_rate": 0.01, "loss": 1.9779, "step": 73503 }, { "epoch": 7.554573484069887, "grad_norm": 0.13331198692321777, "learning_rate": 0.01, "loss": 1.9835, "step": 73506 }, { "epoch": 7.554881808838643, "grad_norm": 0.05670026317238808, "learning_rate": 0.01, "loss": 1.9658, "step": 73509 }, { "epoch": 7.555190133607399, "grad_norm": 0.05296124890446663, "learning_rate": 0.01, "loss": 1.9736, "step": 73512 }, { "epoch": 7.555498458376157, "grad_norm": 0.06895041465759277, "learning_rate": 0.01, "loss": 1.9819, "step": 73515 }, { "epoch": 7.555806783144913, "grad_norm": 0.06345079094171524, "learning_rate": 0.01, "loss": 1.9764, "step": 73518 }, { "epoch": 7.556115107913669, "grad_norm": 0.055405255407094955, "learning_rate": 0.01, "loss": 1.9896, "step": 73521 }, { "epoch": 7.556423432682426, "grad_norm": 0.053103215992450714, "learning_rate": 0.01, "loss": 1.9693, "step": 73524 }, { "epoch": 7.556731757451182, "grad_norm": 0.04727816581726074, "learning_rate": 0.01, "loss": 1.9461, "step": 73527 }, { "epoch": 7.557040082219938, "grad_norm": 0.059660717844963074, "learning_rate": 0.01, "loss": 1.9752, "step": 73530 }, { "epoch": 7.5573484069886945, "grad_norm": 0.05450676754117012, "learning_rate": 0.01, "loss": 1.9689, "step": 73533 }, { "epoch": 7.557656731757451, "grad_norm": 0.06650180369615555, "learning_rate": 0.01, "loss": 1.9922, "step": 73536 }, { "epoch": 7.557965056526208, "grad_norm": 0.08162131160497665, "learning_rate": 0.01, "loss": 1.9631, "step": 73539 }, { "epoch": 7.558273381294964, "grad_norm": 0.06207094341516495, "learning_rate": 0.01, "loss": 1.9671, "step": 73542 }, { "epoch": 7.558581706063721, "grad_norm": 0.07756184041500092, "learning_rate": 0.01, "loss": 1.958, "step": 73545 }, { "epoch": 7.558890030832477, "grad_norm": 0.036559395492076874, "learning_rate": 0.01, "loss": 1.9892, "step": 73548 }, { "epoch": 7.559198355601233, "grad_norm": 0.03526364639401436, "learning_rate": 0.01, "loss": 1.9849, "step": 73551 }, { "epoch": 7.5595066803699895, "grad_norm": 0.05811254307627678, "learning_rate": 0.01, "loss": 1.9813, "step": 73554 }, { "epoch": 7.559815005138746, "grad_norm": 0.10756142437458038, "learning_rate": 0.01, "loss": 1.9822, "step": 73557 }, { "epoch": 7.560123329907503, "grad_norm": 0.06948500126600266, "learning_rate": 0.01, "loss": 2.0019, "step": 73560 }, { "epoch": 7.560431654676259, "grad_norm": 0.04926348850131035, "learning_rate": 0.01, "loss": 1.9834, "step": 73563 }, { "epoch": 7.560739979445016, "grad_norm": 0.039930544793605804, "learning_rate": 0.01, "loss": 1.9662, "step": 73566 }, { "epoch": 7.561048304213772, "grad_norm": 0.03368406742811203, "learning_rate": 0.01, "loss": 1.9963, "step": 73569 }, { "epoch": 7.561356628982528, "grad_norm": 0.05603760853409767, "learning_rate": 0.01, "loss": 1.9768, "step": 73572 }, { "epoch": 7.561664953751285, "grad_norm": 0.1923237293958664, "learning_rate": 0.01, "loss": 1.9805, "step": 73575 }, { "epoch": 7.561973278520041, "grad_norm": 0.09853487461805344, "learning_rate": 0.01, "loss": 1.9746, "step": 73578 }, { "epoch": 7.562281603288797, "grad_norm": 0.0466923862695694, "learning_rate": 0.01, "loss": 1.9709, "step": 73581 }, { "epoch": 7.5625899280575535, "grad_norm": 0.06811875104904175, "learning_rate": 0.01, "loss": 1.9414, "step": 73584 }, { "epoch": 7.562898252826311, "grad_norm": 0.07578512281179428, "learning_rate": 0.01, "loss": 1.9863, "step": 73587 }, { "epoch": 7.563206577595067, "grad_norm": 0.056927092373371124, "learning_rate": 0.01, "loss": 1.9809, "step": 73590 }, { "epoch": 7.563514902363823, "grad_norm": 0.04470854252576828, "learning_rate": 0.01, "loss": 1.9731, "step": 73593 }, { "epoch": 7.56382322713258, "grad_norm": 0.0404474139213562, "learning_rate": 0.01, "loss": 1.9882, "step": 73596 }, { "epoch": 7.564131551901336, "grad_norm": 0.042724210768938065, "learning_rate": 0.01, "loss": 1.9826, "step": 73599 }, { "epoch": 7.564439876670092, "grad_norm": 0.05680299550294876, "learning_rate": 0.01, "loss": 1.945, "step": 73602 }, { "epoch": 7.564748201438849, "grad_norm": 0.10340821743011475, "learning_rate": 0.01, "loss": 1.9967, "step": 73605 }, { "epoch": 7.565056526207606, "grad_norm": 0.1016075536608696, "learning_rate": 0.01, "loss": 1.9736, "step": 73608 }, { "epoch": 7.565364850976362, "grad_norm": 0.0473574735224247, "learning_rate": 0.01, "loss": 1.9647, "step": 73611 }, { "epoch": 7.565673175745118, "grad_norm": 0.06154468655586243, "learning_rate": 0.01, "loss": 1.9768, "step": 73614 }, { "epoch": 7.565981500513875, "grad_norm": 0.04448488727211952, "learning_rate": 0.01, "loss": 1.9678, "step": 73617 }, { "epoch": 7.566289825282631, "grad_norm": 0.039563100785017014, "learning_rate": 0.01, "loss": 1.977, "step": 73620 }, { "epoch": 7.566598150051387, "grad_norm": 0.038476377725601196, "learning_rate": 0.01, "loss": 1.9808, "step": 73623 }, { "epoch": 7.566906474820144, "grad_norm": 0.04110477492213249, "learning_rate": 0.01, "loss": 1.9707, "step": 73626 }, { "epoch": 7.5672147995889, "grad_norm": 0.057609956711530685, "learning_rate": 0.01, "loss": 1.9846, "step": 73629 }, { "epoch": 7.567523124357657, "grad_norm": 0.03673383593559265, "learning_rate": 0.01, "loss": 1.9838, "step": 73632 }, { "epoch": 7.567831449126413, "grad_norm": 0.05421116575598717, "learning_rate": 0.01, "loss": 1.9909, "step": 73635 }, { "epoch": 7.56813977389517, "grad_norm": 0.09983667731285095, "learning_rate": 0.01, "loss": 1.9663, "step": 73638 }, { "epoch": 7.568448098663926, "grad_norm": 0.06927699595689774, "learning_rate": 0.01, "loss": 1.968, "step": 73641 }, { "epoch": 7.568756423432682, "grad_norm": 0.10717348009347916, "learning_rate": 0.01, "loss": 1.9785, "step": 73644 }, { "epoch": 7.569064748201439, "grad_norm": 0.11684148758649826, "learning_rate": 0.01, "loss": 1.9942, "step": 73647 }, { "epoch": 7.569373072970195, "grad_norm": 0.07459786534309387, "learning_rate": 0.01, "loss": 1.9593, "step": 73650 }, { "epoch": 7.569681397738952, "grad_norm": 0.07895784080028534, "learning_rate": 0.01, "loss": 1.9763, "step": 73653 }, { "epoch": 7.569989722507708, "grad_norm": 0.03824291005730629, "learning_rate": 0.01, "loss": 1.9791, "step": 73656 }, { "epoch": 7.570298047276465, "grad_norm": 0.037501122802495956, "learning_rate": 0.01, "loss": 1.9517, "step": 73659 }, { "epoch": 7.570606372045221, "grad_norm": 0.04228674992918968, "learning_rate": 0.01, "loss": 1.9702, "step": 73662 }, { "epoch": 7.570914696813977, "grad_norm": 0.04469291865825653, "learning_rate": 0.01, "loss": 1.9681, "step": 73665 }, { "epoch": 7.571223021582734, "grad_norm": 0.053544167429208755, "learning_rate": 0.01, "loss": 1.9819, "step": 73668 }, { "epoch": 7.57153134635149, "grad_norm": 0.052348144352436066, "learning_rate": 0.01, "loss": 1.9457, "step": 73671 }, { "epoch": 7.571839671120246, "grad_norm": 0.12371430546045303, "learning_rate": 0.01, "loss": 1.9635, "step": 73674 }, { "epoch": 7.5721479958890034, "grad_norm": 0.048446524888277054, "learning_rate": 0.01, "loss": 1.9418, "step": 73677 }, { "epoch": 7.57245632065776, "grad_norm": 0.049632761627435684, "learning_rate": 0.01, "loss": 1.9756, "step": 73680 }, { "epoch": 7.572764645426516, "grad_norm": 0.053510334342718124, "learning_rate": 0.01, "loss": 1.9751, "step": 73683 }, { "epoch": 7.573072970195272, "grad_norm": 0.0622355155646801, "learning_rate": 0.01, "loss": 1.9795, "step": 73686 }, { "epoch": 7.573381294964029, "grad_norm": 0.046275582164525986, "learning_rate": 0.01, "loss": 1.9884, "step": 73689 }, { "epoch": 7.573689619732785, "grad_norm": 0.07243076711893082, "learning_rate": 0.01, "loss": 1.9673, "step": 73692 }, { "epoch": 7.573997944501541, "grad_norm": 0.07151640206575394, "learning_rate": 0.01, "loss": 1.9618, "step": 73695 }, { "epoch": 7.5743062692702985, "grad_norm": 0.07176624238491058, "learning_rate": 0.01, "loss": 1.9396, "step": 73698 }, { "epoch": 7.574614594039055, "grad_norm": 0.06733313947916031, "learning_rate": 0.01, "loss": 1.9529, "step": 73701 }, { "epoch": 7.574922918807811, "grad_norm": 0.07993333786725998, "learning_rate": 0.01, "loss": 1.9664, "step": 73704 }, { "epoch": 7.575231243576567, "grad_norm": 0.11807093769311905, "learning_rate": 0.01, "loss": 1.9713, "step": 73707 }, { "epoch": 7.575539568345324, "grad_norm": 0.07152784615755081, "learning_rate": 0.01, "loss": 1.9448, "step": 73710 }, { "epoch": 7.57584789311408, "grad_norm": 0.05039028450846672, "learning_rate": 0.01, "loss": 1.9858, "step": 73713 }, { "epoch": 7.576156217882836, "grad_norm": 0.0637911856174469, "learning_rate": 0.01, "loss": 1.9794, "step": 73716 }, { "epoch": 7.576464542651593, "grad_norm": 0.04242941737174988, "learning_rate": 0.01, "loss": 1.9661, "step": 73719 }, { "epoch": 7.576772867420349, "grad_norm": 0.053346943110227585, "learning_rate": 0.01, "loss": 1.9545, "step": 73722 }, { "epoch": 7.577081192189106, "grad_norm": 0.03864128142595291, "learning_rate": 0.01, "loss": 1.9722, "step": 73725 }, { "epoch": 7.5773895169578624, "grad_norm": 0.052774764597415924, "learning_rate": 0.01, "loss": 1.9892, "step": 73728 }, { "epoch": 7.577697841726619, "grad_norm": 0.036650512367486954, "learning_rate": 0.01, "loss": 1.9637, "step": 73731 }, { "epoch": 7.578006166495375, "grad_norm": 0.10396769642829895, "learning_rate": 0.01, "loss": 1.9715, "step": 73734 }, { "epoch": 7.578314491264131, "grad_norm": 0.0873318612575531, "learning_rate": 0.01, "loss": 1.9412, "step": 73737 }, { "epoch": 7.578622816032888, "grad_norm": 0.044649526476860046, "learning_rate": 0.01, "loss": 1.9894, "step": 73740 }, { "epoch": 7.578931140801645, "grad_norm": 0.04690408334136009, "learning_rate": 0.01, "loss": 1.9563, "step": 73743 }, { "epoch": 7.579239465570401, "grad_norm": 0.03519474342465401, "learning_rate": 0.01, "loss": 1.9837, "step": 73746 }, { "epoch": 7.5795477903391575, "grad_norm": 0.05188727751374245, "learning_rate": 0.01, "loss": 1.9754, "step": 73749 }, { "epoch": 7.579856115107914, "grad_norm": 0.04967820271849632, "learning_rate": 0.01, "loss": 1.9677, "step": 73752 }, { "epoch": 7.58016443987667, "grad_norm": 0.03920558840036392, "learning_rate": 0.01, "loss": 1.9526, "step": 73755 }, { "epoch": 7.580472764645426, "grad_norm": 0.0493493489921093, "learning_rate": 0.01, "loss": 1.9558, "step": 73758 }, { "epoch": 7.580781089414183, "grad_norm": 0.03327930346131325, "learning_rate": 0.01, "loss": 1.9594, "step": 73761 }, { "epoch": 7.581089414182939, "grad_norm": 0.041295699775218964, "learning_rate": 0.01, "loss": 1.9951, "step": 73764 }, { "epoch": 7.581397738951695, "grad_norm": 0.10000699013471603, "learning_rate": 0.01, "loss": 1.9685, "step": 73767 }, { "epoch": 7.5817060637204525, "grad_norm": 0.06557929515838623, "learning_rate": 0.01, "loss": 1.9688, "step": 73770 }, { "epoch": 7.582014388489209, "grad_norm": 0.06633325666189194, "learning_rate": 0.01, "loss": 1.9382, "step": 73773 }, { "epoch": 7.582322713257965, "grad_norm": 0.08267974108457565, "learning_rate": 0.01, "loss": 1.9791, "step": 73776 }, { "epoch": 7.5826310380267214, "grad_norm": 0.0816752165555954, "learning_rate": 0.01, "loss": 1.9768, "step": 73779 }, { "epoch": 7.582939362795478, "grad_norm": 0.03547079861164093, "learning_rate": 0.01, "loss": 1.997, "step": 73782 }, { "epoch": 7.583247687564234, "grad_norm": 0.09666965156793594, "learning_rate": 0.01, "loss": 2.0044, "step": 73785 }, { "epoch": 7.58355601233299, "grad_norm": 0.056526027619838715, "learning_rate": 0.01, "loss": 2.0003, "step": 73788 }, { "epoch": 7.583864337101748, "grad_norm": 0.04913655295968056, "learning_rate": 0.01, "loss": 1.9375, "step": 73791 }, { "epoch": 7.584172661870504, "grad_norm": 0.10376237332820892, "learning_rate": 0.01, "loss": 1.9721, "step": 73794 }, { "epoch": 7.58448098663926, "grad_norm": 0.09046986699104309, "learning_rate": 0.01, "loss": 1.9953, "step": 73797 }, { "epoch": 7.5847893114080165, "grad_norm": 0.03161385655403137, "learning_rate": 0.01, "loss": 1.9602, "step": 73800 }, { "epoch": 7.585097636176773, "grad_norm": 0.0820087417960167, "learning_rate": 0.01, "loss": 1.9976, "step": 73803 }, { "epoch": 7.585405960945529, "grad_norm": 0.04556608945131302, "learning_rate": 0.01, "loss": 1.9879, "step": 73806 }, { "epoch": 7.585714285714285, "grad_norm": 0.03845854103565216, "learning_rate": 0.01, "loss": 1.9725, "step": 73809 }, { "epoch": 7.586022610483042, "grad_norm": 0.04036925733089447, "learning_rate": 0.01, "loss": 1.965, "step": 73812 }, { "epoch": 7.586330935251799, "grad_norm": 0.12400572001934052, "learning_rate": 0.01, "loss": 1.9944, "step": 73815 }, { "epoch": 7.586639260020555, "grad_norm": 0.11692599207162857, "learning_rate": 0.01, "loss": 1.9653, "step": 73818 }, { "epoch": 7.5869475847893115, "grad_norm": 0.10816165804862976, "learning_rate": 0.01, "loss": 1.9493, "step": 73821 }, { "epoch": 7.587255909558068, "grad_norm": 0.08663230389356613, "learning_rate": 0.01, "loss": 1.9778, "step": 73824 }, { "epoch": 7.587564234326824, "grad_norm": 0.10679950565099716, "learning_rate": 0.01, "loss": 1.9695, "step": 73827 }, { "epoch": 7.5878725590955804, "grad_norm": 0.08458245545625687, "learning_rate": 0.01, "loss": 1.9615, "step": 73830 }, { "epoch": 7.588180883864337, "grad_norm": 0.053197022527456284, "learning_rate": 0.01, "loss": 1.9573, "step": 73833 }, { "epoch": 7.588489208633094, "grad_norm": 0.033534977585077286, "learning_rate": 0.01, "loss": 1.9488, "step": 73836 }, { "epoch": 7.58879753340185, "grad_norm": 0.11700337380170822, "learning_rate": 0.01, "loss": 2.0056, "step": 73839 }, { "epoch": 7.589105858170607, "grad_norm": 0.26823991537094116, "learning_rate": 0.01, "loss": 1.9525, "step": 73842 }, { "epoch": 7.589414182939363, "grad_norm": 0.04270010069012642, "learning_rate": 0.01, "loss": 1.965, "step": 73845 }, { "epoch": 7.589722507708119, "grad_norm": 0.07512800395488739, "learning_rate": 0.01, "loss": 1.9684, "step": 73848 }, { "epoch": 7.5900308324768755, "grad_norm": 0.049686167389154434, "learning_rate": 0.01, "loss": 1.9627, "step": 73851 }, { "epoch": 7.590339157245632, "grad_norm": 0.036004502326250076, "learning_rate": 0.01, "loss": 1.9604, "step": 73854 }, { "epoch": 7.590647482014388, "grad_norm": 0.03266850858926773, "learning_rate": 0.01, "loss": 1.9697, "step": 73857 }, { "epoch": 7.590955806783145, "grad_norm": 0.041615623980760574, "learning_rate": 0.01, "loss": 1.9834, "step": 73860 }, { "epoch": 7.591264131551902, "grad_norm": 0.07116426527500153, "learning_rate": 0.01, "loss": 1.9529, "step": 73863 }, { "epoch": 7.591572456320658, "grad_norm": 0.10572681576013565, "learning_rate": 0.01, "loss": 1.9551, "step": 73866 }, { "epoch": 7.591880781089414, "grad_norm": 0.06464870274066925, "learning_rate": 0.01, "loss": 1.9701, "step": 73869 }, { "epoch": 7.5921891058581705, "grad_norm": 0.062195464968681335, "learning_rate": 0.01, "loss": 1.9855, "step": 73872 }, { "epoch": 7.592497430626927, "grad_norm": 0.05104728415608406, "learning_rate": 0.01, "loss": 1.9715, "step": 73875 }, { "epoch": 7.592805755395683, "grad_norm": 0.039663366973400116, "learning_rate": 0.01, "loss": 1.9551, "step": 73878 }, { "epoch": 7.59311408016444, "grad_norm": 0.030902177095413208, "learning_rate": 0.01, "loss": 1.9627, "step": 73881 }, { "epoch": 7.593422404933197, "grad_norm": 0.09647796303033829, "learning_rate": 0.01, "loss": 1.9762, "step": 73884 }, { "epoch": 7.593730729701953, "grad_norm": 0.08927876502275467, "learning_rate": 0.01, "loss": 1.9707, "step": 73887 }, { "epoch": 7.594039054470709, "grad_norm": 0.07941711694002151, "learning_rate": 0.01, "loss": 1.9797, "step": 73890 }, { "epoch": 7.594347379239466, "grad_norm": 0.067719966173172, "learning_rate": 0.01, "loss": 1.9934, "step": 73893 }, { "epoch": 7.594655704008222, "grad_norm": 0.07280530035495758, "learning_rate": 0.01, "loss": 1.9925, "step": 73896 }, { "epoch": 7.594964028776978, "grad_norm": 0.04862016811966896, "learning_rate": 0.01, "loss": 1.9568, "step": 73899 }, { "epoch": 7.5952723535457345, "grad_norm": 0.03787263110280037, "learning_rate": 0.01, "loss": 2.0097, "step": 73902 }, { "epoch": 7.595580678314491, "grad_norm": 0.05039701610803604, "learning_rate": 0.01, "loss": 1.9538, "step": 73905 }, { "epoch": 7.595889003083248, "grad_norm": 0.1198352724313736, "learning_rate": 0.01, "loss": 1.987, "step": 73908 }, { "epoch": 7.596197327852004, "grad_norm": 0.07588653266429901, "learning_rate": 0.01, "loss": 1.9725, "step": 73911 }, { "epoch": 7.596505652620761, "grad_norm": 0.13404078781604767, "learning_rate": 0.01, "loss": 1.9828, "step": 73914 }, { "epoch": 7.596813977389517, "grad_norm": 0.14311884343624115, "learning_rate": 0.01, "loss": 1.9504, "step": 73917 }, { "epoch": 7.597122302158273, "grad_norm": 0.07449166476726532, "learning_rate": 0.01, "loss": 1.9828, "step": 73920 }, { "epoch": 7.5974306269270295, "grad_norm": 0.06578455120325089, "learning_rate": 0.01, "loss": 1.995, "step": 73923 }, { "epoch": 7.597738951695787, "grad_norm": 0.04347372427582741, "learning_rate": 0.01, "loss": 1.9701, "step": 73926 }, { "epoch": 7.598047276464543, "grad_norm": 0.09917577356100082, "learning_rate": 0.01, "loss": 1.9728, "step": 73929 }, { "epoch": 7.598355601233299, "grad_norm": 0.11529657244682312, "learning_rate": 0.01, "loss": 1.9589, "step": 73932 }, { "epoch": 7.598663926002056, "grad_norm": 0.05575413629412651, "learning_rate": 0.01, "loss": 1.9756, "step": 73935 }, { "epoch": 7.598972250770812, "grad_norm": 0.039321184158325195, "learning_rate": 0.01, "loss": 1.9487, "step": 73938 }, { "epoch": 7.599280575539568, "grad_norm": 0.038657065480947495, "learning_rate": 0.01, "loss": 1.9777, "step": 73941 }, { "epoch": 7.599588900308325, "grad_norm": 0.11320240050554276, "learning_rate": 0.01, "loss": 1.9769, "step": 73944 }, { "epoch": 7.599897225077081, "grad_norm": 0.04929359257221222, "learning_rate": 0.01, "loss": 1.973, "step": 73947 }, { "epoch": 7.600205549845837, "grad_norm": 0.06260153651237488, "learning_rate": 0.01, "loss": 1.9624, "step": 73950 }, { "epoch": 7.600513874614594, "grad_norm": 0.06550951302051544, "learning_rate": 0.01, "loss": 1.9715, "step": 73953 }, { "epoch": 7.600822199383351, "grad_norm": 0.056575268507003784, "learning_rate": 0.01, "loss": 1.9728, "step": 73956 }, { "epoch": 7.601130524152107, "grad_norm": 0.06484346836805344, "learning_rate": 0.01, "loss": 1.9483, "step": 73959 }, { "epoch": 7.601438848920863, "grad_norm": 0.06656456738710403, "learning_rate": 0.01, "loss": 1.9665, "step": 73962 }, { "epoch": 7.60174717368962, "grad_norm": 0.08706523478031158, "learning_rate": 0.01, "loss": 2.0047, "step": 73965 }, { "epoch": 7.602055498458376, "grad_norm": 0.08907262235879898, "learning_rate": 0.01, "loss": 1.9569, "step": 73968 }, { "epoch": 7.602363823227132, "grad_norm": 0.08218540251255035, "learning_rate": 0.01, "loss": 1.9678, "step": 73971 }, { "epoch": 7.602672147995889, "grad_norm": 0.04430277273058891, "learning_rate": 0.01, "loss": 1.9688, "step": 73974 }, { "epoch": 7.602980472764646, "grad_norm": 0.04185359552502632, "learning_rate": 0.01, "loss": 1.976, "step": 73977 }, { "epoch": 7.603288797533402, "grad_norm": 0.14391767978668213, "learning_rate": 0.01, "loss": 1.9763, "step": 73980 }, { "epoch": 7.603597122302158, "grad_norm": 0.04302511736750603, "learning_rate": 0.01, "loss": 1.973, "step": 73983 }, { "epoch": 7.603905447070915, "grad_norm": 0.09394198656082153, "learning_rate": 0.01, "loss": 1.9633, "step": 73986 }, { "epoch": 7.604213771839671, "grad_norm": 0.04285489022731781, "learning_rate": 0.01, "loss": 1.9501, "step": 73989 }, { "epoch": 7.604522096608427, "grad_norm": 0.05482661724090576, "learning_rate": 0.01, "loss": 1.9801, "step": 73992 }, { "epoch": 7.604830421377184, "grad_norm": 0.08892566710710526, "learning_rate": 0.01, "loss": 1.9877, "step": 73995 }, { "epoch": 7.605138746145941, "grad_norm": 0.04641614109277725, "learning_rate": 0.01, "loss": 1.976, "step": 73998 }, { "epoch": 7.605447070914697, "grad_norm": 0.06017431989312172, "learning_rate": 0.01, "loss": 1.9793, "step": 74001 }, { "epoch": 7.605755395683453, "grad_norm": 0.11227381974458694, "learning_rate": 0.01, "loss": 1.9777, "step": 74004 }, { "epoch": 7.60606372045221, "grad_norm": 0.05176539719104767, "learning_rate": 0.01, "loss": 1.975, "step": 74007 }, { "epoch": 7.606372045220966, "grad_norm": 0.08115067332983017, "learning_rate": 0.01, "loss": 1.9633, "step": 74010 }, { "epoch": 7.606680369989722, "grad_norm": 0.08569370210170746, "learning_rate": 0.01, "loss": 1.9487, "step": 74013 }, { "epoch": 7.606988694758479, "grad_norm": 0.037027645856142044, "learning_rate": 0.01, "loss": 1.9993, "step": 74016 }, { "epoch": 7.607297019527236, "grad_norm": 0.049683477729558945, "learning_rate": 0.01, "loss": 1.9777, "step": 74019 }, { "epoch": 7.607605344295992, "grad_norm": 0.040880728513002396, "learning_rate": 0.01, "loss": 1.9679, "step": 74022 }, { "epoch": 7.607913669064748, "grad_norm": 0.07329095155000687, "learning_rate": 0.01, "loss": 1.9664, "step": 74025 }, { "epoch": 7.608221993833505, "grad_norm": 0.12572570145130157, "learning_rate": 0.01, "loss": 1.9752, "step": 74028 }, { "epoch": 7.608530318602261, "grad_norm": 0.045191969722509384, "learning_rate": 0.01, "loss": 1.9912, "step": 74031 }, { "epoch": 7.608838643371017, "grad_norm": 0.05481408163905144, "learning_rate": 0.01, "loss": 1.9755, "step": 74034 }, { "epoch": 7.609146968139774, "grad_norm": 0.07012124359607697, "learning_rate": 0.01, "loss": 1.9626, "step": 74037 }, { "epoch": 7.60945529290853, "grad_norm": 0.08710244297981262, "learning_rate": 0.01, "loss": 1.9847, "step": 74040 }, { "epoch": 7.609763617677286, "grad_norm": 0.06888248026371002, "learning_rate": 0.01, "loss": 1.952, "step": 74043 }, { "epoch": 7.6100719424460435, "grad_norm": 0.09116239845752716, "learning_rate": 0.01, "loss": 1.959, "step": 74046 }, { "epoch": 7.6103802672148, "grad_norm": 0.14548470079898834, "learning_rate": 0.01, "loss": 1.954, "step": 74049 }, { "epoch": 7.610688591983556, "grad_norm": 0.030288826674222946, "learning_rate": 0.01, "loss": 1.9448, "step": 74052 }, { "epoch": 7.610996916752312, "grad_norm": 0.03586267679929733, "learning_rate": 0.01, "loss": 1.9784, "step": 74055 }, { "epoch": 7.611305241521069, "grad_norm": 0.03944223001599312, "learning_rate": 0.01, "loss": 1.9649, "step": 74058 }, { "epoch": 7.611613566289825, "grad_norm": 0.06248027831315994, "learning_rate": 0.01, "loss": 1.9673, "step": 74061 }, { "epoch": 7.611921891058582, "grad_norm": 0.06302045285701752, "learning_rate": 0.01, "loss": 1.9816, "step": 74064 }, { "epoch": 7.6122302158273385, "grad_norm": 0.06776783615350723, "learning_rate": 0.01, "loss": 1.9688, "step": 74067 }, { "epoch": 7.612538540596095, "grad_norm": 0.11014634370803833, "learning_rate": 0.01, "loss": 1.958, "step": 74070 }, { "epoch": 7.612846865364851, "grad_norm": 0.23355001211166382, "learning_rate": 0.01, "loss": 1.9869, "step": 74073 }, { "epoch": 7.613155190133607, "grad_norm": 0.17155928909778595, "learning_rate": 0.01, "loss": 1.9683, "step": 74076 }, { "epoch": 7.613463514902364, "grad_norm": 0.08190061151981354, "learning_rate": 0.01, "loss": 1.9619, "step": 74079 }, { "epoch": 7.61377183967112, "grad_norm": 0.047722749412059784, "learning_rate": 0.01, "loss": 1.9704, "step": 74082 }, { "epoch": 7.614080164439876, "grad_norm": 0.03172357380390167, "learning_rate": 0.01, "loss": 1.9498, "step": 74085 }, { "epoch": 7.614388489208633, "grad_norm": 0.044041916728019714, "learning_rate": 0.01, "loss": 1.9603, "step": 74088 }, { "epoch": 7.61469681397739, "grad_norm": 0.07261842489242554, "learning_rate": 0.01, "loss": 1.9545, "step": 74091 }, { "epoch": 7.615005138746146, "grad_norm": 0.04767949879169464, "learning_rate": 0.01, "loss": 1.9751, "step": 74094 }, { "epoch": 7.6153134635149025, "grad_norm": 0.059191979467868805, "learning_rate": 0.01, "loss": 1.9879, "step": 74097 }, { "epoch": 7.615621788283659, "grad_norm": 0.044217757880687714, "learning_rate": 0.01, "loss": 1.9827, "step": 74100 }, { "epoch": 7.615930113052415, "grad_norm": 0.048439063131809235, "learning_rate": 0.01, "loss": 1.9764, "step": 74103 }, { "epoch": 7.616238437821171, "grad_norm": 0.0705018863081932, "learning_rate": 0.01, "loss": 1.9681, "step": 74106 }, { "epoch": 7.616546762589928, "grad_norm": 0.07772061973810196, "learning_rate": 0.01, "loss": 1.9755, "step": 74109 }, { "epoch": 7.616855087358685, "grad_norm": 0.1228966936469078, "learning_rate": 0.01, "loss": 1.9876, "step": 74112 }, { "epoch": 7.617163412127441, "grad_norm": 0.056798454374074936, "learning_rate": 0.01, "loss": 1.9807, "step": 74115 }, { "epoch": 7.6174717368961975, "grad_norm": 0.07300012558698654, "learning_rate": 0.01, "loss": 1.9922, "step": 74118 }, { "epoch": 7.617780061664954, "grad_norm": 0.034426249563694, "learning_rate": 0.01, "loss": 2.0117, "step": 74121 }, { "epoch": 7.61808838643371, "grad_norm": 0.03159654513001442, "learning_rate": 0.01, "loss": 1.9657, "step": 74124 }, { "epoch": 7.618396711202466, "grad_norm": 0.04546064883470535, "learning_rate": 0.01, "loss": 1.9969, "step": 74127 }, { "epoch": 7.618705035971223, "grad_norm": 0.0788118839263916, "learning_rate": 0.01, "loss": 2.0015, "step": 74130 }, { "epoch": 7.619013360739979, "grad_norm": 0.06187133863568306, "learning_rate": 0.01, "loss": 1.9571, "step": 74133 }, { "epoch": 7.619321685508736, "grad_norm": 0.11151747405529022, "learning_rate": 0.01, "loss": 1.9961, "step": 74136 }, { "epoch": 7.6196300102774925, "grad_norm": 0.08039221912622452, "learning_rate": 0.01, "loss": 1.9772, "step": 74139 }, { "epoch": 7.619938335046249, "grad_norm": 0.08473746478557587, "learning_rate": 0.01, "loss": 1.971, "step": 74142 }, { "epoch": 7.620246659815005, "grad_norm": 0.05223767086863518, "learning_rate": 0.01, "loss": 1.9839, "step": 74145 }, { "epoch": 7.6205549845837615, "grad_norm": 0.038708411157131195, "learning_rate": 0.01, "loss": 1.9568, "step": 74148 }, { "epoch": 7.620863309352518, "grad_norm": 0.04193318262696266, "learning_rate": 0.01, "loss": 1.9786, "step": 74151 }, { "epoch": 7.621171634121274, "grad_norm": 0.036744605749845505, "learning_rate": 0.01, "loss": 1.936, "step": 74154 }, { "epoch": 7.621479958890031, "grad_norm": 0.04393354803323746, "learning_rate": 0.01, "loss": 1.975, "step": 74157 }, { "epoch": 7.621788283658788, "grad_norm": 0.04185178875923157, "learning_rate": 0.01, "loss": 1.9714, "step": 74160 }, { "epoch": 7.622096608427544, "grad_norm": 0.09790702164173126, "learning_rate": 0.01, "loss": 1.9664, "step": 74163 }, { "epoch": 7.6224049331963, "grad_norm": 0.05819176137447357, "learning_rate": 0.01, "loss": 1.9448, "step": 74166 }, { "epoch": 7.6227132579650565, "grad_norm": 0.08030474185943604, "learning_rate": 0.01, "loss": 1.9789, "step": 74169 }, { "epoch": 7.623021582733813, "grad_norm": 0.1077614575624466, "learning_rate": 0.01, "loss": 1.9427, "step": 74172 }, { "epoch": 7.623329907502569, "grad_norm": 0.06347894668579102, "learning_rate": 0.01, "loss": 1.9878, "step": 74175 }, { "epoch": 7.623638232271325, "grad_norm": 0.08716027438640594, "learning_rate": 0.01, "loss": 1.9708, "step": 74178 }, { "epoch": 7.623946557040082, "grad_norm": 0.04236210137605667, "learning_rate": 0.01, "loss": 1.9554, "step": 74181 }, { "epoch": 7.624254881808839, "grad_norm": 0.05839502438902855, "learning_rate": 0.01, "loss": 1.9431, "step": 74184 }, { "epoch": 7.624563206577595, "grad_norm": 0.09577804803848267, "learning_rate": 0.01, "loss": 1.9956, "step": 74187 }, { "epoch": 7.6248715313463515, "grad_norm": 0.1194511279463768, "learning_rate": 0.01, "loss": 1.9801, "step": 74190 }, { "epoch": 7.625179856115108, "grad_norm": 0.09263192117214203, "learning_rate": 0.01, "loss": 1.9814, "step": 74193 }, { "epoch": 7.625488180883864, "grad_norm": 0.042605165392160416, "learning_rate": 0.01, "loss": 1.9801, "step": 74196 }, { "epoch": 7.6257965056526205, "grad_norm": 0.05061962082982063, "learning_rate": 0.01, "loss": 1.9336, "step": 74199 }, { "epoch": 7.626104830421378, "grad_norm": 0.03659835457801819, "learning_rate": 0.01, "loss": 1.9888, "step": 74202 }, { "epoch": 7.626413155190134, "grad_norm": 0.03055495396256447, "learning_rate": 0.01, "loss": 1.9796, "step": 74205 }, { "epoch": 7.62672147995889, "grad_norm": 0.10416147112846375, "learning_rate": 0.01, "loss": 1.9537, "step": 74208 }, { "epoch": 7.627029804727647, "grad_norm": 0.07998328655958176, "learning_rate": 0.01, "loss": 1.9684, "step": 74211 }, { "epoch": 7.627338129496403, "grad_norm": 0.0935954824090004, "learning_rate": 0.01, "loss": 1.995, "step": 74214 }, { "epoch": 7.627646454265159, "grad_norm": 0.07641303539276123, "learning_rate": 0.01, "loss": 1.9696, "step": 74217 }, { "epoch": 7.6279547790339155, "grad_norm": 0.0807437002658844, "learning_rate": 0.01, "loss": 1.9509, "step": 74220 }, { "epoch": 7.628263103802672, "grad_norm": 0.1031765341758728, "learning_rate": 0.01, "loss": 1.9907, "step": 74223 }, { "epoch": 7.628571428571428, "grad_norm": 0.06459169834852219, "learning_rate": 0.01, "loss": 1.971, "step": 74226 }, { "epoch": 7.628879753340185, "grad_norm": 0.04952657222747803, "learning_rate": 0.01, "loss": 1.9683, "step": 74229 }, { "epoch": 7.629188078108942, "grad_norm": 0.03405149653553963, "learning_rate": 0.01, "loss": 1.9927, "step": 74232 }, { "epoch": 7.629496402877698, "grad_norm": 0.04447784274816513, "learning_rate": 0.01, "loss": 1.9852, "step": 74235 }, { "epoch": 7.629804727646454, "grad_norm": 0.07677244395017624, "learning_rate": 0.01, "loss": 1.9496, "step": 74238 }, { "epoch": 7.6301130524152105, "grad_norm": 0.1031339168548584, "learning_rate": 0.01, "loss": 1.9639, "step": 74241 }, { "epoch": 7.630421377183967, "grad_norm": 0.1357228308916092, "learning_rate": 0.01, "loss": 1.9711, "step": 74244 }, { "epoch": 7.630729701952723, "grad_norm": 0.14237457513809204, "learning_rate": 0.01, "loss": 1.9785, "step": 74247 }, { "epoch": 7.63103802672148, "grad_norm": 0.11732343584299088, "learning_rate": 0.01, "loss": 1.9784, "step": 74250 }, { "epoch": 7.631346351490237, "grad_norm": 0.056218016892671585, "learning_rate": 0.01, "loss": 1.9774, "step": 74253 }, { "epoch": 7.631654676258993, "grad_norm": 0.0473707839846611, "learning_rate": 0.01, "loss": 1.9756, "step": 74256 }, { "epoch": 7.631963001027749, "grad_norm": 0.06250836700201035, "learning_rate": 0.01, "loss": 1.9653, "step": 74259 }, { "epoch": 7.632271325796506, "grad_norm": 0.039323173463344574, "learning_rate": 0.01, "loss": 1.9795, "step": 74262 }, { "epoch": 7.632579650565262, "grad_norm": 0.1005803793668747, "learning_rate": 0.01, "loss": 1.9777, "step": 74265 }, { "epoch": 7.632887975334018, "grad_norm": 0.08147422969341278, "learning_rate": 0.01, "loss": 1.9944, "step": 74268 }, { "epoch": 7.6331963001027745, "grad_norm": 0.07019870728254318, "learning_rate": 0.01, "loss": 1.9594, "step": 74271 }, { "epoch": 7.633504624871532, "grad_norm": 0.08461744338274002, "learning_rate": 0.01, "loss": 1.9748, "step": 74274 }, { "epoch": 7.633812949640288, "grad_norm": 0.129719540476799, "learning_rate": 0.01, "loss": 1.9808, "step": 74277 }, { "epoch": 7.634121274409044, "grad_norm": 0.17371834814548492, "learning_rate": 0.01, "loss": 1.9574, "step": 74280 }, { "epoch": 7.634429599177801, "grad_norm": 0.0878586545586586, "learning_rate": 0.01, "loss": 1.9658, "step": 74283 }, { "epoch": 7.634737923946557, "grad_norm": 0.04784208908677101, "learning_rate": 0.01, "loss": 1.9656, "step": 74286 }, { "epoch": 7.635046248715313, "grad_norm": 0.05528968945145607, "learning_rate": 0.01, "loss": 1.9888, "step": 74289 }, { "epoch": 7.6353545734840695, "grad_norm": 0.04713103175163269, "learning_rate": 0.01, "loss": 1.9539, "step": 74292 }, { "epoch": 7.635662898252827, "grad_norm": 0.13385649025440216, "learning_rate": 0.01, "loss": 1.9525, "step": 74295 }, { "epoch": 7.635971223021583, "grad_norm": 0.09349873661994934, "learning_rate": 0.01, "loss": 1.967, "step": 74298 }, { "epoch": 7.636279547790339, "grad_norm": 0.07165966928005219, "learning_rate": 0.01, "loss": 1.9984, "step": 74301 }, { "epoch": 7.636587872559096, "grad_norm": 0.04691112041473389, "learning_rate": 0.01, "loss": 1.969, "step": 74304 }, { "epoch": 7.636896197327852, "grad_norm": 0.0479462556540966, "learning_rate": 0.01, "loss": 1.9738, "step": 74307 }, { "epoch": 7.637204522096608, "grad_norm": 0.07629293203353882, "learning_rate": 0.01, "loss": 1.9814, "step": 74310 }, { "epoch": 7.637512846865365, "grad_norm": 0.09805203974246979, "learning_rate": 0.01, "loss": 1.9784, "step": 74313 }, { "epoch": 7.637821171634121, "grad_norm": 0.07098491489887238, "learning_rate": 0.01, "loss": 1.981, "step": 74316 }, { "epoch": 7.638129496402878, "grad_norm": 0.06553947925567627, "learning_rate": 0.01, "loss": 1.9971, "step": 74319 }, { "epoch": 7.638437821171634, "grad_norm": 0.04797985777258873, "learning_rate": 0.01, "loss": 1.9834, "step": 74322 }, { "epoch": 7.638746145940391, "grad_norm": 0.05064231529831886, "learning_rate": 0.01, "loss": 1.9895, "step": 74325 }, { "epoch": 7.639054470709147, "grad_norm": 0.07695363461971283, "learning_rate": 0.01, "loss": 1.9833, "step": 74328 }, { "epoch": 7.639362795477903, "grad_norm": 0.09473993629217148, "learning_rate": 0.01, "loss": 1.9746, "step": 74331 }, { "epoch": 7.63967112024666, "grad_norm": 0.05327700823545456, "learning_rate": 0.01, "loss": 1.9715, "step": 74334 }, { "epoch": 7.639979445015416, "grad_norm": 0.07472631335258484, "learning_rate": 0.01, "loss": 1.9599, "step": 74337 }, { "epoch": 7.640287769784173, "grad_norm": 0.04138251394033432, "learning_rate": 0.01, "loss": 1.9671, "step": 74340 }, { "epoch": 7.640596094552929, "grad_norm": 0.08596426993608475, "learning_rate": 0.01, "loss": 1.9435, "step": 74343 }, { "epoch": 7.640904419321686, "grad_norm": 0.08237265050411224, "learning_rate": 0.01, "loss": 1.9743, "step": 74346 }, { "epoch": 7.641212744090442, "grad_norm": 0.09412717074155807, "learning_rate": 0.01, "loss": 1.9823, "step": 74349 }, { "epoch": 7.641521068859198, "grad_norm": 0.07472441345453262, "learning_rate": 0.01, "loss": 1.9758, "step": 74352 }, { "epoch": 7.641829393627955, "grad_norm": 0.06872202455997467, "learning_rate": 0.01, "loss": 1.9609, "step": 74355 }, { "epoch": 7.642137718396711, "grad_norm": 0.05549430847167969, "learning_rate": 0.01, "loss": 1.9687, "step": 74358 }, { "epoch": 7.642446043165467, "grad_norm": 0.08854615688323975, "learning_rate": 0.01, "loss": 1.9703, "step": 74361 }, { "epoch": 7.642754367934224, "grad_norm": 0.12863200902938843, "learning_rate": 0.01, "loss": 1.9672, "step": 74364 }, { "epoch": 7.643062692702981, "grad_norm": 0.08491957187652588, "learning_rate": 0.01, "loss": 1.9783, "step": 74367 }, { "epoch": 7.643371017471737, "grad_norm": 0.04691247269511223, "learning_rate": 0.01, "loss": 1.9572, "step": 74370 }, { "epoch": 7.643679342240493, "grad_norm": 0.05789770558476448, "learning_rate": 0.01, "loss": 1.977, "step": 74373 }, { "epoch": 7.64398766700925, "grad_norm": 0.051950305700302124, "learning_rate": 0.01, "loss": 1.9717, "step": 74376 }, { "epoch": 7.644295991778006, "grad_norm": 0.12850762903690338, "learning_rate": 0.01, "loss": 1.9567, "step": 74379 }, { "epoch": 7.644604316546762, "grad_norm": 0.07465063780546188, "learning_rate": 0.01, "loss": 1.9807, "step": 74382 }, { "epoch": 7.6449126413155195, "grad_norm": 0.06719266623258591, "learning_rate": 0.01, "loss": 1.9874, "step": 74385 }, { "epoch": 7.645220966084276, "grad_norm": 0.06720881164073944, "learning_rate": 0.01, "loss": 1.9758, "step": 74388 }, { "epoch": 7.645529290853032, "grad_norm": 0.08389756828546524, "learning_rate": 0.01, "loss": 1.9535, "step": 74391 }, { "epoch": 7.645837615621788, "grad_norm": 0.14512526988983154, "learning_rate": 0.01, "loss": 1.9638, "step": 74394 }, { "epoch": 7.646145940390545, "grad_norm": 0.07856389135122299, "learning_rate": 0.01, "loss": 1.9617, "step": 74397 }, { "epoch": 7.646454265159301, "grad_norm": 0.06200588494539261, "learning_rate": 0.01, "loss": 1.9646, "step": 74400 }, { "epoch": 7.646762589928057, "grad_norm": 0.06547322124242783, "learning_rate": 0.01, "loss": 1.9793, "step": 74403 }, { "epoch": 7.647070914696814, "grad_norm": 0.04517245292663574, "learning_rate": 0.01, "loss": 1.9483, "step": 74406 }, { "epoch": 7.64737923946557, "grad_norm": 0.039127618074417114, "learning_rate": 0.01, "loss": 1.9918, "step": 74409 }, { "epoch": 7.647687564234327, "grad_norm": 0.12178776413202286, "learning_rate": 0.01, "loss": 1.9752, "step": 74412 }, { "epoch": 7.6479958890030835, "grad_norm": 0.03933233022689819, "learning_rate": 0.01, "loss": 1.9766, "step": 74415 }, { "epoch": 7.64830421377184, "grad_norm": 0.10145100206136703, "learning_rate": 0.01, "loss": 1.9588, "step": 74418 }, { "epoch": 7.648612538540596, "grad_norm": 0.0637749508023262, "learning_rate": 0.01, "loss": 1.9735, "step": 74421 }, { "epoch": 7.648920863309352, "grad_norm": 0.051577746868133545, "learning_rate": 0.01, "loss": 1.9965, "step": 74424 }, { "epoch": 7.649229188078109, "grad_norm": 0.08104600012302399, "learning_rate": 0.01, "loss": 1.9544, "step": 74427 }, { "epoch": 7.649537512846865, "grad_norm": 0.09299812465906143, "learning_rate": 0.01, "loss": 1.9786, "step": 74430 }, { "epoch": 7.649845837615622, "grad_norm": 0.06638593971729279, "learning_rate": 0.01, "loss": 1.9691, "step": 74433 }, { "epoch": 7.6501541623843785, "grad_norm": 0.0563388355076313, "learning_rate": 0.01, "loss": 1.9699, "step": 74436 }, { "epoch": 7.650462487153135, "grad_norm": 0.06957016885280609, "learning_rate": 0.01, "loss": 1.988, "step": 74439 }, { "epoch": 7.650770811921891, "grad_norm": 0.034762538969516754, "learning_rate": 0.01, "loss": 1.9441, "step": 74442 }, { "epoch": 7.651079136690647, "grad_norm": 0.043936941772699356, "learning_rate": 0.01, "loss": 1.9848, "step": 74445 }, { "epoch": 7.651387461459404, "grad_norm": 0.03923318162560463, "learning_rate": 0.01, "loss": 1.9481, "step": 74448 }, { "epoch": 7.65169578622816, "grad_norm": 0.07552552223205566, "learning_rate": 0.01, "loss": 1.973, "step": 74451 }, { "epoch": 7.652004110996916, "grad_norm": 0.05596005916595459, "learning_rate": 0.01, "loss": 1.9533, "step": 74454 }, { "epoch": 7.6523124357656735, "grad_norm": 0.056482747197151184, "learning_rate": 0.01, "loss": 1.9421, "step": 74457 }, { "epoch": 7.65262076053443, "grad_norm": 0.06143519654870033, "learning_rate": 0.01, "loss": 1.9591, "step": 74460 }, { "epoch": 7.652929085303186, "grad_norm": 0.07248186320066452, "learning_rate": 0.01, "loss": 1.9425, "step": 74463 }, { "epoch": 7.6532374100719425, "grad_norm": 0.14804747700691223, "learning_rate": 0.01, "loss": 1.963, "step": 74466 }, { "epoch": 7.653545734840699, "grad_norm": 0.03471624106168747, "learning_rate": 0.01, "loss": 1.9623, "step": 74469 }, { "epoch": 7.653854059609455, "grad_norm": 0.09821716696023941, "learning_rate": 0.01, "loss": 1.9446, "step": 74472 }, { "epoch": 7.654162384378211, "grad_norm": 0.09262007474899292, "learning_rate": 0.01, "loss": 1.9479, "step": 74475 }, { "epoch": 7.654470709146969, "grad_norm": 0.04531139135360718, "learning_rate": 0.01, "loss": 1.9985, "step": 74478 }, { "epoch": 7.654779033915725, "grad_norm": 0.04309883713722229, "learning_rate": 0.01, "loss": 1.9761, "step": 74481 }, { "epoch": 7.655087358684481, "grad_norm": 0.05365990102291107, "learning_rate": 0.01, "loss": 1.9731, "step": 74484 }, { "epoch": 7.6553956834532375, "grad_norm": 0.03971986845135689, "learning_rate": 0.01, "loss": 1.9777, "step": 74487 }, { "epoch": 7.655704008221994, "grad_norm": 0.05418335273861885, "learning_rate": 0.01, "loss": 1.9423, "step": 74490 }, { "epoch": 7.65601233299075, "grad_norm": 0.043865058571100235, "learning_rate": 0.01, "loss": 1.9575, "step": 74493 }, { "epoch": 7.656320657759506, "grad_norm": 0.17038394510746002, "learning_rate": 0.01, "loss": 1.9921, "step": 74496 }, { "epoch": 7.656628982528263, "grad_norm": 0.07129910588264465, "learning_rate": 0.01, "loss": 1.9575, "step": 74499 }, { "epoch": 7.656937307297019, "grad_norm": 0.0684434175491333, "learning_rate": 0.01, "loss": 1.9684, "step": 74502 }, { "epoch": 7.657245632065776, "grad_norm": 0.04037448391318321, "learning_rate": 0.01, "loss": 1.9628, "step": 74505 }, { "epoch": 7.6575539568345325, "grad_norm": 0.12347517162561417, "learning_rate": 0.01, "loss": 1.9503, "step": 74508 }, { "epoch": 7.657862281603289, "grad_norm": 0.061841484159231186, "learning_rate": 0.01, "loss": 1.9633, "step": 74511 }, { "epoch": 7.658170606372045, "grad_norm": 0.10890454053878784, "learning_rate": 0.01, "loss": 1.9865, "step": 74514 }, { "epoch": 7.6584789311408015, "grad_norm": 0.06274345517158508, "learning_rate": 0.01, "loss": 1.9675, "step": 74517 }, { "epoch": 7.658787255909558, "grad_norm": 0.07236039638519287, "learning_rate": 0.01, "loss": 1.983, "step": 74520 }, { "epoch": 7.659095580678315, "grad_norm": 0.09525978565216064, "learning_rate": 0.01, "loss": 1.9661, "step": 74523 }, { "epoch": 7.659403905447071, "grad_norm": 0.056358352303504944, "learning_rate": 0.01, "loss": 1.9565, "step": 74526 }, { "epoch": 7.659712230215828, "grad_norm": 0.07835443317890167, "learning_rate": 0.01, "loss": 1.9708, "step": 74529 }, { "epoch": 7.660020554984584, "grad_norm": 0.05551443248987198, "learning_rate": 0.01, "loss": 1.9653, "step": 74532 }, { "epoch": 7.66032887975334, "grad_norm": 0.05699507147073746, "learning_rate": 0.01, "loss": 1.9577, "step": 74535 }, { "epoch": 7.6606372045220965, "grad_norm": 0.05638470873236656, "learning_rate": 0.01, "loss": 1.9582, "step": 74538 }, { "epoch": 7.660945529290853, "grad_norm": 0.09323201328516006, "learning_rate": 0.01, "loss": 1.9553, "step": 74541 }, { "epoch": 7.661253854059609, "grad_norm": 0.08503230661153793, "learning_rate": 0.01, "loss": 1.985, "step": 74544 }, { "epoch": 7.661562178828365, "grad_norm": 0.07198848575353622, "learning_rate": 0.01, "loss": 1.9597, "step": 74547 }, { "epoch": 7.661870503597123, "grad_norm": 0.07523935288190842, "learning_rate": 0.01, "loss": 1.9725, "step": 74550 }, { "epoch": 7.662178828365879, "grad_norm": 0.07453587651252747, "learning_rate": 0.01, "loss": 1.9475, "step": 74553 }, { "epoch": 7.662487153134635, "grad_norm": 0.060776762664318085, "learning_rate": 0.01, "loss": 1.9732, "step": 74556 }, { "epoch": 7.6627954779033916, "grad_norm": 0.06872498989105225, "learning_rate": 0.01, "loss": 1.9769, "step": 74559 }, { "epoch": 7.663103802672148, "grad_norm": 0.06788360327482224, "learning_rate": 0.01, "loss": 1.9535, "step": 74562 }, { "epoch": 7.663412127440904, "grad_norm": 0.04364737123250961, "learning_rate": 0.01, "loss": 1.9624, "step": 74565 }, { "epoch": 7.6637204522096605, "grad_norm": 0.04367050155997276, "learning_rate": 0.01, "loss": 1.9657, "step": 74568 }, { "epoch": 7.664028776978418, "grad_norm": 0.053800318390131, "learning_rate": 0.01, "loss": 1.9657, "step": 74571 }, { "epoch": 7.664337101747174, "grad_norm": 0.049637794494628906, "learning_rate": 0.01, "loss": 1.9697, "step": 74574 }, { "epoch": 7.66464542651593, "grad_norm": 0.04785891994833946, "learning_rate": 0.01, "loss": 1.9688, "step": 74577 }, { "epoch": 7.664953751284687, "grad_norm": 0.07477859407663345, "learning_rate": 0.01, "loss": 1.978, "step": 74580 }, { "epoch": 7.665262076053443, "grad_norm": 0.06930319964885712, "learning_rate": 0.01, "loss": 1.9614, "step": 74583 }, { "epoch": 7.665570400822199, "grad_norm": 0.09280212223529816, "learning_rate": 0.01, "loss": 1.9598, "step": 74586 }, { "epoch": 7.6658787255909555, "grad_norm": 0.07756971567869186, "learning_rate": 0.01, "loss": 1.9962, "step": 74589 }, { "epoch": 7.666187050359712, "grad_norm": 0.04834478721022606, "learning_rate": 0.01, "loss": 1.9805, "step": 74592 }, { "epoch": 7.666495375128469, "grad_norm": 0.06786291301250458, "learning_rate": 0.01, "loss": 1.963, "step": 74595 }, { "epoch": 7.666803699897225, "grad_norm": 0.05437411740422249, "learning_rate": 0.01, "loss": 1.9774, "step": 74598 }, { "epoch": 7.667112024665982, "grad_norm": 0.06262870132923126, "learning_rate": 0.01, "loss": 1.9872, "step": 74601 }, { "epoch": 7.667420349434738, "grad_norm": 0.03245949372649193, "learning_rate": 0.01, "loss": 1.9538, "step": 74604 }, { "epoch": 7.667728674203494, "grad_norm": 0.029952844604849815, "learning_rate": 0.01, "loss": 1.9424, "step": 74607 }, { "epoch": 7.6680369989722506, "grad_norm": 0.05287361145019531, "learning_rate": 0.01, "loss": 1.9879, "step": 74610 }, { "epoch": 7.668345323741007, "grad_norm": 0.049092501401901245, "learning_rate": 0.01, "loss": 1.9708, "step": 74613 }, { "epoch": 7.668653648509764, "grad_norm": 0.1970340609550476, "learning_rate": 0.01, "loss": 1.9679, "step": 74616 }, { "epoch": 7.66896197327852, "grad_norm": 0.06653330475091934, "learning_rate": 0.01, "loss": 1.9524, "step": 74619 }, { "epoch": 7.669270298047277, "grad_norm": 0.08722051233053207, "learning_rate": 0.01, "loss": 1.9764, "step": 74622 }, { "epoch": 7.669578622816033, "grad_norm": 0.06388158351182938, "learning_rate": 0.01, "loss": 1.9754, "step": 74625 }, { "epoch": 7.669886947584789, "grad_norm": 0.04464726522564888, "learning_rate": 0.01, "loss": 1.9673, "step": 74628 }, { "epoch": 7.670195272353546, "grad_norm": 0.03846354782581329, "learning_rate": 0.01, "loss": 1.9823, "step": 74631 }, { "epoch": 7.670503597122302, "grad_norm": 0.038826361298561096, "learning_rate": 0.01, "loss": 1.9506, "step": 74634 }, { "epoch": 7.670811921891058, "grad_norm": 0.052090950310230255, "learning_rate": 0.01, "loss": 1.9671, "step": 74637 }, { "epoch": 7.6711202466598145, "grad_norm": 0.07527358084917068, "learning_rate": 0.01, "loss": 1.9672, "step": 74640 }, { "epoch": 7.671428571428572, "grad_norm": 0.05919142812490463, "learning_rate": 0.01, "loss": 1.9507, "step": 74643 }, { "epoch": 7.671736896197328, "grad_norm": 0.03949635475873947, "learning_rate": 0.01, "loss": 1.9658, "step": 74646 }, { "epoch": 7.672045220966084, "grad_norm": 0.12237614393234253, "learning_rate": 0.01, "loss": 1.9507, "step": 74649 }, { "epoch": 7.672353545734841, "grad_norm": 0.10136833041906357, "learning_rate": 0.01, "loss": 1.9501, "step": 74652 }, { "epoch": 7.672661870503597, "grad_norm": 0.11704662442207336, "learning_rate": 0.01, "loss": 1.9858, "step": 74655 }, { "epoch": 7.672970195272353, "grad_norm": 0.06047737970948219, "learning_rate": 0.01, "loss": 1.9904, "step": 74658 }, { "epoch": 7.67327852004111, "grad_norm": 0.05044262483716011, "learning_rate": 0.01, "loss": 1.9461, "step": 74661 }, { "epoch": 7.673586844809867, "grad_norm": 0.042892586439847946, "learning_rate": 0.01, "loss": 1.9935, "step": 74664 }, { "epoch": 7.673895169578623, "grad_norm": 0.04377809539437294, "learning_rate": 0.01, "loss": 1.9706, "step": 74667 }, { "epoch": 7.674203494347379, "grad_norm": 0.06385743618011475, "learning_rate": 0.01, "loss": 1.9906, "step": 74670 }, { "epoch": 7.674511819116136, "grad_norm": 0.04909441992640495, "learning_rate": 0.01, "loss": 1.9743, "step": 74673 }, { "epoch": 7.674820143884892, "grad_norm": 0.06172732263803482, "learning_rate": 0.01, "loss": 1.9682, "step": 74676 }, { "epoch": 7.675128468653648, "grad_norm": 0.06348180025815964, "learning_rate": 0.01, "loss": 1.9809, "step": 74679 }, { "epoch": 7.675436793422405, "grad_norm": 0.12463052570819855, "learning_rate": 0.01, "loss": 1.9469, "step": 74682 }, { "epoch": 7.675745118191161, "grad_norm": 0.04291809722781181, "learning_rate": 0.01, "loss": 1.9779, "step": 74685 }, { "epoch": 7.676053442959918, "grad_norm": 0.0535232275724411, "learning_rate": 0.01, "loss": 1.9649, "step": 74688 }, { "epoch": 7.676361767728674, "grad_norm": 0.0402233712375164, "learning_rate": 0.01, "loss": 1.9785, "step": 74691 }, { "epoch": 7.676670092497431, "grad_norm": 0.047401294112205505, "learning_rate": 0.01, "loss": 1.9451, "step": 74694 }, { "epoch": 7.676978417266187, "grad_norm": 0.04413991421461105, "learning_rate": 0.01, "loss": 1.9779, "step": 74697 }, { "epoch": 7.677286742034943, "grad_norm": 0.037016935646533966, "learning_rate": 0.01, "loss": 1.9814, "step": 74700 }, { "epoch": 7.6775950668037, "grad_norm": 0.041594166308641434, "learning_rate": 0.01, "loss": 1.9648, "step": 74703 }, { "epoch": 7.677903391572456, "grad_norm": 0.12671832740306854, "learning_rate": 0.01, "loss": 1.9869, "step": 74706 }, { "epoch": 7.678211716341213, "grad_norm": 0.04342658072710037, "learning_rate": 0.01, "loss": 1.9842, "step": 74709 }, { "epoch": 7.678520041109969, "grad_norm": 0.05605921521782875, "learning_rate": 0.01, "loss": 1.9765, "step": 74712 }, { "epoch": 7.678828365878726, "grad_norm": 0.1270131766796112, "learning_rate": 0.01, "loss": 1.9902, "step": 74715 }, { "epoch": 7.679136690647482, "grad_norm": 0.07328061014413834, "learning_rate": 0.01, "loss": 1.9882, "step": 74718 }, { "epoch": 7.679445015416238, "grad_norm": 0.09672074764966965, "learning_rate": 0.01, "loss": 1.9496, "step": 74721 }, { "epoch": 7.679753340184995, "grad_norm": 0.05370582267642021, "learning_rate": 0.01, "loss": 1.9469, "step": 74724 }, { "epoch": 7.680061664953751, "grad_norm": 0.059833183884620667, "learning_rate": 0.01, "loss": 1.9689, "step": 74727 }, { "epoch": 7.680369989722507, "grad_norm": 0.044636938720941544, "learning_rate": 0.01, "loss": 1.976, "step": 74730 }, { "epoch": 7.6806783144912645, "grad_norm": 0.0414838008582592, "learning_rate": 0.01, "loss": 1.9805, "step": 74733 }, { "epoch": 7.680986639260021, "grad_norm": 0.06097675487399101, "learning_rate": 0.01, "loss": 2.0028, "step": 74736 }, { "epoch": 7.681294964028777, "grad_norm": 0.1072980985045433, "learning_rate": 0.01, "loss": 1.9873, "step": 74739 }, { "epoch": 7.681603288797533, "grad_norm": 0.0738445371389389, "learning_rate": 0.01, "loss": 1.9503, "step": 74742 }, { "epoch": 7.68191161356629, "grad_norm": 0.06711167842149734, "learning_rate": 0.01, "loss": 1.9582, "step": 74745 }, { "epoch": 7.682219938335046, "grad_norm": 0.07318748533725739, "learning_rate": 0.01, "loss": 1.986, "step": 74748 }, { "epoch": 7.682528263103802, "grad_norm": 0.07076602429151535, "learning_rate": 0.01, "loss": 1.9628, "step": 74751 }, { "epoch": 7.6828365878725595, "grad_norm": 0.05887584760785103, "learning_rate": 0.01, "loss": 1.9698, "step": 74754 }, { "epoch": 7.683144912641316, "grad_norm": 0.1313849240541458, "learning_rate": 0.01, "loss": 1.9568, "step": 74757 }, { "epoch": 7.683453237410072, "grad_norm": 0.04983710125088692, "learning_rate": 0.01, "loss": 1.9613, "step": 74760 }, { "epoch": 7.683761562178828, "grad_norm": 0.051432423293590546, "learning_rate": 0.01, "loss": 1.9772, "step": 74763 }, { "epoch": 7.684069886947585, "grad_norm": 0.09883440285921097, "learning_rate": 0.01, "loss": 1.9598, "step": 74766 }, { "epoch": 7.684378211716341, "grad_norm": 0.07337962836027145, "learning_rate": 0.01, "loss": 1.962, "step": 74769 }, { "epoch": 7.684686536485097, "grad_norm": 0.05677385628223419, "learning_rate": 0.01, "loss": 1.9801, "step": 74772 }, { "epoch": 7.684994861253854, "grad_norm": 0.04124249145388603, "learning_rate": 0.01, "loss": 1.9628, "step": 74775 }, { "epoch": 7.685303186022611, "grad_norm": 0.03997712954878807, "learning_rate": 0.01, "loss": 1.964, "step": 74778 }, { "epoch": 7.685611510791367, "grad_norm": 0.04726220294833183, "learning_rate": 0.01, "loss": 1.9647, "step": 74781 }, { "epoch": 7.6859198355601235, "grad_norm": 0.04512409493327141, "learning_rate": 0.01, "loss": 1.9751, "step": 74784 }, { "epoch": 7.68622816032888, "grad_norm": 0.09864774346351624, "learning_rate": 0.01, "loss": 1.9951, "step": 74787 }, { "epoch": 7.686536485097636, "grad_norm": 0.062484268099069595, "learning_rate": 0.01, "loss": 1.959, "step": 74790 }, { "epoch": 7.686844809866392, "grad_norm": 0.09673088043928146, "learning_rate": 0.01, "loss": 1.9881, "step": 74793 }, { "epoch": 7.687153134635149, "grad_norm": 0.05277227610349655, "learning_rate": 0.01, "loss": 1.9727, "step": 74796 }, { "epoch": 7.687461459403906, "grad_norm": 0.06944403797388077, "learning_rate": 0.01, "loss": 1.9549, "step": 74799 }, { "epoch": 7.687769784172662, "grad_norm": 0.07019193470478058, "learning_rate": 0.01, "loss": 1.9652, "step": 74802 }, { "epoch": 7.6880781089414185, "grad_norm": 0.12948362529277802, "learning_rate": 0.01, "loss": 1.9872, "step": 74805 }, { "epoch": 7.688386433710175, "grad_norm": 0.06813304126262665, "learning_rate": 0.01, "loss": 1.9781, "step": 74808 }, { "epoch": 7.688694758478931, "grad_norm": 0.0443788580596447, "learning_rate": 0.01, "loss": 1.9801, "step": 74811 }, { "epoch": 7.689003083247687, "grad_norm": 0.04802471399307251, "learning_rate": 0.01, "loss": 1.9944, "step": 74814 }, { "epoch": 7.689311408016444, "grad_norm": 0.05055609717965126, "learning_rate": 0.01, "loss": 1.9578, "step": 74817 }, { "epoch": 7.6896197327852, "grad_norm": 0.031212560832500458, "learning_rate": 0.01, "loss": 1.957, "step": 74820 }, { "epoch": 7.689928057553956, "grad_norm": 0.11334410309791565, "learning_rate": 0.01, "loss": 1.9609, "step": 74823 }, { "epoch": 7.690236382322714, "grad_norm": 0.059831589460372925, "learning_rate": 0.01, "loss": 1.9729, "step": 74826 }, { "epoch": 7.69054470709147, "grad_norm": 0.10885782539844513, "learning_rate": 0.01, "loss": 1.9391, "step": 74829 }, { "epoch": 7.690853031860226, "grad_norm": 0.088487409055233, "learning_rate": 0.01, "loss": 1.9609, "step": 74832 }, { "epoch": 7.6911613566289825, "grad_norm": 0.050246331840753555, "learning_rate": 0.01, "loss": 1.9889, "step": 74835 }, { "epoch": 7.691469681397739, "grad_norm": 0.035412561148405075, "learning_rate": 0.01, "loss": 1.9705, "step": 74838 }, { "epoch": 7.691778006166495, "grad_norm": 0.08675045520067215, "learning_rate": 0.01, "loss": 1.9799, "step": 74841 }, { "epoch": 7.692086330935252, "grad_norm": 0.1179322674870491, "learning_rate": 0.01, "loss": 1.9859, "step": 74844 }, { "epoch": 7.692394655704009, "grad_norm": 0.060479529201984406, "learning_rate": 0.01, "loss": 1.9627, "step": 74847 }, { "epoch": 7.692702980472765, "grad_norm": 0.03623998537659645, "learning_rate": 0.01, "loss": 1.9662, "step": 74850 }, { "epoch": 7.693011305241521, "grad_norm": 0.039840538054704666, "learning_rate": 0.01, "loss": 1.9553, "step": 74853 }, { "epoch": 7.6933196300102775, "grad_norm": 0.0399545356631279, "learning_rate": 0.01, "loss": 1.9806, "step": 74856 }, { "epoch": 7.693627954779034, "grad_norm": 0.13020242750644684, "learning_rate": 0.01, "loss": 1.9372, "step": 74859 }, { "epoch": 7.69393627954779, "grad_norm": 0.05399748310446739, "learning_rate": 0.01, "loss": 1.9419, "step": 74862 }, { "epoch": 7.694244604316546, "grad_norm": 0.043653350323438644, "learning_rate": 0.01, "loss": 1.9902, "step": 74865 }, { "epoch": 7.694552929085303, "grad_norm": 0.04083268716931343, "learning_rate": 0.01, "loss": 1.9645, "step": 74868 }, { "epoch": 7.69486125385406, "grad_norm": 0.04305655509233475, "learning_rate": 0.01, "loss": 1.9643, "step": 74871 }, { "epoch": 7.695169578622816, "grad_norm": 0.06946787238121033, "learning_rate": 0.01, "loss": 1.9657, "step": 74874 }, { "epoch": 7.695477903391573, "grad_norm": 0.11347746849060059, "learning_rate": 0.01, "loss": 1.9921, "step": 74877 }, { "epoch": 7.695786228160329, "grad_norm": 0.14138512313365936, "learning_rate": 0.01, "loss": 1.9815, "step": 74880 }, { "epoch": 7.696094552929085, "grad_norm": 0.05391167849302292, "learning_rate": 0.01, "loss": 1.9642, "step": 74883 }, { "epoch": 7.6964028776978415, "grad_norm": 0.03958969935774803, "learning_rate": 0.01, "loss": 1.9615, "step": 74886 }, { "epoch": 7.696711202466598, "grad_norm": 0.04602275788784027, "learning_rate": 0.01, "loss": 1.9714, "step": 74889 }, { "epoch": 7.697019527235355, "grad_norm": 0.05577864497900009, "learning_rate": 0.01, "loss": 1.9865, "step": 74892 }, { "epoch": 7.697327852004111, "grad_norm": 0.08632632344961166, "learning_rate": 0.01, "loss": 1.9535, "step": 74895 }, { "epoch": 7.697636176772868, "grad_norm": 0.06367254257202148, "learning_rate": 0.01, "loss": 1.9973, "step": 74898 }, { "epoch": 7.697944501541624, "grad_norm": 0.07105473428964615, "learning_rate": 0.01, "loss": 1.9701, "step": 74901 }, { "epoch": 7.69825282631038, "grad_norm": 0.053121648728847504, "learning_rate": 0.01, "loss": 1.9925, "step": 74904 }, { "epoch": 7.6985611510791365, "grad_norm": 0.056938666850328445, "learning_rate": 0.01, "loss": 1.9782, "step": 74907 }, { "epoch": 7.698869475847893, "grad_norm": 0.06268006563186646, "learning_rate": 0.01, "loss": 1.9914, "step": 74910 }, { "epoch": 7.699177800616649, "grad_norm": 0.06493799388408661, "learning_rate": 0.01, "loss": 1.9542, "step": 74913 }, { "epoch": 7.699486125385406, "grad_norm": 0.05832339823246002, "learning_rate": 0.01, "loss": 1.9713, "step": 74916 }, { "epoch": 7.699794450154163, "grad_norm": 0.10625136643648148, "learning_rate": 0.01, "loss": 1.9989, "step": 74919 }, { "epoch": 7.700102774922919, "grad_norm": 0.04736369848251343, "learning_rate": 0.01, "loss": 1.9778, "step": 74922 }, { "epoch": 7.700411099691675, "grad_norm": 0.03734734281897545, "learning_rate": 0.01, "loss": 1.9753, "step": 74925 }, { "epoch": 7.700719424460432, "grad_norm": 0.05115366354584694, "learning_rate": 0.01, "loss": 1.9654, "step": 74928 }, { "epoch": 7.701027749229188, "grad_norm": 0.03465920314192772, "learning_rate": 0.01, "loss": 1.981, "step": 74931 }, { "epoch": 7.701336073997944, "grad_norm": 0.14209450781345367, "learning_rate": 0.01, "loss": 1.9774, "step": 74934 }, { "epoch": 7.701644398766701, "grad_norm": 0.07537394762039185, "learning_rate": 0.01, "loss": 1.9888, "step": 74937 }, { "epoch": 7.701952723535458, "grad_norm": 0.03767615184187889, "learning_rate": 0.01, "loss": 1.9829, "step": 74940 }, { "epoch": 7.702261048304214, "grad_norm": 0.034113120287656784, "learning_rate": 0.01, "loss": 1.9631, "step": 74943 }, { "epoch": 7.70256937307297, "grad_norm": 0.03793106600642204, "learning_rate": 0.01, "loss": 1.9816, "step": 74946 }, { "epoch": 7.702877697841727, "grad_norm": 0.041996173560619354, "learning_rate": 0.01, "loss": 1.9825, "step": 74949 }, { "epoch": 7.703186022610483, "grad_norm": 0.042373474687337875, "learning_rate": 0.01, "loss": 1.9617, "step": 74952 }, { "epoch": 7.703494347379239, "grad_norm": 0.05560959875583649, "learning_rate": 0.01, "loss": 1.9628, "step": 74955 }, { "epoch": 7.7038026721479955, "grad_norm": 0.052527736872434616, "learning_rate": 0.01, "loss": 1.951, "step": 74958 }, { "epoch": 7.704110996916752, "grad_norm": 0.1509440392255783, "learning_rate": 0.01, "loss": 1.9538, "step": 74961 }, { "epoch": 7.704419321685509, "grad_norm": 0.07441670447587967, "learning_rate": 0.01, "loss": 1.9738, "step": 74964 }, { "epoch": 7.704727646454265, "grad_norm": 0.09107119590044022, "learning_rate": 0.01, "loss": 1.9644, "step": 74967 }, { "epoch": 7.705035971223022, "grad_norm": 0.4417809247970581, "learning_rate": 0.01, "loss": 1.9873, "step": 74970 }, { "epoch": 7.705344295991778, "grad_norm": 0.049799490720033646, "learning_rate": 0.01, "loss": 1.9727, "step": 74973 }, { "epoch": 7.705652620760534, "grad_norm": 0.05132484808564186, "learning_rate": 0.01, "loss": 1.9841, "step": 74976 }, { "epoch": 7.705960945529291, "grad_norm": 0.04304105415940285, "learning_rate": 0.01, "loss": 1.9571, "step": 74979 }, { "epoch": 7.706269270298048, "grad_norm": 0.05429000407457352, "learning_rate": 0.01, "loss": 1.9803, "step": 74982 }, { "epoch": 7.706577595066804, "grad_norm": 0.035690829157829285, "learning_rate": 0.01, "loss": 1.9496, "step": 74985 }, { "epoch": 7.70688591983556, "grad_norm": 0.037559330463409424, "learning_rate": 0.01, "loss": 1.9778, "step": 74988 }, { "epoch": 7.707194244604317, "grad_norm": 0.03172672912478447, "learning_rate": 0.01, "loss": 1.9613, "step": 74991 }, { "epoch": 7.707502569373073, "grad_norm": 0.03989896923303604, "learning_rate": 0.01, "loss": 1.9597, "step": 74994 }, { "epoch": 7.707810894141829, "grad_norm": 0.06976346671581268, "learning_rate": 0.01, "loss": 1.9566, "step": 74997 }, { "epoch": 7.708119218910586, "grad_norm": 0.0848923996090889, "learning_rate": 0.01, "loss": 1.9741, "step": 75000 }, { "epoch": 7.708427543679342, "grad_norm": 0.07228533923625946, "learning_rate": 0.01, "loss": 1.9555, "step": 75003 }, { "epoch": 7.708735868448098, "grad_norm": 0.050755999982357025, "learning_rate": 0.01, "loss": 1.97, "step": 75006 }, { "epoch": 7.709044193216855, "grad_norm": 0.137029767036438, "learning_rate": 0.01, "loss": 1.9448, "step": 75009 }, { "epoch": 7.709352517985612, "grad_norm": 0.061711635440588, "learning_rate": 0.01, "loss": 1.9573, "step": 75012 }, { "epoch": 7.709660842754368, "grad_norm": 0.047601111233234406, "learning_rate": 0.01, "loss": 1.967, "step": 75015 }, { "epoch": 7.709969167523124, "grad_norm": 0.052504342049360275, "learning_rate": 0.01, "loss": 1.9616, "step": 75018 }, { "epoch": 7.710277492291881, "grad_norm": 0.10551578551530838, "learning_rate": 0.01, "loss": 1.9657, "step": 75021 }, { "epoch": 7.710585817060637, "grad_norm": 0.07759327441453934, "learning_rate": 0.01, "loss": 1.9562, "step": 75024 }, { "epoch": 7.710894141829393, "grad_norm": 0.05110660567879677, "learning_rate": 0.01, "loss": 1.9825, "step": 75027 }, { "epoch": 7.7112024665981505, "grad_norm": 0.040343545377254486, "learning_rate": 0.01, "loss": 1.9739, "step": 75030 }, { "epoch": 7.711510791366907, "grad_norm": 0.04899616539478302, "learning_rate": 0.01, "loss": 1.9723, "step": 75033 }, { "epoch": 7.711819116135663, "grad_norm": 0.072836734354496, "learning_rate": 0.01, "loss": 1.9561, "step": 75036 }, { "epoch": 7.712127440904419, "grad_norm": 0.05975237488746643, "learning_rate": 0.01, "loss": 1.9525, "step": 75039 }, { "epoch": 7.712435765673176, "grad_norm": 0.0680779218673706, "learning_rate": 0.01, "loss": 1.949, "step": 75042 }, { "epoch": 7.712744090441932, "grad_norm": 0.04388720169663429, "learning_rate": 0.01, "loss": 1.9466, "step": 75045 }, { "epoch": 7.713052415210688, "grad_norm": 0.04224342107772827, "learning_rate": 0.01, "loss": 1.9696, "step": 75048 }, { "epoch": 7.713360739979445, "grad_norm": 0.05068013072013855, "learning_rate": 0.01, "loss": 2.0021, "step": 75051 }, { "epoch": 7.713669064748202, "grad_norm": 0.08357524126768112, "learning_rate": 0.01, "loss": 2.0005, "step": 75054 }, { "epoch": 7.713977389516958, "grad_norm": 0.04642781615257263, "learning_rate": 0.01, "loss": 1.9509, "step": 75057 }, { "epoch": 7.714285714285714, "grad_norm": 0.039685651659965515, "learning_rate": 0.01, "loss": 1.9904, "step": 75060 }, { "epoch": 7.714594039054471, "grad_norm": 0.05703577399253845, "learning_rate": 0.01, "loss": 1.9213, "step": 75063 }, { "epoch": 7.714902363823227, "grad_norm": 0.06070883944630623, "learning_rate": 0.01, "loss": 1.966, "step": 75066 }, { "epoch": 7.715210688591983, "grad_norm": 0.12112396210432053, "learning_rate": 0.01, "loss": 1.9739, "step": 75069 }, { "epoch": 7.71551901336074, "grad_norm": 0.04316992312669754, "learning_rate": 0.01, "loss": 1.9834, "step": 75072 }, { "epoch": 7.715827338129497, "grad_norm": 0.09497283399105072, "learning_rate": 0.01, "loss": 1.949, "step": 75075 }, { "epoch": 7.716135662898253, "grad_norm": 0.1215139627456665, "learning_rate": 0.01, "loss": 1.992, "step": 75078 }, { "epoch": 7.7164439876670095, "grad_norm": 0.11401921510696411, "learning_rate": 0.01, "loss": 1.9828, "step": 75081 }, { "epoch": 7.716752312435766, "grad_norm": 0.034442540258169174, "learning_rate": 0.01, "loss": 1.967, "step": 75084 }, { "epoch": 7.717060637204522, "grad_norm": 0.0502055361866951, "learning_rate": 0.01, "loss": 1.9752, "step": 75087 }, { "epoch": 7.717368961973278, "grad_norm": 0.03723570331931114, "learning_rate": 0.01, "loss": 1.9605, "step": 75090 }, { "epoch": 7.717677286742035, "grad_norm": 0.03539155796170235, "learning_rate": 0.01, "loss": 1.9588, "step": 75093 }, { "epoch": 7.717985611510791, "grad_norm": 0.052659958600997925, "learning_rate": 0.01, "loss": 1.9499, "step": 75096 }, { "epoch": 7.718293936279547, "grad_norm": 0.10651390254497528, "learning_rate": 0.01, "loss": 1.9609, "step": 75099 }, { "epoch": 7.7186022610483045, "grad_norm": 0.09155957400798798, "learning_rate": 0.01, "loss": 1.9629, "step": 75102 }, { "epoch": 7.718910585817061, "grad_norm": 0.07794260233640671, "learning_rate": 0.01, "loss": 1.9591, "step": 75105 }, { "epoch": 7.719218910585817, "grad_norm": 0.10518870502710342, "learning_rate": 0.01, "loss": 1.9588, "step": 75108 }, { "epoch": 7.719527235354573, "grad_norm": 0.05979525297880173, "learning_rate": 0.01, "loss": 1.9414, "step": 75111 }, { "epoch": 7.71983556012333, "grad_norm": 0.10988526046276093, "learning_rate": 0.01, "loss": 1.9961, "step": 75114 }, { "epoch": 7.720143884892086, "grad_norm": 0.0853358581662178, "learning_rate": 0.01, "loss": 1.9532, "step": 75117 }, { "epoch": 7.720452209660843, "grad_norm": 0.046666599810123444, "learning_rate": 0.01, "loss": 1.9676, "step": 75120 }, { "epoch": 7.7207605344295995, "grad_norm": 0.05268367752432823, "learning_rate": 0.01, "loss": 1.9879, "step": 75123 }, { "epoch": 7.721068859198356, "grad_norm": 0.04782354086637497, "learning_rate": 0.01, "loss": 1.9823, "step": 75126 }, { "epoch": 7.721377183967112, "grad_norm": 0.04205673933029175, "learning_rate": 0.01, "loss": 1.9556, "step": 75129 }, { "epoch": 7.7216855087358685, "grad_norm": 0.0986146479845047, "learning_rate": 0.01, "loss": 1.9427, "step": 75132 }, { "epoch": 7.721993833504625, "grad_norm": 0.05076294392347336, "learning_rate": 0.01, "loss": 1.9902, "step": 75135 }, { "epoch": 7.722302158273381, "grad_norm": 0.08255535364151001, "learning_rate": 0.01, "loss": 1.9743, "step": 75138 }, { "epoch": 7.722610483042137, "grad_norm": 0.08000553399324417, "learning_rate": 0.01, "loss": 1.9677, "step": 75141 }, { "epoch": 7.722918807810894, "grad_norm": 0.03682593256235123, "learning_rate": 0.01, "loss": 1.9568, "step": 75144 }, { "epoch": 7.723227132579651, "grad_norm": 0.09714195877313614, "learning_rate": 0.01, "loss": 1.9744, "step": 75147 }, { "epoch": 7.723535457348407, "grad_norm": 0.05460646376013756, "learning_rate": 0.01, "loss": 1.9349, "step": 75150 }, { "epoch": 7.7238437821171635, "grad_norm": 0.03487299382686615, "learning_rate": 0.01, "loss": 1.9933, "step": 75153 }, { "epoch": 7.72415210688592, "grad_norm": 0.06751782447099686, "learning_rate": 0.01, "loss": 1.9736, "step": 75156 }, { "epoch": 7.724460431654676, "grad_norm": 0.10416395217180252, "learning_rate": 0.01, "loss": 1.9528, "step": 75159 }, { "epoch": 7.724768756423432, "grad_norm": 0.12070560455322266, "learning_rate": 0.01, "loss": 1.9806, "step": 75162 }, { "epoch": 7.725077081192189, "grad_norm": 0.0539083257317543, "learning_rate": 0.01, "loss": 1.9587, "step": 75165 }, { "epoch": 7.725385405960946, "grad_norm": 0.08535733073949814, "learning_rate": 0.01, "loss": 1.9531, "step": 75168 }, { "epoch": 7.725693730729702, "grad_norm": 0.047993749380111694, "learning_rate": 0.01, "loss": 1.954, "step": 75171 }, { "epoch": 7.7260020554984585, "grad_norm": 0.06361022591590881, "learning_rate": 0.01, "loss": 1.9656, "step": 75174 }, { "epoch": 7.726310380267215, "grad_norm": 0.049908868968486786, "learning_rate": 0.01, "loss": 1.9721, "step": 75177 }, { "epoch": 7.726618705035971, "grad_norm": 0.09875412285327911, "learning_rate": 0.01, "loss": 2.001, "step": 75180 }, { "epoch": 7.7269270298047275, "grad_norm": 0.0869210883975029, "learning_rate": 0.01, "loss": 1.9646, "step": 75183 }, { "epoch": 7.727235354573484, "grad_norm": 0.0573507621884346, "learning_rate": 0.01, "loss": 1.978, "step": 75186 }, { "epoch": 7.72754367934224, "grad_norm": 0.03936505317687988, "learning_rate": 0.01, "loss": 1.9763, "step": 75189 }, { "epoch": 7.727852004110997, "grad_norm": 0.0410919226706028, "learning_rate": 0.01, "loss": 1.9875, "step": 75192 }, { "epoch": 7.728160328879754, "grad_norm": 0.082386814057827, "learning_rate": 0.01, "loss": 1.9571, "step": 75195 }, { "epoch": 7.72846865364851, "grad_norm": 0.04592809081077576, "learning_rate": 0.01, "loss": 1.9524, "step": 75198 }, { "epoch": 7.728776978417266, "grad_norm": 0.03361072763800621, "learning_rate": 0.01, "loss": 1.9565, "step": 75201 }, { "epoch": 7.7290853031860225, "grad_norm": 0.08248721063137054, "learning_rate": 0.01, "loss": 1.992, "step": 75204 }, { "epoch": 7.729393627954779, "grad_norm": 0.05343814939260483, "learning_rate": 0.01, "loss": 1.9968, "step": 75207 }, { "epoch": 7.729701952723535, "grad_norm": 0.10746193677186966, "learning_rate": 0.01, "loss": 1.9795, "step": 75210 }, { "epoch": 7.730010277492292, "grad_norm": 0.07758577913045883, "learning_rate": 0.01, "loss": 1.9634, "step": 75213 }, { "epoch": 7.730318602261049, "grad_norm": 0.05135120823979378, "learning_rate": 0.01, "loss": 1.9696, "step": 75216 }, { "epoch": 7.730626927029805, "grad_norm": 0.10420482605695724, "learning_rate": 0.01, "loss": 1.9488, "step": 75219 }, { "epoch": 7.730935251798561, "grad_norm": 0.06070501729846001, "learning_rate": 0.01, "loss": 1.9587, "step": 75222 }, { "epoch": 7.7312435765673175, "grad_norm": 0.06866266578435898, "learning_rate": 0.01, "loss": 1.9788, "step": 75225 }, { "epoch": 7.731551901336074, "grad_norm": 0.07661131769418716, "learning_rate": 0.01, "loss": 1.9862, "step": 75228 }, { "epoch": 7.73186022610483, "grad_norm": 0.12407959252595901, "learning_rate": 0.01, "loss": 1.9624, "step": 75231 }, { "epoch": 7.7321685508735865, "grad_norm": 0.11292631924152374, "learning_rate": 0.01, "loss": 1.9595, "step": 75234 }, { "epoch": 7.732476875642344, "grad_norm": 0.10384037345647812, "learning_rate": 0.01, "loss": 1.9889, "step": 75237 }, { "epoch": 7.7327852004111, "grad_norm": 0.04758400097489357, "learning_rate": 0.01, "loss": 1.9988, "step": 75240 }, { "epoch": 7.733093525179856, "grad_norm": 0.0348513089120388, "learning_rate": 0.01, "loss": 1.9686, "step": 75243 }, { "epoch": 7.733401849948613, "grad_norm": 0.03560272976756096, "learning_rate": 0.01, "loss": 1.9741, "step": 75246 }, { "epoch": 7.733710174717369, "grad_norm": 0.038762979209423065, "learning_rate": 0.01, "loss": 1.9643, "step": 75249 }, { "epoch": 7.734018499486125, "grad_norm": 0.06412766128778458, "learning_rate": 0.01, "loss": 1.983, "step": 75252 }, { "epoch": 7.7343268242548815, "grad_norm": 0.09387977421283722, "learning_rate": 0.01, "loss": 1.9568, "step": 75255 }, { "epoch": 7.734635149023639, "grad_norm": 0.0382736399769783, "learning_rate": 0.01, "loss": 1.9778, "step": 75258 }, { "epoch": 7.734943473792395, "grad_norm": 0.061939794570207596, "learning_rate": 0.01, "loss": 1.9609, "step": 75261 }, { "epoch": 7.735251798561151, "grad_norm": 0.10443083941936493, "learning_rate": 0.01, "loss": 1.9632, "step": 75264 }, { "epoch": 7.735560123329908, "grad_norm": 0.10148416459560394, "learning_rate": 0.01, "loss": 1.9547, "step": 75267 }, { "epoch": 7.735868448098664, "grad_norm": 0.06240933761000633, "learning_rate": 0.01, "loss": 1.9713, "step": 75270 }, { "epoch": 7.73617677286742, "grad_norm": 0.062156736850738525, "learning_rate": 0.01, "loss": 1.9787, "step": 75273 }, { "epoch": 7.7364850976361765, "grad_norm": 0.07212886959314346, "learning_rate": 0.01, "loss": 1.9646, "step": 75276 }, { "epoch": 7.736793422404933, "grad_norm": 0.06498091667890549, "learning_rate": 0.01, "loss": 1.9747, "step": 75279 }, { "epoch": 7.737101747173689, "grad_norm": 0.07634437829256058, "learning_rate": 0.01, "loss": 1.9715, "step": 75282 }, { "epoch": 7.737410071942446, "grad_norm": 0.09608074277639389, "learning_rate": 0.01, "loss": 1.9476, "step": 75285 }, { "epoch": 7.737718396711203, "grad_norm": 0.07703720033168793, "learning_rate": 0.01, "loss": 1.977, "step": 75288 }, { "epoch": 7.738026721479959, "grad_norm": 0.07131288200616837, "learning_rate": 0.01, "loss": 1.9647, "step": 75291 }, { "epoch": 7.738335046248715, "grad_norm": 0.08612216264009476, "learning_rate": 0.01, "loss": 1.9584, "step": 75294 }, { "epoch": 7.738643371017472, "grad_norm": 0.19164220988750458, "learning_rate": 0.01, "loss": 1.9955, "step": 75297 }, { "epoch": 7.738951695786228, "grad_norm": 0.06035240739583969, "learning_rate": 0.01, "loss": 1.95, "step": 75300 }, { "epoch": 7.739260020554985, "grad_norm": 0.04050587862730026, "learning_rate": 0.01, "loss": 1.9557, "step": 75303 }, { "epoch": 7.739568345323741, "grad_norm": 0.03798283636569977, "learning_rate": 0.01, "loss": 1.9721, "step": 75306 }, { "epoch": 7.739876670092498, "grad_norm": 0.06570883095264435, "learning_rate": 0.01, "loss": 1.9675, "step": 75309 }, { "epoch": 7.740184994861254, "grad_norm": 0.05782955512404442, "learning_rate": 0.01, "loss": 1.9966, "step": 75312 }, { "epoch": 7.74049331963001, "grad_norm": 0.047876227647066116, "learning_rate": 0.01, "loss": 1.9654, "step": 75315 }, { "epoch": 7.740801644398767, "grad_norm": 0.06209714338183403, "learning_rate": 0.01, "loss": 1.9449, "step": 75318 }, { "epoch": 7.741109969167523, "grad_norm": 0.04943614453077316, "learning_rate": 0.01, "loss": 1.9557, "step": 75321 }, { "epoch": 7.741418293936279, "grad_norm": 0.05134053900837898, "learning_rate": 0.01, "loss": 1.9757, "step": 75324 }, { "epoch": 7.7417266187050355, "grad_norm": 0.03939725086092949, "learning_rate": 0.01, "loss": 1.9581, "step": 75327 }, { "epoch": 7.742034943473793, "grad_norm": 0.06609564274549484, "learning_rate": 0.01, "loss": 1.9712, "step": 75330 }, { "epoch": 7.742343268242549, "grad_norm": 0.06503685563802719, "learning_rate": 0.01, "loss": 1.9754, "step": 75333 }, { "epoch": 7.742651593011305, "grad_norm": 0.16153855621814728, "learning_rate": 0.01, "loss": 1.9787, "step": 75336 }, { "epoch": 7.742959917780062, "grad_norm": 0.07932616025209427, "learning_rate": 0.01, "loss": 1.9554, "step": 75339 }, { "epoch": 7.743268242548818, "grad_norm": 0.12574706971645355, "learning_rate": 0.01, "loss": 1.9941, "step": 75342 }, { "epoch": 7.743576567317574, "grad_norm": 0.0709739476442337, "learning_rate": 0.01, "loss": 1.991, "step": 75345 }, { "epoch": 7.743884892086331, "grad_norm": 0.05811988189816475, "learning_rate": 0.01, "loss": 1.9503, "step": 75348 }, { "epoch": 7.744193216855088, "grad_norm": 0.05921448767185211, "learning_rate": 0.01, "loss": 1.9589, "step": 75351 }, { "epoch": 7.744501541623844, "grad_norm": 0.0461372472345829, "learning_rate": 0.01, "loss": 1.952, "step": 75354 }, { "epoch": 7.7448098663926, "grad_norm": 0.05297720059752464, "learning_rate": 0.01, "loss": 1.94, "step": 75357 }, { "epoch": 7.745118191161357, "grad_norm": 0.05457683280110359, "learning_rate": 0.01, "loss": 1.9789, "step": 75360 }, { "epoch": 7.745426515930113, "grad_norm": 0.05880630761384964, "learning_rate": 0.01, "loss": 1.947, "step": 75363 }, { "epoch": 7.745734840698869, "grad_norm": 0.04461346194148064, "learning_rate": 0.01, "loss": 1.9774, "step": 75366 }, { "epoch": 7.746043165467626, "grad_norm": 0.036604154855012894, "learning_rate": 0.01, "loss": 1.9853, "step": 75369 }, { "epoch": 7.746351490236382, "grad_norm": 0.1032840833067894, "learning_rate": 0.01, "loss": 1.9755, "step": 75372 }, { "epoch": 7.746659815005139, "grad_norm": 0.060783445835113525, "learning_rate": 0.01, "loss": 1.9704, "step": 75375 }, { "epoch": 7.746968139773895, "grad_norm": 0.09891743212938309, "learning_rate": 0.01, "loss": 1.9895, "step": 75378 }, { "epoch": 7.747276464542652, "grad_norm": 0.06625793129205704, "learning_rate": 0.01, "loss": 1.9688, "step": 75381 }, { "epoch": 7.747584789311408, "grad_norm": 0.0494624525308609, "learning_rate": 0.01, "loss": 1.9624, "step": 75384 }, { "epoch": 7.747893114080164, "grad_norm": 0.06007739156484604, "learning_rate": 0.01, "loss": 1.972, "step": 75387 }, { "epoch": 7.748201438848921, "grad_norm": 0.051491837948560715, "learning_rate": 0.01, "loss": 1.9699, "step": 75390 }, { "epoch": 7.748509763617677, "grad_norm": 0.04195559397339821, "learning_rate": 0.01, "loss": 1.959, "step": 75393 }, { "epoch": 7.748818088386434, "grad_norm": 0.05081287398934364, "learning_rate": 0.01, "loss": 1.9438, "step": 75396 }, { "epoch": 7.7491264131551905, "grad_norm": 0.06100494787096977, "learning_rate": 0.01, "loss": 1.9906, "step": 75399 }, { "epoch": 7.749434737923947, "grad_norm": 0.18386486172676086, "learning_rate": 0.01, "loss": 1.9644, "step": 75402 }, { "epoch": 7.749743062692703, "grad_norm": 0.06177636235952377, "learning_rate": 0.01, "loss": 1.9777, "step": 75405 }, { "epoch": 7.750051387461459, "grad_norm": 0.0422503836452961, "learning_rate": 0.01, "loss": 1.9599, "step": 75408 }, { "epoch": 7.750359712230216, "grad_norm": 0.0480877049267292, "learning_rate": 0.01, "loss": 1.9612, "step": 75411 }, { "epoch": 7.750668036998972, "grad_norm": 0.04990262910723686, "learning_rate": 0.01, "loss": 1.9557, "step": 75414 }, { "epoch": 7.750976361767728, "grad_norm": 0.034894928336143494, "learning_rate": 0.01, "loss": 1.9664, "step": 75417 }, { "epoch": 7.751284686536485, "grad_norm": 0.05810752511024475, "learning_rate": 0.01, "loss": 1.9806, "step": 75420 }, { "epoch": 7.751593011305242, "grad_norm": 0.04341167211532593, "learning_rate": 0.01, "loss": 1.9567, "step": 75423 }, { "epoch": 7.751901336073998, "grad_norm": 0.08866117149591446, "learning_rate": 0.01, "loss": 1.9583, "step": 75426 }, { "epoch": 7.752209660842754, "grad_norm": 0.13528284430503845, "learning_rate": 0.01, "loss": 1.9709, "step": 75429 }, { "epoch": 7.752517985611511, "grad_norm": 0.05988186597824097, "learning_rate": 0.01, "loss": 1.9777, "step": 75432 }, { "epoch": 7.752826310380267, "grad_norm": 0.12178362905979156, "learning_rate": 0.01, "loss": 1.9745, "step": 75435 }, { "epoch": 7.753134635149023, "grad_norm": 0.08513082563877106, "learning_rate": 0.01, "loss": 1.9998, "step": 75438 }, { "epoch": 7.7534429599177805, "grad_norm": 0.05664602294564247, "learning_rate": 0.01, "loss": 1.9613, "step": 75441 }, { "epoch": 7.753751284686537, "grad_norm": 0.059373270720243454, "learning_rate": 0.01, "loss": 1.9792, "step": 75444 }, { "epoch": 7.754059609455293, "grad_norm": 0.06777229905128479, "learning_rate": 0.01, "loss": 1.9632, "step": 75447 }, { "epoch": 7.7543679342240495, "grad_norm": 0.06595178693532944, "learning_rate": 0.01, "loss": 1.9641, "step": 75450 }, { "epoch": 7.754676258992806, "grad_norm": 0.03551295027136803, "learning_rate": 0.01, "loss": 1.9841, "step": 75453 }, { "epoch": 7.754984583761562, "grad_norm": 0.13232779502868652, "learning_rate": 0.01, "loss": 1.9651, "step": 75456 }, { "epoch": 7.755292908530318, "grad_norm": 0.11032851040363312, "learning_rate": 0.01, "loss": 1.9578, "step": 75459 }, { "epoch": 7.755601233299075, "grad_norm": 0.09487207978963852, "learning_rate": 0.01, "loss": 1.951, "step": 75462 }, { "epoch": 7.755909558067831, "grad_norm": 0.07793836295604706, "learning_rate": 0.01, "loss": 1.9445, "step": 75465 }, { "epoch": 7.756217882836588, "grad_norm": 0.0555163212120533, "learning_rate": 0.01, "loss": 1.9723, "step": 75468 }, { "epoch": 7.7565262076053445, "grad_norm": 0.04495738446712494, "learning_rate": 0.01, "loss": 1.9942, "step": 75471 }, { "epoch": 7.756834532374101, "grad_norm": 0.03979022800922394, "learning_rate": 0.01, "loss": 1.9753, "step": 75474 }, { "epoch": 7.757142857142857, "grad_norm": 0.054774973541498184, "learning_rate": 0.01, "loss": 1.9761, "step": 75477 }, { "epoch": 7.757451181911613, "grad_norm": 0.07539385557174683, "learning_rate": 0.01, "loss": 1.9833, "step": 75480 }, { "epoch": 7.75775950668037, "grad_norm": 0.05782768502831459, "learning_rate": 0.01, "loss": 1.9901, "step": 75483 }, { "epoch": 7.758067831449126, "grad_norm": 0.035946596413850784, "learning_rate": 0.01, "loss": 1.9863, "step": 75486 }, { "epoch": 7.758376156217883, "grad_norm": 0.04099863022565842, "learning_rate": 0.01, "loss": 1.9681, "step": 75489 }, { "epoch": 7.7586844809866395, "grad_norm": 0.10557963699102402, "learning_rate": 0.01, "loss": 1.9738, "step": 75492 }, { "epoch": 7.758992805755396, "grad_norm": 0.061207111924886703, "learning_rate": 0.01, "loss": 1.9913, "step": 75495 }, { "epoch": 7.759301130524152, "grad_norm": 0.08292508125305176, "learning_rate": 0.01, "loss": 1.9862, "step": 75498 }, { "epoch": 7.7596094552929085, "grad_norm": 0.04073723405599594, "learning_rate": 0.01, "loss": 1.975, "step": 75501 }, { "epoch": 7.759917780061665, "grad_norm": 0.05526462569832802, "learning_rate": 0.01, "loss": 1.9802, "step": 75504 }, { "epoch": 7.760226104830421, "grad_norm": 0.06078262999653816, "learning_rate": 0.01, "loss": 1.9722, "step": 75507 }, { "epoch": 7.760534429599177, "grad_norm": 0.07975959032773972, "learning_rate": 0.01, "loss": 1.9697, "step": 75510 }, { "epoch": 7.760842754367935, "grad_norm": 0.06245322525501251, "learning_rate": 0.01, "loss": 1.9904, "step": 75513 }, { "epoch": 7.761151079136691, "grad_norm": 0.10303689539432526, "learning_rate": 0.01, "loss": 1.9559, "step": 75516 }, { "epoch": 7.761459403905447, "grad_norm": 0.1304595023393631, "learning_rate": 0.01, "loss": 1.9807, "step": 75519 }, { "epoch": 7.7617677286742035, "grad_norm": 0.04025387763977051, "learning_rate": 0.01, "loss": 1.9639, "step": 75522 }, { "epoch": 7.76207605344296, "grad_norm": 0.0802154690027237, "learning_rate": 0.01, "loss": 1.9707, "step": 75525 }, { "epoch": 7.762384378211716, "grad_norm": 0.09343558549880981, "learning_rate": 0.01, "loss": 1.9619, "step": 75528 }, { "epoch": 7.762692702980472, "grad_norm": 0.07421892136335373, "learning_rate": 0.01, "loss": 1.9904, "step": 75531 }, { "epoch": 7.76300102774923, "grad_norm": 0.03761820122599602, "learning_rate": 0.01, "loss": 1.9841, "step": 75534 }, { "epoch": 7.763309352517986, "grad_norm": 0.02898077666759491, "learning_rate": 0.01, "loss": 1.9564, "step": 75537 }, { "epoch": 7.763617677286742, "grad_norm": 0.060065463185310364, "learning_rate": 0.01, "loss": 1.9775, "step": 75540 }, { "epoch": 7.7639260020554985, "grad_norm": 0.08949063718318939, "learning_rate": 0.01, "loss": 1.9812, "step": 75543 }, { "epoch": 7.764234326824255, "grad_norm": 0.05697142332792282, "learning_rate": 0.01, "loss": 1.9501, "step": 75546 }, { "epoch": 7.764542651593011, "grad_norm": 0.03366275504231453, "learning_rate": 0.01, "loss": 1.9735, "step": 75549 }, { "epoch": 7.7648509763617675, "grad_norm": 0.11962536722421646, "learning_rate": 0.01, "loss": 1.9602, "step": 75552 }, { "epoch": 7.765159301130524, "grad_norm": 0.049452245235443115, "learning_rate": 0.01, "loss": 1.989, "step": 75555 }, { "epoch": 7.76546762589928, "grad_norm": 0.10120616108179092, "learning_rate": 0.01, "loss": 1.9721, "step": 75558 }, { "epoch": 7.765775950668037, "grad_norm": 0.049389541149139404, "learning_rate": 0.01, "loss": 1.9825, "step": 75561 }, { "epoch": 7.766084275436794, "grad_norm": 0.07128654420375824, "learning_rate": 0.01, "loss": 1.9668, "step": 75564 }, { "epoch": 7.76639260020555, "grad_norm": 0.05818384140729904, "learning_rate": 0.01, "loss": 1.9692, "step": 75567 }, { "epoch": 7.766700924974306, "grad_norm": 0.03975757211446762, "learning_rate": 0.01, "loss": 1.9576, "step": 75570 }, { "epoch": 7.7670092497430625, "grad_norm": 0.046733129769563675, "learning_rate": 0.01, "loss": 1.9491, "step": 75573 }, { "epoch": 7.767317574511819, "grad_norm": 0.06592223793268204, "learning_rate": 0.01, "loss": 1.9295, "step": 75576 }, { "epoch": 7.767625899280576, "grad_norm": 0.09464466571807861, "learning_rate": 0.01, "loss": 1.9733, "step": 75579 }, { "epoch": 7.767934224049332, "grad_norm": 0.07415366172790527, "learning_rate": 0.01, "loss": 1.951, "step": 75582 }, { "epoch": 7.768242548818089, "grad_norm": 0.06457046419382095, "learning_rate": 0.01, "loss": 1.9685, "step": 75585 }, { "epoch": 7.768550873586845, "grad_norm": 0.10259808599948883, "learning_rate": 0.01, "loss": 1.9662, "step": 75588 }, { "epoch": 7.768859198355601, "grad_norm": 0.186797097325325, "learning_rate": 0.01, "loss": 2.0009, "step": 75591 }, { "epoch": 7.7691675231243575, "grad_norm": 0.13249987363815308, "learning_rate": 0.01, "loss": 1.9644, "step": 75594 }, { "epoch": 7.769475847893114, "grad_norm": 0.07284609973430634, "learning_rate": 0.01, "loss": 1.9688, "step": 75597 }, { "epoch": 7.76978417266187, "grad_norm": 0.05419408157467842, "learning_rate": 0.01, "loss": 2.0002, "step": 75600 }, { "epoch": 7.7700924974306265, "grad_norm": 0.05547893047332764, "learning_rate": 0.01, "loss": 1.9646, "step": 75603 }, { "epoch": 7.770400822199384, "grad_norm": 0.04928140714764595, "learning_rate": 0.01, "loss": 1.954, "step": 75606 }, { "epoch": 7.77070914696814, "grad_norm": 0.04238557815551758, "learning_rate": 0.01, "loss": 1.9766, "step": 75609 }, { "epoch": 7.771017471736896, "grad_norm": 0.08550449460744858, "learning_rate": 0.01, "loss": 1.9816, "step": 75612 }, { "epoch": 7.771325796505653, "grad_norm": 0.07773346453905106, "learning_rate": 0.01, "loss": 1.935, "step": 75615 }, { "epoch": 7.771634121274409, "grad_norm": 0.06905487179756165, "learning_rate": 0.01, "loss": 1.9582, "step": 75618 }, { "epoch": 7.771942446043165, "grad_norm": 0.07177229970693588, "learning_rate": 0.01, "loss": 1.9655, "step": 75621 }, { "epoch": 7.7722507708119215, "grad_norm": 0.04951430857181549, "learning_rate": 0.01, "loss": 1.9543, "step": 75624 }, { "epoch": 7.772559095580679, "grad_norm": 0.06549627333879471, "learning_rate": 0.01, "loss": 1.9713, "step": 75627 }, { "epoch": 7.772867420349435, "grad_norm": 0.07106509059667587, "learning_rate": 0.01, "loss": 1.9524, "step": 75630 }, { "epoch": 7.773175745118191, "grad_norm": 0.09129591286182404, "learning_rate": 0.01, "loss": 1.9817, "step": 75633 }, { "epoch": 7.773484069886948, "grad_norm": 0.04063316062092781, "learning_rate": 0.01, "loss": 1.952, "step": 75636 }, { "epoch": 7.773792394655704, "grad_norm": 0.13061417639255524, "learning_rate": 0.01, "loss": 1.9687, "step": 75639 }, { "epoch": 7.77410071942446, "grad_norm": 0.0752875953912735, "learning_rate": 0.01, "loss": 1.9679, "step": 75642 }, { "epoch": 7.7744090441932165, "grad_norm": 0.03646187111735344, "learning_rate": 0.01, "loss": 1.982, "step": 75645 }, { "epoch": 7.774717368961973, "grad_norm": 0.03636857867240906, "learning_rate": 0.01, "loss": 1.9371, "step": 75648 }, { "epoch": 7.77502569373073, "grad_norm": 0.04543618857860565, "learning_rate": 0.01, "loss": 1.972, "step": 75651 }, { "epoch": 7.775334018499486, "grad_norm": 0.048112813383340836, "learning_rate": 0.01, "loss": 1.9865, "step": 75654 }, { "epoch": 7.775642343268243, "grad_norm": 0.054791998118162155, "learning_rate": 0.01, "loss": 2.0064, "step": 75657 }, { "epoch": 7.775950668036999, "grad_norm": 0.052178870886564255, "learning_rate": 0.01, "loss": 1.9639, "step": 75660 }, { "epoch": 7.776258992805755, "grad_norm": 0.05024852603673935, "learning_rate": 0.01, "loss": 1.9713, "step": 75663 }, { "epoch": 7.776567317574512, "grad_norm": 0.03811731934547424, "learning_rate": 0.01, "loss": 1.982, "step": 75666 }, { "epoch": 7.776875642343268, "grad_norm": 0.07078245282173157, "learning_rate": 0.01, "loss": 1.9642, "step": 75669 }, { "epoch": 7.777183967112025, "grad_norm": 0.07220859825611115, "learning_rate": 0.01, "loss": 1.9559, "step": 75672 }, { "epoch": 7.777492291880781, "grad_norm": 0.07006082683801651, "learning_rate": 0.01, "loss": 1.963, "step": 75675 }, { "epoch": 7.777800616649538, "grad_norm": 0.10420866310596466, "learning_rate": 0.01, "loss": 1.9867, "step": 75678 }, { "epoch": 7.778108941418294, "grad_norm": 0.08756904304027557, "learning_rate": 0.01, "loss": 1.9641, "step": 75681 }, { "epoch": 7.77841726618705, "grad_norm": 0.05425836890935898, "learning_rate": 0.01, "loss": 1.9745, "step": 75684 }, { "epoch": 7.778725590955807, "grad_norm": 0.044821374118328094, "learning_rate": 0.01, "loss": 1.9396, "step": 75687 }, { "epoch": 7.779033915724563, "grad_norm": 0.059608519077301025, "learning_rate": 0.01, "loss": 1.975, "step": 75690 }, { "epoch": 7.779342240493319, "grad_norm": 0.0818125456571579, "learning_rate": 0.01, "loss": 1.9552, "step": 75693 }, { "epoch": 7.779650565262076, "grad_norm": 0.09120543301105499, "learning_rate": 0.01, "loss": 1.9763, "step": 75696 }, { "epoch": 7.779958890030833, "grad_norm": 0.12951593101024628, "learning_rate": 0.01, "loss": 1.9544, "step": 75699 }, { "epoch": 7.780267214799589, "grad_norm": 0.12418895214796066, "learning_rate": 0.01, "loss": 1.974, "step": 75702 }, { "epoch": 7.780575539568345, "grad_norm": 0.1442076563835144, "learning_rate": 0.01, "loss": 1.9891, "step": 75705 }, { "epoch": 7.780883864337102, "grad_norm": 0.1782861202955246, "learning_rate": 0.01, "loss": 1.9631, "step": 75708 }, { "epoch": 7.781192189105858, "grad_norm": 0.13022981584072113, "learning_rate": 0.01, "loss": 1.9714, "step": 75711 }, { "epoch": 7.781500513874614, "grad_norm": 0.05073774605989456, "learning_rate": 0.01, "loss": 1.9785, "step": 75714 }, { "epoch": 7.7818088386433715, "grad_norm": 0.09594649821519852, "learning_rate": 0.01, "loss": 1.9578, "step": 75717 }, { "epoch": 7.782117163412128, "grad_norm": 0.04567958042025566, "learning_rate": 0.01, "loss": 1.9706, "step": 75720 }, { "epoch": 7.782425488180884, "grad_norm": 0.0413205660879612, "learning_rate": 0.01, "loss": 1.9568, "step": 75723 }, { "epoch": 7.78273381294964, "grad_norm": 0.046227239072322845, "learning_rate": 0.01, "loss": 1.9703, "step": 75726 }, { "epoch": 7.783042137718397, "grad_norm": 0.052021145820617676, "learning_rate": 0.01, "loss": 1.9781, "step": 75729 }, { "epoch": 7.783350462487153, "grad_norm": 0.05464644357562065, "learning_rate": 0.01, "loss": 1.967, "step": 75732 }, { "epoch": 7.783658787255909, "grad_norm": 0.07341251522302628, "learning_rate": 0.01, "loss": 1.9805, "step": 75735 }, { "epoch": 7.783967112024666, "grad_norm": 0.03740450739860535, "learning_rate": 0.01, "loss": 1.9845, "step": 75738 }, { "epoch": 7.784275436793422, "grad_norm": 0.051203422248363495, "learning_rate": 0.01, "loss": 1.9743, "step": 75741 }, { "epoch": 7.784583761562179, "grad_norm": 0.071673683822155, "learning_rate": 0.01, "loss": 1.9345, "step": 75744 }, { "epoch": 7.784892086330935, "grad_norm": 0.1421179473400116, "learning_rate": 0.01, "loss": 1.9591, "step": 75747 }, { "epoch": 7.785200411099692, "grad_norm": 0.054482489824295044, "learning_rate": 0.01, "loss": 1.9486, "step": 75750 }, { "epoch": 7.785508735868448, "grad_norm": 0.03319859504699707, "learning_rate": 0.01, "loss": 1.9698, "step": 75753 }, { "epoch": 7.785817060637204, "grad_norm": 0.08589328080415726, "learning_rate": 0.01, "loss": 1.9734, "step": 75756 }, { "epoch": 7.786125385405961, "grad_norm": 0.056132521480321884, "learning_rate": 0.01, "loss": 1.9651, "step": 75759 }, { "epoch": 7.786433710174718, "grad_norm": 0.10983091592788696, "learning_rate": 0.01, "loss": 1.9775, "step": 75762 }, { "epoch": 7.786742034943474, "grad_norm": 0.1317470520734787, "learning_rate": 0.01, "loss": 1.9783, "step": 75765 }, { "epoch": 7.7870503597122305, "grad_norm": 0.1627342402935028, "learning_rate": 0.01, "loss": 1.9815, "step": 75768 }, { "epoch": 7.787358684480987, "grad_norm": 0.13085448741912842, "learning_rate": 0.01, "loss": 1.982, "step": 75771 }, { "epoch": 7.787667009249743, "grad_norm": 0.08374191075563431, "learning_rate": 0.01, "loss": 1.9837, "step": 75774 }, { "epoch": 7.787975334018499, "grad_norm": 0.05581342428922653, "learning_rate": 0.01, "loss": 1.9791, "step": 75777 }, { "epoch": 7.788283658787256, "grad_norm": 0.07613394409418106, "learning_rate": 0.01, "loss": 1.9594, "step": 75780 }, { "epoch": 7.788591983556012, "grad_norm": 0.07574499398469925, "learning_rate": 0.01, "loss": 1.952, "step": 75783 }, { "epoch": 7.788900308324768, "grad_norm": 0.05884881317615509, "learning_rate": 0.01, "loss": 1.9271, "step": 75786 }, { "epoch": 7.7892086330935255, "grad_norm": 0.05409419536590576, "learning_rate": 0.01, "loss": 1.9753, "step": 75789 }, { "epoch": 7.789516957862282, "grad_norm": 0.06512802094221115, "learning_rate": 0.01, "loss": 1.9808, "step": 75792 }, { "epoch": 7.789825282631038, "grad_norm": 0.04650108888745308, "learning_rate": 0.01, "loss": 1.9729, "step": 75795 }, { "epoch": 7.790133607399794, "grad_norm": 0.06513743847608566, "learning_rate": 0.01, "loss": 1.9803, "step": 75798 }, { "epoch": 7.790441932168551, "grad_norm": 0.07151713222265244, "learning_rate": 0.01, "loss": 1.9836, "step": 75801 }, { "epoch": 7.790750256937307, "grad_norm": 0.09430654346942902, "learning_rate": 0.01, "loss": 1.9853, "step": 75804 }, { "epoch": 7.791058581706063, "grad_norm": 0.10549556463956833, "learning_rate": 0.01, "loss": 1.9892, "step": 75807 }, { "epoch": 7.7913669064748206, "grad_norm": 0.11038587987422943, "learning_rate": 0.01, "loss": 1.9798, "step": 75810 }, { "epoch": 7.791675231243577, "grad_norm": 0.06415724009275436, "learning_rate": 0.01, "loss": 1.9588, "step": 75813 }, { "epoch": 7.791983556012333, "grad_norm": 0.04490191116929054, "learning_rate": 0.01, "loss": 1.9603, "step": 75816 }, { "epoch": 7.7922918807810895, "grad_norm": 0.06832468509674072, "learning_rate": 0.01, "loss": 1.9724, "step": 75819 }, { "epoch": 7.792600205549846, "grad_norm": 0.07226554304361343, "learning_rate": 0.01, "loss": 1.9648, "step": 75822 }, { "epoch": 7.792908530318602, "grad_norm": 0.05115282163023949, "learning_rate": 0.01, "loss": 1.9519, "step": 75825 }, { "epoch": 7.793216855087358, "grad_norm": 0.05715862661600113, "learning_rate": 0.01, "loss": 1.9749, "step": 75828 }, { "epoch": 7.793525179856115, "grad_norm": 0.03245139494538307, "learning_rate": 0.01, "loss": 1.9663, "step": 75831 }, { "epoch": 7.793833504624872, "grad_norm": 0.036928169429302216, "learning_rate": 0.01, "loss": 1.9359, "step": 75834 }, { "epoch": 7.794141829393628, "grad_norm": 0.056494537740945816, "learning_rate": 0.01, "loss": 1.9915, "step": 75837 }, { "epoch": 7.7944501541623845, "grad_norm": 0.12836699187755585, "learning_rate": 0.01, "loss": 1.9518, "step": 75840 }, { "epoch": 7.794758478931141, "grad_norm": 0.11228854954242706, "learning_rate": 0.01, "loss": 1.9997, "step": 75843 }, { "epoch": 7.795066803699897, "grad_norm": 0.13982988893985748, "learning_rate": 0.01, "loss": 1.979, "step": 75846 }, { "epoch": 7.795375128468653, "grad_norm": 0.05313684791326523, "learning_rate": 0.01, "loss": 1.9788, "step": 75849 }, { "epoch": 7.79568345323741, "grad_norm": 0.038301192224025726, "learning_rate": 0.01, "loss": 1.9663, "step": 75852 }, { "epoch": 7.795991778006167, "grad_norm": 0.030965665355324745, "learning_rate": 0.01, "loss": 1.9735, "step": 75855 }, { "epoch": 7.796300102774923, "grad_norm": 0.05267125740647316, "learning_rate": 0.01, "loss": 1.9639, "step": 75858 }, { "epoch": 7.7966084275436796, "grad_norm": 0.11764229089021683, "learning_rate": 0.01, "loss": 2.0029, "step": 75861 }, { "epoch": 7.796916752312436, "grad_norm": 0.12397600710391998, "learning_rate": 0.01, "loss": 1.9801, "step": 75864 }, { "epoch": 7.797225077081192, "grad_norm": 0.15025153756141663, "learning_rate": 0.01, "loss": 1.9864, "step": 75867 }, { "epoch": 7.7975334018499485, "grad_norm": 0.09809085726737976, "learning_rate": 0.01, "loss": 1.9675, "step": 75870 }, { "epoch": 7.797841726618705, "grad_norm": 0.054064489901065826, "learning_rate": 0.01, "loss": 1.9777, "step": 75873 }, { "epoch": 7.798150051387461, "grad_norm": 0.03437699377536774, "learning_rate": 0.01, "loss": 1.986, "step": 75876 }, { "epoch": 7.798458376156217, "grad_norm": 0.03941173478960991, "learning_rate": 0.01, "loss": 1.9807, "step": 75879 }, { "epoch": 7.798766700924975, "grad_norm": 0.058382656425237656, "learning_rate": 0.01, "loss": 1.9973, "step": 75882 }, { "epoch": 7.799075025693731, "grad_norm": 0.03915715217590332, "learning_rate": 0.01, "loss": 1.9917, "step": 75885 }, { "epoch": 7.799383350462487, "grad_norm": 0.03458473086357117, "learning_rate": 0.01, "loss": 1.9843, "step": 75888 }, { "epoch": 7.7996916752312435, "grad_norm": 0.03535837680101395, "learning_rate": 0.01, "loss": 1.9659, "step": 75891 }, { "epoch": 7.8, "grad_norm": 0.10532993078231812, "learning_rate": 0.01, "loss": 1.9597, "step": 75894 }, { "epoch": 7.800308324768756, "grad_norm": 0.0679871067404747, "learning_rate": 0.01, "loss": 1.9494, "step": 75897 }, { "epoch": 7.800616649537513, "grad_norm": 0.09095168858766556, "learning_rate": 0.01, "loss": 1.9792, "step": 75900 }, { "epoch": 7.80092497430627, "grad_norm": 0.04139483720064163, "learning_rate": 0.01, "loss": 1.9513, "step": 75903 }, { "epoch": 7.801233299075026, "grad_norm": 0.034134142100811005, "learning_rate": 0.01, "loss": 1.9854, "step": 75906 }, { "epoch": 7.801541623843782, "grad_norm": 0.03619580343365669, "learning_rate": 0.01, "loss": 1.9702, "step": 75909 }, { "epoch": 7.8018499486125386, "grad_norm": 0.0656135305762291, "learning_rate": 0.01, "loss": 1.9736, "step": 75912 }, { "epoch": 7.802158273381295, "grad_norm": 0.09268335998058319, "learning_rate": 0.01, "loss": 1.9712, "step": 75915 }, { "epoch": 7.802466598150051, "grad_norm": 0.041520409286022186, "learning_rate": 0.01, "loss": 1.9703, "step": 75918 }, { "epoch": 7.8027749229188075, "grad_norm": 0.05438603460788727, "learning_rate": 0.01, "loss": 1.9827, "step": 75921 }, { "epoch": 7.803083247687564, "grad_norm": 0.05482487007975578, "learning_rate": 0.01, "loss": 1.9432, "step": 75924 }, { "epoch": 7.803391572456321, "grad_norm": 0.08617548644542694, "learning_rate": 0.01, "loss": 1.9959, "step": 75927 }, { "epoch": 7.803699897225077, "grad_norm": 0.08135716617107391, "learning_rate": 0.01, "loss": 1.9505, "step": 75930 }, { "epoch": 7.804008221993834, "grad_norm": 0.0932932123541832, "learning_rate": 0.01, "loss": 1.9868, "step": 75933 }, { "epoch": 7.80431654676259, "grad_norm": 0.06797019392251968, "learning_rate": 0.01, "loss": 1.9841, "step": 75936 }, { "epoch": 7.804624871531346, "grad_norm": 0.09875380992889404, "learning_rate": 0.01, "loss": 1.9655, "step": 75939 }, { "epoch": 7.8049331963001025, "grad_norm": 0.07438071072101593, "learning_rate": 0.01, "loss": 1.9805, "step": 75942 }, { "epoch": 7.805241521068859, "grad_norm": 0.06156305968761444, "learning_rate": 0.01, "loss": 1.9707, "step": 75945 }, { "epoch": 7.805549845837616, "grad_norm": 0.07288916409015656, "learning_rate": 0.01, "loss": 1.9534, "step": 75948 }, { "epoch": 7.805858170606372, "grad_norm": 0.06853283941745758, "learning_rate": 0.01, "loss": 1.9506, "step": 75951 }, { "epoch": 7.806166495375129, "grad_norm": 0.07273688167333603, "learning_rate": 0.01, "loss": 1.9697, "step": 75954 }, { "epoch": 7.806474820143885, "grad_norm": 0.0601770393550396, "learning_rate": 0.01, "loss": 1.9432, "step": 75957 }, { "epoch": 7.806783144912641, "grad_norm": 0.07322997599840164, "learning_rate": 0.01, "loss": 1.9833, "step": 75960 }, { "epoch": 7.8070914696813976, "grad_norm": 0.08790867030620575, "learning_rate": 0.01, "loss": 1.9765, "step": 75963 }, { "epoch": 7.807399794450154, "grad_norm": 0.07412376254796982, "learning_rate": 0.01, "loss": 1.9715, "step": 75966 }, { "epoch": 7.80770811921891, "grad_norm": 0.06878551095724106, "learning_rate": 0.01, "loss": 1.9917, "step": 75969 }, { "epoch": 7.808016443987667, "grad_norm": 0.11822834610939026, "learning_rate": 0.01, "loss": 1.9643, "step": 75972 }, { "epoch": 7.808324768756424, "grad_norm": 0.039995722472667694, "learning_rate": 0.01, "loss": 1.9745, "step": 75975 }, { "epoch": 7.80863309352518, "grad_norm": 0.10060998797416687, "learning_rate": 0.01, "loss": 1.9936, "step": 75978 }, { "epoch": 7.808941418293936, "grad_norm": 0.0810547024011612, "learning_rate": 0.01, "loss": 1.9651, "step": 75981 }, { "epoch": 7.809249743062693, "grad_norm": 0.10982684791088104, "learning_rate": 0.01, "loss": 1.9606, "step": 75984 }, { "epoch": 7.809558067831449, "grad_norm": 0.05619985982775688, "learning_rate": 0.01, "loss": 1.9632, "step": 75987 }, { "epoch": 7.809866392600205, "grad_norm": 0.05136675387620926, "learning_rate": 0.01, "loss": 1.9536, "step": 75990 }, { "epoch": 7.810174717368962, "grad_norm": 0.03846390172839165, "learning_rate": 0.01, "loss": 1.956, "step": 75993 }, { "epoch": 7.810483042137719, "grad_norm": 0.03186337649822235, "learning_rate": 0.01, "loss": 1.9652, "step": 75996 }, { "epoch": 7.810791366906475, "grad_norm": 0.104001484811306, "learning_rate": 0.01, "loss": 1.9678, "step": 75999 }, { "epoch": 7.811099691675231, "grad_norm": 0.14364612102508545, "learning_rate": 0.01, "loss": 1.9695, "step": 76002 }, { "epoch": 7.811408016443988, "grad_norm": 0.09179578721523285, "learning_rate": 0.01, "loss": 1.969, "step": 76005 }, { "epoch": 7.811716341212744, "grad_norm": 0.043222784996032715, "learning_rate": 0.01, "loss": 1.95, "step": 76008 }, { "epoch": 7.8120246659815, "grad_norm": 0.05643289536237717, "learning_rate": 0.01, "loss": 1.9902, "step": 76011 }, { "epoch": 7.812332990750257, "grad_norm": 0.09506022185087204, "learning_rate": 0.01, "loss": 1.9939, "step": 76014 }, { "epoch": 7.812641315519013, "grad_norm": 0.03661196306347847, "learning_rate": 0.01, "loss": 1.9535, "step": 76017 }, { "epoch": 7.81294964028777, "grad_norm": 0.08643520623445511, "learning_rate": 0.01, "loss": 1.9732, "step": 76020 }, { "epoch": 7.813257965056526, "grad_norm": 0.06595692783594131, "learning_rate": 0.01, "loss": 1.9815, "step": 76023 }, { "epoch": 7.813566289825283, "grad_norm": 0.0512213371694088, "learning_rate": 0.01, "loss": 1.9662, "step": 76026 }, { "epoch": 7.813874614594039, "grad_norm": 0.05945082753896713, "learning_rate": 0.01, "loss": 1.9694, "step": 76029 }, { "epoch": 7.814182939362795, "grad_norm": 0.1069507747888565, "learning_rate": 0.01, "loss": 1.9912, "step": 76032 }, { "epoch": 7.814491264131552, "grad_norm": 0.06452130526304245, "learning_rate": 0.01, "loss": 1.9576, "step": 76035 }, { "epoch": 7.814799588900309, "grad_norm": 0.138530895113945, "learning_rate": 0.01, "loss": 1.9691, "step": 76038 }, { "epoch": 7.815107913669065, "grad_norm": 0.10860975086688995, "learning_rate": 0.01, "loss": 1.9999, "step": 76041 }, { "epoch": 7.815416238437821, "grad_norm": 0.10083730518817902, "learning_rate": 0.01, "loss": 1.9747, "step": 76044 }, { "epoch": 7.815724563206578, "grad_norm": 0.08879086375236511, "learning_rate": 0.01, "loss": 1.9666, "step": 76047 }, { "epoch": 7.816032887975334, "grad_norm": 0.06405922770500183, "learning_rate": 0.01, "loss": 1.988, "step": 76050 }, { "epoch": 7.81634121274409, "grad_norm": 0.06605300307273865, "learning_rate": 0.01, "loss": 1.9515, "step": 76053 }, { "epoch": 7.816649537512847, "grad_norm": 0.08002083748579025, "learning_rate": 0.01, "loss": 1.9832, "step": 76056 }, { "epoch": 7.816957862281603, "grad_norm": 0.058482032269239426, "learning_rate": 0.01, "loss": 1.9727, "step": 76059 }, { "epoch": 7.817266187050359, "grad_norm": 0.10967635363340378, "learning_rate": 0.01, "loss": 1.9669, "step": 76062 }, { "epoch": 7.8175745118191164, "grad_norm": 0.04840927943587303, "learning_rate": 0.01, "loss": 1.9765, "step": 76065 }, { "epoch": 7.817882836587873, "grad_norm": 0.08987026661634445, "learning_rate": 0.01, "loss": 1.9691, "step": 76068 }, { "epoch": 7.818191161356629, "grad_norm": 0.07145990431308746, "learning_rate": 0.01, "loss": 1.9992, "step": 76071 }, { "epoch": 7.818499486125385, "grad_norm": 0.07257866114377975, "learning_rate": 0.01, "loss": 1.9672, "step": 76074 }, { "epoch": 7.818807810894142, "grad_norm": 0.043422192335128784, "learning_rate": 0.01, "loss": 1.9808, "step": 76077 }, { "epoch": 7.819116135662898, "grad_norm": 0.05529724061489105, "learning_rate": 0.01, "loss": 1.9855, "step": 76080 }, { "epoch": 7.819424460431654, "grad_norm": 0.09055875241756439, "learning_rate": 0.01, "loss": 1.9533, "step": 76083 }, { "epoch": 7.8197327852004115, "grad_norm": 0.14238907396793365, "learning_rate": 0.01, "loss": 1.9502, "step": 76086 }, { "epoch": 7.820041109969168, "grad_norm": 0.07417774200439453, "learning_rate": 0.01, "loss": 1.9735, "step": 76089 }, { "epoch": 7.820349434737924, "grad_norm": 0.05844178423285484, "learning_rate": 0.01, "loss": 1.9715, "step": 76092 }, { "epoch": 7.82065775950668, "grad_norm": 0.07558295875787735, "learning_rate": 0.01, "loss": 1.9619, "step": 76095 }, { "epoch": 7.820966084275437, "grad_norm": 0.07094567269086838, "learning_rate": 0.01, "loss": 1.9632, "step": 76098 }, { "epoch": 7.821274409044193, "grad_norm": 0.052053775638341904, "learning_rate": 0.01, "loss": 1.9593, "step": 76101 }, { "epoch": 7.821582733812949, "grad_norm": 0.03560258448123932, "learning_rate": 0.01, "loss": 1.9662, "step": 76104 }, { "epoch": 7.821891058581706, "grad_norm": 0.05725477635860443, "learning_rate": 0.01, "loss": 1.9447, "step": 76107 }, { "epoch": 7.822199383350463, "grad_norm": 0.06399683654308319, "learning_rate": 0.01, "loss": 1.9779, "step": 76110 }, { "epoch": 7.822507708119219, "grad_norm": 0.09520284831523895, "learning_rate": 0.01, "loss": 1.97, "step": 76113 }, { "epoch": 7.8228160328879754, "grad_norm": 0.07167236506938934, "learning_rate": 0.01, "loss": 1.9643, "step": 76116 }, { "epoch": 7.823124357656732, "grad_norm": 0.08711408823728561, "learning_rate": 0.01, "loss": 1.9538, "step": 76119 }, { "epoch": 7.823432682425488, "grad_norm": 0.03872615098953247, "learning_rate": 0.01, "loss": 1.9768, "step": 76122 }, { "epoch": 7.823741007194244, "grad_norm": 0.0413740873336792, "learning_rate": 0.01, "loss": 1.9739, "step": 76125 }, { "epoch": 7.824049331963001, "grad_norm": 0.06173890829086304, "learning_rate": 0.01, "loss": 1.9702, "step": 76128 }, { "epoch": 7.824357656731758, "grad_norm": 0.03490239009261131, "learning_rate": 0.01, "loss": 1.947, "step": 76131 }, { "epoch": 7.824665981500514, "grad_norm": 0.06848657131195068, "learning_rate": 0.01, "loss": 1.9499, "step": 76134 }, { "epoch": 7.8249743062692705, "grad_norm": 0.09866426885128021, "learning_rate": 0.01, "loss": 1.9404, "step": 76137 }, { "epoch": 7.825282631038027, "grad_norm": 0.08280821889638901, "learning_rate": 0.01, "loss": 1.9428, "step": 76140 }, { "epoch": 7.825590955806783, "grad_norm": 0.0681089535355568, "learning_rate": 0.01, "loss": 1.9752, "step": 76143 }, { "epoch": 7.825899280575539, "grad_norm": 0.062307052314281464, "learning_rate": 0.01, "loss": 1.9728, "step": 76146 }, { "epoch": 7.826207605344296, "grad_norm": 0.0920134112238884, "learning_rate": 0.01, "loss": 1.9972, "step": 76149 }, { "epoch": 7.826515930113052, "grad_norm": 0.07431496679782867, "learning_rate": 0.01, "loss": 1.964, "step": 76152 }, { "epoch": 7.826824254881809, "grad_norm": 0.11818663030862808, "learning_rate": 0.01, "loss": 1.9485, "step": 76155 }, { "epoch": 7.8271325796505655, "grad_norm": 0.09648428857326508, "learning_rate": 0.01, "loss": 1.954, "step": 76158 }, { "epoch": 7.827440904419322, "grad_norm": 0.052823755890131, "learning_rate": 0.01, "loss": 1.9694, "step": 76161 }, { "epoch": 7.827749229188078, "grad_norm": 0.048545703291893005, "learning_rate": 0.01, "loss": 1.9541, "step": 76164 }, { "epoch": 7.8280575539568344, "grad_norm": 0.03413026034832001, "learning_rate": 0.01, "loss": 1.9865, "step": 76167 }, { "epoch": 7.828365878725591, "grad_norm": 0.050574932247400284, "learning_rate": 0.01, "loss": 1.9789, "step": 76170 }, { "epoch": 7.828674203494347, "grad_norm": 0.04505613073706627, "learning_rate": 0.01, "loss": 1.9775, "step": 76173 }, { "epoch": 7.828982528263104, "grad_norm": 0.05546938627958298, "learning_rate": 0.01, "loss": 1.9657, "step": 76176 }, { "epoch": 7.829290853031861, "grad_norm": 0.10264269262552261, "learning_rate": 0.01, "loss": 1.9596, "step": 76179 }, { "epoch": 7.829599177800617, "grad_norm": 0.044220950454473495, "learning_rate": 0.01, "loss": 1.9754, "step": 76182 }, { "epoch": 7.829907502569373, "grad_norm": 0.042216695845127106, "learning_rate": 0.01, "loss": 1.9654, "step": 76185 }, { "epoch": 7.8302158273381295, "grad_norm": 0.04472136124968529, "learning_rate": 0.01, "loss": 1.9813, "step": 76188 }, { "epoch": 7.830524152106886, "grad_norm": 0.04147589951753616, "learning_rate": 0.01, "loss": 1.9411, "step": 76191 }, { "epoch": 7.830832476875642, "grad_norm": 0.07456754893064499, "learning_rate": 0.01, "loss": 1.9705, "step": 76194 }, { "epoch": 7.831140801644398, "grad_norm": 0.09007889777421951, "learning_rate": 0.01, "loss": 1.9913, "step": 76197 }, { "epoch": 7.831449126413155, "grad_norm": 0.07880180329084396, "learning_rate": 0.01, "loss": 1.9661, "step": 76200 }, { "epoch": 7.831757451181912, "grad_norm": 0.03387047350406647, "learning_rate": 0.01, "loss": 1.9696, "step": 76203 }, { "epoch": 7.832065775950668, "grad_norm": 0.03345126286149025, "learning_rate": 0.01, "loss": 1.9406, "step": 76206 }, { "epoch": 7.8323741007194245, "grad_norm": 0.041953541338443756, "learning_rate": 0.01, "loss": 1.9532, "step": 76209 }, { "epoch": 7.832682425488181, "grad_norm": 0.04632129892706871, "learning_rate": 0.01, "loss": 1.9862, "step": 76212 }, { "epoch": 7.832990750256937, "grad_norm": 0.04222474619746208, "learning_rate": 0.01, "loss": 1.9474, "step": 76215 }, { "epoch": 7.8332990750256934, "grad_norm": 0.07548588514328003, "learning_rate": 0.01, "loss": 1.9887, "step": 76218 }, { "epoch": 7.833607399794451, "grad_norm": 0.09007594734430313, "learning_rate": 0.01, "loss": 1.9781, "step": 76221 }, { "epoch": 7.833915724563207, "grad_norm": 0.08240062743425369, "learning_rate": 0.01, "loss": 2.0005, "step": 76224 }, { "epoch": 7.834224049331963, "grad_norm": 0.12393136322498322, "learning_rate": 0.01, "loss": 1.9803, "step": 76227 }, { "epoch": 7.83453237410072, "grad_norm": 0.05861087515950203, "learning_rate": 0.01, "loss": 1.9826, "step": 76230 }, { "epoch": 7.834840698869476, "grad_norm": 0.0426303967833519, "learning_rate": 0.01, "loss": 1.9755, "step": 76233 }, { "epoch": 7.835149023638232, "grad_norm": 0.06229201331734657, "learning_rate": 0.01, "loss": 1.9443, "step": 76236 }, { "epoch": 7.8354573484069885, "grad_norm": 0.07277770340442657, "learning_rate": 0.01, "loss": 1.962, "step": 76239 }, { "epoch": 7.835765673175745, "grad_norm": 0.06426133960485458, "learning_rate": 0.01, "loss": 1.9731, "step": 76242 }, { "epoch": 7.836073997944501, "grad_norm": 0.05311770737171173, "learning_rate": 0.01, "loss": 1.9618, "step": 76245 }, { "epoch": 7.836382322713258, "grad_norm": 0.05998200178146362, "learning_rate": 0.01, "loss": 1.9839, "step": 76248 }, { "epoch": 7.836690647482015, "grad_norm": 0.07519685477018356, "learning_rate": 0.01, "loss": 1.9712, "step": 76251 }, { "epoch": 7.836998972250771, "grad_norm": 0.12155578285455704, "learning_rate": 0.01, "loss": 2.0025, "step": 76254 }, { "epoch": 7.837307297019527, "grad_norm": 0.11642058193683624, "learning_rate": 0.01, "loss": 1.9638, "step": 76257 }, { "epoch": 7.8376156217882835, "grad_norm": 0.06032479554414749, "learning_rate": 0.01, "loss": 1.9949, "step": 76260 }, { "epoch": 7.83792394655704, "grad_norm": 0.05517754331231117, "learning_rate": 0.01, "loss": 1.9588, "step": 76263 }, { "epoch": 7.838232271325796, "grad_norm": 0.029733365401625633, "learning_rate": 0.01, "loss": 1.9683, "step": 76266 }, { "epoch": 7.838540596094553, "grad_norm": 0.10677066445350647, "learning_rate": 0.01, "loss": 1.9508, "step": 76269 }, { "epoch": 7.83884892086331, "grad_norm": 0.07923242449760437, "learning_rate": 0.01, "loss": 1.9533, "step": 76272 }, { "epoch": 7.839157245632066, "grad_norm": 0.05212559178471565, "learning_rate": 0.01, "loss": 1.9871, "step": 76275 }, { "epoch": 7.839465570400822, "grad_norm": 0.08050580322742462, "learning_rate": 0.01, "loss": 1.9553, "step": 76278 }, { "epoch": 7.839773895169579, "grad_norm": 0.08338946104049683, "learning_rate": 0.01, "loss": 2.0165, "step": 76281 }, { "epoch": 7.840082219938335, "grad_norm": 0.0553726889193058, "learning_rate": 0.01, "loss": 1.9907, "step": 76284 }, { "epoch": 7.840390544707091, "grad_norm": 0.06254331022500992, "learning_rate": 0.01, "loss": 1.9584, "step": 76287 }, { "epoch": 7.8406988694758475, "grad_norm": 0.058102309703826904, "learning_rate": 0.01, "loss": 1.9898, "step": 76290 }, { "epoch": 7.841007194244605, "grad_norm": 0.04690523445606232, "learning_rate": 0.01, "loss": 1.9791, "step": 76293 }, { "epoch": 7.841315519013361, "grad_norm": 0.036311473697423935, "learning_rate": 0.01, "loss": 1.9418, "step": 76296 }, { "epoch": 7.841623843782117, "grad_norm": 0.03946409747004509, "learning_rate": 0.01, "loss": 1.95, "step": 76299 }, { "epoch": 7.841932168550874, "grad_norm": 0.04926838353276253, "learning_rate": 0.01, "loss": 1.9483, "step": 76302 }, { "epoch": 7.84224049331963, "grad_norm": 0.1399640291929245, "learning_rate": 0.01, "loss": 1.9668, "step": 76305 }, { "epoch": 7.842548818088386, "grad_norm": 0.1891847401857376, "learning_rate": 0.01, "loss": 1.9701, "step": 76308 }, { "epoch": 7.8428571428571425, "grad_norm": 0.13039416074752808, "learning_rate": 0.01, "loss": 1.9849, "step": 76311 }, { "epoch": 7.8431654676259, "grad_norm": 0.0651208832859993, "learning_rate": 0.01, "loss": 1.9622, "step": 76314 }, { "epoch": 7.843473792394656, "grad_norm": 0.038830872625112534, "learning_rate": 0.01, "loss": 1.9669, "step": 76317 }, { "epoch": 7.843782117163412, "grad_norm": 0.04478251934051514, "learning_rate": 0.01, "loss": 1.9663, "step": 76320 }, { "epoch": 7.844090441932169, "grad_norm": 0.03909466788172722, "learning_rate": 0.01, "loss": 1.9773, "step": 76323 }, { "epoch": 7.844398766700925, "grad_norm": 0.045009203255176544, "learning_rate": 0.01, "loss": 1.9713, "step": 76326 }, { "epoch": 7.844707091469681, "grad_norm": 0.042739205062389374, "learning_rate": 0.01, "loss": 1.956, "step": 76329 }, { "epoch": 7.845015416238438, "grad_norm": 0.04345414787530899, "learning_rate": 0.01, "loss": 1.9551, "step": 76332 }, { "epoch": 7.845323741007194, "grad_norm": 0.056563850492239, "learning_rate": 0.01, "loss": 1.9546, "step": 76335 }, { "epoch": 7.84563206577595, "grad_norm": 0.04494136944413185, "learning_rate": 0.01, "loss": 1.9837, "step": 76338 }, { "epoch": 7.845940390544707, "grad_norm": 0.04633985832333565, "learning_rate": 0.01, "loss": 1.958, "step": 76341 }, { "epoch": 7.846248715313464, "grad_norm": 0.13986924290657043, "learning_rate": 0.01, "loss": 1.9652, "step": 76344 }, { "epoch": 7.84655704008222, "grad_norm": 0.11160435527563095, "learning_rate": 0.01, "loss": 1.9426, "step": 76347 }, { "epoch": 7.846865364850976, "grad_norm": 0.04715929180383682, "learning_rate": 0.01, "loss": 1.9737, "step": 76350 }, { "epoch": 7.847173689619733, "grad_norm": 0.03196023032069206, "learning_rate": 0.01, "loss": 1.9769, "step": 76353 }, { "epoch": 7.847482014388489, "grad_norm": 0.05613362416625023, "learning_rate": 0.01, "loss": 1.9509, "step": 76356 }, { "epoch": 7.847790339157246, "grad_norm": 0.06527100503444672, "learning_rate": 0.01, "loss": 1.9496, "step": 76359 }, { "epoch": 7.848098663926002, "grad_norm": 0.030473025515675545, "learning_rate": 0.01, "loss": 1.9716, "step": 76362 }, { "epoch": 7.848406988694759, "grad_norm": 0.0393981859087944, "learning_rate": 0.01, "loss": 1.9665, "step": 76365 }, { "epoch": 7.848715313463515, "grad_norm": 0.03515271842479706, "learning_rate": 0.01, "loss": 1.9576, "step": 76368 }, { "epoch": 7.849023638232271, "grad_norm": 0.03752501681447029, "learning_rate": 0.01, "loss": 1.9586, "step": 76371 }, { "epoch": 7.849331963001028, "grad_norm": 0.05749157443642616, "learning_rate": 0.01, "loss": 1.9479, "step": 76374 }, { "epoch": 7.849640287769784, "grad_norm": 0.1413191258907318, "learning_rate": 0.01, "loss": 1.9603, "step": 76377 }, { "epoch": 7.84994861253854, "grad_norm": 0.12259876728057861, "learning_rate": 0.01, "loss": 1.9594, "step": 76380 }, { "epoch": 7.850256937307297, "grad_norm": 0.04768449068069458, "learning_rate": 0.01, "loss": 1.9792, "step": 76383 }, { "epoch": 7.850565262076054, "grad_norm": 0.07419627904891968, "learning_rate": 0.01, "loss": 1.9631, "step": 76386 }, { "epoch": 7.85087358684481, "grad_norm": 0.05266867205500603, "learning_rate": 0.01, "loss": 1.9578, "step": 76389 }, { "epoch": 7.851181911613566, "grad_norm": 0.05332281067967415, "learning_rate": 0.01, "loss": 1.9534, "step": 76392 }, { "epoch": 7.851490236382323, "grad_norm": 0.04726416617631912, "learning_rate": 0.01, "loss": 1.9688, "step": 76395 }, { "epoch": 7.851798561151079, "grad_norm": 0.0462283231317997, "learning_rate": 0.01, "loss": 1.9643, "step": 76398 }, { "epoch": 7.852106885919835, "grad_norm": 0.038481879979372025, "learning_rate": 0.01, "loss": 1.973, "step": 76401 }, { "epoch": 7.852415210688592, "grad_norm": 0.06793560832738876, "learning_rate": 0.01, "loss": 1.9547, "step": 76404 }, { "epoch": 7.852723535457349, "grad_norm": 0.10262496024370193, "learning_rate": 0.01, "loss": 1.9536, "step": 76407 }, { "epoch": 7.853031860226105, "grad_norm": 0.07564928382635117, "learning_rate": 0.01, "loss": 1.9764, "step": 76410 }, { "epoch": 7.853340184994861, "grad_norm": 0.09007153660058975, "learning_rate": 0.01, "loss": 1.9502, "step": 76413 }, { "epoch": 7.853648509763618, "grad_norm": 0.05348995327949524, "learning_rate": 0.01, "loss": 1.9654, "step": 76416 }, { "epoch": 7.853956834532374, "grad_norm": 0.04418191686272621, "learning_rate": 0.01, "loss": 1.9521, "step": 76419 }, { "epoch": 7.85426515930113, "grad_norm": 0.038563381880521774, "learning_rate": 0.01, "loss": 1.97, "step": 76422 }, { "epoch": 7.854573484069887, "grad_norm": 0.12509912252426147, "learning_rate": 0.01, "loss": 1.9833, "step": 76425 }, { "epoch": 7.854881808838643, "grad_norm": 0.11487328261137009, "learning_rate": 0.01, "loss": 1.988, "step": 76428 }, { "epoch": 7.8551901336074, "grad_norm": 0.04599343240261078, "learning_rate": 0.01, "loss": 1.9683, "step": 76431 }, { "epoch": 7.8554984583761565, "grad_norm": 0.0848371610045433, "learning_rate": 0.01, "loss": 1.9363, "step": 76434 }, { "epoch": 7.855806783144913, "grad_norm": 0.06545133888721466, "learning_rate": 0.01, "loss": 1.9804, "step": 76437 }, { "epoch": 7.856115107913669, "grad_norm": 0.09184720367193222, "learning_rate": 0.01, "loss": 1.9747, "step": 76440 }, { "epoch": 7.856423432682425, "grad_norm": 0.04043002799153328, "learning_rate": 0.01, "loss": 1.9703, "step": 76443 }, { "epoch": 7.856731757451182, "grad_norm": 0.08890711516141891, "learning_rate": 0.01, "loss": 1.9927, "step": 76446 }, { "epoch": 7.857040082219938, "grad_norm": 0.13198690116405487, "learning_rate": 0.01, "loss": 1.9608, "step": 76449 }, { "epoch": 7.857348406988695, "grad_norm": 0.19590161740779877, "learning_rate": 0.01, "loss": 1.9864, "step": 76452 }, { "epoch": 7.8576567317574515, "grad_norm": 0.1467210203409195, "learning_rate": 0.01, "loss": 1.9641, "step": 76455 }, { "epoch": 7.857965056526208, "grad_norm": 0.06122981756925583, "learning_rate": 0.01, "loss": 1.9779, "step": 76458 }, { "epoch": 7.858273381294964, "grad_norm": 0.04069874808192253, "learning_rate": 0.01, "loss": 1.9587, "step": 76461 }, { "epoch": 7.85858170606372, "grad_norm": 0.05008160322904587, "learning_rate": 0.01, "loss": 1.9721, "step": 76464 }, { "epoch": 7.858890030832477, "grad_norm": 0.04874841496348381, "learning_rate": 0.01, "loss": 1.9791, "step": 76467 }, { "epoch": 7.859198355601233, "grad_norm": 0.0698808953166008, "learning_rate": 0.01, "loss": 1.9749, "step": 76470 }, { "epoch": 7.859506680369989, "grad_norm": 0.0393466018140316, "learning_rate": 0.01, "loss": 1.9637, "step": 76473 }, { "epoch": 7.859815005138746, "grad_norm": 0.04882080852985382, "learning_rate": 0.01, "loss": 1.9796, "step": 76476 }, { "epoch": 7.860123329907503, "grad_norm": 0.03266339749097824, "learning_rate": 0.01, "loss": 1.9622, "step": 76479 }, { "epoch": 7.860431654676259, "grad_norm": 0.028406213968992233, "learning_rate": 0.01, "loss": 1.9817, "step": 76482 }, { "epoch": 7.8607399794450155, "grad_norm": 0.10809627175331116, "learning_rate": 0.01, "loss": 1.9215, "step": 76485 }, { "epoch": 7.861048304213772, "grad_norm": 0.045726872980594635, "learning_rate": 0.01, "loss": 1.9763, "step": 76488 }, { "epoch": 7.861356628982528, "grad_norm": 0.06965349614620209, "learning_rate": 0.01, "loss": 1.9696, "step": 76491 }, { "epoch": 7.861664953751284, "grad_norm": 0.04185190051794052, "learning_rate": 0.01, "loss": 1.9601, "step": 76494 }, { "epoch": 7.861973278520042, "grad_norm": 0.09308112412691116, "learning_rate": 0.01, "loss": 1.9664, "step": 76497 }, { "epoch": 7.862281603288798, "grad_norm": 0.07587264478206635, "learning_rate": 0.01, "loss": 1.9481, "step": 76500 }, { "epoch": 7.862589928057554, "grad_norm": 0.05060400441288948, "learning_rate": 0.01, "loss": 1.9689, "step": 76503 }, { "epoch": 7.8628982528263105, "grad_norm": 0.03997211903333664, "learning_rate": 0.01, "loss": 1.9275, "step": 76506 }, { "epoch": 7.863206577595067, "grad_norm": 0.04034551605582237, "learning_rate": 0.01, "loss": 1.9768, "step": 76509 }, { "epoch": 7.863514902363823, "grad_norm": 0.03622760251164436, "learning_rate": 0.01, "loss": 1.955, "step": 76512 }, { "epoch": 7.863823227132579, "grad_norm": 0.04714718833565712, "learning_rate": 0.01, "loss": 1.9897, "step": 76515 }, { "epoch": 7.864131551901336, "grad_norm": 0.14129970967769623, "learning_rate": 0.01, "loss": 1.9594, "step": 76518 }, { "epoch": 7.864439876670092, "grad_norm": 0.04471175745129585, "learning_rate": 0.01, "loss": 1.9752, "step": 76521 }, { "epoch": 7.864748201438849, "grad_norm": 0.11593741178512573, "learning_rate": 0.01, "loss": 1.9648, "step": 76524 }, { "epoch": 7.8650565262076055, "grad_norm": 0.03648509830236435, "learning_rate": 0.01, "loss": 1.9527, "step": 76527 }, { "epoch": 7.865364850976362, "grad_norm": 0.04938739538192749, "learning_rate": 0.01, "loss": 1.9743, "step": 76530 }, { "epoch": 7.865673175745118, "grad_norm": 0.04273061454296112, "learning_rate": 0.01, "loss": 1.9756, "step": 76533 }, { "epoch": 7.8659815005138745, "grad_norm": 0.042451903223991394, "learning_rate": 0.01, "loss": 1.9654, "step": 76536 }, { "epoch": 7.866289825282631, "grad_norm": 0.04022755101323128, "learning_rate": 0.01, "loss": 2.0009, "step": 76539 }, { "epoch": 7.866598150051388, "grad_norm": 0.09318958967924118, "learning_rate": 0.01, "loss": 1.9932, "step": 76542 }, { "epoch": 7.866906474820144, "grad_norm": 0.07233930379152298, "learning_rate": 0.01, "loss": 1.955, "step": 76545 }, { "epoch": 7.867214799588901, "grad_norm": 0.06357026845216751, "learning_rate": 0.01, "loss": 1.9933, "step": 76548 }, { "epoch": 7.867523124357657, "grad_norm": 0.04960564523935318, "learning_rate": 0.01, "loss": 1.9698, "step": 76551 }, { "epoch": 7.867831449126413, "grad_norm": 0.039174117147922516, "learning_rate": 0.01, "loss": 1.9667, "step": 76554 }, { "epoch": 7.8681397738951695, "grad_norm": 0.049112312495708466, "learning_rate": 0.01, "loss": 1.9805, "step": 76557 }, { "epoch": 7.868448098663926, "grad_norm": 0.08262576162815094, "learning_rate": 0.01, "loss": 1.9674, "step": 76560 }, { "epoch": 7.868756423432682, "grad_norm": 0.09073341637849808, "learning_rate": 0.01, "loss": 1.9488, "step": 76563 }, { "epoch": 7.869064748201438, "grad_norm": 0.10602176189422607, "learning_rate": 0.01, "loss": 1.959, "step": 76566 }, { "epoch": 7.869373072970196, "grad_norm": 0.179390549659729, "learning_rate": 0.01, "loss": 1.9704, "step": 76569 }, { "epoch": 7.869681397738952, "grad_norm": 0.10436874628067017, "learning_rate": 0.01, "loss": 1.9821, "step": 76572 }, { "epoch": 7.869989722507708, "grad_norm": 0.06214969605207443, "learning_rate": 0.01, "loss": 1.9924, "step": 76575 }, { "epoch": 7.8702980472764645, "grad_norm": 0.04790744557976723, "learning_rate": 0.01, "loss": 1.978, "step": 76578 }, { "epoch": 7.870606372045221, "grad_norm": 0.127052441239357, "learning_rate": 0.01, "loss": 1.9836, "step": 76581 }, { "epoch": 7.870914696813977, "grad_norm": 0.06101028993725777, "learning_rate": 0.01, "loss": 1.9608, "step": 76584 }, { "epoch": 7.8712230215827335, "grad_norm": 0.04569672420620918, "learning_rate": 0.01, "loss": 1.9819, "step": 76587 }, { "epoch": 7.871531346351491, "grad_norm": 0.04835071787238121, "learning_rate": 0.01, "loss": 1.9682, "step": 76590 }, { "epoch": 7.871839671120247, "grad_norm": 0.048351310193538666, "learning_rate": 0.01, "loss": 1.9733, "step": 76593 }, { "epoch": 7.872147995889003, "grad_norm": 0.035411905497312546, "learning_rate": 0.01, "loss": 1.9733, "step": 76596 }, { "epoch": 7.87245632065776, "grad_norm": 0.09007140249013901, "learning_rate": 0.01, "loss": 1.9871, "step": 76599 }, { "epoch": 7.872764645426516, "grad_norm": 0.04166480526328087, "learning_rate": 0.01, "loss": 1.9626, "step": 76602 }, { "epoch": 7.873072970195272, "grad_norm": 0.11124366521835327, "learning_rate": 0.01, "loss": 1.9784, "step": 76605 }, { "epoch": 7.8733812949640285, "grad_norm": 0.07696343213319778, "learning_rate": 0.01, "loss": 1.9696, "step": 76608 }, { "epoch": 7.873689619732785, "grad_norm": 0.05737284570932388, "learning_rate": 0.01, "loss": 1.9779, "step": 76611 }, { "epoch": 7.873997944501542, "grad_norm": 0.05126175656914711, "learning_rate": 0.01, "loss": 1.9812, "step": 76614 }, { "epoch": 7.874306269270298, "grad_norm": 0.04633242264389992, "learning_rate": 0.01, "loss": 2.0058, "step": 76617 }, { "epoch": 7.874614594039055, "grad_norm": 0.08935544639825821, "learning_rate": 0.01, "loss": 1.957, "step": 76620 }, { "epoch": 7.874922918807811, "grad_norm": 0.06309745460748672, "learning_rate": 0.01, "loss": 1.9697, "step": 76623 }, { "epoch": 7.875231243576567, "grad_norm": 0.10125695914030075, "learning_rate": 0.01, "loss": 1.9697, "step": 76626 }, { "epoch": 7.8755395683453235, "grad_norm": 0.1011071428656578, "learning_rate": 0.01, "loss": 1.9362, "step": 76629 }, { "epoch": 7.87584789311408, "grad_norm": 0.07101625204086304, "learning_rate": 0.01, "loss": 1.9704, "step": 76632 }, { "epoch": 7.876156217882837, "grad_norm": 0.07854854315519333, "learning_rate": 0.01, "loss": 1.9579, "step": 76635 }, { "epoch": 7.876464542651593, "grad_norm": 0.05637693777680397, "learning_rate": 0.01, "loss": 1.9583, "step": 76638 }, { "epoch": 7.87677286742035, "grad_norm": 0.07590261101722717, "learning_rate": 0.01, "loss": 1.9732, "step": 76641 }, { "epoch": 7.877081192189106, "grad_norm": 0.08829659968614578, "learning_rate": 0.01, "loss": 1.9775, "step": 76644 }, { "epoch": 7.877389516957862, "grad_norm": 0.060618992894887924, "learning_rate": 0.01, "loss": 1.9996, "step": 76647 }, { "epoch": 7.877697841726619, "grad_norm": 0.048382773995399475, "learning_rate": 0.01, "loss": 1.9949, "step": 76650 }, { "epoch": 7.878006166495375, "grad_norm": 0.10038956254720688, "learning_rate": 0.01, "loss": 1.959, "step": 76653 }, { "epoch": 7.878314491264131, "grad_norm": 0.10625267773866653, "learning_rate": 0.01, "loss": 1.9595, "step": 76656 }, { "epoch": 7.8786228160328875, "grad_norm": 0.03938886523246765, "learning_rate": 0.01, "loss": 1.9634, "step": 76659 }, { "epoch": 7.878931140801645, "grad_norm": 0.07031627744436264, "learning_rate": 0.01, "loss": 1.9739, "step": 76662 }, { "epoch": 7.879239465570401, "grad_norm": 0.04160325601696968, "learning_rate": 0.01, "loss": 1.9563, "step": 76665 }, { "epoch": 7.879547790339157, "grad_norm": 0.04842138662934303, "learning_rate": 0.01, "loss": 1.9642, "step": 76668 }, { "epoch": 7.879856115107914, "grad_norm": 0.04438458010554314, "learning_rate": 0.01, "loss": 1.9501, "step": 76671 }, { "epoch": 7.88016443987667, "grad_norm": 0.13517126441001892, "learning_rate": 0.01, "loss": 1.9717, "step": 76674 }, { "epoch": 7.880472764645426, "grad_norm": 0.04812932014465332, "learning_rate": 0.01, "loss": 1.9633, "step": 76677 }, { "epoch": 7.880781089414183, "grad_norm": 0.04208145663142204, "learning_rate": 0.01, "loss": 1.9669, "step": 76680 }, { "epoch": 7.88108941418294, "grad_norm": 0.04132638871669769, "learning_rate": 0.01, "loss": 1.9724, "step": 76683 }, { "epoch": 7.881397738951696, "grad_norm": 0.08753607422113419, "learning_rate": 0.01, "loss": 1.9897, "step": 76686 }, { "epoch": 7.881706063720452, "grad_norm": 0.05934629589319229, "learning_rate": 0.01, "loss": 1.9795, "step": 76689 }, { "epoch": 7.882014388489209, "grad_norm": 0.1018429771065712, "learning_rate": 0.01, "loss": 1.9624, "step": 76692 }, { "epoch": 7.882322713257965, "grad_norm": 0.09443504363298416, "learning_rate": 0.01, "loss": 1.9822, "step": 76695 }, { "epoch": 7.882631038026721, "grad_norm": 0.06363807618618011, "learning_rate": 0.01, "loss": 1.9667, "step": 76698 }, { "epoch": 7.882939362795478, "grad_norm": 0.03805544599890709, "learning_rate": 0.01, "loss": 1.961, "step": 76701 }, { "epoch": 7.883247687564234, "grad_norm": 0.08899611979722977, "learning_rate": 0.01, "loss": 1.9915, "step": 76704 }, { "epoch": 7.883556012332991, "grad_norm": 0.03682880103588104, "learning_rate": 0.01, "loss": 1.9601, "step": 76707 }, { "epoch": 7.883864337101747, "grad_norm": 0.10958962887525558, "learning_rate": 0.01, "loss": 1.9827, "step": 76710 }, { "epoch": 7.884172661870504, "grad_norm": 0.10766680538654327, "learning_rate": 0.01, "loss": 1.9778, "step": 76713 }, { "epoch": 7.88448098663926, "grad_norm": 0.0537521131336689, "learning_rate": 0.01, "loss": 1.9544, "step": 76716 }, { "epoch": 7.884789311408016, "grad_norm": 0.05160677060484886, "learning_rate": 0.01, "loss": 1.9648, "step": 76719 }, { "epoch": 7.885097636176773, "grad_norm": 0.05430131033062935, "learning_rate": 0.01, "loss": 1.9656, "step": 76722 }, { "epoch": 7.885405960945529, "grad_norm": 0.04517725110054016, "learning_rate": 0.01, "loss": 1.9845, "step": 76725 }, { "epoch": 7.885714285714286, "grad_norm": 0.03742944449186325, "learning_rate": 0.01, "loss": 1.9494, "step": 76728 }, { "epoch": 7.886022610483042, "grad_norm": 0.03228388726711273, "learning_rate": 0.01, "loss": 1.9467, "step": 76731 }, { "epoch": 7.886330935251799, "grad_norm": 0.1236288994550705, "learning_rate": 0.01, "loss": 1.9433, "step": 76734 }, { "epoch": 7.886639260020555, "grad_norm": 0.06338070333003998, "learning_rate": 0.01, "loss": 1.9846, "step": 76737 }, { "epoch": 7.886947584789311, "grad_norm": 0.08285798132419586, "learning_rate": 0.01, "loss": 1.9858, "step": 76740 }, { "epoch": 7.887255909558068, "grad_norm": 0.07970725744962692, "learning_rate": 0.01, "loss": 1.9723, "step": 76743 }, { "epoch": 7.887564234326824, "grad_norm": 0.12220615893602371, "learning_rate": 0.01, "loss": 1.9637, "step": 76746 }, { "epoch": 7.88787255909558, "grad_norm": 0.04591503366827965, "learning_rate": 0.01, "loss": 1.9973, "step": 76749 }, { "epoch": 7.8881808838643375, "grad_norm": 0.03333088383078575, "learning_rate": 0.01, "loss": 1.9678, "step": 76752 }, { "epoch": 7.888489208633094, "grad_norm": 0.03739148750901222, "learning_rate": 0.01, "loss": 1.9863, "step": 76755 }, { "epoch": 7.88879753340185, "grad_norm": 0.048116832971572876, "learning_rate": 0.01, "loss": 1.9759, "step": 76758 }, { "epoch": 7.889105858170606, "grad_norm": 0.051414813846349716, "learning_rate": 0.01, "loss": 1.9663, "step": 76761 }, { "epoch": 7.889414182939363, "grad_norm": 0.04409851133823395, "learning_rate": 0.01, "loss": 1.9675, "step": 76764 }, { "epoch": 7.889722507708119, "grad_norm": 0.09132188558578491, "learning_rate": 0.01, "loss": 1.9572, "step": 76767 }, { "epoch": 7.890030832476875, "grad_norm": 0.10048239678144455, "learning_rate": 0.01, "loss": 1.9869, "step": 76770 }, { "epoch": 7.8903391572456325, "grad_norm": 0.1443302035331726, "learning_rate": 0.01, "loss": 1.9815, "step": 76773 }, { "epoch": 7.890647482014389, "grad_norm": 0.10798861086368561, "learning_rate": 0.01, "loss": 1.9591, "step": 76776 }, { "epoch": 7.890955806783145, "grad_norm": 0.08225159347057343, "learning_rate": 0.01, "loss": 1.9952, "step": 76779 }, { "epoch": 7.891264131551901, "grad_norm": 0.0331072062253952, "learning_rate": 0.01, "loss": 1.9865, "step": 76782 }, { "epoch": 7.891572456320658, "grad_norm": 0.03549625724554062, "learning_rate": 0.01, "loss": 1.9771, "step": 76785 }, { "epoch": 7.891880781089414, "grad_norm": 0.041428498923778534, "learning_rate": 0.01, "loss": 1.9432, "step": 76788 }, { "epoch": 7.89218910585817, "grad_norm": 0.059432294219732285, "learning_rate": 0.01, "loss": 1.9632, "step": 76791 }, { "epoch": 7.892497430626927, "grad_norm": 0.11326347291469574, "learning_rate": 0.01, "loss": 1.961, "step": 76794 }, { "epoch": 7.892805755395683, "grad_norm": 0.07109850645065308, "learning_rate": 0.01, "loss": 1.9814, "step": 76797 }, { "epoch": 7.89311408016444, "grad_norm": 0.0451069213449955, "learning_rate": 0.01, "loss": 1.9702, "step": 76800 }, { "epoch": 7.8934224049331965, "grad_norm": 0.03823963180184364, "learning_rate": 0.01, "loss": 1.9615, "step": 76803 }, { "epoch": 7.893730729701953, "grad_norm": 0.07655234634876251, "learning_rate": 0.01, "loss": 1.9594, "step": 76806 }, { "epoch": 7.894039054470709, "grad_norm": 0.06917836517095566, "learning_rate": 0.01, "loss": 1.9639, "step": 76809 }, { "epoch": 7.894347379239465, "grad_norm": 0.03960380703210831, "learning_rate": 0.01, "loss": 1.9825, "step": 76812 }, { "epoch": 7.894655704008222, "grad_norm": 0.04303770512342453, "learning_rate": 0.01, "loss": 1.9548, "step": 76815 }, { "epoch": 7.894964028776979, "grad_norm": 0.0763452872633934, "learning_rate": 0.01, "loss": 1.9575, "step": 76818 }, { "epoch": 7.895272353545735, "grad_norm": 0.0780813917517662, "learning_rate": 0.01, "loss": 1.9681, "step": 76821 }, { "epoch": 7.8955806783144915, "grad_norm": 0.14030003547668457, "learning_rate": 0.01, "loss": 1.9878, "step": 76824 }, { "epoch": 7.895889003083248, "grad_norm": 0.06355911493301392, "learning_rate": 0.01, "loss": 1.9348, "step": 76827 }, { "epoch": 7.896197327852004, "grad_norm": 0.06618300080299377, "learning_rate": 0.01, "loss": 1.9644, "step": 76830 }, { "epoch": 7.89650565262076, "grad_norm": 0.07435224950313568, "learning_rate": 0.01, "loss": 1.976, "step": 76833 }, { "epoch": 7.896813977389517, "grad_norm": 0.06644349545240402, "learning_rate": 0.01, "loss": 1.9964, "step": 76836 }, { "epoch": 7.897122302158273, "grad_norm": 0.03589347004890442, "learning_rate": 0.01, "loss": 1.9523, "step": 76839 }, { "epoch": 7.897430626927029, "grad_norm": 0.10725891590118408, "learning_rate": 0.01, "loss": 1.993, "step": 76842 }, { "epoch": 7.8977389516957865, "grad_norm": 0.03418595716357231, "learning_rate": 0.01, "loss": 1.9381, "step": 76845 }, { "epoch": 7.898047276464543, "grad_norm": 0.04659489914774895, "learning_rate": 0.01, "loss": 1.9506, "step": 76848 }, { "epoch": 7.898355601233299, "grad_norm": 0.11705581843852997, "learning_rate": 0.01, "loss": 1.949, "step": 76851 }, { "epoch": 7.8986639260020555, "grad_norm": 0.16697385907173157, "learning_rate": 0.01, "loss": 1.9368, "step": 76854 }, { "epoch": 7.898972250770812, "grad_norm": 0.18981826305389404, "learning_rate": 0.01, "loss": 1.9467, "step": 76857 }, { "epoch": 7.899280575539568, "grad_norm": 0.12204299122095108, "learning_rate": 0.01, "loss": 1.9581, "step": 76860 }, { "epoch": 7.899588900308324, "grad_norm": 0.08213921636343002, "learning_rate": 0.01, "loss": 1.9803, "step": 76863 }, { "epoch": 7.899897225077082, "grad_norm": 0.05336681753396988, "learning_rate": 0.01, "loss": 1.9816, "step": 76866 }, { "epoch": 7.900205549845838, "grad_norm": 0.034992218017578125, "learning_rate": 0.01, "loss": 1.9835, "step": 76869 }, { "epoch": 7.900513874614594, "grad_norm": 0.052847620099782944, "learning_rate": 0.01, "loss": 1.9575, "step": 76872 }, { "epoch": 7.9008221993833505, "grad_norm": 0.050509583204984665, "learning_rate": 0.01, "loss": 1.954, "step": 76875 }, { "epoch": 7.901130524152107, "grad_norm": 0.03660079464316368, "learning_rate": 0.01, "loss": 1.9665, "step": 76878 }, { "epoch": 7.901438848920863, "grad_norm": 0.04175078123807907, "learning_rate": 0.01, "loss": 1.9569, "step": 76881 }, { "epoch": 7.901747173689619, "grad_norm": 0.07008788734674454, "learning_rate": 0.01, "loss": 1.9773, "step": 76884 }, { "epoch": 7.902055498458376, "grad_norm": 0.1135629415512085, "learning_rate": 0.01, "loss": 1.9586, "step": 76887 }, { "epoch": 7.902363823227133, "grad_norm": 0.14856696128845215, "learning_rate": 0.01, "loss": 1.9525, "step": 76890 }, { "epoch": 7.902672147995889, "grad_norm": 0.07151875644922256, "learning_rate": 0.01, "loss": 1.9497, "step": 76893 }, { "epoch": 7.9029804727646455, "grad_norm": 0.05615755915641785, "learning_rate": 0.01, "loss": 1.9451, "step": 76896 }, { "epoch": 7.903288797533402, "grad_norm": 0.045580677688121796, "learning_rate": 0.01, "loss": 1.9855, "step": 76899 }, { "epoch": 7.903597122302158, "grad_norm": 0.09109973162412643, "learning_rate": 0.01, "loss": 1.9578, "step": 76902 }, { "epoch": 7.9039054470709145, "grad_norm": 0.08682844787836075, "learning_rate": 0.01, "loss": 1.9699, "step": 76905 }, { "epoch": 7.904213771839671, "grad_norm": 0.03720599412918091, "learning_rate": 0.01, "loss": 1.9755, "step": 76908 }, { "epoch": 7.904522096608428, "grad_norm": 0.03967565670609474, "learning_rate": 0.01, "loss": 1.9584, "step": 76911 }, { "epoch": 7.904830421377184, "grad_norm": 0.04664463922381401, "learning_rate": 0.01, "loss": 2.0114, "step": 76914 }, { "epoch": 7.905138746145941, "grad_norm": 0.04584002122282982, "learning_rate": 0.01, "loss": 1.9647, "step": 76917 }, { "epoch": 7.905447070914697, "grad_norm": 0.03752794861793518, "learning_rate": 0.01, "loss": 1.95, "step": 76920 }, { "epoch": 7.905755395683453, "grad_norm": 0.04139968752861023, "learning_rate": 0.01, "loss": 1.9445, "step": 76923 }, { "epoch": 7.9060637204522095, "grad_norm": 0.03457412123680115, "learning_rate": 0.01, "loss": 1.9488, "step": 76926 }, { "epoch": 7.906372045220966, "grad_norm": 0.08590247482061386, "learning_rate": 0.01, "loss": 1.9666, "step": 76929 }, { "epoch": 7.906680369989722, "grad_norm": 0.04614677280187607, "learning_rate": 0.01, "loss": 1.9621, "step": 76932 }, { "epoch": 7.906988694758479, "grad_norm": 0.054161593317985535, "learning_rate": 0.01, "loss": 1.9795, "step": 76935 }, { "epoch": 7.907297019527236, "grad_norm": 0.07664169371128082, "learning_rate": 0.01, "loss": 1.9779, "step": 76938 }, { "epoch": 7.907605344295992, "grad_norm": 0.059912554919719696, "learning_rate": 0.01, "loss": 1.9718, "step": 76941 }, { "epoch": 7.907913669064748, "grad_norm": 0.08653954416513443, "learning_rate": 0.01, "loss": 1.9763, "step": 76944 }, { "epoch": 7.9082219938335045, "grad_norm": 0.09183872491121292, "learning_rate": 0.01, "loss": 1.9793, "step": 76947 }, { "epoch": 7.908530318602261, "grad_norm": 0.05259134992957115, "learning_rate": 0.01, "loss": 1.9484, "step": 76950 }, { "epoch": 7.908838643371017, "grad_norm": 0.08326982706785202, "learning_rate": 0.01, "loss": 2.0106, "step": 76953 }, { "epoch": 7.909146968139774, "grad_norm": 0.09940369427204132, "learning_rate": 0.01, "loss": 1.954, "step": 76956 }, { "epoch": 7.909455292908531, "grad_norm": 0.04686363413929939, "learning_rate": 0.01, "loss": 1.9952, "step": 76959 }, { "epoch": 7.909763617677287, "grad_norm": 0.09306104481220245, "learning_rate": 0.01, "loss": 1.9646, "step": 76962 }, { "epoch": 7.910071942446043, "grad_norm": 0.07574539631605148, "learning_rate": 0.01, "loss": 1.9835, "step": 76965 }, { "epoch": 7.9103802672148, "grad_norm": 0.11940892785787582, "learning_rate": 0.01, "loss": 1.9535, "step": 76968 }, { "epoch": 7.910688591983556, "grad_norm": 0.07245193421840668, "learning_rate": 0.01, "loss": 1.9497, "step": 76971 }, { "epoch": 7.910996916752312, "grad_norm": 0.050414931029081345, "learning_rate": 0.01, "loss": 1.9617, "step": 76974 }, { "epoch": 7.9113052415210685, "grad_norm": 0.043788183480501175, "learning_rate": 0.01, "loss": 1.9424, "step": 76977 }, { "epoch": 7.911613566289825, "grad_norm": 0.07120411843061447, "learning_rate": 0.01, "loss": 1.9425, "step": 76980 }, { "epoch": 7.911921891058582, "grad_norm": 0.04601344093680382, "learning_rate": 0.01, "loss": 1.9666, "step": 76983 }, { "epoch": 7.912230215827338, "grad_norm": 0.053321413695812225, "learning_rate": 0.01, "loss": 1.9642, "step": 76986 }, { "epoch": 7.912538540596095, "grad_norm": 0.09367197006940842, "learning_rate": 0.01, "loss": 1.9595, "step": 76989 }, { "epoch": 7.912846865364851, "grad_norm": 0.10960140079259872, "learning_rate": 0.01, "loss": 2.0057, "step": 76992 }, { "epoch": 7.913155190133607, "grad_norm": 0.08764875680208206, "learning_rate": 0.01, "loss": 1.9747, "step": 76995 }, { "epoch": 7.9134635149023635, "grad_norm": 0.07808122038841248, "learning_rate": 0.01, "loss": 1.9516, "step": 76998 }, { "epoch": 7.913771839671121, "grad_norm": 0.059402767568826675, "learning_rate": 0.01, "loss": 1.981, "step": 77001 }, { "epoch": 7.914080164439877, "grad_norm": 0.09941643476486206, "learning_rate": 0.01, "loss": 1.9614, "step": 77004 }, { "epoch": 7.914388489208633, "grad_norm": 0.142394557595253, "learning_rate": 0.01, "loss": 1.9578, "step": 77007 }, { "epoch": 7.91469681397739, "grad_norm": 0.05479384586215019, "learning_rate": 0.01, "loss": 1.9717, "step": 77010 }, { "epoch": 7.915005138746146, "grad_norm": 0.04836162552237511, "learning_rate": 0.01, "loss": 1.9492, "step": 77013 }, { "epoch": 7.915313463514902, "grad_norm": 0.07146017253398895, "learning_rate": 0.01, "loss": 1.991, "step": 77016 }, { "epoch": 7.915621788283659, "grad_norm": 0.06672468036413193, "learning_rate": 0.01, "loss": 1.9627, "step": 77019 }, { "epoch": 7.915930113052415, "grad_norm": 0.050683509558439255, "learning_rate": 0.01, "loss": 1.9801, "step": 77022 }, { "epoch": 7.916238437821171, "grad_norm": 0.04118918254971504, "learning_rate": 0.01, "loss": 1.9834, "step": 77025 }, { "epoch": 7.916546762589928, "grad_norm": 0.11386791616678238, "learning_rate": 0.01, "loss": 1.9688, "step": 77028 }, { "epoch": 7.916855087358685, "grad_norm": 0.04421692341566086, "learning_rate": 0.01, "loss": 1.9569, "step": 77031 }, { "epoch": 7.917163412127441, "grad_norm": 0.03897789120674133, "learning_rate": 0.01, "loss": 1.9576, "step": 77034 }, { "epoch": 7.917471736896197, "grad_norm": 0.07048334926366806, "learning_rate": 0.01, "loss": 1.9647, "step": 77037 }, { "epoch": 7.917780061664954, "grad_norm": 0.07954574376344681, "learning_rate": 0.01, "loss": 1.9648, "step": 77040 }, { "epoch": 7.91808838643371, "grad_norm": 0.07385550439357758, "learning_rate": 0.01, "loss": 1.9693, "step": 77043 }, { "epoch": 7.918396711202466, "grad_norm": 0.099297434091568, "learning_rate": 0.01, "loss": 1.9752, "step": 77046 }, { "epoch": 7.918705035971223, "grad_norm": 0.10075324028730392, "learning_rate": 0.01, "loss": 1.9802, "step": 77049 }, { "epoch": 7.91901336073998, "grad_norm": 0.04904831945896149, "learning_rate": 0.01, "loss": 1.9702, "step": 77052 }, { "epoch": 7.919321685508736, "grad_norm": 0.042833998799324036, "learning_rate": 0.01, "loss": 1.9832, "step": 77055 }, { "epoch": 7.919630010277492, "grad_norm": 0.03839438781142235, "learning_rate": 0.01, "loss": 1.9994, "step": 77058 }, { "epoch": 7.919938335046249, "grad_norm": 0.05418895557522774, "learning_rate": 0.01, "loss": 1.9569, "step": 77061 }, { "epoch": 7.920246659815005, "grad_norm": 0.03642089292407036, "learning_rate": 0.01, "loss": 1.9525, "step": 77064 }, { "epoch": 7.920554984583761, "grad_norm": 0.10740983486175537, "learning_rate": 0.01, "loss": 1.9548, "step": 77067 }, { "epoch": 7.920863309352518, "grad_norm": 0.0470796562731266, "learning_rate": 0.01, "loss": 1.9453, "step": 77070 }, { "epoch": 7.921171634121275, "grad_norm": 0.0717134177684784, "learning_rate": 0.01, "loss": 1.9734, "step": 77073 }, { "epoch": 7.921479958890031, "grad_norm": 0.09272985905408859, "learning_rate": 0.01, "loss": 1.9614, "step": 77076 }, { "epoch": 7.921788283658787, "grad_norm": 0.06581149250268936, "learning_rate": 0.01, "loss": 2.005, "step": 77079 }, { "epoch": 7.922096608427544, "grad_norm": 0.10736197978258133, "learning_rate": 0.01, "loss": 1.9633, "step": 77082 }, { "epoch": 7.9224049331963, "grad_norm": 0.0577731616795063, "learning_rate": 0.01, "loss": 1.9718, "step": 77085 }, { "epoch": 7.922713257965056, "grad_norm": 0.05568409711122513, "learning_rate": 0.01, "loss": 1.9753, "step": 77088 }, { "epoch": 7.923021582733813, "grad_norm": 0.047936148941516876, "learning_rate": 0.01, "loss": 1.9844, "step": 77091 }, { "epoch": 7.92332990750257, "grad_norm": 0.04763433337211609, "learning_rate": 0.01, "loss": 1.9556, "step": 77094 }, { "epoch": 7.923638232271326, "grad_norm": 0.08284518867731094, "learning_rate": 0.01, "loss": 1.9588, "step": 77097 }, { "epoch": 7.923946557040082, "grad_norm": 0.06552954018115997, "learning_rate": 0.01, "loss": 1.972, "step": 77100 }, { "epoch": 7.924254881808839, "grad_norm": 0.07731842994689941, "learning_rate": 0.01, "loss": 1.9409, "step": 77103 }, { "epoch": 7.924563206577595, "grad_norm": 0.07882971316576004, "learning_rate": 0.01, "loss": 1.9623, "step": 77106 }, { "epoch": 7.924871531346351, "grad_norm": 0.06781595945358276, "learning_rate": 0.01, "loss": 1.9682, "step": 77109 }, { "epoch": 7.925179856115108, "grad_norm": 0.07871492207050323, "learning_rate": 0.01, "loss": 1.9679, "step": 77112 }, { "epoch": 7.925488180883864, "grad_norm": 0.09492309391498566, "learning_rate": 0.01, "loss": 1.9553, "step": 77115 }, { "epoch": 7.92579650565262, "grad_norm": 0.07608136534690857, "learning_rate": 0.01, "loss": 1.9683, "step": 77118 }, { "epoch": 7.9261048304213775, "grad_norm": 0.14081300795078278, "learning_rate": 0.01, "loss": 1.9834, "step": 77121 }, { "epoch": 7.926413155190134, "grad_norm": 0.10385705530643463, "learning_rate": 0.01, "loss": 1.9738, "step": 77124 }, { "epoch": 7.92672147995889, "grad_norm": 0.059850629419088364, "learning_rate": 0.01, "loss": 1.9671, "step": 77127 }, { "epoch": 7.927029804727646, "grad_norm": 0.03840718790888786, "learning_rate": 0.01, "loss": 1.9974, "step": 77130 }, { "epoch": 7.927338129496403, "grad_norm": 0.05099863559007645, "learning_rate": 0.01, "loss": 1.9799, "step": 77133 }, { "epoch": 7.927646454265159, "grad_norm": 0.10900937765836716, "learning_rate": 0.01, "loss": 1.9561, "step": 77136 }, { "epoch": 7.927954779033916, "grad_norm": 0.05620981380343437, "learning_rate": 0.01, "loss": 1.9736, "step": 77139 }, { "epoch": 7.9282631038026725, "grad_norm": 0.07825091481208801, "learning_rate": 0.01, "loss": 1.9806, "step": 77142 }, { "epoch": 7.928571428571429, "grad_norm": 0.047291044145822525, "learning_rate": 0.01, "loss": 1.9573, "step": 77145 }, { "epoch": 7.928879753340185, "grad_norm": 0.08942444622516632, "learning_rate": 0.01, "loss": 1.9908, "step": 77148 }, { "epoch": 7.929188078108941, "grad_norm": 0.0645388588309288, "learning_rate": 0.01, "loss": 1.9847, "step": 77151 }, { "epoch": 7.929496402877698, "grad_norm": 0.04266830161213875, "learning_rate": 0.01, "loss": 1.9626, "step": 77154 }, { "epoch": 7.929804727646454, "grad_norm": 0.04416031017899513, "learning_rate": 0.01, "loss": 1.9584, "step": 77157 }, { "epoch": 7.93011305241521, "grad_norm": 0.05279887467622757, "learning_rate": 0.01, "loss": 1.9831, "step": 77160 }, { "epoch": 7.930421377183967, "grad_norm": 0.0377926304936409, "learning_rate": 0.01, "loss": 1.9875, "step": 77163 }, { "epoch": 7.930729701952724, "grad_norm": 0.03909745067358017, "learning_rate": 0.01, "loss": 1.9685, "step": 77166 }, { "epoch": 7.93103802672148, "grad_norm": 0.05067327246069908, "learning_rate": 0.01, "loss": 1.9548, "step": 77169 }, { "epoch": 7.9313463514902365, "grad_norm": 0.11055053770542145, "learning_rate": 0.01, "loss": 1.9744, "step": 77172 }, { "epoch": 7.931654676258993, "grad_norm": 0.13083787262439728, "learning_rate": 0.01, "loss": 1.978, "step": 77175 }, { "epoch": 7.931963001027749, "grad_norm": 0.05712626501917839, "learning_rate": 0.01, "loss": 1.9797, "step": 77178 }, { "epoch": 7.932271325796505, "grad_norm": 0.034157317131757736, "learning_rate": 0.01, "loss": 1.9657, "step": 77181 }, { "epoch": 7.932579650565262, "grad_norm": 0.040653664618730545, "learning_rate": 0.01, "loss": 1.9721, "step": 77184 }, { "epoch": 7.932887975334019, "grad_norm": 0.05120181292295456, "learning_rate": 0.01, "loss": 1.9527, "step": 77187 }, { "epoch": 7.933196300102775, "grad_norm": 0.04877161234617233, "learning_rate": 0.01, "loss": 1.9448, "step": 77190 }, { "epoch": 7.9335046248715315, "grad_norm": 0.038983944803476334, "learning_rate": 0.01, "loss": 1.9476, "step": 77193 }, { "epoch": 7.933812949640288, "grad_norm": 0.10702808201313019, "learning_rate": 0.01, "loss": 1.9442, "step": 77196 }, { "epoch": 7.934121274409044, "grad_norm": 0.13693293929100037, "learning_rate": 0.01, "loss": 1.9654, "step": 77199 }, { "epoch": 7.9344295991778, "grad_norm": 0.06795570999383926, "learning_rate": 0.01, "loss": 1.9799, "step": 77202 }, { "epoch": 7.934737923946557, "grad_norm": 0.05547187477350235, "learning_rate": 0.01, "loss": 1.9289, "step": 77205 }, { "epoch": 7.935046248715313, "grad_norm": 0.042718227952718735, "learning_rate": 0.01, "loss": 1.9605, "step": 77208 }, { "epoch": 7.93535457348407, "grad_norm": 0.03317321464419365, "learning_rate": 0.01, "loss": 1.9811, "step": 77211 }, { "epoch": 7.935662898252827, "grad_norm": 0.03335217386484146, "learning_rate": 0.01, "loss": 1.9577, "step": 77214 }, { "epoch": 7.935971223021583, "grad_norm": 0.04254760593175888, "learning_rate": 0.01, "loss": 1.9751, "step": 77217 }, { "epoch": 7.936279547790339, "grad_norm": 0.0777120515704155, "learning_rate": 0.01, "loss": 1.9665, "step": 77220 }, { "epoch": 7.9365878725590955, "grad_norm": 0.10709066689014435, "learning_rate": 0.01, "loss": 1.966, "step": 77223 }, { "epoch": 7.936896197327852, "grad_norm": 0.07401084899902344, "learning_rate": 0.01, "loss": 1.9964, "step": 77226 }, { "epoch": 7.937204522096608, "grad_norm": 0.03645917400717735, "learning_rate": 0.01, "loss": 1.9629, "step": 77229 }, { "epoch": 7.937512846865365, "grad_norm": 0.09955336153507233, "learning_rate": 0.01, "loss": 1.9639, "step": 77232 }, { "epoch": 7.937821171634122, "grad_norm": 0.0894830971956253, "learning_rate": 0.01, "loss": 1.95, "step": 77235 }, { "epoch": 7.938129496402878, "grad_norm": 0.06080229952931404, "learning_rate": 0.01, "loss": 1.969, "step": 77238 }, { "epoch": 7.938437821171634, "grad_norm": 0.04642382264137268, "learning_rate": 0.01, "loss": 1.9522, "step": 77241 }, { "epoch": 7.9387461459403905, "grad_norm": 0.036709267646074295, "learning_rate": 0.01, "loss": 1.9637, "step": 77244 }, { "epoch": 7.939054470709147, "grad_norm": 0.041632648557424545, "learning_rate": 0.01, "loss": 1.9942, "step": 77247 }, { "epoch": 7.939362795477903, "grad_norm": 0.03514658287167549, "learning_rate": 0.01, "loss": 1.9664, "step": 77250 }, { "epoch": 7.939671120246659, "grad_norm": 0.028975483030080795, "learning_rate": 0.01, "loss": 1.987, "step": 77253 }, { "epoch": 7.939979445015416, "grad_norm": 0.04937351867556572, "learning_rate": 0.01, "loss": 1.9508, "step": 77256 }, { "epoch": 7.940287769784173, "grad_norm": 0.07647955417633057, "learning_rate": 0.01, "loss": 1.9543, "step": 77259 }, { "epoch": 7.940596094552929, "grad_norm": 0.054201941937208176, "learning_rate": 0.01, "loss": 1.9985, "step": 77262 }, { "epoch": 7.940904419321686, "grad_norm": 0.07279737293720245, "learning_rate": 0.01, "loss": 1.9712, "step": 77265 }, { "epoch": 7.941212744090442, "grad_norm": 0.08488020300865173, "learning_rate": 0.01, "loss": 1.9636, "step": 77268 }, { "epoch": 7.941521068859198, "grad_norm": 0.11421249061822891, "learning_rate": 0.01, "loss": 1.9767, "step": 77271 }, { "epoch": 7.9418293936279545, "grad_norm": 0.042113203555345535, "learning_rate": 0.01, "loss": 1.9557, "step": 77274 }, { "epoch": 7.942137718396712, "grad_norm": 0.06829381734132767, "learning_rate": 0.01, "loss": 1.9419, "step": 77277 }, { "epoch": 7.942446043165468, "grad_norm": 0.09846700727939606, "learning_rate": 0.01, "loss": 1.9829, "step": 77280 }, { "epoch": 7.942754367934224, "grad_norm": 0.03628145530819893, "learning_rate": 0.01, "loss": 1.9766, "step": 77283 }, { "epoch": 7.943062692702981, "grad_norm": 0.15278568863868713, "learning_rate": 0.01, "loss": 1.9733, "step": 77286 }, { "epoch": 7.943371017471737, "grad_norm": 0.0523282065987587, "learning_rate": 0.01, "loss": 1.9778, "step": 77289 }, { "epoch": 7.943679342240493, "grad_norm": 0.033178601413965225, "learning_rate": 0.01, "loss": 1.9388, "step": 77292 }, { "epoch": 7.9439876670092495, "grad_norm": 0.045855265110731125, "learning_rate": 0.01, "loss": 1.9637, "step": 77295 }, { "epoch": 7.944295991778006, "grad_norm": 0.043520331382751465, "learning_rate": 0.01, "loss": 1.9826, "step": 77298 }, { "epoch": 7.944604316546762, "grad_norm": 0.04189394786953926, "learning_rate": 0.01, "loss": 1.9565, "step": 77301 }, { "epoch": 7.944912641315519, "grad_norm": 0.04767363518476486, "learning_rate": 0.01, "loss": 1.9739, "step": 77304 }, { "epoch": 7.945220966084276, "grad_norm": 0.08458670228719711, "learning_rate": 0.01, "loss": 1.9549, "step": 77307 }, { "epoch": 7.945529290853032, "grad_norm": 0.0853220671415329, "learning_rate": 0.01, "loss": 1.9693, "step": 77310 }, { "epoch": 7.945837615621788, "grad_norm": 0.06229695677757263, "learning_rate": 0.01, "loss": 1.9614, "step": 77313 }, { "epoch": 7.946145940390545, "grad_norm": 0.09782623499631882, "learning_rate": 0.01, "loss": 1.9765, "step": 77316 }, { "epoch": 7.946454265159301, "grad_norm": 0.09582273662090302, "learning_rate": 0.01, "loss": 1.9627, "step": 77319 }, { "epoch": 7.946762589928057, "grad_norm": 0.057305045425891876, "learning_rate": 0.01, "loss": 1.9465, "step": 77322 }, { "epoch": 7.947070914696814, "grad_norm": 0.12745541334152222, "learning_rate": 0.01, "loss": 1.9639, "step": 77325 }, { "epoch": 7.947379239465571, "grad_norm": 0.044055260717868805, "learning_rate": 0.01, "loss": 1.9514, "step": 77328 }, { "epoch": 7.947687564234327, "grad_norm": 0.09511730819940567, "learning_rate": 0.01, "loss": 1.9806, "step": 77331 }, { "epoch": 7.947995889003083, "grad_norm": 0.05790237337350845, "learning_rate": 0.01, "loss": 1.9807, "step": 77334 }, { "epoch": 7.94830421377184, "grad_norm": 0.11035138368606567, "learning_rate": 0.01, "loss": 1.9481, "step": 77337 }, { "epoch": 7.948612538540596, "grad_norm": 0.04935318976640701, "learning_rate": 0.01, "loss": 1.9674, "step": 77340 }, { "epoch": 7.948920863309352, "grad_norm": 0.10183247923851013, "learning_rate": 0.01, "loss": 1.9776, "step": 77343 }, { "epoch": 7.9492291880781085, "grad_norm": 0.06453783065080643, "learning_rate": 0.01, "loss": 1.9525, "step": 77346 }, { "epoch": 7.949537512846866, "grad_norm": 0.05836249515414238, "learning_rate": 0.01, "loss": 1.9569, "step": 77349 }, { "epoch": 7.949845837615622, "grad_norm": 0.05511557310819626, "learning_rate": 0.01, "loss": 1.9588, "step": 77352 }, { "epoch": 7.950154162384378, "grad_norm": 0.09671493619680405, "learning_rate": 0.01, "loss": 1.9806, "step": 77355 }, { "epoch": 7.950462487153135, "grad_norm": 0.05204296484589577, "learning_rate": 0.01, "loss": 1.964, "step": 77358 }, { "epoch": 7.950770811921891, "grad_norm": 0.09407757967710495, "learning_rate": 0.01, "loss": 1.9587, "step": 77361 }, { "epoch": 7.951079136690647, "grad_norm": 0.08344008028507233, "learning_rate": 0.01, "loss": 1.9866, "step": 77364 }, { "epoch": 7.951387461459404, "grad_norm": 0.059847306460142136, "learning_rate": 0.01, "loss": 1.9527, "step": 77367 }, { "epoch": 7.951695786228161, "grad_norm": 0.05866989120841026, "learning_rate": 0.01, "loss": 1.9939, "step": 77370 }, { "epoch": 7.952004110996917, "grad_norm": 0.053589146584272385, "learning_rate": 0.01, "loss": 1.979, "step": 77373 }, { "epoch": 7.952312435765673, "grad_norm": 0.10758215188980103, "learning_rate": 0.01, "loss": 1.969, "step": 77376 }, { "epoch": 7.95262076053443, "grad_norm": 0.033645015209913254, "learning_rate": 0.01, "loss": 1.9891, "step": 77379 }, { "epoch": 7.952929085303186, "grad_norm": 0.050443585962057114, "learning_rate": 0.01, "loss": 1.9488, "step": 77382 }, { "epoch": 7.953237410071942, "grad_norm": 0.07028111815452576, "learning_rate": 0.01, "loss": 1.9608, "step": 77385 }, { "epoch": 7.953545734840699, "grad_norm": 0.05370425805449486, "learning_rate": 0.01, "loss": 1.9575, "step": 77388 }, { "epoch": 7.953854059609455, "grad_norm": 0.08151637017726898, "learning_rate": 0.01, "loss": 1.9592, "step": 77391 }, { "epoch": 7.954162384378212, "grad_norm": 0.046112194657325745, "learning_rate": 0.01, "loss": 1.953, "step": 77394 }, { "epoch": 7.954470709146968, "grad_norm": 0.09836447238922119, "learning_rate": 0.01, "loss": 1.9789, "step": 77397 }, { "epoch": 7.954779033915725, "grad_norm": 0.056767839938402176, "learning_rate": 0.01, "loss": 1.944, "step": 77400 }, { "epoch": 7.955087358684481, "grad_norm": 0.08596283942461014, "learning_rate": 0.01, "loss": 1.9714, "step": 77403 }, { "epoch": 7.955395683453237, "grad_norm": 0.03643207252025604, "learning_rate": 0.01, "loss": 1.9438, "step": 77406 }, { "epoch": 7.955704008221994, "grad_norm": 0.13480544090270996, "learning_rate": 0.01, "loss": 1.9758, "step": 77409 }, { "epoch": 7.95601233299075, "grad_norm": 0.058044008910655975, "learning_rate": 0.01, "loss": 1.9799, "step": 77412 }, { "epoch": 7.956320657759507, "grad_norm": 0.06414663046598434, "learning_rate": 0.01, "loss": 1.9665, "step": 77415 }, { "epoch": 7.9566289825282634, "grad_norm": 0.09021684527397156, "learning_rate": 0.01, "loss": 1.9759, "step": 77418 }, { "epoch": 7.95693730729702, "grad_norm": 0.08076009154319763, "learning_rate": 0.01, "loss": 1.9596, "step": 77421 }, { "epoch": 7.957245632065776, "grad_norm": 0.051470015197992325, "learning_rate": 0.01, "loss": 1.963, "step": 77424 }, { "epoch": 7.957553956834532, "grad_norm": 0.10929223895072937, "learning_rate": 0.01, "loss": 1.9771, "step": 77427 }, { "epoch": 7.957862281603289, "grad_norm": 0.08006475865840912, "learning_rate": 0.01, "loss": 1.9751, "step": 77430 }, { "epoch": 7.958170606372045, "grad_norm": 0.07928687334060669, "learning_rate": 0.01, "loss": 1.9581, "step": 77433 }, { "epoch": 7.958478931140801, "grad_norm": 0.07244311273097992, "learning_rate": 0.01, "loss": 1.9584, "step": 77436 }, { "epoch": 7.958787255909558, "grad_norm": 0.0400693379342556, "learning_rate": 0.01, "loss": 1.9686, "step": 77439 }, { "epoch": 7.959095580678315, "grad_norm": 0.03099592588841915, "learning_rate": 0.01, "loss": 1.944, "step": 77442 }, { "epoch": 7.959403905447071, "grad_norm": 0.05279327183961868, "learning_rate": 0.01, "loss": 1.9327, "step": 77445 }, { "epoch": 7.959712230215827, "grad_norm": 0.11631178855895996, "learning_rate": 0.01, "loss": 1.9912, "step": 77448 }, { "epoch": 7.960020554984584, "grad_norm": 0.06946063786745071, "learning_rate": 0.01, "loss": 1.9858, "step": 77451 }, { "epoch": 7.96032887975334, "grad_norm": 0.0573054701089859, "learning_rate": 0.01, "loss": 1.9344, "step": 77454 }, { "epoch": 7.960637204522096, "grad_norm": 0.08694712072610855, "learning_rate": 0.01, "loss": 1.9589, "step": 77457 }, { "epoch": 7.9609455292908535, "grad_norm": 0.046338554471731186, "learning_rate": 0.01, "loss": 1.9881, "step": 77460 }, { "epoch": 7.96125385405961, "grad_norm": 0.04105210676789284, "learning_rate": 0.01, "loss": 1.9881, "step": 77463 }, { "epoch": 7.961562178828366, "grad_norm": 0.03583085164427757, "learning_rate": 0.01, "loss": 1.9741, "step": 77466 }, { "epoch": 7.9618705035971225, "grad_norm": 0.08635620027780533, "learning_rate": 0.01, "loss": 1.9275, "step": 77469 }, { "epoch": 7.962178828365879, "grad_norm": 0.0696592703461647, "learning_rate": 0.01, "loss": 1.9783, "step": 77472 }, { "epoch": 7.962487153134635, "grad_norm": 0.0656665712594986, "learning_rate": 0.01, "loss": 1.9844, "step": 77475 }, { "epoch": 7.962795477903391, "grad_norm": 0.05063663423061371, "learning_rate": 0.01, "loss": 1.9681, "step": 77478 }, { "epoch": 7.963103802672148, "grad_norm": 0.08557248115539551, "learning_rate": 0.01, "loss": 1.9875, "step": 77481 }, { "epoch": 7.963412127440904, "grad_norm": 0.047050029039382935, "learning_rate": 0.01, "loss": 1.9591, "step": 77484 }, { "epoch": 7.963720452209661, "grad_norm": 0.03459413722157478, "learning_rate": 0.01, "loss": 1.977, "step": 77487 }, { "epoch": 7.9640287769784175, "grad_norm": 0.08979766815900803, "learning_rate": 0.01, "loss": 1.9759, "step": 77490 }, { "epoch": 7.964337101747174, "grad_norm": 0.05879922956228256, "learning_rate": 0.01, "loss": 1.9472, "step": 77493 }, { "epoch": 7.96464542651593, "grad_norm": 0.04225922003388405, "learning_rate": 0.01, "loss": 1.9598, "step": 77496 }, { "epoch": 7.964953751284686, "grad_norm": 0.043802402913570404, "learning_rate": 0.01, "loss": 1.9571, "step": 77499 }, { "epoch": 7.965262076053443, "grad_norm": 0.05574416741728783, "learning_rate": 0.01, "loss": 1.96, "step": 77502 }, { "epoch": 7.965570400822199, "grad_norm": 0.22006060183048248, "learning_rate": 0.01, "loss": 1.954, "step": 77505 }, { "epoch": 7.965878725590956, "grad_norm": 0.18854233622550964, "learning_rate": 0.01, "loss": 1.9455, "step": 77508 }, { "epoch": 7.9661870503597125, "grad_norm": 0.11298780888319016, "learning_rate": 0.01, "loss": 1.9902, "step": 77511 }, { "epoch": 7.966495375128469, "grad_norm": 0.0345136821269989, "learning_rate": 0.01, "loss": 1.9382, "step": 77514 }, { "epoch": 7.966803699897225, "grad_norm": 0.06418821960687637, "learning_rate": 0.01, "loss": 1.9818, "step": 77517 }, { "epoch": 7.9671120246659815, "grad_norm": 0.04282977804541588, "learning_rate": 0.01, "loss": 1.9502, "step": 77520 }, { "epoch": 7.967420349434738, "grad_norm": 0.07152153551578522, "learning_rate": 0.01, "loss": 1.9839, "step": 77523 }, { "epoch": 7.967728674203494, "grad_norm": 0.07487302273511887, "learning_rate": 0.01, "loss": 1.9558, "step": 77526 }, { "epoch": 7.96803699897225, "grad_norm": 0.038246192038059235, "learning_rate": 0.01, "loss": 1.9902, "step": 77529 }, { "epoch": 7.968345323741008, "grad_norm": 0.10243113338947296, "learning_rate": 0.01, "loss": 1.9795, "step": 77532 }, { "epoch": 7.968653648509764, "grad_norm": 0.05641162022948265, "learning_rate": 0.01, "loss": 1.9626, "step": 77535 }, { "epoch": 7.96896197327852, "grad_norm": 0.060813263058662415, "learning_rate": 0.01, "loss": 1.9727, "step": 77538 }, { "epoch": 7.9692702980472765, "grad_norm": 0.07178650796413422, "learning_rate": 0.01, "loss": 1.9738, "step": 77541 }, { "epoch": 7.969578622816033, "grad_norm": 0.11889466643333435, "learning_rate": 0.01, "loss": 1.9638, "step": 77544 }, { "epoch": 7.969886947584789, "grad_norm": 0.048543453216552734, "learning_rate": 0.01, "loss": 1.9805, "step": 77547 }, { "epoch": 7.970195272353545, "grad_norm": 0.09376556426286697, "learning_rate": 0.01, "loss": 1.985, "step": 77550 }, { "epoch": 7.970503597122303, "grad_norm": 0.12909746170043945, "learning_rate": 0.01, "loss": 1.9778, "step": 77553 }, { "epoch": 7.970811921891059, "grad_norm": 0.1283407062292099, "learning_rate": 0.01, "loss": 1.9653, "step": 77556 }, { "epoch": 7.971120246659815, "grad_norm": 0.05502600222826004, "learning_rate": 0.01, "loss": 1.9651, "step": 77559 }, { "epoch": 7.9714285714285715, "grad_norm": 0.036075152456760406, "learning_rate": 0.01, "loss": 1.9823, "step": 77562 }, { "epoch": 7.971736896197328, "grad_norm": 0.03715295344591141, "learning_rate": 0.01, "loss": 1.9529, "step": 77565 }, { "epoch": 7.972045220966084, "grad_norm": 0.10578956454992294, "learning_rate": 0.01, "loss": 1.9518, "step": 77568 }, { "epoch": 7.9723535457348405, "grad_norm": 0.07572545111179352, "learning_rate": 0.01, "loss": 1.97, "step": 77571 }, { "epoch": 7.972661870503597, "grad_norm": 0.04459410533308983, "learning_rate": 0.01, "loss": 1.98, "step": 77574 }, { "epoch": 7.972970195272353, "grad_norm": 0.047775302082300186, "learning_rate": 0.01, "loss": 1.9751, "step": 77577 }, { "epoch": 7.97327852004111, "grad_norm": 0.03661422058939934, "learning_rate": 0.01, "loss": 1.9483, "step": 77580 }, { "epoch": 7.973586844809867, "grad_norm": 0.06153013929724693, "learning_rate": 0.01, "loss": 1.9789, "step": 77583 }, { "epoch": 7.973895169578623, "grad_norm": 0.06503260880708694, "learning_rate": 0.01, "loss": 1.9583, "step": 77586 }, { "epoch": 7.974203494347379, "grad_norm": 0.07872997224330902, "learning_rate": 0.01, "loss": 1.9603, "step": 77589 }, { "epoch": 7.9745118191161355, "grad_norm": 0.09111315757036209, "learning_rate": 0.01, "loss": 1.9643, "step": 77592 }, { "epoch": 7.974820143884892, "grad_norm": 0.04510453715920448, "learning_rate": 0.01, "loss": 1.9711, "step": 77595 }, { "epoch": 7.975128468653649, "grad_norm": 0.1190980076789856, "learning_rate": 0.01, "loss": 1.9476, "step": 77598 }, { "epoch": 7.975436793422405, "grad_norm": 0.08375338464975357, "learning_rate": 0.01, "loss": 1.9445, "step": 77601 }, { "epoch": 7.975745118191162, "grad_norm": 0.041891466826200485, "learning_rate": 0.01, "loss": 1.9755, "step": 77604 }, { "epoch": 7.976053442959918, "grad_norm": 0.04583370313048363, "learning_rate": 0.01, "loss": 1.9828, "step": 77607 }, { "epoch": 7.976361767728674, "grad_norm": 0.09042174369096756, "learning_rate": 0.01, "loss": 1.9891, "step": 77610 }, { "epoch": 7.9766700924974305, "grad_norm": 0.05806156247854233, "learning_rate": 0.01, "loss": 1.9648, "step": 77613 }, { "epoch": 7.976978417266187, "grad_norm": 0.04089076817035675, "learning_rate": 0.01, "loss": 1.9687, "step": 77616 }, { "epoch": 7.977286742034943, "grad_norm": 0.049693990498781204, "learning_rate": 0.01, "loss": 1.9708, "step": 77619 }, { "epoch": 7.9775950668036995, "grad_norm": 0.07871042937040329, "learning_rate": 0.01, "loss": 1.9349, "step": 77622 }, { "epoch": 7.977903391572457, "grad_norm": 0.11597729474306107, "learning_rate": 0.01, "loss": 1.9744, "step": 77625 }, { "epoch": 7.978211716341213, "grad_norm": 0.040133338421583176, "learning_rate": 0.01, "loss": 1.9603, "step": 77628 }, { "epoch": 7.978520041109969, "grad_norm": 0.08938458561897278, "learning_rate": 0.01, "loss": 1.9645, "step": 77631 }, { "epoch": 7.978828365878726, "grad_norm": 0.0434061661362648, "learning_rate": 0.01, "loss": 1.9925, "step": 77634 }, { "epoch": 7.979136690647482, "grad_norm": 0.137980654835701, "learning_rate": 0.01, "loss": 1.9599, "step": 77637 }, { "epoch": 7.979445015416238, "grad_norm": 0.07370985299348831, "learning_rate": 0.01, "loss": 1.9542, "step": 77640 }, { "epoch": 7.9797533401849945, "grad_norm": 0.11431551724672318, "learning_rate": 0.01, "loss": 1.9951, "step": 77643 }, { "epoch": 7.980061664953752, "grad_norm": 0.1513805091381073, "learning_rate": 0.01, "loss": 1.9715, "step": 77646 }, { "epoch": 7.980369989722508, "grad_norm": 0.1414353847503662, "learning_rate": 0.01, "loss": 1.9556, "step": 77649 }, { "epoch": 7.980678314491264, "grad_norm": 0.0823885053396225, "learning_rate": 0.01, "loss": 1.9749, "step": 77652 }, { "epoch": 7.980986639260021, "grad_norm": 0.058041948825120926, "learning_rate": 0.01, "loss": 1.948, "step": 77655 }, { "epoch": 7.981294964028777, "grad_norm": 0.04310723394155502, "learning_rate": 0.01, "loss": 1.9773, "step": 77658 }, { "epoch": 7.981603288797533, "grad_norm": 0.041882872581481934, "learning_rate": 0.01, "loss": 1.9996, "step": 77661 }, { "epoch": 7.9819116135662895, "grad_norm": 0.05595613643527031, "learning_rate": 0.01, "loss": 1.9563, "step": 77664 }, { "epoch": 7.982219938335046, "grad_norm": 0.0313473679125309, "learning_rate": 0.01, "loss": 1.9559, "step": 77667 }, { "epoch": 7.982528263103803, "grad_norm": 0.059281859546899796, "learning_rate": 0.01, "loss": 1.9843, "step": 77670 }, { "epoch": 7.982836587872559, "grad_norm": 0.03586166724562645, "learning_rate": 0.01, "loss": 1.974, "step": 77673 }, { "epoch": 7.983144912641316, "grad_norm": 0.04249844327569008, "learning_rate": 0.01, "loss": 1.9678, "step": 77676 }, { "epoch": 7.983453237410072, "grad_norm": 0.05303768441081047, "learning_rate": 0.01, "loss": 1.9667, "step": 77679 }, { "epoch": 7.983761562178828, "grad_norm": 0.05120926350355148, "learning_rate": 0.01, "loss": 1.9575, "step": 77682 }, { "epoch": 7.984069886947585, "grad_norm": 0.11158134043216705, "learning_rate": 0.01, "loss": 1.9536, "step": 77685 }, { "epoch": 7.984378211716341, "grad_norm": 0.03699832409620285, "learning_rate": 0.01, "loss": 1.9612, "step": 77688 }, { "epoch": 7.984686536485098, "grad_norm": 0.03826748579740524, "learning_rate": 0.01, "loss": 1.9637, "step": 77691 }, { "epoch": 7.984994861253854, "grad_norm": 0.03865990415215492, "learning_rate": 0.01, "loss": 1.9781, "step": 77694 }, { "epoch": 7.985303186022611, "grad_norm": 0.07432328909635544, "learning_rate": 0.01, "loss": 1.9476, "step": 77697 }, { "epoch": 7.985611510791367, "grad_norm": 0.11026125401258469, "learning_rate": 0.01, "loss": 1.9754, "step": 77700 }, { "epoch": 7.985919835560123, "grad_norm": 0.08051371574401855, "learning_rate": 0.01, "loss": 1.9564, "step": 77703 }, { "epoch": 7.98622816032888, "grad_norm": 0.04520987346768379, "learning_rate": 0.01, "loss": 1.935, "step": 77706 }, { "epoch": 7.986536485097636, "grad_norm": 0.08161762356758118, "learning_rate": 0.01, "loss": 1.9942, "step": 77709 }, { "epoch": 7.986844809866392, "grad_norm": 0.08353172987699509, "learning_rate": 0.01, "loss": 1.9486, "step": 77712 }, { "epoch": 7.9871531346351485, "grad_norm": 0.05255158990621567, "learning_rate": 0.01, "loss": 1.9683, "step": 77715 }, { "epoch": 7.987461459403906, "grad_norm": 0.06637035310268402, "learning_rate": 0.01, "loss": 1.9502, "step": 77718 }, { "epoch": 7.987769784172662, "grad_norm": 0.05454893782734871, "learning_rate": 0.01, "loss": 1.9635, "step": 77721 }, { "epoch": 7.988078108941418, "grad_norm": 0.06507664918899536, "learning_rate": 0.01, "loss": 1.9704, "step": 77724 }, { "epoch": 7.988386433710175, "grad_norm": 0.03984587639570236, "learning_rate": 0.01, "loss": 1.9614, "step": 77727 }, { "epoch": 7.988694758478931, "grad_norm": 0.03732791915535927, "learning_rate": 0.01, "loss": 1.9608, "step": 77730 }, { "epoch": 7.989003083247687, "grad_norm": 0.05173516273498535, "learning_rate": 0.01, "loss": 1.9632, "step": 77733 }, { "epoch": 7.9893114080164445, "grad_norm": 0.11875148862600327, "learning_rate": 0.01, "loss": 1.964, "step": 77736 }, { "epoch": 7.989619732785201, "grad_norm": 0.0765930786728859, "learning_rate": 0.01, "loss": 1.9663, "step": 77739 }, { "epoch": 7.989928057553957, "grad_norm": 0.10672271251678467, "learning_rate": 0.01, "loss": 1.9682, "step": 77742 }, { "epoch": 7.990236382322713, "grad_norm": 0.08630343526601791, "learning_rate": 0.01, "loss": 1.9724, "step": 77745 }, { "epoch": 7.99054470709147, "grad_norm": 0.07723753899335861, "learning_rate": 0.01, "loss": 1.9644, "step": 77748 }, { "epoch": 7.990853031860226, "grad_norm": 0.05617719888687134, "learning_rate": 0.01, "loss": 1.9656, "step": 77751 }, { "epoch": 7.991161356628982, "grad_norm": 0.03311438858509064, "learning_rate": 0.01, "loss": 1.9689, "step": 77754 }, { "epoch": 7.991469681397739, "grad_norm": 0.03674701601266861, "learning_rate": 0.01, "loss": 1.9688, "step": 77757 }, { "epoch": 7.991778006166495, "grad_norm": 0.046452175825834274, "learning_rate": 0.01, "loss": 1.9673, "step": 77760 }, { "epoch": 7.992086330935252, "grad_norm": 0.12231157720088959, "learning_rate": 0.01, "loss": 1.9706, "step": 77763 }, { "epoch": 7.992394655704008, "grad_norm": 0.03398605436086655, "learning_rate": 0.01, "loss": 1.9886, "step": 77766 }, { "epoch": 7.992702980472765, "grad_norm": 0.0747060477733612, "learning_rate": 0.01, "loss": 1.9613, "step": 77769 }, { "epoch": 7.993011305241521, "grad_norm": 0.051723454147577286, "learning_rate": 0.01, "loss": 1.9714, "step": 77772 }, { "epoch": 7.993319630010277, "grad_norm": 0.045055799186229706, "learning_rate": 0.01, "loss": 1.9691, "step": 77775 }, { "epoch": 7.993627954779034, "grad_norm": 0.06425558030605316, "learning_rate": 0.01, "loss": 1.9687, "step": 77778 }, { "epoch": 7.99393627954779, "grad_norm": 0.04054182767868042, "learning_rate": 0.01, "loss": 1.9626, "step": 77781 }, { "epoch": 7.994244604316547, "grad_norm": 0.034561723470687866, "learning_rate": 0.01, "loss": 1.9626, "step": 77784 }, { "epoch": 7.9945529290853035, "grad_norm": 0.034934625029563904, "learning_rate": 0.01, "loss": 1.9649, "step": 77787 }, { "epoch": 7.99486125385406, "grad_norm": 0.04506867378950119, "learning_rate": 0.01, "loss": 1.9588, "step": 77790 }, { "epoch": 7.995169578622816, "grad_norm": 0.06831274926662445, "learning_rate": 0.01, "loss": 1.9826, "step": 77793 }, { "epoch": 7.995477903391572, "grad_norm": 0.06778151541948318, "learning_rate": 0.01, "loss": 1.9549, "step": 77796 }, { "epoch": 7.995786228160329, "grad_norm": 0.1303955465555191, "learning_rate": 0.01, "loss": 1.9625, "step": 77799 }, { "epoch": 7.996094552929085, "grad_norm": 0.04436461254954338, "learning_rate": 0.01, "loss": 1.9891, "step": 77802 }, { "epoch": 7.996402877697841, "grad_norm": 0.08111438155174255, "learning_rate": 0.01, "loss": 1.9764, "step": 77805 }, { "epoch": 7.9967112024665985, "grad_norm": 0.050543639808893204, "learning_rate": 0.01, "loss": 1.9787, "step": 77808 }, { "epoch": 7.997019527235355, "grad_norm": 0.09472209960222244, "learning_rate": 0.01, "loss": 1.944, "step": 77811 }, { "epoch": 7.997327852004111, "grad_norm": 0.05899638310074806, "learning_rate": 0.01, "loss": 1.9716, "step": 77814 }, { "epoch": 7.997636176772867, "grad_norm": 0.03349081799387932, "learning_rate": 0.01, "loss": 1.9574, "step": 77817 }, { "epoch": 7.997944501541624, "grad_norm": 0.03433087468147278, "learning_rate": 0.01, "loss": 1.9786, "step": 77820 }, { "epoch": 7.99825282631038, "grad_norm": 0.035451170057058334, "learning_rate": 0.01, "loss": 1.9611, "step": 77823 }, { "epoch": 7.998561151079136, "grad_norm": 0.07916496694087982, "learning_rate": 0.01, "loss": 1.9621, "step": 77826 }, { "epoch": 7.9988694758478935, "grad_norm": 0.08633740246295929, "learning_rate": 0.01, "loss": 1.9658, "step": 77829 }, { "epoch": 7.99917780061665, "grad_norm": 0.06353379040956497, "learning_rate": 0.01, "loss": 1.9433, "step": 77832 }, { "epoch": 7.999486125385406, "grad_norm": 0.040657125413417816, "learning_rate": 0.01, "loss": 1.9798, "step": 77835 }, { "epoch": 7.9997944501541625, "grad_norm": 0.09802965819835663, "learning_rate": 0.01, "loss": 1.9555, "step": 77838 }, { "epoch": 8.003392967304134, "grad_norm": 0.08189691603183746, "learning_rate": 0.01, "loss": 1.9273, "step": 77841 }, { "epoch": 8.003701418877236, "grad_norm": 0.04771219938993454, "learning_rate": 0.01, "loss": 1.9367, "step": 77844 }, { "epoch": 8.00400987045034, "grad_norm": 0.045312631875276566, "learning_rate": 0.01, "loss": 1.9755, "step": 77847 }, { "epoch": 8.004318322023442, "grad_norm": 0.077674999833107, "learning_rate": 0.01, "loss": 1.9578, "step": 77850 }, { "epoch": 8.004626773596545, "grad_norm": 0.03592463582754135, "learning_rate": 0.01, "loss": 2.0094, "step": 77853 }, { "epoch": 8.004935225169648, "grad_norm": 0.034409284591674805, "learning_rate": 0.01, "loss": 1.9555, "step": 77856 }, { "epoch": 8.00524367674275, "grad_norm": 0.08009333908557892, "learning_rate": 0.01, "loss": 1.9534, "step": 77859 }, { "epoch": 8.005552128315854, "grad_norm": 0.06267620623111725, "learning_rate": 0.01, "loss": 1.9611, "step": 77862 }, { "epoch": 8.005860579888957, "grad_norm": 0.06999866664409637, "learning_rate": 0.01, "loss": 1.9609, "step": 77865 }, { "epoch": 8.006169031462061, "grad_norm": 0.04267257824540138, "learning_rate": 0.01, "loss": 1.9482, "step": 77868 }, { "epoch": 8.006477483035164, "grad_norm": 0.13487477600574493, "learning_rate": 0.01, "loss": 1.9571, "step": 77871 }, { "epoch": 8.006785934608267, "grad_norm": 0.1191130056977272, "learning_rate": 0.01, "loss": 1.9565, "step": 77874 }, { "epoch": 8.00709438618137, "grad_norm": 0.10439812391996384, "learning_rate": 0.01, "loss": 1.9262, "step": 77877 }, { "epoch": 8.007402837754473, "grad_norm": 0.08259162306785583, "learning_rate": 0.01, "loss": 1.949, "step": 77880 }, { "epoch": 8.007711289327576, "grad_norm": 0.05473419278860092, "learning_rate": 0.01, "loss": 1.9456, "step": 77883 }, { "epoch": 8.008019740900679, "grad_norm": 0.06642327457666397, "learning_rate": 0.01, "loss": 1.9763, "step": 77886 }, { "epoch": 8.008328192473781, "grad_norm": 0.06296679377555847, "learning_rate": 0.01, "loss": 1.96, "step": 77889 }, { "epoch": 8.008636644046884, "grad_norm": 0.03993913158774376, "learning_rate": 0.01, "loss": 1.9495, "step": 77892 }, { "epoch": 8.008945095619987, "grad_norm": 0.03741374984383583, "learning_rate": 0.01, "loss": 1.9695, "step": 77895 }, { "epoch": 8.00925354719309, "grad_norm": 0.03153381496667862, "learning_rate": 0.01, "loss": 1.9747, "step": 77898 }, { "epoch": 8.009561998766193, "grad_norm": 0.04553218185901642, "learning_rate": 0.01, "loss": 1.9567, "step": 77901 }, { "epoch": 8.009870450339296, "grad_norm": 0.10947732627391815, "learning_rate": 0.01, "loss": 1.9627, "step": 77904 }, { "epoch": 8.0101789019124, "grad_norm": 0.0484643280506134, "learning_rate": 0.01, "loss": 1.9704, "step": 77907 }, { "epoch": 8.010487353485503, "grad_norm": 0.05445571616292, "learning_rate": 0.01, "loss": 1.9632, "step": 77910 }, { "epoch": 8.010795805058606, "grad_norm": 0.061891742050647736, "learning_rate": 0.01, "loss": 1.9564, "step": 77913 }, { "epoch": 8.01110425663171, "grad_norm": 0.04123906418681145, "learning_rate": 0.01, "loss": 1.9693, "step": 77916 }, { "epoch": 8.011412708204812, "grad_norm": 0.03290227800607681, "learning_rate": 0.01, "loss": 1.9415, "step": 77919 }, { "epoch": 8.011721159777915, "grad_norm": 0.06250923126935959, "learning_rate": 0.01, "loss": 1.9611, "step": 77922 }, { "epoch": 8.012029611351018, "grad_norm": 0.05352925509214401, "learning_rate": 0.01, "loss": 1.9506, "step": 77925 }, { "epoch": 8.01233806292412, "grad_norm": 0.1590663343667984, "learning_rate": 0.01, "loss": 1.9791, "step": 77928 }, { "epoch": 8.012646514497224, "grad_norm": 0.09142278879880905, "learning_rate": 0.01, "loss": 1.9847, "step": 77931 }, { "epoch": 8.012954966070327, "grad_norm": 0.1245221421122551, "learning_rate": 0.01, "loss": 1.9779, "step": 77934 }, { "epoch": 8.01326341764343, "grad_norm": 0.06170954182744026, "learning_rate": 0.01, "loss": 1.9864, "step": 77937 }, { "epoch": 8.013571869216532, "grad_norm": 0.042779482901096344, "learning_rate": 0.01, "loss": 1.9834, "step": 77940 }, { "epoch": 8.013880320789635, "grad_norm": 0.030806196853518486, "learning_rate": 0.01, "loss": 1.9658, "step": 77943 }, { "epoch": 8.01418877236274, "grad_norm": 0.038949958980083466, "learning_rate": 0.01, "loss": 1.972, "step": 77946 }, { "epoch": 8.014497223935843, "grad_norm": 0.049347564578056335, "learning_rate": 0.01, "loss": 1.9765, "step": 77949 }, { "epoch": 8.014805675508946, "grad_norm": 0.06526634842157364, "learning_rate": 0.01, "loss": 1.9647, "step": 77952 }, { "epoch": 8.015114127082049, "grad_norm": 0.09283331781625748, "learning_rate": 0.01, "loss": 1.9426, "step": 77955 }, { "epoch": 8.015422578655151, "grad_norm": 0.13813799619674683, "learning_rate": 0.01, "loss": 1.9557, "step": 77958 }, { "epoch": 8.015731030228254, "grad_norm": 0.06759168952703476, "learning_rate": 0.01, "loss": 1.9601, "step": 77961 }, { "epoch": 8.016039481801357, "grad_norm": 0.029820196330547333, "learning_rate": 0.01, "loss": 1.9547, "step": 77964 }, { "epoch": 8.01634793337446, "grad_norm": 0.042072828859090805, "learning_rate": 0.01, "loss": 1.9782, "step": 77967 }, { "epoch": 8.016656384947563, "grad_norm": 0.030679654330015182, "learning_rate": 0.01, "loss": 1.9628, "step": 77970 }, { "epoch": 8.016964836520666, "grad_norm": 0.054546356201171875, "learning_rate": 0.01, "loss": 1.9675, "step": 77973 }, { "epoch": 8.017273288093769, "grad_norm": 0.12222815304994583, "learning_rate": 0.01, "loss": 1.983, "step": 77976 }, { "epoch": 8.017581739666872, "grad_norm": 0.071864552795887, "learning_rate": 0.01, "loss": 1.976, "step": 77979 }, { "epoch": 8.017890191239974, "grad_norm": 0.08329971879720688, "learning_rate": 0.01, "loss": 1.9661, "step": 77982 }, { "epoch": 8.01819864281308, "grad_norm": 0.051458850502967834, "learning_rate": 0.01, "loss": 1.9574, "step": 77985 }, { "epoch": 8.018507094386182, "grad_norm": 0.02989008091390133, "learning_rate": 0.01, "loss": 1.9597, "step": 77988 }, { "epoch": 8.018815545959285, "grad_norm": 0.03450706601142883, "learning_rate": 0.01, "loss": 1.9443, "step": 77991 }, { "epoch": 8.019123997532388, "grad_norm": 0.13265404105186462, "learning_rate": 0.01, "loss": 1.9695, "step": 77994 }, { "epoch": 8.01943244910549, "grad_norm": 0.04526760056614876, "learning_rate": 0.01, "loss": 1.9463, "step": 77997 }, { "epoch": 8.019740900678594, "grad_norm": 0.0469353124499321, "learning_rate": 0.01, "loss": 1.9581, "step": 78000 }, { "epoch": 8.020049352251696, "grad_norm": 0.1211186870932579, "learning_rate": 0.01, "loss": 1.9357, "step": 78003 }, { "epoch": 8.0203578038248, "grad_norm": 0.07616788893938065, "learning_rate": 0.01, "loss": 1.9644, "step": 78006 }, { "epoch": 8.020666255397902, "grad_norm": 0.06855116039514542, "learning_rate": 0.01, "loss": 1.9846, "step": 78009 }, { "epoch": 8.020974706971005, "grad_norm": 0.04688597843050957, "learning_rate": 0.01, "loss": 1.9584, "step": 78012 }, { "epoch": 8.021283158544108, "grad_norm": 0.05328085646033287, "learning_rate": 0.01, "loss": 1.9771, "step": 78015 }, { "epoch": 8.021591610117211, "grad_norm": 0.051051221787929535, "learning_rate": 0.01, "loss": 1.9733, "step": 78018 }, { "epoch": 8.021900061690314, "grad_norm": 0.11757440865039825, "learning_rate": 0.01, "loss": 1.9645, "step": 78021 }, { "epoch": 8.022208513263418, "grad_norm": 0.04119103401899338, "learning_rate": 0.01, "loss": 1.9864, "step": 78024 }, { "epoch": 8.022516964836521, "grad_norm": 0.03476836904883385, "learning_rate": 0.01, "loss": 1.9601, "step": 78027 }, { "epoch": 8.022825416409624, "grad_norm": 0.035968925803899765, "learning_rate": 0.01, "loss": 1.9763, "step": 78030 }, { "epoch": 8.023133867982727, "grad_norm": 0.12972131371498108, "learning_rate": 0.01, "loss": 1.9476, "step": 78033 }, { "epoch": 8.02344231955583, "grad_norm": 0.06801027059555054, "learning_rate": 0.01, "loss": 1.9729, "step": 78036 }, { "epoch": 8.023750771128933, "grad_norm": 0.04971321299672127, "learning_rate": 0.01, "loss": 1.9562, "step": 78039 }, { "epoch": 8.024059222702036, "grad_norm": 0.035642463713884354, "learning_rate": 0.01, "loss": 1.9866, "step": 78042 }, { "epoch": 8.024367674275139, "grad_norm": 0.09247110038995743, "learning_rate": 0.01, "loss": 1.967, "step": 78045 }, { "epoch": 8.024676125848242, "grad_norm": 0.10186565667390823, "learning_rate": 0.01, "loss": 1.9748, "step": 78048 }, { "epoch": 8.024984577421344, "grad_norm": 0.040058303624391556, "learning_rate": 0.01, "loss": 1.9464, "step": 78051 }, { "epoch": 8.025293028994447, "grad_norm": 0.09354520589113235, "learning_rate": 0.01, "loss": 1.9671, "step": 78054 }, { "epoch": 8.02560148056755, "grad_norm": 0.07421064376831055, "learning_rate": 0.01, "loss": 1.972, "step": 78057 }, { "epoch": 8.025909932140653, "grad_norm": 0.057933758944272995, "learning_rate": 0.01, "loss": 1.9586, "step": 78060 }, { "epoch": 8.026218383713758, "grad_norm": 0.05190437287092209, "learning_rate": 0.01, "loss": 1.9642, "step": 78063 }, { "epoch": 8.02652683528686, "grad_norm": 0.06811270117759705, "learning_rate": 0.01, "loss": 1.9585, "step": 78066 }, { "epoch": 8.026835286859964, "grad_norm": 0.0652383342385292, "learning_rate": 0.01, "loss": 1.9588, "step": 78069 }, { "epoch": 8.027143738433066, "grad_norm": 0.04355868324637413, "learning_rate": 0.01, "loss": 1.9365, "step": 78072 }, { "epoch": 8.02745219000617, "grad_norm": 0.04373233765363693, "learning_rate": 0.01, "loss": 1.9797, "step": 78075 }, { "epoch": 8.027760641579272, "grad_norm": 0.04739624261856079, "learning_rate": 0.01, "loss": 1.9525, "step": 78078 }, { "epoch": 8.028069093152375, "grad_norm": 0.10096476972103119, "learning_rate": 0.01, "loss": 1.9541, "step": 78081 }, { "epoch": 8.028377544725478, "grad_norm": 0.04511374235153198, "learning_rate": 0.01, "loss": 1.9665, "step": 78084 }, { "epoch": 8.02868599629858, "grad_norm": 0.1182047426700592, "learning_rate": 0.01, "loss": 1.9538, "step": 78087 }, { "epoch": 8.028994447871684, "grad_norm": 0.044908009469509125, "learning_rate": 0.01, "loss": 1.9552, "step": 78090 }, { "epoch": 8.029302899444787, "grad_norm": 0.03893660008907318, "learning_rate": 0.01, "loss": 1.9495, "step": 78093 }, { "epoch": 8.02961135101789, "grad_norm": 0.08515772223472595, "learning_rate": 0.01, "loss": 1.9467, "step": 78096 }, { "epoch": 8.029919802590992, "grad_norm": 0.09454181045293808, "learning_rate": 0.01, "loss": 1.95, "step": 78099 }, { "epoch": 8.030228254164097, "grad_norm": 0.050406310707330704, "learning_rate": 0.01, "loss": 1.9842, "step": 78102 }, { "epoch": 8.0305367057372, "grad_norm": 0.1353389024734497, "learning_rate": 0.01, "loss": 1.99, "step": 78105 }, { "epoch": 8.030845157310303, "grad_norm": 0.14368806779384613, "learning_rate": 0.01, "loss": 1.9558, "step": 78108 }, { "epoch": 8.031153608883406, "grad_norm": 0.11506987363100052, "learning_rate": 0.01, "loss": 1.954, "step": 78111 }, { "epoch": 8.031462060456509, "grad_norm": 0.04621575027704239, "learning_rate": 0.01, "loss": 1.9627, "step": 78114 }, { "epoch": 8.031770512029611, "grad_norm": 0.06082366406917572, "learning_rate": 0.01, "loss": 1.9633, "step": 78117 }, { "epoch": 8.032078963602714, "grad_norm": 0.053143613040447235, "learning_rate": 0.01, "loss": 1.9501, "step": 78120 }, { "epoch": 8.032387415175817, "grad_norm": 0.04185790941119194, "learning_rate": 0.01, "loss": 1.9761, "step": 78123 }, { "epoch": 8.03269586674892, "grad_norm": 0.06397274881601334, "learning_rate": 0.01, "loss": 1.9692, "step": 78126 }, { "epoch": 8.033004318322023, "grad_norm": 0.1405619978904724, "learning_rate": 0.01, "loss": 1.9462, "step": 78129 }, { "epoch": 8.033312769895126, "grad_norm": 0.04655090346932411, "learning_rate": 0.01, "loss": 1.9708, "step": 78132 }, { "epoch": 8.033621221468229, "grad_norm": 0.041024383157491684, "learning_rate": 0.01, "loss": 1.9527, "step": 78135 }, { "epoch": 8.033929673041332, "grad_norm": 0.04685423895716667, "learning_rate": 0.01, "loss": 1.9896, "step": 78138 }, { "epoch": 8.034238124614436, "grad_norm": 0.044466495513916016, "learning_rate": 0.01, "loss": 1.9442, "step": 78141 }, { "epoch": 8.03454657618754, "grad_norm": 0.048852045089006424, "learning_rate": 0.01, "loss": 1.9589, "step": 78144 }, { "epoch": 8.034855027760642, "grad_norm": 0.03688976168632507, "learning_rate": 0.01, "loss": 1.9816, "step": 78147 }, { "epoch": 8.035163479333745, "grad_norm": 0.05261220410466194, "learning_rate": 0.01, "loss": 1.9511, "step": 78150 }, { "epoch": 8.035471930906848, "grad_norm": 0.09963783621788025, "learning_rate": 0.01, "loss": 1.9364, "step": 78153 }, { "epoch": 8.03578038247995, "grad_norm": 0.035708215087652206, "learning_rate": 0.01, "loss": 1.9658, "step": 78156 }, { "epoch": 8.036088834053054, "grad_norm": 0.031529273837804794, "learning_rate": 0.01, "loss": 1.9496, "step": 78159 }, { "epoch": 8.036397285626157, "grad_norm": 0.045165106654167175, "learning_rate": 0.01, "loss": 1.9857, "step": 78162 }, { "epoch": 8.03670573719926, "grad_norm": 0.10784061253070831, "learning_rate": 0.01, "loss": 1.9382, "step": 78165 }, { "epoch": 8.037014188772362, "grad_norm": 0.04860498756170273, "learning_rate": 0.01, "loss": 1.9473, "step": 78168 }, { "epoch": 8.037322640345465, "grad_norm": 0.055530451238155365, "learning_rate": 0.01, "loss": 1.9521, "step": 78171 }, { "epoch": 8.037631091918568, "grad_norm": 0.03332025185227394, "learning_rate": 0.01, "loss": 1.9515, "step": 78174 }, { "epoch": 8.037939543491671, "grad_norm": 0.04176173731684685, "learning_rate": 0.01, "loss": 1.9745, "step": 78177 }, { "epoch": 8.038247995064776, "grad_norm": 0.04813525825738907, "learning_rate": 0.01, "loss": 1.9447, "step": 78180 }, { "epoch": 8.038556446637878, "grad_norm": 0.06635365635156631, "learning_rate": 0.01, "loss": 1.9651, "step": 78183 }, { "epoch": 8.038864898210981, "grad_norm": 0.12882399559020996, "learning_rate": 0.01, "loss": 1.9628, "step": 78186 }, { "epoch": 8.039173349784084, "grad_norm": 0.03530122712254524, "learning_rate": 0.01, "loss": 1.9524, "step": 78189 }, { "epoch": 8.039481801357187, "grad_norm": 0.11424420028924942, "learning_rate": 0.01, "loss": 1.9459, "step": 78192 }, { "epoch": 8.03979025293029, "grad_norm": 0.05186377838253975, "learning_rate": 0.01, "loss": 1.9772, "step": 78195 }, { "epoch": 8.040098704503393, "grad_norm": 0.09932134300470352, "learning_rate": 0.01, "loss": 1.9558, "step": 78198 }, { "epoch": 8.040407156076496, "grad_norm": 0.05855415016412735, "learning_rate": 0.01, "loss": 1.9889, "step": 78201 }, { "epoch": 8.040715607649599, "grad_norm": 0.041826751083135605, "learning_rate": 0.01, "loss": 1.9607, "step": 78204 }, { "epoch": 8.041024059222702, "grad_norm": 0.04150779917836189, "learning_rate": 0.01, "loss": 1.942, "step": 78207 }, { "epoch": 8.041332510795804, "grad_norm": 0.04734393209218979, "learning_rate": 0.01, "loss": 1.9375, "step": 78210 }, { "epoch": 8.041640962368907, "grad_norm": 0.03574753925204277, "learning_rate": 0.01, "loss": 1.961, "step": 78213 }, { "epoch": 8.04194941394201, "grad_norm": 0.05483570322394371, "learning_rate": 0.01, "loss": 1.9661, "step": 78216 }, { "epoch": 8.042257865515115, "grad_norm": 0.10191001743078232, "learning_rate": 0.01, "loss": 1.9362, "step": 78219 }, { "epoch": 8.042566317088218, "grad_norm": 0.0770745575428009, "learning_rate": 0.01, "loss": 1.9703, "step": 78222 }, { "epoch": 8.04287476866132, "grad_norm": 0.04721737280488014, "learning_rate": 0.01, "loss": 1.9536, "step": 78225 }, { "epoch": 8.043183220234424, "grad_norm": 0.06755993515253067, "learning_rate": 0.01, "loss": 1.9529, "step": 78228 }, { "epoch": 8.043491671807526, "grad_norm": 0.07830438762903214, "learning_rate": 0.01, "loss": 1.9692, "step": 78231 }, { "epoch": 8.04380012338063, "grad_norm": 0.08949405699968338, "learning_rate": 0.01, "loss": 1.9394, "step": 78234 }, { "epoch": 8.044108574953732, "grad_norm": 0.05038638040423393, "learning_rate": 0.01, "loss": 1.9536, "step": 78237 }, { "epoch": 8.044417026526835, "grad_norm": 0.07417938113212585, "learning_rate": 0.01, "loss": 1.9666, "step": 78240 }, { "epoch": 8.044725478099938, "grad_norm": 0.07824010401964188, "learning_rate": 0.01, "loss": 1.9558, "step": 78243 }, { "epoch": 8.04503392967304, "grad_norm": 0.06778912246227264, "learning_rate": 0.01, "loss": 1.9755, "step": 78246 }, { "epoch": 8.045342381246144, "grad_norm": 0.09098850935697556, "learning_rate": 0.01, "loss": 1.9581, "step": 78249 }, { "epoch": 8.045650832819247, "grad_norm": 0.07562565058469772, "learning_rate": 0.01, "loss": 1.9345, "step": 78252 }, { "epoch": 8.04595928439235, "grad_norm": 0.06573687493801117, "learning_rate": 0.01, "loss": 1.9266, "step": 78255 }, { "epoch": 8.046267735965454, "grad_norm": 0.08350115269422531, "learning_rate": 0.01, "loss": 1.9486, "step": 78258 }, { "epoch": 8.046576187538557, "grad_norm": 0.06631845980882645, "learning_rate": 0.01, "loss": 1.9557, "step": 78261 }, { "epoch": 8.04688463911166, "grad_norm": 0.10218732804059982, "learning_rate": 0.01, "loss": 1.9596, "step": 78264 }, { "epoch": 8.047193090684763, "grad_norm": 0.08082437515258789, "learning_rate": 0.01, "loss": 1.9932, "step": 78267 }, { "epoch": 8.047501542257866, "grad_norm": 0.07313011586666107, "learning_rate": 0.01, "loss": 1.9523, "step": 78270 }, { "epoch": 8.047809993830969, "grad_norm": 0.07694101333618164, "learning_rate": 0.01, "loss": 1.9566, "step": 78273 }, { "epoch": 8.048118445404072, "grad_norm": 0.09288178384304047, "learning_rate": 0.01, "loss": 1.9579, "step": 78276 }, { "epoch": 8.048426896977174, "grad_norm": 0.13423343002796173, "learning_rate": 0.01, "loss": 1.9547, "step": 78279 }, { "epoch": 8.048735348550277, "grad_norm": 0.06640274822711945, "learning_rate": 0.01, "loss": 1.9612, "step": 78282 }, { "epoch": 8.04904380012338, "grad_norm": 0.042986899614334106, "learning_rate": 0.01, "loss": 1.9542, "step": 78285 }, { "epoch": 8.049352251696483, "grad_norm": 0.08272531628608704, "learning_rate": 0.01, "loss": 1.9597, "step": 78288 }, { "epoch": 8.049660703269586, "grad_norm": 0.06874362379312515, "learning_rate": 0.01, "loss": 1.9606, "step": 78291 }, { "epoch": 8.049969154842689, "grad_norm": 0.0621911846101284, "learning_rate": 0.01, "loss": 1.9541, "step": 78294 }, { "epoch": 8.050277606415793, "grad_norm": 0.10276035964488983, "learning_rate": 0.01, "loss": 1.9548, "step": 78297 }, { "epoch": 8.050586057988896, "grad_norm": 0.0899696797132492, "learning_rate": 0.01, "loss": 1.9646, "step": 78300 }, { "epoch": 8.050894509562, "grad_norm": 0.05405474826693535, "learning_rate": 0.01, "loss": 1.9602, "step": 78303 }, { "epoch": 8.051202961135102, "grad_norm": 0.10780783742666245, "learning_rate": 0.01, "loss": 1.9707, "step": 78306 }, { "epoch": 8.051511412708205, "grad_norm": 0.057042498141527176, "learning_rate": 0.01, "loss": 1.9572, "step": 78309 }, { "epoch": 8.051819864281308, "grad_norm": 0.08696413785219193, "learning_rate": 0.01, "loss": 1.9568, "step": 78312 }, { "epoch": 8.05212831585441, "grad_norm": 0.05428167060017586, "learning_rate": 0.01, "loss": 1.9585, "step": 78315 }, { "epoch": 8.052436767427514, "grad_norm": 0.03312308341264725, "learning_rate": 0.01, "loss": 1.9662, "step": 78318 }, { "epoch": 8.052745219000617, "grad_norm": 0.054687369614839554, "learning_rate": 0.01, "loss": 1.9535, "step": 78321 }, { "epoch": 8.05305367057372, "grad_norm": 0.078495554625988, "learning_rate": 0.01, "loss": 1.933, "step": 78324 }, { "epoch": 8.053362122146822, "grad_norm": 0.0804872140288353, "learning_rate": 0.01, "loss": 1.9329, "step": 78327 }, { "epoch": 8.053670573719925, "grad_norm": 0.0818592831492424, "learning_rate": 0.01, "loss": 1.9901, "step": 78330 }, { "epoch": 8.053979025293028, "grad_norm": 0.0550164058804512, "learning_rate": 0.01, "loss": 1.9513, "step": 78333 }, { "epoch": 8.054287476866133, "grad_norm": 0.04894454777240753, "learning_rate": 0.01, "loss": 1.9609, "step": 78336 }, { "epoch": 8.054595928439236, "grad_norm": 0.04496561363339424, "learning_rate": 0.01, "loss": 1.9587, "step": 78339 }, { "epoch": 8.054904380012339, "grad_norm": 0.04322918504476547, "learning_rate": 0.01, "loss": 1.9787, "step": 78342 }, { "epoch": 8.055212831585441, "grad_norm": 0.05272132530808449, "learning_rate": 0.01, "loss": 1.9287, "step": 78345 }, { "epoch": 8.055521283158544, "grad_norm": 0.045108698308467865, "learning_rate": 0.01, "loss": 1.9578, "step": 78348 }, { "epoch": 8.055829734731647, "grad_norm": 0.10814124345779419, "learning_rate": 0.01, "loss": 1.9728, "step": 78351 }, { "epoch": 8.05613818630475, "grad_norm": 0.04985149949789047, "learning_rate": 0.01, "loss": 1.9793, "step": 78354 }, { "epoch": 8.056446637877853, "grad_norm": 0.10839637368917465, "learning_rate": 0.01, "loss": 1.9707, "step": 78357 }, { "epoch": 8.056755089450956, "grad_norm": 0.0739506408572197, "learning_rate": 0.01, "loss": 1.9406, "step": 78360 }, { "epoch": 8.057063541024059, "grad_norm": 0.0455130897462368, "learning_rate": 0.01, "loss": 1.9867, "step": 78363 }, { "epoch": 8.057371992597162, "grad_norm": 0.04794808104634285, "learning_rate": 0.01, "loss": 1.9912, "step": 78366 }, { "epoch": 8.057680444170265, "grad_norm": 0.10262172669172287, "learning_rate": 0.01, "loss": 1.9512, "step": 78369 }, { "epoch": 8.057988895743367, "grad_norm": 0.1068585216999054, "learning_rate": 0.01, "loss": 1.9476, "step": 78372 }, { "epoch": 8.058297347316472, "grad_norm": 0.0809110701084137, "learning_rate": 0.01, "loss": 1.9853, "step": 78375 }, { "epoch": 8.058605798889575, "grad_norm": 0.03217419609427452, "learning_rate": 0.01, "loss": 1.9601, "step": 78378 }, { "epoch": 8.058914250462678, "grad_norm": 0.07601505517959595, "learning_rate": 0.01, "loss": 1.9385, "step": 78381 }, { "epoch": 8.05922270203578, "grad_norm": 0.1304755061864853, "learning_rate": 0.01, "loss": 1.9627, "step": 78384 }, { "epoch": 8.059531153608884, "grad_norm": 0.06895633786916733, "learning_rate": 0.01, "loss": 1.956, "step": 78387 }, { "epoch": 8.059839605181986, "grad_norm": 0.07370155304670334, "learning_rate": 0.01, "loss": 1.9686, "step": 78390 }, { "epoch": 8.06014805675509, "grad_norm": 0.07934858649969101, "learning_rate": 0.01, "loss": 1.9391, "step": 78393 }, { "epoch": 8.060456508328192, "grad_norm": 0.16500116884708405, "learning_rate": 0.01, "loss": 1.9841, "step": 78396 }, { "epoch": 8.060764959901295, "grad_norm": 0.036092065274715424, "learning_rate": 0.01, "loss": 1.9848, "step": 78399 }, { "epoch": 8.061073411474398, "grad_norm": 0.03540043160319328, "learning_rate": 0.01, "loss": 1.9557, "step": 78402 }, { "epoch": 8.061381863047501, "grad_norm": 0.050339192152023315, "learning_rate": 0.01, "loss": 1.9572, "step": 78405 }, { "epoch": 8.061690314620604, "grad_norm": 0.05511642247438431, "learning_rate": 0.01, "loss": 1.9693, "step": 78408 }, { "epoch": 8.061998766193707, "grad_norm": 0.033187735825777054, "learning_rate": 0.01, "loss": 1.9487, "step": 78411 }, { "epoch": 8.062307217766811, "grad_norm": 0.06421135365962982, "learning_rate": 0.01, "loss": 1.9566, "step": 78414 }, { "epoch": 8.062615669339914, "grad_norm": 0.042910970747470856, "learning_rate": 0.01, "loss": 1.9483, "step": 78417 }, { "epoch": 8.062924120913017, "grad_norm": 0.06334998458623886, "learning_rate": 0.01, "loss": 1.937, "step": 78420 }, { "epoch": 8.06323257248612, "grad_norm": 0.0897008404135704, "learning_rate": 0.01, "loss": 1.9753, "step": 78423 }, { "epoch": 8.063541024059223, "grad_norm": 0.07113679498434067, "learning_rate": 0.01, "loss": 1.9298, "step": 78426 }, { "epoch": 8.063849475632326, "grad_norm": 0.08476056903600693, "learning_rate": 0.01, "loss": 1.9452, "step": 78429 }, { "epoch": 8.064157927205429, "grad_norm": 0.05988757312297821, "learning_rate": 0.01, "loss": 1.9789, "step": 78432 }, { "epoch": 8.064466378778532, "grad_norm": 0.13881082832813263, "learning_rate": 0.01, "loss": 1.9715, "step": 78435 }, { "epoch": 8.064774830351634, "grad_norm": 0.05506844446063042, "learning_rate": 0.01, "loss": 1.9347, "step": 78438 }, { "epoch": 8.065083281924737, "grad_norm": 0.057986658066511154, "learning_rate": 0.01, "loss": 1.9588, "step": 78441 }, { "epoch": 8.06539173349784, "grad_norm": 0.06731247901916504, "learning_rate": 0.01, "loss": 1.969, "step": 78444 }, { "epoch": 8.065700185070943, "grad_norm": 0.10083737224340439, "learning_rate": 0.01, "loss": 1.9362, "step": 78447 }, { "epoch": 8.066008636644046, "grad_norm": 0.11750660091638565, "learning_rate": 0.01, "loss": 1.9563, "step": 78450 }, { "epoch": 8.06631708821715, "grad_norm": 0.07133502513170242, "learning_rate": 0.01, "loss": 1.962, "step": 78453 }, { "epoch": 8.066625539790254, "grad_norm": 0.055653370916843414, "learning_rate": 0.01, "loss": 1.941, "step": 78456 }, { "epoch": 8.066933991363356, "grad_norm": 0.04406362399458885, "learning_rate": 0.01, "loss": 1.9611, "step": 78459 }, { "epoch": 8.06724244293646, "grad_norm": 0.07277071475982666, "learning_rate": 0.01, "loss": 1.9584, "step": 78462 }, { "epoch": 8.067550894509562, "grad_norm": 0.09624621272087097, "learning_rate": 0.01, "loss": 1.9848, "step": 78465 }, { "epoch": 8.067859346082665, "grad_norm": 0.12320689857006073, "learning_rate": 0.01, "loss": 1.9675, "step": 78468 }, { "epoch": 8.068167797655768, "grad_norm": 0.05010239779949188, "learning_rate": 0.01, "loss": 1.9541, "step": 78471 }, { "epoch": 8.06847624922887, "grad_norm": 0.09163736552000046, "learning_rate": 0.01, "loss": 1.9605, "step": 78474 }, { "epoch": 8.068784700801974, "grad_norm": 0.07241861522197723, "learning_rate": 0.01, "loss": 1.9458, "step": 78477 }, { "epoch": 8.069093152375077, "grad_norm": 0.09269708395004272, "learning_rate": 0.01, "loss": 1.96, "step": 78480 }, { "epoch": 8.06940160394818, "grad_norm": 0.12642695009708405, "learning_rate": 0.01, "loss": 1.9378, "step": 78483 }, { "epoch": 8.069710055521282, "grad_norm": 0.051001306623220444, "learning_rate": 0.01, "loss": 1.9416, "step": 78486 }, { "epoch": 8.070018507094387, "grad_norm": 0.036773428320884705, "learning_rate": 0.01, "loss": 1.9718, "step": 78489 }, { "epoch": 8.07032695866749, "grad_norm": 0.0340329185128212, "learning_rate": 0.01, "loss": 1.9572, "step": 78492 }, { "epoch": 8.070635410240593, "grad_norm": 0.05808752402663231, "learning_rate": 0.01, "loss": 1.9477, "step": 78495 }, { "epoch": 8.070943861813696, "grad_norm": 0.07179629802703857, "learning_rate": 0.01, "loss": 1.9798, "step": 78498 }, { "epoch": 8.071252313386799, "grad_norm": 0.0458885133266449, "learning_rate": 0.01, "loss": 1.9396, "step": 78501 }, { "epoch": 8.071560764959901, "grad_norm": 0.04442944377660751, "learning_rate": 0.01, "loss": 1.9907, "step": 78504 }, { "epoch": 8.071869216533004, "grad_norm": 0.08146031200885773, "learning_rate": 0.01, "loss": 1.9325, "step": 78507 }, { "epoch": 8.072177668106107, "grad_norm": 0.08753294497728348, "learning_rate": 0.01, "loss": 1.9351, "step": 78510 }, { "epoch": 8.07248611967921, "grad_norm": 0.12875844538211823, "learning_rate": 0.01, "loss": 1.9489, "step": 78513 }, { "epoch": 8.072794571252313, "grad_norm": 0.11097821593284607, "learning_rate": 0.01, "loss": 1.9553, "step": 78516 }, { "epoch": 8.073103022825416, "grad_norm": 0.03981398791074753, "learning_rate": 0.01, "loss": 1.9648, "step": 78519 }, { "epoch": 8.073411474398519, "grad_norm": 0.047386813908815384, "learning_rate": 0.01, "loss": 1.9659, "step": 78522 }, { "epoch": 8.073719925971622, "grad_norm": 0.058222249150276184, "learning_rate": 0.01, "loss": 1.9455, "step": 78525 }, { "epoch": 8.074028377544726, "grad_norm": 0.04991047456860542, "learning_rate": 0.01, "loss": 1.9457, "step": 78528 }, { "epoch": 8.07433682911783, "grad_norm": 0.08375823497772217, "learning_rate": 0.01, "loss": 1.9495, "step": 78531 }, { "epoch": 8.074645280690932, "grad_norm": 0.07699902355670929, "learning_rate": 0.01, "loss": 1.9532, "step": 78534 }, { "epoch": 8.074953732264035, "grad_norm": 0.10210588574409485, "learning_rate": 0.01, "loss": 1.9516, "step": 78537 }, { "epoch": 8.075262183837138, "grad_norm": 0.04255238175392151, "learning_rate": 0.01, "loss": 1.9454, "step": 78540 }, { "epoch": 8.07557063541024, "grad_norm": 0.059122033417224884, "learning_rate": 0.01, "loss": 1.944, "step": 78543 }, { "epoch": 8.075879086983344, "grad_norm": 0.11363697797060013, "learning_rate": 0.01, "loss": 1.9372, "step": 78546 }, { "epoch": 8.076187538556447, "grad_norm": 0.11338911950588226, "learning_rate": 0.01, "loss": 1.9733, "step": 78549 }, { "epoch": 8.07649599012955, "grad_norm": 0.049438465386629105, "learning_rate": 0.01, "loss": 1.9347, "step": 78552 }, { "epoch": 8.076804441702652, "grad_norm": 0.040645066648721695, "learning_rate": 0.01, "loss": 1.9642, "step": 78555 }, { "epoch": 8.077112893275755, "grad_norm": 0.1113462969660759, "learning_rate": 0.01, "loss": 1.953, "step": 78558 }, { "epoch": 8.077421344848858, "grad_norm": 0.07110290974378586, "learning_rate": 0.01, "loss": 1.9531, "step": 78561 }, { "epoch": 8.077729796421961, "grad_norm": 0.12881991267204285, "learning_rate": 0.01, "loss": 1.9508, "step": 78564 }, { "epoch": 8.078038247995066, "grad_norm": 0.048802804201841354, "learning_rate": 0.01, "loss": 1.9548, "step": 78567 }, { "epoch": 8.078346699568169, "grad_norm": 0.049757737666368484, "learning_rate": 0.01, "loss": 1.9342, "step": 78570 }, { "epoch": 8.078655151141271, "grad_norm": 0.1158534437417984, "learning_rate": 0.01, "loss": 1.9761, "step": 78573 }, { "epoch": 8.078963602714374, "grad_norm": 0.037280384451150894, "learning_rate": 0.01, "loss": 1.9559, "step": 78576 }, { "epoch": 8.079272054287477, "grad_norm": 0.07610873132944107, "learning_rate": 0.01, "loss": 1.9482, "step": 78579 }, { "epoch": 8.07958050586058, "grad_norm": 0.09582018852233887, "learning_rate": 0.01, "loss": 1.9371, "step": 78582 }, { "epoch": 8.079888957433683, "grad_norm": 0.0594031997025013, "learning_rate": 0.01, "loss": 1.9469, "step": 78585 }, { "epoch": 8.080197409006786, "grad_norm": 0.10252749919891357, "learning_rate": 0.01, "loss": 1.9497, "step": 78588 }, { "epoch": 8.080505860579889, "grad_norm": 0.05171642452478409, "learning_rate": 0.01, "loss": 1.9323, "step": 78591 }, { "epoch": 8.080814312152992, "grad_norm": 0.044461604207754135, "learning_rate": 0.01, "loss": 1.9851, "step": 78594 }, { "epoch": 8.081122763726095, "grad_norm": 0.04300977289676666, "learning_rate": 0.01, "loss": 1.948, "step": 78597 }, { "epoch": 8.081431215299197, "grad_norm": 0.04183531552553177, "learning_rate": 0.01, "loss": 1.9684, "step": 78600 }, { "epoch": 8.0817396668723, "grad_norm": 0.14074847102165222, "learning_rate": 0.01, "loss": 1.9651, "step": 78603 }, { "epoch": 8.082048118445405, "grad_norm": 0.08665565401315689, "learning_rate": 0.01, "loss": 1.9464, "step": 78606 }, { "epoch": 8.082356570018508, "grad_norm": 0.10652801394462585, "learning_rate": 0.01, "loss": 1.9583, "step": 78609 }, { "epoch": 8.08266502159161, "grad_norm": 0.08588683605194092, "learning_rate": 0.01, "loss": 1.9691, "step": 78612 }, { "epoch": 8.082973473164714, "grad_norm": 0.05880355089902878, "learning_rate": 0.01, "loss": 1.9504, "step": 78615 }, { "epoch": 8.083281924737816, "grad_norm": 0.052834395319223404, "learning_rate": 0.01, "loss": 1.9468, "step": 78618 }, { "epoch": 8.08359037631092, "grad_norm": 0.03865375742316246, "learning_rate": 0.01, "loss": 1.9818, "step": 78621 }, { "epoch": 8.083898827884022, "grad_norm": 0.04939945414662361, "learning_rate": 0.01, "loss": 1.9534, "step": 78624 }, { "epoch": 8.084207279457125, "grad_norm": 0.10530546307563782, "learning_rate": 0.01, "loss": 1.9772, "step": 78627 }, { "epoch": 8.084515731030228, "grad_norm": 0.07206293940544128, "learning_rate": 0.01, "loss": 1.957, "step": 78630 }, { "epoch": 8.084824182603331, "grad_norm": 0.05856684222817421, "learning_rate": 0.01, "loss": 1.9532, "step": 78633 }, { "epoch": 8.085132634176434, "grad_norm": 0.07027263939380646, "learning_rate": 0.01, "loss": 1.9876, "step": 78636 }, { "epoch": 8.085441085749537, "grad_norm": 0.09340980648994446, "learning_rate": 0.01, "loss": 1.9626, "step": 78639 }, { "epoch": 8.08574953732264, "grad_norm": 0.06842325627803802, "learning_rate": 0.01, "loss": 1.975, "step": 78642 }, { "epoch": 8.086057988895744, "grad_norm": 0.06957598775625229, "learning_rate": 0.01, "loss": 1.9856, "step": 78645 }, { "epoch": 8.086366440468847, "grad_norm": 0.09373968094587326, "learning_rate": 0.01, "loss": 1.9355, "step": 78648 }, { "epoch": 8.08667489204195, "grad_norm": 0.05674611032009125, "learning_rate": 0.01, "loss": 1.9224, "step": 78651 }, { "epoch": 8.086983343615053, "grad_norm": 0.0915849506855011, "learning_rate": 0.01, "loss": 1.9687, "step": 78654 }, { "epoch": 8.087291795188156, "grad_norm": 0.08001093566417694, "learning_rate": 0.01, "loss": 1.9623, "step": 78657 }, { "epoch": 8.087600246761259, "grad_norm": 0.09341204166412354, "learning_rate": 0.01, "loss": 1.9373, "step": 78660 }, { "epoch": 8.087908698334362, "grad_norm": 0.04382408410310745, "learning_rate": 0.01, "loss": 1.9477, "step": 78663 }, { "epoch": 8.088217149907464, "grad_norm": 0.038650207221508026, "learning_rate": 0.01, "loss": 1.9516, "step": 78666 }, { "epoch": 8.088525601480567, "grad_norm": 0.05687751621007919, "learning_rate": 0.01, "loss": 1.9768, "step": 78669 }, { "epoch": 8.08883405305367, "grad_norm": 0.07734004408121109, "learning_rate": 0.01, "loss": 1.9655, "step": 78672 }, { "epoch": 8.089142504626773, "grad_norm": 0.08115539699792862, "learning_rate": 0.01, "loss": 1.9478, "step": 78675 }, { "epoch": 8.089450956199876, "grad_norm": 0.07764489203691483, "learning_rate": 0.01, "loss": 1.9428, "step": 78678 }, { "epoch": 8.089759407772979, "grad_norm": 0.04523799568414688, "learning_rate": 0.01, "loss": 1.9614, "step": 78681 }, { "epoch": 8.090067859346084, "grad_norm": 0.06825567036867142, "learning_rate": 0.01, "loss": 1.9818, "step": 78684 }, { "epoch": 8.090376310919186, "grad_norm": 0.10438218712806702, "learning_rate": 0.01, "loss": 1.9671, "step": 78687 }, { "epoch": 8.09068476249229, "grad_norm": 0.0568692572414875, "learning_rate": 0.01, "loss": 1.9431, "step": 78690 }, { "epoch": 8.090993214065392, "grad_norm": 0.0484088696539402, "learning_rate": 0.01, "loss": 1.9492, "step": 78693 }, { "epoch": 8.091301665638495, "grad_norm": 0.09627815335988998, "learning_rate": 0.01, "loss": 1.9727, "step": 78696 }, { "epoch": 8.091610117211598, "grad_norm": 0.06979174166917801, "learning_rate": 0.01, "loss": 1.9453, "step": 78699 }, { "epoch": 8.0919185687847, "grad_norm": 0.07845932990312576, "learning_rate": 0.01, "loss": 1.9639, "step": 78702 }, { "epoch": 8.092227020357804, "grad_norm": 0.0819554328918457, "learning_rate": 0.01, "loss": 1.9728, "step": 78705 }, { "epoch": 8.092535471930907, "grad_norm": 0.07853356748819351, "learning_rate": 0.01, "loss": 1.9587, "step": 78708 }, { "epoch": 8.09284392350401, "grad_norm": 0.04697944596409798, "learning_rate": 0.01, "loss": 1.9718, "step": 78711 }, { "epoch": 8.093152375077112, "grad_norm": 0.15374042093753815, "learning_rate": 0.01, "loss": 1.9378, "step": 78714 }, { "epoch": 8.093460826650215, "grad_norm": 0.05507707968354225, "learning_rate": 0.01, "loss": 1.9579, "step": 78717 }, { "epoch": 8.093769278223318, "grad_norm": 0.08202649652957916, "learning_rate": 0.01, "loss": 1.9612, "step": 78720 }, { "epoch": 8.094077729796423, "grad_norm": 0.06541413813829422, "learning_rate": 0.01, "loss": 1.9949, "step": 78723 }, { "epoch": 8.094386181369526, "grad_norm": 0.08683803677558899, "learning_rate": 0.01, "loss": 1.9638, "step": 78726 }, { "epoch": 8.094694632942629, "grad_norm": 0.11512414366006851, "learning_rate": 0.01, "loss": 1.9618, "step": 78729 }, { "epoch": 8.095003084515731, "grad_norm": 0.08850602805614471, "learning_rate": 0.01, "loss": 1.9612, "step": 78732 }, { "epoch": 8.095311536088834, "grad_norm": 0.13835574686527252, "learning_rate": 0.01, "loss": 1.9757, "step": 78735 }, { "epoch": 8.095619987661937, "grad_norm": 0.080533467233181, "learning_rate": 0.01, "loss": 1.9353, "step": 78738 }, { "epoch": 8.09592843923504, "grad_norm": 0.07228211313486099, "learning_rate": 0.01, "loss": 1.9723, "step": 78741 }, { "epoch": 8.096236890808143, "grad_norm": 0.05860430374741554, "learning_rate": 0.01, "loss": 1.9574, "step": 78744 }, { "epoch": 8.096545342381246, "grad_norm": 0.03812957927584648, "learning_rate": 0.01, "loss": 1.9604, "step": 78747 }, { "epoch": 8.096853793954349, "grad_norm": 0.04736247658729553, "learning_rate": 0.01, "loss": 1.9382, "step": 78750 }, { "epoch": 8.097162245527452, "grad_norm": 0.0691232830286026, "learning_rate": 0.01, "loss": 1.9671, "step": 78753 }, { "epoch": 8.097470697100555, "grad_norm": 0.09025582671165466, "learning_rate": 0.01, "loss": 1.9292, "step": 78756 }, { "epoch": 8.097779148673657, "grad_norm": 0.06315673142671585, "learning_rate": 0.01, "loss": 1.9531, "step": 78759 }, { "epoch": 8.098087600246762, "grad_norm": 0.04398050159215927, "learning_rate": 0.01, "loss": 1.9732, "step": 78762 }, { "epoch": 8.098396051819865, "grad_norm": 0.04201388731598854, "learning_rate": 0.01, "loss": 1.9569, "step": 78765 }, { "epoch": 8.098704503392968, "grad_norm": 0.11394036561250687, "learning_rate": 0.01, "loss": 1.9759, "step": 78768 }, { "epoch": 8.09901295496607, "grad_norm": 0.05638299137353897, "learning_rate": 0.01, "loss": 1.9736, "step": 78771 }, { "epoch": 8.099321406539174, "grad_norm": 0.04001401364803314, "learning_rate": 0.01, "loss": 1.945, "step": 78774 }, { "epoch": 8.099629858112277, "grad_norm": 0.13935402035713196, "learning_rate": 0.01, "loss": 1.9627, "step": 78777 }, { "epoch": 8.09993830968538, "grad_norm": 0.0622687041759491, "learning_rate": 0.01, "loss": 1.9631, "step": 78780 }, { "epoch": 8.100246761258482, "grad_norm": 0.04048505052924156, "learning_rate": 0.01, "loss": 1.9869, "step": 78783 }, { "epoch": 8.100555212831585, "grad_norm": 0.04119844362139702, "learning_rate": 0.01, "loss": 1.9837, "step": 78786 }, { "epoch": 8.100863664404688, "grad_norm": 0.03986690193414688, "learning_rate": 0.01, "loss": 1.9769, "step": 78789 }, { "epoch": 8.101172115977791, "grad_norm": 0.07594916224479675, "learning_rate": 0.01, "loss": 1.9758, "step": 78792 }, { "epoch": 8.101480567550894, "grad_norm": 0.0828123390674591, "learning_rate": 0.01, "loss": 1.9544, "step": 78795 }, { "epoch": 8.101789019123997, "grad_norm": 0.05890440568327904, "learning_rate": 0.01, "loss": 1.9445, "step": 78798 }, { "epoch": 8.102097470697101, "grad_norm": 0.09964809566736221, "learning_rate": 0.01, "loss": 1.9728, "step": 78801 }, { "epoch": 8.102405922270204, "grad_norm": 0.06926414370536804, "learning_rate": 0.01, "loss": 1.9611, "step": 78804 }, { "epoch": 8.102714373843307, "grad_norm": 0.0764726996421814, "learning_rate": 0.01, "loss": 1.9326, "step": 78807 }, { "epoch": 8.10302282541641, "grad_norm": 0.08960816264152527, "learning_rate": 0.01, "loss": 1.9479, "step": 78810 }, { "epoch": 8.103331276989513, "grad_norm": 0.05448700487613678, "learning_rate": 0.01, "loss": 1.9482, "step": 78813 }, { "epoch": 8.103639728562616, "grad_norm": 0.060038816183805466, "learning_rate": 0.01, "loss": 1.9465, "step": 78816 }, { "epoch": 8.103948180135719, "grad_norm": 0.06182781606912613, "learning_rate": 0.01, "loss": 1.9467, "step": 78819 }, { "epoch": 8.104256631708822, "grad_norm": 0.050959113985300064, "learning_rate": 0.01, "loss": 1.9527, "step": 78822 }, { "epoch": 8.104565083281924, "grad_norm": 0.042466700077056885, "learning_rate": 0.01, "loss": 1.9397, "step": 78825 }, { "epoch": 8.104873534855027, "grad_norm": 0.04001423716545105, "learning_rate": 0.01, "loss": 1.9343, "step": 78828 }, { "epoch": 8.10518198642813, "grad_norm": 0.04415873810648918, "learning_rate": 0.01, "loss": 1.9366, "step": 78831 }, { "epoch": 8.105490438001233, "grad_norm": 0.09015651047229767, "learning_rate": 0.01, "loss": 1.9681, "step": 78834 }, { "epoch": 8.105798889574336, "grad_norm": 0.1599554866552353, "learning_rate": 0.01, "loss": 1.9733, "step": 78837 }, { "epoch": 8.10610734114744, "grad_norm": 0.09218296408653259, "learning_rate": 0.01, "loss": 1.965, "step": 78840 }, { "epoch": 8.106415792720544, "grad_norm": 0.07291611284017563, "learning_rate": 0.01, "loss": 1.9505, "step": 78843 }, { "epoch": 8.106724244293646, "grad_norm": 0.042586617171764374, "learning_rate": 0.01, "loss": 1.9292, "step": 78846 }, { "epoch": 8.10703269586675, "grad_norm": 0.03527746722102165, "learning_rate": 0.01, "loss": 1.9714, "step": 78849 }, { "epoch": 8.107341147439852, "grad_norm": 0.04984118789434433, "learning_rate": 0.01, "loss": 1.9462, "step": 78852 }, { "epoch": 8.107649599012955, "grad_norm": 0.08641780912876129, "learning_rate": 0.01, "loss": 1.9624, "step": 78855 }, { "epoch": 8.107958050586058, "grad_norm": 0.05808859318494797, "learning_rate": 0.01, "loss": 1.9702, "step": 78858 }, { "epoch": 8.108266502159161, "grad_norm": 0.06159026548266411, "learning_rate": 0.01, "loss": 1.9498, "step": 78861 }, { "epoch": 8.108574953732264, "grad_norm": 0.08137316256761551, "learning_rate": 0.01, "loss": 1.9669, "step": 78864 }, { "epoch": 8.108883405305367, "grad_norm": 0.06507211178541183, "learning_rate": 0.01, "loss": 1.9717, "step": 78867 }, { "epoch": 8.10919185687847, "grad_norm": 0.04363485053181648, "learning_rate": 0.01, "loss": 1.9401, "step": 78870 }, { "epoch": 8.109500308451572, "grad_norm": 0.1976436972618103, "learning_rate": 0.01, "loss": 1.9613, "step": 78873 }, { "epoch": 8.109808760024675, "grad_norm": 0.18802042305469513, "learning_rate": 0.01, "loss": 1.9306, "step": 78876 }, { "epoch": 8.11011721159778, "grad_norm": 0.11292927712202072, "learning_rate": 0.01, "loss": 1.9469, "step": 78879 }, { "epoch": 8.110425663170883, "grad_norm": 0.06154080107808113, "learning_rate": 0.01, "loss": 1.9484, "step": 78882 }, { "epoch": 8.110734114743986, "grad_norm": 0.04565787315368652, "learning_rate": 0.01, "loss": 1.9606, "step": 78885 }, { "epoch": 8.111042566317089, "grad_norm": 0.07542978227138519, "learning_rate": 0.01, "loss": 1.9606, "step": 78888 }, { "epoch": 8.111351017890192, "grad_norm": 0.11255155503749847, "learning_rate": 0.01, "loss": 1.9457, "step": 78891 }, { "epoch": 8.111659469463294, "grad_norm": 0.09924767911434174, "learning_rate": 0.01, "loss": 1.9571, "step": 78894 }, { "epoch": 8.111967921036397, "grad_norm": 0.06724300235509872, "learning_rate": 0.01, "loss": 1.9195, "step": 78897 }, { "epoch": 8.1122763726095, "grad_norm": 0.061787478625774384, "learning_rate": 0.01, "loss": 1.9726, "step": 78900 }, { "epoch": 8.112584824182603, "grad_norm": 0.06443925201892853, "learning_rate": 0.01, "loss": 1.9727, "step": 78903 }, { "epoch": 8.112893275755706, "grad_norm": 0.042934153228998184, "learning_rate": 0.01, "loss": 1.9433, "step": 78906 }, { "epoch": 8.113201727328809, "grad_norm": 0.04675191268324852, "learning_rate": 0.01, "loss": 1.9521, "step": 78909 }, { "epoch": 8.113510178901912, "grad_norm": 0.15414629876613617, "learning_rate": 0.01, "loss": 1.9489, "step": 78912 }, { "epoch": 8.113818630475015, "grad_norm": 0.06636694073677063, "learning_rate": 0.01, "loss": 1.9517, "step": 78915 }, { "epoch": 8.11412708204812, "grad_norm": 0.08664028346538544, "learning_rate": 0.01, "loss": 1.9547, "step": 78918 }, { "epoch": 8.114435533621222, "grad_norm": 0.04274681210517883, "learning_rate": 0.01, "loss": 1.9665, "step": 78921 }, { "epoch": 8.114743985194325, "grad_norm": 0.05404650419950485, "learning_rate": 0.01, "loss": 1.9421, "step": 78924 }, { "epoch": 8.115052436767428, "grad_norm": 0.04403231665492058, "learning_rate": 0.01, "loss": 1.9414, "step": 78927 }, { "epoch": 8.11536088834053, "grad_norm": 0.06800958514213562, "learning_rate": 0.01, "loss": 1.9505, "step": 78930 }, { "epoch": 8.115669339913634, "grad_norm": 0.06787417083978653, "learning_rate": 0.01, "loss": 1.95, "step": 78933 }, { "epoch": 8.115977791486737, "grad_norm": 0.046553414314985275, "learning_rate": 0.01, "loss": 1.959, "step": 78936 }, { "epoch": 8.11628624305984, "grad_norm": 0.051033198833465576, "learning_rate": 0.01, "loss": 1.9383, "step": 78939 }, { "epoch": 8.116594694632942, "grad_norm": 0.05013803020119667, "learning_rate": 0.01, "loss": 1.9515, "step": 78942 }, { "epoch": 8.116903146206045, "grad_norm": 0.06531897187232971, "learning_rate": 0.01, "loss": 1.9543, "step": 78945 }, { "epoch": 8.117211597779148, "grad_norm": 0.03636271879076958, "learning_rate": 0.01, "loss": 1.9357, "step": 78948 }, { "epoch": 8.117520049352251, "grad_norm": 0.04407816380262375, "learning_rate": 0.01, "loss": 1.9711, "step": 78951 }, { "epoch": 8.117828500925354, "grad_norm": 0.049220457673072815, "learning_rate": 0.01, "loss": 1.9348, "step": 78954 }, { "epoch": 8.118136952498459, "grad_norm": 0.061168961226940155, "learning_rate": 0.01, "loss": 1.9628, "step": 78957 }, { "epoch": 8.118445404071561, "grad_norm": 0.10130732506513596, "learning_rate": 0.01, "loss": 1.9562, "step": 78960 }, { "epoch": 8.118753855644664, "grad_norm": 0.1906558871269226, "learning_rate": 0.01, "loss": 1.9511, "step": 78963 }, { "epoch": 8.119062307217767, "grad_norm": 0.08722063153982162, "learning_rate": 0.01, "loss": 1.9725, "step": 78966 }, { "epoch": 8.11937075879087, "grad_norm": 0.06511518359184265, "learning_rate": 0.01, "loss": 1.9418, "step": 78969 }, { "epoch": 8.119679210363973, "grad_norm": 0.04923033341765404, "learning_rate": 0.01, "loss": 1.9566, "step": 78972 }, { "epoch": 8.119987661937076, "grad_norm": 0.048516470938920975, "learning_rate": 0.01, "loss": 1.9482, "step": 78975 }, { "epoch": 8.120296113510179, "grad_norm": 0.04488534480333328, "learning_rate": 0.01, "loss": 1.9662, "step": 78978 }, { "epoch": 8.120604565083282, "grad_norm": 0.04538138210773468, "learning_rate": 0.01, "loss": 1.9652, "step": 78981 }, { "epoch": 8.120913016656385, "grad_norm": 0.07497114688158035, "learning_rate": 0.01, "loss": 1.9495, "step": 78984 }, { "epoch": 8.121221468229487, "grad_norm": 0.10374471545219421, "learning_rate": 0.01, "loss": 1.98, "step": 78987 }, { "epoch": 8.12152991980259, "grad_norm": 0.13686977326869965, "learning_rate": 0.01, "loss": 1.9418, "step": 78990 }, { "epoch": 8.121838371375693, "grad_norm": 0.06521309167146683, "learning_rate": 0.01, "loss": 1.961, "step": 78993 }, { "epoch": 8.122146822948798, "grad_norm": 0.03203187882900238, "learning_rate": 0.01, "loss": 1.9625, "step": 78996 }, { "epoch": 8.1224552745219, "grad_norm": 0.034699197858572006, "learning_rate": 0.01, "loss": 1.9445, "step": 78999 }, { "epoch": 8.122763726095004, "grad_norm": 0.1084802970290184, "learning_rate": 0.01, "loss": 1.9788, "step": 79002 }, { "epoch": 8.123072177668107, "grad_norm": 0.07200446724891663, "learning_rate": 0.01, "loss": 1.9412, "step": 79005 }, { "epoch": 8.12338062924121, "grad_norm": 0.057980168610811234, "learning_rate": 0.01, "loss": 1.9717, "step": 79008 }, { "epoch": 8.123689080814312, "grad_norm": 0.03710618615150452, "learning_rate": 0.01, "loss": 1.968, "step": 79011 }, { "epoch": 8.123997532387415, "grad_norm": 0.04603053256869316, "learning_rate": 0.01, "loss": 1.9594, "step": 79014 }, { "epoch": 8.124305983960518, "grad_norm": 0.088069386780262, "learning_rate": 0.01, "loss": 1.9597, "step": 79017 }, { "epoch": 8.124614435533621, "grad_norm": 0.04834933578968048, "learning_rate": 0.01, "loss": 1.9344, "step": 79020 }, { "epoch": 8.124922887106724, "grad_norm": 0.04151962324976921, "learning_rate": 0.01, "loss": 1.9496, "step": 79023 }, { "epoch": 8.125231338679827, "grad_norm": 0.051632124930620193, "learning_rate": 0.01, "loss": 1.9561, "step": 79026 }, { "epoch": 8.12553979025293, "grad_norm": 0.143802672624588, "learning_rate": 0.01, "loss": 1.9544, "step": 79029 }, { "epoch": 8.125848241826032, "grad_norm": 0.04302970692515373, "learning_rate": 0.01, "loss": 1.9363, "step": 79032 }, { "epoch": 8.126156693399137, "grad_norm": 0.1300458461046219, "learning_rate": 0.01, "loss": 1.9406, "step": 79035 }, { "epoch": 8.12646514497224, "grad_norm": 0.06043313816189766, "learning_rate": 0.01, "loss": 1.9686, "step": 79038 }, { "epoch": 8.126773596545343, "grad_norm": 0.03919702395796776, "learning_rate": 0.01, "loss": 1.9469, "step": 79041 }, { "epoch": 8.127082048118446, "grad_norm": 0.0503271222114563, "learning_rate": 0.01, "loss": 1.9525, "step": 79044 }, { "epoch": 8.127390499691549, "grad_norm": 0.035495515912771225, "learning_rate": 0.01, "loss": 1.9539, "step": 79047 }, { "epoch": 8.127698951264652, "grad_norm": 0.03414744511246681, "learning_rate": 0.01, "loss": 1.9523, "step": 79050 }, { "epoch": 8.128007402837754, "grad_norm": 0.08046834170818329, "learning_rate": 0.01, "loss": 1.9616, "step": 79053 }, { "epoch": 8.128315854410857, "grad_norm": 0.03690198436379433, "learning_rate": 0.01, "loss": 1.9766, "step": 79056 }, { "epoch": 8.12862430598396, "grad_norm": 0.06119079142808914, "learning_rate": 0.01, "loss": 1.9492, "step": 79059 }, { "epoch": 8.128932757557063, "grad_norm": 0.0938824862241745, "learning_rate": 0.01, "loss": 1.9628, "step": 79062 }, { "epoch": 8.129241209130166, "grad_norm": 0.09503504633903503, "learning_rate": 0.01, "loss": 1.9503, "step": 79065 }, { "epoch": 8.129549660703269, "grad_norm": 0.06319358944892883, "learning_rate": 0.01, "loss": 1.9389, "step": 79068 }, { "epoch": 8.129858112276372, "grad_norm": 0.08098098635673523, "learning_rate": 0.01, "loss": 1.95, "step": 79071 }, { "epoch": 8.130166563849476, "grad_norm": 0.06519079208374023, "learning_rate": 0.01, "loss": 1.9633, "step": 79074 }, { "epoch": 8.13047501542258, "grad_norm": 0.13367730379104614, "learning_rate": 0.01, "loss": 1.9397, "step": 79077 }, { "epoch": 8.130783466995682, "grad_norm": 0.09786729514598846, "learning_rate": 0.01, "loss": 1.9523, "step": 79080 }, { "epoch": 8.131091918568785, "grad_norm": 0.11255167424678802, "learning_rate": 0.01, "loss": 1.9508, "step": 79083 }, { "epoch": 8.131400370141888, "grad_norm": 0.05492749437689781, "learning_rate": 0.01, "loss": 1.9781, "step": 79086 }, { "epoch": 8.13170882171499, "grad_norm": 0.03756438195705414, "learning_rate": 0.01, "loss": 1.9499, "step": 79089 }, { "epoch": 8.132017273288094, "grad_norm": 0.03793524205684662, "learning_rate": 0.01, "loss": 1.9635, "step": 79092 }, { "epoch": 8.132325724861197, "grad_norm": 0.03778066486120224, "learning_rate": 0.01, "loss": 1.943, "step": 79095 }, { "epoch": 8.1326341764343, "grad_norm": 0.034888532012701035, "learning_rate": 0.01, "loss": 1.9557, "step": 79098 }, { "epoch": 8.132942628007402, "grad_norm": 0.0687558501958847, "learning_rate": 0.01, "loss": 1.9729, "step": 79101 }, { "epoch": 8.133251079580505, "grad_norm": 0.1305745244026184, "learning_rate": 0.01, "loss": 1.9667, "step": 79104 }, { "epoch": 8.133559531153608, "grad_norm": 0.06419795006513596, "learning_rate": 0.01, "loss": 1.9599, "step": 79107 }, { "epoch": 8.133867982726711, "grad_norm": 0.03223300725221634, "learning_rate": 0.01, "loss": 1.9418, "step": 79110 }, { "epoch": 8.134176434299816, "grad_norm": 0.03920907899737358, "learning_rate": 0.01, "loss": 1.9834, "step": 79113 }, { "epoch": 8.134484885872919, "grad_norm": 0.05031749606132507, "learning_rate": 0.01, "loss": 1.9427, "step": 79116 }, { "epoch": 8.134793337446022, "grad_norm": 0.08466044068336487, "learning_rate": 0.01, "loss": 1.9513, "step": 79119 }, { "epoch": 8.135101789019124, "grad_norm": 0.06535778194665909, "learning_rate": 0.01, "loss": 1.9592, "step": 79122 }, { "epoch": 8.135410240592227, "grad_norm": 0.037378523498773575, "learning_rate": 0.01, "loss": 1.9622, "step": 79125 }, { "epoch": 8.13571869216533, "grad_norm": 0.11165131628513336, "learning_rate": 0.01, "loss": 1.9482, "step": 79128 }, { "epoch": 8.136027143738433, "grad_norm": 0.07167794555425644, "learning_rate": 0.01, "loss": 1.9327, "step": 79131 }, { "epoch": 8.136335595311536, "grad_norm": 0.10369572788476944, "learning_rate": 0.01, "loss": 1.9438, "step": 79134 }, { "epoch": 8.136644046884639, "grad_norm": 0.051352955400943756, "learning_rate": 0.01, "loss": 1.9689, "step": 79137 }, { "epoch": 8.136952498457742, "grad_norm": 0.11947130411863327, "learning_rate": 0.01, "loss": 1.9338, "step": 79140 }, { "epoch": 8.137260950030845, "grad_norm": 0.07348339259624481, "learning_rate": 0.01, "loss": 1.9669, "step": 79143 }, { "epoch": 8.137569401603947, "grad_norm": 0.145231232047081, "learning_rate": 0.01, "loss": 1.978, "step": 79146 }, { "epoch": 8.13787785317705, "grad_norm": 0.053605154156684875, "learning_rate": 0.01, "loss": 1.9505, "step": 79149 }, { "epoch": 8.138186304750155, "grad_norm": 0.04041539132595062, "learning_rate": 0.01, "loss": 1.9614, "step": 79152 }, { "epoch": 8.138494756323258, "grad_norm": 0.06164342164993286, "learning_rate": 0.01, "loss": 1.9555, "step": 79155 }, { "epoch": 8.13880320789636, "grad_norm": 0.04916723445057869, "learning_rate": 0.01, "loss": 1.9527, "step": 79158 }, { "epoch": 8.139111659469464, "grad_norm": 0.04106106981635094, "learning_rate": 0.01, "loss": 1.9646, "step": 79161 }, { "epoch": 8.139420111042567, "grad_norm": 0.03548407554626465, "learning_rate": 0.01, "loss": 1.9621, "step": 79164 }, { "epoch": 8.13972856261567, "grad_norm": 0.05280381441116333, "learning_rate": 0.01, "loss": 1.9502, "step": 79167 }, { "epoch": 8.140037014188772, "grad_norm": 0.10448241233825684, "learning_rate": 0.01, "loss": 1.986, "step": 79170 }, { "epoch": 8.140345465761875, "grad_norm": 0.05269154906272888, "learning_rate": 0.01, "loss": 1.9334, "step": 79173 }, { "epoch": 8.140653917334978, "grad_norm": 0.14911656081676483, "learning_rate": 0.01, "loss": 1.9402, "step": 79176 }, { "epoch": 8.140962368908081, "grad_norm": 0.051300134509801865, "learning_rate": 0.01, "loss": 1.9228, "step": 79179 }, { "epoch": 8.141270820481184, "grad_norm": 0.03888317197561264, "learning_rate": 0.01, "loss": 1.9641, "step": 79182 }, { "epoch": 8.141579272054287, "grad_norm": 0.04629917070269585, "learning_rate": 0.01, "loss": 1.9575, "step": 79185 }, { "epoch": 8.14188772362739, "grad_norm": 0.03899998590350151, "learning_rate": 0.01, "loss": 1.9462, "step": 79188 }, { "epoch": 8.142196175200494, "grad_norm": 0.04058917611837387, "learning_rate": 0.01, "loss": 1.9664, "step": 79191 }, { "epoch": 8.142504626773597, "grad_norm": 0.03686654940247536, "learning_rate": 0.01, "loss": 1.9573, "step": 79194 }, { "epoch": 8.1428130783467, "grad_norm": 0.048519935458898544, "learning_rate": 0.01, "loss": 1.9653, "step": 79197 }, { "epoch": 8.143121529919803, "grad_norm": 0.037117861211299896, "learning_rate": 0.01, "loss": 1.9417, "step": 79200 }, { "epoch": 8.143429981492906, "grad_norm": 0.10335171222686768, "learning_rate": 0.01, "loss": 1.965, "step": 79203 }, { "epoch": 8.143738433066009, "grad_norm": 0.07381299138069153, "learning_rate": 0.01, "loss": 1.9431, "step": 79206 }, { "epoch": 8.144046884639112, "grad_norm": 0.06755456328392029, "learning_rate": 0.01, "loss": 1.9778, "step": 79209 }, { "epoch": 8.144355336212215, "grad_norm": 0.05779864639043808, "learning_rate": 0.01, "loss": 1.9603, "step": 79212 }, { "epoch": 8.144663787785317, "grad_norm": 0.09358736872673035, "learning_rate": 0.01, "loss": 1.9488, "step": 79215 }, { "epoch": 8.14497223935842, "grad_norm": 0.04860676825046539, "learning_rate": 0.01, "loss": 1.9513, "step": 79218 }, { "epoch": 8.145280690931523, "grad_norm": 0.032770249992609024, "learning_rate": 0.01, "loss": 1.9639, "step": 79221 }, { "epoch": 8.145589142504626, "grad_norm": 0.04577682912349701, "learning_rate": 0.01, "loss": 1.9664, "step": 79224 }, { "epoch": 8.145897594077729, "grad_norm": 0.0953214019536972, "learning_rate": 0.01, "loss": 1.9595, "step": 79227 }, { "epoch": 8.146206045650834, "grad_norm": 0.07593677937984467, "learning_rate": 0.01, "loss": 1.946, "step": 79230 }, { "epoch": 8.146514497223937, "grad_norm": 0.11113808304071426, "learning_rate": 0.01, "loss": 1.951, "step": 79233 }, { "epoch": 8.14682294879704, "grad_norm": 0.13596338033676147, "learning_rate": 0.01, "loss": 1.9361, "step": 79236 }, { "epoch": 8.147131400370142, "grad_norm": 0.1248130351305008, "learning_rate": 0.01, "loss": 1.955, "step": 79239 }, { "epoch": 8.147439851943245, "grad_norm": 0.09058087319135666, "learning_rate": 0.01, "loss": 1.9544, "step": 79242 }, { "epoch": 8.147748303516348, "grad_norm": 0.08766442537307739, "learning_rate": 0.01, "loss": 1.9517, "step": 79245 }, { "epoch": 8.148056755089451, "grad_norm": 0.04625086486339569, "learning_rate": 0.01, "loss": 1.9638, "step": 79248 }, { "epoch": 8.148365206662554, "grad_norm": 0.04851190373301506, "learning_rate": 0.01, "loss": 1.9551, "step": 79251 }, { "epoch": 8.148673658235657, "grad_norm": 0.0595279186964035, "learning_rate": 0.01, "loss": 1.9556, "step": 79254 }, { "epoch": 8.14898210980876, "grad_norm": 0.05664240941405296, "learning_rate": 0.01, "loss": 1.9623, "step": 79257 }, { "epoch": 8.149290561381862, "grad_norm": 0.06132050231099129, "learning_rate": 0.01, "loss": 1.9558, "step": 79260 }, { "epoch": 8.149599012954965, "grad_norm": 0.051271144300699234, "learning_rate": 0.01, "loss": 1.9448, "step": 79263 }, { "epoch": 8.149907464528068, "grad_norm": 0.07151178270578384, "learning_rate": 0.01, "loss": 1.9231, "step": 79266 }, { "epoch": 8.150215916101173, "grad_norm": 0.051999252289533615, "learning_rate": 0.01, "loss": 1.9626, "step": 79269 }, { "epoch": 8.150524367674276, "grad_norm": 0.05515766888856888, "learning_rate": 0.01, "loss": 1.9619, "step": 79272 }, { "epoch": 8.150832819247379, "grad_norm": 0.07011020928621292, "learning_rate": 0.01, "loss": 1.95, "step": 79275 }, { "epoch": 8.151141270820482, "grad_norm": 0.049128126353025436, "learning_rate": 0.01, "loss": 1.9676, "step": 79278 }, { "epoch": 8.151449722393584, "grad_norm": 0.04374023899435997, "learning_rate": 0.01, "loss": 1.9626, "step": 79281 }, { "epoch": 8.151758173966687, "grad_norm": 0.10206493735313416, "learning_rate": 0.01, "loss": 1.9443, "step": 79284 }, { "epoch": 8.15206662553979, "grad_norm": 0.06682003289461136, "learning_rate": 0.01, "loss": 1.953, "step": 79287 }, { "epoch": 8.152375077112893, "grad_norm": 0.08957073092460632, "learning_rate": 0.01, "loss": 1.9597, "step": 79290 }, { "epoch": 8.152683528685996, "grad_norm": 0.08002035319805145, "learning_rate": 0.01, "loss": 1.9472, "step": 79293 }, { "epoch": 8.152991980259099, "grad_norm": 0.09390459209680557, "learning_rate": 0.01, "loss": 1.9357, "step": 79296 }, { "epoch": 8.153300431832202, "grad_norm": 0.06374812871217728, "learning_rate": 0.01, "loss": 1.957, "step": 79299 }, { "epoch": 8.153608883405305, "grad_norm": 0.07658891379833221, "learning_rate": 0.01, "loss": 1.9467, "step": 79302 }, { "epoch": 8.153917334978408, "grad_norm": 0.11377871036529541, "learning_rate": 0.01, "loss": 1.957, "step": 79305 }, { "epoch": 8.154225786551512, "grad_norm": 0.049400169402360916, "learning_rate": 0.01, "loss": 1.9549, "step": 79308 }, { "epoch": 8.154534238124615, "grad_norm": 0.08506350219249725, "learning_rate": 0.01, "loss": 1.9663, "step": 79311 }, { "epoch": 8.154842689697718, "grad_norm": 0.06659626960754395, "learning_rate": 0.01, "loss": 1.9829, "step": 79314 }, { "epoch": 8.15515114127082, "grad_norm": 0.037887092679739, "learning_rate": 0.01, "loss": 1.9601, "step": 79317 }, { "epoch": 8.155459592843924, "grad_norm": 0.05094122514128685, "learning_rate": 0.01, "loss": 1.9584, "step": 79320 }, { "epoch": 8.155768044417027, "grad_norm": 0.05743343010544777, "learning_rate": 0.01, "loss": 1.9495, "step": 79323 }, { "epoch": 8.15607649599013, "grad_norm": 0.07642214000225067, "learning_rate": 0.01, "loss": 1.9655, "step": 79326 }, { "epoch": 8.156384947563232, "grad_norm": 0.07716453075408936, "learning_rate": 0.01, "loss": 1.9688, "step": 79329 }, { "epoch": 8.156693399136335, "grad_norm": 0.04788043722510338, "learning_rate": 0.01, "loss": 1.9688, "step": 79332 }, { "epoch": 8.157001850709438, "grad_norm": 0.037050921469926834, "learning_rate": 0.01, "loss": 1.9487, "step": 79335 }, { "epoch": 8.157310302282541, "grad_norm": 0.062088605016469955, "learning_rate": 0.01, "loss": 1.9591, "step": 79338 }, { "epoch": 8.157618753855644, "grad_norm": 0.042694903910160065, "learning_rate": 0.01, "loss": 1.9498, "step": 79341 }, { "epoch": 8.157927205428747, "grad_norm": 0.03817545622587204, "learning_rate": 0.01, "loss": 1.931, "step": 79344 }, { "epoch": 8.158235657001851, "grad_norm": 0.04757656529545784, "learning_rate": 0.01, "loss": 1.9508, "step": 79347 }, { "epoch": 8.158544108574954, "grad_norm": 0.07190608233213425, "learning_rate": 0.01, "loss": 1.9641, "step": 79350 }, { "epoch": 8.158852560148057, "grad_norm": 0.12211509048938751, "learning_rate": 0.01, "loss": 1.9413, "step": 79353 }, { "epoch": 8.15916101172116, "grad_norm": 0.05178214982151985, "learning_rate": 0.01, "loss": 1.9587, "step": 79356 }, { "epoch": 8.159469463294263, "grad_norm": 0.04701949656009674, "learning_rate": 0.01, "loss": 1.9634, "step": 79359 }, { "epoch": 8.159777914867366, "grad_norm": 0.10068555921316147, "learning_rate": 0.01, "loss": 1.9368, "step": 79362 }, { "epoch": 8.160086366440469, "grad_norm": 0.1051318496465683, "learning_rate": 0.01, "loss": 1.9373, "step": 79365 }, { "epoch": 8.160394818013572, "grad_norm": 0.07793916016817093, "learning_rate": 0.01, "loss": 1.9684, "step": 79368 }, { "epoch": 8.160703269586675, "grad_norm": 0.061303503811359406, "learning_rate": 0.01, "loss": 1.9663, "step": 79371 }, { "epoch": 8.161011721159777, "grad_norm": 0.05692371726036072, "learning_rate": 0.01, "loss": 1.9497, "step": 79374 }, { "epoch": 8.16132017273288, "grad_norm": 0.061426080763339996, "learning_rate": 0.01, "loss": 1.9697, "step": 79377 }, { "epoch": 8.161628624305983, "grad_norm": 0.04750361666083336, "learning_rate": 0.01, "loss": 1.9472, "step": 79380 }, { "epoch": 8.161937075879086, "grad_norm": 0.03101014718413353, "learning_rate": 0.01, "loss": 1.9602, "step": 79383 }, { "epoch": 8.16224552745219, "grad_norm": 0.043352603912353516, "learning_rate": 0.01, "loss": 1.9358, "step": 79386 }, { "epoch": 8.162553979025294, "grad_norm": 0.11773629486560822, "learning_rate": 0.01, "loss": 1.9828, "step": 79389 }, { "epoch": 8.162862430598397, "grad_norm": 0.08607412129640579, "learning_rate": 0.01, "loss": 1.9562, "step": 79392 }, { "epoch": 8.1631708821715, "grad_norm": 0.06435645371675491, "learning_rate": 0.01, "loss": 1.9633, "step": 79395 }, { "epoch": 8.163479333744602, "grad_norm": 0.035051826387643814, "learning_rate": 0.01, "loss": 1.9489, "step": 79398 }, { "epoch": 8.163787785317705, "grad_norm": 0.0367707759141922, "learning_rate": 0.01, "loss": 1.9443, "step": 79401 }, { "epoch": 8.164096236890808, "grad_norm": 0.04549092799425125, "learning_rate": 0.01, "loss": 1.9329, "step": 79404 }, { "epoch": 8.164404688463911, "grad_norm": 0.050185445696115494, "learning_rate": 0.01, "loss": 1.9321, "step": 79407 }, { "epoch": 8.164713140037014, "grad_norm": 0.053017307072877884, "learning_rate": 0.01, "loss": 1.9702, "step": 79410 }, { "epoch": 8.165021591610117, "grad_norm": 0.03889346867799759, "learning_rate": 0.01, "loss": 1.9673, "step": 79413 }, { "epoch": 8.16533004318322, "grad_norm": 0.09231189638376236, "learning_rate": 0.01, "loss": 1.9606, "step": 79416 }, { "epoch": 8.165638494756323, "grad_norm": 0.06801720708608627, "learning_rate": 0.01, "loss": 1.9463, "step": 79419 }, { "epoch": 8.165946946329425, "grad_norm": 0.08188890665769577, "learning_rate": 0.01, "loss": 1.9519, "step": 79422 }, { "epoch": 8.16625539790253, "grad_norm": 0.07723134011030197, "learning_rate": 0.01, "loss": 1.9682, "step": 79425 }, { "epoch": 8.166563849475633, "grad_norm": 0.05977725237607956, "learning_rate": 0.01, "loss": 1.9402, "step": 79428 }, { "epoch": 8.166872301048736, "grad_norm": 0.04500715434551239, "learning_rate": 0.01, "loss": 1.9526, "step": 79431 }, { "epoch": 8.167180752621839, "grad_norm": 0.044927868992090225, "learning_rate": 0.01, "loss": 1.9349, "step": 79434 }, { "epoch": 8.167489204194942, "grad_norm": 0.05496992543339729, "learning_rate": 0.01, "loss": 1.943, "step": 79437 }, { "epoch": 8.167797655768045, "grad_norm": 0.0723772794008255, "learning_rate": 0.01, "loss": 1.9677, "step": 79440 }, { "epoch": 8.168106107341147, "grad_norm": 0.0771140456199646, "learning_rate": 0.01, "loss": 1.9652, "step": 79443 }, { "epoch": 8.16841455891425, "grad_norm": 0.041013602167367935, "learning_rate": 0.01, "loss": 1.9665, "step": 79446 }, { "epoch": 8.168723010487353, "grad_norm": 0.035580825060606, "learning_rate": 0.01, "loss": 1.9418, "step": 79449 }, { "epoch": 8.169031462060456, "grad_norm": 0.06367741525173187, "learning_rate": 0.01, "loss": 1.9614, "step": 79452 }, { "epoch": 8.169339913633559, "grad_norm": 0.08021467179059982, "learning_rate": 0.01, "loss": 1.9561, "step": 79455 }, { "epoch": 8.169648365206662, "grad_norm": 0.08360808342695236, "learning_rate": 0.01, "loss": 1.9569, "step": 79458 }, { "epoch": 8.169956816779765, "grad_norm": 0.0339963473379612, "learning_rate": 0.01, "loss": 1.9374, "step": 79461 }, { "epoch": 8.17026526835287, "grad_norm": 0.07280755043029785, "learning_rate": 0.01, "loss": 1.9556, "step": 79464 }, { "epoch": 8.170573719925972, "grad_norm": 0.05875898897647858, "learning_rate": 0.01, "loss": 1.9276, "step": 79467 }, { "epoch": 8.170882171499075, "grad_norm": 0.05665907263755798, "learning_rate": 0.01, "loss": 1.9562, "step": 79470 }, { "epoch": 8.171190623072178, "grad_norm": 0.06836891919374466, "learning_rate": 0.01, "loss": 1.9585, "step": 79473 }, { "epoch": 8.171499074645281, "grad_norm": 0.05284962058067322, "learning_rate": 0.01, "loss": 1.9534, "step": 79476 }, { "epoch": 8.171807526218384, "grad_norm": 0.05243814364075661, "learning_rate": 0.01, "loss": 1.9525, "step": 79479 }, { "epoch": 8.172115977791487, "grad_norm": 0.11004475504159927, "learning_rate": 0.01, "loss": 1.9643, "step": 79482 }, { "epoch": 8.17242442936459, "grad_norm": 0.0900677889585495, "learning_rate": 0.01, "loss": 1.9611, "step": 79485 }, { "epoch": 8.172732880937692, "grad_norm": 0.07300006598234177, "learning_rate": 0.01, "loss": 1.9443, "step": 79488 }, { "epoch": 8.173041332510795, "grad_norm": 0.04999767243862152, "learning_rate": 0.01, "loss": 1.9766, "step": 79491 }, { "epoch": 8.173349784083898, "grad_norm": 0.06433633714914322, "learning_rate": 0.01, "loss": 1.9612, "step": 79494 }, { "epoch": 8.173658235657001, "grad_norm": 0.08774617314338684, "learning_rate": 0.01, "loss": 1.9516, "step": 79497 }, { "epoch": 8.173966687230104, "grad_norm": 0.06742536276578903, "learning_rate": 0.01, "loss": 1.9553, "step": 79500 }, { "epoch": 8.174275138803209, "grad_norm": 0.0422503799200058, "learning_rate": 0.01, "loss": 1.9585, "step": 79503 }, { "epoch": 8.174583590376312, "grad_norm": 0.15565431118011475, "learning_rate": 0.01, "loss": 1.9684, "step": 79506 }, { "epoch": 8.174892041949414, "grad_norm": 0.12713783979415894, "learning_rate": 0.01, "loss": 1.9595, "step": 79509 }, { "epoch": 8.175200493522517, "grad_norm": 0.08080834895372391, "learning_rate": 0.01, "loss": 1.9387, "step": 79512 }, { "epoch": 8.17550894509562, "grad_norm": 0.0670383870601654, "learning_rate": 0.01, "loss": 1.9374, "step": 79515 }, { "epoch": 8.175817396668723, "grad_norm": 0.04987994581460953, "learning_rate": 0.01, "loss": 1.9561, "step": 79518 }, { "epoch": 8.176125848241826, "grad_norm": 0.06311977654695511, "learning_rate": 0.01, "loss": 1.9345, "step": 79521 }, { "epoch": 8.176434299814929, "grad_norm": 0.05733686313033104, "learning_rate": 0.01, "loss": 1.9776, "step": 79524 }, { "epoch": 8.176742751388032, "grad_norm": 0.03165985271334648, "learning_rate": 0.01, "loss": 1.9477, "step": 79527 }, { "epoch": 8.177051202961135, "grad_norm": 0.1158774122595787, "learning_rate": 0.01, "loss": 1.9539, "step": 79530 }, { "epoch": 8.177359654534238, "grad_norm": 0.06680413335561752, "learning_rate": 0.01, "loss": 1.9479, "step": 79533 }, { "epoch": 8.17766810610734, "grad_norm": 0.08957581222057343, "learning_rate": 0.01, "loss": 1.9199, "step": 79536 }, { "epoch": 8.177976557680445, "grad_norm": 0.04930703341960907, "learning_rate": 0.01, "loss": 1.959, "step": 79539 }, { "epoch": 8.178285009253548, "grad_norm": 0.040650609880685806, "learning_rate": 0.01, "loss": 1.9632, "step": 79542 }, { "epoch": 8.17859346082665, "grad_norm": 0.04556789621710777, "learning_rate": 0.01, "loss": 1.9419, "step": 79545 }, { "epoch": 8.178901912399754, "grad_norm": 0.12077522277832031, "learning_rate": 0.01, "loss": 1.9465, "step": 79548 }, { "epoch": 8.179210363972857, "grad_norm": 0.041626036167144775, "learning_rate": 0.01, "loss": 1.9323, "step": 79551 }, { "epoch": 8.17951881554596, "grad_norm": 0.06800248473882675, "learning_rate": 0.01, "loss": 1.9341, "step": 79554 }, { "epoch": 8.179827267119062, "grad_norm": 0.07558157294988632, "learning_rate": 0.01, "loss": 1.9528, "step": 79557 }, { "epoch": 8.180135718692165, "grad_norm": 0.0845264345407486, "learning_rate": 0.01, "loss": 1.9513, "step": 79560 }, { "epoch": 8.180444170265268, "grad_norm": 0.07842035591602325, "learning_rate": 0.01, "loss": 1.9394, "step": 79563 }, { "epoch": 8.180752621838371, "grad_norm": 0.07811528444290161, "learning_rate": 0.01, "loss": 1.9436, "step": 79566 }, { "epoch": 8.181061073411474, "grad_norm": 0.07147359102964401, "learning_rate": 0.01, "loss": 1.9501, "step": 79569 }, { "epoch": 8.181369524984577, "grad_norm": 0.08217006921768188, "learning_rate": 0.01, "loss": 1.9714, "step": 79572 }, { "epoch": 8.18167797655768, "grad_norm": 0.06407132744789124, "learning_rate": 0.01, "loss": 1.9752, "step": 79575 }, { "epoch": 8.181986428130784, "grad_norm": 0.07353561371564865, "learning_rate": 0.01, "loss": 1.9604, "step": 79578 }, { "epoch": 8.182294879703887, "grad_norm": 0.11215024441480637, "learning_rate": 0.01, "loss": 1.968, "step": 79581 }, { "epoch": 8.18260333127699, "grad_norm": 0.042454659938812256, "learning_rate": 0.01, "loss": 1.963, "step": 79584 }, { "epoch": 8.182911782850093, "grad_norm": 0.06562569737434387, "learning_rate": 0.01, "loss": 1.9455, "step": 79587 }, { "epoch": 8.183220234423196, "grad_norm": 0.05847795680165291, "learning_rate": 0.01, "loss": 1.9524, "step": 79590 }, { "epoch": 8.183528685996299, "grad_norm": 0.03575105592608452, "learning_rate": 0.01, "loss": 1.9654, "step": 79593 }, { "epoch": 8.183837137569402, "grad_norm": 0.04204864054918289, "learning_rate": 0.01, "loss": 1.9704, "step": 79596 }, { "epoch": 8.184145589142505, "grad_norm": 0.13108733296394348, "learning_rate": 0.01, "loss": 1.9498, "step": 79599 }, { "epoch": 8.184454040715607, "grad_norm": 0.05569970980286598, "learning_rate": 0.01, "loss": 1.9627, "step": 79602 }, { "epoch": 8.18476249228871, "grad_norm": 0.07396795600652695, "learning_rate": 0.01, "loss": 1.9154, "step": 79605 }, { "epoch": 8.185070943861813, "grad_norm": 0.05299641191959381, "learning_rate": 0.01, "loss": 1.9732, "step": 79608 }, { "epoch": 8.185379395434916, "grad_norm": 0.04353434219956398, "learning_rate": 0.01, "loss": 1.9635, "step": 79611 }, { "epoch": 8.185687847008019, "grad_norm": 0.032626885920763016, "learning_rate": 0.01, "loss": 1.9572, "step": 79614 }, { "epoch": 8.185996298581124, "grad_norm": 0.06502965092658997, "learning_rate": 0.01, "loss": 1.9676, "step": 79617 }, { "epoch": 8.186304750154227, "grad_norm": 0.15308474004268646, "learning_rate": 0.01, "loss": 1.9698, "step": 79620 }, { "epoch": 8.18661320172733, "grad_norm": 0.13402137160301208, "learning_rate": 0.01, "loss": 1.9262, "step": 79623 }, { "epoch": 8.186921653300432, "grad_norm": 0.07767334580421448, "learning_rate": 0.01, "loss": 1.9356, "step": 79626 }, { "epoch": 8.187230104873535, "grad_norm": 0.0863209217786789, "learning_rate": 0.01, "loss": 1.9557, "step": 79629 }, { "epoch": 8.187538556446638, "grad_norm": 0.07084184139966965, "learning_rate": 0.01, "loss": 1.9582, "step": 79632 }, { "epoch": 8.187847008019741, "grad_norm": 0.06534894555807114, "learning_rate": 0.01, "loss": 1.9595, "step": 79635 }, { "epoch": 8.188155459592844, "grad_norm": 0.03883112967014313, "learning_rate": 0.01, "loss": 1.9341, "step": 79638 }, { "epoch": 8.188463911165947, "grad_norm": 0.08439522236585617, "learning_rate": 0.01, "loss": 1.9471, "step": 79641 }, { "epoch": 8.18877236273905, "grad_norm": 0.07321714609861374, "learning_rate": 0.01, "loss": 1.9566, "step": 79644 }, { "epoch": 8.189080814312153, "grad_norm": 0.04626638814806938, "learning_rate": 0.01, "loss": 1.9676, "step": 79647 }, { "epoch": 8.189389265885255, "grad_norm": 0.10366689413785934, "learning_rate": 0.01, "loss": 1.9352, "step": 79650 }, { "epoch": 8.189697717458358, "grad_norm": 0.08205170929431915, "learning_rate": 0.01, "loss": 1.9412, "step": 79653 }, { "epoch": 8.190006169031463, "grad_norm": 0.06450674682855606, "learning_rate": 0.01, "loss": 1.9343, "step": 79656 }, { "epoch": 8.190314620604566, "grad_norm": 0.05880254507064819, "learning_rate": 0.01, "loss": 1.9506, "step": 79659 }, { "epoch": 8.190623072177669, "grad_norm": 0.07828585803508759, "learning_rate": 0.01, "loss": 1.9564, "step": 79662 }, { "epoch": 8.190931523750772, "grad_norm": 0.06465885043144226, "learning_rate": 0.01, "loss": 1.9553, "step": 79665 }, { "epoch": 8.191239975323874, "grad_norm": 0.09105885773897171, "learning_rate": 0.01, "loss": 1.9525, "step": 79668 }, { "epoch": 8.191548426896977, "grad_norm": 0.0810224637389183, "learning_rate": 0.01, "loss": 1.9433, "step": 79671 }, { "epoch": 8.19185687847008, "grad_norm": 0.04941980540752411, "learning_rate": 0.01, "loss": 1.9573, "step": 79674 }, { "epoch": 8.192165330043183, "grad_norm": 0.06221986562013626, "learning_rate": 0.01, "loss": 1.9667, "step": 79677 }, { "epoch": 8.192473781616286, "grad_norm": 0.0800907164812088, "learning_rate": 0.01, "loss": 1.959, "step": 79680 }, { "epoch": 8.192782233189389, "grad_norm": 0.06656412035226822, "learning_rate": 0.01, "loss": 1.949, "step": 79683 }, { "epoch": 8.193090684762492, "grad_norm": 0.05629335716366768, "learning_rate": 0.01, "loss": 1.9657, "step": 79686 }, { "epoch": 8.193399136335595, "grad_norm": 0.1350521743297577, "learning_rate": 0.01, "loss": 1.9468, "step": 79689 }, { "epoch": 8.193707587908698, "grad_norm": 0.05548480898141861, "learning_rate": 0.01, "loss": 1.9389, "step": 79692 }, { "epoch": 8.194016039481802, "grad_norm": 0.038812119513750076, "learning_rate": 0.01, "loss": 1.9254, "step": 79695 }, { "epoch": 8.194324491054905, "grad_norm": 0.09969807416200638, "learning_rate": 0.01, "loss": 1.9402, "step": 79698 }, { "epoch": 8.194632942628008, "grad_norm": 0.10124362260103226, "learning_rate": 0.01, "loss": 1.9798, "step": 79701 }, { "epoch": 8.194941394201111, "grad_norm": 0.048608239740133286, "learning_rate": 0.01, "loss": 1.9513, "step": 79704 }, { "epoch": 8.195249845774214, "grad_norm": 0.03750385716557503, "learning_rate": 0.01, "loss": 1.9311, "step": 79707 }, { "epoch": 8.195558297347317, "grad_norm": 0.07153517007827759, "learning_rate": 0.01, "loss": 1.9723, "step": 79710 }, { "epoch": 8.19586674892042, "grad_norm": 0.07376372069120407, "learning_rate": 0.01, "loss": 1.937, "step": 79713 }, { "epoch": 8.196175200493522, "grad_norm": 0.08296847343444824, "learning_rate": 0.01, "loss": 1.9386, "step": 79716 }, { "epoch": 8.196483652066625, "grad_norm": 0.12598127126693726, "learning_rate": 0.01, "loss": 1.9658, "step": 79719 }, { "epoch": 8.196792103639728, "grad_norm": 0.06051617115736008, "learning_rate": 0.01, "loss": 1.9438, "step": 79722 }, { "epoch": 8.197100555212831, "grad_norm": 0.05080085247755051, "learning_rate": 0.01, "loss": 1.9383, "step": 79725 }, { "epoch": 8.197409006785934, "grad_norm": 0.04279420152306557, "learning_rate": 0.01, "loss": 1.9579, "step": 79728 }, { "epoch": 8.197717458359037, "grad_norm": 0.03769794479012489, "learning_rate": 0.01, "loss": 1.9746, "step": 79731 }, { "epoch": 8.198025909932142, "grad_norm": 0.03623786196112633, "learning_rate": 0.01, "loss": 1.9501, "step": 79734 }, { "epoch": 8.198334361505244, "grad_norm": 0.059139419347047806, "learning_rate": 0.01, "loss": 1.9878, "step": 79737 }, { "epoch": 8.198642813078347, "grad_norm": 0.051768649369478226, "learning_rate": 0.01, "loss": 1.9367, "step": 79740 }, { "epoch": 8.19895126465145, "grad_norm": 0.054058823734521866, "learning_rate": 0.01, "loss": 1.9598, "step": 79743 }, { "epoch": 8.199259716224553, "grad_norm": 0.06856348365545273, "learning_rate": 0.01, "loss": 1.9449, "step": 79746 }, { "epoch": 8.199568167797656, "grad_norm": 0.0483131967484951, "learning_rate": 0.01, "loss": 1.9377, "step": 79749 }, { "epoch": 8.199876619370759, "grad_norm": 0.0742148756980896, "learning_rate": 0.01, "loss": 1.94, "step": 79752 }, { "epoch": 8.200185070943862, "grad_norm": 0.05363794043660164, "learning_rate": 0.01, "loss": 1.9672, "step": 79755 }, { "epoch": 8.200493522516965, "grad_norm": 0.07250255346298218, "learning_rate": 0.01, "loss": 1.9539, "step": 79758 }, { "epoch": 8.200801974090068, "grad_norm": 0.10672403872013092, "learning_rate": 0.01, "loss": 1.9641, "step": 79761 }, { "epoch": 8.20111042566317, "grad_norm": 0.08459288626909256, "learning_rate": 0.01, "loss": 1.9405, "step": 79764 }, { "epoch": 8.201418877236273, "grad_norm": 0.06241921707987785, "learning_rate": 0.01, "loss": 1.9459, "step": 79767 }, { "epoch": 8.201727328809376, "grad_norm": 0.047745924443006516, "learning_rate": 0.01, "loss": 1.9878, "step": 79770 }, { "epoch": 8.20203578038248, "grad_norm": 0.11685614287853241, "learning_rate": 0.01, "loss": 1.9719, "step": 79773 }, { "epoch": 8.202344231955584, "grad_norm": 0.03775307908654213, "learning_rate": 0.01, "loss": 1.9446, "step": 79776 }, { "epoch": 8.202652683528687, "grad_norm": 0.04868243262171745, "learning_rate": 0.01, "loss": 1.9598, "step": 79779 }, { "epoch": 8.20296113510179, "grad_norm": 0.055632028728723526, "learning_rate": 0.01, "loss": 1.9499, "step": 79782 }, { "epoch": 8.203269586674892, "grad_norm": 0.05138279125094414, "learning_rate": 0.01, "loss": 1.9535, "step": 79785 }, { "epoch": 8.203578038247995, "grad_norm": 0.032587356865406036, "learning_rate": 0.01, "loss": 1.9451, "step": 79788 }, { "epoch": 8.203886489821098, "grad_norm": 0.08916875720024109, "learning_rate": 0.01, "loss": 1.9777, "step": 79791 }, { "epoch": 8.204194941394201, "grad_norm": 0.08575033396482468, "learning_rate": 0.01, "loss": 1.9793, "step": 79794 }, { "epoch": 8.204503392967304, "grad_norm": 0.06784272938966751, "learning_rate": 0.01, "loss": 1.9638, "step": 79797 }, { "epoch": 8.204811844540407, "grad_norm": 0.10347246378660202, "learning_rate": 0.01, "loss": 1.9312, "step": 79800 }, { "epoch": 8.20512029611351, "grad_norm": 0.04444975405931473, "learning_rate": 0.01, "loss": 1.9439, "step": 79803 }, { "epoch": 8.205428747686613, "grad_norm": 0.10432004928588867, "learning_rate": 0.01, "loss": 1.9788, "step": 79806 }, { "epoch": 8.205737199259715, "grad_norm": 0.1217341423034668, "learning_rate": 0.01, "loss": 1.951, "step": 79809 }, { "epoch": 8.20604565083282, "grad_norm": 0.09438272565603256, "learning_rate": 0.01, "loss": 1.9612, "step": 79812 }, { "epoch": 8.206354102405923, "grad_norm": 0.08129420131444931, "learning_rate": 0.01, "loss": 1.962, "step": 79815 }, { "epoch": 8.206662553979026, "grad_norm": 0.09659760445356369, "learning_rate": 0.01, "loss": 1.9487, "step": 79818 }, { "epoch": 8.206971005552129, "grad_norm": 0.06486459076404572, "learning_rate": 0.01, "loss": 1.9523, "step": 79821 }, { "epoch": 8.207279457125232, "grad_norm": 0.057728540152311325, "learning_rate": 0.01, "loss": 1.9572, "step": 79824 }, { "epoch": 8.207587908698335, "grad_norm": 0.08968479186296463, "learning_rate": 0.01, "loss": 1.9447, "step": 79827 }, { "epoch": 8.207896360271437, "grad_norm": 0.05536945164203644, "learning_rate": 0.01, "loss": 1.9478, "step": 79830 }, { "epoch": 8.20820481184454, "grad_norm": 0.03714819252490997, "learning_rate": 0.01, "loss": 1.9685, "step": 79833 }, { "epoch": 8.208513263417643, "grad_norm": 0.04826650768518448, "learning_rate": 0.01, "loss": 1.921, "step": 79836 }, { "epoch": 8.208821714990746, "grad_norm": 0.10562440007925034, "learning_rate": 0.01, "loss": 1.9417, "step": 79839 }, { "epoch": 8.209130166563849, "grad_norm": 0.12888909876346588, "learning_rate": 0.01, "loss": 1.945, "step": 79842 }, { "epoch": 8.209438618136952, "grad_norm": 0.04846480116248131, "learning_rate": 0.01, "loss": 1.959, "step": 79845 }, { "epoch": 8.209747069710055, "grad_norm": 0.041596248745918274, "learning_rate": 0.01, "loss": 1.9798, "step": 79848 }, { "epoch": 8.21005552128316, "grad_norm": 0.05451950803399086, "learning_rate": 0.01, "loss": 1.9689, "step": 79851 }, { "epoch": 8.210363972856262, "grad_norm": 0.046558499336242676, "learning_rate": 0.01, "loss": 1.9303, "step": 79854 }, { "epoch": 8.210672424429365, "grad_norm": 0.06177009269595146, "learning_rate": 0.01, "loss": 1.9714, "step": 79857 }, { "epoch": 8.210980876002468, "grad_norm": 0.09283863753080368, "learning_rate": 0.01, "loss": 1.9402, "step": 79860 }, { "epoch": 8.211289327575571, "grad_norm": 0.05722719803452492, "learning_rate": 0.01, "loss": 1.9588, "step": 79863 }, { "epoch": 8.211597779148674, "grad_norm": 0.08050020784139633, "learning_rate": 0.01, "loss": 1.9734, "step": 79866 }, { "epoch": 8.211906230721777, "grad_norm": 0.0464584156870842, "learning_rate": 0.01, "loss": 1.9654, "step": 79869 }, { "epoch": 8.21221468229488, "grad_norm": 0.03943070396780968, "learning_rate": 0.01, "loss": 1.9407, "step": 79872 }, { "epoch": 8.212523133867982, "grad_norm": 0.1355932652950287, "learning_rate": 0.01, "loss": 1.9515, "step": 79875 }, { "epoch": 8.212831585441085, "grad_norm": 0.060039740055799484, "learning_rate": 0.01, "loss": 1.9347, "step": 79878 }, { "epoch": 8.213140037014188, "grad_norm": 0.05428614467382431, "learning_rate": 0.01, "loss": 1.9436, "step": 79881 }, { "epoch": 8.213448488587291, "grad_norm": 0.04141784459352493, "learning_rate": 0.01, "loss": 1.9585, "step": 79884 }, { "epoch": 8.213756940160394, "grad_norm": 0.044172655791044235, "learning_rate": 0.01, "loss": 1.9612, "step": 79887 }, { "epoch": 8.214065391733499, "grad_norm": 0.10583905875682831, "learning_rate": 0.01, "loss": 1.9515, "step": 79890 }, { "epoch": 8.214373843306602, "grad_norm": 0.12462706863880157, "learning_rate": 0.01, "loss": 1.9581, "step": 79893 }, { "epoch": 8.214682294879704, "grad_norm": 0.14810222387313843, "learning_rate": 0.01, "loss": 1.9733, "step": 79896 }, { "epoch": 8.214990746452807, "grad_norm": 0.07370836287736893, "learning_rate": 0.01, "loss": 1.9743, "step": 79899 }, { "epoch": 8.21529919802591, "grad_norm": 0.047002315521240234, "learning_rate": 0.01, "loss": 1.9451, "step": 79902 }, { "epoch": 8.215607649599013, "grad_norm": 0.03757517784833908, "learning_rate": 0.01, "loss": 1.953, "step": 79905 }, { "epoch": 8.215916101172116, "grad_norm": 0.03486863523721695, "learning_rate": 0.01, "loss": 1.9572, "step": 79908 }, { "epoch": 8.216224552745219, "grad_norm": 0.045810382813215256, "learning_rate": 0.01, "loss": 1.95, "step": 79911 }, { "epoch": 8.216533004318322, "grad_norm": 0.03867745399475098, "learning_rate": 0.01, "loss": 1.9556, "step": 79914 }, { "epoch": 8.216841455891425, "grad_norm": 0.04912863299250603, "learning_rate": 0.01, "loss": 1.9491, "step": 79917 }, { "epoch": 8.217149907464528, "grad_norm": 0.04494732245802879, "learning_rate": 0.01, "loss": 1.9608, "step": 79920 }, { "epoch": 8.21745835903763, "grad_norm": 0.042188648134469986, "learning_rate": 0.01, "loss": 1.9527, "step": 79923 }, { "epoch": 8.217766810610733, "grad_norm": 0.04156626760959625, "learning_rate": 0.01, "loss": 1.9638, "step": 79926 }, { "epoch": 8.218075262183838, "grad_norm": 0.08161705732345581, "learning_rate": 0.01, "loss": 1.9321, "step": 79929 }, { "epoch": 8.21838371375694, "grad_norm": 0.0440400093793869, "learning_rate": 0.01, "loss": 1.9384, "step": 79932 }, { "epoch": 8.218692165330044, "grad_norm": 0.04624645411968231, "learning_rate": 0.01, "loss": 1.9547, "step": 79935 }, { "epoch": 8.219000616903147, "grad_norm": 0.05811585113406181, "learning_rate": 0.01, "loss": 1.9777, "step": 79938 }, { "epoch": 8.21930906847625, "grad_norm": 0.11906293034553528, "learning_rate": 0.01, "loss": 1.9471, "step": 79941 }, { "epoch": 8.219617520049352, "grad_norm": 0.13277171552181244, "learning_rate": 0.01, "loss": 1.9589, "step": 79944 }, { "epoch": 8.219925971622455, "grad_norm": 0.04586183652281761, "learning_rate": 0.01, "loss": 1.9533, "step": 79947 }, { "epoch": 8.220234423195558, "grad_norm": 0.03742174431681633, "learning_rate": 0.01, "loss": 1.9767, "step": 79950 }, { "epoch": 8.220542874768661, "grad_norm": 0.05153629928827286, "learning_rate": 0.01, "loss": 1.9479, "step": 79953 }, { "epoch": 8.220851326341764, "grad_norm": 0.07394517213106155, "learning_rate": 0.01, "loss": 1.9428, "step": 79956 }, { "epoch": 8.221159777914867, "grad_norm": 0.05437902361154556, "learning_rate": 0.01, "loss": 1.9497, "step": 79959 }, { "epoch": 8.22146822948797, "grad_norm": 0.0376947782933712, "learning_rate": 0.01, "loss": 1.9395, "step": 79962 }, { "epoch": 8.221776681061073, "grad_norm": 0.06275657564401627, "learning_rate": 0.01, "loss": 1.9886, "step": 79965 }, { "epoch": 8.222085132634177, "grad_norm": 0.04809385538101196, "learning_rate": 0.01, "loss": 1.9399, "step": 79968 }, { "epoch": 8.22239358420728, "grad_norm": 0.04617860168218613, "learning_rate": 0.01, "loss": 1.9467, "step": 79971 }, { "epoch": 8.222702035780383, "grad_norm": 0.040207915008068085, "learning_rate": 0.01, "loss": 1.9405, "step": 79974 }, { "epoch": 8.223010487353486, "grad_norm": 0.047990135848522186, "learning_rate": 0.01, "loss": 1.9472, "step": 79977 }, { "epoch": 8.223318938926589, "grad_norm": 0.12925958633422852, "learning_rate": 0.01, "loss": 1.954, "step": 79980 }, { "epoch": 8.223627390499692, "grad_norm": 0.0795065239071846, "learning_rate": 0.01, "loss": 1.9577, "step": 79983 }, { "epoch": 8.223935842072795, "grad_norm": 0.0691371038556099, "learning_rate": 0.01, "loss": 1.9404, "step": 79986 }, { "epoch": 8.224244293645897, "grad_norm": 0.05801672115921974, "learning_rate": 0.01, "loss": 1.9408, "step": 79989 }, { "epoch": 8.224552745219, "grad_norm": 0.03973524644970894, "learning_rate": 0.01, "loss": 1.929, "step": 79992 }, { "epoch": 8.224861196792103, "grad_norm": 0.03368094190955162, "learning_rate": 0.01, "loss": 1.9398, "step": 79995 }, { "epoch": 8.225169648365206, "grad_norm": 0.03496599942445755, "learning_rate": 0.01, "loss": 1.9539, "step": 79998 }, { "epoch": 8.225478099938309, "grad_norm": 0.04070889577269554, "learning_rate": 0.01, "loss": 1.9782, "step": 80001 }, { "epoch": 8.225786551511412, "grad_norm": 0.05320926755666733, "learning_rate": 0.01, "loss": 1.9517, "step": 80004 }, { "epoch": 8.226095003084517, "grad_norm": 0.10373469442129135, "learning_rate": 0.01, "loss": 1.9661, "step": 80007 }, { "epoch": 8.22640345465762, "grad_norm": 0.12059690803289413, "learning_rate": 0.01, "loss": 1.9398, "step": 80010 }, { "epoch": 8.226711906230722, "grad_norm": 0.1084703654050827, "learning_rate": 0.01, "loss": 1.9502, "step": 80013 }, { "epoch": 8.227020357803825, "grad_norm": 0.04436837136745453, "learning_rate": 0.01, "loss": 1.9708, "step": 80016 }, { "epoch": 8.227328809376928, "grad_norm": 0.03714198246598244, "learning_rate": 0.01, "loss": 1.9463, "step": 80019 }, { "epoch": 8.227637260950031, "grad_norm": 0.03939532861113548, "learning_rate": 0.01, "loss": 1.968, "step": 80022 }, { "epoch": 8.227945712523134, "grad_norm": 0.05460543558001518, "learning_rate": 0.01, "loss": 1.9574, "step": 80025 }, { "epoch": 8.228254164096237, "grad_norm": 0.10572293400764465, "learning_rate": 0.01, "loss": 1.9597, "step": 80028 }, { "epoch": 8.22856261566934, "grad_norm": 0.052371978759765625, "learning_rate": 0.01, "loss": 1.9602, "step": 80031 }, { "epoch": 8.228871067242443, "grad_norm": 0.03213518485426903, "learning_rate": 0.01, "loss": 1.9408, "step": 80034 }, { "epoch": 8.229179518815545, "grad_norm": 0.10267605632543564, "learning_rate": 0.01, "loss": 1.9713, "step": 80037 }, { "epoch": 8.229487970388648, "grad_norm": 0.11686369776725769, "learning_rate": 0.01, "loss": 1.9665, "step": 80040 }, { "epoch": 8.229796421961751, "grad_norm": 0.07059639692306519, "learning_rate": 0.01, "loss": 1.9669, "step": 80043 }, { "epoch": 8.230104873534856, "grad_norm": 0.048195354640483856, "learning_rate": 0.01, "loss": 1.9403, "step": 80046 }, { "epoch": 8.230413325107959, "grad_norm": 0.04270948842167854, "learning_rate": 0.01, "loss": 1.9643, "step": 80049 }, { "epoch": 8.230721776681062, "grad_norm": 0.04734241962432861, "learning_rate": 0.01, "loss": 1.9506, "step": 80052 }, { "epoch": 8.231030228254165, "grad_norm": 0.05041227117180824, "learning_rate": 0.01, "loss": 1.958, "step": 80055 }, { "epoch": 8.231338679827267, "grad_norm": 0.060459088534116745, "learning_rate": 0.01, "loss": 1.9475, "step": 80058 }, { "epoch": 8.23164713140037, "grad_norm": 0.1127137765288353, "learning_rate": 0.01, "loss": 1.9468, "step": 80061 }, { "epoch": 8.231955582973473, "grad_norm": 0.11185494065284729, "learning_rate": 0.01, "loss": 1.9261, "step": 80064 }, { "epoch": 8.232264034546576, "grad_norm": 0.09597989171743393, "learning_rate": 0.01, "loss": 1.9636, "step": 80067 }, { "epoch": 8.232572486119679, "grad_norm": 0.04449654370546341, "learning_rate": 0.01, "loss": 1.9502, "step": 80070 }, { "epoch": 8.232880937692782, "grad_norm": 0.12597239017486572, "learning_rate": 0.01, "loss": 1.9488, "step": 80073 }, { "epoch": 8.233189389265885, "grad_norm": 0.045494552701711655, "learning_rate": 0.01, "loss": 1.954, "step": 80076 }, { "epoch": 8.233497840838988, "grad_norm": 0.04805656895041466, "learning_rate": 0.01, "loss": 1.9652, "step": 80079 }, { "epoch": 8.23380629241209, "grad_norm": 0.06371653079986572, "learning_rate": 0.01, "loss": 1.9497, "step": 80082 }, { "epoch": 8.234114743985195, "grad_norm": 0.10116062313318253, "learning_rate": 0.01, "loss": 1.9851, "step": 80085 }, { "epoch": 8.234423195558298, "grad_norm": 0.08505774289369583, "learning_rate": 0.01, "loss": 1.9703, "step": 80088 }, { "epoch": 8.234731647131401, "grad_norm": 0.04425555467605591, "learning_rate": 0.01, "loss": 1.9421, "step": 80091 }, { "epoch": 8.235040098704504, "grad_norm": 0.06315706670284271, "learning_rate": 0.01, "loss": 1.9769, "step": 80094 }, { "epoch": 8.235348550277607, "grad_norm": 0.05656632408499718, "learning_rate": 0.01, "loss": 1.9386, "step": 80097 }, { "epoch": 8.23565700185071, "grad_norm": 0.10018274188041687, "learning_rate": 0.01, "loss": 1.9483, "step": 80100 }, { "epoch": 8.235965453423812, "grad_norm": 0.04145848751068115, "learning_rate": 0.01, "loss": 1.9258, "step": 80103 }, { "epoch": 8.236273904996915, "grad_norm": 0.0376301109790802, "learning_rate": 0.01, "loss": 1.9473, "step": 80106 }, { "epoch": 8.236582356570018, "grad_norm": 0.08260329812765121, "learning_rate": 0.01, "loss": 1.9663, "step": 80109 }, { "epoch": 8.236890808143121, "grad_norm": 0.03497771918773651, "learning_rate": 0.01, "loss": 1.9451, "step": 80112 }, { "epoch": 8.237199259716224, "grad_norm": 0.07940883189439774, "learning_rate": 0.01, "loss": 1.9528, "step": 80115 }, { "epoch": 8.237507711289327, "grad_norm": 0.06283021718263626, "learning_rate": 0.01, "loss": 1.9587, "step": 80118 }, { "epoch": 8.23781616286243, "grad_norm": 0.09119091182947159, "learning_rate": 0.01, "loss": 1.9432, "step": 80121 }, { "epoch": 8.238124614435534, "grad_norm": 0.11459033936262131, "learning_rate": 0.01, "loss": 1.9437, "step": 80124 }, { "epoch": 8.238433066008637, "grad_norm": 0.1344277411699295, "learning_rate": 0.01, "loss": 1.9714, "step": 80127 }, { "epoch": 8.23874151758174, "grad_norm": 0.08821120858192444, "learning_rate": 0.01, "loss": 1.9537, "step": 80130 }, { "epoch": 8.239049969154843, "grad_norm": 0.04385138675570488, "learning_rate": 0.01, "loss": 1.9459, "step": 80133 }, { "epoch": 8.239358420727946, "grad_norm": 0.04020407050848007, "learning_rate": 0.01, "loss": 1.9517, "step": 80136 }, { "epoch": 8.239666872301049, "grad_norm": 0.049893226474523544, "learning_rate": 0.01, "loss": 1.9494, "step": 80139 }, { "epoch": 8.239975323874152, "grad_norm": 0.11456778645515442, "learning_rate": 0.01, "loss": 1.9481, "step": 80142 }, { "epoch": 8.240283775447255, "grad_norm": 0.04332318529486656, "learning_rate": 0.01, "loss": 1.9518, "step": 80145 }, { "epoch": 8.240592227020358, "grad_norm": 0.03990904986858368, "learning_rate": 0.01, "loss": 1.9466, "step": 80148 }, { "epoch": 8.24090067859346, "grad_norm": 0.08683262765407562, "learning_rate": 0.01, "loss": 1.9683, "step": 80151 }, { "epoch": 8.241209130166563, "grad_norm": 0.034948088228702545, "learning_rate": 0.01, "loss": 1.9745, "step": 80154 }, { "epoch": 8.241517581739666, "grad_norm": 0.03932984918355942, "learning_rate": 0.01, "loss": 1.941, "step": 80157 }, { "epoch": 8.241826033312769, "grad_norm": 0.11009229719638824, "learning_rate": 0.01, "loss": 1.9518, "step": 80160 }, { "epoch": 8.242134484885874, "grad_norm": 0.07181216031312943, "learning_rate": 0.01, "loss": 1.9628, "step": 80163 }, { "epoch": 8.242442936458977, "grad_norm": 0.03774251788854599, "learning_rate": 0.01, "loss": 1.9238, "step": 80166 }, { "epoch": 8.24275138803208, "grad_norm": 0.048288244754076004, "learning_rate": 0.01, "loss": 1.9549, "step": 80169 }, { "epoch": 8.243059839605182, "grad_norm": 0.05333415046334267, "learning_rate": 0.01, "loss": 1.9246, "step": 80172 }, { "epoch": 8.243368291178285, "grad_norm": 0.13225023448467255, "learning_rate": 0.01, "loss": 1.9806, "step": 80175 }, { "epoch": 8.243676742751388, "grad_norm": 0.09060022234916687, "learning_rate": 0.01, "loss": 1.9449, "step": 80178 }, { "epoch": 8.243985194324491, "grad_norm": 0.039559829980134964, "learning_rate": 0.01, "loss": 1.9376, "step": 80181 }, { "epoch": 8.244293645897594, "grad_norm": 0.041622404009103775, "learning_rate": 0.01, "loss": 1.9451, "step": 80184 }, { "epoch": 8.244602097470697, "grad_norm": 0.11407449096441269, "learning_rate": 0.01, "loss": 1.9483, "step": 80187 }, { "epoch": 8.2449105490438, "grad_norm": 0.053344253450632095, "learning_rate": 0.01, "loss": 1.9555, "step": 80190 }, { "epoch": 8.245219000616903, "grad_norm": 0.08898445218801498, "learning_rate": 0.01, "loss": 1.9345, "step": 80193 }, { "epoch": 8.245527452190005, "grad_norm": 0.08817441761493683, "learning_rate": 0.01, "loss": 1.9556, "step": 80196 }, { "epoch": 8.245835903763108, "grad_norm": 0.052410975098609924, "learning_rate": 0.01, "loss": 1.9672, "step": 80199 }, { "epoch": 8.246144355336213, "grad_norm": 0.04144204780459404, "learning_rate": 0.01, "loss": 1.9538, "step": 80202 }, { "epoch": 8.246452806909316, "grad_norm": 0.095216304063797, "learning_rate": 0.01, "loss": 1.9328, "step": 80205 }, { "epoch": 8.246761258482419, "grad_norm": 0.05521360784769058, "learning_rate": 0.01, "loss": 1.957, "step": 80208 }, { "epoch": 8.247069710055522, "grad_norm": 0.08642126619815826, "learning_rate": 0.01, "loss": 1.9773, "step": 80211 }, { "epoch": 8.247378161628625, "grad_norm": 0.06482682377099991, "learning_rate": 0.01, "loss": 1.9714, "step": 80214 }, { "epoch": 8.247686613201727, "grad_norm": 0.04611750319600105, "learning_rate": 0.01, "loss": 1.9346, "step": 80217 }, { "epoch": 8.24799506477483, "grad_norm": 0.04172108694911003, "learning_rate": 0.01, "loss": 1.9251, "step": 80220 }, { "epoch": 8.248303516347933, "grad_norm": 0.038534946739673615, "learning_rate": 0.01, "loss": 1.9597, "step": 80223 }, { "epoch": 8.248611967921036, "grad_norm": 0.0295123141258955, "learning_rate": 0.01, "loss": 1.9352, "step": 80226 }, { "epoch": 8.248920419494139, "grad_norm": 0.05776858329772949, "learning_rate": 0.01, "loss": 1.9369, "step": 80229 }, { "epoch": 8.249228871067242, "grad_norm": 0.21042542159557343, "learning_rate": 0.01, "loss": 1.9429, "step": 80232 }, { "epoch": 8.249537322640345, "grad_norm": 0.06884560734033585, "learning_rate": 0.01, "loss": 1.9571, "step": 80235 }, { "epoch": 8.249845774213448, "grad_norm": 0.03800230845808983, "learning_rate": 0.01, "loss": 1.9593, "step": 80238 }, { "epoch": 8.250154225786552, "grad_norm": 0.06354160606861115, "learning_rate": 0.01, "loss": 1.9605, "step": 80241 }, { "epoch": 8.250462677359655, "grad_norm": 0.0691051259636879, "learning_rate": 0.01, "loss": 1.9366, "step": 80244 }, { "epoch": 8.250771128932758, "grad_norm": 0.0348472073674202, "learning_rate": 0.01, "loss": 1.9537, "step": 80247 }, { "epoch": 8.251079580505861, "grad_norm": 0.03314311057329178, "learning_rate": 0.01, "loss": 1.9445, "step": 80250 }, { "epoch": 8.251388032078964, "grad_norm": 0.03092731535434723, "learning_rate": 0.01, "loss": 1.9788, "step": 80253 }, { "epoch": 8.251696483652067, "grad_norm": 0.10996025800704956, "learning_rate": 0.01, "loss": 1.9462, "step": 80256 }, { "epoch": 8.25200493522517, "grad_norm": 0.05737313628196716, "learning_rate": 0.01, "loss": 1.9582, "step": 80259 }, { "epoch": 8.252313386798273, "grad_norm": 0.06444061547517776, "learning_rate": 0.01, "loss": 1.9371, "step": 80262 }, { "epoch": 8.252621838371375, "grad_norm": 0.09499012678861618, "learning_rate": 0.01, "loss": 1.9636, "step": 80265 }, { "epoch": 8.252930289944478, "grad_norm": 0.10432090610265732, "learning_rate": 0.01, "loss": 1.9331, "step": 80268 }, { "epoch": 8.253238741517581, "grad_norm": 0.06488598883152008, "learning_rate": 0.01, "loss": 1.9328, "step": 80271 }, { "epoch": 8.253547193090684, "grad_norm": 0.06193705275654793, "learning_rate": 0.01, "loss": 1.9552, "step": 80274 }, { "epoch": 8.253855644663787, "grad_norm": 0.0635480210185051, "learning_rate": 0.01, "loss": 1.9563, "step": 80277 }, { "epoch": 8.254164096236892, "grad_norm": 0.04530000314116478, "learning_rate": 0.01, "loss": 1.9278, "step": 80280 }, { "epoch": 8.254472547809995, "grad_norm": 0.07301802933216095, "learning_rate": 0.01, "loss": 1.942, "step": 80283 }, { "epoch": 8.254780999383097, "grad_norm": 0.0911862924695015, "learning_rate": 0.01, "loss": 1.958, "step": 80286 }, { "epoch": 8.2550894509562, "grad_norm": 0.12006412446498871, "learning_rate": 0.01, "loss": 1.9666, "step": 80289 }, { "epoch": 8.255397902529303, "grad_norm": 0.04310046136379242, "learning_rate": 0.01, "loss": 1.9576, "step": 80292 }, { "epoch": 8.255706354102406, "grad_norm": 0.055970869958400726, "learning_rate": 0.01, "loss": 1.9416, "step": 80295 }, { "epoch": 8.256014805675509, "grad_norm": 0.08021442592144012, "learning_rate": 0.01, "loss": 1.9488, "step": 80298 }, { "epoch": 8.256323257248612, "grad_norm": 0.05120861530303955, "learning_rate": 0.01, "loss": 1.9387, "step": 80301 }, { "epoch": 8.256631708821715, "grad_norm": 0.09437873214483261, "learning_rate": 0.01, "loss": 1.9279, "step": 80304 }, { "epoch": 8.256940160394818, "grad_norm": 0.04031134024262428, "learning_rate": 0.01, "loss": 1.966, "step": 80307 }, { "epoch": 8.25724861196792, "grad_norm": 0.10186951607465744, "learning_rate": 0.01, "loss": 1.9584, "step": 80310 }, { "epoch": 8.257557063541023, "grad_norm": 0.05123843625187874, "learning_rate": 0.01, "loss": 1.9557, "step": 80313 }, { "epoch": 8.257865515114126, "grad_norm": 0.03436708450317383, "learning_rate": 0.01, "loss": 1.948, "step": 80316 }, { "epoch": 8.258173966687231, "grad_norm": 0.045604050159454346, "learning_rate": 0.01, "loss": 1.9626, "step": 80319 }, { "epoch": 8.258482418260334, "grad_norm": 0.04059969261288643, "learning_rate": 0.01, "loss": 1.9533, "step": 80322 }, { "epoch": 8.258790869833437, "grad_norm": 0.05603836104273796, "learning_rate": 0.01, "loss": 1.9327, "step": 80325 }, { "epoch": 8.25909932140654, "grad_norm": 0.041143469512462616, "learning_rate": 0.01, "loss": 1.9315, "step": 80328 }, { "epoch": 8.259407772979642, "grad_norm": 0.051349326968193054, "learning_rate": 0.01, "loss": 1.9603, "step": 80331 }, { "epoch": 8.259716224552745, "grad_norm": 0.07487104833126068, "learning_rate": 0.01, "loss": 1.9442, "step": 80334 }, { "epoch": 8.260024676125848, "grad_norm": 0.09332002699375153, "learning_rate": 0.01, "loss": 1.9566, "step": 80337 }, { "epoch": 8.260333127698951, "grad_norm": 0.11811631172895432, "learning_rate": 0.01, "loss": 1.9363, "step": 80340 }, { "epoch": 8.260641579272054, "grad_norm": 0.041397687047719955, "learning_rate": 0.01, "loss": 1.9399, "step": 80343 }, { "epoch": 8.260950030845157, "grad_norm": 0.0496373251080513, "learning_rate": 0.01, "loss": 1.9546, "step": 80346 }, { "epoch": 8.26125848241826, "grad_norm": 0.0499790795147419, "learning_rate": 0.01, "loss": 1.9539, "step": 80349 }, { "epoch": 8.261566933991363, "grad_norm": 0.13844771683216095, "learning_rate": 0.01, "loss": 1.9443, "step": 80352 }, { "epoch": 8.261875385564466, "grad_norm": 0.06328003853559494, "learning_rate": 0.01, "loss": 1.9367, "step": 80355 }, { "epoch": 8.26218383713757, "grad_norm": 0.05448591709136963, "learning_rate": 0.01, "loss": 1.9691, "step": 80358 }, { "epoch": 8.262492288710673, "grad_norm": 0.034886013716459274, "learning_rate": 0.01, "loss": 1.9589, "step": 80361 }, { "epoch": 8.262800740283776, "grad_norm": 0.12212526053190231, "learning_rate": 0.01, "loss": 1.9609, "step": 80364 }, { "epoch": 8.263109191856879, "grad_norm": 0.04155818372964859, "learning_rate": 0.01, "loss": 1.9429, "step": 80367 }, { "epoch": 8.263417643429982, "grad_norm": 0.0539507232606411, "learning_rate": 0.01, "loss": 1.9372, "step": 80370 }, { "epoch": 8.263726095003085, "grad_norm": 0.04046839475631714, "learning_rate": 0.01, "loss": 1.9561, "step": 80373 }, { "epoch": 8.264034546576188, "grad_norm": 0.04962043836712837, "learning_rate": 0.01, "loss": 1.9406, "step": 80376 }, { "epoch": 8.26434299814929, "grad_norm": 0.043131254613399506, "learning_rate": 0.01, "loss": 1.9494, "step": 80379 }, { "epoch": 8.264651449722393, "grad_norm": 0.0805496796965599, "learning_rate": 0.01, "loss": 1.9693, "step": 80382 }, { "epoch": 8.264959901295496, "grad_norm": 0.08264986425638199, "learning_rate": 0.01, "loss": 1.9761, "step": 80385 }, { "epoch": 8.265268352868599, "grad_norm": 0.08037829399108887, "learning_rate": 0.01, "loss": 1.9286, "step": 80388 }, { "epoch": 8.265576804441702, "grad_norm": 0.06496891379356384, "learning_rate": 0.01, "loss": 1.9546, "step": 80391 }, { "epoch": 8.265885256014805, "grad_norm": 0.049148254096508026, "learning_rate": 0.01, "loss": 1.9751, "step": 80394 }, { "epoch": 8.26619370758791, "grad_norm": 0.0740509107708931, "learning_rate": 0.01, "loss": 1.9542, "step": 80397 }, { "epoch": 8.266502159161012, "grad_norm": 0.035503897815942764, "learning_rate": 0.01, "loss": 1.9712, "step": 80400 }, { "epoch": 8.266810610734115, "grad_norm": 0.043681301176548004, "learning_rate": 0.01, "loss": 1.948, "step": 80403 }, { "epoch": 8.267119062307218, "grad_norm": 0.04133985564112663, "learning_rate": 0.01, "loss": 1.9581, "step": 80406 }, { "epoch": 8.267427513880321, "grad_norm": 0.09284191578626633, "learning_rate": 0.01, "loss": 1.9494, "step": 80409 }, { "epoch": 8.267735965453424, "grad_norm": 0.0532713383436203, "learning_rate": 0.01, "loss": 1.9384, "step": 80412 }, { "epoch": 8.268044417026527, "grad_norm": 0.09650903940200806, "learning_rate": 0.01, "loss": 1.9528, "step": 80415 }, { "epoch": 8.26835286859963, "grad_norm": 0.04948548227548599, "learning_rate": 0.01, "loss": 1.9346, "step": 80418 }, { "epoch": 8.268661320172733, "grad_norm": 0.029745401814579964, "learning_rate": 0.01, "loss": 1.9511, "step": 80421 }, { "epoch": 8.268969771745835, "grad_norm": 0.04314611479640007, "learning_rate": 0.01, "loss": 1.9429, "step": 80424 }, { "epoch": 8.269278223318938, "grad_norm": 0.035734035074710846, "learning_rate": 0.01, "loss": 1.9454, "step": 80427 }, { "epoch": 8.269586674892041, "grad_norm": 0.06398891657590866, "learning_rate": 0.01, "loss": 1.965, "step": 80430 }, { "epoch": 8.269895126465144, "grad_norm": 0.08245398104190826, "learning_rate": 0.01, "loss": 1.9473, "step": 80433 }, { "epoch": 8.270203578038249, "grad_norm": 0.0568409189581871, "learning_rate": 0.01, "loss": 1.9356, "step": 80436 }, { "epoch": 8.270512029611352, "grad_norm": 0.05230523645877838, "learning_rate": 0.01, "loss": 1.9264, "step": 80439 }, { "epoch": 8.270820481184455, "grad_norm": 0.09985850751399994, "learning_rate": 0.01, "loss": 1.9424, "step": 80442 }, { "epoch": 8.271128932757557, "grad_norm": 0.0723845511674881, "learning_rate": 0.01, "loss": 1.9131, "step": 80445 }, { "epoch": 8.27143738433066, "grad_norm": 0.1009666696190834, "learning_rate": 0.01, "loss": 1.9569, "step": 80448 }, { "epoch": 8.271745835903763, "grad_norm": 0.03674089536070824, "learning_rate": 0.01, "loss": 1.9386, "step": 80451 }, { "epoch": 8.272054287476866, "grad_norm": 0.0843566283583641, "learning_rate": 0.01, "loss": 1.9732, "step": 80454 }, { "epoch": 8.272362739049969, "grad_norm": 0.04326341301202774, "learning_rate": 0.01, "loss": 1.9495, "step": 80457 }, { "epoch": 8.272671190623072, "grad_norm": 0.07571718096733093, "learning_rate": 0.01, "loss": 1.9476, "step": 80460 }, { "epoch": 8.272979642196175, "grad_norm": 0.04678694158792496, "learning_rate": 0.01, "loss": 1.9379, "step": 80463 }, { "epoch": 8.273288093769278, "grad_norm": 0.10458555072546005, "learning_rate": 0.01, "loss": 1.9549, "step": 80466 }, { "epoch": 8.27359654534238, "grad_norm": 0.08447905629873276, "learning_rate": 0.01, "loss": 1.95, "step": 80469 }, { "epoch": 8.273904996915483, "grad_norm": 0.07039906084537506, "learning_rate": 0.01, "loss": 1.9738, "step": 80472 }, { "epoch": 8.274213448488588, "grad_norm": 0.05202379822731018, "learning_rate": 0.01, "loss": 1.9502, "step": 80475 }, { "epoch": 8.274521900061691, "grad_norm": 0.08551952987909317, "learning_rate": 0.01, "loss": 1.944, "step": 80478 }, { "epoch": 8.274830351634794, "grad_norm": 0.10451101511716843, "learning_rate": 0.01, "loss": 1.9502, "step": 80481 }, { "epoch": 8.275138803207897, "grad_norm": 0.13095597922801971, "learning_rate": 0.01, "loss": 1.9492, "step": 80484 }, { "epoch": 8.275447254781, "grad_norm": 0.10141288489103317, "learning_rate": 0.01, "loss": 1.9573, "step": 80487 }, { "epoch": 8.275755706354103, "grad_norm": 0.09214049577713013, "learning_rate": 0.01, "loss": 1.9704, "step": 80490 }, { "epoch": 8.276064157927205, "grad_norm": 0.042113929986953735, "learning_rate": 0.01, "loss": 1.9792, "step": 80493 }, { "epoch": 8.276372609500308, "grad_norm": 0.051108475774526596, "learning_rate": 0.01, "loss": 1.9752, "step": 80496 }, { "epoch": 8.276681061073411, "grad_norm": 0.04188302531838417, "learning_rate": 0.01, "loss": 1.9464, "step": 80499 }, { "epoch": 8.276989512646514, "grad_norm": 0.0321899838745594, "learning_rate": 0.01, "loss": 1.9428, "step": 80502 }, { "epoch": 8.277297964219617, "grad_norm": 0.038304295390844345, "learning_rate": 0.01, "loss": 1.9416, "step": 80505 }, { "epoch": 8.27760641579272, "grad_norm": 0.04976971819996834, "learning_rate": 0.01, "loss": 1.9363, "step": 80508 }, { "epoch": 8.277914867365823, "grad_norm": 0.10897812247276306, "learning_rate": 0.01, "loss": 1.9518, "step": 80511 }, { "epoch": 8.278223318938927, "grad_norm": 0.04283289983868599, "learning_rate": 0.01, "loss": 1.9613, "step": 80514 }, { "epoch": 8.27853177051203, "grad_norm": 0.0434601865708828, "learning_rate": 0.01, "loss": 1.9597, "step": 80517 }, { "epoch": 8.278840222085133, "grad_norm": 0.1142946258187294, "learning_rate": 0.01, "loss": 1.9586, "step": 80520 }, { "epoch": 8.279148673658236, "grad_norm": 0.12008026242256165, "learning_rate": 0.01, "loss": 1.9573, "step": 80523 }, { "epoch": 8.279457125231339, "grad_norm": 0.04869767278432846, "learning_rate": 0.01, "loss": 1.9305, "step": 80526 }, { "epoch": 8.279765576804442, "grad_norm": 0.037496909499168396, "learning_rate": 0.01, "loss": 1.9487, "step": 80529 }, { "epoch": 8.280074028377545, "grad_norm": 0.041276488453149796, "learning_rate": 0.01, "loss": 1.9529, "step": 80532 }, { "epoch": 8.280382479950648, "grad_norm": 0.054701920598745346, "learning_rate": 0.01, "loss": 1.943, "step": 80535 }, { "epoch": 8.28069093152375, "grad_norm": 0.06538514047861099, "learning_rate": 0.01, "loss": 1.9386, "step": 80538 }, { "epoch": 8.280999383096853, "grad_norm": 0.05490681156516075, "learning_rate": 0.01, "loss": 1.9769, "step": 80541 }, { "epoch": 8.281307834669956, "grad_norm": 0.03307463601231575, "learning_rate": 0.01, "loss": 1.9339, "step": 80544 }, { "epoch": 8.28161628624306, "grad_norm": 0.12409739941358566, "learning_rate": 0.01, "loss": 1.956, "step": 80547 }, { "epoch": 8.281924737816162, "grad_norm": 0.0904029905796051, "learning_rate": 0.01, "loss": 1.9817, "step": 80550 }, { "epoch": 8.282233189389267, "grad_norm": 0.09008060395717621, "learning_rate": 0.01, "loss": 1.9671, "step": 80553 }, { "epoch": 8.28254164096237, "grad_norm": 0.06393894553184509, "learning_rate": 0.01, "loss": 1.9753, "step": 80556 }, { "epoch": 8.282850092535472, "grad_norm": 0.0343107245862484, "learning_rate": 0.01, "loss": 1.9456, "step": 80559 }, { "epoch": 8.283158544108575, "grad_norm": 0.04057645797729492, "learning_rate": 0.01, "loss": 1.9398, "step": 80562 }, { "epoch": 8.283466995681678, "grad_norm": 0.04213458299636841, "learning_rate": 0.01, "loss": 1.9378, "step": 80565 }, { "epoch": 8.283775447254781, "grad_norm": 0.03605170175433159, "learning_rate": 0.01, "loss": 1.9528, "step": 80568 }, { "epoch": 8.284083898827884, "grad_norm": 0.04358351603150368, "learning_rate": 0.01, "loss": 1.948, "step": 80571 }, { "epoch": 8.284392350400987, "grad_norm": 0.04204123094677925, "learning_rate": 0.01, "loss": 1.9635, "step": 80574 }, { "epoch": 8.28470080197409, "grad_norm": 0.037242695689201355, "learning_rate": 0.01, "loss": 1.9628, "step": 80577 }, { "epoch": 8.285009253547193, "grad_norm": 0.09867773950099945, "learning_rate": 0.01, "loss": 1.9283, "step": 80580 }, { "epoch": 8.285317705120296, "grad_norm": 0.08788815140724182, "learning_rate": 0.01, "loss": 1.9484, "step": 80583 }, { "epoch": 8.285626156693398, "grad_norm": 0.0887393206357956, "learning_rate": 0.01, "loss": 1.931, "step": 80586 }, { "epoch": 8.285934608266501, "grad_norm": 0.04635678976774216, "learning_rate": 0.01, "loss": 1.979, "step": 80589 }, { "epoch": 8.286243059839606, "grad_norm": 0.09219451993703842, "learning_rate": 0.01, "loss": 1.9491, "step": 80592 }, { "epoch": 8.286551511412709, "grad_norm": 0.08263669162988663, "learning_rate": 0.01, "loss": 1.933, "step": 80595 }, { "epoch": 8.286859962985812, "grad_norm": 0.03986113891005516, "learning_rate": 0.01, "loss": 1.919, "step": 80598 }, { "epoch": 8.287168414558915, "grad_norm": 0.04161595553159714, "learning_rate": 0.01, "loss": 1.9827, "step": 80601 }, { "epoch": 8.287476866132018, "grad_norm": 0.03629259765148163, "learning_rate": 0.01, "loss": 1.9463, "step": 80604 }, { "epoch": 8.28778531770512, "grad_norm": 0.05570584908127785, "learning_rate": 0.01, "loss": 1.9408, "step": 80607 }, { "epoch": 8.288093769278223, "grad_norm": 0.04991071671247482, "learning_rate": 0.01, "loss": 1.9594, "step": 80610 }, { "epoch": 8.288402220851326, "grad_norm": 0.11207103729248047, "learning_rate": 0.01, "loss": 1.9298, "step": 80613 }, { "epoch": 8.288710672424429, "grad_norm": 0.04575880244374275, "learning_rate": 0.01, "loss": 1.9431, "step": 80616 }, { "epoch": 8.289019123997532, "grad_norm": 0.046922992914915085, "learning_rate": 0.01, "loss": 1.9547, "step": 80619 }, { "epoch": 8.289327575570635, "grad_norm": 0.04037918150424957, "learning_rate": 0.01, "loss": 1.9295, "step": 80622 }, { "epoch": 8.289636027143738, "grad_norm": 0.06104828417301178, "learning_rate": 0.01, "loss": 1.9659, "step": 80625 }, { "epoch": 8.28994447871684, "grad_norm": 0.05090169981122017, "learning_rate": 0.01, "loss": 1.968, "step": 80628 }, { "epoch": 8.290252930289945, "grad_norm": 0.061494361609220505, "learning_rate": 0.01, "loss": 1.9656, "step": 80631 }, { "epoch": 8.290561381863048, "grad_norm": 0.04942132532596588, "learning_rate": 0.01, "loss": 1.9695, "step": 80634 }, { "epoch": 8.290869833436151, "grad_norm": 0.038736648857593536, "learning_rate": 0.01, "loss": 1.965, "step": 80637 }, { "epoch": 8.291178285009254, "grad_norm": 0.03663397207856178, "learning_rate": 0.01, "loss": 1.9473, "step": 80640 }, { "epoch": 8.291486736582357, "grad_norm": 0.10741380602121353, "learning_rate": 0.01, "loss": 1.9387, "step": 80643 }, { "epoch": 8.29179518815546, "grad_norm": 0.13612329959869385, "learning_rate": 0.01, "loss": 1.9511, "step": 80646 }, { "epoch": 8.292103639728563, "grad_norm": 0.06880326569080353, "learning_rate": 0.01, "loss": 1.9277, "step": 80649 }, { "epoch": 8.292412091301665, "grad_norm": 0.0510687455534935, "learning_rate": 0.01, "loss": 1.9577, "step": 80652 }, { "epoch": 8.292720542874768, "grad_norm": 0.05295844376087189, "learning_rate": 0.01, "loss": 1.9382, "step": 80655 }, { "epoch": 8.293028994447871, "grad_norm": 0.04496245086193085, "learning_rate": 0.01, "loss": 1.9405, "step": 80658 }, { "epoch": 8.293337446020974, "grad_norm": 0.07872740179300308, "learning_rate": 0.01, "loss": 1.9643, "step": 80661 }, { "epoch": 8.293645897594077, "grad_norm": 0.07173445075750351, "learning_rate": 0.01, "loss": 1.9575, "step": 80664 }, { "epoch": 8.29395434916718, "grad_norm": 0.08514854311943054, "learning_rate": 0.01, "loss": 1.9371, "step": 80667 }, { "epoch": 8.294262800740285, "grad_norm": 0.07339558005332947, "learning_rate": 0.01, "loss": 1.9502, "step": 80670 }, { "epoch": 8.294571252313387, "grad_norm": 0.07977834343910217, "learning_rate": 0.01, "loss": 1.9626, "step": 80673 }, { "epoch": 8.29487970388649, "grad_norm": 0.1017339676618576, "learning_rate": 0.01, "loss": 1.9403, "step": 80676 }, { "epoch": 8.295188155459593, "grad_norm": 0.038830697536468506, "learning_rate": 0.01, "loss": 1.9573, "step": 80679 }, { "epoch": 8.295496607032696, "grad_norm": 0.10399825125932693, "learning_rate": 0.01, "loss": 1.9592, "step": 80682 }, { "epoch": 8.295805058605799, "grad_norm": 0.0402609184384346, "learning_rate": 0.01, "loss": 1.9703, "step": 80685 }, { "epoch": 8.296113510178902, "grad_norm": 0.034392885863780975, "learning_rate": 0.01, "loss": 1.9567, "step": 80688 }, { "epoch": 8.296421961752005, "grad_norm": 0.1786748617887497, "learning_rate": 0.01, "loss": 1.9411, "step": 80691 }, { "epoch": 8.296730413325108, "grad_norm": 0.08792231976985931, "learning_rate": 0.01, "loss": 1.9353, "step": 80694 }, { "epoch": 8.29703886489821, "grad_norm": 0.10741207748651505, "learning_rate": 0.01, "loss": 1.9861, "step": 80697 }, { "epoch": 8.297347316471313, "grad_norm": 0.08831042796373367, "learning_rate": 0.01, "loss": 1.9522, "step": 80700 }, { "epoch": 8.297655768044416, "grad_norm": 0.09200239181518555, "learning_rate": 0.01, "loss": 1.9411, "step": 80703 }, { "epoch": 8.29796421961752, "grad_norm": 0.05381232500076294, "learning_rate": 0.01, "loss": 1.9365, "step": 80706 }, { "epoch": 8.298272671190624, "grad_norm": 0.032259222120046616, "learning_rate": 0.01, "loss": 1.9393, "step": 80709 }, { "epoch": 8.298581122763727, "grad_norm": 0.049715928733348846, "learning_rate": 0.01, "loss": 1.9585, "step": 80712 }, { "epoch": 8.29888957433683, "grad_norm": 0.0569734163582325, "learning_rate": 0.01, "loss": 1.9783, "step": 80715 }, { "epoch": 8.299198025909932, "grad_norm": 0.04328460991382599, "learning_rate": 0.01, "loss": 1.9538, "step": 80718 }, { "epoch": 8.299506477483035, "grad_norm": 0.09637049585580826, "learning_rate": 0.01, "loss": 1.9432, "step": 80721 }, { "epoch": 8.299814929056138, "grad_norm": 0.0515410453081131, "learning_rate": 0.01, "loss": 1.952, "step": 80724 }, { "epoch": 8.300123380629241, "grad_norm": 0.041060756891965866, "learning_rate": 0.01, "loss": 1.9349, "step": 80727 }, { "epoch": 8.300431832202344, "grad_norm": 0.07743452489376068, "learning_rate": 0.01, "loss": 1.9341, "step": 80730 }, { "epoch": 8.300740283775447, "grad_norm": 0.08073935657739639, "learning_rate": 0.01, "loss": 1.9548, "step": 80733 }, { "epoch": 8.30104873534855, "grad_norm": 0.13999415934085846, "learning_rate": 0.01, "loss": 1.9482, "step": 80736 }, { "epoch": 8.301357186921653, "grad_norm": 0.0980861559510231, "learning_rate": 0.01, "loss": 1.9986, "step": 80739 }, { "epoch": 8.301665638494756, "grad_norm": 0.09149259328842163, "learning_rate": 0.01, "loss": 1.9479, "step": 80742 }, { "epoch": 8.301974090067858, "grad_norm": 0.05871947482228279, "learning_rate": 0.01, "loss": 1.9437, "step": 80745 }, { "epoch": 8.302282541640963, "grad_norm": 0.045800916850566864, "learning_rate": 0.01, "loss": 1.9686, "step": 80748 }, { "epoch": 8.302590993214066, "grad_norm": 0.0665321871638298, "learning_rate": 0.01, "loss": 1.9486, "step": 80751 }, { "epoch": 8.302899444787169, "grad_norm": 0.08737419545650482, "learning_rate": 0.01, "loss": 1.9674, "step": 80754 }, { "epoch": 8.303207896360272, "grad_norm": 0.05846291407942772, "learning_rate": 0.01, "loss": 1.9617, "step": 80757 }, { "epoch": 8.303516347933375, "grad_norm": 0.058516647666692734, "learning_rate": 0.01, "loss": 1.9565, "step": 80760 }, { "epoch": 8.303824799506478, "grad_norm": 0.05862552672624588, "learning_rate": 0.01, "loss": 1.9642, "step": 80763 }, { "epoch": 8.30413325107958, "grad_norm": 0.04875510185956955, "learning_rate": 0.01, "loss": 1.9332, "step": 80766 }, { "epoch": 8.304441702652683, "grad_norm": 0.10074342787265778, "learning_rate": 0.01, "loss": 1.9308, "step": 80769 }, { "epoch": 8.304750154225786, "grad_norm": 0.03849528357386589, "learning_rate": 0.01, "loss": 1.9249, "step": 80772 }, { "epoch": 8.305058605798889, "grad_norm": 0.1587829291820526, "learning_rate": 0.01, "loss": 1.9728, "step": 80775 }, { "epoch": 8.305367057371992, "grad_norm": 0.06458421051502228, "learning_rate": 0.01, "loss": 1.9735, "step": 80778 }, { "epoch": 8.305675508945095, "grad_norm": 0.07363933324813843, "learning_rate": 0.01, "loss": 1.9399, "step": 80781 }, { "epoch": 8.305983960518198, "grad_norm": 0.03997383266687393, "learning_rate": 0.01, "loss": 1.9624, "step": 80784 }, { "epoch": 8.306292412091302, "grad_norm": 0.037976641207933426, "learning_rate": 0.01, "loss": 1.961, "step": 80787 }, { "epoch": 8.306600863664405, "grad_norm": 0.039313819259405136, "learning_rate": 0.01, "loss": 1.9378, "step": 80790 }, { "epoch": 8.306909315237508, "grad_norm": 0.1089361310005188, "learning_rate": 0.01, "loss": 1.9353, "step": 80793 }, { "epoch": 8.307217766810611, "grad_norm": 0.15032267570495605, "learning_rate": 0.01, "loss": 1.94, "step": 80796 }, { "epoch": 8.307526218383714, "grad_norm": 0.06436207890510559, "learning_rate": 0.01, "loss": 1.9527, "step": 80799 }, { "epoch": 8.307834669956817, "grad_norm": 0.03206050768494606, "learning_rate": 0.01, "loss": 1.9204, "step": 80802 }, { "epoch": 8.30814312152992, "grad_norm": 0.040079426020383835, "learning_rate": 0.01, "loss": 1.9678, "step": 80805 }, { "epoch": 8.308451573103023, "grad_norm": 0.04091683030128479, "learning_rate": 0.01, "loss": 1.9534, "step": 80808 }, { "epoch": 8.308760024676126, "grad_norm": 0.07872221618890762, "learning_rate": 0.01, "loss": 1.9596, "step": 80811 }, { "epoch": 8.309068476249228, "grad_norm": 0.10001574456691742, "learning_rate": 0.01, "loss": 1.9426, "step": 80814 }, { "epoch": 8.309376927822331, "grad_norm": 0.07447927445173264, "learning_rate": 0.01, "loss": 1.9453, "step": 80817 }, { "epoch": 8.309685379395434, "grad_norm": 0.048718079924583435, "learning_rate": 0.01, "loss": 1.9378, "step": 80820 }, { "epoch": 8.309993830968537, "grad_norm": 0.08882563561201096, "learning_rate": 0.01, "loss": 1.9505, "step": 80823 }, { "epoch": 8.310302282541642, "grad_norm": 0.04461067542433739, "learning_rate": 0.01, "loss": 1.9685, "step": 80826 }, { "epoch": 8.310610734114745, "grad_norm": 0.09042662382125854, "learning_rate": 0.01, "loss": 1.9755, "step": 80829 }, { "epoch": 8.310919185687847, "grad_norm": 0.07167694717645645, "learning_rate": 0.01, "loss": 1.9483, "step": 80832 }, { "epoch": 8.31122763726095, "grad_norm": 0.06814853847026825, "learning_rate": 0.01, "loss": 1.9559, "step": 80835 }, { "epoch": 8.311536088834053, "grad_norm": 0.09978099912405014, "learning_rate": 0.01, "loss": 1.9563, "step": 80838 }, { "epoch": 8.311844540407156, "grad_norm": 0.08152987062931061, "learning_rate": 0.01, "loss": 1.9551, "step": 80841 }, { "epoch": 8.312152991980259, "grad_norm": 0.03953191637992859, "learning_rate": 0.01, "loss": 1.9354, "step": 80844 }, { "epoch": 8.312461443553362, "grad_norm": 0.0646815299987793, "learning_rate": 0.01, "loss": 1.9667, "step": 80847 }, { "epoch": 8.312769895126465, "grad_norm": 0.10371677577495575, "learning_rate": 0.01, "loss": 1.9671, "step": 80850 }, { "epoch": 8.313078346699568, "grad_norm": 0.1298215538263321, "learning_rate": 0.01, "loss": 1.9558, "step": 80853 }, { "epoch": 8.31338679827267, "grad_norm": 0.05871061235666275, "learning_rate": 0.01, "loss": 1.9449, "step": 80856 }, { "epoch": 8.313695249845773, "grad_norm": 0.03924513980746269, "learning_rate": 0.01, "loss": 1.9454, "step": 80859 }, { "epoch": 8.314003701418876, "grad_norm": 0.05203021690249443, "learning_rate": 0.01, "loss": 1.9385, "step": 80862 }, { "epoch": 8.314312152991981, "grad_norm": 0.07967104017734528, "learning_rate": 0.01, "loss": 1.9579, "step": 80865 }, { "epoch": 8.314620604565084, "grad_norm": 0.09806268662214279, "learning_rate": 0.01, "loss": 1.9534, "step": 80868 }, { "epoch": 8.314929056138187, "grad_norm": 0.08093216270208359, "learning_rate": 0.01, "loss": 1.9725, "step": 80871 }, { "epoch": 8.31523750771129, "grad_norm": 0.05892954394221306, "learning_rate": 0.01, "loss": 1.9517, "step": 80874 }, { "epoch": 8.315545959284393, "grad_norm": 0.04190627112984657, "learning_rate": 0.01, "loss": 1.9628, "step": 80877 }, { "epoch": 8.315854410857495, "grad_norm": 0.09374337643384933, "learning_rate": 0.01, "loss": 1.9518, "step": 80880 }, { "epoch": 8.316162862430598, "grad_norm": 0.09757273644208908, "learning_rate": 0.01, "loss": 1.9541, "step": 80883 }, { "epoch": 8.316471314003701, "grad_norm": 0.07102153450250626, "learning_rate": 0.01, "loss": 1.9393, "step": 80886 }, { "epoch": 8.316779765576804, "grad_norm": 0.08393047004938126, "learning_rate": 0.01, "loss": 1.9722, "step": 80889 }, { "epoch": 8.317088217149907, "grad_norm": 0.09592314064502716, "learning_rate": 0.01, "loss": 1.9468, "step": 80892 }, { "epoch": 8.31739666872301, "grad_norm": 0.05190633237361908, "learning_rate": 0.01, "loss": 1.9384, "step": 80895 }, { "epoch": 8.317705120296113, "grad_norm": 0.041763097047805786, "learning_rate": 0.01, "loss": 1.9198, "step": 80898 }, { "epoch": 8.318013571869216, "grad_norm": 0.050488606095314026, "learning_rate": 0.01, "loss": 1.976, "step": 80901 }, { "epoch": 8.31832202344232, "grad_norm": 0.03751280531287193, "learning_rate": 0.01, "loss": 1.9623, "step": 80904 }, { "epoch": 8.318630475015423, "grad_norm": 0.04075468331575394, "learning_rate": 0.01, "loss": 1.9335, "step": 80907 }, { "epoch": 8.318938926588526, "grad_norm": 0.05772188678383827, "learning_rate": 0.01, "loss": 1.9636, "step": 80910 }, { "epoch": 8.319247378161629, "grad_norm": 0.06489258259534836, "learning_rate": 0.01, "loss": 1.9438, "step": 80913 }, { "epoch": 8.319555829734732, "grad_norm": 0.050094038248062134, "learning_rate": 0.01, "loss": 1.9341, "step": 80916 }, { "epoch": 8.319864281307835, "grad_norm": 0.08120666444301605, "learning_rate": 0.01, "loss": 1.964, "step": 80919 }, { "epoch": 8.320172732880938, "grad_norm": 0.12392567843198776, "learning_rate": 0.01, "loss": 1.9633, "step": 80922 }, { "epoch": 8.32048118445404, "grad_norm": 0.0636238157749176, "learning_rate": 0.01, "loss": 1.9245, "step": 80925 }, { "epoch": 8.320789636027143, "grad_norm": 0.051416803151369095, "learning_rate": 0.01, "loss": 1.939, "step": 80928 }, { "epoch": 8.321098087600246, "grad_norm": 0.0705709159374237, "learning_rate": 0.01, "loss": 1.9336, "step": 80931 }, { "epoch": 8.32140653917335, "grad_norm": 0.1259060502052307, "learning_rate": 0.01, "loss": 1.9556, "step": 80934 }, { "epoch": 8.321714990746452, "grad_norm": 0.06882835924625397, "learning_rate": 0.01, "loss": 1.9427, "step": 80937 }, { "epoch": 8.322023442319555, "grad_norm": 0.06935711205005646, "learning_rate": 0.01, "loss": 1.9467, "step": 80940 }, { "epoch": 8.32233189389266, "grad_norm": 0.07351617515087128, "learning_rate": 0.01, "loss": 1.9646, "step": 80943 }, { "epoch": 8.322640345465762, "grad_norm": 0.04996279254555702, "learning_rate": 0.01, "loss": 1.9423, "step": 80946 }, { "epoch": 8.322948797038865, "grad_norm": 0.08058802038431168, "learning_rate": 0.01, "loss": 1.9437, "step": 80949 }, { "epoch": 8.323257248611968, "grad_norm": 0.09268704801797867, "learning_rate": 0.01, "loss": 1.9686, "step": 80952 }, { "epoch": 8.323565700185071, "grad_norm": 0.14823900163173676, "learning_rate": 0.01, "loss": 1.9432, "step": 80955 }, { "epoch": 8.323874151758174, "grad_norm": 0.11526891589164734, "learning_rate": 0.01, "loss": 1.9734, "step": 80958 }, { "epoch": 8.324182603331277, "grad_norm": 0.10285145789384842, "learning_rate": 0.01, "loss": 1.9342, "step": 80961 }, { "epoch": 8.32449105490438, "grad_norm": 0.04557880014181137, "learning_rate": 0.01, "loss": 1.9575, "step": 80964 }, { "epoch": 8.324799506477483, "grad_norm": 0.039869774132966995, "learning_rate": 0.01, "loss": 1.9529, "step": 80967 }, { "epoch": 8.325107958050586, "grad_norm": 0.03244692087173462, "learning_rate": 0.01, "loss": 1.9441, "step": 80970 }, { "epoch": 8.325416409623688, "grad_norm": 0.040278058499097824, "learning_rate": 0.01, "loss": 1.9509, "step": 80973 }, { "epoch": 8.325724861196791, "grad_norm": 0.0789649486541748, "learning_rate": 0.01, "loss": 1.9482, "step": 80976 }, { "epoch": 8.326033312769894, "grad_norm": 0.06306403130292892, "learning_rate": 0.01, "loss": 1.9651, "step": 80979 }, { "epoch": 8.326341764342999, "grad_norm": 0.04320656880736351, "learning_rate": 0.01, "loss": 1.9416, "step": 80982 }, { "epoch": 8.326650215916102, "grad_norm": 0.03742117062211037, "learning_rate": 0.01, "loss": 1.9519, "step": 80985 }, { "epoch": 8.326958667489205, "grad_norm": 0.08791963756084442, "learning_rate": 0.01, "loss": 1.9515, "step": 80988 }, { "epoch": 8.327267119062308, "grad_norm": 0.05794864147901535, "learning_rate": 0.01, "loss": 1.9385, "step": 80991 }, { "epoch": 8.32757557063541, "grad_norm": 0.09958462417125702, "learning_rate": 0.01, "loss": 1.9483, "step": 80994 }, { "epoch": 8.327884022208513, "grad_norm": 0.0908295288681984, "learning_rate": 0.01, "loss": 1.9695, "step": 80997 }, { "epoch": 8.328192473781616, "grad_norm": 0.05362537130713463, "learning_rate": 0.01, "loss": 1.9667, "step": 81000 }, { "epoch": 8.328500925354719, "grad_norm": 0.03491285815834999, "learning_rate": 0.01, "loss": 1.9517, "step": 81003 }, { "epoch": 8.328809376927822, "grad_norm": 0.04607011377811432, "learning_rate": 0.01, "loss": 1.957, "step": 81006 }, { "epoch": 8.329117828500925, "grad_norm": 0.036862924695014954, "learning_rate": 0.01, "loss": 1.933, "step": 81009 }, { "epoch": 8.329426280074028, "grad_norm": 0.03782851994037628, "learning_rate": 0.01, "loss": 1.9338, "step": 81012 }, { "epoch": 8.32973473164713, "grad_norm": 0.10079167783260345, "learning_rate": 0.01, "loss": 1.9553, "step": 81015 }, { "epoch": 8.330043183220234, "grad_norm": 0.06869961321353912, "learning_rate": 0.01, "loss": 1.9397, "step": 81018 }, { "epoch": 8.330351634793338, "grad_norm": 0.09453833103179932, "learning_rate": 0.01, "loss": 1.9289, "step": 81021 }, { "epoch": 8.330660086366441, "grad_norm": 0.06898962706327438, "learning_rate": 0.01, "loss": 1.9679, "step": 81024 }, { "epoch": 8.330968537939544, "grad_norm": 0.0684114545583725, "learning_rate": 0.01, "loss": 1.9752, "step": 81027 }, { "epoch": 8.331276989512647, "grad_norm": 0.07283013314008713, "learning_rate": 0.01, "loss": 1.9309, "step": 81030 }, { "epoch": 8.33158544108575, "grad_norm": 0.059807684272527695, "learning_rate": 0.01, "loss": 1.9442, "step": 81033 }, { "epoch": 8.331893892658853, "grad_norm": 0.06721007823944092, "learning_rate": 0.01, "loss": 1.9567, "step": 81036 }, { "epoch": 8.332202344231955, "grad_norm": 0.0669078379869461, "learning_rate": 0.01, "loss": 1.9578, "step": 81039 }, { "epoch": 8.332510795805058, "grad_norm": 0.07649901509284973, "learning_rate": 0.01, "loss": 1.9525, "step": 81042 }, { "epoch": 8.332819247378161, "grad_norm": 0.07991660386323929, "learning_rate": 0.01, "loss": 1.9697, "step": 81045 }, { "epoch": 8.333127698951264, "grad_norm": 0.0629889965057373, "learning_rate": 0.01, "loss": 1.9894, "step": 81048 }, { "epoch": 8.333436150524367, "grad_norm": 0.036793168634176254, "learning_rate": 0.01, "loss": 1.9479, "step": 81051 }, { "epoch": 8.33374460209747, "grad_norm": 0.032679762691259384, "learning_rate": 0.01, "loss": 1.9254, "step": 81054 }, { "epoch": 8.334053053670573, "grad_norm": 0.08353456854820251, "learning_rate": 0.01, "loss": 1.9587, "step": 81057 }, { "epoch": 8.334361505243677, "grad_norm": 0.06313187628984451, "learning_rate": 0.01, "loss": 1.9426, "step": 81060 }, { "epoch": 8.33466995681678, "grad_norm": 0.03921021521091461, "learning_rate": 0.01, "loss": 1.9405, "step": 81063 }, { "epoch": 8.334978408389883, "grad_norm": 0.044407907873392105, "learning_rate": 0.01, "loss": 1.9344, "step": 81066 }, { "epoch": 8.335286859962986, "grad_norm": 0.0652817115187645, "learning_rate": 0.01, "loss": 1.9567, "step": 81069 }, { "epoch": 8.335595311536089, "grad_norm": 0.09266695380210876, "learning_rate": 0.01, "loss": 1.9598, "step": 81072 }, { "epoch": 8.335903763109192, "grad_norm": 0.07401798665523529, "learning_rate": 0.01, "loss": 1.9383, "step": 81075 }, { "epoch": 8.336212214682295, "grad_norm": 0.031432922929525375, "learning_rate": 0.01, "loss": 1.9327, "step": 81078 }, { "epoch": 8.336520666255398, "grad_norm": 0.06749407202005386, "learning_rate": 0.01, "loss": 1.9635, "step": 81081 }, { "epoch": 8.3368291178285, "grad_norm": 0.052030496299266815, "learning_rate": 0.01, "loss": 1.9558, "step": 81084 }, { "epoch": 8.337137569401603, "grad_norm": 0.034096091985702515, "learning_rate": 0.01, "loss": 1.9584, "step": 81087 }, { "epoch": 8.337446020974706, "grad_norm": 0.04452771693468094, "learning_rate": 0.01, "loss": 1.9652, "step": 81090 }, { "epoch": 8.33775447254781, "grad_norm": 0.11472178995609283, "learning_rate": 0.01, "loss": 1.9437, "step": 81093 }, { "epoch": 8.338062924120914, "grad_norm": 0.09258271008729935, "learning_rate": 0.01, "loss": 1.954, "step": 81096 }, { "epoch": 8.338371375694017, "grad_norm": 0.05785766988992691, "learning_rate": 0.01, "loss": 1.9561, "step": 81099 }, { "epoch": 8.33867982726712, "grad_norm": 0.03770722821354866, "learning_rate": 0.01, "loss": 1.9482, "step": 81102 }, { "epoch": 8.338988278840223, "grad_norm": 0.038187529891729355, "learning_rate": 0.01, "loss": 1.9374, "step": 81105 }, { "epoch": 8.339296730413325, "grad_norm": 0.044281262904405594, "learning_rate": 0.01, "loss": 1.9604, "step": 81108 }, { "epoch": 8.339605181986428, "grad_norm": 0.050860438495874405, "learning_rate": 0.01, "loss": 1.9619, "step": 81111 }, { "epoch": 8.339913633559531, "grad_norm": 0.08445332944393158, "learning_rate": 0.01, "loss": 1.9515, "step": 81114 }, { "epoch": 8.340222085132634, "grad_norm": 0.11284556984901428, "learning_rate": 0.01, "loss": 1.9547, "step": 81117 }, { "epoch": 8.340530536705737, "grad_norm": 0.05600573495030403, "learning_rate": 0.01, "loss": 1.9658, "step": 81120 }, { "epoch": 8.34083898827884, "grad_norm": 0.042293425649404526, "learning_rate": 0.01, "loss": 1.9624, "step": 81123 }, { "epoch": 8.341147439851943, "grad_norm": 0.03872934356331825, "learning_rate": 0.01, "loss": 1.9538, "step": 81126 }, { "epoch": 8.341455891425046, "grad_norm": 0.043065737932920456, "learning_rate": 0.01, "loss": 1.9631, "step": 81129 }, { "epoch": 8.341764342998149, "grad_norm": 0.04543273523449898, "learning_rate": 0.01, "loss": 1.935, "step": 81132 }, { "epoch": 8.342072794571253, "grad_norm": 0.15710321068763733, "learning_rate": 0.01, "loss": 1.9528, "step": 81135 }, { "epoch": 8.342381246144356, "grad_norm": 0.045610420405864716, "learning_rate": 0.01, "loss": 1.9399, "step": 81138 }, { "epoch": 8.342689697717459, "grad_norm": 0.04882073402404785, "learning_rate": 0.01, "loss": 1.9605, "step": 81141 }, { "epoch": 8.342998149290562, "grad_norm": 0.037526652216911316, "learning_rate": 0.01, "loss": 1.9559, "step": 81144 }, { "epoch": 8.343306600863665, "grad_norm": 0.03276045620441437, "learning_rate": 0.01, "loss": 1.9555, "step": 81147 }, { "epoch": 8.343615052436768, "grad_norm": 0.04296638444066048, "learning_rate": 0.01, "loss": 1.9548, "step": 81150 }, { "epoch": 8.34392350400987, "grad_norm": 0.07781503349542618, "learning_rate": 0.01, "loss": 1.9496, "step": 81153 }, { "epoch": 8.344231955582973, "grad_norm": 0.039542727172374725, "learning_rate": 0.01, "loss": 1.9175, "step": 81156 }, { "epoch": 8.344540407156076, "grad_norm": 0.045888807624578476, "learning_rate": 0.01, "loss": 1.9237, "step": 81159 }, { "epoch": 8.34484885872918, "grad_norm": 0.05557486042380333, "learning_rate": 0.01, "loss": 1.9683, "step": 81162 }, { "epoch": 8.345157310302282, "grad_norm": 0.06092909723520279, "learning_rate": 0.01, "loss": 1.9439, "step": 81165 }, { "epoch": 8.345465761875385, "grad_norm": 0.17519673705101013, "learning_rate": 0.01, "loss": 1.9464, "step": 81168 }, { "epoch": 8.345774213448488, "grad_norm": 0.09101748466491699, "learning_rate": 0.01, "loss": 1.938, "step": 81171 }, { "epoch": 8.346082665021592, "grad_norm": 0.05962994322180748, "learning_rate": 0.01, "loss": 1.9537, "step": 81174 }, { "epoch": 8.346391116594695, "grad_norm": 0.0337313674390316, "learning_rate": 0.01, "loss": 1.9515, "step": 81177 }, { "epoch": 8.346699568167798, "grad_norm": 0.06669741123914719, "learning_rate": 0.01, "loss": 1.9296, "step": 81180 }, { "epoch": 8.347008019740901, "grad_norm": 0.09194620698690414, "learning_rate": 0.01, "loss": 1.9502, "step": 81183 }, { "epoch": 8.347316471314004, "grad_norm": 0.04323070868849754, "learning_rate": 0.01, "loss": 1.9588, "step": 81186 }, { "epoch": 8.347624922887107, "grad_norm": 0.0381295420229435, "learning_rate": 0.01, "loss": 1.9388, "step": 81189 }, { "epoch": 8.34793337446021, "grad_norm": 0.03433126211166382, "learning_rate": 0.01, "loss": 1.9532, "step": 81192 }, { "epoch": 8.348241826033313, "grad_norm": 0.03897920623421669, "learning_rate": 0.01, "loss": 1.9688, "step": 81195 }, { "epoch": 8.348550277606416, "grad_norm": 0.04720534011721611, "learning_rate": 0.01, "loss": 1.9296, "step": 81198 }, { "epoch": 8.348858729179518, "grad_norm": 0.036591820418834686, "learning_rate": 0.01, "loss": 1.9467, "step": 81201 }, { "epoch": 8.349167180752621, "grad_norm": 0.06843716651201248, "learning_rate": 0.01, "loss": 1.9584, "step": 81204 }, { "epoch": 8.349475632325724, "grad_norm": 0.05791595205664635, "learning_rate": 0.01, "loss": 1.9487, "step": 81207 }, { "epoch": 8.349784083898827, "grad_norm": 0.11137792468070984, "learning_rate": 0.01, "loss": 1.9282, "step": 81210 }, { "epoch": 8.350092535471932, "grad_norm": 0.08609547466039658, "learning_rate": 0.01, "loss": 1.9468, "step": 81213 }, { "epoch": 8.350400987045035, "grad_norm": 0.1130346804857254, "learning_rate": 0.01, "loss": 1.947, "step": 81216 }, { "epoch": 8.350709438618138, "grad_norm": 0.06280580908060074, "learning_rate": 0.01, "loss": 1.92, "step": 81219 }, { "epoch": 8.35101789019124, "grad_norm": 0.059064436703920364, "learning_rate": 0.01, "loss": 1.941, "step": 81222 }, { "epoch": 8.351326341764343, "grad_norm": 0.03772406652569771, "learning_rate": 0.01, "loss": 1.9415, "step": 81225 }, { "epoch": 8.351634793337446, "grad_norm": 0.06599714607000351, "learning_rate": 0.01, "loss": 1.9469, "step": 81228 }, { "epoch": 8.351943244910549, "grad_norm": 0.08209977298974991, "learning_rate": 0.01, "loss": 1.9346, "step": 81231 }, { "epoch": 8.352251696483652, "grad_norm": 0.14253711700439453, "learning_rate": 0.01, "loss": 1.9592, "step": 81234 }, { "epoch": 8.352560148056755, "grad_norm": 0.06577881425619125, "learning_rate": 0.01, "loss": 1.954, "step": 81237 }, { "epoch": 8.352868599629858, "grad_norm": 0.07357854396104813, "learning_rate": 0.01, "loss": 1.9546, "step": 81240 }, { "epoch": 8.35317705120296, "grad_norm": 0.05654516443610191, "learning_rate": 0.01, "loss": 1.956, "step": 81243 }, { "epoch": 8.353485502776063, "grad_norm": 0.04166090860962868, "learning_rate": 0.01, "loss": 1.9475, "step": 81246 }, { "epoch": 8.353793954349166, "grad_norm": 0.03527102991938591, "learning_rate": 0.01, "loss": 1.9774, "step": 81249 }, { "epoch": 8.354102405922271, "grad_norm": 0.04787033051252365, "learning_rate": 0.01, "loss": 1.9525, "step": 81252 }, { "epoch": 8.354410857495374, "grad_norm": 0.1353912353515625, "learning_rate": 0.01, "loss": 1.9646, "step": 81255 }, { "epoch": 8.354719309068477, "grad_norm": 0.049617063254117966, "learning_rate": 0.01, "loss": 1.9662, "step": 81258 }, { "epoch": 8.35502776064158, "grad_norm": 0.0665043517947197, "learning_rate": 0.01, "loss": 1.9355, "step": 81261 }, { "epoch": 8.355336212214683, "grad_norm": 0.05647074803709984, "learning_rate": 0.01, "loss": 1.933, "step": 81264 }, { "epoch": 8.355644663787785, "grad_norm": 0.0868644118309021, "learning_rate": 0.01, "loss": 1.943, "step": 81267 }, { "epoch": 8.355953115360888, "grad_norm": 0.12009144574403763, "learning_rate": 0.01, "loss": 1.9547, "step": 81270 }, { "epoch": 8.356261566933991, "grad_norm": 0.09910716116428375, "learning_rate": 0.01, "loss": 1.9578, "step": 81273 }, { "epoch": 8.356570018507094, "grad_norm": 0.08180820196866989, "learning_rate": 0.01, "loss": 1.9288, "step": 81276 }, { "epoch": 8.356878470080197, "grad_norm": 0.04328274726867676, "learning_rate": 0.01, "loss": 1.9415, "step": 81279 }, { "epoch": 8.3571869216533, "grad_norm": 0.04688665270805359, "learning_rate": 0.01, "loss": 1.9524, "step": 81282 }, { "epoch": 8.357495373226403, "grad_norm": 0.05602161958813667, "learning_rate": 0.01, "loss": 1.9555, "step": 81285 }, { "epoch": 8.357803824799506, "grad_norm": 0.04227988049387932, "learning_rate": 0.01, "loss": 1.9434, "step": 81288 }, { "epoch": 8.35811227637261, "grad_norm": 0.10519243031740189, "learning_rate": 0.01, "loss": 1.9541, "step": 81291 }, { "epoch": 8.358420727945713, "grad_norm": 0.051333289593458176, "learning_rate": 0.01, "loss": 1.9463, "step": 81294 }, { "epoch": 8.358729179518816, "grad_norm": 0.08162673562765121, "learning_rate": 0.01, "loss": 1.957, "step": 81297 }, { "epoch": 8.359037631091919, "grad_norm": 0.04508831351995468, "learning_rate": 0.01, "loss": 1.9287, "step": 81300 }, { "epoch": 8.359346082665022, "grad_norm": 0.062075745314359665, "learning_rate": 0.01, "loss": 1.9575, "step": 81303 }, { "epoch": 8.359654534238125, "grad_norm": 0.055787600576877594, "learning_rate": 0.01, "loss": 1.9381, "step": 81306 }, { "epoch": 8.359962985811228, "grad_norm": 0.05872397869825363, "learning_rate": 0.01, "loss": 1.921, "step": 81309 }, { "epoch": 8.36027143738433, "grad_norm": 0.048803798854351044, "learning_rate": 0.01, "loss": 1.9503, "step": 81312 }, { "epoch": 8.360579888957433, "grad_norm": 0.03489650413393974, "learning_rate": 0.01, "loss": 1.9722, "step": 81315 }, { "epoch": 8.360888340530536, "grad_norm": 0.050747793167829514, "learning_rate": 0.01, "loss": 1.9682, "step": 81318 }, { "epoch": 8.36119679210364, "grad_norm": 0.08788271993398666, "learning_rate": 0.01, "loss": 1.9463, "step": 81321 }, { "epoch": 8.361505243676742, "grad_norm": 0.11580553650856018, "learning_rate": 0.01, "loss": 1.9378, "step": 81324 }, { "epoch": 8.361813695249845, "grad_norm": 0.05664614588022232, "learning_rate": 0.01, "loss": 1.9549, "step": 81327 }, { "epoch": 8.36212214682295, "grad_norm": 0.07168247550725937, "learning_rate": 0.01, "loss": 1.9375, "step": 81330 }, { "epoch": 8.362430598396053, "grad_norm": 0.0754905566573143, "learning_rate": 0.01, "loss": 1.9635, "step": 81333 }, { "epoch": 8.362739049969155, "grad_norm": 0.055534813553094864, "learning_rate": 0.01, "loss": 1.9551, "step": 81336 }, { "epoch": 8.363047501542258, "grad_norm": 0.13377831876277924, "learning_rate": 0.01, "loss": 1.9673, "step": 81339 }, { "epoch": 8.363355953115361, "grad_norm": 0.06023402139544487, "learning_rate": 0.01, "loss": 1.9536, "step": 81342 }, { "epoch": 8.363664404688464, "grad_norm": 0.05767045170068741, "learning_rate": 0.01, "loss": 1.9322, "step": 81345 }, { "epoch": 8.363972856261567, "grad_norm": 0.059322405606508255, "learning_rate": 0.01, "loss": 1.9672, "step": 81348 }, { "epoch": 8.36428130783467, "grad_norm": 0.042639970779418945, "learning_rate": 0.01, "loss": 1.9639, "step": 81351 }, { "epoch": 8.364589759407773, "grad_norm": 0.05120370537042618, "learning_rate": 0.01, "loss": 1.9318, "step": 81354 }, { "epoch": 8.364898210980876, "grad_norm": 0.04822765290737152, "learning_rate": 0.01, "loss": 1.957, "step": 81357 }, { "epoch": 8.365206662553978, "grad_norm": 0.05606779083609581, "learning_rate": 0.01, "loss": 1.932, "step": 81360 }, { "epoch": 8.365515114127081, "grad_norm": 0.05448019132018089, "learning_rate": 0.01, "loss": 1.9593, "step": 81363 }, { "epoch": 8.365823565700184, "grad_norm": 0.036996424198150635, "learning_rate": 0.01, "loss": 1.9438, "step": 81366 }, { "epoch": 8.366132017273289, "grad_norm": 0.03727582097053528, "learning_rate": 0.01, "loss": 1.9357, "step": 81369 }, { "epoch": 8.366440468846392, "grad_norm": 0.04183301329612732, "learning_rate": 0.01, "loss": 1.9761, "step": 81372 }, { "epoch": 8.366748920419495, "grad_norm": 0.06489592790603638, "learning_rate": 0.01, "loss": 1.9359, "step": 81375 }, { "epoch": 8.367057371992598, "grad_norm": 0.19914110004901886, "learning_rate": 0.01, "loss": 1.9475, "step": 81378 }, { "epoch": 8.3673658235657, "grad_norm": 0.10219461470842361, "learning_rate": 0.01, "loss": 1.9341, "step": 81381 }, { "epoch": 8.367674275138803, "grad_norm": 0.0821671262383461, "learning_rate": 0.01, "loss": 1.9233, "step": 81384 }, { "epoch": 8.367982726711906, "grad_norm": 0.08505228161811829, "learning_rate": 0.01, "loss": 2.0058, "step": 81387 }, { "epoch": 8.36829117828501, "grad_norm": 0.07406182587146759, "learning_rate": 0.01, "loss": 1.968, "step": 81390 }, { "epoch": 8.368599629858112, "grad_norm": 0.029416732490062714, "learning_rate": 0.01, "loss": 1.9563, "step": 81393 }, { "epoch": 8.368908081431215, "grad_norm": 0.03504988178610802, "learning_rate": 0.01, "loss": 1.9686, "step": 81396 }, { "epoch": 8.369216533004318, "grad_norm": 0.03661525994539261, "learning_rate": 0.01, "loss": 1.9386, "step": 81399 }, { "epoch": 8.36952498457742, "grad_norm": 0.04680737107992172, "learning_rate": 0.01, "loss": 1.9582, "step": 81402 }, { "epoch": 8.369833436150524, "grad_norm": 0.0412558875977993, "learning_rate": 0.01, "loss": 1.9829, "step": 81405 }, { "epoch": 8.370141887723628, "grad_norm": 0.08087299019098282, "learning_rate": 0.01, "loss": 1.9175, "step": 81408 }, { "epoch": 8.370450339296731, "grad_norm": 0.07752052694559097, "learning_rate": 0.01, "loss": 1.9729, "step": 81411 }, { "epoch": 8.370758790869834, "grad_norm": 0.08101408183574677, "learning_rate": 0.01, "loss": 1.9566, "step": 81414 }, { "epoch": 8.371067242442937, "grad_norm": 0.09059862792491913, "learning_rate": 0.01, "loss": 1.9529, "step": 81417 }, { "epoch": 8.37137569401604, "grad_norm": 0.05472108721733093, "learning_rate": 0.01, "loss": 1.9465, "step": 81420 }, { "epoch": 8.371684145589143, "grad_norm": 0.05902374908328056, "learning_rate": 0.01, "loss": 1.9505, "step": 81423 }, { "epoch": 8.371992597162246, "grad_norm": 0.16040004789829254, "learning_rate": 0.01, "loss": 1.9372, "step": 81426 }, { "epoch": 8.372301048735348, "grad_norm": 0.0819641575217247, "learning_rate": 0.01, "loss": 1.9425, "step": 81429 }, { "epoch": 8.372609500308451, "grad_norm": 0.06774242967367172, "learning_rate": 0.01, "loss": 1.9652, "step": 81432 }, { "epoch": 8.372917951881554, "grad_norm": 0.06819859147071838, "learning_rate": 0.01, "loss": 1.9564, "step": 81435 }, { "epoch": 8.373226403454657, "grad_norm": 0.06440560519695282, "learning_rate": 0.01, "loss": 1.953, "step": 81438 }, { "epoch": 8.37353485502776, "grad_norm": 0.05694984272122383, "learning_rate": 0.01, "loss": 1.9497, "step": 81441 }, { "epoch": 8.373843306600863, "grad_norm": 0.060925718396902084, "learning_rate": 0.01, "loss": 1.9701, "step": 81444 }, { "epoch": 8.374151758173968, "grad_norm": 0.07545589655637741, "learning_rate": 0.01, "loss": 1.9641, "step": 81447 }, { "epoch": 8.37446020974707, "grad_norm": 0.05031956359744072, "learning_rate": 0.01, "loss": 1.9404, "step": 81450 }, { "epoch": 8.374768661320173, "grad_norm": 0.05578349158167839, "learning_rate": 0.01, "loss": 1.9621, "step": 81453 }, { "epoch": 8.375077112893276, "grad_norm": 0.04404652491211891, "learning_rate": 0.01, "loss": 1.9587, "step": 81456 }, { "epoch": 8.375385564466379, "grad_norm": 0.05373278632760048, "learning_rate": 0.01, "loss": 1.9457, "step": 81459 }, { "epoch": 8.375694016039482, "grad_norm": 0.10714561492204666, "learning_rate": 0.01, "loss": 1.9633, "step": 81462 }, { "epoch": 8.376002467612585, "grad_norm": 0.062353793531656265, "learning_rate": 0.01, "loss": 1.9357, "step": 81465 }, { "epoch": 8.376310919185688, "grad_norm": 0.10784858465194702, "learning_rate": 0.01, "loss": 1.9511, "step": 81468 }, { "epoch": 8.37661937075879, "grad_norm": 0.17207425832748413, "learning_rate": 0.01, "loss": 1.9398, "step": 81471 }, { "epoch": 8.376927822331893, "grad_norm": 0.06604339182376862, "learning_rate": 0.01, "loss": 1.9416, "step": 81474 }, { "epoch": 8.377236273904996, "grad_norm": 0.04885290563106537, "learning_rate": 0.01, "loss": 1.9859, "step": 81477 }, { "epoch": 8.3775447254781, "grad_norm": 0.03585289046168327, "learning_rate": 0.01, "loss": 1.9606, "step": 81480 }, { "epoch": 8.377853177051202, "grad_norm": 0.055384352803230286, "learning_rate": 0.01, "loss": 1.9542, "step": 81483 }, { "epoch": 8.378161628624307, "grad_norm": 0.055960532277822495, "learning_rate": 0.01, "loss": 1.9539, "step": 81486 }, { "epoch": 8.37847008019741, "grad_norm": 0.04403486102819443, "learning_rate": 0.01, "loss": 1.9775, "step": 81489 }, { "epoch": 8.378778531770513, "grad_norm": 0.057822804898023605, "learning_rate": 0.01, "loss": 1.9595, "step": 81492 }, { "epoch": 8.379086983343615, "grad_norm": 0.12054527550935745, "learning_rate": 0.01, "loss": 1.9395, "step": 81495 }, { "epoch": 8.379395434916718, "grad_norm": 0.04777431860566139, "learning_rate": 0.01, "loss": 1.9462, "step": 81498 }, { "epoch": 8.379703886489821, "grad_norm": 0.10239491611719131, "learning_rate": 0.01, "loss": 1.9567, "step": 81501 }, { "epoch": 8.380012338062924, "grad_norm": 0.04464276507496834, "learning_rate": 0.01, "loss": 1.9712, "step": 81504 }, { "epoch": 8.380320789636027, "grad_norm": 0.09886369854211807, "learning_rate": 0.01, "loss": 1.944, "step": 81507 }, { "epoch": 8.38062924120913, "grad_norm": 0.054858770221471786, "learning_rate": 0.01, "loss": 1.9442, "step": 81510 }, { "epoch": 8.380937692782233, "grad_norm": 0.05009688064455986, "learning_rate": 0.01, "loss": 1.9457, "step": 81513 }, { "epoch": 8.381246144355336, "grad_norm": 0.11478911340236664, "learning_rate": 0.01, "loss": 1.9523, "step": 81516 }, { "epoch": 8.381554595928439, "grad_norm": 0.09376154839992523, "learning_rate": 0.01, "loss": 1.961, "step": 81519 }, { "epoch": 8.381863047501541, "grad_norm": 0.049762289971113205, "learning_rate": 0.01, "loss": 1.9288, "step": 81522 }, { "epoch": 8.382171499074646, "grad_norm": 0.09793833643198013, "learning_rate": 0.01, "loss": 1.9266, "step": 81525 }, { "epoch": 8.382479950647749, "grad_norm": 0.03599982336163521, "learning_rate": 0.01, "loss": 1.9484, "step": 81528 }, { "epoch": 8.382788402220852, "grad_norm": 0.06792202591896057, "learning_rate": 0.01, "loss": 1.9365, "step": 81531 }, { "epoch": 8.383096853793955, "grad_norm": 0.051511526107788086, "learning_rate": 0.01, "loss": 1.9644, "step": 81534 }, { "epoch": 8.383405305367058, "grad_norm": 0.04394154995679855, "learning_rate": 0.01, "loss": 1.9289, "step": 81537 }, { "epoch": 8.38371375694016, "grad_norm": 0.05700390040874481, "learning_rate": 0.01, "loss": 1.9301, "step": 81540 }, { "epoch": 8.384022208513263, "grad_norm": 0.03976348415017128, "learning_rate": 0.01, "loss": 1.9689, "step": 81543 }, { "epoch": 8.384330660086366, "grad_norm": 0.06435202807188034, "learning_rate": 0.01, "loss": 1.9635, "step": 81546 }, { "epoch": 8.38463911165947, "grad_norm": 0.13295422494411469, "learning_rate": 0.01, "loss": 1.9584, "step": 81549 }, { "epoch": 8.384947563232572, "grad_norm": 0.06909388303756714, "learning_rate": 0.01, "loss": 1.9547, "step": 81552 }, { "epoch": 8.385256014805675, "grad_norm": 0.13283444941043854, "learning_rate": 0.01, "loss": 1.9627, "step": 81555 }, { "epoch": 8.385564466378778, "grad_norm": 0.07221397757530212, "learning_rate": 0.01, "loss": 1.9471, "step": 81558 }, { "epoch": 8.38587291795188, "grad_norm": 0.05165419727563858, "learning_rate": 0.01, "loss": 1.9312, "step": 81561 }, { "epoch": 8.386181369524985, "grad_norm": 0.04133574664592743, "learning_rate": 0.01, "loss": 1.9448, "step": 81564 }, { "epoch": 8.386489821098088, "grad_norm": 0.05390239879488945, "learning_rate": 0.01, "loss": 1.9484, "step": 81567 }, { "epoch": 8.386798272671191, "grad_norm": 0.043544042855501175, "learning_rate": 0.01, "loss": 1.9368, "step": 81570 }, { "epoch": 8.387106724244294, "grad_norm": 0.07669848203659058, "learning_rate": 0.01, "loss": 1.9754, "step": 81573 }, { "epoch": 8.387415175817397, "grad_norm": 0.06485964357852936, "learning_rate": 0.01, "loss": 1.9464, "step": 81576 }, { "epoch": 8.3877236273905, "grad_norm": 0.09705982357263565, "learning_rate": 0.01, "loss": 1.9309, "step": 81579 }, { "epoch": 8.388032078963603, "grad_norm": 0.04892526939511299, "learning_rate": 0.01, "loss": 1.9541, "step": 81582 }, { "epoch": 8.388340530536706, "grad_norm": 0.041197143495082855, "learning_rate": 0.01, "loss": 1.9639, "step": 81585 }, { "epoch": 8.388648982109808, "grad_norm": 0.06936054676771164, "learning_rate": 0.01, "loss": 1.9531, "step": 81588 }, { "epoch": 8.388957433682911, "grad_norm": 0.09122376143932343, "learning_rate": 0.01, "loss": 1.9695, "step": 81591 }, { "epoch": 8.389265885256014, "grad_norm": 0.05180079862475395, "learning_rate": 0.01, "loss": 1.946, "step": 81594 }, { "epoch": 8.389574336829117, "grad_norm": 0.055592991411685944, "learning_rate": 0.01, "loss": 1.9528, "step": 81597 }, { "epoch": 8.38988278840222, "grad_norm": 0.049899131059646606, "learning_rate": 0.01, "loss": 1.9438, "step": 81600 }, { "epoch": 8.390191239975325, "grad_norm": 0.04033705219626427, "learning_rate": 0.01, "loss": 1.9317, "step": 81603 }, { "epoch": 8.390499691548428, "grad_norm": 0.03256101533770561, "learning_rate": 0.01, "loss": 1.9702, "step": 81606 }, { "epoch": 8.39080814312153, "grad_norm": 0.12379596382379532, "learning_rate": 0.01, "loss": 1.9692, "step": 81609 }, { "epoch": 8.391116594694633, "grad_norm": 0.058602914214134216, "learning_rate": 0.01, "loss": 1.9647, "step": 81612 }, { "epoch": 8.391425046267736, "grad_norm": 0.03897072747349739, "learning_rate": 0.01, "loss": 1.9411, "step": 81615 }, { "epoch": 8.391733497840839, "grad_norm": 0.05257393419742584, "learning_rate": 0.01, "loss": 1.9471, "step": 81618 }, { "epoch": 8.392041949413942, "grad_norm": 0.03705655410885811, "learning_rate": 0.01, "loss": 1.9365, "step": 81621 }, { "epoch": 8.392350400987045, "grad_norm": 0.03860629349946976, "learning_rate": 0.01, "loss": 1.9522, "step": 81624 }, { "epoch": 8.392658852560148, "grad_norm": 0.05735693871974945, "learning_rate": 0.01, "loss": 1.943, "step": 81627 }, { "epoch": 8.39296730413325, "grad_norm": 0.0472671315073967, "learning_rate": 0.01, "loss": 1.9262, "step": 81630 }, { "epoch": 8.393275755706354, "grad_norm": 0.04570858180522919, "learning_rate": 0.01, "loss": 1.9721, "step": 81633 }, { "epoch": 8.393584207279456, "grad_norm": 0.12013877183198929, "learning_rate": 0.01, "loss": 1.9416, "step": 81636 }, { "epoch": 8.39389265885256, "grad_norm": 0.1255820095539093, "learning_rate": 0.01, "loss": 1.9653, "step": 81639 }, { "epoch": 8.394201110425664, "grad_norm": 0.045648593455553055, "learning_rate": 0.01, "loss": 1.9481, "step": 81642 }, { "epoch": 8.394509561998767, "grad_norm": 0.03392338007688522, "learning_rate": 0.01, "loss": 1.9403, "step": 81645 }, { "epoch": 8.39481801357187, "grad_norm": 0.03878732770681381, "learning_rate": 0.01, "loss": 1.955, "step": 81648 }, { "epoch": 8.395126465144973, "grad_norm": 0.05559149011969566, "learning_rate": 0.01, "loss": 1.9455, "step": 81651 }, { "epoch": 8.395434916718076, "grad_norm": 0.044413961470127106, "learning_rate": 0.01, "loss": 1.9487, "step": 81654 }, { "epoch": 8.395743368291178, "grad_norm": 0.0495944581925869, "learning_rate": 0.01, "loss": 1.943, "step": 81657 }, { "epoch": 8.396051819864281, "grad_norm": 0.0679146870970726, "learning_rate": 0.01, "loss": 1.9724, "step": 81660 }, { "epoch": 8.396360271437384, "grad_norm": 0.10142147541046143, "learning_rate": 0.01, "loss": 1.9574, "step": 81663 }, { "epoch": 8.396668723010487, "grad_norm": 0.056064870208501816, "learning_rate": 0.01, "loss": 1.9544, "step": 81666 }, { "epoch": 8.39697717458359, "grad_norm": 0.09618430584669113, "learning_rate": 0.01, "loss": 1.9521, "step": 81669 }, { "epoch": 8.397285626156693, "grad_norm": 0.04978022351861, "learning_rate": 0.01, "loss": 1.9655, "step": 81672 }, { "epoch": 8.397594077729796, "grad_norm": 0.05624723434448242, "learning_rate": 0.01, "loss": 1.9464, "step": 81675 }, { "epoch": 8.397902529302899, "grad_norm": 0.09562240540981293, "learning_rate": 0.01, "loss": 1.9682, "step": 81678 }, { "epoch": 8.398210980876003, "grad_norm": 0.045868344604969025, "learning_rate": 0.01, "loss": 1.9571, "step": 81681 }, { "epoch": 8.398519432449106, "grad_norm": 0.07864541560411453, "learning_rate": 0.01, "loss": 1.9569, "step": 81684 }, { "epoch": 8.398827884022209, "grad_norm": 0.106157585978508, "learning_rate": 0.01, "loss": 1.9817, "step": 81687 }, { "epoch": 8.399136335595312, "grad_norm": 0.057111166417598724, "learning_rate": 0.01, "loss": 1.9644, "step": 81690 }, { "epoch": 8.399444787168415, "grad_norm": 0.10949339717626572, "learning_rate": 0.01, "loss": 1.9339, "step": 81693 }, { "epoch": 8.399753238741518, "grad_norm": 0.05869073048233986, "learning_rate": 0.01, "loss": 1.9589, "step": 81696 }, { "epoch": 8.40006169031462, "grad_norm": 0.033000875264406204, "learning_rate": 0.01, "loss": 1.9456, "step": 81699 }, { "epoch": 8.400370141887723, "grad_norm": 0.04507824033498764, "learning_rate": 0.01, "loss": 1.9572, "step": 81702 }, { "epoch": 8.400678593460826, "grad_norm": 0.04142510145902634, "learning_rate": 0.01, "loss": 1.9361, "step": 81705 }, { "epoch": 8.40098704503393, "grad_norm": 0.0351688452064991, "learning_rate": 0.01, "loss": 1.9447, "step": 81708 }, { "epoch": 8.401295496607032, "grad_norm": 0.039388950914144516, "learning_rate": 0.01, "loss": 1.9309, "step": 81711 }, { "epoch": 8.401603948180135, "grad_norm": 0.0492679700255394, "learning_rate": 0.01, "loss": 1.9611, "step": 81714 }, { "epoch": 8.401912399753238, "grad_norm": 0.11415845900774002, "learning_rate": 0.01, "loss": 1.9295, "step": 81717 }, { "epoch": 8.402220851326343, "grad_norm": 0.06980131566524506, "learning_rate": 0.01, "loss": 1.959, "step": 81720 }, { "epoch": 8.402529302899445, "grad_norm": 0.10816799849271774, "learning_rate": 0.01, "loss": 1.9557, "step": 81723 }, { "epoch": 8.402837754472548, "grad_norm": 0.042601246386766434, "learning_rate": 0.01, "loss": 1.9524, "step": 81726 }, { "epoch": 8.403146206045651, "grad_norm": 0.040994688868522644, "learning_rate": 0.01, "loss": 1.928, "step": 81729 }, { "epoch": 8.403454657618754, "grad_norm": 0.03924047201871872, "learning_rate": 0.01, "loss": 1.956, "step": 81732 }, { "epoch": 8.403763109191857, "grad_norm": 0.045216597616672516, "learning_rate": 0.01, "loss": 1.9569, "step": 81735 }, { "epoch": 8.40407156076496, "grad_norm": 0.05718405172228813, "learning_rate": 0.01, "loss": 1.9502, "step": 81738 }, { "epoch": 8.404380012338063, "grad_norm": 0.06921820342540741, "learning_rate": 0.01, "loss": 1.9309, "step": 81741 }, { "epoch": 8.404688463911166, "grad_norm": 0.07314276695251465, "learning_rate": 0.01, "loss": 1.9607, "step": 81744 }, { "epoch": 8.404996915484269, "grad_norm": 0.09398096799850464, "learning_rate": 0.01, "loss": 1.9464, "step": 81747 }, { "epoch": 8.405305367057371, "grad_norm": 0.0631139725446701, "learning_rate": 0.01, "loss": 1.9429, "step": 81750 }, { "epoch": 8.405613818630474, "grad_norm": 0.07777784019708633, "learning_rate": 0.01, "loss": 1.9465, "step": 81753 }, { "epoch": 8.405922270203577, "grad_norm": 0.05013573169708252, "learning_rate": 0.01, "loss": 1.9548, "step": 81756 }, { "epoch": 8.406230721776682, "grad_norm": 0.12393694370985031, "learning_rate": 0.01, "loss": 1.9189, "step": 81759 }, { "epoch": 8.406539173349785, "grad_norm": 0.04600420221686363, "learning_rate": 0.01, "loss": 1.9538, "step": 81762 }, { "epoch": 8.406847624922888, "grad_norm": 0.09453027695417404, "learning_rate": 0.01, "loss": 1.9475, "step": 81765 }, { "epoch": 8.40715607649599, "grad_norm": 0.08567167818546295, "learning_rate": 0.01, "loss": 1.9628, "step": 81768 }, { "epoch": 8.407464528069093, "grad_norm": 0.06742667406797409, "learning_rate": 0.01, "loss": 1.9625, "step": 81771 }, { "epoch": 8.407772979642196, "grad_norm": 0.03893236815929413, "learning_rate": 0.01, "loss": 1.9655, "step": 81774 }, { "epoch": 8.4080814312153, "grad_norm": 0.04947619512677193, "learning_rate": 0.01, "loss": 1.9482, "step": 81777 }, { "epoch": 8.408389882788402, "grad_norm": 0.03255341574549675, "learning_rate": 0.01, "loss": 1.9539, "step": 81780 }, { "epoch": 8.408698334361505, "grad_norm": 0.04050638526678085, "learning_rate": 0.01, "loss": 1.9518, "step": 81783 }, { "epoch": 8.409006785934608, "grad_norm": 0.08423347026109695, "learning_rate": 0.01, "loss": 1.9689, "step": 81786 }, { "epoch": 8.40931523750771, "grad_norm": 0.0701289102435112, "learning_rate": 0.01, "loss": 1.9516, "step": 81789 }, { "epoch": 8.409623689080814, "grad_norm": 0.09731067717075348, "learning_rate": 0.01, "loss": 1.9283, "step": 81792 }, { "epoch": 8.409932140653916, "grad_norm": 0.07500086724758148, "learning_rate": 0.01, "loss": 1.9551, "step": 81795 }, { "epoch": 8.410240592227021, "grad_norm": 0.07082207500934601, "learning_rate": 0.01, "loss": 1.988, "step": 81798 }, { "epoch": 8.410549043800124, "grad_norm": 0.08339577168226242, "learning_rate": 0.01, "loss": 1.9477, "step": 81801 }, { "epoch": 8.410857495373227, "grad_norm": 0.04280835762619972, "learning_rate": 0.01, "loss": 1.9523, "step": 81804 }, { "epoch": 8.41116594694633, "grad_norm": 0.07703588902950287, "learning_rate": 0.01, "loss": 1.9431, "step": 81807 }, { "epoch": 8.411474398519433, "grad_norm": 0.06267133355140686, "learning_rate": 0.01, "loss": 1.953, "step": 81810 }, { "epoch": 8.411782850092536, "grad_norm": 0.07680845260620117, "learning_rate": 0.01, "loss": 1.9407, "step": 81813 }, { "epoch": 8.412091301665638, "grad_norm": 0.08015266060829163, "learning_rate": 0.01, "loss": 1.9401, "step": 81816 }, { "epoch": 8.412399753238741, "grad_norm": 0.06529653072357178, "learning_rate": 0.01, "loss": 1.9502, "step": 81819 }, { "epoch": 8.412708204811844, "grad_norm": 0.042141351848840714, "learning_rate": 0.01, "loss": 1.972, "step": 81822 }, { "epoch": 8.413016656384947, "grad_norm": 0.04604843631386757, "learning_rate": 0.01, "loss": 1.9579, "step": 81825 }, { "epoch": 8.41332510795805, "grad_norm": 0.08277709037065506, "learning_rate": 0.01, "loss": 1.9471, "step": 81828 }, { "epoch": 8.413633559531153, "grad_norm": 0.038434263318777084, "learning_rate": 0.01, "loss": 1.94, "step": 81831 }, { "epoch": 8.413942011104258, "grad_norm": 0.05674101784825325, "learning_rate": 0.01, "loss": 1.9495, "step": 81834 }, { "epoch": 8.41425046267736, "grad_norm": 0.037434451282024384, "learning_rate": 0.01, "loss": 1.9483, "step": 81837 }, { "epoch": 8.414558914250463, "grad_norm": 0.14585204422473907, "learning_rate": 0.01, "loss": 1.9321, "step": 81840 }, { "epoch": 8.414867365823566, "grad_norm": 0.12854383885860443, "learning_rate": 0.01, "loss": 1.966, "step": 81843 }, { "epoch": 8.415175817396669, "grad_norm": 0.09817156195640564, "learning_rate": 0.01, "loss": 1.9404, "step": 81846 }, { "epoch": 8.415484268969772, "grad_norm": 0.07401823252439499, "learning_rate": 0.01, "loss": 1.9235, "step": 81849 }, { "epoch": 8.415792720542875, "grad_norm": 0.038704339414834976, "learning_rate": 0.01, "loss": 1.9492, "step": 81852 }, { "epoch": 8.416101172115978, "grad_norm": 0.0486832819879055, "learning_rate": 0.01, "loss": 1.9814, "step": 81855 }, { "epoch": 8.41640962368908, "grad_norm": 0.04595676064491272, "learning_rate": 0.01, "loss": 1.9363, "step": 81858 }, { "epoch": 8.416718075262184, "grad_norm": 0.05181482061743736, "learning_rate": 0.01, "loss": 1.9672, "step": 81861 }, { "epoch": 8.417026526835286, "grad_norm": 0.05815410986542702, "learning_rate": 0.01, "loss": 1.94, "step": 81864 }, { "epoch": 8.41733497840839, "grad_norm": 0.08507470041513443, "learning_rate": 0.01, "loss": 1.9527, "step": 81867 }, { "epoch": 8.417643429981492, "grad_norm": 0.056439630687236786, "learning_rate": 0.01, "loss": 1.947, "step": 81870 }, { "epoch": 8.417951881554597, "grad_norm": 0.037034884095191956, "learning_rate": 0.01, "loss": 1.9388, "step": 81873 }, { "epoch": 8.4182603331277, "grad_norm": 0.0958690196275711, "learning_rate": 0.01, "loss": 1.9368, "step": 81876 }, { "epoch": 8.418568784700803, "grad_norm": 0.1169692724943161, "learning_rate": 0.01, "loss": 1.9369, "step": 81879 }, { "epoch": 8.418877236273905, "grad_norm": 0.06900906562805176, "learning_rate": 0.01, "loss": 1.9579, "step": 81882 }, { "epoch": 8.419185687847008, "grad_norm": 0.09297920763492584, "learning_rate": 0.01, "loss": 1.9324, "step": 81885 }, { "epoch": 8.419494139420111, "grad_norm": 0.08288362622261047, "learning_rate": 0.01, "loss": 1.931, "step": 81888 }, { "epoch": 8.419802590993214, "grad_norm": 0.09441418945789337, "learning_rate": 0.01, "loss": 1.9328, "step": 81891 }, { "epoch": 8.420111042566317, "grad_norm": 0.058979861438274384, "learning_rate": 0.01, "loss": 1.9485, "step": 81894 }, { "epoch": 8.42041949413942, "grad_norm": 0.053389038890600204, "learning_rate": 0.01, "loss": 1.9446, "step": 81897 }, { "epoch": 8.420727945712523, "grad_norm": 0.05778560787439346, "learning_rate": 0.01, "loss": 1.9454, "step": 81900 }, { "epoch": 8.421036397285626, "grad_norm": 0.0623948872089386, "learning_rate": 0.01, "loss": 1.9246, "step": 81903 }, { "epoch": 8.421344848858729, "grad_norm": 0.04240475967526436, "learning_rate": 0.01, "loss": 1.9372, "step": 81906 }, { "epoch": 8.421653300431831, "grad_norm": 0.07669229060411453, "learning_rate": 0.01, "loss": 1.9607, "step": 81909 }, { "epoch": 8.421961752004936, "grad_norm": 0.11650708317756653, "learning_rate": 0.01, "loss": 1.9581, "step": 81912 }, { "epoch": 8.422270203578039, "grad_norm": 0.13154225051403046, "learning_rate": 0.01, "loss": 1.9562, "step": 81915 }, { "epoch": 8.422578655151142, "grad_norm": 0.07503214478492737, "learning_rate": 0.01, "loss": 1.9525, "step": 81918 }, { "epoch": 8.422887106724245, "grad_norm": 0.04421877861022949, "learning_rate": 0.01, "loss": 1.9368, "step": 81921 }, { "epoch": 8.423195558297348, "grad_norm": 0.03446371480822563, "learning_rate": 0.01, "loss": 1.9403, "step": 81924 }, { "epoch": 8.42350400987045, "grad_norm": 0.03708928823471069, "learning_rate": 0.01, "loss": 1.9695, "step": 81927 }, { "epoch": 8.423812461443553, "grad_norm": 0.032508306205272675, "learning_rate": 0.01, "loss": 1.9721, "step": 81930 }, { "epoch": 8.424120913016656, "grad_norm": 0.05352804809808731, "learning_rate": 0.01, "loss": 1.9394, "step": 81933 }, { "epoch": 8.42442936458976, "grad_norm": 0.15011733770370483, "learning_rate": 0.01, "loss": 1.9549, "step": 81936 }, { "epoch": 8.424737816162862, "grad_norm": 0.07135441154241562, "learning_rate": 0.01, "loss": 1.9689, "step": 81939 }, { "epoch": 8.425046267735965, "grad_norm": 0.05304455757141113, "learning_rate": 0.01, "loss": 1.9501, "step": 81942 }, { "epoch": 8.425354719309068, "grad_norm": 0.0310453400015831, "learning_rate": 0.01, "loss": 1.9674, "step": 81945 }, { "epoch": 8.42566317088217, "grad_norm": 0.03331723436713219, "learning_rate": 0.01, "loss": 1.9516, "step": 81948 }, { "epoch": 8.425971622455275, "grad_norm": 0.13005290925502777, "learning_rate": 0.01, "loss": 1.9774, "step": 81951 }, { "epoch": 8.426280074028378, "grad_norm": 0.09037820249795914, "learning_rate": 0.01, "loss": 1.9417, "step": 81954 }, { "epoch": 8.426588525601481, "grad_norm": 0.05044369027018547, "learning_rate": 0.01, "loss": 1.9738, "step": 81957 }, { "epoch": 8.426896977174584, "grad_norm": 0.06943196058273315, "learning_rate": 0.01, "loss": 1.989, "step": 81960 }, { "epoch": 8.427205428747687, "grad_norm": 0.07625371217727661, "learning_rate": 0.01, "loss": 1.9552, "step": 81963 }, { "epoch": 8.42751388032079, "grad_norm": 0.046040359884500504, "learning_rate": 0.01, "loss": 1.9304, "step": 81966 }, { "epoch": 8.427822331893893, "grad_norm": 0.03187291696667671, "learning_rate": 0.01, "loss": 1.9616, "step": 81969 }, { "epoch": 8.428130783466996, "grad_norm": 0.030660714954137802, "learning_rate": 0.01, "loss": 1.9359, "step": 81972 }, { "epoch": 8.428439235040099, "grad_norm": 0.05950603634119034, "learning_rate": 0.01, "loss": 1.9394, "step": 81975 }, { "epoch": 8.428747686613201, "grad_norm": 0.13979393243789673, "learning_rate": 0.01, "loss": 1.9593, "step": 81978 }, { "epoch": 8.429056138186304, "grad_norm": 0.10096840560436249, "learning_rate": 0.01, "loss": 1.9168, "step": 81981 }, { "epoch": 8.429364589759407, "grad_norm": 0.070526123046875, "learning_rate": 0.01, "loss": 1.943, "step": 81984 }, { "epoch": 8.42967304133251, "grad_norm": 0.047776173800230026, "learning_rate": 0.01, "loss": 1.9491, "step": 81987 }, { "epoch": 8.429981492905615, "grad_norm": 0.03126342594623566, "learning_rate": 0.01, "loss": 1.9347, "step": 81990 }, { "epoch": 8.430289944478718, "grad_norm": 0.1021898165345192, "learning_rate": 0.01, "loss": 1.9314, "step": 81993 }, { "epoch": 8.43059839605182, "grad_norm": 0.0435447096824646, "learning_rate": 0.01, "loss": 1.9462, "step": 81996 }, { "epoch": 8.430906847624923, "grad_norm": 0.09668678045272827, "learning_rate": 0.01, "loss": 1.9566, "step": 81999 }, { "epoch": 8.431215299198026, "grad_norm": 0.03794991225004196, "learning_rate": 0.01, "loss": 1.9405, "step": 82002 }, { "epoch": 8.43152375077113, "grad_norm": 0.044686999171972275, "learning_rate": 0.01, "loss": 1.9633, "step": 82005 }, { "epoch": 8.431832202344232, "grad_norm": 0.03441571816802025, "learning_rate": 0.01, "loss": 1.963, "step": 82008 }, { "epoch": 8.432140653917335, "grad_norm": 0.0457640215754509, "learning_rate": 0.01, "loss": 1.9538, "step": 82011 }, { "epoch": 8.432449105490438, "grad_norm": 0.06535539031028748, "learning_rate": 0.01, "loss": 1.938, "step": 82014 }, { "epoch": 8.43275755706354, "grad_norm": 0.06897538155317307, "learning_rate": 0.01, "loss": 1.9511, "step": 82017 }, { "epoch": 8.433066008636644, "grad_norm": 0.07700546830892563, "learning_rate": 0.01, "loss": 1.9769, "step": 82020 }, { "epoch": 8.433374460209746, "grad_norm": 0.07638740539550781, "learning_rate": 0.01, "loss": 1.9678, "step": 82023 }, { "epoch": 8.43368291178285, "grad_norm": 0.08059564232826233, "learning_rate": 0.01, "loss": 1.9218, "step": 82026 }, { "epoch": 8.433991363355954, "grad_norm": 0.09341084212064743, "learning_rate": 0.01, "loss": 1.9492, "step": 82029 }, { "epoch": 8.434299814929057, "grad_norm": 0.05106695368885994, "learning_rate": 0.01, "loss": 1.9286, "step": 82032 }, { "epoch": 8.43460826650216, "grad_norm": 0.05583924800157547, "learning_rate": 0.01, "loss": 1.9548, "step": 82035 }, { "epoch": 8.434916718075263, "grad_norm": 0.05295472964644432, "learning_rate": 0.01, "loss": 1.9607, "step": 82038 }, { "epoch": 8.435225169648366, "grad_norm": 0.12497080117464066, "learning_rate": 0.01, "loss": 1.9693, "step": 82041 }, { "epoch": 8.435533621221468, "grad_norm": 0.08089610189199448, "learning_rate": 0.01, "loss": 1.9591, "step": 82044 }, { "epoch": 8.435842072794571, "grad_norm": 0.0737653374671936, "learning_rate": 0.01, "loss": 1.9163, "step": 82047 }, { "epoch": 8.436150524367674, "grad_norm": 0.04853172227740288, "learning_rate": 0.01, "loss": 1.99, "step": 82050 }, { "epoch": 8.436458975940777, "grad_norm": 0.032473742961883545, "learning_rate": 0.01, "loss": 1.9456, "step": 82053 }, { "epoch": 8.43676742751388, "grad_norm": 0.03237362578511238, "learning_rate": 0.01, "loss": 1.9286, "step": 82056 }, { "epoch": 8.437075879086983, "grad_norm": 0.03169579803943634, "learning_rate": 0.01, "loss": 1.9462, "step": 82059 }, { "epoch": 8.437384330660086, "grad_norm": 0.08158145844936371, "learning_rate": 0.01, "loss": 1.9416, "step": 82062 }, { "epoch": 8.437692782233189, "grad_norm": 0.11348774284124374, "learning_rate": 0.01, "loss": 1.944, "step": 82065 }, { "epoch": 8.438001233806293, "grad_norm": 0.052066583186388016, "learning_rate": 0.01, "loss": 1.9336, "step": 82068 }, { "epoch": 8.438309685379396, "grad_norm": 0.05296801030635834, "learning_rate": 0.01, "loss": 1.9459, "step": 82071 }, { "epoch": 8.438618136952499, "grad_norm": 0.07596609741449356, "learning_rate": 0.01, "loss": 1.9749, "step": 82074 }, { "epoch": 8.438926588525602, "grad_norm": 0.057574667036533356, "learning_rate": 0.01, "loss": 1.9441, "step": 82077 }, { "epoch": 8.439235040098705, "grad_norm": 0.037525586783885956, "learning_rate": 0.01, "loss": 1.9656, "step": 82080 }, { "epoch": 8.439543491671808, "grad_norm": 0.05512101203203201, "learning_rate": 0.01, "loss": 1.9409, "step": 82083 }, { "epoch": 8.43985194324491, "grad_norm": 0.09372948855161667, "learning_rate": 0.01, "loss": 1.9588, "step": 82086 }, { "epoch": 8.440160394818014, "grad_norm": 0.08818025887012482, "learning_rate": 0.01, "loss": 1.9353, "step": 82089 }, { "epoch": 8.440468846391116, "grad_norm": 0.06324193626642227, "learning_rate": 0.01, "loss": 1.9413, "step": 82092 }, { "epoch": 8.44077729796422, "grad_norm": 0.0913136675953865, "learning_rate": 0.01, "loss": 1.9391, "step": 82095 }, { "epoch": 8.441085749537322, "grad_norm": 0.05476130172610283, "learning_rate": 0.01, "loss": 1.9451, "step": 82098 }, { "epoch": 8.441394201110425, "grad_norm": 0.12479494512081146, "learning_rate": 0.01, "loss": 1.9371, "step": 82101 }, { "epoch": 8.441702652683528, "grad_norm": 0.07040181756019592, "learning_rate": 0.01, "loss": 1.9517, "step": 82104 }, { "epoch": 8.442011104256633, "grad_norm": 0.04941793903708458, "learning_rate": 0.01, "loss": 1.9578, "step": 82107 }, { "epoch": 8.442319555829735, "grad_norm": 0.03700859844684601, "learning_rate": 0.01, "loss": 1.957, "step": 82110 }, { "epoch": 8.442628007402838, "grad_norm": 0.04570907726883888, "learning_rate": 0.01, "loss": 1.9441, "step": 82113 }, { "epoch": 8.442936458975941, "grad_norm": 0.03453392907977104, "learning_rate": 0.01, "loss": 1.919, "step": 82116 }, { "epoch": 8.443244910549044, "grad_norm": 0.05497143417596817, "learning_rate": 0.01, "loss": 1.9585, "step": 82119 }, { "epoch": 8.443553362122147, "grad_norm": 0.13390731811523438, "learning_rate": 0.01, "loss": 1.9726, "step": 82122 }, { "epoch": 8.44386181369525, "grad_norm": 0.08949720859527588, "learning_rate": 0.01, "loss": 1.9278, "step": 82125 }, { "epoch": 8.444170265268353, "grad_norm": 0.08513578027486801, "learning_rate": 0.01, "loss": 1.9527, "step": 82128 }, { "epoch": 8.444478716841456, "grad_norm": 0.06023092195391655, "learning_rate": 0.01, "loss": 1.9386, "step": 82131 }, { "epoch": 8.444787168414559, "grad_norm": 0.03851139172911644, "learning_rate": 0.01, "loss": 1.9707, "step": 82134 }, { "epoch": 8.445095619987661, "grad_norm": 0.04166555404663086, "learning_rate": 0.01, "loss": 1.9592, "step": 82137 }, { "epoch": 8.445404071560764, "grad_norm": 0.09604350477457047, "learning_rate": 0.01, "loss": 1.955, "step": 82140 }, { "epoch": 8.445712523133867, "grad_norm": 0.10311726480722427, "learning_rate": 0.01, "loss": 1.9572, "step": 82143 }, { "epoch": 8.446020974706972, "grad_norm": 0.07740923017263412, "learning_rate": 0.01, "loss": 1.9476, "step": 82146 }, { "epoch": 8.446329426280075, "grad_norm": 0.08379843086004257, "learning_rate": 0.01, "loss": 1.9514, "step": 82149 }, { "epoch": 8.446637877853178, "grad_norm": 0.11364433914422989, "learning_rate": 0.01, "loss": 1.9429, "step": 82152 }, { "epoch": 8.44694632942628, "grad_norm": 0.05542511120438576, "learning_rate": 0.01, "loss": 1.9435, "step": 82155 }, { "epoch": 8.447254780999383, "grad_norm": 0.09093154221773148, "learning_rate": 0.01, "loss": 1.9572, "step": 82158 }, { "epoch": 8.447563232572486, "grad_norm": 0.034466832876205444, "learning_rate": 0.01, "loss": 1.9535, "step": 82161 }, { "epoch": 8.44787168414559, "grad_norm": 0.03495443984866142, "learning_rate": 0.01, "loss": 1.941, "step": 82164 }, { "epoch": 8.448180135718692, "grad_norm": 0.09456860274076462, "learning_rate": 0.01, "loss": 1.9625, "step": 82167 }, { "epoch": 8.448488587291795, "grad_norm": 0.04304103925824165, "learning_rate": 0.01, "loss": 1.941, "step": 82170 }, { "epoch": 8.448797038864898, "grad_norm": 0.06458231806755066, "learning_rate": 0.01, "loss": 1.9739, "step": 82173 }, { "epoch": 8.449105490438, "grad_norm": 0.0449569895863533, "learning_rate": 0.01, "loss": 1.9576, "step": 82176 }, { "epoch": 8.449413942011104, "grad_norm": 0.12885241210460663, "learning_rate": 0.01, "loss": 1.9441, "step": 82179 }, { "epoch": 8.449722393584207, "grad_norm": 0.044088032096624374, "learning_rate": 0.01, "loss": 1.9333, "step": 82182 }, { "epoch": 8.450030845157311, "grad_norm": 0.09161775559186935, "learning_rate": 0.01, "loss": 1.9587, "step": 82185 }, { "epoch": 8.450339296730414, "grad_norm": 0.03646433353424072, "learning_rate": 0.01, "loss": 1.9323, "step": 82188 }, { "epoch": 8.450647748303517, "grad_norm": 0.12289903312921524, "learning_rate": 0.01, "loss": 1.94, "step": 82191 }, { "epoch": 8.45095619987662, "grad_norm": 0.06749970465898514, "learning_rate": 0.01, "loss": 1.9522, "step": 82194 }, { "epoch": 8.451264651449723, "grad_norm": 0.11313161998987198, "learning_rate": 0.01, "loss": 1.9636, "step": 82197 }, { "epoch": 8.451573103022826, "grad_norm": 0.04492225497961044, "learning_rate": 0.01, "loss": 1.9413, "step": 82200 }, { "epoch": 8.451881554595928, "grad_norm": 0.042263831943273544, "learning_rate": 0.01, "loss": 1.9562, "step": 82203 }, { "epoch": 8.452190006169031, "grad_norm": 0.029859155416488647, "learning_rate": 0.01, "loss": 1.9534, "step": 82206 }, { "epoch": 8.452498457742134, "grad_norm": 0.0463617742061615, "learning_rate": 0.01, "loss": 1.9469, "step": 82209 }, { "epoch": 8.452806909315237, "grad_norm": 0.1206277385354042, "learning_rate": 0.01, "loss": 1.9477, "step": 82212 }, { "epoch": 8.45311536088834, "grad_norm": 0.05199189484119415, "learning_rate": 0.01, "loss": 1.9225, "step": 82215 }, { "epoch": 8.453423812461443, "grad_norm": 0.08049193769693375, "learning_rate": 0.01, "loss": 1.9419, "step": 82218 }, { "epoch": 8.453732264034546, "grad_norm": 0.07479532808065414, "learning_rate": 0.01, "loss": 1.9293, "step": 82221 }, { "epoch": 8.45404071560765, "grad_norm": 0.0769563689827919, "learning_rate": 0.01, "loss": 1.9415, "step": 82224 }, { "epoch": 8.454349167180753, "grad_norm": 0.09309972077608109, "learning_rate": 0.01, "loss": 1.9377, "step": 82227 }, { "epoch": 8.454657618753856, "grad_norm": 0.040779951959848404, "learning_rate": 0.01, "loss": 1.9414, "step": 82230 }, { "epoch": 8.45496607032696, "grad_norm": 0.060668691992759705, "learning_rate": 0.01, "loss": 1.9355, "step": 82233 }, { "epoch": 8.455274521900062, "grad_norm": 0.09466727823019028, "learning_rate": 0.01, "loss": 1.956, "step": 82236 }, { "epoch": 8.455582973473165, "grad_norm": 0.08183979243040085, "learning_rate": 0.01, "loss": 1.9477, "step": 82239 }, { "epoch": 8.455891425046268, "grad_norm": 0.10821831226348877, "learning_rate": 0.01, "loss": 1.9726, "step": 82242 }, { "epoch": 8.45619987661937, "grad_norm": 0.07753495126962662, "learning_rate": 0.01, "loss": 1.9248, "step": 82245 }, { "epoch": 8.456508328192474, "grad_norm": 0.06566669046878815, "learning_rate": 0.01, "loss": 1.9373, "step": 82248 }, { "epoch": 8.456816779765576, "grad_norm": 0.043311357498168945, "learning_rate": 0.01, "loss": 1.9587, "step": 82251 }, { "epoch": 8.45712523133868, "grad_norm": 0.06685193628072739, "learning_rate": 0.01, "loss": 1.959, "step": 82254 }, { "epoch": 8.457433682911782, "grad_norm": 0.050073493272066116, "learning_rate": 0.01, "loss": 1.9513, "step": 82257 }, { "epoch": 8.457742134484885, "grad_norm": 0.045595601201057434, "learning_rate": 0.01, "loss": 1.9572, "step": 82260 }, { "epoch": 8.45805058605799, "grad_norm": 0.11144230514764786, "learning_rate": 0.01, "loss": 1.9421, "step": 82263 }, { "epoch": 8.458359037631093, "grad_norm": 0.09206417948007584, "learning_rate": 0.01, "loss": 1.9469, "step": 82266 }, { "epoch": 8.458667489204196, "grad_norm": 0.03726569563150406, "learning_rate": 0.01, "loss": 1.9363, "step": 82269 }, { "epoch": 8.458975940777298, "grad_norm": 0.039987582713365555, "learning_rate": 0.01, "loss": 1.9626, "step": 82272 }, { "epoch": 8.459284392350401, "grad_norm": 0.09079570323228836, "learning_rate": 0.01, "loss": 1.9538, "step": 82275 }, { "epoch": 8.459592843923504, "grad_norm": 0.0509127639234066, "learning_rate": 0.01, "loss": 1.9618, "step": 82278 }, { "epoch": 8.459901295496607, "grad_norm": 0.09340399503707886, "learning_rate": 0.01, "loss": 1.9259, "step": 82281 }, { "epoch": 8.46020974706971, "grad_norm": 0.12762415409088135, "learning_rate": 0.01, "loss": 1.9497, "step": 82284 }, { "epoch": 8.460518198642813, "grad_norm": 0.12151426821947098, "learning_rate": 0.01, "loss": 1.9307, "step": 82287 }, { "epoch": 8.460826650215916, "grad_norm": 0.04861297830939293, "learning_rate": 0.01, "loss": 1.9556, "step": 82290 }, { "epoch": 8.461135101789019, "grad_norm": 0.08445765823125839, "learning_rate": 0.01, "loss": 1.9723, "step": 82293 }, { "epoch": 8.461443553362122, "grad_norm": 0.06112569943070412, "learning_rate": 0.01, "loss": 1.9357, "step": 82296 }, { "epoch": 8.461752004935224, "grad_norm": 0.04099706560373306, "learning_rate": 0.01, "loss": 1.9777, "step": 82299 }, { "epoch": 8.462060456508329, "grad_norm": 0.049014266580343246, "learning_rate": 0.01, "loss": 1.9622, "step": 82302 }, { "epoch": 8.462368908081432, "grad_norm": 0.05799461528658867, "learning_rate": 0.01, "loss": 1.9701, "step": 82305 }, { "epoch": 8.462677359654535, "grad_norm": 0.08514149487018585, "learning_rate": 0.01, "loss": 1.9999, "step": 82308 }, { "epoch": 8.462985811227638, "grad_norm": 0.07080193608999252, "learning_rate": 0.01, "loss": 1.958, "step": 82311 }, { "epoch": 8.46329426280074, "grad_norm": 0.08238495886325836, "learning_rate": 0.01, "loss": 1.9377, "step": 82314 }, { "epoch": 8.463602714373843, "grad_norm": 0.05924540013074875, "learning_rate": 0.01, "loss": 1.9135, "step": 82317 }, { "epoch": 8.463911165946946, "grad_norm": 0.0707138329744339, "learning_rate": 0.01, "loss": 1.9694, "step": 82320 }, { "epoch": 8.46421961752005, "grad_norm": 0.12265108525753021, "learning_rate": 0.01, "loss": 1.9428, "step": 82323 }, { "epoch": 8.464528069093152, "grad_norm": 0.03225354850292206, "learning_rate": 0.01, "loss": 1.9557, "step": 82326 }, { "epoch": 8.464836520666255, "grad_norm": 0.034755658358335495, "learning_rate": 0.01, "loss": 1.9403, "step": 82329 }, { "epoch": 8.465144972239358, "grad_norm": 0.05887124314904213, "learning_rate": 0.01, "loss": 1.9397, "step": 82332 }, { "epoch": 8.46545342381246, "grad_norm": 0.09077783674001694, "learning_rate": 0.01, "loss": 1.9359, "step": 82335 }, { "epoch": 8.465761875385564, "grad_norm": 0.06020389124751091, "learning_rate": 0.01, "loss": 1.9527, "step": 82338 }, { "epoch": 8.466070326958668, "grad_norm": 0.09305040538311005, "learning_rate": 0.01, "loss": 1.9649, "step": 82341 }, { "epoch": 8.466378778531771, "grad_norm": 0.06462978571653366, "learning_rate": 0.01, "loss": 1.9332, "step": 82344 }, { "epoch": 8.466687230104874, "grad_norm": 0.0358070433139801, "learning_rate": 0.01, "loss": 1.948, "step": 82347 }, { "epoch": 8.466995681677977, "grad_norm": 0.038024164736270905, "learning_rate": 0.01, "loss": 1.9608, "step": 82350 }, { "epoch": 8.46730413325108, "grad_norm": 0.06127464398741722, "learning_rate": 0.01, "loss": 1.9623, "step": 82353 }, { "epoch": 8.467612584824183, "grad_norm": 0.051408492028713226, "learning_rate": 0.01, "loss": 1.936, "step": 82356 }, { "epoch": 8.467921036397286, "grad_norm": 0.08558293431997299, "learning_rate": 0.01, "loss": 1.9401, "step": 82359 }, { "epoch": 8.468229487970389, "grad_norm": 0.06267976760864258, "learning_rate": 0.01, "loss": 1.9325, "step": 82362 }, { "epoch": 8.468537939543491, "grad_norm": 0.12160573154687881, "learning_rate": 0.01, "loss": 1.9486, "step": 82365 }, { "epoch": 8.468846391116594, "grad_norm": 0.05924427509307861, "learning_rate": 0.01, "loss": 1.9503, "step": 82368 }, { "epoch": 8.469154842689697, "grad_norm": 0.06296840310096741, "learning_rate": 0.01, "loss": 1.9757, "step": 82371 }, { "epoch": 8.4694632942628, "grad_norm": 0.031919799745082855, "learning_rate": 0.01, "loss": 1.9373, "step": 82374 }, { "epoch": 8.469771745835903, "grad_norm": 0.03885785862803459, "learning_rate": 0.01, "loss": 1.9461, "step": 82377 }, { "epoch": 8.470080197409008, "grad_norm": 0.0466669462621212, "learning_rate": 0.01, "loss": 1.9593, "step": 82380 }, { "epoch": 8.47038864898211, "grad_norm": 0.04599282145500183, "learning_rate": 0.01, "loss": 1.9342, "step": 82383 }, { "epoch": 8.470697100555213, "grad_norm": 0.040090885013341904, "learning_rate": 0.01, "loss": 1.9563, "step": 82386 }, { "epoch": 8.471005552128316, "grad_norm": 0.08816850185394287, "learning_rate": 0.01, "loss": 1.9473, "step": 82389 }, { "epoch": 8.47131400370142, "grad_norm": 0.0854404866695404, "learning_rate": 0.01, "loss": 1.951, "step": 82392 }, { "epoch": 8.471622455274522, "grad_norm": 0.045315563678741455, "learning_rate": 0.01, "loss": 1.9209, "step": 82395 }, { "epoch": 8.471930906847625, "grad_norm": 0.049634288996458054, "learning_rate": 0.01, "loss": 1.9324, "step": 82398 }, { "epoch": 8.472239358420728, "grad_norm": 0.058090757578611374, "learning_rate": 0.01, "loss": 1.9698, "step": 82401 }, { "epoch": 8.47254780999383, "grad_norm": 0.047061413526535034, "learning_rate": 0.01, "loss": 1.9345, "step": 82404 }, { "epoch": 8.472856261566934, "grad_norm": 0.04962011054158211, "learning_rate": 0.01, "loss": 1.9295, "step": 82407 }, { "epoch": 8.473164713140036, "grad_norm": 0.09965474903583527, "learning_rate": 0.01, "loss": 1.9517, "step": 82410 }, { "epoch": 8.47347316471314, "grad_norm": 0.05239838361740112, "learning_rate": 0.01, "loss": 1.9562, "step": 82413 }, { "epoch": 8.473781616286242, "grad_norm": 0.04054271802306175, "learning_rate": 0.01, "loss": 1.9465, "step": 82416 }, { "epoch": 8.474090067859347, "grad_norm": 0.04137061908841133, "learning_rate": 0.01, "loss": 1.9532, "step": 82419 }, { "epoch": 8.47439851943245, "grad_norm": 0.037401482462882996, "learning_rate": 0.01, "loss": 1.958, "step": 82422 }, { "epoch": 8.474706971005553, "grad_norm": 0.08365823328495026, "learning_rate": 0.01, "loss": 1.9805, "step": 82425 }, { "epoch": 8.475015422578656, "grad_norm": 0.03423890843987465, "learning_rate": 0.01, "loss": 1.9514, "step": 82428 }, { "epoch": 8.475323874151758, "grad_norm": 0.04772539809346199, "learning_rate": 0.01, "loss": 1.959, "step": 82431 }, { "epoch": 8.475632325724861, "grad_norm": 0.049579694867134094, "learning_rate": 0.01, "loss": 1.9738, "step": 82434 }, { "epoch": 8.475940777297964, "grad_norm": 0.06134011968970299, "learning_rate": 0.01, "loss": 1.9627, "step": 82437 }, { "epoch": 8.476249228871067, "grad_norm": 0.0950068011879921, "learning_rate": 0.01, "loss": 1.9695, "step": 82440 }, { "epoch": 8.47655768044417, "grad_norm": 0.07112467288970947, "learning_rate": 0.01, "loss": 1.9613, "step": 82443 }, { "epoch": 8.476866132017273, "grad_norm": 0.07818299531936646, "learning_rate": 0.01, "loss": 1.9288, "step": 82446 }, { "epoch": 8.477174583590376, "grad_norm": 0.09234732389450073, "learning_rate": 0.01, "loss": 1.9565, "step": 82449 }, { "epoch": 8.477483035163479, "grad_norm": 0.09724172949790955, "learning_rate": 0.01, "loss": 1.9625, "step": 82452 }, { "epoch": 8.477791486736582, "grad_norm": 0.16661600768566132, "learning_rate": 0.01, "loss": 1.9402, "step": 82455 }, { "epoch": 8.478099938309686, "grad_norm": 0.08794001489877701, "learning_rate": 0.01, "loss": 1.9578, "step": 82458 }, { "epoch": 8.478408389882789, "grad_norm": 0.03663558512926102, "learning_rate": 0.01, "loss": 1.955, "step": 82461 }, { "epoch": 8.478716841455892, "grad_norm": 0.05669325217604637, "learning_rate": 0.01, "loss": 1.9557, "step": 82464 }, { "epoch": 8.479025293028995, "grad_norm": 0.10977914929389954, "learning_rate": 0.01, "loss": 1.9332, "step": 82467 }, { "epoch": 8.479333744602098, "grad_norm": 0.08488729596138, "learning_rate": 0.01, "loss": 1.9642, "step": 82470 }, { "epoch": 8.4796421961752, "grad_norm": 0.06477433443069458, "learning_rate": 0.01, "loss": 1.9421, "step": 82473 }, { "epoch": 8.479950647748304, "grad_norm": 0.07088177651166916, "learning_rate": 0.01, "loss": 1.9506, "step": 82476 }, { "epoch": 8.480259099321406, "grad_norm": 0.052466023713350296, "learning_rate": 0.01, "loss": 1.9418, "step": 82479 }, { "epoch": 8.48056755089451, "grad_norm": 0.039188675582408905, "learning_rate": 0.01, "loss": 1.9673, "step": 82482 }, { "epoch": 8.480876002467612, "grad_norm": 0.05050719901919365, "learning_rate": 0.01, "loss": 1.9309, "step": 82485 }, { "epoch": 8.481184454040715, "grad_norm": 0.04674654081463814, "learning_rate": 0.01, "loss": 1.9725, "step": 82488 }, { "epoch": 8.481492905613818, "grad_norm": 0.04279822111129761, "learning_rate": 0.01, "loss": 1.9461, "step": 82491 }, { "epoch": 8.48180135718692, "grad_norm": 0.1351974904537201, "learning_rate": 0.01, "loss": 1.9542, "step": 82494 }, { "epoch": 8.482109808760026, "grad_norm": 0.06532531976699829, "learning_rate": 0.01, "loss": 1.9418, "step": 82497 }, { "epoch": 8.482418260333128, "grad_norm": 0.10240834206342697, "learning_rate": 0.01, "loss": 1.9489, "step": 82500 }, { "epoch": 8.482726711906231, "grad_norm": 0.04963400214910507, "learning_rate": 0.01, "loss": 1.9515, "step": 82503 }, { "epoch": 8.483035163479334, "grad_norm": 0.07292640954256058, "learning_rate": 0.01, "loss": 1.9427, "step": 82506 }, { "epoch": 8.483343615052437, "grad_norm": 0.09584075212478638, "learning_rate": 0.01, "loss": 1.9413, "step": 82509 }, { "epoch": 8.48365206662554, "grad_norm": 0.051588594913482666, "learning_rate": 0.01, "loss": 1.95, "step": 82512 }, { "epoch": 8.483960518198643, "grad_norm": 0.0447857603430748, "learning_rate": 0.01, "loss": 1.9672, "step": 82515 }, { "epoch": 8.484268969771746, "grad_norm": 0.04466879740357399, "learning_rate": 0.01, "loss": 1.9559, "step": 82518 }, { "epoch": 8.484577421344849, "grad_norm": 0.041589345782995224, "learning_rate": 0.01, "loss": 1.9537, "step": 82521 }, { "epoch": 8.484885872917951, "grad_norm": 0.1081608310341835, "learning_rate": 0.01, "loss": 1.9488, "step": 82524 }, { "epoch": 8.485194324491054, "grad_norm": 0.13699102401733398, "learning_rate": 0.01, "loss": 1.9307, "step": 82527 }, { "epoch": 8.485502776064157, "grad_norm": 0.0641539990901947, "learning_rate": 0.01, "loss": 1.9631, "step": 82530 }, { "epoch": 8.48581122763726, "grad_norm": 0.04673508554697037, "learning_rate": 0.01, "loss": 1.9442, "step": 82533 }, { "epoch": 8.486119679210365, "grad_norm": 0.04663087800145149, "learning_rate": 0.01, "loss": 1.9708, "step": 82536 }, { "epoch": 8.486428130783468, "grad_norm": 0.040559981018304825, "learning_rate": 0.01, "loss": 1.9716, "step": 82539 }, { "epoch": 8.48673658235657, "grad_norm": 0.03951507806777954, "learning_rate": 0.01, "loss": 1.952, "step": 82542 }, { "epoch": 8.487045033929673, "grad_norm": 0.04637393727898598, "learning_rate": 0.01, "loss": 1.9398, "step": 82545 }, { "epoch": 8.487353485502776, "grad_norm": 0.09550820291042328, "learning_rate": 0.01, "loss": 1.9632, "step": 82548 }, { "epoch": 8.48766193707588, "grad_norm": 0.043463677167892456, "learning_rate": 0.01, "loss": 1.959, "step": 82551 }, { "epoch": 8.487970388648982, "grad_norm": 0.04329133778810501, "learning_rate": 0.01, "loss": 1.9271, "step": 82554 }, { "epoch": 8.488278840222085, "grad_norm": 0.044975396245718, "learning_rate": 0.01, "loss": 1.9496, "step": 82557 }, { "epoch": 8.488587291795188, "grad_norm": 0.04665372148156166, "learning_rate": 0.01, "loss": 1.9445, "step": 82560 }, { "epoch": 8.48889574336829, "grad_norm": 0.06570030748844147, "learning_rate": 0.01, "loss": 1.9616, "step": 82563 }, { "epoch": 8.489204194941394, "grad_norm": 0.07015156745910645, "learning_rate": 0.01, "loss": 1.9654, "step": 82566 }, { "epoch": 8.489512646514497, "grad_norm": 0.11700818687677383, "learning_rate": 0.01, "loss": 1.9689, "step": 82569 }, { "epoch": 8.4898210980876, "grad_norm": 0.04150941222906113, "learning_rate": 0.01, "loss": 1.941, "step": 82572 }, { "epoch": 8.490129549660704, "grad_norm": 0.057442814111709595, "learning_rate": 0.01, "loss": 1.9041, "step": 82575 }, { "epoch": 8.490438001233807, "grad_norm": 0.05241570249199867, "learning_rate": 0.01, "loss": 1.9541, "step": 82578 }, { "epoch": 8.49074645280691, "grad_norm": 0.04771868884563446, "learning_rate": 0.01, "loss": 1.9191, "step": 82581 }, { "epoch": 8.491054904380013, "grad_norm": 0.038936082273721695, "learning_rate": 0.01, "loss": 1.9809, "step": 82584 }, { "epoch": 8.491363355953116, "grad_norm": 0.05392014607787132, "learning_rate": 0.01, "loss": 1.9676, "step": 82587 }, { "epoch": 8.491671807526219, "grad_norm": 0.04587749391794205, "learning_rate": 0.01, "loss": 1.937, "step": 82590 }, { "epoch": 8.491980259099321, "grad_norm": 0.0473514162003994, "learning_rate": 0.01, "loss": 1.9493, "step": 82593 }, { "epoch": 8.492288710672424, "grad_norm": 0.04507661983370781, "learning_rate": 0.01, "loss": 1.9559, "step": 82596 }, { "epoch": 8.492597162245527, "grad_norm": 0.06868678331375122, "learning_rate": 0.01, "loss": 1.9694, "step": 82599 }, { "epoch": 8.49290561381863, "grad_norm": 0.0536494106054306, "learning_rate": 0.01, "loss": 1.9519, "step": 82602 }, { "epoch": 8.493214065391733, "grad_norm": 0.06346127390861511, "learning_rate": 0.01, "loss": 1.9464, "step": 82605 }, { "epoch": 8.493522516964836, "grad_norm": 0.1364736407995224, "learning_rate": 0.01, "loss": 1.9582, "step": 82608 }, { "epoch": 8.493830968537939, "grad_norm": 0.1090131625533104, "learning_rate": 0.01, "loss": 1.9475, "step": 82611 }, { "epoch": 8.494139420111043, "grad_norm": 0.06526965647935867, "learning_rate": 0.01, "loss": 1.9519, "step": 82614 }, { "epoch": 8.494447871684146, "grad_norm": 0.03388083726167679, "learning_rate": 0.01, "loss": 1.9513, "step": 82617 }, { "epoch": 8.49475632325725, "grad_norm": 0.044526632875204086, "learning_rate": 0.01, "loss": 1.9561, "step": 82620 }, { "epoch": 8.495064774830352, "grad_norm": 0.05937231332063675, "learning_rate": 0.01, "loss": 1.9406, "step": 82623 }, { "epoch": 8.495373226403455, "grad_norm": 0.0789630115032196, "learning_rate": 0.01, "loss": 1.9366, "step": 82626 }, { "epoch": 8.495681677976558, "grad_norm": 0.10676635056734085, "learning_rate": 0.01, "loss": 1.9371, "step": 82629 }, { "epoch": 8.49599012954966, "grad_norm": 0.06259648501873016, "learning_rate": 0.01, "loss": 1.9514, "step": 82632 }, { "epoch": 8.496298581122764, "grad_norm": 0.10849929600954056, "learning_rate": 0.01, "loss": 1.9375, "step": 82635 }, { "epoch": 8.496607032695866, "grad_norm": 0.044495515525341034, "learning_rate": 0.01, "loss": 1.9657, "step": 82638 }, { "epoch": 8.49691548426897, "grad_norm": 0.09883397072553635, "learning_rate": 0.01, "loss": 1.9467, "step": 82641 }, { "epoch": 8.497223935842072, "grad_norm": 0.06384971737861633, "learning_rate": 0.01, "loss": 1.9329, "step": 82644 }, { "epoch": 8.497532387415175, "grad_norm": 0.09279752522706985, "learning_rate": 0.01, "loss": 1.9455, "step": 82647 }, { "epoch": 8.497840838988278, "grad_norm": 0.053611353039741516, "learning_rate": 0.01, "loss": 1.9509, "step": 82650 }, { "epoch": 8.498149290561383, "grad_norm": 0.05061632767319679, "learning_rate": 0.01, "loss": 1.9633, "step": 82653 }, { "epoch": 8.498457742134486, "grad_norm": 0.04062135890126228, "learning_rate": 0.01, "loss": 1.9487, "step": 82656 }, { "epoch": 8.498766193707588, "grad_norm": 0.05774087458848953, "learning_rate": 0.01, "loss": 1.9481, "step": 82659 }, { "epoch": 8.499074645280691, "grad_norm": 0.1062593087553978, "learning_rate": 0.01, "loss": 1.9379, "step": 82662 }, { "epoch": 8.499383096853794, "grad_norm": 0.058600399643182755, "learning_rate": 0.01, "loss": 1.9494, "step": 82665 }, { "epoch": 8.499691548426897, "grad_norm": 0.03446796536445618, "learning_rate": 0.01, "loss": 1.9394, "step": 82668 }, { "epoch": 8.5, "grad_norm": 0.05771895498037338, "learning_rate": 0.01, "loss": 1.9386, "step": 82671 }, { "epoch": 8.500308451573103, "grad_norm": 0.047667182981967926, "learning_rate": 0.01, "loss": 1.9225, "step": 82674 }, { "epoch": 8.500616903146206, "grad_norm": 0.05757983401417732, "learning_rate": 0.01, "loss": 1.9389, "step": 82677 }, { "epoch": 8.500925354719309, "grad_norm": 0.0631047859787941, "learning_rate": 0.01, "loss": 1.9663, "step": 82680 }, { "epoch": 8.501233806292412, "grad_norm": 0.10003430396318436, "learning_rate": 0.01, "loss": 1.959, "step": 82683 }, { "epoch": 8.501542257865514, "grad_norm": 0.04452775418758392, "learning_rate": 0.01, "loss": 1.9751, "step": 82686 }, { "epoch": 8.501850709438617, "grad_norm": 0.07562670856714249, "learning_rate": 0.01, "loss": 1.9428, "step": 82689 }, { "epoch": 8.502159161011722, "grad_norm": 0.08108774572610855, "learning_rate": 0.01, "loss": 1.9325, "step": 82692 }, { "epoch": 8.502467612584825, "grad_norm": 0.07505381852388382, "learning_rate": 0.01, "loss": 1.9493, "step": 82695 }, { "epoch": 8.502776064157928, "grad_norm": 0.09649732708930969, "learning_rate": 0.01, "loss": 1.9394, "step": 82698 }, { "epoch": 8.50308451573103, "grad_norm": 0.05110400915145874, "learning_rate": 0.01, "loss": 1.9371, "step": 82701 }, { "epoch": 8.503392967304134, "grad_norm": 0.11231815814971924, "learning_rate": 0.01, "loss": 1.9402, "step": 82704 }, { "epoch": 8.503701418877236, "grad_norm": 0.033437538892030716, "learning_rate": 0.01, "loss": 1.9523, "step": 82707 }, { "epoch": 8.50400987045034, "grad_norm": 0.09465573728084564, "learning_rate": 0.01, "loss": 1.9293, "step": 82710 }, { "epoch": 8.504318322023442, "grad_norm": 0.047320276498794556, "learning_rate": 0.01, "loss": 1.917, "step": 82713 }, { "epoch": 8.504626773596545, "grad_norm": 0.05426005274057388, "learning_rate": 0.01, "loss": 1.9427, "step": 82716 }, { "epoch": 8.504935225169648, "grad_norm": 0.1052740216255188, "learning_rate": 0.01, "loss": 1.9489, "step": 82719 }, { "epoch": 8.50524367674275, "grad_norm": 0.12666532397270203, "learning_rate": 0.01, "loss": 1.9359, "step": 82722 }, { "epoch": 8.505552128315854, "grad_norm": 0.08487075567245483, "learning_rate": 0.01, "loss": 1.962, "step": 82725 }, { "epoch": 8.505860579888957, "grad_norm": 0.05700911208987236, "learning_rate": 0.01, "loss": 1.9594, "step": 82728 }, { "epoch": 8.506169031462061, "grad_norm": 0.033154360949993134, "learning_rate": 0.01, "loss": 1.9174, "step": 82731 }, { "epoch": 8.506477483035164, "grad_norm": 0.032864026725292206, "learning_rate": 0.01, "loss": 1.9402, "step": 82734 }, { "epoch": 8.506785934608267, "grad_norm": 0.056912586092948914, "learning_rate": 0.01, "loss": 1.9676, "step": 82737 }, { "epoch": 8.50709438618137, "grad_norm": 0.07113908231258392, "learning_rate": 0.01, "loss": 1.9678, "step": 82740 }, { "epoch": 8.507402837754473, "grad_norm": 0.04963776096701622, "learning_rate": 0.01, "loss": 1.9451, "step": 82743 }, { "epoch": 8.507711289327576, "grad_norm": 0.03446223959326744, "learning_rate": 0.01, "loss": 1.9592, "step": 82746 }, { "epoch": 8.508019740900679, "grad_norm": 0.1191178485751152, "learning_rate": 0.01, "loss": 1.9461, "step": 82749 }, { "epoch": 8.508328192473781, "grad_norm": 0.09850679337978363, "learning_rate": 0.01, "loss": 1.9414, "step": 82752 }, { "epoch": 8.508636644046884, "grad_norm": 0.04549625515937805, "learning_rate": 0.01, "loss": 1.9558, "step": 82755 }, { "epoch": 8.508945095619987, "grad_norm": 0.03925361484289169, "learning_rate": 0.01, "loss": 1.9604, "step": 82758 }, { "epoch": 8.50925354719309, "grad_norm": 0.05189771577715874, "learning_rate": 0.01, "loss": 1.946, "step": 82761 }, { "epoch": 8.509561998766193, "grad_norm": 0.04734313488006592, "learning_rate": 0.01, "loss": 1.9508, "step": 82764 }, { "epoch": 8.509870450339296, "grad_norm": 0.05462914705276489, "learning_rate": 0.01, "loss": 1.9386, "step": 82767 }, { "epoch": 8.5101789019124, "grad_norm": 0.047157566994428635, "learning_rate": 0.01, "loss": 1.9447, "step": 82770 }, { "epoch": 8.510487353485503, "grad_norm": 0.03989399969577789, "learning_rate": 0.01, "loss": 1.9277, "step": 82773 }, { "epoch": 8.510795805058606, "grad_norm": 0.13090354204177856, "learning_rate": 0.01, "loss": 1.9617, "step": 82776 }, { "epoch": 8.51110425663171, "grad_norm": 0.04144233837723732, "learning_rate": 0.01, "loss": 1.9662, "step": 82779 }, { "epoch": 8.511412708204812, "grad_norm": 0.06514573842287064, "learning_rate": 0.01, "loss": 1.9291, "step": 82782 }, { "epoch": 8.511721159777915, "grad_norm": 0.0639931708574295, "learning_rate": 0.01, "loss": 1.9508, "step": 82785 }, { "epoch": 8.512029611351018, "grad_norm": 0.04106876254081726, "learning_rate": 0.01, "loss": 1.9536, "step": 82788 }, { "epoch": 8.51233806292412, "grad_norm": 0.045197829604148865, "learning_rate": 0.01, "loss": 1.9782, "step": 82791 }, { "epoch": 8.512646514497224, "grad_norm": 0.04337150603532791, "learning_rate": 0.01, "loss": 1.9301, "step": 82794 }, { "epoch": 8.512954966070327, "grad_norm": 0.04576044902205467, "learning_rate": 0.01, "loss": 1.9735, "step": 82797 }, { "epoch": 8.51326341764343, "grad_norm": 0.06863435357809067, "learning_rate": 0.01, "loss": 1.9635, "step": 82800 }, { "epoch": 8.513571869216532, "grad_norm": 0.09685313701629639, "learning_rate": 0.01, "loss": 1.9562, "step": 82803 }, { "epoch": 8.513880320789635, "grad_norm": 0.09746305644512177, "learning_rate": 0.01, "loss": 1.965, "step": 82806 }, { "epoch": 8.51418877236274, "grad_norm": 0.039198584854602814, "learning_rate": 0.01, "loss": 1.9511, "step": 82809 }, { "epoch": 8.514497223935843, "grad_norm": 0.03824876993894577, "learning_rate": 0.01, "loss": 1.9747, "step": 82812 }, { "epoch": 8.514805675508946, "grad_norm": 0.07905596494674683, "learning_rate": 0.01, "loss": 1.9666, "step": 82815 }, { "epoch": 8.515114127082049, "grad_norm": 0.1133846566081047, "learning_rate": 0.01, "loss": 1.9492, "step": 82818 }, { "epoch": 8.515422578655151, "grad_norm": 0.09092739969491959, "learning_rate": 0.01, "loss": 1.9386, "step": 82821 }, { "epoch": 8.515731030228254, "grad_norm": 0.06377000361680984, "learning_rate": 0.01, "loss": 1.9519, "step": 82824 }, { "epoch": 8.516039481801357, "grad_norm": 0.033967819064855576, "learning_rate": 0.01, "loss": 1.977, "step": 82827 }, { "epoch": 8.51634793337446, "grad_norm": 0.03668487071990967, "learning_rate": 0.01, "loss": 1.8952, "step": 82830 }, { "epoch": 8.516656384947563, "grad_norm": 0.0694398507475853, "learning_rate": 0.01, "loss": 1.9574, "step": 82833 }, { "epoch": 8.516964836520666, "grad_norm": 0.10825932025909424, "learning_rate": 0.01, "loss": 1.9561, "step": 82836 }, { "epoch": 8.517273288093769, "grad_norm": 0.12376853823661804, "learning_rate": 0.01, "loss": 1.9508, "step": 82839 }, { "epoch": 8.517581739666872, "grad_norm": 0.04124746099114418, "learning_rate": 0.01, "loss": 1.9509, "step": 82842 }, { "epoch": 8.517890191239974, "grad_norm": 0.04353870078921318, "learning_rate": 0.01, "loss": 1.94, "step": 82845 }, { "epoch": 8.51819864281308, "grad_norm": 0.04228542745113373, "learning_rate": 0.01, "loss": 1.9248, "step": 82848 }, { "epoch": 8.518507094386182, "grad_norm": 0.05087719112634659, "learning_rate": 0.01, "loss": 1.9551, "step": 82851 }, { "epoch": 8.518815545959285, "grad_norm": 0.04548172652721405, "learning_rate": 0.01, "loss": 1.9566, "step": 82854 }, { "epoch": 8.519123997532388, "grad_norm": 0.04723157361149788, "learning_rate": 0.01, "loss": 1.9494, "step": 82857 }, { "epoch": 8.51943244910549, "grad_norm": 0.1239178255200386, "learning_rate": 0.01, "loss": 1.9455, "step": 82860 }, { "epoch": 8.519740900678594, "grad_norm": 0.06999621540307999, "learning_rate": 0.01, "loss": 1.9483, "step": 82863 }, { "epoch": 8.520049352251696, "grad_norm": 0.07094637304544449, "learning_rate": 0.01, "loss": 1.9618, "step": 82866 }, { "epoch": 8.5203578038248, "grad_norm": 0.047610655426979065, "learning_rate": 0.01, "loss": 1.9386, "step": 82869 }, { "epoch": 8.520666255397902, "grad_norm": 0.06207083538174629, "learning_rate": 0.01, "loss": 1.9577, "step": 82872 }, { "epoch": 8.520974706971005, "grad_norm": 0.04032672569155693, "learning_rate": 0.01, "loss": 1.9415, "step": 82875 }, { "epoch": 8.521283158544108, "grad_norm": 0.08010223507881165, "learning_rate": 0.01, "loss": 1.9611, "step": 82878 }, { "epoch": 8.521591610117211, "grad_norm": 0.08639922738075256, "learning_rate": 0.01, "loss": 1.9365, "step": 82881 }, { "epoch": 8.521900061690314, "grad_norm": 0.05493670329451561, "learning_rate": 0.01, "loss": 1.9496, "step": 82884 }, { "epoch": 8.522208513263418, "grad_norm": 0.0760105550289154, "learning_rate": 0.01, "loss": 1.9336, "step": 82887 }, { "epoch": 8.522516964836521, "grad_norm": 0.04124101251363754, "learning_rate": 0.01, "loss": 1.9582, "step": 82890 }, { "epoch": 8.522825416409624, "grad_norm": 0.04088043421506882, "learning_rate": 0.01, "loss": 1.9527, "step": 82893 }, { "epoch": 8.523133867982727, "grad_norm": 0.05277232453227043, "learning_rate": 0.01, "loss": 1.9574, "step": 82896 }, { "epoch": 8.52344231955583, "grad_norm": 0.04177923500537872, "learning_rate": 0.01, "loss": 1.9519, "step": 82899 }, { "epoch": 8.523750771128933, "grad_norm": 0.04745470732450485, "learning_rate": 0.01, "loss": 1.95, "step": 82902 }, { "epoch": 8.524059222702036, "grad_norm": 0.16573688387870789, "learning_rate": 0.01, "loss": 1.9628, "step": 82905 }, { "epoch": 8.524367674275139, "grad_norm": 0.046021923422813416, "learning_rate": 0.01, "loss": 1.9523, "step": 82908 }, { "epoch": 8.524676125848242, "grad_norm": 0.08142309635877609, "learning_rate": 0.01, "loss": 1.945, "step": 82911 }, { "epoch": 8.524984577421344, "grad_norm": 0.05172238498926163, "learning_rate": 0.01, "loss": 1.9601, "step": 82914 }, { "epoch": 8.525293028994447, "grad_norm": 0.04434951767325401, "learning_rate": 0.01, "loss": 1.9453, "step": 82917 }, { "epoch": 8.52560148056755, "grad_norm": 0.0400237962603569, "learning_rate": 0.01, "loss": 1.9533, "step": 82920 }, { "epoch": 8.525909932140653, "grad_norm": 0.07618872076272964, "learning_rate": 0.01, "loss": 1.9592, "step": 82923 }, { "epoch": 8.526218383713758, "grad_norm": 0.05563430115580559, "learning_rate": 0.01, "loss": 1.9473, "step": 82926 }, { "epoch": 8.52652683528686, "grad_norm": 0.12297258526086807, "learning_rate": 0.01, "loss": 1.9582, "step": 82929 }, { "epoch": 8.526835286859964, "grad_norm": 0.04254676029086113, "learning_rate": 0.01, "loss": 1.9312, "step": 82932 }, { "epoch": 8.527143738433066, "grad_norm": 0.04348786920309067, "learning_rate": 0.01, "loss": 1.9384, "step": 82935 }, { "epoch": 8.52745219000617, "grad_norm": 0.0981455147266388, "learning_rate": 0.01, "loss": 1.9545, "step": 82938 }, { "epoch": 8.527760641579272, "grad_norm": 0.06708617508411407, "learning_rate": 0.01, "loss": 1.9536, "step": 82941 }, { "epoch": 8.528069093152375, "grad_norm": 0.0691312775015831, "learning_rate": 0.01, "loss": 1.9553, "step": 82944 }, { "epoch": 8.528377544725478, "grad_norm": 0.040481530129909515, "learning_rate": 0.01, "loss": 1.9569, "step": 82947 }, { "epoch": 8.52868599629858, "grad_norm": 0.05544349551200867, "learning_rate": 0.01, "loss": 1.9597, "step": 82950 }, { "epoch": 8.528994447871684, "grad_norm": 0.06614125519990921, "learning_rate": 0.01, "loss": 1.9418, "step": 82953 }, { "epoch": 8.529302899444787, "grad_norm": 0.10472485423088074, "learning_rate": 0.01, "loss": 1.969, "step": 82956 }, { "epoch": 8.52961135101789, "grad_norm": 0.11792192608118057, "learning_rate": 0.01, "loss": 1.9443, "step": 82959 }, { "epoch": 8.529919802590992, "grad_norm": 0.03551974147558212, "learning_rate": 0.01, "loss": 1.9426, "step": 82962 }, { "epoch": 8.530228254164097, "grad_norm": 0.10112152248620987, "learning_rate": 0.01, "loss": 1.9346, "step": 82965 }, { "epoch": 8.5305367057372, "grad_norm": 0.10229824483394623, "learning_rate": 0.01, "loss": 1.962, "step": 82968 }, { "epoch": 8.530845157310303, "grad_norm": 0.04930291324853897, "learning_rate": 0.01, "loss": 1.9518, "step": 82971 }, { "epoch": 8.531153608883406, "grad_norm": 0.08808545768260956, "learning_rate": 0.01, "loss": 1.9449, "step": 82974 }, { "epoch": 8.531462060456509, "grad_norm": 0.05333753675222397, "learning_rate": 0.01, "loss": 1.9227, "step": 82977 }, { "epoch": 8.531770512029611, "grad_norm": 0.039773065596818924, "learning_rate": 0.01, "loss": 1.9395, "step": 82980 }, { "epoch": 8.532078963602714, "grad_norm": 0.04518011584877968, "learning_rate": 0.01, "loss": 1.9612, "step": 82983 }, { "epoch": 8.532387415175817, "grad_norm": 0.038609880954027176, "learning_rate": 0.01, "loss": 1.9618, "step": 82986 }, { "epoch": 8.53269586674892, "grad_norm": 0.054003313183784485, "learning_rate": 0.01, "loss": 1.9589, "step": 82989 }, { "epoch": 8.533004318322023, "grad_norm": 0.08430000394582748, "learning_rate": 0.01, "loss": 1.9548, "step": 82992 }, { "epoch": 8.533312769895126, "grad_norm": 0.10585802793502808, "learning_rate": 0.01, "loss": 1.9302, "step": 82995 }, { "epoch": 8.533621221468229, "grad_norm": 0.05213607847690582, "learning_rate": 0.01, "loss": 1.9319, "step": 82998 }, { "epoch": 8.533929673041332, "grad_norm": 0.04785900563001633, "learning_rate": 0.01, "loss": 1.9881, "step": 83001 }, { "epoch": 8.534238124614436, "grad_norm": 0.041036851704120636, "learning_rate": 0.01, "loss": 1.9258, "step": 83004 }, { "epoch": 8.53454657618754, "grad_norm": 0.03290323168039322, "learning_rate": 0.01, "loss": 1.9516, "step": 83007 }, { "epoch": 8.534855027760642, "grad_norm": 0.036765702068805695, "learning_rate": 0.01, "loss": 1.9293, "step": 83010 }, { "epoch": 8.535163479333745, "grad_norm": 0.10406450927257538, "learning_rate": 0.01, "loss": 1.9563, "step": 83013 }, { "epoch": 8.535471930906848, "grad_norm": 0.040695950388908386, "learning_rate": 0.01, "loss": 1.9553, "step": 83016 }, { "epoch": 8.53578038247995, "grad_norm": 0.047379184514284134, "learning_rate": 0.01, "loss": 1.9706, "step": 83019 }, { "epoch": 8.536088834053054, "grad_norm": 0.05943119153380394, "learning_rate": 0.01, "loss": 1.9395, "step": 83022 }, { "epoch": 8.536397285626157, "grad_norm": 0.055041078478097916, "learning_rate": 0.01, "loss": 1.9828, "step": 83025 }, { "epoch": 8.53670573719926, "grad_norm": 0.041810180991888046, "learning_rate": 0.01, "loss": 1.9488, "step": 83028 }, { "epoch": 8.537014188772362, "grad_norm": 0.10506521165370941, "learning_rate": 0.01, "loss": 1.9403, "step": 83031 }, { "epoch": 8.537322640345465, "grad_norm": 0.058530621230602264, "learning_rate": 0.01, "loss": 1.9525, "step": 83034 }, { "epoch": 8.537631091918568, "grad_norm": 0.03754234313964844, "learning_rate": 0.01, "loss": 1.9457, "step": 83037 }, { "epoch": 8.537939543491671, "grad_norm": 0.03643619269132614, "learning_rate": 0.01, "loss": 1.9503, "step": 83040 }, { "epoch": 8.538247995064776, "grad_norm": 0.03979889675974846, "learning_rate": 0.01, "loss": 1.9343, "step": 83043 }, { "epoch": 8.538556446637878, "grad_norm": 0.11619020998477936, "learning_rate": 0.01, "loss": 1.9232, "step": 83046 }, { "epoch": 8.538864898210981, "grad_norm": 0.06696553528308868, "learning_rate": 0.01, "loss": 1.9566, "step": 83049 }, { "epoch": 8.539173349784084, "grad_norm": 0.05368531122803688, "learning_rate": 0.01, "loss": 1.9611, "step": 83052 }, { "epoch": 8.539481801357187, "grad_norm": 0.13212686777114868, "learning_rate": 0.01, "loss": 1.9604, "step": 83055 }, { "epoch": 8.53979025293029, "grad_norm": 0.06416644155979156, "learning_rate": 0.01, "loss": 1.9384, "step": 83058 }, { "epoch": 8.540098704503393, "grad_norm": 0.08319517225027084, "learning_rate": 0.01, "loss": 1.9468, "step": 83061 }, { "epoch": 8.540407156076496, "grad_norm": 0.05614497885107994, "learning_rate": 0.01, "loss": 1.9285, "step": 83064 }, { "epoch": 8.540715607649599, "grad_norm": 0.048746507614851, "learning_rate": 0.01, "loss": 1.9599, "step": 83067 }, { "epoch": 8.541024059222702, "grad_norm": 0.0499294176697731, "learning_rate": 0.01, "loss": 1.9377, "step": 83070 }, { "epoch": 8.541332510795804, "grad_norm": 0.041048891842365265, "learning_rate": 0.01, "loss": 1.9381, "step": 83073 }, { "epoch": 8.541640962368907, "grad_norm": 0.038432419300079346, "learning_rate": 0.01, "loss": 1.9607, "step": 83076 }, { "epoch": 8.54194941394201, "grad_norm": 0.03735635429620743, "learning_rate": 0.01, "loss": 1.9654, "step": 83079 }, { "epoch": 8.542257865515115, "grad_norm": 0.11857511103153229, "learning_rate": 0.01, "loss": 1.9719, "step": 83082 }, { "epoch": 8.542566317088218, "grad_norm": 0.0405680276453495, "learning_rate": 0.01, "loss": 1.9374, "step": 83085 }, { "epoch": 8.54287476866132, "grad_norm": 0.09402373433113098, "learning_rate": 0.01, "loss": 1.9525, "step": 83088 }, { "epoch": 8.543183220234424, "grad_norm": 0.14297622442245483, "learning_rate": 0.01, "loss": 1.966, "step": 83091 }, { "epoch": 8.543491671807526, "grad_norm": 0.0900854766368866, "learning_rate": 0.01, "loss": 1.9357, "step": 83094 }, { "epoch": 8.54380012338063, "grad_norm": 0.057596318423748016, "learning_rate": 0.01, "loss": 1.9477, "step": 83097 }, { "epoch": 8.544108574953732, "grad_norm": 0.04268134385347366, "learning_rate": 0.01, "loss": 1.9591, "step": 83100 }, { "epoch": 8.544417026526835, "grad_norm": 0.05697793513536453, "learning_rate": 0.01, "loss": 1.9592, "step": 83103 }, { "epoch": 8.544725478099938, "grad_norm": 0.055229440331459045, "learning_rate": 0.01, "loss": 1.9178, "step": 83106 }, { "epoch": 8.54503392967304, "grad_norm": 0.04192070662975311, "learning_rate": 0.01, "loss": 1.9743, "step": 83109 }, { "epoch": 8.545342381246144, "grad_norm": 0.14123336970806122, "learning_rate": 0.01, "loss": 1.9421, "step": 83112 }, { "epoch": 8.545650832819247, "grad_norm": 0.12341219931840897, "learning_rate": 0.01, "loss": 1.9617, "step": 83115 }, { "epoch": 8.54595928439235, "grad_norm": 0.06125897914171219, "learning_rate": 0.01, "loss": 1.9383, "step": 83118 }, { "epoch": 8.546267735965454, "grad_norm": 0.04034245386719704, "learning_rate": 0.01, "loss": 1.9325, "step": 83121 }, { "epoch": 8.546576187538557, "grad_norm": 0.03154856339097023, "learning_rate": 0.01, "loss": 1.9459, "step": 83124 }, { "epoch": 8.54688463911166, "grad_norm": 0.0465167835354805, "learning_rate": 0.01, "loss": 1.9685, "step": 83127 }, { "epoch": 8.547193090684763, "grad_norm": 0.07463836669921875, "learning_rate": 0.01, "loss": 1.9495, "step": 83130 }, { "epoch": 8.547501542257866, "grad_norm": 0.04135550931096077, "learning_rate": 0.01, "loss": 1.9425, "step": 83133 }, { "epoch": 8.547809993830969, "grad_norm": 0.07107899338006973, "learning_rate": 0.01, "loss": 1.9384, "step": 83136 }, { "epoch": 8.548118445404072, "grad_norm": 0.08107513189315796, "learning_rate": 0.01, "loss": 1.936, "step": 83139 }, { "epoch": 8.548426896977174, "grad_norm": 0.045669108629226685, "learning_rate": 0.01, "loss": 1.925, "step": 83142 }, { "epoch": 8.548735348550277, "grad_norm": 0.04469345510005951, "learning_rate": 0.01, "loss": 1.956, "step": 83145 }, { "epoch": 8.54904380012338, "grad_norm": 0.0745619535446167, "learning_rate": 0.01, "loss": 1.9285, "step": 83148 }, { "epoch": 8.549352251696483, "grad_norm": 0.06670184433460236, "learning_rate": 0.01, "loss": 1.9394, "step": 83151 }, { "epoch": 8.549660703269586, "grad_norm": 0.04800862818956375, "learning_rate": 0.01, "loss": 1.9553, "step": 83154 }, { "epoch": 8.549969154842689, "grad_norm": 0.034935347735881805, "learning_rate": 0.01, "loss": 1.9392, "step": 83157 }, { "epoch": 8.550277606415793, "grad_norm": 0.08265478909015656, "learning_rate": 0.01, "loss": 1.9771, "step": 83160 }, { "epoch": 8.550586057988896, "grad_norm": 0.11869093775749207, "learning_rate": 0.01, "loss": 1.9421, "step": 83163 }, { "epoch": 8.550894509562, "grad_norm": 0.0789896696805954, "learning_rate": 0.01, "loss": 1.9554, "step": 83166 }, { "epoch": 8.551202961135102, "grad_norm": 0.059261299669742584, "learning_rate": 0.01, "loss": 1.9782, "step": 83169 }, { "epoch": 8.551511412708205, "grad_norm": 0.03951359540224075, "learning_rate": 0.01, "loss": 1.9648, "step": 83172 }, { "epoch": 8.551819864281308, "grad_norm": 0.03497397154569626, "learning_rate": 0.01, "loss": 1.9482, "step": 83175 }, { "epoch": 8.55212831585441, "grad_norm": 0.04117455706000328, "learning_rate": 0.01, "loss": 1.9114, "step": 83178 }, { "epoch": 8.552436767427514, "grad_norm": 0.039526063948869705, "learning_rate": 0.01, "loss": 1.9399, "step": 83181 }, { "epoch": 8.552745219000617, "grad_norm": 0.09101678431034088, "learning_rate": 0.01, "loss": 1.9508, "step": 83184 }, { "epoch": 8.55305367057372, "grad_norm": 0.06288325041532516, "learning_rate": 0.01, "loss": 1.9676, "step": 83187 }, { "epoch": 8.553362122146822, "grad_norm": 0.08381551504135132, "learning_rate": 0.01, "loss": 1.9673, "step": 83190 }, { "epoch": 8.553670573719925, "grad_norm": 0.054703015834093094, "learning_rate": 0.01, "loss": 1.9501, "step": 83193 }, { "epoch": 8.553979025293028, "grad_norm": 0.12512286007404327, "learning_rate": 0.01, "loss": 1.9426, "step": 83196 }, { "epoch": 8.554287476866133, "grad_norm": 0.06523016840219498, "learning_rate": 0.01, "loss": 1.9963, "step": 83199 }, { "epoch": 8.554595928439236, "grad_norm": 0.038854289799928665, "learning_rate": 0.01, "loss": 1.9159, "step": 83202 }, { "epoch": 8.554904380012339, "grad_norm": 0.042029645293951035, "learning_rate": 0.01, "loss": 1.9417, "step": 83205 }, { "epoch": 8.555212831585441, "grad_norm": 0.04491233825683594, "learning_rate": 0.01, "loss": 1.9554, "step": 83208 }, { "epoch": 8.555521283158544, "grad_norm": 0.06444685161113739, "learning_rate": 0.01, "loss": 1.9679, "step": 83211 }, { "epoch": 8.555829734731647, "grad_norm": 0.09291630983352661, "learning_rate": 0.01, "loss": 1.9544, "step": 83214 }, { "epoch": 8.55613818630475, "grad_norm": 0.10955164581537247, "learning_rate": 0.01, "loss": 1.934, "step": 83217 }, { "epoch": 8.556446637877853, "grad_norm": 0.06862539798021317, "learning_rate": 0.01, "loss": 1.9583, "step": 83220 }, { "epoch": 8.556755089450956, "grad_norm": 0.05166905000805855, "learning_rate": 0.01, "loss": 1.9342, "step": 83223 }, { "epoch": 8.557063541024059, "grad_norm": 0.11506062000989914, "learning_rate": 0.01, "loss": 1.9493, "step": 83226 }, { "epoch": 8.557371992597162, "grad_norm": 0.042790256440639496, "learning_rate": 0.01, "loss": 1.955, "step": 83229 }, { "epoch": 8.557680444170265, "grad_norm": 0.06458380818367004, "learning_rate": 0.01, "loss": 1.942, "step": 83232 }, { "epoch": 8.557988895743367, "grad_norm": 0.1059490218758583, "learning_rate": 0.01, "loss": 1.938, "step": 83235 }, { "epoch": 8.558297347316472, "grad_norm": 0.09995119273662567, "learning_rate": 0.01, "loss": 1.958, "step": 83238 }, { "epoch": 8.558605798889575, "grad_norm": 0.06877456605434418, "learning_rate": 0.01, "loss": 1.9566, "step": 83241 }, { "epoch": 8.558914250462678, "grad_norm": 0.054978057742118835, "learning_rate": 0.01, "loss": 1.9407, "step": 83244 }, { "epoch": 8.55922270203578, "grad_norm": 0.040848322212696075, "learning_rate": 0.01, "loss": 1.9734, "step": 83247 }, { "epoch": 8.559531153608884, "grad_norm": 0.07616148144006729, "learning_rate": 0.01, "loss": 1.956, "step": 83250 }, { "epoch": 8.559839605181986, "grad_norm": 0.05610670521855354, "learning_rate": 0.01, "loss": 1.9677, "step": 83253 }, { "epoch": 8.56014805675509, "grad_norm": 0.06030314415693283, "learning_rate": 0.01, "loss": 1.9215, "step": 83256 }, { "epoch": 8.560456508328192, "grad_norm": 0.037397000938653946, "learning_rate": 0.01, "loss": 1.9309, "step": 83259 }, { "epoch": 8.560764959901295, "grad_norm": 0.08328816294670105, "learning_rate": 0.01, "loss": 1.9556, "step": 83262 }, { "epoch": 8.561073411474398, "grad_norm": 0.04315868392586708, "learning_rate": 0.01, "loss": 1.9533, "step": 83265 }, { "epoch": 8.561381863047501, "grad_norm": 0.09421800822019577, "learning_rate": 0.01, "loss": 1.9396, "step": 83268 }, { "epoch": 8.561690314620604, "grad_norm": 0.0682055726647377, "learning_rate": 0.01, "loss": 1.937, "step": 83271 }, { "epoch": 8.561998766193707, "grad_norm": 0.10536344349384308, "learning_rate": 0.01, "loss": 1.9542, "step": 83274 }, { "epoch": 8.562307217766811, "grad_norm": 0.06057734787464142, "learning_rate": 0.01, "loss": 1.9438, "step": 83277 }, { "epoch": 8.562615669339914, "grad_norm": 0.07082143425941467, "learning_rate": 0.01, "loss": 1.942, "step": 83280 }, { "epoch": 8.562924120913017, "grad_norm": 0.061276625841856, "learning_rate": 0.01, "loss": 1.9565, "step": 83283 }, { "epoch": 8.56323257248612, "grad_norm": 0.06699836999177933, "learning_rate": 0.01, "loss": 1.9481, "step": 83286 }, { "epoch": 8.563541024059223, "grad_norm": 0.055695511400699615, "learning_rate": 0.01, "loss": 1.9578, "step": 83289 }, { "epoch": 8.563849475632326, "grad_norm": 0.04054784029722214, "learning_rate": 0.01, "loss": 1.9391, "step": 83292 }, { "epoch": 8.564157927205429, "grad_norm": 0.11725812405347824, "learning_rate": 0.01, "loss": 1.9398, "step": 83295 }, { "epoch": 8.564466378778532, "grad_norm": 0.034822978079319, "learning_rate": 0.01, "loss": 1.9169, "step": 83298 }, { "epoch": 8.564774830351634, "grad_norm": 0.09544063359498978, "learning_rate": 0.01, "loss": 1.9362, "step": 83301 }, { "epoch": 8.565083281924737, "grad_norm": 0.058509718626737595, "learning_rate": 0.01, "loss": 1.944, "step": 83304 }, { "epoch": 8.56539173349784, "grad_norm": 0.06134885177016258, "learning_rate": 0.01, "loss": 1.9431, "step": 83307 }, { "epoch": 8.565700185070943, "grad_norm": 0.07492076605558395, "learning_rate": 0.01, "loss": 1.984, "step": 83310 }, { "epoch": 8.566008636644046, "grad_norm": 0.10118688642978668, "learning_rate": 0.01, "loss": 1.9543, "step": 83313 }, { "epoch": 8.56631708821715, "grad_norm": 0.18619944155216217, "learning_rate": 0.01, "loss": 1.9472, "step": 83316 }, { "epoch": 8.566625539790254, "grad_norm": 0.13170650601387024, "learning_rate": 0.01, "loss": 1.9296, "step": 83319 }, { "epoch": 8.566933991363356, "grad_norm": 0.05305859446525574, "learning_rate": 0.01, "loss": 1.9274, "step": 83322 }, { "epoch": 8.56724244293646, "grad_norm": 0.04763732850551605, "learning_rate": 0.01, "loss": 1.9503, "step": 83325 }, { "epoch": 8.567550894509562, "grad_norm": 0.05493495613336563, "learning_rate": 0.01, "loss": 1.9409, "step": 83328 }, { "epoch": 8.567859346082665, "grad_norm": 0.07272114604711533, "learning_rate": 0.01, "loss": 1.9508, "step": 83331 }, { "epoch": 8.568167797655768, "grad_norm": 0.07231943309307098, "learning_rate": 0.01, "loss": 1.9423, "step": 83334 }, { "epoch": 8.56847624922887, "grad_norm": 0.04583733528852463, "learning_rate": 0.01, "loss": 1.9603, "step": 83337 }, { "epoch": 8.568784700801974, "grad_norm": 0.04909888654947281, "learning_rate": 0.01, "loss": 1.941, "step": 83340 }, { "epoch": 8.569093152375077, "grad_norm": 0.05668563023209572, "learning_rate": 0.01, "loss": 1.9625, "step": 83343 }, { "epoch": 8.56940160394818, "grad_norm": 0.09962517023086548, "learning_rate": 0.01, "loss": 1.9601, "step": 83346 }, { "epoch": 8.569710055521282, "grad_norm": 0.0734582468867302, "learning_rate": 0.01, "loss": 1.9331, "step": 83349 }, { "epoch": 8.570018507094385, "grad_norm": 0.0973704606294632, "learning_rate": 0.01, "loss": 1.9639, "step": 83352 }, { "epoch": 8.57032695866749, "grad_norm": 0.03622818738222122, "learning_rate": 0.01, "loss": 1.9693, "step": 83355 }, { "epoch": 8.570635410240593, "grad_norm": 0.041889727115631104, "learning_rate": 0.01, "loss": 1.9552, "step": 83358 }, { "epoch": 8.570943861813696, "grad_norm": 0.0979967936873436, "learning_rate": 0.01, "loss": 1.9564, "step": 83361 }, { "epoch": 8.571252313386799, "grad_norm": 0.07424527406692505, "learning_rate": 0.01, "loss": 1.9513, "step": 83364 }, { "epoch": 8.571560764959901, "grad_norm": 0.05315127596259117, "learning_rate": 0.01, "loss": 1.8978, "step": 83367 }, { "epoch": 8.571869216533004, "grad_norm": 0.0411335751414299, "learning_rate": 0.01, "loss": 1.965, "step": 83370 }, { "epoch": 8.572177668106107, "grad_norm": 0.12241435796022415, "learning_rate": 0.01, "loss": 1.9246, "step": 83373 }, { "epoch": 8.57248611967921, "grad_norm": 0.060931794345378876, "learning_rate": 0.01, "loss": 1.938, "step": 83376 }, { "epoch": 8.572794571252313, "grad_norm": 0.04587867110967636, "learning_rate": 0.01, "loss": 1.9456, "step": 83379 }, { "epoch": 8.573103022825416, "grad_norm": 0.047924742102622986, "learning_rate": 0.01, "loss": 1.9649, "step": 83382 }, { "epoch": 8.573411474398519, "grad_norm": 0.036105889827013016, "learning_rate": 0.01, "loss": 1.9755, "step": 83385 }, { "epoch": 8.573719925971622, "grad_norm": 0.05183165520429611, "learning_rate": 0.01, "loss": 1.9591, "step": 83388 }, { "epoch": 8.574028377544725, "grad_norm": 0.05435674265027046, "learning_rate": 0.01, "loss": 1.9728, "step": 83391 }, { "epoch": 8.57433682911783, "grad_norm": 0.04219867289066315, "learning_rate": 0.01, "loss": 1.9618, "step": 83394 }, { "epoch": 8.574645280690932, "grad_norm": 0.052681926637887955, "learning_rate": 0.01, "loss": 1.9534, "step": 83397 }, { "epoch": 8.574953732264035, "grad_norm": 0.1265089213848114, "learning_rate": 0.01, "loss": 1.9363, "step": 83400 }, { "epoch": 8.575262183837138, "grad_norm": 0.12898126244544983, "learning_rate": 0.01, "loss": 1.9273, "step": 83403 }, { "epoch": 8.57557063541024, "grad_norm": 0.08191855251789093, "learning_rate": 0.01, "loss": 1.9513, "step": 83406 }, { "epoch": 8.575879086983344, "grad_norm": 0.04627726227045059, "learning_rate": 0.01, "loss": 1.9522, "step": 83409 }, { "epoch": 8.576187538556447, "grad_norm": 0.054639171808958054, "learning_rate": 0.01, "loss": 1.9672, "step": 83412 }, { "epoch": 8.57649599012955, "grad_norm": 0.043029773980379105, "learning_rate": 0.01, "loss": 1.9447, "step": 83415 }, { "epoch": 8.576804441702652, "grad_norm": 0.029653949663043022, "learning_rate": 0.01, "loss": 1.9461, "step": 83418 }, { "epoch": 8.577112893275755, "grad_norm": 0.058871399611234665, "learning_rate": 0.01, "loss": 1.9256, "step": 83421 }, { "epoch": 8.577421344848858, "grad_norm": 0.09742128849029541, "learning_rate": 0.01, "loss": 1.956, "step": 83424 }, { "epoch": 8.577729796421961, "grad_norm": 0.10765515267848969, "learning_rate": 0.01, "loss": 1.9268, "step": 83427 }, { "epoch": 8.578038247995064, "grad_norm": 0.0624842531979084, "learning_rate": 0.01, "loss": 1.9337, "step": 83430 }, { "epoch": 8.578346699568169, "grad_norm": 0.04315463826060295, "learning_rate": 0.01, "loss": 1.9312, "step": 83433 }, { "epoch": 8.578655151141271, "grad_norm": 0.040104761719703674, "learning_rate": 0.01, "loss": 1.969, "step": 83436 }, { "epoch": 8.578963602714374, "grad_norm": 0.08021046966314316, "learning_rate": 0.01, "loss": 1.9444, "step": 83439 }, { "epoch": 8.579272054287477, "grad_norm": 0.03588419780135155, "learning_rate": 0.01, "loss": 1.9457, "step": 83442 }, { "epoch": 8.57958050586058, "grad_norm": 0.035717666149139404, "learning_rate": 0.01, "loss": 1.9376, "step": 83445 }, { "epoch": 8.579888957433683, "grad_norm": 0.05417096987366676, "learning_rate": 0.01, "loss": 1.9574, "step": 83448 }, { "epoch": 8.580197409006786, "grad_norm": 0.06687122583389282, "learning_rate": 0.01, "loss": 1.9653, "step": 83451 }, { "epoch": 8.580505860579889, "grad_norm": 0.1007574051618576, "learning_rate": 0.01, "loss": 1.9621, "step": 83454 }, { "epoch": 8.580814312152992, "grad_norm": 0.03898779675364494, "learning_rate": 0.01, "loss": 1.9223, "step": 83457 }, { "epoch": 8.581122763726095, "grad_norm": 0.12776483595371246, "learning_rate": 0.01, "loss": 1.9339, "step": 83460 }, { "epoch": 8.581431215299197, "grad_norm": 0.04682155326008797, "learning_rate": 0.01, "loss": 1.9633, "step": 83463 }, { "epoch": 8.5817396668723, "grad_norm": 0.06401663273572922, "learning_rate": 0.01, "loss": 1.9471, "step": 83466 }, { "epoch": 8.582048118445403, "grad_norm": 0.05969379097223282, "learning_rate": 0.01, "loss": 1.9605, "step": 83469 }, { "epoch": 8.582356570018508, "grad_norm": 0.04335704445838928, "learning_rate": 0.01, "loss": 1.9463, "step": 83472 }, { "epoch": 8.58266502159161, "grad_norm": 0.039102621376514435, "learning_rate": 0.01, "loss": 1.9489, "step": 83475 }, { "epoch": 8.582973473164714, "grad_norm": 0.0432644747197628, "learning_rate": 0.01, "loss": 1.9412, "step": 83478 }, { "epoch": 8.583281924737816, "grad_norm": 0.11703160405158997, "learning_rate": 0.01, "loss": 1.9592, "step": 83481 }, { "epoch": 8.58359037631092, "grad_norm": 0.09045902639627457, "learning_rate": 0.01, "loss": 1.9468, "step": 83484 }, { "epoch": 8.583898827884022, "grad_norm": 0.08998353034257889, "learning_rate": 0.01, "loss": 1.9356, "step": 83487 }, { "epoch": 8.584207279457125, "grad_norm": 0.08874877542257309, "learning_rate": 0.01, "loss": 1.9483, "step": 83490 }, { "epoch": 8.584515731030228, "grad_norm": 0.06457845866680145, "learning_rate": 0.01, "loss": 1.9472, "step": 83493 }, { "epoch": 8.584824182603331, "grad_norm": 0.0741107240319252, "learning_rate": 0.01, "loss": 1.9389, "step": 83496 }, { "epoch": 8.585132634176434, "grad_norm": 0.04416229575872421, "learning_rate": 0.01, "loss": 1.9399, "step": 83499 }, { "epoch": 8.585441085749537, "grad_norm": 0.04339141771197319, "learning_rate": 0.01, "loss": 1.9341, "step": 83502 }, { "epoch": 8.58574953732264, "grad_norm": 0.04124277085065842, "learning_rate": 0.01, "loss": 1.956, "step": 83505 }, { "epoch": 8.586057988895742, "grad_norm": 0.05257042124867439, "learning_rate": 0.01, "loss": 1.9546, "step": 83508 }, { "epoch": 8.586366440468847, "grad_norm": 0.061114367097616196, "learning_rate": 0.01, "loss": 1.951, "step": 83511 }, { "epoch": 8.58667489204195, "grad_norm": 0.09313886612653732, "learning_rate": 0.01, "loss": 1.9516, "step": 83514 }, { "epoch": 8.586983343615053, "grad_norm": 0.08771451562643051, "learning_rate": 0.01, "loss": 1.9583, "step": 83517 }, { "epoch": 8.587291795188156, "grad_norm": 0.04571617394685745, "learning_rate": 0.01, "loss": 1.9547, "step": 83520 }, { "epoch": 8.587600246761259, "grad_norm": 0.033631812781095505, "learning_rate": 0.01, "loss": 1.9693, "step": 83523 }, { "epoch": 8.587908698334362, "grad_norm": 0.05788915976881981, "learning_rate": 0.01, "loss": 1.9435, "step": 83526 }, { "epoch": 8.588217149907464, "grad_norm": 0.09849164634943008, "learning_rate": 0.01, "loss": 1.9568, "step": 83529 }, { "epoch": 8.588525601480567, "grad_norm": 0.06190909817814827, "learning_rate": 0.01, "loss": 1.9335, "step": 83532 }, { "epoch": 8.58883405305367, "grad_norm": 0.05096215009689331, "learning_rate": 0.01, "loss": 1.9519, "step": 83535 }, { "epoch": 8.589142504626773, "grad_norm": 0.10473909974098206, "learning_rate": 0.01, "loss": 1.9608, "step": 83538 }, { "epoch": 8.589450956199876, "grad_norm": 0.10345686972141266, "learning_rate": 0.01, "loss": 1.9623, "step": 83541 }, { "epoch": 8.589759407772979, "grad_norm": 0.060924358665943146, "learning_rate": 0.01, "loss": 1.9425, "step": 83544 }, { "epoch": 8.590067859346082, "grad_norm": 0.06810735166072845, "learning_rate": 0.01, "loss": 1.9309, "step": 83547 }, { "epoch": 8.590376310919186, "grad_norm": 0.11324567347764969, "learning_rate": 0.01, "loss": 1.9544, "step": 83550 }, { "epoch": 8.59068476249229, "grad_norm": 0.04607374966144562, "learning_rate": 0.01, "loss": 1.9495, "step": 83553 }, { "epoch": 8.590993214065392, "grad_norm": 0.07352105528116226, "learning_rate": 0.01, "loss": 1.964, "step": 83556 }, { "epoch": 8.591301665638495, "grad_norm": 0.060928743332624435, "learning_rate": 0.01, "loss": 1.9542, "step": 83559 }, { "epoch": 8.591610117211598, "grad_norm": 0.03746924549341202, "learning_rate": 0.01, "loss": 1.9438, "step": 83562 }, { "epoch": 8.5919185687847, "grad_norm": 0.06220705434679985, "learning_rate": 0.01, "loss": 1.9249, "step": 83565 }, { "epoch": 8.592227020357804, "grad_norm": 0.07664959877729416, "learning_rate": 0.01, "loss": 1.9362, "step": 83568 }, { "epoch": 8.592535471930907, "grad_norm": 0.09033758193254471, "learning_rate": 0.01, "loss": 1.9556, "step": 83571 }, { "epoch": 8.59284392350401, "grad_norm": 0.034112896770238876, "learning_rate": 0.01, "loss": 1.9705, "step": 83574 }, { "epoch": 8.593152375077112, "grad_norm": 0.04212299734354019, "learning_rate": 0.01, "loss": 1.9537, "step": 83577 }, { "epoch": 8.593460826650215, "grad_norm": 0.08151619881391525, "learning_rate": 0.01, "loss": 1.944, "step": 83580 }, { "epoch": 8.593769278223318, "grad_norm": 0.05353982374072075, "learning_rate": 0.01, "loss": 1.9534, "step": 83583 }, { "epoch": 8.594077729796421, "grad_norm": 0.057196274399757385, "learning_rate": 0.01, "loss": 1.9354, "step": 83586 }, { "epoch": 8.594386181369526, "grad_norm": 0.03714732453227043, "learning_rate": 0.01, "loss": 1.9487, "step": 83589 }, { "epoch": 8.594694632942629, "grad_norm": 0.03743036463856697, "learning_rate": 0.01, "loss": 1.9653, "step": 83592 }, { "epoch": 8.595003084515731, "grad_norm": 0.0360550694167614, "learning_rate": 0.01, "loss": 1.9233, "step": 83595 }, { "epoch": 8.595311536088834, "grad_norm": 0.1245717853307724, "learning_rate": 0.01, "loss": 1.9338, "step": 83598 }, { "epoch": 8.595619987661937, "grad_norm": 0.049492839723825455, "learning_rate": 0.01, "loss": 1.946, "step": 83601 }, { "epoch": 8.59592843923504, "grad_norm": 0.06890092045068741, "learning_rate": 0.01, "loss": 1.9558, "step": 83604 }, { "epoch": 8.596236890808143, "grad_norm": 0.049979422241449356, "learning_rate": 0.01, "loss": 1.9593, "step": 83607 }, { "epoch": 8.596545342381246, "grad_norm": 0.08141881227493286, "learning_rate": 0.01, "loss": 1.9587, "step": 83610 }, { "epoch": 8.596853793954349, "grad_norm": 0.06307000666856766, "learning_rate": 0.01, "loss": 1.953, "step": 83613 }, { "epoch": 8.597162245527452, "grad_norm": 0.057910919189453125, "learning_rate": 0.01, "loss": 1.9442, "step": 83616 }, { "epoch": 8.597470697100555, "grad_norm": 0.03516214340925217, "learning_rate": 0.01, "loss": 1.9437, "step": 83619 }, { "epoch": 8.597779148673657, "grad_norm": 0.04284656420350075, "learning_rate": 0.01, "loss": 1.9529, "step": 83622 }, { "epoch": 8.59808760024676, "grad_norm": 0.14242172241210938, "learning_rate": 0.01, "loss": 1.9701, "step": 83625 }, { "epoch": 8.598396051819865, "grad_norm": 0.04614776000380516, "learning_rate": 0.01, "loss": 1.9345, "step": 83628 }, { "epoch": 8.598704503392968, "grad_norm": 0.03917064145207405, "learning_rate": 0.01, "loss": 1.9651, "step": 83631 }, { "epoch": 8.59901295496607, "grad_norm": 0.037492744624614716, "learning_rate": 0.01, "loss": 1.9593, "step": 83634 }, { "epoch": 8.599321406539174, "grad_norm": 0.04139791429042816, "learning_rate": 0.01, "loss": 1.9622, "step": 83637 }, { "epoch": 8.599629858112277, "grad_norm": 0.05112408474087715, "learning_rate": 0.01, "loss": 1.9324, "step": 83640 }, { "epoch": 8.59993830968538, "grad_norm": 0.034215446561574936, "learning_rate": 0.01, "loss": 1.9539, "step": 83643 }, { "epoch": 8.600246761258482, "grad_norm": 0.06597255170345306, "learning_rate": 0.01, "loss": 1.9136, "step": 83646 }, { "epoch": 8.600555212831585, "grad_norm": 0.05571454018354416, "learning_rate": 0.01, "loss": 1.9509, "step": 83649 }, { "epoch": 8.600863664404688, "grad_norm": 0.04197344928979874, "learning_rate": 0.01, "loss": 1.9376, "step": 83652 }, { "epoch": 8.601172115977791, "grad_norm": 0.04194719344377518, "learning_rate": 0.01, "loss": 1.9236, "step": 83655 }, { "epoch": 8.601480567550894, "grad_norm": 0.032722242176532745, "learning_rate": 0.01, "loss": 1.9426, "step": 83658 }, { "epoch": 8.601789019123997, "grad_norm": 0.051183219999074936, "learning_rate": 0.01, "loss": 1.9512, "step": 83661 }, { "epoch": 8.6020974706971, "grad_norm": 0.06459864228963852, "learning_rate": 0.01, "loss": 1.9444, "step": 83664 }, { "epoch": 8.602405922270204, "grad_norm": 0.07737734168767929, "learning_rate": 0.01, "loss": 1.9457, "step": 83667 }, { "epoch": 8.602714373843307, "grad_norm": 0.09721986949443817, "learning_rate": 0.01, "loss": 1.9473, "step": 83670 }, { "epoch": 8.60302282541641, "grad_norm": 0.03549128770828247, "learning_rate": 0.01, "loss": 1.9454, "step": 83673 }, { "epoch": 8.603331276989513, "grad_norm": 0.040530189871788025, "learning_rate": 0.01, "loss": 1.9549, "step": 83676 }, { "epoch": 8.603639728562616, "grad_norm": 0.05348721519112587, "learning_rate": 0.01, "loss": 1.9182, "step": 83679 }, { "epoch": 8.603948180135719, "grad_norm": 0.052718210965394974, "learning_rate": 0.01, "loss": 1.9089, "step": 83682 }, { "epoch": 8.604256631708822, "grad_norm": 0.06333748251199722, "learning_rate": 0.01, "loss": 1.9534, "step": 83685 }, { "epoch": 8.604565083281924, "grad_norm": 0.14412350952625275, "learning_rate": 0.01, "loss": 1.9629, "step": 83688 }, { "epoch": 8.604873534855027, "grad_norm": 0.11650783568620682, "learning_rate": 0.01, "loss": 1.9392, "step": 83691 }, { "epoch": 8.60518198642813, "grad_norm": 0.12182766944169998, "learning_rate": 0.01, "loss": 1.9646, "step": 83694 }, { "epoch": 8.605490438001233, "grad_norm": 0.04255716875195503, "learning_rate": 0.01, "loss": 1.9653, "step": 83697 }, { "epoch": 8.605798889574336, "grad_norm": 0.03763425350189209, "learning_rate": 0.01, "loss": 1.9473, "step": 83700 }, { "epoch": 8.606107341147439, "grad_norm": 0.03447338193655014, "learning_rate": 0.01, "loss": 1.9526, "step": 83703 }, { "epoch": 8.606415792720544, "grad_norm": 0.04566109552979469, "learning_rate": 0.01, "loss": 1.9731, "step": 83706 }, { "epoch": 8.606724244293646, "grad_norm": 0.03781317174434662, "learning_rate": 0.01, "loss": 1.9233, "step": 83709 }, { "epoch": 8.60703269586675, "grad_norm": 0.11389817297458649, "learning_rate": 0.01, "loss": 1.9328, "step": 83712 }, { "epoch": 8.607341147439852, "grad_norm": 0.058238059282302856, "learning_rate": 0.01, "loss": 1.9402, "step": 83715 }, { "epoch": 8.607649599012955, "grad_norm": 0.04476723074913025, "learning_rate": 0.01, "loss": 1.9275, "step": 83718 }, { "epoch": 8.607958050586058, "grad_norm": 0.03965180367231369, "learning_rate": 0.01, "loss": 1.9567, "step": 83721 }, { "epoch": 8.608266502159161, "grad_norm": 0.052368249744176865, "learning_rate": 0.01, "loss": 1.9395, "step": 83724 }, { "epoch": 8.608574953732264, "grad_norm": 0.039367351680994034, "learning_rate": 0.01, "loss": 1.9496, "step": 83727 }, { "epoch": 8.608883405305367, "grad_norm": 0.03978506475687027, "learning_rate": 0.01, "loss": 1.9713, "step": 83730 }, { "epoch": 8.60919185687847, "grad_norm": 0.03550339117646217, "learning_rate": 0.01, "loss": 1.9662, "step": 83733 }, { "epoch": 8.609500308451572, "grad_norm": 0.06137849763035774, "learning_rate": 0.01, "loss": 1.9762, "step": 83736 }, { "epoch": 8.609808760024675, "grad_norm": 0.10555483400821686, "learning_rate": 0.01, "loss": 1.948, "step": 83739 }, { "epoch": 8.610117211597778, "grad_norm": 0.056359391659498215, "learning_rate": 0.01, "loss": 1.9577, "step": 83742 }, { "epoch": 8.610425663170883, "grad_norm": 0.039710041135549545, "learning_rate": 0.01, "loss": 1.9315, "step": 83745 }, { "epoch": 8.610734114743986, "grad_norm": 0.03399188816547394, "learning_rate": 0.01, "loss": 1.9405, "step": 83748 }, { "epoch": 8.611042566317089, "grad_norm": 0.05377564951777458, "learning_rate": 0.01, "loss": 1.9396, "step": 83751 }, { "epoch": 8.611351017890192, "grad_norm": 0.13132654130458832, "learning_rate": 0.01, "loss": 1.9552, "step": 83754 }, { "epoch": 8.611659469463294, "grad_norm": 0.0361807644367218, "learning_rate": 0.01, "loss": 1.9536, "step": 83757 }, { "epoch": 8.611967921036397, "grad_norm": 0.1197436973452568, "learning_rate": 0.01, "loss": 1.94, "step": 83760 }, { "epoch": 8.6122763726095, "grad_norm": 0.06834107637405396, "learning_rate": 0.01, "loss": 1.9577, "step": 83763 }, { "epoch": 8.612584824182603, "grad_norm": 0.055377859622240067, "learning_rate": 0.01, "loss": 1.9559, "step": 83766 }, { "epoch": 8.612893275755706, "grad_norm": 0.03706175461411476, "learning_rate": 0.01, "loss": 1.9566, "step": 83769 }, { "epoch": 8.613201727328809, "grad_norm": 0.058211736381053925, "learning_rate": 0.01, "loss": 1.9409, "step": 83772 }, { "epoch": 8.613510178901912, "grad_norm": 0.09024802595376968, "learning_rate": 0.01, "loss": 1.9309, "step": 83775 }, { "epoch": 8.613818630475015, "grad_norm": 0.046823352575302124, "learning_rate": 0.01, "loss": 1.9155, "step": 83778 }, { "epoch": 8.61412708204812, "grad_norm": 0.049351755529642105, "learning_rate": 0.01, "loss": 1.9464, "step": 83781 }, { "epoch": 8.614435533621222, "grad_norm": 0.04153472185134888, "learning_rate": 0.01, "loss": 1.9548, "step": 83784 }, { "epoch": 8.614743985194325, "grad_norm": 0.08837446570396423, "learning_rate": 0.01, "loss": 1.9376, "step": 83787 }, { "epoch": 8.615052436767428, "grad_norm": 0.09479968249797821, "learning_rate": 0.01, "loss": 1.9404, "step": 83790 }, { "epoch": 8.61536088834053, "grad_norm": 0.07434171438217163, "learning_rate": 0.01, "loss": 1.9694, "step": 83793 }, { "epoch": 8.615669339913634, "grad_norm": 0.04010748118162155, "learning_rate": 0.01, "loss": 1.9585, "step": 83796 }, { "epoch": 8.615977791486737, "grad_norm": 0.05582146719098091, "learning_rate": 0.01, "loss": 1.9561, "step": 83799 }, { "epoch": 8.61628624305984, "grad_norm": 0.057304516434669495, "learning_rate": 0.01, "loss": 1.942, "step": 83802 }, { "epoch": 8.616594694632942, "grad_norm": 0.046966370195150375, "learning_rate": 0.01, "loss": 1.9439, "step": 83805 }, { "epoch": 8.616903146206045, "grad_norm": 0.11336352676153183, "learning_rate": 0.01, "loss": 1.9338, "step": 83808 }, { "epoch": 8.617211597779148, "grad_norm": 0.051966287195682526, "learning_rate": 0.01, "loss": 1.9383, "step": 83811 }, { "epoch": 8.617520049352251, "grad_norm": 0.03408442437648773, "learning_rate": 0.01, "loss": 1.9558, "step": 83814 }, { "epoch": 8.617828500925354, "grad_norm": 0.04126676917076111, "learning_rate": 0.01, "loss": 1.9531, "step": 83817 }, { "epoch": 8.618136952498459, "grad_norm": 0.03017881140112877, "learning_rate": 0.01, "loss": 1.9416, "step": 83820 }, { "epoch": 8.618445404071561, "grad_norm": 0.13004688918590546, "learning_rate": 0.01, "loss": 1.9419, "step": 83823 }, { "epoch": 8.618753855644664, "grad_norm": 0.06664896011352539, "learning_rate": 0.01, "loss": 1.9385, "step": 83826 }, { "epoch": 8.619062307217767, "grad_norm": 0.0494823157787323, "learning_rate": 0.01, "loss": 1.927, "step": 83829 }, { "epoch": 8.61937075879087, "grad_norm": 0.0706791952252388, "learning_rate": 0.01, "loss": 1.9374, "step": 83832 }, { "epoch": 8.619679210363973, "grad_norm": 0.08942440897226334, "learning_rate": 0.01, "loss": 1.9408, "step": 83835 }, { "epoch": 8.619987661937076, "grad_norm": 0.04947560653090477, "learning_rate": 0.01, "loss": 1.9487, "step": 83838 }, { "epoch": 8.620296113510179, "grad_norm": 0.05541004240512848, "learning_rate": 0.01, "loss": 1.938, "step": 83841 }, { "epoch": 8.620604565083282, "grad_norm": 0.0484788678586483, "learning_rate": 0.01, "loss": 1.9311, "step": 83844 }, { "epoch": 8.620913016656385, "grad_norm": 0.04355737939476967, "learning_rate": 0.01, "loss": 1.9486, "step": 83847 }, { "epoch": 8.621221468229487, "grad_norm": 0.1241140142083168, "learning_rate": 0.01, "loss": 1.9265, "step": 83850 }, { "epoch": 8.62152991980259, "grad_norm": 0.04977406561374664, "learning_rate": 0.01, "loss": 1.9403, "step": 83853 }, { "epoch": 8.621838371375693, "grad_norm": 0.10179470479488373, "learning_rate": 0.01, "loss": 1.9444, "step": 83856 }, { "epoch": 8.622146822948798, "grad_norm": 0.05046776682138443, "learning_rate": 0.01, "loss": 1.9492, "step": 83859 }, { "epoch": 8.6224552745219, "grad_norm": 0.06996992230415344, "learning_rate": 0.01, "loss": 1.943, "step": 83862 }, { "epoch": 8.622763726095004, "grad_norm": 0.04119875654578209, "learning_rate": 0.01, "loss": 1.9381, "step": 83865 }, { "epoch": 8.623072177668107, "grad_norm": 0.04418569058179855, "learning_rate": 0.01, "loss": 1.955, "step": 83868 }, { "epoch": 8.62338062924121, "grad_norm": 0.04294402897357941, "learning_rate": 0.01, "loss": 1.9551, "step": 83871 }, { "epoch": 8.623689080814312, "grad_norm": 0.054850008338689804, "learning_rate": 0.01, "loss": 1.9559, "step": 83874 }, { "epoch": 8.623997532387415, "grad_norm": 0.053541239351034164, "learning_rate": 0.01, "loss": 1.9505, "step": 83877 }, { "epoch": 8.624305983960518, "grad_norm": 0.03481874614953995, "learning_rate": 0.01, "loss": 1.948, "step": 83880 }, { "epoch": 8.624614435533621, "grad_norm": 0.05405457317829132, "learning_rate": 0.01, "loss": 1.9331, "step": 83883 }, { "epoch": 8.624922887106724, "grad_norm": 0.07214061915874481, "learning_rate": 0.01, "loss": 1.9587, "step": 83886 }, { "epoch": 8.625231338679827, "grad_norm": 0.10415913164615631, "learning_rate": 0.01, "loss": 1.9396, "step": 83889 }, { "epoch": 8.62553979025293, "grad_norm": 0.055225618183612823, "learning_rate": 0.01, "loss": 1.9529, "step": 83892 }, { "epoch": 8.625848241826032, "grad_norm": 0.05259310081601143, "learning_rate": 0.01, "loss": 1.9372, "step": 83895 }, { "epoch": 8.626156693399137, "grad_norm": 0.03452230989933014, "learning_rate": 0.01, "loss": 1.9378, "step": 83898 }, { "epoch": 8.62646514497224, "grad_norm": 0.042962346225976944, "learning_rate": 0.01, "loss": 1.9724, "step": 83901 }, { "epoch": 8.626773596545343, "grad_norm": 0.0799313634634018, "learning_rate": 0.01, "loss": 1.9507, "step": 83904 }, { "epoch": 8.627082048118446, "grad_norm": 0.0853738859295845, "learning_rate": 0.01, "loss": 1.9821, "step": 83907 }, { "epoch": 8.627390499691549, "grad_norm": 0.084406279027462, "learning_rate": 0.01, "loss": 1.9473, "step": 83910 }, { "epoch": 8.627698951264652, "grad_norm": 0.07429136335849762, "learning_rate": 0.01, "loss": 1.9435, "step": 83913 }, { "epoch": 8.628007402837754, "grad_norm": 0.10301503539085388, "learning_rate": 0.01, "loss": 1.9511, "step": 83916 }, { "epoch": 8.628315854410857, "grad_norm": 0.06669825315475464, "learning_rate": 0.01, "loss": 1.9408, "step": 83919 }, { "epoch": 8.62862430598396, "grad_norm": 0.04260312020778656, "learning_rate": 0.01, "loss": 1.9348, "step": 83922 }, { "epoch": 8.628932757557063, "grad_norm": 0.09391593933105469, "learning_rate": 0.01, "loss": 1.9442, "step": 83925 }, { "epoch": 8.629241209130166, "grad_norm": 0.06853640079498291, "learning_rate": 0.01, "loss": 1.9435, "step": 83928 }, { "epoch": 8.629549660703269, "grad_norm": 0.03541896492242813, "learning_rate": 0.01, "loss": 1.9614, "step": 83931 }, { "epoch": 8.629858112276372, "grad_norm": 0.07495969533920288, "learning_rate": 0.01, "loss": 1.9738, "step": 83934 }, { "epoch": 8.630166563849476, "grad_norm": 0.08340419828891754, "learning_rate": 0.01, "loss": 1.9477, "step": 83937 }, { "epoch": 8.63047501542258, "grad_norm": 0.0702739953994751, "learning_rate": 0.01, "loss": 1.9523, "step": 83940 }, { "epoch": 8.630783466995682, "grad_norm": 0.07156652212142944, "learning_rate": 0.01, "loss": 1.9435, "step": 83943 }, { "epoch": 8.631091918568785, "grad_norm": 0.11016754060983658, "learning_rate": 0.01, "loss": 1.9587, "step": 83946 }, { "epoch": 8.631400370141888, "grad_norm": 0.07731759548187256, "learning_rate": 0.01, "loss": 1.9311, "step": 83949 }, { "epoch": 8.63170882171499, "grad_norm": 0.0376402772963047, "learning_rate": 0.01, "loss": 1.9588, "step": 83952 }, { "epoch": 8.632017273288094, "grad_norm": 0.04498516023159027, "learning_rate": 0.01, "loss": 1.9562, "step": 83955 }, { "epoch": 8.632325724861197, "grad_norm": 0.07725208252668381, "learning_rate": 0.01, "loss": 1.9685, "step": 83958 }, { "epoch": 8.6326341764343, "grad_norm": 0.17572909593582153, "learning_rate": 0.01, "loss": 1.9886, "step": 83961 }, { "epoch": 8.632942628007402, "grad_norm": 0.058839064091444016, "learning_rate": 0.01, "loss": 1.9177, "step": 83964 }, { "epoch": 8.633251079580505, "grad_norm": 0.04480128362774849, "learning_rate": 0.01, "loss": 1.9413, "step": 83967 }, { "epoch": 8.633559531153608, "grad_norm": 0.04104230925440788, "learning_rate": 0.01, "loss": 1.958, "step": 83970 }, { "epoch": 8.633867982726711, "grad_norm": 0.045199841260910034, "learning_rate": 0.01, "loss": 1.9512, "step": 83973 }, { "epoch": 8.634176434299816, "grad_norm": 0.03659757226705551, "learning_rate": 0.01, "loss": 1.9519, "step": 83976 }, { "epoch": 8.634484885872919, "grad_norm": 0.05661317706108093, "learning_rate": 0.01, "loss": 1.9629, "step": 83979 }, { "epoch": 8.634793337446022, "grad_norm": 0.04499388858675957, "learning_rate": 0.01, "loss": 1.9843, "step": 83982 }, { "epoch": 8.635101789019124, "grad_norm": 0.15299367904663086, "learning_rate": 0.01, "loss": 1.9368, "step": 83985 }, { "epoch": 8.635410240592227, "grad_norm": 0.041990868747234344, "learning_rate": 0.01, "loss": 1.9534, "step": 83988 }, { "epoch": 8.63571869216533, "grad_norm": 0.03629782050848007, "learning_rate": 0.01, "loss": 1.9554, "step": 83991 }, { "epoch": 8.636027143738433, "grad_norm": 0.05006648600101471, "learning_rate": 0.01, "loss": 1.9407, "step": 83994 }, { "epoch": 8.636335595311536, "grad_norm": 0.043421901762485504, "learning_rate": 0.01, "loss": 1.93, "step": 83997 }, { "epoch": 8.636644046884639, "grad_norm": 0.07872863858938217, "learning_rate": 0.01, "loss": 1.9396, "step": 84000 }, { "epoch": 8.636952498457742, "grad_norm": 0.06140751764178276, "learning_rate": 0.01, "loss": 1.9486, "step": 84003 }, { "epoch": 8.637260950030845, "grad_norm": 0.04020686075091362, "learning_rate": 0.01, "loss": 1.9733, "step": 84006 }, { "epoch": 8.637569401603947, "grad_norm": 0.07382489740848541, "learning_rate": 0.01, "loss": 1.9552, "step": 84009 }, { "epoch": 8.637877853177052, "grad_norm": 0.09471908211708069, "learning_rate": 0.01, "loss": 1.9307, "step": 84012 }, { "epoch": 8.638186304750155, "grad_norm": 0.13220688700675964, "learning_rate": 0.01, "loss": 1.9687, "step": 84015 }, { "epoch": 8.638494756323258, "grad_norm": 0.10175812989473343, "learning_rate": 0.01, "loss": 1.9361, "step": 84018 }, { "epoch": 8.63880320789636, "grad_norm": 0.043288156390190125, "learning_rate": 0.01, "loss": 1.938, "step": 84021 }, { "epoch": 8.639111659469464, "grad_norm": 0.05520573630928993, "learning_rate": 0.01, "loss": 1.9666, "step": 84024 }, { "epoch": 8.639420111042567, "grad_norm": 0.046032682061195374, "learning_rate": 0.01, "loss": 1.9443, "step": 84027 }, { "epoch": 8.63972856261567, "grad_norm": 0.0387708954513073, "learning_rate": 0.01, "loss": 1.9692, "step": 84030 }, { "epoch": 8.640037014188772, "grad_norm": 0.06580737978219986, "learning_rate": 0.01, "loss": 1.953, "step": 84033 }, { "epoch": 8.640345465761875, "grad_norm": 0.038871776312589645, "learning_rate": 0.01, "loss": 1.9538, "step": 84036 }, { "epoch": 8.640653917334978, "grad_norm": 0.033866509795188904, "learning_rate": 0.01, "loss": 1.9506, "step": 84039 }, { "epoch": 8.640962368908081, "grad_norm": 0.08168267458677292, "learning_rate": 0.01, "loss": 1.9427, "step": 84042 }, { "epoch": 8.641270820481184, "grad_norm": 0.06837775558233261, "learning_rate": 0.01, "loss": 1.9586, "step": 84045 }, { "epoch": 8.641579272054287, "grad_norm": 0.15111026167869568, "learning_rate": 0.01, "loss": 1.9294, "step": 84048 }, { "epoch": 8.641887723627391, "grad_norm": 0.08160342276096344, "learning_rate": 0.01, "loss": 1.9298, "step": 84051 }, { "epoch": 8.642196175200494, "grad_norm": 0.05196947604417801, "learning_rate": 0.01, "loss": 1.941, "step": 84054 }, { "epoch": 8.642504626773597, "grad_norm": 0.04552631825208664, "learning_rate": 0.01, "loss": 1.9708, "step": 84057 }, { "epoch": 8.6428130783467, "grad_norm": 0.04029174521565437, "learning_rate": 0.01, "loss": 1.976, "step": 84060 }, { "epoch": 8.643121529919803, "grad_norm": 0.04168844595551491, "learning_rate": 0.01, "loss": 1.9316, "step": 84063 }, { "epoch": 8.643429981492906, "grad_norm": 0.0722026526927948, "learning_rate": 0.01, "loss": 1.9564, "step": 84066 }, { "epoch": 8.643738433066009, "grad_norm": 0.04248455539345741, "learning_rate": 0.01, "loss": 1.9622, "step": 84069 }, { "epoch": 8.644046884639112, "grad_norm": 0.0467931292951107, "learning_rate": 0.01, "loss": 1.9238, "step": 84072 }, { "epoch": 8.644355336212215, "grad_norm": 0.1612360030412674, "learning_rate": 0.01, "loss": 1.9366, "step": 84075 }, { "epoch": 8.644663787785317, "grad_norm": 0.1361725628376007, "learning_rate": 0.01, "loss": 1.9362, "step": 84078 }, { "epoch": 8.64497223935842, "grad_norm": 0.058937378227710724, "learning_rate": 0.01, "loss": 1.9418, "step": 84081 }, { "epoch": 8.645280690931523, "grad_norm": 0.061618443578481674, "learning_rate": 0.01, "loss": 1.9636, "step": 84084 }, { "epoch": 8.645589142504626, "grad_norm": 0.039914749562740326, "learning_rate": 0.01, "loss": 1.9695, "step": 84087 }, { "epoch": 8.64589759407773, "grad_norm": 0.04626841843128204, "learning_rate": 0.01, "loss": 1.9342, "step": 84090 }, { "epoch": 8.646206045650834, "grad_norm": 0.049574919044971466, "learning_rate": 0.01, "loss": 1.9416, "step": 84093 }, { "epoch": 8.646514497223937, "grad_norm": 0.03658651188015938, "learning_rate": 0.01, "loss": 1.9312, "step": 84096 }, { "epoch": 8.64682294879704, "grad_norm": 0.035471998155117035, "learning_rate": 0.01, "loss": 1.945, "step": 84099 }, { "epoch": 8.647131400370142, "grad_norm": 0.04843059927225113, "learning_rate": 0.01, "loss": 1.9489, "step": 84102 }, { "epoch": 8.647439851943245, "grad_norm": 0.09598714858293533, "learning_rate": 0.01, "loss": 1.9927, "step": 84105 }, { "epoch": 8.647748303516348, "grad_norm": 0.1329140067100525, "learning_rate": 0.01, "loss": 1.9621, "step": 84108 }, { "epoch": 8.648056755089451, "grad_norm": 0.04247164726257324, "learning_rate": 0.01, "loss": 1.9517, "step": 84111 }, { "epoch": 8.648365206662554, "grad_norm": 0.044990718364715576, "learning_rate": 0.01, "loss": 1.921, "step": 84114 }, { "epoch": 8.648673658235657, "grad_norm": 0.05021393671631813, "learning_rate": 0.01, "loss": 1.936, "step": 84117 }, { "epoch": 8.64898210980876, "grad_norm": 0.048792239278554916, "learning_rate": 0.01, "loss": 1.9643, "step": 84120 }, { "epoch": 8.649290561381862, "grad_norm": 0.05867401510477066, "learning_rate": 0.01, "loss": 1.9517, "step": 84123 }, { "epoch": 8.649599012954965, "grad_norm": 0.04143434017896652, "learning_rate": 0.01, "loss": 1.9197, "step": 84126 }, { "epoch": 8.64990746452807, "grad_norm": 0.040858276188373566, "learning_rate": 0.01, "loss": 1.9652, "step": 84129 }, { "epoch": 8.650215916101173, "grad_norm": 0.057876672595739365, "learning_rate": 0.01, "loss": 1.956, "step": 84132 }, { "epoch": 8.650524367674276, "grad_norm": 0.12112187594175339, "learning_rate": 0.01, "loss": 1.9472, "step": 84135 }, { "epoch": 8.650832819247379, "grad_norm": 0.07901202887296677, "learning_rate": 0.01, "loss": 1.9438, "step": 84138 }, { "epoch": 8.651141270820482, "grad_norm": 0.14830529689788818, "learning_rate": 0.01, "loss": 1.9789, "step": 84141 }, { "epoch": 8.651449722393584, "grad_norm": 0.10633882135152817, "learning_rate": 0.01, "loss": 1.9614, "step": 84144 }, { "epoch": 8.651758173966687, "grad_norm": 0.07728983461856842, "learning_rate": 0.01, "loss": 1.9364, "step": 84147 }, { "epoch": 8.65206662553979, "grad_norm": 0.03409942612051964, "learning_rate": 0.01, "loss": 1.9535, "step": 84150 }, { "epoch": 8.652375077112893, "grad_norm": 0.035978421568870544, "learning_rate": 0.01, "loss": 1.9513, "step": 84153 }, { "epoch": 8.652683528685996, "grad_norm": 0.03137461096048355, "learning_rate": 0.01, "loss": 1.96, "step": 84156 }, { "epoch": 8.652991980259099, "grad_norm": 0.05230855941772461, "learning_rate": 0.01, "loss": 1.9445, "step": 84159 }, { "epoch": 8.653300431832202, "grad_norm": 0.040534112602472305, "learning_rate": 0.01, "loss": 1.9489, "step": 84162 }, { "epoch": 8.653608883405305, "grad_norm": 0.08531369268894196, "learning_rate": 0.01, "loss": 1.9431, "step": 84165 }, { "epoch": 8.65391733497841, "grad_norm": 0.05055827647447586, "learning_rate": 0.01, "loss": 1.9498, "step": 84168 }, { "epoch": 8.654225786551512, "grad_norm": 0.13753488659858704, "learning_rate": 0.01, "loss": 1.9703, "step": 84171 }, { "epoch": 8.654534238124615, "grad_norm": 0.11392176151275635, "learning_rate": 0.01, "loss": 1.9498, "step": 84174 }, { "epoch": 8.654842689697718, "grad_norm": 0.0441092848777771, "learning_rate": 0.01, "loss": 1.9305, "step": 84177 }, { "epoch": 8.65515114127082, "grad_norm": 0.05333095043897629, "learning_rate": 0.01, "loss": 1.9394, "step": 84180 }, { "epoch": 8.655459592843924, "grad_norm": 0.08408643305301666, "learning_rate": 0.01, "loss": 1.949, "step": 84183 }, { "epoch": 8.655768044417027, "grad_norm": 0.06018614023923874, "learning_rate": 0.01, "loss": 1.9176, "step": 84186 }, { "epoch": 8.65607649599013, "grad_norm": 0.049890197813510895, "learning_rate": 0.01, "loss": 1.9413, "step": 84189 }, { "epoch": 8.656384947563232, "grad_norm": 0.06057172268629074, "learning_rate": 0.01, "loss": 1.9434, "step": 84192 }, { "epoch": 8.656693399136335, "grad_norm": 0.06060672923922539, "learning_rate": 0.01, "loss": 1.9688, "step": 84195 }, { "epoch": 8.657001850709438, "grad_norm": 0.06296031922101974, "learning_rate": 0.01, "loss": 1.9401, "step": 84198 }, { "epoch": 8.657310302282541, "grad_norm": 0.05551283806562424, "learning_rate": 0.01, "loss": 1.9497, "step": 84201 }, { "epoch": 8.657618753855644, "grad_norm": 0.044948600232601166, "learning_rate": 0.01, "loss": 1.9433, "step": 84204 }, { "epoch": 8.657927205428749, "grad_norm": 0.04862402006983757, "learning_rate": 0.01, "loss": 1.9316, "step": 84207 }, { "epoch": 8.658235657001851, "grad_norm": 0.11310559511184692, "learning_rate": 0.01, "loss": 1.9408, "step": 84210 }, { "epoch": 8.658544108574954, "grad_norm": 0.07889222353696823, "learning_rate": 0.01, "loss": 1.9577, "step": 84213 }, { "epoch": 8.658852560148057, "grad_norm": 0.12257291376590729, "learning_rate": 0.01, "loss": 1.9583, "step": 84216 }, { "epoch": 8.65916101172116, "grad_norm": 0.05437285453081131, "learning_rate": 0.01, "loss": 1.9318, "step": 84219 }, { "epoch": 8.659469463294263, "grad_norm": 0.0486476868391037, "learning_rate": 0.01, "loss": 1.9347, "step": 84222 }, { "epoch": 8.659777914867366, "grad_norm": 0.04230161011219025, "learning_rate": 0.01, "loss": 1.9271, "step": 84225 }, { "epoch": 8.660086366440469, "grad_norm": 0.05395093187689781, "learning_rate": 0.01, "loss": 1.9686, "step": 84228 }, { "epoch": 8.660394818013572, "grad_norm": 0.050544533878564835, "learning_rate": 0.01, "loss": 1.964, "step": 84231 }, { "epoch": 8.660703269586675, "grad_norm": 0.0582561120390892, "learning_rate": 0.01, "loss": 1.9516, "step": 84234 }, { "epoch": 8.661011721159777, "grad_norm": 0.1040964350104332, "learning_rate": 0.01, "loss": 1.9565, "step": 84237 }, { "epoch": 8.66132017273288, "grad_norm": 0.03669446334242821, "learning_rate": 0.01, "loss": 1.942, "step": 84240 }, { "epoch": 8.661628624305983, "grad_norm": 0.11392202973365784, "learning_rate": 0.01, "loss": 1.9473, "step": 84243 }, { "epoch": 8.661937075879088, "grad_norm": 0.03846215084195137, "learning_rate": 0.01, "loss": 1.9368, "step": 84246 }, { "epoch": 8.66224552745219, "grad_norm": 0.09960336238145828, "learning_rate": 0.01, "loss": 1.9464, "step": 84249 }, { "epoch": 8.662553979025294, "grad_norm": 0.07459977269172668, "learning_rate": 0.01, "loss": 1.9426, "step": 84252 }, { "epoch": 8.662862430598397, "grad_norm": 0.03963596746325493, "learning_rate": 0.01, "loss": 1.9453, "step": 84255 }, { "epoch": 8.6631708821715, "grad_norm": 0.09526780992746353, "learning_rate": 0.01, "loss": 1.9819, "step": 84258 }, { "epoch": 8.663479333744602, "grad_norm": 0.13918493688106537, "learning_rate": 0.01, "loss": 1.9479, "step": 84261 }, { "epoch": 8.663787785317705, "grad_norm": 0.0965191200375557, "learning_rate": 0.01, "loss": 1.9624, "step": 84264 }, { "epoch": 8.664096236890808, "grad_norm": 0.07372299581766129, "learning_rate": 0.01, "loss": 1.9424, "step": 84267 }, { "epoch": 8.664404688463911, "grad_norm": 0.07126685976982117, "learning_rate": 0.01, "loss": 1.9511, "step": 84270 }, { "epoch": 8.664713140037014, "grad_norm": 0.04940878972411156, "learning_rate": 0.01, "loss": 1.96, "step": 84273 }, { "epoch": 8.665021591610117, "grad_norm": 0.041048500686883926, "learning_rate": 0.01, "loss": 1.9303, "step": 84276 }, { "epoch": 8.66533004318322, "grad_norm": 0.05010208114981651, "learning_rate": 0.01, "loss": 1.9658, "step": 84279 }, { "epoch": 8.665638494756323, "grad_norm": 0.035412829369306564, "learning_rate": 0.01, "loss": 1.9303, "step": 84282 }, { "epoch": 8.665946946329427, "grad_norm": 0.040003497153520584, "learning_rate": 0.01, "loss": 1.9388, "step": 84285 }, { "epoch": 8.66625539790253, "grad_norm": 0.09944424033164978, "learning_rate": 0.01, "loss": 1.9659, "step": 84288 }, { "epoch": 8.666563849475633, "grad_norm": 0.11967995762825012, "learning_rate": 0.01, "loss": 1.947, "step": 84291 }, { "epoch": 8.666872301048736, "grad_norm": 0.0617777556180954, "learning_rate": 0.01, "loss": 1.9651, "step": 84294 }, { "epoch": 8.667180752621839, "grad_norm": 0.07158997654914856, "learning_rate": 0.01, "loss": 1.9689, "step": 84297 }, { "epoch": 8.667489204194942, "grad_norm": 0.08579113334417343, "learning_rate": 0.01, "loss": 1.9376, "step": 84300 }, { "epoch": 8.667797655768045, "grad_norm": 0.06357930600643158, "learning_rate": 0.01, "loss": 1.9611, "step": 84303 }, { "epoch": 8.668106107341147, "grad_norm": 0.06668613851070404, "learning_rate": 0.01, "loss": 1.9482, "step": 84306 }, { "epoch": 8.66841455891425, "grad_norm": 0.06983932852745056, "learning_rate": 0.01, "loss": 1.9482, "step": 84309 }, { "epoch": 8.668723010487353, "grad_norm": 0.07324071228504181, "learning_rate": 0.01, "loss": 1.9236, "step": 84312 }, { "epoch": 8.669031462060456, "grad_norm": 0.12372716516256332, "learning_rate": 0.01, "loss": 1.9527, "step": 84315 }, { "epoch": 8.669339913633559, "grad_norm": 0.0913889929652214, "learning_rate": 0.01, "loss": 1.9604, "step": 84318 }, { "epoch": 8.669648365206662, "grad_norm": 0.08444677293300629, "learning_rate": 0.01, "loss": 1.9363, "step": 84321 }, { "epoch": 8.669956816779766, "grad_norm": 0.06621287018060684, "learning_rate": 0.01, "loss": 1.9624, "step": 84324 }, { "epoch": 8.67026526835287, "grad_norm": 0.05836626887321472, "learning_rate": 0.01, "loss": 1.9377, "step": 84327 }, { "epoch": 8.670573719925972, "grad_norm": 0.04178153723478317, "learning_rate": 0.01, "loss": 1.9475, "step": 84330 }, { "epoch": 8.670882171499075, "grad_norm": 0.04391157999634743, "learning_rate": 0.01, "loss": 1.9391, "step": 84333 }, { "epoch": 8.671190623072178, "grad_norm": 0.041439518332481384, "learning_rate": 0.01, "loss": 1.951, "step": 84336 }, { "epoch": 8.671499074645281, "grad_norm": 0.04575076699256897, "learning_rate": 0.01, "loss": 1.9466, "step": 84339 }, { "epoch": 8.671807526218384, "grad_norm": 0.12655547261238098, "learning_rate": 0.01, "loss": 1.95, "step": 84342 }, { "epoch": 8.672115977791487, "grad_norm": 0.0857333168387413, "learning_rate": 0.01, "loss": 1.9434, "step": 84345 }, { "epoch": 8.67242442936459, "grad_norm": 0.07120339572429657, "learning_rate": 0.01, "loss": 1.9371, "step": 84348 }, { "epoch": 8.672732880937692, "grad_norm": 0.08852653950452805, "learning_rate": 0.01, "loss": 1.9221, "step": 84351 }, { "epoch": 8.673041332510795, "grad_norm": 0.11633346229791641, "learning_rate": 0.01, "loss": 1.9932, "step": 84354 }, { "epoch": 8.673349784083898, "grad_norm": 0.07619529217481613, "learning_rate": 0.01, "loss": 1.9457, "step": 84357 }, { "epoch": 8.673658235657001, "grad_norm": 0.06116296350955963, "learning_rate": 0.01, "loss": 1.9665, "step": 84360 }, { "epoch": 8.673966687230106, "grad_norm": 0.08434920012950897, "learning_rate": 0.01, "loss": 1.9552, "step": 84363 }, { "epoch": 8.674275138803209, "grad_norm": 0.08315224945545197, "learning_rate": 0.01, "loss": 1.95, "step": 84366 }, { "epoch": 8.674583590376312, "grad_norm": 0.07332847267389297, "learning_rate": 0.01, "loss": 1.9191, "step": 84369 }, { "epoch": 8.674892041949414, "grad_norm": 0.04000057280063629, "learning_rate": 0.01, "loss": 1.9454, "step": 84372 }, { "epoch": 8.675200493522517, "grad_norm": 0.11484423279762268, "learning_rate": 0.01, "loss": 1.9361, "step": 84375 }, { "epoch": 8.67550894509562, "grad_norm": 0.09171456098556519, "learning_rate": 0.01, "loss": 1.9594, "step": 84378 }, { "epoch": 8.675817396668723, "grad_norm": 0.0660143718123436, "learning_rate": 0.01, "loss": 1.9449, "step": 84381 }, { "epoch": 8.676125848241826, "grad_norm": 0.03662162646651268, "learning_rate": 0.01, "loss": 1.9551, "step": 84384 }, { "epoch": 8.676434299814929, "grad_norm": 0.044858962297439575, "learning_rate": 0.01, "loss": 1.9519, "step": 84387 }, { "epoch": 8.676742751388032, "grad_norm": 0.05560542270541191, "learning_rate": 0.01, "loss": 1.9453, "step": 84390 }, { "epoch": 8.677051202961135, "grad_norm": 0.053774718195199966, "learning_rate": 0.01, "loss": 1.9435, "step": 84393 }, { "epoch": 8.677359654534238, "grad_norm": 0.11037126928567886, "learning_rate": 0.01, "loss": 1.9378, "step": 84396 }, { "epoch": 8.67766810610734, "grad_norm": 0.06227453798055649, "learning_rate": 0.01, "loss": 1.9514, "step": 84399 }, { "epoch": 8.677976557680445, "grad_norm": 0.09894663840532303, "learning_rate": 0.01, "loss": 1.9576, "step": 84402 }, { "epoch": 8.678285009253548, "grad_norm": 0.06907567381858826, "learning_rate": 0.01, "loss": 1.9226, "step": 84405 }, { "epoch": 8.67859346082665, "grad_norm": 0.05042900890111923, "learning_rate": 0.01, "loss": 1.9381, "step": 84408 }, { "epoch": 8.678901912399754, "grad_norm": 0.08946631848812103, "learning_rate": 0.01, "loss": 1.9463, "step": 84411 }, { "epoch": 8.679210363972857, "grad_norm": 0.08894028514623642, "learning_rate": 0.01, "loss": 1.9371, "step": 84414 }, { "epoch": 8.67951881554596, "grad_norm": 0.0353781059384346, "learning_rate": 0.01, "loss": 1.945, "step": 84417 }, { "epoch": 8.679827267119062, "grad_norm": 0.031145529821515083, "learning_rate": 0.01, "loss": 1.952, "step": 84420 }, { "epoch": 8.680135718692165, "grad_norm": 0.03298868238925934, "learning_rate": 0.01, "loss": 1.9584, "step": 84423 }, { "epoch": 8.680444170265268, "grad_norm": 0.09564276784658432, "learning_rate": 0.01, "loss": 1.9515, "step": 84426 }, { "epoch": 8.680752621838371, "grad_norm": 0.030355164781212807, "learning_rate": 0.01, "loss": 1.9296, "step": 84429 }, { "epoch": 8.681061073411474, "grad_norm": 0.04145561531186104, "learning_rate": 0.01, "loss": 1.9502, "step": 84432 }, { "epoch": 8.681369524984577, "grad_norm": 0.06485619395971298, "learning_rate": 0.01, "loss": 1.9652, "step": 84435 }, { "epoch": 8.68167797655768, "grad_norm": 0.0540875568985939, "learning_rate": 0.01, "loss": 1.9584, "step": 84438 }, { "epoch": 8.681986428130784, "grad_norm": 0.039081066846847534, "learning_rate": 0.01, "loss": 1.9436, "step": 84441 }, { "epoch": 8.682294879703887, "grad_norm": 0.03886258602142334, "learning_rate": 0.01, "loss": 1.938, "step": 84444 }, { "epoch": 8.68260333127699, "grad_norm": 0.0608995221555233, "learning_rate": 0.01, "loss": 1.9527, "step": 84447 }, { "epoch": 8.682911782850093, "grad_norm": 0.12530915439128876, "learning_rate": 0.01, "loss": 1.9613, "step": 84450 }, { "epoch": 8.683220234423196, "grad_norm": 0.08348006755113602, "learning_rate": 0.01, "loss": 1.9497, "step": 84453 }, { "epoch": 8.683528685996299, "grad_norm": 0.0368838831782341, "learning_rate": 0.01, "loss": 1.9791, "step": 84456 }, { "epoch": 8.683837137569402, "grad_norm": 0.09236108511686325, "learning_rate": 0.01, "loss": 1.9437, "step": 84459 }, { "epoch": 8.684145589142505, "grad_norm": 0.052283886820077896, "learning_rate": 0.01, "loss": 1.9303, "step": 84462 }, { "epoch": 8.684454040715607, "grad_norm": 0.10161884874105453, "learning_rate": 0.01, "loss": 1.9785, "step": 84465 }, { "epoch": 8.68476249228871, "grad_norm": 0.05157725512981415, "learning_rate": 0.01, "loss": 1.9291, "step": 84468 }, { "epoch": 8.685070943861813, "grad_norm": 0.04975030943751335, "learning_rate": 0.01, "loss": 1.9662, "step": 84471 }, { "epoch": 8.685379395434916, "grad_norm": 0.11820569634437561, "learning_rate": 0.01, "loss": 1.9659, "step": 84474 }, { "epoch": 8.685687847008019, "grad_norm": 0.07684563845396042, "learning_rate": 0.01, "loss": 1.9643, "step": 84477 }, { "epoch": 8.685996298581124, "grad_norm": 0.07410179823637009, "learning_rate": 0.01, "loss": 1.9358, "step": 84480 }, { "epoch": 8.686304750154227, "grad_norm": 0.0489877387881279, "learning_rate": 0.01, "loss": 1.956, "step": 84483 }, { "epoch": 8.68661320172733, "grad_norm": 0.03942607715725899, "learning_rate": 0.01, "loss": 1.9424, "step": 84486 }, { "epoch": 8.686921653300432, "grad_norm": 0.03467392548918724, "learning_rate": 0.01, "loss": 1.9453, "step": 84489 }, { "epoch": 8.687230104873535, "grad_norm": 0.05109754577279091, "learning_rate": 0.01, "loss": 1.967, "step": 84492 }, { "epoch": 8.687538556446638, "grad_norm": 0.06021739915013313, "learning_rate": 0.01, "loss": 1.9293, "step": 84495 }, { "epoch": 8.687847008019741, "grad_norm": 0.1043761596083641, "learning_rate": 0.01, "loss": 1.9618, "step": 84498 }, { "epoch": 8.688155459592844, "grad_norm": 0.047195639461278915, "learning_rate": 0.01, "loss": 1.9533, "step": 84501 }, { "epoch": 8.688463911165947, "grad_norm": 0.11019480228424072, "learning_rate": 0.01, "loss": 1.9719, "step": 84504 }, { "epoch": 8.68877236273905, "grad_norm": 0.054665159434080124, "learning_rate": 0.01, "loss": 1.9576, "step": 84507 }, { "epoch": 8.689080814312153, "grad_norm": 0.11282628029584885, "learning_rate": 0.01, "loss": 1.9389, "step": 84510 }, { "epoch": 8.689389265885255, "grad_norm": 0.059272971004247665, "learning_rate": 0.01, "loss": 1.9314, "step": 84513 }, { "epoch": 8.689697717458358, "grad_norm": 0.04295983165502548, "learning_rate": 0.01, "loss": 1.9399, "step": 84516 }, { "epoch": 8.690006169031463, "grad_norm": 0.045819997787475586, "learning_rate": 0.01, "loss": 1.9155, "step": 84519 }, { "epoch": 8.690314620604566, "grad_norm": 0.08226123452186584, "learning_rate": 0.01, "loss": 1.9238, "step": 84522 }, { "epoch": 8.690623072177669, "grad_norm": 0.08438362181186676, "learning_rate": 0.01, "loss": 1.9911, "step": 84525 }, { "epoch": 8.690931523750772, "grad_norm": 0.1893005669116974, "learning_rate": 0.01, "loss": 1.9727, "step": 84528 }, { "epoch": 8.691239975323874, "grad_norm": 0.07184705138206482, "learning_rate": 0.01, "loss": 1.9513, "step": 84531 }, { "epoch": 8.691548426896977, "grad_norm": 0.04775119200348854, "learning_rate": 0.01, "loss": 1.9317, "step": 84534 }, { "epoch": 8.69185687847008, "grad_norm": 0.04076853394508362, "learning_rate": 0.01, "loss": 1.9442, "step": 84537 }, { "epoch": 8.692165330043183, "grad_norm": 0.035510528832674026, "learning_rate": 0.01, "loss": 1.9385, "step": 84540 }, { "epoch": 8.692473781616286, "grad_norm": 0.039787620306015015, "learning_rate": 0.01, "loss": 1.9615, "step": 84543 }, { "epoch": 8.692782233189389, "grad_norm": 0.035327181220054626, "learning_rate": 0.01, "loss": 1.9488, "step": 84546 }, { "epoch": 8.693090684762492, "grad_norm": 0.06223301589488983, "learning_rate": 0.01, "loss": 1.9362, "step": 84549 }, { "epoch": 8.693399136335595, "grad_norm": 0.2152005136013031, "learning_rate": 0.01, "loss": 1.9334, "step": 84552 }, { "epoch": 8.693707587908698, "grad_norm": 0.10525162518024445, "learning_rate": 0.01, "loss": 1.9578, "step": 84555 }, { "epoch": 8.694016039481802, "grad_norm": 0.09987024962902069, "learning_rate": 0.01, "loss": 1.9402, "step": 84558 }, { "epoch": 8.694324491054905, "grad_norm": 0.06907907873392105, "learning_rate": 0.01, "loss": 1.9256, "step": 84561 }, { "epoch": 8.694632942628008, "grad_norm": 0.05267520993947983, "learning_rate": 0.01, "loss": 1.9502, "step": 84564 }, { "epoch": 8.694941394201111, "grad_norm": 0.03861707076430321, "learning_rate": 0.01, "loss": 1.9437, "step": 84567 }, { "epoch": 8.695249845774214, "grad_norm": 0.057318273931741714, "learning_rate": 0.01, "loss": 1.9733, "step": 84570 }, { "epoch": 8.695558297347317, "grad_norm": 0.05354475975036621, "learning_rate": 0.01, "loss": 1.9405, "step": 84573 }, { "epoch": 8.69586674892042, "grad_norm": 0.05339827388525009, "learning_rate": 0.01, "loss": 1.9543, "step": 84576 }, { "epoch": 8.696175200493522, "grad_norm": 0.050137948244810104, "learning_rate": 0.01, "loss": 1.9619, "step": 84579 }, { "epoch": 8.696483652066625, "grad_norm": 0.033642902970314026, "learning_rate": 0.01, "loss": 1.9451, "step": 84582 }, { "epoch": 8.696792103639728, "grad_norm": 0.03329618647694588, "learning_rate": 0.01, "loss": 1.9343, "step": 84585 }, { "epoch": 8.697100555212831, "grad_norm": 0.09975992143154144, "learning_rate": 0.01, "loss": 1.9353, "step": 84588 }, { "epoch": 8.697409006785934, "grad_norm": 0.15377679467201233, "learning_rate": 0.01, "loss": 1.9609, "step": 84591 }, { "epoch": 8.697717458359037, "grad_norm": 0.09404116123914719, "learning_rate": 0.01, "loss": 1.9685, "step": 84594 }, { "epoch": 8.698025909932142, "grad_norm": 0.05135189741849899, "learning_rate": 0.01, "loss": 1.9192, "step": 84597 }, { "epoch": 8.698334361505244, "grad_norm": 0.04255730286240578, "learning_rate": 0.01, "loss": 1.9515, "step": 84600 }, { "epoch": 8.698642813078347, "grad_norm": 0.04291402921080589, "learning_rate": 0.01, "loss": 1.9579, "step": 84603 }, { "epoch": 8.69895126465145, "grad_norm": 0.05834383890032768, "learning_rate": 0.01, "loss": 1.9602, "step": 84606 }, { "epoch": 8.699259716224553, "grad_norm": 0.05079890415072441, "learning_rate": 0.01, "loss": 1.9207, "step": 84609 }, { "epoch": 8.699568167797656, "grad_norm": 0.04097910597920418, "learning_rate": 0.01, "loss": 1.9572, "step": 84612 }, { "epoch": 8.699876619370759, "grad_norm": 0.03598971664905548, "learning_rate": 0.01, "loss": 1.9488, "step": 84615 }, { "epoch": 8.700185070943862, "grad_norm": 0.04535605013370514, "learning_rate": 0.01, "loss": 1.9278, "step": 84618 }, { "epoch": 8.700493522516965, "grad_norm": 0.07048530131578445, "learning_rate": 0.01, "loss": 1.9373, "step": 84621 }, { "epoch": 8.700801974090068, "grad_norm": 0.0713009163737297, "learning_rate": 0.01, "loss": 1.9653, "step": 84624 }, { "epoch": 8.70111042566317, "grad_norm": 0.10725833475589752, "learning_rate": 0.01, "loss": 1.9456, "step": 84627 }, { "epoch": 8.701418877236273, "grad_norm": 0.061335306614637375, "learning_rate": 0.01, "loss": 1.9815, "step": 84630 }, { "epoch": 8.701727328809376, "grad_norm": 0.09121435880661011, "learning_rate": 0.01, "loss": 1.9625, "step": 84633 }, { "epoch": 8.70203578038248, "grad_norm": 0.039087846875190735, "learning_rate": 0.01, "loss": 1.9321, "step": 84636 }, { "epoch": 8.702344231955584, "grad_norm": 0.06266528367996216, "learning_rate": 0.01, "loss": 1.9248, "step": 84639 }, { "epoch": 8.702652683528687, "grad_norm": 0.093431256711483, "learning_rate": 0.01, "loss": 1.9609, "step": 84642 }, { "epoch": 8.70296113510179, "grad_norm": 0.05956881865859032, "learning_rate": 0.01, "loss": 1.9571, "step": 84645 }, { "epoch": 8.703269586674892, "grad_norm": 0.08706646412611008, "learning_rate": 0.01, "loss": 1.9354, "step": 84648 }, { "epoch": 8.703578038247995, "grad_norm": 0.07080619037151337, "learning_rate": 0.01, "loss": 1.9512, "step": 84651 }, { "epoch": 8.703886489821098, "grad_norm": 0.05497487634420395, "learning_rate": 0.01, "loss": 1.9679, "step": 84654 }, { "epoch": 8.704194941394201, "grad_norm": 0.054549477994441986, "learning_rate": 0.01, "loss": 1.9593, "step": 84657 }, { "epoch": 8.704503392967304, "grad_norm": 0.04388706758618355, "learning_rate": 0.01, "loss": 1.965, "step": 84660 }, { "epoch": 8.704811844540407, "grad_norm": 0.04360177367925644, "learning_rate": 0.01, "loss": 1.944, "step": 84663 }, { "epoch": 8.70512029611351, "grad_norm": 0.07289383560419083, "learning_rate": 0.01, "loss": 1.9314, "step": 84666 }, { "epoch": 8.705428747686613, "grad_norm": 0.07846248149871826, "learning_rate": 0.01, "loss": 1.9242, "step": 84669 }, { "epoch": 8.705737199259715, "grad_norm": 0.10681965947151184, "learning_rate": 0.01, "loss": 1.9584, "step": 84672 }, { "epoch": 8.70604565083282, "grad_norm": 0.16123338043689728, "learning_rate": 0.01, "loss": 1.9371, "step": 84675 }, { "epoch": 8.706354102405923, "grad_norm": 0.12931574881076813, "learning_rate": 0.01, "loss": 1.941, "step": 84678 }, { "epoch": 8.706662553979026, "grad_norm": 0.05700478330254555, "learning_rate": 0.01, "loss": 1.9564, "step": 84681 }, { "epoch": 8.706971005552129, "grad_norm": 0.041084181517362595, "learning_rate": 0.01, "loss": 1.9349, "step": 84684 }, { "epoch": 8.707279457125232, "grad_norm": 0.03249974176287651, "learning_rate": 0.01, "loss": 1.9312, "step": 84687 }, { "epoch": 8.707587908698335, "grad_norm": 0.03458573669195175, "learning_rate": 0.01, "loss": 1.949, "step": 84690 }, { "epoch": 8.707896360271437, "grad_norm": 0.037099480628967285, "learning_rate": 0.01, "loss": 1.9347, "step": 84693 }, { "epoch": 8.70820481184454, "grad_norm": 0.057972583919763565, "learning_rate": 0.01, "loss": 1.9487, "step": 84696 }, { "epoch": 8.708513263417643, "grad_norm": 0.0855930969119072, "learning_rate": 0.01, "loss": 1.9273, "step": 84699 }, { "epoch": 8.708821714990746, "grad_norm": 0.10864733159542084, "learning_rate": 0.01, "loss": 1.9393, "step": 84702 }, { "epoch": 8.709130166563849, "grad_norm": 0.07841694355010986, "learning_rate": 0.01, "loss": 1.9257, "step": 84705 }, { "epoch": 8.709438618136952, "grad_norm": 0.08079460263252258, "learning_rate": 0.01, "loss": 1.9473, "step": 84708 }, { "epoch": 8.709747069710055, "grad_norm": 0.03550470247864723, "learning_rate": 0.01, "loss": 1.936, "step": 84711 }, { "epoch": 8.71005552128316, "grad_norm": 0.057293109595775604, "learning_rate": 0.01, "loss": 1.944, "step": 84714 }, { "epoch": 8.710363972856262, "grad_norm": 0.0852893516421318, "learning_rate": 0.01, "loss": 1.927, "step": 84717 }, { "epoch": 8.710672424429365, "grad_norm": 0.06920026242733002, "learning_rate": 0.01, "loss": 1.9337, "step": 84720 }, { "epoch": 8.710980876002468, "grad_norm": 0.07233172655105591, "learning_rate": 0.01, "loss": 1.9704, "step": 84723 }, { "epoch": 8.711289327575571, "grad_norm": 0.04527530446648598, "learning_rate": 0.01, "loss": 1.9433, "step": 84726 }, { "epoch": 8.711597779148674, "grad_norm": 0.10784745216369629, "learning_rate": 0.01, "loss": 1.944, "step": 84729 }, { "epoch": 8.711906230721777, "grad_norm": 0.047310832887887955, "learning_rate": 0.01, "loss": 1.9543, "step": 84732 }, { "epoch": 8.71221468229488, "grad_norm": 0.09746813774108887, "learning_rate": 0.01, "loss": 1.9418, "step": 84735 }, { "epoch": 8.712523133867982, "grad_norm": 0.04717058315873146, "learning_rate": 0.01, "loss": 1.9583, "step": 84738 }, { "epoch": 8.712831585441085, "grad_norm": 0.07984232157468796, "learning_rate": 0.01, "loss": 1.9192, "step": 84741 }, { "epoch": 8.713140037014188, "grad_norm": 0.07670383900403976, "learning_rate": 0.01, "loss": 1.9623, "step": 84744 }, { "epoch": 8.713448488587291, "grad_norm": 0.07070758193731308, "learning_rate": 0.01, "loss": 1.931, "step": 84747 }, { "epoch": 8.713756940160394, "grad_norm": 0.03936431184411049, "learning_rate": 0.01, "loss": 1.9218, "step": 84750 }, { "epoch": 8.714065391733499, "grad_norm": 0.04538992792367935, "learning_rate": 0.01, "loss": 1.9401, "step": 84753 }, { "epoch": 8.714373843306602, "grad_norm": 0.07445311546325684, "learning_rate": 0.01, "loss": 1.9375, "step": 84756 }, { "epoch": 8.714682294879704, "grad_norm": 0.06671564280986786, "learning_rate": 0.01, "loss": 1.9279, "step": 84759 }, { "epoch": 8.714990746452807, "grad_norm": 0.06232886016368866, "learning_rate": 0.01, "loss": 1.9491, "step": 84762 }, { "epoch": 8.71529919802591, "grad_norm": 0.06302858144044876, "learning_rate": 0.01, "loss": 1.9547, "step": 84765 }, { "epoch": 8.715607649599013, "grad_norm": 0.03369203582406044, "learning_rate": 0.01, "loss": 1.9678, "step": 84768 }, { "epoch": 8.715916101172116, "grad_norm": 0.04007703810930252, "learning_rate": 0.01, "loss": 1.9627, "step": 84771 }, { "epoch": 8.716224552745219, "grad_norm": 0.06994391232728958, "learning_rate": 0.01, "loss": 1.945, "step": 84774 }, { "epoch": 8.716533004318322, "grad_norm": 0.039773739874362946, "learning_rate": 0.01, "loss": 1.9758, "step": 84777 }, { "epoch": 8.716841455891425, "grad_norm": 0.189493790268898, "learning_rate": 0.01, "loss": 1.9671, "step": 84780 }, { "epoch": 8.717149907464528, "grad_norm": 0.09057370573282242, "learning_rate": 0.01, "loss": 1.9387, "step": 84783 }, { "epoch": 8.71745835903763, "grad_norm": 0.12711824476718903, "learning_rate": 0.01, "loss": 1.9334, "step": 84786 }, { "epoch": 8.717766810610733, "grad_norm": 0.0868084728717804, "learning_rate": 0.01, "loss": 1.9369, "step": 84789 }, { "epoch": 8.718075262183838, "grad_norm": 0.04147063195705414, "learning_rate": 0.01, "loss": 1.9655, "step": 84792 }, { "epoch": 8.71838371375694, "grad_norm": 0.04532619193196297, "learning_rate": 0.01, "loss": 1.955, "step": 84795 }, { "epoch": 8.718692165330044, "grad_norm": 0.07199708372354507, "learning_rate": 0.01, "loss": 1.9396, "step": 84798 }, { "epoch": 8.719000616903147, "grad_norm": 0.060223303735256195, "learning_rate": 0.01, "loss": 1.9724, "step": 84801 }, { "epoch": 8.71930906847625, "grad_norm": 0.03616713732481003, "learning_rate": 0.01, "loss": 1.9521, "step": 84804 }, { "epoch": 8.719617520049352, "grad_norm": 0.08611498028039932, "learning_rate": 0.01, "loss": 1.9502, "step": 84807 }, { "epoch": 8.719925971622455, "grad_norm": 0.04345080256462097, "learning_rate": 0.01, "loss": 1.9254, "step": 84810 }, { "epoch": 8.720234423195558, "grad_norm": 0.07039283215999603, "learning_rate": 0.01, "loss": 1.9618, "step": 84813 }, { "epoch": 8.720542874768661, "grad_norm": 0.03262734040617943, "learning_rate": 0.01, "loss": 1.9637, "step": 84816 }, { "epoch": 8.720851326341764, "grad_norm": 0.0588054358959198, "learning_rate": 0.01, "loss": 1.9269, "step": 84819 }, { "epoch": 8.721159777914867, "grad_norm": 0.17544658482074738, "learning_rate": 0.01, "loss": 1.9482, "step": 84822 }, { "epoch": 8.72146822948797, "grad_norm": 0.054100871086120605, "learning_rate": 0.01, "loss": 1.9489, "step": 84825 }, { "epoch": 8.721776681061073, "grad_norm": 0.07093433290719986, "learning_rate": 0.01, "loss": 1.963, "step": 84828 }, { "epoch": 8.722085132634177, "grad_norm": 0.041165079921483994, "learning_rate": 0.01, "loss": 1.9383, "step": 84831 }, { "epoch": 8.72239358420728, "grad_norm": 0.03833869844675064, "learning_rate": 0.01, "loss": 1.9459, "step": 84834 }, { "epoch": 8.722702035780383, "grad_norm": 0.03908578306436539, "learning_rate": 0.01, "loss": 1.9421, "step": 84837 }, { "epoch": 8.723010487353486, "grad_norm": 0.05425461381673813, "learning_rate": 0.01, "loss": 1.9486, "step": 84840 }, { "epoch": 8.723318938926589, "grad_norm": 0.06133231148123741, "learning_rate": 0.01, "loss": 1.9589, "step": 84843 }, { "epoch": 8.723627390499692, "grad_norm": 0.08673300594091415, "learning_rate": 0.01, "loss": 1.922, "step": 84846 }, { "epoch": 8.723935842072795, "grad_norm": 0.051887791603803635, "learning_rate": 0.01, "loss": 1.9766, "step": 84849 }, { "epoch": 8.724244293645897, "grad_norm": 0.05313627049326897, "learning_rate": 0.01, "loss": 1.9412, "step": 84852 }, { "epoch": 8.724552745219, "grad_norm": 0.03683719411492348, "learning_rate": 0.01, "loss": 1.9319, "step": 84855 }, { "epoch": 8.724861196792103, "grad_norm": 0.14222852885723114, "learning_rate": 0.01, "loss": 1.9441, "step": 84858 }, { "epoch": 8.725169648365206, "grad_norm": 0.06968168914318085, "learning_rate": 0.01, "loss": 1.9562, "step": 84861 }, { "epoch": 8.725478099938309, "grad_norm": 0.08058342337608337, "learning_rate": 0.01, "loss": 1.9819, "step": 84864 }, { "epoch": 8.725786551511412, "grad_norm": 0.05098986625671387, "learning_rate": 0.01, "loss": 1.9551, "step": 84867 }, { "epoch": 8.726095003084517, "grad_norm": 0.07216066867113113, "learning_rate": 0.01, "loss": 1.9608, "step": 84870 }, { "epoch": 8.72640345465762, "grad_norm": 0.05139782279729843, "learning_rate": 0.01, "loss": 1.9599, "step": 84873 }, { "epoch": 8.726711906230722, "grad_norm": 0.08893761783838272, "learning_rate": 0.01, "loss": 1.9481, "step": 84876 }, { "epoch": 8.727020357803825, "grad_norm": 0.12131284922361374, "learning_rate": 0.01, "loss": 1.9435, "step": 84879 }, { "epoch": 8.727328809376928, "grad_norm": 0.11023629456758499, "learning_rate": 0.01, "loss": 1.931, "step": 84882 }, { "epoch": 8.727637260950031, "grad_norm": 0.05484713986515999, "learning_rate": 0.01, "loss": 1.9502, "step": 84885 }, { "epoch": 8.727945712523134, "grad_norm": 0.0429035983979702, "learning_rate": 0.01, "loss": 1.9438, "step": 84888 }, { "epoch": 8.728254164096237, "grad_norm": 0.0505860261619091, "learning_rate": 0.01, "loss": 1.9371, "step": 84891 }, { "epoch": 8.72856261566934, "grad_norm": 0.06414588540792465, "learning_rate": 0.01, "loss": 1.9483, "step": 84894 }, { "epoch": 8.728871067242443, "grad_norm": 0.050940901041030884, "learning_rate": 0.01, "loss": 1.941, "step": 84897 }, { "epoch": 8.729179518815545, "grad_norm": 0.10562390089035034, "learning_rate": 0.01, "loss": 1.9098, "step": 84900 }, { "epoch": 8.729487970388648, "grad_norm": 0.06857279688119888, "learning_rate": 0.01, "loss": 1.9329, "step": 84903 }, { "epoch": 8.729796421961751, "grad_norm": 0.07322374731302261, "learning_rate": 0.01, "loss": 1.9472, "step": 84906 }, { "epoch": 8.730104873534856, "grad_norm": 0.09952128678560257, "learning_rate": 0.01, "loss": 1.9468, "step": 84909 }, { "epoch": 8.730413325107959, "grad_norm": 0.04598178341984749, "learning_rate": 0.01, "loss": 1.9507, "step": 84912 }, { "epoch": 8.730721776681062, "grad_norm": 0.08658500760793686, "learning_rate": 0.01, "loss": 1.9418, "step": 84915 }, { "epoch": 8.731030228254165, "grad_norm": 0.06861251592636108, "learning_rate": 0.01, "loss": 1.9425, "step": 84918 }, { "epoch": 8.731338679827267, "grad_norm": 0.07586270570755005, "learning_rate": 0.01, "loss": 1.9605, "step": 84921 }, { "epoch": 8.73164713140037, "grad_norm": 0.08280608057975769, "learning_rate": 0.01, "loss": 1.9588, "step": 84924 }, { "epoch": 8.731955582973473, "grad_norm": 0.04060471057891846, "learning_rate": 0.01, "loss": 1.9391, "step": 84927 }, { "epoch": 8.732264034546576, "grad_norm": 0.0505189448595047, "learning_rate": 0.01, "loss": 1.9731, "step": 84930 }, { "epoch": 8.732572486119679, "grad_norm": 0.04092942178249359, "learning_rate": 0.01, "loss": 1.9344, "step": 84933 }, { "epoch": 8.732880937692782, "grad_norm": 0.04049207270145416, "learning_rate": 0.01, "loss": 1.9302, "step": 84936 }, { "epoch": 8.733189389265885, "grad_norm": 0.0332498773932457, "learning_rate": 0.01, "loss": 1.9136, "step": 84939 }, { "epoch": 8.733497840838988, "grad_norm": 0.04378689453005791, "learning_rate": 0.01, "loss": 1.9219, "step": 84942 }, { "epoch": 8.73380629241209, "grad_norm": 0.05257491394877434, "learning_rate": 0.01, "loss": 1.9219, "step": 84945 }, { "epoch": 8.734114743985195, "grad_norm": 0.07151827961206436, "learning_rate": 0.01, "loss": 1.9311, "step": 84948 }, { "epoch": 8.734423195558298, "grad_norm": 0.05070538818836212, "learning_rate": 0.01, "loss": 1.9687, "step": 84951 }, { "epoch": 8.734731647131401, "grad_norm": 0.13645638525485992, "learning_rate": 0.01, "loss": 1.933, "step": 84954 }, { "epoch": 8.735040098704504, "grad_norm": 0.15855565667152405, "learning_rate": 0.01, "loss": 1.9679, "step": 84957 }, { "epoch": 8.735348550277607, "grad_norm": 0.10551951825618744, "learning_rate": 0.01, "loss": 1.9173, "step": 84960 }, { "epoch": 8.73565700185071, "grad_norm": 0.10616222769021988, "learning_rate": 0.01, "loss": 1.9601, "step": 84963 }, { "epoch": 8.735965453423812, "grad_norm": 0.05103769153356552, "learning_rate": 0.01, "loss": 1.9571, "step": 84966 }, { "epoch": 8.736273904996915, "grad_norm": 0.042684778571128845, "learning_rate": 0.01, "loss": 1.9684, "step": 84969 }, { "epoch": 8.736582356570018, "grad_norm": 0.04346473887562752, "learning_rate": 0.01, "loss": 1.9461, "step": 84972 }, { "epoch": 8.736890808143121, "grad_norm": 0.04694516584277153, "learning_rate": 0.01, "loss": 1.9465, "step": 84975 }, { "epoch": 8.737199259716224, "grad_norm": 0.03958739712834358, "learning_rate": 0.01, "loss": 1.9407, "step": 84978 }, { "epoch": 8.737507711289327, "grad_norm": 0.03674643859267235, "learning_rate": 0.01, "loss": 1.9646, "step": 84981 }, { "epoch": 8.73781616286243, "grad_norm": 0.08573244512081146, "learning_rate": 0.01, "loss": 1.9482, "step": 84984 }, { "epoch": 8.738124614435534, "grad_norm": 0.07804060727357864, "learning_rate": 0.01, "loss": 1.9456, "step": 84987 }, { "epoch": 8.738433066008637, "grad_norm": 0.06877141445875168, "learning_rate": 0.01, "loss": 1.9689, "step": 84990 }, { "epoch": 8.73874151758174, "grad_norm": 0.07265405356884003, "learning_rate": 0.01, "loss": 1.9525, "step": 84993 }, { "epoch": 8.739049969154843, "grad_norm": 0.09696147590875626, "learning_rate": 0.01, "loss": 1.9682, "step": 84996 }, { "epoch": 8.739358420727946, "grad_norm": 0.0650443360209465, "learning_rate": 0.01, "loss": 1.9574, "step": 84999 }, { "epoch": 8.739666872301049, "grad_norm": 0.054474495351314545, "learning_rate": 0.01, "loss": 1.9455, "step": 85002 }, { "epoch": 8.739975323874152, "grad_norm": 0.03832100331783295, "learning_rate": 0.01, "loss": 1.9426, "step": 85005 }, { "epoch": 8.740283775447255, "grad_norm": 0.027986668050289154, "learning_rate": 0.01, "loss": 1.9275, "step": 85008 }, { "epoch": 8.740592227020358, "grad_norm": 0.032918307930231094, "learning_rate": 0.01, "loss": 1.9661, "step": 85011 }, { "epoch": 8.74090067859346, "grad_norm": 0.07551144808530807, "learning_rate": 0.01, "loss": 1.9396, "step": 85014 }, { "epoch": 8.741209130166563, "grad_norm": 0.07230864465236664, "learning_rate": 0.01, "loss": 1.963, "step": 85017 }, { "epoch": 8.741517581739666, "grad_norm": 0.08824942260980606, "learning_rate": 0.01, "loss": 1.9556, "step": 85020 }, { "epoch": 8.741826033312769, "grad_norm": 0.07013983279466629, "learning_rate": 0.01, "loss": 1.9597, "step": 85023 }, { "epoch": 8.742134484885874, "grad_norm": 0.10559581965208054, "learning_rate": 0.01, "loss": 1.9534, "step": 85026 }, { "epoch": 8.742442936458977, "grad_norm": 0.05553511530160904, "learning_rate": 0.01, "loss": 1.9371, "step": 85029 }, { "epoch": 8.74275138803208, "grad_norm": 0.051644206047058105, "learning_rate": 0.01, "loss": 1.9516, "step": 85032 }, { "epoch": 8.743059839605182, "grad_norm": 0.07303252071142197, "learning_rate": 0.01, "loss": 1.9491, "step": 85035 }, { "epoch": 8.743368291178285, "grad_norm": 0.04157792776823044, "learning_rate": 0.01, "loss": 1.9391, "step": 85038 }, { "epoch": 8.743676742751388, "grad_norm": 0.09174187481403351, "learning_rate": 0.01, "loss": 1.9466, "step": 85041 }, { "epoch": 8.743985194324491, "grad_norm": 0.07028010487556458, "learning_rate": 0.01, "loss": 1.9591, "step": 85044 }, { "epoch": 8.744293645897594, "grad_norm": 0.07988093048334122, "learning_rate": 0.01, "loss": 1.941, "step": 85047 }, { "epoch": 8.744602097470697, "grad_norm": 0.04523743689060211, "learning_rate": 0.01, "loss": 1.9351, "step": 85050 }, { "epoch": 8.7449105490438, "grad_norm": 0.07089216262102127, "learning_rate": 0.01, "loss": 1.9608, "step": 85053 }, { "epoch": 8.745219000616903, "grad_norm": 0.06905648857355118, "learning_rate": 0.01, "loss": 1.9395, "step": 85056 }, { "epoch": 8.745527452190005, "grad_norm": 0.08611857891082764, "learning_rate": 0.01, "loss": 1.9502, "step": 85059 }, { "epoch": 8.745835903763108, "grad_norm": 0.03267739713191986, "learning_rate": 0.01, "loss": 1.9285, "step": 85062 }, { "epoch": 8.746144355336213, "grad_norm": 0.10321325063705444, "learning_rate": 0.01, "loss": 1.9454, "step": 85065 }, { "epoch": 8.746452806909316, "grad_norm": 0.10448356717824936, "learning_rate": 0.01, "loss": 1.9711, "step": 85068 }, { "epoch": 8.746761258482419, "grad_norm": 0.10813847929239273, "learning_rate": 0.01, "loss": 1.9494, "step": 85071 }, { "epoch": 8.747069710055522, "grad_norm": 0.07072289288043976, "learning_rate": 0.01, "loss": 1.9596, "step": 85074 }, { "epoch": 8.747378161628625, "grad_norm": 0.07409197837114334, "learning_rate": 0.01, "loss": 1.9449, "step": 85077 }, { "epoch": 8.747686613201727, "grad_norm": 0.06067260727286339, "learning_rate": 0.01, "loss": 1.9746, "step": 85080 }, { "epoch": 8.74799506477483, "grad_norm": 0.06234930828213692, "learning_rate": 0.01, "loss": 1.9505, "step": 85083 }, { "epoch": 8.748303516347933, "grad_norm": 0.08919490873813629, "learning_rate": 0.01, "loss": 1.9613, "step": 85086 }, { "epoch": 8.748611967921036, "grad_norm": 0.0363055095076561, "learning_rate": 0.01, "loss": 1.9448, "step": 85089 }, { "epoch": 8.748920419494139, "grad_norm": 0.0357215516269207, "learning_rate": 0.01, "loss": 1.9426, "step": 85092 }, { "epoch": 8.749228871067242, "grad_norm": 0.06636667251586914, "learning_rate": 0.01, "loss": 1.9581, "step": 85095 }, { "epoch": 8.749537322640345, "grad_norm": 0.06666699051856995, "learning_rate": 0.01, "loss": 1.9346, "step": 85098 }, { "epoch": 8.749845774213448, "grad_norm": 0.06280805170536041, "learning_rate": 0.01, "loss": 1.9475, "step": 85101 }, { "epoch": 8.750154225786552, "grad_norm": 0.05007186904549599, "learning_rate": 0.01, "loss": 1.9278, "step": 85104 }, { "epoch": 8.750462677359655, "grad_norm": 0.05247753486037254, "learning_rate": 0.01, "loss": 1.9431, "step": 85107 }, { "epoch": 8.750771128932758, "grad_norm": 0.06049021705985069, "learning_rate": 0.01, "loss": 1.9469, "step": 85110 }, { "epoch": 8.751079580505861, "grad_norm": 0.042631614953279495, "learning_rate": 0.01, "loss": 1.9899, "step": 85113 }, { "epoch": 8.751388032078964, "grad_norm": 0.04226498678326607, "learning_rate": 0.01, "loss": 1.9401, "step": 85116 }, { "epoch": 8.751696483652067, "grad_norm": 0.10684603452682495, "learning_rate": 0.01, "loss": 1.9712, "step": 85119 }, { "epoch": 8.75200493522517, "grad_norm": 0.1066361740231514, "learning_rate": 0.01, "loss": 1.9391, "step": 85122 }, { "epoch": 8.752313386798273, "grad_norm": 0.058565832674503326, "learning_rate": 0.01, "loss": 1.916, "step": 85125 }, { "epoch": 8.752621838371375, "grad_norm": 0.039739158004522324, "learning_rate": 0.01, "loss": 1.9341, "step": 85128 }, { "epoch": 8.752930289944478, "grad_norm": 0.03576759994029999, "learning_rate": 0.01, "loss": 1.9377, "step": 85131 }, { "epoch": 8.753238741517581, "grad_norm": 0.09221669286489487, "learning_rate": 0.01, "loss": 1.9511, "step": 85134 }, { "epoch": 8.753547193090684, "grad_norm": 0.04816079139709473, "learning_rate": 0.01, "loss": 1.9446, "step": 85137 }, { "epoch": 8.753855644663787, "grad_norm": 0.03829638287425041, "learning_rate": 0.01, "loss": 1.9613, "step": 85140 }, { "epoch": 8.754164096236892, "grad_norm": 0.04541938751935959, "learning_rate": 0.01, "loss": 1.9396, "step": 85143 }, { "epoch": 8.754472547809995, "grad_norm": 0.03273008018732071, "learning_rate": 0.01, "loss": 1.9448, "step": 85146 }, { "epoch": 8.754780999383097, "grad_norm": 0.06781888753175735, "learning_rate": 0.01, "loss": 1.9451, "step": 85149 }, { "epoch": 8.7550894509562, "grad_norm": 0.08772855252027512, "learning_rate": 0.01, "loss": 1.9272, "step": 85152 }, { "epoch": 8.755397902529303, "grad_norm": 0.05312326177954674, "learning_rate": 0.01, "loss": 1.9192, "step": 85155 }, { "epoch": 8.755706354102406, "grad_norm": 0.13668350875377655, "learning_rate": 0.01, "loss": 1.9424, "step": 85158 }, { "epoch": 8.756014805675509, "grad_norm": 0.1036950945854187, "learning_rate": 0.01, "loss": 1.9328, "step": 85161 }, { "epoch": 8.756323257248612, "grad_norm": 0.03806331753730774, "learning_rate": 0.01, "loss": 1.9368, "step": 85164 }, { "epoch": 8.756631708821715, "grad_norm": 0.0354275219142437, "learning_rate": 0.01, "loss": 1.9398, "step": 85167 }, { "epoch": 8.756940160394818, "grad_norm": 0.0587974451482296, "learning_rate": 0.01, "loss": 1.9579, "step": 85170 }, { "epoch": 8.75724861196792, "grad_norm": 0.09037166833877563, "learning_rate": 0.01, "loss": 1.9453, "step": 85173 }, { "epoch": 8.757557063541023, "grad_norm": 0.045411787927150726, "learning_rate": 0.01, "loss": 1.955, "step": 85176 }, { "epoch": 8.757865515114126, "grad_norm": 0.08137772977352142, "learning_rate": 0.01, "loss": 1.9513, "step": 85179 }, { "epoch": 8.758173966687231, "grad_norm": 0.08651778101921082, "learning_rate": 0.01, "loss": 1.942, "step": 85182 }, { "epoch": 8.758482418260334, "grad_norm": 0.06294746696949005, "learning_rate": 0.01, "loss": 1.9219, "step": 85185 }, { "epoch": 8.758790869833437, "grad_norm": 0.11187928915023804, "learning_rate": 0.01, "loss": 1.9352, "step": 85188 }, { "epoch": 8.75909932140654, "grad_norm": 0.062168415635824203, "learning_rate": 0.01, "loss": 1.9518, "step": 85191 }, { "epoch": 8.759407772979642, "grad_norm": 0.03791145980358124, "learning_rate": 0.01, "loss": 1.9493, "step": 85194 }, { "epoch": 8.759716224552745, "grad_norm": 0.04850441589951515, "learning_rate": 0.01, "loss": 1.9674, "step": 85197 }, { "epoch": 8.760024676125848, "grad_norm": 0.04435930401086807, "learning_rate": 0.01, "loss": 1.9529, "step": 85200 }, { "epoch": 8.760333127698951, "grad_norm": 0.11299605667591095, "learning_rate": 0.01, "loss": 1.9517, "step": 85203 }, { "epoch": 8.760641579272054, "grad_norm": 0.04934825748205185, "learning_rate": 0.01, "loss": 1.958, "step": 85206 }, { "epoch": 8.760950030845157, "grad_norm": 0.09066156297922134, "learning_rate": 0.01, "loss": 1.9372, "step": 85209 }, { "epoch": 8.76125848241826, "grad_norm": 0.07747044414281845, "learning_rate": 0.01, "loss": 1.9486, "step": 85212 }, { "epoch": 8.761566933991363, "grad_norm": 0.04040999710559845, "learning_rate": 0.01, "loss": 1.9116, "step": 85215 }, { "epoch": 8.761875385564466, "grad_norm": 0.07093887031078339, "learning_rate": 0.01, "loss": 1.9458, "step": 85218 }, { "epoch": 8.76218383713757, "grad_norm": 0.05974037945270538, "learning_rate": 0.01, "loss": 1.961, "step": 85221 }, { "epoch": 8.762492288710673, "grad_norm": 0.1216970756649971, "learning_rate": 0.01, "loss": 1.9475, "step": 85224 }, { "epoch": 8.762800740283776, "grad_norm": 0.12822993099689484, "learning_rate": 0.01, "loss": 1.9326, "step": 85227 }, { "epoch": 8.763109191856879, "grad_norm": 0.10270580649375916, "learning_rate": 0.01, "loss": 1.9406, "step": 85230 }, { "epoch": 8.763417643429982, "grad_norm": 0.06854567676782608, "learning_rate": 0.01, "loss": 1.9554, "step": 85233 }, { "epoch": 8.763726095003085, "grad_norm": 0.03591831400990486, "learning_rate": 0.01, "loss": 1.9553, "step": 85236 }, { "epoch": 8.764034546576188, "grad_norm": 0.0472213476896286, "learning_rate": 0.01, "loss": 1.9406, "step": 85239 }, { "epoch": 8.76434299814929, "grad_norm": 0.10494640469551086, "learning_rate": 0.01, "loss": 1.9345, "step": 85242 }, { "epoch": 8.764651449722393, "grad_norm": 0.055173519998788834, "learning_rate": 0.01, "loss": 1.9609, "step": 85245 }, { "epoch": 8.764959901295496, "grad_norm": 0.05719782039523125, "learning_rate": 0.01, "loss": 1.9281, "step": 85248 }, { "epoch": 8.765268352868599, "grad_norm": 0.048283543437719345, "learning_rate": 0.01, "loss": 1.9276, "step": 85251 }, { "epoch": 8.765576804441702, "grad_norm": 0.05473318323493004, "learning_rate": 0.01, "loss": 1.9523, "step": 85254 }, { "epoch": 8.765885256014805, "grad_norm": 0.037164732813835144, "learning_rate": 0.01, "loss": 1.9699, "step": 85257 }, { "epoch": 8.76619370758791, "grad_norm": 0.039799343794584274, "learning_rate": 0.01, "loss": 1.923, "step": 85260 }, { "epoch": 8.766502159161012, "grad_norm": 0.13168968260288239, "learning_rate": 0.01, "loss": 1.9698, "step": 85263 }, { "epoch": 8.766810610734115, "grad_norm": 0.04143999144434929, "learning_rate": 0.01, "loss": 1.9419, "step": 85266 }, { "epoch": 8.767119062307218, "grad_norm": 0.052542950958013535, "learning_rate": 0.01, "loss": 1.9534, "step": 85269 }, { "epoch": 8.767427513880321, "grad_norm": 0.04957433044910431, "learning_rate": 0.01, "loss": 1.9634, "step": 85272 }, { "epoch": 8.767735965453424, "grad_norm": 0.07333894819021225, "learning_rate": 0.01, "loss": 1.9361, "step": 85275 }, { "epoch": 8.768044417026527, "grad_norm": 0.04119938984513283, "learning_rate": 0.01, "loss": 1.9604, "step": 85278 }, { "epoch": 8.76835286859963, "grad_norm": 0.03327082470059395, "learning_rate": 0.01, "loss": 1.9643, "step": 85281 }, { "epoch": 8.768661320172733, "grad_norm": 0.04693789780139923, "learning_rate": 0.01, "loss": 1.949, "step": 85284 }, { "epoch": 8.768969771745835, "grad_norm": 0.06504803150892258, "learning_rate": 0.01, "loss": 1.9553, "step": 85287 }, { "epoch": 8.769278223318938, "grad_norm": 0.04598322883248329, "learning_rate": 0.01, "loss": 1.9571, "step": 85290 }, { "epoch": 8.769586674892041, "grad_norm": 0.03206741437315941, "learning_rate": 0.01, "loss": 1.9411, "step": 85293 }, { "epoch": 8.769895126465144, "grad_norm": 0.05733088403940201, "learning_rate": 0.01, "loss": 1.9386, "step": 85296 }, { "epoch": 8.770203578038249, "grad_norm": 0.07618236541748047, "learning_rate": 0.01, "loss": 1.9474, "step": 85299 }, { "epoch": 8.770512029611352, "grad_norm": 0.05973696708679199, "learning_rate": 0.01, "loss": 1.9587, "step": 85302 }, { "epoch": 8.770820481184455, "grad_norm": 0.05072012543678284, "learning_rate": 0.01, "loss": 1.9582, "step": 85305 }, { "epoch": 8.771128932757557, "grad_norm": 0.1093197762966156, "learning_rate": 0.01, "loss": 1.9463, "step": 85308 }, { "epoch": 8.77143738433066, "grad_norm": 0.04258941113948822, "learning_rate": 0.01, "loss": 1.9339, "step": 85311 }, { "epoch": 8.771745835903763, "grad_norm": 0.04937519505620003, "learning_rate": 0.01, "loss": 1.9706, "step": 85314 }, { "epoch": 8.772054287476866, "grad_norm": 0.043309785425662994, "learning_rate": 0.01, "loss": 1.9616, "step": 85317 }, { "epoch": 8.772362739049969, "grad_norm": 0.044104646891355515, "learning_rate": 0.01, "loss": 1.9702, "step": 85320 }, { "epoch": 8.772671190623072, "grad_norm": 0.05477212369441986, "learning_rate": 0.01, "loss": 1.9445, "step": 85323 }, { "epoch": 8.772979642196175, "grad_norm": 0.05495447665452957, "learning_rate": 0.01, "loss": 1.947, "step": 85326 }, { "epoch": 8.773288093769278, "grad_norm": 0.10645133256912231, "learning_rate": 0.01, "loss": 1.9538, "step": 85329 }, { "epoch": 8.77359654534238, "grad_norm": 0.07867071777582169, "learning_rate": 0.01, "loss": 1.916, "step": 85332 }, { "epoch": 8.773904996915483, "grad_norm": 0.11647599190473557, "learning_rate": 0.01, "loss": 1.9522, "step": 85335 }, { "epoch": 8.774213448488588, "grad_norm": 0.03819744288921356, "learning_rate": 0.01, "loss": 1.9488, "step": 85338 }, { "epoch": 8.774521900061691, "grad_norm": 0.08988303691148758, "learning_rate": 0.01, "loss": 1.9422, "step": 85341 }, { "epoch": 8.774830351634794, "grad_norm": 0.0678759217262268, "learning_rate": 0.01, "loss": 1.9621, "step": 85344 }, { "epoch": 8.775138803207897, "grad_norm": 0.08218473941087723, "learning_rate": 0.01, "loss": 1.9334, "step": 85347 }, { "epoch": 8.775447254781, "grad_norm": 0.07374904304742813, "learning_rate": 0.01, "loss": 1.9499, "step": 85350 }, { "epoch": 8.775755706354103, "grad_norm": 0.07110534608364105, "learning_rate": 0.01, "loss": 1.9383, "step": 85353 }, { "epoch": 8.776064157927205, "grad_norm": 0.0576554536819458, "learning_rate": 0.01, "loss": 1.937, "step": 85356 }, { "epoch": 8.776372609500308, "grad_norm": 0.08372887223958969, "learning_rate": 0.01, "loss": 1.9403, "step": 85359 }, { "epoch": 8.776681061073411, "grad_norm": 0.06293521821498871, "learning_rate": 0.01, "loss": 1.9597, "step": 85362 }, { "epoch": 8.776989512646514, "grad_norm": 0.047408126294612885, "learning_rate": 0.01, "loss": 1.9408, "step": 85365 }, { "epoch": 8.777297964219617, "grad_norm": 0.07918205112218857, "learning_rate": 0.01, "loss": 1.9193, "step": 85368 }, { "epoch": 8.77760641579272, "grad_norm": 0.04142564535140991, "learning_rate": 0.01, "loss": 1.9395, "step": 85371 }, { "epoch": 8.777914867365823, "grad_norm": 0.031209861859679222, "learning_rate": 0.01, "loss": 1.9625, "step": 85374 }, { "epoch": 8.778223318938927, "grad_norm": 0.048874612897634506, "learning_rate": 0.01, "loss": 1.946, "step": 85377 }, { "epoch": 8.77853177051203, "grad_norm": 0.04210768640041351, "learning_rate": 0.01, "loss": 1.9725, "step": 85380 }, { "epoch": 8.778840222085133, "grad_norm": 0.11695470660924911, "learning_rate": 0.01, "loss": 1.9502, "step": 85383 }, { "epoch": 8.779148673658236, "grad_norm": 0.08034269511699677, "learning_rate": 0.01, "loss": 1.9526, "step": 85386 }, { "epoch": 8.779457125231339, "grad_norm": 0.1190868392586708, "learning_rate": 0.01, "loss": 1.9509, "step": 85389 }, { "epoch": 8.779765576804442, "grad_norm": 0.0867137685418129, "learning_rate": 0.01, "loss": 1.9574, "step": 85392 }, { "epoch": 8.780074028377545, "grad_norm": 0.08815090358257294, "learning_rate": 0.01, "loss": 1.9308, "step": 85395 }, { "epoch": 8.780382479950648, "grad_norm": 0.04542379826307297, "learning_rate": 0.01, "loss": 1.9873, "step": 85398 }, { "epoch": 8.78069093152375, "grad_norm": 0.05640220269560814, "learning_rate": 0.01, "loss": 1.9516, "step": 85401 }, { "epoch": 8.780999383096853, "grad_norm": 0.08306989073753357, "learning_rate": 0.01, "loss": 1.9643, "step": 85404 }, { "epoch": 8.781307834669956, "grad_norm": 0.11904260516166687, "learning_rate": 0.01, "loss": 1.9247, "step": 85407 }, { "epoch": 8.78161628624306, "grad_norm": 0.11178072541952133, "learning_rate": 0.01, "loss": 1.9487, "step": 85410 }, { "epoch": 8.781924737816162, "grad_norm": 0.09281167387962341, "learning_rate": 0.01, "loss": 1.9334, "step": 85413 }, { "epoch": 8.782233189389267, "grad_norm": 0.055970724672079086, "learning_rate": 0.01, "loss": 1.9316, "step": 85416 }, { "epoch": 8.78254164096237, "grad_norm": 0.05929992347955704, "learning_rate": 0.01, "loss": 1.9545, "step": 85419 }, { "epoch": 8.782850092535472, "grad_norm": 0.05237572267651558, "learning_rate": 0.01, "loss": 1.9422, "step": 85422 }, { "epoch": 8.783158544108575, "grad_norm": 0.08916623145341873, "learning_rate": 0.01, "loss": 1.9557, "step": 85425 }, { "epoch": 8.783466995681678, "grad_norm": 0.058517616242170334, "learning_rate": 0.01, "loss": 1.9479, "step": 85428 }, { "epoch": 8.783775447254781, "grad_norm": 0.04481247439980507, "learning_rate": 0.01, "loss": 1.9552, "step": 85431 }, { "epoch": 8.784083898827884, "grad_norm": 0.04817540943622589, "learning_rate": 0.01, "loss": 1.962, "step": 85434 }, { "epoch": 8.784392350400987, "grad_norm": 0.0524100698530674, "learning_rate": 0.01, "loss": 1.9605, "step": 85437 }, { "epoch": 8.78470080197409, "grad_norm": 0.1673521101474762, "learning_rate": 0.01, "loss": 1.9568, "step": 85440 }, { "epoch": 8.785009253547193, "grad_norm": 0.058675531297922134, "learning_rate": 0.01, "loss": 1.9477, "step": 85443 }, { "epoch": 8.785317705120296, "grad_norm": 0.0696008950471878, "learning_rate": 0.01, "loss": 1.9452, "step": 85446 }, { "epoch": 8.785626156693398, "grad_norm": 0.05638658627867699, "learning_rate": 0.01, "loss": 1.9529, "step": 85449 }, { "epoch": 8.785934608266501, "grad_norm": 0.0473102405667305, "learning_rate": 0.01, "loss": 1.9451, "step": 85452 }, { "epoch": 8.786243059839606, "grad_norm": 0.04515746235847473, "learning_rate": 0.01, "loss": 1.9203, "step": 85455 }, { "epoch": 8.786551511412709, "grad_norm": 0.043432414531707764, "learning_rate": 0.01, "loss": 1.9421, "step": 85458 }, { "epoch": 8.786859962985812, "grad_norm": 0.0580633319914341, "learning_rate": 0.01, "loss": 1.9678, "step": 85461 }, { "epoch": 8.787168414558915, "grad_norm": 0.07870703935623169, "learning_rate": 0.01, "loss": 1.9507, "step": 85464 }, { "epoch": 8.787476866132018, "grad_norm": 0.08649545162916183, "learning_rate": 0.01, "loss": 1.9721, "step": 85467 }, { "epoch": 8.78778531770512, "grad_norm": 0.06479387730360031, "learning_rate": 0.01, "loss": 1.9356, "step": 85470 }, { "epoch": 8.788093769278223, "grad_norm": 0.07489529997110367, "learning_rate": 0.01, "loss": 1.9061, "step": 85473 }, { "epoch": 8.788402220851326, "grad_norm": 0.08207201212644577, "learning_rate": 0.01, "loss": 1.9658, "step": 85476 }, { "epoch": 8.788710672424429, "grad_norm": 0.07542536407709122, "learning_rate": 0.01, "loss": 1.9461, "step": 85479 }, { "epoch": 8.789019123997532, "grad_norm": 0.0705515518784523, "learning_rate": 0.01, "loss": 1.9487, "step": 85482 }, { "epoch": 8.789327575570635, "grad_norm": 0.05835072696208954, "learning_rate": 0.01, "loss": 1.986, "step": 85485 }, { "epoch": 8.789636027143738, "grad_norm": 0.05363980308175087, "learning_rate": 0.01, "loss": 1.9666, "step": 85488 }, { "epoch": 8.78994447871684, "grad_norm": 0.10389215499162674, "learning_rate": 0.01, "loss": 1.9544, "step": 85491 }, { "epoch": 8.790252930289945, "grad_norm": 0.0551920086145401, "learning_rate": 0.01, "loss": 1.9355, "step": 85494 }, { "epoch": 8.790561381863048, "grad_norm": 0.054502177983522415, "learning_rate": 0.01, "loss": 1.9241, "step": 85497 }, { "epoch": 8.790869833436151, "grad_norm": 0.04676192253828049, "learning_rate": 0.01, "loss": 1.9491, "step": 85500 }, { "epoch": 8.791178285009254, "grad_norm": 0.0815708339214325, "learning_rate": 0.01, "loss": 1.9673, "step": 85503 }, { "epoch": 8.791486736582357, "grad_norm": 0.07840388268232346, "learning_rate": 0.01, "loss": 1.9246, "step": 85506 }, { "epoch": 8.79179518815546, "grad_norm": 0.08336468786001205, "learning_rate": 0.01, "loss": 1.9715, "step": 85509 }, { "epoch": 8.792103639728563, "grad_norm": 0.1331731379032135, "learning_rate": 0.01, "loss": 1.9652, "step": 85512 }, { "epoch": 8.792412091301665, "grad_norm": 0.10310441255569458, "learning_rate": 0.01, "loss": 1.9496, "step": 85515 }, { "epoch": 8.792720542874768, "grad_norm": 0.03826957195997238, "learning_rate": 0.01, "loss": 1.925, "step": 85518 }, { "epoch": 8.793028994447871, "grad_norm": 0.04112190008163452, "learning_rate": 0.01, "loss": 1.9451, "step": 85521 }, { "epoch": 8.793337446020974, "grad_norm": 0.04075111821293831, "learning_rate": 0.01, "loss": 1.9213, "step": 85524 }, { "epoch": 8.793645897594077, "grad_norm": 0.03421452268958092, "learning_rate": 0.01, "loss": 1.9604, "step": 85527 }, { "epoch": 8.79395434916718, "grad_norm": 0.1482706516981125, "learning_rate": 0.01, "loss": 1.9646, "step": 85530 }, { "epoch": 8.794262800740285, "grad_norm": 0.11172833293676376, "learning_rate": 0.01, "loss": 1.9286, "step": 85533 }, { "epoch": 8.794571252313387, "grad_norm": 0.07505451142787933, "learning_rate": 0.01, "loss": 1.9668, "step": 85536 }, { "epoch": 8.79487970388649, "grad_norm": 0.07401387393474579, "learning_rate": 0.01, "loss": 1.9659, "step": 85539 }, { "epoch": 8.795188155459593, "grad_norm": 0.06274157762527466, "learning_rate": 0.01, "loss": 1.9517, "step": 85542 }, { "epoch": 8.795496607032696, "grad_norm": 0.05745621770620346, "learning_rate": 0.01, "loss": 1.9541, "step": 85545 }, { "epoch": 8.795805058605799, "grad_norm": 0.04131782427430153, "learning_rate": 0.01, "loss": 1.9623, "step": 85548 }, { "epoch": 8.796113510178902, "grad_norm": 0.04630117118358612, "learning_rate": 0.01, "loss": 1.9447, "step": 85551 }, { "epoch": 8.796421961752005, "grad_norm": 0.07542744278907776, "learning_rate": 0.01, "loss": 1.9131, "step": 85554 }, { "epoch": 8.796730413325108, "grad_norm": 0.10365857928991318, "learning_rate": 0.01, "loss": 1.9455, "step": 85557 }, { "epoch": 8.79703886489821, "grad_norm": 0.11497282981872559, "learning_rate": 0.01, "loss": 1.9606, "step": 85560 }, { "epoch": 8.797347316471313, "grad_norm": 0.05868663638830185, "learning_rate": 0.01, "loss": 1.9398, "step": 85563 }, { "epoch": 8.797655768044416, "grad_norm": 0.04206192493438721, "learning_rate": 0.01, "loss": 1.9401, "step": 85566 }, { "epoch": 8.79796421961752, "grad_norm": 0.038855090737342834, "learning_rate": 0.01, "loss": 1.9596, "step": 85569 }, { "epoch": 8.798272671190624, "grad_norm": 0.0406762957572937, "learning_rate": 0.01, "loss": 1.9407, "step": 85572 }, { "epoch": 8.798581122763727, "grad_norm": 0.12336095422506332, "learning_rate": 0.01, "loss": 1.9597, "step": 85575 }, { "epoch": 8.79888957433683, "grad_norm": 0.05633760988712311, "learning_rate": 0.01, "loss": 1.9673, "step": 85578 }, { "epoch": 8.799198025909932, "grad_norm": 0.05526827648282051, "learning_rate": 0.01, "loss": 1.9432, "step": 85581 }, { "epoch": 8.799506477483035, "grad_norm": 0.0719655379652977, "learning_rate": 0.01, "loss": 1.9845, "step": 85584 }, { "epoch": 8.799814929056138, "grad_norm": 0.15878325700759888, "learning_rate": 0.01, "loss": 1.9468, "step": 85587 }, { "epoch": 8.800123380629241, "grad_norm": 0.07367381453514099, "learning_rate": 0.01, "loss": 1.9219, "step": 85590 }, { "epoch": 8.800431832202344, "grad_norm": 0.08002124726772308, "learning_rate": 0.01, "loss": 1.9451, "step": 85593 }, { "epoch": 8.800740283775447, "grad_norm": 0.06393203884363174, "learning_rate": 0.01, "loss": 1.9528, "step": 85596 }, { "epoch": 8.80104873534855, "grad_norm": 0.03970704600214958, "learning_rate": 0.01, "loss": 1.9313, "step": 85599 }, { "epoch": 8.801357186921653, "grad_norm": 0.052820831537246704, "learning_rate": 0.01, "loss": 1.9658, "step": 85602 }, { "epoch": 8.801665638494756, "grad_norm": 0.06489507108926773, "learning_rate": 0.01, "loss": 1.9437, "step": 85605 }, { "epoch": 8.801974090067858, "grad_norm": 0.06538572162389755, "learning_rate": 0.01, "loss": 1.9397, "step": 85608 }, { "epoch": 8.802282541640963, "grad_norm": 0.041309911757707596, "learning_rate": 0.01, "loss": 1.9378, "step": 85611 }, { "epoch": 8.802590993214066, "grad_norm": 0.11032170802354813, "learning_rate": 0.01, "loss": 1.9253, "step": 85614 }, { "epoch": 8.802899444787169, "grad_norm": 0.03958607837557793, "learning_rate": 0.01, "loss": 1.9364, "step": 85617 }, { "epoch": 8.803207896360272, "grad_norm": 0.06811465322971344, "learning_rate": 0.01, "loss": 1.9477, "step": 85620 }, { "epoch": 8.803516347933375, "grad_norm": 0.0968390479683876, "learning_rate": 0.01, "loss": 1.9351, "step": 85623 }, { "epoch": 8.803824799506478, "grad_norm": 0.06796689331531525, "learning_rate": 0.01, "loss": 1.939, "step": 85626 }, { "epoch": 8.80413325107958, "grad_norm": 0.05755221098661423, "learning_rate": 0.01, "loss": 1.9673, "step": 85629 }, { "epoch": 8.804441702652683, "grad_norm": 0.12345452606678009, "learning_rate": 0.01, "loss": 1.9724, "step": 85632 }, { "epoch": 8.804750154225786, "grad_norm": 0.07588624209165573, "learning_rate": 0.01, "loss": 1.9296, "step": 85635 }, { "epoch": 8.805058605798889, "grad_norm": 0.04973727837204933, "learning_rate": 0.01, "loss": 1.9493, "step": 85638 }, { "epoch": 8.805367057371992, "grad_norm": 0.03331485763192177, "learning_rate": 0.01, "loss": 1.9522, "step": 85641 }, { "epoch": 8.805675508945095, "grad_norm": 0.03431696817278862, "learning_rate": 0.01, "loss": 1.9374, "step": 85644 }, { "epoch": 8.805983960518198, "grad_norm": 0.045221637934446335, "learning_rate": 0.01, "loss": 1.9276, "step": 85647 }, { "epoch": 8.806292412091302, "grad_norm": 0.0935794860124588, "learning_rate": 0.01, "loss": 1.9252, "step": 85650 }, { "epoch": 8.806600863664405, "grad_norm": 0.05588880181312561, "learning_rate": 0.01, "loss": 1.9365, "step": 85653 }, { "epoch": 8.806909315237508, "grad_norm": 0.039516277611255646, "learning_rate": 0.01, "loss": 1.9255, "step": 85656 }, { "epoch": 8.807217766810611, "grad_norm": 0.03812456503510475, "learning_rate": 0.01, "loss": 1.9512, "step": 85659 }, { "epoch": 8.807526218383714, "grad_norm": 0.04115702211856842, "learning_rate": 0.01, "loss": 1.9542, "step": 85662 }, { "epoch": 8.807834669956817, "grad_norm": 0.141036257147789, "learning_rate": 0.01, "loss": 1.9404, "step": 85665 }, { "epoch": 8.80814312152992, "grad_norm": 0.05860263481736183, "learning_rate": 0.01, "loss": 1.9596, "step": 85668 }, { "epoch": 8.808451573103023, "grad_norm": 0.05528310313820839, "learning_rate": 0.01, "loss": 1.951, "step": 85671 }, { "epoch": 8.808760024676126, "grad_norm": 0.03591969981789589, "learning_rate": 0.01, "loss": 1.9301, "step": 85674 }, { "epoch": 8.809068476249228, "grad_norm": 0.13484379649162292, "learning_rate": 0.01, "loss": 1.9468, "step": 85677 }, { "epoch": 8.809376927822331, "grad_norm": 0.08338242024183273, "learning_rate": 0.01, "loss": 1.9522, "step": 85680 }, { "epoch": 8.809685379395434, "grad_norm": 0.061400432139635086, "learning_rate": 0.01, "loss": 1.9366, "step": 85683 }, { "epoch": 8.809993830968537, "grad_norm": 0.029880136251449585, "learning_rate": 0.01, "loss": 1.9577, "step": 85686 }, { "epoch": 8.810302282541642, "grad_norm": 0.05177440494298935, "learning_rate": 0.01, "loss": 1.9289, "step": 85689 }, { "epoch": 8.810610734114745, "grad_norm": 0.08280415832996368, "learning_rate": 0.01, "loss": 1.931, "step": 85692 }, { "epoch": 8.810919185687847, "grad_norm": 0.09497891366481781, "learning_rate": 0.01, "loss": 1.9621, "step": 85695 }, { "epoch": 8.81122763726095, "grad_norm": 0.0856609046459198, "learning_rate": 0.01, "loss": 1.9529, "step": 85698 }, { "epoch": 8.811536088834053, "grad_norm": 0.05290984734892845, "learning_rate": 0.01, "loss": 1.9391, "step": 85701 }, { "epoch": 8.811844540407156, "grad_norm": 0.04517572000622749, "learning_rate": 0.01, "loss": 1.9423, "step": 85704 }, { "epoch": 8.812152991980259, "grad_norm": 0.040704116225242615, "learning_rate": 0.01, "loss": 1.9322, "step": 85707 }, { "epoch": 8.812461443553362, "grad_norm": 0.03796176612377167, "learning_rate": 0.01, "loss": 1.9638, "step": 85710 }, { "epoch": 8.812769895126465, "grad_norm": 0.11047817766666412, "learning_rate": 0.01, "loss": 1.9607, "step": 85713 }, { "epoch": 8.813078346699568, "grad_norm": 0.09995049983263016, "learning_rate": 0.01, "loss": 1.9457, "step": 85716 }, { "epoch": 8.81338679827267, "grad_norm": 0.037516675889492035, "learning_rate": 0.01, "loss": 1.9542, "step": 85719 }, { "epoch": 8.813695249845773, "grad_norm": 0.08218607306480408, "learning_rate": 0.01, "loss": 1.9672, "step": 85722 }, { "epoch": 8.814003701418876, "grad_norm": 0.09442456066608429, "learning_rate": 0.01, "loss": 1.9557, "step": 85725 }, { "epoch": 8.814312152991981, "grad_norm": 0.05343955382704735, "learning_rate": 0.01, "loss": 1.9444, "step": 85728 }, { "epoch": 8.814620604565084, "grad_norm": 0.0756995677947998, "learning_rate": 0.01, "loss": 1.9411, "step": 85731 }, { "epoch": 8.814929056138187, "grad_norm": 0.07240238785743713, "learning_rate": 0.01, "loss": 1.9473, "step": 85734 }, { "epoch": 8.81523750771129, "grad_norm": 0.058714382350444794, "learning_rate": 0.01, "loss": 1.9526, "step": 85737 }, { "epoch": 8.815545959284393, "grad_norm": 0.08831341564655304, "learning_rate": 0.01, "loss": 1.9644, "step": 85740 }, { "epoch": 8.815854410857495, "grad_norm": 0.05908043310046196, "learning_rate": 0.01, "loss": 1.9643, "step": 85743 }, { "epoch": 8.816162862430598, "grad_norm": 0.04405365511775017, "learning_rate": 0.01, "loss": 1.9426, "step": 85746 }, { "epoch": 8.816471314003701, "grad_norm": 0.04712482914328575, "learning_rate": 0.01, "loss": 1.959, "step": 85749 }, { "epoch": 8.816779765576804, "grad_norm": 0.10971397906541824, "learning_rate": 0.01, "loss": 1.9735, "step": 85752 }, { "epoch": 8.817088217149907, "grad_norm": 0.06402042508125305, "learning_rate": 0.01, "loss": 1.9682, "step": 85755 }, { "epoch": 8.81739666872301, "grad_norm": 0.09109006822109222, "learning_rate": 0.01, "loss": 1.9342, "step": 85758 }, { "epoch": 8.817705120296113, "grad_norm": 0.09684780985116959, "learning_rate": 0.01, "loss": 1.9349, "step": 85761 }, { "epoch": 8.818013571869216, "grad_norm": 0.0380178838968277, "learning_rate": 0.01, "loss": 1.9469, "step": 85764 }, { "epoch": 8.81832202344232, "grad_norm": 0.07201346755027771, "learning_rate": 0.01, "loss": 1.9727, "step": 85767 }, { "epoch": 8.818630475015423, "grad_norm": 0.05873304605484009, "learning_rate": 0.01, "loss": 1.9467, "step": 85770 }, { "epoch": 8.818938926588526, "grad_norm": 0.06054871901869774, "learning_rate": 0.01, "loss": 1.9347, "step": 85773 }, { "epoch": 8.819247378161629, "grad_norm": 0.06931430101394653, "learning_rate": 0.01, "loss": 1.959, "step": 85776 }, { "epoch": 8.819555829734732, "grad_norm": 0.08289149403572083, "learning_rate": 0.01, "loss": 1.9532, "step": 85779 }, { "epoch": 8.819864281307835, "grad_norm": 0.07705207169055939, "learning_rate": 0.01, "loss": 1.9492, "step": 85782 }, { "epoch": 8.820172732880938, "grad_norm": 0.1018168181180954, "learning_rate": 0.01, "loss": 1.9142, "step": 85785 }, { "epoch": 8.82048118445404, "grad_norm": 0.061625886708498, "learning_rate": 0.01, "loss": 1.9588, "step": 85788 }, { "epoch": 8.820789636027143, "grad_norm": 0.050136588513851166, "learning_rate": 0.01, "loss": 1.9535, "step": 85791 }, { "epoch": 8.821098087600246, "grad_norm": 0.040379468351602554, "learning_rate": 0.01, "loss": 1.929, "step": 85794 }, { "epoch": 8.82140653917335, "grad_norm": 0.032702744007110596, "learning_rate": 0.01, "loss": 1.9426, "step": 85797 }, { "epoch": 8.821714990746452, "grad_norm": 0.04039518162608147, "learning_rate": 0.01, "loss": 1.9508, "step": 85800 }, { "epoch": 8.822023442319555, "grad_norm": 0.13541679084300995, "learning_rate": 0.01, "loss": 1.962, "step": 85803 }, { "epoch": 8.82233189389266, "grad_norm": 0.11711031198501587, "learning_rate": 0.01, "loss": 1.9603, "step": 85806 }, { "epoch": 8.822640345465762, "grad_norm": 0.13755975663661957, "learning_rate": 0.01, "loss": 1.9661, "step": 85809 }, { "epoch": 8.822948797038865, "grad_norm": 0.10734470188617706, "learning_rate": 0.01, "loss": 1.94, "step": 85812 }, { "epoch": 8.823257248611968, "grad_norm": 0.112040676176548, "learning_rate": 0.01, "loss": 1.9524, "step": 85815 }, { "epoch": 8.823565700185071, "grad_norm": 0.07922054082155228, "learning_rate": 0.01, "loss": 1.945, "step": 85818 }, { "epoch": 8.823874151758174, "grad_norm": 0.04796600341796875, "learning_rate": 0.01, "loss": 1.9355, "step": 85821 }, { "epoch": 8.824182603331277, "grad_norm": 0.05426159128546715, "learning_rate": 0.01, "loss": 1.9152, "step": 85824 }, { "epoch": 8.82449105490438, "grad_norm": 0.04682827368378639, "learning_rate": 0.01, "loss": 1.9443, "step": 85827 }, { "epoch": 8.824799506477483, "grad_norm": 0.08252523839473724, "learning_rate": 0.01, "loss": 1.9676, "step": 85830 }, { "epoch": 8.825107958050586, "grad_norm": 0.04207620769739151, "learning_rate": 0.01, "loss": 1.9291, "step": 85833 }, { "epoch": 8.825416409623688, "grad_norm": 0.0742509737610817, "learning_rate": 0.01, "loss": 1.9221, "step": 85836 }, { "epoch": 8.825724861196791, "grad_norm": 0.03609723225235939, "learning_rate": 0.01, "loss": 1.932, "step": 85839 }, { "epoch": 8.826033312769894, "grad_norm": 0.0418362058699131, "learning_rate": 0.01, "loss": 1.9575, "step": 85842 }, { "epoch": 8.826341764342999, "grad_norm": 0.05531947314739227, "learning_rate": 0.01, "loss": 1.9426, "step": 85845 }, { "epoch": 8.826650215916102, "grad_norm": 0.05285338684916496, "learning_rate": 0.01, "loss": 1.9697, "step": 85848 }, { "epoch": 8.826958667489205, "grad_norm": 0.1251527965068817, "learning_rate": 0.01, "loss": 1.9386, "step": 85851 }, { "epoch": 8.827267119062308, "grad_norm": 0.038710471242666245, "learning_rate": 0.01, "loss": 1.9545, "step": 85854 }, { "epoch": 8.82757557063541, "grad_norm": 0.04924651235342026, "learning_rate": 0.01, "loss": 1.9597, "step": 85857 }, { "epoch": 8.827884022208513, "grad_norm": 0.07448245584964752, "learning_rate": 0.01, "loss": 1.9307, "step": 85860 }, { "epoch": 8.828192473781616, "grad_norm": 0.03801583871245384, "learning_rate": 0.01, "loss": 1.9343, "step": 85863 }, { "epoch": 8.828500925354719, "grad_norm": 0.04570269584655762, "learning_rate": 0.01, "loss": 1.9514, "step": 85866 }, { "epoch": 8.828809376927822, "grad_norm": 0.14020396769046783, "learning_rate": 0.01, "loss": 1.9648, "step": 85869 }, { "epoch": 8.829117828500925, "grad_norm": 0.07285384088754654, "learning_rate": 0.01, "loss": 1.9623, "step": 85872 }, { "epoch": 8.829426280074028, "grad_norm": 0.05504117161035538, "learning_rate": 0.01, "loss": 1.9373, "step": 85875 }, { "epoch": 8.82973473164713, "grad_norm": 0.05914220213890076, "learning_rate": 0.01, "loss": 1.9308, "step": 85878 }, { "epoch": 8.830043183220234, "grad_norm": 0.04905472695827484, "learning_rate": 0.01, "loss": 1.927, "step": 85881 }, { "epoch": 8.830351634793338, "grad_norm": 0.04060604050755501, "learning_rate": 0.01, "loss": 1.9484, "step": 85884 }, { "epoch": 8.830660086366441, "grad_norm": 0.034003157168626785, "learning_rate": 0.01, "loss": 1.9459, "step": 85887 }, { "epoch": 8.830968537939544, "grad_norm": 0.06942160427570343, "learning_rate": 0.01, "loss": 1.9512, "step": 85890 }, { "epoch": 8.831276989512647, "grad_norm": 0.14421160519123077, "learning_rate": 0.01, "loss": 1.9672, "step": 85893 }, { "epoch": 8.83158544108575, "grad_norm": 0.049919433891773224, "learning_rate": 0.01, "loss": 1.9546, "step": 85896 }, { "epoch": 8.831893892658853, "grad_norm": 0.03900736942887306, "learning_rate": 0.01, "loss": 1.9554, "step": 85899 }, { "epoch": 8.832202344231955, "grad_norm": 0.046023812144994736, "learning_rate": 0.01, "loss": 1.955, "step": 85902 }, { "epoch": 8.832510795805058, "grad_norm": 0.09192383289337158, "learning_rate": 0.01, "loss": 1.9515, "step": 85905 }, { "epoch": 8.832819247378161, "grad_norm": 0.08842380344867706, "learning_rate": 0.01, "loss": 1.9684, "step": 85908 }, { "epoch": 8.833127698951264, "grad_norm": 0.0487644225358963, "learning_rate": 0.01, "loss": 1.9431, "step": 85911 }, { "epoch": 8.833436150524367, "grad_norm": 0.0899803414940834, "learning_rate": 0.01, "loss": 1.9542, "step": 85914 }, { "epoch": 8.83374460209747, "grad_norm": 0.09449628740549088, "learning_rate": 0.01, "loss": 1.9524, "step": 85917 }, { "epoch": 8.834053053670573, "grad_norm": 0.041804246604442596, "learning_rate": 0.01, "loss": 1.9343, "step": 85920 }, { "epoch": 8.834361505243677, "grad_norm": 0.03399631753563881, "learning_rate": 0.01, "loss": 1.9003, "step": 85923 }, { "epoch": 8.83466995681678, "grad_norm": 0.1013641282916069, "learning_rate": 0.01, "loss": 1.9598, "step": 85926 }, { "epoch": 8.834978408389883, "grad_norm": 0.07109085470438004, "learning_rate": 0.01, "loss": 1.9293, "step": 85929 }, { "epoch": 8.835286859962986, "grad_norm": 0.0444265753030777, "learning_rate": 0.01, "loss": 1.9341, "step": 85932 }, { "epoch": 8.835595311536089, "grad_norm": 0.04196979105472565, "learning_rate": 0.01, "loss": 1.9114, "step": 85935 }, { "epoch": 8.835903763109192, "grad_norm": 0.057135023176670074, "learning_rate": 0.01, "loss": 1.9931, "step": 85938 }, { "epoch": 8.836212214682295, "grad_norm": 0.048445116728544235, "learning_rate": 0.01, "loss": 1.9105, "step": 85941 }, { "epoch": 8.836520666255398, "grad_norm": 0.04054427519440651, "learning_rate": 0.01, "loss": 1.9362, "step": 85944 }, { "epoch": 8.8368291178285, "grad_norm": 0.12011062353849411, "learning_rate": 0.01, "loss": 1.9499, "step": 85947 }, { "epoch": 8.837137569401603, "grad_norm": 0.13074417412281036, "learning_rate": 0.01, "loss": 1.9403, "step": 85950 }, { "epoch": 8.837446020974706, "grad_norm": 0.05142034962773323, "learning_rate": 0.01, "loss": 1.968, "step": 85953 }, { "epoch": 8.83775447254781, "grad_norm": 0.05493297800421715, "learning_rate": 0.01, "loss": 1.9356, "step": 85956 }, { "epoch": 8.838062924120912, "grad_norm": 0.037361111491918564, "learning_rate": 0.01, "loss": 1.9355, "step": 85959 }, { "epoch": 8.838371375694017, "grad_norm": 0.04489835351705551, "learning_rate": 0.01, "loss": 1.9555, "step": 85962 }, { "epoch": 8.83867982726712, "grad_norm": 0.03907403722405434, "learning_rate": 0.01, "loss": 1.9286, "step": 85965 }, { "epoch": 8.838988278840223, "grad_norm": 0.08557716757059097, "learning_rate": 0.01, "loss": 1.9372, "step": 85968 }, { "epoch": 8.839296730413325, "grad_norm": 0.061117153614759445, "learning_rate": 0.01, "loss": 1.9276, "step": 85971 }, { "epoch": 8.839605181986428, "grad_norm": 0.09186840802431107, "learning_rate": 0.01, "loss": 1.9392, "step": 85974 }, { "epoch": 8.839913633559531, "grad_norm": 0.049297600984573364, "learning_rate": 0.01, "loss": 1.9412, "step": 85977 }, { "epoch": 8.840222085132634, "grad_norm": 0.0782669186592102, "learning_rate": 0.01, "loss": 1.9505, "step": 85980 }, { "epoch": 8.840530536705737, "grad_norm": 0.10112703591585159, "learning_rate": 0.01, "loss": 1.9318, "step": 85983 }, { "epoch": 8.84083898827884, "grad_norm": 0.0647103562951088, "learning_rate": 0.01, "loss": 1.95, "step": 85986 }, { "epoch": 8.841147439851943, "grad_norm": 0.04339386895298958, "learning_rate": 0.01, "loss": 1.9715, "step": 85989 }, { "epoch": 8.841455891425046, "grad_norm": 0.0516306571662426, "learning_rate": 0.01, "loss": 1.9532, "step": 85992 }, { "epoch": 8.841764342998149, "grad_norm": 0.09764052927494049, "learning_rate": 0.01, "loss": 1.9789, "step": 85995 }, { "epoch": 8.842072794571251, "grad_norm": 0.060452841222286224, "learning_rate": 0.01, "loss": 1.9351, "step": 85998 }, { "epoch": 8.842381246144356, "grad_norm": 0.1076023206114769, "learning_rate": 0.01, "loss": 1.9313, "step": 86001 }, { "epoch": 8.842689697717459, "grad_norm": 0.056612275540828705, "learning_rate": 0.01, "loss": 1.9593, "step": 86004 }, { "epoch": 8.842998149290562, "grad_norm": 0.12334515899419785, "learning_rate": 0.01, "loss": 1.9373, "step": 86007 }, { "epoch": 8.843306600863665, "grad_norm": 0.05345802381634712, "learning_rate": 0.01, "loss": 1.9637, "step": 86010 }, { "epoch": 8.843615052436768, "grad_norm": 0.05063433572649956, "learning_rate": 0.01, "loss": 1.9432, "step": 86013 }, { "epoch": 8.84392350400987, "grad_norm": 0.05762813985347748, "learning_rate": 0.01, "loss": 1.9515, "step": 86016 }, { "epoch": 8.844231955582973, "grad_norm": 0.051242899149656296, "learning_rate": 0.01, "loss": 1.9378, "step": 86019 }, { "epoch": 8.844540407156076, "grad_norm": 0.045474182814359665, "learning_rate": 0.01, "loss": 1.91, "step": 86022 }, { "epoch": 8.84484885872918, "grad_norm": 0.062329549342393875, "learning_rate": 0.01, "loss": 1.9247, "step": 86025 }, { "epoch": 8.845157310302282, "grad_norm": 0.10545872151851654, "learning_rate": 0.01, "loss": 1.9525, "step": 86028 }, { "epoch": 8.845465761875385, "grad_norm": 0.06179576739668846, "learning_rate": 0.01, "loss": 1.9528, "step": 86031 }, { "epoch": 8.845774213448488, "grad_norm": 0.10388704389333725, "learning_rate": 0.01, "loss": 1.9494, "step": 86034 }, { "epoch": 8.84608266502159, "grad_norm": 0.06724070012569427, "learning_rate": 0.01, "loss": 1.9621, "step": 86037 }, { "epoch": 8.846391116594695, "grad_norm": 0.06309358775615692, "learning_rate": 0.01, "loss": 1.9624, "step": 86040 }, { "epoch": 8.846699568167798, "grad_norm": 0.07256193459033966, "learning_rate": 0.01, "loss": 1.9386, "step": 86043 }, { "epoch": 8.847008019740901, "grad_norm": 0.06528967618942261, "learning_rate": 0.01, "loss": 1.9302, "step": 86046 }, { "epoch": 8.847316471314004, "grad_norm": 0.03754352405667305, "learning_rate": 0.01, "loss": 1.9222, "step": 86049 }, { "epoch": 8.847624922887107, "grad_norm": 0.04206917807459831, "learning_rate": 0.01, "loss": 1.9641, "step": 86052 }, { "epoch": 8.84793337446021, "grad_norm": 0.06092971935868263, "learning_rate": 0.01, "loss": 1.9339, "step": 86055 }, { "epoch": 8.848241826033313, "grad_norm": 0.055908314883708954, "learning_rate": 0.01, "loss": 1.9173, "step": 86058 }, { "epoch": 8.848550277606416, "grad_norm": 0.04285154864192009, "learning_rate": 0.01, "loss": 1.955, "step": 86061 }, { "epoch": 8.848858729179518, "grad_norm": 0.04486127570271492, "learning_rate": 0.01, "loss": 1.9344, "step": 86064 }, { "epoch": 8.849167180752621, "grad_norm": 0.12317989021539688, "learning_rate": 0.01, "loss": 1.9311, "step": 86067 }, { "epoch": 8.849475632325724, "grad_norm": 0.04780368134379387, "learning_rate": 0.01, "loss": 1.9603, "step": 86070 }, { "epoch": 8.849784083898827, "grad_norm": 0.09250175207853317, "learning_rate": 0.01, "loss": 1.9558, "step": 86073 }, { "epoch": 8.85009253547193, "grad_norm": 0.05390471965074539, "learning_rate": 0.01, "loss": 1.9576, "step": 86076 }, { "epoch": 8.850400987045035, "grad_norm": 0.04068141430616379, "learning_rate": 0.01, "loss": 1.9588, "step": 86079 }, { "epoch": 8.850709438618138, "grad_norm": 0.037879377603530884, "learning_rate": 0.01, "loss": 1.9231, "step": 86082 }, { "epoch": 8.85101789019124, "grad_norm": 0.03136352822184563, "learning_rate": 0.01, "loss": 1.9086, "step": 86085 }, { "epoch": 8.851326341764343, "grad_norm": 0.13857915997505188, "learning_rate": 0.01, "loss": 1.9511, "step": 86088 }, { "epoch": 8.851634793337446, "grad_norm": 0.046125274151563644, "learning_rate": 0.01, "loss": 1.9407, "step": 86091 }, { "epoch": 8.851943244910549, "grad_norm": 0.11460349708795547, "learning_rate": 0.01, "loss": 1.9407, "step": 86094 }, { "epoch": 8.852251696483652, "grad_norm": 0.0628170296549797, "learning_rate": 0.01, "loss": 1.9567, "step": 86097 }, { "epoch": 8.852560148056755, "grad_norm": 0.0907411277294159, "learning_rate": 0.01, "loss": 1.9657, "step": 86100 }, { "epoch": 8.852868599629858, "grad_norm": 0.0656890794634819, "learning_rate": 0.01, "loss": 1.9297, "step": 86103 }, { "epoch": 8.85317705120296, "grad_norm": 0.0431518629193306, "learning_rate": 0.01, "loss": 1.9526, "step": 86106 }, { "epoch": 8.853485502776063, "grad_norm": 0.03522025793790817, "learning_rate": 0.01, "loss": 1.9466, "step": 86109 }, { "epoch": 8.853793954349166, "grad_norm": 0.09385208040475845, "learning_rate": 0.01, "loss": 1.933, "step": 86112 }, { "epoch": 8.85410240592227, "grad_norm": 0.04548000171780586, "learning_rate": 0.01, "loss": 1.9353, "step": 86115 }, { "epoch": 8.854410857495374, "grad_norm": 0.06799448281526566, "learning_rate": 0.01, "loss": 1.9474, "step": 86118 }, { "epoch": 8.854719309068477, "grad_norm": 0.041807446628808975, "learning_rate": 0.01, "loss": 1.9549, "step": 86121 }, { "epoch": 8.85502776064158, "grad_norm": 0.11906972527503967, "learning_rate": 0.01, "loss": 1.9363, "step": 86124 }, { "epoch": 8.855336212214683, "grad_norm": 0.11152509599924088, "learning_rate": 0.01, "loss": 1.9566, "step": 86127 }, { "epoch": 8.855644663787785, "grad_norm": 0.12766876816749573, "learning_rate": 0.01, "loss": 1.9285, "step": 86130 }, { "epoch": 8.855953115360888, "grad_norm": 0.11722945421934128, "learning_rate": 0.01, "loss": 1.9507, "step": 86133 }, { "epoch": 8.856261566933991, "grad_norm": 0.04626612737774849, "learning_rate": 0.01, "loss": 1.9613, "step": 86136 }, { "epoch": 8.856570018507094, "grad_norm": 0.040816087275743484, "learning_rate": 0.01, "loss": 1.9354, "step": 86139 }, { "epoch": 8.856878470080197, "grad_norm": 0.0497615672647953, "learning_rate": 0.01, "loss": 1.9421, "step": 86142 }, { "epoch": 8.8571869216533, "grad_norm": 0.04109172523021698, "learning_rate": 0.01, "loss": 1.9207, "step": 86145 }, { "epoch": 8.857495373226403, "grad_norm": 0.10170968621969223, "learning_rate": 0.01, "loss": 1.963, "step": 86148 }, { "epoch": 8.857803824799506, "grad_norm": 0.05853275582194328, "learning_rate": 0.01, "loss": 1.9511, "step": 86151 }, { "epoch": 8.858112276372609, "grad_norm": 0.04526359587907791, "learning_rate": 0.01, "loss": 1.9348, "step": 86154 }, { "epoch": 8.858420727945713, "grad_norm": 0.041641268879175186, "learning_rate": 0.01, "loss": 1.9309, "step": 86157 }, { "epoch": 8.858729179518816, "grad_norm": 0.04957803338766098, "learning_rate": 0.01, "loss": 1.939, "step": 86160 }, { "epoch": 8.859037631091919, "grad_norm": 0.09154763072729111, "learning_rate": 0.01, "loss": 1.9368, "step": 86163 }, { "epoch": 8.859346082665022, "grad_norm": 0.12526895105838776, "learning_rate": 0.01, "loss": 1.9384, "step": 86166 }, { "epoch": 8.859654534238125, "grad_norm": 0.056200604885816574, "learning_rate": 0.01, "loss": 1.9416, "step": 86169 }, { "epoch": 8.859962985811228, "grad_norm": 0.11231166869401932, "learning_rate": 0.01, "loss": 1.9381, "step": 86172 }, { "epoch": 8.86027143738433, "grad_norm": 0.11459070444107056, "learning_rate": 0.01, "loss": 1.9322, "step": 86175 }, { "epoch": 8.860579888957433, "grad_norm": 0.11196603626012802, "learning_rate": 0.01, "loss": 1.9685, "step": 86178 }, { "epoch": 8.860888340530536, "grad_norm": 0.056678954511880875, "learning_rate": 0.01, "loss": 1.9668, "step": 86181 }, { "epoch": 8.86119679210364, "grad_norm": 0.049753881990909576, "learning_rate": 0.01, "loss": 1.9671, "step": 86184 }, { "epoch": 8.861505243676742, "grad_norm": 0.045943088829517365, "learning_rate": 0.01, "loss": 1.9379, "step": 86187 }, { "epoch": 8.861813695249845, "grad_norm": 0.03190896287560463, "learning_rate": 0.01, "loss": 1.9364, "step": 86190 }, { "epoch": 8.862122146822948, "grad_norm": 0.034997474402189255, "learning_rate": 0.01, "loss": 1.9462, "step": 86193 }, { "epoch": 8.862430598396053, "grad_norm": 0.07974711060523987, "learning_rate": 0.01, "loss": 1.9708, "step": 86196 }, { "epoch": 8.862739049969155, "grad_norm": 0.10291502624750137, "learning_rate": 0.01, "loss": 1.9408, "step": 86199 }, { "epoch": 8.863047501542258, "grad_norm": 0.12553256750106812, "learning_rate": 0.01, "loss": 1.9437, "step": 86202 }, { "epoch": 8.863355953115361, "grad_norm": 0.11128824949264526, "learning_rate": 0.01, "loss": 1.9669, "step": 86205 }, { "epoch": 8.863664404688464, "grad_norm": 0.09532744437456131, "learning_rate": 0.01, "loss": 1.9651, "step": 86208 }, { "epoch": 8.863972856261567, "grad_norm": 0.06013484299182892, "learning_rate": 0.01, "loss": 1.9768, "step": 86211 }, { "epoch": 8.86428130783467, "grad_norm": 0.04983198642730713, "learning_rate": 0.01, "loss": 1.9457, "step": 86214 }, { "epoch": 8.864589759407773, "grad_norm": 0.04304705560207367, "learning_rate": 0.01, "loss": 1.9375, "step": 86217 }, { "epoch": 8.864898210980876, "grad_norm": 0.04451330006122589, "learning_rate": 0.01, "loss": 1.9692, "step": 86220 }, { "epoch": 8.865206662553978, "grad_norm": 0.052429914474487305, "learning_rate": 0.01, "loss": 1.942, "step": 86223 }, { "epoch": 8.865515114127081, "grad_norm": 0.15532724559307098, "learning_rate": 0.01, "loss": 1.9456, "step": 86226 }, { "epoch": 8.865823565700184, "grad_norm": 0.06498439610004425, "learning_rate": 0.01, "loss": 1.9528, "step": 86229 }, { "epoch": 8.866132017273289, "grad_norm": 0.028920544311404228, "learning_rate": 0.01, "loss": 1.9395, "step": 86232 }, { "epoch": 8.866440468846392, "grad_norm": 0.048817794770002365, "learning_rate": 0.01, "loss": 1.9351, "step": 86235 }, { "epoch": 8.866748920419495, "grad_norm": 0.08832227438688278, "learning_rate": 0.01, "loss": 1.9402, "step": 86238 }, { "epoch": 8.867057371992598, "grad_norm": 0.06445325165987015, "learning_rate": 0.01, "loss": 1.9294, "step": 86241 }, { "epoch": 8.8673658235657, "grad_norm": 0.04837856814265251, "learning_rate": 0.01, "loss": 1.9526, "step": 86244 }, { "epoch": 8.867674275138803, "grad_norm": 0.056823018938302994, "learning_rate": 0.01, "loss": 1.9667, "step": 86247 }, { "epoch": 8.867982726711906, "grad_norm": 0.0441332571208477, "learning_rate": 0.01, "loss": 1.9587, "step": 86250 }, { "epoch": 8.86829117828501, "grad_norm": 0.03880564495921135, "learning_rate": 0.01, "loss": 1.9406, "step": 86253 }, { "epoch": 8.868599629858112, "grad_norm": 0.09879901260137558, "learning_rate": 0.01, "loss": 1.9462, "step": 86256 }, { "epoch": 8.868908081431215, "grad_norm": 0.07412566244602203, "learning_rate": 0.01, "loss": 1.971, "step": 86259 }, { "epoch": 8.869216533004318, "grad_norm": 0.039541300386190414, "learning_rate": 0.01, "loss": 1.9007, "step": 86262 }, { "epoch": 8.86952498457742, "grad_norm": 0.043198730796575546, "learning_rate": 0.01, "loss": 1.9576, "step": 86265 }, { "epoch": 8.869833436150524, "grad_norm": 0.08590435981750488, "learning_rate": 0.01, "loss": 1.9438, "step": 86268 }, { "epoch": 8.870141887723628, "grad_norm": 0.06476505845785141, "learning_rate": 0.01, "loss": 1.918, "step": 86271 }, { "epoch": 8.870450339296731, "grad_norm": 0.10437195748090744, "learning_rate": 0.01, "loss": 1.9824, "step": 86274 }, { "epoch": 8.870758790869834, "grad_norm": 0.1389506608247757, "learning_rate": 0.01, "loss": 1.9419, "step": 86277 }, { "epoch": 8.871067242442937, "grad_norm": 0.13642540574073792, "learning_rate": 0.01, "loss": 1.9792, "step": 86280 }, { "epoch": 8.87137569401604, "grad_norm": 0.11155140399932861, "learning_rate": 0.01, "loss": 1.9354, "step": 86283 }, { "epoch": 8.871684145589143, "grad_norm": 0.06370694935321808, "learning_rate": 0.01, "loss": 1.9461, "step": 86286 }, { "epoch": 8.871992597162246, "grad_norm": 0.04209110513329506, "learning_rate": 0.01, "loss": 1.9598, "step": 86289 }, { "epoch": 8.872301048735348, "grad_norm": 0.047301799058914185, "learning_rate": 0.01, "loss": 1.9378, "step": 86292 }, { "epoch": 8.872609500308451, "grad_norm": 0.10057863593101501, "learning_rate": 0.01, "loss": 1.9359, "step": 86295 }, { "epoch": 8.872917951881554, "grad_norm": 0.08219350874423981, "learning_rate": 0.01, "loss": 1.944, "step": 86298 }, { "epoch": 8.873226403454657, "grad_norm": 0.08292625844478607, "learning_rate": 0.01, "loss": 1.9396, "step": 86301 }, { "epoch": 8.87353485502776, "grad_norm": 0.07303151488304138, "learning_rate": 0.01, "loss": 1.9517, "step": 86304 }, { "epoch": 8.873843306600863, "grad_norm": 0.058230068534612656, "learning_rate": 0.01, "loss": 1.9403, "step": 86307 }, { "epoch": 8.874151758173968, "grad_norm": 0.042837414890527725, "learning_rate": 0.01, "loss": 1.923, "step": 86310 }, { "epoch": 8.87446020974707, "grad_norm": 0.05397916957736015, "learning_rate": 0.01, "loss": 1.9619, "step": 86313 }, { "epoch": 8.874768661320173, "grad_norm": 0.04088616371154785, "learning_rate": 0.01, "loss": 1.9551, "step": 86316 }, { "epoch": 8.875077112893276, "grad_norm": 0.06871747970581055, "learning_rate": 0.01, "loss": 1.9423, "step": 86319 }, { "epoch": 8.875385564466379, "grad_norm": 0.10163968056440353, "learning_rate": 0.01, "loss": 1.9593, "step": 86322 }, { "epoch": 8.875694016039482, "grad_norm": 0.06452172249555588, "learning_rate": 0.01, "loss": 1.945, "step": 86325 }, { "epoch": 8.876002467612585, "grad_norm": 0.07819397747516632, "learning_rate": 0.01, "loss": 1.9408, "step": 86328 }, { "epoch": 8.876310919185688, "grad_norm": 0.0875793993473053, "learning_rate": 0.01, "loss": 1.9412, "step": 86331 }, { "epoch": 8.87661937075879, "grad_norm": 0.07515552639961243, "learning_rate": 0.01, "loss": 1.9469, "step": 86334 }, { "epoch": 8.876927822331893, "grad_norm": 0.03545791283249855, "learning_rate": 0.01, "loss": 1.9516, "step": 86337 }, { "epoch": 8.877236273904996, "grad_norm": 0.10219214111566544, "learning_rate": 0.01, "loss": 1.9497, "step": 86340 }, { "epoch": 8.8775447254781, "grad_norm": 0.07150894403457642, "learning_rate": 0.01, "loss": 1.9427, "step": 86343 }, { "epoch": 8.877853177051202, "grad_norm": 0.07047196477651596, "learning_rate": 0.01, "loss": 1.9423, "step": 86346 }, { "epoch": 8.878161628624307, "grad_norm": 0.058551233261823654, "learning_rate": 0.01, "loss": 1.9348, "step": 86349 }, { "epoch": 8.87847008019741, "grad_norm": 0.09620966762304306, "learning_rate": 0.01, "loss": 1.9616, "step": 86352 }, { "epoch": 8.878778531770513, "grad_norm": 0.043070271611213684, "learning_rate": 0.01, "loss": 1.9441, "step": 86355 }, { "epoch": 8.879086983343615, "grad_norm": 0.049491941928863525, "learning_rate": 0.01, "loss": 1.957, "step": 86358 }, { "epoch": 8.879395434916718, "grad_norm": 0.07890214771032333, "learning_rate": 0.01, "loss": 1.9479, "step": 86361 }, { "epoch": 8.879703886489821, "grad_norm": 0.11723397672176361, "learning_rate": 0.01, "loss": 1.9459, "step": 86364 }, { "epoch": 8.880012338062924, "grad_norm": 0.04008464887738228, "learning_rate": 0.01, "loss": 1.969, "step": 86367 }, { "epoch": 8.880320789636027, "grad_norm": 0.047027554363012314, "learning_rate": 0.01, "loss": 1.9618, "step": 86370 }, { "epoch": 8.88062924120913, "grad_norm": 0.0419597327709198, "learning_rate": 0.01, "loss": 1.9252, "step": 86373 }, { "epoch": 8.880937692782233, "grad_norm": 0.03923768550157547, "learning_rate": 0.01, "loss": 1.9642, "step": 86376 }, { "epoch": 8.881246144355336, "grad_norm": 0.09833482652902603, "learning_rate": 0.01, "loss": 1.9282, "step": 86379 }, { "epoch": 8.881554595928439, "grad_norm": 0.06715640425682068, "learning_rate": 0.01, "loss": 1.986, "step": 86382 }, { "epoch": 8.881863047501541, "grad_norm": 0.07140083611011505, "learning_rate": 0.01, "loss": 1.9482, "step": 86385 }, { "epoch": 8.882171499074646, "grad_norm": 0.13441288471221924, "learning_rate": 0.01, "loss": 1.9489, "step": 86388 }, { "epoch": 8.882479950647749, "grad_norm": 0.08413909375667572, "learning_rate": 0.01, "loss": 1.9316, "step": 86391 }, { "epoch": 8.882788402220852, "grad_norm": 0.08267387747764587, "learning_rate": 0.01, "loss": 1.9244, "step": 86394 }, { "epoch": 8.883096853793955, "grad_norm": 0.056718725711107254, "learning_rate": 0.01, "loss": 1.9311, "step": 86397 }, { "epoch": 8.883405305367058, "grad_norm": 0.042766470462083817, "learning_rate": 0.01, "loss": 1.9503, "step": 86400 }, { "epoch": 8.88371375694016, "grad_norm": 0.08797338604927063, "learning_rate": 0.01, "loss": 1.9618, "step": 86403 }, { "epoch": 8.884022208513263, "grad_norm": 0.043157946318387985, "learning_rate": 0.01, "loss": 1.9631, "step": 86406 }, { "epoch": 8.884330660086366, "grad_norm": 0.044488899409770966, "learning_rate": 0.01, "loss": 1.9313, "step": 86409 }, { "epoch": 8.88463911165947, "grad_norm": 0.06237373128533363, "learning_rate": 0.01, "loss": 1.9362, "step": 86412 }, { "epoch": 8.884947563232572, "grad_norm": 0.03562946245074272, "learning_rate": 0.01, "loss": 1.9384, "step": 86415 }, { "epoch": 8.885256014805675, "grad_norm": 0.1371944397687912, "learning_rate": 0.01, "loss": 1.9379, "step": 86418 }, { "epoch": 8.885564466378778, "grad_norm": 0.0579112283885479, "learning_rate": 0.01, "loss": 1.9547, "step": 86421 }, { "epoch": 8.88587291795188, "grad_norm": 0.07281753420829773, "learning_rate": 0.01, "loss": 1.9625, "step": 86424 }, { "epoch": 8.886181369524985, "grad_norm": 0.04341709613800049, "learning_rate": 0.01, "loss": 1.9627, "step": 86427 }, { "epoch": 8.886489821098088, "grad_norm": 0.0515635646879673, "learning_rate": 0.01, "loss": 1.9505, "step": 86430 }, { "epoch": 8.886798272671191, "grad_norm": 0.02943897247314453, "learning_rate": 0.01, "loss": 1.9662, "step": 86433 }, { "epoch": 8.887106724244294, "grad_norm": 0.0403241291642189, "learning_rate": 0.01, "loss": 1.9365, "step": 86436 }, { "epoch": 8.887415175817397, "grad_norm": 0.049521833658218384, "learning_rate": 0.01, "loss": 1.9374, "step": 86439 }, { "epoch": 8.8877236273905, "grad_norm": 0.03380339592695236, "learning_rate": 0.01, "loss": 1.9378, "step": 86442 }, { "epoch": 8.888032078963603, "grad_norm": 0.10890775173902512, "learning_rate": 0.01, "loss": 1.9708, "step": 86445 }, { "epoch": 8.888340530536706, "grad_norm": 0.08257224410772324, "learning_rate": 0.01, "loss": 1.9312, "step": 86448 }, { "epoch": 8.888648982109808, "grad_norm": 0.07431552559137344, "learning_rate": 0.01, "loss": 1.9672, "step": 86451 }, { "epoch": 8.888957433682911, "grad_norm": 0.05777169391512871, "learning_rate": 0.01, "loss": 1.9518, "step": 86454 }, { "epoch": 8.889265885256014, "grad_norm": 0.03272586315870285, "learning_rate": 0.01, "loss": 1.9476, "step": 86457 }, { "epoch": 8.889574336829117, "grad_norm": 0.036487750709056854, "learning_rate": 0.01, "loss": 1.954, "step": 86460 }, { "epoch": 8.889882788402222, "grad_norm": 0.10916222631931305, "learning_rate": 0.01, "loss": 1.9483, "step": 86463 }, { "epoch": 8.890191239975325, "grad_norm": 0.039324864745140076, "learning_rate": 0.01, "loss": 1.9604, "step": 86466 }, { "epoch": 8.890499691548428, "grad_norm": 0.08545426279306412, "learning_rate": 0.01, "loss": 1.9502, "step": 86469 }, { "epoch": 8.89080814312153, "grad_norm": 0.08365831524133682, "learning_rate": 0.01, "loss": 1.967, "step": 86472 }, { "epoch": 8.891116594694633, "grad_norm": 0.051737960427999496, "learning_rate": 0.01, "loss": 1.93, "step": 86475 }, { "epoch": 8.891425046267736, "grad_norm": 0.047015320509672165, "learning_rate": 0.01, "loss": 1.9379, "step": 86478 }, { "epoch": 8.891733497840839, "grad_norm": 0.05336649343371391, "learning_rate": 0.01, "loss": 1.9245, "step": 86481 }, { "epoch": 8.892041949413942, "grad_norm": 0.045248694717884064, "learning_rate": 0.01, "loss": 1.9817, "step": 86484 }, { "epoch": 8.892350400987045, "grad_norm": 0.10528840124607086, "learning_rate": 0.01, "loss": 1.9187, "step": 86487 }, { "epoch": 8.892658852560148, "grad_norm": 0.03929603472352028, "learning_rate": 0.01, "loss": 1.9426, "step": 86490 }, { "epoch": 8.89296730413325, "grad_norm": 0.10144193470478058, "learning_rate": 0.01, "loss": 1.9517, "step": 86493 }, { "epoch": 8.893275755706354, "grad_norm": 0.07188859581947327, "learning_rate": 0.01, "loss": 1.9283, "step": 86496 }, { "epoch": 8.893584207279456, "grad_norm": 0.1337640881538391, "learning_rate": 0.01, "loss": 1.9406, "step": 86499 }, { "epoch": 8.893892658852561, "grad_norm": 0.03903505951166153, "learning_rate": 0.01, "loss": 1.9386, "step": 86502 }, { "epoch": 8.894201110425664, "grad_norm": 0.14735199511051178, "learning_rate": 0.01, "loss": 1.9408, "step": 86505 }, { "epoch": 8.894509561998767, "grad_norm": 0.05615058168768883, "learning_rate": 0.01, "loss": 1.9345, "step": 86508 }, { "epoch": 8.89481801357187, "grad_norm": 0.13095706701278687, "learning_rate": 0.01, "loss": 1.9445, "step": 86511 }, { "epoch": 8.895126465144973, "grad_norm": 0.064826600253582, "learning_rate": 0.01, "loss": 1.9413, "step": 86514 }, { "epoch": 8.895434916718076, "grad_norm": 0.03320586308836937, "learning_rate": 0.01, "loss": 1.9519, "step": 86517 }, { "epoch": 8.895743368291178, "grad_norm": 0.035892751067876816, "learning_rate": 0.01, "loss": 1.9733, "step": 86520 }, { "epoch": 8.896051819864281, "grad_norm": 0.06792799383401871, "learning_rate": 0.01, "loss": 1.9452, "step": 86523 }, { "epoch": 8.896360271437384, "grad_norm": 0.03676078841090202, "learning_rate": 0.01, "loss": 1.9086, "step": 86526 }, { "epoch": 8.896668723010487, "grad_norm": 0.04583815485239029, "learning_rate": 0.01, "loss": 1.9586, "step": 86529 }, { "epoch": 8.89697717458359, "grad_norm": 0.0372345894575119, "learning_rate": 0.01, "loss": 1.9282, "step": 86532 }, { "epoch": 8.897285626156693, "grad_norm": 0.10942641645669937, "learning_rate": 0.01, "loss": 1.9388, "step": 86535 }, { "epoch": 8.897594077729796, "grad_norm": 0.07769682258367538, "learning_rate": 0.01, "loss": 1.9359, "step": 86538 }, { "epoch": 8.8979025293029, "grad_norm": 0.04752153158187866, "learning_rate": 0.01, "loss": 1.9477, "step": 86541 }, { "epoch": 8.898210980876003, "grad_norm": 0.05308716744184494, "learning_rate": 0.01, "loss": 1.9273, "step": 86544 }, { "epoch": 8.898519432449106, "grad_norm": 0.06798425316810608, "learning_rate": 0.01, "loss": 1.9337, "step": 86547 }, { "epoch": 8.898827884022209, "grad_norm": 0.04612553119659424, "learning_rate": 0.01, "loss": 1.9346, "step": 86550 }, { "epoch": 8.899136335595312, "grad_norm": 0.04493824392557144, "learning_rate": 0.01, "loss": 1.9263, "step": 86553 }, { "epoch": 8.899444787168415, "grad_norm": 0.04563165828585625, "learning_rate": 0.01, "loss": 1.9376, "step": 86556 }, { "epoch": 8.899753238741518, "grad_norm": 0.08082949370145798, "learning_rate": 0.01, "loss": 1.9583, "step": 86559 }, { "epoch": 8.90006169031462, "grad_norm": 0.05760066583752632, "learning_rate": 0.01, "loss": 1.9701, "step": 86562 }, { "epoch": 8.900370141887723, "grad_norm": 0.04055507853627205, "learning_rate": 0.01, "loss": 1.9569, "step": 86565 }, { "epoch": 8.900678593460826, "grad_norm": 0.12776689231395721, "learning_rate": 0.01, "loss": 1.9613, "step": 86568 }, { "epoch": 8.90098704503393, "grad_norm": 0.033561255782842636, "learning_rate": 0.01, "loss": 1.9754, "step": 86571 }, { "epoch": 8.901295496607032, "grad_norm": 0.04030522331595421, "learning_rate": 0.01, "loss": 1.9454, "step": 86574 }, { "epoch": 8.901603948180135, "grad_norm": 0.041517045348882675, "learning_rate": 0.01, "loss": 1.9542, "step": 86577 }, { "epoch": 8.90191239975324, "grad_norm": 0.13993878662586212, "learning_rate": 0.01, "loss": 1.9333, "step": 86580 }, { "epoch": 8.902220851326343, "grad_norm": 0.04944363608956337, "learning_rate": 0.01, "loss": 1.9523, "step": 86583 }, { "epoch": 8.902529302899445, "grad_norm": 0.0440177246928215, "learning_rate": 0.01, "loss": 1.9544, "step": 86586 }, { "epoch": 8.902837754472548, "grad_norm": 0.14848758280277252, "learning_rate": 0.01, "loss": 1.936, "step": 86589 }, { "epoch": 8.903146206045651, "grad_norm": 0.04407470300793648, "learning_rate": 0.01, "loss": 1.9588, "step": 86592 }, { "epoch": 8.903454657618754, "grad_norm": 0.04730687290430069, "learning_rate": 0.01, "loss": 1.94, "step": 86595 }, { "epoch": 8.903763109191857, "grad_norm": 0.04748192057013512, "learning_rate": 0.01, "loss": 1.9336, "step": 86598 }, { "epoch": 8.90407156076496, "grad_norm": 0.04898633435368538, "learning_rate": 0.01, "loss": 1.9582, "step": 86601 }, { "epoch": 8.904380012338063, "grad_norm": 0.040645040571689606, "learning_rate": 0.01, "loss": 1.9118, "step": 86604 }, { "epoch": 8.904688463911166, "grad_norm": 0.08757542073726654, "learning_rate": 0.01, "loss": 1.9694, "step": 86607 }, { "epoch": 8.904996915484269, "grad_norm": 0.04103793948888779, "learning_rate": 0.01, "loss": 1.9742, "step": 86610 }, { "epoch": 8.905305367057371, "grad_norm": 0.09652022272348404, "learning_rate": 0.01, "loss": 1.9379, "step": 86613 }, { "epoch": 8.905613818630474, "grad_norm": 0.09843424707651138, "learning_rate": 0.01, "loss": 1.964, "step": 86616 }, { "epoch": 8.905922270203579, "grad_norm": 0.05483357980847359, "learning_rate": 0.01, "loss": 1.9703, "step": 86619 }, { "epoch": 8.906230721776682, "grad_norm": 0.04492351412773132, "learning_rate": 0.01, "loss": 1.9463, "step": 86622 }, { "epoch": 8.906539173349785, "grad_norm": 0.037706341594457626, "learning_rate": 0.01, "loss": 1.9847, "step": 86625 }, { "epoch": 8.906847624922888, "grad_norm": 0.13539664447307587, "learning_rate": 0.01, "loss": 1.9237, "step": 86628 }, { "epoch": 8.90715607649599, "grad_norm": 0.038968782871961594, "learning_rate": 0.01, "loss": 1.9531, "step": 86631 }, { "epoch": 8.907464528069093, "grad_norm": 0.04004844278097153, "learning_rate": 0.01, "loss": 1.9235, "step": 86634 }, { "epoch": 8.907772979642196, "grad_norm": 0.05966116860508919, "learning_rate": 0.01, "loss": 1.941, "step": 86637 }, { "epoch": 8.9080814312153, "grad_norm": 0.032242551445961, "learning_rate": 0.01, "loss": 1.9532, "step": 86640 }, { "epoch": 8.908389882788402, "grad_norm": 0.04070298373699188, "learning_rate": 0.01, "loss": 1.9888, "step": 86643 }, { "epoch": 8.908698334361505, "grad_norm": 0.09953150898218155, "learning_rate": 0.01, "loss": 1.9235, "step": 86646 }, { "epoch": 8.909006785934608, "grad_norm": 0.09204036742448807, "learning_rate": 0.01, "loss": 1.963, "step": 86649 }, { "epoch": 8.90931523750771, "grad_norm": 0.1648140400648117, "learning_rate": 0.01, "loss": 1.93, "step": 86652 }, { "epoch": 8.909623689080814, "grad_norm": 0.09835021197795868, "learning_rate": 0.01, "loss": 1.9375, "step": 86655 }, { "epoch": 8.909932140653918, "grad_norm": 0.06145261600613594, "learning_rate": 0.01, "loss": 1.9373, "step": 86658 }, { "epoch": 8.910240592227021, "grad_norm": 0.060555968433618546, "learning_rate": 0.01, "loss": 1.9532, "step": 86661 }, { "epoch": 8.910549043800124, "grad_norm": 0.03184669092297554, "learning_rate": 0.01, "loss": 1.9139, "step": 86664 }, { "epoch": 8.910857495373227, "grad_norm": 0.10093247890472412, "learning_rate": 0.01, "loss": 1.9572, "step": 86667 }, { "epoch": 8.91116594694633, "grad_norm": 0.041602443903684616, "learning_rate": 0.01, "loss": 1.9381, "step": 86670 }, { "epoch": 8.911474398519433, "grad_norm": 0.05512705445289612, "learning_rate": 0.01, "loss": 1.9151, "step": 86673 }, { "epoch": 8.911782850092536, "grad_norm": 0.052319008857011795, "learning_rate": 0.01, "loss": 1.9387, "step": 86676 }, { "epoch": 8.912091301665638, "grad_norm": 0.09350787103176117, "learning_rate": 0.01, "loss": 1.9363, "step": 86679 }, { "epoch": 8.912399753238741, "grad_norm": 0.05260681360960007, "learning_rate": 0.01, "loss": 1.9783, "step": 86682 }, { "epoch": 8.912708204811844, "grad_norm": 0.03641582280397415, "learning_rate": 0.01, "loss": 1.9357, "step": 86685 }, { "epoch": 8.913016656384947, "grad_norm": 0.12317151576280594, "learning_rate": 0.01, "loss": 1.9584, "step": 86688 }, { "epoch": 8.91332510795805, "grad_norm": 0.05290086567401886, "learning_rate": 0.01, "loss": 1.9431, "step": 86691 }, { "epoch": 8.913633559531153, "grad_norm": 0.1165960282087326, "learning_rate": 0.01, "loss": 1.9466, "step": 86694 }, { "epoch": 8.913942011104258, "grad_norm": 0.03640148788690567, "learning_rate": 0.01, "loss": 1.9505, "step": 86697 }, { "epoch": 8.91425046267736, "grad_norm": 0.08245276659727097, "learning_rate": 0.01, "loss": 1.9419, "step": 86700 }, { "epoch": 8.914558914250463, "grad_norm": 0.035647567361593246, "learning_rate": 0.01, "loss": 1.9366, "step": 86703 }, { "epoch": 8.914867365823566, "grad_norm": 0.055706936866045, "learning_rate": 0.01, "loss": 1.9202, "step": 86706 }, { "epoch": 8.915175817396669, "grad_norm": 0.1239318773150444, "learning_rate": 0.01, "loss": 1.9644, "step": 86709 }, { "epoch": 8.915484268969772, "grad_norm": 0.08160992711782455, "learning_rate": 0.01, "loss": 1.9557, "step": 86712 }, { "epoch": 8.915792720542875, "grad_norm": 0.085784412920475, "learning_rate": 0.01, "loss": 1.9404, "step": 86715 }, { "epoch": 8.916101172115978, "grad_norm": 0.07544608414173126, "learning_rate": 0.01, "loss": 1.9442, "step": 86718 }, { "epoch": 8.91640962368908, "grad_norm": 0.05595572292804718, "learning_rate": 0.01, "loss": 1.948, "step": 86721 }, { "epoch": 8.916718075262184, "grad_norm": 0.03742340952157974, "learning_rate": 0.01, "loss": 1.9357, "step": 86724 }, { "epoch": 8.917026526835286, "grad_norm": 0.027727732434868813, "learning_rate": 0.01, "loss": 1.9082, "step": 86727 }, { "epoch": 8.91733497840839, "grad_norm": 0.0767802819609642, "learning_rate": 0.01, "loss": 1.9426, "step": 86730 }, { "epoch": 8.917643429981492, "grad_norm": 0.07831424474716187, "learning_rate": 0.01, "loss": 1.9291, "step": 86733 }, { "epoch": 8.917951881554597, "grad_norm": 0.07590378075838089, "learning_rate": 0.01, "loss": 1.9469, "step": 86736 }, { "epoch": 8.9182603331277, "grad_norm": 0.05041801184415817, "learning_rate": 0.01, "loss": 1.9696, "step": 86739 }, { "epoch": 8.918568784700803, "grad_norm": 0.039486199617385864, "learning_rate": 0.01, "loss": 1.9452, "step": 86742 }, { "epoch": 8.918877236273905, "grad_norm": 0.048161353915929794, "learning_rate": 0.01, "loss": 1.9481, "step": 86745 }, { "epoch": 8.919185687847008, "grad_norm": 0.04758733883500099, "learning_rate": 0.01, "loss": 1.9276, "step": 86748 }, { "epoch": 8.919494139420111, "grad_norm": 0.03529585897922516, "learning_rate": 0.01, "loss": 1.9288, "step": 86751 }, { "epoch": 8.919802590993214, "grad_norm": 0.033544719219207764, "learning_rate": 0.01, "loss": 1.9429, "step": 86754 }, { "epoch": 8.920111042566317, "grad_norm": 0.030024485662579536, "learning_rate": 0.01, "loss": 1.9557, "step": 86757 }, { "epoch": 8.92041949413942, "grad_norm": 0.04932880774140358, "learning_rate": 0.01, "loss": 1.9556, "step": 86760 }, { "epoch": 8.920727945712523, "grad_norm": 0.08779890090227127, "learning_rate": 0.01, "loss": 1.9515, "step": 86763 }, { "epoch": 8.921036397285626, "grad_norm": 0.04417244344949722, "learning_rate": 0.01, "loss": 1.9485, "step": 86766 }, { "epoch": 8.921344848858729, "grad_norm": 0.10385674238204956, "learning_rate": 0.01, "loss": 1.96, "step": 86769 }, { "epoch": 8.921653300431831, "grad_norm": 0.05979326739907265, "learning_rate": 0.01, "loss": 1.9653, "step": 86772 }, { "epoch": 8.921961752004936, "grad_norm": 0.0792737677693367, "learning_rate": 0.01, "loss": 1.9385, "step": 86775 }, { "epoch": 8.922270203578039, "grad_norm": 0.07325568795204163, "learning_rate": 0.01, "loss": 1.9324, "step": 86778 }, { "epoch": 8.922578655151142, "grad_norm": 0.03263154253363609, "learning_rate": 0.01, "loss": 1.9146, "step": 86781 }, { "epoch": 8.922887106724245, "grad_norm": 0.04729807749390602, "learning_rate": 0.01, "loss": 1.918, "step": 86784 }, { "epoch": 8.923195558297348, "grad_norm": 0.03822244331240654, "learning_rate": 0.01, "loss": 1.9453, "step": 86787 }, { "epoch": 8.92350400987045, "grad_norm": 0.06542357057332993, "learning_rate": 0.01, "loss": 1.9436, "step": 86790 }, { "epoch": 8.923812461443553, "grad_norm": 0.057463910430669785, "learning_rate": 0.01, "loss": 1.9433, "step": 86793 }, { "epoch": 8.924120913016656, "grad_norm": 0.09920434653759003, "learning_rate": 0.01, "loss": 1.9576, "step": 86796 }, { "epoch": 8.92442936458976, "grad_norm": 0.11771022528409958, "learning_rate": 0.01, "loss": 1.9204, "step": 86799 }, { "epoch": 8.924737816162862, "grad_norm": 0.06464545428752899, "learning_rate": 0.01, "loss": 1.9416, "step": 86802 }, { "epoch": 8.925046267735965, "grad_norm": 0.051915016025304794, "learning_rate": 0.01, "loss": 1.9655, "step": 86805 }, { "epoch": 8.925354719309068, "grad_norm": 0.05679486319422722, "learning_rate": 0.01, "loss": 1.9605, "step": 86808 }, { "epoch": 8.92566317088217, "grad_norm": 0.0370950922369957, "learning_rate": 0.01, "loss": 1.945, "step": 86811 }, { "epoch": 8.925971622455275, "grad_norm": 0.03662877529859543, "learning_rate": 0.01, "loss": 1.9543, "step": 86814 }, { "epoch": 8.926280074028378, "grad_norm": 0.11973265558481216, "learning_rate": 0.01, "loss": 1.945, "step": 86817 }, { "epoch": 8.926588525601481, "grad_norm": 0.051350776106119156, "learning_rate": 0.01, "loss": 1.9453, "step": 86820 }, { "epoch": 8.926896977174584, "grad_norm": 0.08452027291059494, "learning_rate": 0.01, "loss": 1.9399, "step": 86823 }, { "epoch": 8.927205428747687, "grad_norm": 0.06621302664279938, "learning_rate": 0.01, "loss": 1.9594, "step": 86826 }, { "epoch": 8.92751388032079, "grad_norm": 0.09285139292478561, "learning_rate": 0.01, "loss": 1.9322, "step": 86829 }, { "epoch": 8.927822331893893, "grad_norm": 0.05777567997574806, "learning_rate": 0.01, "loss": 1.9646, "step": 86832 }, { "epoch": 8.928130783466996, "grad_norm": 0.1285361349582672, "learning_rate": 0.01, "loss": 1.922, "step": 86835 }, { "epoch": 8.928439235040099, "grad_norm": 0.03722556307911873, "learning_rate": 0.01, "loss": 1.9741, "step": 86838 }, { "epoch": 8.928747686613201, "grad_norm": 0.03609219565987587, "learning_rate": 0.01, "loss": 1.9835, "step": 86841 }, { "epoch": 8.929056138186304, "grad_norm": 0.06217125803232193, "learning_rate": 0.01, "loss": 1.954, "step": 86844 }, { "epoch": 8.929364589759407, "grad_norm": 0.10021287202835083, "learning_rate": 0.01, "loss": 1.9727, "step": 86847 }, { "epoch": 8.92967304133251, "grad_norm": 0.15343636274337769, "learning_rate": 0.01, "loss": 1.9569, "step": 86850 }, { "epoch": 8.929981492905615, "grad_norm": 0.03606894984841347, "learning_rate": 0.01, "loss": 1.945, "step": 86853 }, { "epoch": 8.930289944478718, "grad_norm": 0.034981515258550644, "learning_rate": 0.01, "loss": 1.9718, "step": 86856 }, { "epoch": 8.93059839605182, "grad_norm": 0.043932538479566574, "learning_rate": 0.01, "loss": 1.9464, "step": 86859 }, { "epoch": 8.930906847624923, "grad_norm": 0.05214953050017357, "learning_rate": 0.01, "loss": 1.9349, "step": 86862 }, { "epoch": 8.931215299198026, "grad_norm": 0.06819628179073334, "learning_rate": 0.01, "loss": 1.9745, "step": 86865 }, { "epoch": 8.93152375077113, "grad_norm": 0.04332321509718895, "learning_rate": 0.01, "loss": 1.9078, "step": 86868 }, { "epoch": 8.931832202344232, "grad_norm": 0.04354182258248329, "learning_rate": 0.01, "loss": 1.9497, "step": 86871 }, { "epoch": 8.932140653917335, "grad_norm": 0.03317633643746376, "learning_rate": 0.01, "loss": 1.9536, "step": 86874 }, { "epoch": 8.932449105490438, "grad_norm": 0.0436597503721714, "learning_rate": 0.01, "loss": 1.9657, "step": 86877 }, { "epoch": 8.93275755706354, "grad_norm": 0.17182035744190216, "learning_rate": 0.01, "loss": 1.9426, "step": 86880 }, { "epoch": 8.933066008636644, "grad_norm": 0.04708772152662277, "learning_rate": 0.01, "loss": 1.9304, "step": 86883 }, { "epoch": 8.933374460209746, "grad_norm": 0.10389839112758636, "learning_rate": 0.01, "loss": 1.9494, "step": 86886 }, { "epoch": 8.93368291178285, "grad_norm": 0.07319999486207962, "learning_rate": 0.01, "loss": 1.9407, "step": 86889 }, { "epoch": 8.933991363355954, "grad_norm": 0.05096595361828804, "learning_rate": 0.01, "loss": 1.9246, "step": 86892 }, { "epoch": 8.934299814929057, "grad_norm": 0.03280860185623169, "learning_rate": 0.01, "loss": 1.9489, "step": 86895 }, { "epoch": 8.93460826650216, "grad_norm": 0.04681391641497612, "learning_rate": 0.01, "loss": 1.9448, "step": 86898 }, { "epoch": 8.934916718075263, "grad_norm": 0.07191100716590881, "learning_rate": 0.01, "loss": 1.9529, "step": 86901 }, { "epoch": 8.935225169648366, "grad_norm": 0.050077635794878006, "learning_rate": 0.01, "loss": 1.9689, "step": 86904 }, { "epoch": 8.935533621221468, "grad_norm": 0.03628779575228691, "learning_rate": 0.01, "loss": 1.9735, "step": 86907 }, { "epoch": 8.935842072794571, "grad_norm": 0.08990596234798431, "learning_rate": 0.01, "loss": 1.9344, "step": 86910 }, { "epoch": 8.936150524367674, "grad_norm": 0.061381399631500244, "learning_rate": 0.01, "loss": 1.9424, "step": 86913 }, { "epoch": 8.936458975940777, "grad_norm": 0.08864876627922058, "learning_rate": 0.01, "loss": 1.9556, "step": 86916 }, { "epoch": 8.93676742751388, "grad_norm": 0.06189894676208496, "learning_rate": 0.01, "loss": 1.9571, "step": 86919 }, { "epoch": 8.937075879086983, "grad_norm": 0.048378266394138336, "learning_rate": 0.01, "loss": 1.9318, "step": 86922 }, { "epoch": 8.937384330660086, "grad_norm": 0.04020404815673828, "learning_rate": 0.01, "loss": 1.9477, "step": 86925 }, { "epoch": 8.937692782233189, "grad_norm": 0.11243297904729843, "learning_rate": 0.01, "loss": 1.9077, "step": 86928 }, { "epoch": 8.938001233806293, "grad_norm": 0.08314108848571777, "learning_rate": 0.01, "loss": 1.9487, "step": 86931 }, { "epoch": 8.938309685379396, "grad_norm": 0.05910830944776535, "learning_rate": 0.01, "loss": 1.9065, "step": 86934 }, { "epoch": 8.938618136952499, "grad_norm": 0.03916241601109505, "learning_rate": 0.01, "loss": 1.9327, "step": 86937 }, { "epoch": 8.938926588525602, "grad_norm": 0.03755800798535347, "learning_rate": 0.01, "loss": 1.9476, "step": 86940 }, { "epoch": 8.939235040098705, "grad_norm": 0.0976007804274559, "learning_rate": 0.01, "loss": 1.9508, "step": 86943 }, { "epoch": 8.939543491671808, "grad_norm": 0.1216464415192604, "learning_rate": 0.01, "loss": 1.9483, "step": 86946 }, { "epoch": 8.93985194324491, "grad_norm": 0.07827116549015045, "learning_rate": 0.01, "loss": 1.9642, "step": 86949 }, { "epoch": 8.940160394818014, "grad_norm": 0.05447017401456833, "learning_rate": 0.01, "loss": 1.9581, "step": 86952 }, { "epoch": 8.940468846391116, "grad_norm": 0.04793046787381172, "learning_rate": 0.01, "loss": 1.9459, "step": 86955 }, { "epoch": 8.94077729796422, "grad_norm": 0.03350021317601204, "learning_rate": 0.01, "loss": 1.9346, "step": 86958 }, { "epoch": 8.941085749537322, "grad_norm": 0.038865212351083755, "learning_rate": 0.01, "loss": 1.9188, "step": 86961 }, { "epoch": 8.941394201110425, "grad_norm": 0.10471334308385849, "learning_rate": 0.01, "loss": 1.9585, "step": 86964 }, { "epoch": 8.941702652683528, "grad_norm": 0.06126569211483002, "learning_rate": 0.01, "loss": 1.9696, "step": 86967 }, { "epoch": 8.942011104256633, "grad_norm": 0.08202920854091644, "learning_rate": 0.01, "loss": 1.9443, "step": 86970 }, { "epoch": 8.942319555829735, "grad_norm": 0.10397789627313614, "learning_rate": 0.01, "loss": 1.9522, "step": 86973 }, { "epoch": 8.942628007402838, "grad_norm": 0.07265036553144455, "learning_rate": 0.01, "loss": 1.9262, "step": 86976 }, { "epoch": 8.942936458975941, "grad_norm": 0.035652872174978256, "learning_rate": 0.01, "loss": 1.9288, "step": 86979 }, { "epoch": 8.943244910549044, "grad_norm": 0.10862493515014648, "learning_rate": 0.01, "loss": 1.9319, "step": 86982 }, { "epoch": 8.943553362122147, "grad_norm": 0.06365188211202621, "learning_rate": 0.01, "loss": 1.9535, "step": 86985 }, { "epoch": 8.94386181369525, "grad_norm": 0.06253690272569656, "learning_rate": 0.01, "loss": 1.9316, "step": 86988 }, { "epoch": 8.944170265268353, "grad_norm": 0.035485751926898956, "learning_rate": 0.01, "loss": 1.9607, "step": 86991 }, { "epoch": 8.944478716841456, "grad_norm": 0.03434903174638748, "learning_rate": 0.01, "loss": 1.9212, "step": 86994 }, { "epoch": 8.944787168414559, "grad_norm": 0.03491094708442688, "learning_rate": 0.01, "loss": 1.9737, "step": 86997 }, { "epoch": 8.945095619987661, "grad_norm": 0.13820096850395203, "learning_rate": 0.01, "loss": 1.9504, "step": 87000 }, { "epoch": 8.945404071560764, "grad_norm": 0.11694854497909546, "learning_rate": 0.01, "loss": 1.9565, "step": 87003 }, { "epoch": 8.945712523133867, "grad_norm": 0.06137260049581528, "learning_rate": 0.01, "loss": 1.9523, "step": 87006 }, { "epoch": 8.946020974706972, "grad_norm": 0.041880011558532715, "learning_rate": 0.01, "loss": 1.9211, "step": 87009 }, { "epoch": 8.946329426280075, "grad_norm": 0.047957465052604675, "learning_rate": 0.01, "loss": 1.9447, "step": 87012 }, { "epoch": 8.946637877853178, "grad_norm": 0.048485107719898224, "learning_rate": 0.01, "loss": 1.9366, "step": 87015 }, { "epoch": 8.94694632942628, "grad_norm": 0.0791233703494072, "learning_rate": 0.01, "loss": 1.9441, "step": 87018 }, { "epoch": 8.947254780999383, "grad_norm": 0.07178815454244614, "learning_rate": 0.01, "loss": 1.9549, "step": 87021 }, { "epoch": 8.947563232572486, "grad_norm": 0.040599554777145386, "learning_rate": 0.01, "loss": 1.9351, "step": 87024 }, { "epoch": 8.94787168414559, "grad_norm": 0.05260973051190376, "learning_rate": 0.01, "loss": 1.9317, "step": 87027 }, { "epoch": 8.948180135718692, "grad_norm": 0.057267699390649796, "learning_rate": 0.01, "loss": 1.9383, "step": 87030 }, { "epoch": 8.948488587291795, "grad_norm": 0.036324381828308105, "learning_rate": 0.01, "loss": 1.9361, "step": 87033 }, { "epoch": 8.948797038864898, "grad_norm": 0.044209253042936325, "learning_rate": 0.01, "loss": 1.9221, "step": 87036 }, { "epoch": 8.949105490438, "grad_norm": 0.10309609025716782, "learning_rate": 0.01, "loss": 1.9563, "step": 87039 }, { "epoch": 8.949413942011104, "grad_norm": 0.10782811045646667, "learning_rate": 0.01, "loss": 1.9636, "step": 87042 }, { "epoch": 8.949722393584207, "grad_norm": 0.047176990658044815, "learning_rate": 0.01, "loss": 1.9621, "step": 87045 }, { "epoch": 8.950030845157311, "grad_norm": 0.05118265375494957, "learning_rate": 0.01, "loss": 1.9432, "step": 87048 }, { "epoch": 8.950339296730414, "grad_norm": 0.04261369630694389, "learning_rate": 0.01, "loss": 1.9633, "step": 87051 }, { "epoch": 8.950647748303517, "grad_norm": 0.05787837132811546, "learning_rate": 0.01, "loss": 1.9496, "step": 87054 }, { "epoch": 8.95095619987662, "grad_norm": 0.038357578217983246, "learning_rate": 0.01, "loss": 1.9639, "step": 87057 }, { "epoch": 8.951264651449723, "grad_norm": 0.05408007279038429, "learning_rate": 0.01, "loss": 1.9552, "step": 87060 }, { "epoch": 8.951573103022826, "grad_norm": 0.08756155520677567, "learning_rate": 0.01, "loss": 1.9296, "step": 87063 }, { "epoch": 8.951881554595928, "grad_norm": 0.11525848507881165, "learning_rate": 0.01, "loss": 1.9553, "step": 87066 }, { "epoch": 8.952190006169031, "grad_norm": 0.04911641404032707, "learning_rate": 0.01, "loss": 1.9417, "step": 87069 }, { "epoch": 8.952498457742134, "grad_norm": 0.11142449826002121, "learning_rate": 0.01, "loss": 1.9655, "step": 87072 }, { "epoch": 8.952806909315237, "grad_norm": 0.057849034667015076, "learning_rate": 0.01, "loss": 1.9481, "step": 87075 }, { "epoch": 8.95311536088834, "grad_norm": 0.054354969412088394, "learning_rate": 0.01, "loss": 1.9455, "step": 87078 }, { "epoch": 8.953423812461443, "grad_norm": 0.11387278884649277, "learning_rate": 0.01, "loss": 1.9238, "step": 87081 }, { "epoch": 8.953732264034546, "grad_norm": 0.043686944991350174, "learning_rate": 0.01, "loss": 1.9356, "step": 87084 }, { "epoch": 8.95404071560765, "grad_norm": 0.11665847152471542, "learning_rate": 0.01, "loss": 1.9586, "step": 87087 }, { "epoch": 8.954349167180753, "grad_norm": 0.04742182791233063, "learning_rate": 0.01, "loss": 1.9468, "step": 87090 }, { "epoch": 8.954657618753856, "grad_norm": 0.06386453658342361, "learning_rate": 0.01, "loss": 1.9493, "step": 87093 }, { "epoch": 8.95496607032696, "grad_norm": 0.047558024525642395, "learning_rate": 0.01, "loss": 1.9103, "step": 87096 }, { "epoch": 8.955274521900062, "grad_norm": 0.04645347595214844, "learning_rate": 0.01, "loss": 1.9471, "step": 87099 }, { "epoch": 8.955582973473165, "grad_norm": 0.0866866484284401, "learning_rate": 0.01, "loss": 1.9706, "step": 87102 }, { "epoch": 8.955891425046268, "grad_norm": 0.07707598805427551, "learning_rate": 0.01, "loss": 1.9445, "step": 87105 }, { "epoch": 8.95619987661937, "grad_norm": 0.03881002217531204, "learning_rate": 0.01, "loss": 1.9691, "step": 87108 }, { "epoch": 8.956508328192474, "grad_norm": 0.04681535065174103, "learning_rate": 0.01, "loss": 1.9286, "step": 87111 }, { "epoch": 8.956816779765576, "grad_norm": 0.09788013994693756, "learning_rate": 0.01, "loss": 1.9697, "step": 87114 }, { "epoch": 8.95712523133868, "grad_norm": 0.06216461583971977, "learning_rate": 0.01, "loss": 1.9726, "step": 87117 }, { "epoch": 8.957433682911782, "grad_norm": 0.06608910113573074, "learning_rate": 0.01, "loss": 1.9502, "step": 87120 }, { "epoch": 8.957742134484885, "grad_norm": 0.08615975826978683, "learning_rate": 0.01, "loss": 1.9851, "step": 87123 }, { "epoch": 8.95805058605799, "grad_norm": 0.08540789783000946, "learning_rate": 0.01, "loss": 1.9117, "step": 87126 }, { "epoch": 8.958359037631093, "grad_norm": 0.10461477190256119, "learning_rate": 0.01, "loss": 1.9527, "step": 87129 }, { "epoch": 8.958667489204196, "grad_norm": 0.06833938509225845, "learning_rate": 0.01, "loss": 1.9763, "step": 87132 }, { "epoch": 8.958975940777298, "grad_norm": 0.048409439623355865, "learning_rate": 0.01, "loss": 1.9405, "step": 87135 }, { "epoch": 8.959284392350401, "grad_norm": 0.030358022078871727, "learning_rate": 0.01, "loss": 1.9496, "step": 87138 }, { "epoch": 8.959592843923504, "grad_norm": 0.044521231204271317, "learning_rate": 0.01, "loss": 1.9555, "step": 87141 }, { "epoch": 8.959901295496607, "grad_norm": 0.045875221490859985, "learning_rate": 0.01, "loss": 1.9287, "step": 87144 }, { "epoch": 8.96020974706971, "grad_norm": 0.05634180083870888, "learning_rate": 0.01, "loss": 1.9419, "step": 87147 }, { "epoch": 8.960518198642813, "grad_norm": 0.04120972007513046, "learning_rate": 0.01, "loss": 1.9455, "step": 87150 }, { "epoch": 8.960826650215916, "grad_norm": 0.1378134787082672, "learning_rate": 0.01, "loss": 1.9367, "step": 87153 }, { "epoch": 8.961135101789019, "grad_norm": 0.09309209138154984, "learning_rate": 0.01, "loss": 1.9471, "step": 87156 }, { "epoch": 8.961443553362122, "grad_norm": 0.07671626657247543, "learning_rate": 0.01, "loss": 1.9586, "step": 87159 }, { "epoch": 8.961752004935224, "grad_norm": 0.03704199194908142, "learning_rate": 0.01, "loss": 1.9643, "step": 87162 }, { "epoch": 8.962060456508329, "grad_norm": 0.05708852410316467, "learning_rate": 0.01, "loss": 1.9334, "step": 87165 }, { "epoch": 8.962368908081432, "grad_norm": 0.04932823032140732, "learning_rate": 0.01, "loss": 1.9415, "step": 87168 }, { "epoch": 8.962677359654535, "grad_norm": 0.06841997802257538, "learning_rate": 0.01, "loss": 1.9596, "step": 87171 }, { "epoch": 8.962985811227638, "grad_norm": 0.09124533832073212, "learning_rate": 0.01, "loss": 1.95, "step": 87174 }, { "epoch": 8.96329426280074, "grad_norm": 0.0966772809624672, "learning_rate": 0.01, "loss": 1.9536, "step": 87177 }, { "epoch": 8.963602714373843, "grad_norm": 0.09721286594867706, "learning_rate": 0.01, "loss": 1.9577, "step": 87180 }, { "epoch": 8.963911165946946, "grad_norm": 0.09772830456495285, "learning_rate": 0.01, "loss": 1.9356, "step": 87183 }, { "epoch": 8.96421961752005, "grad_norm": 0.061515890061855316, "learning_rate": 0.01, "loss": 1.9565, "step": 87186 }, { "epoch": 8.964528069093152, "grad_norm": 0.043670911341905594, "learning_rate": 0.01, "loss": 1.9596, "step": 87189 }, { "epoch": 8.964836520666255, "grad_norm": 0.04097547382116318, "learning_rate": 0.01, "loss": 1.9468, "step": 87192 }, { "epoch": 8.965144972239358, "grad_norm": 0.045037586241960526, "learning_rate": 0.01, "loss": 1.9453, "step": 87195 }, { "epoch": 8.96545342381246, "grad_norm": 0.06821490079164505, "learning_rate": 0.01, "loss": 1.9648, "step": 87198 }, { "epoch": 8.965761875385564, "grad_norm": 0.0660410225391388, "learning_rate": 0.01, "loss": 1.9547, "step": 87201 }, { "epoch": 8.966070326958668, "grad_norm": 0.0699450895190239, "learning_rate": 0.01, "loss": 1.9506, "step": 87204 }, { "epoch": 8.966378778531771, "grad_norm": 0.09934176504611969, "learning_rate": 0.01, "loss": 1.9594, "step": 87207 }, { "epoch": 8.966687230104874, "grad_norm": 0.047030240297317505, "learning_rate": 0.01, "loss": 1.945, "step": 87210 }, { "epoch": 8.966995681677977, "grad_norm": 0.08642113953828812, "learning_rate": 0.01, "loss": 1.9631, "step": 87213 }, { "epoch": 8.96730413325108, "grad_norm": 0.11990761011838913, "learning_rate": 0.01, "loss": 1.939, "step": 87216 }, { "epoch": 8.967612584824183, "grad_norm": 0.05154174938797951, "learning_rate": 0.01, "loss": 1.9616, "step": 87219 }, { "epoch": 8.967921036397286, "grad_norm": 0.03815053030848503, "learning_rate": 0.01, "loss": 1.9444, "step": 87222 }, { "epoch": 8.968229487970389, "grad_norm": 0.049095261842012405, "learning_rate": 0.01, "loss": 1.9449, "step": 87225 }, { "epoch": 8.968537939543491, "grad_norm": 0.08481363207101822, "learning_rate": 0.01, "loss": 1.9446, "step": 87228 }, { "epoch": 8.968846391116594, "grad_norm": 0.04782922565937042, "learning_rate": 0.01, "loss": 1.9397, "step": 87231 }, { "epoch": 8.969154842689697, "grad_norm": 0.03404981642961502, "learning_rate": 0.01, "loss": 1.9605, "step": 87234 }, { "epoch": 8.9694632942628, "grad_norm": 0.04017357528209686, "learning_rate": 0.01, "loss": 1.9608, "step": 87237 }, { "epoch": 8.969771745835903, "grad_norm": 0.04835909977555275, "learning_rate": 0.01, "loss": 1.9429, "step": 87240 }, { "epoch": 8.970080197409008, "grad_norm": 0.06477301567792892, "learning_rate": 0.01, "loss": 1.9471, "step": 87243 }, { "epoch": 8.97038864898211, "grad_norm": 0.09944019466638565, "learning_rate": 0.01, "loss": 1.9625, "step": 87246 }, { "epoch": 8.970697100555213, "grad_norm": 0.04341105371713638, "learning_rate": 0.01, "loss": 1.9235, "step": 87249 }, { "epoch": 8.971005552128316, "grad_norm": 0.09491550177335739, "learning_rate": 0.01, "loss": 1.9278, "step": 87252 }, { "epoch": 8.97131400370142, "grad_norm": 0.08826873451471329, "learning_rate": 0.01, "loss": 1.9583, "step": 87255 }, { "epoch": 8.971622455274522, "grad_norm": 0.09465787559747696, "learning_rate": 0.01, "loss": 1.9475, "step": 87258 }, { "epoch": 8.971930906847625, "grad_norm": 0.08723200112581253, "learning_rate": 0.01, "loss": 1.9659, "step": 87261 }, { "epoch": 8.972239358420728, "grad_norm": 0.11504415422677994, "learning_rate": 0.01, "loss": 1.9638, "step": 87264 }, { "epoch": 8.97254780999383, "grad_norm": 0.04780331254005432, "learning_rate": 0.01, "loss": 1.9516, "step": 87267 }, { "epoch": 8.972856261566934, "grad_norm": 0.06980123370885849, "learning_rate": 0.01, "loss": 1.9315, "step": 87270 }, { "epoch": 8.973164713140036, "grad_norm": 0.05528416857123375, "learning_rate": 0.01, "loss": 1.9461, "step": 87273 }, { "epoch": 8.97347316471314, "grad_norm": 0.07386885583400726, "learning_rate": 0.01, "loss": 1.9437, "step": 87276 }, { "epoch": 8.973781616286242, "grad_norm": 0.12215300649404526, "learning_rate": 0.01, "loss": 1.9422, "step": 87279 }, { "epoch": 8.974090067859347, "grad_norm": 0.06513124704360962, "learning_rate": 0.01, "loss": 1.946, "step": 87282 }, { "epoch": 8.97439851943245, "grad_norm": 0.051685601472854614, "learning_rate": 0.01, "loss": 1.9774, "step": 87285 }, { "epoch": 8.974706971005553, "grad_norm": 0.09058764576911926, "learning_rate": 0.01, "loss": 1.9093, "step": 87288 }, { "epoch": 8.975015422578656, "grad_norm": 0.0636780709028244, "learning_rate": 0.01, "loss": 1.9245, "step": 87291 }, { "epoch": 8.975323874151758, "grad_norm": 0.1368209272623062, "learning_rate": 0.01, "loss": 1.9311, "step": 87294 }, { "epoch": 8.975632325724861, "grad_norm": 0.11455652862787247, "learning_rate": 0.01, "loss": 1.9628, "step": 87297 }, { "epoch": 8.975940777297964, "grad_norm": 0.10468421131372452, "learning_rate": 0.01, "loss": 1.9396, "step": 87300 }, { "epoch": 8.976249228871067, "grad_norm": 0.1308230608701706, "learning_rate": 0.01, "loss": 1.9614, "step": 87303 }, { "epoch": 8.97655768044417, "grad_norm": 0.07051686197519302, "learning_rate": 0.01, "loss": 1.9347, "step": 87306 }, { "epoch": 8.976866132017273, "grad_norm": 0.0951620489358902, "learning_rate": 0.01, "loss": 1.9218, "step": 87309 }, { "epoch": 8.977174583590376, "grad_norm": 0.0550762303173542, "learning_rate": 0.01, "loss": 1.9532, "step": 87312 }, { "epoch": 8.977483035163479, "grad_norm": 0.04169492423534393, "learning_rate": 0.01, "loss": 1.9494, "step": 87315 }, { "epoch": 8.977791486736582, "grad_norm": 0.035810817033052444, "learning_rate": 0.01, "loss": 1.95, "step": 87318 }, { "epoch": 8.978099938309686, "grad_norm": 0.040050823241472244, "learning_rate": 0.01, "loss": 1.9461, "step": 87321 }, { "epoch": 8.978408389882789, "grad_norm": 0.061197470873594284, "learning_rate": 0.01, "loss": 1.9694, "step": 87324 }, { "epoch": 8.978716841455892, "grad_norm": 0.06339312344789505, "learning_rate": 0.01, "loss": 1.9322, "step": 87327 }, { "epoch": 8.979025293028995, "grad_norm": 0.08060112595558167, "learning_rate": 0.01, "loss": 1.9449, "step": 87330 }, { "epoch": 8.979333744602098, "grad_norm": 0.09748803824186325, "learning_rate": 0.01, "loss": 1.9398, "step": 87333 }, { "epoch": 8.9796421961752, "grad_norm": 0.03794391080737114, "learning_rate": 0.01, "loss": 1.9538, "step": 87336 }, { "epoch": 8.979950647748304, "grad_norm": 0.07690727710723877, "learning_rate": 0.01, "loss": 1.9374, "step": 87339 }, { "epoch": 8.980259099321406, "grad_norm": 0.047774530947208405, "learning_rate": 0.01, "loss": 1.9425, "step": 87342 }, { "epoch": 8.98056755089451, "grad_norm": 0.05362236499786377, "learning_rate": 0.01, "loss": 1.9583, "step": 87345 }, { "epoch": 8.980876002467612, "grad_norm": 0.06028943508863449, "learning_rate": 0.01, "loss": 1.9448, "step": 87348 }, { "epoch": 8.981184454040715, "grad_norm": 0.04904436320066452, "learning_rate": 0.01, "loss": 1.9342, "step": 87351 }, { "epoch": 8.981492905613818, "grad_norm": 0.04311923310160637, "learning_rate": 0.01, "loss": 1.9435, "step": 87354 }, { "epoch": 8.98180135718692, "grad_norm": 0.11199583858251572, "learning_rate": 0.01, "loss": 1.9487, "step": 87357 }, { "epoch": 8.982109808760026, "grad_norm": 0.05089421197772026, "learning_rate": 0.01, "loss": 1.9522, "step": 87360 }, { "epoch": 8.982418260333128, "grad_norm": 0.12338538467884064, "learning_rate": 0.01, "loss": 1.9399, "step": 87363 }, { "epoch": 8.982726711906231, "grad_norm": 0.06985056400299072, "learning_rate": 0.01, "loss": 1.9483, "step": 87366 }, { "epoch": 8.983035163479334, "grad_norm": 0.08937839418649673, "learning_rate": 0.01, "loss": 1.9486, "step": 87369 }, { "epoch": 8.983343615052437, "grad_norm": 0.047080084681510925, "learning_rate": 0.01, "loss": 1.9525, "step": 87372 }, { "epoch": 8.98365206662554, "grad_norm": 0.03225898742675781, "learning_rate": 0.01, "loss": 1.9343, "step": 87375 }, { "epoch": 8.983960518198643, "grad_norm": 0.030636625364422798, "learning_rate": 0.01, "loss": 1.951, "step": 87378 }, { "epoch": 8.984268969771746, "grad_norm": 0.08280804008245468, "learning_rate": 0.01, "loss": 1.9514, "step": 87381 }, { "epoch": 8.984577421344849, "grad_norm": 0.09910699725151062, "learning_rate": 0.01, "loss": 1.9246, "step": 87384 }, { "epoch": 8.984885872917951, "grad_norm": 0.07833388447761536, "learning_rate": 0.01, "loss": 1.9268, "step": 87387 }, { "epoch": 8.985194324491054, "grad_norm": 0.03863595798611641, "learning_rate": 0.01, "loss": 1.9517, "step": 87390 }, { "epoch": 8.985502776064157, "grad_norm": 0.039544396102428436, "learning_rate": 0.01, "loss": 1.9309, "step": 87393 }, { "epoch": 8.98581122763726, "grad_norm": 0.057196080684661865, "learning_rate": 0.01, "loss": 1.9375, "step": 87396 }, { "epoch": 8.986119679210365, "grad_norm": 0.06040463596582413, "learning_rate": 0.01, "loss": 1.9269, "step": 87399 }, { "epoch": 8.986428130783468, "grad_norm": 0.09306348115205765, "learning_rate": 0.01, "loss": 1.9355, "step": 87402 }, { "epoch": 8.98673658235657, "grad_norm": 0.08759880810976028, "learning_rate": 0.01, "loss": 1.9262, "step": 87405 }, { "epoch": 8.987045033929673, "grad_norm": 0.06999802589416504, "learning_rate": 0.01, "loss": 1.9469, "step": 87408 }, { "epoch": 8.987353485502776, "grad_norm": 0.1001400351524353, "learning_rate": 0.01, "loss": 1.9382, "step": 87411 }, { "epoch": 8.98766193707588, "grad_norm": 0.11107636988162994, "learning_rate": 0.01, "loss": 1.953, "step": 87414 }, { "epoch": 8.987970388648982, "grad_norm": 0.09698151051998138, "learning_rate": 0.01, "loss": 1.9653, "step": 87417 }, { "epoch": 8.988278840222085, "grad_norm": 0.096869096159935, "learning_rate": 0.01, "loss": 1.9713, "step": 87420 }, { "epoch": 8.988587291795188, "grad_norm": 0.08064346015453339, "learning_rate": 0.01, "loss": 1.9298, "step": 87423 }, { "epoch": 8.98889574336829, "grad_norm": 0.08183082938194275, "learning_rate": 0.01, "loss": 1.9212, "step": 87426 }, { "epoch": 8.989204194941394, "grad_norm": 0.0429241918027401, "learning_rate": 0.01, "loss": 1.9163, "step": 87429 }, { "epoch": 8.989512646514497, "grad_norm": 0.0404486320912838, "learning_rate": 0.01, "loss": 1.9514, "step": 87432 }, { "epoch": 8.9898210980876, "grad_norm": 0.049001146107912064, "learning_rate": 0.01, "loss": 1.9699, "step": 87435 }, { "epoch": 8.990129549660704, "grad_norm": 0.05515841022133827, "learning_rate": 0.01, "loss": 1.9441, "step": 87438 }, { "epoch": 8.990438001233807, "grad_norm": 0.08830389380455017, "learning_rate": 0.01, "loss": 1.9669, "step": 87441 }, { "epoch": 8.99074645280691, "grad_norm": 0.07468203455209732, "learning_rate": 0.01, "loss": 1.923, "step": 87444 }, { "epoch": 8.991054904380013, "grad_norm": 0.0886622965335846, "learning_rate": 0.01, "loss": 1.9624, "step": 87447 }, { "epoch": 8.991363355953116, "grad_norm": 0.04467757046222687, "learning_rate": 0.01, "loss": 1.9706, "step": 87450 }, { "epoch": 8.991671807526219, "grad_norm": 0.05850984901189804, "learning_rate": 0.01, "loss": 1.9445, "step": 87453 }, { "epoch": 8.991980259099321, "grad_norm": 0.07062778621912003, "learning_rate": 0.01, "loss": 1.9366, "step": 87456 }, { "epoch": 8.992288710672424, "grad_norm": 0.09450197964906693, "learning_rate": 0.01, "loss": 1.9305, "step": 87459 }, { "epoch": 8.992597162245527, "grad_norm": 0.08685463666915894, "learning_rate": 0.01, "loss": 1.9476, "step": 87462 }, { "epoch": 8.99290561381863, "grad_norm": 0.06854233890771866, "learning_rate": 0.01, "loss": 1.9628, "step": 87465 }, { "epoch": 8.993214065391733, "grad_norm": 0.13164113461971283, "learning_rate": 0.01, "loss": 1.9396, "step": 87468 }, { "epoch": 8.993522516964836, "grad_norm": 0.09722437709569931, "learning_rate": 0.01, "loss": 1.9317, "step": 87471 }, { "epoch": 8.993830968537939, "grad_norm": 0.06404321640729904, "learning_rate": 0.01, "loss": 1.9636, "step": 87474 }, { "epoch": 8.994139420111043, "grad_norm": 0.04171566292643547, "learning_rate": 0.01, "loss": 1.9418, "step": 87477 }, { "epoch": 8.994447871684146, "grad_norm": 0.04546424746513367, "learning_rate": 0.01, "loss": 1.9636, "step": 87480 }, { "epoch": 8.99475632325725, "grad_norm": 0.051627617329359055, "learning_rate": 0.01, "loss": 1.9358, "step": 87483 }, { "epoch": 8.995064774830352, "grad_norm": 0.11216254532337189, "learning_rate": 0.01, "loss": 1.9487, "step": 87486 }, { "epoch": 8.995373226403455, "grad_norm": 0.059223853051662445, "learning_rate": 0.01, "loss": 1.9404, "step": 87489 }, { "epoch": 8.995681677976558, "grad_norm": 0.03879429027438164, "learning_rate": 0.01, "loss": 1.9502, "step": 87492 }, { "epoch": 8.99599012954966, "grad_norm": 0.03178856894373894, "learning_rate": 0.01, "loss": 1.9612, "step": 87495 }, { "epoch": 8.996298581122764, "grad_norm": 0.06816231459379196, "learning_rate": 0.01, "loss": 1.9364, "step": 87498 }, { "epoch": 8.996607032695866, "grad_norm": 0.07472996413707733, "learning_rate": 0.01, "loss": 1.9444, "step": 87501 }, { "epoch": 8.99691548426897, "grad_norm": 0.053777918219566345, "learning_rate": 0.01, "loss": 1.9585, "step": 87504 }, { "epoch": 8.997223935842072, "grad_norm": 0.11232874542474747, "learning_rate": 0.01, "loss": 1.9457, "step": 87507 }, { "epoch": 8.997532387415175, "grad_norm": 0.08200154453516006, "learning_rate": 0.01, "loss": 1.9431, "step": 87510 }, { "epoch": 8.997840838988278, "grad_norm": 0.1282903105020523, "learning_rate": 0.01, "loss": 1.9318, "step": 87513 }, { "epoch": 8.998149290561383, "grad_norm": 0.04237685352563858, "learning_rate": 0.01, "loss": 1.9503, "step": 87516 }, { "epoch": 8.998457742134486, "grad_norm": 0.03894351050257683, "learning_rate": 0.01, "loss": 1.9374, "step": 87519 }, { "epoch": 8.998766193707588, "grad_norm": 0.04536869749426842, "learning_rate": 0.01, "loss": 1.9629, "step": 87522 }, { "epoch": 8.999074645280691, "grad_norm": 0.08715896308422089, "learning_rate": 0.01, "loss": 1.96, "step": 87525 }, { "epoch": 8.999383096853794, "grad_norm": 0.05087468773126602, "learning_rate": 0.01, "loss": 1.9458, "step": 87528 }, { "epoch": 8.999691548426897, "grad_norm": 0.04533283784985542, "learning_rate": 0.01, "loss": 1.9506, "step": 87531 }, { "epoch": 9.0, "grad_norm": 0.06103600934147835, "learning_rate": 0.01, "loss": 1.9707, "step": 87534 }, { "epoch": 9.037476770596738, "grad_norm": 0.0529954619705677, "learning_rate": 0.01, "loss": 1.9383, "step": 87537 }, { "epoch": 9.03778649597357, "grad_norm": 0.11809484660625458, "learning_rate": 0.01, "loss": 1.9558, "step": 87540 }, { "epoch": 9.038096221350402, "grad_norm": 0.04205942526459694, "learning_rate": 0.01, "loss": 1.9558, "step": 87543 }, { "epoch": 9.038405946727234, "grad_norm": 0.12428398430347443, "learning_rate": 0.01, "loss": 1.9494, "step": 87546 }, { "epoch": 9.038715672104068, "grad_norm": 0.07727963477373123, "learning_rate": 0.01, "loss": 1.9394, "step": 87549 }, { "epoch": 9.0390253974809, "grad_norm": 0.033585965633392334, "learning_rate": 0.01, "loss": 1.9377, "step": 87552 }, { "epoch": 9.039335122857732, "grad_norm": 0.1038275957107544, "learning_rate": 0.01, "loss": 1.951, "step": 87555 }, { "epoch": 9.039644848234566, "grad_norm": 0.044634561985731125, "learning_rate": 0.01, "loss": 1.9596, "step": 87558 }, { "epoch": 9.039954573611398, "grad_norm": 0.035352591425180435, "learning_rate": 0.01, "loss": 1.9656, "step": 87561 }, { "epoch": 9.04026429898823, "grad_norm": 0.034989580512046814, "learning_rate": 0.01, "loss": 1.9396, "step": 87564 }, { "epoch": 9.040574024365062, "grad_norm": 0.04315212368965149, "learning_rate": 0.01, "loss": 1.9477, "step": 87567 }, { "epoch": 9.040883749741896, "grad_norm": 0.0486474372446537, "learning_rate": 0.01, "loss": 1.9644, "step": 87570 }, { "epoch": 9.041193475118728, "grad_norm": 0.07493503391742706, "learning_rate": 0.01, "loss": 1.9406, "step": 87573 }, { "epoch": 9.04150320049556, "grad_norm": 0.060698721557855606, "learning_rate": 0.01, "loss": 1.9527, "step": 87576 }, { "epoch": 9.041812925872392, "grad_norm": 0.05844523757696152, "learning_rate": 0.01, "loss": 1.9597, "step": 87579 }, { "epoch": 9.042122651249226, "grad_norm": 0.049893446266651154, "learning_rate": 0.01, "loss": 1.923, "step": 87582 }, { "epoch": 9.042432376626058, "grad_norm": 0.10238265991210938, "learning_rate": 0.01, "loss": 1.9557, "step": 87585 }, { "epoch": 9.04274210200289, "grad_norm": 0.04680422320961952, "learning_rate": 0.01, "loss": 1.9331, "step": 87588 }, { "epoch": 9.043051827379724, "grad_norm": 0.07950558513402939, "learning_rate": 0.01, "loss": 1.9744, "step": 87591 }, { "epoch": 9.043361552756556, "grad_norm": 0.07196646928787231, "learning_rate": 0.01, "loss": 1.966, "step": 87594 }, { "epoch": 9.043671278133388, "grad_norm": 0.06920761615037918, "learning_rate": 0.01, "loss": 1.9537, "step": 87597 }, { "epoch": 9.04398100351022, "grad_norm": 0.05272555723786354, "learning_rate": 0.01, "loss": 1.9598, "step": 87600 }, { "epoch": 9.044290728887054, "grad_norm": 0.057334136217832565, "learning_rate": 0.01, "loss": 1.9639, "step": 87603 }, { "epoch": 9.044600454263886, "grad_norm": 0.037144239991903305, "learning_rate": 0.01, "loss": 1.951, "step": 87606 }, { "epoch": 9.044910179640718, "grad_norm": 0.06027961149811745, "learning_rate": 0.01, "loss": 1.949, "step": 87609 }, { "epoch": 9.045219905017552, "grad_norm": 0.11373438686132431, "learning_rate": 0.01, "loss": 1.9333, "step": 87612 }, { "epoch": 9.045529630394384, "grad_norm": 0.11062182486057281, "learning_rate": 0.01, "loss": 1.9555, "step": 87615 }, { "epoch": 9.045839355771216, "grad_norm": 0.14294542372226715, "learning_rate": 0.01, "loss": 1.9365, "step": 87618 }, { "epoch": 9.046149081148048, "grad_norm": 0.10985869914293289, "learning_rate": 0.01, "loss": 1.9493, "step": 87621 }, { "epoch": 9.046458806524882, "grad_norm": 0.0926436111330986, "learning_rate": 0.01, "loss": 1.9351, "step": 87624 }, { "epoch": 9.046768531901714, "grad_norm": 0.07326491922140121, "learning_rate": 0.01, "loss": 1.9863, "step": 87627 }, { "epoch": 9.047078257278546, "grad_norm": 0.07650945335626602, "learning_rate": 0.01, "loss": 1.9283, "step": 87630 }, { "epoch": 9.047387982655378, "grad_norm": 0.06261960417032242, "learning_rate": 0.01, "loss": 1.9454, "step": 87633 }, { "epoch": 9.047697708032212, "grad_norm": 0.04512837529182434, "learning_rate": 0.01, "loss": 1.9339, "step": 87636 }, { "epoch": 9.048007433409044, "grad_norm": 0.06115034222602844, "learning_rate": 0.01, "loss": 1.955, "step": 87639 }, { "epoch": 9.048317158785876, "grad_norm": 0.05525890737771988, "learning_rate": 0.01, "loss": 1.9436, "step": 87642 }, { "epoch": 9.04862688416271, "grad_norm": 0.047319334000349045, "learning_rate": 0.01, "loss": 1.9377, "step": 87645 }, { "epoch": 9.048936609539542, "grad_norm": 0.12764939665794373, "learning_rate": 0.01, "loss": 1.9359, "step": 87648 }, { "epoch": 9.049246334916374, "grad_norm": 0.04160080850124359, "learning_rate": 0.01, "loss": 1.9535, "step": 87651 }, { "epoch": 9.049556060293206, "grad_norm": 0.09743288904428482, "learning_rate": 0.01, "loss": 1.9352, "step": 87654 }, { "epoch": 9.04986578567004, "grad_norm": 0.05857638269662857, "learning_rate": 0.01, "loss": 1.9513, "step": 87657 }, { "epoch": 9.050175511046872, "grad_norm": 0.08639366179704666, "learning_rate": 0.01, "loss": 1.9354, "step": 87660 }, { "epoch": 9.050485236423704, "grad_norm": 0.046659234911203384, "learning_rate": 0.01, "loss": 1.952, "step": 87663 }, { "epoch": 9.050794961800538, "grad_norm": 0.03477618098258972, "learning_rate": 0.01, "loss": 1.9626, "step": 87666 }, { "epoch": 9.05110468717737, "grad_norm": 0.0495758019387722, "learning_rate": 0.01, "loss": 1.9466, "step": 87669 }, { "epoch": 9.051414412554202, "grad_norm": 0.08402826637029648, "learning_rate": 0.01, "loss": 1.9345, "step": 87672 }, { "epoch": 9.051724137931034, "grad_norm": 0.06008414924144745, "learning_rate": 0.01, "loss": 1.9481, "step": 87675 }, { "epoch": 9.052033863307868, "grad_norm": 0.05847488343715668, "learning_rate": 0.01, "loss": 1.9667, "step": 87678 }, { "epoch": 9.0523435886847, "grad_norm": 0.08440254628658295, "learning_rate": 0.01, "loss": 1.9222, "step": 87681 }, { "epoch": 9.052653314061532, "grad_norm": 0.11293185502290726, "learning_rate": 0.01, "loss": 1.9276, "step": 87684 }, { "epoch": 9.052963039438364, "grad_norm": 0.0945296362042427, "learning_rate": 0.01, "loss": 1.9331, "step": 87687 }, { "epoch": 9.053272764815198, "grad_norm": 0.0900648981332779, "learning_rate": 0.01, "loss": 1.9342, "step": 87690 }, { "epoch": 9.05358249019203, "grad_norm": 0.06403549760580063, "learning_rate": 0.01, "loss": 1.9706, "step": 87693 }, { "epoch": 9.053892215568862, "grad_norm": 0.07860220223665237, "learning_rate": 0.01, "loss": 1.9445, "step": 87696 }, { "epoch": 9.054201940945696, "grad_norm": 0.0828719511628151, "learning_rate": 0.01, "loss": 1.9557, "step": 87699 }, { "epoch": 9.054511666322528, "grad_norm": 0.09248120337724686, "learning_rate": 0.01, "loss": 1.9568, "step": 87702 }, { "epoch": 9.05482139169936, "grad_norm": 0.04926043376326561, "learning_rate": 0.01, "loss": 1.9453, "step": 87705 }, { "epoch": 9.055131117076192, "grad_norm": 0.07462218403816223, "learning_rate": 0.01, "loss": 1.9453, "step": 87708 }, { "epoch": 9.055440842453025, "grad_norm": 0.09336571395397186, "learning_rate": 0.01, "loss": 1.9414, "step": 87711 }, { "epoch": 9.055750567829858, "grad_norm": 0.08080223947763443, "learning_rate": 0.01, "loss": 1.9221, "step": 87714 }, { "epoch": 9.05606029320669, "grad_norm": 0.051116976886987686, "learning_rate": 0.01, "loss": 1.927, "step": 87717 }, { "epoch": 9.056370018583523, "grad_norm": 0.07592598348855972, "learning_rate": 0.01, "loss": 1.9338, "step": 87720 }, { "epoch": 9.056679743960355, "grad_norm": 0.08347304910421371, "learning_rate": 0.01, "loss": 1.9443, "step": 87723 }, { "epoch": 9.056989469337188, "grad_norm": 0.12313603609800339, "learning_rate": 0.01, "loss": 1.9279, "step": 87726 }, { "epoch": 9.05729919471402, "grad_norm": 0.13923709094524384, "learning_rate": 0.01, "loss": 1.9285, "step": 87729 }, { "epoch": 9.057608920090853, "grad_norm": 0.09582033008337021, "learning_rate": 0.01, "loss": 1.9264, "step": 87732 }, { "epoch": 9.057918645467685, "grad_norm": 0.07468133419752121, "learning_rate": 0.01, "loss": 1.9052, "step": 87735 }, { "epoch": 9.058228370844517, "grad_norm": 0.05262709781527519, "learning_rate": 0.01, "loss": 1.9567, "step": 87738 }, { "epoch": 9.05853809622135, "grad_norm": 0.05200639367103577, "learning_rate": 0.01, "loss": 1.9572, "step": 87741 }, { "epoch": 9.058847821598183, "grad_norm": 0.08909532427787781, "learning_rate": 0.01, "loss": 1.935, "step": 87744 }, { "epoch": 9.059157546975015, "grad_norm": 0.08905582875013351, "learning_rate": 0.01, "loss": 1.9566, "step": 87747 }, { "epoch": 9.059467272351847, "grad_norm": 0.25634267926216125, "learning_rate": 0.01, "loss": 1.9561, "step": 87750 }, { "epoch": 9.059776997728681, "grad_norm": 0.11460110545158386, "learning_rate": 0.01, "loss": 1.9726, "step": 87753 }, { "epoch": 9.060086723105513, "grad_norm": 0.04081350937485695, "learning_rate": 0.01, "loss": 1.9553, "step": 87756 }, { "epoch": 9.060396448482345, "grad_norm": 0.06634104251861572, "learning_rate": 0.01, "loss": 1.9382, "step": 87759 }, { "epoch": 9.060706173859177, "grad_norm": 0.03965061530470848, "learning_rate": 0.01, "loss": 1.9377, "step": 87762 }, { "epoch": 9.061015899236011, "grad_norm": 0.045786574482917786, "learning_rate": 0.01, "loss": 1.9464, "step": 87765 }, { "epoch": 9.061325624612843, "grad_norm": 0.0300121009349823, "learning_rate": 0.01, "loss": 1.9541, "step": 87768 }, { "epoch": 9.061635349989675, "grad_norm": 0.04564841836690903, "learning_rate": 0.01, "loss": 1.9781, "step": 87771 }, { "epoch": 9.06194507536651, "grad_norm": 0.045970767736434937, "learning_rate": 0.01, "loss": 1.9599, "step": 87774 }, { "epoch": 9.062254800743341, "grad_norm": 0.04083496704697609, "learning_rate": 0.01, "loss": 1.9491, "step": 87777 }, { "epoch": 9.062564526120173, "grad_norm": 0.053099412471055984, "learning_rate": 0.01, "loss": 1.9531, "step": 87780 }, { "epoch": 9.062874251497005, "grad_norm": 0.06963446736335754, "learning_rate": 0.01, "loss": 1.9372, "step": 87783 }, { "epoch": 9.06318397687384, "grad_norm": 0.07380146533250809, "learning_rate": 0.01, "loss": 1.9604, "step": 87786 }, { "epoch": 9.063493702250671, "grad_norm": 0.05961961671710014, "learning_rate": 0.01, "loss": 1.9509, "step": 87789 }, { "epoch": 9.063803427627503, "grad_norm": 0.05864206328988075, "learning_rate": 0.01, "loss": 1.9348, "step": 87792 }, { "epoch": 9.064113153004335, "grad_norm": 0.1107788160443306, "learning_rate": 0.01, "loss": 1.9282, "step": 87795 }, { "epoch": 9.06442287838117, "grad_norm": 0.0659669041633606, "learning_rate": 0.01, "loss": 1.939, "step": 87798 }, { "epoch": 9.064732603758001, "grad_norm": 0.10495319217443466, "learning_rate": 0.01, "loss": 1.9511, "step": 87801 }, { "epoch": 9.065042329134833, "grad_norm": 0.04215942695736885, "learning_rate": 0.01, "loss": 1.9372, "step": 87804 }, { "epoch": 9.065352054511667, "grad_norm": 0.057210393249988556, "learning_rate": 0.01, "loss": 1.9686, "step": 87807 }, { "epoch": 9.065661779888499, "grad_norm": 0.05553483963012695, "learning_rate": 0.01, "loss": 1.9528, "step": 87810 }, { "epoch": 9.065971505265331, "grad_norm": 0.05527332052588463, "learning_rate": 0.01, "loss": 1.9565, "step": 87813 }, { "epoch": 9.066281230642163, "grad_norm": 0.03902657330036163, "learning_rate": 0.01, "loss": 1.9485, "step": 87816 }, { "epoch": 9.066590956018997, "grad_norm": 0.0412173829972744, "learning_rate": 0.01, "loss": 1.9485, "step": 87819 }, { "epoch": 9.066900681395829, "grad_norm": 0.04407156631350517, "learning_rate": 0.01, "loss": 1.9512, "step": 87822 }, { "epoch": 9.067210406772661, "grad_norm": 0.10649632662534714, "learning_rate": 0.01, "loss": 1.9364, "step": 87825 }, { "epoch": 9.067520132149495, "grad_norm": 0.07599849998950958, "learning_rate": 0.01, "loss": 1.9732, "step": 87828 }, { "epoch": 9.067829857526327, "grad_norm": 0.13357628881931305, "learning_rate": 0.01, "loss": 1.9594, "step": 87831 }, { "epoch": 9.068139582903159, "grad_norm": 0.047365784645080566, "learning_rate": 0.01, "loss": 1.9604, "step": 87834 }, { "epoch": 9.068449308279991, "grad_norm": 0.036827005445957184, "learning_rate": 0.01, "loss": 1.9314, "step": 87837 }, { "epoch": 9.068759033656825, "grad_norm": 0.0627468079328537, "learning_rate": 0.01, "loss": 1.9281, "step": 87840 }, { "epoch": 9.069068759033657, "grad_norm": 0.04302553832530975, "learning_rate": 0.01, "loss": 1.9335, "step": 87843 }, { "epoch": 9.069378484410489, "grad_norm": 0.059402476996183395, "learning_rate": 0.01, "loss": 1.9299, "step": 87846 }, { "epoch": 9.069688209787321, "grad_norm": 0.1257617026567459, "learning_rate": 0.01, "loss": 1.9426, "step": 87849 }, { "epoch": 9.069997935164155, "grad_norm": 0.11703430861234665, "learning_rate": 0.01, "loss": 1.9304, "step": 87852 }, { "epoch": 9.070307660540987, "grad_norm": 0.05478603020310402, "learning_rate": 0.01, "loss": 1.9358, "step": 87855 }, { "epoch": 9.070617385917819, "grad_norm": 0.08334581553936005, "learning_rate": 0.01, "loss": 1.9266, "step": 87858 }, { "epoch": 9.070927111294653, "grad_norm": 0.09045934677124023, "learning_rate": 0.01, "loss": 1.9465, "step": 87861 }, { "epoch": 9.071236836671485, "grad_norm": 0.0530586764216423, "learning_rate": 0.01, "loss": 1.9735, "step": 87864 }, { "epoch": 9.071546562048317, "grad_norm": 0.051753733307123184, "learning_rate": 0.01, "loss": 1.9668, "step": 87867 }, { "epoch": 9.071856287425149, "grad_norm": 0.03743633255362511, "learning_rate": 0.01, "loss": 1.9534, "step": 87870 }, { "epoch": 9.072166012801983, "grad_norm": 0.054411083459854126, "learning_rate": 0.01, "loss": 1.9355, "step": 87873 }, { "epoch": 9.072475738178815, "grad_norm": 0.11333665996789932, "learning_rate": 0.01, "loss": 1.9403, "step": 87876 }, { "epoch": 9.072785463555647, "grad_norm": 0.06036665290594101, "learning_rate": 0.01, "loss": 1.9489, "step": 87879 }, { "epoch": 9.07309518893248, "grad_norm": 0.05209678038954735, "learning_rate": 0.01, "loss": 1.9575, "step": 87882 }, { "epoch": 9.073404914309313, "grad_norm": 0.0966220274567604, "learning_rate": 0.01, "loss": 1.9213, "step": 87885 }, { "epoch": 9.073714639686145, "grad_norm": 0.17173926532268524, "learning_rate": 0.01, "loss": 1.934, "step": 87888 }, { "epoch": 9.074024365062977, "grad_norm": 0.07872462272644043, "learning_rate": 0.01, "loss": 1.9413, "step": 87891 }, { "epoch": 9.07433409043981, "grad_norm": 0.07835288345813751, "learning_rate": 0.01, "loss": 1.9277, "step": 87894 }, { "epoch": 9.074643815816643, "grad_norm": 0.05287022143602371, "learning_rate": 0.01, "loss": 1.969, "step": 87897 }, { "epoch": 9.074953541193475, "grad_norm": 0.054281800985336304, "learning_rate": 0.01, "loss": 1.9432, "step": 87900 }, { "epoch": 9.075263266570307, "grad_norm": 0.07372366636991501, "learning_rate": 0.01, "loss": 1.9443, "step": 87903 }, { "epoch": 9.07557299194714, "grad_norm": 0.06382538378238678, "learning_rate": 0.01, "loss": 1.9657, "step": 87906 }, { "epoch": 9.075882717323973, "grad_norm": 0.12408961355686188, "learning_rate": 0.01, "loss": 1.9646, "step": 87909 }, { "epoch": 9.076192442700805, "grad_norm": 0.07578717172145844, "learning_rate": 0.01, "loss": 1.9456, "step": 87912 }, { "epoch": 9.076502168077639, "grad_norm": 0.04379868134856224, "learning_rate": 0.01, "loss": 1.9533, "step": 87915 }, { "epoch": 9.07681189345447, "grad_norm": 0.053197700530290604, "learning_rate": 0.01, "loss": 1.9634, "step": 87918 }, { "epoch": 9.077121618831303, "grad_norm": 0.038773033767938614, "learning_rate": 0.01, "loss": 1.9524, "step": 87921 }, { "epoch": 9.077431344208135, "grad_norm": 0.08359051495790482, "learning_rate": 0.01, "loss": 1.9225, "step": 87924 }, { "epoch": 9.077741069584969, "grad_norm": 0.039631109684705734, "learning_rate": 0.01, "loss": 1.9395, "step": 87927 }, { "epoch": 9.0780507949618, "grad_norm": 0.0650608092546463, "learning_rate": 0.01, "loss": 1.9538, "step": 87930 }, { "epoch": 9.078360520338633, "grad_norm": 0.07246024161577225, "learning_rate": 0.01, "loss": 1.9446, "step": 87933 }, { "epoch": 9.078670245715466, "grad_norm": 0.15074563026428223, "learning_rate": 0.01, "loss": 1.9342, "step": 87936 }, { "epoch": 9.078979971092298, "grad_norm": 0.052601393312215805, "learning_rate": 0.01, "loss": 1.9616, "step": 87939 }, { "epoch": 9.07928969646913, "grad_norm": 0.0388791598379612, "learning_rate": 0.01, "loss": 1.9613, "step": 87942 }, { "epoch": 9.079599421845963, "grad_norm": 0.07762248814105988, "learning_rate": 0.01, "loss": 1.9432, "step": 87945 }, { "epoch": 9.079909147222796, "grad_norm": 0.09564360976219177, "learning_rate": 0.01, "loss": 1.9267, "step": 87948 }, { "epoch": 9.080218872599628, "grad_norm": 0.06043870002031326, "learning_rate": 0.01, "loss": 1.932, "step": 87951 }, { "epoch": 9.08052859797646, "grad_norm": 0.04472746327519417, "learning_rate": 0.01, "loss": 1.9264, "step": 87954 }, { "epoch": 9.080838323353293, "grad_norm": 0.10817110538482666, "learning_rate": 0.01, "loss": 1.9564, "step": 87957 }, { "epoch": 9.081148048730126, "grad_norm": 0.06465522199869156, "learning_rate": 0.01, "loss": 1.9396, "step": 87960 }, { "epoch": 9.081457774106958, "grad_norm": 0.0649423748254776, "learning_rate": 0.01, "loss": 1.9193, "step": 87963 }, { "epoch": 9.08176749948379, "grad_norm": 0.05806288123130798, "learning_rate": 0.01, "loss": 1.9674, "step": 87966 }, { "epoch": 9.082077224860624, "grad_norm": 0.04380207508802414, "learning_rate": 0.01, "loss": 1.979, "step": 87969 }, { "epoch": 9.082386950237456, "grad_norm": 0.03688583895564079, "learning_rate": 0.01, "loss": 1.9472, "step": 87972 }, { "epoch": 9.082696675614288, "grad_norm": 0.035529594868421555, "learning_rate": 0.01, "loss": 1.9486, "step": 87975 }, { "epoch": 9.08300640099112, "grad_norm": 0.03344225138425827, "learning_rate": 0.01, "loss": 1.9467, "step": 87978 }, { "epoch": 9.083316126367954, "grad_norm": 0.03736155480146408, "learning_rate": 0.01, "loss": 1.9366, "step": 87981 }, { "epoch": 9.083625851744786, "grad_norm": 0.07427693903446198, "learning_rate": 0.01, "loss": 1.9664, "step": 87984 }, { "epoch": 9.083935577121618, "grad_norm": 0.048297081142663956, "learning_rate": 0.01, "loss": 1.9381, "step": 87987 }, { "epoch": 9.084245302498452, "grad_norm": 0.11602925509214401, "learning_rate": 0.01, "loss": 1.9428, "step": 87990 }, { "epoch": 9.084555027875284, "grad_norm": 0.10392475128173828, "learning_rate": 0.01, "loss": 1.9163, "step": 87993 }, { "epoch": 9.084864753252116, "grad_norm": 0.16618256270885468, "learning_rate": 0.01, "loss": 1.9407, "step": 87996 }, { "epoch": 9.085174478628948, "grad_norm": 0.10360529273748398, "learning_rate": 0.01, "loss": 1.9565, "step": 87999 }, { "epoch": 9.085484204005782, "grad_norm": 0.09314300864934921, "learning_rate": 0.01, "loss": 1.9339, "step": 88002 }, { "epoch": 9.085793929382614, "grad_norm": 0.07424509525299072, "learning_rate": 0.01, "loss": 1.9092, "step": 88005 }, { "epoch": 9.086103654759446, "grad_norm": 0.04636434093117714, "learning_rate": 0.01, "loss": 1.9495, "step": 88008 }, { "epoch": 9.086413380136278, "grad_norm": 0.038621935993433, "learning_rate": 0.01, "loss": 1.9255, "step": 88011 }, { "epoch": 9.086723105513112, "grad_norm": 0.04796215519309044, "learning_rate": 0.01, "loss": 1.9909, "step": 88014 }, { "epoch": 9.087032830889944, "grad_norm": 0.05523217096924782, "learning_rate": 0.01, "loss": 1.9337, "step": 88017 }, { "epoch": 9.087342556266776, "grad_norm": 0.06570705026388168, "learning_rate": 0.01, "loss": 1.948, "step": 88020 }, { "epoch": 9.08765228164361, "grad_norm": 0.0739300474524498, "learning_rate": 0.01, "loss": 1.9743, "step": 88023 }, { "epoch": 9.087962007020442, "grad_norm": 0.09385763108730316, "learning_rate": 0.01, "loss": 1.9258, "step": 88026 }, { "epoch": 9.088271732397274, "grad_norm": 0.1467464715242386, "learning_rate": 0.01, "loss": 1.9619, "step": 88029 }, { "epoch": 9.088581457774106, "grad_norm": 0.10290052741765976, "learning_rate": 0.01, "loss": 1.9439, "step": 88032 }, { "epoch": 9.08889118315094, "grad_norm": 0.07046860456466675, "learning_rate": 0.01, "loss": 1.9522, "step": 88035 }, { "epoch": 9.089200908527772, "grad_norm": 0.04915101081132889, "learning_rate": 0.01, "loss": 1.9522, "step": 88038 }, { "epoch": 9.089510633904604, "grad_norm": 0.052908964455127716, "learning_rate": 0.01, "loss": 1.952, "step": 88041 }, { "epoch": 9.089820359281438, "grad_norm": 0.039025407284498215, "learning_rate": 0.01, "loss": 1.9531, "step": 88044 }, { "epoch": 9.09013008465827, "grad_norm": 0.03908696398139, "learning_rate": 0.01, "loss": 1.9397, "step": 88047 }, { "epoch": 9.090439810035102, "grad_norm": 0.059479523450136185, "learning_rate": 0.01, "loss": 1.9331, "step": 88050 }, { "epoch": 9.090749535411934, "grad_norm": 0.08643188327550888, "learning_rate": 0.01, "loss": 1.9383, "step": 88053 }, { "epoch": 9.091059260788768, "grad_norm": 0.07994077354669571, "learning_rate": 0.01, "loss": 1.9417, "step": 88056 }, { "epoch": 9.0913689861656, "grad_norm": 0.08260933309793472, "learning_rate": 0.01, "loss": 1.9322, "step": 88059 }, { "epoch": 9.091678711542432, "grad_norm": 0.08402281254529953, "learning_rate": 0.01, "loss": 1.9499, "step": 88062 }, { "epoch": 9.091988436919264, "grad_norm": 0.12822997570037842, "learning_rate": 0.01, "loss": 1.9203, "step": 88065 }, { "epoch": 9.092298162296098, "grad_norm": 0.10046152770519257, "learning_rate": 0.01, "loss": 1.9465, "step": 88068 }, { "epoch": 9.09260788767293, "grad_norm": 0.07425348460674286, "learning_rate": 0.01, "loss": 1.9537, "step": 88071 }, { "epoch": 9.092917613049762, "grad_norm": 0.05061847344040871, "learning_rate": 0.01, "loss": 1.9707, "step": 88074 }, { "epoch": 9.093227338426596, "grad_norm": 0.04643300175666809, "learning_rate": 0.01, "loss": 1.9355, "step": 88077 }, { "epoch": 9.093537063803428, "grad_norm": 0.040220703929662704, "learning_rate": 0.01, "loss": 1.948, "step": 88080 }, { "epoch": 9.09384678918026, "grad_norm": 0.04779227450489998, "learning_rate": 0.01, "loss": 1.9405, "step": 88083 }, { "epoch": 9.094156514557092, "grad_norm": 0.15425650775432587, "learning_rate": 0.01, "loss": 1.9415, "step": 88086 }, { "epoch": 9.094466239933926, "grad_norm": 0.05012909695506096, "learning_rate": 0.01, "loss": 1.9641, "step": 88089 }, { "epoch": 9.094775965310758, "grad_norm": 0.04676985740661621, "learning_rate": 0.01, "loss": 1.9683, "step": 88092 }, { "epoch": 9.09508569068759, "grad_norm": 0.0426156222820282, "learning_rate": 0.01, "loss": 1.9286, "step": 88095 }, { "epoch": 9.095395416064424, "grad_norm": 0.04056524485349655, "learning_rate": 0.01, "loss": 1.9535, "step": 88098 }, { "epoch": 9.095705141441256, "grad_norm": 0.13012756407260895, "learning_rate": 0.01, "loss": 1.933, "step": 88101 }, { "epoch": 9.096014866818088, "grad_norm": 0.17354853451251984, "learning_rate": 0.01, "loss": 1.9561, "step": 88104 }, { "epoch": 9.09632459219492, "grad_norm": 0.12001447379589081, "learning_rate": 0.01, "loss": 1.9565, "step": 88107 }, { "epoch": 9.096634317571754, "grad_norm": 0.07169081270694733, "learning_rate": 0.01, "loss": 1.9315, "step": 88110 }, { "epoch": 9.096944042948586, "grad_norm": 0.05362652987241745, "learning_rate": 0.01, "loss": 1.958, "step": 88113 }, { "epoch": 9.097253768325418, "grad_norm": 0.048951901495456696, "learning_rate": 0.01, "loss": 1.922, "step": 88116 }, { "epoch": 9.09756349370225, "grad_norm": 0.06211349740624428, "learning_rate": 0.01, "loss": 1.9499, "step": 88119 }, { "epoch": 9.097873219079084, "grad_norm": 0.04690651223063469, "learning_rate": 0.01, "loss": 1.9398, "step": 88122 }, { "epoch": 9.098182944455916, "grad_norm": 0.05850514769554138, "learning_rate": 0.01, "loss": 1.9567, "step": 88125 }, { "epoch": 9.098492669832748, "grad_norm": 0.05712956562638283, "learning_rate": 0.01, "loss": 1.9337, "step": 88128 }, { "epoch": 9.098802395209582, "grad_norm": 0.07003414630889893, "learning_rate": 0.01, "loss": 1.9525, "step": 88131 }, { "epoch": 9.099112120586414, "grad_norm": 0.06625586748123169, "learning_rate": 0.01, "loss": 1.9361, "step": 88134 }, { "epoch": 9.099421845963246, "grad_norm": 0.05052918195724487, "learning_rate": 0.01, "loss": 1.961, "step": 88137 }, { "epoch": 9.099731571340078, "grad_norm": 0.0572190098464489, "learning_rate": 0.01, "loss": 1.9262, "step": 88140 }, { "epoch": 9.100041296716912, "grad_norm": 0.07441101968288422, "learning_rate": 0.01, "loss": 1.9244, "step": 88143 }, { "epoch": 9.100351022093744, "grad_norm": 0.09752295166254044, "learning_rate": 0.01, "loss": 1.9447, "step": 88146 }, { "epoch": 9.100660747470576, "grad_norm": 0.05231703817844391, "learning_rate": 0.01, "loss": 1.9439, "step": 88149 }, { "epoch": 9.10097047284741, "grad_norm": 0.09808948636054993, "learning_rate": 0.01, "loss": 1.9484, "step": 88152 }, { "epoch": 9.101280198224242, "grad_norm": 0.057692382484674454, "learning_rate": 0.01, "loss": 1.9243, "step": 88155 }, { "epoch": 9.101589923601074, "grad_norm": 0.06832569092512131, "learning_rate": 0.01, "loss": 1.9718, "step": 88158 }, { "epoch": 9.101899648977906, "grad_norm": 0.05263699218630791, "learning_rate": 0.01, "loss": 1.965, "step": 88161 }, { "epoch": 9.10220937435474, "grad_norm": 0.10359843820333481, "learning_rate": 0.01, "loss": 1.9433, "step": 88164 }, { "epoch": 9.102519099731571, "grad_norm": 0.0683496743440628, "learning_rate": 0.01, "loss": 1.9552, "step": 88167 }, { "epoch": 9.102828825108404, "grad_norm": 0.04285841062664986, "learning_rate": 0.01, "loss": 1.9392, "step": 88170 }, { "epoch": 9.103138550485236, "grad_norm": 0.04572022333741188, "learning_rate": 0.01, "loss": 1.9702, "step": 88173 }, { "epoch": 9.10344827586207, "grad_norm": 0.07577993720769882, "learning_rate": 0.01, "loss": 1.9228, "step": 88176 }, { "epoch": 9.103758001238901, "grad_norm": 0.060188256204128265, "learning_rate": 0.01, "loss": 1.9304, "step": 88179 }, { "epoch": 9.104067726615733, "grad_norm": 0.044819220900535583, "learning_rate": 0.01, "loss": 1.9443, "step": 88182 }, { "epoch": 9.104377451992567, "grad_norm": 0.04718435928225517, "learning_rate": 0.01, "loss": 1.9409, "step": 88185 }, { "epoch": 9.1046871773694, "grad_norm": 0.04264286905527115, "learning_rate": 0.01, "loss": 1.9446, "step": 88188 }, { "epoch": 9.104996902746231, "grad_norm": 0.04543020576238632, "learning_rate": 0.01, "loss": 1.9561, "step": 88191 }, { "epoch": 9.105306628123063, "grad_norm": 0.04506497085094452, "learning_rate": 0.01, "loss": 1.9394, "step": 88194 }, { "epoch": 9.105616353499897, "grad_norm": 0.04982695356011391, "learning_rate": 0.01, "loss": 1.9586, "step": 88197 }, { "epoch": 9.10592607887673, "grad_norm": 0.07128612697124481, "learning_rate": 0.01, "loss": 1.9518, "step": 88200 }, { "epoch": 9.106235804253561, "grad_norm": 0.07622264325618744, "learning_rate": 0.01, "loss": 1.9411, "step": 88203 }, { "epoch": 9.106545529630395, "grad_norm": 0.04159415885806084, "learning_rate": 0.01, "loss": 1.9408, "step": 88206 }, { "epoch": 9.106855255007227, "grad_norm": 0.11658751219511032, "learning_rate": 0.01, "loss": 1.952, "step": 88209 }, { "epoch": 9.10716498038406, "grad_norm": 0.10910781472921371, "learning_rate": 0.01, "loss": 1.9309, "step": 88212 }, { "epoch": 9.107474705760891, "grad_norm": 0.05028378963470459, "learning_rate": 0.01, "loss": 1.9523, "step": 88215 }, { "epoch": 9.107784431137725, "grad_norm": 0.05754128098487854, "learning_rate": 0.01, "loss": 1.9213, "step": 88218 }, { "epoch": 9.108094156514557, "grad_norm": 0.09947003424167633, "learning_rate": 0.01, "loss": 1.9528, "step": 88221 }, { "epoch": 9.10840388189139, "grad_norm": 0.046979811042547226, "learning_rate": 0.01, "loss": 1.9342, "step": 88224 }, { "epoch": 9.108713607268221, "grad_norm": 0.04049091413617134, "learning_rate": 0.01, "loss": 1.9697, "step": 88227 }, { "epoch": 9.109023332645055, "grad_norm": 0.03384038433432579, "learning_rate": 0.01, "loss": 1.9334, "step": 88230 }, { "epoch": 9.109333058021887, "grad_norm": 0.06660273671150208, "learning_rate": 0.01, "loss": 1.9385, "step": 88233 }, { "epoch": 9.10964278339872, "grad_norm": 0.05666836351156235, "learning_rate": 0.01, "loss": 1.9411, "step": 88236 }, { "epoch": 9.109952508775553, "grad_norm": 0.066832534968853, "learning_rate": 0.01, "loss": 1.9522, "step": 88239 }, { "epoch": 9.110262234152385, "grad_norm": 0.10214311629533768, "learning_rate": 0.01, "loss": 1.9116, "step": 88242 }, { "epoch": 9.110571959529217, "grad_norm": 0.06897658109664917, "learning_rate": 0.01, "loss": 1.9594, "step": 88245 }, { "epoch": 9.11088168490605, "grad_norm": 0.11510921269655228, "learning_rate": 0.01, "loss": 1.9461, "step": 88248 }, { "epoch": 9.111191410282883, "grad_norm": 0.0748416930437088, "learning_rate": 0.01, "loss": 1.9361, "step": 88251 }, { "epoch": 9.111501135659715, "grad_norm": 0.03657582029700279, "learning_rate": 0.01, "loss": 1.9335, "step": 88254 }, { "epoch": 9.111810861036547, "grad_norm": 0.049296047538518906, "learning_rate": 0.01, "loss": 1.9599, "step": 88257 }, { "epoch": 9.112120586413381, "grad_norm": 0.10844705998897552, "learning_rate": 0.01, "loss": 1.9456, "step": 88260 }, { "epoch": 9.112430311790213, "grad_norm": 0.0642717257142067, "learning_rate": 0.01, "loss": 1.9315, "step": 88263 }, { "epoch": 9.112740037167045, "grad_norm": 0.04892707243561745, "learning_rate": 0.01, "loss": 1.9506, "step": 88266 }, { "epoch": 9.113049762543877, "grad_norm": 0.06426113098859787, "learning_rate": 0.01, "loss": 1.9337, "step": 88269 }, { "epoch": 9.113359487920711, "grad_norm": 0.03934602066874504, "learning_rate": 0.01, "loss": 1.9552, "step": 88272 }, { "epoch": 9.113669213297543, "grad_norm": 0.05431235954165459, "learning_rate": 0.01, "loss": 1.9625, "step": 88275 }, { "epoch": 9.113978938674375, "grad_norm": 0.05585315823554993, "learning_rate": 0.01, "loss": 1.9305, "step": 88278 }, { "epoch": 9.114288664051207, "grad_norm": 0.03399717062711716, "learning_rate": 0.01, "loss": 1.9403, "step": 88281 }, { "epoch": 9.114598389428041, "grad_norm": 0.10088639706373215, "learning_rate": 0.01, "loss": 1.948, "step": 88284 }, { "epoch": 9.114908114804873, "grad_norm": 0.06731191277503967, "learning_rate": 0.01, "loss": 1.9443, "step": 88287 }, { "epoch": 9.115217840181705, "grad_norm": 0.039947111159563065, "learning_rate": 0.01, "loss": 1.9466, "step": 88290 }, { "epoch": 9.115527565558539, "grad_norm": 0.04077018052339554, "learning_rate": 0.01, "loss": 1.9454, "step": 88293 }, { "epoch": 9.11583729093537, "grad_norm": 0.11213209480047226, "learning_rate": 0.01, "loss": 1.9425, "step": 88296 }, { "epoch": 9.116147016312203, "grad_norm": 0.0802016407251358, "learning_rate": 0.01, "loss": 1.942, "step": 88299 }, { "epoch": 9.116456741689035, "grad_norm": 0.06813075393438339, "learning_rate": 0.01, "loss": 1.9713, "step": 88302 }, { "epoch": 9.116766467065869, "grad_norm": 0.11700167506933212, "learning_rate": 0.01, "loss": 1.9588, "step": 88305 }, { "epoch": 9.1170761924427, "grad_norm": 0.07587660849094391, "learning_rate": 0.01, "loss": 1.9218, "step": 88308 }, { "epoch": 9.117385917819533, "grad_norm": 0.062029797583818436, "learning_rate": 0.01, "loss": 1.9338, "step": 88311 }, { "epoch": 9.117695643196367, "grad_norm": 0.11622146517038345, "learning_rate": 0.01, "loss": 1.9476, "step": 88314 }, { "epoch": 9.118005368573199, "grad_norm": 0.04664183780550957, "learning_rate": 0.01, "loss": 1.9337, "step": 88317 }, { "epoch": 9.11831509395003, "grad_norm": 0.10970574617385864, "learning_rate": 0.01, "loss": 1.9599, "step": 88320 }, { "epoch": 9.118624819326863, "grad_norm": 0.05169505998492241, "learning_rate": 0.01, "loss": 1.928, "step": 88323 }, { "epoch": 9.118934544703697, "grad_norm": 0.03827926144003868, "learning_rate": 0.01, "loss": 1.9176, "step": 88326 }, { "epoch": 9.119244270080529, "grad_norm": 0.04646414518356323, "learning_rate": 0.01, "loss": 1.9424, "step": 88329 }, { "epoch": 9.11955399545736, "grad_norm": 0.1304306983947754, "learning_rate": 0.01, "loss": 1.9295, "step": 88332 }, { "epoch": 9.119863720834193, "grad_norm": 0.11228543519973755, "learning_rate": 0.01, "loss": 1.9342, "step": 88335 }, { "epoch": 9.120173446211027, "grad_norm": 0.051064543426036835, "learning_rate": 0.01, "loss": 1.9416, "step": 88338 }, { "epoch": 9.120483171587859, "grad_norm": 0.09616478532552719, "learning_rate": 0.01, "loss": 1.9795, "step": 88341 }, { "epoch": 9.12079289696469, "grad_norm": 0.11419511586427689, "learning_rate": 0.01, "loss": 1.9565, "step": 88344 }, { "epoch": 9.121102622341525, "grad_norm": 0.08378113061189651, "learning_rate": 0.01, "loss": 1.9528, "step": 88347 }, { "epoch": 9.121412347718357, "grad_norm": 0.04137773811817169, "learning_rate": 0.01, "loss": 1.9578, "step": 88350 }, { "epoch": 9.121722073095189, "grad_norm": 0.055357303470373154, "learning_rate": 0.01, "loss": 1.9313, "step": 88353 }, { "epoch": 9.12203179847202, "grad_norm": 0.04019000381231308, "learning_rate": 0.01, "loss": 1.94, "step": 88356 }, { "epoch": 9.122341523848855, "grad_norm": 0.030802376568317413, "learning_rate": 0.01, "loss": 1.9507, "step": 88359 }, { "epoch": 9.122651249225687, "grad_norm": 0.08665869385004044, "learning_rate": 0.01, "loss": 1.9442, "step": 88362 }, { "epoch": 9.122960974602519, "grad_norm": 0.11755997687578201, "learning_rate": 0.01, "loss": 1.9264, "step": 88365 }, { "epoch": 9.123270699979352, "grad_norm": 0.04438506066799164, "learning_rate": 0.01, "loss": 1.9422, "step": 88368 }, { "epoch": 9.123580425356185, "grad_norm": 0.05268723517656326, "learning_rate": 0.01, "loss": 1.949, "step": 88371 }, { "epoch": 9.123890150733017, "grad_norm": 0.042669329792261124, "learning_rate": 0.01, "loss": 1.935, "step": 88374 }, { "epoch": 9.124199876109849, "grad_norm": 0.050564251840114594, "learning_rate": 0.01, "loss": 1.9489, "step": 88377 }, { "epoch": 9.124509601486682, "grad_norm": 0.06595560163259506, "learning_rate": 0.01, "loss": 1.9637, "step": 88380 }, { "epoch": 9.124819326863514, "grad_norm": 0.03814883902668953, "learning_rate": 0.01, "loss": 1.9486, "step": 88383 }, { "epoch": 9.125129052240347, "grad_norm": 0.0743202194571495, "learning_rate": 0.01, "loss": 1.9216, "step": 88386 }, { "epoch": 9.125438777617179, "grad_norm": 0.16788910329341888, "learning_rate": 0.01, "loss": 1.9579, "step": 88389 }, { "epoch": 9.125748502994012, "grad_norm": 0.038132194429636, "learning_rate": 0.01, "loss": 1.9016, "step": 88392 }, { "epoch": 9.126058228370844, "grad_norm": 0.04480666667222977, "learning_rate": 0.01, "loss": 1.9304, "step": 88395 }, { "epoch": 9.126367953747677, "grad_norm": 0.04021817073225975, "learning_rate": 0.01, "loss": 1.9515, "step": 88398 }, { "epoch": 9.12667767912451, "grad_norm": 0.036103565245866776, "learning_rate": 0.01, "loss": 1.9713, "step": 88401 }, { "epoch": 9.126987404501342, "grad_norm": 0.052644163370132446, "learning_rate": 0.01, "loss": 1.9223, "step": 88404 }, { "epoch": 9.127297129878174, "grad_norm": 0.07091756165027618, "learning_rate": 0.01, "loss": 1.9453, "step": 88407 }, { "epoch": 9.127606855255006, "grad_norm": 0.0955536738038063, "learning_rate": 0.01, "loss": 1.945, "step": 88410 }, { "epoch": 9.12791658063184, "grad_norm": 0.13555645942687988, "learning_rate": 0.01, "loss": 1.9736, "step": 88413 }, { "epoch": 9.128226306008672, "grad_norm": 0.038274601101875305, "learning_rate": 0.01, "loss": 1.9596, "step": 88416 }, { "epoch": 9.128536031385504, "grad_norm": 0.04228101298213005, "learning_rate": 0.01, "loss": 1.9386, "step": 88419 }, { "epoch": 9.128845756762338, "grad_norm": 0.03320973366498947, "learning_rate": 0.01, "loss": 1.9605, "step": 88422 }, { "epoch": 9.12915548213917, "grad_norm": 0.04140046238899231, "learning_rate": 0.01, "loss": 1.9531, "step": 88425 }, { "epoch": 9.129465207516002, "grad_norm": 0.04351486638188362, "learning_rate": 0.01, "loss": 1.9437, "step": 88428 }, { "epoch": 9.129774932892834, "grad_norm": 0.17408426105976105, "learning_rate": 0.01, "loss": 1.9295, "step": 88431 }, { "epoch": 9.130084658269668, "grad_norm": 0.08010722696781158, "learning_rate": 0.01, "loss": 1.9354, "step": 88434 }, { "epoch": 9.1303943836465, "grad_norm": 0.06910273432731628, "learning_rate": 0.01, "loss": 1.9634, "step": 88437 }, { "epoch": 9.130704109023332, "grad_norm": 0.03804326429963112, "learning_rate": 0.01, "loss": 1.9711, "step": 88440 }, { "epoch": 9.131013834400164, "grad_norm": 0.039677515625953674, "learning_rate": 0.01, "loss": 1.9466, "step": 88443 }, { "epoch": 9.131323559776998, "grad_norm": 0.047551997005939484, "learning_rate": 0.01, "loss": 1.9414, "step": 88446 }, { "epoch": 9.13163328515383, "grad_norm": 0.05167323350906372, "learning_rate": 0.01, "loss": 1.9431, "step": 88449 }, { "epoch": 9.131943010530662, "grad_norm": 0.1845468133687973, "learning_rate": 0.01, "loss": 1.9343, "step": 88452 }, { "epoch": 9.132252735907496, "grad_norm": 0.1411765217781067, "learning_rate": 0.01, "loss": 1.9222, "step": 88455 }, { "epoch": 9.132562461284328, "grad_norm": 0.07709158211946487, "learning_rate": 0.01, "loss": 1.9478, "step": 88458 }, { "epoch": 9.13287218666116, "grad_norm": 0.06372318416833878, "learning_rate": 0.01, "loss": 1.9543, "step": 88461 }, { "epoch": 9.133181912037992, "grad_norm": 0.040460873395204544, "learning_rate": 0.01, "loss": 1.9602, "step": 88464 }, { "epoch": 9.133491637414826, "grad_norm": 0.055337656289339066, "learning_rate": 0.01, "loss": 1.9218, "step": 88467 }, { "epoch": 9.133801362791658, "grad_norm": 0.03703693300485611, "learning_rate": 0.01, "loss": 1.9158, "step": 88470 }, { "epoch": 9.13411108816849, "grad_norm": 0.05583380162715912, "learning_rate": 0.01, "loss": 1.9489, "step": 88473 }, { "epoch": 9.134420813545324, "grad_norm": 0.05320487171411514, "learning_rate": 0.01, "loss": 1.9518, "step": 88476 }, { "epoch": 9.134730538922156, "grad_norm": 0.0624091736972332, "learning_rate": 0.01, "loss": 1.9425, "step": 88479 }, { "epoch": 9.135040264298988, "grad_norm": 0.14112161099910736, "learning_rate": 0.01, "loss": 1.942, "step": 88482 }, { "epoch": 9.13534998967582, "grad_norm": 0.08291780948638916, "learning_rate": 0.01, "loss": 1.944, "step": 88485 }, { "epoch": 9.135659715052654, "grad_norm": 0.13060036301612854, "learning_rate": 0.01, "loss": 1.9418, "step": 88488 }, { "epoch": 9.135969440429486, "grad_norm": 0.08995925635099411, "learning_rate": 0.01, "loss": 1.9438, "step": 88491 }, { "epoch": 9.136279165806318, "grad_norm": 0.0542483888566494, "learning_rate": 0.01, "loss": 1.9464, "step": 88494 }, { "epoch": 9.13658889118315, "grad_norm": 0.03333907201886177, "learning_rate": 0.01, "loss": 1.9394, "step": 88497 }, { "epoch": 9.136898616559984, "grad_norm": 0.04014088585972786, "learning_rate": 0.01, "loss": 1.9679, "step": 88500 }, { "epoch": 9.137208341936816, "grad_norm": 0.044594038277864456, "learning_rate": 0.01, "loss": 1.9424, "step": 88503 }, { "epoch": 9.137518067313648, "grad_norm": 0.06447494775056839, "learning_rate": 0.01, "loss": 1.9606, "step": 88506 }, { "epoch": 9.137827792690482, "grad_norm": 0.050712186843156815, "learning_rate": 0.01, "loss": 1.9385, "step": 88509 }, { "epoch": 9.138137518067314, "grad_norm": 0.06224539503455162, "learning_rate": 0.01, "loss": 1.9257, "step": 88512 }, { "epoch": 9.138447243444146, "grad_norm": 0.17767208814620972, "learning_rate": 0.01, "loss": 1.9412, "step": 88515 }, { "epoch": 9.138756968820978, "grad_norm": 0.06980250775814056, "learning_rate": 0.01, "loss": 1.9579, "step": 88518 }, { "epoch": 9.139066694197812, "grad_norm": 0.03640732541680336, "learning_rate": 0.01, "loss": 1.9585, "step": 88521 }, { "epoch": 9.139376419574644, "grad_norm": 0.03801807761192322, "learning_rate": 0.01, "loss": 1.9253, "step": 88524 }, { "epoch": 9.139686144951476, "grad_norm": 0.03328856825828552, "learning_rate": 0.01, "loss": 1.9368, "step": 88527 }, { "epoch": 9.13999587032831, "grad_norm": 0.07712339609861374, "learning_rate": 0.01, "loss": 1.9462, "step": 88530 }, { "epoch": 9.140305595705142, "grad_norm": 0.08324889093637466, "learning_rate": 0.01, "loss": 1.9418, "step": 88533 }, { "epoch": 9.140615321081974, "grad_norm": 0.0427752248942852, "learning_rate": 0.01, "loss": 1.9329, "step": 88536 }, { "epoch": 9.140925046458806, "grad_norm": 0.07253149896860123, "learning_rate": 0.01, "loss": 1.9562, "step": 88539 }, { "epoch": 9.14123477183564, "grad_norm": 0.0548226498067379, "learning_rate": 0.01, "loss": 1.9433, "step": 88542 }, { "epoch": 9.141544497212472, "grad_norm": 0.059894196689128876, "learning_rate": 0.01, "loss": 1.9495, "step": 88545 }, { "epoch": 9.141854222589304, "grad_norm": 0.08382166177034378, "learning_rate": 0.01, "loss": 1.9485, "step": 88548 }, { "epoch": 9.142163947966136, "grad_norm": 0.13770000636577606, "learning_rate": 0.01, "loss": 1.9371, "step": 88551 }, { "epoch": 9.14247367334297, "grad_norm": 0.14796362817287445, "learning_rate": 0.01, "loss": 1.9449, "step": 88554 }, { "epoch": 9.142783398719802, "grad_norm": 0.12419676780700684, "learning_rate": 0.01, "loss": 1.9347, "step": 88557 }, { "epoch": 9.143093124096634, "grad_norm": 0.04314526543021202, "learning_rate": 0.01, "loss": 1.9612, "step": 88560 }, { "epoch": 9.143402849473468, "grad_norm": 0.056149184703826904, "learning_rate": 0.01, "loss": 1.9363, "step": 88563 }, { "epoch": 9.1437125748503, "grad_norm": 0.04276338964700699, "learning_rate": 0.01, "loss": 1.9554, "step": 88566 }, { "epoch": 9.144022300227132, "grad_norm": 0.035918641835451126, "learning_rate": 0.01, "loss": 1.9454, "step": 88569 }, { "epoch": 9.144332025603964, "grad_norm": 0.037139903753995895, "learning_rate": 0.01, "loss": 1.9438, "step": 88572 }, { "epoch": 9.144641750980798, "grad_norm": 0.11460118740797043, "learning_rate": 0.01, "loss": 1.9235, "step": 88575 }, { "epoch": 9.14495147635763, "grad_norm": 0.04918679967522621, "learning_rate": 0.01, "loss": 1.9568, "step": 88578 }, { "epoch": 9.145261201734462, "grad_norm": 0.03683009743690491, "learning_rate": 0.01, "loss": 1.9347, "step": 88581 }, { "epoch": 9.145570927111295, "grad_norm": 0.06100183352828026, "learning_rate": 0.01, "loss": 1.97, "step": 88584 }, { "epoch": 9.145880652488128, "grad_norm": 0.07831919193267822, "learning_rate": 0.01, "loss": 1.9213, "step": 88587 }, { "epoch": 9.14619037786496, "grad_norm": 0.04614289477467537, "learning_rate": 0.01, "loss": 1.9243, "step": 88590 }, { "epoch": 9.146500103241792, "grad_norm": 0.12135063856840134, "learning_rate": 0.01, "loss": 1.9268, "step": 88593 }, { "epoch": 9.146809828618625, "grad_norm": 0.048958707600831985, "learning_rate": 0.01, "loss": 1.9566, "step": 88596 }, { "epoch": 9.147119553995458, "grad_norm": 0.04851505160331726, "learning_rate": 0.01, "loss": 1.9407, "step": 88599 }, { "epoch": 9.14742927937229, "grad_norm": 0.1740291863679886, "learning_rate": 0.01, "loss": 1.959, "step": 88602 }, { "epoch": 9.147739004749122, "grad_norm": 0.10942324995994568, "learning_rate": 0.01, "loss": 1.9176, "step": 88605 }, { "epoch": 9.148048730125955, "grad_norm": 0.04716290906071663, "learning_rate": 0.01, "loss": 1.9443, "step": 88608 }, { "epoch": 9.148358455502787, "grad_norm": 0.03627236187458038, "learning_rate": 0.01, "loss": 1.952, "step": 88611 }, { "epoch": 9.14866818087962, "grad_norm": 0.04512391239404678, "learning_rate": 0.01, "loss": 1.9343, "step": 88614 }, { "epoch": 9.148977906256453, "grad_norm": 0.055295899510383606, "learning_rate": 0.01, "loss": 1.9518, "step": 88617 }, { "epoch": 9.149287631633285, "grad_norm": 0.03937353938817978, "learning_rate": 0.01, "loss": 1.9306, "step": 88620 }, { "epoch": 9.149597357010117, "grad_norm": 0.057175163179636, "learning_rate": 0.01, "loss": 1.9578, "step": 88623 }, { "epoch": 9.14990708238695, "grad_norm": 0.03652303293347359, "learning_rate": 0.01, "loss": 1.9424, "step": 88626 }, { "epoch": 9.150216807763783, "grad_norm": 0.05022728443145752, "learning_rate": 0.01, "loss": 1.9493, "step": 88629 }, { "epoch": 9.150526533140615, "grad_norm": 0.04779180511832237, "learning_rate": 0.01, "loss": 1.9443, "step": 88632 }, { "epoch": 9.150836258517447, "grad_norm": 0.03462984785437584, "learning_rate": 0.01, "loss": 1.9338, "step": 88635 }, { "epoch": 9.151145983894281, "grad_norm": 0.15472935140132904, "learning_rate": 0.01, "loss": 1.9289, "step": 88638 }, { "epoch": 9.151455709271113, "grad_norm": 0.10227237641811371, "learning_rate": 0.01, "loss": 1.9467, "step": 88641 }, { "epoch": 9.151765434647945, "grad_norm": 0.05621829628944397, "learning_rate": 0.01, "loss": 1.9373, "step": 88644 }, { "epoch": 9.152075160024777, "grad_norm": 0.048698339611291885, "learning_rate": 0.01, "loss": 1.9311, "step": 88647 }, { "epoch": 9.152384885401611, "grad_norm": 0.04971965774893761, "learning_rate": 0.01, "loss": 1.9276, "step": 88650 }, { "epoch": 9.152694610778443, "grad_norm": 0.03834971413016319, "learning_rate": 0.01, "loss": 1.9388, "step": 88653 }, { "epoch": 9.153004336155275, "grad_norm": 0.03815067186951637, "learning_rate": 0.01, "loss": 1.9297, "step": 88656 }, { "epoch": 9.153314061532107, "grad_norm": 0.04239659011363983, "learning_rate": 0.01, "loss": 1.9513, "step": 88659 }, { "epoch": 9.153623786908941, "grad_norm": 0.04531934857368469, "learning_rate": 0.01, "loss": 1.9303, "step": 88662 }, { "epoch": 9.153933512285773, "grad_norm": 0.06465330719947815, "learning_rate": 0.01, "loss": 1.936, "step": 88665 }, { "epoch": 9.154243237662605, "grad_norm": 0.06051948666572571, "learning_rate": 0.01, "loss": 1.9365, "step": 88668 }, { "epoch": 9.154552963039439, "grad_norm": 0.06463321298360825, "learning_rate": 0.01, "loss": 1.9573, "step": 88671 }, { "epoch": 9.154862688416271, "grad_norm": 0.06618782877922058, "learning_rate": 0.01, "loss": 1.9528, "step": 88674 }, { "epoch": 9.155172413793103, "grad_norm": 0.1678209751844406, "learning_rate": 0.01, "loss": 1.95, "step": 88677 }, { "epoch": 9.155482139169935, "grad_norm": 0.1151156947016716, "learning_rate": 0.01, "loss": 1.947, "step": 88680 }, { "epoch": 9.155791864546769, "grad_norm": 0.04810039699077606, "learning_rate": 0.01, "loss": 1.9282, "step": 88683 }, { "epoch": 9.156101589923601, "grad_norm": 0.0373799167573452, "learning_rate": 0.01, "loss": 1.9355, "step": 88686 }, { "epoch": 9.156411315300433, "grad_norm": 0.03355636075139046, "learning_rate": 0.01, "loss": 1.9303, "step": 88689 }, { "epoch": 9.156721040677267, "grad_norm": 0.03397734463214874, "learning_rate": 0.01, "loss": 1.9223, "step": 88692 }, { "epoch": 9.157030766054099, "grad_norm": 0.07095854729413986, "learning_rate": 0.01, "loss": 1.9378, "step": 88695 }, { "epoch": 9.157340491430931, "grad_norm": 0.05653863027691841, "learning_rate": 0.01, "loss": 1.9604, "step": 88698 }, { "epoch": 9.157650216807763, "grad_norm": 0.10083531588315964, "learning_rate": 0.01, "loss": 1.9495, "step": 88701 }, { "epoch": 9.157959942184597, "grad_norm": 0.07726927101612091, "learning_rate": 0.01, "loss": 1.9243, "step": 88704 }, { "epoch": 9.158269667561429, "grad_norm": 0.07324657589197159, "learning_rate": 0.01, "loss": 1.94, "step": 88707 }, { "epoch": 9.158579392938261, "grad_norm": 0.07510195672512054, "learning_rate": 0.01, "loss": 1.9321, "step": 88710 }, { "epoch": 9.158889118315093, "grad_norm": 0.043642688542604446, "learning_rate": 0.01, "loss": 1.9454, "step": 88713 }, { "epoch": 9.159198843691927, "grad_norm": 0.10319583117961884, "learning_rate": 0.01, "loss": 1.9626, "step": 88716 }, { "epoch": 9.159508569068759, "grad_norm": 0.039388563483953476, "learning_rate": 0.01, "loss": 1.9375, "step": 88719 }, { "epoch": 9.159818294445591, "grad_norm": 0.12249758839607239, "learning_rate": 0.01, "loss": 1.9599, "step": 88722 }, { "epoch": 9.160128019822425, "grad_norm": 0.11489095538854599, "learning_rate": 0.01, "loss": 1.9572, "step": 88725 }, { "epoch": 9.160437745199257, "grad_norm": 0.04785621538758278, "learning_rate": 0.01, "loss": 1.9471, "step": 88728 }, { "epoch": 9.160747470576089, "grad_norm": 0.06265294551849365, "learning_rate": 0.01, "loss": 1.9427, "step": 88731 }, { "epoch": 9.161057195952921, "grad_norm": 0.06334743648767471, "learning_rate": 0.01, "loss": 1.9207, "step": 88734 }, { "epoch": 9.161366921329755, "grad_norm": 0.08872868865728378, "learning_rate": 0.01, "loss": 1.9422, "step": 88737 }, { "epoch": 9.161676646706587, "grad_norm": 0.052145641297101974, "learning_rate": 0.01, "loss": 1.9538, "step": 88740 }, { "epoch": 9.161986372083419, "grad_norm": 0.07859448343515396, "learning_rate": 0.01, "loss": 1.9166, "step": 88743 }, { "epoch": 9.162296097460253, "grad_norm": 0.0986669659614563, "learning_rate": 0.01, "loss": 1.9532, "step": 88746 }, { "epoch": 9.162605822837085, "grad_norm": 0.05713833123445511, "learning_rate": 0.01, "loss": 1.9315, "step": 88749 }, { "epoch": 9.162915548213917, "grad_norm": 0.07410735636949539, "learning_rate": 0.01, "loss": 1.9496, "step": 88752 }, { "epoch": 9.163225273590749, "grad_norm": 0.07402617484331131, "learning_rate": 0.01, "loss": 1.9575, "step": 88755 }, { "epoch": 9.163534998967583, "grad_norm": 0.05839969217777252, "learning_rate": 0.01, "loss": 1.9446, "step": 88758 }, { "epoch": 9.163844724344415, "grad_norm": 0.10944623500108719, "learning_rate": 0.01, "loss": 1.9804, "step": 88761 }, { "epoch": 9.164154449721247, "grad_norm": 0.13309495151042938, "learning_rate": 0.01, "loss": 1.9507, "step": 88764 }, { "epoch": 9.164464175098079, "grad_norm": 0.0856231153011322, "learning_rate": 0.01, "loss": 1.9433, "step": 88767 }, { "epoch": 9.164773900474913, "grad_norm": 0.03998791053891182, "learning_rate": 0.01, "loss": 1.91, "step": 88770 }, { "epoch": 9.165083625851745, "grad_norm": 0.12182749807834625, "learning_rate": 0.01, "loss": 1.9362, "step": 88773 }, { "epoch": 9.165393351228577, "grad_norm": 0.0821884498000145, "learning_rate": 0.01, "loss": 1.948, "step": 88776 }, { "epoch": 9.16570307660541, "grad_norm": 0.08010543137788773, "learning_rate": 0.01, "loss": 1.9273, "step": 88779 }, { "epoch": 9.166012801982243, "grad_norm": 0.04775281995534897, "learning_rate": 0.01, "loss": 1.9566, "step": 88782 }, { "epoch": 9.166322527359075, "grad_norm": 0.04545445367693901, "learning_rate": 0.01, "loss": 1.9187, "step": 88785 }, { "epoch": 9.166632252735907, "grad_norm": 0.06078621745109558, "learning_rate": 0.01, "loss": 1.958, "step": 88788 }, { "epoch": 9.16694197811274, "grad_norm": 0.062157388776540756, "learning_rate": 0.01, "loss": 1.9389, "step": 88791 }, { "epoch": 9.167251703489573, "grad_norm": 0.039783451706171036, "learning_rate": 0.01, "loss": 1.9214, "step": 88794 }, { "epoch": 9.167561428866405, "grad_norm": 0.042033612728118896, "learning_rate": 0.01, "loss": 1.9257, "step": 88797 }, { "epoch": 9.167871154243239, "grad_norm": 0.08056508004665375, "learning_rate": 0.01, "loss": 1.9795, "step": 88800 }, { "epoch": 9.16818087962007, "grad_norm": 0.05128775164484978, "learning_rate": 0.01, "loss": 1.9314, "step": 88803 }, { "epoch": 9.168490604996903, "grad_norm": 0.057393159717321396, "learning_rate": 0.01, "loss": 1.9487, "step": 88806 }, { "epoch": 9.168800330373735, "grad_norm": 0.03829537332057953, "learning_rate": 0.01, "loss": 1.9277, "step": 88809 }, { "epoch": 9.169110055750568, "grad_norm": 0.045484233647584915, "learning_rate": 0.01, "loss": 1.9384, "step": 88812 }, { "epoch": 9.1694197811274, "grad_norm": 0.05106499791145325, "learning_rate": 0.01, "loss": 1.9297, "step": 88815 }, { "epoch": 9.169729506504233, "grad_norm": 0.060923442244529724, "learning_rate": 0.01, "loss": 1.9371, "step": 88818 }, { "epoch": 9.170039231881065, "grad_norm": 0.1129612997174263, "learning_rate": 0.01, "loss": 1.9377, "step": 88821 }, { "epoch": 9.170348957257898, "grad_norm": 0.053068701177835464, "learning_rate": 0.01, "loss": 1.9335, "step": 88824 }, { "epoch": 9.17065868263473, "grad_norm": 0.12930330634117126, "learning_rate": 0.01, "loss": 1.9342, "step": 88827 }, { "epoch": 9.170968408011563, "grad_norm": 0.04084591194987297, "learning_rate": 0.01, "loss": 1.9562, "step": 88830 }, { "epoch": 9.171278133388396, "grad_norm": 0.039442602545022964, "learning_rate": 0.01, "loss": 1.9235, "step": 88833 }, { "epoch": 9.171587858765228, "grad_norm": 0.04807281494140625, "learning_rate": 0.01, "loss": 1.9378, "step": 88836 }, { "epoch": 9.17189758414206, "grad_norm": 0.049598827958106995, "learning_rate": 0.01, "loss": 1.9324, "step": 88839 }, { "epoch": 9.172207309518893, "grad_norm": 0.11387526988983154, "learning_rate": 0.01, "loss": 1.9428, "step": 88842 }, { "epoch": 9.172517034895726, "grad_norm": 0.08218611776828766, "learning_rate": 0.01, "loss": 1.9542, "step": 88845 }, { "epoch": 9.172826760272558, "grad_norm": 0.0520508848130703, "learning_rate": 0.01, "loss": 1.9387, "step": 88848 }, { "epoch": 9.17313648564939, "grad_norm": 0.040659550577402115, "learning_rate": 0.01, "loss": 1.9376, "step": 88851 }, { "epoch": 9.173446211026224, "grad_norm": 0.07411307096481323, "learning_rate": 0.01, "loss": 1.9467, "step": 88854 }, { "epoch": 9.173755936403056, "grad_norm": 0.03607996925711632, "learning_rate": 0.01, "loss": 1.953, "step": 88857 }, { "epoch": 9.174065661779888, "grad_norm": 0.04786485806107521, "learning_rate": 0.01, "loss": 1.9278, "step": 88860 }, { "epoch": 9.17437538715672, "grad_norm": 0.0968308299779892, "learning_rate": 0.01, "loss": 1.9433, "step": 88863 }, { "epoch": 9.174685112533554, "grad_norm": 0.09454333037137985, "learning_rate": 0.01, "loss": 1.9639, "step": 88866 }, { "epoch": 9.174994837910386, "grad_norm": 0.08687274158000946, "learning_rate": 0.01, "loss": 1.9517, "step": 88869 }, { "epoch": 9.175304563287218, "grad_norm": 0.03763791546225548, "learning_rate": 0.01, "loss": 1.9294, "step": 88872 }, { "epoch": 9.17561428866405, "grad_norm": 0.04863421246409416, "learning_rate": 0.01, "loss": 1.9439, "step": 88875 }, { "epoch": 9.175924014040884, "grad_norm": 0.07117434591054916, "learning_rate": 0.01, "loss": 1.9207, "step": 88878 }, { "epoch": 9.176233739417716, "grad_norm": 0.05109839886426926, "learning_rate": 0.01, "loss": 1.9123, "step": 88881 }, { "epoch": 9.176543464794548, "grad_norm": 0.05009722337126732, "learning_rate": 0.01, "loss": 1.9671, "step": 88884 }, { "epoch": 9.176853190171382, "grad_norm": 0.04079516977071762, "learning_rate": 0.01, "loss": 1.9411, "step": 88887 }, { "epoch": 9.177162915548214, "grad_norm": 0.04275686293840408, "learning_rate": 0.01, "loss": 1.9592, "step": 88890 }, { "epoch": 9.177472640925046, "grad_norm": 0.04627576842904091, "learning_rate": 0.01, "loss": 1.9594, "step": 88893 }, { "epoch": 9.177782366301878, "grad_norm": 0.05262031778693199, "learning_rate": 0.01, "loss": 1.9426, "step": 88896 }, { "epoch": 9.178092091678712, "grad_norm": 0.10498153418302536, "learning_rate": 0.01, "loss": 1.9656, "step": 88899 }, { "epoch": 9.178401817055544, "grad_norm": 0.08968192338943481, "learning_rate": 0.01, "loss": 1.9143, "step": 88902 }, { "epoch": 9.178711542432376, "grad_norm": 0.06271538883447647, "learning_rate": 0.01, "loss": 1.9183, "step": 88905 }, { "epoch": 9.17902126780921, "grad_norm": 0.07553781569004059, "learning_rate": 0.01, "loss": 1.9155, "step": 88908 }, { "epoch": 9.179330993186042, "grad_norm": 0.050929512828588486, "learning_rate": 0.01, "loss": 1.9446, "step": 88911 }, { "epoch": 9.179640718562874, "grad_norm": 0.06154989078640938, "learning_rate": 0.01, "loss": 1.9529, "step": 88914 }, { "epoch": 9.179950443939706, "grad_norm": 0.0712541937828064, "learning_rate": 0.01, "loss": 1.9486, "step": 88917 }, { "epoch": 9.18026016931654, "grad_norm": 0.06601454317569733, "learning_rate": 0.01, "loss": 1.9566, "step": 88920 }, { "epoch": 9.180569894693372, "grad_norm": 0.105747289955616, "learning_rate": 0.01, "loss": 1.9302, "step": 88923 }, { "epoch": 9.180879620070204, "grad_norm": 0.06339000165462494, "learning_rate": 0.01, "loss": 1.9602, "step": 88926 }, { "epoch": 9.181189345447036, "grad_norm": 0.0613056980073452, "learning_rate": 0.01, "loss": 1.9438, "step": 88929 }, { "epoch": 9.18149907082387, "grad_norm": 0.09535111486911774, "learning_rate": 0.01, "loss": 1.9629, "step": 88932 }, { "epoch": 9.181808796200702, "grad_norm": 0.05504938215017319, "learning_rate": 0.01, "loss": 1.9639, "step": 88935 }, { "epoch": 9.182118521577534, "grad_norm": 0.04098088666796684, "learning_rate": 0.01, "loss": 1.9596, "step": 88938 }, { "epoch": 9.182428246954368, "grad_norm": 0.15497301518917084, "learning_rate": 0.01, "loss": 1.9365, "step": 88941 }, { "epoch": 9.1827379723312, "grad_norm": 0.08422484993934631, "learning_rate": 0.01, "loss": 1.9599, "step": 88944 }, { "epoch": 9.183047697708032, "grad_norm": 0.07801347225904465, "learning_rate": 0.01, "loss": 1.9502, "step": 88947 }, { "epoch": 9.183357423084864, "grad_norm": 0.07705824077129364, "learning_rate": 0.01, "loss": 1.9763, "step": 88950 }, { "epoch": 9.183667148461698, "grad_norm": 0.13578467071056366, "learning_rate": 0.01, "loss": 1.9309, "step": 88953 }, { "epoch": 9.18397687383853, "grad_norm": 0.1082862839102745, "learning_rate": 0.01, "loss": 1.9447, "step": 88956 }, { "epoch": 9.184286599215362, "grad_norm": 0.08407740294933319, "learning_rate": 0.01, "loss": 1.9284, "step": 88959 }, { "epoch": 9.184596324592196, "grad_norm": 0.12279217690229416, "learning_rate": 0.01, "loss": 1.9392, "step": 88962 }, { "epoch": 9.184906049969028, "grad_norm": 0.14694684743881226, "learning_rate": 0.01, "loss": 1.9279, "step": 88965 }, { "epoch": 9.18521577534586, "grad_norm": 0.07982742041349411, "learning_rate": 0.01, "loss": 1.951, "step": 88968 }, { "epoch": 9.185525500722692, "grad_norm": 0.08249851316213608, "learning_rate": 0.01, "loss": 1.9551, "step": 88971 }, { "epoch": 9.185835226099526, "grad_norm": 0.04720144718885422, "learning_rate": 0.01, "loss": 1.949, "step": 88974 }, { "epoch": 9.186144951476358, "grad_norm": 0.09151851385831833, "learning_rate": 0.01, "loss": 1.932, "step": 88977 }, { "epoch": 9.18645467685319, "grad_norm": 0.07975881546735764, "learning_rate": 0.01, "loss": 1.9182, "step": 88980 }, { "epoch": 9.186764402230022, "grad_norm": 0.05716300383210182, "learning_rate": 0.01, "loss": 1.9394, "step": 88983 }, { "epoch": 9.187074127606856, "grad_norm": 0.050935614854097366, "learning_rate": 0.01, "loss": 1.9472, "step": 88986 }, { "epoch": 9.187383852983688, "grad_norm": 0.05949466675519943, "learning_rate": 0.01, "loss": 1.9341, "step": 88989 }, { "epoch": 9.18769357836052, "grad_norm": 0.10654468089342117, "learning_rate": 0.01, "loss": 1.9507, "step": 88992 }, { "epoch": 9.188003303737354, "grad_norm": 0.055688001215457916, "learning_rate": 0.01, "loss": 1.9152, "step": 88995 }, { "epoch": 9.188313029114186, "grad_norm": 0.04441916570067406, "learning_rate": 0.01, "loss": 1.9386, "step": 88998 }, { "epoch": 9.188622754491018, "grad_norm": 0.18019713461399078, "learning_rate": 0.01, "loss": 1.9646, "step": 89001 }, { "epoch": 9.18893247986785, "grad_norm": 0.10449805855751038, "learning_rate": 0.01, "loss": 1.9306, "step": 89004 }, { "epoch": 9.189242205244684, "grad_norm": 0.09775479882955551, "learning_rate": 0.01, "loss": 1.9336, "step": 89007 }, { "epoch": 9.189551930621516, "grad_norm": 0.05437876284122467, "learning_rate": 0.01, "loss": 1.9276, "step": 89010 }, { "epoch": 9.189861655998348, "grad_norm": 0.05880746990442276, "learning_rate": 0.01, "loss": 1.9305, "step": 89013 }, { "epoch": 9.190171381375182, "grad_norm": 0.03767773509025574, "learning_rate": 0.01, "loss": 1.9246, "step": 89016 }, { "epoch": 9.190481106752014, "grad_norm": 0.04172288253903389, "learning_rate": 0.01, "loss": 1.9348, "step": 89019 }, { "epoch": 9.190790832128846, "grad_norm": 0.045086201280355453, "learning_rate": 0.01, "loss": 1.9474, "step": 89022 }, { "epoch": 9.191100557505678, "grad_norm": 0.04238392040133476, "learning_rate": 0.01, "loss": 1.9323, "step": 89025 }, { "epoch": 9.191410282882511, "grad_norm": 0.08289175480604172, "learning_rate": 0.01, "loss": 1.9311, "step": 89028 }, { "epoch": 9.191720008259344, "grad_norm": 0.054400261491537094, "learning_rate": 0.01, "loss": 1.9471, "step": 89031 }, { "epoch": 9.192029733636176, "grad_norm": 0.05191842466592789, "learning_rate": 0.01, "loss": 1.9327, "step": 89034 }, { "epoch": 9.192339459013008, "grad_norm": 0.048883236944675446, "learning_rate": 0.01, "loss": 1.9484, "step": 89037 }, { "epoch": 9.192649184389841, "grad_norm": 0.06198425218462944, "learning_rate": 0.01, "loss": 1.9242, "step": 89040 }, { "epoch": 9.192958909766674, "grad_norm": 0.04245370998978615, "learning_rate": 0.01, "loss": 1.9193, "step": 89043 }, { "epoch": 9.193268635143506, "grad_norm": 0.04683458060026169, "learning_rate": 0.01, "loss": 1.9304, "step": 89046 }, { "epoch": 9.19357836052034, "grad_norm": 0.04018238186836243, "learning_rate": 0.01, "loss": 1.9243, "step": 89049 }, { "epoch": 9.193888085897171, "grad_norm": 0.08557484298944473, "learning_rate": 0.01, "loss": 1.9485, "step": 89052 }, { "epoch": 9.194197811274003, "grad_norm": 0.1037377119064331, "learning_rate": 0.01, "loss": 1.9341, "step": 89055 }, { "epoch": 9.194507536650836, "grad_norm": 0.05675356835126877, "learning_rate": 0.01, "loss": 1.9428, "step": 89058 }, { "epoch": 9.19481726202767, "grad_norm": 0.05811937898397446, "learning_rate": 0.01, "loss": 1.9532, "step": 89061 }, { "epoch": 9.195126987404501, "grad_norm": 0.11276617646217346, "learning_rate": 0.01, "loss": 1.928, "step": 89064 }, { "epoch": 9.195436712781333, "grad_norm": 0.06749601662158966, "learning_rate": 0.01, "loss": 1.9519, "step": 89067 }, { "epoch": 9.195746438158167, "grad_norm": 0.0429714135825634, "learning_rate": 0.01, "loss": 1.9013, "step": 89070 }, { "epoch": 9.196056163535, "grad_norm": 0.03318256884813309, "learning_rate": 0.01, "loss": 1.9449, "step": 89073 }, { "epoch": 9.196365888911831, "grad_norm": 0.04414292052388191, "learning_rate": 0.01, "loss": 1.9485, "step": 89076 }, { "epoch": 9.196675614288663, "grad_norm": 0.08458261936903, "learning_rate": 0.01, "loss": 1.9363, "step": 89079 }, { "epoch": 9.196985339665497, "grad_norm": 0.04780903086066246, "learning_rate": 0.01, "loss": 1.9333, "step": 89082 }, { "epoch": 9.19729506504233, "grad_norm": 0.04293385520577431, "learning_rate": 0.01, "loss": 1.9272, "step": 89085 }, { "epoch": 9.197604790419161, "grad_norm": 0.04496946558356285, "learning_rate": 0.01, "loss": 1.9312, "step": 89088 }, { "epoch": 9.197914515795993, "grad_norm": 0.0704391747713089, "learning_rate": 0.01, "loss": 1.951, "step": 89091 }, { "epoch": 9.198224241172827, "grad_norm": 0.09724891930818558, "learning_rate": 0.01, "loss": 1.9567, "step": 89094 }, { "epoch": 9.19853396654966, "grad_norm": 0.050086576491594315, "learning_rate": 0.01, "loss": 1.9573, "step": 89097 }, { "epoch": 9.198843691926491, "grad_norm": 0.037937577813863754, "learning_rate": 0.01, "loss": 1.9794, "step": 89100 }, { "epoch": 9.199153417303325, "grad_norm": 0.06578069925308228, "learning_rate": 0.01, "loss": 1.9304, "step": 89103 }, { "epoch": 9.199463142680157, "grad_norm": 0.13135863840579987, "learning_rate": 0.01, "loss": 1.9536, "step": 89106 }, { "epoch": 9.19977286805699, "grad_norm": 0.0574171282351017, "learning_rate": 0.01, "loss": 1.9509, "step": 89109 }, { "epoch": 9.200082593433821, "grad_norm": 0.044688817113637924, "learning_rate": 0.01, "loss": 1.9568, "step": 89112 }, { "epoch": 9.200392318810655, "grad_norm": 0.033014316111803055, "learning_rate": 0.01, "loss": 1.9557, "step": 89115 }, { "epoch": 9.200702044187487, "grad_norm": 0.1093095913529396, "learning_rate": 0.01, "loss": 1.9447, "step": 89118 }, { "epoch": 9.20101176956432, "grad_norm": 0.04808812960982323, "learning_rate": 0.01, "loss": 1.9289, "step": 89121 }, { "epoch": 9.201321494941153, "grad_norm": 0.11078277975320816, "learning_rate": 0.01, "loss": 1.9789, "step": 89124 }, { "epoch": 9.201631220317985, "grad_norm": 0.0814390480518341, "learning_rate": 0.01, "loss": 1.9106, "step": 89127 }, { "epoch": 9.201940945694817, "grad_norm": 0.058532990515232086, "learning_rate": 0.01, "loss": 1.9525, "step": 89130 }, { "epoch": 9.20225067107165, "grad_norm": 0.03977327421307564, "learning_rate": 0.01, "loss": 1.9241, "step": 89133 }, { "epoch": 9.202560396448483, "grad_norm": 0.04448265582323074, "learning_rate": 0.01, "loss": 1.9624, "step": 89136 }, { "epoch": 9.202870121825315, "grad_norm": 0.07655752450227737, "learning_rate": 0.01, "loss": 1.9522, "step": 89139 }, { "epoch": 9.203179847202147, "grad_norm": 0.04212338849902153, "learning_rate": 0.01, "loss": 1.9437, "step": 89142 }, { "epoch": 9.20348957257898, "grad_norm": 0.1010117158293724, "learning_rate": 0.01, "loss": 1.9395, "step": 89145 }, { "epoch": 9.203799297955813, "grad_norm": 0.061698488891124725, "learning_rate": 0.01, "loss": 1.9457, "step": 89148 }, { "epoch": 9.204109023332645, "grad_norm": 0.09103187173604965, "learning_rate": 0.01, "loss": 1.9361, "step": 89151 }, { "epoch": 9.204418748709477, "grad_norm": 0.04762488976120949, "learning_rate": 0.01, "loss": 1.952, "step": 89154 }, { "epoch": 9.204728474086311, "grad_norm": 0.11716189235448837, "learning_rate": 0.01, "loss": 1.9398, "step": 89157 }, { "epoch": 9.205038199463143, "grad_norm": 0.05731340870261192, "learning_rate": 0.01, "loss": 1.9118, "step": 89160 }, { "epoch": 9.205347924839975, "grad_norm": 0.07926695793867111, "learning_rate": 0.01, "loss": 1.9215, "step": 89163 }, { "epoch": 9.205657650216807, "grad_norm": 0.1536962240934372, "learning_rate": 0.01, "loss": 1.9634, "step": 89166 }, { "epoch": 9.20596737559364, "grad_norm": 0.09955775737762451, "learning_rate": 0.01, "loss": 1.9281, "step": 89169 }, { "epoch": 9.206277100970473, "grad_norm": 0.07915548235177994, "learning_rate": 0.01, "loss": 1.9194, "step": 89172 }, { "epoch": 9.206586826347305, "grad_norm": 0.1031462550163269, "learning_rate": 0.01, "loss": 1.9428, "step": 89175 }, { "epoch": 9.206896551724139, "grad_norm": 0.05071650445461273, "learning_rate": 0.01, "loss": 1.9392, "step": 89178 }, { "epoch": 9.20720627710097, "grad_norm": 0.03473865985870361, "learning_rate": 0.01, "loss": 1.9247, "step": 89181 }, { "epoch": 9.207516002477803, "grad_norm": 0.03383462131023407, "learning_rate": 0.01, "loss": 1.9314, "step": 89184 }, { "epoch": 9.207825727854635, "grad_norm": 0.044631265103816986, "learning_rate": 0.01, "loss": 1.9212, "step": 89187 }, { "epoch": 9.208135453231469, "grad_norm": 0.040731918066740036, "learning_rate": 0.01, "loss": 1.9534, "step": 89190 }, { "epoch": 9.2084451786083, "grad_norm": 0.09363244473934174, "learning_rate": 0.01, "loss": 1.9426, "step": 89193 }, { "epoch": 9.208754903985133, "grad_norm": 0.06661606580018997, "learning_rate": 0.01, "loss": 1.923, "step": 89196 }, { "epoch": 9.209064629361965, "grad_norm": 0.05025598779320717, "learning_rate": 0.01, "loss": 1.9209, "step": 89199 }, { "epoch": 9.209374354738799, "grad_norm": 0.11716365069150925, "learning_rate": 0.01, "loss": 1.9239, "step": 89202 }, { "epoch": 9.20968408011563, "grad_norm": 0.13573743402957916, "learning_rate": 0.01, "loss": 1.9414, "step": 89205 }, { "epoch": 9.209993805492463, "grad_norm": 0.049791369587183, "learning_rate": 0.01, "loss": 1.9443, "step": 89208 }, { "epoch": 9.210303530869297, "grad_norm": 0.03368228301405907, "learning_rate": 0.01, "loss": 1.9403, "step": 89211 }, { "epoch": 9.210613256246129, "grad_norm": 0.04646635428071022, "learning_rate": 0.01, "loss": 1.9779, "step": 89214 }, { "epoch": 9.21092298162296, "grad_norm": 0.08860430121421814, "learning_rate": 0.01, "loss": 1.95, "step": 89217 }, { "epoch": 9.211232706999793, "grad_norm": 0.04521026834845543, "learning_rate": 0.01, "loss": 1.9343, "step": 89220 }, { "epoch": 9.211542432376627, "grad_norm": 0.04164019972085953, "learning_rate": 0.01, "loss": 1.9439, "step": 89223 }, { "epoch": 9.211852157753459, "grad_norm": 0.09966282546520233, "learning_rate": 0.01, "loss": 1.9396, "step": 89226 }, { "epoch": 9.21216188313029, "grad_norm": 0.07088325917720795, "learning_rate": 0.01, "loss": 1.9323, "step": 89229 }, { "epoch": 9.212471608507125, "grad_norm": 0.05550432950258255, "learning_rate": 0.01, "loss": 1.9536, "step": 89232 }, { "epoch": 9.212781333883957, "grad_norm": 0.041758205741643906, "learning_rate": 0.01, "loss": 1.9633, "step": 89235 }, { "epoch": 9.213091059260789, "grad_norm": 0.04863423854112625, "learning_rate": 0.01, "loss": 1.9439, "step": 89238 }, { "epoch": 9.21340078463762, "grad_norm": 0.05925336852669716, "learning_rate": 0.01, "loss": 1.9299, "step": 89241 }, { "epoch": 9.213710510014455, "grad_norm": 0.03509088233113289, "learning_rate": 0.01, "loss": 1.9232, "step": 89244 }, { "epoch": 9.214020235391287, "grad_norm": 0.04175510257482529, "learning_rate": 0.01, "loss": 1.9286, "step": 89247 }, { "epoch": 9.214329960768119, "grad_norm": 0.1422414481639862, "learning_rate": 0.01, "loss": 1.9529, "step": 89250 }, { "epoch": 9.21463968614495, "grad_norm": 0.039441660046577454, "learning_rate": 0.01, "loss": 1.948, "step": 89253 }, { "epoch": 9.214949411521784, "grad_norm": 0.11482972651720047, "learning_rate": 0.01, "loss": 1.936, "step": 89256 }, { "epoch": 9.215259136898617, "grad_norm": 0.10223820805549622, "learning_rate": 0.01, "loss": 1.9475, "step": 89259 }, { "epoch": 9.215568862275449, "grad_norm": 0.08964929729700089, "learning_rate": 0.01, "loss": 1.9502, "step": 89262 }, { "epoch": 9.215878587652282, "grad_norm": 0.08693050593137741, "learning_rate": 0.01, "loss": 1.9262, "step": 89265 }, { "epoch": 9.216188313029114, "grad_norm": 0.0800836980342865, "learning_rate": 0.01, "loss": 1.9476, "step": 89268 }, { "epoch": 9.216498038405947, "grad_norm": 0.04038820415735245, "learning_rate": 0.01, "loss": 1.9657, "step": 89271 }, { "epoch": 9.216807763782779, "grad_norm": 0.04846915602684021, "learning_rate": 0.01, "loss": 1.9121, "step": 89274 }, { "epoch": 9.217117489159612, "grad_norm": 0.046729911118745804, "learning_rate": 0.01, "loss": 1.9327, "step": 89277 }, { "epoch": 9.217427214536444, "grad_norm": 0.04161561653017998, "learning_rate": 0.01, "loss": 1.955, "step": 89280 }, { "epoch": 9.217736939913276, "grad_norm": 0.07458865642547607, "learning_rate": 0.01, "loss": 1.8989, "step": 89283 }, { "epoch": 9.21804666529011, "grad_norm": 0.06652171164751053, "learning_rate": 0.01, "loss": 1.9329, "step": 89286 }, { "epoch": 9.218356390666942, "grad_norm": 0.07299696654081345, "learning_rate": 0.01, "loss": 1.9319, "step": 89289 }, { "epoch": 9.218666116043774, "grad_norm": 0.06781180948019028, "learning_rate": 0.01, "loss": 1.9493, "step": 89292 }, { "epoch": 9.218975841420606, "grad_norm": 0.06078062951564789, "learning_rate": 0.01, "loss": 1.9034, "step": 89295 }, { "epoch": 9.21928556679744, "grad_norm": 0.04806487634778023, "learning_rate": 0.01, "loss": 1.9468, "step": 89298 }, { "epoch": 9.219595292174272, "grad_norm": 0.03390311077237129, "learning_rate": 0.01, "loss": 1.922, "step": 89301 }, { "epoch": 9.219905017551104, "grad_norm": 0.043576549738645554, "learning_rate": 0.01, "loss": 1.959, "step": 89304 }, { "epoch": 9.220214742927936, "grad_norm": 0.058129116892814636, "learning_rate": 0.01, "loss": 1.9608, "step": 89307 }, { "epoch": 9.22052446830477, "grad_norm": 0.06500256806612015, "learning_rate": 0.01, "loss": 1.9399, "step": 89310 }, { "epoch": 9.220834193681602, "grad_norm": 0.12124396115541458, "learning_rate": 0.01, "loss": 1.9426, "step": 89313 }, { "epoch": 9.221143919058434, "grad_norm": 0.04002736508846283, "learning_rate": 0.01, "loss": 1.9424, "step": 89316 }, { "epoch": 9.221453644435268, "grad_norm": 0.10567335039377213, "learning_rate": 0.01, "loss": 1.9471, "step": 89319 }, { "epoch": 9.2217633698121, "grad_norm": 0.08138985186815262, "learning_rate": 0.01, "loss": 1.9386, "step": 89322 }, { "epoch": 9.222073095188932, "grad_norm": 0.056923553347587585, "learning_rate": 0.01, "loss": 1.9581, "step": 89325 }, { "epoch": 9.222382820565764, "grad_norm": 0.07420485466718674, "learning_rate": 0.01, "loss": 1.9557, "step": 89328 }, { "epoch": 9.222692545942598, "grad_norm": 0.05312581732869148, "learning_rate": 0.01, "loss": 1.9272, "step": 89331 }, { "epoch": 9.22300227131943, "grad_norm": 0.054729972034692764, "learning_rate": 0.01, "loss": 1.9286, "step": 89334 }, { "epoch": 9.223311996696262, "grad_norm": 0.0702265128493309, "learning_rate": 0.01, "loss": 1.9618, "step": 89337 }, { "epoch": 9.223621722073096, "grad_norm": 0.06925257295370102, "learning_rate": 0.01, "loss": 1.9132, "step": 89340 }, { "epoch": 9.223931447449928, "grad_norm": 0.03326869383454323, "learning_rate": 0.01, "loss": 1.9521, "step": 89343 }, { "epoch": 9.22424117282676, "grad_norm": 0.05594922602176666, "learning_rate": 0.01, "loss": 1.9336, "step": 89346 }, { "epoch": 9.224550898203592, "grad_norm": 0.054577749222517014, "learning_rate": 0.01, "loss": 1.9249, "step": 89349 }, { "epoch": 9.224860623580426, "grad_norm": 0.09225718677043915, "learning_rate": 0.01, "loss": 1.962, "step": 89352 }, { "epoch": 9.225170348957258, "grad_norm": 0.05223730951547623, "learning_rate": 0.01, "loss": 1.9328, "step": 89355 }, { "epoch": 9.22548007433409, "grad_norm": 0.04215600714087486, "learning_rate": 0.01, "loss": 1.9613, "step": 89358 }, { "epoch": 9.225789799710922, "grad_norm": 0.03782760351896286, "learning_rate": 0.01, "loss": 1.926, "step": 89361 }, { "epoch": 9.226099525087756, "grad_norm": 0.042087994515895844, "learning_rate": 0.01, "loss": 1.9488, "step": 89364 }, { "epoch": 9.226409250464588, "grad_norm": 0.10988534241914749, "learning_rate": 0.01, "loss": 1.9527, "step": 89367 }, { "epoch": 9.22671897584142, "grad_norm": 0.07139700651168823, "learning_rate": 0.01, "loss": 1.9506, "step": 89370 }, { "epoch": 9.227028701218254, "grad_norm": 0.03730588033795357, "learning_rate": 0.01, "loss": 1.9326, "step": 89373 }, { "epoch": 9.227338426595086, "grad_norm": 0.03242401033639908, "learning_rate": 0.01, "loss": 1.9128, "step": 89376 }, { "epoch": 9.227648151971918, "grad_norm": 0.08017165213823318, "learning_rate": 0.01, "loss": 1.9372, "step": 89379 }, { "epoch": 9.22795787734875, "grad_norm": 0.0404607355594635, "learning_rate": 0.01, "loss": 1.9538, "step": 89382 }, { "epoch": 9.228267602725584, "grad_norm": 0.12543754279613495, "learning_rate": 0.01, "loss": 1.9451, "step": 89385 }, { "epoch": 9.228577328102416, "grad_norm": 0.0824381411075592, "learning_rate": 0.01, "loss": 1.9194, "step": 89388 }, { "epoch": 9.228887053479248, "grad_norm": 0.0916644036769867, "learning_rate": 0.01, "loss": 1.9322, "step": 89391 }, { "epoch": 9.229196778856082, "grad_norm": 0.052297160029411316, "learning_rate": 0.01, "loss": 1.9367, "step": 89394 }, { "epoch": 9.229506504232914, "grad_norm": 0.043394576758146286, "learning_rate": 0.01, "loss": 1.938, "step": 89397 }, { "epoch": 9.229816229609746, "grad_norm": 0.03213174641132355, "learning_rate": 0.01, "loss": 1.9425, "step": 89400 }, { "epoch": 9.230125954986578, "grad_norm": 0.1214103251695633, "learning_rate": 0.01, "loss": 1.9614, "step": 89403 }, { "epoch": 9.230435680363412, "grad_norm": 0.05036572739481926, "learning_rate": 0.01, "loss": 1.9514, "step": 89406 }, { "epoch": 9.230745405740244, "grad_norm": 0.07506284862756729, "learning_rate": 0.01, "loss": 1.9408, "step": 89409 }, { "epoch": 9.231055131117076, "grad_norm": 0.0447104275226593, "learning_rate": 0.01, "loss": 1.9417, "step": 89412 }, { "epoch": 9.231364856493908, "grad_norm": 0.031230248510837555, "learning_rate": 0.01, "loss": 1.9747, "step": 89415 }, { "epoch": 9.231674581870742, "grad_norm": 0.041907474398612976, "learning_rate": 0.01, "loss": 1.9318, "step": 89418 }, { "epoch": 9.231984307247574, "grad_norm": 0.054957807064056396, "learning_rate": 0.01, "loss": 1.9309, "step": 89421 }, { "epoch": 9.232294032624406, "grad_norm": 0.13959866762161255, "learning_rate": 0.01, "loss": 1.9595, "step": 89424 }, { "epoch": 9.23260375800124, "grad_norm": 0.07568775117397308, "learning_rate": 0.01, "loss": 1.9548, "step": 89427 }, { "epoch": 9.232913483378072, "grad_norm": 0.04290013760328293, "learning_rate": 0.01, "loss": 1.9524, "step": 89430 }, { "epoch": 9.233223208754904, "grad_norm": 0.08651076257228851, "learning_rate": 0.01, "loss": 1.9502, "step": 89433 }, { "epoch": 9.233532934131736, "grad_norm": 0.04303092509508133, "learning_rate": 0.01, "loss": 1.9381, "step": 89436 }, { "epoch": 9.23384265950857, "grad_norm": 0.03936919942498207, "learning_rate": 0.01, "loss": 1.9418, "step": 89439 }, { "epoch": 9.234152384885402, "grad_norm": 0.03311093524098396, "learning_rate": 0.01, "loss": 1.948, "step": 89442 }, { "epoch": 9.234462110262234, "grad_norm": 0.07250311225652695, "learning_rate": 0.01, "loss": 1.9334, "step": 89445 }, { "epoch": 9.234771835639068, "grad_norm": 0.07465573400259018, "learning_rate": 0.01, "loss": 1.9489, "step": 89448 }, { "epoch": 9.2350815610159, "grad_norm": 0.04738716781139374, "learning_rate": 0.01, "loss": 1.9269, "step": 89451 }, { "epoch": 9.235391286392732, "grad_norm": 0.07104123383760452, "learning_rate": 0.01, "loss": 1.9523, "step": 89454 }, { "epoch": 9.235701011769564, "grad_norm": 0.04550927132368088, "learning_rate": 0.01, "loss": 1.936, "step": 89457 }, { "epoch": 9.236010737146398, "grad_norm": 0.08226420730352402, "learning_rate": 0.01, "loss": 1.9485, "step": 89460 }, { "epoch": 9.23632046252323, "grad_norm": 0.0893448144197464, "learning_rate": 0.01, "loss": 1.9537, "step": 89463 }, { "epoch": 9.236630187900062, "grad_norm": 0.039117638021707535, "learning_rate": 0.01, "loss": 1.9563, "step": 89466 }, { "epoch": 9.236939913276894, "grad_norm": 0.03807968646287918, "learning_rate": 0.01, "loss": 1.9297, "step": 89469 }, { "epoch": 9.237249638653728, "grad_norm": 0.030970802530646324, "learning_rate": 0.01, "loss": 1.9494, "step": 89472 }, { "epoch": 9.23755936403056, "grad_norm": 0.04818034917116165, "learning_rate": 0.01, "loss": 1.9751, "step": 89475 }, { "epoch": 9.237869089407392, "grad_norm": 0.07317550480365753, "learning_rate": 0.01, "loss": 1.9441, "step": 89478 }, { "epoch": 9.238178814784225, "grad_norm": 0.10925126820802689, "learning_rate": 0.01, "loss": 1.9615, "step": 89481 }, { "epoch": 9.238488540161057, "grad_norm": 0.060674529522657394, "learning_rate": 0.01, "loss": 1.945, "step": 89484 }, { "epoch": 9.23879826553789, "grad_norm": 0.1061653196811676, "learning_rate": 0.01, "loss": 1.9667, "step": 89487 }, { "epoch": 9.239107990914722, "grad_norm": 0.057163987308740616, "learning_rate": 0.01, "loss": 1.9197, "step": 89490 }, { "epoch": 9.239417716291555, "grad_norm": 0.05087054520845413, "learning_rate": 0.01, "loss": 1.936, "step": 89493 }, { "epoch": 9.239727441668387, "grad_norm": 0.03398473933339119, "learning_rate": 0.01, "loss": 1.9221, "step": 89496 }, { "epoch": 9.24003716704522, "grad_norm": 0.14643120765686035, "learning_rate": 0.01, "loss": 1.9397, "step": 89499 }, { "epoch": 9.240346892422053, "grad_norm": 0.09905476868152618, "learning_rate": 0.01, "loss": 1.9361, "step": 89502 }, { "epoch": 9.240656617798885, "grad_norm": 0.09477792680263519, "learning_rate": 0.01, "loss": 1.9427, "step": 89505 }, { "epoch": 9.240966343175717, "grad_norm": 0.061807479709386826, "learning_rate": 0.01, "loss": 1.9551, "step": 89508 }, { "epoch": 9.24127606855255, "grad_norm": 0.09526131302118301, "learning_rate": 0.01, "loss": 1.9405, "step": 89511 }, { "epoch": 9.241585793929383, "grad_norm": 0.05511989817023277, "learning_rate": 0.01, "loss": 1.9326, "step": 89514 }, { "epoch": 9.241895519306215, "grad_norm": 0.047247253358364105, "learning_rate": 0.01, "loss": 1.9511, "step": 89517 }, { "epoch": 9.242205244683047, "grad_norm": 0.04097644239664078, "learning_rate": 0.01, "loss": 1.9196, "step": 89520 }, { "epoch": 9.24251497005988, "grad_norm": 0.041109759360551834, "learning_rate": 0.01, "loss": 1.9416, "step": 89523 }, { "epoch": 9.242824695436713, "grad_norm": 0.045235078781843185, "learning_rate": 0.01, "loss": 1.9588, "step": 89526 }, { "epoch": 9.243134420813545, "grad_norm": 0.04100433364510536, "learning_rate": 0.01, "loss": 1.928, "step": 89529 }, { "epoch": 9.243444146190377, "grad_norm": 0.07918980717658997, "learning_rate": 0.01, "loss": 1.896, "step": 89532 }, { "epoch": 9.243753871567211, "grad_norm": 0.06758083403110504, "learning_rate": 0.01, "loss": 1.9459, "step": 89535 }, { "epoch": 9.244063596944043, "grad_norm": 0.09762846678495407, "learning_rate": 0.01, "loss": 1.9256, "step": 89538 }, { "epoch": 9.244373322320875, "grad_norm": 0.07526645809412003, "learning_rate": 0.01, "loss": 1.9283, "step": 89541 }, { "epoch": 9.244683047697707, "grad_norm": 0.048858094960451126, "learning_rate": 0.01, "loss": 1.9463, "step": 89544 }, { "epoch": 9.244992773074541, "grad_norm": 0.05891689285635948, "learning_rate": 0.01, "loss": 1.9087, "step": 89547 }, { "epoch": 9.245302498451373, "grad_norm": 0.08054155856370926, "learning_rate": 0.01, "loss": 1.9386, "step": 89550 }, { "epoch": 9.245612223828205, "grad_norm": 0.10516653954982758, "learning_rate": 0.01, "loss": 1.9544, "step": 89553 }, { "epoch": 9.245921949205039, "grad_norm": 0.1483267843723297, "learning_rate": 0.01, "loss": 1.9459, "step": 89556 }, { "epoch": 9.246231674581871, "grad_norm": 0.09309061616659164, "learning_rate": 0.01, "loss": 1.953, "step": 89559 }, { "epoch": 9.246541399958703, "grad_norm": 0.041449520736932755, "learning_rate": 0.01, "loss": 1.9501, "step": 89562 }, { "epoch": 9.246851125335535, "grad_norm": 0.05703721567988396, "learning_rate": 0.01, "loss": 1.9519, "step": 89565 }, { "epoch": 9.247160850712369, "grad_norm": 0.04464108496904373, "learning_rate": 0.01, "loss": 1.9168, "step": 89568 }, { "epoch": 9.247470576089201, "grad_norm": 0.04087911546230316, "learning_rate": 0.01, "loss": 1.9377, "step": 89571 }, { "epoch": 9.247780301466033, "grad_norm": 0.043997958302497864, "learning_rate": 0.01, "loss": 1.9103, "step": 89574 }, { "epoch": 9.248090026842865, "grad_norm": 0.06421921402215958, "learning_rate": 0.01, "loss": 1.9555, "step": 89577 }, { "epoch": 9.248399752219699, "grad_norm": 0.06022017076611519, "learning_rate": 0.01, "loss": 1.933, "step": 89580 }, { "epoch": 9.248709477596531, "grad_norm": 0.11454983055591583, "learning_rate": 0.01, "loss": 1.9374, "step": 89583 }, { "epoch": 9.249019202973363, "grad_norm": 0.046563856303691864, "learning_rate": 0.01, "loss": 1.9351, "step": 89586 }, { "epoch": 9.249328928350197, "grad_norm": 0.049571454524993896, "learning_rate": 0.01, "loss": 1.9423, "step": 89589 }, { "epoch": 9.249638653727029, "grad_norm": 0.10863058269023895, "learning_rate": 0.01, "loss": 1.9574, "step": 89592 }, { "epoch": 9.249948379103861, "grad_norm": 0.07477369159460068, "learning_rate": 0.01, "loss": 1.9436, "step": 89595 }, { "epoch": 9.250258104480693, "grad_norm": 0.0527978241443634, "learning_rate": 0.01, "loss": 1.9213, "step": 89598 }, { "epoch": 9.250567829857527, "grad_norm": 0.07022270560264587, "learning_rate": 0.01, "loss": 1.9605, "step": 89601 }, { "epoch": 9.250877555234359, "grad_norm": 0.0500531829893589, "learning_rate": 0.01, "loss": 1.953, "step": 89604 }, { "epoch": 9.251187280611191, "grad_norm": 0.038342274725437164, "learning_rate": 0.01, "loss": 1.9388, "step": 89607 }, { "epoch": 9.251497005988025, "grad_norm": 0.08321253955364227, "learning_rate": 0.01, "loss": 1.9628, "step": 89610 }, { "epoch": 9.251806731364857, "grad_norm": 0.06742928922176361, "learning_rate": 0.01, "loss": 1.9529, "step": 89613 }, { "epoch": 9.252116456741689, "grad_norm": 0.16153162717819214, "learning_rate": 0.01, "loss": 1.9147, "step": 89616 }, { "epoch": 9.252426182118521, "grad_norm": 0.06035587936639786, "learning_rate": 0.01, "loss": 1.9312, "step": 89619 }, { "epoch": 9.252735907495355, "grad_norm": 0.10703792423009872, "learning_rate": 0.01, "loss": 1.9315, "step": 89622 }, { "epoch": 9.253045632872187, "grad_norm": 0.08650069683790207, "learning_rate": 0.01, "loss": 1.9389, "step": 89625 }, { "epoch": 9.253355358249019, "grad_norm": 0.06951060891151428, "learning_rate": 0.01, "loss": 1.9331, "step": 89628 }, { "epoch": 9.253665083625851, "grad_norm": 0.03464649245142937, "learning_rate": 0.01, "loss": 1.9454, "step": 89631 }, { "epoch": 9.253974809002685, "grad_norm": 0.04344501346349716, "learning_rate": 0.01, "loss": 1.955, "step": 89634 }, { "epoch": 9.254284534379517, "grad_norm": 0.07010316103696823, "learning_rate": 0.01, "loss": 1.9197, "step": 89637 }, { "epoch": 9.254594259756349, "grad_norm": 0.11068741232156754, "learning_rate": 0.01, "loss": 1.9501, "step": 89640 }, { "epoch": 9.254903985133183, "grad_norm": 0.1069609746336937, "learning_rate": 0.01, "loss": 1.9283, "step": 89643 }, { "epoch": 9.255213710510015, "grad_norm": 0.03556428849697113, "learning_rate": 0.01, "loss": 1.9387, "step": 89646 }, { "epoch": 9.255523435886847, "grad_norm": 0.07954979687929153, "learning_rate": 0.01, "loss": 1.9636, "step": 89649 }, { "epoch": 9.255833161263679, "grad_norm": 0.12106265872716904, "learning_rate": 0.01, "loss": 1.9406, "step": 89652 }, { "epoch": 9.256142886640513, "grad_norm": 0.06580674648284912, "learning_rate": 0.01, "loss": 1.9615, "step": 89655 }, { "epoch": 9.256452612017345, "grad_norm": 0.06531988084316254, "learning_rate": 0.01, "loss": 1.9611, "step": 89658 }, { "epoch": 9.256762337394177, "grad_norm": 0.09100288897752762, "learning_rate": 0.01, "loss": 1.9177, "step": 89661 }, { "epoch": 9.257072062771009, "grad_norm": 0.04378080740571022, "learning_rate": 0.01, "loss": 1.9505, "step": 89664 }, { "epoch": 9.257381788147843, "grad_norm": 0.08889858424663544, "learning_rate": 0.01, "loss": 1.9194, "step": 89667 }, { "epoch": 9.257691513524675, "grad_norm": 0.09178147464990616, "learning_rate": 0.01, "loss": 1.9206, "step": 89670 }, { "epoch": 9.258001238901507, "grad_norm": 0.12640753388404846, "learning_rate": 0.01, "loss": 1.9087, "step": 89673 }, { "epoch": 9.25831096427834, "grad_norm": 0.08799244463443756, "learning_rate": 0.01, "loss": 1.9428, "step": 89676 }, { "epoch": 9.258620689655173, "grad_norm": 0.07690322399139404, "learning_rate": 0.01, "loss": 1.9495, "step": 89679 }, { "epoch": 9.258930415032005, "grad_norm": 0.07583325356245041, "learning_rate": 0.01, "loss": 1.9329, "step": 89682 }, { "epoch": 9.259240140408837, "grad_norm": 0.041603825986385345, "learning_rate": 0.01, "loss": 1.9227, "step": 89685 }, { "epoch": 9.25954986578567, "grad_norm": 0.14184615015983582, "learning_rate": 0.01, "loss": 1.9505, "step": 89688 }, { "epoch": 9.259859591162503, "grad_norm": 0.08319629728794098, "learning_rate": 0.01, "loss": 1.9336, "step": 89691 }, { "epoch": 9.260169316539335, "grad_norm": 0.09747131168842316, "learning_rate": 0.01, "loss": 1.963, "step": 89694 }, { "epoch": 9.260479041916168, "grad_norm": 3.834672451019287, "learning_rate": 0.01, "loss": 1.9896, "step": 89697 }, { "epoch": 9.260788767293, "grad_norm": 0.20725275576114655, "learning_rate": 0.01, "loss": 2.0049, "step": 89700 }, { "epoch": 9.261098492669833, "grad_norm": 0.16313984990119934, "learning_rate": 0.01, "loss": 1.9805, "step": 89703 }, { "epoch": 9.261408218046665, "grad_norm": 0.08664103597402573, "learning_rate": 0.01, "loss": 1.9766, "step": 89706 }, { "epoch": 9.261717943423498, "grad_norm": 0.05102289468050003, "learning_rate": 0.01, "loss": 1.937, "step": 89709 }, { "epoch": 9.26202766880033, "grad_norm": 0.05602896586060524, "learning_rate": 0.01, "loss": 1.9341, "step": 89712 }, { "epoch": 9.262337394177163, "grad_norm": 0.04888107627630234, "learning_rate": 0.01, "loss": 1.9621, "step": 89715 }, { "epoch": 9.262647119553996, "grad_norm": 0.06098625808954239, "learning_rate": 0.01, "loss": 1.9489, "step": 89718 }, { "epoch": 9.262956844930828, "grad_norm": 0.03745686635375023, "learning_rate": 0.01, "loss": 1.9613, "step": 89721 }, { "epoch": 9.26326657030766, "grad_norm": 0.035396233201026917, "learning_rate": 0.01, "loss": 1.9274, "step": 89724 }, { "epoch": 9.263576295684492, "grad_norm": 0.04478583112359047, "learning_rate": 0.01, "loss": 1.95, "step": 89727 }, { "epoch": 9.263886021061326, "grad_norm": 0.05072050169110298, "learning_rate": 0.01, "loss": 1.9636, "step": 89730 }, { "epoch": 9.264195746438158, "grad_norm": 0.09497334808111191, "learning_rate": 0.01, "loss": 1.9339, "step": 89733 }, { "epoch": 9.26450547181499, "grad_norm": 0.07028527557849884, "learning_rate": 0.01, "loss": 1.9217, "step": 89736 }, { "epoch": 9.264815197191822, "grad_norm": 0.11074947565793991, "learning_rate": 0.01, "loss": 1.945, "step": 89739 }, { "epoch": 9.265124922568656, "grad_norm": 0.051569730043411255, "learning_rate": 0.01, "loss": 1.9499, "step": 89742 }, { "epoch": 9.265434647945488, "grad_norm": 0.04874719679355621, "learning_rate": 0.01, "loss": 1.9301, "step": 89745 }, { "epoch": 9.26574437332232, "grad_norm": 0.04530943185091019, "learning_rate": 0.01, "loss": 1.9253, "step": 89748 }, { "epoch": 9.266054098699154, "grad_norm": 0.06027248129248619, "learning_rate": 0.01, "loss": 1.9478, "step": 89751 }, { "epoch": 9.266363824075986, "grad_norm": 0.07296335697174072, "learning_rate": 0.01, "loss": 1.96, "step": 89754 }, { "epoch": 9.266673549452818, "grad_norm": 0.06033644825220108, "learning_rate": 0.01, "loss": 1.9293, "step": 89757 }, { "epoch": 9.26698327482965, "grad_norm": 0.04669598489999771, "learning_rate": 0.01, "loss": 1.8868, "step": 89760 }, { "epoch": 9.267293000206484, "grad_norm": 0.04095854610204697, "learning_rate": 0.01, "loss": 1.9316, "step": 89763 }, { "epoch": 9.267602725583316, "grad_norm": 0.045767225325107574, "learning_rate": 0.01, "loss": 1.94, "step": 89766 }, { "epoch": 9.267912450960148, "grad_norm": 0.10907436907291412, "learning_rate": 0.01, "loss": 1.942, "step": 89769 }, { "epoch": 9.26822217633698, "grad_norm": 0.06417029350996017, "learning_rate": 0.01, "loss": 1.9496, "step": 89772 }, { "epoch": 9.268531901713814, "grad_norm": 0.049392636865377426, "learning_rate": 0.01, "loss": 1.9104, "step": 89775 }, { "epoch": 9.268841627090646, "grad_norm": 0.06661033630371094, "learning_rate": 0.01, "loss": 1.9357, "step": 89778 }, { "epoch": 9.269151352467478, "grad_norm": 0.04778747260570526, "learning_rate": 0.01, "loss": 1.8937, "step": 89781 }, { "epoch": 9.269461077844312, "grad_norm": 0.06895989179611206, "learning_rate": 0.01, "loss": 1.9511, "step": 89784 }, { "epoch": 9.269770803221144, "grad_norm": 0.06295426189899445, "learning_rate": 0.01, "loss": 1.9505, "step": 89787 }, { "epoch": 9.270080528597976, "grad_norm": 0.09221582114696503, "learning_rate": 0.01, "loss": 1.9361, "step": 89790 }, { "epoch": 9.270390253974808, "grad_norm": 0.04774243384599686, "learning_rate": 0.01, "loss": 1.9383, "step": 89793 }, { "epoch": 9.270699979351642, "grad_norm": 0.06828971207141876, "learning_rate": 0.01, "loss": 1.9195, "step": 89796 }, { "epoch": 9.271009704728474, "grad_norm": 0.054797038435935974, "learning_rate": 0.01, "loss": 1.9449, "step": 89799 }, { "epoch": 9.271319430105306, "grad_norm": 0.03856116533279419, "learning_rate": 0.01, "loss": 1.9185, "step": 89802 }, { "epoch": 9.27162915548214, "grad_norm": 0.035631731152534485, "learning_rate": 0.01, "loss": 1.9367, "step": 89805 }, { "epoch": 9.271938880858972, "grad_norm": 0.057642899453639984, "learning_rate": 0.01, "loss": 1.93, "step": 89808 }, { "epoch": 9.272248606235804, "grad_norm": 0.08952697366476059, "learning_rate": 0.01, "loss": 1.9313, "step": 89811 }, { "epoch": 9.272558331612636, "grad_norm": 0.04947827383875847, "learning_rate": 0.01, "loss": 1.9307, "step": 89814 }, { "epoch": 9.27286805698947, "grad_norm": 0.06767935305833817, "learning_rate": 0.01, "loss": 1.9386, "step": 89817 }, { "epoch": 9.273177782366302, "grad_norm": 0.050101347267627716, "learning_rate": 0.01, "loss": 1.9385, "step": 89820 }, { "epoch": 9.273487507743134, "grad_norm": 0.03736945986747742, "learning_rate": 0.01, "loss": 1.9341, "step": 89823 }, { "epoch": 9.273797233119968, "grad_norm": 0.12808963656425476, "learning_rate": 0.01, "loss": 1.9335, "step": 89826 }, { "epoch": 9.2741069584968, "grad_norm": 0.058463335037231445, "learning_rate": 0.01, "loss": 1.9359, "step": 89829 }, { "epoch": 9.274416683873632, "grad_norm": 0.06605935841798782, "learning_rate": 0.01, "loss": 1.9248, "step": 89832 }, { "epoch": 9.274726409250464, "grad_norm": 0.09979791194200516, "learning_rate": 0.01, "loss": 1.9394, "step": 89835 }, { "epoch": 9.275036134627298, "grad_norm": 0.03688298538327217, "learning_rate": 0.01, "loss": 1.9482, "step": 89838 }, { "epoch": 9.27534586000413, "grad_norm": 0.06498592346906662, "learning_rate": 0.01, "loss": 1.9208, "step": 89841 }, { "epoch": 9.275655585380962, "grad_norm": 0.04934919252991676, "learning_rate": 0.01, "loss": 1.9282, "step": 89844 }, { "epoch": 9.275965310757794, "grad_norm": 0.0446552038192749, "learning_rate": 0.01, "loss": 1.9403, "step": 89847 }, { "epoch": 9.276275036134628, "grad_norm": 0.052687738090753555, "learning_rate": 0.01, "loss": 1.9398, "step": 89850 }, { "epoch": 9.27658476151146, "grad_norm": 0.03818351775407791, "learning_rate": 0.01, "loss": 1.9494, "step": 89853 }, { "epoch": 9.276894486888292, "grad_norm": 0.042557138949632645, "learning_rate": 0.01, "loss": 1.9343, "step": 89856 }, { "epoch": 9.277204212265126, "grad_norm": 0.055316612124443054, "learning_rate": 0.01, "loss": 1.947, "step": 89859 }, { "epoch": 9.277513937641958, "grad_norm": 0.09695148468017578, "learning_rate": 0.01, "loss": 1.9589, "step": 89862 }, { "epoch": 9.27782366301879, "grad_norm": 0.09990256279706955, "learning_rate": 0.01, "loss": 1.9424, "step": 89865 }, { "epoch": 9.278133388395622, "grad_norm": 0.07283806800842285, "learning_rate": 0.01, "loss": 1.9553, "step": 89868 }, { "epoch": 9.278443113772456, "grad_norm": 0.08778601884841919, "learning_rate": 0.01, "loss": 1.9675, "step": 89871 }, { "epoch": 9.278752839149288, "grad_norm": 0.038158684968948364, "learning_rate": 0.01, "loss": 1.9337, "step": 89874 }, { "epoch": 9.27906256452612, "grad_norm": 0.03885229304432869, "learning_rate": 0.01, "loss": 1.9341, "step": 89877 }, { "epoch": 9.279372289902952, "grad_norm": 0.09096448123455048, "learning_rate": 0.01, "loss": 1.8973, "step": 89880 }, { "epoch": 9.279682015279786, "grad_norm": 0.04410238564014435, "learning_rate": 0.01, "loss": 1.9279, "step": 89883 }, { "epoch": 9.279991740656618, "grad_norm": 0.05357813090085983, "learning_rate": 0.01, "loss": 1.9638, "step": 89886 }, { "epoch": 9.28030146603345, "grad_norm": 0.08851207047700882, "learning_rate": 0.01, "loss": 1.9438, "step": 89889 }, { "epoch": 9.280611191410284, "grad_norm": 0.1310880333185196, "learning_rate": 0.01, "loss": 1.952, "step": 89892 }, { "epoch": 9.280920916787116, "grad_norm": 0.06100614741444588, "learning_rate": 0.01, "loss": 1.946, "step": 89895 }, { "epoch": 9.281230642163948, "grad_norm": 0.05535770207643509, "learning_rate": 0.01, "loss": 1.9214, "step": 89898 }, { "epoch": 9.28154036754078, "grad_norm": 0.05252685025334358, "learning_rate": 0.01, "loss": 1.9321, "step": 89901 }, { "epoch": 9.281850092917614, "grad_norm": 0.044191114604473114, "learning_rate": 0.01, "loss": 1.9392, "step": 89904 }, { "epoch": 9.282159818294446, "grad_norm": 0.05596025660634041, "learning_rate": 0.01, "loss": 1.9816, "step": 89907 }, { "epoch": 9.282469543671278, "grad_norm": 0.08656829595565796, "learning_rate": 0.01, "loss": 1.9653, "step": 89910 }, { "epoch": 9.282779269048111, "grad_norm": 0.05122004449367523, "learning_rate": 0.01, "loss": 1.9393, "step": 89913 }, { "epoch": 9.283088994424944, "grad_norm": 0.08028247952461243, "learning_rate": 0.01, "loss": 1.9305, "step": 89916 }, { "epoch": 9.283398719801776, "grad_norm": 0.06237107142806053, "learning_rate": 0.01, "loss": 1.9221, "step": 89919 }, { "epoch": 9.283708445178608, "grad_norm": 0.12027272582054138, "learning_rate": 0.01, "loss": 1.9599, "step": 89922 }, { "epoch": 9.284018170555441, "grad_norm": 0.08568006008863449, "learning_rate": 0.01, "loss": 1.937, "step": 89925 }, { "epoch": 9.284327895932273, "grad_norm": 0.053323738276958466, "learning_rate": 0.01, "loss": 1.9498, "step": 89928 }, { "epoch": 9.284637621309106, "grad_norm": 0.12184597551822662, "learning_rate": 0.01, "loss": 1.9386, "step": 89931 }, { "epoch": 9.28494734668594, "grad_norm": 0.11333894729614258, "learning_rate": 0.01, "loss": 1.9605, "step": 89934 }, { "epoch": 9.285257072062771, "grad_norm": 0.06243620440363884, "learning_rate": 0.01, "loss": 1.9229, "step": 89937 }, { "epoch": 9.285566797439603, "grad_norm": 0.04748154804110527, "learning_rate": 0.01, "loss": 1.9197, "step": 89940 }, { "epoch": 9.285876522816435, "grad_norm": 0.031249042600393295, "learning_rate": 0.01, "loss": 1.9503, "step": 89943 }, { "epoch": 9.28618624819327, "grad_norm": 0.036813315004110336, "learning_rate": 0.01, "loss": 1.9432, "step": 89946 }, { "epoch": 9.286495973570101, "grad_norm": 0.03633863851428032, "learning_rate": 0.01, "loss": 1.9296, "step": 89949 }, { "epoch": 9.286805698946933, "grad_norm": 0.054639723151922226, "learning_rate": 0.01, "loss": 1.9395, "step": 89952 }, { "epoch": 9.287115424323765, "grad_norm": 0.09005313366651535, "learning_rate": 0.01, "loss": 1.9005, "step": 89955 }, { "epoch": 9.2874251497006, "grad_norm": 0.1011664867401123, "learning_rate": 0.01, "loss": 1.9325, "step": 89958 }, { "epoch": 9.287734875077431, "grad_norm": 0.10966178774833679, "learning_rate": 0.01, "loss": 1.9411, "step": 89961 }, { "epoch": 9.288044600454263, "grad_norm": 0.05809110030531883, "learning_rate": 0.01, "loss": 1.9474, "step": 89964 }, { "epoch": 9.288354325831097, "grad_norm": 0.04828932136297226, "learning_rate": 0.01, "loss": 1.9309, "step": 89967 }, { "epoch": 9.28866405120793, "grad_norm": 0.04604678228497505, "learning_rate": 0.01, "loss": 1.9249, "step": 89970 }, { "epoch": 9.288973776584761, "grad_norm": 0.05810960382223129, "learning_rate": 0.01, "loss": 1.9284, "step": 89973 }, { "epoch": 9.289283501961593, "grad_norm": 0.05069134756922722, "learning_rate": 0.01, "loss": 1.9358, "step": 89976 }, { "epoch": 9.289593227338427, "grad_norm": 0.061285290867090225, "learning_rate": 0.01, "loss": 1.9392, "step": 89979 }, { "epoch": 9.28990295271526, "grad_norm": 0.11732938140630722, "learning_rate": 0.01, "loss": 1.9493, "step": 89982 }, { "epoch": 9.290212678092091, "grad_norm": 0.08811193704605103, "learning_rate": 0.01, "loss": 1.9507, "step": 89985 }, { "epoch": 9.290522403468923, "grad_norm": 0.039724934846162796, "learning_rate": 0.01, "loss": 1.9348, "step": 89988 }, { "epoch": 9.290832128845757, "grad_norm": 0.049808014184236526, "learning_rate": 0.01, "loss": 1.9469, "step": 89991 }, { "epoch": 9.29114185422259, "grad_norm": 0.041384246200323105, "learning_rate": 0.01, "loss": 1.9517, "step": 89994 }, { "epoch": 9.291451579599421, "grad_norm": 0.09925634413957596, "learning_rate": 0.01, "loss": 1.9437, "step": 89997 }, { "epoch": 9.291761304976255, "grad_norm": 0.04714850336313248, "learning_rate": 0.01, "loss": 1.922, "step": 90000 }, { "epoch": 9.292071030353087, "grad_norm": 0.07816148549318314, "learning_rate": 0.01, "loss": 1.9514, "step": 90003 }, { "epoch": 9.29238075572992, "grad_norm": 0.037051137536764145, "learning_rate": 0.01, "loss": 1.9378, "step": 90006 }, { "epoch": 9.292690481106751, "grad_norm": 0.03261815384030342, "learning_rate": 0.01, "loss": 1.9159, "step": 90009 }, { "epoch": 9.293000206483585, "grad_norm": 0.061712704598903656, "learning_rate": 0.01, "loss": 1.9323, "step": 90012 }, { "epoch": 9.293309931860417, "grad_norm": 0.09313449263572693, "learning_rate": 0.01, "loss": 1.9331, "step": 90015 }, { "epoch": 9.29361965723725, "grad_norm": 0.06282445043325424, "learning_rate": 0.01, "loss": 1.9157, "step": 90018 }, { "epoch": 9.293929382614083, "grad_norm": 0.04284929111599922, "learning_rate": 0.01, "loss": 1.9369, "step": 90021 }, { "epoch": 9.294239107990915, "grad_norm": 0.0422949492931366, "learning_rate": 0.01, "loss": 1.9517, "step": 90024 }, { "epoch": 9.294548833367747, "grad_norm": 0.058564431965351105, "learning_rate": 0.01, "loss": 1.9503, "step": 90027 }, { "epoch": 9.29485855874458, "grad_norm": 0.10857044905424118, "learning_rate": 0.01, "loss": 1.9234, "step": 90030 }, { "epoch": 9.295168284121413, "grad_norm": 0.048352450132369995, "learning_rate": 0.01, "loss": 1.9417, "step": 90033 }, { "epoch": 9.295478009498245, "grad_norm": 0.03552021086215973, "learning_rate": 0.01, "loss": 1.9229, "step": 90036 }, { "epoch": 9.295787734875077, "grad_norm": 0.050572823733091354, "learning_rate": 0.01, "loss": 1.9111, "step": 90039 }, { "epoch": 9.29609746025191, "grad_norm": 0.06984183937311172, "learning_rate": 0.01, "loss": 1.9378, "step": 90042 }, { "epoch": 9.296407185628743, "grad_norm": 0.06555435806512833, "learning_rate": 0.01, "loss": 1.9544, "step": 90045 }, { "epoch": 9.296716911005575, "grad_norm": 0.05915297940373421, "learning_rate": 0.01, "loss": 1.9133, "step": 90048 }, { "epoch": 9.297026636382407, "grad_norm": 0.10481391102075577, "learning_rate": 0.01, "loss": 1.9547, "step": 90051 }, { "epoch": 9.29733636175924, "grad_norm": 0.05892582982778549, "learning_rate": 0.01, "loss": 1.9205, "step": 90054 }, { "epoch": 9.297646087136073, "grad_norm": 0.042443741112947464, "learning_rate": 0.01, "loss": 1.9465, "step": 90057 }, { "epoch": 9.297955812512905, "grad_norm": 0.11886375397443771, "learning_rate": 0.01, "loss": 1.9602, "step": 90060 }, { "epoch": 9.298265537889737, "grad_norm": 0.0912526547908783, "learning_rate": 0.01, "loss": 1.9507, "step": 90063 }, { "epoch": 9.29857526326657, "grad_norm": 0.06104797124862671, "learning_rate": 0.01, "loss": 1.9407, "step": 90066 }, { "epoch": 9.298884988643403, "grad_norm": 0.04334283620119095, "learning_rate": 0.01, "loss": 1.9214, "step": 90069 }, { "epoch": 9.299194714020235, "grad_norm": 0.0571666918694973, "learning_rate": 0.01, "loss": 1.9399, "step": 90072 }, { "epoch": 9.299504439397069, "grad_norm": 0.04852711036801338, "learning_rate": 0.01, "loss": 1.9671, "step": 90075 }, { "epoch": 9.2998141647739, "grad_norm": 0.03939691558480263, "learning_rate": 0.01, "loss": 1.9407, "step": 90078 }, { "epoch": 9.300123890150733, "grad_norm": 0.07253614813089371, "learning_rate": 0.01, "loss": 1.9373, "step": 90081 }, { "epoch": 9.300433615527565, "grad_norm": 0.07965242862701416, "learning_rate": 0.01, "loss": 1.9477, "step": 90084 }, { "epoch": 9.300743340904399, "grad_norm": 0.08113238215446472, "learning_rate": 0.01, "loss": 1.9017, "step": 90087 }, { "epoch": 9.30105306628123, "grad_norm": 0.048515964299440384, "learning_rate": 0.01, "loss": 1.937, "step": 90090 }, { "epoch": 9.301362791658063, "grad_norm": 0.12356071174144745, "learning_rate": 0.01, "loss": 1.9568, "step": 90093 }, { "epoch": 9.301672517034895, "grad_norm": 0.05115806311368942, "learning_rate": 0.01, "loss": 1.9282, "step": 90096 }, { "epoch": 9.301982242411729, "grad_norm": 0.07609254121780396, "learning_rate": 0.01, "loss": 1.9301, "step": 90099 }, { "epoch": 9.30229196778856, "grad_norm": 0.07136466354131699, "learning_rate": 0.01, "loss": 1.9504, "step": 90102 }, { "epoch": 9.302601693165393, "grad_norm": 0.043960392475128174, "learning_rate": 0.01, "loss": 1.9339, "step": 90105 }, { "epoch": 9.302911418542227, "grad_norm": 0.034706417471170425, "learning_rate": 0.01, "loss": 1.9526, "step": 90108 }, { "epoch": 9.303221143919059, "grad_norm": 0.055189475417137146, "learning_rate": 0.01, "loss": 1.9304, "step": 90111 }, { "epoch": 9.30353086929589, "grad_norm": 0.06000189483165741, "learning_rate": 0.01, "loss": 1.9466, "step": 90114 }, { "epoch": 9.303840594672723, "grad_norm": 0.04692503809928894, "learning_rate": 0.01, "loss": 1.9374, "step": 90117 }, { "epoch": 9.304150320049557, "grad_norm": 0.09640234708786011, "learning_rate": 0.01, "loss": 1.9528, "step": 90120 }, { "epoch": 9.304460045426389, "grad_norm": 0.03152190148830414, "learning_rate": 0.01, "loss": 1.9311, "step": 90123 }, { "epoch": 9.30476977080322, "grad_norm": 0.03566571697592735, "learning_rate": 0.01, "loss": 1.932, "step": 90126 }, { "epoch": 9.305079496180054, "grad_norm": 0.04585658758878708, "learning_rate": 0.01, "loss": 1.9591, "step": 90129 }, { "epoch": 9.305389221556887, "grad_norm": 0.033661067485809326, "learning_rate": 0.01, "loss": 1.9231, "step": 90132 }, { "epoch": 9.305698946933719, "grad_norm": 0.07970356196165085, "learning_rate": 0.01, "loss": 1.9305, "step": 90135 }, { "epoch": 9.30600867231055, "grad_norm": 0.15736587345600128, "learning_rate": 0.01, "loss": 1.9653, "step": 90138 }, { "epoch": 9.306318397687384, "grad_norm": 0.10455471277236938, "learning_rate": 0.01, "loss": 1.9303, "step": 90141 }, { "epoch": 9.306628123064216, "grad_norm": 0.08667217195034027, "learning_rate": 0.01, "loss": 1.927, "step": 90144 }, { "epoch": 9.306937848441049, "grad_norm": 0.08747159689664841, "learning_rate": 0.01, "loss": 1.9493, "step": 90147 }, { "epoch": 9.307247573817882, "grad_norm": 0.05152470991015434, "learning_rate": 0.01, "loss": 1.9438, "step": 90150 }, { "epoch": 9.307557299194714, "grad_norm": 0.041649870574474335, "learning_rate": 0.01, "loss": 1.933, "step": 90153 }, { "epoch": 9.307867024571546, "grad_norm": 0.07984058558940887, "learning_rate": 0.01, "loss": 1.9319, "step": 90156 }, { "epoch": 9.308176749948379, "grad_norm": 0.03657011687755585, "learning_rate": 0.01, "loss": 1.9637, "step": 90159 }, { "epoch": 9.308486475325212, "grad_norm": 0.052740272134542465, "learning_rate": 0.01, "loss": 1.932, "step": 90162 }, { "epoch": 9.308796200702044, "grad_norm": 0.037107206881046295, "learning_rate": 0.01, "loss": 1.932, "step": 90165 }, { "epoch": 9.309105926078876, "grad_norm": 0.047946516424417496, "learning_rate": 0.01, "loss": 1.9388, "step": 90168 }, { "epoch": 9.309415651455708, "grad_norm": 0.036284878849983215, "learning_rate": 0.01, "loss": 1.9307, "step": 90171 }, { "epoch": 9.309725376832542, "grad_norm": 0.09453091770410538, "learning_rate": 0.01, "loss": 1.9294, "step": 90174 }, { "epoch": 9.310035102209374, "grad_norm": 0.0442422591149807, "learning_rate": 0.01, "loss": 1.9334, "step": 90177 }, { "epoch": 9.310344827586206, "grad_norm": 0.08194606751203537, "learning_rate": 0.01, "loss": 1.9411, "step": 90180 }, { "epoch": 9.31065455296304, "grad_norm": 0.08908070623874664, "learning_rate": 0.01, "loss": 1.9316, "step": 90183 }, { "epoch": 9.310964278339872, "grad_norm": 0.08012444525957108, "learning_rate": 0.01, "loss": 1.9496, "step": 90186 }, { "epoch": 9.311274003716704, "grad_norm": 0.05593520402908325, "learning_rate": 0.01, "loss": 1.9556, "step": 90189 }, { "epoch": 9.311583729093536, "grad_norm": 0.10340272635221481, "learning_rate": 0.01, "loss": 1.9285, "step": 90192 }, { "epoch": 9.31189345447037, "grad_norm": 0.06450596451759338, "learning_rate": 0.01, "loss": 1.925, "step": 90195 }, { "epoch": 9.312203179847202, "grad_norm": 0.06476901471614838, "learning_rate": 0.01, "loss": 1.9347, "step": 90198 }, { "epoch": 9.312512905224034, "grad_norm": 0.04243314638733864, "learning_rate": 0.01, "loss": 1.9485, "step": 90201 }, { "epoch": 9.312822630600866, "grad_norm": 0.052618399262428284, "learning_rate": 0.01, "loss": 1.9377, "step": 90204 }, { "epoch": 9.3131323559777, "grad_norm": 0.049052439630031586, "learning_rate": 0.01, "loss": 1.9515, "step": 90207 }, { "epoch": 9.313442081354532, "grad_norm": 0.08724159002304077, "learning_rate": 0.01, "loss": 1.9452, "step": 90210 }, { "epoch": 9.313751806731364, "grad_norm": 0.08713918179273605, "learning_rate": 0.01, "loss": 1.9398, "step": 90213 }, { "epoch": 9.314061532108198, "grad_norm": 0.0694577768445015, "learning_rate": 0.01, "loss": 1.9127, "step": 90216 }, { "epoch": 9.31437125748503, "grad_norm": 0.07178552448749542, "learning_rate": 0.01, "loss": 1.9807, "step": 90219 }, { "epoch": 9.314680982861862, "grad_norm": 0.08677991479635239, "learning_rate": 0.01, "loss": 1.9232, "step": 90222 }, { "epoch": 9.314990708238694, "grad_norm": 0.054113663733005524, "learning_rate": 0.01, "loss": 1.9265, "step": 90225 }, { "epoch": 9.315300433615528, "grad_norm": 0.12051310390233994, "learning_rate": 0.01, "loss": 1.9243, "step": 90228 }, { "epoch": 9.31561015899236, "grad_norm": 0.04537734016776085, "learning_rate": 0.01, "loss": 1.9424, "step": 90231 }, { "epoch": 9.315919884369192, "grad_norm": 0.04667481780052185, "learning_rate": 0.01, "loss": 1.9393, "step": 90234 }, { "epoch": 9.316229609746026, "grad_norm": 0.12717698514461517, "learning_rate": 0.01, "loss": 1.9039, "step": 90237 }, { "epoch": 9.316539335122858, "grad_norm": 0.06177690252661705, "learning_rate": 0.01, "loss": 1.9416, "step": 90240 }, { "epoch": 9.31684906049969, "grad_norm": 0.16978679597377777, "learning_rate": 0.01, "loss": 1.9418, "step": 90243 }, { "epoch": 9.317158785876522, "grad_norm": 0.13136741518974304, "learning_rate": 0.01, "loss": 1.922, "step": 90246 }, { "epoch": 9.317468511253356, "grad_norm": 0.07961270958185196, "learning_rate": 0.01, "loss": 1.9561, "step": 90249 }, { "epoch": 9.317778236630188, "grad_norm": 0.0522325336933136, "learning_rate": 0.01, "loss": 1.9314, "step": 90252 }, { "epoch": 9.31808796200702, "grad_norm": 0.05689280480146408, "learning_rate": 0.01, "loss": 1.9472, "step": 90255 }, { "epoch": 9.318397687383854, "grad_norm": 0.06072314828634262, "learning_rate": 0.01, "loss": 1.9219, "step": 90258 }, { "epoch": 9.318707412760686, "grad_norm": 0.045333363115787506, "learning_rate": 0.01, "loss": 1.9599, "step": 90261 }, { "epoch": 9.319017138137518, "grad_norm": 0.046115368604660034, "learning_rate": 0.01, "loss": 1.9372, "step": 90264 }, { "epoch": 9.31932686351435, "grad_norm": 0.03999978303909302, "learning_rate": 0.01, "loss": 1.953, "step": 90267 }, { "epoch": 9.319636588891184, "grad_norm": 0.04297192022204399, "learning_rate": 0.01, "loss": 1.943, "step": 90270 }, { "epoch": 9.319946314268016, "grad_norm": 0.03738616779446602, "learning_rate": 0.01, "loss": 1.9768, "step": 90273 }, { "epoch": 9.320256039644848, "grad_norm": 0.08931316435337067, "learning_rate": 0.01, "loss": 1.9407, "step": 90276 }, { "epoch": 9.32056576502168, "grad_norm": 0.19095301628112793, "learning_rate": 0.01, "loss": 1.9377, "step": 90279 }, { "epoch": 9.320875490398514, "grad_norm": 0.12510459125041962, "learning_rate": 0.01, "loss": 1.9365, "step": 90282 }, { "epoch": 9.321185215775346, "grad_norm": 0.09745209664106369, "learning_rate": 0.01, "loss": 1.951, "step": 90285 }, { "epoch": 9.321494941152178, "grad_norm": 0.08224628865718842, "learning_rate": 0.01, "loss": 1.9237, "step": 90288 }, { "epoch": 9.321804666529012, "grad_norm": 0.05055355280637741, "learning_rate": 0.01, "loss": 1.953, "step": 90291 }, { "epoch": 9.322114391905844, "grad_norm": 0.07061073184013367, "learning_rate": 0.01, "loss": 1.9637, "step": 90294 }, { "epoch": 9.322424117282676, "grad_norm": 0.06762760132551193, "learning_rate": 0.01, "loss": 1.9299, "step": 90297 }, { "epoch": 9.322733842659508, "grad_norm": 0.049081481993198395, "learning_rate": 0.01, "loss": 1.9343, "step": 90300 }, { "epoch": 9.323043568036342, "grad_norm": 0.045287519693374634, "learning_rate": 0.01, "loss": 1.9387, "step": 90303 }, { "epoch": 9.323353293413174, "grad_norm": 0.03433917835354805, "learning_rate": 0.01, "loss": 1.9483, "step": 90306 }, { "epoch": 9.323663018790006, "grad_norm": 0.10234330594539642, "learning_rate": 0.01, "loss": 1.9304, "step": 90309 }, { "epoch": 9.323972744166838, "grad_norm": 0.044973790645599365, "learning_rate": 0.01, "loss": 1.9427, "step": 90312 }, { "epoch": 9.324282469543672, "grad_norm": 0.10993959754705429, "learning_rate": 0.01, "loss": 1.9301, "step": 90315 }, { "epoch": 9.324592194920504, "grad_norm": 0.147218719124794, "learning_rate": 0.01, "loss": 1.9402, "step": 90318 }, { "epoch": 9.324901920297336, "grad_norm": 0.10825378447771072, "learning_rate": 0.01, "loss": 1.9492, "step": 90321 }, { "epoch": 9.32521164567417, "grad_norm": 0.12275347113609314, "learning_rate": 0.01, "loss": 1.9335, "step": 90324 }, { "epoch": 9.325521371051002, "grad_norm": 0.06429765373468399, "learning_rate": 0.01, "loss": 1.9411, "step": 90327 }, { "epoch": 9.325831096427834, "grad_norm": 0.05523232743144035, "learning_rate": 0.01, "loss": 1.9441, "step": 90330 }, { "epoch": 9.326140821804666, "grad_norm": 0.03777391090989113, "learning_rate": 0.01, "loss": 1.9375, "step": 90333 }, { "epoch": 9.3264505471815, "grad_norm": 0.04383401945233345, "learning_rate": 0.01, "loss": 1.9255, "step": 90336 }, { "epoch": 9.326760272558332, "grad_norm": 0.05174276977777481, "learning_rate": 0.01, "loss": 1.9452, "step": 90339 }, { "epoch": 9.327069997935164, "grad_norm": 0.041363853961229324, "learning_rate": 0.01, "loss": 1.9267, "step": 90342 }, { "epoch": 9.327379723311997, "grad_norm": 0.10272231698036194, "learning_rate": 0.01, "loss": 1.9411, "step": 90345 }, { "epoch": 9.32768944868883, "grad_norm": 0.06003765016794205, "learning_rate": 0.01, "loss": 1.9326, "step": 90348 }, { "epoch": 9.327999174065662, "grad_norm": 0.07973077148199081, "learning_rate": 0.01, "loss": 1.9467, "step": 90351 }, { "epoch": 9.328308899442494, "grad_norm": 0.11034880578517914, "learning_rate": 0.01, "loss": 1.9326, "step": 90354 }, { "epoch": 9.328618624819327, "grad_norm": 0.12927527725696564, "learning_rate": 0.01, "loss": 1.956, "step": 90357 }, { "epoch": 9.32892835019616, "grad_norm": 0.039803747087717056, "learning_rate": 0.01, "loss": 1.9387, "step": 90360 }, { "epoch": 9.329238075572992, "grad_norm": 0.04133141040802002, "learning_rate": 0.01, "loss": 1.9347, "step": 90363 }, { "epoch": 9.329547800949825, "grad_norm": 0.04647175967693329, "learning_rate": 0.01, "loss": 1.9328, "step": 90366 }, { "epoch": 9.329857526326657, "grad_norm": 0.03702395781874657, "learning_rate": 0.01, "loss": 1.9385, "step": 90369 }, { "epoch": 9.33016725170349, "grad_norm": 0.04293076694011688, "learning_rate": 0.01, "loss": 1.948, "step": 90372 }, { "epoch": 9.330476977080322, "grad_norm": 0.04811659827828407, "learning_rate": 0.01, "loss": 1.9361, "step": 90375 }, { "epoch": 9.330786702457155, "grad_norm": 0.09386304020881653, "learning_rate": 0.01, "loss": 1.9547, "step": 90378 }, { "epoch": 9.331096427833987, "grad_norm": 0.05897035449743271, "learning_rate": 0.01, "loss": 1.922, "step": 90381 }, { "epoch": 9.33140615321082, "grad_norm": 0.041449174284935, "learning_rate": 0.01, "loss": 1.935, "step": 90384 }, { "epoch": 9.331715878587652, "grad_norm": 0.03846067562699318, "learning_rate": 0.01, "loss": 1.9301, "step": 90387 }, { "epoch": 9.332025603964485, "grad_norm": 0.04674214497208595, "learning_rate": 0.01, "loss": 1.9314, "step": 90390 }, { "epoch": 9.332335329341317, "grad_norm": 0.06381560117006302, "learning_rate": 0.01, "loss": 1.9248, "step": 90393 }, { "epoch": 9.33264505471815, "grad_norm": 0.060292668640613556, "learning_rate": 0.01, "loss": 1.9417, "step": 90396 }, { "epoch": 9.332954780094983, "grad_norm": 0.08171455562114716, "learning_rate": 0.01, "loss": 1.9492, "step": 90399 }, { "epoch": 9.333264505471815, "grad_norm": 0.07539816200733185, "learning_rate": 0.01, "loss": 1.9629, "step": 90402 }, { "epoch": 9.333574230848647, "grad_norm": 0.16030851006507874, "learning_rate": 0.01, "loss": 1.9328, "step": 90405 }, { "epoch": 9.33388395622548, "grad_norm": 0.0498042106628418, "learning_rate": 0.01, "loss": 1.9384, "step": 90408 }, { "epoch": 9.334193681602313, "grad_norm": 0.035880159586668015, "learning_rate": 0.01, "loss": 1.9642, "step": 90411 }, { "epoch": 9.334503406979145, "grad_norm": 0.032716989517211914, "learning_rate": 0.01, "loss": 1.9495, "step": 90414 }, { "epoch": 9.334813132355977, "grad_norm": 0.05374199151992798, "learning_rate": 0.01, "loss": 1.9344, "step": 90417 }, { "epoch": 9.33512285773281, "grad_norm": 0.0500338152050972, "learning_rate": 0.01, "loss": 1.9368, "step": 90420 }, { "epoch": 9.335432583109643, "grad_norm": 0.05141221731901169, "learning_rate": 0.01, "loss": 1.9494, "step": 90423 }, { "epoch": 9.335742308486475, "grad_norm": 0.1170889213681221, "learning_rate": 0.01, "loss": 1.9206, "step": 90426 }, { "epoch": 9.336052033863307, "grad_norm": 0.08207867294549942, "learning_rate": 0.01, "loss": 1.9267, "step": 90429 }, { "epoch": 9.336361759240141, "grad_norm": 0.126504048705101, "learning_rate": 0.01, "loss": 1.9504, "step": 90432 }, { "epoch": 9.336671484616973, "grad_norm": 0.09300342202186584, "learning_rate": 0.01, "loss": 1.9203, "step": 90435 }, { "epoch": 9.336981209993805, "grad_norm": 0.11592224985361099, "learning_rate": 0.01, "loss": 1.9387, "step": 90438 }, { "epoch": 9.337290935370637, "grad_norm": 0.07160703837871552, "learning_rate": 0.01, "loss": 1.9162, "step": 90441 }, { "epoch": 9.337600660747471, "grad_norm": 0.05016907677054405, "learning_rate": 0.01, "loss": 1.9821, "step": 90444 }, { "epoch": 9.337910386124303, "grad_norm": 0.06592227518558502, "learning_rate": 0.01, "loss": 1.9519, "step": 90447 }, { "epoch": 9.338220111501135, "grad_norm": 0.06010131537914276, "learning_rate": 0.01, "loss": 1.9228, "step": 90450 }, { "epoch": 9.338529836877969, "grad_norm": 0.06576816737651825, "learning_rate": 0.01, "loss": 1.9311, "step": 90453 }, { "epoch": 9.338839562254801, "grad_norm": 0.03570152446627617, "learning_rate": 0.01, "loss": 1.9539, "step": 90456 }, { "epoch": 9.339149287631633, "grad_norm": 0.036140549927949905, "learning_rate": 0.01, "loss": 1.9452, "step": 90459 }, { "epoch": 9.339459013008465, "grad_norm": 0.06858191639184952, "learning_rate": 0.01, "loss": 1.9246, "step": 90462 }, { "epoch": 9.339768738385299, "grad_norm": 0.09158644825220108, "learning_rate": 0.01, "loss": 1.9313, "step": 90465 }, { "epoch": 9.340078463762131, "grad_norm": 0.11463913321495056, "learning_rate": 0.01, "loss": 1.937, "step": 90468 }, { "epoch": 9.340388189138963, "grad_norm": 0.06548649817705154, "learning_rate": 0.01, "loss": 1.9624, "step": 90471 }, { "epoch": 9.340697914515797, "grad_norm": 0.04369160160422325, "learning_rate": 0.01, "loss": 1.94, "step": 90474 }, { "epoch": 9.341007639892629, "grad_norm": 0.058460842818021774, "learning_rate": 0.01, "loss": 1.9471, "step": 90477 }, { "epoch": 9.341317365269461, "grad_norm": 0.03069923259317875, "learning_rate": 0.01, "loss": 1.9168, "step": 90480 }, { "epoch": 9.341627090646293, "grad_norm": 0.03194497153162956, "learning_rate": 0.01, "loss": 1.9652, "step": 90483 }, { "epoch": 9.341936816023127, "grad_norm": 0.12790267169475555, "learning_rate": 0.01, "loss": 1.9483, "step": 90486 }, { "epoch": 9.342246541399959, "grad_norm": 0.058374807238578796, "learning_rate": 0.01, "loss": 1.9476, "step": 90489 }, { "epoch": 9.342556266776791, "grad_norm": 0.05721079930663109, "learning_rate": 0.01, "loss": 1.9298, "step": 90492 }, { "epoch": 9.342865992153623, "grad_norm": 0.060209229588508606, "learning_rate": 0.01, "loss": 1.9351, "step": 90495 }, { "epoch": 9.343175717530457, "grad_norm": 0.05442395433783531, "learning_rate": 0.01, "loss": 1.9285, "step": 90498 }, { "epoch": 9.343485442907289, "grad_norm": 0.05215894803404808, "learning_rate": 0.01, "loss": 1.9459, "step": 90501 }, { "epoch": 9.343795168284121, "grad_norm": 0.10672825574874878, "learning_rate": 0.01, "loss": 1.9334, "step": 90504 }, { "epoch": 9.344104893660955, "grad_norm": 0.0659959614276886, "learning_rate": 0.01, "loss": 1.923, "step": 90507 }, { "epoch": 9.344414619037787, "grad_norm": 0.05211605504155159, "learning_rate": 0.01, "loss": 1.9354, "step": 90510 }, { "epoch": 9.344724344414619, "grad_norm": 0.05553697422146797, "learning_rate": 0.01, "loss": 1.95, "step": 90513 }, { "epoch": 9.345034069791451, "grad_norm": 0.03250126913189888, "learning_rate": 0.01, "loss": 1.937, "step": 90516 }, { "epoch": 9.345343795168285, "grad_norm": 0.0480884313583374, "learning_rate": 0.01, "loss": 1.9387, "step": 90519 }, { "epoch": 9.345653520545117, "grad_norm": 0.09070666134357452, "learning_rate": 0.01, "loss": 1.9287, "step": 90522 }, { "epoch": 9.345963245921949, "grad_norm": 0.07415267080068588, "learning_rate": 0.01, "loss": 1.9318, "step": 90525 }, { "epoch": 9.34627297129878, "grad_norm": 0.09671496599912643, "learning_rate": 0.01, "loss": 1.9462, "step": 90528 }, { "epoch": 9.346582696675615, "grad_norm": 0.11006847023963928, "learning_rate": 0.01, "loss": 1.9388, "step": 90531 }, { "epoch": 9.346892422052447, "grad_norm": 0.04296498745679855, "learning_rate": 0.01, "loss": 1.9375, "step": 90534 }, { "epoch": 9.347202147429279, "grad_norm": 0.15365970134735107, "learning_rate": 0.01, "loss": 1.9187, "step": 90537 }, { "epoch": 9.347511872806113, "grad_norm": 0.09222111850976944, "learning_rate": 0.01, "loss": 1.9328, "step": 90540 }, { "epoch": 9.347821598182945, "grad_norm": 0.07133835554122925, "learning_rate": 0.01, "loss": 1.9344, "step": 90543 }, { "epoch": 9.348131323559777, "grad_norm": 0.0642387866973877, "learning_rate": 0.01, "loss": 1.9567, "step": 90546 }, { "epoch": 9.348441048936609, "grad_norm": 0.040202390402555466, "learning_rate": 0.01, "loss": 1.937, "step": 90549 }, { "epoch": 9.348750774313443, "grad_norm": 0.0321086123585701, "learning_rate": 0.01, "loss": 1.9554, "step": 90552 }, { "epoch": 9.349060499690275, "grad_norm": 0.04373585432767868, "learning_rate": 0.01, "loss": 1.9442, "step": 90555 }, { "epoch": 9.349370225067107, "grad_norm": 0.0545133501291275, "learning_rate": 0.01, "loss": 1.9427, "step": 90558 }, { "epoch": 9.34967995044394, "grad_norm": 0.03567107766866684, "learning_rate": 0.01, "loss": 1.9344, "step": 90561 }, { "epoch": 9.349989675820773, "grad_norm": 0.0539405420422554, "learning_rate": 0.01, "loss": 1.9791, "step": 90564 }, { "epoch": 9.350299401197605, "grad_norm": 0.13431000709533691, "learning_rate": 0.01, "loss": 1.9482, "step": 90567 }, { "epoch": 9.350609126574437, "grad_norm": 0.12407714128494263, "learning_rate": 0.01, "loss": 1.9413, "step": 90570 }, { "epoch": 9.35091885195127, "grad_norm": 0.10649970173835754, "learning_rate": 0.01, "loss": 1.9439, "step": 90573 }, { "epoch": 9.351228577328103, "grad_norm": 0.04066113010048866, "learning_rate": 0.01, "loss": 1.9401, "step": 90576 }, { "epoch": 9.351538302704935, "grad_norm": 0.0388568751513958, "learning_rate": 0.01, "loss": 1.9333, "step": 90579 }, { "epoch": 9.351848028081768, "grad_norm": 0.03337500989437103, "learning_rate": 0.01, "loss": 1.9469, "step": 90582 }, { "epoch": 9.3521577534586, "grad_norm": 0.04804845526814461, "learning_rate": 0.01, "loss": 1.9435, "step": 90585 }, { "epoch": 9.352467478835433, "grad_norm": 0.07341484725475311, "learning_rate": 0.01, "loss": 1.9485, "step": 90588 }, { "epoch": 9.352777204212265, "grad_norm": 0.044978316873311996, "learning_rate": 0.01, "loss": 1.9346, "step": 90591 }, { "epoch": 9.353086929589098, "grad_norm": 0.10656917840242386, "learning_rate": 0.01, "loss": 1.9469, "step": 90594 }, { "epoch": 9.35339665496593, "grad_norm": 0.04323936998844147, "learning_rate": 0.01, "loss": 1.9642, "step": 90597 }, { "epoch": 9.353706380342762, "grad_norm": 0.11398027092218399, "learning_rate": 0.01, "loss": 1.9363, "step": 90600 }, { "epoch": 9.354016105719595, "grad_norm": 0.09031344950199127, "learning_rate": 0.01, "loss": 1.921, "step": 90603 }, { "epoch": 9.354325831096428, "grad_norm": 0.08325620740652084, "learning_rate": 0.01, "loss": 1.9559, "step": 90606 }, { "epoch": 9.35463555647326, "grad_norm": 0.05016699433326721, "learning_rate": 0.01, "loss": 1.9123, "step": 90609 }, { "epoch": 9.354945281850092, "grad_norm": 0.039517778903245926, "learning_rate": 0.01, "loss": 1.9316, "step": 90612 }, { "epoch": 9.355255007226926, "grad_norm": 0.031809695065021515, "learning_rate": 0.01, "loss": 1.9236, "step": 90615 }, { "epoch": 9.355564732603758, "grad_norm": 0.03748030960559845, "learning_rate": 0.01, "loss": 1.9442, "step": 90618 }, { "epoch": 9.35587445798059, "grad_norm": 0.04791229963302612, "learning_rate": 0.01, "loss": 1.9328, "step": 90621 }, { "epoch": 9.356184183357422, "grad_norm": 0.12503692507743835, "learning_rate": 0.01, "loss": 1.9259, "step": 90624 }, { "epoch": 9.356493908734256, "grad_norm": 0.08294980973005295, "learning_rate": 0.01, "loss": 1.9072, "step": 90627 }, { "epoch": 9.356803634111088, "grad_norm": 0.04816403612494469, "learning_rate": 0.01, "loss": 1.9251, "step": 90630 }, { "epoch": 9.35711335948792, "grad_norm": 0.10670629888772964, "learning_rate": 0.01, "loss": 1.9161, "step": 90633 }, { "epoch": 9.357423084864752, "grad_norm": 0.05907604843378067, "learning_rate": 0.01, "loss": 1.9444, "step": 90636 }, { "epoch": 9.357732810241586, "grad_norm": 0.03816728666424751, "learning_rate": 0.01, "loss": 1.9405, "step": 90639 }, { "epoch": 9.358042535618418, "grad_norm": 0.10674931108951569, "learning_rate": 0.01, "loss": 1.9377, "step": 90642 }, { "epoch": 9.35835226099525, "grad_norm": 0.12507742643356323, "learning_rate": 0.01, "loss": 1.9431, "step": 90645 }, { "epoch": 9.358661986372084, "grad_norm": 0.14347107708454132, "learning_rate": 0.01, "loss": 1.9577, "step": 90648 }, { "epoch": 9.358971711748916, "grad_norm": 0.1007409393787384, "learning_rate": 0.01, "loss": 1.9344, "step": 90651 }, { "epoch": 9.359281437125748, "grad_norm": 0.0874379575252533, "learning_rate": 0.01, "loss": 1.9349, "step": 90654 }, { "epoch": 9.35959116250258, "grad_norm": 0.06938479095697403, "learning_rate": 0.01, "loss": 1.9705, "step": 90657 }, { "epoch": 9.359900887879414, "grad_norm": 0.04262612760066986, "learning_rate": 0.01, "loss": 1.9228, "step": 90660 }, { "epoch": 9.360210613256246, "grad_norm": 0.060341037809848785, "learning_rate": 0.01, "loss": 1.9456, "step": 90663 }, { "epoch": 9.360520338633078, "grad_norm": 0.04864312708377838, "learning_rate": 0.01, "loss": 1.9429, "step": 90666 }, { "epoch": 9.360830064009912, "grad_norm": 0.04025941342115402, "learning_rate": 0.01, "loss": 1.9302, "step": 90669 }, { "epoch": 9.361139789386744, "grad_norm": 0.049005236476659775, "learning_rate": 0.01, "loss": 1.8861, "step": 90672 }, { "epoch": 9.361449514763576, "grad_norm": 0.04088768735527992, "learning_rate": 0.01, "loss": 1.9269, "step": 90675 }, { "epoch": 9.361759240140408, "grad_norm": 0.03597968444228172, "learning_rate": 0.01, "loss": 1.9469, "step": 90678 }, { "epoch": 9.362068965517242, "grad_norm": 0.10069102048873901, "learning_rate": 0.01, "loss": 1.9576, "step": 90681 }, { "epoch": 9.362378690894074, "grad_norm": 0.07363972812891006, "learning_rate": 0.01, "loss": 1.9261, "step": 90684 }, { "epoch": 9.362688416270906, "grad_norm": 0.052969079464673996, "learning_rate": 0.01, "loss": 1.9292, "step": 90687 }, { "epoch": 9.36299814164774, "grad_norm": 0.06485411524772644, "learning_rate": 0.01, "loss": 1.9117, "step": 90690 }, { "epoch": 9.363307867024572, "grad_norm": 0.08032330125570297, "learning_rate": 0.01, "loss": 1.943, "step": 90693 }, { "epoch": 9.363617592401404, "grad_norm": 0.08398518711328506, "learning_rate": 0.01, "loss": 1.9422, "step": 90696 }, { "epoch": 9.363927317778236, "grad_norm": 0.08669327944517136, "learning_rate": 0.01, "loss": 1.9182, "step": 90699 }, { "epoch": 9.36423704315507, "grad_norm": 0.04991896077990532, "learning_rate": 0.01, "loss": 1.9379, "step": 90702 }, { "epoch": 9.364546768531902, "grad_norm": 0.07150976359844208, "learning_rate": 0.01, "loss": 1.9215, "step": 90705 }, { "epoch": 9.364856493908734, "grad_norm": 0.08911240100860596, "learning_rate": 0.01, "loss": 1.9607, "step": 90708 }, { "epoch": 9.365166219285566, "grad_norm": 0.03794144466519356, "learning_rate": 0.01, "loss": 1.9184, "step": 90711 }, { "epoch": 9.3654759446624, "grad_norm": 0.16332855820655823, "learning_rate": 0.01, "loss": 1.9394, "step": 90714 }, { "epoch": 9.365785670039232, "grad_norm": 0.053398165851831436, "learning_rate": 0.01, "loss": 1.9606, "step": 90717 }, { "epoch": 9.366095395416064, "grad_norm": 0.10861405730247498, "learning_rate": 0.01, "loss": 1.9398, "step": 90720 }, { "epoch": 9.366405120792898, "grad_norm": 0.061357125639915466, "learning_rate": 0.01, "loss": 1.8979, "step": 90723 }, { "epoch": 9.36671484616973, "grad_norm": 0.03730995953083038, "learning_rate": 0.01, "loss": 1.9345, "step": 90726 }, { "epoch": 9.367024571546562, "grad_norm": 0.057155217975378036, "learning_rate": 0.01, "loss": 1.9278, "step": 90729 }, { "epoch": 9.367334296923394, "grad_norm": 0.04852285608649254, "learning_rate": 0.01, "loss": 1.9645, "step": 90732 }, { "epoch": 9.367644022300228, "grad_norm": 0.10618282109498978, "learning_rate": 0.01, "loss": 1.934, "step": 90735 }, { "epoch": 9.36795374767706, "grad_norm": 0.0839443951845169, "learning_rate": 0.01, "loss": 1.9413, "step": 90738 }, { "epoch": 9.368263473053892, "grad_norm": 0.05539103224873543, "learning_rate": 0.01, "loss": 1.9529, "step": 90741 }, { "epoch": 9.368573198430724, "grad_norm": 0.036319341510534286, "learning_rate": 0.01, "loss": 1.9404, "step": 90744 }, { "epoch": 9.368882923807558, "grad_norm": 0.033435918390750885, "learning_rate": 0.01, "loss": 1.9337, "step": 90747 }, { "epoch": 9.36919264918439, "grad_norm": 0.0851496085524559, "learning_rate": 0.01, "loss": 1.9513, "step": 90750 }, { "epoch": 9.369502374561222, "grad_norm": 0.037316080182790756, "learning_rate": 0.01, "loss": 1.9213, "step": 90753 }, { "epoch": 9.369812099938056, "grad_norm": 0.13374057412147522, "learning_rate": 0.01, "loss": 1.953, "step": 90756 }, { "epoch": 9.370121825314888, "grad_norm": 0.12201882153749466, "learning_rate": 0.01, "loss": 1.9445, "step": 90759 }, { "epoch": 9.37043155069172, "grad_norm": 0.07799084484577179, "learning_rate": 0.01, "loss": 1.9136, "step": 90762 }, { "epoch": 9.370741276068552, "grad_norm": 0.044005535542964935, "learning_rate": 0.01, "loss": 1.939, "step": 90765 }, { "epoch": 9.371051001445386, "grad_norm": 0.0691835880279541, "learning_rate": 0.01, "loss": 1.922, "step": 90768 }, { "epoch": 9.371360726822218, "grad_norm": 0.041414376348257065, "learning_rate": 0.01, "loss": 1.946, "step": 90771 }, { "epoch": 9.37167045219905, "grad_norm": 0.05714799463748932, "learning_rate": 0.01, "loss": 1.9403, "step": 90774 }, { "epoch": 9.371980177575884, "grad_norm": 0.0594872385263443, "learning_rate": 0.01, "loss": 1.9241, "step": 90777 }, { "epoch": 9.372289902952716, "grad_norm": 0.044780146330595016, "learning_rate": 0.01, "loss": 1.9333, "step": 90780 }, { "epoch": 9.372599628329548, "grad_norm": 0.039911285042762756, "learning_rate": 0.01, "loss": 1.9747, "step": 90783 }, { "epoch": 9.37290935370638, "grad_norm": 0.09436476230621338, "learning_rate": 0.01, "loss": 1.9265, "step": 90786 }, { "epoch": 9.373219079083214, "grad_norm": 0.0415303073823452, "learning_rate": 0.01, "loss": 1.9232, "step": 90789 }, { "epoch": 9.373528804460046, "grad_norm": 0.03091426193714142, "learning_rate": 0.01, "loss": 1.9091, "step": 90792 }, { "epoch": 9.373838529836878, "grad_norm": 0.04031349718570709, "learning_rate": 0.01, "loss": 1.9517, "step": 90795 }, { "epoch": 9.374148255213711, "grad_norm": 0.05020138993859291, "learning_rate": 0.01, "loss": 1.9234, "step": 90798 }, { "epoch": 9.374457980590543, "grad_norm": 0.07621460407972336, "learning_rate": 0.01, "loss": 1.9361, "step": 90801 }, { "epoch": 9.374767705967376, "grad_norm": 0.049681663513183594, "learning_rate": 0.01, "loss": 1.9319, "step": 90804 }, { "epoch": 9.375077431344208, "grad_norm": 0.058287203311920166, "learning_rate": 0.01, "loss": 1.9288, "step": 90807 }, { "epoch": 9.375387156721041, "grad_norm": 0.11261413246393204, "learning_rate": 0.01, "loss": 1.9409, "step": 90810 }, { "epoch": 9.375696882097873, "grad_norm": 0.13443779945373535, "learning_rate": 0.01, "loss": 1.9392, "step": 90813 }, { "epoch": 9.376006607474705, "grad_norm": 0.10898131877183914, "learning_rate": 0.01, "loss": 1.9447, "step": 90816 }, { "epoch": 9.376316332851538, "grad_norm": 0.06500370800495148, "learning_rate": 0.01, "loss": 1.9515, "step": 90819 }, { "epoch": 9.376626058228371, "grad_norm": 0.07922185212373734, "learning_rate": 0.01, "loss": 1.9523, "step": 90822 }, { "epoch": 9.376935783605203, "grad_norm": 0.05019281432032585, "learning_rate": 0.01, "loss": 1.9358, "step": 90825 }, { "epoch": 9.377245508982035, "grad_norm": 0.049235813319683075, "learning_rate": 0.01, "loss": 1.9207, "step": 90828 }, { "epoch": 9.37755523435887, "grad_norm": 0.04269546642899513, "learning_rate": 0.01, "loss": 1.9269, "step": 90831 }, { "epoch": 9.377864959735701, "grad_norm": 0.06699442118406296, "learning_rate": 0.01, "loss": 1.9389, "step": 90834 }, { "epoch": 9.378174685112533, "grad_norm": 0.034419186413288116, "learning_rate": 0.01, "loss": 1.9284, "step": 90837 }, { "epoch": 9.378484410489365, "grad_norm": 0.11870981007814407, "learning_rate": 0.01, "loss": 1.9504, "step": 90840 }, { "epoch": 9.3787941358662, "grad_norm": 0.058509461581707, "learning_rate": 0.01, "loss": 1.9286, "step": 90843 }, { "epoch": 9.379103861243031, "grad_norm": 0.12032335996627808, "learning_rate": 0.01, "loss": 1.953, "step": 90846 }, { "epoch": 9.379413586619863, "grad_norm": 0.05439126119017601, "learning_rate": 0.01, "loss": 1.9292, "step": 90849 }, { "epoch": 9.379723311996695, "grad_norm": 0.03926823288202286, "learning_rate": 0.01, "loss": 1.9351, "step": 90852 }, { "epoch": 9.38003303737353, "grad_norm": 0.03611249104142189, "learning_rate": 0.01, "loss": 1.9186, "step": 90855 }, { "epoch": 9.380342762750361, "grad_norm": 0.045519620180130005, "learning_rate": 0.01, "loss": 1.9163, "step": 90858 }, { "epoch": 9.380652488127193, "grad_norm": 0.05714515969157219, "learning_rate": 0.01, "loss": 1.9381, "step": 90861 }, { "epoch": 9.380962213504027, "grad_norm": 0.06265503913164139, "learning_rate": 0.01, "loss": 1.9345, "step": 90864 }, { "epoch": 9.38127193888086, "grad_norm": 0.15718643367290497, "learning_rate": 0.01, "loss": 1.9444, "step": 90867 }, { "epoch": 9.381581664257691, "grad_norm": 0.13615955412387848, "learning_rate": 0.01, "loss": 1.9487, "step": 90870 }, { "epoch": 9.381891389634523, "grad_norm": 0.09134041517972946, "learning_rate": 0.01, "loss": 1.9316, "step": 90873 }, { "epoch": 9.382201115011357, "grad_norm": 0.09111947566270828, "learning_rate": 0.01, "loss": 1.972, "step": 90876 }, { "epoch": 9.38251084038819, "grad_norm": 0.050355955958366394, "learning_rate": 0.01, "loss": 1.9429, "step": 90879 }, { "epoch": 9.382820565765021, "grad_norm": 0.03346274793148041, "learning_rate": 0.01, "loss": 1.9526, "step": 90882 }, { "epoch": 9.383130291141855, "grad_norm": 0.10060890763998032, "learning_rate": 0.01, "loss": 1.9264, "step": 90885 }, { "epoch": 9.383440016518687, "grad_norm": 0.07907859981060028, "learning_rate": 0.01, "loss": 1.9366, "step": 90888 }, { "epoch": 9.38374974189552, "grad_norm": 0.06177898123860359, "learning_rate": 0.01, "loss": 1.9381, "step": 90891 }, { "epoch": 9.384059467272351, "grad_norm": 0.033914852887392044, "learning_rate": 0.01, "loss": 1.9172, "step": 90894 }, { "epoch": 9.384369192649185, "grad_norm": 0.03818202018737793, "learning_rate": 0.01, "loss": 1.9446, "step": 90897 }, { "epoch": 9.384678918026017, "grad_norm": 0.09121721982955933, "learning_rate": 0.01, "loss": 1.9568, "step": 90900 }, { "epoch": 9.384988643402849, "grad_norm": 0.089520663022995, "learning_rate": 0.01, "loss": 1.9545, "step": 90903 }, { "epoch": 9.385298368779681, "grad_norm": 0.06648974865674973, "learning_rate": 0.01, "loss": 1.9364, "step": 90906 }, { "epoch": 9.385608094156515, "grad_norm": 0.07574710994958878, "learning_rate": 0.01, "loss": 1.8943, "step": 90909 }, { "epoch": 9.385917819533347, "grad_norm": 0.046774134039878845, "learning_rate": 0.01, "loss": 1.9376, "step": 90912 }, { "epoch": 9.386227544910179, "grad_norm": 0.03651203215122223, "learning_rate": 0.01, "loss": 1.9439, "step": 90915 }, { "epoch": 9.386537270287013, "grad_norm": 0.07176890969276428, "learning_rate": 0.01, "loss": 1.95, "step": 90918 }, { "epoch": 9.386846995663845, "grad_norm": 0.10504325479269028, "learning_rate": 0.01, "loss": 1.9161, "step": 90921 }, { "epoch": 9.387156721040677, "grad_norm": 0.04525873437523842, "learning_rate": 0.01, "loss": 1.9278, "step": 90924 }, { "epoch": 9.387466446417509, "grad_norm": 0.03573272004723549, "learning_rate": 0.01, "loss": 1.9314, "step": 90927 }, { "epoch": 9.387776171794343, "grad_norm": 0.03916339576244354, "learning_rate": 0.01, "loss": 1.931, "step": 90930 }, { "epoch": 9.388085897171175, "grad_norm": 0.05404656380414963, "learning_rate": 0.01, "loss": 1.9022, "step": 90933 }, { "epoch": 9.388395622548007, "grad_norm": 0.12576231360435486, "learning_rate": 0.01, "loss": 1.9209, "step": 90936 }, { "epoch": 9.38870534792484, "grad_norm": 0.048078350722789764, "learning_rate": 0.01, "loss": 1.9333, "step": 90939 }, { "epoch": 9.389015073301673, "grad_norm": 0.102527916431427, "learning_rate": 0.01, "loss": 1.9183, "step": 90942 }, { "epoch": 9.389324798678505, "grad_norm": 0.058806050568819046, "learning_rate": 0.01, "loss": 1.9532, "step": 90945 }, { "epoch": 9.389634524055337, "grad_norm": 0.07547862827777863, "learning_rate": 0.01, "loss": 1.9191, "step": 90948 }, { "epoch": 9.38994424943217, "grad_norm": 0.08734951913356781, "learning_rate": 0.01, "loss": 1.9389, "step": 90951 }, { "epoch": 9.390253974809003, "grad_norm": 0.08816061913967133, "learning_rate": 0.01, "loss": 1.9353, "step": 90954 }, { "epoch": 9.390563700185835, "grad_norm": 0.05598800256848335, "learning_rate": 0.01, "loss": 1.9357, "step": 90957 }, { "epoch": 9.390873425562667, "grad_norm": 0.06532081216573715, "learning_rate": 0.01, "loss": 1.9427, "step": 90960 }, { "epoch": 9.3911831509395, "grad_norm": 0.07077542692422867, "learning_rate": 0.01, "loss": 1.9398, "step": 90963 }, { "epoch": 9.391492876316333, "grad_norm": 0.05597352236509323, "learning_rate": 0.01, "loss": 1.9187, "step": 90966 }, { "epoch": 9.391802601693165, "grad_norm": 0.07413847744464874, "learning_rate": 0.01, "loss": 1.9453, "step": 90969 }, { "epoch": 9.392112327069999, "grad_norm": 0.037031251937150955, "learning_rate": 0.01, "loss": 1.95, "step": 90972 }, { "epoch": 9.39242205244683, "grad_norm": 0.06918266415596008, "learning_rate": 0.01, "loss": 1.9733, "step": 90975 }, { "epoch": 9.392731777823663, "grad_norm": 0.0500267818570137, "learning_rate": 0.01, "loss": 1.917, "step": 90978 }, { "epoch": 9.393041503200495, "grad_norm": 0.03715291619300842, "learning_rate": 0.01, "loss": 1.9514, "step": 90981 }, { "epoch": 9.393351228577329, "grad_norm": 0.050200916826725006, "learning_rate": 0.01, "loss": 1.9624, "step": 90984 }, { "epoch": 9.39366095395416, "grad_norm": 0.08894219994544983, "learning_rate": 0.01, "loss": 1.9355, "step": 90987 }, { "epoch": 9.393970679330993, "grad_norm": 0.08678343892097473, "learning_rate": 0.01, "loss": 1.9243, "step": 90990 }, { "epoch": 9.394280404707827, "grad_norm": 0.09989691525697708, "learning_rate": 0.01, "loss": 1.9497, "step": 90993 }, { "epoch": 9.394590130084659, "grad_norm": 0.12368086725473404, "learning_rate": 0.01, "loss": 1.952, "step": 90996 }, { "epoch": 9.39489985546149, "grad_norm": 0.04987645894289017, "learning_rate": 0.01, "loss": 1.9591, "step": 90999 }, { "epoch": 9.395209580838323, "grad_norm": 0.03483951464295387, "learning_rate": 0.01, "loss": 1.9252, "step": 91002 }, { "epoch": 9.395519306215157, "grad_norm": 0.044155314564704895, "learning_rate": 0.01, "loss": 1.941, "step": 91005 }, { "epoch": 9.395829031591989, "grad_norm": 0.04584622010588646, "learning_rate": 0.01, "loss": 1.9525, "step": 91008 }, { "epoch": 9.39613875696882, "grad_norm": 0.04356890171766281, "learning_rate": 0.01, "loss": 1.9179, "step": 91011 }, { "epoch": 9.396448482345653, "grad_norm": 0.03459366783499718, "learning_rate": 0.01, "loss": 1.9502, "step": 91014 }, { "epoch": 9.396758207722486, "grad_norm": 0.0733804851770401, "learning_rate": 0.01, "loss": 1.9341, "step": 91017 }, { "epoch": 9.397067933099319, "grad_norm": 0.10501904040575027, "learning_rate": 0.01, "loss": 1.9544, "step": 91020 }, { "epoch": 9.39737765847615, "grad_norm": 0.08252866566181183, "learning_rate": 0.01, "loss": 1.9303, "step": 91023 }, { "epoch": 9.397687383852984, "grad_norm": 0.11142603307962418, "learning_rate": 0.01, "loss": 1.9451, "step": 91026 }, { "epoch": 9.397997109229816, "grad_norm": 0.06776826083660126, "learning_rate": 0.01, "loss": 1.9425, "step": 91029 }, { "epoch": 9.398306834606649, "grad_norm": 0.055962324142456055, "learning_rate": 0.01, "loss": 1.9551, "step": 91032 }, { "epoch": 9.39861655998348, "grad_norm": 0.10423536598682404, "learning_rate": 0.01, "loss": 1.9582, "step": 91035 }, { "epoch": 9.398926285360314, "grad_norm": 0.03981839492917061, "learning_rate": 0.01, "loss": 1.9488, "step": 91038 }, { "epoch": 9.399236010737146, "grad_norm": 0.06059722229838371, "learning_rate": 0.01, "loss": 1.9248, "step": 91041 }, { "epoch": 9.399545736113978, "grad_norm": 0.10510721802711487, "learning_rate": 0.01, "loss": 1.9107, "step": 91044 }, { "epoch": 9.399855461490812, "grad_norm": 0.07276123762130737, "learning_rate": 0.01, "loss": 1.9341, "step": 91047 }, { "epoch": 9.400165186867644, "grad_norm": 0.06601347774267197, "learning_rate": 0.01, "loss": 1.9284, "step": 91050 }, { "epoch": 9.400474912244476, "grad_norm": 0.11832351237535477, "learning_rate": 0.01, "loss": 1.9271, "step": 91053 }, { "epoch": 9.400784637621308, "grad_norm": 0.056920114904642105, "learning_rate": 0.01, "loss": 1.941, "step": 91056 }, { "epoch": 9.401094362998142, "grad_norm": 0.03529761731624603, "learning_rate": 0.01, "loss": 1.948, "step": 91059 }, { "epoch": 9.401404088374974, "grad_norm": 0.04953724890947342, "learning_rate": 0.01, "loss": 1.9391, "step": 91062 }, { "epoch": 9.401713813751806, "grad_norm": 0.11988922953605652, "learning_rate": 0.01, "loss": 1.9161, "step": 91065 }, { "epoch": 9.402023539128638, "grad_norm": 0.06321597844362259, "learning_rate": 0.01, "loss": 1.959, "step": 91068 }, { "epoch": 9.402333264505472, "grad_norm": 0.05112435296177864, "learning_rate": 0.01, "loss": 1.9416, "step": 91071 }, { "epoch": 9.402642989882304, "grad_norm": 0.12304303050041199, "learning_rate": 0.01, "loss": 1.9726, "step": 91074 }, { "epoch": 9.402952715259136, "grad_norm": 0.038611236959695816, "learning_rate": 0.01, "loss": 1.9523, "step": 91077 }, { "epoch": 9.40326244063597, "grad_norm": 0.04994663596153259, "learning_rate": 0.01, "loss": 1.9548, "step": 91080 }, { "epoch": 9.403572166012802, "grad_norm": 0.034984659403562546, "learning_rate": 0.01, "loss": 1.9414, "step": 91083 }, { "epoch": 9.403881891389634, "grad_norm": 0.034549251198768616, "learning_rate": 0.01, "loss": 1.9291, "step": 91086 }, { "epoch": 9.404191616766466, "grad_norm": 0.09396814554929733, "learning_rate": 0.01, "loss": 1.9181, "step": 91089 }, { "epoch": 9.4045013421433, "grad_norm": 0.05054985359311104, "learning_rate": 0.01, "loss": 1.9359, "step": 91092 }, { "epoch": 9.404811067520132, "grad_norm": 0.0785905048251152, "learning_rate": 0.01, "loss": 1.9431, "step": 91095 }, { "epoch": 9.405120792896964, "grad_norm": 0.0745377391576767, "learning_rate": 0.01, "loss": 1.9362, "step": 91098 }, { "epoch": 9.405430518273798, "grad_norm": 0.05711502581834793, "learning_rate": 0.01, "loss": 1.9412, "step": 91101 }, { "epoch": 9.40574024365063, "grad_norm": 0.13965703547000885, "learning_rate": 0.01, "loss": 1.959, "step": 91104 }, { "epoch": 9.406049969027462, "grad_norm": 0.09781201183795929, "learning_rate": 0.01, "loss": 1.969, "step": 91107 }, { "epoch": 9.406359694404294, "grad_norm": 0.04011009633541107, "learning_rate": 0.01, "loss": 1.9271, "step": 91110 }, { "epoch": 9.406669419781128, "grad_norm": 0.06505788117647171, "learning_rate": 0.01, "loss": 1.9516, "step": 91113 }, { "epoch": 9.40697914515796, "grad_norm": 0.08113677054643631, "learning_rate": 0.01, "loss": 1.9146, "step": 91116 }, { "epoch": 9.407288870534792, "grad_norm": 0.12400487810373306, "learning_rate": 0.01, "loss": 1.9421, "step": 91119 }, { "epoch": 9.407598595911624, "grad_norm": 0.08019889891147614, "learning_rate": 0.01, "loss": 1.935, "step": 91122 }, { "epoch": 9.407908321288458, "grad_norm": 0.04559165611863136, "learning_rate": 0.01, "loss": 1.9284, "step": 91125 }, { "epoch": 9.40821804666529, "grad_norm": 0.10827832669019699, "learning_rate": 0.01, "loss": 1.9131, "step": 91128 }, { "epoch": 9.408527772042122, "grad_norm": 0.04035136103630066, "learning_rate": 0.01, "loss": 1.9616, "step": 91131 }, { "epoch": 9.408837497418956, "grad_norm": 0.03153857961297035, "learning_rate": 0.01, "loss": 1.937, "step": 91134 }, { "epoch": 9.409147222795788, "grad_norm": 0.07644420862197876, "learning_rate": 0.01, "loss": 1.8987, "step": 91137 }, { "epoch": 9.40945694817262, "grad_norm": 0.10326854139566422, "learning_rate": 0.01, "loss": 1.9602, "step": 91140 }, { "epoch": 9.409766673549452, "grad_norm": 0.10630738735198975, "learning_rate": 0.01, "loss": 1.9663, "step": 91143 }, { "epoch": 9.410076398926286, "grad_norm": 0.10723258554935455, "learning_rate": 0.01, "loss": 1.9376, "step": 91146 }, { "epoch": 9.410386124303118, "grad_norm": 0.07178448140621185, "learning_rate": 0.01, "loss": 1.9359, "step": 91149 }, { "epoch": 9.41069584967995, "grad_norm": 0.039453644305467606, "learning_rate": 0.01, "loss": 1.9373, "step": 91152 }, { "epoch": 9.411005575056784, "grad_norm": 0.04125725105404854, "learning_rate": 0.01, "loss": 1.9385, "step": 91155 }, { "epoch": 9.411315300433616, "grad_norm": 0.07123439013957977, "learning_rate": 0.01, "loss": 1.9149, "step": 91158 }, { "epoch": 9.411625025810448, "grad_norm": 0.07479481399059296, "learning_rate": 0.01, "loss": 1.9344, "step": 91161 }, { "epoch": 9.41193475118728, "grad_norm": 0.062062572687864304, "learning_rate": 0.01, "loss": 1.9573, "step": 91164 }, { "epoch": 9.412244476564114, "grad_norm": 0.09150844812393188, "learning_rate": 0.01, "loss": 1.9375, "step": 91167 }, { "epoch": 9.412554201940946, "grad_norm": 0.06751282513141632, "learning_rate": 0.01, "loss": 1.9499, "step": 91170 }, { "epoch": 9.412863927317778, "grad_norm": 0.09919881075620651, "learning_rate": 0.01, "loss": 1.9146, "step": 91173 }, { "epoch": 9.41317365269461, "grad_norm": 0.11017844080924988, "learning_rate": 0.01, "loss": 1.9135, "step": 91176 }, { "epoch": 9.413483378071444, "grad_norm": 0.07611265033483505, "learning_rate": 0.01, "loss": 1.9296, "step": 91179 }, { "epoch": 9.413793103448276, "grad_norm": 0.10450519621372223, "learning_rate": 0.01, "loss": 1.9504, "step": 91182 }, { "epoch": 9.414102828825108, "grad_norm": 0.05414097383618355, "learning_rate": 0.01, "loss": 1.9282, "step": 91185 }, { "epoch": 9.414412554201942, "grad_norm": 0.04041386395692825, "learning_rate": 0.01, "loss": 1.9212, "step": 91188 }, { "epoch": 9.414722279578774, "grad_norm": 0.0727272480726242, "learning_rate": 0.01, "loss": 1.9029, "step": 91191 }, { "epoch": 9.415032004955606, "grad_norm": 0.06471801549196243, "learning_rate": 0.01, "loss": 1.9532, "step": 91194 }, { "epoch": 9.415341730332438, "grad_norm": 0.04700074717402458, "learning_rate": 0.01, "loss": 1.9456, "step": 91197 }, { "epoch": 9.415651455709272, "grad_norm": 0.04389588534832001, "learning_rate": 0.01, "loss": 1.9171, "step": 91200 }, { "epoch": 9.415961181086104, "grad_norm": 0.08153852075338364, "learning_rate": 0.01, "loss": 1.9152, "step": 91203 }, { "epoch": 9.416270906462936, "grad_norm": 0.048129767179489136, "learning_rate": 0.01, "loss": 1.9456, "step": 91206 }, { "epoch": 9.41658063183977, "grad_norm": 0.057881567627191544, "learning_rate": 0.01, "loss": 1.9334, "step": 91209 }, { "epoch": 9.416890357216602, "grad_norm": 0.040281008929014206, "learning_rate": 0.01, "loss": 1.9343, "step": 91212 }, { "epoch": 9.417200082593434, "grad_norm": 0.06497588753700256, "learning_rate": 0.01, "loss": 1.9327, "step": 91215 }, { "epoch": 9.417509807970266, "grad_norm": 0.045624736696481705, "learning_rate": 0.01, "loss": 1.9255, "step": 91218 }, { "epoch": 9.4178195333471, "grad_norm": 0.037451039999723434, "learning_rate": 0.01, "loss": 1.9044, "step": 91221 }, { "epoch": 9.418129258723932, "grad_norm": 0.13164624571800232, "learning_rate": 0.01, "loss": 1.9421, "step": 91224 }, { "epoch": 9.418438984100764, "grad_norm": 0.09604440629482269, "learning_rate": 0.01, "loss": 1.951, "step": 91227 }, { "epoch": 9.418748709477596, "grad_norm": 0.07141084223985672, "learning_rate": 0.01, "loss": 1.9331, "step": 91230 }, { "epoch": 9.41905843485443, "grad_norm": 0.11629889160394669, "learning_rate": 0.01, "loss": 1.9453, "step": 91233 }, { "epoch": 9.419368160231262, "grad_norm": 0.07452220469713211, "learning_rate": 0.01, "loss": 1.9458, "step": 91236 }, { "epoch": 9.419677885608094, "grad_norm": 0.04817759245634079, "learning_rate": 0.01, "loss": 1.9326, "step": 91239 }, { "epoch": 9.419987610984927, "grad_norm": 0.053956806659698486, "learning_rate": 0.01, "loss": 1.9297, "step": 91242 }, { "epoch": 9.42029733636176, "grad_norm": 0.053349558264017105, "learning_rate": 0.01, "loss": 1.9301, "step": 91245 }, { "epoch": 9.420607061738592, "grad_norm": 0.03342839330434799, "learning_rate": 0.01, "loss": 1.922, "step": 91248 }, { "epoch": 9.420916787115424, "grad_norm": 0.07104140520095825, "learning_rate": 0.01, "loss": 1.9195, "step": 91251 }, { "epoch": 9.421226512492257, "grad_norm": 0.10545811057090759, "learning_rate": 0.01, "loss": 1.9505, "step": 91254 }, { "epoch": 9.42153623786909, "grad_norm": 0.07372496277093887, "learning_rate": 0.01, "loss": 1.947, "step": 91257 }, { "epoch": 9.421845963245921, "grad_norm": 0.06759019941091537, "learning_rate": 0.01, "loss": 1.9466, "step": 91260 }, { "epoch": 9.422155688622755, "grad_norm": 0.08602011948823929, "learning_rate": 0.01, "loss": 1.9106, "step": 91263 }, { "epoch": 9.422465413999587, "grad_norm": 0.04749952629208565, "learning_rate": 0.01, "loss": 1.9507, "step": 91266 }, { "epoch": 9.42277513937642, "grad_norm": 0.04223652184009552, "learning_rate": 0.01, "loss": 1.938, "step": 91269 }, { "epoch": 9.423084864753251, "grad_norm": 0.08851899951696396, "learning_rate": 0.01, "loss": 1.9651, "step": 91272 }, { "epoch": 9.423394590130085, "grad_norm": 0.04346892237663269, "learning_rate": 0.01, "loss": 1.9352, "step": 91275 }, { "epoch": 9.423704315506917, "grad_norm": 0.042326390743255615, "learning_rate": 0.01, "loss": 1.9294, "step": 91278 }, { "epoch": 9.42401404088375, "grad_norm": 0.10447720438241959, "learning_rate": 0.01, "loss": 1.9286, "step": 91281 }, { "epoch": 9.424323766260581, "grad_norm": 0.04779525101184845, "learning_rate": 0.01, "loss": 1.9422, "step": 91284 }, { "epoch": 9.424633491637415, "grad_norm": 0.05690819025039673, "learning_rate": 0.01, "loss": 1.9234, "step": 91287 }, { "epoch": 9.424943217014247, "grad_norm": 0.05316159501671791, "learning_rate": 0.01, "loss": 1.9431, "step": 91290 }, { "epoch": 9.42525294239108, "grad_norm": 0.09380586445331573, "learning_rate": 0.01, "loss": 1.9137, "step": 91293 }, { "epoch": 9.425562667767913, "grad_norm": 0.04061369597911835, "learning_rate": 0.01, "loss": 1.9565, "step": 91296 }, { "epoch": 9.425872393144745, "grad_norm": 0.052185557782649994, "learning_rate": 0.01, "loss": 1.9204, "step": 91299 }, { "epoch": 9.426182118521577, "grad_norm": 0.05126102641224861, "learning_rate": 0.01, "loss": 1.9125, "step": 91302 }, { "epoch": 9.42649184389841, "grad_norm": 0.037692565470933914, "learning_rate": 0.01, "loss": 1.9343, "step": 91305 }, { "epoch": 9.426801569275243, "grad_norm": 0.03374462202191353, "learning_rate": 0.01, "loss": 1.9327, "step": 91308 }, { "epoch": 9.427111294652075, "grad_norm": 0.03585955500602722, "learning_rate": 0.01, "loss": 1.9419, "step": 91311 }, { "epoch": 9.427421020028907, "grad_norm": 0.10094792395830154, "learning_rate": 0.01, "loss": 1.9419, "step": 91314 }, { "epoch": 9.427730745405741, "grad_norm": 0.10577898472547531, "learning_rate": 0.01, "loss": 1.9079, "step": 91317 }, { "epoch": 9.428040470782573, "grad_norm": 0.09183473140001297, "learning_rate": 0.01, "loss": 1.9279, "step": 91320 }, { "epoch": 9.428350196159405, "grad_norm": 0.05396624654531479, "learning_rate": 0.01, "loss": 1.9682, "step": 91323 }, { "epoch": 9.428659921536237, "grad_norm": 0.035900574177503586, "learning_rate": 0.01, "loss": 1.9553, "step": 91326 }, { "epoch": 9.428969646913071, "grad_norm": 0.07058069109916687, "learning_rate": 0.01, "loss": 1.9411, "step": 91329 }, { "epoch": 9.429279372289903, "grad_norm": 0.11194363981485367, "learning_rate": 0.01, "loss": 1.9236, "step": 91332 }, { "epoch": 9.429589097666735, "grad_norm": 0.06465640664100647, "learning_rate": 0.01, "loss": 1.9409, "step": 91335 }, { "epoch": 9.429898823043567, "grad_norm": 0.09346874803304672, "learning_rate": 0.01, "loss": 1.9195, "step": 91338 }, { "epoch": 9.430208548420401, "grad_norm": 0.05075174570083618, "learning_rate": 0.01, "loss": 1.9265, "step": 91341 }, { "epoch": 9.430518273797233, "grad_norm": 0.04691362753510475, "learning_rate": 0.01, "loss": 1.9135, "step": 91344 }, { "epoch": 9.430827999174065, "grad_norm": 0.05307089909911156, "learning_rate": 0.01, "loss": 1.9548, "step": 91347 }, { "epoch": 9.431137724550899, "grad_norm": 0.04385623335838318, "learning_rate": 0.01, "loss": 1.925, "step": 91350 }, { "epoch": 9.431447449927731, "grad_norm": 0.14806324243545532, "learning_rate": 0.01, "loss": 1.9296, "step": 91353 }, { "epoch": 9.431757175304563, "grad_norm": 0.07075516134500504, "learning_rate": 0.01, "loss": 1.9474, "step": 91356 }, { "epoch": 9.432066900681395, "grad_norm": 0.03600502386689186, "learning_rate": 0.01, "loss": 1.9408, "step": 91359 }, { "epoch": 9.432376626058229, "grad_norm": 0.05355400964617729, "learning_rate": 0.01, "loss": 1.9427, "step": 91362 }, { "epoch": 9.432686351435061, "grad_norm": 0.0357879213988781, "learning_rate": 0.01, "loss": 1.9171, "step": 91365 }, { "epoch": 9.432996076811893, "grad_norm": 0.03480469807982445, "learning_rate": 0.01, "loss": 1.936, "step": 91368 }, { "epoch": 9.433305802188727, "grad_norm": 0.07176405936479568, "learning_rate": 0.01, "loss": 1.9236, "step": 91371 }, { "epoch": 9.433615527565559, "grad_norm": 0.06469430774450302, "learning_rate": 0.01, "loss": 1.9406, "step": 91374 }, { "epoch": 9.433925252942391, "grad_norm": 0.06961306929588318, "learning_rate": 0.01, "loss": 1.9309, "step": 91377 }, { "epoch": 9.434234978319223, "grad_norm": 0.04767259955406189, "learning_rate": 0.01, "loss": 1.9533, "step": 91380 }, { "epoch": 9.434544703696057, "grad_norm": 0.03745473176240921, "learning_rate": 0.01, "loss": 1.9656, "step": 91383 }, { "epoch": 9.434854429072889, "grad_norm": 0.054868750274181366, "learning_rate": 0.01, "loss": 1.9373, "step": 91386 }, { "epoch": 9.435164154449721, "grad_norm": 0.12046508491039276, "learning_rate": 0.01, "loss": 1.9532, "step": 91389 }, { "epoch": 9.435473879826553, "grad_norm": 0.1015070453286171, "learning_rate": 0.01, "loss": 1.9604, "step": 91392 }, { "epoch": 9.435783605203387, "grad_norm": 0.04544771835207939, "learning_rate": 0.01, "loss": 1.923, "step": 91395 }, { "epoch": 9.436093330580219, "grad_norm": 0.04002662003040314, "learning_rate": 0.01, "loss": 1.9574, "step": 91398 }, { "epoch": 9.43640305595705, "grad_norm": 0.04304361715912819, "learning_rate": 0.01, "loss": 1.9455, "step": 91401 }, { "epoch": 9.436712781333885, "grad_norm": 0.03594297543168068, "learning_rate": 0.01, "loss": 1.9468, "step": 91404 }, { "epoch": 9.437022506710717, "grad_norm": 0.05000785365700722, "learning_rate": 0.01, "loss": 1.9451, "step": 91407 }, { "epoch": 9.437332232087549, "grad_norm": 0.06557739526033401, "learning_rate": 0.01, "loss": 1.9381, "step": 91410 }, { "epoch": 9.43764195746438, "grad_norm": 0.041119322180747986, "learning_rate": 0.01, "loss": 1.9541, "step": 91413 }, { "epoch": 9.437951682841215, "grad_norm": 0.054089631885290146, "learning_rate": 0.01, "loss": 1.9279, "step": 91416 }, { "epoch": 9.438261408218047, "grad_norm": 0.05677805095911026, "learning_rate": 0.01, "loss": 1.9741, "step": 91419 }, { "epoch": 9.438571133594879, "grad_norm": 0.032810989767313004, "learning_rate": 0.01, "loss": 1.9195, "step": 91422 }, { "epoch": 9.438880858971713, "grad_norm": 0.12531718611717224, "learning_rate": 0.01, "loss": 1.9474, "step": 91425 }, { "epoch": 9.439190584348545, "grad_norm": 0.10076866298913956, "learning_rate": 0.01, "loss": 1.9458, "step": 91428 }, { "epoch": 9.439500309725377, "grad_norm": 0.07036837190389633, "learning_rate": 0.01, "loss": 1.9378, "step": 91431 }, { "epoch": 9.439810035102209, "grad_norm": 0.10938290506601334, "learning_rate": 0.01, "loss": 1.9345, "step": 91434 }, { "epoch": 9.440119760479043, "grad_norm": 0.10378368198871613, "learning_rate": 0.01, "loss": 1.9406, "step": 91437 }, { "epoch": 9.440429485855875, "grad_norm": 0.07203853875398636, "learning_rate": 0.01, "loss": 1.9584, "step": 91440 }, { "epoch": 9.440739211232707, "grad_norm": 0.04642992094159126, "learning_rate": 0.01, "loss": 1.9339, "step": 91443 }, { "epoch": 9.441048936609539, "grad_norm": 0.03274267911911011, "learning_rate": 0.01, "loss": 1.9299, "step": 91446 }, { "epoch": 9.441358661986373, "grad_norm": 0.03794633969664574, "learning_rate": 0.01, "loss": 1.9114, "step": 91449 }, { "epoch": 9.441668387363205, "grad_norm": 0.045492980629205704, "learning_rate": 0.01, "loss": 1.9272, "step": 91452 }, { "epoch": 9.441978112740037, "grad_norm": 0.09170827269554138, "learning_rate": 0.01, "loss": 1.9577, "step": 91455 }, { "epoch": 9.44228783811687, "grad_norm": 0.07204066962003708, "learning_rate": 0.01, "loss": 1.9177, "step": 91458 }, { "epoch": 9.442597563493702, "grad_norm": 0.07380224019289017, "learning_rate": 0.01, "loss": 1.9558, "step": 91461 }, { "epoch": 9.442907288870535, "grad_norm": 0.05300910398364067, "learning_rate": 0.01, "loss": 1.9213, "step": 91464 }, { "epoch": 9.443217014247367, "grad_norm": 0.046401552855968475, "learning_rate": 0.01, "loss": 1.9386, "step": 91467 }, { "epoch": 9.4435267396242, "grad_norm": 0.039238277822732925, "learning_rate": 0.01, "loss": 1.9195, "step": 91470 }, { "epoch": 9.443836465001032, "grad_norm": 0.13839541375637054, "learning_rate": 0.01, "loss": 1.9234, "step": 91473 }, { "epoch": 9.444146190377865, "grad_norm": 0.06052450090646744, "learning_rate": 0.01, "loss": 1.9383, "step": 91476 }, { "epoch": 9.444455915754698, "grad_norm": 0.05661856010556221, "learning_rate": 0.01, "loss": 1.9279, "step": 91479 }, { "epoch": 9.44476564113153, "grad_norm": 0.08223719894886017, "learning_rate": 0.01, "loss": 1.9229, "step": 91482 }, { "epoch": 9.445075366508362, "grad_norm": 0.04421024024486542, "learning_rate": 0.01, "loss": 1.9347, "step": 91485 }, { "epoch": 9.445385091885194, "grad_norm": 0.0505010187625885, "learning_rate": 0.01, "loss": 1.9389, "step": 91488 }, { "epoch": 9.445694817262028, "grad_norm": 0.04484516754746437, "learning_rate": 0.01, "loss": 1.9483, "step": 91491 }, { "epoch": 9.44600454263886, "grad_norm": 0.05021611973643303, "learning_rate": 0.01, "loss": 1.9438, "step": 91494 }, { "epoch": 9.446314268015692, "grad_norm": 0.03842268884181976, "learning_rate": 0.01, "loss": 1.9373, "step": 91497 }, { "epoch": 9.446623993392524, "grad_norm": 0.12103386968374252, "learning_rate": 0.01, "loss": 1.9391, "step": 91500 }, { "epoch": 9.446933718769358, "grad_norm": 0.054335884749889374, "learning_rate": 0.01, "loss": 1.9484, "step": 91503 }, { "epoch": 9.44724344414619, "grad_norm": 0.05245980620384216, "learning_rate": 0.01, "loss": 1.9555, "step": 91506 }, { "epoch": 9.447553169523022, "grad_norm": 0.05430982634425163, "learning_rate": 0.01, "loss": 1.9609, "step": 91509 }, { "epoch": 9.447862894899856, "grad_norm": 0.04710724577307701, "learning_rate": 0.01, "loss": 1.9159, "step": 91512 }, { "epoch": 9.448172620276688, "grad_norm": 0.03400622680783272, "learning_rate": 0.01, "loss": 1.9458, "step": 91515 }, { "epoch": 9.44848234565352, "grad_norm": 0.05350743234157562, "learning_rate": 0.01, "loss": 1.9164, "step": 91518 }, { "epoch": 9.448792071030352, "grad_norm": 0.05976387858390808, "learning_rate": 0.01, "loss": 1.9314, "step": 91521 }, { "epoch": 9.449101796407186, "grad_norm": 0.07914312928915024, "learning_rate": 0.01, "loss": 1.9245, "step": 91524 }, { "epoch": 9.449411521784018, "grad_norm": 0.07157033681869507, "learning_rate": 0.01, "loss": 1.9057, "step": 91527 }, { "epoch": 9.44972124716085, "grad_norm": 0.05218801274895668, "learning_rate": 0.01, "loss": 1.9127, "step": 91530 }, { "epoch": 9.450030972537684, "grad_norm": 0.04557066783308983, "learning_rate": 0.01, "loss": 1.9296, "step": 91533 }, { "epoch": 9.450340697914516, "grad_norm": 0.05901813507080078, "learning_rate": 0.01, "loss": 1.8917, "step": 91536 }, { "epoch": 9.450650423291348, "grad_norm": 0.10986040532588959, "learning_rate": 0.01, "loss": 1.9529, "step": 91539 }, { "epoch": 9.45096014866818, "grad_norm": 0.03527398407459259, "learning_rate": 0.01, "loss": 1.9477, "step": 91542 }, { "epoch": 9.451269874045014, "grad_norm": 0.12370608001947403, "learning_rate": 0.01, "loss": 1.9468, "step": 91545 }, { "epoch": 9.451579599421846, "grad_norm": 0.06623967736959457, "learning_rate": 0.01, "loss": 1.9546, "step": 91548 }, { "epoch": 9.451889324798678, "grad_norm": 0.04259668290615082, "learning_rate": 0.01, "loss": 1.9428, "step": 91551 }, { "epoch": 9.45219905017551, "grad_norm": 0.04617312550544739, "learning_rate": 0.01, "loss": 1.9472, "step": 91554 }, { "epoch": 9.452508775552344, "grad_norm": 0.044426169246435165, "learning_rate": 0.01, "loss": 1.9277, "step": 91557 }, { "epoch": 9.452818500929176, "grad_norm": 0.05939152091741562, "learning_rate": 0.01, "loss": 1.9431, "step": 91560 }, { "epoch": 9.453128226306008, "grad_norm": 0.05148043483495712, "learning_rate": 0.01, "loss": 1.9553, "step": 91563 }, { "epoch": 9.453437951682842, "grad_norm": 0.051897548139095306, "learning_rate": 0.01, "loss": 1.9358, "step": 91566 }, { "epoch": 9.453747677059674, "grad_norm": 0.04463929310441017, "learning_rate": 0.01, "loss": 1.9264, "step": 91569 }, { "epoch": 9.454057402436506, "grad_norm": 0.058007046580314636, "learning_rate": 0.01, "loss": 1.9142, "step": 91572 }, { "epoch": 9.454367127813338, "grad_norm": 0.10628005117177963, "learning_rate": 0.01, "loss": 1.9338, "step": 91575 }, { "epoch": 9.454676853190172, "grad_norm": 0.15172232687473297, "learning_rate": 0.01, "loss": 1.9483, "step": 91578 }, { "epoch": 9.454986578567004, "grad_norm": 0.11548087745904922, "learning_rate": 0.01, "loss": 1.9275, "step": 91581 }, { "epoch": 9.455296303943836, "grad_norm": 0.05106515437364578, "learning_rate": 0.01, "loss": 1.9566, "step": 91584 }, { "epoch": 9.45560602932067, "grad_norm": 0.052183981984853745, "learning_rate": 0.01, "loss": 1.93, "step": 91587 }, { "epoch": 9.455915754697502, "grad_norm": 0.04708034172654152, "learning_rate": 0.01, "loss": 1.9483, "step": 91590 }, { "epoch": 9.456225480074334, "grad_norm": 0.04493347555398941, "learning_rate": 0.01, "loss": 1.9153, "step": 91593 }, { "epoch": 9.456535205451166, "grad_norm": 0.05144806206226349, "learning_rate": 0.01, "loss": 1.9296, "step": 91596 }, { "epoch": 9.456844930828, "grad_norm": 0.10107263922691345, "learning_rate": 0.01, "loss": 1.9685, "step": 91599 }, { "epoch": 9.457154656204832, "grad_norm": 0.06296777725219727, "learning_rate": 0.01, "loss": 1.9401, "step": 91602 }, { "epoch": 9.457464381581664, "grad_norm": 0.08265284448862076, "learning_rate": 0.01, "loss": 1.9626, "step": 91605 }, { "epoch": 9.457774106958496, "grad_norm": 0.0472281277179718, "learning_rate": 0.01, "loss": 1.9461, "step": 91608 }, { "epoch": 9.45808383233533, "grad_norm": 0.0438406802713871, "learning_rate": 0.01, "loss": 1.9298, "step": 91611 }, { "epoch": 9.458393557712162, "grad_norm": 0.03896843641996384, "learning_rate": 0.01, "loss": 1.9551, "step": 91614 }, { "epoch": 9.458703283088994, "grad_norm": 0.08807415515184402, "learning_rate": 0.01, "loss": 1.9429, "step": 91617 }, { "epoch": 9.459013008465828, "grad_norm": 0.11590650677680969, "learning_rate": 0.01, "loss": 1.9193, "step": 91620 }, { "epoch": 9.45932273384266, "grad_norm": 0.06758120656013489, "learning_rate": 0.01, "loss": 1.9392, "step": 91623 }, { "epoch": 9.459632459219492, "grad_norm": 0.05359683930873871, "learning_rate": 0.01, "loss": 1.9253, "step": 91626 }, { "epoch": 9.459942184596324, "grad_norm": 0.08736684173345566, "learning_rate": 0.01, "loss": 1.9518, "step": 91629 }, { "epoch": 9.460251909973158, "grad_norm": 0.06551837921142578, "learning_rate": 0.01, "loss": 1.8991, "step": 91632 }, { "epoch": 9.46056163534999, "grad_norm": 0.09359174221754074, "learning_rate": 0.01, "loss": 1.945, "step": 91635 }, { "epoch": 9.460871360726822, "grad_norm": 0.0556727759540081, "learning_rate": 0.01, "loss": 1.9462, "step": 91638 }, { "epoch": 9.461181086103656, "grad_norm": 0.04954926669597626, "learning_rate": 0.01, "loss": 1.9383, "step": 91641 }, { "epoch": 9.461490811480488, "grad_norm": 0.03850363940000534, "learning_rate": 0.01, "loss": 1.9405, "step": 91644 }, { "epoch": 9.46180053685732, "grad_norm": 0.11323880404233932, "learning_rate": 0.01, "loss": 1.9274, "step": 91647 }, { "epoch": 9.462110262234152, "grad_norm": 0.1314762681722641, "learning_rate": 0.01, "loss": 1.9373, "step": 91650 }, { "epoch": 9.462419987610986, "grad_norm": 0.049930673092603683, "learning_rate": 0.01, "loss": 1.9312, "step": 91653 }, { "epoch": 9.462729712987818, "grad_norm": 0.04937874525785446, "learning_rate": 0.01, "loss": 1.9207, "step": 91656 }, { "epoch": 9.46303943836465, "grad_norm": 0.03621145337820053, "learning_rate": 0.01, "loss": 1.9316, "step": 91659 }, { "epoch": 9.463349163741482, "grad_norm": 0.036950428038835526, "learning_rate": 0.01, "loss": 1.9513, "step": 91662 }, { "epoch": 9.463658889118316, "grad_norm": 0.06989537179470062, "learning_rate": 0.01, "loss": 1.9186, "step": 91665 }, { "epoch": 9.463968614495148, "grad_norm": 0.06468947231769562, "learning_rate": 0.01, "loss": 1.9602, "step": 91668 }, { "epoch": 9.46427833987198, "grad_norm": 0.07130585610866547, "learning_rate": 0.01, "loss": 1.9422, "step": 91671 }, { "epoch": 9.464588065248813, "grad_norm": 0.04903630167245865, "learning_rate": 0.01, "loss": 1.945, "step": 91674 }, { "epoch": 9.464897790625646, "grad_norm": 0.0678560733795166, "learning_rate": 0.01, "loss": 1.9363, "step": 91677 }, { "epoch": 9.465207516002478, "grad_norm": 0.11219093948602676, "learning_rate": 0.01, "loss": 1.9207, "step": 91680 }, { "epoch": 9.46551724137931, "grad_norm": 0.0979389101266861, "learning_rate": 0.01, "loss": 1.9514, "step": 91683 }, { "epoch": 9.465826966756143, "grad_norm": 0.07034581154584885, "learning_rate": 0.01, "loss": 1.9279, "step": 91686 }, { "epoch": 9.466136692132975, "grad_norm": 0.06349025666713715, "learning_rate": 0.01, "loss": 1.9141, "step": 91689 }, { "epoch": 9.466446417509808, "grad_norm": 0.08993300050497055, "learning_rate": 0.01, "loss": 1.9581, "step": 91692 }, { "epoch": 9.466756142886641, "grad_norm": 0.039396051317453384, "learning_rate": 0.01, "loss": 1.93, "step": 91695 }, { "epoch": 9.467065868263473, "grad_norm": 0.04936233535408974, "learning_rate": 0.01, "loss": 1.9391, "step": 91698 }, { "epoch": 9.467375593640305, "grad_norm": 0.041177988052368164, "learning_rate": 0.01, "loss": 1.9397, "step": 91701 }, { "epoch": 9.467685319017138, "grad_norm": 0.10703952610492706, "learning_rate": 0.01, "loss": 1.9367, "step": 91704 }, { "epoch": 9.467995044393971, "grad_norm": 0.03918042033910751, "learning_rate": 0.01, "loss": 1.9287, "step": 91707 }, { "epoch": 9.468304769770803, "grad_norm": 0.08904026448726654, "learning_rate": 0.01, "loss": 1.9451, "step": 91710 }, { "epoch": 9.468614495147635, "grad_norm": 0.1134420707821846, "learning_rate": 0.01, "loss": 1.9013, "step": 91713 }, { "epoch": 9.468924220524467, "grad_norm": 0.11754108965396881, "learning_rate": 0.01, "loss": 1.9594, "step": 91716 }, { "epoch": 9.469233945901301, "grad_norm": 0.06243946775794029, "learning_rate": 0.01, "loss": 1.9221, "step": 91719 }, { "epoch": 9.469543671278133, "grad_norm": 0.05874892324209213, "learning_rate": 0.01, "loss": 1.9266, "step": 91722 }, { "epoch": 9.469853396654965, "grad_norm": 0.06552620232105255, "learning_rate": 0.01, "loss": 1.9634, "step": 91725 }, { "epoch": 9.4701631220318, "grad_norm": 0.03773637115955353, "learning_rate": 0.01, "loss": 1.9539, "step": 91728 }, { "epoch": 9.470472847408631, "grad_norm": 0.03343876451253891, "learning_rate": 0.01, "loss": 1.9484, "step": 91731 }, { "epoch": 9.470782572785463, "grad_norm": 0.06106181442737579, "learning_rate": 0.01, "loss": 1.9423, "step": 91734 }, { "epoch": 9.471092298162295, "grad_norm": 0.04692843556404114, "learning_rate": 0.01, "loss": 1.9335, "step": 91737 }, { "epoch": 9.47140202353913, "grad_norm": 0.046416301280260086, "learning_rate": 0.01, "loss": 1.9278, "step": 91740 }, { "epoch": 9.471711748915961, "grad_norm": 0.05070651322603226, "learning_rate": 0.01, "loss": 1.9182, "step": 91743 }, { "epoch": 9.472021474292793, "grad_norm": 0.17885123193264008, "learning_rate": 0.01, "loss": 1.9477, "step": 91746 }, { "epoch": 9.472331199669627, "grad_norm": 0.05231613665819168, "learning_rate": 0.01, "loss": 1.9143, "step": 91749 }, { "epoch": 9.47264092504646, "grad_norm": 0.06618047505617142, "learning_rate": 0.01, "loss": 1.942, "step": 91752 }, { "epoch": 9.472950650423291, "grad_norm": 0.06575289368629456, "learning_rate": 0.01, "loss": 1.9358, "step": 91755 }, { "epoch": 9.473260375800123, "grad_norm": 0.10142083466053009, "learning_rate": 0.01, "loss": 1.9604, "step": 91758 }, { "epoch": 9.473570101176957, "grad_norm": 0.0768078863620758, "learning_rate": 0.01, "loss": 1.946, "step": 91761 }, { "epoch": 9.47387982655379, "grad_norm": 0.07403820008039474, "learning_rate": 0.01, "loss": 1.9325, "step": 91764 }, { "epoch": 9.474189551930621, "grad_norm": 0.05097631365060806, "learning_rate": 0.01, "loss": 1.9261, "step": 91767 }, { "epoch": 9.474499277307453, "grad_norm": 0.05768020078539848, "learning_rate": 0.01, "loss": 1.9506, "step": 91770 }, { "epoch": 9.474809002684287, "grad_norm": 0.04441077262163162, "learning_rate": 0.01, "loss": 1.9241, "step": 91773 }, { "epoch": 9.475118728061119, "grad_norm": 0.04517601802945137, "learning_rate": 0.01, "loss": 1.9521, "step": 91776 }, { "epoch": 9.475428453437951, "grad_norm": 0.03389676660299301, "learning_rate": 0.01, "loss": 1.9444, "step": 91779 }, { "epoch": 9.475738178814785, "grad_norm": 0.04144905135035515, "learning_rate": 0.01, "loss": 1.9442, "step": 91782 }, { "epoch": 9.476047904191617, "grad_norm": 0.03675378859043121, "learning_rate": 0.01, "loss": 1.9121, "step": 91785 }, { "epoch": 9.476357629568449, "grad_norm": 0.07525095343589783, "learning_rate": 0.01, "loss": 1.9243, "step": 91788 }, { "epoch": 9.476667354945281, "grad_norm": 0.19068317115306854, "learning_rate": 0.01, "loss": 1.9793, "step": 91791 }, { "epoch": 9.476977080322115, "grad_norm": 0.10045140981674194, "learning_rate": 0.01, "loss": 1.9488, "step": 91794 }, { "epoch": 9.477286805698947, "grad_norm": 0.04264092817902565, "learning_rate": 0.01, "loss": 1.9734, "step": 91797 }, { "epoch": 9.477596531075779, "grad_norm": 0.03995588794350624, "learning_rate": 0.01, "loss": 1.95, "step": 91800 }, { "epoch": 9.477906256452613, "grad_norm": 0.036804333329200745, "learning_rate": 0.01, "loss": 1.9334, "step": 91803 }, { "epoch": 9.478215981829445, "grad_norm": 0.05768147483468056, "learning_rate": 0.01, "loss": 1.9554, "step": 91806 }, { "epoch": 9.478525707206277, "grad_norm": 0.05145462602376938, "learning_rate": 0.01, "loss": 1.9227, "step": 91809 }, { "epoch": 9.478835432583109, "grad_norm": 0.10472197830677032, "learning_rate": 0.01, "loss": 1.9335, "step": 91812 }, { "epoch": 9.479145157959943, "grad_norm": 0.04309898242354393, "learning_rate": 0.01, "loss": 1.94, "step": 91815 }, { "epoch": 9.479454883336775, "grad_norm": 0.0850476399064064, "learning_rate": 0.01, "loss": 1.9162, "step": 91818 }, { "epoch": 9.479764608713607, "grad_norm": 0.07317638397216797, "learning_rate": 0.01, "loss": 1.9457, "step": 91821 }, { "epoch": 9.480074334090439, "grad_norm": 0.13520605862140656, "learning_rate": 0.01, "loss": 1.9555, "step": 91824 }, { "epoch": 9.480384059467273, "grad_norm": 0.03584807738661766, "learning_rate": 0.01, "loss": 1.9173, "step": 91827 }, { "epoch": 9.480693784844105, "grad_norm": 0.05352088063955307, "learning_rate": 0.01, "loss": 1.9194, "step": 91830 }, { "epoch": 9.481003510220937, "grad_norm": 0.03882770612835884, "learning_rate": 0.01, "loss": 1.9299, "step": 91833 }, { "epoch": 9.48131323559777, "grad_norm": 0.059367503970861435, "learning_rate": 0.01, "loss": 1.9491, "step": 91836 }, { "epoch": 9.481622960974603, "grad_norm": 0.06464463472366333, "learning_rate": 0.01, "loss": 1.9252, "step": 91839 }, { "epoch": 9.481932686351435, "grad_norm": 0.042919889092445374, "learning_rate": 0.01, "loss": 1.9481, "step": 91842 }, { "epoch": 9.482242411728267, "grad_norm": 0.04149112477898598, "learning_rate": 0.01, "loss": 1.9421, "step": 91845 }, { "epoch": 9.4825521371051, "grad_norm": 0.03673546761274338, "learning_rate": 0.01, "loss": 1.9335, "step": 91848 }, { "epoch": 9.482861862481933, "grad_norm": 0.04928651452064514, "learning_rate": 0.01, "loss": 1.9184, "step": 91851 }, { "epoch": 9.483171587858765, "grad_norm": 0.11838643252849579, "learning_rate": 0.01, "loss": 1.9416, "step": 91854 }, { "epoch": 9.483481313235599, "grad_norm": 0.08629243820905685, "learning_rate": 0.01, "loss": 1.9336, "step": 91857 }, { "epoch": 9.48379103861243, "grad_norm": 0.05888286978006363, "learning_rate": 0.01, "loss": 1.9313, "step": 91860 }, { "epoch": 9.484100763989263, "grad_norm": 0.07269131392240524, "learning_rate": 0.01, "loss": 1.9172, "step": 91863 }, { "epoch": 9.484410489366095, "grad_norm": 0.049393005669116974, "learning_rate": 0.01, "loss": 1.9368, "step": 91866 }, { "epoch": 9.484720214742929, "grad_norm": 0.057828810065984726, "learning_rate": 0.01, "loss": 1.9481, "step": 91869 }, { "epoch": 9.48502994011976, "grad_norm": 0.051319871097803116, "learning_rate": 0.01, "loss": 1.9687, "step": 91872 }, { "epoch": 9.485339665496593, "grad_norm": 0.03736547380685806, "learning_rate": 0.01, "loss": 1.9201, "step": 91875 }, { "epoch": 9.485649390873425, "grad_norm": 0.03503834083676338, "learning_rate": 0.01, "loss": 1.939, "step": 91878 }, { "epoch": 9.485959116250259, "grad_norm": 0.03310151398181915, "learning_rate": 0.01, "loss": 1.9475, "step": 91881 }, { "epoch": 9.48626884162709, "grad_norm": 0.10385093092918396, "learning_rate": 0.01, "loss": 1.8883, "step": 91884 }, { "epoch": 9.486578567003923, "grad_norm": 0.061645470559597015, "learning_rate": 0.01, "loss": 1.94, "step": 91887 }, { "epoch": 9.486888292380756, "grad_norm": 0.08191576600074768, "learning_rate": 0.01, "loss": 1.918, "step": 91890 }, { "epoch": 9.487198017757589, "grad_norm": 0.06519359350204468, "learning_rate": 0.01, "loss": 1.9273, "step": 91893 }, { "epoch": 9.48750774313442, "grad_norm": 0.07691865414381027, "learning_rate": 0.01, "loss": 1.9194, "step": 91896 }, { "epoch": 9.487817468511253, "grad_norm": 0.07537966966629028, "learning_rate": 0.01, "loss": 1.9416, "step": 91899 }, { "epoch": 9.488127193888086, "grad_norm": 0.12493839114904404, "learning_rate": 0.01, "loss": 1.9282, "step": 91902 }, { "epoch": 9.488436919264919, "grad_norm": 0.04485747963190079, "learning_rate": 0.01, "loss": 1.9353, "step": 91905 }, { "epoch": 9.48874664464175, "grad_norm": 0.06224820017814636, "learning_rate": 0.01, "loss": 1.9624, "step": 91908 }, { "epoch": 9.489056370018584, "grad_norm": 0.03537846729159355, "learning_rate": 0.01, "loss": 1.919, "step": 91911 }, { "epoch": 9.489366095395416, "grad_norm": 0.05944729223847389, "learning_rate": 0.01, "loss": 1.909, "step": 91914 }, { "epoch": 9.489675820772248, "grad_norm": 0.11124984174966812, "learning_rate": 0.01, "loss": 1.9089, "step": 91917 }, { "epoch": 9.48998554614908, "grad_norm": 0.06960305571556091, "learning_rate": 0.01, "loss": 1.9331, "step": 91920 }, { "epoch": 9.490295271525914, "grad_norm": 0.08293911069631577, "learning_rate": 0.01, "loss": 1.9454, "step": 91923 }, { "epoch": 9.490604996902746, "grad_norm": 0.0760396271944046, "learning_rate": 0.01, "loss": 1.9224, "step": 91926 }, { "epoch": 9.490914722279578, "grad_norm": 0.08136795461177826, "learning_rate": 0.01, "loss": 1.9444, "step": 91929 }, { "epoch": 9.49122444765641, "grad_norm": 0.0484798438847065, "learning_rate": 0.01, "loss": 1.9415, "step": 91932 }, { "epoch": 9.491534173033244, "grad_norm": 0.034965045750141144, "learning_rate": 0.01, "loss": 1.9327, "step": 91935 }, { "epoch": 9.491843898410076, "grad_norm": 0.037586700171232224, "learning_rate": 0.01, "loss": 1.9437, "step": 91938 }, { "epoch": 9.492153623786908, "grad_norm": 0.1187121570110321, "learning_rate": 0.01, "loss": 1.9431, "step": 91941 }, { "epoch": 9.492463349163742, "grad_norm": 0.043870143592357635, "learning_rate": 0.01, "loss": 1.9346, "step": 91944 }, { "epoch": 9.492773074540574, "grad_norm": 0.057566460222005844, "learning_rate": 0.01, "loss": 1.9464, "step": 91947 }, { "epoch": 9.493082799917406, "grad_norm": 0.04957624152302742, "learning_rate": 0.01, "loss": 1.9549, "step": 91950 }, { "epoch": 9.493392525294238, "grad_norm": 0.05048470199108124, "learning_rate": 0.01, "loss": 1.9057, "step": 91953 }, { "epoch": 9.493702250671072, "grad_norm": 0.046507786959409714, "learning_rate": 0.01, "loss": 1.9439, "step": 91956 }, { "epoch": 9.494011976047904, "grad_norm": 0.04052937403321266, "learning_rate": 0.01, "loss": 1.9516, "step": 91959 }, { "epoch": 9.494321701424736, "grad_norm": 0.05633610486984253, "learning_rate": 0.01, "loss": 1.9104, "step": 91962 }, { "epoch": 9.49463142680157, "grad_norm": 0.11403268575668335, "learning_rate": 0.01, "loss": 1.939, "step": 91965 }, { "epoch": 9.494941152178402, "grad_norm": 0.06479247659444809, "learning_rate": 0.01, "loss": 1.927, "step": 91968 }, { "epoch": 9.495250877555234, "grad_norm": 0.09049377590417862, "learning_rate": 0.01, "loss": 1.954, "step": 91971 }, { "epoch": 9.495560602932066, "grad_norm": 0.031010787934064865, "learning_rate": 0.01, "loss": 1.9357, "step": 91974 }, { "epoch": 9.4958703283089, "grad_norm": 0.057340823113918304, "learning_rate": 0.01, "loss": 1.9446, "step": 91977 }, { "epoch": 9.496180053685732, "grad_norm": 0.08693788945674896, "learning_rate": 0.01, "loss": 1.9655, "step": 91980 }, { "epoch": 9.496489779062564, "grad_norm": 0.07110211253166199, "learning_rate": 0.01, "loss": 1.9433, "step": 91983 }, { "epoch": 9.496799504439396, "grad_norm": 0.03922433406114578, "learning_rate": 0.01, "loss": 1.9345, "step": 91986 }, { "epoch": 9.49710922981623, "grad_norm": 0.045608848333358765, "learning_rate": 0.01, "loss": 1.9261, "step": 91989 }, { "epoch": 9.497418955193062, "grad_norm": 0.05738776549696922, "learning_rate": 0.01, "loss": 1.9432, "step": 91992 }, { "epoch": 9.497728680569894, "grad_norm": 0.03917551040649414, "learning_rate": 0.01, "loss": 1.9372, "step": 91995 }, { "epoch": 9.498038405946728, "grad_norm": 0.09609241038560867, "learning_rate": 0.01, "loss": 1.9359, "step": 91998 }, { "epoch": 9.49834813132356, "grad_norm": 0.08743585646152496, "learning_rate": 0.01, "loss": 1.9254, "step": 92001 }, { "epoch": 9.498657856700392, "grad_norm": 0.09634451568126678, "learning_rate": 0.01, "loss": 1.9334, "step": 92004 }, { "epoch": 9.498967582077224, "grad_norm": 0.10952823609113693, "learning_rate": 0.01, "loss": 1.949, "step": 92007 }, { "epoch": 9.499277307454058, "grad_norm": 0.05229433625936508, "learning_rate": 0.01, "loss": 1.9321, "step": 92010 }, { "epoch": 9.49958703283089, "grad_norm": 0.05109557509422302, "learning_rate": 0.01, "loss": 1.9251, "step": 92013 }, { "epoch": 9.499896758207722, "grad_norm": 0.03826722875237465, "learning_rate": 0.01, "loss": 1.9037, "step": 92016 }, { "epoch": 9.500206483584556, "grad_norm": 0.04049082472920418, "learning_rate": 0.01, "loss": 1.9287, "step": 92019 }, { "epoch": 9.500516208961388, "grad_norm": 0.06057979166507721, "learning_rate": 0.01, "loss": 1.929, "step": 92022 }, { "epoch": 9.50082593433822, "grad_norm": 0.05316155031323433, "learning_rate": 0.01, "loss": 1.9278, "step": 92025 }, { "epoch": 9.501135659715052, "grad_norm": 0.12783922255039215, "learning_rate": 0.01, "loss": 1.9399, "step": 92028 }, { "epoch": 9.501445385091886, "grad_norm": 0.054156944155693054, "learning_rate": 0.01, "loss": 1.9245, "step": 92031 }, { "epoch": 9.501755110468718, "grad_norm": 0.05763794109225273, "learning_rate": 0.01, "loss": 1.938, "step": 92034 }, { "epoch": 9.50206483584555, "grad_norm": 0.10921145230531693, "learning_rate": 0.01, "loss": 1.9265, "step": 92037 }, { "epoch": 9.502374561222382, "grad_norm": 0.033268216997385025, "learning_rate": 0.01, "loss": 1.9345, "step": 92040 }, { "epoch": 9.502684286599216, "grad_norm": 0.16365273296833038, "learning_rate": 0.01, "loss": 1.9272, "step": 92043 }, { "epoch": 9.502994011976048, "grad_norm": 0.1298300176858902, "learning_rate": 0.01, "loss": 1.9268, "step": 92046 }, { "epoch": 9.50330373735288, "grad_norm": 0.05752571299672127, "learning_rate": 0.01, "loss": 1.9185, "step": 92049 }, { "epoch": 9.503613462729714, "grad_norm": 0.04481608420610428, "learning_rate": 0.01, "loss": 1.9227, "step": 92052 }, { "epoch": 9.503923188106546, "grad_norm": 0.04134489968419075, "learning_rate": 0.01, "loss": 1.9443, "step": 92055 }, { "epoch": 9.504232913483378, "grad_norm": 0.04483073204755783, "learning_rate": 0.01, "loss": 1.9188, "step": 92058 }, { "epoch": 9.50454263886021, "grad_norm": 0.06324251741170883, "learning_rate": 0.01, "loss": 1.9277, "step": 92061 }, { "epoch": 9.504852364237044, "grad_norm": 0.047804441303014755, "learning_rate": 0.01, "loss": 1.9346, "step": 92064 }, { "epoch": 9.505162089613876, "grad_norm": 0.04165074974298477, "learning_rate": 0.01, "loss": 1.9408, "step": 92067 }, { "epoch": 9.505471814990708, "grad_norm": 0.05379807949066162, "learning_rate": 0.01, "loss": 1.9113, "step": 92070 }, { "epoch": 9.50578154036754, "grad_norm": 0.06253156065940857, "learning_rate": 0.01, "loss": 1.967, "step": 92073 }, { "epoch": 9.506091265744374, "grad_norm": 0.04284733533859253, "learning_rate": 0.01, "loss": 1.926, "step": 92076 }, { "epoch": 9.506400991121206, "grad_norm": 0.05847569555044174, "learning_rate": 0.01, "loss": 1.9464, "step": 92079 }, { "epoch": 9.506710716498038, "grad_norm": 0.049981147050857544, "learning_rate": 0.01, "loss": 1.9202, "step": 92082 }, { "epoch": 9.507020441874872, "grad_norm": 0.056745145469903946, "learning_rate": 0.01, "loss": 1.9651, "step": 92085 }, { "epoch": 9.507330167251704, "grad_norm": 0.08482272922992706, "learning_rate": 0.01, "loss": 1.9355, "step": 92088 }, { "epoch": 9.507639892628536, "grad_norm": 0.036681801080703735, "learning_rate": 0.01, "loss": 1.9468, "step": 92091 }, { "epoch": 9.507949618005368, "grad_norm": 0.05905177444219589, "learning_rate": 0.01, "loss": 1.9321, "step": 92094 }, { "epoch": 9.508259343382202, "grad_norm": 0.09597323834896088, "learning_rate": 0.01, "loss": 1.938, "step": 92097 }, { "epoch": 9.508569068759034, "grad_norm": 0.07549446821212769, "learning_rate": 0.01, "loss": 1.9172, "step": 92100 }, { "epoch": 9.508878794135866, "grad_norm": 0.06610140949487686, "learning_rate": 0.01, "loss": 1.937, "step": 92103 }, { "epoch": 9.5091885195127, "grad_norm": 0.05344447121024132, "learning_rate": 0.01, "loss": 1.9424, "step": 92106 }, { "epoch": 9.509498244889532, "grad_norm": 0.04548199102282524, "learning_rate": 0.01, "loss": 1.9298, "step": 92109 }, { "epoch": 9.509807970266364, "grad_norm": 0.06513753533363342, "learning_rate": 0.01, "loss": 1.9306, "step": 92112 }, { "epoch": 9.510117695643196, "grad_norm": 0.04577668383717537, "learning_rate": 0.01, "loss": 1.928, "step": 92115 }, { "epoch": 9.51042742102003, "grad_norm": 0.1895677149295807, "learning_rate": 0.01, "loss": 1.9041, "step": 92118 }, { "epoch": 9.510737146396862, "grad_norm": 0.04677163064479828, "learning_rate": 0.01, "loss": 1.947, "step": 92121 }, { "epoch": 9.511046871773694, "grad_norm": 0.04040669649839401, "learning_rate": 0.01, "loss": 1.9537, "step": 92124 }, { "epoch": 9.511356597150527, "grad_norm": 0.034733910113573074, "learning_rate": 0.01, "loss": 1.9418, "step": 92127 }, { "epoch": 9.51166632252736, "grad_norm": 0.04119290038943291, "learning_rate": 0.01, "loss": 1.9552, "step": 92130 }, { "epoch": 9.511976047904191, "grad_norm": 0.05976971238851547, "learning_rate": 0.01, "loss": 1.9365, "step": 92133 }, { "epoch": 9.512285773281024, "grad_norm": 0.099224753677845, "learning_rate": 0.01, "loss": 1.9389, "step": 92136 }, { "epoch": 9.512595498657857, "grad_norm": 0.1720251590013504, "learning_rate": 0.01, "loss": 1.9196, "step": 92139 }, { "epoch": 9.51290522403469, "grad_norm": 0.10159769654273987, "learning_rate": 0.01, "loss": 1.946, "step": 92142 }, { "epoch": 9.513214949411521, "grad_norm": 0.07902245968580246, "learning_rate": 0.01, "loss": 1.9549, "step": 92145 }, { "epoch": 9.513524674788354, "grad_norm": 0.05916019156575203, "learning_rate": 0.01, "loss": 1.9459, "step": 92148 }, { "epoch": 9.513834400165187, "grad_norm": 0.07787574082612991, "learning_rate": 0.01, "loss": 1.9194, "step": 92151 }, { "epoch": 9.51414412554202, "grad_norm": 0.05786137282848358, "learning_rate": 0.01, "loss": 1.9485, "step": 92154 }, { "epoch": 9.514453850918851, "grad_norm": 0.06773325055837631, "learning_rate": 0.01, "loss": 1.9496, "step": 92157 }, { "epoch": 9.514763576295685, "grad_norm": 0.08434304594993591, "learning_rate": 0.01, "loss": 1.927, "step": 92160 }, { "epoch": 9.515073301672517, "grad_norm": 0.04950158670544624, "learning_rate": 0.01, "loss": 1.9558, "step": 92163 }, { "epoch": 9.51538302704935, "grad_norm": 0.06106577441096306, "learning_rate": 0.01, "loss": 1.941, "step": 92166 }, { "epoch": 9.515692752426181, "grad_norm": 0.07273181527853012, "learning_rate": 0.01, "loss": 1.9167, "step": 92169 }, { "epoch": 9.516002477803015, "grad_norm": 0.0501217283308506, "learning_rate": 0.01, "loss": 1.9284, "step": 92172 }, { "epoch": 9.516312203179847, "grad_norm": 0.09720636904239655, "learning_rate": 0.01, "loss": 1.9117, "step": 92175 }, { "epoch": 9.51662192855668, "grad_norm": 0.10266958177089691, "learning_rate": 0.01, "loss": 1.9411, "step": 92178 }, { "epoch": 9.516931653933511, "grad_norm": 0.059654563665390015, "learning_rate": 0.01, "loss": 1.9289, "step": 92181 }, { "epoch": 9.517241379310345, "grad_norm": 0.029505256563425064, "learning_rate": 0.01, "loss": 1.917, "step": 92184 }, { "epoch": 9.517551104687177, "grad_norm": 0.03506319597363472, "learning_rate": 0.01, "loss": 1.9233, "step": 92187 }, { "epoch": 9.51786083006401, "grad_norm": 0.058983176946640015, "learning_rate": 0.01, "loss": 1.9539, "step": 92190 }, { "epoch": 9.518170555440843, "grad_norm": 0.08283732086420059, "learning_rate": 0.01, "loss": 1.9334, "step": 92193 }, { "epoch": 9.518480280817675, "grad_norm": 0.06719473749399185, "learning_rate": 0.01, "loss": 1.9558, "step": 92196 }, { "epoch": 9.518790006194507, "grad_norm": 0.13410811126232147, "learning_rate": 0.01, "loss": 1.9103, "step": 92199 }, { "epoch": 9.51909973157134, "grad_norm": 0.04871629923582077, "learning_rate": 0.01, "loss": 1.946, "step": 92202 }, { "epoch": 9.519409456948173, "grad_norm": 0.06901410222053528, "learning_rate": 0.01, "loss": 1.9489, "step": 92205 }, { "epoch": 9.519719182325005, "grad_norm": 0.04040338844060898, "learning_rate": 0.01, "loss": 1.9422, "step": 92208 }, { "epoch": 9.520028907701837, "grad_norm": 0.04763796925544739, "learning_rate": 0.01, "loss": 1.9315, "step": 92211 }, { "epoch": 9.520338633078671, "grad_norm": 0.056972626596689224, "learning_rate": 0.01, "loss": 1.9441, "step": 92214 }, { "epoch": 9.520648358455503, "grad_norm": 0.0520622618496418, "learning_rate": 0.01, "loss": 1.9439, "step": 92217 }, { "epoch": 9.520958083832335, "grad_norm": 0.11873430758714676, "learning_rate": 0.01, "loss": 1.9368, "step": 92220 }, { "epoch": 9.521267809209167, "grad_norm": 0.04310165345668793, "learning_rate": 0.01, "loss": 1.9404, "step": 92223 }, { "epoch": 9.521577534586001, "grad_norm": 0.10814736783504486, "learning_rate": 0.01, "loss": 1.9512, "step": 92226 }, { "epoch": 9.521887259962833, "grad_norm": 0.0352197028696537, "learning_rate": 0.01, "loss": 1.9259, "step": 92229 }, { "epoch": 9.522196985339665, "grad_norm": 0.07474922388792038, "learning_rate": 0.01, "loss": 1.9349, "step": 92232 }, { "epoch": 9.522506710716499, "grad_norm": 0.07361693680286407, "learning_rate": 0.01, "loss": 1.9338, "step": 92235 }, { "epoch": 9.522816436093331, "grad_norm": 0.07509438693523407, "learning_rate": 0.01, "loss": 1.9187, "step": 92238 }, { "epoch": 9.523126161470163, "grad_norm": 0.053927212953567505, "learning_rate": 0.01, "loss": 1.9536, "step": 92241 }, { "epoch": 9.523435886846995, "grad_norm": 0.04635823145508766, "learning_rate": 0.01, "loss": 1.9139, "step": 92244 }, { "epoch": 9.523745612223829, "grad_norm": 0.06367374211549759, "learning_rate": 0.01, "loss": 1.9364, "step": 92247 }, { "epoch": 9.524055337600661, "grad_norm": 0.08359774947166443, "learning_rate": 0.01, "loss": 1.9455, "step": 92250 }, { "epoch": 9.524365062977493, "grad_norm": 0.05048409104347229, "learning_rate": 0.01, "loss": 1.9204, "step": 92253 }, { "epoch": 9.524674788354325, "grad_norm": 0.050950534641742706, "learning_rate": 0.01, "loss": 1.9196, "step": 92256 }, { "epoch": 9.524984513731159, "grad_norm": 0.09251030534505844, "learning_rate": 0.01, "loss": 1.9521, "step": 92259 }, { "epoch": 9.525294239107991, "grad_norm": 0.10388283431529999, "learning_rate": 0.01, "loss": 1.9336, "step": 92262 }, { "epoch": 9.525603964484823, "grad_norm": 0.06290477514266968, "learning_rate": 0.01, "loss": 1.9441, "step": 92265 }, { "epoch": 9.525913689861657, "grad_norm": 0.06396323442459106, "learning_rate": 0.01, "loss": 1.9292, "step": 92268 }, { "epoch": 9.526223415238489, "grad_norm": 0.049475327134132385, "learning_rate": 0.01, "loss": 1.939, "step": 92271 }, { "epoch": 9.52653314061532, "grad_norm": 0.04767010733485222, "learning_rate": 0.01, "loss": 1.9549, "step": 92274 }, { "epoch": 9.526842865992153, "grad_norm": 0.050544627010822296, "learning_rate": 0.01, "loss": 1.949, "step": 92277 }, { "epoch": 9.527152591368987, "grad_norm": 0.05860473960638046, "learning_rate": 0.01, "loss": 1.958, "step": 92280 }, { "epoch": 9.527462316745819, "grad_norm": 0.053062696009874344, "learning_rate": 0.01, "loss": 1.9595, "step": 92283 }, { "epoch": 9.52777204212265, "grad_norm": 0.04594952240586281, "learning_rate": 0.01, "loss": 1.9167, "step": 92286 }, { "epoch": 9.528081767499483, "grad_norm": 0.04546007886528969, "learning_rate": 0.01, "loss": 1.9282, "step": 92289 }, { "epoch": 9.528391492876317, "grad_norm": 0.13376648724079132, "learning_rate": 0.01, "loss": 1.9488, "step": 92292 }, { "epoch": 9.528701218253149, "grad_norm": 0.039482396095991135, "learning_rate": 0.01, "loss": 1.9156, "step": 92295 }, { "epoch": 9.52901094362998, "grad_norm": 0.08189577609300613, "learning_rate": 0.01, "loss": 1.9342, "step": 92298 }, { "epoch": 9.529320669006815, "grad_norm": 0.043656591325998306, "learning_rate": 0.01, "loss": 1.9406, "step": 92301 }, { "epoch": 9.529630394383647, "grad_norm": 0.047711532562971115, "learning_rate": 0.01, "loss": 1.9417, "step": 92304 }, { "epoch": 9.529940119760479, "grad_norm": 0.0450308732688427, "learning_rate": 0.01, "loss": 1.8815, "step": 92307 }, { "epoch": 9.53024984513731, "grad_norm": 0.038819797337055206, "learning_rate": 0.01, "loss": 1.9448, "step": 92310 }, { "epoch": 9.530559570514145, "grad_norm": 0.07087231427431107, "learning_rate": 0.01, "loss": 1.9421, "step": 92313 }, { "epoch": 9.530869295890977, "grad_norm": 0.09496890753507614, "learning_rate": 0.01, "loss": 1.9435, "step": 92316 }, { "epoch": 9.531179021267809, "grad_norm": 0.11516691744327545, "learning_rate": 0.01, "loss": 1.917, "step": 92319 }, { "epoch": 9.531488746644643, "grad_norm": 0.07193636149168015, "learning_rate": 0.01, "loss": 1.9315, "step": 92322 }, { "epoch": 9.531798472021475, "grad_norm": 0.06717932969331741, "learning_rate": 0.01, "loss": 1.9247, "step": 92325 }, { "epoch": 9.532108197398307, "grad_norm": 0.06636080890893936, "learning_rate": 0.01, "loss": 1.9305, "step": 92328 }, { "epoch": 9.532417922775139, "grad_norm": 0.03784407302737236, "learning_rate": 0.01, "loss": 1.9264, "step": 92331 }, { "epoch": 9.532727648151972, "grad_norm": 0.14190025627613068, "learning_rate": 0.01, "loss": 1.969, "step": 92334 }, { "epoch": 9.533037373528805, "grad_norm": 0.0631750226020813, "learning_rate": 0.01, "loss": 1.9116, "step": 92337 }, { "epoch": 9.533347098905637, "grad_norm": 0.058418259024620056, "learning_rate": 0.01, "loss": 1.95, "step": 92340 }, { "epoch": 9.53365682428247, "grad_norm": 0.05124678462743759, "learning_rate": 0.01, "loss": 1.952, "step": 92343 }, { "epoch": 9.533966549659302, "grad_norm": 0.040973249822854996, "learning_rate": 0.01, "loss": 1.9483, "step": 92346 }, { "epoch": 9.534276275036135, "grad_norm": 0.039508648216724396, "learning_rate": 0.01, "loss": 1.9549, "step": 92349 }, { "epoch": 9.534586000412967, "grad_norm": 0.04920020326972008, "learning_rate": 0.01, "loss": 1.9555, "step": 92352 }, { "epoch": 9.5348957257898, "grad_norm": 0.06824755668640137, "learning_rate": 0.01, "loss": 1.9453, "step": 92355 }, { "epoch": 9.535205451166632, "grad_norm": 0.061849042773246765, "learning_rate": 0.01, "loss": 1.923, "step": 92358 }, { "epoch": 9.535515176543464, "grad_norm": 0.04916265979409218, "learning_rate": 0.01, "loss": 1.9268, "step": 92361 }, { "epoch": 9.535824901920297, "grad_norm": 0.054886508733034134, "learning_rate": 0.01, "loss": 1.9314, "step": 92364 }, { "epoch": 9.53613462729713, "grad_norm": 0.04167830944061279, "learning_rate": 0.01, "loss": 1.9372, "step": 92367 }, { "epoch": 9.536444352673962, "grad_norm": 0.10808145999908447, "learning_rate": 0.01, "loss": 1.9498, "step": 92370 }, { "epoch": 9.536754078050794, "grad_norm": 0.08303140848875046, "learning_rate": 0.01, "loss": 1.9327, "step": 92373 }, { "epoch": 9.537063803427628, "grad_norm": 0.046498388051986694, "learning_rate": 0.01, "loss": 1.9385, "step": 92376 }, { "epoch": 9.53737352880446, "grad_norm": 0.07408872991800308, "learning_rate": 0.01, "loss": 1.9252, "step": 92379 }, { "epoch": 9.537683254181292, "grad_norm": 0.03534821420907974, "learning_rate": 0.01, "loss": 1.9445, "step": 92382 }, { "epoch": 9.537992979558124, "grad_norm": 0.050212305039167404, "learning_rate": 0.01, "loss": 1.9411, "step": 92385 }, { "epoch": 9.538302704934958, "grad_norm": 0.03994080796837807, "learning_rate": 0.01, "loss": 1.9461, "step": 92388 }, { "epoch": 9.53861243031179, "grad_norm": 0.04362984001636505, "learning_rate": 0.01, "loss": 1.9291, "step": 92391 }, { "epoch": 9.538922155688622, "grad_norm": 0.0660531148314476, "learning_rate": 0.01, "loss": 1.9449, "step": 92394 }, { "epoch": 9.539231881065454, "grad_norm": 0.11909878253936768, "learning_rate": 0.01, "loss": 1.9456, "step": 92397 }, { "epoch": 9.539541606442288, "grad_norm": 0.06280426681041718, "learning_rate": 0.01, "loss": 1.9442, "step": 92400 }, { "epoch": 9.53985133181912, "grad_norm": 0.07050773501396179, "learning_rate": 0.01, "loss": 1.931, "step": 92403 }, { "epoch": 9.540161057195952, "grad_norm": 0.031119951978325844, "learning_rate": 0.01, "loss": 1.9336, "step": 92406 }, { "epoch": 9.540470782572786, "grad_norm": 0.04030071943998337, "learning_rate": 0.01, "loss": 1.9152, "step": 92409 }, { "epoch": 9.540780507949618, "grad_norm": 0.11780594289302826, "learning_rate": 0.01, "loss": 1.9325, "step": 92412 }, { "epoch": 9.54109023332645, "grad_norm": 0.07259298115968704, "learning_rate": 0.01, "loss": 1.9482, "step": 92415 }, { "epoch": 9.541399958703282, "grad_norm": 0.07972489297389984, "learning_rate": 0.01, "loss": 1.9271, "step": 92418 }, { "epoch": 9.541709684080116, "grad_norm": 0.06060254946351051, "learning_rate": 0.01, "loss": 1.9417, "step": 92421 }, { "epoch": 9.542019409456948, "grad_norm": 0.057979363948106766, "learning_rate": 0.01, "loss": 1.9549, "step": 92424 }, { "epoch": 9.54232913483378, "grad_norm": 0.042889319360256195, "learning_rate": 0.01, "loss": 1.9508, "step": 92427 }, { "epoch": 9.542638860210614, "grad_norm": 0.11210845410823822, "learning_rate": 0.01, "loss": 1.9521, "step": 92430 }, { "epoch": 9.542948585587446, "grad_norm": 0.09843835234642029, "learning_rate": 0.01, "loss": 1.9436, "step": 92433 }, { "epoch": 9.543258310964278, "grad_norm": 0.03776775673031807, "learning_rate": 0.01, "loss": 1.9312, "step": 92436 }, { "epoch": 9.54356803634111, "grad_norm": 0.06357332319021225, "learning_rate": 0.01, "loss": 1.9111, "step": 92439 }, { "epoch": 9.543877761717944, "grad_norm": 0.0598602257668972, "learning_rate": 0.01, "loss": 1.9362, "step": 92442 }, { "epoch": 9.544187487094776, "grad_norm": 0.04040971025824547, "learning_rate": 0.01, "loss": 1.9199, "step": 92445 }, { "epoch": 9.544497212471608, "grad_norm": 0.055280424654483795, "learning_rate": 0.01, "loss": 1.9391, "step": 92448 }, { "epoch": 9.544806937848442, "grad_norm": 0.06608704477548599, "learning_rate": 0.01, "loss": 1.9466, "step": 92451 }, { "epoch": 9.545116663225274, "grad_norm": 0.04509027302265167, "learning_rate": 0.01, "loss": 1.9348, "step": 92454 }, { "epoch": 9.545426388602106, "grad_norm": 0.12730321288108826, "learning_rate": 0.01, "loss": 1.9274, "step": 92457 }, { "epoch": 9.545736113978938, "grad_norm": 0.05638007074594498, "learning_rate": 0.01, "loss": 1.9545, "step": 92460 }, { "epoch": 9.546045839355772, "grad_norm": 0.0741688534617424, "learning_rate": 0.01, "loss": 1.9095, "step": 92463 }, { "epoch": 9.546355564732604, "grad_norm": 0.11796123534440994, "learning_rate": 0.01, "loss": 1.9914, "step": 92466 }, { "epoch": 9.546665290109436, "grad_norm": 0.05692535266280174, "learning_rate": 0.01, "loss": 1.9016, "step": 92469 }, { "epoch": 9.546975015486268, "grad_norm": 0.060367029160261154, "learning_rate": 0.01, "loss": 1.9624, "step": 92472 }, { "epoch": 9.547284740863102, "grad_norm": 0.062419138848781586, "learning_rate": 0.01, "loss": 1.9492, "step": 92475 }, { "epoch": 9.547594466239934, "grad_norm": 0.04204673692584038, "learning_rate": 0.01, "loss": 1.9258, "step": 92478 }, { "epoch": 9.547904191616766, "grad_norm": 0.07150115817785263, "learning_rate": 0.01, "loss": 1.9264, "step": 92481 }, { "epoch": 9.5482139169936, "grad_norm": 0.08063635975122452, "learning_rate": 0.01, "loss": 1.9221, "step": 92484 }, { "epoch": 9.548523642370432, "grad_norm": 0.06927258521318436, "learning_rate": 0.01, "loss": 1.92, "step": 92487 }, { "epoch": 9.548833367747264, "grad_norm": 0.08684887737035751, "learning_rate": 0.01, "loss": 1.9407, "step": 92490 }, { "epoch": 9.549143093124096, "grad_norm": 0.10413134843111038, "learning_rate": 0.01, "loss": 1.9372, "step": 92493 }, { "epoch": 9.54945281850093, "grad_norm": 0.05053870007395744, "learning_rate": 0.01, "loss": 1.9434, "step": 92496 }, { "epoch": 9.549762543877762, "grad_norm": 0.1296710968017578, "learning_rate": 0.01, "loss": 1.9601, "step": 92499 }, { "epoch": 9.550072269254594, "grad_norm": 0.052506376057863235, "learning_rate": 0.01, "loss": 1.9421, "step": 92502 }, { "epoch": 9.550381994631426, "grad_norm": 0.0511232390999794, "learning_rate": 0.01, "loss": 1.9881, "step": 92505 }, { "epoch": 9.55069172000826, "grad_norm": 0.05901256203651428, "learning_rate": 0.01, "loss": 1.9477, "step": 92508 }, { "epoch": 9.551001445385092, "grad_norm": 0.03338488191366196, "learning_rate": 0.01, "loss": 1.923, "step": 92511 }, { "epoch": 9.551311170761924, "grad_norm": 0.04231966286897659, "learning_rate": 0.01, "loss": 1.926, "step": 92514 }, { "epoch": 9.551620896138758, "grad_norm": 0.061377670615911484, "learning_rate": 0.01, "loss": 1.9624, "step": 92517 }, { "epoch": 9.55193062151559, "grad_norm": 0.06680765748023987, "learning_rate": 0.01, "loss": 1.9513, "step": 92520 }, { "epoch": 9.552240346892422, "grad_norm": 0.0845053419470787, "learning_rate": 0.01, "loss": 1.937, "step": 92523 }, { "epoch": 9.552550072269254, "grad_norm": 0.06742209941148758, "learning_rate": 0.01, "loss": 1.9095, "step": 92526 }, { "epoch": 9.552859797646088, "grad_norm": 0.0977376326918602, "learning_rate": 0.01, "loss": 1.9466, "step": 92529 }, { "epoch": 9.55316952302292, "grad_norm": 0.054616544395685196, "learning_rate": 0.01, "loss": 1.9364, "step": 92532 }, { "epoch": 9.553479248399752, "grad_norm": 0.09607339650392532, "learning_rate": 0.01, "loss": 1.9074, "step": 92535 }, { "epoch": 9.553788973776586, "grad_norm": 0.04456086456775665, "learning_rate": 0.01, "loss": 1.9511, "step": 92538 }, { "epoch": 9.554098699153418, "grad_norm": 0.07154840975999832, "learning_rate": 0.01, "loss": 1.9595, "step": 92541 }, { "epoch": 9.55440842453025, "grad_norm": 0.08459034562110901, "learning_rate": 0.01, "loss": 1.936, "step": 92544 }, { "epoch": 9.554718149907082, "grad_norm": 0.03846704959869385, "learning_rate": 0.01, "loss": 1.9508, "step": 92547 }, { "epoch": 9.555027875283916, "grad_norm": 0.09594748169183731, "learning_rate": 0.01, "loss": 1.9297, "step": 92550 }, { "epoch": 9.555337600660748, "grad_norm": 0.0669037476181984, "learning_rate": 0.01, "loss": 1.9367, "step": 92553 }, { "epoch": 9.55564732603758, "grad_norm": 0.06939956545829773, "learning_rate": 0.01, "loss": 1.9302, "step": 92556 }, { "epoch": 9.555957051414413, "grad_norm": 0.07697443664073944, "learning_rate": 0.01, "loss": 1.9189, "step": 92559 }, { "epoch": 9.556266776791245, "grad_norm": 0.13253872096538544, "learning_rate": 0.01, "loss": 1.9308, "step": 92562 }, { "epoch": 9.556576502168078, "grad_norm": 0.040784794837236404, "learning_rate": 0.01, "loss": 1.9583, "step": 92565 }, { "epoch": 9.55688622754491, "grad_norm": 0.04093778505921364, "learning_rate": 0.01, "loss": 1.9481, "step": 92568 }, { "epoch": 9.557195952921743, "grad_norm": 0.045812614262104034, "learning_rate": 0.01, "loss": 1.9051, "step": 92571 }, { "epoch": 9.557505678298575, "grad_norm": 0.04944195970892906, "learning_rate": 0.01, "loss": 1.9369, "step": 92574 }, { "epoch": 9.557815403675407, "grad_norm": 0.04963093623518944, "learning_rate": 0.01, "loss": 1.9288, "step": 92577 }, { "epoch": 9.55812512905224, "grad_norm": 0.11150357872247696, "learning_rate": 0.01, "loss": 1.926, "step": 92580 }, { "epoch": 9.558434854429073, "grad_norm": 0.07571767270565033, "learning_rate": 0.01, "loss": 1.933, "step": 92583 }, { "epoch": 9.558744579805905, "grad_norm": 0.11754986643791199, "learning_rate": 0.01, "loss": 1.9507, "step": 92586 }, { "epoch": 9.559054305182737, "grad_norm": 0.05976712331175804, "learning_rate": 0.01, "loss": 1.9263, "step": 92589 }, { "epoch": 9.559364030559571, "grad_norm": 0.04479925334453583, "learning_rate": 0.01, "loss": 1.9074, "step": 92592 }, { "epoch": 9.559673755936403, "grad_norm": 0.047420915216207504, "learning_rate": 0.01, "loss": 1.9393, "step": 92595 }, { "epoch": 9.559983481313235, "grad_norm": 0.08457601070404053, "learning_rate": 0.01, "loss": 1.9232, "step": 92598 }, { "epoch": 9.560293206690067, "grad_norm": 0.09099945425987244, "learning_rate": 0.01, "loss": 1.9634, "step": 92601 }, { "epoch": 9.560602932066901, "grad_norm": 0.06987515091896057, "learning_rate": 0.01, "loss": 1.9126, "step": 92604 }, { "epoch": 9.560912657443733, "grad_norm": 0.05262571573257446, "learning_rate": 0.01, "loss": 1.939, "step": 92607 }, { "epoch": 9.561222382820565, "grad_norm": 0.05382346734404564, "learning_rate": 0.01, "loss": 1.9308, "step": 92610 }, { "epoch": 9.561532108197397, "grad_norm": 0.0843144878745079, "learning_rate": 0.01, "loss": 1.9234, "step": 92613 }, { "epoch": 9.561841833574231, "grad_norm": 0.09895461797714233, "learning_rate": 0.01, "loss": 1.9307, "step": 92616 }, { "epoch": 9.562151558951063, "grad_norm": 0.036865390837192535, "learning_rate": 0.01, "loss": 1.9158, "step": 92619 }, { "epoch": 9.562461284327895, "grad_norm": 0.04547613859176636, "learning_rate": 0.01, "loss": 1.9352, "step": 92622 }, { "epoch": 9.56277100970473, "grad_norm": 0.04785327613353729, "learning_rate": 0.01, "loss": 1.9339, "step": 92625 }, { "epoch": 9.563080735081561, "grad_norm": 0.03501299396157265, "learning_rate": 0.01, "loss": 1.9213, "step": 92628 }, { "epoch": 9.563390460458393, "grad_norm": 0.09908343851566315, "learning_rate": 0.01, "loss": 1.9159, "step": 92631 }, { "epoch": 9.563700185835225, "grad_norm": 0.04419758915901184, "learning_rate": 0.01, "loss": 1.9197, "step": 92634 }, { "epoch": 9.56400991121206, "grad_norm": 0.0945008248090744, "learning_rate": 0.01, "loss": 1.9461, "step": 92637 }, { "epoch": 9.564319636588891, "grad_norm": 0.05049658194184303, "learning_rate": 0.01, "loss": 1.9301, "step": 92640 }, { "epoch": 9.564629361965723, "grad_norm": 0.044659726321697235, "learning_rate": 0.01, "loss": 1.9525, "step": 92643 }, { "epoch": 9.564939087342557, "grad_norm": 0.09373857825994492, "learning_rate": 0.01, "loss": 1.932, "step": 92646 }, { "epoch": 9.565248812719389, "grad_norm": 0.0912860855460167, "learning_rate": 0.01, "loss": 1.9336, "step": 92649 }, { "epoch": 9.565558538096221, "grad_norm": 0.07291973382234573, "learning_rate": 0.01, "loss": 1.9109, "step": 92652 }, { "epoch": 9.565868263473053, "grad_norm": 0.08288957923650742, "learning_rate": 0.01, "loss": 1.9501, "step": 92655 }, { "epoch": 9.566177988849887, "grad_norm": 0.06711997836828232, "learning_rate": 0.01, "loss": 1.9316, "step": 92658 }, { "epoch": 9.566487714226719, "grad_norm": 0.048471078276634216, "learning_rate": 0.01, "loss": 1.9545, "step": 92661 }, { "epoch": 9.566797439603551, "grad_norm": 0.10623566806316376, "learning_rate": 0.01, "loss": 1.9318, "step": 92664 }, { "epoch": 9.567107164980385, "grad_norm": 0.06134321168065071, "learning_rate": 0.01, "loss": 1.9289, "step": 92667 }, { "epoch": 9.567416890357217, "grad_norm": 0.05728840082883835, "learning_rate": 0.01, "loss": 1.9377, "step": 92670 }, { "epoch": 9.567726615734049, "grad_norm": 0.07476432621479034, "learning_rate": 0.01, "loss": 1.9188, "step": 92673 }, { "epoch": 9.568036341110881, "grad_norm": 0.11206084489822388, "learning_rate": 0.01, "loss": 1.9618, "step": 92676 }, { "epoch": 9.568346066487715, "grad_norm": 0.05124055594205856, "learning_rate": 0.01, "loss": 1.9293, "step": 92679 }, { "epoch": 9.568655791864547, "grad_norm": 0.05256558582186699, "learning_rate": 0.01, "loss": 1.9255, "step": 92682 }, { "epoch": 9.568965517241379, "grad_norm": 0.04224633798003197, "learning_rate": 0.01, "loss": 1.9289, "step": 92685 }, { "epoch": 9.569275242618211, "grad_norm": 0.035335294902324677, "learning_rate": 0.01, "loss": 1.9329, "step": 92688 }, { "epoch": 9.569584967995045, "grad_norm": 0.09272423386573792, "learning_rate": 0.01, "loss": 1.9397, "step": 92691 }, { "epoch": 9.569894693371877, "grad_norm": 0.10086727887392044, "learning_rate": 0.01, "loss": 1.9313, "step": 92694 }, { "epoch": 9.570204418748709, "grad_norm": 0.0433303527534008, "learning_rate": 0.01, "loss": 1.9152, "step": 92697 }, { "epoch": 9.570514144125543, "grad_norm": 0.05553479865193367, "learning_rate": 0.01, "loss": 1.9426, "step": 92700 }, { "epoch": 9.570823869502375, "grad_norm": 0.048300497233867645, "learning_rate": 0.01, "loss": 1.9388, "step": 92703 }, { "epoch": 9.571133594879207, "grad_norm": 0.04815465584397316, "learning_rate": 0.01, "loss": 1.9421, "step": 92706 }, { "epoch": 9.571443320256039, "grad_norm": 0.035893216729164124, "learning_rate": 0.01, "loss": 1.9189, "step": 92709 }, { "epoch": 9.571753045632873, "grad_norm": 0.09880388528108597, "learning_rate": 0.01, "loss": 1.9683, "step": 92712 }, { "epoch": 9.572062771009705, "grad_norm": 0.08409396559000015, "learning_rate": 0.01, "loss": 1.9632, "step": 92715 }, { "epoch": 9.572372496386537, "grad_norm": 0.0986364409327507, "learning_rate": 0.01, "loss": 1.9185, "step": 92718 }, { "epoch": 9.572682221763369, "grad_norm": 0.08425180613994598, "learning_rate": 0.01, "loss": 1.927, "step": 92721 }, { "epoch": 9.572991947140203, "grad_norm": 0.11412467062473297, "learning_rate": 0.01, "loss": 1.9345, "step": 92724 }, { "epoch": 9.573301672517035, "grad_norm": 0.06946521252393723, "learning_rate": 0.01, "loss": 1.952, "step": 92727 }, { "epoch": 9.573611397893867, "grad_norm": 0.07573796808719635, "learning_rate": 0.01, "loss": 1.928, "step": 92730 }, { "epoch": 9.5739211232707, "grad_norm": 0.060061559081077576, "learning_rate": 0.01, "loss": 1.949, "step": 92733 }, { "epoch": 9.574230848647533, "grad_norm": 0.036126162856817245, "learning_rate": 0.01, "loss": 1.9169, "step": 92736 }, { "epoch": 9.574540574024365, "grad_norm": 0.03753150627017021, "learning_rate": 0.01, "loss": 1.9469, "step": 92739 }, { "epoch": 9.574850299401197, "grad_norm": 0.03168240562081337, "learning_rate": 0.01, "loss": 1.9455, "step": 92742 }, { "epoch": 9.57516002477803, "grad_norm": 0.043251775205135345, "learning_rate": 0.01, "loss": 1.916, "step": 92745 }, { "epoch": 9.575469750154863, "grad_norm": 0.06953471899032593, "learning_rate": 0.01, "loss": 1.9217, "step": 92748 }, { "epoch": 9.575779475531695, "grad_norm": 0.1758134961128235, "learning_rate": 0.01, "loss": 1.9413, "step": 92751 }, { "epoch": 9.576089200908529, "grad_norm": 0.06691476702690125, "learning_rate": 0.01, "loss": 1.9416, "step": 92754 }, { "epoch": 9.57639892628536, "grad_norm": 0.06858311593532562, "learning_rate": 0.01, "loss": 1.9097, "step": 92757 }, { "epoch": 9.576708651662193, "grad_norm": 0.04022316634654999, "learning_rate": 0.01, "loss": 1.939, "step": 92760 }, { "epoch": 9.577018377039025, "grad_norm": 0.038416821509599686, "learning_rate": 0.01, "loss": 1.9371, "step": 92763 }, { "epoch": 9.577328102415859, "grad_norm": 0.04924089461565018, "learning_rate": 0.01, "loss": 1.9352, "step": 92766 }, { "epoch": 9.57763782779269, "grad_norm": 0.12462669610977173, "learning_rate": 0.01, "loss": 1.964, "step": 92769 }, { "epoch": 9.577947553169523, "grad_norm": 0.07955029606819153, "learning_rate": 0.01, "loss": 1.9315, "step": 92772 }, { "epoch": 9.578257278546356, "grad_norm": 0.0630539059638977, "learning_rate": 0.01, "loss": 1.9015, "step": 92775 }, { "epoch": 9.578567003923188, "grad_norm": 0.04444093257188797, "learning_rate": 0.01, "loss": 1.8976, "step": 92778 }, { "epoch": 9.57887672930002, "grad_norm": 0.08930259197950363, "learning_rate": 0.01, "loss": 1.9299, "step": 92781 }, { "epoch": 9.579186454676853, "grad_norm": 0.04263152554631233, "learning_rate": 0.01, "loss": 1.9299, "step": 92784 }, { "epoch": 9.579496180053686, "grad_norm": 0.06594117730855942, "learning_rate": 0.01, "loss": 1.9427, "step": 92787 }, { "epoch": 9.579805905430518, "grad_norm": 0.0988270565867424, "learning_rate": 0.01, "loss": 1.9188, "step": 92790 }, { "epoch": 9.58011563080735, "grad_norm": 0.041310135275125504, "learning_rate": 0.01, "loss": 1.9211, "step": 92793 }, { "epoch": 9.580425356184183, "grad_norm": 0.051440875977277756, "learning_rate": 0.01, "loss": 1.9349, "step": 92796 }, { "epoch": 9.580735081561016, "grad_norm": 0.03437655046582222, "learning_rate": 0.01, "loss": 1.9366, "step": 92799 }, { "epoch": 9.581044806937848, "grad_norm": 0.07797033339738846, "learning_rate": 0.01, "loss": 1.9347, "step": 92802 }, { "epoch": 9.58135453231468, "grad_norm": 0.07684805989265442, "learning_rate": 0.01, "loss": 1.9511, "step": 92805 }, { "epoch": 9.581664257691514, "grad_norm": 0.04312015697360039, "learning_rate": 0.01, "loss": 1.9457, "step": 92808 }, { "epoch": 9.581973983068346, "grad_norm": 0.03924431651830673, "learning_rate": 0.01, "loss": 1.9253, "step": 92811 }, { "epoch": 9.582283708445178, "grad_norm": 0.06500852853059769, "learning_rate": 0.01, "loss": 1.9491, "step": 92814 }, { "epoch": 9.58259343382201, "grad_norm": 0.08903056383132935, "learning_rate": 0.01, "loss": 1.9319, "step": 92817 }, { "epoch": 9.582903159198844, "grad_norm": 0.08870675414800644, "learning_rate": 0.01, "loss": 1.9231, "step": 92820 }, { "epoch": 9.583212884575676, "grad_norm": 0.03884018585085869, "learning_rate": 0.01, "loss": 1.9452, "step": 92823 }, { "epoch": 9.583522609952508, "grad_norm": 0.1110977828502655, "learning_rate": 0.01, "loss": 1.9577, "step": 92826 }, { "epoch": 9.58383233532934, "grad_norm": 0.061049140989780426, "learning_rate": 0.01, "loss": 1.9477, "step": 92829 }, { "epoch": 9.584142060706174, "grad_norm": 0.0767841562628746, "learning_rate": 0.01, "loss": 1.9306, "step": 92832 }, { "epoch": 9.584451786083006, "grad_norm": 0.05373747646808624, "learning_rate": 0.01, "loss": 1.9186, "step": 92835 }, { "epoch": 9.584761511459838, "grad_norm": 0.07027586549520493, "learning_rate": 0.01, "loss": 1.932, "step": 92838 }, { "epoch": 9.585071236836672, "grad_norm": 0.035306427627801895, "learning_rate": 0.01, "loss": 1.954, "step": 92841 }, { "epoch": 9.585380962213504, "grad_norm": 0.049179621040821075, "learning_rate": 0.01, "loss": 1.919, "step": 92844 }, { "epoch": 9.585690687590336, "grad_norm": 0.11390917003154755, "learning_rate": 0.01, "loss": 1.9446, "step": 92847 }, { "epoch": 9.586000412967168, "grad_norm": 0.11494887620210648, "learning_rate": 0.01, "loss": 1.9144, "step": 92850 }, { "epoch": 9.586310138344002, "grad_norm": 0.057366497814655304, "learning_rate": 0.01, "loss": 1.9644, "step": 92853 }, { "epoch": 9.586619863720834, "grad_norm": 0.03873692825436592, "learning_rate": 0.01, "loss": 1.9469, "step": 92856 }, { "epoch": 9.586929589097666, "grad_norm": 0.0915505513548851, "learning_rate": 0.01, "loss": 1.9669, "step": 92859 }, { "epoch": 9.5872393144745, "grad_norm": 0.08138495683670044, "learning_rate": 0.01, "loss": 1.9276, "step": 92862 }, { "epoch": 9.587549039851332, "grad_norm": 0.042756564915180206, "learning_rate": 0.01, "loss": 1.9399, "step": 92865 }, { "epoch": 9.587858765228164, "grad_norm": 0.038679689168930054, "learning_rate": 0.01, "loss": 1.9104, "step": 92868 }, { "epoch": 9.588168490604996, "grad_norm": 0.060535311698913574, "learning_rate": 0.01, "loss": 1.9463, "step": 92871 }, { "epoch": 9.58847821598183, "grad_norm": 0.10231431573629379, "learning_rate": 0.01, "loss": 1.9309, "step": 92874 }, { "epoch": 9.588787941358662, "grad_norm": 0.04502299055457115, "learning_rate": 0.01, "loss": 1.9404, "step": 92877 }, { "epoch": 9.589097666735494, "grad_norm": 0.09531942009925842, "learning_rate": 0.01, "loss": 1.9289, "step": 92880 }, { "epoch": 9.589407392112328, "grad_norm": 0.08416978269815445, "learning_rate": 0.01, "loss": 1.931, "step": 92883 }, { "epoch": 9.58971711748916, "grad_norm": 0.10989884287118912, "learning_rate": 0.01, "loss": 1.9475, "step": 92886 }, { "epoch": 9.590026842865992, "grad_norm": 0.1414986252784729, "learning_rate": 0.01, "loss": 1.9521, "step": 92889 }, { "epoch": 9.590336568242824, "grad_norm": 0.08130434900522232, "learning_rate": 0.01, "loss": 1.9221, "step": 92892 }, { "epoch": 9.590646293619658, "grad_norm": 0.0677848532795906, "learning_rate": 0.01, "loss": 1.926, "step": 92895 }, { "epoch": 9.59095601899649, "grad_norm": 0.0400870256125927, "learning_rate": 0.01, "loss": 1.9242, "step": 92898 }, { "epoch": 9.591265744373322, "grad_norm": 0.04095206409692764, "learning_rate": 0.01, "loss": 1.8968, "step": 92901 }, { "epoch": 9.591575469750154, "grad_norm": 0.041344065219163895, "learning_rate": 0.01, "loss": 1.9185, "step": 92904 }, { "epoch": 9.591885195126988, "grad_norm": 0.0387251190841198, "learning_rate": 0.01, "loss": 1.9385, "step": 92907 }, { "epoch": 9.59219492050382, "grad_norm": 0.05318775400519371, "learning_rate": 0.01, "loss": 1.9352, "step": 92910 }, { "epoch": 9.592504645880652, "grad_norm": 0.11325523257255554, "learning_rate": 0.01, "loss": 1.9135, "step": 92913 }, { "epoch": 9.592814371257486, "grad_norm": 0.0618145689368248, "learning_rate": 0.01, "loss": 1.93, "step": 92916 }, { "epoch": 9.593124096634318, "grad_norm": 0.0990997925400734, "learning_rate": 0.01, "loss": 1.9212, "step": 92919 }, { "epoch": 9.59343382201115, "grad_norm": 0.046235982328653336, "learning_rate": 0.01, "loss": 1.9229, "step": 92922 }, { "epoch": 9.593743547387982, "grad_norm": 0.10246602445840836, "learning_rate": 0.01, "loss": 1.9557, "step": 92925 }, { "epoch": 9.594053272764816, "grad_norm": 0.10803071409463882, "learning_rate": 0.01, "loss": 1.9288, "step": 92928 }, { "epoch": 9.594362998141648, "grad_norm": 0.04555898904800415, "learning_rate": 0.01, "loss": 1.945, "step": 92931 }, { "epoch": 9.59467272351848, "grad_norm": 0.10263539105653763, "learning_rate": 0.01, "loss": 1.9812, "step": 92934 }, { "epoch": 9.594982448895312, "grad_norm": 0.05140804871916771, "learning_rate": 0.01, "loss": 1.9327, "step": 92937 }, { "epoch": 9.595292174272146, "grad_norm": 0.053982093930244446, "learning_rate": 0.01, "loss": 1.912, "step": 92940 }, { "epoch": 9.595601899648978, "grad_norm": 0.10115093737840652, "learning_rate": 0.01, "loss": 1.9258, "step": 92943 }, { "epoch": 9.59591162502581, "grad_norm": 0.04465701803565025, "learning_rate": 0.01, "loss": 1.9236, "step": 92946 }, { "epoch": 9.596221350402644, "grad_norm": 0.04940544068813324, "learning_rate": 0.01, "loss": 1.9254, "step": 92949 }, { "epoch": 9.596531075779476, "grad_norm": 0.05474409461021423, "learning_rate": 0.01, "loss": 1.9523, "step": 92952 }, { "epoch": 9.596840801156308, "grad_norm": 0.04154708608984947, "learning_rate": 0.01, "loss": 1.923, "step": 92955 }, { "epoch": 9.59715052653314, "grad_norm": 0.14576861262321472, "learning_rate": 0.01, "loss": 1.9416, "step": 92958 }, { "epoch": 9.597460251909974, "grad_norm": 0.09885968267917633, "learning_rate": 0.01, "loss": 1.9182, "step": 92961 }, { "epoch": 9.597769977286806, "grad_norm": 0.0651608407497406, "learning_rate": 0.01, "loss": 1.9315, "step": 92964 }, { "epoch": 9.598079702663638, "grad_norm": 0.06366647034883499, "learning_rate": 0.01, "loss": 1.9344, "step": 92967 }, { "epoch": 9.598389428040472, "grad_norm": 0.07258301228284836, "learning_rate": 0.01, "loss": 1.9329, "step": 92970 }, { "epoch": 9.598699153417304, "grad_norm": 0.051206622272729874, "learning_rate": 0.01, "loss": 1.9549, "step": 92973 }, { "epoch": 9.599008878794136, "grad_norm": 0.06507684290409088, "learning_rate": 0.01, "loss": 1.9239, "step": 92976 }, { "epoch": 9.599318604170968, "grad_norm": 0.03854987770318985, "learning_rate": 0.01, "loss": 1.9178, "step": 92979 }, { "epoch": 9.599628329547802, "grad_norm": 0.11893489211797714, "learning_rate": 0.01, "loss": 1.9294, "step": 92982 }, { "epoch": 9.599938054924634, "grad_norm": 0.06678856909275055, "learning_rate": 0.01, "loss": 1.9233, "step": 92985 }, { "epoch": 9.600247780301466, "grad_norm": 0.08233048021793365, "learning_rate": 0.01, "loss": 1.9318, "step": 92988 }, { "epoch": 9.6005575056783, "grad_norm": 0.05037279799580574, "learning_rate": 0.01, "loss": 1.9378, "step": 92991 }, { "epoch": 9.600867231055132, "grad_norm": 0.08859597146511078, "learning_rate": 0.01, "loss": 1.9375, "step": 92994 }, { "epoch": 9.601176956431964, "grad_norm": 0.10530859231948853, "learning_rate": 0.01, "loss": 1.9329, "step": 92997 }, { "epoch": 9.601486681808796, "grad_norm": 0.07208415120840073, "learning_rate": 0.01, "loss": 1.9317, "step": 93000 }, { "epoch": 9.60179640718563, "grad_norm": 0.0347345806658268, "learning_rate": 0.01, "loss": 1.9263, "step": 93003 }, { "epoch": 9.602106132562461, "grad_norm": 0.08983615785837173, "learning_rate": 0.01, "loss": 1.9338, "step": 93006 }, { "epoch": 9.602415857939294, "grad_norm": 0.03540850058197975, "learning_rate": 0.01, "loss": 1.9214, "step": 93009 }, { "epoch": 9.602725583316126, "grad_norm": 0.03276904672384262, "learning_rate": 0.01, "loss": 1.9039, "step": 93012 }, { "epoch": 9.60303530869296, "grad_norm": 0.09266610443592072, "learning_rate": 0.01, "loss": 1.9355, "step": 93015 }, { "epoch": 9.603345034069791, "grad_norm": 0.04843165725469589, "learning_rate": 0.01, "loss": 1.9264, "step": 93018 }, { "epoch": 9.603654759446624, "grad_norm": 0.05365300923585892, "learning_rate": 0.01, "loss": 1.9478, "step": 93021 }, { "epoch": 9.603964484823457, "grad_norm": 0.07595492154359818, "learning_rate": 0.01, "loss": 1.9384, "step": 93024 }, { "epoch": 9.60427421020029, "grad_norm": 0.05023255944252014, "learning_rate": 0.01, "loss": 1.919, "step": 93027 }, { "epoch": 9.604583935577121, "grad_norm": 0.07495186477899551, "learning_rate": 0.01, "loss": 1.9738, "step": 93030 }, { "epoch": 9.604893660953953, "grad_norm": 0.05238397419452667, "learning_rate": 0.01, "loss": 1.9315, "step": 93033 }, { "epoch": 9.605203386330787, "grad_norm": 0.08827707171440125, "learning_rate": 0.01, "loss": 1.9464, "step": 93036 }, { "epoch": 9.60551311170762, "grad_norm": 0.13802281022071838, "learning_rate": 0.01, "loss": 1.93, "step": 93039 }, { "epoch": 9.605822837084451, "grad_norm": 0.048278581351041794, "learning_rate": 0.01, "loss": 1.9399, "step": 93042 }, { "epoch": 9.606132562461283, "grad_norm": 0.04927634447813034, "learning_rate": 0.01, "loss": 1.9252, "step": 93045 }, { "epoch": 9.606442287838117, "grad_norm": 0.045615509152412415, "learning_rate": 0.01, "loss": 1.946, "step": 93048 }, { "epoch": 9.60675201321495, "grad_norm": 0.04975781962275505, "learning_rate": 0.01, "loss": 1.9463, "step": 93051 }, { "epoch": 9.607061738591781, "grad_norm": 0.058548714965581894, "learning_rate": 0.01, "loss": 1.9258, "step": 93054 }, { "epoch": 9.607371463968615, "grad_norm": 0.06122971326112747, "learning_rate": 0.01, "loss": 1.9553, "step": 93057 }, { "epoch": 9.607681189345447, "grad_norm": 0.0376429446041584, "learning_rate": 0.01, "loss": 1.9102, "step": 93060 }, { "epoch": 9.60799091472228, "grad_norm": 0.06916382163763046, "learning_rate": 0.01, "loss": 1.9367, "step": 93063 }, { "epoch": 9.608300640099111, "grad_norm": 0.09932322800159454, "learning_rate": 0.01, "loss": 1.9262, "step": 93066 }, { "epoch": 9.608610365475945, "grad_norm": 0.15092147886753082, "learning_rate": 0.01, "loss": 1.9438, "step": 93069 }, { "epoch": 9.608920090852777, "grad_norm": 0.12672391533851624, "learning_rate": 0.01, "loss": 1.9291, "step": 93072 }, { "epoch": 9.60922981622961, "grad_norm": 0.05705375596880913, "learning_rate": 0.01, "loss": 1.9583, "step": 93075 }, { "epoch": 9.609539541606443, "grad_norm": 0.06204779073596001, "learning_rate": 0.01, "loss": 1.9419, "step": 93078 }, { "epoch": 9.609849266983275, "grad_norm": 0.045914001762866974, "learning_rate": 0.01, "loss": 1.9434, "step": 93081 }, { "epoch": 9.610158992360107, "grad_norm": 0.03120456449687481, "learning_rate": 0.01, "loss": 1.9393, "step": 93084 }, { "epoch": 9.61046871773694, "grad_norm": 0.03763057291507721, "learning_rate": 0.01, "loss": 1.9184, "step": 93087 }, { "epoch": 9.610778443113773, "grad_norm": 0.04199619218707085, "learning_rate": 0.01, "loss": 1.9349, "step": 93090 }, { "epoch": 9.611088168490605, "grad_norm": 0.03523045405745506, "learning_rate": 0.01, "loss": 1.9244, "step": 93093 }, { "epoch": 9.611397893867437, "grad_norm": 0.12717922031879425, "learning_rate": 0.01, "loss": 1.9667, "step": 93096 }, { "epoch": 9.611707619244271, "grad_norm": 0.06919953227043152, "learning_rate": 0.01, "loss": 1.9492, "step": 93099 }, { "epoch": 9.612017344621103, "grad_norm": 0.05430379509925842, "learning_rate": 0.01, "loss": 1.9363, "step": 93102 }, { "epoch": 9.612327069997935, "grad_norm": 0.06786497682332993, "learning_rate": 0.01, "loss": 1.9302, "step": 93105 }, { "epoch": 9.612636795374767, "grad_norm": 0.09560369700193405, "learning_rate": 0.01, "loss": 1.934, "step": 93108 }, { "epoch": 9.612946520751601, "grad_norm": 0.046379491686820984, "learning_rate": 0.01, "loss": 1.935, "step": 93111 }, { "epoch": 9.613256246128433, "grad_norm": 0.04021504893898964, "learning_rate": 0.01, "loss": 1.9475, "step": 93114 }, { "epoch": 9.613565971505265, "grad_norm": 0.04521257057785988, "learning_rate": 0.01, "loss": 1.9539, "step": 93117 }, { "epoch": 9.613875696882097, "grad_norm": 0.07617924362421036, "learning_rate": 0.01, "loss": 1.9488, "step": 93120 }, { "epoch": 9.614185422258931, "grad_norm": 0.07008475810289383, "learning_rate": 0.01, "loss": 1.9428, "step": 93123 }, { "epoch": 9.614495147635763, "grad_norm": 0.04604260250926018, "learning_rate": 0.01, "loss": 1.9427, "step": 93126 }, { "epoch": 9.614804873012595, "grad_norm": 0.035664889961481094, "learning_rate": 0.01, "loss": 1.9404, "step": 93129 }, { "epoch": 9.615114598389429, "grad_norm": 0.06766800582408905, "learning_rate": 0.01, "loss": 1.9315, "step": 93132 }, { "epoch": 9.61542432376626, "grad_norm": 0.08869469165802002, "learning_rate": 0.01, "loss": 1.9474, "step": 93135 }, { "epoch": 9.615734049143093, "grad_norm": 0.056641317903995514, "learning_rate": 0.01, "loss": 1.9291, "step": 93138 }, { "epoch": 9.616043774519925, "grad_norm": 0.08216816186904907, "learning_rate": 0.01, "loss": 1.9335, "step": 93141 }, { "epoch": 9.616353499896759, "grad_norm": 0.07369711995124817, "learning_rate": 0.01, "loss": 1.954, "step": 93144 }, { "epoch": 9.61666322527359, "grad_norm": 0.08076739311218262, "learning_rate": 0.01, "loss": 1.9052, "step": 93147 }, { "epoch": 9.616972950650423, "grad_norm": 0.06903894990682602, "learning_rate": 0.01, "loss": 1.9526, "step": 93150 }, { "epoch": 9.617282676027255, "grad_norm": 0.03939782455563545, "learning_rate": 0.01, "loss": 1.918, "step": 93153 }, { "epoch": 9.617592401404089, "grad_norm": 0.053101763129234314, "learning_rate": 0.01, "loss": 1.9455, "step": 93156 }, { "epoch": 9.61790212678092, "grad_norm": 0.0437733419239521, "learning_rate": 0.01, "loss": 1.9242, "step": 93159 }, { "epoch": 9.618211852157753, "grad_norm": 0.08929965645074844, "learning_rate": 0.01, "loss": 1.9348, "step": 93162 }, { "epoch": 9.618521577534587, "grad_norm": 0.05190601572394371, "learning_rate": 0.01, "loss": 1.9221, "step": 93165 }, { "epoch": 9.618831302911419, "grad_norm": 0.08993231505155563, "learning_rate": 0.01, "loss": 1.9422, "step": 93168 }, { "epoch": 9.61914102828825, "grad_norm": 0.06623240560293198, "learning_rate": 0.01, "loss": 1.934, "step": 93171 }, { "epoch": 9.619450753665083, "grad_norm": 0.07955315709114075, "learning_rate": 0.01, "loss": 1.9343, "step": 93174 }, { "epoch": 9.619760479041917, "grad_norm": 0.0649283230304718, "learning_rate": 0.01, "loss": 1.9152, "step": 93177 }, { "epoch": 9.620070204418749, "grad_norm": 0.050466012209653854, "learning_rate": 0.01, "loss": 1.9269, "step": 93180 }, { "epoch": 9.62037992979558, "grad_norm": 0.04734960198402405, "learning_rate": 0.01, "loss": 1.9348, "step": 93183 }, { "epoch": 9.620689655172415, "grad_norm": 0.15201205015182495, "learning_rate": 0.01, "loss": 1.931, "step": 93186 }, { "epoch": 9.620999380549247, "grad_norm": 0.09963729977607727, "learning_rate": 0.01, "loss": 1.9594, "step": 93189 }, { "epoch": 9.621309105926079, "grad_norm": 0.09011012315750122, "learning_rate": 0.01, "loss": 1.9366, "step": 93192 }, { "epoch": 9.62161883130291, "grad_norm": 0.09119027107954025, "learning_rate": 0.01, "loss": 1.9334, "step": 93195 }, { "epoch": 9.621928556679745, "grad_norm": 0.06065910682082176, "learning_rate": 0.01, "loss": 1.9383, "step": 93198 }, { "epoch": 9.622238282056577, "grad_norm": 0.06997659057378769, "learning_rate": 0.01, "loss": 1.9496, "step": 93201 }, { "epoch": 9.622548007433409, "grad_norm": 0.06493129581212997, "learning_rate": 0.01, "loss": 1.9156, "step": 93204 }, { "epoch": 9.622857732810242, "grad_norm": 0.06256227195262909, "learning_rate": 0.01, "loss": 1.951, "step": 93207 }, { "epoch": 9.623167458187075, "grad_norm": 0.055494748055934906, "learning_rate": 0.01, "loss": 1.9361, "step": 93210 }, { "epoch": 9.623477183563907, "grad_norm": 0.1232457309961319, "learning_rate": 0.01, "loss": 1.9374, "step": 93213 }, { "epoch": 9.623786908940739, "grad_norm": 0.07807248085737228, "learning_rate": 0.01, "loss": 1.9653, "step": 93216 }, { "epoch": 9.624096634317572, "grad_norm": 0.06218323856592178, "learning_rate": 0.01, "loss": 1.9197, "step": 93219 }, { "epoch": 9.624406359694405, "grad_norm": 0.09360960870981216, "learning_rate": 0.01, "loss": 1.9304, "step": 93222 }, { "epoch": 9.624716085071237, "grad_norm": 0.040567923337221146, "learning_rate": 0.01, "loss": 1.9255, "step": 93225 }, { "epoch": 9.625025810448069, "grad_norm": 0.06413479149341583, "learning_rate": 0.01, "loss": 1.9628, "step": 93228 }, { "epoch": 9.625335535824902, "grad_norm": 0.04551082104444504, "learning_rate": 0.01, "loss": 1.9687, "step": 93231 }, { "epoch": 9.625645261201734, "grad_norm": 0.04174100235104561, "learning_rate": 0.01, "loss": 1.9431, "step": 93234 }, { "epoch": 9.625954986578567, "grad_norm": 0.06028440222144127, "learning_rate": 0.01, "loss": 1.9325, "step": 93237 }, { "epoch": 9.6262647119554, "grad_norm": 0.049742285162210464, "learning_rate": 0.01, "loss": 1.948, "step": 93240 }, { "epoch": 9.626574437332232, "grad_norm": 0.036308467388153076, "learning_rate": 0.01, "loss": 1.9315, "step": 93243 }, { "epoch": 9.626884162709064, "grad_norm": 0.11060460656881332, "learning_rate": 0.01, "loss": 1.9566, "step": 93246 }, { "epoch": 9.627193888085896, "grad_norm": 0.05999557301402092, "learning_rate": 0.01, "loss": 1.9215, "step": 93249 }, { "epoch": 9.62750361346273, "grad_norm": 0.12154773622751236, "learning_rate": 0.01, "loss": 1.9285, "step": 93252 }, { "epoch": 9.627813338839562, "grad_norm": 0.10073697566986084, "learning_rate": 0.01, "loss": 1.927, "step": 93255 }, { "epoch": 9.628123064216394, "grad_norm": 0.05522200092673302, "learning_rate": 0.01, "loss": 1.9366, "step": 93258 }, { "epoch": 9.628432789593226, "grad_norm": 0.04138825461268425, "learning_rate": 0.01, "loss": 1.9408, "step": 93261 }, { "epoch": 9.62874251497006, "grad_norm": 0.04448959603905678, "learning_rate": 0.01, "loss": 1.9336, "step": 93264 }, { "epoch": 9.629052240346892, "grad_norm": 0.043327491730451584, "learning_rate": 0.01, "loss": 1.9499, "step": 93267 }, { "epoch": 9.629361965723724, "grad_norm": 0.034704022109508514, "learning_rate": 0.01, "loss": 1.9577, "step": 93270 }, { "epoch": 9.629671691100558, "grad_norm": 0.0420006588101387, "learning_rate": 0.01, "loss": 1.937, "step": 93273 }, { "epoch": 9.62998141647739, "grad_norm": 0.04239601641893387, "learning_rate": 0.01, "loss": 1.9412, "step": 93276 }, { "epoch": 9.630291141854222, "grad_norm": 0.09182067960500717, "learning_rate": 0.01, "loss": 1.9542, "step": 93279 }, { "epoch": 9.630600867231054, "grad_norm": 0.07504469156265259, "learning_rate": 0.01, "loss": 1.9536, "step": 93282 }, { "epoch": 9.630910592607888, "grad_norm": 0.0762309581041336, "learning_rate": 0.01, "loss": 1.9207, "step": 93285 }, { "epoch": 9.63122031798472, "grad_norm": 0.09295113384723663, "learning_rate": 0.01, "loss": 1.9333, "step": 93288 }, { "epoch": 9.631530043361552, "grad_norm": 0.06212402880191803, "learning_rate": 0.01, "loss": 1.9379, "step": 93291 }, { "epoch": 9.631839768738386, "grad_norm": 0.08962065726518631, "learning_rate": 0.01, "loss": 1.9539, "step": 93294 }, { "epoch": 9.632149494115218, "grad_norm": 0.04338514432311058, "learning_rate": 0.01, "loss": 1.9498, "step": 93297 }, { "epoch": 9.63245921949205, "grad_norm": 0.05132930725812912, "learning_rate": 0.01, "loss": 1.9139, "step": 93300 }, { "epoch": 9.632768944868882, "grad_norm": 0.03200831636786461, "learning_rate": 0.01, "loss": 1.9354, "step": 93303 }, { "epoch": 9.633078670245716, "grad_norm": 0.12237490713596344, "learning_rate": 0.01, "loss": 1.9307, "step": 93306 }, { "epoch": 9.633388395622548, "grad_norm": 0.05049234256148338, "learning_rate": 0.01, "loss": 1.9327, "step": 93309 }, { "epoch": 9.63369812099938, "grad_norm": 0.1869441121816635, "learning_rate": 0.01, "loss": 1.9634, "step": 93312 }, { "epoch": 9.634007846376214, "grad_norm": 0.05273398756980896, "learning_rate": 0.01, "loss": 1.9403, "step": 93315 }, { "epoch": 9.634317571753046, "grad_norm": 0.04563438892364502, "learning_rate": 0.01, "loss": 1.9444, "step": 93318 }, { "epoch": 9.634627297129878, "grad_norm": 0.04140450805425644, "learning_rate": 0.01, "loss": 1.9303, "step": 93321 }, { "epoch": 9.63493702250671, "grad_norm": 0.03873087465763092, "learning_rate": 0.01, "loss": 1.9384, "step": 93324 }, { "epoch": 9.635246747883544, "grad_norm": 0.03511977195739746, "learning_rate": 0.01, "loss": 1.9256, "step": 93327 }, { "epoch": 9.635556473260376, "grad_norm": 0.0669543594121933, "learning_rate": 0.01, "loss": 1.9452, "step": 93330 }, { "epoch": 9.635866198637208, "grad_norm": 0.14799566566944122, "learning_rate": 0.01, "loss": 1.9451, "step": 93333 }, { "epoch": 9.63617592401404, "grad_norm": 0.07729782164096832, "learning_rate": 0.01, "loss": 1.9576, "step": 93336 }, { "epoch": 9.636485649390874, "grad_norm": 0.03945566713809967, "learning_rate": 0.01, "loss": 1.905, "step": 93339 }, { "epoch": 9.636795374767706, "grad_norm": 0.03338122367858887, "learning_rate": 0.01, "loss": 1.9407, "step": 93342 }, { "epoch": 9.637105100144538, "grad_norm": 0.06058921664953232, "learning_rate": 0.01, "loss": 1.945, "step": 93345 }, { "epoch": 9.637414825521372, "grad_norm": 0.07650914788246155, "learning_rate": 0.01, "loss": 1.936, "step": 93348 }, { "epoch": 9.637724550898204, "grad_norm": 0.06135985255241394, "learning_rate": 0.01, "loss": 1.9419, "step": 93351 }, { "epoch": 9.638034276275036, "grad_norm": 0.0720086619257927, "learning_rate": 0.01, "loss": 1.9508, "step": 93354 }, { "epoch": 9.638344001651868, "grad_norm": 0.12504549324512482, "learning_rate": 0.01, "loss": 1.9479, "step": 93357 }, { "epoch": 9.638653727028702, "grad_norm": 0.058934710919857025, "learning_rate": 0.01, "loss": 1.9625, "step": 93360 }, { "epoch": 9.638963452405534, "grad_norm": 0.06218874454498291, "learning_rate": 0.01, "loss": 1.9068, "step": 93363 }, { "epoch": 9.639273177782366, "grad_norm": 0.06695770472288132, "learning_rate": 0.01, "loss": 1.9726, "step": 93366 }, { "epoch": 9.639582903159198, "grad_norm": 0.05433553829789162, "learning_rate": 0.01, "loss": 1.9299, "step": 93369 }, { "epoch": 9.639892628536032, "grad_norm": 0.05624277889728546, "learning_rate": 0.01, "loss": 1.9318, "step": 93372 }, { "epoch": 9.640202353912864, "grad_norm": 0.03462133929133415, "learning_rate": 0.01, "loss": 1.9064, "step": 93375 }, { "epoch": 9.640512079289696, "grad_norm": 0.04203572869300842, "learning_rate": 0.01, "loss": 1.9164, "step": 93378 }, { "epoch": 9.64082180466653, "grad_norm": 0.09270434826612473, "learning_rate": 0.01, "loss": 1.9381, "step": 93381 }, { "epoch": 9.641131530043362, "grad_norm": 0.04383547976613045, "learning_rate": 0.01, "loss": 1.9304, "step": 93384 }, { "epoch": 9.641441255420194, "grad_norm": 0.041868455708026886, "learning_rate": 0.01, "loss": 1.9409, "step": 93387 }, { "epoch": 9.641750980797026, "grad_norm": 0.04283531755208969, "learning_rate": 0.01, "loss": 1.9321, "step": 93390 }, { "epoch": 9.64206070617386, "grad_norm": 0.049920182675123215, "learning_rate": 0.01, "loss": 1.9384, "step": 93393 }, { "epoch": 9.642370431550692, "grad_norm": 0.12349256128072739, "learning_rate": 0.01, "loss": 1.9264, "step": 93396 }, { "epoch": 9.642680156927524, "grad_norm": 0.03577020764350891, "learning_rate": 0.01, "loss": 1.9047, "step": 93399 }, { "epoch": 9.642989882304358, "grad_norm": 0.03897339478135109, "learning_rate": 0.01, "loss": 1.9142, "step": 93402 }, { "epoch": 9.64329960768119, "grad_norm": 0.08809322863817215, "learning_rate": 0.01, "loss": 1.9354, "step": 93405 }, { "epoch": 9.643609333058022, "grad_norm": 0.09816404432058334, "learning_rate": 0.01, "loss": 1.9303, "step": 93408 }, { "epoch": 9.643919058434854, "grad_norm": 0.07009526342153549, "learning_rate": 0.01, "loss": 1.9397, "step": 93411 }, { "epoch": 9.644228783811688, "grad_norm": 0.04368029162287712, "learning_rate": 0.01, "loss": 1.8955, "step": 93414 }, { "epoch": 9.64453850918852, "grad_norm": 0.03799109160900116, "learning_rate": 0.01, "loss": 1.9377, "step": 93417 }, { "epoch": 9.644848234565352, "grad_norm": 0.045468222349882126, "learning_rate": 0.01, "loss": 1.9061, "step": 93420 }, { "epoch": 9.645157959942185, "grad_norm": 0.033892299979925156, "learning_rate": 0.01, "loss": 1.9242, "step": 93423 }, { "epoch": 9.645467685319018, "grad_norm": 0.03280634060502052, "learning_rate": 0.01, "loss": 1.9283, "step": 93426 }, { "epoch": 9.64577741069585, "grad_norm": 0.032541584223508835, "learning_rate": 0.01, "loss": 1.9358, "step": 93429 }, { "epoch": 9.646087136072682, "grad_norm": 0.0309646837413311, "learning_rate": 0.01, "loss": 1.927, "step": 93432 }, { "epoch": 9.646396861449515, "grad_norm": 0.11828819662332535, "learning_rate": 0.01, "loss": 1.9195, "step": 93435 }, { "epoch": 9.646706586826348, "grad_norm": 0.10888717323541641, "learning_rate": 0.01, "loss": 1.9582, "step": 93438 }, { "epoch": 9.64701631220318, "grad_norm": 0.0515931062400341, "learning_rate": 0.01, "loss": 1.9478, "step": 93441 }, { "epoch": 9.647326037580012, "grad_norm": 0.051003944128751755, "learning_rate": 0.01, "loss": 1.9371, "step": 93444 }, { "epoch": 9.647635762956845, "grad_norm": 0.06467358022928238, "learning_rate": 0.01, "loss": 1.9489, "step": 93447 }, { "epoch": 9.647945488333677, "grad_norm": 0.041775960475206375, "learning_rate": 0.01, "loss": 1.9404, "step": 93450 }, { "epoch": 9.64825521371051, "grad_norm": 0.03359523043036461, "learning_rate": 0.01, "loss": 1.9404, "step": 93453 }, { "epoch": 9.648564939087343, "grad_norm": 0.05004175379872322, "learning_rate": 0.01, "loss": 1.9661, "step": 93456 }, { "epoch": 9.648874664464175, "grad_norm": 0.07100548595190048, "learning_rate": 0.01, "loss": 1.9456, "step": 93459 }, { "epoch": 9.649184389841007, "grad_norm": 0.05144869163632393, "learning_rate": 0.01, "loss": 1.951, "step": 93462 }, { "epoch": 9.64949411521784, "grad_norm": 0.1208404004573822, "learning_rate": 0.01, "loss": 1.9562, "step": 93465 }, { "epoch": 9.649803840594673, "grad_norm": 0.04204929992556572, "learning_rate": 0.01, "loss": 1.954, "step": 93468 }, { "epoch": 9.650113565971505, "grad_norm": 0.11732341349124908, "learning_rate": 0.01, "loss": 1.9357, "step": 93471 }, { "epoch": 9.650423291348337, "grad_norm": 0.03591461852192879, "learning_rate": 0.01, "loss": 1.9268, "step": 93474 }, { "epoch": 9.65073301672517, "grad_norm": 0.09749674052000046, "learning_rate": 0.01, "loss": 1.928, "step": 93477 }, { "epoch": 9.651042742102003, "grad_norm": 0.04847512021660805, "learning_rate": 0.01, "loss": 1.9295, "step": 93480 }, { "epoch": 9.651352467478835, "grad_norm": 0.06401492655277252, "learning_rate": 0.01, "loss": 1.9397, "step": 93483 }, { "epoch": 9.651662192855667, "grad_norm": 0.13890819251537323, "learning_rate": 0.01, "loss": 1.93, "step": 93486 }, { "epoch": 9.651971918232501, "grad_norm": 0.034448061138391495, "learning_rate": 0.01, "loss": 1.9182, "step": 93489 }, { "epoch": 9.652281643609333, "grad_norm": 0.11787496507167816, "learning_rate": 0.01, "loss": 1.9296, "step": 93492 }, { "epoch": 9.652591368986165, "grad_norm": 0.05250073969364166, "learning_rate": 0.01, "loss": 1.9356, "step": 93495 }, { "epoch": 9.652901094362997, "grad_norm": 0.04947948083281517, "learning_rate": 0.01, "loss": 1.9342, "step": 93498 }, { "epoch": 9.653210819739831, "grad_norm": 0.04423551633954048, "learning_rate": 0.01, "loss": 1.9538, "step": 93501 }, { "epoch": 9.653520545116663, "grad_norm": 0.041208140552043915, "learning_rate": 0.01, "loss": 1.9277, "step": 93504 }, { "epoch": 9.653830270493495, "grad_norm": 0.13026712834835052, "learning_rate": 0.01, "loss": 1.9455, "step": 93507 }, { "epoch": 9.65413999587033, "grad_norm": 0.07483094930648804, "learning_rate": 0.01, "loss": 1.9482, "step": 93510 }, { "epoch": 9.654449721247161, "grad_norm": 0.05455175042152405, "learning_rate": 0.01, "loss": 1.9343, "step": 93513 }, { "epoch": 9.654759446623993, "grad_norm": 0.0686800554394722, "learning_rate": 0.01, "loss": 1.931, "step": 93516 }, { "epoch": 9.655069172000825, "grad_norm": 0.0592319555580616, "learning_rate": 0.01, "loss": 1.9375, "step": 93519 }, { "epoch": 9.655378897377659, "grad_norm": 0.03538976609706879, "learning_rate": 0.01, "loss": 1.945, "step": 93522 }, { "epoch": 9.655688622754491, "grad_norm": 0.039937421679496765, "learning_rate": 0.01, "loss": 1.9559, "step": 93525 }, { "epoch": 9.655998348131323, "grad_norm": 0.04662489891052246, "learning_rate": 0.01, "loss": 1.9033, "step": 93528 }, { "epoch": 9.656308073508157, "grad_norm": 0.1549932211637497, "learning_rate": 0.01, "loss": 1.9383, "step": 93531 }, { "epoch": 9.656617798884989, "grad_norm": 0.07844512164592743, "learning_rate": 0.01, "loss": 1.9192, "step": 93534 }, { "epoch": 9.656927524261821, "grad_norm": 0.05962245911359787, "learning_rate": 0.01, "loss": 1.9355, "step": 93537 }, { "epoch": 9.657237249638653, "grad_norm": 0.06986347585916519, "learning_rate": 0.01, "loss": 1.951, "step": 93540 }, { "epoch": 9.657546975015487, "grad_norm": 0.045444563031196594, "learning_rate": 0.01, "loss": 1.9353, "step": 93543 }, { "epoch": 9.657856700392319, "grad_norm": 0.042958684265613556, "learning_rate": 0.01, "loss": 1.9295, "step": 93546 }, { "epoch": 9.658166425769151, "grad_norm": 0.05604341998696327, "learning_rate": 0.01, "loss": 1.9132, "step": 93549 }, { "epoch": 9.658476151145983, "grad_norm": 0.05136589705944061, "learning_rate": 0.01, "loss": 1.9211, "step": 93552 }, { "epoch": 9.658785876522817, "grad_norm": 0.06568500399589539, "learning_rate": 0.01, "loss": 1.9392, "step": 93555 }, { "epoch": 9.659095601899649, "grad_norm": 0.042793937027454376, "learning_rate": 0.01, "loss": 1.9278, "step": 93558 }, { "epoch": 9.659405327276481, "grad_norm": 0.08512739837169647, "learning_rate": 0.01, "loss": 1.9245, "step": 93561 }, { "epoch": 9.659715052653315, "grad_norm": 0.059787552803754807, "learning_rate": 0.01, "loss": 1.9152, "step": 93564 }, { "epoch": 9.660024778030147, "grad_norm": 0.095461905002594, "learning_rate": 0.01, "loss": 1.9396, "step": 93567 }, { "epoch": 9.660334503406979, "grad_norm": 0.11361975967884064, "learning_rate": 0.01, "loss": 1.9673, "step": 93570 }, { "epoch": 9.660644228783811, "grad_norm": 0.055556997656822205, "learning_rate": 0.01, "loss": 1.9518, "step": 93573 }, { "epoch": 9.660953954160645, "grad_norm": 0.043150875717401505, "learning_rate": 0.01, "loss": 1.9254, "step": 93576 }, { "epoch": 9.661263679537477, "grad_norm": 0.0998530462384224, "learning_rate": 0.01, "loss": 1.9343, "step": 93579 }, { "epoch": 9.661573404914309, "grad_norm": 0.045185189694166183, "learning_rate": 0.01, "loss": 1.9371, "step": 93582 }, { "epoch": 9.661883130291141, "grad_norm": 0.14654050767421722, "learning_rate": 0.01, "loss": 1.9645, "step": 93585 }, { "epoch": 9.662192855667975, "grad_norm": 0.10469841212034225, "learning_rate": 0.01, "loss": 1.9414, "step": 93588 }, { "epoch": 9.662502581044807, "grad_norm": 0.09240784496068954, "learning_rate": 0.01, "loss": 1.9214, "step": 93591 }, { "epoch": 9.662812306421639, "grad_norm": 0.07269421219825745, "learning_rate": 0.01, "loss": 1.9361, "step": 93594 }, { "epoch": 9.663122031798473, "grad_norm": 0.042303770780563354, "learning_rate": 0.01, "loss": 1.9349, "step": 93597 }, { "epoch": 9.663431757175305, "grad_norm": 0.03580670431256294, "learning_rate": 0.01, "loss": 1.9481, "step": 93600 }, { "epoch": 9.663741482552137, "grad_norm": 0.03630633279681206, "learning_rate": 0.01, "loss": 1.9189, "step": 93603 }, { "epoch": 9.664051207928969, "grad_norm": 0.0431305356323719, "learning_rate": 0.01, "loss": 1.9491, "step": 93606 }, { "epoch": 9.664360933305803, "grad_norm": 0.044870369136333466, "learning_rate": 0.01, "loss": 1.935, "step": 93609 }, { "epoch": 9.664670658682635, "grad_norm": 0.12574565410614014, "learning_rate": 0.01, "loss": 1.9236, "step": 93612 }, { "epoch": 9.664980384059467, "grad_norm": 0.12211044132709503, "learning_rate": 0.01, "loss": 1.9235, "step": 93615 }, { "epoch": 9.6652901094363, "grad_norm": 0.1176372617483139, "learning_rate": 0.01, "loss": 1.941, "step": 93618 }, { "epoch": 9.665599834813133, "grad_norm": 0.0372295081615448, "learning_rate": 0.01, "loss": 1.9215, "step": 93621 }, { "epoch": 9.665909560189965, "grad_norm": 0.040826380252838135, "learning_rate": 0.01, "loss": 1.9326, "step": 93624 }, { "epoch": 9.666219285566797, "grad_norm": 0.030447613447904587, "learning_rate": 0.01, "loss": 1.9503, "step": 93627 }, { "epoch": 9.66652901094363, "grad_norm": 0.04039299115538597, "learning_rate": 0.01, "loss": 1.9448, "step": 93630 }, { "epoch": 9.666838736320463, "grad_norm": 0.08394279330968857, "learning_rate": 0.01, "loss": 1.9485, "step": 93633 }, { "epoch": 9.667148461697295, "grad_norm": 0.04779527708888054, "learning_rate": 0.01, "loss": 1.9107, "step": 93636 }, { "epoch": 9.667458187074129, "grad_norm": 0.05684870854020119, "learning_rate": 0.01, "loss": 1.9137, "step": 93639 }, { "epoch": 9.66776791245096, "grad_norm": 0.04491376876831055, "learning_rate": 0.01, "loss": 1.9425, "step": 93642 }, { "epoch": 9.668077637827793, "grad_norm": 0.035552386194467545, "learning_rate": 0.01, "loss": 1.939, "step": 93645 }, { "epoch": 9.668387363204625, "grad_norm": 0.03917543217539787, "learning_rate": 0.01, "loss": 1.9308, "step": 93648 }, { "epoch": 9.668697088581458, "grad_norm": 0.05415418744087219, "learning_rate": 0.01, "loss": 1.9512, "step": 93651 }, { "epoch": 9.66900681395829, "grad_norm": 0.13435325026512146, "learning_rate": 0.01, "loss": 1.9204, "step": 93654 }, { "epoch": 9.669316539335123, "grad_norm": 0.13546143472194672, "learning_rate": 0.01, "loss": 1.9408, "step": 93657 }, { "epoch": 9.669626264711955, "grad_norm": 0.11451159417629242, "learning_rate": 0.01, "loss": 1.9514, "step": 93660 }, { "epoch": 9.669935990088788, "grad_norm": 0.05074388161301613, "learning_rate": 0.01, "loss": 1.952, "step": 93663 }, { "epoch": 9.67024571546562, "grad_norm": 0.07373972982168198, "learning_rate": 0.01, "loss": 1.9175, "step": 93666 }, { "epoch": 9.670555440842453, "grad_norm": 0.08737852424383163, "learning_rate": 0.01, "loss": 1.9498, "step": 93669 }, { "epoch": 9.670865166219286, "grad_norm": 0.05184822157025337, "learning_rate": 0.01, "loss": 1.9264, "step": 93672 }, { "epoch": 9.671174891596118, "grad_norm": 0.08283158391714096, "learning_rate": 0.01, "loss": 1.9408, "step": 93675 }, { "epoch": 9.67148461697295, "grad_norm": 0.10233674198389053, "learning_rate": 0.01, "loss": 1.9388, "step": 93678 }, { "epoch": 9.671794342349783, "grad_norm": 0.041018836200237274, "learning_rate": 0.01, "loss": 1.9476, "step": 93681 }, { "epoch": 9.672104067726616, "grad_norm": 0.10328731685876846, "learning_rate": 0.01, "loss": 1.9503, "step": 93684 }, { "epoch": 9.672413793103448, "grad_norm": 0.04173845052719116, "learning_rate": 0.01, "loss": 1.9091, "step": 93687 }, { "epoch": 9.67272351848028, "grad_norm": 0.04470297694206238, "learning_rate": 0.01, "loss": 1.9296, "step": 93690 }, { "epoch": 9.673033243857112, "grad_norm": 0.04156685620546341, "learning_rate": 0.01, "loss": 1.942, "step": 93693 }, { "epoch": 9.673342969233946, "grad_norm": 0.04977422207593918, "learning_rate": 0.01, "loss": 1.9214, "step": 93696 }, { "epoch": 9.673652694610778, "grad_norm": 0.034027110785245895, "learning_rate": 0.01, "loss": 1.9264, "step": 93699 }, { "epoch": 9.67396241998761, "grad_norm": 0.04569841921329498, "learning_rate": 0.01, "loss": 1.9136, "step": 93702 }, { "epoch": 9.674272145364444, "grad_norm": 0.04238857328891754, "learning_rate": 0.01, "loss": 1.8984, "step": 93705 }, { "epoch": 9.674581870741276, "grad_norm": 0.04386452957987785, "learning_rate": 0.01, "loss": 1.9198, "step": 93708 }, { "epoch": 9.674891596118108, "grad_norm": 0.043252211064100266, "learning_rate": 0.01, "loss": 1.9165, "step": 93711 }, { "epoch": 9.67520132149494, "grad_norm": 0.10518099367618561, "learning_rate": 0.01, "loss": 1.9052, "step": 93714 }, { "epoch": 9.675511046871774, "grad_norm": 0.095424585044384, "learning_rate": 0.01, "loss": 1.9282, "step": 93717 }, { "epoch": 9.675820772248606, "grad_norm": 0.09081821888685226, "learning_rate": 0.01, "loss": 1.9227, "step": 93720 }, { "epoch": 9.676130497625438, "grad_norm": 0.17587430775165558, "learning_rate": 0.01, "loss": 1.9226, "step": 93723 }, { "epoch": 9.676440223002272, "grad_norm": 0.0808170810341835, "learning_rate": 0.01, "loss": 1.9303, "step": 93726 }, { "epoch": 9.676749948379104, "grad_norm": 0.060308847576379776, "learning_rate": 0.01, "loss": 1.9614, "step": 93729 }, { "epoch": 9.677059673755936, "grad_norm": 0.03717552870512009, "learning_rate": 0.01, "loss": 1.9463, "step": 93732 }, { "epoch": 9.677369399132768, "grad_norm": 0.033456869423389435, "learning_rate": 0.01, "loss": 1.9771, "step": 93735 }, { "epoch": 9.677679124509602, "grad_norm": 0.04936378076672554, "learning_rate": 0.01, "loss": 1.9468, "step": 93738 }, { "epoch": 9.677988849886434, "grad_norm": 0.05672374367713928, "learning_rate": 0.01, "loss": 1.9429, "step": 93741 }, { "epoch": 9.678298575263266, "grad_norm": 0.05045805498957634, "learning_rate": 0.01, "loss": 1.9203, "step": 93744 }, { "epoch": 9.6786083006401, "grad_norm": 0.0464802086353302, "learning_rate": 0.01, "loss": 1.9366, "step": 93747 }, { "epoch": 9.678918026016932, "grad_norm": 0.09897595643997192, "learning_rate": 0.01, "loss": 1.9151, "step": 93750 }, { "epoch": 9.679227751393764, "grad_norm": 0.04183182865381241, "learning_rate": 0.01, "loss": 1.9241, "step": 93753 }, { "epoch": 9.679537476770596, "grad_norm": 0.07497096061706543, "learning_rate": 0.01, "loss": 1.9311, "step": 93756 }, { "epoch": 9.67984720214743, "grad_norm": 0.09940138459205627, "learning_rate": 0.01, "loss": 1.9165, "step": 93759 }, { "epoch": 9.680156927524262, "grad_norm": 0.0419076569378376, "learning_rate": 0.01, "loss": 1.9437, "step": 93762 }, { "epoch": 9.680466652901094, "grad_norm": 0.10052657127380371, "learning_rate": 0.01, "loss": 1.9481, "step": 93765 }, { "epoch": 9.680776378277926, "grad_norm": 0.12983714044094086, "learning_rate": 0.01, "loss": 1.9372, "step": 93768 }, { "epoch": 9.68108610365476, "grad_norm": 0.06584013253450394, "learning_rate": 0.01, "loss": 1.9276, "step": 93771 }, { "epoch": 9.681395829031592, "grad_norm": 0.0655849277973175, "learning_rate": 0.01, "loss": 1.9017, "step": 93774 }, { "epoch": 9.681705554408424, "grad_norm": 0.05604313686490059, "learning_rate": 0.01, "loss": 1.928, "step": 93777 }, { "epoch": 9.682015279785258, "grad_norm": 0.049449238926172256, "learning_rate": 0.01, "loss": 1.9529, "step": 93780 }, { "epoch": 9.68232500516209, "grad_norm": 0.032564688473939896, "learning_rate": 0.01, "loss": 1.9422, "step": 93783 }, { "epoch": 9.682634730538922, "grad_norm": 0.033273547887802124, "learning_rate": 0.01, "loss": 1.9315, "step": 93786 }, { "epoch": 9.682944455915754, "grad_norm": 0.033502086997032166, "learning_rate": 0.01, "loss": 1.9407, "step": 93789 }, { "epoch": 9.683254181292588, "grad_norm": 0.12671968340873718, "learning_rate": 0.01, "loss": 1.9243, "step": 93792 }, { "epoch": 9.68356390666942, "grad_norm": 0.045263972133398056, "learning_rate": 0.01, "loss": 1.9541, "step": 93795 }, { "epoch": 9.683873632046252, "grad_norm": 0.08929682523012161, "learning_rate": 0.01, "loss": 1.9256, "step": 93798 }, { "epoch": 9.684183357423084, "grad_norm": 0.05344196781516075, "learning_rate": 0.01, "loss": 1.9497, "step": 93801 }, { "epoch": 9.684493082799918, "grad_norm": 0.04417494311928749, "learning_rate": 0.01, "loss": 1.9554, "step": 93804 }, { "epoch": 9.68480280817675, "grad_norm": 0.04135802015662193, "learning_rate": 0.01, "loss": 1.942, "step": 93807 }, { "epoch": 9.685112533553582, "grad_norm": 0.07783691585063934, "learning_rate": 0.01, "loss": 1.908, "step": 93810 }, { "epoch": 9.685422258930416, "grad_norm": 0.08820218592882156, "learning_rate": 0.01, "loss": 1.9261, "step": 93813 }, { "epoch": 9.685731984307248, "grad_norm": 0.03493189811706543, "learning_rate": 0.01, "loss": 1.9053, "step": 93816 }, { "epoch": 9.68604170968408, "grad_norm": 0.06502649933099747, "learning_rate": 0.01, "loss": 1.9198, "step": 93819 }, { "epoch": 9.686351435060912, "grad_norm": 0.08531218767166138, "learning_rate": 0.01, "loss": 1.9285, "step": 93822 }, { "epoch": 9.686661160437746, "grad_norm": 0.12796832621097565, "learning_rate": 0.01, "loss": 1.9439, "step": 93825 }, { "epoch": 9.686970885814578, "grad_norm": 0.07860841602087021, "learning_rate": 0.01, "loss": 1.9542, "step": 93828 }, { "epoch": 9.68728061119141, "grad_norm": 0.04484981298446655, "learning_rate": 0.01, "loss": 1.9423, "step": 93831 }, { "epoch": 9.687590336568244, "grad_norm": 0.04047702997922897, "learning_rate": 0.01, "loss": 1.9267, "step": 93834 }, { "epoch": 9.687900061945076, "grad_norm": 0.04358268901705742, "learning_rate": 0.01, "loss": 1.9396, "step": 93837 }, { "epoch": 9.688209787321908, "grad_norm": 0.12023859471082687, "learning_rate": 0.01, "loss": 1.9242, "step": 93840 }, { "epoch": 9.68851951269874, "grad_norm": 0.038970865309238434, "learning_rate": 0.01, "loss": 1.9326, "step": 93843 }, { "epoch": 9.688829238075574, "grad_norm": 0.13150472939014435, "learning_rate": 0.01, "loss": 1.9146, "step": 93846 }, { "epoch": 9.689138963452406, "grad_norm": 0.035846102982759476, "learning_rate": 0.01, "loss": 1.9102, "step": 93849 }, { "epoch": 9.689448688829238, "grad_norm": 0.0676359161734581, "learning_rate": 0.01, "loss": 1.9373, "step": 93852 }, { "epoch": 9.689758414206072, "grad_norm": 0.052405230700969696, "learning_rate": 0.01, "loss": 1.9246, "step": 93855 }, { "epoch": 9.690068139582904, "grad_norm": 0.054263848811388016, "learning_rate": 0.01, "loss": 1.9245, "step": 93858 }, { "epoch": 9.690377864959736, "grad_norm": 0.09067951887845993, "learning_rate": 0.01, "loss": 1.9376, "step": 93861 }, { "epoch": 9.690687590336568, "grad_norm": 0.11018327623605728, "learning_rate": 0.01, "loss": 1.9496, "step": 93864 }, { "epoch": 9.690997315713402, "grad_norm": 0.03586273640394211, "learning_rate": 0.01, "loss": 1.92, "step": 93867 }, { "epoch": 9.691307041090234, "grad_norm": 0.08600044995546341, "learning_rate": 0.01, "loss": 1.9054, "step": 93870 }, { "epoch": 9.691616766467066, "grad_norm": 0.06296738237142563, "learning_rate": 0.01, "loss": 1.9136, "step": 93873 }, { "epoch": 9.691926491843898, "grad_norm": 0.10977789759635925, "learning_rate": 0.01, "loss": 1.9637, "step": 93876 }, { "epoch": 9.692236217220731, "grad_norm": 0.09508870542049408, "learning_rate": 0.01, "loss": 1.9295, "step": 93879 }, { "epoch": 9.692545942597564, "grad_norm": 0.04756419360637665, "learning_rate": 0.01, "loss": 1.9258, "step": 93882 }, { "epoch": 9.692855667974396, "grad_norm": 0.049777496606111526, "learning_rate": 0.01, "loss": 1.9396, "step": 93885 }, { "epoch": 9.69316539335123, "grad_norm": 0.03871287778019905, "learning_rate": 0.01, "loss": 1.915, "step": 93888 }, { "epoch": 9.693475118728061, "grad_norm": 0.07852483540773392, "learning_rate": 0.01, "loss": 1.9375, "step": 93891 }, { "epoch": 9.693784844104893, "grad_norm": 0.07227092236280441, "learning_rate": 0.01, "loss": 1.9217, "step": 93894 }, { "epoch": 9.694094569481726, "grad_norm": 0.0830816924571991, "learning_rate": 0.01, "loss": 1.9518, "step": 93897 }, { "epoch": 9.69440429485856, "grad_norm": 0.06940535455942154, "learning_rate": 0.01, "loss": 1.9235, "step": 93900 }, { "epoch": 9.694714020235391, "grad_norm": 0.07247503846883774, "learning_rate": 0.01, "loss": 1.9387, "step": 93903 }, { "epoch": 9.695023745612223, "grad_norm": 0.08987094461917877, "learning_rate": 0.01, "loss": 1.9027, "step": 93906 }, { "epoch": 9.695333470989056, "grad_norm": 0.05902135744690895, "learning_rate": 0.01, "loss": 1.9091, "step": 93909 }, { "epoch": 9.69564319636589, "grad_norm": 0.08232377469539642, "learning_rate": 0.01, "loss": 1.9231, "step": 93912 }, { "epoch": 9.695952921742721, "grad_norm": 0.07965484261512756, "learning_rate": 0.01, "loss": 1.9367, "step": 93915 }, { "epoch": 9.696262647119553, "grad_norm": 0.03626954182982445, "learning_rate": 0.01, "loss": 1.9455, "step": 93918 }, { "epoch": 9.696572372496387, "grad_norm": 0.11025188863277435, "learning_rate": 0.01, "loss": 1.941, "step": 93921 }, { "epoch": 9.69688209787322, "grad_norm": 0.06925832480192184, "learning_rate": 0.01, "loss": 1.9226, "step": 93924 }, { "epoch": 9.697191823250051, "grad_norm": 0.09669098258018494, "learning_rate": 0.01, "loss": 1.9267, "step": 93927 }, { "epoch": 9.697501548626883, "grad_norm": 0.06018374115228653, "learning_rate": 0.01, "loss": 1.9624, "step": 93930 }, { "epoch": 9.697811274003717, "grad_norm": 0.03994041308760643, "learning_rate": 0.01, "loss": 1.9137, "step": 93933 }, { "epoch": 9.69812099938055, "grad_norm": 0.046831220388412476, "learning_rate": 0.01, "loss": 1.9179, "step": 93936 }, { "epoch": 9.698430724757381, "grad_norm": 0.07097730785608292, "learning_rate": 0.01, "loss": 1.9757, "step": 93939 }, { "epoch": 9.698740450134215, "grad_norm": 0.15823347866535187, "learning_rate": 0.01, "loss": 1.9537, "step": 93942 }, { "epoch": 9.699050175511047, "grad_norm": 0.05094249173998833, "learning_rate": 0.01, "loss": 1.9524, "step": 93945 }, { "epoch": 9.69935990088788, "grad_norm": 0.11403997242450714, "learning_rate": 0.01, "loss": 1.9296, "step": 93948 }, { "epoch": 9.699669626264711, "grad_norm": 0.04830975458025932, "learning_rate": 0.01, "loss": 1.9399, "step": 93951 }, { "epoch": 9.699979351641545, "grad_norm": 0.055670302361249924, "learning_rate": 0.01, "loss": 1.9308, "step": 93954 }, { "epoch": 9.700289077018377, "grad_norm": 0.06251242011785507, "learning_rate": 0.01, "loss": 1.924, "step": 93957 }, { "epoch": 9.70059880239521, "grad_norm": 0.0666978657245636, "learning_rate": 0.01, "loss": 1.9269, "step": 93960 }, { "epoch": 9.700908527772043, "grad_norm": 0.043735913932323456, "learning_rate": 0.01, "loss": 1.9111, "step": 93963 }, { "epoch": 9.701218253148875, "grad_norm": 0.03567013889551163, "learning_rate": 0.01, "loss": 1.9469, "step": 93966 }, { "epoch": 9.701527978525707, "grad_norm": 0.12823991477489471, "learning_rate": 0.01, "loss": 1.9405, "step": 93969 }, { "epoch": 9.70183770390254, "grad_norm": 0.11092595010995865, "learning_rate": 0.01, "loss": 1.9384, "step": 93972 }, { "epoch": 9.702147429279373, "grad_norm": 0.07530339807271957, "learning_rate": 0.01, "loss": 1.9637, "step": 93975 }, { "epoch": 9.702457154656205, "grad_norm": 0.040059320628643036, "learning_rate": 0.01, "loss": 1.9305, "step": 93978 }, { "epoch": 9.702766880033037, "grad_norm": 0.04048408940434456, "learning_rate": 0.01, "loss": 1.9245, "step": 93981 }, { "epoch": 9.70307660540987, "grad_norm": 0.038570586591959, "learning_rate": 0.01, "loss": 1.9114, "step": 93984 }, { "epoch": 9.703386330786703, "grad_norm": 0.04940098896622658, "learning_rate": 0.01, "loss": 1.9314, "step": 93987 }, { "epoch": 9.703696056163535, "grad_norm": 0.056408241391181946, "learning_rate": 0.01, "loss": 1.9155, "step": 93990 }, { "epoch": 9.704005781540367, "grad_norm": 0.042289089411497116, "learning_rate": 0.01, "loss": 1.9146, "step": 93993 }, { "epoch": 9.704315506917201, "grad_norm": 0.040116313844919205, "learning_rate": 0.01, "loss": 1.9073, "step": 93996 }, { "epoch": 9.704625232294033, "grad_norm": 0.09464330226182938, "learning_rate": 0.01, "loss": 1.9369, "step": 93999 }, { "epoch": 9.704934957670865, "grad_norm": 0.1511952131986618, "learning_rate": 0.01, "loss": 1.9224, "step": 94002 }, { "epoch": 9.705244683047697, "grad_norm": 0.09546513110399246, "learning_rate": 0.01, "loss": 1.9223, "step": 94005 }, { "epoch": 9.70555440842453, "grad_norm": 0.05483248457312584, "learning_rate": 0.01, "loss": 1.9287, "step": 94008 }, { "epoch": 9.705864133801363, "grad_norm": 0.036017656326293945, "learning_rate": 0.01, "loss": 1.9289, "step": 94011 }, { "epoch": 9.706173859178195, "grad_norm": 0.04008268564939499, "learning_rate": 0.01, "loss": 1.9235, "step": 94014 }, { "epoch": 9.706483584555027, "grad_norm": 0.03564837574958801, "learning_rate": 0.01, "loss": 1.9502, "step": 94017 }, { "epoch": 9.70679330993186, "grad_norm": 0.0589480847120285, "learning_rate": 0.01, "loss": 1.9354, "step": 94020 }, { "epoch": 9.707103035308693, "grad_norm": 0.07352419942617416, "learning_rate": 0.01, "loss": 1.9411, "step": 94023 }, { "epoch": 9.707412760685525, "grad_norm": 0.06446439027786255, "learning_rate": 0.01, "loss": 1.9469, "step": 94026 }, { "epoch": 9.707722486062359, "grad_norm": 0.0859951302409172, "learning_rate": 0.01, "loss": 1.9406, "step": 94029 }, { "epoch": 9.70803221143919, "grad_norm": 0.1257733255624771, "learning_rate": 0.01, "loss": 1.9196, "step": 94032 }, { "epoch": 9.708341936816023, "grad_norm": 0.0344783179461956, "learning_rate": 0.01, "loss": 1.9415, "step": 94035 }, { "epoch": 9.708651662192855, "grad_norm": 0.06627075374126434, "learning_rate": 0.01, "loss": 1.9365, "step": 94038 }, { "epoch": 9.708961387569689, "grad_norm": 0.0375131331384182, "learning_rate": 0.01, "loss": 1.9175, "step": 94041 }, { "epoch": 9.70927111294652, "grad_norm": 0.03659706190228462, "learning_rate": 0.01, "loss": 1.9417, "step": 94044 }, { "epoch": 9.709580838323353, "grad_norm": 0.06533805280923843, "learning_rate": 0.01, "loss": 1.9477, "step": 94047 }, { "epoch": 9.709890563700187, "grad_norm": 0.09291239827871323, "learning_rate": 0.01, "loss": 1.9654, "step": 94050 }, { "epoch": 9.710200289077019, "grad_norm": 0.06271101534366608, "learning_rate": 0.01, "loss": 1.9377, "step": 94053 }, { "epoch": 9.71051001445385, "grad_norm": 0.05989930406212807, "learning_rate": 0.01, "loss": 1.9086, "step": 94056 }, { "epoch": 9.710819739830683, "grad_norm": 0.10313070565462112, "learning_rate": 0.01, "loss": 1.9821, "step": 94059 }, { "epoch": 9.711129465207517, "grad_norm": 0.12868840992450714, "learning_rate": 0.01, "loss": 1.9152, "step": 94062 }, { "epoch": 9.711439190584349, "grad_norm": 0.03902678191661835, "learning_rate": 0.01, "loss": 1.9516, "step": 94065 }, { "epoch": 9.71174891596118, "grad_norm": 0.03207237645983696, "learning_rate": 0.01, "loss": 1.9205, "step": 94068 }, { "epoch": 9.712058641338015, "grad_norm": 0.04138148948550224, "learning_rate": 0.01, "loss": 1.9408, "step": 94071 }, { "epoch": 9.712368366714847, "grad_norm": 0.048512060195207596, "learning_rate": 0.01, "loss": 1.9207, "step": 94074 }, { "epoch": 9.712678092091679, "grad_norm": 0.03996929153800011, "learning_rate": 0.01, "loss": 1.9225, "step": 94077 }, { "epoch": 9.71298781746851, "grad_norm": 0.042945004999637604, "learning_rate": 0.01, "loss": 1.929, "step": 94080 }, { "epoch": 9.713297542845345, "grad_norm": 0.05019443482160568, "learning_rate": 0.01, "loss": 1.9401, "step": 94083 }, { "epoch": 9.713607268222177, "grad_norm": 0.059888970106840134, "learning_rate": 0.01, "loss": 1.9354, "step": 94086 }, { "epoch": 9.713916993599009, "grad_norm": 0.04927165061235428, "learning_rate": 0.01, "loss": 1.9548, "step": 94089 }, { "epoch": 9.71422671897584, "grad_norm": 0.039205461740493774, "learning_rate": 0.01, "loss": 1.937, "step": 94092 }, { "epoch": 9.714536444352674, "grad_norm": 0.04728487879037857, "learning_rate": 0.01, "loss": 1.9324, "step": 94095 }, { "epoch": 9.714846169729507, "grad_norm": 0.03699108585715294, "learning_rate": 0.01, "loss": 1.9556, "step": 94098 }, { "epoch": 9.715155895106339, "grad_norm": 0.1421017348766327, "learning_rate": 0.01, "loss": 1.9403, "step": 94101 }, { "epoch": 9.715465620483172, "grad_norm": 0.041983768343925476, "learning_rate": 0.01, "loss": 1.9403, "step": 94104 }, { "epoch": 9.715775345860004, "grad_norm": 0.10116805136203766, "learning_rate": 0.01, "loss": 1.9455, "step": 94107 }, { "epoch": 9.716085071236837, "grad_norm": 0.06465780735015869, "learning_rate": 0.01, "loss": 1.926, "step": 94110 }, { "epoch": 9.716394796613669, "grad_norm": 0.06708145141601562, "learning_rate": 0.01, "loss": 1.9192, "step": 94113 }, { "epoch": 9.716704521990502, "grad_norm": 0.06895887106657028, "learning_rate": 0.01, "loss": 1.9243, "step": 94116 }, { "epoch": 9.717014247367334, "grad_norm": 0.1283697783946991, "learning_rate": 0.01, "loss": 1.9397, "step": 94119 }, { "epoch": 9.717323972744166, "grad_norm": 0.044195953756570816, "learning_rate": 0.01, "loss": 1.9243, "step": 94122 }, { "epoch": 9.717633698120999, "grad_norm": 0.034305259585380554, "learning_rate": 0.01, "loss": 1.9549, "step": 94125 }, { "epoch": 9.717943423497832, "grad_norm": 0.07333043962717056, "learning_rate": 0.01, "loss": 1.9257, "step": 94128 }, { "epoch": 9.718253148874664, "grad_norm": 0.04106907546520233, "learning_rate": 0.01, "loss": 1.9485, "step": 94131 }, { "epoch": 9.718562874251496, "grad_norm": 0.04604107141494751, "learning_rate": 0.01, "loss": 1.9583, "step": 94134 }, { "epoch": 9.71887259962833, "grad_norm": 0.050639402121305466, "learning_rate": 0.01, "loss": 1.9255, "step": 94137 }, { "epoch": 9.719182325005162, "grad_norm": 0.06188986077904701, "learning_rate": 0.01, "loss": 1.9409, "step": 94140 }, { "epoch": 9.719492050381994, "grad_norm": 0.07035218924283981, "learning_rate": 0.01, "loss": 1.95, "step": 94143 }, { "epoch": 9.719801775758826, "grad_norm": 0.06794343888759613, "learning_rate": 0.01, "loss": 1.9458, "step": 94146 }, { "epoch": 9.72011150113566, "grad_norm": 0.08361504226922989, "learning_rate": 0.01, "loss": 1.9286, "step": 94149 }, { "epoch": 9.720421226512492, "grad_norm": 0.11391738802194595, "learning_rate": 0.01, "loss": 1.9404, "step": 94152 }, { "epoch": 9.720730951889324, "grad_norm": 0.08625376224517822, "learning_rate": 0.01, "loss": 1.9518, "step": 94155 }, { "epoch": 9.721040677266158, "grad_norm": 0.1078452318906784, "learning_rate": 0.01, "loss": 1.9087, "step": 94158 }, { "epoch": 9.72135040264299, "grad_norm": 0.053820785135030746, "learning_rate": 0.01, "loss": 1.9624, "step": 94161 }, { "epoch": 9.721660128019822, "grad_norm": 0.05952325463294983, "learning_rate": 0.01, "loss": 1.9597, "step": 94164 }, { "epoch": 9.721969853396654, "grad_norm": 0.05312516167759895, "learning_rate": 0.01, "loss": 1.9272, "step": 94167 }, { "epoch": 9.722279578773488, "grad_norm": 0.0658746063709259, "learning_rate": 0.01, "loss": 1.9422, "step": 94170 }, { "epoch": 9.72258930415032, "grad_norm": 0.05180962756276131, "learning_rate": 0.01, "loss": 1.9542, "step": 94173 }, { "epoch": 9.722899029527152, "grad_norm": 0.053514860570430756, "learning_rate": 0.01, "loss": 1.9382, "step": 94176 }, { "epoch": 9.723208754903986, "grad_norm": 0.09407076984643936, "learning_rate": 0.01, "loss": 1.9317, "step": 94179 }, { "epoch": 9.723518480280818, "grad_norm": 0.10801953822374344, "learning_rate": 0.01, "loss": 1.9372, "step": 94182 }, { "epoch": 9.72382820565765, "grad_norm": 0.04985484853386879, "learning_rate": 0.01, "loss": 1.934, "step": 94185 }, { "epoch": 9.724137931034482, "grad_norm": 0.04344812035560608, "learning_rate": 0.01, "loss": 1.9485, "step": 94188 }, { "epoch": 9.724447656411316, "grad_norm": 0.039459019899368286, "learning_rate": 0.01, "loss": 1.9235, "step": 94191 }, { "epoch": 9.724757381788148, "grad_norm": 0.03749649599194527, "learning_rate": 0.01, "loss": 1.9355, "step": 94194 }, { "epoch": 9.72506710716498, "grad_norm": 0.036703795194625854, "learning_rate": 0.01, "loss": 1.9152, "step": 94197 }, { "epoch": 9.725376832541812, "grad_norm": 0.03291583061218262, "learning_rate": 0.01, "loss": 1.9317, "step": 94200 }, { "epoch": 9.725686557918646, "grad_norm": 0.1574249416589737, "learning_rate": 0.01, "loss": 1.9454, "step": 94203 }, { "epoch": 9.725996283295478, "grad_norm": 0.15639059245586395, "learning_rate": 0.01, "loss": 1.9142, "step": 94206 }, { "epoch": 9.72630600867231, "grad_norm": 0.05640319734811783, "learning_rate": 0.01, "loss": 1.9356, "step": 94209 }, { "epoch": 9.726615734049144, "grad_norm": 0.069240041077137, "learning_rate": 0.01, "loss": 1.9271, "step": 94212 }, { "epoch": 9.726925459425976, "grad_norm": 0.08438346534967422, "learning_rate": 0.01, "loss": 1.925, "step": 94215 }, { "epoch": 9.727235184802808, "grad_norm": 0.03589455783367157, "learning_rate": 0.01, "loss": 1.9621, "step": 94218 }, { "epoch": 9.72754491017964, "grad_norm": 0.06589308381080627, "learning_rate": 0.01, "loss": 1.9415, "step": 94221 }, { "epoch": 9.727854635556474, "grad_norm": 0.04225952550768852, "learning_rate": 0.01, "loss": 1.9591, "step": 94224 }, { "epoch": 9.728164360933306, "grad_norm": 0.0393117219209671, "learning_rate": 0.01, "loss": 1.9242, "step": 94227 }, { "epoch": 9.728474086310138, "grad_norm": 0.03414738550782204, "learning_rate": 0.01, "loss": 1.9412, "step": 94230 }, { "epoch": 9.72878381168697, "grad_norm": 0.043435826897621155, "learning_rate": 0.01, "loss": 1.9252, "step": 94233 }, { "epoch": 9.729093537063804, "grad_norm": 0.05065900832414627, "learning_rate": 0.01, "loss": 1.9522, "step": 94236 }, { "epoch": 9.729403262440636, "grad_norm": 0.048157140612602234, "learning_rate": 0.01, "loss": 1.9746, "step": 94239 }, { "epoch": 9.729712987817468, "grad_norm": 0.04764106124639511, "learning_rate": 0.01, "loss": 1.9363, "step": 94242 }, { "epoch": 9.730022713194302, "grad_norm": 0.1098858192563057, "learning_rate": 0.01, "loss": 1.9557, "step": 94245 }, { "epoch": 9.730332438571134, "grad_norm": 0.06745657324790955, "learning_rate": 0.01, "loss": 1.9388, "step": 94248 }, { "epoch": 9.730642163947966, "grad_norm": 0.11722870171070099, "learning_rate": 0.01, "loss": 1.9256, "step": 94251 }, { "epoch": 9.730951889324798, "grad_norm": 0.04447784274816513, "learning_rate": 0.01, "loss": 1.9324, "step": 94254 }, { "epoch": 9.731261614701632, "grad_norm": 0.10149898380041122, "learning_rate": 0.01, "loss": 1.9381, "step": 94257 }, { "epoch": 9.731571340078464, "grad_norm": 0.0666227862238884, "learning_rate": 0.01, "loss": 1.9313, "step": 94260 }, { "epoch": 9.731881065455296, "grad_norm": 0.08770105987787247, "learning_rate": 0.01, "loss": 1.9462, "step": 94263 }, { "epoch": 9.732190790832128, "grad_norm": 0.0607566311955452, "learning_rate": 0.01, "loss": 1.9358, "step": 94266 }, { "epoch": 9.732500516208962, "grad_norm": 0.07751908153295517, "learning_rate": 0.01, "loss": 1.9405, "step": 94269 }, { "epoch": 9.732810241585794, "grad_norm": 0.04078710824251175, "learning_rate": 0.01, "loss": 1.9606, "step": 94272 }, { "epoch": 9.733119966962626, "grad_norm": 0.06762789934873581, "learning_rate": 0.01, "loss": 1.9211, "step": 94275 }, { "epoch": 9.73342969233946, "grad_norm": 0.0777420923113823, "learning_rate": 0.01, "loss": 1.9331, "step": 94278 }, { "epoch": 9.733739417716292, "grad_norm": 0.07008164376020432, "learning_rate": 0.01, "loss": 1.914, "step": 94281 }, { "epoch": 9.734049143093124, "grad_norm": 0.08447692543268204, "learning_rate": 0.01, "loss": 1.9484, "step": 94284 }, { "epoch": 9.734358868469958, "grad_norm": 0.06624925881624222, "learning_rate": 0.01, "loss": 1.9502, "step": 94287 }, { "epoch": 9.73466859384679, "grad_norm": 0.04541005566716194, "learning_rate": 0.01, "loss": 1.9363, "step": 94290 }, { "epoch": 9.734978319223622, "grad_norm": 0.036812674254179, "learning_rate": 0.01, "loss": 1.9276, "step": 94293 }, { "epoch": 9.735288044600454, "grad_norm": 0.12216412276029587, "learning_rate": 0.01, "loss": 1.9525, "step": 94296 }, { "epoch": 9.735597769977288, "grad_norm": 0.06444603949785233, "learning_rate": 0.01, "loss": 1.9486, "step": 94299 }, { "epoch": 9.73590749535412, "grad_norm": 0.05880026891827583, "learning_rate": 0.01, "loss": 1.9337, "step": 94302 }, { "epoch": 9.736217220730952, "grad_norm": 0.09832989424467087, "learning_rate": 0.01, "loss": 1.9274, "step": 94305 }, { "epoch": 9.736526946107784, "grad_norm": 0.04116860777139664, "learning_rate": 0.01, "loss": 1.9517, "step": 94308 }, { "epoch": 9.736836671484618, "grad_norm": 0.08587076514959335, "learning_rate": 0.01, "loss": 1.9612, "step": 94311 }, { "epoch": 9.73714639686145, "grad_norm": 0.060674894601106644, "learning_rate": 0.01, "loss": 1.9093, "step": 94314 }, { "epoch": 9.737456122238282, "grad_norm": 0.10280962288379669, "learning_rate": 0.01, "loss": 1.9407, "step": 94317 }, { "epoch": 9.737765847615115, "grad_norm": 0.09550301730632782, "learning_rate": 0.01, "loss": 1.9486, "step": 94320 }, { "epoch": 9.738075572991947, "grad_norm": 0.04915456846356392, "learning_rate": 0.01, "loss": 1.9237, "step": 94323 }, { "epoch": 9.73838529836878, "grad_norm": 0.03588581085205078, "learning_rate": 0.01, "loss": 1.9588, "step": 94326 }, { "epoch": 9.738695023745612, "grad_norm": 0.03683014586567879, "learning_rate": 0.01, "loss": 1.919, "step": 94329 }, { "epoch": 9.739004749122445, "grad_norm": 0.11679235100746155, "learning_rate": 0.01, "loss": 1.9266, "step": 94332 }, { "epoch": 9.739314474499277, "grad_norm": 0.0934172049164772, "learning_rate": 0.01, "loss": 1.9327, "step": 94335 }, { "epoch": 9.73962419987611, "grad_norm": 0.05006689950823784, "learning_rate": 0.01, "loss": 1.9479, "step": 94338 }, { "epoch": 9.739933925252942, "grad_norm": 0.08405818790197372, "learning_rate": 0.01, "loss": 1.931, "step": 94341 }, { "epoch": 9.740243650629775, "grad_norm": 0.06463057547807693, "learning_rate": 0.01, "loss": 1.9456, "step": 94344 }, { "epoch": 9.740553376006607, "grad_norm": 0.05067126825451851, "learning_rate": 0.01, "loss": 1.9406, "step": 94347 }, { "epoch": 9.74086310138344, "grad_norm": 0.05063599720597267, "learning_rate": 0.01, "loss": 1.9256, "step": 94350 }, { "epoch": 9.741172826760273, "grad_norm": 0.056624624878168106, "learning_rate": 0.01, "loss": 1.9314, "step": 94353 }, { "epoch": 9.741482552137105, "grad_norm": 0.04202519729733467, "learning_rate": 0.01, "loss": 1.9095, "step": 94356 }, { "epoch": 9.741792277513937, "grad_norm": 0.0348421148955822, "learning_rate": 0.01, "loss": 1.9291, "step": 94359 }, { "epoch": 9.74210200289077, "grad_norm": 0.04054299741983414, "learning_rate": 0.01, "loss": 1.9467, "step": 94362 }, { "epoch": 9.742411728267603, "grad_norm": 0.1300070583820343, "learning_rate": 0.01, "loss": 1.9077, "step": 94365 }, { "epoch": 9.742721453644435, "grad_norm": 0.05025966838002205, "learning_rate": 0.01, "loss": 1.9432, "step": 94368 }, { "epoch": 9.743031179021267, "grad_norm": 0.10579799860715866, "learning_rate": 0.01, "loss": 1.9267, "step": 94371 }, { "epoch": 9.7433409043981, "grad_norm": 0.06107889860868454, "learning_rate": 0.01, "loss": 1.9516, "step": 94374 }, { "epoch": 9.743650629774933, "grad_norm": 0.09993497282266617, "learning_rate": 0.01, "loss": 1.9374, "step": 94377 }, { "epoch": 9.743960355151765, "grad_norm": 0.04587731882929802, "learning_rate": 0.01, "loss": 1.9588, "step": 94380 }, { "epoch": 9.744270080528597, "grad_norm": 0.0887177363038063, "learning_rate": 0.01, "loss": 1.9407, "step": 94383 }, { "epoch": 9.744579805905431, "grad_norm": 0.056814342737197876, "learning_rate": 0.01, "loss": 1.9237, "step": 94386 }, { "epoch": 9.744889531282263, "grad_norm": 0.05701100826263428, "learning_rate": 0.01, "loss": 1.9252, "step": 94389 }, { "epoch": 9.745199256659095, "grad_norm": 0.06427867710590363, "learning_rate": 0.01, "loss": 1.9379, "step": 94392 }, { "epoch": 9.745508982035929, "grad_norm": 0.0418747141957283, "learning_rate": 0.01, "loss": 1.9203, "step": 94395 }, { "epoch": 9.745818707412761, "grad_norm": 0.04488182067871094, "learning_rate": 0.01, "loss": 1.9458, "step": 94398 }, { "epoch": 9.746128432789593, "grad_norm": 0.1035093367099762, "learning_rate": 0.01, "loss": 1.9219, "step": 94401 }, { "epoch": 9.746438158166425, "grad_norm": 0.03518347442150116, "learning_rate": 0.01, "loss": 1.9525, "step": 94404 }, { "epoch": 9.746747883543259, "grad_norm": 0.05361732468008995, "learning_rate": 0.01, "loss": 1.9594, "step": 94407 }, { "epoch": 9.747057608920091, "grad_norm": 0.08221562951803207, "learning_rate": 0.01, "loss": 1.9296, "step": 94410 }, { "epoch": 9.747367334296923, "grad_norm": 0.09609878063201904, "learning_rate": 0.01, "loss": 1.94, "step": 94413 }, { "epoch": 9.747677059673755, "grad_norm": 0.03668706491589546, "learning_rate": 0.01, "loss": 1.9487, "step": 94416 }, { "epoch": 9.747986785050589, "grad_norm": 0.04190170392394066, "learning_rate": 0.01, "loss": 1.9317, "step": 94419 }, { "epoch": 9.748296510427421, "grad_norm": 0.08498567342758179, "learning_rate": 0.01, "loss": 1.9211, "step": 94422 }, { "epoch": 9.748606235804253, "grad_norm": 0.07723391801118851, "learning_rate": 0.01, "loss": 1.9249, "step": 94425 }, { "epoch": 9.748915961181087, "grad_norm": 0.07648925483226776, "learning_rate": 0.01, "loss": 1.9396, "step": 94428 }, { "epoch": 9.749225686557919, "grad_norm": 0.06334717571735382, "learning_rate": 0.01, "loss": 1.9559, "step": 94431 }, { "epoch": 9.749535411934751, "grad_norm": 0.07004591822624207, "learning_rate": 0.01, "loss": 1.9171, "step": 94434 }, { "epoch": 9.749845137311583, "grad_norm": 0.05966423079371452, "learning_rate": 0.01, "loss": 1.9443, "step": 94437 }, { "epoch": 9.750154862688417, "grad_norm": 0.052176862955093384, "learning_rate": 0.01, "loss": 1.9335, "step": 94440 }, { "epoch": 9.750464588065249, "grad_norm": 0.04152917116880417, "learning_rate": 0.01, "loss": 1.9323, "step": 94443 }, { "epoch": 9.750774313442081, "grad_norm": 0.05614443123340607, "learning_rate": 0.01, "loss": 1.9251, "step": 94446 }, { "epoch": 9.751084038818913, "grad_norm": 0.039038725197315216, "learning_rate": 0.01, "loss": 1.9187, "step": 94449 }, { "epoch": 9.751393764195747, "grad_norm": 0.09913656860589981, "learning_rate": 0.01, "loss": 1.924, "step": 94452 }, { "epoch": 9.751703489572579, "grad_norm": 0.08466944843530655, "learning_rate": 0.01, "loss": 1.9195, "step": 94455 }, { "epoch": 9.752013214949411, "grad_norm": 0.13703182339668274, "learning_rate": 0.01, "loss": 1.9417, "step": 94458 }, { "epoch": 9.752322940326245, "grad_norm": 0.13580304384231567, "learning_rate": 0.01, "loss": 1.9358, "step": 94461 }, { "epoch": 9.752632665703077, "grad_norm": 0.0782763808965683, "learning_rate": 0.01, "loss": 1.9309, "step": 94464 }, { "epoch": 9.752942391079909, "grad_norm": 0.07290788739919662, "learning_rate": 0.01, "loss": 1.9311, "step": 94467 }, { "epoch": 9.753252116456741, "grad_norm": 0.085691437125206, "learning_rate": 0.01, "loss": 1.942, "step": 94470 }, { "epoch": 9.753561841833575, "grad_norm": 0.05388426035642624, "learning_rate": 0.01, "loss": 1.9403, "step": 94473 }, { "epoch": 9.753871567210407, "grad_norm": 0.1038847267627716, "learning_rate": 0.01, "loss": 1.9115, "step": 94476 }, { "epoch": 9.754181292587239, "grad_norm": 0.08317159861326218, "learning_rate": 0.01, "loss": 1.9177, "step": 94479 }, { "epoch": 9.754491017964071, "grad_norm": 0.05998384952545166, "learning_rate": 0.01, "loss": 1.9276, "step": 94482 }, { "epoch": 9.754800743340905, "grad_norm": 0.04784899204969406, "learning_rate": 0.01, "loss": 1.9318, "step": 94485 }, { "epoch": 9.755110468717737, "grad_norm": 0.05339213088154793, "learning_rate": 0.01, "loss": 1.9257, "step": 94488 }, { "epoch": 9.755420194094569, "grad_norm": 0.030975909903645515, "learning_rate": 0.01, "loss": 1.9257, "step": 94491 }, { "epoch": 9.755729919471403, "grad_norm": 0.069178506731987, "learning_rate": 0.01, "loss": 1.9448, "step": 94494 }, { "epoch": 9.756039644848235, "grad_norm": 0.07615378499031067, "learning_rate": 0.01, "loss": 1.9233, "step": 94497 }, { "epoch": 9.756349370225067, "grad_norm": 0.1228436827659607, "learning_rate": 0.01, "loss": 1.9437, "step": 94500 }, { "epoch": 9.7566590956019, "grad_norm": 0.046662211418151855, "learning_rate": 0.01, "loss": 1.9185, "step": 94503 }, { "epoch": 9.756968820978733, "grad_norm": 0.03317369148135185, "learning_rate": 0.01, "loss": 1.9339, "step": 94506 }, { "epoch": 9.757278546355565, "grad_norm": 0.048495639115571976, "learning_rate": 0.01, "loss": 1.9284, "step": 94509 }, { "epoch": 9.757588271732397, "grad_norm": 0.04089610278606415, "learning_rate": 0.01, "loss": 1.9178, "step": 94512 }, { "epoch": 9.75789799710923, "grad_norm": 0.0714641660451889, "learning_rate": 0.01, "loss": 1.9363, "step": 94515 }, { "epoch": 9.758207722486063, "grad_norm": 0.1012755036354065, "learning_rate": 0.01, "loss": 1.9407, "step": 94518 }, { "epoch": 9.758517447862895, "grad_norm": 0.09065414220094681, "learning_rate": 0.01, "loss": 1.9739, "step": 94521 }, { "epoch": 9.758827173239727, "grad_norm": 0.061119962483644485, "learning_rate": 0.01, "loss": 1.9474, "step": 94524 }, { "epoch": 9.75913689861656, "grad_norm": 0.11485234647989273, "learning_rate": 0.01, "loss": 1.9603, "step": 94527 }, { "epoch": 9.759446623993393, "grad_norm": 0.050607532262802124, "learning_rate": 0.01, "loss": 1.9246, "step": 94530 }, { "epoch": 9.759756349370225, "grad_norm": 0.07120644301176071, "learning_rate": 0.01, "loss": 1.9333, "step": 94533 }, { "epoch": 9.760066074747058, "grad_norm": 0.04502887651324272, "learning_rate": 0.01, "loss": 1.9279, "step": 94536 }, { "epoch": 9.76037580012389, "grad_norm": 0.07653278112411499, "learning_rate": 0.01, "loss": 1.9291, "step": 94539 }, { "epoch": 9.760685525500723, "grad_norm": 0.1033986359834671, "learning_rate": 0.01, "loss": 1.9357, "step": 94542 }, { "epoch": 9.760995250877555, "grad_norm": 0.08466217666864395, "learning_rate": 0.01, "loss": 1.9204, "step": 94545 }, { "epoch": 9.761304976254388, "grad_norm": 0.037229862064123154, "learning_rate": 0.01, "loss": 1.94, "step": 94548 }, { "epoch": 9.76161470163122, "grad_norm": 0.11992764472961426, "learning_rate": 0.01, "loss": 1.9378, "step": 94551 }, { "epoch": 9.761924427008053, "grad_norm": 0.052539899945259094, "learning_rate": 0.01, "loss": 1.9442, "step": 94554 }, { "epoch": 9.762234152384885, "grad_norm": 0.08188603818416595, "learning_rate": 0.01, "loss": 1.9159, "step": 94557 }, { "epoch": 9.762543877761718, "grad_norm": 0.09905125945806503, "learning_rate": 0.01, "loss": 1.9456, "step": 94560 }, { "epoch": 9.76285360313855, "grad_norm": 0.1362028270959854, "learning_rate": 0.01, "loss": 1.9493, "step": 94563 }, { "epoch": 9.763163328515382, "grad_norm": 0.07078815251588821, "learning_rate": 0.01, "loss": 1.9488, "step": 94566 }, { "epoch": 9.763473053892216, "grad_norm": 0.04589496925473213, "learning_rate": 0.01, "loss": 1.952, "step": 94569 }, { "epoch": 9.763782779269048, "grad_norm": 0.03925814852118492, "learning_rate": 0.01, "loss": 1.9362, "step": 94572 }, { "epoch": 9.76409250464588, "grad_norm": 0.08715146780014038, "learning_rate": 0.01, "loss": 1.936, "step": 94575 }, { "epoch": 9.764402230022712, "grad_norm": 0.05460555851459503, "learning_rate": 0.01, "loss": 1.9098, "step": 94578 }, { "epoch": 9.764711955399546, "grad_norm": 0.08715993911027908, "learning_rate": 0.01, "loss": 1.9385, "step": 94581 }, { "epoch": 9.765021680776378, "grad_norm": 0.07886404544115067, "learning_rate": 0.01, "loss": 1.9275, "step": 94584 }, { "epoch": 9.76533140615321, "grad_norm": 0.06578825414180756, "learning_rate": 0.01, "loss": 1.9441, "step": 94587 }, { "epoch": 9.765641131530042, "grad_norm": 0.06801187247037888, "learning_rate": 0.01, "loss": 1.9061, "step": 94590 }, { "epoch": 9.765950856906876, "grad_norm": 0.1005634069442749, "learning_rate": 0.01, "loss": 1.9672, "step": 94593 }, { "epoch": 9.766260582283708, "grad_norm": 0.04338005930185318, "learning_rate": 0.01, "loss": 1.9253, "step": 94596 }, { "epoch": 9.76657030766054, "grad_norm": 0.045400865375995636, "learning_rate": 0.01, "loss": 1.9268, "step": 94599 }, { "epoch": 9.766880033037374, "grad_norm": 0.19539475440979004, "learning_rate": 0.01, "loss": 1.9327, "step": 94602 }, { "epoch": 9.767189758414206, "grad_norm": 0.04993902146816254, "learning_rate": 0.01, "loss": 1.9427, "step": 94605 }, { "epoch": 9.767499483791038, "grad_norm": 0.036195334047079086, "learning_rate": 0.01, "loss": 1.9194, "step": 94608 }, { "epoch": 9.767809209167872, "grad_norm": 0.052537743002176285, "learning_rate": 0.01, "loss": 1.9354, "step": 94611 }, { "epoch": 9.768118934544704, "grad_norm": 0.03671710193157196, "learning_rate": 0.01, "loss": 1.9619, "step": 94614 }, { "epoch": 9.768428659921536, "grad_norm": 0.06318136304616928, "learning_rate": 0.01, "loss": 1.9268, "step": 94617 }, { "epoch": 9.768738385298368, "grad_norm": 0.05337478220462799, "learning_rate": 0.01, "loss": 1.9347, "step": 94620 }, { "epoch": 9.769048110675202, "grad_norm": 0.05959227681159973, "learning_rate": 0.01, "loss": 1.924, "step": 94623 }, { "epoch": 9.769357836052034, "grad_norm": 0.06474972516298294, "learning_rate": 0.01, "loss": 1.958, "step": 94626 }, { "epoch": 9.769667561428866, "grad_norm": 0.13635705411434174, "learning_rate": 0.01, "loss": 1.9043, "step": 94629 }, { "epoch": 9.769977286805698, "grad_norm": 0.047785691916942596, "learning_rate": 0.01, "loss": 1.9257, "step": 94632 }, { "epoch": 9.770287012182532, "grad_norm": 0.040347639471292496, "learning_rate": 0.01, "loss": 1.9164, "step": 94635 }, { "epoch": 9.770596737559364, "grad_norm": 0.05130305141210556, "learning_rate": 0.01, "loss": 1.9569, "step": 94638 }, { "epoch": 9.770906462936196, "grad_norm": 0.1306859850883484, "learning_rate": 0.01, "loss": 1.9269, "step": 94641 }, { "epoch": 9.77121618831303, "grad_norm": 0.06589134782552719, "learning_rate": 0.01, "loss": 1.9116, "step": 94644 }, { "epoch": 9.771525913689862, "grad_norm": 0.06414980441331863, "learning_rate": 0.01, "loss": 1.9322, "step": 94647 }, { "epoch": 9.771835639066694, "grad_norm": 0.0561249703168869, "learning_rate": 0.01, "loss": 1.9648, "step": 94650 }, { "epoch": 9.772145364443526, "grad_norm": 0.03154081106185913, "learning_rate": 0.01, "loss": 1.9302, "step": 94653 }, { "epoch": 9.77245508982036, "grad_norm": 0.044503260403871536, "learning_rate": 0.01, "loss": 1.9344, "step": 94656 }, { "epoch": 9.772764815197192, "grad_norm": 0.03981982544064522, "learning_rate": 0.01, "loss": 1.9253, "step": 94659 }, { "epoch": 9.773074540574024, "grad_norm": 0.061454884707927704, "learning_rate": 0.01, "loss": 1.9541, "step": 94662 }, { "epoch": 9.773384265950856, "grad_norm": 0.1729876846075058, "learning_rate": 0.01, "loss": 1.974, "step": 94665 }, { "epoch": 9.77369399132769, "grad_norm": 0.06331760436296463, "learning_rate": 0.01, "loss": 1.9429, "step": 94668 }, { "epoch": 9.774003716704522, "grad_norm": 0.06349468976259232, "learning_rate": 0.01, "loss": 1.9436, "step": 94671 }, { "epoch": 9.774313442081354, "grad_norm": 0.08989334106445312, "learning_rate": 0.01, "loss": 1.944, "step": 94674 }, { "epoch": 9.774623167458188, "grad_norm": 0.06618794798851013, "learning_rate": 0.01, "loss": 1.9491, "step": 94677 }, { "epoch": 9.77493289283502, "grad_norm": 0.031386930495500565, "learning_rate": 0.01, "loss": 1.9467, "step": 94680 }, { "epoch": 9.775242618211852, "grad_norm": 0.036483556032180786, "learning_rate": 0.01, "loss": 1.9436, "step": 94683 }, { "epoch": 9.775552343588684, "grad_norm": 0.043264761567115784, "learning_rate": 0.01, "loss": 1.9602, "step": 94686 }, { "epoch": 9.775862068965518, "grad_norm": 0.03680359944701195, "learning_rate": 0.01, "loss": 1.9309, "step": 94689 }, { "epoch": 9.77617179434235, "grad_norm": 0.06547412276268005, "learning_rate": 0.01, "loss": 1.9345, "step": 94692 }, { "epoch": 9.776481519719182, "grad_norm": 0.16053377091884613, "learning_rate": 0.01, "loss": 1.928, "step": 94695 }, { "epoch": 9.776791245096014, "grad_norm": 0.08217158168554306, "learning_rate": 0.01, "loss": 1.9302, "step": 94698 }, { "epoch": 9.777100970472848, "grad_norm": 0.14973968267440796, "learning_rate": 0.01, "loss": 1.9291, "step": 94701 }, { "epoch": 9.77741069584968, "grad_norm": 0.07262899726629257, "learning_rate": 0.01, "loss": 1.95, "step": 94704 }, { "epoch": 9.777720421226512, "grad_norm": 0.05001377314329147, "learning_rate": 0.01, "loss": 1.9468, "step": 94707 }, { "epoch": 9.778030146603346, "grad_norm": 0.06505423784255981, "learning_rate": 0.01, "loss": 1.946, "step": 94710 }, { "epoch": 9.778339871980178, "grad_norm": 0.04276656731963158, "learning_rate": 0.01, "loss": 1.9269, "step": 94713 }, { "epoch": 9.77864959735701, "grad_norm": 0.03513118252158165, "learning_rate": 0.01, "loss": 1.9129, "step": 94716 }, { "epoch": 9.778959322733842, "grad_norm": 0.0770474448800087, "learning_rate": 0.01, "loss": 1.9257, "step": 94719 }, { "epoch": 9.779269048110676, "grad_norm": 0.050384245812892914, "learning_rate": 0.01, "loss": 1.9336, "step": 94722 }, { "epoch": 9.779578773487508, "grad_norm": 0.06375786662101746, "learning_rate": 0.01, "loss": 1.9245, "step": 94725 }, { "epoch": 9.77988849886434, "grad_norm": 0.16862791776657104, "learning_rate": 0.01, "loss": 1.9146, "step": 94728 }, { "epoch": 9.780198224241174, "grad_norm": 0.07121732085943222, "learning_rate": 0.01, "loss": 1.9158, "step": 94731 }, { "epoch": 9.780507949618006, "grad_norm": 0.1161705031991005, "learning_rate": 0.01, "loss": 1.9512, "step": 94734 }, { "epoch": 9.780817674994838, "grad_norm": 0.08478477597236633, "learning_rate": 0.01, "loss": 1.9322, "step": 94737 }, { "epoch": 9.78112740037167, "grad_norm": 0.1556144654750824, "learning_rate": 0.01, "loss": 1.9303, "step": 94740 }, { "epoch": 9.781437125748504, "grad_norm": 0.09649685770273209, "learning_rate": 0.01, "loss": 1.9551, "step": 94743 }, { "epoch": 9.781746851125336, "grad_norm": 0.04481673240661621, "learning_rate": 0.01, "loss": 1.9421, "step": 94746 }, { "epoch": 9.782056576502168, "grad_norm": 0.0322875939309597, "learning_rate": 0.01, "loss": 1.9291, "step": 94749 }, { "epoch": 9.782366301879001, "grad_norm": 0.042956385761499405, "learning_rate": 0.01, "loss": 1.8979, "step": 94752 }, { "epoch": 9.782676027255834, "grad_norm": 0.06072737276554108, "learning_rate": 0.01, "loss": 1.9472, "step": 94755 }, { "epoch": 9.782985752632666, "grad_norm": 0.04351195693016052, "learning_rate": 0.01, "loss": 1.9403, "step": 94758 }, { "epoch": 9.783295478009498, "grad_norm": 0.044757772237062454, "learning_rate": 0.01, "loss": 1.9253, "step": 94761 }, { "epoch": 9.783605203386331, "grad_norm": 0.10578258335590363, "learning_rate": 0.01, "loss": 1.9344, "step": 94764 }, { "epoch": 9.783914928763163, "grad_norm": 0.1356111615896225, "learning_rate": 0.01, "loss": 1.906, "step": 94767 }, { "epoch": 9.784224654139996, "grad_norm": 0.07396072149276733, "learning_rate": 0.01, "loss": 1.9511, "step": 94770 }, { "epoch": 9.784534379516828, "grad_norm": 0.08166845142841339, "learning_rate": 0.01, "loss": 1.9632, "step": 94773 }, { "epoch": 9.784844104893661, "grad_norm": 0.07386428862810135, "learning_rate": 0.01, "loss": 1.9158, "step": 94776 }, { "epoch": 9.785153830270493, "grad_norm": 0.06322996318340302, "learning_rate": 0.01, "loss": 1.9479, "step": 94779 }, { "epoch": 9.785463555647326, "grad_norm": 0.042523063719272614, "learning_rate": 0.01, "loss": 1.9286, "step": 94782 }, { "epoch": 9.78577328102416, "grad_norm": 0.05621153116226196, "learning_rate": 0.01, "loss": 1.912, "step": 94785 }, { "epoch": 9.786083006400991, "grad_norm": 0.05215676873922348, "learning_rate": 0.01, "loss": 1.9344, "step": 94788 }, { "epoch": 9.786392731777823, "grad_norm": 0.05371996760368347, "learning_rate": 0.01, "loss": 1.9234, "step": 94791 }, { "epoch": 9.786702457154655, "grad_norm": 0.05981604382395744, "learning_rate": 0.01, "loss": 1.934, "step": 94794 }, { "epoch": 9.78701218253149, "grad_norm": 0.048599109053611755, "learning_rate": 0.01, "loss": 1.9329, "step": 94797 }, { "epoch": 9.787321907908321, "grad_norm": 0.045395396649837494, "learning_rate": 0.01, "loss": 1.9298, "step": 94800 }, { "epoch": 9.787631633285153, "grad_norm": 0.06925131380558014, "learning_rate": 0.01, "loss": 1.9368, "step": 94803 }, { "epoch": 9.787941358661985, "grad_norm": 0.1601201444864273, "learning_rate": 0.01, "loss": 1.9457, "step": 94806 }, { "epoch": 9.78825108403882, "grad_norm": 0.41700854897499084, "learning_rate": 0.01, "loss": 2.0459, "step": 94809 }, { "epoch": 9.788560809415651, "grad_norm": 0.1901201605796814, "learning_rate": 0.01, "loss": 1.9683, "step": 94812 }, { "epoch": 9.788870534792483, "grad_norm": 0.09801574796438217, "learning_rate": 0.01, "loss": 1.9844, "step": 94815 }, { "epoch": 9.789180260169317, "grad_norm": 0.08561704307794571, "learning_rate": 0.01, "loss": 1.951, "step": 94818 }, { "epoch": 9.78948998554615, "grad_norm": 0.05033617466688156, "learning_rate": 0.01, "loss": 1.9493, "step": 94821 }, { "epoch": 9.789799710922981, "grad_norm": 0.05378749221563339, "learning_rate": 0.01, "loss": 1.955, "step": 94824 }, { "epoch": 9.790109436299813, "grad_norm": 0.05089649185538292, "learning_rate": 0.01, "loss": 1.9441, "step": 94827 }, { "epoch": 9.790419161676647, "grad_norm": 0.04963233694434166, "learning_rate": 0.01, "loss": 1.9557, "step": 94830 }, { "epoch": 9.79072888705348, "grad_norm": 0.06947825849056244, "learning_rate": 0.01, "loss": 1.9243, "step": 94833 }, { "epoch": 9.791038612430311, "grad_norm": 0.09509982913732529, "learning_rate": 0.01, "loss": 1.9237, "step": 94836 }, { "epoch": 9.791348337807145, "grad_norm": 0.04908059537410736, "learning_rate": 0.01, "loss": 1.9117, "step": 94839 }, { "epoch": 9.791658063183977, "grad_norm": 0.03284559026360512, "learning_rate": 0.01, "loss": 1.9412, "step": 94842 }, { "epoch": 9.79196778856081, "grad_norm": 0.04718421399593353, "learning_rate": 0.01, "loss": 1.9614, "step": 94845 }, { "epoch": 9.792277513937641, "grad_norm": 0.05559903010725975, "learning_rate": 0.01, "loss": 1.9419, "step": 94848 }, { "epoch": 9.792587239314475, "grad_norm": 0.0743434950709343, "learning_rate": 0.01, "loss": 1.9371, "step": 94851 }, { "epoch": 9.792896964691307, "grad_norm": 0.07633355259895325, "learning_rate": 0.01, "loss": 1.9446, "step": 94854 }, { "epoch": 9.79320669006814, "grad_norm": 0.10470660775899887, "learning_rate": 0.01, "loss": 1.9016, "step": 94857 }, { "epoch": 9.793516415444973, "grad_norm": 0.16582956910133362, "learning_rate": 0.01, "loss": 1.9171, "step": 94860 }, { "epoch": 9.793826140821805, "grad_norm": 0.04770703613758087, "learning_rate": 0.01, "loss": 1.9448, "step": 94863 }, { "epoch": 9.794135866198637, "grad_norm": 0.11444547772407532, "learning_rate": 0.01, "loss": 1.9074, "step": 94866 }, { "epoch": 9.79444559157547, "grad_norm": 0.04240221902728081, "learning_rate": 0.01, "loss": 1.9172, "step": 94869 }, { "epoch": 9.794755316952303, "grad_norm": 0.05061916634440422, "learning_rate": 0.01, "loss": 1.9362, "step": 94872 }, { "epoch": 9.795065042329135, "grad_norm": 0.04344547167420387, "learning_rate": 0.01, "loss": 1.9249, "step": 94875 }, { "epoch": 9.795374767705967, "grad_norm": 0.05513598024845123, "learning_rate": 0.01, "loss": 1.9064, "step": 94878 }, { "epoch": 9.795684493082799, "grad_norm": 0.08316171914339066, "learning_rate": 0.01, "loss": 1.9415, "step": 94881 }, { "epoch": 9.795994218459633, "grad_norm": 0.06647349894046783, "learning_rate": 0.01, "loss": 1.9561, "step": 94884 }, { "epoch": 9.796303943836465, "grad_norm": 0.05216540768742561, "learning_rate": 0.01, "loss": 1.9407, "step": 94887 }, { "epoch": 9.796613669213297, "grad_norm": 0.05408751964569092, "learning_rate": 0.01, "loss": 1.925, "step": 94890 }, { "epoch": 9.79692339459013, "grad_norm": 0.039487335830926895, "learning_rate": 0.01, "loss": 1.9244, "step": 94893 }, { "epoch": 9.797233119966963, "grad_norm": 0.054186344146728516, "learning_rate": 0.01, "loss": 1.9561, "step": 94896 }, { "epoch": 9.797542845343795, "grad_norm": 0.03691032901406288, "learning_rate": 0.01, "loss": 1.9377, "step": 94899 }, { "epoch": 9.797852570720627, "grad_norm": 0.12448958307504654, "learning_rate": 0.01, "loss": 1.9059, "step": 94902 }, { "epoch": 9.79816229609746, "grad_norm": 0.1222798153758049, "learning_rate": 0.01, "loss": 1.9257, "step": 94905 }, { "epoch": 9.798472021474293, "grad_norm": 0.04015497863292694, "learning_rate": 0.01, "loss": 1.9308, "step": 94908 }, { "epoch": 9.798781746851125, "grad_norm": 0.04652899503707886, "learning_rate": 0.01, "loss": 1.9494, "step": 94911 }, { "epoch": 9.799091472227957, "grad_norm": 0.07459242641925812, "learning_rate": 0.01, "loss": 1.9099, "step": 94914 }, { "epoch": 9.79940119760479, "grad_norm": 0.05896695703268051, "learning_rate": 0.01, "loss": 1.9283, "step": 94917 }, { "epoch": 9.799710922981623, "grad_norm": 0.12853103876113892, "learning_rate": 0.01, "loss": 1.9474, "step": 94920 }, { "epoch": 9.800020648358455, "grad_norm": 0.04624097794294357, "learning_rate": 0.01, "loss": 1.9381, "step": 94923 }, { "epoch": 9.800330373735289, "grad_norm": 0.09572327882051468, "learning_rate": 0.01, "loss": 1.9139, "step": 94926 }, { "epoch": 9.80064009911212, "grad_norm": 0.03788885474205017, "learning_rate": 0.01, "loss": 1.9293, "step": 94929 }, { "epoch": 9.800949824488953, "grad_norm": 0.06076391786336899, "learning_rate": 0.01, "loss": 1.9415, "step": 94932 }, { "epoch": 9.801259549865785, "grad_norm": 0.0562455952167511, "learning_rate": 0.01, "loss": 1.9321, "step": 94935 }, { "epoch": 9.801569275242619, "grad_norm": 0.04046221449971199, "learning_rate": 0.01, "loss": 1.9443, "step": 94938 }, { "epoch": 9.80187900061945, "grad_norm": 0.05040094256401062, "learning_rate": 0.01, "loss": 1.9249, "step": 94941 }, { "epoch": 9.802188725996283, "grad_norm": 0.042106032371520996, "learning_rate": 0.01, "loss": 1.9324, "step": 94944 }, { "epoch": 9.802498451373117, "grad_norm": 0.04450448974967003, "learning_rate": 0.01, "loss": 1.9412, "step": 94947 }, { "epoch": 9.802808176749949, "grad_norm": 0.04702811688184738, "learning_rate": 0.01, "loss": 1.9374, "step": 94950 }, { "epoch": 9.80311790212678, "grad_norm": 0.05073699355125427, "learning_rate": 0.01, "loss": 1.9057, "step": 94953 }, { "epoch": 9.803427627503613, "grad_norm": 0.0670652911067009, "learning_rate": 0.01, "loss": 1.9574, "step": 94956 }, { "epoch": 9.803737352880447, "grad_norm": 0.10583470016717911, "learning_rate": 0.01, "loss": 1.9259, "step": 94959 }, { "epoch": 9.804047078257279, "grad_norm": 0.11010700464248657, "learning_rate": 0.01, "loss": 1.9356, "step": 94962 }, { "epoch": 9.80435680363411, "grad_norm": 0.04617714136838913, "learning_rate": 0.01, "loss": 1.9216, "step": 94965 }, { "epoch": 9.804666529010944, "grad_norm": 0.19462630152702332, "learning_rate": 0.01, "loss": 1.9172, "step": 94968 }, { "epoch": 9.804976254387777, "grad_norm": 0.04227757081389427, "learning_rate": 0.01, "loss": 1.9372, "step": 94971 }, { "epoch": 9.805285979764609, "grad_norm": 0.04136215150356293, "learning_rate": 0.01, "loss": 1.935, "step": 94974 }, { "epoch": 9.80559570514144, "grad_norm": 0.032104820013046265, "learning_rate": 0.01, "loss": 1.9243, "step": 94977 }, { "epoch": 9.805905430518274, "grad_norm": 0.03970668092370033, "learning_rate": 0.01, "loss": 1.9298, "step": 94980 }, { "epoch": 9.806215155895107, "grad_norm": 0.046333905309438705, "learning_rate": 0.01, "loss": 1.9247, "step": 94983 }, { "epoch": 9.806524881271939, "grad_norm": 0.09408304840326309, "learning_rate": 0.01, "loss": 1.9318, "step": 94986 }, { "epoch": 9.80683460664877, "grad_norm": 0.061889585107564926, "learning_rate": 0.01, "loss": 1.9415, "step": 94989 }, { "epoch": 9.807144332025604, "grad_norm": 0.05078985542058945, "learning_rate": 0.01, "loss": 1.941, "step": 94992 }, { "epoch": 9.807454057402436, "grad_norm": 0.04673658683896065, "learning_rate": 0.01, "loss": 1.9351, "step": 94995 }, { "epoch": 9.807763782779269, "grad_norm": 0.04183147847652435, "learning_rate": 0.01, "loss": 1.9289, "step": 94998 }, { "epoch": 9.808073508156102, "grad_norm": 0.050529371947050095, "learning_rate": 0.01, "loss": 1.9452, "step": 95001 }, { "epoch": 9.808383233532934, "grad_norm": 0.05828772857785225, "learning_rate": 0.01, "loss": 1.951, "step": 95004 }, { "epoch": 9.808692958909766, "grad_norm": 0.11839016526937485, "learning_rate": 0.01, "loss": 1.9358, "step": 95007 }, { "epoch": 9.809002684286598, "grad_norm": 0.05272934213280678, "learning_rate": 0.01, "loss": 1.9626, "step": 95010 }, { "epoch": 9.809312409663432, "grad_norm": 0.07402176409959793, "learning_rate": 0.01, "loss": 1.9488, "step": 95013 }, { "epoch": 9.809622135040264, "grad_norm": 0.0874079018831253, "learning_rate": 0.01, "loss": 1.9386, "step": 95016 }, { "epoch": 9.809931860417096, "grad_norm": 0.050489626824855804, "learning_rate": 0.01, "loss": 1.9425, "step": 95019 }, { "epoch": 9.810241585793928, "grad_norm": 0.07709213346242905, "learning_rate": 0.01, "loss": 1.9288, "step": 95022 }, { "epoch": 9.810551311170762, "grad_norm": 0.07870500534772873, "learning_rate": 0.01, "loss": 1.9351, "step": 95025 }, { "epoch": 9.810861036547594, "grad_norm": 0.1378456950187683, "learning_rate": 0.01, "loss": 1.9605, "step": 95028 }, { "epoch": 9.811170761924426, "grad_norm": 0.10447420179843903, "learning_rate": 0.01, "loss": 1.9595, "step": 95031 }, { "epoch": 9.81148048730126, "grad_norm": 0.08695314824581146, "learning_rate": 0.01, "loss": 1.9191, "step": 95034 }, { "epoch": 9.811790212678092, "grad_norm": 0.03956886753439903, "learning_rate": 0.01, "loss": 1.9427, "step": 95037 }, { "epoch": 9.812099938054924, "grad_norm": 0.03908482939004898, "learning_rate": 0.01, "loss": 1.9605, "step": 95040 }, { "epoch": 9.812409663431756, "grad_norm": 0.04740505293011665, "learning_rate": 0.01, "loss": 1.9397, "step": 95043 }, { "epoch": 9.81271938880859, "grad_norm": 0.07152485102415085, "learning_rate": 0.01, "loss": 1.9461, "step": 95046 }, { "epoch": 9.813029114185422, "grad_norm": 0.09586811810731888, "learning_rate": 0.01, "loss": 1.9458, "step": 95049 }, { "epoch": 9.813338839562254, "grad_norm": 0.14370545744895935, "learning_rate": 0.01, "loss": 1.9263, "step": 95052 }, { "epoch": 9.813648564939088, "grad_norm": 0.10440492630004883, "learning_rate": 0.01, "loss": 1.9227, "step": 95055 }, { "epoch": 9.81395829031592, "grad_norm": 0.06501776725053787, "learning_rate": 0.01, "loss": 1.9388, "step": 95058 }, { "epoch": 9.814268015692752, "grad_norm": 0.06675376743078232, "learning_rate": 0.01, "loss": 1.9386, "step": 95061 }, { "epoch": 9.814577741069584, "grad_norm": 0.05271616205573082, "learning_rate": 0.01, "loss": 1.9465, "step": 95064 }, { "epoch": 9.814887466446418, "grad_norm": 0.05236978828907013, "learning_rate": 0.01, "loss": 1.9294, "step": 95067 }, { "epoch": 9.81519719182325, "grad_norm": 0.03621961921453476, "learning_rate": 0.01, "loss": 1.9346, "step": 95070 }, { "epoch": 9.815506917200082, "grad_norm": 0.06704110652208328, "learning_rate": 0.01, "loss": 1.9458, "step": 95073 }, { "epoch": 9.815816642576916, "grad_norm": 0.09148068726062775, "learning_rate": 0.01, "loss": 1.9289, "step": 95076 }, { "epoch": 9.816126367953748, "grad_norm": 0.0595364049077034, "learning_rate": 0.01, "loss": 1.9681, "step": 95079 }, { "epoch": 9.81643609333058, "grad_norm": 0.14149333536624908, "learning_rate": 0.01, "loss": 1.9525, "step": 95082 }, { "epoch": 9.816745818707412, "grad_norm": 0.04914262518286705, "learning_rate": 0.01, "loss": 1.9284, "step": 95085 }, { "epoch": 9.817055544084246, "grad_norm": 0.04873903468251228, "learning_rate": 0.01, "loss": 1.9344, "step": 95088 }, { "epoch": 9.817365269461078, "grad_norm": 0.08692160248756409, "learning_rate": 0.01, "loss": 1.9291, "step": 95091 }, { "epoch": 9.81767499483791, "grad_norm": 0.057351529598236084, "learning_rate": 0.01, "loss": 1.9342, "step": 95094 }, { "epoch": 9.817984720214742, "grad_norm": 0.0691007673740387, "learning_rate": 0.01, "loss": 1.9619, "step": 95097 }, { "epoch": 9.818294445591576, "grad_norm": 0.08830489963293076, "learning_rate": 0.01, "loss": 1.955, "step": 95100 }, { "epoch": 9.818604170968408, "grad_norm": 0.03572781756520271, "learning_rate": 0.01, "loss": 1.9496, "step": 95103 }, { "epoch": 9.81891389634524, "grad_norm": 0.09434852749109268, "learning_rate": 0.01, "loss": 1.9275, "step": 95106 }, { "epoch": 9.819223621722074, "grad_norm": 0.09754703938961029, "learning_rate": 0.01, "loss": 1.9004, "step": 95109 }, { "epoch": 9.819533347098906, "grad_norm": 0.0586463026702404, "learning_rate": 0.01, "loss": 1.9515, "step": 95112 }, { "epoch": 9.819843072475738, "grad_norm": 0.046134430915117264, "learning_rate": 0.01, "loss": 1.9454, "step": 95115 }, { "epoch": 9.82015279785257, "grad_norm": 0.05168531462550163, "learning_rate": 0.01, "loss": 1.9197, "step": 95118 }, { "epoch": 9.820462523229404, "grad_norm": 0.08128103613853455, "learning_rate": 0.01, "loss": 1.9584, "step": 95121 }, { "epoch": 9.820772248606236, "grad_norm": 0.06025461107492447, "learning_rate": 0.01, "loss": 1.9477, "step": 95124 }, { "epoch": 9.821081973983068, "grad_norm": 0.07480666786432266, "learning_rate": 0.01, "loss": 1.9293, "step": 95127 }, { "epoch": 9.8213916993599, "grad_norm": 0.04217499494552612, "learning_rate": 0.01, "loss": 1.9424, "step": 95130 }, { "epoch": 9.821701424736734, "grad_norm": 0.12061578780412674, "learning_rate": 0.01, "loss": 1.9413, "step": 95133 }, { "epoch": 9.822011150113566, "grad_norm": 0.04855797812342644, "learning_rate": 0.01, "loss": 1.9355, "step": 95136 }, { "epoch": 9.822320875490398, "grad_norm": 0.05785534158349037, "learning_rate": 0.01, "loss": 1.9291, "step": 95139 }, { "epoch": 9.822630600867232, "grad_norm": 0.09376546740531921, "learning_rate": 0.01, "loss": 1.9366, "step": 95142 }, { "epoch": 9.822940326244064, "grad_norm": 0.03916393965482712, "learning_rate": 0.01, "loss": 1.937, "step": 95145 }, { "epoch": 9.823250051620896, "grad_norm": 0.07211345434188843, "learning_rate": 0.01, "loss": 1.9181, "step": 95148 }, { "epoch": 9.823559776997728, "grad_norm": 0.05421319231390953, "learning_rate": 0.01, "loss": 1.9018, "step": 95151 }, { "epoch": 9.823869502374562, "grad_norm": 0.09411376714706421, "learning_rate": 0.01, "loss": 1.9278, "step": 95154 }, { "epoch": 9.824179227751394, "grad_norm": 0.09898379445075989, "learning_rate": 0.01, "loss": 1.9298, "step": 95157 }, { "epoch": 9.824488953128226, "grad_norm": 0.05836961045861244, "learning_rate": 0.01, "loss": 1.9357, "step": 95160 }, { "epoch": 9.82479867850506, "grad_norm": 0.03766075149178505, "learning_rate": 0.01, "loss": 1.9329, "step": 95163 }, { "epoch": 9.825108403881892, "grad_norm": 0.033823855221271515, "learning_rate": 0.01, "loss": 1.9138, "step": 95166 }, { "epoch": 9.825418129258724, "grad_norm": 0.04306140914559364, "learning_rate": 0.01, "loss": 1.9153, "step": 95169 }, { "epoch": 9.825727854635556, "grad_norm": 0.03758104890584946, "learning_rate": 0.01, "loss": 1.9573, "step": 95172 }, { "epoch": 9.82603758001239, "grad_norm": 0.05285748839378357, "learning_rate": 0.01, "loss": 1.9483, "step": 95175 }, { "epoch": 9.826347305389222, "grad_norm": 0.04774449020624161, "learning_rate": 0.01, "loss": 1.9241, "step": 95178 }, { "epoch": 9.826657030766054, "grad_norm": 0.04499258100986481, "learning_rate": 0.01, "loss": 1.9259, "step": 95181 }, { "epoch": 9.826966756142888, "grad_norm": 0.06346732378005981, "learning_rate": 0.01, "loss": 1.9355, "step": 95184 }, { "epoch": 9.82727648151972, "grad_norm": 0.15880587697029114, "learning_rate": 0.01, "loss": 1.9274, "step": 95187 }, { "epoch": 9.827586206896552, "grad_norm": 0.07601436227560043, "learning_rate": 0.01, "loss": 1.9565, "step": 95190 }, { "epoch": 9.827895932273384, "grad_norm": 0.04568092152476311, "learning_rate": 0.01, "loss": 1.9216, "step": 95193 }, { "epoch": 9.828205657650217, "grad_norm": 0.052929848432540894, "learning_rate": 0.01, "loss": 1.926, "step": 95196 }, { "epoch": 9.82851538302705, "grad_norm": 0.04511851444840431, "learning_rate": 0.01, "loss": 1.9511, "step": 95199 }, { "epoch": 9.828825108403882, "grad_norm": 0.09903811663389206, "learning_rate": 0.01, "loss": 1.9623, "step": 95202 }, { "epoch": 9.829134833780714, "grad_norm": 0.062109824270009995, "learning_rate": 0.01, "loss": 1.9312, "step": 95205 }, { "epoch": 9.829444559157547, "grad_norm": 0.08777434378862381, "learning_rate": 0.01, "loss": 1.9229, "step": 95208 }, { "epoch": 9.82975428453438, "grad_norm": 0.06251123547554016, "learning_rate": 0.01, "loss": 1.9365, "step": 95211 }, { "epoch": 9.830064009911212, "grad_norm": 0.09451593458652496, "learning_rate": 0.01, "loss": 1.9007, "step": 95214 }, { "epoch": 9.830373735288045, "grad_norm": 0.07134056836366653, "learning_rate": 0.01, "loss": 1.9496, "step": 95217 }, { "epoch": 9.830683460664877, "grad_norm": 0.0673132911324501, "learning_rate": 0.01, "loss": 1.9035, "step": 95220 }, { "epoch": 9.83099318604171, "grad_norm": 0.13773980736732483, "learning_rate": 0.01, "loss": 1.9176, "step": 95223 }, { "epoch": 9.831302911418542, "grad_norm": 0.0492391400039196, "learning_rate": 0.01, "loss": 1.9364, "step": 95226 }, { "epoch": 9.831612636795375, "grad_norm": 0.043367672711610794, "learning_rate": 0.01, "loss": 1.9381, "step": 95229 }, { "epoch": 9.831922362172207, "grad_norm": 0.03407084569334984, "learning_rate": 0.01, "loss": 1.9286, "step": 95232 }, { "epoch": 9.83223208754904, "grad_norm": 0.0420520156621933, "learning_rate": 0.01, "loss": 1.9376, "step": 95235 }, { "epoch": 9.832541812925871, "grad_norm": 0.06530791521072388, "learning_rate": 0.01, "loss": 1.9447, "step": 95238 }, { "epoch": 9.832851538302705, "grad_norm": 0.061357442289590836, "learning_rate": 0.01, "loss": 1.9157, "step": 95241 }, { "epoch": 9.833161263679537, "grad_norm": 0.04315570369362831, "learning_rate": 0.01, "loss": 1.9268, "step": 95244 }, { "epoch": 9.83347098905637, "grad_norm": 0.19494427740573883, "learning_rate": 0.01, "loss": 1.9403, "step": 95247 }, { "epoch": 9.833780714433203, "grad_norm": 0.0513029620051384, "learning_rate": 0.01, "loss": 1.9549, "step": 95250 }, { "epoch": 9.834090439810035, "grad_norm": 0.04042904078960419, "learning_rate": 0.01, "loss": 1.9501, "step": 95253 }, { "epoch": 9.834400165186867, "grad_norm": 0.03216146305203438, "learning_rate": 0.01, "loss": 1.9638, "step": 95256 }, { "epoch": 9.8347098905637, "grad_norm": 0.030513131991028786, "learning_rate": 0.01, "loss": 1.9264, "step": 95259 }, { "epoch": 9.835019615940533, "grad_norm": 0.05615859106183052, "learning_rate": 0.01, "loss": 1.9515, "step": 95262 }, { "epoch": 9.835329341317365, "grad_norm": 0.04247753694653511, "learning_rate": 0.01, "loss": 1.9669, "step": 95265 }, { "epoch": 9.835639066694197, "grad_norm": 0.03994342312216759, "learning_rate": 0.01, "loss": 1.9195, "step": 95268 }, { "epoch": 9.835948792071031, "grad_norm": 0.05301568657159805, "learning_rate": 0.01, "loss": 1.9359, "step": 95271 }, { "epoch": 9.836258517447863, "grad_norm": 0.1442309468984604, "learning_rate": 0.01, "loss": 1.942, "step": 95274 }, { "epoch": 9.836568242824695, "grad_norm": 0.12944433093070984, "learning_rate": 0.01, "loss": 1.9282, "step": 95277 }, { "epoch": 9.836877968201527, "grad_norm": 0.09283921867609024, "learning_rate": 0.01, "loss": 1.9402, "step": 95280 }, { "epoch": 9.837187693578361, "grad_norm": 0.0523645244538784, "learning_rate": 0.01, "loss": 1.9327, "step": 95283 }, { "epoch": 9.837497418955193, "grad_norm": 0.0349854975938797, "learning_rate": 0.01, "loss": 1.9441, "step": 95286 }, { "epoch": 9.837807144332025, "grad_norm": 0.03146263211965561, "learning_rate": 0.01, "loss": 1.9372, "step": 95289 }, { "epoch": 9.838116869708859, "grad_norm": 0.04970834031701088, "learning_rate": 0.01, "loss": 1.9534, "step": 95292 }, { "epoch": 9.838426595085691, "grad_norm": 0.061335138976573944, "learning_rate": 0.01, "loss": 1.9435, "step": 95295 }, { "epoch": 9.838736320462523, "grad_norm": 0.09407352656126022, "learning_rate": 0.01, "loss": 1.9455, "step": 95298 }, { "epoch": 9.839046045839355, "grad_norm": 0.10056418925523758, "learning_rate": 0.01, "loss": 1.9404, "step": 95301 }, { "epoch": 9.839355771216189, "grad_norm": 0.13487358391284943, "learning_rate": 0.01, "loss": 1.956, "step": 95304 }, { "epoch": 9.839665496593021, "grad_norm": 0.06113987788558006, "learning_rate": 0.01, "loss": 1.9075, "step": 95307 }, { "epoch": 9.839975221969853, "grad_norm": 0.050525832921266556, "learning_rate": 0.01, "loss": 1.9375, "step": 95310 }, { "epoch": 9.840284947346685, "grad_norm": 0.034293282777071, "learning_rate": 0.01, "loss": 1.9231, "step": 95313 }, { "epoch": 9.840594672723519, "grad_norm": 0.052330173552036285, "learning_rate": 0.01, "loss": 1.9706, "step": 95316 }, { "epoch": 9.840904398100351, "grad_norm": 0.06765829026699066, "learning_rate": 0.01, "loss": 1.9206, "step": 95319 }, { "epoch": 9.841214123477183, "grad_norm": 0.04363483935594559, "learning_rate": 0.01, "loss": 1.9322, "step": 95322 }, { "epoch": 9.841523848854017, "grad_norm": 0.10867563635110855, "learning_rate": 0.01, "loss": 1.9486, "step": 95325 }, { "epoch": 9.841833574230849, "grad_norm": 0.04092460125684738, "learning_rate": 0.01, "loss": 1.9289, "step": 95328 }, { "epoch": 9.842143299607681, "grad_norm": 0.07562436908483505, "learning_rate": 0.01, "loss": 1.9587, "step": 95331 }, { "epoch": 9.842453024984513, "grad_norm": 0.08705686777830124, "learning_rate": 0.01, "loss": 1.9558, "step": 95334 }, { "epoch": 9.842762750361347, "grad_norm": 0.0464167594909668, "learning_rate": 0.01, "loss": 1.9507, "step": 95337 }, { "epoch": 9.843072475738179, "grad_norm": 0.046409085392951965, "learning_rate": 0.01, "loss": 1.9583, "step": 95340 }, { "epoch": 9.843382201115011, "grad_norm": 0.04688123241066933, "learning_rate": 0.01, "loss": 1.915, "step": 95343 }, { "epoch": 9.843691926491843, "grad_norm": 0.04283418133854866, "learning_rate": 0.01, "loss": 1.9217, "step": 95346 }, { "epoch": 9.844001651868677, "grad_norm": 0.040036801248788834, "learning_rate": 0.01, "loss": 1.9574, "step": 95349 }, { "epoch": 9.844311377245509, "grad_norm": 0.060970354825258255, "learning_rate": 0.01, "loss": 1.9319, "step": 95352 }, { "epoch": 9.844621102622341, "grad_norm": 0.08856649696826935, "learning_rate": 0.01, "loss": 1.949, "step": 95355 }, { "epoch": 9.844930827999175, "grad_norm": 0.07604091614484787, "learning_rate": 0.01, "loss": 1.9404, "step": 95358 }, { "epoch": 9.845240553376007, "grad_norm": 0.045264385640621185, "learning_rate": 0.01, "loss": 1.9415, "step": 95361 }, { "epoch": 9.845550278752839, "grad_norm": 0.0438438318669796, "learning_rate": 0.01, "loss": 1.9179, "step": 95364 }, { "epoch": 9.84586000412967, "grad_norm": 0.07063893228769302, "learning_rate": 0.01, "loss": 1.9217, "step": 95367 }, { "epoch": 9.846169729506505, "grad_norm": 0.05054168403148651, "learning_rate": 0.01, "loss": 1.962, "step": 95370 }, { "epoch": 9.846479454883337, "grad_norm": 0.04927874729037285, "learning_rate": 0.01, "loss": 1.946, "step": 95373 }, { "epoch": 9.846789180260169, "grad_norm": 0.14182618260383606, "learning_rate": 0.01, "loss": 1.9462, "step": 95376 }, { "epoch": 9.847098905637003, "grad_norm": 0.10691936314105988, "learning_rate": 0.01, "loss": 1.9437, "step": 95379 }, { "epoch": 9.847408631013835, "grad_norm": 0.09157336503267288, "learning_rate": 0.01, "loss": 1.9188, "step": 95382 }, { "epoch": 9.847718356390667, "grad_norm": 0.055040616542100906, "learning_rate": 0.01, "loss": 1.9371, "step": 95385 }, { "epoch": 9.848028081767499, "grad_norm": 0.035021986812353134, "learning_rate": 0.01, "loss": 1.9393, "step": 95388 }, { "epoch": 9.848337807144333, "grad_norm": 0.08447353541851044, "learning_rate": 0.01, "loss": 1.941, "step": 95391 }, { "epoch": 9.848647532521165, "grad_norm": 0.06649502366781235, "learning_rate": 0.01, "loss": 1.9331, "step": 95394 }, { "epoch": 9.848957257897997, "grad_norm": 0.07391461730003357, "learning_rate": 0.01, "loss": 1.9435, "step": 95397 }, { "epoch": 9.84926698327483, "grad_norm": 0.0677117258310318, "learning_rate": 0.01, "loss": 1.9205, "step": 95400 }, { "epoch": 9.849576708651663, "grad_norm": 0.09915631264448166, "learning_rate": 0.01, "loss": 1.9179, "step": 95403 }, { "epoch": 9.849886434028495, "grad_norm": 0.12769655883312225, "learning_rate": 0.01, "loss": 1.9176, "step": 95406 }, { "epoch": 9.850196159405327, "grad_norm": 0.07113716751337051, "learning_rate": 0.01, "loss": 1.9513, "step": 95409 }, { "epoch": 9.85050588478216, "grad_norm": 0.05216817185282707, "learning_rate": 0.01, "loss": 1.9496, "step": 95412 }, { "epoch": 9.850815610158993, "grad_norm": 0.07261928915977478, "learning_rate": 0.01, "loss": 1.9396, "step": 95415 }, { "epoch": 9.851125335535825, "grad_norm": 0.035049788653850555, "learning_rate": 0.01, "loss": 1.9295, "step": 95418 }, { "epoch": 9.851435060912657, "grad_norm": 0.04018479213118553, "learning_rate": 0.01, "loss": 1.9326, "step": 95421 }, { "epoch": 9.85174478628949, "grad_norm": 0.08323214203119278, "learning_rate": 0.01, "loss": 1.9084, "step": 95424 }, { "epoch": 9.852054511666323, "grad_norm": 0.07037657499313354, "learning_rate": 0.01, "loss": 1.9536, "step": 95427 }, { "epoch": 9.852364237043155, "grad_norm": 0.07828336954116821, "learning_rate": 0.01, "loss": 1.9357, "step": 95430 }, { "epoch": 9.852673962419988, "grad_norm": 0.07287444919347763, "learning_rate": 0.01, "loss": 1.9464, "step": 95433 }, { "epoch": 9.85298368779682, "grad_norm": 0.08128449320793152, "learning_rate": 0.01, "loss": 1.9397, "step": 95436 }, { "epoch": 9.853293413173652, "grad_norm": 0.04227570444345474, "learning_rate": 0.01, "loss": 1.9218, "step": 95439 }, { "epoch": 9.853603138550485, "grad_norm": 0.11544642597436905, "learning_rate": 0.01, "loss": 1.935, "step": 95442 }, { "epoch": 9.853912863927318, "grad_norm": 0.09630688279867172, "learning_rate": 0.01, "loss": 1.9471, "step": 95445 }, { "epoch": 9.85422258930415, "grad_norm": 0.09273266792297363, "learning_rate": 0.01, "loss": 1.9362, "step": 95448 }, { "epoch": 9.854532314680982, "grad_norm": 0.049246251583099365, "learning_rate": 0.01, "loss": 1.9401, "step": 95451 }, { "epoch": 9.854842040057815, "grad_norm": 0.0742521584033966, "learning_rate": 0.01, "loss": 1.9183, "step": 95454 }, { "epoch": 9.855151765434648, "grad_norm": 0.04551522433757782, "learning_rate": 0.01, "loss": 1.9465, "step": 95457 }, { "epoch": 9.85546149081148, "grad_norm": 0.1283591091632843, "learning_rate": 0.01, "loss": 1.9307, "step": 95460 }, { "epoch": 9.855771216188312, "grad_norm": 0.04384034126996994, "learning_rate": 0.01, "loss": 1.9451, "step": 95463 }, { "epoch": 9.856080941565146, "grad_norm": 0.057369425892829895, "learning_rate": 0.01, "loss": 1.9279, "step": 95466 }, { "epoch": 9.856390666941978, "grad_norm": 0.10041297972202301, "learning_rate": 0.01, "loss": 1.9272, "step": 95469 }, { "epoch": 9.85670039231881, "grad_norm": 0.09233054518699646, "learning_rate": 0.01, "loss": 1.9436, "step": 95472 }, { "epoch": 9.857010117695642, "grad_norm": 0.03711867704987526, "learning_rate": 0.01, "loss": 1.934, "step": 95475 }, { "epoch": 9.857319843072476, "grad_norm": 0.04442448914051056, "learning_rate": 0.01, "loss": 1.9244, "step": 95478 }, { "epoch": 9.857629568449308, "grad_norm": 0.09285016357898712, "learning_rate": 0.01, "loss": 1.9267, "step": 95481 }, { "epoch": 9.85793929382614, "grad_norm": 0.0895034596323967, "learning_rate": 0.01, "loss": 1.9199, "step": 95484 }, { "epoch": 9.858249019202974, "grad_norm": 0.05122547596693039, "learning_rate": 0.01, "loss": 1.9299, "step": 95487 }, { "epoch": 9.858558744579806, "grad_norm": 0.053021177649497986, "learning_rate": 0.01, "loss": 1.9397, "step": 95490 }, { "epoch": 9.858868469956638, "grad_norm": 0.06684883683919907, "learning_rate": 0.01, "loss": 1.9335, "step": 95493 }, { "epoch": 9.85917819533347, "grad_norm": 0.06873955577611923, "learning_rate": 0.01, "loss": 1.9388, "step": 95496 }, { "epoch": 9.859487920710304, "grad_norm": 0.03738557919859886, "learning_rate": 0.01, "loss": 1.9318, "step": 95499 }, { "epoch": 9.859797646087136, "grad_norm": 0.046948738396167755, "learning_rate": 0.01, "loss": 1.9376, "step": 95502 }, { "epoch": 9.860107371463968, "grad_norm": 0.05666372552514076, "learning_rate": 0.01, "loss": 1.9434, "step": 95505 }, { "epoch": 9.860417096840802, "grad_norm": 0.12917053699493408, "learning_rate": 0.01, "loss": 1.948, "step": 95508 }, { "epoch": 9.860726822217634, "grad_norm": 0.0822906345129013, "learning_rate": 0.01, "loss": 1.9359, "step": 95511 }, { "epoch": 9.861036547594466, "grad_norm": 0.05405477061867714, "learning_rate": 0.01, "loss": 1.9249, "step": 95514 }, { "epoch": 9.861346272971298, "grad_norm": 0.041714996099472046, "learning_rate": 0.01, "loss": 1.9259, "step": 95517 }, { "epoch": 9.861655998348132, "grad_norm": 0.03900561481714249, "learning_rate": 0.01, "loss": 1.953, "step": 95520 }, { "epoch": 9.861965723724964, "grad_norm": 0.04210944101214409, "learning_rate": 0.01, "loss": 1.9147, "step": 95523 }, { "epoch": 9.862275449101796, "grad_norm": 0.0644078478217125, "learning_rate": 0.01, "loss": 1.953, "step": 95526 }, { "epoch": 9.862585174478628, "grad_norm": 0.03967900574207306, "learning_rate": 0.01, "loss": 1.9216, "step": 95529 }, { "epoch": 9.862894899855462, "grad_norm": 0.12260102480649948, "learning_rate": 0.01, "loss": 1.9073, "step": 95532 }, { "epoch": 9.863204625232294, "grad_norm": 0.06045837700366974, "learning_rate": 0.01, "loss": 1.9686, "step": 95535 }, { "epoch": 9.863514350609126, "grad_norm": 0.10581725835800171, "learning_rate": 0.01, "loss": 1.9444, "step": 95538 }, { "epoch": 9.86382407598596, "grad_norm": 0.04299296438694, "learning_rate": 0.01, "loss": 1.9566, "step": 95541 }, { "epoch": 9.864133801362792, "grad_norm": 0.0774567574262619, "learning_rate": 0.01, "loss": 1.9426, "step": 95544 }, { "epoch": 9.864443526739624, "grad_norm": 0.05911951884627342, "learning_rate": 0.01, "loss": 1.9485, "step": 95547 }, { "epoch": 9.864753252116456, "grad_norm": 0.07533189654350281, "learning_rate": 0.01, "loss": 1.947, "step": 95550 }, { "epoch": 9.86506297749329, "grad_norm": 0.035809364169836044, "learning_rate": 0.01, "loss": 1.9249, "step": 95553 }, { "epoch": 9.865372702870122, "grad_norm": 0.08691052347421646, "learning_rate": 0.01, "loss": 1.9223, "step": 95556 }, { "epoch": 9.865682428246954, "grad_norm": 0.11932963132858276, "learning_rate": 0.01, "loss": 1.9449, "step": 95559 }, { "epoch": 9.865992153623786, "grad_norm": 0.0466434583067894, "learning_rate": 0.01, "loss": 1.9427, "step": 95562 }, { "epoch": 9.86630187900062, "grad_norm": 0.045600853860378265, "learning_rate": 0.01, "loss": 1.9358, "step": 95565 }, { "epoch": 9.866611604377452, "grad_norm": 0.06262704730033875, "learning_rate": 0.01, "loss": 1.9453, "step": 95568 }, { "epoch": 9.866921329754284, "grad_norm": 0.05210849270224571, "learning_rate": 0.01, "loss": 1.9677, "step": 95571 }, { "epoch": 9.867231055131118, "grad_norm": 0.10397256910800934, "learning_rate": 0.01, "loss": 1.9236, "step": 95574 }, { "epoch": 9.86754078050795, "grad_norm": 0.05808919668197632, "learning_rate": 0.01, "loss": 1.9324, "step": 95577 }, { "epoch": 9.867850505884782, "grad_norm": 0.07182762771844864, "learning_rate": 0.01, "loss": 1.9376, "step": 95580 }, { "epoch": 9.868160231261614, "grad_norm": 0.07244507968425751, "learning_rate": 0.01, "loss": 1.9419, "step": 95583 }, { "epoch": 9.868469956638448, "grad_norm": 0.058532387018203735, "learning_rate": 0.01, "loss": 1.9665, "step": 95586 }, { "epoch": 9.86877968201528, "grad_norm": 0.0981719046831131, "learning_rate": 0.01, "loss": 1.9244, "step": 95589 }, { "epoch": 9.869089407392112, "grad_norm": 0.07616667449474335, "learning_rate": 0.01, "loss": 1.9322, "step": 95592 }, { "epoch": 9.869399132768946, "grad_norm": 0.0804872065782547, "learning_rate": 0.01, "loss": 1.9638, "step": 95595 }, { "epoch": 9.869708858145778, "grad_norm": 0.05157414451241493, "learning_rate": 0.01, "loss": 1.9505, "step": 95598 }, { "epoch": 9.87001858352261, "grad_norm": 0.033212315291166306, "learning_rate": 0.01, "loss": 1.919, "step": 95601 }, { "epoch": 9.870328308899442, "grad_norm": 0.04200216010212898, "learning_rate": 0.01, "loss": 1.9433, "step": 95604 }, { "epoch": 9.870638034276276, "grad_norm": 0.042693279683589935, "learning_rate": 0.01, "loss": 1.9097, "step": 95607 }, { "epoch": 9.870947759653108, "grad_norm": 0.16291595995426178, "learning_rate": 0.01, "loss": 1.9385, "step": 95610 }, { "epoch": 9.87125748502994, "grad_norm": 0.1320657581090927, "learning_rate": 0.01, "loss": 1.9266, "step": 95613 }, { "epoch": 9.871567210406774, "grad_norm": 0.05239594727754593, "learning_rate": 0.01, "loss": 1.9505, "step": 95616 }, { "epoch": 9.871876935783606, "grad_norm": 0.04437854140996933, "learning_rate": 0.01, "loss": 1.9194, "step": 95619 }, { "epoch": 9.872186661160438, "grad_norm": 0.04155666381120682, "learning_rate": 0.01, "loss": 1.9296, "step": 95622 }, { "epoch": 9.87249638653727, "grad_norm": 0.08610442280769348, "learning_rate": 0.01, "loss": 1.9317, "step": 95625 }, { "epoch": 9.872806111914104, "grad_norm": 0.12307862192392349, "learning_rate": 0.01, "loss": 1.9107, "step": 95628 }, { "epoch": 9.873115837290936, "grad_norm": 0.09790744632482529, "learning_rate": 0.01, "loss": 1.9551, "step": 95631 }, { "epoch": 9.873425562667768, "grad_norm": 0.061377834528684616, "learning_rate": 0.01, "loss": 1.9207, "step": 95634 }, { "epoch": 9.8737352880446, "grad_norm": 0.08830194175243378, "learning_rate": 0.01, "loss": 1.9286, "step": 95637 }, { "epoch": 9.874045013421433, "grad_norm": 0.06755909323692322, "learning_rate": 0.01, "loss": 1.9271, "step": 95640 }, { "epoch": 9.874354738798266, "grad_norm": 0.03959019482135773, "learning_rate": 0.01, "loss": 1.935, "step": 95643 }, { "epoch": 9.874664464175098, "grad_norm": 0.08053558319807053, "learning_rate": 0.01, "loss": 1.9697, "step": 95646 }, { "epoch": 9.874974189551931, "grad_norm": 0.09366048872470856, "learning_rate": 0.01, "loss": 1.9496, "step": 95649 }, { "epoch": 9.875283914928763, "grad_norm": 0.06561397761106491, "learning_rate": 0.01, "loss": 1.9452, "step": 95652 }, { "epoch": 9.875593640305595, "grad_norm": 0.08625023812055588, "learning_rate": 0.01, "loss": 1.9232, "step": 95655 }, { "epoch": 9.875903365682428, "grad_norm": 0.04743865877389908, "learning_rate": 0.01, "loss": 1.9275, "step": 95658 }, { "epoch": 9.876213091059261, "grad_norm": 0.12848925590515137, "learning_rate": 0.01, "loss": 1.9373, "step": 95661 }, { "epoch": 9.876522816436093, "grad_norm": 0.13109102845191956, "learning_rate": 0.01, "loss": 1.913, "step": 95664 }, { "epoch": 9.876832541812925, "grad_norm": 0.10117898881435394, "learning_rate": 0.01, "loss": 1.9229, "step": 95667 }, { "epoch": 9.877142267189758, "grad_norm": 0.041157037019729614, "learning_rate": 0.01, "loss": 1.9381, "step": 95670 }, { "epoch": 9.877451992566591, "grad_norm": 0.03664329648017883, "learning_rate": 0.01, "loss": 1.9654, "step": 95673 }, { "epoch": 9.877761717943423, "grad_norm": 0.030358728021383286, "learning_rate": 0.01, "loss": 1.9446, "step": 95676 }, { "epoch": 9.878071443320255, "grad_norm": 0.045593880116939545, "learning_rate": 0.01, "loss": 1.954, "step": 95679 }, { "epoch": 9.87838116869709, "grad_norm": 0.11838401108980179, "learning_rate": 0.01, "loss": 1.9302, "step": 95682 }, { "epoch": 9.878690894073921, "grad_norm": 0.10561030358076096, "learning_rate": 0.01, "loss": 1.9347, "step": 95685 }, { "epoch": 9.879000619450753, "grad_norm": 0.04690423980355263, "learning_rate": 0.01, "loss": 1.9332, "step": 95688 }, { "epoch": 9.879310344827585, "grad_norm": 0.03910710662603378, "learning_rate": 0.01, "loss": 1.9257, "step": 95691 }, { "epoch": 9.87962007020442, "grad_norm": 0.04057015851140022, "learning_rate": 0.01, "loss": 1.9503, "step": 95694 }, { "epoch": 9.879929795581251, "grad_norm": 0.11349897086620331, "learning_rate": 0.01, "loss": 1.9477, "step": 95697 }, { "epoch": 9.880239520958083, "grad_norm": 0.05625935643911362, "learning_rate": 0.01, "loss": 1.916, "step": 95700 }, { "epoch": 9.880549246334917, "grad_norm": 0.06775499880313873, "learning_rate": 0.01, "loss": 1.9382, "step": 95703 }, { "epoch": 9.88085897171175, "grad_norm": 0.053724609315395355, "learning_rate": 0.01, "loss": 1.9308, "step": 95706 }, { "epoch": 9.881168697088581, "grad_norm": 0.047246869653463364, "learning_rate": 0.01, "loss": 1.9334, "step": 95709 }, { "epoch": 9.881478422465413, "grad_norm": 0.03901404142379761, "learning_rate": 0.01, "loss": 1.9289, "step": 95712 }, { "epoch": 9.881788147842247, "grad_norm": 0.048984501510858536, "learning_rate": 0.01, "loss": 1.9497, "step": 95715 }, { "epoch": 9.88209787321908, "grad_norm": 0.06687568873167038, "learning_rate": 0.01, "loss": 1.929, "step": 95718 }, { "epoch": 9.882407598595911, "grad_norm": 0.06769860535860062, "learning_rate": 0.01, "loss": 1.9005, "step": 95721 }, { "epoch": 9.882717323972745, "grad_norm": 0.08247470110654831, "learning_rate": 0.01, "loss": 1.9232, "step": 95724 }, { "epoch": 9.883027049349577, "grad_norm": 0.05325501412153244, "learning_rate": 0.01, "loss": 1.9402, "step": 95727 }, { "epoch": 9.88333677472641, "grad_norm": 0.04076659306883812, "learning_rate": 0.01, "loss": 1.941, "step": 95730 }, { "epoch": 9.883646500103241, "grad_norm": 0.05497249215841293, "learning_rate": 0.01, "loss": 1.9513, "step": 95733 }, { "epoch": 9.883956225480075, "grad_norm": 0.06618578732013702, "learning_rate": 0.01, "loss": 1.949, "step": 95736 }, { "epoch": 9.884265950856907, "grad_norm": 0.08831392973661423, "learning_rate": 0.01, "loss": 1.9374, "step": 95739 }, { "epoch": 9.88457567623374, "grad_norm": 0.07051166146993637, "learning_rate": 0.01, "loss": 1.9314, "step": 95742 }, { "epoch": 9.884885401610571, "grad_norm": 0.09569734334945679, "learning_rate": 0.01, "loss": 1.9189, "step": 95745 }, { "epoch": 9.885195126987405, "grad_norm": 0.10191222280263901, "learning_rate": 0.01, "loss": 1.9063, "step": 95748 }, { "epoch": 9.885504852364237, "grad_norm": 0.054515983909368515, "learning_rate": 0.01, "loss": 1.9194, "step": 95751 }, { "epoch": 9.885814577741069, "grad_norm": 0.03909364342689514, "learning_rate": 0.01, "loss": 1.9258, "step": 95754 }, { "epoch": 9.886124303117903, "grad_norm": 0.0464809313416481, "learning_rate": 0.01, "loss": 1.944, "step": 95757 }, { "epoch": 9.886434028494735, "grad_norm": 0.03945815935730934, "learning_rate": 0.01, "loss": 1.972, "step": 95760 }, { "epoch": 9.886743753871567, "grad_norm": 0.037694625556468964, "learning_rate": 0.01, "loss": 1.9321, "step": 95763 }, { "epoch": 9.887053479248399, "grad_norm": 0.04368235170841217, "learning_rate": 0.01, "loss": 1.9025, "step": 95766 }, { "epoch": 9.887363204625233, "grad_norm": 0.09463667869567871, "learning_rate": 0.01, "loss": 1.9384, "step": 95769 }, { "epoch": 9.887672930002065, "grad_norm": 0.07126586139202118, "learning_rate": 0.01, "loss": 1.9275, "step": 95772 }, { "epoch": 9.887982655378897, "grad_norm": 0.0477958507835865, "learning_rate": 0.01, "loss": 1.9379, "step": 95775 }, { "epoch": 9.888292380755729, "grad_norm": 0.04310915991663933, "learning_rate": 0.01, "loss": 1.9356, "step": 95778 }, { "epoch": 9.888602106132563, "grad_norm": 0.05492854118347168, "learning_rate": 0.01, "loss": 1.9804, "step": 95781 }, { "epoch": 9.888911831509395, "grad_norm": 0.12847760319709778, "learning_rate": 0.01, "loss": 1.9207, "step": 95784 }, { "epoch": 9.889221556886227, "grad_norm": 0.04450586438179016, "learning_rate": 0.01, "loss": 1.9519, "step": 95787 }, { "epoch": 9.88953128226306, "grad_norm": 0.07517846673727036, "learning_rate": 0.01, "loss": 1.955, "step": 95790 }, { "epoch": 9.889841007639893, "grad_norm": 0.08548619598150253, "learning_rate": 0.01, "loss": 1.9242, "step": 95793 }, { "epoch": 9.890150733016725, "grad_norm": 0.037881866097450256, "learning_rate": 0.01, "loss": 1.9395, "step": 95796 }, { "epoch": 9.890460458393557, "grad_norm": 0.07290767133235931, "learning_rate": 0.01, "loss": 1.9328, "step": 95799 }, { "epoch": 9.89077018377039, "grad_norm": 0.10962492227554321, "learning_rate": 0.01, "loss": 1.9315, "step": 95802 }, { "epoch": 9.891079909147223, "grad_norm": 0.07267545163631439, "learning_rate": 0.01, "loss": 1.9343, "step": 95805 }, { "epoch": 9.891389634524055, "grad_norm": 0.04466653987765312, "learning_rate": 0.01, "loss": 1.9258, "step": 95808 }, { "epoch": 9.891699359900889, "grad_norm": 0.06062067300081253, "learning_rate": 0.01, "loss": 1.9344, "step": 95811 }, { "epoch": 9.89200908527772, "grad_norm": 0.05122390761971474, "learning_rate": 0.01, "loss": 1.938, "step": 95814 }, { "epoch": 9.892318810654553, "grad_norm": 0.03910289704799652, "learning_rate": 0.01, "loss": 1.9269, "step": 95817 }, { "epoch": 9.892628536031385, "grad_norm": 0.03563802316784859, "learning_rate": 0.01, "loss": 1.9274, "step": 95820 }, { "epoch": 9.892938261408219, "grad_norm": 0.10363304615020752, "learning_rate": 0.01, "loss": 1.9207, "step": 95823 }, { "epoch": 9.89324798678505, "grad_norm": 0.04152027145028114, "learning_rate": 0.01, "loss": 1.9367, "step": 95826 }, { "epoch": 9.893557712161883, "grad_norm": 0.037295687943696976, "learning_rate": 0.01, "loss": 1.9273, "step": 95829 }, { "epoch": 9.893867437538717, "grad_norm": 0.1127936989068985, "learning_rate": 0.01, "loss": 1.9252, "step": 95832 }, { "epoch": 9.894177162915549, "grad_norm": 0.06783781945705414, "learning_rate": 0.01, "loss": 1.9431, "step": 95835 }, { "epoch": 9.89448688829238, "grad_norm": 0.07427812367677689, "learning_rate": 0.01, "loss": 1.9236, "step": 95838 }, { "epoch": 9.894796613669213, "grad_norm": 0.07258497923612595, "learning_rate": 0.01, "loss": 1.9373, "step": 95841 }, { "epoch": 9.895106339046047, "grad_norm": 0.04777050018310547, "learning_rate": 0.01, "loss": 1.9248, "step": 95844 }, { "epoch": 9.895416064422879, "grad_norm": 0.05332339182496071, "learning_rate": 0.01, "loss": 1.9195, "step": 95847 }, { "epoch": 9.89572578979971, "grad_norm": 0.07612593472003937, "learning_rate": 0.01, "loss": 1.9224, "step": 95850 }, { "epoch": 9.896035515176543, "grad_norm": 0.0600535124540329, "learning_rate": 0.01, "loss": 1.9374, "step": 95853 }, { "epoch": 9.896345240553376, "grad_norm": 0.05714597553014755, "learning_rate": 0.01, "loss": 1.9259, "step": 95856 }, { "epoch": 9.896654965930209, "grad_norm": 0.051216330379247665, "learning_rate": 0.01, "loss": 1.9368, "step": 95859 }, { "epoch": 9.89696469130704, "grad_norm": 0.05697392299771309, "learning_rate": 0.01, "loss": 1.9816, "step": 95862 }, { "epoch": 9.897274416683874, "grad_norm": 0.060521986335515976, "learning_rate": 0.01, "loss": 1.9371, "step": 95865 }, { "epoch": 9.897584142060706, "grad_norm": 0.04552103206515312, "learning_rate": 0.01, "loss": 1.9131, "step": 95868 }, { "epoch": 9.897893867437539, "grad_norm": 0.1266215741634369, "learning_rate": 0.01, "loss": 1.932, "step": 95871 }, { "epoch": 9.89820359281437, "grad_norm": 0.08632038533687592, "learning_rate": 0.01, "loss": 1.9372, "step": 95874 }, { "epoch": 9.898513318191204, "grad_norm": 0.11756304651498795, "learning_rate": 0.01, "loss": 1.9586, "step": 95877 }, { "epoch": 9.898823043568036, "grad_norm": 0.08925943821668625, "learning_rate": 0.01, "loss": 1.9166, "step": 95880 }, { "epoch": 9.899132768944868, "grad_norm": 0.044926900416612625, "learning_rate": 0.01, "loss": 1.9619, "step": 95883 }, { "epoch": 9.8994424943217, "grad_norm": 0.05228542163968086, "learning_rate": 0.01, "loss": 1.9147, "step": 95886 }, { "epoch": 9.899752219698534, "grad_norm": 0.04257636144757271, "learning_rate": 0.01, "loss": 1.9453, "step": 95889 }, { "epoch": 9.900061945075366, "grad_norm": 0.04858774319291115, "learning_rate": 0.01, "loss": 1.9215, "step": 95892 }, { "epoch": 9.900371670452198, "grad_norm": 0.08439365029335022, "learning_rate": 0.01, "loss": 1.9369, "step": 95895 }, { "epoch": 9.900681395829032, "grad_norm": 0.061978358775377274, "learning_rate": 0.01, "loss": 1.9218, "step": 95898 }, { "epoch": 9.900991121205864, "grad_norm": 0.07678073644638062, "learning_rate": 0.01, "loss": 1.9158, "step": 95901 }, { "epoch": 9.901300846582696, "grad_norm": 0.08055569231510162, "learning_rate": 0.01, "loss": 1.927, "step": 95904 }, { "epoch": 9.901610571959528, "grad_norm": 0.09484147280454636, "learning_rate": 0.01, "loss": 1.9262, "step": 95907 }, { "epoch": 9.901920297336362, "grad_norm": 0.15379561483860016, "learning_rate": 0.01, "loss": 1.9505, "step": 95910 }, { "epoch": 9.902230022713194, "grad_norm": 0.11704928427934647, "learning_rate": 0.01, "loss": 1.9154, "step": 95913 }, { "epoch": 9.902539748090026, "grad_norm": 0.054909687489271164, "learning_rate": 0.01, "loss": 1.9423, "step": 95916 }, { "epoch": 9.90284947346686, "grad_norm": 0.056868162006139755, "learning_rate": 0.01, "loss": 1.9426, "step": 95919 }, { "epoch": 9.903159198843692, "grad_norm": 0.05950642749667168, "learning_rate": 0.01, "loss": 1.9097, "step": 95922 }, { "epoch": 9.903468924220524, "grad_norm": 0.06228267773985863, "learning_rate": 0.01, "loss": 1.9419, "step": 95925 }, { "epoch": 9.903778649597356, "grad_norm": 0.053820542991161346, "learning_rate": 0.01, "loss": 1.936, "step": 95928 }, { "epoch": 9.90408837497419, "grad_norm": 0.03804078698158264, "learning_rate": 0.01, "loss": 1.9159, "step": 95931 }, { "epoch": 9.904398100351022, "grad_norm": 0.04085548594594002, "learning_rate": 0.01, "loss": 1.9206, "step": 95934 }, { "epoch": 9.904707825727854, "grad_norm": 0.07046306878328323, "learning_rate": 0.01, "loss": 1.9292, "step": 95937 }, { "epoch": 9.905017551104688, "grad_norm": 0.07246625423431396, "learning_rate": 0.01, "loss": 1.9201, "step": 95940 }, { "epoch": 9.90532727648152, "grad_norm": 0.20051436126232147, "learning_rate": 0.01, "loss": 1.9326, "step": 95943 }, { "epoch": 9.905637001858352, "grad_norm": 0.06060566008090973, "learning_rate": 0.01, "loss": 1.9328, "step": 95946 }, { "epoch": 9.905946727235184, "grad_norm": 0.03977508842945099, "learning_rate": 0.01, "loss": 1.936, "step": 95949 }, { "epoch": 9.906256452612018, "grad_norm": 0.055726293474435806, "learning_rate": 0.01, "loss": 1.9122, "step": 95952 }, { "epoch": 9.90656617798885, "grad_norm": 0.040252525359392166, "learning_rate": 0.01, "loss": 1.9478, "step": 95955 }, { "epoch": 9.906875903365682, "grad_norm": 0.03784322366118431, "learning_rate": 0.01, "loss": 1.9261, "step": 95958 }, { "epoch": 9.907185628742514, "grad_norm": 0.055127233266830444, "learning_rate": 0.01, "loss": 1.944, "step": 95961 }, { "epoch": 9.907495354119348, "grad_norm": 0.10798919945955276, "learning_rate": 0.01, "loss": 1.9225, "step": 95964 }, { "epoch": 9.90780507949618, "grad_norm": 0.05120984837412834, "learning_rate": 0.01, "loss": 1.9485, "step": 95967 }, { "epoch": 9.908114804873012, "grad_norm": 0.08842770755290985, "learning_rate": 0.01, "loss": 1.9281, "step": 95970 }, { "epoch": 9.908424530249846, "grad_norm": 0.1395348310470581, "learning_rate": 0.01, "loss": 1.9263, "step": 95973 }, { "epoch": 9.908734255626678, "grad_norm": 0.15325050055980682, "learning_rate": 0.01, "loss": 1.9385, "step": 95976 }, { "epoch": 9.90904398100351, "grad_norm": 0.08843661844730377, "learning_rate": 0.01, "loss": 1.9649, "step": 95979 }, { "epoch": 9.909353706380342, "grad_norm": 0.06873669475317001, "learning_rate": 0.01, "loss": 1.9587, "step": 95982 }, { "epoch": 9.909663431757176, "grad_norm": 0.04710160940885544, "learning_rate": 0.01, "loss": 1.9452, "step": 95985 }, { "epoch": 9.909973157134008, "grad_norm": 0.083259716629982, "learning_rate": 0.01, "loss": 1.9379, "step": 95988 }, { "epoch": 9.91028288251084, "grad_norm": 0.048664819449186325, "learning_rate": 0.01, "loss": 1.9395, "step": 95991 }, { "epoch": 9.910592607887672, "grad_norm": 0.041180554777383804, "learning_rate": 0.01, "loss": 1.9524, "step": 95994 }, { "epoch": 9.910902333264506, "grad_norm": 0.06644929200410843, "learning_rate": 0.01, "loss": 1.9246, "step": 95997 }, { "epoch": 9.911212058641338, "grad_norm": 0.04565279185771942, "learning_rate": 0.01, "loss": 1.9097, "step": 96000 }, { "epoch": 9.91152178401817, "grad_norm": 0.09502280503511429, "learning_rate": 0.01, "loss": 1.9123, "step": 96003 }, { "epoch": 9.911831509395004, "grad_norm": 0.056951504200696945, "learning_rate": 0.01, "loss": 1.9564, "step": 96006 }, { "epoch": 9.912141234771836, "grad_norm": 0.04809977486729622, "learning_rate": 0.01, "loss": 1.9178, "step": 96009 }, { "epoch": 9.912450960148668, "grad_norm": 0.06078126281499863, "learning_rate": 0.01, "loss": 1.9348, "step": 96012 }, { "epoch": 9.9127606855255, "grad_norm": 0.054974962025880814, "learning_rate": 0.01, "loss": 1.9168, "step": 96015 }, { "epoch": 9.913070410902334, "grad_norm": 0.07565493881702423, "learning_rate": 0.01, "loss": 1.9439, "step": 96018 }, { "epoch": 9.913380136279166, "grad_norm": 0.1033167764544487, "learning_rate": 0.01, "loss": 1.9256, "step": 96021 }, { "epoch": 9.913689861655998, "grad_norm": 0.07701541483402252, "learning_rate": 0.01, "loss": 1.9128, "step": 96024 }, { "epoch": 9.913999587032832, "grad_norm": 0.1151469275355339, "learning_rate": 0.01, "loss": 1.9381, "step": 96027 }, { "epoch": 9.914309312409664, "grad_norm": 0.0574842132627964, "learning_rate": 0.01, "loss": 1.9233, "step": 96030 }, { "epoch": 9.914619037786496, "grad_norm": 0.09228459000587463, "learning_rate": 0.01, "loss": 1.9401, "step": 96033 }, { "epoch": 9.914928763163328, "grad_norm": 0.07984200119972229, "learning_rate": 0.01, "loss": 1.9307, "step": 96036 }, { "epoch": 9.915238488540162, "grad_norm": 0.07425150275230408, "learning_rate": 0.01, "loss": 1.9354, "step": 96039 }, { "epoch": 9.915548213916994, "grad_norm": 0.09937628358602524, "learning_rate": 0.01, "loss": 1.9217, "step": 96042 }, { "epoch": 9.915857939293826, "grad_norm": 0.11574780195951462, "learning_rate": 0.01, "loss": 1.9163, "step": 96045 }, { "epoch": 9.91616766467066, "grad_norm": 0.0494074672460556, "learning_rate": 0.01, "loss": 1.966, "step": 96048 }, { "epoch": 9.916477390047492, "grad_norm": 0.04185641556978226, "learning_rate": 0.01, "loss": 1.9606, "step": 96051 }, { "epoch": 9.916787115424324, "grad_norm": 0.03591124713420868, "learning_rate": 0.01, "loss": 1.9258, "step": 96054 }, { "epoch": 9.917096840801156, "grad_norm": 0.059095386415719986, "learning_rate": 0.01, "loss": 1.9451, "step": 96057 }, { "epoch": 9.91740656617799, "grad_norm": 0.12536944448947906, "learning_rate": 0.01, "loss": 1.9246, "step": 96060 }, { "epoch": 9.917716291554822, "grad_norm": 0.04808446019887924, "learning_rate": 0.01, "loss": 1.943, "step": 96063 }, { "epoch": 9.918026016931654, "grad_norm": 0.031612422317266464, "learning_rate": 0.01, "loss": 1.9538, "step": 96066 }, { "epoch": 9.918335742308486, "grad_norm": 0.035517603158950806, "learning_rate": 0.01, "loss": 1.9453, "step": 96069 }, { "epoch": 9.91864546768532, "grad_norm": 0.041796356439590454, "learning_rate": 0.01, "loss": 1.9573, "step": 96072 }, { "epoch": 9.918955193062152, "grad_norm": 0.07575595378875732, "learning_rate": 0.01, "loss": 1.9172, "step": 96075 }, { "epoch": 9.919264918438984, "grad_norm": 0.1338336169719696, "learning_rate": 0.01, "loss": 1.9315, "step": 96078 }, { "epoch": 9.919574643815817, "grad_norm": 0.0776827409863472, "learning_rate": 0.01, "loss": 1.9171, "step": 96081 }, { "epoch": 9.91988436919265, "grad_norm": 0.04113177955150604, "learning_rate": 0.01, "loss": 1.9226, "step": 96084 }, { "epoch": 9.920194094569482, "grad_norm": 0.04225945472717285, "learning_rate": 0.01, "loss": 1.9465, "step": 96087 }, { "epoch": 9.920503819946314, "grad_norm": 0.03149348869919777, "learning_rate": 0.01, "loss": 1.9218, "step": 96090 }, { "epoch": 9.920813545323147, "grad_norm": 0.03339305892586708, "learning_rate": 0.01, "loss": 1.9374, "step": 96093 }, { "epoch": 9.92112327069998, "grad_norm": 0.06829036772251129, "learning_rate": 0.01, "loss": 1.9255, "step": 96096 }, { "epoch": 9.921432996076812, "grad_norm": 0.0362083837389946, "learning_rate": 0.01, "loss": 1.9545, "step": 96099 }, { "epoch": 9.921742721453644, "grad_norm": 0.0873231515288353, "learning_rate": 0.01, "loss": 1.9187, "step": 96102 }, { "epoch": 9.922052446830477, "grad_norm": 0.1664249300956726, "learning_rate": 0.01, "loss": 1.9381, "step": 96105 }, { "epoch": 9.92236217220731, "grad_norm": 0.09485960006713867, "learning_rate": 0.01, "loss": 1.9346, "step": 96108 }, { "epoch": 9.922671897584141, "grad_norm": 0.09327783435583115, "learning_rate": 0.01, "loss": 1.9394, "step": 96111 }, { "epoch": 9.922981622960975, "grad_norm": 0.0972425639629364, "learning_rate": 0.01, "loss": 1.9232, "step": 96114 }, { "epoch": 9.923291348337807, "grad_norm": 0.05768739432096481, "learning_rate": 0.01, "loss": 1.9585, "step": 96117 }, { "epoch": 9.92360107371464, "grad_norm": 0.04108816757798195, "learning_rate": 0.01, "loss": 1.9405, "step": 96120 }, { "epoch": 9.923910799091471, "grad_norm": 0.056108977645635605, "learning_rate": 0.01, "loss": 1.9372, "step": 96123 }, { "epoch": 9.924220524468305, "grad_norm": 0.07638213783502579, "learning_rate": 0.01, "loss": 1.93, "step": 96126 }, { "epoch": 9.924530249845137, "grad_norm": 0.06884434074163437, "learning_rate": 0.01, "loss": 1.9759, "step": 96129 }, { "epoch": 9.92483997522197, "grad_norm": 0.05558433011174202, "learning_rate": 0.01, "loss": 1.9577, "step": 96132 }, { "epoch": 9.925149700598803, "grad_norm": 0.15693187713623047, "learning_rate": 0.01, "loss": 1.9323, "step": 96135 }, { "epoch": 9.925459425975635, "grad_norm": 0.043495237827301025, "learning_rate": 0.01, "loss": 1.9281, "step": 96138 }, { "epoch": 9.925769151352467, "grad_norm": 0.05494603142142296, "learning_rate": 0.01, "loss": 1.9668, "step": 96141 }, { "epoch": 9.9260788767293, "grad_norm": 0.05449117347598076, "learning_rate": 0.01, "loss": 1.9589, "step": 96144 }, { "epoch": 9.926388602106133, "grad_norm": 0.08051479607820511, "learning_rate": 0.01, "loss": 1.9528, "step": 96147 }, { "epoch": 9.926698327482965, "grad_norm": 0.034158267080783844, "learning_rate": 0.01, "loss": 1.9253, "step": 96150 }, { "epoch": 9.927008052859797, "grad_norm": 0.17496581375598907, "learning_rate": 0.01, "loss": 1.9426, "step": 96153 }, { "epoch": 9.927317778236631, "grad_norm": 0.1495855450630188, "learning_rate": 0.01, "loss": 1.9219, "step": 96156 }, { "epoch": 9.927627503613463, "grad_norm": 0.06771902740001678, "learning_rate": 0.01, "loss": 1.9596, "step": 96159 }, { "epoch": 9.927937228990295, "grad_norm": 0.12524180114269257, "learning_rate": 0.01, "loss": 1.9512, "step": 96162 }, { "epoch": 9.928246954367127, "grad_norm": 0.05685262009501457, "learning_rate": 0.01, "loss": 1.946, "step": 96165 }, { "epoch": 9.928556679743961, "grad_norm": 0.039354316890239716, "learning_rate": 0.01, "loss": 1.9348, "step": 96168 }, { "epoch": 9.928866405120793, "grad_norm": 0.06542901694774628, "learning_rate": 0.01, "loss": 1.9414, "step": 96171 }, { "epoch": 9.929176130497625, "grad_norm": 0.0561458095908165, "learning_rate": 0.01, "loss": 1.9181, "step": 96174 }, { "epoch": 9.929485855874457, "grad_norm": 0.06581629812717438, "learning_rate": 0.01, "loss": 1.9391, "step": 96177 }, { "epoch": 9.929795581251291, "grad_norm": 0.14144189655780792, "learning_rate": 0.01, "loss": 1.9158, "step": 96180 }, { "epoch": 9.930105306628123, "grad_norm": 0.1060752272605896, "learning_rate": 0.01, "loss": 1.9614, "step": 96183 }, { "epoch": 9.930415032004955, "grad_norm": 0.05076698586344719, "learning_rate": 0.01, "loss": 1.9236, "step": 96186 }, { "epoch": 9.930724757381789, "grad_norm": 0.0615849643945694, "learning_rate": 0.01, "loss": 1.9436, "step": 96189 }, { "epoch": 9.931034482758621, "grad_norm": 0.06269994378089905, "learning_rate": 0.01, "loss": 1.9149, "step": 96192 }, { "epoch": 9.931344208135453, "grad_norm": 0.10822846740484238, "learning_rate": 0.01, "loss": 1.9324, "step": 96195 }, { "epoch": 9.931653933512285, "grad_norm": 0.13326896727085114, "learning_rate": 0.01, "loss": 1.9314, "step": 96198 }, { "epoch": 9.931963658889119, "grad_norm": 0.13338686525821686, "learning_rate": 0.01, "loss": 1.9103, "step": 96201 }, { "epoch": 9.932273384265951, "grad_norm": 0.054622832685709, "learning_rate": 0.01, "loss": 1.9394, "step": 96204 }, { "epoch": 9.932583109642783, "grad_norm": 0.046194273978471756, "learning_rate": 0.01, "loss": 1.9469, "step": 96207 }, { "epoch": 9.932892835019615, "grad_norm": 0.049110498279333115, "learning_rate": 0.01, "loss": 1.9354, "step": 96210 }, { "epoch": 9.933202560396449, "grad_norm": 0.05172931030392647, "learning_rate": 0.01, "loss": 1.9365, "step": 96213 }, { "epoch": 9.933512285773281, "grad_norm": 0.044886182993650436, "learning_rate": 0.01, "loss": 1.9569, "step": 96216 }, { "epoch": 9.933822011150113, "grad_norm": 0.04979904741048813, "learning_rate": 0.01, "loss": 1.9227, "step": 96219 }, { "epoch": 9.934131736526947, "grad_norm": 0.12607920169830322, "learning_rate": 0.01, "loss": 1.938, "step": 96222 }, { "epoch": 9.934441461903779, "grad_norm": 0.05032966658473015, "learning_rate": 0.01, "loss": 1.9317, "step": 96225 }, { "epoch": 9.934751187280611, "grad_norm": 0.08132116496562958, "learning_rate": 0.01, "loss": 1.9498, "step": 96228 }, { "epoch": 9.935060912657443, "grad_norm": 0.07728183269500732, "learning_rate": 0.01, "loss": 1.9186, "step": 96231 }, { "epoch": 9.935370638034277, "grad_norm": 0.07342918962240219, "learning_rate": 0.01, "loss": 1.9439, "step": 96234 }, { "epoch": 9.935680363411109, "grad_norm": 0.056004371494054794, "learning_rate": 0.01, "loss": 1.9329, "step": 96237 }, { "epoch": 9.93599008878794, "grad_norm": 0.040742505341768265, "learning_rate": 0.01, "loss": 1.9481, "step": 96240 }, { "epoch": 9.936299814164775, "grad_norm": 0.1418849527835846, "learning_rate": 0.01, "loss": 1.9426, "step": 96243 }, { "epoch": 9.936609539541607, "grad_norm": 0.09691282361745834, "learning_rate": 0.01, "loss": 1.9171, "step": 96246 }, { "epoch": 9.936919264918439, "grad_norm": 0.10721616446971893, "learning_rate": 0.01, "loss": 1.9281, "step": 96249 }, { "epoch": 9.93722899029527, "grad_norm": 0.07174794375896454, "learning_rate": 0.01, "loss": 1.9415, "step": 96252 }, { "epoch": 9.937538715672105, "grad_norm": 0.04398215934634209, "learning_rate": 0.01, "loss": 1.9556, "step": 96255 }, { "epoch": 9.937848441048937, "grad_norm": 0.032949745655059814, "learning_rate": 0.01, "loss": 1.9164, "step": 96258 }, { "epoch": 9.938158166425769, "grad_norm": 0.036921992897987366, "learning_rate": 0.01, "loss": 1.9176, "step": 96261 }, { "epoch": 9.938467891802603, "grad_norm": 0.11214341968297958, "learning_rate": 0.01, "loss": 1.9231, "step": 96264 }, { "epoch": 9.938777617179435, "grad_norm": 0.10016003996133804, "learning_rate": 0.01, "loss": 1.934, "step": 96267 }, { "epoch": 9.939087342556267, "grad_norm": 0.05769341066479683, "learning_rate": 0.01, "loss": 1.9259, "step": 96270 }, { "epoch": 9.939397067933099, "grad_norm": 0.046199750155210495, "learning_rate": 0.01, "loss": 1.9296, "step": 96273 }, { "epoch": 9.939706793309933, "grad_norm": 0.03568249195814133, "learning_rate": 0.01, "loss": 1.9232, "step": 96276 }, { "epoch": 9.940016518686765, "grad_norm": 0.031168581917881966, "learning_rate": 0.01, "loss": 1.9379, "step": 96279 }, { "epoch": 9.940326244063597, "grad_norm": 0.038201022893190384, "learning_rate": 0.01, "loss": 1.9394, "step": 96282 }, { "epoch": 9.940635969440429, "grad_norm": 0.03458917513489723, "learning_rate": 0.01, "loss": 1.9176, "step": 96285 }, { "epoch": 9.940945694817263, "grad_norm": 0.10806361585855484, "learning_rate": 0.01, "loss": 1.9354, "step": 96288 }, { "epoch": 9.941255420194095, "grad_norm": 0.07543551176786423, "learning_rate": 0.01, "loss": 1.9223, "step": 96291 }, { "epoch": 9.941565145570927, "grad_norm": 0.0484190434217453, "learning_rate": 0.01, "loss": 1.9121, "step": 96294 }, { "epoch": 9.94187487094776, "grad_norm": 0.08257054537534714, "learning_rate": 0.01, "loss": 1.9441, "step": 96297 }, { "epoch": 9.942184596324593, "grad_norm": 0.08587981015443802, "learning_rate": 0.01, "loss": 1.9429, "step": 96300 }, { "epoch": 9.942494321701425, "grad_norm": 0.04479651898145676, "learning_rate": 0.01, "loss": 1.9322, "step": 96303 }, { "epoch": 9.942804047078257, "grad_norm": 0.07514610141515732, "learning_rate": 0.01, "loss": 1.9137, "step": 96306 }, { "epoch": 9.94311377245509, "grad_norm": 0.043522909283638, "learning_rate": 0.01, "loss": 1.9312, "step": 96309 }, { "epoch": 9.943423497831922, "grad_norm": 0.034205563366413116, "learning_rate": 0.01, "loss": 1.9306, "step": 96312 }, { "epoch": 9.943733223208755, "grad_norm": 0.04049249365925789, "learning_rate": 0.01, "loss": 1.9278, "step": 96315 }, { "epoch": 9.944042948585587, "grad_norm": 0.05075372755527496, "learning_rate": 0.01, "loss": 1.9038, "step": 96318 }, { "epoch": 9.94435267396242, "grad_norm": 0.07100080698728561, "learning_rate": 0.01, "loss": 1.9422, "step": 96321 }, { "epoch": 9.944662399339252, "grad_norm": 0.09260247647762299, "learning_rate": 0.01, "loss": 1.9255, "step": 96324 }, { "epoch": 9.944972124716084, "grad_norm": 0.06434981524944305, "learning_rate": 0.01, "loss": 1.9417, "step": 96327 }, { "epoch": 9.945281850092918, "grad_norm": 0.04493270814418793, "learning_rate": 0.01, "loss": 1.9487, "step": 96330 }, { "epoch": 9.94559157546975, "grad_norm": 0.0432509109377861, "learning_rate": 0.01, "loss": 1.9431, "step": 96333 }, { "epoch": 9.945901300846582, "grad_norm": 0.0948626771569252, "learning_rate": 0.01, "loss": 1.9079, "step": 96336 }, { "epoch": 9.946211026223414, "grad_norm": 0.10249466449022293, "learning_rate": 0.01, "loss": 1.954, "step": 96339 }, { "epoch": 9.946520751600248, "grad_norm": 0.130013570189476, "learning_rate": 0.01, "loss": 1.9286, "step": 96342 }, { "epoch": 9.94683047697708, "grad_norm": 0.048307985067367554, "learning_rate": 0.01, "loss": 1.971, "step": 96345 }, { "epoch": 9.947140202353912, "grad_norm": 0.08395209163427353, "learning_rate": 0.01, "loss": 1.9153, "step": 96348 }, { "epoch": 9.947449927730746, "grad_norm": 0.05564103275537491, "learning_rate": 0.01, "loss": 1.9419, "step": 96351 }, { "epoch": 9.947759653107578, "grad_norm": 0.0359511636197567, "learning_rate": 0.01, "loss": 1.9249, "step": 96354 }, { "epoch": 9.94806937848441, "grad_norm": 0.04344267398118973, "learning_rate": 0.01, "loss": 1.9295, "step": 96357 }, { "epoch": 9.948379103861242, "grad_norm": 0.048588648438453674, "learning_rate": 0.01, "loss": 1.9487, "step": 96360 }, { "epoch": 9.948688829238076, "grad_norm": 0.12489151209592819, "learning_rate": 0.01, "loss": 1.8952, "step": 96363 }, { "epoch": 9.948998554614908, "grad_norm": 0.07510516047477722, "learning_rate": 0.01, "loss": 1.9445, "step": 96366 }, { "epoch": 9.94930827999174, "grad_norm": 0.09619615226984024, "learning_rate": 0.01, "loss": 1.9378, "step": 96369 }, { "epoch": 9.949618005368574, "grad_norm": 0.10213521867990494, "learning_rate": 0.01, "loss": 1.9535, "step": 96372 }, { "epoch": 9.949927730745406, "grad_norm": 0.07023943960666656, "learning_rate": 0.01, "loss": 1.951, "step": 96375 }, { "epoch": 9.950237456122238, "grad_norm": 0.06509052217006683, "learning_rate": 0.01, "loss": 1.9042, "step": 96378 }, { "epoch": 9.95054718149907, "grad_norm": 0.0720127746462822, "learning_rate": 0.01, "loss": 1.936, "step": 96381 }, { "epoch": 9.950856906875904, "grad_norm": 0.0382302924990654, "learning_rate": 0.01, "loss": 1.9186, "step": 96384 }, { "epoch": 9.951166632252736, "grad_norm": 0.030933093279600143, "learning_rate": 0.01, "loss": 1.9092, "step": 96387 }, { "epoch": 9.951476357629568, "grad_norm": 0.05807695537805557, "learning_rate": 0.01, "loss": 1.9342, "step": 96390 }, { "epoch": 9.9517860830064, "grad_norm": 0.08505658805370331, "learning_rate": 0.01, "loss": 1.945, "step": 96393 }, { "epoch": 9.952095808383234, "grad_norm": 0.053451940417289734, "learning_rate": 0.01, "loss": 1.9447, "step": 96396 }, { "epoch": 9.952405533760066, "grad_norm": 0.09265648573637009, "learning_rate": 0.01, "loss": 1.9409, "step": 96399 }, { "epoch": 9.952715259136898, "grad_norm": 0.10184822976589203, "learning_rate": 0.01, "loss": 1.9598, "step": 96402 }, { "epoch": 9.953024984513732, "grad_norm": 0.10975353419780731, "learning_rate": 0.01, "loss": 1.9205, "step": 96405 }, { "epoch": 9.953334709890564, "grad_norm": 0.14230191707611084, "learning_rate": 0.01, "loss": 1.9382, "step": 96408 }, { "epoch": 9.953644435267396, "grad_norm": 0.10149675607681274, "learning_rate": 0.01, "loss": 1.937, "step": 96411 }, { "epoch": 9.953954160644228, "grad_norm": 0.04591508209705353, "learning_rate": 0.01, "loss": 1.9229, "step": 96414 }, { "epoch": 9.954263886021062, "grad_norm": 0.04944121092557907, "learning_rate": 0.01, "loss": 1.9275, "step": 96417 }, { "epoch": 9.954573611397894, "grad_norm": 0.046226534992456436, "learning_rate": 0.01, "loss": 1.9615, "step": 96420 }, { "epoch": 9.954883336774726, "grad_norm": 0.04897132143378258, "learning_rate": 0.01, "loss": 1.9393, "step": 96423 }, { "epoch": 9.955193062151558, "grad_norm": 0.049237675964832306, "learning_rate": 0.01, "loss": 1.9269, "step": 96426 }, { "epoch": 9.955502787528392, "grad_norm": 0.10047107934951782, "learning_rate": 0.01, "loss": 1.9423, "step": 96429 }, { "epoch": 9.955812512905224, "grad_norm": 0.05859467759728432, "learning_rate": 0.01, "loss": 1.9247, "step": 96432 }, { "epoch": 9.956122238282056, "grad_norm": 0.1209932193160057, "learning_rate": 0.01, "loss": 1.9091, "step": 96435 }, { "epoch": 9.95643196365889, "grad_norm": 0.06430116295814514, "learning_rate": 0.01, "loss": 1.9201, "step": 96438 }, { "epoch": 9.956741689035722, "grad_norm": 0.03878641873598099, "learning_rate": 0.01, "loss": 1.9516, "step": 96441 }, { "epoch": 9.957051414412554, "grad_norm": 0.03463767468929291, "learning_rate": 0.01, "loss": 1.9407, "step": 96444 }, { "epoch": 9.957361139789386, "grad_norm": 0.1033138632774353, "learning_rate": 0.01, "loss": 1.9347, "step": 96447 }, { "epoch": 9.95767086516622, "grad_norm": 0.07953305542469025, "learning_rate": 0.01, "loss": 1.9443, "step": 96450 }, { "epoch": 9.957980590543052, "grad_norm": 0.06403129547834396, "learning_rate": 0.01, "loss": 1.9549, "step": 96453 }, { "epoch": 9.958290315919884, "grad_norm": 0.06012757867574692, "learning_rate": 0.01, "loss": 1.9383, "step": 96456 }, { "epoch": 9.958600041296718, "grad_norm": 0.042264413088560104, "learning_rate": 0.01, "loss": 1.9377, "step": 96459 }, { "epoch": 9.95890976667355, "grad_norm": 0.08021681010723114, "learning_rate": 0.01, "loss": 1.9312, "step": 96462 }, { "epoch": 9.959219492050382, "grad_norm": 0.06785903871059418, "learning_rate": 0.01, "loss": 1.9117, "step": 96465 }, { "epoch": 9.959529217427214, "grad_norm": 0.07423307001590729, "learning_rate": 0.01, "loss": 1.9439, "step": 96468 }, { "epoch": 9.959838942804048, "grad_norm": 0.09907034784555435, "learning_rate": 0.01, "loss": 1.9479, "step": 96471 }, { "epoch": 9.96014866818088, "grad_norm": 0.04186716303229332, "learning_rate": 0.01, "loss": 1.9422, "step": 96474 }, { "epoch": 9.960458393557712, "grad_norm": 0.09196318686008453, "learning_rate": 0.01, "loss": 1.9394, "step": 96477 }, { "epoch": 9.960768118934546, "grad_norm": 0.139618918299675, "learning_rate": 0.01, "loss": 1.9237, "step": 96480 }, { "epoch": 9.961077844311378, "grad_norm": 0.06983165442943573, "learning_rate": 0.01, "loss": 1.9292, "step": 96483 }, { "epoch": 9.96138756968821, "grad_norm": 0.05909397080540657, "learning_rate": 0.01, "loss": 1.9309, "step": 96486 }, { "epoch": 9.961697295065042, "grad_norm": 0.05737200006842613, "learning_rate": 0.01, "loss": 1.9367, "step": 96489 }, { "epoch": 9.962007020441876, "grad_norm": 0.042182207107543945, "learning_rate": 0.01, "loss": 1.894, "step": 96492 }, { "epoch": 9.962316745818708, "grad_norm": 0.052041444927453995, "learning_rate": 0.01, "loss": 1.9402, "step": 96495 }, { "epoch": 9.96262647119554, "grad_norm": 0.05366899445652962, "learning_rate": 0.01, "loss": 1.9501, "step": 96498 }, { "epoch": 9.962936196572372, "grad_norm": 0.05489904433488846, "learning_rate": 0.01, "loss": 1.9464, "step": 96501 }, { "epoch": 9.963245921949206, "grad_norm": 0.0658499151468277, "learning_rate": 0.01, "loss": 1.9507, "step": 96504 }, { "epoch": 9.963555647326038, "grad_norm": 0.05275774002075195, "learning_rate": 0.01, "loss": 1.9153, "step": 96507 }, { "epoch": 9.96386537270287, "grad_norm": 0.037739306688308716, "learning_rate": 0.01, "loss": 1.9502, "step": 96510 }, { "epoch": 9.964175098079703, "grad_norm": 0.04795077443122864, "learning_rate": 0.01, "loss": 1.9333, "step": 96513 }, { "epoch": 9.964484823456536, "grad_norm": 0.059451814740896225, "learning_rate": 0.01, "loss": 1.9375, "step": 96516 }, { "epoch": 9.964794548833368, "grad_norm": 0.05571671202778816, "learning_rate": 0.01, "loss": 1.9113, "step": 96519 }, { "epoch": 9.9651042742102, "grad_norm": 0.1349763125181198, "learning_rate": 0.01, "loss": 1.9543, "step": 96522 }, { "epoch": 9.965413999587033, "grad_norm": 0.11729591339826584, "learning_rate": 0.01, "loss": 1.9377, "step": 96525 }, { "epoch": 9.965723724963865, "grad_norm": 0.0958571508526802, "learning_rate": 0.01, "loss": 1.9229, "step": 96528 }, { "epoch": 9.966033450340698, "grad_norm": 0.06040540337562561, "learning_rate": 0.01, "loss": 1.9343, "step": 96531 }, { "epoch": 9.96634317571753, "grad_norm": 0.04997546970844269, "learning_rate": 0.01, "loss": 1.9596, "step": 96534 }, { "epoch": 9.966652901094363, "grad_norm": 0.043729156255722046, "learning_rate": 0.01, "loss": 1.9429, "step": 96537 }, { "epoch": 9.966962626471195, "grad_norm": 0.11820992082357407, "learning_rate": 0.01, "loss": 1.9413, "step": 96540 }, { "epoch": 9.967272351848028, "grad_norm": 0.04722660034894943, "learning_rate": 0.01, "loss": 1.9408, "step": 96543 }, { "epoch": 9.967582077224861, "grad_norm": 0.0528867170214653, "learning_rate": 0.01, "loss": 1.9305, "step": 96546 }, { "epoch": 9.967891802601693, "grad_norm": 0.0687720999121666, "learning_rate": 0.01, "loss": 1.9358, "step": 96549 }, { "epoch": 9.968201527978525, "grad_norm": 0.06109067052602768, "learning_rate": 0.01, "loss": 1.8936, "step": 96552 }, { "epoch": 9.968511253355357, "grad_norm": 0.03622574731707573, "learning_rate": 0.01, "loss": 1.9376, "step": 96555 }, { "epoch": 9.968820978732191, "grad_norm": 0.05403563007712364, "learning_rate": 0.01, "loss": 1.9365, "step": 96558 }, { "epoch": 9.969130704109023, "grad_norm": 0.05532357841730118, "learning_rate": 0.01, "loss": 1.9337, "step": 96561 }, { "epoch": 9.969440429485855, "grad_norm": 0.04689712077379227, "learning_rate": 0.01, "loss": 1.9553, "step": 96564 }, { "epoch": 9.96975015486269, "grad_norm": 0.058481086045503616, "learning_rate": 0.01, "loss": 1.9168, "step": 96567 }, { "epoch": 9.970059880239521, "grad_norm": 0.12241359055042267, "learning_rate": 0.01, "loss": 1.9185, "step": 96570 }, { "epoch": 9.970369605616353, "grad_norm": 0.1142013892531395, "learning_rate": 0.01, "loss": 1.9481, "step": 96573 }, { "epoch": 9.970679330993185, "grad_norm": 0.052777521312236786, "learning_rate": 0.01, "loss": 1.9448, "step": 96576 }, { "epoch": 9.97098905637002, "grad_norm": 0.1075984537601471, "learning_rate": 0.01, "loss": 1.929, "step": 96579 }, { "epoch": 9.971298781746851, "grad_norm": 0.0782780796289444, "learning_rate": 0.01, "loss": 1.9089, "step": 96582 }, { "epoch": 9.971608507123683, "grad_norm": 0.05531663820147514, "learning_rate": 0.01, "loss": 1.9229, "step": 96585 }, { "epoch": 9.971918232500517, "grad_norm": 0.09160934388637543, "learning_rate": 0.01, "loss": 1.9426, "step": 96588 }, { "epoch": 9.97222795787735, "grad_norm": 0.09960715472698212, "learning_rate": 0.01, "loss": 1.938, "step": 96591 }, { "epoch": 9.972537683254181, "grad_norm": 0.05900340527296066, "learning_rate": 0.01, "loss": 1.9444, "step": 96594 }, { "epoch": 9.972847408631013, "grad_norm": 0.04487355425953865, "learning_rate": 0.01, "loss": 1.9426, "step": 96597 }, { "epoch": 9.973157134007847, "grad_norm": 0.036559127271175385, "learning_rate": 0.01, "loss": 1.9133, "step": 96600 }, { "epoch": 9.97346685938468, "grad_norm": 0.04605504497885704, "learning_rate": 0.01, "loss": 1.9418, "step": 96603 }, { "epoch": 9.973776584761511, "grad_norm": 0.04498891904950142, "learning_rate": 0.01, "loss": 1.9637, "step": 96606 }, { "epoch": 9.974086310138343, "grad_norm": 0.07214579731225967, "learning_rate": 0.01, "loss": 1.9141, "step": 96609 }, { "epoch": 9.974396035515177, "grad_norm": 0.0739932432770729, "learning_rate": 0.01, "loss": 1.9039, "step": 96612 }, { "epoch": 9.974705760892009, "grad_norm": 0.051533691585063934, "learning_rate": 0.01, "loss": 1.9335, "step": 96615 }, { "epoch": 9.975015486268841, "grad_norm": 0.048333242535591125, "learning_rate": 0.01, "loss": 1.9022, "step": 96618 }, { "epoch": 9.975325211645675, "grad_norm": 0.05780849605798721, "learning_rate": 0.01, "loss": 1.9087, "step": 96621 }, { "epoch": 9.975634937022507, "grad_norm": 0.050407566130161285, "learning_rate": 0.01, "loss": 1.9232, "step": 96624 }, { "epoch": 9.975944662399339, "grad_norm": 0.03372873738408089, "learning_rate": 0.01, "loss": 1.9445, "step": 96627 }, { "epoch": 9.976254387776171, "grad_norm": 0.06596677005290985, "learning_rate": 0.01, "loss": 1.9047, "step": 96630 }, { "epoch": 9.976564113153005, "grad_norm": 0.05827241763472557, "learning_rate": 0.01, "loss": 1.9252, "step": 96633 }, { "epoch": 9.976873838529837, "grad_norm": 0.06429224461317062, "learning_rate": 0.01, "loss": 1.953, "step": 96636 }, { "epoch": 9.977183563906669, "grad_norm": 0.05854794755578041, "learning_rate": 0.01, "loss": 1.9446, "step": 96639 }, { "epoch": 9.977493289283501, "grad_norm": 0.0726938396692276, "learning_rate": 0.01, "loss": 1.9366, "step": 96642 }, { "epoch": 9.977803014660335, "grad_norm": 0.12848325073719025, "learning_rate": 0.01, "loss": 1.9199, "step": 96645 }, { "epoch": 9.978112740037167, "grad_norm": 0.049772996455430984, "learning_rate": 0.01, "loss": 1.9212, "step": 96648 }, { "epoch": 9.978422465413999, "grad_norm": 0.0814037024974823, "learning_rate": 0.01, "loss": 1.9426, "step": 96651 }, { "epoch": 9.978732190790833, "grad_norm": 0.03594604507088661, "learning_rate": 0.01, "loss": 1.9353, "step": 96654 }, { "epoch": 9.979041916167665, "grad_norm": 0.04974773898720741, "learning_rate": 0.01, "loss": 1.9364, "step": 96657 }, { "epoch": 9.979351641544497, "grad_norm": 0.07818830758333206, "learning_rate": 0.01, "loss": 1.9475, "step": 96660 }, { "epoch": 9.979661366921329, "grad_norm": 0.070968858897686, "learning_rate": 0.01, "loss": 1.9322, "step": 96663 }, { "epoch": 9.979971092298163, "grad_norm": 0.043815940618515015, "learning_rate": 0.01, "loss": 1.925, "step": 96666 }, { "epoch": 9.980280817674995, "grad_norm": 0.10502909123897552, "learning_rate": 0.01, "loss": 1.9503, "step": 96669 }, { "epoch": 9.980590543051827, "grad_norm": 0.09334055334329605, "learning_rate": 0.01, "loss": 1.9354, "step": 96672 }, { "epoch": 9.98090026842866, "grad_norm": 0.07804276049137115, "learning_rate": 0.01, "loss": 1.9384, "step": 96675 }, { "epoch": 9.981209993805493, "grad_norm": 0.04819759726524353, "learning_rate": 0.01, "loss": 1.9171, "step": 96678 }, { "epoch": 9.981519719182325, "grad_norm": 0.09342513978481293, "learning_rate": 0.01, "loss": 1.9379, "step": 96681 }, { "epoch": 9.981829444559157, "grad_norm": 0.08521047979593277, "learning_rate": 0.01, "loss": 1.9499, "step": 96684 }, { "epoch": 9.98213916993599, "grad_norm": 0.14616277813911438, "learning_rate": 0.01, "loss": 1.9455, "step": 96687 }, { "epoch": 9.982448895312823, "grad_norm": 0.1458623856306076, "learning_rate": 0.01, "loss": 1.9131, "step": 96690 }, { "epoch": 9.982758620689655, "grad_norm": 0.09821382910013199, "learning_rate": 0.01, "loss": 1.9529, "step": 96693 }, { "epoch": 9.983068346066489, "grad_norm": 0.05744251608848572, "learning_rate": 0.01, "loss": 1.9331, "step": 96696 }, { "epoch": 9.98337807144332, "grad_norm": 0.04756811261177063, "learning_rate": 0.01, "loss": 1.9271, "step": 96699 }, { "epoch": 9.983687796820153, "grad_norm": 0.045846566557884216, "learning_rate": 0.01, "loss": 1.9291, "step": 96702 }, { "epoch": 9.983997522196985, "grad_norm": 0.036973029375076294, "learning_rate": 0.01, "loss": 1.9146, "step": 96705 }, { "epoch": 9.984307247573819, "grad_norm": 0.03365800529718399, "learning_rate": 0.01, "loss": 1.9281, "step": 96708 }, { "epoch": 9.98461697295065, "grad_norm": 0.07403205335140228, "learning_rate": 0.01, "loss": 1.9203, "step": 96711 }, { "epoch": 9.984926698327483, "grad_norm": 0.07294251024723053, "learning_rate": 0.01, "loss": 1.925, "step": 96714 }, { "epoch": 9.985236423704315, "grad_norm": 0.055860526859760284, "learning_rate": 0.01, "loss": 1.9081, "step": 96717 }, { "epoch": 9.985546149081149, "grad_norm": 0.122382752597332, "learning_rate": 0.01, "loss": 1.9366, "step": 96720 }, { "epoch": 9.98585587445798, "grad_norm": 0.15544384717941284, "learning_rate": 0.01, "loss": 1.9308, "step": 96723 }, { "epoch": 9.986165599834813, "grad_norm": 0.11838815361261368, "learning_rate": 0.01, "loss": 1.9325, "step": 96726 }, { "epoch": 9.986475325211646, "grad_norm": 0.048733849078416824, "learning_rate": 0.01, "loss": 1.9375, "step": 96729 }, { "epoch": 9.986785050588479, "grad_norm": 0.05413370579481125, "learning_rate": 0.01, "loss": 1.9503, "step": 96732 }, { "epoch": 9.98709477596531, "grad_norm": 0.04110780730843544, "learning_rate": 0.01, "loss": 1.9425, "step": 96735 }, { "epoch": 9.987404501342143, "grad_norm": 0.045921992510557175, "learning_rate": 0.01, "loss": 1.9015, "step": 96738 }, { "epoch": 9.987714226718976, "grad_norm": 0.09011565148830414, "learning_rate": 0.01, "loss": 1.9258, "step": 96741 }, { "epoch": 9.988023952095809, "grad_norm": 0.06843160837888718, "learning_rate": 0.01, "loss": 1.9491, "step": 96744 }, { "epoch": 9.98833367747264, "grad_norm": 0.042586639523506165, "learning_rate": 0.01, "loss": 1.9395, "step": 96747 }, { "epoch": 9.988643402849473, "grad_norm": 0.06954126805067062, "learning_rate": 0.01, "loss": 1.926, "step": 96750 }, { "epoch": 9.988953128226306, "grad_norm": 0.07393761724233627, "learning_rate": 0.01, "loss": 1.9157, "step": 96753 }, { "epoch": 9.989262853603138, "grad_norm": 0.10468722879886627, "learning_rate": 0.01, "loss": 1.917, "step": 96756 }, { "epoch": 9.98957257897997, "grad_norm": 0.05889539793133736, "learning_rate": 0.01, "loss": 1.9097, "step": 96759 }, { "epoch": 9.989882304356804, "grad_norm": 0.07115521281957626, "learning_rate": 0.01, "loss": 1.9151, "step": 96762 }, { "epoch": 9.990192029733636, "grad_norm": 0.07057791203260422, "learning_rate": 0.01, "loss": 1.9325, "step": 96765 }, { "epoch": 9.990501755110468, "grad_norm": 0.062427669763565063, "learning_rate": 0.01, "loss": 1.9072, "step": 96768 }, { "epoch": 9.9908114804873, "grad_norm": 0.04834975302219391, "learning_rate": 0.01, "loss": 1.9276, "step": 96771 }, { "epoch": 9.991121205864134, "grad_norm": 0.03624477609992027, "learning_rate": 0.01, "loss": 1.9406, "step": 96774 }, { "epoch": 9.991430931240966, "grad_norm": 0.03459803760051727, "learning_rate": 0.01, "loss": 1.9174, "step": 96777 }, { "epoch": 9.991740656617798, "grad_norm": 0.11813360452651978, "learning_rate": 0.01, "loss": 1.9314, "step": 96780 }, { "epoch": 9.992050381994632, "grad_norm": 0.07058688253164291, "learning_rate": 0.01, "loss": 1.9528, "step": 96783 }, { "epoch": 9.992360107371464, "grad_norm": 0.09416121989488602, "learning_rate": 0.01, "loss": 1.9409, "step": 96786 }, { "epoch": 9.992669832748296, "grad_norm": 0.07006985694169998, "learning_rate": 0.01, "loss": 1.926, "step": 96789 }, { "epoch": 9.992979558125128, "grad_norm": 0.0769728347659111, "learning_rate": 0.01, "loss": 1.935, "step": 96792 }, { "epoch": 9.993289283501962, "grad_norm": 0.15726791322231293, "learning_rate": 0.01, "loss": 1.9499, "step": 96795 }, { "epoch": 9.993599008878794, "grad_norm": 0.07349522411823273, "learning_rate": 0.01, "loss": 1.9433, "step": 96798 }, { "epoch": 9.993908734255626, "grad_norm": 0.04822126403450966, "learning_rate": 0.01, "loss": 1.9328, "step": 96801 }, { "epoch": 9.99421845963246, "grad_norm": 0.03172096982598305, "learning_rate": 0.01, "loss": 1.9239, "step": 96804 }, { "epoch": 9.994528185009292, "grad_norm": 0.03926796466112137, "learning_rate": 0.01, "loss": 1.909, "step": 96807 }, { "epoch": 9.994837910386124, "grad_norm": 0.057607777416706085, "learning_rate": 0.01, "loss": 1.935, "step": 96810 }, { "epoch": 9.995147635762956, "grad_norm": 0.07060913741588593, "learning_rate": 0.01, "loss": 1.927, "step": 96813 }, { "epoch": 9.99545736113979, "grad_norm": 0.10870376974344254, "learning_rate": 0.01, "loss": 1.9443, "step": 96816 }, { "epoch": 9.995767086516622, "grad_norm": 0.05336311459541321, "learning_rate": 0.01, "loss": 1.9332, "step": 96819 }, { "epoch": 9.996076811893454, "grad_norm": 0.10393037647008896, "learning_rate": 0.01, "loss": 1.9436, "step": 96822 }, { "epoch": 9.996386537270286, "grad_norm": 0.05877384915947914, "learning_rate": 0.01, "loss": 1.9291, "step": 96825 }, { "epoch": 9.99669626264712, "grad_norm": 0.07578223943710327, "learning_rate": 0.01, "loss": 1.9488, "step": 96828 }, { "epoch": 9.997005988023952, "grad_norm": 0.046317122876644135, "learning_rate": 0.01, "loss": 1.9175, "step": 96831 }, { "epoch": 9.997315713400784, "grad_norm": 0.0316709503531456, "learning_rate": 0.01, "loss": 1.9493, "step": 96834 }, { "epoch": 9.997625438777618, "grad_norm": 0.04384656623005867, "learning_rate": 0.01, "loss": 1.9597, "step": 96837 }, { "epoch": 9.99793516415445, "grad_norm": 0.061252620071172714, "learning_rate": 0.01, "loss": 1.9364, "step": 96840 }, { "epoch": 9.998244889531282, "grad_norm": 0.08033587783575058, "learning_rate": 0.01, "loss": 1.9363, "step": 96843 }, { "epoch": 9.998554614908114, "grad_norm": 0.09759991616010666, "learning_rate": 0.01, "loss": 1.9676, "step": 96846 }, { "epoch": 9.998864340284948, "grad_norm": 0.060363709926605225, "learning_rate": 0.01, "loss": 1.949, "step": 96849 }, { "epoch": 9.99917406566178, "grad_norm": 0.06367176026105881, "learning_rate": 0.01, "loss": 1.9553, "step": 96852 }, { "epoch": 9.999483791038612, "grad_norm": 0.08114373683929443, "learning_rate": 0.01, "loss": 1.9186, "step": 96855 }, { "epoch": 9.999793516415444, "grad_norm": 0.04385897517204285, "learning_rate": 0.01, "loss": 1.9583, "step": 96858 }, { "epoch": 9.947725172024237, "grad_norm": 0.08046164363622665, "learning_rate": 0.01, "loss": 1.9126, "step": 96861 }, { "epoch": 9.948033275136078, "grad_norm": 0.0815848708152771, "learning_rate": 0.01, "loss": 1.9446, "step": 96864 }, { "epoch": 9.948341378247921, "grad_norm": 0.06400562822818756, "learning_rate": 0.01, "loss": 1.9391, "step": 96867 }, { "epoch": 9.948649481359762, "grad_norm": 0.063548743724823, "learning_rate": 0.01, "loss": 1.965, "step": 96870 }, { "epoch": 9.948957584471604, "grad_norm": 0.09831225126981735, "learning_rate": 0.01, "loss": 1.9456, "step": 96873 }, { "epoch": 9.949265687583445, "grad_norm": 0.03349125012755394, "learning_rate": 0.01, "loss": 1.9353, "step": 96876 }, { "epoch": 9.949573790695286, "grad_norm": 0.08987690508365631, "learning_rate": 0.01, "loss": 1.9528, "step": 96879 }, { "epoch": 9.949881893807127, "grad_norm": 0.20651590824127197, "learning_rate": 0.01, "loss": 1.9596, "step": 96882 }, { "epoch": 9.950189996918969, "grad_norm": 0.09576163440942764, "learning_rate": 0.01, "loss": 1.9491, "step": 96885 }, { "epoch": 9.95049810003081, "grad_norm": 0.07056623697280884, "learning_rate": 0.01, "loss": 1.9652, "step": 96888 }, { "epoch": 9.950806203142651, "grad_norm": 0.06381063908338547, "learning_rate": 0.01, "loss": 1.9512, "step": 96891 }, { "epoch": 9.951114306254492, "grad_norm": 0.04859933257102966, "learning_rate": 0.01, "loss": 1.9578, "step": 96894 }, { "epoch": 9.951422409366334, "grad_norm": 0.05717281252145767, "learning_rate": 0.01, "loss": 1.9521, "step": 96897 }, { "epoch": 9.951730512478177, "grad_norm": 0.06399691849946976, "learning_rate": 0.01, "loss": 1.9312, "step": 96900 }, { "epoch": 9.952038615590018, "grad_norm": 0.04647698998451233, "learning_rate": 0.01, "loss": 1.9594, "step": 96903 }, { "epoch": 9.95234671870186, "grad_norm": 0.0519287995994091, "learning_rate": 0.01, "loss": 1.9729, "step": 96906 }, { "epoch": 9.9526548218137, "grad_norm": 0.03411509841680527, "learning_rate": 0.01, "loss": 1.926, "step": 96909 }, { "epoch": 9.952962924925542, "grad_norm": 0.03161589428782463, "learning_rate": 0.01, "loss": 1.9552, "step": 96912 }, { "epoch": 9.953271028037383, "grad_norm": 0.0575750507414341, "learning_rate": 0.01, "loss": 1.9491, "step": 96915 }, { "epoch": 9.953579131149224, "grad_norm": 0.19468317925930023, "learning_rate": 0.01, "loss": 1.9698, "step": 96918 }, { "epoch": 9.953887234261066, "grad_norm": 0.1294582337141037, "learning_rate": 0.01, "loss": 1.9445, "step": 96921 }, { "epoch": 9.954195337372907, "grad_norm": 0.04406146705150604, "learning_rate": 0.01, "loss": 1.9687, "step": 96924 }, { "epoch": 9.954503440484748, "grad_norm": 0.05637388303875923, "learning_rate": 0.01, "loss": 1.9691, "step": 96927 }, { "epoch": 9.954811543596591, "grad_norm": 0.03598056361079216, "learning_rate": 0.01, "loss": 1.9514, "step": 96930 }, { "epoch": 9.955119646708432, "grad_norm": 0.03543659299612045, "learning_rate": 0.01, "loss": 1.9298, "step": 96933 }, { "epoch": 9.955427749820274, "grad_norm": 0.04690230265259743, "learning_rate": 0.01, "loss": 1.9542, "step": 96936 }, { "epoch": 9.955735852932115, "grad_norm": 0.06882453709840775, "learning_rate": 0.01, "loss": 1.9487, "step": 96939 }, { "epoch": 9.956043956043956, "grad_norm": 0.05158957466483116, "learning_rate": 0.01, "loss": 1.9447, "step": 96942 }, { "epoch": 9.956352059155797, "grad_norm": 0.04015819728374481, "learning_rate": 0.01, "loss": 1.9481, "step": 96945 }, { "epoch": 9.956660162267639, "grad_norm": 0.0542755052447319, "learning_rate": 0.01, "loss": 1.9527, "step": 96948 }, { "epoch": 9.95696826537948, "grad_norm": 0.03693319484591484, "learning_rate": 0.01, "loss": 1.9402, "step": 96951 }, { "epoch": 9.957276368491321, "grad_norm": 0.07938143610954285, "learning_rate": 0.01, "loss": 1.9617, "step": 96954 }, { "epoch": 9.957584471603163, "grad_norm": 0.09673090279102325, "learning_rate": 0.01, "loss": 1.9673, "step": 96957 }, { "epoch": 9.957892574715004, "grad_norm": 0.07723116129636765, "learning_rate": 0.01, "loss": 1.9365, "step": 96960 }, { "epoch": 9.958200677826847, "grad_norm": 0.09069114923477173, "learning_rate": 0.01, "loss": 1.9461, "step": 96963 }, { "epoch": 9.958508780938688, "grad_norm": 0.0510699637234211, "learning_rate": 0.01, "loss": 1.972, "step": 96966 }, { "epoch": 9.95881688405053, "grad_norm": 0.07924008369445801, "learning_rate": 0.01, "loss": 1.9419, "step": 96969 }, { "epoch": 9.95912498716237, "grad_norm": 0.07602304220199585, "learning_rate": 0.01, "loss": 1.9321, "step": 96972 }, { "epoch": 9.959433090274212, "grad_norm": 0.06452027708292007, "learning_rate": 0.01, "loss": 1.9681, "step": 96975 }, { "epoch": 9.959741193386053, "grad_norm": 0.053950998932123184, "learning_rate": 0.01, "loss": 1.944, "step": 96978 }, { "epoch": 9.960049296497894, "grad_norm": 0.05036810413002968, "learning_rate": 0.01, "loss": 1.9454, "step": 96981 }, { "epoch": 9.960357399609736, "grad_norm": 0.05384952202439308, "learning_rate": 0.01, "loss": 1.95, "step": 96984 }, { "epoch": 9.960665502721577, "grad_norm": 0.11406214535236359, "learning_rate": 0.01, "loss": 1.9248, "step": 96987 }, { "epoch": 9.960973605833418, "grad_norm": 0.05955125764012337, "learning_rate": 0.01, "loss": 1.9513, "step": 96990 }, { "epoch": 9.961281708945261, "grad_norm": 0.09721150994300842, "learning_rate": 0.01, "loss": 1.9478, "step": 96993 }, { "epoch": 9.961589812057102, "grad_norm": 0.08787155151367188, "learning_rate": 0.01, "loss": 1.9531, "step": 96996 }, { "epoch": 9.961897915168944, "grad_norm": 0.040302030742168427, "learning_rate": 0.01, "loss": 1.9663, "step": 96999 }, { "epoch": 9.962206018280785, "grad_norm": 0.05395068973302841, "learning_rate": 0.01, "loss": 1.9349, "step": 97002 }, { "epoch": 9.962514121392626, "grad_norm": 0.0569995641708374, "learning_rate": 0.01, "loss": 1.9425, "step": 97005 }, { "epoch": 9.962822224504468, "grad_norm": 0.05147429555654526, "learning_rate": 0.01, "loss": 1.9547, "step": 97008 }, { "epoch": 9.963130327616309, "grad_norm": 0.1475445181131363, "learning_rate": 0.01, "loss": 1.9442, "step": 97011 }, { "epoch": 9.96343843072815, "grad_norm": 0.1312050074338913, "learning_rate": 0.01, "loss": 1.9815, "step": 97014 }, { "epoch": 9.963746533839991, "grad_norm": 0.08530642837285995, "learning_rate": 0.01, "loss": 1.9541, "step": 97017 }, { "epoch": 9.964054636951833, "grad_norm": 0.049393344670534134, "learning_rate": 0.01, "loss": 1.9536, "step": 97020 }, { "epoch": 9.964362740063674, "grad_norm": 0.05835723131895065, "learning_rate": 0.01, "loss": 1.9675, "step": 97023 }, { "epoch": 9.964670843175517, "grad_norm": 0.05166759714484215, "learning_rate": 0.01, "loss": 1.9674, "step": 97026 }, { "epoch": 9.964978946287358, "grad_norm": 0.045471616089344025, "learning_rate": 0.01, "loss": 1.9486, "step": 97029 }, { "epoch": 9.9652870493992, "grad_norm": 0.07382349669933319, "learning_rate": 0.01, "loss": 1.9396, "step": 97032 }, { "epoch": 9.96559515251104, "grad_norm": 0.1263457089662552, "learning_rate": 0.01, "loss": 1.9643, "step": 97035 }, { "epoch": 9.965903255622882, "grad_norm": 0.04571104049682617, "learning_rate": 0.01, "loss": 1.963, "step": 97038 }, { "epoch": 9.966211358734723, "grad_norm": 0.04100766405463219, "learning_rate": 0.01, "loss": 1.9569, "step": 97041 }, { "epoch": 9.966519461846564, "grad_norm": 0.062699094414711, "learning_rate": 0.01, "loss": 1.9736, "step": 97044 }, { "epoch": 9.966827564958406, "grad_norm": 0.05958600714802742, "learning_rate": 0.01, "loss": 1.953, "step": 97047 }, { "epoch": 9.967135668070247, "grad_norm": 0.037050146609544754, "learning_rate": 0.01, "loss": 1.913, "step": 97050 }, { "epoch": 9.967443771182088, "grad_norm": 0.09023641049861908, "learning_rate": 0.01, "loss": 1.9589, "step": 97053 }, { "epoch": 9.967751874293931, "grad_norm": 0.09687948226928711, "learning_rate": 0.01, "loss": 1.9169, "step": 97056 }, { "epoch": 9.968059977405773, "grad_norm": 0.08525572717189789, "learning_rate": 0.01, "loss": 1.9441, "step": 97059 }, { "epoch": 9.968368080517614, "grad_norm": 0.07634325325489044, "learning_rate": 0.01, "loss": 1.9491, "step": 97062 }, { "epoch": 9.968676183629455, "grad_norm": 0.07752718031406403, "learning_rate": 0.01, "loss": 1.9681, "step": 97065 }, { "epoch": 9.968984286741296, "grad_norm": 0.09133943915367126, "learning_rate": 0.01, "loss": 1.9413, "step": 97068 }, { "epoch": 9.969292389853138, "grad_norm": 0.08312353491783142, "learning_rate": 0.01, "loss": 1.9653, "step": 97071 }, { "epoch": 9.969600492964979, "grad_norm": 0.06459927558898926, "learning_rate": 0.01, "loss": 1.9639, "step": 97074 }, { "epoch": 9.96990859607682, "grad_norm": 0.06323962658643723, "learning_rate": 0.01, "loss": 1.9622, "step": 97077 }, { "epoch": 9.970216699188661, "grad_norm": 0.063611701130867, "learning_rate": 0.01, "loss": 1.9649, "step": 97080 }, { "epoch": 9.970524802300503, "grad_norm": 0.052672822028398514, "learning_rate": 0.01, "loss": 1.975, "step": 97083 }, { "epoch": 9.970832905412344, "grad_norm": 0.09678171575069427, "learning_rate": 0.01, "loss": 1.9324, "step": 97086 }, { "epoch": 9.971141008524185, "grad_norm": 0.131818950176239, "learning_rate": 0.01, "loss": 1.961, "step": 97089 }, { "epoch": 9.971449111636028, "grad_norm": 0.05830173194408417, "learning_rate": 0.01, "loss": 1.9352, "step": 97092 }, { "epoch": 9.97175721474787, "grad_norm": 0.09154964983463287, "learning_rate": 0.01, "loss": 1.9595, "step": 97095 }, { "epoch": 9.97206531785971, "grad_norm": 0.0545133538544178, "learning_rate": 0.01, "loss": 1.943, "step": 97098 }, { "epoch": 9.972373420971552, "grad_norm": 0.03951120749115944, "learning_rate": 0.01, "loss": 1.9704, "step": 97101 }, { "epoch": 9.972681524083393, "grad_norm": 0.03921626880764961, "learning_rate": 0.01, "loss": 1.9653, "step": 97104 }, { "epoch": 9.972989627195235, "grad_norm": 0.06999723613262177, "learning_rate": 0.01, "loss": 1.9351, "step": 97107 }, { "epoch": 9.973297730307076, "grad_norm": 0.06404420733451843, "learning_rate": 0.01, "loss": 1.9512, "step": 97110 }, { "epoch": 9.973605833418917, "grad_norm": 0.12217292934656143, "learning_rate": 0.01, "loss": 1.9557, "step": 97113 }, { "epoch": 9.973913936530758, "grad_norm": 0.044860534369945526, "learning_rate": 0.01, "loss": 1.9507, "step": 97116 }, { "epoch": 9.9742220396426, "grad_norm": 0.05524575710296631, "learning_rate": 0.01, "loss": 1.9639, "step": 97119 }, { "epoch": 9.974530142754443, "grad_norm": 0.05118962749838829, "learning_rate": 0.01, "loss": 1.9612, "step": 97122 }, { "epoch": 9.974838245866284, "grad_norm": 0.08901583403348923, "learning_rate": 0.01, "loss": 1.9737, "step": 97125 }, { "epoch": 9.975146348978125, "grad_norm": 0.08375976234674454, "learning_rate": 0.01, "loss": 1.9473, "step": 97128 }, { "epoch": 9.975454452089966, "grad_norm": 0.10059249401092529, "learning_rate": 0.01, "loss": 1.967, "step": 97131 }, { "epoch": 9.975762555201808, "grad_norm": 0.09325744956731796, "learning_rate": 0.01, "loss": 1.95, "step": 97134 }, { "epoch": 9.976070658313649, "grad_norm": 0.06354837119579315, "learning_rate": 0.01, "loss": 1.9496, "step": 97137 }, { "epoch": 9.97637876142549, "grad_norm": 0.05322406068444252, "learning_rate": 0.01, "loss": 1.9532, "step": 97140 }, { "epoch": 9.976686864537331, "grad_norm": 0.04118852689862251, "learning_rate": 0.01, "loss": 1.9387, "step": 97143 }, { "epoch": 9.976994967649173, "grad_norm": 0.0986984446644783, "learning_rate": 0.01, "loss": 1.9325, "step": 97146 }, { "epoch": 9.977303070761014, "grad_norm": 0.11657184362411499, "learning_rate": 0.01, "loss": 1.9398, "step": 97149 }, { "epoch": 9.977611173872855, "grad_norm": 0.04773658141493797, "learning_rate": 0.01, "loss": 1.9591, "step": 97152 }, { "epoch": 9.977919276984698, "grad_norm": 0.08913548290729523, "learning_rate": 0.01, "loss": 1.9505, "step": 97155 }, { "epoch": 9.97822738009654, "grad_norm": 0.1064072772860527, "learning_rate": 0.01, "loss": 1.9577, "step": 97158 }, { "epoch": 9.97853548320838, "grad_norm": 0.06742316484451294, "learning_rate": 0.01, "loss": 1.9522, "step": 97161 }, { "epoch": 9.978843586320222, "grad_norm": 0.057154104113578796, "learning_rate": 0.01, "loss": 1.9137, "step": 97164 }, { "epoch": 9.979151689432063, "grad_norm": 0.04546293243765831, "learning_rate": 0.01, "loss": 1.9401, "step": 97167 }, { "epoch": 9.979459792543905, "grad_norm": 0.04663031920790672, "learning_rate": 0.01, "loss": 1.9285, "step": 97170 }, { "epoch": 9.979767895655746, "grad_norm": 0.08622399717569351, "learning_rate": 0.01, "loss": 1.9669, "step": 97173 }, { "epoch": 9.980075998767587, "grad_norm": 0.039252907037734985, "learning_rate": 0.01, "loss": 1.9487, "step": 97176 }, { "epoch": 9.980384101879428, "grad_norm": 0.051296334713697433, "learning_rate": 0.01, "loss": 1.9179, "step": 97179 }, { "epoch": 9.98069220499127, "grad_norm": 0.04865694418549538, "learning_rate": 0.01, "loss": 1.9266, "step": 97182 }, { "epoch": 9.981000308103113, "grad_norm": 0.03953705355525017, "learning_rate": 0.01, "loss": 1.9158, "step": 97185 }, { "epoch": 9.981308411214954, "grad_norm": 0.05082760378718376, "learning_rate": 0.01, "loss": 1.9664, "step": 97188 }, { "epoch": 9.981616514326795, "grad_norm": 0.057986754924058914, "learning_rate": 0.01, "loss": 1.9238, "step": 97191 }, { "epoch": 9.981924617438636, "grad_norm": 0.0460967980325222, "learning_rate": 0.01, "loss": 1.9513, "step": 97194 }, { "epoch": 9.982232720550478, "grad_norm": 0.035832539200782776, "learning_rate": 0.01, "loss": 1.9405, "step": 97197 }, { "epoch": 9.982540823662319, "grad_norm": 0.055434614419937134, "learning_rate": 0.01, "loss": 1.9442, "step": 97200 }, { "epoch": 9.98284892677416, "grad_norm": 0.04218105971813202, "learning_rate": 0.01, "loss": 1.9433, "step": 97203 }, { "epoch": 9.983157029886002, "grad_norm": 0.17435774207115173, "learning_rate": 0.01, "loss": 1.9498, "step": 97206 }, { "epoch": 9.983465132997843, "grad_norm": 0.13309799134731293, "learning_rate": 0.01, "loss": 1.957, "step": 97209 }, { "epoch": 9.983773236109684, "grad_norm": 0.06844424456357956, "learning_rate": 0.01, "loss": 1.9548, "step": 97212 }, { "epoch": 9.984081339221525, "grad_norm": 0.04057778790593147, "learning_rate": 0.01, "loss": 1.9691, "step": 97215 }, { "epoch": 9.984389442333368, "grad_norm": 0.07741393893957138, "learning_rate": 0.01, "loss": 1.9272, "step": 97218 }, { "epoch": 9.98469754544521, "grad_norm": 0.0387113019824028, "learning_rate": 0.01, "loss": 1.973, "step": 97221 }, { "epoch": 9.98500564855705, "grad_norm": 0.056523408740758896, "learning_rate": 0.01, "loss": 1.9447, "step": 97224 }, { "epoch": 9.985313751668892, "grad_norm": 0.09296201169490814, "learning_rate": 0.01, "loss": 1.9577, "step": 97227 }, { "epoch": 9.985621854780733, "grad_norm": 0.05774252116680145, "learning_rate": 0.01, "loss": 1.9528, "step": 97230 }, { "epoch": 9.985929957892575, "grad_norm": 0.09069366008043289, "learning_rate": 0.01, "loss": 1.9273, "step": 97233 }, { "epoch": 9.986238061004416, "grad_norm": 0.08415558934211731, "learning_rate": 0.01, "loss": 1.9503, "step": 97236 }, { "epoch": 9.986546164116257, "grad_norm": 0.0969925969839096, "learning_rate": 0.01, "loss": 1.9609, "step": 97239 }, { "epoch": 9.986854267228098, "grad_norm": 0.1274038702249527, "learning_rate": 0.01, "loss": 1.9559, "step": 97242 }, { "epoch": 9.98716237033994, "grad_norm": 0.05992702394723892, "learning_rate": 0.01, "loss": 1.948, "step": 97245 }, { "epoch": 9.987470473451783, "grad_norm": 0.045540135353803635, "learning_rate": 0.01, "loss": 1.9585, "step": 97248 }, { "epoch": 9.987778576563624, "grad_norm": 0.03293858468532562, "learning_rate": 0.01, "loss": 1.9474, "step": 97251 }, { "epoch": 9.988086679675465, "grad_norm": 0.03892706707119942, "learning_rate": 0.01, "loss": 1.9518, "step": 97254 }, { "epoch": 9.988394782787307, "grad_norm": 0.08603250980377197, "learning_rate": 0.01, "loss": 1.9429, "step": 97257 }, { "epoch": 9.988702885899148, "grad_norm": 0.053922899067401886, "learning_rate": 0.01, "loss": 1.958, "step": 97260 }, { "epoch": 9.989010989010989, "grad_norm": 0.04998045787215233, "learning_rate": 0.01, "loss": 1.9238, "step": 97263 }, { "epoch": 9.98931909212283, "grad_norm": 0.05024062469601631, "learning_rate": 0.01, "loss": 1.9321, "step": 97266 }, { "epoch": 9.989627195234672, "grad_norm": 0.09126166999340057, "learning_rate": 0.01, "loss": 1.9423, "step": 97269 }, { "epoch": 9.989935298346513, "grad_norm": 0.03651963546872139, "learning_rate": 0.01, "loss": 1.9397, "step": 97272 }, { "epoch": 9.990243401458354, "grad_norm": 0.06524238735437393, "learning_rate": 0.01, "loss": 1.95, "step": 97275 }, { "epoch": 9.990551504570195, "grad_norm": 0.04085659235715866, "learning_rate": 0.01, "loss": 1.9345, "step": 97278 }, { "epoch": 9.990859607682038, "grad_norm": 0.044826142489910126, "learning_rate": 0.01, "loss": 1.9603, "step": 97281 }, { "epoch": 9.99116771079388, "grad_norm": 0.07010653614997864, "learning_rate": 0.01, "loss": 1.9518, "step": 97284 }, { "epoch": 9.991475813905721, "grad_norm": 0.058311570435762405, "learning_rate": 0.01, "loss": 1.9529, "step": 97287 }, { "epoch": 9.991783917017562, "grad_norm": 0.09507184475660324, "learning_rate": 0.01, "loss": 1.9438, "step": 97290 }, { "epoch": 9.992092020129403, "grad_norm": 0.035488102585077286, "learning_rate": 0.01, "loss": 1.9743, "step": 97293 }, { "epoch": 9.992400123241245, "grad_norm": 0.08211905509233475, "learning_rate": 0.01, "loss": 1.9522, "step": 97296 }, { "epoch": 9.992708226353086, "grad_norm": 0.1444222629070282, "learning_rate": 0.01, "loss": 1.9304, "step": 97299 }, { "epoch": 9.993016329464927, "grad_norm": 0.0890636295080185, "learning_rate": 0.01, "loss": 1.9363, "step": 97302 }, { "epoch": 9.993324432576769, "grad_norm": 0.07359254360198975, "learning_rate": 0.01, "loss": 1.9454, "step": 97305 }, { "epoch": 9.99363253568861, "grad_norm": 0.050952211022377014, "learning_rate": 0.01, "loss": 1.9581, "step": 97308 }, { "epoch": 9.993940638800453, "grad_norm": 0.08539391309022903, "learning_rate": 0.01, "loss": 1.9485, "step": 97311 }, { "epoch": 9.994248741912294, "grad_norm": 0.06802923232316971, "learning_rate": 0.01, "loss": 1.9372, "step": 97314 }, { "epoch": 9.994556845024135, "grad_norm": 0.25237563252449036, "learning_rate": 0.01, "loss": 1.9629, "step": 97317 }, { "epoch": 9.994864948135977, "grad_norm": 0.03497587889432907, "learning_rate": 0.01, "loss": 1.9367, "step": 97320 }, { "epoch": 9.995173051247818, "grad_norm": 0.04146720468997955, "learning_rate": 0.01, "loss": 1.9684, "step": 97323 }, { "epoch": 9.99548115435966, "grad_norm": 0.06152525544166565, "learning_rate": 0.01, "loss": 1.9431, "step": 97326 }, { "epoch": 9.9957892574715, "grad_norm": 0.0637136921286583, "learning_rate": 0.01, "loss": 1.9186, "step": 97329 }, { "epoch": 9.996097360583342, "grad_norm": 0.06904961168766022, "learning_rate": 0.01, "loss": 1.9351, "step": 97332 }, { "epoch": 9.996405463695183, "grad_norm": 0.11976516991853714, "learning_rate": 0.01, "loss": 1.9496, "step": 97335 }, { "epoch": 9.996713566807024, "grad_norm": 0.102267324924469, "learning_rate": 0.01, "loss": 1.9271, "step": 97338 }, { "epoch": 9.997021669918865, "grad_norm": 0.07641902565956116, "learning_rate": 0.01, "loss": 1.9156, "step": 97341 }, { "epoch": 9.997329773030707, "grad_norm": 0.0694715678691864, "learning_rate": 0.01, "loss": 1.9614, "step": 97344 }, { "epoch": 9.99763787614255, "grad_norm": 0.10273284465074539, "learning_rate": 0.01, "loss": 1.9458, "step": 97347 }, { "epoch": 9.997945979254391, "grad_norm": 0.06914559006690979, "learning_rate": 0.01, "loss": 1.9295, "step": 97350 }, { "epoch": 9.998254082366232, "grad_norm": 0.05837008357048035, "learning_rate": 0.01, "loss": 1.9438, "step": 97353 }, { "epoch": 9.998562185478074, "grad_norm": 0.0501350574195385, "learning_rate": 0.01, "loss": 1.9229, "step": 97356 }, { "epoch": 9.998870288589915, "grad_norm": 0.05369944125413895, "learning_rate": 0.01, "loss": 1.9489, "step": 97359 }, { "epoch": 9.999178391701756, "grad_norm": 0.04862399771809578, "learning_rate": 0.01, "loss": 1.9364, "step": 97362 }, { "epoch": 9.999486494813597, "grad_norm": 0.04371797293424606, "learning_rate": 0.01, "loss": 1.9643, "step": 97365 }, { "epoch": 9.999794597925439, "grad_norm": 0.03798222169280052, "learning_rate": 0.01, "loss": 1.9366, "step": 97368 }, { "epoch": 10.00010270103728, "grad_norm": 0.12691354751586914, "learning_rate": 0.01, "loss": 1.9833, "step": 97371 }, { "epoch": 10.000410804149121, "grad_norm": 0.03646055608987808, "learning_rate": 0.01, "loss": 1.9118, "step": 97374 }, { "epoch": 10.000718907260964, "grad_norm": 0.04508265480399132, "learning_rate": 0.01, "loss": 1.9322, "step": 97377 }, { "epoch": 10.001027010372805, "grad_norm": 0.05073779076337814, "learning_rate": 0.01, "loss": 1.9538, "step": 97380 }, { "epoch": 10.001335113484647, "grad_norm": 0.04759017750620842, "learning_rate": 0.01, "loss": 1.9516, "step": 97383 }, { "epoch": 10.001643216596488, "grad_norm": 0.05554679408669472, "learning_rate": 0.01, "loss": 1.96, "step": 97386 }, { "epoch": 10.00195131970833, "grad_norm": 0.09978814423084259, "learning_rate": 0.01, "loss": 1.941, "step": 97389 }, { "epoch": 10.00225942282017, "grad_norm": 0.10586490482091904, "learning_rate": 0.01, "loss": 1.9194, "step": 97392 }, { "epoch": 10.002567525932012, "grad_norm": 0.06032977253198624, "learning_rate": 0.01, "loss": 1.9007, "step": 97395 }, { "epoch": 10.002875629043853, "grad_norm": 0.16167794167995453, "learning_rate": 0.01, "loss": 1.9284, "step": 97398 }, { "epoch": 10.003183732155694, "grad_norm": 0.05945628881454468, "learning_rate": 0.01, "loss": 1.9428, "step": 97401 }, { "epoch": 10.003491835267535, "grad_norm": 0.06830620020627975, "learning_rate": 0.01, "loss": 1.9661, "step": 97404 }, { "epoch": 10.003799938379377, "grad_norm": 0.05241851136088371, "learning_rate": 0.01, "loss": 1.9178, "step": 97407 }, { "epoch": 10.00410804149122, "grad_norm": 0.04357186704874039, "learning_rate": 0.01, "loss": 1.9302, "step": 97410 }, { "epoch": 10.004416144603061, "grad_norm": 0.08057430386543274, "learning_rate": 0.01, "loss": 1.9593, "step": 97413 }, { "epoch": 10.004724247714902, "grad_norm": 0.08658464252948761, "learning_rate": 0.01, "loss": 1.9453, "step": 97416 }, { "epoch": 10.005032350826744, "grad_norm": 0.06090567260980606, "learning_rate": 0.01, "loss": 1.9204, "step": 97419 }, { "epoch": 10.005340453938585, "grad_norm": 0.09897079318761826, "learning_rate": 0.01, "loss": 1.9229, "step": 97422 }, { "epoch": 10.005648557050426, "grad_norm": 0.1031961739063263, "learning_rate": 0.01, "loss": 1.9428, "step": 97425 }, { "epoch": 10.005956660162267, "grad_norm": 0.0531962588429451, "learning_rate": 0.01, "loss": 1.9412, "step": 97428 }, { "epoch": 10.006264763274109, "grad_norm": 0.10613752156496048, "learning_rate": 0.01, "loss": 1.9375, "step": 97431 }, { "epoch": 10.00657286638595, "grad_norm": 0.046731069684028625, "learning_rate": 0.01, "loss": 1.9333, "step": 97434 }, { "epoch": 10.006880969497791, "grad_norm": 0.11759690195322037, "learning_rate": 0.01, "loss": 1.9361, "step": 97437 }, { "epoch": 10.007189072609634, "grad_norm": 0.04036640003323555, "learning_rate": 0.01, "loss": 1.9618, "step": 97440 }, { "epoch": 10.007497175721475, "grad_norm": 0.08520452678203583, "learning_rate": 0.01, "loss": 1.9426, "step": 97443 }, { "epoch": 10.007805278833317, "grad_norm": 0.03992795944213867, "learning_rate": 0.01, "loss": 1.9534, "step": 97446 }, { "epoch": 10.008113381945158, "grad_norm": 0.0406220443546772, "learning_rate": 0.01, "loss": 1.9409, "step": 97449 }, { "epoch": 10.008421485057, "grad_norm": 0.04602857306599617, "learning_rate": 0.01, "loss": 1.9485, "step": 97452 }, { "epoch": 10.00872958816884, "grad_norm": 0.1277029812335968, "learning_rate": 0.01, "loss": 1.9464, "step": 97455 }, { "epoch": 10.009037691280682, "grad_norm": 0.12406671047210693, "learning_rate": 0.01, "loss": 1.9483, "step": 97458 }, { "epoch": 10.009345794392523, "grad_norm": 0.050995584577322006, "learning_rate": 0.01, "loss": 1.958, "step": 97461 }, { "epoch": 10.009653897504364, "grad_norm": 0.05945532023906708, "learning_rate": 0.01, "loss": 1.9621, "step": 97464 }, { "epoch": 10.009962000616206, "grad_norm": 0.047782059758901596, "learning_rate": 0.01, "loss": 1.9377, "step": 97467 }, { "epoch": 10.010270103728047, "grad_norm": 0.0939398780465126, "learning_rate": 0.01, "loss": 1.9425, "step": 97470 }, { "epoch": 10.01057820683989, "grad_norm": 0.07378079742193222, "learning_rate": 0.01, "loss": 1.934, "step": 97473 }, { "epoch": 10.010886309951731, "grad_norm": 0.09242421388626099, "learning_rate": 0.01, "loss": 1.9518, "step": 97476 }, { "epoch": 10.011194413063572, "grad_norm": 0.04233480617403984, "learning_rate": 0.01, "loss": 1.9259, "step": 97479 }, { "epoch": 10.011502516175414, "grad_norm": 0.03884666785597801, "learning_rate": 0.01, "loss": 1.9496, "step": 97482 }, { "epoch": 10.011810619287255, "grad_norm": 0.044267915189266205, "learning_rate": 0.01, "loss": 1.9435, "step": 97485 }, { "epoch": 10.012118722399096, "grad_norm": 0.0717068612575531, "learning_rate": 0.01, "loss": 1.9324, "step": 97488 }, { "epoch": 10.012426825510937, "grad_norm": 0.04711198806762695, "learning_rate": 0.01, "loss": 1.9394, "step": 97491 }, { "epoch": 10.012734928622779, "grad_norm": 0.044493578374385834, "learning_rate": 0.01, "loss": 1.9258, "step": 97494 }, { "epoch": 10.01304303173462, "grad_norm": 0.05505421757698059, "learning_rate": 0.01, "loss": 1.9269, "step": 97497 }, { "epoch": 10.013351134846461, "grad_norm": 0.06965785473585129, "learning_rate": 0.01, "loss": 1.9274, "step": 97500 }, { "epoch": 10.013659237958304, "grad_norm": 0.04743869602680206, "learning_rate": 0.01, "loss": 1.9637, "step": 97503 }, { "epoch": 10.013967341070146, "grad_norm": 0.043692149221897125, "learning_rate": 0.01, "loss": 1.9639, "step": 97506 }, { "epoch": 10.014275444181987, "grad_norm": 0.15378451347351074, "learning_rate": 0.01, "loss": 1.9398, "step": 97509 }, { "epoch": 10.014583547293828, "grad_norm": 0.11334779113531113, "learning_rate": 0.01, "loss": 1.9628, "step": 97512 }, { "epoch": 10.01489165040567, "grad_norm": 0.05877596512436867, "learning_rate": 0.01, "loss": 1.9368, "step": 97515 }, { "epoch": 10.01519975351751, "grad_norm": 0.04958738759160042, "learning_rate": 0.01, "loss": 1.9568, "step": 97518 }, { "epoch": 10.015507856629352, "grad_norm": 0.038849733769893646, "learning_rate": 0.01, "loss": 1.9272, "step": 97521 }, { "epoch": 10.015815959741193, "grad_norm": 0.04901808872818947, "learning_rate": 0.01, "loss": 1.9405, "step": 97524 }, { "epoch": 10.016124062853034, "grad_norm": 0.047270163893699646, "learning_rate": 0.01, "loss": 1.9639, "step": 97527 }, { "epoch": 10.016432165964876, "grad_norm": 0.11565852165222168, "learning_rate": 0.01, "loss": 1.9302, "step": 97530 }, { "epoch": 10.016740269076717, "grad_norm": 0.04593334347009659, "learning_rate": 0.01, "loss": 1.9418, "step": 97533 }, { "epoch": 10.01704837218856, "grad_norm": 0.08739186823368073, "learning_rate": 0.01, "loss": 1.9284, "step": 97536 }, { "epoch": 10.017356475300401, "grad_norm": 0.10565205663442612, "learning_rate": 0.01, "loss": 1.9558, "step": 97539 }, { "epoch": 10.017664578412242, "grad_norm": 0.07832454144954681, "learning_rate": 0.01, "loss": 1.934, "step": 97542 }, { "epoch": 10.017972681524084, "grad_norm": 0.07256269454956055, "learning_rate": 0.01, "loss": 1.9346, "step": 97545 }, { "epoch": 10.018280784635925, "grad_norm": 0.05213913321495056, "learning_rate": 0.01, "loss": 1.9323, "step": 97548 }, { "epoch": 10.018588887747766, "grad_norm": 0.05286802724003792, "learning_rate": 0.01, "loss": 1.9571, "step": 97551 }, { "epoch": 10.018896990859607, "grad_norm": 0.049530480057001114, "learning_rate": 0.01, "loss": 1.9313, "step": 97554 }, { "epoch": 10.019205093971449, "grad_norm": 0.04447256028652191, "learning_rate": 0.01, "loss": 1.9663, "step": 97557 }, { "epoch": 10.01951319708329, "grad_norm": 0.12635476887226105, "learning_rate": 0.01, "loss": 1.9454, "step": 97560 }, { "epoch": 10.019821300195131, "grad_norm": 0.10345647484064102, "learning_rate": 0.01, "loss": 1.9471, "step": 97563 }, { "epoch": 10.020129403306973, "grad_norm": 0.0937427282333374, "learning_rate": 0.01, "loss": 1.9446, "step": 97566 }, { "epoch": 10.020437506418816, "grad_norm": 0.05710608884692192, "learning_rate": 0.01, "loss": 1.9317, "step": 97569 }, { "epoch": 10.020745609530657, "grad_norm": 0.03611237183213234, "learning_rate": 0.01, "loss": 1.9472, "step": 97572 }, { "epoch": 10.021053712642498, "grad_norm": 0.03928690403699875, "learning_rate": 0.01, "loss": 1.9473, "step": 97575 }, { "epoch": 10.02136181575434, "grad_norm": 0.03709922730922699, "learning_rate": 0.01, "loss": 1.9494, "step": 97578 }, { "epoch": 10.02166991886618, "grad_norm": 0.11630689352750778, "learning_rate": 0.01, "loss": 1.9363, "step": 97581 }, { "epoch": 10.021978021978022, "grad_norm": 0.16802328824996948, "learning_rate": 0.01, "loss": 1.9691, "step": 97584 }, { "epoch": 10.022286125089863, "grad_norm": 0.07408058643341064, "learning_rate": 0.01, "loss": 1.9647, "step": 97587 }, { "epoch": 10.022594228201704, "grad_norm": 0.09721572697162628, "learning_rate": 0.01, "loss": 1.9506, "step": 97590 }, { "epoch": 10.022902331313546, "grad_norm": 0.06861985474824905, "learning_rate": 0.01, "loss": 1.9428, "step": 97593 }, { "epoch": 10.023210434425387, "grad_norm": 0.0371435172855854, "learning_rate": 0.01, "loss": 1.9549, "step": 97596 }, { "epoch": 10.02351853753723, "grad_norm": 0.046418141573667526, "learning_rate": 0.01, "loss": 1.9464, "step": 97599 }, { "epoch": 10.023826640649071, "grad_norm": 0.11881248652935028, "learning_rate": 0.01, "loss": 1.9385, "step": 97602 }, { "epoch": 10.024134743760913, "grad_norm": 0.0706331804394722, "learning_rate": 0.01, "loss": 1.9394, "step": 97605 }, { "epoch": 10.024442846872754, "grad_norm": 0.07528916001319885, "learning_rate": 0.01, "loss": 1.9378, "step": 97608 }, { "epoch": 10.024750949984595, "grad_norm": 0.09359458833932877, "learning_rate": 0.01, "loss": 1.9448, "step": 97611 }, { "epoch": 10.025059053096436, "grad_norm": 0.08137216418981552, "learning_rate": 0.01, "loss": 1.9384, "step": 97614 }, { "epoch": 10.025367156208278, "grad_norm": 0.0373852401971817, "learning_rate": 0.01, "loss": 1.9584, "step": 97617 }, { "epoch": 10.025675259320119, "grad_norm": 0.09041198343038559, "learning_rate": 0.01, "loss": 1.9445, "step": 97620 }, { "epoch": 10.02598336243196, "grad_norm": 0.0771503895521164, "learning_rate": 0.01, "loss": 1.9662, "step": 97623 }, { "epoch": 10.026291465543801, "grad_norm": 0.07588748633861542, "learning_rate": 0.01, "loss": 1.9552, "step": 97626 }, { "epoch": 10.026599568655643, "grad_norm": 0.03945888206362724, "learning_rate": 0.01, "loss": 1.95, "step": 97629 }, { "epoch": 10.026907671767486, "grad_norm": 0.12288627028465271, "learning_rate": 0.01, "loss": 1.9442, "step": 97632 }, { "epoch": 10.027215774879327, "grad_norm": 0.13935460150241852, "learning_rate": 0.01, "loss": 1.9394, "step": 97635 }, { "epoch": 10.027523877991168, "grad_norm": 0.07311210036277771, "learning_rate": 0.01, "loss": 1.9504, "step": 97638 }, { "epoch": 10.02783198110301, "grad_norm": 0.04974691942334175, "learning_rate": 0.01, "loss": 1.9448, "step": 97641 }, { "epoch": 10.02814008421485, "grad_norm": 0.04890896752476692, "learning_rate": 0.01, "loss": 1.9608, "step": 97644 }, { "epoch": 10.028448187326692, "grad_norm": 0.06187541037797928, "learning_rate": 0.01, "loss": 1.9337, "step": 97647 }, { "epoch": 10.028756290438533, "grad_norm": 0.0689026489853859, "learning_rate": 0.01, "loss": 1.9656, "step": 97650 }, { "epoch": 10.029064393550374, "grad_norm": 0.10951229929924011, "learning_rate": 0.01, "loss": 1.9514, "step": 97653 }, { "epoch": 10.029372496662216, "grad_norm": 0.10918518900871277, "learning_rate": 0.01, "loss": 1.949, "step": 97656 }, { "epoch": 10.029680599774057, "grad_norm": 0.062250666320323944, "learning_rate": 0.01, "loss": 1.9585, "step": 97659 }, { "epoch": 10.029988702885898, "grad_norm": 0.0697980672121048, "learning_rate": 0.01, "loss": 1.9567, "step": 97662 }, { "epoch": 10.030296805997741, "grad_norm": 0.07267870008945465, "learning_rate": 0.01, "loss": 1.9444, "step": 97665 }, { "epoch": 10.030604909109583, "grad_norm": 0.061427708715200424, "learning_rate": 0.01, "loss": 1.9435, "step": 97668 }, { "epoch": 10.030913012221424, "grad_norm": 0.05644296482205391, "learning_rate": 0.01, "loss": 1.97, "step": 97671 }, { "epoch": 10.031221115333265, "grad_norm": 0.0500921756029129, "learning_rate": 0.01, "loss": 1.9307, "step": 97674 }, { "epoch": 10.031529218445106, "grad_norm": 0.04461059346795082, "learning_rate": 0.01, "loss": 1.9485, "step": 97677 }, { "epoch": 10.031837321556948, "grad_norm": 0.046942368149757385, "learning_rate": 0.01, "loss": 1.9534, "step": 97680 }, { "epoch": 10.032145424668789, "grad_norm": 0.06114652007818222, "learning_rate": 0.01, "loss": 1.9489, "step": 97683 }, { "epoch": 10.03245352778063, "grad_norm": 0.08929812908172607, "learning_rate": 0.01, "loss": 1.9739, "step": 97686 }, { "epoch": 10.032761630892471, "grad_norm": 0.05619662627577782, "learning_rate": 0.01, "loss": 1.9335, "step": 97689 }, { "epoch": 10.033069734004313, "grad_norm": 0.157526895403862, "learning_rate": 0.01, "loss": 1.9613, "step": 97692 }, { "epoch": 10.033377837116156, "grad_norm": 0.15800322592258453, "learning_rate": 0.01, "loss": 1.9698, "step": 97695 }, { "epoch": 10.033685940227997, "grad_norm": 0.10482832789421082, "learning_rate": 0.01, "loss": 1.945, "step": 97698 }, { "epoch": 10.033994043339838, "grad_norm": 0.09328657388687134, "learning_rate": 0.01, "loss": 1.9486, "step": 97701 }, { "epoch": 10.03430214645168, "grad_norm": 0.035461828112602234, "learning_rate": 0.01, "loss": 1.9321, "step": 97704 }, { "epoch": 10.03461024956352, "grad_norm": 0.08092792332172394, "learning_rate": 0.01, "loss": 1.9491, "step": 97707 }, { "epoch": 10.034918352675362, "grad_norm": 0.043945904821157455, "learning_rate": 0.01, "loss": 1.9408, "step": 97710 }, { "epoch": 10.035226455787203, "grad_norm": 0.042452260851860046, "learning_rate": 0.01, "loss": 1.9423, "step": 97713 }, { "epoch": 10.035534558899045, "grad_norm": 0.0670391321182251, "learning_rate": 0.01, "loss": 1.9303, "step": 97716 }, { "epoch": 10.035842662010886, "grad_norm": 0.07876156270503998, "learning_rate": 0.01, "loss": 1.9498, "step": 97719 }, { "epoch": 10.036150765122727, "grad_norm": 0.0945872887969017, "learning_rate": 0.01, "loss": 1.9359, "step": 97722 }, { "epoch": 10.036458868234568, "grad_norm": 0.05473790690302849, "learning_rate": 0.01, "loss": 1.9562, "step": 97725 }, { "epoch": 10.036766971346411, "grad_norm": 0.043251413851976395, "learning_rate": 0.01, "loss": 1.9433, "step": 97728 }, { "epoch": 10.037075074458253, "grad_norm": 0.03373229131102562, "learning_rate": 0.01, "loss": 1.9371, "step": 97731 }, { "epoch": 10.037383177570094, "grad_norm": 0.04231463745236397, "learning_rate": 0.01, "loss": 1.9517, "step": 97734 }, { "epoch": 10.037691280681935, "grad_norm": 0.0730859637260437, "learning_rate": 0.01, "loss": 1.926, "step": 97737 }, { "epoch": 10.037999383793776, "grad_norm": 0.07237992435693741, "learning_rate": 0.01, "loss": 1.9458, "step": 97740 }, { "epoch": 10.038307486905618, "grad_norm": 0.10237434506416321, "learning_rate": 0.01, "loss": 1.9714, "step": 97743 }, { "epoch": 10.038615590017459, "grad_norm": 0.04624316468834877, "learning_rate": 0.01, "loss": 1.9325, "step": 97746 }, { "epoch": 10.0389236931293, "grad_norm": 0.05136566981673241, "learning_rate": 0.01, "loss": 1.9394, "step": 97749 }, { "epoch": 10.039231796241141, "grad_norm": 0.049200087785720825, "learning_rate": 0.01, "loss": 1.9256, "step": 97752 }, { "epoch": 10.039539899352983, "grad_norm": 0.049420248717069626, "learning_rate": 0.01, "loss": 1.9288, "step": 97755 }, { "epoch": 10.039848002464826, "grad_norm": 0.04861652851104736, "learning_rate": 0.01, "loss": 1.9508, "step": 97758 }, { "epoch": 10.040156105576667, "grad_norm": 0.08401332050561905, "learning_rate": 0.01, "loss": 1.939, "step": 97761 }, { "epoch": 10.040464208688508, "grad_norm": 0.059221383184194565, "learning_rate": 0.01, "loss": 1.9437, "step": 97764 }, { "epoch": 10.04077231180035, "grad_norm": 0.04965361952781677, "learning_rate": 0.01, "loss": 1.9501, "step": 97767 }, { "epoch": 10.04108041491219, "grad_norm": 0.06314288079738617, "learning_rate": 0.01, "loss": 1.9563, "step": 97770 }, { "epoch": 10.041388518024032, "grad_norm": 0.03867454081773758, "learning_rate": 0.01, "loss": 1.9458, "step": 97773 }, { "epoch": 10.041696621135873, "grad_norm": 0.03719132021069527, "learning_rate": 0.01, "loss": 1.927, "step": 97776 }, { "epoch": 10.042004724247715, "grad_norm": 0.1044577807188034, "learning_rate": 0.01, "loss": 1.9766, "step": 97779 }, { "epoch": 10.042312827359556, "grad_norm": 0.06323778629302979, "learning_rate": 0.01, "loss": 1.9276, "step": 97782 }, { "epoch": 10.042620930471397, "grad_norm": 0.07908964902162552, "learning_rate": 0.01, "loss": 1.9421, "step": 97785 }, { "epoch": 10.042929033583238, "grad_norm": 0.06004498898983002, "learning_rate": 0.01, "loss": 1.9457, "step": 97788 }, { "epoch": 10.043237136695081, "grad_norm": 0.11528830230236053, "learning_rate": 0.01, "loss": 1.9548, "step": 97791 }, { "epoch": 10.043545239806923, "grad_norm": 0.05327809602022171, "learning_rate": 0.01, "loss": 1.9277, "step": 97794 }, { "epoch": 10.043853342918764, "grad_norm": 0.03932127356529236, "learning_rate": 0.01, "loss": 1.9652, "step": 97797 }, { "epoch": 10.044161446030605, "grad_norm": 0.040303848683834076, "learning_rate": 0.01, "loss": 1.926, "step": 97800 }, { "epoch": 10.044469549142446, "grad_norm": 0.04516441002488136, "learning_rate": 0.01, "loss": 1.9405, "step": 97803 }, { "epoch": 10.044777652254288, "grad_norm": 0.12306668609380722, "learning_rate": 0.01, "loss": 1.9387, "step": 97806 }, { "epoch": 10.045085755366129, "grad_norm": 0.03442014381289482, "learning_rate": 0.01, "loss": 1.9431, "step": 97809 }, { "epoch": 10.04539385847797, "grad_norm": 0.0522041916847229, "learning_rate": 0.01, "loss": 1.9424, "step": 97812 }, { "epoch": 10.045701961589812, "grad_norm": 0.04728509485721588, "learning_rate": 0.01, "loss": 1.9598, "step": 97815 }, { "epoch": 10.046010064701653, "grad_norm": 0.04077201709151268, "learning_rate": 0.01, "loss": 1.9489, "step": 97818 }, { "epoch": 10.046318167813494, "grad_norm": 0.05416601523756981, "learning_rate": 0.01, "loss": 1.9551, "step": 97821 }, { "epoch": 10.046626270925337, "grad_norm": 0.07108070701360703, "learning_rate": 0.01, "loss": 1.9467, "step": 97824 }, { "epoch": 10.046934374037178, "grad_norm": 0.047428593039512634, "learning_rate": 0.01, "loss": 1.956, "step": 97827 }, { "epoch": 10.04724247714902, "grad_norm": 0.05475901812314987, "learning_rate": 0.01, "loss": 1.9318, "step": 97830 }, { "epoch": 10.047550580260861, "grad_norm": 0.03730958327651024, "learning_rate": 0.01, "loss": 1.9355, "step": 97833 }, { "epoch": 10.047858683372702, "grad_norm": 0.132392019033432, "learning_rate": 0.01, "loss": 1.947, "step": 97836 }, { "epoch": 10.048166786484543, "grad_norm": 0.11565215140581131, "learning_rate": 0.01, "loss": 1.9263, "step": 97839 }, { "epoch": 10.048474889596385, "grad_norm": 0.11753211915493011, "learning_rate": 0.01, "loss": 1.9386, "step": 97842 }, { "epoch": 10.048782992708226, "grad_norm": 0.08669470250606537, "learning_rate": 0.01, "loss": 1.9528, "step": 97845 }, { "epoch": 10.049091095820067, "grad_norm": 0.08564909547567368, "learning_rate": 0.01, "loss": 1.9469, "step": 97848 }, { "epoch": 10.049399198931908, "grad_norm": 0.06066494807600975, "learning_rate": 0.01, "loss": 1.9612, "step": 97851 }, { "epoch": 10.049707302043752, "grad_norm": 0.03739757835865021, "learning_rate": 0.01, "loss": 1.9339, "step": 97854 }, { "epoch": 10.050015405155593, "grad_norm": 0.0431349016726017, "learning_rate": 0.01, "loss": 1.9254, "step": 97857 }, { "epoch": 10.050323508267434, "grad_norm": 0.10025177896022797, "learning_rate": 0.01, "loss": 1.9665, "step": 97860 }, { "epoch": 10.050631611379275, "grad_norm": 0.08526377379894257, "learning_rate": 0.01, "loss": 1.9522, "step": 97863 }, { "epoch": 10.050939714491117, "grad_norm": 0.07417726516723633, "learning_rate": 0.01, "loss": 1.9512, "step": 97866 }, { "epoch": 10.051247817602958, "grad_norm": 0.04944293573498726, "learning_rate": 0.01, "loss": 1.9795, "step": 97869 }, { "epoch": 10.051555920714799, "grad_norm": 0.045282039791345596, "learning_rate": 0.01, "loss": 1.9224, "step": 97872 }, { "epoch": 10.05186402382664, "grad_norm": 0.04437606409192085, "learning_rate": 0.01, "loss": 1.9217, "step": 97875 }, { "epoch": 10.052172126938482, "grad_norm": 0.08778560161590576, "learning_rate": 0.01, "loss": 1.9347, "step": 97878 }, { "epoch": 10.052480230050323, "grad_norm": 0.08885132521390915, "learning_rate": 0.01, "loss": 1.9645, "step": 97881 }, { "epoch": 10.052788333162164, "grad_norm": 0.11218232661485672, "learning_rate": 0.01, "loss": 1.94, "step": 97884 }, { "epoch": 10.053096436274007, "grad_norm": 0.09743648767471313, "learning_rate": 0.01, "loss": 1.939, "step": 97887 }, { "epoch": 10.053404539385848, "grad_norm": 0.07041345536708832, "learning_rate": 0.01, "loss": 1.9617, "step": 97890 }, { "epoch": 10.05371264249769, "grad_norm": 0.052324507385492325, "learning_rate": 0.01, "loss": 1.9464, "step": 97893 }, { "epoch": 10.054020745609531, "grad_norm": 0.04444773122668266, "learning_rate": 0.01, "loss": 1.9285, "step": 97896 }, { "epoch": 10.054328848721372, "grad_norm": 0.07826077193021774, "learning_rate": 0.01, "loss": 1.941, "step": 97899 }, { "epoch": 10.054636951833213, "grad_norm": 0.03888843208551407, "learning_rate": 0.01, "loss": 1.9703, "step": 97902 }, { "epoch": 10.054945054945055, "grad_norm": 0.04141579940915108, "learning_rate": 0.01, "loss": 1.9754, "step": 97905 }, { "epoch": 10.055253158056896, "grad_norm": 0.039864491671323776, "learning_rate": 0.01, "loss": 1.9436, "step": 97908 }, { "epoch": 10.055561261168737, "grad_norm": 0.13369201123714447, "learning_rate": 0.01, "loss": 1.9511, "step": 97911 }, { "epoch": 10.055869364280579, "grad_norm": 0.053901948034763336, "learning_rate": 0.01, "loss": 1.9841, "step": 97914 }, { "epoch": 10.05617746739242, "grad_norm": 0.06346002221107483, "learning_rate": 0.01, "loss": 1.954, "step": 97917 }, { "epoch": 10.056485570504263, "grad_norm": 0.04997136443853378, "learning_rate": 0.01, "loss": 1.9726, "step": 97920 }, { "epoch": 10.056793673616104, "grad_norm": 0.036513641476631165, "learning_rate": 0.01, "loss": 1.9329, "step": 97923 }, { "epoch": 10.057101776727945, "grad_norm": 0.04684044420719147, "learning_rate": 0.01, "loss": 1.9245, "step": 97926 }, { "epoch": 10.057409879839787, "grad_norm": 0.06806880980730057, "learning_rate": 0.01, "loss": 1.9485, "step": 97929 }, { "epoch": 10.057717982951628, "grad_norm": 0.0641106516122818, "learning_rate": 0.01, "loss": 1.9659, "step": 97932 }, { "epoch": 10.05802608606347, "grad_norm": 0.06563923507928848, "learning_rate": 0.01, "loss": 1.9324, "step": 97935 }, { "epoch": 10.05833418917531, "grad_norm": 0.05737874656915665, "learning_rate": 0.01, "loss": 1.9505, "step": 97938 }, { "epoch": 10.058642292287152, "grad_norm": 0.07749006897211075, "learning_rate": 0.01, "loss": 1.9472, "step": 97941 }, { "epoch": 10.058950395398993, "grad_norm": 0.11980284005403519, "learning_rate": 0.01, "loss": 1.9574, "step": 97944 }, { "epoch": 10.059258498510834, "grad_norm": 0.04357290640473366, "learning_rate": 0.01, "loss": 1.9589, "step": 97947 }, { "epoch": 10.059566601622677, "grad_norm": 0.05592518299818039, "learning_rate": 0.01, "loss": 1.9747, "step": 97950 }, { "epoch": 10.059874704734519, "grad_norm": 0.08463305234909058, "learning_rate": 0.01, "loss": 1.9612, "step": 97953 }, { "epoch": 10.06018280784636, "grad_norm": 0.08054778724908829, "learning_rate": 0.01, "loss": 1.9267, "step": 97956 }, { "epoch": 10.060490910958201, "grad_norm": 0.08148549497127533, "learning_rate": 0.01, "loss": 1.9562, "step": 97959 }, { "epoch": 10.060799014070042, "grad_norm": 0.06928577274084091, "learning_rate": 0.01, "loss": 1.9381, "step": 97962 }, { "epoch": 10.061107117181884, "grad_norm": 0.11390242725610733, "learning_rate": 0.01, "loss": 1.9299, "step": 97965 }, { "epoch": 10.061415220293725, "grad_norm": 0.03531615436077118, "learning_rate": 0.01, "loss": 1.9434, "step": 97968 }, { "epoch": 10.061723323405566, "grad_norm": 0.05216292291879654, "learning_rate": 0.01, "loss": 1.9074, "step": 97971 }, { "epoch": 10.062031426517407, "grad_norm": 0.05882908031344414, "learning_rate": 0.01, "loss": 1.9229, "step": 97974 }, { "epoch": 10.062339529629249, "grad_norm": 0.13236938416957855, "learning_rate": 0.01, "loss": 1.9254, "step": 97977 }, { "epoch": 10.06264763274109, "grad_norm": 0.04366321116685867, "learning_rate": 0.01, "loss": 1.9487, "step": 97980 }, { "epoch": 10.062955735852933, "grad_norm": 0.04574698582291603, "learning_rate": 0.01, "loss": 1.9333, "step": 97983 }, { "epoch": 10.063263838964774, "grad_norm": 0.10057555884122849, "learning_rate": 0.01, "loss": 1.9467, "step": 97986 }, { "epoch": 10.063571942076615, "grad_norm": 0.06712167710065842, "learning_rate": 0.01, "loss": 1.9364, "step": 97989 }, { "epoch": 10.063880045188457, "grad_norm": 0.11221185326576233, "learning_rate": 0.01, "loss": 1.9831, "step": 97992 }, { "epoch": 10.064188148300298, "grad_norm": 0.03754079341888428, "learning_rate": 0.01, "loss": 1.9444, "step": 97995 }, { "epoch": 10.06449625141214, "grad_norm": 0.06335268914699554, "learning_rate": 0.01, "loss": 1.9545, "step": 97998 }, { "epoch": 10.06480435452398, "grad_norm": 0.037058502435684204, "learning_rate": 0.01, "loss": 1.9467, "step": 98001 }, { "epoch": 10.065112457635822, "grad_norm": 0.043899256736040115, "learning_rate": 0.01, "loss": 1.9513, "step": 98004 }, { "epoch": 10.065420560747663, "grad_norm": 0.04954110458493233, "learning_rate": 0.01, "loss": 1.9377, "step": 98007 }, { "epoch": 10.065728663859504, "grad_norm": 0.046918366104364395, "learning_rate": 0.01, "loss": 1.9664, "step": 98010 }, { "epoch": 10.066036766971347, "grad_norm": 0.053783174604177475, "learning_rate": 0.01, "loss": 1.9417, "step": 98013 }, { "epoch": 10.066344870083189, "grad_norm": 0.03612091764807701, "learning_rate": 0.01, "loss": 1.9453, "step": 98016 }, { "epoch": 10.06665297319503, "grad_norm": 0.05525078997015953, "learning_rate": 0.01, "loss": 1.953, "step": 98019 }, { "epoch": 10.066961076306871, "grad_norm": 0.041319839656353, "learning_rate": 0.01, "loss": 1.9682, "step": 98022 }, { "epoch": 10.067269179418712, "grad_norm": 0.13023270666599274, "learning_rate": 0.01, "loss": 1.9297, "step": 98025 }, { "epoch": 10.067577282530554, "grad_norm": 0.14246028661727905, "learning_rate": 0.01, "loss": 1.9593, "step": 98028 }, { "epoch": 10.067885385642395, "grad_norm": 0.08004479110240936, "learning_rate": 0.01, "loss": 1.9445, "step": 98031 }, { "epoch": 10.068193488754236, "grad_norm": 0.06986592710018158, "learning_rate": 0.01, "loss": 1.9267, "step": 98034 }, { "epoch": 10.068501591866077, "grad_norm": 0.08863560855388641, "learning_rate": 0.01, "loss": 1.9585, "step": 98037 }, { "epoch": 10.068809694977919, "grad_norm": 0.0571909137070179, "learning_rate": 0.01, "loss": 1.9243, "step": 98040 }, { "epoch": 10.06911779808976, "grad_norm": 0.043870389461517334, "learning_rate": 0.01, "loss": 1.9307, "step": 98043 }, { "epoch": 10.069425901201603, "grad_norm": 0.04596741124987602, "learning_rate": 0.01, "loss": 1.9486, "step": 98046 }, { "epoch": 10.069734004313444, "grad_norm": 0.07051980495452881, "learning_rate": 0.01, "loss": 1.9327, "step": 98049 }, { "epoch": 10.070042107425285, "grad_norm": 0.11420032382011414, "learning_rate": 0.01, "loss": 1.9422, "step": 98052 }, { "epoch": 10.070350210537127, "grad_norm": 0.08207684010267258, "learning_rate": 0.01, "loss": 1.9739, "step": 98055 }, { "epoch": 10.070658313648968, "grad_norm": 0.10102898627519608, "learning_rate": 0.01, "loss": 1.9435, "step": 98058 }, { "epoch": 10.07096641676081, "grad_norm": 0.09918655455112457, "learning_rate": 0.01, "loss": 1.9366, "step": 98061 }, { "epoch": 10.07127451987265, "grad_norm": 0.061670657247304916, "learning_rate": 0.01, "loss": 1.9493, "step": 98064 }, { "epoch": 10.071582622984492, "grad_norm": 0.047955017536878586, "learning_rate": 0.01, "loss": 1.9512, "step": 98067 }, { "epoch": 10.071890726096333, "grad_norm": 0.040025174617767334, "learning_rate": 0.01, "loss": 1.9551, "step": 98070 }, { "epoch": 10.072198829208174, "grad_norm": 0.09726526588201523, "learning_rate": 0.01, "loss": 1.9384, "step": 98073 }, { "epoch": 10.072506932320016, "grad_norm": 0.043067190796136856, "learning_rate": 0.01, "loss": 1.9414, "step": 98076 }, { "epoch": 10.072815035431859, "grad_norm": 0.10681457817554474, "learning_rate": 0.01, "loss": 1.9803, "step": 98079 }, { "epoch": 10.0731231385437, "grad_norm": 0.08581650257110596, "learning_rate": 0.01, "loss": 1.9336, "step": 98082 }, { "epoch": 10.073431241655541, "grad_norm": 0.08076117932796478, "learning_rate": 0.01, "loss": 1.9505, "step": 98085 }, { "epoch": 10.073739344767382, "grad_norm": 0.04616018384695053, "learning_rate": 0.01, "loss": 1.9334, "step": 98088 }, { "epoch": 10.074047447879224, "grad_norm": 0.03525041416287422, "learning_rate": 0.01, "loss": 1.9569, "step": 98091 }, { "epoch": 10.074355550991065, "grad_norm": 0.05245237797498703, "learning_rate": 0.01, "loss": 1.9634, "step": 98094 }, { "epoch": 10.074663654102906, "grad_norm": 0.09995900094509125, "learning_rate": 0.01, "loss": 1.9392, "step": 98097 }, { "epoch": 10.074971757214747, "grad_norm": 0.06834854930639267, "learning_rate": 0.01, "loss": 1.9658, "step": 98100 }, { "epoch": 10.075279860326589, "grad_norm": 0.10982584208250046, "learning_rate": 0.01, "loss": 1.9452, "step": 98103 }, { "epoch": 10.07558796343843, "grad_norm": 0.07823304831981659, "learning_rate": 0.01, "loss": 1.9024, "step": 98106 }, { "epoch": 10.075896066550273, "grad_norm": 0.050893526524305344, "learning_rate": 0.01, "loss": 1.9372, "step": 98109 }, { "epoch": 10.076204169662114, "grad_norm": 0.05059904605150223, "learning_rate": 0.01, "loss": 1.9547, "step": 98112 }, { "epoch": 10.076512272773956, "grad_norm": 0.039330050349235535, "learning_rate": 0.01, "loss": 1.9547, "step": 98115 }, { "epoch": 10.076820375885797, "grad_norm": 0.03900914266705513, "learning_rate": 0.01, "loss": 1.9221, "step": 98118 }, { "epoch": 10.077128478997638, "grad_norm": 0.0831712931394577, "learning_rate": 0.01, "loss": 1.9166, "step": 98121 }, { "epoch": 10.07743658210948, "grad_norm": 0.06024106591939926, "learning_rate": 0.01, "loss": 1.9345, "step": 98124 }, { "epoch": 10.07774468522132, "grad_norm": 0.04956630989909172, "learning_rate": 0.01, "loss": 1.9242, "step": 98127 }, { "epoch": 10.078052788333162, "grad_norm": 0.04718092456459999, "learning_rate": 0.01, "loss": 1.9119, "step": 98130 }, { "epoch": 10.078360891445003, "grad_norm": 0.04898585006594658, "learning_rate": 0.01, "loss": 1.9565, "step": 98133 }, { "epoch": 10.078668994556844, "grad_norm": 0.06265314668416977, "learning_rate": 0.01, "loss": 1.9684, "step": 98136 }, { "epoch": 10.078977097668686, "grad_norm": 0.033283136785030365, "learning_rate": 0.01, "loss": 1.9545, "step": 98139 }, { "epoch": 10.079285200780529, "grad_norm": 0.08072887361049652, "learning_rate": 0.01, "loss": 1.9623, "step": 98142 }, { "epoch": 10.07959330389237, "grad_norm": 0.07667970657348633, "learning_rate": 0.01, "loss": 1.9441, "step": 98145 }, { "epoch": 10.079901407004211, "grad_norm": 0.0799844041466713, "learning_rate": 0.01, "loss": 1.9368, "step": 98148 }, { "epoch": 10.080209510116052, "grad_norm": 0.05267045274376869, "learning_rate": 0.01, "loss": 1.9505, "step": 98151 }, { "epoch": 10.080517613227894, "grad_norm": 0.09652364999055862, "learning_rate": 0.01, "loss": 1.9595, "step": 98154 }, { "epoch": 10.080825716339735, "grad_norm": 0.1304524838924408, "learning_rate": 0.01, "loss": 1.9613, "step": 98157 }, { "epoch": 10.081133819451576, "grad_norm": 0.10048730671405792, "learning_rate": 0.01, "loss": 1.9429, "step": 98160 }, { "epoch": 10.081441922563418, "grad_norm": 0.07810193300247192, "learning_rate": 0.01, "loss": 1.9454, "step": 98163 }, { "epoch": 10.081750025675259, "grad_norm": 0.06694421172142029, "learning_rate": 0.01, "loss": 1.944, "step": 98166 }, { "epoch": 10.0820581287871, "grad_norm": 0.056690763682127, "learning_rate": 0.01, "loss": 1.9797, "step": 98169 }, { "epoch": 10.082366231898941, "grad_norm": 0.04931468889117241, "learning_rate": 0.01, "loss": 1.9644, "step": 98172 }, { "epoch": 10.082674335010784, "grad_norm": 0.057076919823884964, "learning_rate": 0.01, "loss": 1.9645, "step": 98175 }, { "epoch": 10.082982438122626, "grad_norm": 0.0563746877014637, "learning_rate": 0.01, "loss": 1.9328, "step": 98178 }, { "epoch": 10.083290541234467, "grad_norm": 0.05300070717930794, "learning_rate": 0.01, "loss": 1.9281, "step": 98181 }, { "epoch": 10.083598644346308, "grad_norm": 0.0513119101524353, "learning_rate": 0.01, "loss": 1.9403, "step": 98184 }, { "epoch": 10.08390674745815, "grad_norm": 0.05145730450749397, "learning_rate": 0.01, "loss": 1.93, "step": 98187 }, { "epoch": 10.08421485056999, "grad_norm": 0.05403425544500351, "learning_rate": 0.01, "loss": 1.9465, "step": 98190 }, { "epoch": 10.084522953681832, "grad_norm": 0.06168578565120697, "learning_rate": 0.01, "loss": 1.9635, "step": 98193 }, { "epoch": 10.084831056793673, "grad_norm": 0.09515652805566788, "learning_rate": 0.01, "loss": 1.9401, "step": 98196 }, { "epoch": 10.085139159905514, "grad_norm": 0.056796614080667496, "learning_rate": 0.01, "loss": 1.9515, "step": 98199 }, { "epoch": 10.085447263017356, "grad_norm": 0.04301804304122925, "learning_rate": 0.01, "loss": 1.9589, "step": 98202 }, { "epoch": 10.085755366129199, "grad_norm": 0.0407172255218029, "learning_rate": 0.01, "loss": 1.9502, "step": 98205 }, { "epoch": 10.08606346924104, "grad_norm": 0.03882700577378273, "learning_rate": 0.01, "loss": 1.9438, "step": 98208 }, { "epoch": 10.086371572352881, "grad_norm": 0.055570971220731735, "learning_rate": 0.01, "loss": 1.9507, "step": 98211 }, { "epoch": 10.086679675464723, "grad_norm": 0.09834793210029602, "learning_rate": 0.01, "loss": 1.9267, "step": 98214 }, { "epoch": 10.086987778576564, "grad_norm": 0.09147577732801437, "learning_rate": 0.01, "loss": 1.9367, "step": 98217 }, { "epoch": 10.087295881688405, "grad_norm": 0.08858241885900497, "learning_rate": 0.01, "loss": 1.9426, "step": 98220 }, { "epoch": 10.087603984800246, "grad_norm": 0.07103687524795532, "learning_rate": 0.01, "loss": 1.9384, "step": 98223 }, { "epoch": 10.087912087912088, "grad_norm": 0.05130713805556297, "learning_rate": 0.01, "loss": 1.9273, "step": 98226 }, { "epoch": 10.088220191023929, "grad_norm": 0.0382094606757164, "learning_rate": 0.01, "loss": 1.9456, "step": 98229 }, { "epoch": 10.08852829413577, "grad_norm": 0.048191484063863754, "learning_rate": 0.01, "loss": 1.9255, "step": 98232 }, { "epoch": 10.088836397247611, "grad_norm": 0.0679757371544838, "learning_rate": 0.01, "loss": 1.9177, "step": 98235 }, { "epoch": 10.089144500359454, "grad_norm": 0.0679628774523735, "learning_rate": 0.01, "loss": 1.9313, "step": 98238 }, { "epoch": 10.089452603471296, "grad_norm": 0.11466601490974426, "learning_rate": 0.01, "loss": 1.9374, "step": 98241 }, { "epoch": 10.089760706583137, "grad_norm": 0.05466683208942413, "learning_rate": 0.01, "loss": 1.9412, "step": 98244 }, { "epoch": 10.090068809694978, "grad_norm": 0.10592523962259293, "learning_rate": 0.01, "loss": 1.9575, "step": 98247 }, { "epoch": 10.09037691280682, "grad_norm": 0.0421140193939209, "learning_rate": 0.01, "loss": 1.9238, "step": 98250 }, { "epoch": 10.09068501591866, "grad_norm": 0.0407244972884655, "learning_rate": 0.01, "loss": 1.9266, "step": 98253 }, { "epoch": 10.090993119030502, "grad_norm": 0.11097019910812378, "learning_rate": 0.01, "loss": 1.9341, "step": 98256 }, { "epoch": 10.091301222142343, "grad_norm": 0.043895840644836426, "learning_rate": 0.01, "loss": 1.9416, "step": 98259 }, { "epoch": 10.091609325254185, "grad_norm": 0.04927389323711395, "learning_rate": 0.01, "loss": 1.9226, "step": 98262 }, { "epoch": 10.091917428366026, "grad_norm": 0.04536551982164383, "learning_rate": 0.01, "loss": 1.9373, "step": 98265 }, { "epoch": 10.092225531477867, "grad_norm": 0.02990969829261303, "learning_rate": 0.01, "loss": 1.9348, "step": 98268 }, { "epoch": 10.09253363458971, "grad_norm": 0.04231426119804382, "learning_rate": 0.01, "loss": 1.9389, "step": 98271 }, { "epoch": 10.092841737701551, "grad_norm": 0.10033588856458664, "learning_rate": 0.01, "loss": 1.9398, "step": 98274 }, { "epoch": 10.093149840813393, "grad_norm": 0.06602507084608078, "learning_rate": 0.01, "loss": 1.9516, "step": 98277 }, { "epoch": 10.093457943925234, "grad_norm": 0.10166456550359726, "learning_rate": 0.01, "loss": 1.9227, "step": 98280 }, { "epoch": 10.093766047037075, "grad_norm": 0.055442363023757935, "learning_rate": 0.01, "loss": 1.9539, "step": 98283 }, { "epoch": 10.094074150148916, "grad_norm": 0.09489361941814423, "learning_rate": 0.01, "loss": 1.927, "step": 98286 }, { "epoch": 10.094382253260758, "grad_norm": 0.10719319432973862, "learning_rate": 0.01, "loss": 1.9298, "step": 98289 }, { "epoch": 10.094690356372599, "grad_norm": 0.04897269234061241, "learning_rate": 0.01, "loss": 1.9417, "step": 98292 }, { "epoch": 10.09499845948444, "grad_norm": 0.17686758935451508, "learning_rate": 0.01, "loss": 1.9443, "step": 98295 }, { "epoch": 10.095306562596281, "grad_norm": 0.1403108537197113, "learning_rate": 0.01, "loss": 1.9176, "step": 98298 }, { "epoch": 10.095614665708124, "grad_norm": 0.09026072919368744, "learning_rate": 0.01, "loss": 1.9527, "step": 98301 }, { "epoch": 10.095922768819966, "grad_norm": 0.029938554391264915, "learning_rate": 0.01, "loss": 1.9403, "step": 98304 }, { "epoch": 10.096230871931807, "grad_norm": 0.03506121039390564, "learning_rate": 0.01, "loss": 1.9344, "step": 98307 }, { "epoch": 10.096538975043648, "grad_norm": 0.058797042816877365, "learning_rate": 0.01, "loss": 1.9358, "step": 98310 }, { "epoch": 10.09684707815549, "grad_norm": 0.04589388146996498, "learning_rate": 0.01, "loss": 1.9469, "step": 98313 }, { "epoch": 10.09715518126733, "grad_norm": 0.04293623939156532, "learning_rate": 0.01, "loss": 1.9236, "step": 98316 }, { "epoch": 10.097463284379172, "grad_norm": 0.04917249456048012, "learning_rate": 0.01, "loss": 1.9275, "step": 98319 }, { "epoch": 10.097771387491013, "grad_norm": 0.0648779645562172, "learning_rate": 0.01, "loss": 1.9293, "step": 98322 }, { "epoch": 10.098079490602855, "grad_norm": 0.08563850075006485, "learning_rate": 0.01, "loss": 1.9286, "step": 98325 }, { "epoch": 10.098387593714696, "grad_norm": 0.06887205690145493, "learning_rate": 0.01, "loss": 1.9433, "step": 98328 }, { "epoch": 10.098695696826537, "grad_norm": 0.047499123960733414, "learning_rate": 0.01, "loss": 1.9324, "step": 98331 }, { "epoch": 10.09900379993838, "grad_norm": 0.13514503836631775, "learning_rate": 0.01, "loss": 1.9521, "step": 98334 }, { "epoch": 10.099311903050221, "grad_norm": 0.04670017212629318, "learning_rate": 0.01, "loss": 1.9554, "step": 98337 }, { "epoch": 10.099620006162063, "grad_norm": 0.10404140502214432, "learning_rate": 0.01, "loss": 1.9405, "step": 98340 }, { "epoch": 10.099928109273904, "grad_norm": 0.05887841805815697, "learning_rate": 0.01, "loss": 1.9193, "step": 98343 }, { "epoch": 10.100236212385745, "grad_norm": 0.0849578008055687, "learning_rate": 0.01, "loss": 1.9671, "step": 98346 }, { "epoch": 10.100544315497586, "grad_norm": 0.11289205402135849, "learning_rate": 0.01, "loss": 1.9217, "step": 98349 }, { "epoch": 10.100852418609428, "grad_norm": 0.03669322282075882, "learning_rate": 0.01, "loss": 1.9452, "step": 98352 }, { "epoch": 10.101160521721269, "grad_norm": 0.04403113201260567, "learning_rate": 0.01, "loss": 1.9242, "step": 98355 }, { "epoch": 10.10146862483311, "grad_norm": 0.07399263978004456, "learning_rate": 0.01, "loss": 1.9507, "step": 98358 }, { "epoch": 10.101776727944952, "grad_norm": 0.03815837949514389, "learning_rate": 0.01, "loss": 1.9475, "step": 98361 }, { "epoch": 10.102084831056793, "grad_norm": 0.043647266924381256, "learning_rate": 0.01, "loss": 1.9664, "step": 98364 }, { "epoch": 10.102392934168636, "grad_norm": 0.052343569695949554, "learning_rate": 0.01, "loss": 1.9379, "step": 98367 }, { "epoch": 10.102701037280477, "grad_norm": 0.043194808065891266, "learning_rate": 0.01, "loss": 1.9586, "step": 98370 }, { "epoch": 10.103009140392318, "grad_norm": 0.13488149642944336, "learning_rate": 0.01, "loss": 1.9571, "step": 98373 }, { "epoch": 10.10331724350416, "grad_norm": 0.04257534071803093, "learning_rate": 0.01, "loss": 1.9127, "step": 98376 }, { "epoch": 10.103625346616, "grad_norm": 0.0840926319360733, "learning_rate": 0.01, "loss": 1.9133, "step": 98379 }, { "epoch": 10.103933449727842, "grad_norm": 0.07795858383178711, "learning_rate": 0.01, "loss": 1.9506, "step": 98382 }, { "epoch": 10.104241552839683, "grad_norm": 0.0341707319021225, "learning_rate": 0.01, "loss": 1.9405, "step": 98385 }, { "epoch": 10.104549655951525, "grad_norm": 0.05060354247689247, "learning_rate": 0.01, "loss": 1.954, "step": 98388 }, { "epoch": 10.104857759063366, "grad_norm": 0.06552443653345108, "learning_rate": 0.01, "loss": 1.9518, "step": 98391 }, { "epoch": 10.105165862175207, "grad_norm": 0.05386805906891823, "learning_rate": 0.01, "loss": 1.9308, "step": 98394 }, { "epoch": 10.10547396528705, "grad_norm": 0.046048302203416824, "learning_rate": 0.01, "loss": 1.947, "step": 98397 }, { "epoch": 10.105782068398891, "grad_norm": 0.039221879094839096, "learning_rate": 0.01, "loss": 1.9381, "step": 98400 }, { "epoch": 10.106090171510733, "grad_norm": 0.04174118861556053, "learning_rate": 0.01, "loss": 1.9534, "step": 98403 }, { "epoch": 10.106398274622574, "grad_norm": 0.06665545701980591, "learning_rate": 0.01, "loss": 1.9379, "step": 98406 }, { "epoch": 10.106706377734415, "grad_norm": 0.12490153312683105, "learning_rate": 0.01, "loss": 1.9286, "step": 98409 }, { "epoch": 10.107014480846257, "grad_norm": 0.09812960773706436, "learning_rate": 0.01, "loss": 1.9622, "step": 98412 }, { "epoch": 10.107322583958098, "grad_norm": 0.07532085478305817, "learning_rate": 0.01, "loss": 1.9346, "step": 98415 }, { "epoch": 10.107630687069939, "grad_norm": 0.06836399435997009, "learning_rate": 0.01, "loss": 1.9661, "step": 98418 }, { "epoch": 10.10793879018178, "grad_norm": 0.045399703085422516, "learning_rate": 0.01, "loss": 1.9451, "step": 98421 }, { "epoch": 10.108246893293622, "grad_norm": 0.04112372174859047, "learning_rate": 0.01, "loss": 1.9588, "step": 98424 }, { "epoch": 10.108554996405463, "grad_norm": 0.07263118773698807, "learning_rate": 0.01, "loss": 1.947, "step": 98427 }, { "epoch": 10.108863099517306, "grad_norm": 0.0632065162062645, "learning_rate": 0.01, "loss": 1.9549, "step": 98430 }, { "epoch": 10.109171202629147, "grad_norm": 0.11991485953330994, "learning_rate": 0.01, "loss": 1.9335, "step": 98433 }, { "epoch": 10.109479305740988, "grad_norm": 0.06707876920700073, "learning_rate": 0.01, "loss": 1.9222, "step": 98436 }, { "epoch": 10.10978740885283, "grad_norm": 0.06976279616355896, "learning_rate": 0.01, "loss": 1.9432, "step": 98439 }, { "epoch": 10.110095511964671, "grad_norm": 0.05575570464134216, "learning_rate": 0.01, "loss": 1.9411, "step": 98442 }, { "epoch": 10.110403615076512, "grad_norm": 0.03482991084456444, "learning_rate": 0.01, "loss": 1.9751, "step": 98445 }, { "epoch": 10.110711718188353, "grad_norm": 0.09989995509386063, "learning_rate": 0.01, "loss": 1.9363, "step": 98448 }, { "epoch": 10.111019821300195, "grad_norm": 0.08258899301290512, "learning_rate": 0.01, "loss": 1.9419, "step": 98451 }, { "epoch": 10.111327924412036, "grad_norm": 0.0769890621304512, "learning_rate": 0.01, "loss": 1.9535, "step": 98454 }, { "epoch": 10.111636027523877, "grad_norm": 0.13591155409812927, "learning_rate": 0.01, "loss": 1.9362, "step": 98457 }, { "epoch": 10.11194413063572, "grad_norm": 0.051655545830726624, "learning_rate": 0.01, "loss": 1.9274, "step": 98460 }, { "epoch": 10.112252233747562, "grad_norm": 0.08032787591218948, "learning_rate": 0.01, "loss": 1.9304, "step": 98463 }, { "epoch": 10.112560336859403, "grad_norm": 0.04594959691166878, "learning_rate": 0.01, "loss": 1.9283, "step": 98466 }, { "epoch": 10.112868439971244, "grad_norm": 0.03724440559744835, "learning_rate": 0.01, "loss": 1.9406, "step": 98469 }, { "epoch": 10.113176543083085, "grad_norm": 0.04428113251924515, "learning_rate": 0.01, "loss": 1.9439, "step": 98472 }, { "epoch": 10.113484646194927, "grad_norm": 0.11865317076444626, "learning_rate": 0.01, "loss": 1.9539, "step": 98475 }, { "epoch": 10.113792749306768, "grad_norm": 0.07116881012916565, "learning_rate": 0.01, "loss": 1.9339, "step": 98478 }, { "epoch": 10.11410085241861, "grad_norm": 0.07484801113605499, "learning_rate": 0.01, "loss": 1.9263, "step": 98481 }, { "epoch": 10.11440895553045, "grad_norm": 0.1013493537902832, "learning_rate": 0.01, "loss": 1.9541, "step": 98484 }, { "epoch": 10.114717058642292, "grad_norm": 0.06404826790094376, "learning_rate": 0.01, "loss": 1.9493, "step": 98487 }, { "epoch": 10.115025161754133, "grad_norm": 0.05708220228552818, "learning_rate": 0.01, "loss": 1.9365, "step": 98490 }, { "epoch": 10.115333264865976, "grad_norm": 0.05591821298003197, "learning_rate": 0.01, "loss": 1.9672, "step": 98493 }, { "epoch": 10.115641367977817, "grad_norm": 0.06925122439861298, "learning_rate": 0.01, "loss": 1.9417, "step": 98496 }, { "epoch": 10.115949471089658, "grad_norm": 0.0722322165966034, "learning_rate": 0.01, "loss": 1.9438, "step": 98499 }, { "epoch": 10.1162575742015, "grad_norm": 0.07561221718788147, "learning_rate": 0.01, "loss": 1.9188, "step": 98502 }, { "epoch": 10.116565677313341, "grad_norm": 0.04920943081378937, "learning_rate": 0.01, "loss": 1.9226, "step": 98505 }, { "epoch": 10.116873780425182, "grad_norm": 0.03715353459119797, "learning_rate": 0.01, "loss": 1.9362, "step": 98508 }, { "epoch": 10.117181883537024, "grad_norm": 0.046310905367136, "learning_rate": 0.01, "loss": 1.9392, "step": 98511 }, { "epoch": 10.117489986648865, "grad_norm": 0.1061708852648735, "learning_rate": 0.01, "loss": 1.926, "step": 98514 }, { "epoch": 10.117798089760706, "grad_norm": 0.04743969440460205, "learning_rate": 0.01, "loss": 1.941, "step": 98517 }, { "epoch": 10.118106192872547, "grad_norm": 0.10688664019107819, "learning_rate": 0.01, "loss": 1.9529, "step": 98520 }, { "epoch": 10.118414295984389, "grad_norm": 0.06978907436132431, "learning_rate": 0.01, "loss": 1.9209, "step": 98523 }, { "epoch": 10.118722399096232, "grad_norm": 0.05398084223270416, "learning_rate": 0.01, "loss": 1.9301, "step": 98526 }, { "epoch": 10.119030502208073, "grad_norm": 0.05511185899376869, "learning_rate": 0.01, "loss": 1.9406, "step": 98529 }, { "epoch": 10.119338605319914, "grad_norm": 0.03953128308057785, "learning_rate": 0.01, "loss": 1.9378, "step": 98532 }, { "epoch": 10.119646708431755, "grad_norm": 0.04132496565580368, "learning_rate": 0.01, "loss": 1.9479, "step": 98535 }, { "epoch": 10.119954811543597, "grad_norm": 0.06378614157438278, "learning_rate": 0.01, "loss": 1.9549, "step": 98538 }, { "epoch": 10.120262914655438, "grad_norm": 0.08414656668901443, "learning_rate": 0.01, "loss": 1.9437, "step": 98541 }, { "epoch": 10.12057101776728, "grad_norm": 0.04202824458479881, "learning_rate": 0.01, "loss": 1.9518, "step": 98544 }, { "epoch": 10.12087912087912, "grad_norm": 0.03575130179524422, "learning_rate": 0.01, "loss": 1.9295, "step": 98547 }, { "epoch": 10.121187223990962, "grad_norm": 0.1546347439289093, "learning_rate": 0.01, "loss": 1.9496, "step": 98550 }, { "epoch": 10.121495327102803, "grad_norm": 0.04151654988527298, "learning_rate": 0.01, "loss": 1.9239, "step": 98553 }, { "epoch": 10.121803430214646, "grad_norm": 0.0792761966586113, "learning_rate": 0.01, "loss": 1.9222, "step": 98556 }, { "epoch": 10.122111533326487, "grad_norm": 0.06739489734172821, "learning_rate": 0.01, "loss": 1.9368, "step": 98559 }, { "epoch": 10.122419636438329, "grad_norm": 0.10332737863063812, "learning_rate": 0.01, "loss": 1.9463, "step": 98562 }, { "epoch": 10.12272773955017, "grad_norm": 0.14671210944652557, "learning_rate": 0.01, "loss": 1.9197, "step": 98565 }, { "epoch": 10.123035842662011, "grad_norm": 0.09036523848772049, "learning_rate": 0.01, "loss": 1.9477, "step": 98568 }, { "epoch": 10.123343945773852, "grad_norm": 0.11435233056545258, "learning_rate": 0.01, "loss": 1.9327, "step": 98571 }, { "epoch": 10.123652048885694, "grad_norm": 0.0682324543595314, "learning_rate": 0.01, "loss": 1.9337, "step": 98574 }, { "epoch": 10.123960151997535, "grad_norm": 0.05735497549176216, "learning_rate": 0.01, "loss": 1.944, "step": 98577 }, { "epoch": 10.124268255109376, "grad_norm": 0.06683596223592758, "learning_rate": 0.01, "loss": 1.9595, "step": 98580 }, { "epoch": 10.124576358221217, "grad_norm": 0.059495940804481506, "learning_rate": 0.01, "loss": 1.9565, "step": 98583 }, { "epoch": 10.124884461333059, "grad_norm": 0.03599031642079353, "learning_rate": 0.01, "loss": 1.9488, "step": 98586 }, { "epoch": 10.125192564444902, "grad_norm": 0.03614237904548645, "learning_rate": 0.01, "loss": 1.9154, "step": 98589 }, { "epoch": 10.125500667556743, "grad_norm": 0.0915595218539238, "learning_rate": 0.01, "loss": 1.9375, "step": 98592 }, { "epoch": 10.125808770668584, "grad_norm": 0.08191665261983871, "learning_rate": 0.01, "loss": 1.918, "step": 98595 }, { "epoch": 10.126116873780425, "grad_norm": 0.08690435439348221, "learning_rate": 0.01, "loss": 1.944, "step": 98598 }, { "epoch": 10.126424976892267, "grad_norm": 0.08599332720041275, "learning_rate": 0.01, "loss": 1.9347, "step": 98601 }, { "epoch": 10.126733080004108, "grad_norm": 0.08404678106307983, "learning_rate": 0.01, "loss": 1.9395, "step": 98604 }, { "epoch": 10.12704118311595, "grad_norm": 0.07725178450345993, "learning_rate": 0.01, "loss": 1.9238, "step": 98607 }, { "epoch": 10.12734928622779, "grad_norm": 0.08164679259061813, "learning_rate": 0.01, "loss": 1.9222, "step": 98610 }, { "epoch": 10.127657389339632, "grad_norm": 0.056525520980358124, "learning_rate": 0.01, "loss": 1.9434, "step": 98613 }, { "epoch": 10.127965492451473, "grad_norm": 0.03929056599736214, "learning_rate": 0.01, "loss": 1.9368, "step": 98616 }, { "epoch": 10.128273595563314, "grad_norm": 0.0967596173286438, "learning_rate": 0.01, "loss": 1.9423, "step": 98619 }, { "epoch": 10.128581698675157, "grad_norm": 0.044262755662202835, "learning_rate": 0.01, "loss": 1.9359, "step": 98622 }, { "epoch": 10.128889801786999, "grad_norm": 0.06301670521497726, "learning_rate": 0.01, "loss": 1.9547, "step": 98625 }, { "epoch": 10.12919790489884, "grad_norm": 0.05127216875553131, "learning_rate": 0.01, "loss": 1.9424, "step": 98628 }, { "epoch": 10.129506008010681, "grad_norm": 0.09189940243959427, "learning_rate": 0.01, "loss": 1.9448, "step": 98631 }, { "epoch": 10.129814111122522, "grad_norm": 0.041789088398218155, "learning_rate": 0.01, "loss": 1.9151, "step": 98634 }, { "epoch": 10.130122214234364, "grad_norm": 0.10127456486225128, "learning_rate": 0.01, "loss": 1.9451, "step": 98637 }, { "epoch": 10.130430317346205, "grad_norm": 0.043131157755851746, "learning_rate": 0.01, "loss": 1.9626, "step": 98640 }, { "epoch": 10.130738420458046, "grad_norm": 0.12438475340604782, "learning_rate": 0.01, "loss": 1.9249, "step": 98643 }, { "epoch": 10.131046523569887, "grad_norm": 0.04802525416016579, "learning_rate": 0.01, "loss": 1.975, "step": 98646 }, { "epoch": 10.131354626681729, "grad_norm": 0.0507151298224926, "learning_rate": 0.01, "loss": 1.9524, "step": 98649 }, { "epoch": 10.131662729793572, "grad_norm": 0.086209736764431, "learning_rate": 0.01, "loss": 1.9291, "step": 98652 }, { "epoch": 10.131970832905413, "grad_norm": 0.060542210936546326, "learning_rate": 0.01, "loss": 1.9522, "step": 98655 }, { "epoch": 10.132278936017254, "grad_norm": 0.04687495529651642, "learning_rate": 0.01, "loss": 1.9314, "step": 98658 }, { "epoch": 10.132587039129096, "grad_norm": 0.05478186532855034, "learning_rate": 0.01, "loss": 1.9312, "step": 98661 }, { "epoch": 10.132895142240937, "grad_norm": 0.0812118798494339, "learning_rate": 0.01, "loss": 1.9536, "step": 98664 }, { "epoch": 10.133203245352778, "grad_norm": 0.06534958630800247, "learning_rate": 0.01, "loss": 1.9384, "step": 98667 }, { "epoch": 10.13351134846462, "grad_norm": 0.11175373941659927, "learning_rate": 0.01, "loss": 1.9513, "step": 98670 }, { "epoch": 10.13381945157646, "grad_norm": 0.03280024975538254, "learning_rate": 0.01, "loss": 1.9059, "step": 98673 }, { "epoch": 10.134127554688302, "grad_norm": 0.11052536219358444, "learning_rate": 0.01, "loss": 1.9385, "step": 98676 }, { "epoch": 10.134435657800143, "grad_norm": 0.07362852990627289, "learning_rate": 0.01, "loss": 1.9453, "step": 98679 }, { "epoch": 10.134743760911984, "grad_norm": 0.061130788177251816, "learning_rate": 0.01, "loss": 1.9405, "step": 98682 }, { "epoch": 10.135051864023827, "grad_norm": 0.0477757602930069, "learning_rate": 0.01, "loss": 1.951, "step": 98685 }, { "epoch": 10.135359967135669, "grad_norm": 0.03536587953567505, "learning_rate": 0.01, "loss": 1.9329, "step": 98688 }, { "epoch": 10.13566807024751, "grad_norm": 0.04630674049258232, "learning_rate": 0.01, "loss": 1.9356, "step": 98691 }, { "epoch": 10.135976173359351, "grad_norm": 0.051892686635255814, "learning_rate": 0.01, "loss": 1.9326, "step": 98694 }, { "epoch": 10.136284276471192, "grad_norm": 0.11422517150640488, "learning_rate": 0.01, "loss": 1.9343, "step": 98697 }, { "epoch": 10.136592379583034, "grad_norm": 0.12365932762622833, "learning_rate": 0.01, "loss": 1.9463, "step": 98700 }, { "epoch": 10.136900482694875, "grad_norm": 0.03768802434206009, "learning_rate": 0.01, "loss": 1.9398, "step": 98703 }, { "epoch": 10.137208585806716, "grad_norm": 0.07217808812856674, "learning_rate": 0.01, "loss": 1.9378, "step": 98706 }, { "epoch": 10.137516688918558, "grad_norm": 0.10465411841869354, "learning_rate": 0.01, "loss": 1.962, "step": 98709 }, { "epoch": 10.137824792030399, "grad_norm": 0.03864384815096855, "learning_rate": 0.01, "loss": 1.9295, "step": 98712 }, { "epoch": 10.138132895142242, "grad_norm": 0.051542460918426514, "learning_rate": 0.01, "loss": 1.933, "step": 98715 }, { "epoch": 10.138440998254083, "grad_norm": 0.04962679743766785, "learning_rate": 0.01, "loss": 1.9485, "step": 98718 }, { "epoch": 10.138749101365924, "grad_norm": 0.03136231377720833, "learning_rate": 0.01, "loss": 1.922, "step": 98721 }, { "epoch": 10.139057204477766, "grad_norm": 0.0425989143550396, "learning_rate": 0.01, "loss": 1.9337, "step": 98724 }, { "epoch": 10.139365307589607, "grad_norm": 0.11813602596521378, "learning_rate": 0.01, "loss": 1.9264, "step": 98727 }, { "epoch": 10.139673410701448, "grad_norm": 0.04321186617016792, "learning_rate": 0.01, "loss": 1.9432, "step": 98730 }, { "epoch": 10.13998151381329, "grad_norm": 0.06261945515871048, "learning_rate": 0.01, "loss": 1.9252, "step": 98733 }, { "epoch": 10.14028961692513, "grad_norm": 0.04242541640996933, "learning_rate": 0.01, "loss": 1.918, "step": 98736 }, { "epoch": 10.140597720036972, "grad_norm": 0.05050935596227646, "learning_rate": 0.01, "loss": 1.9086, "step": 98739 }, { "epoch": 10.140905823148813, "grad_norm": 0.11892428994178772, "learning_rate": 0.01, "loss": 1.9559, "step": 98742 }, { "epoch": 10.141213926260654, "grad_norm": 0.1320609301328659, "learning_rate": 0.01, "loss": 1.965, "step": 98745 }, { "epoch": 10.141522029372497, "grad_norm": 0.08089153468608856, "learning_rate": 0.01, "loss": 1.9276, "step": 98748 }, { "epoch": 10.141830132484339, "grad_norm": 0.08700688183307648, "learning_rate": 0.01, "loss": 1.9472, "step": 98751 }, { "epoch": 10.14213823559618, "grad_norm": 0.14676186442375183, "learning_rate": 0.01, "loss": 1.9382, "step": 98754 }, { "epoch": 10.142446338708021, "grad_norm": 0.11584928631782532, "learning_rate": 0.01, "loss": 1.9322, "step": 98757 }, { "epoch": 10.142754441819863, "grad_norm": 0.09546506404876709, "learning_rate": 0.01, "loss": 1.9462, "step": 98760 }, { "epoch": 10.143062544931704, "grad_norm": 0.04285535216331482, "learning_rate": 0.01, "loss": 1.9408, "step": 98763 }, { "epoch": 10.143370648043545, "grad_norm": 0.045634932816028595, "learning_rate": 0.01, "loss": 1.9637, "step": 98766 }, { "epoch": 10.143678751155386, "grad_norm": 0.05358066409826279, "learning_rate": 0.01, "loss": 1.9561, "step": 98769 }, { "epoch": 10.143986854267228, "grad_norm": 0.05766141787171364, "learning_rate": 0.01, "loss": 1.9602, "step": 98772 }, { "epoch": 10.144294957379069, "grad_norm": 0.07127935439348221, "learning_rate": 0.01, "loss": 1.932, "step": 98775 }, { "epoch": 10.14460306049091, "grad_norm": 0.05239563062787056, "learning_rate": 0.01, "loss": 1.9591, "step": 98778 }, { "epoch": 10.144911163602753, "grad_norm": 0.06676071882247925, "learning_rate": 0.01, "loss": 1.9703, "step": 98781 }, { "epoch": 10.145219266714594, "grad_norm": 0.06993058323860168, "learning_rate": 0.01, "loss": 1.9377, "step": 98784 }, { "epoch": 10.145527369826436, "grad_norm": 0.09807359427213669, "learning_rate": 0.01, "loss": 1.938, "step": 98787 }, { "epoch": 10.145835472938277, "grad_norm": 0.03647478297352791, "learning_rate": 0.01, "loss": 1.9014, "step": 98790 }, { "epoch": 10.146143576050118, "grad_norm": 0.05585514008998871, "learning_rate": 0.01, "loss": 1.9571, "step": 98793 }, { "epoch": 10.14645167916196, "grad_norm": 0.06017756089568138, "learning_rate": 0.01, "loss": 1.9519, "step": 98796 }, { "epoch": 10.1467597822738, "grad_norm": 0.052070360630750656, "learning_rate": 0.01, "loss": 1.9354, "step": 98799 }, { "epoch": 10.147067885385642, "grad_norm": 0.0319511704146862, "learning_rate": 0.01, "loss": 1.9255, "step": 98802 }, { "epoch": 10.147375988497483, "grad_norm": 0.03699100762605667, "learning_rate": 0.01, "loss": 1.9632, "step": 98805 }, { "epoch": 10.147684091609324, "grad_norm": 0.1164286732673645, "learning_rate": 0.01, "loss": 1.9513, "step": 98808 }, { "epoch": 10.147992194721168, "grad_norm": 0.07725051790475845, "learning_rate": 0.01, "loss": 1.9448, "step": 98811 }, { "epoch": 10.148300297833009, "grad_norm": 0.04559989646077156, "learning_rate": 0.01, "loss": 1.952, "step": 98814 }, { "epoch": 10.14860840094485, "grad_norm": 0.049496665596961975, "learning_rate": 0.01, "loss": 1.9334, "step": 98817 }, { "epoch": 10.148916504056691, "grad_norm": 0.0559081993997097, "learning_rate": 0.01, "loss": 1.9325, "step": 98820 }, { "epoch": 10.149224607168533, "grad_norm": 0.05707261338829994, "learning_rate": 0.01, "loss": 1.9524, "step": 98823 }, { "epoch": 10.149532710280374, "grad_norm": 0.045833077281713486, "learning_rate": 0.01, "loss": 1.9685, "step": 98826 }, { "epoch": 10.149840813392215, "grad_norm": 0.05008349567651749, "learning_rate": 0.01, "loss": 1.9406, "step": 98829 }, { "epoch": 10.150148916504056, "grad_norm": 0.11216545850038528, "learning_rate": 0.01, "loss": 1.9608, "step": 98832 }, { "epoch": 10.150457019615898, "grad_norm": 0.04488324001431465, "learning_rate": 0.01, "loss": 1.9304, "step": 98835 }, { "epoch": 10.150765122727739, "grad_norm": 0.04801878705620766, "learning_rate": 0.01, "loss": 1.947, "step": 98838 }, { "epoch": 10.15107322583958, "grad_norm": 0.08773142844438553, "learning_rate": 0.01, "loss": 1.9312, "step": 98841 }, { "epoch": 10.151381328951423, "grad_norm": 0.07734539359807968, "learning_rate": 0.01, "loss": 1.9121, "step": 98844 }, { "epoch": 10.151689432063264, "grad_norm": 0.06627245247364044, "learning_rate": 0.01, "loss": 1.9586, "step": 98847 }, { "epoch": 10.151997535175106, "grad_norm": 0.09218309819698334, "learning_rate": 0.01, "loss": 1.9443, "step": 98850 }, { "epoch": 10.152305638286947, "grad_norm": 0.11950929462909698, "learning_rate": 0.01, "loss": 1.9456, "step": 98853 }, { "epoch": 10.152613741398788, "grad_norm": 0.05259060114622116, "learning_rate": 0.01, "loss": 1.9486, "step": 98856 }, { "epoch": 10.15292184451063, "grad_norm": 0.045241810381412506, "learning_rate": 0.01, "loss": 1.9503, "step": 98859 }, { "epoch": 10.15322994762247, "grad_norm": 0.03596232831478119, "learning_rate": 0.01, "loss": 1.9129, "step": 98862 }, { "epoch": 10.153538050734312, "grad_norm": 0.052145007997751236, "learning_rate": 0.01, "loss": 1.9451, "step": 98865 }, { "epoch": 10.153846153846153, "grad_norm": 0.10584969818592072, "learning_rate": 0.01, "loss": 1.9811, "step": 98868 }, { "epoch": 10.154154256957995, "grad_norm": 0.0873233750462532, "learning_rate": 0.01, "loss": 1.9533, "step": 98871 }, { "epoch": 10.154462360069836, "grad_norm": 0.09436676651239395, "learning_rate": 0.01, "loss": 1.9541, "step": 98874 }, { "epoch": 10.154770463181679, "grad_norm": 0.05636145547032356, "learning_rate": 0.01, "loss": 1.9277, "step": 98877 }, { "epoch": 10.15507856629352, "grad_norm": 0.08092790096998215, "learning_rate": 0.01, "loss": 1.9216, "step": 98880 }, { "epoch": 10.155386669405361, "grad_norm": 0.05163249373435974, "learning_rate": 0.01, "loss": 1.94, "step": 98883 }, { "epoch": 10.155694772517203, "grad_norm": 0.11606719344854355, "learning_rate": 0.01, "loss": 1.942, "step": 98886 }, { "epoch": 10.156002875629044, "grad_norm": 0.050113923847675323, "learning_rate": 0.01, "loss": 1.9619, "step": 98889 }, { "epoch": 10.156310978740885, "grad_norm": 0.07689042389392853, "learning_rate": 0.01, "loss": 1.9439, "step": 98892 }, { "epoch": 10.156619081852726, "grad_norm": 0.07897762209177017, "learning_rate": 0.01, "loss": 1.9351, "step": 98895 }, { "epoch": 10.156927184964568, "grad_norm": 0.05787033215165138, "learning_rate": 0.01, "loss": 1.9027, "step": 98898 }, { "epoch": 10.157235288076409, "grad_norm": 0.057969070971012115, "learning_rate": 0.01, "loss": 1.9572, "step": 98901 }, { "epoch": 10.15754339118825, "grad_norm": 0.10790562629699707, "learning_rate": 0.01, "loss": 1.9449, "step": 98904 }, { "epoch": 10.157851494300093, "grad_norm": 0.07232168316841125, "learning_rate": 0.01, "loss": 1.9265, "step": 98907 }, { "epoch": 10.158159597411935, "grad_norm": 0.13184112310409546, "learning_rate": 0.01, "loss": 1.9553, "step": 98910 }, { "epoch": 10.158467700523776, "grad_norm": 0.08225245773792267, "learning_rate": 0.01, "loss": 1.938, "step": 98913 }, { "epoch": 10.158775803635617, "grad_norm": 0.04216809943318367, "learning_rate": 0.01, "loss": 1.9214, "step": 98916 }, { "epoch": 10.159083906747458, "grad_norm": 0.039220843464136124, "learning_rate": 0.01, "loss": 1.9301, "step": 98919 }, { "epoch": 10.1593920098593, "grad_norm": 0.04414791241288185, "learning_rate": 0.01, "loss": 1.9354, "step": 98922 }, { "epoch": 10.15970011297114, "grad_norm": 0.06560777127742767, "learning_rate": 0.01, "loss": 1.9325, "step": 98925 }, { "epoch": 10.160008216082982, "grad_norm": 0.050211433321237564, "learning_rate": 0.01, "loss": 1.9165, "step": 98928 }, { "epoch": 10.160316319194823, "grad_norm": 0.041922181844711304, "learning_rate": 0.01, "loss": 1.9581, "step": 98931 }, { "epoch": 10.160624422306665, "grad_norm": 0.035320624709129333, "learning_rate": 0.01, "loss": 1.9388, "step": 98934 }, { "epoch": 10.160932525418506, "grad_norm": 0.13377632200717926, "learning_rate": 0.01, "loss": 1.9388, "step": 98937 }, { "epoch": 10.161240628530349, "grad_norm": 0.13629798591136932, "learning_rate": 0.01, "loss": 1.9111, "step": 98940 }, { "epoch": 10.16154873164219, "grad_norm": 0.0511309877038002, "learning_rate": 0.01, "loss": 1.925, "step": 98943 }, { "epoch": 10.161856834754031, "grad_norm": 0.03774381801486015, "learning_rate": 0.01, "loss": 1.9108, "step": 98946 }, { "epoch": 10.162164937865873, "grad_norm": 0.04062725231051445, "learning_rate": 0.01, "loss": 1.9462, "step": 98949 }, { "epoch": 10.162473040977714, "grad_norm": 0.04439100995659828, "learning_rate": 0.01, "loss": 1.9503, "step": 98952 }, { "epoch": 10.162781144089555, "grad_norm": 0.03251064941287041, "learning_rate": 0.01, "loss": 1.9252, "step": 98955 }, { "epoch": 10.163089247201397, "grad_norm": 0.07445567101240158, "learning_rate": 0.01, "loss": 1.9297, "step": 98958 }, { "epoch": 10.163397350313238, "grad_norm": 0.0814160406589508, "learning_rate": 0.01, "loss": 1.954, "step": 98961 }, { "epoch": 10.163705453425079, "grad_norm": 0.048640090972185135, "learning_rate": 0.01, "loss": 1.9481, "step": 98964 }, { "epoch": 10.16401355653692, "grad_norm": 0.044837836176157, "learning_rate": 0.01, "loss": 1.9641, "step": 98967 }, { "epoch": 10.164321659648763, "grad_norm": 0.049571964889764786, "learning_rate": 0.01, "loss": 1.9772, "step": 98970 }, { "epoch": 10.164629762760605, "grad_norm": 0.045311830937862396, "learning_rate": 0.01, "loss": 1.9203, "step": 98973 }, { "epoch": 10.164937865872446, "grad_norm": 0.040575895458459854, "learning_rate": 0.01, "loss": 1.943, "step": 98976 }, { "epoch": 10.165245968984287, "grad_norm": 0.07294129580259323, "learning_rate": 0.01, "loss": 1.9357, "step": 98979 }, { "epoch": 10.165554072096128, "grad_norm": 0.054039083421230316, "learning_rate": 0.01, "loss": 1.949, "step": 98982 }, { "epoch": 10.16586217520797, "grad_norm": 0.23129263520240784, "learning_rate": 0.01, "loss": 1.9595, "step": 98985 }, { "epoch": 10.166170278319811, "grad_norm": 0.08735502511262894, "learning_rate": 0.01, "loss": 1.9674, "step": 98988 }, { "epoch": 10.166478381431652, "grad_norm": 0.04697715863585472, "learning_rate": 0.01, "loss": 1.9597, "step": 98991 }, { "epoch": 10.166786484543493, "grad_norm": 0.03498484939336777, "learning_rate": 0.01, "loss": 1.9373, "step": 98994 }, { "epoch": 10.167094587655335, "grad_norm": 0.045701831579208374, "learning_rate": 0.01, "loss": 1.9602, "step": 98997 }, { "epoch": 10.167402690767176, "grad_norm": 0.0513944998383522, "learning_rate": 0.01, "loss": 1.9306, "step": 99000 }, { "epoch": 10.167710793879019, "grad_norm": 0.0901717096567154, "learning_rate": 0.01, "loss": 1.9422, "step": 99003 }, { "epoch": 10.16801889699086, "grad_norm": 0.08373749256134033, "learning_rate": 0.01, "loss": 1.9538, "step": 99006 }, { "epoch": 10.168327000102702, "grad_norm": 0.07009940594434738, "learning_rate": 0.01, "loss": 1.9525, "step": 99009 }, { "epoch": 10.168635103214543, "grad_norm": 0.05948394536972046, "learning_rate": 0.01, "loss": 1.9335, "step": 99012 }, { "epoch": 10.168943206326384, "grad_norm": 0.0597563236951828, "learning_rate": 0.01, "loss": 1.9375, "step": 99015 }, { "epoch": 10.169251309438225, "grad_norm": 0.06647255271673203, "learning_rate": 0.01, "loss": 1.9366, "step": 99018 }, { "epoch": 10.169559412550067, "grad_norm": 0.04476379230618477, "learning_rate": 0.01, "loss": 1.9417, "step": 99021 }, { "epoch": 10.169867515661908, "grad_norm": 0.043609198182821274, "learning_rate": 0.01, "loss": 1.9551, "step": 99024 }, { "epoch": 10.170175618773749, "grad_norm": 0.05181267112493515, "learning_rate": 0.01, "loss": 1.9363, "step": 99027 }, { "epoch": 10.17048372188559, "grad_norm": 0.04127652943134308, "learning_rate": 0.01, "loss": 1.9618, "step": 99030 }, { "epoch": 10.170791824997432, "grad_norm": 0.04352453723549843, "learning_rate": 0.01, "loss": 1.9515, "step": 99033 }, { "epoch": 10.171099928109275, "grad_norm": 0.1716119796037674, "learning_rate": 0.01, "loss": 1.937, "step": 99036 }, { "epoch": 10.171408031221116, "grad_norm": 0.08144392818212509, "learning_rate": 0.01, "loss": 1.9414, "step": 99039 }, { "epoch": 10.171716134332957, "grad_norm": 0.08152837306261063, "learning_rate": 0.01, "loss": 1.9511, "step": 99042 }, { "epoch": 10.172024237444798, "grad_norm": 0.046277958899736404, "learning_rate": 0.01, "loss": 1.9672, "step": 99045 }, { "epoch": 10.17233234055664, "grad_norm": 0.038435254245996475, "learning_rate": 0.01, "loss": 1.9264, "step": 99048 }, { "epoch": 10.172640443668481, "grad_norm": 0.038541242480278015, "learning_rate": 0.01, "loss": 1.9679, "step": 99051 }, { "epoch": 10.172948546780322, "grad_norm": 0.04055872932076454, "learning_rate": 0.01, "loss": 1.9404, "step": 99054 }, { "epoch": 10.173256649892163, "grad_norm": 0.06344175338745117, "learning_rate": 0.01, "loss": 1.9204, "step": 99057 }, { "epoch": 10.173564753004005, "grad_norm": 0.06188404932618141, "learning_rate": 0.01, "loss": 1.9149, "step": 99060 }, { "epoch": 10.173872856115846, "grad_norm": 0.12819097936153412, "learning_rate": 0.01, "loss": 1.9457, "step": 99063 }, { "epoch": 10.174180959227689, "grad_norm": 0.05753006786108017, "learning_rate": 0.01, "loss": 1.9589, "step": 99066 }, { "epoch": 10.17448906233953, "grad_norm": 0.15209268033504486, "learning_rate": 0.01, "loss": 1.9403, "step": 99069 }, { "epoch": 10.174797165451372, "grad_norm": 0.10588809102773666, "learning_rate": 0.01, "loss": 1.9256, "step": 99072 }, { "epoch": 10.175105268563213, "grad_norm": 0.07004300504922867, "learning_rate": 0.01, "loss": 1.9518, "step": 99075 }, { "epoch": 10.175413371675054, "grad_norm": 0.0807129442691803, "learning_rate": 0.01, "loss": 1.9394, "step": 99078 }, { "epoch": 10.175721474786895, "grad_norm": 0.056306447833776474, "learning_rate": 0.01, "loss": 1.9404, "step": 99081 }, { "epoch": 10.176029577898737, "grad_norm": 0.04143708571791649, "learning_rate": 0.01, "loss": 1.93, "step": 99084 }, { "epoch": 10.176337681010578, "grad_norm": 0.03577942028641701, "learning_rate": 0.01, "loss": 1.952, "step": 99087 }, { "epoch": 10.17664578412242, "grad_norm": 0.05482630059123039, "learning_rate": 0.01, "loss": 1.9373, "step": 99090 }, { "epoch": 10.17695388723426, "grad_norm": 0.04353375732898712, "learning_rate": 0.01, "loss": 1.9424, "step": 99093 }, { "epoch": 10.177261990346102, "grad_norm": 0.05555172637104988, "learning_rate": 0.01, "loss": 1.9553, "step": 99096 }, { "epoch": 10.177570093457945, "grad_norm": 0.08030430227518082, "learning_rate": 0.01, "loss": 1.9198, "step": 99099 }, { "epoch": 10.177878196569786, "grad_norm": 0.07171040773391724, "learning_rate": 0.01, "loss": 1.9579, "step": 99102 }, { "epoch": 10.178186299681627, "grad_norm": 0.15464453399181366, "learning_rate": 0.01, "loss": 1.9379, "step": 99105 }, { "epoch": 10.178494402793469, "grad_norm": 0.055558331310749054, "learning_rate": 0.01, "loss": 1.9333, "step": 99108 }, { "epoch": 10.17880250590531, "grad_norm": 0.10895703732967377, "learning_rate": 0.01, "loss": 1.9526, "step": 99111 }, { "epoch": 10.179110609017151, "grad_norm": 0.04339831322431564, "learning_rate": 0.01, "loss": 1.9497, "step": 99114 }, { "epoch": 10.179418712128992, "grad_norm": 0.04216046631336212, "learning_rate": 0.01, "loss": 1.9538, "step": 99117 }, { "epoch": 10.179726815240834, "grad_norm": 0.047386620193719864, "learning_rate": 0.01, "loss": 1.9236, "step": 99120 }, { "epoch": 10.180034918352675, "grad_norm": 0.04763692989945412, "learning_rate": 0.01, "loss": 1.9359, "step": 99123 }, { "epoch": 10.180343021464516, "grad_norm": 0.05423538386821747, "learning_rate": 0.01, "loss": 1.9341, "step": 99126 }, { "epoch": 10.180651124576357, "grad_norm": 0.08128771185874939, "learning_rate": 0.01, "loss": 1.9407, "step": 99129 }, { "epoch": 10.1809592276882, "grad_norm": 0.08237950503826141, "learning_rate": 0.01, "loss": 1.9499, "step": 99132 }, { "epoch": 10.181267330800042, "grad_norm": 0.07690861076116562, "learning_rate": 0.01, "loss": 1.9391, "step": 99135 }, { "epoch": 10.181575433911883, "grad_norm": 0.08734242618083954, "learning_rate": 0.01, "loss": 1.957, "step": 99138 }, { "epoch": 10.181883537023724, "grad_norm": 0.0518592968583107, "learning_rate": 0.01, "loss": 1.941, "step": 99141 }, { "epoch": 10.182191640135565, "grad_norm": 0.08018185198307037, "learning_rate": 0.01, "loss": 1.9335, "step": 99144 }, { "epoch": 10.182499743247407, "grad_norm": 0.06972198933362961, "learning_rate": 0.01, "loss": 1.9237, "step": 99147 }, { "epoch": 10.182807846359248, "grad_norm": 0.06894844025373459, "learning_rate": 0.01, "loss": 1.9173, "step": 99150 }, { "epoch": 10.18311594947109, "grad_norm": 0.09211789071559906, "learning_rate": 0.01, "loss": 1.9327, "step": 99153 }, { "epoch": 10.18342405258293, "grad_norm": 0.06418921053409576, "learning_rate": 0.01, "loss": 1.9689, "step": 99156 }, { "epoch": 10.183732155694772, "grad_norm": 0.12758499383926392, "learning_rate": 0.01, "loss": 1.9327, "step": 99159 }, { "epoch": 10.184040258806615, "grad_norm": 0.07596320658922195, "learning_rate": 0.01, "loss": 1.9553, "step": 99162 }, { "epoch": 10.184348361918456, "grad_norm": 0.06082075461745262, "learning_rate": 0.01, "loss": 1.9698, "step": 99165 }, { "epoch": 10.184656465030297, "grad_norm": 0.05972994118928909, "learning_rate": 0.01, "loss": 1.9492, "step": 99168 }, { "epoch": 10.184964568142139, "grad_norm": 0.053260814398527145, "learning_rate": 0.01, "loss": 1.9301, "step": 99171 }, { "epoch": 10.18527267125398, "grad_norm": 0.06778529286384583, "learning_rate": 0.01, "loss": 1.9565, "step": 99174 }, { "epoch": 10.185580774365821, "grad_norm": 0.06456658989191055, "learning_rate": 0.01, "loss": 1.9572, "step": 99177 }, { "epoch": 10.185888877477662, "grad_norm": 0.06979983299970627, "learning_rate": 0.01, "loss": 1.9531, "step": 99180 }, { "epoch": 10.186196980589504, "grad_norm": 0.049924179911613464, "learning_rate": 0.01, "loss": 1.9356, "step": 99183 }, { "epoch": 10.186505083701345, "grad_norm": 0.031892478466033936, "learning_rate": 0.01, "loss": 1.9018, "step": 99186 }, { "epoch": 10.186813186813186, "grad_norm": 0.05626950040459633, "learning_rate": 0.01, "loss": 1.9244, "step": 99189 }, { "epoch": 10.187121289925027, "grad_norm": 0.12525342404842377, "learning_rate": 0.01, "loss": 1.9552, "step": 99192 }, { "epoch": 10.18742939303687, "grad_norm": 0.10168839991092682, "learning_rate": 0.01, "loss": 1.92, "step": 99195 }, { "epoch": 10.187737496148712, "grad_norm": 0.04261062666773796, "learning_rate": 0.01, "loss": 1.9415, "step": 99198 }, { "epoch": 10.188045599260553, "grad_norm": 0.04495152086019516, "learning_rate": 0.01, "loss": 1.9577, "step": 99201 }, { "epoch": 10.188353702372394, "grad_norm": 0.038814619183540344, "learning_rate": 0.01, "loss": 1.9574, "step": 99204 }, { "epoch": 10.188661805484236, "grad_norm": 0.036831244826316833, "learning_rate": 0.01, "loss": 1.9094, "step": 99207 }, { "epoch": 10.188969908596077, "grad_norm": 0.10675276815891266, "learning_rate": 0.01, "loss": 1.957, "step": 99210 }, { "epoch": 10.189278011707918, "grad_norm": 0.09722632169723511, "learning_rate": 0.01, "loss": 1.9372, "step": 99213 }, { "epoch": 10.18958611481976, "grad_norm": 0.1113773062825203, "learning_rate": 0.01, "loss": 1.926, "step": 99216 }, { "epoch": 10.1898942179316, "grad_norm": 0.04317975789308548, "learning_rate": 0.01, "loss": 1.9561, "step": 99219 }, { "epoch": 10.190202321043442, "grad_norm": 0.05818440020084381, "learning_rate": 0.01, "loss": 1.9552, "step": 99222 }, { "epoch": 10.190510424155285, "grad_norm": 0.08506343513727188, "learning_rate": 0.01, "loss": 1.9371, "step": 99225 }, { "epoch": 10.190818527267126, "grad_norm": 0.05146276578307152, "learning_rate": 0.01, "loss": 1.931, "step": 99228 }, { "epoch": 10.191126630378967, "grad_norm": 0.05243841931223869, "learning_rate": 0.01, "loss": 1.9154, "step": 99231 }, { "epoch": 10.191434733490809, "grad_norm": 0.048839930444955826, "learning_rate": 0.01, "loss": 1.9407, "step": 99234 }, { "epoch": 10.19174283660265, "grad_norm": 0.04465770348906517, "learning_rate": 0.01, "loss": 1.938, "step": 99237 }, { "epoch": 10.192050939714491, "grad_norm": 0.11541350930929184, "learning_rate": 0.01, "loss": 1.9201, "step": 99240 }, { "epoch": 10.192359042826332, "grad_norm": 0.06835901737213135, "learning_rate": 0.01, "loss": 1.9442, "step": 99243 }, { "epoch": 10.192667145938174, "grad_norm": 0.06168109178543091, "learning_rate": 0.01, "loss": 1.9289, "step": 99246 }, { "epoch": 10.192975249050015, "grad_norm": 0.13129253685474396, "learning_rate": 0.01, "loss": 1.9602, "step": 99249 }, { "epoch": 10.193283352161856, "grad_norm": 0.037970367819070816, "learning_rate": 0.01, "loss": 1.9532, "step": 99252 }, { "epoch": 10.193591455273697, "grad_norm": 0.04477749764919281, "learning_rate": 0.01, "loss": 1.9527, "step": 99255 }, { "epoch": 10.19389955838554, "grad_norm": 0.039190951734781265, "learning_rate": 0.01, "loss": 1.9277, "step": 99258 }, { "epoch": 10.194207661497382, "grad_norm": 0.08986736088991165, "learning_rate": 0.01, "loss": 1.9275, "step": 99261 }, { "epoch": 10.194515764609223, "grad_norm": 0.07922101765871048, "learning_rate": 0.01, "loss": 1.9231, "step": 99264 }, { "epoch": 10.194823867721064, "grad_norm": 0.13183197379112244, "learning_rate": 0.01, "loss": 1.9453, "step": 99267 }, { "epoch": 10.195131970832906, "grad_norm": 0.11073755472898483, "learning_rate": 0.01, "loss": 1.9538, "step": 99270 }, { "epoch": 10.195440073944747, "grad_norm": 0.04988548159599304, "learning_rate": 0.01, "loss": 1.9591, "step": 99273 }, { "epoch": 10.195748177056588, "grad_norm": 0.03584609180688858, "learning_rate": 0.01, "loss": 1.951, "step": 99276 }, { "epoch": 10.19605628016843, "grad_norm": 0.03539060056209564, "learning_rate": 0.01, "loss": 1.9599, "step": 99279 }, { "epoch": 10.19636438328027, "grad_norm": 0.11993156373500824, "learning_rate": 0.01, "loss": 1.9086, "step": 99282 }, { "epoch": 10.196672486392112, "grad_norm": 0.03564785420894623, "learning_rate": 0.01, "loss": 1.9402, "step": 99285 }, { "epoch": 10.196980589503953, "grad_norm": 0.10162950307130814, "learning_rate": 0.01, "loss": 1.9426, "step": 99288 }, { "epoch": 10.197288692615796, "grad_norm": 0.13340280950069427, "learning_rate": 0.01, "loss": 1.9224, "step": 99291 }, { "epoch": 10.197596795727637, "grad_norm": 0.11081166565418243, "learning_rate": 0.01, "loss": 1.9457, "step": 99294 }, { "epoch": 10.197904898839479, "grad_norm": 0.06974168866872787, "learning_rate": 0.01, "loss": 1.9398, "step": 99297 }, { "epoch": 10.19821300195132, "grad_norm": 0.052043814212083817, "learning_rate": 0.01, "loss": 1.9506, "step": 99300 }, { "epoch": 10.198521105063161, "grad_norm": 0.060473427176475525, "learning_rate": 0.01, "loss": 1.9673, "step": 99303 }, { "epoch": 10.198829208175002, "grad_norm": 0.06991663575172424, "learning_rate": 0.01, "loss": 1.9477, "step": 99306 }, { "epoch": 10.199137311286844, "grad_norm": 0.051819734275341034, "learning_rate": 0.01, "loss": 1.9376, "step": 99309 }, { "epoch": 10.199445414398685, "grad_norm": 0.044669054448604584, "learning_rate": 0.01, "loss": 1.9549, "step": 99312 }, { "epoch": 10.199753517510526, "grad_norm": 0.044240985065698624, "learning_rate": 0.01, "loss": 1.9462, "step": 99315 }, { "epoch": 10.200061620622368, "grad_norm": 0.07338231801986694, "learning_rate": 0.01, "loss": 1.9617, "step": 99318 }, { "epoch": 10.20036972373421, "grad_norm": 0.07212042063474655, "learning_rate": 0.01, "loss": 1.9471, "step": 99321 }, { "epoch": 10.200677826846052, "grad_norm": 0.09006025642156601, "learning_rate": 0.01, "loss": 1.9542, "step": 99324 }, { "epoch": 10.200985929957893, "grad_norm": 0.12026429176330566, "learning_rate": 0.01, "loss": 1.9292, "step": 99327 }, { "epoch": 10.201294033069734, "grad_norm": 0.07984338700771332, "learning_rate": 0.01, "loss": 1.9729, "step": 99330 }, { "epoch": 10.201602136181576, "grad_norm": 0.17421205341815948, "learning_rate": 0.01, "loss": 1.961, "step": 99333 }, { "epoch": 10.201910239293417, "grad_norm": 0.09257086366415024, "learning_rate": 0.01, "loss": 1.9235, "step": 99336 }, { "epoch": 10.202218342405258, "grad_norm": 0.04701230302453041, "learning_rate": 0.01, "loss": 1.9103, "step": 99339 }, { "epoch": 10.2025264455171, "grad_norm": 0.03216308727860451, "learning_rate": 0.01, "loss": 1.954, "step": 99342 }, { "epoch": 10.20283454862894, "grad_norm": 0.04874720424413681, "learning_rate": 0.01, "loss": 1.9266, "step": 99345 }, { "epoch": 10.203142651740782, "grad_norm": 0.04846279323101044, "learning_rate": 0.01, "loss": 1.911, "step": 99348 }, { "epoch": 10.203450754852623, "grad_norm": 0.10036344826221466, "learning_rate": 0.01, "loss": 1.9365, "step": 99351 }, { "epoch": 10.203758857964466, "grad_norm": 0.04600038379430771, "learning_rate": 0.01, "loss": 1.9409, "step": 99354 }, { "epoch": 10.204066961076308, "grad_norm": 0.12066859006881714, "learning_rate": 0.01, "loss": 1.9366, "step": 99357 }, { "epoch": 10.204375064188149, "grad_norm": 0.10237548500299454, "learning_rate": 0.01, "loss": 1.9558, "step": 99360 }, { "epoch": 10.20468316729999, "grad_norm": 0.04999920725822449, "learning_rate": 0.01, "loss": 1.9564, "step": 99363 }, { "epoch": 10.204991270411831, "grad_norm": 0.054514575749635696, "learning_rate": 0.01, "loss": 1.929, "step": 99366 }, { "epoch": 10.205299373523673, "grad_norm": 0.09816866368055344, "learning_rate": 0.01, "loss": 1.9313, "step": 99369 }, { "epoch": 10.205607476635514, "grad_norm": 0.12557558715343475, "learning_rate": 0.01, "loss": 1.925, "step": 99372 }, { "epoch": 10.205915579747355, "grad_norm": 0.061024587601423264, "learning_rate": 0.01, "loss": 1.9447, "step": 99375 }, { "epoch": 10.206223682859196, "grad_norm": 0.042588815093040466, "learning_rate": 0.01, "loss": 1.9446, "step": 99378 }, { "epoch": 10.206531785971038, "grad_norm": 0.050229258835315704, "learning_rate": 0.01, "loss": 1.9273, "step": 99381 }, { "epoch": 10.206839889082879, "grad_norm": 0.043651677668094635, "learning_rate": 0.01, "loss": 1.9306, "step": 99384 }, { "epoch": 10.207147992194722, "grad_norm": 0.03906402364373207, "learning_rate": 0.01, "loss": 1.9354, "step": 99387 }, { "epoch": 10.207456095306563, "grad_norm": 0.037026580423116684, "learning_rate": 0.01, "loss": 1.9504, "step": 99390 }, { "epoch": 10.207764198418404, "grad_norm": 0.11340945959091187, "learning_rate": 0.01, "loss": 1.9274, "step": 99393 }, { "epoch": 10.208072301530246, "grad_norm": 0.07180839031934738, "learning_rate": 0.01, "loss": 1.9576, "step": 99396 }, { "epoch": 10.208380404642087, "grad_norm": 0.05614294111728668, "learning_rate": 0.01, "loss": 1.965, "step": 99399 }, { "epoch": 10.208688507753928, "grad_norm": 0.0997401773929596, "learning_rate": 0.01, "loss": 1.9453, "step": 99402 }, { "epoch": 10.20899661086577, "grad_norm": 0.056495551019907, "learning_rate": 0.01, "loss": 1.9331, "step": 99405 }, { "epoch": 10.20930471397761, "grad_norm": 0.11858171969652176, "learning_rate": 0.01, "loss": 1.9407, "step": 99408 }, { "epoch": 10.209612817089452, "grad_norm": 0.04259796068072319, "learning_rate": 0.01, "loss": 1.9371, "step": 99411 }, { "epoch": 10.209920920201293, "grad_norm": 0.09054366499185562, "learning_rate": 0.01, "loss": 1.9328, "step": 99414 }, { "epoch": 10.210229023313136, "grad_norm": 0.16497641801834106, "learning_rate": 0.01, "loss": 1.9417, "step": 99417 }, { "epoch": 10.210537126424978, "grad_norm": 0.13353492319583893, "learning_rate": 0.01, "loss": 1.914, "step": 99420 }, { "epoch": 10.210845229536819, "grad_norm": 0.06359057128429413, "learning_rate": 0.01, "loss": 1.9431, "step": 99423 }, { "epoch": 10.21115333264866, "grad_norm": 0.06252306699752808, "learning_rate": 0.01, "loss": 1.9215, "step": 99426 }, { "epoch": 10.211461435760501, "grad_norm": 0.06305409222841263, "learning_rate": 0.01, "loss": 1.9404, "step": 99429 }, { "epoch": 10.211769538872343, "grad_norm": 0.039893072098493576, "learning_rate": 0.01, "loss": 1.9403, "step": 99432 }, { "epoch": 10.212077641984184, "grad_norm": 0.055040694773197174, "learning_rate": 0.01, "loss": 1.947, "step": 99435 }, { "epoch": 10.212385745096025, "grad_norm": 0.12885569036006927, "learning_rate": 0.01, "loss": 1.9687, "step": 99438 }, { "epoch": 10.212693848207866, "grad_norm": 0.04882066696882248, "learning_rate": 0.01, "loss": 1.9544, "step": 99441 }, { "epoch": 10.213001951319708, "grad_norm": 0.05029818043112755, "learning_rate": 0.01, "loss": 1.9418, "step": 99444 }, { "epoch": 10.213310054431549, "grad_norm": 0.06048410385847092, "learning_rate": 0.01, "loss": 1.9599, "step": 99447 }, { "epoch": 10.213618157543392, "grad_norm": 0.039041806012392044, "learning_rate": 0.01, "loss": 1.9436, "step": 99450 }, { "epoch": 10.213926260655233, "grad_norm": 0.11554940789937973, "learning_rate": 0.01, "loss": 1.9515, "step": 99453 }, { "epoch": 10.214234363767074, "grad_norm": 0.07602337002754211, "learning_rate": 0.01, "loss": 1.9188, "step": 99456 }, { "epoch": 10.214542466878916, "grad_norm": 0.05026714876294136, "learning_rate": 0.01, "loss": 1.9431, "step": 99459 }, { "epoch": 10.214850569990757, "grad_norm": 0.05463573336601257, "learning_rate": 0.01, "loss": 1.9604, "step": 99462 }, { "epoch": 10.215158673102598, "grad_norm": 0.030814429745078087, "learning_rate": 0.01, "loss": 1.908, "step": 99465 }, { "epoch": 10.21546677621444, "grad_norm": 0.12945370376110077, "learning_rate": 0.01, "loss": 1.9358, "step": 99468 }, { "epoch": 10.21577487932628, "grad_norm": 0.0723688155412674, "learning_rate": 0.01, "loss": 1.9415, "step": 99471 }, { "epoch": 10.216082982438122, "grad_norm": 0.10440348833799362, "learning_rate": 0.01, "loss": 1.9243, "step": 99474 }, { "epoch": 10.216391085549963, "grad_norm": 0.04114481061697006, "learning_rate": 0.01, "loss": 1.9218, "step": 99477 }, { "epoch": 10.216699188661806, "grad_norm": 0.10391386598348618, "learning_rate": 0.01, "loss": 1.9356, "step": 99480 }, { "epoch": 10.217007291773648, "grad_norm": 0.050123002380132675, "learning_rate": 0.01, "loss": 1.9428, "step": 99483 }, { "epoch": 10.217315394885489, "grad_norm": 0.0880192220211029, "learning_rate": 0.01, "loss": 1.9363, "step": 99486 }, { "epoch": 10.21762349799733, "grad_norm": 0.09628675878047943, "learning_rate": 0.01, "loss": 1.9706, "step": 99489 }, { "epoch": 10.217931601109171, "grad_norm": 0.034183621406555176, "learning_rate": 0.01, "loss": 1.9513, "step": 99492 }, { "epoch": 10.218239704221013, "grad_norm": 0.04564540833234787, "learning_rate": 0.01, "loss": 1.9257, "step": 99495 }, { "epoch": 10.218547807332854, "grad_norm": 0.04579702392220497, "learning_rate": 0.01, "loss": 1.9527, "step": 99498 }, { "epoch": 10.218855910444695, "grad_norm": 0.09346463531255722, "learning_rate": 0.01, "loss": 1.9324, "step": 99501 }, { "epoch": 10.219164013556536, "grad_norm": 0.03784818574786186, "learning_rate": 0.01, "loss": 1.9315, "step": 99504 }, { "epoch": 10.219472116668378, "grad_norm": 0.10040204226970673, "learning_rate": 0.01, "loss": 1.9472, "step": 99507 }, { "epoch": 10.219780219780219, "grad_norm": 0.0887511596083641, "learning_rate": 0.01, "loss": 1.9328, "step": 99510 }, { "epoch": 10.220088322892062, "grad_norm": 0.06611691415309906, "learning_rate": 0.01, "loss": 1.917, "step": 99513 }, { "epoch": 10.220396426003903, "grad_norm": 0.040397051721811295, "learning_rate": 0.01, "loss": 1.9464, "step": 99516 }, { "epoch": 10.220704529115745, "grad_norm": 0.045187678188085556, "learning_rate": 0.01, "loss": 1.9467, "step": 99519 }, { "epoch": 10.221012632227586, "grad_norm": 0.07442598044872284, "learning_rate": 0.01, "loss": 1.9449, "step": 99522 }, { "epoch": 10.221320735339427, "grad_norm": 0.07791969925165176, "learning_rate": 0.01, "loss": 1.9433, "step": 99525 }, { "epoch": 10.221628838451268, "grad_norm": 0.113225556910038, "learning_rate": 0.01, "loss": 1.9279, "step": 99528 }, { "epoch": 10.22193694156311, "grad_norm": 0.04235997796058655, "learning_rate": 0.01, "loss": 1.9544, "step": 99531 }, { "epoch": 10.22224504467495, "grad_norm": 0.093788281083107, "learning_rate": 0.01, "loss": 1.9306, "step": 99534 }, { "epoch": 10.222553147786792, "grad_norm": 0.04600469395518303, "learning_rate": 0.01, "loss": 1.9342, "step": 99537 }, { "epoch": 10.222861250898633, "grad_norm": 0.03700911998748779, "learning_rate": 0.01, "loss": 1.9299, "step": 99540 }, { "epoch": 10.223169354010475, "grad_norm": 0.0421181283891201, "learning_rate": 0.01, "loss": 1.9253, "step": 99543 }, { "epoch": 10.223477457122318, "grad_norm": 0.037187300622463226, "learning_rate": 0.01, "loss": 1.9447, "step": 99546 }, { "epoch": 10.223785560234159, "grad_norm": 0.03952307999134064, "learning_rate": 0.01, "loss": 1.924, "step": 99549 }, { "epoch": 10.224093663346, "grad_norm": 0.1031050756573677, "learning_rate": 0.01, "loss": 1.9377, "step": 99552 }, { "epoch": 10.224401766457841, "grad_norm": 0.06861066818237305, "learning_rate": 0.01, "loss": 1.9274, "step": 99555 }, { "epoch": 10.224709869569683, "grad_norm": 0.10859041661024094, "learning_rate": 0.01, "loss": 1.9441, "step": 99558 }, { "epoch": 10.225017972681524, "grad_norm": 0.09697724133729935, "learning_rate": 0.01, "loss": 1.9234, "step": 99561 }, { "epoch": 10.225326075793365, "grad_norm": 0.057506710290908813, "learning_rate": 0.01, "loss": 1.9513, "step": 99564 }, { "epoch": 10.225634178905207, "grad_norm": 0.04600291699171066, "learning_rate": 0.01, "loss": 1.9394, "step": 99567 }, { "epoch": 10.225942282017048, "grad_norm": 0.03872712329030037, "learning_rate": 0.01, "loss": 1.9347, "step": 99570 }, { "epoch": 10.226250385128889, "grad_norm": 0.04338938742876053, "learning_rate": 0.01, "loss": 1.9495, "step": 99573 }, { "epoch": 10.226558488240732, "grad_norm": 0.04375258460640907, "learning_rate": 0.01, "loss": 1.9473, "step": 99576 }, { "epoch": 10.226866591352573, "grad_norm": 0.03891351819038391, "learning_rate": 0.01, "loss": 1.9253, "step": 99579 }, { "epoch": 10.227174694464415, "grad_norm": 0.03821360319852829, "learning_rate": 0.01, "loss": 1.9141, "step": 99582 }, { "epoch": 10.227482797576256, "grad_norm": 0.06259328871965408, "learning_rate": 0.01, "loss": 1.953, "step": 99585 }, { "epoch": 10.227790900688097, "grad_norm": 0.08775690943002701, "learning_rate": 0.01, "loss": 1.9412, "step": 99588 }, { "epoch": 10.228099003799938, "grad_norm": 0.10065601766109467, "learning_rate": 0.01, "loss": 1.9257, "step": 99591 }, { "epoch": 10.22840710691178, "grad_norm": 0.052031807601451874, "learning_rate": 0.01, "loss": 1.969, "step": 99594 }, { "epoch": 10.228715210023621, "grad_norm": 0.04331258311867714, "learning_rate": 0.01, "loss": 1.9182, "step": 99597 }, { "epoch": 10.229023313135462, "grad_norm": 0.044273119419813156, "learning_rate": 0.01, "loss": 1.92, "step": 99600 }, { "epoch": 10.229331416247303, "grad_norm": 0.0545615628361702, "learning_rate": 0.01, "loss": 1.9378, "step": 99603 }, { "epoch": 10.229639519359145, "grad_norm": 0.0441010445356369, "learning_rate": 0.01, "loss": 1.9277, "step": 99606 }, { "epoch": 10.229947622470988, "grad_norm": 0.11924725770950317, "learning_rate": 0.01, "loss": 1.9252, "step": 99609 }, { "epoch": 10.230255725582829, "grad_norm": 0.045972730964422226, "learning_rate": 0.01, "loss": 1.9367, "step": 99612 }, { "epoch": 10.23056382869467, "grad_norm": 0.039950691163539886, "learning_rate": 0.01, "loss": 1.9481, "step": 99615 }, { "epoch": 10.230871931806512, "grad_norm": 0.05845572054386139, "learning_rate": 0.01, "loss": 1.9353, "step": 99618 }, { "epoch": 10.231180034918353, "grad_norm": 0.09689660370349884, "learning_rate": 0.01, "loss": 1.9468, "step": 99621 }, { "epoch": 10.231488138030194, "grad_norm": 0.055139146745204926, "learning_rate": 0.01, "loss": 1.8958, "step": 99624 }, { "epoch": 10.231796241142035, "grad_norm": 0.05283995345234871, "learning_rate": 0.01, "loss": 1.9242, "step": 99627 }, { "epoch": 10.232104344253877, "grad_norm": 0.043195389211177826, "learning_rate": 0.01, "loss": 1.9485, "step": 99630 }, { "epoch": 10.232412447365718, "grad_norm": 0.07608004659414291, "learning_rate": 0.01, "loss": 1.9353, "step": 99633 }, { "epoch": 10.23272055047756, "grad_norm": 0.053913239389657974, "learning_rate": 0.01, "loss": 1.9542, "step": 99636 }, { "epoch": 10.2330286535894, "grad_norm": 0.038333795964717865, "learning_rate": 0.01, "loss": 1.9251, "step": 99639 }, { "epoch": 10.233336756701243, "grad_norm": 0.0701310858130455, "learning_rate": 0.01, "loss": 1.9307, "step": 99642 }, { "epoch": 10.233644859813085, "grad_norm": 0.06686718761920929, "learning_rate": 0.01, "loss": 1.9472, "step": 99645 }, { "epoch": 10.233952962924926, "grad_norm": 0.05882362276315689, "learning_rate": 0.01, "loss": 1.9584, "step": 99648 }, { "epoch": 10.234261066036767, "grad_norm": 0.04660504683852196, "learning_rate": 0.01, "loss": 1.9409, "step": 99651 }, { "epoch": 10.234569169148608, "grad_norm": 0.14900517463684082, "learning_rate": 0.01, "loss": 1.936, "step": 99654 }, { "epoch": 10.23487727226045, "grad_norm": 0.0535171777009964, "learning_rate": 0.01, "loss": 1.9495, "step": 99657 }, { "epoch": 10.235185375372291, "grad_norm": 0.07865377515554428, "learning_rate": 0.01, "loss": 1.9613, "step": 99660 }, { "epoch": 10.235493478484132, "grad_norm": 0.05888461321592331, "learning_rate": 0.01, "loss": 1.9321, "step": 99663 }, { "epoch": 10.235801581595974, "grad_norm": 0.05282779783010483, "learning_rate": 0.01, "loss": 1.9468, "step": 99666 }, { "epoch": 10.236109684707815, "grad_norm": 0.0752410888671875, "learning_rate": 0.01, "loss": 1.9374, "step": 99669 }, { "epoch": 10.236417787819658, "grad_norm": 0.06479644775390625, "learning_rate": 0.01, "loss": 1.9298, "step": 99672 }, { "epoch": 10.236725890931499, "grad_norm": 0.052778132259845734, "learning_rate": 0.01, "loss": 1.9361, "step": 99675 }, { "epoch": 10.23703399404334, "grad_norm": 0.05656125769019127, "learning_rate": 0.01, "loss": 1.9272, "step": 99678 }, { "epoch": 10.237342097155182, "grad_norm": 0.04665529355406761, "learning_rate": 0.01, "loss": 1.9659, "step": 99681 }, { "epoch": 10.237650200267023, "grad_norm": 0.035816192626953125, "learning_rate": 0.01, "loss": 1.9026, "step": 99684 }, { "epoch": 10.237958303378864, "grad_norm": 0.04678123816847801, "learning_rate": 0.01, "loss": 1.9276, "step": 99687 }, { "epoch": 10.238266406490705, "grad_norm": 0.07228527218103409, "learning_rate": 0.01, "loss": 1.9402, "step": 99690 }, { "epoch": 10.238574509602547, "grad_norm": 0.15547145903110504, "learning_rate": 0.01, "loss": 1.9564, "step": 99693 }, { "epoch": 10.238882612714388, "grad_norm": 0.10459847003221512, "learning_rate": 0.01, "loss": 1.9479, "step": 99696 }, { "epoch": 10.23919071582623, "grad_norm": 0.07936637848615646, "learning_rate": 0.01, "loss": 1.9274, "step": 99699 }, { "epoch": 10.23949881893807, "grad_norm": 0.058107465505599976, "learning_rate": 0.01, "loss": 1.9515, "step": 99702 }, { "epoch": 10.239806922049913, "grad_norm": 0.03447607904672623, "learning_rate": 0.01, "loss": 1.9546, "step": 99705 }, { "epoch": 10.240115025161755, "grad_norm": 0.029857028275728226, "learning_rate": 0.01, "loss": 1.9196, "step": 99708 }, { "epoch": 10.240423128273596, "grad_norm": 0.037723857909440994, "learning_rate": 0.01, "loss": 1.9499, "step": 99711 }, { "epoch": 10.240731231385437, "grad_norm": 0.051165997982025146, "learning_rate": 0.01, "loss": 1.9286, "step": 99714 }, { "epoch": 10.241039334497279, "grad_norm": 0.03829183802008629, "learning_rate": 0.01, "loss": 1.9553, "step": 99717 }, { "epoch": 10.24134743760912, "grad_norm": 0.03870538994669914, "learning_rate": 0.01, "loss": 1.9455, "step": 99720 }, { "epoch": 10.241655540720961, "grad_norm": 0.16005821526050568, "learning_rate": 0.01, "loss": 1.9346, "step": 99723 }, { "epoch": 10.241963643832802, "grad_norm": 0.07645492255687714, "learning_rate": 0.01, "loss": 1.9565, "step": 99726 }, { "epoch": 10.242271746944644, "grad_norm": 0.0685250461101532, "learning_rate": 0.01, "loss": 1.9578, "step": 99729 }, { "epoch": 10.242579850056485, "grad_norm": 0.058242980390787125, "learning_rate": 0.01, "loss": 1.9351, "step": 99732 }, { "epoch": 10.242887953168328, "grad_norm": 0.11625604331493378, "learning_rate": 0.01, "loss": 1.9337, "step": 99735 }, { "epoch": 10.24319605628017, "grad_norm": 0.07878465950489044, "learning_rate": 0.01, "loss": 1.9432, "step": 99738 }, { "epoch": 10.24350415939201, "grad_norm": 0.10668841004371643, "learning_rate": 0.01, "loss": 1.9489, "step": 99741 }, { "epoch": 10.243812262503852, "grad_norm": 0.050039686262607574, "learning_rate": 0.01, "loss": 1.9368, "step": 99744 }, { "epoch": 10.244120365615693, "grad_norm": 0.06939005851745605, "learning_rate": 0.01, "loss": 1.9328, "step": 99747 }, { "epoch": 10.244428468727534, "grad_norm": 0.049836236983537674, "learning_rate": 0.01, "loss": 1.9277, "step": 99750 }, { "epoch": 10.244736571839375, "grad_norm": 0.09250394254922867, "learning_rate": 0.01, "loss": 1.9483, "step": 99753 }, { "epoch": 10.245044674951217, "grad_norm": 0.051497768610715866, "learning_rate": 0.01, "loss": 1.9412, "step": 99756 }, { "epoch": 10.245352778063058, "grad_norm": 0.03910185024142265, "learning_rate": 0.01, "loss": 1.9512, "step": 99759 }, { "epoch": 10.2456608811749, "grad_norm": 0.03284444287419319, "learning_rate": 0.01, "loss": 1.9374, "step": 99762 }, { "epoch": 10.24596898428674, "grad_norm": 0.0388886034488678, "learning_rate": 0.01, "loss": 1.9537, "step": 99765 }, { "epoch": 10.246277087398584, "grad_norm": 0.1467835158109665, "learning_rate": 0.01, "loss": 1.9436, "step": 99768 }, { "epoch": 10.246585190510425, "grad_norm": 0.15012861788272858, "learning_rate": 0.01, "loss": 1.9614, "step": 99771 }, { "epoch": 10.246893293622266, "grad_norm": 0.06511637568473816, "learning_rate": 0.01, "loss": 1.9552, "step": 99774 }, { "epoch": 10.247201396734107, "grad_norm": 0.047038592398166656, "learning_rate": 0.01, "loss": 1.9552, "step": 99777 }, { "epoch": 10.247509499845949, "grad_norm": 0.04511455446481705, "learning_rate": 0.01, "loss": 1.9389, "step": 99780 }, { "epoch": 10.24781760295779, "grad_norm": 0.06333138048648834, "learning_rate": 0.01, "loss": 1.9287, "step": 99783 }, { "epoch": 10.248125706069631, "grad_norm": 0.08840259909629822, "learning_rate": 0.01, "loss": 1.9367, "step": 99786 }, { "epoch": 10.248433809181472, "grad_norm": 0.054976560175418854, "learning_rate": 0.01, "loss": 1.9402, "step": 99789 }, { "epoch": 10.248741912293314, "grad_norm": 0.03340661898255348, "learning_rate": 0.01, "loss": 1.9555, "step": 99792 }, { "epoch": 10.249050015405155, "grad_norm": 0.04250464215874672, "learning_rate": 0.01, "loss": 1.9292, "step": 99795 }, { "epoch": 10.249358118516996, "grad_norm": 0.03535330295562744, "learning_rate": 0.01, "loss": 1.9544, "step": 99798 }, { "epoch": 10.24966622162884, "grad_norm": 0.09514196217060089, "learning_rate": 0.01, "loss": 1.9371, "step": 99801 }, { "epoch": 10.24997432474068, "grad_norm": 0.12286601215600967, "learning_rate": 0.01, "loss": 1.9399, "step": 99804 }, { "epoch": 10.250282427852522, "grad_norm": 0.0697341039776802, "learning_rate": 0.01, "loss": 1.9355, "step": 99807 }, { "epoch": 10.250590530964363, "grad_norm": 0.04430979862809181, "learning_rate": 0.01, "loss": 1.9562, "step": 99810 }, { "epoch": 10.250898634076204, "grad_norm": 0.0723920464515686, "learning_rate": 0.01, "loss": 1.9403, "step": 99813 }, { "epoch": 10.251206737188046, "grad_norm": 0.0418916754424572, "learning_rate": 0.01, "loss": 1.9427, "step": 99816 }, { "epoch": 10.251514840299887, "grad_norm": 0.04525751248002052, "learning_rate": 0.01, "loss": 1.9293, "step": 99819 }, { "epoch": 10.251822943411728, "grad_norm": 0.0397968664765358, "learning_rate": 0.01, "loss": 1.9521, "step": 99822 }, { "epoch": 10.25213104652357, "grad_norm": 0.04624214768409729, "learning_rate": 0.01, "loss": 1.9604, "step": 99825 }, { "epoch": 10.25243914963541, "grad_norm": 0.053155429661273956, "learning_rate": 0.01, "loss": 1.9308, "step": 99828 }, { "epoch": 10.252747252747252, "grad_norm": 0.062388960272073746, "learning_rate": 0.01, "loss": 1.923, "step": 99831 }, { "epoch": 10.253055355859095, "grad_norm": 0.03869679942727089, "learning_rate": 0.01, "loss": 1.9574, "step": 99834 }, { "epoch": 10.253363458970936, "grad_norm": 0.08281092345714569, "learning_rate": 0.01, "loss": 1.9638, "step": 99837 }, { "epoch": 10.253671562082777, "grad_norm": 0.10424105823040009, "learning_rate": 0.01, "loss": 1.9203, "step": 99840 }, { "epoch": 10.253979665194619, "grad_norm": 0.13120271265506744, "learning_rate": 0.01, "loss": 1.9525, "step": 99843 }, { "epoch": 10.25428776830646, "grad_norm": 0.06335999816656113, "learning_rate": 0.01, "loss": 1.9343, "step": 99846 }, { "epoch": 10.254595871418301, "grad_norm": 0.035308219492435455, "learning_rate": 0.01, "loss": 1.9215, "step": 99849 }, { "epoch": 10.254903974530142, "grad_norm": 0.10058265179395676, "learning_rate": 0.01, "loss": 1.9153, "step": 99852 }, { "epoch": 10.255212077641984, "grad_norm": 0.05711854621767998, "learning_rate": 0.01, "loss": 1.9288, "step": 99855 }, { "epoch": 10.255520180753825, "grad_norm": 0.13923723995685577, "learning_rate": 0.01, "loss": 1.9285, "step": 99858 }, { "epoch": 10.255828283865666, "grad_norm": 0.09265607595443726, "learning_rate": 0.01, "loss": 1.973, "step": 99861 }, { "epoch": 10.25613638697751, "grad_norm": 0.05978146195411682, "learning_rate": 0.01, "loss": 1.9236, "step": 99864 }, { "epoch": 10.25644449008935, "grad_norm": 0.05084634944796562, "learning_rate": 0.01, "loss": 1.9595, "step": 99867 }, { "epoch": 10.256752593201192, "grad_norm": 0.06485851854085922, "learning_rate": 0.01, "loss": 1.9365, "step": 99870 }, { "epoch": 10.257060696313033, "grad_norm": 0.05474282428622246, "learning_rate": 0.01, "loss": 1.9484, "step": 99873 }, { "epoch": 10.257368799424874, "grad_norm": 0.08651530742645264, "learning_rate": 0.01, "loss": 1.9366, "step": 99876 }, { "epoch": 10.257676902536716, "grad_norm": 0.08198468387126923, "learning_rate": 0.01, "loss": 1.9582, "step": 99879 }, { "epoch": 10.257985005648557, "grad_norm": 0.0553009919822216, "learning_rate": 0.01, "loss": 1.9351, "step": 99882 }, { "epoch": 10.258293108760398, "grad_norm": 0.10407020151615143, "learning_rate": 0.01, "loss": 1.9127, "step": 99885 }, { "epoch": 10.25860121187224, "grad_norm": 0.06234075501561165, "learning_rate": 0.01, "loss": 1.9608, "step": 99888 }, { "epoch": 10.25890931498408, "grad_norm": 0.03628971427679062, "learning_rate": 0.01, "loss": 1.9398, "step": 99891 }, { "epoch": 10.259217418095922, "grad_norm": 0.036581892520189285, "learning_rate": 0.01, "loss": 1.9484, "step": 99894 }, { "epoch": 10.259525521207765, "grad_norm": 0.04579423740506172, "learning_rate": 0.01, "loss": 1.9064, "step": 99897 }, { "epoch": 10.259833624319606, "grad_norm": 0.03739017993211746, "learning_rate": 0.01, "loss": 1.9674, "step": 99900 }, { "epoch": 10.260141727431447, "grad_norm": 0.1198686808347702, "learning_rate": 0.01, "loss": 1.944, "step": 99903 }, { "epoch": 10.260449830543289, "grad_norm": 0.05373760312795639, "learning_rate": 0.01, "loss": 1.9446, "step": 99906 }, { "epoch": 10.26075793365513, "grad_norm": 0.06018951162695885, "learning_rate": 0.01, "loss": 1.9502, "step": 99909 }, { "epoch": 10.261066036766971, "grad_norm": 0.05019623786211014, "learning_rate": 0.01, "loss": 1.9502, "step": 99912 }, { "epoch": 10.261374139878813, "grad_norm": 0.09850800037384033, "learning_rate": 0.01, "loss": 1.9337, "step": 99915 }, { "epoch": 10.261682242990654, "grad_norm": 0.04459189996123314, "learning_rate": 0.01, "loss": 1.9198, "step": 99918 }, { "epoch": 10.261990346102495, "grad_norm": 0.08180344104766846, "learning_rate": 0.01, "loss": 1.9286, "step": 99921 }, { "epoch": 10.262298449214336, "grad_norm": 0.05907866731286049, "learning_rate": 0.01, "loss": 1.9327, "step": 99924 }, { "epoch": 10.26260655232618, "grad_norm": 0.11938024312257767, "learning_rate": 0.01, "loss": 1.948, "step": 99927 }, { "epoch": 10.26291465543802, "grad_norm": 0.09561539441347122, "learning_rate": 0.01, "loss": 1.9684, "step": 99930 }, { "epoch": 10.263222758549862, "grad_norm": 0.11872205883264542, "learning_rate": 0.01, "loss": 1.9403, "step": 99933 }, { "epoch": 10.263530861661703, "grad_norm": 0.10854201763868332, "learning_rate": 0.01, "loss": 1.9543, "step": 99936 }, { "epoch": 10.263838964773544, "grad_norm": 0.07768164575099945, "learning_rate": 0.01, "loss": 1.9468, "step": 99939 }, { "epoch": 10.264147067885386, "grad_norm": 0.0876104012131691, "learning_rate": 0.01, "loss": 1.9253, "step": 99942 }, { "epoch": 10.264455170997227, "grad_norm": 0.05668925493955612, "learning_rate": 0.01, "loss": 1.9303, "step": 99945 }, { "epoch": 10.264763274109068, "grad_norm": 0.07997167110443115, "learning_rate": 0.01, "loss": 1.9239, "step": 99948 }, { "epoch": 10.26507137722091, "grad_norm": 0.09238320589065552, "learning_rate": 0.01, "loss": 1.9745, "step": 99951 }, { "epoch": 10.26537948033275, "grad_norm": 0.04772636666893959, "learning_rate": 0.01, "loss": 1.923, "step": 99954 }, { "epoch": 10.265687583444592, "grad_norm": 0.04315643757581711, "learning_rate": 0.01, "loss": 1.9405, "step": 99957 }, { "epoch": 10.265995686556435, "grad_norm": 0.0615558885037899, "learning_rate": 0.01, "loss": 1.9257, "step": 99960 }, { "epoch": 10.266303789668276, "grad_norm": 0.1043780967593193, "learning_rate": 0.01, "loss": 1.9568, "step": 99963 }, { "epoch": 10.266611892780118, "grad_norm": 0.05783291906118393, "learning_rate": 0.01, "loss": 1.9628, "step": 99966 }, { "epoch": 10.266919995891959, "grad_norm": 0.09446723759174347, "learning_rate": 0.01, "loss": 1.9603, "step": 99969 }, { "epoch": 10.2672280990038, "grad_norm": 0.08406702429056168, "learning_rate": 0.01, "loss": 1.9215, "step": 99972 }, { "epoch": 10.267536202115641, "grad_norm": 0.055889952927827835, "learning_rate": 0.01, "loss": 1.9397, "step": 99975 }, { "epoch": 10.267844305227483, "grad_norm": 0.06571850925683975, "learning_rate": 0.01, "loss": 1.9611, "step": 99978 }, { "epoch": 10.268152408339324, "grad_norm": 0.09014123678207397, "learning_rate": 0.01, "loss": 1.9724, "step": 99981 }, { "epoch": 10.268460511451165, "grad_norm": 0.05884508416056633, "learning_rate": 0.01, "loss": 1.8986, "step": 99984 }, { "epoch": 10.268768614563006, "grad_norm": 0.09543084353208542, "learning_rate": 0.01, "loss": 1.9475, "step": 99987 }, { "epoch": 10.26907671767485, "grad_norm": 0.048076704144477844, "learning_rate": 0.01, "loss": 1.9593, "step": 99990 }, { "epoch": 10.26938482078669, "grad_norm": 0.11011002212762833, "learning_rate": 0.01, "loss": 1.9494, "step": 99993 }, { "epoch": 10.269692923898532, "grad_norm": 0.04269436001777649, "learning_rate": 0.01, "loss": 1.9254, "step": 99996 }, { "epoch": 10.270001027010373, "grad_norm": 0.09961564093828201, "learning_rate": 0.01, "loss": 1.9643, "step": 99999 }, { "epoch": 10.270309130122214, "grad_norm": 0.08587975800037384, "learning_rate": 0.01, "loss": 1.9124, "step": 100002 }, { "epoch": 10.270617233234056, "grad_norm": 0.07682827115058899, "learning_rate": 0.01, "loss": 1.9573, "step": 100005 }, { "epoch": 10.270925336345897, "grad_norm": 0.0795656368136406, "learning_rate": 0.01, "loss": 1.9515, "step": 100008 }, { "epoch": 10.271233439457738, "grad_norm": 0.054615724831819534, "learning_rate": 0.01, "loss": 1.9617, "step": 100011 }, { "epoch": 10.27154154256958, "grad_norm": 0.10448376089334488, "learning_rate": 0.01, "loss": 1.9651, "step": 100014 }, { "epoch": 10.27184964568142, "grad_norm": 0.04411044716835022, "learning_rate": 0.01, "loss": 1.933, "step": 100017 }, { "epoch": 10.272157748793262, "grad_norm": 0.04791110008955002, "learning_rate": 0.01, "loss": 1.9555, "step": 100020 }, { "epoch": 10.272465851905105, "grad_norm": 0.033793505281209946, "learning_rate": 0.01, "loss": 1.9428, "step": 100023 }, { "epoch": 10.272773955016946, "grad_norm": 0.03846541419625282, "learning_rate": 0.01, "loss": 1.9294, "step": 100026 }, { "epoch": 10.273082058128788, "grad_norm": 0.06312572211027145, "learning_rate": 0.01, "loss": 1.9273, "step": 100029 }, { "epoch": 10.273390161240629, "grad_norm": 0.044390659779310226, "learning_rate": 0.01, "loss": 1.9095, "step": 100032 }, { "epoch": 10.27369826435247, "grad_norm": 0.09738004207611084, "learning_rate": 0.01, "loss": 1.9194, "step": 100035 }, { "epoch": 10.274006367464311, "grad_norm": 0.07020962238311768, "learning_rate": 0.01, "loss": 1.9314, "step": 100038 }, { "epoch": 10.274314470576153, "grad_norm": 0.09719542413949966, "learning_rate": 0.01, "loss": 1.9167, "step": 100041 }, { "epoch": 10.274622573687994, "grad_norm": 0.04547543823719025, "learning_rate": 0.01, "loss": 1.9315, "step": 100044 }, { "epoch": 10.274930676799835, "grad_norm": 0.09154791384935379, "learning_rate": 0.01, "loss": 1.9334, "step": 100047 }, { "epoch": 10.275238779911676, "grad_norm": 0.07742651551961899, "learning_rate": 0.01, "loss": 1.972, "step": 100050 }, { "epoch": 10.275546883023518, "grad_norm": 0.05887138843536377, "learning_rate": 0.01, "loss": 1.9155, "step": 100053 }, { "epoch": 10.27585498613536, "grad_norm": 0.09364501386880875, "learning_rate": 0.01, "loss": 1.9468, "step": 100056 }, { "epoch": 10.276163089247202, "grad_norm": 0.1074671745300293, "learning_rate": 0.01, "loss": 1.946, "step": 100059 }, { "epoch": 10.276471192359043, "grad_norm": 0.04482719674706459, "learning_rate": 0.01, "loss": 1.935, "step": 100062 }, { "epoch": 10.276779295470885, "grad_norm": 0.0769670158624649, "learning_rate": 0.01, "loss": 1.9316, "step": 100065 }, { "epoch": 10.277087398582726, "grad_norm": 0.08696067333221436, "learning_rate": 0.01, "loss": 1.9267, "step": 100068 }, { "epoch": 10.277395501694567, "grad_norm": 0.06060957908630371, "learning_rate": 0.01, "loss": 1.9557, "step": 100071 }, { "epoch": 10.277703604806408, "grad_norm": 0.09612435847520828, "learning_rate": 0.01, "loss": 1.9533, "step": 100074 }, { "epoch": 10.27801170791825, "grad_norm": 0.06246870011091232, "learning_rate": 0.01, "loss": 1.9413, "step": 100077 }, { "epoch": 10.27831981103009, "grad_norm": 0.040454257279634476, "learning_rate": 0.01, "loss": 1.9318, "step": 100080 }, { "epoch": 10.278627914141932, "grad_norm": 0.056246764957904816, "learning_rate": 0.01, "loss": 1.9668, "step": 100083 }, { "epoch": 10.278936017253773, "grad_norm": 0.039684563875198364, "learning_rate": 0.01, "loss": 1.9248, "step": 100086 }, { "epoch": 10.279244120365616, "grad_norm": 0.03487759456038475, "learning_rate": 0.01, "loss": 1.9495, "step": 100089 }, { "epoch": 10.279552223477458, "grad_norm": 0.04042143002152443, "learning_rate": 0.01, "loss": 1.9598, "step": 100092 }, { "epoch": 10.279860326589299, "grad_norm": 0.07096949219703674, "learning_rate": 0.01, "loss": 1.9295, "step": 100095 }, { "epoch": 10.28016842970114, "grad_norm": 0.0847165584564209, "learning_rate": 0.01, "loss": 1.9249, "step": 100098 }, { "epoch": 10.280476532812981, "grad_norm": 0.04604329913854599, "learning_rate": 0.01, "loss": 1.9384, "step": 100101 }, { "epoch": 10.280784635924823, "grad_norm": 0.1045752689242363, "learning_rate": 0.01, "loss": 1.9563, "step": 100104 }, { "epoch": 10.281092739036664, "grad_norm": 0.10349004715681076, "learning_rate": 0.01, "loss": 1.9309, "step": 100107 }, { "epoch": 10.281400842148505, "grad_norm": 0.05549321696162224, "learning_rate": 0.01, "loss": 1.923, "step": 100110 }, { "epoch": 10.281708945260347, "grad_norm": 0.042191196233034134, "learning_rate": 0.01, "loss": 1.9233, "step": 100113 }, { "epoch": 10.282017048372188, "grad_norm": 0.043062347918748856, "learning_rate": 0.01, "loss": 1.9392, "step": 100116 }, { "epoch": 10.28232515148403, "grad_norm": 0.13222946226596832, "learning_rate": 0.01, "loss": 1.9295, "step": 100119 }, { "epoch": 10.282633254595872, "grad_norm": 0.04551191255450249, "learning_rate": 0.01, "loss": 1.9411, "step": 100122 }, { "epoch": 10.282941357707713, "grad_norm": 0.041608721017837524, "learning_rate": 0.01, "loss": 1.9518, "step": 100125 }, { "epoch": 10.283249460819555, "grad_norm": 0.03729642927646637, "learning_rate": 0.01, "loss": 1.9456, "step": 100128 }, { "epoch": 10.283557563931396, "grad_norm": 0.11232533305883408, "learning_rate": 0.01, "loss": 1.9422, "step": 100131 }, { "epoch": 10.283865667043237, "grad_norm": 0.12017875909805298, "learning_rate": 0.01, "loss": 1.9422, "step": 100134 }, { "epoch": 10.284173770155078, "grad_norm": 0.08112366497516632, "learning_rate": 0.01, "loss": 1.9613, "step": 100137 }, { "epoch": 10.28448187326692, "grad_norm": 0.053107235580682755, "learning_rate": 0.01, "loss": 1.9752, "step": 100140 }, { "epoch": 10.284789976378761, "grad_norm": 0.03232099860906601, "learning_rate": 0.01, "loss": 1.9222, "step": 100143 }, { "epoch": 10.285098079490602, "grad_norm": 0.034685634076595306, "learning_rate": 0.01, "loss": 1.9492, "step": 100146 }, { "epoch": 10.285406182602443, "grad_norm": 0.06027882918715477, "learning_rate": 0.01, "loss": 1.9273, "step": 100149 }, { "epoch": 10.285714285714286, "grad_norm": 0.14968882501125336, "learning_rate": 0.01, "loss": 1.9308, "step": 100152 }, { "epoch": 10.286022388826128, "grad_norm": 0.07659412175416946, "learning_rate": 0.01, "loss": 1.9427, "step": 100155 }, { "epoch": 10.286330491937969, "grad_norm": 0.05984310433268547, "learning_rate": 0.01, "loss": 1.9326, "step": 100158 }, { "epoch": 10.28663859504981, "grad_norm": 0.07672173529863358, "learning_rate": 0.01, "loss": 1.955, "step": 100161 }, { "epoch": 10.286946698161652, "grad_norm": 0.04206996411085129, "learning_rate": 0.01, "loss": 1.9512, "step": 100164 }, { "epoch": 10.287254801273493, "grad_norm": 0.07788781821727753, "learning_rate": 0.01, "loss": 1.9527, "step": 100167 }, { "epoch": 10.287562904385334, "grad_norm": 0.04337256774306297, "learning_rate": 0.01, "loss": 1.9427, "step": 100170 }, { "epoch": 10.287871007497175, "grad_norm": 0.03464846685528755, "learning_rate": 0.01, "loss": 1.9397, "step": 100173 }, { "epoch": 10.288179110609017, "grad_norm": 0.10294554382562637, "learning_rate": 0.01, "loss": 1.9408, "step": 100176 }, { "epoch": 10.288487213720858, "grad_norm": 0.0609651543200016, "learning_rate": 0.01, "loss": 1.933, "step": 100179 }, { "epoch": 10.2887953168327, "grad_norm": 0.0706949234008789, "learning_rate": 0.01, "loss": 1.9488, "step": 100182 }, { "epoch": 10.289103419944542, "grad_norm": 0.08191784471273422, "learning_rate": 0.01, "loss": 1.9362, "step": 100185 }, { "epoch": 10.289411523056383, "grad_norm": 0.06882110238075256, "learning_rate": 0.01, "loss": 1.9203, "step": 100188 }, { "epoch": 10.289719626168225, "grad_norm": 0.0786321684718132, "learning_rate": 0.01, "loss": 1.9505, "step": 100191 }, { "epoch": 10.290027729280066, "grad_norm": 0.03936995565891266, "learning_rate": 0.01, "loss": 1.9406, "step": 100194 }, { "epoch": 10.290335832391907, "grad_norm": 0.046122245490550995, "learning_rate": 0.01, "loss": 1.9296, "step": 100197 }, { "epoch": 10.290643935503748, "grad_norm": 0.039319563657045364, "learning_rate": 0.01, "loss": 1.9286, "step": 100200 }, { "epoch": 10.29095203861559, "grad_norm": 0.11181558668613434, "learning_rate": 0.01, "loss": 1.9405, "step": 100203 }, { "epoch": 10.291260141727431, "grad_norm": 0.11319585144519806, "learning_rate": 0.01, "loss": 1.9622, "step": 100206 }, { "epoch": 10.291568244839272, "grad_norm": 0.1189330592751503, "learning_rate": 0.01, "loss": 1.9417, "step": 100209 }, { "epoch": 10.291876347951114, "grad_norm": 0.1420219987630844, "learning_rate": 0.01, "loss": 1.9464, "step": 100212 }, { "epoch": 10.292184451062957, "grad_norm": 0.05568074434995651, "learning_rate": 0.01, "loss": 1.9262, "step": 100215 }, { "epoch": 10.292492554174798, "grad_norm": 0.0501365102827549, "learning_rate": 0.01, "loss": 1.9401, "step": 100218 }, { "epoch": 10.292800657286639, "grad_norm": 0.038629259914159775, "learning_rate": 0.01, "loss": 1.9364, "step": 100221 }, { "epoch": 10.29310876039848, "grad_norm": 0.05056120455265045, "learning_rate": 0.01, "loss": 1.9374, "step": 100224 }, { "epoch": 10.293416863510322, "grad_norm": 0.04930442199110985, "learning_rate": 0.01, "loss": 1.9313, "step": 100227 }, { "epoch": 10.293724966622163, "grad_norm": 0.04605105519294739, "learning_rate": 0.01, "loss": 1.9341, "step": 100230 }, { "epoch": 10.294033069734004, "grad_norm": 0.11034657806158066, "learning_rate": 0.01, "loss": 1.9353, "step": 100233 }, { "epoch": 10.294341172845845, "grad_norm": 0.048685695976018906, "learning_rate": 0.01, "loss": 1.9251, "step": 100236 }, { "epoch": 10.294649275957687, "grad_norm": 0.1337871253490448, "learning_rate": 0.01, "loss": 1.9471, "step": 100239 }, { "epoch": 10.294957379069528, "grad_norm": 0.11180833727121353, "learning_rate": 0.01, "loss": 1.9617, "step": 100242 }, { "epoch": 10.295265482181371, "grad_norm": 0.05863872915506363, "learning_rate": 0.01, "loss": 1.9534, "step": 100245 }, { "epoch": 10.295573585293212, "grad_norm": 0.0769515186548233, "learning_rate": 0.01, "loss": 1.923, "step": 100248 }, { "epoch": 10.295881688405053, "grad_norm": 0.09128164499998093, "learning_rate": 0.01, "loss": 1.9295, "step": 100251 }, { "epoch": 10.296189791516895, "grad_norm": 0.08195117115974426, "learning_rate": 0.01, "loss": 1.9499, "step": 100254 }, { "epoch": 10.296497894628736, "grad_norm": 0.05502847582101822, "learning_rate": 0.01, "loss": 1.9278, "step": 100257 }, { "epoch": 10.296805997740577, "grad_norm": 0.04186360910534859, "learning_rate": 0.01, "loss": 1.938, "step": 100260 }, { "epoch": 10.297114100852419, "grad_norm": 0.03892785683274269, "learning_rate": 0.01, "loss": 1.9218, "step": 100263 }, { "epoch": 10.29742220396426, "grad_norm": 0.042466506361961365, "learning_rate": 0.01, "loss": 1.9382, "step": 100266 }, { "epoch": 10.297730307076101, "grad_norm": 0.09952762722969055, "learning_rate": 0.01, "loss": 1.9472, "step": 100269 }, { "epoch": 10.298038410187942, "grad_norm": 0.09318811446428299, "learning_rate": 0.01, "loss": 1.9364, "step": 100272 }, { "epoch": 10.298346513299784, "grad_norm": 0.05363912135362625, "learning_rate": 0.01, "loss": 1.934, "step": 100275 }, { "epoch": 10.298654616411627, "grad_norm": 0.08193670958280563, "learning_rate": 0.01, "loss": 1.9585, "step": 100278 }, { "epoch": 10.298962719523468, "grad_norm": 0.03696845844388008, "learning_rate": 0.01, "loss": 1.9545, "step": 100281 }, { "epoch": 10.29927082263531, "grad_norm": 0.03941279277205467, "learning_rate": 0.01, "loss": 1.9174, "step": 100284 }, { "epoch": 10.29957892574715, "grad_norm": 0.033926572650671005, "learning_rate": 0.01, "loss": 1.9606, "step": 100287 }, { "epoch": 10.299887028858992, "grad_norm": 0.052354805171489716, "learning_rate": 0.01, "loss": 1.9486, "step": 100290 }, { "epoch": 10.300195131970833, "grad_norm": 0.04476560652256012, "learning_rate": 0.01, "loss": 1.93, "step": 100293 }, { "epoch": 10.300503235082674, "grad_norm": 0.05641723424196243, "learning_rate": 0.01, "loss": 1.9706, "step": 100296 }, { "epoch": 10.300811338194515, "grad_norm": 0.035899363458156586, "learning_rate": 0.01, "loss": 1.9387, "step": 100299 }, { "epoch": 10.301119441306357, "grad_norm": 0.10292194783687592, "learning_rate": 0.01, "loss": 1.9478, "step": 100302 }, { "epoch": 10.301427544418198, "grad_norm": 0.04253707453608513, "learning_rate": 0.01, "loss": 1.9542, "step": 100305 }, { "epoch": 10.30173564753004, "grad_norm": 0.04332521930336952, "learning_rate": 0.01, "loss": 1.9374, "step": 100308 }, { "epoch": 10.302043750641882, "grad_norm": 0.060099441558122635, "learning_rate": 0.01, "loss": 1.9532, "step": 100311 }, { "epoch": 10.302351853753724, "grad_norm": 0.039917197078466415, "learning_rate": 0.01, "loss": 1.9355, "step": 100314 }, { "epoch": 10.302659956865565, "grad_norm": 0.05037233605980873, "learning_rate": 0.01, "loss": 1.9246, "step": 100317 }, { "epoch": 10.302968059977406, "grad_norm": 0.038528043776750565, "learning_rate": 0.01, "loss": 1.9294, "step": 100320 }, { "epoch": 10.303276163089247, "grad_norm": 0.06853263825178146, "learning_rate": 0.01, "loss": 1.966, "step": 100323 }, { "epoch": 10.303584266201089, "grad_norm": 0.04892963916063309, "learning_rate": 0.01, "loss": 1.9265, "step": 100326 }, { "epoch": 10.30389236931293, "grad_norm": 0.06138693541288376, "learning_rate": 0.01, "loss": 1.9519, "step": 100329 }, { "epoch": 10.304200472424771, "grad_norm": 0.07353366911411285, "learning_rate": 0.01, "loss": 1.9252, "step": 100332 }, { "epoch": 10.304508575536612, "grad_norm": 0.11467985063791275, "learning_rate": 0.01, "loss": 1.9488, "step": 100335 }, { "epoch": 10.304816678648454, "grad_norm": 0.059402890503406525, "learning_rate": 0.01, "loss": 1.932, "step": 100338 }, { "epoch": 10.305124781760295, "grad_norm": 0.0429309643805027, "learning_rate": 0.01, "loss": 1.9278, "step": 100341 }, { "epoch": 10.305432884872138, "grad_norm": 0.04219118505716324, "learning_rate": 0.01, "loss": 1.9159, "step": 100344 }, { "epoch": 10.30574098798398, "grad_norm": 0.03355926647782326, "learning_rate": 0.01, "loss": 1.9138, "step": 100347 }, { "epoch": 10.30604909109582, "grad_norm": 0.120177261531353, "learning_rate": 0.01, "loss": 1.955, "step": 100350 }, { "epoch": 10.306357194207662, "grad_norm": 0.04763586446642876, "learning_rate": 0.01, "loss": 1.9215, "step": 100353 }, { "epoch": 10.306665297319503, "grad_norm": 0.07865716516971588, "learning_rate": 0.01, "loss": 1.9432, "step": 100356 }, { "epoch": 10.306973400431344, "grad_norm": 0.047695744782686234, "learning_rate": 0.01, "loss": 1.9459, "step": 100359 }, { "epoch": 10.307281503543186, "grad_norm": 0.07048449665307999, "learning_rate": 0.01, "loss": 1.9485, "step": 100362 }, { "epoch": 10.307589606655027, "grad_norm": 0.05483188107609749, "learning_rate": 0.01, "loss": 1.9505, "step": 100365 }, { "epoch": 10.307897709766868, "grad_norm": 0.057623494416475296, "learning_rate": 0.01, "loss": 1.9402, "step": 100368 }, { "epoch": 10.30820581287871, "grad_norm": 0.05770512670278549, "learning_rate": 0.01, "loss": 1.9472, "step": 100371 }, { "epoch": 10.308513915990552, "grad_norm": 0.04068088158965111, "learning_rate": 0.01, "loss": 1.933, "step": 100374 }, { "epoch": 10.308822019102394, "grad_norm": 0.046589791774749756, "learning_rate": 0.01, "loss": 1.9525, "step": 100377 }, { "epoch": 10.309130122214235, "grad_norm": 0.04968668520450592, "learning_rate": 0.01, "loss": 1.9301, "step": 100380 }, { "epoch": 10.309438225326076, "grad_norm": 0.12425777316093445, "learning_rate": 0.01, "loss": 1.9496, "step": 100383 }, { "epoch": 10.309746328437917, "grad_norm": 0.07676790654659271, "learning_rate": 0.01, "loss": 1.9289, "step": 100386 }, { "epoch": 10.310054431549759, "grad_norm": 0.09203135967254639, "learning_rate": 0.01, "loss": 1.941, "step": 100389 }, { "epoch": 10.3103625346616, "grad_norm": 0.057039182633161545, "learning_rate": 0.01, "loss": 1.9485, "step": 100392 }, { "epoch": 10.310670637773441, "grad_norm": 0.0423281192779541, "learning_rate": 0.01, "loss": 1.9592, "step": 100395 }, { "epoch": 10.310978740885282, "grad_norm": 0.05860494449734688, "learning_rate": 0.01, "loss": 1.9413, "step": 100398 }, { "epoch": 10.311286843997124, "grad_norm": 0.06034504249691963, "learning_rate": 0.01, "loss": 1.9265, "step": 100401 }, { "epoch": 10.311594947108965, "grad_norm": 0.07440521568059921, "learning_rate": 0.01, "loss": 1.9562, "step": 100404 }, { "epoch": 10.311903050220808, "grad_norm": 0.0757577195763588, "learning_rate": 0.01, "loss": 1.9698, "step": 100407 }, { "epoch": 10.31221115333265, "grad_norm": 0.06766487658023834, "learning_rate": 0.01, "loss": 1.9189, "step": 100410 }, { "epoch": 10.31251925644449, "grad_norm": 0.09500867873430252, "learning_rate": 0.01, "loss": 1.9443, "step": 100413 }, { "epoch": 10.312827359556332, "grad_norm": 0.12139034271240234, "learning_rate": 0.01, "loss": 1.9387, "step": 100416 }, { "epoch": 10.313135462668173, "grad_norm": 0.09395263344049454, "learning_rate": 0.01, "loss": 1.9533, "step": 100419 }, { "epoch": 10.313443565780014, "grad_norm": 0.1121523380279541, "learning_rate": 0.01, "loss": 1.9471, "step": 100422 }, { "epoch": 10.313751668891856, "grad_norm": 0.11462993174791336, "learning_rate": 0.01, "loss": 1.9479, "step": 100425 }, { "epoch": 10.314059772003697, "grad_norm": 0.03882847726345062, "learning_rate": 0.01, "loss": 1.9513, "step": 100428 }, { "epoch": 10.314367875115538, "grad_norm": 0.03873373568058014, "learning_rate": 0.01, "loss": 1.95, "step": 100431 }, { "epoch": 10.31467597822738, "grad_norm": 0.04255416616797447, "learning_rate": 0.01, "loss": 1.9139, "step": 100434 }, { "epoch": 10.314984081339222, "grad_norm": 0.05258651077747345, "learning_rate": 0.01, "loss": 1.9595, "step": 100437 }, { "epoch": 10.315292184451064, "grad_norm": 0.04408138990402222, "learning_rate": 0.01, "loss": 1.9563, "step": 100440 }, { "epoch": 10.315600287562905, "grad_norm": 0.04116139933466911, "learning_rate": 0.01, "loss": 1.9368, "step": 100443 }, { "epoch": 10.315908390674746, "grad_norm": 0.06574319303035736, "learning_rate": 0.01, "loss": 1.9406, "step": 100446 }, { "epoch": 10.316216493786587, "grad_norm": 0.040884729474782944, "learning_rate": 0.01, "loss": 1.92, "step": 100449 }, { "epoch": 10.316524596898429, "grad_norm": 0.052256714552640915, "learning_rate": 0.01, "loss": 1.96, "step": 100452 }, { "epoch": 10.31683270001027, "grad_norm": 0.05303657799959183, "learning_rate": 0.01, "loss": 1.926, "step": 100455 }, { "epoch": 10.317140803122111, "grad_norm": 0.19547231495380402, "learning_rate": 0.01, "loss": 1.949, "step": 100458 }, { "epoch": 10.317448906233952, "grad_norm": 0.14729028940200806, "learning_rate": 0.01, "loss": 1.955, "step": 100461 }, { "epoch": 10.317757009345794, "grad_norm": 0.04628564044833183, "learning_rate": 0.01, "loss": 1.9554, "step": 100464 }, { "epoch": 10.318065112457635, "grad_norm": 0.04533575847744942, "learning_rate": 0.01, "loss": 1.945, "step": 100467 }, { "epoch": 10.318373215569478, "grad_norm": 0.04745553433895111, "learning_rate": 0.01, "loss": 1.9261, "step": 100470 }, { "epoch": 10.31868131868132, "grad_norm": 0.04309360310435295, "learning_rate": 0.01, "loss": 1.9371, "step": 100473 }, { "epoch": 10.31898942179316, "grad_norm": 0.0440213605761528, "learning_rate": 0.01, "loss": 1.9585, "step": 100476 }, { "epoch": 10.319297524905002, "grad_norm": 0.1071145236492157, "learning_rate": 0.01, "loss": 1.9269, "step": 100479 }, { "epoch": 10.319605628016843, "grad_norm": 0.06137925758957863, "learning_rate": 0.01, "loss": 1.9389, "step": 100482 }, { "epoch": 10.319913731128684, "grad_norm": 0.06886938214302063, "learning_rate": 0.01, "loss": 1.9357, "step": 100485 }, { "epoch": 10.320221834240526, "grad_norm": 0.049702685326337814, "learning_rate": 0.01, "loss": 1.9362, "step": 100488 }, { "epoch": 10.320529937352367, "grad_norm": 0.11094728857278824, "learning_rate": 0.01, "loss": 1.9402, "step": 100491 }, { "epoch": 10.320838040464208, "grad_norm": 0.06528878957033157, "learning_rate": 0.01, "loss": 1.9283, "step": 100494 }, { "epoch": 10.32114614357605, "grad_norm": 0.06091786548495293, "learning_rate": 0.01, "loss": 1.9307, "step": 100497 }, { "epoch": 10.32145424668789, "grad_norm": 0.09342446178197861, "learning_rate": 0.01, "loss": 1.9289, "step": 100500 }, { "epoch": 10.321762349799734, "grad_norm": 0.08900013566017151, "learning_rate": 0.01, "loss": 1.9342, "step": 100503 }, { "epoch": 10.322070452911575, "grad_norm": 0.04888587072491646, "learning_rate": 0.01, "loss": 1.9563, "step": 100506 }, { "epoch": 10.322378556023416, "grad_norm": 0.10019969940185547, "learning_rate": 0.01, "loss": 1.945, "step": 100509 }, { "epoch": 10.322686659135258, "grad_norm": 0.11456365883350372, "learning_rate": 0.01, "loss": 1.9588, "step": 100512 }, { "epoch": 10.322994762247099, "grad_norm": 0.09369392693042755, "learning_rate": 0.01, "loss": 1.9248, "step": 100515 }, { "epoch": 10.32330286535894, "grad_norm": 0.08547468483448029, "learning_rate": 0.01, "loss": 1.9505, "step": 100518 }, { "epoch": 10.323610968470781, "grad_norm": 0.15878820419311523, "learning_rate": 0.01, "loss": 1.9386, "step": 100521 }, { "epoch": 10.323919071582623, "grad_norm": 0.1548968404531479, "learning_rate": 0.01, "loss": 1.9693, "step": 100524 }, { "epoch": 10.324227174694464, "grad_norm": 0.12693747878074646, "learning_rate": 0.01, "loss": 1.9182, "step": 100527 }, { "epoch": 10.324535277806305, "grad_norm": 0.08565516024827957, "learning_rate": 0.01, "loss": 1.9422, "step": 100530 }, { "epoch": 10.324843380918148, "grad_norm": 0.04825468361377716, "learning_rate": 0.01, "loss": 1.8997, "step": 100533 }, { "epoch": 10.32515148402999, "grad_norm": 0.04770709201693535, "learning_rate": 0.01, "loss": 1.9443, "step": 100536 }, { "epoch": 10.32545958714183, "grad_norm": 0.038272783160209656, "learning_rate": 0.01, "loss": 1.9521, "step": 100539 }, { "epoch": 10.325767690253672, "grad_norm": 0.041678592562675476, "learning_rate": 0.01, "loss": 1.9189, "step": 100542 }, { "epoch": 10.326075793365513, "grad_norm": 0.03310447931289673, "learning_rate": 0.01, "loss": 1.9514, "step": 100545 }, { "epoch": 10.326383896477354, "grad_norm": 0.03896121308207512, "learning_rate": 0.01, "loss": 1.9292, "step": 100548 }, { "epoch": 10.326691999589196, "grad_norm": 0.04841841757297516, "learning_rate": 0.01, "loss": 1.9342, "step": 100551 }, { "epoch": 10.327000102701037, "grad_norm": 0.08073825389146805, "learning_rate": 0.01, "loss": 1.9344, "step": 100554 }, { "epoch": 10.327308205812878, "grad_norm": 0.09406491369009018, "learning_rate": 0.01, "loss": 1.9565, "step": 100557 }, { "epoch": 10.32761630892472, "grad_norm": 0.044824738055467606, "learning_rate": 0.01, "loss": 1.9642, "step": 100560 }, { "epoch": 10.32792441203656, "grad_norm": 0.08207328617572784, "learning_rate": 0.01, "loss": 1.9648, "step": 100563 }, { "epoch": 10.328232515148404, "grad_norm": 0.10041683167219162, "learning_rate": 0.01, "loss": 1.9688, "step": 100566 }, { "epoch": 10.328540618260245, "grad_norm": 0.04280916601419449, "learning_rate": 0.01, "loss": 1.915, "step": 100569 }, { "epoch": 10.328848721372086, "grad_norm": 0.047373414039611816, "learning_rate": 0.01, "loss": 1.9341, "step": 100572 }, { "epoch": 10.329156824483928, "grad_norm": 0.06335277855396271, "learning_rate": 0.01, "loss": 1.9572, "step": 100575 }, { "epoch": 10.329464927595769, "grad_norm": 0.05805109813809395, "learning_rate": 0.01, "loss": 1.9395, "step": 100578 }, { "epoch": 10.32977303070761, "grad_norm": 0.11722945421934128, "learning_rate": 0.01, "loss": 1.9217, "step": 100581 }, { "epoch": 10.330081133819451, "grad_norm": 0.06972774118185043, "learning_rate": 0.01, "loss": 1.9204, "step": 100584 }, { "epoch": 10.330389236931293, "grad_norm": 0.07877903431653976, "learning_rate": 0.01, "loss": 1.9645, "step": 100587 }, { "epoch": 10.330697340043134, "grad_norm": 0.08234605193138123, "learning_rate": 0.01, "loss": 1.9434, "step": 100590 }, { "epoch": 10.331005443154975, "grad_norm": 0.05679229274392128, "learning_rate": 0.01, "loss": 1.9472, "step": 100593 }, { "epoch": 10.331313546266816, "grad_norm": 0.04508120194077492, "learning_rate": 0.01, "loss": 1.9596, "step": 100596 }, { "epoch": 10.33162164937866, "grad_norm": 0.08512305468320847, "learning_rate": 0.01, "loss": 1.9349, "step": 100599 }, { "epoch": 10.3319297524905, "grad_norm": 0.07284285128116608, "learning_rate": 0.01, "loss": 1.948, "step": 100602 }, { "epoch": 10.332237855602342, "grad_norm": 0.11658084392547607, "learning_rate": 0.01, "loss": 1.9645, "step": 100605 }, { "epoch": 10.332545958714183, "grad_norm": 0.14260004460811615, "learning_rate": 0.01, "loss": 1.9393, "step": 100608 }, { "epoch": 10.332854061826025, "grad_norm": 0.06603503972291946, "learning_rate": 0.01, "loss": 1.948, "step": 100611 }, { "epoch": 10.333162164937866, "grad_norm": 0.037755999714136124, "learning_rate": 0.01, "loss": 1.9618, "step": 100614 }, { "epoch": 10.333470268049707, "grad_norm": 0.04604731872677803, "learning_rate": 0.01, "loss": 1.9224, "step": 100617 }, { "epoch": 10.333778371161548, "grad_norm": 0.05650022253394127, "learning_rate": 0.01, "loss": 1.9482, "step": 100620 }, { "epoch": 10.33408647427339, "grad_norm": 0.05804010108113289, "learning_rate": 0.01, "loss": 1.9706, "step": 100623 }, { "epoch": 10.33439457738523, "grad_norm": 0.06526564806699753, "learning_rate": 0.01, "loss": 1.9338, "step": 100626 }, { "epoch": 10.334702680497074, "grad_norm": 0.055868957191705704, "learning_rate": 0.01, "loss": 1.9488, "step": 100629 }, { "epoch": 10.335010783608915, "grad_norm": 0.07749674469232559, "learning_rate": 0.01, "loss": 1.94, "step": 100632 }, { "epoch": 10.335318886720756, "grad_norm": 0.08783943951129913, "learning_rate": 0.01, "loss": 1.9344, "step": 100635 }, { "epoch": 10.335626989832598, "grad_norm": 0.0857509896159172, "learning_rate": 0.01, "loss": 1.9775, "step": 100638 }, { "epoch": 10.335935092944439, "grad_norm": 0.0713290348649025, "learning_rate": 0.01, "loss": 1.9292, "step": 100641 }, { "epoch": 10.33624319605628, "grad_norm": 0.08365776389837265, "learning_rate": 0.01, "loss": 1.9322, "step": 100644 }, { "epoch": 10.336551299168121, "grad_norm": 0.06503187865018845, "learning_rate": 0.01, "loss": 1.9629, "step": 100647 }, { "epoch": 10.336859402279963, "grad_norm": 0.08043991774320602, "learning_rate": 0.01, "loss": 1.9197, "step": 100650 }, { "epoch": 10.337167505391804, "grad_norm": 0.06439603865146637, "learning_rate": 0.01, "loss": 1.9401, "step": 100653 }, { "epoch": 10.337475608503645, "grad_norm": 0.1296110451221466, "learning_rate": 0.01, "loss": 1.9453, "step": 100656 }, { "epoch": 10.337783711615486, "grad_norm": 0.1248815581202507, "learning_rate": 0.01, "loss": 1.9129, "step": 100659 }, { "epoch": 10.33809181472733, "grad_norm": 0.09833067655563354, "learning_rate": 0.01, "loss": 1.9541, "step": 100662 }, { "epoch": 10.33839991783917, "grad_norm": 0.056371044367551804, "learning_rate": 0.01, "loss": 1.944, "step": 100665 }, { "epoch": 10.338708020951012, "grad_norm": 0.07650956511497498, "learning_rate": 0.01, "loss": 1.9693, "step": 100668 }, { "epoch": 10.339016124062853, "grad_norm": 0.049769867211580276, "learning_rate": 0.01, "loss": 1.9358, "step": 100671 }, { "epoch": 10.339324227174695, "grad_norm": 0.03484850376844406, "learning_rate": 0.01, "loss": 1.9247, "step": 100674 }, { "epoch": 10.339632330286536, "grad_norm": 0.07614227384328842, "learning_rate": 0.01, "loss": 1.9115, "step": 100677 }, { "epoch": 10.339940433398377, "grad_norm": 0.08095826953649521, "learning_rate": 0.01, "loss": 1.9313, "step": 100680 }, { "epoch": 10.340248536510218, "grad_norm": 0.05176486819982529, "learning_rate": 0.01, "loss": 1.9381, "step": 100683 }, { "epoch": 10.34055663962206, "grad_norm": 0.0889786034822464, "learning_rate": 0.01, "loss": 1.9139, "step": 100686 }, { "epoch": 10.3408647427339, "grad_norm": 0.07184679061174393, "learning_rate": 0.01, "loss": 1.9402, "step": 100689 }, { "epoch": 10.341172845845744, "grad_norm": 0.057422127574682236, "learning_rate": 0.01, "loss": 1.9321, "step": 100692 }, { "epoch": 10.341480948957585, "grad_norm": 0.11470889300107956, "learning_rate": 0.01, "loss": 1.9591, "step": 100695 }, { "epoch": 10.341789052069426, "grad_norm": 0.03806557506322861, "learning_rate": 0.01, "loss": 1.9383, "step": 100698 }, { "epoch": 10.342097155181268, "grad_norm": 0.14512214064598083, "learning_rate": 0.01, "loss": 1.9455, "step": 100701 }, { "epoch": 10.342405258293109, "grad_norm": 0.058107271790504456, "learning_rate": 0.01, "loss": 1.9383, "step": 100704 }, { "epoch": 10.34271336140495, "grad_norm": 0.040821373462677, "learning_rate": 0.01, "loss": 1.9282, "step": 100707 }, { "epoch": 10.343021464516791, "grad_norm": 0.048679567873477936, "learning_rate": 0.01, "loss": 1.9552, "step": 100710 }, { "epoch": 10.343329567628633, "grad_norm": 0.03365056589245796, "learning_rate": 0.01, "loss": 1.9346, "step": 100713 }, { "epoch": 10.343637670740474, "grad_norm": 0.13085153698921204, "learning_rate": 0.01, "loss": 1.9249, "step": 100716 }, { "epoch": 10.343945773852315, "grad_norm": 0.037281155586242676, "learning_rate": 0.01, "loss": 1.9383, "step": 100719 }, { "epoch": 10.344253876964157, "grad_norm": 0.04350685700774193, "learning_rate": 0.01, "loss": 1.9578, "step": 100722 }, { "epoch": 10.344561980076, "grad_norm": 0.03874243050813675, "learning_rate": 0.01, "loss": 1.9378, "step": 100725 }, { "epoch": 10.34487008318784, "grad_norm": 0.052796006202697754, "learning_rate": 0.01, "loss": 1.9248, "step": 100728 }, { "epoch": 10.345178186299682, "grad_norm": 0.04441223666071892, "learning_rate": 0.01, "loss": 1.9481, "step": 100731 }, { "epoch": 10.345486289411523, "grad_norm": 0.04818743094801903, "learning_rate": 0.01, "loss": 1.9687, "step": 100734 }, { "epoch": 10.345794392523365, "grad_norm": 0.07991688698530197, "learning_rate": 0.01, "loss": 1.9528, "step": 100737 }, { "epoch": 10.346102495635206, "grad_norm": 0.07785684615373611, "learning_rate": 0.01, "loss": 1.9557, "step": 100740 }, { "epoch": 10.346410598747047, "grad_norm": 0.05483667552471161, "learning_rate": 0.01, "loss": 1.9442, "step": 100743 }, { "epoch": 10.346718701858888, "grad_norm": 0.05420774593949318, "learning_rate": 0.01, "loss": 1.9709, "step": 100746 }, { "epoch": 10.34702680497073, "grad_norm": 0.03596429526805878, "learning_rate": 0.01, "loss": 1.9512, "step": 100749 }, { "epoch": 10.347334908082571, "grad_norm": 0.04498706012964249, "learning_rate": 0.01, "loss": 1.9688, "step": 100752 }, { "epoch": 10.347643011194412, "grad_norm": 0.038987092673778534, "learning_rate": 0.01, "loss": 1.9454, "step": 100755 }, { "epoch": 10.347951114306255, "grad_norm": 0.13664734363555908, "learning_rate": 0.01, "loss": 1.9398, "step": 100758 }, { "epoch": 10.348259217418097, "grad_norm": 0.0331043042242527, "learning_rate": 0.01, "loss": 1.9449, "step": 100761 }, { "epoch": 10.348567320529938, "grad_norm": 0.1004151701927185, "learning_rate": 0.01, "loss": 1.9675, "step": 100764 }, { "epoch": 10.348875423641779, "grad_norm": 0.10532857477664948, "learning_rate": 0.01, "loss": 1.9173, "step": 100767 }, { "epoch": 10.34918352675362, "grad_norm": 0.05065859481692314, "learning_rate": 0.01, "loss": 1.9359, "step": 100770 }, { "epoch": 10.349491629865462, "grad_norm": 0.06383136659860611, "learning_rate": 0.01, "loss": 1.9384, "step": 100773 }, { "epoch": 10.349799732977303, "grad_norm": 0.08385428041219711, "learning_rate": 0.01, "loss": 1.9508, "step": 100776 }, { "epoch": 10.350107836089144, "grad_norm": 0.06286201626062393, "learning_rate": 0.01, "loss": 1.9293, "step": 100779 }, { "epoch": 10.350415939200985, "grad_norm": 0.056411102414131165, "learning_rate": 0.01, "loss": 1.9202, "step": 100782 }, { "epoch": 10.350724042312827, "grad_norm": 0.0546356700360775, "learning_rate": 0.01, "loss": 1.9322, "step": 100785 }, { "epoch": 10.351032145424668, "grad_norm": 0.04161200299859047, "learning_rate": 0.01, "loss": 1.9468, "step": 100788 }, { "epoch": 10.351340248536511, "grad_norm": 0.1038103774189949, "learning_rate": 0.01, "loss": 1.9351, "step": 100791 }, { "epoch": 10.351648351648352, "grad_norm": 0.038900673389434814, "learning_rate": 0.01, "loss": 1.9414, "step": 100794 }, { "epoch": 10.351956454760193, "grad_norm": 0.04477495327591896, "learning_rate": 0.01, "loss": 1.9518, "step": 100797 }, { "epoch": 10.352264557872035, "grad_norm": 0.034626856446266174, "learning_rate": 0.01, "loss": 1.9532, "step": 100800 }, { "epoch": 10.352572660983876, "grad_norm": 0.13096442818641663, "learning_rate": 0.01, "loss": 1.943, "step": 100803 }, { "epoch": 10.352880764095717, "grad_norm": 0.05774092674255371, "learning_rate": 0.01, "loss": 1.9162, "step": 100806 }, { "epoch": 10.353188867207558, "grad_norm": 0.12401199340820312, "learning_rate": 0.01, "loss": 1.9614, "step": 100809 }, { "epoch": 10.3534969703194, "grad_norm": 0.09031431376934052, "learning_rate": 0.01, "loss": 1.9304, "step": 100812 }, { "epoch": 10.353805073431241, "grad_norm": 0.052858997136354446, "learning_rate": 0.01, "loss": 1.9546, "step": 100815 }, { "epoch": 10.354113176543082, "grad_norm": 0.059029802680015564, "learning_rate": 0.01, "loss": 1.9371, "step": 100818 }, { "epoch": 10.354421279654925, "grad_norm": 0.041640978306531906, "learning_rate": 0.01, "loss": 1.9633, "step": 100821 }, { "epoch": 10.354729382766767, "grad_norm": 0.1006421223282814, "learning_rate": 0.01, "loss": 1.9717, "step": 100824 }, { "epoch": 10.355037485878608, "grad_norm": 0.0704476535320282, "learning_rate": 0.01, "loss": 1.9392, "step": 100827 }, { "epoch": 10.355345588990449, "grad_norm": 0.03361036628484726, "learning_rate": 0.01, "loss": 1.9327, "step": 100830 }, { "epoch": 10.35565369210229, "grad_norm": 0.052998024970293045, "learning_rate": 0.01, "loss": 1.955, "step": 100833 }, { "epoch": 10.355961795214132, "grad_norm": 0.06729929149150848, "learning_rate": 0.01, "loss": 1.9459, "step": 100836 }, { "epoch": 10.356269898325973, "grad_norm": 0.03766258805990219, "learning_rate": 0.01, "loss": 1.9472, "step": 100839 }, { "epoch": 10.356578001437814, "grad_norm": 0.13822922110557556, "learning_rate": 0.01, "loss": 1.9467, "step": 100842 }, { "epoch": 10.356886104549655, "grad_norm": 0.09319800138473511, "learning_rate": 0.01, "loss": 1.9257, "step": 100845 }, { "epoch": 10.357194207661497, "grad_norm": 0.07348558306694031, "learning_rate": 0.01, "loss": 1.962, "step": 100848 }, { "epoch": 10.357502310773338, "grad_norm": 0.14174668490886688, "learning_rate": 0.01, "loss": 1.9498, "step": 100851 }, { "epoch": 10.357810413885181, "grad_norm": 0.05898958817124367, "learning_rate": 0.01, "loss": 1.9383, "step": 100854 }, { "epoch": 10.358118516997022, "grad_norm": 0.08681494742631912, "learning_rate": 0.01, "loss": 1.9565, "step": 100857 }, { "epoch": 10.358426620108864, "grad_norm": 0.036777254194021225, "learning_rate": 0.01, "loss": 1.9482, "step": 100860 }, { "epoch": 10.358734723220705, "grad_norm": 0.06975427269935608, "learning_rate": 0.01, "loss": 1.957, "step": 100863 }, { "epoch": 10.359042826332546, "grad_norm": 0.06511854380369186, "learning_rate": 0.01, "loss": 1.9312, "step": 100866 }, { "epoch": 10.359350929444387, "grad_norm": 0.057764261960983276, "learning_rate": 0.01, "loss": 1.9424, "step": 100869 }, { "epoch": 10.359659032556229, "grad_norm": 0.08371473848819733, "learning_rate": 0.01, "loss": 1.9319, "step": 100872 }, { "epoch": 10.35996713566807, "grad_norm": 0.0854247510433197, "learning_rate": 0.01, "loss": 1.9551, "step": 100875 }, { "epoch": 10.360275238779911, "grad_norm": 0.04530758038163185, "learning_rate": 0.01, "loss": 1.941, "step": 100878 }, { "epoch": 10.360583341891752, "grad_norm": 0.03615698590874672, "learning_rate": 0.01, "loss": 1.9293, "step": 100881 }, { "epoch": 10.360891445003595, "grad_norm": 0.06091855838894844, "learning_rate": 0.01, "loss": 1.9159, "step": 100884 }, { "epoch": 10.361199548115437, "grad_norm": 0.04644336923956871, "learning_rate": 0.01, "loss": 1.9213, "step": 100887 }, { "epoch": 10.361507651227278, "grad_norm": 0.043100859969854355, "learning_rate": 0.01, "loss": 1.9386, "step": 100890 }, { "epoch": 10.36181575433912, "grad_norm": 0.04365437850356102, "learning_rate": 0.01, "loss": 1.9316, "step": 100893 }, { "epoch": 10.36212385745096, "grad_norm": 0.11414897441864014, "learning_rate": 0.01, "loss": 1.9153, "step": 100896 }, { "epoch": 10.362431960562802, "grad_norm": 0.05801675096154213, "learning_rate": 0.01, "loss": 1.9302, "step": 100899 }, { "epoch": 10.362740063674643, "grad_norm": 0.10355260968208313, "learning_rate": 0.01, "loss": 1.96, "step": 100902 }, { "epoch": 10.363048166786484, "grad_norm": 0.049150459468364716, "learning_rate": 0.01, "loss": 1.9474, "step": 100905 }, { "epoch": 10.363356269898325, "grad_norm": 0.08133083581924438, "learning_rate": 0.01, "loss": 1.9486, "step": 100908 }, { "epoch": 10.363664373010167, "grad_norm": 0.0906137004494667, "learning_rate": 0.01, "loss": 1.9222, "step": 100911 }, { "epoch": 10.363972476122008, "grad_norm": 0.06084192171692848, "learning_rate": 0.01, "loss": 1.9377, "step": 100914 }, { "epoch": 10.364280579233851, "grad_norm": 0.0443035326898098, "learning_rate": 0.01, "loss": 1.9323, "step": 100917 }, { "epoch": 10.364588682345692, "grad_norm": 0.03251441940665245, "learning_rate": 0.01, "loss": 1.9306, "step": 100920 }, { "epoch": 10.364896785457534, "grad_norm": 0.056791745126247406, "learning_rate": 0.01, "loss": 1.9598, "step": 100923 }, { "epoch": 10.365204888569375, "grad_norm": 0.11314947158098221, "learning_rate": 0.01, "loss": 1.9287, "step": 100926 }, { "epoch": 10.365512991681216, "grad_norm": 0.09841377288103104, "learning_rate": 0.01, "loss": 1.9344, "step": 100929 }, { "epoch": 10.365821094793057, "grad_norm": 0.060636650770902634, "learning_rate": 0.01, "loss": 1.937, "step": 100932 }, { "epoch": 10.366129197904899, "grad_norm": 0.05676575005054474, "learning_rate": 0.01, "loss": 1.9233, "step": 100935 }, { "epoch": 10.36643730101674, "grad_norm": 0.1176658570766449, "learning_rate": 0.01, "loss": 1.9324, "step": 100938 }, { "epoch": 10.366745404128581, "grad_norm": 0.033294521272182465, "learning_rate": 0.01, "loss": 1.949, "step": 100941 }, { "epoch": 10.367053507240422, "grad_norm": 0.061117228120565414, "learning_rate": 0.01, "loss": 1.9548, "step": 100944 }, { "epoch": 10.367361610352265, "grad_norm": 0.04445219412446022, "learning_rate": 0.01, "loss": 1.9312, "step": 100947 }, { "epoch": 10.367669713464107, "grad_norm": 0.06714378297328949, "learning_rate": 0.01, "loss": 1.9324, "step": 100950 }, { "epoch": 10.367977816575948, "grad_norm": 0.08990859985351562, "learning_rate": 0.01, "loss": 1.9432, "step": 100953 }, { "epoch": 10.36828591968779, "grad_norm": 0.1147884726524353, "learning_rate": 0.01, "loss": 1.958, "step": 100956 }, { "epoch": 10.36859402279963, "grad_norm": 0.07784424722194672, "learning_rate": 0.01, "loss": 1.9534, "step": 100959 }, { "epoch": 10.368902125911472, "grad_norm": 0.06763900071382523, "learning_rate": 0.01, "loss": 1.9446, "step": 100962 }, { "epoch": 10.369210229023313, "grad_norm": 0.047785356640815735, "learning_rate": 0.01, "loss": 1.9413, "step": 100965 }, { "epoch": 10.369518332135154, "grad_norm": 0.044531069695949554, "learning_rate": 0.01, "loss": 1.9446, "step": 100968 }, { "epoch": 10.369826435246996, "grad_norm": 0.06693824380636215, "learning_rate": 0.01, "loss": 1.9454, "step": 100971 }, { "epoch": 10.370134538358837, "grad_norm": 0.058132026344537735, "learning_rate": 0.01, "loss": 1.9463, "step": 100974 }, { "epoch": 10.370442641470678, "grad_norm": 0.09861744195222855, "learning_rate": 0.01, "loss": 1.9507, "step": 100977 }, { "epoch": 10.370750744582521, "grad_norm": 0.04031767696142197, "learning_rate": 0.01, "loss": 1.9373, "step": 100980 }, { "epoch": 10.371058847694362, "grad_norm": 0.10672684013843536, "learning_rate": 0.01, "loss": 1.9365, "step": 100983 }, { "epoch": 10.371366950806204, "grad_norm": 0.04467643052339554, "learning_rate": 0.01, "loss": 1.925, "step": 100986 }, { "epoch": 10.371675053918045, "grad_norm": 0.09104414284229279, "learning_rate": 0.01, "loss": 1.9249, "step": 100989 }, { "epoch": 10.371983157029886, "grad_norm": 0.05075051635503769, "learning_rate": 0.01, "loss": 1.931, "step": 100992 }, { "epoch": 10.372291260141727, "grad_norm": 0.11250297725200653, "learning_rate": 0.01, "loss": 1.9432, "step": 100995 }, { "epoch": 10.372599363253569, "grad_norm": 0.03872716799378395, "learning_rate": 0.01, "loss": 1.9653, "step": 100998 }, { "epoch": 10.37290746636541, "grad_norm": 0.08845558017492294, "learning_rate": 0.01, "loss": 1.9279, "step": 101001 }, { "epoch": 10.373215569477251, "grad_norm": 0.07507075369358063, "learning_rate": 0.01, "loss": 1.9448, "step": 101004 }, { "epoch": 10.373523672589092, "grad_norm": 0.0837487205862999, "learning_rate": 0.01, "loss": 1.9655, "step": 101007 }, { "epoch": 10.373831775700934, "grad_norm": 0.06820300966501236, "learning_rate": 0.01, "loss": 1.9377, "step": 101010 }, { "epoch": 10.374139878812777, "grad_norm": 0.11727225035429001, "learning_rate": 0.01, "loss": 1.9653, "step": 101013 }, { "epoch": 10.374447981924618, "grad_norm": 0.06376228481531143, "learning_rate": 0.01, "loss": 1.9198, "step": 101016 }, { "epoch": 10.37475608503646, "grad_norm": 0.035101041197776794, "learning_rate": 0.01, "loss": 1.912, "step": 101019 }, { "epoch": 10.3750641881483, "grad_norm": 0.032660722732543945, "learning_rate": 0.01, "loss": 1.9612, "step": 101022 }, { "epoch": 10.375372291260142, "grad_norm": 0.04537061229348183, "learning_rate": 0.01, "loss": 1.9486, "step": 101025 }, { "epoch": 10.375680394371983, "grad_norm": 0.04433763399720192, "learning_rate": 0.01, "loss": 1.9162, "step": 101028 }, { "epoch": 10.375988497483824, "grad_norm": 0.0950620248913765, "learning_rate": 0.01, "loss": 1.955, "step": 101031 }, { "epoch": 10.376296600595666, "grad_norm": 0.07553868740797043, "learning_rate": 0.01, "loss": 1.9305, "step": 101034 }, { "epoch": 10.376604703707507, "grad_norm": 0.05983571335673332, "learning_rate": 0.01, "loss": 1.9614, "step": 101037 }, { "epoch": 10.376912806819348, "grad_norm": 0.08115866035223007, "learning_rate": 0.01, "loss": 1.9488, "step": 101040 }, { "epoch": 10.37722090993119, "grad_norm": 0.12146302312612534, "learning_rate": 0.01, "loss": 1.9259, "step": 101043 }, { "epoch": 10.377529013043032, "grad_norm": 0.06241605430841446, "learning_rate": 0.01, "loss": 1.9609, "step": 101046 }, { "epoch": 10.377837116154874, "grad_norm": 0.03604963421821594, "learning_rate": 0.01, "loss": 1.9317, "step": 101049 }, { "epoch": 10.378145219266715, "grad_norm": 0.0537908673286438, "learning_rate": 0.01, "loss": 1.9357, "step": 101052 }, { "epoch": 10.378453322378556, "grad_norm": 0.044751640409231186, "learning_rate": 0.01, "loss": 1.9379, "step": 101055 }, { "epoch": 10.378761425490397, "grad_norm": 0.0636962354183197, "learning_rate": 0.01, "loss": 1.971, "step": 101058 }, { "epoch": 10.379069528602239, "grad_norm": 0.11618872731924057, "learning_rate": 0.01, "loss": 1.9588, "step": 101061 }, { "epoch": 10.37937763171408, "grad_norm": 0.1530059278011322, "learning_rate": 0.01, "loss": 1.944, "step": 101064 }, { "epoch": 10.379685734825921, "grad_norm": 0.05721871927380562, "learning_rate": 0.01, "loss": 1.9391, "step": 101067 }, { "epoch": 10.379993837937763, "grad_norm": 0.07816772162914276, "learning_rate": 0.01, "loss": 1.9084, "step": 101070 }, { "epoch": 10.380301941049604, "grad_norm": 0.04495503008365631, "learning_rate": 0.01, "loss": 1.9196, "step": 101073 }, { "epoch": 10.380610044161447, "grad_norm": 0.07357476651668549, "learning_rate": 0.01, "loss": 1.9565, "step": 101076 }, { "epoch": 10.380918147273288, "grad_norm": 0.049054451286792755, "learning_rate": 0.01, "loss": 1.9277, "step": 101079 }, { "epoch": 10.38122625038513, "grad_norm": 0.20468761026859283, "learning_rate": 0.01, "loss": 1.9492, "step": 101082 }, { "epoch": 10.38153435349697, "grad_norm": 0.12951242923736572, "learning_rate": 0.01, "loss": 1.9511, "step": 101085 }, { "epoch": 10.381842456608812, "grad_norm": 0.05691198632121086, "learning_rate": 0.01, "loss": 1.939, "step": 101088 }, { "epoch": 10.382150559720653, "grad_norm": 0.09454532712697983, "learning_rate": 0.01, "loss": 1.9491, "step": 101091 }, { "epoch": 10.382458662832494, "grad_norm": 0.046739835292100906, "learning_rate": 0.01, "loss": 1.9444, "step": 101094 }, { "epoch": 10.382766765944336, "grad_norm": 0.04217229038476944, "learning_rate": 0.01, "loss": 1.935, "step": 101097 }, { "epoch": 10.383074869056177, "grad_norm": 0.0381905771791935, "learning_rate": 0.01, "loss": 1.9181, "step": 101100 }, { "epoch": 10.383382972168018, "grad_norm": 0.03863144665956497, "learning_rate": 0.01, "loss": 1.9581, "step": 101103 }, { "epoch": 10.38369107527986, "grad_norm": 0.068651482462883, "learning_rate": 0.01, "loss": 1.948, "step": 101106 }, { "epoch": 10.383999178391703, "grad_norm": 0.062461916357278824, "learning_rate": 0.01, "loss": 1.9155, "step": 101109 }, { "epoch": 10.384307281503544, "grad_norm": 0.0718960091471672, "learning_rate": 0.01, "loss": 1.92, "step": 101112 }, { "epoch": 10.384615384615385, "grad_norm": 0.044950731098651886, "learning_rate": 0.01, "loss": 1.9236, "step": 101115 }, { "epoch": 10.384923487727226, "grad_norm": 0.0483747161924839, "learning_rate": 0.01, "loss": 1.9585, "step": 101118 }, { "epoch": 10.385231590839068, "grad_norm": 0.05185241624712944, "learning_rate": 0.01, "loss": 1.9307, "step": 101121 }, { "epoch": 10.385539693950909, "grad_norm": 0.03757419437170029, "learning_rate": 0.01, "loss": 1.9381, "step": 101124 }, { "epoch": 10.38584779706275, "grad_norm": 0.12432057410478592, "learning_rate": 0.01, "loss": 1.9363, "step": 101127 }, { "epoch": 10.386155900174591, "grad_norm": 0.0804310292005539, "learning_rate": 0.01, "loss": 1.9596, "step": 101130 }, { "epoch": 10.386464003286433, "grad_norm": 0.09767717868089676, "learning_rate": 0.01, "loss": 1.9572, "step": 101133 }, { "epoch": 10.386772106398274, "grad_norm": 0.05865232273936272, "learning_rate": 0.01, "loss": 1.9365, "step": 101136 }, { "epoch": 10.387080209510117, "grad_norm": 0.04162360355257988, "learning_rate": 0.01, "loss": 1.9218, "step": 101139 }, { "epoch": 10.387388312621958, "grad_norm": 0.12615089118480682, "learning_rate": 0.01, "loss": 1.9722, "step": 101142 }, { "epoch": 10.3876964157338, "grad_norm": 0.04601338505744934, "learning_rate": 0.01, "loss": 1.9277, "step": 101145 }, { "epoch": 10.38800451884564, "grad_norm": 0.0515436977148056, "learning_rate": 0.01, "loss": 1.9737, "step": 101148 }, { "epoch": 10.388312621957482, "grad_norm": 0.05919606238603592, "learning_rate": 0.01, "loss": 1.9582, "step": 101151 }, { "epoch": 10.388620725069323, "grad_norm": 0.2011437565088272, "learning_rate": 0.01, "loss": 1.9336, "step": 101154 }, { "epoch": 10.388928828181164, "grad_norm": 0.13947412371635437, "learning_rate": 0.01, "loss": 1.9438, "step": 101157 }, { "epoch": 10.389236931293006, "grad_norm": 0.10039437562227249, "learning_rate": 0.01, "loss": 1.9558, "step": 101160 }, { "epoch": 10.389545034404847, "grad_norm": 0.052422210574150085, "learning_rate": 0.01, "loss": 1.9592, "step": 101163 }, { "epoch": 10.389853137516688, "grad_norm": 0.03887460008263588, "learning_rate": 0.01, "loss": 1.9138, "step": 101166 }, { "epoch": 10.39016124062853, "grad_norm": 0.06007706746459007, "learning_rate": 0.01, "loss": 1.9528, "step": 101169 }, { "epoch": 10.390469343740373, "grad_norm": 0.05732855945825577, "learning_rate": 0.01, "loss": 1.9497, "step": 101172 }, { "epoch": 10.390777446852214, "grad_norm": 0.0792255848646164, "learning_rate": 0.01, "loss": 1.9246, "step": 101175 }, { "epoch": 10.391085549964055, "grad_norm": 0.06666497141122818, "learning_rate": 0.01, "loss": 1.938, "step": 101178 }, { "epoch": 10.391393653075896, "grad_norm": 0.06329864263534546, "learning_rate": 0.01, "loss": 1.9283, "step": 101181 }, { "epoch": 10.391701756187738, "grad_norm": 0.17151835560798645, "learning_rate": 0.01, "loss": 1.9608, "step": 101184 }, { "epoch": 10.392009859299579, "grad_norm": 0.11637787520885468, "learning_rate": 0.01, "loss": 1.9517, "step": 101187 }, { "epoch": 10.39231796241142, "grad_norm": 0.06190858781337738, "learning_rate": 0.01, "loss": 1.9321, "step": 101190 }, { "epoch": 10.392626065523261, "grad_norm": 0.042335715144872665, "learning_rate": 0.01, "loss": 1.9514, "step": 101193 }, { "epoch": 10.392934168635103, "grad_norm": 0.03620636835694313, "learning_rate": 0.01, "loss": 1.9305, "step": 101196 }, { "epoch": 10.393242271746944, "grad_norm": 0.03169773519039154, "learning_rate": 0.01, "loss": 1.9404, "step": 101199 }, { "epoch": 10.393550374858787, "grad_norm": 0.11039429903030396, "learning_rate": 0.01, "loss": 1.948, "step": 101202 }, { "epoch": 10.393858477970628, "grad_norm": 0.07459361106157303, "learning_rate": 0.01, "loss": 1.9514, "step": 101205 }, { "epoch": 10.39416658108247, "grad_norm": 0.08071883022785187, "learning_rate": 0.01, "loss": 1.9604, "step": 101208 }, { "epoch": 10.39447468419431, "grad_norm": 0.08011837303638458, "learning_rate": 0.01, "loss": 1.909, "step": 101211 }, { "epoch": 10.394782787306152, "grad_norm": 0.08391046524047852, "learning_rate": 0.01, "loss": 1.9526, "step": 101214 }, { "epoch": 10.395090890417993, "grad_norm": 0.07405078411102295, "learning_rate": 0.01, "loss": 1.9309, "step": 101217 }, { "epoch": 10.395398993529835, "grad_norm": 0.10181564837694168, "learning_rate": 0.01, "loss": 1.9333, "step": 101220 }, { "epoch": 10.395707096641676, "grad_norm": 0.0628264769911766, "learning_rate": 0.01, "loss": 1.9253, "step": 101223 }, { "epoch": 10.396015199753517, "grad_norm": 0.06534316390752792, "learning_rate": 0.01, "loss": 1.9547, "step": 101226 }, { "epoch": 10.396323302865358, "grad_norm": 0.07027024030685425, "learning_rate": 0.01, "loss": 1.9264, "step": 101229 }, { "epoch": 10.3966314059772, "grad_norm": 0.09791549295186996, "learning_rate": 0.01, "loss": 1.9473, "step": 101232 }, { "epoch": 10.396939509089043, "grad_norm": 0.1024041548371315, "learning_rate": 0.01, "loss": 1.9402, "step": 101235 }, { "epoch": 10.397247612200884, "grad_norm": 0.06439261138439178, "learning_rate": 0.01, "loss": 1.9436, "step": 101238 }, { "epoch": 10.397555715312725, "grad_norm": 0.04745156317949295, "learning_rate": 0.01, "loss": 1.9482, "step": 101241 }, { "epoch": 10.397863818424566, "grad_norm": 0.0973968431353569, "learning_rate": 0.01, "loss": 1.9081, "step": 101244 }, { "epoch": 10.398171921536408, "grad_norm": 0.0683659166097641, "learning_rate": 0.01, "loss": 1.9517, "step": 101247 }, { "epoch": 10.398480024648249, "grad_norm": 0.04603632539510727, "learning_rate": 0.01, "loss": 1.9402, "step": 101250 }, { "epoch": 10.39878812776009, "grad_norm": 0.11405742913484573, "learning_rate": 0.01, "loss": 1.9522, "step": 101253 }, { "epoch": 10.399096230871931, "grad_norm": 0.046128056943416595, "learning_rate": 0.01, "loss": 1.9336, "step": 101256 }, { "epoch": 10.399404333983773, "grad_norm": 0.04291325807571411, "learning_rate": 0.01, "loss": 1.9375, "step": 101259 }, { "epoch": 10.399712437095614, "grad_norm": 0.029826214537024498, "learning_rate": 0.01, "loss": 1.9179, "step": 101262 }, { "epoch": 10.400020540207455, "grad_norm": 0.09401890635490417, "learning_rate": 0.01, "loss": 1.9421, "step": 101265 }, { "epoch": 10.400328643319298, "grad_norm": 0.04324827343225479, "learning_rate": 0.01, "loss": 1.9362, "step": 101268 }, { "epoch": 10.40063674643114, "grad_norm": 0.05711056664586067, "learning_rate": 0.01, "loss": 1.9391, "step": 101271 }, { "epoch": 10.40094484954298, "grad_norm": 0.07818640768527985, "learning_rate": 0.01, "loss": 1.9257, "step": 101274 }, { "epoch": 10.401252952654822, "grad_norm": 0.05406804382801056, "learning_rate": 0.01, "loss": 1.9308, "step": 101277 }, { "epoch": 10.401561055766663, "grad_norm": 0.051603805273771286, "learning_rate": 0.01, "loss": 1.9404, "step": 101280 }, { "epoch": 10.401869158878505, "grad_norm": 0.11336014419794083, "learning_rate": 0.01, "loss": 1.9296, "step": 101283 }, { "epoch": 10.402177261990346, "grad_norm": 0.07639746367931366, "learning_rate": 0.01, "loss": 1.949, "step": 101286 }, { "epoch": 10.402485365102187, "grad_norm": 0.08636012673377991, "learning_rate": 0.01, "loss": 1.9259, "step": 101289 }, { "epoch": 10.402793468214028, "grad_norm": 0.04350695013999939, "learning_rate": 0.01, "loss": 1.9614, "step": 101292 }, { "epoch": 10.40310157132587, "grad_norm": 0.030775388702750206, "learning_rate": 0.01, "loss": 1.9676, "step": 101295 }, { "epoch": 10.403409674437711, "grad_norm": 0.05844002217054367, "learning_rate": 0.01, "loss": 1.9526, "step": 101298 }, { "epoch": 10.403717777549554, "grad_norm": 0.0463801771402359, "learning_rate": 0.01, "loss": 1.9397, "step": 101301 }, { "epoch": 10.404025880661395, "grad_norm": 0.03980204463005066, "learning_rate": 0.01, "loss": 1.9772, "step": 101304 }, { "epoch": 10.404333983773236, "grad_norm": 0.043364375829696655, "learning_rate": 0.01, "loss": 1.9379, "step": 101307 }, { "epoch": 10.404642086885078, "grad_norm": 0.1211235523223877, "learning_rate": 0.01, "loss": 1.9393, "step": 101310 }, { "epoch": 10.404950189996919, "grad_norm": 0.044283825904130936, "learning_rate": 0.01, "loss": 1.9175, "step": 101313 }, { "epoch": 10.40525829310876, "grad_norm": 0.06269537657499313, "learning_rate": 0.01, "loss": 1.9348, "step": 101316 }, { "epoch": 10.405566396220602, "grad_norm": 0.11686865240335464, "learning_rate": 0.01, "loss": 1.9371, "step": 101319 }, { "epoch": 10.405874499332443, "grad_norm": 0.04533189907670021, "learning_rate": 0.01, "loss": 1.9379, "step": 101322 }, { "epoch": 10.406182602444284, "grad_norm": 0.06254197657108307, "learning_rate": 0.01, "loss": 1.9346, "step": 101325 }, { "epoch": 10.406490705556125, "grad_norm": 0.06646399945020676, "learning_rate": 0.01, "loss": 1.9227, "step": 101328 }, { "epoch": 10.406798808667968, "grad_norm": 0.057041414082050323, "learning_rate": 0.01, "loss": 1.9495, "step": 101331 }, { "epoch": 10.40710691177981, "grad_norm": 0.10227653384208679, "learning_rate": 0.01, "loss": 1.9516, "step": 101334 }, { "epoch": 10.40741501489165, "grad_norm": 0.07010546326637268, "learning_rate": 0.01, "loss": 1.9281, "step": 101337 }, { "epoch": 10.407723118003492, "grad_norm": 0.10337510704994202, "learning_rate": 0.01, "loss": 1.9605, "step": 101340 }, { "epoch": 10.408031221115333, "grad_norm": 0.05713025480508804, "learning_rate": 0.01, "loss": 1.9347, "step": 101343 }, { "epoch": 10.408339324227175, "grad_norm": 0.08766386657953262, "learning_rate": 0.01, "loss": 1.9704, "step": 101346 }, { "epoch": 10.408647427339016, "grad_norm": 0.057308826595544815, "learning_rate": 0.01, "loss": 1.945, "step": 101349 }, { "epoch": 10.408955530450857, "grad_norm": 0.11045978218317032, "learning_rate": 0.01, "loss": 1.9348, "step": 101352 }, { "epoch": 10.409263633562698, "grad_norm": 0.04463179409503937, "learning_rate": 0.01, "loss": 1.9252, "step": 101355 }, { "epoch": 10.40957173667454, "grad_norm": 0.06062712520360947, "learning_rate": 0.01, "loss": 1.9362, "step": 101358 }, { "epoch": 10.409879839786381, "grad_norm": 0.04317406192421913, "learning_rate": 0.01, "loss": 1.9512, "step": 101361 }, { "epoch": 10.410187942898224, "grad_norm": 0.07476109266281128, "learning_rate": 0.01, "loss": 1.9491, "step": 101364 }, { "epoch": 10.410496046010065, "grad_norm": 0.040464941412210464, "learning_rate": 0.01, "loss": 1.9431, "step": 101367 }, { "epoch": 10.410804149121907, "grad_norm": 0.0591270849108696, "learning_rate": 0.01, "loss": 1.9282, "step": 101370 }, { "epoch": 10.411112252233748, "grad_norm": 0.03704477474093437, "learning_rate": 0.01, "loss": 1.9465, "step": 101373 }, { "epoch": 10.411420355345589, "grad_norm": 0.04234340786933899, "learning_rate": 0.01, "loss": 1.9229, "step": 101376 }, { "epoch": 10.41172845845743, "grad_norm": 0.08466865122318268, "learning_rate": 0.01, "loss": 1.9452, "step": 101379 }, { "epoch": 10.412036561569272, "grad_norm": 0.06604104489088058, "learning_rate": 0.01, "loss": 1.9445, "step": 101382 }, { "epoch": 10.412344664681113, "grad_norm": 0.07191555202007294, "learning_rate": 0.01, "loss": 1.9324, "step": 101385 }, { "epoch": 10.412652767792954, "grad_norm": 0.05433400720357895, "learning_rate": 0.01, "loss": 1.9592, "step": 101388 }, { "epoch": 10.412960870904795, "grad_norm": 0.08139751106500626, "learning_rate": 0.01, "loss": 1.9174, "step": 101391 }, { "epoch": 10.413268974016638, "grad_norm": 0.08413826674222946, "learning_rate": 0.01, "loss": 1.9416, "step": 101394 }, { "epoch": 10.41357707712848, "grad_norm": 0.038020603358745575, "learning_rate": 0.01, "loss": 1.9324, "step": 101397 }, { "epoch": 10.413885180240321, "grad_norm": 0.11257899552583694, "learning_rate": 0.01, "loss": 1.9553, "step": 101400 }, { "epoch": 10.414193283352162, "grad_norm": 0.05421124026179314, "learning_rate": 0.01, "loss": 1.9255, "step": 101403 }, { "epoch": 10.414501386464003, "grad_norm": 0.08173589408397675, "learning_rate": 0.01, "loss": 1.9512, "step": 101406 }, { "epoch": 10.414809489575845, "grad_norm": 0.09774955362081528, "learning_rate": 0.01, "loss": 1.9575, "step": 101409 }, { "epoch": 10.415117592687686, "grad_norm": 0.06760906428098679, "learning_rate": 0.01, "loss": 1.9779, "step": 101412 }, { "epoch": 10.415425695799527, "grad_norm": 0.06532621383666992, "learning_rate": 0.01, "loss": 1.9625, "step": 101415 }, { "epoch": 10.415733798911369, "grad_norm": 0.051986172795295715, "learning_rate": 0.01, "loss": 1.9316, "step": 101418 }, { "epoch": 10.41604190202321, "grad_norm": 0.04217454046010971, "learning_rate": 0.01, "loss": 1.92, "step": 101421 }, { "epoch": 10.416350005135051, "grad_norm": 0.04921566694974899, "learning_rate": 0.01, "loss": 1.9542, "step": 101424 }, { "epoch": 10.416658108246894, "grad_norm": 0.09551133960485458, "learning_rate": 0.01, "loss": 1.9507, "step": 101427 }, { "epoch": 10.416966211358735, "grad_norm": 0.07457684725522995, "learning_rate": 0.01, "loss": 1.9288, "step": 101430 }, { "epoch": 10.417274314470577, "grad_norm": 0.05960552766919136, "learning_rate": 0.01, "loss": 1.935, "step": 101433 }, { "epoch": 10.417582417582418, "grad_norm": 0.04713088646531105, "learning_rate": 0.01, "loss": 1.9394, "step": 101436 }, { "epoch": 10.41789052069426, "grad_norm": 0.08014922589063644, "learning_rate": 0.01, "loss": 1.9444, "step": 101439 }, { "epoch": 10.4181986238061, "grad_norm": 0.08689352869987488, "learning_rate": 0.01, "loss": 1.9452, "step": 101442 }, { "epoch": 10.418506726917942, "grad_norm": 0.04182841256260872, "learning_rate": 0.01, "loss": 1.9576, "step": 101445 }, { "epoch": 10.418814830029783, "grad_norm": 0.0788508802652359, "learning_rate": 0.01, "loss": 1.9433, "step": 101448 }, { "epoch": 10.419122933141624, "grad_norm": 0.06277182698249817, "learning_rate": 0.01, "loss": 1.9184, "step": 101451 }, { "epoch": 10.419431036253465, "grad_norm": 0.04268977791070938, "learning_rate": 0.01, "loss": 1.9513, "step": 101454 }, { "epoch": 10.419739139365308, "grad_norm": 0.08060577511787415, "learning_rate": 0.01, "loss": 1.9441, "step": 101457 }, { "epoch": 10.42004724247715, "grad_norm": 0.05500905588269234, "learning_rate": 0.01, "loss": 1.9318, "step": 101460 }, { "epoch": 10.420355345588991, "grad_norm": 0.09350892156362534, "learning_rate": 0.01, "loss": 1.9557, "step": 101463 }, { "epoch": 10.420663448700832, "grad_norm": 0.10302212089300156, "learning_rate": 0.01, "loss": 1.9446, "step": 101466 }, { "epoch": 10.420971551812674, "grad_norm": 0.0546325221657753, "learning_rate": 0.01, "loss": 1.9525, "step": 101469 }, { "epoch": 10.421279654924515, "grad_norm": 0.10181728005409241, "learning_rate": 0.01, "loss": 1.9532, "step": 101472 }, { "epoch": 10.421587758036356, "grad_norm": 0.1607157438993454, "learning_rate": 0.01, "loss": 1.9433, "step": 101475 }, { "epoch": 10.421895861148197, "grad_norm": 0.0734848827123642, "learning_rate": 0.01, "loss": 1.9245, "step": 101478 }, { "epoch": 10.422203964260039, "grad_norm": 0.05575236305594444, "learning_rate": 0.01, "loss": 1.9408, "step": 101481 }, { "epoch": 10.42251206737188, "grad_norm": 0.03977865353226662, "learning_rate": 0.01, "loss": 1.9376, "step": 101484 }, { "epoch": 10.422820170483721, "grad_norm": 0.03199474513530731, "learning_rate": 0.01, "loss": 1.9539, "step": 101487 }, { "epoch": 10.423128273595564, "grad_norm": 0.03404001519083977, "learning_rate": 0.01, "loss": 1.9207, "step": 101490 }, { "epoch": 10.423436376707405, "grad_norm": 0.055631235241889954, "learning_rate": 0.01, "loss": 1.9488, "step": 101493 }, { "epoch": 10.423744479819247, "grad_norm": 0.03958062827587128, "learning_rate": 0.01, "loss": 1.9497, "step": 101496 }, { "epoch": 10.424052582931088, "grad_norm": 0.057350341230630875, "learning_rate": 0.01, "loss": 1.9397, "step": 101499 }, { "epoch": 10.42436068604293, "grad_norm": 0.041089460253715515, "learning_rate": 0.01, "loss": 1.9287, "step": 101502 }, { "epoch": 10.42466878915477, "grad_norm": 0.09128007292747498, "learning_rate": 0.01, "loss": 1.9301, "step": 101505 }, { "epoch": 10.424976892266612, "grad_norm": 0.03688080236315727, "learning_rate": 0.01, "loss": 1.9295, "step": 101508 }, { "epoch": 10.425284995378453, "grad_norm": 0.04330899193882942, "learning_rate": 0.01, "loss": 1.9437, "step": 101511 }, { "epoch": 10.425593098490294, "grad_norm": 0.0362963005900383, "learning_rate": 0.01, "loss": 1.9102, "step": 101514 }, { "epoch": 10.425901201602136, "grad_norm": 0.08184661716222763, "learning_rate": 0.01, "loss": 1.9285, "step": 101517 }, { "epoch": 10.426209304713977, "grad_norm": 0.07895220816135406, "learning_rate": 0.01, "loss": 1.9522, "step": 101520 }, { "epoch": 10.42651740782582, "grad_norm": 0.047668907791376114, "learning_rate": 0.01, "loss": 1.9326, "step": 101523 }, { "epoch": 10.426825510937661, "grad_norm": 0.049812547862529755, "learning_rate": 0.01, "loss": 1.9411, "step": 101526 }, { "epoch": 10.427133614049502, "grad_norm": 0.04061616584658623, "learning_rate": 0.01, "loss": 1.9689, "step": 101529 }, { "epoch": 10.427441717161344, "grad_norm": 0.03955051302909851, "learning_rate": 0.01, "loss": 1.9344, "step": 101532 }, { "epoch": 10.427749820273185, "grad_norm": 0.05154630169272423, "learning_rate": 0.01, "loss": 1.9619, "step": 101535 }, { "epoch": 10.428057923385026, "grad_norm": 0.134513258934021, "learning_rate": 0.01, "loss": 1.9367, "step": 101538 }, { "epoch": 10.428366026496867, "grad_norm": 0.054661769419908524, "learning_rate": 0.01, "loss": 1.9179, "step": 101541 }, { "epoch": 10.428674129608709, "grad_norm": 0.04197604954242706, "learning_rate": 0.01, "loss": 1.9445, "step": 101544 }, { "epoch": 10.42898223272055, "grad_norm": 0.04818270727992058, "learning_rate": 0.01, "loss": 1.9355, "step": 101547 }, { "epoch": 10.429290335832391, "grad_norm": 0.07613711804151535, "learning_rate": 0.01, "loss": 1.9283, "step": 101550 }, { "epoch": 10.429598438944232, "grad_norm": 0.093401700258255, "learning_rate": 0.01, "loss": 1.9435, "step": 101553 }, { "epoch": 10.429906542056075, "grad_norm": 0.08218391984701157, "learning_rate": 0.01, "loss": 1.9576, "step": 101556 }, { "epoch": 10.430214645167917, "grad_norm": 0.09376956522464752, "learning_rate": 0.01, "loss": 1.9456, "step": 101559 }, { "epoch": 10.430522748279758, "grad_norm": 0.06790527701377869, "learning_rate": 0.01, "loss": 1.9482, "step": 101562 }, { "epoch": 10.4308308513916, "grad_norm": 0.04368535429239273, "learning_rate": 0.01, "loss": 1.9427, "step": 101565 }, { "epoch": 10.43113895450344, "grad_norm": 0.03572575002908707, "learning_rate": 0.01, "loss": 1.9228, "step": 101568 }, { "epoch": 10.431447057615282, "grad_norm": 0.10959416627883911, "learning_rate": 0.01, "loss": 1.9174, "step": 101571 }, { "epoch": 10.431755160727123, "grad_norm": 0.10492794215679169, "learning_rate": 0.01, "loss": 1.939, "step": 101574 }, { "epoch": 10.432063263838964, "grad_norm": 0.034721072763204575, "learning_rate": 0.01, "loss": 1.9242, "step": 101577 }, { "epoch": 10.432371366950806, "grad_norm": 0.08810894191265106, "learning_rate": 0.01, "loss": 1.9408, "step": 101580 }, { "epoch": 10.432679470062647, "grad_norm": 0.058836497366428375, "learning_rate": 0.01, "loss": 1.9431, "step": 101583 }, { "epoch": 10.43298757317449, "grad_norm": 0.0684552937746048, "learning_rate": 0.01, "loss": 1.94, "step": 101586 }, { "epoch": 10.433295676286331, "grad_norm": 0.04458843544125557, "learning_rate": 0.01, "loss": 1.916, "step": 101589 }, { "epoch": 10.433603779398172, "grad_norm": 0.03802128881216049, "learning_rate": 0.01, "loss": 1.9182, "step": 101592 }, { "epoch": 10.433911882510014, "grad_norm": 0.04631553217768669, "learning_rate": 0.01, "loss": 1.9479, "step": 101595 }, { "epoch": 10.434219985621855, "grad_norm": 0.0350140780210495, "learning_rate": 0.01, "loss": 1.9384, "step": 101598 }, { "epoch": 10.434528088733696, "grad_norm": 0.06783036887645721, "learning_rate": 0.01, "loss": 1.937, "step": 101601 }, { "epoch": 10.434836191845537, "grad_norm": 0.10419753193855286, "learning_rate": 0.01, "loss": 1.957, "step": 101604 }, { "epoch": 10.435144294957379, "grad_norm": 0.057775016874074936, "learning_rate": 0.01, "loss": 1.946, "step": 101607 }, { "epoch": 10.43545239806922, "grad_norm": 0.034822724759578705, "learning_rate": 0.01, "loss": 1.947, "step": 101610 }, { "epoch": 10.435760501181061, "grad_norm": 0.1210988312959671, "learning_rate": 0.01, "loss": 1.954, "step": 101613 }, { "epoch": 10.436068604292903, "grad_norm": 0.08582430332899094, "learning_rate": 0.01, "loss": 1.9499, "step": 101616 }, { "epoch": 10.436376707404746, "grad_norm": 0.037638887763023376, "learning_rate": 0.01, "loss": 1.9371, "step": 101619 }, { "epoch": 10.436684810516587, "grad_norm": 0.149775430560112, "learning_rate": 0.01, "loss": 1.9408, "step": 101622 }, { "epoch": 10.436992913628428, "grad_norm": 0.036241546273231506, "learning_rate": 0.01, "loss": 1.9383, "step": 101625 }, { "epoch": 10.43730101674027, "grad_norm": 0.051932793110609055, "learning_rate": 0.01, "loss": 1.9335, "step": 101628 }, { "epoch": 10.43760911985211, "grad_norm": 0.05552282929420471, "learning_rate": 0.01, "loss": 1.9574, "step": 101631 }, { "epoch": 10.437917222963952, "grad_norm": 0.05709502846002579, "learning_rate": 0.01, "loss": 1.9204, "step": 101634 }, { "epoch": 10.438225326075793, "grad_norm": 0.055031366646289825, "learning_rate": 0.01, "loss": 1.9087, "step": 101637 }, { "epoch": 10.438533429187634, "grad_norm": 0.04440178722143173, "learning_rate": 0.01, "loss": 1.9287, "step": 101640 }, { "epoch": 10.438841532299476, "grad_norm": 0.033032674342393875, "learning_rate": 0.01, "loss": 1.9582, "step": 101643 }, { "epoch": 10.439149635411317, "grad_norm": 0.06382546573877335, "learning_rate": 0.01, "loss": 1.9386, "step": 101646 }, { "epoch": 10.43945773852316, "grad_norm": 0.15189075469970703, "learning_rate": 0.01, "loss": 1.9292, "step": 101649 }, { "epoch": 10.439765841635001, "grad_norm": 0.035068389028310776, "learning_rate": 0.01, "loss": 1.9566, "step": 101652 }, { "epoch": 10.440073944746842, "grad_norm": 0.1286683976650238, "learning_rate": 0.01, "loss": 1.9619, "step": 101655 }, { "epoch": 10.440382047858684, "grad_norm": 0.12208466976881027, "learning_rate": 0.01, "loss": 1.9305, "step": 101658 }, { "epoch": 10.440690150970525, "grad_norm": 0.08962827175855637, "learning_rate": 0.01, "loss": 1.9649, "step": 101661 }, { "epoch": 10.440998254082366, "grad_norm": 0.05762847512960434, "learning_rate": 0.01, "loss": 1.9569, "step": 101664 }, { "epoch": 10.441306357194208, "grad_norm": 0.032505180686712265, "learning_rate": 0.01, "loss": 1.9406, "step": 101667 }, { "epoch": 10.441614460306049, "grad_norm": 0.04459118843078613, "learning_rate": 0.01, "loss": 1.9359, "step": 101670 }, { "epoch": 10.44192256341789, "grad_norm": 0.04378930851817131, "learning_rate": 0.01, "loss": 1.9293, "step": 101673 }, { "epoch": 10.442230666529731, "grad_norm": 0.036537352949380875, "learning_rate": 0.01, "loss": 1.914, "step": 101676 }, { "epoch": 10.442538769641573, "grad_norm": 0.042318109422922134, "learning_rate": 0.01, "loss": 1.9334, "step": 101679 }, { "epoch": 10.442846872753416, "grad_norm": 0.08487118780612946, "learning_rate": 0.01, "loss": 1.9486, "step": 101682 }, { "epoch": 10.443154975865257, "grad_norm": 0.05887780711054802, "learning_rate": 0.01, "loss": 1.9302, "step": 101685 }, { "epoch": 10.443463078977098, "grad_norm": 0.09170451015233994, "learning_rate": 0.01, "loss": 1.9552, "step": 101688 }, { "epoch": 10.44377118208894, "grad_norm": 0.07705742865800858, "learning_rate": 0.01, "loss": 1.9178, "step": 101691 }, { "epoch": 10.44407928520078, "grad_norm": 0.0543515719473362, "learning_rate": 0.01, "loss": 1.9336, "step": 101694 }, { "epoch": 10.444387388312622, "grad_norm": 0.03692963346838951, "learning_rate": 0.01, "loss": 1.9224, "step": 101697 }, { "epoch": 10.444695491424463, "grad_norm": 0.04865596815943718, "learning_rate": 0.01, "loss": 1.9501, "step": 101700 }, { "epoch": 10.445003594536304, "grad_norm": 0.12262431532144547, "learning_rate": 0.01, "loss": 1.945, "step": 101703 }, { "epoch": 10.445311697648146, "grad_norm": 0.04820642247796059, "learning_rate": 0.01, "loss": 1.9133, "step": 101706 }, { "epoch": 10.445619800759987, "grad_norm": 0.10539522767066956, "learning_rate": 0.01, "loss": 1.954, "step": 101709 }, { "epoch": 10.44592790387183, "grad_norm": 0.0626974105834961, "learning_rate": 0.01, "loss": 1.923, "step": 101712 }, { "epoch": 10.446236006983671, "grad_norm": 0.0897459164261818, "learning_rate": 0.01, "loss": 1.9288, "step": 101715 }, { "epoch": 10.446544110095513, "grad_norm": 0.049581412225961685, "learning_rate": 0.01, "loss": 1.9464, "step": 101718 }, { "epoch": 10.446852213207354, "grad_norm": 0.06107807531952858, "learning_rate": 0.01, "loss": 1.9607, "step": 101721 }, { "epoch": 10.447160316319195, "grad_norm": 0.051195114850997925, "learning_rate": 0.01, "loss": 1.9631, "step": 101724 }, { "epoch": 10.447468419431036, "grad_norm": 0.09900028258562088, "learning_rate": 0.01, "loss": 1.9398, "step": 101727 }, { "epoch": 10.447776522542878, "grad_norm": 0.07909564673900604, "learning_rate": 0.01, "loss": 1.9111, "step": 101730 }, { "epoch": 10.448084625654719, "grad_norm": 0.06978385150432587, "learning_rate": 0.01, "loss": 1.9606, "step": 101733 }, { "epoch": 10.44839272876656, "grad_norm": 0.07636460661888123, "learning_rate": 0.01, "loss": 1.9447, "step": 101736 }, { "epoch": 10.448700831878401, "grad_norm": 0.0727100670337677, "learning_rate": 0.01, "loss": 1.9431, "step": 101739 }, { "epoch": 10.449008934990243, "grad_norm": 0.06627271324396133, "learning_rate": 0.01, "loss": 1.9353, "step": 101742 }, { "epoch": 10.449317038102086, "grad_norm": 0.07906875014305115, "learning_rate": 0.01, "loss": 1.9355, "step": 101745 }, { "epoch": 10.449625141213927, "grad_norm": 0.08305513858795166, "learning_rate": 0.01, "loss": 1.9396, "step": 101748 }, { "epoch": 10.449933244325768, "grad_norm": 0.05429422855377197, "learning_rate": 0.01, "loss": 1.9184, "step": 101751 }, { "epoch": 10.45024134743761, "grad_norm": 0.10072019696235657, "learning_rate": 0.01, "loss": 1.929, "step": 101754 }, { "epoch": 10.45054945054945, "grad_norm": 0.053927551954984665, "learning_rate": 0.01, "loss": 1.923, "step": 101757 }, { "epoch": 10.450857553661292, "grad_norm": 0.08865250647068024, "learning_rate": 0.01, "loss": 1.9562, "step": 101760 }, { "epoch": 10.451165656773133, "grad_norm": 0.08605709671974182, "learning_rate": 0.01, "loss": 1.9301, "step": 101763 }, { "epoch": 10.451473759884975, "grad_norm": 0.06711055338382721, "learning_rate": 0.01, "loss": 1.9263, "step": 101766 }, { "epoch": 10.451781862996816, "grad_norm": 0.06331571936607361, "learning_rate": 0.01, "loss": 1.9276, "step": 101769 }, { "epoch": 10.452089966108657, "grad_norm": 0.08312960714101791, "learning_rate": 0.01, "loss": 1.9389, "step": 101772 }, { "epoch": 10.452398069220498, "grad_norm": 0.03663955256342888, "learning_rate": 0.01, "loss": 1.9439, "step": 101775 }, { "epoch": 10.452706172332341, "grad_norm": 0.036953091621398926, "learning_rate": 0.01, "loss": 1.9359, "step": 101778 }, { "epoch": 10.453014275444183, "grad_norm": 0.12208108603954315, "learning_rate": 0.01, "loss": 1.9243, "step": 101781 }, { "epoch": 10.453322378556024, "grad_norm": 0.06827596575021744, "learning_rate": 0.01, "loss": 1.9339, "step": 101784 }, { "epoch": 10.453630481667865, "grad_norm": 0.061575282365083694, "learning_rate": 0.01, "loss": 1.9593, "step": 101787 }, { "epoch": 10.453938584779706, "grad_norm": 0.04751850664615631, "learning_rate": 0.01, "loss": 1.928, "step": 101790 }, { "epoch": 10.454246687891548, "grad_norm": 0.1076505035161972, "learning_rate": 0.01, "loss": 1.9326, "step": 101793 }, { "epoch": 10.454554791003389, "grad_norm": 0.04396897554397583, "learning_rate": 0.01, "loss": 1.9619, "step": 101796 }, { "epoch": 10.45486289411523, "grad_norm": 0.03534824028611183, "learning_rate": 0.01, "loss": 1.9147, "step": 101799 }, { "epoch": 10.455170997227071, "grad_norm": 0.05526803433895111, "learning_rate": 0.01, "loss": 1.9595, "step": 101802 }, { "epoch": 10.455479100338913, "grad_norm": 0.049421172589063644, "learning_rate": 0.01, "loss": 1.9281, "step": 101805 }, { "epoch": 10.455787203450754, "grad_norm": 0.06431230902671814, "learning_rate": 0.01, "loss": 1.9283, "step": 101808 }, { "epoch": 10.456095306562597, "grad_norm": 0.08008962869644165, "learning_rate": 0.01, "loss": 1.9354, "step": 101811 }, { "epoch": 10.456403409674438, "grad_norm": 0.05697495862841606, "learning_rate": 0.01, "loss": 1.9405, "step": 101814 }, { "epoch": 10.45671151278628, "grad_norm": 0.11419016867876053, "learning_rate": 0.01, "loss": 1.9591, "step": 101817 }, { "epoch": 10.45701961589812, "grad_norm": 0.07900203764438629, "learning_rate": 0.01, "loss": 1.9352, "step": 101820 }, { "epoch": 10.457327719009962, "grad_norm": 0.08858697861433029, "learning_rate": 0.01, "loss": 1.9454, "step": 101823 }, { "epoch": 10.457635822121803, "grad_norm": 0.051916588097810745, "learning_rate": 0.01, "loss": 1.9297, "step": 101826 }, { "epoch": 10.457943925233645, "grad_norm": 0.03870150074362755, "learning_rate": 0.01, "loss": 1.9223, "step": 101829 }, { "epoch": 10.458252028345486, "grad_norm": 0.03717373311519623, "learning_rate": 0.01, "loss": 1.9284, "step": 101832 }, { "epoch": 10.458560131457327, "grad_norm": 0.12929514050483704, "learning_rate": 0.01, "loss": 1.9491, "step": 101835 }, { "epoch": 10.458868234569168, "grad_norm": 0.11549792438745499, "learning_rate": 0.01, "loss": 1.9375, "step": 101838 }, { "epoch": 10.459176337681011, "grad_norm": 0.08777003735303879, "learning_rate": 0.01, "loss": 1.9416, "step": 101841 }, { "epoch": 10.459484440792853, "grad_norm": 0.063051737844944, "learning_rate": 0.01, "loss": 1.9157, "step": 101844 }, { "epoch": 10.459792543904694, "grad_norm": 0.04587935656309128, "learning_rate": 0.01, "loss": 1.9184, "step": 101847 }, { "epoch": 10.460100647016535, "grad_norm": 0.09982280433177948, "learning_rate": 0.01, "loss": 1.9496, "step": 101850 }, { "epoch": 10.460408750128376, "grad_norm": 0.09921959787607193, "learning_rate": 0.01, "loss": 1.9435, "step": 101853 }, { "epoch": 10.460716853240218, "grad_norm": 0.038731105625629425, "learning_rate": 0.01, "loss": 1.9291, "step": 101856 }, { "epoch": 10.461024956352059, "grad_norm": 0.07030004262924194, "learning_rate": 0.01, "loss": 1.9715, "step": 101859 }, { "epoch": 10.4613330594639, "grad_norm": 0.052664730697870255, "learning_rate": 0.01, "loss": 1.9249, "step": 101862 }, { "epoch": 10.461641162575742, "grad_norm": 0.11606428027153015, "learning_rate": 0.01, "loss": 1.9453, "step": 101865 }, { "epoch": 10.461949265687583, "grad_norm": 0.05401783809065819, "learning_rate": 0.01, "loss": 1.952, "step": 101868 }, { "epoch": 10.462257368799424, "grad_norm": 0.03583219647407532, "learning_rate": 0.01, "loss": 1.9131, "step": 101871 }, { "epoch": 10.462565471911267, "grad_norm": 0.07720806449651718, "learning_rate": 0.01, "loss": 1.9553, "step": 101874 }, { "epoch": 10.462873575023108, "grad_norm": 0.054585494101047516, "learning_rate": 0.01, "loss": 1.9477, "step": 101877 }, { "epoch": 10.46318167813495, "grad_norm": 0.06677903980016708, "learning_rate": 0.01, "loss": 1.9365, "step": 101880 }, { "epoch": 10.46348978124679, "grad_norm": 0.03949226066470146, "learning_rate": 0.01, "loss": 1.9401, "step": 101883 }, { "epoch": 10.463797884358632, "grad_norm": 0.03900604695081711, "learning_rate": 0.01, "loss": 1.9517, "step": 101886 }, { "epoch": 10.464105987470473, "grad_norm": 0.08953657001256943, "learning_rate": 0.01, "loss": 1.9211, "step": 101889 }, { "epoch": 10.464414090582315, "grad_norm": 0.09946227818727493, "learning_rate": 0.01, "loss": 1.9583, "step": 101892 }, { "epoch": 10.464722193694156, "grad_norm": 0.036433037370443344, "learning_rate": 0.01, "loss": 1.9369, "step": 101895 }, { "epoch": 10.465030296805997, "grad_norm": 0.09698110073804855, "learning_rate": 0.01, "loss": 1.9194, "step": 101898 }, { "epoch": 10.465338399917838, "grad_norm": 0.04699701443314552, "learning_rate": 0.01, "loss": 1.9369, "step": 101901 }, { "epoch": 10.465646503029681, "grad_norm": 0.03788859769701958, "learning_rate": 0.01, "loss": 1.9374, "step": 101904 }, { "epoch": 10.465954606141523, "grad_norm": 0.09247510880231857, "learning_rate": 0.01, "loss": 1.9344, "step": 101907 }, { "epoch": 10.466262709253364, "grad_norm": 0.09411049634218216, "learning_rate": 0.01, "loss": 1.9356, "step": 101910 }, { "epoch": 10.466570812365205, "grad_norm": 0.12619060277938843, "learning_rate": 0.01, "loss": 1.9317, "step": 101913 }, { "epoch": 10.466878915477047, "grad_norm": 0.17733120918273926, "learning_rate": 0.01, "loss": 1.9407, "step": 101916 }, { "epoch": 10.467187018588888, "grad_norm": 0.10386303067207336, "learning_rate": 0.01, "loss": 1.9572, "step": 101919 }, { "epoch": 10.467495121700729, "grad_norm": 0.035836637020111084, "learning_rate": 0.01, "loss": 1.9555, "step": 101922 }, { "epoch": 10.46780322481257, "grad_norm": 0.050569795072078705, "learning_rate": 0.01, "loss": 1.9527, "step": 101925 }, { "epoch": 10.468111327924412, "grad_norm": 0.04953821375966072, "learning_rate": 0.01, "loss": 1.9504, "step": 101928 }, { "epoch": 10.468419431036253, "grad_norm": 0.05620476230978966, "learning_rate": 0.01, "loss": 1.9457, "step": 101931 }, { "epoch": 10.468727534148094, "grad_norm": 0.05553643777966499, "learning_rate": 0.01, "loss": 1.9291, "step": 101934 }, { "epoch": 10.469035637259937, "grad_norm": 0.04494090378284454, "learning_rate": 0.01, "loss": 1.9303, "step": 101937 }, { "epoch": 10.469343740371778, "grad_norm": 0.09270351380109787, "learning_rate": 0.01, "loss": 1.9434, "step": 101940 }, { "epoch": 10.46965184348362, "grad_norm": 0.04817723482847214, "learning_rate": 0.01, "loss": 1.9512, "step": 101943 }, { "epoch": 10.469959946595461, "grad_norm": 0.09144774824380875, "learning_rate": 0.01, "loss": 1.9449, "step": 101946 }, { "epoch": 10.470268049707302, "grad_norm": 0.048304855823516846, "learning_rate": 0.01, "loss": 1.9453, "step": 101949 }, { "epoch": 10.470576152819143, "grad_norm": 0.08919715136289597, "learning_rate": 0.01, "loss": 1.9153, "step": 101952 }, { "epoch": 10.470884255930985, "grad_norm": 0.07856735587120056, "learning_rate": 0.01, "loss": 1.9139, "step": 101955 }, { "epoch": 10.471192359042826, "grad_norm": 0.06114446744322777, "learning_rate": 0.01, "loss": 1.9668, "step": 101958 }, { "epoch": 10.471500462154667, "grad_norm": 0.04188235104084015, "learning_rate": 0.01, "loss": 1.9376, "step": 101961 }, { "epoch": 10.471808565266508, "grad_norm": 0.04990050569176674, "learning_rate": 0.01, "loss": 1.9523, "step": 101964 }, { "epoch": 10.472116668378352, "grad_norm": 0.08834641426801682, "learning_rate": 0.01, "loss": 1.918, "step": 101967 }, { "epoch": 10.472424771490193, "grad_norm": 0.09818597882986069, "learning_rate": 0.01, "loss": 1.9389, "step": 101970 }, { "epoch": 10.472732874602034, "grad_norm": 0.048872750252485275, "learning_rate": 0.01, "loss": 1.9321, "step": 101973 }, { "epoch": 10.473040977713875, "grad_norm": 0.03154008090496063, "learning_rate": 0.01, "loss": 1.9388, "step": 101976 }, { "epoch": 10.473349080825717, "grad_norm": 0.0439290851354599, "learning_rate": 0.01, "loss": 1.9491, "step": 101979 }, { "epoch": 10.473657183937558, "grad_norm": 0.0607743076980114, "learning_rate": 0.01, "loss": 1.9351, "step": 101982 }, { "epoch": 10.473965287049399, "grad_norm": 0.06029101461172104, "learning_rate": 0.01, "loss": 1.9486, "step": 101985 }, { "epoch": 10.47427339016124, "grad_norm": 0.06238150969147682, "learning_rate": 0.01, "loss": 1.93, "step": 101988 }, { "epoch": 10.474581493273082, "grad_norm": 0.09172660112380981, "learning_rate": 0.01, "loss": 1.9296, "step": 101991 }, { "epoch": 10.474889596384923, "grad_norm": 0.04956614598631859, "learning_rate": 0.01, "loss": 1.9323, "step": 101994 }, { "epoch": 10.475197699496764, "grad_norm": 0.08379212021827698, "learning_rate": 0.01, "loss": 1.9786, "step": 101997 }, { "epoch": 10.475505802608607, "grad_norm": 0.04099489748477936, "learning_rate": 0.01, "loss": 1.9397, "step": 102000 }, { "epoch": 10.475813905720448, "grad_norm": 0.0487617626786232, "learning_rate": 0.01, "loss": 1.9155, "step": 102003 }, { "epoch": 10.47612200883229, "grad_norm": 0.039811212569475174, "learning_rate": 0.01, "loss": 1.9208, "step": 102006 }, { "epoch": 10.476430111944131, "grad_norm": 0.044633690267801285, "learning_rate": 0.01, "loss": 1.951, "step": 102009 }, { "epoch": 10.476738215055972, "grad_norm": 0.05821068957448006, "learning_rate": 0.01, "loss": 1.9393, "step": 102012 }, { "epoch": 10.477046318167814, "grad_norm": 0.05007709562778473, "learning_rate": 0.01, "loss": 1.935, "step": 102015 }, { "epoch": 10.477354421279655, "grad_norm": 0.04428219422698021, "learning_rate": 0.01, "loss": 1.9668, "step": 102018 }, { "epoch": 10.477662524391496, "grad_norm": 0.0576540008187294, "learning_rate": 0.01, "loss": 1.9195, "step": 102021 }, { "epoch": 10.477970627503337, "grad_norm": 0.032921202480793, "learning_rate": 0.01, "loss": 1.9203, "step": 102024 }, { "epoch": 10.478278730615179, "grad_norm": 0.06370113790035248, "learning_rate": 0.01, "loss": 1.9217, "step": 102027 }, { "epoch": 10.47858683372702, "grad_norm": 0.11116142570972443, "learning_rate": 0.01, "loss": 1.9374, "step": 102030 }, { "epoch": 10.478894936838863, "grad_norm": 0.10910597443580627, "learning_rate": 0.01, "loss": 1.9371, "step": 102033 }, { "epoch": 10.479203039950704, "grad_norm": 0.06832683831453323, "learning_rate": 0.01, "loss": 1.9522, "step": 102036 }, { "epoch": 10.479511143062545, "grad_norm": 0.036917220801115036, "learning_rate": 0.01, "loss": 1.9313, "step": 102039 }, { "epoch": 10.479819246174387, "grad_norm": 0.0481659471988678, "learning_rate": 0.01, "loss": 1.955, "step": 102042 }, { "epoch": 10.480127349286228, "grad_norm": 0.043729156255722046, "learning_rate": 0.01, "loss": 1.9406, "step": 102045 }, { "epoch": 10.48043545239807, "grad_norm": 0.046008024364709854, "learning_rate": 0.01, "loss": 1.9495, "step": 102048 }, { "epoch": 10.48074355550991, "grad_norm": 0.09844762831926346, "learning_rate": 0.01, "loss": 1.9406, "step": 102051 }, { "epoch": 10.481051658621752, "grad_norm": 0.0938354879617691, "learning_rate": 0.01, "loss": 1.9308, "step": 102054 }, { "epoch": 10.481359761733593, "grad_norm": 0.07487953454256058, "learning_rate": 0.01, "loss": 1.9379, "step": 102057 }, { "epoch": 10.481667864845434, "grad_norm": 0.06973642110824585, "learning_rate": 0.01, "loss": 1.9497, "step": 102060 }, { "epoch": 10.481975967957275, "grad_norm": 0.04177610203623772, "learning_rate": 0.01, "loss": 1.952, "step": 102063 }, { "epoch": 10.482284071069119, "grad_norm": 0.042998723685741425, "learning_rate": 0.01, "loss": 1.9346, "step": 102066 }, { "epoch": 10.48259217418096, "grad_norm": 0.04426990821957588, "learning_rate": 0.01, "loss": 1.9413, "step": 102069 }, { "epoch": 10.482900277292801, "grad_norm": 0.10405926406383514, "learning_rate": 0.01, "loss": 1.94, "step": 102072 }, { "epoch": 10.483208380404642, "grad_norm": 0.06819596141576767, "learning_rate": 0.01, "loss": 1.9431, "step": 102075 }, { "epoch": 10.483516483516484, "grad_norm": 0.07998234778642654, "learning_rate": 0.01, "loss": 1.9541, "step": 102078 }, { "epoch": 10.483824586628325, "grad_norm": 0.06735380738973618, "learning_rate": 0.01, "loss": 1.9389, "step": 102081 }, { "epoch": 10.484132689740166, "grad_norm": 0.1081424206495285, "learning_rate": 0.01, "loss": 1.9466, "step": 102084 }, { "epoch": 10.484440792852007, "grad_norm": 0.05134006217122078, "learning_rate": 0.01, "loss": 1.9601, "step": 102087 }, { "epoch": 10.484748895963849, "grad_norm": 0.0856146588921547, "learning_rate": 0.01, "loss": 1.9184, "step": 102090 }, { "epoch": 10.48505699907569, "grad_norm": 0.19568829238414764, "learning_rate": 0.01, "loss": 1.9444, "step": 102093 }, { "epoch": 10.485365102187533, "grad_norm": 0.1644512414932251, "learning_rate": 0.01, "loss": 1.9104, "step": 102096 }, { "epoch": 10.485673205299374, "grad_norm": 0.1425795555114746, "learning_rate": 0.01, "loss": 1.9489, "step": 102099 }, { "epoch": 10.485981308411215, "grad_norm": 0.07443909347057343, "learning_rate": 0.01, "loss": 1.9225, "step": 102102 }, { "epoch": 10.486289411523057, "grad_norm": 0.09227898716926575, "learning_rate": 0.01, "loss": 1.9448, "step": 102105 }, { "epoch": 10.486597514634898, "grad_norm": 0.07184717059135437, "learning_rate": 0.01, "loss": 1.9384, "step": 102108 }, { "epoch": 10.48690561774674, "grad_norm": 0.06727568805217743, "learning_rate": 0.01, "loss": 1.9452, "step": 102111 }, { "epoch": 10.48721372085858, "grad_norm": 0.06257406622171402, "learning_rate": 0.01, "loss": 1.9164, "step": 102114 }, { "epoch": 10.487521823970422, "grad_norm": 0.057932376861572266, "learning_rate": 0.01, "loss": 1.9237, "step": 102117 }, { "epoch": 10.487829927082263, "grad_norm": 0.04455142468214035, "learning_rate": 0.01, "loss": 1.9359, "step": 102120 }, { "epoch": 10.488138030194104, "grad_norm": 0.0342220775783062, "learning_rate": 0.01, "loss": 1.9233, "step": 102123 }, { "epoch": 10.488446133305946, "grad_norm": 0.08886317163705826, "learning_rate": 0.01, "loss": 1.9207, "step": 102126 }, { "epoch": 10.488754236417789, "grad_norm": 0.05419706553220749, "learning_rate": 0.01, "loss": 1.9131, "step": 102129 }, { "epoch": 10.48906233952963, "grad_norm": 0.10033708810806274, "learning_rate": 0.01, "loss": 1.9322, "step": 102132 }, { "epoch": 10.489370442641471, "grad_norm": 0.12334150075912476, "learning_rate": 0.01, "loss": 1.9227, "step": 102135 }, { "epoch": 10.489678545753312, "grad_norm": 0.061110034584999084, "learning_rate": 0.01, "loss": 1.9488, "step": 102138 }, { "epoch": 10.489986648865154, "grad_norm": 0.10176794975996017, "learning_rate": 0.01, "loss": 1.9479, "step": 102141 }, { "epoch": 10.490294751976995, "grad_norm": 0.07273289561271667, "learning_rate": 0.01, "loss": 1.9152, "step": 102144 }, { "epoch": 10.490602855088836, "grad_norm": 0.055310558527708054, "learning_rate": 0.01, "loss": 1.9319, "step": 102147 }, { "epoch": 10.490910958200677, "grad_norm": 0.10140859335660934, "learning_rate": 0.01, "loss": 1.9519, "step": 102150 }, { "epoch": 10.491219061312519, "grad_norm": 0.051482249051332474, "learning_rate": 0.01, "loss": 1.9513, "step": 102153 }, { "epoch": 10.49152716442436, "grad_norm": 0.08374358713626862, "learning_rate": 0.01, "loss": 1.9065, "step": 102156 }, { "epoch": 10.491835267536203, "grad_norm": 0.050834473222494125, "learning_rate": 0.01, "loss": 1.9301, "step": 102159 }, { "epoch": 10.492143370648044, "grad_norm": 0.10886992514133453, "learning_rate": 0.01, "loss": 1.9669, "step": 102162 }, { "epoch": 10.492451473759886, "grad_norm": 0.06643300503492355, "learning_rate": 0.01, "loss": 1.9449, "step": 102165 }, { "epoch": 10.492759576871727, "grad_norm": 0.050651658326387405, "learning_rate": 0.01, "loss": 1.9238, "step": 102168 }, { "epoch": 10.493067679983568, "grad_norm": 0.04459983855485916, "learning_rate": 0.01, "loss": 1.9524, "step": 102171 }, { "epoch": 10.49337578309541, "grad_norm": 0.0398244671523571, "learning_rate": 0.01, "loss": 1.9262, "step": 102174 }, { "epoch": 10.49368388620725, "grad_norm": 0.06146949157118797, "learning_rate": 0.01, "loss": 1.9323, "step": 102177 }, { "epoch": 10.493991989319092, "grad_norm": 0.06933523714542389, "learning_rate": 0.01, "loss": 1.9347, "step": 102180 }, { "epoch": 10.494300092430933, "grad_norm": 0.08496151119470596, "learning_rate": 0.01, "loss": 1.924, "step": 102183 }, { "epoch": 10.494608195542774, "grad_norm": 0.06656196713447571, "learning_rate": 0.01, "loss": 1.9318, "step": 102186 }, { "epoch": 10.494916298654616, "grad_norm": 0.057561956346035004, "learning_rate": 0.01, "loss": 1.9302, "step": 102189 }, { "epoch": 10.495224401766459, "grad_norm": 0.03765184432268143, "learning_rate": 0.01, "loss": 1.8994, "step": 102192 }, { "epoch": 10.4955325048783, "grad_norm": 0.055984582751989365, "learning_rate": 0.01, "loss": 1.962, "step": 102195 }, { "epoch": 10.495840607990141, "grad_norm": 0.06702415645122528, "learning_rate": 0.01, "loss": 1.9355, "step": 102198 }, { "epoch": 10.496148711101982, "grad_norm": 0.04814969003200531, "learning_rate": 0.01, "loss": 1.9386, "step": 102201 }, { "epoch": 10.496456814213824, "grad_norm": 0.03827301785349846, "learning_rate": 0.01, "loss": 1.9467, "step": 102204 }, { "epoch": 10.496764917325665, "grad_norm": 0.04301337152719498, "learning_rate": 0.01, "loss": 1.9546, "step": 102207 }, { "epoch": 10.497073020437506, "grad_norm": 0.04337724670767784, "learning_rate": 0.01, "loss": 1.9078, "step": 102210 }, { "epoch": 10.497381123549347, "grad_norm": 0.05614280700683594, "learning_rate": 0.01, "loss": 1.9709, "step": 102213 }, { "epoch": 10.497689226661189, "grad_norm": 0.10362491011619568, "learning_rate": 0.01, "loss": 1.9567, "step": 102216 }, { "epoch": 10.49799732977303, "grad_norm": 0.05749976262450218, "learning_rate": 0.01, "loss": 1.9314, "step": 102219 }, { "epoch": 10.498305432884871, "grad_norm": 0.05132078006863594, "learning_rate": 0.01, "loss": 1.9304, "step": 102222 }, { "epoch": 10.498613535996714, "grad_norm": 0.08391030132770538, "learning_rate": 0.01, "loss": 1.9571, "step": 102225 }, { "epoch": 10.498921639108556, "grad_norm": 0.08488337695598602, "learning_rate": 0.01, "loss": 1.9315, "step": 102228 }, { "epoch": 10.499229742220397, "grad_norm": 0.07492703944444656, "learning_rate": 0.01, "loss": 1.9616, "step": 102231 }, { "epoch": 10.499537845332238, "grad_norm": 0.0787576213479042, "learning_rate": 0.01, "loss": 1.916, "step": 102234 }, { "epoch": 10.49984594844408, "grad_norm": 0.0703982412815094, "learning_rate": 0.01, "loss": 1.9418, "step": 102237 }, { "epoch": 10.50015405155592, "grad_norm": 0.08891353011131287, "learning_rate": 0.01, "loss": 1.9252, "step": 102240 }, { "epoch": 10.500462154667762, "grad_norm": 0.12258651852607727, "learning_rate": 0.01, "loss": 1.9604, "step": 102243 }, { "epoch": 10.500770257779603, "grad_norm": 0.04051290079951286, "learning_rate": 0.01, "loss": 1.9382, "step": 102246 }, { "epoch": 10.501078360891444, "grad_norm": 0.07038848102092743, "learning_rate": 0.01, "loss": 1.9278, "step": 102249 }, { "epoch": 10.501386464003286, "grad_norm": 0.059617362916469574, "learning_rate": 0.01, "loss": 1.9614, "step": 102252 }, { "epoch": 10.501694567115127, "grad_norm": 0.06750741600990295, "learning_rate": 0.01, "loss": 1.9432, "step": 102255 }, { "epoch": 10.50200267022697, "grad_norm": 0.03963152691721916, "learning_rate": 0.01, "loss": 1.9366, "step": 102258 }, { "epoch": 10.502310773338811, "grad_norm": 0.05788528546690941, "learning_rate": 0.01, "loss": 1.9285, "step": 102261 }, { "epoch": 10.502618876450653, "grad_norm": 0.0345892570912838, "learning_rate": 0.01, "loss": 1.9326, "step": 102264 }, { "epoch": 10.502926979562494, "grad_norm": 0.04929973930120468, "learning_rate": 0.01, "loss": 1.9383, "step": 102267 }, { "epoch": 10.503235082674335, "grad_norm": 0.03237371891736984, "learning_rate": 0.01, "loss": 1.9268, "step": 102270 }, { "epoch": 10.503543185786176, "grad_norm": 0.12326578050851822, "learning_rate": 0.01, "loss": 1.9485, "step": 102273 }, { "epoch": 10.503851288898018, "grad_norm": 0.050269100815057755, "learning_rate": 0.01, "loss": 1.9504, "step": 102276 }, { "epoch": 10.504159392009859, "grad_norm": 0.08985825628042221, "learning_rate": 0.01, "loss": 1.9157, "step": 102279 }, { "epoch": 10.5044674951217, "grad_norm": 0.07561146467924118, "learning_rate": 0.01, "loss": 1.9414, "step": 102282 }, { "epoch": 10.504775598233541, "grad_norm": 0.10341837257146835, "learning_rate": 0.01, "loss": 1.9252, "step": 102285 }, { "epoch": 10.505083701345384, "grad_norm": 0.06874041259288788, "learning_rate": 0.01, "loss": 1.9581, "step": 102288 }, { "epoch": 10.505391804457226, "grad_norm": 0.05333573743700981, "learning_rate": 0.01, "loss": 1.9272, "step": 102291 }, { "epoch": 10.505699907569067, "grad_norm": 0.05619170144200325, "learning_rate": 0.01, "loss": 1.9462, "step": 102294 }, { "epoch": 10.506008010680908, "grad_norm": 0.06462277472019196, "learning_rate": 0.01, "loss": 1.9202, "step": 102297 }, { "epoch": 10.50631611379275, "grad_norm": 0.05119762942194939, "learning_rate": 0.01, "loss": 1.9262, "step": 102300 }, { "epoch": 10.50662421690459, "grad_norm": 0.10214772075414658, "learning_rate": 0.01, "loss": 1.9505, "step": 102303 }, { "epoch": 10.506932320016432, "grad_norm": 0.06918077915906906, "learning_rate": 0.01, "loss": 1.9229, "step": 102306 }, { "epoch": 10.507240423128273, "grad_norm": 0.06975413113832474, "learning_rate": 0.01, "loss": 1.9455, "step": 102309 }, { "epoch": 10.507548526240114, "grad_norm": 0.06599710136651993, "learning_rate": 0.01, "loss": 1.9354, "step": 102312 }, { "epoch": 10.507856629351956, "grad_norm": 0.08133608102798462, "learning_rate": 0.01, "loss": 1.9256, "step": 102315 }, { "epoch": 10.508164732463797, "grad_norm": 0.0877838209271431, "learning_rate": 0.01, "loss": 1.9435, "step": 102318 }, { "epoch": 10.50847283557564, "grad_norm": 0.09431187063455582, "learning_rate": 0.01, "loss": 1.9297, "step": 102321 }, { "epoch": 10.508780938687481, "grad_norm": 0.04923112317919731, "learning_rate": 0.01, "loss": 1.9563, "step": 102324 }, { "epoch": 10.509089041799323, "grad_norm": 0.10650596022605896, "learning_rate": 0.01, "loss": 1.9503, "step": 102327 }, { "epoch": 10.509397144911164, "grad_norm": 0.03952709957957268, "learning_rate": 0.01, "loss": 1.9308, "step": 102330 }, { "epoch": 10.509705248023005, "grad_norm": 0.05486638471484184, "learning_rate": 0.01, "loss": 1.9355, "step": 102333 }, { "epoch": 10.510013351134846, "grad_norm": 0.10510875284671783, "learning_rate": 0.01, "loss": 1.9508, "step": 102336 }, { "epoch": 10.510321454246688, "grad_norm": 0.08444318920373917, "learning_rate": 0.01, "loss": 1.9657, "step": 102339 }, { "epoch": 10.510629557358529, "grad_norm": 0.1074652373790741, "learning_rate": 0.01, "loss": 1.947, "step": 102342 }, { "epoch": 10.51093766047037, "grad_norm": 0.08538160473108292, "learning_rate": 0.01, "loss": 1.9488, "step": 102345 }, { "epoch": 10.511245763582211, "grad_norm": 0.04091689735651016, "learning_rate": 0.01, "loss": 1.8937, "step": 102348 }, { "epoch": 10.511553866694054, "grad_norm": 0.05738305673003197, "learning_rate": 0.01, "loss": 1.9461, "step": 102351 }, { "epoch": 10.511861969805896, "grad_norm": 0.0682215765118599, "learning_rate": 0.01, "loss": 1.9232, "step": 102354 }, { "epoch": 10.512170072917737, "grad_norm": 0.05720492824912071, "learning_rate": 0.01, "loss": 1.9349, "step": 102357 }, { "epoch": 10.512478176029578, "grad_norm": 0.04315927252173424, "learning_rate": 0.01, "loss": 1.9167, "step": 102360 }, { "epoch": 10.51278627914142, "grad_norm": 0.049331653863191605, "learning_rate": 0.01, "loss": 1.9591, "step": 102363 }, { "epoch": 10.51309438225326, "grad_norm": 0.03660670667886734, "learning_rate": 0.01, "loss": 1.9188, "step": 102366 }, { "epoch": 10.513402485365102, "grad_norm": 0.04299522191286087, "learning_rate": 0.01, "loss": 1.9172, "step": 102369 }, { "epoch": 10.513710588476943, "grad_norm": 0.1256340593099594, "learning_rate": 0.01, "loss": 1.9437, "step": 102372 }, { "epoch": 10.514018691588785, "grad_norm": 0.13235409557819366, "learning_rate": 0.01, "loss": 1.934, "step": 102375 }, { "epoch": 10.514326794700626, "grad_norm": 0.05709142982959747, "learning_rate": 0.01, "loss": 1.962, "step": 102378 }, { "epoch": 10.514634897812467, "grad_norm": 0.07259286940097809, "learning_rate": 0.01, "loss": 1.9114, "step": 102381 }, { "epoch": 10.51494300092431, "grad_norm": 0.047436416149139404, "learning_rate": 0.01, "loss": 1.945, "step": 102384 }, { "epoch": 10.515251104036151, "grad_norm": 0.03879249840974808, "learning_rate": 0.01, "loss": 1.9315, "step": 102387 }, { "epoch": 10.515559207147993, "grad_norm": 0.050321247428655624, "learning_rate": 0.01, "loss": 1.954, "step": 102390 }, { "epoch": 10.515867310259834, "grad_norm": 0.06300079822540283, "learning_rate": 0.01, "loss": 1.9446, "step": 102393 }, { "epoch": 10.516175413371675, "grad_norm": 0.0426388643682003, "learning_rate": 0.01, "loss": 1.9098, "step": 102396 }, { "epoch": 10.516483516483516, "grad_norm": 0.04434233158826828, "learning_rate": 0.01, "loss": 1.9369, "step": 102399 }, { "epoch": 10.516791619595358, "grad_norm": 0.06005145609378815, "learning_rate": 0.01, "loss": 1.9375, "step": 102402 }, { "epoch": 10.517099722707199, "grad_norm": 0.10316505283117294, "learning_rate": 0.01, "loss": 1.9144, "step": 102405 }, { "epoch": 10.51740782581904, "grad_norm": 0.08546032756567001, "learning_rate": 0.01, "loss": 1.9306, "step": 102408 }, { "epoch": 10.517715928930881, "grad_norm": 0.06833061575889587, "learning_rate": 0.01, "loss": 1.9315, "step": 102411 }, { "epoch": 10.518024032042725, "grad_norm": 0.07763389497995377, "learning_rate": 0.01, "loss": 1.9252, "step": 102414 }, { "epoch": 10.518332135154566, "grad_norm": 0.13648200035095215, "learning_rate": 0.01, "loss": 1.9213, "step": 102417 }, { "epoch": 10.518640238266407, "grad_norm": 0.0665363073348999, "learning_rate": 0.01, "loss": 1.9281, "step": 102420 }, { "epoch": 10.518948341378248, "grad_norm": 0.07532433420419693, "learning_rate": 0.01, "loss": 1.9389, "step": 102423 }, { "epoch": 10.51925644449009, "grad_norm": 0.05814901739358902, "learning_rate": 0.01, "loss": 1.9155, "step": 102426 }, { "epoch": 10.51956454760193, "grad_norm": 0.06840399652719498, "learning_rate": 0.01, "loss": 1.9538, "step": 102429 }, { "epoch": 10.519872650713772, "grad_norm": 0.059958960860967636, "learning_rate": 0.01, "loss": 1.9646, "step": 102432 }, { "epoch": 10.520180753825613, "grad_norm": 0.041250091046094894, "learning_rate": 0.01, "loss": 1.9431, "step": 102435 }, { "epoch": 10.520488856937455, "grad_norm": 0.03929082676768303, "learning_rate": 0.01, "loss": 1.9273, "step": 102438 }, { "epoch": 10.520796960049296, "grad_norm": 0.09511935710906982, "learning_rate": 0.01, "loss": 1.9233, "step": 102441 }, { "epoch": 10.521105063161137, "grad_norm": 0.1492692083120346, "learning_rate": 0.01, "loss": 1.9473, "step": 102444 }, { "epoch": 10.52141316627298, "grad_norm": 0.12860162556171417, "learning_rate": 0.01, "loss": 1.9283, "step": 102447 }, { "epoch": 10.521721269384821, "grad_norm": 0.06535381823778152, "learning_rate": 0.01, "loss": 1.9247, "step": 102450 }, { "epoch": 10.522029372496663, "grad_norm": 0.03897741809487343, "learning_rate": 0.01, "loss": 1.9347, "step": 102453 }, { "epoch": 10.522337475608504, "grad_norm": 0.03247740864753723, "learning_rate": 0.01, "loss": 1.9237, "step": 102456 }, { "epoch": 10.522645578720345, "grad_norm": 0.05719994381070137, "learning_rate": 0.01, "loss": 1.9092, "step": 102459 }, { "epoch": 10.522953681832186, "grad_norm": 0.0653923973441124, "learning_rate": 0.01, "loss": 1.9467, "step": 102462 }, { "epoch": 10.523261784944028, "grad_norm": 0.051299069076776505, "learning_rate": 0.01, "loss": 1.9235, "step": 102465 }, { "epoch": 10.523569888055869, "grad_norm": 0.04528390243649483, "learning_rate": 0.01, "loss": 1.9331, "step": 102468 }, { "epoch": 10.52387799116771, "grad_norm": 0.03322979062795639, "learning_rate": 0.01, "loss": 1.9365, "step": 102471 }, { "epoch": 10.524186094279552, "grad_norm": 0.05223941057920456, "learning_rate": 0.01, "loss": 1.9592, "step": 102474 }, { "epoch": 10.524494197391395, "grad_norm": 0.04564807564020157, "learning_rate": 0.01, "loss": 1.9539, "step": 102477 }, { "epoch": 10.524802300503236, "grad_norm": 0.07500727474689484, "learning_rate": 0.01, "loss": 1.9129, "step": 102480 }, { "epoch": 10.525110403615077, "grad_norm": 0.12562263011932373, "learning_rate": 0.01, "loss": 1.9447, "step": 102483 }, { "epoch": 10.525418506726918, "grad_norm": 0.09475285559892654, "learning_rate": 0.01, "loss": 1.9413, "step": 102486 }, { "epoch": 10.52572660983876, "grad_norm": 0.10476790368556976, "learning_rate": 0.01, "loss": 1.9532, "step": 102489 }, { "epoch": 10.526034712950601, "grad_norm": 0.03594356030225754, "learning_rate": 0.01, "loss": 1.9241, "step": 102492 }, { "epoch": 10.526342816062442, "grad_norm": 0.03649017587304115, "learning_rate": 0.01, "loss": 1.9505, "step": 102495 }, { "epoch": 10.526650919174283, "grad_norm": 0.10896097868680954, "learning_rate": 0.01, "loss": 1.9519, "step": 102498 }, { "epoch": 10.526959022286125, "grad_norm": 0.06398046016693115, "learning_rate": 0.01, "loss": 1.9284, "step": 102501 }, { "epoch": 10.527267125397966, "grad_norm": 0.03608668968081474, "learning_rate": 0.01, "loss": 1.9352, "step": 102504 }, { "epoch": 10.527575228509807, "grad_norm": 0.03560887649655342, "learning_rate": 0.01, "loss": 1.93, "step": 102507 }, { "epoch": 10.527883331621648, "grad_norm": 0.07564496248960495, "learning_rate": 0.01, "loss": 1.9596, "step": 102510 }, { "epoch": 10.528191434733492, "grad_norm": 0.07473336160182953, "learning_rate": 0.01, "loss": 1.9351, "step": 102513 }, { "epoch": 10.528499537845333, "grad_norm": 0.09763288497924805, "learning_rate": 0.01, "loss": 1.9491, "step": 102516 }, { "epoch": 10.528807640957174, "grad_norm": 0.09780536592006683, "learning_rate": 0.01, "loss": 1.9218, "step": 102519 }, { "epoch": 10.529115744069015, "grad_norm": 0.11341667920351028, "learning_rate": 0.01, "loss": 1.9372, "step": 102522 }, { "epoch": 10.529423847180857, "grad_norm": 0.04304501786828041, "learning_rate": 0.01, "loss": 1.9298, "step": 102525 }, { "epoch": 10.529731950292698, "grad_norm": 0.038570135831832886, "learning_rate": 0.01, "loss": 1.9382, "step": 102528 }, { "epoch": 10.530040053404539, "grad_norm": 0.08291329443454742, "learning_rate": 0.01, "loss": 1.9573, "step": 102531 }, { "epoch": 10.53034815651638, "grad_norm": 0.06479398161172867, "learning_rate": 0.01, "loss": 1.9259, "step": 102534 }, { "epoch": 10.530656259628222, "grad_norm": 0.10525523871183395, "learning_rate": 0.01, "loss": 1.9578, "step": 102537 }, { "epoch": 10.530964362740063, "grad_norm": 0.06369463354349136, "learning_rate": 0.01, "loss": 1.9327, "step": 102540 }, { "epoch": 10.531272465851906, "grad_norm": 0.04059941694140434, "learning_rate": 0.01, "loss": 1.9218, "step": 102543 }, { "epoch": 10.531580568963747, "grad_norm": 0.07877116650342941, "learning_rate": 0.01, "loss": 1.949, "step": 102546 }, { "epoch": 10.531888672075588, "grad_norm": 0.06322876363992691, "learning_rate": 0.01, "loss": 1.9413, "step": 102549 }, { "epoch": 10.53219677518743, "grad_norm": 0.09018049389123917, "learning_rate": 0.01, "loss": 1.9418, "step": 102552 }, { "epoch": 10.532504878299271, "grad_norm": 0.05731950327754021, "learning_rate": 0.01, "loss": 1.9517, "step": 102555 }, { "epoch": 10.532812981411112, "grad_norm": 0.055027980357408524, "learning_rate": 0.01, "loss": 1.9349, "step": 102558 }, { "epoch": 10.533121084522953, "grad_norm": 0.044410835951566696, "learning_rate": 0.01, "loss": 1.9295, "step": 102561 }, { "epoch": 10.533429187634795, "grad_norm": 0.07688567042350769, "learning_rate": 0.01, "loss": 1.9153, "step": 102564 }, { "epoch": 10.533737290746636, "grad_norm": 0.11015530675649643, "learning_rate": 0.01, "loss": 1.9578, "step": 102567 }, { "epoch": 10.534045393858477, "grad_norm": 0.06602126359939575, "learning_rate": 0.01, "loss": 1.9265, "step": 102570 }, { "epoch": 10.534353496970319, "grad_norm": 0.04021180421113968, "learning_rate": 0.01, "loss": 1.9563, "step": 102573 }, { "epoch": 10.534661600082162, "grad_norm": 0.039868198335170746, "learning_rate": 0.01, "loss": 1.9556, "step": 102576 }, { "epoch": 10.534969703194003, "grad_norm": 0.06292813271284103, "learning_rate": 0.01, "loss": 1.933, "step": 102579 }, { "epoch": 10.535277806305844, "grad_norm": 0.034579694271087646, "learning_rate": 0.01, "loss": 1.9424, "step": 102582 }, { "epoch": 10.535585909417685, "grad_norm": 0.036958981305360794, "learning_rate": 0.01, "loss": 1.9412, "step": 102585 }, { "epoch": 10.535894012529527, "grad_norm": 0.04140496253967285, "learning_rate": 0.01, "loss": 1.9542, "step": 102588 }, { "epoch": 10.536202115641368, "grad_norm": 0.12174369394779205, "learning_rate": 0.01, "loss": 1.9535, "step": 102591 }, { "epoch": 10.53651021875321, "grad_norm": 0.033348649740219116, "learning_rate": 0.01, "loss": 1.926, "step": 102594 }, { "epoch": 10.53681832186505, "grad_norm": 0.03767925873398781, "learning_rate": 0.01, "loss": 1.9548, "step": 102597 }, { "epoch": 10.537126424976892, "grad_norm": 0.044103484600782394, "learning_rate": 0.01, "loss": 1.9418, "step": 102600 }, { "epoch": 10.537434528088733, "grad_norm": 0.05621744319796562, "learning_rate": 0.01, "loss": 1.9271, "step": 102603 }, { "epoch": 10.537742631200576, "grad_norm": 0.05371059104800224, "learning_rate": 0.01, "loss": 1.9306, "step": 102606 }, { "epoch": 10.538050734312417, "grad_norm": 0.07096249610185623, "learning_rate": 0.01, "loss": 1.9415, "step": 102609 }, { "epoch": 10.538358837424258, "grad_norm": 0.04149243235588074, "learning_rate": 0.01, "loss": 1.9272, "step": 102612 }, { "epoch": 10.5386669405361, "grad_norm": 0.04006730020046234, "learning_rate": 0.01, "loss": 1.9519, "step": 102615 }, { "epoch": 10.538975043647941, "grad_norm": 0.07221797108650208, "learning_rate": 0.01, "loss": 1.9557, "step": 102618 }, { "epoch": 10.539283146759782, "grad_norm": 0.20590007305145264, "learning_rate": 0.01, "loss": 1.9281, "step": 102621 }, { "epoch": 10.539591249871624, "grad_norm": 0.08784258365631104, "learning_rate": 0.01, "loss": 1.9679, "step": 102624 }, { "epoch": 10.539899352983465, "grad_norm": 0.07793600857257843, "learning_rate": 0.01, "loss": 1.924, "step": 102627 }, { "epoch": 10.540207456095306, "grad_norm": 0.08263092488050461, "learning_rate": 0.01, "loss": 1.9632, "step": 102630 }, { "epoch": 10.540515559207147, "grad_norm": 0.051660630851984024, "learning_rate": 0.01, "loss": 1.9256, "step": 102633 }, { "epoch": 10.540823662318989, "grad_norm": 0.03541038930416107, "learning_rate": 0.01, "loss": 1.9541, "step": 102636 }, { "epoch": 10.541131765430832, "grad_norm": 0.04703596606850624, "learning_rate": 0.01, "loss": 1.9171, "step": 102639 }, { "epoch": 10.541439868542673, "grad_norm": 0.04202953726053238, "learning_rate": 0.01, "loss": 1.9149, "step": 102642 }, { "epoch": 10.541747971654514, "grad_norm": 0.05148185044527054, "learning_rate": 0.01, "loss": 1.8994, "step": 102645 }, { "epoch": 10.542056074766355, "grad_norm": 0.04630628600716591, "learning_rate": 0.01, "loss": 1.9329, "step": 102648 }, { "epoch": 10.542364177878197, "grad_norm": 0.058917704969644547, "learning_rate": 0.01, "loss": 1.9476, "step": 102651 }, { "epoch": 10.542672280990038, "grad_norm": 0.050363462418317795, "learning_rate": 0.01, "loss": 1.9541, "step": 102654 }, { "epoch": 10.54298038410188, "grad_norm": 0.07656794041395187, "learning_rate": 0.01, "loss": 1.937, "step": 102657 }, { "epoch": 10.54328848721372, "grad_norm": 0.09826540946960449, "learning_rate": 0.01, "loss": 1.9501, "step": 102660 }, { "epoch": 10.543596590325562, "grad_norm": 0.09988114982843399, "learning_rate": 0.01, "loss": 1.9248, "step": 102663 }, { "epoch": 10.543904693437403, "grad_norm": 0.09750048071146011, "learning_rate": 0.01, "loss": 1.9605, "step": 102666 }, { "epoch": 10.544212796549246, "grad_norm": 0.0635206550359726, "learning_rate": 0.01, "loss": 1.959, "step": 102669 }, { "epoch": 10.544520899661087, "grad_norm": 0.05383175611495972, "learning_rate": 0.01, "loss": 1.9656, "step": 102672 }, { "epoch": 10.544829002772929, "grad_norm": 0.037810489535331726, "learning_rate": 0.01, "loss": 1.9378, "step": 102675 }, { "epoch": 10.54513710588477, "grad_norm": 0.04186122491955757, "learning_rate": 0.01, "loss": 1.9591, "step": 102678 }, { "epoch": 10.545445208996611, "grad_norm": 0.05252018943428993, "learning_rate": 0.01, "loss": 1.9359, "step": 102681 }, { "epoch": 10.545753312108452, "grad_norm": 0.13041871786117554, "learning_rate": 0.01, "loss": 1.9318, "step": 102684 }, { "epoch": 10.546061415220294, "grad_norm": 0.11638039350509644, "learning_rate": 0.01, "loss": 1.9322, "step": 102687 }, { "epoch": 10.546369518332135, "grad_norm": 0.052283596247434616, "learning_rate": 0.01, "loss": 1.9383, "step": 102690 }, { "epoch": 10.546677621443976, "grad_norm": 0.05509389564394951, "learning_rate": 0.01, "loss": 1.9236, "step": 102693 }, { "epoch": 10.546985724555817, "grad_norm": 0.08381091803312302, "learning_rate": 0.01, "loss": 1.9691, "step": 102696 }, { "epoch": 10.547293827667659, "grad_norm": 0.05065052583813667, "learning_rate": 0.01, "loss": 1.9521, "step": 102699 }, { "epoch": 10.547601930779502, "grad_norm": 0.035547565668821335, "learning_rate": 0.01, "loss": 1.9373, "step": 102702 }, { "epoch": 10.547910033891343, "grad_norm": 0.051477137953042984, "learning_rate": 0.01, "loss": 1.9263, "step": 102705 }, { "epoch": 10.548218137003184, "grad_norm": 0.07911818474531174, "learning_rate": 0.01, "loss": 1.9377, "step": 102708 }, { "epoch": 10.548526240115025, "grad_norm": 0.0840601772069931, "learning_rate": 0.01, "loss": 1.9506, "step": 102711 }, { "epoch": 10.548834343226867, "grad_norm": 0.05134335905313492, "learning_rate": 0.01, "loss": 1.9408, "step": 102714 }, { "epoch": 10.549142446338708, "grad_norm": 0.039568942040205, "learning_rate": 0.01, "loss": 1.9446, "step": 102717 }, { "epoch": 10.54945054945055, "grad_norm": 0.13316182792186737, "learning_rate": 0.01, "loss": 1.9631, "step": 102720 }, { "epoch": 10.54975865256239, "grad_norm": 0.04146268963813782, "learning_rate": 0.01, "loss": 1.9124, "step": 102723 }, { "epoch": 10.550066755674232, "grad_norm": 0.06591855734586716, "learning_rate": 0.01, "loss": 1.9449, "step": 102726 }, { "epoch": 10.550374858786073, "grad_norm": 0.05796660855412483, "learning_rate": 0.01, "loss": 1.9258, "step": 102729 }, { "epoch": 10.550682961897916, "grad_norm": 0.08629636466503143, "learning_rate": 0.01, "loss": 1.9113, "step": 102732 }, { "epoch": 10.550991065009757, "grad_norm": 0.0613933801651001, "learning_rate": 0.01, "loss": 1.9429, "step": 102735 }, { "epoch": 10.551299168121599, "grad_norm": 0.038255706429481506, "learning_rate": 0.01, "loss": 1.9395, "step": 102738 }, { "epoch": 10.55160727123344, "grad_norm": 0.0523841418325901, "learning_rate": 0.01, "loss": 1.9469, "step": 102741 }, { "epoch": 10.551915374345281, "grad_norm": 0.05940907076001167, "learning_rate": 0.01, "loss": 1.9395, "step": 102744 }, { "epoch": 10.552223477457122, "grad_norm": 0.043213821947574615, "learning_rate": 0.01, "loss": 1.933, "step": 102747 }, { "epoch": 10.552531580568964, "grad_norm": 0.13153788447380066, "learning_rate": 0.01, "loss": 1.9358, "step": 102750 }, { "epoch": 10.552839683680805, "grad_norm": 0.047780994325876236, "learning_rate": 0.01, "loss": 1.961, "step": 102753 }, { "epoch": 10.553147786792646, "grad_norm": 0.0988832414150238, "learning_rate": 0.01, "loss": 1.9323, "step": 102756 }, { "epoch": 10.553455889904487, "grad_norm": 0.06222406029701233, "learning_rate": 0.01, "loss": 1.9624, "step": 102759 }, { "epoch": 10.553763993016329, "grad_norm": 0.052710965275764465, "learning_rate": 0.01, "loss": 1.9507, "step": 102762 }, { "epoch": 10.55407209612817, "grad_norm": 0.04257237911224365, "learning_rate": 0.01, "loss": 1.9278, "step": 102765 }, { "epoch": 10.554380199240013, "grad_norm": 0.06545194983482361, "learning_rate": 0.01, "loss": 1.9268, "step": 102768 }, { "epoch": 10.554688302351854, "grad_norm": 0.13155023753643036, "learning_rate": 0.01, "loss": 1.9378, "step": 102771 }, { "epoch": 10.554996405463696, "grad_norm": 0.06403207033872604, "learning_rate": 0.01, "loss": 1.9245, "step": 102774 }, { "epoch": 10.555304508575537, "grad_norm": 0.0546175017952919, "learning_rate": 0.01, "loss": 1.9621, "step": 102777 }, { "epoch": 10.555612611687378, "grad_norm": 0.04145312309265137, "learning_rate": 0.01, "loss": 1.958, "step": 102780 }, { "epoch": 10.55592071479922, "grad_norm": 0.03439625725150108, "learning_rate": 0.01, "loss": 1.9174, "step": 102783 }, { "epoch": 10.55622881791106, "grad_norm": 0.0312015563249588, "learning_rate": 0.01, "loss": 1.9106, "step": 102786 }, { "epoch": 10.556536921022902, "grad_norm": 0.09145013242959976, "learning_rate": 0.01, "loss": 1.9405, "step": 102789 }, { "epoch": 10.556845024134743, "grad_norm": 0.04190470650792122, "learning_rate": 0.01, "loss": 1.9364, "step": 102792 }, { "epoch": 10.557153127246584, "grad_norm": 0.07646419107913971, "learning_rate": 0.01, "loss": 1.9265, "step": 102795 }, { "epoch": 10.557461230358427, "grad_norm": 0.04356531798839569, "learning_rate": 0.01, "loss": 1.9419, "step": 102798 }, { "epoch": 10.557769333470269, "grad_norm": 0.05221903324127197, "learning_rate": 0.01, "loss": 1.9331, "step": 102801 }, { "epoch": 10.55807743658211, "grad_norm": 0.04501466825604439, "learning_rate": 0.01, "loss": 1.9519, "step": 102804 }, { "epoch": 10.558385539693951, "grad_norm": 0.11923180520534515, "learning_rate": 0.01, "loss": 1.95, "step": 102807 }, { "epoch": 10.558693642805792, "grad_norm": 0.05566087365150452, "learning_rate": 0.01, "loss": 1.8893, "step": 102810 }, { "epoch": 10.559001745917634, "grad_norm": 0.10343495011329651, "learning_rate": 0.01, "loss": 1.9317, "step": 102813 }, { "epoch": 10.559309849029475, "grad_norm": 0.055790793150663376, "learning_rate": 0.01, "loss": 1.93, "step": 102816 }, { "epoch": 10.559617952141316, "grad_norm": 0.03885180503129959, "learning_rate": 0.01, "loss": 1.9313, "step": 102819 }, { "epoch": 10.559926055253158, "grad_norm": 0.0437774695456028, "learning_rate": 0.01, "loss": 1.915, "step": 102822 }, { "epoch": 10.560234158364999, "grad_norm": 0.044454824179410934, "learning_rate": 0.01, "loss": 1.9389, "step": 102825 }, { "epoch": 10.56054226147684, "grad_norm": 0.13780072331428528, "learning_rate": 0.01, "loss": 1.9379, "step": 102828 }, { "epoch": 10.560850364588683, "grad_norm": 0.04543958604335785, "learning_rate": 0.01, "loss": 1.9341, "step": 102831 }, { "epoch": 10.561158467700524, "grad_norm": 0.040763672441244125, "learning_rate": 0.01, "loss": 1.9495, "step": 102834 }, { "epoch": 10.561466570812366, "grad_norm": 0.03325378894805908, "learning_rate": 0.01, "loss": 1.9284, "step": 102837 }, { "epoch": 10.561774673924207, "grad_norm": 0.0563448891043663, "learning_rate": 0.01, "loss": 1.9567, "step": 102840 }, { "epoch": 10.562082777036048, "grad_norm": 0.0488412082195282, "learning_rate": 0.01, "loss": 1.9492, "step": 102843 }, { "epoch": 10.56239088014789, "grad_norm": 0.065616674721241, "learning_rate": 0.01, "loss": 1.9498, "step": 102846 }, { "epoch": 10.56269898325973, "grad_norm": 0.10472216457128525, "learning_rate": 0.01, "loss": 1.9329, "step": 102849 }, { "epoch": 10.563007086371572, "grad_norm": 0.08060616999864578, "learning_rate": 0.01, "loss": 1.9507, "step": 102852 }, { "epoch": 10.563315189483413, "grad_norm": 0.062388233840465546, "learning_rate": 0.01, "loss": 1.9356, "step": 102855 }, { "epoch": 10.563623292595254, "grad_norm": 0.09207983314990997, "learning_rate": 0.01, "loss": 1.948, "step": 102858 }, { "epoch": 10.563931395707097, "grad_norm": 0.06631029397249222, "learning_rate": 0.01, "loss": 1.9374, "step": 102861 }, { "epoch": 10.564239498818939, "grad_norm": 0.07797393947839737, "learning_rate": 0.01, "loss": 1.9556, "step": 102864 }, { "epoch": 10.56454760193078, "grad_norm": 0.06673528254032135, "learning_rate": 0.01, "loss": 1.9539, "step": 102867 }, { "epoch": 10.564855705042621, "grad_norm": 0.09968683868646622, "learning_rate": 0.01, "loss": 1.9553, "step": 102870 }, { "epoch": 10.565163808154463, "grad_norm": 0.03743911162018776, "learning_rate": 0.01, "loss": 1.9539, "step": 102873 }, { "epoch": 10.565471911266304, "grad_norm": 0.07518713921308517, "learning_rate": 0.01, "loss": 1.9589, "step": 102876 }, { "epoch": 10.565780014378145, "grad_norm": 0.035093970596790314, "learning_rate": 0.01, "loss": 1.9346, "step": 102879 }, { "epoch": 10.566088117489986, "grad_norm": 0.060128387063741684, "learning_rate": 0.01, "loss": 1.9547, "step": 102882 }, { "epoch": 10.566396220601828, "grad_norm": 0.03313090279698372, "learning_rate": 0.01, "loss": 1.9165, "step": 102885 }, { "epoch": 10.566704323713669, "grad_norm": 0.03142146021127701, "learning_rate": 0.01, "loss": 1.9297, "step": 102888 }, { "epoch": 10.56701242682551, "grad_norm": 0.05178171396255493, "learning_rate": 0.01, "loss": 1.9527, "step": 102891 }, { "epoch": 10.567320529937353, "grad_norm": 0.06281552463769913, "learning_rate": 0.01, "loss": 1.9438, "step": 102894 }, { "epoch": 10.567628633049194, "grad_norm": 0.10062888264656067, "learning_rate": 0.01, "loss": 1.934, "step": 102897 }, { "epoch": 10.567936736161036, "grad_norm": 0.08972020447254181, "learning_rate": 0.01, "loss": 1.9204, "step": 102900 }, { "epoch": 10.568244839272877, "grad_norm": 0.07290536165237427, "learning_rate": 0.01, "loss": 1.9346, "step": 102903 }, { "epoch": 10.568552942384718, "grad_norm": 0.10600350797176361, "learning_rate": 0.01, "loss": 1.9583, "step": 102906 }, { "epoch": 10.56886104549656, "grad_norm": 0.07006838172674179, "learning_rate": 0.01, "loss": 1.9312, "step": 102909 }, { "epoch": 10.5691691486084, "grad_norm": 0.03484833613038063, "learning_rate": 0.01, "loss": 1.9391, "step": 102912 }, { "epoch": 10.569477251720242, "grad_norm": 0.09895322471857071, "learning_rate": 0.01, "loss": 1.9245, "step": 102915 }, { "epoch": 10.569785354832083, "grad_norm": 0.08670248836278915, "learning_rate": 0.01, "loss": 1.9361, "step": 102918 }, { "epoch": 10.570093457943925, "grad_norm": 0.10926210880279541, "learning_rate": 0.01, "loss": 1.9309, "step": 102921 }, { "epoch": 10.570401561055768, "grad_norm": 0.05001296475529671, "learning_rate": 0.01, "loss": 1.9444, "step": 102924 }, { "epoch": 10.570709664167609, "grad_norm": 0.05949382856488228, "learning_rate": 0.01, "loss": 1.9646, "step": 102927 }, { "epoch": 10.57101776727945, "grad_norm": 0.10504626482725143, "learning_rate": 0.01, "loss": 1.9722, "step": 102930 }, { "epoch": 10.571325870391291, "grad_norm": 0.037058062851428986, "learning_rate": 0.01, "loss": 1.9658, "step": 102933 }, { "epoch": 10.571633973503133, "grad_norm": 0.06955143809318542, "learning_rate": 0.01, "loss": 1.9179, "step": 102936 }, { "epoch": 10.571942076614974, "grad_norm": 0.08500155806541443, "learning_rate": 0.01, "loss": 1.9345, "step": 102939 }, { "epoch": 10.572250179726815, "grad_norm": 0.05796537175774574, "learning_rate": 0.01, "loss": 1.9423, "step": 102942 }, { "epoch": 10.572558282838656, "grad_norm": 0.10697682201862335, "learning_rate": 0.01, "loss": 1.9477, "step": 102945 }, { "epoch": 10.572866385950498, "grad_norm": 0.08648983389139175, "learning_rate": 0.01, "loss": 1.9481, "step": 102948 }, { "epoch": 10.573174489062339, "grad_norm": 0.0635012835264206, "learning_rate": 0.01, "loss": 1.9446, "step": 102951 }, { "epoch": 10.57348259217418, "grad_norm": 0.08696910738945007, "learning_rate": 0.01, "loss": 1.9141, "step": 102954 }, { "epoch": 10.573790695286021, "grad_norm": 0.0655401274561882, "learning_rate": 0.01, "loss": 1.9426, "step": 102957 }, { "epoch": 10.574098798397864, "grad_norm": 0.03808220475912094, "learning_rate": 0.01, "loss": 1.9573, "step": 102960 }, { "epoch": 10.574406901509706, "grad_norm": 0.10985107719898224, "learning_rate": 0.01, "loss": 1.9375, "step": 102963 }, { "epoch": 10.574715004621547, "grad_norm": 0.04316844791173935, "learning_rate": 0.01, "loss": 1.9127, "step": 102966 }, { "epoch": 10.575023107733388, "grad_norm": 0.07953254133462906, "learning_rate": 0.01, "loss": 1.9323, "step": 102969 }, { "epoch": 10.57533121084523, "grad_norm": 0.09557713568210602, "learning_rate": 0.01, "loss": 1.9249, "step": 102972 }, { "epoch": 10.57563931395707, "grad_norm": 0.11650662869215012, "learning_rate": 0.01, "loss": 1.9557, "step": 102975 }, { "epoch": 10.575947417068912, "grad_norm": 0.056457098573446274, "learning_rate": 0.01, "loss": 1.9507, "step": 102978 }, { "epoch": 10.576255520180753, "grad_norm": 0.04905838891863823, "learning_rate": 0.01, "loss": 1.9221, "step": 102981 }, { "epoch": 10.576563623292595, "grad_norm": 0.13630616664886475, "learning_rate": 0.01, "loss": 1.9198, "step": 102984 }, { "epoch": 10.576871726404438, "grad_norm": 0.04883322864770889, "learning_rate": 0.01, "loss": 1.9436, "step": 102987 }, { "epoch": 10.577179829516279, "grad_norm": 0.055827610194683075, "learning_rate": 0.01, "loss": 1.9347, "step": 102990 }, { "epoch": 10.57748793262812, "grad_norm": 0.04752153530716896, "learning_rate": 0.01, "loss": 1.9396, "step": 102993 }, { "epoch": 10.577796035739961, "grad_norm": 0.03894266113638878, "learning_rate": 0.01, "loss": 1.9409, "step": 102996 }, { "epoch": 10.578104138851803, "grad_norm": 0.1049915999174118, "learning_rate": 0.01, "loss": 1.947, "step": 102999 }, { "epoch": 10.578412241963644, "grad_norm": 0.047973405569791794, "learning_rate": 0.01, "loss": 1.9403, "step": 103002 }, { "epoch": 10.578720345075485, "grad_norm": 0.03760508447885513, "learning_rate": 0.01, "loss": 1.9284, "step": 103005 }, { "epoch": 10.579028448187326, "grad_norm": 0.036963608115911484, "learning_rate": 0.01, "loss": 1.9332, "step": 103008 }, { "epoch": 10.579336551299168, "grad_norm": 0.052010320127010345, "learning_rate": 0.01, "loss": 1.9576, "step": 103011 }, { "epoch": 10.579644654411009, "grad_norm": 0.054368756711483, "learning_rate": 0.01, "loss": 1.9305, "step": 103014 }, { "epoch": 10.57995275752285, "grad_norm": 0.04135025665163994, "learning_rate": 0.01, "loss": 1.9428, "step": 103017 }, { "epoch": 10.580260860634692, "grad_norm": 0.1422293335199356, "learning_rate": 0.01, "loss": 1.9371, "step": 103020 }, { "epoch": 10.580568963746535, "grad_norm": 0.10143115371465683, "learning_rate": 0.01, "loss": 1.9372, "step": 103023 }, { "epoch": 10.580877066858376, "grad_norm": 0.03662586957216263, "learning_rate": 0.01, "loss": 1.9401, "step": 103026 }, { "epoch": 10.581185169970217, "grad_norm": 0.06366636604070663, "learning_rate": 0.01, "loss": 1.9561, "step": 103029 }, { "epoch": 10.581493273082058, "grad_norm": 0.06303658336400986, "learning_rate": 0.01, "loss": 1.9356, "step": 103032 }, { "epoch": 10.5818013761939, "grad_norm": 0.07589997351169586, "learning_rate": 0.01, "loss": 1.9342, "step": 103035 }, { "epoch": 10.58210947930574, "grad_norm": 0.06703688204288483, "learning_rate": 0.01, "loss": 1.9364, "step": 103038 }, { "epoch": 10.582417582417582, "grad_norm": 0.06799531728029251, "learning_rate": 0.01, "loss": 1.9321, "step": 103041 }, { "epoch": 10.582725685529423, "grad_norm": 0.0771314799785614, "learning_rate": 0.01, "loss": 1.9298, "step": 103044 }, { "epoch": 10.583033788641265, "grad_norm": 0.0924944281578064, "learning_rate": 0.01, "loss": 1.9673, "step": 103047 }, { "epoch": 10.583341891753106, "grad_norm": 0.04260650649666786, "learning_rate": 0.01, "loss": 1.9291, "step": 103050 }, { "epoch": 10.583649994864949, "grad_norm": 0.07379073649644852, "learning_rate": 0.01, "loss": 1.9603, "step": 103053 }, { "epoch": 10.58395809797679, "grad_norm": 0.10091949254274368, "learning_rate": 0.01, "loss": 1.9283, "step": 103056 }, { "epoch": 10.584266201088631, "grad_norm": 0.07707443088293076, "learning_rate": 0.01, "loss": 1.957, "step": 103059 }, { "epoch": 10.584574304200473, "grad_norm": 0.07976933568716049, "learning_rate": 0.01, "loss": 1.9379, "step": 103062 }, { "epoch": 10.584882407312314, "grad_norm": 0.04869668558239937, "learning_rate": 0.01, "loss": 1.9194, "step": 103065 }, { "epoch": 10.585190510424155, "grad_norm": 0.04817415028810501, "learning_rate": 0.01, "loss": 1.9371, "step": 103068 }, { "epoch": 10.585498613535997, "grad_norm": 0.03385568782687187, "learning_rate": 0.01, "loss": 1.9684, "step": 103071 }, { "epoch": 10.585806716647838, "grad_norm": 0.04275520145893097, "learning_rate": 0.01, "loss": 1.9435, "step": 103074 }, { "epoch": 10.586114819759679, "grad_norm": 0.14263440668582916, "learning_rate": 0.01, "loss": 1.9405, "step": 103077 }, { "epoch": 10.58642292287152, "grad_norm": 0.05198967084288597, "learning_rate": 0.01, "loss": 1.9498, "step": 103080 }, { "epoch": 10.586731025983362, "grad_norm": 0.041763633489608765, "learning_rate": 0.01, "loss": 1.9313, "step": 103083 }, { "epoch": 10.587039129095205, "grad_norm": 0.04080444574356079, "learning_rate": 0.01, "loss": 1.9307, "step": 103086 }, { "epoch": 10.587347232207046, "grad_norm": 0.08491925895214081, "learning_rate": 0.01, "loss": 1.925, "step": 103089 }, { "epoch": 10.587655335318887, "grad_norm": 0.07890848815441132, "learning_rate": 0.01, "loss": 1.9351, "step": 103092 }, { "epoch": 10.587963438430728, "grad_norm": 0.11974522471427917, "learning_rate": 0.01, "loss": 1.9547, "step": 103095 }, { "epoch": 10.58827154154257, "grad_norm": 0.1163688525557518, "learning_rate": 0.01, "loss": 1.9267, "step": 103098 }, { "epoch": 10.588579644654411, "grad_norm": 0.048066623508930206, "learning_rate": 0.01, "loss": 1.9288, "step": 103101 }, { "epoch": 10.588887747766252, "grad_norm": 0.04997541010379791, "learning_rate": 0.01, "loss": 1.9339, "step": 103104 }, { "epoch": 10.589195850878093, "grad_norm": 0.06714275479316711, "learning_rate": 0.01, "loss": 1.9253, "step": 103107 }, { "epoch": 10.589503953989935, "grad_norm": 0.03831176087260246, "learning_rate": 0.01, "loss": 1.9496, "step": 103110 }, { "epoch": 10.589812057101776, "grad_norm": 0.03693114221096039, "learning_rate": 0.01, "loss": 1.9107, "step": 103113 }, { "epoch": 10.590120160213619, "grad_norm": 0.03890356793999672, "learning_rate": 0.01, "loss": 1.9349, "step": 103116 }, { "epoch": 10.59042826332546, "grad_norm": 0.11209408938884735, "learning_rate": 0.01, "loss": 1.9332, "step": 103119 }, { "epoch": 10.590736366437302, "grad_norm": 0.07923496514558792, "learning_rate": 0.01, "loss": 1.9604, "step": 103122 }, { "epoch": 10.591044469549143, "grad_norm": 0.04634235054254532, "learning_rate": 0.01, "loss": 1.9392, "step": 103125 }, { "epoch": 10.591352572660984, "grad_norm": 0.05020580068230629, "learning_rate": 0.01, "loss": 1.9394, "step": 103128 }, { "epoch": 10.591660675772825, "grad_norm": 0.03846663236618042, "learning_rate": 0.01, "loss": 1.9559, "step": 103131 }, { "epoch": 10.591968778884667, "grad_norm": 0.03346196934580803, "learning_rate": 0.01, "loss": 1.9437, "step": 103134 }, { "epoch": 10.592276881996508, "grad_norm": 0.0380299910902977, "learning_rate": 0.01, "loss": 1.9428, "step": 103137 }, { "epoch": 10.59258498510835, "grad_norm": 0.06727387756109238, "learning_rate": 0.01, "loss": 1.96, "step": 103140 }, { "epoch": 10.59289308822019, "grad_norm": 0.06813313812017441, "learning_rate": 0.01, "loss": 1.9394, "step": 103143 }, { "epoch": 10.593201191332032, "grad_norm": 0.1453511118888855, "learning_rate": 0.01, "loss": 1.9697, "step": 103146 }, { "epoch": 10.593509294443875, "grad_norm": 0.04510387405753136, "learning_rate": 0.01, "loss": 1.9476, "step": 103149 }, { "epoch": 10.593817397555716, "grad_norm": 0.03431488573551178, "learning_rate": 0.01, "loss": 1.9242, "step": 103152 }, { "epoch": 10.594125500667557, "grad_norm": 0.0411214642226696, "learning_rate": 0.01, "loss": 1.944, "step": 103155 }, { "epoch": 10.594433603779398, "grad_norm": 0.054067108780145645, "learning_rate": 0.01, "loss": 1.944, "step": 103158 }, { "epoch": 10.59474170689124, "grad_norm": 0.050103701651096344, "learning_rate": 0.01, "loss": 1.9656, "step": 103161 }, { "epoch": 10.595049810003081, "grad_norm": 0.04060819745063782, "learning_rate": 0.01, "loss": 1.9121, "step": 103164 }, { "epoch": 10.595357913114922, "grad_norm": 0.06645625084638596, "learning_rate": 0.01, "loss": 1.921, "step": 103167 }, { "epoch": 10.595666016226764, "grad_norm": 0.08807400614023209, "learning_rate": 0.01, "loss": 1.9383, "step": 103170 }, { "epoch": 10.595974119338605, "grad_norm": 0.1098518967628479, "learning_rate": 0.01, "loss": 1.9328, "step": 103173 }, { "epoch": 10.596282222450446, "grad_norm": 0.048754602670669556, "learning_rate": 0.01, "loss": 1.9561, "step": 103176 }, { "epoch": 10.596590325562289, "grad_norm": 0.0500849112868309, "learning_rate": 0.01, "loss": 1.9365, "step": 103179 }, { "epoch": 10.59689842867413, "grad_norm": 0.0513179674744606, "learning_rate": 0.01, "loss": 1.9357, "step": 103182 }, { "epoch": 10.597206531785972, "grad_norm": 0.0895581841468811, "learning_rate": 0.01, "loss": 1.9261, "step": 103185 }, { "epoch": 10.597514634897813, "grad_norm": 0.08084497600793839, "learning_rate": 0.01, "loss": 1.9415, "step": 103188 }, { "epoch": 10.597822738009654, "grad_norm": 0.04788369685411453, "learning_rate": 0.01, "loss": 1.9255, "step": 103191 }, { "epoch": 10.598130841121495, "grad_norm": 0.07950782030820847, "learning_rate": 0.01, "loss": 1.9314, "step": 103194 }, { "epoch": 10.598438944233337, "grad_norm": 0.07396837323904037, "learning_rate": 0.01, "loss": 1.9472, "step": 103197 }, { "epoch": 10.598747047345178, "grad_norm": 0.08950217068195343, "learning_rate": 0.01, "loss": 1.9396, "step": 103200 }, { "epoch": 10.59905515045702, "grad_norm": 0.04552943632006645, "learning_rate": 0.01, "loss": 1.9666, "step": 103203 }, { "epoch": 10.59936325356886, "grad_norm": 0.09012352675199509, "learning_rate": 0.01, "loss": 1.9193, "step": 103206 }, { "epoch": 10.599671356680702, "grad_norm": 0.05487809702754021, "learning_rate": 0.01, "loss": 1.9496, "step": 103209 }, { "epoch": 10.599979459792543, "grad_norm": 0.11796051263809204, "learning_rate": 0.01, "loss": 1.9442, "step": 103212 }, { "epoch": 10.600287562904386, "grad_norm": 0.07048312574625015, "learning_rate": 0.01, "loss": 1.9442, "step": 103215 }, { "epoch": 10.600595666016227, "grad_norm": 0.04323925822973251, "learning_rate": 0.01, "loss": 1.9475, "step": 103218 }, { "epoch": 10.600903769128069, "grad_norm": 0.08954882621765137, "learning_rate": 0.01, "loss": 1.9315, "step": 103221 }, { "epoch": 10.60121187223991, "grad_norm": 0.08253049850463867, "learning_rate": 0.01, "loss": 1.9624, "step": 103224 }, { "epoch": 10.601519975351751, "grad_norm": 0.08243921399116516, "learning_rate": 0.01, "loss": 1.9611, "step": 103227 }, { "epoch": 10.601828078463592, "grad_norm": 0.05085128918290138, "learning_rate": 0.01, "loss": 1.9646, "step": 103230 }, { "epoch": 10.602136181575434, "grad_norm": 0.1342676877975464, "learning_rate": 0.01, "loss": 1.9453, "step": 103233 }, { "epoch": 10.602444284687275, "grad_norm": 0.05520642176270485, "learning_rate": 0.01, "loss": 1.9342, "step": 103236 }, { "epoch": 10.602752387799116, "grad_norm": 0.04326225444674492, "learning_rate": 0.01, "loss": 1.954, "step": 103239 }, { "epoch": 10.603060490910957, "grad_norm": 0.09352917969226837, "learning_rate": 0.01, "loss": 1.9253, "step": 103242 }, { "epoch": 10.6033685940228, "grad_norm": 0.048500899225473404, "learning_rate": 0.01, "loss": 1.9453, "step": 103245 }, { "epoch": 10.603676697134642, "grad_norm": 0.05939725786447525, "learning_rate": 0.01, "loss": 1.9784, "step": 103248 }, { "epoch": 10.603984800246483, "grad_norm": 0.06792570650577545, "learning_rate": 0.01, "loss": 1.9698, "step": 103251 }, { "epoch": 10.604292903358324, "grad_norm": 0.08179488033056259, "learning_rate": 0.01, "loss": 1.9462, "step": 103254 }, { "epoch": 10.604601006470165, "grad_norm": 0.0700092688202858, "learning_rate": 0.01, "loss": 1.9346, "step": 103257 }, { "epoch": 10.604909109582007, "grad_norm": 0.07969602197408676, "learning_rate": 0.01, "loss": 1.9381, "step": 103260 }, { "epoch": 10.605217212693848, "grad_norm": 0.05254344269633293, "learning_rate": 0.01, "loss": 1.952, "step": 103263 }, { "epoch": 10.60552531580569, "grad_norm": 0.051222894340753555, "learning_rate": 0.01, "loss": 1.9467, "step": 103266 }, { "epoch": 10.60583341891753, "grad_norm": 0.1603354960680008, "learning_rate": 0.01, "loss": 1.9368, "step": 103269 }, { "epoch": 10.606141522029372, "grad_norm": 0.0578870065510273, "learning_rate": 0.01, "loss": 1.9474, "step": 103272 }, { "epoch": 10.606449625141213, "grad_norm": 0.05055926367640495, "learning_rate": 0.01, "loss": 1.9321, "step": 103275 }, { "epoch": 10.606757728253056, "grad_norm": 0.07149633020162582, "learning_rate": 0.01, "loss": 1.9198, "step": 103278 }, { "epoch": 10.607065831364897, "grad_norm": 0.0640505850315094, "learning_rate": 0.01, "loss": 1.9191, "step": 103281 }, { "epoch": 10.607373934476739, "grad_norm": 0.050150126218795776, "learning_rate": 0.01, "loss": 1.9389, "step": 103284 }, { "epoch": 10.60768203758858, "grad_norm": 0.04435747489333153, "learning_rate": 0.01, "loss": 1.9369, "step": 103287 }, { "epoch": 10.607990140700421, "grad_norm": 0.0679631382226944, "learning_rate": 0.01, "loss": 1.9472, "step": 103290 }, { "epoch": 10.608298243812262, "grad_norm": 0.09328020364046097, "learning_rate": 0.01, "loss": 1.9552, "step": 103293 }, { "epoch": 10.608606346924104, "grad_norm": 0.07357189059257507, "learning_rate": 0.01, "loss": 1.9385, "step": 103296 }, { "epoch": 10.608914450035945, "grad_norm": 0.059523552656173706, "learning_rate": 0.01, "loss": 1.9438, "step": 103299 }, { "epoch": 10.609222553147786, "grad_norm": 0.050263259559869766, "learning_rate": 0.01, "loss": 1.9171, "step": 103302 }, { "epoch": 10.609530656259627, "grad_norm": 0.19661392271518707, "learning_rate": 0.01, "loss": 1.9544, "step": 103305 }, { "epoch": 10.60983875937147, "grad_norm": 0.061137836426496506, "learning_rate": 0.01, "loss": 1.9292, "step": 103308 }, { "epoch": 10.610146862483312, "grad_norm": 0.0863509327173233, "learning_rate": 0.01, "loss": 1.9147, "step": 103311 }, { "epoch": 10.610454965595153, "grad_norm": 0.07706782966852188, "learning_rate": 0.01, "loss": 1.9169, "step": 103314 }, { "epoch": 10.610763068706994, "grad_norm": 0.052820686250925064, "learning_rate": 0.01, "loss": 1.9406, "step": 103317 }, { "epoch": 10.611071171818836, "grad_norm": 0.0664602667093277, "learning_rate": 0.01, "loss": 1.939, "step": 103320 }, { "epoch": 10.611379274930677, "grad_norm": 0.05431080237030983, "learning_rate": 0.01, "loss": 1.9517, "step": 103323 }, { "epoch": 10.611687378042518, "grad_norm": 0.03953436389565468, "learning_rate": 0.01, "loss": 1.9399, "step": 103326 }, { "epoch": 10.61199548115436, "grad_norm": 0.06307961046695709, "learning_rate": 0.01, "loss": 1.9465, "step": 103329 }, { "epoch": 10.6123035842662, "grad_norm": 0.03332679718732834, "learning_rate": 0.01, "loss": 1.9512, "step": 103332 }, { "epoch": 10.612611687378042, "grad_norm": 0.038423825055360794, "learning_rate": 0.01, "loss": 1.9536, "step": 103335 }, { "epoch": 10.612919790489883, "grad_norm": 0.06862691789865494, "learning_rate": 0.01, "loss": 1.9507, "step": 103338 }, { "epoch": 10.613227893601726, "grad_norm": 0.1888892948627472, "learning_rate": 0.01, "loss": 1.9417, "step": 103341 }, { "epoch": 10.613535996713567, "grad_norm": 0.15006403625011444, "learning_rate": 0.01, "loss": 1.9302, "step": 103344 }, { "epoch": 10.613844099825409, "grad_norm": 0.06723996996879578, "learning_rate": 0.01, "loss": 1.9152, "step": 103347 }, { "epoch": 10.61415220293725, "grad_norm": 0.04298614710569382, "learning_rate": 0.01, "loss": 1.9447, "step": 103350 }, { "epoch": 10.614460306049091, "grad_norm": 0.032380230724811554, "learning_rate": 0.01, "loss": 1.9325, "step": 103353 }, { "epoch": 10.614768409160932, "grad_norm": 0.03456073999404907, "learning_rate": 0.01, "loss": 1.9476, "step": 103356 }, { "epoch": 10.615076512272774, "grad_norm": 0.06206822022795677, "learning_rate": 0.01, "loss": 1.9599, "step": 103359 }, { "epoch": 10.615384615384615, "grad_norm": 0.0719355046749115, "learning_rate": 0.01, "loss": 1.9317, "step": 103362 }, { "epoch": 10.615692718496456, "grad_norm": 0.03969092667102814, "learning_rate": 0.01, "loss": 1.9412, "step": 103365 }, { "epoch": 10.616000821608297, "grad_norm": 0.04106650501489639, "learning_rate": 0.01, "loss": 1.9291, "step": 103368 }, { "epoch": 10.61630892472014, "grad_norm": 0.11985891312360764, "learning_rate": 0.01, "loss": 1.9478, "step": 103371 }, { "epoch": 10.616617027831982, "grad_norm": 0.03696669265627861, "learning_rate": 0.01, "loss": 1.932, "step": 103374 }, { "epoch": 10.616925130943823, "grad_norm": 0.13408684730529785, "learning_rate": 0.01, "loss": 1.9405, "step": 103377 }, { "epoch": 10.617233234055664, "grad_norm": 0.1047915667295456, "learning_rate": 0.01, "loss": 1.9517, "step": 103380 }, { "epoch": 10.617541337167506, "grad_norm": 0.04256188124418259, "learning_rate": 0.01, "loss": 1.9312, "step": 103383 }, { "epoch": 10.617849440279347, "grad_norm": 0.04784403368830681, "learning_rate": 0.01, "loss": 1.9182, "step": 103386 }, { "epoch": 10.618157543391188, "grad_norm": 0.04611273109912872, "learning_rate": 0.01, "loss": 1.9427, "step": 103389 }, { "epoch": 10.61846564650303, "grad_norm": 0.04264547675848007, "learning_rate": 0.01, "loss": 1.9654, "step": 103392 }, { "epoch": 10.61877374961487, "grad_norm": 0.061344727873802185, "learning_rate": 0.01, "loss": 1.9327, "step": 103395 }, { "epoch": 10.619081852726712, "grad_norm": 0.10193429887294769, "learning_rate": 0.01, "loss": 1.9824, "step": 103398 }, { "epoch": 10.619389955838553, "grad_norm": 0.13179463148117065, "learning_rate": 0.01, "loss": 1.9565, "step": 103401 }, { "epoch": 10.619698058950396, "grad_norm": 0.05853363499045372, "learning_rate": 0.01, "loss": 1.9126, "step": 103404 }, { "epoch": 10.620006162062237, "grad_norm": 0.04775145649909973, "learning_rate": 0.01, "loss": 1.9537, "step": 103407 }, { "epoch": 10.620314265174079, "grad_norm": 0.041010648012161255, "learning_rate": 0.01, "loss": 1.9428, "step": 103410 }, { "epoch": 10.62062236828592, "grad_norm": 0.044196419417858124, "learning_rate": 0.01, "loss": 1.9221, "step": 103413 }, { "epoch": 10.620930471397761, "grad_norm": 0.0341707244515419, "learning_rate": 0.01, "loss": 1.9217, "step": 103416 }, { "epoch": 10.621238574509603, "grad_norm": 0.041209764778614044, "learning_rate": 0.01, "loss": 1.9363, "step": 103419 }, { "epoch": 10.621546677621444, "grad_norm": 0.0913657546043396, "learning_rate": 0.01, "loss": 1.9605, "step": 103422 }, { "epoch": 10.621854780733285, "grad_norm": 0.10722614079713821, "learning_rate": 0.01, "loss": 1.9234, "step": 103425 }, { "epoch": 10.622162883845126, "grad_norm": 0.1794777810573578, "learning_rate": 0.01, "loss": 1.9389, "step": 103428 }, { "epoch": 10.622470986956968, "grad_norm": 0.14904111623764038, "learning_rate": 0.01, "loss": 1.9283, "step": 103431 }, { "epoch": 10.62277909006881, "grad_norm": 0.06658944487571716, "learning_rate": 0.01, "loss": 1.9447, "step": 103434 }, { "epoch": 10.623087193180652, "grad_norm": 0.03310980647802353, "learning_rate": 0.01, "loss": 1.9324, "step": 103437 }, { "epoch": 10.623395296292493, "grad_norm": 0.039458371698856354, "learning_rate": 0.01, "loss": 1.958, "step": 103440 }, { "epoch": 10.623703399404334, "grad_norm": 0.065035380423069, "learning_rate": 0.01, "loss": 1.938, "step": 103443 }, { "epoch": 10.624011502516176, "grad_norm": 0.04462461546063423, "learning_rate": 0.01, "loss": 1.9089, "step": 103446 }, { "epoch": 10.624319605628017, "grad_norm": 0.0657215565443039, "learning_rate": 0.01, "loss": 1.9464, "step": 103449 }, { "epoch": 10.624627708739858, "grad_norm": 0.042614828795194626, "learning_rate": 0.01, "loss": 1.9453, "step": 103452 }, { "epoch": 10.6249358118517, "grad_norm": 0.05409550666809082, "learning_rate": 0.01, "loss": 1.9519, "step": 103455 }, { "epoch": 10.62524391496354, "grad_norm": 0.07609287649393082, "learning_rate": 0.01, "loss": 1.9591, "step": 103458 }, { "epoch": 10.625552018075382, "grad_norm": 0.04622379690408707, "learning_rate": 0.01, "loss": 1.9615, "step": 103461 }, { "epoch": 10.625860121187223, "grad_norm": 0.049494169652462006, "learning_rate": 0.01, "loss": 1.9271, "step": 103464 }, { "epoch": 10.626168224299064, "grad_norm": 0.058355800807476044, "learning_rate": 0.01, "loss": 1.9559, "step": 103467 }, { "epoch": 10.626476327410908, "grad_norm": 0.10112365335226059, "learning_rate": 0.01, "loss": 1.9763, "step": 103470 }, { "epoch": 10.626784430522749, "grad_norm": 0.11377458274364471, "learning_rate": 0.01, "loss": 1.9397, "step": 103473 }, { "epoch": 10.62709253363459, "grad_norm": 0.06197943165898323, "learning_rate": 0.01, "loss": 1.9312, "step": 103476 }, { "epoch": 10.627400636746431, "grad_norm": 0.04241367056965828, "learning_rate": 0.01, "loss": 1.937, "step": 103479 }, { "epoch": 10.627708739858273, "grad_norm": 0.04215838760137558, "learning_rate": 0.01, "loss": 1.9364, "step": 103482 }, { "epoch": 10.628016842970114, "grad_norm": 0.03457578644156456, "learning_rate": 0.01, "loss": 1.9061, "step": 103485 }, { "epoch": 10.628324946081955, "grad_norm": 0.13142448663711548, "learning_rate": 0.01, "loss": 1.9344, "step": 103488 }, { "epoch": 10.628633049193796, "grad_norm": 0.05435696244239807, "learning_rate": 0.01, "loss": 1.9194, "step": 103491 }, { "epoch": 10.628941152305638, "grad_norm": 0.04535049572587013, "learning_rate": 0.01, "loss": 1.9245, "step": 103494 }, { "epoch": 10.629249255417479, "grad_norm": 0.06671206653118134, "learning_rate": 0.01, "loss": 1.9397, "step": 103497 }, { "epoch": 10.629557358529322, "grad_norm": 0.04529844969511032, "learning_rate": 0.01, "loss": 1.9708, "step": 103500 }, { "epoch": 10.629865461641163, "grad_norm": 0.05721516162157059, "learning_rate": 0.01, "loss": 1.9361, "step": 103503 }, { "epoch": 10.630173564753004, "grad_norm": 0.04245228320360184, "learning_rate": 0.01, "loss": 1.9171, "step": 103506 }, { "epoch": 10.630481667864846, "grad_norm": 0.06019366532564163, "learning_rate": 0.01, "loss": 1.942, "step": 103509 }, { "epoch": 10.630789770976687, "grad_norm": 0.04783914238214493, "learning_rate": 0.01, "loss": 1.94, "step": 103512 }, { "epoch": 10.631097874088528, "grad_norm": 0.04354009032249451, "learning_rate": 0.01, "loss": 1.9377, "step": 103515 }, { "epoch": 10.63140597720037, "grad_norm": 0.0568438284099102, "learning_rate": 0.01, "loss": 1.9308, "step": 103518 }, { "epoch": 10.63171408031221, "grad_norm": 0.04983118176460266, "learning_rate": 0.01, "loss": 1.9405, "step": 103521 }, { "epoch": 10.632022183424052, "grad_norm": 0.06222904846072197, "learning_rate": 0.01, "loss": 1.9534, "step": 103524 }, { "epoch": 10.632330286535893, "grad_norm": 0.06613916158676147, "learning_rate": 0.01, "loss": 1.9343, "step": 103527 }, { "epoch": 10.632638389647735, "grad_norm": 0.10853058844804764, "learning_rate": 0.01, "loss": 1.9687, "step": 103530 }, { "epoch": 10.632946492759578, "grad_norm": 0.07089220732450485, "learning_rate": 0.01, "loss": 1.9665, "step": 103533 }, { "epoch": 10.633254595871419, "grad_norm": 0.054203297942876816, "learning_rate": 0.01, "loss": 1.9341, "step": 103536 }, { "epoch": 10.63356269898326, "grad_norm": 0.06964759528636932, "learning_rate": 0.01, "loss": 1.9458, "step": 103539 }, { "epoch": 10.633870802095101, "grad_norm": 0.07693734765052795, "learning_rate": 0.01, "loss": 1.9517, "step": 103542 }, { "epoch": 10.634178905206943, "grad_norm": 0.08025120943784714, "learning_rate": 0.01, "loss": 1.9277, "step": 103545 }, { "epoch": 10.634487008318784, "grad_norm": 0.08096393942832947, "learning_rate": 0.01, "loss": 1.9449, "step": 103548 }, { "epoch": 10.634795111430625, "grad_norm": 0.0732489600777626, "learning_rate": 0.01, "loss": 1.9297, "step": 103551 }, { "epoch": 10.635103214542466, "grad_norm": 0.06532766669988632, "learning_rate": 0.01, "loss": 1.9059, "step": 103554 }, { "epoch": 10.635411317654308, "grad_norm": 0.07934626191854477, "learning_rate": 0.01, "loss": 1.9545, "step": 103557 }, { "epoch": 10.635719420766149, "grad_norm": 0.04989808052778244, "learning_rate": 0.01, "loss": 1.9504, "step": 103560 }, { "epoch": 10.636027523877992, "grad_norm": 0.057780925184488297, "learning_rate": 0.01, "loss": 1.9354, "step": 103563 }, { "epoch": 10.636335626989833, "grad_norm": 0.040844134986400604, "learning_rate": 0.01, "loss": 1.9458, "step": 103566 }, { "epoch": 10.636643730101675, "grad_norm": 0.04943647235631943, "learning_rate": 0.01, "loss": 1.9467, "step": 103569 }, { "epoch": 10.636951833213516, "grad_norm": 0.13489899039268494, "learning_rate": 0.01, "loss": 1.9202, "step": 103572 }, { "epoch": 10.637259936325357, "grad_norm": 0.04333699867129326, "learning_rate": 0.01, "loss": 1.9632, "step": 103575 }, { "epoch": 10.637568039437198, "grad_norm": 0.08138672262430191, "learning_rate": 0.01, "loss": 1.9248, "step": 103578 }, { "epoch": 10.63787614254904, "grad_norm": 0.08707044273614883, "learning_rate": 0.01, "loss": 1.9218, "step": 103581 }, { "epoch": 10.63818424566088, "grad_norm": 0.05804765969514847, "learning_rate": 0.01, "loss": 1.9394, "step": 103584 }, { "epoch": 10.638492348772722, "grad_norm": 0.04205789417028427, "learning_rate": 0.01, "loss": 1.932, "step": 103587 }, { "epoch": 10.638800451884563, "grad_norm": 0.05844965577125549, "learning_rate": 0.01, "loss": 1.9293, "step": 103590 }, { "epoch": 10.639108554996405, "grad_norm": 0.052618734538555145, "learning_rate": 0.01, "loss": 1.9387, "step": 103593 }, { "epoch": 10.639416658108248, "grad_norm": 0.04634091630578041, "learning_rate": 0.01, "loss": 1.9547, "step": 103596 }, { "epoch": 10.639724761220089, "grad_norm": 0.043263375759124756, "learning_rate": 0.01, "loss": 1.9633, "step": 103599 }, { "epoch": 10.64003286433193, "grad_norm": 0.10971635580062866, "learning_rate": 0.01, "loss": 1.9357, "step": 103602 }, { "epoch": 10.640340967443771, "grad_norm": 0.053758516907691956, "learning_rate": 0.01, "loss": 1.9292, "step": 103605 }, { "epoch": 10.640649070555613, "grad_norm": 0.08278075605630875, "learning_rate": 0.01, "loss": 1.9327, "step": 103608 }, { "epoch": 10.640957173667454, "grad_norm": 0.101168192923069, "learning_rate": 0.01, "loss": 1.9168, "step": 103611 }, { "epoch": 10.641265276779295, "grad_norm": 0.047223761677742004, "learning_rate": 0.01, "loss": 1.9562, "step": 103614 }, { "epoch": 10.641573379891136, "grad_norm": 0.04839014261960983, "learning_rate": 0.01, "loss": 1.9328, "step": 103617 }, { "epoch": 10.641881483002978, "grad_norm": 0.11143355071544647, "learning_rate": 0.01, "loss": 1.9564, "step": 103620 }, { "epoch": 10.642189586114819, "grad_norm": 0.06389351189136505, "learning_rate": 0.01, "loss": 1.9552, "step": 103623 }, { "epoch": 10.642497689226662, "grad_norm": 0.08618534356355667, "learning_rate": 0.01, "loss": 1.967, "step": 103626 }, { "epoch": 10.642805792338503, "grad_norm": 0.06256888061761856, "learning_rate": 0.01, "loss": 1.957, "step": 103629 }, { "epoch": 10.643113895450345, "grad_norm": 0.0647435113787651, "learning_rate": 0.01, "loss": 1.9281, "step": 103632 }, { "epoch": 10.643421998562186, "grad_norm": 0.04641449451446533, "learning_rate": 0.01, "loss": 1.8989, "step": 103635 }, { "epoch": 10.643730101674027, "grad_norm": 0.08702532947063446, "learning_rate": 0.01, "loss": 1.9526, "step": 103638 }, { "epoch": 10.644038204785868, "grad_norm": 0.05984359234571457, "learning_rate": 0.01, "loss": 1.9404, "step": 103641 }, { "epoch": 10.64434630789771, "grad_norm": 0.07888772338628769, "learning_rate": 0.01, "loss": 1.9421, "step": 103644 }, { "epoch": 10.644654411009551, "grad_norm": 0.11645808070898056, "learning_rate": 0.01, "loss": 1.9392, "step": 103647 }, { "epoch": 10.644962514121392, "grad_norm": 0.07762550562620163, "learning_rate": 0.01, "loss": 1.9351, "step": 103650 }, { "epoch": 10.645270617233233, "grad_norm": 0.06812987476587296, "learning_rate": 0.01, "loss": 1.9372, "step": 103653 }, { "epoch": 10.645578720345075, "grad_norm": 0.12551312148571014, "learning_rate": 0.01, "loss": 1.9327, "step": 103656 }, { "epoch": 10.645886823456918, "grad_norm": 0.11314782500267029, "learning_rate": 0.01, "loss": 1.9511, "step": 103659 }, { "epoch": 10.646194926568759, "grad_norm": 0.05329858884215355, "learning_rate": 0.01, "loss": 1.946, "step": 103662 }, { "epoch": 10.6465030296806, "grad_norm": 0.05354480445384979, "learning_rate": 0.01, "loss": 1.927, "step": 103665 }, { "epoch": 10.646811132792442, "grad_norm": 0.04435323551297188, "learning_rate": 0.01, "loss": 1.94, "step": 103668 }, { "epoch": 10.647119235904283, "grad_norm": 0.05537758395075798, "learning_rate": 0.01, "loss": 1.9389, "step": 103671 }, { "epoch": 10.647427339016124, "grad_norm": 0.10855624824762344, "learning_rate": 0.01, "loss": 1.931, "step": 103674 }, { "epoch": 10.647735442127965, "grad_norm": 0.0933624804019928, "learning_rate": 0.01, "loss": 1.9412, "step": 103677 }, { "epoch": 10.648043545239807, "grad_norm": 0.046108171343803406, "learning_rate": 0.01, "loss": 1.9337, "step": 103680 }, { "epoch": 10.648351648351648, "grad_norm": 0.08186739683151245, "learning_rate": 0.01, "loss": 1.9279, "step": 103683 }, { "epoch": 10.648659751463489, "grad_norm": 0.12513357400894165, "learning_rate": 0.01, "loss": 1.9388, "step": 103686 }, { "epoch": 10.648967854575332, "grad_norm": 0.08822114765644073, "learning_rate": 0.01, "loss": 1.9483, "step": 103689 }, { "epoch": 10.649275957687173, "grad_norm": 0.06944266706705093, "learning_rate": 0.01, "loss": 1.9099, "step": 103692 }, { "epoch": 10.649584060799015, "grad_norm": 0.0754237249493599, "learning_rate": 0.01, "loss": 1.9417, "step": 103695 }, { "epoch": 10.649892163910856, "grad_norm": 0.11520032584667206, "learning_rate": 0.01, "loss": 1.9563, "step": 103698 }, { "epoch": 10.650200267022697, "grad_norm": 0.07506398856639862, "learning_rate": 0.01, "loss": 1.9489, "step": 103701 }, { "epoch": 10.650508370134538, "grad_norm": 0.057485081255435944, "learning_rate": 0.01, "loss": 1.9493, "step": 103704 }, { "epoch": 10.65081647324638, "grad_norm": 0.0681445300579071, "learning_rate": 0.01, "loss": 1.951, "step": 103707 }, { "epoch": 10.651124576358221, "grad_norm": 0.04196304827928543, "learning_rate": 0.01, "loss": 1.9426, "step": 103710 }, { "epoch": 10.651432679470062, "grad_norm": 0.08239495754241943, "learning_rate": 0.01, "loss": 1.9559, "step": 103713 }, { "epoch": 10.651740782581903, "grad_norm": 0.057062406092882156, "learning_rate": 0.01, "loss": 1.9059, "step": 103716 }, { "epoch": 10.652048885693745, "grad_norm": 0.04289278760552406, "learning_rate": 0.01, "loss": 1.9497, "step": 103719 }, { "epoch": 10.652356988805586, "grad_norm": 0.08105800300836563, "learning_rate": 0.01, "loss": 1.9194, "step": 103722 }, { "epoch": 10.652665091917429, "grad_norm": 0.05087985843420029, "learning_rate": 0.01, "loss": 1.9411, "step": 103725 }, { "epoch": 10.65297319502927, "grad_norm": 0.06125885993242264, "learning_rate": 0.01, "loss": 1.9537, "step": 103728 }, { "epoch": 10.653281298141112, "grad_norm": 0.11369001865386963, "learning_rate": 0.01, "loss": 1.9773, "step": 103731 }, { "epoch": 10.653589401252953, "grad_norm": 0.09310147911310196, "learning_rate": 0.01, "loss": 1.9215, "step": 103734 }, { "epoch": 10.653897504364794, "grad_norm": 0.0653354674577713, "learning_rate": 0.01, "loss": 1.9172, "step": 103737 }, { "epoch": 10.654205607476635, "grad_norm": 0.041785553097724915, "learning_rate": 0.01, "loss": 1.939, "step": 103740 }, { "epoch": 10.654513710588477, "grad_norm": 0.03783538565039635, "learning_rate": 0.01, "loss": 1.945, "step": 103743 }, { "epoch": 10.654821813700318, "grad_norm": 0.09894753247499466, "learning_rate": 0.01, "loss": 1.9377, "step": 103746 }, { "epoch": 10.65512991681216, "grad_norm": 0.09766748547554016, "learning_rate": 0.01, "loss": 1.9467, "step": 103749 }, { "epoch": 10.655438019924, "grad_norm": 0.06367824971675873, "learning_rate": 0.01, "loss": 1.9162, "step": 103752 }, { "epoch": 10.655746123035843, "grad_norm": 0.10057856142520905, "learning_rate": 0.01, "loss": 1.9572, "step": 103755 }, { "epoch": 10.656054226147685, "grad_norm": 0.06819339096546173, "learning_rate": 0.01, "loss": 1.933, "step": 103758 }, { "epoch": 10.656362329259526, "grad_norm": 0.04021526128053665, "learning_rate": 0.01, "loss": 1.9433, "step": 103761 }, { "epoch": 10.656670432371367, "grad_norm": 0.050846509635448456, "learning_rate": 0.01, "loss": 1.9367, "step": 103764 }, { "epoch": 10.656978535483209, "grad_norm": 0.048599909991025925, "learning_rate": 0.01, "loss": 1.9215, "step": 103767 }, { "epoch": 10.65728663859505, "grad_norm": 0.1137971431016922, "learning_rate": 0.01, "loss": 1.9338, "step": 103770 }, { "epoch": 10.657594741706891, "grad_norm": 0.08687986433506012, "learning_rate": 0.01, "loss": 1.937, "step": 103773 }, { "epoch": 10.657902844818732, "grad_norm": 0.07247614115476608, "learning_rate": 0.01, "loss": 1.9829, "step": 103776 }, { "epoch": 10.658210947930574, "grad_norm": 0.05305969715118408, "learning_rate": 0.01, "loss": 1.9309, "step": 103779 }, { "epoch": 10.658519051042415, "grad_norm": 0.043938048183918, "learning_rate": 0.01, "loss": 1.9282, "step": 103782 }, { "epoch": 10.658827154154256, "grad_norm": 0.058209795504808426, "learning_rate": 0.01, "loss": 1.9292, "step": 103785 }, { "epoch": 10.6591352572661, "grad_norm": 0.08251186460256577, "learning_rate": 0.01, "loss": 1.956, "step": 103788 }, { "epoch": 10.65944336037794, "grad_norm": 0.06727088242769241, "learning_rate": 0.01, "loss": 1.9286, "step": 103791 }, { "epoch": 10.659751463489782, "grad_norm": 0.06473030149936676, "learning_rate": 0.01, "loss": 1.9611, "step": 103794 }, { "epoch": 10.660059566601623, "grad_norm": 0.12840333580970764, "learning_rate": 0.01, "loss": 1.9279, "step": 103797 }, { "epoch": 10.660367669713464, "grad_norm": 0.11226318776607513, "learning_rate": 0.01, "loss": 1.9399, "step": 103800 }, { "epoch": 10.660675772825305, "grad_norm": 0.04158809781074524, "learning_rate": 0.01, "loss": 1.9419, "step": 103803 }, { "epoch": 10.660983875937147, "grad_norm": 0.11002946645021439, "learning_rate": 0.01, "loss": 1.9357, "step": 103806 }, { "epoch": 10.661291979048988, "grad_norm": 0.11193925142288208, "learning_rate": 0.01, "loss": 1.9532, "step": 103809 }, { "epoch": 10.66160008216083, "grad_norm": 0.04690947011113167, "learning_rate": 0.01, "loss": 1.9231, "step": 103812 }, { "epoch": 10.66190818527267, "grad_norm": 0.03682129830121994, "learning_rate": 0.01, "loss": 1.9428, "step": 103815 }, { "epoch": 10.662216288384514, "grad_norm": 0.03982137516140938, "learning_rate": 0.01, "loss": 1.9385, "step": 103818 }, { "epoch": 10.662524391496355, "grad_norm": 0.05768255144357681, "learning_rate": 0.01, "loss": 1.9325, "step": 103821 }, { "epoch": 10.662832494608196, "grad_norm": 0.03755057603120804, "learning_rate": 0.01, "loss": 1.9722, "step": 103824 }, { "epoch": 10.663140597720037, "grad_norm": 0.10326661914587021, "learning_rate": 0.01, "loss": 1.9575, "step": 103827 }, { "epoch": 10.663448700831879, "grad_norm": 0.06437335908412933, "learning_rate": 0.01, "loss": 1.9429, "step": 103830 }, { "epoch": 10.66375680394372, "grad_norm": 0.10684889554977417, "learning_rate": 0.01, "loss": 1.9327, "step": 103833 }, { "epoch": 10.664064907055561, "grad_norm": 0.0675310418009758, "learning_rate": 0.01, "loss": 1.97, "step": 103836 }, { "epoch": 10.664373010167402, "grad_norm": 0.15718181431293488, "learning_rate": 0.01, "loss": 1.9487, "step": 103839 }, { "epoch": 10.664681113279244, "grad_norm": 0.08253775537014008, "learning_rate": 0.01, "loss": 1.9575, "step": 103842 }, { "epoch": 10.664989216391085, "grad_norm": 0.06902614235877991, "learning_rate": 0.01, "loss": 1.9607, "step": 103845 }, { "epoch": 10.665297319502926, "grad_norm": 0.07362678647041321, "learning_rate": 0.01, "loss": 1.9293, "step": 103848 }, { "epoch": 10.66560542261477, "grad_norm": 0.08512471616268158, "learning_rate": 0.01, "loss": 1.8981, "step": 103851 }, { "epoch": 10.66591352572661, "grad_norm": 0.09559623152017593, "learning_rate": 0.01, "loss": 1.9473, "step": 103854 }, { "epoch": 10.666221628838452, "grad_norm": 0.1007629781961441, "learning_rate": 0.01, "loss": 1.9343, "step": 103857 }, { "epoch": 10.666529731950293, "grad_norm": 0.03798322752118111, "learning_rate": 0.01, "loss": 1.907, "step": 103860 }, { "epoch": 10.666837835062134, "grad_norm": 0.056238915771245956, "learning_rate": 0.01, "loss": 1.9605, "step": 103863 }, { "epoch": 10.667145938173975, "grad_norm": 0.06413375586271286, "learning_rate": 0.01, "loss": 1.9445, "step": 103866 }, { "epoch": 10.667454041285817, "grad_norm": 0.0409151092171669, "learning_rate": 0.01, "loss": 1.9395, "step": 103869 }, { "epoch": 10.667762144397658, "grad_norm": 0.07371127605438232, "learning_rate": 0.01, "loss": 1.9314, "step": 103872 }, { "epoch": 10.6680702475095, "grad_norm": 0.037266094237565994, "learning_rate": 0.01, "loss": 1.9263, "step": 103875 }, { "epoch": 10.66837835062134, "grad_norm": 0.04316391795873642, "learning_rate": 0.01, "loss": 1.9096, "step": 103878 }, { "epoch": 10.668686453733184, "grad_norm": 0.044815342873334885, "learning_rate": 0.01, "loss": 1.9402, "step": 103881 }, { "epoch": 10.668994556845025, "grad_norm": 0.1637774407863617, "learning_rate": 0.01, "loss": 1.9426, "step": 103884 }, { "epoch": 10.669302659956866, "grad_norm": 0.08285655081272125, "learning_rate": 0.01, "loss": 1.9238, "step": 103887 }, { "epoch": 10.669610763068707, "grad_norm": 0.050641678273677826, "learning_rate": 0.01, "loss": 1.9321, "step": 103890 }, { "epoch": 10.669918866180549, "grad_norm": 0.034874316304922104, "learning_rate": 0.01, "loss": 1.9336, "step": 103893 }, { "epoch": 10.67022696929239, "grad_norm": 0.03403162956237793, "learning_rate": 0.01, "loss": 1.9348, "step": 103896 }, { "epoch": 10.670535072404231, "grad_norm": 0.030581431463360786, "learning_rate": 0.01, "loss": 1.937, "step": 103899 }, { "epoch": 10.670843175516072, "grad_norm": 0.05116745084524155, "learning_rate": 0.01, "loss": 1.919, "step": 103902 }, { "epoch": 10.671151278627914, "grad_norm": 0.04860899969935417, "learning_rate": 0.01, "loss": 1.9375, "step": 103905 }, { "epoch": 10.671459381739755, "grad_norm": 0.08376903831958771, "learning_rate": 0.01, "loss": 1.9445, "step": 103908 }, { "epoch": 10.671767484851596, "grad_norm": 0.04416081681847572, "learning_rate": 0.01, "loss": 1.9693, "step": 103911 }, { "epoch": 10.67207558796344, "grad_norm": 0.07991000264883041, "learning_rate": 0.01, "loss": 1.9607, "step": 103914 }, { "epoch": 10.67238369107528, "grad_norm": 0.0784083679318428, "learning_rate": 0.01, "loss": 1.9365, "step": 103917 }, { "epoch": 10.672691794187122, "grad_norm": 0.0480702742934227, "learning_rate": 0.01, "loss": 1.9311, "step": 103920 }, { "epoch": 10.672999897298963, "grad_norm": 0.03636328503489494, "learning_rate": 0.01, "loss": 1.9215, "step": 103923 }, { "epoch": 10.673308000410804, "grad_norm": 0.04388698935508728, "learning_rate": 0.01, "loss": 1.9274, "step": 103926 }, { "epoch": 10.673616103522646, "grad_norm": 0.10632793605327606, "learning_rate": 0.01, "loss": 1.948, "step": 103929 }, { "epoch": 10.673924206634487, "grad_norm": 0.09042569249868393, "learning_rate": 0.01, "loss": 1.9014, "step": 103932 }, { "epoch": 10.674232309746328, "grad_norm": 0.11588452756404877, "learning_rate": 0.01, "loss": 1.9498, "step": 103935 }, { "epoch": 10.67454041285817, "grad_norm": 0.0780494436621666, "learning_rate": 0.01, "loss": 1.9563, "step": 103938 }, { "epoch": 10.67484851597001, "grad_norm": 0.05877825245261192, "learning_rate": 0.01, "loss": 1.9415, "step": 103941 }, { "epoch": 10.675156619081854, "grad_norm": 0.051702238619327545, "learning_rate": 0.01, "loss": 1.9308, "step": 103944 }, { "epoch": 10.675464722193695, "grad_norm": 0.07100822031497955, "learning_rate": 0.01, "loss": 1.9305, "step": 103947 }, { "epoch": 10.675772825305536, "grad_norm": 0.042687855660915375, "learning_rate": 0.01, "loss": 1.93, "step": 103950 }, { "epoch": 10.676080928417377, "grad_norm": 0.04055164381861687, "learning_rate": 0.01, "loss": 1.9505, "step": 103953 }, { "epoch": 10.676389031529219, "grad_norm": 0.05162336304783821, "learning_rate": 0.01, "loss": 1.9494, "step": 103956 }, { "epoch": 10.67669713464106, "grad_norm": 0.0412101112306118, "learning_rate": 0.01, "loss": 1.941, "step": 103959 }, { "epoch": 10.677005237752901, "grad_norm": 0.0823889672756195, "learning_rate": 0.01, "loss": 1.9337, "step": 103962 }, { "epoch": 10.677313340864742, "grad_norm": 0.13297125697135925, "learning_rate": 0.01, "loss": 1.9349, "step": 103965 }, { "epoch": 10.677621443976584, "grad_norm": 0.04852784797549248, "learning_rate": 0.01, "loss": 1.9314, "step": 103968 }, { "epoch": 10.677929547088425, "grad_norm": 0.04298737272620201, "learning_rate": 0.01, "loss": 1.9384, "step": 103971 }, { "epoch": 10.678237650200266, "grad_norm": 0.04336988553404808, "learning_rate": 0.01, "loss": 1.9556, "step": 103974 }, { "epoch": 10.678545753312108, "grad_norm": 0.09840462356805801, "learning_rate": 0.01, "loss": 1.9251, "step": 103977 }, { "epoch": 10.67885385642395, "grad_norm": 0.06105434149503708, "learning_rate": 0.01, "loss": 1.9391, "step": 103980 }, { "epoch": 10.679161959535792, "grad_norm": 0.048671264201402664, "learning_rate": 0.01, "loss": 1.9313, "step": 103983 }, { "epoch": 10.679470062647633, "grad_norm": 0.05965422838926315, "learning_rate": 0.01, "loss": 1.9471, "step": 103986 }, { "epoch": 10.679778165759474, "grad_norm": 0.0500924289226532, "learning_rate": 0.01, "loss": 1.9222, "step": 103989 }, { "epoch": 10.680086268871316, "grad_norm": 0.04120882600545883, "learning_rate": 0.01, "loss": 1.9236, "step": 103992 }, { "epoch": 10.680394371983157, "grad_norm": 0.07993176579475403, "learning_rate": 0.01, "loss": 1.941, "step": 103995 }, { "epoch": 10.680702475094998, "grad_norm": 0.03478511795401573, "learning_rate": 0.01, "loss": 1.9221, "step": 103998 }, { "epoch": 10.68101057820684, "grad_norm": 0.09511188417673111, "learning_rate": 0.01, "loss": 1.9435, "step": 104001 }, { "epoch": 10.68131868131868, "grad_norm": 0.07670678943395615, "learning_rate": 0.01, "loss": 1.9359, "step": 104004 }, { "epoch": 10.681626784430522, "grad_norm": 0.05489913374185562, "learning_rate": 0.01, "loss": 1.9378, "step": 104007 }, { "epoch": 10.681934887542365, "grad_norm": 0.09096886962652206, "learning_rate": 0.01, "loss": 1.9499, "step": 104010 }, { "epoch": 10.682242990654206, "grad_norm": 0.05405969172716141, "learning_rate": 0.01, "loss": 1.9249, "step": 104013 }, { "epoch": 10.682551093766048, "grad_norm": 0.08224764466285706, "learning_rate": 0.01, "loss": 1.9256, "step": 104016 }, { "epoch": 10.682859196877889, "grad_norm": 0.07674596458673477, "learning_rate": 0.01, "loss": 1.9609, "step": 104019 }, { "epoch": 10.68316729998973, "grad_norm": 0.05211837589740753, "learning_rate": 0.01, "loss": 1.925, "step": 104022 }, { "epoch": 10.683475403101571, "grad_norm": 0.07650485634803772, "learning_rate": 0.01, "loss": 1.9577, "step": 104025 }, { "epoch": 10.683783506213413, "grad_norm": 0.09514635056257248, "learning_rate": 0.01, "loss": 1.9275, "step": 104028 }, { "epoch": 10.684091609325254, "grad_norm": 0.049703311175107956, "learning_rate": 0.01, "loss": 1.9339, "step": 104031 }, { "epoch": 10.684399712437095, "grad_norm": 0.037082523107528687, "learning_rate": 0.01, "loss": 1.915, "step": 104034 }, { "epoch": 10.684707815548936, "grad_norm": 0.035258326679468155, "learning_rate": 0.01, "loss": 1.9368, "step": 104037 }, { "epoch": 10.685015918660778, "grad_norm": 0.12024477869272232, "learning_rate": 0.01, "loss": 1.9314, "step": 104040 }, { "epoch": 10.68532402177262, "grad_norm": 0.10448408126831055, "learning_rate": 0.01, "loss": 1.9375, "step": 104043 }, { "epoch": 10.685632124884462, "grad_norm": 0.03678799793124199, "learning_rate": 0.01, "loss": 1.9415, "step": 104046 }, { "epoch": 10.685940227996303, "grad_norm": 0.08878728747367859, "learning_rate": 0.01, "loss": 1.9081, "step": 104049 }, { "epoch": 10.686248331108144, "grad_norm": 0.04152219742536545, "learning_rate": 0.01, "loss": 1.9567, "step": 104052 }, { "epoch": 10.686556434219986, "grad_norm": 0.07425837218761444, "learning_rate": 0.01, "loss": 1.9398, "step": 104055 }, { "epoch": 10.686864537331827, "grad_norm": 0.03856934979557991, "learning_rate": 0.01, "loss": 1.9338, "step": 104058 }, { "epoch": 10.687172640443668, "grad_norm": 0.03087785467505455, "learning_rate": 0.01, "loss": 1.9465, "step": 104061 }, { "epoch": 10.68748074355551, "grad_norm": 0.054895635694265366, "learning_rate": 0.01, "loss": 1.9221, "step": 104064 }, { "epoch": 10.68778884666735, "grad_norm": 0.11289512366056442, "learning_rate": 0.01, "loss": 1.9483, "step": 104067 }, { "epoch": 10.688096949779192, "grad_norm": 0.050542768090963364, "learning_rate": 0.01, "loss": 1.9366, "step": 104070 }, { "epoch": 10.688405052891035, "grad_norm": 0.07830153405666351, "learning_rate": 0.01, "loss": 1.9483, "step": 104073 }, { "epoch": 10.688713156002876, "grad_norm": 0.03961231932044029, "learning_rate": 0.01, "loss": 1.9383, "step": 104076 }, { "epoch": 10.689021259114718, "grad_norm": 0.03384247049689293, "learning_rate": 0.01, "loss": 1.9096, "step": 104079 }, { "epoch": 10.689329362226559, "grad_norm": 0.035751618444919586, "learning_rate": 0.01, "loss": 1.9456, "step": 104082 }, { "epoch": 10.6896374653384, "grad_norm": 0.04944929853081703, "learning_rate": 0.01, "loss": 1.9366, "step": 104085 }, { "epoch": 10.689945568450241, "grad_norm": 0.06898265331983566, "learning_rate": 0.01, "loss": 1.9508, "step": 104088 }, { "epoch": 10.690253671562083, "grad_norm": 0.1251329779624939, "learning_rate": 0.01, "loss": 1.9547, "step": 104091 }, { "epoch": 10.690561774673924, "grad_norm": 0.05030253902077675, "learning_rate": 0.01, "loss": 1.9357, "step": 104094 }, { "epoch": 10.690869877785765, "grad_norm": 0.041641298681497574, "learning_rate": 0.01, "loss": 1.9322, "step": 104097 }, { "epoch": 10.691177980897606, "grad_norm": 0.11983517557382584, "learning_rate": 0.01, "loss": 1.9469, "step": 104100 }, { "epoch": 10.691486084009448, "grad_norm": 0.04607795923948288, "learning_rate": 0.01, "loss": 1.9382, "step": 104103 }, { "epoch": 10.69179418712129, "grad_norm": 0.11364562064409256, "learning_rate": 0.01, "loss": 1.9679, "step": 104106 }, { "epoch": 10.692102290233132, "grad_norm": 0.04407837986946106, "learning_rate": 0.01, "loss": 1.9327, "step": 104109 }, { "epoch": 10.692410393344973, "grad_norm": 0.035745903849601746, "learning_rate": 0.01, "loss": 1.9496, "step": 104112 }, { "epoch": 10.692718496456814, "grad_norm": 0.03364009037613869, "learning_rate": 0.01, "loss": 1.9467, "step": 104115 }, { "epoch": 10.693026599568656, "grad_norm": 0.03567780181765556, "learning_rate": 0.01, "loss": 1.9202, "step": 104118 }, { "epoch": 10.693334702680497, "grad_norm": 0.07825560867786407, "learning_rate": 0.01, "loss": 1.9461, "step": 104121 }, { "epoch": 10.693642805792338, "grad_norm": 0.04002043232321739, "learning_rate": 0.01, "loss": 1.9563, "step": 104124 }, { "epoch": 10.69395090890418, "grad_norm": 0.044410590082407, "learning_rate": 0.01, "loss": 1.9467, "step": 104127 }, { "epoch": 10.69425901201602, "grad_norm": 0.03323044627904892, "learning_rate": 0.01, "loss": 1.9527, "step": 104130 }, { "epoch": 10.694567115127862, "grad_norm": 0.0373239666223526, "learning_rate": 0.01, "loss": 1.9263, "step": 104133 }, { "epoch": 10.694875218239705, "grad_norm": 0.058051690459251404, "learning_rate": 0.01, "loss": 1.9235, "step": 104136 }, { "epoch": 10.695183321351546, "grad_norm": 0.1667533963918686, "learning_rate": 0.01, "loss": 1.96, "step": 104139 }, { "epoch": 10.695491424463388, "grad_norm": 0.046205274760723114, "learning_rate": 0.01, "loss": 1.9495, "step": 104142 }, { "epoch": 10.695799527575229, "grad_norm": 0.07125911861658096, "learning_rate": 0.01, "loss": 1.9559, "step": 104145 }, { "epoch": 10.69610763068707, "grad_norm": 0.09456393122673035, "learning_rate": 0.01, "loss": 1.9588, "step": 104148 }, { "epoch": 10.696415733798911, "grad_norm": 0.10501796752214432, "learning_rate": 0.01, "loss": 1.9504, "step": 104151 }, { "epoch": 10.696723836910753, "grad_norm": 0.05251304805278778, "learning_rate": 0.01, "loss": 1.928, "step": 104154 }, { "epoch": 10.697031940022594, "grad_norm": 0.05828684940934181, "learning_rate": 0.01, "loss": 1.926, "step": 104157 }, { "epoch": 10.697340043134435, "grad_norm": 0.048068683594465256, "learning_rate": 0.01, "loss": 1.9639, "step": 104160 }, { "epoch": 10.697648146246276, "grad_norm": 0.09374748915433884, "learning_rate": 0.01, "loss": 1.9242, "step": 104163 }, { "epoch": 10.697956249358118, "grad_norm": 0.057782042771577835, "learning_rate": 0.01, "loss": 1.9471, "step": 104166 }, { "epoch": 10.69826435246996, "grad_norm": 0.052854880690574646, "learning_rate": 0.01, "loss": 1.9286, "step": 104169 }, { "epoch": 10.698572455581802, "grad_norm": 0.050680410116910934, "learning_rate": 0.01, "loss": 1.9317, "step": 104172 }, { "epoch": 10.698880558693643, "grad_norm": 0.05988655984401703, "learning_rate": 0.01, "loss": 1.9414, "step": 104175 }, { "epoch": 10.699188661805485, "grad_norm": 0.04978611692786217, "learning_rate": 0.01, "loss": 1.9113, "step": 104178 }, { "epoch": 10.699496764917326, "grad_norm": 0.06838058680295944, "learning_rate": 0.01, "loss": 1.9338, "step": 104181 }, { "epoch": 10.699804868029167, "grad_norm": 0.04077281057834625, "learning_rate": 0.01, "loss": 1.9482, "step": 104184 }, { "epoch": 10.700112971141008, "grad_norm": 0.050454989075660706, "learning_rate": 0.01, "loss": 1.9253, "step": 104187 }, { "epoch": 10.70042107425285, "grad_norm": 0.04417501762509346, "learning_rate": 0.01, "loss": 1.934, "step": 104190 }, { "epoch": 10.70072917736469, "grad_norm": 0.03660339489579201, "learning_rate": 0.01, "loss": 1.942, "step": 104193 }, { "epoch": 10.701037280476532, "grad_norm": 0.03626766800880432, "learning_rate": 0.01, "loss": 1.9355, "step": 104196 }, { "epoch": 10.701345383588375, "grad_norm": 0.11914758384227753, "learning_rate": 0.01, "loss": 1.9307, "step": 104199 }, { "epoch": 10.701653486700216, "grad_norm": 0.07309869676828384, "learning_rate": 0.01, "loss": 1.9584, "step": 104202 }, { "epoch": 10.701961589812058, "grad_norm": 0.1361292004585266, "learning_rate": 0.01, "loss": 1.9238, "step": 104205 }, { "epoch": 10.702269692923899, "grad_norm": 0.07265239208936691, "learning_rate": 0.01, "loss": 1.9266, "step": 104208 }, { "epoch": 10.70257779603574, "grad_norm": 0.05486297979950905, "learning_rate": 0.01, "loss": 1.9297, "step": 104211 }, { "epoch": 10.702885899147581, "grad_norm": 0.05077483132481575, "learning_rate": 0.01, "loss": 1.9632, "step": 104214 }, { "epoch": 10.703194002259423, "grad_norm": 0.04137976095080376, "learning_rate": 0.01, "loss": 1.9531, "step": 104217 }, { "epoch": 10.703502105371264, "grad_norm": 0.040473662316799164, "learning_rate": 0.01, "loss": 1.9176, "step": 104220 }, { "epoch": 10.703810208483105, "grad_norm": 0.04199198633432388, "learning_rate": 0.01, "loss": 1.9506, "step": 104223 }, { "epoch": 10.704118311594947, "grad_norm": 0.054861754179000854, "learning_rate": 0.01, "loss": 1.9172, "step": 104226 }, { "epoch": 10.704426414706788, "grad_norm": 0.04389738291501999, "learning_rate": 0.01, "loss": 1.9323, "step": 104229 }, { "epoch": 10.704734517818629, "grad_norm": 0.10139840841293335, "learning_rate": 0.01, "loss": 1.9651, "step": 104232 }, { "epoch": 10.705042620930472, "grad_norm": 0.18040791153907776, "learning_rate": 0.01, "loss": 1.9371, "step": 104235 }, { "epoch": 10.705350724042313, "grad_norm": 0.09351298958063126, "learning_rate": 0.01, "loss": 1.9246, "step": 104238 }, { "epoch": 10.705658827154155, "grad_norm": 0.04835623502731323, "learning_rate": 0.01, "loss": 1.9582, "step": 104241 }, { "epoch": 10.705966930265996, "grad_norm": 0.07934877276420593, "learning_rate": 0.01, "loss": 1.932, "step": 104244 }, { "epoch": 10.706275033377837, "grad_norm": 0.04809239134192467, "learning_rate": 0.01, "loss": 1.9462, "step": 104247 }, { "epoch": 10.706583136489678, "grad_norm": 0.09115726500749588, "learning_rate": 0.01, "loss": 1.9397, "step": 104250 }, { "epoch": 10.70689123960152, "grad_norm": 0.09909907728433609, "learning_rate": 0.01, "loss": 1.927, "step": 104253 }, { "epoch": 10.707199342713361, "grad_norm": 0.06753792613744736, "learning_rate": 0.01, "loss": 1.9191, "step": 104256 }, { "epoch": 10.707507445825202, "grad_norm": 0.047283709049224854, "learning_rate": 0.01, "loss": 1.9471, "step": 104259 }, { "epoch": 10.707815548937043, "grad_norm": 0.0870928168296814, "learning_rate": 0.01, "loss": 1.8767, "step": 104262 }, { "epoch": 10.708123652048886, "grad_norm": 0.04126760736107826, "learning_rate": 0.01, "loss": 1.9286, "step": 104265 }, { "epoch": 10.708431755160728, "grad_norm": 0.052089888602495193, "learning_rate": 0.01, "loss": 1.9636, "step": 104268 }, { "epoch": 10.708739858272569, "grad_norm": 0.040085259824991226, "learning_rate": 0.01, "loss": 1.9198, "step": 104271 }, { "epoch": 10.70904796138441, "grad_norm": 0.03780793771147728, "learning_rate": 0.01, "loss": 1.9291, "step": 104274 }, { "epoch": 10.709356064496252, "grad_norm": 0.03962109982967377, "learning_rate": 0.01, "loss": 1.921, "step": 104277 }, { "epoch": 10.709664167608093, "grad_norm": 0.10808218270540237, "learning_rate": 0.01, "loss": 1.9512, "step": 104280 }, { "epoch": 10.709972270719934, "grad_norm": 0.06844981759786606, "learning_rate": 0.01, "loss": 1.9329, "step": 104283 }, { "epoch": 10.710280373831775, "grad_norm": 0.08019477128982544, "learning_rate": 0.01, "loss": 1.9223, "step": 104286 }, { "epoch": 10.710588476943617, "grad_norm": 0.07778795063495636, "learning_rate": 0.01, "loss": 1.9706, "step": 104289 }, { "epoch": 10.710896580055458, "grad_norm": 0.03524641692638397, "learning_rate": 0.01, "loss": 1.9343, "step": 104292 }, { "epoch": 10.7112046831673, "grad_norm": 0.11965351551771164, "learning_rate": 0.01, "loss": 1.9625, "step": 104295 }, { "epoch": 10.711512786279142, "grad_norm": 0.10487991571426392, "learning_rate": 0.01, "loss": 1.9582, "step": 104298 }, { "epoch": 10.711820889390983, "grad_norm": 0.10835332423448563, "learning_rate": 0.01, "loss": 1.9294, "step": 104301 }, { "epoch": 10.712128992502825, "grad_norm": 0.058550573885440826, "learning_rate": 0.01, "loss": 1.935, "step": 104304 }, { "epoch": 10.712437095614666, "grad_norm": 0.041659001260995865, "learning_rate": 0.01, "loss": 1.9492, "step": 104307 }, { "epoch": 10.712745198726507, "grad_norm": 0.059879958629608154, "learning_rate": 0.01, "loss": 1.9602, "step": 104310 }, { "epoch": 10.713053301838348, "grad_norm": 0.03812127932906151, "learning_rate": 0.01, "loss": 1.9426, "step": 104313 }, { "epoch": 10.71336140495019, "grad_norm": 0.0401906780898571, "learning_rate": 0.01, "loss": 1.933, "step": 104316 }, { "epoch": 10.713669508062031, "grad_norm": 0.0772302895784378, "learning_rate": 0.01, "loss": 1.9515, "step": 104319 }, { "epoch": 10.713977611173872, "grad_norm": 0.045765917748212814, "learning_rate": 0.01, "loss": 1.9584, "step": 104322 }, { "epoch": 10.714285714285714, "grad_norm": 0.05593492090702057, "learning_rate": 0.01, "loss": 1.9282, "step": 104325 }, { "epoch": 10.714593817397557, "grad_norm": 0.08135896176099777, "learning_rate": 0.01, "loss": 1.9434, "step": 104328 }, { "epoch": 10.714901920509398, "grad_norm": 0.06063408404588699, "learning_rate": 0.01, "loss": 1.9311, "step": 104331 }, { "epoch": 10.715210023621239, "grad_norm": 0.04096072539687157, "learning_rate": 0.01, "loss": 1.9654, "step": 104334 }, { "epoch": 10.71551812673308, "grad_norm": 0.050055526196956635, "learning_rate": 0.01, "loss": 1.9306, "step": 104337 }, { "epoch": 10.715826229844922, "grad_norm": 0.04591112583875656, "learning_rate": 0.01, "loss": 1.9426, "step": 104340 }, { "epoch": 10.716134332956763, "grad_norm": 0.1036655455827713, "learning_rate": 0.01, "loss": 1.9504, "step": 104343 }, { "epoch": 10.716442436068604, "grad_norm": 0.037715498358011246, "learning_rate": 0.01, "loss": 1.9378, "step": 104346 }, { "epoch": 10.716750539180445, "grad_norm": 0.047764185816049576, "learning_rate": 0.01, "loss": 1.946, "step": 104349 }, { "epoch": 10.717058642292287, "grad_norm": 0.06210208684206009, "learning_rate": 0.01, "loss": 1.962, "step": 104352 }, { "epoch": 10.717366745404128, "grad_norm": 0.10889417678117752, "learning_rate": 0.01, "loss": 1.9424, "step": 104355 }, { "epoch": 10.71767484851597, "grad_norm": 0.05223528668284416, "learning_rate": 0.01, "loss": 1.9468, "step": 104358 }, { "epoch": 10.717982951627812, "grad_norm": 0.08993866294622421, "learning_rate": 0.01, "loss": 1.9524, "step": 104361 }, { "epoch": 10.718291054739653, "grad_norm": 0.12946274876594543, "learning_rate": 0.01, "loss": 1.9256, "step": 104364 }, { "epoch": 10.718599157851495, "grad_norm": 0.05884292721748352, "learning_rate": 0.01, "loss": 1.9428, "step": 104367 }, { "epoch": 10.718907260963336, "grad_norm": 0.05492502823472023, "learning_rate": 0.01, "loss": 1.9225, "step": 104370 }, { "epoch": 10.719215364075177, "grad_norm": 0.049445055425167084, "learning_rate": 0.01, "loss": 1.9433, "step": 104373 }, { "epoch": 10.719523467187019, "grad_norm": 0.039928633719682693, "learning_rate": 0.01, "loss": 1.9361, "step": 104376 }, { "epoch": 10.71983157029886, "grad_norm": 0.045593008399009705, "learning_rate": 0.01, "loss": 1.941, "step": 104379 }, { "epoch": 10.720139673410701, "grad_norm": 0.10593906044960022, "learning_rate": 0.01, "loss": 1.9574, "step": 104382 }, { "epoch": 10.720447776522542, "grad_norm": 0.07790132611989975, "learning_rate": 0.01, "loss": 1.9463, "step": 104385 }, { "epoch": 10.720755879634384, "grad_norm": 0.08720636367797852, "learning_rate": 0.01, "loss": 1.9402, "step": 104388 }, { "epoch": 10.721063982746227, "grad_norm": 0.07232289761304855, "learning_rate": 0.01, "loss": 1.9608, "step": 104391 }, { "epoch": 10.721372085858068, "grad_norm": 0.044367242604494095, "learning_rate": 0.01, "loss": 1.9584, "step": 104394 }, { "epoch": 10.72168018896991, "grad_norm": 0.050533805042505264, "learning_rate": 0.01, "loss": 1.9396, "step": 104397 }, { "epoch": 10.72198829208175, "grad_norm": 0.0687800869345665, "learning_rate": 0.01, "loss": 1.95, "step": 104400 }, { "epoch": 10.722296395193592, "grad_norm": 0.09476820379495621, "learning_rate": 0.01, "loss": 1.9534, "step": 104403 }, { "epoch": 10.722604498305433, "grad_norm": 0.08872959762811661, "learning_rate": 0.01, "loss": 1.9514, "step": 104406 }, { "epoch": 10.722912601417274, "grad_norm": 0.10332068800926208, "learning_rate": 0.01, "loss": 1.9485, "step": 104409 }, { "epoch": 10.723220704529115, "grad_norm": 0.05728401243686676, "learning_rate": 0.01, "loss": 1.9344, "step": 104412 }, { "epoch": 10.723528807640957, "grad_norm": 0.04051383212208748, "learning_rate": 0.01, "loss": 1.9416, "step": 104415 }, { "epoch": 10.723836910752798, "grad_norm": 0.06004336103796959, "learning_rate": 0.01, "loss": 1.9341, "step": 104418 }, { "epoch": 10.72414501386464, "grad_norm": 0.03955170512199402, "learning_rate": 0.01, "loss": 1.9099, "step": 104421 }, { "epoch": 10.724453116976482, "grad_norm": 0.031162617728114128, "learning_rate": 0.01, "loss": 1.9551, "step": 104424 }, { "epoch": 10.724761220088324, "grad_norm": 0.03177347779273987, "learning_rate": 0.01, "loss": 1.9325, "step": 104427 }, { "epoch": 10.725069323200165, "grad_norm": 0.04968690127134323, "learning_rate": 0.01, "loss": 1.9596, "step": 104430 }, { "epoch": 10.725377426312006, "grad_norm": 0.05992182344198227, "learning_rate": 0.01, "loss": 1.9465, "step": 104433 }, { "epoch": 10.725685529423847, "grad_norm": 0.042911652475595474, "learning_rate": 0.01, "loss": 1.9391, "step": 104436 }, { "epoch": 10.725993632535689, "grad_norm": 0.07481258362531662, "learning_rate": 0.01, "loss": 1.9431, "step": 104439 }, { "epoch": 10.72630173564753, "grad_norm": 0.192671537399292, "learning_rate": 0.01, "loss": 1.9395, "step": 104442 }, { "epoch": 10.726609838759371, "grad_norm": 0.08988282084465027, "learning_rate": 0.01, "loss": 1.9441, "step": 104445 }, { "epoch": 10.726917941871212, "grad_norm": 0.04226021468639374, "learning_rate": 0.01, "loss": 1.9468, "step": 104448 }, { "epoch": 10.727226044983054, "grad_norm": 0.04570133611559868, "learning_rate": 0.01, "loss": 1.9224, "step": 104451 }, { "epoch": 10.727534148094897, "grad_norm": 0.05308179929852486, "learning_rate": 0.01, "loss": 1.9253, "step": 104454 }, { "epoch": 10.727842251206738, "grad_norm": 0.03369840979576111, "learning_rate": 0.01, "loss": 1.9155, "step": 104457 }, { "epoch": 10.72815035431858, "grad_norm": 0.05607397481799126, "learning_rate": 0.01, "loss": 1.9486, "step": 104460 }, { "epoch": 10.72845845743042, "grad_norm": 0.0777030736207962, "learning_rate": 0.01, "loss": 1.9247, "step": 104463 }, { "epoch": 10.728766560542262, "grad_norm": 0.055943794548511505, "learning_rate": 0.01, "loss": 1.9215, "step": 104466 }, { "epoch": 10.729074663654103, "grad_norm": 0.03508768975734711, "learning_rate": 0.01, "loss": 1.9552, "step": 104469 }, { "epoch": 10.729382766765944, "grad_norm": 0.06143694743514061, "learning_rate": 0.01, "loss": 1.9186, "step": 104472 }, { "epoch": 10.729690869877786, "grad_norm": 0.051577091217041016, "learning_rate": 0.01, "loss": 1.9509, "step": 104475 }, { "epoch": 10.729998972989627, "grad_norm": 0.048224084079265594, "learning_rate": 0.01, "loss": 1.9456, "step": 104478 }, { "epoch": 10.730307076101468, "grad_norm": 0.17025841772556305, "learning_rate": 0.01, "loss": 1.93, "step": 104481 }, { "epoch": 10.73061517921331, "grad_norm": 0.0435960553586483, "learning_rate": 0.01, "loss": 1.942, "step": 104484 }, { "epoch": 10.73092328232515, "grad_norm": 0.035013701766729355, "learning_rate": 0.01, "loss": 1.9467, "step": 104487 }, { "epoch": 10.731231385436994, "grad_norm": 0.03989393264055252, "learning_rate": 0.01, "loss": 1.9439, "step": 104490 }, { "epoch": 10.731539488548835, "grad_norm": 0.04265978932380676, "learning_rate": 0.01, "loss": 1.9325, "step": 104493 }, { "epoch": 10.731847591660676, "grad_norm": 0.04900422692298889, "learning_rate": 0.01, "loss": 1.9395, "step": 104496 }, { "epoch": 10.732155694772517, "grad_norm": 0.040579188615083694, "learning_rate": 0.01, "loss": 1.9098, "step": 104499 }, { "epoch": 10.732463797884359, "grad_norm": 0.04971577972173691, "learning_rate": 0.01, "loss": 1.962, "step": 104502 }, { "epoch": 10.7327719009962, "grad_norm": 0.04374386742711067, "learning_rate": 0.01, "loss": 1.9565, "step": 104505 }, { "epoch": 10.733080004108041, "grad_norm": 0.0434911847114563, "learning_rate": 0.01, "loss": 1.96, "step": 104508 }, { "epoch": 10.733388107219882, "grad_norm": 0.04322110116481781, "learning_rate": 0.01, "loss": 1.9317, "step": 104511 }, { "epoch": 10.733696210331724, "grad_norm": 0.08954337239265442, "learning_rate": 0.01, "loss": 1.9323, "step": 104514 }, { "epoch": 10.734004313443565, "grad_norm": 0.09366462379693985, "learning_rate": 0.01, "loss": 1.9371, "step": 104517 }, { "epoch": 10.734312416555408, "grad_norm": 0.09955374151468277, "learning_rate": 0.01, "loss": 1.9413, "step": 104520 }, { "epoch": 10.73462051966725, "grad_norm": 0.08628708124160767, "learning_rate": 0.01, "loss": 1.9424, "step": 104523 }, { "epoch": 10.73492862277909, "grad_norm": 0.0417771115899086, "learning_rate": 0.01, "loss": 1.9189, "step": 104526 }, { "epoch": 10.735236725890932, "grad_norm": 0.041715867817401886, "learning_rate": 0.01, "loss": 1.9339, "step": 104529 }, { "epoch": 10.735544829002773, "grad_norm": 0.043204814195632935, "learning_rate": 0.01, "loss": 1.9611, "step": 104532 }, { "epoch": 10.735852932114614, "grad_norm": 0.04433635249733925, "learning_rate": 0.01, "loss": 1.9232, "step": 104535 }, { "epoch": 10.736161035226456, "grad_norm": 0.09020950645208359, "learning_rate": 0.01, "loss": 1.9414, "step": 104538 }, { "epoch": 10.736469138338297, "grad_norm": 0.09889649599790573, "learning_rate": 0.01, "loss": 1.9321, "step": 104541 }, { "epoch": 10.736777241450138, "grad_norm": 0.0474759116768837, "learning_rate": 0.01, "loss": 1.9406, "step": 104544 }, { "epoch": 10.73708534456198, "grad_norm": 0.04423435404896736, "learning_rate": 0.01, "loss": 1.9383, "step": 104547 }, { "epoch": 10.73739344767382, "grad_norm": 0.07683929055929184, "learning_rate": 0.01, "loss": 1.9213, "step": 104550 }, { "epoch": 10.737701550785664, "grad_norm": 0.06812088936567307, "learning_rate": 0.01, "loss": 1.9435, "step": 104553 }, { "epoch": 10.738009653897505, "grad_norm": 0.04490400478243828, "learning_rate": 0.01, "loss": 1.9266, "step": 104556 }, { "epoch": 10.738317757009346, "grad_norm": 0.03471510857343674, "learning_rate": 0.01, "loss": 1.9341, "step": 104559 }, { "epoch": 10.738625860121187, "grad_norm": 0.06390637904405594, "learning_rate": 0.01, "loss": 1.9149, "step": 104562 }, { "epoch": 10.738933963233029, "grad_norm": 0.0859960988163948, "learning_rate": 0.01, "loss": 1.9191, "step": 104565 }, { "epoch": 10.73924206634487, "grad_norm": 0.052980903536081314, "learning_rate": 0.01, "loss": 1.9394, "step": 104568 }, { "epoch": 10.739550169456711, "grad_norm": 0.10932338237762451, "learning_rate": 0.01, "loss": 1.945, "step": 104571 }, { "epoch": 10.739858272568553, "grad_norm": 0.09292354434728622, "learning_rate": 0.01, "loss": 1.9174, "step": 104574 }, { "epoch": 10.740166375680394, "grad_norm": 0.06333106756210327, "learning_rate": 0.01, "loss": 1.9378, "step": 104577 }, { "epoch": 10.740474478792235, "grad_norm": 0.06950338929891586, "learning_rate": 0.01, "loss": 1.9419, "step": 104580 }, { "epoch": 10.740782581904078, "grad_norm": 0.04017019271850586, "learning_rate": 0.01, "loss": 1.9324, "step": 104583 }, { "epoch": 10.74109068501592, "grad_norm": 0.09870757907629013, "learning_rate": 0.01, "loss": 1.9531, "step": 104586 }, { "epoch": 10.74139878812776, "grad_norm": 0.07882300764322281, "learning_rate": 0.01, "loss": 1.9297, "step": 104589 }, { "epoch": 10.741706891239602, "grad_norm": 0.06481506675481796, "learning_rate": 0.01, "loss": 1.9352, "step": 104592 }, { "epoch": 10.742014994351443, "grad_norm": 0.05816543847322464, "learning_rate": 0.01, "loss": 1.9181, "step": 104595 }, { "epoch": 10.742323097463284, "grad_norm": 0.046804074198007584, "learning_rate": 0.01, "loss": 1.9647, "step": 104598 }, { "epoch": 10.742631200575126, "grad_norm": 0.0676804855465889, "learning_rate": 0.01, "loss": 1.9192, "step": 104601 }, { "epoch": 10.742939303686967, "grad_norm": 0.06489752978086472, "learning_rate": 0.01, "loss": 1.9492, "step": 104604 }, { "epoch": 10.743247406798808, "grad_norm": 0.033941950649023056, "learning_rate": 0.01, "loss": 1.9362, "step": 104607 }, { "epoch": 10.74355550991065, "grad_norm": 0.1338924765586853, "learning_rate": 0.01, "loss": 1.9474, "step": 104610 }, { "epoch": 10.74386361302249, "grad_norm": 0.04623845964670181, "learning_rate": 0.01, "loss": 1.9306, "step": 104613 }, { "epoch": 10.744171716134334, "grad_norm": 0.07495556771755219, "learning_rate": 0.01, "loss": 1.9277, "step": 104616 }, { "epoch": 10.744479819246175, "grad_norm": 0.07099320739507675, "learning_rate": 0.01, "loss": 1.9252, "step": 104619 }, { "epoch": 10.744787922358016, "grad_norm": 0.03660536929965019, "learning_rate": 0.01, "loss": 1.9559, "step": 104622 }, { "epoch": 10.745096025469858, "grad_norm": 0.06336542963981628, "learning_rate": 0.01, "loss": 1.9269, "step": 104625 }, { "epoch": 10.745404128581699, "grad_norm": 0.09526252746582031, "learning_rate": 0.01, "loss": 1.9439, "step": 104628 }, { "epoch": 10.74571223169354, "grad_norm": 0.04167678952217102, "learning_rate": 0.01, "loss": 1.9132, "step": 104631 }, { "epoch": 10.746020334805381, "grad_norm": 0.049752864986658096, "learning_rate": 0.01, "loss": 1.9483, "step": 104634 }, { "epoch": 10.746328437917223, "grad_norm": 0.10626377910375595, "learning_rate": 0.01, "loss": 1.9336, "step": 104637 }, { "epoch": 10.746636541029064, "grad_norm": 0.08697515726089478, "learning_rate": 0.01, "loss": 1.9597, "step": 104640 }, { "epoch": 10.746944644140905, "grad_norm": 0.07837111502885818, "learning_rate": 0.01, "loss": 1.9221, "step": 104643 }, { "epoch": 10.747252747252748, "grad_norm": 0.056924059987068176, "learning_rate": 0.01, "loss": 1.9416, "step": 104646 }, { "epoch": 10.74756085036459, "grad_norm": 0.05303655564785004, "learning_rate": 0.01, "loss": 1.9528, "step": 104649 }, { "epoch": 10.74786895347643, "grad_norm": 0.043836649507284164, "learning_rate": 0.01, "loss": 1.9441, "step": 104652 }, { "epoch": 10.748177056588272, "grad_norm": 0.06553082168102264, "learning_rate": 0.01, "loss": 1.9327, "step": 104655 }, { "epoch": 10.748485159700113, "grad_norm": 0.05108625441789627, "learning_rate": 0.01, "loss": 1.9307, "step": 104658 }, { "epoch": 10.748793262811954, "grad_norm": 0.04943982884287834, "learning_rate": 0.01, "loss": 1.9618, "step": 104661 }, { "epoch": 10.749101365923796, "grad_norm": 0.06580820679664612, "learning_rate": 0.01, "loss": 1.9558, "step": 104664 }, { "epoch": 10.749409469035637, "grad_norm": 0.09794413298368454, "learning_rate": 0.01, "loss": 1.9385, "step": 104667 }, { "epoch": 10.749717572147478, "grad_norm": 0.08265780657529831, "learning_rate": 0.01, "loss": 1.9484, "step": 104670 }, { "epoch": 10.75002567525932, "grad_norm": 0.11228012293577194, "learning_rate": 0.01, "loss": 1.9488, "step": 104673 }, { "epoch": 10.75033377837116, "grad_norm": 0.054208461195230484, "learning_rate": 0.01, "loss": 1.9223, "step": 104676 }, { "epoch": 10.750641881483002, "grad_norm": 0.1329595148563385, "learning_rate": 0.01, "loss": 1.9245, "step": 104679 }, { "epoch": 10.750949984594845, "grad_norm": 0.08030585944652557, "learning_rate": 0.01, "loss": 1.9351, "step": 104682 }, { "epoch": 10.751258087706686, "grad_norm": 0.05090648680925369, "learning_rate": 0.01, "loss": 1.9434, "step": 104685 }, { "epoch": 10.751566190818528, "grad_norm": 0.05258609354496002, "learning_rate": 0.01, "loss": 1.948, "step": 104688 }, { "epoch": 10.751874293930369, "grad_norm": 0.05537552759051323, "learning_rate": 0.01, "loss": 1.9491, "step": 104691 }, { "epoch": 10.75218239704221, "grad_norm": 0.043664395809173584, "learning_rate": 0.01, "loss": 1.9332, "step": 104694 }, { "epoch": 10.752490500154051, "grad_norm": 0.03498004749417305, "learning_rate": 0.01, "loss": 1.9501, "step": 104697 }, { "epoch": 10.752798603265893, "grad_norm": 0.03816048428416252, "learning_rate": 0.01, "loss": 1.9299, "step": 104700 }, { "epoch": 10.753106706377734, "grad_norm": 0.11150369048118591, "learning_rate": 0.01, "loss": 1.9408, "step": 104703 }, { "epoch": 10.753414809489575, "grad_norm": 0.108778215944767, "learning_rate": 0.01, "loss": 1.9318, "step": 104706 }, { "epoch": 10.753722912601418, "grad_norm": 0.05636920407414436, "learning_rate": 0.01, "loss": 1.9283, "step": 104709 }, { "epoch": 10.75403101571326, "grad_norm": 0.038366153836250305, "learning_rate": 0.01, "loss": 1.9325, "step": 104712 }, { "epoch": 10.7543391188251, "grad_norm": 0.07660912722349167, "learning_rate": 0.01, "loss": 1.9394, "step": 104715 }, { "epoch": 10.754647221936942, "grad_norm": 0.06754130125045776, "learning_rate": 0.01, "loss": 1.9319, "step": 104718 }, { "epoch": 10.754955325048783, "grad_norm": 0.09265803545713425, "learning_rate": 0.01, "loss": 1.9592, "step": 104721 }, { "epoch": 10.755263428160625, "grad_norm": 0.06105842813849449, "learning_rate": 0.01, "loss": 1.9336, "step": 104724 }, { "epoch": 10.755571531272466, "grad_norm": 0.07468662410974503, "learning_rate": 0.01, "loss": 1.9386, "step": 104727 }, { "epoch": 10.755879634384307, "grad_norm": 0.05258229002356529, "learning_rate": 0.01, "loss": 1.9292, "step": 104730 }, { "epoch": 10.756187737496148, "grad_norm": 0.0563662014901638, "learning_rate": 0.01, "loss": 1.9467, "step": 104733 }, { "epoch": 10.75649584060799, "grad_norm": 0.04614382982254028, "learning_rate": 0.01, "loss": 1.9475, "step": 104736 }, { "epoch": 10.75680394371983, "grad_norm": 0.04521797597408295, "learning_rate": 0.01, "loss": 1.928, "step": 104739 }, { "epoch": 10.757112046831672, "grad_norm": 0.05099884420633316, "learning_rate": 0.01, "loss": 1.943, "step": 104742 }, { "epoch": 10.757420149943515, "grad_norm": 0.05011932551860809, "learning_rate": 0.01, "loss": 1.9639, "step": 104745 }, { "epoch": 10.757728253055356, "grad_norm": 0.15515439212322235, "learning_rate": 0.01, "loss": 1.9441, "step": 104748 }, { "epoch": 10.758036356167198, "grad_norm": 0.04284700006246567, "learning_rate": 0.01, "loss": 1.9122, "step": 104751 }, { "epoch": 10.758344459279039, "grad_norm": 0.045263636857271194, "learning_rate": 0.01, "loss": 1.9288, "step": 104754 }, { "epoch": 10.75865256239088, "grad_norm": 0.0417122021317482, "learning_rate": 0.01, "loss": 1.9357, "step": 104757 }, { "epoch": 10.758960665502721, "grad_norm": 0.06633123755455017, "learning_rate": 0.01, "loss": 1.9315, "step": 104760 }, { "epoch": 10.759268768614563, "grad_norm": 0.043190255761146545, "learning_rate": 0.01, "loss": 1.9511, "step": 104763 }, { "epoch": 10.759576871726404, "grad_norm": 0.031169939786195755, "learning_rate": 0.01, "loss": 1.9577, "step": 104766 }, { "epoch": 10.759884974838245, "grad_norm": 0.03632303699851036, "learning_rate": 0.01, "loss": 1.9225, "step": 104769 }, { "epoch": 10.760193077950087, "grad_norm": 0.07166427373886108, "learning_rate": 0.01, "loss": 1.9385, "step": 104772 }, { "epoch": 10.76050118106193, "grad_norm": 0.06929494440555573, "learning_rate": 0.01, "loss": 1.9421, "step": 104775 }, { "epoch": 10.76080928417377, "grad_norm": 0.05270274356007576, "learning_rate": 0.01, "loss": 1.9464, "step": 104778 }, { "epoch": 10.761117387285612, "grad_norm": 0.1968749314546585, "learning_rate": 0.01, "loss": 1.9268, "step": 104781 }, { "epoch": 10.761425490397453, "grad_norm": 0.08403410017490387, "learning_rate": 0.01, "loss": 1.9788, "step": 104784 }, { "epoch": 10.761733593509295, "grad_norm": 0.04399842768907547, "learning_rate": 0.01, "loss": 1.9417, "step": 104787 }, { "epoch": 10.762041696621136, "grad_norm": 0.05895080417394638, "learning_rate": 0.01, "loss": 1.9308, "step": 104790 }, { "epoch": 10.762349799732977, "grad_norm": 0.060437798500061035, "learning_rate": 0.01, "loss": 1.9406, "step": 104793 }, { "epoch": 10.762657902844818, "grad_norm": 0.06297816336154938, "learning_rate": 0.01, "loss": 1.9271, "step": 104796 }, { "epoch": 10.76296600595666, "grad_norm": 0.06990213692188263, "learning_rate": 0.01, "loss": 1.9249, "step": 104799 }, { "epoch": 10.763274109068501, "grad_norm": 0.044904761016368866, "learning_rate": 0.01, "loss": 1.9402, "step": 104802 }, { "epoch": 10.763582212180342, "grad_norm": 0.05253970995545387, "learning_rate": 0.01, "loss": 1.9292, "step": 104805 }, { "epoch": 10.763890315292185, "grad_norm": 0.03301846608519554, "learning_rate": 0.01, "loss": 1.9387, "step": 104808 }, { "epoch": 10.764198418404026, "grad_norm": 0.04922977834939957, "learning_rate": 0.01, "loss": 1.9479, "step": 104811 }, { "epoch": 10.764506521515868, "grad_norm": 0.03909482806921005, "learning_rate": 0.01, "loss": 1.9642, "step": 104814 }, { "epoch": 10.764814624627709, "grad_norm": 0.04505687952041626, "learning_rate": 0.01, "loss": 1.9496, "step": 104817 }, { "epoch": 10.76512272773955, "grad_norm": 0.0761536955833435, "learning_rate": 0.01, "loss": 1.8965, "step": 104820 }, { "epoch": 10.765430830851392, "grad_norm": 0.08779765665531158, "learning_rate": 0.01, "loss": 1.925, "step": 104823 }, { "epoch": 10.765738933963233, "grad_norm": 0.16538570821285248, "learning_rate": 0.01, "loss": 1.9416, "step": 104826 }, { "epoch": 10.766047037075074, "grad_norm": 0.08162948489189148, "learning_rate": 0.01, "loss": 1.9625, "step": 104829 }, { "epoch": 10.766355140186915, "grad_norm": 0.07075890153646469, "learning_rate": 0.01, "loss": 1.9674, "step": 104832 }, { "epoch": 10.766663243298757, "grad_norm": 0.040055204182863235, "learning_rate": 0.01, "loss": 1.9143, "step": 104835 }, { "epoch": 10.7669713464106, "grad_norm": 0.03459317237138748, "learning_rate": 0.01, "loss": 1.941, "step": 104838 }, { "epoch": 10.76727944952244, "grad_norm": 0.06219128519296646, "learning_rate": 0.01, "loss": 1.9117, "step": 104841 }, { "epoch": 10.767587552634282, "grad_norm": 0.06373794376850128, "learning_rate": 0.01, "loss": 1.9448, "step": 104844 }, { "epoch": 10.767895655746123, "grad_norm": 0.05703836679458618, "learning_rate": 0.01, "loss": 1.9427, "step": 104847 }, { "epoch": 10.768203758857965, "grad_norm": 0.03425431251525879, "learning_rate": 0.01, "loss": 1.9239, "step": 104850 }, { "epoch": 10.768511861969806, "grad_norm": 0.04029223695397377, "learning_rate": 0.01, "loss": 1.9385, "step": 104853 }, { "epoch": 10.768819965081647, "grad_norm": 0.04060615599155426, "learning_rate": 0.01, "loss": 1.9273, "step": 104856 }, { "epoch": 10.769128068193488, "grad_norm": 0.03595336154103279, "learning_rate": 0.01, "loss": 1.929, "step": 104859 }, { "epoch": 10.76943617130533, "grad_norm": 0.046418748795986176, "learning_rate": 0.01, "loss": 1.9633, "step": 104862 }, { "epoch": 10.769744274417171, "grad_norm": 0.05077604576945305, "learning_rate": 0.01, "loss": 1.9631, "step": 104865 }, { "epoch": 10.770052377529012, "grad_norm": 0.1307033896446228, "learning_rate": 0.01, "loss": 1.9341, "step": 104868 }, { "epoch": 10.770360480640855, "grad_norm": 0.03684141859412193, "learning_rate": 0.01, "loss": 1.9192, "step": 104871 }, { "epoch": 10.770668583752697, "grad_norm": 0.06678260862827301, "learning_rate": 0.01, "loss": 1.9443, "step": 104874 }, { "epoch": 10.770976686864538, "grad_norm": 0.037949930876493454, "learning_rate": 0.01, "loss": 1.9322, "step": 104877 }, { "epoch": 10.771284789976379, "grad_norm": 0.03636316582560539, "learning_rate": 0.01, "loss": 1.9359, "step": 104880 }, { "epoch": 10.77159289308822, "grad_norm": 0.07280274480581284, "learning_rate": 0.01, "loss": 1.9493, "step": 104883 }, { "epoch": 10.771900996200062, "grad_norm": 0.09522299468517303, "learning_rate": 0.01, "loss": 1.9478, "step": 104886 }, { "epoch": 10.772209099311903, "grad_norm": 0.08580028265714645, "learning_rate": 0.01, "loss": 1.9255, "step": 104889 }, { "epoch": 10.772517202423744, "grad_norm": 0.08504713326692581, "learning_rate": 0.01, "loss": 1.9646, "step": 104892 }, { "epoch": 10.772825305535585, "grad_norm": 0.05523672699928284, "learning_rate": 0.01, "loss": 1.9374, "step": 104895 }, { "epoch": 10.773133408647427, "grad_norm": 0.08716922998428345, "learning_rate": 0.01, "loss": 1.9049, "step": 104898 }, { "epoch": 10.77344151175927, "grad_norm": 0.06506255269050598, "learning_rate": 0.01, "loss": 1.9235, "step": 104901 }, { "epoch": 10.773749614871111, "grad_norm": 0.11295832693576813, "learning_rate": 0.01, "loss": 1.9305, "step": 104904 }, { "epoch": 10.774057717982952, "grad_norm": 0.03108265809714794, "learning_rate": 0.01, "loss": 1.932, "step": 104907 }, { "epoch": 10.774365821094793, "grad_norm": 0.04045362398028374, "learning_rate": 0.01, "loss": 1.9474, "step": 104910 }, { "epoch": 10.774673924206635, "grad_norm": 0.09854461252689362, "learning_rate": 0.01, "loss": 1.9556, "step": 104913 }, { "epoch": 10.774982027318476, "grad_norm": 0.10017503798007965, "learning_rate": 0.01, "loss": 1.9553, "step": 104916 }, { "epoch": 10.775290130430317, "grad_norm": 0.04605772718787193, "learning_rate": 0.01, "loss": 1.9257, "step": 104919 }, { "epoch": 10.775598233542159, "grad_norm": 0.04422129690647125, "learning_rate": 0.01, "loss": 1.9401, "step": 104922 }, { "epoch": 10.775906336654, "grad_norm": 0.03512374684214592, "learning_rate": 0.01, "loss": 1.9176, "step": 104925 }, { "epoch": 10.776214439765841, "grad_norm": 0.05168703570961952, "learning_rate": 0.01, "loss": 1.9462, "step": 104928 }, { "epoch": 10.776522542877682, "grad_norm": 0.04373502731323242, "learning_rate": 0.01, "loss": 1.9167, "step": 104931 }, { "epoch": 10.776830645989524, "grad_norm": 0.1377280056476593, "learning_rate": 0.01, "loss": 1.9438, "step": 104934 }, { "epoch": 10.777138749101367, "grad_norm": 0.09566672146320343, "learning_rate": 0.01, "loss": 1.9263, "step": 104937 }, { "epoch": 10.777446852213208, "grad_norm": 0.055807486176490784, "learning_rate": 0.01, "loss": 1.9218, "step": 104940 }, { "epoch": 10.77775495532505, "grad_norm": 0.037279289215803146, "learning_rate": 0.01, "loss": 1.9426, "step": 104943 }, { "epoch": 10.77806305843689, "grad_norm": 0.04037223383784294, "learning_rate": 0.01, "loss": 1.9258, "step": 104946 }, { "epoch": 10.778371161548732, "grad_norm": 0.04375686123967171, "learning_rate": 0.01, "loss": 1.9216, "step": 104949 }, { "epoch": 10.778679264660573, "grad_norm": 0.10367769747972488, "learning_rate": 0.01, "loss": 1.9476, "step": 104952 }, { "epoch": 10.778987367772414, "grad_norm": 0.06901142001152039, "learning_rate": 0.01, "loss": 1.936, "step": 104955 }, { "epoch": 10.779295470884255, "grad_norm": 0.05770748108625412, "learning_rate": 0.01, "loss": 1.9383, "step": 104958 }, { "epoch": 10.779603573996097, "grad_norm": 0.054886963218450546, "learning_rate": 0.01, "loss": 1.9483, "step": 104961 }, { "epoch": 10.779911677107938, "grad_norm": 0.050134312361478806, "learning_rate": 0.01, "loss": 1.9373, "step": 104964 }, { "epoch": 10.780219780219781, "grad_norm": 0.04675987735390663, "learning_rate": 0.01, "loss": 1.9351, "step": 104967 }, { "epoch": 10.780527883331622, "grad_norm": 0.09763073921203613, "learning_rate": 0.01, "loss": 1.9589, "step": 104970 }, { "epoch": 10.780835986443464, "grad_norm": 0.04318242520093918, "learning_rate": 0.01, "loss": 1.9332, "step": 104973 }, { "epoch": 10.781144089555305, "grad_norm": 0.039554860442876816, "learning_rate": 0.01, "loss": 1.9247, "step": 104976 }, { "epoch": 10.781452192667146, "grad_norm": 0.04658977687358856, "learning_rate": 0.01, "loss": 1.9308, "step": 104979 }, { "epoch": 10.781760295778987, "grad_norm": 0.036048904061317444, "learning_rate": 0.01, "loss": 1.936, "step": 104982 }, { "epoch": 10.782068398890829, "grad_norm": 0.08908376097679138, "learning_rate": 0.01, "loss": 1.9472, "step": 104985 }, { "epoch": 10.78237650200267, "grad_norm": 0.11830753833055496, "learning_rate": 0.01, "loss": 1.9496, "step": 104988 }, { "epoch": 10.782684605114511, "grad_norm": 0.13836215436458588, "learning_rate": 0.01, "loss": 1.9565, "step": 104991 }, { "epoch": 10.782992708226352, "grad_norm": 0.06564682722091675, "learning_rate": 0.01, "loss": 1.9523, "step": 104994 }, { "epoch": 10.783300811338194, "grad_norm": 0.10762113332748413, "learning_rate": 0.01, "loss": 1.9567, "step": 104997 }, { "epoch": 10.783608914450037, "grad_norm": 0.04483092948794365, "learning_rate": 0.01, "loss": 1.9204, "step": 105000 }, { "epoch": 10.783917017561878, "grad_norm": 0.05532277747988701, "learning_rate": 0.01, "loss": 1.9195, "step": 105003 }, { "epoch": 10.78422512067372, "grad_norm": 0.04673232138156891, "learning_rate": 0.01, "loss": 1.9223, "step": 105006 }, { "epoch": 10.78453322378556, "grad_norm": 0.07055231183767319, "learning_rate": 0.01, "loss": 1.9541, "step": 105009 }, { "epoch": 10.784841326897402, "grad_norm": 0.05611100792884827, "learning_rate": 0.01, "loss": 1.9099, "step": 105012 }, { "epoch": 10.785149430009243, "grad_norm": 0.1283992975950241, "learning_rate": 0.01, "loss": 1.9314, "step": 105015 }, { "epoch": 10.785457533121084, "grad_norm": 0.16754893958568573, "learning_rate": 0.01, "loss": 1.9631, "step": 105018 }, { "epoch": 10.785765636232926, "grad_norm": 0.08787377923727036, "learning_rate": 0.01, "loss": 1.9188, "step": 105021 }, { "epoch": 10.786073739344767, "grad_norm": 0.08007776737213135, "learning_rate": 0.01, "loss": 1.9381, "step": 105024 }, { "epoch": 10.786381842456608, "grad_norm": 0.07712419331073761, "learning_rate": 0.01, "loss": 1.9396, "step": 105027 }, { "epoch": 10.786689945568451, "grad_norm": 0.08709436655044556, "learning_rate": 0.01, "loss": 1.9288, "step": 105030 }, { "epoch": 10.786998048680292, "grad_norm": 0.06541412323713303, "learning_rate": 0.01, "loss": 1.9464, "step": 105033 }, { "epoch": 10.787306151792134, "grad_norm": 0.11988494545221329, "learning_rate": 0.01, "loss": 1.9296, "step": 105036 }, { "epoch": 10.787614254903975, "grad_norm": 0.12408696115016937, "learning_rate": 0.01, "loss": 1.9281, "step": 105039 }, { "epoch": 10.787922358015816, "grad_norm": 0.05449619144201279, "learning_rate": 0.01, "loss": 1.963, "step": 105042 }, { "epoch": 10.788230461127657, "grad_norm": 0.04881265014410019, "learning_rate": 0.01, "loss": 1.9381, "step": 105045 }, { "epoch": 10.788538564239499, "grad_norm": 0.05132092535495758, "learning_rate": 0.01, "loss": 1.9301, "step": 105048 }, { "epoch": 10.78884666735134, "grad_norm": 0.06299763172864914, "learning_rate": 0.01, "loss": 1.939, "step": 105051 }, { "epoch": 10.789154770463181, "grad_norm": 0.0902947410941124, "learning_rate": 0.01, "loss": 1.936, "step": 105054 }, { "epoch": 10.789462873575022, "grad_norm": 0.045308105647563934, "learning_rate": 0.01, "loss": 1.9122, "step": 105057 }, { "epoch": 10.789770976686864, "grad_norm": 0.09681692719459534, "learning_rate": 0.01, "loss": 1.9141, "step": 105060 }, { "epoch": 10.790079079798707, "grad_norm": 0.08720479160547256, "learning_rate": 0.01, "loss": 1.9437, "step": 105063 }, { "epoch": 10.790387182910548, "grad_norm": 0.06638406217098236, "learning_rate": 0.01, "loss": 1.963, "step": 105066 }, { "epoch": 10.79069528602239, "grad_norm": 0.09998981654644012, "learning_rate": 0.01, "loss": 1.9235, "step": 105069 }, { "epoch": 10.79100338913423, "grad_norm": 0.05286186933517456, "learning_rate": 0.01, "loss": 1.9308, "step": 105072 }, { "epoch": 10.791311492246072, "grad_norm": 0.041253458708524704, "learning_rate": 0.01, "loss": 1.9266, "step": 105075 }, { "epoch": 10.791619595357913, "grad_norm": 0.0601825937628746, "learning_rate": 0.01, "loss": 1.9543, "step": 105078 }, { "epoch": 10.791927698469754, "grad_norm": 0.03299073502421379, "learning_rate": 0.01, "loss": 1.9326, "step": 105081 }, { "epoch": 10.792235801581596, "grad_norm": 0.10112438350915909, "learning_rate": 0.01, "loss": 1.9469, "step": 105084 }, { "epoch": 10.792543904693437, "grad_norm": 0.0791049674153328, "learning_rate": 0.01, "loss": 1.9222, "step": 105087 }, { "epoch": 10.792852007805278, "grad_norm": 0.09565617889165878, "learning_rate": 0.01, "loss": 1.9501, "step": 105090 }, { "epoch": 10.793160110917121, "grad_norm": 0.0910487100481987, "learning_rate": 0.01, "loss": 1.9313, "step": 105093 }, { "epoch": 10.793468214028962, "grad_norm": 0.10622784495353699, "learning_rate": 0.01, "loss": 1.9423, "step": 105096 }, { "epoch": 10.793776317140804, "grad_norm": 0.0922565832734108, "learning_rate": 0.01, "loss": 1.9144, "step": 105099 }, { "epoch": 10.794084420252645, "grad_norm": 0.12033677101135254, "learning_rate": 0.01, "loss": 1.9636, "step": 105102 }, { "epoch": 10.794392523364486, "grad_norm": 0.10453055799007416, "learning_rate": 0.01, "loss": 1.9261, "step": 105105 }, { "epoch": 10.794700626476327, "grad_norm": 0.07022013515233994, "learning_rate": 0.01, "loss": 1.9597, "step": 105108 }, { "epoch": 10.795008729588169, "grad_norm": 0.05361005663871765, "learning_rate": 0.01, "loss": 1.9616, "step": 105111 }, { "epoch": 10.79531683270001, "grad_norm": 0.034731000661849976, "learning_rate": 0.01, "loss": 1.9527, "step": 105114 }, { "epoch": 10.795624935811851, "grad_norm": 0.06049367040395737, "learning_rate": 0.01, "loss": 1.9664, "step": 105117 }, { "epoch": 10.795933038923692, "grad_norm": 0.0643046572804451, "learning_rate": 0.01, "loss": 1.9398, "step": 105120 }, { "epoch": 10.796241142035534, "grad_norm": 0.041732240468263626, "learning_rate": 0.01, "loss": 1.9568, "step": 105123 }, { "epoch": 10.796549245147377, "grad_norm": 0.10261611640453339, "learning_rate": 0.01, "loss": 1.9523, "step": 105126 }, { "epoch": 10.796857348259218, "grad_norm": 0.0781281590461731, "learning_rate": 0.01, "loss": 1.9546, "step": 105129 }, { "epoch": 10.79716545137106, "grad_norm": 0.04957817494869232, "learning_rate": 0.01, "loss": 1.9529, "step": 105132 }, { "epoch": 10.7974735544829, "grad_norm": 0.05353771522641182, "learning_rate": 0.01, "loss": 1.9569, "step": 105135 }, { "epoch": 10.797781657594742, "grad_norm": 0.048365335911512375, "learning_rate": 0.01, "loss": 1.951, "step": 105138 }, { "epoch": 10.798089760706583, "grad_norm": 0.0623091422021389, "learning_rate": 0.01, "loss": 1.9373, "step": 105141 }, { "epoch": 10.798397863818424, "grad_norm": 0.05436871945858002, "learning_rate": 0.01, "loss": 1.9085, "step": 105144 }, { "epoch": 10.798705966930266, "grad_norm": 0.05013308674097061, "learning_rate": 0.01, "loss": 1.918, "step": 105147 }, { "epoch": 10.799014070042107, "grad_norm": 0.04122906178236008, "learning_rate": 0.01, "loss": 1.9472, "step": 105150 }, { "epoch": 10.799322173153948, "grad_norm": 0.04418342188000679, "learning_rate": 0.01, "loss": 1.9202, "step": 105153 }, { "epoch": 10.799630276265791, "grad_norm": 0.05578082427382469, "learning_rate": 0.01, "loss": 1.9318, "step": 105156 }, { "epoch": 10.799938379377632, "grad_norm": 0.1632402539253235, "learning_rate": 0.01, "loss": 1.9093, "step": 105159 }, { "epoch": 10.800246482489474, "grad_norm": 0.06019952893257141, "learning_rate": 0.01, "loss": 1.9455, "step": 105162 }, { "epoch": 10.800554585601315, "grad_norm": 0.06759855896234512, "learning_rate": 0.01, "loss": 1.9281, "step": 105165 }, { "epoch": 10.800862688713156, "grad_norm": 0.03876945376396179, "learning_rate": 0.01, "loss": 1.9494, "step": 105168 }, { "epoch": 10.801170791824998, "grad_norm": 0.09414643049240112, "learning_rate": 0.01, "loss": 1.9757, "step": 105171 }, { "epoch": 10.801478894936839, "grad_norm": 0.04190301150083542, "learning_rate": 0.01, "loss": 1.9397, "step": 105174 }, { "epoch": 10.80178699804868, "grad_norm": 0.05320903658866882, "learning_rate": 0.01, "loss": 1.9687, "step": 105177 }, { "epoch": 10.802095101160521, "grad_norm": 0.056490443646907806, "learning_rate": 0.01, "loss": 1.948, "step": 105180 }, { "epoch": 10.802403204272363, "grad_norm": 0.04092302918434143, "learning_rate": 0.01, "loss": 1.9206, "step": 105183 }, { "epoch": 10.802711307384204, "grad_norm": 0.040519844740629196, "learning_rate": 0.01, "loss": 1.9489, "step": 105186 }, { "epoch": 10.803019410496045, "grad_norm": 0.04397949203848839, "learning_rate": 0.01, "loss": 1.9562, "step": 105189 }, { "epoch": 10.803327513607888, "grad_norm": 0.0402667336165905, "learning_rate": 0.01, "loss": 1.9339, "step": 105192 }, { "epoch": 10.80363561671973, "grad_norm": 0.05854826420545578, "learning_rate": 0.01, "loss": 1.9302, "step": 105195 }, { "epoch": 10.80394371983157, "grad_norm": 0.05287005379796028, "learning_rate": 0.01, "loss": 1.9358, "step": 105198 }, { "epoch": 10.804251822943412, "grad_norm": 0.03999342396855354, "learning_rate": 0.01, "loss": 1.9496, "step": 105201 }, { "epoch": 10.804559926055253, "grad_norm": 0.10459694266319275, "learning_rate": 0.01, "loss": 1.9352, "step": 105204 }, { "epoch": 10.804868029167094, "grad_norm": 0.03152311593294144, "learning_rate": 0.01, "loss": 1.9326, "step": 105207 }, { "epoch": 10.805176132278936, "grad_norm": 0.05782946199178696, "learning_rate": 0.01, "loss": 1.9661, "step": 105210 }, { "epoch": 10.805484235390777, "grad_norm": 0.05249985307455063, "learning_rate": 0.01, "loss": 1.9463, "step": 105213 }, { "epoch": 10.805792338502618, "grad_norm": 0.05483485013246536, "learning_rate": 0.01, "loss": 1.9748, "step": 105216 }, { "epoch": 10.80610044161446, "grad_norm": 0.051782358437776566, "learning_rate": 0.01, "loss": 1.9466, "step": 105219 }, { "epoch": 10.806408544726303, "grad_norm": 0.09521941095590591, "learning_rate": 0.01, "loss": 1.9442, "step": 105222 }, { "epoch": 10.806716647838144, "grad_norm": 0.06587459146976471, "learning_rate": 0.01, "loss": 1.9394, "step": 105225 }, { "epoch": 10.807024750949985, "grad_norm": 0.049619127064943314, "learning_rate": 0.01, "loss": 1.9255, "step": 105228 }, { "epoch": 10.807332854061826, "grad_norm": 0.113286092877388, "learning_rate": 0.01, "loss": 1.927, "step": 105231 }, { "epoch": 10.807640957173668, "grad_norm": 0.09651494026184082, "learning_rate": 0.01, "loss": 1.945, "step": 105234 }, { "epoch": 10.807949060285509, "grad_norm": 0.04343407601118088, "learning_rate": 0.01, "loss": 1.9493, "step": 105237 }, { "epoch": 10.80825716339735, "grad_norm": 0.043445128947496414, "learning_rate": 0.01, "loss": 1.9446, "step": 105240 }, { "epoch": 10.808565266509191, "grad_norm": 0.05014490336179733, "learning_rate": 0.01, "loss": 1.9306, "step": 105243 }, { "epoch": 10.808873369621033, "grad_norm": 0.03343608230352402, "learning_rate": 0.01, "loss": 1.9423, "step": 105246 }, { "epoch": 10.809181472732874, "grad_norm": 0.07791464030742645, "learning_rate": 0.01, "loss": 1.9434, "step": 105249 }, { "epoch": 10.809489575844715, "grad_norm": 0.05957683175802231, "learning_rate": 0.01, "loss": 1.9364, "step": 105252 }, { "epoch": 10.809797678956558, "grad_norm": 0.10276157408952713, "learning_rate": 0.01, "loss": 1.9212, "step": 105255 }, { "epoch": 10.8101057820684, "grad_norm": 0.07973282784223557, "learning_rate": 0.01, "loss": 1.9405, "step": 105258 }, { "epoch": 10.81041388518024, "grad_norm": 0.08871182054281235, "learning_rate": 0.01, "loss": 1.9358, "step": 105261 }, { "epoch": 10.810721988292082, "grad_norm": 0.060441359877586365, "learning_rate": 0.01, "loss": 1.938, "step": 105264 }, { "epoch": 10.811030091403923, "grad_norm": 0.04853738099336624, "learning_rate": 0.01, "loss": 1.9418, "step": 105267 }, { "epoch": 10.811338194515764, "grad_norm": 0.0857999175786972, "learning_rate": 0.01, "loss": 1.9337, "step": 105270 }, { "epoch": 10.811646297627606, "grad_norm": 0.05908980965614319, "learning_rate": 0.01, "loss": 1.9418, "step": 105273 }, { "epoch": 10.811954400739447, "grad_norm": 0.059544436633586884, "learning_rate": 0.01, "loss": 1.9677, "step": 105276 }, { "epoch": 10.812262503851288, "grad_norm": 0.050158899277448654, "learning_rate": 0.01, "loss": 1.9267, "step": 105279 }, { "epoch": 10.81257060696313, "grad_norm": 0.04424465075135231, "learning_rate": 0.01, "loss": 1.937, "step": 105282 }, { "epoch": 10.812878710074973, "grad_norm": 0.04916578158736229, "learning_rate": 0.01, "loss": 1.9615, "step": 105285 }, { "epoch": 10.813186813186814, "grad_norm": 0.11380893737077713, "learning_rate": 0.01, "loss": 1.9209, "step": 105288 }, { "epoch": 10.813494916298655, "grad_norm": 0.15234796702861786, "learning_rate": 0.01, "loss": 1.9603, "step": 105291 }, { "epoch": 10.813803019410496, "grad_norm": 0.05364861339330673, "learning_rate": 0.01, "loss": 1.9143, "step": 105294 }, { "epoch": 10.814111122522338, "grad_norm": 0.05904761701822281, "learning_rate": 0.01, "loss": 1.9466, "step": 105297 }, { "epoch": 10.814419225634179, "grad_norm": 0.07022589445114136, "learning_rate": 0.01, "loss": 1.9246, "step": 105300 }, { "epoch": 10.81472732874602, "grad_norm": 0.07958179712295532, "learning_rate": 0.01, "loss": 1.9174, "step": 105303 }, { "epoch": 10.815035431857861, "grad_norm": 0.08987373113632202, "learning_rate": 0.01, "loss": 1.9352, "step": 105306 }, { "epoch": 10.815343534969703, "grad_norm": 0.04922526329755783, "learning_rate": 0.01, "loss": 1.9282, "step": 105309 }, { "epoch": 10.815651638081544, "grad_norm": 0.0366976335644722, "learning_rate": 0.01, "loss": 1.9499, "step": 105312 }, { "epoch": 10.815959741193385, "grad_norm": 0.04322095960378647, "learning_rate": 0.01, "loss": 1.9395, "step": 105315 }, { "epoch": 10.816267844305228, "grad_norm": 0.07414691895246506, "learning_rate": 0.01, "loss": 1.927, "step": 105318 }, { "epoch": 10.81657594741707, "grad_norm": 0.06225109100341797, "learning_rate": 0.01, "loss": 1.9449, "step": 105321 }, { "epoch": 10.81688405052891, "grad_norm": 0.07541634887456894, "learning_rate": 0.01, "loss": 1.9603, "step": 105324 }, { "epoch": 10.817192153640752, "grad_norm": 0.033274658024311066, "learning_rate": 0.01, "loss": 1.9295, "step": 105327 }, { "epoch": 10.817500256752593, "grad_norm": 0.0380442850291729, "learning_rate": 0.01, "loss": 1.9261, "step": 105330 }, { "epoch": 10.817808359864435, "grad_norm": 0.15397582948207855, "learning_rate": 0.01, "loss": 1.9561, "step": 105333 }, { "epoch": 10.818116462976276, "grad_norm": 0.08010608702898026, "learning_rate": 0.01, "loss": 1.947, "step": 105336 }, { "epoch": 10.818424566088117, "grad_norm": 0.07294953614473343, "learning_rate": 0.01, "loss": 1.9467, "step": 105339 }, { "epoch": 10.818732669199958, "grad_norm": 0.042549289762973785, "learning_rate": 0.01, "loss": 1.937, "step": 105342 }, { "epoch": 10.8190407723118, "grad_norm": 0.03370673209428787, "learning_rate": 0.01, "loss": 1.9246, "step": 105345 }, { "epoch": 10.819348875423643, "grad_norm": 0.046656735241413116, "learning_rate": 0.01, "loss": 1.928, "step": 105348 }, { "epoch": 10.819656978535484, "grad_norm": 0.0739334300160408, "learning_rate": 0.01, "loss": 1.9814, "step": 105351 }, { "epoch": 10.819965081647325, "grad_norm": 0.047819964587688446, "learning_rate": 0.01, "loss": 1.9377, "step": 105354 }, { "epoch": 10.820273184759166, "grad_norm": 0.06027652695775032, "learning_rate": 0.01, "loss": 1.9353, "step": 105357 }, { "epoch": 10.820581287871008, "grad_norm": 0.1334407478570938, "learning_rate": 0.01, "loss": 1.916, "step": 105360 }, { "epoch": 10.820889390982849, "grad_norm": 0.04469862952828407, "learning_rate": 0.01, "loss": 1.9237, "step": 105363 }, { "epoch": 10.82119749409469, "grad_norm": 0.03328389301896095, "learning_rate": 0.01, "loss": 1.9411, "step": 105366 }, { "epoch": 10.821505597206531, "grad_norm": 0.0356464758515358, "learning_rate": 0.01, "loss": 1.9139, "step": 105369 }, { "epoch": 10.821813700318373, "grad_norm": 0.04654991999268532, "learning_rate": 0.01, "loss": 1.9151, "step": 105372 }, { "epoch": 10.822121803430214, "grad_norm": 0.04388376325368881, "learning_rate": 0.01, "loss": 1.9376, "step": 105375 }, { "epoch": 10.822429906542055, "grad_norm": 0.04658830538392067, "learning_rate": 0.01, "loss": 1.939, "step": 105378 }, { "epoch": 10.822738009653898, "grad_norm": 0.04263482987880707, "learning_rate": 0.01, "loss": 1.9102, "step": 105381 }, { "epoch": 10.82304611276574, "grad_norm": 0.07428035885095596, "learning_rate": 0.01, "loss": 1.9093, "step": 105384 }, { "epoch": 10.82335421587758, "grad_norm": 0.10800222307443619, "learning_rate": 0.01, "loss": 1.9282, "step": 105387 }, { "epoch": 10.823662318989422, "grad_norm": 0.05401541665196419, "learning_rate": 0.01, "loss": 1.9634, "step": 105390 }, { "epoch": 10.823970422101263, "grad_norm": 0.12794886529445648, "learning_rate": 0.01, "loss": 1.9568, "step": 105393 }, { "epoch": 10.824278525213105, "grad_norm": 0.0494115948677063, "learning_rate": 0.01, "loss": 1.9379, "step": 105396 }, { "epoch": 10.824586628324946, "grad_norm": 0.035063013434410095, "learning_rate": 0.01, "loss": 1.9371, "step": 105399 }, { "epoch": 10.824894731436787, "grad_norm": 0.04617554321885109, "learning_rate": 0.01, "loss": 1.9509, "step": 105402 }, { "epoch": 10.825202834548628, "grad_norm": 0.060868896543979645, "learning_rate": 0.01, "loss": 1.9497, "step": 105405 }, { "epoch": 10.82551093766047, "grad_norm": 0.035648807883262634, "learning_rate": 0.01, "loss": 1.9351, "step": 105408 }, { "epoch": 10.825819040772313, "grad_norm": 0.058884233236312866, "learning_rate": 0.01, "loss": 1.9573, "step": 105411 }, { "epoch": 10.826127143884154, "grad_norm": 0.16527080535888672, "learning_rate": 0.01, "loss": 1.9436, "step": 105414 }, { "epoch": 10.826435246995995, "grad_norm": 0.08593026548624039, "learning_rate": 0.01, "loss": 1.9467, "step": 105417 }, { "epoch": 10.826743350107837, "grad_norm": 0.05153747275471687, "learning_rate": 0.01, "loss": 1.9394, "step": 105420 }, { "epoch": 10.827051453219678, "grad_norm": 0.046141065657138824, "learning_rate": 0.01, "loss": 1.9745, "step": 105423 }, { "epoch": 10.827359556331519, "grad_norm": 0.05500772222876549, "learning_rate": 0.01, "loss": 1.9468, "step": 105426 }, { "epoch": 10.82766765944336, "grad_norm": 0.07828307896852493, "learning_rate": 0.01, "loss": 1.9514, "step": 105429 }, { "epoch": 10.827975762555202, "grad_norm": 0.0653647780418396, "learning_rate": 0.01, "loss": 1.9512, "step": 105432 }, { "epoch": 10.828283865667043, "grad_norm": 0.049936968833208084, "learning_rate": 0.01, "loss": 1.9166, "step": 105435 }, { "epoch": 10.828591968778884, "grad_norm": 0.06601990014314651, "learning_rate": 0.01, "loss": 1.9387, "step": 105438 }, { "epoch": 10.828900071890725, "grad_norm": 0.10459323972463608, "learning_rate": 0.01, "loss": 1.9472, "step": 105441 }, { "epoch": 10.829208175002567, "grad_norm": 0.06045229360461235, "learning_rate": 0.01, "loss": 1.9415, "step": 105444 }, { "epoch": 10.82951627811441, "grad_norm": 0.11680969595909119, "learning_rate": 0.01, "loss": 1.9235, "step": 105447 }, { "epoch": 10.829824381226251, "grad_norm": 0.04912514239549637, "learning_rate": 0.01, "loss": 1.9374, "step": 105450 }, { "epoch": 10.830132484338092, "grad_norm": 0.05238822475075722, "learning_rate": 0.01, "loss": 1.9271, "step": 105453 }, { "epoch": 10.830440587449933, "grad_norm": 0.03835156559944153, "learning_rate": 0.01, "loss": 1.9111, "step": 105456 }, { "epoch": 10.830748690561775, "grad_norm": 0.03455403074622154, "learning_rate": 0.01, "loss": 1.9487, "step": 105459 }, { "epoch": 10.831056793673616, "grad_norm": 0.09034903347492218, "learning_rate": 0.01, "loss": 1.9465, "step": 105462 }, { "epoch": 10.831364896785457, "grad_norm": 0.04471142962574959, "learning_rate": 0.01, "loss": 1.9358, "step": 105465 }, { "epoch": 10.831672999897298, "grad_norm": 0.04482191801071167, "learning_rate": 0.01, "loss": 1.9585, "step": 105468 }, { "epoch": 10.83198110300914, "grad_norm": 0.04457875341176987, "learning_rate": 0.01, "loss": 1.9451, "step": 105471 }, { "epoch": 10.832289206120981, "grad_norm": 0.03215394541621208, "learning_rate": 0.01, "loss": 1.9265, "step": 105474 }, { "epoch": 10.832597309232824, "grad_norm": 0.034922465682029724, "learning_rate": 0.01, "loss": 1.942, "step": 105477 }, { "epoch": 10.832905412344665, "grad_norm": 0.11798029392957687, "learning_rate": 0.01, "loss": 1.9365, "step": 105480 }, { "epoch": 10.833213515456507, "grad_norm": 0.0991378054022789, "learning_rate": 0.01, "loss": 1.9344, "step": 105483 }, { "epoch": 10.833521618568348, "grad_norm": 0.09143664687871933, "learning_rate": 0.01, "loss": 1.9609, "step": 105486 }, { "epoch": 10.833829721680189, "grad_norm": 0.07089017331600189, "learning_rate": 0.01, "loss": 1.9173, "step": 105489 }, { "epoch": 10.83413782479203, "grad_norm": 0.05932415649294853, "learning_rate": 0.01, "loss": 1.9379, "step": 105492 }, { "epoch": 10.834445927903872, "grad_norm": 0.07785198092460632, "learning_rate": 0.01, "loss": 1.9677, "step": 105495 }, { "epoch": 10.834754031015713, "grad_norm": 0.04854836314916611, "learning_rate": 0.01, "loss": 1.9467, "step": 105498 }, { "epoch": 10.835062134127554, "grad_norm": 0.04837128147482872, "learning_rate": 0.01, "loss": 1.9579, "step": 105501 }, { "epoch": 10.835370237239395, "grad_norm": 0.055888544768095016, "learning_rate": 0.01, "loss": 1.9262, "step": 105504 }, { "epoch": 10.835678340351237, "grad_norm": 0.07242245227098465, "learning_rate": 0.01, "loss": 1.9304, "step": 105507 }, { "epoch": 10.83598644346308, "grad_norm": 0.07105785608291626, "learning_rate": 0.01, "loss": 1.9368, "step": 105510 }, { "epoch": 10.836294546574921, "grad_norm": 0.08170506358146667, "learning_rate": 0.01, "loss": 1.9462, "step": 105513 }, { "epoch": 10.836602649686762, "grad_norm": 0.11783326417207718, "learning_rate": 0.01, "loss": 1.9753, "step": 105516 }, { "epoch": 10.836910752798603, "grad_norm": 0.08829626441001892, "learning_rate": 0.01, "loss": 1.9361, "step": 105519 }, { "epoch": 10.837218855910445, "grad_norm": 0.08297731727361679, "learning_rate": 0.01, "loss": 1.941, "step": 105522 }, { "epoch": 10.837526959022286, "grad_norm": 0.07735975831747055, "learning_rate": 0.01, "loss": 1.9281, "step": 105525 }, { "epoch": 10.837835062134127, "grad_norm": 0.07283669710159302, "learning_rate": 0.01, "loss": 1.9214, "step": 105528 }, { "epoch": 10.838143165245969, "grad_norm": 0.08361595869064331, "learning_rate": 0.01, "loss": 1.9399, "step": 105531 }, { "epoch": 10.83845126835781, "grad_norm": 0.08340589702129364, "learning_rate": 0.01, "loss": 1.9589, "step": 105534 }, { "epoch": 10.838759371469651, "grad_norm": 0.06210103631019592, "learning_rate": 0.01, "loss": 1.9328, "step": 105537 }, { "epoch": 10.839067474581494, "grad_norm": 0.04093151167035103, "learning_rate": 0.01, "loss": 1.9455, "step": 105540 }, { "epoch": 10.839375577693335, "grad_norm": 0.049949757754802704, "learning_rate": 0.01, "loss": 1.9377, "step": 105543 }, { "epoch": 10.839683680805177, "grad_norm": 0.0919983834028244, "learning_rate": 0.01, "loss": 1.9512, "step": 105546 }, { "epoch": 10.839991783917018, "grad_norm": 0.04312887787818909, "learning_rate": 0.01, "loss": 1.9248, "step": 105549 }, { "epoch": 10.84029988702886, "grad_norm": 0.03706655651330948, "learning_rate": 0.01, "loss": 1.9258, "step": 105552 }, { "epoch": 10.8406079901407, "grad_norm": 0.10449834913015366, "learning_rate": 0.01, "loss": 1.9417, "step": 105555 }, { "epoch": 10.840916093252542, "grad_norm": 0.053434476256370544, "learning_rate": 0.01, "loss": 1.9442, "step": 105558 }, { "epoch": 10.841224196364383, "grad_norm": 0.05519875884056091, "learning_rate": 0.01, "loss": 1.9426, "step": 105561 }, { "epoch": 10.841532299476224, "grad_norm": 0.10106226056814194, "learning_rate": 0.01, "loss": 1.9419, "step": 105564 }, { "epoch": 10.841840402588065, "grad_norm": 0.1005539521574974, "learning_rate": 0.01, "loss": 1.9336, "step": 105567 }, { "epoch": 10.842148505699907, "grad_norm": 0.07332777231931686, "learning_rate": 0.01, "loss": 1.9564, "step": 105570 }, { "epoch": 10.84245660881175, "grad_norm": 0.04059474542737007, "learning_rate": 0.01, "loss": 1.9031, "step": 105573 }, { "epoch": 10.842764711923591, "grad_norm": 0.039799902588129044, "learning_rate": 0.01, "loss": 1.9263, "step": 105576 }, { "epoch": 10.843072815035432, "grad_norm": 0.06343264877796173, "learning_rate": 0.01, "loss": 1.9347, "step": 105579 }, { "epoch": 10.843380918147274, "grad_norm": 0.0510416105389595, "learning_rate": 0.01, "loss": 1.9284, "step": 105582 }, { "epoch": 10.843689021259115, "grad_norm": 0.033778950572013855, "learning_rate": 0.01, "loss": 1.9283, "step": 105585 }, { "epoch": 10.843997124370956, "grad_norm": 0.05271367356181145, "learning_rate": 0.01, "loss": 1.9531, "step": 105588 }, { "epoch": 10.844305227482797, "grad_norm": 0.10913360118865967, "learning_rate": 0.01, "loss": 1.9328, "step": 105591 }, { "epoch": 10.844613330594639, "grad_norm": 0.10924379527568817, "learning_rate": 0.01, "loss": 1.9165, "step": 105594 }, { "epoch": 10.84492143370648, "grad_norm": 0.0577227920293808, "learning_rate": 0.01, "loss": 1.9093, "step": 105597 }, { "epoch": 10.845229536818321, "grad_norm": 0.03460630401968956, "learning_rate": 0.01, "loss": 1.9199, "step": 105600 }, { "epoch": 10.845537639930164, "grad_norm": 0.05811109021306038, "learning_rate": 0.01, "loss": 1.9257, "step": 105603 }, { "epoch": 10.845845743042005, "grad_norm": 0.10903934389352798, "learning_rate": 0.01, "loss": 1.9322, "step": 105606 }, { "epoch": 10.846153846153847, "grad_norm": 0.07660596072673798, "learning_rate": 0.01, "loss": 1.9368, "step": 105609 }, { "epoch": 10.846461949265688, "grad_norm": 0.08013112097978592, "learning_rate": 0.01, "loss": 1.9442, "step": 105612 }, { "epoch": 10.84677005237753, "grad_norm": 0.0570279099047184, "learning_rate": 0.01, "loss": 1.9547, "step": 105615 }, { "epoch": 10.84707815548937, "grad_norm": 0.03606827184557915, "learning_rate": 0.01, "loss": 1.9112, "step": 105618 }, { "epoch": 10.847386258601212, "grad_norm": 0.06117340549826622, "learning_rate": 0.01, "loss": 1.9453, "step": 105621 }, { "epoch": 10.847694361713053, "grad_norm": 0.09413411468267441, "learning_rate": 0.01, "loss": 1.9735, "step": 105624 }, { "epoch": 10.848002464824894, "grad_norm": 0.058760952204465866, "learning_rate": 0.01, "loss": 1.93, "step": 105627 }, { "epoch": 10.848310567936736, "grad_norm": 0.11886738240718842, "learning_rate": 0.01, "loss": 1.9452, "step": 105630 }, { "epoch": 10.848618671048577, "grad_norm": 0.08395174890756607, "learning_rate": 0.01, "loss": 1.9288, "step": 105633 }, { "epoch": 10.84892677416042, "grad_norm": 0.05414260923862457, "learning_rate": 0.01, "loss": 1.9332, "step": 105636 }, { "epoch": 10.849234877272261, "grad_norm": 0.08581097424030304, "learning_rate": 0.01, "loss": 1.9393, "step": 105639 }, { "epoch": 10.849542980384102, "grad_norm": 0.05798320099711418, "learning_rate": 0.01, "loss": 1.9491, "step": 105642 }, { "epoch": 10.849851083495944, "grad_norm": 0.0710185319185257, "learning_rate": 0.01, "loss": 1.9434, "step": 105645 }, { "epoch": 10.850159186607785, "grad_norm": 0.08223643898963928, "learning_rate": 0.01, "loss": 1.9365, "step": 105648 }, { "epoch": 10.850467289719626, "grad_norm": 0.1025887280702591, "learning_rate": 0.01, "loss": 1.9283, "step": 105651 }, { "epoch": 10.850775392831467, "grad_norm": 0.05991736054420471, "learning_rate": 0.01, "loss": 1.9306, "step": 105654 }, { "epoch": 10.851083495943309, "grad_norm": 0.08577022701501846, "learning_rate": 0.01, "loss": 1.9275, "step": 105657 }, { "epoch": 10.85139159905515, "grad_norm": 0.08255545049905777, "learning_rate": 0.01, "loss": 1.9596, "step": 105660 }, { "epoch": 10.851699702166991, "grad_norm": 0.06794632971286774, "learning_rate": 0.01, "loss": 1.9324, "step": 105663 }, { "epoch": 10.852007805278834, "grad_norm": 0.06520742923021317, "learning_rate": 0.01, "loss": 1.929, "step": 105666 }, { "epoch": 10.852315908390676, "grad_norm": 0.10006118565797806, "learning_rate": 0.01, "loss": 1.9216, "step": 105669 }, { "epoch": 10.852624011502517, "grad_norm": 0.035613786429166794, "learning_rate": 0.01, "loss": 1.9518, "step": 105672 }, { "epoch": 10.852932114614358, "grad_norm": 0.09051407128572464, "learning_rate": 0.01, "loss": 1.9325, "step": 105675 }, { "epoch": 10.8532402177262, "grad_norm": 0.08709491789340973, "learning_rate": 0.01, "loss": 1.9057, "step": 105678 }, { "epoch": 10.85354832083804, "grad_norm": 0.04722654074430466, "learning_rate": 0.01, "loss": 1.9344, "step": 105681 }, { "epoch": 10.853856423949882, "grad_norm": 0.0712883397936821, "learning_rate": 0.01, "loss": 1.9293, "step": 105684 }, { "epoch": 10.854164527061723, "grad_norm": 0.07477840781211853, "learning_rate": 0.01, "loss": 1.9688, "step": 105687 }, { "epoch": 10.854472630173564, "grad_norm": 0.03935958445072174, "learning_rate": 0.01, "loss": 1.9498, "step": 105690 }, { "epoch": 10.854780733285406, "grad_norm": 0.04436472803354263, "learning_rate": 0.01, "loss": 1.907, "step": 105693 }, { "epoch": 10.855088836397247, "grad_norm": 0.05736646056175232, "learning_rate": 0.01, "loss": 1.9421, "step": 105696 }, { "epoch": 10.855396939509088, "grad_norm": 0.05523679777979851, "learning_rate": 0.01, "loss": 1.9551, "step": 105699 }, { "epoch": 10.855705042620931, "grad_norm": 0.04662425071001053, "learning_rate": 0.01, "loss": 1.9468, "step": 105702 }, { "epoch": 10.856013145732772, "grad_norm": 0.0389157310128212, "learning_rate": 0.01, "loss": 1.9239, "step": 105705 }, { "epoch": 10.856321248844614, "grad_norm": 0.03747498244047165, "learning_rate": 0.01, "loss": 1.9429, "step": 105708 }, { "epoch": 10.856629351956455, "grad_norm": 0.04804906249046326, "learning_rate": 0.01, "loss": 1.9383, "step": 105711 }, { "epoch": 10.856937455068296, "grad_norm": 0.0921889990568161, "learning_rate": 0.01, "loss": 1.9349, "step": 105714 }, { "epoch": 10.857245558180137, "grad_norm": 0.09470658004283905, "learning_rate": 0.01, "loss": 1.9364, "step": 105717 }, { "epoch": 10.857553661291979, "grad_norm": 0.061489563435316086, "learning_rate": 0.01, "loss": 1.9465, "step": 105720 }, { "epoch": 10.85786176440382, "grad_norm": 0.05320308357477188, "learning_rate": 0.01, "loss": 1.9551, "step": 105723 }, { "epoch": 10.858169867515661, "grad_norm": 0.047891486436128616, "learning_rate": 0.01, "loss": 1.9471, "step": 105726 }, { "epoch": 10.858477970627503, "grad_norm": 0.03658050298690796, "learning_rate": 0.01, "loss": 1.9214, "step": 105729 }, { "epoch": 10.858786073739346, "grad_norm": 0.0334072969853878, "learning_rate": 0.01, "loss": 1.9374, "step": 105732 }, { "epoch": 10.859094176851187, "grad_norm": 0.09168829023838043, "learning_rate": 0.01, "loss": 1.9456, "step": 105735 }, { "epoch": 10.859402279963028, "grad_norm": 0.04450211301445961, "learning_rate": 0.01, "loss": 1.9664, "step": 105738 }, { "epoch": 10.85971038307487, "grad_norm": 0.07760917395353317, "learning_rate": 0.01, "loss": 1.9433, "step": 105741 }, { "epoch": 10.86001848618671, "grad_norm": 0.04388490691781044, "learning_rate": 0.01, "loss": 1.9285, "step": 105744 }, { "epoch": 10.860326589298552, "grad_norm": 0.10173279792070389, "learning_rate": 0.01, "loss": 1.92, "step": 105747 }, { "epoch": 10.860634692410393, "grad_norm": 0.037178948521614075, "learning_rate": 0.01, "loss": 1.9276, "step": 105750 }, { "epoch": 10.860942795522234, "grad_norm": 0.10235720127820969, "learning_rate": 0.01, "loss": 1.9608, "step": 105753 }, { "epoch": 10.861250898634076, "grad_norm": 0.04074683040380478, "learning_rate": 0.01, "loss": 1.9254, "step": 105756 }, { "epoch": 10.861559001745917, "grad_norm": 0.10367844253778458, "learning_rate": 0.01, "loss": 1.9085, "step": 105759 }, { "epoch": 10.861867104857758, "grad_norm": 0.050666362047195435, "learning_rate": 0.01, "loss": 1.9243, "step": 105762 }, { "epoch": 10.862175207969601, "grad_norm": 0.11741497367620468, "learning_rate": 0.01, "loss": 1.9456, "step": 105765 }, { "epoch": 10.862483311081442, "grad_norm": 0.06941113620996475, "learning_rate": 0.01, "loss": 1.9588, "step": 105768 }, { "epoch": 10.862791414193284, "grad_norm": 0.05297830328345299, "learning_rate": 0.01, "loss": 1.9321, "step": 105771 }, { "epoch": 10.863099517305125, "grad_norm": 0.0719386413693428, "learning_rate": 0.01, "loss": 1.9404, "step": 105774 }, { "epoch": 10.863407620416966, "grad_norm": 0.044960249215364456, "learning_rate": 0.01, "loss": 1.931, "step": 105777 }, { "epoch": 10.863715723528808, "grad_norm": 0.048919543623924255, "learning_rate": 0.01, "loss": 1.9382, "step": 105780 }, { "epoch": 10.864023826640649, "grad_norm": 0.0472579151391983, "learning_rate": 0.01, "loss": 1.9464, "step": 105783 }, { "epoch": 10.86433192975249, "grad_norm": 0.11817888915538788, "learning_rate": 0.01, "loss": 1.9254, "step": 105786 }, { "epoch": 10.864640032864331, "grad_norm": 0.10530492663383484, "learning_rate": 0.01, "loss": 1.9478, "step": 105789 }, { "epoch": 10.864948135976173, "grad_norm": 0.05322025716304779, "learning_rate": 0.01, "loss": 1.9425, "step": 105792 }, { "epoch": 10.865256239088016, "grad_norm": 0.08680053055286407, "learning_rate": 0.01, "loss": 1.9216, "step": 105795 }, { "epoch": 10.865564342199857, "grad_norm": 0.08292030543088913, "learning_rate": 0.01, "loss": 1.9444, "step": 105798 }, { "epoch": 10.865872445311698, "grad_norm": 0.07252789288759232, "learning_rate": 0.01, "loss": 1.9419, "step": 105801 }, { "epoch": 10.86618054842354, "grad_norm": 0.06576648354530334, "learning_rate": 0.01, "loss": 1.9318, "step": 105804 }, { "epoch": 10.86648865153538, "grad_norm": 0.17087021470069885, "learning_rate": 0.01, "loss": 1.933, "step": 105807 }, { "epoch": 10.866796754647222, "grad_norm": 0.08984529972076416, "learning_rate": 0.01, "loss": 1.9486, "step": 105810 }, { "epoch": 10.867104857759063, "grad_norm": 0.07745181024074554, "learning_rate": 0.01, "loss": 1.9127, "step": 105813 }, { "epoch": 10.867412960870904, "grad_norm": 0.048002324998378754, "learning_rate": 0.01, "loss": 1.934, "step": 105816 }, { "epoch": 10.867721063982746, "grad_norm": 0.04052358493208885, "learning_rate": 0.01, "loss": 1.9311, "step": 105819 }, { "epoch": 10.868029167094587, "grad_norm": 0.038736648857593536, "learning_rate": 0.01, "loss": 1.9665, "step": 105822 }, { "epoch": 10.868337270206428, "grad_norm": 0.054375361651182175, "learning_rate": 0.01, "loss": 1.9312, "step": 105825 }, { "epoch": 10.868645373318271, "grad_norm": 0.07048241794109344, "learning_rate": 0.01, "loss": 1.9387, "step": 105828 }, { "epoch": 10.868953476430113, "grad_norm": 0.03943666070699692, "learning_rate": 0.01, "loss": 1.9639, "step": 105831 }, { "epoch": 10.869261579541954, "grad_norm": 0.12087706476449966, "learning_rate": 0.01, "loss": 1.9485, "step": 105834 }, { "epoch": 10.869569682653795, "grad_norm": 0.055464401841163635, "learning_rate": 0.01, "loss": 1.9596, "step": 105837 }, { "epoch": 10.869877785765636, "grad_norm": 0.05967879295349121, "learning_rate": 0.01, "loss": 1.9353, "step": 105840 }, { "epoch": 10.870185888877478, "grad_norm": 0.10653529316186905, "learning_rate": 0.01, "loss": 1.9561, "step": 105843 }, { "epoch": 10.870493991989319, "grad_norm": 0.03763870522379875, "learning_rate": 0.01, "loss": 1.9347, "step": 105846 }, { "epoch": 10.87080209510116, "grad_norm": 0.11706111580133438, "learning_rate": 0.01, "loss": 1.9701, "step": 105849 }, { "epoch": 10.871110198213001, "grad_norm": 0.05440943315625191, "learning_rate": 0.01, "loss": 1.9524, "step": 105852 }, { "epoch": 10.871418301324843, "grad_norm": 0.04757893830537796, "learning_rate": 0.01, "loss": 1.924, "step": 105855 }, { "epoch": 10.871726404436686, "grad_norm": 0.04554521292448044, "learning_rate": 0.01, "loss": 1.9223, "step": 105858 }, { "epoch": 10.872034507548527, "grad_norm": 0.05292823910713196, "learning_rate": 0.01, "loss": 1.9438, "step": 105861 }, { "epoch": 10.872342610660368, "grad_norm": 0.05600232630968094, "learning_rate": 0.01, "loss": 1.9232, "step": 105864 }, { "epoch": 10.87265071377221, "grad_norm": 0.050174783915281296, "learning_rate": 0.01, "loss": 1.9292, "step": 105867 }, { "epoch": 10.87295881688405, "grad_norm": 0.14416180551052094, "learning_rate": 0.01, "loss": 1.9439, "step": 105870 }, { "epoch": 10.873266919995892, "grad_norm": 0.07949387282133102, "learning_rate": 0.01, "loss": 1.9236, "step": 105873 }, { "epoch": 10.873575023107733, "grad_norm": 0.07921165227890015, "learning_rate": 0.01, "loss": 1.9175, "step": 105876 }, { "epoch": 10.873883126219575, "grad_norm": 0.05337239056825638, "learning_rate": 0.01, "loss": 1.9384, "step": 105879 }, { "epoch": 10.874191229331416, "grad_norm": 0.033021338284015656, "learning_rate": 0.01, "loss": 1.9285, "step": 105882 }, { "epoch": 10.874499332443257, "grad_norm": 0.035085279494524, "learning_rate": 0.01, "loss": 1.9276, "step": 105885 }, { "epoch": 10.874807435555098, "grad_norm": 0.11920043081045151, "learning_rate": 0.01, "loss": 1.959, "step": 105888 }, { "epoch": 10.875115538666941, "grad_norm": 0.05265756696462631, "learning_rate": 0.01, "loss": 1.9311, "step": 105891 }, { "epoch": 10.875423641778783, "grad_norm": 0.09155082702636719, "learning_rate": 0.01, "loss": 1.9255, "step": 105894 }, { "epoch": 10.875731744890624, "grad_norm": 0.0716506689786911, "learning_rate": 0.01, "loss": 1.9235, "step": 105897 }, { "epoch": 10.876039848002465, "grad_norm": 0.06458982080221176, "learning_rate": 0.01, "loss": 1.9394, "step": 105900 }, { "epoch": 10.876347951114306, "grad_norm": 0.09309206157922745, "learning_rate": 0.01, "loss": 1.9415, "step": 105903 }, { "epoch": 10.876656054226148, "grad_norm": 0.04247670620679855, "learning_rate": 0.01, "loss": 1.9346, "step": 105906 }, { "epoch": 10.876964157337989, "grad_norm": 0.047627042979002, "learning_rate": 0.01, "loss": 1.9132, "step": 105909 }, { "epoch": 10.87727226044983, "grad_norm": 0.06475477665662766, "learning_rate": 0.01, "loss": 1.9569, "step": 105912 }, { "epoch": 10.877580363561671, "grad_norm": 0.042110294103622437, "learning_rate": 0.01, "loss": 1.9349, "step": 105915 }, { "epoch": 10.877888466673513, "grad_norm": 0.10465336591005325, "learning_rate": 0.01, "loss": 1.9411, "step": 105918 }, { "epoch": 10.878196569785356, "grad_norm": 0.091881662607193, "learning_rate": 0.01, "loss": 1.9265, "step": 105921 }, { "epoch": 10.878504672897197, "grad_norm": 0.05411464720964432, "learning_rate": 0.01, "loss": 1.9683, "step": 105924 }, { "epoch": 10.878812776009038, "grad_norm": 0.040598805993795395, "learning_rate": 0.01, "loss": 1.9491, "step": 105927 }, { "epoch": 10.87912087912088, "grad_norm": 0.038992274552583694, "learning_rate": 0.01, "loss": 1.9612, "step": 105930 }, { "epoch": 10.87942898223272, "grad_norm": 0.06576912105083466, "learning_rate": 0.01, "loss": 1.9477, "step": 105933 }, { "epoch": 10.879737085344562, "grad_norm": 0.08308625966310501, "learning_rate": 0.01, "loss": 1.9468, "step": 105936 }, { "epoch": 10.880045188456403, "grad_norm": 0.048460930585861206, "learning_rate": 0.01, "loss": 1.9695, "step": 105939 }, { "epoch": 10.880353291568245, "grad_norm": 0.07270680367946625, "learning_rate": 0.01, "loss": 1.9246, "step": 105942 }, { "epoch": 10.880661394680086, "grad_norm": 0.12631477415561676, "learning_rate": 0.01, "loss": 1.9574, "step": 105945 }, { "epoch": 10.880969497791927, "grad_norm": 0.061026062816381454, "learning_rate": 0.01, "loss": 1.953, "step": 105948 }, { "epoch": 10.881277600903768, "grad_norm": 0.058033816516399384, "learning_rate": 0.01, "loss": 1.9252, "step": 105951 }, { "epoch": 10.88158570401561, "grad_norm": 0.10059273988008499, "learning_rate": 0.01, "loss": 1.9436, "step": 105954 }, { "epoch": 10.881893807127453, "grad_norm": 0.12196524441242218, "learning_rate": 0.01, "loss": 1.9411, "step": 105957 }, { "epoch": 10.882201910239294, "grad_norm": 0.05642728507518768, "learning_rate": 0.01, "loss": 1.9249, "step": 105960 }, { "epoch": 10.882510013351135, "grad_norm": 0.048246677964925766, "learning_rate": 0.01, "loss": 1.9448, "step": 105963 }, { "epoch": 10.882818116462976, "grad_norm": 0.07132484018802643, "learning_rate": 0.01, "loss": 1.9556, "step": 105966 }, { "epoch": 10.883126219574818, "grad_norm": 0.06366395950317383, "learning_rate": 0.01, "loss": 1.9287, "step": 105969 }, { "epoch": 10.883434322686659, "grad_norm": 0.077542744576931, "learning_rate": 0.01, "loss": 1.9286, "step": 105972 }, { "epoch": 10.8837424257985, "grad_norm": 0.09937810897827148, "learning_rate": 0.01, "loss": 1.9254, "step": 105975 }, { "epoch": 10.884050528910342, "grad_norm": 0.10000995546579361, "learning_rate": 0.01, "loss": 1.93, "step": 105978 }, { "epoch": 10.884358632022183, "grad_norm": 0.03962424024939537, "learning_rate": 0.01, "loss": 1.9485, "step": 105981 }, { "epoch": 10.884666735134024, "grad_norm": 0.03430528566241264, "learning_rate": 0.01, "loss": 1.9352, "step": 105984 }, { "epoch": 10.884974838245867, "grad_norm": 0.05221163481473923, "learning_rate": 0.01, "loss": 1.9346, "step": 105987 }, { "epoch": 10.885282941357708, "grad_norm": 0.06595531851053238, "learning_rate": 0.01, "loss": 1.9307, "step": 105990 }, { "epoch": 10.88559104446955, "grad_norm": 0.0340123176574707, "learning_rate": 0.01, "loss": 1.9153, "step": 105993 }, { "epoch": 10.88589914758139, "grad_norm": 0.12168629467487335, "learning_rate": 0.01, "loss": 1.9352, "step": 105996 }, { "epoch": 10.886207250693232, "grad_norm": 0.045555856078863144, "learning_rate": 0.01, "loss": 1.9412, "step": 105999 }, { "epoch": 10.886515353805073, "grad_norm": 0.11041539162397385, "learning_rate": 0.01, "loss": 1.9354, "step": 106002 }, { "epoch": 10.886823456916915, "grad_norm": 0.0428503155708313, "learning_rate": 0.01, "loss": 1.9144, "step": 106005 }, { "epoch": 10.887131560028756, "grad_norm": 0.04097161069512367, "learning_rate": 0.01, "loss": 1.9391, "step": 106008 }, { "epoch": 10.887439663140597, "grad_norm": 0.055234361439943314, "learning_rate": 0.01, "loss": 1.9396, "step": 106011 }, { "epoch": 10.887747766252438, "grad_norm": 0.047480981796979904, "learning_rate": 0.01, "loss": 1.9354, "step": 106014 }, { "epoch": 10.88805586936428, "grad_norm": 0.07451288402080536, "learning_rate": 0.01, "loss": 1.9056, "step": 106017 }, { "epoch": 10.888363972476123, "grad_norm": 0.09008581936359406, "learning_rate": 0.01, "loss": 1.9668, "step": 106020 }, { "epoch": 10.888672075587964, "grad_norm": 0.07265052944421768, "learning_rate": 0.01, "loss": 1.926, "step": 106023 }, { "epoch": 10.888980178699805, "grad_norm": 0.09329503774642944, "learning_rate": 0.01, "loss": 1.9188, "step": 106026 }, { "epoch": 10.889288281811647, "grad_norm": 0.07104849815368652, "learning_rate": 0.01, "loss": 1.9443, "step": 106029 }, { "epoch": 10.889596384923488, "grad_norm": 0.08163333684206009, "learning_rate": 0.01, "loss": 1.9238, "step": 106032 }, { "epoch": 10.889904488035329, "grad_norm": 0.10383013635873795, "learning_rate": 0.01, "loss": 1.9105, "step": 106035 }, { "epoch": 10.89021259114717, "grad_norm": 0.06662754714488983, "learning_rate": 0.01, "loss": 1.9329, "step": 106038 }, { "epoch": 10.890520694259012, "grad_norm": 0.056699350476264954, "learning_rate": 0.01, "loss": 1.9477, "step": 106041 }, { "epoch": 10.890828797370853, "grad_norm": 0.05268816277384758, "learning_rate": 0.01, "loss": 1.9071, "step": 106044 }, { "epoch": 10.891136900482694, "grad_norm": 0.037846196442842484, "learning_rate": 0.01, "loss": 1.9244, "step": 106047 }, { "epoch": 10.891445003594537, "grad_norm": 0.03779904544353485, "learning_rate": 0.01, "loss": 1.9288, "step": 106050 }, { "epoch": 10.891753106706378, "grad_norm": 0.12322263419628143, "learning_rate": 0.01, "loss": 1.9531, "step": 106053 }, { "epoch": 10.89206120981822, "grad_norm": 0.07598162442445755, "learning_rate": 0.01, "loss": 1.9363, "step": 106056 }, { "epoch": 10.892369312930061, "grad_norm": 0.12336615473031998, "learning_rate": 0.01, "loss": 1.9512, "step": 106059 }, { "epoch": 10.892677416041902, "grad_norm": 0.09707363694906235, "learning_rate": 0.01, "loss": 1.9253, "step": 106062 }, { "epoch": 10.892985519153743, "grad_norm": 0.04322752729058266, "learning_rate": 0.01, "loss": 1.9273, "step": 106065 }, { "epoch": 10.893293622265585, "grad_norm": 0.0348566398024559, "learning_rate": 0.01, "loss": 1.9133, "step": 106068 }, { "epoch": 10.893601725377426, "grad_norm": 0.046460725367069244, "learning_rate": 0.01, "loss": 1.9529, "step": 106071 }, { "epoch": 10.893909828489267, "grad_norm": 0.06775607168674469, "learning_rate": 0.01, "loss": 1.9044, "step": 106074 }, { "epoch": 10.894217931601109, "grad_norm": 0.03778929263353348, "learning_rate": 0.01, "loss": 1.931, "step": 106077 }, { "epoch": 10.89452603471295, "grad_norm": 0.04769425094127655, "learning_rate": 0.01, "loss": 1.9512, "step": 106080 }, { "epoch": 10.894834137824793, "grad_norm": 0.05076927691698074, "learning_rate": 0.01, "loss": 1.953, "step": 106083 }, { "epoch": 10.895142240936634, "grad_norm": 0.10340415686368942, "learning_rate": 0.01, "loss": 1.9585, "step": 106086 }, { "epoch": 10.895450344048475, "grad_norm": 0.12192103266716003, "learning_rate": 0.01, "loss": 1.9375, "step": 106089 }, { "epoch": 10.895758447160317, "grad_norm": 0.06740839779376984, "learning_rate": 0.01, "loss": 1.9266, "step": 106092 }, { "epoch": 10.896066550272158, "grad_norm": 0.03260725364089012, "learning_rate": 0.01, "loss": 1.9436, "step": 106095 }, { "epoch": 10.896374653384, "grad_norm": 0.06192610785365105, "learning_rate": 0.01, "loss": 1.9257, "step": 106098 }, { "epoch": 10.89668275649584, "grad_norm": 0.06303668022155762, "learning_rate": 0.01, "loss": 1.9454, "step": 106101 }, { "epoch": 10.896990859607682, "grad_norm": 0.1754002720117569, "learning_rate": 0.01, "loss": 1.9341, "step": 106104 }, { "epoch": 10.897298962719523, "grad_norm": 0.0929434522986412, "learning_rate": 0.01, "loss": 1.899, "step": 106107 }, { "epoch": 10.897607065831364, "grad_norm": 0.07154718786478043, "learning_rate": 0.01, "loss": 1.9388, "step": 106110 }, { "epoch": 10.897915168943207, "grad_norm": 0.0400879830121994, "learning_rate": 0.01, "loss": 1.9387, "step": 106113 }, { "epoch": 10.898223272055048, "grad_norm": 0.034502558410167694, "learning_rate": 0.01, "loss": 1.9351, "step": 106116 }, { "epoch": 10.89853137516689, "grad_norm": 0.03446902707219124, "learning_rate": 0.01, "loss": 1.9373, "step": 106119 }, { "epoch": 10.898839478278731, "grad_norm": 0.03258249908685684, "learning_rate": 0.01, "loss": 1.9505, "step": 106122 }, { "epoch": 10.899147581390572, "grad_norm": 0.04886491969227791, "learning_rate": 0.01, "loss": 1.9781, "step": 106125 }, { "epoch": 10.899455684502414, "grad_norm": 0.07080874592065811, "learning_rate": 0.01, "loss": 1.938, "step": 106128 }, { "epoch": 10.899763787614255, "grad_norm": 0.09572204947471619, "learning_rate": 0.01, "loss": 1.9328, "step": 106131 }, { "epoch": 10.900071890726096, "grad_norm": 0.0943838506937027, "learning_rate": 0.01, "loss": 1.9313, "step": 106134 }, { "epoch": 10.900379993837937, "grad_norm": 0.0512869767844677, "learning_rate": 0.01, "loss": 1.9079, "step": 106137 }, { "epoch": 10.900688096949779, "grad_norm": 0.04730043560266495, "learning_rate": 0.01, "loss": 1.9367, "step": 106140 }, { "epoch": 10.90099620006162, "grad_norm": 0.18809223175048828, "learning_rate": 0.01, "loss": 1.9456, "step": 106143 }, { "epoch": 10.901304303173463, "grad_norm": 0.08880333602428436, "learning_rate": 0.01, "loss": 1.9498, "step": 106146 }, { "epoch": 10.901612406285304, "grad_norm": 0.08753082156181335, "learning_rate": 0.01, "loss": 1.9485, "step": 106149 }, { "epoch": 10.901920509397145, "grad_norm": 0.0716850608587265, "learning_rate": 0.01, "loss": 1.9498, "step": 106152 }, { "epoch": 10.902228612508987, "grad_norm": 0.06579985469579697, "learning_rate": 0.01, "loss": 1.9292, "step": 106155 }, { "epoch": 10.902536715620828, "grad_norm": 0.06625322252511978, "learning_rate": 0.01, "loss": 1.9332, "step": 106158 }, { "epoch": 10.90284481873267, "grad_norm": 0.04039857164025307, "learning_rate": 0.01, "loss": 1.9277, "step": 106161 }, { "epoch": 10.90315292184451, "grad_norm": 0.0351543202996254, "learning_rate": 0.01, "loss": 1.9555, "step": 106164 }, { "epoch": 10.903461024956352, "grad_norm": 0.0355987548828125, "learning_rate": 0.01, "loss": 1.9289, "step": 106167 }, { "epoch": 10.903769128068193, "grad_norm": 0.03463450074195862, "learning_rate": 0.01, "loss": 1.9439, "step": 106170 }, { "epoch": 10.904077231180034, "grad_norm": 0.05925464630126953, "learning_rate": 0.01, "loss": 1.9319, "step": 106173 }, { "epoch": 10.904385334291877, "grad_norm": 0.057822998613119125, "learning_rate": 0.01, "loss": 1.9357, "step": 106176 }, { "epoch": 10.904693437403719, "grad_norm": 0.15748798847198486, "learning_rate": 0.01, "loss": 1.925, "step": 106179 }, { "epoch": 10.90500154051556, "grad_norm": 0.04025212302803993, "learning_rate": 0.01, "loss": 1.9267, "step": 106182 }, { "epoch": 10.905309643627401, "grad_norm": 0.08656113594770432, "learning_rate": 0.01, "loss": 1.9402, "step": 106185 }, { "epoch": 10.905617746739242, "grad_norm": 0.0952494889497757, "learning_rate": 0.01, "loss": 1.9039, "step": 106188 }, { "epoch": 10.905925849851084, "grad_norm": 0.05528434365987778, "learning_rate": 0.01, "loss": 1.9517, "step": 106191 }, { "epoch": 10.906233952962925, "grad_norm": 0.04796331375837326, "learning_rate": 0.01, "loss": 1.9517, "step": 106194 }, { "epoch": 10.906542056074766, "grad_norm": 0.04357631504535675, "learning_rate": 0.01, "loss": 1.9331, "step": 106197 }, { "epoch": 10.906850159186607, "grad_norm": 0.040124353021383286, "learning_rate": 0.01, "loss": 1.9378, "step": 106200 }, { "epoch": 10.907158262298449, "grad_norm": 0.03544517606496811, "learning_rate": 0.01, "loss": 1.9287, "step": 106203 }, { "epoch": 10.90746636541029, "grad_norm": 0.058340318500995636, "learning_rate": 0.01, "loss": 1.9169, "step": 106206 }, { "epoch": 10.907774468522131, "grad_norm": 0.08568999916315079, "learning_rate": 0.01, "loss": 1.949, "step": 106209 }, { "epoch": 10.908082571633974, "grad_norm": 0.06406117230653763, "learning_rate": 0.01, "loss": 1.9179, "step": 106212 }, { "epoch": 10.908390674745815, "grad_norm": 0.059249307960271835, "learning_rate": 0.01, "loss": 1.9505, "step": 106215 }, { "epoch": 10.908698777857657, "grad_norm": 0.052112944424152374, "learning_rate": 0.01, "loss": 1.926, "step": 106218 }, { "epoch": 10.909006880969498, "grad_norm": 0.03925701603293419, "learning_rate": 0.01, "loss": 1.9384, "step": 106221 }, { "epoch": 10.90931498408134, "grad_norm": 0.14518700540065765, "learning_rate": 0.01, "loss": 1.9411, "step": 106224 }, { "epoch": 10.90962308719318, "grad_norm": 0.13229291141033173, "learning_rate": 0.01, "loss": 1.9414, "step": 106227 }, { "epoch": 10.909931190305022, "grad_norm": 0.05290767550468445, "learning_rate": 0.01, "loss": 1.9278, "step": 106230 }, { "epoch": 10.910239293416863, "grad_norm": 0.044173240661621094, "learning_rate": 0.01, "loss": 1.9535, "step": 106233 }, { "epoch": 10.910547396528704, "grad_norm": 0.042460594326257706, "learning_rate": 0.01, "loss": 1.9402, "step": 106236 }, { "epoch": 10.910855499640546, "grad_norm": 0.03345232084393501, "learning_rate": 0.01, "loss": 1.9282, "step": 106239 }, { "epoch": 10.911163602752389, "grad_norm": 0.03705401346087456, "learning_rate": 0.01, "loss": 1.9585, "step": 106242 }, { "epoch": 10.91147170586423, "grad_norm": 0.06710290163755417, "learning_rate": 0.01, "loss": 1.9241, "step": 106245 }, { "epoch": 10.911779808976071, "grad_norm": 0.042429160326719284, "learning_rate": 0.01, "loss": 1.9464, "step": 106248 }, { "epoch": 10.912087912087912, "grad_norm": 0.0361759252846241, "learning_rate": 0.01, "loss": 1.9015, "step": 106251 }, { "epoch": 10.912396015199754, "grad_norm": 0.02957993932068348, "learning_rate": 0.01, "loss": 1.9287, "step": 106254 }, { "epoch": 10.912704118311595, "grad_norm": 0.045638322830200195, "learning_rate": 0.01, "loss": 1.9259, "step": 106257 }, { "epoch": 10.913012221423436, "grad_norm": 0.06491947174072266, "learning_rate": 0.01, "loss": 1.9433, "step": 106260 }, { "epoch": 10.913320324535277, "grad_norm": 0.12178110331296921, "learning_rate": 0.01, "loss": 1.9323, "step": 106263 }, { "epoch": 10.913628427647119, "grad_norm": 0.060336388647556305, "learning_rate": 0.01, "loss": 1.9444, "step": 106266 }, { "epoch": 10.91393653075896, "grad_norm": 0.0462242066860199, "learning_rate": 0.01, "loss": 1.9212, "step": 106269 }, { "epoch": 10.914244633870801, "grad_norm": 0.11648637056350708, "learning_rate": 0.01, "loss": 1.935, "step": 106272 }, { "epoch": 10.914552736982644, "grad_norm": 0.05701465904712677, "learning_rate": 0.01, "loss": 1.946, "step": 106275 }, { "epoch": 10.914860840094486, "grad_norm": 0.06645436584949493, "learning_rate": 0.01, "loss": 1.926, "step": 106278 }, { "epoch": 10.915168943206327, "grad_norm": 0.06109488010406494, "learning_rate": 0.01, "loss": 1.9517, "step": 106281 }, { "epoch": 10.915477046318168, "grad_norm": 0.0647864118218422, "learning_rate": 0.01, "loss": 1.9262, "step": 106284 }, { "epoch": 10.91578514943001, "grad_norm": 0.16096295416355133, "learning_rate": 0.01, "loss": 1.9378, "step": 106287 }, { "epoch": 10.91609325254185, "grad_norm": 0.16784384846687317, "learning_rate": 0.01, "loss": 1.9403, "step": 106290 }, { "epoch": 10.916401355653692, "grad_norm": 0.11912429332733154, "learning_rate": 0.01, "loss": 1.935, "step": 106293 }, { "epoch": 10.916709458765533, "grad_norm": 0.10303348302841187, "learning_rate": 0.01, "loss": 1.951, "step": 106296 }, { "epoch": 10.917017561877374, "grad_norm": 0.09231548756361008, "learning_rate": 0.01, "loss": 1.9286, "step": 106299 }, { "epoch": 10.917325664989216, "grad_norm": 0.06794840842485428, "learning_rate": 0.01, "loss": 1.9167, "step": 106302 }, { "epoch": 10.917633768101059, "grad_norm": 0.12076061964035034, "learning_rate": 0.01, "loss": 1.9063, "step": 106305 }, { "epoch": 10.9179418712129, "grad_norm": 0.06557746976613998, "learning_rate": 0.01, "loss": 1.9307, "step": 106308 }, { "epoch": 10.918249974324741, "grad_norm": 0.04917408525943756, "learning_rate": 0.01, "loss": 1.9397, "step": 106311 }, { "epoch": 10.918558077436582, "grad_norm": 0.09011615812778473, "learning_rate": 0.01, "loss": 1.9526, "step": 106314 }, { "epoch": 10.918866180548424, "grad_norm": 0.057904936373233795, "learning_rate": 0.01, "loss": 1.9267, "step": 106317 }, { "epoch": 10.919174283660265, "grad_norm": 0.042555589228868484, "learning_rate": 0.01, "loss": 1.9136, "step": 106320 }, { "epoch": 10.919482386772106, "grad_norm": 0.10149985551834106, "learning_rate": 0.01, "loss": 1.929, "step": 106323 }, { "epoch": 10.919790489883948, "grad_norm": 0.061605051159858704, "learning_rate": 0.01, "loss": 1.9014, "step": 106326 }, { "epoch": 10.920098592995789, "grad_norm": 0.05120980367064476, "learning_rate": 0.01, "loss": 1.9472, "step": 106329 }, { "epoch": 10.92040669610763, "grad_norm": 0.07240397483110428, "learning_rate": 0.01, "loss": 1.9468, "step": 106332 }, { "epoch": 10.920714799219471, "grad_norm": 0.10335461795330048, "learning_rate": 0.01, "loss": 1.948, "step": 106335 }, { "epoch": 10.921022902331314, "grad_norm": 0.05146472528576851, "learning_rate": 0.01, "loss": 1.9211, "step": 106338 }, { "epoch": 10.921331005443156, "grad_norm": 0.09158754348754883, "learning_rate": 0.01, "loss": 1.9112, "step": 106341 }, { "epoch": 10.921639108554997, "grad_norm": 0.0714024156332016, "learning_rate": 0.01, "loss": 1.9328, "step": 106344 }, { "epoch": 10.921947211666838, "grad_norm": 0.07200371474027634, "learning_rate": 0.01, "loss": 1.9189, "step": 106347 }, { "epoch": 10.92225531477868, "grad_norm": 0.033079616725444794, "learning_rate": 0.01, "loss": 1.9162, "step": 106350 }, { "epoch": 10.92256341789052, "grad_norm": 0.06759501993656158, "learning_rate": 0.01, "loss": 1.9408, "step": 106353 }, { "epoch": 10.922871521002362, "grad_norm": 0.07132084667682648, "learning_rate": 0.01, "loss": 1.9285, "step": 106356 }, { "epoch": 10.923179624114203, "grad_norm": 0.03897568956017494, "learning_rate": 0.01, "loss": 1.9534, "step": 106359 }, { "epoch": 10.923487727226044, "grad_norm": 0.04293280094861984, "learning_rate": 0.01, "loss": 1.9495, "step": 106362 }, { "epoch": 10.923795830337886, "grad_norm": 0.11670386046171188, "learning_rate": 0.01, "loss": 1.9598, "step": 106365 }, { "epoch": 10.924103933449729, "grad_norm": 0.12519721686840057, "learning_rate": 0.01, "loss": 1.9662, "step": 106368 }, { "epoch": 10.92441203656157, "grad_norm": 0.038999710232019424, "learning_rate": 0.01, "loss": 1.9239, "step": 106371 }, { "epoch": 10.924720139673411, "grad_norm": 0.07099395245313644, "learning_rate": 0.01, "loss": 1.9214, "step": 106374 }, { "epoch": 10.925028242785253, "grad_norm": 0.05256061628460884, "learning_rate": 0.01, "loss": 1.9393, "step": 106377 }, { "epoch": 10.925336345897094, "grad_norm": 0.10918042808771133, "learning_rate": 0.01, "loss": 1.9163, "step": 106380 }, { "epoch": 10.925644449008935, "grad_norm": 0.15344181656837463, "learning_rate": 0.01, "loss": 1.9309, "step": 106383 }, { "epoch": 10.925952552120776, "grad_norm": 0.07730207592248917, "learning_rate": 0.01, "loss": 1.9296, "step": 106386 }, { "epoch": 10.926260655232618, "grad_norm": 0.06737470626831055, "learning_rate": 0.01, "loss": 1.9123, "step": 106389 }, { "epoch": 10.926568758344459, "grad_norm": 0.06340444087982178, "learning_rate": 0.01, "loss": 1.9207, "step": 106392 }, { "epoch": 10.9268768614563, "grad_norm": 0.08433797955513, "learning_rate": 0.01, "loss": 1.9469, "step": 106395 }, { "epoch": 10.927184964568141, "grad_norm": 0.059738487005233765, "learning_rate": 0.01, "loss": 1.9405, "step": 106398 }, { "epoch": 10.927493067679983, "grad_norm": 0.038807209581136703, "learning_rate": 0.01, "loss": 1.9288, "step": 106401 }, { "epoch": 10.927801170791826, "grad_norm": 0.05405537784099579, "learning_rate": 0.01, "loss": 1.9504, "step": 106404 }, { "epoch": 10.928109273903667, "grad_norm": 0.04065915197134018, "learning_rate": 0.01, "loss": 1.9539, "step": 106407 }, { "epoch": 10.928417377015508, "grad_norm": 0.03751445561647415, "learning_rate": 0.01, "loss": 1.9239, "step": 106410 }, { "epoch": 10.92872548012735, "grad_norm": 0.034337520599365234, "learning_rate": 0.01, "loss": 1.917, "step": 106413 }, { "epoch": 10.92903358323919, "grad_norm": 0.07262330502271652, "learning_rate": 0.01, "loss": 1.9395, "step": 106416 }, { "epoch": 10.929341686351032, "grad_norm": 0.04483049735426903, "learning_rate": 0.01, "loss": 1.9175, "step": 106419 }, { "epoch": 10.929649789462873, "grad_norm": 0.06375708431005478, "learning_rate": 0.01, "loss": 1.9382, "step": 106422 }, { "epoch": 10.929957892574715, "grad_norm": 0.0656374990940094, "learning_rate": 0.01, "loss": 1.9287, "step": 106425 }, { "epoch": 10.930265995686556, "grad_norm": 0.1104300320148468, "learning_rate": 0.01, "loss": 1.9569, "step": 106428 }, { "epoch": 10.930574098798397, "grad_norm": 0.04479607939720154, "learning_rate": 0.01, "loss": 1.9431, "step": 106431 }, { "epoch": 10.93088220191024, "grad_norm": 0.09327235817909241, "learning_rate": 0.01, "loss": 1.9412, "step": 106434 }, { "epoch": 10.931190305022081, "grad_norm": 0.07236454635858536, "learning_rate": 0.01, "loss": 1.9291, "step": 106437 }, { "epoch": 10.931498408133923, "grad_norm": 0.03589305654168129, "learning_rate": 0.01, "loss": 1.9534, "step": 106440 }, { "epoch": 10.931806511245764, "grad_norm": 0.034649983048439026, "learning_rate": 0.01, "loss": 1.9624, "step": 106443 }, { "epoch": 10.932114614357605, "grad_norm": 0.09589678049087524, "learning_rate": 0.01, "loss": 1.9312, "step": 106446 }, { "epoch": 10.932422717469446, "grad_norm": 0.05428173393011093, "learning_rate": 0.01, "loss": 1.9534, "step": 106449 }, { "epoch": 10.932730820581288, "grad_norm": 0.06434815376996994, "learning_rate": 0.01, "loss": 1.9458, "step": 106452 }, { "epoch": 10.933038923693129, "grad_norm": 0.12415088713169098, "learning_rate": 0.01, "loss": 1.9353, "step": 106455 }, { "epoch": 10.93334702680497, "grad_norm": 0.06227521225810051, "learning_rate": 0.01, "loss": 1.9209, "step": 106458 }, { "epoch": 10.933655129916811, "grad_norm": 0.06899651139974594, "learning_rate": 0.01, "loss": 1.92, "step": 106461 }, { "epoch": 10.933963233028653, "grad_norm": 0.035584982484579086, "learning_rate": 0.01, "loss": 1.9279, "step": 106464 }, { "epoch": 10.934271336140496, "grad_norm": 0.05347567796707153, "learning_rate": 0.01, "loss": 1.9437, "step": 106467 }, { "epoch": 10.934579439252337, "grad_norm": 0.04459035024046898, "learning_rate": 0.01, "loss": 1.9455, "step": 106470 }, { "epoch": 10.934887542364178, "grad_norm": 0.06677372008562088, "learning_rate": 0.01, "loss": 1.9547, "step": 106473 }, { "epoch": 10.93519564547602, "grad_norm": 0.12824846804141998, "learning_rate": 0.01, "loss": 1.956, "step": 106476 }, { "epoch": 10.93550374858786, "grad_norm": 0.05906982347369194, "learning_rate": 0.01, "loss": 1.9109, "step": 106479 }, { "epoch": 10.935811851699702, "grad_norm": 0.08169703930616379, "learning_rate": 0.01, "loss": 1.9644, "step": 106482 }, { "epoch": 10.936119954811543, "grad_norm": 0.12834133207798004, "learning_rate": 0.01, "loss": 1.9367, "step": 106485 }, { "epoch": 10.936428057923385, "grad_norm": 0.053135670721530914, "learning_rate": 0.01, "loss": 1.9427, "step": 106488 }, { "epoch": 10.936736161035226, "grad_norm": 0.03859506547451019, "learning_rate": 0.01, "loss": 1.9464, "step": 106491 }, { "epoch": 10.937044264147067, "grad_norm": 0.06961911916732788, "learning_rate": 0.01, "loss": 1.9216, "step": 106494 }, { "epoch": 10.93735236725891, "grad_norm": 0.1543235182762146, "learning_rate": 0.01, "loss": 1.9376, "step": 106497 }, { "epoch": 10.937660470370751, "grad_norm": 0.04941339045763016, "learning_rate": 0.01, "loss": 1.9548, "step": 106500 }, { "epoch": 10.937968573482593, "grad_norm": 0.0709512010216713, "learning_rate": 0.01, "loss": 1.9174, "step": 106503 }, { "epoch": 10.938276676594434, "grad_norm": 0.04137871414422989, "learning_rate": 0.01, "loss": 1.8939, "step": 106506 }, { "epoch": 10.938584779706275, "grad_norm": 0.05609670281410217, "learning_rate": 0.01, "loss": 1.9319, "step": 106509 }, { "epoch": 10.938892882818116, "grad_norm": 0.04416004940867424, "learning_rate": 0.01, "loss": 1.9259, "step": 106512 }, { "epoch": 10.939200985929958, "grad_norm": 0.0448085255920887, "learning_rate": 0.01, "loss": 1.9473, "step": 106515 }, { "epoch": 10.939509089041799, "grad_norm": 0.04962565377354622, "learning_rate": 0.01, "loss": 1.9316, "step": 106518 }, { "epoch": 10.93981719215364, "grad_norm": 0.07378124445676804, "learning_rate": 0.01, "loss": 1.918, "step": 106521 }, { "epoch": 10.940125295265481, "grad_norm": 0.08252052217721939, "learning_rate": 0.01, "loss": 1.9321, "step": 106524 }, { "epoch": 10.940433398377323, "grad_norm": 0.07529515773057938, "learning_rate": 0.01, "loss": 1.9427, "step": 106527 }, { "epoch": 10.940741501489166, "grad_norm": 0.07012537121772766, "learning_rate": 0.01, "loss": 1.96, "step": 106530 }, { "epoch": 10.941049604601007, "grad_norm": 0.0447557158768177, "learning_rate": 0.01, "loss": 1.9266, "step": 106533 }, { "epoch": 10.941357707712848, "grad_norm": 0.056523241102695465, "learning_rate": 0.01, "loss": 1.9663, "step": 106536 }, { "epoch": 10.94166581082469, "grad_norm": 0.07511433959007263, "learning_rate": 0.01, "loss": 1.9555, "step": 106539 }, { "epoch": 10.94197391393653, "grad_norm": 0.04001323878765106, "learning_rate": 0.01, "loss": 1.9508, "step": 106542 }, { "epoch": 10.942282017048372, "grad_norm": 0.12339727580547333, "learning_rate": 0.01, "loss": 1.9227, "step": 106545 }, { "epoch": 10.942590120160213, "grad_norm": 0.07011760026216507, "learning_rate": 0.01, "loss": 1.9449, "step": 106548 }, { "epoch": 10.942898223272055, "grad_norm": 0.08485225588083267, "learning_rate": 0.01, "loss": 1.9574, "step": 106551 }, { "epoch": 10.943206326383896, "grad_norm": 0.1317882388830185, "learning_rate": 0.01, "loss": 1.9172, "step": 106554 }, { "epoch": 10.943514429495737, "grad_norm": 0.05222870036959648, "learning_rate": 0.01, "loss": 1.9233, "step": 106557 }, { "epoch": 10.94382253260758, "grad_norm": 0.060174986720085144, "learning_rate": 0.01, "loss": 1.9481, "step": 106560 }, { "epoch": 10.944130635719421, "grad_norm": 0.06661222130060196, "learning_rate": 0.01, "loss": 1.9612, "step": 106563 }, { "epoch": 10.944438738831263, "grad_norm": 0.037411950528621674, "learning_rate": 0.01, "loss": 1.9367, "step": 106566 }, { "epoch": 10.944746841943104, "grad_norm": 0.046929627656936646, "learning_rate": 0.01, "loss": 1.9423, "step": 106569 }, { "epoch": 10.945054945054945, "grad_norm": 0.10040893405675888, "learning_rate": 0.01, "loss": 1.9238, "step": 106572 }, { "epoch": 10.945363048166787, "grad_norm": 0.038712795823812485, "learning_rate": 0.01, "loss": 1.9229, "step": 106575 }, { "epoch": 10.945671151278628, "grad_norm": 0.12498601526021957, "learning_rate": 0.01, "loss": 1.9498, "step": 106578 }, { "epoch": 10.945979254390469, "grad_norm": 0.05384666845202446, "learning_rate": 0.01, "loss": 1.9329, "step": 106581 }, { "epoch": 10.94628735750231, "grad_norm": 0.04159768298268318, "learning_rate": 0.01, "loss": 1.9558, "step": 106584 }, { "epoch": 10.946595460614152, "grad_norm": 0.1507614552974701, "learning_rate": 0.01, "loss": 1.9469, "step": 106587 }, { "epoch": 10.946903563725993, "grad_norm": 0.04219367727637291, "learning_rate": 0.01, "loss": 1.9317, "step": 106590 }, { "epoch": 10.947211666837836, "grad_norm": 0.046481501311063766, "learning_rate": 0.01, "loss": 1.9395, "step": 106593 }, { "epoch": 10.947519769949677, "grad_norm": 0.05947163328528404, "learning_rate": 0.01, "loss": 1.925, "step": 106596 }, { "epoch": 10.947827873061518, "grad_norm": 0.1727290153503418, "learning_rate": 0.01, "loss": 1.949, "step": 106599 }, { "epoch": 10.94813597617336, "grad_norm": 0.08525878190994263, "learning_rate": 0.01, "loss": 1.9352, "step": 106602 }, { "epoch": 10.948444079285201, "grad_norm": 0.058909181505441666, "learning_rate": 0.01, "loss": 1.906, "step": 106605 }, { "epoch": 10.948752182397042, "grad_norm": 0.04929728806018829, "learning_rate": 0.01, "loss": 1.9398, "step": 106608 }, { "epoch": 10.949060285508883, "grad_norm": 0.039295829832553864, "learning_rate": 0.01, "loss": 1.9342, "step": 106611 }, { "epoch": 10.949368388620725, "grad_norm": 0.04213673621416092, "learning_rate": 0.01, "loss": 1.9574, "step": 106614 }, { "epoch": 10.949676491732566, "grad_norm": 0.04450298100709915, "learning_rate": 0.01, "loss": 1.9476, "step": 106617 }, { "epoch": 10.949984594844407, "grad_norm": 0.05633500590920448, "learning_rate": 0.01, "loss": 1.9729, "step": 106620 }, { "epoch": 10.95029269795625, "grad_norm": 0.042609844356775284, "learning_rate": 0.01, "loss": 1.9397, "step": 106623 }, { "epoch": 10.950600801068092, "grad_norm": 0.03682399168610573, "learning_rate": 0.01, "loss": 1.9607, "step": 106626 }, { "epoch": 10.950908904179933, "grad_norm": 0.06465191394090652, "learning_rate": 0.01, "loss": 1.9536, "step": 106629 }, { "epoch": 10.951217007291774, "grad_norm": 0.05256539583206177, "learning_rate": 0.01, "loss": 1.9208, "step": 106632 }, { "epoch": 10.951525110403615, "grad_norm": 0.07118343561887741, "learning_rate": 0.01, "loss": 1.9438, "step": 106635 }, { "epoch": 10.951833213515457, "grad_norm": 0.17668597400188446, "learning_rate": 0.01, "loss": 1.9651, "step": 106638 }, { "epoch": 10.952141316627298, "grad_norm": 0.045385267585515976, "learning_rate": 0.01, "loss": 1.9306, "step": 106641 }, { "epoch": 10.952449419739139, "grad_norm": 0.06024050712585449, "learning_rate": 0.01, "loss": 1.9349, "step": 106644 }, { "epoch": 10.95275752285098, "grad_norm": 0.04597383365035057, "learning_rate": 0.01, "loss": 1.9217, "step": 106647 }, { "epoch": 10.953065625962822, "grad_norm": 0.035271596163511276, "learning_rate": 0.01, "loss": 1.9377, "step": 106650 }, { "epoch": 10.953373729074663, "grad_norm": 0.048514969646930695, "learning_rate": 0.01, "loss": 1.935, "step": 106653 }, { "epoch": 10.953681832186504, "grad_norm": 0.05543520674109459, "learning_rate": 0.01, "loss": 1.92, "step": 106656 }, { "epoch": 10.953989935298347, "grad_norm": 0.06604902446269989, "learning_rate": 0.01, "loss": 1.9246, "step": 106659 }, { "epoch": 10.954298038410188, "grad_norm": 0.06005027890205383, "learning_rate": 0.01, "loss": 1.9584, "step": 106662 }, { "epoch": 10.95460614152203, "grad_norm": 0.05095071718096733, "learning_rate": 0.01, "loss": 1.9303, "step": 106665 }, { "epoch": 10.954914244633871, "grad_norm": 0.1646306812763214, "learning_rate": 0.01, "loss": 1.9097, "step": 106668 }, { "epoch": 10.955222347745712, "grad_norm": 0.15995948016643524, "learning_rate": 0.01, "loss": 1.9515, "step": 106671 }, { "epoch": 10.955530450857554, "grad_norm": 0.09556877613067627, "learning_rate": 0.01, "loss": 1.9383, "step": 106674 }, { "epoch": 10.955838553969395, "grad_norm": 0.04684971272945404, "learning_rate": 0.01, "loss": 1.9425, "step": 106677 }, { "epoch": 10.956146657081236, "grad_norm": 0.04624011367559433, "learning_rate": 0.01, "loss": 1.9362, "step": 106680 }, { "epoch": 10.956454760193077, "grad_norm": 0.03910151869058609, "learning_rate": 0.01, "loss": 1.9349, "step": 106683 }, { "epoch": 10.956762863304919, "grad_norm": 0.08027227967977524, "learning_rate": 0.01, "loss": 1.9261, "step": 106686 }, { "epoch": 10.957070966416762, "grad_norm": 0.07811252772808075, "learning_rate": 0.01, "loss": 1.9064, "step": 106689 }, { "epoch": 10.957379069528603, "grad_norm": 0.07080382108688354, "learning_rate": 0.01, "loss": 1.9278, "step": 106692 }, { "epoch": 10.957687172640444, "grad_norm": 0.03555811569094658, "learning_rate": 0.01, "loss": 1.9348, "step": 106695 }, { "epoch": 10.957995275752285, "grad_norm": 0.033408600836992264, "learning_rate": 0.01, "loss": 1.9446, "step": 106698 }, { "epoch": 10.958303378864127, "grad_norm": 0.03809535875916481, "learning_rate": 0.01, "loss": 1.9418, "step": 106701 }, { "epoch": 10.958611481975968, "grad_norm": 0.08328834921121597, "learning_rate": 0.01, "loss": 1.9354, "step": 106704 }, { "epoch": 10.95891958508781, "grad_norm": 0.09299707412719727, "learning_rate": 0.01, "loss": 1.9463, "step": 106707 }, { "epoch": 10.95922768819965, "grad_norm": 0.10310142487287521, "learning_rate": 0.01, "loss": 1.927, "step": 106710 }, { "epoch": 10.959535791311492, "grad_norm": 0.11186525225639343, "learning_rate": 0.01, "loss": 1.9279, "step": 106713 }, { "epoch": 10.959843894423333, "grad_norm": 0.08874673396348953, "learning_rate": 0.01, "loss": 1.9367, "step": 106716 }, { "epoch": 10.960151997535174, "grad_norm": 0.08445736765861511, "learning_rate": 0.01, "loss": 1.9435, "step": 106719 }, { "epoch": 10.960460100647017, "grad_norm": 0.07096145302057266, "learning_rate": 0.01, "loss": 1.9395, "step": 106722 }, { "epoch": 10.960768203758859, "grad_norm": 0.045588068664073944, "learning_rate": 0.01, "loss": 1.9461, "step": 106725 }, { "epoch": 10.9610763068707, "grad_norm": 0.0643002986907959, "learning_rate": 0.01, "loss": 1.9517, "step": 106728 }, { "epoch": 10.961384409982541, "grad_norm": 0.12200552970170975, "learning_rate": 0.01, "loss": 1.9284, "step": 106731 }, { "epoch": 10.961692513094382, "grad_norm": 0.06449657678604126, "learning_rate": 0.01, "loss": 1.9441, "step": 106734 }, { "epoch": 10.962000616206224, "grad_norm": 0.09041350334882736, "learning_rate": 0.01, "loss": 1.9209, "step": 106737 }, { "epoch": 10.962308719318065, "grad_norm": 0.04235527664422989, "learning_rate": 0.01, "loss": 1.9389, "step": 106740 }, { "epoch": 10.962616822429906, "grad_norm": 0.06367935985326767, "learning_rate": 0.01, "loss": 1.9411, "step": 106743 }, { "epoch": 10.962924925541747, "grad_norm": 0.03904595971107483, "learning_rate": 0.01, "loss": 1.927, "step": 106746 }, { "epoch": 10.963233028653589, "grad_norm": 0.09359109401702881, "learning_rate": 0.01, "loss": 1.9202, "step": 106749 }, { "epoch": 10.963541131765432, "grad_norm": 0.058446332812309265, "learning_rate": 0.01, "loss": 1.9441, "step": 106752 }, { "epoch": 10.963849234877273, "grad_norm": 0.10020552575588226, "learning_rate": 0.01, "loss": 1.9486, "step": 106755 }, { "epoch": 10.964157337989114, "grad_norm": 0.050879430025815964, "learning_rate": 0.01, "loss": 1.9412, "step": 106758 }, { "epoch": 10.964465441100955, "grad_norm": 0.04936183616518974, "learning_rate": 0.01, "loss": 1.9098, "step": 106761 }, { "epoch": 10.964773544212797, "grad_norm": 0.05340530350804329, "learning_rate": 0.01, "loss": 1.9201, "step": 106764 }, { "epoch": 10.965081647324638, "grad_norm": 0.1125936210155487, "learning_rate": 0.01, "loss": 1.9415, "step": 106767 }, { "epoch": 10.96538975043648, "grad_norm": 0.13663361966609955, "learning_rate": 0.01, "loss": 1.9471, "step": 106770 }, { "epoch": 10.96569785354832, "grad_norm": 0.1450393944978714, "learning_rate": 0.01, "loss": 1.9518, "step": 106773 }, { "epoch": 10.966005956660162, "grad_norm": 0.09279601275920868, "learning_rate": 0.01, "loss": 1.9583, "step": 106776 }, { "epoch": 10.966314059772003, "grad_norm": 0.03667345643043518, "learning_rate": 0.01, "loss": 1.9342, "step": 106779 }, { "epoch": 10.966622162883844, "grad_norm": 0.040375106036663055, "learning_rate": 0.01, "loss": 1.9187, "step": 106782 }, { "epoch": 10.966930265995687, "grad_norm": 0.05492724850773811, "learning_rate": 0.01, "loss": 1.9205, "step": 106785 }, { "epoch": 10.967238369107529, "grad_norm": 0.07538987696170807, "learning_rate": 0.01, "loss": 1.9334, "step": 106788 }, { "epoch": 10.96754647221937, "grad_norm": 0.049806006252765656, "learning_rate": 0.01, "loss": 1.9534, "step": 106791 }, { "epoch": 10.967854575331211, "grad_norm": 0.04567021504044533, "learning_rate": 0.01, "loss": 1.9461, "step": 106794 }, { "epoch": 10.968162678443052, "grad_norm": 0.055650562047958374, "learning_rate": 0.01, "loss": 1.9346, "step": 106797 }, { "epoch": 10.968470781554894, "grad_norm": 0.04961836710572243, "learning_rate": 0.01, "loss": 1.9458, "step": 106800 }, { "epoch": 10.968778884666735, "grad_norm": 0.08718889951705933, "learning_rate": 0.01, "loss": 1.9416, "step": 106803 }, { "epoch": 10.969086987778576, "grad_norm": 0.05088066682219505, "learning_rate": 0.01, "loss": 1.9562, "step": 106806 }, { "epoch": 10.969395090890417, "grad_norm": 0.042699042707681656, "learning_rate": 0.01, "loss": 1.9172, "step": 106809 }, { "epoch": 10.969703194002259, "grad_norm": 0.04571409523487091, "learning_rate": 0.01, "loss": 1.9308, "step": 106812 }, { "epoch": 10.970011297114102, "grad_norm": 0.058041494339704514, "learning_rate": 0.01, "loss": 1.9357, "step": 106815 }, { "epoch": 10.970319400225943, "grad_norm": 0.1141694188117981, "learning_rate": 0.01, "loss": 1.9376, "step": 106818 }, { "epoch": 10.970627503337784, "grad_norm": 0.06193096935749054, "learning_rate": 0.01, "loss": 1.9395, "step": 106821 }, { "epoch": 10.970935606449626, "grad_norm": 0.06552765518426895, "learning_rate": 0.01, "loss": 1.9426, "step": 106824 }, { "epoch": 10.971243709561467, "grad_norm": 0.10089132189750671, "learning_rate": 0.01, "loss": 1.9215, "step": 106827 }, { "epoch": 10.971551812673308, "grad_norm": 0.04266245290637016, "learning_rate": 0.01, "loss": 1.9374, "step": 106830 }, { "epoch": 10.97185991578515, "grad_norm": 0.045328978449106216, "learning_rate": 0.01, "loss": 1.9548, "step": 106833 }, { "epoch": 10.97216801889699, "grad_norm": 0.03314703330397606, "learning_rate": 0.01, "loss": 1.9175, "step": 106836 }, { "epoch": 10.972476122008832, "grad_norm": 0.10586504638195038, "learning_rate": 0.01, "loss": 1.9487, "step": 106839 }, { "epoch": 10.972784225120673, "grad_norm": 0.07202047109603882, "learning_rate": 0.01, "loss": 1.9412, "step": 106842 }, { "epoch": 10.973092328232514, "grad_norm": 0.08412293344736099, "learning_rate": 0.01, "loss": 1.9624, "step": 106845 }, { "epoch": 10.973400431344357, "grad_norm": 0.041309576481580734, "learning_rate": 0.01, "loss": 1.916, "step": 106848 }, { "epoch": 10.973708534456199, "grad_norm": 0.09424412250518799, "learning_rate": 0.01, "loss": 1.925, "step": 106851 }, { "epoch": 10.97401663756804, "grad_norm": 0.0540972538292408, "learning_rate": 0.01, "loss": 1.9479, "step": 106854 }, { "epoch": 10.974324740679881, "grad_norm": 0.04370080307126045, "learning_rate": 0.01, "loss": 1.9816, "step": 106857 }, { "epoch": 10.974632843791722, "grad_norm": 0.029536878690123558, "learning_rate": 0.01, "loss": 1.9501, "step": 106860 }, { "epoch": 10.974940946903564, "grad_norm": 0.12666262686252594, "learning_rate": 0.01, "loss": 1.925, "step": 106863 }, { "epoch": 10.975249050015405, "grad_norm": 0.07763486355543137, "learning_rate": 0.01, "loss": 1.9368, "step": 106866 }, { "epoch": 10.975557153127246, "grad_norm": 0.08744452148675919, "learning_rate": 0.01, "loss": 1.9552, "step": 106869 }, { "epoch": 10.975865256239087, "grad_norm": 0.04688967019319534, "learning_rate": 0.01, "loss": 1.933, "step": 106872 }, { "epoch": 10.976173359350929, "grad_norm": 0.044772904366254807, "learning_rate": 0.01, "loss": 1.9423, "step": 106875 }, { "epoch": 10.976481462462772, "grad_norm": 0.042988039553165436, "learning_rate": 0.01, "loss": 1.9363, "step": 106878 }, { "epoch": 10.976789565574613, "grad_norm": 0.044973667711019516, "learning_rate": 0.01, "loss": 1.932, "step": 106881 }, { "epoch": 10.977097668686454, "grad_norm": 0.07078361511230469, "learning_rate": 0.01, "loss": 1.9415, "step": 106884 }, { "epoch": 10.977405771798296, "grad_norm": 0.03690226748585701, "learning_rate": 0.01, "loss": 1.9342, "step": 106887 }, { "epoch": 10.977713874910137, "grad_norm": 0.08343128859996796, "learning_rate": 0.01, "loss": 1.9184, "step": 106890 }, { "epoch": 10.978021978021978, "grad_norm": 0.0915609672665596, "learning_rate": 0.01, "loss": 1.9691, "step": 106893 }, { "epoch": 10.97833008113382, "grad_norm": 0.051095981150865555, "learning_rate": 0.01, "loss": 1.9134, "step": 106896 }, { "epoch": 10.97863818424566, "grad_norm": 0.08984770625829697, "learning_rate": 0.01, "loss": 1.9591, "step": 106899 }, { "epoch": 10.978946287357502, "grad_norm": 0.033725060522556305, "learning_rate": 0.01, "loss": 1.9435, "step": 106902 }, { "epoch": 10.979254390469343, "grad_norm": 0.05065962299704552, "learning_rate": 0.01, "loss": 1.941, "step": 106905 }, { "epoch": 10.979562493581184, "grad_norm": 0.057545311748981476, "learning_rate": 0.01, "loss": 1.9358, "step": 106908 }, { "epoch": 10.979870596693026, "grad_norm": 0.04934195429086685, "learning_rate": 0.01, "loss": 1.9243, "step": 106911 }, { "epoch": 10.980178699804869, "grad_norm": 0.050023820251226425, "learning_rate": 0.01, "loss": 1.9219, "step": 106914 }, { "epoch": 10.98048680291671, "grad_norm": 0.05062216520309448, "learning_rate": 0.01, "loss": 1.9381, "step": 106917 }, { "epoch": 10.980794906028551, "grad_norm": 0.035237058997154236, "learning_rate": 0.01, "loss": 1.9242, "step": 106920 }, { "epoch": 10.981103009140393, "grad_norm": 0.047082848846912384, "learning_rate": 0.01, "loss": 1.9587, "step": 106923 }, { "epoch": 10.981411112252234, "grad_norm": 0.04227985069155693, "learning_rate": 0.01, "loss": 1.9262, "step": 106926 }, { "epoch": 10.981719215364075, "grad_norm": 0.09781994670629501, "learning_rate": 0.01, "loss": 1.9221, "step": 106929 }, { "epoch": 10.982027318475916, "grad_norm": 0.04272658750414848, "learning_rate": 0.01, "loss": 1.9506, "step": 106932 }, { "epoch": 10.982335421587758, "grad_norm": 0.04117373377084732, "learning_rate": 0.01, "loss": 1.9215, "step": 106935 }, { "epoch": 10.982643524699599, "grad_norm": 0.0534319207072258, "learning_rate": 0.01, "loss": 1.9391, "step": 106938 }, { "epoch": 10.98295162781144, "grad_norm": 0.06119696795940399, "learning_rate": 0.01, "loss": 1.9493, "step": 106941 }, { "epoch": 10.983259730923283, "grad_norm": 0.03843209147453308, "learning_rate": 0.01, "loss": 1.9116, "step": 106944 }, { "epoch": 10.983567834035124, "grad_norm": 0.04389924556016922, "learning_rate": 0.01, "loss": 1.9306, "step": 106947 }, { "epoch": 10.983875937146966, "grad_norm": 0.056084759533405304, "learning_rate": 0.01, "loss": 1.9543, "step": 106950 }, { "epoch": 10.984184040258807, "grad_norm": 0.09229186177253723, "learning_rate": 0.01, "loss": 1.9272, "step": 106953 }, { "epoch": 10.984492143370648, "grad_norm": 0.10481631010770798, "learning_rate": 0.01, "loss": 1.9508, "step": 106956 }, { "epoch": 10.98480024648249, "grad_norm": 0.12012122571468353, "learning_rate": 0.01, "loss": 1.9336, "step": 106959 }, { "epoch": 10.98510834959433, "grad_norm": 0.06024550646543503, "learning_rate": 0.01, "loss": 1.9541, "step": 106962 }, { "epoch": 10.985416452706172, "grad_norm": 0.06280180811882019, "learning_rate": 0.01, "loss": 1.9115, "step": 106965 }, { "epoch": 10.985724555818013, "grad_norm": 0.045513831079006195, "learning_rate": 0.01, "loss": 1.9268, "step": 106968 }, { "epoch": 10.986032658929854, "grad_norm": 0.07376816123723984, "learning_rate": 0.01, "loss": 1.9158, "step": 106971 }, { "epoch": 10.986340762041696, "grad_norm": 0.05059327185153961, "learning_rate": 0.01, "loss": 1.9366, "step": 106974 }, { "epoch": 10.986648865153539, "grad_norm": 0.06152288243174553, "learning_rate": 0.01, "loss": 1.9175, "step": 106977 }, { "epoch": 10.98695696826538, "grad_norm": 0.05427305027842522, "learning_rate": 0.01, "loss": 1.9341, "step": 106980 }, { "epoch": 10.987265071377221, "grad_norm": 0.08136880397796631, "learning_rate": 0.01, "loss": 1.9349, "step": 106983 }, { "epoch": 10.987573174489063, "grad_norm": 0.07303925603628159, "learning_rate": 0.01, "loss": 1.9289, "step": 106986 }, { "epoch": 10.987881277600904, "grad_norm": 0.08733496814966202, "learning_rate": 0.01, "loss": 1.9283, "step": 106989 }, { "epoch": 10.988189380712745, "grad_norm": 0.09592670202255249, "learning_rate": 0.01, "loss": 1.9432, "step": 106992 }, { "epoch": 10.988497483824586, "grad_norm": 0.05195619538426399, "learning_rate": 0.01, "loss": 1.9411, "step": 106995 }, { "epoch": 10.988805586936428, "grad_norm": 0.06398551166057587, "learning_rate": 0.01, "loss": 1.9365, "step": 106998 }, { "epoch": 10.989113690048269, "grad_norm": 0.10886025428771973, "learning_rate": 0.01, "loss": 1.9628, "step": 107001 }, { "epoch": 10.98942179316011, "grad_norm": 0.04574966058135033, "learning_rate": 0.01, "loss": 1.9292, "step": 107004 }, { "epoch": 10.989729896271953, "grad_norm": 0.04682609811425209, "learning_rate": 0.01, "loss": 1.9518, "step": 107007 }, { "epoch": 10.990037999383794, "grad_norm": 0.10652533173561096, "learning_rate": 0.01, "loss": 1.9517, "step": 107010 }, { "epoch": 10.990346102495636, "grad_norm": 0.15787062048912048, "learning_rate": 0.01, "loss": 1.9498, "step": 107013 }, { "epoch": 10.990654205607477, "grad_norm": 0.04114218056201935, "learning_rate": 0.01, "loss": 1.921, "step": 107016 }, { "epoch": 10.990962308719318, "grad_norm": 0.05407201871275902, "learning_rate": 0.01, "loss": 1.9039, "step": 107019 }, { "epoch": 10.99127041183116, "grad_norm": 0.12445944547653198, "learning_rate": 0.01, "loss": 1.9396, "step": 107022 }, { "epoch": 10.991578514943, "grad_norm": 0.08229243010282516, "learning_rate": 0.01, "loss": 1.9588, "step": 107025 }, { "epoch": 10.991886618054842, "grad_norm": 0.047715965658426285, "learning_rate": 0.01, "loss": 1.9369, "step": 107028 }, { "epoch": 10.992194721166683, "grad_norm": 0.04792723432183266, "learning_rate": 0.01, "loss": 1.9306, "step": 107031 }, { "epoch": 10.992502824278525, "grad_norm": 0.05532047525048256, "learning_rate": 0.01, "loss": 1.9498, "step": 107034 }, { "epoch": 10.992810927390366, "grad_norm": 0.045810431241989136, "learning_rate": 0.01, "loss": 1.9566, "step": 107037 }, { "epoch": 10.993119030502209, "grad_norm": 0.11059387773275375, "learning_rate": 0.01, "loss": 1.9228, "step": 107040 }, { "epoch": 10.99342713361405, "grad_norm": 0.05643268674612045, "learning_rate": 0.01, "loss": 1.9347, "step": 107043 }, { "epoch": 10.993735236725891, "grad_norm": 0.050066348165273666, "learning_rate": 0.01, "loss": 1.9466, "step": 107046 }, { "epoch": 10.994043339837733, "grad_norm": 0.06248445808887482, "learning_rate": 0.01, "loss": 1.9358, "step": 107049 }, { "epoch": 10.994351442949574, "grad_norm": 0.05078605189919472, "learning_rate": 0.01, "loss": 1.9356, "step": 107052 }, { "epoch": 10.994659546061415, "grad_norm": 0.11182273924350739, "learning_rate": 0.01, "loss": 1.9493, "step": 107055 }, { "epoch": 10.994967649173256, "grad_norm": 0.09776394814252853, "learning_rate": 0.01, "loss": 1.9533, "step": 107058 }, { "epoch": 10.995275752285098, "grad_norm": 0.047179535031318665, "learning_rate": 0.01, "loss": 1.9296, "step": 107061 }, { "epoch": 10.995583855396939, "grad_norm": 0.03766768425703049, "learning_rate": 0.01, "loss": 1.9091, "step": 107064 }, { "epoch": 10.99589195850878, "grad_norm": 0.04526562616229057, "learning_rate": 0.01, "loss": 1.9377, "step": 107067 }, { "epoch": 10.996200061620623, "grad_norm": 0.1715056300163269, "learning_rate": 0.01, "loss": 1.9416, "step": 107070 }, { "epoch": 10.996508164732465, "grad_norm": 0.13546305894851685, "learning_rate": 0.01, "loss": 1.9452, "step": 107073 }, { "epoch": 10.996816267844306, "grad_norm": 0.050267528742551804, "learning_rate": 0.01, "loss": 1.929, "step": 107076 }, { "epoch": 10.997124370956147, "grad_norm": 0.05380922183394432, "learning_rate": 0.01, "loss": 1.9695, "step": 107079 }, { "epoch": 10.997432474067988, "grad_norm": 0.046985093504190445, "learning_rate": 0.01, "loss": 1.9383, "step": 107082 }, { "epoch": 10.99774057717983, "grad_norm": 0.0655679777264595, "learning_rate": 0.01, "loss": 1.9378, "step": 107085 }, { "epoch": 10.99804868029167, "grad_norm": 0.04053609445691109, "learning_rate": 0.01, "loss": 1.937, "step": 107088 }, { "epoch": 10.998356783403512, "grad_norm": 0.03527331352233887, "learning_rate": 0.01, "loss": 1.9386, "step": 107091 }, { "epoch": 10.998664886515353, "grad_norm": 0.033256895840168, "learning_rate": 0.01, "loss": 1.9428, "step": 107094 }, { "epoch": 10.998972989627195, "grad_norm": 0.09747351706027985, "learning_rate": 0.01, "loss": 1.9498, "step": 107097 }, { "epoch": 10.999281092739036, "grad_norm": 0.06879878789186478, "learning_rate": 0.01, "loss": 1.9483, "step": 107100 }, { "epoch": 10.999589195850879, "grad_norm": 0.09804810583591461, "learning_rate": 0.01, "loss": 1.9217, "step": 107103 }, { "epoch": 10.99989729896272, "grad_norm": 0.08518742024898529, "learning_rate": 0.01, "loss": 1.9186, "step": 107106 }, { "epoch": 11.008119218910586, "grad_norm": 0.10636568069458008, "learning_rate": 0.01, "loss": 1.9537, "step": 107109 }, { "epoch": 11.008427543679343, "grad_norm": 0.0721297413110733, "learning_rate": 0.01, "loss": 1.9415, "step": 107112 }, { "epoch": 11.008735868448099, "grad_norm": 0.06640434265136719, "learning_rate": 0.01, "loss": 1.9257, "step": 107115 }, { "epoch": 11.009044193216855, "grad_norm": 0.04051525518298149, "learning_rate": 0.01, "loss": 1.9368, "step": 107118 }, { "epoch": 11.009352517985612, "grad_norm": 0.058860983699560165, "learning_rate": 0.01, "loss": 1.9448, "step": 107121 }, { "epoch": 11.009660842754368, "grad_norm": 0.0524006150662899, "learning_rate": 0.01, "loss": 1.9548, "step": 107124 }, { "epoch": 11.009969167523124, "grad_norm": 0.1158260628581047, "learning_rate": 0.01, "loss": 1.964, "step": 107127 }, { "epoch": 11.01027749229188, "grad_norm": 0.0887027382850647, "learning_rate": 0.01, "loss": 1.9406, "step": 107130 }, { "epoch": 11.010585817060637, "grad_norm": 0.054239798337221146, "learning_rate": 0.01, "loss": 1.9356, "step": 107133 }, { "epoch": 11.010894141829393, "grad_norm": 0.062151018530130386, "learning_rate": 0.01, "loss": 1.9387, "step": 107136 }, { "epoch": 11.01120246659815, "grad_norm": 0.10690824687480927, "learning_rate": 0.01, "loss": 1.9539, "step": 107139 }, { "epoch": 11.011510791366906, "grad_norm": 0.09708454459905624, "learning_rate": 0.01, "loss": 1.9269, "step": 107142 }, { "epoch": 11.011819116135664, "grad_norm": 0.05205899477005005, "learning_rate": 0.01, "loss": 1.9651, "step": 107145 }, { "epoch": 11.01212744090442, "grad_norm": 0.0874539464712143, "learning_rate": 0.01, "loss": 1.9617, "step": 107148 }, { "epoch": 11.012435765673176, "grad_norm": 0.054691608995199203, "learning_rate": 0.01, "loss": 1.9468, "step": 107151 }, { "epoch": 11.012744090441933, "grad_norm": 0.05851634964346886, "learning_rate": 0.01, "loss": 1.9653, "step": 107154 }, { "epoch": 11.013052415210689, "grad_norm": 0.05712060630321503, "learning_rate": 0.01, "loss": 1.9566, "step": 107157 }, { "epoch": 11.013360739979445, "grad_norm": 0.07348459213972092, "learning_rate": 0.01, "loss": 1.9456, "step": 107160 }, { "epoch": 11.013669064748202, "grad_norm": 0.0613148994743824, "learning_rate": 0.01, "loss": 1.9463, "step": 107163 }, { "epoch": 11.013977389516958, "grad_norm": 0.09934704750776291, "learning_rate": 0.01, "loss": 1.9341, "step": 107166 }, { "epoch": 11.014285714285714, "grad_norm": 0.04725782200694084, "learning_rate": 0.01, "loss": 1.9418, "step": 107169 }, { "epoch": 11.01459403905447, "grad_norm": 0.050498757511377335, "learning_rate": 0.01, "loss": 1.9369, "step": 107172 }, { "epoch": 11.014902363823227, "grad_norm": 0.04003717005252838, "learning_rate": 0.01, "loss": 1.9421, "step": 107175 }, { "epoch": 11.015210688591983, "grad_norm": 0.11621799319982529, "learning_rate": 0.01, "loss": 1.9361, "step": 107178 }, { "epoch": 11.01551901336074, "grad_norm": 0.060241155326366425, "learning_rate": 0.01, "loss": 1.9523, "step": 107181 }, { "epoch": 11.015827338129496, "grad_norm": 0.07701603323221207, "learning_rate": 0.01, "loss": 1.9417, "step": 107184 }, { "epoch": 11.016135662898252, "grad_norm": 0.10775244981050491, "learning_rate": 0.01, "loss": 1.952, "step": 107187 }, { "epoch": 11.016443987667008, "grad_norm": 0.1312934160232544, "learning_rate": 0.01, "loss": 1.959, "step": 107190 }, { "epoch": 11.016752312435766, "grad_norm": 0.20268575847148895, "learning_rate": 0.01, "loss": 1.9461, "step": 107193 }, { "epoch": 11.017060637204523, "grad_norm": 0.15342745184898376, "learning_rate": 0.01, "loss": 1.9528, "step": 107196 }, { "epoch": 11.017368961973279, "grad_norm": 0.08270037919282913, "learning_rate": 0.01, "loss": 1.9441, "step": 107199 }, { "epoch": 11.017677286742035, "grad_norm": 0.053513944149017334, "learning_rate": 0.01, "loss": 1.9413, "step": 107202 }, { "epoch": 11.017985611510792, "grad_norm": 0.051726341247558594, "learning_rate": 0.01, "loss": 1.9387, "step": 107205 }, { "epoch": 11.018293936279548, "grad_norm": 0.04074995592236519, "learning_rate": 0.01, "loss": 1.9533, "step": 107208 }, { "epoch": 11.018602261048304, "grad_norm": 0.045796941965818405, "learning_rate": 0.01, "loss": 1.9507, "step": 107211 }, { "epoch": 11.01891058581706, "grad_norm": 0.07273279130458832, "learning_rate": 0.01, "loss": 1.9717, "step": 107214 }, { "epoch": 11.019218910585817, "grad_norm": 0.060059960931539536, "learning_rate": 0.01, "loss": 1.9259, "step": 107217 }, { "epoch": 11.019527235354573, "grad_norm": 0.03590172901749611, "learning_rate": 0.01, "loss": 1.9445, "step": 107220 }, { "epoch": 11.01983556012333, "grad_norm": 0.09819181263446808, "learning_rate": 0.01, "loss": 1.9353, "step": 107223 }, { "epoch": 11.020143884892086, "grad_norm": 0.11009673029184341, "learning_rate": 0.01, "loss": 1.9557, "step": 107226 }, { "epoch": 11.020452209660842, "grad_norm": 0.0914946123957634, "learning_rate": 0.01, "loss": 1.9424, "step": 107229 }, { "epoch": 11.020760534429598, "grad_norm": 0.12640529870986938, "learning_rate": 0.01, "loss": 1.9356, "step": 107232 }, { "epoch": 11.021068859198355, "grad_norm": 0.1176491379737854, "learning_rate": 0.01, "loss": 1.9484, "step": 107235 }, { "epoch": 11.021377183967113, "grad_norm": 0.06920988112688065, "learning_rate": 0.01, "loss": 1.9238, "step": 107238 }, { "epoch": 11.02168550873587, "grad_norm": 0.034434013068675995, "learning_rate": 0.01, "loss": 1.9309, "step": 107241 }, { "epoch": 11.021993833504625, "grad_norm": 0.04428716003894806, "learning_rate": 0.01, "loss": 1.9408, "step": 107244 }, { "epoch": 11.022302158273382, "grad_norm": 0.06214417517185211, "learning_rate": 0.01, "loss": 1.9336, "step": 107247 }, { "epoch": 11.022610483042138, "grad_norm": 0.053140249103307724, "learning_rate": 0.01, "loss": 1.9477, "step": 107250 }, { "epoch": 11.022918807810894, "grad_norm": 0.05963686481118202, "learning_rate": 0.01, "loss": 1.9445, "step": 107253 }, { "epoch": 11.02322713257965, "grad_norm": 0.12098684161901474, "learning_rate": 0.01, "loss": 1.9487, "step": 107256 }, { "epoch": 11.023535457348407, "grad_norm": 0.06041080132126808, "learning_rate": 0.01, "loss": 1.9455, "step": 107259 }, { "epoch": 11.023843782117163, "grad_norm": 0.08065960556268692, "learning_rate": 0.01, "loss": 1.9279, "step": 107262 }, { "epoch": 11.02415210688592, "grad_norm": 0.05811164900660515, "learning_rate": 0.01, "loss": 1.9639, "step": 107265 }, { "epoch": 11.024460431654676, "grad_norm": 0.11217566579580307, "learning_rate": 0.01, "loss": 1.9458, "step": 107268 }, { "epoch": 11.024768756423432, "grad_norm": 0.06718678772449493, "learning_rate": 0.01, "loss": 1.954, "step": 107271 }, { "epoch": 11.025077081192189, "grad_norm": 0.05535897985100746, "learning_rate": 0.01, "loss": 1.9556, "step": 107274 }, { "epoch": 11.025385405960945, "grad_norm": 0.07758058607578278, "learning_rate": 0.01, "loss": 1.9382, "step": 107277 }, { "epoch": 11.025693730729701, "grad_norm": 0.11350016295909882, "learning_rate": 0.01, "loss": 1.9594, "step": 107280 }, { "epoch": 11.02600205549846, "grad_norm": 0.09810200333595276, "learning_rate": 0.01, "loss": 1.9477, "step": 107283 }, { "epoch": 11.026310380267216, "grad_norm": 0.1021398976445198, "learning_rate": 0.01, "loss": 1.9504, "step": 107286 }, { "epoch": 11.026618705035972, "grad_norm": 0.051616981625556946, "learning_rate": 0.01, "loss": 1.9319, "step": 107289 }, { "epoch": 11.026927029804728, "grad_norm": 0.039984527975320816, "learning_rate": 0.01, "loss": 1.9483, "step": 107292 }, { "epoch": 11.027235354573484, "grad_norm": 0.05325577035546303, "learning_rate": 0.01, "loss": 1.9555, "step": 107295 }, { "epoch": 11.02754367934224, "grad_norm": 0.06299860030412674, "learning_rate": 0.01, "loss": 1.9279, "step": 107298 }, { "epoch": 11.027852004110997, "grad_norm": 0.03828146308660507, "learning_rate": 0.01, "loss": 1.9317, "step": 107301 }, { "epoch": 11.028160328879753, "grad_norm": 0.04722427949309349, "learning_rate": 0.01, "loss": 1.9539, "step": 107304 }, { "epoch": 11.02846865364851, "grad_norm": 0.06583574414253235, "learning_rate": 0.01, "loss": 1.9686, "step": 107307 }, { "epoch": 11.028776978417266, "grad_norm": 0.1033751517534256, "learning_rate": 0.01, "loss": 1.9398, "step": 107310 }, { "epoch": 11.029085303186022, "grad_norm": 0.08672161400318146, "learning_rate": 0.01, "loss": 1.9315, "step": 107313 }, { "epoch": 11.029393627954779, "grad_norm": 0.04516211152076721, "learning_rate": 0.01, "loss": 1.917, "step": 107316 }, { "epoch": 11.029701952723535, "grad_norm": 0.03711722046136856, "learning_rate": 0.01, "loss": 1.9279, "step": 107319 }, { "epoch": 11.030010277492291, "grad_norm": 0.033995870500802994, "learning_rate": 0.01, "loss": 1.9434, "step": 107322 }, { "epoch": 11.030318602261048, "grad_norm": 0.04240257292985916, "learning_rate": 0.01, "loss": 1.9236, "step": 107325 }, { "epoch": 11.030626927029804, "grad_norm": 0.06180763244628906, "learning_rate": 0.01, "loss": 1.9319, "step": 107328 }, { "epoch": 11.030935251798562, "grad_norm": 0.13815215229988098, "learning_rate": 0.01, "loss": 1.9393, "step": 107331 }, { "epoch": 11.031243576567318, "grad_norm": 0.10653617233037949, "learning_rate": 0.01, "loss": 1.9743, "step": 107334 }, { "epoch": 11.031551901336075, "grad_norm": 0.05722755938768387, "learning_rate": 0.01, "loss": 1.9438, "step": 107337 }, { "epoch": 11.03186022610483, "grad_norm": 0.044469717890024185, "learning_rate": 0.01, "loss": 1.9523, "step": 107340 }, { "epoch": 11.032168550873587, "grad_norm": 0.029477711766958237, "learning_rate": 0.01, "loss": 1.9658, "step": 107343 }, { "epoch": 11.032476875642343, "grad_norm": 0.05988297611474991, "learning_rate": 0.01, "loss": 1.9573, "step": 107346 }, { "epoch": 11.0327852004111, "grad_norm": 0.11533161252737045, "learning_rate": 0.01, "loss": 1.9331, "step": 107349 }, { "epoch": 11.033093525179856, "grad_norm": 0.09519381821155548, "learning_rate": 0.01, "loss": 1.9414, "step": 107352 }, { "epoch": 11.033401849948612, "grad_norm": 0.06721679866313934, "learning_rate": 0.01, "loss": 1.956, "step": 107355 }, { "epoch": 11.033710174717369, "grad_norm": 0.051016733050346375, "learning_rate": 0.01, "loss": 1.9784, "step": 107358 }, { "epoch": 11.034018499486125, "grad_norm": 0.06869174540042877, "learning_rate": 0.01, "loss": 1.9279, "step": 107361 }, { "epoch": 11.034326824254881, "grad_norm": 0.038322094827890396, "learning_rate": 0.01, "loss": 1.9521, "step": 107364 }, { "epoch": 11.034635149023638, "grad_norm": 0.17970217764377594, "learning_rate": 0.01, "loss": 1.9382, "step": 107367 }, { "epoch": 11.034943473792394, "grad_norm": 0.0593324676156044, "learning_rate": 0.01, "loss": 1.9408, "step": 107370 }, { "epoch": 11.03525179856115, "grad_norm": 0.03376584127545357, "learning_rate": 0.01, "loss": 1.9606, "step": 107373 }, { "epoch": 11.035560123329908, "grad_norm": 0.03950095176696777, "learning_rate": 0.01, "loss": 1.9395, "step": 107376 }, { "epoch": 11.035868448098665, "grad_norm": 0.03454350680112839, "learning_rate": 0.01, "loss": 1.9501, "step": 107379 }, { "epoch": 11.036176772867421, "grad_norm": 0.037530187517404556, "learning_rate": 0.01, "loss": 1.964, "step": 107382 }, { "epoch": 11.036485097636177, "grad_norm": 0.13739140331745148, "learning_rate": 0.01, "loss": 1.9454, "step": 107385 }, { "epoch": 11.036793422404934, "grad_norm": 0.06409517675638199, "learning_rate": 0.01, "loss": 1.9626, "step": 107388 }, { "epoch": 11.03710174717369, "grad_norm": 0.07674208283424377, "learning_rate": 0.01, "loss": 1.9493, "step": 107391 }, { "epoch": 11.037410071942446, "grad_norm": 0.04071156308054924, "learning_rate": 0.01, "loss": 1.9433, "step": 107394 }, { "epoch": 11.037718396711202, "grad_norm": 0.04651397839188576, "learning_rate": 0.01, "loss": 1.9528, "step": 107397 }, { "epoch": 11.038026721479959, "grad_norm": 0.04479502886533737, "learning_rate": 0.01, "loss": 1.9659, "step": 107400 }, { "epoch": 11.038335046248715, "grad_norm": 0.03764665499329567, "learning_rate": 0.01, "loss": 1.9423, "step": 107403 }, { "epoch": 11.038643371017471, "grad_norm": 0.03378015384078026, "learning_rate": 0.01, "loss": 1.9637, "step": 107406 }, { "epoch": 11.038951695786228, "grad_norm": 0.04425158351659775, "learning_rate": 0.01, "loss": 1.9453, "step": 107409 }, { "epoch": 11.039260020554984, "grad_norm": 0.07492903620004654, "learning_rate": 0.01, "loss": 1.9136, "step": 107412 }, { "epoch": 11.03956834532374, "grad_norm": 0.15413755178451538, "learning_rate": 0.01, "loss": 1.9509, "step": 107415 }, { "epoch": 11.039876670092497, "grad_norm": 0.06271611899137497, "learning_rate": 0.01, "loss": 1.963, "step": 107418 }, { "epoch": 11.040184994861255, "grad_norm": 0.08407186716794968, "learning_rate": 0.01, "loss": 1.9516, "step": 107421 }, { "epoch": 11.040493319630011, "grad_norm": 0.07308407872915268, "learning_rate": 0.01, "loss": 1.9683, "step": 107424 }, { "epoch": 11.040801644398767, "grad_norm": 0.0837329849600792, "learning_rate": 0.01, "loss": 1.9487, "step": 107427 }, { "epoch": 11.041109969167524, "grad_norm": 0.07305663079023361, "learning_rate": 0.01, "loss": 1.9317, "step": 107430 }, { "epoch": 11.04141829393628, "grad_norm": 0.06407970190048218, "learning_rate": 0.01, "loss": 1.9381, "step": 107433 }, { "epoch": 11.041726618705036, "grad_norm": 0.06141374260187149, "learning_rate": 0.01, "loss": 1.9343, "step": 107436 }, { "epoch": 11.042034943473793, "grad_norm": 0.12983256578445435, "learning_rate": 0.01, "loss": 1.9425, "step": 107439 }, { "epoch": 11.042343268242549, "grad_norm": 0.1181797981262207, "learning_rate": 0.01, "loss": 1.9516, "step": 107442 }, { "epoch": 11.042651593011305, "grad_norm": 0.06571821868419647, "learning_rate": 0.01, "loss": 1.9288, "step": 107445 }, { "epoch": 11.042959917780061, "grad_norm": 0.047969378530979156, "learning_rate": 0.01, "loss": 1.961, "step": 107448 }, { "epoch": 11.043268242548818, "grad_norm": 0.04128817096352577, "learning_rate": 0.01, "loss": 1.9417, "step": 107451 }, { "epoch": 11.043576567317574, "grad_norm": 0.0404379703104496, "learning_rate": 0.01, "loss": 1.9616, "step": 107454 }, { "epoch": 11.04388489208633, "grad_norm": 0.07635513693094254, "learning_rate": 0.01, "loss": 1.9584, "step": 107457 }, { "epoch": 11.044193216855087, "grad_norm": 0.09891491383314133, "learning_rate": 0.01, "loss": 1.9715, "step": 107460 }, { "epoch": 11.044501541623843, "grad_norm": 0.08441543579101562, "learning_rate": 0.01, "loss": 1.9407, "step": 107463 }, { "epoch": 11.044809866392601, "grad_norm": 0.10859276354312897, "learning_rate": 0.01, "loss": 1.9794, "step": 107466 }, { "epoch": 11.045118191161357, "grad_norm": 0.10530184209346771, "learning_rate": 0.01, "loss": 1.913, "step": 107469 }, { "epoch": 11.045426515930114, "grad_norm": 0.04014790803194046, "learning_rate": 0.01, "loss": 1.9615, "step": 107472 }, { "epoch": 11.04573484069887, "grad_norm": 0.09878093749284744, "learning_rate": 0.01, "loss": 1.9602, "step": 107475 }, { "epoch": 11.046043165467626, "grad_norm": 0.08944501727819443, "learning_rate": 0.01, "loss": 1.9466, "step": 107478 }, { "epoch": 11.046351490236383, "grad_norm": 0.03847057744860649, "learning_rate": 0.01, "loss": 1.9256, "step": 107481 }, { "epoch": 11.046659815005139, "grad_norm": 0.06581749767065048, "learning_rate": 0.01, "loss": 1.9293, "step": 107484 }, { "epoch": 11.046968139773895, "grad_norm": 0.08069849759340286, "learning_rate": 0.01, "loss": 1.9628, "step": 107487 }, { "epoch": 11.047276464542652, "grad_norm": 0.049780331552028656, "learning_rate": 0.01, "loss": 1.9189, "step": 107490 }, { "epoch": 11.047584789311408, "grad_norm": 0.06270769983530045, "learning_rate": 0.01, "loss": 1.9314, "step": 107493 }, { "epoch": 11.047893114080164, "grad_norm": 0.055340807884931564, "learning_rate": 0.01, "loss": 1.9495, "step": 107496 }, { "epoch": 11.04820143884892, "grad_norm": 0.09283401072025299, "learning_rate": 0.01, "loss": 1.9522, "step": 107499 }, { "epoch": 11.048509763617677, "grad_norm": 0.0609809048473835, "learning_rate": 0.01, "loss": 1.9499, "step": 107502 }, { "epoch": 11.048818088386433, "grad_norm": 0.13390469551086426, "learning_rate": 0.01, "loss": 1.9491, "step": 107505 }, { "epoch": 11.04912641315519, "grad_norm": 0.0418582409620285, "learning_rate": 0.01, "loss": 1.9676, "step": 107508 }, { "epoch": 11.049434737923946, "grad_norm": 0.06463424861431122, "learning_rate": 0.01, "loss": 1.9561, "step": 107511 }, { "epoch": 11.049743062692704, "grad_norm": 0.05462968349456787, "learning_rate": 0.01, "loss": 1.9224, "step": 107514 }, { "epoch": 11.05005138746146, "grad_norm": 0.040294911712408066, "learning_rate": 0.01, "loss": 1.9564, "step": 107517 }, { "epoch": 11.050359712230216, "grad_norm": 0.07260268926620483, "learning_rate": 0.01, "loss": 1.9686, "step": 107520 }, { "epoch": 11.050668036998973, "grad_norm": 0.10488167405128479, "learning_rate": 0.01, "loss": 1.9537, "step": 107523 }, { "epoch": 11.050976361767729, "grad_norm": 0.043960295617580414, "learning_rate": 0.01, "loss": 1.9299, "step": 107526 }, { "epoch": 11.051284686536485, "grad_norm": 0.0684865415096283, "learning_rate": 0.01, "loss": 1.9484, "step": 107529 }, { "epoch": 11.051593011305242, "grad_norm": 0.08690311014652252, "learning_rate": 0.01, "loss": 1.9495, "step": 107532 }, { "epoch": 11.051901336073998, "grad_norm": 0.053025078028440475, "learning_rate": 0.01, "loss": 1.9294, "step": 107535 }, { "epoch": 11.052209660842754, "grad_norm": 0.07714751362800598, "learning_rate": 0.01, "loss": 1.9464, "step": 107538 }, { "epoch": 11.05251798561151, "grad_norm": 0.06200997158885002, "learning_rate": 0.01, "loss": 1.965, "step": 107541 }, { "epoch": 11.052826310380267, "grad_norm": 0.08136401325464249, "learning_rate": 0.01, "loss": 1.9213, "step": 107544 }, { "epoch": 11.053134635149023, "grad_norm": 0.047901589423418045, "learning_rate": 0.01, "loss": 1.9401, "step": 107547 }, { "epoch": 11.05344295991778, "grad_norm": 0.03807402402162552, "learning_rate": 0.01, "loss": 1.9174, "step": 107550 }, { "epoch": 11.053751284686536, "grad_norm": 0.06316124647855759, "learning_rate": 0.01, "loss": 1.9229, "step": 107553 }, { "epoch": 11.054059609455292, "grad_norm": 0.05572817102074623, "learning_rate": 0.01, "loss": 1.9392, "step": 107556 }, { "epoch": 11.05436793422405, "grad_norm": 0.04559632018208504, "learning_rate": 0.01, "loss": 1.9405, "step": 107559 }, { "epoch": 11.054676258992806, "grad_norm": 0.07237659394741058, "learning_rate": 0.01, "loss": 1.9452, "step": 107562 }, { "epoch": 11.054984583761563, "grad_norm": 0.092888742685318, "learning_rate": 0.01, "loss": 1.971, "step": 107565 }, { "epoch": 11.055292908530319, "grad_norm": 0.08010265231132507, "learning_rate": 0.01, "loss": 1.939, "step": 107568 }, { "epoch": 11.055601233299075, "grad_norm": 0.08017858117818832, "learning_rate": 0.01, "loss": 1.9589, "step": 107571 }, { "epoch": 11.055909558067832, "grad_norm": 0.07664944976568222, "learning_rate": 0.01, "loss": 1.9431, "step": 107574 }, { "epoch": 11.056217882836588, "grad_norm": 0.04926150292158127, "learning_rate": 0.01, "loss": 1.9364, "step": 107577 }, { "epoch": 11.056526207605344, "grad_norm": 0.04636576399207115, "learning_rate": 0.01, "loss": 1.9509, "step": 107580 }, { "epoch": 11.0568345323741, "grad_norm": 0.053231917321681976, "learning_rate": 0.01, "loss": 1.9586, "step": 107583 }, { "epoch": 11.057142857142857, "grad_norm": 0.06848327815532684, "learning_rate": 0.01, "loss": 1.979, "step": 107586 }, { "epoch": 11.057451181911613, "grad_norm": 0.05932049825787544, "learning_rate": 0.01, "loss": 1.9294, "step": 107589 }, { "epoch": 11.05775950668037, "grad_norm": 0.040187012404203415, "learning_rate": 0.01, "loss": 1.912, "step": 107592 }, { "epoch": 11.058067831449126, "grad_norm": 0.21725735068321228, "learning_rate": 0.01, "loss": 1.9563, "step": 107595 }, { "epoch": 11.058376156217882, "grad_norm": 0.10536807775497437, "learning_rate": 0.01, "loss": 1.9389, "step": 107598 }, { "epoch": 11.058684480986638, "grad_norm": 0.07488951086997986, "learning_rate": 0.01, "loss": 1.9394, "step": 107601 }, { "epoch": 11.058992805755397, "grad_norm": 0.04247715696692467, "learning_rate": 0.01, "loss": 1.9342, "step": 107604 }, { "epoch": 11.059301130524153, "grad_norm": 0.033919550478458405, "learning_rate": 0.01, "loss": 1.9495, "step": 107607 }, { "epoch": 11.05960945529291, "grad_norm": 0.04327520728111267, "learning_rate": 0.01, "loss": 1.9236, "step": 107610 }, { "epoch": 11.059917780061665, "grad_norm": 0.08149202167987823, "learning_rate": 0.01, "loss": 1.9533, "step": 107613 }, { "epoch": 11.060226104830422, "grad_norm": 0.04497545585036278, "learning_rate": 0.01, "loss": 1.9268, "step": 107616 }, { "epoch": 11.060534429599178, "grad_norm": 0.049667440354824066, "learning_rate": 0.01, "loss": 1.9344, "step": 107619 }, { "epoch": 11.060842754367934, "grad_norm": 0.040313173085451126, "learning_rate": 0.01, "loss": 1.942, "step": 107622 }, { "epoch": 11.06115107913669, "grad_norm": 0.042108844965696335, "learning_rate": 0.01, "loss": 1.9493, "step": 107625 }, { "epoch": 11.061459403905447, "grad_norm": 0.13305234909057617, "learning_rate": 0.01, "loss": 1.9383, "step": 107628 }, { "epoch": 11.061767728674203, "grad_norm": 0.05455876141786575, "learning_rate": 0.01, "loss": 1.9359, "step": 107631 }, { "epoch": 11.06207605344296, "grad_norm": 0.07920195162296295, "learning_rate": 0.01, "loss": 1.9183, "step": 107634 }, { "epoch": 11.062384378211716, "grad_norm": 0.07053566724061966, "learning_rate": 0.01, "loss": 1.9549, "step": 107637 }, { "epoch": 11.062692702980472, "grad_norm": 0.0445457398891449, "learning_rate": 0.01, "loss": 1.9555, "step": 107640 }, { "epoch": 11.063001027749229, "grad_norm": 0.044817425310611725, "learning_rate": 0.01, "loss": 1.9535, "step": 107643 }, { "epoch": 11.063309352517985, "grad_norm": 0.12310293316841125, "learning_rate": 0.01, "loss": 1.9518, "step": 107646 }, { "epoch": 11.063617677286741, "grad_norm": 0.04152672365307808, "learning_rate": 0.01, "loss": 1.9467, "step": 107649 }, { "epoch": 11.0639260020555, "grad_norm": 0.07941756397485733, "learning_rate": 0.01, "loss": 1.9564, "step": 107652 }, { "epoch": 11.064234326824256, "grad_norm": 0.04817092791199684, "learning_rate": 0.01, "loss": 1.9437, "step": 107655 }, { "epoch": 11.064542651593012, "grad_norm": 0.04424908757209778, "learning_rate": 0.01, "loss": 1.9237, "step": 107658 }, { "epoch": 11.064850976361768, "grad_norm": 0.1192043200135231, "learning_rate": 0.01, "loss": 1.963, "step": 107661 }, { "epoch": 11.065159301130524, "grad_norm": 0.07869096100330353, "learning_rate": 0.01, "loss": 1.9544, "step": 107664 }, { "epoch": 11.06546762589928, "grad_norm": 0.06656874716281891, "learning_rate": 0.01, "loss": 1.9667, "step": 107667 }, { "epoch": 11.065775950668037, "grad_norm": 0.04989762231707573, "learning_rate": 0.01, "loss": 1.9158, "step": 107670 }, { "epoch": 11.066084275436793, "grad_norm": 0.06311410665512085, "learning_rate": 0.01, "loss": 1.9324, "step": 107673 }, { "epoch": 11.06639260020555, "grad_norm": 0.059550900012254715, "learning_rate": 0.01, "loss": 1.9378, "step": 107676 }, { "epoch": 11.066700924974306, "grad_norm": 0.058306027203798294, "learning_rate": 0.01, "loss": 1.9403, "step": 107679 }, { "epoch": 11.067009249743062, "grad_norm": 0.042203981429338455, "learning_rate": 0.01, "loss": 1.9269, "step": 107682 }, { "epoch": 11.067317574511819, "grad_norm": 0.03726264089345932, "learning_rate": 0.01, "loss": 1.9498, "step": 107685 }, { "epoch": 11.067625899280575, "grad_norm": 0.06475093215703964, "learning_rate": 0.01, "loss": 1.9489, "step": 107688 }, { "epoch": 11.067934224049331, "grad_norm": 0.06762620806694031, "learning_rate": 0.01, "loss": 1.9368, "step": 107691 }, { "epoch": 11.068242548818088, "grad_norm": 0.11405432224273682, "learning_rate": 0.01, "loss": 1.9316, "step": 107694 }, { "epoch": 11.068550873586846, "grad_norm": 0.07248923927545547, "learning_rate": 0.01, "loss": 1.9251, "step": 107697 }, { "epoch": 11.068859198355602, "grad_norm": 0.12362180650234222, "learning_rate": 0.01, "loss": 1.9117, "step": 107700 }, { "epoch": 11.069167523124358, "grad_norm": 0.05302852764725685, "learning_rate": 0.01, "loss": 1.964, "step": 107703 }, { "epoch": 11.069475847893115, "grad_norm": 0.05188078433275223, "learning_rate": 0.01, "loss": 1.9603, "step": 107706 }, { "epoch": 11.06978417266187, "grad_norm": 0.11408820003271103, "learning_rate": 0.01, "loss": 1.9439, "step": 107709 }, { "epoch": 11.070092497430627, "grad_norm": 0.06259067356586456, "learning_rate": 0.01, "loss": 1.9424, "step": 107712 }, { "epoch": 11.070400822199383, "grad_norm": 0.042797449976205826, "learning_rate": 0.01, "loss": 1.9545, "step": 107715 }, { "epoch": 11.07070914696814, "grad_norm": 0.03658578172326088, "learning_rate": 0.01, "loss": 1.9464, "step": 107718 }, { "epoch": 11.071017471736896, "grad_norm": 0.09516727924346924, "learning_rate": 0.01, "loss": 1.9512, "step": 107721 }, { "epoch": 11.071325796505652, "grad_norm": 0.0809289962053299, "learning_rate": 0.01, "loss": 1.9626, "step": 107724 }, { "epoch": 11.071634121274409, "grad_norm": 0.04550709202885628, "learning_rate": 0.01, "loss": 1.9449, "step": 107727 }, { "epoch": 11.071942446043165, "grad_norm": 0.09415169060230255, "learning_rate": 0.01, "loss": 1.9223, "step": 107730 }, { "epoch": 11.072250770811921, "grad_norm": 0.11107844114303589, "learning_rate": 0.01, "loss": 1.9492, "step": 107733 }, { "epoch": 11.072559095580678, "grad_norm": 0.045347124338150024, "learning_rate": 0.01, "loss": 1.9161, "step": 107736 }, { "epoch": 11.072867420349434, "grad_norm": 0.07700034976005554, "learning_rate": 0.01, "loss": 1.9552, "step": 107739 }, { "epoch": 11.073175745118192, "grad_norm": 0.07656693458557129, "learning_rate": 0.01, "loss": 1.9516, "step": 107742 }, { "epoch": 11.073484069886948, "grad_norm": 0.038799382746219635, "learning_rate": 0.01, "loss": 1.9324, "step": 107745 }, { "epoch": 11.073792394655705, "grad_norm": 0.09669993072748184, "learning_rate": 0.01, "loss": 1.9352, "step": 107748 }, { "epoch": 11.074100719424461, "grad_norm": 0.06955236941576004, "learning_rate": 0.01, "loss": 1.9658, "step": 107751 }, { "epoch": 11.074409044193217, "grad_norm": 0.05242108553647995, "learning_rate": 0.01, "loss": 1.9509, "step": 107754 }, { "epoch": 11.074717368961974, "grad_norm": 0.07128479331731796, "learning_rate": 0.01, "loss": 1.9284, "step": 107757 }, { "epoch": 11.07502569373073, "grad_norm": 0.1151743233203888, "learning_rate": 0.01, "loss": 1.9333, "step": 107760 }, { "epoch": 11.075334018499486, "grad_norm": 0.04790006950497627, "learning_rate": 0.01, "loss": 1.9504, "step": 107763 }, { "epoch": 11.075642343268242, "grad_norm": 0.03786927089095116, "learning_rate": 0.01, "loss": 1.927, "step": 107766 }, { "epoch": 11.075950668036999, "grad_norm": 0.16423648595809937, "learning_rate": 0.01, "loss": 1.9391, "step": 107769 }, { "epoch": 11.076258992805755, "grad_norm": 0.0376586988568306, "learning_rate": 0.01, "loss": 1.9381, "step": 107772 }, { "epoch": 11.076567317574511, "grad_norm": 0.0358593575656414, "learning_rate": 0.01, "loss": 1.9265, "step": 107775 }, { "epoch": 11.076875642343268, "grad_norm": 0.0530848503112793, "learning_rate": 0.01, "loss": 1.9579, "step": 107778 }, { "epoch": 11.077183967112024, "grad_norm": 0.04405958577990532, "learning_rate": 0.01, "loss": 1.9419, "step": 107781 }, { "epoch": 11.07749229188078, "grad_norm": 0.04691191390156746, "learning_rate": 0.01, "loss": 1.9363, "step": 107784 }, { "epoch": 11.077800616649537, "grad_norm": 0.09400805830955505, "learning_rate": 0.01, "loss": 1.9487, "step": 107787 }, { "epoch": 11.078108941418295, "grad_norm": 0.04769006371498108, "learning_rate": 0.01, "loss": 1.9151, "step": 107790 }, { "epoch": 11.078417266187051, "grad_norm": 0.22307366132736206, "learning_rate": 0.01, "loss": 1.9678, "step": 107793 }, { "epoch": 11.078725590955807, "grad_norm": 0.08780405670404434, "learning_rate": 0.01, "loss": 1.9557, "step": 107796 }, { "epoch": 11.079033915724564, "grad_norm": 0.06174111366271973, "learning_rate": 0.01, "loss": 1.9462, "step": 107799 }, { "epoch": 11.07934224049332, "grad_norm": 0.0673871785402298, "learning_rate": 0.01, "loss": 1.936, "step": 107802 }, { "epoch": 11.079650565262076, "grad_norm": 0.04536505043506622, "learning_rate": 0.01, "loss": 1.9367, "step": 107805 }, { "epoch": 11.079958890030833, "grad_norm": 0.06448676437139511, "learning_rate": 0.01, "loss": 1.9534, "step": 107808 }, { "epoch": 11.080267214799589, "grad_norm": 0.03986390680074692, "learning_rate": 0.01, "loss": 1.9598, "step": 107811 }, { "epoch": 11.080575539568345, "grad_norm": 0.03483456373214722, "learning_rate": 0.01, "loss": 1.9395, "step": 107814 }, { "epoch": 11.080883864337101, "grad_norm": 0.04754864424467087, "learning_rate": 0.01, "loss": 1.9782, "step": 107817 }, { "epoch": 11.081192189105858, "grad_norm": 0.04833069071173668, "learning_rate": 0.01, "loss": 1.9593, "step": 107820 }, { "epoch": 11.081500513874614, "grad_norm": 0.044434234499931335, "learning_rate": 0.01, "loss": 1.922, "step": 107823 }, { "epoch": 11.08180883864337, "grad_norm": 0.04959580674767494, "learning_rate": 0.01, "loss": 1.9494, "step": 107826 }, { "epoch": 11.082117163412127, "grad_norm": 0.16906990110874176, "learning_rate": 0.01, "loss": 1.9383, "step": 107829 }, { "epoch": 11.082425488180883, "grad_norm": 0.04020620882511139, "learning_rate": 0.01, "loss": 1.9495, "step": 107832 }, { "epoch": 11.082733812949641, "grad_norm": 0.04799540340900421, "learning_rate": 0.01, "loss": 1.9384, "step": 107835 }, { "epoch": 11.083042137718397, "grad_norm": 0.06791256368160248, "learning_rate": 0.01, "loss": 1.938, "step": 107838 }, { "epoch": 11.083350462487154, "grad_norm": 0.06625140458345413, "learning_rate": 0.01, "loss": 1.9616, "step": 107841 }, { "epoch": 11.08365878725591, "grad_norm": 0.04998164623975754, "learning_rate": 0.01, "loss": 1.9459, "step": 107844 }, { "epoch": 11.083967112024666, "grad_norm": 0.04162587970495224, "learning_rate": 0.01, "loss": 1.9603, "step": 107847 }, { "epoch": 11.084275436793423, "grad_norm": 0.049901679158210754, "learning_rate": 0.01, "loss": 1.9558, "step": 107850 }, { "epoch": 11.084583761562179, "grad_norm": 0.04299694299697876, "learning_rate": 0.01, "loss": 1.9464, "step": 107853 }, { "epoch": 11.084892086330935, "grad_norm": 0.04808099940419197, "learning_rate": 0.01, "loss": 1.9506, "step": 107856 }, { "epoch": 11.085200411099692, "grad_norm": 0.09989727288484573, "learning_rate": 0.01, "loss": 1.9255, "step": 107859 }, { "epoch": 11.085508735868448, "grad_norm": 0.047710586339235306, "learning_rate": 0.01, "loss": 1.9446, "step": 107862 }, { "epoch": 11.085817060637204, "grad_norm": 0.06054253876209259, "learning_rate": 0.01, "loss": 1.9583, "step": 107865 }, { "epoch": 11.08612538540596, "grad_norm": 0.05632666498422623, "learning_rate": 0.01, "loss": 1.9466, "step": 107868 }, { "epoch": 11.086433710174717, "grad_norm": 0.046154700219631195, "learning_rate": 0.01, "loss": 1.9405, "step": 107871 }, { "epoch": 11.086742034943473, "grad_norm": 0.13304947316646576, "learning_rate": 0.01, "loss": 1.956, "step": 107874 }, { "epoch": 11.08705035971223, "grad_norm": 0.06604369729757309, "learning_rate": 0.01, "loss": 1.9456, "step": 107877 }, { "epoch": 11.087358684480987, "grad_norm": 0.04450514167547226, "learning_rate": 0.01, "loss": 1.9464, "step": 107880 }, { "epoch": 11.087667009249744, "grad_norm": 0.03248503804206848, "learning_rate": 0.01, "loss": 1.9363, "step": 107883 }, { "epoch": 11.0879753340185, "grad_norm": 0.054392118006944656, "learning_rate": 0.01, "loss": 1.9527, "step": 107886 }, { "epoch": 11.088283658787256, "grad_norm": 0.07481544464826584, "learning_rate": 0.01, "loss": 1.9231, "step": 107889 }, { "epoch": 11.088591983556013, "grad_norm": 0.11516573280096054, "learning_rate": 0.01, "loss": 1.9547, "step": 107892 }, { "epoch": 11.088900308324769, "grad_norm": 0.06140699237585068, "learning_rate": 0.01, "loss": 1.9667, "step": 107895 }, { "epoch": 11.089208633093525, "grad_norm": 0.04883519187569618, "learning_rate": 0.01, "loss": 1.9395, "step": 107898 }, { "epoch": 11.089516957862282, "grad_norm": 0.03312469646334648, "learning_rate": 0.01, "loss": 1.9347, "step": 107901 }, { "epoch": 11.089825282631038, "grad_norm": 0.10212545096874237, "learning_rate": 0.01, "loss": 1.9659, "step": 107904 }, { "epoch": 11.090133607399794, "grad_norm": 0.09011369198560715, "learning_rate": 0.01, "loss": 1.937, "step": 107907 }, { "epoch": 11.09044193216855, "grad_norm": 0.08168302476406097, "learning_rate": 0.01, "loss": 1.9562, "step": 107910 }, { "epoch": 11.090750256937307, "grad_norm": 0.09307815134525299, "learning_rate": 0.01, "loss": 1.9442, "step": 107913 }, { "epoch": 11.091058581706063, "grad_norm": 0.06893011182546616, "learning_rate": 0.01, "loss": 1.9492, "step": 107916 }, { "epoch": 11.09136690647482, "grad_norm": 0.11134271323680878, "learning_rate": 0.01, "loss": 1.9291, "step": 107919 }, { "epoch": 11.091675231243576, "grad_norm": 0.06912576407194138, "learning_rate": 0.01, "loss": 1.9439, "step": 107922 }, { "epoch": 11.091983556012334, "grad_norm": 0.052220139652490616, "learning_rate": 0.01, "loss": 1.9514, "step": 107925 }, { "epoch": 11.09229188078109, "grad_norm": 0.05896226316690445, "learning_rate": 0.01, "loss": 1.9349, "step": 107928 }, { "epoch": 11.092600205549846, "grad_norm": 0.059881266206502914, "learning_rate": 0.01, "loss": 1.9594, "step": 107931 }, { "epoch": 11.092908530318603, "grad_norm": 0.07263971865177155, "learning_rate": 0.01, "loss": 1.9565, "step": 107934 }, { "epoch": 11.093216855087359, "grad_norm": 0.048972729593515396, "learning_rate": 0.01, "loss": 1.9535, "step": 107937 }, { "epoch": 11.093525179856115, "grad_norm": 0.03646502271294594, "learning_rate": 0.01, "loss": 1.9393, "step": 107940 }, { "epoch": 11.093833504624872, "grad_norm": 0.03585082292556763, "learning_rate": 0.01, "loss": 1.9367, "step": 107943 }, { "epoch": 11.094141829393628, "grad_norm": 0.05213327333331108, "learning_rate": 0.01, "loss": 1.9205, "step": 107946 }, { "epoch": 11.094450154162384, "grad_norm": 0.056754935532808304, "learning_rate": 0.01, "loss": 1.9606, "step": 107949 }, { "epoch": 11.09475847893114, "grad_norm": 0.06303158402442932, "learning_rate": 0.01, "loss": 1.9497, "step": 107952 }, { "epoch": 11.095066803699897, "grad_norm": 0.04736315459012985, "learning_rate": 0.01, "loss": 1.9327, "step": 107955 }, { "epoch": 11.095375128468653, "grad_norm": 0.04783078655600548, "learning_rate": 0.01, "loss": 1.941, "step": 107958 }, { "epoch": 11.09568345323741, "grad_norm": 0.0429544523358345, "learning_rate": 0.01, "loss": 1.9526, "step": 107961 }, { "epoch": 11.095991778006166, "grad_norm": 0.10795792192220688, "learning_rate": 0.01, "loss": 1.9181, "step": 107964 }, { "epoch": 11.096300102774922, "grad_norm": 0.051838699728250504, "learning_rate": 0.01, "loss": 1.9486, "step": 107967 }, { "epoch": 11.096608427543678, "grad_norm": 0.14740125834941864, "learning_rate": 0.01, "loss": 1.9672, "step": 107970 }, { "epoch": 11.096916752312437, "grad_norm": 0.10696642845869064, "learning_rate": 0.01, "loss": 1.9516, "step": 107973 }, { "epoch": 11.097225077081193, "grad_norm": 0.04325505718588829, "learning_rate": 0.01, "loss": 1.9421, "step": 107976 }, { "epoch": 11.09753340184995, "grad_norm": 0.03891993314027786, "learning_rate": 0.01, "loss": 1.9354, "step": 107979 }, { "epoch": 11.097841726618705, "grad_norm": 0.06790821254253387, "learning_rate": 0.01, "loss": 1.9492, "step": 107982 }, { "epoch": 11.098150051387462, "grad_norm": 0.0682348757982254, "learning_rate": 0.01, "loss": 1.9489, "step": 107985 }, { "epoch": 11.098458376156218, "grad_norm": 0.04477721452713013, "learning_rate": 0.01, "loss": 1.9458, "step": 107988 }, { "epoch": 11.098766700924974, "grad_norm": 0.1636943221092224, "learning_rate": 0.01, "loss": 1.9227, "step": 107991 }, { "epoch": 11.09907502569373, "grad_norm": 0.10245567560195923, "learning_rate": 0.01, "loss": 1.9498, "step": 107994 }, { "epoch": 11.099383350462487, "grad_norm": 0.09457534551620483, "learning_rate": 0.01, "loss": 1.9504, "step": 107997 }, { "epoch": 11.099691675231243, "grad_norm": 0.06260280311107635, "learning_rate": 0.01, "loss": 1.9439, "step": 108000 }, { "epoch": 11.1, "grad_norm": 0.04717349261045456, "learning_rate": 0.01, "loss": 1.9528, "step": 108003 }, { "epoch": 11.100308324768756, "grad_norm": 0.0400058813393116, "learning_rate": 0.01, "loss": 1.9483, "step": 108006 }, { "epoch": 11.100616649537512, "grad_norm": 0.04016717150807381, "learning_rate": 0.01, "loss": 1.9488, "step": 108009 }, { "epoch": 11.100924974306269, "grad_norm": 0.03915245458483696, "learning_rate": 0.01, "loss": 1.9554, "step": 108012 }, { "epoch": 11.101233299075025, "grad_norm": 0.050850559026002884, "learning_rate": 0.01, "loss": 1.9324, "step": 108015 }, { "epoch": 11.101541623843783, "grad_norm": 0.055654145777225494, "learning_rate": 0.01, "loss": 1.9389, "step": 108018 }, { "epoch": 11.10184994861254, "grad_norm": 0.1390688419342041, "learning_rate": 0.01, "loss": 1.9441, "step": 108021 }, { "epoch": 11.102158273381296, "grad_norm": 0.044200409203767776, "learning_rate": 0.01, "loss": 1.9503, "step": 108024 }, { "epoch": 11.102466598150052, "grad_norm": 0.03880887106060982, "learning_rate": 0.01, "loss": 1.9487, "step": 108027 }, { "epoch": 11.102774922918808, "grad_norm": 0.08580953627824783, "learning_rate": 0.01, "loss": 1.9428, "step": 108030 }, { "epoch": 11.103083247687564, "grad_norm": 0.08826398104429245, "learning_rate": 0.01, "loss": 1.9528, "step": 108033 }, { "epoch": 11.10339157245632, "grad_norm": 0.06979897618293762, "learning_rate": 0.01, "loss": 1.9301, "step": 108036 }, { "epoch": 11.103699897225077, "grad_norm": 0.05558208376169205, "learning_rate": 0.01, "loss": 1.9441, "step": 108039 }, { "epoch": 11.104008221993833, "grad_norm": 0.07635145634412766, "learning_rate": 0.01, "loss": 1.9402, "step": 108042 }, { "epoch": 11.10431654676259, "grad_norm": 0.06586254388093948, "learning_rate": 0.01, "loss": 1.9331, "step": 108045 }, { "epoch": 11.104624871531346, "grad_norm": 0.11627889424562454, "learning_rate": 0.01, "loss": 1.967, "step": 108048 }, { "epoch": 11.104933196300102, "grad_norm": 0.05880044028162956, "learning_rate": 0.01, "loss": 1.9353, "step": 108051 }, { "epoch": 11.105241521068859, "grad_norm": 0.04572506994009018, "learning_rate": 0.01, "loss": 1.9556, "step": 108054 }, { "epoch": 11.105549845837615, "grad_norm": 0.09277968853712082, "learning_rate": 0.01, "loss": 1.9361, "step": 108057 }, { "epoch": 11.105858170606371, "grad_norm": 0.03708489239215851, "learning_rate": 0.01, "loss": 1.9181, "step": 108060 }, { "epoch": 11.10616649537513, "grad_norm": 0.11607236415147781, "learning_rate": 0.01, "loss": 1.9522, "step": 108063 }, { "epoch": 11.106474820143886, "grad_norm": 0.060427650809288025, "learning_rate": 0.01, "loss": 1.94, "step": 108066 }, { "epoch": 11.106783144912642, "grad_norm": 0.062446895986795425, "learning_rate": 0.01, "loss": 1.9454, "step": 108069 }, { "epoch": 11.107091469681398, "grad_norm": 0.059566475450992584, "learning_rate": 0.01, "loss": 1.9431, "step": 108072 }, { "epoch": 11.107399794450155, "grad_norm": 0.042464662343263626, "learning_rate": 0.01, "loss": 1.9383, "step": 108075 }, { "epoch": 11.10770811921891, "grad_norm": 0.13029395043849945, "learning_rate": 0.01, "loss": 1.9644, "step": 108078 }, { "epoch": 11.108016443987667, "grad_norm": 0.08560604602098465, "learning_rate": 0.01, "loss": 1.9645, "step": 108081 }, { "epoch": 11.108324768756423, "grad_norm": 0.06263414770364761, "learning_rate": 0.01, "loss": 1.9467, "step": 108084 }, { "epoch": 11.10863309352518, "grad_norm": 0.04986066371202469, "learning_rate": 0.01, "loss": 1.9125, "step": 108087 }, { "epoch": 11.108941418293936, "grad_norm": 0.0476389154791832, "learning_rate": 0.01, "loss": 1.9508, "step": 108090 }, { "epoch": 11.109249743062692, "grad_norm": 0.05869157984852791, "learning_rate": 0.01, "loss": 1.9269, "step": 108093 }, { "epoch": 11.109558067831449, "grad_norm": 0.04113049432635307, "learning_rate": 0.01, "loss": 1.9353, "step": 108096 }, { "epoch": 11.109866392600205, "grad_norm": 0.06492377817630768, "learning_rate": 0.01, "loss": 1.9525, "step": 108099 }, { "epoch": 11.110174717368961, "grad_norm": 0.0680563822388649, "learning_rate": 0.01, "loss": 1.9273, "step": 108102 }, { "epoch": 11.110483042137718, "grad_norm": 0.09744305908679962, "learning_rate": 0.01, "loss": 1.9466, "step": 108105 }, { "epoch": 11.110791366906474, "grad_norm": 0.090627022087574, "learning_rate": 0.01, "loss": 1.9504, "step": 108108 }, { "epoch": 11.111099691675232, "grad_norm": 0.07619655877351761, "learning_rate": 0.01, "loss": 1.9634, "step": 108111 }, { "epoch": 11.111408016443988, "grad_norm": 0.05335146561264992, "learning_rate": 0.01, "loss": 1.9263, "step": 108114 }, { "epoch": 11.111716341212745, "grad_norm": 0.05738540366292, "learning_rate": 0.01, "loss": 1.9516, "step": 108117 }, { "epoch": 11.112024665981501, "grad_norm": 0.12277184426784515, "learning_rate": 0.01, "loss": 1.9148, "step": 108120 }, { "epoch": 11.112332990750257, "grad_norm": 0.05805842578411102, "learning_rate": 0.01, "loss": 1.9413, "step": 108123 }, { "epoch": 11.112641315519014, "grad_norm": 0.061712007969617844, "learning_rate": 0.01, "loss": 1.9433, "step": 108126 }, { "epoch": 11.11294964028777, "grad_norm": 0.045319247990846634, "learning_rate": 0.01, "loss": 1.9439, "step": 108129 }, { "epoch": 11.113257965056526, "grad_norm": 0.1417672485113144, "learning_rate": 0.01, "loss": 1.9501, "step": 108132 }, { "epoch": 11.113566289825282, "grad_norm": 0.051083218306303024, "learning_rate": 0.01, "loss": 1.9318, "step": 108135 }, { "epoch": 11.113874614594039, "grad_norm": 0.03624936193227768, "learning_rate": 0.01, "loss": 1.9488, "step": 108138 }, { "epoch": 11.114182939362795, "grad_norm": 0.05050056055188179, "learning_rate": 0.01, "loss": 1.9392, "step": 108141 }, { "epoch": 11.114491264131551, "grad_norm": 0.05254499241709709, "learning_rate": 0.01, "loss": 1.9296, "step": 108144 }, { "epoch": 11.114799588900308, "grad_norm": 0.04745284095406532, "learning_rate": 0.01, "loss": 1.9372, "step": 108147 }, { "epoch": 11.115107913669064, "grad_norm": 0.05609699711203575, "learning_rate": 0.01, "loss": 1.9565, "step": 108150 }, { "epoch": 11.11541623843782, "grad_norm": 0.04675271734595299, "learning_rate": 0.01, "loss": 1.9421, "step": 108153 }, { "epoch": 11.115724563206578, "grad_norm": 0.11510089039802551, "learning_rate": 0.01, "loss": 1.9254, "step": 108156 }, { "epoch": 11.116032887975335, "grad_norm": 0.06580482423305511, "learning_rate": 0.01, "loss": 1.9751, "step": 108159 }, { "epoch": 11.116341212744091, "grad_norm": 0.10699275135993958, "learning_rate": 0.01, "loss": 1.9541, "step": 108162 }, { "epoch": 11.116649537512847, "grad_norm": 0.10668794810771942, "learning_rate": 0.01, "loss": 1.9294, "step": 108165 }, { "epoch": 11.116957862281604, "grad_norm": 0.05328281223773956, "learning_rate": 0.01, "loss": 1.9374, "step": 108168 }, { "epoch": 11.11726618705036, "grad_norm": 0.13215139508247375, "learning_rate": 0.01, "loss": 1.9607, "step": 108171 }, { "epoch": 11.117574511819116, "grad_norm": 0.12169384956359863, "learning_rate": 0.01, "loss": 1.9334, "step": 108174 }, { "epoch": 11.117882836587873, "grad_norm": 0.05846225097775459, "learning_rate": 0.01, "loss": 1.9392, "step": 108177 }, { "epoch": 11.118191161356629, "grad_norm": 0.0506817102432251, "learning_rate": 0.01, "loss": 1.9547, "step": 108180 }, { "epoch": 11.118499486125385, "grad_norm": 0.04678288847208023, "learning_rate": 0.01, "loss": 1.9447, "step": 108183 }, { "epoch": 11.118807810894141, "grad_norm": 0.051260873675346375, "learning_rate": 0.01, "loss": 1.9489, "step": 108186 }, { "epoch": 11.119116135662898, "grad_norm": 0.07663340866565704, "learning_rate": 0.01, "loss": 1.9437, "step": 108189 }, { "epoch": 11.119424460431654, "grad_norm": 0.09651526063680649, "learning_rate": 0.01, "loss": 1.9545, "step": 108192 }, { "epoch": 11.11973278520041, "grad_norm": 0.09631019085645676, "learning_rate": 0.01, "loss": 1.9459, "step": 108195 }, { "epoch": 11.120041109969167, "grad_norm": 0.07368805259466171, "learning_rate": 0.01, "loss": 1.9347, "step": 108198 }, { "epoch": 11.120349434737925, "grad_norm": 0.04535972326993942, "learning_rate": 0.01, "loss": 1.9461, "step": 108201 }, { "epoch": 11.120657759506681, "grad_norm": 0.04977213591337204, "learning_rate": 0.01, "loss": 1.9276, "step": 108204 }, { "epoch": 11.120966084275437, "grad_norm": 0.06690436601638794, "learning_rate": 0.01, "loss": 1.9327, "step": 108207 }, { "epoch": 11.121274409044194, "grad_norm": 0.03737137094140053, "learning_rate": 0.01, "loss": 1.9276, "step": 108210 }, { "epoch": 11.12158273381295, "grad_norm": 0.17570669949054718, "learning_rate": 0.01, "loss": 1.9485, "step": 108213 }, { "epoch": 11.121891058581706, "grad_norm": 0.06054900959134102, "learning_rate": 0.01, "loss": 1.9485, "step": 108216 }, { "epoch": 11.122199383350463, "grad_norm": 0.052555911242961884, "learning_rate": 0.01, "loss": 1.9552, "step": 108219 }, { "epoch": 11.122507708119219, "grad_norm": 0.04419514164328575, "learning_rate": 0.01, "loss": 1.9597, "step": 108222 }, { "epoch": 11.122816032887975, "grad_norm": 0.05516252666711807, "learning_rate": 0.01, "loss": 1.9414, "step": 108225 }, { "epoch": 11.123124357656732, "grad_norm": 0.0422653891146183, "learning_rate": 0.01, "loss": 1.9206, "step": 108228 }, { "epoch": 11.123432682425488, "grad_norm": 0.031638115644454956, "learning_rate": 0.01, "loss": 1.9313, "step": 108231 }, { "epoch": 11.123741007194244, "grad_norm": 0.03453582525253296, "learning_rate": 0.01, "loss": 1.9394, "step": 108234 }, { "epoch": 11.124049331963, "grad_norm": 0.13449911773204803, "learning_rate": 0.01, "loss": 1.9212, "step": 108237 }, { "epoch": 11.124357656731757, "grad_norm": 0.10251076519489288, "learning_rate": 0.01, "loss": 1.929, "step": 108240 }, { "epoch": 11.124665981500513, "grad_norm": 0.07683219015598297, "learning_rate": 0.01, "loss": 1.9223, "step": 108243 }, { "epoch": 11.12497430626927, "grad_norm": 0.04399493709206581, "learning_rate": 0.01, "loss": 1.9455, "step": 108246 }, { "epoch": 11.125282631038028, "grad_norm": 0.04075119271874428, "learning_rate": 0.01, "loss": 1.9479, "step": 108249 }, { "epoch": 11.125590955806784, "grad_norm": 0.04534753039479256, "learning_rate": 0.01, "loss": 1.9471, "step": 108252 }, { "epoch": 11.12589928057554, "grad_norm": 0.04419349879026413, "learning_rate": 0.01, "loss": 1.9548, "step": 108255 }, { "epoch": 11.126207605344296, "grad_norm": 0.0396546870470047, "learning_rate": 0.01, "loss": 1.9248, "step": 108258 }, { "epoch": 11.126515930113053, "grad_norm": 0.07965173572301865, "learning_rate": 0.01, "loss": 1.9301, "step": 108261 }, { "epoch": 11.126824254881809, "grad_norm": 0.03837994113564491, "learning_rate": 0.01, "loss": 1.9494, "step": 108264 }, { "epoch": 11.127132579650565, "grad_norm": 0.05804935842752457, "learning_rate": 0.01, "loss": 1.9455, "step": 108267 }, { "epoch": 11.127440904419322, "grad_norm": 0.058049287647008896, "learning_rate": 0.01, "loss": 1.9505, "step": 108270 }, { "epoch": 11.127749229188078, "grad_norm": 0.055208366364240646, "learning_rate": 0.01, "loss": 1.9189, "step": 108273 }, { "epoch": 11.128057553956834, "grad_norm": 0.10259728878736496, "learning_rate": 0.01, "loss": 1.9436, "step": 108276 }, { "epoch": 11.12836587872559, "grad_norm": 0.04577815160155296, "learning_rate": 0.01, "loss": 1.98, "step": 108279 }, { "epoch": 11.128674203494347, "grad_norm": 0.061545539647340775, "learning_rate": 0.01, "loss": 1.9463, "step": 108282 }, { "epoch": 11.128982528263103, "grad_norm": 0.03951617330312729, "learning_rate": 0.01, "loss": 1.941, "step": 108285 }, { "epoch": 11.12929085303186, "grad_norm": 0.06631302833557129, "learning_rate": 0.01, "loss": 1.9456, "step": 108288 }, { "epoch": 11.129599177800616, "grad_norm": 0.047301799058914185, "learning_rate": 0.01, "loss": 1.9313, "step": 108291 }, { "epoch": 11.129907502569374, "grad_norm": 0.06022251769900322, "learning_rate": 0.01, "loss": 1.9223, "step": 108294 }, { "epoch": 11.13021582733813, "grad_norm": 0.07660970091819763, "learning_rate": 0.01, "loss": 1.9745, "step": 108297 }, { "epoch": 11.130524152106887, "grad_norm": 0.06937503069639206, "learning_rate": 0.01, "loss": 1.9331, "step": 108300 }, { "epoch": 11.130832476875643, "grad_norm": 0.09794414788484573, "learning_rate": 0.01, "loss": 1.9372, "step": 108303 }, { "epoch": 11.1311408016444, "grad_norm": 0.05716533213853836, "learning_rate": 0.01, "loss": 1.9401, "step": 108306 }, { "epoch": 11.131449126413155, "grad_norm": 0.09398681670427322, "learning_rate": 0.01, "loss": 1.9556, "step": 108309 }, { "epoch": 11.131757451181912, "grad_norm": 0.052096445113420486, "learning_rate": 0.01, "loss": 1.9323, "step": 108312 }, { "epoch": 11.132065775950668, "grad_norm": 0.09356788545846939, "learning_rate": 0.01, "loss": 1.9391, "step": 108315 }, { "epoch": 11.132374100719424, "grad_norm": 0.04018421098589897, "learning_rate": 0.01, "loss": 1.9556, "step": 108318 }, { "epoch": 11.13268242548818, "grad_norm": 0.11394383758306503, "learning_rate": 0.01, "loss": 1.9314, "step": 108321 }, { "epoch": 11.132990750256937, "grad_norm": 0.09426377713680267, "learning_rate": 0.01, "loss": 1.9437, "step": 108324 }, { "epoch": 11.133299075025693, "grad_norm": 0.08016916364431381, "learning_rate": 0.01, "loss": 1.9227, "step": 108327 }, { "epoch": 11.13360739979445, "grad_norm": 0.042565468698740005, "learning_rate": 0.01, "loss": 1.9395, "step": 108330 }, { "epoch": 11.133915724563206, "grad_norm": 0.11248867213726044, "learning_rate": 0.01, "loss": 1.965, "step": 108333 }, { "epoch": 11.134224049331962, "grad_norm": 0.036491721868515015, "learning_rate": 0.01, "loss": 1.9678, "step": 108336 }, { "epoch": 11.13453237410072, "grad_norm": 0.04385516792535782, "learning_rate": 0.01, "loss": 1.9578, "step": 108339 }, { "epoch": 11.134840698869477, "grad_norm": 0.05395470932126045, "learning_rate": 0.01, "loss": 1.9451, "step": 108342 }, { "epoch": 11.135149023638233, "grad_norm": 0.04313792288303375, "learning_rate": 0.01, "loss": 1.9402, "step": 108345 }, { "epoch": 11.13545734840699, "grad_norm": 0.03667489066720009, "learning_rate": 0.01, "loss": 1.9143, "step": 108348 }, { "epoch": 11.135765673175746, "grad_norm": 0.04531373456120491, "learning_rate": 0.01, "loss": 1.934, "step": 108351 }, { "epoch": 11.136073997944502, "grad_norm": 0.056769829243421555, "learning_rate": 0.01, "loss": 1.9556, "step": 108354 }, { "epoch": 11.136382322713258, "grad_norm": 0.05138956755399704, "learning_rate": 0.01, "loss": 1.9551, "step": 108357 }, { "epoch": 11.136690647482014, "grad_norm": 0.048939868807792664, "learning_rate": 0.01, "loss": 1.9456, "step": 108360 }, { "epoch": 11.13699897225077, "grad_norm": 0.03497758135199547, "learning_rate": 0.01, "loss": 1.9463, "step": 108363 }, { "epoch": 11.137307297019527, "grad_norm": 0.11892599612474442, "learning_rate": 0.01, "loss": 1.9408, "step": 108366 }, { "epoch": 11.137615621788283, "grad_norm": 0.04540543258190155, "learning_rate": 0.01, "loss": 1.9441, "step": 108369 }, { "epoch": 11.13792394655704, "grad_norm": 0.03498394042253494, "learning_rate": 0.01, "loss": 1.9197, "step": 108372 }, { "epoch": 11.138232271325796, "grad_norm": 0.049198199063539505, "learning_rate": 0.01, "loss": 1.9518, "step": 108375 }, { "epoch": 11.138540596094552, "grad_norm": 0.1480265110731125, "learning_rate": 0.01, "loss": 1.9305, "step": 108378 }, { "epoch": 11.138848920863309, "grad_norm": 0.07885235548019409, "learning_rate": 0.01, "loss": 1.9657, "step": 108381 }, { "epoch": 11.139157245632067, "grad_norm": 0.04114838317036629, "learning_rate": 0.01, "loss": 1.9468, "step": 108384 }, { "epoch": 11.139465570400823, "grad_norm": 0.03757159411907196, "learning_rate": 0.01, "loss": 1.9546, "step": 108387 }, { "epoch": 11.13977389516958, "grad_norm": 0.05755399167537689, "learning_rate": 0.01, "loss": 1.9401, "step": 108390 }, { "epoch": 11.140082219938336, "grad_norm": 0.05723578855395317, "learning_rate": 0.01, "loss": 1.9389, "step": 108393 }, { "epoch": 11.140390544707092, "grad_norm": 0.13173669576644897, "learning_rate": 0.01, "loss": 1.967, "step": 108396 }, { "epoch": 11.140698869475848, "grad_norm": 0.049970634281635284, "learning_rate": 0.01, "loss": 1.9556, "step": 108399 }, { "epoch": 11.141007194244605, "grad_norm": 0.1683085411787033, "learning_rate": 0.01, "loss": 1.9658, "step": 108402 }, { "epoch": 11.14131551901336, "grad_norm": 0.10416439175605774, "learning_rate": 0.01, "loss": 1.9398, "step": 108405 }, { "epoch": 11.141623843782117, "grad_norm": 0.05204487591981888, "learning_rate": 0.01, "loss": 1.9546, "step": 108408 }, { "epoch": 11.141932168550873, "grad_norm": 0.05820777267217636, "learning_rate": 0.01, "loss": 1.9436, "step": 108411 }, { "epoch": 11.14224049331963, "grad_norm": 0.0588134303689003, "learning_rate": 0.01, "loss": 1.9735, "step": 108414 }, { "epoch": 11.142548818088386, "grad_norm": 0.041087377816438675, "learning_rate": 0.01, "loss": 1.9445, "step": 108417 }, { "epoch": 11.142857142857142, "grad_norm": 0.058787308633327484, "learning_rate": 0.01, "loss": 1.9616, "step": 108420 }, { "epoch": 11.143165467625899, "grad_norm": 0.06542397290468216, "learning_rate": 0.01, "loss": 1.9433, "step": 108423 }, { "epoch": 11.143473792394655, "grad_norm": 0.07437827438116074, "learning_rate": 0.01, "loss": 1.9193, "step": 108426 }, { "epoch": 11.143782117163411, "grad_norm": 0.04234655201435089, "learning_rate": 0.01, "loss": 1.9394, "step": 108429 }, { "epoch": 11.14409044193217, "grad_norm": 0.08659413456916809, "learning_rate": 0.01, "loss": 1.9207, "step": 108432 }, { "epoch": 11.144398766700926, "grad_norm": 0.0776323676109314, "learning_rate": 0.01, "loss": 1.9308, "step": 108435 }, { "epoch": 11.144707091469682, "grad_norm": 0.16545532643795013, "learning_rate": 0.01, "loss": 1.9581, "step": 108438 }, { "epoch": 11.145015416238438, "grad_norm": 0.1424427479505539, "learning_rate": 0.01, "loss": 1.9394, "step": 108441 }, { "epoch": 11.145323741007195, "grad_norm": 0.07523380219936371, "learning_rate": 0.01, "loss": 1.9232, "step": 108444 }, { "epoch": 11.145632065775951, "grad_norm": 0.047919511795043945, "learning_rate": 0.01, "loss": 1.9431, "step": 108447 }, { "epoch": 11.145940390544707, "grad_norm": 0.04040754586458206, "learning_rate": 0.01, "loss": 1.9152, "step": 108450 }, { "epoch": 11.146248715313464, "grad_norm": 0.06334877759218216, "learning_rate": 0.01, "loss": 1.9312, "step": 108453 }, { "epoch": 11.14655704008222, "grad_norm": 0.040293674916028976, "learning_rate": 0.01, "loss": 1.928, "step": 108456 }, { "epoch": 11.146865364850976, "grad_norm": 0.11464455723762512, "learning_rate": 0.01, "loss": 1.9311, "step": 108459 }, { "epoch": 11.147173689619732, "grad_norm": 0.04293007403612137, "learning_rate": 0.01, "loss": 1.9446, "step": 108462 }, { "epoch": 11.147482014388489, "grad_norm": 0.08018584549427032, "learning_rate": 0.01, "loss": 1.9284, "step": 108465 }, { "epoch": 11.147790339157245, "grad_norm": 0.06225692853331566, "learning_rate": 0.01, "loss": 1.9447, "step": 108468 }, { "epoch": 11.148098663926001, "grad_norm": 0.054755352437496185, "learning_rate": 0.01, "loss": 1.9426, "step": 108471 }, { "epoch": 11.148406988694758, "grad_norm": 0.04388297721743584, "learning_rate": 0.01, "loss": 1.9409, "step": 108474 }, { "epoch": 11.148715313463516, "grad_norm": 0.1110197901725769, "learning_rate": 0.01, "loss": 1.9377, "step": 108477 }, { "epoch": 11.149023638232272, "grad_norm": 0.06123514100909233, "learning_rate": 0.01, "loss": 1.9368, "step": 108480 }, { "epoch": 11.149331963001028, "grad_norm": 0.08198761194944382, "learning_rate": 0.01, "loss": 1.9445, "step": 108483 }, { "epoch": 11.149640287769785, "grad_norm": 0.06199076399207115, "learning_rate": 0.01, "loss": 1.9355, "step": 108486 }, { "epoch": 11.149948612538541, "grad_norm": 0.06349356472492218, "learning_rate": 0.01, "loss": 1.9233, "step": 108489 }, { "epoch": 11.150256937307297, "grad_norm": 0.09270074963569641, "learning_rate": 0.01, "loss": 1.951, "step": 108492 }, { "epoch": 11.150565262076054, "grad_norm": 0.046484947204589844, "learning_rate": 0.01, "loss": 1.9173, "step": 108495 }, { "epoch": 11.15087358684481, "grad_norm": 0.03359349071979523, "learning_rate": 0.01, "loss": 1.9632, "step": 108498 }, { "epoch": 11.151181911613566, "grad_norm": 0.03490680083632469, "learning_rate": 0.01, "loss": 1.9353, "step": 108501 }, { "epoch": 11.151490236382323, "grad_norm": 0.03208662569522858, "learning_rate": 0.01, "loss": 1.9422, "step": 108504 }, { "epoch": 11.151798561151079, "grad_norm": 0.0415281280875206, "learning_rate": 0.01, "loss": 1.9363, "step": 108507 }, { "epoch": 11.152106885919835, "grad_norm": 0.24639929831027985, "learning_rate": 0.01, "loss": 1.9526, "step": 108510 }, { "epoch": 11.152415210688591, "grad_norm": 0.13605011999607086, "learning_rate": 0.01, "loss": 1.962, "step": 108513 }, { "epoch": 11.152723535457348, "grad_norm": 0.15605586767196655, "learning_rate": 0.01, "loss": 1.9393, "step": 108516 }, { "epoch": 11.153031860226104, "grad_norm": 0.07057122886180878, "learning_rate": 0.01, "loss": 1.9396, "step": 108519 }, { "epoch": 11.153340184994862, "grad_norm": 0.04619940370321274, "learning_rate": 0.01, "loss": 1.9349, "step": 108522 }, { "epoch": 11.153648509763618, "grad_norm": 0.10820236057043076, "learning_rate": 0.01, "loss": 1.974, "step": 108525 }, { "epoch": 11.153956834532375, "grad_norm": 0.06613638252019882, "learning_rate": 0.01, "loss": 1.9281, "step": 108528 }, { "epoch": 11.154265159301131, "grad_norm": 0.042178694158792496, "learning_rate": 0.01, "loss": 1.8919, "step": 108531 }, { "epoch": 11.154573484069887, "grad_norm": 0.042413171380758286, "learning_rate": 0.01, "loss": 1.9574, "step": 108534 }, { "epoch": 11.154881808838644, "grad_norm": 0.0663883239030838, "learning_rate": 0.01, "loss": 1.9269, "step": 108537 }, { "epoch": 11.1551901336074, "grad_norm": 0.04206288233399391, "learning_rate": 0.01, "loss": 1.9389, "step": 108540 }, { "epoch": 11.155498458376156, "grad_norm": 0.05079186335206032, "learning_rate": 0.01, "loss": 1.9644, "step": 108543 }, { "epoch": 11.155806783144913, "grad_norm": 0.047072358429431915, "learning_rate": 0.01, "loss": 1.9345, "step": 108546 }, { "epoch": 11.156115107913669, "grad_norm": 0.06086193770170212, "learning_rate": 0.01, "loss": 1.9472, "step": 108549 }, { "epoch": 11.156423432682425, "grad_norm": 0.08996503055095673, "learning_rate": 0.01, "loss": 1.9607, "step": 108552 }, { "epoch": 11.156731757451182, "grad_norm": 0.13274416327476501, "learning_rate": 0.01, "loss": 1.9434, "step": 108555 }, { "epoch": 11.157040082219938, "grad_norm": 0.21141725778579712, "learning_rate": 0.01, "loss": 1.9222, "step": 108558 }, { "epoch": 11.157348406988694, "grad_norm": 0.131058931350708, "learning_rate": 0.01, "loss": 1.9373, "step": 108561 }, { "epoch": 11.15765673175745, "grad_norm": 0.05547711253166199, "learning_rate": 0.01, "loss": 1.9239, "step": 108564 }, { "epoch": 11.157965056526207, "grad_norm": 0.03413417190313339, "learning_rate": 0.01, "loss": 1.9253, "step": 108567 }, { "epoch": 11.158273381294965, "grad_norm": 0.03346508368849754, "learning_rate": 0.01, "loss": 1.9145, "step": 108570 }, { "epoch": 11.158581706063721, "grad_norm": 0.03702901676297188, "learning_rate": 0.01, "loss": 1.955, "step": 108573 }, { "epoch": 11.158890030832477, "grad_norm": 0.07465177029371262, "learning_rate": 0.01, "loss": 1.9572, "step": 108576 }, { "epoch": 11.159198355601234, "grad_norm": 0.04305208474397659, "learning_rate": 0.01, "loss": 1.938, "step": 108579 }, { "epoch": 11.15950668036999, "grad_norm": 0.05763426795601845, "learning_rate": 0.01, "loss": 1.9417, "step": 108582 }, { "epoch": 11.159815005138746, "grad_norm": 0.04999389126896858, "learning_rate": 0.01, "loss": 1.9454, "step": 108585 }, { "epoch": 11.160123329907503, "grad_norm": 0.03652753308415413, "learning_rate": 0.01, "loss": 1.9591, "step": 108588 }, { "epoch": 11.160431654676259, "grad_norm": 0.17381641268730164, "learning_rate": 0.01, "loss": 1.9414, "step": 108591 }, { "epoch": 11.160739979445015, "grad_norm": 0.04396112635731697, "learning_rate": 0.01, "loss": 1.9855, "step": 108594 }, { "epoch": 11.161048304213772, "grad_norm": 0.03453262522816658, "learning_rate": 0.01, "loss": 1.9472, "step": 108597 }, { "epoch": 11.161356628982528, "grad_norm": 0.10160734504461288, "learning_rate": 0.01, "loss": 1.9337, "step": 108600 }, { "epoch": 11.161664953751284, "grad_norm": 0.10512173175811768, "learning_rate": 0.01, "loss": 1.9562, "step": 108603 }, { "epoch": 11.16197327852004, "grad_norm": 0.0531027652323246, "learning_rate": 0.01, "loss": 1.9487, "step": 108606 }, { "epoch": 11.162281603288797, "grad_norm": 0.1427735984325409, "learning_rate": 0.01, "loss": 1.9351, "step": 108609 }, { "epoch": 11.162589928057553, "grad_norm": 0.04474338889122009, "learning_rate": 0.01, "loss": 1.9335, "step": 108612 }, { "epoch": 11.162898252826311, "grad_norm": 0.1090499684214592, "learning_rate": 0.01, "loss": 1.941, "step": 108615 }, { "epoch": 11.163206577595068, "grad_norm": 0.0636020377278328, "learning_rate": 0.01, "loss": 1.9425, "step": 108618 }, { "epoch": 11.163514902363824, "grad_norm": 0.044732339680194855, "learning_rate": 0.01, "loss": 1.9291, "step": 108621 }, { "epoch": 11.16382322713258, "grad_norm": 0.05239364132285118, "learning_rate": 0.01, "loss": 1.9262, "step": 108624 }, { "epoch": 11.164131551901336, "grad_norm": 0.1660080850124359, "learning_rate": 0.01, "loss": 1.9551, "step": 108627 }, { "epoch": 11.164439876670093, "grad_norm": 0.055751584470272064, "learning_rate": 0.01, "loss": 1.9328, "step": 108630 }, { "epoch": 11.164748201438849, "grad_norm": 0.042030543088912964, "learning_rate": 0.01, "loss": 1.9183, "step": 108633 }, { "epoch": 11.165056526207605, "grad_norm": 0.050695545971393585, "learning_rate": 0.01, "loss": 1.9439, "step": 108636 }, { "epoch": 11.165364850976362, "grad_norm": 0.08683747053146362, "learning_rate": 0.01, "loss": 1.9667, "step": 108639 }, { "epoch": 11.165673175745118, "grad_norm": 0.05954531580209732, "learning_rate": 0.01, "loss": 1.9831, "step": 108642 }, { "epoch": 11.165981500513874, "grad_norm": 0.1576981097459793, "learning_rate": 0.01, "loss": 1.9555, "step": 108645 }, { "epoch": 11.16628982528263, "grad_norm": 0.09284688532352448, "learning_rate": 0.01, "loss": 1.932, "step": 108648 }, { "epoch": 11.166598150051387, "grad_norm": 0.07134409248828888, "learning_rate": 0.01, "loss": 1.952, "step": 108651 }, { "epoch": 11.166906474820143, "grad_norm": 0.05715997517108917, "learning_rate": 0.01, "loss": 1.9412, "step": 108654 }, { "epoch": 11.1672147995889, "grad_norm": 0.043932173401117325, "learning_rate": 0.01, "loss": 1.9435, "step": 108657 }, { "epoch": 11.167523124357658, "grad_norm": 0.02749806083738804, "learning_rate": 0.01, "loss": 1.9393, "step": 108660 }, { "epoch": 11.167831449126414, "grad_norm": 0.055440399795770645, "learning_rate": 0.01, "loss": 1.9377, "step": 108663 }, { "epoch": 11.16813977389517, "grad_norm": 0.07681028544902802, "learning_rate": 0.01, "loss": 1.9397, "step": 108666 }, { "epoch": 11.168448098663927, "grad_norm": 0.14702077209949493, "learning_rate": 0.01, "loss": 1.9443, "step": 108669 }, { "epoch": 11.168756423432683, "grad_norm": 0.06999138742685318, "learning_rate": 0.01, "loss": 1.9445, "step": 108672 }, { "epoch": 11.16906474820144, "grad_norm": 0.03188035637140274, "learning_rate": 0.01, "loss": 1.9397, "step": 108675 }, { "epoch": 11.169373072970195, "grad_norm": 0.03169883415102959, "learning_rate": 0.01, "loss": 1.9575, "step": 108678 }, { "epoch": 11.169681397738952, "grad_norm": 0.14469371736049652, "learning_rate": 0.01, "loss": 1.9493, "step": 108681 }, { "epoch": 11.169989722507708, "grad_norm": 0.1052892655134201, "learning_rate": 0.01, "loss": 1.9595, "step": 108684 }, { "epoch": 11.170298047276464, "grad_norm": 0.06744600832462311, "learning_rate": 0.01, "loss": 1.9604, "step": 108687 }, { "epoch": 11.17060637204522, "grad_norm": 0.05542672798037529, "learning_rate": 0.01, "loss": 1.9596, "step": 108690 }, { "epoch": 11.170914696813977, "grad_norm": 0.07598409056663513, "learning_rate": 0.01, "loss": 1.9625, "step": 108693 }, { "epoch": 11.171223021582733, "grad_norm": 0.06378299742937088, "learning_rate": 0.01, "loss": 1.9411, "step": 108696 }, { "epoch": 11.17153134635149, "grad_norm": 0.04912390932440758, "learning_rate": 0.01, "loss": 1.9084, "step": 108699 }, { "epoch": 11.171839671120246, "grad_norm": 0.07536765933036804, "learning_rate": 0.01, "loss": 1.9368, "step": 108702 }, { "epoch": 11.172147995889002, "grad_norm": 0.05856078118085861, "learning_rate": 0.01, "loss": 1.9406, "step": 108705 }, { "epoch": 11.17245632065776, "grad_norm": 0.09633874893188477, "learning_rate": 0.01, "loss": 1.9215, "step": 108708 }, { "epoch": 11.172764645426517, "grad_norm": 0.09800992161035538, "learning_rate": 0.01, "loss": 1.9165, "step": 108711 }, { "epoch": 11.173072970195273, "grad_norm": 0.08409328013658524, "learning_rate": 0.01, "loss": 1.9474, "step": 108714 }, { "epoch": 11.17338129496403, "grad_norm": 0.09450411051511765, "learning_rate": 0.01, "loss": 1.9492, "step": 108717 }, { "epoch": 11.173689619732786, "grad_norm": 0.11592711508274078, "learning_rate": 0.01, "loss": 1.9175, "step": 108720 }, { "epoch": 11.173997944501542, "grad_norm": 0.09436693042516708, "learning_rate": 0.01, "loss": 1.9239, "step": 108723 }, { "epoch": 11.174306269270298, "grad_norm": 0.06513006240129471, "learning_rate": 0.01, "loss": 1.9435, "step": 108726 }, { "epoch": 11.174614594039054, "grad_norm": 0.03724005073308945, "learning_rate": 0.01, "loss": 1.9225, "step": 108729 }, { "epoch": 11.17492291880781, "grad_norm": 0.03580351173877716, "learning_rate": 0.01, "loss": 1.9573, "step": 108732 }, { "epoch": 11.175231243576567, "grad_norm": 0.037552379071712494, "learning_rate": 0.01, "loss": 1.933, "step": 108735 }, { "epoch": 11.175539568345323, "grad_norm": 0.06251754611730576, "learning_rate": 0.01, "loss": 1.9115, "step": 108738 }, { "epoch": 11.17584789311408, "grad_norm": 0.059087805449962616, "learning_rate": 0.01, "loss": 1.9541, "step": 108741 }, { "epoch": 11.176156217882836, "grad_norm": 0.0471821054816246, "learning_rate": 0.01, "loss": 1.9565, "step": 108744 }, { "epoch": 11.176464542651592, "grad_norm": 0.03444281965494156, "learning_rate": 0.01, "loss": 1.919, "step": 108747 }, { "epoch": 11.176772867420349, "grad_norm": 0.16312456130981445, "learning_rate": 0.01, "loss": 1.952, "step": 108750 }, { "epoch": 11.177081192189107, "grad_norm": 0.05206234008073807, "learning_rate": 0.01, "loss": 1.9206, "step": 108753 }, { "epoch": 11.177389516957863, "grad_norm": 0.062144502997398376, "learning_rate": 0.01, "loss": 1.9511, "step": 108756 }, { "epoch": 11.17769784172662, "grad_norm": 0.04214511439204216, "learning_rate": 0.01, "loss": 1.9124, "step": 108759 }, { "epoch": 11.178006166495376, "grad_norm": 0.05748814716935158, "learning_rate": 0.01, "loss": 1.9234, "step": 108762 }, { "epoch": 11.178314491264132, "grad_norm": 0.05643995851278305, "learning_rate": 0.01, "loss": 1.9151, "step": 108765 }, { "epoch": 11.178622816032888, "grad_norm": 0.07424315065145493, "learning_rate": 0.01, "loss": 1.9449, "step": 108768 }, { "epoch": 11.178931140801645, "grad_norm": 0.08109375834465027, "learning_rate": 0.01, "loss": 1.9453, "step": 108771 }, { "epoch": 11.1792394655704, "grad_norm": 0.05265939235687256, "learning_rate": 0.01, "loss": 1.9473, "step": 108774 }, { "epoch": 11.179547790339157, "grad_norm": 0.03445740416646004, "learning_rate": 0.01, "loss": 1.9312, "step": 108777 }, { "epoch": 11.179856115107913, "grad_norm": 0.11327429115772247, "learning_rate": 0.01, "loss": 1.9493, "step": 108780 }, { "epoch": 11.18016443987667, "grad_norm": 0.09449184685945511, "learning_rate": 0.01, "loss": 1.9615, "step": 108783 }, { "epoch": 11.180472764645426, "grad_norm": 0.0852329358458519, "learning_rate": 0.01, "loss": 1.9332, "step": 108786 }, { "epoch": 11.180781089414182, "grad_norm": 0.04656582325696945, "learning_rate": 0.01, "loss": 1.9456, "step": 108789 }, { "epoch": 11.181089414182939, "grad_norm": 0.075565405189991, "learning_rate": 0.01, "loss": 1.9251, "step": 108792 }, { "epoch": 11.181397738951695, "grad_norm": 0.08553548902273178, "learning_rate": 0.01, "loss": 1.9384, "step": 108795 }, { "epoch": 11.181706063720453, "grad_norm": 0.03676193952560425, "learning_rate": 0.01, "loss": 1.9453, "step": 108798 }, { "epoch": 11.18201438848921, "grad_norm": 0.08095156401395798, "learning_rate": 0.01, "loss": 1.9228, "step": 108801 }, { "epoch": 11.182322713257966, "grad_norm": 0.0733218714594841, "learning_rate": 0.01, "loss": 1.9493, "step": 108804 }, { "epoch": 11.182631038026722, "grad_norm": 0.038306981325149536, "learning_rate": 0.01, "loss": 1.9493, "step": 108807 }, { "epoch": 11.182939362795478, "grad_norm": 0.06717819720506668, "learning_rate": 0.01, "loss": 1.9166, "step": 108810 }, { "epoch": 11.183247687564235, "grad_norm": 0.058772485703229904, "learning_rate": 0.01, "loss": 1.957, "step": 108813 }, { "epoch": 11.183556012332991, "grad_norm": 0.05892009660601616, "learning_rate": 0.01, "loss": 1.9265, "step": 108816 }, { "epoch": 11.183864337101747, "grad_norm": 0.07855512946844101, "learning_rate": 0.01, "loss": 1.9487, "step": 108819 }, { "epoch": 11.184172661870504, "grad_norm": 0.07210905104875565, "learning_rate": 0.01, "loss": 1.9482, "step": 108822 }, { "epoch": 11.18448098663926, "grad_norm": 0.15390074253082275, "learning_rate": 0.01, "loss": 1.9666, "step": 108825 }, { "epoch": 11.184789311408016, "grad_norm": 0.16397853195667267, "learning_rate": 0.01, "loss": 1.9414, "step": 108828 }, { "epoch": 11.185097636176772, "grad_norm": 0.1202760562300682, "learning_rate": 0.01, "loss": 1.9461, "step": 108831 }, { "epoch": 11.185405960945529, "grad_norm": 0.056249383836984634, "learning_rate": 0.01, "loss": 1.9605, "step": 108834 }, { "epoch": 11.185714285714285, "grad_norm": 0.03812577202916145, "learning_rate": 0.01, "loss": 1.9444, "step": 108837 }, { "epoch": 11.186022610483041, "grad_norm": 0.21584199368953705, "learning_rate": 0.01, "loss": 1.9239, "step": 108840 }, { "epoch": 11.1863309352518, "grad_norm": 0.048358093947172165, "learning_rate": 0.01, "loss": 1.958, "step": 108843 }, { "epoch": 11.186639260020556, "grad_norm": 0.05890496075153351, "learning_rate": 0.01, "loss": 1.9638, "step": 108846 }, { "epoch": 11.186947584789312, "grad_norm": 0.07943232357501984, "learning_rate": 0.01, "loss": 1.9583, "step": 108849 }, { "epoch": 11.187255909558068, "grad_norm": 0.08584515005350113, "learning_rate": 0.01, "loss": 1.9611, "step": 108852 }, { "epoch": 11.187564234326825, "grad_norm": 0.03383355960249901, "learning_rate": 0.01, "loss": 1.9299, "step": 108855 }, { "epoch": 11.187872559095581, "grad_norm": 0.09473607689142227, "learning_rate": 0.01, "loss": 1.9331, "step": 108858 }, { "epoch": 11.188180883864337, "grad_norm": 0.03801342844963074, "learning_rate": 0.01, "loss": 1.9468, "step": 108861 }, { "epoch": 11.188489208633094, "grad_norm": 0.041940245777368546, "learning_rate": 0.01, "loss": 1.926, "step": 108864 }, { "epoch": 11.18879753340185, "grad_norm": 0.05744069814682007, "learning_rate": 0.01, "loss": 1.926, "step": 108867 }, { "epoch": 11.189105858170606, "grad_norm": 0.08689646422863007, "learning_rate": 0.01, "loss": 1.9688, "step": 108870 }, { "epoch": 11.189414182939363, "grad_norm": 0.06015917658805847, "learning_rate": 0.01, "loss": 1.9348, "step": 108873 }, { "epoch": 11.189722507708119, "grad_norm": 0.04627293720841408, "learning_rate": 0.01, "loss": 1.9456, "step": 108876 }, { "epoch": 11.190030832476875, "grad_norm": 0.10750222951173782, "learning_rate": 0.01, "loss": 1.9364, "step": 108879 }, { "epoch": 11.190339157245631, "grad_norm": 0.07908863574266434, "learning_rate": 0.01, "loss": 1.9536, "step": 108882 }, { "epoch": 11.190647482014388, "grad_norm": 0.09672728925943375, "learning_rate": 0.01, "loss": 1.9231, "step": 108885 }, { "epoch": 11.190955806783144, "grad_norm": 0.10572005808353424, "learning_rate": 0.01, "loss": 1.9194, "step": 108888 }, { "epoch": 11.191264131551902, "grad_norm": 0.04903019219636917, "learning_rate": 0.01, "loss": 1.9618, "step": 108891 }, { "epoch": 11.191572456320658, "grad_norm": 0.0401516817510128, "learning_rate": 0.01, "loss": 1.9473, "step": 108894 }, { "epoch": 11.191880781089415, "grad_norm": 0.050913285464048386, "learning_rate": 0.01, "loss": 1.9427, "step": 108897 }, { "epoch": 11.192189105858171, "grad_norm": 0.04478805884718895, "learning_rate": 0.01, "loss": 1.9536, "step": 108900 }, { "epoch": 11.192497430626927, "grad_norm": 0.03856386989355087, "learning_rate": 0.01, "loss": 1.9161, "step": 108903 }, { "epoch": 11.192805755395684, "grad_norm": 0.05249187722802162, "learning_rate": 0.01, "loss": 1.9366, "step": 108906 }, { "epoch": 11.19311408016444, "grad_norm": 0.0587519034743309, "learning_rate": 0.01, "loss": 1.9304, "step": 108909 }, { "epoch": 11.193422404933196, "grad_norm": 0.03957201540470123, "learning_rate": 0.01, "loss": 1.9336, "step": 108912 }, { "epoch": 11.193730729701953, "grad_norm": 0.04053035378456116, "learning_rate": 0.01, "loss": 1.9366, "step": 108915 }, { "epoch": 11.194039054470709, "grad_norm": 0.088300421833992, "learning_rate": 0.01, "loss": 1.9293, "step": 108918 }, { "epoch": 11.194347379239465, "grad_norm": 0.0719873234629631, "learning_rate": 0.01, "loss": 1.9489, "step": 108921 }, { "epoch": 11.194655704008222, "grad_norm": 0.11401339620351791, "learning_rate": 0.01, "loss": 1.945, "step": 108924 }, { "epoch": 11.194964028776978, "grad_norm": 0.12527501583099365, "learning_rate": 0.01, "loss": 1.9362, "step": 108927 }, { "epoch": 11.195272353545734, "grad_norm": 0.05526786297559738, "learning_rate": 0.01, "loss": 1.9417, "step": 108930 }, { "epoch": 11.19558067831449, "grad_norm": 0.07284065335988998, "learning_rate": 0.01, "loss": 1.9622, "step": 108933 }, { "epoch": 11.195889003083249, "grad_norm": 0.07272975146770477, "learning_rate": 0.01, "loss": 1.9692, "step": 108936 }, { "epoch": 11.196197327852005, "grad_norm": 0.03863842040300369, "learning_rate": 0.01, "loss": 1.9544, "step": 108939 }, { "epoch": 11.196505652620761, "grad_norm": 0.1017715260386467, "learning_rate": 0.01, "loss": 1.9332, "step": 108942 }, { "epoch": 11.196813977389517, "grad_norm": 0.05640571564435959, "learning_rate": 0.01, "loss": 1.9392, "step": 108945 }, { "epoch": 11.197122302158274, "grad_norm": 0.04469192400574684, "learning_rate": 0.01, "loss": 1.94, "step": 108948 }, { "epoch": 11.19743062692703, "grad_norm": 0.09084463119506836, "learning_rate": 0.01, "loss": 1.9501, "step": 108951 }, { "epoch": 11.197738951695786, "grad_norm": 0.032969359308481216, "learning_rate": 0.01, "loss": 1.9617, "step": 108954 }, { "epoch": 11.198047276464543, "grad_norm": 0.04472844675183296, "learning_rate": 0.01, "loss": 1.9271, "step": 108957 }, { "epoch": 11.198355601233299, "grad_norm": 0.08740687370300293, "learning_rate": 0.01, "loss": 1.9392, "step": 108960 }, { "epoch": 11.198663926002055, "grad_norm": 0.06060430780053139, "learning_rate": 0.01, "loss": 1.9667, "step": 108963 }, { "epoch": 11.198972250770812, "grad_norm": 0.05395602807402611, "learning_rate": 0.01, "loss": 1.9606, "step": 108966 }, { "epoch": 11.199280575539568, "grad_norm": 0.10777734220027924, "learning_rate": 0.01, "loss": 1.9529, "step": 108969 }, { "epoch": 11.199588900308324, "grad_norm": 0.04315931722521782, "learning_rate": 0.01, "loss": 1.9791, "step": 108972 }, { "epoch": 11.19989722507708, "grad_norm": 0.03909776732325554, "learning_rate": 0.01, "loss": 1.9355, "step": 108975 }, { "epoch": 11.200205549845837, "grad_norm": 0.03314677253365517, "learning_rate": 0.01, "loss": 1.9585, "step": 108978 }, { "epoch": 11.200513874614595, "grad_norm": 0.07113143801689148, "learning_rate": 0.01, "loss": 1.9245, "step": 108981 }, { "epoch": 11.200822199383351, "grad_norm": 0.09888097643852234, "learning_rate": 0.01, "loss": 1.949, "step": 108984 }, { "epoch": 11.201130524152108, "grad_norm": 0.08767660707235336, "learning_rate": 0.01, "loss": 1.9265, "step": 108987 }, { "epoch": 11.201438848920864, "grad_norm": 0.046668294817209244, "learning_rate": 0.01, "loss": 1.9476, "step": 108990 }, { "epoch": 11.20174717368962, "grad_norm": 0.1076827198266983, "learning_rate": 0.01, "loss": 1.9496, "step": 108993 }, { "epoch": 11.202055498458376, "grad_norm": 0.051028959453105927, "learning_rate": 0.01, "loss": 1.9298, "step": 108996 }, { "epoch": 11.202363823227133, "grad_norm": 0.050806645303964615, "learning_rate": 0.01, "loss": 1.9364, "step": 108999 }, { "epoch": 11.202672147995889, "grad_norm": 0.06453768163919449, "learning_rate": 0.01, "loss": 1.9593, "step": 109002 }, { "epoch": 11.202980472764645, "grad_norm": 0.0641925036907196, "learning_rate": 0.01, "loss": 1.9264, "step": 109005 }, { "epoch": 11.203288797533402, "grad_norm": 0.042618606239557266, "learning_rate": 0.01, "loss": 1.9506, "step": 109008 }, { "epoch": 11.203597122302158, "grad_norm": 0.054604362696409225, "learning_rate": 0.01, "loss": 1.9413, "step": 109011 }, { "epoch": 11.203905447070914, "grad_norm": 0.06285086274147034, "learning_rate": 0.01, "loss": 1.9461, "step": 109014 }, { "epoch": 11.20421377183967, "grad_norm": 0.07482526451349258, "learning_rate": 0.01, "loss": 1.9796, "step": 109017 }, { "epoch": 11.204522096608427, "grad_norm": 0.09341461956501007, "learning_rate": 0.01, "loss": 1.9379, "step": 109020 }, { "epoch": 11.204830421377183, "grad_norm": 0.03731429576873779, "learning_rate": 0.01, "loss": 1.95, "step": 109023 }, { "epoch": 11.20513874614594, "grad_norm": 0.14362257719039917, "learning_rate": 0.01, "loss": 1.9343, "step": 109026 }, { "epoch": 11.205447070914698, "grad_norm": 0.12169114500284195, "learning_rate": 0.01, "loss": 1.953, "step": 109029 }, { "epoch": 11.205755395683454, "grad_norm": 0.1270626336336136, "learning_rate": 0.01, "loss": 1.9273, "step": 109032 }, { "epoch": 11.20606372045221, "grad_norm": 0.07900585979223251, "learning_rate": 0.01, "loss": 1.9308, "step": 109035 }, { "epoch": 11.206372045220967, "grad_norm": 0.04565434902906418, "learning_rate": 0.01, "loss": 1.9648, "step": 109038 }, { "epoch": 11.206680369989723, "grad_norm": 0.05286426469683647, "learning_rate": 0.01, "loss": 1.9425, "step": 109041 }, { "epoch": 11.20698869475848, "grad_norm": 0.044969215989112854, "learning_rate": 0.01, "loss": 1.9429, "step": 109044 }, { "epoch": 11.207297019527235, "grad_norm": 0.05089700222015381, "learning_rate": 0.01, "loss": 1.9524, "step": 109047 }, { "epoch": 11.207605344295992, "grad_norm": 0.04107325151562691, "learning_rate": 0.01, "loss": 1.9323, "step": 109050 }, { "epoch": 11.207913669064748, "grad_norm": 0.04545219987630844, "learning_rate": 0.01, "loss": 1.9332, "step": 109053 }, { "epoch": 11.208221993833504, "grad_norm": 0.052229829132556915, "learning_rate": 0.01, "loss": 1.9302, "step": 109056 }, { "epoch": 11.20853031860226, "grad_norm": 0.09277478605508804, "learning_rate": 0.01, "loss": 1.9752, "step": 109059 }, { "epoch": 11.208838643371017, "grad_norm": 0.06279975920915604, "learning_rate": 0.01, "loss": 1.9488, "step": 109062 }, { "epoch": 11.209146968139773, "grad_norm": 0.03972071781754494, "learning_rate": 0.01, "loss": 1.9276, "step": 109065 }, { "epoch": 11.20945529290853, "grad_norm": 0.053854186087846756, "learning_rate": 0.01, "loss": 1.9444, "step": 109068 }, { "epoch": 11.209763617677286, "grad_norm": 0.046731676906347275, "learning_rate": 0.01, "loss": 1.9164, "step": 109071 }, { "epoch": 11.210071942446044, "grad_norm": 0.06777119636535645, "learning_rate": 0.01, "loss": 1.9473, "step": 109074 }, { "epoch": 11.2103802672148, "grad_norm": 0.06705444306135178, "learning_rate": 0.01, "loss": 1.946, "step": 109077 }, { "epoch": 11.210688591983557, "grad_norm": 0.08733968436717987, "learning_rate": 0.01, "loss": 1.9532, "step": 109080 }, { "epoch": 11.210996916752313, "grad_norm": 0.11957607418298721, "learning_rate": 0.01, "loss": 1.9487, "step": 109083 }, { "epoch": 11.21130524152107, "grad_norm": 0.08926189690828323, "learning_rate": 0.01, "loss": 1.9534, "step": 109086 }, { "epoch": 11.211613566289826, "grad_norm": 0.08381009101867676, "learning_rate": 0.01, "loss": 1.9349, "step": 109089 }, { "epoch": 11.211921891058582, "grad_norm": 0.05530963093042374, "learning_rate": 0.01, "loss": 1.9457, "step": 109092 }, { "epoch": 11.212230215827338, "grad_norm": 0.045270007103681564, "learning_rate": 0.01, "loss": 1.9349, "step": 109095 }, { "epoch": 11.212538540596094, "grad_norm": 0.045589692890644073, "learning_rate": 0.01, "loss": 1.933, "step": 109098 }, { "epoch": 11.21284686536485, "grad_norm": 0.08058987557888031, "learning_rate": 0.01, "loss": 1.9298, "step": 109101 }, { "epoch": 11.213155190133607, "grad_norm": 0.06342653930187225, "learning_rate": 0.01, "loss": 1.934, "step": 109104 }, { "epoch": 11.213463514902363, "grad_norm": 0.0859803557395935, "learning_rate": 0.01, "loss": 1.9548, "step": 109107 }, { "epoch": 11.21377183967112, "grad_norm": 0.11543281376361847, "learning_rate": 0.01, "loss": 1.9561, "step": 109110 }, { "epoch": 11.214080164439876, "grad_norm": 0.10394307971000671, "learning_rate": 0.01, "loss": 1.957, "step": 109113 }, { "epoch": 11.214388489208632, "grad_norm": 0.04083403944969177, "learning_rate": 0.01, "loss": 1.9415, "step": 109116 }, { "epoch": 11.21469681397739, "grad_norm": 0.11865125596523285, "learning_rate": 0.01, "loss": 1.939, "step": 109119 }, { "epoch": 11.215005138746147, "grad_norm": 0.0339047834277153, "learning_rate": 0.01, "loss": 1.9367, "step": 109122 }, { "epoch": 11.215313463514903, "grad_norm": 0.09069018065929413, "learning_rate": 0.01, "loss": 1.959, "step": 109125 }, { "epoch": 11.21562178828366, "grad_norm": 0.047034863382577896, "learning_rate": 0.01, "loss": 1.9601, "step": 109128 }, { "epoch": 11.215930113052416, "grad_norm": 0.09295161068439484, "learning_rate": 0.01, "loss": 1.9688, "step": 109131 }, { "epoch": 11.216238437821172, "grad_norm": 0.057629477232694626, "learning_rate": 0.01, "loss": 1.9156, "step": 109134 }, { "epoch": 11.216546762589928, "grad_norm": 0.09780970215797424, "learning_rate": 0.01, "loss": 1.8954, "step": 109137 }, { "epoch": 11.216855087358685, "grad_norm": 0.04168618842959404, "learning_rate": 0.01, "loss": 1.9437, "step": 109140 }, { "epoch": 11.21716341212744, "grad_norm": 0.10839787125587463, "learning_rate": 0.01, "loss": 1.9168, "step": 109143 }, { "epoch": 11.217471736896197, "grad_norm": 0.15280303359031677, "learning_rate": 0.01, "loss": 1.981, "step": 109146 }, { "epoch": 11.217780061664953, "grad_norm": 0.12073913216590881, "learning_rate": 0.01, "loss": 1.9375, "step": 109149 }, { "epoch": 11.21808838643371, "grad_norm": 0.08217319846153259, "learning_rate": 0.01, "loss": 1.9627, "step": 109152 }, { "epoch": 11.218396711202466, "grad_norm": 0.06883706897497177, "learning_rate": 0.01, "loss": 1.9323, "step": 109155 }, { "epoch": 11.218705035971222, "grad_norm": 0.0811753198504448, "learning_rate": 0.01, "loss": 1.956, "step": 109158 }, { "epoch": 11.219013360739979, "grad_norm": 0.04602133855223656, "learning_rate": 0.01, "loss": 1.9552, "step": 109161 }, { "epoch": 11.219321685508735, "grad_norm": 0.03887748718261719, "learning_rate": 0.01, "loss": 1.9503, "step": 109164 }, { "epoch": 11.219630010277493, "grad_norm": 0.062418244779109955, "learning_rate": 0.01, "loss": 1.933, "step": 109167 }, { "epoch": 11.21993833504625, "grad_norm": 0.06071825325489044, "learning_rate": 0.01, "loss": 1.9128, "step": 109170 }, { "epoch": 11.220246659815006, "grad_norm": 0.04249916225671768, "learning_rate": 0.01, "loss": 1.96, "step": 109173 }, { "epoch": 11.220554984583762, "grad_norm": 0.15064731240272522, "learning_rate": 0.01, "loss": 1.9428, "step": 109176 }, { "epoch": 11.220863309352518, "grad_norm": 0.04599601775407791, "learning_rate": 0.01, "loss": 1.9474, "step": 109179 }, { "epoch": 11.221171634121275, "grad_norm": 0.04028359055519104, "learning_rate": 0.01, "loss": 1.9498, "step": 109182 }, { "epoch": 11.221479958890031, "grad_norm": 0.034823331981897354, "learning_rate": 0.01, "loss": 1.9448, "step": 109185 }, { "epoch": 11.221788283658787, "grad_norm": 0.055492889136075974, "learning_rate": 0.01, "loss": 1.9188, "step": 109188 }, { "epoch": 11.222096608427544, "grad_norm": 0.057960398495197296, "learning_rate": 0.01, "loss": 1.9506, "step": 109191 }, { "epoch": 11.2224049331963, "grad_norm": 0.0979379266500473, "learning_rate": 0.01, "loss": 1.9271, "step": 109194 }, { "epoch": 11.222713257965056, "grad_norm": 0.04197908565402031, "learning_rate": 0.01, "loss": 1.9693, "step": 109197 }, { "epoch": 11.223021582733812, "grad_norm": 0.09842576831579208, "learning_rate": 0.01, "loss": 1.9179, "step": 109200 }, { "epoch": 11.223329907502569, "grad_norm": 0.04491826891899109, "learning_rate": 0.01, "loss": 1.9569, "step": 109203 }, { "epoch": 11.223638232271325, "grad_norm": 0.07851038873195648, "learning_rate": 0.01, "loss": 1.9235, "step": 109206 }, { "epoch": 11.223946557040081, "grad_norm": 0.05620894581079483, "learning_rate": 0.01, "loss": 1.9379, "step": 109209 }, { "epoch": 11.22425488180884, "grad_norm": 0.10865885764360428, "learning_rate": 0.01, "loss": 1.9813, "step": 109212 }, { "epoch": 11.224563206577596, "grad_norm": 0.05403037741780281, "learning_rate": 0.01, "loss": 1.9268, "step": 109215 }, { "epoch": 11.224871531346352, "grad_norm": 0.06059322878718376, "learning_rate": 0.01, "loss": 1.928, "step": 109218 }, { "epoch": 11.225179856115108, "grad_norm": 0.08516819775104523, "learning_rate": 0.01, "loss": 1.9494, "step": 109221 }, { "epoch": 11.225488180883865, "grad_norm": 0.07061944156885147, "learning_rate": 0.01, "loss": 1.9628, "step": 109224 }, { "epoch": 11.225796505652621, "grad_norm": 0.039020419120788574, "learning_rate": 0.01, "loss": 1.9542, "step": 109227 }, { "epoch": 11.226104830421377, "grad_norm": 0.06967667490243912, "learning_rate": 0.01, "loss": 1.9392, "step": 109230 }, { "epoch": 11.226413155190134, "grad_norm": 0.06568337231874466, "learning_rate": 0.01, "loss": 1.9381, "step": 109233 }, { "epoch": 11.22672147995889, "grad_norm": 0.09274667501449585, "learning_rate": 0.01, "loss": 1.9337, "step": 109236 }, { "epoch": 11.227029804727646, "grad_norm": 0.06000722572207451, "learning_rate": 0.01, "loss": 1.9186, "step": 109239 }, { "epoch": 11.227338129496403, "grad_norm": 0.04201046749949455, "learning_rate": 0.01, "loss": 1.9609, "step": 109242 }, { "epoch": 11.227646454265159, "grad_norm": 0.03122475929558277, "learning_rate": 0.01, "loss": 1.9487, "step": 109245 }, { "epoch": 11.227954779033915, "grad_norm": 0.05247949808835983, "learning_rate": 0.01, "loss": 1.9412, "step": 109248 }, { "epoch": 11.228263103802671, "grad_norm": 0.1298094391822815, "learning_rate": 0.01, "loss": 1.921, "step": 109251 }, { "epoch": 11.228571428571428, "grad_norm": 0.05516472086310387, "learning_rate": 0.01, "loss": 1.9325, "step": 109254 }, { "epoch": 11.228879753340186, "grad_norm": 0.05649101734161377, "learning_rate": 0.01, "loss": 1.9384, "step": 109257 }, { "epoch": 11.229188078108942, "grad_norm": 0.0960668995976448, "learning_rate": 0.01, "loss": 1.9408, "step": 109260 }, { "epoch": 11.229496402877698, "grad_norm": 0.09434793889522552, "learning_rate": 0.01, "loss": 1.9542, "step": 109263 }, { "epoch": 11.229804727646455, "grad_norm": 0.07487189769744873, "learning_rate": 0.01, "loss": 1.9373, "step": 109266 }, { "epoch": 11.230113052415211, "grad_norm": 0.06864513456821442, "learning_rate": 0.01, "loss": 1.9382, "step": 109269 }, { "epoch": 11.230421377183967, "grad_norm": 0.12508641183376312, "learning_rate": 0.01, "loss": 1.9304, "step": 109272 }, { "epoch": 11.230729701952724, "grad_norm": 0.04709652438759804, "learning_rate": 0.01, "loss": 1.9654, "step": 109275 }, { "epoch": 11.23103802672148, "grad_norm": 0.1833847165107727, "learning_rate": 0.01, "loss": 1.9851, "step": 109278 }, { "epoch": 11.231346351490236, "grad_norm": 0.06030800938606262, "learning_rate": 0.01, "loss": 1.9282, "step": 109281 }, { "epoch": 11.231654676258993, "grad_norm": 0.04705330729484558, "learning_rate": 0.01, "loss": 1.9643, "step": 109284 }, { "epoch": 11.231963001027749, "grad_norm": 0.05176223814487457, "learning_rate": 0.01, "loss": 1.9507, "step": 109287 }, { "epoch": 11.232271325796505, "grad_norm": 0.036808304488658905, "learning_rate": 0.01, "loss": 1.9303, "step": 109290 }, { "epoch": 11.232579650565262, "grad_norm": 0.05182893946766853, "learning_rate": 0.01, "loss": 1.9419, "step": 109293 }, { "epoch": 11.232887975334018, "grad_norm": 0.04240572452545166, "learning_rate": 0.01, "loss": 1.9528, "step": 109296 }, { "epoch": 11.233196300102774, "grad_norm": 0.06471949815750122, "learning_rate": 0.01, "loss": 1.9545, "step": 109299 }, { "epoch": 11.233504624871532, "grad_norm": 0.09615979343652725, "learning_rate": 0.01, "loss": 1.961, "step": 109302 }, { "epoch": 11.233812949640289, "grad_norm": 0.16381511092185974, "learning_rate": 0.01, "loss": 1.9501, "step": 109305 }, { "epoch": 11.234121274409045, "grad_norm": 0.10431195050477982, "learning_rate": 0.01, "loss": 1.9579, "step": 109308 }, { "epoch": 11.234429599177801, "grad_norm": 0.10584813356399536, "learning_rate": 0.01, "loss": 1.9303, "step": 109311 }, { "epoch": 11.234737923946557, "grad_norm": 0.06652256101369858, "learning_rate": 0.01, "loss": 1.9425, "step": 109314 }, { "epoch": 11.235046248715314, "grad_norm": 0.054468441754579544, "learning_rate": 0.01, "loss": 1.9266, "step": 109317 }, { "epoch": 11.23535457348407, "grad_norm": 0.03153740614652634, "learning_rate": 0.01, "loss": 1.9368, "step": 109320 }, { "epoch": 11.235662898252826, "grad_norm": 0.05361641198396683, "learning_rate": 0.01, "loss": 1.9602, "step": 109323 }, { "epoch": 11.235971223021583, "grad_norm": 0.16195333003997803, "learning_rate": 0.01, "loss": 1.9527, "step": 109326 }, { "epoch": 11.236279547790339, "grad_norm": 0.050422552973032, "learning_rate": 0.01, "loss": 1.9279, "step": 109329 }, { "epoch": 11.236587872559095, "grad_norm": 0.057867612689733505, "learning_rate": 0.01, "loss": 1.9332, "step": 109332 }, { "epoch": 11.236896197327852, "grad_norm": 0.07480079680681229, "learning_rate": 0.01, "loss": 1.9293, "step": 109335 }, { "epoch": 11.237204522096608, "grad_norm": 0.05645005404949188, "learning_rate": 0.01, "loss": 1.9448, "step": 109338 }, { "epoch": 11.237512846865364, "grad_norm": 0.05145471915602684, "learning_rate": 0.01, "loss": 1.9477, "step": 109341 }, { "epoch": 11.23782117163412, "grad_norm": 0.03488278388977051, "learning_rate": 0.01, "loss": 1.9418, "step": 109344 }, { "epoch": 11.238129496402877, "grad_norm": 0.050637271255254745, "learning_rate": 0.01, "loss": 1.9418, "step": 109347 }, { "epoch": 11.238437821171635, "grad_norm": 0.06615863740444183, "learning_rate": 0.01, "loss": 1.9356, "step": 109350 }, { "epoch": 11.238746145940391, "grad_norm": 0.03594106063246727, "learning_rate": 0.01, "loss": 1.9326, "step": 109353 }, { "epoch": 11.239054470709148, "grad_norm": 0.07424032688140869, "learning_rate": 0.01, "loss": 1.9398, "step": 109356 }, { "epoch": 11.239362795477904, "grad_norm": 0.10016637295484543, "learning_rate": 0.01, "loss": 1.9374, "step": 109359 }, { "epoch": 11.23967112024666, "grad_norm": 0.09463337808847427, "learning_rate": 0.01, "loss": 1.9531, "step": 109362 }, { "epoch": 11.239979445015416, "grad_norm": 0.041809163987636566, "learning_rate": 0.01, "loss": 1.9433, "step": 109365 }, { "epoch": 11.240287769784173, "grad_norm": 0.08738263696432114, "learning_rate": 0.01, "loss": 1.9362, "step": 109368 }, { "epoch": 11.240596094552929, "grad_norm": 0.13975435495376587, "learning_rate": 0.01, "loss": 1.9502, "step": 109371 }, { "epoch": 11.240904419321685, "grad_norm": 0.059366509318351746, "learning_rate": 0.01, "loss": 1.9703, "step": 109374 }, { "epoch": 11.241212744090442, "grad_norm": 0.0640646293759346, "learning_rate": 0.01, "loss": 1.9486, "step": 109377 }, { "epoch": 11.241521068859198, "grad_norm": 0.04212968051433563, "learning_rate": 0.01, "loss": 1.9485, "step": 109380 }, { "epoch": 11.241829393627954, "grad_norm": 0.04662049189209938, "learning_rate": 0.01, "loss": 1.9481, "step": 109383 }, { "epoch": 11.24213771839671, "grad_norm": 0.06030857935547829, "learning_rate": 0.01, "loss": 1.9058, "step": 109386 }, { "epoch": 11.242446043165467, "grad_norm": 0.0616946667432785, "learning_rate": 0.01, "loss": 1.9165, "step": 109389 }, { "epoch": 11.242754367934223, "grad_norm": 0.06546429544687271, "learning_rate": 0.01, "loss": 1.9533, "step": 109392 }, { "epoch": 11.243062692702981, "grad_norm": 0.04250052571296692, "learning_rate": 0.01, "loss": 1.9595, "step": 109395 }, { "epoch": 11.243371017471738, "grad_norm": 0.04504207894206047, "learning_rate": 0.01, "loss": 1.8971, "step": 109398 }, { "epoch": 11.243679342240494, "grad_norm": 0.044746581465005875, "learning_rate": 0.01, "loss": 1.9246, "step": 109401 }, { "epoch": 11.24398766700925, "grad_norm": 0.0798160657286644, "learning_rate": 0.01, "loss": 1.9463, "step": 109404 }, { "epoch": 11.244295991778007, "grad_norm": 0.09753091633319855, "learning_rate": 0.01, "loss": 1.9563, "step": 109407 }, { "epoch": 11.244604316546763, "grad_norm": 0.1443919986486435, "learning_rate": 0.01, "loss": 1.9611, "step": 109410 }, { "epoch": 11.24491264131552, "grad_norm": 0.11860787868499756, "learning_rate": 0.01, "loss": 1.9504, "step": 109413 }, { "epoch": 11.245220966084275, "grad_norm": 0.046349428594112396, "learning_rate": 0.01, "loss": 1.9714, "step": 109416 }, { "epoch": 11.245529290853032, "grad_norm": 0.03825264796614647, "learning_rate": 0.01, "loss": 1.9407, "step": 109419 }, { "epoch": 11.245837615621788, "grad_norm": 0.03915859013795853, "learning_rate": 0.01, "loss": 1.936, "step": 109422 }, { "epoch": 11.246145940390544, "grad_norm": 0.04815271124243736, "learning_rate": 0.01, "loss": 1.9458, "step": 109425 }, { "epoch": 11.2464542651593, "grad_norm": 0.06440318375825882, "learning_rate": 0.01, "loss": 1.9582, "step": 109428 }, { "epoch": 11.246762589928057, "grad_norm": 0.07187019288539886, "learning_rate": 0.01, "loss": 1.92, "step": 109431 }, { "epoch": 11.247070914696813, "grad_norm": 0.07239823788404465, "learning_rate": 0.01, "loss": 1.9398, "step": 109434 }, { "epoch": 11.24737923946557, "grad_norm": 0.08541743457317352, "learning_rate": 0.01, "loss": 1.9229, "step": 109437 }, { "epoch": 11.247687564234328, "grad_norm": 0.046377550810575485, "learning_rate": 0.01, "loss": 1.9263, "step": 109440 }, { "epoch": 11.247995889003084, "grad_norm": 0.12575428187847137, "learning_rate": 0.01, "loss": 1.9513, "step": 109443 }, { "epoch": 11.24830421377184, "grad_norm": 0.14709772169589996, "learning_rate": 0.01, "loss": 1.9388, "step": 109446 }, { "epoch": 11.248612538540597, "grad_norm": 0.05810967832803726, "learning_rate": 0.01, "loss": 1.9367, "step": 109449 }, { "epoch": 11.248920863309353, "grad_norm": 0.04419909790158272, "learning_rate": 0.01, "loss": 1.9538, "step": 109452 }, { "epoch": 11.24922918807811, "grad_norm": 0.06409557908773422, "learning_rate": 0.01, "loss": 1.944, "step": 109455 }, { "epoch": 11.249537512846866, "grad_norm": 0.03288624808192253, "learning_rate": 0.01, "loss": 1.9345, "step": 109458 }, { "epoch": 11.249845837615622, "grad_norm": 0.05867474898695946, "learning_rate": 0.01, "loss": 1.9385, "step": 109461 }, { "epoch": 11.250154162384378, "grad_norm": 0.08372747153043747, "learning_rate": 0.01, "loss": 1.943, "step": 109464 }, { "epoch": 11.250462487153134, "grad_norm": 0.05529605224728584, "learning_rate": 0.01, "loss": 1.9391, "step": 109467 }, { "epoch": 11.25077081192189, "grad_norm": 0.09608209878206253, "learning_rate": 0.01, "loss": 1.9213, "step": 109470 }, { "epoch": 11.251079136690647, "grad_norm": 0.04316975548863411, "learning_rate": 0.01, "loss": 1.9311, "step": 109473 }, { "epoch": 11.251387461459403, "grad_norm": 0.06034945324063301, "learning_rate": 0.01, "loss": 1.9251, "step": 109476 }, { "epoch": 11.25169578622816, "grad_norm": 0.08070293813943863, "learning_rate": 0.01, "loss": 1.915, "step": 109479 }, { "epoch": 11.252004110996916, "grad_norm": 0.1095161959528923, "learning_rate": 0.01, "loss": 1.9477, "step": 109482 }, { "epoch": 11.252312435765674, "grad_norm": 0.038463711738586426, "learning_rate": 0.01, "loss": 1.9464, "step": 109485 }, { "epoch": 11.25262076053443, "grad_norm": 0.04902822524309158, "learning_rate": 0.01, "loss": 1.9376, "step": 109488 }, { "epoch": 11.252929085303187, "grad_norm": 0.03326786682009697, "learning_rate": 0.01, "loss": 1.9081, "step": 109491 }, { "epoch": 11.253237410071943, "grad_norm": 0.045016270130872726, "learning_rate": 0.01, "loss": 1.9322, "step": 109494 }, { "epoch": 11.2535457348407, "grad_norm": 0.05962817743420601, "learning_rate": 0.01, "loss": 1.9687, "step": 109497 }, { "epoch": 11.253854059609456, "grad_norm": 0.07711559534072876, "learning_rate": 0.01, "loss": 1.9404, "step": 109500 }, { "epoch": 11.254162384378212, "grad_norm": 0.05092623457312584, "learning_rate": 0.01, "loss": 1.9417, "step": 109503 }, { "epoch": 11.254470709146968, "grad_norm": 0.054487742483615875, "learning_rate": 0.01, "loss": 1.9373, "step": 109506 }, { "epoch": 11.254779033915725, "grad_norm": 0.03977763652801514, "learning_rate": 0.01, "loss": 1.9691, "step": 109509 }, { "epoch": 11.25508735868448, "grad_norm": 0.03340889886021614, "learning_rate": 0.01, "loss": 1.96, "step": 109512 }, { "epoch": 11.255395683453237, "grad_norm": 0.05879076197743416, "learning_rate": 0.01, "loss": 1.9522, "step": 109515 }, { "epoch": 11.255704008221993, "grad_norm": 0.1080188900232315, "learning_rate": 0.01, "loss": 1.918, "step": 109518 }, { "epoch": 11.25601233299075, "grad_norm": 0.05137190967798233, "learning_rate": 0.01, "loss": 1.9457, "step": 109521 }, { "epoch": 11.256320657759506, "grad_norm": 0.1083584800362587, "learning_rate": 0.01, "loss": 1.9401, "step": 109524 }, { "epoch": 11.256628982528262, "grad_norm": 0.05661296844482422, "learning_rate": 0.01, "loss": 1.9066, "step": 109527 }, { "epoch": 11.256937307297019, "grad_norm": 0.11602769047021866, "learning_rate": 0.01, "loss": 1.9655, "step": 109530 }, { "epoch": 11.257245632065777, "grad_norm": 0.06640040129423141, "learning_rate": 0.01, "loss": 1.956, "step": 109533 }, { "epoch": 11.257553956834533, "grad_norm": 0.054868511855602264, "learning_rate": 0.01, "loss": 1.9386, "step": 109536 }, { "epoch": 11.25786228160329, "grad_norm": 0.053349707275629044, "learning_rate": 0.01, "loss": 1.9465, "step": 109539 }, { "epoch": 11.258170606372046, "grad_norm": 0.04147634282708168, "learning_rate": 0.01, "loss": 1.9377, "step": 109542 }, { "epoch": 11.258478931140802, "grad_norm": 0.04902909696102142, "learning_rate": 0.01, "loss": 1.9465, "step": 109545 }, { "epoch": 11.258787255909558, "grad_norm": 0.10914880037307739, "learning_rate": 0.01, "loss": 1.9159, "step": 109548 }, { "epoch": 11.259095580678315, "grad_norm": 0.1410733461380005, "learning_rate": 0.01, "loss": 1.9643, "step": 109551 }, { "epoch": 11.259403905447071, "grad_norm": 0.07386939227581024, "learning_rate": 0.01, "loss": 1.9335, "step": 109554 }, { "epoch": 11.259712230215827, "grad_norm": 0.08032076060771942, "learning_rate": 0.01, "loss": 1.9537, "step": 109557 }, { "epoch": 11.260020554984584, "grad_norm": 0.03709203377366066, "learning_rate": 0.01, "loss": 1.927, "step": 109560 }, { "epoch": 11.26032887975334, "grad_norm": 0.0418517105281353, "learning_rate": 0.01, "loss": 1.9421, "step": 109563 }, { "epoch": 11.260637204522096, "grad_norm": 0.039387185126543045, "learning_rate": 0.01, "loss": 1.9379, "step": 109566 }, { "epoch": 11.260945529290852, "grad_norm": 0.06155497208237648, "learning_rate": 0.01, "loss": 1.948, "step": 109569 }, { "epoch": 11.261253854059609, "grad_norm": 0.1154722049832344, "learning_rate": 0.01, "loss": 1.9148, "step": 109572 }, { "epoch": 11.261562178828365, "grad_norm": 0.05332230404019356, "learning_rate": 0.01, "loss": 1.974, "step": 109575 }, { "epoch": 11.261870503597123, "grad_norm": 0.05801251530647278, "learning_rate": 0.01, "loss": 1.9637, "step": 109578 }, { "epoch": 11.26217882836588, "grad_norm": 0.05469183251261711, "learning_rate": 0.01, "loss": 1.9281, "step": 109581 }, { "epoch": 11.262487153134636, "grad_norm": 0.042408592998981476, "learning_rate": 0.01, "loss": 1.9487, "step": 109584 }, { "epoch": 11.262795477903392, "grad_norm": 0.05151085928082466, "learning_rate": 0.01, "loss": 1.9337, "step": 109587 }, { "epoch": 11.263103802672148, "grad_norm": 0.060142409056425095, "learning_rate": 0.01, "loss": 1.9408, "step": 109590 }, { "epoch": 11.263412127440905, "grad_norm": 0.10095762461423874, "learning_rate": 0.01, "loss": 1.9419, "step": 109593 }, { "epoch": 11.263720452209661, "grad_norm": 0.05893385782837868, "learning_rate": 0.01, "loss": 1.9155, "step": 109596 }, { "epoch": 11.264028776978417, "grad_norm": 0.09010785818099976, "learning_rate": 0.01, "loss": 1.933, "step": 109599 }, { "epoch": 11.264337101747174, "grad_norm": 0.08859475702047348, "learning_rate": 0.01, "loss": 1.9419, "step": 109602 }, { "epoch": 11.26464542651593, "grad_norm": 0.09706424921751022, "learning_rate": 0.01, "loss": 1.9358, "step": 109605 }, { "epoch": 11.264953751284686, "grad_norm": 0.0626566931605339, "learning_rate": 0.01, "loss": 1.9467, "step": 109608 }, { "epoch": 11.265262076053443, "grad_norm": 0.04158549755811691, "learning_rate": 0.01, "loss": 1.9312, "step": 109611 }, { "epoch": 11.265570400822199, "grad_norm": 0.06176656857132912, "learning_rate": 0.01, "loss": 1.9407, "step": 109614 }, { "epoch": 11.265878725590955, "grad_norm": 0.07009613513946533, "learning_rate": 0.01, "loss": 1.9248, "step": 109617 }, { "epoch": 11.266187050359711, "grad_norm": 0.07909108698368073, "learning_rate": 0.01, "loss": 1.9444, "step": 109620 }, { "epoch": 11.266495375128468, "grad_norm": 0.07806423306465149, "learning_rate": 0.01, "loss": 1.9414, "step": 109623 }, { "epoch": 11.266803699897226, "grad_norm": 0.05793457478284836, "learning_rate": 0.01, "loss": 1.946, "step": 109626 }, { "epoch": 11.267112024665982, "grad_norm": 0.05238494277000427, "learning_rate": 0.01, "loss": 1.9262, "step": 109629 }, { "epoch": 11.267420349434738, "grad_norm": 0.03375493735074997, "learning_rate": 0.01, "loss": 1.9262, "step": 109632 }, { "epoch": 11.267728674203495, "grad_norm": 0.04535922035574913, "learning_rate": 0.01, "loss": 1.9658, "step": 109635 }, { "epoch": 11.268036998972251, "grad_norm": 0.04779687523841858, "learning_rate": 0.01, "loss": 1.9282, "step": 109638 }, { "epoch": 11.268345323741007, "grad_norm": 0.07404086738824844, "learning_rate": 0.01, "loss": 1.9593, "step": 109641 }, { "epoch": 11.268653648509764, "grad_norm": 0.07996059954166412, "learning_rate": 0.01, "loss": 1.9591, "step": 109644 }, { "epoch": 11.26896197327852, "grad_norm": 0.11115096509456635, "learning_rate": 0.01, "loss": 1.941, "step": 109647 }, { "epoch": 11.269270298047276, "grad_norm": 0.0490814708173275, "learning_rate": 0.01, "loss": 1.93, "step": 109650 }, { "epoch": 11.269578622816033, "grad_norm": 0.11355841159820557, "learning_rate": 0.01, "loss": 1.9348, "step": 109653 }, { "epoch": 11.269886947584789, "grad_norm": 0.029677560552954674, "learning_rate": 0.01, "loss": 1.937, "step": 109656 }, { "epoch": 11.270195272353545, "grad_norm": 0.05585773289203644, "learning_rate": 0.01, "loss": 1.9462, "step": 109659 }, { "epoch": 11.270503597122302, "grad_norm": 0.126790851354599, "learning_rate": 0.01, "loss": 1.9392, "step": 109662 }, { "epoch": 11.270811921891058, "grad_norm": 0.0652090534567833, "learning_rate": 0.01, "loss": 1.9582, "step": 109665 }, { "epoch": 11.271120246659814, "grad_norm": 0.04160231351852417, "learning_rate": 0.01, "loss": 1.9414, "step": 109668 }, { "epoch": 11.271428571428572, "grad_norm": 0.07735766470432281, "learning_rate": 0.01, "loss": 1.9315, "step": 109671 }, { "epoch": 11.271736896197329, "grad_norm": 0.03592461347579956, "learning_rate": 0.01, "loss": 1.9272, "step": 109674 }, { "epoch": 11.272045220966085, "grad_norm": 0.042836081236600876, "learning_rate": 0.01, "loss": 1.938, "step": 109677 }, { "epoch": 11.272353545734841, "grad_norm": 0.03916572406888008, "learning_rate": 0.01, "loss": 1.9262, "step": 109680 }, { "epoch": 11.272661870503597, "grad_norm": 0.04746629670262337, "learning_rate": 0.01, "loss": 1.9265, "step": 109683 }, { "epoch": 11.272970195272354, "grad_norm": 0.052999433130025864, "learning_rate": 0.01, "loss": 1.9253, "step": 109686 }, { "epoch": 11.27327852004111, "grad_norm": 0.047792401164770126, "learning_rate": 0.01, "loss": 1.9411, "step": 109689 }, { "epoch": 11.273586844809866, "grad_norm": 0.05411859229207039, "learning_rate": 0.01, "loss": 1.9423, "step": 109692 }, { "epoch": 11.273895169578623, "grad_norm": 0.041686661541461945, "learning_rate": 0.01, "loss": 1.9467, "step": 109695 }, { "epoch": 11.274203494347379, "grad_norm": 0.061750467866659164, "learning_rate": 0.01, "loss": 1.9434, "step": 109698 }, { "epoch": 11.274511819116135, "grad_norm": 0.05879466235637665, "learning_rate": 0.01, "loss": 1.9478, "step": 109701 }, { "epoch": 11.274820143884892, "grad_norm": 0.07283945381641388, "learning_rate": 0.01, "loss": 1.93, "step": 109704 }, { "epoch": 11.275128468653648, "grad_norm": 0.0472678504884243, "learning_rate": 0.01, "loss": 1.9462, "step": 109707 }, { "epoch": 11.275436793422404, "grad_norm": 0.12681566178798676, "learning_rate": 0.01, "loss": 1.9381, "step": 109710 }, { "epoch": 11.27574511819116, "grad_norm": 0.033542435616254807, "learning_rate": 0.01, "loss": 1.9198, "step": 109713 }, { "epoch": 11.276053442959919, "grad_norm": 0.04590669274330139, "learning_rate": 0.01, "loss": 1.9326, "step": 109716 }, { "epoch": 11.276361767728675, "grad_norm": 0.10272151976823807, "learning_rate": 0.01, "loss": 1.9414, "step": 109719 }, { "epoch": 11.276670092497431, "grad_norm": 0.06324021518230438, "learning_rate": 0.01, "loss": 1.934, "step": 109722 }, { "epoch": 11.276978417266188, "grad_norm": 0.05721462517976761, "learning_rate": 0.01, "loss": 1.9567, "step": 109725 }, { "epoch": 11.277286742034944, "grad_norm": 0.04144064337015152, "learning_rate": 0.01, "loss": 1.9502, "step": 109728 }, { "epoch": 11.2775950668037, "grad_norm": 0.041763003915548325, "learning_rate": 0.01, "loss": 1.9156, "step": 109731 }, { "epoch": 11.277903391572456, "grad_norm": 0.044674765318632126, "learning_rate": 0.01, "loss": 1.9183, "step": 109734 }, { "epoch": 11.278211716341213, "grad_norm": 0.04908163473010063, "learning_rate": 0.01, "loss": 1.9092, "step": 109737 }, { "epoch": 11.278520041109969, "grad_norm": 0.06857731193304062, "learning_rate": 0.01, "loss": 1.9677, "step": 109740 }, { "epoch": 11.278828365878725, "grad_norm": 0.07278558611869812, "learning_rate": 0.01, "loss": 1.9242, "step": 109743 }, { "epoch": 11.279136690647482, "grad_norm": 0.07408792525529861, "learning_rate": 0.01, "loss": 1.9492, "step": 109746 }, { "epoch": 11.279445015416238, "grad_norm": 0.04330144822597504, "learning_rate": 0.01, "loss": 1.9281, "step": 109749 }, { "epoch": 11.279753340184994, "grad_norm": 0.06111234799027443, "learning_rate": 0.01, "loss": 1.9503, "step": 109752 }, { "epoch": 11.28006166495375, "grad_norm": 0.14030754566192627, "learning_rate": 0.01, "loss": 1.9335, "step": 109755 }, { "epoch": 11.280369989722507, "grad_norm": 0.05981336161494255, "learning_rate": 0.01, "loss": 1.9267, "step": 109758 }, { "epoch": 11.280678314491265, "grad_norm": 0.06106378883123398, "learning_rate": 0.01, "loss": 1.9217, "step": 109761 }, { "epoch": 11.280986639260021, "grad_norm": 0.033522509038448334, "learning_rate": 0.01, "loss": 1.948, "step": 109764 }, { "epoch": 11.281294964028778, "grad_norm": 0.03565683215856552, "learning_rate": 0.01, "loss": 1.9357, "step": 109767 }, { "epoch": 11.281603288797534, "grad_norm": 0.035627976059913635, "learning_rate": 0.01, "loss": 1.9482, "step": 109770 }, { "epoch": 11.28191161356629, "grad_norm": 0.06138899549841881, "learning_rate": 0.01, "loss": 1.9306, "step": 109773 }, { "epoch": 11.282219938335047, "grad_norm": 0.08396279811859131, "learning_rate": 0.01, "loss": 1.9397, "step": 109776 }, { "epoch": 11.282528263103803, "grad_norm": 0.06644977629184723, "learning_rate": 0.01, "loss": 1.9341, "step": 109779 }, { "epoch": 11.28283658787256, "grad_norm": 0.049319833517074585, "learning_rate": 0.01, "loss": 1.9648, "step": 109782 }, { "epoch": 11.283144912641315, "grad_norm": 0.0407063327729702, "learning_rate": 0.01, "loss": 1.9299, "step": 109785 }, { "epoch": 11.283453237410072, "grad_norm": 0.0696980431675911, "learning_rate": 0.01, "loss": 1.9306, "step": 109788 }, { "epoch": 11.283761562178828, "grad_norm": 0.05926033854484558, "learning_rate": 0.01, "loss": 1.9544, "step": 109791 }, { "epoch": 11.284069886947584, "grad_norm": 0.04289416968822479, "learning_rate": 0.01, "loss": 1.9357, "step": 109794 }, { "epoch": 11.28437821171634, "grad_norm": 0.03649679198861122, "learning_rate": 0.01, "loss": 1.9199, "step": 109797 }, { "epoch": 11.284686536485097, "grad_norm": 0.11007057130336761, "learning_rate": 0.01, "loss": 1.961, "step": 109800 }, { "epoch": 11.284994861253853, "grad_norm": 0.051835693418979645, "learning_rate": 0.01, "loss": 1.9414, "step": 109803 }, { "epoch": 11.28530318602261, "grad_norm": 0.03663384169340134, "learning_rate": 0.01, "loss": 1.9489, "step": 109806 }, { "epoch": 11.285611510791368, "grad_norm": 0.0417393334209919, "learning_rate": 0.01, "loss": 1.9479, "step": 109809 }, { "epoch": 11.285919835560124, "grad_norm": 0.08153456449508667, "learning_rate": 0.01, "loss": 1.9286, "step": 109812 }, { "epoch": 11.28622816032888, "grad_norm": 0.05325230956077576, "learning_rate": 0.01, "loss": 1.9454, "step": 109815 }, { "epoch": 11.286536485097637, "grad_norm": 0.05065246298909187, "learning_rate": 0.01, "loss": 1.9573, "step": 109818 }, { "epoch": 11.286844809866393, "grad_norm": 0.144111767411232, "learning_rate": 0.01, "loss": 1.9131, "step": 109821 }, { "epoch": 11.28715313463515, "grad_norm": 0.09719037264585495, "learning_rate": 0.01, "loss": 1.9275, "step": 109824 }, { "epoch": 11.287461459403906, "grad_norm": 0.03678983822464943, "learning_rate": 0.01, "loss": 1.9388, "step": 109827 }, { "epoch": 11.287769784172662, "grad_norm": 0.08810188621282578, "learning_rate": 0.01, "loss": 1.9373, "step": 109830 }, { "epoch": 11.288078108941418, "grad_norm": 0.05677026882767677, "learning_rate": 0.01, "loss": 1.9534, "step": 109833 }, { "epoch": 11.288386433710174, "grad_norm": 0.09825364500284195, "learning_rate": 0.01, "loss": 1.9453, "step": 109836 }, { "epoch": 11.28869475847893, "grad_norm": 0.061190493404865265, "learning_rate": 0.01, "loss": 1.9612, "step": 109839 }, { "epoch": 11.289003083247687, "grad_norm": 0.060511138290166855, "learning_rate": 0.01, "loss": 1.9501, "step": 109842 }, { "epoch": 11.289311408016443, "grad_norm": 0.056555476039648056, "learning_rate": 0.01, "loss": 1.9248, "step": 109845 }, { "epoch": 11.2896197327852, "grad_norm": 0.07167981564998627, "learning_rate": 0.01, "loss": 1.9586, "step": 109848 }, { "epoch": 11.289928057553956, "grad_norm": 0.08068525046110153, "learning_rate": 0.01, "loss": 1.9401, "step": 109851 }, { "epoch": 11.290236382322714, "grad_norm": 0.07005833089351654, "learning_rate": 0.01, "loss": 1.9316, "step": 109854 }, { "epoch": 11.29054470709147, "grad_norm": 0.05481821298599243, "learning_rate": 0.01, "loss": 1.9573, "step": 109857 }, { "epoch": 11.290853031860227, "grad_norm": 0.0465649776160717, "learning_rate": 0.01, "loss": 1.9418, "step": 109860 }, { "epoch": 11.291161356628983, "grad_norm": 0.05188848450779915, "learning_rate": 0.01, "loss": 1.9382, "step": 109863 }, { "epoch": 11.29146968139774, "grad_norm": 0.04130940139293671, "learning_rate": 0.01, "loss": 1.9259, "step": 109866 }, { "epoch": 11.291778006166496, "grad_norm": 0.10015565901994705, "learning_rate": 0.01, "loss": 1.9522, "step": 109869 }, { "epoch": 11.292086330935252, "grad_norm": 0.042796485126018524, "learning_rate": 0.01, "loss": 1.9584, "step": 109872 }, { "epoch": 11.292394655704008, "grad_norm": 0.12834590673446655, "learning_rate": 0.01, "loss": 1.9281, "step": 109875 }, { "epoch": 11.292702980472765, "grad_norm": 0.17933888733386993, "learning_rate": 0.01, "loss": 1.9564, "step": 109878 }, { "epoch": 11.29301130524152, "grad_norm": 0.10874367505311966, "learning_rate": 0.01, "loss": 1.9686, "step": 109881 }, { "epoch": 11.293319630010277, "grad_norm": 0.11151270568370819, "learning_rate": 0.01, "loss": 1.9357, "step": 109884 }, { "epoch": 11.293627954779033, "grad_norm": 0.06689925491809845, "learning_rate": 0.01, "loss": 1.9344, "step": 109887 }, { "epoch": 11.29393627954779, "grad_norm": 0.04171884059906006, "learning_rate": 0.01, "loss": 1.9309, "step": 109890 }, { "epoch": 11.294244604316546, "grad_norm": 0.04404659941792488, "learning_rate": 0.01, "loss": 1.9135, "step": 109893 }, { "epoch": 11.294552929085302, "grad_norm": 0.05716705322265625, "learning_rate": 0.01, "loss": 1.9538, "step": 109896 }, { "epoch": 11.294861253854059, "grad_norm": 0.034998469054698944, "learning_rate": 0.01, "loss": 1.943, "step": 109899 }, { "epoch": 11.295169578622817, "grad_norm": 0.04066811501979828, "learning_rate": 0.01, "loss": 1.9366, "step": 109902 }, { "epoch": 11.295477903391573, "grad_norm": 0.10792528092861176, "learning_rate": 0.01, "loss": 1.9671, "step": 109905 }, { "epoch": 11.29578622816033, "grad_norm": 0.0576433427631855, "learning_rate": 0.01, "loss": 1.934, "step": 109908 }, { "epoch": 11.296094552929086, "grad_norm": 0.11625795811414719, "learning_rate": 0.01, "loss": 1.9361, "step": 109911 }, { "epoch": 11.296402877697842, "grad_norm": 0.03960056230425835, "learning_rate": 0.01, "loss": 1.936, "step": 109914 }, { "epoch": 11.296711202466598, "grad_norm": 0.045928679406642914, "learning_rate": 0.01, "loss": 1.946, "step": 109917 }, { "epoch": 11.297019527235355, "grad_norm": 0.05385993421077728, "learning_rate": 0.01, "loss": 1.9316, "step": 109920 }, { "epoch": 11.297327852004111, "grad_norm": 0.059563588351011276, "learning_rate": 0.01, "loss": 1.9268, "step": 109923 }, { "epoch": 11.297636176772867, "grad_norm": 0.04696774110198021, "learning_rate": 0.01, "loss": 1.9572, "step": 109926 }, { "epoch": 11.297944501541624, "grad_norm": 0.061480265110731125, "learning_rate": 0.01, "loss": 1.9455, "step": 109929 }, { "epoch": 11.29825282631038, "grad_norm": 0.07148461043834686, "learning_rate": 0.01, "loss": 1.9297, "step": 109932 }, { "epoch": 11.298561151079136, "grad_norm": 0.048205867409706116, "learning_rate": 0.01, "loss": 1.9317, "step": 109935 }, { "epoch": 11.298869475847892, "grad_norm": 0.031730081886053085, "learning_rate": 0.01, "loss": 1.9298, "step": 109938 }, { "epoch": 11.299177800616649, "grad_norm": 0.04472745582461357, "learning_rate": 0.01, "loss": 1.9448, "step": 109941 }, { "epoch": 11.299486125385407, "grad_norm": 0.10435371845960617, "learning_rate": 0.01, "loss": 1.8878, "step": 109944 }, { "epoch": 11.299794450154163, "grad_norm": 0.1116335466504097, "learning_rate": 0.01, "loss": 1.9313, "step": 109947 }, { "epoch": 11.30010277492292, "grad_norm": 0.04797052964568138, "learning_rate": 0.01, "loss": 1.9701, "step": 109950 }, { "epoch": 11.300411099691676, "grad_norm": 0.06291238218545914, "learning_rate": 0.01, "loss": 1.9205, "step": 109953 }, { "epoch": 11.300719424460432, "grad_norm": 0.13573448359966278, "learning_rate": 0.01, "loss": 1.9327, "step": 109956 }, { "epoch": 11.301027749229188, "grad_norm": 0.042475681751966476, "learning_rate": 0.01, "loss": 1.9672, "step": 109959 }, { "epoch": 11.301336073997945, "grad_norm": 0.12064820528030396, "learning_rate": 0.01, "loss": 1.9294, "step": 109962 }, { "epoch": 11.301644398766701, "grad_norm": 0.0992126539349556, "learning_rate": 0.01, "loss": 1.9161, "step": 109965 }, { "epoch": 11.301952723535457, "grad_norm": 0.03413967788219452, "learning_rate": 0.01, "loss": 1.9614, "step": 109968 }, { "epoch": 11.302261048304214, "grad_norm": 0.08574753999710083, "learning_rate": 0.01, "loss": 1.9369, "step": 109971 }, { "epoch": 11.30256937307297, "grad_norm": 0.07113710790872574, "learning_rate": 0.01, "loss": 1.9332, "step": 109974 }, { "epoch": 11.302877697841726, "grad_norm": 0.06103216111660004, "learning_rate": 0.01, "loss": 1.9498, "step": 109977 }, { "epoch": 11.303186022610483, "grad_norm": 0.05378776416182518, "learning_rate": 0.01, "loss": 1.9426, "step": 109980 }, { "epoch": 11.303494347379239, "grad_norm": 0.04234358295798302, "learning_rate": 0.01, "loss": 1.9483, "step": 109983 }, { "epoch": 11.303802672147995, "grad_norm": 0.044765591621398926, "learning_rate": 0.01, "loss": 1.9632, "step": 109986 }, { "epoch": 11.304110996916751, "grad_norm": 0.04169373959302902, "learning_rate": 0.01, "loss": 1.96, "step": 109989 }, { "epoch": 11.30441932168551, "grad_norm": 0.10466013848781586, "learning_rate": 0.01, "loss": 1.9227, "step": 109992 }, { "epoch": 11.304727646454266, "grad_norm": 0.0452800951898098, "learning_rate": 0.01, "loss": 1.9458, "step": 109995 }, { "epoch": 11.305035971223022, "grad_norm": 0.06505489349365234, "learning_rate": 0.01, "loss": 1.9391, "step": 109998 }, { "epoch": 11.305344295991778, "grad_norm": 0.05290715768933296, "learning_rate": 0.01, "loss": 1.9321, "step": 110001 }, { "epoch": 11.305652620760535, "grad_norm": 0.03846494108438492, "learning_rate": 0.01, "loss": 1.96, "step": 110004 }, { "epoch": 11.305960945529291, "grad_norm": 0.041611991822719574, "learning_rate": 0.01, "loss": 1.9514, "step": 110007 }, { "epoch": 11.306269270298047, "grad_norm": 0.09771797060966492, "learning_rate": 0.01, "loss": 1.9546, "step": 110010 }, { "epoch": 11.306577595066804, "grad_norm": 0.0961562916636467, "learning_rate": 0.01, "loss": 1.9389, "step": 110013 }, { "epoch": 11.30688591983556, "grad_norm": 0.12497589737176895, "learning_rate": 0.01, "loss": 1.9444, "step": 110016 }, { "epoch": 11.307194244604316, "grad_norm": 0.10389663279056549, "learning_rate": 0.01, "loss": 1.9195, "step": 110019 }, { "epoch": 11.307502569373073, "grad_norm": 0.055324338376522064, "learning_rate": 0.01, "loss": 1.9477, "step": 110022 }, { "epoch": 11.307810894141829, "grad_norm": 0.04517209529876709, "learning_rate": 0.01, "loss": 1.942, "step": 110025 }, { "epoch": 11.308119218910585, "grad_norm": 0.11330480873584747, "learning_rate": 0.01, "loss": 1.9508, "step": 110028 }, { "epoch": 11.308427543679342, "grad_norm": 0.03730342537164688, "learning_rate": 0.01, "loss": 1.9246, "step": 110031 }, { "epoch": 11.308735868448098, "grad_norm": 0.03379646688699722, "learning_rate": 0.01, "loss": 1.9369, "step": 110034 }, { "epoch": 11.309044193216856, "grad_norm": 0.04477003961801529, "learning_rate": 0.01, "loss": 1.9594, "step": 110037 }, { "epoch": 11.309352517985612, "grad_norm": 0.05759940668940544, "learning_rate": 0.01, "loss": 1.9464, "step": 110040 }, { "epoch": 11.309660842754369, "grad_norm": 0.06768691539764404, "learning_rate": 0.01, "loss": 1.9269, "step": 110043 }, { "epoch": 11.309969167523125, "grad_norm": 0.07362806051969528, "learning_rate": 0.01, "loss": 1.942, "step": 110046 }, { "epoch": 11.310277492291881, "grad_norm": 0.07099780440330505, "learning_rate": 0.01, "loss": 1.9299, "step": 110049 }, { "epoch": 11.310585817060637, "grad_norm": 0.07642699033021927, "learning_rate": 0.01, "loss": 1.9268, "step": 110052 }, { "epoch": 11.310894141829394, "grad_norm": 0.05950343236327171, "learning_rate": 0.01, "loss": 1.9327, "step": 110055 }, { "epoch": 11.31120246659815, "grad_norm": 0.04728717356920242, "learning_rate": 0.01, "loss": 1.933, "step": 110058 }, { "epoch": 11.311510791366906, "grad_norm": 0.052025727927684784, "learning_rate": 0.01, "loss": 1.9453, "step": 110061 }, { "epoch": 11.311819116135663, "grad_norm": 0.03810366615653038, "learning_rate": 0.01, "loss": 1.9495, "step": 110064 }, { "epoch": 11.312127440904419, "grad_norm": 0.12762334942817688, "learning_rate": 0.01, "loss": 1.9248, "step": 110067 }, { "epoch": 11.312435765673175, "grad_norm": 0.06287941336631775, "learning_rate": 0.01, "loss": 1.9351, "step": 110070 }, { "epoch": 11.312744090441932, "grad_norm": 0.050556425005197525, "learning_rate": 0.01, "loss": 1.9301, "step": 110073 }, { "epoch": 11.313052415210688, "grad_norm": 0.052405212074518204, "learning_rate": 0.01, "loss": 1.9446, "step": 110076 }, { "epoch": 11.313360739979444, "grad_norm": 0.05936199799180031, "learning_rate": 0.01, "loss": 1.9341, "step": 110079 }, { "epoch": 11.3136690647482, "grad_norm": 0.08959811925888062, "learning_rate": 0.01, "loss": 1.9657, "step": 110082 }, { "epoch": 11.313977389516959, "grad_norm": 0.06256865710020065, "learning_rate": 0.01, "loss": 1.9553, "step": 110085 }, { "epoch": 11.314285714285715, "grad_norm": 0.07469283044338226, "learning_rate": 0.01, "loss": 1.917, "step": 110088 }, { "epoch": 11.314594039054471, "grad_norm": 0.0618683397769928, "learning_rate": 0.01, "loss": 1.9577, "step": 110091 }, { "epoch": 11.314902363823228, "grad_norm": 0.05982283130288124, "learning_rate": 0.01, "loss": 1.9403, "step": 110094 }, { "epoch": 11.315210688591984, "grad_norm": 0.0350305549800396, "learning_rate": 0.01, "loss": 1.9465, "step": 110097 }, { "epoch": 11.31551901336074, "grad_norm": 0.04055385664105415, "learning_rate": 0.01, "loss": 1.9074, "step": 110100 }, { "epoch": 11.315827338129496, "grad_norm": 0.044621482491493225, "learning_rate": 0.01, "loss": 1.9552, "step": 110103 }, { "epoch": 11.316135662898253, "grad_norm": 0.12125648558139801, "learning_rate": 0.01, "loss": 1.9402, "step": 110106 }, { "epoch": 11.316443987667009, "grad_norm": 0.11933153867721558, "learning_rate": 0.01, "loss": 1.9435, "step": 110109 }, { "epoch": 11.316752312435765, "grad_norm": 0.08058884739875793, "learning_rate": 0.01, "loss": 1.93, "step": 110112 }, { "epoch": 11.317060637204522, "grad_norm": 0.05547872930765152, "learning_rate": 0.01, "loss": 1.9387, "step": 110115 }, { "epoch": 11.317368961973278, "grad_norm": 0.05052429437637329, "learning_rate": 0.01, "loss": 1.927, "step": 110118 }, { "epoch": 11.317677286742034, "grad_norm": 0.03526543080806732, "learning_rate": 0.01, "loss": 1.9266, "step": 110121 }, { "epoch": 11.31798561151079, "grad_norm": 0.0412910059094429, "learning_rate": 0.01, "loss": 1.9434, "step": 110124 }, { "epoch": 11.318293936279547, "grad_norm": 0.08957338333129883, "learning_rate": 0.01, "loss": 1.9306, "step": 110127 }, { "epoch": 11.318602261048305, "grad_norm": 0.12582367658615112, "learning_rate": 0.01, "loss": 1.9541, "step": 110130 }, { "epoch": 11.318910585817061, "grad_norm": 0.07041830569505692, "learning_rate": 0.01, "loss": 1.9291, "step": 110133 }, { "epoch": 11.319218910585818, "grad_norm": 0.04017024114727974, "learning_rate": 0.01, "loss": 1.9394, "step": 110136 }, { "epoch": 11.319527235354574, "grad_norm": 0.05196457728743553, "learning_rate": 0.01, "loss": 1.9388, "step": 110139 }, { "epoch": 11.31983556012333, "grad_norm": 0.05180789530277252, "learning_rate": 0.01, "loss": 1.9578, "step": 110142 }, { "epoch": 11.320143884892087, "grad_norm": 0.040728844702243805, "learning_rate": 0.01, "loss": 1.9645, "step": 110145 }, { "epoch": 11.320452209660843, "grad_norm": 0.056863050907850266, "learning_rate": 0.01, "loss": 1.9589, "step": 110148 }, { "epoch": 11.3207605344296, "grad_norm": 0.06815367192029953, "learning_rate": 0.01, "loss": 1.9482, "step": 110151 }, { "epoch": 11.321068859198355, "grad_norm": 0.04380220174789429, "learning_rate": 0.01, "loss": 1.9338, "step": 110154 }, { "epoch": 11.321377183967112, "grad_norm": 0.12742555141448975, "learning_rate": 0.01, "loss": 1.9604, "step": 110157 }, { "epoch": 11.321685508735868, "grad_norm": 0.08063594251871109, "learning_rate": 0.01, "loss": 1.9593, "step": 110160 }, { "epoch": 11.321993833504624, "grad_norm": 0.07678404450416565, "learning_rate": 0.01, "loss": 1.9315, "step": 110163 }, { "epoch": 11.32230215827338, "grad_norm": 0.12479738891124725, "learning_rate": 0.01, "loss": 1.9444, "step": 110166 }, { "epoch": 11.322610483042137, "grad_norm": 0.13514432311058044, "learning_rate": 0.01, "loss": 1.9488, "step": 110169 }, { "epoch": 11.322918807810893, "grad_norm": 0.13325108587741852, "learning_rate": 0.01, "loss": 1.9447, "step": 110172 }, { "epoch": 11.323227132579651, "grad_norm": 0.12824301421642303, "learning_rate": 0.01, "loss": 1.9312, "step": 110175 }, { "epoch": 11.323535457348408, "grad_norm": 0.05898331105709076, "learning_rate": 0.01, "loss": 1.9096, "step": 110178 }, { "epoch": 11.323843782117164, "grad_norm": 0.0602484755218029, "learning_rate": 0.01, "loss": 1.9244, "step": 110181 }, { "epoch": 11.32415210688592, "grad_norm": 0.060096725821495056, "learning_rate": 0.01, "loss": 1.9454, "step": 110184 }, { "epoch": 11.324460431654677, "grad_norm": 0.07417149096727371, "learning_rate": 0.01, "loss": 1.9684, "step": 110187 }, { "epoch": 11.324768756423433, "grad_norm": 0.05343138799071312, "learning_rate": 0.01, "loss": 1.9601, "step": 110190 }, { "epoch": 11.32507708119219, "grad_norm": 0.03471410274505615, "learning_rate": 0.01, "loss": 1.9401, "step": 110193 }, { "epoch": 11.325385405960946, "grad_norm": 0.04164749011397362, "learning_rate": 0.01, "loss": 1.9671, "step": 110196 }, { "epoch": 11.325693730729702, "grad_norm": 0.04597137123346329, "learning_rate": 0.01, "loss": 1.9597, "step": 110199 }, { "epoch": 11.326002055498458, "grad_norm": 0.051889240741729736, "learning_rate": 0.01, "loss": 1.9388, "step": 110202 }, { "epoch": 11.326310380267214, "grad_norm": 0.05259767174720764, "learning_rate": 0.01, "loss": 1.9591, "step": 110205 }, { "epoch": 11.32661870503597, "grad_norm": 0.10912121832370758, "learning_rate": 0.01, "loss": 1.933, "step": 110208 }, { "epoch": 11.326927029804727, "grad_norm": 0.09809891879558563, "learning_rate": 0.01, "loss": 1.9438, "step": 110211 }, { "epoch": 11.327235354573483, "grad_norm": 0.12826496362686157, "learning_rate": 0.01, "loss": 1.9549, "step": 110214 }, { "epoch": 11.32754367934224, "grad_norm": 0.03926202282309532, "learning_rate": 0.01, "loss": 1.9582, "step": 110217 }, { "epoch": 11.327852004110998, "grad_norm": 0.08739673346281052, "learning_rate": 0.01, "loss": 1.9246, "step": 110220 }, { "epoch": 11.328160328879754, "grad_norm": 0.1496817171573639, "learning_rate": 0.01, "loss": 1.9196, "step": 110223 }, { "epoch": 11.32846865364851, "grad_norm": 0.06414765864610672, "learning_rate": 0.01, "loss": 1.9146, "step": 110226 }, { "epoch": 11.328776978417267, "grad_norm": 0.04865317791700363, "learning_rate": 0.01, "loss": 1.927, "step": 110229 }, { "epoch": 11.329085303186023, "grad_norm": 0.045886069536209106, "learning_rate": 0.01, "loss": 1.9837, "step": 110232 }, { "epoch": 11.32939362795478, "grad_norm": 0.047968700528144836, "learning_rate": 0.01, "loss": 1.9529, "step": 110235 }, { "epoch": 11.329701952723536, "grad_norm": 0.04234057292342186, "learning_rate": 0.01, "loss": 1.9486, "step": 110238 }, { "epoch": 11.330010277492292, "grad_norm": 0.04693339392542839, "learning_rate": 0.01, "loss": 1.9407, "step": 110241 }, { "epoch": 11.330318602261048, "grad_norm": 0.03062617965042591, "learning_rate": 0.01, "loss": 1.9366, "step": 110244 }, { "epoch": 11.330626927029805, "grad_norm": 0.04372096061706543, "learning_rate": 0.01, "loss": 1.9454, "step": 110247 }, { "epoch": 11.33093525179856, "grad_norm": 0.05214737355709076, "learning_rate": 0.01, "loss": 1.9449, "step": 110250 }, { "epoch": 11.331243576567317, "grad_norm": 0.04116861894726753, "learning_rate": 0.01, "loss": 1.9358, "step": 110253 }, { "epoch": 11.331551901336073, "grad_norm": 0.13659925758838654, "learning_rate": 0.01, "loss": 1.9404, "step": 110256 }, { "epoch": 11.33186022610483, "grad_norm": 0.07153276354074478, "learning_rate": 0.01, "loss": 1.939, "step": 110259 }, { "epoch": 11.332168550873586, "grad_norm": 0.046991247683763504, "learning_rate": 0.01, "loss": 1.932, "step": 110262 }, { "epoch": 11.332476875642342, "grad_norm": 0.03522711619734764, "learning_rate": 0.01, "loss": 1.9217, "step": 110265 }, { "epoch": 11.3327852004111, "grad_norm": 0.05653287097811699, "learning_rate": 0.01, "loss": 1.927, "step": 110268 }, { "epoch": 11.333093525179857, "grad_norm": 0.14457939565181732, "learning_rate": 0.01, "loss": 1.921, "step": 110271 }, { "epoch": 11.333401849948613, "grad_norm": 0.04326567426323891, "learning_rate": 0.01, "loss": 1.9359, "step": 110274 }, { "epoch": 11.33371017471737, "grad_norm": 0.08805881440639496, "learning_rate": 0.01, "loss": 1.9378, "step": 110277 }, { "epoch": 11.334018499486126, "grad_norm": 0.05980727821588516, "learning_rate": 0.01, "loss": 1.9478, "step": 110280 }, { "epoch": 11.334326824254882, "grad_norm": 0.08690473437309265, "learning_rate": 0.01, "loss": 1.9748, "step": 110283 }, { "epoch": 11.334635149023638, "grad_norm": 0.04590797424316406, "learning_rate": 0.01, "loss": 1.9237, "step": 110286 }, { "epoch": 11.334943473792395, "grad_norm": 0.0751267671585083, "learning_rate": 0.01, "loss": 1.9526, "step": 110289 }, { "epoch": 11.335251798561151, "grad_norm": 0.06569565087556839, "learning_rate": 0.01, "loss": 1.9352, "step": 110292 }, { "epoch": 11.335560123329907, "grad_norm": 0.07285059988498688, "learning_rate": 0.01, "loss": 1.9444, "step": 110295 }, { "epoch": 11.335868448098664, "grad_norm": 0.13581648468971252, "learning_rate": 0.01, "loss": 1.9324, "step": 110298 }, { "epoch": 11.33617677286742, "grad_norm": 0.051626384258270264, "learning_rate": 0.01, "loss": 1.9264, "step": 110301 }, { "epoch": 11.336485097636176, "grad_norm": 0.06466205418109894, "learning_rate": 0.01, "loss": 1.94, "step": 110304 }, { "epoch": 11.336793422404932, "grad_norm": 0.03319898620247841, "learning_rate": 0.01, "loss": 1.9087, "step": 110307 }, { "epoch": 11.337101747173689, "grad_norm": 0.04199283570051193, "learning_rate": 0.01, "loss": 1.9144, "step": 110310 }, { "epoch": 11.337410071942447, "grad_norm": 0.044764723628759384, "learning_rate": 0.01, "loss": 1.9557, "step": 110313 }, { "epoch": 11.337718396711203, "grad_norm": 0.036410681903362274, "learning_rate": 0.01, "loss": 1.9385, "step": 110316 }, { "epoch": 11.33802672147996, "grad_norm": 0.06427314132452011, "learning_rate": 0.01, "loss": 1.9405, "step": 110319 }, { "epoch": 11.338335046248716, "grad_norm": 0.08712683618068695, "learning_rate": 0.01, "loss": 1.9453, "step": 110322 }, { "epoch": 11.338643371017472, "grad_norm": 0.059931352734565735, "learning_rate": 0.01, "loss": 1.9193, "step": 110325 }, { "epoch": 11.338951695786228, "grad_norm": 0.03852000832557678, "learning_rate": 0.01, "loss": 1.9347, "step": 110328 }, { "epoch": 11.339260020554985, "grad_norm": 0.04189864918589592, "learning_rate": 0.01, "loss": 1.9273, "step": 110331 }, { "epoch": 11.339568345323741, "grad_norm": 0.09058728069067001, "learning_rate": 0.01, "loss": 1.9366, "step": 110334 }, { "epoch": 11.339876670092497, "grad_norm": 0.16685323417186737, "learning_rate": 0.01, "loss": 1.9336, "step": 110337 }, { "epoch": 11.340184994861254, "grad_norm": 0.11387660354375839, "learning_rate": 0.01, "loss": 1.9364, "step": 110340 }, { "epoch": 11.34049331963001, "grad_norm": 0.0847175195813179, "learning_rate": 0.01, "loss": 1.9231, "step": 110343 }, { "epoch": 11.340801644398766, "grad_norm": 0.05471164360642433, "learning_rate": 0.01, "loss": 1.937, "step": 110346 }, { "epoch": 11.341109969167523, "grad_norm": 0.054385025054216385, "learning_rate": 0.01, "loss": 1.9289, "step": 110349 }, { "epoch": 11.341418293936279, "grad_norm": 0.043967898935079575, "learning_rate": 0.01, "loss": 1.9513, "step": 110352 }, { "epoch": 11.341726618705035, "grad_norm": 0.06875190883874893, "learning_rate": 0.01, "loss": 1.9457, "step": 110355 }, { "epoch": 11.342034943473791, "grad_norm": 0.046824581921100616, "learning_rate": 0.01, "loss": 1.93, "step": 110358 }, { "epoch": 11.34234326824255, "grad_norm": 0.08238361030817032, "learning_rate": 0.01, "loss": 1.9477, "step": 110361 }, { "epoch": 11.342651593011306, "grad_norm": 0.11055076867341995, "learning_rate": 0.01, "loss": 1.9296, "step": 110364 }, { "epoch": 11.342959917780062, "grad_norm": 0.047248125076293945, "learning_rate": 0.01, "loss": 1.9252, "step": 110367 }, { "epoch": 11.343268242548818, "grad_norm": 0.1021493673324585, "learning_rate": 0.01, "loss": 1.94, "step": 110370 }, { "epoch": 11.343576567317575, "grad_norm": 0.048846546560525894, "learning_rate": 0.01, "loss": 1.9474, "step": 110373 }, { "epoch": 11.343884892086331, "grad_norm": 0.08389388769865036, "learning_rate": 0.01, "loss": 1.9232, "step": 110376 }, { "epoch": 11.344193216855087, "grad_norm": 0.06787876039743423, "learning_rate": 0.01, "loss": 1.9203, "step": 110379 }, { "epoch": 11.344501541623844, "grad_norm": 0.0725322738289833, "learning_rate": 0.01, "loss": 1.9484, "step": 110382 }, { "epoch": 11.3448098663926, "grad_norm": 0.0973624661564827, "learning_rate": 0.01, "loss": 1.937, "step": 110385 }, { "epoch": 11.345118191161356, "grad_norm": 0.09873227030038834, "learning_rate": 0.01, "loss": 1.9247, "step": 110388 }, { "epoch": 11.345426515930113, "grad_norm": 0.08333581686019897, "learning_rate": 0.01, "loss": 1.9286, "step": 110391 }, { "epoch": 11.345734840698869, "grad_norm": 0.0631309375166893, "learning_rate": 0.01, "loss": 1.9404, "step": 110394 }, { "epoch": 11.346043165467625, "grad_norm": 0.061351578682661057, "learning_rate": 0.01, "loss": 1.9424, "step": 110397 }, { "epoch": 11.346351490236382, "grad_norm": 0.06281182169914246, "learning_rate": 0.01, "loss": 1.9579, "step": 110400 }, { "epoch": 11.34665981500514, "grad_norm": 0.04762888327240944, "learning_rate": 0.01, "loss": 1.934, "step": 110403 }, { "epoch": 11.346968139773896, "grad_norm": 0.03745291754603386, "learning_rate": 0.01, "loss": 1.936, "step": 110406 }, { "epoch": 11.347276464542652, "grad_norm": 0.04565132409334183, "learning_rate": 0.01, "loss": 1.9161, "step": 110409 }, { "epoch": 11.347584789311409, "grad_norm": 0.06974771618843079, "learning_rate": 0.01, "loss": 1.9525, "step": 110412 }, { "epoch": 11.347893114080165, "grad_norm": 0.1146189272403717, "learning_rate": 0.01, "loss": 1.9591, "step": 110415 }, { "epoch": 11.348201438848921, "grad_norm": 0.1235949769616127, "learning_rate": 0.01, "loss": 1.9235, "step": 110418 }, { "epoch": 11.348509763617677, "grad_norm": 0.08458071202039719, "learning_rate": 0.01, "loss": 1.9312, "step": 110421 }, { "epoch": 11.348818088386434, "grad_norm": 0.07701151072978973, "learning_rate": 0.01, "loss": 1.9494, "step": 110424 }, { "epoch": 11.34912641315519, "grad_norm": 0.07502985745668411, "learning_rate": 0.01, "loss": 1.9407, "step": 110427 }, { "epoch": 11.349434737923946, "grad_norm": 0.06750281900167465, "learning_rate": 0.01, "loss": 1.9408, "step": 110430 }, { "epoch": 11.349743062692703, "grad_norm": 0.04019710421562195, "learning_rate": 0.01, "loss": 1.9327, "step": 110433 }, { "epoch": 11.350051387461459, "grad_norm": 0.03724076971411705, "learning_rate": 0.01, "loss": 1.959, "step": 110436 }, { "epoch": 11.350359712230215, "grad_norm": 0.07026968896389008, "learning_rate": 0.01, "loss": 1.943, "step": 110439 }, { "epoch": 11.350668036998972, "grad_norm": 0.09067178517580032, "learning_rate": 0.01, "loss": 1.9303, "step": 110442 }, { "epoch": 11.350976361767728, "grad_norm": 0.07308315485715866, "learning_rate": 0.01, "loss": 1.937, "step": 110445 }, { "epoch": 11.351284686536484, "grad_norm": 0.06199457123875618, "learning_rate": 0.01, "loss": 1.9438, "step": 110448 }, { "epoch": 11.351593011305242, "grad_norm": 0.037689439952373505, "learning_rate": 0.01, "loss": 1.9543, "step": 110451 }, { "epoch": 11.351901336073999, "grad_norm": 0.033859074115753174, "learning_rate": 0.01, "loss": 1.9429, "step": 110454 }, { "epoch": 11.352209660842755, "grad_norm": 0.03455094248056412, "learning_rate": 0.01, "loss": 1.9238, "step": 110457 }, { "epoch": 11.352517985611511, "grad_norm": 0.16149871051311493, "learning_rate": 0.01, "loss": 1.9232, "step": 110460 }, { "epoch": 11.352826310380268, "grad_norm": 0.04946501553058624, "learning_rate": 0.01, "loss": 1.9312, "step": 110463 }, { "epoch": 11.353134635149024, "grad_norm": 0.080601267516613, "learning_rate": 0.01, "loss": 1.9398, "step": 110466 }, { "epoch": 11.35344295991778, "grad_norm": 0.04147493094205856, "learning_rate": 0.01, "loss": 1.9223, "step": 110469 }, { "epoch": 11.353751284686536, "grad_norm": 0.07370273768901825, "learning_rate": 0.01, "loss": 1.9186, "step": 110472 }, { "epoch": 11.354059609455293, "grad_norm": 0.04175065830349922, "learning_rate": 0.01, "loss": 1.9598, "step": 110475 }, { "epoch": 11.354367934224049, "grad_norm": 0.08143223077058792, "learning_rate": 0.01, "loss": 1.9311, "step": 110478 }, { "epoch": 11.354676258992805, "grad_norm": 0.08016743510961533, "learning_rate": 0.01, "loss": 1.9385, "step": 110481 }, { "epoch": 11.354984583761562, "grad_norm": 0.06962796300649643, "learning_rate": 0.01, "loss": 1.9284, "step": 110484 }, { "epoch": 11.355292908530318, "grad_norm": 0.09174045920372009, "learning_rate": 0.01, "loss": 1.9411, "step": 110487 }, { "epoch": 11.355601233299074, "grad_norm": 0.046844564378261566, "learning_rate": 0.01, "loss": 1.9436, "step": 110490 }, { "epoch": 11.35590955806783, "grad_norm": 0.08431335538625717, "learning_rate": 0.01, "loss": 1.9497, "step": 110493 }, { "epoch": 11.356217882836589, "grad_norm": 0.052417635917663574, "learning_rate": 0.01, "loss": 1.949, "step": 110496 }, { "epoch": 11.356526207605345, "grad_norm": 0.10628152638673782, "learning_rate": 0.01, "loss": 1.9389, "step": 110499 }, { "epoch": 11.356834532374101, "grad_norm": 0.0665721446275711, "learning_rate": 0.01, "loss": 1.9199, "step": 110502 }, { "epoch": 11.357142857142858, "grad_norm": 0.06576160341501236, "learning_rate": 0.01, "loss": 1.9645, "step": 110505 }, { "epoch": 11.357451181911614, "grad_norm": 0.07959224283695221, "learning_rate": 0.01, "loss": 1.9513, "step": 110508 }, { "epoch": 11.35775950668037, "grad_norm": 0.09703835844993591, "learning_rate": 0.01, "loss": 1.948, "step": 110511 }, { "epoch": 11.358067831449127, "grad_norm": 0.0768088549375534, "learning_rate": 0.01, "loss": 1.949, "step": 110514 }, { "epoch": 11.358376156217883, "grad_norm": 0.06530007719993591, "learning_rate": 0.01, "loss": 1.9433, "step": 110517 }, { "epoch": 11.35868448098664, "grad_norm": 0.06526839733123779, "learning_rate": 0.01, "loss": 1.9279, "step": 110520 }, { "epoch": 11.358992805755395, "grad_norm": 0.06234097480773926, "learning_rate": 0.01, "loss": 1.9409, "step": 110523 }, { "epoch": 11.359301130524152, "grad_norm": 0.033938802778720856, "learning_rate": 0.01, "loss": 1.9515, "step": 110526 }, { "epoch": 11.359609455292908, "grad_norm": 0.04808393120765686, "learning_rate": 0.01, "loss": 1.9244, "step": 110529 }, { "epoch": 11.359917780061664, "grad_norm": 0.09899698942899704, "learning_rate": 0.01, "loss": 1.9355, "step": 110532 }, { "epoch": 11.36022610483042, "grad_norm": 0.044449757784605026, "learning_rate": 0.01, "loss": 1.9533, "step": 110535 }, { "epoch": 11.360534429599177, "grad_norm": 0.0801699161529541, "learning_rate": 0.01, "loss": 1.9558, "step": 110538 }, { "epoch": 11.360842754367933, "grad_norm": 0.0521024726331234, "learning_rate": 0.01, "loss": 1.9587, "step": 110541 }, { "epoch": 11.361151079136691, "grad_norm": 0.062242165207862854, "learning_rate": 0.01, "loss": 1.9556, "step": 110544 }, { "epoch": 11.361459403905448, "grad_norm": 0.06852758675813675, "learning_rate": 0.01, "loss": 1.9485, "step": 110547 }, { "epoch": 11.361767728674204, "grad_norm": 0.06884782016277313, "learning_rate": 0.01, "loss": 1.9753, "step": 110550 }, { "epoch": 11.36207605344296, "grad_norm": 0.05993012338876724, "learning_rate": 0.01, "loss": 1.9491, "step": 110553 }, { "epoch": 11.362384378211717, "grad_norm": 0.057871006429195404, "learning_rate": 0.01, "loss": 1.921, "step": 110556 }, { "epoch": 11.362692702980473, "grad_norm": 0.042249929159879684, "learning_rate": 0.01, "loss": 1.9433, "step": 110559 }, { "epoch": 11.36300102774923, "grad_norm": 0.05203591659665108, "learning_rate": 0.01, "loss": 1.9253, "step": 110562 }, { "epoch": 11.363309352517986, "grad_norm": 0.037728067487478256, "learning_rate": 0.01, "loss": 1.9282, "step": 110565 }, { "epoch": 11.363617677286742, "grad_norm": 0.0518614761531353, "learning_rate": 0.01, "loss": 1.9399, "step": 110568 }, { "epoch": 11.363926002055498, "grad_norm": 0.10485280305147171, "learning_rate": 0.01, "loss": 1.9334, "step": 110571 }, { "epoch": 11.364234326824254, "grad_norm": 0.07373513281345367, "learning_rate": 0.01, "loss": 1.9373, "step": 110574 }, { "epoch": 11.36454265159301, "grad_norm": 0.1314626783132553, "learning_rate": 0.01, "loss": 1.9375, "step": 110577 }, { "epoch": 11.364850976361767, "grad_norm": 0.13870415091514587, "learning_rate": 0.01, "loss": 1.9347, "step": 110580 }, { "epoch": 11.365159301130523, "grad_norm": 0.05415605381131172, "learning_rate": 0.01, "loss": 1.959, "step": 110583 }, { "epoch": 11.36546762589928, "grad_norm": 0.03968094661831856, "learning_rate": 0.01, "loss": 1.922, "step": 110586 }, { "epoch": 11.365775950668038, "grad_norm": 0.032541971653699875, "learning_rate": 0.01, "loss": 1.9334, "step": 110589 }, { "epoch": 11.366084275436794, "grad_norm": 0.07528942078351974, "learning_rate": 0.01, "loss": 1.9406, "step": 110592 }, { "epoch": 11.36639260020555, "grad_norm": 0.17362824082374573, "learning_rate": 0.01, "loss": 1.9277, "step": 110595 }, { "epoch": 11.366700924974307, "grad_norm": 0.13993510603904724, "learning_rate": 0.01, "loss": 1.9514, "step": 110598 }, { "epoch": 11.367009249743063, "grad_norm": 0.05224933475255966, "learning_rate": 0.01, "loss": 1.9247, "step": 110601 }, { "epoch": 11.36731757451182, "grad_norm": 0.07095824182033539, "learning_rate": 0.01, "loss": 1.9424, "step": 110604 }, { "epoch": 11.367625899280576, "grad_norm": 0.04492488503456116, "learning_rate": 0.01, "loss": 1.9475, "step": 110607 }, { "epoch": 11.367934224049332, "grad_norm": 0.06791716068983078, "learning_rate": 0.01, "loss": 1.9405, "step": 110610 }, { "epoch": 11.368242548818088, "grad_norm": 0.05190029740333557, "learning_rate": 0.01, "loss": 1.9526, "step": 110613 }, { "epoch": 11.368550873586845, "grad_norm": 0.11406069248914719, "learning_rate": 0.01, "loss": 1.9444, "step": 110616 }, { "epoch": 11.3688591983556, "grad_norm": 0.13801059126853943, "learning_rate": 0.01, "loss": 1.9273, "step": 110619 }, { "epoch": 11.369167523124357, "grad_norm": 0.09343323111534119, "learning_rate": 0.01, "loss": 1.9291, "step": 110622 }, { "epoch": 11.369475847893113, "grad_norm": 0.06386526674032211, "learning_rate": 0.01, "loss": 1.9468, "step": 110625 }, { "epoch": 11.36978417266187, "grad_norm": 0.03324955701828003, "learning_rate": 0.01, "loss": 1.966, "step": 110628 }, { "epoch": 11.370092497430626, "grad_norm": 0.043516796082258224, "learning_rate": 0.01, "loss": 1.9183, "step": 110631 }, { "epoch": 11.370400822199384, "grad_norm": 0.11557954549789429, "learning_rate": 0.01, "loss": 1.9496, "step": 110634 }, { "epoch": 11.37070914696814, "grad_norm": 0.0785699188709259, "learning_rate": 0.01, "loss": 1.9294, "step": 110637 }, { "epoch": 11.371017471736897, "grad_norm": 0.08156263828277588, "learning_rate": 0.01, "loss": 1.9378, "step": 110640 }, { "epoch": 11.371325796505653, "grad_norm": 0.053373608738183975, "learning_rate": 0.01, "loss": 1.9641, "step": 110643 }, { "epoch": 11.37163412127441, "grad_norm": 0.06310009956359863, "learning_rate": 0.01, "loss": 1.9011, "step": 110646 }, { "epoch": 11.371942446043166, "grad_norm": 0.10197494179010391, "learning_rate": 0.01, "loss": 1.9384, "step": 110649 }, { "epoch": 11.372250770811922, "grad_norm": 0.13593943417072296, "learning_rate": 0.01, "loss": 1.935, "step": 110652 }, { "epoch": 11.372559095580678, "grad_norm": 0.04047724977135658, "learning_rate": 0.01, "loss": 1.9448, "step": 110655 }, { "epoch": 11.372867420349435, "grad_norm": 0.06438061594963074, "learning_rate": 0.01, "loss": 1.9194, "step": 110658 }, { "epoch": 11.373175745118191, "grad_norm": 0.07296453416347504, "learning_rate": 0.01, "loss": 1.9342, "step": 110661 }, { "epoch": 11.373484069886947, "grad_norm": 0.08513321727514267, "learning_rate": 0.01, "loss": 1.9059, "step": 110664 }, { "epoch": 11.373792394655704, "grad_norm": 0.13206641376018524, "learning_rate": 0.01, "loss": 1.943, "step": 110667 }, { "epoch": 11.37410071942446, "grad_norm": 0.04452472925186157, "learning_rate": 0.01, "loss": 1.9356, "step": 110670 }, { "epoch": 11.374409044193216, "grad_norm": 0.05014786124229431, "learning_rate": 0.01, "loss": 1.9152, "step": 110673 }, { "epoch": 11.374717368961972, "grad_norm": 0.039705969393253326, "learning_rate": 0.01, "loss": 1.9294, "step": 110676 }, { "epoch": 11.37502569373073, "grad_norm": 0.04527021944522858, "learning_rate": 0.01, "loss": 1.9387, "step": 110679 }, { "epoch": 11.375334018499487, "grad_norm": 0.06424544751644135, "learning_rate": 0.01, "loss": 1.9369, "step": 110682 }, { "epoch": 11.375642343268243, "grad_norm": 0.038035839796066284, "learning_rate": 0.01, "loss": 1.9474, "step": 110685 }, { "epoch": 11.375950668037, "grad_norm": 0.043364040553569794, "learning_rate": 0.01, "loss": 1.957, "step": 110688 }, { "epoch": 11.376258992805756, "grad_norm": 0.036017462611198425, "learning_rate": 0.01, "loss": 1.9228, "step": 110691 }, { "epoch": 11.376567317574512, "grad_norm": 0.057449888437986374, "learning_rate": 0.01, "loss": 1.961, "step": 110694 }, { "epoch": 11.376875642343268, "grad_norm": 0.18690413236618042, "learning_rate": 0.01, "loss": 1.9578, "step": 110697 }, { "epoch": 11.377183967112025, "grad_norm": 0.18334275484085083, "learning_rate": 0.01, "loss": 1.9566, "step": 110700 }, { "epoch": 11.377492291880781, "grad_norm": 0.11479616165161133, "learning_rate": 0.01, "loss": 1.9479, "step": 110703 }, { "epoch": 11.377800616649537, "grad_norm": 0.08656278252601624, "learning_rate": 0.01, "loss": 1.9498, "step": 110706 }, { "epoch": 11.378108941418294, "grad_norm": 0.04703566059470177, "learning_rate": 0.01, "loss": 1.9509, "step": 110709 }, { "epoch": 11.37841726618705, "grad_norm": 0.06349678337574005, "learning_rate": 0.01, "loss": 1.9557, "step": 110712 }, { "epoch": 11.378725590955806, "grad_norm": 0.10881751030683517, "learning_rate": 0.01, "loss": 1.9345, "step": 110715 }, { "epoch": 11.379033915724563, "grad_norm": 0.05449318140745163, "learning_rate": 0.01, "loss": 1.9585, "step": 110718 }, { "epoch": 11.379342240493319, "grad_norm": 0.1894116997718811, "learning_rate": 0.01, "loss": 1.9266, "step": 110721 }, { "epoch": 11.379650565262075, "grad_norm": 0.049433499574661255, "learning_rate": 0.01, "loss": 1.9359, "step": 110724 }, { "epoch": 11.379958890030833, "grad_norm": 0.03821530565619469, "learning_rate": 0.01, "loss": 1.9476, "step": 110727 }, { "epoch": 11.38026721479959, "grad_norm": 0.034824103116989136, "learning_rate": 0.01, "loss": 1.9388, "step": 110730 }, { "epoch": 11.380575539568346, "grad_norm": 0.05202208086848259, "learning_rate": 0.01, "loss": 1.9576, "step": 110733 }, { "epoch": 11.380883864337102, "grad_norm": 0.057843755930662155, "learning_rate": 0.01, "loss": 1.9405, "step": 110736 }, { "epoch": 11.381192189105859, "grad_norm": 0.04548819735646248, "learning_rate": 0.01, "loss": 1.9413, "step": 110739 }, { "epoch": 11.381500513874615, "grad_norm": 0.08566679805517197, "learning_rate": 0.01, "loss": 1.9508, "step": 110742 }, { "epoch": 11.381808838643371, "grad_norm": 0.11440742015838623, "learning_rate": 0.01, "loss": 1.9428, "step": 110745 }, { "epoch": 11.382117163412127, "grad_norm": 0.15535084903240204, "learning_rate": 0.01, "loss": 1.9199, "step": 110748 }, { "epoch": 11.382425488180884, "grad_norm": 0.0787283256649971, "learning_rate": 0.01, "loss": 1.924, "step": 110751 }, { "epoch": 11.38273381294964, "grad_norm": 0.031954240053892136, "learning_rate": 0.01, "loss": 1.943, "step": 110754 }, { "epoch": 11.383042137718396, "grad_norm": 0.03369603678584099, "learning_rate": 0.01, "loss": 1.9657, "step": 110757 }, { "epoch": 11.383350462487153, "grad_norm": 0.08464602380990982, "learning_rate": 0.01, "loss": 1.9317, "step": 110760 }, { "epoch": 11.383658787255909, "grad_norm": 0.046058326959609985, "learning_rate": 0.01, "loss": 1.9262, "step": 110763 }, { "epoch": 11.383967112024665, "grad_norm": 0.038871098309755325, "learning_rate": 0.01, "loss": 1.9372, "step": 110766 }, { "epoch": 11.384275436793422, "grad_norm": 0.040099188685417175, "learning_rate": 0.01, "loss": 1.9466, "step": 110769 }, { "epoch": 11.38458376156218, "grad_norm": 0.047709397971630096, "learning_rate": 0.01, "loss": 1.9274, "step": 110772 }, { "epoch": 11.384892086330936, "grad_norm": 0.04168373718857765, "learning_rate": 0.01, "loss": 1.9212, "step": 110775 }, { "epoch": 11.385200411099692, "grad_norm": 0.15479597449302673, "learning_rate": 0.01, "loss": 1.9459, "step": 110778 }, { "epoch": 11.385508735868449, "grad_norm": 0.09209232032299042, "learning_rate": 0.01, "loss": 1.9095, "step": 110781 }, { "epoch": 11.385817060637205, "grad_norm": 0.048776429146528244, "learning_rate": 0.01, "loss": 1.9143, "step": 110784 }, { "epoch": 11.386125385405961, "grad_norm": 0.04659121483564377, "learning_rate": 0.01, "loss": 1.9402, "step": 110787 }, { "epoch": 11.386433710174718, "grad_norm": 0.05104237422347069, "learning_rate": 0.01, "loss": 1.9429, "step": 110790 }, { "epoch": 11.386742034943474, "grad_norm": 0.06261378526687622, "learning_rate": 0.01, "loss": 1.9062, "step": 110793 }, { "epoch": 11.38705035971223, "grad_norm": 0.06825476884841919, "learning_rate": 0.01, "loss": 1.9254, "step": 110796 }, { "epoch": 11.387358684480986, "grad_norm": 0.06235522776842117, "learning_rate": 0.01, "loss": 1.9352, "step": 110799 }, { "epoch": 11.387667009249743, "grad_norm": 0.1005907878279686, "learning_rate": 0.01, "loss": 1.9626, "step": 110802 }, { "epoch": 11.387975334018499, "grad_norm": 0.12915214896202087, "learning_rate": 0.01, "loss": 1.9083, "step": 110805 }, { "epoch": 11.388283658787255, "grad_norm": 0.10919107496738434, "learning_rate": 0.01, "loss": 1.9683, "step": 110808 }, { "epoch": 11.388591983556012, "grad_norm": 0.05322147533297539, "learning_rate": 0.01, "loss": 1.9761, "step": 110811 }, { "epoch": 11.388900308324768, "grad_norm": 0.04587923735380173, "learning_rate": 0.01, "loss": 1.9236, "step": 110814 }, { "epoch": 11.389208633093524, "grad_norm": 0.035594645887613297, "learning_rate": 0.01, "loss": 1.9306, "step": 110817 }, { "epoch": 11.389516957862282, "grad_norm": 0.03522982820868492, "learning_rate": 0.01, "loss": 1.9296, "step": 110820 }, { "epoch": 11.389825282631039, "grad_norm": 0.04372236505150795, "learning_rate": 0.01, "loss": 1.967, "step": 110823 }, { "epoch": 11.390133607399795, "grad_norm": 0.05270487442612648, "learning_rate": 0.01, "loss": 1.933, "step": 110826 }, { "epoch": 11.390441932168551, "grad_norm": 0.12330029904842377, "learning_rate": 0.01, "loss": 1.9244, "step": 110829 }, { "epoch": 11.390750256937308, "grad_norm": 0.07215101271867752, "learning_rate": 0.01, "loss": 1.9231, "step": 110832 }, { "epoch": 11.391058581706064, "grad_norm": 0.056661464273929596, "learning_rate": 0.01, "loss": 1.9036, "step": 110835 }, { "epoch": 11.39136690647482, "grad_norm": 0.10824533551931381, "learning_rate": 0.01, "loss": 1.9649, "step": 110838 }, { "epoch": 11.391675231243577, "grad_norm": 0.05006413906812668, "learning_rate": 0.01, "loss": 1.9431, "step": 110841 }, { "epoch": 11.391983556012333, "grad_norm": 0.036600951105356216, "learning_rate": 0.01, "loss": 1.9328, "step": 110844 }, { "epoch": 11.39229188078109, "grad_norm": 0.06521067023277283, "learning_rate": 0.01, "loss": 1.9327, "step": 110847 }, { "epoch": 11.392600205549845, "grad_norm": 0.1014748141169548, "learning_rate": 0.01, "loss": 1.9142, "step": 110850 }, { "epoch": 11.392908530318602, "grad_norm": 0.1848376840353012, "learning_rate": 0.01, "loss": 1.9409, "step": 110853 }, { "epoch": 11.393216855087358, "grad_norm": 0.09469456970691681, "learning_rate": 0.01, "loss": 1.9268, "step": 110856 }, { "epoch": 11.393525179856114, "grad_norm": 0.09343547374010086, "learning_rate": 0.01, "loss": 1.9357, "step": 110859 }, { "epoch": 11.393833504624872, "grad_norm": 0.08093136548995972, "learning_rate": 0.01, "loss": 1.9374, "step": 110862 }, { "epoch": 11.394141829393629, "grad_norm": 0.05411363020539284, "learning_rate": 0.01, "loss": 1.9513, "step": 110865 }, { "epoch": 11.394450154162385, "grad_norm": 0.047125186771154404, "learning_rate": 0.01, "loss": 1.9578, "step": 110868 }, { "epoch": 11.394758478931141, "grad_norm": 0.0919274240732193, "learning_rate": 0.01, "loss": 1.9203, "step": 110871 }, { "epoch": 11.395066803699898, "grad_norm": 0.042707886546850204, "learning_rate": 0.01, "loss": 1.942, "step": 110874 }, { "epoch": 11.395375128468654, "grad_norm": 0.08432315289974213, "learning_rate": 0.01, "loss": 1.9226, "step": 110877 }, { "epoch": 11.39568345323741, "grad_norm": 0.06510323286056519, "learning_rate": 0.01, "loss": 1.9629, "step": 110880 }, { "epoch": 11.395991778006167, "grad_norm": 0.046135447919368744, "learning_rate": 0.01, "loss": 1.9559, "step": 110883 }, { "epoch": 11.396300102774923, "grad_norm": 0.10874036699533463, "learning_rate": 0.01, "loss": 1.9633, "step": 110886 }, { "epoch": 11.39660842754368, "grad_norm": 0.05693401023745537, "learning_rate": 0.01, "loss": 1.9495, "step": 110889 }, { "epoch": 11.396916752312436, "grad_norm": 0.10287602990865707, "learning_rate": 0.01, "loss": 1.9563, "step": 110892 }, { "epoch": 11.397225077081192, "grad_norm": 0.14678283035755157, "learning_rate": 0.01, "loss": 1.9313, "step": 110895 }, { "epoch": 11.397533401849948, "grad_norm": 0.06007491424679756, "learning_rate": 0.01, "loss": 1.9461, "step": 110898 }, { "epoch": 11.397841726618704, "grad_norm": 0.0790441483259201, "learning_rate": 0.01, "loss": 1.93, "step": 110901 }, { "epoch": 11.39815005138746, "grad_norm": 0.07017059624195099, "learning_rate": 0.01, "loss": 1.9527, "step": 110904 }, { "epoch": 11.398458376156217, "grad_norm": 0.0407567135989666, "learning_rate": 0.01, "loss": 1.9392, "step": 110907 }, { "epoch": 11.398766700924975, "grad_norm": 0.0886857658624649, "learning_rate": 0.01, "loss": 1.9159, "step": 110910 }, { "epoch": 11.399075025693731, "grad_norm": 0.06689956784248352, "learning_rate": 0.01, "loss": 1.943, "step": 110913 }, { "epoch": 11.399383350462488, "grad_norm": 0.10107925534248352, "learning_rate": 0.01, "loss": 1.9757, "step": 110916 }, { "epoch": 11.399691675231244, "grad_norm": 0.05348954722285271, "learning_rate": 0.01, "loss": 1.9391, "step": 110919 }, { "epoch": 11.4, "grad_norm": 0.0453205369412899, "learning_rate": 0.01, "loss": 1.9562, "step": 110922 }, { "epoch": 11.400308324768757, "grad_norm": 0.037814341485500336, "learning_rate": 0.01, "loss": 1.952, "step": 110925 }, { "epoch": 11.400616649537513, "grad_norm": 0.11961714178323746, "learning_rate": 0.01, "loss": 1.9342, "step": 110928 }, { "epoch": 11.40092497430627, "grad_norm": 0.09395978599786758, "learning_rate": 0.01, "loss": 1.9232, "step": 110931 }, { "epoch": 11.401233299075026, "grad_norm": 0.06540749222040176, "learning_rate": 0.01, "loss": 1.9469, "step": 110934 }, { "epoch": 11.401541623843782, "grad_norm": 0.08541125804185867, "learning_rate": 0.01, "loss": 1.9529, "step": 110937 }, { "epoch": 11.401849948612538, "grad_norm": 0.036478616297245026, "learning_rate": 0.01, "loss": 1.9362, "step": 110940 }, { "epoch": 11.402158273381295, "grad_norm": 0.0369294211268425, "learning_rate": 0.01, "loss": 1.9339, "step": 110943 }, { "epoch": 11.40246659815005, "grad_norm": 0.13193678855895996, "learning_rate": 0.01, "loss": 1.9301, "step": 110946 }, { "epoch": 11.402774922918807, "grad_norm": 0.05666269734501839, "learning_rate": 0.01, "loss": 1.9641, "step": 110949 }, { "epoch": 11.403083247687563, "grad_norm": 0.04516861215233803, "learning_rate": 0.01, "loss": 1.9505, "step": 110952 }, { "epoch": 11.403391572456322, "grad_norm": 0.05796361342072487, "learning_rate": 0.01, "loss": 1.935, "step": 110955 }, { "epoch": 11.403699897225078, "grad_norm": 0.04144136980175972, "learning_rate": 0.01, "loss": 1.9423, "step": 110958 }, { "epoch": 11.404008221993834, "grad_norm": 0.04193937033414841, "learning_rate": 0.01, "loss": 1.9444, "step": 110961 }, { "epoch": 11.40431654676259, "grad_norm": 0.1043611392378807, "learning_rate": 0.01, "loss": 1.9302, "step": 110964 }, { "epoch": 11.404624871531347, "grad_norm": 0.1311003863811493, "learning_rate": 0.01, "loss": 1.9432, "step": 110967 }, { "epoch": 11.404933196300103, "grad_norm": 0.05601399391889572, "learning_rate": 0.01, "loss": 1.9456, "step": 110970 }, { "epoch": 11.40524152106886, "grad_norm": 0.04909517988562584, "learning_rate": 0.01, "loss": 1.9285, "step": 110973 }, { "epoch": 11.405549845837616, "grad_norm": 0.08073060214519501, "learning_rate": 0.01, "loss": 1.9235, "step": 110976 }, { "epoch": 11.405858170606372, "grad_norm": 0.07823039591312408, "learning_rate": 0.01, "loss": 1.9171, "step": 110979 }, { "epoch": 11.406166495375128, "grad_norm": 0.05647626519203186, "learning_rate": 0.01, "loss": 1.9136, "step": 110982 }, { "epoch": 11.406474820143885, "grad_norm": 0.1711927354335785, "learning_rate": 0.01, "loss": 1.924, "step": 110985 }, { "epoch": 11.406783144912641, "grad_norm": 0.1063016802072525, "learning_rate": 0.01, "loss": 1.9434, "step": 110988 }, { "epoch": 11.407091469681397, "grad_norm": 0.03903825581073761, "learning_rate": 0.01, "loss": 1.9314, "step": 110991 }, { "epoch": 11.407399794450154, "grad_norm": 0.040212482213974, "learning_rate": 0.01, "loss": 1.941, "step": 110994 }, { "epoch": 11.40770811921891, "grad_norm": 0.05461808294057846, "learning_rate": 0.01, "loss": 1.9416, "step": 110997 }, { "epoch": 11.408016443987666, "grad_norm": 0.05734023079276085, "learning_rate": 0.01, "loss": 1.9392, "step": 111000 }, { "epoch": 11.408324768756424, "grad_norm": 0.06303809583187103, "learning_rate": 0.01, "loss": 1.9621, "step": 111003 }, { "epoch": 11.40863309352518, "grad_norm": 0.1410919576883316, "learning_rate": 0.01, "loss": 1.9386, "step": 111006 }, { "epoch": 11.408941418293937, "grad_norm": 0.07548557221889496, "learning_rate": 0.01, "loss": 1.942, "step": 111009 }, { "epoch": 11.409249743062693, "grad_norm": 0.07225187122821808, "learning_rate": 0.01, "loss": 1.9288, "step": 111012 }, { "epoch": 11.40955806783145, "grad_norm": 0.05371328815817833, "learning_rate": 0.01, "loss": 1.9447, "step": 111015 }, { "epoch": 11.409866392600206, "grad_norm": 0.10149164497852325, "learning_rate": 0.01, "loss": 1.9336, "step": 111018 }, { "epoch": 11.410174717368962, "grad_norm": 0.07427545636892319, "learning_rate": 0.01, "loss": 1.9296, "step": 111021 }, { "epoch": 11.410483042137718, "grad_norm": 0.07735265046358109, "learning_rate": 0.01, "loss": 1.9447, "step": 111024 }, { "epoch": 11.410791366906475, "grad_norm": 0.10191336274147034, "learning_rate": 0.01, "loss": 1.9473, "step": 111027 }, { "epoch": 11.411099691675231, "grad_norm": 0.15336443483829498, "learning_rate": 0.01, "loss": 1.9383, "step": 111030 }, { "epoch": 11.411408016443987, "grad_norm": 0.09829924255609512, "learning_rate": 0.01, "loss": 1.9589, "step": 111033 }, { "epoch": 11.411716341212744, "grad_norm": 0.07386139780282974, "learning_rate": 0.01, "loss": 1.9428, "step": 111036 }, { "epoch": 11.4120246659815, "grad_norm": 0.04841221868991852, "learning_rate": 0.01, "loss": 1.947, "step": 111039 }, { "epoch": 11.412332990750256, "grad_norm": 0.045958053320646286, "learning_rate": 0.01, "loss": 1.9531, "step": 111042 }, { "epoch": 11.412641315519013, "grad_norm": 0.09872113913297653, "learning_rate": 0.01, "loss": 1.9105, "step": 111045 }, { "epoch": 11.41294964028777, "grad_norm": 0.0739109143614769, "learning_rate": 0.01, "loss": 1.9699, "step": 111048 }, { "epoch": 11.413257965056527, "grad_norm": 0.09770570695400238, "learning_rate": 0.01, "loss": 1.9528, "step": 111051 }, { "epoch": 11.413566289825283, "grad_norm": 0.1025087758898735, "learning_rate": 0.01, "loss": 1.9348, "step": 111054 }, { "epoch": 11.41387461459404, "grad_norm": 0.07407236844301224, "learning_rate": 0.01, "loss": 1.9306, "step": 111057 }, { "epoch": 11.414182939362796, "grad_norm": 0.12390477955341339, "learning_rate": 0.01, "loss": 1.9492, "step": 111060 }, { "epoch": 11.414491264131552, "grad_norm": 0.1367517113685608, "learning_rate": 0.01, "loss": 1.9593, "step": 111063 }, { "epoch": 11.414799588900308, "grad_norm": 0.06094538792967796, "learning_rate": 0.01, "loss": 1.9269, "step": 111066 }, { "epoch": 11.415107913669065, "grad_norm": 0.04376324266195297, "learning_rate": 0.01, "loss": 1.9563, "step": 111069 }, { "epoch": 11.415416238437821, "grad_norm": 0.057995304465293884, "learning_rate": 0.01, "loss": 1.9461, "step": 111072 }, { "epoch": 11.415724563206577, "grad_norm": 0.04961060732603073, "learning_rate": 0.01, "loss": 1.9493, "step": 111075 }, { "epoch": 11.416032887975334, "grad_norm": 0.04857701435685158, "learning_rate": 0.01, "loss": 1.9433, "step": 111078 }, { "epoch": 11.41634121274409, "grad_norm": 0.0609593540430069, "learning_rate": 0.01, "loss": 1.9524, "step": 111081 }, { "epoch": 11.416649537512846, "grad_norm": 0.05478176474571228, "learning_rate": 0.01, "loss": 1.9381, "step": 111084 }, { "epoch": 11.416957862281603, "grad_norm": 0.04392441362142563, "learning_rate": 0.01, "loss": 1.9129, "step": 111087 }, { "epoch": 11.417266187050359, "grad_norm": 0.08224606513977051, "learning_rate": 0.01, "loss": 1.9332, "step": 111090 }, { "epoch": 11.417574511819117, "grad_norm": 0.06009015068411827, "learning_rate": 0.01, "loss": 1.9328, "step": 111093 }, { "epoch": 11.417882836587873, "grad_norm": 0.03522694483399391, "learning_rate": 0.01, "loss": 1.9374, "step": 111096 }, { "epoch": 11.41819116135663, "grad_norm": 0.08625969290733337, "learning_rate": 0.01, "loss": 1.9488, "step": 111099 }, { "epoch": 11.418499486125386, "grad_norm": 0.0723060742020607, "learning_rate": 0.01, "loss": 1.9462, "step": 111102 }, { "epoch": 11.418807810894142, "grad_norm": 0.07083339244127274, "learning_rate": 0.01, "loss": 1.9426, "step": 111105 }, { "epoch": 11.419116135662899, "grad_norm": 0.05434579402208328, "learning_rate": 0.01, "loss": 1.9354, "step": 111108 }, { "epoch": 11.419424460431655, "grad_norm": 0.05564429983496666, "learning_rate": 0.01, "loss": 1.9284, "step": 111111 }, { "epoch": 11.419732785200411, "grad_norm": 0.06538271903991699, "learning_rate": 0.01, "loss": 1.9294, "step": 111114 }, { "epoch": 11.420041109969167, "grad_norm": 0.12349273264408112, "learning_rate": 0.01, "loss": 1.9317, "step": 111117 }, { "epoch": 11.420349434737924, "grad_norm": 0.11483631283044815, "learning_rate": 0.01, "loss": 1.9273, "step": 111120 }, { "epoch": 11.42065775950668, "grad_norm": 0.045582689344882965, "learning_rate": 0.01, "loss": 1.9501, "step": 111123 }, { "epoch": 11.420966084275436, "grad_norm": 0.051394637674093246, "learning_rate": 0.01, "loss": 1.9476, "step": 111126 }, { "epoch": 11.421274409044193, "grad_norm": 0.058152392506599426, "learning_rate": 0.01, "loss": 1.9417, "step": 111129 }, { "epoch": 11.421582733812949, "grad_norm": 0.08196362853050232, "learning_rate": 0.01, "loss": 1.9438, "step": 111132 }, { "epoch": 11.421891058581705, "grad_norm": 0.0730375200510025, "learning_rate": 0.01, "loss": 1.9417, "step": 111135 }, { "epoch": 11.422199383350463, "grad_norm": 0.07399123162031174, "learning_rate": 0.01, "loss": 1.929, "step": 111138 }, { "epoch": 11.42250770811922, "grad_norm": 0.09269146621227264, "learning_rate": 0.01, "loss": 1.9379, "step": 111141 }, { "epoch": 11.422816032887976, "grad_norm": 0.06923570483922958, "learning_rate": 0.01, "loss": 1.909, "step": 111144 }, { "epoch": 11.423124357656732, "grad_norm": 0.06779024749994278, "learning_rate": 0.01, "loss": 1.9188, "step": 111147 }, { "epoch": 11.423432682425489, "grad_norm": 0.07309120148420334, "learning_rate": 0.01, "loss": 1.9734, "step": 111150 }, { "epoch": 11.423741007194245, "grad_norm": 0.05872660502791405, "learning_rate": 0.01, "loss": 1.9587, "step": 111153 }, { "epoch": 11.424049331963001, "grad_norm": 0.073930524289608, "learning_rate": 0.01, "loss": 1.9492, "step": 111156 }, { "epoch": 11.424357656731758, "grad_norm": 0.1007046103477478, "learning_rate": 0.01, "loss": 1.9442, "step": 111159 }, { "epoch": 11.424665981500514, "grad_norm": 0.03730003908276558, "learning_rate": 0.01, "loss": 1.936, "step": 111162 }, { "epoch": 11.42497430626927, "grad_norm": 0.17431728541851044, "learning_rate": 0.01, "loss": 1.9504, "step": 111165 }, { "epoch": 11.425282631038026, "grad_norm": 0.17289651930332184, "learning_rate": 0.01, "loss": 1.93, "step": 111168 }, { "epoch": 11.425590955806783, "grad_norm": 0.12980881333351135, "learning_rate": 0.01, "loss": 1.9647, "step": 111171 }, { "epoch": 11.425899280575539, "grad_norm": 0.06733021885156631, "learning_rate": 0.01, "loss": 1.9388, "step": 111174 }, { "epoch": 11.426207605344295, "grad_norm": 0.04424450546503067, "learning_rate": 0.01, "loss": 1.9428, "step": 111177 }, { "epoch": 11.426515930113052, "grad_norm": 0.043618347495794296, "learning_rate": 0.01, "loss": 1.9326, "step": 111180 }, { "epoch": 11.426824254881808, "grad_norm": 0.06406936794519424, "learning_rate": 0.01, "loss": 1.9145, "step": 111183 }, { "epoch": 11.427132579650566, "grad_norm": 0.06397773325443268, "learning_rate": 0.01, "loss": 1.9414, "step": 111186 }, { "epoch": 11.427440904419322, "grad_norm": 0.045785196125507355, "learning_rate": 0.01, "loss": 1.9363, "step": 111189 }, { "epoch": 11.427749229188079, "grad_norm": 0.036619219928979874, "learning_rate": 0.01, "loss": 1.9222, "step": 111192 }, { "epoch": 11.428057553956835, "grad_norm": 0.056804507970809937, "learning_rate": 0.01, "loss": 1.9617, "step": 111195 }, { "epoch": 11.428365878725591, "grad_norm": 0.035350654274225235, "learning_rate": 0.01, "loss": 1.9046, "step": 111198 }, { "epoch": 11.428674203494348, "grad_norm": 0.09453041106462479, "learning_rate": 0.01, "loss": 1.953, "step": 111201 }, { "epoch": 11.428982528263104, "grad_norm": 0.10754123330116272, "learning_rate": 0.01, "loss": 1.9408, "step": 111204 }, { "epoch": 11.42929085303186, "grad_norm": 0.140550434589386, "learning_rate": 0.01, "loss": 1.9321, "step": 111207 }, { "epoch": 11.429599177800617, "grad_norm": 0.05969111621379852, "learning_rate": 0.01, "loss": 1.9739, "step": 111210 }, { "epoch": 11.429907502569373, "grad_norm": 0.07596472650766373, "learning_rate": 0.01, "loss": 1.9137, "step": 111213 }, { "epoch": 11.43021582733813, "grad_norm": 0.06707892566919327, "learning_rate": 0.01, "loss": 1.9335, "step": 111216 }, { "epoch": 11.430524152106885, "grad_norm": 0.031418804079294205, "learning_rate": 0.01, "loss": 1.9435, "step": 111219 }, { "epoch": 11.430832476875642, "grad_norm": 0.11432456225156784, "learning_rate": 0.01, "loss": 1.9282, "step": 111222 }, { "epoch": 11.431140801644398, "grad_norm": 0.06755537539720535, "learning_rate": 0.01, "loss": 1.962, "step": 111225 }, { "epoch": 11.431449126413154, "grad_norm": 0.09963531047105789, "learning_rate": 0.01, "loss": 1.9566, "step": 111228 }, { "epoch": 11.431757451181912, "grad_norm": 0.14981499314308167, "learning_rate": 0.01, "loss": 1.9492, "step": 111231 }, { "epoch": 11.432065775950669, "grad_norm": 0.0813409760594368, "learning_rate": 0.01, "loss": 1.9596, "step": 111234 }, { "epoch": 11.432374100719425, "grad_norm": 0.10392513126134872, "learning_rate": 0.01, "loss": 1.9282, "step": 111237 }, { "epoch": 11.432682425488181, "grad_norm": 0.05986598879098892, "learning_rate": 0.01, "loss": 1.9594, "step": 111240 }, { "epoch": 11.432990750256938, "grad_norm": 0.11344392597675323, "learning_rate": 0.01, "loss": 1.9611, "step": 111243 }, { "epoch": 11.433299075025694, "grad_norm": 0.07161758095026016, "learning_rate": 0.01, "loss": 1.951, "step": 111246 }, { "epoch": 11.43360739979445, "grad_norm": 0.0733150988817215, "learning_rate": 0.01, "loss": 1.9519, "step": 111249 }, { "epoch": 11.433915724563207, "grad_norm": 0.08997683227062225, "learning_rate": 0.01, "loss": 1.9298, "step": 111252 }, { "epoch": 11.434224049331963, "grad_norm": 0.042780738323926926, "learning_rate": 0.01, "loss": 1.9496, "step": 111255 }, { "epoch": 11.43453237410072, "grad_norm": 0.055852118879556656, "learning_rate": 0.01, "loss": 1.9293, "step": 111258 }, { "epoch": 11.434840698869476, "grad_norm": 0.05158184468746185, "learning_rate": 0.01, "loss": 1.9543, "step": 111261 }, { "epoch": 11.435149023638232, "grad_norm": 0.04345036670565605, "learning_rate": 0.01, "loss": 1.9403, "step": 111264 }, { "epoch": 11.435457348406988, "grad_norm": 0.03345207870006561, "learning_rate": 0.01, "loss": 1.9161, "step": 111267 }, { "epoch": 11.435765673175744, "grad_norm": 0.055717889219522476, "learning_rate": 0.01, "loss": 1.9285, "step": 111270 }, { "epoch": 11.4360739979445, "grad_norm": 0.13665983080863953, "learning_rate": 0.01, "loss": 1.9242, "step": 111273 }, { "epoch": 11.436382322713257, "grad_norm": 0.14494851231575012, "learning_rate": 0.01, "loss": 1.9322, "step": 111276 }, { "epoch": 11.436690647482015, "grad_norm": 0.07634695619344711, "learning_rate": 0.01, "loss": 1.937, "step": 111279 }, { "epoch": 11.436998972250771, "grad_norm": 0.03860051929950714, "learning_rate": 0.01, "loss": 1.941, "step": 111282 }, { "epoch": 11.437307297019528, "grad_norm": 0.0648890882730484, "learning_rate": 0.01, "loss": 1.9341, "step": 111285 }, { "epoch": 11.437615621788284, "grad_norm": 0.04066958278417587, "learning_rate": 0.01, "loss": 1.9511, "step": 111288 }, { "epoch": 11.43792394655704, "grad_norm": 0.03933463245630264, "learning_rate": 0.01, "loss": 1.9236, "step": 111291 }, { "epoch": 11.438232271325797, "grad_norm": 0.06783044338226318, "learning_rate": 0.01, "loss": 1.936, "step": 111294 }, { "epoch": 11.438540596094553, "grad_norm": 0.09267598390579224, "learning_rate": 0.01, "loss": 1.9462, "step": 111297 }, { "epoch": 11.43884892086331, "grad_norm": 0.0766342505812645, "learning_rate": 0.01, "loss": 1.9783, "step": 111300 }, { "epoch": 11.439157245632066, "grad_norm": 0.07145971804857254, "learning_rate": 0.01, "loss": 1.9411, "step": 111303 }, { "epoch": 11.439465570400822, "grad_norm": 0.0847540870308876, "learning_rate": 0.01, "loss": 1.9293, "step": 111306 }, { "epoch": 11.439773895169578, "grad_norm": 0.04288563132286072, "learning_rate": 0.01, "loss": 1.9416, "step": 111309 }, { "epoch": 11.440082219938335, "grad_norm": 0.03836122527718544, "learning_rate": 0.01, "loss": 1.9545, "step": 111312 }, { "epoch": 11.44039054470709, "grad_norm": 0.03649865835905075, "learning_rate": 0.01, "loss": 1.9348, "step": 111315 }, { "epoch": 11.440698869475847, "grad_norm": 0.0543823167681694, "learning_rate": 0.01, "loss": 1.9354, "step": 111318 }, { "epoch": 11.441007194244605, "grad_norm": 0.04194122180342674, "learning_rate": 0.01, "loss": 1.9437, "step": 111321 }, { "epoch": 11.441315519013362, "grad_norm": 0.16252778470516205, "learning_rate": 0.01, "loss": 1.9305, "step": 111324 }, { "epoch": 11.441623843782118, "grad_norm": 0.05408162996172905, "learning_rate": 0.01, "loss": 1.924, "step": 111327 }, { "epoch": 11.441932168550874, "grad_norm": 0.04746748507022858, "learning_rate": 0.01, "loss": 1.9146, "step": 111330 }, { "epoch": 11.44224049331963, "grad_norm": 0.044511113315820694, "learning_rate": 0.01, "loss": 1.923, "step": 111333 }, { "epoch": 11.442548818088387, "grad_norm": 0.0402715727686882, "learning_rate": 0.01, "loss": 1.9581, "step": 111336 }, { "epoch": 11.442857142857143, "grad_norm": 0.04611997306346893, "learning_rate": 0.01, "loss": 1.9327, "step": 111339 }, { "epoch": 11.4431654676259, "grad_norm": 0.05174090340733528, "learning_rate": 0.01, "loss": 1.9464, "step": 111342 }, { "epoch": 11.443473792394656, "grad_norm": 0.06841269880533218, "learning_rate": 0.01, "loss": 1.9378, "step": 111345 }, { "epoch": 11.443782117163412, "grad_norm": 0.0667579174041748, "learning_rate": 0.01, "loss": 1.9538, "step": 111348 }, { "epoch": 11.444090441932168, "grad_norm": 0.06422604620456696, "learning_rate": 0.01, "loss": 1.9343, "step": 111351 }, { "epoch": 11.444398766700925, "grad_norm": 0.05790428817272186, "learning_rate": 0.01, "loss": 1.9307, "step": 111354 }, { "epoch": 11.444707091469681, "grad_norm": 0.060070376843214035, "learning_rate": 0.01, "loss": 1.956, "step": 111357 }, { "epoch": 11.445015416238437, "grad_norm": 0.084757000207901, "learning_rate": 0.01, "loss": 1.9527, "step": 111360 }, { "epoch": 11.445323741007194, "grad_norm": 0.08789457380771637, "learning_rate": 0.01, "loss": 1.949, "step": 111363 }, { "epoch": 11.44563206577595, "grad_norm": 0.10299545526504517, "learning_rate": 0.01, "loss": 1.9538, "step": 111366 }, { "epoch": 11.445940390544708, "grad_norm": 0.05760720372200012, "learning_rate": 0.01, "loss": 1.9368, "step": 111369 }, { "epoch": 11.446248715313464, "grad_norm": 0.08394305408000946, "learning_rate": 0.01, "loss": 1.9501, "step": 111372 }, { "epoch": 11.44655704008222, "grad_norm": 0.1427774280309677, "learning_rate": 0.01, "loss": 1.9356, "step": 111375 }, { "epoch": 11.446865364850977, "grad_norm": 0.11516613513231277, "learning_rate": 0.01, "loss": 1.9509, "step": 111378 }, { "epoch": 11.447173689619733, "grad_norm": 0.06715381145477295, "learning_rate": 0.01, "loss": 1.9488, "step": 111381 }, { "epoch": 11.44748201438849, "grad_norm": 0.04250967502593994, "learning_rate": 0.01, "loss": 1.9267, "step": 111384 }, { "epoch": 11.447790339157246, "grad_norm": 0.1086759939789772, "learning_rate": 0.01, "loss": 1.9199, "step": 111387 }, { "epoch": 11.448098663926002, "grad_norm": 0.055994849652051926, "learning_rate": 0.01, "loss": 1.9556, "step": 111390 }, { "epoch": 11.448406988694758, "grad_norm": 0.03699815273284912, "learning_rate": 0.01, "loss": 1.9458, "step": 111393 }, { "epoch": 11.448715313463515, "grad_norm": 0.05926622822880745, "learning_rate": 0.01, "loss": 1.9492, "step": 111396 }, { "epoch": 11.449023638232271, "grad_norm": 0.09390268474817276, "learning_rate": 0.01, "loss": 1.9593, "step": 111399 }, { "epoch": 11.449331963001027, "grad_norm": 0.13381150364875793, "learning_rate": 0.01, "loss": 1.9625, "step": 111402 }, { "epoch": 11.449640287769784, "grad_norm": 0.17974214255809784, "learning_rate": 0.01, "loss": 1.9411, "step": 111405 }, { "epoch": 11.44994861253854, "grad_norm": 0.06824097782373428, "learning_rate": 0.01, "loss": 1.9487, "step": 111408 }, { "epoch": 11.450256937307296, "grad_norm": 0.043500106781721115, "learning_rate": 0.01, "loss": 1.9448, "step": 111411 }, { "epoch": 11.450565262076054, "grad_norm": 0.03300514072179794, "learning_rate": 0.01, "loss": 1.9327, "step": 111414 }, { "epoch": 11.45087358684481, "grad_norm": 0.0398101806640625, "learning_rate": 0.01, "loss": 1.9586, "step": 111417 }, { "epoch": 11.451181911613567, "grad_norm": 0.04787025600671768, "learning_rate": 0.01, "loss": 1.9258, "step": 111420 }, { "epoch": 11.451490236382323, "grad_norm": 0.03133167326450348, "learning_rate": 0.01, "loss": 1.9512, "step": 111423 }, { "epoch": 11.45179856115108, "grad_norm": 0.04809502139687538, "learning_rate": 0.01, "loss": 1.9507, "step": 111426 }, { "epoch": 11.452106885919836, "grad_norm": 0.08147426694631577, "learning_rate": 0.01, "loss": 1.9372, "step": 111429 }, { "epoch": 11.452415210688592, "grad_norm": 0.058343514800071716, "learning_rate": 0.01, "loss": 1.9294, "step": 111432 }, { "epoch": 11.452723535457348, "grad_norm": 0.06231757998466492, "learning_rate": 0.01, "loss": 1.9338, "step": 111435 }, { "epoch": 11.453031860226105, "grad_norm": 0.08571364730596542, "learning_rate": 0.01, "loss": 1.9415, "step": 111438 }, { "epoch": 11.453340184994861, "grad_norm": 0.04634599760174751, "learning_rate": 0.01, "loss": 1.9598, "step": 111441 }, { "epoch": 11.453648509763617, "grad_norm": 0.04871790483593941, "learning_rate": 0.01, "loss": 1.9308, "step": 111444 }, { "epoch": 11.453956834532374, "grad_norm": 0.03777323663234711, "learning_rate": 0.01, "loss": 1.9706, "step": 111447 }, { "epoch": 11.45426515930113, "grad_norm": 0.08520831167697906, "learning_rate": 0.01, "loss": 1.9198, "step": 111450 }, { "epoch": 11.454573484069886, "grad_norm": 0.10346395522356033, "learning_rate": 0.01, "loss": 1.9348, "step": 111453 }, { "epoch": 11.454881808838643, "grad_norm": 0.1196495071053505, "learning_rate": 0.01, "loss": 1.9574, "step": 111456 }, { "epoch": 11.455190133607399, "grad_norm": 0.12974438071250916, "learning_rate": 0.01, "loss": 1.9542, "step": 111459 }, { "epoch": 11.455498458376157, "grad_norm": 0.11819576472043991, "learning_rate": 0.01, "loss": 1.934, "step": 111462 }, { "epoch": 11.455806783144913, "grad_norm": 0.12511448562145233, "learning_rate": 0.01, "loss": 1.9356, "step": 111465 }, { "epoch": 11.45611510791367, "grad_norm": 0.10518360882997513, "learning_rate": 0.01, "loss": 1.9266, "step": 111468 }, { "epoch": 11.456423432682426, "grad_norm": 0.06951570510864258, "learning_rate": 0.01, "loss": 1.9381, "step": 111471 }, { "epoch": 11.456731757451182, "grad_norm": 0.052723489701747894, "learning_rate": 0.01, "loss": 1.9291, "step": 111474 }, { "epoch": 11.457040082219939, "grad_norm": 0.040231626480817795, "learning_rate": 0.01, "loss": 1.9206, "step": 111477 }, { "epoch": 11.457348406988695, "grad_norm": 0.029970921576023102, "learning_rate": 0.01, "loss": 1.9438, "step": 111480 }, { "epoch": 11.457656731757451, "grad_norm": 0.050749361515045166, "learning_rate": 0.01, "loss": 1.9137, "step": 111483 }, { "epoch": 11.457965056526207, "grad_norm": 0.06277536600828171, "learning_rate": 0.01, "loss": 1.9291, "step": 111486 }, { "epoch": 11.458273381294964, "grad_norm": 0.07072249054908752, "learning_rate": 0.01, "loss": 1.9459, "step": 111489 }, { "epoch": 11.45858170606372, "grad_norm": 0.06058824807405472, "learning_rate": 0.01, "loss": 1.9568, "step": 111492 }, { "epoch": 11.458890030832476, "grad_norm": 0.1610943228006363, "learning_rate": 0.01, "loss": 1.9424, "step": 111495 }, { "epoch": 11.459198355601233, "grad_norm": 0.0705665647983551, "learning_rate": 0.01, "loss": 1.9402, "step": 111498 }, { "epoch": 11.459506680369989, "grad_norm": 0.04693428799510002, "learning_rate": 0.01, "loss": 1.936, "step": 111501 }, { "epoch": 11.459815005138745, "grad_norm": 0.11832481622695923, "learning_rate": 0.01, "loss": 1.948, "step": 111504 }, { "epoch": 11.460123329907503, "grad_norm": 0.08116341382265091, "learning_rate": 0.01, "loss": 1.9463, "step": 111507 }, { "epoch": 11.46043165467626, "grad_norm": 0.07210972160100937, "learning_rate": 0.01, "loss": 1.949, "step": 111510 }, { "epoch": 11.460739979445016, "grad_norm": 0.06692171096801758, "learning_rate": 0.01, "loss": 1.9315, "step": 111513 }, { "epoch": 11.461048304213772, "grad_norm": 0.09386149048805237, "learning_rate": 0.01, "loss": 1.9435, "step": 111516 }, { "epoch": 11.461356628982529, "grad_norm": 0.03938892111182213, "learning_rate": 0.01, "loss": 1.9353, "step": 111519 }, { "epoch": 11.461664953751285, "grad_norm": 0.15739138424396515, "learning_rate": 0.01, "loss": 1.9531, "step": 111522 }, { "epoch": 11.461973278520041, "grad_norm": 0.13813142478466034, "learning_rate": 0.01, "loss": 1.9461, "step": 111525 }, { "epoch": 11.462281603288798, "grad_norm": 0.08947230130434036, "learning_rate": 0.01, "loss": 1.931, "step": 111528 }, { "epoch": 11.462589928057554, "grad_norm": 0.05972016230225563, "learning_rate": 0.01, "loss": 1.9349, "step": 111531 }, { "epoch": 11.46289825282631, "grad_norm": 0.05006679147481918, "learning_rate": 0.01, "loss": 1.9528, "step": 111534 }, { "epoch": 11.463206577595066, "grad_norm": 0.04381818696856499, "learning_rate": 0.01, "loss": 1.9237, "step": 111537 }, { "epoch": 11.463514902363823, "grad_norm": 0.03743549436330795, "learning_rate": 0.01, "loss": 1.9477, "step": 111540 }, { "epoch": 11.463823227132579, "grad_norm": 0.04342208057641983, "learning_rate": 0.01, "loss": 1.948, "step": 111543 }, { "epoch": 11.464131551901335, "grad_norm": 0.08246835321187973, "learning_rate": 0.01, "loss": 1.9545, "step": 111546 }, { "epoch": 11.464439876670092, "grad_norm": 0.052904337644577026, "learning_rate": 0.01, "loss": 1.951, "step": 111549 }, { "epoch": 11.46474820143885, "grad_norm": 0.07945089042186737, "learning_rate": 0.01, "loss": 1.9326, "step": 111552 }, { "epoch": 11.465056526207606, "grad_norm": 0.09860449284315109, "learning_rate": 0.01, "loss": 1.9341, "step": 111555 }, { "epoch": 11.465364850976362, "grad_norm": 0.07978519052267075, "learning_rate": 0.01, "loss": 1.9219, "step": 111558 }, { "epoch": 11.465673175745119, "grad_norm": 0.09356561303138733, "learning_rate": 0.01, "loss": 1.9336, "step": 111561 }, { "epoch": 11.465981500513875, "grad_norm": 0.08849091827869415, "learning_rate": 0.01, "loss": 1.937, "step": 111564 }, { "epoch": 11.466289825282631, "grad_norm": 0.07738533616065979, "learning_rate": 0.01, "loss": 1.9573, "step": 111567 }, { "epoch": 11.466598150051388, "grad_norm": 0.06273981928825378, "learning_rate": 0.01, "loss": 1.9304, "step": 111570 }, { "epoch": 11.466906474820144, "grad_norm": 0.03618188202381134, "learning_rate": 0.01, "loss": 1.9166, "step": 111573 }, { "epoch": 11.4672147995889, "grad_norm": 0.03245396167039871, "learning_rate": 0.01, "loss": 1.946, "step": 111576 }, { "epoch": 11.467523124357657, "grad_norm": 0.054043613374233246, "learning_rate": 0.01, "loss": 1.9397, "step": 111579 }, { "epoch": 11.467831449126413, "grad_norm": 0.14490677416324615, "learning_rate": 0.01, "loss": 1.978, "step": 111582 }, { "epoch": 11.46813977389517, "grad_norm": 0.05632997304201126, "learning_rate": 0.01, "loss": 1.9545, "step": 111585 }, { "epoch": 11.468448098663925, "grad_norm": 0.13351985812187195, "learning_rate": 0.01, "loss": 1.9401, "step": 111588 }, { "epoch": 11.468756423432682, "grad_norm": 0.0480462908744812, "learning_rate": 0.01, "loss": 1.9311, "step": 111591 }, { "epoch": 11.469064748201438, "grad_norm": 0.05255371332168579, "learning_rate": 0.01, "loss": 1.9472, "step": 111594 }, { "epoch": 11.469373072970196, "grad_norm": 0.050016168504953384, "learning_rate": 0.01, "loss": 1.9294, "step": 111597 }, { "epoch": 11.469681397738952, "grad_norm": 0.033241692930459976, "learning_rate": 0.01, "loss": 1.9173, "step": 111600 }, { "epoch": 11.469989722507709, "grad_norm": 0.037866104394197464, "learning_rate": 0.01, "loss": 1.9416, "step": 111603 }, { "epoch": 11.470298047276465, "grad_norm": 0.10859289765357971, "learning_rate": 0.01, "loss": 1.9429, "step": 111606 }, { "epoch": 11.470606372045221, "grad_norm": 0.041134439408779144, "learning_rate": 0.01, "loss": 1.9327, "step": 111609 }, { "epoch": 11.470914696813978, "grad_norm": 0.07839282602071762, "learning_rate": 0.01, "loss": 1.9529, "step": 111612 }, { "epoch": 11.471223021582734, "grad_norm": 0.09817814826965332, "learning_rate": 0.01, "loss": 1.9375, "step": 111615 }, { "epoch": 11.47153134635149, "grad_norm": 0.11221611499786377, "learning_rate": 0.01, "loss": 1.9566, "step": 111618 }, { "epoch": 11.471839671120247, "grad_norm": 0.06966344267129898, "learning_rate": 0.01, "loss": 1.9337, "step": 111621 }, { "epoch": 11.472147995889003, "grad_norm": 0.06667056679725647, "learning_rate": 0.01, "loss": 1.9421, "step": 111624 }, { "epoch": 11.47245632065776, "grad_norm": 0.07580654323101044, "learning_rate": 0.01, "loss": 1.9303, "step": 111627 }, { "epoch": 11.472764645426516, "grad_norm": 0.05771489813923836, "learning_rate": 0.01, "loss": 1.9411, "step": 111630 }, { "epoch": 11.473072970195272, "grad_norm": 0.05805226042866707, "learning_rate": 0.01, "loss": 1.9536, "step": 111633 }, { "epoch": 11.473381294964028, "grad_norm": 0.04712773114442825, "learning_rate": 0.01, "loss": 1.954, "step": 111636 }, { "epoch": 11.473689619732784, "grad_norm": 0.04592062160372734, "learning_rate": 0.01, "loss": 1.9309, "step": 111639 }, { "epoch": 11.47399794450154, "grad_norm": 0.06208406761288643, "learning_rate": 0.01, "loss": 1.9361, "step": 111642 }, { "epoch": 11.474306269270299, "grad_norm": 0.052417706698179245, "learning_rate": 0.01, "loss": 1.9307, "step": 111645 }, { "epoch": 11.474614594039055, "grad_norm": 0.1250835508108139, "learning_rate": 0.01, "loss": 1.9551, "step": 111648 }, { "epoch": 11.474922918807811, "grad_norm": 0.04983068257570267, "learning_rate": 0.01, "loss": 1.927, "step": 111651 }, { "epoch": 11.475231243576568, "grad_norm": 0.09713703393936157, "learning_rate": 0.01, "loss": 1.9432, "step": 111654 }, { "epoch": 11.475539568345324, "grad_norm": 0.04548988118767738, "learning_rate": 0.01, "loss": 1.9483, "step": 111657 }, { "epoch": 11.47584789311408, "grad_norm": 0.0484665147960186, "learning_rate": 0.01, "loss": 1.9217, "step": 111660 }, { "epoch": 11.476156217882837, "grad_norm": 0.059269923716783524, "learning_rate": 0.01, "loss": 1.9316, "step": 111663 }, { "epoch": 11.476464542651593, "grad_norm": 0.1312210112810135, "learning_rate": 0.01, "loss": 1.935, "step": 111666 }, { "epoch": 11.47677286742035, "grad_norm": 0.04668668285012245, "learning_rate": 0.01, "loss": 1.9512, "step": 111669 }, { "epoch": 11.477081192189106, "grad_norm": 0.05484083294868469, "learning_rate": 0.01, "loss": 1.9273, "step": 111672 }, { "epoch": 11.477389516957862, "grad_norm": 0.10938485711812973, "learning_rate": 0.01, "loss": 1.9295, "step": 111675 }, { "epoch": 11.477697841726618, "grad_norm": 0.055881600826978683, "learning_rate": 0.01, "loss": 1.9605, "step": 111678 }, { "epoch": 11.478006166495375, "grad_norm": 0.03705210238695145, "learning_rate": 0.01, "loss": 1.953, "step": 111681 }, { "epoch": 11.47831449126413, "grad_norm": 0.035369958728551865, "learning_rate": 0.01, "loss": 1.9502, "step": 111684 }, { "epoch": 11.478622816032887, "grad_norm": 0.03749634325504303, "learning_rate": 0.01, "loss": 1.9635, "step": 111687 }, { "epoch": 11.478931140801645, "grad_norm": 0.042402010411024094, "learning_rate": 0.01, "loss": 1.9367, "step": 111690 }, { "epoch": 11.479239465570402, "grad_norm": 0.13027635216712952, "learning_rate": 0.01, "loss": 1.9544, "step": 111693 }, { "epoch": 11.479547790339158, "grad_norm": 0.09507349133491516, "learning_rate": 0.01, "loss": 1.9251, "step": 111696 }, { "epoch": 11.479856115107914, "grad_norm": 0.06717098504304886, "learning_rate": 0.01, "loss": 1.928, "step": 111699 }, { "epoch": 11.48016443987667, "grad_norm": 0.05186061933636665, "learning_rate": 0.01, "loss": 1.9437, "step": 111702 }, { "epoch": 11.480472764645427, "grad_norm": 0.05118173733353615, "learning_rate": 0.01, "loss": 1.9414, "step": 111705 }, { "epoch": 11.480781089414183, "grad_norm": 0.0630086213350296, "learning_rate": 0.01, "loss": 1.9385, "step": 111708 }, { "epoch": 11.48108941418294, "grad_norm": 0.09794661402702332, "learning_rate": 0.01, "loss": 1.9229, "step": 111711 }, { "epoch": 11.481397738951696, "grad_norm": 0.04907077178359032, "learning_rate": 0.01, "loss": 1.9147, "step": 111714 }, { "epoch": 11.481706063720452, "grad_norm": 0.07658752053976059, "learning_rate": 0.01, "loss": 1.9177, "step": 111717 }, { "epoch": 11.482014388489208, "grad_norm": 0.09202036261558533, "learning_rate": 0.01, "loss": 1.9285, "step": 111720 }, { "epoch": 11.482322713257965, "grad_norm": 0.06585373729467392, "learning_rate": 0.01, "loss": 1.9422, "step": 111723 }, { "epoch": 11.482631038026721, "grad_norm": 0.07491261512041092, "learning_rate": 0.01, "loss": 1.9044, "step": 111726 }, { "epoch": 11.482939362795477, "grad_norm": 0.09483268111944199, "learning_rate": 0.01, "loss": 1.9581, "step": 111729 }, { "epoch": 11.483247687564234, "grad_norm": 0.15194907784461975, "learning_rate": 0.01, "loss": 1.9279, "step": 111732 }, { "epoch": 11.48355601233299, "grad_norm": 0.14718058705329895, "learning_rate": 0.01, "loss": 1.9556, "step": 111735 }, { "epoch": 11.483864337101748, "grad_norm": 0.1479717642068863, "learning_rate": 0.01, "loss": 1.931, "step": 111738 }, { "epoch": 11.484172661870504, "grad_norm": 0.0986589714884758, "learning_rate": 0.01, "loss": 1.9078, "step": 111741 }, { "epoch": 11.48448098663926, "grad_norm": 0.08421244472265244, "learning_rate": 0.01, "loss": 1.9206, "step": 111744 }, { "epoch": 11.484789311408017, "grad_norm": 0.08169248700141907, "learning_rate": 0.01, "loss": 1.9454, "step": 111747 }, { "epoch": 11.485097636176773, "grad_norm": 0.08077950030565262, "learning_rate": 0.01, "loss": 1.9493, "step": 111750 }, { "epoch": 11.48540596094553, "grad_norm": 0.08002752810716629, "learning_rate": 0.01, "loss": 1.9502, "step": 111753 }, { "epoch": 11.485714285714286, "grad_norm": 0.050435587763786316, "learning_rate": 0.01, "loss": 1.9337, "step": 111756 }, { "epoch": 11.486022610483042, "grad_norm": 0.043131690472364426, "learning_rate": 0.01, "loss": 1.9193, "step": 111759 }, { "epoch": 11.486330935251798, "grad_norm": 0.05599942058324814, "learning_rate": 0.01, "loss": 1.9392, "step": 111762 }, { "epoch": 11.486639260020555, "grad_norm": 0.09633025527000427, "learning_rate": 0.01, "loss": 1.9278, "step": 111765 }, { "epoch": 11.486947584789311, "grad_norm": 0.07188762724399567, "learning_rate": 0.01, "loss": 1.934, "step": 111768 }, { "epoch": 11.487255909558067, "grad_norm": 0.05295489728450775, "learning_rate": 0.01, "loss": 1.9427, "step": 111771 }, { "epoch": 11.487564234326824, "grad_norm": 0.03551109880208969, "learning_rate": 0.01, "loss": 1.9497, "step": 111774 }, { "epoch": 11.48787255909558, "grad_norm": 0.09166920185089111, "learning_rate": 0.01, "loss": 1.9402, "step": 111777 }, { "epoch": 11.488180883864338, "grad_norm": 0.06509532779455185, "learning_rate": 0.01, "loss": 1.9266, "step": 111780 }, { "epoch": 11.488489208633094, "grad_norm": 0.03466518223285675, "learning_rate": 0.01, "loss": 1.9265, "step": 111783 }, { "epoch": 11.48879753340185, "grad_norm": 0.06080121546983719, "learning_rate": 0.01, "loss": 1.9382, "step": 111786 }, { "epoch": 11.489105858170607, "grad_norm": 0.07743913680315018, "learning_rate": 0.01, "loss": 1.9396, "step": 111789 }, { "epoch": 11.489414182939363, "grad_norm": 0.04427964985370636, "learning_rate": 0.01, "loss": 1.9383, "step": 111792 }, { "epoch": 11.48972250770812, "grad_norm": 0.049059998244047165, "learning_rate": 0.01, "loss": 1.963, "step": 111795 }, { "epoch": 11.490030832476876, "grad_norm": 0.0991068035364151, "learning_rate": 0.01, "loss": 1.9439, "step": 111798 }, { "epoch": 11.490339157245632, "grad_norm": 0.0421556755900383, "learning_rate": 0.01, "loss": 1.9385, "step": 111801 }, { "epoch": 11.490647482014388, "grad_norm": 0.03575088083744049, "learning_rate": 0.01, "loss": 1.9361, "step": 111804 }, { "epoch": 11.490955806783145, "grad_norm": 0.05111589655280113, "learning_rate": 0.01, "loss": 1.9274, "step": 111807 }, { "epoch": 11.491264131551901, "grad_norm": 0.04898693785071373, "learning_rate": 0.01, "loss": 1.9065, "step": 111810 }, { "epoch": 11.491572456320657, "grad_norm": 0.16972552239894867, "learning_rate": 0.01, "loss": 1.9596, "step": 111813 }, { "epoch": 11.491880781089414, "grad_norm": 0.11789322644472122, "learning_rate": 0.01, "loss": 1.9238, "step": 111816 }, { "epoch": 11.49218910585817, "grad_norm": 0.04322198033332825, "learning_rate": 0.01, "loss": 1.9383, "step": 111819 }, { "epoch": 11.492497430626926, "grad_norm": 0.05791204422712326, "learning_rate": 0.01, "loss": 1.9411, "step": 111822 }, { "epoch": 11.492805755395683, "grad_norm": 0.045334842056035995, "learning_rate": 0.01, "loss": 1.9236, "step": 111825 }, { "epoch": 11.49311408016444, "grad_norm": 0.040358442813158035, "learning_rate": 0.01, "loss": 1.9503, "step": 111828 }, { "epoch": 11.493422404933197, "grad_norm": 0.11316455900669098, "learning_rate": 0.01, "loss": 1.9277, "step": 111831 }, { "epoch": 11.493730729701953, "grad_norm": 0.06410171091556549, "learning_rate": 0.01, "loss": 1.9721, "step": 111834 }, { "epoch": 11.49403905447071, "grad_norm": 0.05099207162857056, "learning_rate": 0.01, "loss": 1.9685, "step": 111837 }, { "epoch": 11.494347379239466, "grad_norm": 0.13952548801898956, "learning_rate": 0.01, "loss": 1.9665, "step": 111840 }, { "epoch": 11.494655704008222, "grad_norm": 0.03869643807411194, "learning_rate": 0.01, "loss": 1.9335, "step": 111843 }, { "epoch": 11.494964028776979, "grad_norm": 0.1269671618938446, "learning_rate": 0.01, "loss": 1.9396, "step": 111846 }, { "epoch": 11.495272353545735, "grad_norm": 0.08769647032022476, "learning_rate": 0.01, "loss": 1.9166, "step": 111849 }, { "epoch": 11.495580678314491, "grad_norm": 0.058100420981645584, "learning_rate": 0.01, "loss": 1.9488, "step": 111852 }, { "epoch": 11.495889003083247, "grad_norm": 0.038575999438762665, "learning_rate": 0.01, "loss": 1.9437, "step": 111855 }, { "epoch": 11.496197327852004, "grad_norm": 0.04461336135864258, "learning_rate": 0.01, "loss": 1.931, "step": 111858 }, { "epoch": 11.49650565262076, "grad_norm": 0.030741844326257706, "learning_rate": 0.01, "loss": 1.9433, "step": 111861 }, { "epoch": 11.496813977389516, "grad_norm": 0.05148760601878166, "learning_rate": 0.01, "loss": 1.9363, "step": 111864 }, { "epoch": 11.497122302158273, "grad_norm": 0.11670944094657898, "learning_rate": 0.01, "loss": 1.946, "step": 111867 }, { "epoch": 11.497430626927029, "grad_norm": 0.04056670889258385, "learning_rate": 0.01, "loss": 1.9307, "step": 111870 }, { "epoch": 11.497738951695787, "grad_norm": 0.10150992125272751, "learning_rate": 0.01, "loss": 1.9232, "step": 111873 }, { "epoch": 11.498047276464543, "grad_norm": 0.08927108347415924, "learning_rate": 0.01, "loss": 1.9268, "step": 111876 }, { "epoch": 11.4983556012333, "grad_norm": 0.1410294771194458, "learning_rate": 0.01, "loss": 1.9463, "step": 111879 }, { "epoch": 11.498663926002056, "grad_norm": 0.09127254784107208, "learning_rate": 0.01, "loss": 1.931, "step": 111882 }, { "epoch": 11.498972250770812, "grad_norm": 0.0577942319214344, "learning_rate": 0.01, "loss": 1.9529, "step": 111885 }, { "epoch": 11.499280575539569, "grad_norm": 0.07099872827529907, "learning_rate": 0.01, "loss": 1.9523, "step": 111888 }, { "epoch": 11.499588900308325, "grad_norm": 0.08721621334552765, "learning_rate": 0.01, "loss": 1.9344, "step": 111891 }, { "epoch": 11.499897225077081, "grad_norm": 0.049968644976615906, "learning_rate": 0.01, "loss": 1.9271, "step": 111894 }, { "epoch": 11.500205549845838, "grad_norm": 0.08129146695137024, "learning_rate": 0.01, "loss": 1.9361, "step": 111897 }, { "epoch": 11.500513874614594, "grad_norm": 0.10893581807613373, "learning_rate": 0.01, "loss": 1.9579, "step": 111900 }, { "epoch": 11.50082219938335, "grad_norm": 0.10866726189851761, "learning_rate": 0.01, "loss": 1.9707, "step": 111903 }, { "epoch": 11.501130524152106, "grad_norm": 0.0690096914768219, "learning_rate": 0.01, "loss": 1.9364, "step": 111906 }, { "epoch": 11.501438848920863, "grad_norm": 0.04975270479917526, "learning_rate": 0.01, "loss": 1.9418, "step": 111909 }, { "epoch": 11.501747173689619, "grad_norm": 0.07065418362617493, "learning_rate": 0.01, "loss": 1.9267, "step": 111912 }, { "epoch": 11.502055498458375, "grad_norm": 0.06333848834037781, "learning_rate": 0.01, "loss": 1.925, "step": 111915 }, { "epoch": 11.502363823227132, "grad_norm": 0.053953733295202255, "learning_rate": 0.01, "loss": 1.9585, "step": 111918 }, { "epoch": 11.50267214799589, "grad_norm": 0.06409978866577148, "learning_rate": 0.01, "loss": 1.9364, "step": 111921 }, { "epoch": 11.502980472764646, "grad_norm": 0.12793174386024475, "learning_rate": 0.01, "loss": 1.9478, "step": 111924 }, { "epoch": 11.503288797533402, "grad_norm": 0.04191777482628822, "learning_rate": 0.01, "loss": 1.9261, "step": 111927 }, { "epoch": 11.503597122302159, "grad_norm": 0.10092096775770187, "learning_rate": 0.01, "loss": 1.9283, "step": 111930 }, { "epoch": 11.503905447070915, "grad_norm": 0.09913140535354614, "learning_rate": 0.01, "loss": 1.9359, "step": 111933 }, { "epoch": 11.504213771839671, "grad_norm": 0.05205460265278816, "learning_rate": 0.01, "loss": 1.9197, "step": 111936 }, { "epoch": 11.504522096608428, "grad_norm": 0.10730945318937302, "learning_rate": 0.01, "loss": 1.9564, "step": 111939 }, { "epoch": 11.504830421377184, "grad_norm": 0.034082312136888504, "learning_rate": 0.01, "loss": 1.9016, "step": 111942 }, { "epoch": 11.50513874614594, "grad_norm": 0.06130027025938034, "learning_rate": 0.01, "loss": 1.9444, "step": 111945 }, { "epoch": 11.505447070914697, "grad_norm": 0.07574241608381271, "learning_rate": 0.01, "loss": 1.9357, "step": 111948 }, { "epoch": 11.505755395683453, "grad_norm": 0.06435610353946686, "learning_rate": 0.01, "loss": 1.968, "step": 111951 }, { "epoch": 11.50606372045221, "grad_norm": 0.08691317588090897, "learning_rate": 0.01, "loss": 1.9339, "step": 111954 }, { "epoch": 11.506372045220965, "grad_norm": 0.11261089891195297, "learning_rate": 0.01, "loss": 1.9167, "step": 111957 }, { "epoch": 11.506680369989722, "grad_norm": 0.0669609010219574, "learning_rate": 0.01, "loss": 1.9475, "step": 111960 }, { "epoch": 11.50698869475848, "grad_norm": 0.039548080414533615, "learning_rate": 0.01, "loss": 1.9535, "step": 111963 }, { "epoch": 11.507297019527236, "grad_norm": 0.1087004542350769, "learning_rate": 0.01, "loss": 1.9372, "step": 111966 }, { "epoch": 11.507605344295992, "grad_norm": 0.07893752306699753, "learning_rate": 0.01, "loss": 1.9172, "step": 111969 }, { "epoch": 11.507913669064749, "grad_norm": 0.11534779518842697, "learning_rate": 0.01, "loss": 1.8917, "step": 111972 }, { "epoch": 11.508221993833505, "grad_norm": 0.0677044466137886, "learning_rate": 0.01, "loss": 1.9312, "step": 111975 }, { "epoch": 11.508530318602261, "grad_norm": 0.054873283952474594, "learning_rate": 0.01, "loss": 1.9219, "step": 111978 }, { "epoch": 11.508838643371018, "grad_norm": 0.041117701679468155, "learning_rate": 0.01, "loss": 1.9415, "step": 111981 }, { "epoch": 11.509146968139774, "grad_norm": 0.099622942507267, "learning_rate": 0.01, "loss": 1.9394, "step": 111984 }, { "epoch": 11.50945529290853, "grad_norm": 0.0813162699341774, "learning_rate": 0.01, "loss": 1.9226, "step": 111987 }, { "epoch": 11.509763617677287, "grad_norm": 0.05334705859422684, "learning_rate": 0.01, "loss": 1.9241, "step": 111990 }, { "epoch": 11.510071942446043, "grad_norm": 0.08059155941009521, "learning_rate": 0.01, "loss": 1.9552, "step": 111993 }, { "epoch": 11.5103802672148, "grad_norm": 0.07139837741851807, "learning_rate": 0.01, "loss": 1.9535, "step": 111996 }, { "epoch": 11.510688591983556, "grad_norm": 0.14807172119617462, "learning_rate": 0.01, "loss": 1.941, "step": 111999 }, { "epoch": 11.510996916752312, "grad_norm": 0.0448773056268692, "learning_rate": 0.01, "loss": 1.945, "step": 112002 }, { "epoch": 11.511305241521068, "grad_norm": 0.06124507263302803, "learning_rate": 0.01, "loss": 1.9504, "step": 112005 }, { "epoch": 11.511613566289824, "grad_norm": 0.06960859894752502, "learning_rate": 0.01, "loss": 1.9376, "step": 112008 }, { "epoch": 11.51192189105858, "grad_norm": 0.08753380924463272, "learning_rate": 0.01, "loss": 1.9404, "step": 112011 }, { "epoch": 11.512230215827339, "grad_norm": 0.15506276488304138, "learning_rate": 0.01, "loss": 1.9687, "step": 112014 }, { "epoch": 11.512538540596095, "grad_norm": 0.0536036379635334, "learning_rate": 0.01, "loss": 1.9184, "step": 112017 }, { "epoch": 11.512846865364851, "grad_norm": 0.08961272239685059, "learning_rate": 0.01, "loss": 1.9202, "step": 112020 }, { "epoch": 11.513155190133608, "grad_norm": 0.058408934623003006, "learning_rate": 0.01, "loss": 1.9494, "step": 112023 }, { "epoch": 11.513463514902364, "grad_norm": 0.041266631335020065, "learning_rate": 0.01, "loss": 1.9323, "step": 112026 }, { "epoch": 11.51377183967112, "grad_norm": 0.1152496337890625, "learning_rate": 0.01, "loss": 1.9444, "step": 112029 }, { "epoch": 11.514080164439877, "grad_norm": 0.07906953990459442, "learning_rate": 0.01, "loss": 1.9439, "step": 112032 }, { "epoch": 11.514388489208633, "grad_norm": 0.14202702045440674, "learning_rate": 0.01, "loss": 1.9438, "step": 112035 }, { "epoch": 11.51469681397739, "grad_norm": 0.03130604699254036, "learning_rate": 0.01, "loss": 1.9386, "step": 112038 }, { "epoch": 11.515005138746146, "grad_norm": 0.058028191328048706, "learning_rate": 0.01, "loss": 1.9643, "step": 112041 }, { "epoch": 11.515313463514902, "grad_norm": 0.06180107221007347, "learning_rate": 0.01, "loss": 1.9507, "step": 112044 }, { "epoch": 11.515621788283658, "grad_norm": 0.03522508591413498, "learning_rate": 0.01, "loss": 1.9222, "step": 112047 }, { "epoch": 11.515930113052415, "grad_norm": 0.040822502225637436, "learning_rate": 0.01, "loss": 1.9268, "step": 112050 }, { "epoch": 11.51623843782117, "grad_norm": 0.03846633806824684, "learning_rate": 0.01, "loss": 1.9373, "step": 112053 }, { "epoch": 11.516546762589929, "grad_norm": 0.14067944884300232, "learning_rate": 0.01, "loss": 1.9282, "step": 112056 }, { "epoch": 11.516855087358685, "grad_norm": 0.04643148556351662, "learning_rate": 0.01, "loss": 1.9353, "step": 112059 }, { "epoch": 11.517163412127442, "grad_norm": 0.10015133023262024, "learning_rate": 0.01, "loss": 1.9477, "step": 112062 }, { "epoch": 11.517471736896198, "grad_norm": 0.043383967131376266, "learning_rate": 0.01, "loss": 1.9365, "step": 112065 }, { "epoch": 11.517780061664954, "grad_norm": 0.05876270309090614, "learning_rate": 0.01, "loss": 1.9211, "step": 112068 }, { "epoch": 11.51808838643371, "grad_norm": 0.0826994925737381, "learning_rate": 0.01, "loss": 1.9497, "step": 112071 }, { "epoch": 11.518396711202467, "grad_norm": 0.11554791778326035, "learning_rate": 0.01, "loss": 1.9278, "step": 112074 }, { "epoch": 11.518705035971223, "grad_norm": 0.07947423309087753, "learning_rate": 0.01, "loss": 1.9166, "step": 112077 }, { "epoch": 11.51901336073998, "grad_norm": 0.049170155078172684, "learning_rate": 0.01, "loss": 1.9396, "step": 112080 }, { "epoch": 11.519321685508736, "grad_norm": 0.058392517268657684, "learning_rate": 0.01, "loss": 1.9474, "step": 112083 }, { "epoch": 11.519630010277492, "grad_norm": 0.03751151263713837, "learning_rate": 0.01, "loss": 1.92, "step": 112086 }, { "epoch": 11.519938335046248, "grad_norm": 0.04583292081952095, "learning_rate": 0.01, "loss": 1.929, "step": 112089 }, { "epoch": 11.520246659815005, "grad_norm": 0.08585547655820847, "learning_rate": 0.01, "loss": 1.9333, "step": 112092 }, { "epoch": 11.520554984583761, "grad_norm": 0.07672592252492905, "learning_rate": 0.01, "loss": 1.9636, "step": 112095 }, { "epoch": 11.520863309352517, "grad_norm": 0.07113534957170486, "learning_rate": 0.01, "loss": 1.9268, "step": 112098 }, { "epoch": 11.521171634121274, "grad_norm": 0.11068326979875565, "learning_rate": 0.01, "loss": 1.9433, "step": 112101 }, { "epoch": 11.521479958890032, "grad_norm": 0.060119953006505966, "learning_rate": 0.01, "loss": 1.9306, "step": 112104 }, { "epoch": 11.521788283658788, "grad_norm": 0.05784304067492485, "learning_rate": 0.01, "loss": 1.9425, "step": 112107 }, { "epoch": 11.522096608427544, "grad_norm": 0.06138474494218826, "learning_rate": 0.01, "loss": 1.9384, "step": 112110 }, { "epoch": 11.5224049331963, "grad_norm": 0.06129737198352814, "learning_rate": 0.01, "loss": 1.9649, "step": 112113 }, { "epoch": 11.522713257965057, "grad_norm": 0.06837484985589981, "learning_rate": 0.01, "loss": 1.9221, "step": 112116 }, { "epoch": 11.523021582733813, "grad_norm": 0.11184889823198318, "learning_rate": 0.01, "loss": 1.9193, "step": 112119 }, { "epoch": 11.52332990750257, "grad_norm": 0.1556568741798401, "learning_rate": 0.01, "loss": 1.9804, "step": 112122 }, { "epoch": 11.523638232271326, "grad_norm": 0.08367830514907837, "learning_rate": 0.01, "loss": 1.9428, "step": 112125 }, { "epoch": 11.523946557040082, "grad_norm": 0.08855171501636505, "learning_rate": 0.01, "loss": 1.952, "step": 112128 }, { "epoch": 11.524254881808838, "grad_norm": 0.1637917459011078, "learning_rate": 0.01, "loss": 1.9445, "step": 112131 }, { "epoch": 11.524563206577595, "grad_norm": 0.08338747918605804, "learning_rate": 0.01, "loss": 1.9296, "step": 112134 }, { "epoch": 11.524871531346351, "grad_norm": 0.06042430177330971, "learning_rate": 0.01, "loss": 1.9539, "step": 112137 }, { "epoch": 11.525179856115107, "grad_norm": 0.12024378031492233, "learning_rate": 0.01, "loss": 1.9236, "step": 112140 }, { "epoch": 11.525488180883864, "grad_norm": 0.053831133991479874, "learning_rate": 0.01, "loss": 1.9365, "step": 112143 }, { "epoch": 11.525796505652622, "grad_norm": 0.03246373310685158, "learning_rate": 0.01, "loss": 1.9231, "step": 112146 }, { "epoch": 11.526104830421378, "grad_norm": 0.03484547138214111, "learning_rate": 0.01, "loss": 1.9467, "step": 112149 }, { "epoch": 11.526413155190134, "grad_norm": 0.08741461485624313, "learning_rate": 0.01, "loss": 1.9483, "step": 112152 }, { "epoch": 11.52672147995889, "grad_norm": 0.08762611448764801, "learning_rate": 0.01, "loss": 1.9376, "step": 112155 }, { "epoch": 11.527029804727647, "grad_norm": 0.055941540747880936, "learning_rate": 0.01, "loss": 1.9313, "step": 112158 }, { "epoch": 11.527338129496403, "grad_norm": 0.04995638132095337, "learning_rate": 0.01, "loss": 1.9364, "step": 112161 }, { "epoch": 11.52764645426516, "grad_norm": 0.033121444284915924, "learning_rate": 0.01, "loss": 1.9491, "step": 112164 }, { "epoch": 11.527954779033916, "grad_norm": 0.10392650216817856, "learning_rate": 0.01, "loss": 1.9372, "step": 112167 }, { "epoch": 11.528263103802672, "grad_norm": 0.0696452334523201, "learning_rate": 0.01, "loss": 1.9447, "step": 112170 }, { "epoch": 11.528571428571428, "grad_norm": 0.08160513639450073, "learning_rate": 0.01, "loss": 1.9349, "step": 112173 }, { "epoch": 11.528879753340185, "grad_norm": 0.0839330330491066, "learning_rate": 0.01, "loss": 1.9358, "step": 112176 }, { "epoch": 11.529188078108941, "grad_norm": 0.09454485774040222, "learning_rate": 0.01, "loss": 1.9416, "step": 112179 }, { "epoch": 11.529496402877697, "grad_norm": 0.08612924814224243, "learning_rate": 0.01, "loss": 1.9537, "step": 112182 }, { "epoch": 11.529804727646454, "grad_norm": 0.08306564390659332, "learning_rate": 0.01, "loss": 1.9239, "step": 112185 }, { "epoch": 11.53011305241521, "grad_norm": 0.07982834428548813, "learning_rate": 0.01, "loss": 1.9584, "step": 112188 }, { "epoch": 11.530421377183966, "grad_norm": 0.0803823247551918, "learning_rate": 0.01, "loss": 1.9273, "step": 112191 }, { "epoch": 11.530729701952723, "grad_norm": 0.06836555898189545, "learning_rate": 0.01, "loss": 1.9205, "step": 112194 }, { "epoch": 11.53103802672148, "grad_norm": 0.046713683754205704, "learning_rate": 0.01, "loss": 1.969, "step": 112197 }, { "epoch": 11.531346351490237, "grad_norm": 0.034670259803533554, "learning_rate": 0.01, "loss": 1.9058, "step": 112200 }, { "epoch": 11.531654676258993, "grad_norm": 0.112801194190979, "learning_rate": 0.01, "loss": 1.9415, "step": 112203 }, { "epoch": 11.53196300102775, "grad_norm": 0.11906672269105911, "learning_rate": 0.01, "loss": 1.9112, "step": 112206 }, { "epoch": 11.532271325796506, "grad_norm": 0.06057396158576012, "learning_rate": 0.01, "loss": 1.9467, "step": 112209 }, { "epoch": 11.532579650565262, "grad_norm": 0.04204052686691284, "learning_rate": 0.01, "loss": 1.9301, "step": 112212 }, { "epoch": 11.532887975334019, "grad_norm": 0.08645797520875931, "learning_rate": 0.01, "loss": 1.9258, "step": 112215 }, { "epoch": 11.533196300102775, "grad_norm": 0.05199515074491501, "learning_rate": 0.01, "loss": 1.941, "step": 112218 }, { "epoch": 11.533504624871531, "grad_norm": 0.03792623057961464, "learning_rate": 0.01, "loss": 1.9398, "step": 112221 }, { "epoch": 11.533812949640287, "grad_norm": 0.08833713829517365, "learning_rate": 0.01, "loss": 1.9233, "step": 112224 }, { "epoch": 11.534121274409044, "grad_norm": 0.054692838340997696, "learning_rate": 0.01, "loss": 1.9449, "step": 112227 }, { "epoch": 11.5344295991778, "grad_norm": 0.11953523010015488, "learning_rate": 0.01, "loss": 1.914, "step": 112230 }, { "epoch": 11.534737923946556, "grad_norm": 0.05324847996234894, "learning_rate": 0.01, "loss": 1.9392, "step": 112233 }, { "epoch": 11.535046248715313, "grad_norm": 0.05562381446361542, "learning_rate": 0.01, "loss": 1.9335, "step": 112236 }, { "epoch": 11.53535457348407, "grad_norm": 0.09408646821975708, "learning_rate": 0.01, "loss": 1.9245, "step": 112239 }, { "epoch": 11.535662898252827, "grad_norm": 0.07749692350625992, "learning_rate": 0.01, "loss": 1.924, "step": 112242 }, { "epoch": 11.535971223021583, "grad_norm": 0.12444134801626205, "learning_rate": 0.01, "loss": 1.939, "step": 112245 }, { "epoch": 11.53627954779034, "grad_norm": 0.09990056604146957, "learning_rate": 0.01, "loss": 1.937, "step": 112248 }, { "epoch": 11.536587872559096, "grad_norm": 0.045255426317453384, "learning_rate": 0.01, "loss": 1.9342, "step": 112251 }, { "epoch": 11.536896197327852, "grad_norm": 0.03870481252670288, "learning_rate": 0.01, "loss": 1.9534, "step": 112254 }, { "epoch": 11.537204522096609, "grad_norm": 0.04264555871486664, "learning_rate": 0.01, "loss": 1.9361, "step": 112257 }, { "epoch": 11.537512846865365, "grad_norm": 0.1033821702003479, "learning_rate": 0.01, "loss": 1.925, "step": 112260 }, { "epoch": 11.537821171634121, "grad_norm": 0.06186060607433319, "learning_rate": 0.01, "loss": 1.9353, "step": 112263 }, { "epoch": 11.538129496402878, "grad_norm": 0.06803090125322342, "learning_rate": 0.01, "loss": 1.9223, "step": 112266 }, { "epoch": 11.538437821171634, "grad_norm": 0.07004691660404205, "learning_rate": 0.01, "loss": 1.9479, "step": 112269 }, { "epoch": 11.53874614594039, "grad_norm": 0.09920225292444229, "learning_rate": 0.01, "loss": 1.9253, "step": 112272 }, { "epoch": 11.539054470709146, "grad_norm": 0.05515587329864502, "learning_rate": 0.01, "loss": 1.9287, "step": 112275 }, { "epoch": 11.539362795477903, "grad_norm": 0.14733068645000458, "learning_rate": 0.01, "loss": 1.942, "step": 112278 }, { "epoch": 11.539671120246659, "grad_norm": 0.08791196346282959, "learning_rate": 0.01, "loss": 1.9488, "step": 112281 }, { "epoch": 11.539979445015415, "grad_norm": 0.048449866473674774, "learning_rate": 0.01, "loss": 1.9202, "step": 112284 }, { "epoch": 11.540287769784173, "grad_norm": 0.054437413811683655, "learning_rate": 0.01, "loss": 1.9098, "step": 112287 }, { "epoch": 11.54059609455293, "grad_norm": 0.08245435357093811, "learning_rate": 0.01, "loss": 1.9224, "step": 112290 }, { "epoch": 11.540904419321686, "grad_norm": 0.10597596317529678, "learning_rate": 0.01, "loss": 1.9525, "step": 112293 }, { "epoch": 11.541212744090442, "grad_norm": 0.14799369871616364, "learning_rate": 0.01, "loss": 1.9657, "step": 112296 }, { "epoch": 11.541521068859199, "grad_norm": 0.08486896753311157, "learning_rate": 0.01, "loss": 1.9402, "step": 112299 }, { "epoch": 11.541829393627955, "grad_norm": 0.05308954417705536, "learning_rate": 0.01, "loss": 1.9399, "step": 112302 }, { "epoch": 11.542137718396711, "grad_norm": 0.05601240321993828, "learning_rate": 0.01, "loss": 1.9309, "step": 112305 }, { "epoch": 11.542446043165468, "grad_norm": 0.04067224636673927, "learning_rate": 0.01, "loss": 1.929, "step": 112308 }, { "epoch": 11.542754367934224, "grad_norm": 0.04340583458542824, "learning_rate": 0.01, "loss": 1.9353, "step": 112311 }, { "epoch": 11.54306269270298, "grad_norm": 0.038530174642801285, "learning_rate": 0.01, "loss": 1.9203, "step": 112314 }, { "epoch": 11.543371017471737, "grad_norm": 0.04171457886695862, "learning_rate": 0.01, "loss": 1.9193, "step": 112317 }, { "epoch": 11.543679342240493, "grad_norm": 0.04869476333260536, "learning_rate": 0.01, "loss": 1.9156, "step": 112320 }, { "epoch": 11.54398766700925, "grad_norm": 0.11877516657114029, "learning_rate": 0.01, "loss": 1.9284, "step": 112323 }, { "epoch": 11.544295991778005, "grad_norm": 0.06697926670312881, "learning_rate": 0.01, "loss": 1.9503, "step": 112326 }, { "epoch": 11.544604316546762, "grad_norm": 0.08970346301794052, "learning_rate": 0.01, "loss": 1.9728, "step": 112329 }, { "epoch": 11.54491264131552, "grad_norm": 0.052757494151592255, "learning_rate": 0.01, "loss": 1.9461, "step": 112332 }, { "epoch": 11.545220966084276, "grad_norm": 0.10075823217630386, "learning_rate": 0.01, "loss": 1.9341, "step": 112335 }, { "epoch": 11.545529290853032, "grad_norm": 0.07792484015226364, "learning_rate": 0.01, "loss": 1.92, "step": 112338 }, { "epoch": 11.545837615621789, "grad_norm": 0.08613074570894241, "learning_rate": 0.01, "loss": 1.9512, "step": 112341 }, { "epoch": 11.546145940390545, "grad_norm": 0.10087897628545761, "learning_rate": 0.01, "loss": 1.9574, "step": 112344 }, { "epoch": 11.546454265159301, "grad_norm": 0.047122254967689514, "learning_rate": 0.01, "loss": 1.9116, "step": 112347 }, { "epoch": 11.546762589928058, "grad_norm": 0.053861238062381744, "learning_rate": 0.01, "loss": 1.9346, "step": 112350 }, { "epoch": 11.547070914696814, "grad_norm": 0.04060134291648865, "learning_rate": 0.01, "loss": 1.9436, "step": 112353 }, { "epoch": 11.54737923946557, "grad_norm": 0.04421015828847885, "learning_rate": 0.01, "loss": 1.9378, "step": 112356 }, { "epoch": 11.547687564234327, "grad_norm": 0.050058286637067795, "learning_rate": 0.01, "loss": 1.9295, "step": 112359 }, { "epoch": 11.547995889003083, "grad_norm": 0.06190727651119232, "learning_rate": 0.01, "loss": 1.9508, "step": 112362 }, { "epoch": 11.54830421377184, "grad_norm": 0.03759567439556122, "learning_rate": 0.01, "loss": 1.9312, "step": 112365 }, { "epoch": 11.548612538540596, "grad_norm": 0.04162626340985298, "learning_rate": 0.01, "loss": 1.9582, "step": 112368 }, { "epoch": 11.548920863309352, "grad_norm": 0.05578555539250374, "learning_rate": 0.01, "loss": 1.9374, "step": 112371 }, { "epoch": 11.549229188078108, "grad_norm": 0.0695364773273468, "learning_rate": 0.01, "loss": 1.93, "step": 112374 }, { "epoch": 11.549537512846864, "grad_norm": 0.0405573733150959, "learning_rate": 0.01, "loss": 1.9456, "step": 112377 }, { "epoch": 11.549845837615623, "grad_norm": 0.03472146764397621, "learning_rate": 0.01, "loss": 1.9209, "step": 112380 }, { "epoch": 11.550154162384379, "grad_norm": 0.05320744216442108, "learning_rate": 0.01, "loss": 1.9527, "step": 112383 }, { "epoch": 11.550462487153135, "grad_norm": 0.07547161728143692, "learning_rate": 0.01, "loss": 1.9373, "step": 112386 }, { "epoch": 11.550770811921891, "grad_norm": 0.03960610553622246, "learning_rate": 0.01, "loss": 1.899, "step": 112389 }, { "epoch": 11.551079136690648, "grad_norm": 0.06855163723230362, "learning_rate": 0.01, "loss": 1.9559, "step": 112392 }, { "epoch": 11.551387461459404, "grad_norm": 0.0669487938284874, "learning_rate": 0.01, "loss": 1.9427, "step": 112395 }, { "epoch": 11.55169578622816, "grad_norm": 0.05353791266679764, "learning_rate": 0.01, "loss": 1.9443, "step": 112398 }, { "epoch": 11.552004110996917, "grad_norm": 0.054838988929986954, "learning_rate": 0.01, "loss": 1.9408, "step": 112401 }, { "epoch": 11.552312435765673, "grad_norm": 0.053157199174165726, "learning_rate": 0.01, "loss": 1.9509, "step": 112404 }, { "epoch": 11.55262076053443, "grad_norm": 0.04517093673348427, "learning_rate": 0.01, "loss": 1.9331, "step": 112407 }, { "epoch": 11.552929085303186, "grad_norm": 0.0851716473698616, "learning_rate": 0.01, "loss": 1.9236, "step": 112410 }, { "epoch": 11.553237410071942, "grad_norm": 0.09431596845388412, "learning_rate": 0.01, "loss": 1.9381, "step": 112413 }, { "epoch": 11.553545734840698, "grad_norm": 0.0770939439535141, "learning_rate": 0.01, "loss": 1.9524, "step": 112416 }, { "epoch": 11.553854059609455, "grad_norm": 0.08656878024339676, "learning_rate": 0.01, "loss": 1.9542, "step": 112419 }, { "epoch": 11.554162384378213, "grad_norm": 0.030203841626644135, "learning_rate": 0.01, "loss": 1.9697, "step": 112422 }, { "epoch": 11.554470709146969, "grad_norm": 0.09328059107065201, "learning_rate": 0.01, "loss": 1.9406, "step": 112425 }, { "epoch": 11.554779033915725, "grad_norm": 0.06437768042087555, "learning_rate": 0.01, "loss": 1.9509, "step": 112428 }, { "epoch": 11.555087358684482, "grad_norm": 0.08344560861587524, "learning_rate": 0.01, "loss": 1.9287, "step": 112431 }, { "epoch": 11.555395683453238, "grad_norm": 0.05726712942123413, "learning_rate": 0.01, "loss": 1.9446, "step": 112434 }, { "epoch": 11.555704008221994, "grad_norm": 0.04200645536184311, "learning_rate": 0.01, "loss": 1.9368, "step": 112437 }, { "epoch": 11.55601233299075, "grad_norm": 0.039023153483867645, "learning_rate": 0.01, "loss": 1.9201, "step": 112440 }, { "epoch": 11.556320657759507, "grad_norm": 0.12256117910146713, "learning_rate": 0.01, "loss": 1.9222, "step": 112443 }, { "epoch": 11.556628982528263, "grad_norm": 0.05277597904205322, "learning_rate": 0.01, "loss": 1.9227, "step": 112446 }, { "epoch": 11.55693730729702, "grad_norm": 0.05291856825351715, "learning_rate": 0.01, "loss": 1.9565, "step": 112449 }, { "epoch": 11.557245632065776, "grad_norm": 0.050023455172777176, "learning_rate": 0.01, "loss": 1.9439, "step": 112452 }, { "epoch": 11.557553956834532, "grad_norm": 0.046553801745176315, "learning_rate": 0.01, "loss": 1.9454, "step": 112455 }, { "epoch": 11.557862281603288, "grad_norm": 0.04273172467947006, "learning_rate": 0.01, "loss": 1.9411, "step": 112458 }, { "epoch": 11.558170606372045, "grad_norm": 0.05655885860323906, "learning_rate": 0.01, "loss": 1.9077, "step": 112461 }, { "epoch": 11.558478931140801, "grad_norm": 0.03471823036670685, "learning_rate": 0.01, "loss": 1.9396, "step": 112464 }, { "epoch": 11.558787255909557, "grad_norm": 0.03444075956940651, "learning_rate": 0.01, "loss": 1.9102, "step": 112467 }, { "epoch": 11.559095580678314, "grad_norm": 0.05058380216360092, "learning_rate": 0.01, "loss": 1.9219, "step": 112470 }, { "epoch": 11.559403905447072, "grad_norm": 0.08273141086101532, "learning_rate": 0.01, "loss": 1.9585, "step": 112473 }, { "epoch": 11.559712230215828, "grad_norm": 0.07805310189723969, "learning_rate": 0.01, "loss": 1.9692, "step": 112476 }, { "epoch": 11.560020554984584, "grad_norm": 0.055413614958524704, "learning_rate": 0.01, "loss": 1.9405, "step": 112479 }, { "epoch": 11.56032887975334, "grad_norm": 0.04423193633556366, "learning_rate": 0.01, "loss": 1.9259, "step": 112482 }, { "epoch": 11.560637204522097, "grad_norm": 0.13389156758785248, "learning_rate": 0.01, "loss": 1.9277, "step": 112485 }, { "epoch": 11.560945529290853, "grad_norm": 0.05885486677289009, "learning_rate": 0.01, "loss": 1.9372, "step": 112488 }, { "epoch": 11.56125385405961, "grad_norm": 0.07096994668245316, "learning_rate": 0.01, "loss": 1.9435, "step": 112491 }, { "epoch": 11.561562178828366, "grad_norm": 0.04965658858418465, "learning_rate": 0.01, "loss": 1.9335, "step": 112494 }, { "epoch": 11.561870503597122, "grad_norm": 0.07426677644252777, "learning_rate": 0.01, "loss": 1.9389, "step": 112497 }, { "epoch": 11.562178828365878, "grad_norm": 0.060940932482481, "learning_rate": 0.01, "loss": 1.9581, "step": 112500 }, { "epoch": 11.562487153134635, "grad_norm": 0.03642065078020096, "learning_rate": 0.01, "loss": 1.937, "step": 112503 }, { "epoch": 11.562795477903391, "grad_norm": 0.07831542938947678, "learning_rate": 0.01, "loss": 1.918, "step": 112506 }, { "epoch": 11.563103802672147, "grad_norm": 0.05541325733065605, "learning_rate": 0.01, "loss": 1.943, "step": 112509 }, { "epoch": 11.563412127440904, "grad_norm": 0.10018374770879745, "learning_rate": 0.01, "loss": 1.9358, "step": 112512 }, { "epoch": 11.563720452209662, "grad_norm": 0.06554833799600601, "learning_rate": 0.01, "loss": 1.9655, "step": 112515 }, { "epoch": 11.564028776978418, "grad_norm": 0.04541819915175438, "learning_rate": 0.01, "loss": 1.9118, "step": 112518 }, { "epoch": 11.564337101747174, "grad_norm": 0.08256267756223679, "learning_rate": 0.01, "loss": 1.9445, "step": 112521 }, { "epoch": 11.56464542651593, "grad_norm": 0.08194834738969803, "learning_rate": 0.01, "loss": 1.9375, "step": 112524 }, { "epoch": 11.564953751284687, "grad_norm": 0.0895160362124443, "learning_rate": 0.01, "loss": 1.912, "step": 112527 }, { "epoch": 11.565262076053443, "grad_norm": 0.10424204915761948, "learning_rate": 0.01, "loss": 1.9268, "step": 112530 }, { "epoch": 11.5655704008222, "grad_norm": 0.07442378997802734, "learning_rate": 0.01, "loss": 1.916, "step": 112533 }, { "epoch": 11.565878725590956, "grad_norm": 0.07817506790161133, "learning_rate": 0.01, "loss": 1.9402, "step": 112536 }, { "epoch": 11.566187050359712, "grad_norm": 0.10427911579608917, "learning_rate": 0.01, "loss": 1.9441, "step": 112539 }, { "epoch": 11.566495375128468, "grad_norm": 0.054581690579652786, "learning_rate": 0.01, "loss": 1.9496, "step": 112542 }, { "epoch": 11.566803699897225, "grad_norm": 0.04285728186368942, "learning_rate": 0.01, "loss": 1.9363, "step": 112545 }, { "epoch": 11.567112024665981, "grad_norm": 0.04124976322054863, "learning_rate": 0.01, "loss": 1.9214, "step": 112548 }, { "epoch": 11.567420349434737, "grad_norm": 0.04103301838040352, "learning_rate": 0.01, "loss": 1.9313, "step": 112551 }, { "epoch": 11.567728674203494, "grad_norm": 0.1087937131524086, "learning_rate": 0.01, "loss": 1.943, "step": 112554 }, { "epoch": 11.56803699897225, "grad_norm": 0.06919058412313461, "learning_rate": 0.01, "loss": 1.9489, "step": 112557 }, { "epoch": 11.568345323741006, "grad_norm": 0.047698527574539185, "learning_rate": 0.01, "loss": 1.9249, "step": 112560 }, { "epoch": 11.568653648509764, "grad_norm": 0.04349555820226669, "learning_rate": 0.01, "loss": 1.9473, "step": 112563 }, { "epoch": 11.56896197327852, "grad_norm": 0.05030469968914986, "learning_rate": 0.01, "loss": 1.9589, "step": 112566 }, { "epoch": 11.569270298047277, "grad_norm": 0.11689770221710205, "learning_rate": 0.01, "loss": 1.9314, "step": 112569 }, { "epoch": 11.569578622816033, "grad_norm": 0.08256854861974716, "learning_rate": 0.01, "loss": 1.9305, "step": 112572 }, { "epoch": 11.56988694758479, "grad_norm": 0.052154459059238434, "learning_rate": 0.01, "loss": 1.9106, "step": 112575 }, { "epoch": 11.570195272353546, "grad_norm": 0.05697903782129288, "learning_rate": 0.01, "loss": 1.9595, "step": 112578 }, { "epoch": 11.570503597122302, "grad_norm": 0.04220142588019371, "learning_rate": 0.01, "loss": 1.933, "step": 112581 }, { "epoch": 11.570811921891059, "grad_norm": 0.06774673610925674, "learning_rate": 0.01, "loss": 1.9372, "step": 112584 }, { "epoch": 11.571120246659815, "grad_norm": 0.07773800939321518, "learning_rate": 0.01, "loss": 1.9697, "step": 112587 }, { "epoch": 11.571428571428571, "grad_norm": 0.1131962239742279, "learning_rate": 0.01, "loss": 1.9465, "step": 112590 }, { "epoch": 11.571736896197327, "grad_norm": 0.18003375828266144, "learning_rate": 0.01, "loss": 1.9282, "step": 112593 }, { "epoch": 11.572045220966084, "grad_norm": 0.11840492486953735, "learning_rate": 0.01, "loss": 1.9428, "step": 112596 }, { "epoch": 11.57235354573484, "grad_norm": 0.05675830692052841, "learning_rate": 0.01, "loss": 1.9154, "step": 112599 }, { "epoch": 11.572661870503596, "grad_norm": 0.05000123009085655, "learning_rate": 0.01, "loss": 1.9493, "step": 112602 }, { "epoch": 11.572970195272354, "grad_norm": 0.03676938638091087, "learning_rate": 0.01, "loss": 1.9244, "step": 112605 }, { "epoch": 11.57327852004111, "grad_norm": 0.03664156049489975, "learning_rate": 0.01, "loss": 1.9327, "step": 112608 }, { "epoch": 11.573586844809867, "grad_norm": 0.036037057638168335, "learning_rate": 0.01, "loss": 1.9279, "step": 112611 }, { "epoch": 11.573895169578623, "grad_norm": 0.1087389811873436, "learning_rate": 0.01, "loss": 1.9548, "step": 112614 }, { "epoch": 11.57420349434738, "grad_norm": 0.035332124680280685, "learning_rate": 0.01, "loss": 1.9292, "step": 112617 }, { "epoch": 11.574511819116136, "grad_norm": 0.05903634428977966, "learning_rate": 0.01, "loss": 1.9413, "step": 112620 }, { "epoch": 11.574820143884892, "grad_norm": 0.13460128009319305, "learning_rate": 0.01, "loss": 1.908, "step": 112623 }, { "epoch": 11.575128468653649, "grad_norm": 0.1596498340368271, "learning_rate": 0.01, "loss": 1.9389, "step": 112626 }, { "epoch": 11.575436793422405, "grad_norm": 0.07580243796110153, "learning_rate": 0.01, "loss": 1.9537, "step": 112629 }, { "epoch": 11.575745118191161, "grad_norm": 0.03767656907439232, "learning_rate": 0.01, "loss": 1.9414, "step": 112632 }, { "epoch": 11.576053442959918, "grad_norm": 0.03570094704627991, "learning_rate": 0.01, "loss": 1.9119, "step": 112635 }, { "epoch": 11.576361767728674, "grad_norm": 0.031302034854888916, "learning_rate": 0.01, "loss": 1.9381, "step": 112638 }, { "epoch": 11.57667009249743, "grad_norm": 0.049241434782743454, "learning_rate": 0.01, "loss": 1.9406, "step": 112641 }, { "epoch": 11.576978417266186, "grad_norm": 0.15020284056663513, "learning_rate": 0.01, "loss": 1.9032, "step": 112644 }, { "epoch": 11.577286742034943, "grad_norm": 0.11612813919782639, "learning_rate": 0.01, "loss": 1.9676, "step": 112647 }, { "epoch": 11.577595066803699, "grad_norm": 0.08976644277572632, "learning_rate": 0.01, "loss": 1.9385, "step": 112650 }, { "epoch": 11.577903391572455, "grad_norm": 0.0828244537115097, "learning_rate": 0.01, "loss": 1.9573, "step": 112653 }, { "epoch": 11.578211716341213, "grad_norm": 0.0640794038772583, "learning_rate": 0.01, "loss": 1.9193, "step": 112656 }, { "epoch": 11.57852004110997, "grad_norm": 0.04499854892492294, "learning_rate": 0.01, "loss": 1.9254, "step": 112659 }, { "epoch": 11.578828365878726, "grad_norm": 0.03676529601216316, "learning_rate": 0.01, "loss": 1.9273, "step": 112662 }, { "epoch": 11.579136690647482, "grad_norm": 0.04806596413254738, "learning_rate": 0.01, "loss": 1.9388, "step": 112665 }, { "epoch": 11.579445015416239, "grad_norm": 0.11217772960662842, "learning_rate": 0.01, "loss": 1.919, "step": 112668 }, { "epoch": 11.579753340184995, "grad_norm": 0.06657008826732635, "learning_rate": 0.01, "loss": 1.9529, "step": 112671 }, { "epoch": 11.580061664953751, "grad_norm": 0.1074846163392067, "learning_rate": 0.01, "loss": 1.9345, "step": 112674 }, { "epoch": 11.580369989722508, "grad_norm": 0.05520278215408325, "learning_rate": 0.01, "loss": 1.9167, "step": 112677 }, { "epoch": 11.580678314491264, "grad_norm": 0.055017780512571335, "learning_rate": 0.01, "loss": 1.9464, "step": 112680 }, { "epoch": 11.58098663926002, "grad_norm": 0.041063256561756134, "learning_rate": 0.01, "loss": 1.9419, "step": 112683 }, { "epoch": 11.581294964028777, "grad_norm": 0.0543600469827652, "learning_rate": 0.01, "loss": 1.9323, "step": 112686 }, { "epoch": 11.581603288797533, "grad_norm": 0.07964272797107697, "learning_rate": 0.01, "loss": 1.9281, "step": 112689 }, { "epoch": 11.58191161356629, "grad_norm": 0.10169428586959839, "learning_rate": 0.01, "loss": 1.9393, "step": 112692 }, { "epoch": 11.582219938335045, "grad_norm": 0.03501278534531593, "learning_rate": 0.01, "loss": 1.9323, "step": 112695 }, { "epoch": 11.582528263103804, "grad_norm": 0.08262136578559875, "learning_rate": 0.01, "loss": 1.9406, "step": 112698 }, { "epoch": 11.58283658787256, "grad_norm": 0.05497727543115616, "learning_rate": 0.01, "loss": 1.958, "step": 112701 }, { "epoch": 11.583144912641316, "grad_norm": 0.07225098460912704, "learning_rate": 0.01, "loss": 1.9446, "step": 112704 }, { "epoch": 11.583453237410072, "grad_norm": 0.037529148161411285, "learning_rate": 0.01, "loss": 1.9232, "step": 112707 }, { "epoch": 11.583761562178829, "grad_norm": 0.029563959687948227, "learning_rate": 0.01, "loss": 1.9538, "step": 112710 }, { "epoch": 11.584069886947585, "grad_norm": 0.07537955790758133, "learning_rate": 0.01, "loss": 1.9325, "step": 112713 }, { "epoch": 11.584378211716341, "grad_norm": 0.06754820793867111, "learning_rate": 0.01, "loss": 1.9155, "step": 112716 }, { "epoch": 11.584686536485098, "grad_norm": 0.04394010081887245, "learning_rate": 0.01, "loss": 1.9279, "step": 112719 }, { "epoch": 11.584994861253854, "grad_norm": 0.05102475360035896, "learning_rate": 0.01, "loss": 1.935, "step": 112722 }, { "epoch": 11.58530318602261, "grad_norm": 0.053613968193531036, "learning_rate": 0.01, "loss": 1.9112, "step": 112725 }, { "epoch": 11.585611510791367, "grad_norm": 0.11525541543960571, "learning_rate": 0.01, "loss": 1.9409, "step": 112728 }, { "epoch": 11.585919835560123, "grad_norm": 0.08309531211853027, "learning_rate": 0.01, "loss": 1.9605, "step": 112731 }, { "epoch": 11.58622816032888, "grad_norm": 0.07050877809524536, "learning_rate": 0.01, "loss": 1.9439, "step": 112734 }, { "epoch": 11.586536485097636, "grad_norm": 0.050906527787446976, "learning_rate": 0.01, "loss": 1.9529, "step": 112737 }, { "epoch": 11.586844809866392, "grad_norm": 0.05442078039050102, "learning_rate": 0.01, "loss": 1.9072, "step": 112740 }, { "epoch": 11.587153134635148, "grad_norm": 0.06000097841024399, "learning_rate": 0.01, "loss": 1.9358, "step": 112743 }, { "epoch": 11.587461459403906, "grad_norm": 0.0764593631029129, "learning_rate": 0.01, "loss": 1.9269, "step": 112746 }, { "epoch": 11.587769784172663, "grad_norm": 0.08361302316188812, "learning_rate": 0.01, "loss": 1.9795, "step": 112749 }, { "epoch": 11.588078108941419, "grad_norm": 0.07885933667421341, "learning_rate": 0.01, "loss": 1.9175, "step": 112752 }, { "epoch": 11.588386433710175, "grad_norm": 0.04270821809768677, "learning_rate": 0.01, "loss": 1.9465, "step": 112755 }, { "epoch": 11.588694758478931, "grad_norm": 0.117026187479496, "learning_rate": 0.01, "loss": 1.8946, "step": 112758 }, { "epoch": 11.589003083247688, "grad_norm": 0.04440357908606529, "learning_rate": 0.01, "loss": 1.9313, "step": 112761 }, { "epoch": 11.589311408016444, "grad_norm": 0.05820045620203018, "learning_rate": 0.01, "loss": 1.906, "step": 112764 }, { "epoch": 11.5896197327852, "grad_norm": 0.054034579545259476, "learning_rate": 0.01, "loss": 1.9265, "step": 112767 }, { "epoch": 11.589928057553957, "grad_norm": 0.09348484873771667, "learning_rate": 0.01, "loss": 1.9217, "step": 112770 }, { "epoch": 11.590236382322713, "grad_norm": 0.053756095468997955, "learning_rate": 0.01, "loss": 1.9507, "step": 112773 }, { "epoch": 11.59054470709147, "grad_norm": 0.057105984538793564, "learning_rate": 0.01, "loss": 1.9291, "step": 112776 }, { "epoch": 11.590853031860226, "grad_norm": 0.0945085659623146, "learning_rate": 0.01, "loss": 1.9678, "step": 112779 }, { "epoch": 11.591161356628982, "grad_norm": 0.07942793518304825, "learning_rate": 0.01, "loss": 1.9183, "step": 112782 }, { "epoch": 11.591469681397738, "grad_norm": 0.05878401920199394, "learning_rate": 0.01, "loss": 1.9327, "step": 112785 }, { "epoch": 11.591778006166495, "grad_norm": 0.09565435349941254, "learning_rate": 0.01, "loss": 1.9581, "step": 112788 }, { "epoch": 11.592086330935253, "grad_norm": 0.06520779430866241, "learning_rate": 0.01, "loss": 1.9349, "step": 112791 }, { "epoch": 11.592394655704009, "grad_norm": 0.07311799377202988, "learning_rate": 0.01, "loss": 1.9164, "step": 112794 }, { "epoch": 11.592702980472765, "grad_norm": 0.10966107249259949, "learning_rate": 0.01, "loss": 1.942, "step": 112797 }, { "epoch": 11.593011305241522, "grad_norm": 0.03675894811749458, "learning_rate": 0.01, "loss": 1.9238, "step": 112800 }, { "epoch": 11.593319630010278, "grad_norm": 0.04241945967078209, "learning_rate": 0.01, "loss": 1.9325, "step": 112803 }, { "epoch": 11.593627954779034, "grad_norm": 0.12033431977033615, "learning_rate": 0.01, "loss": 1.9425, "step": 112806 }, { "epoch": 11.59393627954779, "grad_norm": 0.11073540151119232, "learning_rate": 0.01, "loss": 1.9387, "step": 112809 }, { "epoch": 11.594244604316547, "grad_norm": 0.06449783593416214, "learning_rate": 0.01, "loss": 1.9421, "step": 112812 }, { "epoch": 11.594552929085303, "grad_norm": 0.06169688701629639, "learning_rate": 0.01, "loss": 1.9229, "step": 112815 }, { "epoch": 11.59486125385406, "grad_norm": 0.03492598980665207, "learning_rate": 0.01, "loss": 1.9408, "step": 112818 }, { "epoch": 11.595169578622816, "grad_norm": 0.10270565748214722, "learning_rate": 0.01, "loss": 1.9485, "step": 112821 }, { "epoch": 11.595477903391572, "grad_norm": 0.09160374104976654, "learning_rate": 0.01, "loss": 1.928, "step": 112824 }, { "epoch": 11.595786228160328, "grad_norm": 0.0630834549665451, "learning_rate": 0.01, "loss": 1.9382, "step": 112827 }, { "epoch": 11.596094552929085, "grad_norm": 0.09790679067373276, "learning_rate": 0.01, "loss": 1.9383, "step": 112830 }, { "epoch": 11.596402877697841, "grad_norm": 0.07473014295101166, "learning_rate": 0.01, "loss": 1.9604, "step": 112833 }, { "epoch": 11.596711202466597, "grad_norm": 0.043229445815086365, "learning_rate": 0.01, "loss": 1.9271, "step": 112836 }, { "epoch": 11.597019527235355, "grad_norm": 0.0716719701886177, "learning_rate": 0.01, "loss": 1.93, "step": 112839 }, { "epoch": 11.597327852004112, "grad_norm": 0.038993120193481445, "learning_rate": 0.01, "loss": 1.9245, "step": 112842 }, { "epoch": 11.597636176772868, "grad_norm": 0.0744524821639061, "learning_rate": 0.01, "loss": 1.9567, "step": 112845 }, { "epoch": 11.597944501541624, "grad_norm": 0.08004055917263031, "learning_rate": 0.01, "loss": 1.9365, "step": 112848 }, { "epoch": 11.59825282631038, "grad_norm": 0.11334647238254547, "learning_rate": 0.01, "loss": 1.9588, "step": 112851 }, { "epoch": 11.598561151079137, "grad_norm": 0.10437542200088501, "learning_rate": 0.01, "loss": 1.95, "step": 112854 }, { "epoch": 11.598869475847893, "grad_norm": 0.08015387505292892, "learning_rate": 0.01, "loss": 1.9033, "step": 112857 }, { "epoch": 11.59917780061665, "grad_norm": 0.05252983793616295, "learning_rate": 0.01, "loss": 1.9505, "step": 112860 }, { "epoch": 11.599486125385406, "grad_norm": 0.03896424174308777, "learning_rate": 0.01, "loss": 1.9725, "step": 112863 }, { "epoch": 11.599794450154162, "grad_norm": 0.061736226081848145, "learning_rate": 0.01, "loss": 1.9468, "step": 112866 }, { "epoch": 11.600102774922918, "grad_norm": 0.07320262491703033, "learning_rate": 0.01, "loss": 1.9585, "step": 112869 }, { "epoch": 11.600411099691675, "grad_norm": 0.07607041299343109, "learning_rate": 0.01, "loss": 1.9713, "step": 112872 }, { "epoch": 11.600719424460431, "grad_norm": 0.06408149749040604, "learning_rate": 0.01, "loss": 1.9617, "step": 112875 }, { "epoch": 11.601027749229187, "grad_norm": 0.10191265493631363, "learning_rate": 0.01, "loss": 1.9393, "step": 112878 }, { "epoch": 11.601336073997945, "grad_norm": 0.13209275901317596, "learning_rate": 0.01, "loss": 1.9463, "step": 112881 }, { "epoch": 11.601644398766702, "grad_norm": 0.07748264819383621, "learning_rate": 0.01, "loss": 1.9316, "step": 112884 }, { "epoch": 11.601952723535458, "grad_norm": 0.0734485611319542, "learning_rate": 0.01, "loss": 1.9238, "step": 112887 }, { "epoch": 11.602261048304214, "grad_norm": 0.05413933843374252, "learning_rate": 0.01, "loss": 1.9224, "step": 112890 }, { "epoch": 11.60256937307297, "grad_norm": 0.05536697059869766, "learning_rate": 0.01, "loss": 1.9848, "step": 112893 }, { "epoch": 11.602877697841727, "grad_norm": 0.045468397438526154, "learning_rate": 0.01, "loss": 1.9401, "step": 112896 }, { "epoch": 11.603186022610483, "grad_norm": 0.10831259936094284, "learning_rate": 0.01, "loss": 1.95, "step": 112899 }, { "epoch": 11.60349434737924, "grad_norm": 0.05811365321278572, "learning_rate": 0.01, "loss": 1.9511, "step": 112902 }, { "epoch": 11.603802672147996, "grad_norm": 0.04905383288860321, "learning_rate": 0.01, "loss": 1.9417, "step": 112905 }, { "epoch": 11.604110996916752, "grad_norm": 0.037075262516736984, "learning_rate": 0.01, "loss": 1.9262, "step": 112908 }, { "epoch": 11.604419321685508, "grad_norm": 0.0417492613196373, "learning_rate": 0.01, "loss": 1.9399, "step": 112911 }, { "epoch": 11.604727646454265, "grad_norm": 0.06051025539636612, "learning_rate": 0.01, "loss": 1.9281, "step": 112914 }, { "epoch": 11.605035971223021, "grad_norm": 0.11885872483253479, "learning_rate": 0.01, "loss": 1.96, "step": 112917 }, { "epoch": 11.605344295991777, "grad_norm": 0.12744250893592834, "learning_rate": 0.01, "loss": 1.9361, "step": 112920 }, { "epoch": 11.605652620760534, "grad_norm": 0.05316381901502609, "learning_rate": 0.01, "loss": 1.9671, "step": 112923 }, { "epoch": 11.60596094552929, "grad_norm": 0.039563246071338654, "learning_rate": 0.01, "loss": 1.9377, "step": 112926 }, { "epoch": 11.606269270298046, "grad_norm": 0.0681186243891716, "learning_rate": 0.01, "loss": 1.9642, "step": 112929 }, { "epoch": 11.606577595066804, "grad_norm": 0.07162708789110184, "learning_rate": 0.01, "loss": 1.9596, "step": 112932 }, { "epoch": 11.60688591983556, "grad_norm": 0.06566008925437927, "learning_rate": 0.01, "loss": 1.9409, "step": 112935 }, { "epoch": 11.607194244604317, "grad_norm": 0.054131586104631424, "learning_rate": 0.01, "loss": 1.9241, "step": 112938 }, { "epoch": 11.607502569373073, "grad_norm": 0.037236664444208145, "learning_rate": 0.01, "loss": 1.9502, "step": 112941 }, { "epoch": 11.60781089414183, "grad_norm": 0.11102370172739029, "learning_rate": 0.01, "loss": 1.941, "step": 112944 }, { "epoch": 11.608119218910586, "grad_norm": 0.06191109120845795, "learning_rate": 0.01, "loss": 1.9378, "step": 112947 }, { "epoch": 11.608427543679342, "grad_norm": 0.050658125430345535, "learning_rate": 0.01, "loss": 1.9187, "step": 112950 }, { "epoch": 11.608735868448099, "grad_norm": 0.03898501396179199, "learning_rate": 0.01, "loss": 1.9497, "step": 112953 }, { "epoch": 11.609044193216855, "grad_norm": 0.038435082882642746, "learning_rate": 0.01, "loss": 1.9484, "step": 112956 }, { "epoch": 11.609352517985611, "grad_norm": 0.08997193723917007, "learning_rate": 0.01, "loss": 1.9483, "step": 112959 }, { "epoch": 11.609660842754367, "grad_norm": 0.10880478471517563, "learning_rate": 0.01, "loss": 1.927, "step": 112962 }, { "epoch": 11.609969167523124, "grad_norm": 0.06104841083288193, "learning_rate": 0.01, "loss": 1.9538, "step": 112965 }, { "epoch": 11.61027749229188, "grad_norm": 0.07381860911846161, "learning_rate": 0.01, "loss": 1.9321, "step": 112968 }, { "epoch": 11.610585817060636, "grad_norm": 0.11246202141046524, "learning_rate": 0.01, "loss": 1.9423, "step": 112971 }, { "epoch": 11.610894141829394, "grad_norm": 0.08369491249322891, "learning_rate": 0.01, "loss": 1.9539, "step": 112974 }, { "epoch": 11.61120246659815, "grad_norm": 0.034752074629068375, "learning_rate": 0.01, "loss": 1.9235, "step": 112977 }, { "epoch": 11.611510791366907, "grad_norm": 0.03296573460102081, "learning_rate": 0.01, "loss": 1.9435, "step": 112980 }, { "epoch": 11.611819116135663, "grad_norm": 0.07587042450904846, "learning_rate": 0.01, "loss": 1.9201, "step": 112983 }, { "epoch": 11.61212744090442, "grad_norm": 0.07403363287448883, "learning_rate": 0.01, "loss": 1.9494, "step": 112986 }, { "epoch": 11.612435765673176, "grad_norm": 0.0733148530125618, "learning_rate": 0.01, "loss": 1.9357, "step": 112989 }, { "epoch": 11.612744090441932, "grad_norm": 0.09333302825689316, "learning_rate": 0.01, "loss": 1.9551, "step": 112992 }, { "epoch": 11.613052415210689, "grad_norm": 0.06466648727655411, "learning_rate": 0.01, "loss": 1.9535, "step": 112995 }, { "epoch": 11.613360739979445, "grad_norm": 0.03636278957128525, "learning_rate": 0.01, "loss": 1.961, "step": 112998 }, { "epoch": 11.613669064748201, "grad_norm": 0.12453662604093552, "learning_rate": 0.01, "loss": 1.9138, "step": 113001 }, { "epoch": 11.613977389516958, "grad_norm": 0.07740887999534607, "learning_rate": 0.01, "loss": 1.9189, "step": 113004 }, { "epoch": 11.614285714285714, "grad_norm": 0.057487133890390396, "learning_rate": 0.01, "loss": 1.9425, "step": 113007 }, { "epoch": 11.61459403905447, "grad_norm": 0.10658985376358032, "learning_rate": 0.01, "loss": 1.926, "step": 113010 }, { "epoch": 11.614902363823226, "grad_norm": 0.10348264873027802, "learning_rate": 0.01, "loss": 1.9111, "step": 113013 }, { "epoch": 11.615210688591983, "grad_norm": 0.08679939061403275, "learning_rate": 0.01, "loss": 1.9704, "step": 113016 }, { "epoch": 11.615519013360739, "grad_norm": 0.04989555478096008, "learning_rate": 0.01, "loss": 1.9388, "step": 113019 }, { "epoch": 11.615827338129497, "grad_norm": 0.05296916887164116, "learning_rate": 0.01, "loss": 1.941, "step": 113022 }, { "epoch": 11.616135662898253, "grad_norm": 0.06782415509223938, "learning_rate": 0.01, "loss": 1.9315, "step": 113025 }, { "epoch": 11.61644398766701, "grad_norm": 0.05802762880921364, "learning_rate": 0.01, "loss": 1.913, "step": 113028 }, { "epoch": 11.616752312435766, "grad_norm": 0.048324454575777054, "learning_rate": 0.01, "loss": 1.9502, "step": 113031 }, { "epoch": 11.617060637204522, "grad_norm": 0.04696560651063919, "learning_rate": 0.01, "loss": 1.9646, "step": 113034 }, { "epoch": 11.617368961973279, "grad_norm": 0.03296945244073868, "learning_rate": 0.01, "loss": 1.9259, "step": 113037 }, { "epoch": 11.617677286742035, "grad_norm": 0.07478760182857513, "learning_rate": 0.01, "loss": 1.9452, "step": 113040 }, { "epoch": 11.617985611510791, "grad_norm": 0.11230601370334625, "learning_rate": 0.01, "loss": 1.9316, "step": 113043 }, { "epoch": 11.618293936279548, "grad_norm": 0.0838407501578331, "learning_rate": 0.01, "loss": 1.9279, "step": 113046 }, { "epoch": 11.618602261048304, "grad_norm": 0.09206885099411011, "learning_rate": 0.01, "loss": 1.9388, "step": 113049 }, { "epoch": 11.61891058581706, "grad_norm": 0.04248971492052078, "learning_rate": 0.01, "loss": 1.9478, "step": 113052 }, { "epoch": 11.619218910585817, "grad_norm": 0.038236189633607864, "learning_rate": 0.01, "loss": 1.9474, "step": 113055 }, { "epoch": 11.619527235354573, "grad_norm": 0.031842999160289764, "learning_rate": 0.01, "loss": 1.948, "step": 113058 }, { "epoch": 11.61983556012333, "grad_norm": 0.030857548117637634, "learning_rate": 0.01, "loss": 1.9216, "step": 113061 }, { "epoch": 11.620143884892087, "grad_norm": 0.13097026944160461, "learning_rate": 0.01, "loss": 1.9193, "step": 113064 }, { "epoch": 11.620452209660844, "grad_norm": 0.05664094164967537, "learning_rate": 0.01, "loss": 1.9517, "step": 113067 }, { "epoch": 11.6207605344296, "grad_norm": 0.09321857243776321, "learning_rate": 0.01, "loss": 1.9329, "step": 113070 }, { "epoch": 11.621068859198356, "grad_norm": 0.10250440984964371, "learning_rate": 0.01, "loss": 1.9522, "step": 113073 }, { "epoch": 11.621377183967113, "grad_norm": 0.05741477757692337, "learning_rate": 0.01, "loss": 1.9318, "step": 113076 }, { "epoch": 11.621685508735869, "grad_norm": 0.036703143268823624, "learning_rate": 0.01, "loss": 1.9483, "step": 113079 }, { "epoch": 11.621993833504625, "grad_norm": 0.038881104439496994, "learning_rate": 0.01, "loss": 1.9708, "step": 113082 }, { "epoch": 11.622302158273381, "grad_norm": 0.04341772571206093, "learning_rate": 0.01, "loss": 1.926, "step": 113085 }, { "epoch": 11.622610483042138, "grad_norm": 0.04066208004951477, "learning_rate": 0.01, "loss": 1.9329, "step": 113088 }, { "epoch": 11.622918807810894, "grad_norm": 0.038506485521793365, "learning_rate": 0.01, "loss": 1.9224, "step": 113091 }, { "epoch": 11.62322713257965, "grad_norm": 0.11907859146595001, "learning_rate": 0.01, "loss": 1.9355, "step": 113094 }, { "epoch": 11.623535457348407, "grad_norm": 0.050517939031124115, "learning_rate": 0.01, "loss": 1.9446, "step": 113097 }, { "epoch": 11.623843782117163, "grad_norm": 0.07157903164625168, "learning_rate": 0.01, "loss": 1.9559, "step": 113100 }, { "epoch": 11.62415210688592, "grad_norm": 0.03911276534199715, "learning_rate": 0.01, "loss": 1.9247, "step": 113103 }, { "epoch": 11.624460431654676, "grad_norm": 0.06826124340295792, "learning_rate": 0.01, "loss": 1.9251, "step": 113106 }, { "epoch": 11.624768756423432, "grad_norm": 0.08034567534923553, "learning_rate": 0.01, "loss": 1.937, "step": 113109 }, { "epoch": 11.625077081192188, "grad_norm": 0.04130208492279053, "learning_rate": 0.01, "loss": 1.941, "step": 113112 }, { "epoch": 11.625385405960946, "grad_norm": 0.0840260237455368, "learning_rate": 0.01, "loss": 1.9445, "step": 113115 }, { "epoch": 11.625693730729703, "grad_norm": 0.03418037295341492, "learning_rate": 0.01, "loss": 1.9475, "step": 113118 }, { "epoch": 11.626002055498459, "grad_norm": 0.14465217292308807, "learning_rate": 0.01, "loss": 1.9298, "step": 113121 }, { "epoch": 11.626310380267215, "grad_norm": 0.11267710477113724, "learning_rate": 0.01, "loss": 1.9105, "step": 113124 }, { "epoch": 11.626618705035972, "grad_norm": 0.07997629046440125, "learning_rate": 0.01, "loss": 1.9459, "step": 113127 }, { "epoch": 11.626927029804728, "grad_norm": 0.05365961045026779, "learning_rate": 0.01, "loss": 1.9277, "step": 113130 }, { "epoch": 11.627235354573484, "grad_norm": 0.0491030178964138, "learning_rate": 0.01, "loss": 1.9464, "step": 113133 }, { "epoch": 11.62754367934224, "grad_norm": 0.04414552450180054, "learning_rate": 0.01, "loss": 1.9452, "step": 113136 }, { "epoch": 11.627852004110997, "grad_norm": 0.03378671780228615, "learning_rate": 0.01, "loss": 1.9313, "step": 113139 }, { "epoch": 11.628160328879753, "grad_norm": 0.05612415820360184, "learning_rate": 0.01, "loss": 1.9377, "step": 113142 }, { "epoch": 11.62846865364851, "grad_norm": 0.06220652535557747, "learning_rate": 0.01, "loss": 1.9535, "step": 113145 }, { "epoch": 11.628776978417266, "grad_norm": 0.05417797341942787, "learning_rate": 0.01, "loss": 1.9194, "step": 113148 }, { "epoch": 11.629085303186022, "grad_norm": 0.07151927798986435, "learning_rate": 0.01, "loss": 1.9419, "step": 113151 }, { "epoch": 11.629393627954778, "grad_norm": 0.06492269039154053, "learning_rate": 0.01, "loss": 1.9504, "step": 113154 }, { "epoch": 11.629701952723536, "grad_norm": 0.1323765516281128, "learning_rate": 0.01, "loss": 1.9115, "step": 113157 }, { "epoch": 11.630010277492293, "grad_norm": 0.06621003150939941, "learning_rate": 0.01, "loss": 1.948, "step": 113160 }, { "epoch": 11.630318602261049, "grad_norm": 0.0659116730093956, "learning_rate": 0.01, "loss": 1.9166, "step": 113163 }, { "epoch": 11.630626927029805, "grad_norm": 0.09065114706754684, "learning_rate": 0.01, "loss": 1.9335, "step": 113166 }, { "epoch": 11.630935251798562, "grad_norm": 0.0618029460310936, "learning_rate": 0.01, "loss": 1.9372, "step": 113169 }, { "epoch": 11.631243576567318, "grad_norm": 0.078899085521698, "learning_rate": 0.01, "loss": 1.9318, "step": 113172 }, { "epoch": 11.631551901336074, "grad_norm": 0.07569435238838196, "learning_rate": 0.01, "loss": 1.925, "step": 113175 }, { "epoch": 11.63186022610483, "grad_norm": 0.06244001165032387, "learning_rate": 0.01, "loss": 1.9409, "step": 113178 }, { "epoch": 11.632168550873587, "grad_norm": 0.12835216522216797, "learning_rate": 0.01, "loss": 1.9239, "step": 113181 }, { "epoch": 11.632476875642343, "grad_norm": 0.04971390590071678, "learning_rate": 0.01, "loss": 1.9542, "step": 113184 }, { "epoch": 11.6327852004111, "grad_norm": 0.08454111963510513, "learning_rate": 0.01, "loss": 1.9571, "step": 113187 }, { "epoch": 11.633093525179856, "grad_norm": 0.03988140821456909, "learning_rate": 0.01, "loss": 1.9396, "step": 113190 }, { "epoch": 11.633401849948612, "grad_norm": 0.048893485218286514, "learning_rate": 0.01, "loss": 1.9491, "step": 113193 }, { "epoch": 11.633710174717368, "grad_norm": 0.09177813678979874, "learning_rate": 0.01, "loss": 1.9388, "step": 113196 }, { "epoch": 11.634018499486125, "grad_norm": 0.04464666545391083, "learning_rate": 0.01, "loss": 1.9472, "step": 113199 }, { "epoch": 11.634326824254881, "grad_norm": 0.14179858565330505, "learning_rate": 0.01, "loss": 1.9507, "step": 113202 }, { "epoch": 11.634635149023639, "grad_norm": 0.10474514216184616, "learning_rate": 0.01, "loss": 1.9334, "step": 113205 }, { "epoch": 11.634943473792395, "grad_norm": 0.05921778455376625, "learning_rate": 0.01, "loss": 1.9485, "step": 113208 }, { "epoch": 11.635251798561152, "grad_norm": 0.045888129621744156, "learning_rate": 0.01, "loss": 1.9513, "step": 113211 }, { "epoch": 11.635560123329908, "grad_norm": 0.06556089222431183, "learning_rate": 0.01, "loss": 1.9471, "step": 113214 }, { "epoch": 11.635868448098664, "grad_norm": 0.044321537017822266, "learning_rate": 0.01, "loss": 1.9664, "step": 113217 }, { "epoch": 11.63617677286742, "grad_norm": 0.03785913437604904, "learning_rate": 0.01, "loss": 1.9752, "step": 113220 }, { "epoch": 11.636485097636177, "grad_norm": 0.11449971050024033, "learning_rate": 0.01, "loss": 1.9416, "step": 113223 }, { "epoch": 11.636793422404933, "grad_norm": 0.0937795415520668, "learning_rate": 0.01, "loss": 1.9575, "step": 113226 }, { "epoch": 11.63710174717369, "grad_norm": 0.11280496418476105, "learning_rate": 0.01, "loss": 1.9219, "step": 113229 }, { "epoch": 11.637410071942446, "grad_norm": 0.09897052496671677, "learning_rate": 0.01, "loss": 1.9472, "step": 113232 }, { "epoch": 11.637718396711202, "grad_norm": 0.08094508945941925, "learning_rate": 0.01, "loss": 1.9334, "step": 113235 }, { "epoch": 11.638026721479958, "grad_norm": 0.0689573660492897, "learning_rate": 0.01, "loss": 1.9411, "step": 113238 }, { "epoch": 11.638335046248715, "grad_norm": 0.05370873212814331, "learning_rate": 0.01, "loss": 1.9597, "step": 113241 }, { "epoch": 11.638643371017471, "grad_norm": 0.06898577511310577, "learning_rate": 0.01, "loss": 1.9141, "step": 113244 }, { "epoch": 11.638951695786227, "grad_norm": 0.04871980473399162, "learning_rate": 0.01, "loss": 1.9469, "step": 113247 }, { "epoch": 11.639260020554985, "grad_norm": 0.05161462724208832, "learning_rate": 0.01, "loss": 1.9323, "step": 113250 }, { "epoch": 11.639568345323742, "grad_norm": 0.055129531770944595, "learning_rate": 0.01, "loss": 1.918, "step": 113253 }, { "epoch": 11.639876670092498, "grad_norm": 0.14611971378326416, "learning_rate": 0.01, "loss": 1.9356, "step": 113256 }, { "epoch": 11.640184994861254, "grad_norm": 0.1649077832698822, "learning_rate": 0.01, "loss": 1.9516, "step": 113259 }, { "epoch": 11.64049331963001, "grad_norm": 0.04667672887444496, "learning_rate": 0.01, "loss": 1.9402, "step": 113262 }, { "epoch": 11.640801644398767, "grad_norm": 0.03393012285232544, "learning_rate": 0.01, "loss": 1.9425, "step": 113265 }, { "epoch": 11.641109969167523, "grad_norm": 0.04141303524374962, "learning_rate": 0.01, "loss": 1.9454, "step": 113268 }, { "epoch": 11.64141829393628, "grad_norm": 0.0544479675590992, "learning_rate": 0.01, "loss": 1.9409, "step": 113271 }, { "epoch": 11.641726618705036, "grad_norm": 0.0461701825261116, "learning_rate": 0.01, "loss": 1.9048, "step": 113274 }, { "epoch": 11.642034943473792, "grad_norm": 0.10449957847595215, "learning_rate": 0.01, "loss": 1.9292, "step": 113277 }, { "epoch": 11.642343268242549, "grad_norm": 0.07522600889205933, "learning_rate": 0.01, "loss": 1.9383, "step": 113280 }, { "epoch": 11.642651593011305, "grad_norm": 0.06570401787757874, "learning_rate": 0.01, "loss": 1.9238, "step": 113283 }, { "epoch": 11.642959917780061, "grad_norm": 0.08367069810628891, "learning_rate": 0.01, "loss": 1.8957, "step": 113286 }, { "epoch": 11.643268242548817, "grad_norm": 0.08211222290992737, "learning_rate": 0.01, "loss": 1.9474, "step": 113289 }, { "epoch": 11.643576567317574, "grad_norm": 0.05755218118429184, "learning_rate": 0.01, "loss": 1.9397, "step": 113292 }, { "epoch": 11.64388489208633, "grad_norm": 0.057840488851070404, "learning_rate": 0.01, "loss": 1.9462, "step": 113295 }, { "epoch": 11.644193216855088, "grad_norm": 0.05402502417564392, "learning_rate": 0.01, "loss": 1.9401, "step": 113298 }, { "epoch": 11.644501541623844, "grad_norm": 0.09462658315896988, "learning_rate": 0.01, "loss": 1.9364, "step": 113301 }, { "epoch": 11.6448098663926, "grad_norm": 0.05124598741531372, "learning_rate": 0.01, "loss": 1.9303, "step": 113304 }, { "epoch": 11.645118191161357, "grad_norm": 0.04224373400211334, "learning_rate": 0.01, "loss": 1.9465, "step": 113307 }, { "epoch": 11.645426515930113, "grad_norm": 0.0728943794965744, "learning_rate": 0.01, "loss": 1.9697, "step": 113310 }, { "epoch": 11.64573484069887, "grad_norm": 0.11200623214244843, "learning_rate": 0.01, "loss": 1.9394, "step": 113313 }, { "epoch": 11.646043165467626, "grad_norm": 0.08526485413312912, "learning_rate": 0.01, "loss": 1.9267, "step": 113316 }, { "epoch": 11.646351490236382, "grad_norm": 0.10539497435092926, "learning_rate": 0.01, "loss": 1.933, "step": 113319 }, { "epoch": 11.646659815005139, "grad_norm": 0.07505021244287491, "learning_rate": 0.01, "loss": 1.9452, "step": 113322 }, { "epoch": 11.646968139773895, "grad_norm": 0.06336735934019089, "learning_rate": 0.01, "loss": 1.9395, "step": 113325 }, { "epoch": 11.647276464542651, "grad_norm": 0.07473690062761307, "learning_rate": 0.01, "loss": 1.926, "step": 113328 }, { "epoch": 11.647584789311408, "grad_norm": 0.09957322478294373, "learning_rate": 0.01, "loss": 1.9319, "step": 113331 }, { "epoch": 11.647893114080164, "grad_norm": 0.028184523805975914, "learning_rate": 0.01, "loss": 1.9241, "step": 113334 }, { "epoch": 11.64820143884892, "grad_norm": 0.03983794525265694, "learning_rate": 0.01, "loss": 1.9597, "step": 113337 }, { "epoch": 11.648509763617678, "grad_norm": 0.13572625815868378, "learning_rate": 0.01, "loss": 1.9434, "step": 113340 }, { "epoch": 11.648818088386435, "grad_norm": 0.12764644622802734, "learning_rate": 0.01, "loss": 1.9304, "step": 113343 }, { "epoch": 11.64912641315519, "grad_norm": 0.08703610301017761, "learning_rate": 0.01, "loss": 1.9522, "step": 113346 }, { "epoch": 11.649434737923947, "grad_norm": 0.04160364344716072, "learning_rate": 0.01, "loss": 1.9336, "step": 113349 }, { "epoch": 11.649743062692703, "grad_norm": 0.03464055061340332, "learning_rate": 0.01, "loss": 1.9372, "step": 113352 }, { "epoch": 11.65005138746146, "grad_norm": 0.03290557116270065, "learning_rate": 0.01, "loss": 1.9487, "step": 113355 }, { "epoch": 11.650359712230216, "grad_norm": 0.03284400701522827, "learning_rate": 0.01, "loss": 1.9284, "step": 113358 }, { "epoch": 11.650668036998972, "grad_norm": 0.04168650880455971, "learning_rate": 0.01, "loss": 1.943, "step": 113361 }, { "epoch": 11.650976361767729, "grad_norm": 0.05013224482536316, "learning_rate": 0.01, "loss": 1.9394, "step": 113364 }, { "epoch": 11.651284686536485, "grad_norm": 0.12323949486017227, "learning_rate": 0.01, "loss": 1.9259, "step": 113367 }, { "epoch": 11.651593011305241, "grad_norm": 0.11722767353057861, "learning_rate": 0.01, "loss": 1.9541, "step": 113370 }, { "epoch": 11.651901336073998, "grad_norm": 0.1257123053073883, "learning_rate": 0.01, "loss": 1.9189, "step": 113373 }, { "epoch": 11.652209660842754, "grad_norm": 0.09780552238225937, "learning_rate": 0.01, "loss": 1.9669, "step": 113376 }, { "epoch": 11.65251798561151, "grad_norm": 0.0467044822871685, "learning_rate": 0.01, "loss": 1.9588, "step": 113379 }, { "epoch": 11.652826310380267, "grad_norm": 0.04701404646039009, "learning_rate": 0.01, "loss": 1.9438, "step": 113382 }, { "epoch": 11.653134635149023, "grad_norm": 0.04993510991334915, "learning_rate": 0.01, "loss": 1.9615, "step": 113385 }, { "epoch": 11.65344295991778, "grad_norm": 0.03722843900322914, "learning_rate": 0.01, "loss": 1.9355, "step": 113388 }, { "epoch": 11.653751284686537, "grad_norm": 0.03551566228270531, "learning_rate": 0.01, "loss": 1.9488, "step": 113391 }, { "epoch": 11.654059609455294, "grad_norm": 0.11122262477874756, "learning_rate": 0.01, "loss": 1.9585, "step": 113394 }, { "epoch": 11.65436793422405, "grad_norm": 0.06826326251029968, "learning_rate": 0.01, "loss": 1.9606, "step": 113397 }, { "epoch": 11.654676258992806, "grad_norm": 0.155528724193573, "learning_rate": 0.01, "loss": 1.949, "step": 113400 }, { "epoch": 11.654984583761562, "grad_norm": 0.04919079691171646, "learning_rate": 0.01, "loss": 1.9272, "step": 113403 }, { "epoch": 11.655292908530319, "grad_norm": 0.06651998311281204, "learning_rate": 0.01, "loss": 1.912, "step": 113406 }, { "epoch": 11.655601233299075, "grad_norm": 0.05976448580622673, "learning_rate": 0.01, "loss": 1.9306, "step": 113409 }, { "epoch": 11.655909558067831, "grad_norm": 0.04969658702611923, "learning_rate": 0.01, "loss": 1.9166, "step": 113412 }, { "epoch": 11.656217882836588, "grad_norm": 0.10790357738733292, "learning_rate": 0.01, "loss": 1.9286, "step": 113415 }, { "epoch": 11.656526207605344, "grad_norm": 0.06400740146636963, "learning_rate": 0.01, "loss": 1.9518, "step": 113418 }, { "epoch": 11.6568345323741, "grad_norm": 0.11117127537727356, "learning_rate": 0.01, "loss": 1.9289, "step": 113421 }, { "epoch": 11.657142857142857, "grad_norm": 0.08646197617053986, "learning_rate": 0.01, "loss": 1.9619, "step": 113424 }, { "epoch": 11.657451181911613, "grad_norm": 0.11909831315279007, "learning_rate": 0.01, "loss": 1.9227, "step": 113427 }, { "epoch": 11.65775950668037, "grad_norm": 0.05872794985771179, "learning_rate": 0.01, "loss": 1.9437, "step": 113430 }, { "epoch": 11.658067831449127, "grad_norm": 0.0353827103972435, "learning_rate": 0.01, "loss": 1.9306, "step": 113433 }, { "epoch": 11.658376156217884, "grad_norm": 0.09279036521911621, "learning_rate": 0.01, "loss": 1.9348, "step": 113436 }, { "epoch": 11.65868448098664, "grad_norm": 0.04595791921019554, "learning_rate": 0.01, "loss": 1.912, "step": 113439 }, { "epoch": 11.658992805755396, "grad_norm": 0.08540619164705276, "learning_rate": 0.01, "loss": 1.9528, "step": 113442 }, { "epoch": 11.659301130524153, "grad_norm": 0.0668819323182106, "learning_rate": 0.01, "loss": 1.9389, "step": 113445 }, { "epoch": 11.659609455292909, "grad_norm": 0.09538742154836655, "learning_rate": 0.01, "loss": 1.9234, "step": 113448 }, { "epoch": 11.659917780061665, "grad_norm": 0.06846621632575989, "learning_rate": 0.01, "loss": 1.9362, "step": 113451 }, { "epoch": 11.660226104830421, "grad_norm": 0.05695407837629318, "learning_rate": 0.01, "loss": 1.9128, "step": 113454 }, { "epoch": 11.660534429599178, "grad_norm": 0.056524164974689484, "learning_rate": 0.01, "loss": 1.9562, "step": 113457 }, { "epoch": 11.660842754367934, "grad_norm": 0.034500472247600555, "learning_rate": 0.01, "loss": 1.9527, "step": 113460 }, { "epoch": 11.66115107913669, "grad_norm": 0.046678464859724045, "learning_rate": 0.01, "loss": 1.9645, "step": 113463 }, { "epoch": 11.661459403905447, "grad_norm": 0.06171998009085655, "learning_rate": 0.01, "loss": 1.9539, "step": 113466 }, { "epoch": 11.661767728674203, "grad_norm": 0.13738484680652618, "learning_rate": 0.01, "loss": 1.9331, "step": 113469 }, { "epoch": 11.66207605344296, "grad_norm": 0.07852083444595337, "learning_rate": 0.01, "loss": 1.9542, "step": 113472 }, { "epoch": 11.662384378211716, "grad_norm": 0.04132244363427162, "learning_rate": 0.01, "loss": 1.9312, "step": 113475 }, { "epoch": 11.662692702980472, "grad_norm": 0.04207141324877739, "learning_rate": 0.01, "loss": 1.9401, "step": 113478 }, { "epoch": 11.66300102774923, "grad_norm": 0.03948988765478134, "learning_rate": 0.01, "loss": 1.9436, "step": 113481 }, { "epoch": 11.663309352517986, "grad_norm": 0.04067898541688919, "learning_rate": 0.01, "loss": 1.9234, "step": 113484 }, { "epoch": 11.663617677286743, "grad_norm": 0.03814522549510002, "learning_rate": 0.01, "loss": 1.9487, "step": 113487 }, { "epoch": 11.663926002055499, "grad_norm": 0.0856989324092865, "learning_rate": 0.01, "loss": 1.9228, "step": 113490 }, { "epoch": 11.664234326824255, "grad_norm": 0.06945549696683884, "learning_rate": 0.01, "loss": 1.9287, "step": 113493 }, { "epoch": 11.664542651593012, "grad_norm": 0.04732196778059006, "learning_rate": 0.01, "loss": 1.9148, "step": 113496 }, { "epoch": 11.664850976361768, "grad_norm": 0.0411718524992466, "learning_rate": 0.01, "loss": 1.9436, "step": 113499 }, { "epoch": 11.665159301130524, "grad_norm": 0.05704187601804733, "learning_rate": 0.01, "loss": 1.9184, "step": 113502 }, { "epoch": 11.66546762589928, "grad_norm": 0.10712847858667374, "learning_rate": 0.01, "loss": 1.9591, "step": 113505 }, { "epoch": 11.665775950668037, "grad_norm": 0.09478617459535599, "learning_rate": 0.01, "loss": 1.9294, "step": 113508 }, { "epoch": 11.666084275436793, "grad_norm": 0.0557820163667202, "learning_rate": 0.01, "loss": 1.9416, "step": 113511 }, { "epoch": 11.66639260020555, "grad_norm": 0.04023374244570732, "learning_rate": 0.01, "loss": 1.9369, "step": 113514 }, { "epoch": 11.666700924974306, "grad_norm": 0.09300129860639572, "learning_rate": 0.01, "loss": 1.9317, "step": 113517 }, { "epoch": 11.667009249743062, "grad_norm": 0.07823445647954941, "learning_rate": 0.01, "loss": 1.9467, "step": 113520 }, { "epoch": 11.66731757451182, "grad_norm": 0.17606739699840546, "learning_rate": 0.01, "loss": 1.9489, "step": 113523 }, { "epoch": 11.667625899280576, "grad_norm": 0.07043785601854324, "learning_rate": 0.01, "loss": 1.957, "step": 113526 }, { "epoch": 11.667934224049333, "grad_norm": 0.0512007512152195, "learning_rate": 0.01, "loss": 1.9318, "step": 113529 }, { "epoch": 11.668242548818089, "grad_norm": 0.04862508177757263, "learning_rate": 0.01, "loss": 1.9403, "step": 113532 }, { "epoch": 11.668550873586845, "grad_norm": 0.05967811495065689, "learning_rate": 0.01, "loss": 1.8976, "step": 113535 }, { "epoch": 11.668859198355602, "grad_norm": 0.05780831724405289, "learning_rate": 0.01, "loss": 1.9236, "step": 113538 }, { "epoch": 11.669167523124358, "grad_norm": 0.06208907067775726, "learning_rate": 0.01, "loss": 1.9548, "step": 113541 }, { "epoch": 11.669475847893114, "grad_norm": 0.07732394337654114, "learning_rate": 0.01, "loss": 1.9397, "step": 113544 }, { "epoch": 11.66978417266187, "grad_norm": 0.09532228112220764, "learning_rate": 0.01, "loss": 1.9656, "step": 113547 }, { "epoch": 11.670092497430627, "grad_norm": 0.06125938892364502, "learning_rate": 0.01, "loss": 1.9367, "step": 113550 }, { "epoch": 11.670400822199383, "grad_norm": 0.09997191280126572, "learning_rate": 0.01, "loss": 1.9631, "step": 113553 }, { "epoch": 11.67070914696814, "grad_norm": 0.06977102160453796, "learning_rate": 0.01, "loss": 1.9379, "step": 113556 }, { "epoch": 11.671017471736896, "grad_norm": 0.04314795881509781, "learning_rate": 0.01, "loss": 1.8992, "step": 113559 }, { "epoch": 11.671325796505652, "grad_norm": 0.06881061941385269, "learning_rate": 0.01, "loss": 1.9623, "step": 113562 }, { "epoch": 11.671634121274408, "grad_norm": 0.07183831930160522, "learning_rate": 0.01, "loss": 1.9528, "step": 113565 }, { "epoch": 11.671942446043165, "grad_norm": 0.0504421591758728, "learning_rate": 0.01, "loss": 1.9347, "step": 113568 }, { "epoch": 11.672250770811921, "grad_norm": 0.09572986513376236, "learning_rate": 0.01, "loss": 1.9286, "step": 113571 }, { "epoch": 11.672559095580679, "grad_norm": 0.07264112681150436, "learning_rate": 0.01, "loss": 1.9512, "step": 113574 }, { "epoch": 11.672867420349435, "grad_norm": 0.11083868145942688, "learning_rate": 0.01, "loss": 1.9536, "step": 113577 }, { "epoch": 11.673175745118192, "grad_norm": 0.04253198578953743, "learning_rate": 0.01, "loss": 1.9223, "step": 113580 }, { "epoch": 11.673484069886948, "grad_norm": 0.06747709959745407, "learning_rate": 0.01, "loss": 1.9446, "step": 113583 }, { "epoch": 11.673792394655704, "grad_norm": 0.045510247349739075, "learning_rate": 0.01, "loss": 1.9357, "step": 113586 }, { "epoch": 11.67410071942446, "grad_norm": 0.05232301354408264, "learning_rate": 0.01, "loss": 1.9587, "step": 113589 }, { "epoch": 11.674409044193217, "grad_norm": 0.09999211132526398, "learning_rate": 0.01, "loss": 1.9428, "step": 113592 }, { "epoch": 11.674717368961973, "grad_norm": 0.14111940562725067, "learning_rate": 0.01, "loss": 1.9616, "step": 113595 }, { "epoch": 11.67502569373073, "grad_norm": 0.050830744206905365, "learning_rate": 0.01, "loss": 1.9411, "step": 113598 }, { "epoch": 11.675334018499486, "grad_norm": 0.0898144394159317, "learning_rate": 0.01, "loss": 1.9656, "step": 113601 }, { "epoch": 11.675642343268242, "grad_norm": 0.04930245876312256, "learning_rate": 0.01, "loss": 1.9369, "step": 113604 }, { "epoch": 11.675950668036998, "grad_norm": 0.08487803488969803, "learning_rate": 0.01, "loss": 1.9218, "step": 113607 }, { "epoch": 11.676258992805755, "grad_norm": 0.0673738345503807, "learning_rate": 0.01, "loss": 1.9331, "step": 113610 }, { "epoch": 11.676567317574511, "grad_norm": 0.09414537996053696, "learning_rate": 0.01, "loss": 1.9719, "step": 113613 }, { "epoch": 11.67687564234327, "grad_norm": 0.04949101433157921, "learning_rate": 0.01, "loss": 1.9428, "step": 113616 }, { "epoch": 11.677183967112025, "grad_norm": 0.09853395819664001, "learning_rate": 0.01, "loss": 1.9397, "step": 113619 }, { "epoch": 11.677492291880782, "grad_norm": 0.08439648151397705, "learning_rate": 0.01, "loss": 1.955, "step": 113622 }, { "epoch": 11.677800616649538, "grad_norm": 0.04191217198967934, "learning_rate": 0.01, "loss": 1.9538, "step": 113625 }, { "epoch": 11.678108941418294, "grad_norm": 0.04377821460366249, "learning_rate": 0.01, "loss": 1.9415, "step": 113628 }, { "epoch": 11.67841726618705, "grad_norm": 0.055352889001369476, "learning_rate": 0.01, "loss": 1.9283, "step": 113631 }, { "epoch": 11.678725590955807, "grad_norm": 0.05160324648022652, "learning_rate": 0.01, "loss": 1.9172, "step": 113634 }, { "epoch": 11.679033915724563, "grad_norm": 0.047420211136341095, "learning_rate": 0.01, "loss": 1.9462, "step": 113637 }, { "epoch": 11.67934224049332, "grad_norm": 0.12467961758375168, "learning_rate": 0.01, "loss": 1.9328, "step": 113640 }, { "epoch": 11.679650565262076, "grad_norm": 0.06575851887464523, "learning_rate": 0.01, "loss": 1.9167, "step": 113643 }, { "epoch": 11.679958890030832, "grad_norm": 0.05539676174521446, "learning_rate": 0.01, "loss": 1.9253, "step": 113646 }, { "epoch": 11.680267214799589, "grad_norm": 0.09138210862874985, "learning_rate": 0.01, "loss": 1.9466, "step": 113649 }, { "epoch": 11.680575539568345, "grad_norm": 0.06900908797979355, "learning_rate": 0.01, "loss": 1.9298, "step": 113652 }, { "epoch": 11.680883864337101, "grad_norm": 0.06584549695253372, "learning_rate": 0.01, "loss": 1.9165, "step": 113655 }, { "epoch": 11.681192189105857, "grad_norm": 0.08189304172992706, "learning_rate": 0.01, "loss": 1.9418, "step": 113658 }, { "epoch": 11.681500513874614, "grad_norm": 0.050689902156591415, "learning_rate": 0.01, "loss": 1.9677, "step": 113661 }, { "epoch": 11.681808838643372, "grad_norm": 0.09859631955623627, "learning_rate": 0.01, "loss": 1.9546, "step": 113664 }, { "epoch": 11.682117163412128, "grad_norm": 0.04112638160586357, "learning_rate": 0.01, "loss": 1.9377, "step": 113667 }, { "epoch": 11.682425488180884, "grad_norm": 0.08223004639148712, "learning_rate": 0.01, "loss": 1.916, "step": 113670 }, { "epoch": 11.68273381294964, "grad_norm": 0.10968174785375595, "learning_rate": 0.01, "loss": 1.948, "step": 113673 }, { "epoch": 11.683042137718397, "grad_norm": 0.11927396804094315, "learning_rate": 0.01, "loss": 1.9179, "step": 113676 }, { "epoch": 11.683350462487153, "grad_norm": 0.058725710958242416, "learning_rate": 0.01, "loss": 1.9773, "step": 113679 }, { "epoch": 11.68365878725591, "grad_norm": 0.06128184124827385, "learning_rate": 0.01, "loss": 1.9442, "step": 113682 }, { "epoch": 11.683967112024666, "grad_norm": 0.036178167909383774, "learning_rate": 0.01, "loss": 1.9326, "step": 113685 }, { "epoch": 11.684275436793422, "grad_norm": 0.03075682558119297, "learning_rate": 0.01, "loss": 1.9112, "step": 113688 }, { "epoch": 11.684583761562179, "grad_norm": 0.11864864081144333, "learning_rate": 0.01, "loss": 1.9377, "step": 113691 }, { "epoch": 11.684892086330935, "grad_norm": 0.045278266072273254, "learning_rate": 0.01, "loss": 1.952, "step": 113694 }, { "epoch": 11.685200411099691, "grad_norm": 0.07865426689386368, "learning_rate": 0.01, "loss": 1.9372, "step": 113697 }, { "epoch": 11.685508735868448, "grad_norm": 0.03860568627715111, "learning_rate": 0.01, "loss": 1.9383, "step": 113700 }, { "epoch": 11.685817060637204, "grad_norm": 0.06591783463954926, "learning_rate": 0.01, "loss": 1.9235, "step": 113703 }, { "epoch": 11.68612538540596, "grad_norm": 0.04285858944058418, "learning_rate": 0.01, "loss": 1.9189, "step": 113706 }, { "epoch": 11.686433710174718, "grad_norm": 0.04691053181886673, "learning_rate": 0.01, "loss": 1.9251, "step": 113709 }, { "epoch": 11.686742034943475, "grad_norm": 0.05432582274079323, "learning_rate": 0.01, "loss": 1.9197, "step": 113712 }, { "epoch": 11.68705035971223, "grad_norm": 0.048061177134513855, "learning_rate": 0.01, "loss": 1.949, "step": 113715 }, { "epoch": 11.687358684480987, "grad_norm": 0.04626024141907692, "learning_rate": 0.01, "loss": 1.9375, "step": 113718 }, { "epoch": 11.687667009249743, "grad_norm": 0.08235618472099304, "learning_rate": 0.01, "loss": 1.9404, "step": 113721 }, { "epoch": 11.6879753340185, "grad_norm": 0.11195166409015656, "learning_rate": 0.01, "loss": 1.9383, "step": 113724 }, { "epoch": 11.688283658787256, "grad_norm": 0.07571295648813248, "learning_rate": 0.01, "loss": 1.9468, "step": 113727 }, { "epoch": 11.688591983556012, "grad_norm": 0.08173639327287674, "learning_rate": 0.01, "loss": 1.9211, "step": 113730 }, { "epoch": 11.688900308324769, "grad_norm": 0.06790082156658173, "learning_rate": 0.01, "loss": 1.9444, "step": 113733 }, { "epoch": 11.689208633093525, "grad_norm": 0.09619564563035965, "learning_rate": 0.01, "loss": 1.942, "step": 113736 }, { "epoch": 11.689516957862281, "grad_norm": 0.06219369173049927, "learning_rate": 0.01, "loss": 1.9546, "step": 113739 }, { "epoch": 11.689825282631038, "grad_norm": 0.12838895618915558, "learning_rate": 0.01, "loss": 1.9234, "step": 113742 }, { "epoch": 11.690133607399794, "grad_norm": 0.12423622608184814, "learning_rate": 0.01, "loss": 1.9572, "step": 113745 }, { "epoch": 11.69044193216855, "grad_norm": 0.13346002995967865, "learning_rate": 0.01, "loss": 1.9481, "step": 113748 }, { "epoch": 11.690750256937307, "grad_norm": 0.13693243265151978, "learning_rate": 0.01, "loss": 1.9414, "step": 113751 }, { "epoch": 11.691058581706063, "grad_norm": 0.049979254603385925, "learning_rate": 0.01, "loss": 1.9027, "step": 113754 }, { "epoch": 11.691366906474821, "grad_norm": 0.0416957326233387, "learning_rate": 0.01, "loss": 1.938, "step": 113757 }, { "epoch": 11.691675231243577, "grad_norm": 0.05410477891564369, "learning_rate": 0.01, "loss": 1.955, "step": 113760 }, { "epoch": 11.691983556012334, "grad_norm": 0.03738997131586075, "learning_rate": 0.01, "loss": 1.9343, "step": 113763 }, { "epoch": 11.69229188078109, "grad_norm": 0.03813576325774193, "learning_rate": 0.01, "loss": 1.9311, "step": 113766 }, { "epoch": 11.692600205549846, "grad_norm": 0.04307515546679497, "learning_rate": 0.01, "loss": 1.961, "step": 113769 }, { "epoch": 11.692908530318602, "grad_norm": 0.053854089230298996, "learning_rate": 0.01, "loss": 1.9343, "step": 113772 }, { "epoch": 11.693216855087359, "grad_norm": 0.09748701006174088, "learning_rate": 0.01, "loss": 1.9523, "step": 113775 }, { "epoch": 11.693525179856115, "grad_norm": 0.08756256103515625, "learning_rate": 0.01, "loss": 1.9728, "step": 113778 }, { "epoch": 11.693833504624871, "grad_norm": 0.1275547593832016, "learning_rate": 0.01, "loss": 1.9246, "step": 113781 }, { "epoch": 11.694141829393628, "grad_norm": 0.13405336439609528, "learning_rate": 0.01, "loss": 1.941, "step": 113784 }, { "epoch": 11.694450154162384, "grad_norm": 0.15136772394180298, "learning_rate": 0.01, "loss": 1.9299, "step": 113787 }, { "epoch": 11.69475847893114, "grad_norm": 0.09638786315917969, "learning_rate": 0.01, "loss": 1.9151, "step": 113790 }, { "epoch": 11.695066803699897, "grad_norm": 0.09290369600057602, "learning_rate": 0.01, "loss": 1.9349, "step": 113793 }, { "epoch": 11.695375128468653, "grad_norm": 0.10820170491933823, "learning_rate": 0.01, "loss": 1.9467, "step": 113796 }, { "epoch": 11.695683453237411, "grad_norm": 0.047729067504405975, "learning_rate": 0.01, "loss": 1.9305, "step": 113799 }, { "epoch": 11.695991778006167, "grad_norm": 0.10129518061876297, "learning_rate": 0.01, "loss": 1.9384, "step": 113802 }, { "epoch": 11.696300102774924, "grad_norm": 0.0901324599981308, "learning_rate": 0.01, "loss": 1.9298, "step": 113805 }, { "epoch": 11.69660842754368, "grad_norm": 0.0981982871890068, "learning_rate": 0.01, "loss": 1.9376, "step": 113808 }, { "epoch": 11.696916752312436, "grad_norm": 0.06594400107860565, "learning_rate": 0.01, "loss": 1.9788, "step": 113811 }, { "epoch": 11.697225077081193, "grad_norm": 0.035697147250175476, "learning_rate": 0.01, "loss": 1.9317, "step": 113814 }, { "epoch": 11.697533401849949, "grad_norm": 0.0464530885219574, "learning_rate": 0.01, "loss": 1.9462, "step": 113817 }, { "epoch": 11.697841726618705, "grad_norm": 0.04814977943897247, "learning_rate": 0.01, "loss": 1.9436, "step": 113820 }, { "epoch": 11.698150051387461, "grad_norm": 0.07865633070468903, "learning_rate": 0.01, "loss": 1.9619, "step": 113823 }, { "epoch": 11.698458376156218, "grad_norm": 0.053309258073568344, "learning_rate": 0.01, "loss": 1.951, "step": 113826 }, { "epoch": 11.698766700924974, "grad_norm": 0.0638638436794281, "learning_rate": 0.01, "loss": 1.9143, "step": 113829 }, { "epoch": 11.69907502569373, "grad_norm": 0.11192189902067184, "learning_rate": 0.01, "loss": 1.9376, "step": 113832 }, { "epoch": 11.699383350462487, "grad_norm": 0.09100265055894852, "learning_rate": 0.01, "loss": 1.9493, "step": 113835 }, { "epoch": 11.699691675231243, "grad_norm": 0.06564506888389587, "learning_rate": 0.01, "loss": 1.9245, "step": 113838 }, { "epoch": 11.7, "grad_norm": 0.06351829320192337, "learning_rate": 0.01, "loss": 1.9573, "step": 113841 }, { "epoch": 11.700308324768756, "grad_norm": 0.0480794832110405, "learning_rate": 0.01, "loss": 1.9175, "step": 113844 }, { "epoch": 11.700616649537512, "grad_norm": 0.036630935966968536, "learning_rate": 0.01, "loss": 1.9477, "step": 113847 }, { "epoch": 11.70092497430627, "grad_norm": 0.09170176088809967, "learning_rate": 0.01, "loss": 1.9395, "step": 113850 }, { "epoch": 11.701233299075026, "grad_norm": 0.10979105532169342, "learning_rate": 0.01, "loss": 1.9316, "step": 113853 }, { "epoch": 11.701541623843783, "grad_norm": 0.13954408466815948, "learning_rate": 0.01, "loss": 1.9136, "step": 113856 }, { "epoch": 11.701849948612539, "grad_norm": 0.0569770522415638, "learning_rate": 0.01, "loss": 1.965, "step": 113859 }, { "epoch": 11.702158273381295, "grad_norm": 0.054077599197626114, "learning_rate": 0.01, "loss": 1.9236, "step": 113862 }, { "epoch": 11.702466598150052, "grad_norm": 0.042730607092380524, "learning_rate": 0.01, "loss": 1.9547, "step": 113865 }, { "epoch": 11.702774922918808, "grad_norm": 0.0396852046251297, "learning_rate": 0.01, "loss": 1.9617, "step": 113868 }, { "epoch": 11.703083247687564, "grad_norm": 0.04677654057741165, "learning_rate": 0.01, "loss": 1.9472, "step": 113871 }, { "epoch": 11.70339157245632, "grad_norm": 0.060522302985191345, "learning_rate": 0.01, "loss": 1.9239, "step": 113874 }, { "epoch": 11.703699897225077, "grad_norm": 0.06372793763875961, "learning_rate": 0.01, "loss": 1.9589, "step": 113877 }, { "epoch": 11.704008221993833, "grad_norm": 0.09572730958461761, "learning_rate": 0.01, "loss": 1.9179, "step": 113880 }, { "epoch": 11.70431654676259, "grad_norm": 0.03504783287644386, "learning_rate": 0.01, "loss": 1.9428, "step": 113883 }, { "epoch": 11.704624871531346, "grad_norm": 0.10535631328821182, "learning_rate": 0.01, "loss": 1.9182, "step": 113886 }, { "epoch": 11.704933196300102, "grad_norm": 0.08261964470148087, "learning_rate": 0.01, "loss": 1.9379, "step": 113889 }, { "epoch": 11.70524152106886, "grad_norm": 0.09721565246582031, "learning_rate": 0.01, "loss": 1.9175, "step": 113892 }, { "epoch": 11.705549845837616, "grad_norm": 0.12555786967277527, "learning_rate": 0.01, "loss": 1.9266, "step": 113895 }, { "epoch": 11.705858170606373, "grad_norm": 0.10910096019506454, "learning_rate": 0.01, "loss": 1.9252, "step": 113898 }, { "epoch": 11.706166495375129, "grad_norm": 0.08156652003526688, "learning_rate": 0.01, "loss": 1.9379, "step": 113901 }, { "epoch": 11.706474820143885, "grad_norm": 0.046058304607868195, "learning_rate": 0.01, "loss": 1.9427, "step": 113904 }, { "epoch": 11.706783144912642, "grad_norm": 0.11909212917089462, "learning_rate": 0.01, "loss": 1.9145, "step": 113907 }, { "epoch": 11.707091469681398, "grad_norm": 0.06561049818992615, "learning_rate": 0.01, "loss": 1.9362, "step": 113910 }, { "epoch": 11.707399794450154, "grad_norm": 0.05175124108791351, "learning_rate": 0.01, "loss": 1.9193, "step": 113913 }, { "epoch": 11.70770811921891, "grad_norm": 0.03363698348402977, "learning_rate": 0.01, "loss": 1.9402, "step": 113916 }, { "epoch": 11.708016443987667, "grad_norm": 0.05112067237496376, "learning_rate": 0.01, "loss": 1.9464, "step": 113919 }, { "epoch": 11.708324768756423, "grad_norm": 0.08059346675872803, "learning_rate": 0.01, "loss": 1.91, "step": 113922 }, { "epoch": 11.70863309352518, "grad_norm": 0.09412217885255814, "learning_rate": 0.01, "loss": 1.9429, "step": 113925 }, { "epoch": 11.708941418293936, "grad_norm": 0.05953041836619377, "learning_rate": 0.01, "loss": 1.9331, "step": 113928 }, { "epoch": 11.709249743062692, "grad_norm": 0.08858895301818848, "learning_rate": 0.01, "loss": 1.9622, "step": 113931 }, { "epoch": 11.709558067831448, "grad_norm": 0.09069149196147919, "learning_rate": 0.01, "loss": 1.9302, "step": 113934 }, { "epoch": 11.709866392600205, "grad_norm": 0.06732858717441559, "learning_rate": 0.01, "loss": 1.9396, "step": 113937 }, { "epoch": 11.710174717368963, "grad_norm": 0.0464613251388073, "learning_rate": 0.01, "loss": 1.9656, "step": 113940 }, { "epoch": 11.710483042137719, "grad_norm": 0.08977202326059341, "learning_rate": 0.01, "loss": 1.9431, "step": 113943 }, { "epoch": 11.710791366906475, "grad_norm": 0.11978859454393387, "learning_rate": 0.01, "loss": 1.9336, "step": 113946 }, { "epoch": 11.711099691675232, "grad_norm": 0.08750269562005997, "learning_rate": 0.01, "loss": 1.9734, "step": 113949 }, { "epoch": 11.711408016443988, "grad_norm": 0.058178629726171494, "learning_rate": 0.01, "loss": 1.959, "step": 113952 }, { "epoch": 11.711716341212744, "grad_norm": 0.12875346839427948, "learning_rate": 0.01, "loss": 1.9489, "step": 113955 }, { "epoch": 11.7120246659815, "grad_norm": 0.052333295345306396, "learning_rate": 0.01, "loss": 1.9418, "step": 113958 }, { "epoch": 11.712332990750257, "grad_norm": 0.04038956016302109, "learning_rate": 0.01, "loss": 1.9547, "step": 113961 }, { "epoch": 11.712641315519013, "grad_norm": 0.09954986721277237, "learning_rate": 0.01, "loss": 1.9315, "step": 113964 }, { "epoch": 11.71294964028777, "grad_norm": 0.05516171455383301, "learning_rate": 0.01, "loss": 1.947, "step": 113967 }, { "epoch": 11.713257965056526, "grad_norm": 0.04130719229578972, "learning_rate": 0.01, "loss": 1.9499, "step": 113970 }, { "epoch": 11.713566289825282, "grad_norm": 0.04109114035964012, "learning_rate": 0.01, "loss": 1.9184, "step": 113973 }, { "epoch": 11.713874614594038, "grad_norm": 0.071103036403656, "learning_rate": 0.01, "loss": 1.9284, "step": 113976 }, { "epoch": 11.714182939362795, "grad_norm": 0.10898397862911224, "learning_rate": 0.01, "loss": 1.9339, "step": 113979 }, { "epoch": 11.714491264131553, "grad_norm": 0.03577899560332298, "learning_rate": 0.01, "loss": 1.938, "step": 113982 }, { "epoch": 11.71479958890031, "grad_norm": 0.07445529848337173, "learning_rate": 0.01, "loss": 1.9287, "step": 113985 }, { "epoch": 11.715107913669065, "grad_norm": 0.09908154606819153, "learning_rate": 0.01, "loss": 1.9467, "step": 113988 }, { "epoch": 11.715416238437822, "grad_norm": 0.07953612506389618, "learning_rate": 0.01, "loss": 1.935, "step": 113991 }, { "epoch": 11.715724563206578, "grad_norm": 0.05587496981024742, "learning_rate": 0.01, "loss": 1.9377, "step": 113994 }, { "epoch": 11.716032887975334, "grad_norm": 0.040337421000003815, "learning_rate": 0.01, "loss": 1.9269, "step": 113997 }, { "epoch": 11.71634121274409, "grad_norm": 0.07240230590105057, "learning_rate": 0.01, "loss": 1.9485, "step": 114000 }, { "epoch": 11.716649537512847, "grad_norm": 0.0691431537270546, "learning_rate": 0.01, "loss": 1.9161, "step": 114003 }, { "epoch": 11.716957862281603, "grad_norm": 0.06162949278950691, "learning_rate": 0.01, "loss": 1.926, "step": 114006 }, { "epoch": 11.71726618705036, "grad_norm": 0.10445791482925415, "learning_rate": 0.01, "loss": 1.9273, "step": 114009 }, { "epoch": 11.717574511819116, "grad_norm": 0.041798826307058334, "learning_rate": 0.01, "loss": 1.9592, "step": 114012 }, { "epoch": 11.717882836587872, "grad_norm": 0.08959610015153885, "learning_rate": 0.01, "loss": 1.9322, "step": 114015 }, { "epoch": 11.718191161356629, "grad_norm": 0.12138823419809341, "learning_rate": 0.01, "loss": 1.9303, "step": 114018 }, { "epoch": 11.718499486125385, "grad_norm": 0.066931813955307, "learning_rate": 0.01, "loss": 1.9478, "step": 114021 }, { "epoch": 11.718807810894141, "grad_norm": 0.05871887132525444, "learning_rate": 0.01, "loss": 1.9608, "step": 114024 }, { "epoch": 11.719116135662897, "grad_norm": 0.057196423411369324, "learning_rate": 0.01, "loss": 1.9642, "step": 114027 }, { "epoch": 11.719424460431654, "grad_norm": 0.056774549186229706, "learning_rate": 0.01, "loss": 1.9448, "step": 114030 }, { "epoch": 11.719732785200412, "grad_norm": 0.050951916724443436, "learning_rate": 0.01, "loss": 1.9267, "step": 114033 }, { "epoch": 11.720041109969168, "grad_norm": 0.0355374775826931, "learning_rate": 0.01, "loss": 1.9168, "step": 114036 }, { "epoch": 11.720349434737924, "grad_norm": 0.06845880299806595, "learning_rate": 0.01, "loss": 1.9456, "step": 114039 }, { "epoch": 11.72065775950668, "grad_norm": 0.09319114685058594, "learning_rate": 0.01, "loss": 1.9492, "step": 114042 }, { "epoch": 11.720966084275437, "grad_norm": 0.08113586902618408, "learning_rate": 0.01, "loss": 1.9532, "step": 114045 }, { "epoch": 11.721274409044193, "grad_norm": 0.06692340970039368, "learning_rate": 0.01, "loss": 1.956, "step": 114048 }, { "epoch": 11.72158273381295, "grad_norm": 0.10912377387285233, "learning_rate": 0.01, "loss": 1.9374, "step": 114051 }, { "epoch": 11.721891058581706, "grad_norm": 0.06332485377788544, "learning_rate": 0.01, "loss": 1.9508, "step": 114054 }, { "epoch": 11.722199383350462, "grad_norm": 0.0489015206694603, "learning_rate": 0.01, "loss": 1.9147, "step": 114057 }, { "epoch": 11.722507708119219, "grad_norm": 0.06264995783567429, "learning_rate": 0.01, "loss": 1.9409, "step": 114060 }, { "epoch": 11.722816032887975, "grad_norm": 0.2138553410768509, "learning_rate": 0.01, "loss": 1.9526, "step": 114063 }, { "epoch": 11.723124357656731, "grad_norm": 0.0857858806848526, "learning_rate": 0.01, "loss": 1.9336, "step": 114066 }, { "epoch": 11.723432682425488, "grad_norm": 0.06043681129813194, "learning_rate": 0.01, "loss": 1.9467, "step": 114069 }, { "epoch": 11.723741007194244, "grad_norm": 0.04940197989344597, "learning_rate": 0.01, "loss": 1.9484, "step": 114072 }, { "epoch": 11.724049331963002, "grad_norm": 0.043699298053979874, "learning_rate": 0.01, "loss": 1.9314, "step": 114075 }, { "epoch": 11.724357656731758, "grad_norm": 0.05862659215927124, "learning_rate": 0.01, "loss": 1.9311, "step": 114078 }, { "epoch": 11.724665981500515, "grad_norm": 0.05761629715561867, "learning_rate": 0.01, "loss": 1.93, "step": 114081 }, { "epoch": 11.72497430626927, "grad_norm": 0.05093192309141159, "learning_rate": 0.01, "loss": 1.9337, "step": 114084 }, { "epoch": 11.725282631038027, "grad_norm": 0.0673130676150322, "learning_rate": 0.01, "loss": 1.9539, "step": 114087 }, { "epoch": 11.725590955806783, "grad_norm": 0.044155821204185486, "learning_rate": 0.01, "loss": 1.9285, "step": 114090 }, { "epoch": 11.72589928057554, "grad_norm": 0.0731462836265564, "learning_rate": 0.01, "loss": 1.9293, "step": 114093 }, { "epoch": 11.726207605344296, "grad_norm": 0.18581373989582062, "learning_rate": 0.01, "loss": 1.9411, "step": 114096 }, { "epoch": 11.726515930113052, "grad_norm": 0.09261856973171234, "learning_rate": 0.01, "loss": 1.911, "step": 114099 }, { "epoch": 11.726824254881809, "grad_norm": 0.06688740104436874, "learning_rate": 0.01, "loss": 1.9252, "step": 114102 }, { "epoch": 11.727132579650565, "grad_norm": 0.04713169485330582, "learning_rate": 0.01, "loss": 1.9375, "step": 114105 }, { "epoch": 11.727440904419321, "grad_norm": 0.04400784894824028, "learning_rate": 0.01, "loss": 1.9428, "step": 114108 }, { "epoch": 11.727749229188078, "grad_norm": 0.04737546294927597, "learning_rate": 0.01, "loss": 1.9429, "step": 114111 }, { "epoch": 11.728057553956834, "grad_norm": 0.03855617716908455, "learning_rate": 0.01, "loss": 1.9547, "step": 114114 }, { "epoch": 11.72836587872559, "grad_norm": 0.04322519153356552, "learning_rate": 0.01, "loss": 1.9622, "step": 114117 }, { "epoch": 11.728674203494347, "grad_norm": 0.1041107103228569, "learning_rate": 0.01, "loss": 1.9581, "step": 114120 }, { "epoch": 11.728982528263105, "grad_norm": 0.04595714807510376, "learning_rate": 0.01, "loss": 1.9354, "step": 114123 }, { "epoch": 11.729290853031861, "grad_norm": 0.062153588980436325, "learning_rate": 0.01, "loss": 1.9327, "step": 114126 }, { "epoch": 11.729599177800617, "grad_norm": 0.09231128543615341, "learning_rate": 0.01, "loss": 1.9427, "step": 114129 }, { "epoch": 11.729907502569374, "grad_norm": 0.08219090104103088, "learning_rate": 0.01, "loss": 1.9311, "step": 114132 }, { "epoch": 11.73021582733813, "grad_norm": 0.04350368678569794, "learning_rate": 0.01, "loss": 1.9299, "step": 114135 }, { "epoch": 11.730524152106886, "grad_norm": 0.10275841504335403, "learning_rate": 0.01, "loss": 1.9443, "step": 114138 }, { "epoch": 11.730832476875642, "grad_norm": 0.04107731208205223, "learning_rate": 0.01, "loss": 1.9162, "step": 114141 }, { "epoch": 11.731140801644399, "grad_norm": 0.09991445392370224, "learning_rate": 0.01, "loss": 1.9382, "step": 114144 }, { "epoch": 11.731449126413155, "grad_norm": 0.04435478150844574, "learning_rate": 0.01, "loss": 1.9322, "step": 114147 }, { "epoch": 11.731757451181911, "grad_norm": 0.08842992782592773, "learning_rate": 0.01, "loss": 1.9298, "step": 114150 }, { "epoch": 11.732065775950668, "grad_norm": 0.06753559410572052, "learning_rate": 0.01, "loss": 1.9268, "step": 114153 }, { "epoch": 11.732374100719424, "grad_norm": 0.07544936239719391, "learning_rate": 0.01, "loss": 1.9305, "step": 114156 }, { "epoch": 11.73268242548818, "grad_norm": 0.11217570304870605, "learning_rate": 0.01, "loss": 1.9169, "step": 114159 }, { "epoch": 11.732990750256937, "grad_norm": 0.04507189989089966, "learning_rate": 0.01, "loss": 1.9276, "step": 114162 }, { "epoch": 11.733299075025693, "grad_norm": 0.0769190564751625, "learning_rate": 0.01, "loss": 1.9519, "step": 114165 }, { "epoch": 11.733607399794451, "grad_norm": 0.12560825049877167, "learning_rate": 0.01, "loss": 1.9679, "step": 114168 }, { "epoch": 11.733915724563207, "grad_norm": 0.07754191756248474, "learning_rate": 0.01, "loss": 1.9352, "step": 114171 }, { "epoch": 11.734224049331964, "grad_norm": 0.08842162787914276, "learning_rate": 0.01, "loss": 1.9256, "step": 114174 }, { "epoch": 11.73453237410072, "grad_norm": 0.043762583285570145, "learning_rate": 0.01, "loss": 1.9341, "step": 114177 }, { "epoch": 11.734840698869476, "grad_norm": 0.03222280368208885, "learning_rate": 0.01, "loss": 1.956, "step": 114180 }, { "epoch": 11.735149023638233, "grad_norm": 0.03388447314500809, "learning_rate": 0.01, "loss": 1.9705, "step": 114183 }, { "epoch": 11.735457348406989, "grad_norm": 0.03262539952993393, "learning_rate": 0.01, "loss": 1.9324, "step": 114186 }, { "epoch": 11.735765673175745, "grad_norm": 0.03528260067105293, "learning_rate": 0.01, "loss": 1.9497, "step": 114189 }, { "epoch": 11.736073997944501, "grad_norm": 0.07946651428937912, "learning_rate": 0.01, "loss": 1.9166, "step": 114192 }, { "epoch": 11.736382322713258, "grad_norm": 0.059255629777908325, "learning_rate": 0.01, "loss": 1.9328, "step": 114195 }, { "epoch": 11.736690647482014, "grad_norm": 0.07938528060913086, "learning_rate": 0.01, "loss": 1.949, "step": 114198 }, { "epoch": 11.73699897225077, "grad_norm": 0.06482166796922684, "learning_rate": 0.01, "loss": 1.9378, "step": 114201 }, { "epoch": 11.737307297019527, "grad_norm": 0.03566895052790642, "learning_rate": 0.01, "loss": 1.9496, "step": 114204 }, { "epoch": 11.737615621788283, "grad_norm": 0.05914255604147911, "learning_rate": 0.01, "loss": 1.9466, "step": 114207 }, { "epoch": 11.73792394655704, "grad_norm": 0.07074881345033646, "learning_rate": 0.01, "loss": 1.9401, "step": 114210 }, { "epoch": 11.738232271325796, "grad_norm": 0.09997674077749252, "learning_rate": 0.01, "loss": 1.9516, "step": 114213 }, { "epoch": 11.738540596094554, "grad_norm": 0.14847432076931, "learning_rate": 0.01, "loss": 1.9318, "step": 114216 }, { "epoch": 11.73884892086331, "grad_norm": 0.07328097522258759, "learning_rate": 0.01, "loss": 1.9456, "step": 114219 }, { "epoch": 11.739157245632066, "grad_norm": 0.09382440894842148, "learning_rate": 0.01, "loss": 1.933, "step": 114222 }, { "epoch": 11.739465570400823, "grad_norm": 0.09148334711790085, "learning_rate": 0.01, "loss": 1.9529, "step": 114225 }, { "epoch": 11.739773895169579, "grad_norm": 0.03747795149683952, "learning_rate": 0.01, "loss": 1.9417, "step": 114228 }, { "epoch": 11.740082219938335, "grad_norm": 0.04483797028660774, "learning_rate": 0.01, "loss": 1.9324, "step": 114231 }, { "epoch": 11.740390544707092, "grad_norm": 0.07153434306383133, "learning_rate": 0.01, "loss": 1.9388, "step": 114234 }, { "epoch": 11.740698869475848, "grad_norm": 0.1360405832529068, "learning_rate": 0.01, "loss": 1.9427, "step": 114237 }, { "epoch": 11.741007194244604, "grad_norm": 0.05981483310461044, "learning_rate": 0.01, "loss": 1.9431, "step": 114240 }, { "epoch": 11.74131551901336, "grad_norm": 0.06791744381189346, "learning_rate": 0.01, "loss": 1.927, "step": 114243 }, { "epoch": 11.741623843782117, "grad_norm": 0.049168311059474945, "learning_rate": 0.01, "loss": 1.9505, "step": 114246 }, { "epoch": 11.741932168550873, "grad_norm": 0.06850913166999817, "learning_rate": 0.01, "loss": 1.9485, "step": 114249 }, { "epoch": 11.74224049331963, "grad_norm": 0.09397213160991669, "learning_rate": 0.01, "loss": 1.9558, "step": 114252 }, { "epoch": 11.742548818088386, "grad_norm": 0.14439097046852112, "learning_rate": 0.01, "loss": 1.9457, "step": 114255 }, { "epoch": 11.742857142857144, "grad_norm": 0.060273777693510056, "learning_rate": 0.01, "loss": 1.9555, "step": 114258 }, { "epoch": 11.7431654676259, "grad_norm": 0.05283762887120247, "learning_rate": 0.01, "loss": 1.9455, "step": 114261 }, { "epoch": 11.743473792394656, "grad_norm": 0.06911233812570572, "learning_rate": 0.01, "loss": 1.9301, "step": 114264 }, { "epoch": 11.743782117163413, "grad_norm": 0.06413587927818298, "learning_rate": 0.01, "loss": 1.9257, "step": 114267 }, { "epoch": 11.744090441932169, "grad_norm": 0.032178230583667755, "learning_rate": 0.01, "loss": 1.9169, "step": 114270 }, { "epoch": 11.744398766700925, "grad_norm": 0.0497891865670681, "learning_rate": 0.01, "loss": 1.9159, "step": 114273 }, { "epoch": 11.744707091469682, "grad_norm": 0.09936387091875076, "learning_rate": 0.01, "loss": 1.9294, "step": 114276 }, { "epoch": 11.745015416238438, "grad_norm": 0.05286841839551926, "learning_rate": 0.01, "loss": 1.9285, "step": 114279 }, { "epoch": 11.745323741007194, "grad_norm": 0.038032785058021545, "learning_rate": 0.01, "loss": 1.9246, "step": 114282 }, { "epoch": 11.74563206577595, "grad_norm": 0.037610363215208054, "learning_rate": 0.01, "loss": 1.9568, "step": 114285 }, { "epoch": 11.745940390544707, "grad_norm": 0.03564333915710449, "learning_rate": 0.01, "loss": 1.948, "step": 114288 }, { "epoch": 11.746248715313463, "grad_norm": 0.0679815486073494, "learning_rate": 0.01, "loss": 1.9244, "step": 114291 }, { "epoch": 11.74655704008222, "grad_norm": 0.1300264149904251, "learning_rate": 0.01, "loss": 1.9218, "step": 114294 }, { "epoch": 11.746865364850976, "grad_norm": 0.06182750687003136, "learning_rate": 0.01, "loss": 1.9502, "step": 114297 }, { "epoch": 11.747173689619732, "grad_norm": 0.10312792658805847, "learning_rate": 0.01, "loss": 1.931, "step": 114300 }, { "epoch": 11.747482014388488, "grad_norm": 0.08729681372642517, "learning_rate": 0.01, "loss": 1.9264, "step": 114303 }, { "epoch": 11.747790339157246, "grad_norm": 0.04889155924320221, "learning_rate": 0.01, "loss": 1.9052, "step": 114306 }, { "epoch": 11.748098663926003, "grad_norm": 0.08509171009063721, "learning_rate": 0.01, "loss": 1.95, "step": 114309 }, { "epoch": 11.748406988694759, "grad_norm": 0.06404619663953781, "learning_rate": 0.01, "loss": 1.9299, "step": 114312 }, { "epoch": 11.748715313463515, "grad_norm": 0.05138705298304558, "learning_rate": 0.01, "loss": 1.9183, "step": 114315 }, { "epoch": 11.749023638232272, "grad_norm": 0.03681953623890877, "learning_rate": 0.01, "loss": 1.9332, "step": 114318 }, { "epoch": 11.749331963001028, "grad_norm": 0.05085345357656479, "learning_rate": 0.01, "loss": 1.9311, "step": 114321 }, { "epoch": 11.749640287769784, "grad_norm": 0.10160741955041885, "learning_rate": 0.01, "loss": 1.9351, "step": 114324 }, { "epoch": 11.74994861253854, "grad_norm": 0.05211213231086731, "learning_rate": 0.01, "loss": 1.9363, "step": 114327 }, { "epoch": 11.750256937307297, "grad_norm": 0.09597420692443848, "learning_rate": 0.01, "loss": 1.9328, "step": 114330 }, { "epoch": 11.750565262076053, "grad_norm": 0.05214196443557739, "learning_rate": 0.01, "loss": 1.9463, "step": 114333 }, { "epoch": 11.75087358684481, "grad_norm": 0.04194926470518112, "learning_rate": 0.01, "loss": 1.9407, "step": 114336 }, { "epoch": 11.751181911613566, "grad_norm": 0.13959670066833496, "learning_rate": 0.01, "loss": 1.9224, "step": 114339 }, { "epoch": 11.751490236382322, "grad_norm": 0.10564197599887848, "learning_rate": 0.01, "loss": 1.9576, "step": 114342 }, { "epoch": 11.751798561151078, "grad_norm": 0.03934367373585701, "learning_rate": 0.01, "loss": 1.955, "step": 114345 }, { "epoch": 11.752106885919835, "grad_norm": 0.03357434645295143, "learning_rate": 0.01, "loss": 1.9152, "step": 114348 }, { "epoch": 11.752415210688593, "grad_norm": 0.0841175839304924, "learning_rate": 0.01, "loss": 1.9414, "step": 114351 }, { "epoch": 11.75272353545735, "grad_norm": 0.07452627271413803, "learning_rate": 0.01, "loss": 1.9185, "step": 114354 }, { "epoch": 11.753031860226105, "grad_norm": 0.058551378548145294, "learning_rate": 0.01, "loss": 1.9461, "step": 114357 }, { "epoch": 11.753340184994862, "grad_norm": 0.07105761021375656, "learning_rate": 0.01, "loss": 1.9187, "step": 114360 }, { "epoch": 11.753648509763618, "grad_norm": 0.06706007570028305, "learning_rate": 0.01, "loss": 1.95, "step": 114363 }, { "epoch": 11.753956834532374, "grad_norm": 0.03882206976413727, "learning_rate": 0.01, "loss": 1.9339, "step": 114366 }, { "epoch": 11.75426515930113, "grad_norm": 0.05124860629439354, "learning_rate": 0.01, "loss": 1.9655, "step": 114369 }, { "epoch": 11.754573484069887, "grad_norm": 0.11925289779901505, "learning_rate": 0.01, "loss": 1.9256, "step": 114372 }, { "epoch": 11.754881808838643, "grad_norm": 0.05063081160187721, "learning_rate": 0.01, "loss": 1.932, "step": 114375 }, { "epoch": 11.7551901336074, "grad_norm": 0.07430731505155563, "learning_rate": 0.01, "loss": 1.9549, "step": 114378 }, { "epoch": 11.755498458376156, "grad_norm": 0.08639290928840637, "learning_rate": 0.01, "loss": 1.9452, "step": 114381 }, { "epoch": 11.755806783144912, "grad_norm": 0.0768064558506012, "learning_rate": 0.01, "loss": 1.954, "step": 114384 }, { "epoch": 11.756115107913669, "grad_norm": 0.0926046296954155, "learning_rate": 0.01, "loss": 1.9287, "step": 114387 }, { "epoch": 11.756423432682425, "grad_norm": 0.04127130284905434, "learning_rate": 0.01, "loss": 1.9306, "step": 114390 }, { "epoch": 11.756731757451181, "grad_norm": 0.09220347553491592, "learning_rate": 0.01, "loss": 1.9531, "step": 114393 }, { "epoch": 11.757040082219937, "grad_norm": 0.038777247071266174, "learning_rate": 0.01, "loss": 1.9278, "step": 114396 }, { "epoch": 11.757348406988696, "grad_norm": 0.099837526679039, "learning_rate": 0.01, "loss": 1.9317, "step": 114399 }, { "epoch": 11.757656731757452, "grad_norm": 0.11611324548721313, "learning_rate": 0.01, "loss": 1.9296, "step": 114402 }, { "epoch": 11.757965056526208, "grad_norm": 0.0463559627532959, "learning_rate": 0.01, "loss": 1.9219, "step": 114405 }, { "epoch": 11.758273381294964, "grad_norm": 0.07250874489545822, "learning_rate": 0.01, "loss": 1.9269, "step": 114408 }, { "epoch": 11.75858170606372, "grad_norm": 0.03514222800731659, "learning_rate": 0.01, "loss": 1.9335, "step": 114411 }, { "epoch": 11.758890030832477, "grad_norm": 0.11197876930236816, "learning_rate": 0.01, "loss": 1.931, "step": 114414 }, { "epoch": 11.759198355601233, "grad_norm": 0.06688223779201508, "learning_rate": 0.01, "loss": 1.9354, "step": 114417 }, { "epoch": 11.75950668036999, "grad_norm": 0.044732652604579926, "learning_rate": 0.01, "loss": 1.9643, "step": 114420 }, { "epoch": 11.759815005138746, "grad_norm": 0.052017007023096085, "learning_rate": 0.01, "loss": 1.9215, "step": 114423 }, { "epoch": 11.760123329907502, "grad_norm": 0.045031189918518066, "learning_rate": 0.01, "loss": 1.9025, "step": 114426 }, { "epoch": 11.760431654676259, "grad_norm": 0.1596817821264267, "learning_rate": 0.01, "loss": 1.9113, "step": 114429 }, { "epoch": 11.760739979445015, "grad_norm": 0.12803293764591217, "learning_rate": 0.01, "loss": 1.9373, "step": 114432 }, { "epoch": 11.761048304213771, "grad_norm": 0.10335230082273483, "learning_rate": 0.01, "loss": 1.9457, "step": 114435 }, { "epoch": 11.761356628982528, "grad_norm": 0.05117227882146835, "learning_rate": 0.01, "loss": 1.9653, "step": 114438 }, { "epoch": 11.761664953751286, "grad_norm": 0.07433784008026123, "learning_rate": 0.01, "loss": 1.9407, "step": 114441 }, { "epoch": 11.761973278520042, "grad_norm": 0.10217359662055969, "learning_rate": 0.01, "loss": 1.9243, "step": 114444 }, { "epoch": 11.762281603288798, "grad_norm": 0.0698227658867836, "learning_rate": 0.01, "loss": 1.926, "step": 114447 }, { "epoch": 11.762589928057555, "grad_norm": 0.08645881712436676, "learning_rate": 0.01, "loss": 1.943, "step": 114450 }, { "epoch": 11.76289825282631, "grad_norm": 0.0554266981780529, "learning_rate": 0.01, "loss": 1.9334, "step": 114453 }, { "epoch": 11.763206577595067, "grad_norm": 0.04438130557537079, "learning_rate": 0.01, "loss": 1.9471, "step": 114456 }, { "epoch": 11.763514902363823, "grad_norm": 0.11492709070444107, "learning_rate": 0.01, "loss": 1.9163, "step": 114459 }, { "epoch": 11.76382322713258, "grad_norm": 0.045961733907461166, "learning_rate": 0.01, "loss": 1.9526, "step": 114462 }, { "epoch": 11.764131551901336, "grad_norm": 0.03696111962199211, "learning_rate": 0.01, "loss": 1.919, "step": 114465 }, { "epoch": 11.764439876670092, "grad_norm": 0.04073779284954071, "learning_rate": 0.01, "loss": 1.9442, "step": 114468 }, { "epoch": 11.764748201438849, "grad_norm": 0.0709969773888588, "learning_rate": 0.01, "loss": 1.9615, "step": 114471 }, { "epoch": 11.765056526207605, "grad_norm": 0.10012876242399216, "learning_rate": 0.01, "loss": 1.9416, "step": 114474 }, { "epoch": 11.765364850976361, "grad_norm": 0.06984630972146988, "learning_rate": 0.01, "loss": 1.9307, "step": 114477 }, { "epoch": 11.765673175745118, "grad_norm": 0.0934528261423111, "learning_rate": 0.01, "loss": 1.9063, "step": 114480 }, { "epoch": 11.765981500513874, "grad_norm": 0.03615093603730202, "learning_rate": 0.01, "loss": 1.93, "step": 114483 }, { "epoch": 11.76628982528263, "grad_norm": 0.06740186363458633, "learning_rate": 0.01, "loss": 1.928, "step": 114486 }, { "epoch": 11.766598150051387, "grad_norm": 0.1075219139456749, "learning_rate": 0.01, "loss": 1.9409, "step": 114489 }, { "epoch": 11.766906474820145, "grad_norm": 0.03994523361325264, "learning_rate": 0.01, "loss": 1.9204, "step": 114492 }, { "epoch": 11.767214799588901, "grad_norm": 0.08029788732528687, "learning_rate": 0.01, "loss": 1.9608, "step": 114495 }, { "epoch": 11.767523124357657, "grad_norm": 0.05670519918203354, "learning_rate": 0.01, "loss": 1.9448, "step": 114498 }, { "epoch": 11.767831449126414, "grad_norm": 0.05779879167675972, "learning_rate": 0.01, "loss": 1.9364, "step": 114501 }, { "epoch": 11.76813977389517, "grad_norm": 0.07612137496471405, "learning_rate": 0.01, "loss": 1.9427, "step": 114504 }, { "epoch": 11.768448098663926, "grad_norm": 0.05214044824242592, "learning_rate": 0.01, "loss": 1.9525, "step": 114507 }, { "epoch": 11.768756423432682, "grad_norm": 0.05248599871993065, "learning_rate": 0.01, "loss": 1.9552, "step": 114510 }, { "epoch": 11.769064748201439, "grad_norm": 0.038120802491903305, "learning_rate": 0.01, "loss": 1.9539, "step": 114513 }, { "epoch": 11.769373072970195, "grad_norm": 0.04978492483496666, "learning_rate": 0.01, "loss": 1.9396, "step": 114516 }, { "epoch": 11.769681397738951, "grad_norm": 0.059046100825071335, "learning_rate": 0.01, "loss": 1.9307, "step": 114519 }, { "epoch": 11.769989722507708, "grad_norm": 0.11214884370565414, "learning_rate": 0.01, "loss": 1.9406, "step": 114522 }, { "epoch": 11.770298047276464, "grad_norm": 0.05173084884881973, "learning_rate": 0.01, "loss": 1.9082, "step": 114525 }, { "epoch": 11.77060637204522, "grad_norm": 0.060318879783153534, "learning_rate": 0.01, "loss": 1.9091, "step": 114528 }, { "epoch": 11.770914696813977, "grad_norm": 0.10231828689575195, "learning_rate": 0.01, "loss": 1.9109, "step": 114531 }, { "epoch": 11.771223021582735, "grad_norm": 0.08297566324472427, "learning_rate": 0.01, "loss": 1.9637, "step": 114534 }, { "epoch": 11.771531346351491, "grad_norm": 0.10643540322780609, "learning_rate": 0.01, "loss": 1.9573, "step": 114537 }, { "epoch": 11.771839671120247, "grad_norm": 0.05867210030555725, "learning_rate": 0.01, "loss": 1.9427, "step": 114540 }, { "epoch": 11.772147995889004, "grad_norm": 0.039664510637521744, "learning_rate": 0.01, "loss": 1.9161, "step": 114543 }, { "epoch": 11.77245632065776, "grad_norm": 0.044358786195516586, "learning_rate": 0.01, "loss": 1.9458, "step": 114546 }, { "epoch": 11.772764645426516, "grad_norm": 0.09489329159259796, "learning_rate": 0.01, "loss": 1.9162, "step": 114549 }, { "epoch": 11.773072970195273, "grad_norm": 0.06318821012973785, "learning_rate": 0.01, "loss": 1.9515, "step": 114552 }, { "epoch": 11.773381294964029, "grad_norm": 0.11085253208875656, "learning_rate": 0.01, "loss": 1.9082, "step": 114555 }, { "epoch": 11.773689619732785, "grad_norm": 0.09740877896547318, "learning_rate": 0.01, "loss": 1.9267, "step": 114558 }, { "epoch": 11.773997944501541, "grad_norm": 0.037672366946935654, "learning_rate": 0.01, "loss": 1.9635, "step": 114561 }, { "epoch": 11.774306269270298, "grad_norm": 0.044902145862579346, "learning_rate": 0.01, "loss": 1.9403, "step": 114564 }, { "epoch": 11.774614594039054, "grad_norm": 0.10807695239782333, "learning_rate": 0.01, "loss": 1.904, "step": 114567 }, { "epoch": 11.77492291880781, "grad_norm": 0.0927921012043953, "learning_rate": 0.01, "loss": 1.935, "step": 114570 }, { "epoch": 11.775231243576567, "grad_norm": 0.05143541097640991, "learning_rate": 0.01, "loss": 1.926, "step": 114573 }, { "epoch": 11.775539568345323, "grad_norm": 0.09573736041784286, "learning_rate": 0.01, "loss": 1.9414, "step": 114576 }, { "epoch": 11.77584789311408, "grad_norm": 0.11310027539730072, "learning_rate": 0.01, "loss": 1.9487, "step": 114579 }, { "epoch": 11.776156217882837, "grad_norm": 0.03688127547502518, "learning_rate": 0.01, "loss": 1.941, "step": 114582 }, { "epoch": 11.776464542651594, "grad_norm": 0.035128772258758545, "learning_rate": 0.01, "loss": 1.9297, "step": 114585 }, { "epoch": 11.77677286742035, "grad_norm": 0.039170041680336, "learning_rate": 0.01, "loss": 1.9494, "step": 114588 }, { "epoch": 11.777081192189106, "grad_norm": 0.06230974569916725, "learning_rate": 0.01, "loss": 1.9405, "step": 114591 }, { "epoch": 11.777389516957863, "grad_norm": 0.06418593972921371, "learning_rate": 0.01, "loss": 1.9292, "step": 114594 }, { "epoch": 11.777697841726619, "grad_norm": 0.036100953817367554, "learning_rate": 0.01, "loss": 1.9442, "step": 114597 }, { "epoch": 11.778006166495375, "grad_norm": 0.037238385528326035, "learning_rate": 0.01, "loss": 1.9308, "step": 114600 }, { "epoch": 11.778314491264132, "grad_norm": 0.12714260816574097, "learning_rate": 0.01, "loss": 1.9226, "step": 114603 }, { "epoch": 11.778622816032888, "grad_norm": 0.13031919300556183, "learning_rate": 0.01, "loss": 1.9153, "step": 114606 }, { "epoch": 11.778931140801644, "grad_norm": 0.04448504000902176, "learning_rate": 0.01, "loss": 1.9378, "step": 114609 }, { "epoch": 11.7792394655704, "grad_norm": 0.049985919147729874, "learning_rate": 0.01, "loss": 1.9183, "step": 114612 }, { "epoch": 11.779547790339157, "grad_norm": 0.061246324330568314, "learning_rate": 0.01, "loss": 1.9218, "step": 114615 }, { "epoch": 11.779856115107913, "grad_norm": 0.04634316638112068, "learning_rate": 0.01, "loss": 1.9466, "step": 114618 }, { "epoch": 11.78016443987667, "grad_norm": 0.07784038782119751, "learning_rate": 0.01, "loss": 1.934, "step": 114621 }, { "epoch": 11.780472764645426, "grad_norm": 0.10071872174739838, "learning_rate": 0.01, "loss": 1.9458, "step": 114624 }, { "epoch": 11.780781089414184, "grad_norm": 0.04656301811337471, "learning_rate": 0.01, "loss": 1.9249, "step": 114627 }, { "epoch": 11.78108941418294, "grad_norm": 0.0605628676712513, "learning_rate": 0.01, "loss": 1.9279, "step": 114630 }, { "epoch": 11.781397738951696, "grad_norm": 0.12403811514377594, "learning_rate": 0.01, "loss": 1.9071, "step": 114633 }, { "epoch": 11.781706063720453, "grad_norm": 0.037557389587163925, "learning_rate": 0.01, "loss": 1.9453, "step": 114636 }, { "epoch": 11.782014388489209, "grad_norm": 0.0667833462357521, "learning_rate": 0.01, "loss": 1.9468, "step": 114639 }, { "epoch": 11.782322713257965, "grad_norm": 0.08555550873279572, "learning_rate": 0.01, "loss": 1.9375, "step": 114642 }, { "epoch": 11.782631038026722, "grad_norm": 0.1030685231089592, "learning_rate": 0.01, "loss": 1.964, "step": 114645 }, { "epoch": 11.782939362795478, "grad_norm": 0.06644846498966217, "learning_rate": 0.01, "loss": 1.9518, "step": 114648 }, { "epoch": 11.783247687564234, "grad_norm": 0.06920860707759857, "learning_rate": 0.01, "loss": 1.9123, "step": 114651 }, { "epoch": 11.78355601233299, "grad_norm": 0.07964517176151276, "learning_rate": 0.01, "loss": 1.9386, "step": 114654 }, { "epoch": 11.783864337101747, "grad_norm": 0.05284479632973671, "learning_rate": 0.01, "loss": 1.9241, "step": 114657 }, { "epoch": 11.784172661870503, "grad_norm": 0.1048341915011406, "learning_rate": 0.01, "loss": 1.9506, "step": 114660 }, { "epoch": 11.78448098663926, "grad_norm": 0.035469092428684235, "learning_rate": 0.01, "loss": 1.9461, "step": 114663 }, { "epoch": 11.784789311408016, "grad_norm": 0.05209077149629593, "learning_rate": 0.01, "loss": 1.9433, "step": 114666 }, { "epoch": 11.785097636176772, "grad_norm": 0.12196382880210876, "learning_rate": 0.01, "loss": 1.9301, "step": 114669 }, { "epoch": 11.785405960945528, "grad_norm": 0.047950629144907, "learning_rate": 0.01, "loss": 1.943, "step": 114672 }, { "epoch": 11.785714285714286, "grad_norm": 0.0819501280784607, "learning_rate": 0.01, "loss": 1.9458, "step": 114675 }, { "epoch": 11.786022610483043, "grad_norm": 0.06672423332929611, "learning_rate": 0.01, "loss": 1.9465, "step": 114678 }, { "epoch": 11.786330935251799, "grad_norm": 0.0418439544737339, "learning_rate": 0.01, "loss": 1.9353, "step": 114681 }, { "epoch": 11.786639260020555, "grad_norm": 0.04796493425965309, "learning_rate": 0.01, "loss": 1.9265, "step": 114684 }, { "epoch": 11.786947584789312, "grad_norm": 0.07640409469604492, "learning_rate": 0.01, "loss": 1.9556, "step": 114687 }, { "epoch": 11.787255909558068, "grad_norm": 0.045105814933776855, "learning_rate": 0.01, "loss": 1.9469, "step": 114690 }, { "epoch": 11.787564234326824, "grad_norm": 0.13338594138622284, "learning_rate": 0.01, "loss": 1.9489, "step": 114693 }, { "epoch": 11.78787255909558, "grad_norm": 0.09243103861808777, "learning_rate": 0.01, "loss": 1.9462, "step": 114696 }, { "epoch": 11.788180883864337, "grad_norm": 0.06404594331979752, "learning_rate": 0.01, "loss": 1.9257, "step": 114699 }, { "epoch": 11.788489208633093, "grad_norm": 0.07861119508743286, "learning_rate": 0.01, "loss": 1.9165, "step": 114702 }, { "epoch": 11.78879753340185, "grad_norm": 0.1092933937907219, "learning_rate": 0.01, "loss": 1.9372, "step": 114705 }, { "epoch": 11.789105858170606, "grad_norm": 0.07028528302907944, "learning_rate": 0.01, "loss": 1.9485, "step": 114708 }, { "epoch": 11.789414182939362, "grad_norm": 0.07368434220552444, "learning_rate": 0.01, "loss": 1.9145, "step": 114711 }, { "epoch": 11.789722507708118, "grad_norm": 0.08186358213424683, "learning_rate": 0.01, "loss": 1.9294, "step": 114714 }, { "epoch": 11.790030832476877, "grad_norm": 0.04825025796890259, "learning_rate": 0.01, "loss": 1.9425, "step": 114717 }, { "epoch": 11.790339157245633, "grad_norm": 0.05701156705617905, "learning_rate": 0.01, "loss": 1.949, "step": 114720 }, { "epoch": 11.79064748201439, "grad_norm": 0.034114472568035126, "learning_rate": 0.01, "loss": 1.9557, "step": 114723 }, { "epoch": 11.790955806783145, "grad_norm": 0.056312914937734604, "learning_rate": 0.01, "loss": 1.949, "step": 114726 }, { "epoch": 11.791264131551902, "grad_norm": 0.07758929580450058, "learning_rate": 0.01, "loss": 1.9333, "step": 114729 }, { "epoch": 11.791572456320658, "grad_norm": 0.04463218152523041, "learning_rate": 0.01, "loss": 1.9231, "step": 114732 }, { "epoch": 11.791880781089414, "grad_norm": 0.05125407129526138, "learning_rate": 0.01, "loss": 1.9209, "step": 114735 }, { "epoch": 11.79218910585817, "grad_norm": 0.08259645104408264, "learning_rate": 0.01, "loss": 1.9171, "step": 114738 }, { "epoch": 11.792497430626927, "grad_norm": 0.06180672347545624, "learning_rate": 0.01, "loss": 1.9295, "step": 114741 }, { "epoch": 11.792805755395683, "grad_norm": 0.05911092460155487, "learning_rate": 0.01, "loss": 1.9373, "step": 114744 }, { "epoch": 11.79311408016444, "grad_norm": 0.1537017822265625, "learning_rate": 0.01, "loss": 1.9521, "step": 114747 }, { "epoch": 11.793422404933196, "grad_norm": 0.08543099462985992, "learning_rate": 0.01, "loss": 1.9519, "step": 114750 }, { "epoch": 11.793730729701952, "grad_norm": 0.05843864008784294, "learning_rate": 0.01, "loss": 1.9155, "step": 114753 }, { "epoch": 11.794039054470709, "grad_norm": 0.04629216343164444, "learning_rate": 0.01, "loss": 1.9087, "step": 114756 }, { "epoch": 11.794347379239465, "grad_norm": 0.06042463332414627, "learning_rate": 0.01, "loss": 1.949, "step": 114759 }, { "epoch": 11.794655704008221, "grad_norm": 0.033288221806287766, "learning_rate": 0.01, "loss": 1.9523, "step": 114762 }, { "epoch": 11.79496402877698, "grad_norm": 0.060233380645513535, "learning_rate": 0.01, "loss": 1.96, "step": 114765 }, { "epoch": 11.795272353545736, "grad_norm": 0.09179306030273438, "learning_rate": 0.01, "loss": 1.9012, "step": 114768 }, { "epoch": 11.795580678314492, "grad_norm": 0.052546773105859756, "learning_rate": 0.01, "loss": 1.9552, "step": 114771 }, { "epoch": 11.795889003083248, "grad_norm": 0.06451155245304108, "learning_rate": 0.01, "loss": 1.9359, "step": 114774 }, { "epoch": 11.796197327852004, "grad_norm": 0.10466645658016205, "learning_rate": 0.01, "loss": 1.9422, "step": 114777 }, { "epoch": 11.79650565262076, "grad_norm": 0.08434021472930908, "learning_rate": 0.01, "loss": 1.9558, "step": 114780 }, { "epoch": 11.796813977389517, "grad_norm": 0.06126517429947853, "learning_rate": 0.01, "loss": 1.9527, "step": 114783 }, { "epoch": 11.797122302158273, "grad_norm": 0.09324466437101364, "learning_rate": 0.01, "loss": 1.9213, "step": 114786 }, { "epoch": 11.79743062692703, "grad_norm": 0.07028118520975113, "learning_rate": 0.01, "loss": 1.9384, "step": 114789 }, { "epoch": 11.797738951695786, "grad_norm": 0.06836069375276566, "learning_rate": 0.01, "loss": 1.9334, "step": 114792 }, { "epoch": 11.798047276464542, "grad_norm": 0.09277386218309402, "learning_rate": 0.01, "loss": 1.9475, "step": 114795 }, { "epoch": 11.798355601233299, "grad_norm": 0.08132278174161911, "learning_rate": 0.01, "loss": 1.9632, "step": 114798 }, { "epoch": 11.798663926002055, "grad_norm": 0.05600438639521599, "learning_rate": 0.01, "loss": 1.9258, "step": 114801 }, { "epoch": 11.798972250770811, "grad_norm": 0.07362427562475204, "learning_rate": 0.01, "loss": 1.932, "step": 114804 }, { "epoch": 11.799280575539568, "grad_norm": 0.06464149802923203, "learning_rate": 0.01, "loss": 1.9227, "step": 114807 }, { "epoch": 11.799588900308326, "grad_norm": 0.1496574431657791, "learning_rate": 0.01, "loss": 1.9305, "step": 114810 }, { "epoch": 11.799897225077082, "grad_norm": 0.1321372091770172, "learning_rate": 0.01, "loss": 1.9178, "step": 114813 }, { "epoch": 11.800205549845838, "grad_norm": 0.03908989951014519, "learning_rate": 0.01, "loss": 1.9309, "step": 114816 }, { "epoch": 11.800513874614595, "grad_norm": 0.03196997568011284, "learning_rate": 0.01, "loss": 1.9203, "step": 114819 }, { "epoch": 11.80082219938335, "grad_norm": 0.031193269416689873, "learning_rate": 0.01, "loss": 1.9514, "step": 114822 }, { "epoch": 11.801130524152107, "grad_norm": 0.06302262097597122, "learning_rate": 0.01, "loss": 1.9218, "step": 114825 }, { "epoch": 11.801438848920863, "grad_norm": 0.06870076805353165, "learning_rate": 0.01, "loss": 1.9363, "step": 114828 }, { "epoch": 11.80174717368962, "grad_norm": 0.044515084475278854, "learning_rate": 0.01, "loss": 1.9558, "step": 114831 }, { "epoch": 11.802055498458376, "grad_norm": 0.1153394877910614, "learning_rate": 0.01, "loss": 1.9463, "step": 114834 }, { "epoch": 11.802363823227132, "grad_norm": 0.03656914830207825, "learning_rate": 0.01, "loss": 1.9167, "step": 114837 }, { "epoch": 11.802672147995889, "grad_norm": 0.09082802385091782, "learning_rate": 0.01, "loss": 1.9535, "step": 114840 }, { "epoch": 11.802980472764645, "grad_norm": 0.06535059213638306, "learning_rate": 0.01, "loss": 1.9365, "step": 114843 }, { "epoch": 11.803288797533401, "grad_norm": 0.1293552815914154, "learning_rate": 0.01, "loss": 1.9643, "step": 114846 }, { "epoch": 11.803597122302158, "grad_norm": 0.10767047107219696, "learning_rate": 0.01, "loss": 1.9352, "step": 114849 }, { "epoch": 11.803905447070914, "grad_norm": 0.10255423933267593, "learning_rate": 0.01, "loss": 1.9505, "step": 114852 }, { "epoch": 11.80421377183967, "grad_norm": 0.08479739725589752, "learning_rate": 0.01, "loss": 1.9259, "step": 114855 }, { "epoch": 11.804522096608428, "grad_norm": 0.06828516721725464, "learning_rate": 0.01, "loss": 1.9492, "step": 114858 }, { "epoch": 11.804830421377185, "grad_norm": 0.09660480916500092, "learning_rate": 0.01, "loss": 1.9259, "step": 114861 }, { "epoch": 11.805138746145941, "grad_norm": 0.05178052559494972, "learning_rate": 0.01, "loss": 1.9507, "step": 114864 }, { "epoch": 11.805447070914697, "grad_norm": 0.04476309195160866, "learning_rate": 0.01, "loss": 1.927, "step": 114867 }, { "epoch": 11.805755395683454, "grad_norm": 0.032808542251586914, "learning_rate": 0.01, "loss": 1.914, "step": 114870 }, { "epoch": 11.80606372045221, "grad_norm": 0.04916965961456299, "learning_rate": 0.01, "loss": 1.9283, "step": 114873 }, { "epoch": 11.806372045220966, "grad_norm": 0.13022355735301971, "learning_rate": 0.01, "loss": 1.9159, "step": 114876 }, { "epoch": 11.806680369989722, "grad_norm": 0.05587311089038849, "learning_rate": 0.01, "loss": 1.9346, "step": 114879 }, { "epoch": 11.806988694758479, "grad_norm": 0.10787022858858109, "learning_rate": 0.01, "loss": 1.9401, "step": 114882 }, { "epoch": 11.807297019527235, "grad_norm": 0.05247826874256134, "learning_rate": 0.01, "loss": 1.9573, "step": 114885 }, { "epoch": 11.807605344295991, "grad_norm": 0.04825177043676376, "learning_rate": 0.01, "loss": 1.9526, "step": 114888 }, { "epoch": 11.807913669064748, "grad_norm": 0.04047880694270134, "learning_rate": 0.01, "loss": 1.946, "step": 114891 }, { "epoch": 11.808221993833504, "grad_norm": 0.12832796573638916, "learning_rate": 0.01, "loss": 1.9424, "step": 114894 }, { "epoch": 11.80853031860226, "grad_norm": 0.05020860210061073, "learning_rate": 0.01, "loss": 1.9454, "step": 114897 }, { "epoch": 11.808838643371018, "grad_norm": 0.07286757975816727, "learning_rate": 0.01, "loss": 1.9274, "step": 114900 }, { "epoch": 11.809146968139775, "grad_norm": 0.05260704830288887, "learning_rate": 0.01, "loss": 1.9583, "step": 114903 }, { "epoch": 11.809455292908531, "grad_norm": 0.08480259776115417, "learning_rate": 0.01, "loss": 1.9293, "step": 114906 }, { "epoch": 11.809763617677287, "grad_norm": 0.07258189469575882, "learning_rate": 0.01, "loss": 1.9238, "step": 114909 }, { "epoch": 11.810071942446044, "grad_norm": 0.08959133177995682, "learning_rate": 0.01, "loss": 1.9478, "step": 114912 }, { "epoch": 11.8103802672148, "grad_norm": 0.10010571777820587, "learning_rate": 0.01, "loss": 1.9419, "step": 114915 }, { "epoch": 11.810688591983556, "grad_norm": 0.06512168049812317, "learning_rate": 0.01, "loss": 1.9417, "step": 114918 }, { "epoch": 11.810996916752313, "grad_norm": 0.0446767583489418, "learning_rate": 0.01, "loss": 1.9289, "step": 114921 }, { "epoch": 11.811305241521069, "grad_norm": 0.06724687665700912, "learning_rate": 0.01, "loss": 1.9448, "step": 114924 }, { "epoch": 11.811613566289825, "grad_norm": 0.07073602825403214, "learning_rate": 0.01, "loss": 1.9582, "step": 114927 }, { "epoch": 11.811921891058581, "grad_norm": 0.050262439996004105, "learning_rate": 0.01, "loss": 1.964, "step": 114930 }, { "epoch": 11.812230215827338, "grad_norm": 0.040208492428064346, "learning_rate": 0.01, "loss": 1.9268, "step": 114933 }, { "epoch": 11.812538540596094, "grad_norm": 0.04738585278391838, "learning_rate": 0.01, "loss": 1.939, "step": 114936 }, { "epoch": 11.81284686536485, "grad_norm": 0.04911480098962784, "learning_rate": 0.01, "loss": 1.9299, "step": 114939 }, { "epoch": 11.813155190133607, "grad_norm": 0.041393179446458817, "learning_rate": 0.01, "loss": 1.9518, "step": 114942 }, { "epoch": 11.813463514902363, "grad_norm": 0.1633489429950714, "learning_rate": 0.01, "loss": 1.8952, "step": 114945 }, { "epoch": 11.81377183967112, "grad_norm": 0.14581318199634552, "learning_rate": 0.01, "loss": 1.9331, "step": 114948 }, { "epoch": 11.814080164439877, "grad_norm": 0.11440169811248779, "learning_rate": 0.01, "loss": 1.9692, "step": 114951 }, { "epoch": 11.814388489208634, "grad_norm": 0.08294911682605743, "learning_rate": 0.01, "loss": 1.9516, "step": 114954 }, { "epoch": 11.81469681397739, "grad_norm": 0.13189250230789185, "learning_rate": 0.01, "loss": 1.9537, "step": 114957 }, { "epoch": 11.815005138746146, "grad_norm": 0.04286221042275429, "learning_rate": 0.01, "loss": 1.9389, "step": 114960 }, { "epoch": 11.815313463514903, "grad_norm": 0.04227737709879875, "learning_rate": 0.01, "loss": 1.9186, "step": 114963 }, { "epoch": 11.815621788283659, "grad_norm": 0.04705207422375679, "learning_rate": 0.01, "loss": 1.927, "step": 114966 }, { "epoch": 11.815930113052415, "grad_norm": 0.0436391718685627, "learning_rate": 0.01, "loss": 1.932, "step": 114969 }, { "epoch": 11.816238437821172, "grad_norm": 0.03224620968103409, "learning_rate": 0.01, "loss": 1.9063, "step": 114972 }, { "epoch": 11.816546762589928, "grad_norm": 0.04032877832651138, "learning_rate": 0.01, "loss": 1.9163, "step": 114975 }, { "epoch": 11.816855087358684, "grad_norm": 0.055777013301849365, "learning_rate": 0.01, "loss": 1.9106, "step": 114978 }, { "epoch": 11.81716341212744, "grad_norm": 0.08680202811956406, "learning_rate": 0.01, "loss": 1.967, "step": 114981 }, { "epoch": 11.817471736896197, "grad_norm": 0.12070533633232117, "learning_rate": 0.01, "loss": 1.9386, "step": 114984 }, { "epoch": 11.817780061664953, "grad_norm": 0.08620209991931915, "learning_rate": 0.01, "loss": 1.9631, "step": 114987 }, { "epoch": 11.81808838643371, "grad_norm": 0.080354705452919, "learning_rate": 0.01, "loss": 1.9204, "step": 114990 }, { "epoch": 11.818396711202467, "grad_norm": 0.07663991302251816, "learning_rate": 0.01, "loss": 1.9255, "step": 114993 }, { "epoch": 11.818705035971224, "grad_norm": 0.053106699138879776, "learning_rate": 0.01, "loss": 1.9657, "step": 114996 }, { "epoch": 11.81901336073998, "grad_norm": 0.05605434253811836, "learning_rate": 0.01, "loss": 1.9434, "step": 114999 }, { "epoch": 11.819321685508736, "grad_norm": 0.04178521782159805, "learning_rate": 0.01, "loss": 1.9054, "step": 115002 }, { "epoch": 11.819630010277493, "grad_norm": 0.06913168728351593, "learning_rate": 0.01, "loss": 1.9547, "step": 115005 }, { "epoch": 11.819938335046249, "grad_norm": 0.04784342274069786, "learning_rate": 0.01, "loss": 1.9117, "step": 115008 }, { "epoch": 11.820246659815005, "grad_norm": 0.0556730218231678, "learning_rate": 0.01, "loss": 1.927, "step": 115011 }, { "epoch": 11.820554984583762, "grad_norm": 0.10342864692211151, "learning_rate": 0.01, "loss": 1.947, "step": 115014 }, { "epoch": 11.820863309352518, "grad_norm": 0.1111392229795456, "learning_rate": 0.01, "loss": 1.9224, "step": 115017 }, { "epoch": 11.821171634121274, "grad_norm": 0.06416431069374084, "learning_rate": 0.01, "loss": 1.9386, "step": 115020 }, { "epoch": 11.82147995889003, "grad_norm": 0.09714426100254059, "learning_rate": 0.01, "loss": 1.9439, "step": 115023 }, { "epoch": 11.821788283658787, "grad_norm": 0.06005309522151947, "learning_rate": 0.01, "loss": 1.9531, "step": 115026 }, { "epoch": 11.822096608427543, "grad_norm": 0.09578508883714676, "learning_rate": 0.01, "loss": 1.9389, "step": 115029 }, { "epoch": 11.8224049331963, "grad_norm": 0.04179423674941063, "learning_rate": 0.01, "loss": 1.9369, "step": 115032 }, { "epoch": 11.822713257965056, "grad_norm": 0.039336420595645905, "learning_rate": 0.01, "loss": 1.9204, "step": 115035 }, { "epoch": 11.823021582733812, "grad_norm": 0.0391448512673378, "learning_rate": 0.01, "loss": 1.9292, "step": 115038 }, { "epoch": 11.82332990750257, "grad_norm": 0.12323050200939178, "learning_rate": 0.01, "loss": 1.9305, "step": 115041 }, { "epoch": 11.823638232271326, "grad_norm": 0.11971402168273926, "learning_rate": 0.01, "loss": 1.9475, "step": 115044 }, { "epoch": 11.823946557040083, "grad_norm": 0.09418240189552307, "learning_rate": 0.01, "loss": 1.9327, "step": 115047 }, { "epoch": 11.824254881808839, "grad_norm": 0.1007615178823471, "learning_rate": 0.01, "loss": 1.9258, "step": 115050 }, { "epoch": 11.824563206577595, "grad_norm": 0.07259011268615723, "learning_rate": 0.01, "loss": 1.9271, "step": 115053 }, { "epoch": 11.824871531346352, "grad_norm": 0.04412701353430748, "learning_rate": 0.01, "loss": 1.9323, "step": 115056 }, { "epoch": 11.825179856115108, "grad_norm": 0.06574788689613342, "learning_rate": 0.01, "loss": 1.9591, "step": 115059 }, { "epoch": 11.825488180883864, "grad_norm": 0.03828756883740425, "learning_rate": 0.01, "loss": 1.949, "step": 115062 }, { "epoch": 11.82579650565262, "grad_norm": 0.044093042612075806, "learning_rate": 0.01, "loss": 1.9702, "step": 115065 }, { "epoch": 11.826104830421377, "grad_norm": 0.054079849272966385, "learning_rate": 0.01, "loss": 1.9335, "step": 115068 }, { "epoch": 11.826413155190133, "grad_norm": 0.0788705125451088, "learning_rate": 0.01, "loss": 1.9175, "step": 115071 }, { "epoch": 11.82672147995889, "grad_norm": 0.059001874178647995, "learning_rate": 0.01, "loss": 1.9393, "step": 115074 }, { "epoch": 11.827029804727646, "grad_norm": 0.05844485014677048, "learning_rate": 0.01, "loss": 1.9341, "step": 115077 }, { "epoch": 11.827338129496402, "grad_norm": 0.056450288742780685, "learning_rate": 0.01, "loss": 1.9482, "step": 115080 }, { "epoch": 11.827646454265158, "grad_norm": 0.04511510208249092, "learning_rate": 0.01, "loss": 1.9382, "step": 115083 }, { "epoch": 11.827954779033917, "grad_norm": 0.04276560992002487, "learning_rate": 0.01, "loss": 1.9795, "step": 115086 }, { "epoch": 11.828263103802673, "grad_norm": 0.06847599148750305, "learning_rate": 0.01, "loss": 1.937, "step": 115089 }, { "epoch": 11.82857142857143, "grad_norm": 0.09598635882139206, "learning_rate": 0.01, "loss": 1.9315, "step": 115092 }, { "epoch": 11.828879753340185, "grad_norm": 0.09008855372667313, "learning_rate": 0.01, "loss": 1.9207, "step": 115095 }, { "epoch": 11.829188078108942, "grad_norm": 0.03705834597349167, "learning_rate": 0.01, "loss": 1.9372, "step": 115098 }, { "epoch": 11.829496402877698, "grad_norm": 0.1142435371875763, "learning_rate": 0.01, "loss": 1.9625, "step": 115101 }, { "epoch": 11.829804727646454, "grad_norm": 0.0575905367732048, "learning_rate": 0.01, "loss": 1.9539, "step": 115104 }, { "epoch": 11.83011305241521, "grad_norm": 0.06506889313459396, "learning_rate": 0.01, "loss": 1.9398, "step": 115107 }, { "epoch": 11.830421377183967, "grad_norm": 0.04557286947965622, "learning_rate": 0.01, "loss": 1.9171, "step": 115110 }, { "epoch": 11.830729701952723, "grad_norm": 0.04787040501832962, "learning_rate": 0.01, "loss": 1.9177, "step": 115113 }, { "epoch": 11.83103802672148, "grad_norm": 0.035487424582242966, "learning_rate": 0.01, "loss": 1.9446, "step": 115116 }, { "epoch": 11.831346351490236, "grad_norm": 0.037838730961084366, "learning_rate": 0.01, "loss": 1.9291, "step": 115119 }, { "epoch": 11.831654676258992, "grad_norm": 0.044327255338430405, "learning_rate": 0.01, "loss": 1.9643, "step": 115122 }, { "epoch": 11.831963001027749, "grad_norm": 0.08877101540565491, "learning_rate": 0.01, "loss": 1.9068, "step": 115125 }, { "epoch": 11.832271325796505, "grad_norm": 0.03952403739094734, "learning_rate": 0.01, "loss": 1.9424, "step": 115128 }, { "epoch": 11.832579650565261, "grad_norm": 0.07244662195444107, "learning_rate": 0.01, "loss": 1.9222, "step": 115131 }, { "epoch": 11.83288797533402, "grad_norm": 0.10626883059740067, "learning_rate": 0.01, "loss": 1.9377, "step": 115134 }, { "epoch": 11.833196300102776, "grad_norm": 0.08260125666856766, "learning_rate": 0.01, "loss": 1.9243, "step": 115137 }, { "epoch": 11.833504624871532, "grad_norm": 0.03465457260608673, "learning_rate": 0.01, "loss": 1.9401, "step": 115140 }, { "epoch": 11.833812949640288, "grad_norm": 0.04090080410242081, "learning_rate": 0.01, "loss": 1.9248, "step": 115143 }, { "epoch": 11.834121274409044, "grad_norm": 0.056409016251564026, "learning_rate": 0.01, "loss": 1.9118, "step": 115146 }, { "epoch": 11.8344295991778, "grad_norm": 0.04134396091103554, "learning_rate": 0.01, "loss": 1.9384, "step": 115149 }, { "epoch": 11.834737923946557, "grad_norm": 0.045888107270002365, "learning_rate": 0.01, "loss": 1.9171, "step": 115152 }, { "epoch": 11.835046248715313, "grad_norm": 0.06528306007385254, "learning_rate": 0.01, "loss": 1.9252, "step": 115155 }, { "epoch": 11.83535457348407, "grad_norm": 0.0714694932103157, "learning_rate": 0.01, "loss": 1.9421, "step": 115158 }, { "epoch": 11.835662898252826, "grad_norm": 0.07249157130718231, "learning_rate": 0.01, "loss": 1.9336, "step": 115161 }, { "epoch": 11.835971223021582, "grad_norm": 0.06381940096616745, "learning_rate": 0.01, "loss": 1.9292, "step": 115164 }, { "epoch": 11.836279547790339, "grad_norm": 0.040719326585531235, "learning_rate": 0.01, "loss": 1.9342, "step": 115167 }, { "epoch": 11.836587872559095, "grad_norm": 0.052242450416088104, "learning_rate": 0.01, "loss": 1.9132, "step": 115170 }, { "epoch": 11.836896197327851, "grad_norm": 0.07433725148439407, "learning_rate": 0.01, "loss": 1.9372, "step": 115173 }, { "epoch": 11.83720452209661, "grad_norm": 0.06879228353500366, "learning_rate": 0.01, "loss": 1.9333, "step": 115176 }, { "epoch": 11.837512846865366, "grad_norm": 0.056126561015844345, "learning_rate": 0.01, "loss": 1.9125, "step": 115179 }, { "epoch": 11.837821171634122, "grad_norm": 0.051828134804964066, "learning_rate": 0.01, "loss": 1.9532, "step": 115182 }, { "epoch": 11.838129496402878, "grad_norm": 0.07622215151786804, "learning_rate": 0.01, "loss": 1.9629, "step": 115185 }, { "epoch": 11.838437821171635, "grad_norm": 0.06668256968259811, "learning_rate": 0.01, "loss": 1.9261, "step": 115188 }, { "epoch": 11.83874614594039, "grad_norm": 0.11664186418056488, "learning_rate": 0.01, "loss": 1.941, "step": 115191 }, { "epoch": 11.839054470709147, "grad_norm": 0.06350251287221909, "learning_rate": 0.01, "loss": 1.9551, "step": 115194 }, { "epoch": 11.839362795477903, "grad_norm": 0.07587242126464844, "learning_rate": 0.01, "loss": 1.9509, "step": 115197 }, { "epoch": 11.83967112024666, "grad_norm": 0.11297139525413513, "learning_rate": 0.01, "loss": 1.9356, "step": 115200 }, { "epoch": 11.839979445015416, "grad_norm": 0.09682600200176239, "learning_rate": 0.01, "loss": 1.9348, "step": 115203 }, { "epoch": 11.840287769784172, "grad_norm": 0.08607444167137146, "learning_rate": 0.01, "loss": 1.9402, "step": 115206 }, { "epoch": 11.840596094552929, "grad_norm": 0.06459609419107437, "learning_rate": 0.01, "loss": 1.9342, "step": 115209 }, { "epoch": 11.840904419321685, "grad_norm": 0.10332488268613815, "learning_rate": 0.01, "loss": 1.9451, "step": 115212 }, { "epoch": 11.841212744090441, "grad_norm": 0.09943923354148865, "learning_rate": 0.01, "loss": 1.925, "step": 115215 }, { "epoch": 11.841521068859198, "grad_norm": 0.04884108528494835, "learning_rate": 0.01, "loss": 1.9284, "step": 115218 }, { "epoch": 11.841829393627954, "grad_norm": 0.05334654450416565, "learning_rate": 0.01, "loss": 1.9595, "step": 115221 }, { "epoch": 11.842137718396712, "grad_norm": 0.037848688662052155, "learning_rate": 0.01, "loss": 1.943, "step": 115224 }, { "epoch": 11.842446043165468, "grad_norm": 0.05791223794221878, "learning_rate": 0.01, "loss": 1.9192, "step": 115227 }, { "epoch": 11.842754367934225, "grad_norm": 0.03935936838388443, "learning_rate": 0.01, "loss": 1.9323, "step": 115230 }, { "epoch": 11.843062692702981, "grad_norm": 0.04526537284255028, "learning_rate": 0.01, "loss": 1.9574, "step": 115233 }, { "epoch": 11.843371017471737, "grad_norm": 0.0499604307115078, "learning_rate": 0.01, "loss": 1.9363, "step": 115236 }, { "epoch": 11.843679342240494, "grad_norm": 0.1790214627981186, "learning_rate": 0.01, "loss": 1.9416, "step": 115239 }, { "epoch": 11.84398766700925, "grad_norm": 0.12452468276023865, "learning_rate": 0.01, "loss": 1.9392, "step": 115242 }, { "epoch": 11.844295991778006, "grad_norm": 0.08335813879966736, "learning_rate": 0.01, "loss": 1.9305, "step": 115245 }, { "epoch": 11.844604316546762, "grad_norm": 0.06493064761161804, "learning_rate": 0.01, "loss": 1.9533, "step": 115248 }, { "epoch": 11.844912641315519, "grad_norm": 0.03797442093491554, "learning_rate": 0.01, "loss": 1.9496, "step": 115251 }, { "epoch": 11.845220966084275, "grad_norm": 0.04726745933294296, "learning_rate": 0.01, "loss": 1.9345, "step": 115254 }, { "epoch": 11.845529290853031, "grad_norm": 0.07289905846118927, "learning_rate": 0.01, "loss": 1.9079, "step": 115257 }, { "epoch": 11.845837615621788, "grad_norm": 0.08225946128368378, "learning_rate": 0.01, "loss": 1.9067, "step": 115260 }, { "epoch": 11.846145940390544, "grad_norm": 0.09263941645622253, "learning_rate": 0.01, "loss": 1.9685, "step": 115263 }, { "epoch": 11.8464542651593, "grad_norm": 0.05777449533343315, "learning_rate": 0.01, "loss": 1.9271, "step": 115266 }, { "epoch": 11.846762589928058, "grad_norm": 0.047394949942827225, "learning_rate": 0.01, "loss": 1.9181, "step": 115269 }, { "epoch": 11.847070914696815, "grad_norm": 0.046602725982666016, "learning_rate": 0.01, "loss": 1.9459, "step": 115272 }, { "epoch": 11.847379239465571, "grad_norm": 0.09456834942102432, "learning_rate": 0.01, "loss": 1.9506, "step": 115275 }, { "epoch": 11.847687564234327, "grad_norm": 0.03587270900607109, "learning_rate": 0.01, "loss": 1.9223, "step": 115278 }, { "epoch": 11.847995889003084, "grad_norm": 0.059412844479084015, "learning_rate": 0.01, "loss": 1.9543, "step": 115281 }, { "epoch": 11.84830421377184, "grad_norm": 0.0675816535949707, "learning_rate": 0.01, "loss": 1.946, "step": 115284 }, { "epoch": 11.848612538540596, "grad_norm": 0.04191970452666283, "learning_rate": 0.01, "loss": 1.9356, "step": 115287 }, { "epoch": 11.848920863309353, "grad_norm": 0.039853785187006, "learning_rate": 0.01, "loss": 1.9312, "step": 115290 }, { "epoch": 11.849229188078109, "grad_norm": 0.1088956668972969, "learning_rate": 0.01, "loss": 1.9534, "step": 115293 }, { "epoch": 11.849537512846865, "grad_norm": 0.0794067531824112, "learning_rate": 0.01, "loss": 1.9488, "step": 115296 }, { "epoch": 11.849845837615621, "grad_norm": 0.12512779235839844, "learning_rate": 0.01, "loss": 1.9583, "step": 115299 }, { "epoch": 11.850154162384378, "grad_norm": 0.11033626645803452, "learning_rate": 0.01, "loss": 1.9106, "step": 115302 }, { "epoch": 11.850462487153134, "grad_norm": 0.09385468810796738, "learning_rate": 0.01, "loss": 1.9358, "step": 115305 }, { "epoch": 11.85077081192189, "grad_norm": 0.05897045508027077, "learning_rate": 0.01, "loss": 1.9201, "step": 115308 }, { "epoch": 11.851079136690647, "grad_norm": 0.05027012526988983, "learning_rate": 0.01, "loss": 1.918, "step": 115311 }, { "epoch": 11.851387461459403, "grad_norm": 0.05048549547791481, "learning_rate": 0.01, "loss": 1.9441, "step": 115314 }, { "epoch": 11.851695786228161, "grad_norm": 0.09872164577245712, "learning_rate": 0.01, "loss": 1.9535, "step": 115317 }, { "epoch": 11.852004110996917, "grad_norm": 0.04220118373632431, "learning_rate": 0.01, "loss": 1.9437, "step": 115320 }, { "epoch": 11.852312435765674, "grad_norm": 0.04071341082453728, "learning_rate": 0.01, "loss": 1.935, "step": 115323 }, { "epoch": 11.85262076053443, "grad_norm": 0.03733963146805763, "learning_rate": 0.01, "loss": 1.9275, "step": 115326 }, { "epoch": 11.852929085303186, "grad_norm": 0.053249627351760864, "learning_rate": 0.01, "loss": 1.9295, "step": 115329 }, { "epoch": 11.853237410071943, "grad_norm": 0.05058839172124863, "learning_rate": 0.01, "loss": 1.9357, "step": 115332 }, { "epoch": 11.853545734840699, "grad_norm": 0.043892621994018555, "learning_rate": 0.01, "loss": 1.9527, "step": 115335 }, { "epoch": 11.853854059609455, "grad_norm": 0.07909204810857773, "learning_rate": 0.01, "loss": 1.941, "step": 115338 }, { "epoch": 11.854162384378212, "grad_norm": 0.06854996085166931, "learning_rate": 0.01, "loss": 1.9442, "step": 115341 }, { "epoch": 11.854470709146968, "grad_norm": 0.053314458578825, "learning_rate": 0.01, "loss": 1.9151, "step": 115344 }, { "epoch": 11.854779033915724, "grad_norm": 0.09922850131988525, "learning_rate": 0.01, "loss": 1.9472, "step": 115347 }, { "epoch": 11.85508735868448, "grad_norm": 0.08411724865436554, "learning_rate": 0.01, "loss": 1.9322, "step": 115350 }, { "epoch": 11.855395683453237, "grad_norm": 0.055205248296260834, "learning_rate": 0.01, "loss": 1.9474, "step": 115353 }, { "epoch": 11.855704008221993, "grad_norm": 0.038524750620126724, "learning_rate": 0.01, "loss": 1.92, "step": 115356 }, { "epoch": 11.856012332990751, "grad_norm": 0.039631303399801254, "learning_rate": 0.01, "loss": 1.9255, "step": 115359 }, { "epoch": 11.856320657759507, "grad_norm": 0.10896901041269302, "learning_rate": 0.01, "loss": 1.939, "step": 115362 }, { "epoch": 11.856628982528264, "grad_norm": 0.051289912313222885, "learning_rate": 0.01, "loss": 1.9391, "step": 115365 }, { "epoch": 11.85693730729702, "grad_norm": 0.1080206036567688, "learning_rate": 0.01, "loss": 1.9567, "step": 115368 }, { "epoch": 11.857245632065776, "grad_norm": 0.037047237157821655, "learning_rate": 0.01, "loss": 1.9282, "step": 115371 }, { "epoch": 11.857553956834533, "grad_norm": 0.10592879354953766, "learning_rate": 0.01, "loss": 1.9392, "step": 115374 }, { "epoch": 11.857862281603289, "grad_norm": 0.06507657468318939, "learning_rate": 0.01, "loss": 1.9379, "step": 115377 }, { "epoch": 11.858170606372045, "grad_norm": 0.04487865790724754, "learning_rate": 0.01, "loss": 1.9289, "step": 115380 }, { "epoch": 11.858478931140802, "grad_norm": 0.05850360170006752, "learning_rate": 0.01, "loss": 1.9268, "step": 115383 }, { "epoch": 11.858787255909558, "grad_norm": 0.08132300525903702, "learning_rate": 0.01, "loss": 1.9394, "step": 115386 }, { "epoch": 11.859095580678314, "grad_norm": 0.08815014362335205, "learning_rate": 0.01, "loss": 1.9527, "step": 115389 }, { "epoch": 11.85940390544707, "grad_norm": 0.05203649029135704, "learning_rate": 0.01, "loss": 1.9617, "step": 115392 }, { "epoch": 11.859712230215827, "grad_norm": 0.07548944652080536, "learning_rate": 0.01, "loss": 1.9321, "step": 115395 }, { "epoch": 11.860020554984583, "grad_norm": 0.04299336299300194, "learning_rate": 0.01, "loss": 1.9166, "step": 115398 }, { "epoch": 11.86032887975334, "grad_norm": 0.10916072130203247, "learning_rate": 0.01, "loss": 1.9459, "step": 115401 }, { "epoch": 11.860637204522096, "grad_norm": 0.06974140554666519, "learning_rate": 0.01, "loss": 1.936, "step": 115404 }, { "epoch": 11.860945529290852, "grad_norm": 0.10091980546712875, "learning_rate": 0.01, "loss": 1.9384, "step": 115407 }, { "epoch": 11.86125385405961, "grad_norm": 0.042180851101875305, "learning_rate": 0.01, "loss": 1.9015, "step": 115410 }, { "epoch": 11.861562178828366, "grad_norm": 0.08566635102033615, "learning_rate": 0.01, "loss": 1.9487, "step": 115413 }, { "epoch": 11.861870503597123, "grad_norm": 0.050220876932144165, "learning_rate": 0.01, "loss": 1.9455, "step": 115416 }, { "epoch": 11.86217882836588, "grad_norm": 0.04333271086215973, "learning_rate": 0.01, "loss": 1.9353, "step": 115419 }, { "epoch": 11.862487153134635, "grad_norm": 0.039936210960149765, "learning_rate": 0.01, "loss": 1.9302, "step": 115422 }, { "epoch": 11.862795477903392, "grad_norm": 0.04459049925208092, "learning_rate": 0.01, "loss": 1.9226, "step": 115425 }, { "epoch": 11.863103802672148, "grad_norm": 0.06823472678661346, "learning_rate": 0.01, "loss": 1.9338, "step": 115428 }, { "epoch": 11.863412127440904, "grad_norm": 0.08977038413286209, "learning_rate": 0.01, "loss": 1.911, "step": 115431 }, { "epoch": 11.86372045220966, "grad_norm": 0.1401505172252655, "learning_rate": 0.01, "loss": 1.9521, "step": 115434 }, { "epoch": 11.864028776978417, "grad_norm": 0.09276049584150314, "learning_rate": 0.01, "loss": 1.9465, "step": 115437 }, { "epoch": 11.864337101747173, "grad_norm": 0.06281229853630066, "learning_rate": 0.01, "loss": 1.9302, "step": 115440 }, { "epoch": 11.86464542651593, "grad_norm": 0.04619922488927841, "learning_rate": 0.01, "loss": 1.9099, "step": 115443 }, { "epoch": 11.864953751284686, "grad_norm": 0.07578086107969284, "learning_rate": 0.01, "loss": 1.9433, "step": 115446 }, { "epoch": 11.865262076053442, "grad_norm": 0.0675554946064949, "learning_rate": 0.01, "loss": 1.9164, "step": 115449 }, { "epoch": 11.8655704008222, "grad_norm": 0.04776846617460251, "learning_rate": 0.01, "loss": 1.9159, "step": 115452 }, { "epoch": 11.865878725590957, "grad_norm": 0.04917658120393753, "learning_rate": 0.01, "loss": 1.9322, "step": 115455 }, { "epoch": 11.866187050359713, "grad_norm": 0.04377268627285957, "learning_rate": 0.01, "loss": 1.9707, "step": 115458 }, { "epoch": 11.86649537512847, "grad_norm": 0.10865142196416855, "learning_rate": 0.01, "loss": 1.9444, "step": 115461 }, { "epoch": 11.866803699897225, "grad_norm": 0.10919032245874405, "learning_rate": 0.01, "loss": 1.9468, "step": 115464 }, { "epoch": 11.867112024665982, "grad_norm": 0.14149057865142822, "learning_rate": 0.01, "loss": 1.9246, "step": 115467 }, { "epoch": 11.867420349434738, "grad_norm": 0.05841014161705971, "learning_rate": 0.01, "loss": 1.907, "step": 115470 }, { "epoch": 11.867728674203494, "grad_norm": 0.05625878646969795, "learning_rate": 0.01, "loss": 1.9295, "step": 115473 }, { "epoch": 11.86803699897225, "grad_norm": 0.048125796020030975, "learning_rate": 0.01, "loss": 1.9667, "step": 115476 }, { "epoch": 11.868345323741007, "grad_norm": 0.06532830744981766, "learning_rate": 0.01, "loss": 1.9389, "step": 115479 }, { "epoch": 11.868653648509763, "grad_norm": 0.03691454976797104, "learning_rate": 0.01, "loss": 1.9331, "step": 115482 }, { "epoch": 11.86896197327852, "grad_norm": 0.054014936089515686, "learning_rate": 0.01, "loss": 1.9408, "step": 115485 }, { "epoch": 11.869270298047276, "grad_norm": 0.0547962561249733, "learning_rate": 0.01, "loss": 1.9333, "step": 115488 }, { "epoch": 11.869578622816032, "grad_norm": 0.10535205155611038, "learning_rate": 0.01, "loss": 1.935, "step": 115491 }, { "epoch": 11.869886947584789, "grad_norm": 0.035013020038604736, "learning_rate": 0.01, "loss": 1.9584, "step": 115494 }, { "epoch": 11.870195272353545, "grad_norm": 0.03851677477359772, "learning_rate": 0.01, "loss": 1.927, "step": 115497 }, { "epoch": 11.870503597122303, "grad_norm": 0.0499538853764534, "learning_rate": 0.01, "loss": 1.9551, "step": 115500 }, { "epoch": 11.87081192189106, "grad_norm": 0.0537400096654892, "learning_rate": 0.01, "loss": 1.9129, "step": 115503 }, { "epoch": 11.871120246659816, "grad_norm": 0.07391097396612167, "learning_rate": 0.01, "loss": 1.9349, "step": 115506 }, { "epoch": 11.871428571428572, "grad_norm": 0.07708144932985306, "learning_rate": 0.01, "loss": 1.9395, "step": 115509 }, { "epoch": 11.871736896197328, "grad_norm": 0.05969472602009773, "learning_rate": 0.01, "loss": 1.9399, "step": 115512 }, { "epoch": 11.872045220966084, "grad_norm": 0.042705025523900986, "learning_rate": 0.01, "loss": 1.9648, "step": 115515 }, { "epoch": 11.87235354573484, "grad_norm": 0.05245598033070564, "learning_rate": 0.01, "loss": 1.9304, "step": 115518 }, { "epoch": 11.872661870503597, "grad_norm": 0.06938184052705765, "learning_rate": 0.01, "loss": 1.9519, "step": 115521 }, { "epoch": 11.872970195272353, "grad_norm": 0.039653230458498, "learning_rate": 0.01, "loss": 1.9315, "step": 115524 }, { "epoch": 11.87327852004111, "grad_norm": 0.10245396196842194, "learning_rate": 0.01, "loss": 1.9258, "step": 115527 }, { "epoch": 11.873586844809866, "grad_norm": 0.050254181027412415, "learning_rate": 0.01, "loss": 1.9375, "step": 115530 }, { "epoch": 11.873895169578622, "grad_norm": 0.09232769906520844, "learning_rate": 0.01, "loss": 1.9507, "step": 115533 }, { "epoch": 11.874203494347379, "grad_norm": 0.06693422049283981, "learning_rate": 0.01, "loss": 1.9261, "step": 115536 }, { "epoch": 11.874511819116135, "grad_norm": 0.06777456402778625, "learning_rate": 0.01, "loss": 1.9585, "step": 115539 }, { "epoch": 11.874820143884891, "grad_norm": 0.046546339988708496, "learning_rate": 0.01, "loss": 1.9297, "step": 115542 }, { "epoch": 11.87512846865365, "grad_norm": 0.09495719522237778, "learning_rate": 0.01, "loss": 1.9515, "step": 115545 }, { "epoch": 11.875436793422406, "grad_norm": 0.034482963383197784, "learning_rate": 0.01, "loss": 1.9574, "step": 115548 }, { "epoch": 11.875745118191162, "grad_norm": 0.060760658234357834, "learning_rate": 0.01, "loss": 1.9553, "step": 115551 }, { "epoch": 11.876053442959918, "grad_norm": 0.04488319158554077, "learning_rate": 0.01, "loss": 1.9235, "step": 115554 }, { "epoch": 11.876361767728675, "grad_norm": 0.07081051170825958, "learning_rate": 0.01, "loss": 1.9226, "step": 115557 }, { "epoch": 11.87667009249743, "grad_norm": 0.03904644399881363, "learning_rate": 0.01, "loss": 1.9102, "step": 115560 }, { "epoch": 11.876978417266187, "grad_norm": 0.07820060104131699, "learning_rate": 0.01, "loss": 1.9648, "step": 115563 }, { "epoch": 11.877286742034944, "grad_norm": 0.04241922125220299, "learning_rate": 0.01, "loss": 1.917, "step": 115566 }, { "epoch": 11.8775950668037, "grad_norm": 0.06383369117975235, "learning_rate": 0.01, "loss": 1.9349, "step": 115569 }, { "epoch": 11.877903391572456, "grad_norm": 0.07734183967113495, "learning_rate": 0.01, "loss": 1.9321, "step": 115572 }, { "epoch": 11.878211716341212, "grad_norm": 0.08373697102069855, "learning_rate": 0.01, "loss": 1.9433, "step": 115575 }, { "epoch": 11.878520041109969, "grad_norm": 0.0456833653151989, "learning_rate": 0.01, "loss": 1.9383, "step": 115578 }, { "epoch": 11.878828365878725, "grad_norm": 0.05270833522081375, "learning_rate": 0.01, "loss": 1.9462, "step": 115581 }, { "epoch": 11.879136690647481, "grad_norm": 0.0652565062046051, "learning_rate": 0.01, "loss": 1.9626, "step": 115584 }, { "epoch": 11.879445015416238, "grad_norm": 0.09808563441038132, "learning_rate": 0.01, "loss": 1.9382, "step": 115587 }, { "epoch": 11.879753340184994, "grad_norm": 0.06954976171255112, "learning_rate": 0.01, "loss": 1.9639, "step": 115590 }, { "epoch": 11.880061664953752, "grad_norm": 0.0871601328253746, "learning_rate": 0.01, "loss": 1.9441, "step": 115593 }, { "epoch": 11.880369989722508, "grad_norm": 0.033956415951251984, "learning_rate": 0.01, "loss": 1.9435, "step": 115596 }, { "epoch": 11.880678314491265, "grad_norm": 0.045776210725307465, "learning_rate": 0.01, "loss": 1.9229, "step": 115599 }, { "epoch": 11.880986639260021, "grad_norm": 0.03411071375012398, "learning_rate": 0.01, "loss": 1.9515, "step": 115602 }, { "epoch": 11.881294964028777, "grad_norm": 0.09710346907377243, "learning_rate": 0.01, "loss": 1.9538, "step": 115605 }, { "epoch": 11.881603288797534, "grad_norm": 0.08367155492305756, "learning_rate": 0.01, "loss": 1.9261, "step": 115608 }, { "epoch": 11.88191161356629, "grad_norm": 0.10393861681222916, "learning_rate": 0.01, "loss": 1.9432, "step": 115611 }, { "epoch": 11.882219938335046, "grad_norm": 0.12665413320064545, "learning_rate": 0.01, "loss": 1.9652, "step": 115614 }, { "epoch": 11.882528263103803, "grad_norm": 0.03951384499669075, "learning_rate": 0.01, "loss": 1.9406, "step": 115617 }, { "epoch": 11.882836587872559, "grad_norm": 0.04121113941073418, "learning_rate": 0.01, "loss": 1.9436, "step": 115620 }, { "epoch": 11.883144912641315, "grad_norm": 0.034584417939186096, "learning_rate": 0.01, "loss": 1.9252, "step": 115623 }, { "epoch": 11.883453237410071, "grad_norm": 0.03947766497731209, "learning_rate": 0.01, "loss": 1.9588, "step": 115626 }, { "epoch": 11.883761562178828, "grad_norm": 0.13804352283477783, "learning_rate": 0.01, "loss": 1.9333, "step": 115629 }, { "epoch": 11.884069886947584, "grad_norm": 0.04128848761320114, "learning_rate": 0.01, "loss": 1.9306, "step": 115632 }, { "epoch": 11.884378211716342, "grad_norm": 0.11423856765031815, "learning_rate": 0.01, "loss": 1.9495, "step": 115635 }, { "epoch": 11.884686536485098, "grad_norm": 0.08762546628713608, "learning_rate": 0.01, "loss": 1.9479, "step": 115638 }, { "epoch": 11.884994861253855, "grad_norm": 0.058912597596645355, "learning_rate": 0.01, "loss": 1.9394, "step": 115641 }, { "epoch": 11.885303186022611, "grad_norm": 0.05286059528589249, "learning_rate": 0.01, "loss": 1.9338, "step": 115644 }, { "epoch": 11.885611510791367, "grad_norm": 0.04823709651827812, "learning_rate": 0.01, "loss": 1.9526, "step": 115647 }, { "epoch": 11.885919835560124, "grad_norm": 0.03591940179467201, "learning_rate": 0.01, "loss": 1.9357, "step": 115650 }, { "epoch": 11.88622816032888, "grad_norm": 0.03444457799196243, "learning_rate": 0.01, "loss": 1.9322, "step": 115653 }, { "epoch": 11.886536485097636, "grad_norm": 0.03756314516067505, "learning_rate": 0.01, "loss": 1.9359, "step": 115656 }, { "epoch": 11.886844809866393, "grad_norm": 0.0774245411157608, "learning_rate": 0.01, "loss": 1.9319, "step": 115659 }, { "epoch": 11.887153134635149, "grad_norm": 0.08102896064519882, "learning_rate": 0.01, "loss": 1.945, "step": 115662 }, { "epoch": 11.887461459403905, "grad_norm": 0.041952721774578094, "learning_rate": 0.01, "loss": 1.9568, "step": 115665 }, { "epoch": 11.887769784172662, "grad_norm": 0.04018309712409973, "learning_rate": 0.01, "loss": 1.9157, "step": 115668 }, { "epoch": 11.888078108941418, "grad_norm": 0.037420839071273804, "learning_rate": 0.01, "loss": 1.9502, "step": 115671 }, { "epoch": 11.888386433710174, "grad_norm": 0.04719531536102295, "learning_rate": 0.01, "loss": 1.9557, "step": 115674 }, { "epoch": 11.88869475847893, "grad_norm": 0.1673678457736969, "learning_rate": 0.01, "loss": 1.9173, "step": 115677 }, { "epoch": 11.889003083247687, "grad_norm": 0.1612764298915863, "learning_rate": 0.01, "loss": 1.9648, "step": 115680 }, { "epoch": 11.889311408016445, "grad_norm": 0.09129545837640762, "learning_rate": 0.01, "loss": 1.9505, "step": 115683 }, { "epoch": 11.889619732785201, "grad_norm": 0.07026642560958862, "learning_rate": 0.01, "loss": 1.9241, "step": 115686 }, { "epoch": 11.889928057553957, "grad_norm": 0.057349856942892075, "learning_rate": 0.01, "loss": 1.9352, "step": 115689 }, { "epoch": 11.890236382322714, "grad_norm": 0.08883088082075119, "learning_rate": 0.01, "loss": 1.9469, "step": 115692 }, { "epoch": 11.89054470709147, "grad_norm": 0.0671309158205986, "learning_rate": 0.01, "loss": 1.9317, "step": 115695 }, { "epoch": 11.890853031860226, "grad_norm": 0.15628202259540558, "learning_rate": 0.01, "loss": 1.948, "step": 115698 }, { "epoch": 11.891161356628983, "grad_norm": 0.08999011665582657, "learning_rate": 0.01, "loss": 1.9591, "step": 115701 }, { "epoch": 11.891469681397739, "grad_norm": 0.036552269011735916, "learning_rate": 0.01, "loss": 1.9381, "step": 115704 }, { "epoch": 11.891778006166495, "grad_norm": 0.04400331899523735, "learning_rate": 0.01, "loss": 1.9158, "step": 115707 }, { "epoch": 11.892086330935252, "grad_norm": 0.03440818563103676, "learning_rate": 0.01, "loss": 1.9398, "step": 115710 }, { "epoch": 11.892394655704008, "grad_norm": 0.03784843161702156, "learning_rate": 0.01, "loss": 1.9514, "step": 115713 }, { "epoch": 11.892702980472764, "grad_norm": 0.038593459874391556, "learning_rate": 0.01, "loss": 1.9357, "step": 115716 }, { "epoch": 11.89301130524152, "grad_norm": 0.04749274253845215, "learning_rate": 0.01, "loss": 1.9361, "step": 115719 }, { "epoch": 11.893319630010277, "grad_norm": 0.13267368078231812, "learning_rate": 0.01, "loss": 1.9511, "step": 115722 }, { "epoch": 11.893627954779033, "grad_norm": 0.11221770942211151, "learning_rate": 0.01, "loss": 1.9159, "step": 115725 }, { "epoch": 11.893936279547791, "grad_norm": 0.04560906067490578, "learning_rate": 0.01, "loss": 1.9593, "step": 115728 }, { "epoch": 11.894244604316548, "grad_norm": 0.059753429144620895, "learning_rate": 0.01, "loss": 1.9299, "step": 115731 }, { "epoch": 11.894552929085304, "grad_norm": 0.041221734136343, "learning_rate": 0.01, "loss": 1.949, "step": 115734 }, { "epoch": 11.89486125385406, "grad_norm": 0.044288646429777145, "learning_rate": 0.01, "loss": 1.924, "step": 115737 }, { "epoch": 11.895169578622816, "grad_norm": 0.03386671096086502, "learning_rate": 0.01, "loss": 1.9223, "step": 115740 }, { "epoch": 11.895477903391573, "grad_norm": 0.04667489603161812, "learning_rate": 0.01, "loss": 1.9074, "step": 115743 }, { "epoch": 11.895786228160329, "grad_norm": 0.0888177677989006, "learning_rate": 0.01, "loss": 1.9487, "step": 115746 }, { "epoch": 11.896094552929085, "grad_norm": 0.09287005662918091, "learning_rate": 0.01, "loss": 1.9503, "step": 115749 }, { "epoch": 11.896402877697842, "grad_norm": 0.05758680775761604, "learning_rate": 0.01, "loss": 1.9127, "step": 115752 }, { "epoch": 11.896711202466598, "grad_norm": 0.0726981908082962, "learning_rate": 0.01, "loss": 1.9405, "step": 115755 }, { "epoch": 11.897019527235354, "grad_norm": 0.05644940957427025, "learning_rate": 0.01, "loss": 1.9384, "step": 115758 }, { "epoch": 11.89732785200411, "grad_norm": 0.14221036434173584, "learning_rate": 0.01, "loss": 1.9618, "step": 115761 }, { "epoch": 11.897636176772867, "grad_norm": 0.11654707789421082, "learning_rate": 0.01, "loss": 1.9217, "step": 115764 }, { "epoch": 11.897944501541623, "grad_norm": 0.036727387458086014, "learning_rate": 0.01, "loss": 1.9346, "step": 115767 }, { "epoch": 11.89825282631038, "grad_norm": 0.04845240339636803, "learning_rate": 0.01, "loss": 1.9268, "step": 115770 }, { "epoch": 11.898561151079136, "grad_norm": 0.05198315903544426, "learning_rate": 0.01, "loss": 1.9359, "step": 115773 }, { "epoch": 11.898869475847894, "grad_norm": 0.0603262297809124, "learning_rate": 0.01, "loss": 1.9255, "step": 115776 }, { "epoch": 11.89917780061665, "grad_norm": 0.037290554493665695, "learning_rate": 0.01, "loss": 1.8963, "step": 115779 }, { "epoch": 11.899486125385407, "grad_norm": 0.04267965257167816, "learning_rate": 0.01, "loss": 1.9215, "step": 115782 }, { "epoch": 11.899794450154163, "grad_norm": 0.0385502353310585, "learning_rate": 0.01, "loss": 1.9267, "step": 115785 }, { "epoch": 11.90010277492292, "grad_norm": 0.07459340244531631, "learning_rate": 0.01, "loss": 1.9271, "step": 115788 }, { "epoch": 11.900411099691675, "grad_norm": 0.07064307481050491, "learning_rate": 0.01, "loss": 1.9322, "step": 115791 }, { "epoch": 11.900719424460432, "grad_norm": 0.08032072335481644, "learning_rate": 0.01, "loss": 1.9283, "step": 115794 }, { "epoch": 11.901027749229188, "grad_norm": 0.04520730301737785, "learning_rate": 0.01, "loss": 1.9263, "step": 115797 }, { "epoch": 11.901336073997944, "grad_norm": 0.08594592660665512, "learning_rate": 0.01, "loss": 1.9367, "step": 115800 }, { "epoch": 11.9016443987667, "grad_norm": 0.042838793247938156, "learning_rate": 0.01, "loss": 1.9458, "step": 115803 }, { "epoch": 11.901952723535457, "grad_norm": 0.03810225799679756, "learning_rate": 0.01, "loss": 1.9448, "step": 115806 }, { "epoch": 11.902261048304213, "grad_norm": 0.06649664044380188, "learning_rate": 0.01, "loss": 1.9036, "step": 115809 }, { "epoch": 11.90256937307297, "grad_norm": 0.08439337462186813, "learning_rate": 0.01, "loss": 1.9178, "step": 115812 }, { "epoch": 11.902877697841726, "grad_norm": 0.10588230937719345, "learning_rate": 0.01, "loss": 1.9023, "step": 115815 }, { "epoch": 11.903186022610484, "grad_norm": 0.15726368129253387, "learning_rate": 0.01, "loss": 1.9447, "step": 115818 }, { "epoch": 11.90349434737924, "grad_norm": 0.09451795369386673, "learning_rate": 0.01, "loss": 1.9414, "step": 115821 }, { "epoch": 11.903802672147997, "grad_norm": 0.048060912638902664, "learning_rate": 0.01, "loss": 1.9362, "step": 115824 }, { "epoch": 11.904110996916753, "grad_norm": 0.03914719447493553, "learning_rate": 0.01, "loss": 1.9478, "step": 115827 }, { "epoch": 11.90441932168551, "grad_norm": 0.07605911046266556, "learning_rate": 0.01, "loss": 1.9464, "step": 115830 }, { "epoch": 11.904727646454266, "grad_norm": 0.029742151498794556, "learning_rate": 0.01, "loss": 1.9127, "step": 115833 }, { "epoch": 11.905035971223022, "grad_norm": 0.11301050335168839, "learning_rate": 0.01, "loss": 1.9496, "step": 115836 }, { "epoch": 11.905344295991778, "grad_norm": 0.08454637229442596, "learning_rate": 0.01, "loss": 1.9241, "step": 115839 }, { "epoch": 11.905652620760534, "grad_norm": 0.0823889970779419, "learning_rate": 0.01, "loss": 1.9377, "step": 115842 }, { "epoch": 11.90596094552929, "grad_norm": 0.0720774456858635, "learning_rate": 0.01, "loss": 1.9411, "step": 115845 }, { "epoch": 11.906269270298047, "grad_norm": 0.05574191361665726, "learning_rate": 0.01, "loss": 1.9395, "step": 115848 }, { "epoch": 11.906577595066803, "grad_norm": 0.06776542216539383, "learning_rate": 0.01, "loss": 1.9511, "step": 115851 }, { "epoch": 11.90688591983556, "grad_norm": 0.04499393701553345, "learning_rate": 0.01, "loss": 1.9386, "step": 115854 }, { "epoch": 11.907194244604316, "grad_norm": 0.030573226511478424, "learning_rate": 0.01, "loss": 1.9454, "step": 115857 }, { "epoch": 11.907502569373072, "grad_norm": 0.05911063402891159, "learning_rate": 0.01, "loss": 1.9611, "step": 115860 }, { "epoch": 11.907810894141829, "grad_norm": 0.04587172716856003, "learning_rate": 0.01, "loss": 1.9534, "step": 115863 }, { "epoch": 11.908119218910585, "grad_norm": 0.08495959639549255, "learning_rate": 0.01, "loss": 1.9284, "step": 115866 }, { "epoch": 11.908427543679343, "grad_norm": 0.11106112599372864, "learning_rate": 0.01, "loss": 1.9505, "step": 115869 }, { "epoch": 11.9087358684481, "grad_norm": 0.04350137710571289, "learning_rate": 0.01, "loss": 1.9148, "step": 115872 }, { "epoch": 11.909044193216856, "grad_norm": 0.12991520762443542, "learning_rate": 0.01, "loss": 1.9464, "step": 115875 }, { "epoch": 11.909352517985612, "grad_norm": 0.0833691954612732, "learning_rate": 0.01, "loss": 1.9404, "step": 115878 }, { "epoch": 11.909660842754368, "grad_norm": 0.04312315955758095, "learning_rate": 0.01, "loss": 1.9256, "step": 115881 }, { "epoch": 11.909969167523125, "grad_norm": 0.04676743596792221, "learning_rate": 0.01, "loss": 1.9579, "step": 115884 }, { "epoch": 11.91027749229188, "grad_norm": 0.0459318682551384, "learning_rate": 0.01, "loss": 1.9216, "step": 115887 }, { "epoch": 11.910585817060637, "grad_norm": 0.03991396725177765, "learning_rate": 0.01, "loss": 1.9608, "step": 115890 }, { "epoch": 11.910894141829393, "grad_norm": 0.05674207583069801, "learning_rate": 0.01, "loss": 1.9563, "step": 115893 }, { "epoch": 11.91120246659815, "grad_norm": 0.08130472898483276, "learning_rate": 0.01, "loss": 1.9553, "step": 115896 }, { "epoch": 11.911510791366906, "grad_norm": 0.14991524815559387, "learning_rate": 0.01, "loss": 1.9309, "step": 115899 }, { "epoch": 11.911819116135662, "grad_norm": 0.06426069140434265, "learning_rate": 0.01, "loss": 1.9608, "step": 115902 }, { "epoch": 11.912127440904419, "grad_norm": 0.046491220593452454, "learning_rate": 0.01, "loss": 1.9413, "step": 115905 }, { "epoch": 11.912435765673175, "grad_norm": 0.04492336884140968, "learning_rate": 0.01, "loss": 1.9219, "step": 115908 }, { "epoch": 11.912744090441933, "grad_norm": 0.04138762131333351, "learning_rate": 0.01, "loss": 1.9651, "step": 115911 }, { "epoch": 11.91305241521069, "grad_norm": 0.07580280303955078, "learning_rate": 0.01, "loss": 1.9298, "step": 115914 }, { "epoch": 11.913360739979446, "grad_norm": 0.09959438443183899, "learning_rate": 0.01, "loss": 1.9487, "step": 115917 }, { "epoch": 11.913669064748202, "grad_norm": 0.035286176949739456, "learning_rate": 0.01, "loss": 1.9391, "step": 115920 }, { "epoch": 11.913977389516958, "grad_norm": 0.089673712849617, "learning_rate": 0.01, "loss": 1.9216, "step": 115923 }, { "epoch": 11.914285714285715, "grad_norm": 0.09498865902423859, "learning_rate": 0.01, "loss": 1.935, "step": 115926 }, { "epoch": 11.914594039054471, "grad_norm": 0.07095696777105331, "learning_rate": 0.01, "loss": 1.9113, "step": 115929 }, { "epoch": 11.914902363823227, "grad_norm": 0.11000113189220428, "learning_rate": 0.01, "loss": 1.9485, "step": 115932 }, { "epoch": 11.915210688591984, "grad_norm": 0.049887191504240036, "learning_rate": 0.01, "loss": 1.9484, "step": 115935 }, { "epoch": 11.91551901336074, "grad_norm": 0.03578448295593262, "learning_rate": 0.01, "loss": 1.9233, "step": 115938 }, { "epoch": 11.915827338129496, "grad_norm": 0.03840559720993042, "learning_rate": 0.01, "loss": 1.9403, "step": 115941 }, { "epoch": 11.916135662898252, "grad_norm": 0.08180845528841019, "learning_rate": 0.01, "loss": 1.9774, "step": 115944 }, { "epoch": 11.916443987667009, "grad_norm": 0.08446424454450607, "learning_rate": 0.01, "loss": 1.9304, "step": 115947 }, { "epoch": 11.916752312435765, "grad_norm": 0.06296598166227341, "learning_rate": 0.01, "loss": 1.9595, "step": 115950 }, { "epoch": 11.917060637204521, "grad_norm": 0.09014962613582611, "learning_rate": 0.01, "loss": 1.93, "step": 115953 }, { "epoch": 11.917368961973278, "grad_norm": 0.060182347893714905, "learning_rate": 0.01, "loss": 1.9536, "step": 115956 }, { "epoch": 11.917677286742036, "grad_norm": 0.07497214525938034, "learning_rate": 0.01, "loss": 1.9089, "step": 115959 }, { "epoch": 11.917985611510792, "grad_norm": 0.0734238550066948, "learning_rate": 0.01, "loss": 1.9407, "step": 115962 }, { "epoch": 11.918293936279548, "grad_norm": 0.09660092741250992, "learning_rate": 0.01, "loss": 1.9204, "step": 115965 }, { "epoch": 11.918602261048305, "grad_norm": 0.12239353358745575, "learning_rate": 0.01, "loss": 1.9562, "step": 115968 }, { "epoch": 11.918910585817061, "grad_norm": 0.09368391335010529, "learning_rate": 0.01, "loss": 1.9393, "step": 115971 }, { "epoch": 11.919218910585817, "grad_norm": 0.07862232625484467, "learning_rate": 0.01, "loss": 1.9181, "step": 115974 }, { "epoch": 11.919527235354574, "grad_norm": 0.034651078283786774, "learning_rate": 0.01, "loss": 1.912, "step": 115977 }, { "epoch": 11.91983556012333, "grad_norm": 0.12038659304380417, "learning_rate": 0.01, "loss": 1.9564, "step": 115980 }, { "epoch": 11.920143884892086, "grad_norm": 0.033229418098926544, "learning_rate": 0.01, "loss": 1.9404, "step": 115983 }, { "epoch": 11.920452209660843, "grad_norm": 0.07518643885850906, "learning_rate": 0.01, "loss": 1.9371, "step": 115986 }, { "epoch": 11.920760534429599, "grad_norm": 0.08469729870557785, "learning_rate": 0.01, "loss": 1.9284, "step": 115989 }, { "epoch": 11.921068859198355, "grad_norm": 0.051580049097537994, "learning_rate": 0.01, "loss": 1.9485, "step": 115992 }, { "epoch": 11.921377183967111, "grad_norm": 0.11890963464975357, "learning_rate": 0.01, "loss": 1.9642, "step": 115995 }, { "epoch": 11.921685508735868, "grad_norm": 0.04845276474952698, "learning_rate": 0.01, "loss": 1.9252, "step": 115998 }, { "epoch": 11.921993833504624, "grad_norm": 0.0875236839056015, "learning_rate": 0.01, "loss": 1.9428, "step": 116001 }, { "epoch": 11.922302158273382, "grad_norm": 0.07505252212285995, "learning_rate": 0.01, "loss": 1.955, "step": 116004 }, { "epoch": 11.922610483042138, "grad_norm": 0.06646228581666946, "learning_rate": 0.01, "loss": 1.9323, "step": 116007 }, { "epoch": 11.922918807810895, "grad_norm": 0.08600006252527237, "learning_rate": 0.01, "loss": 1.9397, "step": 116010 }, { "epoch": 11.923227132579651, "grad_norm": 0.11209333688020706, "learning_rate": 0.01, "loss": 1.9384, "step": 116013 }, { "epoch": 11.923535457348407, "grad_norm": 0.10353491455316544, "learning_rate": 0.01, "loss": 1.9605, "step": 116016 }, { "epoch": 11.923843782117164, "grad_norm": 0.14115461707115173, "learning_rate": 0.01, "loss": 1.9436, "step": 116019 }, { "epoch": 11.92415210688592, "grad_norm": 0.044179707765579224, "learning_rate": 0.01, "loss": 1.9494, "step": 116022 }, { "epoch": 11.924460431654676, "grad_norm": 0.03707960247993469, "learning_rate": 0.01, "loss": 1.922, "step": 116025 }, { "epoch": 11.924768756423433, "grad_norm": 0.04922756925225258, "learning_rate": 0.01, "loss": 1.9224, "step": 116028 }, { "epoch": 11.925077081192189, "grad_norm": 0.06139388680458069, "learning_rate": 0.01, "loss": 1.9419, "step": 116031 }, { "epoch": 11.925385405960945, "grad_norm": 0.09925854951143265, "learning_rate": 0.01, "loss": 1.9004, "step": 116034 }, { "epoch": 11.925693730729702, "grad_norm": 0.08050132542848587, "learning_rate": 0.01, "loss": 1.9471, "step": 116037 }, { "epoch": 11.926002055498458, "grad_norm": 0.08102965354919434, "learning_rate": 0.01, "loss": 1.9383, "step": 116040 }, { "epoch": 11.926310380267214, "grad_norm": 0.11337810754776001, "learning_rate": 0.01, "loss": 1.937, "step": 116043 }, { "epoch": 11.92661870503597, "grad_norm": 0.05181996151804924, "learning_rate": 0.01, "loss": 1.9354, "step": 116046 }, { "epoch": 11.926927029804727, "grad_norm": 0.07451339066028595, "learning_rate": 0.01, "loss": 1.9442, "step": 116049 }, { "epoch": 11.927235354573485, "grad_norm": 0.04624432325363159, "learning_rate": 0.01, "loss": 1.9311, "step": 116052 }, { "epoch": 11.927543679342241, "grad_norm": 0.1001310721039772, "learning_rate": 0.01, "loss": 1.9241, "step": 116055 }, { "epoch": 11.927852004110997, "grad_norm": 0.05606957897543907, "learning_rate": 0.01, "loss": 1.9314, "step": 116058 }, { "epoch": 11.928160328879754, "grad_norm": 0.08710836619138718, "learning_rate": 0.01, "loss": 1.9683, "step": 116061 }, { "epoch": 11.92846865364851, "grad_norm": 0.05734976381063461, "learning_rate": 0.01, "loss": 1.932, "step": 116064 }, { "epoch": 11.928776978417266, "grad_norm": 0.06889203935861588, "learning_rate": 0.01, "loss": 1.9388, "step": 116067 }, { "epoch": 11.929085303186023, "grad_norm": 0.07249484211206436, "learning_rate": 0.01, "loss": 1.9341, "step": 116070 }, { "epoch": 11.929393627954779, "grad_norm": 0.038481082767248154, "learning_rate": 0.01, "loss": 1.9396, "step": 116073 }, { "epoch": 11.929701952723535, "grad_norm": 0.040560442954301834, "learning_rate": 0.01, "loss": 1.9164, "step": 116076 }, { "epoch": 11.930010277492292, "grad_norm": 0.04894191026687622, "learning_rate": 0.01, "loss": 1.9452, "step": 116079 }, { "epoch": 11.930318602261048, "grad_norm": 0.05847233161330223, "learning_rate": 0.01, "loss": 1.9112, "step": 116082 }, { "epoch": 11.930626927029804, "grad_norm": 0.05613159015774727, "learning_rate": 0.01, "loss": 1.9236, "step": 116085 }, { "epoch": 11.93093525179856, "grad_norm": 0.03757588565349579, "learning_rate": 0.01, "loss": 1.9526, "step": 116088 }, { "epoch": 11.931243576567317, "grad_norm": 0.11519763618707657, "learning_rate": 0.01, "loss": 1.9571, "step": 116091 }, { "epoch": 11.931551901336075, "grad_norm": 0.08751515299081802, "learning_rate": 0.01, "loss": 1.9018, "step": 116094 }, { "epoch": 11.931860226104831, "grad_norm": 0.14398428797721863, "learning_rate": 0.01, "loss": 1.9153, "step": 116097 }, { "epoch": 11.932168550873588, "grad_norm": 0.18179289996623993, "learning_rate": 0.01, "loss": 1.9604, "step": 116100 }, { "epoch": 11.932476875642344, "grad_norm": 0.13266566395759583, "learning_rate": 0.01, "loss": 1.9321, "step": 116103 }, { "epoch": 11.9327852004111, "grad_norm": 0.061238087713718414, "learning_rate": 0.01, "loss": 1.9263, "step": 116106 }, { "epoch": 11.933093525179856, "grad_norm": 0.04893723875284195, "learning_rate": 0.01, "loss": 1.9392, "step": 116109 }, { "epoch": 11.933401849948613, "grad_norm": 0.04873514920473099, "learning_rate": 0.01, "loss": 1.9208, "step": 116112 }, { "epoch": 11.933710174717369, "grad_norm": 0.07248979061841965, "learning_rate": 0.01, "loss": 1.9332, "step": 116115 }, { "epoch": 11.934018499486125, "grad_norm": 0.04428212717175484, "learning_rate": 0.01, "loss": 1.9619, "step": 116118 }, { "epoch": 11.934326824254882, "grad_norm": 0.11622639745473862, "learning_rate": 0.01, "loss": 1.9508, "step": 116121 }, { "epoch": 11.934635149023638, "grad_norm": 0.10958331823348999, "learning_rate": 0.01, "loss": 1.9419, "step": 116124 }, { "epoch": 11.934943473792394, "grad_norm": 0.11636814475059509, "learning_rate": 0.01, "loss": 1.9347, "step": 116127 }, { "epoch": 11.93525179856115, "grad_norm": 0.04917363077402115, "learning_rate": 0.01, "loss": 1.9271, "step": 116130 }, { "epoch": 11.935560123329907, "grad_norm": 0.04304635524749756, "learning_rate": 0.01, "loss": 1.9429, "step": 116133 }, { "epoch": 11.935868448098663, "grad_norm": 0.0436469204723835, "learning_rate": 0.01, "loss": 1.9488, "step": 116136 }, { "epoch": 11.93617677286742, "grad_norm": 0.06456790119409561, "learning_rate": 0.01, "loss": 1.9381, "step": 116139 }, { "epoch": 11.936485097636178, "grad_norm": 0.09960170090198517, "learning_rate": 0.01, "loss": 1.9343, "step": 116142 }, { "epoch": 11.936793422404934, "grad_norm": 0.12316549569368362, "learning_rate": 0.01, "loss": 1.9425, "step": 116145 }, { "epoch": 11.93710174717369, "grad_norm": 0.044478122144937515, "learning_rate": 0.01, "loss": 1.9541, "step": 116148 }, { "epoch": 11.937410071942447, "grad_norm": 0.048924773931503296, "learning_rate": 0.01, "loss": 1.9373, "step": 116151 }, { "epoch": 11.937718396711203, "grad_norm": 0.04412318766117096, "learning_rate": 0.01, "loss": 1.9407, "step": 116154 }, { "epoch": 11.93802672147996, "grad_norm": 0.05070868134498596, "learning_rate": 0.01, "loss": 1.9318, "step": 116157 }, { "epoch": 11.938335046248715, "grad_norm": 0.06851394474506378, "learning_rate": 0.01, "loss": 1.9193, "step": 116160 }, { "epoch": 11.938643371017472, "grad_norm": 0.0725923627614975, "learning_rate": 0.01, "loss": 1.9658, "step": 116163 }, { "epoch": 11.938951695786228, "grad_norm": 0.09985040128231049, "learning_rate": 0.01, "loss": 1.9579, "step": 116166 }, { "epoch": 11.939260020554984, "grad_norm": 0.09955912828445435, "learning_rate": 0.01, "loss": 1.9228, "step": 116169 }, { "epoch": 11.93956834532374, "grad_norm": 0.0586288683116436, "learning_rate": 0.01, "loss": 1.9384, "step": 116172 }, { "epoch": 11.939876670092497, "grad_norm": 0.09830272942781448, "learning_rate": 0.01, "loss": 1.945, "step": 116175 }, { "epoch": 11.940184994861253, "grad_norm": 0.08258133381605148, "learning_rate": 0.01, "loss": 1.9279, "step": 116178 }, { "epoch": 11.94049331963001, "grad_norm": 0.036119941622018814, "learning_rate": 0.01, "loss": 1.9358, "step": 116181 }, { "epoch": 11.940801644398766, "grad_norm": 0.059807684272527695, "learning_rate": 0.01, "loss": 1.9408, "step": 116184 }, { "epoch": 11.941109969167524, "grad_norm": 0.06625748425722122, "learning_rate": 0.01, "loss": 1.9298, "step": 116187 }, { "epoch": 11.94141829393628, "grad_norm": 0.057437337934970856, "learning_rate": 0.01, "loss": 1.9438, "step": 116190 }, { "epoch": 11.941726618705037, "grad_norm": 0.03686125576496124, "learning_rate": 0.01, "loss": 1.9352, "step": 116193 }, { "epoch": 11.942034943473793, "grad_norm": 0.050753671675920486, "learning_rate": 0.01, "loss": 1.9309, "step": 116196 }, { "epoch": 11.94234326824255, "grad_norm": 0.05824119970202446, "learning_rate": 0.01, "loss": 1.9438, "step": 116199 }, { "epoch": 11.942651593011306, "grad_norm": 0.10610711574554443, "learning_rate": 0.01, "loss": 1.9201, "step": 116202 }, { "epoch": 11.942959917780062, "grad_norm": 0.04334506765007973, "learning_rate": 0.01, "loss": 1.9451, "step": 116205 }, { "epoch": 11.943268242548818, "grad_norm": 0.04354944825172424, "learning_rate": 0.01, "loss": 1.9526, "step": 116208 }, { "epoch": 11.943576567317574, "grad_norm": 0.046755727380514145, "learning_rate": 0.01, "loss": 1.9409, "step": 116211 }, { "epoch": 11.94388489208633, "grad_norm": 0.06991719454526901, "learning_rate": 0.01, "loss": 1.9485, "step": 116214 }, { "epoch": 11.944193216855087, "grad_norm": 0.06674511730670929, "learning_rate": 0.01, "loss": 1.9384, "step": 116217 }, { "epoch": 11.944501541623843, "grad_norm": 0.05555736646056175, "learning_rate": 0.01, "loss": 1.9343, "step": 116220 }, { "epoch": 11.9448098663926, "grad_norm": 0.05003273859620094, "learning_rate": 0.01, "loss": 1.9303, "step": 116223 }, { "epoch": 11.945118191161356, "grad_norm": 0.13730725646018982, "learning_rate": 0.01, "loss": 1.9565, "step": 116226 }, { "epoch": 11.945426515930112, "grad_norm": 0.051087841391563416, "learning_rate": 0.01, "loss": 1.9096, "step": 116229 }, { "epoch": 11.945734840698869, "grad_norm": 0.07299045473337173, "learning_rate": 0.01, "loss": 1.9167, "step": 116232 }, { "epoch": 11.946043165467627, "grad_norm": 0.03582729771733284, "learning_rate": 0.01, "loss": 1.9425, "step": 116235 }, { "epoch": 11.946351490236383, "grad_norm": 0.05510307103395462, "learning_rate": 0.01, "loss": 1.9403, "step": 116238 }, { "epoch": 11.94665981500514, "grad_norm": 0.07253251224756241, "learning_rate": 0.01, "loss": 1.9424, "step": 116241 }, { "epoch": 11.946968139773896, "grad_norm": 0.08624812960624695, "learning_rate": 0.01, "loss": 1.9468, "step": 116244 }, { "epoch": 11.947276464542652, "grad_norm": 0.05577114224433899, "learning_rate": 0.01, "loss": 1.8999, "step": 116247 }, { "epoch": 11.947584789311408, "grad_norm": 0.044872984290122986, "learning_rate": 0.01, "loss": 1.9364, "step": 116250 }, { "epoch": 11.947893114080165, "grad_norm": 0.04620710387825966, "learning_rate": 0.01, "loss": 1.9277, "step": 116253 }, { "epoch": 11.94820143884892, "grad_norm": 0.08223326504230499, "learning_rate": 0.01, "loss": 1.9664, "step": 116256 }, { "epoch": 11.948509763617677, "grad_norm": 0.06317196786403656, "learning_rate": 0.01, "loss": 1.9607, "step": 116259 }, { "epoch": 11.948818088386433, "grad_norm": 0.07849643379449844, "learning_rate": 0.01, "loss": 1.9319, "step": 116262 }, { "epoch": 11.94912641315519, "grad_norm": 0.12053529173135757, "learning_rate": 0.01, "loss": 1.9288, "step": 116265 }, { "epoch": 11.949434737923946, "grad_norm": 0.13052164018154144, "learning_rate": 0.01, "loss": 1.9236, "step": 116268 }, { "epoch": 11.949743062692702, "grad_norm": 0.046053748577833176, "learning_rate": 0.01, "loss": 1.9637, "step": 116271 }, { "epoch": 11.950051387461459, "grad_norm": 0.04677347093820572, "learning_rate": 0.01, "loss": 1.9491, "step": 116274 }, { "epoch": 11.950359712230217, "grad_norm": 0.04560299590229988, "learning_rate": 0.01, "loss": 1.9126, "step": 116277 }, { "epoch": 11.950668036998973, "grad_norm": 0.037877749651670456, "learning_rate": 0.01, "loss": 1.9472, "step": 116280 }, { "epoch": 11.95097636176773, "grad_norm": 0.04126598313450813, "learning_rate": 0.01, "loss": 1.9358, "step": 116283 }, { "epoch": 11.951284686536486, "grad_norm": 0.038106292486190796, "learning_rate": 0.01, "loss": 1.9381, "step": 116286 }, { "epoch": 11.951593011305242, "grad_norm": 0.07549608498811722, "learning_rate": 0.01, "loss": 1.9218, "step": 116289 }, { "epoch": 11.951901336073998, "grad_norm": 0.09480550140142441, "learning_rate": 0.01, "loss": 1.926, "step": 116292 }, { "epoch": 11.952209660842755, "grad_norm": 0.051869966089725494, "learning_rate": 0.01, "loss": 1.9564, "step": 116295 }, { "epoch": 11.952517985611511, "grad_norm": 0.055569011718034744, "learning_rate": 0.01, "loss": 1.9189, "step": 116298 }, { "epoch": 11.952826310380267, "grad_norm": 0.04779817909002304, "learning_rate": 0.01, "loss": 1.9322, "step": 116301 }, { "epoch": 11.953134635149024, "grad_norm": 0.05312659218907356, "learning_rate": 0.01, "loss": 1.9355, "step": 116304 }, { "epoch": 11.95344295991778, "grad_norm": 0.05799674987792969, "learning_rate": 0.01, "loss": 1.9436, "step": 116307 }, { "epoch": 11.953751284686536, "grad_norm": 0.047829464077949524, "learning_rate": 0.01, "loss": 1.9385, "step": 116310 }, { "epoch": 11.954059609455292, "grad_norm": 0.08063242584466934, "learning_rate": 0.01, "loss": 1.9359, "step": 116313 }, { "epoch": 11.954367934224049, "grad_norm": 0.13868576288223267, "learning_rate": 0.01, "loss": 1.9268, "step": 116316 }, { "epoch": 11.954676258992805, "grad_norm": 0.06510185450315475, "learning_rate": 0.01, "loss": 1.9148, "step": 116319 }, { "epoch": 11.954984583761561, "grad_norm": 0.05362662300467491, "learning_rate": 0.01, "loss": 1.9297, "step": 116322 }, { "epoch": 11.955292908530318, "grad_norm": 0.036775220185518265, "learning_rate": 0.01, "loss": 1.9379, "step": 116325 }, { "epoch": 11.955601233299076, "grad_norm": 0.04017291218042374, "learning_rate": 0.01, "loss": 1.9475, "step": 116328 }, { "epoch": 11.955909558067832, "grad_norm": 0.033246539533138275, "learning_rate": 0.01, "loss": 1.9381, "step": 116331 }, { "epoch": 11.956217882836588, "grad_norm": 0.054019495844841, "learning_rate": 0.01, "loss": 1.9389, "step": 116334 }, { "epoch": 11.956526207605345, "grad_norm": 0.05341818928718567, "learning_rate": 0.01, "loss": 1.9489, "step": 116337 }, { "epoch": 11.956834532374101, "grad_norm": 0.06354212015867233, "learning_rate": 0.01, "loss": 1.9464, "step": 116340 }, { "epoch": 11.957142857142857, "grad_norm": 0.11799897998571396, "learning_rate": 0.01, "loss": 1.9275, "step": 116343 }, { "epoch": 11.957451181911614, "grad_norm": 0.06504609435796738, "learning_rate": 0.01, "loss": 1.9458, "step": 116346 }, { "epoch": 11.95775950668037, "grad_norm": 0.05881750211119652, "learning_rate": 0.01, "loss": 1.9218, "step": 116349 }, { "epoch": 11.958067831449126, "grad_norm": 0.03419358655810356, "learning_rate": 0.01, "loss": 1.954, "step": 116352 }, { "epoch": 11.958376156217883, "grad_norm": 0.10960660874843597, "learning_rate": 0.01, "loss": 1.9405, "step": 116355 }, { "epoch": 11.958684480986639, "grad_norm": 0.0514020100235939, "learning_rate": 0.01, "loss": 1.9433, "step": 116358 }, { "epoch": 11.958992805755395, "grad_norm": 0.11828702688217163, "learning_rate": 0.01, "loss": 1.9208, "step": 116361 }, { "epoch": 11.959301130524151, "grad_norm": 0.05309384688735008, "learning_rate": 0.01, "loss": 1.9301, "step": 116364 }, { "epoch": 11.959609455292908, "grad_norm": 0.05590314790606499, "learning_rate": 0.01, "loss": 1.9409, "step": 116367 }, { "epoch": 11.959917780061666, "grad_norm": 0.04192624241113663, "learning_rate": 0.01, "loss": 1.9336, "step": 116370 }, { "epoch": 11.960226104830422, "grad_norm": 0.02918892726302147, "learning_rate": 0.01, "loss": 1.9503, "step": 116373 }, { "epoch": 11.960534429599178, "grad_norm": 0.09773929417133331, "learning_rate": 0.01, "loss": 1.9572, "step": 116376 }, { "epoch": 11.960842754367935, "grad_norm": 0.09480267763137817, "learning_rate": 0.01, "loss": 1.9227, "step": 116379 }, { "epoch": 11.961151079136691, "grad_norm": 0.08526931703090668, "learning_rate": 0.01, "loss": 1.9332, "step": 116382 }, { "epoch": 11.961459403905447, "grad_norm": 0.08976579457521439, "learning_rate": 0.01, "loss": 1.9381, "step": 116385 }, { "epoch": 11.961767728674204, "grad_norm": 0.13688208162784576, "learning_rate": 0.01, "loss": 1.9683, "step": 116388 }, { "epoch": 11.96207605344296, "grad_norm": 0.03766091540455818, "learning_rate": 0.01, "loss": 1.9242, "step": 116391 }, { "epoch": 11.962384378211716, "grad_norm": 0.047475963830947876, "learning_rate": 0.01, "loss": 1.9529, "step": 116394 }, { "epoch": 11.962692702980473, "grad_norm": 0.03686054050922394, "learning_rate": 0.01, "loss": 1.942, "step": 116397 }, { "epoch": 11.963001027749229, "grad_norm": 0.10608810186386108, "learning_rate": 0.01, "loss": 1.9137, "step": 116400 }, { "epoch": 11.963309352517985, "grad_norm": 0.04265899956226349, "learning_rate": 0.01, "loss": 1.9394, "step": 116403 }, { "epoch": 11.963617677286742, "grad_norm": 0.04492899030447006, "learning_rate": 0.01, "loss": 1.9031, "step": 116406 }, { "epoch": 11.963926002055498, "grad_norm": 0.039105210453271866, "learning_rate": 0.01, "loss": 1.9126, "step": 116409 }, { "epoch": 11.964234326824254, "grad_norm": 0.036893412470817566, "learning_rate": 0.01, "loss": 1.9943, "step": 116412 }, { "epoch": 11.96454265159301, "grad_norm": 0.10075349360704422, "learning_rate": 0.01, "loss": 1.9415, "step": 116415 }, { "epoch": 11.964850976361769, "grad_norm": 0.053778357803821564, "learning_rate": 0.01, "loss": 1.9147, "step": 116418 }, { "epoch": 11.965159301130525, "grad_norm": 0.07513388991355896, "learning_rate": 0.01, "loss": 1.9314, "step": 116421 }, { "epoch": 11.965467625899281, "grad_norm": 0.12376239150762558, "learning_rate": 0.01, "loss": 1.9398, "step": 116424 }, { "epoch": 11.965775950668037, "grad_norm": 0.10657428950071335, "learning_rate": 0.01, "loss": 1.9381, "step": 116427 }, { "epoch": 11.966084275436794, "grad_norm": 0.038908544927835464, "learning_rate": 0.01, "loss": 1.9422, "step": 116430 }, { "epoch": 11.96639260020555, "grad_norm": 0.062225233763456345, "learning_rate": 0.01, "loss": 1.9228, "step": 116433 }, { "epoch": 11.966700924974306, "grad_norm": 0.042596325278282166, "learning_rate": 0.01, "loss": 1.9097, "step": 116436 }, { "epoch": 11.967009249743063, "grad_norm": 0.05816507712006569, "learning_rate": 0.01, "loss": 1.9321, "step": 116439 }, { "epoch": 11.967317574511819, "grad_norm": 0.05269140750169754, "learning_rate": 0.01, "loss": 1.9502, "step": 116442 }, { "epoch": 11.967625899280575, "grad_norm": 0.04918801039457321, "learning_rate": 0.01, "loss": 1.9403, "step": 116445 }, { "epoch": 11.967934224049332, "grad_norm": 0.03559085726737976, "learning_rate": 0.01, "loss": 1.9186, "step": 116448 }, { "epoch": 11.968242548818088, "grad_norm": 0.06931116431951523, "learning_rate": 0.01, "loss": 1.9442, "step": 116451 }, { "epoch": 11.968550873586844, "grad_norm": 0.09284847974777222, "learning_rate": 0.01, "loss": 1.9396, "step": 116454 }, { "epoch": 11.9688591983556, "grad_norm": 0.07549259811639786, "learning_rate": 0.01, "loss": 1.9104, "step": 116457 }, { "epoch": 11.969167523124357, "grad_norm": 0.043274421244859695, "learning_rate": 0.01, "loss": 1.949, "step": 116460 }, { "epoch": 11.969475847893115, "grad_norm": 0.0987868458032608, "learning_rate": 0.01, "loss": 1.926, "step": 116463 }, { "epoch": 11.969784172661871, "grad_norm": 0.0849565714597702, "learning_rate": 0.01, "loss": 1.9529, "step": 116466 }, { "epoch": 11.970092497430628, "grad_norm": 0.09053468704223633, "learning_rate": 0.01, "loss": 1.911, "step": 116469 }, { "epoch": 11.970400822199384, "grad_norm": 0.057200171053409576, "learning_rate": 0.01, "loss": 1.9333, "step": 116472 }, { "epoch": 11.97070914696814, "grad_norm": 0.10704628378152847, "learning_rate": 0.01, "loss": 1.9553, "step": 116475 }, { "epoch": 11.971017471736896, "grad_norm": 0.04893108084797859, "learning_rate": 0.01, "loss": 1.9402, "step": 116478 }, { "epoch": 11.971325796505653, "grad_norm": 0.1119956374168396, "learning_rate": 0.01, "loss": 1.9323, "step": 116481 }, { "epoch": 11.971634121274409, "grad_norm": 0.13471321761608124, "learning_rate": 0.01, "loss": 1.9487, "step": 116484 }, { "epoch": 11.971942446043165, "grad_norm": 0.039003241807222366, "learning_rate": 0.01, "loss": 1.9282, "step": 116487 }, { "epoch": 11.972250770811922, "grad_norm": 0.03899799659848213, "learning_rate": 0.01, "loss": 1.9397, "step": 116490 }, { "epoch": 11.972559095580678, "grad_norm": 0.04584067687392235, "learning_rate": 0.01, "loss": 1.9438, "step": 116493 }, { "epoch": 11.972867420349434, "grad_norm": 0.08454135060310364, "learning_rate": 0.01, "loss": 1.9427, "step": 116496 }, { "epoch": 11.97317574511819, "grad_norm": 0.08052065968513489, "learning_rate": 0.01, "loss": 1.9331, "step": 116499 }, { "epoch": 11.973484069886947, "grad_norm": 0.04771379008889198, "learning_rate": 0.01, "loss": 1.941, "step": 116502 }, { "epoch": 11.973792394655703, "grad_norm": 0.09802628308534622, "learning_rate": 0.01, "loss": 1.9474, "step": 116505 }, { "epoch": 11.97410071942446, "grad_norm": 0.0905313640832901, "learning_rate": 0.01, "loss": 1.9135, "step": 116508 }, { "epoch": 11.974409044193218, "grad_norm": 0.05095797777175903, "learning_rate": 0.01, "loss": 1.9204, "step": 116511 }, { "epoch": 11.974717368961974, "grad_norm": 0.0666579008102417, "learning_rate": 0.01, "loss": 1.948, "step": 116514 }, { "epoch": 11.97502569373073, "grad_norm": 0.08291205018758774, "learning_rate": 0.01, "loss": 1.9721, "step": 116517 }, { "epoch": 11.975334018499487, "grad_norm": 0.1152813658118248, "learning_rate": 0.01, "loss": 1.91, "step": 116520 }, { "epoch": 11.975642343268243, "grad_norm": 0.03910317271947861, "learning_rate": 0.01, "loss": 1.9176, "step": 116523 }, { "epoch": 11.975950668037, "grad_norm": 0.07153347879648209, "learning_rate": 0.01, "loss": 1.936, "step": 116526 }, { "epoch": 11.976258992805755, "grad_norm": 0.07718818634748459, "learning_rate": 0.01, "loss": 1.9141, "step": 116529 }, { "epoch": 11.976567317574512, "grad_norm": 0.08614334464073181, "learning_rate": 0.01, "loss": 1.9395, "step": 116532 }, { "epoch": 11.976875642343268, "grad_norm": 0.10888314247131348, "learning_rate": 0.01, "loss": 1.9491, "step": 116535 }, { "epoch": 11.977183967112024, "grad_norm": 0.045839011669158936, "learning_rate": 0.01, "loss": 1.9295, "step": 116538 }, { "epoch": 11.97749229188078, "grad_norm": 0.05105046182870865, "learning_rate": 0.01, "loss": 1.9181, "step": 116541 }, { "epoch": 11.977800616649537, "grad_norm": 0.06040375679731369, "learning_rate": 0.01, "loss": 1.9209, "step": 116544 }, { "epoch": 11.978108941418293, "grad_norm": 0.10920973122119904, "learning_rate": 0.01, "loss": 1.957, "step": 116547 }, { "epoch": 11.97841726618705, "grad_norm": 0.10301953554153442, "learning_rate": 0.01, "loss": 1.9336, "step": 116550 }, { "epoch": 11.978725590955808, "grad_norm": 0.08340315520763397, "learning_rate": 0.01, "loss": 1.9123, "step": 116553 }, { "epoch": 11.979033915724564, "grad_norm": 0.1261022686958313, "learning_rate": 0.01, "loss": 1.9514, "step": 116556 }, { "epoch": 11.97934224049332, "grad_norm": 0.0827362909913063, "learning_rate": 0.01, "loss": 1.9459, "step": 116559 }, { "epoch": 11.979650565262077, "grad_norm": 0.059684157371520996, "learning_rate": 0.01, "loss": 1.9423, "step": 116562 }, { "epoch": 11.979958890030833, "grad_norm": 0.06773815304040909, "learning_rate": 0.01, "loss": 1.9247, "step": 116565 }, { "epoch": 11.98026721479959, "grad_norm": 0.0493774376809597, "learning_rate": 0.01, "loss": 1.9279, "step": 116568 }, { "epoch": 11.980575539568346, "grad_norm": 0.06392575055360794, "learning_rate": 0.01, "loss": 1.9695, "step": 116571 }, { "epoch": 11.980883864337102, "grad_norm": 0.039536163210868835, "learning_rate": 0.01, "loss": 1.9299, "step": 116574 }, { "epoch": 11.981192189105858, "grad_norm": 0.038751810789108276, "learning_rate": 0.01, "loss": 1.9223, "step": 116577 }, { "epoch": 11.981500513874614, "grad_norm": 0.06577613949775696, "learning_rate": 0.01, "loss": 1.9255, "step": 116580 }, { "epoch": 11.98180883864337, "grad_norm": 0.16617251932621002, "learning_rate": 0.01, "loss": 1.9361, "step": 116583 }, { "epoch": 11.982117163412127, "grad_norm": 0.11082916706800461, "learning_rate": 0.01, "loss": 1.9614, "step": 116586 }, { "epoch": 11.982425488180883, "grad_norm": 0.09331133961677551, "learning_rate": 0.01, "loss": 1.9193, "step": 116589 }, { "epoch": 11.98273381294964, "grad_norm": 0.09698370099067688, "learning_rate": 0.01, "loss": 1.9393, "step": 116592 }, { "epoch": 11.983042137718396, "grad_norm": 0.04064074158668518, "learning_rate": 0.01, "loss": 1.963, "step": 116595 }, { "epoch": 11.983350462487152, "grad_norm": 0.05459297075867653, "learning_rate": 0.01, "loss": 1.9532, "step": 116598 }, { "epoch": 11.98365878725591, "grad_norm": 0.050961967557668686, "learning_rate": 0.01, "loss": 1.9213, "step": 116601 }, { "epoch": 11.983967112024667, "grad_norm": 0.047685325145721436, "learning_rate": 0.01, "loss": 1.9206, "step": 116604 }, { "epoch": 11.984275436793423, "grad_norm": 0.05707469582557678, "learning_rate": 0.01, "loss": 1.9274, "step": 116607 }, { "epoch": 11.98458376156218, "grad_norm": 0.08973026275634766, "learning_rate": 0.01, "loss": 1.9332, "step": 116610 }, { "epoch": 11.984892086330936, "grad_norm": 0.07212896645069122, "learning_rate": 0.01, "loss": 1.936, "step": 116613 }, { "epoch": 11.985200411099692, "grad_norm": 0.12053423374891281, "learning_rate": 0.01, "loss": 1.9031, "step": 116616 }, { "epoch": 11.985508735868448, "grad_norm": 0.07867543399333954, "learning_rate": 0.01, "loss": 1.983, "step": 116619 }, { "epoch": 11.985817060637205, "grad_norm": 0.061868395656347275, "learning_rate": 0.01, "loss": 1.948, "step": 116622 }, { "epoch": 11.98612538540596, "grad_norm": 0.06762991100549698, "learning_rate": 0.01, "loss": 1.9287, "step": 116625 }, { "epoch": 11.986433710174717, "grad_norm": 0.04585585370659828, "learning_rate": 0.01, "loss": 1.9422, "step": 116628 }, { "epoch": 11.986742034943473, "grad_norm": 0.11000943928956985, "learning_rate": 0.01, "loss": 1.9439, "step": 116631 }, { "epoch": 11.98705035971223, "grad_norm": 0.12769865989685059, "learning_rate": 0.01, "loss": 1.9548, "step": 116634 }, { "epoch": 11.987358684480986, "grad_norm": 0.0945596843957901, "learning_rate": 0.01, "loss": 1.9241, "step": 116637 }, { "epoch": 11.987667009249742, "grad_norm": 0.07920830696821213, "learning_rate": 0.01, "loss": 1.9406, "step": 116640 }, { "epoch": 11.987975334018499, "grad_norm": 0.05533134564757347, "learning_rate": 0.01, "loss": 1.9322, "step": 116643 }, { "epoch": 11.988283658787257, "grad_norm": 0.06737732887268066, "learning_rate": 0.01, "loss": 1.9154, "step": 116646 }, { "epoch": 11.988591983556013, "grad_norm": 0.06965016573667526, "learning_rate": 0.01, "loss": 1.9626, "step": 116649 }, { "epoch": 11.98890030832477, "grad_norm": 0.07038581371307373, "learning_rate": 0.01, "loss": 1.9225, "step": 116652 }, { "epoch": 11.989208633093526, "grad_norm": 0.0651620402932167, "learning_rate": 0.01, "loss": 1.9381, "step": 116655 }, { "epoch": 11.989516957862282, "grad_norm": 0.03648030012845993, "learning_rate": 0.01, "loss": 1.9457, "step": 116658 }, { "epoch": 11.989825282631038, "grad_norm": 0.035522736608982086, "learning_rate": 0.01, "loss": 1.9506, "step": 116661 }, { "epoch": 11.990133607399795, "grad_norm": 0.03931429609656334, "learning_rate": 0.01, "loss": 1.939, "step": 116664 }, { "epoch": 11.990441932168551, "grad_norm": 0.11045116931200027, "learning_rate": 0.01, "loss": 1.9569, "step": 116667 }, { "epoch": 11.990750256937307, "grad_norm": 0.11161495000123978, "learning_rate": 0.01, "loss": 1.9525, "step": 116670 }, { "epoch": 11.991058581706064, "grad_norm": 0.12464109063148499, "learning_rate": 0.01, "loss": 1.9388, "step": 116673 }, { "epoch": 11.99136690647482, "grad_norm": 0.05374126508831978, "learning_rate": 0.01, "loss": 1.9448, "step": 116676 }, { "epoch": 11.991675231243576, "grad_norm": 0.03395112603902817, "learning_rate": 0.01, "loss": 1.9528, "step": 116679 }, { "epoch": 11.991983556012332, "grad_norm": 0.045161593705415726, "learning_rate": 0.01, "loss": 1.9396, "step": 116682 }, { "epoch": 11.992291880781089, "grad_norm": 0.047702956944704056, "learning_rate": 0.01, "loss": 1.9151, "step": 116685 }, { "epoch": 11.992600205549845, "grad_norm": 0.04149683564901352, "learning_rate": 0.01, "loss": 1.9187, "step": 116688 }, { "epoch": 11.992908530318601, "grad_norm": 0.0429212860763073, "learning_rate": 0.01, "loss": 1.9387, "step": 116691 }, { "epoch": 11.99321685508736, "grad_norm": 0.05036528408527374, "learning_rate": 0.01, "loss": 1.9313, "step": 116694 }, { "epoch": 11.993525179856116, "grad_norm": 0.07086365669965744, "learning_rate": 0.01, "loss": 1.95, "step": 116697 }, { "epoch": 11.993833504624872, "grad_norm": 0.11894246190786362, "learning_rate": 0.01, "loss": 1.931, "step": 116700 }, { "epoch": 11.994141829393628, "grad_norm": 0.044312190264463425, "learning_rate": 0.01, "loss": 1.9302, "step": 116703 }, { "epoch": 11.994450154162385, "grad_norm": 0.04942063242197037, "learning_rate": 0.01, "loss": 1.9378, "step": 116706 }, { "epoch": 11.994758478931141, "grad_norm": 0.04379546642303467, "learning_rate": 0.01, "loss": 1.9436, "step": 116709 }, { "epoch": 11.995066803699897, "grad_norm": 0.057303521782159805, "learning_rate": 0.01, "loss": 1.9628, "step": 116712 }, { "epoch": 11.995375128468654, "grad_norm": 0.12316899001598358, "learning_rate": 0.01, "loss": 1.9313, "step": 116715 }, { "epoch": 11.99568345323741, "grad_norm": 0.0401909202337265, "learning_rate": 0.01, "loss": 1.942, "step": 116718 }, { "epoch": 11.995991778006166, "grad_norm": 0.06272529065608978, "learning_rate": 0.01, "loss": 1.9564, "step": 116721 }, { "epoch": 11.996300102774923, "grad_norm": 0.0627008005976677, "learning_rate": 0.01, "loss": 1.9323, "step": 116724 }, { "epoch": 11.996608427543679, "grad_norm": 0.041782572865486145, "learning_rate": 0.01, "loss": 1.9384, "step": 116727 }, { "epoch": 11.996916752312435, "grad_norm": 0.04066125303506851, "learning_rate": 0.01, "loss": 1.9358, "step": 116730 }, { "epoch": 11.997225077081191, "grad_norm": 0.06177583336830139, "learning_rate": 0.01, "loss": 1.9247, "step": 116733 }, { "epoch": 11.99753340184995, "grad_norm": 0.13602577149868011, "learning_rate": 0.01, "loss": 1.9239, "step": 116736 }, { "epoch": 11.997841726618706, "grad_norm": 0.043734993785619736, "learning_rate": 0.01, "loss": 1.94, "step": 116739 }, { "epoch": 11.998150051387462, "grad_norm": 0.04399963840842247, "learning_rate": 0.01, "loss": 1.9426, "step": 116742 }, { "epoch": 11.998458376156218, "grad_norm": 0.05504929646849632, "learning_rate": 0.01, "loss": 1.9363, "step": 116745 }, { "epoch": 11.998766700924975, "grad_norm": 0.1177234798669815, "learning_rate": 0.01, "loss": 1.9102, "step": 116748 }, { "epoch": 11.999075025693731, "grad_norm": 0.04470578953623772, "learning_rate": 0.01, "loss": 1.9435, "step": 116751 }, { "epoch": 11.999383350462487, "grad_norm": 0.05425754934549332, "learning_rate": 0.01, "loss": 1.9315, "step": 116754 }, { "epoch": 11.999691675231244, "grad_norm": 0.046526480466127396, "learning_rate": 0.01, "loss": 1.9403, "step": 116757 }, { "epoch": 12.0, "grad_norm": 0.043274134397506714, "learning_rate": 0.01, "loss": 1.9355, "step": 116760 }, { "epoch": 11.997842170160297, "grad_norm": 0.19715403020381927, "learning_rate": 0.01, "loss": 1.9332, "step": 116763 }, { "epoch": 11.998150431565968, "grad_norm": 0.07191906869411469, "learning_rate": 0.01, "loss": 1.9273, "step": 116766 }, { "epoch": 11.99845869297164, "grad_norm": 0.07266104221343994, "learning_rate": 0.01, "loss": 1.9407, "step": 116769 }, { "epoch": 11.998766954377311, "grad_norm": 0.04697905853390694, "learning_rate": 0.01, "loss": 1.9518, "step": 116772 }, { "epoch": 11.999075215782984, "grad_norm": 0.05928608775138855, "learning_rate": 0.01, "loss": 1.942, "step": 116775 }, { "epoch": 11.999383477188656, "grad_norm": 0.040938302874565125, "learning_rate": 0.01, "loss": 1.9411, "step": 116778 }, { "epoch": 11.999691738594327, "grad_norm": 0.04602961242198944, "learning_rate": 0.01, "loss": 1.9155, "step": 116781 }, { "epoch": 12.0, "grad_norm": 0.032084643840789795, "learning_rate": 0.01, "loss": 1.9398, "step": 116784 }, { "epoch": 12.000308261405673, "grad_norm": 0.06682760268449783, "learning_rate": 0.01, "loss": 1.96, "step": 116787 }, { "epoch": 12.000616522811344, "grad_norm": 0.06462869793176651, "learning_rate": 0.01, "loss": 1.9354, "step": 116790 }, { "epoch": 12.000924784217016, "grad_norm": 0.07240908592939377, "learning_rate": 0.01, "loss": 1.9473, "step": 116793 }, { "epoch": 12.001233045622689, "grad_norm": 0.04780181497335434, "learning_rate": 0.01, "loss": 1.9466, "step": 116796 }, { "epoch": 12.00154130702836, "grad_norm": 0.040893614292144775, "learning_rate": 0.01, "loss": 1.9241, "step": 116799 }, { "epoch": 12.001849568434032, "grad_norm": 0.05878365784883499, "learning_rate": 0.01, "loss": 1.9659, "step": 116802 }, { "epoch": 12.002157829839705, "grad_norm": 0.059600379317998886, "learning_rate": 0.01, "loss": 1.9314, "step": 116805 }, { "epoch": 12.002466091245376, "grad_norm": 0.053813789039850235, "learning_rate": 0.01, "loss": 1.917, "step": 116808 }, { "epoch": 12.002774352651048, "grad_norm": 0.05230226740241051, "learning_rate": 0.01, "loss": 1.9459, "step": 116811 }, { "epoch": 12.00308261405672, "grad_norm": 0.047241292893886566, "learning_rate": 0.01, "loss": 1.9382, "step": 116814 }, { "epoch": 12.003390875462392, "grad_norm": 0.11938584595918655, "learning_rate": 0.01, "loss": 1.9173, "step": 116817 }, { "epoch": 12.003699136868065, "grad_norm": 0.10174132138490677, "learning_rate": 0.01, "loss": 1.9407, "step": 116820 }, { "epoch": 12.004007398273735, "grad_norm": 0.061686307191848755, "learning_rate": 0.01, "loss": 1.9473, "step": 116823 }, { "epoch": 12.004315659679408, "grad_norm": 0.04009551927447319, "learning_rate": 0.01, "loss": 1.9342, "step": 116826 }, { "epoch": 12.00462392108508, "grad_norm": 0.04727621003985405, "learning_rate": 0.01, "loss": 1.9479, "step": 116829 }, { "epoch": 12.004932182490752, "grad_norm": 0.06283930689096451, "learning_rate": 0.01, "loss": 1.9215, "step": 116832 }, { "epoch": 12.005240443896424, "grad_norm": 0.035520389676094055, "learning_rate": 0.01, "loss": 1.9498, "step": 116835 }, { "epoch": 12.005548705302097, "grad_norm": 0.14637045562267303, "learning_rate": 0.01, "loss": 1.9493, "step": 116838 }, { "epoch": 12.005856966707768, "grad_norm": 0.04466203972697258, "learning_rate": 0.01, "loss": 1.958, "step": 116841 }, { "epoch": 12.00616522811344, "grad_norm": 0.0948067232966423, "learning_rate": 0.01, "loss": 1.9206, "step": 116844 }, { "epoch": 12.006473489519113, "grad_norm": 0.06225452572107315, "learning_rate": 0.01, "loss": 1.9414, "step": 116847 }, { "epoch": 12.006781750924784, "grad_norm": 0.041357167065143585, "learning_rate": 0.01, "loss": 1.9394, "step": 116850 }, { "epoch": 12.007090012330456, "grad_norm": 0.03680774196982384, "learning_rate": 0.01, "loss": 1.9411, "step": 116853 }, { "epoch": 12.00739827373613, "grad_norm": 0.22551889717578888, "learning_rate": 0.01, "loss": 1.9629, "step": 116856 }, { "epoch": 12.0077065351418, "grad_norm": 0.1491953581571579, "learning_rate": 0.01, "loss": 1.9452, "step": 116859 }, { "epoch": 12.008014796547473, "grad_norm": 0.11442308872938156, "learning_rate": 0.01, "loss": 1.8926, "step": 116862 }, { "epoch": 12.008323057953143, "grad_norm": 0.055177319794893265, "learning_rate": 0.01, "loss": 1.932, "step": 116865 }, { "epoch": 12.008631319358816, "grad_norm": 0.037357132881879807, "learning_rate": 0.01, "loss": 1.9413, "step": 116868 }, { "epoch": 12.008939580764489, "grad_norm": 0.08124551177024841, "learning_rate": 0.01, "loss": 1.971, "step": 116871 }, { "epoch": 12.00924784217016, "grad_norm": 0.06150367483496666, "learning_rate": 0.01, "loss": 1.9681, "step": 116874 }, { "epoch": 12.009556103575832, "grad_norm": 0.04272659122943878, "learning_rate": 0.01, "loss": 1.9495, "step": 116877 }, { "epoch": 12.009864364981505, "grad_norm": 0.061403874307870865, "learning_rate": 0.01, "loss": 1.9424, "step": 116880 }, { "epoch": 12.010172626387176, "grad_norm": 0.03552393987774849, "learning_rate": 0.01, "loss": 1.9315, "step": 116883 }, { "epoch": 12.010480887792848, "grad_norm": 0.03674667328596115, "learning_rate": 0.01, "loss": 1.9209, "step": 116886 }, { "epoch": 12.010789149198521, "grad_norm": 0.09467052668333054, "learning_rate": 0.01, "loss": 1.9362, "step": 116889 }, { "epoch": 12.011097410604192, "grad_norm": 0.10289536416530609, "learning_rate": 0.01, "loss": 1.9355, "step": 116892 }, { "epoch": 12.011405672009865, "grad_norm": 0.08801929652690887, "learning_rate": 0.01, "loss": 1.9479, "step": 116895 }, { "epoch": 12.011713933415537, "grad_norm": 0.12483206391334534, "learning_rate": 0.01, "loss": 1.9399, "step": 116898 }, { "epoch": 12.012022194821208, "grad_norm": 0.08305198699235916, "learning_rate": 0.01, "loss": 1.9371, "step": 116901 }, { "epoch": 12.01233045622688, "grad_norm": 0.043588075786828995, "learning_rate": 0.01, "loss": 1.9296, "step": 116904 }, { "epoch": 12.012638717632552, "grad_norm": 0.0356164388358593, "learning_rate": 0.01, "loss": 1.9176, "step": 116907 }, { "epoch": 12.012946979038224, "grad_norm": 0.05521430820226669, "learning_rate": 0.01, "loss": 1.9443, "step": 116910 }, { "epoch": 12.013255240443897, "grad_norm": 0.08943873643875122, "learning_rate": 0.01, "loss": 1.93, "step": 116913 }, { "epoch": 12.013563501849568, "grad_norm": 0.07061263173818588, "learning_rate": 0.01, "loss": 1.9477, "step": 116916 }, { "epoch": 12.01387176325524, "grad_norm": 0.0761643648147583, "learning_rate": 0.01, "loss": 1.9488, "step": 116919 }, { "epoch": 12.014180024660913, "grad_norm": 0.07292278110980988, "learning_rate": 0.01, "loss": 1.9637, "step": 116922 }, { "epoch": 12.014488286066584, "grad_norm": 0.09125639498233795, "learning_rate": 0.01, "loss": 1.9283, "step": 116925 }, { "epoch": 12.014796547472256, "grad_norm": 0.10665156692266464, "learning_rate": 0.01, "loss": 1.9434, "step": 116928 }, { "epoch": 12.015104808877929, "grad_norm": 0.05672309547662735, "learning_rate": 0.01, "loss": 1.9389, "step": 116931 }, { "epoch": 12.0154130702836, "grad_norm": 0.06380181759595871, "learning_rate": 0.01, "loss": 1.9227, "step": 116934 }, { "epoch": 12.015721331689273, "grad_norm": 0.08765019476413727, "learning_rate": 0.01, "loss": 1.9221, "step": 116937 }, { "epoch": 12.016029593094945, "grad_norm": 0.10064586997032166, "learning_rate": 0.01, "loss": 1.9422, "step": 116940 }, { "epoch": 12.016337854500616, "grad_norm": 0.04949174076318741, "learning_rate": 0.01, "loss": 1.9363, "step": 116943 }, { "epoch": 12.016646115906289, "grad_norm": 0.05919962376356125, "learning_rate": 0.01, "loss": 1.9374, "step": 116946 }, { "epoch": 12.016954377311961, "grad_norm": 0.060713063925504684, "learning_rate": 0.01, "loss": 1.9288, "step": 116949 }, { "epoch": 12.017262638717632, "grad_norm": 0.06562382727861404, "learning_rate": 0.01, "loss": 1.9448, "step": 116952 }, { "epoch": 12.017570900123305, "grad_norm": 0.10939974337816238, "learning_rate": 0.01, "loss": 1.9471, "step": 116955 }, { "epoch": 12.017879161528976, "grad_norm": 0.04349961131811142, "learning_rate": 0.01, "loss": 1.9254, "step": 116958 }, { "epoch": 12.018187422934648, "grad_norm": 0.13677796721458435, "learning_rate": 0.01, "loss": 1.9425, "step": 116961 }, { "epoch": 12.018495684340321, "grad_norm": 0.10809358954429626, "learning_rate": 0.01, "loss": 1.9293, "step": 116964 }, { "epoch": 12.018803945745992, "grad_norm": 0.056563131511211395, "learning_rate": 0.01, "loss": 1.9419, "step": 116967 }, { "epoch": 12.019112207151665, "grad_norm": 0.048835474997758865, "learning_rate": 0.01, "loss": 1.9549, "step": 116970 }, { "epoch": 12.019420468557337, "grad_norm": 0.038491714745759964, "learning_rate": 0.01, "loss": 1.9289, "step": 116973 }, { "epoch": 12.019728729963008, "grad_norm": 0.04731836915016174, "learning_rate": 0.01, "loss": 1.9636, "step": 116976 }, { "epoch": 12.02003699136868, "grad_norm": 0.10551325231790543, "learning_rate": 0.01, "loss": 1.9383, "step": 116979 }, { "epoch": 12.020345252774353, "grad_norm": 0.05078031122684479, "learning_rate": 0.01, "loss": 1.9478, "step": 116982 }, { "epoch": 12.020653514180024, "grad_norm": 0.06656154245138168, "learning_rate": 0.01, "loss": 1.9183, "step": 116985 }, { "epoch": 12.020961775585697, "grad_norm": 0.1516573429107666, "learning_rate": 0.01, "loss": 1.9401, "step": 116988 }, { "epoch": 12.02127003699137, "grad_norm": 0.07005427777767181, "learning_rate": 0.01, "loss": 1.9243, "step": 116991 }, { "epoch": 12.02157829839704, "grad_norm": 0.04545412212610245, "learning_rate": 0.01, "loss": 1.9257, "step": 116994 }, { "epoch": 12.021886559802713, "grad_norm": 0.042291078716516495, "learning_rate": 0.01, "loss": 1.9536, "step": 116997 }, { "epoch": 12.022194821208386, "grad_norm": 0.0934653952717781, "learning_rate": 0.01, "loss": 1.9273, "step": 117000 }, { "epoch": 12.022503082614056, "grad_norm": 0.06595387309789658, "learning_rate": 0.01, "loss": 1.9392, "step": 117003 }, { "epoch": 12.022811344019729, "grad_norm": 0.11432889848947525, "learning_rate": 0.01, "loss": 1.9354, "step": 117006 }, { "epoch": 12.0231196054254, "grad_norm": 0.08200161904096603, "learning_rate": 0.01, "loss": 1.9541, "step": 117009 }, { "epoch": 12.023427866831073, "grad_norm": 0.13999858498573303, "learning_rate": 0.01, "loss": 1.9193, "step": 117012 }, { "epoch": 12.023736128236745, "grad_norm": 0.08538030833005905, "learning_rate": 0.01, "loss": 1.9363, "step": 117015 }, { "epoch": 12.024044389642416, "grad_norm": 0.04241943359375, "learning_rate": 0.01, "loss": 1.9174, "step": 117018 }, { "epoch": 12.024352651048089, "grad_norm": 0.06946206837892532, "learning_rate": 0.01, "loss": 1.9412, "step": 117021 }, { "epoch": 12.024660912453761, "grad_norm": 0.062005434185266495, "learning_rate": 0.01, "loss": 1.9584, "step": 117024 }, { "epoch": 12.024969173859432, "grad_norm": 0.0390668585896492, "learning_rate": 0.01, "loss": 1.9713, "step": 117027 }, { "epoch": 12.025277435265105, "grad_norm": 0.038422442972660065, "learning_rate": 0.01, "loss": 1.9318, "step": 117030 }, { "epoch": 12.025585696670777, "grad_norm": 0.06409028172492981, "learning_rate": 0.01, "loss": 1.9074, "step": 117033 }, { "epoch": 12.025893958076448, "grad_norm": 0.036864008754491806, "learning_rate": 0.01, "loss": 1.9416, "step": 117036 }, { "epoch": 12.026202219482121, "grad_norm": 0.11394591629505157, "learning_rate": 0.01, "loss": 1.9539, "step": 117039 }, { "epoch": 12.026510480887794, "grad_norm": 0.05371858552098274, "learning_rate": 0.01, "loss": 1.955, "step": 117042 }, { "epoch": 12.026818742293464, "grad_norm": 0.11123819649219513, "learning_rate": 0.01, "loss": 1.9491, "step": 117045 }, { "epoch": 12.027127003699137, "grad_norm": 0.04701869934797287, "learning_rate": 0.01, "loss": 1.9387, "step": 117048 }, { "epoch": 12.02743526510481, "grad_norm": 0.04842498525977135, "learning_rate": 0.01, "loss": 1.9131, "step": 117051 }, { "epoch": 12.02774352651048, "grad_norm": 0.03712807595729828, "learning_rate": 0.01, "loss": 1.969, "step": 117054 }, { "epoch": 12.028051787916153, "grad_norm": 0.07579638808965683, "learning_rate": 0.01, "loss": 1.9534, "step": 117057 }, { "epoch": 12.028360049321824, "grad_norm": 0.10008134692907333, "learning_rate": 0.01, "loss": 1.9253, "step": 117060 }, { "epoch": 12.028668310727497, "grad_norm": 0.05371668562293053, "learning_rate": 0.01, "loss": 1.9504, "step": 117063 }, { "epoch": 12.02897657213317, "grad_norm": 0.07945726066827774, "learning_rate": 0.01, "loss": 1.9555, "step": 117066 }, { "epoch": 12.02928483353884, "grad_norm": 0.035276856273412704, "learning_rate": 0.01, "loss": 1.9292, "step": 117069 }, { "epoch": 12.029593094944513, "grad_norm": 0.09449401497840881, "learning_rate": 0.01, "loss": 1.9329, "step": 117072 }, { "epoch": 12.029901356350186, "grad_norm": 0.07702691107988358, "learning_rate": 0.01, "loss": 1.9559, "step": 117075 }, { "epoch": 12.030209617755856, "grad_norm": 0.1043589785695076, "learning_rate": 0.01, "loss": 1.9474, "step": 117078 }, { "epoch": 12.030517879161529, "grad_norm": 0.139021098613739, "learning_rate": 0.01, "loss": 1.9338, "step": 117081 }, { "epoch": 12.030826140567202, "grad_norm": 0.12700006365776062, "learning_rate": 0.01, "loss": 1.9068, "step": 117084 }, { "epoch": 12.031134401972873, "grad_norm": 0.08406472206115723, "learning_rate": 0.01, "loss": 1.9554, "step": 117087 }, { "epoch": 12.031442663378545, "grad_norm": 0.058496635407209396, "learning_rate": 0.01, "loss": 1.9326, "step": 117090 }, { "epoch": 12.031750924784218, "grad_norm": 0.05651369318366051, "learning_rate": 0.01, "loss": 1.9674, "step": 117093 }, { "epoch": 12.032059186189889, "grad_norm": 0.041309770196676254, "learning_rate": 0.01, "loss": 1.9501, "step": 117096 }, { "epoch": 12.032367447595561, "grad_norm": 0.08295998722314835, "learning_rate": 0.01, "loss": 1.9534, "step": 117099 }, { "epoch": 12.032675709001232, "grad_norm": 0.07957513630390167, "learning_rate": 0.01, "loss": 1.9477, "step": 117102 }, { "epoch": 12.032983970406905, "grad_norm": 0.05058687925338745, "learning_rate": 0.01, "loss": 1.9372, "step": 117105 }, { "epoch": 12.033292231812577, "grad_norm": 0.034771159291267395, "learning_rate": 0.01, "loss": 1.9427, "step": 117108 }, { "epoch": 12.033600493218248, "grad_norm": 0.11958864331245422, "learning_rate": 0.01, "loss": 1.9387, "step": 117111 }, { "epoch": 12.033908754623921, "grad_norm": 0.04502825811505318, "learning_rate": 0.01, "loss": 1.9323, "step": 117114 }, { "epoch": 12.034217016029594, "grad_norm": 0.1089678481221199, "learning_rate": 0.01, "loss": 1.9693, "step": 117117 }, { "epoch": 12.034525277435264, "grad_norm": 0.09961727261543274, "learning_rate": 0.01, "loss": 1.9371, "step": 117120 }, { "epoch": 12.034833538840937, "grad_norm": 0.05590556561946869, "learning_rate": 0.01, "loss": 1.9568, "step": 117123 }, { "epoch": 12.03514180024661, "grad_norm": 0.03497562184929848, "learning_rate": 0.01, "loss": 1.9372, "step": 117126 }, { "epoch": 12.03545006165228, "grad_norm": 0.05207972228527069, "learning_rate": 0.01, "loss": 1.9552, "step": 117129 }, { "epoch": 12.035758323057953, "grad_norm": 0.053018007427453995, "learning_rate": 0.01, "loss": 1.9413, "step": 117132 }, { "epoch": 12.036066584463626, "grad_norm": 0.08121684193611145, "learning_rate": 0.01, "loss": 1.9233, "step": 117135 }, { "epoch": 12.036374845869297, "grad_norm": 0.0976773351430893, "learning_rate": 0.01, "loss": 1.925, "step": 117138 }, { "epoch": 12.03668310727497, "grad_norm": 0.06670916080474854, "learning_rate": 0.01, "loss": 1.9165, "step": 117141 }, { "epoch": 12.036991368680642, "grad_norm": 0.11813090741634369, "learning_rate": 0.01, "loss": 1.9537, "step": 117144 }, { "epoch": 12.037299630086313, "grad_norm": 0.08185863494873047, "learning_rate": 0.01, "loss": 1.9499, "step": 117147 }, { "epoch": 12.037607891491986, "grad_norm": 0.1361522376537323, "learning_rate": 0.01, "loss": 1.9294, "step": 117150 }, { "epoch": 12.037916152897656, "grad_norm": 0.09057639539241791, "learning_rate": 0.01, "loss": 1.9094, "step": 117153 }, { "epoch": 12.038224414303329, "grad_norm": 0.11496086418628693, "learning_rate": 0.01, "loss": 1.9653, "step": 117156 }, { "epoch": 12.038532675709002, "grad_norm": 0.06829189509153366, "learning_rate": 0.01, "loss": 1.9347, "step": 117159 }, { "epoch": 12.038840937114673, "grad_norm": 0.05468536168336868, "learning_rate": 0.01, "loss": 1.9385, "step": 117162 }, { "epoch": 12.039149198520345, "grad_norm": 0.04229513183236122, "learning_rate": 0.01, "loss": 1.9271, "step": 117165 }, { "epoch": 12.039457459926018, "grad_norm": 0.0614624097943306, "learning_rate": 0.01, "loss": 1.9512, "step": 117168 }, { "epoch": 12.039765721331689, "grad_norm": 0.06150800362229347, "learning_rate": 0.01, "loss": 1.9333, "step": 117171 }, { "epoch": 12.040073982737361, "grad_norm": 0.08810736984014511, "learning_rate": 0.01, "loss": 1.9415, "step": 117174 }, { "epoch": 12.040382244143034, "grad_norm": 0.10887371003627777, "learning_rate": 0.01, "loss": 1.958, "step": 117177 }, { "epoch": 12.040690505548705, "grad_norm": 0.045244283974170685, "learning_rate": 0.01, "loss": 1.9322, "step": 117180 }, { "epoch": 12.040998766954377, "grad_norm": 0.040193479508161545, "learning_rate": 0.01, "loss": 1.9457, "step": 117183 }, { "epoch": 12.04130702836005, "grad_norm": 0.050419118255376816, "learning_rate": 0.01, "loss": 1.9323, "step": 117186 }, { "epoch": 12.041615289765721, "grad_norm": 0.07721977680921555, "learning_rate": 0.01, "loss": 1.9427, "step": 117189 }, { "epoch": 12.041923551171394, "grad_norm": 0.04506092891097069, "learning_rate": 0.01, "loss": 1.962, "step": 117192 }, { "epoch": 12.042231812577066, "grad_norm": 0.03247934952378273, "learning_rate": 0.01, "loss": 1.8944, "step": 117195 }, { "epoch": 12.042540073982737, "grad_norm": 0.15020819008350372, "learning_rate": 0.01, "loss": 1.9485, "step": 117198 }, { "epoch": 12.04284833538841, "grad_norm": 0.04528571665287018, "learning_rate": 0.01, "loss": 1.919, "step": 117201 }, { "epoch": 12.04315659679408, "grad_norm": 0.09674827009439468, "learning_rate": 0.01, "loss": 1.943, "step": 117204 }, { "epoch": 12.043464858199753, "grad_norm": 0.05445929244160652, "learning_rate": 0.01, "loss": 1.9455, "step": 117207 }, { "epoch": 12.043773119605426, "grad_norm": 0.04853241890668869, "learning_rate": 0.01, "loss": 1.9525, "step": 117210 }, { "epoch": 12.044081381011097, "grad_norm": 0.13493533432483673, "learning_rate": 0.01, "loss": 1.9297, "step": 117213 }, { "epoch": 12.04438964241677, "grad_norm": 0.19780290126800537, "learning_rate": 0.01, "loss": 1.9623, "step": 117216 }, { "epoch": 12.044697903822442, "grad_norm": 0.1563442051410675, "learning_rate": 0.01, "loss": 1.9658, "step": 117219 }, { "epoch": 12.045006165228113, "grad_norm": 0.07573375105857849, "learning_rate": 0.01, "loss": 1.928, "step": 117222 }, { "epoch": 12.045314426633785, "grad_norm": 0.05705763399600983, "learning_rate": 0.01, "loss": 1.9509, "step": 117225 }, { "epoch": 12.045622688039458, "grad_norm": 0.03947446122765541, "learning_rate": 0.01, "loss": 1.903, "step": 117228 }, { "epoch": 12.045930949445129, "grad_norm": 0.03954556956887245, "learning_rate": 0.01, "loss": 1.9354, "step": 117231 }, { "epoch": 12.046239210850802, "grad_norm": 0.041128892451524734, "learning_rate": 0.01, "loss": 1.9214, "step": 117234 }, { "epoch": 12.046547472256474, "grad_norm": 0.0362667441368103, "learning_rate": 0.01, "loss": 1.9504, "step": 117237 }, { "epoch": 12.046855733662145, "grad_norm": 0.054836705327034, "learning_rate": 0.01, "loss": 1.9357, "step": 117240 }, { "epoch": 12.047163995067818, "grad_norm": 0.09442821145057678, "learning_rate": 0.01, "loss": 1.9385, "step": 117243 }, { "epoch": 12.04747225647349, "grad_norm": 0.057980697602033615, "learning_rate": 0.01, "loss": 1.9129, "step": 117246 }, { "epoch": 12.047780517879161, "grad_norm": 0.04346334934234619, "learning_rate": 0.01, "loss": 1.937, "step": 117249 }, { "epoch": 12.048088779284834, "grad_norm": 0.042848844081163406, "learning_rate": 0.01, "loss": 1.9398, "step": 117252 }, { "epoch": 12.048397040690505, "grad_norm": 0.0719798132777214, "learning_rate": 0.01, "loss": 1.9332, "step": 117255 }, { "epoch": 12.048705302096177, "grad_norm": 0.04385082796216011, "learning_rate": 0.01, "loss": 1.9292, "step": 117258 }, { "epoch": 12.04901356350185, "grad_norm": 0.06334295868873596, "learning_rate": 0.01, "loss": 1.9573, "step": 117261 }, { "epoch": 12.049321824907521, "grad_norm": 0.06021895632147789, "learning_rate": 0.01, "loss": 1.9179, "step": 117264 }, { "epoch": 12.049630086313194, "grad_norm": 0.07692274451255798, "learning_rate": 0.01, "loss": 1.9325, "step": 117267 }, { "epoch": 12.049938347718866, "grad_norm": 0.09398198127746582, "learning_rate": 0.01, "loss": 1.9483, "step": 117270 }, { "epoch": 12.050246609124537, "grad_norm": 0.11060789972543716, "learning_rate": 0.01, "loss": 1.8999, "step": 117273 }, { "epoch": 12.05055487053021, "grad_norm": 0.055222559720277786, "learning_rate": 0.01, "loss": 1.9153, "step": 117276 }, { "epoch": 12.050863131935882, "grad_norm": 0.04734884575009346, "learning_rate": 0.01, "loss": 1.9495, "step": 117279 }, { "epoch": 12.051171393341553, "grad_norm": 0.038412392139434814, "learning_rate": 0.01, "loss": 1.9429, "step": 117282 }, { "epoch": 12.051479654747226, "grad_norm": 0.0350971519947052, "learning_rate": 0.01, "loss": 1.9374, "step": 117285 }, { "epoch": 12.051787916152898, "grad_norm": 0.046421583741903305, "learning_rate": 0.01, "loss": 1.9555, "step": 117288 }, { "epoch": 12.05209617755857, "grad_norm": 0.04922668635845184, "learning_rate": 0.01, "loss": 1.9246, "step": 117291 }, { "epoch": 12.052404438964242, "grad_norm": 0.05314551666378975, "learning_rate": 0.01, "loss": 1.9413, "step": 117294 }, { "epoch": 12.052712700369913, "grad_norm": 0.04084093123674393, "learning_rate": 0.01, "loss": 1.941, "step": 117297 }, { "epoch": 12.053020961775585, "grad_norm": 0.11658613383769989, "learning_rate": 0.01, "loss": 1.9564, "step": 117300 }, { "epoch": 12.053329223181258, "grad_norm": 0.07992861419916153, "learning_rate": 0.01, "loss": 1.913, "step": 117303 }, { "epoch": 12.053637484586929, "grad_norm": 0.10995413362979889, "learning_rate": 0.01, "loss": 1.9392, "step": 117306 }, { "epoch": 12.053945745992602, "grad_norm": 0.0791175365447998, "learning_rate": 0.01, "loss": 1.9639, "step": 117309 }, { "epoch": 12.054254007398274, "grad_norm": 0.08389805257320404, "learning_rate": 0.01, "loss": 1.9495, "step": 117312 }, { "epoch": 12.054562268803945, "grad_norm": 0.040105294436216354, "learning_rate": 0.01, "loss": 1.9475, "step": 117315 }, { "epoch": 12.054870530209618, "grad_norm": 0.04215976595878601, "learning_rate": 0.01, "loss": 1.9461, "step": 117318 }, { "epoch": 12.05517879161529, "grad_norm": 0.04946447163820267, "learning_rate": 0.01, "loss": 1.9381, "step": 117321 }, { "epoch": 12.055487053020961, "grad_norm": 0.04651471599936485, "learning_rate": 0.01, "loss": 1.9331, "step": 117324 }, { "epoch": 12.055795314426634, "grad_norm": 0.030955208465456963, "learning_rate": 0.01, "loss": 1.9084, "step": 117327 }, { "epoch": 12.056103575832307, "grad_norm": 0.04631368815898895, "learning_rate": 0.01, "loss": 1.913, "step": 117330 }, { "epoch": 12.056411837237977, "grad_norm": 0.06540189683437347, "learning_rate": 0.01, "loss": 1.9466, "step": 117333 }, { "epoch": 12.05672009864365, "grad_norm": 0.07952427864074707, "learning_rate": 0.01, "loss": 1.9088, "step": 117336 }, { "epoch": 12.057028360049323, "grad_norm": 0.06618475168943405, "learning_rate": 0.01, "loss": 1.9314, "step": 117339 }, { "epoch": 12.057336621454994, "grad_norm": 0.07007245719432831, "learning_rate": 0.01, "loss": 1.9327, "step": 117342 }, { "epoch": 12.057644882860666, "grad_norm": 0.09179333597421646, "learning_rate": 0.01, "loss": 1.9299, "step": 117345 }, { "epoch": 12.057953144266337, "grad_norm": 0.04440729320049286, "learning_rate": 0.01, "loss": 1.9077, "step": 117348 }, { "epoch": 12.05826140567201, "grad_norm": 0.06386969238519669, "learning_rate": 0.01, "loss": 1.9516, "step": 117351 }, { "epoch": 12.058569667077682, "grad_norm": 0.15889348089694977, "learning_rate": 0.01, "loss": 1.9297, "step": 117354 }, { "epoch": 12.058877928483353, "grad_norm": 0.08538535237312317, "learning_rate": 0.01, "loss": 1.9375, "step": 117357 }, { "epoch": 12.059186189889026, "grad_norm": 0.1034465953707695, "learning_rate": 0.01, "loss": 1.9097, "step": 117360 }, { "epoch": 12.059494451294698, "grad_norm": 0.06334459781646729, "learning_rate": 0.01, "loss": 1.9311, "step": 117363 }, { "epoch": 12.05980271270037, "grad_norm": 0.045413531363010406, "learning_rate": 0.01, "loss": 1.9182, "step": 117366 }, { "epoch": 12.060110974106042, "grad_norm": 0.057017140090465546, "learning_rate": 0.01, "loss": 1.9499, "step": 117369 }, { "epoch": 12.060419235511715, "grad_norm": 0.06852418184280396, "learning_rate": 0.01, "loss": 1.9057, "step": 117372 }, { "epoch": 12.060727496917385, "grad_norm": 0.12366721779108047, "learning_rate": 0.01, "loss": 1.9549, "step": 117375 }, { "epoch": 12.061035758323058, "grad_norm": 0.08869688957929611, "learning_rate": 0.01, "loss": 1.945, "step": 117378 }, { "epoch": 12.06134401972873, "grad_norm": 0.09724227339029312, "learning_rate": 0.01, "loss": 1.9623, "step": 117381 }, { "epoch": 12.061652281134402, "grad_norm": 0.07461173087358475, "learning_rate": 0.01, "loss": 1.9189, "step": 117384 }, { "epoch": 12.061960542540074, "grad_norm": 0.09186264872550964, "learning_rate": 0.01, "loss": 1.947, "step": 117387 }, { "epoch": 12.062268803945747, "grad_norm": 0.05991434305906296, "learning_rate": 0.01, "loss": 1.9715, "step": 117390 }, { "epoch": 12.062577065351418, "grad_norm": 0.08469602465629578, "learning_rate": 0.01, "loss": 1.9524, "step": 117393 }, { "epoch": 12.06288532675709, "grad_norm": 0.08107118308544159, "learning_rate": 0.01, "loss": 1.9478, "step": 117396 }, { "epoch": 12.063193588162761, "grad_norm": 0.05645585432648659, "learning_rate": 0.01, "loss": 1.9398, "step": 117399 }, { "epoch": 12.063501849568434, "grad_norm": 0.048557594418525696, "learning_rate": 0.01, "loss": 1.9382, "step": 117402 }, { "epoch": 12.063810110974106, "grad_norm": 0.04499780014157295, "learning_rate": 0.01, "loss": 1.9387, "step": 117405 }, { "epoch": 12.064118372379777, "grad_norm": 0.10070645064115524, "learning_rate": 0.01, "loss": 1.9501, "step": 117408 }, { "epoch": 12.06442663378545, "grad_norm": 0.07078666239976883, "learning_rate": 0.01, "loss": 1.9489, "step": 117411 }, { "epoch": 12.064734895191123, "grad_norm": 0.09919533878564835, "learning_rate": 0.01, "loss": 1.9333, "step": 117414 }, { "epoch": 12.065043156596793, "grad_norm": 0.03838624432682991, "learning_rate": 0.01, "loss": 1.9286, "step": 117417 }, { "epoch": 12.065351418002466, "grad_norm": 0.06327342987060547, "learning_rate": 0.01, "loss": 1.9595, "step": 117420 }, { "epoch": 12.065659679408139, "grad_norm": 0.054736699908971786, "learning_rate": 0.01, "loss": 1.9471, "step": 117423 }, { "epoch": 12.06596794081381, "grad_norm": 0.08838552981615067, "learning_rate": 0.01, "loss": 1.9174, "step": 117426 }, { "epoch": 12.066276202219482, "grad_norm": 0.0879216119647026, "learning_rate": 0.01, "loss": 1.9563, "step": 117429 }, { "epoch": 12.066584463625155, "grad_norm": 0.06935036182403564, "learning_rate": 0.01, "loss": 1.9225, "step": 117432 }, { "epoch": 12.066892725030826, "grad_norm": 0.04287251457571983, "learning_rate": 0.01, "loss": 1.938, "step": 117435 }, { "epoch": 12.067200986436498, "grad_norm": 0.08437120169401169, "learning_rate": 0.01, "loss": 1.9404, "step": 117438 }, { "epoch": 12.067509247842171, "grad_norm": 0.10385788977146149, "learning_rate": 0.01, "loss": 1.9419, "step": 117441 }, { "epoch": 12.067817509247842, "grad_norm": 0.08448704332113266, "learning_rate": 0.01, "loss": 1.9715, "step": 117444 }, { "epoch": 12.068125770653515, "grad_norm": 0.09182809293270111, "learning_rate": 0.01, "loss": 1.9455, "step": 117447 }, { "epoch": 12.068434032059185, "grad_norm": 0.06671719998121262, "learning_rate": 0.01, "loss": 1.9547, "step": 117450 }, { "epoch": 12.068742293464858, "grad_norm": 0.05060334876179695, "learning_rate": 0.01, "loss": 1.9371, "step": 117453 }, { "epoch": 12.06905055487053, "grad_norm": 0.04752616211771965, "learning_rate": 0.01, "loss": 1.9396, "step": 117456 }, { "epoch": 12.069358816276202, "grad_norm": 0.0996180921792984, "learning_rate": 0.01, "loss": 1.9226, "step": 117459 }, { "epoch": 12.069667077681874, "grad_norm": 0.11132294684648514, "learning_rate": 0.01, "loss": 1.9576, "step": 117462 }, { "epoch": 12.069975339087547, "grad_norm": 0.06568966805934906, "learning_rate": 0.01, "loss": 1.9174, "step": 117465 }, { "epoch": 12.070283600493218, "grad_norm": 0.03154284879565239, "learning_rate": 0.01, "loss": 1.9186, "step": 117468 }, { "epoch": 12.07059186189889, "grad_norm": 0.09147141128778458, "learning_rate": 0.01, "loss": 1.9855, "step": 117471 }, { "epoch": 12.070900123304563, "grad_norm": 0.06943826377391815, "learning_rate": 0.01, "loss": 1.9118, "step": 117474 }, { "epoch": 12.071208384710234, "grad_norm": 0.1036934033036232, "learning_rate": 0.01, "loss": 1.9199, "step": 117477 }, { "epoch": 12.071516646115906, "grad_norm": 0.11400505900382996, "learning_rate": 0.01, "loss": 1.9334, "step": 117480 }, { "epoch": 12.071824907521579, "grad_norm": 0.055724602192640305, "learning_rate": 0.01, "loss": 1.9387, "step": 117483 }, { "epoch": 12.07213316892725, "grad_norm": 0.04169715195894241, "learning_rate": 0.01, "loss": 1.9567, "step": 117486 }, { "epoch": 12.072441430332923, "grad_norm": 0.0361507274210453, "learning_rate": 0.01, "loss": 1.9391, "step": 117489 }, { "epoch": 12.072749691738593, "grad_norm": 0.05780757591128349, "learning_rate": 0.01, "loss": 1.9228, "step": 117492 }, { "epoch": 12.073057953144266, "grad_norm": 0.0696556344628334, "learning_rate": 0.01, "loss": 1.9628, "step": 117495 }, { "epoch": 12.073366214549939, "grad_norm": 0.08824704587459564, "learning_rate": 0.01, "loss": 1.9499, "step": 117498 }, { "epoch": 12.07367447595561, "grad_norm": 0.11320904642343521, "learning_rate": 0.01, "loss": 1.9345, "step": 117501 }, { "epoch": 12.073982737361282, "grad_norm": 0.038966935127973557, "learning_rate": 0.01, "loss": 1.9412, "step": 117504 }, { "epoch": 12.074290998766955, "grad_norm": 0.039885830134153366, "learning_rate": 0.01, "loss": 1.9223, "step": 117507 }, { "epoch": 12.074599260172626, "grad_norm": 0.03738117218017578, "learning_rate": 0.01, "loss": 1.9522, "step": 117510 }, { "epoch": 12.074907521578298, "grad_norm": 0.06798634678125381, "learning_rate": 0.01, "loss": 1.9395, "step": 117513 }, { "epoch": 12.075215782983971, "grad_norm": 0.12045107781887054, "learning_rate": 0.01, "loss": 1.9499, "step": 117516 }, { "epoch": 12.075524044389642, "grad_norm": 0.05819183215498924, "learning_rate": 0.01, "loss": 1.937, "step": 117519 }, { "epoch": 12.075832305795315, "grad_norm": 0.08216798305511475, "learning_rate": 0.01, "loss": 1.9431, "step": 117522 }, { "epoch": 12.076140567200987, "grad_norm": 0.10765063762664795, "learning_rate": 0.01, "loss": 1.9515, "step": 117525 }, { "epoch": 12.076448828606658, "grad_norm": 0.10575487464666367, "learning_rate": 0.01, "loss": 1.9434, "step": 117528 }, { "epoch": 12.07675709001233, "grad_norm": 0.04776548594236374, "learning_rate": 0.01, "loss": 1.9487, "step": 117531 }, { "epoch": 12.077065351418003, "grad_norm": 0.03476772829890251, "learning_rate": 0.01, "loss": 1.9515, "step": 117534 }, { "epoch": 12.077373612823674, "grad_norm": 0.06624691188335419, "learning_rate": 0.01, "loss": 1.9543, "step": 117537 }, { "epoch": 12.077681874229347, "grad_norm": 0.07099944353103638, "learning_rate": 0.01, "loss": 1.9269, "step": 117540 }, { "epoch": 12.077990135635018, "grad_norm": 0.0620768666267395, "learning_rate": 0.01, "loss": 1.9505, "step": 117543 }, { "epoch": 12.07829839704069, "grad_norm": 0.04294934868812561, "learning_rate": 0.01, "loss": 1.9319, "step": 117546 }, { "epoch": 12.078606658446363, "grad_norm": 0.035651784390211105, "learning_rate": 0.01, "loss": 1.919, "step": 117549 }, { "epoch": 12.078914919852034, "grad_norm": 0.09945672005414963, "learning_rate": 0.01, "loss": 1.907, "step": 117552 }, { "epoch": 12.079223181257706, "grad_norm": 0.07488497346639633, "learning_rate": 0.01, "loss": 1.93, "step": 117555 }, { "epoch": 12.079531442663379, "grad_norm": 0.1251220703125, "learning_rate": 0.01, "loss": 1.9553, "step": 117558 }, { "epoch": 12.07983970406905, "grad_norm": 0.13060495257377625, "learning_rate": 0.01, "loss": 1.9538, "step": 117561 }, { "epoch": 12.080147965474723, "grad_norm": 0.10690896958112717, "learning_rate": 0.01, "loss": 1.9576, "step": 117564 }, { "epoch": 12.080456226880395, "grad_norm": 0.10013853758573532, "learning_rate": 0.01, "loss": 1.9261, "step": 117567 }, { "epoch": 12.080764488286066, "grad_norm": 0.05371091514825821, "learning_rate": 0.01, "loss": 1.9317, "step": 117570 }, { "epoch": 12.081072749691739, "grad_norm": 0.038389820605516434, "learning_rate": 0.01, "loss": 1.9297, "step": 117573 }, { "epoch": 12.081381011097411, "grad_norm": 0.06609676778316498, "learning_rate": 0.01, "loss": 1.9217, "step": 117576 }, { "epoch": 12.081689272503082, "grad_norm": 0.04369485005736351, "learning_rate": 0.01, "loss": 1.9108, "step": 117579 }, { "epoch": 12.081997533908755, "grad_norm": 0.040801163762807846, "learning_rate": 0.01, "loss": 1.9255, "step": 117582 }, { "epoch": 12.082305795314427, "grad_norm": 0.058864302933216095, "learning_rate": 0.01, "loss": 1.9224, "step": 117585 }, { "epoch": 12.082614056720098, "grad_norm": 0.032076817005872726, "learning_rate": 0.01, "loss": 1.9189, "step": 117588 }, { "epoch": 12.082922318125771, "grad_norm": 0.10731884092092514, "learning_rate": 0.01, "loss": 1.9276, "step": 117591 }, { "epoch": 12.083230579531442, "grad_norm": 0.04927503317594528, "learning_rate": 0.01, "loss": 1.9407, "step": 117594 }, { "epoch": 12.083538840937115, "grad_norm": 0.041223879903554916, "learning_rate": 0.01, "loss": 1.9099, "step": 117597 }, { "epoch": 12.083847102342787, "grad_norm": 0.08759777992963791, "learning_rate": 0.01, "loss": 1.9537, "step": 117600 }, { "epoch": 12.084155363748458, "grad_norm": 0.09925784170627594, "learning_rate": 0.01, "loss": 1.9376, "step": 117603 }, { "epoch": 12.08446362515413, "grad_norm": 0.04184839501976967, "learning_rate": 0.01, "loss": 1.9136, "step": 117606 }, { "epoch": 12.084771886559803, "grad_norm": 0.04358890652656555, "learning_rate": 0.01, "loss": 1.9365, "step": 117609 }, { "epoch": 12.085080147965474, "grad_norm": 0.054400619119405746, "learning_rate": 0.01, "loss": 1.9291, "step": 117612 }, { "epoch": 12.085388409371147, "grad_norm": 0.052087198942899704, "learning_rate": 0.01, "loss": 1.947, "step": 117615 }, { "epoch": 12.08569667077682, "grad_norm": 0.12438651919364929, "learning_rate": 0.01, "loss": 1.9378, "step": 117618 }, { "epoch": 12.08600493218249, "grad_norm": 0.07035225629806519, "learning_rate": 0.01, "loss": 1.9114, "step": 117621 }, { "epoch": 12.086313193588163, "grad_norm": 0.06258494406938553, "learning_rate": 0.01, "loss": 1.9326, "step": 117624 }, { "epoch": 12.086621454993836, "grad_norm": 0.039971187710762024, "learning_rate": 0.01, "loss": 1.9419, "step": 117627 }, { "epoch": 12.086929716399506, "grad_norm": 0.05577482283115387, "learning_rate": 0.01, "loss": 1.9522, "step": 117630 }, { "epoch": 12.087237977805179, "grad_norm": 0.039713747799396515, "learning_rate": 0.01, "loss": 1.9354, "step": 117633 }, { "epoch": 12.087546239210852, "grad_norm": 0.06807857006788254, "learning_rate": 0.01, "loss": 1.9477, "step": 117636 }, { "epoch": 12.087854500616523, "grad_norm": 0.09134294092655182, "learning_rate": 0.01, "loss": 1.9328, "step": 117639 }, { "epoch": 12.088162762022195, "grad_norm": 0.09650267660617828, "learning_rate": 0.01, "loss": 1.9405, "step": 117642 }, { "epoch": 12.088471023427866, "grad_norm": 0.06829991191625595, "learning_rate": 0.01, "loss": 1.9344, "step": 117645 }, { "epoch": 12.088779284833539, "grad_norm": 0.08449184894561768, "learning_rate": 0.01, "loss": 1.9434, "step": 117648 }, { "epoch": 12.089087546239211, "grad_norm": 0.0747109204530716, "learning_rate": 0.01, "loss": 1.9845, "step": 117651 }, { "epoch": 12.089395807644882, "grad_norm": 0.1121891513466835, "learning_rate": 0.01, "loss": 1.9351, "step": 117654 }, { "epoch": 12.089704069050555, "grad_norm": 0.058580800890922546, "learning_rate": 0.01, "loss": 1.9412, "step": 117657 }, { "epoch": 12.090012330456227, "grad_norm": 0.08042570948600769, "learning_rate": 0.01, "loss": 1.9459, "step": 117660 }, { "epoch": 12.090320591861898, "grad_norm": 0.12063337117433548, "learning_rate": 0.01, "loss": 1.9439, "step": 117663 }, { "epoch": 12.090628853267571, "grad_norm": 0.04594128206372261, "learning_rate": 0.01, "loss": 1.93, "step": 117666 }, { "epoch": 12.090937114673244, "grad_norm": 0.04587708041071892, "learning_rate": 0.01, "loss": 1.9533, "step": 117669 }, { "epoch": 12.091245376078914, "grad_norm": 0.041933804750442505, "learning_rate": 0.01, "loss": 1.9297, "step": 117672 }, { "epoch": 12.091553637484587, "grad_norm": 0.10076461732387543, "learning_rate": 0.01, "loss": 1.9429, "step": 117675 }, { "epoch": 12.09186189889026, "grad_norm": 0.058599699288606644, "learning_rate": 0.01, "loss": 1.915, "step": 117678 }, { "epoch": 12.09217016029593, "grad_norm": 0.1215745061635971, "learning_rate": 0.01, "loss": 1.9481, "step": 117681 }, { "epoch": 12.092478421701603, "grad_norm": 0.07247386127710342, "learning_rate": 0.01, "loss": 1.9511, "step": 117684 }, { "epoch": 12.092786683107274, "grad_norm": 0.04243781417608261, "learning_rate": 0.01, "loss": 1.9279, "step": 117687 }, { "epoch": 12.093094944512947, "grad_norm": 0.05693398788571358, "learning_rate": 0.01, "loss": 1.9286, "step": 117690 }, { "epoch": 12.09340320591862, "grad_norm": 0.04747099429368973, "learning_rate": 0.01, "loss": 1.955, "step": 117693 }, { "epoch": 12.09371146732429, "grad_norm": 0.03934578597545624, "learning_rate": 0.01, "loss": 1.9459, "step": 117696 }, { "epoch": 12.094019728729963, "grad_norm": 0.04611416533589363, "learning_rate": 0.01, "loss": 1.9407, "step": 117699 }, { "epoch": 12.094327990135636, "grad_norm": 0.05349862575531006, "learning_rate": 0.01, "loss": 1.9671, "step": 117702 }, { "epoch": 12.094636251541306, "grad_norm": 0.07715880125761032, "learning_rate": 0.01, "loss": 1.9382, "step": 117705 }, { "epoch": 12.094944512946979, "grad_norm": 0.07728411257266998, "learning_rate": 0.01, "loss": 1.9132, "step": 117708 }, { "epoch": 12.095252774352652, "grad_norm": 0.08406969159841537, "learning_rate": 0.01, "loss": 1.9492, "step": 117711 }, { "epoch": 12.095561035758323, "grad_norm": 0.10758381336927414, "learning_rate": 0.01, "loss": 1.9406, "step": 117714 }, { "epoch": 12.095869297163995, "grad_norm": 0.14954718947410583, "learning_rate": 0.01, "loss": 1.9387, "step": 117717 }, { "epoch": 12.096177558569668, "grad_norm": 0.07673077285289764, "learning_rate": 0.01, "loss": 1.9361, "step": 117720 }, { "epoch": 12.096485819975339, "grad_norm": 0.08066879212856293, "learning_rate": 0.01, "loss": 1.9704, "step": 117723 }, { "epoch": 12.096794081381011, "grad_norm": 0.04644524306058884, "learning_rate": 0.01, "loss": 1.9218, "step": 117726 }, { "epoch": 12.097102342786684, "grad_norm": 0.061609331518411636, "learning_rate": 0.01, "loss": 1.9461, "step": 117729 }, { "epoch": 12.097410604192355, "grad_norm": 0.037506479769945145, "learning_rate": 0.01, "loss": 1.9435, "step": 117732 }, { "epoch": 12.097718865598027, "grad_norm": 0.08549857884645462, "learning_rate": 0.01, "loss": 1.9464, "step": 117735 }, { "epoch": 12.098027127003698, "grad_norm": 0.04055694490671158, "learning_rate": 0.01, "loss": 1.9376, "step": 117738 }, { "epoch": 12.098335388409371, "grad_norm": 0.039073679596185684, "learning_rate": 0.01, "loss": 1.9588, "step": 117741 }, { "epoch": 12.098643649815044, "grad_norm": 0.14250591397285461, "learning_rate": 0.01, "loss": 1.9284, "step": 117744 }, { "epoch": 12.098951911220714, "grad_norm": 0.0917244404554367, "learning_rate": 0.01, "loss": 1.93, "step": 117747 }, { "epoch": 12.099260172626387, "grad_norm": 0.060811594128608704, "learning_rate": 0.01, "loss": 1.9287, "step": 117750 }, { "epoch": 12.09956843403206, "grad_norm": 0.04312697425484657, "learning_rate": 0.01, "loss": 1.942, "step": 117753 }, { "epoch": 12.09987669543773, "grad_norm": 0.032779429107904434, "learning_rate": 0.01, "loss": 1.9291, "step": 117756 }, { "epoch": 12.100184956843403, "grad_norm": 0.035446010529994965, "learning_rate": 0.01, "loss": 1.9304, "step": 117759 }, { "epoch": 12.100493218249076, "grad_norm": 0.035251226276159286, "learning_rate": 0.01, "loss": 1.962, "step": 117762 }, { "epoch": 12.100801479654747, "grad_norm": 0.0533762201666832, "learning_rate": 0.01, "loss": 1.9393, "step": 117765 }, { "epoch": 12.10110974106042, "grad_norm": 0.0436546690762043, "learning_rate": 0.01, "loss": 1.9245, "step": 117768 }, { "epoch": 12.101418002466092, "grad_norm": 0.0903606191277504, "learning_rate": 0.01, "loss": 1.9046, "step": 117771 }, { "epoch": 12.101726263871763, "grad_norm": 0.059017568826675415, "learning_rate": 0.01, "loss": 1.9382, "step": 117774 }, { "epoch": 12.102034525277436, "grad_norm": 0.08666199445724487, "learning_rate": 0.01, "loss": 1.9393, "step": 117777 }, { "epoch": 12.102342786683108, "grad_norm": 0.06040441244840622, "learning_rate": 0.01, "loss": 1.9426, "step": 117780 }, { "epoch": 12.102651048088779, "grad_norm": 0.04902610555291176, "learning_rate": 0.01, "loss": 1.9533, "step": 117783 }, { "epoch": 12.102959309494452, "grad_norm": 0.059788379818201065, "learning_rate": 0.01, "loss": 1.94, "step": 117786 }, { "epoch": 12.103267570900123, "grad_norm": 0.163142129778862, "learning_rate": 0.01, "loss": 1.9161, "step": 117789 }, { "epoch": 12.103575832305795, "grad_norm": 0.11989153921604156, "learning_rate": 0.01, "loss": 1.9341, "step": 117792 }, { "epoch": 12.103884093711468, "grad_norm": 0.054746828973293304, "learning_rate": 0.01, "loss": 1.9533, "step": 117795 }, { "epoch": 12.104192355117139, "grad_norm": 0.08966901898384094, "learning_rate": 0.01, "loss": 1.9161, "step": 117798 }, { "epoch": 12.104500616522811, "grad_norm": 0.04606639966368675, "learning_rate": 0.01, "loss": 1.9409, "step": 117801 }, { "epoch": 12.104808877928484, "grad_norm": 0.04012494906783104, "learning_rate": 0.01, "loss": 1.931, "step": 117804 }, { "epoch": 12.105117139334155, "grad_norm": 0.09926046431064606, "learning_rate": 0.01, "loss": 1.9415, "step": 117807 }, { "epoch": 12.105425400739827, "grad_norm": 0.06846530735492706, "learning_rate": 0.01, "loss": 1.9513, "step": 117810 }, { "epoch": 12.1057336621455, "grad_norm": 0.06577073037624359, "learning_rate": 0.01, "loss": 1.9365, "step": 117813 }, { "epoch": 12.106041923551171, "grad_norm": 0.04960223659873009, "learning_rate": 0.01, "loss": 1.9419, "step": 117816 }, { "epoch": 12.106350184956844, "grad_norm": 0.061768539249897, "learning_rate": 0.01, "loss": 1.9543, "step": 117819 }, { "epoch": 12.106658446362516, "grad_norm": 0.10435442626476288, "learning_rate": 0.01, "loss": 1.9473, "step": 117822 }, { "epoch": 12.106966707768187, "grad_norm": 0.043556876480579376, "learning_rate": 0.01, "loss": 1.9571, "step": 117825 }, { "epoch": 12.10727496917386, "grad_norm": 0.06298083066940308, "learning_rate": 0.01, "loss": 1.9446, "step": 117828 }, { "epoch": 12.10758323057953, "grad_norm": 0.07426002621650696, "learning_rate": 0.01, "loss": 1.957, "step": 117831 }, { "epoch": 12.107891491985203, "grad_norm": 0.06376281380653381, "learning_rate": 0.01, "loss": 1.9465, "step": 117834 }, { "epoch": 12.108199753390876, "grad_norm": 0.12556427717208862, "learning_rate": 0.01, "loss": 1.9397, "step": 117837 }, { "epoch": 12.108508014796547, "grad_norm": 0.08735141158103943, "learning_rate": 0.01, "loss": 1.9361, "step": 117840 }, { "epoch": 12.10881627620222, "grad_norm": 0.058545466512441635, "learning_rate": 0.01, "loss": 1.9239, "step": 117843 }, { "epoch": 12.109124537607892, "grad_norm": 0.07886437326669693, "learning_rate": 0.01, "loss": 1.9353, "step": 117846 }, { "epoch": 12.109432799013563, "grad_norm": 0.060616493225097656, "learning_rate": 0.01, "loss": 1.9243, "step": 117849 }, { "epoch": 12.109741060419235, "grad_norm": 0.10220709443092346, "learning_rate": 0.01, "loss": 1.9426, "step": 117852 }, { "epoch": 12.110049321824908, "grad_norm": 0.0761055052280426, "learning_rate": 0.01, "loss": 1.9276, "step": 117855 }, { "epoch": 12.110357583230579, "grad_norm": 0.08665987104177475, "learning_rate": 0.01, "loss": 1.9432, "step": 117858 }, { "epoch": 12.110665844636252, "grad_norm": 0.03652096167206764, "learning_rate": 0.01, "loss": 1.929, "step": 117861 }, { "epoch": 12.110974106041924, "grad_norm": 0.04153512045741081, "learning_rate": 0.01, "loss": 1.9451, "step": 117864 }, { "epoch": 12.111282367447595, "grad_norm": 0.04108879715204239, "learning_rate": 0.01, "loss": 1.942, "step": 117867 }, { "epoch": 12.111590628853268, "grad_norm": 0.04862569272518158, "learning_rate": 0.01, "loss": 1.9175, "step": 117870 }, { "epoch": 12.11189889025894, "grad_norm": 0.05914356932044029, "learning_rate": 0.01, "loss": 1.9557, "step": 117873 }, { "epoch": 12.112207151664611, "grad_norm": 0.0469847172498703, "learning_rate": 0.01, "loss": 1.9257, "step": 117876 }, { "epoch": 12.112515413070284, "grad_norm": 0.04443184658885002, "learning_rate": 0.01, "loss": 1.9364, "step": 117879 }, { "epoch": 12.112823674475955, "grad_norm": 0.030330419540405273, "learning_rate": 0.01, "loss": 1.9236, "step": 117882 }, { "epoch": 12.113131935881627, "grad_norm": 0.1044810339808464, "learning_rate": 0.01, "loss": 1.9517, "step": 117885 }, { "epoch": 12.1134401972873, "grad_norm": 0.06196374073624611, "learning_rate": 0.01, "loss": 1.9475, "step": 117888 }, { "epoch": 12.113748458692971, "grad_norm": 0.05133262276649475, "learning_rate": 0.01, "loss": 1.9437, "step": 117891 }, { "epoch": 12.114056720098644, "grad_norm": 0.049086328595876694, "learning_rate": 0.01, "loss": 1.927, "step": 117894 }, { "epoch": 12.114364981504316, "grad_norm": 0.07143127918243408, "learning_rate": 0.01, "loss": 1.9391, "step": 117897 }, { "epoch": 12.114673242909987, "grad_norm": 0.13039495050907135, "learning_rate": 0.01, "loss": 1.9508, "step": 117900 }, { "epoch": 12.11498150431566, "grad_norm": 0.06353791058063507, "learning_rate": 0.01, "loss": 1.9455, "step": 117903 }, { "epoch": 12.115289765721332, "grad_norm": 0.052343275398015976, "learning_rate": 0.01, "loss": 1.9637, "step": 117906 }, { "epoch": 12.115598027127003, "grad_norm": 0.10411820560693741, "learning_rate": 0.01, "loss": 1.924, "step": 117909 }, { "epoch": 12.115906288532676, "grad_norm": 0.06003390997648239, "learning_rate": 0.01, "loss": 1.9275, "step": 117912 }, { "epoch": 12.116214549938348, "grad_norm": 0.0739888846874237, "learning_rate": 0.01, "loss": 1.9261, "step": 117915 }, { "epoch": 12.11652281134402, "grad_norm": 0.05417364090681076, "learning_rate": 0.01, "loss": 1.9413, "step": 117918 }, { "epoch": 12.116831072749692, "grad_norm": 0.07016511261463165, "learning_rate": 0.01, "loss": 1.9208, "step": 117921 }, { "epoch": 12.117139334155365, "grad_norm": 0.06602293998003006, "learning_rate": 0.01, "loss": 1.9307, "step": 117924 }, { "epoch": 12.117447595561035, "grad_norm": 0.03474023938179016, "learning_rate": 0.01, "loss": 1.9519, "step": 117927 }, { "epoch": 12.117755856966708, "grad_norm": 0.05247775837779045, "learning_rate": 0.01, "loss": 1.9564, "step": 117930 }, { "epoch": 12.118064118372379, "grad_norm": 0.030670950189232826, "learning_rate": 0.01, "loss": 1.945, "step": 117933 }, { "epoch": 12.118372379778052, "grad_norm": 0.05339053273200989, "learning_rate": 0.01, "loss": 1.9572, "step": 117936 }, { "epoch": 12.118680641183724, "grad_norm": 0.06306292116641998, "learning_rate": 0.01, "loss": 1.9105, "step": 117939 }, { "epoch": 12.118988902589395, "grad_norm": 0.06941086053848267, "learning_rate": 0.01, "loss": 1.921, "step": 117942 }, { "epoch": 12.119297163995068, "grad_norm": 0.04501570016145706, "learning_rate": 0.01, "loss": 1.9294, "step": 117945 }, { "epoch": 12.11960542540074, "grad_norm": 0.14525742828845978, "learning_rate": 0.01, "loss": 1.9305, "step": 117948 }, { "epoch": 12.119913686806411, "grad_norm": 0.10927775502204895, "learning_rate": 0.01, "loss": 1.9412, "step": 117951 }, { "epoch": 12.120221948212084, "grad_norm": 0.06403226405382156, "learning_rate": 0.01, "loss": 1.9649, "step": 117954 }, { "epoch": 12.120530209617757, "grad_norm": 0.04629103094339371, "learning_rate": 0.01, "loss": 1.9391, "step": 117957 }, { "epoch": 12.120838471023427, "grad_norm": 0.04517742991447449, "learning_rate": 0.01, "loss": 1.9523, "step": 117960 }, { "epoch": 12.1211467324291, "grad_norm": 0.045234084129333496, "learning_rate": 0.01, "loss": 1.9411, "step": 117963 }, { "epoch": 12.121454993834773, "grad_norm": 0.07998780161142349, "learning_rate": 0.01, "loss": 1.9188, "step": 117966 }, { "epoch": 12.121763255240444, "grad_norm": 0.1172008141875267, "learning_rate": 0.01, "loss": 1.9285, "step": 117969 }, { "epoch": 12.122071516646116, "grad_norm": 0.05009810999035835, "learning_rate": 0.01, "loss": 1.9441, "step": 117972 }, { "epoch": 12.122379778051789, "grad_norm": 0.11575902998447418, "learning_rate": 0.01, "loss": 1.9287, "step": 117975 }, { "epoch": 12.12268803945746, "grad_norm": 0.05342715233564377, "learning_rate": 0.01, "loss": 1.9746, "step": 117978 }, { "epoch": 12.122996300863132, "grad_norm": 0.0628928691148758, "learning_rate": 0.01, "loss": 1.9246, "step": 117981 }, { "epoch": 12.123304562268803, "grad_norm": 0.13087360560894012, "learning_rate": 0.01, "loss": 1.9299, "step": 117984 }, { "epoch": 12.123612823674476, "grad_norm": 0.04477988928556442, "learning_rate": 0.01, "loss": 1.9095, "step": 117987 }, { "epoch": 12.123921085080148, "grad_norm": 0.11973872035741806, "learning_rate": 0.01, "loss": 1.9179, "step": 117990 }, { "epoch": 12.12422934648582, "grad_norm": 0.04543408006429672, "learning_rate": 0.01, "loss": 1.9431, "step": 117993 }, { "epoch": 12.124537607891492, "grad_norm": 0.03628748282790184, "learning_rate": 0.01, "loss": 1.9492, "step": 117996 }, { "epoch": 12.124845869297165, "grad_norm": 0.09488870948553085, "learning_rate": 0.01, "loss": 1.9345, "step": 117999 }, { "epoch": 12.125154130702835, "grad_norm": 0.0529656819999218, "learning_rate": 0.01, "loss": 1.9334, "step": 118002 }, { "epoch": 12.125462392108508, "grad_norm": 0.06371456384658813, "learning_rate": 0.01, "loss": 1.9589, "step": 118005 }, { "epoch": 12.12577065351418, "grad_norm": 0.03643391653895378, "learning_rate": 0.01, "loss": 1.9384, "step": 118008 }, { "epoch": 12.126078914919852, "grad_norm": 0.042078692466020584, "learning_rate": 0.01, "loss": 1.9495, "step": 118011 }, { "epoch": 12.126387176325524, "grad_norm": 0.1325952559709549, "learning_rate": 0.01, "loss": 1.9028, "step": 118014 }, { "epoch": 12.126695437731197, "grad_norm": 0.05285250023007393, "learning_rate": 0.01, "loss": 1.9424, "step": 118017 }, { "epoch": 12.127003699136868, "grad_norm": 0.06790726631879807, "learning_rate": 0.01, "loss": 1.9172, "step": 118020 }, { "epoch": 12.12731196054254, "grad_norm": 0.07077708095312119, "learning_rate": 0.01, "loss": 1.9433, "step": 118023 }, { "epoch": 12.127620221948211, "grad_norm": 0.05388166382908821, "learning_rate": 0.01, "loss": 1.946, "step": 118026 }, { "epoch": 12.127928483353884, "grad_norm": 0.07416366040706635, "learning_rate": 0.01, "loss": 1.9515, "step": 118029 }, { "epoch": 12.128236744759556, "grad_norm": 0.06110347434878349, "learning_rate": 0.01, "loss": 1.945, "step": 118032 }, { "epoch": 12.128545006165227, "grad_norm": 0.06149152293801308, "learning_rate": 0.01, "loss": 1.9255, "step": 118035 }, { "epoch": 12.1288532675709, "grad_norm": 0.10737799853086472, "learning_rate": 0.01, "loss": 1.9317, "step": 118038 }, { "epoch": 12.129161528976573, "grad_norm": 0.14195168018341064, "learning_rate": 0.01, "loss": 1.922, "step": 118041 }, { "epoch": 12.129469790382243, "grad_norm": 0.09065920114517212, "learning_rate": 0.01, "loss": 1.9208, "step": 118044 }, { "epoch": 12.129778051787916, "grad_norm": 0.06632434576749802, "learning_rate": 0.01, "loss": 1.9471, "step": 118047 }, { "epoch": 12.130086313193589, "grad_norm": 0.05222143977880478, "learning_rate": 0.01, "loss": 1.912, "step": 118050 }, { "epoch": 12.13039457459926, "grad_norm": 0.050823818892240524, "learning_rate": 0.01, "loss": 1.9487, "step": 118053 }, { "epoch": 12.130702836004932, "grad_norm": 0.05407403036952019, "learning_rate": 0.01, "loss": 1.9394, "step": 118056 }, { "epoch": 12.131011097410605, "grad_norm": 0.052906397730112076, "learning_rate": 0.01, "loss": 1.9349, "step": 118059 }, { "epoch": 12.131319358816276, "grad_norm": 0.08943575620651245, "learning_rate": 0.01, "loss": 1.9213, "step": 118062 }, { "epoch": 12.131627620221948, "grad_norm": 0.043714381754398346, "learning_rate": 0.01, "loss": 1.9391, "step": 118065 }, { "epoch": 12.131935881627621, "grad_norm": 0.05486621335148811, "learning_rate": 0.01, "loss": 1.9181, "step": 118068 }, { "epoch": 12.132244143033292, "grad_norm": 0.05442049726843834, "learning_rate": 0.01, "loss": 1.9222, "step": 118071 }, { "epoch": 12.132552404438965, "grad_norm": 0.051040273159742355, "learning_rate": 0.01, "loss": 1.9286, "step": 118074 }, { "epoch": 12.132860665844635, "grad_norm": 0.05163537338376045, "learning_rate": 0.01, "loss": 1.9464, "step": 118077 }, { "epoch": 12.133168927250308, "grad_norm": 0.12019070982933044, "learning_rate": 0.01, "loss": 1.9468, "step": 118080 }, { "epoch": 12.13347718865598, "grad_norm": 0.048908084630966187, "learning_rate": 0.01, "loss": 1.9666, "step": 118083 }, { "epoch": 12.133785450061652, "grad_norm": 0.12288028746843338, "learning_rate": 0.01, "loss": 1.9192, "step": 118086 }, { "epoch": 12.134093711467324, "grad_norm": 0.12700609862804413, "learning_rate": 0.01, "loss": 1.9239, "step": 118089 }, { "epoch": 12.134401972872997, "grad_norm": 0.08140483498573303, "learning_rate": 0.01, "loss": 1.9438, "step": 118092 }, { "epoch": 12.134710234278668, "grad_norm": 0.05661148577928543, "learning_rate": 0.01, "loss": 1.9398, "step": 118095 }, { "epoch": 12.13501849568434, "grad_norm": 0.04805785417556763, "learning_rate": 0.01, "loss": 1.9405, "step": 118098 }, { "epoch": 12.135326757090013, "grad_norm": 0.06646308302879333, "learning_rate": 0.01, "loss": 1.9599, "step": 118101 }, { "epoch": 12.135635018495684, "grad_norm": 0.05786815658211708, "learning_rate": 0.01, "loss": 1.9367, "step": 118104 }, { "epoch": 12.135943279901356, "grad_norm": 0.04217448830604553, "learning_rate": 0.01, "loss": 1.9295, "step": 118107 }, { "epoch": 12.136251541307029, "grad_norm": 0.03454460948705673, "learning_rate": 0.01, "loss": 1.9328, "step": 118110 }, { "epoch": 12.1365598027127, "grad_norm": 0.11412219703197479, "learning_rate": 0.01, "loss": 1.9457, "step": 118113 }, { "epoch": 12.136868064118373, "grad_norm": 0.08381339907646179, "learning_rate": 0.01, "loss": 1.9323, "step": 118116 }, { "epoch": 12.137176325524045, "grad_norm": 0.059763435274362564, "learning_rate": 0.01, "loss": 1.9249, "step": 118119 }, { "epoch": 12.137484586929716, "grad_norm": 0.08242775499820709, "learning_rate": 0.01, "loss": 1.9398, "step": 118122 }, { "epoch": 12.137792848335389, "grad_norm": 0.04511423036456108, "learning_rate": 0.01, "loss": 1.9204, "step": 118125 }, { "epoch": 12.13810110974106, "grad_norm": 0.04035963490605354, "learning_rate": 0.01, "loss": 1.9399, "step": 118128 }, { "epoch": 12.138409371146732, "grad_norm": 0.038142427802085876, "learning_rate": 0.01, "loss": 1.9218, "step": 118131 }, { "epoch": 12.138717632552405, "grad_norm": 0.10171832144260406, "learning_rate": 0.01, "loss": 1.9096, "step": 118134 }, { "epoch": 12.139025893958076, "grad_norm": 0.10713452100753784, "learning_rate": 0.01, "loss": 1.935, "step": 118137 }, { "epoch": 12.139334155363748, "grad_norm": 0.10755790024995804, "learning_rate": 0.01, "loss": 1.9691, "step": 118140 }, { "epoch": 12.139642416769421, "grad_norm": 0.04062514007091522, "learning_rate": 0.01, "loss": 1.9493, "step": 118143 }, { "epoch": 12.139950678175092, "grad_norm": 0.04124769568443298, "learning_rate": 0.01, "loss": 1.936, "step": 118146 }, { "epoch": 12.140258939580765, "grad_norm": 0.04127255827188492, "learning_rate": 0.01, "loss": 1.9251, "step": 118149 }, { "epoch": 12.140567200986437, "grad_norm": 0.03743264824151993, "learning_rate": 0.01, "loss": 1.9258, "step": 118152 }, { "epoch": 12.140875462392108, "grad_norm": 0.08324801921844482, "learning_rate": 0.01, "loss": 1.9545, "step": 118155 }, { "epoch": 12.14118372379778, "grad_norm": 0.03775719553232193, "learning_rate": 0.01, "loss": 1.9553, "step": 118158 }, { "epoch": 12.141491985203453, "grad_norm": 0.12722140550613403, "learning_rate": 0.01, "loss": 1.968, "step": 118161 }, { "epoch": 12.141800246609124, "grad_norm": 0.06462185084819794, "learning_rate": 0.01, "loss": 1.9141, "step": 118164 }, { "epoch": 12.142108508014797, "grad_norm": 0.04018726199865341, "learning_rate": 0.01, "loss": 1.9276, "step": 118167 }, { "epoch": 12.142416769420468, "grad_norm": 0.03868977352976799, "learning_rate": 0.01, "loss": 1.9276, "step": 118170 }, { "epoch": 12.14272503082614, "grad_norm": 0.06277395039796829, "learning_rate": 0.01, "loss": 1.9287, "step": 118173 }, { "epoch": 12.143033292231813, "grad_norm": 0.09695149958133698, "learning_rate": 0.01, "loss": 1.9615, "step": 118176 }, { "epoch": 12.143341553637484, "grad_norm": 0.13503365218639374, "learning_rate": 0.01, "loss": 1.9408, "step": 118179 }, { "epoch": 12.143649815043156, "grad_norm": 0.14480143785476685, "learning_rate": 0.01, "loss": 1.9579, "step": 118182 }, { "epoch": 12.143958076448829, "grad_norm": 0.08472104370594025, "learning_rate": 0.01, "loss": 1.9264, "step": 118185 }, { "epoch": 12.1442663378545, "grad_norm": 0.06492681056261063, "learning_rate": 0.01, "loss": 1.9762, "step": 118188 }, { "epoch": 12.144574599260173, "grad_norm": 0.046987369656562805, "learning_rate": 0.01, "loss": 1.9482, "step": 118191 }, { "epoch": 12.144882860665845, "grad_norm": 0.07729876041412354, "learning_rate": 0.01, "loss": 1.9332, "step": 118194 }, { "epoch": 12.145191122071516, "grad_norm": 0.04762044548988342, "learning_rate": 0.01, "loss": 1.9307, "step": 118197 }, { "epoch": 12.145499383477189, "grad_norm": 0.044221337884664536, "learning_rate": 0.01, "loss": 1.9345, "step": 118200 }, { "epoch": 12.145807644882861, "grad_norm": 0.04139994457364082, "learning_rate": 0.01, "loss": 1.9504, "step": 118203 }, { "epoch": 12.146115906288532, "grad_norm": 0.039472367614507675, "learning_rate": 0.01, "loss": 1.9257, "step": 118206 }, { "epoch": 12.146424167694205, "grad_norm": 0.08018239587545395, "learning_rate": 0.01, "loss": 1.9281, "step": 118209 }, { "epoch": 12.146732429099877, "grad_norm": 0.07383769750595093, "learning_rate": 0.01, "loss": 1.9293, "step": 118212 }, { "epoch": 12.147040690505548, "grad_norm": 0.05636359751224518, "learning_rate": 0.01, "loss": 1.9551, "step": 118215 }, { "epoch": 12.147348951911221, "grad_norm": 0.04971145838499069, "learning_rate": 0.01, "loss": 1.9482, "step": 118218 }, { "epoch": 12.147657213316892, "grad_norm": 0.051524318754673004, "learning_rate": 0.01, "loss": 1.939, "step": 118221 }, { "epoch": 12.147965474722564, "grad_norm": 0.11375819146633148, "learning_rate": 0.01, "loss": 1.961, "step": 118224 }, { "epoch": 12.148273736128237, "grad_norm": 0.08179773390293121, "learning_rate": 0.01, "loss": 1.9359, "step": 118227 }, { "epoch": 12.148581997533908, "grad_norm": 0.10421700030565262, "learning_rate": 0.01, "loss": 1.9167, "step": 118230 }, { "epoch": 12.14889025893958, "grad_norm": 0.04878521338105202, "learning_rate": 0.01, "loss": 1.9079, "step": 118233 }, { "epoch": 12.149198520345253, "grad_norm": 0.047480858862400055, "learning_rate": 0.01, "loss": 1.9288, "step": 118236 }, { "epoch": 12.149506781750924, "grad_norm": 0.04842216521501541, "learning_rate": 0.01, "loss": 1.9411, "step": 118239 }, { "epoch": 12.149815043156597, "grad_norm": 0.12058722972869873, "learning_rate": 0.01, "loss": 1.9145, "step": 118242 }, { "epoch": 12.15012330456227, "grad_norm": 0.14690682291984558, "learning_rate": 0.01, "loss": 1.9327, "step": 118245 }, { "epoch": 12.15043156596794, "grad_norm": 0.04327579587697983, "learning_rate": 0.01, "loss": 1.9124, "step": 118248 }, { "epoch": 12.150739827373613, "grad_norm": 0.03938378393650055, "learning_rate": 0.01, "loss": 1.9549, "step": 118251 }, { "epoch": 12.151048088779286, "grad_norm": 0.0566302128136158, "learning_rate": 0.01, "loss": 1.9444, "step": 118254 }, { "epoch": 12.151356350184956, "grad_norm": 0.06294912099838257, "learning_rate": 0.01, "loss": 1.9441, "step": 118257 }, { "epoch": 12.151664611590629, "grad_norm": 0.045011408627033234, "learning_rate": 0.01, "loss": 1.9448, "step": 118260 }, { "epoch": 12.151972872996302, "grad_norm": 0.1352638453245163, "learning_rate": 0.01, "loss": 1.9356, "step": 118263 }, { "epoch": 12.152281134401973, "grad_norm": 0.052162181586027145, "learning_rate": 0.01, "loss": 1.9283, "step": 118266 }, { "epoch": 12.152589395807645, "grad_norm": 0.05339041352272034, "learning_rate": 0.01, "loss": 1.9331, "step": 118269 }, { "epoch": 12.152897657213316, "grad_norm": 0.09388918429613113, "learning_rate": 0.01, "loss": 1.9376, "step": 118272 }, { "epoch": 12.153205918618989, "grad_norm": 0.06212795898318291, "learning_rate": 0.01, "loss": 1.9553, "step": 118275 }, { "epoch": 12.153514180024661, "grad_norm": 0.11813616007566452, "learning_rate": 0.01, "loss": 1.9436, "step": 118278 }, { "epoch": 12.153822441430332, "grad_norm": 0.07211315631866455, "learning_rate": 0.01, "loss": 1.9381, "step": 118281 }, { "epoch": 12.154130702836005, "grad_norm": 0.07079070061445236, "learning_rate": 0.01, "loss": 1.9602, "step": 118284 }, { "epoch": 12.154438964241677, "grad_norm": 0.06737685203552246, "learning_rate": 0.01, "loss": 1.9244, "step": 118287 }, { "epoch": 12.154747225647348, "grad_norm": 0.03841644898056984, "learning_rate": 0.01, "loss": 1.9267, "step": 118290 }, { "epoch": 12.155055487053021, "grad_norm": 0.09350793063640594, "learning_rate": 0.01, "loss": 1.9401, "step": 118293 }, { "epoch": 12.155363748458694, "grad_norm": 0.09523818641901016, "learning_rate": 0.01, "loss": 1.9725, "step": 118296 }, { "epoch": 12.155672009864364, "grad_norm": 0.05099066346883774, "learning_rate": 0.01, "loss": 1.9218, "step": 118299 }, { "epoch": 12.155980271270037, "grad_norm": 0.05052245408296585, "learning_rate": 0.01, "loss": 1.9275, "step": 118302 }, { "epoch": 12.15628853267571, "grad_norm": 0.07317576557397842, "learning_rate": 0.01, "loss": 1.9173, "step": 118305 }, { "epoch": 12.15659679408138, "grad_norm": 0.05431431531906128, "learning_rate": 0.01, "loss": 1.9173, "step": 118308 }, { "epoch": 12.156905055487053, "grad_norm": 0.038956791162490845, "learning_rate": 0.01, "loss": 1.9187, "step": 118311 }, { "epoch": 12.157213316892726, "grad_norm": 0.07684636116027832, "learning_rate": 0.01, "loss": 1.9365, "step": 118314 }, { "epoch": 12.157521578298397, "grad_norm": 0.08905784040689468, "learning_rate": 0.01, "loss": 1.9168, "step": 118317 }, { "epoch": 12.15782983970407, "grad_norm": 0.05072393640875816, "learning_rate": 0.01, "loss": 1.9046, "step": 118320 }, { "epoch": 12.15813810110974, "grad_norm": 0.04204133525490761, "learning_rate": 0.01, "loss": 1.9672, "step": 118323 }, { "epoch": 12.158446362515413, "grad_norm": 0.03856554999947548, "learning_rate": 0.01, "loss": 1.9362, "step": 118326 }, { "epoch": 12.158754623921086, "grad_norm": 0.06067296117544174, "learning_rate": 0.01, "loss": 1.932, "step": 118329 }, { "epoch": 12.159062885326756, "grad_norm": 0.09541289508342743, "learning_rate": 0.01, "loss": 1.945, "step": 118332 }, { "epoch": 12.159371146732429, "grad_norm": 0.09871894866228104, "learning_rate": 0.01, "loss": 1.9468, "step": 118335 }, { "epoch": 12.159679408138102, "grad_norm": 0.06398959457874298, "learning_rate": 0.01, "loss": 1.9366, "step": 118338 }, { "epoch": 12.159987669543773, "grad_norm": 0.055907636880874634, "learning_rate": 0.01, "loss": 1.8922, "step": 118341 }, { "epoch": 12.160295930949445, "grad_norm": 0.053581602871418, "learning_rate": 0.01, "loss": 1.9258, "step": 118344 }, { "epoch": 12.160604192355118, "grad_norm": 0.035246655344963074, "learning_rate": 0.01, "loss": 1.9358, "step": 118347 }, { "epoch": 12.160912453760789, "grad_norm": 0.05500401556491852, "learning_rate": 0.01, "loss": 1.9586, "step": 118350 }, { "epoch": 12.161220715166461, "grad_norm": 0.10213566571474075, "learning_rate": 0.01, "loss": 1.9563, "step": 118353 }, { "epoch": 12.161528976572134, "grad_norm": 0.056656528264284134, "learning_rate": 0.01, "loss": 1.9367, "step": 118356 }, { "epoch": 12.161837237977805, "grad_norm": 0.05165443941950798, "learning_rate": 0.01, "loss": 1.9428, "step": 118359 }, { "epoch": 12.162145499383477, "grad_norm": 0.09598568081855774, "learning_rate": 0.01, "loss": 1.9473, "step": 118362 }, { "epoch": 12.162453760789148, "grad_norm": 0.05595939978957176, "learning_rate": 0.01, "loss": 1.9242, "step": 118365 }, { "epoch": 12.162762022194821, "grad_norm": 0.09582502394914627, "learning_rate": 0.01, "loss": 1.9348, "step": 118368 }, { "epoch": 12.163070283600494, "grad_norm": 0.048460979014635086, "learning_rate": 0.01, "loss": 1.9463, "step": 118371 }, { "epoch": 12.163378545006164, "grad_norm": 0.04140559211373329, "learning_rate": 0.01, "loss": 1.9398, "step": 118374 }, { "epoch": 12.163686806411837, "grad_norm": 0.11664795130491257, "learning_rate": 0.01, "loss": 1.9351, "step": 118377 }, { "epoch": 12.16399506781751, "grad_norm": 0.05546128749847412, "learning_rate": 0.01, "loss": 1.9257, "step": 118380 }, { "epoch": 12.16430332922318, "grad_norm": 0.0834437757730484, "learning_rate": 0.01, "loss": 1.9565, "step": 118383 }, { "epoch": 12.164611590628853, "grad_norm": 0.10547520220279694, "learning_rate": 0.01, "loss": 1.9248, "step": 118386 }, { "epoch": 12.164919852034526, "grad_norm": 0.0766419768333435, "learning_rate": 0.01, "loss": 1.9291, "step": 118389 }, { "epoch": 12.165228113440197, "grad_norm": 0.0726955235004425, "learning_rate": 0.01, "loss": 1.9149, "step": 118392 }, { "epoch": 12.16553637484587, "grad_norm": 0.08781227469444275, "learning_rate": 0.01, "loss": 1.9423, "step": 118395 }, { "epoch": 12.165844636251542, "grad_norm": 0.1715017706155777, "learning_rate": 0.01, "loss": 1.9616, "step": 118398 }, { "epoch": 12.166152897657213, "grad_norm": 0.06803517788648605, "learning_rate": 0.01, "loss": 1.9653, "step": 118401 }, { "epoch": 12.166461159062885, "grad_norm": 0.06172870472073555, "learning_rate": 0.01, "loss": 1.9321, "step": 118404 }, { "epoch": 12.166769420468558, "grad_norm": 0.0564359687268734, "learning_rate": 0.01, "loss": 1.9468, "step": 118407 }, { "epoch": 12.167077681874229, "grad_norm": 0.04325263574719429, "learning_rate": 0.01, "loss": 1.9318, "step": 118410 }, { "epoch": 12.167385943279902, "grad_norm": 0.04649733752012253, "learning_rate": 0.01, "loss": 1.918, "step": 118413 }, { "epoch": 12.167694204685573, "grad_norm": 0.03295613080263138, "learning_rate": 0.01, "loss": 1.9395, "step": 118416 }, { "epoch": 12.168002466091245, "grad_norm": 0.06244640424847603, "learning_rate": 0.01, "loss": 1.9293, "step": 118419 }, { "epoch": 12.168310727496918, "grad_norm": 0.06391515582799911, "learning_rate": 0.01, "loss": 1.9348, "step": 118422 }, { "epoch": 12.168618988902589, "grad_norm": 0.0544745959341526, "learning_rate": 0.01, "loss": 1.917, "step": 118425 }, { "epoch": 12.168927250308261, "grad_norm": 0.04499959573149681, "learning_rate": 0.01, "loss": 1.9128, "step": 118428 }, { "epoch": 12.169235511713934, "grad_norm": 0.040443163365125656, "learning_rate": 0.01, "loss": 1.9247, "step": 118431 }, { "epoch": 12.169543773119605, "grad_norm": 0.039167050272226334, "learning_rate": 0.01, "loss": 1.9244, "step": 118434 }, { "epoch": 12.169852034525277, "grad_norm": 0.07148496061563492, "learning_rate": 0.01, "loss": 1.9282, "step": 118437 }, { "epoch": 12.17016029593095, "grad_norm": 0.04941992461681366, "learning_rate": 0.01, "loss": 1.9188, "step": 118440 }, { "epoch": 12.170468557336621, "grad_norm": 0.057443637400865555, "learning_rate": 0.01, "loss": 1.9544, "step": 118443 }, { "epoch": 12.170776818742294, "grad_norm": 0.07597741484642029, "learning_rate": 0.01, "loss": 1.9417, "step": 118446 }, { "epoch": 12.171085080147966, "grad_norm": 0.13708484172821045, "learning_rate": 0.01, "loss": 1.9692, "step": 118449 }, { "epoch": 12.171393341553637, "grad_norm": 0.04398206248879433, "learning_rate": 0.01, "loss": 1.9114, "step": 118452 }, { "epoch": 12.17170160295931, "grad_norm": 0.10531770437955856, "learning_rate": 0.01, "loss": 1.9402, "step": 118455 }, { "epoch": 12.172009864364982, "grad_norm": 0.05901032313704491, "learning_rate": 0.01, "loss": 1.9415, "step": 118458 }, { "epoch": 12.172318125770653, "grad_norm": 0.050240326672792435, "learning_rate": 0.01, "loss": 1.9521, "step": 118461 }, { "epoch": 12.172626387176326, "grad_norm": 0.04151801019906998, "learning_rate": 0.01, "loss": 1.919, "step": 118464 }, { "epoch": 12.172934648581997, "grad_norm": 0.10429253429174423, "learning_rate": 0.01, "loss": 1.9675, "step": 118467 }, { "epoch": 12.17324290998767, "grad_norm": 0.0488281287252903, "learning_rate": 0.01, "loss": 1.9354, "step": 118470 }, { "epoch": 12.173551171393342, "grad_norm": 0.08701854944229126, "learning_rate": 0.01, "loss": 1.9364, "step": 118473 }, { "epoch": 12.173859432799013, "grad_norm": 0.07329504191875458, "learning_rate": 0.01, "loss": 1.9268, "step": 118476 }, { "epoch": 12.174167694204685, "grad_norm": 0.11235913634300232, "learning_rate": 0.01, "loss": 1.9336, "step": 118479 }, { "epoch": 12.174475955610358, "grad_norm": 0.09730274975299835, "learning_rate": 0.01, "loss": 1.9253, "step": 118482 }, { "epoch": 12.174784217016029, "grad_norm": 0.06408539414405823, "learning_rate": 0.01, "loss": 1.9232, "step": 118485 }, { "epoch": 12.175092478421702, "grad_norm": 0.08930247277021408, "learning_rate": 0.01, "loss": 1.9591, "step": 118488 }, { "epoch": 12.175400739827374, "grad_norm": 0.07743005454540253, "learning_rate": 0.01, "loss": 1.9114, "step": 118491 }, { "epoch": 12.175709001233045, "grad_norm": 0.033463992178440094, "learning_rate": 0.01, "loss": 1.934, "step": 118494 }, { "epoch": 12.176017262638718, "grad_norm": 0.045047443360090256, "learning_rate": 0.01, "loss": 1.9271, "step": 118497 }, { "epoch": 12.17632552404439, "grad_norm": 0.08373583108186722, "learning_rate": 0.01, "loss": 1.9253, "step": 118500 }, { "epoch": 12.176633785450061, "grad_norm": 0.1224374994635582, "learning_rate": 0.01, "loss": 1.9779, "step": 118503 }, { "epoch": 12.176942046855734, "grad_norm": 0.05045723170042038, "learning_rate": 0.01, "loss": 1.9409, "step": 118506 }, { "epoch": 12.177250308261407, "grad_norm": 0.04794750362634659, "learning_rate": 0.01, "loss": 1.9451, "step": 118509 }, { "epoch": 12.177558569667077, "grad_norm": 0.08531951904296875, "learning_rate": 0.01, "loss": 1.9459, "step": 118512 }, { "epoch": 12.17786683107275, "grad_norm": 0.062374114990234375, "learning_rate": 0.01, "loss": 1.933, "step": 118515 }, { "epoch": 12.178175092478421, "grad_norm": 0.06713505834341049, "learning_rate": 0.01, "loss": 1.9529, "step": 118518 }, { "epoch": 12.178483353884094, "grad_norm": 0.11794169247150421, "learning_rate": 0.01, "loss": 1.9487, "step": 118521 }, { "epoch": 12.178791615289766, "grad_norm": 0.05226500704884529, "learning_rate": 0.01, "loss": 1.9632, "step": 118524 }, { "epoch": 12.179099876695437, "grad_norm": 0.0920417532324791, "learning_rate": 0.01, "loss": 1.9191, "step": 118527 }, { "epoch": 12.17940813810111, "grad_norm": 0.05339904874563217, "learning_rate": 0.01, "loss": 1.9631, "step": 118530 }, { "epoch": 12.179716399506782, "grad_norm": 0.08635959774255753, "learning_rate": 0.01, "loss": 1.9437, "step": 118533 }, { "epoch": 12.180024660912453, "grad_norm": 0.052172720432281494, "learning_rate": 0.01, "loss": 1.9243, "step": 118536 }, { "epoch": 12.180332922318126, "grad_norm": 0.04417841508984566, "learning_rate": 0.01, "loss": 1.9422, "step": 118539 }, { "epoch": 12.180641183723798, "grad_norm": 0.05313883721828461, "learning_rate": 0.01, "loss": 1.9281, "step": 118542 }, { "epoch": 12.18094944512947, "grad_norm": 0.10818638652563095, "learning_rate": 0.01, "loss": 1.9291, "step": 118545 }, { "epoch": 12.181257706535142, "grad_norm": 0.12224768847227097, "learning_rate": 0.01, "loss": 1.9168, "step": 118548 }, { "epoch": 12.181565967940815, "grad_norm": 0.08173675090074539, "learning_rate": 0.01, "loss": 1.9652, "step": 118551 }, { "epoch": 12.181874229346485, "grad_norm": 0.1057460829615593, "learning_rate": 0.01, "loss": 1.9265, "step": 118554 }, { "epoch": 12.182182490752158, "grad_norm": 0.10583849996328354, "learning_rate": 0.01, "loss": 1.929, "step": 118557 }, { "epoch": 12.182490752157829, "grad_norm": 0.04914642497897148, "learning_rate": 0.01, "loss": 1.9397, "step": 118560 }, { "epoch": 12.182799013563502, "grad_norm": 0.04464605823159218, "learning_rate": 0.01, "loss": 1.9434, "step": 118563 }, { "epoch": 12.183107274969174, "grad_norm": 0.0556897297501564, "learning_rate": 0.01, "loss": 1.9594, "step": 118566 }, { "epoch": 12.183415536374845, "grad_norm": 0.07571946829557419, "learning_rate": 0.01, "loss": 1.9529, "step": 118569 }, { "epoch": 12.183723797780518, "grad_norm": 0.12731902301311493, "learning_rate": 0.01, "loss": 1.9407, "step": 118572 }, { "epoch": 12.18403205918619, "grad_norm": 0.03924984857439995, "learning_rate": 0.01, "loss": 1.9309, "step": 118575 }, { "epoch": 12.184340320591861, "grad_norm": 0.07670131325721741, "learning_rate": 0.01, "loss": 1.9382, "step": 118578 }, { "epoch": 12.184648581997534, "grad_norm": 0.06920908391475677, "learning_rate": 0.01, "loss": 1.9109, "step": 118581 }, { "epoch": 12.184956843403207, "grad_norm": 0.08404618501663208, "learning_rate": 0.01, "loss": 1.9092, "step": 118584 }, { "epoch": 12.185265104808877, "grad_norm": 0.0868164449930191, "learning_rate": 0.01, "loss": 1.9333, "step": 118587 }, { "epoch": 12.18557336621455, "grad_norm": 0.080803282558918, "learning_rate": 0.01, "loss": 1.9304, "step": 118590 }, { "epoch": 12.185881627620223, "grad_norm": 0.09213325381278992, "learning_rate": 0.01, "loss": 1.9208, "step": 118593 }, { "epoch": 12.186189889025894, "grad_norm": 0.09278547763824463, "learning_rate": 0.01, "loss": 1.9425, "step": 118596 }, { "epoch": 12.186498150431566, "grad_norm": 0.05901975557208061, "learning_rate": 0.01, "loss": 1.9401, "step": 118599 }, { "epoch": 12.186806411837239, "grad_norm": 0.11545471101999283, "learning_rate": 0.01, "loss": 1.9483, "step": 118602 }, { "epoch": 12.18711467324291, "grad_norm": 0.1676173359155655, "learning_rate": 0.01, "loss": 1.927, "step": 118605 }, { "epoch": 12.187422934648582, "grad_norm": 0.09550116956233978, "learning_rate": 0.01, "loss": 1.9244, "step": 118608 }, { "epoch": 12.187731196054253, "grad_norm": 0.047323115170001984, "learning_rate": 0.01, "loss": 1.9317, "step": 118611 }, { "epoch": 12.188039457459926, "grad_norm": 0.047769296914339066, "learning_rate": 0.01, "loss": 1.9336, "step": 118614 }, { "epoch": 12.188347718865598, "grad_norm": 0.06573311239480972, "learning_rate": 0.01, "loss": 1.9296, "step": 118617 }, { "epoch": 12.18865598027127, "grad_norm": 0.05319448560476303, "learning_rate": 0.01, "loss": 1.9385, "step": 118620 }, { "epoch": 12.188964241676942, "grad_norm": 0.046964943408966064, "learning_rate": 0.01, "loss": 1.941, "step": 118623 }, { "epoch": 12.189272503082615, "grad_norm": 0.053592875599861145, "learning_rate": 0.01, "loss": 1.9279, "step": 118626 }, { "epoch": 12.189580764488285, "grad_norm": 0.1196940466761589, "learning_rate": 0.01, "loss": 1.9327, "step": 118629 }, { "epoch": 12.189889025893958, "grad_norm": 0.044338516891002655, "learning_rate": 0.01, "loss": 1.939, "step": 118632 }, { "epoch": 12.19019728729963, "grad_norm": 0.10683481395244598, "learning_rate": 0.01, "loss": 1.9491, "step": 118635 }, { "epoch": 12.190505548705302, "grad_norm": 0.03649195283651352, "learning_rate": 0.01, "loss": 1.9157, "step": 118638 }, { "epoch": 12.190813810110974, "grad_norm": 0.10152381658554077, "learning_rate": 0.01, "loss": 1.9188, "step": 118641 }, { "epoch": 12.191122071516647, "grad_norm": 0.06696424633264542, "learning_rate": 0.01, "loss": 1.9516, "step": 118644 }, { "epoch": 12.191430332922318, "grad_norm": 0.126417338848114, "learning_rate": 0.01, "loss": 1.9469, "step": 118647 }, { "epoch": 12.19173859432799, "grad_norm": 0.0509948693215847, "learning_rate": 0.01, "loss": 1.935, "step": 118650 }, { "epoch": 12.192046855733663, "grad_norm": 0.06043535843491554, "learning_rate": 0.01, "loss": 1.9307, "step": 118653 }, { "epoch": 12.192355117139334, "grad_norm": 0.05188688635826111, "learning_rate": 0.01, "loss": 1.9267, "step": 118656 }, { "epoch": 12.192663378545006, "grad_norm": 0.07013826817274094, "learning_rate": 0.01, "loss": 1.9458, "step": 118659 }, { "epoch": 12.192971639950677, "grad_norm": 0.08205080032348633, "learning_rate": 0.01, "loss": 1.9662, "step": 118662 }, { "epoch": 12.19327990135635, "grad_norm": 0.11694313585758209, "learning_rate": 0.01, "loss": 1.9412, "step": 118665 }, { "epoch": 12.193588162762023, "grad_norm": 0.06345352530479431, "learning_rate": 0.01, "loss": 1.9447, "step": 118668 }, { "epoch": 12.193896424167693, "grad_norm": 0.11872132122516632, "learning_rate": 0.01, "loss": 1.9514, "step": 118671 }, { "epoch": 12.194204685573366, "grad_norm": 0.10739465802907944, "learning_rate": 0.01, "loss": 1.9331, "step": 118674 }, { "epoch": 12.194512946979039, "grad_norm": 0.06278955191373825, "learning_rate": 0.01, "loss": 1.9433, "step": 118677 }, { "epoch": 12.19482120838471, "grad_norm": 0.07011450082063675, "learning_rate": 0.01, "loss": 1.9199, "step": 118680 }, { "epoch": 12.195129469790382, "grad_norm": 0.1012188121676445, "learning_rate": 0.01, "loss": 1.9285, "step": 118683 }, { "epoch": 12.195437731196055, "grad_norm": 0.07303464412689209, "learning_rate": 0.01, "loss": 1.9307, "step": 118686 }, { "epoch": 12.195745992601726, "grad_norm": 0.059921663254499435, "learning_rate": 0.01, "loss": 1.9251, "step": 118689 }, { "epoch": 12.196054254007398, "grad_norm": 0.16199304163455963, "learning_rate": 0.01, "loss": 1.9373, "step": 118692 }, { "epoch": 12.196362515413071, "grad_norm": 0.13744542002677917, "learning_rate": 0.01, "loss": 1.9195, "step": 118695 }, { "epoch": 12.196670776818742, "grad_norm": 0.07422071695327759, "learning_rate": 0.01, "loss": 1.9259, "step": 118698 }, { "epoch": 12.196979038224415, "grad_norm": 0.06524713337421417, "learning_rate": 0.01, "loss": 1.9392, "step": 118701 }, { "epoch": 12.197287299630087, "grad_norm": 0.04250166192650795, "learning_rate": 0.01, "loss": 1.9354, "step": 118704 }, { "epoch": 12.197595561035758, "grad_norm": 0.04675396904349327, "learning_rate": 0.01, "loss": 1.9398, "step": 118707 }, { "epoch": 12.19790382244143, "grad_norm": 0.0890340656042099, "learning_rate": 0.01, "loss": 1.9361, "step": 118710 }, { "epoch": 12.198212083847102, "grad_norm": 0.04863935336470604, "learning_rate": 0.01, "loss": 1.9345, "step": 118713 }, { "epoch": 12.198520345252774, "grad_norm": 0.04004283621907234, "learning_rate": 0.01, "loss": 1.9568, "step": 118716 }, { "epoch": 12.198828606658447, "grad_norm": 0.057114001363515854, "learning_rate": 0.01, "loss": 1.9344, "step": 118719 }, { "epoch": 12.199136868064118, "grad_norm": 0.04745546355843544, "learning_rate": 0.01, "loss": 1.9375, "step": 118722 }, { "epoch": 12.19944512946979, "grad_norm": 0.053959012031555176, "learning_rate": 0.01, "loss": 1.9457, "step": 118725 }, { "epoch": 12.199753390875463, "grad_norm": 0.03172319754958153, "learning_rate": 0.01, "loss": 1.9401, "step": 118728 }, { "epoch": 12.200061652281134, "grad_norm": 0.14972840249538422, "learning_rate": 0.01, "loss": 1.9499, "step": 118731 }, { "epoch": 12.200369913686806, "grad_norm": 0.09837561845779419, "learning_rate": 0.01, "loss": 1.9511, "step": 118734 }, { "epoch": 12.200678175092479, "grad_norm": 0.05351947620511055, "learning_rate": 0.01, "loss": 1.948, "step": 118737 }, { "epoch": 12.20098643649815, "grad_norm": 0.06797778606414795, "learning_rate": 0.01, "loss": 1.9526, "step": 118740 }, { "epoch": 12.201294697903823, "grad_norm": 0.04624923691153526, "learning_rate": 0.01, "loss": 1.9305, "step": 118743 }, { "epoch": 12.201602959309495, "grad_norm": 0.052814073860645294, "learning_rate": 0.01, "loss": 1.945, "step": 118746 }, { "epoch": 12.201911220715166, "grad_norm": 0.06401890516281128, "learning_rate": 0.01, "loss": 1.9253, "step": 118749 }, { "epoch": 12.202219482120839, "grad_norm": 0.07207990437746048, "learning_rate": 0.01, "loss": 1.9503, "step": 118752 }, { "epoch": 12.20252774352651, "grad_norm": 0.04342376068234444, "learning_rate": 0.01, "loss": 1.946, "step": 118755 }, { "epoch": 12.202836004932182, "grad_norm": 0.03639165684580803, "learning_rate": 0.01, "loss": 1.9278, "step": 118758 }, { "epoch": 12.203144266337855, "grad_norm": 0.04167010635137558, "learning_rate": 0.01, "loss": 1.9504, "step": 118761 }, { "epoch": 12.203452527743526, "grad_norm": 0.035900626331567764, "learning_rate": 0.01, "loss": 1.9168, "step": 118764 }, { "epoch": 12.203760789149198, "grad_norm": 0.044128745794296265, "learning_rate": 0.01, "loss": 1.9426, "step": 118767 }, { "epoch": 12.204069050554871, "grad_norm": 0.14358042180538177, "learning_rate": 0.01, "loss": 1.9451, "step": 118770 }, { "epoch": 12.204377311960542, "grad_norm": 0.050120823085308075, "learning_rate": 0.01, "loss": 1.9339, "step": 118773 }, { "epoch": 12.204685573366215, "grad_norm": 0.06396164000034332, "learning_rate": 0.01, "loss": 1.9368, "step": 118776 }, { "epoch": 12.204993834771887, "grad_norm": 0.09677011519670486, "learning_rate": 0.01, "loss": 1.9423, "step": 118779 }, { "epoch": 12.205302096177558, "grad_norm": 0.04677804559469223, "learning_rate": 0.01, "loss": 1.9383, "step": 118782 }, { "epoch": 12.20561035758323, "grad_norm": 0.1319218873977661, "learning_rate": 0.01, "loss": 1.9174, "step": 118785 }, { "epoch": 12.205918618988903, "grad_norm": 0.14024706184864044, "learning_rate": 0.01, "loss": 1.9542, "step": 118788 }, { "epoch": 12.206226880394574, "grad_norm": 0.10679846256971359, "learning_rate": 0.01, "loss": 1.9197, "step": 118791 }, { "epoch": 12.206535141800247, "grad_norm": 0.07363414019346237, "learning_rate": 0.01, "loss": 1.9789, "step": 118794 }, { "epoch": 12.20684340320592, "grad_norm": 0.0446414016187191, "learning_rate": 0.01, "loss": 1.9446, "step": 118797 }, { "epoch": 12.20715166461159, "grad_norm": 0.038796547800302505, "learning_rate": 0.01, "loss": 1.9456, "step": 118800 }, { "epoch": 12.207459926017263, "grad_norm": 0.09463851153850555, "learning_rate": 0.01, "loss": 1.9339, "step": 118803 }, { "epoch": 12.207768187422934, "grad_norm": 0.045280370861291885, "learning_rate": 0.01, "loss": 1.9247, "step": 118806 }, { "epoch": 12.208076448828606, "grad_norm": 0.06241309642791748, "learning_rate": 0.01, "loss": 1.9433, "step": 118809 }, { "epoch": 12.208384710234279, "grad_norm": 0.06056646630167961, "learning_rate": 0.01, "loss": 1.9448, "step": 118812 }, { "epoch": 12.20869297163995, "grad_norm": 0.043018437922000885, "learning_rate": 0.01, "loss": 1.9588, "step": 118815 }, { "epoch": 12.209001233045623, "grad_norm": 0.044745851308107376, "learning_rate": 0.01, "loss": 1.9385, "step": 118818 }, { "epoch": 12.209309494451295, "grad_norm": 0.04173322394490242, "learning_rate": 0.01, "loss": 1.9139, "step": 118821 }, { "epoch": 12.209617755856966, "grad_norm": 0.03966313600540161, "learning_rate": 0.01, "loss": 1.9189, "step": 118824 }, { "epoch": 12.209926017262639, "grad_norm": 0.06935320049524307, "learning_rate": 0.01, "loss": 1.9425, "step": 118827 }, { "epoch": 12.210234278668311, "grad_norm": 0.08756303042173386, "learning_rate": 0.01, "loss": 1.9447, "step": 118830 }, { "epoch": 12.210542540073982, "grad_norm": 0.14346668124198914, "learning_rate": 0.01, "loss": 1.9329, "step": 118833 }, { "epoch": 12.210850801479655, "grad_norm": 0.07660556584596634, "learning_rate": 0.01, "loss": 1.9269, "step": 118836 }, { "epoch": 12.211159062885327, "grad_norm": 0.06557713449001312, "learning_rate": 0.01, "loss": 1.9305, "step": 118839 }, { "epoch": 12.211467324290998, "grad_norm": 0.05898251757025719, "learning_rate": 0.01, "loss": 1.9219, "step": 118842 }, { "epoch": 12.211775585696671, "grad_norm": 0.08062247931957245, "learning_rate": 0.01, "loss": 1.9225, "step": 118845 }, { "epoch": 12.212083847102344, "grad_norm": 0.06156390532851219, "learning_rate": 0.01, "loss": 1.9318, "step": 118848 }, { "epoch": 12.212392108508014, "grad_norm": 0.11440441757440567, "learning_rate": 0.01, "loss": 1.9514, "step": 118851 }, { "epoch": 12.212700369913687, "grad_norm": 0.09022848308086395, "learning_rate": 0.01, "loss": 1.9471, "step": 118854 }, { "epoch": 12.213008631319358, "grad_norm": 0.06540698558092117, "learning_rate": 0.01, "loss": 1.9381, "step": 118857 }, { "epoch": 12.21331689272503, "grad_norm": 0.13931789994239807, "learning_rate": 0.01, "loss": 1.9292, "step": 118860 }, { "epoch": 12.213625154130703, "grad_norm": 0.0924806147813797, "learning_rate": 0.01, "loss": 1.9242, "step": 118863 }, { "epoch": 12.213933415536374, "grad_norm": 0.07151935249567032, "learning_rate": 0.01, "loss": 1.9352, "step": 118866 }, { "epoch": 12.214241676942047, "grad_norm": 0.05269354209303856, "learning_rate": 0.01, "loss": 1.9279, "step": 118869 }, { "epoch": 12.21454993834772, "grad_norm": 0.12896299362182617, "learning_rate": 0.01, "loss": 1.8919, "step": 118872 }, { "epoch": 12.21485819975339, "grad_norm": 0.03468026965856552, "learning_rate": 0.01, "loss": 1.9211, "step": 118875 }, { "epoch": 12.215166461159063, "grad_norm": 0.0727490782737732, "learning_rate": 0.01, "loss": 1.9401, "step": 118878 }, { "epoch": 12.215474722564736, "grad_norm": 0.13106340169906616, "learning_rate": 0.01, "loss": 1.9549, "step": 118881 }, { "epoch": 12.215782983970406, "grad_norm": 0.09597338736057281, "learning_rate": 0.01, "loss": 1.9395, "step": 118884 }, { "epoch": 12.216091245376079, "grad_norm": 0.06441767513751984, "learning_rate": 0.01, "loss": 1.946, "step": 118887 }, { "epoch": 12.216399506781752, "grad_norm": 0.04363396391272545, "learning_rate": 0.01, "loss": 1.9509, "step": 118890 }, { "epoch": 12.216707768187423, "grad_norm": 0.050265636295080185, "learning_rate": 0.01, "loss": 1.9244, "step": 118893 }, { "epoch": 12.217016029593095, "grad_norm": 0.03304623067378998, "learning_rate": 0.01, "loss": 1.9426, "step": 118896 }, { "epoch": 12.217324290998768, "grad_norm": 0.08241475373506546, "learning_rate": 0.01, "loss": 1.9312, "step": 118899 }, { "epoch": 12.217632552404439, "grad_norm": 0.037594906985759735, "learning_rate": 0.01, "loss": 1.9375, "step": 118902 }, { "epoch": 12.217940813810111, "grad_norm": 0.04278671368956566, "learning_rate": 0.01, "loss": 1.9295, "step": 118905 }, { "epoch": 12.218249075215782, "grad_norm": 0.037733662873506546, "learning_rate": 0.01, "loss": 1.9347, "step": 118908 }, { "epoch": 12.218557336621455, "grad_norm": 0.03884994983673096, "learning_rate": 0.01, "loss": 1.9301, "step": 118911 }, { "epoch": 12.218865598027127, "grad_norm": 0.056603990495204926, "learning_rate": 0.01, "loss": 1.9127, "step": 118914 }, { "epoch": 12.219173859432798, "grad_norm": 0.16166208684444427, "learning_rate": 0.01, "loss": 1.9391, "step": 118917 }, { "epoch": 12.219482120838471, "grad_norm": 0.09817244112491608, "learning_rate": 0.01, "loss": 1.9326, "step": 118920 }, { "epoch": 12.219790382244144, "grad_norm": 0.06344011425971985, "learning_rate": 0.01, "loss": 1.9345, "step": 118923 }, { "epoch": 12.220098643649814, "grad_norm": 0.04339836910367012, "learning_rate": 0.01, "loss": 1.9173, "step": 118926 }, { "epoch": 12.220406905055487, "grad_norm": 0.03249096870422363, "learning_rate": 0.01, "loss": 1.9437, "step": 118929 }, { "epoch": 12.22071516646116, "grad_norm": 0.03361337259411812, "learning_rate": 0.01, "loss": 1.9455, "step": 118932 }, { "epoch": 12.22102342786683, "grad_norm": 0.052787475287914276, "learning_rate": 0.01, "loss": 1.9386, "step": 118935 }, { "epoch": 12.221331689272503, "grad_norm": 0.0435674712061882, "learning_rate": 0.01, "loss": 1.9319, "step": 118938 }, { "epoch": 12.221639950678176, "grad_norm": 0.045802339911460876, "learning_rate": 0.01, "loss": 1.9186, "step": 118941 }, { "epoch": 12.221948212083847, "grad_norm": 0.06979796290397644, "learning_rate": 0.01, "loss": 1.956, "step": 118944 }, { "epoch": 12.22225647348952, "grad_norm": 0.08848527073860168, "learning_rate": 0.01, "loss": 1.9129, "step": 118947 }, { "epoch": 12.22256473489519, "grad_norm": 0.055262014269828796, "learning_rate": 0.01, "loss": 1.9328, "step": 118950 }, { "epoch": 12.222872996300863, "grad_norm": 0.1241009458899498, "learning_rate": 0.01, "loss": 1.9572, "step": 118953 }, { "epoch": 12.223181257706536, "grad_norm": 0.12001396715641022, "learning_rate": 0.01, "loss": 1.9261, "step": 118956 }, { "epoch": 12.223489519112206, "grad_norm": 0.04739031195640564, "learning_rate": 0.01, "loss": 1.9564, "step": 118959 }, { "epoch": 12.223797780517879, "grad_norm": 0.05092400684952736, "learning_rate": 0.01, "loss": 1.9254, "step": 118962 }, { "epoch": 12.224106041923552, "grad_norm": 0.05298929288983345, "learning_rate": 0.01, "loss": 1.9441, "step": 118965 }, { "epoch": 12.224414303329223, "grad_norm": 0.06079339236021042, "learning_rate": 0.01, "loss": 1.9545, "step": 118968 }, { "epoch": 12.224722564734895, "grad_norm": 0.04320069029927254, "learning_rate": 0.01, "loss": 1.9302, "step": 118971 }, { "epoch": 12.225030826140568, "grad_norm": 0.07928624749183655, "learning_rate": 0.01, "loss": 1.9393, "step": 118974 }, { "epoch": 12.225339087546239, "grad_norm": 0.13101297616958618, "learning_rate": 0.01, "loss": 1.9644, "step": 118977 }, { "epoch": 12.225647348951911, "grad_norm": 0.049566250294446945, "learning_rate": 0.01, "loss": 1.9238, "step": 118980 }, { "epoch": 12.225955610357584, "grad_norm": 0.0518098846077919, "learning_rate": 0.01, "loss": 1.9525, "step": 118983 }, { "epoch": 12.226263871763255, "grad_norm": 0.035460662096738815, "learning_rate": 0.01, "loss": 1.9724, "step": 118986 }, { "epoch": 12.226572133168927, "grad_norm": 0.1427997648715973, "learning_rate": 0.01, "loss": 1.9321, "step": 118989 }, { "epoch": 12.2268803945746, "grad_norm": 0.03751193732023239, "learning_rate": 0.01, "loss": 1.9456, "step": 118992 }, { "epoch": 12.227188655980271, "grad_norm": 0.04313329607248306, "learning_rate": 0.01, "loss": 1.9421, "step": 118995 }, { "epoch": 12.227496917385944, "grad_norm": 0.05610892176628113, "learning_rate": 0.01, "loss": 1.9573, "step": 118998 }, { "epoch": 12.227805178791614, "grad_norm": 0.036053575575351715, "learning_rate": 0.01, "loss": 1.9307, "step": 119001 }, { "epoch": 12.228113440197287, "grad_norm": 0.06000903621315956, "learning_rate": 0.01, "loss": 1.93, "step": 119004 }, { "epoch": 12.22842170160296, "grad_norm": 0.1441926658153534, "learning_rate": 0.01, "loss": 1.9476, "step": 119007 }, { "epoch": 12.22872996300863, "grad_norm": 0.05757873132824898, "learning_rate": 0.01, "loss": 1.9265, "step": 119010 }, { "epoch": 12.229038224414303, "grad_norm": 0.039227813482284546, "learning_rate": 0.01, "loss": 1.9418, "step": 119013 }, { "epoch": 12.229346485819976, "grad_norm": 0.06010231003165245, "learning_rate": 0.01, "loss": 1.9251, "step": 119016 }, { "epoch": 12.229654747225647, "grad_norm": 0.09603168815374374, "learning_rate": 0.01, "loss": 1.9478, "step": 119019 }, { "epoch": 12.22996300863132, "grad_norm": 0.04553908482193947, "learning_rate": 0.01, "loss": 1.931, "step": 119022 }, { "epoch": 12.230271270036992, "grad_norm": 0.038257114589214325, "learning_rate": 0.01, "loss": 1.9542, "step": 119025 }, { "epoch": 12.230579531442663, "grad_norm": 0.07678436487913132, "learning_rate": 0.01, "loss": 1.9478, "step": 119028 }, { "epoch": 12.230887792848335, "grad_norm": 0.11499672383069992, "learning_rate": 0.01, "loss": 1.9446, "step": 119031 }, { "epoch": 12.231196054254008, "grad_norm": 0.04072137549519539, "learning_rate": 0.01, "loss": 1.9161, "step": 119034 }, { "epoch": 12.231504315659679, "grad_norm": 0.09349635243415833, "learning_rate": 0.01, "loss": 1.9103, "step": 119037 }, { "epoch": 12.231812577065352, "grad_norm": 0.08275580406188965, "learning_rate": 0.01, "loss": 1.9231, "step": 119040 }, { "epoch": 12.232120838471024, "grad_norm": 0.046482153236866, "learning_rate": 0.01, "loss": 1.9509, "step": 119043 }, { "epoch": 12.232429099876695, "grad_norm": 0.06730115413665771, "learning_rate": 0.01, "loss": 1.9461, "step": 119046 }, { "epoch": 12.232737361282368, "grad_norm": 0.07506335526704788, "learning_rate": 0.01, "loss": 1.9388, "step": 119049 }, { "epoch": 12.233045622688039, "grad_norm": 0.07089993357658386, "learning_rate": 0.01, "loss": 1.9418, "step": 119052 }, { "epoch": 12.233353884093711, "grad_norm": 0.0345221571624279, "learning_rate": 0.01, "loss": 1.9382, "step": 119055 }, { "epoch": 12.233662145499384, "grad_norm": 0.11468834429979324, "learning_rate": 0.01, "loss": 1.9393, "step": 119058 }, { "epoch": 12.233970406905055, "grad_norm": 0.1281205713748932, "learning_rate": 0.01, "loss": 1.9396, "step": 119061 }, { "epoch": 12.234278668310727, "grad_norm": 0.054583653807640076, "learning_rate": 0.01, "loss": 1.9465, "step": 119064 }, { "epoch": 12.2345869297164, "grad_norm": 0.05678866058588028, "learning_rate": 0.01, "loss": 1.9406, "step": 119067 }, { "epoch": 12.234895191122071, "grad_norm": 0.04765111580491066, "learning_rate": 0.01, "loss": 1.9192, "step": 119070 }, { "epoch": 12.235203452527744, "grad_norm": 0.04203588515520096, "learning_rate": 0.01, "loss": 1.9174, "step": 119073 }, { "epoch": 12.235511713933416, "grad_norm": 0.11908500641584396, "learning_rate": 0.01, "loss": 1.9457, "step": 119076 }, { "epoch": 12.235819975339087, "grad_norm": 0.09316672384738922, "learning_rate": 0.01, "loss": 1.9364, "step": 119079 }, { "epoch": 12.23612823674476, "grad_norm": 0.10216987133026123, "learning_rate": 0.01, "loss": 1.9378, "step": 119082 }, { "epoch": 12.236436498150432, "grad_norm": 0.11083834618330002, "learning_rate": 0.01, "loss": 1.9259, "step": 119085 }, { "epoch": 12.236744759556103, "grad_norm": 0.06925109773874283, "learning_rate": 0.01, "loss": 1.9185, "step": 119088 }, { "epoch": 12.237053020961776, "grad_norm": 0.0509336031973362, "learning_rate": 0.01, "loss": 1.9108, "step": 119091 }, { "epoch": 12.237361282367448, "grad_norm": 0.038560494780540466, "learning_rate": 0.01, "loss": 1.924, "step": 119094 }, { "epoch": 12.23766954377312, "grad_norm": 0.07917262613773346, "learning_rate": 0.01, "loss": 1.9451, "step": 119097 }, { "epoch": 12.237977805178792, "grad_norm": 0.06403060257434845, "learning_rate": 0.01, "loss": 1.9464, "step": 119100 }, { "epoch": 12.238286066584463, "grad_norm": 0.0364985316991806, "learning_rate": 0.01, "loss": 1.9051, "step": 119103 }, { "epoch": 12.238594327990135, "grad_norm": 0.04724942147731781, "learning_rate": 0.01, "loss": 1.9258, "step": 119106 }, { "epoch": 12.238902589395808, "grad_norm": 0.12185391783714294, "learning_rate": 0.01, "loss": 1.928, "step": 119109 }, { "epoch": 12.239210850801479, "grad_norm": 0.06551426649093628, "learning_rate": 0.01, "loss": 1.916, "step": 119112 }, { "epoch": 12.239519112207152, "grad_norm": 0.04911579191684723, "learning_rate": 0.01, "loss": 1.9348, "step": 119115 }, { "epoch": 12.239827373612824, "grad_norm": 0.04070357233285904, "learning_rate": 0.01, "loss": 1.9471, "step": 119118 }, { "epoch": 12.240135635018495, "grad_norm": 0.14026470482349396, "learning_rate": 0.01, "loss": 1.9611, "step": 119121 }, { "epoch": 12.240443896424168, "grad_norm": 0.07128757983446121, "learning_rate": 0.01, "loss": 1.9501, "step": 119124 }, { "epoch": 12.24075215782984, "grad_norm": 0.062092967331409454, "learning_rate": 0.01, "loss": 1.939, "step": 119127 }, { "epoch": 12.241060419235511, "grad_norm": 0.06296969205141068, "learning_rate": 0.01, "loss": 1.9218, "step": 119130 }, { "epoch": 12.241368680641184, "grad_norm": 0.09423550218343735, "learning_rate": 0.01, "loss": 1.927, "step": 119133 }, { "epoch": 12.241676942046857, "grad_norm": 0.05814823508262634, "learning_rate": 0.01, "loss": 1.9378, "step": 119136 }, { "epoch": 12.241985203452527, "grad_norm": 0.05670604109764099, "learning_rate": 0.01, "loss": 1.9467, "step": 119139 }, { "epoch": 12.2422934648582, "grad_norm": 0.041170332580804825, "learning_rate": 0.01, "loss": 1.9322, "step": 119142 }, { "epoch": 12.24260172626387, "grad_norm": 0.05782140791416168, "learning_rate": 0.01, "loss": 1.9551, "step": 119145 }, { "epoch": 12.242909987669544, "grad_norm": 0.04377881810069084, "learning_rate": 0.01, "loss": 1.9441, "step": 119148 }, { "epoch": 12.243218249075216, "grad_norm": 0.03731423243880272, "learning_rate": 0.01, "loss": 1.9583, "step": 119151 }, { "epoch": 12.243526510480887, "grad_norm": 0.08947639167308807, "learning_rate": 0.01, "loss": 1.9028, "step": 119154 }, { "epoch": 12.24383477188656, "grad_norm": 0.09892389178276062, "learning_rate": 0.01, "loss": 1.9343, "step": 119157 }, { "epoch": 12.244143033292232, "grad_norm": 0.13070419430732727, "learning_rate": 0.01, "loss": 1.9306, "step": 119160 }, { "epoch": 12.244451294697903, "grad_norm": 0.05224129930138588, "learning_rate": 0.01, "loss": 1.9376, "step": 119163 }, { "epoch": 12.244759556103576, "grad_norm": 0.06416375190019608, "learning_rate": 0.01, "loss": 1.9309, "step": 119166 }, { "epoch": 12.245067817509248, "grad_norm": 0.03321628272533417, "learning_rate": 0.01, "loss": 1.9531, "step": 119169 }, { "epoch": 12.24537607891492, "grad_norm": 0.041123420000076294, "learning_rate": 0.01, "loss": 1.9439, "step": 119172 }, { "epoch": 12.245684340320592, "grad_norm": 0.09715761244297028, "learning_rate": 0.01, "loss": 1.8918, "step": 119175 }, { "epoch": 12.245992601726265, "grad_norm": 0.05635785683989525, "learning_rate": 0.01, "loss": 1.9262, "step": 119178 }, { "epoch": 12.246300863131935, "grad_norm": 0.08281838148832321, "learning_rate": 0.01, "loss": 1.9023, "step": 119181 }, { "epoch": 12.246609124537608, "grad_norm": 0.06842327117919922, "learning_rate": 0.01, "loss": 1.944, "step": 119184 }, { "epoch": 12.24691738594328, "grad_norm": 0.044265225529670715, "learning_rate": 0.01, "loss": 1.9372, "step": 119187 }, { "epoch": 12.247225647348952, "grad_norm": 0.045708850026130676, "learning_rate": 0.01, "loss": 1.924, "step": 119190 }, { "epoch": 12.247533908754624, "grad_norm": 0.05862099304795265, "learning_rate": 0.01, "loss": 1.9382, "step": 119193 }, { "epoch": 12.247842170160295, "grad_norm": 0.0953894555568695, "learning_rate": 0.01, "loss": 1.9396, "step": 119196 }, { "epoch": 12.248150431565968, "grad_norm": 0.09879028797149658, "learning_rate": 0.01, "loss": 1.9393, "step": 119199 }, { "epoch": 12.24845869297164, "grad_norm": 0.060379255563020706, "learning_rate": 0.01, "loss": 1.9432, "step": 119202 }, { "epoch": 12.248766954377311, "grad_norm": 0.04327157512307167, "learning_rate": 0.01, "loss": 1.9293, "step": 119205 }, { "epoch": 12.249075215782984, "grad_norm": 0.04136524721980095, "learning_rate": 0.01, "loss": 1.95, "step": 119208 }, { "epoch": 12.249383477188656, "grad_norm": 0.04830603674054146, "learning_rate": 0.01, "loss": 1.9598, "step": 119211 }, { "epoch": 12.249691738594327, "grad_norm": 0.07206027954816818, "learning_rate": 0.01, "loss": 1.9116, "step": 119214 }, { "epoch": 12.25, "grad_norm": 0.11831352114677429, "learning_rate": 0.01, "loss": 1.9288, "step": 119217 }, { "epoch": 12.250308261405673, "grad_norm": 0.09627742320299149, "learning_rate": 0.01, "loss": 1.9288, "step": 119220 }, { "epoch": 12.250616522811344, "grad_norm": 0.09025995433330536, "learning_rate": 0.01, "loss": 1.9386, "step": 119223 }, { "epoch": 12.250924784217016, "grad_norm": 0.041941117495298386, "learning_rate": 0.01, "loss": 1.9537, "step": 119226 }, { "epoch": 12.251233045622689, "grad_norm": 0.039908573031425476, "learning_rate": 0.01, "loss": 1.947, "step": 119229 }, { "epoch": 12.25154130702836, "grad_norm": 0.11591826379299164, "learning_rate": 0.01, "loss": 1.9313, "step": 119232 }, { "epoch": 12.251849568434032, "grad_norm": 0.03896136209368706, "learning_rate": 0.01, "loss": 1.937, "step": 119235 }, { "epoch": 12.252157829839705, "grad_norm": 0.05069341883063316, "learning_rate": 0.01, "loss": 1.9459, "step": 119238 }, { "epoch": 12.252466091245376, "grad_norm": 0.04808599501848221, "learning_rate": 0.01, "loss": 1.9542, "step": 119241 }, { "epoch": 12.252774352651048, "grad_norm": 0.03207084536552429, "learning_rate": 0.01, "loss": 1.9362, "step": 119244 }, { "epoch": 12.25308261405672, "grad_norm": 0.050113700330257416, "learning_rate": 0.01, "loss": 1.9183, "step": 119247 }, { "epoch": 12.253390875462392, "grad_norm": 0.0495215505361557, "learning_rate": 0.01, "loss": 1.9177, "step": 119250 }, { "epoch": 12.253699136868065, "grad_norm": 0.12017959356307983, "learning_rate": 0.01, "loss": 1.93, "step": 119253 }, { "epoch": 12.254007398273735, "grad_norm": 0.07121411710977554, "learning_rate": 0.01, "loss": 1.9355, "step": 119256 }, { "epoch": 12.254315659679408, "grad_norm": 0.11534114927053452, "learning_rate": 0.01, "loss": 1.9379, "step": 119259 }, { "epoch": 12.25462392108508, "grad_norm": 0.05048522725701332, "learning_rate": 0.01, "loss": 1.9167, "step": 119262 }, { "epoch": 12.254932182490752, "grad_norm": 0.03871830925345421, "learning_rate": 0.01, "loss": 1.9672, "step": 119265 }, { "epoch": 12.255240443896424, "grad_norm": 0.04501257464289665, "learning_rate": 0.01, "loss": 1.9175, "step": 119268 }, { "epoch": 12.255548705302097, "grad_norm": 0.03589480742812157, "learning_rate": 0.01, "loss": 1.9239, "step": 119271 }, { "epoch": 12.255856966707768, "grad_norm": 0.06698665767908096, "learning_rate": 0.01, "loss": 1.9395, "step": 119274 }, { "epoch": 12.25616522811344, "grad_norm": 0.13130944967269897, "learning_rate": 0.01, "loss": 1.9326, "step": 119277 }, { "epoch": 12.256473489519113, "grad_norm": 0.10875599831342697, "learning_rate": 0.01, "loss": 1.9599, "step": 119280 }, { "epoch": 12.256781750924784, "grad_norm": 0.06583302468061447, "learning_rate": 0.01, "loss": 1.9405, "step": 119283 }, { "epoch": 12.257090012330456, "grad_norm": 0.05685946345329285, "learning_rate": 0.01, "loss": 1.9442, "step": 119286 }, { "epoch": 12.25739827373613, "grad_norm": 0.10563762485980988, "learning_rate": 0.01, "loss": 1.9583, "step": 119289 }, { "epoch": 12.2577065351418, "grad_norm": 0.07009796798229218, "learning_rate": 0.01, "loss": 1.962, "step": 119292 }, { "epoch": 12.258014796547473, "grad_norm": 0.03915143758058548, "learning_rate": 0.01, "loss": 1.9362, "step": 119295 }, { "epoch": 12.258323057953143, "grad_norm": 0.03810437023639679, "learning_rate": 0.01, "loss": 1.9454, "step": 119298 }, { "epoch": 12.258631319358816, "grad_norm": 0.037026748061180115, "learning_rate": 0.01, "loss": 1.9429, "step": 119301 }, { "epoch": 12.258939580764489, "grad_norm": 0.12063061445951462, "learning_rate": 0.01, "loss": 1.9279, "step": 119304 }, { "epoch": 12.25924784217016, "grad_norm": 0.04207170382142067, "learning_rate": 0.01, "loss": 1.9168, "step": 119307 }, { "epoch": 12.259556103575832, "grad_norm": 0.09984170645475388, "learning_rate": 0.01, "loss": 1.937, "step": 119310 }, { "epoch": 12.259864364981505, "grad_norm": 0.042393285781145096, "learning_rate": 0.01, "loss": 1.9166, "step": 119313 }, { "epoch": 12.260172626387176, "grad_norm": 0.06870042532682419, "learning_rate": 0.01, "loss": 1.9478, "step": 119316 }, { "epoch": 12.260480887792848, "grad_norm": 0.042919475585222244, "learning_rate": 0.01, "loss": 1.9023, "step": 119319 }, { "epoch": 12.260789149198521, "grad_norm": 0.03628470376133919, "learning_rate": 0.01, "loss": 1.9278, "step": 119322 }, { "epoch": 12.261097410604192, "grad_norm": 0.1155962273478508, "learning_rate": 0.01, "loss": 1.9275, "step": 119325 }, { "epoch": 12.261405672009865, "grad_norm": 0.05997050553560257, "learning_rate": 0.01, "loss": 1.9343, "step": 119328 }, { "epoch": 12.261713933415537, "grad_norm": 0.058512236922979355, "learning_rate": 0.01, "loss": 1.964, "step": 119331 }, { "epoch": 12.262022194821208, "grad_norm": 0.04778609424829483, "learning_rate": 0.01, "loss": 1.9312, "step": 119334 }, { "epoch": 12.26233045622688, "grad_norm": 0.09373581409454346, "learning_rate": 0.01, "loss": 1.9526, "step": 119337 }, { "epoch": 12.262638717632552, "grad_norm": 0.06936026364564896, "learning_rate": 0.01, "loss": 1.9411, "step": 119340 }, { "epoch": 12.262946979038224, "grad_norm": 0.12991760671138763, "learning_rate": 0.01, "loss": 1.9325, "step": 119343 }, { "epoch": 12.263255240443897, "grad_norm": 0.08365713804960251, "learning_rate": 0.01, "loss": 1.935, "step": 119346 }, { "epoch": 12.263563501849568, "grad_norm": 0.08354364335536957, "learning_rate": 0.01, "loss": 1.9241, "step": 119349 }, { "epoch": 12.26387176325524, "grad_norm": 0.053151343017816544, "learning_rate": 0.01, "loss": 1.9523, "step": 119352 }, { "epoch": 12.264180024660913, "grad_norm": 0.05753325670957565, "learning_rate": 0.01, "loss": 1.9254, "step": 119355 }, { "epoch": 12.264488286066584, "grad_norm": 0.07821407914161682, "learning_rate": 0.01, "loss": 1.9574, "step": 119358 }, { "epoch": 12.264796547472256, "grad_norm": 0.0947427898645401, "learning_rate": 0.01, "loss": 1.9559, "step": 119361 }, { "epoch": 12.265104808877929, "grad_norm": 0.048926010727882385, "learning_rate": 0.01, "loss": 1.9169, "step": 119364 }, { "epoch": 12.2654130702836, "grad_norm": 0.047043971717357635, "learning_rate": 0.01, "loss": 1.9675, "step": 119367 }, { "epoch": 12.265721331689273, "grad_norm": 0.03395242989063263, "learning_rate": 0.01, "loss": 1.9283, "step": 119370 }, { "epoch": 12.266029593094945, "grad_norm": 0.03889608010649681, "learning_rate": 0.01, "loss": 1.9359, "step": 119373 }, { "epoch": 12.266337854500616, "grad_norm": 0.041535381227731705, "learning_rate": 0.01, "loss": 1.9186, "step": 119376 }, { "epoch": 12.266646115906289, "grad_norm": 0.07542633265256882, "learning_rate": 0.01, "loss": 1.9588, "step": 119379 }, { "epoch": 12.266954377311961, "grad_norm": 0.06241780146956444, "learning_rate": 0.01, "loss": 1.9402, "step": 119382 }, { "epoch": 12.267262638717632, "grad_norm": 0.12422860413789749, "learning_rate": 0.01, "loss": 1.9166, "step": 119385 }, { "epoch": 12.267570900123305, "grad_norm": 0.09422612190246582, "learning_rate": 0.01, "loss": 1.9412, "step": 119388 }, { "epoch": 12.267879161528976, "grad_norm": 0.06918896734714508, "learning_rate": 0.01, "loss": 1.9369, "step": 119391 }, { "epoch": 12.268187422934648, "grad_norm": 0.08683785796165466, "learning_rate": 0.01, "loss": 1.9492, "step": 119394 }, { "epoch": 12.268495684340321, "grad_norm": 0.13475213944911957, "learning_rate": 0.01, "loss": 1.9271, "step": 119397 }, { "epoch": 12.268803945745992, "grad_norm": 0.09755492955446243, "learning_rate": 0.01, "loss": 1.9317, "step": 119400 }, { "epoch": 12.269112207151665, "grad_norm": 0.11172865331172943, "learning_rate": 0.01, "loss": 1.926, "step": 119403 }, { "epoch": 12.269420468557337, "grad_norm": 0.06556771695613861, "learning_rate": 0.01, "loss": 1.8946, "step": 119406 }, { "epoch": 12.269728729963008, "grad_norm": 0.04608120396733284, "learning_rate": 0.01, "loss": 1.9446, "step": 119409 }, { "epoch": 12.27003699136868, "grad_norm": 0.07218800485134125, "learning_rate": 0.01, "loss": 1.9381, "step": 119412 }, { "epoch": 12.270345252774353, "grad_norm": 0.061452243477106094, "learning_rate": 0.01, "loss": 1.9444, "step": 119415 }, { "epoch": 12.270653514180024, "grad_norm": 0.04344675689935684, "learning_rate": 0.01, "loss": 1.9306, "step": 119418 }, { "epoch": 12.270961775585697, "grad_norm": 0.11082371324300766, "learning_rate": 0.01, "loss": 1.9402, "step": 119421 }, { "epoch": 12.27127003699137, "grad_norm": 0.06729606539011002, "learning_rate": 0.01, "loss": 1.9424, "step": 119424 }, { "epoch": 12.27157829839704, "grad_norm": 0.09937182068824768, "learning_rate": 0.01, "loss": 1.9433, "step": 119427 }, { "epoch": 12.271886559802713, "grad_norm": 0.06512150913476944, "learning_rate": 0.01, "loss": 1.9452, "step": 119430 }, { "epoch": 12.272194821208386, "grad_norm": 0.08961142599582672, "learning_rate": 0.01, "loss": 1.938, "step": 119433 }, { "epoch": 12.272503082614056, "grad_norm": 0.06848619133234024, "learning_rate": 0.01, "loss": 1.9665, "step": 119436 }, { "epoch": 12.272811344019729, "grad_norm": 0.07923544943332672, "learning_rate": 0.01, "loss": 1.94, "step": 119439 }, { "epoch": 12.2731196054254, "grad_norm": 0.09845848381519318, "learning_rate": 0.01, "loss": 1.9567, "step": 119442 }, { "epoch": 12.273427866831073, "grad_norm": 0.0953758955001831, "learning_rate": 0.01, "loss": 1.9584, "step": 119445 }, { "epoch": 12.273736128236745, "grad_norm": 0.0558675080537796, "learning_rate": 0.01, "loss": 1.9261, "step": 119448 }, { "epoch": 12.274044389642416, "grad_norm": 0.07507700473070145, "learning_rate": 0.01, "loss": 1.9496, "step": 119451 }, { "epoch": 12.274352651048089, "grad_norm": 0.09544411301612854, "learning_rate": 0.01, "loss": 1.9332, "step": 119454 }, { "epoch": 12.274660912453761, "grad_norm": 0.03666847571730614, "learning_rate": 0.01, "loss": 1.9385, "step": 119457 }, { "epoch": 12.274969173859432, "grad_norm": 0.10058706998825073, "learning_rate": 0.01, "loss": 1.9093, "step": 119460 }, { "epoch": 12.275277435265105, "grad_norm": 0.09659945219755173, "learning_rate": 0.01, "loss": 1.9279, "step": 119463 }, { "epoch": 12.275585696670777, "grad_norm": 0.08124331384897232, "learning_rate": 0.01, "loss": 1.9281, "step": 119466 }, { "epoch": 12.275893958076448, "grad_norm": 0.03858707845211029, "learning_rate": 0.01, "loss": 1.9605, "step": 119469 }, { "epoch": 12.276202219482121, "grad_norm": 0.03947792947292328, "learning_rate": 0.01, "loss": 1.9328, "step": 119472 }, { "epoch": 12.276510480887794, "grad_norm": 0.11748308688402176, "learning_rate": 0.01, "loss": 1.9486, "step": 119475 }, { "epoch": 12.276818742293464, "grad_norm": 0.05301667004823685, "learning_rate": 0.01, "loss": 1.9518, "step": 119478 }, { "epoch": 12.277127003699137, "grad_norm": 0.041350193321704865, "learning_rate": 0.01, "loss": 1.9462, "step": 119481 }, { "epoch": 12.27743526510481, "grad_norm": 0.18285071849822998, "learning_rate": 0.01, "loss": 1.9174, "step": 119484 }, { "epoch": 12.27774352651048, "grad_norm": 0.06876923143863678, "learning_rate": 0.01, "loss": 1.9317, "step": 119487 }, { "epoch": 12.278051787916153, "grad_norm": 0.06623870134353638, "learning_rate": 0.01, "loss": 1.9171, "step": 119490 }, { "epoch": 12.278360049321824, "grad_norm": 0.0681280568242073, "learning_rate": 0.01, "loss": 1.895, "step": 119493 }, { "epoch": 12.278668310727497, "grad_norm": 0.0857885330915451, "learning_rate": 0.01, "loss": 1.9346, "step": 119496 }, { "epoch": 12.27897657213317, "grad_norm": 0.055479828268289566, "learning_rate": 0.01, "loss": 1.9699, "step": 119499 }, { "epoch": 12.27928483353884, "grad_norm": 0.04913216084241867, "learning_rate": 0.01, "loss": 1.932, "step": 119502 }, { "epoch": 12.279593094944513, "grad_norm": 0.036898694932460785, "learning_rate": 0.01, "loss": 1.9269, "step": 119505 }, { "epoch": 12.279901356350186, "grad_norm": 0.049119070172309875, "learning_rate": 0.01, "loss": 1.9051, "step": 119508 }, { "epoch": 12.280209617755856, "grad_norm": 0.04535537585616112, "learning_rate": 0.01, "loss": 1.9487, "step": 119511 }, { "epoch": 12.280517879161529, "grad_norm": 0.0731346532702446, "learning_rate": 0.01, "loss": 1.941, "step": 119514 }, { "epoch": 12.280826140567202, "grad_norm": 0.08404407650232315, "learning_rate": 0.01, "loss": 1.916, "step": 119517 }, { "epoch": 12.281134401972873, "grad_norm": 0.06973934918642044, "learning_rate": 0.01, "loss": 1.9498, "step": 119520 }, { "epoch": 12.281442663378545, "grad_norm": 0.10510105639696121, "learning_rate": 0.01, "loss": 1.9652, "step": 119523 }, { "epoch": 12.281750924784218, "grad_norm": 0.04614637792110443, "learning_rate": 0.01, "loss": 1.9398, "step": 119526 }, { "epoch": 12.282059186189889, "grad_norm": 0.14483202993869781, "learning_rate": 0.01, "loss": 1.9317, "step": 119529 }, { "epoch": 12.282367447595561, "grad_norm": 0.06402111798524857, "learning_rate": 0.01, "loss": 1.945, "step": 119532 }, { "epoch": 12.282675709001232, "grad_norm": 0.035851992666721344, "learning_rate": 0.01, "loss": 1.9339, "step": 119535 }, { "epoch": 12.282983970406905, "grad_norm": 0.06560065597295761, "learning_rate": 0.01, "loss": 1.9388, "step": 119538 }, { "epoch": 12.283292231812577, "grad_norm": 0.0462896004319191, "learning_rate": 0.01, "loss": 1.9292, "step": 119541 }, { "epoch": 12.283600493218248, "grad_norm": 0.07922311872243881, "learning_rate": 0.01, "loss": 1.92, "step": 119544 }, { "epoch": 12.283908754623921, "grad_norm": 0.04975975677371025, "learning_rate": 0.01, "loss": 1.9127, "step": 119547 }, { "epoch": 12.284217016029594, "grad_norm": 0.05062318593263626, "learning_rate": 0.01, "loss": 1.9506, "step": 119550 }, { "epoch": 12.284525277435264, "grad_norm": 0.1192026287317276, "learning_rate": 0.01, "loss": 1.9491, "step": 119553 }, { "epoch": 12.284833538840937, "grad_norm": 0.10026537626981735, "learning_rate": 0.01, "loss": 1.9527, "step": 119556 }, { "epoch": 12.28514180024661, "grad_norm": 0.048863403499126434, "learning_rate": 0.01, "loss": 1.9358, "step": 119559 }, { "epoch": 12.28545006165228, "grad_norm": 0.034858595579862595, "learning_rate": 0.01, "loss": 1.9293, "step": 119562 }, { "epoch": 12.285758323057953, "grad_norm": 0.11180103570222855, "learning_rate": 0.01, "loss": 1.9218, "step": 119565 }, { "epoch": 12.286066584463626, "grad_norm": 0.08020367473363876, "learning_rate": 0.01, "loss": 1.941, "step": 119568 }, { "epoch": 12.286374845869297, "grad_norm": 0.0711284652352333, "learning_rate": 0.01, "loss": 1.9231, "step": 119571 }, { "epoch": 12.28668310727497, "grad_norm": 0.037581104785203934, "learning_rate": 0.01, "loss": 1.9464, "step": 119574 }, { "epoch": 12.286991368680642, "grad_norm": 0.04725247994065285, "learning_rate": 0.01, "loss": 1.9323, "step": 119577 }, { "epoch": 12.287299630086313, "grad_norm": 0.052582450211048126, "learning_rate": 0.01, "loss": 1.9147, "step": 119580 }, { "epoch": 12.287607891491986, "grad_norm": 0.04754595831036568, "learning_rate": 0.01, "loss": 1.9454, "step": 119583 }, { "epoch": 12.287916152897656, "grad_norm": 0.0919022411108017, "learning_rate": 0.01, "loss": 1.9452, "step": 119586 }, { "epoch": 12.288224414303329, "grad_norm": 0.08994028717279434, "learning_rate": 0.01, "loss": 1.9372, "step": 119589 }, { "epoch": 12.288532675709002, "grad_norm": 0.08245561271905899, "learning_rate": 0.01, "loss": 1.9341, "step": 119592 }, { "epoch": 12.288840937114673, "grad_norm": 0.09249047189950943, "learning_rate": 0.01, "loss": 1.9347, "step": 119595 }, { "epoch": 12.289149198520345, "grad_norm": 0.044289279729127884, "learning_rate": 0.01, "loss": 1.9256, "step": 119598 }, { "epoch": 12.289457459926018, "grad_norm": 0.04035840183496475, "learning_rate": 0.01, "loss": 1.9521, "step": 119601 }, { "epoch": 12.289765721331689, "grad_norm": 0.04231641814112663, "learning_rate": 0.01, "loss": 1.9487, "step": 119604 }, { "epoch": 12.290073982737361, "grad_norm": 0.037263207137584686, "learning_rate": 0.01, "loss": 1.9321, "step": 119607 }, { "epoch": 12.290382244143034, "grad_norm": 0.10813697427511215, "learning_rate": 0.01, "loss": 1.9257, "step": 119610 }, { "epoch": 12.290690505548705, "grad_norm": 0.0699143186211586, "learning_rate": 0.01, "loss": 1.9372, "step": 119613 }, { "epoch": 12.290998766954377, "grad_norm": 0.1337338536977768, "learning_rate": 0.01, "loss": 1.9392, "step": 119616 }, { "epoch": 12.29130702836005, "grad_norm": 0.04969757795333862, "learning_rate": 0.01, "loss": 1.9203, "step": 119619 }, { "epoch": 12.291615289765721, "grad_norm": 0.08740051090717316, "learning_rate": 0.01, "loss": 1.9645, "step": 119622 }, { "epoch": 12.291923551171394, "grad_norm": 0.08194126188755035, "learning_rate": 0.01, "loss": 1.9174, "step": 119625 }, { "epoch": 12.292231812577066, "grad_norm": 0.06093340367078781, "learning_rate": 0.01, "loss": 1.9321, "step": 119628 }, { "epoch": 12.292540073982737, "grad_norm": 0.10872428119182587, "learning_rate": 0.01, "loss": 1.9206, "step": 119631 }, { "epoch": 12.29284833538841, "grad_norm": 0.12310198694467545, "learning_rate": 0.01, "loss": 1.948, "step": 119634 }, { "epoch": 12.29315659679408, "grad_norm": 0.07006506621837616, "learning_rate": 0.01, "loss": 1.9331, "step": 119637 }, { "epoch": 12.293464858199753, "grad_norm": 0.04315321519970894, "learning_rate": 0.01, "loss": 1.9197, "step": 119640 }, { "epoch": 12.293773119605426, "grad_norm": 0.03495105728507042, "learning_rate": 0.01, "loss": 1.9511, "step": 119643 }, { "epoch": 12.294081381011097, "grad_norm": 0.049105752259492874, "learning_rate": 0.01, "loss": 1.9502, "step": 119646 }, { "epoch": 12.29438964241677, "grad_norm": 0.13561686873435974, "learning_rate": 0.01, "loss": 1.9359, "step": 119649 }, { "epoch": 12.294697903822442, "grad_norm": 0.0391533300280571, "learning_rate": 0.01, "loss": 1.9397, "step": 119652 }, { "epoch": 12.295006165228113, "grad_norm": 0.09752475470304489, "learning_rate": 0.01, "loss": 1.9278, "step": 119655 }, { "epoch": 12.295314426633785, "grad_norm": 0.08919859677553177, "learning_rate": 0.01, "loss": 1.934, "step": 119658 }, { "epoch": 12.295622688039458, "grad_norm": 0.057920221239328384, "learning_rate": 0.01, "loss": 1.9234, "step": 119661 }, { "epoch": 12.295930949445129, "grad_norm": 0.06989624351263046, "learning_rate": 0.01, "loss": 1.9636, "step": 119664 }, { "epoch": 12.296239210850802, "grad_norm": 0.04143437743186951, "learning_rate": 0.01, "loss": 1.9588, "step": 119667 }, { "epoch": 12.296547472256474, "grad_norm": 0.03357059881091118, "learning_rate": 0.01, "loss": 1.9395, "step": 119670 }, { "epoch": 12.296855733662145, "grad_norm": 0.08189679682254791, "learning_rate": 0.01, "loss": 1.9375, "step": 119673 }, { "epoch": 12.297163995067818, "grad_norm": 0.09445969760417938, "learning_rate": 0.01, "loss": 1.9388, "step": 119676 }, { "epoch": 12.29747225647349, "grad_norm": 0.07666604220867157, "learning_rate": 0.01, "loss": 1.937, "step": 119679 }, { "epoch": 12.297780517879161, "grad_norm": 0.060300711542367935, "learning_rate": 0.01, "loss": 1.941, "step": 119682 }, { "epoch": 12.298088779284834, "grad_norm": 0.04202558472752571, "learning_rate": 0.01, "loss": 1.9447, "step": 119685 }, { "epoch": 12.298397040690505, "grad_norm": 0.05015214905142784, "learning_rate": 0.01, "loss": 1.937, "step": 119688 }, { "epoch": 12.298705302096177, "grad_norm": 0.11163275688886642, "learning_rate": 0.01, "loss": 1.9223, "step": 119691 }, { "epoch": 12.29901356350185, "grad_norm": 0.09391912817955017, "learning_rate": 0.01, "loss": 1.9262, "step": 119694 }, { "epoch": 12.299321824907521, "grad_norm": 0.035903919488191605, "learning_rate": 0.01, "loss": 1.9289, "step": 119697 }, { "epoch": 12.299630086313194, "grad_norm": 0.15324540436267853, "learning_rate": 0.01, "loss": 1.9547, "step": 119700 }, { "epoch": 12.299938347718866, "grad_norm": 0.13761693239212036, "learning_rate": 0.01, "loss": 1.9535, "step": 119703 }, { "epoch": 12.300246609124537, "grad_norm": 0.10357910394668579, "learning_rate": 0.01, "loss": 1.9549, "step": 119706 }, { "epoch": 12.30055487053021, "grad_norm": 0.07113146036863327, "learning_rate": 0.01, "loss": 1.9297, "step": 119709 }, { "epoch": 12.300863131935882, "grad_norm": 0.05279774218797684, "learning_rate": 0.01, "loss": 1.9258, "step": 119712 }, { "epoch": 12.301171393341553, "grad_norm": 0.03502047434449196, "learning_rate": 0.01, "loss": 1.9495, "step": 119715 }, { "epoch": 12.301479654747226, "grad_norm": 0.047808703035116196, "learning_rate": 0.01, "loss": 1.9435, "step": 119718 }, { "epoch": 12.301787916152898, "grad_norm": 0.05250471085309982, "learning_rate": 0.01, "loss": 1.9081, "step": 119721 }, { "epoch": 12.30209617755857, "grad_norm": 0.041899871081113815, "learning_rate": 0.01, "loss": 1.9311, "step": 119724 }, { "epoch": 12.302404438964242, "grad_norm": 0.1110948920249939, "learning_rate": 0.01, "loss": 1.9379, "step": 119727 }, { "epoch": 12.302712700369913, "grad_norm": 0.07395997643470764, "learning_rate": 0.01, "loss": 1.9486, "step": 119730 }, { "epoch": 12.303020961775585, "grad_norm": 0.09498132765293121, "learning_rate": 0.01, "loss": 1.9477, "step": 119733 }, { "epoch": 12.303329223181258, "grad_norm": 0.09375674277544022, "learning_rate": 0.01, "loss": 1.9395, "step": 119736 }, { "epoch": 12.303637484586929, "grad_norm": 0.047957029193639755, "learning_rate": 0.01, "loss": 1.9448, "step": 119739 }, { "epoch": 12.303945745992602, "grad_norm": 0.038424279540777206, "learning_rate": 0.01, "loss": 1.9329, "step": 119742 }, { "epoch": 12.304254007398274, "grad_norm": 0.08323843777179718, "learning_rate": 0.01, "loss": 1.9215, "step": 119745 }, { "epoch": 12.304562268803945, "grad_norm": 0.09454569965600967, "learning_rate": 0.01, "loss": 1.9348, "step": 119748 }, { "epoch": 12.304870530209618, "grad_norm": 0.12319619208574295, "learning_rate": 0.01, "loss": 1.9641, "step": 119751 }, { "epoch": 12.30517879161529, "grad_norm": 0.05019475147128105, "learning_rate": 0.01, "loss": 1.9328, "step": 119754 }, { "epoch": 12.305487053020961, "grad_norm": 0.08038980513811111, "learning_rate": 0.01, "loss": 1.9539, "step": 119757 }, { "epoch": 12.305795314426634, "grad_norm": 0.09341848641633987, "learning_rate": 0.01, "loss": 1.9233, "step": 119760 }, { "epoch": 12.306103575832307, "grad_norm": 0.08108554780483246, "learning_rate": 0.01, "loss": 1.9686, "step": 119763 }, { "epoch": 12.306411837237977, "grad_norm": 0.10190309584140778, "learning_rate": 0.01, "loss": 1.9488, "step": 119766 }, { "epoch": 12.30672009864365, "grad_norm": 0.07136478275060654, "learning_rate": 0.01, "loss": 1.9472, "step": 119769 }, { "epoch": 12.307028360049323, "grad_norm": 0.05075160413980484, "learning_rate": 0.01, "loss": 1.9247, "step": 119772 }, { "epoch": 12.307336621454994, "grad_norm": 0.04689573496580124, "learning_rate": 0.01, "loss": 1.928, "step": 119775 }, { "epoch": 12.307644882860666, "grad_norm": 0.04788019880652428, "learning_rate": 0.01, "loss": 1.9216, "step": 119778 }, { "epoch": 12.307953144266337, "grad_norm": 0.04214687645435333, "learning_rate": 0.01, "loss": 1.9241, "step": 119781 }, { "epoch": 12.30826140567201, "grad_norm": 0.09224370867013931, "learning_rate": 0.01, "loss": 1.9419, "step": 119784 }, { "epoch": 12.308569667077682, "grad_norm": 0.07086529582738876, "learning_rate": 0.01, "loss": 1.9129, "step": 119787 }, { "epoch": 12.308877928483353, "grad_norm": 0.05687602609395981, "learning_rate": 0.01, "loss": 1.9335, "step": 119790 }, { "epoch": 12.309186189889026, "grad_norm": 0.061124287545681, "learning_rate": 0.01, "loss": 1.9325, "step": 119793 }, { "epoch": 12.309494451294698, "grad_norm": 0.0497773177921772, "learning_rate": 0.01, "loss": 1.9292, "step": 119796 }, { "epoch": 12.30980271270037, "grad_norm": 0.03449302166700363, "learning_rate": 0.01, "loss": 1.9528, "step": 119799 }, { "epoch": 12.310110974106042, "grad_norm": 0.043521273881196976, "learning_rate": 0.01, "loss": 1.9265, "step": 119802 }, { "epoch": 12.310419235511715, "grad_norm": 0.04101839289069176, "learning_rate": 0.01, "loss": 1.9169, "step": 119805 }, { "epoch": 12.310727496917385, "grad_norm": 0.13504955172538757, "learning_rate": 0.01, "loss": 1.9545, "step": 119808 }, { "epoch": 12.311035758323058, "grad_norm": 0.07502903789281845, "learning_rate": 0.01, "loss": 1.9335, "step": 119811 }, { "epoch": 12.31134401972873, "grad_norm": 0.08481600880622864, "learning_rate": 0.01, "loss": 1.9297, "step": 119814 }, { "epoch": 12.311652281134402, "grad_norm": 0.07710324972867966, "learning_rate": 0.01, "loss": 1.964, "step": 119817 }, { "epoch": 12.311960542540074, "grad_norm": 0.11857612431049347, "learning_rate": 0.01, "loss": 1.9527, "step": 119820 }, { "epoch": 12.312268803945745, "grad_norm": 0.03913726657629013, "learning_rate": 0.01, "loss": 1.9179, "step": 119823 }, { "epoch": 12.312577065351418, "grad_norm": 0.06333804130554199, "learning_rate": 0.01, "loss": 1.9678, "step": 119826 }, { "epoch": 12.31288532675709, "grad_norm": 0.053462814539670944, "learning_rate": 0.01, "loss": 1.93, "step": 119829 }, { "epoch": 12.313193588162761, "grad_norm": 0.036863964051008224, "learning_rate": 0.01, "loss": 1.9224, "step": 119832 }, { "epoch": 12.313501849568434, "grad_norm": 0.05140704661607742, "learning_rate": 0.01, "loss": 1.952, "step": 119835 }, { "epoch": 12.313810110974106, "grad_norm": 0.12734730541706085, "learning_rate": 0.01, "loss": 1.939, "step": 119838 }, { "epoch": 12.314118372379777, "grad_norm": 0.04494262859225273, "learning_rate": 0.01, "loss": 1.9331, "step": 119841 }, { "epoch": 12.31442663378545, "grad_norm": 0.10952113568782806, "learning_rate": 0.01, "loss": 1.9246, "step": 119844 }, { "epoch": 12.314734895191123, "grad_norm": 0.12877106666564941, "learning_rate": 0.01, "loss": 1.9328, "step": 119847 }, { "epoch": 12.315043156596793, "grad_norm": 0.05764854699373245, "learning_rate": 0.01, "loss": 1.9718, "step": 119850 }, { "epoch": 12.315351418002466, "grad_norm": 0.07877784222364426, "learning_rate": 0.01, "loss": 1.9389, "step": 119853 }, { "epoch": 12.315659679408139, "grad_norm": 0.04288757964968681, "learning_rate": 0.01, "loss": 1.9434, "step": 119856 }, { "epoch": 12.31596794081381, "grad_norm": 0.05566611886024475, "learning_rate": 0.01, "loss": 1.9559, "step": 119859 }, { "epoch": 12.316276202219482, "grad_norm": 0.03457842767238617, "learning_rate": 0.01, "loss": 1.9168, "step": 119862 }, { "epoch": 12.316584463625155, "grad_norm": 0.07046034932136536, "learning_rate": 0.01, "loss": 1.9265, "step": 119865 }, { "epoch": 12.316892725030826, "grad_norm": 0.1305147260427475, "learning_rate": 0.01, "loss": 1.9519, "step": 119868 }, { "epoch": 12.317200986436498, "grad_norm": 0.14929534494876862, "learning_rate": 0.01, "loss": 1.9236, "step": 119871 }, { "epoch": 12.317509247842171, "grad_norm": 0.16848324239253998, "learning_rate": 0.01, "loss": 1.9147, "step": 119874 }, { "epoch": 12.317817509247842, "grad_norm": 0.13916730880737305, "learning_rate": 0.01, "loss": 1.9482, "step": 119877 }, { "epoch": 12.318125770653515, "grad_norm": 0.04896625503897667, "learning_rate": 0.01, "loss": 1.9374, "step": 119880 }, { "epoch": 12.318434032059185, "grad_norm": 0.0732089951634407, "learning_rate": 0.01, "loss": 1.9613, "step": 119883 }, { "epoch": 12.318742293464858, "grad_norm": 0.04600311815738678, "learning_rate": 0.01, "loss": 1.9279, "step": 119886 }, { "epoch": 12.31905055487053, "grad_norm": 0.057165056467056274, "learning_rate": 0.01, "loss": 1.9364, "step": 119889 }, { "epoch": 12.319358816276202, "grad_norm": 0.04162990674376488, "learning_rate": 0.01, "loss": 1.9041, "step": 119892 }, { "epoch": 12.319667077681874, "grad_norm": 0.0429425910115242, "learning_rate": 0.01, "loss": 1.9531, "step": 119895 }, { "epoch": 12.319975339087547, "grad_norm": 0.06248023360967636, "learning_rate": 0.01, "loss": 1.9492, "step": 119898 }, { "epoch": 12.320283600493218, "grad_norm": 0.09580297768115997, "learning_rate": 0.01, "loss": 1.9417, "step": 119901 }, { "epoch": 12.32059186189889, "grad_norm": 0.08072346448898315, "learning_rate": 0.01, "loss": 1.9389, "step": 119904 }, { "epoch": 12.320900123304563, "grad_norm": 0.09270929545164108, "learning_rate": 0.01, "loss": 1.936, "step": 119907 }, { "epoch": 12.321208384710234, "grad_norm": 0.06813566386699677, "learning_rate": 0.01, "loss": 1.9049, "step": 119910 }, { "epoch": 12.321516646115906, "grad_norm": 0.07171430438756943, "learning_rate": 0.01, "loss": 1.9399, "step": 119913 }, { "epoch": 12.321824907521579, "grad_norm": 0.056445494294166565, "learning_rate": 0.01, "loss": 1.9478, "step": 119916 }, { "epoch": 12.32213316892725, "grad_norm": 0.11223911494016647, "learning_rate": 0.01, "loss": 1.9599, "step": 119919 }, { "epoch": 12.322441430332923, "grad_norm": 0.09491009265184402, "learning_rate": 0.01, "loss": 1.9532, "step": 119922 }, { "epoch": 12.322749691738593, "grad_norm": 0.06327483057975769, "learning_rate": 0.01, "loss": 1.9296, "step": 119925 }, { "epoch": 12.323057953144266, "grad_norm": 0.04618639498949051, "learning_rate": 0.01, "loss": 1.9612, "step": 119928 }, { "epoch": 12.323366214549939, "grad_norm": 0.03715759515762329, "learning_rate": 0.01, "loss": 1.9295, "step": 119931 }, { "epoch": 12.32367447595561, "grad_norm": 0.03354526311159134, "learning_rate": 0.01, "loss": 1.9154, "step": 119934 }, { "epoch": 12.323982737361282, "grad_norm": 0.06126822903752327, "learning_rate": 0.01, "loss": 1.937, "step": 119937 }, { "epoch": 12.324290998766955, "grad_norm": 0.10685601830482483, "learning_rate": 0.01, "loss": 1.9328, "step": 119940 }, { "epoch": 12.324599260172626, "grad_norm": 0.07914771139621735, "learning_rate": 0.01, "loss": 1.9283, "step": 119943 }, { "epoch": 12.324907521578298, "grad_norm": 0.0697379782795906, "learning_rate": 0.01, "loss": 1.9368, "step": 119946 }, { "epoch": 12.325215782983971, "grad_norm": 0.1079431101679802, "learning_rate": 0.01, "loss": 1.9288, "step": 119949 }, { "epoch": 12.325524044389642, "grad_norm": 0.03759371489286423, "learning_rate": 0.01, "loss": 1.9565, "step": 119952 }, { "epoch": 12.325832305795315, "grad_norm": 0.06159079447388649, "learning_rate": 0.01, "loss": 1.923, "step": 119955 }, { "epoch": 12.326140567200987, "grad_norm": 0.07581518590450287, "learning_rate": 0.01, "loss": 1.925, "step": 119958 }, { "epoch": 12.326448828606658, "grad_norm": 0.06095201149582863, "learning_rate": 0.01, "loss": 1.9244, "step": 119961 }, { "epoch": 12.32675709001233, "grad_norm": 0.05279630050063133, "learning_rate": 0.01, "loss": 1.9105, "step": 119964 }, { "epoch": 12.327065351418003, "grad_norm": 0.035489410161972046, "learning_rate": 0.01, "loss": 1.9237, "step": 119967 }, { "epoch": 12.327373612823674, "grad_norm": 0.037815045565366745, "learning_rate": 0.01, "loss": 1.9164, "step": 119970 }, { "epoch": 12.327681874229347, "grad_norm": 0.05071322247385979, "learning_rate": 0.01, "loss": 1.9601, "step": 119973 }, { "epoch": 12.327990135635018, "grad_norm": 0.04019641876220703, "learning_rate": 0.01, "loss": 1.9238, "step": 119976 }, { "epoch": 12.32829839704069, "grad_norm": 0.09706182032823563, "learning_rate": 0.01, "loss": 1.9547, "step": 119979 }, { "epoch": 12.328606658446363, "grad_norm": 0.08802255243062973, "learning_rate": 0.01, "loss": 1.9127, "step": 119982 }, { "epoch": 12.328914919852034, "grad_norm": 0.061536651104688644, "learning_rate": 0.01, "loss": 1.9494, "step": 119985 }, { "epoch": 12.329223181257706, "grad_norm": 0.07003441452980042, "learning_rate": 0.01, "loss": 1.9284, "step": 119988 }, { "epoch": 12.329531442663379, "grad_norm": 0.046714432537555695, "learning_rate": 0.01, "loss": 1.9575, "step": 119991 }, { "epoch": 12.32983970406905, "grad_norm": 0.05683311074972153, "learning_rate": 0.01, "loss": 1.9362, "step": 119994 }, { "epoch": 12.330147965474723, "grad_norm": 0.05732543021440506, "learning_rate": 0.01, "loss": 1.9495, "step": 119997 }, { "epoch": 12.330456226880395, "grad_norm": 0.04505174979567528, "learning_rate": 0.01, "loss": 1.931, "step": 120000 }, { "epoch": 12.330764488286066, "grad_norm": 0.08947070688009262, "learning_rate": 0.01, "loss": 1.9149, "step": 120003 }, { "epoch": 12.331072749691739, "grad_norm": 0.12785474956035614, "learning_rate": 0.01, "loss": 1.9541, "step": 120006 }, { "epoch": 12.331381011097411, "grad_norm": 0.09339133650064468, "learning_rate": 0.01, "loss": 1.9292, "step": 120009 }, { "epoch": 12.331689272503082, "grad_norm": 0.051951028406620026, "learning_rate": 0.01, "loss": 1.9614, "step": 120012 }, { "epoch": 12.331997533908755, "grad_norm": 0.04255111515522003, "learning_rate": 0.01, "loss": 1.9212, "step": 120015 }, { "epoch": 12.332305795314426, "grad_norm": 0.055113568902015686, "learning_rate": 0.01, "loss": 1.936, "step": 120018 }, { "epoch": 12.332614056720098, "grad_norm": 0.04617108777165413, "learning_rate": 0.01, "loss": 1.9227, "step": 120021 }, { "epoch": 12.332922318125771, "grad_norm": 0.06585944443941116, "learning_rate": 0.01, "loss": 1.9487, "step": 120024 }, { "epoch": 12.333230579531442, "grad_norm": 0.042562294751405716, "learning_rate": 0.01, "loss": 1.9256, "step": 120027 }, { "epoch": 12.333538840937115, "grad_norm": 0.04126080498099327, "learning_rate": 0.01, "loss": 1.9346, "step": 120030 }, { "epoch": 12.333847102342787, "grad_norm": 0.03414085879921913, "learning_rate": 0.01, "loss": 1.9305, "step": 120033 }, { "epoch": 12.334155363748458, "grad_norm": 0.05674465373158455, "learning_rate": 0.01, "loss": 1.928, "step": 120036 }, { "epoch": 12.33446362515413, "grad_norm": 0.13386379182338715, "learning_rate": 0.01, "loss": 1.9528, "step": 120039 }, { "epoch": 12.334771886559803, "grad_norm": 0.05028100311756134, "learning_rate": 0.01, "loss": 1.9293, "step": 120042 }, { "epoch": 12.335080147965474, "grad_norm": 0.07372204959392548, "learning_rate": 0.01, "loss": 1.9312, "step": 120045 }, { "epoch": 12.335388409371147, "grad_norm": 0.03808963671326637, "learning_rate": 0.01, "loss": 1.9376, "step": 120048 }, { "epoch": 12.33569667077682, "grad_norm": 0.06065637245774269, "learning_rate": 0.01, "loss": 1.9448, "step": 120051 }, { "epoch": 12.33600493218249, "grad_norm": 0.04890544340014458, "learning_rate": 0.01, "loss": 1.91, "step": 120054 }, { "epoch": 12.336313193588163, "grad_norm": 0.13426274061203003, "learning_rate": 0.01, "loss": 1.9461, "step": 120057 }, { "epoch": 12.336621454993836, "grad_norm": 0.0989098846912384, "learning_rate": 0.01, "loss": 1.932, "step": 120060 }, { "epoch": 12.336929716399506, "grad_norm": 0.07141082733869553, "learning_rate": 0.01, "loss": 1.9171, "step": 120063 }, { "epoch": 12.337237977805179, "grad_norm": 0.06102481856942177, "learning_rate": 0.01, "loss": 1.9281, "step": 120066 }, { "epoch": 12.337546239210852, "grad_norm": 0.060864515602588654, "learning_rate": 0.01, "loss": 1.9412, "step": 120069 }, { "epoch": 12.337854500616523, "grad_norm": 0.09040584415197372, "learning_rate": 0.01, "loss": 1.9268, "step": 120072 }, { "epoch": 12.338162762022195, "grad_norm": 0.04068896919488907, "learning_rate": 0.01, "loss": 1.9338, "step": 120075 }, { "epoch": 12.338471023427866, "grad_norm": 0.10498625785112381, "learning_rate": 0.01, "loss": 1.9479, "step": 120078 }, { "epoch": 12.338779284833539, "grad_norm": 0.061306800693273544, "learning_rate": 0.01, "loss": 1.9293, "step": 120081 }, { "epoch": 12.339087546239211, "grad_norm": 0.10269539058208466, "learning_rate": 0.01, "loss": 1.9374, "step": 120084 }, { "epoch": 12.339395807644882, "grad_norm": 0.07771135121583939, "learning_rate": 0.01, "loss": 1.9396, "step": 120087 }, { "epoch": 12.339704069050555, "grad_norm": 0.08635908365249634, "learning_rate": 0.01, "loss": 1.9067, "step": 120090 }, { "epoch": 12.340012330456227, "grad_norm": 0.05086525157094002, "learning_rate": 0.01, "loss": 1.9145, "step": 120093 }, { "epoch": 12.340320591861898, "grad_norm": 0.06784183531999588, "learning_rate": 0.01, "loss": 1.9537, "step": 120096 }, { "epoch": 12.340628853267571, "grad_norm": 0.09301754087209702, "learning_rate": 0.01, "loss": 1.9493, "step": 120099 }, { "epoch": 12.340937114673244, "grad_norm": 0.05019529536366463, "learning_rate": 0.01, "loss": 1.9124, "step": 120102 }, { "epoch": 12.341245376078914, "grad_norm": 0.07970885932445526, "learning_rate": 0.01, "loss": 1.9361, "step": 120105 }, { "epoch": 12.341553637484587, "grad_norm": 0.07051090151071548, "learning_rate": 0.01, "loss": 1.9247, "step": 120108 }, { "epoch": 12.34186189889026, "grad_norm": 0.0799780935049057, "learning_rate": 0.01, "loss": 1.9111, "step": 120111 }, { "epoch": 12.34217016029593, "grad_norm": 0.053853873163461685, "learning_rate": 0.01, "loss": 1.9618, "step": 120114 }, { "epoch": 12.342478421701603, "grad_norm": 0.03682436794042587, "learning_rate": 0.01, "loss": 1.9454, "step": 120117 }, { "epoch": 12.342786683107274, "grad_norm": 0.08985862135887146, "learning_rate": 0.01, "loss": 1.9308, "step": 120120 }, { "epoch": 12.343094944512947, "grad_norm": 0.09578026086091995, "learning_rate": 0.01, "loss": 1.94, "step": 120123 }, { "epoch": 12.34340320591862, "grad_norm": 0.18115627765655518, "learning_rate": 0.01, "loss": 1.9398, "step": 120126 }, { "epoch": 12.34371146732429, "grad_norm": 0.09127353131771088, "learning_rate": 0.01, "loss": 1.9513, "step": 120129 }, { "epoch": 12.344019728729963, "grad_norm": 0.042836468666791916, "learning_rate": 0.01, "loss": 1.9315, "step": 120132 }, { "epoch": 12.344327990135636, "grad_norm": 0.04077746719121933, "learning_rate": 0.01, "loss": 1.9322, "step": 120135 }, { "epoch": 12.344636251541306, "grad_norm": 0.04519999399781227, "learning_rate": 0.01, "loss": 1.9143, "step": 120138 }, { "epoch": 12.344944512946979, "grad_norm": 0.04900500923395157, "learning_rate": 0.01, "loss": 1.9464, "step": 120141 }, { "epoch": 12.345252774352652, "grad_norm": 0.05115954577922821, "learning_rate": 0.01, "loss": 1.949, "step": 120144 }, { "epoch": 12.345561035758323, "grad_norm": 0.040632762014865875, "learning_rate": 0.01, "loss": 1.9081, "step": 120147 }, { "epoch": 12.345869297163995, "grad_norm": 0.13975204527378082, "learning_rate": 0.01, "loss": 1.9344, "step": 120150 }, { "epoch": 12.346177558569668, "grad_norm": 0.04202760010957718, "learning_rate": 0.01, "loss": 1.9311, "step": 120153 }, { "epoch": 12.346485819975339, "grad_norm": 0.08207782357931137, "learning_rate": 0.01, "loss": 1.9438, "step": 120156 }, { "epoch": 12.346794081381011, "grad_norm": 0.052009355276823044, "learning_rate": 0.01, "loss": 1.9335, "step": 120159 }, { "epoch": 12.347102342786684, "grad_norm": 0.059325072914361954, "learning_rate": 0.01, "loss": 1.9412, "step": 120162 }, { "epoch": 12.347410604192355, "grad_norm": 0.05349455028772354, "learning_rate": 0.01, "loss": 1.9321, "step": 120165 }, { "epoch": 12.347718865598027, "grad_norm": 0.06158902868628502, "learning_rate": 0.01, "loss": 1.939, "step": 120168 }, { "epoch": 12.348027127003698, "grad_norm": 0.09922099858522415, "learning_rate": 0.01, "loss": 1.9381, "step": 120171 }, { "epoch": 12.348335388409371, "grad_norm": 0.06710609048604965, "learning_rate": 0.01, "loss": 1.9208, "step": 120174 }, { "epoch": 12.348643649815044, "grad_norm": 0.11664668470621109, "learning_rate": 0.01, "loss": 1.9297, "step": 120177 }, { "epoch": 12.348951911220714, "grad_norm": 0.047379445284605026, "learning_rate": 0.01, "loss": 1.9718, "step": 120180 }, { "epoch": 12.349260172626387, "grad_norm": 0.09358347952365875, "learning_rate": 0.01, "loss": 1.9258, "step": 120183 }, { "epoch": 12.34956843403206, "grad_norm": 0.11921723186969757, "learning_rate": 0.01, "loss": 1.9322, "step": 120186 }, { "epoch": 12.34987669543773, "grad_norm": 0.059619396924972534, "learning_rate": 0.01, "loss": 1.9036, "step": 120189 }, { "epoch": 12.350184956843403, "grad_norm": 0.03857177123427391, "learning_rate": 0.01, "loss": 1.929, "step": 120192 }, { "epoch": 12.350493218249076, "grad_norm": 0.06697624921798706, "learning_rate": 0.01, "loss": 1.9445, "step": 120195 }, { "epoch": 12.350801479654747, "grad_norm": 0.08252769708633423, "learning_rate": 0.01, "loss": 1.92, "step": 120198 }, { "epoch": 12.35110974106042, "grad_norm": 0.13171248137950897, "learning_rate": 0.01, "loss": 1.9245, "step": 120201 }, { "epoch": 12.351418002466092, "grad_norm": 0.05222803354263306, "learning_rate": 0.01, "loss": 1.9423, "step": 120204 }, { "epoch": 12.351726263871763, "grad_norm": 0.05304306373000145, "learning_rate": 0.01, "loss": 1.9214, "step": 120207 }, { "epoch": 12.352034525277436, "grad_norm": 0.04716663807630539, "learning_rate": 0.01, "loss": 1.9134, "step": 120210 }, { "epoch": 12.352342786683106, "grad_norm": 0.0832684114575386, "learning_rate": 0.01, "loss": 1.9444, "step": 120213 }, { "epoch": 12.352651048088779, "grad_norm": 0.049712538719177246, "learning_rate": 0.01, "loss": 1.9176, "step": 120216 }, { "epoch": 12.352959309494452, "grad_norm": 0.0388319157063961, "learning_rate": 0.01, "loss": 1.9367, "step": 120219 }, { "epoch": 12.353267570900123, "grad_norm": 0.0908878967165947, "learning_rate": 0.01, "loss": 1.9373, "step": 120222 }, { "epoch": 12.353575832305795, "grad_norm": 0.07359366863965988, "learning_rate": 0.01, "loss": 1.9326, "step": 120225 }, { "epoch": 12.353884093711468, "grad_norm": 0.08322012424468994, "learning_rate": 0.01, "loss": 1.9186, "step": 120228 }, { "epoch": 12.354192355117139, "grad_norm": 0.08135717362165451, "learning_rate": 0.01, "loss": 1.9279, "step": 120231 }, { "epoch": 12.354500616522811, "grad_norm": 0.05825061723589897, "learning_rate": 0.01, "loss": 1.9267, "step": 120234 }, { "epoch": 12.354808877928484, "grad_norm": 0.1178986206650734, "learning_rate": 0.01, "loss": 1.9642, "step": 120237 }, { "epoch": 12.355117139334155, "grad_norm": 0.04324718937277794, "learning_rate": 0.01, "loss": 1.9452, "step": 120240 }, { "epoch": 12.355425400739827, "grad_norm": 0.09722001105546951, "learning_rate": 0.01, "loss": 1.9536, "step": 120243 }, { "epoch": 12.3557336621455, "grad_norm": 0.08974692225456238, "learning_rate": 0.01, "loss": 1.9378, "step": 120246 }, { "epoch": 12.356041923551171, "grad_norm": 0.05838640034198761, "learning_rate": 0.01, "loss": 1.9443, "step": 120249 }, { "epoch": 12.356350184956844, "grad_norm": 0.054912663996219635, "learning_rate": 0.01, "loss": 1.9552, "step": 120252 }, { "epoch": 12.356658446362516, "grad_norm": 0.042390115559101105, "learning_rate": 0.01, "loss": 1.9272, "step": 120255 }, { "epoch": 12.356966707768187, "grad_norm": 0.09722331166267395, "learning_rate": 0.01, "loss": 1.9365, "step": 120258 }, { "epoch": 12.35727496917386, "grad_norm": 0.147323876619339, "learning_rate": 0.01, "loss": 1.9209, "step": 120261 }, { "epoch": 12.357583230579532, "grad_norm": 0.07642398029565811, "learning_rate": 0.01, "loss": 1.9195, "step": 120264 }, { "epoch": 12.357891491985203, "grad_norm": 0.0503547303378582, "learning_rate": 0.01, "loss": 1.9132, "step": 120267 }, { "epoch": 12.358199753390876, "grad_norm": 0.04040350764989853, "learning_rate": 0.01, "loss": 1.9336, "step": 120270 }, { "epoch": 12.358508014796547, "grad_norm": 0.13021105527877808, "learning_rate": 0.01, "loss": 1.9363, "step": 120273 }, { "epoch": 12.35881627620222, "grad_norm": 0.053368840366601944, "learning_rate": 0.01, "loss": 1.9356, "step": 120276 }, { "epoch": 12.359124537607892, "grad_norm": 0.05143368989229202, "learning_rate": 0.01, "loss": 1.9153, "step": 120279 }, { "epoch": 12.359432799013563, "grad_norm": 0.03716684505343437, "learning_rate": 0.01, "loss": 1.9472, "step": 120282 }, { "epoch": 12.359741060419235, "grad_norm": 0.039309579879045486, "learning_rate": 0.01, "loss": 1.9243, "step": 120285 }, { "epoch": 12.360049321824908, "grad_norm": 0.05379367247223854, "learning_rate": 0.01, "loss": 1.9308, "step": 120288 }, { "epoch": 12.360357583230579, "grad_norm": 0.040921930223703384, "learning_rate": 0.01, "loss": 1.9557, "step": 120291 }, { "epoch": 12.360665844636252, "grad_norm": 0.09452662616968155, "learning_rate": 0.01, "loss": 1.9271, "step": 120294 }, { "epoch": 12.360974106041924, "grad_norm": 0.0810219794511795, "learning_rate": 0.01, "loss": 1.9463, "step": 120297 }, { "epoch": 12.361282367447595, "grad_norm": 0.10484972596168518, "learning_rate": 0.01, "loss": 1.9287, "step": 120300 }, { "epoch": 12.361590628853268, "grad_norm": 0.08036096394062042, "learning_rate": 0.01, "loss": 1.9483, "step": 120303 }, { "epoch": 12.36189889025894, "grad_norm": 0.0392296239733696, "learning_rate": 0.01, "loss": 1.9466, "step": 120306 }, { "epoch": 12.362207151664611, "grad_norm": 0.040033504366874695, "learning_rate": 0.01, "loss": 1.9198, "step": 120309 }, { "epoch": 12.362515413070284, "grad_norm": 0.045406442135572433, "learning_rate": 0.01, "loss": 1.9545, "step": 120312 }, { "epoch": 12.362823674475955, "grad_norm": 0.09677822142839432, "learning_rate": 0.01, "loss": 1.9221, "step": 120315 }, { "epoch": 12.363131935881627, "grad_norm": 0.12022221088409424, "learning_rate": 0.01, "loss": 1.9325, "step": 120318 }, { "epoch": 12.3634401972873, "grad_norm": 0.10691323131322861, "learning_rate": 0.01, "loss": 1.9291, "step": 120321 }, { "epoch": 12.363748458692971, "grad_norm": 0.10206713527441025, "learning_rate": 0.01, "loss": 1.9319, "step": 120324 }, { "epoch": 12.364056720098644, "grad_norm": 0.12835240364074707, "learning_rate": 0.01, "loss": 1.9327, "step": 120327 }, { "epoch": 12.364364981504316, "grad_norm": 0.05794866755604744, "learning_rate": 0.01, "loss": 1.9323, "step": 120330 }, { "epoch": 12.364673242909987, "grad_norm": 0.0687018483877182, "learning_rate": 0.01, "loss": 1.9256, "step": 120333 }, { "epoch": 12.36498150431566, "grad_norm": 0.04902690649032593, "learning_rate": 0.01, "loss": 1.9574, "step": 120336 }, { "epoch": 12.365289765721332, "grad_norm": 0.03381572291254997, "learning_rate": 0.01, "loss": 1.9291, "step": 120339 }, { "epoch": 12.365598027127003, "grad_norm": 0.04516056180000305, "learning_rate": 0.01, "loss": 1.9245, "step": 120342 }, { "epoch": 12.365906288532676, "grad_norm": 0.05366626754403114, "learning_rate": 0.01, "loss": 1.9172, "step": 120345 }, { "epoch": 12.366214549938348, "grad_norm": 0.035903528332710266, "learning_rate": 0.01, "loss": 1.9229, "step": 120348 }, { "epoch": 12.36652281134402, "grad_norm": 0.06833939999341965, "learning_rate": 0.01, "loss": 1.9166, "step": 120351 }, { "epoch": 12.366831072749692, "grad_norm": 0.05716986209154129, "learning_rate": 0.01, "loss": 1.9133, "step": 120354 }, { "epoch": 12.367139334155365, "grad_norm": 0.15210434794425964, "learning_rate": 0.01, "loss": 1.9321, "step": 120357 }, { "epoch": 12.367447595561035, "grad_norm": 0.08575952798128128, "learning_rate": 0.01, "loss": 1.9097, "step": 120360 }, { "epoch": 12.367755856966708, "grad_norm": 0.03106115758419037, "learning_rate": 0.01, "loss": 1.9144, "step": 120363 }, { "epoch": 12.368064118372379, "grad_norm": 0.0336822085082531, "learning_rate": 0.01, "loss": 1.9463, "step": 120366 }, { "epoch": 12.368372379778052, "grad_norm": 0.09181510657072067, "learning_rate": 0.01, "loss": 1.9441, "step": 120369 }, { "epoch": 12.368680641183724, "grad_norm": 0.06922073662281036, "learning_rate": 0.01, "loss": 1.937, "step": 120372 }, { "epoch": 12.368988902589395, "grad_norm": 0.11679048091173172, "learning_rate": 0.01, "loss": 1.9365, "step": 120375 }, { "epoch": 12.369297163995068, "grad_norm": 0.06578762084245682, "learning_rate": 0.01, "loss": 1.9418, "step": 120378 }, { "epoch": 12.36960542540074, "grad_norm": 0.11222466826438904, "learning_rate": 0.01, "loss": 1.9517, "step": 120381 }, { "epoch": 12.369913686806411, "grad_norm": 0.06960795074701309, "learning_rate": 0.01, "loss": 1.9248, "step": 120384 }, { "epoch": 12.370221948212084, "grad_norm": 0.05938812717795372, "learning_rate": 0.01, "loss": 1.9427, "step": 120387 }, { "epoch": 12.370530209617757, "grad_norm": 0.04438195377588272, "learning_rate": 0.01, "loss": 1.9522, "step": 120390 }, { "epoch": 12.370838471023427, "grad_norm": 0.07959045469760895, "learning_rate": 0.01, "loss": 1.91, "step": 120393 }, { "epoch": 12.3711467324291, "grad_norm": 0.0454067625105381, "learning_rate": 0.01, "loss": 1.9252, "step": 120396 }, { "epoch": 12.371454993834773, "grad_norm": 0.07425530254840851, "learning_rate": 0.01, "loss": 1.8974, "step": 120399 }, { "epoch": 12.371763255240444, "grad_norm": 0.03199988231062889, "learning_rate": 0.01, "loss": 1.9368, "step": 120402 }, { "epoch": 12.372071516646116, "grad_norm": 0.10832170397043228, "learning_rate": 0.01, "loss": 1.9339, "step": 120405 }, { "epoch": 12.372379778051787, "grad_norm": 0.08807660639286041, "learning_rate": 0.01, "loss": 1.9216, "step": 120408 }, { "epoch": 12.37268803945746, "grad_norm": 0.10143882036209106, "learning_rate": 0.01, "loss": 1.9307, "step": 120411 }, { "epoch": 12.372996300863132, "grad_norm": 0.06982599198818207, "learning_rate": 0.01, "loss": 1.9271, "step": 120414 }, { "epoch": 12.373304562268803, "grad_norm": 0.03442126512527466, "learning_rate": 0.01, "loss": 1.9441, "step": 120417 }, { "epoch": 12.373612823674476, "grad_norm": 0.09746600687503815, "learning_rate": 0.01, "loss": 1.9357, "step": 120420 }, { "epoch": 12.373921085080148, "grad_norm": 0.11646406352519989, "learning_rate": 0.01, "loss": 1.9217, "step": 120423 }, { "epoch": 12.37422934648582, "grad_norm": 0.05813951790332794, "learning_rate": 0.01, "loss": 1.9243, "step": 120426 }, { "epoch": 12.374537607891492, "grad_norm": 0.11898761242628098, "learning_rate": 0.01, "loss": 1.9448, "step": 120429 }, { "epoch": 12.374845869297165, "grad_norm": 0.10534807294607162, "learning_rate": 0.01, "loss": 1.9366, "step": 120432 }, { "epoch": 12.375154130702835, "grad_norm": 0.06432823836803436, "learning_rate": 0.01, "loss": 1.9312, "step": 120435 }, { "epoch": 12.375462392108508, "grad_norm": 0.06572303920984268, "learning_rate": 0.01, "loss": 1.9663, "step": 120438 }, { "epoch": 12.37577065351418, "grad_norm": 0.0753687247633934, "learning_rate": 0.01, "loss": 1.9145, "step": 120441 }, { "epoch": 12.376078914919852, "grad_norm": 0.06693238765001297, "learning_rate": 0.01, "loss": 1.9554, "step": 120444 }, { "epoch": 12.376387176325524, "grad_norm": 0.034933801740407944, "learning_rate": 0.01, "loss": 1.9313, "step": 120447 }, { "epoch": 12.376695437731197, "grad_norm": 0.04015972092747688, "learning_rate": 0.01, "loss": 1.9494, "step": 120450 }, { "epoch": 12.377003699136868, "grad_norm": 0.039467908442020416, "learning_rate": 0.01, "loss": 1.9175, "step": 120453 }, { "epoch": 12.37731196054254, "grad_norm": 0.055875346064567566, "learning_rate": 0.01, "loss": 1.9113, "step": 120456 }, { "epoch": 12.377620221948213, "grad_norm": 0.10872425884008408, "learning_rate": 0.01, "loss": 1.9449, "step": 120459 }, { "epoch": 12.377928483353884, "grad_norm": 0.13861580193042755, "learning_rate": 0.01, "loss": 1.9231, "step": 120462 }, { "epoch": 12.378236744759556, "grad_norm": 0.07626169919967651, "learning_rate": 0.01, "loss": 1.9502, "step": 120465 }, { "epoch": 12.378545006165227, "grad_norm": 0.03249692916870117, "learning_rate": 0.01, "loss": 1.9387, "step": 120468 }, { "epoch": 12.3788532675709, "grad_norm": 0.042429469525814056, "learning_rate": 0.01, "loss": 1.9376, "step": 120471 }, { "epoch": 12.379161528976573, "grad_norm": 0.06338413804769516, "learning_rate": 0.01, "loss": 1.9422, "step": 120474 }, { "epoch": 12.379469790382243, "grad_norm": 0.0565384179353714, "learning_rate": 0.01, "loss": 1.9328, "step": 120477 }, { "epoch": 12.379778051787916, "grad_norm": 0.04637407884001732, "learning_rate": 0.01, "loss": 1.92, "step": 120480 }, { "epoch": 12.380086313193589, "grad_norm": 0.12896063923835754, "learning_rate": 0.01, "loss": 1.9573, "step": 120483 }, { "epoch": 12.38039457459926, "grad_norm": 0.08389844745397568, "learning_rate": 0.01, "loss": 1.9575, "step": 120486 }, { "epoch": 12.380702836004932, "grad_norm": 0.06900922954082489, "learning_rate": 0.01, "loss": 1.9353, "step": 120489 }, { "epoch": 12.381011097410605, "grad_norm": 0.05321461334824562, "learning_rate": 0.01, "loss": 1.928, "step": 120492 }, { "epoch": 12.381319358816276, "grad_norm": 0.04594092443585396, "learning_rate": 0.01, "loss": 1.9293, "step": 120495 }, { "epoch": 12.381627620221948, "grad_norm": 0.05090029165148735, "learning_rate": 0.01, "loss": 1.9401, "step": 120498 }, { "epoch": 12.381935881627621, "grad_norm": 0.14940975606441498, "learning_rate": 0.01, "loss": 1.9414, "step": 120501 }, { "epoch": 12.382244143033292, "grad_norm": 0.12164038419723511, "learning_rate": 0.01, "loss": 1.9404, "step": 120504 }, { "epoch": 12.382552404438965, "grad_norm": 0.08605052530765533, "learning_rate": 0.01, "loss": 1.9276, "step": 120507 }, { "epoch": 12.382860665844635, "grad_norm": 0.07495385408401489, "learning_rate": 0.01, "loss": 1.9363, "step": 120510 }, { "epoch": 12.383168927250308, "grad_norm": 0.03808677941560745, "learning_rate": 0.01, "loss": 1.9162, "step": 120513 }, { "epoch": 12.38347718865598, "grad_norm": 0.0872986689209938, "learning_rate": 0.01, "loss": 1.9497, "step": 120516 }, { "epoch": 12.383785450061652, "grad_norm": 0.07244990020990372, "learning_rate": 0.01, "loss": 1.9304, "step": 120519 }, { "epoch": 12.384093711467324, "grad_norm": 0.07292909175157547, "learning_rate": 0.01, "loss": 1.9557, "step": 120522 }, { "epoch": 12.384401972872997, "grad_norm": 0.05182334780693054, "learning_rate": 0.01, "loss": 1.913, "step": 120525 }, { "epoch": 12.384710234278668, "grad_norm": 0.046150092035532, "learning_rate": 0.01, "loss": 1.9428, "step": 120528 }, { "epoch": 12.38501849568434, "grad_norm": 0.037887394428253174, "learning_rate": 0.01, "loss": 1.9455, "step": 120531 }, { "epoch": 12.385326757090013, "grad_norm": 0.03401385247707367, "learning_rate": 0.01, "loss": 1.9169, "step": 120534 }, { "epoch": 12.385635018495684, "grad_norm": 0.04379456117749214, "learning_rate": 0.01, "loss": 1.9199, "step": 120537 }, { "epoch": 12.385943279901356, "grad_norm": 0.1705673784017563, "learning_rate": 0.01, "loss": 1.9338, "step": 120540 }, { "epoch": 12.386251541307029, "grad_norm": 0.054921045899391174, "learning_rate": 0.01, "loss": 1.9502, "step": 120543 }, { "epoch": 12.3865598027127, "grad_norm": 0.06597351282835007, "learning_rate": 0.01, "loss": 1.9289, "step": 120546 }, { "epoch": 12.386868064118373, "grad_norm": 0.11311409622430801, "learning_rate": 0.01, "loss": 1.9238, "step": 120549 }, { "epoch": 12.387176325524045, "grad_norm": 0.044089119881391525, "learning_rate": 0.01, "loss": 1.9603, "step": 120552 }, { "epoch": 12.387484586929716, "grad_norm": 0.03425237908959389, "learning_rate": 0.01, "loss": 1.9319, "step": 120555 }, { "epoch": 12.387792848335389, "grad_norm": 0.03652380406856537, "learning_rate": 0.01, "loss": 1.9307, "step": 120558 }, { "epoch": 12.38810110974106, "grad_norm": 0.04659121856093407, "learning_rate": 0.01, "loss": 1.954, "step": 120561 }, { "epoch": 12.388409371146732, "grad_norm": 0.10116490721702576, "learning_rate": 0.01, "loss": 1.9358, "step": 120564 }, { "epoch": 12.388717632552405, "grad_norm": 0.06742721050977707, "learning_rate": 0.01, "loss": 1.9346, "step": 120567 }, { "epoch": 12.389025893958076, "grad_norm": 0.05432113632559776, "learning_rate": 0.01, "loss": 1.9289, "step": 120570 }, { "epoch": 12.389334155363748, "grad_norm": 0.050973620265722275, "learning_rate": 0.01, "loss": 1.9515, "step": 120573 }, { "epoch": 12.389642416769421, "grad_norm": 0.048542775213718414, "learning_rate": 0.01, "loss": 1.9617, "step": 120576 }, { "epoch": 12.389950678175092, "grad_norm": 0.03432178869843483, "learning_rate": 0.01, "loss": 1.9533, "step": 120579 }, { "epoch": 12.390258939580765, "grad_norm": 0.06701638549566269, "learning_rate": 0.01, "loss": 1.9611, "step": 120582 }, { "epoch": 12.390567200986437, "grad_norm": 0.0790436640381813, "learning_rate": 0.01, "loss": 1.9382, "step": 120585 }, { "epoch": 12.390875462392108, "grad_norm": 0.0833590179681778, "learning_rate": 0.01, "loss": 1.9195, "step": 120588 }, { "epoch": 12.39118372379778, "grad_norm": 0.05441255122423172, "learning_rate": 0.01, "loss": 1.9415, "step": 120591 }, { "epoch": 12.391491985203453, "grad_norm": 0.03294618800282478, "learning_rate": 0.01, "loss": 1.9306, "step": 120594 }, { "epoch": 12.391800246609124, "grad_norm": 0.03458996117115021, "learning_rate": 0.01, "loss": 1.9129, "step": 120597 }, { "epoch": 12.392108508014797, "grad_norm": 0.043681446462869644, "learning_rate": 0.01, "loss": 1.9445, "step": 120600 }, { "epoch": 12.392416769420468, "grad_norm": 0.07503581792116165, "learning_rate": 0.01, "loss": 1.938, "step": 120603 }, { "epoch": 12.39272503082614, "grad_norm": 0.0830334946513176, "learning_rate": 0.01, "loss": 1.9264, "step": 120606 }, { "epoch": 12.393033292231813, "grad_norm": 0.10886512696743011, "learning_rate": 0.01, "loss": 1.9426, "step": 120609 }, { "epoch": 12.393341553637484, "grad_norm": 0.08003956079483032, "learning_rate": 0.01, "loss": 1.9433, "step": 120612 }, { "epoch": 12.393649815043156, "grad_norm": 0.06517963856458664, "learning_rate": 0.01, "loss": 1.9547, "step": 120615 }, { "epoch": 12.393958076448829, "grad_norm": 0.037115540355443954, "learning_rate": 0.01, "loss": 1.9104, "step": 120618 }, { "epoch": 12.3942663378545, "grad_norm": 0.10049480944871902, "learning_rate": 0.01, "loss": 1.9588, "step": 120621 }, { "epoch": 12.394574599260173, "grad_norm": 0.052566614001989365, "learning_rate": 0.01, "loss": 1.9427, "step": 120624 }, { "epoch": 12.394882860665845, "grad_norm": 0.038146406412124634, "learning_rate": 0.01, "loss": 1.9213, "step": 120627 }, { "epoch": 12.395191122071516, "grad_norm": 0.05074608325958252, "learning_rate": 0.01, "loss": 1.924, "step": 120630 }, { "epoch": 12.395499383477189, "grad_norm": 0.04188331216573715, "learning_rate": 0.01, "loss": 1.9185, "step": 120633 }, { "epoch": 12.395807644882861, "grad_norm": 0.04295455291867256, "learning_rate": 0.01, "loss": 1.9308, "step": 120636 }, { "epoch": 12.396115906288532, "grad_norm": 0.036794669926166534, "learning_rate": 0.01, "loss": 1.9169, "step": 120639 }, { "epoch": 12.396424167694205, "grad_norm": 0.07238873839378357, "learning_rate": 0.01, "loss": 1.9401, "step": 120642 }, { "epoch": 12.396732429099877, "grad_norm": 0.11191675066947937, "learning_rate": 0.01, "loss": 1.9328, "step": 120645 }, { "epoch": 12.397040690505548, "grad_norm": 0.09227670729160309, "learning_rate": 0.01, "loss": 1.94, "step": 120648 }, { "epoch": 12.397348951911221, "grad_norm": 0.0327807180583477, "learning_rate": 0.01, "loss": 1.9303, "step": 120651 }, { "epoch": 12.397657213316894, "grad_norm": 0.07567586749792099, "learning_rate": 0.01, "loss": 1.9519, "step": 120654 }, { "epoch": 12.397965474722564, "grad_norm": 0.07382260262966156, "learning_rate": 0.01, "loss": 1.9578, "step": 120657 }, { "epoch": 12.398273736128237, "grad_norm": 0.13166159391403198, "learning_rate": 0.01, "loss": 1.9394, "step": 120660 }, { "epoch": 12.398581997533908, "grad_norm": 0.05870798975229263, "learning_rate": 0.01, "loss": 1.9318, "step": 120663 }, { "epoch": 12.39889025893958, "grad_norm": 0.05218498781323433, "learning_rate": 0.01, "loss": 1.9219, "step": 120666 }, { "epoch": 12.399198520345253, "grad_norm": 0.06324154138565063, "learning_rate": 0.01, "loss": 1.962, "step": 120669 }, { "epoch": 12.399506781750924, "grad_norm": 0.0561276376247406, "learning_rate": 0.01, "loss": 1.9193, "step": 120672 }, { "epoch": 12.399815043156597, "grad_norm": 0.03983089327812195, "learning_rate": 0.01, "loss": 1.9251, "step": 120675 }, { "epoch": 12.40012330456227, "grad_norm": 0.058458730578422546, "learning_rate": 0.01, "loss": 1.934, "step": 120678 }, { "epoch": 12.40043156596794, "grad_norm": 0.10894698649644852, "learning_rate": 0.01, "loss": 1.9457, "step": 120681 }, { "epoch": 12.400739827373613, "grad_norm": 0.06287480145692825, "learning_rate": 0.01, "loss": 1.9411, "step": 120684 }, { "epoch": 12.401048088779286, "grad_norm": 0.06695500016212463, "learning_rate": 0.01, "loss": 1.9126, "step": 120687 }, { "epoch": 12.401356350184956, "grad_norm": 0.04819144681096077, "learning_rate": 0.01, "loss": 1.9635, "step": 120690 }, { "epoch": 12.401664611590629, "grad_norm": 0.11953316628932953, "learning_rate": 0.01, "loss": 1.9329, "step": 120693 }, { "epoch": 12.401972872996302, "grad_norm": 0.08536428213119507, "learning_rate": 0.01, "loss": 1.9433, "step": 120696 }, { "epoch": 12.402281134401973, "grad_norm": 0.051179662346839905, "learning_rate": 0.01, "loss": 1.9302, "step": 120699 }, { "epoch": 12.402589395807645, "grad_norm": 0.03457881137728691, "learning_rate": 0.01, "loss": 1.9388, "step": 120702 }, { "epoch": 12.402897657213316, "grad_norm": 0.04182450473308563, "learning_rate": 0.01, "loss": 1.9397, "step": 120705 }, { "epoch": 12.403205918618989, "grad_norm": 0.08560032397508621, "learning_rate": 0.01, "loss": 1.9515, "step": 120708 }, { "epoch": 12.403514180024661, "grad_norm": 0.08513845503330231, "learning_rate": 0.01, "loss": 1.9349, "step": 120711 }, { "epoch": 12.403822441430332, "grad_norm": 0.04573316499590874, "learning_rate": 0.01, "loss": 1.9376, "step": 120714 }, { "epoch": 12.404130702836005, "grad_norm": 0.04893175885081291, "learning_rate": 0.01, "loss": 1.9014, "step": 120717 }, { "epoch": 12.404438964241677, "grad_norm": 0.08831377327442169, "learning_rate": 0.01, "loss": 1.9242, "step": 120720 }, { "epoch": 12.404747225647348, "grad_norm": 0.0819849893450737, "learning_rate": 0.01, "loss": 1.9203, "step": 120723 }, { "epoch": 12.405055487053021, "grad_norm": 0.04886121675372124, "learning_rate": 0.01, "loss": 1.9397, "step": 120726 }, { "epoch": 12.405363748458694, "grad_norm": 0.1050790548324585, "learning_rate": 0.01, "loss": 1.9356, "step": 120729 }, { "epoch": 12.405672009864364, "grad_norm": 0.06852862238883972, "learning_rate": 0.01, "loss": 1.9299, "step": 120732 }, { "epoch": 12.405980271270037, "grad_norm": 0.09256117045879364, "learning_rate": 0.01, "loss": 1.9278, "step": 120735 }, { "epoch": 12.40628853267571, "grad_norm": 0.051488909870386124, "learning_rate": 0.01, "loss": 1.9245, "step": 120738 }, { "epoch": 12.40659679408138, "grad_norm": 0.07905134558677673, "learning_rate": 0.01, "loss": 1.9663, "step": 120741 }, { "epoch": 12.406905055487053, "grad_norm": 0.08850820362567902, "learning_rate": 0.01, "loss": 1.9525, "step": 120744 }, { "epoch": 12.407213316892726, "grad_norm": 0.05323689058423042, "learning_rate": 0.01, "loss": 1.9241, "step": 120747 }, { "epoch": 12.407521578298397, "grad_norm": 0.13145367801189423, "learning_rate": 0.01, "loss": 1.9468, "step": 120750 }, { "epoch": 12.40782983970407, "grad_norm": 0.0645250603556633, "learning_rate": 0.01, "loss": 1.9159, "step": 120753 }, { "epoch": 12.40813810110974, "grad_norm": 0.05831822380423546, "learning_rate": 0.01, "loss": 1.9048, "step": 120756 }, { "epoch": 12.408446362515413, "grad_norm": 0.058313801884651184, "learning_rate": 0.01, "loss": 1.9387, "step": 120759 }, { "epoch": 12.408754623921086, "grad_norm": 0.0371122881770134, "learning_rate": 0.01, "loss": 1.9412, "step": 120762 }, { "epoch": 12.409062885326756, "grad_norm": 0.04355745390057564, "learning_rate": 0.01, "loss": 1.9538, "step": 120765 }, { "epoch": 12.409371146732429, "grad_norm": 0.05011705681681633, "learning_rate": 0.01, "loss": 1.9257, "step": 120768 }, { "epoch": 12.409679408138102, "grad_norm": 0.13353750109672546, "learning_rate": 0.01, "loss": 1.9554, "step": 120771 }, { "epoch": 12.409987669543773, "grad_norm": 0.09305182099342346, "learning_rate": 0.01, "loss": 1.9464, "step": 120774 }, { "epoch": 12.410295930949445, "grad_norm": 0.05511609837412834, "learning_rate": 0.01, "loss": 1.9426, "step": 120777 }, { "epoch": 12.410604192355118, "grad_norm": 0.05445779114961624, "learning_rate": 0.01, "loss": 1.9335, "step": 120780 }, { "epoch": 12.410912453760789, "grad_norm": 0.08220420032739639, "learning_rate": 0.01, "loss": 1.9061, "step": 120783 }, { "epoch": 12.411220715166461, "grad_norm": 0.0428672656416893, "learning_rate": 0.01, "loss": 1.9058, "step": 120786 }, { "epoch": 12.411528976572134, "grad_norm": 0.03976845368742943, "learning_rate": 0.01, "loss": 1.9271, "step": 120789 }, { "epoch": 12.411837237977805, "grad_norm": 0.0461377277970314, "learning_rate": 0.01, "loss": 1.9202, "step": 120792 }, { "epoch": 12.412145499383477, "grad_norm": 0.05648530274629593, "learning_rate": 0.01, "loss": 1.9531, "step": 120795 }, { "epoch": 12.412453760789148, "grad_norm": 0.06467310339212418, "learning_rate": 0.01, "loss": 1.9324, "step": 120798 }, { "epoch": 12.412762022194821, "grad_norm": 0.03678126260638237, "learning_rate": 0.01, "loss": 1.9164, "step": 120801 }, { "epoch": 12.413070283600494, "grad_norm": 0.13990208506584167, "learning_rate": 0.01, "loss": 1.9404, "step": 120804 }, { "epoch": 12.413378545006164, "grad_norm": 0.04235214367508888, "learning_rate": 0.01, "loss": 1.9367, "step": 120807 }, { "epoch": 12.413686806411837, "grad_norm": 0.04935699328780174, "learning_rate": 0.01, "loss": 1.9403, "step": 120810 }, { "epoch": 12.41399506781751, "grad_norm": 0.0406753309071064, "learning_rate": 0.01, "loss": 1.9269, "step": 120813 }, { "epoch": 12.41430332922318, "grad_norm": 0.03786905109882355, "learning_rate": 0.01, "loss": 1.9212, "step": 120816 }, { "epoch": 12.414611590628853, "grad_norm": 0.05138446018099785, "learning_rate": 0.01, "loss": 1.9186, "step": 120819 }, { "epoch": 12.414919852034526, "grad_norm": 0.09492046386003494, "learning_rate": 0.01, "loss": 1.9313, "step": 120822 }, { "epoch": 12.415228113440197, "grad_norm": 0.03346167132258415, "learning_rate": 0.01, "loss": 1.9288, "step": 120825 }, { "epoch": 12.41553637484587, "grad_norm": 0.07686081528663635, "learning_rate": 0.01, "loss": 1.9279, "step": 120828 }, { "epoch": 12.415844636251542, "grad_norm": 0.11712466180324554, "learning_rate": 0.01, "loss": 1.9377, "step": 120831 }, { "epoch": 12.416152897657213, "grad_norm": 0.047113195061683655, "learning_rate": 0.01, "loss": 1.9484, "step": 120834 }, { "epoch": 12.416461159062885, "grad_norm": 0.045871369540691376, "learning_rate": 0.01, "loss": 1.9501, "step": 120837 }, { "epoch": 12.416769420468558, "grad_norm": 0.04527949541807175, "learning_rate": 0.01, "loss": 1.9421, "step": 120840 }, { "epoch": 12.417077681874229, "grad_norm": 0.03895549476146698, "learning_rate": 0.01, "loss": 1.9448, "step": 120843 }, { "epoch": 12.417385943279902, "grad_norm": 0.0926399901509285, "learning_rate": 0.01, "loss": 1.9023, "step": 120846 }, { "epoch": 12.417694204685574, "grad_norm": 0.05789586901664734, "learning_rate": 0.01, "loss": 1.9378, "step": 120849 }, { "epoch": 12.418002466091245, "grad_norm": 0.0683421865105629, "learning_rate": 0.01, "loss": 1.9446, "step": 120852 }, { "epoch": 12.418310727496918, "grad_norm": 0.041859518736600876, "learning_rate": 0.01, "loss": 1.9459, "step": 120855 }, { "epoch": 12.418618988902589, "grad_norm": 0.04999043047428131, "learning_rate": 0.01, "loss": 1.9282, "step": 120858 }, { "epoch": 12.418927250308261, "grad_norm": 0.04389044642448425, "learning_rate": 0.01, "loss": 1.9521, "step": 120861 }, { "epoch": 12.419235511713934, "grad_norm": 0.04234243184328079, "learning_rate": 0.01, "loss": 1.9273, "step": 120864 }, { "epoch": 12.419543773119605, "grad_norm": 0.03945605084300041, "learning_rate": 0.01, "loss": 1.9293, "step": 120867 }, { "epoch": 12.419852034525277, "grad_norm": 0.049900732934474945, "learning_rate": 0.01, "loss": 1.9356, "step": 120870 }, { "epoch": 12.42016029593095, "grad_norm": 0.10874069482088089, "learning_rate": 0.01, "loss": 1.9685, "step": 120873 }, { "epoch": 12.420468557336621, "grad_norm": 0.044814638793468475, "learning_rate": 0.01, "loss": 1.9193, "step": 120876 }, { "epoch": 12.420776818742294, "grad_norm": 0.08993617445230484, "learning_rate": 0.01, "loss": 1.9498, "step": 120879 }, { "epoch": 12.421085080147966, "grad_norm": 0.04525793343782425, "learning_rate": 0.01, "loss": 1.9122, "step": 120882 }, { "epoch": 12.421393341553637, "grad_norm": 0.054966967552900314, "learning_rate": 0.01, "loss": 1.9295, "step": 120885 }, { "epoch": 12.42170160295931, "grad_norm": 0.08710785955190659, "learning_rate": 0.01, "loss": 1.909, "step": 120888 }, { "epoch": 12.422009864364982, "grad_norm": 0.09183228760957718, "learning_rate": 0.01, "loss": 1.9581, "step": 120891 }, { "epoch": 12.422318125770653, "grad_norm": 0.08628053963184357, "learning_rate": 0.01, "loss": 1.9299, "step": 120894 }, { "epoch": 12.422626387176326, "grad_norm": 0.09337654709815979, "learning_rate": 0.01, "loss": 1.9504, "step": 120897 }, { "epoch": 12.422934648581997, "grad_norm": 0.047991879284381866, "learning_rate": 0.01, "loss": 1.9181, "step": 120900 }, { "epoch": 12.42324290998767, "grad_norm": 0.11279120296239853, "learning_rate": 0.01, "loss": 1.95, "step": 120903 }, { "epoch": 12.423551171393342, "grad_norm": 0.08573783189058304, "learning_rate": 0.01, "loss": 1.943, "step": 120906 }, { "epoch": 12.423859432799013, "grad_norm": 0.05766313523054123, "learning_rate": 0.01, "loss": 1.9156, "step": 120909 }, { "epoch": 12.424167694204685, "grad_norm": 0.07933402061462402, "learning_rate": 0.01, "loss": 1.9272, "step": 120912 }, { "epoch": 12.424475955610358, "grad_norm": 0.03687792271375656, "learning_rate": 0.01, "loss": 1.9018, "step": 120915 }, { "epoch": 12.424784217016029, "grad_norm": 0.05140282213687897, "learning_rate": 0.01, "loss": 1.9274, "step": 120918 }, { "epoch": 12.425092478421702, "grad_norm": 0.07529986649751663, "learning_rate": 0.01, "loss": 1.951, "step": 120921 }, { "epoch": 12.425400739827374, "grad_norm": 0.06762083619832993, "learning_rate": 0.01, "loss": 1.9375, "step": 120924 }, { "epoch": 12.425709001233045, "grad_norm": 0.12069932371377945, "learning_rate": 0.01, "loss": 1.926, "step": 120927 }, { "epoch": 12.426017262638718, "grad_norm": 0.055894885212183, "learning_rate": 0.01, "loss": 1.9309, "step": 120930 }, { "epoch": 12.42632552404439, "grad_norm": 0.10258354991674423, "learning_rate": 0.01, "loss": 1.9212, "step": 120933 }, { "epoch": 12.426633785450061, "grad_norm": 0.058992642909288406, "learning_rate": 0.01, "loss": 1.9235, "step": 120936 }, { "epoch": 12.426942046855734, "grad_norm": 0.047088347375392914, "learning_rate": 0.01, "loss": 1.9033, "step": 120939 }, { "epoch": 12.427250308261407, "grad_norm": 0.07983656227588654, "learning_rate": 0.01, "loss": 1.9379, "step": 120942 }, { "epoch": 12.427558569667077, "grad_norm": 0.11896104365587234, "learning_rate": 0.01, "loss": 1.929, "step": 120945 }, { "epoch": 12.42786683107275, "grad_norm": 0.13242436945438385, "learning_rate": 0.01, "loss": 1.9331, "step": 120948 }, { "epoch": 12.428175092478421, "grad_norm": 0.06254076957702637, "learning_rate": 0.01, "loss": 1.9349, "step": 120951 }, { "epoch": 12.428483353884094, "grad_norm": 0.044072654098272324, "learning_rate": 0.01, "loss": 1.9528, "step": 120954 }, { "epoch": 12.428791615289766, "grad_norm": 0.05199466273188591, "learning_rate": 0.01, "loss": 1.939, "step": 120957 }, { "epoch": 12.429099876695437, "grad_norm": 0.04316157475113869, "learning_rate": 0.01, "loss": 1.9454, "step": 120960 }, { "epoch": 12.42940813810111, "grad_norm": 0.11193782091140747, "learning_rate": 0.01, "loss": 1.9283, "step": 120963 }, { "epoch": 12.429716399506782, "grad_norm": 0.05210939422249794, "learning_rate": 0.01, "loss": 1.9321, "step": 120966 }, { "epoch": 12.430024660912453, "grad_norm": 0.08012048900127411, "learning_rate": 0.01, "loss": 1.9307, "step": 120969 }, { "epoch": 12.430332922318126, "grad_norm": 0.06603696197271347, "learning_rate": 0.01, "loss": 1.9234, "step": 120972 }, { "epoch": 12.430641183723798, "grad_norm": 0.07295702397823334, "learning_rate": 0.01, "loss": 1.9529, "step": 120975 }, { "epoch": 12.43094944512947, "grad_norm": 0.040298521518707275, "learning_rate": 0.01, "loss": 1.9474, "step": 120978 }, { "epoch": 12.431257706535142, "grad_norm": 0.0881817564368248, "learning_rate": 0.01, "loss": 1.927, "step": 120981 }, { "epoch": 12.431565967940815, "grad_norm": 0.06474234908819199, "learning_rate": 0.01, "loss": 1.9186, "step": 120984 }, { "epoch": 12.431874229346485, "grad_norm": 0.07105148583650589, "learning_rate": 0.01, "loss": 1.9518, "step": 120987 }, { "epoch": 12.432182490752158, "grad_norm": 0.09649461507797241, "learning_rate": 0.01, "loss": 1.9361, "step": 120990 }, { "epoch": 12.432490752157829, "grad_norm": 0.15850776433944702, "learning_rate": 0.01, "loss": 1.934, "step": 120993 }, { "epoch": 12.432799013563502, "grad_norm": 0.10326742380857468, "learning_rate": 0.01, "loss": 1.9375, "step": 120996 }, { "epoch": 12.433107274969174, "grad_norm": 0.06529439985752106, "learning_rate": 0.01, "loss": 1.9572, "step": 120999 }, { "epoch": 12.433415536374845, "grad_norm": 0.08048941195011139, "learning_rate": 0.01, "loss": 1.9566, "step": 121002 }, { "epoch": 12.433723797780518, "grad_norm": 0.09297440946102142, "learning_rate": 0.01, "loss": 1.9265, "step": 121005 }, { "epoch": 12.43403205918619, "grad_norm": 0.0965491309762001, "learning_rate": 0.01, "loss": 1.9602, "step": 121008 }, { "epoch": 12.434340320591861, "grad_norm": 0.08597447723150253, "learning_rate": 0.01, "loss": 1.9329, "step": 121011 }, { "epoch": 12.434648581997534, "grad_norm": 0.050102099776268005, "learning_rate": 0.01, "loss": 1.9491, "step": 121014 }, { "epoch": 12.434956843403207, "grad_norm": 0.035592518746852875, "learning_rate": 0.01, "loss": 1.944, "step": 121017 }, { "epoch": 12.435265104808877, "grad_norm": 0.09271007031202316, "learning_rate": 0.01, "loss": 1.9339, "step": 121020 }, { "epoch": 12.43557336621455, "grad_norm": 0.04439222440123558, "learning_rate": 0.01, "loss": 1.9315, "step": 121023 }, { "epoch": 12.435881627620223, "grad_norm": 0.03583432734012604, "learning_rate": 0.01, "loss": 1.93, "step": 121026 }, { "epoch": 12.436189889025894, "grad_norm": 0.04167827591300011, "learning_rate": 0.01, "loss": 1.9009, "step": 121029 }, { "epoch": 12.436498150431566, "grad_norm": 0.12448694556951523, "learning_rate": 0.01, "loss": 1.9425, "step": 121032 }, { "epoch": 12.436806411837239, "grad_norm": 0.07956825941801071, "learning_rate": 0.01, "loss": 1.9565, "step": 121035 }, { "epoch": 12.43711467324291, "grad_norm": 0.10021093487739563, "learning_rate": 0.01, "loss": 1.9244, "step": 121038 }, { "epoch": 12.437422934648582, "grad_norm": 0.05917084962129593, "learning_rate": 0.01, "loss": 1.9233, "step": 121041 }, { "epoch": 12.437731196054255, "grad_norm": 0.08545676618814468, "learning_rate": 0.01, "loss": 1.9474, "step": 121044 }, { "epoch": 12.438039457459926, "grad_norm": 0.13080301880836487, "learning_rate": 0.01, "loss": 1.9519, "step": 121047 }, { "epoch": 12.438347718865598, "grad_norm": 0.05625632032752037, "learning_rate": 0.01, "loss": 1.9365, "step": 121050 }, { "epoch": 12.43865598027127, "grad_norm": 0.0860651507973671, "learning_rate": 0.01, "loss": 1.9352, "step": 121053 }, { "epoch": 12.438964241676942, "grad_norm": 0.047987114638090134, "learning_rate": 0.01, "loss": 1.9147, "step": 121056 }, { "epoch": 12.439272503082615, "grad_norm": 0.09193069487810135, "learning_rate": 0.01, "loss": 1.935, "step": 121059 }, { "epoch": 12.439580764488285, "grad_norm": 0.07548029720783234, "learning_rate": 0.01, "loss": 1.9305, "step": 121062 }, { "epoch": 12.439889025893958, "grad_norm": 0.09455464035272598, "learning_rate": 0.01, "loss": 1.9518, "step": 121065 }, { "epoch": 12.44019728729963, "grad_norm": 0.07177861779928207, "learning_rate": 0.01, "loss": 1.9571, "step": 121068 }, { "epoch": 12.440505548705302, "grad_norm": 0.049412354826927185, "learning_rate": 0.01, "loss": 1.9261, "step": 121071 }, { "epoch": 12.440813810110974, "grad_norm": 0.09546947479248047, "learning_rate": 0.01, "loss": 1.9483, "step": 121074 }, { "epoch": 12.441122071516647, "grad_norm": 0.046247322112321854, "learning_rate": 0.01, "loss": 1.9365, "step": 121077 }, { "epoch": 12.441430332922318, "grad_norm": 0.09783945232629776, "learning_rate": 0.01, "loss": 1.9266, "step": 121080 }, { "epoch": 12.44173859432799, "grad_norm": 0.09536667913198471, "learning_rate": 0.01, "loss": 1.9089, "step": 121083 }, { "epoch": 12.442046855733663, "grad_norm": 0.0748727098107338, "learning_rate": 0.01, "loss": 1.9369, "step": 121086 }, { "epoch": 12.442355117139334, "grad_norm": 0.04721232131123543, "learning_rate": 0.01, "loss": 1.9314, "step": 121089 }, { "epoch": 12.442663378545006, "grad_norm": 0.05681651458144188, "learning_rate": 0.01, "loss": 1.9241, "step": 121092 }, { "epoch": 12.442971639950677, "grad_norm": 0.0716468095779419, "learning_rate": 0.01, "loss": 1.9555, "step": 121095 }, { "epoch": 12.44327990135635, "grad_norm": 0.07019370794296265, "learning_rate": 0.01, "loss": 1.9409, "step": 121098 }, { "epoch": 12.443588162762023, "grad_norm": 0.06477268040180206, "learning_rate": 0.01, "loss": 1.9296, "step": 121101 }, { "epoch": 12.443896424167693, "grad_norm": 0.09084682911634445, "learning_rate": 0.01, "loss": 1.9234, "step": 121104 }, { "epoch": 12.444204685573366, "grad_norm": 0.0959085151553154, "learning_rate": 0.01, "loss": 1.9117, "step": 121107 }, { "epoch": 12.444512946979039, "grad_norm": 0.07635615020990372, "learning_rate": 0.01, "loss": 1.9305, "step": 121110 }, { "epoch": 12.44482120838471, "grad_norm": 0.13731513917446136, "learning_rate": 0.01, "loss": 1.9266, "step": 121113 }, { "epoch": 12.445129469790382, "grad_norm": 0.13287904858589172, "learning_rate": 0.01, "loss": 1.9211, "step": 121116 }, { "epoch": 12.445437731196055, "grad_norm": 0.17962051928043365, "learning_rate": 0.01, "loss": 1.9481, "step": 121119 }, { "epoch": 12.445745992601726, "grad_norm": 0.04233860969543457, "learning_rate": 0.01, "loss": 1.938, "step": 121122 }, { "epoch": 12.446054254007398, "grad_norm": 0.03985730558633804, "learning_rate": 0.01, "loss": 1.936, "step": 121125 }, { "epoch": 12.446362515413071, "grad_norm": 0.04166532680392265, "learning_rate": 0.01, "loss": 1.9596, "step": 121128 }, { "epoch": 12.446670776818742, "grad_norm": 0.042059239000082016, "learning_rate": 0.01, "loss": 1.9374, "step": 121131 }, { "epoch": 12.446979038224415, "grad_norm": 0.036940161138772964, "learning_rate": 0.01, "loss": 1.9136, "step": 121134 }, { "epoch": 12.447287299630087, "grad_norm": 0.06131014600396156, "learning_rate": 0.01, "loss": 1.9401, "step": 121137 }, { "epoch": 12.447595561035758, "grad_norm": 0.07934664934873581, "learning_rate": 0.01, "loss": 1.9227, "step": 121140 }, { "epoch": 12.44790382244143, "grad_norm": 0.14649051427841187, "learning_rate": 0.01, "loss": 1.9229, "step": 121143 }, { "epoch": 12.448212083847102, "grad_norm": 0.0989823117852211, "learning_rate": 0.01, "loss": 1.929, "step": 121146 }, { "epoch": 12.448520345252774, "grad_norm": 0.05163063108921051, "learning_rate": 0.01, "loss": 1.9312, "step": 121149 }, { "epoch": 12.448828606658447, "grad_norm": 0.054580073803663254, "learning_rate": 0.01, "loss": 1.9243, "step": 121152 }, { "epoch": 12.449136868064118, "grad_norm": 0.03554552420973778, "learning_rate": 0.01, "loss": 1.941, "step": 121155 }, { "epoch": 12.44944512946979, "grad_norm": 0.10882104188203812, "learning_rate": 0.01, "loss": 1.9199, "step": 121158 }, { "epoch": 12.449753390875463, "grad_norm": 0.08104122430086136, "learning_rate": 0.01, "loss": 1.9355, "step": 121161 }, { "epoch": 12.450061652281134, "grad_norm": 0.055625855922698975, "learning_rate": 0.01, "loss": 1.9347, "step": 121164 }, { "epoch": 12.450369913686806, "grad_norm": 0.09399867802858353, "learning_rate": 0.01, "loss": 1.9486, "step": 121167 }, { "epoch": 12.450678175092479, "grad_norm": 0.04432365298271179, "learning_rate": 0.01, "loss": 1.9474, "step": 121170 }, { "epoch": 12.45098643649815, "grad_norm": 0.043413758277893066, "learning_rate": 0.01, "loss": 1.9329, "step": 121173 }, { "epoch": 12.451294697903823, "grad_norm": 0.03953053429722786, "learning_rate": 0.01, "loss": 1.8988, "step": 121176 }, { "epoch": 12.451602959309495, "grad_norm": 0.1084480807185173, "learning_rate": 0.01, "loss": 1.9512, "step": 121179 }, { "epoch": 12.451911220715166, "grad_norm": 0.047949500381946564, "learning_rate": 0.01, "loss": 1.9236, "step": 121182 }, { "epoch": 12.452219482120839, "grad_norm": 0.05586003139615059, "learning_rate": 0.01, "loss": 1.9215, "step": 121185 }, { "epoch": 12.45252774352651, "grad_norm": 0.09080043435096741, "learning_rate": 0.01, "loss": 1.9199, "step": 121188 }, { "epoch": 12.452836004932182, "grad_norm": 0.06499528139829636, "learning_rate": 0.01, "loss": 1.9434, "step": 121191 }, { "epoch": 12.453144266337855, "grad_norm": 0.0984288677573204, "learning_rate": 0.01, "loss": 1.9488, "step": 121194 }, { "epoch": 12.453452527743526, "grad_norm": 0.045159339904785156, "learning_rate": 0.01, "loss": 1.9546, "step": 121197 }, { "epoch": 12.453760789149198, "grad_norm": 0.0337919145822525, "learning_rate": 0.01, "loss": 1.9435, "step": 121200 }, { "epoch": 12.454069050554871, "grad_norm": 0.039271969348192215, "learning_rate": 0.01, "loss": 1.952, "step": 121203 }, { "epoch": 12.454377311960542, "grad_norm": 0.049345336854457855, "learning_rate": 0.01, "loss": 1.9399, "step": 121206 }, { "epoch": 12.454685573366215, "grad_norm": 0.13334542512893677, "learning_rate": 0.01, "loss": 1.9291, "step": 121209 }, { "epoch": 12.454993834771887, "grad_norm": 0.11365757882595062, "learning_rate": 0.01, "loss": 1.9149, "step": 121212 }, { "epoch": 12.455302096177558, "grad_norm": 0.055066999047994614, "learning_rate": 0.01, "loss": 1.9336, "step": 121215 }, { "epoch": 12.45561035758323, "grad_norm": 0.04881853982806206, "learning_rate": 0.01, "loss": 1.9545, "step": 121218 }, { "epoch": 12.455918618988903, "grad_norm": 0.1275508552789688, "learning_rate": 0.01, "loss": 1.9462, "step": 121221 }, { "epoch": 12.456226880394574, "grad_norm": 0.042988065630197525, "learning_rate": 0.01, "loss": 1.9272, "step": 121224 }, { "epoch": 12.456535141800247, "grad_norm": 0.11997077614068985, "learning_rate": 0.01, "loss": 1.9461, "step": 121227 }, { "epoch": 12.45684340320592, "grad_norm": 0.04036852344870567, "learning_rate": 0.01, "loss": 1.9686, "step": 121230 }, { "epoch": 12.45715166461159, "grad_norm": 0.08224493265151978, "learning_rate": 0.01, "loss": 1.9215, "step": 121233 }, { "epoch": 12.457459926017263, "grad_norm": 0.0855056419968605, "learning_rate": 0.01, "loss": 1.9206, "step": 121236 }, { "epoch": 12.457768187422934, "grad_norm": 0.07086644321680069, "learning_rate": 0.01, "loss": 1.9302, "step": 121239 }, { "epoch": 12.458076448828606, "grad_norm": 0.0619567334651947, "learning_rate": 0.01, "loss": 1.9707, "step": 121242 }, { "epoch": 12.458384710234279, "grad_norm": 0.08348061144351959, "learning_rate": 0.01, "loss": 1.9621, "step": 121245 }, { "epoch": 12.45869297163995, "grad_norm": 0.04736310988664627, "learning_rate": 0.01, "loss": 1.91, "step": 121248 }, { "epoch": 12.459001233045623, "grad_norm": 0.12289008498191833, "learning_rate": 0.01, "loss": 1.9533, "step": 121251 }, { "epoch": 12.459309494451295, "grad_norm": 0.061683282256126404, "learning_rate": 0.01, "loss": 1.9259, "step": 121254 }, { "epoch": 12.459617755856966, "grad_norm": 0.06045114994049072, "learning_rate": 0.01, "loss": 1.9225, "step": 121257 }, { "epoch": 12.459926017262639, "grad_norm": 0.042001038789749146, "learning_rate": 0.01, "loss": 1.9413, "step": 121260 }, { "epoch": 12.460234278668311, "grad_norm": 0.04718760401010513, "learning_rate": 0.01, "loss": 1.9279, "step": 121263 }, { "epoch": 12.460542540073982, "grad_norm": 0.0723651647567749, "learning_rate": 0.01, "loss": 1.9381, "step": 121266 }, { "epoch": 12.460850801479655, "grad_norm": 0.05260621756315231, "learning_rate": 0.01, "loss": 1.95, "step": 121269 }, { "epoch": 12.461159062885327, "grad_norm": 0.04480955749750137, "learning_rate": 0.01, "loss": 1.9398, "step": 121272 }, { "epoch": 12.461467324290998, "grad_norm": 0.056711770594120026, "learning_rate": 0.01, "loss": 1.973, "step": 121275 }, { "epoch": 12.461775585696671, "grad_norm": 0.046238526701927185, "learning_rate": 0.01, "loss": 1.9435, "step": 121278 }, { "epoch": 12.462083847102344, "grad_norm": 0.0408644899725914, "learning_rate": 0.01, "loss": 1.9506, "step": 121281 }, { "epoch": 12.462392108508014, "grad_norm": 0.16705437004566193, "learning_rate": 0.01, "loss": 1.9368, "step": 121284 }, { "epoch": 12.462700369913687, "grad_norm": 0.03730238229036331, "learning_rate": 0.01, "loss": 1.9154, "step": 121287 }, { "epoch": 12.463008631319358, "grad_norm": 0.0428999587893486, "learning_rate": 0.01, "loss": 1.9159, "step": 121290 }, { "epoch": 12.46331689272503, "grad_norm": 0.04371247813105583, "learning_rate": 0.01, "loss": 1.9187, "step": 121293 }, { "epoch": 12.463625154130703, "grad_norm": 0.055939462035894394, "learning_rate": 0.01, "loss": 1.9249, "step": 121296 }, { "epoch": 12.463933415536374, "grad_norm": 0.062437064945697784, "learning_rate": 0.01, "loss": 1.9453, "step": 121299 }, { "epoch": 12.464241676942047, "grad_norm": 0.06468332558870316, "learning_rate": 0.01, "loss": 1.9392, "step": 121302 }, { "epoch": 12.46454993834772, "grad_norm": 0.033661019057035446, "learning_rate": 0.01, "loss": 1.9239, "step": 121305 }, { "epoch": 12.46485819975339, "grad_norm": 0.04528021067380905, "learning_rate": 0.01, "loss": 1.9399, "step": 121308 }, { "epoch": 12.465166461159063, "grad_norm": 0.17767322063446045, "learning_rate": 0.01, "loss": 1.9197, "step": 121311 }, { "epoch": 12.465474722564736, "grad_norm": 0.06002239137887955, "learning_rate": 0.01, "loss": 1.953, "step": 121314 }, { "epoch": 12.465782983970406, "grad_norm": 0.04848655313253403, "learning_rate": 0.01, "loss": 1.939, "step": 121317 }, { "epoch": 12.466091245376079, "grad_norm": 0.05200524628162384, "learning_rate": 0.01, "loss": 1.9534, "step": 121320 }, { "epoch": 12.466399506781752, "grad_norm": 0.03162280097603798, "learning_rate": 0.01, "loss": 1.9395, "step": 121323 }, { "epoch": 12.466707768187423, "grad_norm": 0.03381708264350891, "learning_rate": 0.01, "loss": 1.9355, "step": 121326 }, { "epoch": 12.467016029593095, "grad_norm": 0.03555064648389816, "learning_rate": 0.01, "loss": 1.9131, "step": 121329 }, { "epoch": 12.467324290998768, "grad_norm": 0.13290022313594818, "learning_rate": 0.01, "loss": 1.9553, "step": 121332 }, { "epoch": 12.467632552404439, "grad_norm": 0.08111809194087982, "learning_rate": 0.01, "loss": 1.9102, "step": 121335 }, { "epoch": 12.467940813810111, "grad_norm": 0.0595904104411602, "learning_rate": 0.01, "loss": 1.9276, "step": 121338 }, { "epoch": 12.468249075215782, "grad_norm": 0.03929143399000168, "learning_rate": 0.01, "loss": 1.9359, "step": 121341 }, { "epoch": 12.468557336621455, "grad_norm": 0.045894503593444824, "learning_rate": 0.01, "loss": 1.9225, "step": 121344 }, { "epoch": 12.468865598027127, "grad_norm": 0.040126699954271317, "learning_rate": 0.01, "loss": 1.9453, "step": 121347 }, { "epoch": 12.469173859432798, "grad_norm": 0.03402792662382126, "learning_rate": 0.01, "loss": 1.9401, "step": 121350 }, { "epoch": 12.469482120838471, "grad_norm": 0.04667404294013977, "learning_rate": 0.01, "loss": 1.932, "step": 121353 }, { "epoch": 12.469790382244144, "grad_norm": 0.16611348092556, "learning_rate": 0.01, "loss": 1.9453, "step": 121356 }, { "epoch": 12.470098643649814, "grad_norm": 0.12104840576648712, "learning_rate": 0.01, "loss": 1.9356, "step": 121359 }, { "epoch": 12.470406905055487, "grad_norm": 0.10649586468935013, "learning_rate": 0.01, "loss": 1.9253, "step": 121362 }, { "epoch": 12.47071516646116, "grad_norm": 0.05915949493646622, "learning_rate": 0.01, "loss": 1.9557, "step": 121365 }, { "epoch": 12.47102342786683, "grad_norm": 0.0690794512629509, "learning_rate": 0.01, "loss": 1.9501, "step": 121368 }, { "epoch": 12.471331689272503, "grad_norm": 0.09581074118614197, "learning_rate": 0.01, "loss": 1.9355, "step": 121371 }, { "epoch": 12.471639950678176, "grad_norm": 0.05892445519566536, "learning_rate": 0.01, "loss": 1.9176, "step": 121374 }, { "epoch": 12.471948212083847, "grad_norm": 0.10727337747812271, "learning_rate": 0.01, "loss": 1.9266, "step": 121377 }, { "epoch": 12.47225647348952, "grad_norm": 0.03369433805346489, "learning_rate": 0.01, "loss": 1.9421, "step": 121380 }, { "epoch": 12.47256473489519, "grad_norm": 0.052181635051965714, "learning_rate": 0.01, "loss": 1.948, "step": 121383 }, { "epoch": 12.472872996300863, "grad_norm": 0.11132442951202393, "learning_rate": 0.01, "loss": 1.9392, "step": 121386 }, { "epoch": 12.473181257706536, "grad_norm": 0.07367842644453049, "learning_rate": 0.01, "loss": 1.917, "step": 121389 }, { "epoch": 12.473489519112206, "grad_norm": 0.07586299628019333, "learning_rate": 0.01, "loss": 1.9108, "step": 121392 }, { "epoch": 12.473797780517879, "grad_norm": 0.06276009976863861, "learning_rate": 0.01, "loss": 1.9246, "step": 121395 }, { "epoch": 12.474106041923552, "grad_norm": 0.124395452439785, "learning_rate": 0.01, "loss": 1.9444, "step": 121398 }, { "epoch": 12.474414303329223, "grad_norm": 0.041389454156160355, "learning_rate": 0.01, "loss": 1.9285, "step": 121401 }, { "epoch": 12.474722564734895, "grad_norm": 0.11566449701786041, "learning_rate": 0.01, "loss": 1.9243, "step": 121404 }, { "epoch": 12.475030826140568, "grad_norm": 0.049872953444719315, "learning_rate": 0.01, "loss": 1.9299, "step": 121407 }, { "epoch": 12.475339087546239, "grad_norm": 0.0904727503657341, "learning_rate": 0.01, "loss": 1.9401, "step": 121410 }, { "epoch": 12.475647348951911, "grad_norm": 0.12929974496364594, "learning_rate": 0.01, "loss": 1.9552, "step": 121413 }, { "epoch": 12.475955610357584, "grad_norm": 0.03591210022568703, "learning_rate": 0.01, "loss": 1.9216, "step": 121416 }, { "epoch": 12.476263871763255, "grad_norm": 0.08779715746641159, "learning_rate": 0.01, "loss": 1.9273, "step": 121419 }, { "epoch": 12.476572133168927, "grad_norm": 0.06906236708164215, "learning_rate": 0.01, "loss": 1.9159, "step": 121422 }, { "epoch": 12.4768803945746, "grad_norm": 0.04613060504198074, "learning_rate": 0.01, "loss": 1.908, "step": 121425 }, { "epoch": 12.477188655980271, "grad_norm": 0.06489048153162003, "learning_rate": 0.01, "loss": 1.955, "step": 121428 }, { "epoch": 12.477496917385944, "grad_norm": 0.03923995792865753, "learning_rate": 0.01, "loss": 1.9384, "step": 121431 }, { "epoch": 12.477805178791614, "grad_norm": 0.036666881293058395, "learning_rate": 0.01, "loss": 1.9472, "step": 121434 }, { "epoch": 12.478113440197287, "grad_norm": 0.038181450217962265, "learning_rate": 0.01, "loss": 1.9304, "step": 121437 }, { "epoch": 12.47842170160296, "grad_norm": 0.03955162689089775, "learning_rate": 0.01, "loss": 1.9662, "step": 121440 }, { "epoch": 12.47872996300863, "grad_norm": 0.04029517248272896, "learning_rate": 0.01, "loss": 1.9334, "step": 121443 }, { "epoch": 12.479038224414303, "grad_norm": 0.15464960038661957, "learning_rate": 0.01, "loss": 1.9361, "step": 121446 }, { "epoch": 12.479346485819976, "grad_norm": 0.11006775498390198, "learning_rate": 0.01, "loss": 1.8956, "step": 121449 }, { "epoch": 12.479654747225647, "grad_norm": 0.052893463522195816, "learning_rate": 0.01, "loss": 1.9392, "step": 121452 }, { "epoch": 12.47996300863132, "grad_norm": 0.05330706760287285, "learning_rate": 0.01, "loss": 1.9609, "step": 121455 }, { "epoch": 12.480271270036992, "grad_norm": 0.032613810151815414, "learning_rate": 0.01, "loss": 1.9362, "step": 121458 }, { "epoch": 12.480579531442663, "grad_norm": 0.04011857137084007, "learning_rate": 0.01, "loss": 1.9383, "step": 121461 }, { "epoch": 12.480887792848335, "grad_norm": 0.0690390020608902, "learning_rate": 0.01, "loss": 1.9641, "step": 121464 }, { "epoch": 12.481196054254008, "grad_norm": 0.0717402994632721, "learning_rate": 0.01, "loss": 1.9605, "step": 121467 }, { "epoch": 12.481504315659679, "grad_norm": 0.06392014771699905, "learning_rate": 0.01, "loss": 1.9327, "step": 121470 }, { "epoch": 12.481812577065352, "grad_norm": 0.12007756531238556, "learning_rate": 0.01, "loss": 1.9184, "step": 121473 }, { "epoch": 12.482120838471024, "grad_norm": 0.13022544980049133, "learning_rate": 0.01, "loss": 1.9453, "step": 121476 }, { "epoch": 12.482429099876695, "grad_norm": 0.10144117474555969, "learning_rate": 0.01, "loss": 1.9136, "step": 121479 }, { "epoch": 12.482737361282368, "grad_norm": 0.03754530847072601, "learning_rate": 0.01, "loss": 1.9486, "step": 121482 }, { "epoch": 12.483045622688039, "grad_norm": 0.03838201239705086, "learning_rate": 0.01, "loss": 1.919, "step": 121485 }, { "epoch": 12.483353884093711, "grad_norm": 0.04350865259766579, "learning_rate": 0.01, "loss": 1.9257, "step": 121488 }, { "epoch": 12.483662145499384, "grad_norm": 0.07184895128011703, "learning_rate": 0.01, "loss": 1.9028, "step": 121491 }, { "epoch": 12.483970406905055, "grad_norm": 0.05595661699771881, "learning_rate": 0.01, "loss": 1.9429, "step": 121494 }, { "epoch": 12.484278668310727, "grad_norm": 0.0673314705491066, "learning_rate": 0.01, "loss": 1.9231, "step": 121497 }, { "epoch": 12.4845869297164, "grad_norm": 0.07136820256710052, "learning_rate": 0.01, "loss": 1.9342, "step": 121500 }, { "epoch": 12.484895191122071, "grad_norm": 0.05735662207007408, "learning_rate": 0.01, "loss": 1.9413, "step": 121503 }, { "epoch": 12.485203452527744, "grad_norm": 0.0498378612101078, "learning_rate": 0.01, "loss": 1.9456, "step": 121506 }, { "epoch": 12.485511713933416, "grad_norm": 0.038448233157396317, "learning_rate": 0.01, "loss": 1.9425, "step": 121509 }, { "epoch": 12.485819975339087, "grad_norm": 0.039640799164772034, "learning_rate": 0.01, "loss": 1.9181, "step": 121512 }, { "epoch": 12.48612823674476, "grad_norm": 0.03348913416266441, "learning_rate": 0.01, "loss": 1.9386, "step": 121515 }, { "epoch": 12.486436498150432, "grad_norm": 0.051115188747644424, "learning_rate": 0.01, "loss": 1.9501, "step": 121518 }, { "epoch": 12.486744759556103, "grad_norm": 0.1301027238368988, "learning_rate": 0.01, "loss": 1.9087, "step": 121521 }, { "epoch": 12.487053020961776, "grad_norm": 0.0780315101146698, "learning_rate": 0.01, "loss": 1.9151, "step": 121524 }, { "epoch": 12.487361282367448, "grad_norm": 0.09338266402482986, "learning_rate": 0.01, "loss": 1.9376, "step": 121527 }, { "epoch": 12.48766954377312, "grad_norm": 0.07173854112625122, "learning_rate": 0.01, "loss": 1.941, "step": 121530 }, { "epoch": 12.487977805178792, "grad_norm": 0.09534495323896408, "learning_rate": 0.01, "loss": 1.9203, "step": 121533 }, { "epoch": 12.488286066584463, "grad_norm": 0.05919259414076805, "learning_rate": 0.01, "loss": 1.9476, "step": 121536 }, { "epoch": 12.488594327990135, "grad_norm": 0.1200721487402916, "learning_rate": 0.01, "loss": 1.9513, "step": 121539 }, { "epoch": 12.488902589395808, "grad_norm": 0.13612700998783112, "learning_rate": 0.01, "loss": 1.9673, "step": 121542 }, { "epoch": 12.489210850801479, "grad_norm": 0.16138233244419098, "learning_rate": 0.01, "loss": 1.9458, "step": 121545 }, { "epoch": 12.489519112207152, "grad_norm": 0.06983632594347, "learning_rate": 0.01, "loss": 1.9256, "step": 121548 }, { "epoch": 12.489827373612824, "grad_norm": 0.034231752157211304, "learning_rate": 0.01, "loss": 1.9284, "step": 121551 }, { "epoch": 12.490135635018495, "grad_norm": 0.034840893000364304, "learning_rate": 0.01, "loss": 1.9258, "step": 121554 }, { "epoch": 12.490443896424168, "grad_norm": 0.03223062679171562, "learning_rate": 0.01, "loss": 1.9519, "step": 121557 }, { "epoch": 12.49075215782984, "grad_norm": 0.057242974638938904, "learning_rate": 0.01, "loss": 1.9565, "step": 121560 }, { "epoch": 12.491060419235511, "grad_norm": 0.051756180822849274, "learning_rate": 0.01, "loss": 1.9402, "step": 121563 }, { "epoch": 12.491368680641184, "grad_norm": 0.043041735887527466, "learning_rate": 0.01, "loss": 1.9526, "step": 121566 }, { "epoch": 12.491676942046857, "grad_norm": 0.04464586824178696, "learning_rate": 0.01, "loss": 1.9191, "step": 121569 }, { "epoch": 12.491985203452527, "grad_norm": 0.049728620797395706, "learning_rate": 0.01, "loss": 1.92, "step": 121572 }, { "epoch": 12.4922934648582, "grad_norm": 0.11435680836439133, "learning_rate": 0.01, "loss": 1.9157, "step": 121575 }, { "epoch": 12.49260172626387, "grad_norm": 0.049143481999635696, "learning_rate": 0.01, "loss": 1.9313, "step": 121578 }, { "epoch": 12.492909987669544, "grad_norm": 0.09761693328619003, "learning_rate": 0.01, "loss": 1.9446, "step": 121581 }, { "epoch": 12.493218249075216, "grad_norm": 0.04102834314107895, "learning_rate": 0.01, "loss": 1.9362, "step": 121584 }, { "epoch": 12.493526510480887, "grad_norm": 0.07719264179468155, "learning_rate": 0.01, "loss": 1.9484, "step": 121587 }, { "epoch": 12.49383477188656, "grad_norm": 0.06168089807033539, "learning_rate": 0.01, "loss": 1.9335, "step": 121590 }, { "epoch": 12.494143033292232, "grad_norm": 0.08931529521942139, "learning_rate": 0.01, "loss": 1.9407, "step": 121593 }, { "epoch": 12.494451294697903, "grad_norm": 0.04156995564699173, "learning_rate": 0.01, "loss": 1.9283, "step": 121596 }, { "epoch": 12.494759556103576, "grad_norm": 0.07967917621135712, "learning_rate": 0.01, "loss": 1.9216, "step": 121599 }, { "epoch": 12.495067817509248, "grad_norm": 0.07035093754529953, "learning_rate": 0.01, "loss": 1.9679, "step": 121602 }, { "epoch": 12.49537607891492, "grad_norm": 0.09730745106935501, "learning_rate": 0.01, "loss": 1.9473, "step": 121605 }, { "epoch": 12.495684340320592, "grad_norm": 0.06220466271042824, "learning_rate": 0.01, "loss": 1.927, "step": 121608 }, { "epoch": 12.495992601726265, "grad_norm": 0.10313192009925842, "learning_rate": 0.01, "loss": 1.9552, "step": 121611 }, { "epoch": 12.496300863131935, "grad_norm": 0.057577695697546005, "learning_rate": 0.01, "loss": 1.9512, "step": 121614 }, { "epoch": 12.496609124537608, "grad_norm": 0.04529568925499916, "learning_rate": 0.01, "loss": 1.9326, "step": 121617 }, { "epoch": 12.49691738594328, "grad_norm": 0.10593090206384659, "learning_rate": 0.01, "loss": 1.9195, "step": 121620 }, { "epoch": 12.497225647348952, "grad_norm": 0.05129368230700493, "learning_rate": 0.01, "loss": 1.9334, "step": 121623 }, { "epoch": 12.497533908754624, "grad_norm": 0.10745473951101303, "learning_rate": 0.01, "loss": 1.9347, "step": 121626 }, { "epoch": 12.497842170160295, "grad_norm": 0.07244491577148438, "learning_rate": 0.01, "loss": 1.92, "step": 121629 }, { "epoch": 12.498150431565968, "grad_norm": 0.14453358948230743, "learning_rate": 0.01, "loss": 1.9359, "step": 121632 }, { "epoch": 12.49845869297164, "grad_norm": 0.13955679535865784, "learning_rate": 0.01, "loss": 1.9274, "step": 121635 }, { "epoch": 12.498766954377311, "grad_norm": 0.07900308817625046, "learning_rate": 0.01, "loss": 1.9252, "step": 121638 }, { "epoch": 12.499075215782984, "grad_norm": 0.07743179053068161, "learning_rate": 0.01, "loss": 1.9397, "step": 121641 }, { "epoch": 12.499383477188656, "grad_norm": 0.10477512329816818, "learning_rate": 0.01, "loss": 1.9193, "step": 121644 }, { "epoch": 12.499691738594327, "grad_norm": 0.06475335359573364, "learning_rate": 0.01, "loss": 1.9241, "step": 121647 }, { "epoch": 12.5, "grad_norm": 0.040137145668268204, "learning_rate": 0.01, "loss": 1.9161, "step": 121650 }, { "epoch": 12.500308261405673, "grad_norm": 0.045835766941308975, "learning_rate": 0.01, "loss": 1.9106, "step": 121653 }, { "epoch": 12.500616522811344, "grad_norm": 0.05901820585131645, "learning_rate": 0.01, "loss": 1.9699, "step": 121656 }, { "epoch": 12.500924784217016, "grad_norm": 0.06410286575555801, "learning_rate": 0.01, "loss": 1.925, "step": 121659 }, { "epoch": 12.501233045622689, "grad_norm": 0.10451801866292953, "learning_rate": 0.01, "loss": 1.9492, "step": 121662 }, { "epoch": 12.50154130702836, "grad_norm": 0.06614912301301956, "learning_rate": 0.01, "loss": 1.9108, "step": 121665 }, { "epoch": 12.501849568434032, "grad_norm": 0.09007228910923004, "learning_rate": 0.01, "loss": 1.9305, "step": 121668 }, { "epoch": 12.502157829839703, "grad_norm": 0.0648479238152504, "learning_rate": 0.01, "loss": 1.9379, "step": 121671 }, { "epoch": 12.502466091245376, "grad_norm": 0.0800272673368454, "learning_rate": 0.01, "loss": 1.9152, "step": 121674 }, { "epoch": 12.502774352651048, "grad_norm": 0.12876403331756592, "learning_rate": 0.01, "loss": 1.9246, "step": 121677 }, { "epoch": 12.50308261405672, "grad_norm": 0.08178053051233292, "learning_rate": 0.01, "loss": 1.9359, "step": 121680 }, { "epoch": 12.503390875462392, "grad_norm": 0.04241476580500603, "learning_rate": 0.01, "loss": 1.9534, "step": 121683 }, { "epoch": 12.503699136868065, "grad_norm": 0.03111073561012745, "learning_rate": 0.01, "loss": 1.9663, "step": 121686 }, { "epoch": 12.504007398273735, "grad_norm": 0.03514395281672478, "learning_rate": 0.01, "loss": 1.9131, "step": 121689 }, { "epoch": 12.504315659679408, "grad_norm": 0.08777007460594177, "learning_rate": 0.01, "loss": 1.9367, "step": 121692 }, { "epoch": 12.50462392108508, "grad_norm": 0.07255978882312775, "learning_rate": 0.01, "loss": 1.9046, "step": 121695 }, { "epoch": 12.504932182490752, "grad_norm": 0.11769585311412811, "learning_rate": 0.01, "loss": 1.9213, "step": 121698 }, { "epoch": 12.505240443896424, "grad_norm": 0.05882567912340164, "learning_rate": 0.01, "loss": 1.9258, "step": 121701 }, { "epoch": 12.505548705302097, "grad_norm": 0.05234869569540024, "learning_rate": 0.01, "loss": 1.94, "step": 121704 }, { "epoch": 12.505856966707768, "grad_norm": 0.07418510317802429, "learning_rate": 0.01, "loss": 1.9495, "step": 121707 }, { "epoch": 12.50616522811344, "grad_norm": 0.06958342343568802, "learning_rate": 0.01, "loss": 1.9378, "step": 121710 }, { "epoch": 12.506473489519113, "grad_norm": 0.06797537952661514, "learning_rate": 0.01, "loss": 1.9188, "step": 121713 }, { "epoch": 12.506781750924784, "grad_norm": 0.08575470000505447, "learning_rate": 0.01, "loss": 1.9432, "step": 121716 }, { "epoch": 12.507090012330456, "grad_norm": 0.15510590374469757, "learning_rate": 0.01, "loss": 1.9443, "step": 121719 }, { "epoch": 12.50739827373613, "grad_norm": 0.06296925991773605, "learning_rate": 0.01, "loss": 1.9316, "step": 121722 }, { "epoch": 12.5077065351418, "grad_norm": 0.1299462765455246, "learning_rate": 0.01, "loss": 1.9432, "step": 121725 }, { "epoch": 12.508014796547473, "grad_norm": 0.10709226131439209, "learning_rate": 0.01, "loss": 1.9186, "step": 121728 }, { "epoch": 12.508323057953143, "grad_norm": 0.11783047020435333, "learning_rate": 0.01, "loss": 1.9185, "step": 121731 }, { "epoch": 12.508631319358816, "grad_norm": 0.09729878604412079, "learning_rate": 0.01, "loss": 1.918, "step": 121734 }, { "epoch": 12.508939580764489, "grad_norm": 0.05474984645843506, "learning_rate": 0.01, "loss": 1.9367, "step": 121737 }, { "epoch": 12.50924784217016, "grad_norm": 0.08836578577756882, "learning_rate": 0.01, "loss": 1.9647, "step": 121740 }, { "epoch": 12.509556103575832, "grad_norm": 0.042429737746715546, "learning_rate": 0.01, "loss": 1.9309, "step": 121743 }, { "epoch": 12.509864364981505, "grad_norm": 0.049478959292173386, "learning_rate": 0.01, "loss": 1.9629, "step": 121746 }, { "epoch": 12.510172626387176, "grad_norm": 0.050765786319971085, "learning_rate": 0.01, "loss": 1.9208, "step": 121749 }, { "epoch": 12.510480887792848, "grad_norm": 0.043715640902519226, "learning_rate": 0.01, "loss": 1.909, "step": 121752 }, { "epoch": 12.510789149198521, "grad_norm": 0.03809693083167076, "learning_rate": 0.01, "loss": 1.933, "step": 121755 }, { "epoch": 12.511097410604192, "grad_norm": 0.11676877737045288, "learning_rate": 0.01, "loss": 1.9517, "step": 121758 }, { "epoch": 12.511405672009865, "grad_norm": 0.13902495801448822, "learning_rate": 0.01, "loss": 1.9514, "step": 121761 }, { "epoch": 12.511713933415537, "grad_norm": 0.05882659927010536, "learning_rate": 0.01, "loss": 1.9191, "step": 121764 }, { "epoch": 12.512022194821208, "grad_norm": 0.07055717706680298, "learning_rate": 0.01, "loss": 1.9613, "step": 121767 }, { "epoch": 12.51233045622688, "grad_norm": 0.06787385791540146, "learning_rate": 0.01, "loss": 1.9412, "step": 121770 }, { "epoch": 12.512638717632552, "grad_norm": 0.03310607001185417, "learning_rate": 0.01, "loss": 1.9378, "step": 121773 }, { "epoch": 12.512946979038224, "grad_norm": 0.031156888231635094, "learning_rate": 0.01, "loss": 1.9257, "step": 121776 }, { "epoch": 12.513255240443897, "grad_norm": 0.12905651330947876, "learning_rate": 0.01, "loss": 1.9432, "step": 121779 }, { "epoch": 12.513563501849568, "grad_norm": 0.16129325330257416, "learning_rate": 0.01, "loss": 1.9101, "step": 121782 }, { "epoch": 12.51387176325524, "grad_norm": 0.07566894590854645, "learning_rate": 0.01, "loss": 1.9198, "step": 121785 }, { "epoch": 12.514180024660913, "grad_norm": 0.05478030815720558, "learning_rate": 0.01, "loss": 1.9495, "step": 121788 }, { "epoch": 12.514488286066584, "grad_norm": 0.06553813070058823, "learning_rate": 0.01, "loss": 1.9329, "step": 121791 }, { "epoch": 12.514796547472256, "grad_norm": 0.04844589903950691, "learning_rate": 0.01, "loss": 1.9194, "step": 121794 }, { "epoch": 12.515104808877929, "grad_norm": 0.04799551144242287, "learning_rate": 0.01, "loss": 1.9317, "step": 121797 }, { "epoch": 12.5154130702836, "grad_norm": 0.04788275435566902, "learning_rate": 0.01, "loss": 1.9515, "step": 121800 }, { "epoch": 12.515721331689273, "grad_norm": 0.04693294316530228, "learning_rate": 0.01, "loss": 1.9377, "step": 121803 }, { "epoch": 12.516029593094945, "grad_norm": 0.05159008875489235, "learning_rate": 0.01, "loss": 1.9459, "step": 121806 }, { "epoch": 12.516337854500616, "grad_norm": 0.04252525418996811, "learning_rate": 0.01, "loss": 1.9182, "step": 121809 }, { "epoch": 12.516646115906289, "grad_norm": 0.07126598805189133, "learning_rate": 0.01, "loss": 1.9234, "step": 121812 }, { "epoch": 12.516954377311961, "grad_norm": 0.2112404853105545, "learning_rate": 0.01, "loss": 1.9469, "step": 121815 }, { "epoch": 12.517262638717632, "grad_norm": 0.19761815667152405, "learning_rate": 0.01, "loss": 1.9382, "step": 121818 }, { "epoch": 12.517570900123305, "grad_norm": 0.05613444745540619, "learning_rate": 0.01, "loss": 1.9496, "step": 121821 }, { "epoch": 12.517879161528978, "grad_norm": 0.044825371354818344, "learning_rate": 0.01, "loss": 1.9451, "step": 121824 }, { "epoch": 12.518187422934648, "grad_norm": 0.0385623574256897, "learning_rate": 0.01, "loss": 1.9144, "step": 121827 }, { "epoch": 12.518495684340321, "grad_norm": 0.03992215171456337, "learning_rate": 0.01, "loss": 1.936, "step": 121830 }, { "epoch": 12.518803945745992, "grad_norm": 0.05851850286126137, "learning_rate": 0.01, "loss": 1.9287, "step": 121833 }, { "epoch": 12.519112207151665, "grad_norm": 0.04692927747964859, "learning_rate": 0.01, "loss": 1.9312, "step": 121836 }, { "epoch": 12.519420468557337, "grad_norm": 0.044795434921979904, "learning_rate": 0.01, "loss": 1.917, "step": 121839 }, { "epoch": 12.519728729963008, "grad_norm": 0.03476089611649513, "learning_rate": 0.01, "loss": 1.9354, "step": 121842 }, { "epoch": 12.52003699136868, "grad_norm": 0.035152047872543335, "learning_rate": 0.01, "loss": 1.9569, "step": 121845 }, { "epoch": 12.520345252774353, "grad_norm": 0.04816887900233269, "learning_rate": 0.01, "loss": 1.9493, "step": 121848 }, { "epoch": 12.520653514180024, "grad_norm": 0.07016416639089584, "learning_rate": 0.01, "loss": 1.9607, "step": 121851 }, { "epoch": 12.520961775585697, "grad_norm": 0.03749404475092888, "learning_rate": 0.01, "loss": 1.941, "step": 121854 }, { "epoch": 12.52127003699137, "grad_norm": 0.21347223222255707, "learning_rate": 0.01, "loss": 1.9433, "step": 121857 }, { "epoch": 12.52157829839704, "grad_norm": 0.08297964930534363, "learning_rate": 0.01, "loss": 1.9397, "step": 121860 }, { "epoch": 12.521886559802713, "grad_norm": 0.0701962485909462, "learning_rate": 0.01, "loss": 1.951, "step": 121863 }, { "epoch": 12.522194821208384, "grad_norm": 0.03970552235841751, "learning_rate": 0.01, "loss": 1.9582, "step": 121866 }, { "epoch": 12.522503082614056, "grad_norm": 0.03517434746026993, "learning_rate": 0.01, "loss": 1.9202, "step": 121869 }, { "epoch": 12.522811344019729, "grad_norm": 0.06066088005900383, "learning_rate": 0.01, "loss": 1.9255, "step": 121872 }, { "epoch": 12.5231196054254, "grad_norm": 0.0656011626124382, "learning_rate": 0.01, "loss": 1.9379, "step": 121875 }, { "epoch": 12.523427866831073, "grad_norm": 0.10634774714708328, "learning_rate": 0.01, "loss": 1.9367, "step": 121878 }, { "epoch": 12.523736128236745, "grad_norm": 0.1101708859205246, "learning_rate": 0.01, "loss": 1.9372, "step": 121881 }, { "epoch": 12.524044389642416, "grad_norm": 0.04546092823147774, "learning_rate": 0.01, "loss": 1.9119, "step": 121884 }, { "epoch": 12.524352651048089, "grad_norm": 0.082464799284935, "learning_rate": 0.01, "loss": 1.9302, "step": 121887 }, { "epoch": 12.524660912453761, "grad_norm": 0.03992221876978874, "learning_rate": 0.01, "loss": 1.9406, "step": 121890 }, { "epoch": 12.524969173859432, "grad_norm": 0.03644541651010513, "learning_rate": 0.01, "loss": 1.928, "step": 121893 }, { "epoch": 12.525277435265105, "grad_norm": 0.029982885345816612, "learning_rate": 0.01, "loss": 1.9248, "step": 121896 }, { "epoch": 12.525585696670777, "grad_norm": 0.16123051941394806, "learning_rate": 0.01, "loss": 1.9526, "step": 121899 }, { "epoch": 12.525893958076448, "grad_norm": 0.07271135598421097, "learning_rate": 0.01, "loss": 1.9401, "step": 121902 }, { "epoch": 12.526202219482121, "grad_norm": 0.06443187594413757, "learning_rate": 0.01, "loss": 1.9659, "step": 121905 }, { "epoch": 12.526510480887794, "grad_norm": 0.03684204816818237, "learning_rate": 0.01, "loss": 1.9361, "step": 121908 }, { "epoch": 12.526818742293464, "grad_norm": 0.06612155586481094, "learning_rate": 0.01, "loss": 1.9207, "step": 121911 }, { "epoch": 12.527127003699137, "grad_norm": 0.10207538306713104, "learning_rate": 0.01, "loss": 1.9596, "step": 121914 }, { "epoch": 12.52743526510481, "grad_norm": 0.08532961457967758, "learning_rate": 0.01, "loss": 1.9599, "step": 121917 }, { "epoch": 12.52774352651048, "grad_norm": 0.05782786011695862, "learning_rate": 0.01, "loss": 1.9288, "step": 121920 }, { "epoch": 12.528051787916153, "grad_norm": 0.044351086020469666, "learning_rate": 0.01, "loss": 1.9503, "step": 121923 }, { "epoch": 12.528360049321824, "grad_norm": 0.053692616522312164, "learning_rate": 0.01, "loss": 1.9233, "step": 121926 }, { "epoch": 12.528668310727497, "grad_norm": 0.10878470540046692, "learning_rate": 0.01, "loss": 1.9312, "step": 121929 }, { "epoch": 12.52897657213317, "grad_norm": 0.09176810085773468, "learning_rate": 0.01, "loss": 1.9203, "step": 121932 }, { "epoch": 12.52928483353884, "grad_norm": 0.10216666758060455, "learning_rate": 0.01, "loss": 1.9277, "step": 121935 }, { "epoch": 12.529593094944513, "grad_norm": 0.06094343587756157, "learning_rate": 0.01, "loss": 1.9272, "step": 121938 }, { "epoch": 12.529901356350186, "grad_norm": 0.07060480117797852, "learning_rate": 0.01, "loss": 1.9374, "step": 121941 }, { "epoch": 12.530209617755856, "grad_norm": 0.04404463618993759, "learning_rate": 0.01, "loss": 1.9401, "step": 121944 }, { "epoch": 12.530517879161529, "grad_norm": 0.0398835763335228, "learning_rate": 0.01, "loss": 1.9538, "step": 121947 }, { "epoch": 12.530826140567202, "grad_norm": 0.11280908435583115, "learning_rate": 0.01, "loss": 1.9334, "step": 121950 }, { "epoch": 12.531134401972873, "grad_norm": 0.10008415579795837, "learning_rate": 0.01, "loss": 1.9118, "step": 121953 }, { "epoch": 12.531442663378545, "grad_norm": 0.08154910057783127, "learning_rate": 0.01, "loss": 1.948, "step": 121956 }, { "epoch": 12.531750924784218, "grad_norm": 0.03601017966866493, "learning_rate": 0.01, "loss": 1.9467, "step": 121959 }, { "epoch": 12.532059186189889, "grad_norm": 0.039634574204683304, "learning_rate": 0.01, "loss": 1.9249, "step": 121962 }, { "epoch": 12.532367447595561, "grad_norm": 0.038583431392908096, "learning_rate": 0.01, "loss": 1.9555, "step": 121965 }, { "epoch": 12.532675709001232, "grad_norm": 0.09992941468954086, "learning_rate": 0.01, "loss": 1.9081, "step": 121968 }, { "epoch": 12.532983970406905, "grad_norm": 0.08802489191293716, "learning_rate": 0.01, "loss": 1.929, "step": 121971 }, { "epoch": 12.533292231812577, "grad_norm": 0.09296849370002747, "learning_rate": 0.01, "loss": 1.9253, "step": 121974 }, { "epoch": 12.533600493218248, "grad_norm": 0.0878094881772995, "learning_rate": 0.01, "loss": 1.9498, "step": 121977 }, { "epoch": 12.533908754623921, "grad_norm": 0.04183616116642952, "learning_rate": 0.01, "loss": 1.9209, "step": 121980 }, { "epoch": 12.534217016029594, "grad_norm": 0.0330495610833168, "learning_rate": 0.01, "loss": 1.9283, "step": 121983 }, { "epoch": 12.534525277435264, "grad_norm": 0.05904131755232811, "learning_rate": 0.01, "loss": 1.93, "step": 121986 }, { "epoch": 12.534833538840937, "grad_norm": 0.11319120228290558, "learning_rate": 0.01, "loss": 1.9268, "step": 121989 }, { "epoch": 12.53514180024661, "grad_norm": 0.038431279361248016, "learning_rate": 0.01, "loss": 1.9201, "step": 121992 }, { "epoch": 12.53545006165228, "grad_norm": 0.04068557918071747, "learning_rate": 0.01, "loss": 1.9271, "step": 121995 }, { "epoch": 12.535758323057953, "grad_norm": 0.0435246042907238, "learning_rate": 0.01, "loss": 1.9271, "step": 121998 }, { "epoch": 12.536066584463626, "grad_norm": 0.07753629982471466, "learning_rate": 0.01, "loss": 1.9137, "step": 122001 }, { "epoch": 12.536374845869297, "grad_norm": 0.11436717957258224, "learning_rate": 0.01, "loss": 1.942, "step": 122004 }, { "epoch": 12.53668310727497, "grad_norm": 0.15614436566829681, "learning_rate": 0.01, "loss": 1.9243, "step": 122007 }, { "epoch": 12.536991368680642, "grad_norm": 0.10340051352977753, "learning_rate": 0.01, "loss": 1.9492, "step": 122010 }, { "epoch": 12.537299630086313, "grad_norm": 0.06114725023508072, "learning_rate": 0.01, "loss": 1.9249, "step": 122013 }, { "epoch": 12.537607891491986, "grad_norm": 0.044180672615766525, "learning_rate": 0.01, "loss": 1.9373, "step": 122016 }, { "epoch": 12.537916152897658, "grad_norm": 0.03734464943408966, "learning_rate": 0.01, "loss": 1.9346, "step": 122019 }, { "epoch": 12.538224414303329, "grad_norm": 0.12132205069065094, "learning_rate": 0.01, "loss": 1.938, "step": 122022 }, { "epoch": 12.538532675709002, "grad_norm": 0.044691603630781174, "learning_rate": 0.01, "loss": 1.9185, "step": 122025 }, { "epoch": 12.538840937114673, "grad_norm": 0.1167520135641098, "learning_rate": 0.01, "loss": 1.9387, "step": 122028 }, { "epoch": 12.539149198520345, "grad_norm": 0.10806215554475784, "learning_rate": 0.01, "loss": 1.9089, "step": 122031 }, { "epoch": 12.539457459926018, "grad_norm": 0.0503133200109005, "learning_rate": 0.01, "loss": 1.9405, "step": 122034 }, { "epoch": 12.539765721331689, "grad_norm": 0.04720020666718483, "learning_rate": 0.01, "loss": 1.9319, "step": 122037 }, { "epoch": 12.540073982737361, "grad_norm": 0.0499320849776268, "learning_rate": 0.01, "loss": 1.9258, "step": 122040 }, { "epoch": 12.540382244143034, "grad_norm": 0.07868952304124832, "learning_rate": 0.01, "loss": 1.9337, "step": 122043 }, { "epoch": 12.540690505548705, "grad_norm": 0.10274378955364227, "learning_rate": 0.01, "loss": 1.964, "step": 122046 }, { "epoch": 12.540998766954377, "grad_norm": 0.047186024487018585, "learning_rate": 0.01, "loss": 1.9465, "step": 122049 }, { "epoch": 12.54130702836005, "grad_norm": 0.09629018604755402, "learning_rate": 0.01, "loss": 1.9485, "step": 122052 }, { "epoch": 12.541615289765721, "grad_norm": 0.08748985081911087, "learning_rate": 0.01, "loss": 1.9325, "step": 122055 }, { "epoch": 12.541923551171394, "grad_norm": 0.05084213986992836, "learning_rate": 0.01, "loss": 1.926, "step": 122058 }, { "epoch": 12.542231812577064, "grad_norm": 0.09220089018344879, "learning_rate": 0.01, "loss": 1.9316, "step": 122061 }, { "epoch": 12.542540073982737, "grad_norm": 0.13815580308437347, "learning_rate": 0.01, "loss": 1.9375, "step": 122064 }, { "epoch": 12.54284833538841, "grad_norm": 0.06978030502796173, "learning_rate": 0.01, "loss": 1.9302, "step": 122067 }, { "epoch": 12.54315659679408, "grad_norm": 0.04032085835933685, "learning_rate": 0.01, "loss": 1.9378, "step": 122070 }, { "epoch": 12.543464858199753, "grad_norm": 0.06091155484318733, "learning_rate": 0.01, "loss": 1.9189, "step": 122073 }, { "epoch": 12.543773119605426, "grad_norm": 0.07845517992973328, "learning_rate": 0.01, "loss": 1.9378, "step": 122076 }, { "epoch": 12.544081381011097, "grad_norm": 0.07486490905284882, "learning_rate": 0.01, "loss": 1.9248, "step": 122079 }, { "epoch": 12.54438964241677, "grad_norm": 0.09034962207078934, "learning_rate": 0.01, "loss": 1.9492, "step": 122082 }, { "epoch": 12.544697903822442, "grad_norm": 0.08133091777563095, "learning_rate": 0.01, "loss": 1.9269, "step": 122085 }, { "epoch": 12.545006165228113, "grad_norm": 0.05244584009051323, "learning_rate": 0.01, "loss": 1.9434, "step": 122088 }, { "epoch": 12.545314426633785, "grad_norm": 0.04016478732228279, "learning_rate": 0.01, "loss": 1.9361, "step": 122091 }, { "epoch": 12.545622688039458, "grad_norm": 0.03591429814696312, "learning_rate": 0.01, "loss": 1.9338, "step": 122094 }, { "epoch": 12.545930949445129, "grad_norm": 0.09179961681365967, "learning_rate": 0.01, "loss": 1.9436, "step": 122097 }, { "epoch": 12.546239210850802, "grad_norm": 0.05071881040930748, "learning_rate": 0.01, "loss": 1.929, "step": 122100 }, { "epoch": 12.546547472256474, "grad_norm": 0.06177901849150658, "learning_rate": 0.01, "loss": 1.9484, "step": 122103 }, { "epoch": 12.546855733662145, "grad_norm": 0.05903288722038269, "learning_rate": 0.01, "loss": 1.9383, "step": 122106 }, { "epoch": 12.547163995067818, "grad_norm": 0.06547144055366516, "learning_rate": 0.01, "loss": 1.912, "step": 122109 }, { "epoch": 12.54747225647349, "grad_norm": 0.11096128821372986, "learning_rate": 0.01, "loss": 1.9444, "step": 122112 }, { "epoch": 12.547780517879161, "grad_norm": 0.08324635028839111, "learning_rate": 0.01, "loss": 1.933, "step": 122115 }, { "epoch": 12.548088779284834, "grad_norm": 0.10194128006696701, "learning_rate": 0.01, "loss": 1.9189, "step": 122118 }, { "epoch": 12.548397040690505, "grad_norm": 0.17126311361789703, "learning_rate": 0.01, "loss": 1.9249, "step": 122121 }, { "epoch": 12.548705302096177, "grad_norm": 0.03643030673265457, "learning_rate": 0.01, "loss": 1.9386, "step": 122124 }, { "epoch": 12.54901356350185, "grad_norm": 0.061957668513059616, "learning_rate": 0.01, "loss": 1.9154, "step": 122127 }, { "epoch": 12.549321824907521, "grad_norm": 0.036888238042593, "learning_rate": 0.01, "loss": 1.9449, "step": 122130 }, { "epoch": 12.549630086313194, "grad_norm": 0.05962397903203964, "learning_rate": 0.01, "loss": 1.9243, "step": 122133 }, { "epoch": 12.549938347718866, "grad_norm": 0.05716247111558914, "learning_rate": 0.01, "loss": 1.9385, "step": 122136 }, { "epoch": 12.550246609124537, "grad_norm": 0.09270322322845459, "learning_rate": 0.01, "loss": 1.9332, "step": 122139 }, { "epoch": 12.55055487053021, "grad_norm": 0.07602955400943756, "learning_rate": 0.01, "loss": 1.9532, "step": 122142 }, { "epoch": 12.550863131935882, "grad_norm": 0.07429090887308121, "learning_rate": 0.01, "loss": 1.9342, "step": 122145 }, { "epoch": 12.551171393341553, "grad_norm": 0.06275807321071625, "learning_rate": 0.01, "loss": 1.9522, "step": 122148 }, { "epoch": 12.551479654747226, "grad_norm": 0.07937415689229965, "learning_rate": 0.01, "loss": 1.9155, "step": 122151 }, { "epoch": 12.551787916152898, "grad_norm": 0.09188801795244217, "learning_rate": 0.01, "loss": 1.919, "step": 122154 }, { "epoch": 12.55209617755857, "grad_norm": 0.07130987197160721, "learning_rate": 0.01, "loss": 1.9541, "step": 122157 }, { "epoch": 12.552404438964242, "grad_norm": 0.05817567929625511, "learning_rate": 0.01, "loss": 1.9465, "step": 122160 }, { "epoch": 12.552712700369913, "grad_norm": 0.10335689783096313, "learning_rate": 0.01, "loss": 1.9361, "step": 122163 }, { "epoch": 12.553020961775585, "grad_norm": 0.07822994142770767, "learning_rate": 0.01, "loss": 1.9347, "step": 122166 }, { "epoch": 12.553329223181258, "grad_norm": 0.05971822887659073, "learning_rate": 0.01, "loss": 1.9434, "step": 122169 }, { "epoch": 12.553637484586929, "grad_norm": 0.06985843926668167, "learning_rate": 0.01, "loss": 1.9168, "step": 122172 }, { "epoch": 12.553945745992602, "grad_norm": 0.0653102770447731, "learning_rate": 0.01, "loss": 1.9238, "step": 122175 }, { "epoch": 12.554254007398274, "grad_norm": 0.041373997926712036, "learning_rate": 0.01, "loss": 1.9325, "step": 122178 }, { "epoch": 12.554562268803945, "grad_norm": 0.03778582811355591, "learning_rate": 0.01, "loss": 1.9259, "step": 122181 }, { "epoch": 12.554870530209618, "grad_norm": 0.05184998735785484, "learning_rate": 0.01, "loss": 1.9405, "step": 122184 }, { "epoch": 12.55517879161529, "grad_norm": 0.05475650355219841, "learning_rate": 0.01, "loss": 1.9531, "step": 122187 }, { "epoch": 12.555487053020961, "grad_norm": 0.05435996502637863, "learning_rate": 0.01, "loss": 1.9474, "step": 122190 }, { "epoch": 12.555795314426634, "grad_norm": 0.06992301344871521, "learning_rate": 0.01, "loss": 1.908, "step": 122193 }, { "epoch": 12.556103575832307, "grad_norm": 0.10033873468637466, "learning_rate": 0.01, "loss": 1.9426, "step": 122196 }, { "epoch": 12.556411837237977, "grad_norm": 0.07717881351709366, "learning_rate": 0.01, "loss": 1.9257, "step": 122199 }, { "epoch": 12.55672009864365, "grad_norm": 0.07293485105037689, "learning_rate": 0.01, "loss": 1.9318, "step": 122202 }, { "epoch": 12.557028360049323, "grad_norm": 0.05288426950573921, "learning_rate": 0.01, "loss": 1.9561, "step": 122205 }, { "epoch": 12.557336621454994, "grad_norm": 0.0352017767727375, "learning_rate": 0.01, "loss": 1.956, "step": 122208 }, { "epoch": 12.557644882860666, "grad_norm": 0.0325283445417881, "learning_rate": 0.01, "loss": 1.9108, "step": 122211 }, { "epoch": 12.557953144266339, "grad_norm": 0.11622142791748047, "learning_rate": 0.01, "loss": 1.9411, "step": 122214 }, { "epoch": 12.55826140567201, "grad_norm": 0.07114896923303604, "learning_rate": 0.01, "loss": 1.9363, "step": 122217 }, { "epoch": 12.558569667077682, "grad_norm": 0.0963045135140419, "learning_rate": 0.01, "loss": 1.9299, "step": 122220 }, { "epoch": 12.558877928483353, "grad_norm": 0.09308268874883652, "learning_rate": 0.01, "loss": 1.9394, "step": 122223 }, { "epoch": 12.559186189889026, "grad_norm": 0.05020532384514809, "learning_rate": 0.01, "loss": 1.9123, "step": 122226 }, { "epoch": 12.559494451294698, "grad_norm": 0.09539689123630524, "learning_rate": 0.01, "loss": 1.9384, "step": 122229 }, { "epoch": 12.55980271270037, "grad_norm": 0.05360017716884613, "learning_rate": 0.01, "loss": 1.9282, "step": 122232 }, { "epoch": 12.560110974106042, "grad_norm": 0.040233734995126724, "learning_rate": 0.01, "loss": 1.9179, "step": 122235 }, { "epoch": 12.560419235511715, "grad_norm": 0.03760543093085289, "learning_rate": 0.01, "loss": 1.9595, "step": 122238 }, { "epoch": 12.560727496917385, "grad_norm": 0.06255226582288742, "learning_rate": 0.01, "loss": 1.9237, "step": 122241 }, { "epoch": 12.561035758323058, "grad_norm": 0.047534991055727005, "learning_rate": 0.01, "loss": 1.9179, "step": 122244 }, { "epoch": 12.56134401972873, "grad_norm": 0.14942295849323273, "learning_rate": 0.01, "loss": 1.9338, "step": 122247 }, { "epoch": 12.561652281134402, "grad_norm": 0.11508209258317947, "learning_rate": 0.01, "loss": 1.9501, "step": 122250 }, { "epoch": 12.561960542540074, "grad_norm": 0.04446358606219292, "learning_rate": 0.01, "loss": 1.9342, "step": 122253 }, { "epoch": 12.562268803945745, "grad_norm": 0.037333231419324875, "learning_rate": 0.01, "loss": 1.9214, "step": 122256 }, { "epoch": 12.562577065351418, "grad_norm": 0.061013709753751755, "learning_rate": 0.01, "loss": 1.9526, "step": 122259 }, { "epoch": 12.56288532675709, "grad_norm": 0.04341999813914299, "learning_rate": 0.01, "loss": 1.9256, "step": 122262 }, { "epoch": 12.563193588162761, "grad_norm": 0.04809141531586647, "learning_rate": 0.01, "loss": 1.9064, "step": 122265 }, { "epoch": 12.563501849568434, "grad_norm": 0.04533364251255989, "learning_rate": 0.01, "loss": 1.9651, "step": 122268 }, { "epoch": 12.563810110974106, "grad_norm": 0.04224034398794174, "learning_rate": 0.01, "loss": 1.9243, "step": 122271 }, { "epoch": 12.564118372379777, "grad_norm": 0.07633619755506516, "learning_rate": 0.01, "loss": 1.9408, "step": 122274 }, { "epoch": 12.56442663378545, "grad_norm": 0.03966088593006134, "learning_rate": 0.01, "loss": 1.9342, "step": 122277 }, { "epoch": 12.564734895191123, "grad_norm": 0.07610847800970078, "learning_rate": 0.01, "loss": 1.9497, "step": 122280 }, { "epoch": 12.565043156596793, "grad_norm": 0.0485893115401268, "learning_rate": 0.01, "loss": 1.921, "step": 122283 }, { "epoch": 12.565351418002466, "grad_norm": 0.052794449031353, "learning_rate": 0.01, "loss": 1.9211, "step": 122286 }, { "epoch": 12.565659679408139, "grad_norm": 0.1039859801530838, "learning_rate": 0.01, "loss": 1.9386, "step": 122289 }, { "epoch": 12.56596794081381, "grad_norm": 0.055837295949459076, "learning_rate": 0.01, "loss": 1.944, "step": 122292 }, { "epoch": 12.566276202219482, "grad_norm": 0.06777098029851913, "learning_rate": 0.01, "loss": 1.9146, "step": 122295 }, { "epoch": 12.566584463625155, "grad_norm": 0.037880655378103256, "learning_rate": 0.01, "loss": 1.9455, "step": 122298 }, { "epoch": 12.566892725030826, "grad_norm": 0.04481416940689087, "learning_rate": 0.01, "loss": 1.9026, "step": 122301 }, { "epoch": 12.567200986436498, "grad_norm": 0.08036676794290543, "learning_rate": 0.01, "loss": 1.9061, "step": 122304 }, { "epoch": 12.567509247842171, "grad_norm": 0.09721449762582779, "learning_rate": 0.01, "loss": 1.9621, "step": 122307 }, { "epoch": 12.567817509247842, "grad_norm": 0.033135443925857544, "learning_rate": 0.01, "loss": 1.9298, "step": 122310 }, { "epoch": 12.568125770653515, "grad_norm": 0.11335978657007217, "learning_rate": 0.01, "loss": 1.9554, "step": 122313 }, { "epoch": 12.568434032059185, "grad_norm": 0.07764448970556259, "learning_rate": 0.01, "loss": 1.9434, "step": 122316 }, { "epoch": 12.568742293464858, "grad_norm": 0.05308235064148903, "learning_rate": 0.01, "loss": 1.9628, "step": 122319 }, { "epoch": 12.56905055487053, "grad_norm": 0.057582397013902664, "learning_rate": 0.01, "loss": 1.9238, "step": 122322 }, { "epoch": 12.569358816276202, "grad_norm": 0.050004176795482635, "learning_rate": 0.01, "loss": 1.945, "step": 122325 }, { "epoch": 12.569667077681874, "grad_norm": 0.06393969058990479, "learning_rate": 0.01, "loss": 1.9499, "step": 122328 }, { "epoch": 12.569975339087547, "grad_norm": 0.07683342695236206, "learning_rate": 0.01, "loss": 1.9174, "step": 122331 }, { "epoch": 12.570283600493218, "grad_norm": 0.05688316002488136, "learning_rate": 0.01, "loss": 1.9387, "step": 122334 }, { "epoch": 12.57059186189889, "grad_norm": 0.03711773455142975, "learning_rate": 0.01, "loss": 1.9292, "step": 122337 }, { "epoch": 12.570900123304563, "grad_norm": 0.049031812697649, "learning_rate": 0.01, "loss": 1.9314, "step": 122340 }, { "epoch": 12.571208384710234, "grad_norm": 0.13783791661262512, "learning_rate": 0.01, "loss": 1.9246, "step": 122343 }, { "epoch": 12.571516646115906, "grad_norm": 0.10908007621765137, "learning_rate": 0.01, "loss": 1.9458, "step": 122346 }, { "epoch": 12.571824907521579, "grad_norm": 0.048536717891693115, "learning_rate": 0.01, "loss": 1.9456, "step": 122349 }, { "epoch": 12.57213316892725, "grad_norm": 0.05644765868782997, "learning_rate": 0.01, "loss": 1.9273, "step": 122352 }, { "epoch": 12.572441430332923, "grad_norm": 0.04310297593474388, "learning_rate": 0.01, "loss": 1.9312, "step": 122355 }, { "epoch": 12.572749691738593, "grad_norm": 0.09706427156925201, "learning_rate": 0.01, "loss": 1.9012, "step": 122358 }, { "epoch": 12.573057953144266, "grad_norm": 0.10384303331375122, "learning_rate": 0.01, "loss": 1.9294, "step": 122361 }, { "epoch": 12.573366214549939, "grad_norm": 0.0773472934961319, "learning_rate": 0.01, "loss": 1.9033, "step": 122364 }, { "epoch": 12.57367447595561, "grad_norm": 0.056868892163038254, "learning_rate": 0.01, "loss": 1.9443, "step": 122367 }, { "epoch": 12.573982737361282, "grad_norm": 0.11514408886432648, "learning_rate": 0.01, "loss": 1.9288, "step": 122370 }, { "epoch": 12.574290998766955, "grad_norm": 0.047065265476703644, "learning_rate": 0.01, "loss": 1.9261, "step": 122373 }, { "epoch": 12.574599260172626, "grad_norm": 0.0542309433221817, "learning_rate": 0.01, "loss": 1.9602, "step": 122376 }, { "epoch": 12.574907521578298, "grad_norm": 0.10700059682130814, "learning_rate": 0.01, "loss": 1.925, "step": 122379 }, { "epoch": 12.575215782983971, "grad_norm": 0.10542161762714386, "learning_rate": 0.01, "loss": 1.9697, "step": 122382 }, { "epoch": 12.575524044389642, "grad_norm": 0.04567668214440346, "learning_rate": 0.01, "loss": 1.9259, "step": 122385 }, { "epoch": 12.575832305795315, "grad_norm": 0.08345144242048264, "learning_rate": 0.01, "loss": 1.944, "step": 122388 }, { "epoch": 12.576140567200987, "grad_norm": 0.06775045394897461, "learning_rate": 0.01, "loss": 1.9349, "step": 122391 }, { "epoch": 12.576448828606658, "grad_norm": 0.09641405940055847, "learning_rate": 0.01, "loss": 1.9553, "step": 122394 }, { "epoch": 12.57675709001233, "grad_norm": 0.054200150072574615, "learning_rate": 0.01, "loss": 1.9274, "step": 122397 }, { "epoch": 12.577065351418003, "grad_norm": 0.0304780974984169, "learning_rate": 0.01, "loss": 1.9345, "step": 122400 }, { "epoch": 12.577373612823674, "grad_norm": 0.10586915165185928, "learning_rate": 0.01, "loss": 1.9363, "step": 122403 }, { "epoch": 12.577681874229347, "grad_norm": 0.13238736987113953, "learning_rate": 0.01, "loss": 1.9241, "step": 122406 }, { "epoch": 12.577990135635018, "grad_norm": 0.07128183543682098, "learning_rate": 0.01, "loss": 1.9292, "step": 122409 }, { "epoch": 12.57829839704069, "grad_norm": 0.037349313497543335, "learning_rate": 0.01, "loss": 1.9268, "step": 122412 }, { "epoch": 12.578606658446363, "grad_norm": 0.04454770311713219, "learning_rate": 0.01, "loss": 1.9339, "step": 122415 }, { "epoch": 12.578914919852034, "grad_norm": 0.04878051206469536, "learning_rate": 0.01, "loss": 1.9206, "step": 122418 }, { "epoch": 12.579223181257706, "grad_norm": 0.04272807762026787, "learning_rate": 0.01, "loss": 1.9182, "step": 122421 }, { "epoch": 12.579531442663379, "grad_norm": 0.12098665535449982, "learning_rate": 0.01, "loss": 1.9221, "step": 122424 }, { "epoch": 12.57983970406905, "grad_norm": 0.06325336545705795, "learning_rate": 0.01, "loss": 1.9418, "step": 122427 }, { "epoch": 12.580147965474723, "grad_norm": 0.08749137073755264, "learning_rate": 0.01, "loss": 1.9402, "step": 122430 }, { "epoch": 12.580456226880395, "grad_norm": 0.06324999034404755, "learning_rate": 0.01, "loss": 1.9482, "step": 122433 }, { "epoch": 12.580764488286066, "grad_norm": 0.07804904133081436, "learning_rate": 0.01, "loss": 1.9364, "step": 122436 }, { "epoch": 12.581072749691739, "grad_norm": 0.13983319699764252, "learning_rate": 0.01, "loss": 1.9257, "step": 122439 }, { "epoch": 12.581381011097411, "grad_norm": 0.037088699638843536, "learning_rate": 0.01, "loss": 1.9177, "step": 122442 }, { "epoch": 12.581689272503082, "grad_norm": 0.046924177557229996, "learning_rate": 0.01, "loss": 1.9158, "step": 122445 }, { "epoch": 12.581997533908755, "grad_norm": 0.10420291870832443, "learning_rate": 0.01, "loss": 1.9285, "step": 122448 }, { "epoch": 12.582305795314426, "grad_norm": 0.12177906930446625, "learning_rate": 0.01, "loss": 1.9397, "step": 122451 }, { "epoch": 12.582614056720098, "grad_norm": 0.07504318654537201, "learning_rate": 0.01, "loss": 1.9213, "step": 122454 }, { "epoch": 12.582922318125771, "grad_norm": 0.04811587929725647, "learning_rate": 0.01, "loss": 1.9294, "step": 122457 }, { "epoch": 12.583230579531442, "grad_norm": 0.04745061695575714, "learning_rate": 0.01, "loss": 1.9517, "step": 122460 }, { "epoch": 12.583538840937115, "grad_norm": 0.09733489900827408, "learning_rate": 0.01, "loss": 1.9325, "step": 122463 }, { "epoch": 12.583847102342787, "grad_norm": 0.05689321085810661, "learning_rate": 0.01, "loss": 1.9461, "step": 122466 }, { "epoch": 12.584155363748458, "grad_norm": 0.10812503844499588, "learning_rate": 0.01, "loss": 1.9345, "step": 122469 }, { "epoch": 12.58446362515413, "grad_norm": 0.041134048253297806, "learning_rate": 0.01, "loss": 1.9444, "step": 122472 }, { "epoch": 12.584771886559803, "grad_norm": 0.04747617617249489, "learning_rate": 0.01, "loss": 1.9272, "step": 122475 }, { "epoch": 12.585080147965474, "grad_norm": 0.13469180464744568, "learning_rate": 0.01, "loss": 1.9554, "step": 122478 }, { "epoch": 12.585388409371147, "grad_norm": 0.04839875176548958, "learning_rate": 0.01, "loss": 1.9479, "step": 122481 }, { "epoch": 12.58569667077682, "grad_norm": 0.08033574372529984, "learning_rate": 0.01, "loss": 1.9408, "step": 122484 }, { "epoch": 12.58600493218249, "grad_norm": 0.05126413702964783, "learning_rate": 0.01, "loss": 1.9404, "step": 122487 }, { "epoch": 12.586313193588163, "grad_norm": 0.09988853335380554, "learning_rate": 0.01, "loss": 1.9307, "step": 122490 }, { "epoch": 12.586621454993836, "grad_norm": 0.033685505390167236, "learning_rate": 0.01, "loss": 1.9043, "step": 122493 }, { "epoch": 12.586929716399506, "grad_norm": 0.08883517235517502, "learning_rate": 0.01, "loss": 1.9459, "step": 122496 }, { "epoch": 12.587237977805179, "grad_norm": 0.044683970510959625, "learning_rate": 0.01, "loss": 1.9409, "step": 122499 }, { "epoch": 12.587546239210852, "grad_norm": 0.09659519046545029, "learning_rate": 0.01, "loss": 1.9341, "step": 122502 }, { "epoch": 12.587854500616523, "grad_norm": 0.07233305275440216, "learning_rate": 0.01, "loss": 1.9439, "step": 122505 }, { "epoch": 12.588162762022195, "grad_norm": 0.04250168055295944, "learning_rate": 0.01, "loss": 1.9216, "step": 122508 }, { "epoch": 12.588471023427866, "grad_norm": 0.09081467986106873, "learning_rate": 0.01, "loss": 1.9441, "step": 122511 }, { "epoch": 12.588779284833539, "grad_norm": 0.10205347836017609, "learning_rate": 0.01, "loss": 1.9386, "step": 122514 }, { "epoch": 12.589087546239211, "grad_norm": 0.12037431448698044, "learning_rate": 0.01, "loss": 1.9208, "step": 122517 }, { "epoch": 12.589395807644882, "grad_norm": 0.08119291067123413, "learning_rate": 0.01, "loss": 1.934, "step": 122520 }, { "epoch": 12.589704069050555, "grad_norm": 0.0845591351389885, "learning_rate": 0.01, "loss": 1.9258, "step": 122523 }, { "epoch": 12.590012330456227, "grad_norm": 0.036594972014427185, "learning_rate": 0.01, "loss": 1.9262, "step": 122526 }, { "epoch": 12.590320591861898, "grad_norm": 0.04575011879205704, "learning_rate": 0.01, "loss": 1.9156, "step": 122529 }, { "epoch": 12.590628853267571, "grad_norm": 0.05228526145219803, "learning_rate": 0.01, "loss": 1.9568, "step": 122532 }, { "epoch": 12.590937114673244, "grad_norm": 0.0767127051949501, "learning_rate": 0.01, "loss": 1.9202, "step": 122535 }, { "epoch": 12.591245376078914, "grad_norm": 0.06612443178892136, "learning_rate": 0.01, "loss": 1.9257, "step": 122538 }, { "epoch": 12.591553637484587, "grad_norm": 0.03941207379102707, "learning_rate": 0.01, "loss": 1.9327, "step": 122541 }, { "epoch": 12.59186189889026, "grad_norm": 0.04176707565784454, "learning_rate": 0.01, "loss": 1.9366, "step": 122544 }, { "epoch": 12.59217016029593, "grad_norm": 0.053258270025253296, "learning_rate": 0.01, "loss": 1.9185, "step": 122547 }, { "epoch": 12.592478421701603, "grad_norm": 0.0759519562125206, "learning_rate": 0.01, "loss": 1.9348, "step": 122550 }, { "epoch": 12.592786683107274, "grad_norm": 0.06660858541727066, "learning_rate": 0.01, "loss": 1.9261, "step": 122553 }, { "epoch": 12.593094944512947, "grad_norm": 0.1326006054878235, "learning_rate": 0.01, "loss": 1.9243, "step": 122556 }, { "epoch": 12.59340320591862, "grad_norm": 0.05958440154790878, "learning_rate": 0.01, "loss": 1.9247, "step": 122559 }, { "epoch": 12.59371146732429, "grad_norm": 0.04930621758103371, "learning_rate": 0.01, "loss": 1.9408, "step": 122562 }, { "epoch": 12.594019728729963, "grad_norm": 0.03780602663755417, "learning_rate": 0.01, "loss": 1.926, "step": 122565 }, { "epoch": 12.594327990135636, "grad_norm": 0.05326778441667557, "learning_rate": 0.01, "loss": 1.931, "step": 122568 }, { "epoch": 12.594636251541306, "grad_norm": 0.07156463712453842, "learning_rate": 0.01, "loss": 1.9304, "step": 122571 }, { "epoch": 12.594944512946979, "grad_norm": 0.07190526276826859, "learning_rate": 0.01, "loss": 1.9175, "step": 122574 }, { "epoch": 12.595252774352652, "grad_norm": 0.11505575478076935, "learning_rate": 0.01, "loss": 1.9442, "step": 122577 }, { "epoch": 12.595561035758323, "grad_norm": 0.06922391802072525, "learning_rate": 0.01, "loss": 1.9193, "step": 122580 }, { "epoch": 12.595869297163995, "grad_norm": 0.037435878068208694, "learning_rate": 0.01, "loss": 1.9537, "step": 122583 }, { "epoch": 12.596177558569668, "grad_norm": 0.037485379725694656, "learning_rate": 0.01, "loss": 1.928, "step": 122586 }, { "epoch": 12.596485819975339, "grad_norm": 0.04112362116575241, "learning_rate": 0.01, "loss": 1.9191, "step": 122589 }, { "epoch": 12.596794081381011, "grad_norm": 0.10568088293075562, "learning_rate": 0.01, "loss": 1.9346, "step": 122592 }, { "epoch": 12.597102342786684, "grad_norm": 0.07157807052135468, "learning_rate": 0.01, "loss": 1.9186, "step": 122595 }, { "epoch": 12.597410604192355, "grad_norm": 0.08673489093780518, "learning_rate": 0.01, "loss": 1.9476, "step": 122598 }, { "epoch": 12.597718865598027, "grad_norm": 0.06275806576013565, "learning_rate": 0.01, "loss": 1.9358, "step": 122601 }, { "epoch": 12.598027127003698, "grad_norm": 0.1066279411315918, "learning_rate": 0.01, "loss": 1.9328, "step": 122604 }, { "epoch": 12.598335388409371, "grad_norm": 0.13730093836784363, "learning_rate": 0.01, "loss": 1.9212, "step": 122607 }, { "epoch": 12.598643649815044, "grad_norm": 0.04778913035988808, "learning_rate": 0.01, "loss": 1.9498, "step": 122610 }, { "epoch": 12.598951911220714, "grad_norm": 0.07837532460689545, "learning_rate": 0.01, "loss": 1.9307, "step": 122613 }, { "epoch": 12.599260172626387, "grad_norm": 0.07405108213424683, "learning_rate": 0.01, "loss": 1.9195, "step": 122616 }, { "epoch": 12.59956843403206, "grad_norm": 0.07231219857931137, "learning_rate": 0.01, "loss": 1.9121, "step": 122619 }, { "epoch": 12.59987669543773, "grad_norm": 0.08693543821573257, "learning_rate": 0.01, "loss": 1.9337, "step": 122622 }, { "epoch": 12.600184956843403, "grad_norm": 0.07112149894237518, "learning_rate": 0.01, "loss": 1.9312, "step": 122625 }, { "epoch": 12.600493218249076, "grad_norm": 0.13867275416851044, "learning_rate": 0.01, "loss": 1.9212, "step": 122628 }, { "epoch": 12.600801479654747, "grad_norm": 0.1485087275505066, "learning_rate": 0.01, "loss": 1.9393, "step": 122631 }, { "epoch": 12.60110974106042, "grad_norm": 0.09758572280406952, "learning_rate": 0.01, "loss": 1.9382, "step": 122634 }, { "epoch": 12.601418002466092, "grad_norm": 0.05771038681268692, "learning_rate": 0.01, "loss": 1.9427, "step": 122637 }, { "epoch": 12.601726263871763, "grad_norm": 0.07908354699611664, "learning_rate": 0.01, "loss": 1.9499, "step": 122640 }, { "epoch": 12.602034525277436, "grad_norm": 0.050742391496896744, "learning_rate": 0.01, "loss": 1.9232, "step": 122643 }, { "epoch": 12.602342786683106, "grad_norm": 0.040271371603012085, "learning_rate": 0.01, "loss": 1.9359, "step": 122646 }, { "epoch": 12.602651048088779, "grad_norm": 0.12437929958105087, "learning_rate": 0.01, "loss": 1.9393, "step": 122649 }, { "epoch": 12.602959309494452, "grad_norm": 0.0745621845126152, "learning_rate": 0.01, "loss": 1.9205, "step": 122652 }, { "epoch": 12.603267570900123, "grad_norm": 0.033611565828323364, "learning_rate": 0.01, "loss": 1.9212, "step": 122655 }, { "epoch": 12.603575832305795, "grad_norm": 0.14124849438667297, "learning_rate": 0.01, "loss": 1.9304, "step": 122658 }, { "epoch": 12.603884093711468, "grad_norm": 0.10393095016479492, "learning_rate": 0.01, "loss": 1.9736, "step": 122661 }, { "epoch": 12.604192355117139, "grad_norm": 0.049038369208574295, "learning_rate": 0.01, "loss": 1.9408, "step": 122664 }, { "epoch": 12.604500616522811, "grad_norm": 0.04277941957116127, "learning_rate": 0.01, "loss": 1.913, "step": 122667 }, { "epoch": 12.604808877928484, "grad_norm": 0.03375561535358429, "learning_rate": 0.01, "loss": 1.9369, "step": 122670 }, { "epoch": 12.605117139334155, "grad_norm": 0.07068139314651489, "learning_rate": 0.01, "loss": 1.9091, "step": 122673 }, { "epoch": 12.605425400739827, "grad_norm": 0.08763331174850464, "learning_rate": 0.01, "loss": 1.9162, "step": 122676 }, { "epoch": 12.6057336621455, "grad_norm": 0.0836746096611023, "learning_rate": 0.01, "loss": 1.9166, "step": 122679 }, { "epoch": 12.606041923551171, "grad_norm": 0.031440578401088715, "learning_rate": 0.01, "loss": 1.9453, "step": 122682 }, { "epoch": 12.606350184956844, "grad_norm": 0.06475764513015747, "learning_rate": 0.01, "loss": 1.9263, "step": 122685 }, { "epoch": 12.606658446362516, "grad_norm": 0.13389822840690613, "learning_rate": 0.01, "loss": 1.9392, "step": 122688 }, { "epoch": 12.606966707768187, "grad_norm": 0.05801908299326897, "learning_rate": 0.01, "loss": 1.9238, "step": 122691 }, { "epoch": 12.60727496917386, "grad_norm": 0.05795949324965477, "learning_rate": 0.01, "loss": 1.9525, "step": 122694 }, { "epoch": 12.607583230579532, "grad_norm": 0.030270157381892204, "learning_rate": 0.01, "loss": 1.9671, "step": 122697 }, { "epoch": 12.607891491985203, "grad_norm": 0.04646274074912071, "learning_rate": 0.01, "loss": 1.9465, "step": 122700 }, { "epoch": 12.608199753390876, "grad_norm": 0.09027555584907532, "learning_rate": 0.01, "loss": 1.919, "step": 122703 }, { "epoch": 12.608508014796547, "grad_norm": 0.08750518411397934, "learning_rate": 0.01, "loss": 1.9206, "step": 122706 }, { "epoch": 12.60881627620222, "grad_norm": 0.06673325598239899, "learning_rate": 0.01, "loss": 1.9045, "step": 122709 }, { "epoch": 12.609124537607892, "grad_norm": 0.0680864155292511, "learning_rate": 0.01, "loss": 1.9419, "step": 122712 }, { "epoch": 12.609432799013563, "grad_norm": 0.15428978204727173, "learning_rate": 0.01, "loss": 1.9501, "step": 122715 }, { "epoch": 12.609741060419235, "grad_norm": 0.047103870660066605, "learning_rate": 0.01, "loss": 1.911, "step": 122718 }, { "epoch": 12.610049321824908, "grad_norm": 0.07863438129425049, "learning_rate": 0.01, "loss": 1.9259, "step": 122721 }, { "epoch": 12.610357583230579, "grad_norm": 0.11159279197454453, "learning_rate": 0.01, "loss": 1.9611, "step": 122724 }, { "epoch": 12.610665844636252, "grad_norm": 0.10510515421628952, "learning_rate": 0.01, "loss": 1.9443, "step": 122727 }, { "epoch": 12.610974106041924, "grad_norm": 0.07300017774105072, "learning_rate": 0.01, "loss": 1.9471, "step": 122730 }, { "epoch": 12.611282367447595, "grad_norm": 0.06340684741735458, "learning_rate": 0.01, "loss": 1.9537, "step": 122733 }, { "epoch": 12.611590628853268, "grad_norm": 0.06944388151168823, "learning_rate": 0.01, "loss": 1.9153, "step": 122736 }, { "epoch": 12.61189889025894, "grad_norm": 0.05677616968750954, "learning_rate": 0.01, "loss": 1.967, "step": 122739 }, { "epoch": 12.612207151664611, "grad_norm": 0.07102473825216293, "learning_rate": 0.01, "loss": 1.9373, "step": 122742 }, { "epoch": 12.612515413070284, "grad_norm": 0.10698739439249039, "learning_rate": 0.01, "loss": 1.9258, "step": 122745 }, { "epoch": 12.612823674475955, "grad_norm": 0.049000632017850876, "learning_rate": 0.01, "loss": 1.9255, "step": 122748 }, { "epoch": 12.613131935881627, "grad_norm": 0.10147199034690857, "learning_rate": 0.01, "loss": 1.9462, "step": 122751 }, { "epoch": 12.6134401972873, "grad_norm": 0.08941066265106201, "learning_rate": 0.01, "loss": 1.9429, "step": 122754 }, { "epoch": 12.613748458692971, "grad_norm": 0.06283801794052124, "learning_rate": 0.01, "loss": 1.9491, "step": 122757 }, { "epoch": 12.614056720098644, "grad_norm": 0.04824201762676239, "learning_rate": 0.01, "loss": 1.9278, "step": 122760 }, { "epoch": 12.614364981504316, "grad_norm": 0.05474422872066498, "learning_rate": 0.01, "loss": 1.9014, "step": 122763 }, { "epoch": 12.614673242909987, "grad_norm": 0.06307218223810196, "learning_rate": 0.01, "loss": 1.9437, "step": 122766 }, { "epoch": 12.61498150431566, "grad_norm": 0.0631837546825409, "learning_rate": 0.01, "loss": 1.9592, "step": 122769 }, { "epoch": 12.615289765721332, "grad_norm": 0.06105835363268852, "learning_rate": 0.01, "loss": 1.9464, "step": 122772 }, { "epoch": 12.615598027127003, "grad_norm": 0.06466460227966309, "learning_rate": 0.01, "loss": 1.9409, "step": 122775 }, { "epoch": 12.615906288532676, "grad_norm": 0.03602457046508789, "learning_rate": 0.01, "loss": 1.9504, "step": 122778 }, { "epoch": 12.616214549938348, "grad_norm": 0.03306679427623749, "learning_rate": 0.01, "loss": 1.942, "step": 122781 }, { "epoch": 12.61652281134402, "grad_norm": 0.05408303439617157, "learning_rate": 0.01, "loss": 1.9352, "step": 122784 }, { "epoch": 12.616831072749692, "grad_norm": 0.11107456684112549, "learning_rate": 0.01, "loss": 1.9462, "step": 122787 }, { "epoch": 12.617139334155365, "grad_norm": 0.04231736809015274, "learning_rate": 0.01, "loss": 1.9414, "step": 122790 }, { "epoch": 12.617447595561035, "grad_norm": 0.11673669517040253, "learning_rate": 0.01, "loss": 1.9501, "step": 122793 }, { "epoch": 12.617755856966708, "grad_norm": 0.062019962817430496, "learning_rate": 0.01, "loss": 1.9412, "step": 122796 }, { "epoch": 12.618064118372379, "grad_norm": 0.07347622513771057, "learning_rate": 0.01, "loss": 1.9726, "step": 122799 }, { "epoch": 12.618372379778052, "grad_norm": 0.0687282383441925, "learning_rate": 0.01, "loss": 1.9498, "step": 122802 }, { "epoch": 12.618680641183724, "grad_norm": 0.13374529778957367, "learning_rate": 0.01, "loss": 1.924, "step": 122805 }, { "epoch": 12.618988902589395, "grad_norm": 0.1499875783920288, "learning_rate": 0.01, "loss": 1.9266, "step": 122808 }, { "epoch": 12.619297163995068, "grad_norm": 0.1031130775809288, "learning_rate": 0.01, "loss": 1.9155, "step": 122811 }, { "epoch": 12.61960542540074, "grad_norm": 0.06197386234998703, "learning_rate": 0.01, "loss": 1.9587, "step": 122814 }, { "epoch": 12.619913686806411, "grad_norm": 0.045583710074424744, "learning_rate": 0.01, "loss": 1.9202, "step": 122817 }, { "epoch": 12.620221948212084, "grad_norm": 0.06573253870010376, "learning_rate": 0.01, "loss": 1.9407, "step": 122820 }, { "epoch": 12.620530209617757, "grad_norm": 0.038534976541996, "learning_rate": 0.01, "loss": 1.954, "step": 122823 }, { "epoch": 12.620838471023427, "grad_norm": 0.046993188560009, "learning_rate": 0.01, "loss": 1.9441, "step": 122826 }, { "epoch": 12.6211467324291, "grad_norm": 0.10635440051555634, "learning_rate": 0.01, "loss": 1.9347, "step": 122829 }, { "epoch": 12.621454993834773, "grad_norm": 0.033428799360990524, "learning_rate": 0.01, "loss": 1.9297, "step": 122832 }, { "epoch": 12.621763255240444, "grad_norm": 0.11464046686887741, "learning_rate": 0.01, "loss": 1.945, "step": 122835 }, { "epoch": 12.622071516646116, "grad_norm": 0.08591221272945404, "learning_rate": 0.01, "loss": 1.9398, "step": 122838 }, { "epoch": 12.622379778051787, "grad_norm": 0.07906792312860489, "learning_rate": 0.01, "loss": 1.9494, "step": 122841 }, { "epoch": 12.62268803945746, "grad_norm": 0.058949537575244904, "learning_rate": 0.01, "loss": 1.9491, "step": 122844 }, { "epoch": 12.622996300863132, "grad_norm": 0.10783743858337402, "learning_rate": 0.01, "loss": 1.9417, "step": 122847 }, { "epoch": 12.623304562268803, "grad_norm": 0.10258825868368149, "learning_rate": 0.01, "loss": 1.926, "step": 122850 }, { "epoch": 12.623612823674476, "grad_norm": 0.09038569778203964, "learning_rate": 0.01, "loss": 1.9359, "step": 122853 }, { "epoch": 12.623921085080148, "grad_norm": 0.11617942899465561, "learning_rate": 0.01, "loss": 1.9187, "step": 122856 }, { "epoch": 12.62422934648582, "grad_norm": 0.14557698369026184, "learning_rate": 0.01, "loss": 1.9517, "step": 122859 }, { "epoch": 12.624537607891492, "grad_norm": 0.08018018305301666, "learning_rate": 0.01, "loss": 1.9348, "step": 122862 }, { "epoch": 12.624845869297165, "grad_norm": 0.06026039272546768, "learning_rate": 0.01, "loss": 1.9296, "step": 122865 }, { "epoch": 12.625154130702835, "grad_norm": 0.05437018349766731, "learning_rate": 0.01, "loss": 1.9468, "step": 122868 }, { "epoch": 12.625462392108508, "grad_norm": 0.06585533916950226, "learning_rate": 0.01, "loss": 1.9583, "step": 122871 }, { "epoch": 12.62577065351418, "grad_norm": 0.053909752517938614, "learning_rate": 0.01, "loss": 1.9017, "step": 122874 }, { "epoch": 12.626078914919852, "grad_norm": 0.051786426454782486, "learning_rate": 0.01, "loss": 1.9435, "step": 122877 }, { "epoch": 12.626387176325524, "grad_norm": 0.0947243794798851, "learning_rate": 0.01, "loss": 1.9412, "step": 122880 }, { "epoch": 12.626695437731197, "grad_norm": 0.05719350650906563, "learning_rate": 0.01, "loss": 1.9561, "step": 122883 }, { "epoch": 12.627003699136868, "grad_norm": 0.14443199336528778, "learning_rate": 0.01, "loss": 1.947, "step": 122886 }, { "epoch": 12.62731196054254, "grad_norm": 0.05690363794565201, "learning_rate": 0.01, "loss": 1.9004, "step": 122889 }, { "epoch": 12.627620221948213, "grad_norm": 0.04786375164985657, "learning_rate": 0.01, "loss": 1.9278, "step": 122892 }, { "epoch": 12.627928483353884, "grad_norm": 0.09731481224298477, "learning_rate": 0.01, "loss": 1.9447, "step": 122895 }, { "epoch": 12.628236744759556, "grad_norm": 0.05569015070796013, "learning_rate": 0.01, "loss": 1.96, "step": 122898 }, { "epoch": 12.628545006165227, "grad_norm": 0.05832977965474129, "learning_rate": 0.01, "loss": 1.9148, "step": 122901 }, { "epoch": 12.6288532675709, "grad_norm": 0.06494420766830444, "learning_rate": 0.01, "loss": 1.9295, "step": 122904 }, { "epoch": 12.629161528976573, "grad_norm": 0.05205019563436508, "learning_rate": 0.01, "loss": 1.9466, "step": 122907 }, { "epoch": 12.629469790382243, "grad_norm": 0.04261380434036255, "learning_rate": 0.01, "loss": 1.9534, "step": 122910 }, { "epoch": 12.629778051787916, "grad_norm": 0.0320679247379303, "learning_rate": 0.01, "loss": 1.9283, "step": 122913 }, { "epoch": 12.630086313193589, "grad_norm": 0.13554465770721436, "learning_rate": 0.01, "loss": 1.9574, "step": 122916 }, { "epoch": 12.63039457459926, "grad_norm": 0.037886518985033035, "learning_rate": 0.01, "loss": 1.9449, "step": 122919 }, { "epoch": 12.630702836004932, "grad_norm": 0.044243793934583664, "learning_rate": 0.01, "loss": 1.9359, "step": 122922 }, { "epoch": 12.631011097410605, "grad_norm": 0.046359576284885406, "learning_rate": 0.01, "loss": 1.9581, "step": 122925 }, { "epoch": 12.631319358816276, "grad_norm": 0.036917366087436676, "learning_rate": 0.01, "loss": 1.9374, "step": 122928 }, { "epoch": 12.631627620221948, "grad_norm": 0.04713550955057144, "learning_rate": 0.01, "loss": 1.9281, "step": 122931 }, { "epoch": 12.631935881627621, "grad_norm": 0.0435897521674633, "learning_rate": 0.01, "loss": 1.9408, "step": 122934 }, { "epoch": 12.632244143033292, "grad_norm": 0.0550503171980381, "learning_rate": 0.01, "loss": 1.9357, "step": 122937 }, { "epoch": 12.632552404438965, "grad_norm": 0.0765051394701004, "learning_rate": 0.01, "loss": 1.9357, "step": 122940 }, { "epoch": 12.632860665844635, "grad_norm": 0.11795663833618164, "learning_rate": 0.01, "loss": 1.9203, "step": 122943 }, { "epoch": 12.633168927250308, "grad_norm": 0.13961495459079742, "learning_rate": 0.01, "loss": 1.9408, "step": 122946 }, { "epoch": 12.63347718865598, "grad_norm": 0.08773016929626465, "learning_rate": 0.01, "loss": 1.927, "step": 122949 }, { "epoch": 12.633785450061652, "grad_norm": 0.06994469463825226, "learning_rate": 0.01, "loss": 1.9193, "step": 122952 }, { "epoch": 12.634093711467324, "grad_norm": 0.048322346061468124, "learning_rate": 0.01, "loss": 1.9186, "step": 122955 }, { "epoch": 12.634401972872997, "grad_norm": 0.08110317587852478, "learning_rate": 0.01, "loss": 1.9552, "step": 122958 }, { "epoch": 12.634710234278668, "grad_norm": 0.06748035550117493, "learning_rate": 0.01, "loss": 1.9245, "step": 122961 }, { "epoch": 12.63501849568434, "grad_norm": 0.06809863448143005, "learning_rate": 0.01, "loss": 1.954, "step": 122964 }, { "epoch": 12.635326757090013, "grad_norm": 0.05909579619765282, "learning_rate": 0.01, "loss": 1.9462, "step": 122967 }, { "epoch": 12.635635018495684, "grad_norm": 0.04641105979681015, "learning_rate": 0.01, "loss": 1.9365, "step": 122970 }, { "epoch": 12.635943279901356, "grad_norm": 0.038915254175662994, "learning_rate": 0.01, "loss": 1.9425, "step": 122973 }, { "epoch": 12.636251541307029, "grad_norm": 0.10991251468658447, "learning_rate": 0.01, "loss": 1.9282, "step": 122976 }, { "epoch": 12.6365598027127, "grad_norm": 0.0334751196205616, "learning_rate": 0.01, "loss": 1.9286, "step": 122979 }, { "epoch": 12.636868064118373, "grad_norm": 0.0603213906288147, "learning_rate": 0.01, "loss": 1.9494, "step": 122982 }, { "epoch": 12.637176325524045, "grad_norm": 0.1104741245508194, "learning_rate": 0.01, "loss": 1.9369, "step": 122985 }, { "epoch": 12.637484586929716, "grad_norm": 0.036284010857343674, "learning_rate": 0.01, "loss": 1.9395, "step": 122988 }, { "epoch": 12.637792848335389, "grad_norm": 0.0331115685403347, "learning_rate": 0.01, "loss": 1.9247, "step": 122991 }, { "epoch": 12.63810110974106, "grad_norm": 0.10184327512979507, "learning_rate": 0.01, "loss": 1.9327, "step": 122994 }, { "epoch": 12.638409371146732, "grad_norm": 0.07448579370975494, "learning_rate": 0.01, "loss": 1.945, "step": 122997 }, { "epoch": 12.638717632552405, "grad_norm": 0.043106839060783386, "learning_rate": 0.01, "loss": 1.9203, "step": 123000 }, { "epoch": 12.639025893958076, "grad_norm": 0.13510780036449432, "learning_rate": 0.01, "loss": 1.9552, "step": 123003 }, { "epoch": 12.639334155363748, "grad_norm": 0.07734852284193039, "learning_rate": 0.01, "loss": 1.9217, "step": 123006 }, { "epoch": 12.639642416769421, "grad_norm": 0.08094023168087006, "learning_rate": 0.01, "loss": 1.9337, "step": 123009 }, { "epoch": 12.639950678175092, "grad_norm": 0.07922966033220291, "learning_rate": 0.01, "loss": 1.9431, "step": 123012 }, { "epoch": 12.640258939580765, "grad_norm": 0.052409034222364426, "learning_rate": 0.01, "loss": 1.9375, "step": 123015 }, { "epoch": 12.640567200986437, "grad_norm": 0.04294409230351448, "learning_rate": 0.01, "loss": 1.9369, "step": 123018 }, { "epoch": 12.640875462392108, "grad_norm": 0.11415386199951172, "learning_rate": 0.01, "loss": 1.9025, "step": 123021 }, { "epoch": 12.64118372379778, "grad_norm": 0.03108101338148117, "learning_rate": 0.01, "loss": 1.9105, "step": 123024 }, { "epoch": 12.641491985203453, "grad_norm": 0.08873893320560455, "learning_rate": 0.01, "loss": 1.9238, "step": 123027 }, { "epoch": 12.641800246609124, "grad_norm": 0.058484312146902084, "learning_rate": 0.01, "loss": 1.935, "step": 123030 }, { "epoch": 12.642108508014797, "grad_norm": 0.1172776147723198, "learning_rate": 0.01, "loss": 1.9247, "step": 123033 }, { "epoch": 12.642416769420468, "grad_norm": 0.08727061003446579, "learning_rate": 0.01, "loss": 1.942, "step": 123036 }, { "epoch": 12.64272503082614, "grad_norm": 0.04476449266076088, "learning_rate": 0.01, "loss": 1.926, "step": 123039 }, { "epoch": 12.643033292231813, "grad_norm": 0.0652472972869873, "learning_rate": 0.01, "loss": 1.9276, "step": 123042 }, { "epoch": 12.643341553637484, "grad_norm": 0.16113439202308655, "learning_rate": 0.01, "loss": 1.9376, "step": 123045 }, { "epoch": 12.643649815043156, "grad_norm": 0.0810934454202652, "learning_rate": 0.01, "loss": 1.9532, "step": 123048 }, { "epoch": 12.643958076448829, "grad_norm": 0.036866363137960434, "learning_rate": 0.01, "loss": 1.9694, "step": 123051 }, { "epoch": 12.6442663378545, "grad_norm": 0.04961570352315903, "learning_rate": 0.01, "loss": 1.9223, "step": 123054 }, { "epoch": 12.644574599260173, "grad_norm": 0.046355172991752625, "learning_rate": 0.01, "loss": 1.9512, "step": 123057 }, { "epoch": 12.644882860665845, "grad_norm": 0.05054159089922905, "learning_rate": 0.01, "loss": 1.9564, "step": 123060 }, { "epoch": 12.645191122071516, "grad_norm": 0.03561927005648613, "learning_rate": 0.01, "loss": 1.9249, "step": 123063 }, { "epoch": 12.645499383477189, "grad_norm": 0.09091931581497192, "learning_rate": 0.01, "loss": 1.9509, "step": 123066 }, { "epoch": 12.645807644882861, "grad_norm": 0.06427352130413055, "learning_rate": 0.01, "loss": 1.9197, "step": 123069 }, { "epoch": 12.646115906288532, "grad_norm": 0.05945998430252075, "learning_rate": 0.01, "loss": 1.9469, "step": 123072 }, { "epoch": 12.646424167694205, "grad_norm": 0.17813847959041595, "learning_rate": 0.01, "loss": 1.9443, "step": 123075 }, { "epoch": 12.646732429099877, "grad_norm": 0.0860227569937706, "learning_rate": 0.01, "loss": 1.9283, "step": 123078 }, { "epoch": 12.647040690505548, "grad_norm": 0.05374440178275108, "learning_rate": 0.01, "loss": 1.9517, "step": 123081 }, { "epoch": 12.647348951911221, "grad_norm": 0.04365652799606323, "learning_rate": 0.01, "loss": 1.9366, "step": 123084 }, { "epoch": 12.647657213316894, "grad_norm": 0.042210496962070465, "learning_rate": 0.01, "loss": 1.9106, "step": 123087 }, { "epoch": 12.647965474722564, "grad_norm": 0.06997524946928024, "learning_rate": 0.01, "loss": 1.9372, "step": 123090 }, { "epoch": 12.648273736128237, "grad_norm": 0.08367611467838287, "learning_rate": 0.01, "loss": 1.9149, "step": 123093 }, { "epoch": 12.648581997533908, "grad_norm": 0.05943150445818901, "learning_rate": 0.01, "loss": 1.9344, "step": 123096 }, { "epoch": 12.64889025893958, "grad_norm": 0.04505249112844467, "learning_rate": 0.01, "loss": 1.9316, "step": 123099 }, { "epoch": 12.649198520345253, "grad_norm": 0.12502211332321167, "learning_rate": 0.01, "loss": 1.9466, "step": 123102 }, { "epoch": 12.649506781750924, "grad_norm": 0.11971963942050934, "learning_rate": 0.01, "loss": 1.9134, "step": 123105 }, { "epoch": 12.649815043156597, "grad_norm": 0.09976421296596527, "learning_rate": 0.01, "loss": 1.9157, "step": 123108 }, { "epoch": 12.65012330456227, "grad_norm": 0.11778843402862549, "learning_rate": 0.01, "loss": 1.9172, "step": 123111 }, { "epoch": 12.65043156596794, "grad_norm": 0.06445219367742538, "learning_rate": 0.01, "loss": 1.9475, "step": 123114 }, { "epoch": 12.650739827373613, "grad_norm": 0.028855200856924057, "learning_rate": 0.01, "loss": 1.897, "step": 123117 }, { "epoch": 12.651048088779286, "grad_norm": 0.04714740812778473, "learning_rate": 0.01, "loss": 1.9545, "step": 123120 }, { "epoch": 12.651356350184956, "grad_norm": 0.06030753254890442, "learning_rate": 0.01, "loss": 1.9168, "step": 123123 }, { "epoch": 12.651664611590629, "grad_norm": 0.059579312801361084, "learning_rate": 0.01, "loss": 1.9358, "step": 123126 }, { "epoch": 12.651972872996302, "grad_norm": 0.04322392866015434, "learning_rate": 0.01, "loss": 1.9346, "step": 123129 }, { "epoch": 12.652281134401973, "grad_norm": 0.04606184735894203, "learning_rate": 0.01, "loss": 1.9498, "step": 123132 }, { "epoch": 12.652589395807645, "grad_norm": 0.12499433755874634, "learning_rate": 0.01, "loss": 1.9479, "step": 123135 }, { "epoch": 12.652897657213316, "grad_norm": 0.11938849836587906, "learning_rate": 0.01, "loss": 1.9516, "step": 123138 }, { "epoch": 12.653205918618989, "grad_norm": 0.042211614549160004, "learning_rate": 0.01, "loss": 1.9456, "step": 123141 }, { "epoch": 12.653514180024661, "grad_norm": 0.0965668112039566, "learning_rate": 0.01, "loss": 1.9228, "step": 123144 }, { "epoch": 12.653822441430332, "grad_norm": 0.05237396061420441, "learning_rate": 0.01, "loss": 1.9267, "step": 123147 }, { "epoch": 12.654130702836005, "grad_norm": 0.08860108256340027, "learning_rate": 0.01, "loss": 1.9319, "step": 123150 }, { "epoch": 12.654438964241677, "grad_norm": 0.07654953002929688, "learning_rate": 0.01, "loss": 1.9256, "step": 123153 }, { "epoch": 12.654747225647348, "grad_norm": 0.09266380220651627, "learning_rate": 0.01, "loss": 1.9416, "step": 123156 }, { "epoch": 12.655055487053021, "grad_norm": 0.0614088736474514, "learning_rate": 0.01, "loss": 1.9466, "step": 123159 }, { "epoch": 12.655363748458694, "grad_norm": 0.04387489706277847, "learning_rate": 0.01, "loss": 1.9258, "step": 123162 }, { "epoch": 12.655672009864364, "grad_norm": 0.04052634537220001, "learning_rate": 0.01, "loss": 1.9397, "step": 123165 }, { "epoch": 12.655980271270037, "grad_norm": 0.08114446699619293, "learning_rate": 0.01, "loss": 1.9514, "step": 123168 }, { "epoch": 12.65628853267571, "grad_norm": 0.057578977197408676, "learning_rate": 0.01, "loss": 1.9144, "step": 123171 }, { "epoch": 12.65659679408138, "grad_norm": 0.08191578835248947, "learning_rate": 0.01, "loss": 1.9493, "step": 123174 }, { "epoch": 12.656905055487053, "grad_norm": 0.04840002581477165, "learning_rate": 0.01, "loss": 1.9253, "step": 123177 }, { "epoch": 12.657213316892726, "grad_norm": 0.09766780585050583, "learning_rate": 0.01, "loss": 1.9335, "step": 123180 }, { "epoch": 12.657521578298397, "grad_norm": 0.05007852986454964, "learning_rate": 0.01, "loss": 1.941, "step": 123183 }, { "epoch": 12.65782983970407, "grad_norm": 0.12118306010961533, "learning_rate": 0.01, "loss": 1.9407, "step": 123186 }, { "epoch": 12.65813810110974, "grad_norm": 0.12096033990383148, "learning_rate": 0.01, "loss": 1.9335, "step": 123189 }, { "epoch": 12.658446362515413, "grad_norm": 0.044699691236019135, "learning_rate": 0.01, "loss": 1.9368, "step": 123192 }, { "epoch": 12.658754623921086, "grad_norm": 0.03887972608208656, "learning_rate": 0.01, "loss": 1.9264, "step": 123195 }, { "epoch": 12.659062885326756, "grad_norm": 0.044233959168195724, "learning_rate": 0.01, "loss": 1.9161, "step": 123198 }, { "epoch": 12.659371146732429, "grad_norm": 0.06127122789621353, "learning_rate": 0.01, "loss": 1.9271, "step": 123201 }, { "epoch": 12.659679408138102, "grad_norm": 0.04868535324931145, "learning_rate": 0.01, "loss": 1.9323, "step": 123204 }, { "epoch": 12.659987669543773, "grad_norm": 0.041472144424915314, "learning_rate": 0.01, "loss": 1.9359, "step": 123207 }, { "epoch": 12.660295930949445, "grad_norm": 0.09328923374414444, "learning_rate": 0.01, "loss": 1.9417, "step": 123210 }, { "epoch": 12.660604192355118, "grad_norm": 0.09349977225065231, "learning_rate": 0.01, "loss": 1.9226, "step": 123213 }, { "epoch": 12.660912453760789, "grad_norm": 0.13697515428066254, "learning_rate": 0.01, "loss": 1.9306, "step": 123216 }, { "epoch": 12.661220715166461, "grad_norm": 0.1092478558421135, "learning_rate": 0.01, "loss": 1.9197, "step": 123219 }, { "epoch": 12.661528976572134, "grad_norm": 0.0793212503194809, "learning_rate": 0.01, "loss": 1.9202, "step": 123222 }, { "epoch": 12.661837237977805, "grad_norm": 0.03937581181526184, "learning_rate": 0.01, "loss": 1.9277, "step": 123225 }, { "epoch": 12.662145499383477, "grad_norm": 0.034699954092502594, "learning_rate": 0.01, "loss": 1.9249, "step": 123228 }, { "epoch": 12.662453760789148, "grad_norm": 0.05393664166331291, "learning_rate": 0.01, "loss": 1.9249, "step": 123231 }, { "epoch": 12.662762022194821, "grad_norm": 0.07334361970424652, "learning_rate": 0.01, "loss": 1.9316, "step": 123234 }, { "epoch": 12.663070283600494, "grad_norm": 0.04054495692253113, "learning_rate": 0.01, "loss": 1.9356, "step": 123237 }, { "epoch": 12.663378545006164, "grad_norm": 0.06787239760160446, "learning_rate": 0.01, "loss": 1.9476, "step": 123240 }, { "epoch": 12.663686806411837, "grad_norm": 0.04934227466583252, "learning_rate": 0.01, "loss": 1.9277, "step": 123243 }, { "epoch": 12.66399506781751, "grad_norm": 0.04054686799645424, "learning_rate": 0.01, "loss": 1.9208, "step": 123246 }, { "epoch": 12.66430332922318, "grad_norm": 0.0366259329020977, "learning_rate": 0.01, "loss": 1.947, "step": 123249 }, { "epoch": 12.664611590628853, "grad_norm": 0.04150351881980896, "learning_rate": 0.01, "loss": 1.9323, "step": 123252 }, { "epoch": 12.664919852034526, "grad_norm": 0.08996198326349258, "learning_rate": 0.01, "loss": 1.9495, "step": 123255 }, { "epoch": 12.665228113440197, "grad_norm": 0.17594537138938904, "learning_rate": 0.01, "loss": 1.9539, "step": 123258 }, { "epoch": 12.66553637484587, "grad_norm": 0.1170310527086258, "learning_rate": 0.01, "loss": 1.9133, "step": 123261 }, { "epoch": 12.665844636251542, "grad_norm": 0.07598567754030228, "learning_rate": 0.01, "loss": 1.9136, "step": 123264 }, { "epoch": 12.666152897657213, "grad_norm": 0.06181150674819946, "learning_rate": 0.01, "loss": 1.9367, "step": 123267 }, { "epoch": 12.666461159062885, "grad_norm": 0.055621251463890076, "learning_rate": 0.01, "loss": 1.9353, "step": 123270 }, { "epoch": 12.666769420468558, "grad_norm": 0.050596028566360474, "learning_rate": 0.01, "loss": 1.9103, "step": 123273 }, { "epoch": 12.667077681874229, "grad_norm": 0.08173961192369461, "learning_rate": 0.01, "loss": 1.9513, "step": 123276 }, { "epoch": 12.667385943279902, "grad_norm": 0.052812159061431885, "learning_rate": 0.01, "loss": 1.923, "step": 123279 }, { "epoch": 12.667694204685574, "grad_norm": 0.06073348596692085, "learning_rate": 0.01, "loss": 1.9099, "step": 123282 }, { "epoch": 12.668002466091245, "grad_norm": 0.05775995925068855, "learning_rate": 0.01, "loss": 1.9279, "step": 123285 }, { "epoch": 12.668310727496918, "grad_norm": 0.06133884936571121, "learning_rate": 0.01, "loss": 1.9466, "step": 123288 }, { "epoch": 12.668618988902589, "grad_norm": 0.04938175156712532, "learning_rate": 0.01, "loss": 1.9123, "step": 123291 }, { "epoch": 12.668927250308261, "grad_norm": 0.12475492805242538, "learning_rate": 0.01, "loss": 1.9434, "step": 123294 }, { "epoch": 12.669235511713934, "grad_norm": 0.04723283275961876, "learning_rate": 0.01, "loss": 1.932, "step": 123297 }, { "epoch": 12.669543773119605, "grad_norm": 0.062252528965473175, "learning_rate": 0.01, "loss": 1.9214, "step": 123300 }, { "epoch": 12.669852034525277, "grad_norm": 0.10509909689426422, "learning_rate": 0.01, "loss": 1.9325, "step": 123303 }, { "epoch": 12.67016029593095, "grad_norm": 0.08289887756109238, "learning_rate": 0.01, "loss": 1.9235, "step": 123306 }, { "epoch": 12.670468557336621, "grad_norm": 0.08722329139709473, "learning_rate": 0.01, "loss": 1.9312, "step": 123309 }, { "epoch": 12.670776818742294, "grad_norm": 0.12085244804620743, "learning_rate": 0.01, "loss": 1.9365, "step": 123312 }, { "epoch": 12.671085080147966, "grad_norm": 0.16747604310512543, "learning_rate": 0.01, "loss": 1.9308, "step": 123315 }, { "epoch": 12.671393341553637, "grad_norm": 0.10610677301883698, "learning_rate": 0.01, "loss": 1.906, "step": 123318 }, { "epoch": 12.67170160295931, "grad_norm": 0.056399762630462646, "learning_rate": 0.01, "loss": 1.9066, "step": 123321 }, { "epoch": 12.672009864364982, "grad_norm": 0.0351298563182354, "learning_rate": 0.01, "loss": 1.9153, "step": 123324 }, { "epoch": 12.672318125770653, "grad_norm": 0.0395541787147522, "learning_rate": 0.01, "loss": 1.9327, "step": 123327 }, { "epoch": 12.672626387176326, "grad_norm": 0.03527429327368736, "learning_rate": 0.01, "loss": 1.9684, "step": 123330 }, { "epoch": 12.672934648581997, "grad_norm": 0.03256375715136528, "learning_rate": 0.01, "loss": 1.9393, "step": 123333 }, { "epoch": 12.67324290998767, "grad_norm": 0.11099425703287125, "learning_rate": 0.01, "loss": 1.9304, "step": 123336 }, { "epoch": 12.673551171393342, "grad_norm": 0.08047635853290558, "learning_rate": 0.01, "loss": 1.9509, "step": 123339 }, { "epoch": 12.673859432799013, "grad_norm": 0.06533856689929962, "learning_rate": 0.01, "loss": 1.8991, "step": 123342 }, { "epoch": 12.674167694204685, "grad_norm": 0.04292716085910797, "learning_rate": 0.01, "loss": 1.9279, "step": 123345 }, { "epoch": 12.674475955610358, "grad_norm": 0.0538625605404377, "learning_rate": 0.01, "loss": 1.9519, "step": 123348 }, { "epoch": 12.674784217016029, "grad_norm": 0.04183555766940117, "learning_rate": 0.01, "loss": 1.9366, "step": 123351 }, { "epoch": 12.675092478421702, "grad_norm": 0.042323462665081024, "learning_rate": 0.01, "loss": 1.9249, "step": 123354 }, { "epoch": 12.675400739827374, "grad_norm": 0.11391904950141907, "learning_rate": 0.01, "loss": 1.9499, "step": 123357 }, { "epoch": 12.675709001233045, "grad_norm": 0.1024266704916954, "learning_rate": 0.01, "loss": 1.9573, "step": 123360 }, { "epoch": 12.676017262638718, "grad_norm": 0.09751450270414352, "learning_rate": 0.01, "loss": 1.9554, "step": 123363 }, { "epoch": 12.67632552404439, "grad_norm": 0.06176680326461792, "learning_rate": 0.01, "loss": 1.9428, "step": 123366 }, { "epoch": 12.676633785450061, "grad_norm": 0.06321140378713608, "learning_rate": 0.01, "loss": 1.9119, "step": 123369 }, { "epoch": 12.676942046855734, "grad_norm": 0.13908283412456512, "learning_rate": 0.01, "loss": 1.9308, "step": 123372 }, { "epoch": 12.677250308261407, "grad_norm": 0.054892729967832565, "learning_rate": 0.01, "loss": 1.9297, "step": 123375 }, { "epoch": 12.677558569667077, "grad_norm": 0.05871794372797012, "learning_rate": 0.01, "loss": 1.9216, "step": 123378 }, { "epoch": 12.67786683107275, "grad_norm": 0.048607565462589264, "learning_rate": 0.01, "loss": 1.9394, "step": 123381 }, { "epoch": 12.678175092478421, "grad_norm": 0.04668545722961426, "learning_rate": 0.01, "loss": 1.9505, "step": 123384 }, { "epoch": 12.678483353884094, "grad_norm": 0.05814890190958977, "learning_rate": 0.01, "loss": 1.9599, "step": 123387 }, { "epoch": 12.678791615289766, "grad_norm": 0.04149680212140083, "learning_rate": 0.01, "loss": 1.9441, "step": 123390 }, { "epoch": 12.679099876695437, "grad_norm": 0.0385429821908474, "learning_rate": 0.01, "loss": 1.9347, "step": 123393 }, { "epoch": 12.67940813810111, "grad_norm": 0.07953255623579025, "learning_rate": 0.01, "loss": 1.9413, "step": 123396 }, { "epoch": 12.679716399506782, "grad_norm": 0.08655081689357758, "learning_rate": 0.01, "loss": 1.9676, "step": 123399 }, { "epoch": 12.680024660912453, "grad_norm": 0.09242817759513855, "learning_rate": 0.01, "loss": 1.9349, "step": 123402 }, { "epoch": 12.680332922318126, "grad_norm": 0.06824064254760742, "learning_rate": 0.01, "loss": 1.9288, "step": 123405 }, { "epoch": 12.680641183723798, "grad_norm": 0.0653814896941185, "learning_rate": 0.01, "loss": 1.9563, "step": 123408 }, { "epoch": 12.68094944512947, "grad_norm": 0.09969869256019592, "learning_rate": 0.01, "loss": 1.9214, "step": 123411 }, { "epoch": 12.681257706535142, "grad_norm": 0.061876170337200165, "learning_rate": 0.01, "loss": 1.9592, "step": 123414 }, { "epoch": 12.681565967940815, "grad_norm": 0.0635654553771019, "learning_rate": 0.01, "loss": 1.9404, "step": 123417 }, { "epoch": 12.681874229346485, "grad_norm": 0.07676015794277191, "learning_rate": 0.01, "loss": 1.9099, "step": 123420 }, { "epoch": 12.682182490752158, "grad_norm": 0.0748474970459938, "learning_rate": 0.01, "loss": 1.9213, "step": 123423 }, { "epoch": 12.682490752157829, "grad_norm": 0.0491030290722847, "learning_rate": 0.01, "loss": 1.9075, "step": 123426 }, { "epoch": 12.682799013563502, "grad_norm": 0.09475478529930115, "learning_rate": 0.01, "loss": 1.9074, "step": 123429 }, { "epoch": 12.683107274969174, "grad_norm": 0.060498595237731934, "learning_rate": 0.01, "loss": 1.9286, "step": 123432 }, { "epoch": 12.683415536374845, "grad_norm": 0.04039103537797928, "learning_rate": 0.01, "loss": 1.951, "step": 123435 }, { "epoch": 12.683723797780518, "grad_norm": 0.11218724399805069, "learning_rate": 0.01, "loss": 1.9568, "step": 123438 }, { "epoch": 12.68403205918619, "grad_norm": 0.11027968674898148, "learning_rate": 0.01, "loss": 1.937, "step": 123441 }, { "epoch": 12.684340320591861, "grad_norm": 0.050490401685237885, "learning_rate": 0.01, "loss": 1.9482, "step": 123444 }, { "epoch": 12.684648581997534, "grad_norm": 0.03872010484337807, "learning_rate": 0.01, "loss": 1.9372, "step": 123447 }, { "epoch": 12.684956843403207, "grad_norm": 0.03425583988428116, "learning_rate": 0.01, "loss": 1.93, "step": 123450 }, { "epoch": 12.685265104808877, "grad_norm": 0.041339047253131866, "learning_rate": 0.01, "loss": 1.9509, "step": 123453 }, { "epoch": 12.68557336621455, "grad_norm": 0.0473909005522728, "learning_rate": 0.01, "loss": 1.9401, "step": 123456 }, { "epoch": 12.685881627620223, "grad_norm": 0.057033736258745193, "learning_rate": 0.01, "loss": 1.9184, "step": 123459 }, { "epoch": 12.686189889025894, "grad_norm": 0.0641959011554718, "learning_rate": 0.01, "loss": 1.9485, "step": 123462 }, { "epoch": 12.686498150431566, "grad_norm": 0.08935226500034332, "learning_rate": 0.01, "loss": 1.9226, "step": 123465 }, { "epoch": 12.686806411837239, "grad_norm": 0.0869215652346611, "learning_rate": 0.01, "loss": 1.915, "step": 123468 }, { "epoch": 12.68711467324291, "grad_norm": 0.05638349801301956, "learning_rate": 0.01, "loss": 1.913, "step": 123471 }, { "epoch": 12.687422934648582, "grad_norm": 0.14322124421596527, "learning_rate": 0.01, "loss": 1.9472, "step": 123474 }, { "epoch": 12.687731196054255, "grad_norm": 0.09759920090436935, "learning_rate": 0.01, "loss": 1.9423, "step": 123477 }, { "epoch": 12.688039457459926, "grad_norm": 0.05922066792845726, "learning_rate": 0.01, "loss": 1.932, "step": 123480 }, { "epoch": 12.688347718865598, "grad_norm": 0.04995220527052879, "learning_rate": 0.01, "loss": 1.9157, "step": 123483 }, { "epoch": 12.68865598027127, "grad_norm": 0.043737851083278656, "learning_rate": 0.01, "loss": 1.948, "step": 123486 }, { "epoch": 12.688964241676942, "grad_norm": 0.09345188736915588, "learning_rate": 0.01, "loss": 1.9164, "step": 123489 }, { "epoch": 12.689272503082615, "grad_norm": 0.17204274237155914, "learning_rate": 0.01, "loss": 1.9494, "step": 123492 }, { "epoch": 12.689580764488285, "grad_norm": 0.09963071346282959, "learning_rate": 0.01, "loss": 1.9136, "step": 123495 }, { "epoch": 12.689889025893958, "grad_norm": 0.07569556683301926, "learning_rate": 0.01, "loss": 1.9453, "step": 123498 }, { "epoch": 12.69019728729963, "grad_norm": 0.04148825630545616, "learning_rate": 0.01, "loss": 1.9466, "step": 123501 }, { "epoch": 12.690505548705302, "grad_norm": 0.06261174380779266, "learning_rate": 0.01, "loss": 1.9285, "step": 123504 }, { "epoch": 12.690813810110974, "grad_norm": 0.057878635823726654, "learning_rate": 0.01, "loss": 1.9312, "step": 123507 }, { "epoch": 12.691122071516647, "grad_norm": 0.07493016868829727, "learning_rate": 0.01, "loss": 1.9256, "step": 123510 }, { "epoch": 12.691430332922318, "grad_norm": 0.04073924198746681, "learning_rate": 0.01, "loss": 1.9358, "step": 123513 }, { "epoch": 12.69173859432799, "grad_norm": 0.033230170607566833, "learning_rate": 0.01, "loss": 1.9459, "step": 123516 }, { "epoch": 12.692046855733661, "grad_norm": 0.04627753421664238, "learning_rate": 0.01, "loss": 1.9368, "step": 123519 }, { "epoch": 12.692355117139334, "grad_norm": 0.04366100952029228, "learning_rate": 0.01, "loss": 1.9467, "step": 123522 }, { "epoch": 12.692663378545006, "grad_norm": 0.10183031857013702, "learning_rate": 0.01, "loss": 1.9451, "step": 123525 }, { "epoch": 12.692971639950677, "grad_norm": 0.0781526044011116, "learning_rate": 0.01, "loss": 1.9507, "step": 123528 }, { "epoch": 12.69327990135635, "grad_norm": 0.11927567422389984, "learning_rate": 0.01, "loss": 1.9338, "step": 123531 }, { "epoch": 12.693588162762023, "grad_norm": 0.04548252746462822, "learning_rate": 0.01, "loss": 1.9359, "step": 123534 }, { "epoch": 12.693896424167693, "grad_norm": 0.09688074141740799, "learning_rate": 0.01, "loss": 1.9337, "step": 123537 }, { "epoch": 12.694204685573366, "grad_norm": 0.042784079909324646, "learning_rate": 0.01, "loss": 1.936, "step": 123540 }, { "epoch": 12.694512946979039, "grad_norm": 0.05487271398305893, "learning_rate": 0.01, "loss": 1.9361, "step": 123543 }, { "epoch": 12.69482120838471, "grad_norm": 0.04063720256090164, "learning_rate": 0.01, "loss": 1.9503, "step": 123546 }, { "epoch": 12.695129469790382, "grad_norm": 0.04431400075554848, "learning_rate": 0.01, "loss": 1.9295, "step": 123549 }, { "epoch": 12.695437731196055, "grad_norm": 0.04617541283369064, "learning_rate": 0.01, "loss": 1.9222, "step": 123552 }, { "epoch": 12.695745992601726, "grad_norm": 0.04244965314865112, "learning_rate": 0.01, "loss": 1.9495, "step": 123555 }, { "epoch": 12.696054254007398, "grad_norm": 0.058584727346897125, "learning_rate": 0.01, "loss": 1.9154, "step": 123558 }, { "epoch": 12.696362515413071, "grad_norm": 0.056801773607730865, "learning_rate": 0.01, "loss": 1.9411, "step": 123561 }, { "epoch": 12.696670776818742, "grad_norm": 0.07578185945749283, "learning_rate": 0.01, "loss": 1.9416, "step": 123564 }, { "epoch": 12.696979038224415, "grad_norm": 0.09019816666841507, "learning_rate": 0.01, "loss": 1.9446, "step": 123567 }, { "epoch": 12.697287299630087, "grad_norm": 0.17778441309928894, "learning_rate": 0.01, "loss": 1.9514, "step": 123570 }, { "epoch": 12.697595561035758, "grad_norm": 0.12761113047599792, "learning_rate": 0.01, "loss": 1.9346, "step": 123573 }, { "epoch": 12.69790382244143, "grad_norm": 0.08019939064979553, "learning_rate": 0.01, "loss": 1.9539, "step": 123576 }, { "epoch": 12.698212083847102, "grad_norm": 0.05896785482764244, "learning_rate": 0.01, "loss": 1.9385, "step": 123579 }, { "epoch": 12.698520345252774, "grad_norm": 0.05120032653212547, "learning_rate": 0.01, "loss": 1.9096, "step": 123582 }, { "epoch": 12.698828606658447, "grad_norm": 0.04152693226933479, "learning_rate": 0.01, "loss": 1.9223, "step": 123585 }, { "epoch": 12.699136868064118, "grad_norm": 0.034317269921302795, "learning_rate": 0.01, "loss": 1.9388, "step": 123588 }, { "epoch": 12.69944512946979, "grad_norm": 0.03374708816409111, "learning_rate": 0.01, "loss": 1.9325, "step": 123591 }, { "epoch": 12.699753390875463, "grad_norm": 0.05124400556087494, "learning_rate": 0.01, "loss": 1.9468, "step": 123594 }, { "epoch": 12.700061652281134, "grad_norm": 0.11427992582321167, "learning_rate": 0.01, "loss": 1.9446, "step": 123597 }, { "epoch": 12.700369913686806, "grad_norm": 0.07133519649505615, "learning_rate": 0.01, "loss": 1.9575, "step": 123600 }, { "epoch": 12.700678175092479, "grad_norm": 0.11643834412097931, "learning_rate": 0.01, "loss": 1.9462, "step": 123603 }, { "epoch": 12.70098643649815, "grad_norm": 0.07793018966913223, "learning_rate": 0.01, "loss": 1.9456, "step": 123606 }, { "epoch": 12.701294697903823, "grad_norm": 0.036932747811079025, "learning_rate": 0.01, "loss": 1.9501, "step": 123609 }, { "epoch": 12.701602959309495, "grad_norm": 0.03248800337314606, "learning_rate": 0.01, "loss": 1.9524, "step": 123612 }, { "epoch": 12.701911220715166, "grad_norm": 0.03589079901576042, "learning_rate": 0.01, "loss": 1.9424, "step": 123615 }, { "epoch": 12.702219482120839, "grad_norm": 0.0629652589559555, "learning_rate": 0.01, "loss": 1.9309, "step": 123618 }, { "epoch": 12.70252774352651, "grad_norm": 0.07401778548955917, "learning_rate": 0.01, "loss": 1.9144, "step": 123621 }, { "epoch": 12.702836004932182, "grad_norm": 0.06214204803109169, "learning_rate": 0.01, "loss": 1.9231, "step": 123624 }, { "epoch": 12.703144266337855, "grad_norm": 0.09164556115865707, "learning_rate": 0.01, "loss": 1.9325, "step": 123627 }, { "epoch": 12.703452527743526, "grad_norm": 0.05909937992691994, "learning_rate": 0.01, "loss": 1.9456, "step": 123630 }, { "epoch": 12.703760789149198, "grad_norm": 0.04564003273844719, "learning_rate": 0.01, "loss": 1.9362, "step": 123633 }, { "epoch": 12.704069050554871, "grad_norm": 0.047293175011873245, "learning_rate": 0.01, "loss": 1.9049, "step": 123636 }, { "epoch": 12.704377311960542, "grad_norm": 0.09538941085338593, "learning_rate": 0.01, "loss": 1.9552, "step": 123639 }, { "epoch": 12.704685573366215, "grad_norm": 0.06254024803638458, "learning_rate": 0.01, "loss": 1.9397, "step": 123642 }, { "epoch": 12.704993834771887, "grad_norm": 0.06853699684143066, "learning_rate": 0.01, "loss": 1.9538, "step": 123645 }, { "epoch": 12.705302096177558, "grad_norm": 0.09391725808382034, "learning_rate": 0.01, "loss": 1.9281, "step": 123648 }, { "epoch": 12.70561035758323, "grad_norm": 0.09962349385023117, "learning_rate": 0.01, "loss": 1.9384, "step": 123651 }, { "epoch": 12.705918618988903, "grad_norm": 0.08067303150892258, "learning_rate": 0.01, "loss": 1.9584, "step": 123654 }, { "epoch": 12.706226880394574, "grad_norm": 0.04248975217342377, "learning_rate": 0.01, "loss": 1.9396, "step": 123657 }, { "epoch": 12.706535141800247, "grad_norm": 0.062013447284698486, "learning_rate": 0.01, "loss": 1.9436, "step": 123660 }, { "epoch": 12.70684340320592, "grad_norm": 0.0528823621571064, "learning_rate": 0.01, "loss": 1.951, "step": 123663 }, { "epoch": 12.70715166461159, "grad_norm": 0.058377742767333984, "learning_rate": 0.01, "loss": 1.9256, "step": 123666 }, { "epoch": 12.707459926017263, "grad_norm": 0.042628709226846695, "learning_rate": 0.01, "loss": 1.9221, "step": 123669 }, { "epoch": 12.707768187422936, "grad_norm": 0.03786737471818924, "learning_rate": 0.01, "loss": 1.9088, "step": 123672 }, { "epoch": 12.708076448828606, "grad_norm": 0.03240932896733284, "learning_rate": 0.01, "loss": 1.9426, "step": 123675 }, { "epoch": 12.708384710234279, "grad_norm": 0.06238216906785965, "learning_rate": 0.01, "loss": 1.9141, "step": 123678 }, { "epoch": 12.70869297163995, "grad_norm": 0.15355850756168365, "learning_rate": 0.01, "loss": 1.9338, "step": 123681 }, { "epoch": 12.709001233045623, "grad_norm": 0.06694837659597397, "learning_rate": 0.01, "loss": 1.9417, "step": 123684 }, { "epoch": 12.709309494451295, "grad_norm": 0.043853238224983215, "learning_rate": 0.01, "loss": 1.9448, "step": 123687 }, { "epoch": 12.709617755856966, "grad_norm": 0.05808883532881737, "learning_rate": 0.01, "loss": 1.9623, "step": 123690 }, { "epoch": 12.709926017262639, "grad_norm": 0.05669691041111946, "learning_rate": 0.01, "loss": 1.951, "step": 123693 }, { "epoch": 12.710234278668311, "grad_norm": 0.04043015465140343, "learning_rate": 0.01, "loss": 1.9214, "step": 123696 }, { "epoch": 12.710542540073982, "grad_norm": 0.03637734428048134, "learning_rate": 0.01, "loss": 1.9374, "step": 123699 }, { "epoch": 12.710850801479655, "grad_norm": 0.03239542618393898, "learning_rate": 0.01, "loss": 1.9447, "step": 123702 }, { "epoch": 12.711159062885327, "grad_norm": 0.04744594544172287, "learning_rate": 0.01, "loss": 1.9071, "step": 123705 }, { "epoch": 12.711467324290998, "grad_norm": 0.12527455389499664, "learning_rate": 0.01, "loss": 1.9295, "step": 123708 }, { "epoch": 12.711775585696671, "grad_norm": 0.039139848202466965, "learning_rate": 0.01, "loss": 1.9034, "step": 123711 }, { "epoch": 12.712083847102342, "grad_norm": 0.12416050583124161, "learning_rate": 0.01, "loss": 1.9245, "step": 123714 }, { "epoch": 12.712392108508014, "grad_norm": 0.04227903485298157, "learning_rate": 0.01, "loss": 1.9517, "step": 123717 }, { "epoch": 12.712700369913687, "grad_norm": 0.05773387476801872, "learning_rate": 0.01, "loss": 1.9487, "step": 123720 }, { "epoch": 12.713008631319358, "grad_norm": 0.07354748994112015, "learning_rate": 0.01, "loss": 1.9152, "step": 123723 }, { "epoch": 12.71331689272503, "grad_norm": 0.06634755432605743, "learning_rate": 0.01, "loss": 1.9247, "step": 123726 }, { "epoch": 12.713625154130703, "grad_norm": 0.03875407576560974, "learning_rate": 0.01, "loss": 1.9381, "step": 123729 }, { "epoch": 12.713933415536374, "grad_norm": 0.0528351329267025, "learning_rate": 0.01, "loss": 1.9365, "step": 123732 }, { "epoch": 12.714241676942047, "grad_norm": 0.050535354763269424, "learning_rate": 0.01, "loss": 1.9338, "step": 123735 }, { "epoch": 12.71454993834772, "grad_norm": 0.056998029351234436, "learning_rate": 0.01, "loss": 1.9355, "step": 123738 }, { "epoch": 12.71485819975339, "grad_norm": 0.037163373082876205, "learning_rate": 0.01, "loss": 1.932, "step": 123741 }, { "epoch": 12.715166461159063, "grad_norm": 0.040953654795885086, "learning_rate": 0.01, "loss": 1.9215, "step": 123744 }, { "epoch": 12.715474722564736, "grad_norm": 0.09290850162506104, "learning_rate": 0.01, "loss": 1.9174, "step": 123747 }, { "epoch": 12.715782983970406, "grad_norm": 0.040255237370729446, "learning_rate": 0.01, "loss": 1.9223, "step": 123750 }, { "epoch": 12.716091245376079, "grad_norm": 0.06343100965023041, "learning_rate": 0.01, "loss": 1.9235, "step": 123753 }, { "epoch": 12.716399506781752, "grad_norm": 0.049036815762519836, "learning_rate": 0.01, "loss": 1.9443, "step": 123756 }, { "epoch": 12.716707768187423, "grad_norm": 0.03556976467370987, "learning_rate": 0.01, "loss": 1.9258, "step": 123759 }, { "epoch": 12.717016029593095, "grad_norm": 0.038292575627565384, "learning_rate": 0.01, "loss": 1.9396, "step": 123762 }, { "epoch": 12.717324290998768, "grad_norm": 0.05443082004785538, "learning_rate": 0.01, "loss": 1.9249, "step": 123765 }, { "epoch": 12.717632552404439, "grad_norm": 0.14282263815402985, "learning_rate": 0.01, "loss": 1.9217, "step": 123768 }, { "epoch": 12.717940813810111, "grad_norm": 0.09722369909286499, "learning_rate": 0.01, "loss": 1.933, "step": 123771 }, { "epoch": 12.718249075215782, "grad_norm": 0.04693793132901192, "learning_rate": 0.01, "loss": 1.9358, "step": 123774 }, { "epoch": 12.718557336621455, "grad_norm": 0.04556123539805412, "learning_rate": 0.01, "loss": 1.9414, "step": 123777 }, { "epoch": 12.718865598027127, "grad_norm": 0.0539705865085125, "learning_rate": 0.01, "loss": 1.9435, "step": 123780 }, { "epoch": 12.719173859432798, "grad_norm": 0.08391965180635452, "learning_rate": 0.01, "loss": 1.9402, "step": 123783 }, { "epoch": 12.719482120838471, "grad_norm": 0.0999051183462143, "learning_rate": 0.01, "loss": 1.9169, "step": 123786 }, { "epoch": 12.719790382244144, "grad_norm": 0.10758959501981735, "learning_rate": 0.01, "loss": 1.9322, "step": 123789 }, { "epoch": 12.720098643649814, "grad_norm": 0.04740709438920021, "learning_rate": 0.01, "loss": 1.9146, "step": 123792 }, { "epoch": 12.720406905055487, "grad_norm": 0.03823184221982956, "learning_rate": 0.01, "loss": 1.9206, "step": 123795 }, { "epoch": 12.72071516646116, "grad_norm": 0.07436653971672058, "learning_rate": 0.01, "loss": 1.9424, "step": 123798 }, { "epoch": 12.72102342786683, "grad_norm": 0.04962896183133125, "learning_rate": 0.01, "loss": 1.9496, "step": 123801 }, { "epoch": 12.721331689272503, "grad_norm": 0.0763697549700737, "learning_rate": 0.01, "loss": 1.9316, "step": 123804 }, { "epoch": 12.721639950678176, "grad_norm": 0.07073080539703369, "learning_rate": 0.01, "loss": 1.9313, "step": 123807 }, { "epoch": 12.721948212083847, "grad_norm": 0.07728879153728485, "learning_rate": 0.01, "loss": 1.9398, "step": 123810 }, { "epoch": 12.72225647348952, "grad_norm": 0.03955766558647156, "learning_rate": 0.01, "loss": 1.9247, "step": 123813 }, { "epoch": 12.72256473489519, "grad_norm": 0.06398153305053711, "learning_rate": 0.01, "loss": 1.9411, "step": 123816 }, { "epoch": 12.722872996300863, "grad_norm": 0.0653410479426384, "learning_rate": 0.01, "loss": 1.9343, "step": 123819 }, { "epoch": 12.723181257706536, "grad_norm": 0.03365045413374901, "learning_rate": 0.01, "loss": 1.9232, "step": 123822 }, { "epoch": 12.723489519112206, "grad_norm": 0.10590102523565292, "learning_rate": 0.01, "loss": 1.9116, "step": 123825 }, { "epoch": 12.723797780517879, "grad_norm": 0.05007834732532501, "learning_rate": 0.01, "loss": 1.9195, "step": 123828 }, { "epoch": 12.724106041923552, "grad_norm": 0.07730080187320709, "learning_rate": 0.01, "loss": 1.9172, "step": 123831 }, { "epoch": 12.724414303329223, "grad_norm": 0.038783296942710876, "learning_rate": 0.01, "loss": 1.9298, "step": 123834 }, { "epoch": 12.724722564734895, "grad_norm": 0.028622707352042198, "learning_rate": 0.01, "loss": 1.9208, "step": 123837 }, { "epoch": 12.725030826140568, "grad_norm": 0.03727482631802559, "learning_rate": 0.01, "loss": 1.9294, "step": 123840 }, { "epoch": 12.725339087546239, "grad_norm": 0.05076562985777855, "learning_rate": 0.01, "loss": 1.9474, "step": 123843 }, { "epoch": 12.725647348951911, "grad_norm": 0.14582273364067078, "learning_rate": 0.01, "loss": 1.9362, "step": 123846 }, { "epoch": 12.725955610357584, "grad_norm": 0.03792782500386238, "learning_rate": 0.01, "loss": 1.9523, "step": 123849 }, { "epoch": 12.726263871763255, "grad_norm": 0.0417255163192749, "learning_rate": 0.01, "loss": 1.9344, "step": 123852 }, { "epoch": 12.726572133168927, "grad_norm": 0.12276627868413925, "learning_rate": 0.01, "loss": 1.9387, "step": 123855 }, { "epoch": 12.7268803945746, "grad_norm": 0.05630599707365036, "learning_rate": 0.01, "loss": 1.9423, "step": 123858 }, { "epoch": 12.727188655980271, "grad_norm": 0.049009427428245544, "learning_rate": 0.01, "loss": 1.9274, "step": 123861 }, { "epoch": 12.727496917385944, "grad_norm": 0.060720812529325485, "learning_rate": 0.01, "loss": 1.9561, "step": 123864 }, { "epoch": 12.727805178791616, "grad_norm": 0.0986635759472847, "learning_rate": 0.01, "loss": 1.9589, "step": 123867 }, { "epoch": 12.728113440197287, "grad_norm": 0.08428160101175308, "learning_rate": 0.01, "loss": 1.9382, "step": 123870 }, { "epoch": 12.72842170160296, "grad_norm": 0.053374290466308594, "learning_rate": 0.01, "loss": 1.9287, "step": 123873 }, { "epoch": 12.72872996300863, "grad_norm": 0.08811751008033752, "learning_rate": 0.01, "loss": 1.9487, "step": 123876 }, { "epoch": 12.729038224414303, "grad_norm": 0.0746317207813263, "learning_rate": 0.01, "loss": 1.9336, "step": 123879 }, { "epoch": 12.729346485819976, "grad_norm": 0.08238863945007324, "learning_rate": 0.01, "loss": 1.9413, "step": 123882 }, { "epoch": 12.729654747225647, "grad_norm": 0.047723446041345596, "learning_rate": 0.01, "loss": 1.9376, "step": 123885 }, { "epoch": 12.72996300863132, "grad_norm": 0.11083819717168808, "learning_rate": 0.01, "loss": 1.9266, "step": 123888 }, { "epoch": 12.730271270036992, "grad_norm": 0.06055304408073425, "learning_rate": 0.01, "loss": 1.9714, "step": 123891 }, { "epoch": 12.730579531442663, "grad_norm": 0.10406802594661713, "learning_rate": 0.01, "loss": 1.9031, "step": 123894 }, { "epoch": 12.730887792848335, "grad_norm": 0.12105995416641235, "learning_rate": 0.01, "loss": 1.9193, "step": 123897 }, { "epoch": 12.731196054254008, "grad_norm": 0.050094123929739, "learning_rate": 0.01, "loss": 1.9314, "step": 123900 }, { "epoch": 12.731504315659679, "grad_norm": 0.054774798452854156, "learning_rate": 0.01, "loss": 1.9294, "step": 123903 }, { "epoch": 12.731812577065352, "grad_norm": 0.05368786305189133, "learning_rate": 0.01, "loss": 1.9392, "step": 123906 }, { "epoch": 12.732120838471022, "grad_norm": 0.03528254106640816, "learning_rate": 0.01, "loss": 1.9391, "step": 123909 }, { "epoch": 12.732429099876695, "grad_norm": 0.04302767664194107, "learning_rate": 0.01, "loss": 1.9389, "step": 123912 }, { "epoch": 12.732737361282368, "grad_norm": 0.10473338514566422, "learning_rate": 0.01, "loss": 1.9369, "step": 123915 }, { "epoch": 12.733045622688039, "grad_norm": 0.06902991235256195, "learning_rate": 0.01, "loss": 1.9376, "step": 123918 }, { "epoch": 12.733353884093711, "grad_norm": 0.06823877990245819, "learning_rate": 0.01, "loss": 1.9309, "step": 123921 }, { "epoch": 12.733662145499384, "grad_norm": 0.07815282791852951, "learning_rate": 0.01, "loss": 1.9333, "step": 123924 }, { "epoch": 12.733970406905055, "grad_norm": 0.07565927505493164, "learning_rate": 0.01, "loss": 1.92, "step": 123927 }, { "epoch": 12.734278668310727, "grad_norm": 0.04010128229856491, "learning_rate": 0.01, "loss": 1.9331, "step": 123930 }, { "epoch": 12.7345869297164, "grad_norm": 0.11234023422002792, "learning_rate": 0.01, "loss": 1.9298, "step": 123933 }, { "epoch": 12.734895191122071, "grad_norm": 0.11952132731676102, "learning_rate": 0.01, "loss": 1.9278, "step": 123936 }, { "epoch": 12.735203452527744, "grad_norm": 0.07487715035676956, "learning_rate": 0.01, "loss": 1.9202, "step": 123939 }, { "epoch": 12.735511713933416, "grad_norm": 0.08312923461198807, "learning_rate": 0.01, "loss": 1.9577, "step": 123942 }, { "epoch": 12.735819975339087, "grad_norm": 0.049645256251096725, "learning_rate": 0.01, "loss": 1.9189, "step": 123945 }, { "epoch": 12.73612823674476, "grad_norm": 0.0756223052740097, "learning_rate": 0.01, "loss": 1.9175, "step": 123948 }, { "epoch": 12.736436498150432, "grad_norm": 0.04760962352156639, "learning_rate": 0.01, "loss": 1.928, "step": 123951 }, { "epoch": 12.736744759556103, "grad_norm": 0.035297941416502, "learning_rate": 0.01, "loss": 1.9427, "step": 123954 }, { "epoch": 12.737053020961776, "grad_norm": 0.03172614052891731, "learning_rate": 0.01, "loss": 1.9251, "step": 123957 }, { "epoch": 12.737361282367448, "grad_norm": 0.03660298138856888, "learning_rate": 0.01, "loss": 1.94, "step": 123960 }, { "epoch": 12.73766954377312, "grad_norm": 0.03966713324189186, "learning_rate": 0.01, "loss": 1.9168, "step": 123963 }, { "epoch": 12.737977805178792, "grad_norm": 0.07121724635362625, "learning_rate": 0.01, "loss": 1.9264, "step": 123966 }, { "epoch": 12.738286066584463, "grad_norm": 0.06863511353731155, "learning_rate": 0.01, "loss": 1.9121, "step": 123969 }, { "epoch": 12.738594327990135, "grad_norm": 0.040102001279592514, "learning_rate": 0.01, "loss": 1.913, "step": 123972 }, { "epoch": 12.738902589395808, "grad_norm": 0.13240717351436615, "learning_rate": 0.01, "loss": 1.9588, "step": 123975 }, { "epoch": 12.739210850801479, "grad_norm": 0.08143643289804459, "learning_rate": 0.01, "loss": 1.9217, "step": 123978 }, { "epoch": 12.739519112207152, "grad_norm": 0.09273307025432587, "learning_rate": 0.01, "loss": 1.9094, "step": 123981 }, { "epoch": 12.739827373612824, "grad_norm": 0.0630078986287117, "learning_rate": 0.01, "loss": 1.9448, "step": 123984 }, { "epoch": 12.740135635018495, "grad_norm": 0.0617285780608654, "learning_rate": 0.01, "loss": 1.9332, "step": 123987 }, { "epoch": 12.740443896424168, "grad_norm": 0.049238421022892, "learning_rate": 0.01, "loss": 1.9497, "step": 123990 }, { "epoch": 12.74075215782984, "grad_norm": 0.055354706943035126, "learning_rate": 0.01, "loss": 1.9361, "step": 123993 }, { "epoch": 12.741060419235511, "grad_norm": 0.05170140787959099, "learning_rate": 0.01, "loss": 1.9381, "step": 123996 }, { "epoch": 12.741368680641184, "grad_norm": 0.04272826388478279, "learning_rate": 0.01, "loss": 1.9465, "step": 123999 }, { "epoch": 12.741676942046857, "grad_norm": 0.1141861155629158, "learning_rate": 0.01, "loss": 1.946, "step": 124002 }, { "epoch": 12.741985203452527, "grad_norm": 0.03754768148064613, "learning_rate": 0.01, "loss": 1.9392, "step": 124005 }, { "epoch": 12.7422934648582, "grad_norm": 0.04011441394686699, "learning_rate": 0.01, "loss": 1.983, "step": 124008 }, { "epoch": 12.74260172626387, "grad_norm": 0.13006868958473206, "learning_rate": 0.01, "loss": 1.9185, "step": 124011 }, { "epoch": 12.742909987669544, "grad_norm": 0.02989177033305168, "learning_rate": 0.01, "loss": 1.9487, "step": 124014 }, { "epoch": 12.743218249075216, "grad_norm": 0.036035675555467606, "learning_rate": 0.01, "loss": 1.9422, "step": 124017 }, { "epoch": 12.743526510480887, "grad_norm": 0.0521487332880497, "learning_rate": 0.01, "loss": 1.9248, "step": 124020 }, { "epoch": 12.74383477188656, "grad_norm": 0.10544262081384659, "learning_rate": 0.01, "loss": 1.9226, "step": 124023 }, { "epoch": 12.744143033292232, "grad_norm": 0.09543522447347641, "learning_rate": 0.01, "loss": 1.9362, "step": 124026 }, { "epoch": 12.744451294697903, "grad_norm": 0.048407234251499176, "learning_rate": 0.01, "loss": 1.9335, "step": 124029 }, { "epoch": 12.744759556103576, "grad_norm": 0.04668554291129112, "learning_rate": 0.01, "loss": 1.9401, "step": 124032 }, { "epoch": 12.745067817509248, "grad_norm": 0.07983088493347168, "learning_rate": 0.01, "loss": 1.9525, "step": 124035 }, { "epoch": 12.74537607891492, "grad_norm": 0.09198339283466339, "learning_rate": 0.01, "loss": 1.9345, "step": 124038 }, { "epoch": 12.745684340320592, "grad_norm": 0.060933612287044525, "learning_rate": 0.01, "loss": 1.9296, "step": 124041 }, { "epoch": 12.745992601726265, "grad_norm": 0.04717419669032097, "learning_rate": 0.01, "loss": 1.9254, "step": 124044 }, { "epoch": 12.746300863131935, "grad_norm": 0.04291157424449921, "learning_rate": 0.01, "loss": 1.9017, "step": 124047 }, { "epoch": 12.746609124537608, "grad_norm": 0.08731654286384583, "learning_rate": 0.01, "loss": 1.922, "step": 124050 }, { "epoch": 12.74691738594328, "grad_norm": 0.058710359036922455, "learning_rate": 0.01, "loss": 1.9496, "step": 124053 }, { "epoch": 12.747225647348952, "grad_norm": 0.05253433808684349, "learning_rate": 0.01, "loss": 1.9296, "step": 124056 }, { "epoch": 12.747533908754624, "grad_norm": 0.058453548699617386, "learning_rate": 0.01, "loss": 1.9356, "step": 124059 }, { "epoch": 12.747842170160297, "grad_norm": 0.037066906690597534, "learning_rate": 0.01, "loss": 1.9348, "step": 124062 }, { "epoch": 12.748150431565968, "grad_norm": 0.03119373321533203, "learning_rate": 0.01, "loss": 1.9154, "step": 124065 }, { "epoch": 12.74845869297164, "grad_norm": 0.038258105516433716, "learning_rate": 0.01, "loss": 1.9385, "step": 124068 }, { "epoch": 12.748766954377311, "grad_norm": 0.09073299169540405, "learning_rate": 0.01, "loss": 1.8871, "step": 124071 }, { "epoch": 12.749075215782984, "grad_norm": 0.09729227423667908, "learning_rate": 0.01, "loss": 1.9486, "step": 124074 }, { "epoch": 12.749383477188656, "grad_norm": 0.05143263563513756, "learning_rate": 0.01, "loss": 1.9109, "step": 124077 }, { "epoch": 12.749691738594327, "grad_norm": 0.06965895742177963, "learning_rate": 0.01, "loss": 1.9148, "step": 124080 }, { "epoch": 12.75, "grad_norm": 0.10010083764791489, "learning_rate": 0.01, "loss": 1.9235, "step": 124083 }, { "epoch": 12.750308261405673, "grad_norm": 0.060614701360464096, "learning_rate": 0.01, "loss": 1.9187, "step": 124086 }, { "epoch": 12.750616522811344, "grad_norm": 0.0689188539981842, "learning_rate": 0.01, "loss": 1.9472, "step": 124089 }, { "epoch": 12.750924784217016, "grad_norm": 0.05061258748173714, "learning_rate": 0.01, "loss": 1.9528, "step": 124092 }, { "epoch": 12.751233045622689, "grad_norm": 0.12382253259420395, "learning_rate": 0.01, "loss": 1.8836, "step": 124095 }, { "epoch": 12.75154130702836, "grad_norm": 0.1247350350022316, "learning_rate": 0.01, "loss": 1.9239, "step": 124098 }, { "epoch": 12.751849568434032, "grad_norm": 0.03914977237582207, "learning_rate": 0.01, "loss": 1.9522, "step": 124101 }, { "epoch": 12.752157829839703, "grad_norm": 0.09096557646989822, "learning_rate": 0.01, "loss": 1.929, "step": 124104 }, { "epoch": 12.752466091245376, "grad_norm": 0.056929972022771835, "learning_rate": 0.01, "loss": 1.9542, "step": 124107 }, { "epoch": 12.752774352651048, "grad_norm": 0.0355709008872509, "learning_rate": 0.01, "loss": 1.9302, "step": 124110 }, { "epoch": 12.75308261405672, "grad_norm": 0.0395621620118618, "learning_rate": 0.01, "loss": 1.9552, "step": 124113 }, { "epoch": 12.753390875462392, "grad_norm": 0.11180397123098373, "learning_rate": 0.01, "loss": 1.9487, "step": 124116 }, { "epoch": 12.753699136868065, "grad_norm": 0.08548378944396973, "learning_rate": 0.01, "loss": 1.9427, "step": 124119 }, { "epoch": 12.754007398273735, "grad_norm": 0.042386166751384735, "learning_rate": 0.01, "loss": 1.9501, "step": 124122 }, { "epoch": 12.754315659679408, "grad_norm": 0.04782962426543236, "learning_rate": 0.01, "loss": 1.9536, "step": 124125 }, { "epoch": 12.75462392108508, "grad_norm": 0.04724056273698807, "learning_rate": 0.01, "loss": 1.9413, "step": 124128 }, { "epoch": 12.754932182490752, "grad_norm": 0.11864021420478821, "learning_rate": 0.01, "loss": 1.9099, "step": 124131 }, { "epoch": 12.755240443896424, "grad_norm": 0.07599248737096786, "learning_rate": 0.01, "loss": 1.9517, "step": 124134 }, { "epoch": 12.755548705302097, "grad_norm": 0.04107172414660454, "learning_rate": 0.01, "loss": 1.9088, "step": 124137 }, { "epoch": 12.755856966707768, "grad_norm": 0.13786515593528748, "learning_rate": 0.01, "loss": 1.9378, "step": 124140 }, { "epoch": 12.75616522811344, "grad_norm": 0.05687346309423447, "learning_rate": 0.01, "loss": 1.9341, "step": 124143 }, { "epoch": 12.756473489519113, "grad_norm": 0.1486857831478119, "learning_rate": 0.01, "loss": 1.9206, "step": 124146 }, { "epoch": 12.756781750924784, "grad_norm": 0.05681532993912697, "learning_rate": 0.01, "loss": 1.9205, "step": 124149 }, { "epoch": 12.757090012330456, "grad_norm": 0.054894618690013885, "learning_rate": 0.01, "loss": 1.931, "step": 124152 }, { "epoch": 12.75739827373613, "grad_norm": 0.04187793284654617, "learning_rate": 0.01, "loss": 1.9431, "step": 124155 }, { "epoch": 12.7577065351418, "grad_norm": 0.052474524825811386, "learning_rate": 0.01, "loss": 1.9559, "step": 124158 }, { "epoch": 12.758014796547473, "grad_norm": 0.06345690786838531, "learning_rate": 0.01, "loss": 1.9588, "step": 124161 }, { "epoch": 12.758323057953143, "grad_norm": 0.04370460659265518, "learning_rate": 0.01, "loss": 1.9362, "step": 124164 }, { "epoch": 12.758631319358816, "grad_norm": 0.033778414130210876, "learning_rate": 0.01, "loss": 1.9363, "step": 124167 }, { "epoch": 12.758939580764489, "grad_norm": 0.07214024662971497, "learning_rate": 0.01, "loss": 1.9305, "step": 124170 }, { "epoch": 12.75924784217016, "grad_norm": 0.08046533912420273, "learning_rate": 0.01, "loss": 1.9134, "step": 124173 }, { "epoch": 12.759556103575832, "grad_norm": 0.1223985031247139, "learning_rate": 0.01, "loss": 1.9365, "step": 124176 }, { "epoch": 12.759864364981505, "grad_norm": 0.08571730554103851, "learning_rate": 0.01, "loss": 1.9323, "step": 124179 }, { "epoch": 12.760172626387176, "grad_norm": 0.07246126234531403, "learning_rate": 0.01, "loss": 1.9228, "step": 124182 }, { "epoch": 12.760480887792848, "grad_norm": 0.06221122667193413, "learning_rate": 0.01, "loss": 1.9151, "step": 124185 }, { "epoch": 12.760789149198521, "grad_norm": 0.04474806413054466, "learning_rate": 0.01, "loss": 1.9342, "step": 124188 }, { "epoch": 12.761097410604192, "grad_norm": 0.04277119040489197, "learning_rate": 0.01, "loss": 1.9454, "step": 124191 }, { "epoch": 12.761405672009865, "grad_norm": 0.05043601989746094, "learning_rate": 0.01, "loss": 1.9274, "step": 124194 }, { "epoch": 12.761713933415537, "grad_norm": 0.0631195679306984, "learning_rate": 0.01, "loss": 1.9374, "step": 124197 }, { "epoch": 12.762022194821208, "grad_norm": 0.16869129240512848, "learning_rate": 0.01, "loss": 1.9244, "step": 124200 }, { "epoch": 12.76233045622688, "grad_norm": 0.07093799114227295, "learning_rate": 0.01, "loss": 1.9518, "step": 124203 }, { "epoch": 12.762638717632552, "grad_norm": 0.11191149801015854, "learning_rate": 0.01, "loss": 1.9418, "step": 124206 }, { "epoch": 12.762946979038224, "grad_norm": 0.1054079607129097, "learning_rate": 0.01, "loss": 1.9542, "step": 124209 }, { "epoch": 12.763255240443897, "grad_norm": 0.08778104186058044, "learning_rate": 0.01, "loss": 1.9243, "step": 124212 }, { "epoch": 12.763563501849568, "grad_norm": 0.053434088826179504, "learning_rate": 0.01, "loss": 1.9515, "step": 124215 }, { "epoch": 12.76387176325524, "grad_norm": 0.056754644960165024, "learning_rate": 0.01, "loss": 1.9274, "step": 124218 }, { "epoch": 12.764180024660913, "grad_norm": 0.030407005921006203, "learning_rate": 0.01, "loss": 1.9385, "step": 124221 }, { "epoch": 12.764488286066584, "grad_norm": 0.04236002638936043, "learning_rate": 0.01, "loss": 1.9452, "step": 124224 }, { "epoch": 12.764796547472256, "grad_norm": 0.0928228497505188, "learning_rate": 0.01, "loss": 1.9164, "step": 124227 }, { "epoch": 12.765104808877929, "grad_norm": 0.05422854796051979, "learning_rate": 0.01, "loss": 1.9384, "step": 124230 }, { "epoch": 12.7654130702836, "grad_norm": 0.04374406486749649, "learning_rate": 0.01, "loss": 1.9099, "step": 124233 }, { "epoch": 12.765721331689273, "grad_norm": 0.03411633148789406, "learning_rate": 0.01, "loss": 1.9613, "step": 124236 }, { "epoch": 12.766029593094945, "grad_norm": 0.03351146727800369, "learning_rate": 0.01, "loss": 1.9309, "step": 124239 }, { "epoch": 12.766337854500616, "grad_norm": 0.07590338587760925, "learning_rate": 0.01, "loss": 1.917, "step": 124242 }, { "epoch": 12.766646115906289, "grad_norm": 0.11562327295541763, "learning_rate": 0.01, "loss": 1.9406, "step": 124245 }, { "epoch": 12.766954377311961, "grad_norm": 0.10438881814479828, "learning_rate": 0.01, "loss": 1.9235, "step": 124248 }, { "epoch": 12.767262638717632, "grad_norm": 0.03831292316317558, "learning_rate": 0.01, "loss": 1.9325, "step": 124251 }, { "epoch": 12.767570900123305, "grad_norm": 0.08192018419504166, "learning_rate": 0.01, "loss": 1.9363, "step": 124254 }, { "epoch": 12.767879161528978, "grad_norm": 0.060927681624889374, "learning_rate": 0.01, "loss": 1.954, "step": 124257 }, { "epoch": 12.768187422934648, "grad_norm": 0.1201568990945816, "learning_rate": 0.01, "loss": 1.9161, "step": 124260 }, { "epoch": 12.768495684340321, "grad_norm": 0.03554629161953926, "learning_rate": 0.01, "loss": 1.936, "step": 124263 }, { "epoch": 12.768803945745992, "grad_norm": 0.0523088201880455, "learning_rate": 0.01, "loss": 1.9479, "step": 124266 }, { "epoch": 12.769112207151665, "grad_norm": 0.04624674469232559, "learning_rate": 0.01, "loss": 1.9253, "step": 124269 }, { "epoch": 12.769420468557337, "grad_norm": 0.0483710952103138, "learning_rate": 0.01, "loss": 1.9191, "step": 124272 }, { "epoch": 12.769728729963008, "grad_norm": 0.06333783268928528, "learning_rate": 0.01, "loss": 1.9375, "step": 124275 }, { "epoch": 12.77003699136868, "grad_norm": 0.036787934601306915, "learning_rate": 0.01, "loss": 1.9254, "step": 124278 }, { "epoch": 12.770345252774353, "grad_norm": 0.08958521485328674, "learning_rate": 0.01, "loss": 1.9378, "step": 124281 }, { "epoch": 12.770653514180024, "grad_norm": 0.05499611794948578, "learning_rate": 0.01, "loss": 1.9301, "step": 124284 }, { "epoch": 12.770961775585697, "grad_norm": 0.09917958825826645, "learning_rate": 0.01, "loss": 1.9258, "step": 124287 }, { "epoch": 12.77127003699137, "grad_norm": 0.05552750453352928, "learning_rate": 0.01, "loss": 1.9363, "step": 124290 }, { "epoch": 12.77157829839704, "grad_norm": 0.11691426485776901, "learning_rate": 0.01, "loss": 1.9251, "step": 124293 }, { "epoch": 12.771886559802713, "grad_norm": 0.050462447106838226, "learning_rate": 0.01, "loss": 1.9205, "step": 124296 }, { "epoch": 12.772194821208384, "grad_norm": 0.060034796595573425, "learning_rate": 0.01, "loss": 1.9512, "step": 124299 }, { "epoch": 12.772503082614056, "grad_norm": 0.052780505269765854, "learning_rate": 0.01, "loss": 1.9615, "step": 124302 }, { "epoch": 12.772811344019729, "grad_norm": 0.044544320553541183, "learning_rate": 0.01, "loss": 1.9047, "step": 124305 }, { "epoch": 12.7731196054254, "grad_norm": 0.048675406724214554, "learning_rate": 0.01, "loss": 1.9551, "step": 124308 }, { "epoch": 12.773427866831073, "grad_norm": 0.03589058294892311, "learning_rate": 0.01, "loss": 1.9189, "step": 124311 }, { "epoch": 12.773736128236745, "grad_norm": 0.04180203750729561, "learning_rate": 0.01, "loss": 1.9195, "step": 124314 }, { "epoch": 12.774044389642416, "grad_norm": 0.034893400967121124, "learning_rate": 0.01, "loss": 1.908, "step": 124317 }, { "epoch": 12.774352651048089, "grad_norm": 0.03470678627490997, "learning_rate": 0.01, "loss": 1.9073, "step": 124320 }, { "epoch": 12.774660912453761, "grad_norm": 0.06290122121572495, "learning_rate": 0.01, "loss": 1.9252, "step": 124323 }, { "epoch": 12.774969173859432, "grad_norm": 0.10993147641420364, "learning_rate": 0.01, "loss": 1.9432, "step": 124326 }, { "epoch": 12.775277435265105, "grad_norm": 0.045845333486795425, "learning_rate": 0.01, "loss": 1.9388, "step": 124329 }, { "epoch": 12.775585696670777, "grad_norm": 0.042101263999938965, "learning_rate": 0.01, "loss": 1.9348, "step": 124332 }, { "epoch": 12.775893958076448, "grad_norm": 0.04230468347668648, "learning_rate": 0.01, "loss": 1.9258, "step": 124335 }, { "epoch": 12.776202219482121, "grad_norm": 0.0595904216170311, "learning_rate": 0.01, "loss": 1.9317, "step": 124338 }, { "epoch": 12.776510480887794, "grad_norm": 0.03835110738873482, "learning_rate": 0.01, "loss": 1.9497, "step": 124341 }, { "epoch": 12.776818742293464, "grad_norm": 0.09960649907588959, "learning_rate": 0.01, "loss": 1.9272, "step": 124344 }, { "epoch": 12.777127003699137, "grad_norm": 0.04961574450135231, "learning_rate": 0.01, "loss": 1.9486, "step": 124347 }, { "epoch": 12.77743526510481, "grad_norm": 0.07127340137958527, "learning_rate": 0.01, "loss": 1.9166, "step": 124350 }, { "epoch": 12.77774352651048, "grad_norm": 0.10124164074659348, "learning_rate": 0.01, "loss": 1.9439, "step": 124353 }, { "epoch": 12.778051787916153, "grad_norm": 0.0917411595582962, "learning_rate": 0.01, "loss": 1.9495, "step": 124356 }, { "epoch": 12.778360049321824, "grad_norm": 0.06239021196961403, "learning_rate": 0.01, "loss": 1.9291, "step": 124359 }, { "epoch": 12.778668310727497, "grad_norm": 0.0381314717233181, "learning_rate": 0.01, "loss": 1.9552, "step": 124362 }, { "epoch": 12.77897657213317, "grad_norm": 0.060684528201818466, "learning_rate": 0.01, "loss": 1.9129, "step": 124365 }, { "epoch": 12.77928483353884, "grad_norm": 0.04324139282107353, "learning_rate": 0.01, "loss": 1.9423, "step": 124368 }, { "epoch": 12.779593094944513, "grad_norm": 0.03981003910303116, "learning_rate": 0.01, "loss": 1.943, "step": 124371 }, { "epoch": 12.779901356350186, "grad_norm": 0.044959403574466705, "learning_rate": 0.01, "loss": 1.9097, "step": 124374 }, { "epoch": 12.780209617755856, "grad_norm": 0.04338802024722099, "learning_rate": 0.01, "loss": 1.9337, "step": 124377 }, { "epoch": 12.780517879161529, "grad_norm": 0.06274119764566422, "learning_rate": 0.01, "loss": 1.9185, "step": 124380 }, { "epoch": 12.780826140567202, "grad_norm": 0.10936722904443741, "learning_rate": 0.01, "loss": 1.9555, "step": 124383 }, { "epoch": 12.781134401972873, "grad_norm": 0.05914433300495148, "learning_rate": 0.01, "loss": 1.9332, "step": 124386 }, { "epoch": 12.781442663378545, "grad_norm": 0.047597043216228485, "learning_rate": 0.01, "loss": 1.9226, "step": 124389 }, { "epoch": 12.781750924784218, "grad_norm": 0.03429289162158966, "learning_rate": 0.01, "loss": 1.9494, "step": 124392 }, { "epoch": 12.782059186189889, "grad_norm": 0.040711697190999985, "learning_rate": 0.01, "loss": 1.9348, "step": 124395 }, { "epoch": 12.782367447595561, "grad_norm": 0.08311277627944946, "learning_rate": 0.01, "loss": 1.9395, "step": 124398 }, { "epoch": 12.782675709001232, "grad_norm": 0.1344379186630249, "learning_rate": 0.01, "loss": 1.9332, "step": 124401 }, { "epoch": 12.782983970406905, "grad_norm": 0.13270977139472961, "learning_rate": 0.01, "loss": 1.9485, "step": 124404 }, { "epoch": 12.783292231812577, "grad_norm": 0.12600675225257874, "learning_rate": 0.01, "loss": 1.9301, "step": 124407 }, { "epoch": 12.783600493218248, "grad_norm": 0.052186086773872375, "learning_rate": 0.01, "loss": 1.942, "step": 124410 }, { "epoch": 12.783908754623921, "grad_norm": 0.06286251544952393, "learning_rate": 0.01, "loss": 1.9259, "step": 124413 }, { "epoch": 12.784217016029594, "grad_norm": 0.09777403622865677, "learning_rate": 0.01, "loss": 1.919, "step": 124416 }, { "epoch": 12.784525277435264, "grad_norm": 0.03839965909719467, "learning_rate": 0.01, "loss": 1.948, "step": 124419 }, { "epoch": 12.784833538840937, "grad_norm": 0.06372542679309845, "learning_rate": 0.01, "loss": 1.931, "step": 124422 }, { "epoch": 12.78514180024661, "grad_norm": 0.06057681143283844, "learning_rate": 0.01, "loss": 1.9414, "step": 124425 }, { "epoch": 12.78545006165228, "grad_norm": 0.13042236864566803, "learning_rate": 0.01, "loss": 1.941, "step": 124428 }, { "epoch": 12.785758323057953, "grad_norm": 0.1508304327726364, "learning_rate": 0.01, "loss": 1.9605, "step": 124431 }, { "epoch": 12.786066584463626, "grad_norm": 0.09157309681177139, "learning_rate": 0.01, "loss": 1.9359, "step": 124434 }, { "epoch": 12.786374845869297, "grad_norm": 0.0452297180891037, "learning_rate": 0.01, "loss": 1.9212, "step": 124437 }, { "epoch": 12.78668310727497, "grad_norm": 0.11927188187837601, "learning_rate": 0.01, "loss": 1.9279, "step": 124440 }, { "epoch": 12.786991368680642, "grad_norm": 0.04036303982138634, "learning_rate": 0.01, "loss": 1.9272, "step": 124443 }, { "epoch": 12.787299630086313, "grad_norm": 0.057232797145843506, "learning_rate": 0.01, "loss": 1.9377, "step": 124446 }, { "epoch": 12.787607891491986, "grad_norm": 0.058794617652893066, "learning_rate": 0.01, "loss": 1.9421, "step": 124449 }, { "epoch": 12.787916152897658, "grad_norm": 0.06803694367408752, "learning_rate": 0.01, "loss": 1.9364, "step": 124452 }, { "epoch": 12.788224414303329, "grad_norm": 0.05735660716891289, "learning_rate": 0.01, "loss": 1.9357, "step": 124455 }, { "epoch": 12.788532675709002, "grad_norm": 0.059744693338871, "learning_rate": 0.01, "loss": 1.9071, "step": 124458 }, { "epoch": 12.788840937114673, "grad_norm": 0.05062204971909523, "learning_rate": 0.01, "loss": 1.9307, "step": 124461 }, { "epoch": 12.789149198520345, "grad_norm": 0.04351005703210831, "learning_rate": 0.01, "loss": 1.9226, "step": 124464 }, { "epoch": 12.789457459926018, "grad_norm": 0.034482888877391815, "learning_rate": 0.01, "loss": 1.9234, "step": 124467 }, { "epoch": 12.789765721331689, "grad_norm": 0.13502463698387146, "learning_rate": 0.01, "loss": 1.9324, "step": 124470 }, { "epoch": 12.790073982737361, "grad_norm": 0.07608455419540405, "learning_rate": 0.01, "loss": 1.9152, "step": 124473 }, { "epoch": 12.790382244143034, "grad_norm": 0.08391346037387848, "learning_rate": 0.01, "loss": 1.894, "step": 124476 }, { "epoch": 12.790690505548705, "grad_norm": 0.1317441761493683, "learning_rate": 0.01, "loss": 1.9312, "step": 124479 }, { "epoch": 12.790998766954377, "grad_norm": 0.13802526891231537, "learning_rate": 0.01, "loss": 1.925, "step": 124482 }, { "epoch": 12.79130702836005, "grad_norm": 0.11871980130672455, "learning_rate": 0.01, "loss": 1.9535, "step": 124485 }, { "epoch": 12.791615289765721, "grad_norm": 0.06530909240245819, "learning_rate": 0.01, "loss": 1.9305, "step": 124488 }, { "epoch": 12.791923551171394, "grad_norm": 0.06150613725185394, "learning_rate": 0.01, "loss": 1.9406, "step": 124491 }, { "epoch": 12.792231812577064, "grad_norm": 0.04338151589035988, "learning_rate": 0.01, "loss": 1.9276, "step": 124494 }, { "epoch": 12.792540073982737, "grad_norm": 0.05015654116868973, "learning_rate": 0.01, "loss": 1.9318, "step": 124497 }, { "epoch": 12.79284833538841, "grad_norm": 0.0415099561214447, "learning_rate": 0.01, "loss": 1.9476, "step": 124500 }, { "epoch": 12.79315659679408, "grad_norm": 0.0535995252430439, "learning_rate": 0.01, "loss": 1.9216, "step": 124503 }, { "epoch": 12.793464858199753, "grad_norm": 0.13396979868412018, "learning_rate": 0.01, "loss": 1.9611, "step": 124506 }, { "epoch": 12.793773119605426, "grad_norm": 0.04803071543574333, "learning_rate": 0.01, "loss": 1.9501, "step": 124509 }, { "epoch": 12.794081381011097, "grad_norm": 0.07891876995563507, "learning_rate": 0.01, "loss": 1.9401, "step": 124512 }, { "epoch": 12.79438964241677, "grad_norm": 0.05627993866801262, "learning_rate": 0.01, "loss": 1.9238, "step": 124515 }, { "epoch": 12.794697903822442, "grad_norm": 0.10919144004583359, "learning_rate": 0.01, "loss": 1.9436, "step": 124518 }, { "epoch": 12.795006165228113, "grad_norm": 0.04701576009392738, "learning_rate": 0.01, "loss": 1.9536, "step": 124521 }, { "epoch": 12.795314426633785, "grad_norm": 0.08555266261100769, "learning_rate": 0.01, "loss": 1.9574, "step": 124524 }, { "epoch": 12.795622688039458, "grad_norm": 0.042641106992959976, "learning_rate": 0.01, "loss": 1.9521, "step": 124527 }, { "epoch": 12.795930949445129, "grad_norm": 0.09084159135818481, "learning_rate": 0.01, "loss": 1.939, "step": 124530 }, { "epoch": 12.796239210850802, "grad_norm": 0.06366157531738281, "learning_rate": 0.01, "loss": 1.9422, "step": 124533 }, { "epoch": 12.796547472256474, "grad_norm": 0.031330134719610214, "learning_rate": 0.01, "loss": 1.9579, "step": 124536 }, { "epoch": 12.796855733662145, "grad_norm": 0.03788667172193527, "learning_rate": 0.01, "loss": 1.9368, "step": 124539 }, { "epoch": 12.797163995067818, "grad_norm": 0.09419793635606766, "learning_rate": 0.01, "loss": 1.9245, "step": 124542 }, { "epoch": 12.79747225647349, "grad_norm": 0.061394162476062775, "learning_rate": 0.01, "loss": 1.9338, "step": 124545 }, { "epoch": 12.797780517879161, "grad_norm": 0.12643764913082123, "learning_rate": 0.01, "loss": 1.9004, "step": 124548 }, { "epoch": 12.798088779284834, "grad_norm": 0.042745135724544525, "learning_rate": 0.01, "loss": 1.9234, "step": 124551 }, { "epoch": 12.798397040690505, "grad_norm": 0.03733081370592117, "learning_rate": 0.01, "loss": 1.9512, "step": 124554 }, { "epoch": 12.798705302096177, "grad_norm": 0.06673793494701385, "learning_rate": 0.01, "loss": 1.9404, "step": 124557 }, { "epoch": 12.79901356350185, "grad_norm": 0.07428089529275894, "learning_rate": 0.01, "loss": 1.9256, "step": 124560 }, { "epoch": 12.799321824907521, "grad_norm": 0.17655010521411896, "learning_rate": 0.01, "loss": 1.9543, "step": 124563 }, { "epoch": 12.799630086313194, "grad_norm": 0.09538134187459946, "learning_rate": 0.01, "loss": 1.9588, "step": 124566 }, { "epoch": 12.799938347718866, "grad_norm": 0.07083272188901901, "learning_rate": 0.01, "loss": 1.9149, "step": 124569 }, { "epoch": 12.800246609124537, "grad_norm": 0.10523665696382523, "learning_rate": 0.01, "loss": 1.9529, "step": 124572 }, { "epoch": 12.80055487053021, "grad_norm": 0.07451805472373962, "learning_rate": 0.01, "loss": 1.9188, "step": 124575 }, { "epoch": 12.800863131935882, "grad_norm": 0.04025748744606972, "learning_rate": 0.01, "loss": 1.9589, "step": 124578 }, { "epoch": 12.801171393341553, "grad_norm": 0.03501112014055252, "learning_rate": 0.01, "loss": 1.9012, "step": 124581 }, { "epoch": 12.801479654747226, "grad_norm": 0.045197147876024246, "learning_rate": 0.01, "loss": 1.934, "step": 124584 }, { "epoch": 12.801787916152898, "grad_norm": 0.03901546820998192, "learning_rate": 0.01, "loss": 1.9494, "step": 124587 }, { "epoch": 12.80209617755857, "grad_norm": 0.043865401297807693, "learning_rate": 0.01, "loss": 1.932, "step": 124590 }, { "epoch": 12.802404438964242, "grad_norm": 0.03975968062877655, "learning_rate": 0.01, "loss": 1.9199, "step": 124593 }, { "epoch": 12.802712700369913, "grad_norm": 0.05233794450759888, "learning_rate": 0.01, "loss": 1.9495, "step": 124596 }, { "epoch": 12.803020961775585, "grad_norm": 0.049878280609846115, "learning_rate": 0.01, "loss": 1.9215, "step": 124599 }, { "epoch": 12.803329223181258, "grad_norm": 0.07584605365991592, "learning_rate": 0.01, "loss": 1.9545, "step": 124602 }, { "epoch": 12.803637484586929, "grad_norm": 0.1152682900428772, "learning_rate": 0.01, "loss": 1.9484, "step": 124605 }, { "epoch": 12.803945745992602, "grad_norm": 0.0415656641125679, "learning_rate": 0.01, "loss": 1.9176, "step": 124608 }, { "epoch": 12.804254007398274, "grad_norm": 0.0453491173684597, "learning_rate": 0.01, "loss": 1.9307, "step": 124611 }, { "epoch": 12.804562268803945, "grad_norm": 0.04551032930612564, "learning_rate": 0.01, "loss": 1.9521, "step": 124614 }, { "epoch": 12.804870530209618, "grad_norm": 0.04969335347414017, "learning_rate": 0.01, "loss": 1.9409, "step": 124617 }, { "epoch": 12.80517879161529, "grad_norm": 0.195338174700737, "learning_rate": 0.01, "loss": 1.9243, "step": 124620 }, { "epoch": 12.805487053020961, "grad_norm": 0.0799102634191513, "learning_rate": 0.01, "loss": 1.9207, "step": 124623 }, { "epoch": 12.805795314426634, "grad_norm": 0.06339021027088165, "learning_rate": 0.01, "loss": 1.9311, "step": 124626 }, { "epoch": 12.806103575832307, "grad_norm": 0.0930168554186821, "learning_rate": 0.01, "loss": 1.921, "step": 124629 }, { "epoch": 12.806411837237977, "grad_norm": 0.0932261198759079, "learning_rate": 0.01, "loss": 1.941, "step": 124632 }, { "epoch": 12.80672009864365, "grad_norm": 0.07090446352958679, "learning_rate": 0.01, "loss": 1.93, "step": 124635 }, { "epoch": 12.807028360049323, "grad_norm": 0.07370625436306, "learning_rate": 0.01, "loss": 1.928, "step": 124638 }, { "epoch": 12.807336621454994, "grad_norm": 0.057869769632816315, "learning_rate": 0.01, "loss": 1.9229, "step": 124641 }, { "epoch": 12.807644882860666, "grad_norm": 0.04020111635327339, "learning_rate": 0.01, "loss": 1.9383, "step": 124644 }, { "epoch": 12.807953144266339, "grad_norm": 0.04522833973169327, "learning_rate": 0.01, "loss": 1.9297, "step": 124647 }, { "epoch": 12.80826140567201, "grad_norm": 0.0788583904504776, "learning_rate": 0.01, "loss": 1.9594, "step": 124650 }, { "epoch": 12.808569667077682, "grad_norm": 0.05737563967704773, "learning_rate": 0.01, "loss": 1.9716, "step": 124653 }, { "epoch": 12.808877928483353, "grad_norm": 0.05475354939699173, "learning_rate": 0.01, "loss": 1.9361, "step": 124656 }, { "epoch": 12.809186189889026, "grad_norm": 0.04790017381310463, "learning_rate": 0.01, "loss": 1.9372, "step": 124659 }, { "epoch": 12.809494451294698, "grad_norm": 0.07053875923156738, "learning_rate": 0.01, "loss": 1.9267, "step": 124662 }, { "epoch": 12.80980271270037, "grad_norm": 0.185977041721344, "learning_rate": 0.01, "loss": 1.9258, "step": 124665 }, { "epoch": 12.810110974106042, "grad_norm": 0.03858617693185806, "learning_rate": 0.01, "loss": 1.9552, "step": 124668 }, { "epoch": 12.810419235511715, "grad_norm": 0.044938284903764725, "learning_rate": 0.01, "loss": 1.9207, "step": 124671 }, { "epoch": 12.810727496917385, "grad_norm": 0.03785492107272148, "learning_rate": 0.01, "loss": 1.9445, "step": 124674 }, { "epoch": 12.811035758323058, "grad_norm": 0.0343979112803936, "learning_rate": 0.01, "loss": 1.9539, "step": 124677 }, { "epoch": 12.81134401972873, "grad_norm": 0.0346965529024601, "learning_rate": 0.01, "loss": 1.9365, "step": 124680 }, { "epoch": 12.811652281134402, "grad_norm": 0.054480504244565964, "learning_rate": 0.01, "loss": 1.9611, "step": 124683 }, { "epoch": 12.811960542540074, "grad_norm": 0.03773632273077965, "learning_rate": 0.01, "loss": 1.9374, "step": 124686 }, { "epoch": 12.812268803945745, "grad_norm": 0.10955578088760376, "learning_rate": 0.01, "loss": 1.937, "step": 124689 }, { "epoch": 12.812577065351418, "grad_norm": 0.159774050116539, "learning_rate": 0.01, "loss": 1.9401, "step": 124692 }, { "epoch": 12.81288532675709, "grad_norm": 0.13952012360095978, "learning_rate": 0.01, "loss": 1.94, "step": 124695 }, { "epoch": 12.813193588162761, "grad_norm": 0.05918041989207268, "learning_rate": 0.01, "loss": 1.9512, "step": 124698 }, { "epoch": 12.813501849568434, "grad_norm": 0.04900019243359566, "learning_rate": 0.01, "loss": 1.9236, "step": 124701 }, { "epoch": 12.813810110974106, "grad_norm": 0.055214982479810715, "learning_rate": 0.01, "loss": 1.9436, "step": 124704 }, { "epoch": 12.814118372379777, "grad_norm": 0.03945809230208397, "learning_rate": 0.01, "loss": 1.9224, "step": 124707 }, { "epoch": 12.81442663378545, "grad_norm": 0.07341895997524261, "learning_rate": 0.01, "loss": 1.914, "step": 124710 }, { "epoch": 12.814734895191123, "grad_norm": 0.07844100892543793, "learning_rate": 0.01, "loss": 1.9423, "step": 124713 }, { "epoch": 12.815043156596793, "grad_norm": 0.04210912436246872, "learning_rate": 0.01, "loss": 1.9281, "step": 124716 }, { "epoch": 12.815351418002466, "grad_norm": 0.04094111919403076, "learning_rate": 0.01, "loss": 1.941, "step": 124719 }, { "epoch": 12.815659679408139, "grad_norm": 0.038272298872470856, "learning_rate": 0.01, "loss": 1.9319, "step": 124722 }, { "epoch": 12.81596794081381, "grad_norm": 0.04964732751250267, "learning_rate": 0.01, "loss": 1.9551, "step": 124725 }, { "epoch": 12.816276202219482, "grad_norm": 0.16506518423557281, "learning_rate": 0.01, "loss": 1.9404, "step": 124728 }, { "epoch": 12.816584463625155, "grad_norm": 0.05684073269367218, "learning_rate": 0.01, "loss": 1.9391, "step": 124731 }, { "epoch": 12.816892725030826, "grad_norm": 0.05659734830260277, "learning_rate": 0.01, "loss": 1.9269, "step": 124734 }, { "epoch": 12.817200986436498, "grad_norm": 0.04733361303806305, "learning_rate": 0.01, "loss": 1.9498, "step": 124737 }, { "epoch": 12.817509247842171, "grad_norm": 0.035153668373823166, "learning_rate": 0.01, "loss": 1.9168, "step": 124740 }, { "epoch": 12.817817509247842, "grad_norm": 0.07535239309072495, "learning_rate": 0.01, "loss": 1.9297, "step": 124743 }, { "epoch": 12.818125770653515, "grad_norm": 0.07167541980743408, "learning_rate": 0.01, "loss": 1.9048, "step": 124746 }, { "epoch": 12.818434032059185, "grad_norm": 0.03314826637506485, "learning_rate": 0.01, "loss": 1.959, "step": 124749 }, { "epoch": 12.818742293464858, "grad_norm": 0.1441473662853241, "learning_rate": 0.01, "loss": 1.9444, "step": 124752 }, { "epoch": 12.81905055487053, "grad_norm": 0.10658802092075348, "learning_rate": 0.01, "loss": 1.9181, "step": 124755 }, { "epoch": 12.819358816276202, "grad_norm": 0.03933339938521385, "learning_rate": 0.01, "loss": 1.9281, "step": 124758 }, { "epoch": 12.819667077681874, "grad_norm": 0.08993334323167801, "learning_rate": 0.01, "loss": 1.9417, "step": 124761 }, { "epoch": 12.819975339087547, "grad_norm": 0.08880502730607986, "learning_rate": 0.01, "loss": 1.9381, "step": 124764 }, { "epoch": 12.820283600493218, "grad_norm": 0.09799978137016296, "learning_rate": 0.01, "loss": 1.9416, "step": 124767 }, { "epoch": 12.82059186189889, "grad_norm": 0.05939950421452522, "learning_rate": 0.01, "loss": 1.9327, "step": 124770 }, { "epoch": 12.820900123304563, "grad_norm": 0.036682989448308945, "learning_rate": 0.01, "loss": 1.9394, "step": 124773 }, { "epoch": 12.821208384710234, "grad_norm": 0.034832343459129333, "learning_rate": 0.01, "loss": 1.9343, "step": 124776 }, { "epoch": 12.821516646115906, "grad_norm": 0.05991778522729874, "learning_rate": 0.01, "loss": 1.9421, "step": 124779 }, { "epoch": 12.821824907521579, "grad_norm": 0.10300806164741516, "learning_rate": 0.01, "loss": 1.9492, "step": 124782 }, { "epoch": 12.82213316892725, "grad_norm": 0.06465953588485718, "learning_rate": 0.01, "loss": 1.9418, "step": 124785 }, { "epoch": 12.822441430332923, "grad_norm": 0.04358251765370369, "learning_rate": 0.01, "loss": 1.9199, "step": 124788 }, { "epoch": 12.822749691738593, "grad_norm": 0.091355100274086, "learning_rate": 0.01, "loss": 1.9338, "step": 124791 }, { "epoch": 12.823057953144266, "grad_norm": 0.0732480138540268, "learning_rate": 0.01, "loss": 1.9462, "step": 124794 }, { "epoch": 12.823366214549939, "grad_norm": 0.07829828560352325, "learning_rate": 0.01, "loss": 1.941, "step": 124797 }, { "epoch": 12.82367447595561, "grad_norm": 0.08272802084684372, "learning_rate": 0.01, "loss": 1.9118, "step": 124800 }, { "epoch": 12.823982737361282, "grad_norm": 0.10924140363931656, "learning_rate": 0.01, "loss": 1.9152, "step": 124803 }, { "epoch": 12.824290998766955, "grad_norm": 0.044341158121824265, "learning_rate": 0.01, "loss": 1.9521, "step": 124806 }, { "epoch": 12.824599260172626, "grad_norm": 0.06467002630233765, "learning_rate": 0.01, "loss": 1.9123, "step": 124809 }, { "epoch": 12.824907521578298, "grad_norm": 0.09542547166347504, "learning_rate": 0.01, "loss": 1.9518, "step": 124812 }, { "epoch": 12.825215782983971, "grad_norm": 0.0513361319899559, "learning_rate": 0.01, "loss": 1.9616, "step": 124815 }, { "epoch": 12.825524044389642, "grad_norm": 0.03738987445831299, "learning_rate": 0.01, "loss": 1.9306, "step": 124818 }, { "epoch": 12.825832305795315, "grad_norm": 0.04817575588822365, "learning_rate": 0.01, "loss": 1.9333, "step": 124821 }, { "epoch": 12.826140567200987, "grad_norm": 0.045744240283966064, "learning_rate": 0.01, "loss": 1.9586, "step": 124824 }, { "epoch": 12.826448828606658, "grad_norm": 0.04282943159341812, "learning_rate": 0.01, "loss": 1.9312, "step": 124827 }, { "epoch": 12.82675709001233, "grad_norm": 0.05912986397743225, "learning_rate": 0.01, "loss": 1.943, "step": 124830 }, { "epoch": 12.827065351418003, "grad_norm": 0.036798182874917984, "learning_rate": 0.01, "loss": 1.9204, "step": 124833 }, { "epoch": 12.827373612823674, "grad_norm": 0.048573464155197144, "learning_rate": 0.01, "loss": 1.9189, "step": 124836 }, { "epoch": 12.827681874229347, "grad_norm": 0.12525862455368042, "learning_rate": 0.01, "loss": 1.9369, "step": 124839 }, { "epoch": 12.827990135635018, "grad_norm": 0.12680740654468536, "learning_rate": 0.01, "loss": 1.9321, "step": 124842 }, { "epoch": 12.82829839704069, "grad_norm": 0.10475718975067139, "learning_rate": 0.01, "loss": 1.8857, "step": 124845 }, { "epoch": 12.828606658446363, "grad_norm": 0.03754694387316704, "learning_rate": 0.01, "loss": 1.922, "step": 124848 }, { "epoch": 12.828914919852034, "grad_norm": 0.03662050515413284, "learning_rate": 0.01, "loss": 1.9305, "step": 124851 }, { "epoch": 12.829223181257706, "grad_norm": 0.0618102066218853, "learning_rate": 0.01, "loss": 1.9273, "step": 124854 }, { "epoch": 12.829531442663379, "grad_norm": 0.03677065297961235, "learning_rate": 0.01, "loss": 1.9523, "step": 124857 }, { "epoch": 12.82983970406905, "grad_norm": 0.032928451895713806, "learning_rate": 0.01, "loss": 1.9482, "step": 124860 }, { "epoch": 12.830147965474723, "grad_norm": 0.05401785299181938, "learning_rate": 0.01, "loss": 1.9151, "step": 124863 }, { "epoch": 12.830456226880395, "grad_norm": 0.08427628874778748, "learning_rate": 0.01, "loss": 1.9448, "step": 124866 }, { "epoch": 12.830764488286066, "grad_norm": 0.06006439030170441, "learning_rate": 0.01, "loss": 1.9476, "step": 124869 }, { "epoch": 12.831072749691739, "grad_norm": 0.05316640064120293, "learning_rate": 0.01, "loss": 1.9088, "step": 124872 }, { "epoch": 12.831381011097411, "grad_norm": 0.0373900830745697, "learning_rate": 0.01, "loss": 1.9239, "step": 124875 }, { "epoch": 12.831689272503082, "grad_norm": 0.05033502355217934, "learning_rate": 0.01, "loss": 1.9313, "step": 124878 }, { "epoch": 12.831997533908755, "grad_norm": 0.06176524981856346, "learning_rate": 0.01, "loss": 1.9344, "step": 124881 }, { "epoch": 12.832305795314426, "grad_norm": 0.038508642464876175, "learning_rate": 0.01, "loss": 1.93, "step": 124884 }, { "epoch": 12.832614056720098, "grad_norm": 0.165926992893219, "learning_rate": 0.01, "loss": 1.9313, "step": 124887 }, { "epoch": 12.832922318125771, "grad_norm": 0.05937818065285683, "learning_rate": 0.01, "loss": 1.9444, "step": 124890 }, { "epoch": 12.833230579531442, "grad_norm": 0.04595981538295746, "learning_rate": 0.01, "loss": 1.93, "step": 124893 }, { "epoch": 12.833538840937115, "grad_norm": 0.04599396139383316, "learning_rate": 0.01, "loss": 1.9228, "step": 124896 }, { "epoch": 12.833847102342787, "grad_norm": 0.03681658208370209, "learning_rate": 0.01, "loss": 1.9456, "step": 124899 }, { "epoch": 12.834155363748458, "grad_norm": 0.04873548820614815, "learning_rate": 0.01, "loss": 1.9447, "step": 124902 }, { "epoch": 12.83446362515413, "grad_norm": 0.04858270660042763, "learning_rate": 0.01, "loss": 1.9315, "step": 124905 }, { "epoch": 12.834771886559803, "grad_norm": 0.0675751119852066, "learning_rate": 0.01, "loss": 1.9273, "step": 124908 }, { "epoch": 12.835080147965474, "grad_norm": 0.09886086732149124, "learning_rate": 0.01, "loss": 1.9444, "step": 124911 }, { "epoch": 12.835388409371147, "grad_norm": 0.08863268047571182, "learning_rate": 0.01, "loss": 1.9518, "step": 124914 }, { "epoch": 12.83569667077682, "grad_norm": 0.08596405386924744, "learning_rate": 0.01, "loss": 1.9265, "step": 124917 }, { "epoch": 12.83600493218249, "grad_norm": 0.11636263132095337, "learning_rate": 0.01, "loss": 1.9423, "step": 124920 }, { "epoch": 12.836313193588163, "grad_norm": 0.07743874937295914, "learning_rate": 0.01, "loss": 1.9371, "step": 124923 }, { "epoch": 12.836621454993836, "grad_norm": 0.11719123274087906, "learning_rate": 0.01, "loss": 1.9406, "step": 124926 }, { "epoch": 12.836929716399506, "grad_norm": 0.047170255333185196, "learning_rate": 0.01, "loss": 1.9349, "step": 124929 }, { "epoch": 12.837237977805179, "grad_norm": 0.10026972740888596, "learning_rate": 0.01, "loss": 1.9339, "step": 124932 }, { "epoch": 12.837546239210852, "grad_norm": 0.051657043397426605, "learning_rate": 0.01, "loss": 1.923, "step": 124935 }, { "epoch": 12.837854500616523, "grad_norm": 0.11636074632406235, "learning_rate": 0.01, "loss": 1.9039, "step": 124938 }, { "epoch": 12.838162762022195, "grad_norm": 0.03851227089762688, "learning_rate": 0.01, "loss": 1.9369, "step": 124941 }, { "epoch": 12.838471023427866, "grad_norm": 0.04746010899543762, "learning_rate": 0.01, "loss": 1.9329, "step": 124944 }, { "epoch": 12.838779284833539, "grad_norm": 0.06377697736024857, "learning_rate": 0.01, "loss": 1.9335, "step": 124947 }, { "epoch": 12.839087546239211, "grad_norm": 0.06227300688624382, "learning_rate": 0.01, "loss": 1.937, "step": 124950 }, { "epoch": 12.839395807644882, "grad_norm": 0.040766604244709015, "learning_rate": 0.01, "loss": 1.9307, "step": 124953 }, { "epoch": 12.839704069050555, "grad_norm": 0.06182684004306793, "learning_rate": 0.01, "loss": 1.9399, "step": 124956 }, { "epoch": 12.840012330456227, "grad_norm": 0.04518676921725273, "learning_rate": 0.01, "loss": 1.9029, "step": 124959 }, { "epoch": 12.840320591861898, "grad_norm": 0.05657326802611351, "learning_rate": 0.01, "loss": 1.9209, "step": 124962 }, { "epoch": 12.840628853267571, "grad_norm": 0.08930912613868713, "learning_rate": 0.01, "loss": 1.9096, "step": 124965 }, { "epoch": 12.840937114673244, "grad_norm": 0.05096893385052681, "learning_rate": 0.01, "loss": 1.9195, "step": 124968 }, { "epoch": 12.841245376078914, "grad_norm": 0.08593285828828812, "learning_rate": 0.01, "loss": 1.911, "step": 124971 }, { "epoch": 12.841553637484587, "grad_norm": 0.03875938430428505, "learning_rate": 0.01, "loss": 1.9266, "step": 124974 }, { "epoch": 12.84186189889026, "grad_norm": 0.11568406969308853, "learning_rate": 0.01, "loss": 1.9476, "step": 124977 }, { "epoch": 12.84217016029593, "grad_norm": 0.08995538204908371, "learning_rate": 0.01, "loss": 1.9067, "step": 124980 }, { "epoch": 12.842478421701603, "grad_norm": 0.06632540374994278, "learning_rate": 0.01, "loss": 1.952, "step": 124983 }, { "epoch": 12.842786683107274, "grad_norm": 0.06590662896633148, "learning_rate": 0.01, "loss": 1.9338, "step": 124986 }, { "epoch": 12.843094944512947, "grad_norm": 0.16798827052116394, "learning_rate": 0.01, "loss": 1.9411, "step": 124989 }, { "epoch": 12.84340320591862, "grad_norm": 0.13125044107437134, "learning_rate": 0.01, "loss": 1.9531, "step": 124992 }, { "epoch": 12.84371146732429, "grad_norm": 0.05619801953434944, "learning_rate": 0.01, "loss": 1.9351, "step": 124995 }, { "epoch": 12.844019728729963, "grad_norm": 0.03860598802566528, "learning_rate": 0.01, "loss": 1.9343, "step": 124998 }, { "epoch": 12.844327990135636, "grad_norm": 0.044260572642087936, "learning_rate": 0.01, "loss": 1.9495, "step": 125001 }, { "epoch": 12.844636251541306, "grad_norm": 0.04982342943549156, "learning_rate": 0.01, "loss": 1.9296, "step": 125004 }, { "epoch": 12.844944512946979, "grad_norm": 0.08060560375452042, "learning_rate": 0.01, "loss": 1.9201, "step": 125007 }, { "epoch": 12.845252774352652, "grad_norm": 0.043235816061496735, "learning_rate": 0.01, "loss": 1.901, "step": 125010 }, { "epoch": 12.845561035758323, "grad_norm": 0.05255037173628807, "learning_rate": 0.01, "loss": 1.9489, "step": 125013 }, { "epoch": 12.845869297163995, "grad_norm": 0.0638473629951477, "learning_rate": 0.01, "loss": 1.9313, "step": 125016 }, { "epoch": 12.846177558569668, "grad_norm": 0.038086581975221634, "learning_rate": 0.01, "loss": 1.9421, "step": 125019 }, { "epoch": 12.846485819975339, "grad_norm": 0.03848306089639664, "learning_rate": 0.01, "loss": 1.9227, "step": 125022 }, { "epoch": 12.846794081381011, "grad_norm": 0.08049533516168594, "learning_rate": 0.01, "loss": 1.9215, "step": 125025 }, { "epoch": 12.847102342786684, "grad_norm": 0.15486104786396027, "learning_rate": 0.01, "loss": 1.9356, "step": 125028 }, { "epoch": 12.847410604192355, "grad_norm": 0.07169552147388458, "learning_rate": 0.01, "loss": 1.942, "step": 125031 }, { "epoch": 12.847718865598027, "grad_norm": 0.040113043040037155, "learning_rate": 0.01, "loss": 1.9376, "step": 125034 }, { "epoch": 12.848027127003698, "grad_norm": 0.04267123341560364, "learning_rate": 0.01, "loss": 1.939, "step": 125037 }, { "epoch": 12.848335388409371, "grad_norm": 0.047227632254362106, "learning_rate": 0.01, "loss": 1.9269, "step": 125040 }, { "epoch": 12.848643649815044, "grad_norm": 0.03078641928732395, "learning_rate": 0.01, "loss": 1.9392, "step": 125043 }, { "epoch": 12.848951911220714, "grad_norm": 0.05143728852272034, "learning_rate": 0.01, "loss": 1.9664, "step": 125046 }, { "epoch": 12.849260172626387, "grad_norm": 0.04470685124397278, "learning_rate": 0.01, "loss": 1.9269, "step": 125049 }, { "epoch": 12.84956843403206, "grad_norm": 0.06949608027935028, "learning_rate": 0.01, "loss": 1.9205, "step": 125052 }, { "epoch": 12.84987669543773, "grad_norm": 0.06069021672010422, "learning_rate": 0.01, "loss": 1.9347, "step": 125055 }, { "epoch": 12.850184956843403, "grad_norm": 0.05496753752231598, "learning_rate": 0.01, "loss": 1.9228, "step": 125058 }, { "epoch": 12.850493218249076, "grad_norm": 0.04509497433900833, "learning_rate": 0.01, "loss": 1.9328, "step": 125061 }, { "epoch": 12.850801479654747, "grad_norm": 0.03167354315519333, "learning_rate": 0.01, "loss": 1.9297, "step": 125064 }, { "epoch": 12.85110974106042, "grad_norm": 0.033969614654779434, "learning_rate": 0.01, "loss": 1.9098, "step": 125067 }, { "epoch": 12.851418002466092, "grad_norm": 0.06193241477012634, "learning_rate": 0.01, "loss": 1.9381, "step": 125070 }, { "epoch": 12.851726263871763, "grad_norm": 0.037595510482788086, "learning_rate": 0.01, "loss": 1.9131, "step": 125073 }, { "epoch": 12.852034525277436, "grad_norm": 0.04467674344778061, "learning_rate": 0.01, "loss": 1.9402, "step": 125076 }, { "epoch": 12.852342786683106, "grad_norm": 0.04891891032457352, "learning_rate": 0.01, "loss": 1.9314, "step": 125079 }, { "epoch": 12.852651048088779, "grad_norm": 0.1778205782175064, "learning_rate": 0.01, "loss": 1.9426, "step": 125082 }, { "epoch": 12.852959309494452, "grad_norm": 0.139647975564003, "learning_rate": 0.01, "loss": 1.9247, "step": 125085 }, { "epoch": 12.853267570900123, "grad_norm": 0.06531763821840286, "learning_rate": 0.01, "loss": 1.9273, "step": 125088 }, { "epoch": 12.853575832305795, "grad_norm": 0.08372994512319565, "learning_rate": 0.01, "loss": 1.9183, "step": 125091 }, { "epoch": 12.853884093711468, "grad_norm": 0.07141024619340897, "learning_rate": 0.01, "loss": 1.9365, "step": 125094 }, { "epoch": 12.854192355117139, "grad_norm": 0.06588217616081238, "learning_rate": 0.01, "loss": 1.9391, "step": 125097 }, { "epoch": 12.854500616522811, "grad_norm": 0.059647466987371445, "learning_rate": 0.01, "loss": 1.9307, "step": 125100 }, { "epoch": 12.854808877928484, "grad_norm": 0.04267524182796478, "learning_rate": 0.01, "loss": 1.9338, "step": 125103 }, { "epoch": 12.855117139334155, "grad_norm": 0.036257755011320114, "learning_rate": 0.01, "loss": 1.9467, "step": 125106 }, { "epoch": 12.855425400739827, "grad_norm": 0.07258643954992294, "learning_rate": 0.01, "loss": 1.9488, "step": 125109 }, { "epoch": 12.8557336621455, "grad_norm": 0.032383304089307785, "learning_rate": 0.01, "loss": 1.9377, "step": 125112 }, { "epoch": 12.856041923551171, "grad_norm": 0.08107122778892517, "learning_rate": 0.01, "loss": 1.9514, "step": 125115 }, { "epoch": 12.856350184956844, "grad_norm": 0.09870468825101852, "learning_rate": 0.01, "loss": 1.9458, "step": 125118 }, { "epoch": 12.856658446362516, "grad_norm": 0.1148717999458313, "learning_rate": 0.01, "loss": 1.9154, "step": 125121 }, { "epoch": 12.856966707768187, "grad_norm": 0.04018497094511986, "learning_rate": 0.01, "loss": 1.942, "step": 125124 }, { "epoch": 12.85727496917386, "grad_norm": 0.07041523605585098, "learning_rate": 0.01, "loss": 1.9396, "step": 125127 }, { "epoch": 12.857583230579532, "grad_norm": 0.03497825562953949, "learning_rate": 0.01, "loss": 1.9127, "step": 125130 }, { "epoch": 12.857891491985203, "grad_norm": 0.10776429623365402, "learning_rate": 0.01, "loss": 1.936, "step": 125133 }, { "epoch": 12.858199753390876, "grad_norm": 0.04052712395787239, "learning_rate": 0.01, "loss": 1.9182, "step": 125136 }, { "epoch": 12.858508014796547, "grad_norm": 0.07829517126083374, "learning_rate": 0.01, "loss": 1.9203, "step": 125139 }, { "epoch": 12.85881627620222, "grad_norm": 0.06916956603527069, "learning_rate": 0.01, "loss": 1.9206, "step": 125142 }, { "epoch": 12.859124537607892, "grad_norm": 0.037663985043764114, "learning_rate": 0.01, "loss": 1.9226, "step": 125145 }, { "epoch": 12.859432799013563, "grad_norm": 0.11586187034845352, "learning_rate": 0.01, "loss": 1.8948, "step": 125148 }, { "epoch": 12.859741060419235, "grad_norm": 0.07533598691225052, "learning_rate": 0.01, "loss": 1.9297, "step": 125151 }, { "epoch": 12.860049321824908, "grad_norm": 0.04899691045284271, "learning_rate": 0.01, "loss": 1.9209, "step": 125154 }, { "epoch": 12.860357583230579, "grad_norm": 0.0471162348985672, "learning_rate": 0.01, "loss": 1.9438, "step": 125157 }, { "epoch": 12.860665844636252, "grad_norm": 0.03868661820888519, "learning_rate": 0.01, "loss": 1.9345, "step": 125160 }, { "epoch": 12.860974106041924, "grad_norm": 0.05307555943727493, "learning_rate": 0.01, "loss": 1.9325, "step": 125163 }, { "epoch": 12.861282367447595, "grad_norm": 0.05460083484649658, "learning_rate": 0.01, "loss": 1.9225, "step": 125166 }, { "epoch": 12.861590628853268, "grad_norm": 0.12687717378139496, "learning_rate": 0.01, "loss": 1.9303, "step": 125169 }, { "epoch": 12.86189889025894, "grad_norm": 0.07197766751050949, "learning_rate": 0.01, "loss": 1.9566, "step": 125172 }, { "epoch": 12.862207151664611, "grad_norm": 0.08465447276830673, "learning_rate": 0.01, "loss": 1.925, "step": 125175 }, { "epoch": 12.862515413070284, "grad_norm": 0.0753878802061081, "learning_rate": 0.01, "loss": 1.9281, "step": 125178 }, { "epoch": 12.862823674475955, "grad_norm": 0.08799479156732559, "learning_rate": 0.01, "loss": 1.9295, "step": 125181 }, { "epoch": 12.863131935881627, "grad_norm": 0.03935414180159569, "learning_rate": 0.01, "loss": 1.9313, "step": 125184 }, { "epoch": 12.8634401972873, "grad_norm": 0.04997342824935913, "learning_rate": 0.01, "loss": 1.9093, "step": 125187 }, { "epoch": 12.863748458692971, "grad_norm": 0.12483391165733337, "learning_rate": 0.01, "loss": 1.9352, "step": 125190 }, { "epoch": 12.864056720098644, "grad_norm": 0.03834403306245804, "learning_rate": 0.01, "loss": 1.9029, "step": 125193 }, { "epoch": 12.864364981504316, "grad_norm": 0.038831330835819244, "learning_rate": 0.01, "loss": 1.9282, "step": 125196 }, { "epoch": 12.864673242909987, "grad_norm": 0.05377119407057762, "learning_rate": 0.01, "loss": 1.9336, "step": 125199 }, { "epoch": 12.86498150431566, "grad_norm": 0.0915791466832161, "learning_rate": 0.01, "loss": 1.9239, "step": 125202 }, { "epoch": 12.865289765721332, "grad_norm": 0.03884097561240196, "learning_rate": 0.01, "loss": 1.9229, "step": 125205 }, { "epoch": 12.865598027127003, "grad_norm": 0.03380714729428291, "learning_rate": 0.01, "loss": 1.9379, "step": 125208 }, { "epoch": 12.865906288532676, "grad_norm": 0.0418735109269619, "learning_rate": 0.01, "loss": 1.925, "step": 125211 }, { "epoch": 12.866214549938348, "grad_norm": 0.041301846504211426, "learning_rate": 0.01, "loss": 1.913, "step": 125214 }, { "epoch": 12.86652281134402, "grad_norm": 0.0551394484937191, "learning_rate": 0.01, "loss": 1.9406, "step": 125217 }, { "epoch": 12.866831072749692, "grad_norm": 0.04823874682188034, "learning_rate": 0.01, "loss": 1.9366, "step": 125220 }, { "epoch": 12.867139334155365, "grad_norm": 0.10362028330564499, "learning_rate": 0.01, "loss": 1.9163, "step": 125223 }, { "epoch": 12.867447595561035, "grad_norm": 0.06781326979398727, "learning_rate": 0.01, "loss": 1.9116, "step": 125226 }, { "epoch": 12.867755856966708, "grad_norm": 0.06294917315244675, "learning_rate": 0.01, "loss": 1.9152, "step": 125229 }, { "epoch": 12.868064118372379, "grad_norm": 0.060097068548202515, "learning_rate": 0.01, "loss": 1.9356, "step": 125232 }, { "epoch": 12.868372379778052, "grad_norm": 0.04319571703672409, "learning_rate": 0.01, "loss": 1.9533, "step": 125235 }, { "epoch": 12.868680641183724, "grad_norm": 0.03677972033619881, "learning_rate": 0.01, "loss": 1.9273, "step": 125238 }, { "epoch": 12.868988902589395, "grad_norm": 0.12327895313501358, "learning_rate": 0.01, "loss": 1.9433, "step": 125241 }, { "epoch": 12.869297163995068, "grad_norm": 0.05167968198657036, "learning_rate": 0.01, "loss": 1.9431, "step": 125244 }, { "epoch": 12.86960542540074, "grad_norm": 0.047148581594228745, "learning_rate": 0.01, "loss": 1.9033, "step": 125247 }, { "epoch": 12.869913686806411, "grad_norm": 0.08654278516769409, "learning_rate": 0.01, "loss": 1.9483, "step": 125250 }, { "epoch": 12.870221948212084, "grad_norm": 0.06045467033982277, "learning_rate": 0.01, "loss": 1.93, "step": 125253 }, { "epoch": 12.870530209617757, "grad_norm": 0.09662190079689026, "learning_rate": 0.01, "loss": 1.945, "step": 125256 }, { "epoch": 12.870838471023427, "grad_norm": 0.08902593702077866, "learning_rate": 0.01, "loss": 1.9394, "step": 125259 }, { "epoch": 12.8711467324291, "grad_norm": 0.042208779603242874, "learning_rate": 0.01, "loss": 1.9342, "step": 125262 }, { "epoch": 12.871454993834773, "grad_norm": 0.1159537062048912, "learning_rate": 0.01, "loss": 1.942, "step": 125265 }, { "epoch": 12.871763255240444, "grad_norm": 0.10170702636241913, "learning_rate": 0.01, "loss": 1.905, "step": 125268 }, { "epoch": 12.872071516646116, "grad_norm": 0.0402146615087986, "learning_rate": 0.01, "loss": 1.9422, "step": 125271 }, { "epoch": 12.872379778051787, "grad_norm": 0.12195990979671478, "learning_rate": 0.01, "loss": 1.935, "step": 125274 }, { "epoch": 12.87268803945746, "grad_norm": 0.049094632267951965, "learning_rate": 0.01, "loss": 1.9221, "step": 125277 }, { "epoch": 12.872996300863132, "grad_norm": 0.044050734490156174, "learning_rate": 0.01, "loss": 1.9535, "step": 125280 }, { "epoch": 12.873304562268803, "grad_norm": 0.040662892162799835, "learning_rate": 0.01, "loss": 1.9423, "step": 125283 }, { "epoch": 12.873612823674476, "grad_norm": 0.03499949350953102, "learning_rate": 0.01, "loss": 1.9129, "step": 125286 }, { "epoch": 12.873921085080148, "grad_norm": 0.054730553179979324, "learning_rate": 0.01, "loss": 1.954, "step": 125289 }, { "epoch": 12.87422934648582, "grad_norm": 0.04403527081012726, "learning_rate": 0.01, "loss": 1.9324, "step": 125292 }, { "epoch": 12.874537607891492, "grad_norm": 0.037161070853471756, "learning_rate": 0.01, "loss": 1.932, "step": 125295 }, { "epoch": 12.874845869297165, "grad_norm": 0.03912176191806793, "learning_rate": 0.01, "loss": 1.9358, "step": 125298 }, { "epoch": 12.875154130702835, "grad_norm": 0.09912096709012985, "learning_rate": 0.01, "loss": 1.9364, "step": 125301 }, { "epoch": 12.875462392108508, "grad_norm": 0.10517314821481705, "learning_rate": 0.01, "loss": 1.9531, "step": 125304 }, { "epoch": 12.87577065351418, "grad_norm": 0.08175603300333023, "learning_rate": 0.01, "loss": 1.9426, "step": 125307 }, { "epoch": 12.876078914919852, "grad_norm": 0.04346807673573494, "learning_rate": 0.01, "loss": 1.9758, "step": 125310 }, { "epoch": 12.876387176325524, "grad_norm": 0.07453333586454391, "learning_rate": 0.01, "loss": 1.9442, "step": 125313 }, { "epoch": 12.876695437731197, "grad_norm": 0.04787507653236389, "learning_rate": 0.01, "loss": 1.9547, "step": 125316 }, { "epoch": 12.877003699136868, "grad_norm": 0.03218242898583412, "learning_rate": 0.01, "loss": 1.9594, "step": 125319 }, { "epoch": 12.87731196054254, "grad_norm": 0.055315811187028885, "learning_rate": 0.01, "loss": 1.9445, "step": 125322 }, { "epoch": 12.877620221948213, "grad_norm": 0.08303569257259369, "learning_rate": 0.01, "loss": 1.9196, "step": 125325 }, { "epoch": 12.877928483353884, "grad_norm": 0.07113473862409592, "learning_rate": 0.01, "loss": 1.9116, "step": 125328 }, { "epoch": 12.878236744759556, "grad_norm": 0.06628505885601044, "learning_rate": 0.01, "loss": 1.9269, "step": 125331 }, { "epoch": 12.878545006165227, "grad_norm": 0.06445708870887756, "learning_rate": 0.01, "loss": 1.9662, "step": 125334 }, { "epoch": 12.8788532675709, "grad_norm": 0.05137239024043083, "learning_rate": 0.01, "loss": 1.9253, "step": 125337 }, { "epoch": 12.879161528976573, "grad_norm": 0.034032564610242844, "learning_rate": 0.01, "loss": 1.9383, "step": 125340 }, { "epoch": 12.879469790382243, "grad_norm": 0.05968916416168213, "learning_rate": 0.01, "loss": 1.9261, "step": 125343 }, { "epoch": 12.879778051787916, "grad_norm": 0.14465594291687012, "learning_rate": 0.01, "loss": 1.9362, "step": 125346 }, { "epoch": 12.880086313193589, "grad_norm": 0.06443560123443604, "learning_rate": 0.01, "loss": 1.9455, "step": 125349 }, { "epoch": 12.88039457459926, "grad_norm": 0.08430998027324677, "learning_rate": 0.01, "loss": 1.9367, "step": 125352 }, { "epoch": 12.880702836004932, "grad_norm": 0.04233575984835625, "learning_rate": 0.01, "loss": 1.9153, "step": 125355 }, { "epoch": 12.881011097410605, "grad_norm": 0.10589230060577393, "learning_rate": 0.01, "loss": 1.9209, "step": 125358 }, { "epoch": 12.881319358816276, "grad_norm": 0.1286308318376541, "learning_rate": 0.01, "loss": 1.9441, "step": 125361 }, { "epoch": 12.881627620221948, "grad_norm": 0.09348342567682266, "learning_rate": 0.01, "loss": 1.9408, "step": 125364 }, { "epoch": 12.881935881627621, "grad_norm": 0.09305001050233841, "learning_rate": 0.01, "loss": 1.9639, "step": 125367 }, { "epoch": 12.882244143033292, "grad_norm": 0.07719358056783676, "learning_rate": 0.01, "loss": 1.9348, "step": 125370 }, { "epoch": 12.882552404438965, "grad_norm": 0.04221201315522194, "learning_rate": 0.01, "loss": 1.9336, "step": 125373 }, { "epoch": 12.882860665844635, "grad_norm": 0.03923868015408516, "learning_rate": 0.01, "loss": 1.923, "step": 125376 }, { "epoch": 12.883168927250308, "grad_norm": 0.056949593126773834, "learning_rate": 0.01, "loss": 1.9259, "step": 125379 }, { "epoch": 12.88347718865598, "grad_norm": 0.05790695548057556, "learning_rate": 0.01, "loss": 1.9552, "step": 125382 }, { "epoch": 12.883785450061652, "grad_norm": 0.12621572613716125, "learning_rate": 0.01, "loss": 1.9392, "step": 125385 }, { "epoch": 12.884093711467324, "grad_norm": 0.08982403576374054, "learning_rate": 0.01, "loss": 1.9214, "step": 125388 }, { "epoch": 12.884401972872997, "grad_norm": 0.07278390973806381, "learning_rate": 0.01, "loss": 1.9229, "step": 125391 }, { "epoch": 12.884710234278668, "grad_norm": 0.04813414439558983, "learning_rate": 0.01, "loss": 1.9493, "step": 125394 }, { "epoch": 12.88501849568434, "grad_norm": 0.04034329950809479, "learning_rate": 0.01, "loss": 1.9324, "step": 125397 }, { "epoch": 12.885326757090013, "grad_norm": 0.03731022775173187, "learning_rate": 0.01, "loss": 1.9273, "step": 125400 }, { "epoch": 12.885635018495684, "grad_norm": 0.03327002376317978, "learning_rate": 0.01, "loss": 1.9311, "step": 125403 }, { "epoch": 12.885943279901356, "grad_norm": 0.04345870018005371, "learning_rate": 0.01, "loss": 1.9485, "step": 125406 }, { "epoch": 12.886251541307029, "grad_norm": 0.2377752810716629, "learning_rate": 0.01, "loss": 1.936, "step": 125409 }, { "epoch": 12.8865598027127, "grad_norm": 0.2373015135526657, "learning_rate": 0.01, "loss": 1.9479, "step": 125412 }, { "epoch": 12.886868064118373, "grad_norm": 0.13731715083122253, "learning_rate": 0.01, "loss": 1.9226, "step": 125415 }, { "epoch": 12.887176325524045, "grad_norm": 0.06976892054080963, "learning_rate": 0.01, "loss": 1.9285, "step": 125418 }, { "epoch": 12.887484586929716, "grad_norm": 0.03481016680598259, "learning_rate": 0.01, "loss": 1.9481, "step": 125421 }, { "epoch": 12.887792848335389, "grad_norm": 0.03333835303783417, "learning_rate": 0.01, "loss": 1.9297, "step": 125424 }, { "epoch": 12.88810110974106, "grad_norm": 0.039861682802438736, "learning_rate": 0.01, "loss": 1.9304, "step": 125427 }, { "epoch": 12.888409371146732, "grad_norm": 0.03844046965241432, "learning_rate": 0.01, "loss": 1.9332, "step": 125430 }, { "epoch": 12.888717632552405, "grad_norm": 0.0711275041103363, "learning_rate": 0.01, "loss": 1.9189, "step": 125433 }, { "epoch": 12.889025893958076, "grad_norm": 0.039518602192401886, "learning_rate": 0.01, "loss": 1.9169, "step": 125436 }, { "epoch": 12.889334155363748, "grad_norm": 0.0478103831410408, "learning_rate": 0.01, "loss": 1.9005, "step": 125439 }, { "epoch": 12.889642416769421, "grad_norm": 0.04707792401313782, "learning_rate": 0.01, "loss": 1.9323, "step": 125442 }, { "epoch": 12.889950678175092, "grad_norm": 0.03777766972780228, "learning_rate": 0.01, "loss": 1.9223, "step": 125445 }, { "epoch": 12.890258939580765, "grad_norm": 0.0661175474524498, "learning_rate": 0.01, "loss": 1.9487, "step": 125448 }, { "epoch": 12.890567200986437, "grad_norm": 0.09127342700958252, "learning_rate": 0.01, "loss": 1.9232, "step": 125451 }, { "epoch": 12.890875462392108, "grad_norm": 0.06170405074954033, "learning_rate": 0.01, "loss": 1.9283, "step": 125454 }, { "epoch": 12.89118372379778, "grad_norm": 0.08631531894207001, "learning_rate": 0.01, "loss": 1.9538, "step": 125457 }, { "epoch": 12.891491985203453, "grad_norm": 0.08351920545101166, "learning_rate": 0.01, "loss": 1.9385, "step": 125460 }, { "epoch": 12.891800246609124, "grad_norm": 0.09658841788768768, "learning_rate": 0.01, "loss": 1.9426, "step": 125463 }, { "epoch": 12.892108508014797, "grad_norm": 0.04116319864988327, "learning_rate": 0.01, "loss": 1.9356, "step": 125466 }, { "epoch": 12.892416769420468, "grad_norm": 0.06576710194349289, "learning_rate": 0.01, "loss": 1.9289, "step": 125469 }, { "epoch": 12.89272503082614, "grad_norm": 0.08365806192159653, "learning_rate": 0.01, "loss": 1.9632, "step": 125472 }, { "epoch": 12.893033292231813, "grad_norm": 0.0618315227329731, "learning_rate": 0.01, "loss": 1.9365, "step": 125475 }, { "epoch": 12.893341553637484, "grad_norm": 0.06475695967674255, "learning_rate": 0.01, "loss": 1.9288, "step": 125478 }, { "epoch": 12.893649815043156, "grad_norm": 0.04547294229269028, "learning_rate": 0.01, "loss": 1.937, "step": 125481 }, { "epoch": 12.893958076448829, "grad_norm": 0.04905145242810249, "learning_rate": 0.01, "loss": 1.9613, "step": 125484 }, { "epoch": 12.8942663378545, "grad_norm": 0.03583730012178421, "learning_rate": 0.01, "loss": 1.9283, "step": 125487 }, { "epoch": 12.894574599260173, "grad_norm": 0.03644542396068573, "learning_rate": 0.01, "loss": 1.9216, "step": 125490 }, { "epoch": 12.894882860665845, "grad_norm": 0.04800483584403992, "learning_rate": 0.01, "loss": 1.9247, "step": 125493 }, { "epoch": 12.895191122071516, "grad_norm": 0.06762004643678665, "learning_rate": 0.01, "loss": 1.9324, "step": 125496 }, { "epoch": 12.895499383477189, "grad_norm": 0.08553200960159302, "learning_rate": 0.01, "loss": 1.9041, "step": 125499 }, { "epoch": 12.895807644882861, "grad_norm": 0.09297972172498703, "learning_rate": 0.01, "loss": 1.9486, "step": 125502 }, { "epoch": 12.896115906288532, "grad_norm": 0.036268945783376694, "learning_rate": 0.01, "loss": 1.9318, "step": 125505 }, { "epoch": 12.896424167694205, "grad_norm": 0.07552669942378998, "learning_rate": 0.01, "loss": 1.9375, "step": 125508 }, { "epoch": 12.896732429099877, "grad_norm": 0.043082501739263535, "learning_rate": 0.01, "loss": 1.9265, "step": 125511 }, { "epoch": 12.897040690505548, "grad_norm": 0.03717859089374542, "learning_rate": 0.01, "loss": 1.9219, "step": 125514 }, { "epoch": 12.897348951911221, "grad_norm": 0.13602522015571594, "learning_rate": 0.01, "loss": 1.9047, "step": 125517 }, { "epoch": 12.897657213316894, "grad_norm": 0.06193123012781143, "learning_rate": 0.01, "loss": 1.9282, "step": 125520 }, { "epoch": 12.897965474722564, "grad_norm": 0.12001435458660126, "learning_rate": 0.01, "loss": 1.9439, "step": 125523 }, { "epoch": 12.898273736128237, "grad_norm": 0.06117141991853714, "learning_rate": 0.01, "loss": 1.921, "step": 125526 }, { "epoch": 12.898581997533908, "grad_norm": 0.18860256671905518, "learning_rate": 0.01, "loss": 1.9357, "step": 125529 }, { "epoch": 12.89889025893958, "grad_norm": 0.046133674681186676, "learning_rate": 0.01, "loss": 1.9363, "step": 125532 }, { "epoch": 12.899198520345253, "grad_norm": 0.034373749047517776, "learning_rate": 0.01, "loss": 1.9253, "step": 125535 }, { "epoch": 12.899506781750924, "grad_norm": 0.06683547049760818, "learning_rate": 0.01, "loss": 1.938, "step": 125538 }, { "epoch": 12.899815043156597, "grad_norm": 0.08050347864627838, "learning_rate": 0.01, "loss": 1.9278, "step": 125541 }, { "epoch": 12.90012330456227, "grad_norm": 0.08024398982524872, "learning_rate": 0.01, "loss": 1.9367, "step": 125544 }, { "epoch": 12.90043156596794, "grad_norm": 0.07231173664331436, "learning_rate": 0.01, "loss": 1.9208, "step": 125547 }, { "epoch": 12.900739827373613, "grad_norm": 0.10628146678209305, "learning_rate": 0.01, "loss": 1.9517, "step": 125550 }, { "epoch": 12.901048088779286, "grad_norm": 0.03679117560386658, "learning_rate": 0.01, "loss": 1.9498, "step": 125553 }, { "epoch": 12.901356350184956, "grad_norm": 0.08002956211566925, "learning_rate": 0.01, "loss": 1.9344, "step": 125556 }, { "epoch": 12.901664611590629, "grad_norm": 0.1332220882177353, "learning_rate": 0.01, "loss": 1.9281, "step": 125559 }, { "epoch": 12.901972872996302, "grad_norm": 0.0669090673327446, "learning_rate": 0.01, "loss": 1.925, "step": 125562 }, { "epoch": 12.902281134401973, "grad_norm": 0.07840543985366821, "learning_rate": 0.01, "loss": 1.9741, "step": 125565 }, { "epoch": 12.902589395807645, "grad_norm": 0.047904230654239655, "learning_rate": 0.01, "loss": 1.9146, "step": 125568 }, { "epoch": 12.902897657213316, "grad_norm": 0.05615457519888878, "learning_rate": 0.01, "loss": 1.9361, "step": 125571 }, { "epoch": 12.903205918618989, "grad_norm": 0.10385458916425705, "learning_rate": 0.01, "loss": 1.927, "step": 125574 }, { "epoch": 12.903514180024661, "grad_norm": 0.08210942894220352, "learning_rate": 0.01, "loss": 1.912, "step": 125577 }, { "epoch": 12.903822441430332, "grad_norm": 0.05919887498021126, "learning_rate": 0.01, "loss": 1.95, "step": 125580 }, { "epoch": 12.904130702836005, "grad_norm": 0.08614572137594223, "learning_rate": 0.01, "loss": 1.9174, "step": 125583 }, { "epoch": 12.904438964241677, "grad_norm": 0.040114693343639374, "learning_rate": 0.01, "loss": 1.925, "step": 125586 }, { "epoch": 12.904747225647348, "grad_norm": 0.06311607360839844, "learning_rate": 0.01, "loss": 1.9403, "step": 125589 }, { "epoch": 12.905055487053021, "grad_norm": 0.06587512791156769, "learning_rate": 0.01, "loss": 1.9057, "step": 125592 }, { "epoch": 12.905363748458694, "grad_norm": 0.03994138911366463, "learning_rate": 0.01, "loss": 1.9326, "step": 125595 }, { "epoch": 12.905672009864364, "grad_norm": 0.04853265359997749, "learning_rate": 0.01, "loss": 1.9116, "step": 125598 }, { "epoch": 12.905980271270037, "grad_norm": 0.09557167440652847, "learning_rate": 0.01, "loss": 1.9116, "step": 125601 }, { "epoch": 12.90628853267571, "grad_norm": 0.05322464928030968, "learning_rate": 0.01, "loss": 1.9229, "step": 125604 }, { "epoch": 12.90659679408138, "grad_norm": 0.05559983104467392, "learning_rate": 0.01, "loss": 1.9273, "step": 125607 }, { "epoch": 12.906905055487053, "grad_norm": 0.035906389355659485, "learning_rate": 0.01, "loss": 1.9243, "step": 125610 }, { "epoch": 12.907213316892726, "grad_norm": 0.05089879781007767, "learning_rate": 0.01, "loss": 1.9058, "step": 125613 }, { "epoch": 12.907521578298397, "grad_norm": 0.0521286278963089, "learning_rate": 0.01, "loss": 1.9349, "step": 125616 }, { "epoch": 12.90782983970407, "grad_norm": 0.060793764889240265, "learning_rate": 0.01, "loss": 1.9259, "step": 125619 }, { "epoch": 12.90813810110974, "grad_norm": 0.1368797868490219, "learning_rate": 0.01, "loss": 1.9149, "step": 125622 }, { "epoch": 12.908446362515413, "grad_norm": 0.05137024074792862, "learning_rate": 0.01, "loss": 1.9263, "step": 125625 }, { "epoch": 12.908754623921086, "grad_norm": 0.05333459749817848, "learning_rate": 0.01, "loss": 1.9599, "step": 125628 }, { "epoch": 12.909062885326756, "grad_norm": 0.04691629111766815, "learning_rate": 0.01, "loss": 1.9166, "step": 125631 }, { "epoch": 12.909371146732429, "grad_norm": 0.047596320509910583, "learning_rate": 0.01, "loss": 1.9131, "step": 125634 }, { "epoch": 12.909679408138102, "grad_norm": 0.1054455116391182, "learning_rate": 0.01, "loss": 1.9164, "step": 125637 }, { "epoch": 12.909987669543773, "grad_norm": 0.06832266598939896, "learning_rate": 0.01, "loss": 1.9286, "step": 125640 }, { "epoch": 12.910295930949445, "grad_norm": 0.09326568990945816, "learning_rate": 0.01, "loss": 1.9496, "step": 125643 }, { "epoch": 12.910604192355118, "grad_norm": 0.04701947420835495, "learning_rate": 0.01, "loss": 1.9187, "step": 125646 }, { "epoch": 12.910912453760789, "grad_norm": 0.07189621031284332, "learning_rate": 0.01, "loss": 1.9292, "step": 125649 }, { "epoch": 12.911220715166461, "grad_norm": 0.07685680687427521, "learning_rate": 0.01, "loss": 1.9327, "step": 125652 }, { "epoch": 12.911528976572134, "grad_norm": 0.03625324368476868, "learning_rate": 0.01, "loss": 1.9562, "step": 125655 }, { "epoch": 12.911837237977805, "grad_norm": 0.0910705178976059, "learning_rate": 0.01, "loss": 1.9186, "step": 125658 }, { "epoch": 12.912145499383477, "grad_norm": 0.11007482558488846, "learning_rate": 0.01, "loss": 1.9387, "step": 125661 }, { "epoch": 12.912453760789148, "grad_norm": 0.0421065129339695, "learning_rate": 0.01, "loss": 1.9371, "step": 125664 }, { "epoch": 12.912762022194821, "grad_norm": 0.11193957179784775, "learning_rate": 0.01, "loss": 1.918, "step": 125667 }, { "epoch": 12.913070283600494, "grad_norm": 0.05571720749139786, "learning_rate": 0.01, "loss": 1.9392, "step": 125670 }, { "epoch": 12.913378545006164, "grad_norm": 0.0685516744852066, "learning_rate": 0.01, "loss": 1.9202, "step": 125673 }, { "epoch": 12.913686806411837, "grad_norm": 0.03462745249271393, "learning_rate": 0.01, "loss": 1.9426, "step": 125676 }, { "epoch": 12.91399506781751, "grad_norm": 0.040001727640628815, "learning_rate": 0.01, "loss": 1.9502, "step": 125679 }, { "epoch": 12.91430332922318, "grad_norm": 0.1540547013282776, "learning_rate": 0.01, "loss": 1.9231, "step": 125682 }, { "epoch": 12.914611590628853, "grad_norm": 0.09268452227115631, "learning_rate": 0.01, "loss": 1.9449, "step": 125685 }, { "epoch": 12.914919852034526, "grad_norm": 0.050438400357961655, "learning_rate": 0.01, "loss": 1.9549, "step": 125688 }, { "epoch": 12.915228113440197, "grad_norm": 0.05333781987428665, "learning_rate": 0.01, "loss": 1.9476, "step": 125691 }, { "epoch": 12.91553637484587, "grad_norm": 0.0557393804192543, "learning_rate": 0.01, "loss": 1.9325, "step": 125694 }, { "epoch": 12.915844636251542, "grad_norm": 0.04467443376779556, "learning_rate": 0.01, "loss": 1.951, "step": 125697 }, { "epoch": 12.916152897657213, "grad_norm": 0.04811103269457817, "learning_rate": 0.01, "loss": 1.9125, "step": 125700 }, { "epoch": 12.916461159062885, "grad_norm": 0.046122658997774124, "learning_rate": 0.01, "loss": 1.9199, "step": 125703 }, { "epoch": 12.916769420468558, "grad_norm": 0.0338766910135746, "learning_rate": 0.01, "loss": 1.9277, "step": 125706 }, { "epoch": 12.917077681874229, "grad_norm": 0.09825889766216278, "learning_rate": 0.01, "loss": 1.9272, "step": 125709 }, { "epoch": 12.917385943279902, "grad_norm": 0.07365579903125763, "learning_rate": 0.01, "loss": 1.9165, "step": 125712 }, { "epoch": 12.917694204685574, "grad_norm": 0.057315003126859665, "learning_rate": 0.01, "loss": 1.9108, "step": 125715 }, { "epoch": 12.918002466091245, "grad_norm": 0.0833062157034874, "learning_rate": 0.01, "loss": 1.9544, "step": 125718 }, { "epoch": 12.918310727496918, "grad_norm": 0.059123445302248, "learning_rate": 0.01, "loss": 1.9341, "step": 125721 }, { "epoch": 12.918618988902589, "grad_norm": 0.09031741321086884, "learning_rate": 0.01, "loss": 1.9207, "step": 125724 }, { "epoch": 12.918927250308261, "grad_norm": 0.057371605187654495, "learning_rate": 0.01, "loss": 1.9462, "step": 125727 }, { "epoch": 12.919235511713934, "grad_norm": 0.0527326799929142, "learning_rate": 0.01, "loss": 1.9509, "step": 125730 }, { "epoch": 12.919543773119605, "grad_norm": 0.08562466502189636, "learning_rate": 0.01, "loss": 1.9387, "step": 125733 }, { "epoch": 12.919852034525277, "grad_norm": 0.09494850784540176, "learning_rate": 0.01, "loss": 1.9494, "step": 125736 }, { "epoch": 12.92016029593095, "grad_norm": 0.0675438866019249, "learning_rate": 0.01, "loss": 1.9441, "step": 125739 }, { "epoch": 12.920468557336621, "grad_norm": 0.05136521905660629, "learning_rate": 0.01, "loss": 1.9262, "step": 125742 }, { "epoch": 12.920776818742294, "grad_norm": 0.04856902360916138, "learning_rate": 0.01, "loss": 1.9321, "step": 125745 }, { "epoch": 12.921085080147966, "grad_norm": 0.10250970721244812, "learning_rate": 0.01, "loss": 1.9269, "step": 125748 }, { "epoch": 12.921393341553637, "grad_norm": 0.09212420880794525, "learning_rate": 0.01, "loss": 1.9552, "step": 125751 }, { "epoch": 12.92170160295931, "grad_norm": 0.08272308111190796, "learning_rate": 0.01, "loss": 1.904, "step": 125754 }, { "epoch": 12.922009864364982, "grad_norm": 0.0968143567442894, "learning_rate": 0.01, "loss": 1.9177, "step": 125757 }, { "epoch": 12.922318125770653, "grad_norm": 0.0924750417470932, "learning_rate": 0.01, "loss": 1.9323, "step": 125760 }, { "epoch": 12.922626387176326, "grad_norm": 0.03506479784846306, "learning_rate": 0.01, "loss": 1.9189, "step": 125763 }, { "epoch": 12.922934648581997, "grad_norm": 0.07778924703598022, "learning_rate": 0.01, "loss": 1.9517, "step": 125766 }, { "epoch": 12.92324290998767, "grad_norm": 0.05012965947389603, "learning_rate": 0.01, "loss": 1.9078, "step": 125769 }, { "epoch": 12.923551171393342, "grad_norm": 0.11207180470228195, "learning_rate": 0.01, "loss": 1.9448, "step": 125772 }, { "epoch": 12.923859432799013, "grad_norm": 0.11303342878818512, "learning_rate": 0.01, "loss": 1.9334, "step": 125775 }, { "epoch": 12.924167694204685, "grad_norm": 0.062038201838731766, "learning_rate": 0.01, "loss": 1.9434, "step": 125778 }, { "epoch": 12.924475955610358, "grad_norm": 0.03924969211220741, "learning_rate": 0.01, "loss": 1.927, "step": 125781 }, { "epoch": 12.924784217016029, "grad_norm": 0.09264468401670456, "learning_rate": 0.01, "loss": 1.9145, "step": 125784 }, { "epoch": 12.925092478421702, "grad_norm": 0.06861145794391632, "learning_rate": 0.01, "loss": 1.9194, "step": 125787 }, { "epoch": 12.925400739827374, "grad_norm": 0.08833136409521103, "learning_rate": 0.01, "loss": 1.9415, "step": 125790 }, { "epoch": 12.925709001233045, "grad_norm": 0.10634084790945053, "learning_rate": 0.01, "loss": 1.929, "step": 125793 }, { "epoch": 12.926017262638718, "grad_norm": 0.0582597479224205, "learning_rate": 0.01, "loss": 1.9555, "step": 125796 }, { "epoch": 12.92632552404439, "grad_norm": 0.0408993661403656, "learning_rate": 0.01, "loss": 1.9304, "step": 125799 }, { "epoch": 12.926633785450061, "grad_norm": 0.04898262768983841, "learning_rate": 0.01, "loss": 1.9629, "step": 125802 }, { "epoch": 12.926942046855734, "grad_norm": 0.07243214547634125, "learning_rate": 0.01, "loss": 1.9458, "step": 125805 }, { "epoch": 12.927250308261407, "grad_norm": 0.04170742630958557, "learning_rate": 0.01, "loss": 1.9093, "step": 125808 }, { "epoch": 12.927558569667077, "grad_norm": 0.08635149896144867, "learning_rate": 0.01, "loss": 1.9289, "step": 125811 }, { "epoch": 12.92786683107275, "grad_norm": 0.04802768677473068, "learning_rate": 0.01, "loss": 1.9505, "step": 125814 }, { "epoch": 12.928175092478421, "grad_norm": 0.04097206890583038, "learning_rate": 0.01, "loss": 1.9385, "step": 125817 }, { "epoch": 12.928483353884094, "grad_norm": 0.04401004686951637, "learning_rate": 0.01, "loss": 1.9337, "step": 125820 }, { "epoch": 12.928791615289766, "grad_norm": 0.037511322647333145, "learning_rate": 0.01, "loss": 1.9162, "step": 125823 }, { "epoch": 12.929099876695437, "grad_norm": 0.09262816607952118, "learning_rate": 0.01, "loss": 1.9479, "step": 125826 }, { "epoch": 12.92940813810111, "grad_norm": 0.04350706934928894, "learning_rate": 0.01, "loss": 1.9596, "step": 125829 }, { "epoch": 12.929716399506782, "grad_norm": 0.07727973163127899, "learning_rate": 0.01, "loss": 1.9384, "step": 125832 }, { "epoch": 12.930024660912453, "grad_norm": 0.05940230190753937, "learning_rate": 0.01, "loss": 1.9295, "step": 125835 }, { "epoch": 12.930332922318126, "grad_norm": 0.04025878757238388, "learning_rate": 0.01, "loss": 1.9158, "step": 125838 }, { "epoch": 12.930641183723798, "grad_norm": 0.06040516123175621, "learning_rate": 0.01, "loss": 1.9199, "step": 125841 }, { "epoch": 12.93094944512947, "grad_norm": 0.13478617370128632, "learning_rate": 0.01, "loss": 1.9426, "step": 125844 }, { "epoch": 12.931257706535142, "grad_norm": 0.18349310755729675, "learning_rate": 0.01, "loss": 1.9462, "step": 125847 }, { "epoch": 12.931565967940815, "grad_norm": 0.13208536803722382, "learning_rate": 0.01, "loss": 1.9111, "step": 125850 }, { "epoch": 12.931874229346485, "grad_norm": 0.03435422480106354, "learning_rate": 0.01, "loss": 1.9168, "step": 125853 }, { "epoch": 12.932182490752158, "grad_norm": 0.10820911824703217, "learning_rate": 0.01, "loss": 1.954, "step": 125856 }, { "epoch": 12.932490752157829, "grad_norm": 0.0809834748506546, "learning_rate": 0.01, "loss": 1.926, "step": 125859 }, { "epoch": 12.932799013563502, "grad_norm": 0.1473870873451233, "learning_rate": 0.01, "loss": 1.9438, "step": 125862 }, { "epoch": 12.933107274969174, "grad_norm": 0.055819351226091385, "learning_rate": 0.01, "loss": 1.9424, "step": 125865 }, { "epoch": 12.933415536374845, "grad_norm": 0.03641680255532265, "learning_rate": 0.01, "loss": 1.9292, "step": 125868 }, { "epoch": 12.933723797780518, "grad_norm": 0.039436932653188705, "learning_rate": 0.01, "loss": 1.9575, "step": 125871 }, { "epoch": 12.93403205918619, "grad_norm": 0.03236067295074463, "learning_rate": 0.01, "loss": 1.937, "step": 125874 }, { "epoch": 12.934340320591861, "grad_norm": 0.04350173473358154, "learning_rate": 0.01, "loss": 1.938, "step": 125877 }, { "epoch": 12.934648581997534, "grad_norm": 0.10082224756479263, "learning_rate": 0.01, "loss": 1.9283, "step": 125880 }, { "epoch": 12.934956843403207, "grad_norm": 0.03991500288248062, "learning_rate": 0.01, "loss": 1.9321, "step": 125883 }, { "epoch": 12.935265104808877, "grad_norm": 0.10700161010026932, "learning_rate": 0.01, "loss": 1.9328, "step": 125886 }, { "epoch": 12.93557336621455, "grad_norm": 0.07593095302581787, "learning_rate": 0.01, "loss": 1.949, "step": 125889 }, { "epoch": 12.935881627620223, "grad_norm": 0.19814598560333252, "learning_rate": 0.01, "loss": 1.9079, "step": 125892 }, { "epoch": 12.936189889025894, "grad_norm": 0.12118194252252579, "learning_rate": 0.01, "loss": 1.9607, "step": 125895 }, { "epoch": 12.936498150431566, "grad_norm": 0.08310011774301529, "learning_rate": 0.01, "loss": 1.9329, "step": 125898 }, { "epoch": 12.936806411837239, "grad_norm": 0.06381826102733612, "learning_rate": 0.01, "loss": 1.9363, "step": 125901 }, { "epoch": 12.93711467324291, "grad_norm": 0.08685937523841858, "learning_rate": 0.01, "loss": 1.9538, "step": 125904 }, { "epoch": 12.937422934648582, "grad_norm": 0.0512060709297657, "learning_rate": 0.01, "loss": 1.9204, "step": 125907 }, { "epoch": 12.937731196054255, "grad_norm": 0.06096792593598366, "learning_rate": 0.01, "loss": 1.9426, "step": 125910 }, { "epoch": 12.938039457459926, "grad_norm": 0.041577089577913284, "learning_rate": 0.01, "loss": 1.9511, "step": 125913 }, { "epoch": 12.938347718865598, "grad_norm": 0.027652839198708534, "learning_rate": 0.01, "loss": 1.9128, "step": 125916 }, { "epoch": 12.93865598027127, "grad_norm": 0.03172867372632027, "learning_rate": 0.01, "loss": 1.9043, "step": 125919 }, { "epoch": 12.938964241676942, "grad_norm": 0.033684540539979935, "learning_rate": 0.01, "loss": 1.9178, "step": 125922 }, { "epoch": 12.939272503082615, "grad_norm": 0.02770218439400196, "learning_rate": 0.01, "loss": 1.9286, "step": 125925 }, { "epoch": 12.939580764488285, "grad_norm": 0.07285824418067932, "learning_rate": 0.01, "loss": 1.9366, "step": 125928 }, { "epoch": 12.939889025893958, "grad_norm": 0.05254669860005379, "learning_rate": 0.01, "loss": 1.9138, "step": 125931 }, { "epoch": 12.94019728729963, "grad_norm": 0.1141522154211998, "learning_rate": 0.01, "loss": 1.8849, "step": 125934 }, { "epoch": 12.940505548705302, "grad_norm": 0.06985414028167725, "learning_rate": 0.01, "loss": 1.9219, "step": 125937 }, { "epoch": 12.940813810110974, "grad_norm": 0.05042961239814758, "learning_rate": 0.01, "loss": 1.9413, "step": 125940 }, { "epoch": 12.941122071516647, "grad_norm": 0.09245620667934418, "learning_rate": 0.01, "loss": 1.9305, "step": 125943 }, { "epoch": 12.941430332922318, "grad_norm": 0.07112108170986176, "learning_rate": 0.01, "loss": 1.9133, "step": 125946 }, { "epoch": 12.94173859432799, "grad_norm": 0.04745557904243469, "learning_rate": 0.01, "loss": 1.9238, "step": 125949 }, { "epoch": 12.942046855733661, "grad_norm": 0.09495459496974945, "learning_rate": 0.01, "loss": 1.9333, "step": 125952 }, { "epoch": 12.942355117139334, "grad_norm": 0.07864931970834732, "learning_rate": 0.01, "loss": 1.9411, "step": 125955 }, { "epoch": 12.942663378545006, "grad_norm": 0.07978437840938568, "learning_rate": 0.01, "loss": 1.9488, "step": 125958 }, { "epoch": 12.942971639950677, "grad_norm": 0.06312549114227295, "learning_rate": 0.01, "loss": 1.9061, "step": 125961 }, { "epoch": 12.94327990135635, "grad_norm": 0.05435273423790932, "learning_rate": 0.01, "loss": 1.9528, "step": 125964 }, { "epoch": 12.943588162762023, "grad_norm": 0.06605318188667297, "learning_rate": 0.01, "loss": 1.9441, "step": 125967 }, { "epoch": 12.943896424167693, "grad_norm": 0.10791897028684616, "learning_rate": 0.01, "loss": 1.9449, "step": 125970 }, { "epoch": 12.944204685573366, "grad_norm": 0.036136701703071594, "learning_rate": 0.01, "loss": 1.9119, "step": 125973 }, { "epoch": 12.944512946979039, "grad_norm": 0.057532746344804764, "learning_rate": 0.01, "loss": 1.9474, "step": 125976 }, { "epoch": 12.94482120838471, "grad_norm": 0.05744960159063339, "learning_rate": 0.01, "loss": 1.9389, "step": 125979 }, { "epoch": 12.945129469790382, "grad_norm": 0.04510439187288284, "learning_rate": 0.01, "loss": 1.9345, "step": 125982 }, { "epoch": 12.945437731196055, "grad_norm": 0.05059744790196419, "learning_rate": 0.01, "loss": 1.9394, "step": 125985 }, { "epoch": 12.945745992601726, "grad_norm": 0.081993468105793, "learning_rate": 0.01, "loss": 1.9344, "step": 125988 }, { "epoch": 12.946054254007398, "grad_norm": 0.04813908040523529, "learning_rate": 0.01, "loss": 1.9478, "step": 125991 }, { "epoch": 12.946362515413071, "grad_norm": 0.061780817806720734, "learning_rate": 0.01, "loss": 1.9468, "step": 125994 }, { "epoch": 12.946670776818742, "grad_norm": 0.04033418744802475, "learning_rate": 0.01, "loss": 1.9377, "step": 125997 }, { "epoch": 12.946979038224415, "grad_norm": 0.03506530076265335, "learning_rate": 0.01, "loss": 1.9303, "step": 126000 }, { "epoch": 12.947287299630087, "grad_norm": 0.06479054689407349, "learning_rate": 0.01, "loss": 1.9417, "step": 126003 }, { "epoch": 12.947595561035758, "grad_norm": 0.05530203506350517, "learning_rate": 0.01, "loss": 1.9394, "step": 126006 }, { "epoch": 12.94790382244143, "grad_norm": 0.05367561802268028, "learning_rate": 0.01, "loss": 1.9393, "step": 126009 }, { "epoch": 12.948212083847102, "grad_norm": 0.05060233920812607, "learning_rate": 0.01, "loss": 1.9184, "step": 126012 }, { "epoch": 12.948520345252774, "grad_norm": 0.06014840304851532, "learning_rate": 0.01, "loss": 1.9598, "step": 126015 }, { "epoch": 12.948828606658447, "grad_norm": 0.11498754471540451, "learning_rate": 0.01, "loss": 1.9351, "step": 126018 }, { "epoch": 12.949136868064118, "grad_norm": 0.09030990302562714, "learning_rate": 0.01, "loss": 1.9521, "step": 126021 }, { "epoch": 12.94944512946979, "grad_norm": 0.09651051461696625, "learning_rate": 0.01, "loss": 1.9334, "step": 126024 }, { "epoch": 12.949753390875463, "grad_norm": 0.11377514153718948, "learning_rate": 0.01, "loss": 1.928, "step": 126027 }, { "epoch": 12.950061652281134, "grad_norm": 0.08735734224319458, "learning_rate": 0.01, "loss": 1.9277, "step": 126030 }, { "epoch": 12.950369913686806, "grad_norm": 0.08239898085594177, "learning_rate": 0.01, "loss": 1.9334, "step": 126033 }, { "epoch": 12.950678175092479, "grad_norm": 0.049356214702129364, "learning_rate": 0.01, "loss": 1.921, "step": 126036 }, { "epoch": 12.95098643649815, "grad_norm": 0.04305762052536011, "learning_rate": 0.01, "loss": 1.935, "step": 126039 }, { "epoch": 12.951294697903823, "grad_norm": 0.03519025072455406, "learning_rate": 0.01, "loss": 1.9442, "step": 126042 }, { "epoch": 12.951602959309495, "grad_norm": 0.03509996086359024, "learning_rate": 0.01, "loss": 1.9312, "step": 126045 }, { "epoch": 12.951911220715166, "grad_norm": 0.055822983384132385, "learning_rate": 0.01, "loss": 1.9378, "step": 126048 }, { "epoch": 12.952219482120839, "grad_norm": 0.06479526311159134, "learning_rate": 0.01, "loss": 1.9289, "step": 126051 }, { "epoch": 12.95252774352651, "grad_norm": 0.061816923320293427, "learning_rate": 0.01, "loss": 1.9236, "step": 126054 }, { "epoch": 12.952836004932182, "grad_norm": 0.0821833461523056, "learning_rate": 0.01, "loss": 1.9306, "step": 126057 }, { "epoch": 12.953144266337855, "grad_norm": 0.06200600787997246, "learning_rate": 0.01, "loss": 1.9352, "step": 126060 }, { "epoch": 12.953452527743526, "grad_norm": 0.10300254821777344, "learning_rate": 0.01, "loss": 1.9366, "step": 126063 }, { "epoch": 12.953760789149198, "grad_norm": 0.04916617274284363, "learning_rate": 0.01, "loss": 1.9414, "step": 126066 }, { "epoch": 12.954069050554871, "grad_norm": 0.08358190208673477, "learning_rate": 0.01, "loss": 1.9243, "step": 126069 }, { "epoch": 12.954377311960542, "grad_norm": 0.050949402153491974, "learning_rate": 0.01, "loss": 1.9147, "step": 126072 }, { "epoch": 12.954685573366215, "grad_norm": 0.06936567276716232, "learning_rate": 0.01, "loss": 1.9006, "step": 126075 }, { "epoch": 12.954993834771887, "grad_norm": 0.1449357569217682, "learning_rate": 0.01, "loss": 1.9291, "step": 126078 }, { "epoch": 12.955302096177558, "grad_norm": 0.09677485376596451, "learning_rate": 0.01, "loss": 1.9225, "step": 126081 }, { "epoch": 12.95561035758323, "grad_norm": 0.1612035185098648, "learning_rate": 0.01, "loss": 1.9131, "step": 126084 }, { "epoch": 12.955918618988903, "grad_norm": 0.060679059475660324, "learning_rate": 0.01, "loss": 1.9315, "step": 126087 }, { "epoch": 12.956226880394574, "grad_norm": 0.10726216435432434, "learning_rate": 0.01, "loss": 1.9399, "step": 126090 }, { "epoch": 12.956535141800247, "grad_norm": 0.10506871342658997, "learning_rate": 0.01, "loss": 1.908, "step": 126093 }, { "epoch": 12.95684340320592, "grad_norm": 0.041960135102272034, "learning_rate": 0.01, "loss": 1.9099, "step": 126096 }, { "epoch": 12.95715166461159, "grad_norm": 0.08968428522348404, "learning_rate": 0.01, "loss": 1.9285, "step": 126099 }, { "epoch": 12.957459926017263, "grad_norm": 0.0700458213686943, "learning_rate": 0.01, "loss": 1.9267, "step": 126102 }, { "epoch": 12.957768187422936, "grad_norm": 0.04659474268555641, "learning_rate": 0.01, "loss": 1.9405, "step": 126105 }, { "epoch": 12.958076448828606, "grad_norm": 0.09629134833812714, "learning_rate": 0.01, "loss": 1.95, "step": 126108 }, { "epoch": 12.958384710234279, "grad_norm": 0.06129566580057144, "learning_rate": 0.01, "loss": 1.9362, "step": 126111 }, { "epoch": 12.95869297163995, "grad_norm": 0.032240573316812515, "learning_rate": 0.01, "loss": 1.9114, "step": 126114 }, { "epoch": 12.959001233045623, "grad_norm": 0.08730331808328629, "learning_rate": 0.01, "loss": 1.9341, "step": 126117 }, { "epoch": 12.959309494451295, "grad_norm": 0.10972293466329575, "learning_rate": 0.01, "loss": 1.9274, "step": 126120 }, { "epoch": 12.959617755856966, "grad_norm": 0.037579625844955444, "learning_rate": 0.01, "loss": 1.9489, "step": 126123 }, { "epoch": 12.959926017262639, "grad_norm": 0.08131605386734009, "learning_rate": 0.01, "loss": 1.9341, "step": 126126 }, { "epoch": 12.960234278668311, "grad_norm": 0.0587998703122139, "learning_rate": 0.01, "loss": 1.9353, "step": 126129 }, { "epoch": 12.960542540073982, "grad_norm": 0.09930302947759628, "learning_rate": 0.01, "loss": 1.9288, "step": 126132 }, { "epoch": 12.960850801479655, "grad_norm": 0.07516415417194366, "learning_rate": 0.01, "loss": 1.9437, "step": 126135 }, { "epoch": 12.961159062885327, "grad_norm": 0.03397361934185028, "learning_rate": 0.01, "loss": 1.9201, "step": 126138 }, { "epoch": 12.961467324290998, "grad_norm": 0.05763630568981171, "learning_rate": 0.01, "loss": 1.9434, "step": 126141 }, { "epoch": 12.961775585696671, "grad_norm": 0.07452522218227386, "learning_rate": 0.01, "loss": 1.9637, "step": 126144 }, { "epoch": 12.962083847102342, "grad_norm": 0.050457656383514404, "learning_rate": 0.01, "loss": 1.9438, "step": 126147 }, { "epoch": 12.962392108508014, "grad_norm": 0.04492957890033722, "learning_rate": 0.01, "loss": 1.9248, "step": 126150 }, { "epoch": 12.962700369913687, "grad_norm": 0.032196804881095886, "learning_rate": 0.01, "loss": 1.9028, "step": 126153 }, { "epoch": 12.963008631319358, "grad_norm": 0.036084260791540146, "learning_rate": 0.01, "loss": 1.9597, "step": 126156 }, { "epoch": 12.96331689272503, "grad_norm": 0.10411658883094788, "learning_rate": 0.01, "loss": 1.9462, "step": 126159 }, { "epoch": 12.963625154130703, "grad_norm": 0.07700277864933014, "learning_rate": 0.01, "loss": 1.8948, "step": 126162 }, { "epoch": 12.963933415536374, "grad_norm": 0.057165950536727905, "learning_rate": 0.01, "loss": 1.9333, "step": 126165 }, { "epoch": 12.964241676942047, "grad_norm": 0.10003688931465149, "learning_rate": 0.01, "loss": 1.9135, "step": 126168 }, { "epoch": 12.96454993834772, "grad_norm": 0.03061365708708763, "learning_rate": 0.01, "loss": 1.9347, "step": 126171 }, { "epoch": 12.96485819975339, "grad_norm": 0.11224297434091568, "learning_rate": 0.01, "loss": 1.918, "step": 126174 }, { "epoch": 12.965166461159063, "grad_norm": 0.09531304240226746, "learning_rate": 0.01, "loss": 1.959, "step": 126177 }, { "epoch": 12.965474722564736, "grad_norm": 0.0589129813015461, "learning_rate": 0.01, "loss": 1.9621, "step": 126180 }, { "epoch": 12.965782983970406, "grad_norm": 0.05862326920032501, "learning_rate": 0.01, "loss": 1.9137, "step": 126183 }, { "epoch": 12.966091245376079, "grad_norm": 0.04244602099061012, "learning_rate": 0.01, "loss": 1.9487, "step": 126186 }, { "epoch": 12.966399506781752, "grad_norm": 0.039083950221538544, "learning_rate": 0.01, "loss": 1.9234, "step": 126189 }, { "epoch": 12.966707768187423, "grad_norm": 0.04154587909579277, "learning_rate": 0.01, "loss": 1.9395, "step": 126192 }, { "epoch": 12.967016029593095, "grad_norm": 0.04228314757347107, "learning_rate": 0.01, "loss": 1.9396, "step": 126195 }, { "epoch": 12.967324290998768, "grad_norm": 0.11571267992258072, "learning_rate": 0.01, "loss": 1.9227, "step": 126198 }, { "epoch": 12.967632552404439, "grad_norm": 0.08702175319194794, "learning_rate": 0.01, "loss": 1.9303, "step": 126201 }, { "epoch": 12.967940813810111, "grad_norm": 0.07003472000360489, "learning_rate": 0.01, "loss": 1.9482, "step": 126204 }, { "epoch": 12.968249075215782, "grad_norm": 0.05063821002840996, "learning_rate": 0.01, "loss": 1.9386, "step": 126207 }, { "epoch": 12.968557336621455, "grad_norm": 0.034900352358818054, "learning_rate": 0.01, "loss": 1.9066, "step": 126210 }, { "epoch": 12.968865598027127, "grad_norm": 0.06341734528541565, "learning_rate": 0.01, "loss": 1.9321, "step": 126213 }, { "epoch": 12.969173859432798, "grad_norm": 0.0889853686094284, "learning_rate": 0.01, "loss": 1.9402, "step": 126216 }, { "epoch": 12.969482120838471, "grad_norm": 0.042263999581336975, "learning_rate": 0.01, "loss": 1.9118, "step": 126219 }, { "epoch": 12.969790382244144, "grad_norm": 0.04034658893942833, "learning_rate": 0.01, "loss": 1.9408, "step": 126222 }, { "epoch": 12.970098643649814, "grad_norm": 0.043879568576812744, "learning_rate": 0.01, "loss": 1.9364, "step": 126225 }, { "epoch": 12.970406905055487, "grad_norm": 0.10296093672513962, "learning_rate": 0.01, "loss": 1.9163, "step": 126228 }, { "epoch": 12.97071516646116, "grad_norm": 0.05123838409781456, "learning_rate": 0.01, "loss": 1.9399, "step": 126231 }, { "epoch": 12.97102342786683, "grad_norm": 0.03683066740632057, "learning_rate": 0.01, "loss": 1.9238, "step": 126234 }, { "epoch": 12.971331689272503, "grad_norm": 0.09338618069887161, "learning_rate": 0.01, "loss": 1.9524, "step": 126237 }, { "epoch": 12.971639950678176, "grad_norm": 0.07788629829883575, "learning_rate": 0.01, "loss": 1.9444, "step": 126240 }, { "epoch": 12.971948212083847, "grad_norm": 0.07458705455064774, "learning_rate": 0.01, "loss": 1.9181, "step": 126243 }, { "epoch": 12.97225647348952, "grad_norm": 0.0914078876376152, "learning_rate": 0.01, "loss": 1.9366, "step": 126246 }, { "epoch": 12.97256473489519, "grad_norm": 0.0758683905005455, "learning_rate": 0.01, "loss": 1.9347, "step": 126249 }, { "epoch": 12.972872996300863, "grad_norm": 0.10591443628072739, "learning_rate": 0.01, "loss": 1.9242, "step": 126252 }, { "epoch": 12.973181257706536, "grad_norm": 0.15483787655830383, "learning_rate": 0.01, "loss": 1.9291, "step": 126255 }, { "epoch": 12.973489519112206, "grad_norm": 0.0808720663189888, "learning_rate": 0.01, "loss": 1.9478, "step": 126258 }, { "epoch": 12.973797780517879, "grad_norm": 0.08866417407989502, "learning_rate": 0.01, "loss": 1.9285, "step": 126261 }, { "epoch": 12.974106041923552, "grad_norm": 0.04881209880113602, "learning_rate": 0.01, "loss": 1.9611, "step": 126264 }, { "epoch": 12.974414303329223, "grad_norm": 0.053279809653759, "learning_rate": 0.01, "loss": 1.9204, "step": 126267 }, { "epoch": 12.974722564734895, "grad_norm": 0.04660681635141373, "learning_rate": 0.01, "loss": 1.9204, "step": 126270 }, { "epoch": 12.975030826140568, "grad_norm": 0.03146835044026375, "learning_rate": 0.01, "loss": 1.9372, "step": 126273 }, { "epoch": 12.975339087546239, "grad_norm": 0.058334872126579285, "learning_rate": 0.01, "loss": 1.91, "step": 126276 }, { "epoch": 12.975647348951911, "grad_norm": 0.05183687061071396, "learning_rate": 0.01, "loss": 1.949, "step": 126279 }, { "epoch": 12.975955610357584, "grad_norm": 0.1583312749862671, "learning_rate": 0.01, "loss": 1.9495, "step": 126282 }, { "epoch": 12.976263871763255, "grad_norm": 0.09854957461357117, "learning_rate": 0.01, "loss": 1.9556, "step": 126285 }, { "epoch": 12.976572133168927, "grad_norm": 0.04723304137587547, "learning_rate": 0.01, "loss": 1.91, "step": 126288 }, { "epoch": 12.9768803945746, "grad_norm": 0.10145114362239838, "learning_rate": 0.01, "loss": 1.9265, "step": 126291 }, { "epoch": 12.977188655980271, "grad_norm": 0.04832020401954651, "learning_rate": 0.01, "loss": 1.9268, "step": 126294 }, { "epoch": 12.977496917385944, "grad_norm": 0.04121669381856918, "learning_rate": 0.01, "loss": 1.9272, "step": 126297 }, { "epoch": 12.977805178791616, "grad_norm": 0.0549682192504406, "learning_rate": 0.01, "loss": 1.9484, "step": 126300 }, { "epoch": 12.978113440197287, "grad_norm": 0.10428796708583832, "learning_rate": 0.01, "loss": 1.9248, "step": 126303 }, { "epoch": 12.97842170160296, "grad_norm": 0.045746032148599625, "learning_rate": 0.01, "loss": 1.9399, "step": 126306 }, { "epoch": 12.97872996300863, "grad_norm": 0.13235408067703247, "learning_rate": 0.01, "loss": 1.9411, "step": 126309 }, { "epoch": 12.979038224414303, "grad_norm": 0.06408566981554031, "learning_rate": 0.01, "loss": 1.928, "step": 126312 }, { "epoch": 12.979346485819976, "grad_norm": 0.0649254247546196, "learning_rate": 0.01, "loss": 1.9396, "step": 126315 }, { "epoch": 12.979654747225647, "grad_norm": 0.052825432270765305, "learning_rate": 0.01, "loss": 1.9359, "step": 126318 }, { "epoch": 12.97996300863132, "grad_norm": 0.03324391692876816, "learning_rate": 0.01, "loss": 1.918, "step": 126321 }, { "epoch": 12.980271270036992, "grad_norm": 0.04539883881807327, "learning_rate": 0.01, "loss": 1.9389, "step": 126324 }, { "epoch": 12.980579531442663, "grad_norm": 0.04607979580760002, "learning_rate": 0.01, "loss": 1.9387, "step": 126327 }, { "epoch": 12.980887792848335, "grad_norm": 0.036359164863824844, "learning_rate": 0.01, "loss": 1.9502, "step": 126330 }, { "epoch": 12.981196054254008, "grad_norm": 0.13999003171920776, "learning_rate": 0.01, "loss": 1.9501, "step": 126333 }, { "epoch": 12.981504315659679, "grad_norm": 0.05416400358080864, "learning_rate": 0.01, "loss": 1.9603, "step": 126336 }, { "epoch": 12.981812577065352, "grad_norm": 0.09667657315731049, "learning_rate": 0.01, "loss": 1.9239, "step": 126339 }, { "epoch": 12.982120838471022, "grad_norm": 0.037298351526260376, "learning_rate": 0.01, "loss": 1.9338, "step": 126342 }, { "epoch": 12.982429099876695, "grad_norm": 0.11069020628929138, "learning_rate": 0.01, "loss": 1.9328, "step": 126345 }, { "epoch": 12.982737361282368, "grad_norm": 0.06094904616475105, "learning_rate": 0.01, "loss": 1.9332, "step": 126348 }, { "epoch": 12.983045622688039, "grad_norm": 0.05997653305530548, "learning_rate": 0.01, "loss": 1.9451, "step": 126351 }, { "epoch": 12.983353884093711, "grad_norm": 0.05035969242453575, "learning_rate": 0.01, "loss": 1.925, "step": 126354 }, { "epoch": 12.983662145499384, "grad_norm": 0.061228420585393906, "learning_rate": 0.01, "loss": 1.9299, "step": 126357 }, { "epoch": 12.983970406905055, "grad_norm": 0.03478321060538292, "learning_rate": 0.01, "loss": 1.9244, "step": 126360 }, { "epoch": 12.984278668310727, "grad_norm": 0.04280786216259003, "learning_rate": 0.01, "loss": 1.9566, "step": 126363 }, { "epoch": 12.9845869297164, "grad_norm": 0.05942310765385628, "learning_rate": 0.01, "loss": 1.9251, "step": 126366 }, { "epoch": 12.984895191122071, "grad_norm": 0.10070284456014633, "learning_rate": 0.01, "loss": 1.9422, "step": 126369 }, { "epoch": 12.985203452527744, "grad_norm": 0.06833632290363312, "learning_rate": 0.01, "loss": 1.9247, "step": 126372 }, { "epoch": 12.985511713933416, "grad_norm": 0.0459744967520237, "learning_rate": 0.01, "loss": 1.9251, "step": 126375 }, { "epoch": 12.985819975339087, "grad_norm": 0.10393807291984558, "learning_rate": 0.01, "loss": 1.9465, "step": 126378 }, { "epoch": 12.98612823674476, "grad_norm": 0.09013234078884125, "learning_rate": 0.01, "loss": 1.9417, "step": 126381 }, { "epoch": 12.986436498150432, "grad_norm": 0.0976896733045578, "learning_rate": 0.01, "loss": 1.936, "step": 126384 }, { "epoch": 12.986744759556103, "grad_norm": 0.08779832720756531, "learning_rate": 0.01, "loss": 1.9383, "step": 126387 }, { "epoch": 12.987053020961776, "grad_norm": 0.06239229440689087, "learning_rate": 0.01, "loss": 1.9357, "step": 126390 }, { "epoch": 12.987361282367448, "grad_norm": 0.05805114284157753, "learning_rate": 0.01, "loss": 1.9477, "step": 126393 }, { "epoch": 12.98766954377312, "grad_norm": 0.045233603566884995, "learning_rate": 0.01, "loss": 1.9469, "step": 126396 }, { "epoch": 12.987977805178792, "grad_norm": 0.05403595790266991, "learning_rate": 0.01, "loss": 1.9599, "step": 126399 }, { "epoch": 12.988286066584463, "grad_norm": 0.04203466325998306, "learning_rate": 0.01, "loss": 1.9094, "step": 126402 }, { "epoch": 12.988594327990135, "grad_norm": 0.04115534946322441, "learning_rate": 0.01, "loss": 1.933, "step": 126405 }, { "epoch": 12.988902589395808, "grad_norm": 0.0468607060611248, "learning_rate": 0.01, "loss": 1.8964, "step": 126408 }, { "epoch": 12.989210850801479, "grad_norm": 0.09589367359876633, "learning_rate": 0.01, "loss": 1.9105, "step": 126411 }, { "epoch": 12.989519112207152, "grad_norm": 0.091950923204422, "learning_rate": 0.01, "loss": 1.9155, "step": 126414 }, { "epoch": 12.989827373612824, "grad_norm": 0.0383729413151741, "learning_rate": 0.01, "loss": 1.9151, "step": 126417 }, { "epoch": 12.990135635018495, "grad_norm": 0.0655297264456749, "learning_rate": 0.01, "loss": 1.9339, "step": 126420 }, { "epoch": 12.990443896424168, "grad_norm": 0.08317092061042786, "learning_rate": 0.01, "loss": 1.9205, "step": 126423 }, { "epoch": 12.99075215782984, "grad_norm": 0.09387842565774918, "learning_rate": 0.01, "loss": 1.9188, "step": 126426 }, { "epoch": 12.991060419235511, "grad_norm": 0.19405832886695862, "learning_rate": 0.01, "loss": 1.9365, "step": 126429 }, { "epoch": 12.991368680641184, "grad_norm": 0.08507779985666275, "learning_rate": 0.01, "loss": 1.9232, "step": 126432 }, { "epoch": 12.991676942046857, "grad_norm": 0.09436507523059845, "learning_rate": 0.01, "loss": 1.9334, "step": 126435 }, { "epoch": 12.991985203452527, "grad_norm": 0.06602399051189423, "learning_rate": 0.01, "loss": 1.9278, "step": 126438 }, { "epoch": 12.9922934648582, "grad_norm": 0.06707599759101868, "learning_rate": 0.01, "loss": 1.9239, "step": 126441 }, { "epoch": 12.99260172626387, "grad_norm": 0.05414499342441559, "learning_rate": 0.01, "loss": 1.9207, "step": 126444 }, { "epoch": 12.992909987669544, "grad_norm": 0.04891413450241089, "learning_rate": 0.01, "loss": 1.9187, "step": 126447 }, { "epoch": 12.993218249075216, "grad_norm": 0.03576958179473877, "learning_rate": 0.01, "loss": 1.9697, "step": 126450 }, { "epoch": 12.993526510480887, "grad_norm": 0.04596221446990967, "learning_rate": 0.01, "loss": 1.9381, "step": 126453 }, { "epoch": 12.99383477188656, "grad_norm": 0.03685097396373749, "learning_rate": 0.01, "loss": 1.935, "step": 126456 }, { "epoch": 12.994143033292232, "grad_norm": 0.1213253065943718, "learning_rate": 0.01, "loss": 1.9594, "step": 126459 }, { "epoch": 12.994451294697903, "grad_norm": 0.08971472829580307, "learning_rate": 0.01, "loss": 1.915, "step": 126462 }, { "epoch": 12.994759556103576, "grad_norm": 0.09155774116516113, "learning_rate": 0.01, "loss": 1.9134, "step": 126465 }, { "epoch": 12.995067817509248, "grad_norm": 0.05760270729660988, "learning_rate": 0.01, "loss": 1.9378, "step": 126468 }, { "epoch": 12.99537607891492, "grad_norm": 0.03982146829366684, "learning_rate": 0.01, "loss": 1.9644, "step": 126471 }, { "epoch": 12.995684340320592, "grad_norm": 0.03316031023859978, "learning_rate": 0.01, "loss": 1.932, "step": 126474 }, { "epoch": 12.995992601726265, "grad_norm": 0.06980220228433609, "learning_rate": 0.01, "loss": 1.9485, "step": 126477 }, { "epoch": 12.996300863131935, "grad_norm": 0.06975305825471878, "learning_rate": 0.01, "loss": 1.9159, "step": 126480 }, { "epoch": 12.996609124537608, "grad_norm": 0.04797213152050972, "learning_rate": 0.01, "loss": 1.9478, "step": 126483 }, { "epoch": 12.99691738594328, "grad_norm": 0.07199689000844955, "learning_rate": 0.01, "loss": 1.9186, "step": 126486 }, { "epoch": 12.997225647348952, "grad_norm": 0.05560987815260887, "learning_rate": 0.01, "loss": 1.9587, "step": 126489 }, { "epoch": 12.997533908754624, "grad_norm": 0.07067282497882843, "learning_rate": 0.01, "loss": 1.9252, "step": 126492 }, { "epoch": 12.997842170160297, "grad_norm": 0.0505492277443409, "learning_rate": 0.01, "loss": 1.9188, "step": 126495 }, { "epoch": 12.998150431565968, "grad_norm": 0.05297574773430824, "learning_rate": 0.01, "loss": 1.9486, "step": 126498 }, { "epoch": 12.99845869297164, "grad_norm": 0.05754699558019638, "learning_rate": 0.01, "loss": 1.9354, "step": 126501 }, { "epoch": 12.998766954377311, "grad_norm": 0.07020077109336853, "learning_rate": 0.01, "loss": 1.9171, "step": 126504 }, { "epoch": 12.999075215782984, "grad_norm": 0.1119004338979721, "learning_rate": 0.01, "loss": 1.9257, "step": 126507 }, { "epoch": 12.999383477188656, "grad_norm": 0.05641701817512512, "learning_rate": 0.01, "loss": 1.9253, "step": 126510 }, { "epoch": 12.999691738594327, "grad_norm": 0.09984719753265381, "learning_rate": 0.01, "loss": 1.9067, "step": 126513 }, { "epoch": 13.0, "grad_norm": 0.04795163497328758, "learning_rate": 0.01, "loss": 1.9279, "step": 126516 } ], "logging_steps": 3, "max_steps": 126516, "num_input_tokens_seen": 0, "num_train_epochs": 13, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.406503007608763e+21, "train_batch_size": 21, "trial_name": null, "trial_params": null }